summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.get_maintainer.ignore1
-rw-r--r--.mailmap19
-rw-r--r--CREDITS18
-rw-r--r--Documentation/ABI/testing/debugfs-intel-iommu276
-rw-r--r--Documentation/ABI/testing/sysfs-bus-vdpa10
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-queues45
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-statistics48
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu1
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon14
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon14
-rw-r--r--Documentation/ABI/testing/sysfs-nvmem-cells16
-rw-r--r--Documentation/ABI/testing/sysfs-platform-silicom1
-rw-r--r--Documentation/Makefile5
-rw-r--r--Documentation/RCU/checklist.rst32
-rw-r--r--Documentation/RCU/rcu_dereference.rst5
-rw-r--r--Documentation/RCU/torture.rst2
-rw-r--r--Documentation/RCU/whatisRCU.rst19
-rw-r--r--Documentation/accel/introduction.rst4
-rw-r--r--Documentation/admin-guide/RAS/address-translation.rst24
-rw-r--r--Documentation/admin-guide/RAS/error-decoding.rst (renamed from Documentation/RAS/ras.rst)11
-rw-r--r--Documentation/admin-guide/RAS/index.rst7
-rw-r--r--Documentation/admin-guide/RAS/main.rst (renamed from Documentation/admin-guide/ras.rst)10
-rw-r--r--Documentation/admin-guide/README.rst69
-rw-r--r--Documentation/admin-guide/cgroup-v1/cpusets.rst2
-rw-r--r--Documentation/admin-guide/cgroup-v1/hugetlb.rst20
-rw-r--r--Documentation/admin-guide/device-mapper/index.rst2
-rw-r--r--Documentation/admin-guide/device-mapper/vdo-design.rst633
-rw-r--r--Documentation/admin-guide/device-mapper/vdo.rst406
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst104
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst8
-rw-r--r--Documentation/admin-guide/index.rst4
-rw-r--r--Documentation/admin-guide/kdump/kdump.rst7
-rw-r--r--Documentation/admin-guide/kernel-parameters.rst6
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt629
-rw-r--r--Documentation/admin-guide/kernel-per-CPU-kthreads.rst16
-rw-r--r--Documentation/admin-guide/sysctl/net.rst5
-rw-r--r--Documentation/admin-guide/tainted-kernels.rst4
-rw-r--r--Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst1952
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst7
-rw-r--r--Documentation/arch/x86/amd-memory-encryption.rst16
-rw-r--r--Documentation/arch/x86/boot.rst3
-rw-r--r--Documentation/arch/x86/mds.rst38
-rw-r--r--Documentation/arch/x86/pti.rst6
-rw-r--r--Documentation/arch/x86/topology.rst24
-rw-r--r--Documentation/arch/x86/x86_64/fred.rst96
-rw-r--r--Documentation/arch/x86/x86_64/index.rst1
-rw-r--r--Documentation/bpf/kfuncs.rst8
-rw-r--r--Documentation/bpf/map_lpm_trie.rst2
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst594
-rw-r--r--Documentation/bpf/verifier.rst2
-rw-r--r--Documentation/conf.py12
-rw-r--r--Documentation/core-api/workqueue.rst43
-rw-r--r--Documentation/dev-tools/checkpatch.rst4
-rw-r--r--Documentation/dev-tools/kasan.rst21
-rw-r--r--Documentation/dev-tools/kselftest.rst16
-rw-r--r--Documentation/dev-tools/kunit/usage.rst19
-rw-r--r--Documentation/dev-tools/ubsan.rst28
-rw-r--r--Documentation/devicetree/bindings/Makefile5
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.yaml15
-rw-r--r--Documentation/devicetree/bindings/arm/arm,realview.yaml6
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.yaml6
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.yaml41
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-38x.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek.yaml198
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt58
-rw-r--r--Documentation/devicetree/bindings/arm/qcom.yaml58
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.yaml71
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.yaml12
-rw-r--r--Documentation/devicetree/bindings/arm/tegra.yaml8
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml58
-rw-r--r--Documentation/devicetree/bindings/arm/ti/k3.yaml14
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-mtk.txt51
-rw-r--r--Documentation/devicetree/bindings/ata/atmel-at91_cf.txt19
-rw-r--r--Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml3
-rw-r--r--Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml98
-rw-r--r--Documentation/devicetree/bindings/bus/imx-weim.txt117
-rw-r--r--Documentation/devicetree/bindings/clock/google,gs101-clock.yaml32
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml7
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml7
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8650-dispcc.yaml106
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml6
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml96
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml2
-rw-r--r--Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml3
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml (renamed from Documentation/devicetree/bindings/gpu/img,powervr.yaml)4
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml138
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml61
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml20
-rw-r--r--Documentation/devicetree/bindings/leds/common.yaml12
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm63138.yaml4
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6328.yaml4
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6358.txt2
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml4
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm.yaml5
-rw-r--r--Documentation/devicetree/bindings/media/cnm,wave521c.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml31
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml3
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml31
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml204
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml1
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml2
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml1
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/can/tcan4x5x.txt3
-rw-r--r--Documentation/devicetree/bindings/net/can/xilinx,can.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/cdns,macb.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/dsa/ar9331.txt147
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca,ar9331.yaml161
-rw-r--r--Documentation/devicetree/bindings/net/dsa/realtek.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy-package.yaml52
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fec.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/marvell,prestera.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca808x.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ethqos.yaml9
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipa.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml15
-rw-r--r--Documentation/devicetree/bindings/net/qcom,qca807x.yaml184
-rw-r--r--Documentation/devicetree/bindings/net/renesas,etheravb.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ethertsn.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml17
-rw-r--r--Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml72
-rw-r--r--Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83822.yaml34
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml33
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml1
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rst.yaml1
-rw-r--r--Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml3
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,pbs.yaml46
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,saw2.yaml (renamed from Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml)46
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas-soc.yaml73
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas.yaml25
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.yaml22
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml2
-rw-r--r--Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml2
-rw-r--r--Documentation/devicetree/bindings/tpm/tpm-common.yaml2
-rw-r--r--Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml9
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml3
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,usb5744.yaml3
-rw-r--r--Documentation/devicetree/bindings/usb/xlnx,usb2.yaml3
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml8
-rw-r--r--Documentation/doc-guide/kernel-doc.rst45
-rw-r--r--Documentation/doc-guide/maintainer-profile.rst7
-rw-r--r--Documentation/doc-guide/sphinx.rst18
-rw-r--r--Documentation/driver-api/dpll.rst2
-rw-r--r--Documentation/driver-api/index.rst169
-rw-r--r--Documentation/fault-injection/index.rst2
-rw-r--r--Documentation/filesystems/files.rst2
-rw-r--r--Documentation/filesystems/fscrypt.rst27
-rw-r--r--Documentation/filesystems/index.rst1
-rw-r--r--Documentation/filesystems/locking.rst2
-rw-r--r--Documentation/filesystems/ntfs.rst466
-rw-r--r--Documentation/filesystems/overlayfs.rst16
-rw-r--r--Documentation/filesystems/proc.rst4
-rw-r--r--Documentation/filesystems/vfs.rst16
-rw-r--r--Documentation/index.rst53
-rw-r--r--Documentation/kbuild/Kconfig.recursion-issue-016
-rw-r--r--Documentation/maintainer/maintainer-entry-profile.rst3
-rw-r--r--Documentation/mm/slub.rst60
-rw-r--r--Documentation/netlink/genetlink-c.yaml41
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml41
-rw-r--r--Documentation/netlink/genetlink.yaml21
-rw-r--r--Documentation/netlink/netlink-raw.yaml37
-rw-r--r--Documentation/netlink/specs/devlink.yaml2
-rw-r--r--Documentation/netlink/specs/dpll.yaml44
-rw-r--r--Documentation/netlink/specs/mptcp_pm.yaml3
-rw-r--r--Documentation/netlink/specs/netdev.yaml91
-rw-r--r--Documentation/netlink/specs/nlctrl.yaml206
-rw-r--r--Documentation/netlink/specs/rt_link.yaml10
-rw-r--r--Documentation/netlink/specs/tc.yaml2119
-rw-r--r--Documentation/networking/af_xdp.rst33
-rw-r--r--Documentation/networking/bonding.rst12
-rw-r--r--Documentation/networking/bridge.rst2
-rw-r--r--Documentation/networking/can.rst34
-rw-r--r--Documentation/networking/device_drivers/ethernet/amazon/ena.rst6
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst1
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ice.rst21
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst24
-rw-r--r--Documentation/networking/device_drivers/wwan/t7xx.rst46
-rw-r--r--Documentation/networking/devlink/devlink-port.rst2
-rw-r--r--Documentation/networking/devlink/mlx5.rst9
-rw-r--r--Documentation/networking/index.rst1
-rw-r--r--Documentation/networking/ip-sysctl.rst14
-rw-r--r--Documentation/networking/l2tp.rst135
-rw-r--r--Documentation/networking/multi-pf-netdev.rst174
-rw-r--r--Documentation/networking/net_cachelines/inet_sock.rst6
-rw-r--r--Documentation/networking/net_cachelines/net_device.rst4
-rw-r--r--Documentation/networking/net_cachelines/tcp_sock.rst4
-rw-r--r--Documentation/networking/netconsole.rst66
-rw-r--r--Documentation/networking/netdevices.rst4
-rw-r--r--Documentation/networking/sfp-phylink.rst147
-rw-r--r--Documentation/networking/statistics.rst15
-rw-r--r--Documentation/networking/xfrm_device.rst4
-rw-r--r--Documentation/process/changes.rst6
-rw-r--r--Documentation/process/coding-style.rst13
-rw-r--r--Documentation/process/cve.rst121
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/process/howto.rst4
-rw-r--r--Documentation/process/index.rst1
-rw-r--r--Documentation/process/maintainer-netdev.rst2
-rw-r--r--Documentation/process/maintainer-tip.rst34
-rw-r--r--Documentation/process/researcher-guidelines.rst2
-rw-r--r--Documentation/process/security-bugs.rst5
-rw-r--r--Documentation/process/submit-checklist.rst161
-rw-r--r--Documentation/rust/general-information.rst24
-rw-r--r--Documentation/rust/index.rst1
-rw-r--r--Documentation/rust/testing.rst135
-rw-r--r--Documentation/sphinx/kernel_feat.py2
-rw-r--r--Documentation/sphinx/kerneldoc-preamble.sty7
-rw-r--r--Documentation/sphinx/kerneldoc.py6
-rw-r--r--Documentation/sphinx/requirements.txt7
-rw-r--r--Documentation/sphinx/templates/kernel-toc.html4
-rw-r--r--Documentation/sphinx/translations.py10
-rw-r--r--Documentation/staging/rpmsg.rst2
-rw-r--r--Documentation/subsystem-apis.rst2
-rw-r--r--Documentation/translations/it_IT/RCU/index.rst19
-rw-r--r--Documentation/translations/it_IT/RCU/torture.rst369
-rw-r--r--Documentation/translations/it_IT/core-api/index.rst12
-rw-r--r--Documentation/translations/it_IT/i2c/i2c-protocol.rst99
-rw-r--r--Documentation/translations/it_IT/i2c/index.rst46
-rw-r--r--Documentation/translations/it_IT/i2c/summary.rst64
-rw-r--r--Documentation/translations/it_IT/index.rst2
-rw-r--r--Documentation/translations/it_IT/locking/index.rst20
-rw-r--r--Documentation/translations/it_IT/locking/lockdep-design.rst678
-rw-r--r--Documentation/translations/it_IT/locking/lockstat.rst230
-rw-r--r--Documentation/translations/it_IT/locking/locktorture.rst181
-rw-r--r--Documentation/translations/it_IT/locking/locktypes.rst547
-rw-r--r--Documentation/translations/it_IT/networking/netdev-FAQ.rst13
-rw-r--r--Documentation/translations/it_IT/process/coding-style.rst6
-rw-r--r--Documentation/translations/it_IT/subsystem-apis.rst47
-rw-r--r--Documentation/translations/ja_JP/index.rst2
-rw-r--r--Documentation/translations/ja_JP/process/howto.rst (renamed from Documentation/translations/ja_JP/howto.rst)0
-rw-r--r--Documentation/translations/sp_SP/process/coding-style.rst6
-rw-r--r--Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/translations/sp_SP/process/researcher-guidelines.rst2
-rw-r--r--Documentation/translations/zh_CN/process/coding-style.rst4
-rw-r--r--Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst4
-rw-r--r--Documentation/translations/zh_TW/process/coding-style.rst4
-rw-r--r--Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/usb/gadget-testing.rst22
-rw-r--r--Documentation/userspace-api/index.rst47
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst4
-rw-r--r--Documentation/userspace-api/netlink/netlink-raw.rst42
-rw-r--r--Documentation/userspace-api/perf_ring_buffer.rst830
-rw-r--r--Documentation/virt/coco/sev-guest.rst52
-rw-r--r--Documentation/virt/hyperv/index.rst1
-rw-r--r--Documentation/virt/hyperv/vpci.rst316
-rw-r--r--Documentation/virt/kvm/api.rst5
-rw-r--r--MAINTAINERS313
-rw-r--r--Makefile24
-rw-r--r--README2
-rw-r--r--arch/Kconfig95
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/page.h2
-rw-r--r--arch/alpha/kernel/smp.c5
-rw-r--r--arch/arc/Kconfig3
-rw-r--r--arch/arc/include/asm/jump_label.h4
-rw-r--r--arch/arc/include/uapi/asm/page.h6
-rw-r--r--arch/arc/kernel/smp.c5
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/Makefile4
-rw-r--r--arch/arm/boot/compressed/misc.c2
-rw-r--r--arch/arm/boot/compressed/misc.h2
-rw-r--r--arch/arm/boot/dts/allwinner/sun8i-r40-feta40i.dtsi7
-rw-r--r--arch/arm/boot/dts/amazon/alpine.dtsi1
-rw-r--r--arch/arm/boot/dts/amlogic/meson.dtsi6
-rw-r--r--arch/arm/boot/dts/amlogic/meson8.dtsi1
-rw-r--r--arch/arm/boot/dts/amlogic/meson8b.dtsi1
-rw-r--r--arch/arm/boot/dts/arm/arm-realview-pb1176.dts2
-rw-r--r--arch/arm/boot/dts/arm/integratorap-im-pd1.dts3
-rw-r--r--arch/arm/boot/dts/arm/versatile-ab.dts3
-rw-r--r--arch/arm/boot/dts/arm/vexpress-v2p-ca9.dts4
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts4
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts4
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts2
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-g4.dtsi14
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-g5.dtsi15
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-g6.dtsi18
-rw-r--r--arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi4
-rw-r--r--arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi3
-rw-r--r--arch/arm/boot/dts/broadcom/bcm-hr2.dtsi1
-rw-r--r--arch/arm/boot/dts/broadcom/bcm-nsp.dtsi2
-rw-r--r--arch/arm/boot/dts/gemini/gemini-dlink-dir-685.dts30
-rw-r--r--arch/arm/boot/dts/gemini/gemini-dlink-dns-313.dts4
-rw-r--r--arch/arm/boot/dts/gemini/gemini-sl93512r.dts16
-rw-r--r--arch/arm/boot/dts/gemini/gemini-sq201.dts16
-rw-r--r--arch/arm/boot/dts/gemini/gemini-wbd111.dts6
-rw-r--r--arch/arm/boot/dts/gemini/gemini-wbd222.dts6
-rw-r--r--arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts2
-rw-r--r--arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts2
-rw-r--r--arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-l8.dts38
-rw-r--r--arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-s4.dts2
-rw-r--r--arch/arm/boot/dts/marvell/armada-385-clearfog-gtr.dtsi84
-rw-r--r--arch/arm/boot/dts/marvell/armada-388-clearfog.dts5
-rw-r--r--arch/arm/boot/dts/marvell/dove-cubox.dts4
-rw-r--r--arch/arm/boot/dts/marvell/kirkwood-l-50.dts2
-rw-r--r--arch/arm/boot/dts/marvell/mmp2-brownstone.dts2
-rw-r--r--arch/arm/boot/dts/microchip/Makefile2
-rw-r--r--arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts482
-rw-r--r--arch/arm/boot/dts/microchip/at91sam9g25-gardena-smart-gateway.dts2
-rw-r--r--arch/arm/boot/dts/microchip/at91sam9x5ek.dtsi2
-rw-r--r--arch/arm/boot/dts/microchip/sam9x60.dtsi64
-rw-r--r--arch/arm/boot/dts/microchip/sama7g5.dtsi56
-rw-r--r--arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi2
-rw-r--r--arch/arm/boot/dts/nvidia/Makefile2
-rw-r--r--arch/arm/boot/dts/nvidia/tegra124-nyan.dtsi1
-rw-r--r--arch/arm/boot/dts/nvidia/tegra124-venice2.dts1
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi1
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi1
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-asus-nexus7-grouper-common.dtsi3
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi1
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-lg-p880.dts489
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-lg-p895.dts496
-rw-r--r--arch/arm/boot/dts/nvidia/tegra30-lg-x3.dtsi1812
-rw-r--r--arch/arm/boot/dts/nxp/imx/Makefile6
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx1-apf9328.dts2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx1.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx27.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx31.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx35.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx51.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso87
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6dl-sielaff.dts533
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6dl-yapp4-common.dtsi25
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval-v1.2.dts200
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dts108
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dtsi120
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts3
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard.dtsi7
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard2.dtsi5
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi10
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6sl-tolino-shine2hd.dts6
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6sl.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6sx.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ul-14x14-evk.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6uldev.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ul.dtsi18
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som-cfg-sdcard.dtsi4
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som.dtsi4
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull-dhcor-som.dtsi7
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi325
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7-tqma7.dtsi144
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7d-mba7.dts94
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s.dtsi26
-rw-r--r--arch/arm/boot/dts/nxp/ls/ls1021a.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/mxs/imx28-evk.dts2
-rw-r--r--arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts1
-rw-r--r--arch/arm/boot/dts/qcom/Makefile1
-rw-r--r--arch/arm/boot/dts/qcom/qcom-apq8026-lg-lenok.dts38
-rw-r--r--arch/arm/boot/dts/qcom/qcom-apq8026-samsung-matisse-wifi.dts452
-rw-r--r--arch/arm/boot/dts/qcom/qcom-apq8064.dtsi70
-rw-r--r--arch/arm/boot/dts/qcom/qcom-apq8084.dtsi13
-rw-r--r--arch/arm/boot/dts/qcom/qcom-ipq4019-ap.dk01.1.dtsi138
-rw-r--r--arch/arm/boot/dts/qcom/qcom-ipq4019.dtsi35
-rw-r--r--arch/arm/boot/dts/qcom/qcom-ipq8064.dtsi12
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8226-samsung-matisse-common.dtsi457
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8226.dtsi734
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8660.dtsi17
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8926-htc-memul.dts15
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8926-samsung-matisselte.dts37
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8960-pins.dtsi21
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8960-samsung-expressatt.dts71
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8960.dtsi48
-rw-r--r--arch/arm/boot/dts/qcom/qcom-msm8974.dtsi33
-rw-r--r--arch/arm/boot/dts/qcom/qcom-sdx55.dtsi40
-rw-r--r--arch/arm/boot/dts/qcom/qcom-sdx65.dtsi48
-rw-r--r--arch/arm/boot/dts/renesas/r8a73a4-ape6evm.dts12
-rw-r--r--arch/arm/boot/dts/renesas/r8a73a4.dtsi23
-rw-r--r--arch/arm/boot/dts/renesas/r8a7740.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7778.dtsi11
-rw-r--r--arch/arm/boot/dts/renesas/r8a7779.dtsi9
-rw-r--r--arch/arm/boot/dts/renesas/r8a7790-lager.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7790-stout.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7791-koelsch.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7791-porter.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7792-blanche.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7793-gose.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7794-alt.dts1
-rw-r--r--arch/arm/boot/dts/renesas/r8a7794-silk.dts1
-rw-r--r--arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts29
-rw-r--r--arch/arm/boot/dts/rockchip/rk3128.dtsi60
-rw-r--r--arch/arm/boot/dts/rockchip/rk322x.dtsi16
-rw-r--r--arch/arm/boot/dts/rockchip/rk3288.dtsi16
-rw-r--r--arch/arm/boot/dts/rockchip/rv1108.dtsi8
-rw-r--r--arch/arm/boot/dts/rockchip/rv1126-sonoff-ihost.dtsi10
-rw-r--r--arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi1
-rw-r--r--arch/arm/boot/dts/samsung/exynos4412-i9300.dts2
-rw-r--r--arch/arm/boot/dts/samsung/exynos4412-i9305.dts2
-rw-r--r--arch/arm/boot/dts/samsung/exynos4412-n710x.dts2
-rw-r--r--arch/arm/boot/dts/samsung/exynos4412-p4note.dtsi53
-rw-r--r--arch/arm/boot/dts/samsung/exynos5420-galaxy-tab-common.dtsi34
-rw-r--r--arch/arm/boot/dts/samsung/exynos5420-peach-pit.dts1
-rw-r--r--arch/arm/boot/dts/samsung/exynos5422-odroidxu3-common.dtsi16
-rw-r--r--arch/arm/boot/dts/samsung/exynos5800-peach-pi.dts1
-rw-r--r--arch/arm/boot/dts/st/Makefile1
-rw-r--r--arch/arm/boot/dts/st/stih407-pinctrl.dtsi8
-rw-r--r--arch/arm/boot/dts/st/stm32429i-eval.dts1
-rw-r--r--arch/arm/boot/dts/st/stm32f769-disco-mb1166-reva09.dts13
-rw-r--r--arch/arm/boot/dts/st/stm32f769-disco.dts70
-rw-r--r--arch/arm/boot/dts/st/stm32f769.dtsi20
-rw-r--r--arch/arm/boot/dts/st/stm32mp131.dtsi7
-rw-r--r--arch/arm/boot/dts/st/stm32mp135f-dk.dts8
-rw-r--r--arch/arm/boot/dts/st/stm32mp157.dtsi2
-rw-r--r--arch/arm/boot/dts/st/stm32mp157a-dk1-scmi.dts2
-rw-r--r--arch/arm/boot/dts/st/stm32mp157c-dk2-scmi.dts2
-rw-r--r--arch/arm/boot/dts/st/stm32mp157c-dk2.dts1
-rw-r--r--arch/arm/boot/dts/st/stm32mp157c-ed1-scmi.dts2
-rw-r--r--arch/arm/boot/dts/st/stm32mp157c-ev1-scmi.dts2
-rw-r--r--arch/arm/boot/dts/st/stm32mp157c-lxa-tac-gen2.dts2
-rw-r--r--arch/arm/boot/dts/st/stm32mp15xc-lxa-tac.dtsi6
-rw-r--r--arch/arm/boot/dts/ti/davinci/da850.dtsi4
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-clocks.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2e-clocks.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2e-evm.dts2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2e-netcp.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2e.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2g-evm.dts2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2g-ice.dts2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2g-netcp.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2g.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2hk-clocks.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2hk-evm.dts2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2hk-netcp.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2hk.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2l-clocks.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2l-evm.dts2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2l-netcp.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone-k2l.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/keystone/keystone.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-baltos-ir2110.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-baltos-ir3220.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-baltos-ir5221.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-baltos-leds.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-baltos.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-base0033.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi4
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-cm-t335.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-evmsk.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-guardian.dts4
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-icev2.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-igep0033.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-myirtech-myc.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-myirtech-myd.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-nano.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-netcan-plus-1xx.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-netcom-plus-2xx.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-netcom-plus-8xx.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-pdu001.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-extended-wifi.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-lite.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-sbc-t335.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-sl50.dts3
-rw-r--r--arch/arm/boot/dts/ti/omap/am33xx.dtsi9
-rw-r--r--arch/arm/boot/dts/ti/omap/am3517.dtsi11
-rw-r--r--arch/arm/boot/dts/ti/omap/am4372.dtsi6
-rw-r--r--arch/arm/boot/dts/ti/omap/am437x-cm-t43.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am437x-sbc-t43.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts3
-rw-r--r--arch/arm/boot/dts/ti/omap/am57xx-cl-som-am57x.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/am57xx-sbc-am57x.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/compulab-sb-som.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/dra7-l4.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/dra7.dtsi17
-rw-r--r--arch/arm/boot/dts/ti/omap/dra74x-p.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/dra7xx-clocks.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/omap34xx.dtsi11
-rw-r--r--arch/arm/boot/dts/ti/omap/omap36xx.dtsi9
-rw-r--r--arch/arm/boot/dts/ti/omap/omap4-epson-embt2ws.dts1
-rw-r--r--arch/arm/boot/dts/ti/omap/omap4-panda-common.dtsi1
-rw-r--r--arch/arm/boot/dts/ti/omap/omap4-sdp.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/omap4.dtsi9
-rw-r--r--arch/arm/boot/dts/ti/omap/omap5-igep0050.dts2
-rw-r--r--arch/arm/boot/dts/ti/omap/omap5.dtsi9
-rw-r--r--arch/arm/boot/dts/ti/omap/twl4030.dtsi2
-rw-r--r--arch/arm/boot/dts/ti/omap/twl6030.dtsi4
-rw-r--r--arch/arm/configs/exynos_defconfig3
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/arm/configs/multi_v7_defconfig21
-rw-r--r--arch/arm/configs/shmobile_defconfig2
-rw-r--r--arch/arm/include/asm/elf.h1
-rw-r--r--arch/arm/include/asm/jump_label.h4
-rw-r--r--arch/arm/include/asm/page.h2
-rw-r--r--arch/arm/include/asm/vdso_datapage.h26
-rw-r--r--arch/arm/include/asm/word-at-a-time.h3
-rw-r--r--arch/arm/kernel/asm-offsets.c4
-rw-r--r--arch/arm/kernel/vdso.c4
-rw-r--r--arch/arm/mach-ep93xx/core.c1
-rw-r--r--arch/arm/mach-imx/mmdc.c6
-rw-r--r--arch/arm/mach-omap1/Kconfig1
-rw-r--r--arch/arm/mach-omap2/am33xx-restart.c5
-rw-r--r--arch/arm/mach-omap2/board-generic.c6
-rw-r--r--arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c2
-rw-r--r--arch/arm/mach-omap2/clockdomain.c4
-rw-r--r--arch/arm/mach-omap2/cm33xx.c2
-rw-r--r--arch/arm/mach-omap2/cminst44xx.c2
-rw-r--r--arch/arm/mach-omap2/omap-secure.c4
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c9
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_common_data.c6
-rw-r--r--arch/arm/mach-omap2/pmic-cpcap.c24
-rw-r--r--arch/arm/mach-omap2/powerdomain.c2
-rw-r--r--arch/arm/mach-omap2/prm-regbits-33xx.h1
-rw-r--r--arch/arm/mach-omap2/prm.h1
-rw-r--r--arch/arm/mach-omap2/prm33xx.c22
-rw-r--r--arch/arm/mach-omap2/prm44xx.c2
-rw-r--r--arch/arm/mach-omap2/prm_common.c10
-rw-r--r--arch/arm/mach-omap2/wd_timer.c4
-rw-r--r--arch/arm/mach-qcom/Kconfig41
-rw-r--r--arch/arm/mach-s3c/cpu.h2
-rw-r--r--arch/arm/mach-s3c/s3c6410.c2
-rw-r--r--arch/arm/mach-s3c/s3c64xx.c2
-rw-r--r--arch/arm/mach-s5pv210/pm.c2
-rw-r--r--arch/arm/mach-zynq/slcr.c5
-rw-r--r--arch/arm/mm/fault.c2
-rw-r--r--arch/arm/mm/ioremap.c8
-rw-r--r--arch/arm64/Kconfig31
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/dts/allwinner/Makefile3
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi7
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1-manta.dts2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi4
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-pi.dts2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi155
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi75
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h618-longanpi-3h.dts144
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts23
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts356
-rw-r--r--arch/arm64/boot/dts/amazon/alpine-v2.dtsi1
-rw-r--r--arch/arm64/boot/dts/amazon/alpine-v3.dtsi1
-rw-r--r--arch/arm64/boot/dts/amlogic/Makefile6
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi7
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi12
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-a1-ad402.dts2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-a1.dtsi2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j1xx.dtsi30
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-axg-s400.dts16
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-axg.dtsi8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-brcm.dtso31
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-realtek.dtso21
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am.dts462
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-radxa-zero.dts12
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts14
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts16
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts14
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-odroid.dtsi20
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi10
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi12
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905w-jethome-jethub-j80.dts8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc-v2.dts12
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm-s912-libretech-pc.dts2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi16
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-libretech-cottonwood.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-ac2xx.dtsi10
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-bananapi.dtsi14
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi20
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts12
-rw-r--r--arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi1
-rw-r--r--arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi1
-rw-r--r--arch/arm64/boot/dts/exynos/exynos850.dtsi64
-rw-r--r--arch/arm64/boot/dts/exynos/google/gs101-oriole.dts24
-rw-r--r--arch/arm64/boot/dts/exynos/google/gs101-pinctrl.dtsi2
-rw-r--r--arch/arm64/boot/dts/exynos/google/gs101.dtsi133
-rw-r--r--arch/arm64/boot/dts/freescale/Makefile27
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi10
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi32
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.1.dtsi26
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.2.dtsi124
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-apalis-eval.dtsi22
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi330
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi63
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-gpu0.dtsi27
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxl-evk.dts101
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxl-ss-adma.dtsi77
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxl.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp-mba8xx.dts16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp.dtsi24
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxp.dtsi24
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi69
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts294
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts38
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi567
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts14
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi40
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts14
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi36
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-rve-gateway.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx-usbotg.dtso64
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx.dts5
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts11
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi71
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts84
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts12
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-evk.dts33
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts107
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts9
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi10
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi14
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-tqma8mq-mba8mx.dts24
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-apalis-eval-v1.2.dts16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-apalis-eval.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval-v1.2.dts26
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-mek.dts26
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-conn.dtsi5
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi55
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm.dtsi41
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp-mba8xx.dts16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp.dtsi14
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qxp.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/imx8ulp-evk.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-phyboard-segin.dts117
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-phycore-som.dtsi126
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-var-som-symphony.dts351
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-var-som.dtsi110
-rw-r--r--arch/arm64/boot/dts/freescale/imx93.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/mba8mx.dtsi21
-rw-r--r--arch/arm64/boot/dts/freescale/mba8xx.dtsi554
-rw-r--r--arch/arm64/boot/dts/freescale/tqma8xx.dtsi265
-rw-r--r--arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi4
-rw-r--r--arch/arm64/boot/dts/lg/lg1312.dtsi1
-rw-r--r--arch/arm64/boot/dts/lg/lg1313.dtsi1
-rw-r--r--arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi31
-rw-r--r--arch/arm64/boot/dts/marvell/ac5-98dx35xx-rd.dts4
-rw-r--r--arch/arm64/boot/dts/marvell/armada-37xx.dtsi10
-rw-r--r--arch/arm64/boot/dts/marvell/armada-ap807.dtsi3
-rw-r--r--arch/arm64/boot/dts/marvell/armada-ap80x.dtsi1
-rw-r--r--arch/arm64/boot/dts/marvell/armada-cp11x.dtsi10
-rw-r--r--arch/arm64/boot/dts/mediatek/Makefile14
-rw-r--r--arch/arm64/boot/dts/mediatek/mt2712-evb.dts4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt2712e.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt6797.dtsi8
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts13
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts25
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7981b-xiaomi-ax3000t.dts15
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7981b.dtsi105
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a-acelink-ew-7886cax.dts173
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3-nand.dtso2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a-rfb.dts31
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a.dtsi180
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986b-rfb.dts31
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7988a-bananapi-bpi-r4.dts11
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7988a.dtsi136
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173-elm-hana-rev7.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi3
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173-evb.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173.dtsi19
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi7
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183.dtsi11
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-krabby.dtsi129
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393216.dts39
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393217.dts39
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393218.dts26
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-rusty-sku196608.dts26
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131072.dts18
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131073.dts18
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix.dtsi199
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327681.dts57
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327683.dts24
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262144.dts44
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262148.dts26
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi1681
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186.dtsi93
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi7
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8192.dtsi10
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi27
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-demo.dts19
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-evb.dts12
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195.dtsi128
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts17
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts825
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra132-norrin.dts1
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi50
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts51
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi27
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3701.dtsi1953
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts3
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3767-0000.dtsi14
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3767-0005.dtsi14
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3767.dtsi86
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0000.dts7
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0005.dts12
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts1
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra234.dtsi2144
-rw-r--r--arch/arm64/boot/dts/qcom/Makefile6
-rw-r--r--arch/arm64/boot/dts/qcom/apq8016-sbc-d3-camera-mezzanine.dts8
-rw-r--r--arch/arm64/boot/dts/qcom/ipq5332.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/ipq6018.dtsi167
-rw-r--r--arch/arm64/boot/dts/qcom/ipq8074.dtsi32
-rw-r--r--arch/arm64/boot/dts/qcom/ipq9574.dtsi12
-rw-r--r--arch/arm64/boot/dts/qcom/msm8216-samsung-fortuna3g.dts11
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916-samsung-fortuna-common.dtsi203
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916-samsung-gprimeltecan.dts27
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916-samsung-grandprimelte.dts16
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916-samsung-rossa-common.dtsi16
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916-samsung-rossa.dts16
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916.dtsi9
-rw-r--r--arch/arm64/boot/dts/qcom/msm8939.dtsi11
-rw-r--r--arch/arm64/boot/dts/qcom/msm8953.dtsi155
-rw-r--r--arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8994.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi57
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998.dtsi26
-rw-r--r--arch/arm64/boot/dts/qcom/pm4125.dtsi (renamed from arch/arm64/boot/dts/qcom/pm2250.dtsi)38
-rw-r--r--arch/arm64/boot/dts/qcom/pmi632.dtsi39
-rw-r--r--arch/arm64/boot/dts/qcom/qcm2290.dtsi7
-rw-r--r--arch/arm64/boot/dts/qcom/qcm6490-fairphone-fp5.dts56
-rw-r--r--arch/arm64/boot/dts/qcom/qcm6490-idp.dts39
-rw-r--r--arch/arm64/boot/dts/qcom/qcs404.dtsi16
-rw-r--r--arch/arm64/boot/dts/qcom/qcs6490-rb3gen2.dts23
-rw-r--r--arch/arm64/boot/dts/qcom/qrb2210-rb1.dts96
-rw-r--r--arch/arm64/boot/dts/qcom/qrb4210-rb2.dts50
-rw-r--r--arch/arm64/boot/dts/qcom/sa8295p-adp.dts68
-rw-r--r--arch/arm64/boot/dts/qcom/sa8540p-ride.dts4
-rw-r--r--arch/arm64/boot/dts/qcom/sa8540p.dtsi3
-rw-r--r--arch/arm64/boot/dts/qcom/sa8775p.dtsi119
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180.dtsi86
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi28
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-idp-ec-h1.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280.dtsi125
-rw-r--r--arch/arm64/boot/dts/qcom/sc8180x.dtsi141
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-crd.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts41
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi39
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp.dtsi601
-rw-r--r--arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts5
-rw-r--r--arch/arm64/boot/dts/qcom/sdm450-motorola-ali.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sdm450.dtsi14
-rw-r--r--arch/arm64/boot/dts/qcom/sdm630-sony-xperia-nile.dtsi16
-rw-r--r--arch/arm64/boot/dts/qcom/sdm630.dtsi62
-rw-r--r--arch/arm64/boot/dts/qcom/sdm632.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts6
-rw-r--r--arch/arm64/boot/dts/qcom/sdm670.dtsi14
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845-db845c.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845.dtsi63
-rw-r--r--arch/arm64/boot/dts/qcom/sm4450.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sm6115.dtsi98
-rw-r--r--arch/arm64/boot/dts/qcom/sm6125.dtsi17
-rw-r--r--arch/arm64/boot/dts/qcom/sm6350.dtsi597
-rw-r--r--arch/arm64/boot/dts/qcom/sm6375.dtsi12
-rw-r--r--arch/arm64/boot/dts/qcom/sm7125-xiaomi-common.dtsi26
-rw-r--r--arch/arm64/boot/dts/qcom/sm7125-xiaomi-curtana.dts16
-rw-r--r--arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts61
-rw-r--r--arch/arm64/boot/dts/qcom/sm8150.dtsi115
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-common.dtsi3
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250.dtsi107
-rw-r--r--arch/arm64/boot/dts/qcom/sm8350.dtsi87
-rw-r--r--arch/arm64/boot/dts/qcom/sm8450-hdk.dts6
-rw-r--r--arch/arm64/boot/dts/qcom/sm8450.dtsi81
-rw-r--r--arch/arm64/boot/dts/qcom/sm8550-hdk.dts1306
-rw-r--r--arch/arm64/boot/dts/qcom/sm8550-mtp.dts11
-rw-r--r--arch/arm64/boot/dts/qcom/sm8550-qrd.dts53
-rw-r--r--arch/arm64/boot/dts/qcom/sm8550.dtsi189
-rw-r--r--arch/arm64/boot/dts/qcom/sm8650-mtp.dts157
-rw-r--r--arch/arm64/boot/dts/qcom/sm8650-qrd.dts441
-rw-r--r--arch/arm64/boot/dts/qcom/sm8650.dtsi83
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-crd.dts450
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-qcp.dts175
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100.dtsi1777
-rw-r--r--arch/arm64/boot/dts/renesas/Makefile8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774a1.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774b1.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774c0.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774e1.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77951.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77960.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77961.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77965.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77970.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77980.dtsi17
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77990.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77995.dtsi11
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779a0.dtsi21
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779f0.dtsi17
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dts13
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dtsi368
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779g0-white-hawk.dts58
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779g0.dtsi105
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779g2-white-hawk-single.dts26
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779g2.dtsi12
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779h0-gray-hawk-single.dts230
-rw-r--r--arch/arm64/boot/dts/renesas/r8a779h0.dtsi664
-rw-r--r--arch/arm64/boot/dts/renesas/r9a07g043u.dtsi81
-rw-r--r--arch/arm64/boot/dts/renesas/r9a07g043u11-smarc-cru-csi-ov5645.dtso21
-rw-r--r--arch/arm64/boot/dts/renesas/r9a07g044.dtsi68
-rw-r--r--arch/arm64/boot/dts/renesas/r9a07g054.dtsi69
-rw-r--r--arch/arm64/boot/dts/renesas/r9a08g045.dtsi27
-rw-r--r--arch/arm64/boot/dts/renesas/rzg2l-smarc.dtsi14
-rw-r--r--arch/arm64/boot/dts/renesas/rzg2lc-smarc.dtsi14
-rw-r--r--arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi9
-rw-r--r--arch/arm64/boot/dts/renesas/rzg3s-smarc.dtsi53
-rw-r--r--arch/arm64/boot/dts/renesas/ulcb-kf.dtsi79
-rw-r--r--arch/arm64/boot/dts/renesas/white-hawk-common.dtsi65
-rw-r--r--arch/arm64/boot/dts/renesas/white-hawk-cpu-common.dtsi375
-rw-r--r--arch/arm64/boot/dts/renesas/white-hawk-csi-dsi.dtsi (renamed from arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-csi-dsi.dtsi)2
-rw-r--r--arch/arm64/boot/dts/renesas/white-hawk-ethernet.dtsi (renamed from arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-ethernet.dtsi)2
-rw-r--r--arch/arm64/boot/dts/rockchip/Makefile11
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi6
-rw-r--r--arch/arm64/boot/dts/rockchip/px30.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi12
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4c.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399.dtsi82
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-d.dts60
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-s.dts19
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc.dtsi237
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi74
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts74
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi74
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v0.1.dts28
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v2.0.dts48
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi943
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb10max3.dts87
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb30.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dtsi18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts86
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts8
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-common.dtsi466
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dts10
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dtsi232
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-wifi.dtso56
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a.dtsi25
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b-io.dts76
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b.dtsi383
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts39
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts8
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-tiger-haikou.dts266
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi690
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts688
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6c.dts14
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6s.dts764
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s.dtsi24
-rw-r--r--arch/arm64/boot/dts/st/stm32mp251.dtsi12
-rw-r--r--arch/arm64/boot/dts/st/stm32mp255.dtsi17
-rw-r--r--arch/arm64/boot/dts/tesla/fsd.dtsi2
-rw-r--r--arch/arm64/boot/dts/ti/Makefile55
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-lp-sk.dts4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-main.dtsi30
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-mcu.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-phycore-som.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi1
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-verdin-dev.dtsi1
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-verdin-mallow.dtsi10
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-verdin-wifi.dtsi1
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi59
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62-wakeup.dtsi38
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-ov5640.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-tevi-ov5640.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts54
-rw-r--r--arch/arm64/boot/dts/ti/k3-am625-phyboard-lyra-rdk.dts6
-rw-r--r--arch/arm64/boot/dts/ti/k3-am625-sk.dts4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am625.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a-main.dtsi80
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a-mcu.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a-wakeup.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a7-sk.dts123
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62a7.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p-main.dtsi154
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p-mcu.dtsi6
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p-wakeup.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p.dtsi6
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p5-sk.dts11
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62p5.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62x-phyboard-lyra-gpio-fan.dtso50
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62x-sk-common.dtsi8
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am62x-sk-hdmi-audio.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64-main.dtsi69
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64-phycore-som.dtsi13
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-evm-icssg1-dualemac.dtso79
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-evm.dts119
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-pcie.dtso45
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-usb3.dtso44
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-hummingboard-t.dts292
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-phyboard-electra-rdk.dts30
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-sk.dts14
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-sr-som.dtsi594
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642-tqma64xxl-mbax4xxl.dts1
-rw-r--r--arch/arm64/boot/dts/ti/k3-am642.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-iot2050-arduino-connector.dtsi768
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi7
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi27
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-iot2050-common.dtsi887
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-iot2050-dp.dtsi98
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-iot2050-usb3.dtsi27
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-main.dtsi44
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am652.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-common.dtsi8
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic.dts7
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-base-board-rocktech-rk101-panel.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-base-board.dts8
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-icssg2.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-idk.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-industrial-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-pcie-usb2.dtso59
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-pcie-usb3.dtso61
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-common.dtsi2
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-m2.dts22
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts12
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-sm.dts189
-rw-r--r--arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced.dts3
-rw-r--r--arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts54
-rw-r--r--arch/arm64/boot/dts/ti/k3-am68-sk-som.dtsi20
-rw-r--r--arch/arm64/boot/dts/ti/k3-am69-sk.dts253
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts109
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-evm-quad-port-eth-exp.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-main.dtsi315
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi57
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi47
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts26
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-evm-gesi-exp-board.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-evm-pcie0-ep.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-evm-quad-port-eth-exp.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-main.dtsi149
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi8
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso165
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-sk.dts45
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-som-p0.dtsi22
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts31
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-evm-gesi-exp-board.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-evm-pcie1-ep.dtso4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi143
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi6
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-som-p0.dtsi20
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j722s-evm.dts383
-rw-r--r--arch/arm64/boot/dts/ti/k3-j722s.dtsi89
-rw-r--r--arch/arm64/boot/dts/ti/k3-j784s4-evm.dts32
-rw-r--r--arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi215
-rw-r--r--arch/arm64/boot/dts/ti/k3-j784s4-mcu-wakeup.dtsi6
-rw-r--r--arch/arm64/boot/dts/ti/k3-j784s4-thermal.dtsi5
-rw-r--r--arch/arm64/boot/dts/ti/k3-j784s4.dtsi6
-rw-r--r--arch/arm64/boot/dts/ti/k3-pinctrl.h7
-rw-r--r--arch/arm64/boot/dts/ti/k3-serdes.h4
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-clk-ccf.dtsi16
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dtso36
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dtso37
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm015-dc1.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm019-dc5.dts4
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revA.dts6
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revA.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revC.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu106-revA.dts6
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu111-revA.dts4
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-zcu1275-revA.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp.dtsi85
-rw-r--r--arch/arm64/configs/defconfig35
-rw-r--r--arch/arm64/configs/virt.config4
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c11
-rw-r--r--arch/arm64/include/asm/alternative-macros.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h2
-rw-r--r--arch/arm64/include/asm/cputype.h4
-rw-r--r--arch/arm64/include/asm/fpsimd.h14
-rw-r--r--arch/arm64/include/asm/jump_label.h4
-rw-r--r--arch/arm64/include/asm/page-def.h2
-rw-r--r--arch/arm64/include/asm/patching.h2
-rw-r--r--arch/arm64/include/asm/vdso.h3
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h3
-rw-r--r--arch/arm64/kernel/Makefile6
-rw-r--r--arch/arm64/kernel/cpu_errata.c3
-rw-r--r--arch/arm64/kernel/fpsimd.c18
-rw-r--r--arch/arm64/kernel/patching.c75
-rw-r--r--arch/arm64/kernel/ptrace.c3
-rw-r--r--arch/arm64/kernel/signal.c4
-rw-r--r--arch/arm64/kernel/stacktrace.c28
-rw-r--r--arch/arm64/kernel/suspend.c3
-rw-r--r--arch/arm64/kernel/vdso.c5
-rw-r--r--arch/arm64/kernel/vdso32/Makefile9
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c2
-rw-r--r--arch/arm64/kvm/pkvm.c27
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c5
-rw-r--r--arch/arm64/net/bpf_jit_comp.c286
-rw-r--r--arch/csky/Kconfig1
-rw-r--r--arch/csky/include/asm/jump_label.h4
-rw-r--r--arch/csky/include/asm/page.h7
-rw-r--r--arch/csky/include/asm/vdso.h5
-rw-r--r--arch/csky/kernel/smp.c4
-rw-r--r--arch/csky/kernel/vdso.c14
-rw-r--r--arch/hexagon/Kconfig24
-rw-r--r--arch/hexagon/include/asm/page.h12
-rw-r--r--arch/hexagon/kernel/smp.c4
-rw-r--r--arch/loongarch/Kconfig44
-rw-r--r--arch/loongarch/boot/dts/loongson-2k0500-ref.dts2
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000-ref.dts2
-rw-r--r--arch/loongarch/include/asm/acpi.h4
-rw-r--r--arch/loongarch/include/asm/jump_label.h4
-rw-r--r--arch/loongarch/include/asm/kvm_vcpu.h4
-rw-r--r--arch/loongarch/include/asm/page.h10
-rw-r--r--arch/loongarch/kernel/acpi.c4
-rw-r--r--arch/loongarch/kernel/setup.c6
-rw-r--r--arch/loongarch/kernel/smp.c123
-rw-r--r--arch/loongarch/kernel/vdso.c6
-rw-r--r--arch/loongarch/kvm/mmu.c4
-rw-r--r--arch/loongarch/kvm/vcpu.c81
-rw-r--r--arch/loongarch/mm/kasan_init.c3
-rw-r--r--arch/loongarch/mm/tlb.c16
-rw-r--r--arch/loongarch/vdso/Makefile1
-rw-r--r--arch/m68k/Kconfig3
-rw-r--r--arch/m68k/Kconfig.cpu2
-rw-r--r--arch/m68k/Makefile4
-rw-r--r--arch/m68k/configs/amiga_defconfig3
-rw-r--r--arch/m68k/configs/apollo_defconfig3
-rw-r--r--arch/m68k/configs/atari_defconfig3
-rw-r--r--arch/m68k/configs/bvme6000_defconfig3
-rw-r--r--arch/m68k/configs/hp300_defconfig3
-rw-r--r--arch/m68k/configs/mac_defconfig3
-rw-r--r--arch/m68k/configs/multi_defconfig3
-rw-r--r--arch/m68k/configs/mvme147_defconfig3
-rw-r--r--arch/m68k/configs/mvme16x_defconfig3
-rw-r--r--arch/m68k/configs/q40_defconfig3
-rw-r--r--arch/m68k/configs/sun3_defconfig3
-rw-r--r--arch/m68k/configs/sun3x_defconfig3
-rw-r--r--arch/m68k/emu/nfblock.c10
-rw-r--r--arch/m68k/include/asm/page.h6
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/include/asm/page.h2
-rw-r--r--arch/mips/Kconfig60
-rw-r--r--arch/mips/alchemy/common/prom.c1
-rw-r--r--arch/mips/alchemy/common/setup.c4
-rw-r--r--arch/mips/bcm63xx/boards/board_bcm963xx.c2
-rw-r--r--arch/mips/bcm63xx/dev-rng.c2
-rw-r--r--arch/mips/bcm63xx/dev-uart.c1
-rw-r--r--arch/mips/bcm63xx/dev-wdt.c2
-rw-r--r--arch/mips/bcm63xx/irq.c2
-rw-r--r--arch/mips/bcm63xx/setup.c2
-rw-r--r--arch/mips/bcm63xx/timer.c2
-rw-r--r--arch/mips/cobalt/setup.c3
-rw-r--r--arch/mips/fw/arc/memory.c2
-rw-r--r--arch/mips/include/asm/checksum.h3
-rw-r--r--arch/mips/include/asm/jump_label.h4
-rw-r--r--arch/mips/include/asm/mach-au1x00/au1000.h3
-rw-r--r--arch/mips/include/asm/mach-cobalt/cobalt.h3
-rw-r--r--arch/mips/include/asm/page.h16
-rw-r--r--arch/mips/include/asm/ptrace.h3
-rw-r--r--arch/mips/include/asm/vdso.h5
-rw-r--r--arch/mips/kernel/elf.c6
-rw-r--r--arch/mips/kernel/ptrace.c7
-rw-r--r--arch/mips/kernel/traps.c8
-rw-r--r--arch/mips/kernel/vdso.c2
-rw-r--r--arch/mips/lantiq/prom.c7
-rw-r--r--arch/mips/loongson64/init.c5
-rw-r--r--arch/mips/loongson64/numa.c2
-rw-r--r--arch/mips/sgi-ip27/Makefile2
-rw-r--r--arch/mips/sgi-ip27/ip27-berr.c4
-rw-r--r--arch/mips/sgi-ip27/ip27-common.h2
-rw-r--r--arch/mips/sgi-ip27/ip27-hubio.c185
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c1
-rw-r--r--arch/mips/sgi-ip27/ip27-nmi.c25
-rw-r--r--arch/mips/sgi-ip30/ip30-console.c1
-rw-r--r--arch/mips/sgi-ip30/ip30-setup.c1
-rw-r--r--arch/mips/sgi-ip32/crime.c6
-rw-r--r--arch/mips/sgi-ip32/ip32-berr.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-common.h15
-rw-r--r--arch/mips/sgi-ip32/ip32-irq.c6
-rw-r--r--arch/mips/sgi-ip32/ip32-memory.c1
-rw-r--r--arch/mips/sgi-ip32/ip32-reset.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-setup.c3
-rw-r--r--arch/nios2/Kconfig1
-rw-r--r--arch/nios2/include/asm/page.h2
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/openrisc/include/asm/page.h7
-rw-r--r--arch/openrisc/kernel/smp.c4
-rw-r--r--arch/parisc/Kconfig6
-rw-r--r--arch/parisc/Makefile4
-rw-r--r--arch/parisc/include/asm/assembly.h1
-rw-r--r--arch/parisc/include/asm/extable.h64
-rw-r--r--arch/parisc/include/asm/jump_label.h4
-rw-r--r--arch/parisc/include/asm/kprobes.h3
-rw-r--r--arch/parisc/include/asm/page.h10
-rw-r--r--arch/parisc/include/asm/special_insns.h6
-rw-r--r--arch/parisc/include/asm/uaccess.h48
-rw-r--r--arch/parisc/kernel/cache.c10
-rw-r--r--arch/parisc/kernel/drivers.c5
-rw-r--r--arch/parisc/kernel/ftrace.c2
-rw-r--r--arch/parisc/kernel/processor.c8
-rw-r--r--arch/parisc/kernel/unaligned.c44
-rw-r--r--arch/parisc/kernel/unwind.c14
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/parisc/mm/fault.c11
-rw-r--r--arch/powerpc/Kconfig33
-rw-r--r--arch/powerpc/include/asm/ftrace.h10
-rw-r--r--arch/powerpc/include/asm/jump_label.h4
-rw-r--r--arch/powerpc/include/asm/page.h2
-rw-r--r--arch/powerpc/include/asm/papr-sysparm.h2
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h10
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/rtas.h4
-rw-r--r--arch/powerpc/include/asm/sections.h1
-rw-r--r--arch/powerpc/include/asm/thread_info.h2
-rw-r--r--arch/powerpc/include/asm/uaccess.h12
-rw-r--r--arch/powerpc/include/asm/word-at-a-time.h4
-rw-r--r--arch/powerpc/include/uapi/asm/papr-sysparm.h2
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S20
-rw-r--r--arch/powerpc/kernel/cpu_specs_e500mc.h3
-rw-r--r--arch/powerpc/kernel/interrupt_64.S4
-rw-r--r--arch/powerpc/kernel/iommu.c64
-rw-r--r--arch/powerpc/kernel/irq_64.c2
-rw-r--r--arch/powerpc/kernel/isa-bridge.c4
-rw-r--r--arch/powerpc/kernel/rtas.c9
-rw-r--r--arch/powerpc/kernel/smp.c6
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c12
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.c5
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c26
-rw-r--r--arch/powerpc/kvm/book3s_hv_nestedv2.c20
-rw-r--r--arch/powerpc/mm/kasan/init_32.c1
-rw-r--r--arch/powerpc/platforms/85xx/mpc8536_ds.c2
-rw-r--r--arch/powerpc/platforms/85xx/mvme2500.c2
-rw-r--r--arch/powerpc/platforms/85xx/p1010rdb.c2
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c6
-rw-r--r--arch/powerpc/platforms/85xx/p1022_rdk.c6
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c2
-rw-r--r--arch/powerpc/platforms/85xx/xes_mpc85xx.c2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c156
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c8
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c4
-rw-r--r--arch/powerpc/sysdev/udbg_memcons.c6
-rw-r--r--arch/riscv/Kconfig18
-rw-r--r--arch/riscv/Kconfig.socs32
-rw-r--r--arch/riscv/boot/dts/Makefile2
-rw-r--r--arch/riscv/boot/dts/canaan/Makefile2
-rw-r--r--arch/riscv/boot/dts/microchip/Makefile1
-rw-r--r--arch/riscv/boot/dts/sifive/Makefile1
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts1
-rw-r--r--arch/riscv/boot/dts/sophgo/sg2042.dtsi89
-rw-r--r--arch/riscv/boot/dts/starfive/jh7100.dtsi16
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110.dtsi4
-rw-r--r--arch/riscv/configs/nommu_k210_defconfig2
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig2
-rw-r--r--arch/riscv/include/asm/arch_hweight.h4
-rw-r--r--arch/riscv/include/asm/bitops.h8
-rw-r--r--arch/riscv/include/asm/cfi.h17
-rw-r--r--arch/riscv/include/asm/checksum.h2
-rw-r--r--arch/riscv/include/asm/cpufeature.h4
-rw-r--r--arch/riscv/include/asm/csr.h2
-rw-r--r--arch/riscv/include/asm/ftrace.h5
-rw-r--r--arch/riscv/include/asm/hugetlb.h5
-rw-r--r--arch/riscv/include/asm/hwcap.h2
-rw-r--r--arch/riscv/include/asm/jump_label.h4
-rw-r--r--arch/riscv/include/asm/page.h2
-rw-r--r--arch/riscv/include/asm/pgalloc.h20
-rw-r--r--arch/riscv/include/asm/pgtable-64.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/include/asm/stacktrace.h5
-rw-r--r--arch/riscv/include/asm/suspend.h1
-rw-r--r--arch/riscv/include/asm/tlb.h2
-rw-r--r--arch/riscv/include/asm/tlbflush.h1
-rw-r--r--arch/riscv/include/asm/vmalloc.h61
-rw-r--r--arch/riscv/include/asm/word-at-a-time.h3
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h27
-rw-r--r--arch/riscv/kernel/Makefile2
-rw-r--r--arch/riscv/kernel/cfi.c53
-rw-r--r--arch/riscv/kernel/cpufeature.c31
-rw-r--r--arch/riscv/kernel/paravirt.c6
-rw-r--r--arch/riscv/kernel/return_address.c48
-rw-r--r--arch/riscv/kernel/smpboot.c4
-rw-r--r--arch/riscv/kernel/suspend.c4
-rw-r--r--arch/riscv/kernel/vdso.c10
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c54
-rw-r--r--arch/riscv/kvm/vcpu_sbi_sta.c20
-rw-r--r--arch/riscv/lib/csum.c10
-rw-r--r--arch/riscv/mm/hugetlbpage.c80
-rw-r--r--arch/riscv/mm/init.c4
-rw-r--r--arch/riscv/mm/tlbflush.c4
-rw-r--r--arch/riscv/net/bpf_jit.h136
-rw-r--r--arch/riscv/net/bpf_jit_comp32.c2
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c234
-rw-r--r--arch/riscv/net/bpf_jit_core.c9
-rw-r--r--arch/s390/Kconfig21
-rw-r--r--arch/s390/Makefile10
-rw-r--r--arch/s390/boot/.gitignore1
-rw-r--r--arch/s390/boot/Makefile25
-rw-r--r--arch/s390/boot/boot.h6
-rw-r--r--arch/s390/boot/startup.c75
-rw-r--r--arch/s390/boot/vmlinux.lds.S48
-rw-r--r--arch/s390/configs/compat.config3
-rw-r--r--arch/s390/configs/debug_defconfig11
-rw-r--r--arch/s390/configs/defconfig10
-rw-r--r--arch/s390/configs/zfcpdump_defconfig2
-rw-r--r--arch/s390/crypto/chacha-glue.c4
-rw-r--r--arch/s390/crypto/chacha-s390.S2
-rw-r--r--arch/s390/crypto/crc32-vx.c11
-rw-r--r--arch/s390/crypto/crc32-vx.h12
-rw-r--r--arch/s390/crypto/crc32be-vx.c (renamed from arch/s390/crypto/crc32be-vx.S)177
-rw-r--r--arch/s390/crypto/crc32le-vx.c (renamed from arch/s390/crypto/crc32le-vx.S)249
-rw-r--r--arch/s390/crypto/paes_s390.c16
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c3
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c4
-rw-r--r--arch/s390/include/asm/access-regs.h38
-rw-r--r--arch/s390/include/asm/appldata.h4
-rw-r--r--arch/s390/include/asm/asm-prototypes.h2
-rw-r--r--arch/s390/include/asm/bug.h4
-rw-r--r--arch/s390/include/asm/checksum.h29
-rw-r--r--arch/s390/include/asm/diag.h15
-rw-r--r--arch/s390/include/asm/entry-common.h5
-rw-r--r--arch/s390/include/asm/fpu-insn-asm.h (renamed from arch/s390/include/asm/vx-insn-asm.h)71
-rw-r--r--arch/s390/include/asm/fpu-insn.h486
-rw-r--r--arch/s390/include/asm/fpu-types.h51
-rw-r--r--arch/s390/include/asm/fpu.h295
-rw-r--r--arch/s390/include/asm/fpu/api.h126
-rw-r--r--arch/s390/include/asm/fpu/internal.h67
-rw-r--r--arch/s390/include/asm/fpu/types.h38
-rw-r--r--arch/s390/include/asm/jump_label.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h5
-rw-r--r--arch/s390/include/asm/lowcore.h2
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/include/asm/pai.h3
-rw-r--r--arch/s390/include/asm/pci.h3
-rw-r--r--arch/s390/include/asm/physmem_info.h1
-rw-r--r--arch/s390/include/asm/processor.h11
-rw-r--r--arch/s390/include/asm/ptrace.h4
-rw-r--r--arch/s390/include/asm/stacktrace.h1
-rw-r--r--arch/s390/include/asm/switch_to.h49
-rw-r--r--arch/s390/include/asm/vdso/data.h1
-rw-r--r--arch/s390/include/asm/vx-insn.h19
-rw-r--r--arch/s390/include/asm/word-at-a-time.h3
-rw-r--r--arch/s390/kernel/cache.c1
-rw-r--r--arch/s390/kernel/compat_signal.c22
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/diag.c31
-rw-r--r--arch/s390/kernel/early.c3
-rw-r--r--arch/s390/kernel/entry.S19
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/fpu.c372
-rw-r--r--arch/s390/kernel/ipl.c3
-rw-r--r--arch/s390/kernel/machine_kexec.c3
-rw-r--r--arch/s390/kernel/nmi.c168
-rw-r--r--arch/s390/kernel/os_info.c6
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c83
-rw-r--r--arch/s390/kernel/perf_pai_ext.c52
-rw-r--r--arch/s390/kernel/perf_regs.c10
-rw-r--r--arch/s390/kernel/process.c31
-rw-r--r--arch/s390/kernel/ptrace.c101
-rw-r--r--arch/s390/kernel/setup.c12
-rw-r--r--arch/s390/kernel/signal.c20
-rw-r--r--arch/s390/kernel/smp.c3
-rw-r--r--arch/s390/kernel/sysinfo.c27
-rw-r--r--arch/s390/kernel/text_amode31.S2
-rw-r--r--arch/s390/kernel/time.c6
-rw-r--r--arch/s390/kernel/traps.c12
-rw-r--r--arch/s390/kernel/uprobes.c1
-rw-r--r--arch/s390/kernel/vdso.c5
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S1
-rw-r--r--arch/s390/kernel/vdso64/Makefile3
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S1
-rw-r--r--arch/s390/kernel/vmlinux.lds.S54
-rw-r--r--arch/s390/kvm/gaccess.c5
-rw-r--r--arch/s390/kvm/interrupt.c6
-rw-r--r--arch/s390/kvm/kvm-s390.c33
-rw-r--r--arch/s390/kvm/kvm-s390.h18
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/kvm/vsie.c4
-rw-r--r--arch/s390/lib/Makefile1
-rw-r--r--arch/s390/lib/csum-partial.c91
-rw-r--r--arch/s390/mm/extmem.c4
-rw-r--r--arch/s390/mm/gmap.c1
-rw-r--r--arch/s390/mm/mmap.c19
-rw-r--r--arch/s390/pci/pci.c22
-rw-r--r--arch/s390/pci/pci_debug.c10
-rw-r--r--arch/s390/pci/pci_event.c15
-rw-r--r--arch/s390/pci/pci_sysfs.c70
-rw-r--r--arch/s390/tools/.gitignore1
-rw-r--r--arch/s390/tools/Makefile5
-rw-r--r--arch/s390/tools/relocs.c387
-rw-r--r--arch/sh/boot/compressed/Makefile1
-rw-r--r--arch/sh/include/asm/page.h13
-rw-r--r--arch/sh/include/asm/word-at-a-time.h2
-rw-r--r--arch/sh/mm/Kconfig42
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/Makefile2
-rw-r--r--arch/sparc/include/asm/jump_label.h4
-rw-r--r--arch/sparc/include/asm/page_32.h2
-rw-r--r--arch/sparc/include/asm/page_64.h3
-rw-r--r--arch/sparc/kernel/smp_64.c4
-rw-r--r--arch/sparc/vdso/Makefile1
-rw-r--r--arch/sparc/video/Makefile2
-rw-r--r--arch/um/Kconfig1
-rw-r--r--arch/um/Makefile4
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/ubd_kern.c135
-rw-r--r--arch/um/drivers/vector_kern.c2
-rw-r--r--arch/um/drivers/vector_user.c4
-rw-r--r--arch/um/include/asm/cpufeature.h2
-rw-r--r--arch/um/include/asm/page.h2
-rw-r--r--arch/um/include/shared/user.h3
-rw-r--r--arch/um/os-Linux/drivers/ethertap_user.c2
-rw-r--r--arch/um/os-Linux/drivers/tuntap_user.c2
-rw-r--r--arch/um/os-Linux/umid.c6
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig91
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Makefile24
-rw-r--r--arch/x86/boot/compressed/acpi.c2
-rw-r--r--arch/x86/boot/compressed/cmdline.c2
-rw-r--r--arch/x86/boot/compressed/efi.c2
-rw-r--r--arch/x86/boot/compressed/efi.h9
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c6
-rw-r--r--arch/x86/boot/compressed/misc.c57
-rw-r--r--arch/x86/boot/compressed/misc.h3
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c1
-rw-r--r--arch/x86/boot/compressed/sev.c10
-rw-r--r--arch/x86/boot/header.S18
-rw-r--r--arch/x86/boot/setup.ld6
-rw-r--r--arch/x86/coco/core.c7
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/entry/Makefile5
-rw-r--r--arch/x86/entry/calling.h115
-rw-r--r--arch/x86/entry/entry.S26
-rw-r--r--arch/x86/entry/entry_32.S9
-rw-r--r--arch/x86/entry/entry_64.S40
-rw-r--r--arch/x86/entry/entry_64_compat.S1
-rw-r--r--arch/x86/entry/entry_64_fred.S131
-rw-r--r--arch/x86/entry/entry_fred.c294
-rw-r--r--arch/x86/entry/thunk_32.S34
-rw-r--r--arch/x86/entry/thunk_64.S33
-rw-r--r--arch/x86/entry/vdso/Makefile57
-rw-r--r--arch/x86/entry/vdso/vma.c57
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/events/amd/core.c2
-rw-r--r--arch/x86/events/amd/uncore.c2
-rw-r--r--arch/x86/events/intel/core.c1
-rw-r--r--arch/x86/events/intel/cstate.c2
-rw-r--r--arch/x86/events/intel/ds.c1
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c4
-rw-r--r--arch/x86/events/intel/uncore_snb.c8
-rw-r--r--arch/x86/events/intel/uncore_snbep.c18
-rw-r--r--arch/x86/events/rapl.c2
-rw-r--r--arch/x86/hyperv/hv_vtl.c12
-rw-r--r--arch/x86/hyperv/ivm.c65
-rw-r--r--arch/x86/include/asm/apic.h22
-rw-r--r--arch/x86/include/asm/asm-prototypes.h1
-rw-r--r--arch/x86/include/asm/asm.h14
-rw-r--r--arch/x86/include/asm/barrier.h2
-rw-r--r--arch/x86/include/asm/coco.h12
-rw-r--r--arch/x86/include/asm/cpu.h10
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h10
-rw-r--r--arch/x86/include/asm/cpuid.h36
-rw-r--r--arch/x86/include/asm/current.h9
-rw-r--r--arch/x86/include/asm/debugreg.h24
-rw-r--r--arch/x86/include/asm/desc.h3
-rw-r--r--arch/x86/include/asm/disabled-features.h26
-rw-r--r--arch/x86/include/asm/efi.h14
-rw-r--r--arch/x86/include/asm/elf.h1
-rw-r--r--arch/x86/include/asm/entry-common.h1
-rw-r--r--arch/x86/include/asm/extable_fixup_types.h4
-rw-r--r--arch/x86/include/asm/fpu/sched.h10
-rw-r--r--arch/x86/include/asm/fred.h97
-rw-r--r--arch/x86/include/asm/fsgsbase.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/ia32.h4
-rw-r--r--arch/x86/include/asm/idtentry.h88
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/io.h2
-rw-r--r--arch/x86/include/asm/io_apic.h1
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kexec.h1
-rw-r--r--arch/x86/include/asm/kmsan.h17
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/kvmclock.h2
-rw-r--r--arch/x86/include/asm/linkage.h16
-rw-r--r--arch/x86/include/asm/local.h16
-rw-r--r--arch/x86/include/asm/mem_encrypt.h23
-rw-r--r--arch/x86/include/asm/mpspec.h66
-rw-r--r--arch/x86/include/asm/msr-index.h87
-rw-r--r--arch/x86/include/asm/msr.h44
-rw-r--r--arch/x86/include/asm/nmi.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h80
-rw-r--r--arch/x86/include/asm/page.h6
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/pci.h13
-rw-r--r--arch/x86/include/asm/percpu.h189
-rw-r--r--arch/x86/include/asm/perf_event_p4.h4
-rw-r--r--arch/x86/include/asm/pgalloc.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level.h2
-rw-r--r--arch/x86/include/asm/pgtable.h18
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/processor-flags.h2
-rw-r--r--arch/x86/include/asm/processor.h48
-rw-r--r--arch/x86/include/asm/prom.h4
-rw-r--r--arch/x86/include/asm/pti.h2
-rw-r--r--arch/x86/include/asm/ptrace.h104
-rw-r--r--arch/x86/include/asm/resctrl.h90
-rw-r--r--arch/x86/include/asm/rmwcc.h2
-rw-r--r--arch/x86/include/asm/set_memory.h1
-rw-r--r--arch/x86/include/asm/setup.h2
-rw-r--r--arch/x86/include/asm/setup_data.h32
-rw-r--r--arch/x86/include/asm/sev.h53
-rw-r--r--arch/x86/include/asm/smp.h11
-rw-r--r--arch/x86/include/asm/spec-ctrl.h2
-rw-r--r--arch/x86/include/asm/special_insns.h15
-rw-r--r--arch/x86/include/asm/static_call.h2
-rw-r--r--arch/x86/include/asm/switch_to.h8
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h25
-rw-r--r--arch/x86/include/asm/text-patching.h2
-rw-r--r--arch/x86/include/asm/thread_info.h12
-rw-r--r--arch/x86/include/asm/topology.h90
-rw-r--r--arch/x86/include/asm/trap_pf.h20
-rw-r--r--arch/x86/include/asm/trapnr.h12
-rw-r--r--arch/x86/include/asm/tsc.h3
-rw-r--r--arch/x86/include/asm/uaccess.h10
-rw-r--r--arch/x86/include/asm/uaccess_64.h18
-rw-r--r--arch/x86/include/asm/vmx.h17
-rw-r--r--arch/x86/include/asm/vsyscall.h10
-rw-r--r--arch/x86/include/asm/word-at-a-time.h3
-rw-r--r--arch/x86/include/asm/x86_init.h12
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h73
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h7
-rw-r--r--arch/x86/include/uapi/asm/setup_data.h83
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c59
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S24
-rw-r--r--arch/x86/kernel/alternative.c25
-rw-r--r--arch/x86/kernel/amd_nb.c4
-rw-r--r--arch/x86/kernel/apic/apic.c207
-rw-r--r--arch/x86/kernel/apic/apic_common.c15
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c16
-rw-r--r--arch/x86/kernel/apic/apic_noop.c5
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c19
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c20
-rw-r--r--arch/x86/kernel/apic/io_apic.c94
-rw-r--r--arch/x86/kernel/apic/local.h5
-rw-r--r--arch/x86/kernel/apic/probe_32.c9
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c12
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/apm_32.c29
-rw-r--r--arch/x86/kernel/asm-offsets.c2
-rw-r--r--arch/x86/kernel/callthunks.c36
-rw-r--r--arch/x86/kernel/cpu/Makefile15
-rw-r--r--arch/x86/kernel/cpu/acrn.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c209
-rw-r--r--arch/x86/kernel/cpu/bugs.c157
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c15
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/common.c226
-rw-r--r--arch/x86/kernel/cpu/cpu.h13
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c2
-rw-r--r--arch/x86/kernel/cpu/debugfs.c47
-rw-r--r--arch/x86/kernel/cpu/hygon.c129
-rw-r--r--arch/x86/kernel/cpu/intel.c197
-rw-r--r--arch/x86/kernel/cpu/intel_pconfig.c2
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c4
-rw-r--r--arch/x86/kernel/cpu/mce/core.c28
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c8
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c15
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c3
-rw-r--r--arch/x86/kernel/cpu/rdrand.c1
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c111
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c48
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h75
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c501
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c15
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c388
-rw-r--r--arch/x86/kernel/cpu/topology.c573
-rw-r--r--arch/x86/kernel/cpu/topology.h67
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c183
-rw-r--r--arch/x86/kernel/cpu/topology_common.c218
-rw-r--r--arch/x86/kernel/cpu/topology_ext.c130
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c4
-rw-r--r--arch/x86/kernel/crash.c4
-rw-r--r--arch/x86/kernel/devicetree.c4
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/kernel/espfix_64.c8
-rw-r--r--arch/x86/kernel/fpu/bugs.c2
-rw-r--r--arch/x86/kernel/fpu/signal.c13
-rw-r--r--arch/x86/kernel/fred.c59
-rw-r--r--arch/x86/kernel/ftrace.c3
-rw-r--r--arch/x86/kernel/head64.c179
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/head_64.S151
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/idt.c4
-rw-r--r--arch/x86/kernel/irqinit.c7
-rw-r--r--arch/x86/kernel/jailhouse.c30
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c5
-rw-r--r--arch/x86/kernel/kprobes/opt.c2
-rw-r--r--arch/x86/kernel/kvm.c5
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/ldt.c8
-rw-r--r--arch/x86/kernel/mpparse.c31
-rw-r--r--arch/x86/kernel/nmi.c55
-rw-r--r--arch/x86/kernel/process.c99
-rw-r--r--arch/x86/kernel/process_32.c7
-rw-r--r--arch/x86/kernel/process_64.c74
-rw-r--r--arch/x86/kernel/setup.c37
-rw-r--r--arch/x86/kernel/sev-shared.c139
-rw-r--r--arch/x86/kernel/sev.c68
-rw-r--r--arch/x86/kernel/sev_verify_cbit.S2
-rw-r--r--arch/x86/kernel/smp.c10
-rw-r--r--arch/x86/kernel/smpboot.c242
-rw-r--r--arch/x86/kernel/static_call.c2
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c7
-rw-r--r--arch/x86/kernel/traps.c78
-rw-r--r--arch/x86/kernel/tsc.c34
-rw-r--r--arch/x86/kernel/vmlinux.lds.S11
-rw-r--r--arch/x86/kernel/vsmp_64.c13
-rw-r--r--arch/x86/kernel/x86_init.c5
-rw-r--r--arch/x86/kvm/Kconfig7
-rw-r--r--arch/x86/kvm/hyperv.c50
-rw-r--r--arch/x86/kvm/hyperv.h3
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/mmu/mmu.c48
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h2
-rw-r--r--arch/x86/kvm/svm/nested.c2
-rw-r--r--arch/x86/kvm/svm/sev.c60
-rw-r--r--arch/x86/kvm/svm/svm.c19
-rw-r--r--arch/x86/kvm/svm/svm.h1
-rw-r--r--arch/x86/kvm/svm/svm_ops.h6
-rw-r--r--arch/x86/kvm/svm/vmenter.S4
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/run_flags.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S9
-rw-r--r--arch/x86/kvm/vmx/vmx.c38
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h6
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--arch/x86/lib/Makefile15
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S12
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S30
-rw-r--r--arch/x86/lib/getuser.S24
-rw-r--r--arch/x86/lib/insn-eval.c6
-rw-r--r--arch/x86/lib/insn.c58
-rw-r--r--arch/x86/lib/msr-smp.c12
-rw-r--r--arch/x86/lib/msr.c6
-rw-r--r--arch/x86/lib/putuser.S20
-rw-r--r--arch/x86/lib/retpoline.S41
-rw-r--r--arch/x86/lib/x86-opcode-map.txt4
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/amdtopology.c34
-rw-r--r--arch/x86/mm/debug_pagetables.c4
-rw-r--r--arch/x86/mm/dump_pagetables.c4
-rw-r--r--arch/x86/mm/extable.c78
-rw-r--r--arch/x86/mm/fault.c46
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/maccess.c10
-rw-r--r--arch/x86/mm/mem_encrypt.c55
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c108
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--arch/x86/mm/pat/memtype.c9
-rw-r--r--arch/x86/mm/pat/set_memory.c43
-rw-r--r--arch/x86/mm/pgtable.c4
-rw-r--r--arch/x86/mm/tlb.c10
-rw-r--r--arch/x86/net/bpf_jit_comp.c240
-rw-r--r--arch/x86/net/bpf_jit_comp32.c2
-rw-r--r--arch/x86/platform/ce4100/ce4100.c14
-rw-r--r--arch/x86/platform/efi/efi.c5
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c5
-rw-r--r--arch/x86/platform/pvh/enlighten.c1
-rw-r--r--arch/x86/purgatory/Makefile2
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S33
-rw-r--r--arch/x86/tools/relocs.c8
-rw-r--r--arch/x86/virt/svm/Makefile3
-rw-r--r--arch/x86/virt/svm/sev.c560
-rw-r--r--arch/x86/xen/apic.c27
-rw-r--r--arch/x86/xen/enlighten_hvm.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c3
-rw-r--r--arch/x86/xen/enlighten_pvh.c1
-rw-r--r--arch/x86/xen/smp.c14
-rw-r--r--arch/x86/xen/smp.h2
-rw-r--r--arch/x86/xen/smp_pv.c69
-rw-r--r--arch/x86/xen/vga.c1
-rw-r--r--arch/x86/xen/xen-asm.S10
-rw-r--r--arch/x86/xen/xen-head.S2
-rw-r--r--arch/xtensa/Kconfig1
-rw-r--r--arch/xtensa/include/asm/jump_label.h4
-rw-r--r--arch/xtensa/include/asm/page.h2
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c8
-rw-r--r--block/bdev.c254
-rw-r--r--block/bfq-cgroup.c14
-rw-r--r--block/bfq-iosched.c28
-rw-r--r--block/bio-integrity.c1
-rw-r--r--block/bio.c47
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-cgroup.h1
-rw-r--r--block/blk-core.c43
-rw-r--r--block/blk-crypto-fallback.c1
-rw-r--r--block/blk-flush.c2
-rw-r--r--block/blk-integrity.c1
-rw-r--r--block/blk-iocost.c15
-rw-r--r--block/blk-iolatency.c6
-rw-r--r--block/blk-lib.c70
-rw-r--r--block/blk-map.c13
-rw-r--r--block/blk-merge.c8
-rw-r--r--block/blk-mq.c198
-rw-r--r--block/blk-settings.c329
-rw-r--r--block/blk-stat.c2
-rw-r--r--block/blk-sysfs.c59
-rw-r--r--block/blk-throttle.c10
-rw-r--r--block/blk-wbt.c10
-rw-r--r--block/blk-zoned.c20
-rw-r--r--block/blk.h89
-rw-r--r--block/bounce.c1
-rw-r--r--block/bsg-lib.c2
-rw-r--r--block/fops.c51
-rw-r--r--block/genhd.c26
-rw-r--r--block/holder.c12
-rw-r--r--block/ioctl.c20
-rw-r--r--block/opal_proto.h1
-rw-r--r--block/partitions/core.c12
-rw-r--r--block/partitions/mac.c2
-rw-r--r--block/sed-opal.c22
-rw-r--r--block/t10-pi.c72
-rw-r--r--crypto/algif_hash.c5
-rw-r--r--crypto/cbc.c3
-rw-r--r--crypto/lskcipher.c6
-rw-r--r--drivers/accel/ivpu/ivpu_debugfs.c20
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c131
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h5
-rw-r--r--drivers/accel/ivpu/ivpu_fw.c1
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c142
-rw-r--r--drivers/accel/ivpu/ivpu_gem.h3
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c60
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c38
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c6
-rw-r--r--drivers/accel/ivpu/ivpu_job.c162
-rw-r--r--drivers/accel/ivpu/ivpu_job.h3
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c58
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.h1
-rw-r--r--drivers/accel/ivpu/ivpu_mmu_context.c9
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c91
-rw-r--r--drivers/accel/ivpu/ivpu_pm.h6
-rw-r--r--drivers/acpi/apei/ghes.c89
-rw-r--r--drivers/acpi/ec.c112
-rw-r--r--drivers/android/binder.c10
-rw-r--r--drivers/ata/Kconfig5
-rw-r--r--drivers/ata/ahci.c484
-rw-r--r--drivers/ata/ahci.h9
-rw-r--r--drivers/ata/ahci_ceva.c125
-rw-r--r--drivers/ata/libahci.c21
-rw-r--r--drivers/ata/libata-core.c59
-rw-r--r--drivers/ata/libata-sata.c2
-rw-r--r--drivers/ata/pata_parport/pata_parport.c2
-rw-r--r--drivers/atm/fore200e.c6
-rw-r--r--drivers/atm/idt77252.c2
-rw-r--r--drivers/base/arch_topology.c13
-rw-r--r--drivers/base/base.h2
-rw-r--r--drivers/base/core.c26
-rw-r--r--drivers/base/cpu.c3
-rw-r--r--drivers/base/platform-msi.c119
-rw-r--r--drivers/base/regmap/regmap-kunit.c57
-rw-r--r--drivers/bcma/main.c2
-rw-r--r--drivers/block/amiflop.c2
-rw-r--r--drivers/block/aoe/aoeblk.c20
-rw-r--r--drivers/block/aoe/aoecmd.c12
-rw-r--r--drivers/block/aoe/aoenet.c1
-rw-r--r--drivers/block/ataflop.c2
-rw-r--r--drivers/block/brd.c26
-rw-r--r--drivers/block/drbd/drbd_int.h4
-rw-r--r--drivers/block/drbd/drbd_main.c17
-rw-r--r--drivers/block/drbd/drbd_nl.c268
-rw-r--r--drivers/block/drbd/drbd_state.c24
-rw-r--r--drivers/block/drbd/drbd_state_change.h8
-rw-r--r--drivers/block/floppy.c17
-rw-r--r--drivers/block/loop.c75
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c13
-rw-r--r--drivers/block/n64cart.c12
-rw-r--r--drivers/block/nbd.c49
-rw-r--r--drivers/block/null_blk/main.c535
-rw-r--r--drivers/block/null_blk/null_blk.h24
-rw-r--r--drivers/block/null_blk/trace.h5
-rw-r--r--drivers/block/null_blk/zoned.c25
-rw-r--r--drivers/block/pktcdvd.c109
-rw-r--r--drivers/block/ps3disk.c17
-rw-r--r--drivers/block/ps3vram.c6
-rw-r--r--drivers/block/rbd.c63
-rw-r--r--drivers/block/rnbd/rnbd-clt.c64
-rw-r--r--drivers/block/rnbd/rnbd-srv.c28
-rw-r--r--drivers/block/rnbd/rnbd-srv.h2
-rw-r--r--drivers/block/sunvdc.c18
-rw-r--r--drivers/block/swim.c8
-rw-r--r--drivers/block/swim3.c2
-rw-r--r--drivers/block/ublk_drv.c111
-rw-r--r--drivers/block/virtio_blk.c310
-rw-r--r--drivers/block/xen-blkback/blkback.c4
-rw-r--r--drivers/block/xen-blkback/common.h4
-rw-r--r--drivers/block/xen-blkback/xenbus.c37
-rw-r--r--drivers/block/xen-blkfront.c53
-rw-r--r--drivers/block/z2ram.c2
-rw-r--r--drivers/block/zram/zram_drv.c77
-rw-r--r--drivers/block/zram/zram_drv.h2
-rw-r--r--drivers/bluetooth/btbcm.c12
-rw-r--r--drivers/bluetooth/btintel.c116
-rw-r--r--drivers/bluetooth/btmtk.c5
-rw-r--r--drivers/bluetooth/btmtk.h1
-rw-r--r--drivers/bluetooth/btnxpuart.c27
-rw-r--r--drivers/bluetooth/btqca.c2
-rw-r--r--drivers/bluetooth/btrtl.c14
-rw-r--r--drivers/bluetooth/btusb.c30
-rw-r--r--drivers/bluetooth/hci_bcm4377.c3
-rw-r--r--drivers/bluetooth/hci_h5.c5
-rw-r--r--drivers/bluetooth/hci_qca.c28
-rw-r--r--drivers/bluetooth/hci_serdev.c9
-rw-r--r--drivers/bluetooth/hci_uart.h12
-rw-r--r--drivers/bus/Kconfig5
-rw-r--r--drivers/bus/imx-weim.c2
-rw-r--r--drivers/bus/sunxi-rsb.c4
-rw-r--r--drivers/bus/ti-sysc.c2
-rw-r--r--drivers/cache/ax45mp_cache.c4
-rw-r--r--drivers/cdrom/gdrom.c20
-rw-r--r--drivers/clk/rockchip/clk-rk3588.c5
-rw-r--r--drivers/clk/rockchip/clk.c17
-rw-r--r--drivers/clk/rockchip/clk.h2
-rw-r--r--drivers/clk/samsung/clk-gs101.c2
-rw-r--r--drivers/clocksource/arm_arch_timer.c6
-rw-r--r--drivers/comedi/drivers/comedi_8255.c1
-rw-r--r--drivers/comedi/drivers/comedi_test.c30
-rw-r--r--drivers/connector/cn_proc.c5
-rw-r--r--drivers/counter/counter-core.c7
-rw-r--r--drivers/cpufreq/amd-pstate.c7
-rw-r--r--drivers/cpufreq/intel_pstate.c58
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c34
-rw-r--r--drivers/crypto/caam/caamalg_qi2.c7
-rw-r--r--drivers/crypto/caam/caamhash.c7
-rw-r--r--drivers/crypto/ccp/Kconfig2
-rw-r--r--drivers/crypto/ccp/sev-dev.c1160
-rw-r--r--drivers/crypto/ccp/sev-dev.h5
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c1
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto_ahash.c4
-rw-r--r--drivers/crypto/virtio/virtio_crypto_akcipher_algs.c5
-rw-r--r--drivers/cxl/acpi.c46
-rw-r--r--drivers/cxl/core/cdat.c86
-rw-r--r--drivers/cxl/core/mbox.c4
-rw-r--r--drivers/cxl/core/memdev.c63
-rw-r--r--drivers/cxl/core/pci.c49
-rw-r--r--drivers/cxl/core/region.c66
-rw-r--r--drivers/cxl/core/trace.h6
-rw-r--r--drivers/cxl/cxl.h2
-rw-r--r--drivers/cxl/cxlmem.h10
-rw-r--r--drivers/cxl/mem.c56
-rw-r--r--drivers/cxl/pci.c83
-rw-r--r--drivers/dax/super.c3
-rw-r--r--drivers/dma-buf/heaps/cma_heap.c7
-rw-r--r--drivers/dma/at_hdmac.c32
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-core.c17
-rw-r--r--drivers/dma/dw-edma/dw-hdma-v0-core.c39
-rw-r--r--drivers/dma/dw-edma/dw-hdma-v0-regs.h2
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c10
-rw-r--r--drivers/dma/fsl-edma-common.c2
-rw-r--r--drivers/dma/fsl-edma-common.h5
-rw-r--r--drivers/dma/fsl-edma-main.c4
-rw-r--r--drivers/dma/fsl-qdma.c71
-rw-r--r--drivers/dma/idxd/cdev.c2
-rw-r--r--drivers/dma/idxd/debugfs.c2
-rw-r--r--drivers/dma/idxd/idxd.h1
-rw-r--r--drivers/dma/idxd/init.c15
-rw-r--r--drivers/dma/idxd/irq.c3
-rw-r--r--drivers/dma/mv_xor_v2.c8
-rw-r--r--drivers/dma/ptdma/ptdma-dmaengine.c2
-rw-r--r--drivers/dma/qcom/hidma.c6
-rw-r--r--drivers/dma/ti/edma.c10
-rw-r--r--drivers/dma/ti/k3-udma.c10
-rw-r--r--drivers/dpll/dpll_core.c101
-rw-r--r--drivers/dpll/dpll_core.h6
-rw-r--r--drivers/dpll/dpll_netlink.c124
-rw-r--r--drivers/dpll/dpll_nl.c4
-rw-r--r--drivers/dpll/dpll_nl.h2
-rw-r--r--drivers/edac/Kconfig1
-rw-r--r--drivers/edac/amd64_edac.c290
-rw-r--r--drivers/edac/i10nm_base.c1
-rw-r--r--drivers/edac/igen6_edac.c2
-rw-r--r--drivers/edac/mce_amd.c4
-rw-r--r--drivers/edac/synopsys_edac.c4
-rw-r--r--drivers/edac/versal_edac.c199
-rw-r--r--drivers/firewire/core-card.c32
-rw-r--r--drivers/firewire/core-device.c18
-rw-r--r--drivers/firewire/ohci.c2
-rw-r--r--drivers/firmware/arm_ffa/bus.c2
-rw-r--r--drivers/firmware/arm_ffa/driver.c85
-rw-r--r--drivers/firmware/arm_scmi/bus.c26
-rw-r--r--drivers/firmware/arm_scmi/clock.c197
-rw-r--r--drivers/firmware/arm_scmi/common.h3
-rw-r--r--drivers/firmware/arm_scmi/driver.c99
-rw-r--r--drivers/firmware/arm_scmi/mailbox.c14
-rw-r--r--drivers/firmware/arm_scmi/notify.c17
-rw-r--r--drivers/firmware/arm_scmi/notify.h4
-rw-r--r--drivers/firmware/arm_scmi/optee.c6
-rw-r--r--drivers/firmware/arm_scmi/perf.c170
-rw-r--r--drivers/firmware/arm_scmi/power.c30
-rw-r--r--drivers/firmware/arm_scmi/powercap.c45
-rw-r--r--drivers/firmware/arm_scmi/protocols.h5
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c12
-rw-r--r--drivers/firmware/arm_scmi/reset.c37
-rw-r--r--drivers/firmware/arm_scmi/sensors.c37
-rw-r--r--drivers/firmware/arm_scmi/shmem.c8
-rw-r--r--drivers/firmware/arm_scmi/smc.c7
-rw-r--r--drivers/firmware/arm_scmi/system.c16
-rw-r--r--drivers/firmware/efi/arm-runtime.c2
-rw-r--r--drivers/firmware/efi/capsule-loader.c2
-rw-r--r--drivers/firmware/efi/cper.c19
-rw-r--r--drivers/firmware/efi/efi-init.c19
-rw-r--r--drivers/firmware/efi/efi-pstore.c43
-rw-r--r--drivers/firmware/efi/libstub/Makefile4
-rw-r--r--drivers/firmware/efi/libstub/alignedmem.c1
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c8
-rw-r--r--drivers/firmware/efi/libstub/efistub.h5
-rw-r--r--drivers/firmware/efi/libstub/kaslr.c2
-rw-r--r--drivers/firmware/efi/libstub/randomalloc.c12
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c28
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.h4
-rw-r--r--drivers/firmware/efi/libstub/zboot.c2
-rw-r--r--drivers/firmware/efi/riscv-runtime.c2
-rw-r--r--drivers/firmware/microchip/mpfs-auto-update.c5
-rw-r--r--drivers/firmware/sysfb.c2
-rw-r--r--drivers/firmware/tegra/bpmp-debugfs.c2
-rw-r--r--drivers/gpio/gpio-74x164.c4
-rw-r--r--drivers/gpio/gpio-eic-sprd.c32
-rw-r--r--drivers/gpio/gpiolib-acpi.c14
-rw-r--r--drivers/gpio/gpiolib.c23
-rw-r--r--drivers/gpu/drm/Kconfig5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c124
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c47
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c63
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c76
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h47
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c5
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h1
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h1
-rw-r--r--drivers/gpu/drm/amd/include/amdgpu_reg_state.h2
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c12
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c47
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c9
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c9
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c9
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c73
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c71
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c5
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.c7
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.h2
-rw-r--r--drivers/gpu/drm/bridge/aux-hpd-bridge.c70
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8640.c23
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c32
-rw-r--r--drivers/gpu/drm/bridge/sii902x.c42
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c2
-rw-r--r--drivers/gpu/drm/drm_buddy.c22
-rw-r--r--drivers/gpu/drm/drm_crtc.c1
-rw-r--r--drivers/gpu/drm/drm_gem_vram_helper.c44
-rw-r--r--drivers/gpu/drm/drm_prime.c2
-rw-r--r--drivers/gpu/drm/drm_probe_helper.c8
-rw-r--r--drivers/gpu/drm/drm_syncobj.c19
-rw-r--r--drivers/gpu/drm/exynos/exynos5433_drm_decon.c4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimd.c6
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gsc.c2
-rw-r--r--drivers/gpu/drm/i915/Kconfig2
-rw-r--r--drivers/gpu/drm/i915/Makefile1
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.c17
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h7
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c19
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.h2
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c47
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.c13
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c17
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc_regs.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c3
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c3
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c6
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.c1
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.c1
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.c1
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c8
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c3
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.c5
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c20
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.c22
-rw-r--r--drivers/gpu/drm/msm/dp/dp_reg.h3
-rw-r--r--drivers/gpu/drm/msm/msm_gem_prime.c4
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c11
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c32
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c1
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.c7
-rw-r--r--drivers/gpu/drm/nouveau/Kconfig8
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/client.h1
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c29
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c7
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c48
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c38
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/client.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/object.c26
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c85
-rw-r--r--drivers/gpu/drm/panel/Kconfig2
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c8
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c2
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c2
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.c4
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c6
-rw-r--r--drivers/gpu/drm/tegra/drm.c26
-rw-r--r--drivers/gpu/drm/tests/drm_buddy_test.c306
-rw-r--r--drivers/gpu/drm/tests/drm_mm_test.c11
-rw-r--r--drivers/gpu/drm/ttm/ttm_device.c21
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c2
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c35
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.c1
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h4
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h4
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h8
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h6
-rw-r--r--drivers/gpu/drm/xe/abi/guc_messages_abi.h20
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c8
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs_test.c1
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c3
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c11
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h1
-rw-r--r--drivers/gpu/drm/xe/xe_device.c41
-rw-r--r--drivers/gpu/drm/xe/xe_device.h4
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_display.c6
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c2
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c12
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c10
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c129
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h20
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c12
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.c6
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c2
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c26
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c42
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c6
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c82
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.h19
-rw-r--r--drivers/gpu/drm/xe/xe_query.c50
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.c7
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c1
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c60
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h9
-rw-r--r--drivers/gpu/drm/xe/xe_sync_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_tile.c5
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h59
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c353
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h16
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h33
-rw-r--r--drivers/gpu/host1x/dev.c15
-rw-r--r--drivers/gpu/host1x/dev.h6
-rw-r--r--drivers/hid/bpf/hid_bpf_dispatch.c109
-rw-r--r--drivers/hid/bpf/hid_bpf_dispatch.h4
-rw-r--r--drivers/hid/bpf/hid_bpf_jmp_table.c40
-rw-r--r--drivers/hid/hid-ids.h3
-rw-r--r--drivers/hid/hid-logitech-hidpp.c15
-rw-r--r--drivers/hid/hid-multitouch.c4
-rw-r--r--drivers/hid/hid-nvidia-shield.c4
-rw-r--r--drivers/hid/hid-steam.c36
-rw-r--r--drivers/hid/hidraw.c7
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-core.c6
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-of.c1
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/bus.c2
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/client.c4
-rw-r--r--drivers/hid/wacom_sys.c63
-rw-r--r--drivers/hid/wacom_wac.c9
-rw-r--r--drivers/hv/channel.c176
-rw-r--r--drivers/hv/hv_util.c31
-rw-r--r--drivers/hv/vmbus_drv.c2
-rw-r--r--drivers/hwmon/aspeed-pwm-tacho.c7
-rw-r--r--drivers/hwmon/coretemp.c44
-rw-r--r--drivers/hwmon/fam15h_power.c2
-rw-r--r--drivers/hwmon/gigabyte_waterforce.c2
-rw-r--r--drivers/hwmon/nct6775-core.c14
-rw-r--r--drivers/hwmon/pmbus/mp2975.c16
-rw-r--r--drivers/i2c/busses/Makefile6
-rw-r--r--drivers/i2c/busses/i2c-aspeed.c1
-rw-r--r--drivers/i2c/busses/i2c-i801.c10
-rw-r--r--drivers/i2c/busses/i2c-imx.c5
-rw-r--r--drivers/i2c/busses/i2c-pasemi-core.c6
-rw-r--r--drivers/i2c/busses/i2c-qcom-geni.c14
-rw-r--r--drivers/i2c/busses/i2c-wmt.c6
-rw-r--r--drivers/iio/accel/Kconfig2
-rw-r--r--drivers/iio/accel/adxl367.c8
-rw-r--r--drivers/iio/accel/adxl367_i2c.c2
-rw-r--r--drivers/iio/adc/ad4130.c12
-rw-r--r--drivers/iio/adc/ad7091r8.c2
-rw-r--r--drivers/iio/humidity/Kconfig12
-rw-r--r--drivers/iio/humidity/Makefile1
-rw-r--r--drivers/iio/humidity/hdc3020.c2
-rw-r--r--drivers/iio/imu/bno055/Kconfig1
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c2
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c5
-rw-r--r--drivers/iio/industrialio-core.c5
-rw-r--r--drivers/iio/light/hid-sensor-als.c1
-rw-r--r--drivers/iio/magnetometer/rm3100-core.c10
-rw-r--r--drivers/iio/pressure/bmp280-spi.c51
-rw-r--r--drivers/iio/pressure/dlhl60d.c7
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c43
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c3
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c3
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c6
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c2
-rw-r--r--drivers/infiniband/hw/irdma/defs.h1
-rw-r--r--drivers/infiniband/hw/irdma/hw.c8
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c9
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c6
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c2
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c2
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c11
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c4
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c17
-rw-r--r--drivers/input/joystick/xpad.c8
-rw-r--r--drivers/input/keyboard/atkbd.c14
-rw-r--r--drivers/input/keyboard/gpio_keys_polled.c10
-rw-r--r--drivers/input/rmi4/rmi_driver.c6
-rw-r--r--drivers/input/serio/i8042-acpipnpio.h14
-rw-r--r--drivers/input/touchscreen/goodix.c3
-rw-r--r--drivers/interconnect/qcom/sc8180x.c1
-rw-r--r--drivers/interconnect/qcom/sm8550.c1
-rw-r--r--drivers/interconnect/qcom/sm8650.c2
-rw-r--r--drivers/interconnect/qcom/x1e80100.c1
-rw-r--r--drivers/iommu/Kconfig8
-rw-r--r--drivers/iommu/Makefile3
-rw-r--r--drivers/iommu/amd/amd_iommu.h42
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h34
-rw-r--r--drivers/iommu/amd/init.c137
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c21
-rw-r--r--drivers/iommu/amd/iommu.c634
-rw-r--r--drivers/iommu/apple-dart.c3
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c60
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c819
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h4
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c1
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c20
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c3
-rw-r--r--drivers/iommu/dma-iommu.c5
-rw-r--r--drivers/iommu/exynos-iommu.c2
-rw-r--r--drivers/iommu/intel/Kconfig12
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/dmar.c26
-rw-r--r--drivers/iommu/intel/iommu.c551
-rw-r--r--drivers/iommu/intel/iommu.h21
-rw-r--r--drivers/iommu/intel/nested.c16
-rw-r--r--drivers/iommu/intel/pasid.c210
-rw-r--r--drivers/iommu/intel/pasid.h3
-rw-r--r--drivers/iommu/intel/perf.c2
-rw-r--r--drivers/iommu/intel/svm.c76
-rw-r--r--drivers/iommu/io-pgfault.c463
-rw-r--r--drivers/iommu/iommu-priv.h5
-rw-r--r--drivers/iommu/iommu-sva.c88
-rw-r--r--drivers/iommu/iommu-sva.h71
-rw-r--r--drivers/iommu/iommu.c298
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c3
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c9
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h1
-rw-r--r--drivers/iommu/iommufd/iova_bitmap.c68
-rw-r--r--drivers/iommu/iommufd/selftest.c148
-rw-r--r--drivers/iommu/iova.c143
-rw-r--r--drivers/iommu/ipmmu-vmsa.c19
-rw-r--r--drivers/iommu/irq_remapping.c3
-rw-r--r--drivers/iommu/msm_iommu.c4
-rw-r--r--drivers/iommu/mtk_iommu.c5
-rw-r--r--drivers/iommu/mtk_iommu_v1.c7
-rw-r--r--drivers/iommu/of_iommu.c2
-rw-r--r--drivers/iommu/rockchip-iommu.c2
-rw-r--r--drivers/iommu/sprd-iommu.c3
-rw-r--r--drivers/iommu/sun50i-iommu.c2
-rw-r--r--drivers/iommu/tegra-smmu.c4
-rw-r--r--drivers/iommu/virtio-iommu.c3
-rw-r--r--drivers/irqchip/Kconfig11
-rw-r--r--drivers/irqchip/Makefile1
-rw-r--r--drivers/irqchip/irq-bcm6345-l1.c2
-rw-r--r--drivers/irqchip/irq-bcm7038-l1.c2
-rw-r--r--drivers/irqchip/irq-brcmstb-l2.c5
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c68
-rw-r--r--drivers/irqchip/irq-gic-v3.c57
-rw-r--r--drivers/irqchip/irq-gic.c27
-rw-r--r--drivers/irqchip/irq-imgpdc.c7
-rw-r--r--drivers/irqchip/irq-imx-intmux.c18
-rw-r--r--drivers/irqchip/irq-imx-irqsteer.c14
-rw-r--r--drivers/irqchip/irq-keystone.c5
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c24
-rw-r--r--drivers/irqchip/irq-ls-scfg-msi.c12
-rw-r--r--drivers/irqchip/irq-madera.c8
-rw-r--r--drivers/irqchip/irq-mbigen.c8
-rw-r--r--drivers/irqchip/irq-meson-gpio.c5
-rw-r--r--drivers/irqchip/irq-mvebu-pic.c12
-rw-r--r--drivers/irqchip/irq-pruss-intc.c14
-rw-r--r--drivers/irqchip/irq-qcom-mpm.c4
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c11
-rw-r--r--drivers/irqchip/irq-renesas-irqc.c9
-rw-r--r--drivers/irqchip/irq-renesas-rza1.c7
-rw-r--r--drivers/irqchip/irq-riscv-intc.c104
-rw-r--r--drivers/irqchip/irq-sifive-plic.c283
-rw-r--r--drivers/irqchip/irq-starfive-jh8100-intc.c207
-rw-r--r--drivers/irqchip/irq-stm32-exti.c9
-rw-r--r--drivers/irqchip/irq-ts4800.c12
-rw-r--r--drivers/irqchip/irq-vic.c3
-rw-r--r--drivers/isdn/capi/capi.c21
-rw-r--r--drivers/isdn/mISDN/dsp_pipeline.c16
-rw-r--r--drivers/mailbox/bcm-flexrm-mailbox.c8
-rw-r--r--drivers/md/Kconfig2
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/bcache/bcache.h4
-rw-r--r--drivers/md/bcache/super.c133
-rw-r--r--drivers/md/dm-bio-prison-v1.c2
-rw-r--r--drivers/md/dm-bufio.c74
-rw-r--r--drivers/md/dm-cache-policy-smq.c2
-rw-r--r--drivers/md/dm-core.h2
-rw-r--r--drivers/md/dm-crypt.c141
-rw-r--r--drivers/md/dm-dust.c2
-rw-r--r--drivers/md/dm-ebs-target.c2
-rw-r--r--drivers/md/dm-flakey.c2
-rw-r--r--drivers/md/dm-integrity.c105
-rw-r--r--drivers/md/dm-io.c23
-rw-r--r--drivers/md/dm-ioctl.c5
-rw-r--r--drivers/md/dm-kcopyd.c4
-rw-r--r--drivers/md/dm-log-userspace-base.c2
-rw-r--r--drivers/md/dm-log.c6
-rw-r--r--drivers/md/dm-mpath.c2
-rw-r--r--drivers/md/dm-ps-round-robin.c2
-rw-r--r--drivers/md/dm-raid.c101
-rw-r--r--drivers/md/dm-raid1.c6
-rw-r--r--drivers/md/dm-region-hash.c2
-rw-r--r--drivers/md/dm-snap-persistent.c4
-rw-r--r--drivers/md/dm-stats.c9
-rw-r--r--drivers/md/dm-table.c9
-rw-r--r--drivers/md/dm-thin.c22
-rw-r--r--drivers/md/dm-vdo/Kconfig17
-rw-r--r--drivers/md/dm-vdo/Makefile57
-rw-r--r--drivers/md/dm-vdo/action-manager.c388
-rw-r--r--drivers/md/dm-vdo/action-manager.h110
-rw-r--r--drivers/md/dm-vdo/admin-state.c506
-rw-r--r--drivers/md/dm-vdo/admin-state.h178
-rw-r--r--drivers/md/dm-vdo/block-map.c3318
-rw-r--r--drivers/md/dm-vdo/block-map.h394
-rw-r--r--drivers/md/dm-vdo/completion.c140
-rw-r--r--drivers/md/dm-vdo/completion.h152
-rw-r--r--drivers/md/dm-vdo/constants.h96
-rw-r--r--drivers/md/dm-vdo/cpu.h59
-rw-r--r--drivers/md/dm-vdo/data-vio.c2063
-rw-r--r--drivers/md/dm-vdo/data-vio.h670
-rw-r--r--drivers/md/dm-vdo/dedupe.c3003
-rw-r--r--drivers/md/dm-vdo/dedupe.h120
-rw-r--r--drivers/md/dm-vdo/dm-vdo-target.c2910
-rw-r--r--drivers/md/dm-vdo/dump.c275
-rw-r--r--drivers/md/dm-vdo/dump.h17
-rw-r--r--drivers/md/dm-vdo/encodings.c1483
-rw-r--r--drivers/md/dm-vdo/encodings.h1298
-rw-r--r--drivers/md/dm-vdo/errors.c307
-rw-r--r--drivers/md/dm-vdo/errors.h73
-rw-r--r--drivers/md/dm-vdo/flush.c560
-rw-r--r--drivers/md/dm-vdo/flush.h44
-rw-r--r--drivers/md/dm-vdo/funnel-queue.c170
-rw-r--r--drivers/md/dm-vdo/funnel-queue.h110
-rw-r--r--drivers/md/dm-vdo/funnel-workqueue.c638
-rw-r--r--drivers/md/dm-vdo/funnel-workqueue.h51
-rw-r--r--drivers/md/dm-vdo/indexer/chapter-index.c293
-rw-r--r--drivers/md/dm-vdo/indexer/chapter-index.h61
-rw-r--r--drivers/md/dm-vdo/indexer/config.c376
-rw-r--r--drivers/md/dm-vdo/indexer/config.h124
-rw-r--r--drivers/md/dm-vdo/indexer/delta-index.c1970
-rw-r--r--drivers/md/dm-vdo/indexer/delta-index.h279
-rw-r--r--drivers/md/dm-vdo/indexer/funnel-requestqueue.c279
-rw-r--r--drivers/md/dm-vdo/indexer/funnel-requestqueue.h31
-rw-r--r--drivers/md/dm-vdo/indexer/geometry.c201
-rw-r--r--drivers/md/dm-vdo/indexer/geometry.h140
-rw-r--r--drivers/md/dm-vdo/indexer/hash-utils.h66
-rw-r--r--drivers/md/dm-vdo/indexer/index-layout.c1765
-rw-r--r--drivers/md/dm-vdo/indexer/index-layout.h43
-rw-r--r--drivers/md/dm-vdo/indexer/index-page-map.c173
-rw-r--r--drivers/md/dm-vdo/indexer/index-page-map.h50
-rw-r--r--drivers/md/dm-vdo/indexer/index-session.c739
-rw-r--r--drivers/md/dm-vdo/indexer/index-session.h85
-rw-r--r--drivers/md/dm-vdo/indexer/index.c1388
-rw-r--r--drivers/md/dm-vdo/indexer/index.h83
-rw-r--r--drivers/md/dm-vdo/indexer/indexer.h353
-rw-r--r--drivers/md/dm-vdo/indexer/io-factory.c415
-rw-r--r--drivers/md/dm-vdo/indexer/io-factory.h64
-rw-r--r--drivers/md/dm-vdo/indexer/open-chapter.c426
-rw-r--r--drivers/md/dm-vdo/indexer/open-chapter.h79
-rw-r--r--drivers/md/dm-vdo/indexer/radix-sort.c330
-rw-r--r--drivers/md/dm-vdo/indexer/radix-sort.h26
-rw-r--r--drivers/md/dm-vdo/indexer/sparse-cache.c624
-rw-r--r--drivers/md/dm-vdo/indexer/sparse-cache.h46
-rw-r--r--drivers/md/dm-vdo/indexer/volume-index.c1283
-rw-r--r--drivers/md/dm-vdo/indexer/volume-index.h193
-rw-r--r--drivers/md/dm-vdo/indexer/volume.c1693
-rw-r--r--drivers/md/dm-vdo/indexer/volume.h172
-rw-r--r--drivers/md/dm-vdo/int-map.c707
-rw-r--r--drivers/md/dm-vdo/int-map.h39
-rw-r--r--drivers/md/dm-vdo/io-submitter.c477
-rw-r--r--drivers/md/dm-vdo/io-submitter.h47
-rw-r--r--drivers/md/dm-vdo/logger.c239
-rw-r--r--drivers/md/dm-vdo/logger.h100
-rw-r--r--drivers/md/dm-vdo/logical-zone.c373
-rw-r--r--drivers/md/dm-vdo/logical-zone.h89
-rw-r--r--drivers/md/dm-vdo/memory-alloc.c438
-rw-r--r--drivers/md/dm-vdo/memory-alloc.h162
-rw-r--r--drivers/md/dm-vdo/message-stats.c432
-rw-r--r--drivers/md/dm-vdo/message-stats.h13
-rw-r--r--drivers/md/dm-vdo/murmurhash3.c175
-rw-r--r--drivers/md/dm-vdo/murmurhash3.h15
-rw-r--r--drivers/md/dm-vdo/numeric.h78
-rw-r--r--drivers/md/dm-vdo/packer.c780
-rw-r--r--drivers/md/dm-vdo/packer.h122
-rw-r--r--drivers/md/dm-vdo/permassert.c26
-rw-r--r--drivers/md/dm-vdo/permassert.h45
-rw-r--r--drivers/md/dm-vdo/physical-zone.c644
-rw-r--r--drivers/md/dm-vdo/physical-zone.h115
-rw-r--r--drivers/md/dm-vdo/priority-table.c224
-rw-r--r--drivers/md/dm-vdo/priority-table.h47
-rw-r--r--drivers/md/dm-vdo/recovery-journal.c1762
-rw-r--r--drivers/md/dm-vdo/recovery-journal.h316
-rw-r--r--drivers/md/dm-vdo/repair.c1756
-rw-r--r--drivers/md/dm-vdo/repair.h14
-rw-r--r--drivers/md/dm-vdo/slab-depot.c5101
-rw-r--r--drivers/md/dm-vdo/slab-depot.h601
-rw-r--r--drivers/md/dm-vdo/statistics.h278
-rw-r--r--drivers/md/dm-vdo/status-codes.c94
-rw-r--r--drivers/md/dm-vdo/status-codes.h86
-rw-r--r--drivers/md/dm-vdo/string-utils.c22
-rw-r--r--drivers/md/dm-vdo/string-utils.h23
-rw-r--r--drivers/md/dm-vdo/thread-device.c34
-rw-r--r--drivers/md/dm-vdo/thread-device.h20
-rw-r--r--drivers/md/dm-vdo/thread-registry.c93
-rw-r--r--drivers/md/dm-vdo/thread-registry.h32
-rw-r--r--drivers/md/dm-vdo/thread-utils.c108
-rw-r--r--drivers/md/dm-vdo/thread-utils.h20
-rw-r--r--drivers/md/dm-vdo/time-utils.h28
-rw-r--r--drivers/md/dm-vdo/types.h393
-rw-r--r--drivers/md/dm-vdo/vdo.c1730
-rw-r--r--drivers/md/dm-vdo/vdo.h362
-rw-r--r--drivers/md/dm-vdo/vio.c500
-rw-r--r--drivers/md/dm-vdo/vio.h199
-rw-r--r--drivers/md/dm-vdo/wait-queue.c205
-rw-r--r--drivers/md/dm-vdo/wait-queue.h138
-rw-r--r--drivers/md/dm-verity-fec.c21
-rw-r--r--drivers/md/dm-verity-target.c159
-rw-r--r--drivers/md/dm-verity.h16
-rw-r--r--drivers/md/dm-writecache.c18
-rw-r--r--drivers/md/dm-zoned-metadata.c5
-rw-r--r--drivers/md/dm.c55
-rw-r--r--drivers/md/md-bitmap.c18
-rw-r--r--drivers/md/md-linear.h17
-rw-r--r--drivers/md/md-multipath.h32
-rw-r--r--drivers/md/md.c482
-rw-r--r--drivers/md/md.h79
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c2
-rw-r--r--drivers/md/raid0.c42
-rw-r--r--drivers/md/raid1-10.c69
-rw-r--r--drivers/md/raid1.c603
-rw-r--r--drivers/md/raid1.h1
-rw-r--r--drivers/md/raid10.c159
-rw-r--r--drivers/md/raid5-ppl.c3
-rw-r--r--drivers/md/raid5.c302
-rw-r--r--drivers/media/common/videobuf2/videobuf2-core.c2
-rw-r--r--drivers/media/common/videobuf2/videobuf2-v4l2.c55
-rw-r--r--drivers/media/platform/chips-media/wave5/wave5-vpu.c2
-rw-r--r--drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c3
-rw-r--r--drivers/media/platform/rockchip/rkisp1/rkisp1-common.h2
-rw-r--r--drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c3
-rw-r--r--drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c24
-rw-r--r--drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c3
-rw-r--r--drivers/media/rc/Kconfig1
-rw-r--r--drivers/media/rc/bpf-lirc.c8
-rw-r--r--drivers/media/rc/ir_toy.c2
-rw-r--r--drivers/media/rc/lirc_dev.c5
-rw-r--r--drivers/media/rc/rc-core-priv.h2
-rw-r--r--drivers/memory/emif.c65
-rw-r--r--drivers/memory/stm32-fmc2-ebi.c729
-rw-r--r--drivers/memory/tegra/tegra234.c48
-rw-r--r--drivers/memstick/core/ms_block.c14
-rw-r--r--drivers/memstick/core/mspro_block.c15
-rw-r--r--drivers/misc/fastrpc.c12
-rw-r--r--drivers/misc/lis3lv02d/lis3lv02d_i2c.c21
-rw-r--r--drivers/misc/lkdtm/bugs.c3
-rw-r--r--drivers/misc/lkdtm/core.c22
-rw-r--r--drivers/misc/lkdtm/heap.c2
-rw-r--r--drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c8
-rw-r--r--drivers/misc/mei/hw-me-regs.h2
-rw-r--r--drivers/misc/mei/pci-me.c2
-rw-r--r--drivers/misc/mei/vsc-tp.c1
-rw-r--r--drivers/misc/open-dice.c2
-rw-r--r--drivers/misc/vmw_vmci/vmci_datagram.c10
-rw-r--r--drivers/mmc/core/mmc.c2
-rw-r--r--drivers/mmc/core/queue.c97
-rw-r--r--drivers/mmc/core/slot-gpio.c6
-rw-r--r--drivers/mmc/host/mmci_stm32_sdmmc.c24
-rw-r--r--drivers/mmc/host/sdhci-pci-o2micro.c30
-rw-r--r--drivers/mmc/host/sdhci-xenon-phy.c48
-rw-r--r--drivers/mtd/devices/block2mtd.c46
-rw-r--r--drivers/mtd/mtd_blkdevs.c12
-rw-r--r--drivers/mtd/mtdcore.c1
-rw-r--r--drivers/mtd/nand/raw/marvell_nand.c13
-rw-r--r--drivers/mtd/nand/spi/gigadevice.c6
-rw-r--r--drivers/mtd/ubi/block.c6
-rw-r--r--drivers/net/amt.c10
-rw-r--r--drivers/net/arcnet/arc-rawmode.c1
-rw-r--r--drivers/net/arcnet/arc-rimi.c1
-rw-r--r--drivers/net/arcnet/arcnet.c1
-rw-r--r--drivers/net/arcnet/capmode.c1
-rw-r--r--drivers/net/arcnet/com20020-pci.c1
-rw-r--r--drivers/net/arcnet/com20020.c1
-rw-r--r--drivers/net/arcnet/com20020_cs.c1
-rw-r--r--drivers/net/arcnet/com90io.c1
-rw-r--r--drivers/net/arcnet/com90xx.c1
-rw-r--r--drivers/net/arcnet/rfc1051.c1
-rw-r--r--drivers/net/arcnet/rfc1201.c1
-rw-r--r--drivers/net/bareudp.c25
-rw-r--r--drivers/net/bonding/bond_3ad.c165
-rw-r--r--drivers/net/bonding/bond_main.c63
-rw-r--r--drivers/net/bonding/bond_netlink.c16
-rw-r--r--drivers/net/bonding/bond_options.c28
-rw-r--r--drivers/net/can/Kconfig3
-rw-r--r--drivers/net/can/Makefile1
-rw-r--r--drivers/net/can/dev/netlink.c2
-rw-r--r--drivers/net/can/esd/Kconfig12
-rw-r--r--drivers/net/can/esd/Makefile7
-rw-r--r--drivers/net/can/esd/esd_402_pci-core.c514
-rw-r--r--drivers/net/can/esd/esdacc.c764
-rw-r--r--drivers/net/can/esd/esdacc.h356
-rw-r--r--drivers/net/can/kvaser_pciefd.c62
-rw-r--r--drivers/net/can/m_can/m_can.c579
-rw-r--r--drivers/net/can/m_can/m_can.h35
-rw-r--r--drivers/net/can/m_can/m_can_pci.c1
-rw-r--r--drivers/net/can/m_can/m_can_platform.c5
-rw-r--r--drivers/net/can/m_can/tcan4x5x-core.c33
-rw-r--r--drivers/net/can/softing/softing_fw.c2
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c2
-rw-r--r--drivers/net/can/usb/Kconfig1
-rw-r--r--drivers/net/can/usb/gs_usb.c2
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c3
-rw-r--r--drivers/net/can/vxcan.c2
-rw-r--r--drivers/net/can/xilinx_can.c169
-rw-r--r--drivers/net/dsa/Kconfig2
-rw-r--r--drivers/net/dsa/b53/b53_common.c42
-rw-r--r--drivers/net/dsa/b53/b53_priv.h7
-rw-r--r--drivers/net/dsa/bcm_sf2.c2
-rw-r--r--drivers/net/dsa/dsa_loop_bdinfo.c1
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c404
-rw-r--r--drivers/net/dsa/microchip/ksz8795_reg.h1
-rw-r--r--drivers/net/dsa/microchip/ksz9477_i2c.c4
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c112
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h2
-rw-r--r--drivers/net/dsa/microchip/ksz_spi.c5
-rw-r--r--drivers/net/dsa/mt7530-mdio.c7
-rw-r--r--drivers/net/dsa/mt7530.c571
-rw-r--r--drivers/net/dsa/mt7530.h38
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c11
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h4
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2_scratch.c35
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-6185.c3
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c22
-rw-r--r--drivers/net/dsa/qca/qca8k-common.c4
-rw-r--r--drivers/net/dsa/qca/qca8k.h4
-rw-r--r--drivers/net/dsa/realtek/Kconfig20
-rw-r--r--drivers/net/dsa/realtek/Makefile13
-rw-r--r--drivers/net/dsa/realtek/realtek-mdio.c205
-rw-r--r--drivers/net/dsa/realtek/realtek-mdio.h48
-rw-r--r--drivers/net/dsa/realtek/realtek-smi.c279
-rw-r--r--drivers/net/dsa/realtek/realtek-smi.h48
-rw-r--r--drivers/net/dsa/realtek/realtek.h14
-rw-r--r--drivers/net/dsa/realtek/rtl8365mb.c132
-rw-r--r--drivers/net/dsa/realtek/rtl8366-core.c22
-rw-r--r--drivers/net/dsa/realtek/rtl8366rb.c119
-rw-r--r--drivers/net/dsa/realtek/rtl83xx.c335
-rw-r--r--drivers/net/dsa/realtek/rtl83xx.h24
-rw-r--r--drivers/net/dummy.c11
-rw-r--r--drivers/net/ethernet/8390/8390.c1
-rw-r--r--drivers/net/ethernet/8390/8390p.c1
-rw-r--r--drivers/net/ethernet/8390/apne.c1
-rw-r--r--drivers/net/ethernet/8390/hydra.c1
-rw-r--r--drivers/net/ethernet/8390/stnic.c1
-rw-r--r--drivers/net/ethernet/8390/zorro8390.c1
-rw-r--r--drivers/net/ethernet/Kconfig3
-rw-r--r--drivers/net/ethernet/adi/Kconfig1
-rw-r--r--drivers/net/ethernet/adi/adin1110.c10
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c323
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.h7
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_eth_com.c49
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_eth_com.h39
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c181
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_regs_defs.h1
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_xdp.c1
-rw-r--r--drivers/net/ethernet/amd/pds_core/adminq.c74
-rw-r--r--drivers/net/ethernet/amd/pds_core/auxbus.c30
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c133
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.h6
-rw-r--r--drivers/net/ethernet/amd/pds_core/debugfs.c12
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c30
-rw-r--r--drivers/net/ethernet/amd/pds_core/devlink.c3
-rw-r--r--drivers/net/ethernet/amd/pds_core/fw.c3
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c79
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c25
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ptp.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c13
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.h1
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c96
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.h25
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c12
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c212
-rw-r--r--drivers/net/ethernet/broadcom/bcm4908_enet.c1
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c1
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma.c1
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-platform.c1
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c23
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c50
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c14
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c958
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h75
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c471
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c16
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c11
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c54
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h2
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad.c12
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_core.c1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c14
-rw-r--r--drivers/net/ethernet/cirrus/ep93xx_eth.c1
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c2
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_vic.c3
-rw-r--r--drivers/net/ethernet/ec_bhf.c1
-rw-r--r--drivers/net/ethernet/engleder/tsnep_main.c53
-rw-r--r--drivers/net/ethernet/ezchip/nps_enet.c1
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c5
-rw-r--r--drivers/net/ethernet/freescale/fec.h2
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c151
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c18
-rw-r--r--drivers/net/ethernet/freescale/fsl_pq_mdio.c1
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c4
-rw-r--r--drivers/net/ethernet/google/gve/gve.h171
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c50
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h20
-rw-r--r--drivers/net/ethernet/google/gve/gve_dqo.h18
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c62
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c928
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c143
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c159
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c128
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c108
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.c48
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.c13
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c44
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h1
-rw-r--r--drivers/net/ethernet/i825xx/sun3_82586.c2
-rw-r--r--drivers/net/ethernet/intel/Kconfig9
-rw-r--r--drivers/net/ethernet/intel/e100.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/e1000.h20
-rw-r--r--drivers/net/ethernet/intel/e1000e/ethtool.c23
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c8
-rw-r--r--drivers/net/ethernet/intel/e1000e/ptp.c22
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h93
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c97
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c13
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c638
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c49
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c42
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c4
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c13
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_arfs.c1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c181
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.h10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c183
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ddp.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_debugfs.c13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devids.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c68
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.c99
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c95
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flow.h31
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fwlog.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c309
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h22
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c276
-rw-r--r--drivers/net/ethernet/intel/ice/ice_osdep.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c229
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h34
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h31
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c40
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c35
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf.h146
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq.c7
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq_api.h5
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_dev.c1
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c41
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_main.c6
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c1
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_vf_dev.c3
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.c2280
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.h70
-rw-r--r--drivers/net/ethernet/intel/idpf/virtchnl2.h2
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c43
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c62
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c5
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c2
-rw-r--r--drivers/net/ethernet/intel/igc/Makefile1
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h10
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ethtool.c25
-rw-r--r--drivers/net/ethernet/intel/igc/igc_leds.c280
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c54
-rw-r--r--drivers/net/ethernet/intel/igc/igc_phy.c6
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h16
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c70
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c155
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c262
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.h112
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c12
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h10
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c26
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h30
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c12
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h35
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c70
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c118
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c46
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h10
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c242
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h54
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h7
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h189
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c66
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h18
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c297
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c17
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c2
-rw-r--r--drivers/net/ethernet/litex/litex_liteeth.c1
-rw-r--r--drivers/net/ethernet/marvell/Kconfig1
-rw-r--r--drivers/net/ethernet/marvell/Makefile1
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c4
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c27
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig19
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/Makefile10
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c489
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c500
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h160
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c273
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c1231
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h334
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c430
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h166
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h154
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h162
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c510
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h224
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c330
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h276
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc.h16
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h617
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c16
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h32
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c186
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c75
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c8
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c14
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c14
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c11
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c7
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c5
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_wo.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_clock.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw_qos.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dpll.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c82
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c183
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c524
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c734
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c168
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c327
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c8
-rw-r--r--drivers/net/ethernet/microchip/encx24j600-regmap.c5
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ethtool.c4
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c2
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ptp.c4
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_lag.c9
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_port.c5
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c4
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.h1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_packet.c2
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c88
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/conntrack.c46
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/lag_conf.c5
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c6
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c19
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_debugfs.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c115
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h90
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.c12
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_fw.c5
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c379
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h23
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c120
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_stats.c18
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c950
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.h4
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c64
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c2
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c3
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c1
-rw-r--r--drivers/net/ethernet/qualcomm/qca_7k.c17
-rw-r--r--drivers/net/ethernet/qualcomm/qca_7k.h16
-rw-r--r--drivers/net/ethernet/qualcomm/qca_7k_common.c17
-rw-r--r--drivers/net/ethernet/qualcomm/qca_7k_common.h29
-rw-r--r--drivers/net/ethernet/qualcomm/qca_debug.c21
-rw-r--r--drivers/net/ethernet/qualcomm/qca_debug.h15
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c71
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.h22
-rw-r--r--drivers/net/ethernet/qualcomm/qca_uart.c17
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c1
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.h4
-rw-r--r--drivers/net/ethernet/realtek/r8169_leds.c145
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c267
-rw-r--r--drivers/net/ethernet/realtek/r8169_phy_config.c7
-rw-r--r--drivers/net/ethernet/renesas/Kconfig1
-rw-r--r--drivers/net/ethernet/renesas/ravb.h60
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c1207
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c2
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h1
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c6
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c1
-rw-r--r--drivers/net/ethernet/sfc/efx_common.c2
-rw-r--r--drivers/net/ethernet/sfc/falcon/efx.c2
-rw-r--r--drivers/net/ethernet/sfc/rx_common.c1
-rw-r--r--drivers/net/ethernet/sfc/siena/efx_common.c2
-rw-r--r--drivers/net/ethernet/sfc/siena/rx_common.c1
-rw-r--r--drivers/net/ethernet/sfc/siena/tx_common.c5
-rw-r--r--drivers/net/ethernet/sfc/tx_common.c5
-rw-r--r--drivers/net/ethernet/sfc/tx_tso.c4
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c1
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c1
-rw-r--r--drivers/net/ethernet/smsc/smsc9420.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h60
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c35
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c32
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c58
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc.h4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_est.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c137
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c235
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c87
-rw-r--r--drivers/net/ethernet/sun/sunvnet_common.c4
-rw-r--r--drivers/net/ethernet/ti/Kconfig1
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-ethtool.c4
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw-common.c1
-rw-r--r--drivers/net/ethernet/ti/cpsw.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw_ethtool.c4
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c3
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.h4
-rw-r--r--drivers/net/ethernet/ti/cpts.c17
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_ethtool.c4
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c4
-rw-r--r--drivers/net/ethernet/toshiba/ps3_gelic_net.c20
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c2
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c22
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/Makefile1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c269
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h7
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c141
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c82
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h3
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_type.h17
-rw-r--r--drivers/net/ethernet/wiznet/w5300.c3
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c3
-rw-r--r--drivers/net/ethernet/xircom/xirc2ps_cs.c4
-rw-r--r--drivers/net/fddi/skfp/skfddi.c1
-rw-r--r--drivers/net/fjes/fjes_hw.c37
-rw-r--r--drivers/net/geneve.c58
-rw-r--r--drivers/net/gtp.c53
-rw-r--r--drivers/net/hyperv/netvsc.c5
-rw-r--r--drivers/net/hyperv/netvsc_drv.c86
-rw-r--r--drivers/net/ieee802154/at86rf230.c5
-rw-r--r--drivers/net/ieee802154/ca8210.c10
-rw-r--r--drivers/net/ieee802154/fakelb.c1
-rw-r--r--drivers/net/ieee802154/mcr20a.c5
-rw-r--r--drivers/net/ieee802154/mrf24j40.c4
-rw-r--r--drivers/net/ipa/ipa.h5
-rw-r--r--drivers/net/ipa/ipa_cmd.c6
-rw-r--r--drivers/net/ipa/ipa_endpoint.c29
-rw-r--r--drivers/net/ipa/ipa_interrupt.c119
-rw-r--r--drivers/net/ipa/ipa_interrupt.h30
-rw-r--r--drivers/net/ipa/ipa_main.c60
-rw-r--r--drivers/net/ipa/ipa_mem.c37
-rw-r--r--drivers/net/ipa/ipa_mem.h5
-rw-r--r--drivers/net/ipa/ipa_modem.c110
-rw-r--r--drivers/net/ipa/ipa_power.c108
-rw-r--r--drivers/net/ipa/ipa_power.h29
-rw-r--r--drivers/net/ipa/ipa_qmi.c10
-rw-r--r--drivers/net/ipa/ipa_reg.c8
-rw-r--r--drivers/net/ipa/ipa_reg.h4
-rw-r--r--drivers/net/ipa/ipa_smp2p.c33
-rw-r--r--drivers/net/ipa/ipa_smp2p.h7
-rw-r--r--drivers/net/ipa/ipa_table.c18
-rw-r--r--drivers/net/ipa/ipa_uc.c9
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c2
-rw-r--r--drivers/net/ipvlan/ipvtap.c1
-rw-r--r--drivers/net/loopback.c1
-rw-r--r--drivers/net/macsec.c37
-rw-r--r--drivers/net/macvlan.c2
-rw-r--r--drivers/net/mdio/mdio-bcm-unimac.c94
-rw-r--r--drivers/net/mdio/mdio-ipq4019.c109
-rw-r--r--drivers/net/mdio/of_mdio.c79
-rw-r--r--drivers/net/netconsole.c365
-rw-r--r--drivers/net/netdevsim/bus.c149
-rw-r--r--drivers/net/netdevsim/dev.c8
-rw-r--r--drivers/net/netdevsim/netdev.c53
-rw-r--r--drivers/net/netdevsim/netdevsim.h3
-rw-r--r--drivers/net/netkit.c2
-rw-r--r--drivers/net/nlmon.c24
-rw-r--r--drivers/net/pcs/pcs-lynx.c1
-rw-r--r--drivers/net/pcs/pcs-mtk-lynxi.c1
-rw-r--r--drivers/net/pcs/pcs-rzn1-miic.c5
-rw-r--r--drivers/net/pcs/pcs-xpcs.c18
-rw-r--r--drivers/net/phy/Kconfig8
-rw-r--r--drivers/net/phy/Makefile2
-rw-r--r--drivers/net/phy/adin1100.c55
-rw-r--r--drivers/net/phy/aquantia/aquantia_main.c103
-rw-r--r--drivers/net/phy/at803x.c2432
-rw-r--r--drivers/net/phy/broadcom.c3
-rw-r--r--drivers/net/phy/dp83822.c211
-rw-r--r--drivers/net/phy/dp83867.c22
-rw-r--r--drivers/net/phy/marvell-88q2xxx.c640
-rw-r--r--drivers/net/phy/marvell-88x2222.c2
-rw-r--r--drivers/net/phy/marvell.c7
-rw-r--r--drivers/net/phy/mdio_bus.c48
-rw-r--r--drivers/net/phy/mdio_devres.c1
-rw-r--r--drivers/net/phy/mediatek-ge-soc.c147
-rw-r--r--drivers/net/phy/micrel.c120
-rw-r--r--drivers/net/phy/mxl-gpy.c20
-rw-r--r--drivers/net/phy/phy-c45.c137
-rw-r--r--drivers/net/phy/phy.c61
-rw-r--r--drivers/net/phy/phy_device.c208
-rw-r--r--drivers/net/phy/phylink.c8
-rw-r--r--drivers/net/phy/qcom/Kconfig30
-rw-r--r--drivers/net/phy/qcom/Makefile6
-rw-r--r--drivers/net/phy/qcom/at803x.c1106
-rw-r--r--drivers/net/phy/qcom/qca807x.c849
-rw-r--r--drivers/net/phy/qcom/qca808x.c663
-rw-r--r--drivers/net/phy/qcom/qca83xx.c275
-rw-r--r--drivers/net/phy/qcom/qcom-phy-lib.c676
-rw-r--r--drivers/net/phy/qcom/qcom.h243
-rw-r--r--drivers/net/phy/realtek.c48
-rw-r--r--drivers/net/phy/xilinx_gmii2rgmii.c2
-rw-r--r--drivers/net/plip/plip.c1
-rw-r--r--drivers/net/ppp/bsd_comp.c1
-rw-r--r--drivers/net/ppp/ppp_async.c5
-rw-r--r--drivers/net/ppp/ppp_deflate.c1
-rw-r--r--drivers/net/ppp/ppp_generic.c21
-rw-r--r--drivers/net/ppp/ppp_synctty.c1
-rw-r--r--drivers/net/ppp/pppoe.c23
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/tun.c45
-rw-r--r--drivers/net/usb/Kconfig1
-rw-r--r--drivers/net/usb/ax88179_178a.c20
-rw-r--r--drivers/net/usb/cdc_mbim.c2
-rw-r--r--drivers/net/usb/dm9601.c2
-rw-r--r--drivers/net/usb/hso.c2
-rw-r--r--drivers/net/usb/lan78xx.c12
-rw-r--r--drivers/net/usb/r8152.c49
-rw-r--r--drivers/net/usb/smsc95xx.c5
-rw-r--r--drivers/net/usb/sr9800.c4
-rw-r--r--drivers/net/usb/usbnet.c13
-rw-r--r--drivers/net/veth.c117
-rw-r--r--drivers/net/vsockmon.c19
-rw-r--r--drivers/net/vxlan/vxlan_core.c68
-rw-r--r--drivers/net/wan/Kconfig12
-rw-r--r--drivers/net/wan/Makefile1
-rw-r--r--drivers/net/wan/framer/framer-core.c30
-rw-r--r--drivers/net/wan/framer/pef2256/pef2256.c6
-rw-r--r--drivers/net/wan/fsl_qmc_hdlc.c797
-rw-r--r--drivers/net/wireguard/receive.c2
-rw-r--r--drivers/net/wireless/admtek/adm8211.c4
-rw-r--r--drivers/net/wireless/ath/ar5523/ar5523.c5
-rw-r--r--drivers/net/wireless/ath/ath10k/core.c4
-rw-r--r--drivers/net/wireless/ath/ath10k/coredump.h8
-rw-r--r--drivers/net/wireless/ath/ath10k/htt.c3
-rw-r--r--drivers/net/wireless/ath/ath10k/htt.h12
-rw-r--r--drivers/net/wireless/ath/ath10k/mac.c12
-rw-r--r--drivers/net/wireless/ath/ath10k/pci.c10
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi-tlv.c11
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi-tlv.h4
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi.c26
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi.h62
-rw-r--r--drivers/net/wireless/ath/ath11k/core.c108
-rw-r--r--drivers/net/wireless/ath/ath11k/core.h44
-rw-r--r--drivers/net/wireless/ath/ath11k/debugfs.c25
-rw-r--r--drivers/net/wireless/ath/ath11k/debugfs.h12
-rw-r--r--drivers/net/wireless/ath/ath11k/dp.c20
-rw-r--r--drivers/net/wireless/ath/ath11k/dp_tx.c6
-rw-r--r--drivers/net/wireless/ath/ath11k/hal.c19
-rw-r--r--drivers/net/wireless/ath/ath11k/hal.h3
-rw-r--r--drivers/net/wireless/ath/ath11k/hal_rx.c4
-rw-r--r--drivers/net/wireless/ath/ath11k/hw.c2
-rw-r--r--drivers/net/wireless/ath/ath11k/hw.h1
-rw-r--r--drivers/net/wireless/ath/ath11k/mac.c1214
-rw-r--r--drivers/net/wireless/ath/ath11k/mac.h5
-rw-r--r--drivers/net/wireless/ath/ath11k/mhi.c73
-rw-r--r--drivers/net/wireless/ath/ath11k/pci.c62
-rw-r--r--drivers/net/wireless/ath/ath11k/pci.h3
-rw-r--r--drivers/net/wireless/ath/ath11k/pcic.c11
-rw-r--r--drivers/net/wireless/ath/ath11k/qmi.c5
-rw-r--r--drivers/net/wireless/ath/ath11k/reg.c267
-rw-r--r--drivers/net/wireless/ath/ath11k/reg.h11
-rw-r--r--drivers/net/wireless/ath/ath11k/testmode.c2
-rw-r--r--drivers/net/wireless/ath/ath11k/thermal.c5
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.c303
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.h151
-rw-r--r--drivers/net/wireless/ath/ath12k/Makefile4
-rw-r--r--drivers/net/wireless/ath/ath12k/core.c270
-rw-r--r--drivers/net/wireless/ath/ath12k/core.h84
-rw-r--r--drivers/net/wireless/ath/ath12k/dp.c25
-rw-r--r--drivers/net/wireless/ath/ath12k/dp.h20
-rw-r--r--drivers/net/wireless/ath/ath12k/dp_mon.c9
-rw-r--r--drivers/net/wireless/ath/ath12k/dp_rx.c166
-rw-r--r--drivers/net/wireless/ath/ath12k/dp_tx.c30
-rw-r--r--drivers/net/wireless/ath/ath12k/fw.c171
-rw-r--r--drivers/net/wireless/ath/ath12k/fw.h33
-rw-r--r--drivers/net/wireless/ath/ath12k/hal.c415
-rw-r--r--drivers/net/wireless/ath/ath12k/hal.h20
-rw-r--r--drivers/net/wireless/ath/ath12k/hal_desc.h20
-rw-r--r--drivers/net/wireless/ath/ath12k/hal_rx.c15
-rw-r--r--drivers/net/wireless/ath/ath12k/hw.c33
-rw-r--r--drivers/net/wireless/ath/ath12k/hw.h55
-rw-r--r--drivers/net/wireless/ath/ath12k/mac.c1309
-rw-r--r--drivers/net/wireless/ath/ath12k/mac.h4
-rw-r--r--drivers/net/wireless/ath/ath12k/mhi.c52
-rw-r--r--drivers/net/wireless/ath/ath12k/p2p.c142
-rw-r--r--drivers/net/wireless/ath/ath12k/p2p.h23
-rw-r--r--drivers/net/wireless/ath/ath12k/pci.c94
-rw-r--r--drivers/net/wireless/ath/ath12k/pci.h6
-rw-r--r--drivers/net/wireless/ath/ath12k/qmi.c429
-rw-r--r--drivers/net/wireless/ath/ath12k/qmi.h35
-rw-r--r--drivers/net/wireless/ath/ath12k/reg.c13
-rw-r--r--drivers/net/wireless/ath/ath12k/rx_desc.h116
-rw-r--r--drivers/net/wireless/ath/ath12k/trace.h29
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.c330
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.h202
-rw-r--r--drivers/net/wireless/ath/ath5k/mac80211-ops.c4
-rw-r--r--drivers/net/wireless/ath/ath6kl/cfg80211.c2
-rw-r--r--drivers/net/wireless/ath/ath9k/ahb.c6
-rw-r--r--drivers/net/wireless/ath/ath9k/antenna.c2
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9003_phy.h9
-rw-r--r--drivers/net/wireless/ath/ath9k/beacon.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/htc.h2
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_beacon.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_init.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_main.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_drv_txrx.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/main.c4
-rw-r--r--drivers/net/wireless/ath/ath9k/reg_aic.h4
-rw-r--r--drivers/net/wireless/ath/ath9k/wmi.c10
-rw-r--r--drivers/net/wireless/ath/ath9k/xmit.c3
-rw-r--r--drivers/net/wireless/ath/carl9170/main.c4
-rw-r--r--drivers/net/wireless/ath/carl9170/tx.c2
-rw-r--r--drivers/net/wireless/ath/wcn36xx/main.c5
-rw-r--r--drivers/net/wireless/atmel/at76c50x-usb.c4
-rw-r--r--drivers/net/wireless/broadcom/b43/b43.h16
-rw-r--r--drivers/net/wireless/broadcom/b43/dma.c4
-rw-r--r--drivers/net/wireless/broadcom/b43/main.c20
-rw-r--r--drivers/net/wireless/broadcom/b43/phy_ht.c6
-rw-r--r--drivers/net/wireless/broadcom/b43/phy_n.c4
-rw-r--r--drivers/net/wireless/broadcom/b43/pio.c6
-rw-r--r--drivers/net/wireless/broadcom/b43legacy/main.c4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/core.c26
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c82
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c18
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c12
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/core.c46
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c9
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c11
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c152
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h60
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c116
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h127
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.c13
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.h40
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c27
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.c1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c6
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_int.h2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c27
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h2
-rw-r--r--drivers/net/wireless/intel/iwlegacy/3945-mac.c4
-rw-r--r--drivers/net/wireless/intel/iwlegacy/4965-mac.c4
-rw-r--r--drivers/net/wireless/intel/iwlegacy/common.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Kconfig9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Makefile3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/ax210.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/bz.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/sc.c40
-rw-r--r--drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c632
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.h216
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/coex.h14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/d3.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/debug.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/location.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/mac.h10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h32
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h17
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/power.h40
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/sta.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/tx.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/txq.h12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c30
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/error-dump.h23
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/pnvm.c49
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/regulatory.c500
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/regulatory.h199
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/runtime.h22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/uefi.c427
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/uefi.h210
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c71
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.c32
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.h10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-fh.h36
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c80
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-prph.h9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.h69
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/coex.c132
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/constants.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c159
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c51
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c19
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c340
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/link.c27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c49
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c243
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c36
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c171
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h95
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/nvm.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c100
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c40
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/power.c29
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c30
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c55
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sf.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c62
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c195
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c151
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/utils.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c49
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/queue/tx.c18
-rw-r--r--drivers/net/wireless/intel/iwlwifi/tests/Makefile7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/tests/devinfo.c54
-rw-r--r--drivers/net/wireless/intel/iwlwifi/tests/module.c10
-rw-r--r--drivers/net/wireless/intersil/p54/fwio.c2
-rw-r--r--drivers/net/wireless/intersil/p54/main.c4
-rw-r--r--drivers/net/wireless/intersil/p54/p54spi.c1
-rw-r--r--drivers/net/wireless/marvell/libertas/cmd.c13
-rw-r--r--drivers/net/wireless/marvell/libertas_tf/main.c4
-rw-r--r--drivers/net/wireless/marvell/mwifiex/11h.c2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/11n.c12
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cfg80211.c2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/debugfs.c22
-rw-r--r--drivers/net/wireless/marvell/mwifiex/fw.h2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/main.h2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/scan.c14
-rw-r--r--drivers/net/wireless/marvell/mwifiex/wmm.c2
-rw-r--r--drivers/net/wireless/marvell/mwl8k.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/Makefile2
-rw-r--r--drivers/net/wireless/mediatek/mt76/agg-rx.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c106
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.h9
-rw-r--r--drivers/net/wireless/mediatek/mt76/mac80211.c32
-rw-r--r--drivers/net/wireless/mediatek/mt76/mmio.c107
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76.h61
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/main.c5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/main.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/mcu.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/mmio.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/sdio.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/usb.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac.h5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/pci.c5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/usb.c5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_util.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/pci.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/usb.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/dma.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mac.c9
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/main.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mcu.c6
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mmio.c56
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/soc.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/init.c6
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/main.c14
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/mcu.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/pci.c14
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/sdio.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/usb.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/init.c56
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/main.c27
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/mcu.c212
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/mcu.h94
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/pci.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/usb.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt792x.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt792x_acpi_sar.c38
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt792x_core.c9
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt792x_dma.c15
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt792x_regs.h8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt792x_usb.c75
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/dma.c5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/init.c12
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mac.c81
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/main.c11
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mcu.c37
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mcu.h4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mmio.c75
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h6
-rw-r--r--drivers/net/wireless/mediatek/mt76/sdio.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/usb.c53
-rw-r--r--drivers/net/wireless/mediatek/mt76/util.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/wed.c213
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/main.c4
-rw-r--r--drivers/net/wireless/microchip/wilc1000/cfg80211.c16
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.c110
-rw-r--r--drivers/net/wireless/microchip/wilc1000/netdev.c94
-rw-r--r--drivers/net/wireless/microchip/wilc1000/netdev.h6
-rw-r--r--drivers/net/wireless/microchip/wilc1000/sdio.c1
-rw-r--r--drivers/net/wireless/microchip/wilc1000/spi.c82
-rw-r--r--drivers/net/wireless/microchip/wilc1000/wlan.c40
-rw-r--r--drivers/net/wireless/microchip/wilc1000/wlan.h11
-rw-r--r--drivers/net/wireless/purelifi/plfxlc/mac.c5
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/event.c2
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2400pci.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2500pci.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2500usb.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2800lib.c8
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2800pci.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2800soc.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2800usb.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c5
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt61pci.c4
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt73usb.c4
-rw-r--r--drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c4
-rw-r--r--drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c4
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h28
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c3
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c2
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c1
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c1
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192f.c33
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8710b.c1
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c1
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c1
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c588
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h15
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/core.c4
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/efuse.c36
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/efuse.h4
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/pci.c13
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c4
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/def.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c7
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c109
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/usb.c195
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/usb.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/wifi.h43
-rw-r--r--drivers/net/wireless/realtek/rtw88/debug.c44
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac.c7
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac80211.c4
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.c2
-rw-r--r--drivers/net/wireless/realtek/rtw88/pci.c4
-rw-r--r--drivers/net/wireless/realtek/rtw88/phy.c3
-rw-r--r--drivers/net/wireless/realtek/rtw88/reg.h3
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8821c.c2
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8821cu.c40
-rw-r--r--drivers/net/wireless/realtek/rtw88/usb.c40
-rw-r--r--drivers/net/wireless/realtek/rtw89/cam.c61
-rw-r--r--drivers/net/wireless/realtek/rtw89/cam.h109
-rw-r--r--drivers/net/wireless/realtek/rtw89/chan.c646
-rw-r--r--drivers/net/wireless/realtek/rtw89/chan.h5
-rw-r--r--drivers/net/wireless/realtek/rtw89/coex.c393
-rw-r--r--drivers/net/wireless/realtek/rtw89/coex.h21
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.c382
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.h362
-rw-r--r--drivers/net/wireless/realtek/rtw89/debug.c9
-rw-r--r--drivers/net/wireless/realtek/rtw89/efuse.h1
-rw-r--r--drivers/net/wireless/realtek/rtw89/efuse_be.c142
-rw-r--r--drivers/net/wireless/realtek/rtw89/fw.c2496
-rw-r--r--drivers/net/wireless/realtek/rtw89/fw.h1532
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.c341
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.h93
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac80211.c19
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac_be.c363
-rw-r--r--drivers/net/wireless/realtek/rtw89/pci.c215
-rw-r--r--drivers/net/wireless/realtek/rtw89/pci.h67
-rw-r--r--drivers/net/wireless/realtek/rtw89/pci_be.c121
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy.c1105
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy.h113
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy_be.c331
-rw-r--r--drivers/net/wireless/realtek/rtw89/ps.c10
-rw-r--r--drivers/net/wireless/realtek/rtw89/reg.h572
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b.c162
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b_table.c72
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851be.c2
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852a.c78
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852ae.c1
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b.c82
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b_table.c142
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852be.c1
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852c.c81
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852ce.c1
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8922a.c1773
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.c378
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.h18
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8922ae.c3
-rw-r--r--drivers/net/wireless/realtek/rtw89/wow.c50
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_mac80211.c8
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_usb.c12
-rw-r--r--drivers/net/wireless/silabs/wfx/sta.c19
-rw-r--r--drivers/net/wireless/st/cw1200/cw1200_sdio.c42
-rw-r--r--drivers/net/wireless/st/cw1200/cw1200_spi.c75
-rw-r--r--drivers/net/wireless/st/cw1200/main.c4
-rw-r--r--drivers/net/wireless/ti/wl1251/main.c4
-rw-r--r--drivers/net/wireless/ti/wl1251/sdio.c1
-rw-r--r--drivers/net/wireless/ti/wl1251/spi.c1
-rw-r--r--drivers/net/wireless/ti/wl12xx/main.c1
-rw-r--r--drivers/net/wireless/ti/wl18xx/main.c1
-rw-r--r--drivers/net/wireless/ti/wlcore/event.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/main.c7
-rw-r--r--drivers/net/wireless/ti/wlcore/sdio.c10
-rw-r--r--drivers/net/wireless/ti/wlcore/spi.c1
-rw-r--r--drivers/net/wireless/virtual/mac80211_hwsim.c147
-rw-r--r--drivers/net/wireless/virtual/mac80211_hwsim.h5
-rw-r--r--drivers/net/wireless/virtual/virt_wifi.c2
-rw-r--r--drivers/net/wireless/zydas/zd1211rw/zd_def.h2
-rw-r--r--drivers/net/wireless/zydas/zd1211rw/zd_mac.c4
-rw-r--r--drivers/net/wireless/zydas/zd1211rw/zd_usb.c5
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.c47
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.h18
-rw-r--r--drivers/net/wwan/t7xx/t7xx_modem_ops.c14
-rw-r--r--drivers/net/wwan/t7xx/t7xx_modem_ops.h1
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.c103
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.h14
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port.h4
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port_proxy.c110
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port_proxy.h10
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port_wwan.c115
-rw-r--r--drivers/net/wwan/t7xx/t7xx_reg.h24
-rw-r--r--drivers/net/wwan/t7xx/t7xx_state_monitor.c132
-rw-r--r--drivers/net/wwan/t7xx/t7xx_state_monitor.h1
-rw-r--r--drivers/net/wwan/wwan_core.c36
-rw-r--r--drivers/net/wwan/wwan_hwsim.c16
-rw-r--r--drivers/net/xen-netback/netback.c107
-rw-r--r--drivers/nvdimm/btt.c14
-rw-r--r--drivers/nvdimm/pmem.c14
-rw-r--r--drivers/nvme/common/auth.c1
-rw-r--r--drivers/nvme/common/keyring.c1
-rw-r--r--drivers/nvme/host/apple.c15
-rw-r--r--drivers/nvme/host/auth.c19
-rw-r--r--drivers/nvme/host/constants.c10
-rw-r--r--drivers/nvme/host/core.c553
-rw-r--r--drivers/nvme/host/fabrics.c42
-rw-r--r--drivers/nvme/host/fabrics.h8
-rw-r--r--drivers/nvme/host/fc.c60
-rw-r--r--drivers/nvme/host/ioctl.c2
-rw-r--r--drivers/nvme/host/multipath.c32
-rw-r--r--drivers/nvme/host/nvme.h74
-rw-r--r--drivers/nvme/host/pci.c3
-rw-r--r--drivers/nvme/host/rdma.c37
-rw-r--r--drivers/nvme/host/sysfs.c71
-rw-r--r--drivers/nvme/host/tcp.c18
-rw-r--r--drivers/nvme/host/zns.c24
-rw-r--r--drivers/nvme/target/admin-cmd.c2
-rw-r--r--drivers/nvme/target/configfs.c28
-rw-r--r--drivers/nvme/target/core.c23
-rw-r--r--drivers/nvme/target/discovery.c4
-rw-r--r--drivers/nvme/target/fabrics-cmd.c9
-rw-r--r--drivers/nvme/target/fc.c186
-rw-r--r--drivers/nvme/target/fcloop.c24
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c16
-rw-r--r--drivers/nvme/target/loop.c9
-rw-r--r--drivers/nvme/target/nvmet.h8
-rw-r--r--drivers/nvme/target/passthru.c2
-rw-r--r--drivers/nvme/target/rdma.c11
-rw-r--r--drivers/nvme/target/tcp.c6
-rw-r--r--drivers/nvme/target/zns.c5
-rw-r--r--drivers/nvmem/core.c5
-rw-r--r--drivers/of/property.c65
-rw-r--r--drivers/of/unittest.c12
-rw-r--r--drivers/pci/bus.c49
-rw-r--r--drivers/pci/controller/dwc/pcie-designware-ep.c10
-rw-r--r--drivers/pci/controller/dwc/pcie-qcom.c2
-rw-r--r--drivers/pci/hotplug/s390_pci_hpc.c65
-rw-r--r--drivers/pci/msi/irqdomain.c2
-rw-r--r--drivers/pci/pci.c119
-rw-r--r--drivers/pci/pci.h9
-rw-r--r--drivers/pci/pcie/aspm.c13
-rw-r--r--drivers/perf/arm-cmn.c11
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c4
-rw-r--r--drivers/perf/cxl_pmu.c12
-rw-r--r--drivers/perf/riscv_pmu.c18
-rw-r--r--drivers/perf/riscv_pmu_legacy.c10
-rw-r--r--drivers/perf/riscv_pmu_sbi.c8
-rw-r--r--drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c2
-rw-r--r--drivers/phy/microchip/lan966x_serdes.c2
-rw-r--r--drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c166
-rw-r--r--drivers/phy/qualcomm/phy-qcom-m31.c2
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-combo.c16
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-usb.c32
-rw-r--r--drivers/phy/renesas/phy-rcar-gen3-usb2.c4
-rw-r--r--drivers/phy/ti/phy-omap-usb2.c4
-rw-r--r--drivers/pinctrl/core.c2
-rw-r--r--drivers/pinctrl/pinctrl-amd.c2
-rw-r--r--drivers/pinctrl/stm32/pinctrl-stm32mp257.c2
-rw-r--r--drivers/platform/mellanox/mlxbf-pmc.c4
-rw-r--r--drivers/platform/mellanox/mlxbf-tmfifo.c67
-rw-r--r--drivers/platform/x86/amd/pmf/Kconfig1
-rw-r--r--drivers/platform/x86/amd/pmf/core.c17
-rw-r--r--drivers/platform/x86/amd/pmf/pmf.h5
-rw-r--r--drivers/platform/x86/amd/pmf/spc.c36
-rw-r--r--drivers/platform/x86/amd/pmf/tee-if.c82
-rw-r--r--drivers/platform/x86/intel/ifs/load.c3
-rw-r--r--drivers/platform/x86/intel/int0002_vgpio.c2
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c82
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h32
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c2
-rw-r--r--drivers/platform/x86/intel/vbtn.c3
-rw-r--r--drivers/platform/x86/intel/wmi/sbl-fw-update.c4
-rw-r--r--drivers/platform/x86/p2sb.c177
-rw-r--r--drivers/platform/x86/serdev_helpers.h80
-rw-r--r--drivers/platform/x86/think-lmi.c20
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c5
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c64
-rw-r--r--drivers/platform/x86/wmi.c181
-rw-r--r--drivers/platform/x86/x86-android-tablets/core.c38
-rw-r--r--drivers/platform/x86/x86-android-tablets/lenovo.c1
-rw-r--r--drivers/platform/x86/x86-android-tablets/other.c4
-rw-r--r--drivers/platform/x86/x86-android-tablets/x86-android-tablets.h1
-rw-r--r--drivers/pmdomain/arm/scmi_perf_domain.c3
-rw-r--r--drivers/pmdomain/core.c2
-rw-r--r--drivers/pmdomain/mediatek/mtk-pm-domains.c15
-rw-r--r--drivers/pmdomain/qcom/rpmhpd.c8
-rw-r--r--drivers/pmdomain/renesas/r8a77980-sysc.c3
-rw-r--r--drivers/power/supply/Kconfig1
-rw-r--r--drivers/power/supply/bq27xxx_battery_i2c.c4
-rw-r--r--drivers/power/supply/qcom_battmgr.c109
-rw-r--r--drivers/powercap/intel_rapl_common.c2
-rw-r--r--drivers/ptp/Kconfig12
-rw-r--r--drivers/ptp/Makefile1
-rw-r--r--drivers/ptp/ptp_clock.c66
-rw-r--r--drivers/ptp/ptp_fc3.c1014
-rw-r--r--drivers/ptp/ptp_fc3.h45
-rw-r--r--drivers/ptp/ptp_kvm_common.c10
-rw-r--r--drivers/ptp/ptp_kvm_x86.c4
-rw-r--r--drivers/ptp/ptp_ocp.c311
-rw-r--r--drivers/ptp/ptp_private.h2
-rw-r--r--drivers/ptp/ptp_sysfs.c13
-rw-r--r--drivers/ptp/ptp_vclock.c2
-rw-r--r--drivers/ras/Kconfig13
-rw-r--r--drivers/ras/Makefile3
-rw-r--r--drivers/ras/amd/atl/Kconfig21
-rw-r--r--drivers/ras/amd/atl/Makefile18
-rw-r--r--drivers/ras/amd/atl/access.c133
-rw-r--r--drivers/ras/amd/atl/core.c225
-rw-r--r--drivers/ras/amd/atl/dehash.c500
-rw-r--r--drivers/ras/amd/atl/denormalize.c718
-rw-r--r--drivers/ras/amd/atl/internal.h306
-rw-r--r--drivers/ras/amd/atl/map.c682
-rw-r--r--drivers/ras/amd/atl/reg_fields.h606
-rw-r--r--drivers/ras/amd/atl/system.c288
-rw-r--r--drivers/ras/amd/atl/umc.c341
-rw-r--r--drivers/ras/amd/fmpm.c1013
-rw-r--r--drivers/ras/cec.c10
-rw-r--r--drivers/ras/debugfs.c8
-rw-r--r--drivers/ras/debugfs.h2
-rw-r--r--drivers/ras/ras.c31
-rw-r--r--drivers/regulator/max5970-regulator.c10
-rw-r--r--drivers/regulator/pwm-regulator.c43
-rw-r--r--drivers/regulator/rk808-regulator.c10
-rw-r--r--drivers/regulator/ti-abb-regulator.c22
-rw-r--r--drivers/rtc/lib_test.c2
-rw-r--r--drivers/s390/block/dasd.c190
-rw-r--r--drivers/s390/block/dasd_3990_erp.c80
-rw-r--r--drivers/s390/block/dasd_alias.c8
-rw-r--r--drivers/s390/block/dasd_devmap.c34
-rw-r--r--drivers/s390/block/dasd_diag.c26
-rw-r--r--drivers/s390/block/dasd_eckd.c186
-rw-r--r--drivers/s390/block/dasd_eer.c7
-rw-r--r--drivers/s390/block/dasd_erp.c9
-rw-r--r--drivers/s390/block/dasd_fba.c88
-rw-r--r--drivers/s390/block/dasd_genhd.c54
-rw-r--r--drivers/s390/block/dasd_int.h37
-rw-r--r--drivers/s390/block/dasd_ioctl.c8
-rw-r--r--drivers/s390/block/dasd_proc.c5
-rw-r--r--drivers/s390/block/dcssblk.c10
-rw-r--r--drivers/s390/block/scm_blk.c17
-rw-r--r--drivers/s390/char/vmur.c4
-rw-r--r--drivers/s390/char/zcore.c1
-rw-r--r--drivers/s390/cio/ccwgroup.c4
-rw-r--r--drivers/s390/cio/chsc.c4
-rw-r--r--drivers/s390/cio/chsc_sch.c20
-rw-r--r--drivers/s390/cio/cmf.c6
-rw-r--r--drivers/s390/cio/css.c4
-rw-r--r--drivers/s390/cio/device.c4
-rw-r--r--drivers/s390/cio/device_ops.c6
-rw-r--r--drivers/s390/cio/scm.c4
-rw-r--r--drivers/s390/crypto/ap_bus.c257
-rw-r--r--drivers/s390/crypto/ap_bus.h8
-rw-r--r--drivers/s390/crypto/ap_debug.h4
-rw-r--r--drivers/s390/crypto/ap_queue.c31
-rw-r--r--drivers/s390/crypto/pkey_api.c226
-rw-r--r--drivers/s390/crypto/vfio_ap_drv.c2
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c35
-rw-r--r--drivers/s390/crypto/zcrypt_api.c228
-rw-r--r--drivers/s390/crypto/zcrypt_api.h9
-rw-r--r--drivers/s390/crypto/zcrypt_ccamisc.c214
-rw-r--r--drivers/s390/crypto/zcrypt_debug.h4
-rw-r--r--drivers/s390/crypto/zcrypt_ep11misc.c127
-rw-r--r--drivers/s390/crypto/zcrypt_error.h5
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype50.c14
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.c45
-rw-r--r--drivers/s390/net/qeth_l3_main.c9
-rw-r--r--drivers/scsi/Kconfig2
-rw-r--r--drivers/scsi/fcoe/fcoe_ctlr.c20
-rw-r--r--drivers/scsi/fnic/fnic.h3
-rw-r--r--drivers/scsi/fnic/fnic_fcs.c5
-rw-r--r--drivers/scsi/fnic/fnic_main.c1
-rw-r--r--drivers/scsi/fnic/fnic_scsi.c4
-rw-r--r--drivers/scsi/initio.c3
-rw-r--r--drivers/scsi/isci/request.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c12
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_transport.c7
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c4
-rw-r--r--drivers/scsi/scsi.c22
-rw-r--r--drivers/scsi/scsi_error.c9
-rw-r--r--drivers/scsi/scsi_lib.c4
-rw-r--r--drivers/scsi/scsi_priv.h2
-rw-r--r--drivers/scsi/scsi_scan.c2
-rw-r--r--drivers/scsi/sd.c26
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c5
-rw-r--r--drivers/scsi/storvsc_drv.c12
-rw-r--r--drivers/scsi/virtio_scsi.c2
-rw-r--r--drivers/soc/apple/mailbox.c6
-rw-r--r--drivers/soc/mediatek/Kconfig9
-rw-r--r--drivers/soc/mediatek/Makefile1
-rw-r--r--drivers/soc/mediatek/mtk-socinfo.c191
-rw-r--r--drivers/soc/microchip/Kconfig2
-rw-r--r--drivers/soc/qcom/Kconfig9
-rw-r--r--drivers/soc/qcom/Makefile2
-rw-r--r--drivers/soc/qcom/apr.c2
-rw-r--r--drivers/soc/qcom/llcc-qcom.c2
-rw-r--r--drivers/soc/qcom/pmic_glink.c21
-rw-r--r--drivers/soc/qcom/pmic_glink_altmode.c16
-rw-r--r--drivers/soc/qcom/qcom-geni-se.c1
-rw-r--r--drivers/soc/qcom/qcom-pbs.c236
-rw-r--r--drivers/soc/qcom/qcom_aoss.c105
-rw-r--r--drivers/soc/qcom/smem.c11
-rw-r--r--drivers/soc/qcom/smp2p.c6
-rw-r--r--drivers/soc/qcom/socinfo.c7
-rw-r--r--drivers/soc/qcom/spm.c248
-rw-r--r--drivers/soc/qcom/trace-aoss.h48
-rw-r--r--drivers/soc/renesas/Kconfig17
-rw-r--r--drivers/soc/renesas/rcar-rst.c1
-rw-r--r--drivers/soc/renesas/renesas-soc.c8
-rw-r--r--drivers/soc/samsung/Kconfig1
-rw-r--r--drivers/soc/samsung/exynos-pmu.c235
-rw-r--r--drivers/soc/samsung/exynos-pmu.h1
-rw-r--r--drivers/soc/tegra/Kconfig5
-rw-r--r--drivers/soc/tegra/fuse/fuse-tegra.c118
-rw-r--r--drivers/soc/tegra/fuse/fuse-tegra30.c23
-rw-r--r--drivers/soc/tegra/fuse/fuse.h8
-rw-r--r--drivers/soc/tegra/fuse/tegra-apbmisc.c108
-rw-r--r--drivers/soc/tegra/pmc.c87
-rw-r--r--drivers/spi/spi-bcm-qspi.c4
-rw-r--r--drivers/spi/spi-cadence-quadspi.c33
-rw-r--r--drivers/spi/spi-cadence.c17
-rw-r--r--drivers/spi/spi-cs42l43.c8
-rw-r--r--drivers/spi/spi-hisi-sfc-v3xx.c5
-rw-r--r--drivers/spi/spi-imx.c13
-rw-r--r--drivers/spi/spi-intel-pci.c3
-rw-r--r--drivers/spi/spi-mxs.c3
-rw-r--r--drivers/spi/spi-omap2-mcspi.c137
-rw-r--r--drivers/spi/spi-ppc4xx.c7
-rw-r--r--drivers/spi/spi-sh-msiof.c16
-rw-r--r--drivers/spi/spi.c4
-rw-r--r--drivers/ssb/main.c2
-rw-r--r--drivers/staging/fieldbus/Documentation/devicetree/bindings/fieldbus/arcx,anybus-controller.txt2
-rw-r--r--drivers/staging/iio/impedance-analyzer/ad5933.c2
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp_cmd.c58
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp_internal.h4
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp_ioctl.c52
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp_v4l2.c59
-rw-r--r--drivers/staging/vt6655/device_main.c6
-rw-r--r--drivers/staging/vt6656/main_usb.c6
-rw-r--r--drivers/target/target_core_configfs.c48
-rw-r--r--drivers/target/target_core_iblock.c18
-rw-r--r--drivers/target/target_core_iblock.h2
-rw-r--r--drivers/target/target_core_pscsi.c31
-rw-r--r--drivers/target/target_core_pscsi.h2
-rw-r--r--drivers/tee/optee/device.c3
-rw-r--r--drivers/tee/tee_core.c2
-rw-r--r--drivers/thermal/intel/intel_hfi.c2
-rw-r--r--drivers/thermal/intel/intel_powerclamp.c34
-rw-r--r--drivers/thermal/intel/x86_pkg_temp_thermal.c2
-rw-r--r--drivers/thunderbolt/switch.c3
-rw-r--r--drivers/thunderbolt/tb_regs.h2
-rw-r--r--drivers/thunderbolt/usb4.c2
-rw-r--r--drivers/tty/hvc/Kconfig8
-rw-r--r--drivers/tty/serial/8250/8250_dw.c6
-rw-r--r--drivers/tty/serial/8250/8250_pci1xxxx.c2
-rw-r--r--drivers/tty/serial/amba-pl011.c60
-rw-r--r--drivers/tty/serial/fsl_lpuart.c7
-rw-r--r--drivers/tty/serial/imx.c22
-rw-r--r--drivers/tty/serial/max310x.c53
-rw-r--r--drivers/tty/serial/mxs-auart.c5
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c10
-rw-r--r--drivers/tty/serial/serial_core.c2
-rw-r--r--drivers/tty/serial/serial_port.c25
-rw-r--r--drivers/tty/serial/stm32-usart.c4
-rw-r--r--drivers/tty/vt/vt.c2
-rw-r--r--drivers/ufs/core/ufshcd.c9
-rw-r--r--drivers/ufs/host/ufs-qcom.c8
-rw-r--r--drivers/usb/cdns3/cdns3-gadget.c8
-rw-r--r--drivers/usb/cdns3/core.c1
-rw-r--r--drivers/usb/cdns3/drd.c13
-rw-r--r--drivers/usb/cdns3/drd.h6
-rw-r--r--drivers/usb/cdns3/host.c16
-rw-r--r--drivers/usb/chipidea/ci.h2
-rw-r--r--drivers/usb/chipidea/core.c44
-rw-r--r--drivers/usb/common/ulpi.c2
-rw-r--r--drivers/usb/core/hcd.c23
-rw-r--r--drivers/usb/core/hub.c46
-rw-r--r--drivers/usb/core/port.c5
-rw-r--r--drivers/usb/dwc3/core.h1
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c4
-rw-r--r--drivers/usb/dwc3/gadget.c17
-rw-r--r--drivers/usb/dwc3/gadget.h2
-rw-r--r--drivers/usb/dwc3/host.c4
-rw-r--r--drivers/usb/gadget/function/f_mass_storage.c20
-rw-r--r--drivers/usb/gadget/function/f_ncm.c18
-rw-r--r--drivers/usb/gadget/udc/omap_udc.c3
-rw-r--r--drivers/usb/gadget/udc/pch_udc.c1
-rw-r--r--drivers/usb/host/uhci-grlib.c1
-rw-r--r--drivers/usb/host/xhci-mem.c14
-rw-r--r--drivers/usb/host/xhci-plat.c3
-rw-r--r--drivers/usb/host/xhci-ring.c88
-rw-r--r--drivers/usb/host/xhci.h1
-rw-r--r--drivers/usb/isp1760/isp1760-hcd.c8
-rw-r--r--drivers/usb/roles/class.c29
-rw-r--r--drivers/usb/serial/cp210x.c1
-rw-r--r--drivers/usb/serial/option.c1
-rw-r--r--drivers/usb/serial/qcserial.c2
-rw-r--r--drivers/usb/storage/isd200.c23
-rw-r--r--drivers/usb/storage/scsiglue.c7
-rw-r--r--drivers/usb/storage/uas.c7
-rw-r--r--drivers/usb/typec/altmodes/displayport.c18
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c13
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c2
-rw-r--r--drivers/usb/typec/ucsi/ucsi_acpi.c88
-rw-r--r--drivers/usb/typec/ucsi/ucsi_glink.c1
-rw-r--r--drivers/vhost/net.c91
-rw-r--r--drivers/video/fbdev/core/fbcon.c11
-rw-r--r--drivers/video/fbdev/hyperv_fb.c2
-rw-r--r--drivers/video/fbdev/savage/savagefb_driver.c3
-rw-r--r--drivers/video/fbdev/sis/sis_main.c2
-rw-r--r--drivers/video/fbdev/stifb.c2
-rw-r--r--drivers/video/fbdev/vt8500lcdfb.c1
-rw-r--r--drivers/watchdog/Kconfig1
-rw-r--r--drivers/watchdog/s3c2410_wdt.c8
-rw-r--r--drivers/xen/events/events_base.c10
-rw-r--r--drivers/xen/gntalloc.c2
-rw-r--r--drivers/xen/pcpu.c2
-rw-r--r--drivers/xen/privcmd.c15
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--drivers/xen/xenbus/xenbus_client.c15
-rw-r--r--drivers/zorro/zorro-driver.c2
-rw-r--r--drivers/zorro/zorro.h2
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/9p/vfs_file.c40
-rw-r--r--fs/Kconfig8
-rw-r--r--fs/Makefile3
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h1
-rw-r--r--fs/affs/super.c5
-rw-r--r--fs/afs/dir.c32
-rw-r--r--fs/afs/dynroot.c9
-rw-r--r--fs/afs/file.c8
-rw-r--r--fs/afs/flock.c60
-rw-r--r--fs/afs/internal.h6
-rw-r--r--fs/afs/main.c3
-rw-r--r--fs/afs/proc.c5
-rw-r--r--fs/afs/server.c14
-rw-r--r--fs/afs/volume.c4
-rw-r--r--fs/aio.c17
-rw-r--r--fs/attr.c7
-rw-r--r--fs/backing-file.c4
-rw-r--r--fs/bcachefs/alloc_background.c2
-rw-r--r--fs/bcachefs/backpointers.c8
-rw-r--r--fs/bcachefs/bcachefs.h12
-rw-r--r--fs/bcachefs/btree_iter.c4
-rw-r--r--fs/bcachefs/btree_locking.c4
-rw-r--r--fs/bcachefs/btree_update_interior.c3
-rw-r--r--fs/bcachefs/debug.c2
-rw-r--r--fs/bcachefs/fs-io-buffered.c21
-rw-r--r--fs/bcachefs/fs-io-direct.c2
-rw-r--r--fs/bcachefs/fs-io.c2
-rw-r--r--fs/bcachefs/fs-ioctl.c4
-rw-r--r--fs/bcachefs/fs.c9
-rw-r--r--fs/bcachefs/fsck.c23
-rw-r--r--fs/bcachefs/io_write.c1
-rw-r--r--fs/bcachefs/journal.c2
-rw-r--r--fs/bcachefs/journal_io.c7
-rw-r--r--fs/bcachefs/journal_reclaim.c10
-rw-r--r--fs/bcachefs/mean_and_variance.h2
-rw-r--r--fs/bcachefs/printbuf.c1
-rw-r--r--fs/bcachefs/recovery.c11
-rw-r--r--fs/bcachefs/sb-members.c2
-rw-r--r--fs/bcachefs/snapshot.c2
-rw-r--r--fs/bcachefs/str_hash.h22
-rw-r--r--fs/bcachefs/super-io.c22
-rw-r--r--fs/bcachefs/super.c4
-rw-r--r--fs/bcachefs/super_types.h2
-rw-r--r--fs/bcachefs/thread_with_file.c2
-rw-r--r--fs/bcachefs/util.c17
-rw-r--r--fs/bcachefs/util.h4
-rw-r--r--fs/befs/linuxvfs.c3
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/btrfs/accessors.c15
-rw-r--r--fs/btrfs/accessors.h50
-rw-r--r--fs/btrfs/acl.c1
-rw-r--r--fs/btrfs/acl.h11
-rw-r--r--fs/btrfs/async-thread.c1
-rw-r--r--fs/btrfs/async-thread.h3
-rw-r--r--fs/btrfs/backref.c119
-rw-r--r--fs/btrfs/backref.h136
-rw-r--r--fs/btrfs/bio.c17
-rw-r--r--fs/btrfs/bio.h2
-rw-r--r--fs/btrfs/block-group.c95
-rw-r--r--fs/btrfs/block-group.h21
-rw-r--r--fs/btrfs/block-rsv.c3
-rw-r--r--fs/btrfs/block-rsv.h39
-rw-r--r--fs/btrfs/btrfs_inode.h25
-rw-r--r--fs/btrfs/compression.c39
-rw-r--r--fs/btrfs/compression.h16
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h28
-rw-r--r--fs/btrfs/defrag.c106
-rw-r--r--fs/btrfs/defrag.h10
-rw-r--r--fs/btrfs/delalloc-space.c31
-rw-r--r--fs/btrfs/delalloc-space.h4
-rw-r--r--fs/btrfs/delayed-inode.c21
-rw-r--r--fs/btrfs/delayed-inode.h21
-rw-r--r--fs/btrfs/delayed-ref.c85
-rw-r--r--fs/btrfs/delayed-ref.h82
-rw-r--r--fs/btrfs/dev-replace.c43
-rw-r--r--fs/btrfs/dev-replace.h4
-rw-r--r--fs/btrfs/dir-item.h6
-rw-r--r--fs/btrfs/disk-io.c59
-rw-r--r--fs/btrfs/disk-io.h22
-rw-r--r--fs/btrfs/export.c12
-rw-r--r--fs/btrfs/export.h4
-rw-r--r--fs/btrfs/extent-io-tree.c6
-rw-r--r--fs/btrfs/extent-io-tree.h7
-rw-r--r--fs/btrfs/extent-tree.c104
-rw-r--r--fs/btrfs/extent-tree.h10
-rw-r--r--fs/btrfs/extent_io.c475
-rw-r--r--fs/btrfs/extent_io.h44
-rw-r--r--fs/btrfs/extent_map.c23
-rw-r--r--fs/btrfs/extent_map.h8
-rw-r--r--fs/btrfs/file-item.c6
-rw-r--r--fs/btrfs/file-item.h13
-rw-r--r--fs/btrfs/file.c43
-rw-r--r--fs/btrfs/file.h15
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/free-space-cache.h15
-rw-r--r--fs/btrfs/free-space-tree.c56
-rw-r--r--fs/btrfs/free-space-tree.h6
-rw-r--r--fs/btrfs/fs.h59
-rw-r--r--fs/btrfs/inode-item.c1
-rw-r--r--fs/btrfs/inode-item.h5
-rw-r--r--fs/btrfs/inode.c308
-rw-r--r--fs/btrfs/ioctl.c150
-rw-r--r--fs/btrfs/ioctl.h9
-rw-r--r--fs/btrfs/locking.c3
-rw-r--r--fs/btrfs/locking.h8
-rw-r--r--fs/btrfs/lru_cache.h7
-rw-r--r--fs/btrfs/lzo.c36
-rw-r--r--fs/btrfs/messages.c2
-rw-r--r--fs/btrfs/misc.h2
-rw-r--r--fs/btrfs/ordered-data.c6
-rw-r--r--fs/btrfs/ordered-data.h15
-rw-r--r--fs/btrfs/orphan.c1
-rw-r--r--fs/btrfs/orphan.h5
-rw-r--r--fs/btrfs/print-tree.h3
-rw-r--r--fs/btrfs/props.c3
-rw-r--r--fs/btrfs/props.h7
-rw-r--r--fs/btrfs/qgroup.c162
-rw-r--r--fs/btrfs/qgroup.h20
-rw-r--r--fs/btrfs/raid-stripe-tree.c1
-rw-r--r--fs/btrfs/raid-stripe-tree.h5
-rw-r--r--fs/btrfs/raid56.c31
-rw-r--r--fs/btrfs/raid56.h9
-rw-r--r--fs/btrfs/rcu-string.h6
-rw-r--r--fs/btrfs/ref-verify.c6
-rw-r--r--fs/btrfs/ref-verify.h9
-rw-r--r--fs/btrfs/reflink.c12
-rw-r--r--fs/btrfs/reflink.h4
-rw-r--r--fs/btrfs/relocation.c5
-rw-r--r--fs/btrfs/relocation.h9
-rw-r--r--fs/btrfs/root-tree.c17
-rw-r--r--fs/btrfs/root-tree.h10
-rw-r--r--fs/btrfs/scrub.c45
-rw-r--r--fs/btrfs/scrub.h6
-rw-r--r--fs/btrfs/send.c87
-rw-r--r--fs/btrfs/send.h8
-rw-r--r--fs/btrfs/space-info.c27
-rw-r--r--fs/btrfs/space-info.h9
-rw-r--r--fs/btrfs/subpage.c77
-rw-r--r--fs/btrfs/subpage.h21
-rw-r--r--fs/btrfs/super.c17
-rw-r--r--fs/btrfs/super.h7
-rw-r--r--fs/btrfs/sysfs.c53
-rw-r--r--fs/btrfs/sysfs.h9
-rw-r--r--fs/btrfs/tests/extent-io-tests.c28
-rw-r--r--fs/btrfs/tests/inode-tests.c40
-rw-r--r--fs/btrfs/transaction.c59
-rw-r--r--fs/btrfs/transaction.h18
-rw-r--r--fs/btrfs/tree-checker.c10
-rw-r--r--fs/btrfs/tree-checker.h2
-rw-r--r--fs/btrfs/tree-log.c141
-rw-r--r--fs/btrfs/tree-log.h49
-rw-r--r--fs/btrfs/tree-mod-log.c13
-rw-r--r--fs/btrfs/tree-mod-log.h8
-rw-r--r--fs/btrfs/ulist.c1
-rw-r--r--fs/btrfs/ulist.h1
-rw-r--r--fs/btrfs/uuid-tree.c3
-rw-r--r--fs/btrfs/uuid-tree.h5
-rw-r--r--fs/btrfs/verity.c1
-rw-r--r--fs/btrfs/verity.h7
-rw-r--r--fs/btrfs/volumes.c192
-rw-r--r--fs/btrfs/volumes.h57
-rw-r--r--fs/btrfs/xattr.h6
-rw-r--r--fs/btrfs/zlib.c73
-rw-r--r--fs/btrfs/zoned.c55
-rw-r--r--fs/btrfs/zoned.h15
-rw-r--r--fs/btrfs/zstd.c77
-rw-r--r--fs/buffer.c22
-rw-r--r--fs/cachefiles/cache.c2
-rw-r--r--fs/cachefiles/daemon.c1
-rw-r--r--fs/cachefiles/ondemand.c3
-rw-r--r--fs/ceph/caps.c74
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/locks.c74
-rw-r--r--fs/ceph/mds_client.c57
-rw-r--r--fs/ceph/mds_client.h7
-rw-r--r--fs/ceph/mdsmap.c7
-rw-r--r--fs/ceph/mdsmap.h6
-rw-r--r--fs/ceph/super.c18
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/coda/inode.c147
-rw-r--r--fs/coredump.c45
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/crypto/fname.c8
-rw-r--r--fs/crypto/fscrypt_private.h14
-rw-r--r--fs/crypto/hooks.c15
-rw-r--r--fs/crypto/keyring.c8
-rw-r--r--fs/crypto/keysetup.c11
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dlm/plock.c44
-rw-r--r--fs/ecryptfs/crypto.c10
-rw-r--r--fs/efivarfs/internal.h2
-rw-r--r--fs/efivarfs/super.c7
-rw-r--r--fs/efivarfs/vars.c23
-rw-r--r--fs/efs/super.c118
-rw-r--r--fs/erofs/compress.h12
-rw-r--r--fs/erofs/data.c7
-rw-r--r--fs/erofs/decompressor.c8
-rw-r--r--fs/erofs/decompressor_deflate.c22
-rw-r--r--fs/erofs/decompressor_lzma.c20
-rw-r--r--fs/erofs/fscache.c304
-rw-r--r--fs/erofs/inode.c16
-rw-r--r--fs/erofs/internal.h11
-rw-r--r--fs/erofs/namei.c28
-rw-r--r--fs/erofs/super.c48
-rw-r--r--fs/erofs/utils.c4
-rw-r--r--fs/erofs/zdata.c361
-rw-r--r--fs/eventfd.c16
-rw-r--r--fs/eventpoll.c147
-rw-r--r--fs/exec.c53
-rw-r--r--fs/exfat/cache.c2
-rw-r--r--fs/exfat/exfat_fs.h1
-rw-r--r--fs/exfat/file.c37
-rw-r--r--fs/exfat/inode.c7
-rw-r--r--fs/exfat/nls.c14
-rw-r--r--fs/exfat/super.c22
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c124
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/fsmap.c8
-rw-r--r--fs/ext4/indirect.c2
-rw-r--r--fs/ext4/inode.c90
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/mballoc.c140
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/move_extent.c10
-rw-r--r--fs/ext4/namei.c1
-rw-r--r--fs/ext4/super.c60
-rw-r--r--fs/ext4/symlink.c8
-rw-r--r--fs/f2fs/f2fs.h14
-rw-r--r--fs/f2fs/namei.c1
-rw-r--r--fs/f2fs/segment.c15
-rw-r--r--fs/f2fs/super.c15
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/fcntl.c72
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/file_table.c86
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fs-writeback.c25
-rw-r--r--fs/fs_parser.c4
-rw-r--r--fs/fuse/cuse.c3
-rw-r--r--fs/fuse/file.c14
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c15
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dentry.c23
-rw-r--r--fs/gfs2/file.c16
-rw-r--r--fs/gfs2/inode.c8
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/super.c12
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hugetlbfs/inode.c44
-rw-r--r--fs/inode.c8
-rw-r--r--fs/internal.h8
-rw-r--r--fs/ioctl.c33
-rw-r--r--fs/iomap/buffered-io.c579
-rw-r--r--fs/iomap/direct-io.c1
-rw-r--r--fs/iomap/trace.h48
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jfs/jfs_dmap.c8
-rw-r--r--fs/jfs/jfs_logmgr.c26
-rw-r--r--fs/jfs/jfs_logmgr.h2
-rw-r--r--fs/jfs/jfs_mount.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/kernfs/mount.c4
-rw-r--r--fs/libfs.c344
-rw-r--r--fs/lockd/clnt4xdr.c14
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/clntproc.c65
-rw-r--r--fs/lockd/clntxdr.c14
-rw-r--r--fs/lockd/svc.c3
-rw-r--r--fs/lockd/svc4proc.c10
-rw-r--r--fs/lockd/svclock.c64
-rw-r--r--fs/lockd/svcproc.c10
-rw-r--r--fs/lockd/svcsubs.c24
-rw-r--r--fs/lockd/xdr.c14
-rw-r--r--fs/lockd/xdr4.c14
-rw-r--r--fs/locks.c894
-rw-r--r--fs/mbcache.c4
-rw-r--r--fs/minix/inode.c2
-rw-r--r--fs/mnt_idmapping.c2
-rw-r--r--fs/mpage.c1
-rw-r--r--fs/namei.c26
-rw-r--r--fs/namespace.c11
-rw-r--r--fs/netfs/buffered_read.c12
-rw-r--r--fs/netfs/buffered_write.c18
-rw-r--r--fs/netfs/direct_write.c5
-rw-r--r--fs/netfs/fscache_cache.c3
-rw-r--r--fs/netfs/io.c4
-rw-r--r--fs/netfs/misc.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.h2
-rw-r--r--fs/nfs/blocklayout/dev.c68
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/client.c13
-rw-r--r--fs/nfs/delegation.c4
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/direct.c3
-rw-r--r--fs/nfs/file.c22
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs42xattr.c2
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c39
-rw-r--r--fs/nfs/nfs4state.c22
-rw-r--r--fs/nfs/nfs4trace.h4
-rw-r--r--fs/nfs/nfs4xdr.c8
-rw-r--r--fs/nfs/write.c8
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/filecache.c80
-rw-r--r--fs/nfsd/filecache.h1
-rw-r--r--fs/nfsd/netns.h29
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c5
-rw-r--r--fs/nfsd/nfs4callback.c193
-rw-r--r--fs/nfsd/nfs4layouts.c96
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nfsd/nfs4state.c987
-rw-r--r--fs/nfsd/nfs4xdr.c58
-rw-r--r--fs/nfsd/nfscache.c43
-rw-r--r--fs/nfsd/nfsctl.c17
-rw-r--r--fs/nfsd/nfsd.h3
-rw-r--r--fs/nfsd/nfsfh.c3
-rw-r--r--fs/nfsd/nfsproc.c6
-rw-r--r--fs/nfsd/nfssvc.c16
-rw-r--r--fs/nfsd/pnfs.h8
-rw-r--r--fs/nfsd/state.h83
-rw-r--r--fs/nfsd/stats.c52
-rw-r--r--fs/nfsd/stats.h70
-rw-r--r--fs/nfsd/trace.h212
-rw-r--r--fs/nfsd/vfs.c87
-rw-r--r--fs/nfsd/vfs.h4
-rw-r--r--fs/nfsd/xdr3.h2
-rw-r--r--fs/nfsd/xdr4cb.h18
-rw-r--r--fs/nilfs2/file.c8
-rw-r--r--fs/nilfs2/recovery.c7
-rw-r--r--fs/nilfs2/segment.c8
-rw-r--r--fs/nsfs.c121
-rw-r--r--fs/ntfs/Kconfig81
-rw-r--r--fs/ntfs/Makefile15
-rw-r--r--fs/ntfs/aops.c1744
-rw-r--r--fs/ntfs/aops.h88
-rw-r--r--fs/ntfs/attrib.c2624
-rw-r--r--fs/ntfs/attrib.h102
-rw-r--r--fs/ntfs/bitmap.c179
-rw-r--r--fs/ntfs/bitmap.h104
-rw-r--r--fs/ntfs/collate.c110
-rw-r--r--fs/ntfs/collate.h36
-rw-r--r--fs/ntfs/compress.c950
-rw-r--r--fs/ntfs/debug.c159
-rw-r--r--fs/ntfs/debug.h57
-rw-r--r--fs/ntfs/dir.c1540
-rw-r--r--fs/ntfs/dir.h34
-rw-r--r--fs/ntfs/endian.h79
-rw-r--r--fs/ntfs/file.c1997
-rw-r--r--fs/ntfs/index.c440
-rw-r--r--fs/ntfs/index.h134
-rw-r--r--fs/ntfs/inode.c3102
-rw-r--r--fs/ntfs/inode.h310
-rw-r--r--fs/ntfs/layout.h2421
-rw-r--r--fs/ntfs/lcnalloc.c1000
-rw-r--r--fs/ntfs/lcnalloc.h131
-rw-r--r--fs/ntfs/logfile.c849
-rw-r--r--fs/ntfs/logfile.h295
-rw-r--r--fs/ntfs/malloc.h77
-rw-r--r--fs/ntfs/mft.c2907
-rw-r--r--fs/ntfs/mft.h110
-rw-r--r--fs/ntfs/mst.c189
-rw-r--r--fs/ntfs/namei.c392
-rw-r--r--fs/ntfs/ntfs.h150
-rw-r--r--fs/ntfs/quota.c103
-rw-r--r--fs/ntfs/quota.h21
-rw-r--r--fs/ntfs/runlist.c1893
-rw-r--r--fs/ntfs/runlist.h88
-rw-r--r--fs/ntfs/super.c3202
-rw-r--r--fs/ntfs/sysctl.c58
-rw-r--r--fs/ntfs/sysctl.h27
-rw-r--r--fs/ntfs/time.h89
-rw-r--r--fs/ntfs/types.h55
-rw-r--r--fs/ntfs/unistr.c384
-rw-r--r--fs/ntfs/upcase.c73
-rw-r--r--fs/ntfs/usnjrnl.c70
-rw-r--r--fs/ntfs/usnjrnl.h191
-rw-r--r--fs/ntfs/volume.h164
-rw-r--r--fs/ntfs3/attrib.c45
-rw-r--r--fs/ntfs3/attrlist.c12
-rw-r--r--fs/ntfs3/bitmap.c4
-rw-r--r--fs/ntfs3/dir.c48
-rw-r--r--fs/ntfs3/file.c76
-rw-r--r--fs/ntfs3/frecord.c19
-rw-r--r--fs/ntfs3/fslog.c232
-rw-r--r--fs/ntfs3/fsntfs.c29
-rw-r--r--fs/ntfs3/index.c8
-rw-r--r--fs/ntfs3/inode.c32
-rw-r--r--fs/ntfs3/namei.c14
-rw-r--r--fs/ntfs3/ntfs.h4
-rw-r--r--fs/ntfs3/ntfs_fs.h29
-rw-r--r--fs/ntfs3/record.c18
-rw-r--r--fs/ntfs3/super.c56
-rw-r--r--fs/ntfs3/xattr.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c32
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/locks.c12
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/open.c56
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/overlayfs/copy_up.c14
-rw-r--r--fs/overlayfs/namei.c43
-rw-r--r--fs/overlayfs/overlayfs.h23
-rw-r--r--fs/overlayfs/ovl_entry.h4
-rw-r--r--fs/overlayfs/params.c14
-rw-r--r--fs/overlayfs/readdir.c7
-rw-r--r--fs/overlayfs/super.c69
-rw-r--r--fs/overlayfs/util.c71
-rw-r--r--fs/pidfs.c290
-rw-r--r--fs/pipe.c81
-rw-r--r--fs/posix_acl.c9
-rw-r--r--fs/proc/array.c66
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/inode.c21
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/pstore/inode.c10
-rw-r--r--fs/pstore/ram.c1
-rw-r--r--fs/pstore/zone.c3
-rw-r--r--fs/qnx4/inode.c49
-rw-r--r--fs/qnx6/inode.c2
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ramfs/inode.c32
-rw-r--r--fs/reiserfs/journal.c38
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/reiserfs.h8
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/remap_range.c31
-rw-r--r--fs/romfs/super.c6
-rw-r--r--fs/select.c15
-rw-r--r--fs/smb/client/cached_dir.c25
-rw-r--r--fs/smb/client/cifsencrypt.c2
-rw-r--r--fs/smb/client/cifsfs.c27
-rw-r--r--fs/smb/client/cifsglob.h20
-rw-r--r--fs/smb/client/cifssmb.c8
-rw-r--r--fs/smb/client/connect.c25
-rw-r--r--fs/smb/client/dfs.c7
-rw-r--r--fs/smb/client/file.c100
-rw-r--r--fs/smb/client/fs_context.c13
-rw-r--r--fs/smb/client/inode.c8
-rw-r--r--fs/smb/client/namespace.c16
-rw-r--r--fs/smb/client/readdir.c17
-rw-r--r--fs/smb/client/sess.c7
-rw-r--r--fs/smb/client/smb2file.c2
-rw-r--r--fs/smb/client/smb2inode.c33
-rw-r--r--fs/smb/client/smb2ops.c155
-rw-r--r--fs/smb/client/smb2pdu.c305
-rw-r--r--fs/smb/client/smb2proto.h5
-rw-r--r--fs/smb/client/smbencrypt.c7
-rw-r--r--fs/smb/client/transport.c18
-rw-r--r--fs/smb/server/ksmbd_netlink.h3
-rw-r--r--fs/smb/server/misc.c1
-rw-r--r--fs/smb/server/smb2pdu.c52
-rw-r--r--fs/smb/server/transport_ipc.c4
-rw-r--r--fs/smb/server/transport_tcp.c2
-rw-r--r--fs/smb/server/vfs.c14
-rw-r--r--fs/super.c31
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/sysv/itree.c10
-rw-r--r--fs/tracefs/event_inode.c620
-rw-r--r--fs/tracefs/inode.c103
-rw-r--r--fs/tracefs/internal.h45
-rw-r--r--fs/ubifs/dir.c1
-rw-r--r--fs/ubifs/super.c7
-rw-r--r--fs/udf/super.c1
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/vboxsf/super.c3
-rw-r--r--fs/verity/fsverity_private.h1
-rw-r--r--fs/verity/measure.c4
-rw-r--r--fs/verity/open.c1
-rw-r--r--fs/verity/verify.c48
-rw-r--r--fs/xattr.c9
-rw-r--r--fs/xfs/libxfs/xfs_attr.c6
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c14
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h16
-rw-r--r--fs/xfs/libxfs/xfs_sb.c14
-rw-r--r--fs/xfs/libxfs/xfs_sb.h2
-rw-r--r--fs/xfs/libxfs/xfs_types.h12
-rw-r--r--fs/xfs/scrub/rtbitmap.c1
-rw-r--r--fs/xfs/scrub/rtsummary.c1
-rw-r--r--fs/xfs/xfs_aops.c9
-rw-r--r--fs/xfs/xfs_buf.c10
-rw-r--r--fs/xfs/xfs_buf.h4
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_super.c79
-rw-r--r--fs/zonefs/file.c45
-rw-r--r--fs/zonefs/super.c237
-rw-r--r--include/asm-generic/barrier.h2
-rw-r--r--include/asm-generic/word-at-a-time.h3
-rw-r--r--include/drm/bridge/aux-bridge.h15
-rw-r--r--include/drm/drm_gem_vram_helper.h16
-rw-r--r--include/dt-bindings/arm/qcom,ids.h5
-rw-r--r--include/dt-bindings/clock/exynos850.h2
-rw-r--r--include/dt-bindings/clock/google,gs101.h129
-rw-r--r--include/dt-bindings/clock/qcom,gcc-msm8953.h4
-rw-r--r--include/dt-bindings/clock/qcom,gcc-sc8180x.h2
-rw-r--r--include/dt-bindings/clock/qcom,x1e80100-camcc.h135
-rw-r--r--include/dt-bindings/clock/qcom,x1e80100-dispcc.h98
-rw-r--r--include/dt-bindings/clock/qcom,x1e80100-gpucc.h41
-rw-r--r--include/dt-bindings/clock/qcom,x1e80100-tcsr.h23
-rw-r--r--include/dt-bindings/clock/renesas,r8a779h0-cpg-mssr.h96
-rw-r--r--include/dt-bindings/clock/rockchip,rk3588-cru.h3
-rw-r--r--include/dt-bindings/mfd/stm32f7-rcc.h1
-rw-r--r--include/dt-bindings/power/renesas,r8a779h0-sysc.h49
-rw-r--r--include/dt-bindings/reset/qcom,x1e80100-gpucc.h19
-rw-r--r--include/kunit/test.h12
-rw-r--r--include/linux/amd-iommu.h6
-rw-r--r--include/linux/arm_ffa.h2
-rw-r--r--include/linux/async.h1
-rw-r--r--include/linux/atomic/atomic-arch-fallback.h46
-rw-r--r--include/linux/atomic/atomic-instrumented.h68
-rw-r--r--include/linux/atomic/atomic-long.h24
-rw-r--r--include/linux/backing-dev-defs.h7
-rw-r--r--include/linux/backing-dev.h1
-rw-r--r--include/linux/bitfield.h3
-rw-r--r--include/linux/bitmap.h116
-rw-r--r--include/linux/blk-integrity.h1
-rw-r--r--include/linux/blk-mq.h12
-rw-r--r--include/linux/blk_types.h44
-rw-r--r--include/linux/blkdev.h86
-rw-r--r--include/linux/bpf-cgroup.h3
-rw-r--r--include/linux/bpf.h202
-rw-r--r--include/linux/bpf_local_storage.h30
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/bpf_verifier.h16
-rw-r--r--include/linux/btf.h36
-rw-r--r--include/linux/btf_ids.h21
-rw-r--r--include/linux/bvec.h2
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/ceph/osd_client.h3
-rw-r--r--include/linux/clocksource.h14
-rw-r--r--include/linux/clocksource_ids.h3
-rw-r--r--include/linux/compiler-gcc.h22
-rw-r--r--include/linux/compiler.h41
-rw-r--r--include/linux/compiler_attributes.h16
-rw-r--r--include/linux/compiler_types.h20
-rw-r--r--include/linux/cper.h23
-rw-r--r--include/linux/cpu.h6
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/cpumask.h16
-rw-r--r--include/linux/cpuset.h10
-rw-r--r--include/linux/cxl-event.h18
-rw-r--r--include/linux/dcache.h19
-rw-r--r--include/linux/device-mapper.h2
-rw-r--r--include/linux/dm-bufio.h7
-rw-r--r--include/linux/dm-io.h3
-rw-r--r--include/linux/dmaengine.h3
-rw-r--r--include/linux/dpll.h20
-rw-r--r--include/linux/dynamic_queue_limits.h45
-rw-r--r--include/linux/ethtool.h14
-rw-r--r--include/linux/evm.h117
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/filelock.h129
-rw-r--r--include/linux/filter.h28
-rw-r--r--include/linux/fortify-string.h122
-rw-r--r--include/linux/framer/framer-provider.h15
-rw-r--r--include/linux/fs.h143
-rw-r--r--include/linux/fscrypt.h66
-rw-r--r--include/linux/gfp.h25
-rw-r--r--include/linux/gpio/driver.h18
-rw-r--r--include/linux/hid_bpf.h11
-rw-r--r--include/linux/hrtimer.h117
-rw-r--r--include/linux/hrtimer_defs.h104
-rw-r--r--include/linux/hyperv.h22
-rw-r--r--include/linux/ieee80211.h169
-rw-r--r--include/linux/if_tun.h16
-rw-r--r--include/linux/iio/adc/ad_sigma_delta.h4
-rw-r--r--include/linux/iio/common/st_sensors.h4
-rw-r--r--include/linux/iio/imu/adis.h3
-rw-r--r--include/linux/ima.h142
-rw-r--r--include/linux/indirect_call_wrapper.h2
-rw-r--r--include/linux/inet_diag.h1
-rw-r--r--include/linux/inetdevice.h14
-rw-r--r--include/linux/integrity.h27
-rw-r--r--include/linux/io.h7
-rw-r--r--include/linux/io_uring_types.h137
-rw-r--r--include/linux/iomap.h19
-rw-r--r--include/linux/iommu.h298
-rw-r--r--include/linux/ipv6.h14
-rw-r--r--include/linux/irq.h2
-rw-r--r--include/linux/irqdomain.h17
-rw-r--r--include/linux/irqdomain_defs.h2
-rw-r--r--include/linux/irqhandler.h2
-rw-r--r--include/linux/jiffies.h15
-rw-r--r--include/linux/kasan.h6
-rw-r--r--include/linux/kernel.h44
-rw-r--r--include/linux/kvm_host.h26
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/lockd/lockd.h8
-rw-r--r--include/linux/lockd/xdr.h2
-rw-r--r--include/linux/lsm_hook_defs.h39
-rw-r--r--include/linux/maple_tree.h7
-rw-r--r--include/linux/marvell_phy.h1
-rw-r--r--include/linux/mdio.h63
-rw-r--r--include/linux/memblock.h2
-rw-r--r--include/linux/mfd/idtRC38xxx_reg.h273
-rw-r--r--include/linux/mlx5/driver.h2
-rw-r--r--include/linux/mlx5/fs.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h31
-rw-r--r--include/linux/mlx5/qp.h5
-rw-r--r--include/linux/mlx5/vport.h1
-rw-r--r--include/linux/mman.h1
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/module.h2
-rw-r--r--include/linux/msi.h24
-rw-r--r--include/linux/mutex.h8
-rw-r--r--include/linux/net.h5
-rw-r--r--include/linux/netdevice.h167
-rw-r--r--include/linux/netfilter.h2
-rw-r--r--include/linux/netfilter/ipset/ip_set.h4
-rw-r--r--include/linux/netlink.h2
-rw-r--r--include/linux/nfs_fs_sb.h2
-rw-r--r--include/linux/ns_common.h2
-rw-r--r--include/linux/nvme-rdma.h6
-rw-r--r--include/linux/nvme.h13
-rw-r--r--include/linux/objtool.h2
-rw-r--r--include/linux/overflow.h115
-rw-r--r--include/linux/pci.h10
-rw-r--r--include/linux/phy.h53
-rw-r--r--include/linux/phylink.h7
-rw-r--r--include/linux/pid.h10
-rw-r--r--include/linux/pidfs.h9
-rw-r--r--include/linux/pktcdvd.h4
-rw-r--r--include/linux/platform_data/brcmfmac.h2
-rw-r--r--include/linux/platform_data/mdio-bcm-unimac.h3
-rw-r--r--include/linux/platform_data/microchip-ksz.h1
-rw-r--r--include/linux/platform_data/net-cw1200.h4
-rw-r--r--include/linux/poison.h3
-rw-r--r--include/linux/poll.h4
-rw-r--r--include/linux/printk.h2
-rw-r--r--include/linux/proc_fs.h1
-rw-r--r--include/linux/proc_ns.h2
-rw-r--r--include/linux/psp-sev.h321
-rw-r--r--include/linux/pti.h2
-rw-r--r--include/linux/ptp_clock_kernel.h3
-rw-r--r--include/linux/ptp_kvm.h4
-rw-r--r--include/linux/ptrace.h4
-rw-r--r--include/linux/ras.h18
-rw-r--r--include/linux/rcu_sync.h1
-rw-r--r--include/linux/rcupdate.h4
-rw-r--r--include/linux/refcount.h9
-rw-r--r--include/linux/resctrl.h48
-rw-r--r--include/linux/rtnetlink.h3
-rw-r--r--include/linux/rw_hint.h24
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/sched/sd_flags.h4
-rw-r--r--include/linux/sched/signal.h2
-rw-r--r--include/linux/sched/topology.h12
-rw-r--r--include/linux/scmi_protocol.h21
-rw-r--r--include/linux/security.h102
-rw-r--r--include/linux/seq_buf.h17
-rw-r--r--include/linux/serial_core.h32
-rw-r--r--include/linux/skbuff.h141
-rw-r--r--include/linux/skmsg.h6
-rw-r--r--include/linux/slab.h94
-rw-r--r--include/linux/smp.h17
-rw-r--r--include/linux/soc/andes/irq.h18
-rw-r--r--include/linux/soc/qcom/apr.h2
-rw-r--r--include/linux/soc/qcom/qcom-pbs.h30
-rw-r--r--include/linux/soc/samsung/exynos-pmu.h11
-rw-r--r--include/linux/sock_diag.h10
-rw-r--r--include/linux/spi/spi.h2
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/string.h87
-rw-r--r--include/linux/string_choices.h11
-rw-r--r--include/linux/string_helpers.h10
-rw-r--r--include/linux/sunrpc/svc.h5
-rw-r--r--include/linux/sunrpc/svc_rdma.h55
-rw-r--r--include/linux/swap.h7
-rw-r--r--include/linux/swiotlb.h3
-rw-r--r--include/linux/syscalls.h1
-rw-r--r--include/linux/tcp.h16
-rw-r--r--include/linux/tee_drv.h2
-rw-r--r--include/linux/tick.h28
-rw-r--r--include/linux/timekeeping.h10
-rw-r--r--include/linux/timer.h16
-rw-r--r--include/linux/trace_seq.h8
-rw-r--r--include/linux/udp.h10
-rw-r--r--include/linux/uio.h16
-rw-r--r--include/linux/units.h5
-rw-r--r--include/linux/usb/gadget.h1
-rw-r--r--include/linux/usb/hcd.h2
-rw-r--r--include/linux/vmalloc.h5
-rw-r--r--include/linux/wordpart.h42
-rw-r--r--include/linux/workqueue.h141
-rw-r--r--include/linux/wwan.h2
-rw-r--r--include/net/act_api.h2
-rw-r--r--include/net/addrconf.h7
-rw-r--r--include/net/af_unix.h42
-rw-r--r--include/net/bluetooth/bluetooth.h2
-rw-r--r--include/net/bluetooth/hci.h19
-rw-r--r--include/net/bluetooth/hci_core.h37
-rw-r--r--include/net/bluetooth/hci_sync.h22
-rw-r--r--include/net/bluetooth/l2cap.h44
-rw-r--r--include/net/bond_3ad.h2
-rw-r--r--include/net/bond_options.h1
-rw-r--r--include/net/bonding.h23
-rw-r--r--include/net/busy_poll.h4
-rw-r--r--include/net/cfg80211.h142
-rw-r--r--include/net/cfg802154.h1
-rw-r--r--include/net/dropreason-core.h26
-rw-r--r--include/net/dsa.h4
-rw-r--r--include/net/dst.h1
-rw-r--r--include/net/eee.h38
-rw-r--r--include/net/genetlink.h6
-rw-r--r--include/net/gro.h46
-rw-r--r--include/net/hotdata.h52
-rw-r--r--include/net/if_inet6.h4
-rw-r--r--include/net/inet_connection_sock.h8
-rw-r--r--include/net/inet_sock.h6
-rw-r--r--include/net/ioam6.h4
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/ip6_fib.h52
-rw-r--r--include/net/ip6_route.h5
-rw-r--r--include/net/ip_fib.h1
-rw-r--r--include/net/ip_tunnels.h3
-rw-r--r--include/net/ipv6.h8
-rw-r--r--include/net/llc_pdu.h6
-rw-r--r--include/net/mac80211.h163
-rw-r--r--include/net/mctp.h7
-rw-r--r--include/net/net_namespace.h5
-rw-r--r--include/net/netdev_queues.h56
-rw-r--r--include/net/netfilter/nf_flow_table.h2
-rw-r--r--include/net/netfilter/nf_queue.h1
-rw-r--r--include/net/netfilter/nf_tables.h73
-rw-r--r--include/net/netfilter/nf_tables_core.h2
-rw-r--r--include/net/netlabel.h7
-rw-r--r--include/net/netmem.h41
-rw-r--r--include/net/nexthop.h34
-rw-r--r--include/net/nfc/nfc.h2
-rw-r--r--include/net/page_pool/types.h13
-rw-r--r--include/net/pkt_cls.h2
-rw-r--r--include/net/pkt_sched.h2
-rw-r--r--include/net/protocol.h3
-rw-r--r--include/net/request_sock.h39
-rw-r--r--include/net/route.h7
-rw-r--r--include/net/rps.h125
-rw-r--r--include/net/rtnetlink.h1
-rw-r--r--include/net/sch_generic.h11
-rw-r--r--include/net/scm.h1
-rw-r--r--include/net/sctp/structs.h5
-rw-r--r--include/net/sock.h191
-rw-r--r--include/net/switchdev.h3
-rw-r--r--include/net/tc_wrapper.h2
-rw-r--r--include/net/tcp.h53
-rw-r--r--include/net/tls.h5
-rw-r--r--include/net/xdp_sock_drv.h27
-rw-r--r--include/net/xfrm.h14
-rw-r--r--include/scsi/scsi_device.h5
-rw-r--r--include/soc/qcom/qcom-spmi-pmic.h2
-rw-r--r--include/soc/qcom/spm.h23
-rw-r--r--include/soc/tegra/fuse.h1
-rw-r--r--include/soc/tegra/pmc.h18
-rw-r--r--include/sound/cs35l56.h7
-rw-r--r--include/sound/soc-card.h2
-rw-r--r--include/sound/tas2781.h1
-rw-r--r--include/trace/events/afs.h29
-rw-r--r--include/trace/events/ext4.h11
-rw-r--r--include/trace/events/filelock.h102
-rw-r--r--include/trace/events/io_uring.h30
-rw-r--r--include/trace/events/napi.h33
-rw-r--r--include/trace/events/qdisc.h20
-rw-r--r--include/trace/events/rpcrdma.h4
-rw-r--r--include/trace/events/rxrpc.h206
-rw-r--r--include/trace/events/tcp.h16
-rw-r--r--include/trace/events/timer_migration.h298
-rw-r--r--include/trace/misc/nfs.h34
-rw-r--r--include/uapi/drm/ivpu_accel.h26
-rw-r--r--include/uapi/drm/nouveau_drm.h14
-rw-r--r--include/uapi/drm/xe_drm.h22
-rw-r--r--include/uapi/linux/bpf.h122
-rw-r--r--include/uapi/linux/btrfs.h4
-rw-r--r--include/uapi/linux/can.h9
-rw-r--r--include/uapi/linux/can/isotp.h1
-rw-r--r--include/uapi/linux/can/raw.h16
-rw-r--r--include/uapi/linux/devlink.h5
-rw-r--r--include/uapi/linux/dpll.h30
-rw-r--r--include/uapi/linux/ethtool.h48
-rw-r--r--include/uapi/linux/eventpoll.h13
-rw-r--r--include/uapi/linux/fs.h30
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/iio/types.h4
-rw-r--r--include/uapi/linux/in6.h2
-rw-r--r--include/uapi/linux/io_uring.h13
-rw-r--r--include/uapi/linux/ioam6_genl.h20
-rw-r--r--include/uapi/linux/iommu.h161
-rw-r--r--include/uapi/linux/lsm.h2
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--include/uapi/linux/mctp.h32
-rw-r--r--include/uapi/linux/mdio.h4
-rw-r--r--include/uapi/linux/netdev.h20
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h8
-rw-r--r--include/uapi/linux/nexthop.h45
-rw-r--r--include/uapi/linux/nl80211.h71
-rw-r--r--include/uapi/linux/pidfd.h8
-rw-r--r--include/uapi/linux/psp-sev.h59
-rw-r--r--include/uapi/linux/ptp_clock.h13
-rw-r--r--include/uapi/linux/serial.h13
-rw-r--r--include/uapi/linux/tc_act/tc_pedit.h2
-rw-r--r--include/uapi/linux/ublk_cmd.h2
-rw-r--r--include/uapi/sound/asound.h4
-rw-r--r--include/uapi/xen/gntalloc.h5
-rw-r--r--include/vdso/datapage.h14
-rw-r--r--include/vdso/helpers.h8
-rw-r--r--init/Kconfig34
-rw-r--r--init/do_mounts.c3
-rw-r--r--init/do_mounts.h9
-rw-r--r--init/init_task.c1
-rw-r--r--init/initramfs.c8
-rw-r--r--init/main.c8
-rw-r--r--io_uring/Makefile3
-rw-r--r--io_uring/cancel.c3
-rw-r--r--io_uring/cancel.h10
-rw-r--r--io_uring/fdinfo.c18
-rw-r--r--io_uring/filetable.h2
-rw-r--r--io_uring/io_uring.c249
-rw-r--r--io_uring/io_uring.h85
-rw-r--r--io_uring/kbuf.c35
-rw-r--r--io_uring/kbuf.h61
-rw-r--r--io_uring/napi.c332
-rw-r--r--io_uring/napi.h104
-rw-r--r--io_uring/net.c419
-rw-r--r--io_uring/opdef.c11
-rw-r--r--io_uring/openclose.c4
-rw-r--r--io_uring/poll.c82
-rw-r--r--io_uring/poll.h9
-rw-r--r--io_uring/register.c13
-rw-r--r--io_uring/rsrc.h2
-rw-r--r--io_uring/rw.c23
-rw-r--r--io_uring/sqpoll.c59
-rw-r--r--io_uring/sqpoll.h1
-rw-r--r--io_uring/truncate.c48
-rw-r--r--io_uring/truncate.h4
-rw-r--r--io_uring/uring_cmd.c1
-rw-r--r--io_uring/xattr.c2
-rw-r--r--kernel/async.c17
-rw-r--r--kernel/audit.c4
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/backtracetest.c18
-rw-r--r--kernel/bpf/Kconfig1
-rw-r--r--kernel/bpf/Makefile5
-rw-r--r--kernel/bpf/arena.c558
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/bpf_iter.c4
-rw-r--r--kernel/bpf/bpf_local_storage.c52
-rw-r--r--kernel/bpf/bpf_lsm.c21
-rw-r--r--kernel/bpf/bpf_struct_ops.c737
-rw-r--r--kernel/bpf/bpf_struct_ops_types.h12
-rw-r--r--kernel/bpf/btf.c566
-rw-r--r--kernel/bpf/cgroup.c11
-rw-r--r--kernel/bpf/core.c46
-rw-r--r--kernel/bpf/cpumap.c6
-rw-r--r--kernel/bpf/cpumask.c4
-rw-r--r--kernel/bpf/devmap.c11
-rw-r--r--kernel/bpf/disasm.c14
-rw-r--r--kernel/bpf/hashtab.c14
-rw-r--r--kernel/bpf/helpers.c28
-rw-r--r--kernel/bpf/inode.c276
-rw-r--r--kernel/bpf/log.c65
-rw-r--r--kernel/bpf/lpm_trie.c20
-rw-r--r--kernel/bpf/map_iter.c4
-rw-r--r--kernel/bpf/stackmap.c9
-rw-r--r--kernel/bpf/syscall.c298
-rw-r--r--kernel/bpf/task_iter.c2
-rw-r--r--kernel/bpf/token.c278
-rw-r--r--kernel/bpf/trampoline.c4
-rw-r--r--kernel/bpf/verifier.c749
-rw-r--r--kernel/cgroup/cpuset.c9
-rw-r--r--kernel/cgroup/rstat.c4
-rw-r--r--kernel/configs/debug.config6
-rw-r--r--kernel/configs/hardening.config7
-rw-r--r--kernel/context_tracking.c4
-rw-r--r--kernel/cpu.c26
-rw-r--r--kernel/cred.c4
-rw-r--r--kernel/dma/direct.c9
-rw-r--r--kernel/dma/swiotlb.c46
-rw-r--r--kernel/events/core.c8
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/exit.c41
-rw-r--r--kernel/fork.c149
-rw-r--r--kernel/futex/core.c15
-rw-r--r--kernel/futex/pi.c11
-rw-r--r--kernel/irq/irq_sim.c28
-rw-r--r--kernel/irq/irqdesc.c112
-rw-r--r--kernel/irq/irqdomain.c28
-rw-r--r--kernel/irq/manage.c109
-rw-r--r--kernel/irq/matrix.c28
-rw-r--r--kernel/irq/msi.c184
-rw-r--r--kernel/kprobes.c4
-rw-r--r--kernel/locking/percpu-rwsem.c11
-rw-r--r--kernel/locking/qspinlock_paravirt.h2
-rw-r--r--kernel/locking/rtmutex.c9
-rw-r--r--kernel/locking/rwsem.c6
-rw-r--r--kernel/nsproxy.c2
-rw-r--r--kernel/panic.c8
-rw-r--r--kernel/pid.c57
-rw-r--r--kernel/power/swap.c28
-rw-r--r--kernel/printk/nbcon.c41
-rw-r--r--kernel/printk/printk.c112
-rw-r--r--kernel/printk/printk_ringbuffer.c335
-rw-r--r--kernel/printk/printk_ringbuffer.h54
-rw-r--r--kernel/rcu/Kconfig13
-rw-r--r--kernel/rcu/rcu.h19
-rw-r--r--kernel/rcu/rcuscale.c6
-rw-r--r--kernel/rcu/rcutorture.c13
-rw-r--r--kernel/rcu/srcutree.c24
-rw-r--r--kernel/rcu/sync.c16
-rw-r--r--kernel/rcu/tasks.h135
-rw-r--r--kernel/rcu/tiny.c1
-rw-r--r--kernel/rcu/tree.c271
-rw-r--r--kernel/rcu/tree.h20
-rw-r--r--kernel/rcu/tree_exp.h86
-rw-r--r--kernel/rcu/tree_nocb.h69
-rw-r--r--kernel/rcu/tree_plugin.h52
-rw-r--r--kernel/sched/core.c21
-rw-r--r--kernel/sched/fair.c110
-rw-r--r--kernel/sched/idle.c22
-rw-r--r--kernel/sched/membarrier.c6
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/sched/topology.c35
-rw-r--r--kernel/signal.c110
-rw-r--r--kernel/softirq.c5
-rw-r--r--kernel/sys.c50
-rw-r--r--kernel/time/Kconfig5
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/clockevents.c2
-rw-r--r--kernel/time/clocksource-wdtest.c13
-rw-r--r--kernel/time/clocksource.c37
-rw-r--r--kernel/time/hrtimer.c28
-rw-r--r--kernel/time/tick-common.c31
-rw-r--r--kernel/time/tick-internal.h16
-rw-r--r--kernel/time/tick-sched.c366
-rw-r--r--kernel/time/tick-sched.h42
-rw-r--r--kernel/time/time_test.c2
-rw-r--r--kernel/time/timekeeping.c33
-rw-r--r--kernel/time/timer.c571
-rw-r--r--kernel/time/timer_list.c10
-rw-r--r--kernel/time/timer_migration.c1793
-rw-r--r--kernel/time/timer_migration.h140
-rw-r--r--kernel/trace/bpf_trace.c27
-rw-r--r--kernel/trace/fprobe.c14
-rw-r--r--kernel/trace/ftrace.c10
-rw-r--r--kernel/trace/ring_buffer.c177
-rw-r--r--kernel/trace/trace.c111
-rw-r--r--kernel/trace/trace_btf.c4
-rw-r--r--kernel/trace/trace_events_synth.c3
-rw-r--r--kernel/trace/trace_events_trigger.c6
-rw-r--r--kernel/trace/trace_osnoise.c6
-rw-r--r--kernel/trace/trace_output.c6
-rw-r--r--kernel/trace/trace_probe.c32
-rw-r--r--kernel/trace/trace_probe.h3
-rw-r--r--kernel/trace/tracing_map.c7
-rw-r--r--kernel/workqueue.c1849
-rw-r--r--lib/Kconfig.debug41
-rw-r--r--lib/Kconfig.ubsan28
-rw-r--r--lib/Makefile9
-rw-r--r--lib/bitmap.c7
-rw-r--r--lib/checksum_kunit.c17
-rw-r--r--lib/cmdline_kunit.c2
-rw-r--r--lib/dump_stack.c16
-rw-r--r--lib/dynamic_queue_limits.c74
-rw-r--r--lib/fortify_kunit.c662
-rw-r--r--lib/iov_iter.c83
-rw-r--r--lib/kobject.c24
-rw-r--r--lib/kunit/device-impl.h2
-rw-r--r--lib/kunit/device.c24
-rw-r--r--lib/kunit/executor.c10
-rw-r--r--lib/kunit/executor_test.c2
-rw-r--r--lib/kunit/kunit-test.c2
-rw-r--r--lib/kunit/test.c17
-rw-r--r--lib/livepatch/Makefile14
-rw-r--r--lib/maple_tree.c93
-rw-r--r--lib/memcpy_kunit.c4
-rw-r--r--lib/nlattr.c4
-rw-r--r--lib/overflow_kunit.c67
-rw-r--r--lib/raid6/s390vx.uc62
-rw-r--r--lib/seq_buf.c49
-rw-r--r--lib/stackdepot.c523
-rw-r--r--lib/stackinit_kunit.c19
-rw-r--r--lib/string.c23
-rw-r--r--lib/string_helpers.c89
-rw-r--r--lib/string_helpers_kunit.c (renamed from lib/test-string_helpers.c)255
-rw-r--r--lib/string_kunit.c199
-rw-r--r--lib/test_bitmap.c42
-rw-r--r--lib/test_blackhole_dev.c3
-rw-r--r--lib/test_maple_tree.c44
-rw-r--r--lib/test_string.c257
-rw-r--r--lib/test_ubsan.c41
-rw-r--r--lib/ubsan.c68
-rw-r--r--lib/ubsan.h4
-rw-r--r--mm/Kconfig.debug6
-rw-r--r--mm/backing-dev.c27
-rw-r--r--mm/compaction.c7
-rw-r--r--mm/damon/core.c15
-rw-r--r--mm/damon/lru_sort.c43
-rw-r--r--mm/damon/reclaim.c18
-rw-r--r--mm/damon/sysfs-schemes.c6
-rw-r--r--mm/debug_vm_pgtable.c8
-rw-r--r--mm/filemap.c60
-rw-r--r--mm/huge_memory.c18
-rw-r--r--mm/kasan/common.c8
-rw-r--r--mm/kasan/generic.c93
-rw-r--r--mm/kasan/kasan.h10
-rw-r--r--mm/kasan/quarantine.c5
-rw-r--r--mm/madvise.c1
-rw-r--r--mm/memblock.c9
-rw-r--r--mm/memcontrol.c95
-rw-r--r--mm/memory-failure.c5
-rw-r--r--mm/memory.c26
-rw-r--r--mm/migrate.c8
-rw-r--r--mm/mmap.c16
-rw-r--r--mm/page-writeback.c4
-rw-r--r--mm/page_alloc.c32
-rw-r--r--mm/readahead.c4
-rw-r--r--mm/shmem.c8
-rw-r--r--mm/slab.h11
-rw-r--r--mm/slab_common.c29
-rw-r--r--mm/slub.c116
-rw-r--r--mm/swap.h5
-rw-r--r--mm/swap_state.c10
-rw-r--r--mm/swapfile.c35
-rw-r--r--mm/userfaultfd.c35
-rw-r--r--mm/util.c17
-rw-r--r--mm/vmalloc.c83
-rw-r--r--mm/vmscan.c5
-rw-r--r--mm/zswap.c27
-rw-r--r--net/6lowpan/core.c1
-rw-r--r--net/8021q/vlan_dev.c30
-rw-r--r--net/8021q/vlan_netlink.c4
-rw-r--r--net/8021q/vlanproc.c46
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile2
-rw-r--r--net/atm/mpc.c1
-rw-r--r--net/batman-adv/distributed-arp-table.c3
-rw-r--r--net/batman-adv/main.c14
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c3
-rw-r--r--net/batman-adv/netlink.c1
-rw-r--r--net/bluetooth/6lowpan.c4
-rw-r--r--net/bluetooth/Kconfig8
-rw-r--r--net/bluetooth/Makefile1
-rw-r--r--net/bluetooth/a2mp.c1054
-rw-r--r--net/bluetooth/a2mp.h154
-rw-r--r--net/bluetooth/af_bluetooth.c10
-rw-r--r--net/bluetooth/amp.c590
-rw-r--r--net/bluetooth/amp.h60
-rw-r--r--net/bluetooth/bnep/core.c5
-rw-r--r--net/bluetooth/eir.c29
-rw-r--r--net/bluetooth/hci_conn.c200
-rw-r--r--net/bluetooth/hci_core.c177
-rw-r--r--net/bluetooth/hci_event.c249
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/hci_sync.c440
-rw-r--r--net/bluetooth/iso.c104
-rw-r--r--net/bluetooth/l2cap_core.c1087
-rw-r--r--net/bluetooth/l2cap_sock.c21
-rw-r--r--net/bluetooth/mgmt.c124
-rw-r--r--net/bluetooth/msft.c3
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/sco.c3
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c36
-rw-r--r--net/bpf/test_run.c12
-rw-r--r--net/bridge/br.c15
-rw-r--r--net/bridge/br_device.c27
-rw-r--r--net/bridge/br_fdb.c5
-rw-r--r--net/bridge/br_multicast.c20
-rw-r--r--net/bridge/br_netfilter_hooks.c96
-rw-r--r--net/bridge/br_netlink.c3
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/bridge/br_switchdev.c84
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/bridge/netfilter/Kconfig7
-rw-r--r--net/bridge/netfilter/Makefile2
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c30
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/bcm.c69
-rw-r--r--net/can/isotp.c5
-rw-r--r--net/can/j1939/j1939-priv.h3
-rw-r--r--net/can/j1939/main.c2
-rw-r--r--net/can/j1939/socket.c46
-rw-r--r--net/can/raw.c104
-rw-r--r--net/ceph/messenger_v1.c33
-rw-r--r--net/ceph/messenger_v2.c7
-rw-r--r--net/ceph/osd_client.c27
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c447
-rw-r--r--net/core/dev.h10
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/filter.c217
-rw-r--r--net/core/gro.c40
-rw-r--r--net/core/gro_cells.c3
-rw-r--r--net/core/gso.c4
-rw-r--r--net/core/gso_test.c2
-rw-r--r--net/core/hotdata.c22
-rw-r--r--net/core/link_watch.c13
-rw-r--r--net/core/net-procfs.c55
-rw-r--r--net/core/net-sysfs.c137
-rw-r--r--net/core/net_namespace.c33
-rw-r--r--net/core/netdev-genl-gen.c12
-rw-r--r--net/core/netdev-genl-gen.h2
-rw-r--r--net/core/netdev-genl.c227
-rw-r--r--net/core/page_pool.c64
-rw-r--r--net/core/page_pool_user.c5
-rw-r--r--net/core/request_sock.c3
-rw-r--r--net/core/rtnetlink.c160
-rw-r--r--net/core/scm.c5
-rw-r--r--net/core/skbuff.c196
-rw-r--r--net/core/skmsg.c7
-rw-r--r--net/core/sock.c116
-rw-r--r--net/core/sock_diag.c120
-rw-r--r--net/core/sysctl_net_core.c34
-rw-r--r--net/core/xdp.c15
-rw-r--r--net/dccp/ackvec.c8
-rw-r--r--net/dccp/diag.c1
-rw-r--r--net/devlink/core.c24
-rw-r--r--net/devlink/netlink_gen.c2
-rw-r--r--net/devlink/port.c4
-rw-r--r--net/dsa/dsa.c7
-rw-r--r--net/dsa/tag_sja1105.c4
-rw-r--r--net/dsa/user.c28
-rw-r--r--net/ethtool/eee.c62
-rw-r--r--net/ethtool/ioctl.c60
-rw-r--r--net/ethtool/netlink.c14
-rw-r--r--net/handshake/handshake-test.c5
-rw-r--r--net/hsr/hsr_device.c34
-rw-r--r--net/hsr/hsr_forward.c4
-rw-r--r--net/ieee802154/6lowpan/core.c3
-rw-r--r--net/ieee802154/socket.c1
-rw-r--r--net/ieee802154/sysfs.c2
-rw-r--r--net/ieee802154/sysfs.h2
-rw-r--r--net/ipv4/af_inet.c62
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/bpf_tcp_ca.c26
-rw-r--r--net/ipv4/cipso_ipv4.c5
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c324
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/fib_frontend.c51
-rw-r--r--net/ipv4/fib_trie.c6
-rw-r--r--net/ipv4/fou_bpf.c4
-rw-r--r--net/ipv4/fou_core.c2
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_connection_sock.c11
-rw-r--r--net/ipv4/inet_diag.c101
-rw-r--r--net/ipv4/inet_hashtables.c28
-rw-r--r--net/ipv4/inetpeer.c5
-rw-r--r--net/ipv4/ip_gre.c25
-rw-r--r--net/ipv4/ip_output.c28
-rw-r--r--net/ipv4/ip_sockglue.c19
-rw-r--r--net/ipv4/ip_tunnel.c82
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ip_vti.c9
-rw-r--r--net/ipv4/ipip.c9
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/netfilter/Kconfig44
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/nexthop.c367
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c22
-rw-r--r--net/ipv4/raw_diag.c1
-rw-r--r--net/ipv4/route.c9
-rw-r--r--net/ipv4/syncookies.c61
-rw-r--r--net/ipv4/tcp.c25
-rw-r--r--net/ipv4/tcp_ao.c2
-rw-r--r--net/ipv4/tcp_bbr.c4
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_dctcp.c4
-rw-r--r--net/ipv4/tcp_diag.c1
-rw-r--r--net/ipv4/tcp_input.c51
-rw-r--r--net/ipv4/tcp_ipv4.c17
-rw-r--r--net/ipv4/tcp_minisocks.c10
-rw-r--r--net/ipv4/tcp_offload.c36
-rw-r--r--net/ipv4/tunnel4.c1
-rw-r--r--net/ipv4/udp.c19
-rw-r--r--net/ipv4/udp_diag.c2
-rw-r--r--net/ipv4/udp_offload.c17
-rw-r--r--net/ipv4/udp_tunnel_core.c1
-rw-r--r--net/ipv4/xfrm4_input.c2
-rw-r--r--net/ipv4/xfrm4_tunnel.c1
-rw-r--r--net/ipv6/addrconf.c804
-rw-r--r--net/ipv6/addrconf_core.c21
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/anycast.c61
-rw-r--r--net/ipv6/calipso.c5
-rw-r--r--net/ipv6/esp6.c1
-rw-r--r--net/ipv6/exthdrs.c44
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/inet6_hashtables.c8
-rw-r--r--net/ipv6/ioam6.c72
-rw-r--r--net/ipv6/ip6_fib.c92
-rw-r--r--net/ipv6/ip6_gre.c14
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c18
-rw-r--r--net/ipv6/ip6_output.c25
-rw-r--r--net/ipv6/ip6_tunnel.c46
-rw-r--r--net/ipv6/ip6_udp_tunnel.c1
-rw-r--r--net/ipv6/ip6_vti.c13
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/ipv6_sockglue.c8
-rw-r--r--net/ipv6/mcast.c15
-rw-r--r--net/ipv6/mip6.c1
-rw-r--r--net/ipv6/ndisc.c84
-rw-r--r--net/ipv6/netfilter/Kconfig20
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/output_core.c4
-rw-r--r--net/ipv6/raw.c22
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c68
-rw-r--r--net/ipv6/seg6.c20
-rw-r--r--net/ipv6/seg6_hmac.c8
-rw-r--r--net/ipv6/sit.c28
-rw-r--r--net/ipv6/syncookies.c31
-rw-r--r--net/ipv6/tcp_ipv6.c39
-rw-r--r--net/ipv6/tcpv6_offload.c16
-rw-r--r--net/ipv6/tunnel6.c1
-rw-r--r--net/ipv6/udp.c24
-rw-r--r--net/ipv6/udp_offload.c21
-rw-r--r--net/ipv6/xfrm6_input.c2
-rw-r--r--net/ipv6/xfrm6_tunnel.c6
-rw-r--r--net/iucv/af_iucv.c10
-rw-r--r--net/iucv/iucv.c19
-rw-r--r--net/kcm/kcmsock.c18
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/llc/af_llc.c26
-rw-r--r--net/llc/llc_core.c7
-rw-r--r--net/mac80211/Kconfig1
-rw-r--r--net/mac80211/Makefile2
-rw-r--r--net/mac80211/agg-tx.c2
-rw-r--r--net/mac80211/cfg.c388
-rw-r--r--net/mac80211/chan.c708
-rw-r--r--net/mac80211/debug.h18
-rw-r--r--net/mac80211/debugfs.c3
-rw-r--r--net/mac80211/debugfs_netdev.c4
-rw-r--r--net/mac80211/debugfs_netdev.h5
-rw-r--r--net/mac80211/driver-ops.c14
-rw-r--r--net/mac80211/driver-ops.h27
-rw-r--r--net/mac80211/ht.c6
-rw-r--r--net/mac80211/ibss.c55
-rw-r--r--net/mac80211/ieee80211_i.h205
-rw-r--r--net/mac80211/iface.c38
-rw-r--r--net/mac80211/key.c20
-rw-r--r--net/mac80211/link.c15
-rw-r--r--net/mac80211/main.c231
-rw-r--r--net/mac80211/mesh.c162
-rw-r--r--net/mac80211/mesh.h3
-rw-r--r--net/mac80211/mesh_pathtbl.c6
-rw-r--r--net/mac80211/mesh_plink.c28
-rw-r--r--net/mac80211/mlme.c3150
-rw-r--r--net/mac80211/ocb.c5
-rw-r--r--net/mac80211/offchannel.c21
-rw-r--r--net/mac80211/parse.c971
-rw-r--r--net/mac80211/rate.c17
-rw-r--r--net/mac80211/rx.c53
-rw-r--r--net/mac80211/scan.c90
-rw-r--r--net/mac80211/spectmgmt.c337
-rw-r--r--net/mac80211/sta_info.c28
-rw-r--r--net/mac80211/sta_info.h20
-rw-r--r--net/mac80211/tdls.c73
-rw-r--r--net/mac80211/tests/elems.c5
-rw-r--r--net/mac80211/trace.h201
-rw-r--r--net/mac80211/trace_msg.h2
-rw-r--r--net/mac80211/tx.c74
-rw-r--r--net/mac80211/util.c1794
-rw-r--r--net/mac80211/vht.c52
-rw-r--r--net/mac80211/wbrf.c2
-rw-r--r--net/mac80211/wpa.c33
-rw-r--r--net/mac802154/llsec.c18
-rw-r--r--net/mctp/Kconfig1
-rw-r--r--net/mctp/af_mctp.c117
-rw-r--r--net/mctp/route.c117
-rw-r--r--net/mctp/test/route-test.c413
-rw-r--r--net/mctp/test/utils.c2
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/mpls/mpls_gso.c3
-rw-r--r--net/mpls/mpls_iptunnel.c2
-rw-r--r--net/mptcp/diag.c12
-rw-r--r--net/mptcp/fastopen.c6
-rw-r--r--net/mptcp/mptcp_diag.c2
-rw-r--r--net/mptcp/mptcp_pm_gen.c7
-rw-r--r--net/mptcp/mptcp_pm_gen.h2
-rw-r--r--net/mptcp/options.c31
-rw-r--r--net/mptcp/pm.c29
-rw-r--r--net/mptcp/pm_netlink.c184
-rw-r--r--net/mptcp/pm_userspace.c259
-rw-r--r--net/mptcp/protocol.c193
-rw-r--r--net/mptcp/protocol.h133
-rw-r--r--net/mptcp/sockopt.c73
-rw-r--r--net/mptcp/subflow.c98
-rw-r--r--net/mptcp/token_test.c7
-rw-r--r--net/netfilter/Kconfig12
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h14
-rw-r--r--net/netfilter/ipset/ip_set_core.c39
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h19
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/netfilter/nf_bpf_link.c2
-rw-r--r--net/netfilter/nf_conncount.c8
-rw-r--r--net/netfilter/nf_conntrack_bpf.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c12
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c10
-rw-r--r--net/netfilter/nf_flow_table_core.c17
-rw-r--r--net/netfilter/nf_log.c16
-rw-r--r--net/netfilter/nf_nat_bpf.c4
-rw-r--r--net/netfilter/nf_nat_core.c5
-rw-r--r--net/netfilter/nf_queue.c106
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c161
-rw-r--r--net/netfilter/nf_tables_core.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c155
-rw-r--r--net/netfilter/nft_chain_filter.c11
-rw-r--r--net/netfilter/nft_compat.c49
-rw-r--r--net/netfilter/nft_ct.c30
-rw-r--r--net/netfilter/nft_flow_offload.c6
-rw-r--r--net/netfilter/nft_limit.c23
-rw-r--r--net/netfilter/nft_lookup.c2
-rw-r--r--net/netfilter/nft_nat.c5
-rw-r--r--net/netfilter/nft_osf.c11
-rw-r--r--net/netfilter/nft_rt.c5
-rw-r--r--net/netfilter/nft_set_hash.c8
-rw-r--r--net/netfilter/nft_set_pipapo.c311
-rw-r--r--net/netfilter/nft_set_pipapo.h59
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c78
-rw-r--r--net/netfilter/nft_set_rbtree.c17
-rw-r--r--net/netfilter/nft_socket.c5
-rw-r--r--net/netfilter/nft_synproxy.c7
-rw-r--r--net/netfilter/nft_tproxy.c5
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/netfilter/nft_xfrm.c5
-rw-r--r--net/netfilter/utils.c37
-rw-r--r--net/netfilter/x_tables.c3
-rw-r--r--net/netlabel/netlabel_kapi.c10
-rw-r--r--net/netlink/af_netlink.c80
-rw-r--r--net/netlink/af_netlink.h5
-rw-r--r--net/netlink/diag.c3
-rw-r--r--net/netlink/genetlink.c42
-rw-r--r--net/netrom/af_netrom.c14
-rw-r--r--net/netrom/nr_dev.c2
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/netrom/nr_out.c2
-rw-r--r--net/netrom/nr_route.c8
-rw-r--r--net/netrom/nr_subr.c5
-rw-r--r--net/nfc/core.c2
-rw-r--r--net/nfc/hci/llc.c20
-rw-r--r--net/nfc/nci/core.c4
-rw-r--r--net/openvswitch/flow_netlink.c49
-rw-r--r--net/packet/af_packet.c11
-rw-r--r--net/packet/diag.c3
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c41
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/connection.c4
-rw-r--r--net/rds/rdma.c3
-rw-r--r--net/rds/recv.c13
-rw-r--r--net/rds/send.c6
-rw-r--r--net/rxrpc/af_rxrpc.c12
-rw-r--r--net/rxrpc/ar-internal.h123
-rw-r--r--net/rxrpc/call_event.c335
-rw-r--r--net/rxrpc/call_object.c57
-rw-r--r--net/rxrpc/conn_client.c4
-rw-r--r--net/rxrpc/conn_event.c26
-rw-r--r--net/rxrpc/conn_object.c4
-rw-r--r--net/rxrpc/input.c201
-rw-r--r--net/rxrpc/insecure.c11
-rw-r--r--net/rxrpc/io_thread.c11
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/misc.c8
-rw-r--r--net/rxrpc/output.c445
-rw-r--r--net/rxrpc/proc.c12
-rw-r--r--net/rxrpc/protocol.h6
-rw-r--r--net/rxrpc/rtt.c36
-rw-r--r--net/rxrpc/rxkad.c61
-rw-r--r--net/rxrpc/sendmsg.c63
-rw-r--r--net/rxrpc/sysctl.c16
-rw-r--r--net/rxrpc/txbuf.c174
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c1
-rw-r--r--net/sched/act_connmark.c1
-rw-r--r--net/sched/act_csum.c1
-rw-r--r--net/sched/act_ct.c1
-rw-r--r--net/sched/act_ctinfo.c1
-rw-r--r--net/sched/act_gact.c1
-rw-r--r--net/sched/act_gate.c1
-rw-r--r--net/sched/act_ife.c1
-rw-r--r--net/sched/act_mirred.c39
-rw-r--r--net/sched/act_mpls.c1
-rw-r--r--net/sched/act_nat.c1
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_police.c1
-rw-r--r--net/sched/act_sample.c1
-rw-r--r--net/sched/act_simple.c1
-rw-r--r--net/sched/act_skbedit.c1
-rw-r--r--net/sched/act_skbmod.c1
-rw-r--r--net/sched/act_tunnel_key.c1
-rw-r--r--net/sched/act_vlan.c1
-rw-r--r--net/sched/cls_api.c11
-rw-r--r--net/sched/cls_basic.c1
-rw-r--r--net/sched/cls_bpf.c1
-rw-r--r--net/sched/cls_cgroup.c1
-rw-r--r--net/sched/cls_flow.c1
-rw-r--r--net/sched/cls_flower.c29
-rw-r--r--net/sched/cls_fw.c1
-rw-r--r--net/sched/cls_matchall.c1
-rw-r--r--net/sched/cls_route.c1
-rw-r--r--net/sched/cls_u32.c1
-rw-r--r--net/sched/em_canid.c1
-rw-r--r--net/sched/em_cmp.c1
-rw-r--r--net/sched/em_meta.c1
-rw-r--r--net/sched/em_nbyte.c1
-rw-r--r--net/sched/em_text.c1
-rw-r--r--net/sched/em_u32.c1
-rw-r--r--net/sched/sch_api.c6
-rw-r--r--net/sched/sch_cake.c1
-rw-r--r--net/sched/sch_cbs.c1
-rw-r--r--net/sched/sch_choke.c1
-rw-r--r--net/sched/sch_codel.c33
-rw-r--r--net/sched/sch_drr.c1
-rw-r--r--net/sched/sch_etf.c1
-rw-r--r--net/sched/sch_ets.c1
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/sched/sch_fq_codel.c1
-rw-r--r--net/sched/sch_generic.c3
-rw-r--r--net/sched/sch_gred.c1
-rw-r--r--net/sched/sch_hfsc.c1
-rw-r--r--net/sched/sch_hhf.c1
-rw-r--r--net/sched/sch_htb.c1
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_mqprio.c1
-rw-r--r--net/sched/sch_multiq.c1
-rw-r--r--net/sched/sch_netem.c1
-rw-r--r--net/sched/sch_pie.c1
-rw-r--r--net/sched/sch_plug.c1
-rw-r--r--net/sched/sch_prio.c1
-rw-r--r--net/sched/sch_qfq.c1
-rw-r--r--net/sched/sch_red.c1
-rw-r--r--net/sched/sch_sfb.c1
-rw-r--r--net/sched/sch_sfq.c1
-rw-r--r--net/sched/sch_skbprio.c1
-rw-r--r--net/sched/sch_taprio.c73
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sctp/diag.c1
-rw-r--r--net/sctp/inqueue.c14
-rw-r--r--net/sctp/protocol.c10
-rw-r--r--net/sctp/socket.c1
-rw-r--r--net/smc/af_smc.c23
-rw-r--r--net/smc/smc.h4
-rw-r--r--net/smc/smc_clc.c6
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_core.c14
-rw-r--r--net/smc/smc_diag.c3
-rw-r--r--net/smc/smc_ism.h10
-rw-r--r--net/smc/smc_pnet.c10
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c14
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c11
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c27
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svc.c44
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c245
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c151
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c15
-rw-r--r--net/sunrpc/xprtsock.c9
-rw-r--r--net/switchdev/switchdev.c73
-rw-r--r--net/tipc/Kconfig7
-rw-r--r--net/tipc/Makefile4
-rw-r--r--net/tipc/bearer.c17
-rw-r--r--net/tipc/diag.c1
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c193
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile2
-rw-r--r--net/unix/af_unix.c106
-rw-r--r--net/unix/diag.c3
-rw-r--r--net/unix/garbage.c210
-rw-r--r--net/unix/scm.c159
-rw-r--r--net/unix/scm.h10
-rw-r--r--net/vmw_vsock/diag.c1
-rw-r--r--net/wireless/Kconfig1
-rw-r--r--net/wireless/chan.c377
-rw-r--r--net/wireless/core.c3
-rw-r--r--net/wireless/core.h52
-rw-r--r--net/wireless/mlme.c146
-rw-r--r--net/wireless/nl80211.c392
-rw-r--r--net/wireless/reg.c17
-rw-r--r--net/wireless/scan.c847
-rw-r--r--net/wireless/sme.c3
-rw-r--r--net/wireless/tests/Makefile2
-rw-r--r--net/wireless/tests/chan.c228
-rw-r--r--net/wireless/tests/fragmentation.c30
-rw-r--r--net/wireless/tests/scan.c277
-rw-r--r--net/wireless/trace.h62
-rw-r--r--net/wireless/util.c90
-rw-r--r--net/x25/Kconfig2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/xdp/xsk.c20
-rw-r--r--net/xdp/xsk_buff_pool.c1
-rw-r--r--net/xdp/xsk_diag.c1
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--net/xfrm/xfrm_algo.c1
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_interface_bpf.c4
-rw-r--r--net/xfrm/xfrm_interface_core.c26
-rw-r--r--net/xfrm/xfrm_output.c6
-rw-r--r--net/xfrm/xfrm_policy.c153
-rw-r--r--net/xfrm/xfrm_proc.c1
-rw-r--r--net/xfrm/xfrm_state.c17
-rw-r--r--net/xfrm/xfrm_state_bpf.c4
-rw-r--r--net/xfrm/xfrm_user.c6
-rw-r--r--rust/Makefile48
-rw-r--r--rust/alloc/alloc.rs12
-rw-r--r--rust/alloc/boxed.rs34
-rw-r--r--rust/alloc/collections/mod.rs1
-rw-r--r--rust/alloc/lib.rs9
-rw-r--r--rust/alloc/raw_vec.rs77
-rw-r--r--rust/alloc/vec/into_iter.rs16
-rw-r--r--rust/alloc/vec/mod.rs81
-rw-r--r--rust/bindings/bindings_helper.h5
-rw-r--r--rust/kernel/allocator.rs2
-rw-r--r--rust/kernel/error.rs10
-rw-r--r--rust/kernel/init.rs22
-rw-r--r--rust/kernel/ioctl.rs6
-rw-r--r--rust/kernel/lib.rs37
-rw-r--r--rust/kernel/net/phy.rs24
-rw-r--r--rust/kernel/str.rs193
-rw-r--r--rust/kernel/sync.rs5
-rw-r--r--rust/kernel/sync/arc.rs30
-rw-r--r--rust/kernel/sync/condvar.rs110
-rw-r--r--rust/kernel/sync/lock.rs19
-rw-r--r--rust/kernel/sync/lock/mutex.rs3
-rw-r--r--rust/kernel/sync/lock/spinlock.rs5
-rw-r--r--rust/kernel/sync/locked_by.rs7
-rw-r--r--rust/kernel/task.rs24
-rw-r--r--rust/kernel/time.rs20
-rw-r--r--rust/kernel/types.rs22
-rw-r--r--rust/kernel/workqueue.rs84
-rw-r--r--rust/macros/module.rs7
-rw-r--r--samples/bpf/asm_goto_workaround.h8
-rw-r--r--samples/bpf/map_perf_test_user.c2
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c2
-rw-r--r--samples/cgroup/.gitignore3
-rw-r--r--samples/seccomp/user-trap.c8
-rw-r--r--scripts/Kconfig.include2
-rw-r--r--scripts/Makefile.build8
-rw-r--r--scripts/Makefile.compiler2
-rw-r--r--scripts/Makefile.defconf8
-rw-r--r--scripts/Makefile.extrawarn2
-rw-r--r--scripts/Makefile.host2
-rw-r--r--scripts/Makefile.lib16
-rw-r--r--scripts/Makefile.ubsan5
-rw-r--r--scripts/Makefile.vmlinux_o2
-rw-r--r--scripts/atomic/kerneldoc/add_unless1
-rw-r--r--scripts/atomic/kerneldoc/cmpxchg1
-rw-r--r--scripts/atomic/kerneldoc/dec_if_positive1
-rw-r--r--scripts/atomic/kerneldoc/dec_unless_positive1
-rw-r--r--scripts/atomic/kerneldoc/inc_not_zero1
-rw-r--r--scripts/atomic/kerneldoc/inc_unless_negative1
-rw-r--r--scripts/atomic/kerneldoc/try_cmpxchg3
-rwxr-xr-xscripts/bpf_doc.py4
-rwxr-xr-xscripts/clang-tools/gen_compile_commands.py2
-rw-r--r--scripts/coccinelle/api/string_choices.cocci41
-rw-r--r--scripts/coccinelle/misc/struct_size.cocci74
-rw-r--r--scripts/gdb/linux/constants.py.in2
-rw-r--r--scripts/gdb/linux/mm.py2
-rw-r--r--scripts/gdb/linux/symbols.py2
-rw-r--r--scripts/generate_rust_target.rs2
-rw-r--r--scripts/kconfig/symbol.c4
-rwxr-xr-xscripts/kernel-doc2531
-rwxr-xr-xscripts/leaking_addresses.pl90
-rwxr-xr-xscripts/link-vmlinux.sh9
-rwxr-xr-xscripts/min-tool-version.sh2
-rwxr-xr-xscripts/mksysmap13
-rw-r--r--scripts/mod/modpost.c17
-rw-r--r--scripts/mod/modpost.h6
-rw-r--r--scripts/mod/sumversion.c7
-rw-r--r--scripts/package/kernel.spec22
-rwxr-xr-xscripts/sphinx-pre-install32
-rw-r--r--security/apparmor/lsm.c6
-rw-r--r--security/integrity/Makefile1
-rw-r--r--security/integrity/digsig.c3
-rw-r--r--security/integrity/digsig_asymmetric.c23
-rw-r--r--security/integrity/evm/Kconfig1
-rw-r--r--security/integrity/evm/evm.h19
-rw-r--r--security/integrity/evm/evm_crypto.c4
-rw-r--r--security/integrity/evm/evm_main.c195
-rw-r--r--security/integrity/iint.c197
-rw-r--r--security/integrity/ima/Kconfig1
-rw-r--r--security/integrity/ima/Makefile2
-rw-r--r--security/integrity/ima/ima.h148
-rw-r--r--security/integrity/ima/ima_api.c23
-rw-r--r--security/integrity/ima/ima_appraise.c66
-rw-r--r--security/integrity/ima/ima_iint.c142
-rw-r--r--security/integrity/ima/ima_init.c2
-rw-r--r--security/integrity/ima/ima_main.c148
-rw-r--r--security/integrity/ima/ima_policy.c2
-rw-r--r--security/integrity/integrity.h80
-rw-r--r--security/keys/encrypted-keys/encrypted.c4
-rw-r--r--security/keys/key.c10
-rw-r--r--security/landlock/fs.c4
-rw-r--r--security/security.c844
-rw-r--r--security/selinux/hooks.c80
-rw-r--r--security/selinux/ss/avtab.c105
-rw-r--r--security/selinux/ss/avtab.h74
-rw-r--r--security/selinux/ss/conditional.c68
-rw-r--r--security/selinux/ss/conditional.h23
-rw-r--r--security/selinux/ss/constraint.h67
-rw-r--r--security/selinux/ss/context.c2
-rw-r--r--security/selinux/ss/context.h41
-rw-r--r--security/selinux/ss/ebitmap.c56
-rw-r--r--security/selinux/ss/ebitmap.h42
-rw-r--r--security/selinux/ss/hashtab.c23
-rw-r--r--security/selinux/ss/hashtab.h35
-rw-r--r--security/selinux/ss/mls.c83
-rw-r--r--security/selinux/ss/mls.h58
-rw-r--r--security/selinux/ss/mls_types.h32
-rw-r--r--security/selinux/ss/policydb.c405
-rw-r--r--security/selinux/ss/policydb.h192
-rw-r--r--security/selinux/ss/services.h3
-rw-r--r--security/selinux/ss/sidtab.c69
-rw-r--r--security/selinux/ss/sidtab.h36
-rw-r--r--security/selinux/ss/symtab.c4
-rw-r--r--security/selinux/ss/symtab.h9
-rw-r--r--security/smack/smack_lsm.c106
-rw-r--r--security/tomoyo/common.c3
-rw-r--r--security/tomoyo/tomoyo.c3
-rw-r--r--sound/core/Makefile1
-rw-r--r--sound/core/pcm.c4
-rw-r--r--sound/core/pcm_native.c5
-rw-r--r--sound/core/ump.c4
-rw-r--r--sound/firewire/amdtp-stream.c2
-rw-r--r--sound/pci/hda/Kconfig4
-rw-r--r--sound/pci/hda/cs35l41_hda_property.c14
-rw-r--r--sound/pci/hda/cs35l56_hda.c136
-rw-r--r--sound/pci/hda/hda_controller.c3
-rw-r--r--sound/pci/hda/hda_intel.c6
-rw-r--r--sound/pci/hda/patch_conexant.c18
-rw-r--r--sound/pci/hda/patch_cs8409.c1
-rw-r--r--sound/pci/hda/patch_realtek.c77
-rw-r--r--sound/pci/hda/tas2781_hda_i2c.c2
-rw-r--r--sound/soc/amd/acp/acp-mach-common.c16
-rw-r--r--sound/soc/amd/acp/acp-sof-mach.c4
-rw-r--r--sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c8
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c42
-rw-r--r--sound/soc/amd/yc/pci-acp6x.c1
-rw-r--r--sound/soc/codecs/cs35l45.c2
-rw-r--r--sound/soc/codecs/cs35l56-shared.c142
-rw-r--r--sound/soc/codecs/cs35l56.c383
-rw-r--r--sound/soc/codecs/cs35l56.h2
-rw-r--r--sound/soc/codecs/cs42l43.c48
-rw-r--r--[-rwxr-xr-x]sound/soc/codecs/es8326.c186
-rw-r--r--sound/soc/codecs/es8326.h3
-rw-r--r--sound/soc/codecs/lpass-wsa-macro.c7
-rw-r--r--sound/soc/codecs/madera.c2
-rw-r--r--sound/soc/codecs/rt5645.c26
-rw-r--r--sound/soc/codecs/tas2781-comlib.c3
-rw-r--r--sound/soc/codecs/tas2781-i2c.c2
-rw-r--r--sound/soc/codecs/wcd9335.c4
-rw-r--r--sound/soc/codecs/wcd934x.c1
-rw-r--r--sound/soc/codecs/wcd938x.c8
-rw-r--r--sound/soc/codecs/wm8962.c29
-rw-r--r--sound/soc/codecs/wm_adsp.c73
-rw-r--r--sound/soc/codecs/wsa883x.c6
-rw-r--r--sound/soc/fsl/fsl_xcvr.c12
-rw-r--r--sound/soc/intel/avs/core.c3
-rw-r--r--sound/soc/intel/avs/topology.c2
-rw-r--r--sound/soc/intel/boards/bytcht_cx2072x.c3
-rw-r--r--sound/soc/intel/boards/bytcht_da7213.c3
-rw-r--r--sound/soc/intel/boards/bytcht_es8316.c3
-rw-r--r--sound/soc/intel/boards/bytcr_rt5640.c15
-rw-r--r--sound/soc/intel/boards/bytcr_rt5651.c3
-rw-r--r--sound/soc/intel/boards/bytcr_wm5102.c3
-rw-r--r--sound/soc/intel/boards/cht_bsw_rt5645.c7
-rw-r--r--sound/soc/intel/boards/cht_bsw_rt5672.c3
-rw-r--r--sound/soc/qcom/lpass-cdc-dma.c2
-rw-r--r--sound/soc/qcom/qdsp6/q6apm-dai.c8
-rw-r--r--sound/soc/qcom/sc8280xp.c12
-rw-r--r--sound/soc/sh/rcar/adg.c7
-rw-r--r--sound/soc/soc-card.c24
-rw-r--r--sound/soc/soc-core.c5
-rw-r--r--sound/soc/sof/amd/acp-ipc.c2
-rw-r--r--sound/soc/sof/amd/acp.c17
-rw-r--r--sound/soc/sof/intel/pci-lnl.c2
-rw-r--r--sound/soc/sof/intel/pci-tgl.c64
-rw-r--r--sound/soc/sof/ipc3-topology.c55
-rw-r--r--sound/soc/sof/ipc3.c2
-rw-r--r--sound/soc/sof/ipc4-pcm.c13
-rw-r--r--sound/soc/sunxi/sun4i-spdif.c5
-rw-r--r--sound/usb/clock.c34
-rw-r--r--sound/usb/format.c20
-rw-r--r--sound/usb/midi.c73
-rw-r--r--sound/usb/midi2.c2
-rw-r--r--sound/usb/quirks.c38
-rw-r--r--sound/virtio/virtio_card.c2
-rw-r--r--sound/virtio/virtio_ctl_msg.c2
-rw-r--r--sound/virtio/virtio_pcm_msg.c2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h11
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h18
-rw-r--r--tools/arch/x86/include/asm/msr-index.h21
-rw-r--r--tools/arch/x86/include/asm/rmwcc.h2
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/lib/insn.c58
-rw-r--r--tools/arch/x86/lib/memcpy_64.S4
-rw-r--r--tools/arch/x86/lib/memset_64.S4
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst58
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst2
-rw-r--r--tools/bpf/bpftool/gen.c277
-rw-r--r--tools/bpf/bpftool/link.c94
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/bpf/resolve_btfids/main.c70
-rw-r--r--tools/include/asm-generic/unaligned.h24
-rw-r--r--tools/include/linux/btf_ids.h9
-rw-r--r--tools/include/linux/compiler_types.h4
-rw-r--r--tools/include/linux/slab.h1
-rw-r--r--tools/include/uapi/asm-generic/unistd.h15
-rw-r--r--tools/include/uapi/drm/drm.h72
-rw-r--r--tools/include/uapi/drm/i915_drm.h12
-rw-r--r--tools/include/uapi/linux/bpf.h123
-rw-r--r--tools/include/uapi/linux/fcntl.h3
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/kvm.h140
-rw-r--r--tools/include/uapi/linux/mount.h70
-rw-r--r--tools/include/uapi/linux/netdev.h20
-rw-r--r--tools/include/uapi/linux/stat.h1
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c42
-rw-r--r--tools/lib/bpf/bpf.h79
-rw-r--r--tools/lib/bpf/bpf_core_read.h60
-rw-r--r--tools/lib/bpf/bpf_helpers.h4
-rw-r--r--tools/lib/bpf/btf.c43
-rw-r--r--tools/lib/bpf/elf.c2
-rw-r--r--tools/lib/bpf/features.c583
-rw-r--r--tools/lib/bpf/libbpf.c1158
-rw-r--r--tools/lib/bpf/libbpf.h23
-rw-r--r--tools/lib/bpf/libbpf.map6
-rw-r--r--tools/lib/bpf/libbpf_internal.h68
-rw-r--r--tools/lib/bpf/libbpf_probes.c19
-rw-r--r--tools/lib/bpf/linker.c2
-rw-r--r--tools/lib/bpf/netlink.c4
-rw-r--r--tools/lib/bpf/str_error.h3
-rw-r--r--tools/net/ynl/Makefile4
-rw-r--r--tools/net/ynl/Makefile.deps5
-rwxr-xr-xtools/net/ynl/cli.py43
-rw-r--r--tools/net/ynl/generated/Makefile9
-rw-r--r--tools/net/ynl/lib/Makefile5
-rw-r--r--tools/net/ynl/lib/__init__.py4
-rw-r--r--tools/net/ynl/lib/nlspec.py11
-rw-r--r--tools/net/ynl/lib/ynl-priv.h359
-rw-r--r--tools/net/ynl/lib/ynl.c407
-rw-r--r--tools/net/ynl/lib/ynl.h5
-rw-r--r--tools/net/ynl/lib/ynl.py311
-rw-r--r--tools/net/ynl/samples/.gitignore1
-rw-r--r--tools/net/ynl/samples/Makefile6
-rw-r--r--tools/net/ynl/samples/ovs.c60
-rw-r--r--tools/net/ynl/samples/page-pool.c2
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py124
-rwxr-xr-xtools/net/ynl/ynl-gen-rst.py9
-rw-r--r--tools/objtool/arch/x86/decode.c19
-rw-r--r--tools/objtool/arch/x86/special.c2
-rw-r--r--tools/objtool/check.c16
-rw-r--r--tools/objtool/noreturns.h2
-rw-r--r--tools/perf/Documentation/perf-list.txt4
-rw-r--r--tools/perf/Makefile.perf10
-rw-r--r--tools/perf/builtin-list.c211
-rw-r--r--tools/perf/builtin-record.c4
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json254
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json25
-rwxr-xr-xtools/perf/tests/shell/daemon.sh34
-rwxr-xr-xtools/perf/tests/shell/list.sh21
-rwxr-xr-xtools/perf/tests/shell/script.sh12
-rw-r--r--tools/perf/trace/beauty/statx.c1
-rw-r--r--tools/perf/util/evlist.c9
-rw-r--r--tools/perf/util/hist.c4
-rw-r--r--tools/perf/util/include/linux/linkage.h4
-rw-r--r--tools/perf/util/metricgroup.c2
-rw-r--r--tools/perf/util/print-events.c2
-rw-r--r--tools/perf/util/synthetic-events.c4
-rw-r--r--tools/power/cpupower/bench/Makefile2
-rw-r--r--tools/testing/cxl/Kbuild3
-rw-r--r--tools/testing/cxl/test/Kbuild2
-rw-r--r--tools/testing/cxl/test/cxl.c63
-rw-r--r--tools/testing/cxl/test/mock.c14
-rw-r--r--tools/testing/cxl/test/mock.h1
-rw-r--r--tools/testing/kunit/configs/all_tests.config6
-rw-r--r--tools/testing/kunit/kunit_kernel.py1
-rw-r--r--tools/testing/nvdimm/Kbuild2
-rw-r--r--tools/testing/selftests/Makefile10
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch643
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x3
-rw-r--r--tools/testing/selftests/bpf/Makefile51
-rw-r--r--tools/testing/selftests/bpf/README.rst32
-rw-r--r--tools/testing/selftests/bpf/bench.c40
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c182
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh9
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_alloc.h67
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h70
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_htab.h100
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h92
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h76
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h30
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c84
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c129
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h65
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iters.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_probes.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_redirect.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_reroute.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_destroy.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c159
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c1052
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdpwall.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c48
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab_asm.c5
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c87
-rw-r--r--tools/testing/selftests/bpf/progs/async_stack_depth.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops2.c14
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_compiler.h33
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h16
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c26
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h57
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c9
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task.c12
-rw-r--r--tools/testing/selftests/bpf/progs/kptr_xchg_inline.c48
-rw-r--r--tools/testing/selftests/bpf/progs/loop4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/priv_map.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h17
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h7
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c120
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c45
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c4
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h18
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate.c52
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c32
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c29
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c24
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c56
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c102
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_fill_link_info.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_siphash.h64
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c595
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h140
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c5
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c34
-rw-r--r--tools/testing/selftests/bpf/progs/token_lsm.c32
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c20
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c28
-rw-r--r--tools/testing/selftests/bpf/progs/type_cast.c13
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c146
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c182
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c29
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c173
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c24
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c553
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spin_lock.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c6
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c3
-rw-r--r--tools/testing/selftests/bpf/test_loader.c13
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c18
-rw-r--r--tools/testing/selftests/bpf/test_maps.c6
-rw-r--r--tools/testing/selftests/bpf/test_progs.c77
-rw-r--r--tools/testing/selftests/bpf/test_progs.h10
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c3
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c60
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c96
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h10
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_loop_inline.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c6
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c1
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile7
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh19
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh21
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh48
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/lag_lib.sh18
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh2
l---------tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh1
-rw-r--r--tools/testing/selftests/drivers/net/bonding/settings2
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile18
l---------tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_mld.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh2
l---------tools/testing/selftests/drivers/net/dsa/lib.sh1
l---------tools/testing/selftests/drivers/net/dsa/local_termination.sh2
l---------tools/testing/selftests/drivers/net/dsa/no_forwarding.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh9
l---------tools/testing/selftests/drivers/net/dsa/tc_actions.sh2
l---------tools/testing/selftests/drivers/net/dsa/tc_common.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh2
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile18
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh143
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh47
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile7
-rw-r--r--tools/testing/selftests/drivers/net/team/config4
-rwxr-xr-xtools/testing/selftests/drivers/net/team/dev_addr_lists.sh4
l---------tools/testing/selftests/drivers/net/team/lag_lib.sh1
l---------tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh1
-rw-r--r--tools/testing/selftests/dt/Makefile2
-rwxr-xr-xtools/testing/selftests/dt/test_unprobed_devices.sh19
-rw-r--r--tools/testing/selftests/exec/execveat.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c10
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest2
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc42
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c13
-rw-r--r--tools/testing/selftests/hid/tests/test_wacom_generic.py8
-rw-r--r--tools/testing/selftests/iommu/config5
-rw-r--r--tools/testing/selftests/iommu/iommufd.c78
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h39
-rw-r--r--tools/testing/selftests/kselftest.h45
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh (renamed from tools/testing/selftests/dt/ktap_helpers.sh)45
-rw-r--r--tools/testing/selftests/kselftest_harness.h198
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c12
-rw-r--r--tools/testing/selftests/kvm/aarch64/hypercalls.c16
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c6
-rw-r--r--tools/testing/selftests/kvm/aarch64/smccc_filter.c2
-rw-r--r--tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c12
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c4
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c4
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c54
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c2
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c8
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c6
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h2
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c2
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c4
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c2
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c19
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c2
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c25
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c21
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c6
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c2
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c6
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c110
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c4
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c31
-rw-r--r--tools/testing/selftests/kvm/s390x/resets.c4
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c20
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c18
-rw-r--r--tools/testing/selftests/kvm/system_counter_offset_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/cpuid_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/flds_emulation.h2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_ipi.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_clock_test.c42
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c28
-rw-r--r--tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/ucna_injection_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_io_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c16
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c19
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c2
-rw-r--r--tools/testing/selftests/landlock/base_test.c2
-rw-r--r--tools/testing/selftests/landlock/common.h106
-rw-r--r--tools/testing/selftests/landlock/fs_test.c37
-rw-r--r--tools/testing/selftests/landlock/net_test.c17
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c7
-rw-r--r--tools/testing/selftests/lib.mk42
-rw-r--r--tools/testing/selftests/livepatch/.gitignore1
-rw-r--r--tools/testing/selftests/livepatch/Makefile5
-rw-r--r--tools/testing/selftests/livepatch/README25
-rw-r--r--tools/testing/selftests/livepatch/config1
-rw-r--r--tools/testing/selftests/livepatch/functions.sh84
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh50
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh6
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh10
-rwxr-xr-xtools/testing/selftests/livepatch/test-shadow-vars.sh2
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh18
-rwxr-xr-xtools/testing/selftests/livepatch/test-syscall.sh53
-rwxr-xr-xtools/testing/selftests/livepatch/test-sysfs.sh6
-rw-r--r--tools/testing/selftests/livepatch/test_klp-call_getpid.c44
-rw-r--r--tools/testing/selftests/livepatch/test_modules/Makefile26
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c (renamed from lib/livepatch/test_klp_atomic_replace.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c (renamed from lib/livepatch/test_klp_callbacks_busy.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c (renamed from lib/livepatch/test_klp_callbacks_demo.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c (renamed from lib/livepatch/test_klp_callbacks_demo2.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c (renamed from lib/livepatch/test_klp_callbacks_mod.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c (renamed from lib/livepatch/test_klp_livepatch.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c (renamed from lib/livepatch/test_klp_shadow_vars.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state.c (renamed from lib/livepatch/test_klp_state.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state2.c (renamed from lib/livepatch/test_klp_state2.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state3.c (renamed from lib/livepatch/test_klp_state3.c)0
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c116
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c6
-rwxr-xr-xtools/testing/selftests/mm/charge_reserved_hugetlb.sh2
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c4
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c2
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c7
-rw-r--r--tools/testing/selftests/mm/mremap_test.c27
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c6
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh6
-rwxr-xr-xtools/testing/selftests/mm/write_hugetlb_memory.sh2
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c4
-rw-r--r--tools/testing/selftests/mqueue/setting1
-rw-r--r--tools/testing/selftests/net/Makefile11
-rwxr-xr-xtools/testing/selftests/net/big_tcp.sh4
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh4
-rw-r--r--tools/testing/selftests/net/config50
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh34
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh6
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh148
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile6
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_locked_port.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/config35
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh16
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh.sh41
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh42
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/ip6gre_lib.sh4
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh65
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh52
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh129
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh10
-rwxr-xr-xtools/testing/selftests/net/fq_band_pktlimit.sh14
-rwxr-xr-xtools/testing/selftests/net/gro.sh5
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh38
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c95
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c10
-rw-r--r--tools/testing/selftests/net/lib.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/config3
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh130
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh248
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh341
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh219
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh104
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh71
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c39
-rw-r--r--tools/testing/selftests/net/mptcp/settings2
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh69
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh175
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/net_helper.sh11
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh75
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py71
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh43
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh6
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh8
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/setup_loopback.sh0
-rw-r--r--tools/testing/selftests/net/setup_veth.sh2
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c68
-rw-r--r--tools/testing/selftests/net/so_txtime.c7
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh29
-rw-r--r--tools/testing/selftests/net/tcp_ao/config10
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c46
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c12
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c138
-rw-r--r--tools/testing/selftests/net/tcp_ao/settings1
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c12
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh23
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh36
-rw-r--r--tools/testing/selftests/net/tls.c59
-rw-r--r--tools/testing/selftests/net/txtimestamp.c3
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh12
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh6
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh22
-rw-r--r--tools/testing/selftests/net/udpgso.c134
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh49
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c2
-rwxr-xr-xtools/testing/selftests/net/veth.sh18
-rw-r--r--tools/testing/selftests/net/xdp_dummy.c13
-rw-r--r--tools/testing/selftests/netfilter/Makefile3
-rw-r--r--tools/testing/selftests/netfilter/bridge_netfilter.sh188
-rw-r--r--tools/testing/selftests/netfilter/conntrack_dump_flush.c43
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c31
-rw-r--r--tools/testing/selftests/power_supply/Makefile4
-rw-r--r--tools/testing/selftests/power_supply/helpers.sh178
-rwxr-xr-xtools/testing/selftests/power_supply/test_power_supply_properties.sh114
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_signal.c16
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c4
-rw-r--r--tools/testing/selftests/powerpc/primitives/linux/bitops.h0
l---------tools/testing/selftests/powerpc/primitives/linux/wordpart.h1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh2
-rw-r--r--tools/testing/selftests/resctrl/cache.c287
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c421
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c80
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c132
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c30
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c34
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h145
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c207
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c138
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c405
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c14
-rw-r--r--tools/testing/selftests/rseq/param_test.c22
-rw-r--r--tools/testing/selftests/rust/Makefile4
-rw-r--r--tools/testing/selftests/rust/config5
-rwxr-xr-xtools/testing/selftests/rust/test_probe_samples.sh41
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c142
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c50
-rw-r--r--tools/testing/selftests/tc-testing/config1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json403
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json2
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py2
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh3
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/.gitignore1
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/.gitignore1
-rw-r--r--tools/testing/selftests/uevent/.gitignore1
-rw-r--r--tools/testing/vsock/util.c17
-rw-r--r--tools/testing/vsock/util.h4
-rw-r--r--tools/testing/vsock/vsock_diag_test.c23
-rw-r--r--tools/testing/vsock/vsock_test.c102
-rw-r--r--tools/testing/vsock/vsock_test_zerocopy.c12
-rw-r--r--tools/testing/vsock/vsock_uring_test.c17
-rw-r--r--tools/tracing/rtla/Makefile7
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c9
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c6
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c9
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c6
-rw-r--r--tools/tracing/rtla/src/utils.c14
-rw-r--r--tools/tracing/rtla/src/utils.h2
-rw-r--r--tools/verification/rv/Makefile7
-rw-r--r--tools/verification/rv/src/in_kernel.c2
-rw-r--r--tools/virtio/.gitignore1
-rw-r--r--tools/virtio/Makefile8
-rw-r--r--tools/virtio/linux/virtio_config.h4
-rw-r--r--tools/virtio/vhost_net_test.c532
-rw-r--r--tools/workqueue/wq_dump.py104
-rw-r--r--virt/kvm/kvm_main.c8
6200 files changed, 266425 insertions, 111759 deletions
diff --git a/.get_maintainer.ignore b/.get_maintainer.ignore
index c298bab3d320..7d1b30aae874 100644
--- a/.get_maintainer.ignore
+++ b/.get_maintainer.ignore
@@ -1,4 +1,5 @@
Alan Cox <alan@lxorguk.ukuu.org.uk>
Alan Cox <root@hraefn.swansea.linux.org.uk>
Christoph Hellwig <hch@lst.de>
+Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Marc Gonzalez <marc.w.gonzalez@free.fr>
diff --git a/.mailmap b/.mailmap
index 04998f7bda81..e90797de3256 100644
--- a/.mailmap
+++ b/.mailmap
@@ -191,10 +191,11 @@ Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
-Geliang Tang <geliang.tang@linux.dev> <geliang.tang@suse.com>
-Geliang Tang <geliang.tang@linux.dev> <geliangtang@xiaomi.com>
-Geliang Tang <geliang.tang@linux.dev> <geliangtang@gmail.com>
-Geliang Tang <geliang.tang@linux.dev> <geliangtang@163.com>
+Geliang Tang <geliang@kernel.org> <geliang.tang@linux.dev>
+Geliang Tang <geliang@kernel.org> <geliang.tang@suse.com>
+Geliang Tang <geliang@kernel.org> <geliangtang@xiaomi.com>
+Geliang Tang <geliang@kernel.org> <geliangtang@gmail.com>
+Geliang Tang <geliang@kernel.org> <geliangtang@163.com>
Georgi Djakov <djakov@kernel.org> <georgi.djakov@linaro.org>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
@@ -289,6 +290,7 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
John Fastabend <john.fastabend@gmail.com> <john.r.fastabend@intel.com>
John Keeping <john@keeping.me.uk> <john@metanate.com>
+John Moon <john@jmoon.dev> <quic_johmoo@quicinc.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
@@ -323,6 +325,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
+Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com>
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
Koushik <raghavendra.koushik@neterion.com>
@@ -344,6 +347,7 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
Leon Romanovsky <leon@kernel.org> <leonro@nvidia.com>
+Leo Yan <leo.yan@linux.dev> <leo.yan@linaro.org>
Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
@@ -550,6 +554,7 @@ Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com>
Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com>
Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
+Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com>
Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
@@ -568,6 +573,7 @@ Simon Kelley <simon@thekelleys.org.uk>
Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
+Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com>
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
@@ -605,6 +611,11 @@ TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn>
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
Tudor Ambarus <tudor.ambarus@linaro.org> <tudor.ambarus@microchip.com>
+Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko.ursulin@intel.com>
+Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko.ursulin@linux.intel.com>
+Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko.ursulin@sophos.com>
+Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko.ursulin@onelan.co.uk>
+Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko@ursulin.net>
Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws>
Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com>
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
diff --git a/CREDITS b/CREDITS
index 5797e8f7e92b..3c2bb55847c6 100644
--- a/CREDITS
+++ b/CREDITS
@@ -63,6 +63,11 @@ D: dosfs, LILO, some fd features, ATM, various other hacks here and there
S: Buenos Aires
S: Argentina
+NTFS FILESYSTEM
+N: Anton Altaparmakov
+E: anton@tuxera.com
+D: NTFS filesystem
+
N: Tim Alpaerts
E: tim_alpaerts@toyota-motor-europe.com
D: 802.2 class II logical link control layer,
@@ -2161,6 +2166,19 @@ N: Mike Kravetz
E: mike.kravetz@oracle.com
D: Maintenance and development of the hugetlb subsystem
+N: Seth Jennings
+E: sjenning@redhat.com
+D: Creation and maintenance of zswap
+
+N: Dan Streetman
+E: ddstreet@ieee.org
+D: Maintenance and development of zswap
+D: Creation and maintenance of the zpool API
+
+N: Vitaly Wool
+E: vitaly.wool@konsulko.com
+D: Maintenance and development of zswap
+
N: Andreas S. Krebs
E: akrebs@altavista.net
D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards
diff --git a/Documentation/ABI/testing/debugfs-intel-iommu b/Documentation/ABI/testing/debugfs-intel-iommu
new file mode 100644
index 000000000000..2ab8464504a9
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-intel-iommu
@@ -0,0 +1,276 @@
+What: /sys/kernel/debug/iommu/intel/iommu_regset
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps all the register contents for each IOMMU device.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/iommu_regset
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+ IOMMU: dmar1 Register Base Address: fed90000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+ IOMMU: dmar2 Register Base Address: fed91000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/ir_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps the table entries for Interrupt
+ remapping and Interrupt posting.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/ir_translation_struct
+
+ Remapped Interrupt supported on IOMMU: dmar0
+ IR table address:100900000
+
+ Entry SrcID DstID Vct IRTE_high IRTE_low
+ 0 00:0a.0 00000080 24 0000000000040050 000000800024000d
+ 1 00:0a.0 00000001 ef 0000000000040050 0000000100ef000d
+
+ Remapped Interrupt supported on IOMMU: dmar1
+ IR table address:100300000
+ Entry SrcID DstID Vct IRTE_high IRTE_low
+ 0 00:02.0 00000002 26 0000000000040010 000000020026000d
+
+ [...]
+
+ ****
+
+ Posted Interrupt supported on IOMMU: dmar0
+ IR table address:100900000
+ Entry SrcID PDA_high PDA_low Vct IRTE_high IRTE_low
+
+What: /sys/kernel/debug/iommu/intel/dmar_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps Intel IOMMU DMA remapping tables, such
+ as root table, context table, PASID directory and PASID
+ table entries in debugfs. For legacy mode, it doesn't
+ support PASID, and hence PASID field is defaulted to
+ '-1' and other PASID related fields are invalid.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_translation_struct
+
+ IOMMU dmar1: Root Table Address: 0x103027000
+ B.D.F Root_entry
+ 00:02.0 0x0000000000000000:0x000000010303e001
+
+ Context_entry
+ 0x0000000000000102:0x000000010303f005
+
+ PASID PASID_table_entry
+ -1 0x0000000000000000:0x0000000000000000:0x0000000000000000
+
+ IOMMU dmar0: Root Table Address: 0x103028000
+ B.D.F Root_entry
+ 00:0a.0 0x0000000000000000:0x00000001038a7001
+
+ Context_entry
+ 0x0000000000000000:0x0000000103220e7d
+
+ PASID PASID_table_entry
+ 0 0x0000000000000000:0x0000000000800002:0x00000001038a5089
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/invalidation_queue
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file exports invalidation queue internals of each
+ IOMMU device.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/invalidation_queue
+
+ Invalidation queue on IOMMU: dmar0
+ Base: 0x10022e000 Head: 20 Tail: 20
+ Index qw0 qw1 qw2
+ 0 0000000000000014 0000000000000000 0000000000000000
+ 1 0000000200000025 0000000100059c04 0000000000000000
+ 2 0000000000000014 0000000000000000 0000000000000000
+
+ qw3 status
+ 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000
+
+ [...]
+
+ Invalidation queue on IOMMU: dmar1
+ Base: 0x10026e000 Head: 32 Tail: 32
+ Index qw0 qw1 status
+ 0 0000000000000004 0000000000000000 0000000000000000
+ 1 0000000200000025 0000000100059804 0000000000000000
+ 2 0000000000000011 0000000000000000 0000000000000000
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/dmar_perf_latency
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file is used to control and show counts of
+ execution time ranges for various types per DMAR.
+
+ Firstly, write a value to
+ /sys/kernel/debug/iommu/intel/dmar_perf_latency
+ to enable sampling.
+
+ The possible values are as follows:
+
+ * 0 - disable sampling all latency data
+
+ * 1 - enable sampling IOTLB invalidation latency data
+
+ * 2 - enable sampling devTLB invalidation latency data
+
+ * 3 - enable sampling intr entry cache invalidation latency data
+
+ Next, read /sys/kernel/debug/iommu/intel/dmar_perf_latency gives
+ a snapshot of sampling result of all enabled monitors.
+
+ Examples in Kabylake:
+
+ ::
+
+ 1) Disable sampling all latency data:
+
+ $ sudo echo 0 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ 2) Enable sampling IOTLB invalidation latency data
+
+ $ sudo echo 1 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_iotlb 0 0 0 0 0
+
+ 1ms-10ms >=10ms min(us) max(us) average(us)
+ inv_iotlb 0 0 0 0 0
+
+ [...]
+
+ IOMMU: dmar2 Register Base Address: fed91000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_iotlb 0 0 18 0 0
+
+ 1ms-10ms >=10ms min(us) max(us) average(us)
+ inv_iotlb 0 0 2 2 2
+
+ 3) Enable sampling devTLB invalidation latency data
+
+ $ sudo echo 2 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_devtlb 0 0 0 0 0
+
+ >=10ms min(us) max(us) average(us)
+ inv_devtlb 0 0 0 0
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/<bdf>/domain_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps a specified page table of Intel IOMMU
+ in legacy mode or scalable mode.
+
+ For a device that only supports legacy mode, dump its
+ page table by the debugfs file in the debugfs device
+ directory. e.g.
+ /sys/kernel/debug/iommu/intel/0000:00:02.0/domain_translation_struct.
+
+ For a device that supports scalable mode, dump the
+ page table of specified pasid by the debugfs file in
+ the debugfs pasid directory. e.g.
+ /sys/kernel/debug/iommu/intel/0000:00:02.0/1/domain_translation_struct.
+
+ Examples in Kabylake:
+
+ ::
+
+ 1) Dump the page table of device "0000:00:02.0" that only supports legacy mode.
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/0000:00:02.0/domain_translation_struct
+
+ Device 0000:00:02.0 @0x1017f8000
+ IOVA_PFN PML5E PML4E
+ 0x000000008d800 | 0x0000000000000000 0x00000001017f9003
+ 0x000000008d801 | 0x0000000000000000 0x00000001017f9003
+ 0x000000008d802 | 0x0000000000000000 0x00000001017f9003
+
+ PDPE PDE PTE
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d800003
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d801003
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d802003
+
+ [...]
+
+ 2) Dump the page table of device "0000:00:0a.0" with PASID "1" that
+ supports scalable mode.
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/0000:00:0a.0/1/domain_translation_struct
+
+ Device 0000:00:0a.0 with pasid 1 @0x10c112000
+ IOVA_PFN PML5E PML4E
+ 0x0000000000000 | 0x0000000000000000 0x000000010df93003
+ 0x0000000000001 | 0x0000000000000000 0x000000010df93003
+ 0x0000000000002 | 0x0000000000000000 0x000000010df93003
+
+ PDPE PDE PTE
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c00803
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c01803
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c02803
+
+ [...]
diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa
index 4da53878bff6..2c833b5163f2 100644
--- a/Documentation/ABI/testing/sysfs-bus-vdpa
+++ b/Documentation/ABI/testing/sysfs-bus-vdpa
@@ -1,6 +1,6 @@
What: /sys/bus/vdpa/drivers_autoprobe
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
This file determines whether new devices are immediately bound
to a driver after the creation. It initially contains 1, which
@@ -12,7 +12,7 @@ Description:
What: /sys/bus/vdpa/driver_probe
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
Writing a device name to this file will cause the kernel binds
devices to a compatible driver.
@@ -22,7 +22,7 @@ Description:
What: /sys/bus/vdpa/drivers/.../bind
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
Writing a device name to this file will cause the driver to
attempt to bind to the device. This is useful for overriding
@@ -30,7 +30,7 @@ Description:
What: /sys/bus/vdpa/drivers/.../unbind
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
Writing a device name to this file will cause the driver to
attempt to unbind from the device. This may be useful when
@@ -38,7 +38,7 @@ Description:
What: /sys/bus/vdpa/devices/.../driver_override
Date: November 2021
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
This file allows the driver for a device to be specified.
When specified, only a driver with a name matching the value
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 906ff3ca928a..84aa25e0d14d 100644
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/queues/rx-<queue>/rps_cpus
+What: /sys/class/net/<iface>/queues/rx-<queue>/rps_cpus
Date: March 2010
KernelVersion: 2.6.35
Contact: netdev@vger.kernel.org
@@ -8,7 +8,7 @@ Description:
network device queue. Possible values depend on the number
of available CPU(s) in the system.
-What: /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt
+What: /sys/class/net/<iface>/queues/rx-<queue>/rps_flow_cnt
Date: April 2010
KernelVersion: 2.6.35
Contact: netdev@vger.kernel.org
@@ -16,7 +16,7 @@ Description:
Number of Receive Packet Steering flows being currently
processed by this particular network device receive queue.
-What: /sys/class/<iface>/queues/tx-<queue>/tx_timeout
+What: /sys/class/net/<iface>/queues/tx-<queue>/tx_timeout
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -24,7 +24,7 @@ Description:
Indicates the number of transmit timeout events seen by this
network interface transmit queue.
-What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+What: /sys/class/net/<iface>/queues/tx-<queue>/tx_maxrate
Date: March 2015
KernelVersion: 4.1
Contact: netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
A Mbps max-rate set for the queue, a value of zero means disabled,
default is disabled.
-What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus
+What: /sys/class/net/<iface>/queues/tx-<queue>/xps_cpus
Date: November 2010
KernelVersion: 2.6.38
Contact: netdev@vger.kernel.org
@@ -42,7 +42,7 @@ Description:
network device transmit queue. Possible values depend on the
number of available CPU(s) in the system.
-What: /sys/class/<iface>/queues/tx-<queue>/xps_rxqs
+What: /sys/class/net/<iface>/queues/tx-<queue>/xps_rxqs
Date: June 2018
KernelVersion: 4.18.0
Contact: netdev@vger.kernel.org
@@ -53,7 +53,7 @@ Description:
number of available receive queue(s) in the network device.
Default is disabled.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -62,7 +62,7 @@ Description:
of this particular network device transmit queue.
Default value is 1000.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -70,7 +70,7 @@ Description:
Indicates the number of bytes (objects) in flight on this
network device transmit queue.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -79,7 +79,7 @@ Description:
on this network device transmit queue. This value is clamped
to be within the bounds defined by limit_max and limit_min.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -88,7 +88,7 @@ Description:
queued on this network device transmit queue. See
include/linux/dynamic_queue_limits.h for the default value.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -96,3 +96,26 @@ Description:
Indicates the absolute minimum limit of bytes allowed to be
queued on this network device transmit queue. Default value is
0.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_thrs
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Tx completion stall detection threshold in ms. Kernel will
+ guarantee to detect all stalls longer than this threshold but
+ may also detect stalls longer than half of the threshold.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_cnt
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Number of detected Tx completion stalls.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_max
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Longest detected Tx completion stall. Write 0 to clear.
diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
index 55db27815361..53e508c6936a 100644
--- a/Documentation/ABI/testing/sysfs-class-net-statistics
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/statistics/collisions
+What: /sys/class/net/<iface>/statistics/collisions
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -6,7 +6,7 @@ Description:
Indicates the number of collisions seen by this network device.
This value might not be relevant with all MAC layers.
-What: /sys/class/<iface>/statistics/multicast
+What: /sys/class/net/<iface>/statistics/multicast
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -14,7 +14,7 @@ Description:
Indicates the number of multicast packets received by this
network device.
-What: /sys/class/<iface>/statistics/rx_bytes
+What: /sys/class/net/<iface>/statistics/rx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -23,7 +23,7 @@ Description:
See the network driver for the exact meaning of when this
value is incremented.
-What: /sys/class/<iface>/statistics/rx_compressed
+What: /sys/class/net/<iface>/statistics/rx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
network device. This value might only be relevant for interfaces
that support packet compression (e.g: PPP).
-What: /sys/class/<iface>/statistics/rx_crc_errors
+What: /sys/class/net/<iface>/statistics/rx_crc_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -41,7 +41,7 @@ Description:
by this network device. Note that the specific meaning might
depend on the MAC layer used by the interface.
-What: /sys/class/<iface>/statistics/rx_dropped
+What: /sys/class/net/<iface>/statistics/rx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -51,7 +51,7 @@ Description:
packet processing. See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_errors
+What: /sys/class/net/<iface>/statistics/rx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -59,7 +59,7 @@ Description:
Indicates the number of receive errors on this network device.
See the network driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_fifo_errors
+What: /sys/class/net/<iface>/statistics/rx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -68,7 +68,7 @@ Description:
network device. See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_frame_errors
+What: /sys/class/net/<iface>/statistics/rx_frame_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -78,7 +78,7 @@ Description:
on the MAC layer protocol used. See the network driver for
the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_length_errors
+What: /sys/class/net/<iface>/statistics/rx_length_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -87,7 +87,7 @@ Description:
error, oversized or undersized. See the network driver for the
exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_missed_errors
+What: /sys/class/net/<iface>/statistics/rx_missed_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -96,7 +96,7 @@ Description:
due to lack of capacity in the receive side. See the network
driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_nohandler
+What: /sys/class/net/<iface>/statistics/rx_nohandler
Date: February 2016
KernelVersion: 4.6
Contact: netdev@vger.kernel.org
@@ -104,7 +104,7 @@ Description:
Indicates the number of received packets that were dropped on
an inactive device by the network core.
-What: /sys/class/<iface>/statistics/rx_over_errors
+What: /sys/class/net/<iface>/statistics/rx_over_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -114,7 +114,7 @@ Description:
(e.g: larger than MTU). See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_packets
+What: /sys/class/net/<iface>/statistics/rx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -122,7 +122,7 @@ Description:
Indicates the total number of good packets received by this
network device.
-What: /sys/class/<iface>/statistics/tx_aborted_errors
+What: /sys/class/net/<iface>/statistics/tx_aborted_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -132,7 +132,7 @@ Description:
a medium collision). See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/tx_bytes
+What: /sys/class/net/<iface>/statistics/tx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -143,7 +143,7 @@ Description:
transmitted packets or all packets that have been queued for
transmission.
-What: /sys/class/<iface>/statistics/tx_carrier_errors
+What: /sys/class/net/<iface>/statistics/tx_carrier_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -152,7 +152,7 @@ Description:
because of carrier errors (e.g: physical link down). See the
network driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/tx_compressed
+What: /sys/class/net/<iface>/statistics/tx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -161,7 +161,7 @@ Description:
this might only be relevant for devices that support
compression (e.g: PPP).
-What: /sys/class/<iface>/statistics/tx_dropped
+What: /sys/class/net/<iface>/statistics/tx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -170,7 +170,7 @@ Description:
See the driver for the exact reasons as to why the packets were
dropped.
-What: /sys/class/<iface>/statistics/tx_errors
+What: /sys/class/net/<iface>/statistics/tx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -179,7 +179,7 @@ Description:
a network device. See the driver for the exact reasons as to
why the packets were dropped.
-What: /sys/class/<iface>/statistics/tx_fifo_errors
+What: /sys/class/net/<iface>/statistics/tx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -188,7 +188,7 @@ Description:
FIFO error. See the driver for the exact reasons as to why the
packets were dropped.
-What: /sys/class/<iface>/statistics/tx_heartbeat_errors
+What: /sys/class/net/<iface>/statistics/tx_heartbeat_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -197,7 +197,7 @@ Description:
reported as heartbeat errors. See the driver for the exact
reasons as to why the packets were dropped.
-What: /sys/class/<iface>/statistics/tx_packets
+What: /sys/class/net/<iface>/statistics/tx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -206,7 +206,7 @@ Description:
device. See the driver for whether this reports the number of all
attempted or successful transmissions.
-What: /sys/class/<iface>/statistics/tx_window_errors
+What: /sys/class/net/<iface>/statistics/tx_window_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index a1db6db47505..710d47be11e0 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -516,6 +516,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
/sys/devices/system/cpu/vulnerabilities/retbleed
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/spectre_v1
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 8d7d8f05f6cd..92fe7c5c5ac1 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -1,4 +1,4 @@
-What: /sys/devices/.../hwmon/hwmon<i>/in0_input
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/in0_input
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -6,7 +6,7 @@ Description: RO. Current Voltage in millivolt.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -20,7 +20,7 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_rated_max
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -28,7 +28,7 @@ Description: RO. Card default power limit (default TDP setting).
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max_interval
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -37,7 +37,7 @@ Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_crit
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -50,7 +50,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/curr1_crit
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -63,7 +63,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/energy1_input
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 8c321bc9dc04..023fd82de3f7 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -1,4 +1,4 @@
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -12,7 +12,7 @@ Description: RW. Card reactive sustained (PL1) power limit in microwatts.
Only supported for particular Intel xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_rated_max
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -20,7 +20,7 @@ Description: RO. Card default power limit (default TDP setting).
Only supported for particular Intel xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_crit
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -33,7 +33,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts.
Only supported for particular Intel xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr1_crit
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -44,7 +44,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes.
the operating frequency if the power averaged over a window
exceeds this limit.
-What: /sys/devices/.../hwmon/hwmon<i>/in0_input
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/in0_input
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -52,7 +52,7 @@ Description: RO. Current Voltage in millivolt.
Only supported for particular Intel xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy1_input
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -60,7 +60,7 @@ Description: RO. Energy input of device in microjoules.
Only supported for particular Intel xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max_interval
Date: October 2023
KernelVersion: 6.6
Contact: intel-xe@lists.freedesktop.org
diff --git a/Documentation/ABI/testing/sysfs-nvmem-cells b/Documentation/ABI/testing/sysfs-nvmem-cells
index 7af70adf3690..c7c9444f92a8 100644
--- a/Documentation/ABI/testing/sysfs-nvmem-cells
+++ b/Documentation/ABI/testing/sysfs-nvmem-cells
@@ -4,18 +4,18 @@ KernelVersion: 6.5
Contact: Miquel Raynal <miquel.raynal@bootlin.com>
Description:
The "cells" folder contains one file per cell exposed by the
- NVMEM device. The name of the file is: <name>@<where>, with
- <name> being the cell name and <where> its location in the NVMEM
- device, in hexadecimal (without the '0x' prefix, to mimic device
- tree node names). The length of the file is the size of the cell
- (when known). The content of the file is the binary content of
- the cell (may sometimes be ASCII, likely without trailing
- character).
+ NVMEM device. The name of the file is: "<name>@<byte>,<bit>",
+ with <name> being the cell name and <where> its location in
+ the NVMEM device, in hexadecimal bytes and bits (without the
+ '0x' prefix, to mimic device tree node names). The length of
+ the file is the size of the cell (when known). The content of
+ the file is the binary content of the cell (may sometimes be
+ ASCII, likely without trailing character).
Note: This file is only present if CONFIG_NVMEM_SYSFS
is enabled.
Example::
- hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d
+ hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d,0
00000000 54 4e 34 38 4d 2d 50 2d 44 4e |TN48M-P-DN|
0000000a
diff --git a/Documentation/ABI/testing/sysfs-platform-silicom b/Documentation/ABI/testing/sysfs-platform-silicom
index 2288b3665d16..4d1cc5bdbcc5 100644
--- a/Documentation/ABI/testing/sysfs-platform-silicom
+++ b/Documentation/ABI/testing/sysfs-platform-silicom
@@ -10,6 +10,7 @@ What: /sys/devices/platform/silicom-platform/power_cycle
Date: November 2023
KernelVersion: 6.7
Contact: Henry Shi <henrys@silicom-usa.com>
+Description:
This file allow user to power cycle the platform.
Default value is 0; when set to 1, it powers down
the platform, waits 5 seconds, then powers on the
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 3885bbe260eb..b68f8c816897 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -111,7 +111,9 @@ $(YNL_INDEX): $(YNL_RST_FILES)
$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml $(YNL_TOOL)
$(Q)$(YNL_TOOL) -i $< -o $@
-htmldocs: $(YNL_INDEX)
+htmldocs texinfodocs latexdocs epubdocs xmldocs: $(YNL_INDEX)
+
+htmldocs:
@$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
@@ -176,6 +178,7 @@ refcheckdocs:
$(Q)cd $(srctree);scripts/documentation-file-ref-check
cleandocs:
+ $(Q)rm -f $(YNL_INDEX) $(YNL_RST_FILES)
$(Q)rm -rf $(BUILDDIR)
$(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media clean
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 2d42998a89a6..3e6407de231c 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -68,7 +68,8 @@ over a rather long period of time, but improvements are always welcome!
rcu_read_lock_sched(), or by the appropriate update-side lock.
Explicit disabling of preemption (preempt_disable(), for example)
can serve as rcu_read_lock_sched(), but is less readable and
- prevents lockdep from detecting locking issues.
+ prevents lockdep from detecting locking issues. Acquiring a
+ spinlock also enters an RCU read-side critical section.
Please note that you *cannot* rely on code known to be built
only in non-preemptible kernels. Such code can and will break,
@@ -382,16 +383,17 @@ over a rather long period of time, but improvements are always welcome!
must use whatever locking or other synchronization is required
to safely access and/or modify that data structure.
- Do not assume that RCU callbacks will be executed on the same
- CPU that executed the corresponding call_rcu() or call_srcu().
- For example, if a given CPU goes offline while having an RCU
- callback pending, then that RCU callback will execute on some
- surviving CPU. (If this was not the case, a self-spawning RCU
- callback would prevent the victim CPU from ever going offline.)
- Furthermore, CPUs designated by rcu_nocbs= might well *always*
- have their RCU callbacks executed on some other CPUs, in fact,
- for some real-time workloads, this is the whole point of using
- the rcu_nocbs= kernel boot parameter.
+ Do not assume that RCU callbacks will be executed on
+ the same CPU that executed the corresponding call_rcu(),
+ call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(), or
+ call_rcu_tasks_trace(). For example, if a given CPU goes offline
+ while having an RCU callback pending, then that RCU callback
+ will execute on some surviving CPU. (If this was not the case,
+ a self-spawning RCU callback would prevent the victim CPU from
+ ever going offline.) Furthermore, CPUs designated by rcu_nocbs=
+ might well *always* have their RCU callbacks executed on some
+ other CPUs, in fact, for some real-time workloads, this is the
+ whole point of using the rcu_nocbs= kernel boot parameter.
In addition, do not assume that callbacks queued in a given order
will be invoked in that order, even if they all are queued on the
@@ -444,7 +446,7 @@ over a rather long period of time, but improvements are always welcome!
real-time workloads than is synchronize_rcu_expedited().
It is also permissible to sleep in RCU Tasks Trace read-side
- critical, which are delimited by rcu_read_lock_trace() and
+ critical section, which are delimited by rcu_read_lock_trace() and
rcu_read_unlock_trace(). However, this is a specialized flavor
of RCU, and you should not use it without first checking with
its current users. In most cases, you should instead use SRCU.
@@ -490,6 +492,12 @@ over a rather long period of time, but improvements are always welcome!
since the last time that you passed that same object to
call_rcu() (or friends).
+ CONFIG_RCU_STRICT_GRACE_PERIOD:
+ combine with KASAN to check for pointers leaked out
+ of RCU read-side critical sections. This Kconfig
+ option is tough on both performance and scalability,
+ and so is limited to four-CPU systems.
+
__rcu sparse checks:
tag the pointer to the RCU-protected data structure
with __rcu, and sparse will warn you if you access that
diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst
index 659d5913784d..2524dcdadde2 100644
--- a/Documentation/RCU/rcu_dereference.rst
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -408,7 +408,10 @@ member of the rcu_dereference() to use in various situations:
RCU flavors, an RCU read-side critical section is entered
using rcu_read_lock(), anything that disables bottom halves,
anything that disables interrupts, or anything that disables
- preemption.
+ preemption. Please note that spinlock critical sections
+ are also implied RCU read-side critical sections, even when
+ they are preemptible, as they are in kernels built with
+ CONFIG_PREEMPT_RT=y.
2. If the access might be within an RCU read-side critical section
on the one hand, or protected by (say) my_lock on the other,
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
index 49e7beea6ae1..4b1f99c4181f 100644
--- a/Documentation/RCU/torture.rst
+++ b/Documentation/RCU/torture.rst
@@ -318,7 +318,7 @@ Suppose that a previous kvm.sh run left its output in this directory::
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
-Then this run can be re-run without rebuilding as follow:
+Then this run can be re-run without rebuilding as follow::
kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 60ce02475142..872ac665223f 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -172,14 +172,25 @@ rcu_read_lock()
critical section. Reference counts may be used in conjunction
with RCU to maintain longer-term references to data structures.
+ Note that anything that disables bottom halves, preemption,
+ or interrupts also enters an RCU read-side critical section.
+ Acquiring a spinlock also enters an RCU read-side critical
+ sections, even for spinlocks that do not disable preemption,
+ as is the case in kernels built with CONFIG_PREEMPT_RT=y.
+ Sleeplocks do *not* enter RCU read-side critical sections.
+
rcu_read_unlock()
^^^^^^^^^^^^^^^^^
void rcu_read_unlock(void);
This temporal primitives is used by a reader to inform the
reclaimer that the reader is exiting an RCU read-side critical
- section. Note that RCU read-side critical sections may be nested
- and/or overlapping.
+ section. Anything that enables bottom halves, preemption,
+ or interrupts also exits an RCU read-side critical section.
+ Releasing a spinlock also exits an RCU read-side critical section.
+
+ Note that RCU read-side critical sections may be nested and/or
+ overlapping.
synchronize_rcu()
^^^^^^^^^^^^^^^^^
@@ -952,8 +963,8 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be
initialized after each and every call to kmem_cache_alloc(), which renders
reference-free spinlock acquisition completely unsafe. Therefore, when
using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter.
-(Those willing to use a kmem_cache constructor may also use locking,
-including cache-friendly sequence locking.)
+(Those willing to initialize their locks in a kmem_cache constructor
+may also use locking, including cache-friendly sequence locking.)
With traditional reference counting -- such as that implemented by the
kref library in Linux -- there is typically code that runs when the last
diff --git a/Documentation/accel/introduction.rst b/Documentation/accel/introduction.rst
index 89984dfececf..ae3030136637 100644
--- a/Documentation/accel/introduction.rst
+++ b/Documentation/accel/introduction.rst
@@ -101,8 +101,8 @@ External References
email threads
-------------
-* `Initial discussion on the New subsystem for acceleration devices <https://lkml.org/lkml/2022/7/31/83>`_ - Oded Gabbay (2022)
-* `patch-set to add the new subsystem <https://lkml.org/lkml/2022/10/22/544>`_ - Oded Gabbay (2022)
+* `Initial discussion on the New subsystem for acceleration devices <https://lore.kernel.org/lkml/CAFCwf11=9qpNAepL7NL+YAV_QO=Wv6pnWPhKHKAepK3fNn+2Dg@mail.gmail.com/>`_ - Oded Gabbay (2022)
+* `patch-set to add the new subsystem <https://lore.kernel.org/lkml/20221022214622.18042-1-ogabbay@kernel.org/>`_ - Oded Gabbay (2022)
Conference talks
----------------
diff --git a/Documentation/admin-guide/RAS/address-translation.rst b/Documentation/admin-guide/RAS/address-translation.rst
new file mode 100644
index 000000000000..f0ca17b43cd3
--- /dev/null
+++ b/Documentation/admin-guide/RAS/address-translation.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Address translation
+===================
+
+x86 AMD
+-------
+
+Zen-based AMD systems include a Data Fabric that manages the layout of
+physical memory. Devices attached to the Fabric, like memory controllers,
+I/O, etc., may not have a complete view of the system physical memory map.
+These devices may provide a "normalized", i.e. device physical, address
+when reporting memory errors. Normalized addresses must be translated to
+a system physical address for the kernel to action on the memory.
+
+AMD Address Translation Library (CONFIG_AMD_ATL) provides translation for
+this case.
+
+Glossary of acronyms used in address translation for Zen-based systems
+
+* CCM = Cache Coherent Moderator
+* COD = Cluster-on-Die
+* COH_ST = Coherent Station
+* DF = Data Fabric
diff --git a/Documentation/RAS/ras.rst b/Documentation/admin-guide/RAS/error-decoding.rst
index 2556b397cd27..26a72f3fe5de 100644
--- a/Documentation/RAS/ras.rst
+++ b/Documentation/admin-guide/RAS/error-decoding.rst
@@ -1,15 +1,10 @@
.. SPDX-License-Identifier: GPL-2.0
-Reliability, Availability and Serviceability features
-=====================================================
-
-This documents different aspects of the RAS functionality present in the
-kernel.
-
Error decoding
----------------
+==============
-* x86
+x86
+---
Error decoding on AMD systems should be done using the rasdaemon tool:
https://github.com/mchehab/rasdaemon/
diff --git a/Documentation/admin-guide/RAS/index.rst b/Documentation/admin-guide/RAS/index.rst
new file mode 100644
index 000000000000..f4087040a7c0
--- /dev/null
+++ b/Documentation/admin-guide/RAS/index.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. toctree::
+ :maxdepth: 2
+
+ main
+ error-decoding
+ address-translation
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/RAS/main.rst
index 8e03751d126d..7ac1d4ccc509 100644
--- a/Documentation/admin-guide/ras.rst
+++ b/Documentation/admin-guide/RAS/main.rst
@@ -1,8 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
.. include:: <isonum.txt>
-============================================
-Reliability, Availability and Serviceability
-============================================
+==================================================
+Reliability, Availability and Serviceability (RAS)
+==================================================
+
+This documents different aspects of the RAS functionality present in the
+kernel.
RAS concepts
************
diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 9a969c0157f1..f2bebff6a733 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -262,9 +262,11 @@ Compiling the kernel
- Make sure you have at least gcc 5.1 available.
For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
- - Do a ``make`` to create a compressed kernel image. It is also
- possible to do ``make install`` if you have lilo installed to suit the
- kernel makefiles, but you may want to check your particular lilo setup first.
+ - Do a ``make`` to create a compressed kernel image. It is also possible to do
+ ``make install`` if you have lilo installed or if your distribution has an
+ install script recognised by the kernel's installer. Most popular
+ distributions will have a recognized install script. You may want to
+ check your distribution's setup first.
To do the actual install, you have to be root, but none of the normal
build should require that. Don't take the name of root in vain.
@@ -301,32 +303,51 @@ Compiling the kernel
image (e.g. .../linux/arch/x86/boot/bzImage after compilation)
to the place where your regular bootable kernel is found.
- - Booting a kernel directly from a floppy without the assistance of a
- bootloader such as LILO, is no longer supported.
-
- If you boot Linux from the hard drive, chances are you use LILO, which
- uses the kernel image as specified in the file /etc/lilo.conf. The
- kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or
- /boot/bzImage. To use the new kernel, save a copy of the old image
- and copy the new image over the old one. Then, you MUST RERUN LILO
- to update the loading map! If you don't, you won't be able to boot
- the new kernel image.
-
- Reinstalling LILO is usually a matter of running /sbin/lilo.
- You may wish to edit /etc/lilo.conf to specify an entry for your
- old kernel image (say, /vmlinux.old) in case the new one does not
- work. See the LILO docs for more information.
-
- After reinstalling LILO, you should be all set. Shutdown the system,
+ - Booting a kernel directly from a storage device without the assistance
+ of a bootloader such as LILO or GRUB, is no longer supported in BIOS
+ (non-EFI systems). On UEFI/EFI systems, however, you can use EFISTUB
+ which allows the motherboard to boot directly to the kernel.
+ On modern workstations and desktops, it's generally recommended to use a
+ bootloader as difficulties can arise with multiple kernels and secure boot.
+ For more details on EFISTUB,
+ see "Documentation/admin-guide/efi-stub.rst".
+
+ - It's important to note that as of 2016 LILO (LInux LOader) is no longer in
+ active development, though as it was extremely popular, it often comes up
+ in documentation. Popular alternatives include GRUB2, rEFInd, Syslinux,
+ systemd-boot, or EFISTUB. For various reasons, it's not recommended to use
+ software that's no longer in active development.
+
+ - Chances are your distribution includes an install script and running
+ ``make install`` will be all that's needed. Should that not be the case
+ you'll have to identify your bootloader and reference its documentation or
+ configure your EFI.
+
+Legacy LILO Instructions
+------------------------
+
+
+ - If you use LILO the kernel images are specified in the file /etc/lilo.conf.
+ The kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or
+ /boot/bzImage. To use the new kernel, save a copy of the old image and copy
+ the new image over the old one. Then, you MUST RERUN LILO to update the
+ loading map! If you don't, you won't be able to boot the new kernel image.
+
+ - Reinstalling LILO is usually a matter of running /sbin/lilo. You may wish
+ to edit /etc/lilo.conf to specify an entry for your old kernel image
+ (say, /vmlinux.old) in case the new one does not work. See the LILO docs
+ for more information.
+
+ - After reinstalling LILO, you should be all set. Shutdown the system,
reboot, and enjoy!
- If you ever need to change the default root device, video mode,
- etc. in the kernel image, use your bootloader's boot options
- where appropriate. No need to recompile the kernel to change
- these parameters.
+ - If you ever need to change the default root device, video mode, etc. in the
+ kernel image, use your bootloader's boot options where appropriate. No need
+ to recompile the kernel to change these parameters.
- Reboot with the new kernel and enjoy.
+
If something goes wrong
-----------------------
diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
index ae646d621a8a..7d3415eea05d 100644
--- a/Documentation/admin-guide/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -179,7 +179,7 @@ files describing that cpuset:
- cpuset.mem_hardwall flag: is memory allocation hardwalled
- cpuset.memory_pressure: measure of how much paging pressure in cpuset
- cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
- - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: OBSOLETE. Doesn't have any function.
- cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
- cpuset.sched_relax_domain_level: the searching range when migrating tasks
diff --git a/Documentation/admin-guide/cgroup-v1/hugetlb.rst b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
index 0fa724d82abb..493a8e386700 100644
--- a/Documentation/admin-guide/cgroup-v1/hugetlb.rst
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
@@ -65,10 +65,12 @@ files include::
1. Page fault accounting
-hugetlb.<hugepagesize>.limit_in_bytes
-hugetlb.<hugepagesize>.max_usage_in_bytes
-hugetlb.<hugepagesize>.usage_in_bytes
-hugetlb.<hugepagesize>.failcnt
+::
+
+ hugetlb.<hugepagesize>.limit_in_bytes
+ hugetlb.<hugepagesize>.max_usage_in_bytes
+ hugetlb.<hugepagesize>.usage_in_bytes
+ hugetlb.<hugepagesize>.failcnt
The HugeTLB controller allows users to limit the HugeTLB usage (page fault) per
control group and enforces the limit during page fault. Since HugeTLB
@@ -82,10 +84,12 @@ getting SIGBUS.
2. Reservation accounting
-hugetlb.<hugepagesize>.rsvd.limit_in_bytes
-hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
-hugetlb.<hugepagesize>.rsvd.usage_in_bytes
-hugetlb.<hugepagesize>.rsvd.failcnt
+::
+
+ hugetlb.<hugepagesize>.rsvd.limit_in_bytes
+ hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
+ hugetlb.<hugepagesize>.rsvd.usage_in_bytes
+ hugetlb.<hugepagesize>.rsvd.failcnt
The HugeTLB controller allows to limit the HugeTLB reservations per control
group and enforces the controller limit at reservation time and at the fault of
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
index cde52cc09645..cc5aec861576 100644
--- a/Documentation/admin-guide/device-mapper/index.rst
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -34,6 +34,8 @@ Device Mapper
switch
thin-provisioning
unstriped
+ vdo-design
+ vdo
verity
writecache
zero
diff --git a/Documentation/admin-guide/device-mapper/vdo-design.rst b/Documentation/admin-guide/device-mapper/vdo-design.rst
new file mode 100644
index 000000000000..3cd59decbec0
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/vdo-design.rst
@@ -0,0 +1,633 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+================
+Design of dm-vdo
+================
+
+The dm-vdo (virtual data optimizer) target provides inline deduplication,
+compression, zero-block elimination, and thin provisioning. A dm-vdo target
+can be backed by up to 256TB of storage, and can present a logical size of
+up to 4PB. This target was originally developed at Permabit Technology
+Corp. starting in 2009. It was first released in 2013 and has been used in
+production environments ever since. It was made open-source in 2017 after
+Permabit was acquired by Red Hat. This document describes the design of
+dm-vdo. For usage, see vdo.rst in the same directory as this file.
+
+Because deduplication rates fall drastically as the block size increases, a
+vdo target has a maximum block size of 4K. However, it can achieve
+deduplication rates of 254:1, i.e. up to 254 copies of a given 4K block can
+reference a single 4K of actual storage. It can achieve compression rates
+of 14:1. All zero blocks consume no storage at all.
+
+Theory of Operation
+===================
+
+The design of dm-vdo is based on the idea that deduplication is a two-part
+problem. The first is to recognize duplicate data. The second is to avoid
+storing multiple copies of those duplicates. Therefore, dm-vdo has two main
+parts: a deduplication index (called UDS) that is used to discover
+duplicate data, and a data store with a reference counted block map that
+maps from logical block addresses to the actual storage location of the
+data.
+
+Zones and Threading
+-------------------
+
+Due to the complexity of data optimization, the number of metadata
+structures involved in a single write operation to a vdo target is larger
+than most other targets. Furthermore, because vdo must operate on small
+block sizes in order to achieve good deduplication rates, acceptable
+performance can only be achieved through parallelism. Therefore, vdo's
+design attempts to be lock-free.
+
+Most of a vdo's main data structures are designed to be easily divided into
+"zones" such that any given bio must only access a single zone of any zoned
+structure. Safety with minimal locking is achieved by ensuring that during
+normal operation, each zone is assigned to a specific thread, and only that
+thread will access the portion of the data structure in that zone.
+Associated with each thread is a work queue. Each bio is associated with a
+request object (the "data_vio") which will be added to a work queue when
+the next phase of its operation requires access to the structures in the
+zone associated with that queue.
+
+Another way of thinking about this arrangement is that the work queue for
+each zone has an implicit lock on the structures it manages for all its
+operations, because vdo guarantees that no other thread will alter those
+structures.
+
+Although each structure is divided into zones, this division is not
+reflected in the on-disk representation of each data structure. Therefore,
+the number of zones for each structure, and hence the number of threads,
+can be reconfigured each time a vdo target is started.
+
+The Deduplication Index
+-----------------------
+
+In order to identify duplicate data efficiently, vdo was designed to
+leverage some common characteristics of duplicate data. From empirical
+observations, we gathered two key insights. The first is that in most data
+sets with significant amounts of duplicate data, the duplicates tend to
+have temporal locality. When a duplicate appears, it is more likely that
+other duplicates will be detected, and that those duplicates will have been
+written at about the same time. This is why the index keeps records in
+temporal order. The second insight is that new data is more likely to
+duplicate recent data than it is to duplicate older data and in general,
+there are diminishing returns to looking further back in time. Therefore,
+when the index is full, it should cull its oldest records to make space for
+new ones. Another important idea behind the design of the index is that the
+ultimate goal of deduplication is to reduce storage costs. Since there is a
+trade-off between the storage saved and the resources expended to achieve
+those savings, vdo does not attempt to find every last duplicate block. It
+is sufficient to find and eliminate most of the redundancy.
+
+Each block of data is hashed to produce a 16-byte block name. An index
+record consists of this block name paired with the presumed location of
+that data on the underlying storage. However, it is not possible to
+guarantee that the index is accurate. In the most common case, this occurs
+because it is too costly to update the index when a block is over-written
+or discarded. Doing so would require either storing the block name along
+with the blocks, which is difficult to do efficiently in block-based
+storage, or reading and rehashing each block before overwriting it.
+Inaccuracy can also result from a hash collision where two different blocks
+have the same name. In practice, this is extremely unlikely, but because
+vdo does not use a cryptographic hash, a malicious workload could be
+constructed. Because of these inaccuracies, vdo treats the locations in the
+index as hints, and reads each indicated block to verify that it is indeed
+a duplicate before sharing the existing block with a new one.
+
+Records are collected into groups called chapters. New records are added to
+the newest chapter, called the open chapter. This chapter is stored in a
+format optimized for adding and modifying records, and the content of the
+open chapter is not finalized until it runs out of space for new records.
+When the open chapter fills up, it is closed and a new open chapter is
+created to collect new records.
+
+Closing a chapter converts it to a different format which is optimized for
+reading. The records are written to a series of record pages based on the
+order in which they were received. This means that records with temporal
+locality should be on a small number of pages, reducing the I/O required to
+retrieve them. The chapter also compiles an index that indicates which
+record page contains any given name. This index means that a request for a
+name can determine exactly which record page may contain that record,
+without having to load the entire chapter from storage. This index uses
+only a subset of the block name as its key, so it cannot guarantee that an
+index entry refers to the desired block name. It can only guarantee that if
+there is a record for this name, it will be on the indicated page. Closed
+chapters are read-only structures and their contents are never altered in
+any way.
+
+Once enough records have been written to fill up all the available index
+space, the oldest chapter is removed to make space for new chapters. Any
+time a request finds a matching record in the index, that record is copied
+into the open chapter. This ensures that useful block names remain available
+in the index, while unreferenced block names are forgotten over time.
+
+In order to find records in older chapters, the index also maintains a
+higher level structure called the volume index, which contains entries
+mapping each block name to the chapter containing its newest record. This
+mapping is updated as records for the block name are copied or updated,
+ensuring that only the newest record for a given block name can be found.
+An older record for a block name will no longer be found even though it has
+not been deleted from its chapter. Like the chapter index, the volume index
+uses only a subset of the block name as its key and can not definitively
+say that a record exists for a name. It can only say which chapter would
+contain the record if a record exists. The volume index is stored entirely
+in memory and is saved to storage only when the vdo target is shut down.
+
+From the viewpoint of a request for a particular block name, it will first
+look up the name in the volume index. This search will either indicate that
+the name is new, or which chapter to search. If it returns a chapter, the
+request looks up its name in the chapter index. This will indicate either
+that the name is new, or which record page to search. Finally, if it is not
+new, the request will look for its name in the indicated record page.
+This process may require up to two page reads per request (one for the
+chapter index page and one for the request page). However, recently
+accessed pages are cached so that these page reads can be amortized across
+many block name requests.
+
+The volume index and the chapter indexes are implemented using a
+memory-efficient structure called a delta index. Instead of storing the
+entire block name (the key) for each entry, the entries are sorted by name
+and only the difference between adjacent keys (the delta) is stored.
+Because we expect the hashes to be randomly distributed, the size of the
+deltas follows an exponential distribution. Because of this distribution,
+the deltas are expressed using a Huffman code to take up even less space.
+The entire sorted list of keys is called a delta list. This structure
+allows the index to use many fewer bytes per entry than a traditional hash
+table, but it is slightly more expensive to look up entries, because a
+request must read every entry in a delta list to add up the deltas in order
+to find the record it needs. The delta index reduces this lookup cost by
+splitting its key space into many sub-lists, each starting at a fixed key
+value, so that each individual list is short.
+
+The default index size can hold 64 million records, corresponding to about
+256GB of data. This means that the index can identify duplicate data if the
+original data was written within the last 256GB of writes. This range is
+called the deduplication window. If new writes duplicate data that is older
+than that, the index will not be able to find it because the records of the
+older data have been removed. This means that if an application writes a
+200 GB file to a vdo target and then immediately writes it again, the two
+copies will deduplicate perfectly. Doing the same with a 500 GB file will
+result in no deduplication, because the beginning of the file will no
+longer be in the index by the time the second write begins (assuming there
+is no duplication within the file itself).
+
+If an application anticipates a data workload that will see useful
+deduplication beyond the 256GB threshold, vdo can be configured to use a
+larger index with a correspondingly larger deduplication window. (This
+configuration can only be set when the target is created, not altered
+later. It is important to consider the expected workload for a vdo target
+before configuring it.) There are two ways to do this.
+
+One way is to increase the memory size of the index, which also increases
+the amount of backing storage required. Doubling the size of the index will
+double the length of the deduplication window at the expense of doubling
+the storage size and the memory requirements.
+
+The other option is to enable sparse indexing. Sparse indexing increases
+the deduplication window by a factor of 10, at the expense of also
+increasing the storage size by a factor of 10. However with sparse
+indexing, the memory requirements do not increase. The trade-off is
+slightly more computation per request and a slight decrease in the amount
+of deduplication detected. For most workloads with significant amounts of
+duplicate data, sparse indexing will detect 97-99% of the deduplication
+that a standard index will detect.
+
+The vio and data_vio Structures
+-------------------------------
+
+A vio (short for Vdo I/O) is conceptually similar to a bio, with additional
+fields and data to track vdo-specific information. A struct vio maintains a
+pointer to a bio but also tracks other fields specific to the operation of
+vdo. The vio is kept separate from its related bio because there are many
+circumstances where vdo completes the bio but must continue to do work
+related to deduplication or compression.
+
+Metadata reads and writes, and other writes that originate within vdo, use
+a struct vio directly. Application reads and writes use a larger structure
+called a data_vio to track information about their progress. A struct
+data_vio contain a struct vio and also includes several other fields
+related to deduplication and other vdo features. The data_vio is the
+primary unit of application work in vdo. Each data_vio proceeds through a
+set of steps to handle the application data, after which it is reset and
+returned to a pool of data_vios for reuse.
+
+There is a fixed pool of 2048 data_vios. This number was chosen to bound
+the amount of work that is required to recover from a crash. In addition,
+benchmarks have indicated that increasing the size of the pool does not
+significantly improve performance.
+
+The Data Store
+--------------
+
+The data store is implemented by three main data structures, all of which
+work in concert to reduce or amortize metadata updates across as many data
+writes as possible.
+
+*The Slab Depot*
+
+Most of the vdo volume belongs to the slab depot. The depot contains a
+collection of slabs. The slabs can be up to 32GB, and are divided into
+three sections. Most of a slab consists of a linear sequence of 4K blocks.
+These blocks are used either to store data, or to hold portions of the
+block map (see below). In addition to the data blocks, each slab has a set
+of reference counters, using 1 byte for each data block. Finally each slab
+has a journal.
+
+Reference updates are written to the slab journal. Slab journal blocks are
+written out either when they are full, or when the recovery journal
+requests they do so in order to allow the main recovery journal (see below)
+to free up space. The slab journal is used both to ensure that the main
+recovery journal can regularly free up space, and also to amortize the cost
+of updating individual reference blocks. The reference counters are kept in
+memory and are written out, a block at a time in oldest-dirtied-order, only
+when there is a need to reclaim slab journal space. The write operations
+are performed in the background as needed so they do not add latency to
+particular I/O operations.
+
+Each slab is independent of every other. They are assigned to "physical
+zones" in round-robin fashion. If there are P physical zones, then slab n
+is assigned to zone n mod P.
+
+The slab depot maintains an additional small data structure, the "slab
+summary," which is used to reduce the amount of work needed to come back
+online after a crash. The slab summary maintains an entry for each slab
+indicating whether or not the slab has ever been used, whether all of its
+reference count updates have been persisted to storage, and approximately
+how full it is. During recovery, each physical zone will attempt to recover
+at least one slab, stopping whenever it has recovered a slab which has some
+free blocks. Once each zone has some space, or has determined that none is
+available, the target can resume normal operation in a degraded mode. Read
+and write requests can be serviced, perhaps with degraded performance,
+while the remainder of the dirty slabs are recovered.
+
+*The Block Map*
+
+The block map contains the logical to physical mapping. It can be thought
+of as an array with one entry per logical address. Each entry is 5 bytes,
+36 bits of which contain the physical block number which holds the data for
+the given logical address. The other 4 bits are used to indicate the nature
+of the mapping. Of the 16 possible states, one represents a logical address
+which is unmapped (i.e. it has never been written, or has been discarded),
+one represents an uncompressed block, and the other 14 states are used to
+indicate that the mapped data is compressed, and which of the compression
+slots in the compressed block contains the data for this logical address.
+
+In practice, the array of mapping entries is divided into "block map
+pages," each of which fits in a single 4K block. Each block map page
+consists of a header and 812 mapping entries. Each mapping page is actually
+a leaf of a radix tree which consists of block map pages at each level.
+There are 60 radix trees which are assigned to "logical zones" in round
+robin fashion. (If there are L logical zones, tree n will belong to zone n
+mod L.) At each level, the trees are interleaved, so logical addresses
+0-811 belong to tree 0, logical addresses 812-1623 belong to tree 1, and so
+on. The interleaving is maintained all the way up to the 60 root nodes.
+Choosing 60 trees results in an evenly distributed number of trees per zone
+for a large number of possible logical zone counts. The storage for the 60
+tree roots is allocated at format time. All other block map pages are
+allocated out of the slabs as needed. This flexible allocation avoids the
+need to pre-allocate space for the entire set of logical mappings and also
+makes growing the logical size of a vdo relatively easy.
+
+In operation, the block map maintains two caches. It is prohibitive to keep
+the entire leaf level of the trees in memory, so each logical zone
+maintains its own cache of leaf pages. The size of this cache is
+configurable at target start time. The second cache is allocated at start
+time, and is large enough to hold all the non-leaf pages of the entire
+block map. This cache is populated as pages are needed.
+
+*The Recovery Journal*
+
+The recovery journal is used to amortize updates across the block map and
+slab depot. Each write request causes an entry to be made in the journal.
+Entries are either "data remappings" or "block map remappings." For a data
+remapping, the journal records the logical address affected and its old and
+new physical mappings. For a block map remapping, the journal records the
+block map page number and the physical block allocated for it. Block map
+pages are never reclaimed or repurposed, so the old mapping is always 0.
+
+Each journal entry is an intent record summarizing the metadata updates
+that are required for a data_vio. The recovery journal issues a flush
+before each journal block write to ensure that the physical data for the
+new block mappings in that block are stable on storage, and journal block
+writes are all issued with the FUA bit set to ensure the recovery journal
+entries themselves are stable. The journal entry and the data write it
+represents must be stable on disk before the other metadata structures may
+be updated to reflect the operation. These entries allow the vdo device to
+reconstruct the logical to physical mappings after an unexpected
+interruption such as a loss of power.
+
+*Write Path*
+
+All write I/O to vdo is asynchronous. Each bio will be acknowledged as soon
+as vdo has done enough work to guarantee that it can complete the write
+eventually. Generally, the data for acknowledged but unflushed write I/O
+can be treated as though it is cached in memory. If an application
+requires data to be stable on storage, it must issue a flush or write the
+data with the FUA bit set like any other asynchronous I/O. Shutting down
+the vdo target will also flush any remaining I/O.
+
+Application write bios follow the steps outlined below.
+
+1. A data_vio is obtained from the data_vio pool and associated with the
+ application bio. If there are no data_vios available, the incoming bio
+ will block until a data_vio is available. This provides back pressure
+ to the application. The data_vio pool is protected by a spin lock.
+
+ The newly acquired data_vio is reset and the bio's data is copied into
+ the data_vio if it is a write and the data is not all zeroes. The data
+ must be copied because the application bio can be acknowledged before
+ the data_vio processing is complete, which means later processing steps
+ will no longer have access to the application bio. The application bio
+ may also be smaller than 4K, in which case the data_vio will have
+ already read the underlying block and the data is instead copied over
+ the relevant portion of the larger block.
+
+2. The data_vio places a claim (the "logical lock") on the logical address
+ of the bio. It is vital to prevent simultaneous modifications of the
+ same logical address, because deduplication involves sharing blocks.
+ This claim is implemented as an entry in a hashtable where the key is
+ the logical address and the value is a pointer to the data_vio
+ currently handling that address.
+
+ If a data_vio looks in the hashtable and finds that another data_vio is
+ already operating on that logical address, it waits until the previous
+ operation finishes. It also sends a message to inform the current
+ lock holder that it is waiting. Most notably, a new data_vio waiting
+ for a logical lock will flush the previous lock holder out of the
+ compression packer (step 8d) rather than allowing it to continue
+ waiting to be packed.
+
+ This stage requires the data_vio to get an implicit lock on the
+ appropriate logical zone to prevent concurrent modifications of the
+ hashtable. This implicit locking is handled by the zone divisions
+ described above.
+
+3. The data_vio traverses the block map tree to ensure that all the
+ necessary internal tree nodes have been allocated, by trying to find
+ the leaf page for its logical address. If any interior tree page is
+ missing, it is allocated at this time out of the same physical storage
+ pool used to store application data.
+
+ a. If any page-node in the tree has not yet been allocated, it must be
+ allocated before the write can continue. This step requires the
+ data_vio to lock the page-node that needs to be allocated. This
+ lock, like the logical block lock in step 2, is a hashtable entry
+ that causes other data_vios to wait for the allocation process to
+ complete.
+
+ The implicit logical zone lock is released while the allocation is
+ happening, in order to allow other operations in the same logical
+ zone to proceed. The details of allocation are the same as in
+ step 4. Once a new node has been allocated, that node is added to
+ the tree using a similar process to adding a new data block mapping.
+ The data_vio journals the intent to add the new node to the block
+ map tree (step 10), updates the reference count of the new block
+ (step 11), and reacquires the implicit logical zone lock to add the
+ new mapping to the parent tree node (step 12). Once the tree is
+ updated, the data_vio proceeds down the tree. Any other data_vios
+ waiting on this allocation also proceed.
+
+ b. In the steady-state case, the block map tree nodes will already be
+ allocated, so the data_vio just traverses the tree until it finds
+ the required leaf node. The location of the mapping (the "block map
+ slot") is recorded in the data_vio so that later steps do not need
+ to traverse the tree again. The data_vio then releases the implicit
+ logical zone lock.
+
+4. If the block is a zero block, skip to step 9. Otherwise, an attempt is
+ made to allocate a free data block. This allocation ensures that the
+ data_vio can write its data somewhere even if deduplication and
+ compression are not possible. This stage gets an implicit lock on a
+ physical zone to search for free space within that zone.
+
+ The data_vio will search each slab in a zone until it finds a free
+ block or decides there are none. If the first zone has no free space,
+ it will proceed to search the next physical zone by taking the implicit
+ lock for that zone and releasing the previous one until it finds a
+ free block or runs out of zones to search. The data_vio will acquire a
+ struct pbn_lock (the "physical block lock") on the free block. The
+ struct pbn_lock also has several fields to record the various kinds of
+ claims that data_vios can have on physical blocks. The pbn_lock is
+ added to a hashtable like the logical block locks in step 2. This
+ hashtable is also covered by the implicit physical zone lock. The
+ reference count of the free block is updated to prevent any other
+ data_vio from considering it free. The reference counters are a
+ sub-component of the slab and are thus also covered by the implicit
+ physical zone lock.
+
+5. If an allocation was obtained, the data_vio has all the resources it
+ needs to complete the write. The application bio can safely be
+ acknowledged at this point. The acknowledgment happens on a separate
+ thread to prevent the application callback from blocking other data_vio
+ operations.
+
+ If an allocation could not be obtained, the data_vio continues to
+ attempt to deduplicate or compress the data, but the bio is not
+ acknowledged because the vdo device may be out of space.
+
+6. At this point vdo must determine where to store the application data.
+ The data_vio's data is hashed and the hash (the "record name") is
+ recorded in the data_vio.
+
+7. The data_vio reserves or joins a struct hash_lock, which manages all of
+ the data_vios currently writing the same data. Active hash locks are
+ tracked in a hashtable similar to the way logical block locks are
+ tracked in step 2. This hashtable is covered by the implicit lock on
+ the hash zone.
+
+ If there is no existing hash lock for this data_vio's record_name, the
+ data_vio obtains a hash lock from the pool, adds it to the hashtable,
+ and sets itself as the new hash lock's "agent." The hash_lock pool is
+ also covered by the implicit hash zone lock. The hash lock agent will
+ do all the work to decide where the application data will be
+ written. If a hash lock for the data_vio's record_name already exists,
+ and the data_vio's data is the same as the agent's data, the new
+ data_vio will wait for the agent to complete its work and then share
+ its result.
+
+ In the rare case that a hash lock exists for the data_vio's hash but
+ the data does not match the hash lock's agent, the data_vio skips to
+ step 8h and attempts to write its data directly. This can happen if two
+ different data blocks produce the same hash, for example.
+
+8. The hash lock agent attempts to deduplicate or compress its data with
+ the following steps.
+
+ a. The agent initializes and sends its embedded deduplication request
+ (struct uds_request) to the deduplication index. This does not
+ require the data_vio to get any locks because the index components
+ manage their own locking. The data_vio waits until it either gets a
+ response from the index or times out.
+
+ b. If the deduplication index returns advice, the data_vio attempts to
+ obtain a physical block lock on the indicated physical address, in
+ order to read the data and verify that it is the same as the
+ data_vio's data, and that it can accept more references. If the
+ physical address is already locked by another data_vio, the data at
+ that address may soon be overwritten so it is not safe to use the
+ address for deduplication.
+
+ c. If the data matches and the physical block can add references, the
+ agent and any other data_vios waiting on it will record this
+ physical block as their new physical address and proceed to step 9
+ to record their new mapping. If there are more data_vios in the hash
+ lock than there are references available, one of the remaining
+ data_vios becomes the new agent and continues to step 8d as if no
+ valid advice was returned.
+
+ d. If no usable duplicate block was found, the agent first checks that
+ it has an allocated physical block (from step 3) that it can write
+ to. If the agent does not have an allocation, some other data_vio in
+ the hash lock that does have an allocation takes over as agent. If
+ none of the data_vios have an allocated physical block, these writes
+ are out of space, so they proceed to step 13 for cleanup.
+
+ e. The agent attempts to compress its data. If the data does not
+ compress, the data_vio will continue to step 8h to write its data
+ directly.
+
+ If the compressed size is small enough, the agent will release the
+ implicit hash zone lock and go to the packer (struct packer) where
+ it will be placed in a bin (struct packer_bin) along with other
+ data_vios. All compression operations require the implicit lock on
+ the packer zone.
+
+ The packer can combine up to 14 compressed blocks in a single 4k
+ data block. Compression is only helpful if vdo can pack at least 2
+ data_vios into a single data block. This means that a data_vio may
+ wait in the packer for an arbitrarily long time for other data_vios
+ to fill out the compressed block. There is a mechanism for vdo to
+ evict waiting data_vios when continuing to wait would cause
+ problems. Circumstances causing an eviction include an application
+ flush, device shutdown, or a subsequent data_vio trying to overwrite
+ the same logical block address. A data_vio may also be evicted from
+ the packer if it cannot be paired with any other compressed block
+ before more compressible blocks need to use its bin. An evicted
+ data_vio will proceed to step 8h to write its data directly.
+
+ f. If the agent fills a packer bin, either because all 14 of its slots
+ are used or because it has no remaining space, it is written out
+ using the allocated physical block from one of its data_vios. Step
+ 8d has already ensured that an allocation is available.
+
+ g. Each data_vio sets the compressed block as its new physical address.
+ The data_vio obtains an implicit lock on the physical zone and
+ acquires the struct pbn_lock for the compressed block, which is
+ modified to be a shared lock. Then it releases the implicit physical
+ zone lock and proceeds to step 8i.
+
+ h. Any data_vio evicted from the packer will have an allocation from
+ step 3. It will write its data to that allocated physical block.
+
+ i. After the data is written, if the data_vio is the agent of a hash
+ lock, it will reacquire the implicit hash zone lock and share its
+ physical address with as many other data_vios in the hash lock as
+ possible. Each data_vio will then proceed to step 9 to record its
+ new mapping.
+
+ j. If the agent actually wrote new data (whether compressed or not),
+ the deduplication index is updated to reflect the location of the
+ new data. The agent then releases the implicit hash zone lock.
+
+9. The data_vio determines the previous mapping of the logical address.
+ There is a cache for block map leaf pages (the "block map cache"),
+ because there are usually too many block map leaf nodes to store
+ entirely in memory. If the desired leaf page is not in the cache, the
+ data_vio will reserve a slot in the cache and load the desired page
+ into it, possibly evicting an older cached page. The data_vio then
+ finds the current physical address for this logical address (the "old
+ physical mapping"), if any, and records it. This step requires a lock
+ on the block map cache structures, covered by the implicit logical zone
+ lock.
+
+10. The data_vio makes an entry in the recovery journal containing the
+ logical block address, the old physical mapping, and the new physical
+ mapping. Making this journal entry requires holding the implicit
+ recovery journal lock. The data_vio will wait in the journal until all
+ recovery blocks up to the one containing its entry have been written
+ and flushed to ensure the transaction is stable on storage.
+
+11. Once the recovery journal entry is stable, the data_vio makes two slab
+ journal entries: an increment entry for the new mapping, and a
+ decrement entry for the old mapping. These two operations each require
+ holding a lock on the affected physical slab, covered by its implicit
+ physical zone lock. For correctness during recovery, the slab journal
+ entries in any given slab journal must be in the same order as the
+ corresponding recovery journal entries. Therefore, if the two entries
+ are in different zones, they are made concurrently, and if they are in
+ the same zone, the increment is always made before the decrement in
+ order to avoid underflow. After each slab journal entry is made in
+ memory, the associated reference count is also updated in memory.
+
+12. Once both of the reference count updates are done, the data_vio
+ acquires the implicit logical zone lock and updates the
+ logical-to-physical mapping in the block map to point to the new
+ physical block. At this point the write operation is complete.
+
+13. If the data_vio has a hash lock, it acquires the implicit hash zone
+ lock and releases its hash lock to the pool.
+
+ The data_vio then acquires the implicit physical zone lock and releases
+ the struct pbn_lock it holds for its allocated block. If it had an
+ allocation that it did not use, it also sets the reference count for
+ that block back to zero to free it for use by subsequent data_vios.
+
+ The data_vio then acquires the implicit logical zone lock and releases
+ the logical block lock acquired in step 2.
+
+ The application bio is then acknowledged if it has not previously been
+ acknowledged, and the data_vio is returned to the pool.
+
+*Read Path*
+
+An application read bio follows a much simpler set of steps. It does steps
+1 and 2 in the write path to obtain a data_vio and lock its logical
+address. If there is already a write data_vio in progress for that logical
+address that is guaranteed to complete, the read data_vio will copy the
+data from the write data_vio and return it. Otherwise, it will look up the
+logical-to-physical mapping by traversing the block map tree as in step 3,
+and then read and possibly decompress the indicated data at the indicated
+physical block address. A read data_vio will not allocate block map tree
+nodes if they are missing. If the interior block map nodes do not exist
+yet, the logical block map address must still be unmapped and the read
+data_vio will return all zeroes. A read data_vio handles cleanup and
+acknowledgment as in step 13, although it only needs to release the logical
+lock and return itself to the pool.
+
+*Small Writes*
+
+All storage within vdo is managed as 4KB blocks, but it can accept writes
+as small as 512 bytes. Processing a write that is smaller than 4K requires
+a read-modify-write operation that reads the relevant 4K block, copies the
+new data over the approriate sectors of the block, and then launches a
+write operation for the modified data block. The read and write stages of
+this operation are nearly identical to the normal read and write
+operations, and a single data_vio is used throughout this operation.
+
+*Recovery*
+
+When a vdo is restarted after a crash, it will attempt to recover from the
+recovery journal. During the pre-resume phase of the next start, the
+recovery journal is read. The increment portion of valid entries are played
+into the block map. Next, valid entries are played, in order as required,
+into the slab journals. Finally, each physical zone attempts to replay at
+least one slab journal to reconstruct the reference counts of one slab.
+Once each zone has some free space (or has determined that it has none),
+the vdo comes back online, while the remainder of the slab journals are
+used to reconstruct the rest of the reference counts in the background.
+
+*Read-only Rebuild*
+
+If a vdo encounters an unrecoverable error, it will enter read-only mode.
+This mode indicates that some previously acknowledged data may have been
+lost. The vdo may be instructed to rebuild as best it can in order to
+return to a writable state. However, this is never done automatically due
+to the possibility that data has been lost. During a read-only rebuild, the
+block map is recovered from the recovery journal as before. However, the
+reference counts are not rebuilt from the slab journals. Instead, the
+reference counts are zeroed, the entire block map is traversed, and the
+reference counts are updated from the block mappings. While this may lose
+some data, it ensures that the block map and reference counts are
+consistent with each other. This allows vdo to resume normal operation and
+accept further writes.
diff --git a/Documentation/admin-guide/device-mapper/vdo.rst b/Documentation/admin-guide/device-mapper/vdo.rst
new file mode 100644
index 000000000000..7e1ecafdf91e
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/vdo.rst
@@ -0,0 +1,406 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+dm-vdo
+======
+
+The dm-vdo (virtual data optimizer) device mapper target provides
+block-level deduplication, compression, and thin provisioning. As a device
+mapper target, it can add these features to the storage stack, compatible
+with any file system. The vdo target does not protect against data
+corruption, relying instead on integrity protection of the storage below
+it. It is strongly recommended that lvm be used to manage vdo volumes. See
+lvmvdo(7).
+
+Userspace component
+===================
+
+Formatting a vdo volume requires the use of the 'vdoformat' tool, available
+at:
+
+https://github.com/dm-vdo/vdo/
+
+In most cases, a vdo target will recover from a crash automatically the
+next time it is started. In cases where it encountered an unrecoverable
+error (either during normal operation or crash recovery) the target will
+enter or come up in read-only mode. Because read-only mode is indicative of
+data-loss, a positive action must be taken to bring vdo out of read-only
+mode. The 'vdoforcerebuild' tool, available from the same repo, is used to
+prepare a read-only vdo to exit read-only mode. After running this tool,
+the vdo target will rebuild its metadata the next time it is
+started. Although some data may be lost, the rebuilt vdo's metadata will be
+internally consistent and the target will be writable again.
+
+The repo also contains additional userspace tools which can be used to
+inspect a vdo target's on-disk metadata. Fortunately, these tools are
+rarely needed except by dm-vdo developers.
+
+Metadata requirements
+=====================
+
+Each vdo volume reserves 3GB of space for metadata, or more depending on
+its configuration. It is helpful to check that the space saved by
+deduplication and compression is not cancelled out by the metadata
+requirements. An estimation of the space saved for a specific dataset can
+be computed with the vdo estimator tool, which is available at:
+
+https://github.com/dm-vdo/vdoestimator/
+
+Target interface
+================
+
+Table line
+----------
+
+::
+
+ <offset> <logical device size> vdo V4 <storage device>
+ <storage device size> <minimum I/O size> <block map cache size>
+ <block map era length> [optional arguments]
+
+
+Required parameters:
+
+ offset:
+ The offset, in sectors, at which the vdo volume's logical
+ space begins.
+
+ logical device size:
+ The size of the device which the vdo volume will service,
+ in sectors. Must match the current logical size of the vdo
+ volume.
+
+ storage device:
+ The device holding the vdo volume's data and metadata.
+
+ storage device size:
+ The size of the device holding the vdo volume, as a number
+ of 4096-byte blocks. Must match the current size of the vdo
+ volume.
+
+ minimum I/O size:
+ The minimum I/O size for this vdo volume to accept, in
+ bytes. Valid values are 512 or 4096. The recommended value
+ is 4096.
+
+ block map cache size:
+ The size of the block map cache, as a number of 4096-byte
+ blocks. The minimum and recommended value is 32768 blocks.
+ If the logical thread count is non-zero, the cache size
+ must be at least 4096 blocks per logical thread.
+
+ block map era length:
+ The speed with which the block map cache writes out
+ modified block map pages. A smaller era length is likely to
+ reduce the amount of time spent rebuilding, at the cost of
+ increased block map writes during normal operation. The
+ maximum and recommended value is 16380; the minimum value
+ is 1.
+
+Optional parameters:
+--------------------
+Some or all of these parameters may be specified as <key> <value> pairs.
+
+Thread related parameters:
+
+Different categories of work are assigned to separate thread groups, and
+the number of threads in each group can be configured separately.
+
+If <hash>, <logical>, and <physical> are all set to 0, the work handled by
+all three thread types will be handled by a single thread. If any of these
+values are non-zero, all of them must be non-zero.
+
+ ack:
+ The number of threads used to complete bios. Since
+ completing a bio calls an arbitrary completion function
+ outside the vdo volume, threads of this type allow the vdo
+ volume to continue processing requests even when bio
+ completion is slow. The default is 1.
+
+ bio:
+ The number of threads used to issue bios to the underlying
+ storage. Threads of this type allow the vdo volume to
+ continue processing requests even when bio submission is
+ slow. The default is 4.
+
+ bioRotationInterval:
+ The number of bios to enqueue on each bio thread before
+ switching to the next thread. The value must be greater
+ than 0 and not more than 1024; the default is 64.
+
+ cpu:
+ The number of threads used to do CPU-intensive work, such
+ as hashing and compression. The default is 1.
+
+ hash:
+ The number of threads used to manage data comparisons for
+ deduplication based on the hash value of data blocks. The
+ default is 0.
+
+ logical:
+ The number of threads used to manage caching and locking
+ based on the logical address of incoming bios. The default
+ is 0; the maximum is 60.
+
+ physical:
+ The number of threads used to manage administration of the
+ underlying storage device. At format time, a slab size for
+ the vdo is chosen; the vdo storage device must be large
+ enough to have at least 1 slab per physical thread. The
+ default is 0; the maximum is 16.
+
+Miscellaneous parameters:
+
+ maxDiscard:
+ The maximum size of discard bio accepted, in 4096-byte
+ blocks. I/O requests to a vdo volume are normally split
+ into 4096-byte blocks, and processed up to 2048 at a time.
+ However, discard requests to a vdo volume can be
+ automatically split to a larger size, up to <maxDiscard>
+ 4096-byte blocks in a single bio, and are limited to 1500
+ at a time. Increasing this value may provide better overall
+ performance, at the cost of increased latency for the
+ individual discard requests. The default and minimum is 1;
+ the maximum is UINT_MAX / 4096.
+
+ deduplication:
+ Whether deduplication is enabled. The default is 'on'; the
+ acceptable values are 'on' and 'off'.
+
+ compression:
+ Whether compression is enabled. The default is 'off'; the
+ acceptable values are 'on' and 'off'.
+
+Device modification
+-------------------
+
+A modified table may be loaded into a running, non-suspended vdo volume.
+The modifications will take effect when the device is next resumed. The
+modifiable parameters are <logical device size>, <physical device size>,
+<maxDiscard>, <compression>, and <deduplication>.
+
+If the logical device size or physical device size are changed, upon
+successful resume vdo will store the new values and require them on future
+startups. These two parameters may not be decreased. The logical device
+size may not exceed 4 PB. The physical device size must increase by at
+least 32832 4096-byte blocks if at all, and must not exceed the size of the
+underlying storage device. Additionally, when formatting the vdo device, a
+slab size is chosen: the physical device size may never increase above the
+size which provides 8192 slabs, and each increase must be large enough to
+add at least one new slab.
+
+Examples:
+
+Start a previously-formatted vdo volume with 1 GB logical space and 1 GB
+physical space, storing to /dev/dm-1 which has more than 1 GB of space.
+
+::
+
+ dmsetup create vdo0 --table \
+ "0 2097152 vdo V4 /dev/dm-1 262144 4096 32768 16380"
+
+Grow the logical size to 4 GB.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 8388608 vdo V4 /dev/dm-1 262144 4096 32768 16380"
+ dmsetup resume vdo0
+
+Grow the physical size to 2 GB.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 8388608 vdo V4 /dev/dm-1 524288 4096 32768 16380"
+ dmsetup resume vdo0
+
+Grow the physical size by 1 GB more and increase max discard sectors.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 10485760 vdo V4 /dev/dm-1 786432 4096 32768 16380 maxDiscard 8"
+ dmsetup resume vdo0
+
+Stop the vdo volume.
+
+::
+
+ dmsetup remove vdo0
+
+Start the vdo volume again. Note that the logical and physical device sizes
+must still match, but other parameters can change.
+
+::
+
+ dmsetup create vdo1 --table \
+ "0 10485760 vdo V4 /dev/dm-1 786432 512 65550 5000 hash 1 logical 3 physical 2"
+
+Messages
+--------
+All vdo devices accept messages in the form:
+
+::
+ dmsetup message <target-name> 0 <message-name> <message-parameters>
+
+The messages are:
+
+ stats:
+ Outputs the current view of the vdo statistics. Mostly used
+ by the vdostats userspace program to interpret the output
+ buffer.
+
+ dump:
+ Dumps many internal structures to the system log. This is
+ not always safe to run, so it should only be used to debug
+ a hung vdo. Optional parameters to specify structures to
+ dump are:
+
+ viopool: The pool of I/O requests incoming bios
+ pools: A synonym of 'viopool'
+ vdo: Most of the structures managing on-disk data
+ queues: Basic information about each vdo thread
+ threads: A synonym of 'queues'
+ default: Equivalent to 'queues vdo'
+ all: All of the above.
+
+ dump-on-shutdown:
+ Perform a default dump next time vdo shuts down.
+
+
+Status
+------
+
+::
+
+ <device> <operating mode> <in recovery> <index state>
+ <compression state> <physical blocks used> <total physical blocks>
+
+ device:
+ The name of the vdo volume.
+
+ operating mode:
+ The current operating mode of the vdo volume; values may be
+ 'normal', 'recovering' (the volume has detected an issue
+ with its metadata and is attempting to repair itself), and
+ 'read-only' (an error has occurred that forces the vdo
+ volume to only support read operations and not writes).
+
+ in recovery:
+ Whether the vdo volume is currently in recovery mode;
+ values may be 'recovering' or '-' which indicates not
+ recovering.
+
+ index state:
+ The current state of the deduplication index in the vdo
+ volume; values may be 'closed', 'closing', 'error',
+ 'offline', 'online', 'opening', and 'unknown'.
+
+ compression state:
+ The current state of compression in the vdo volume; values
+ may be 'offline' and 'online'.
+
+ used physical blocks:
+ The number of physical blocks in use by the vdo volume.
+
+ total physical blocks:
+ The total number of physical blocks the vdo volume may use;
+ the difference between this value and the
+ <used physical blocks> is the number of blocks the vdo
+ volume has left before being full.
+
+Memory Requirements
+===================
+
+A vdo target requires a fixed 38 MB of RAM along with the following amounts
+that scale with the target:
+
+- 1.15 MB of RAM for each 1 MB of configured block map cache size. The
+ block map cache requires a minimum of 150 MB.
+- 1.6 MB of RAM for each 1 TB of logical space.
+- 268 MB of RAM for each 1 TB of physical storage managed by the volume.
+
+The deduplication index requires additional memory which scales with the
+size of the deduplication window. For dense indexes, the index requires 1
+GB of RAM per 1 TB of window. For sparse indexes, the index requires 1 GB
+of RAM per 10 TB of window. The index configuration is set when the target
+is formatted and may not be modified.
+
+Module Parameters
+=================
+
+The vdo driver has a numeric parameter 'log_level' which controls the
+verbosity of logging from the driver. The default setting is 6
+(LOGLEVEL_INFO and more severe messages).
+
+Run-time Usage
+==============
+
+When using dm-vdo, it is important to be aware of the ways in which its
+behavior differs from other storage targets.
+
+- There is no guarantee that over-writes of existing blocks will succeed.
+ Because the underlying storage may be multiply referenced, over-writing
+ an existing block generally requires a vdo to have a free block
+ available.
+
+- When blocks are no longer in use, sending a discard request for those
+ blocks lets the vdo release references for those blocks. If the vdo is
+ thinly provisioned, discarding unused blocks is essential to prevent the
+ target from running out of space. However, due to the sharing of
+ duplicate blocks, no discard request for any given logical block is
+ guaranteed to reclaim space.
+
+- Assuming the underlying storage properly implements flush requests, vdo
+ is resilient against crashes, however, unflushed writes may or may not
+ persist after a crash.
+
+- Each write to a vdo target entails a significant amount of processing.
+ However, much of the work is paralellizable. Therefore, vdo targets
+ achieve better throughput at higher I/O depths, and can support up 2048
+ requests in parallel.
+
+Tuning
+======
+
+The vdo device has many options, and it can be difficult to make optimal
+choices without perfect knowledge of the workload. Additionally, most
+configuration options must be set when a vdo target is started, and cannot
+be changed without shutting it down completely; the configuration cannot be
+changed while the target is active. Ideally, tuning with simulated
+workloads should be performed before deploying vdo in production
+environments.
+
+The most important value to adjust is the block map cache size. In order to
+service a request for any logical address, a vdo must load the portion of
+the block map which holds the relevant mapping. These mappings are cached.
+Performance will suffer when the working set does not fit in the cache. By
+default, a vdo allocates 128 MB of metadata cache in RAM to support
+efficient access to 100 GB of logical space at a time. It should be scaled
+up proportionally for larger working sets.
+
+The logical and physical thread counts should also be adjusted. A logical
+thread controls a disjoint section of the block map, so additional logical
+threads increase parallelism and can increase throughput. Physical threads
+control a disjoint section of the data blocks, so additional physical
+threads can also increase throughput. However, excess threads can waste
+resources and increase contention.
+
+Bio submission threads control the parallelism involved in sending I/O to
+the underlying storage; fewer threads mean there is more opportunity to
+reorder I/O requests for performance benefit, but also that each I/O
+request has to wait longer before being submitted.
+
+Bio acknowledgment threads are used for finishing I/O requests. This is
+done on dedicated threads since the amount of work required to execute a
+bio's callback can not be controlled by the vdo itself. Usually one thread
+is sufficient but additional threads may be beneficial, particularly when
+bios have CPU-heavy callbacks.
+
+CPU threads are used for hashing and for compression; in workloads with
+compression enabled, more threads may result in higher throughput.
+
+Hash threads are used to sort active requests by hash and determine whether
+they should deduplicate; the most CPU intensive actions done by these
+threads are comparison of 4096-byte data blocks. In most cases, a single
+hash thread is sufficient.
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index de99caabf65a..ff0b440ef2dc 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -21,3 +21,4 @@ are configurable at compile, boot or run time.
cross-thread-rsb
srso
gather_data_sampling
+ reg-file-data-sampling
diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
new file mode 100644
index 000000000000..0585d02b9a6c
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
@@ -0,0 +1,104 @@
+==================================
+Register File Data Sampling (RFDS)
+==================================
+
+Register File Data Sampling (RFDS) is a microarchitectural vulnerability that
+only affects Intel Atom parts(also branded as E-cores). RFDS may allow
+a malicious actor to infer data values previously used in floating point
+registers, vector registers, or integer registers. RFDS does not provide the
+ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS.
+
+Affected Processors
+===================
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============
+ Common name Family_Model
+ =================== ============
+ ATOM_GOLDMONT 06_5CH
+ ATOM_GOLDMONT_D 06_5FH
+ ATOM_GOLDMONT_PLUS 06_7AH
+ ATOM_TREMONT_D 06_86H
+ ATOM_TREMONT 06_96H
+ ALDERLAKE 06_97H
+ ALDERLAKE_L 06_9AH
+ ATOM_TREMONT_L 06_9CH
+ RAPTORLAKE 06_B7H
+ RAPTORLAKE_P 06_BAH
+ ATOM_GRACEMONT 06_BEH
+ RAPTORLAKE_S 06_BFH
+ =================== ============
+
+As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and
+RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as
+vulnerable in Linux because they share the same family/model with an affected
+part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or
+CPUID.HYBRID. This information could be used to distinguish between the
+affected and unaffected parts, but it is deemed not worth adding complexity as
+the reporting is fixed automatically when these parts enumerate RFDS_NO.
+
+Mitigation
+==========
+Intel released a microcode update that enables software to clear sensitive
+information using the VERW instruction. Like MDS, RFDS deploys the same
+mitigation strategy to force the CPU to clear the affected buffers before an
+attacker can extract the secrets. This is achieved by using the otherwise
+unused and obsolete VERW instruction in combination with a microcode update.
+The microcode clears the affected CPU buffers when the VERW instruction is
+executed.
+
+Mitigation points
+-----------------
+VERW is executed by the kernel before returning to user space, and by KVM
+before VMentry. None of the affected cores support SMT, so VERW is not required
+at C-state transitions.
+
+New bits in IA32_ARCH_CAPABILITIES
+----------------------------------
+Newer processors and microcode update on existing affected processors added new
+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+vulnerability and mitigation capability:
+
+- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS.
+- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the
+ microcode that clears the affected buffers on VERW execution.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows to control RFDS mitigation at boot time with the
+parameter "reg_file_data_sampling=". The valid arguments are:
+
+ ========== =================================================================
+ on If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and before entering a VM.
+ off Disables mitigation.
+ ========== =================================================================
+
+Mitigation default is selected by CONFIG_MITIGATION_RFDS.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: No microcode'
+ - The processor is vulnerable but microcode is not updated.
+ * - 'Mitigation: Clear Register File'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 32a8893e5617..cce768afec6b 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -473,8 +473,8 @@ Spectre variant 2
-mindirect-branch=thunk-extern -mindirect-branch-register options.
If the kernel is compiled with a Clang compiler, the compiler needs
to support -mretpoline-external-thunk option. The kernel config
- CONFIG_RETPOLINE needs to be turned on, and the CPU needs to run with
- the latest updated microcode.
+ CONFIG_MITIGATION_RETPOLINE needs to be turned on, and the CPU needs
+ to run with the latest updated microcode.
On Intel Skylake-era systems the mitigation covers most, but not all,
cases. See :ref:`[3] <spec_ref3>` for more details.
@@ -609,8 +609,8 @@ kernel command line.
Selecting 'on' will, and 'auto' may, choose a
mitigation method at run time according to the
CPU, the available microcode, the setting of the
- CONFIG_RETPOLINE configuration option, and the
- compiler with which the kernel was built.
+ CONFIG_MITIGATION_RETPOLINE configuration option,
+ and the compiler with which the kernel was built.
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index fb40a1f6f79e..32ea52f1d150 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -1,3 +1,4 @@
+=================================================
The Linux kernel user's and administrator's guide
=================================================
@@ -37,6 +38,7 @@ problems and bugs in particular.
reporting-issues
reporting-regressions
quickly-build-trimmed-linux
+ verify-bugs-and-bisect-regressions
bug-hunting
bug-bisect
tainted-kernels
@@ -122,7 +124,7 @@ configure specific aspects of kernel behavior to your liking.
pmf
pnp
rapidio
- ras
+ RAS/index
rtc
serial-console
svga
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index 5762e7477a0c..0302a93b1d40 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -191,9 +191,7 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
CPU is enough for kdump kernel to dump vmcore on most of systems.
However, you can also specify nr_cpus=X to enable multiple processors
- in kdump kernel. In this case, "disable_cpu_apicid=" is needed to
- tell kdump kernel which cpu is 1st kernel's BSP. Please refer to
- admin-guide/kernel-parameters.txt for more details.
+ in kdump kernel.
With CONFIG_SMP=n, the above things are not related.
@@ -454,8 +452,7 @@ Notes on loading the dump-capture kernel:
to use multi-thread programs with it, such as parallel dump feature of
makedumpfile. Otherwise, the multi-thread program may have a great
performance degradation. To enable multi-cpu support, you should bring up an
- SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
- options while loading it.
+ SMP dump-capture kernel and specify maxcpus/nr_cpus options while loading it.
* For s390x there are two kdump modes: If a ELF header is specified with
the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 102937bc8443..e8bdf5e86a9b 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -108,6 +108,7 @@ is applicable::
CMA Contiguous Memory Area support is enabled.
DRM Direct Rendering Management support is enabled.
DYNAMIC_DEBUG Build in debug messages and enable them at runtime
+ EARLY Parameter processed too early to be embedded in initrd.
EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
EFI EFI Partitioning (GPT) is enabled
EVM Extended Verification Module
@@ -218,8 +219,3 @@ bytes respectively. Such letter suffixes can also be entirely omitted:
.. include:: kernel-parameters.txt
:literal:
-
-Todo
-----
-
- Add more DRM drivers.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31b3a25680d0..26534c8e927a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -9,7 +9,7 @@
accept_memory=eager can be used to accept all memory
at once during boot.
- acpi= [HW,ACPI,X86,ARM64,RISCV64]
+ acpi= [HW,ACPI,X86,ARM64,RISCV64,EARLY]
Advanced Configuration and Power Interface
Format: { force | on | off | strict | noirq | rsdt |
copy_dsdt }
@@ -26,7 +26,7 @@
See also Documentation/power/runtime_pm.rst, pci=noacpi
- acpi_apic_instance= [ACPI, IOAPIC]
+ acpi_apic_instance= [ACPI,IOAPIC,EARLY]
Format: <int>
2: use 2nd APIC table, if available
1,0: use 1st APIC table
@@ -41,7 +41,7 @@
If set to native, use the device's native backlight mode.
If set to none, disable the ACPI backlight interface.
- acpi_force_32bit_fadt_addr
+ acpi_force_32bit_fadt_addr [ACPI,EARLY]
force FADT to use 32 bit addresses rather than the
64 bit X_* addresses. Some firmware have broken 64
bit addresses for force ACPI ignore these and use
@@ -97,7 +97,7 @@
no: ACPI OperationRegions are not marked as reserved,
no further checks are performed.
- acpi_force_table_verification [HW,ACPI]
+ acpi_force_table_verification [HW,ACPI,EARLY]
Enable table checksum verification during early stage.
By default, this is disabled due to x86 early mapping
size limitation.
@@ -137,7 +137,7 @@
acpi_no_memhotplug [ACPI] Disable memory hotplug. Useful for kdump
kernels.
- acpi_no_static_ssdt [HW,ACPI]
+ acpi_no_static_ssdt [HW,ACPI,EARLY]
Disable installation of static SSDTs at early boot time
By default, SSDTs contained in the RSDT/XSDT will be
installed automatically and they will appear under
@@ -151,7 +151,7 @@
Ignore the ACPI-based watchdog interface (WDAT) and let
a native driver control the watchdog device instead.
- acpi_rsdp= [ACPI,EFI,KEXEC]
+ acpi_rsdp= [ACPI,EFI,KEXEC,EARLY]
Pass the RSDP address to the kernel, mostly used
on machines running EFI runtime service to boot the
second kernel for kdump.
@@ -228,10 +228,10 @@
to assume that this machine's pmtimer latches its value
and always returns good values.
- acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
+ acpi_sci= [HW,ACPI,EARLY] ACPI System Control Interrupt trigger mode
Format: { level | edge | high | low }
- acpi_skip_timer_override [HW,ACPI]
+ acpi_skip_timer_override [HW,ACPI,EARLY]
Recognize and ignore IRQ0/pin2 Interrupt Override.
For broken nForce2 BIOS resulting in XT-PIC timer.
@@ -266,11 +266,11 @@
behave incorrectly in some ways with respect to system
suspend and resume to be ignored (use wisely).
- acpi_use_timer_override [HW,ACPI]
+ acpi_use_timer_override [HW,ACPI,EARLY]
Use timer override. For some broken Nvidia NF5 boards
that require a timer override, but don't have HPET
- add_efi_memmap [EFI; X86] Include EFI memory map in
+ add_efi_memmap [EFI,X86,EARLY] Include EFI memory map in
kernel's map of available physical RAM.
agp= [AGP]
@@ -307,7 +307,7 @@
do not want to use tracing_snapshot_alloc() as it needs
to be done where GFP_KERNEL allocations are allowed.
- allow_mismatched_32bit_el0 [ARM64]
+ allow_mismatched_32bit_el0 [ARM64,EARLY]
Allow execve() of 32-bit applications and setting of the
PER_LINUX32 personality on systems where only a strict
subset of the CPUs support 32-bit EL0. When this
@@ -351,7 +351,7 @@
This mode requires kvm-amd.avic=1.
(Default when IOMMU HW support is present.)
- amd_pstate= [X86]
+ amd_pstate= [X86,EARLY]
disable
Do not enable amd_pstate as the default
scaling driver for the supported processors
@@ -391,7 +391,7 @@
not play well with APC CPU idle - disable it if you have
APC and your system crashes randomly.
- apic= [APIC,X86] Advanced Programmable Interrupt Controller
+ apic= [APIC,X86,EARLY] Advanced Programmable Interrupt Controller
Change the output verbosity while booting
Format: { quiet (default) | verbose | debug }
Change the amount of debugging information output
@@ -401,7 +401,7 @@
Format: apic=driver_name
Examples: apic=bigsmp
- apic_extnmi= [APIC,X86] External NMI delivery setting
+ apic_extnmi= [APIC,X86,EARLY] External NMI delivery setting
Format: { bsp (default) | all | none }
bsp: External NMI is delivered only to CPU 0
all: External NMIs are broadcast to all CPUs as a
@@ -508,21 +508,22 @@
bert_disable [ACPI]
Disable BERT OS support on buggy BIOSes.
- bgrt_disable [ACPI][X86]
+ bgrt_disable [ACPI,X86,EARLY]
Disable BGRT to avoid flickering OEM logo.
blkdevparts= Manual partition parsing of block device(s) for
embedded devices based on command line input.
See Documentation/block/cmdline-partition.rst
- boot_delay= Milliseconds to delay each printk during boot.
+ boot_delay= [KNL,EARLY]
+ Milliseconds to delay each printk during boot.
Only works if CONFIG_BOOT_PRINTK_DELAY is enabled,
and you may also have to specify "lpj=". Boot_delay
values larger than 10 seconds (10000) are assumed
erroneous and ignored.
Format: integer
- bootconfig [KNL]
+ bootconfig [KNL,EARLY]
Extended command line options can be added to an initrd
and this will cause the kernel to look for it.
@@ -557,7 +558,7 @@
trust validation.
format: { id:<keyid> | builtin }
- cca= [MIPS] Override the kernel pages' cache coherency
+ cca= [MIPS,EARLY] Override the kernel pages' cache coherency
algorithm. Accepted values range from 0 to 7
inclusive. See arch/mips/include/asm/pgtable-bits.h
for platform specific values (SB1, Loongson3 and
@@ -672,19 +673,13 @@
[X86-64] hpet,tsc
clocksource.arm_arch_timer.evtstrm=
- [ARM,ARM64]
+ [ARM,ARM64,EARLY]
Format: <bool>
Enable/disable the eventstream feature of the ARM
architected timer so that code using WFE-based polling
loops can be debugged more effectively on production
systems.
- clocksource.max_cswd_read_retries= [KNL]
- Number of clocksource_watchdog() retries due to
- external delays before the clock will be marked
- unstable. Defaults to two retries, that is,
- three attempts to read the clock under test.
-
clocksource.verify_n_cpus= [KNL]
Limit the number of CPUs checked for clocksources
marked with CLOCK_SOURCE_VERIFY_PERCPU that
@@ -702,7 +697,7 @@
10 seconds when built into the kernel.
cma=nn[MG]@[start[MG][-end[MG]]]
- [KNL,CMA]
+ [KNL,CMA,EARLY]
Sets the size of kernel global memory area for
contiguous memory allocations and optionally the
placement constraint by the physical address range of
@@ -711,7 +706,7 @@
kernel/dma/contiguous.c
cma_pernuma=nn[MG]
- [KNL,CMA]
+ [KNL,CMA,EARLY]
Sets the size of kernel per-numa memory area for
contiguous memory allocations. A value of 0 disables
per-numa CMA altogether. And If this option is not
@@ -722,7 +717,7 @@
they will fallback to the global default memory area.
numa_cma=<node>:nn[MG][,<node>:nn[MG]]
- [KNL,CMA]
+ [KNL,CMA,EARLY]
Sets the size of kernel numa memory area for
contiguous memory allocations. It will reserve CMA
area for the specified node.
@@ -739,7 +734,7 @@
a hypervisor.
Default: yes
- coherent_pool=nn[KMG] [ARM,KNL]
+ coherent_pool=nn[KMG] [ARM,KNL,EARLY]
Sets the size of memory pool for coherent, atomic dma
allocations, by default set to 256K.
@@ -757,7 +752,7 @@
condev= [HW,S390] console device
conmode=
- con3215_drop= [S390] 3215 console drop mode.
+ con3215_drop= [S390,EARLY] 3215 console drop mode.
Format: y|n|Y|N|1|0
When set to true, drop data on the 3215 console when
the console buffer is full. In this case the
@@ -863,7 +858,7 @@
kernel before the cpufreq driver probes.
cpu_init_udelay=N
- [X86] Delay for N microsec between assert and de-assert
+ [X86,EARLY] Delay for N microsec between assert and de-assert
of APIC INIT to start processors. This delay occurs
on every CPU online, such as boot, and resume from suspend.
Default: 10000
@@ -883,7 +878,7 @@
kernel more unstable.
crashkernel=size[KMG][@offset[KMG]]
- [KNL] Using kexec, Linux can switch to a 'crash kernel'
+ [KNL,EARLY] Using kexec, Linux can switch to a 'crash kernel'
upon panic. This parameter reserves the physical
memory region [offset, offset + size] for that kernel
image. If '@offset' is omitted, then a suitable offset
@@ -954,10 +949,10 @@
Format: <port#>,<type>
See also Documentation/input/devices/joystick-parport.rst
- debug [KNL] Enable kernel debugging (events log level).
+ debug [KNL,EARLY] Enable kernel debugging (events log level).
debug_boot_weak_hash
- [KNL] Enable printing [hashed] pointers early in the
+ [KNL,EARLY] Enable printing [hashed] pointers early in the
boot sequence. If enabled, we use a weak hash instead
of siphash to hash pointers. Use this option if you are
seeing instances of '(___ptrval___)') and need to see a
@@ -974,10 +969,10 @@
will print _a_lot_ more information - normally only
useful to lockdep developers.
- debug_objects [KNL] Enable object debugging
+ debug_objects [KNL,EARLY] Enable object debugging
debug_guardpage_minorder=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+ [KNL,EARLY] When CONFIG_DEBUG_PAGEALLOC is set, this
parameter allows control of the order of pages that will
be intentionally kept free (and hence protected) by the
buddy allocator. Bigger value increase the probability
@@ -996,7 +991,7 @@
help tracking down these problems.
debug_pagealloc=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
+ [KNL,EARLY] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
enables the feature at boot time. By default, it is
disabled and the system will work mostly the same as a
kernel built without CONFIG_DEBUG_PAGEALLOC.
@@ -1004,8 +999,8 @@
useful to also enable the page_owner functionality.
on: enable the feature
- debugfs= [KNL] This parameter enables what is exposed to userspace
- and debugfs internal clients.
+ debugfs= [KNL,EARLY] This parameter enables what is exposed to
+ userspace and debugfs internal clients.
Format: { on, no-mount, off }
on: All functions are enabled.
no-mount:
@@ -1084,7 +1079,7 @@
dhash_entries= [KNL]
Set number of hash buckets for dentry cache.
- disable_1tb_segments [PPC]
+ disable_1tb_segments [PPC,EARLY]
Disables the use of 1TB hash page table segments. This
causes the kernel to fall back to 256MB segments which
can be useful when debugging issues that require an SLB
@@ -1093,41 +1088,32 @@
disable= [IPV6]
See Documentation/networking/ipv6.rst.
- disable_radix [PPC]
+ disable_radix [PPC,EARLY]
Disable RADIX MMU mode on POWER9
disable_tlbie [PPC]
Disable TLBIE instruction. Currently does not work
with KVM, with HASH MMU, or with coherent accelerators.
- disable_cpu_apicid= [X86,APIC,SMP]
- Format: <int>
- The number of initial APIC ID for the
- corresponding CPU to be disabled at boot,
- mostly used for the kdump 2nd kernel to
- disable BSP to wake up multiple CPUs without
- causing system reset or hang due to sending
- INIT from AP to BSP.
-
- disable_ddw [PPC/PSERIES]
+ disable_ddw [PPC/PSERIES,EARLY]
Disable Dynamic DMA Window support. Use this
to workaround buggy firmware.
disable_ipv6= [IPV6]
See Documentation/networking/ipv6.rst.
- disable_mtrr_cleanup [X86]
+ disable_mtrr_cleanup [X86,EARLY]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter disables that.
- disable_mtrr_trim [X86, Intel and AMD only]
+ disable_mtrr_trim [X86, Intel and AMD only,EARLY]
By default the kernel will trim any uncacheable
memory out of your available memory pool based on
MTRR settings. This parameter disables that behavior,
possibly causing your machine to run very slowly.
- disable_timer_pin_1 [X86]
+ disable_timer_pin_1 [X86,EARLY]
Disable PIN 1 of APIC timer
Can be useful to work around chipset bugs.
@@ -1150,6 +1136,26 @@
The filter can be disabled or changed to another
driver later using sysfs.
+ reg_file_data_sampling=
+ [X86] Controls mitigation for Register File Data
+ Sampling (RFDS) vulnerability. RFDS is a CPU
+ vulnerability which may allow userspace to infer
+ kernel data values previously stored in floating point
+ registers, vector registers, or integer registers.
+ RFDS only affects Intel Atom processors.
+
+ on: Turns ON the mitigation.
+ off: Turns OFF the mitigation.
+
+ This parameter overrides the compile time default set
+ by CONFIG_MITIGATION_RFDS. Mitigation cannot be
+ disabled when other VERW based mitigations (like MDS)
+ are enabled. In order to disable RFDS mitigation all
+ VERW based mitigations need to be disabled.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
+
driver_async_probe= [KNL]
List of driver names to be probed asynchronously. *
matches with all driver names. If * is specified, the
@@ -1177,7 +1183,7 @@
dscc4.setup= [NET]
- dt_cpu_ftrs= [PPC]
+ dt_cpu_ftrs= [PPC,EARLY]
Format: {"off" | "known"}
Control how the dt_cpu_ftrs device-tree binding is
used for CPU feature discovery and setup (if it
@@ -1197,12 +1203,12 @@
Documentation/admin-guide/dynamic-debug-howto.rst
for details.
- early_ioremap_debug [KNL]
+ early_ioremap_debug [KNL,EARLY]
Enable debug messages in early_ioremap support. This
is useful for tracking down temporary early mappings
which are not unmapped.
- earlycon= [KNL] Output early console device and options.
+ earlycon= [KNL,EARLY] Output early console device and options.
When used with no options, the early console is
determined by stdout-path property in device tree's
@@ -1338,7 +1344,7 @@
address must be provided, and the serial port must
already be setup and configured.
- earlyprintk= [X86,SH,ARM,M68k,S390]
+ earlyprintk= [X86,SH,ARM,M68k,S390,UM,EARLY]
earlyprintk=vga
earlyprintk=sclp
earlyprintk=xen
@@ -1396,7 +1402,7 @@
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
- efi= [EFI]
+ efi= [EFI,EARLY]
Format: { "debug", "disable_early_pci_dma",
"nochunk", "noruntime", "nosoftreserve",
"novamap", "no_disable_early_pci_dma" }
@@ -1417,13 +1423,13 @@
no_disable_early_pci_dma: Leave the busmaster bit set
on all PCI bridges while in the EFI boot stub
- efi_no_storage_paranoia [EFI; X86]
+ efi_no_storage_paranoia [EFI,X86,EARLY]
Using this parameter you can use more than 50% of
your efi variable storage. Use this parameter only if
you are really sure that your UEFI does sane gc and
fulfills the spec otherwise your board may brick.
- efi_fake_mem= nn[KMG]@ss[KMG]:aa[,nn[KMG]@ss[KMG]:aa,..] [EFI; X86]
+ efi_fake_mem= nn[KMG]@ss[KMG]:aa[,nn[KMG]@ss[KMG]:aa,..] [EFI,X86,EARLY]
Add arbitrary attribute to specific memory range by
updating original EFI memory map.
Region of memory which aa attribute is added to is
@@ -1454,7 +1460,7 @@
eisa_irq_edge= [PARISC,HW]
See header of drivers/parisc/eisa.c.
- ekgdboc= [X86,KGDB] Allow early kernel console debugging
+ ekgdboc= [X86,KGDB,EARLY] Allow early kernel console debugging
Format: ekgdboc=kbd
This is designed to be used in conjunction with
@@ -1469,13 +1475,13 @@
See comment before function elanfreq_setup() in
arch/x86/kernel/cpu/cpufreq/elanfreq.c.
- elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390]
+ elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390,EARLY]
Specifies physical address of start of kernel core
image elf header and optionally the size. Generally
kexec loader will pass this option to capture kernel.
See Documentation/admin-guide/kdump/kdump.rst for details.
- enable_mtrr_cleanup [X86]
+ enable_mtrr_cleanup [X86,EARLY]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter enables that.
@@ -1508,7 +1514,7 @@
Permit 'security.evm' to be updated regardless of
current integrity status.
- early_page_ext [KNL] Enforces page_ext initialization to earlier
+ early_page_ext [KNL,EARLY] Enforces page_ext initialization to earlier
stages so cover more early boot allocations.
Please note that as side effect some optimizations
might be disabled to achieve that (e.g. parallelized
@@ -1539,6 +1545,12 @@
Warning: use of this parameter will taint the kernel
and may cause unknown problems.
+ fred= [X86-64]
+ Enable/disable Flexible Return and Event Delivery.
+ Format: { on | off }
+ on: enable FRED when it's present.
+ off: disable FRED, the default setting.
+
ftrace=[tracer]
[FTRACE] will set and start the specified tracer
as early as possible in order to facilitate early
@@ -1600,7 +1612,7 @@
can be changed at run time by the max_graph_depth file
in the tracefs tracing directory. default: 0 (no limit)
- fw_devlink= [KNL] Create device links between consumer and supplier
+ fw_devlink= [KNL,EARLY] Create device links between consumer and supplier
devices by scanning the firmware to infer the
consumer/supplier relationships. This feature is
especially useful when drivers are loaded as modules as
@@ -1619,12 +1631,12 @@
rpm -- Like "on", but also use to order runtime PM.
fw_devlink.strict=<bool>
- [KNL] Treat all inferred dependencies as mandatory
+ [KNL,EARLY] Treat all inferred dependencies as mandatory
dependencies. This only applies for fw_devlink=on|rpm.
Format: <bool>
fw_devlink.sync_state =
- [KNL] When all devices that could probe have finished
+ [KNL,EARLY] When all devices that could probe have finished
probing, this parameter controls what to do with
devices that haven't yet received their sync_state()
calls.
@@ -1645,12 +1657,12 @@
gamma= [HW,DRM]
- gart_fix_e820= [X86-64] disable the fix e820 for K8 GART
+ gart_fix_e820= [X86-64,EARLY] disable the fix e820 for K8 GART
Format: off | on
default: on
gather_data_sampling=
- [X86,INTEL] Control the Gather Data Sampling (GDS)
+ [X86,INTEL,EARLY] Control the Gather Data Sampling (GDS)
mitigation.
Gather Data Sampling is a hardware vulnerability which
@@ -1748,7 +1760,7 @@
(that will set all pages holding image data
during restoration read-only).
- highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
+ highmem=nn[KMG] [KNL,BOOT,EARLY] forces the highmem zone to have an exact
size of <nn>. This works even on boxes that have no
highmem otherwise. This also works to reduce highmem
size on bigger boxes.
@@ -1759,7 +1771,7 @@
hlt [BUGS=ARM,SH]
- hostname= [KNL] Set the hostname (aka UTS nodename).
+ hostname= [KNL,EARLY] Set the hostname (aka UTS nodename).
Format: <string>
This allows setting the system's hostname during early
startup. This sets the name returned by gethostname.
@@ -1804,7 +1816,7 @@
Documentation/admin-guide/mm/hugetlbpage.rst.
Format: size[KMG]
- hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation
+ hugetlb_cma= [HW,CMA,EARLY] The size of a CMA area used for allocation
of gigantic hugepages. Or using node format, the size
of a CMA area per node can be specified.
Format: nn[KMGTPE] or (node format)
@@ -1850,9 +1862,10 @@
If specified, z/VM IUCV HVC accepts connections
from listed z/VM user IDs only.
- hv_nopvspin [X86,HYPER_V] Disables the paravirt spinlock optimizations
- which allow the hypervisor to 'idle' the
- guest on lock contention.
+ hv_nopvspin [X86,HYPER_V,EARLY]
+ Disables the paravirt spinlock optimizations
+ which allow the hypervisor to 'idle' the guest
+ on lock contention.
i2c_bus= [HW] Override the default board specific I2C bus speed
or register an additional I2C bus that is not
@@ -1917,7 +1930,7 @@
Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
- idle= [X86]
+ idle= [X86,EARLY]
Format: idle=poll, idle=halt, idle=nomwait
Poll forces a polling idle loop that can slightly
improve the performance of waking up a idle CPU, but
@@ -1973,7 +1986,7 @@
mode generally follows that for the NaN encoding,
except where unsupported by hardware.
- ignore_loglevel [KNL]
+ ignore_loglevel [KNL,EARLY]
Ignore loglevel setting - this will print /all/
kernel messages to the console. Useful for debugging.
We also add it as printk module parameter, so users
@@ -2091,21 +2104,21 @@
unpacking being completed before device_ and
late_ initcalls.
- initrd= [BOOT] Specify the location of the initial ramdisk
+ initrd= [BOOT,EARLY] Specify the location of the initial ramdisk
- initrdmem= [KNL] Specify a physical address and size from which to
+ initrdmem= [KNL,EARLY] Specify a physical address and size from which to
load the initrd. If an initrd is compiled in or
specified in the bootparams, it takes priority over this
setting.
Format: ss[KMG],nn[KMG]
Default is 0, 0
- init_on_alloc= [MM] Fill newly allocated pages and heap objects with
+ init_on_alloc= [MM,EARLY] Fill newly allocated pages and heap objects with
zeroes.
Format: 0 | 1
Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
- init_on_free= [MM] Fill freed pages and heap objects with zeroes.
+ init_on_free= [MM,EARLY] Fill freed pages and heap objects with zeroes.
Format: 0 | 1
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
@@ -2161,7 +2174,7 @@
0 disables intel_idle and fall back on acpi_idle.
1 to 9 specify maximum depth of C-state.
- intel_pstate= [X86]
+ intel_pstate= [X86,EARLY]
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
@@ -2205,7 +2218,7 @@
Allow per-logical-CPU P-State performance control limits using
cpufreq sysfs interface
- intremap= [X86-64, Intel-IOMMU]
+ intremap= [X86-64,Intel-IOMMU,EARLY]
on enable Interrupt Remapping (default)
off disable Interrupt Remapping
nosid disable Source ID checking
@@ -2217,7 +2230,7 @@
strict regions from userspace.
relaxed
- iommu= [X86]
+ iommu= [X86,EARLY]
off
force
noforce
@@ -2232,7 +2245,7 @@
nobypass [PPC/POWERNV]
Disable IOMMU bypass, using IOMMU for PCI devices.
- iommu.forcedac= [ARM64, X86] Control IOVA allocation for PCI devices.
+ iommu.forcedac= [ARM64,X86,EARLY] Control IOVA allocation for PCI devices.
Format: { "0" | "1" }
0 - Try to allocate a 32-bit DMA address first, before
falling back to the full range if needed.
@@ -2240,7 +2253,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
- iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour
+ iommu.strict= [ARM64,X86,S390,EARLY] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@@ -2256,7 +2269,7 @@
legacy driver-specific options takes precedence.
iommu.passthrough=
- [ARM64, X86] Configure DMA to bypass the IOMMU by default.
+ [ARM64,X86,EARLY] Configure DMA to bypass the IOMMU by default.
Format: { "0" | "1" }
0 - Use IOMMU translation for DMA.
1 - Bypass the IOMMU for DMA.
@@ -2266,7 +2279,7 @@
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
- io_delay= [X86] I/O delay method
+ io_delay= [X86,EARLY] I/O delay method
0x80
Standard port 0x80 based delay
0xed
@@ -2279,28 +2292,28 @@
ip= [IP_PNP]
See Documentation/admin-guide/nfs/nfsroot.rst.
- ipcmni_extend [KNL] Extend the maximum number of unique System V
+ ipcmni_extend [KNL,EARLY] Extend the maximum number of unique System V
IPC identifiers from 32,768 to 16,777,216.
irqaffinity= [SMP] Set the default irq affinity mask
The argument is a cpu list, as described above.
irqchip.gicv2_force_probe=
- [ARM, ARM64]
+ [ARM,ARM64,EARLY]
Format: <bool>
Force the kernel to look for the second 4kB page
of a GICv2 controller even if the memory range
exposed by the device tree is too small.
irqchip.gicv3_nolpi=
- [ARM, ARM64]
+ [ARM,ARM64,EARLY]
Force the kernel to ignore the availability of
LPIs (and by consequence ITSs). Intended for system
that use the kernel as a bootloader, and thus want
to let secondary kernels in charge of setting up
LPIs.
- irqchip.gicv3_pseudo_nmi= [ARM64]
+ irqchip.gicv3_pseudo_nmi= [ARM64,EARLY]
Enables support for pseudo-NMIs in the kernel. This
requires the kernel to be built with
CONFIG_ARM64_PSEUDO_NMI.
@@ -2445,7 +2458,7 @@
parameter KASAN will print report only for the first
invalid access.
- keep_bootcon [KNL]
+ keep_bootcon [KNL,EARLY]
Do not unregister boot console at start. This is only
useful for debugging when something happens in the window
between unregistering the boot console and initializing
@@ -2453,7 +2466,7 @@
keepinitrd [HW,ARM] See retain_initrd.
- kernelcore= [KNL,X86,IA-64,PPC]
+ kernelcore= [KNL,X86,IA-64,PPC,EARLY]
Format: nn[KMGTPE] | nn% | "mirror"
This parameter specifies the amount of memory usable by
the kernel for non-movable allocations. The requested
@@ -2478,7 +2491,7 @@
for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
are exclusive, so you cannot specify multiple forms.
- kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
+ kgdbdbgp= [KGDB,HW,EARLY] kgdb over EHCI usb debug port.
Format: <Controller#>[,poll interval]
The controller # is the number of the ehci usb debug
port as it is probed via PCI. The poll interval is
@@ -2499,7 +2512,7 @@
kms, kbd format: kms,kbd
kms, kbd and serial format: kms,kbd,<ser_dev>[,baud]
- kgdboc_earlycon= [KGDB,HW]
+ kgdboc_earlycon= [KGDB,HW,EARLY]
If the boot console provides the ability to read
characters and can work in polling mode, you can use
this parameter to tell kgdb to use it as a backend
@@ -2514,14 +2527,14 @@
blank and the first boot console that implements
read() will be picked.
- kgdbwait [KGDB] Stop kernel execution and enter the
+ kgdbwait [KGDB,EARLY] Stop kernel execution and enter the
kernel debugger at the earliest opportunity.
kmac= [MIPS] Korina ethernet MAC address.
Configure the RouterBoard 532 series on-chip
Ethernet adapter MAC address.
- kmemleak= [KNL] Boot-time kmemleak enable/disable
+ kmemleak= [KNL,EARLY] Boot-time kmemleak enable/disable
Valid arguments: on, off
Default: on
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
@@ -2540,8 +2553,8 @@
See also Documentation/trace/kprobetrace.rst "Kernel
Boot Parameter" section.
- kpti= [ARM64] Control page table isolation of user
- and kernel address spaces.
+ kpti= [ARM64,EARLY] Control page table isolation of
+ user and kernel address spaces.
Default: enabled on cores which need mitigation.
0: force disabled
1: force enabled
@@ -2618,7 +2631,8 @@
for NPT.
kvm-arm.mode=
- [KVM,ARM] Select one of KVM/arm64's modes of operation.
+ [KVM,ARM,EARLY] Select one of KVM/arm64's modes of
+ operation.
none: Forcefully disable KVM.
@@ -2638,22 +2652,22 @@
used with extreme caution.
kvm-arm.vgic_v3_group0_trap=
- [KVM,ARM] Trap guest accesses to GICv3 group-0
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
system registers
kvm-arm.vgic_v3_group1_trap=
- [KVM,ARM] Trap guest accesses to GICv3 group-1
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-1
system registers
kvm-arm.vgic_v3_common_trap=
- [KVM,ARM] Trap guest accesses to GICv3 common
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 common
system registers
kvm-arm.vgic_v4_enable=
- [KVM,ARM] Allow use of GICv4 for direct injection of
- LPIs.
+ [KVM,ARM,EARLY] Allow use of GICv4 for direct
+ injection of LPIs.
- kvm_cma_resv_ratio=n [PPC]
+ kvm_cma_resv_ratio=n [PPC,EARLY]
Reserves given percentage from system memory area for
contiguous memory allocation for KVM hash pagetable
allocation.
@@ -2706,7 +2720,7 @@
(enabled). Disable by KVM if hardware lacks support
for it.
- l1d_flush= [X86,INTEL]
+ l1d_flush= [X86,INTEL,EARLY]
Control mitigation for L1D based snooping vulnerability.
Certain CPUs are vulnerable to an exploit against CPU
@@ -2723,7 +2737,7 @@
on - enable the interface for the mitigation
- l1tf= [X86] Control mitigation of the L1TF vulnerability on
+ l1tf= [X86,EARLY] Control mitigation of the L1TF vulnerability on
affected CPUs
The kernel PTE inversion protection is unconditionally
@@ -2792,7 +2806,7 @@
l3cr= [PPC]
- lapic [X86-32,APIC] Enable the local APIC even if BIOS
+ lapic [X86-32,APIC,EARLY] Enable the local APIC even if BIOS
disabled it.
lapic= [X86,APIC] Do not use TSC deadline
@@ -2800,7 +2814,7 @@
back to the programmable timer unit in the LAPIC.
Format: notscdeadline
- lapic_timer_c2_ok [X86,APIC] trust the local apic timer
+ lapic_timer_c2_ok [X86,APIC,EARLY] trust the local apic timer
in C2 power state.
libata.dma= [LIBATA] DMA control
@@ -2924,7 +2938,7 @@
lockd.nlm_udpport=M [NFS] Assign UDP port.
Format: <integer>
- lockdown= [SECURITY]
+ lockdown= [SECURITY,EARLY]
{ integrity | confidentiality }
Enable the kernel lockdown feature. If set to
integrity, kernel features that allow userland to
@@ -3031,7 +3045,8 @@
logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver
Format: <irq>
- loglevel= All Kernel Messages with a loglevel smaller than the
+ loglevel= [KNL,EARLY]
+ All Kernel Messages with a loglevel smaller than the
console loglevel will be printed to the console. It can
also be changed with klogd or other programs. The
loglevels are defined as follows:
@@ -3045,13 +3060,15 @@
6 (KERN_INFO) informational
7 (KERN_DEBUG) debug-level messages
- log_buf_len=n[KMG] Sets the size of the printk ring buffer,
- in bytes. n must be a power of two and greater
- than the minimal size. The minimal size is defined
- by LOG_BUF_SHIFT kernel config parameter. There is
- also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
- that allows to increase the default size depending on
- the number of CPUs. See init/Kconfig for more details.
+ log_buf_len=n[KMG] [KNL,EARLY]
+ Sets the size of the printk ring buffer, in bytes.
+ n must be a power of two and greater than the
+ minimal size. The minimal size is defined by
+ LOG_BUF_SHIFT kernel config parameter. There
+ is also CONFIG_LOG_CPU_MAX_BUF_SHIFT config
+ parameter that allows to increase the default size
+ depending on the number of CPUs. See init/Kconfig
+ for more details.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
@@ -3109,7 +3126,7 @@
max_addr=nn[KMG] [KNL,BOOT,IA-64] All physical memory greater
than or equal to this physical address is ignored.
- maxcpus= [SMP] Maximum number of processors that an SMP kernel
+ maxcpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
will bring up during bootup. maxcpus=n : n >= 0 limits
the kernel to bring up 'n' processors. Surely after
bootup you can bring up the other plugged cpu by executing
@@ -3136,7 +3153,7 @@
Format: <first>,<last>
Specifies range of consoles to be captured by the MDA.
- mds= [X86,INTEL]
+ mds= [X86,INTEL,EARLY]
Control mitigation for the Micro-architectural Data
Sampling (MDS) vulnerability.
@@ -3168,11 +3185,12 @@
For details see: Documentation/admin-guide/hw-vuln/mds.rst
- mem=nn[KMG] [HEXAGON] Set the memory size.
+ mem=nn[KMG] [HEXAGON,EARLY] Set the memory size.
Must be specified, otherwise memory size will be 0.
- mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
- Amount of memory to be used in cases as follows:
+ mem=nn[KMG] [KNL,BOOT,EARLY] Force usage of a specific amount
+ of memory Amount of memory to be used in cases
+ as follows:
1 for test;
2 when the kernel is not able to see the whole system memory;
@@ -3196,8 +3214,8 @@
if system memory of hypervisor is not sufficient.
mem=nn[KMG]@ss[KMG]
- [ARM,MIPS] - override the memory layout reported by
- firmware.
+ [ARM,MIPS,EARLY] - override the memory layout
+ reported by firmware.
Define a memory region of size nn[KMG] starting at
ss[KMG].
Multiple different regions can be specified with
@@ -3206,7 +3224,7 @@
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
memory.
- memblock=debug [KNL] Enable memblock debug messages.
+ memblock=debug [KNL,EARLY] Enable memblock debug messages.
memchunk=nn[KMG]
[KNL,SH] Allow user to override the default size for
@@ -3220,14 +3238,14 @@
option.
See Documentation/admin-guide/mm/memory-hotplug.rst.
- memmap=exactmap [KNL,X86] Enable setting of an exact
+ memmap=exactmap [KNL,X86,EARLY] Enable setting of an exact
E820 memory map, as specified by the user.
Such memmap=exactmap lines can be constructed based on
BIOS output or other requirements. See the memmap=nn@ss
option description.
memmap=nn[KMG]@ss[KMG]
- [KNL, X86, MIPS, XTENSA] Force usage of a specific region of memory.
+ [KNL, X86,MIPS,XTENSA,EARLY] Force usage of a specific region of memory.
Region of memory to be used is from ss to ss+nn.
If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
which limits max address to nn[KMG].
@@ -3237,11 +3255,11 @@
memmap=100M@2G,100M#3G,1G!1024G
memmap=nn[KMG]#ss[KMG]
- [KNL,ACPI] Mark specific memory as ACPI data.
+ [KNL,ACPI,EARLY] Mark specific memory as ACPI data.
Region of memory to be marked is from ss to ss+nn.
memmap=nn[KMG]$ss[KMG]
- [KNL,ACPI] Mark specific memory as reserved.
+ [KNL,ACPI,EARLY] Mark specific memory as reserved.
Region of memory to be reserved is from ss to ss+nn.
Example: Exclude memory from 0x18690000-0x1869ffff
memmap=64K$0x18690000
@@ -3251,14 +3269,14 @@
like Grub2, otherwise '$' and the following number
will be eaten.
- memmap=nn[KMG]!ss[KMG]
+ memmap=nn[KMG]!ss[KMG,EARLY]
[KNL,X86] Mark specific memory as protected.
Region of memory to be used, from ss to ss+nn.
The memory region may be marked as e820 type 12 (0xc)
and is NVDIMM or ADR memory.
memmap=<size>%<offset>-<oldtype>+<newtype>
- [KNL,ACPI] Convert memory within the specified region
+ [KNL,ACPI,EARLY] Convert memory within the specified region
from <oldtype> to <newtype>. If "-<oldtype>" is left
out, the whole region will be marked as <newtype>,
even if previously unavailable. If "+<newtype>" is left
@@ -3266,7 +3284,7 @@
specified as e820 types, e.g., 1 = RAM, 2 = reserved,
3 = ACPI, 12 = PRAM.
- memory_corruption_check=0/1 [X86]
+ memory_corruption_check=0/1 [X86,EARLY]
Some BIOSes seem to corrupt the first 64k of
memory when doing things like suspend/resume.
Setting this option will scan the memory
@@ -3278,13 +3296,13 @@
affects the same memory, you can use memmap=
to prevent the kernel from using that memory.
- memory_corruption_check_size=size [X86]
+ memory_corruption_check_size=size [X86,EARLY]
By default it checks for corruption in the low
64k, making this memory unavailable for normal
use. Use this parameter to scan for
corruption in more or less memory.
- memory_corruption_check_period=seconds [X86]
+ memory_corruption_check_period=seconds [X86,EARLY]
By default it checks for corruption every 60
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
@@ -3308,7 +3326,7 @@
Note that even when enabled, there are a few cases where
the feature is not effective.
- memtest= [KNL,X86,ARM,M68K,PPC,RISCV] Enable memtest
+ memtest= [KNL,X86,ARM,M68K,PPC,RISCV,EARLY] Enable memtest
Format: <integer>
default : 0 <disable>
Specifies the number of memtest passes to be
@@ -3320,9 +3338,7 @@
mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control
Valid arguments: on, off
- Default (depends on kernel configuration option):
- on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y)
- off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n)
+ Default: off
mem_encrypt=on: Activate SME
mem_encrypt=off: Do not activate SME
@@ -3376,7 +3392,7 @@
https://repo.or.cz/w/linux-2.6/mini2440.git
mitigations=
- [X86,PPC,S390,ARM64] Control optional mitigations for
+ [X86,PPC,S390,ARM64,EARLY] Control optional mitigations for
CPU vulnerabilities. This is a set of curated,
arch-independent options, each of which is an
aggregation of existing arch-specific options.
@@ -3398,7 +3414,9 @@
nospectre_bhb [ARM64]
nospectre_v1 [X86,PPC]
nospectre_v2 [X86,PPC,S390,ARM64]
+ reg_file_data_sampling=off [X86]
retbleed=off [X86]
+ spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
@@ -3429,7 +3447,7 @@
retbleed=auto,nosmt [X86]
mminit_loglevel=
- [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+ [KNL,EARLY] When CONFIG_DEBUG_MEMORY_INIT is set, this
parameter allows control of the logging verbosity for
the additional memory initialisation checks. A value
of 0 disables mminit logging and a level of 4 will
@@ -3437,7 +3455,7 @@
so loglevel=8 may also need to be specified.
mmio_stale_data=
- [X86,INTEL] Control mitigation for the Processor
+ [X86,INTEL,EARLY] Control mitigation for the Processor
MMIO Stale Data vulnerabilities.
Processor MMIO Stale Data is a class of
@@ -3512,7 +3530,7 @@
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
reporting absolute coordinates, such as tablets
- movablecore= [KNL,X86,IA-64,PPC]
+ movablecore= [KNL,X86,IA-64,PPC,EARLY]
Format: nn[KMGTPE] | nn%
This parameter is the complement to kernelcore=, it
specifies the amount of memory used for migratable
@@ -3523,7 +3541,7 @@
that the amount of memory usable for all allocations
is not too small.
- movable_node [KNL] Boot-time switch to make hotplugable memory
+ movable_node [KNL,EARLY] Boot-time switch to make hotplugable memory
NUMA nodes to be movable. This means that the memory
of such nodes will be usable only for movable
allocations which rules out almost all kernel
@@ -3547,21 +3565,21 @@
[HW] Make the MicroTouch USB driver use raw coordinates
('y', default) or cooked coordinates ('n')
- mtrr=debug [X86]
+ mtrr=debug [X86,EARLY]
Enable printing debug information related to MTRR
registers at boot time.
- mtrr_chunk_size=nn[KMG] [X86]
+ mtrr_chunk_size=nn[KMG,X86,EARLY]
used for mtrr cleanup. It is largest continuous chunk
that could hold holes aka. UC entries.
- mtrr_gran_size=nn[KMG] [X86]
+ mtrr_gran_size=nn[KMG,X86,EARLY]
Used for mtrr cleanup. It is granularity of mtrr block.
Default is 1.
Large value could prevent small alignment from
using up MTRRs.
- mtrr_spare_reg_nr=n [X86]
+ mtrr_spare_reg_nr=n [X86,EARLY]
Format: <integer>
Range: 0,7 : spare reg number
Default : 1
@@ -3747,27 +3765,23 @@
emulation library even if a 387 maths coprocessor
is present.
- no4lvl [RISCV] Disable 4-level and 5-level paging modes. Forces
- kernel to use 3-level paging instead.
+ no4lvl [RISCV,EARLY] Disable 4-level and 5-level paging modes.
+ Forces kernel to use 3-level paging instead.
- no5lvl [X86-64,RISCV] Disable 5-level paging mode. Forces
+ no5lvl [X86-64,RISCV,EARLY] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
- noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
- caches in the slab allocator. Saves per-node memory,
- but will impact performance.
-
noalign [KNL,ARM]
- noaltinstr [S390] Disables alternative instructions patching
- (CPU alternatives feature).
+ noaltinstr [S390,EARLY] Disables alternative instructions
+ patching (CPU alternatives feature).
- noapic [SMP,APIC] Tells the kernel to not make use of any
+ noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
IOAPICs that may be present in the system.
noautogroup Disable scheduler automatic task group creation.
- nocache [ARM]
+ nocache [ARM,EARLY]
no_console_suspend
[HW] Never suspend the console
@@ -3785,13 +3799,13 @@
turn on/off it dynamically.
no_debug_objects
- [KNL] Disable object debugging
+ [KNL,EARLY] Disable object debugging
nodsp [SH] Disable hardware DSP at boot time.
- noefi Disable EFI runtime services support.
+ noefi [EFI,EARLY] Disable EFI runtime services support.
- no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel.
+ no_entry_flush [PPC,EARLY] Don't flush the L1-D cache when entering the kernel.
noexec [IA-64]
@@ -3822,6 +3836,7 @@
real-time systems.
no_hash_pointers
+ [KNL,EARLY]
Force pointers printed to the console or buffers to be
unhashed. By default, when a pointer is printed via %p
format string, that pointer is "hashed", i.e. obscured
@@ -3846,9 +3861,9 @@
the impact of the sleep instructions. This is also
useful when using JTAG debugger.
- nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
+ nohugeiomap [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge I/O mappings.
- nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings.
+ nohugevmalloc [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge vmalloc mappings.
nohz= [KNL] Boottime enable/disable dynamic ticks
Valid arguments: on, off
@@ -3870,13 +3885,13 @@
noinitrd [RAM] Tells the kernel not to load any configured
initial RAM disk.
- nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
+ nointremap [X86-64,Intel-IOMMU,EARLY] Do not enable interrupt
remapping.
[Deprecated - use intremap=off]
nointroute [IA-64]
- noinvpcid [X86] Disable the INVPCID cpu feature.
+ noinvpcid [X86,EARLY] Disable the INVPCID cpu feature.
noiotrap [SH] Disables trapped I/O port accesses.
@@ -3887,19 +3902,19 @@
nojitter [IA-64] Disables jitter checking for ITC timers.
- nokaslr [KNL]
+ nokaslr [KNL,EARLY]
When CONFIG_RANDOMIZE_BASE is set, this disables
kernel and module base offset ASLR (Address Space
Layout Randomization).
- no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+ no-kvmapf [X86,KVM,EARLY] Disable paravirtualized asynchronous page
fault handling.
- no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+ no-kvmclock [X86,KVM,EARLY] Disable paravirtualized KVM clock driver
- nolapic [X86-32,APIC] Do not enable or use the local APIC.
+ nolapic [X86-32,APIC,EARLY] Do not enable or use the local APIC.
- nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
+ nolapic_timer [X86-32,APIC,EARLY] Do not use the local APIC timer.
nomca [IA-64] Disable machine check abort handling
@@ -3924,23 +3939,23 @@
shutdown the other cpus. Instead use the REBOOT_VECTOR
irq.
- nopat [X86] Disable PAT (page attribute table extension of
+ nopat [X86,EARLY] Disable PAT (page attribute table extension of
pagetables) support.
- nopcid [X86-64] Disable the PCID cpu feature.
+ nopcid [X86-64,EARLY] Disable the PCID cpu feature.
nopku [X86] Disable Memory Protection Keys CPU feature found
in some Intel CPUs.
- nopti [X86-64]
+ nopti [X86-64,EARLY]
Equivalent to pti=off
- nopv= [X86,XEN,KVM,HYPER_V,VMWARE]
+ nopv= [X86,XEN,KVM,HYPER_V,VMWARE,EARLY]
Disables the PV optimizations forcing the guest to run
as generic guest with no PV drivers. Currently support
XEN HVM, KVM, HYPER_V and VMWARE guest.
- nopvspin [X86,XEN,KVM]
+ nopvspin [X86,XEN,KVM,EARLY]
Disables the qspinlock slow path using PV optimizations
which allow the hypervisor to 'idle' the guest on lock
contention.
@@ -3960,20 +3975,20 @@
This is required for the Braillex ib80-piezo Braille
reader made by F.H. Papenmeier (Germany).
- nosgx [X86-64,SGX] Disables Intel SGX kernel support.
+ nosgx [X86-64,SGX,EARLY] Disables Intel SGX kernel support.
- nosmap [PPC]
+ nosmap [PPC,EARLY]
Disable SMAP (Supervisor Mode Access Prevention)
even if it is supported by processor.
- nosmep [PPC64s]
+ nosmep [PPC64s,EARLY]
Disable SMEP (Supervisor Mode Execution Prevention)
even if it is supported by processor.
- nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
+ nosmp [SMP,EARLY] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
- nosmt [KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT).
+ nosmt [KNL,MIPS,PPC,S390,EARLY] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
[KNL,X86,PPC] Disable symmetric multithreading (SMT).
@@ -3983,22 +3998,23 @@
nosoftlockup [KNL] Disable the soft-lockup detector.
nospec_store_bypass_disable
- [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+ [HW,EARLY] Disable all mitigations for the Speculative
+ Store Bypass vulnerability
- nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch
+ nospectre_bhb [ARM64,EARLY] Disable all mitigations for Spectre-BHB (branch
history injection) vulnerability. System may allow data leaks
with this option.
- nospectre_v1 [X86,PPC] Disable mitigations for Spectre Variant 1
+ nospectre_v1 [X86,PPC,EARLY] Disable mitigations for Spectre Variant 1
(bounds check bypass). With this option data leaks are
possible in the system.
- nospectre_v2 [X86,PPC_E500,ARM64] Disable all mitigations for
- the Spectre variant 2 (indirect branch prediction)
- vulnerability. System may allow data leaks with this
- option.
+ nospectre_v2 [X86,PPC_E500,ARM64,EARLY] Disable all mitigations
+ for the Spectre variant 2 (indirect branch
+ prediction) vulnerability. System may allow data
+ leaks with this option.
- no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV] Disable
+ no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,EARLY] Disable
paravirtualized steal time accounting. steal time is
computed, but won't influence scheduler behaviour
@@ -4008,7 +4024,7 @@
broken timer IRQ sources.
no_uaccess_flush
- [PPC] Don't flush the L1-D cache after accessing user data.
+ [PPC,EARLY] Don't flush the L1-D cache after accessing user data.
novmcoredd [KNL,KDUMP]
Disable device dump. Device dump allows drivers to
@@ -4022,15 +4038,15 @@
is set.
no-vmw-sched-clock
- [X86,PV_OPS] Disable paravirtualized VMware scheduler
- clock and use the default one.
+ [X86,PV_OPS,EARLY] Disable paravirtualized VMware
+ scheduler clock and use the default one.
nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup).
- nowb [ARM]
+ nowb [ARM,EARLY]
- nox2apic [X86-64,APIC] Do not enable x2APIC mode.
+ nox2apic [X86-64,APIC,EARLY] Do not enable x2APIC mode.
NOTE: this parameter will be ignored on systems with the
LEGACY_XAPIC_DISABLED bit set in the
@@ -4068,7 +4084,7 @@
purges which is reported from either PAL_VM_SUMMARY or
SAL PALO.
- nr_cpus= [SMP] Maximum number of processors that an SMP kernel
+ nr_cpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
could support. nr_cpus=n : n >= 1 limits the kernel to
support 'n' processors. It could be larger than the
number of already plugged CPU during bootup, later in
@@ -4079,8 +4095,9 @@
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
- numa=off [KNL, ARM64, PPC, RISCV, SPARC, X86] Disable NUMA, Only
- set up a single NUMA node spanning all memory.
+ numa=off [KNL, ARM64, PPC, RISCV, SPARC, X86, EARLY]
+ Disable NUMA, Only set up a single NUMA node
+ spanning all memory.
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
NUMA balancing.
@@ -4091,7 +4108,7 @@
This can be set from sysctl after boot.
See Documentation/admin-guide/sysctl/vm.rst for details.
- ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
+ ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver.
See Documentation/core-api/debugging-via-ohci1394.rst for more
info.
@@ -4117,7 +4134,8 @@
Once locked, the boundary cannot be changed.
1 indicates lock status, 0 indicates unlock status.
- oops=panic Always panic on oopses. Default is to just kill the
+ oops=panic [KNL,EARLY]
+ Always panic on oopses. Default is to just kill the
process, but there is a small probability of
deadlocking the machine.
This will also cause panics on machine check exceptions.
@@ -4133,13 +4151,13 @@
can be read from sysfs at:
/sys/module/page_alloc/parameters/shuffle.
- page_owner= [KNL] Boot-time page_owner enabling option.
+ page_owner= [KNL,EARLY] Boot-time page_owner enabling option.
Storage of the information about who allocated
each page is disabled in default. With this switch,
we can turn it on.
on: enable the feature
- page_poison= [KNL] Boot-time parameter changing the state of
+ page_poison= [KNL,EARLY] Boot-time parameter changing the state of
poisoning on the buddy allocator, available with
CONFIG_PAGE_POISONING=y.
off: turn off poisoning (default)
@@ -4157,7 +4175,8 @@
timeout < 0: reboot immediately
Format: <timeout>
- panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
+ panic_on_taint= [KNL,EARLY]
+ Bitmask for conditionally calling panic() in add_taint()
Format: <hex>[,nousertaint]
Hexadecimal bitmask representing the set of TAINT flags
that will cause the kernel to panic when add_taint() is
@@ -4313,7 +4332,7 @@
pcbit= [HW,ISDN]
- pci=option[,option...] [PCI] various PCI subsystem options.
+ pci=option[,option...] [PCI,EARLY] various PCI subsystem options.
Some options herein operate on a specific device
or a set of devices (<pci_dev>). These are
@@ -4582,7 +4601,8 @@
Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c
- percpu_alloc= Select which percpu first chunk allocator to use.
+ percpu_alloc= [MM,EARLY]
+ Select which percpu first chunk allocator to use.
Currently supported values are "embed" and "page".
Archs may support subset or none of the selections.
See comments in mm/percpu.c for details on each
@@ -4644,6 +4664,11 @@
may be specified.
Format: <port>,<port>....
+ possible_cpus= [SMP,S390,X86]
+ Format: <unsigned int>
+ Set the number of possible CPUs, overriding the
+ regular discovery mechanisms (such as ACPI/FW, etc).
+
powersave=off [PPC] This option disables power saving features.
It specifically disables cpuidle and sets the
platform machine description specific power_save
@@ -4651,12 +4676,12 @@
execution priority.
ppc_strict_facility_enable
- [PPC] This option catches any kernel floating point,
+ [PPC,ENABLE] This option catches any kernel floating point,
Altivec, VSX and SPE outside of regions specifically
allowed (eg kernel_enable_fpu()/kernel_disable_fpu()).
There is some performance impact when enabling this.
- ppc_tm= [PPC]
+ ppc_tm= [PPC,EARLY]
Format: {"off"}
Disable Hardware Transactional Memory
@@ -4766,7 +4791,7 @@
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
- quiet [KNL] Disable most log messages
+ quiet [KNL,EARLY] Disable most log messages
r128= [HW,DRM]
@@ -4783,17 +4808,17 @@
ramdisk_start= [RAM] RAM disk image start address
random.trust_cpu=off
- [KNL] Disable trusting the use of the CPU's
+ [KNL,EARLY] Disable trusting the use of the CPU's
random number generator (if available) to
initialize the kernel's RNG.
random.trust_bootloader=off
- [KNL] Disable trusting the use of the a seed
+ [KNL,EARLY] Disable trusting the use of the a seed
passed by the bootloader (if available) to
initialize the kernel's RNG.
randomize_kstack_offset=
- [KNL] Enable or disable kernel stack offset
+ [KNL,EARLY] Enable or disable kernel stack offset
randomization, which provides roughly 5 bits of
entropy, frustrating memory corruption attacks
that depend on stack address determinism or
@@ -5034,6 +5059,11 @@
this kernel boot parameter, forcibly setting it
to zero.
+ rcutree.enable_rcu_lazy= [KNL]
+ To save power, batch RCU callbacks and flush after
+ delay, memory pressure or callback list growing too
+ big.
+
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
@@ -5484,7 +5514,7 @@
Run specified binary instead of /init from the ramdisk,
used for early userspace startup. See initrd.
- rdrand= [X86]
+ rdrand= [X86,EARLY]
force - Override the decision by the kernel to hide the
advertisement of RDRAND support (this affects
certain AMD processors because of buggy BIOS
@@ -5580,7 +5610,7 @@
them. If <base> is less than 0x10000, the region
is assumed to be I/O ports; otherwise it is memory.
- reservetop= [X86-32]
+ reservetop= [X86-32,EARLY]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
address space.
@@ -5665,7 +5695,7 @@
[KNL] Disable ring 3 MONITOR/MWAIT feature on supported
CPUs.
- riscv_isa_fallback [RISCV]
+ riscv_isa_fallback [RISCV,EARLY]
When CONFIG_RISCV_ISA_FALLBACK is not enabled, permit
falling back to detecting extension support by parsing
"riscv,isa" property on devicetree systems when the
@@ -5674,13 +5704,14 @@
ro [KNL] Mount root device read-only on boot
- rodata= [KNL]
+ rodata= [KNL,EARLY]
on Mark read-only kernel memory as read-only (default).
off Leave read-only kernel memory writable for debugging.
full Mark read-only kernel memory and aliases as read-only
[arm64]
rockchip.usb_uart
+ [EARLY]
Enable the uart passthrough on the designated usb port
on Rockchip SoCs. When active, the signals of the
debug-uart get routed to the D+ and D- pins of the usb
@@ -5741,7 +5772,7 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
- sched_verbose [KNL] Enables verbose scheduler debug messages.
+ sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
@@ -5856,7 +5887,7 @@
non-zero "wait" parameter. See weight_single
and weight_many.
- skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
+ skew_tick= [KNL,EARLY] Offset the periodic timer tick per cpu to mitigate
xtime_lock contention on larger systems, and/or RCU lock
contention on all systems with CONFIG_MAXSMP set.
Format: { "0" | "1" }
@@ -5895,65 +5926,58 @@
simeth= [IA-64]
simscsi=
- slram= [HW,MTD]
-
- slab_merge [MM]
- Enable merging of slabs with similar size when the
- kernel is built without CONFIG_SLAB_MERGE_DEFAULT.
-
- slab_nomerge [MM]
- Disable merging of slabs with similar size. May be
- necessary if there is some reason to distinguish
- allocs to different slabs, especially in hardened
- environments where the risk of heap overflows and
- layout control by attackers can usually be
- frustrated by disabling merging. This will reduce
- most of the exposure of a heap attack to a single
- cache (risks via metadata attacks are mostly
- unchanged). Debug options disable merging on their
- own.
- For more information see Documentation/mm/slub.rst.
-
- slab_max_order= [MM, SLAB]
- Determines the maximum allowed order for slabs.
- A high setting may cause OOMs due to memory
- fragmentation. Defaults to 1 for systems with
- more than 32MB of RAM, 0 otherwise.
-
- slub_debug[=options[,slabs][;[options[,slabs]]...] [MM, SLUB]
- Enabling slub_debug allows one to determine the
+ slab_debug[=options[,slabs][;[options[,slabs]]...] [MM]
+ Enabling slab_debug allows one to determine the
culprit if slab objects become corrupted. Enabling
- slub_debug can create guard zones around objects and
+ slab_debug can create guard zones around objects and
may poison objects when not in use. Also tracks the
last alloc / free. For more information see
Documentation/mm/slub.rst.
+ (slub_debug legacy name also accepted for now)
- slub_max_order= [MM, SLUB]
+ slab_max_order= [MM]
Determines the maximum allowed order for slabs.
A high setting may cause OOMs due to memory
fragmentation. For more information see
Documentation/mm/slub.rst.
+ (slub_max_order legacy name also accepted for now)
- slub_min_objects= [MM, SLUB]
+ slab_merge [MM]
+ Enable merging of slabs with similar size when the
+ kernel is built without CONFIG_SLAB_MERGE_DEFAULT.
+ (slub_merge legacy name also accepted for now)
+
+ slab_min_objects= [MM]
The minimum number of objects per slab. SLUB will
- increase the slab order up to slub_max_order to
+ increase the slab order up to slab_max_order to
generate a sufficiently large slab able to contain
the number of objects indicated. The higher the number
of objects the smaller the overhead of tracking slabs
and the less frequently locks need to be acquired.
For more information see Documentation/mm/slub.rst.
+ (slub_min_objects legacy name also accepted for now)
- slub_min_order= [MM, SLUB]
+ slab_min_order= [MM]
Determines the minimum page order for slabs. Must be
- lower than slub_max_order.
- For more information see Documentation/mm/slub.rst.
+ lower or equal to slab_max_order. For more information see
+ Documentation/mm/slub.rst.
+ (slub_min_order legacy name also accepted for now)
- slub_merge [MM, SLUB]
- Same with slab_merge.
+ slab_nomerge [MM]
+ Disable merging of slabs with similar size. May be
+ necessary if there is some reason to distinguish
+ allocs to different slabs, especially in hardened
+ environments where the risk of heap overflows and
+ layout control by attackers can usually be
+ frustrated by disabling merging. This will reduce
+ most of the exposure of a heap attack to a single
+ cache (risks via metadata attacks are mostly
+ unchanged). Debug options disable merging on their
+ own.
+ For more information see Documentation/mm/slub.rst.
+ (slub_nomerge legacy name also accepted for now)
- slub_nomerge [MM, SLUB]
- Same with slab_nomerge. This is supported for legacy.
- See slab_nomerge for more information.
+ slram= [HW,MTD]
smart2= [HW]
Format: <io1>[,<io2>[,...,<io8>]]
@@ -5987,10 +6011,10 @@
1: Fast pin select (default)
2: ATC IRMode
- smt= [KNL,MIPS,S390] Set the maximum number of threads (logical
- CPUs) to use per physical CPU on systems capable of
- symmetric multithreading (SMT). Will be capped to the
- actual hardware limit.
+ smt= [KNL,MIPS,S390,EARLY] Set the maximum number of threads
+ (logical CPUs) to use per physical CPU on systems
+ capable of symmetric multithreading (SMT). Will
+ be capped to the actual hardware limit.
Format: <integer>
Default: -1 (no limit)
@@ -6012,7 +6036,7 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
- spectre_v2= [X86] Control mitigation of Spectre variant 2
+ spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
user space attacks.
@@ -6027,8 +6051,8 @@
Selecting 'on' will, and 'auto' may, choose a
mitigation method at run time according to the
CPU, the available microcode, the setting of the
- CONFIG_RETPOLINE configuration option, and the
- compiler with which the kernel was built.
+ CONFIG_MITIGATION_RETPOLINE configuration option,
+ and the compiler with which the kernel was built.
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
@@ -6092,7 +6116,7 @@
spectre_v2_user=auto.
spec_rstack_overflow=
- [X86] Control RAS overflow mitigation on AMD Zen CPUs
+ [X86,EARLY] Control RAS overflow mitigation on AMD Zen CPUs
off - Disable mitigation
microcode - Enable microcode mitigation only
@@ -6103,7 +6127,7 @@
(cloud-specific mitigation)
spec_store_bypass_disable=
- [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ [HW,EARLY] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
Certain CPUs are vulnerable to an exploit against a
@@ -6199,7 +6223,7 @@
#DB exception for bus lock is triggered only when
CPL > 0.
- srbds= [X86,INTEL]
+ srbds= [X86,INTEL,EARLY]
Control the Special Register Buffer Data Sampling
(SRBDS) mitigation.
@@ -6286,7 +6310,7 @@
srcutree.convert_to_big must have the 0x10 bit
set for contention-based conversions to occur.
- ssbd= [ARM64,HW]
+ ssbd= [ARM64,HW,EARLY]
Speculative Store Bypass Disable control
On CPUs that are vulnerable to the Speculative
@@ -6310,7 +6334,7 @@
growing up) the main stack are reserved for no other
mapping. Default value is 256 pages.
- stack_depot_disable= [KNL]
+ stack_depot_disable= [KNL,EARLY]
Setting this to true through kernel command line will
disable the stack depot thereby saving the static memory
consumed by the stack hash table. By default this is set
@@ -6349,12 +6373,12 @@
be used to filter out binaries which have
not yet been made aware of AT_MINSIGSTKSZ.
- stress_hpt [PPC]
+ stress_hpt [PPC,EARLY]
Limits the number of kernel HPT entries in the hash
page table to increase the rate of hash page table
faults on kernel addresses.
- stress_slb [PPC]
+ stress_slb [PPC,EARLY]
Limits the number of kernel SLB entries, and flushes
them frequently to increase the rate of SLB faults
on kernel addresses.
@@ -6414,7 +6438,7 @@
This parameter controls use of the Protected
Execution Facility on pSeries.
- swiotlb= [ARM,IA-64,PPC,MIPS,X86]
+ swiotlb= [ARM,IA-64,PPC,MIPS,X86,EARLY]
Format: { <int> [,<int>] | force | noforce }
<int> -- Number of I/O TLB slabs
<int> -- Second integer after comma. Number of swiotlb
@@ -6424,7 +6448,7 @@
wouldn't be automatically used by the kernel
noforce -- Never use bounce buffers (for debugging)
- switches= [HW,M68k]
+ switches= [HW,M68k,EARLY]
sysctl.*= [KNL]
Set a sysctl parameter, right before loading the init
@@ -6483,11 +6507,11 @@
<deci-seconds>: poll all this frequency
0: no polling (default)
- threadirqs [KNL]
+ threadirqs [KNL,EARLY]
Force threading of all interrupt handlers except those
marked explicitly IRQF_NO_THREAD.
- topology= [S390]
+ topology= [S390,EARLY]
Format: {off | on}
Specify if the kernel should make use of the cpu
topology information if the hardware supports this.
@@ -6728,7 +6752,7 @@
can be overridden by a later tsc=nowatchdog. A console
message will flag any such suppression or overriding.
- tsc_early_khz= [X86] Skip early TSC calibration and use the given
+ tsc_early_khz= [X86,EARLY] Skip early TSC calibration and use the given
value instead. Useful when the early TSC frequency discovery
procedure is not reliable, such as on overclocked systems
with CPUID.16h support and partial CPUID.15h support.
@@ -6763,7 +6787,7 @@
See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
for more details.
- tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+ tsx_async_abort= [X86,INTEL,EARLY] Control mitigation for the TSX Async
Abort (TAA) vulnerability.
Similar to Micro-architectural Data Sampling (MDS)
@@ -6829,7 +6853,7 @@
unknown_nmi_panic
[X86] Cause panic on unknown NMI.
- unwind_debug [X86-64]
+ unwind_debug [X86-64,EARLY]
Enable unwinder debug output. This can be
useful for debugging certain unwinder error
conditions, including corrupt stacks and
@@ -7019,7 +7043,7 @@
Example: user_debug=31
userpte=
- [X86] Flags controlling user PTE allocations.
+ [X86,EARLY] Flags controlling user PTE allocations.
nohigh = do not allocate PTE pages in
HIGHMEM regardless of setting
@@ -7048,7 +7072,7 @@
vector= [IA-64,SMP]
vector=percpu: enable percpu vector domain
- video= [FB] Frame buffer configuration
+ video= [FB,EARLY] Frame buffer configuration
See Documentation/fb/modedb.rst.
video.brightness_switch_enabled= [ACPI]
@@ -7096,13 +7120,13 @@
P Enable page structure init time poisoning
- Disable all of the above options
- vmalloc=nn[KMG] [KNL,BOOT] Forces the vmalloc area to have an exact
- size of <nn>. This can be used to increase the
- minimum size (128MB on x86). It can also be used to
- decrease the size and leave more room for directly
- mapped kernel RAM.
+ vmalloc=nn[KMG] [KNL,BOOT,EARLY] Forces the vmalloc area to have an
+ exact size of <nn>. This can be used to increase
+ the minimum size (128MB on x86). It can also be
+ used to decrease the size and leave more room
+ for directly mapped kernel RAM.
- vmcp_cma=nn[MG] [KNL,S390]
+ vmcp_cma=nn[MG] [KNL,S390,EARLY]
Sets the memory size reserved for contiguous memory
allocations for the vmcp device driver.
@@ -7115,7 +7139,7 @@
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
Format: <command>
- vsyscall= [X86-64]
+ vsyscall= [X86-64,EARLY]
Controls the behavior of vsyscalls (i.e. calls to
fixed addresses of 0xffffffffff600x00 from legacy
code). Most statically-linked binaries and older
@@ -7225,6 +7249,15 @@
threshold repeatedly. They are likely good
candidates for using WQ_UNBOUND workqueues instead.
+ workqueue.cpu_intensive_warning_thresh=<uint>
+ If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+ will report the work functions which violate the
+ intensive_threshold_us repeatedly. In order to prevent
+ spurious warnings, start printing only after a work
+ function has violated this threshold number of times.
+
+ The default is 4 times. 0 disables the warning.
+
workqueue.power_efficient
Per-cpu workqueues are generally preferred because
they show better performance thanks to cache
@@ -7263,13 +7296,13 @@
When enabled, memory and cache locality will be
impacted.
- writecombine= [LOONGARCH] Control the MAT (Memory Access Type) of
- ioremap_wc().
+ writecombine= [LOONGARCH,EARLY] Control the MAT (Memory Access
+ Type) of ioremap_wc().
on - Enable writecombine, use WUC for ioremap_wc()
off - Disable writecombine, use SUC for ioremap_wc()
- x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
+ x2apic_phys [X86-64,APIC,EARLY] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
@@ -7280,7 +7313,7 @@
save/restore/migration must be enabled to handle larger
domains.
- xen_emul_unplug= [HW,X86,XEN]
+ xen_emul_unplug= [HW,X86,XEN,EARLY]
Unplug Xen emulated devices
Format: [unplug0,][unplug1]
ide-disks -- unplug primary master IDE devices
@@ -7292,17 +7325,17 @@
the unplug protocol
never -- do not unplug even if version check succeeds
- xen_legacy_crash [X86,XEN]
+ xen_legacy_crash [X86,XEN,EARLY]
Crash from Xen panic notifier, without executing late
panic() code such as dumping handler.
- xen_msr_safe= [X86,XEN]
+ xen_msr_safe= [X86,XEN,EARLY]
Format: <bool>
Select whether to always use non-faulting (safe) MSR
access functions when running as Xen PV guest. The
default value is controlled by CONFIG_XEN_PV_MSR_SAFE.
- xen_nopvspin [X86,XEN]
+ xen_nopvspin [X86,XEN,EARLY]
Disables the qspinlock slowpath using Xen PV optimizations.
This parameter is obsoleted by "nopvspin" parameter, which
has equivalent effect for XEN platform.
@@ -7314,7 +7347,7 @@
has equivalent effect for XEN platform.
xen_no_vector_callback
- [KNL,X86,XEN] Disable the vector callback for Xen
+ [KNL,X86,XEN,EARLY] Disable the vector callback for Xen
event channel interrupts.
xen_scrub_pages= [XEN]
@@ -7323,7 +7356,7 @@
with /sys/devices/system/xen_memory/xen_memory0/scrub_pages.
Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT.
- xen_timer_slop= [X86-64,XEN]
+ xen_timer_slop= [X86-64,XEN,EARLY]
Set the timer slop (in nanoseconds) for the virtual Xen
timers (default is 100000). This adjusts the minimum
delta of virtualized Xen timers, where lower values
@@ -7376,7 +7409,7 @@
host controller quirks. Meaning of each bit can be
consulted in header drivers/usb/host/xhci.h.
- xmon [PPC]
+ xmon [PPC,EARLY]
Format: { early | on | rw | ro | off }
Controls if xmon debugger is enabled. Default is off.
Passing only "xmon" is equivalent to "xmon=early".
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index 993c2a05f5ee..b6aeae3327ce 100644
--- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -243,13 +243,9 @@ To reduce its OS jitter, do any of the following:
3. Do any of the following needed to avoid jitter that your
application cannot tolerate:
- a. Build your kernel with CONFIG_SLUB=y rather than
- CONFIG_SLAB=y, thus avoiding the slab allocator's periodic
- use of each CPU's workqueues to run its cache_reap()
- function.
- b. Avoid using oprofile, thus avoiding OS jitter from
+ a. Avoid using oprofile, thus avoiding OS jitter from
wq_sync_buffer().
- c. Limit your CPU frequency so that a CPU-frequency
+ b. Limit your CPU frequency so that a CPU-frequency
governor is not required, possibly enlisting the aid of
special heatsinks or other cooling technologies. If done
correctly, and if you CPU architecture permits, you should
@@ -259,7 +255,7 @@ To reduce its OS jitter, do any of the following:
WARNING: Please check your CPU specifications to
make sure that this is safe on your particular system.
- d. As of v3.18, Christoph Lameter's on-demand vmstat workers
+ c. As of v3.18, Christoph Lameter's on-demand vmstat workers
commit prevents OS jitter due to vmstat_update() on
CONFIG_SMP=y systems. Before v3.18, is not possible
to entirely get rid of the OS jitter, but you can
@@ -274,7 +270,7 @@ To reduce its OS jitter, do any of the following:
(based on an earlier one from Gilad Ben-Yossef) that
reduces or even eliminates vmstat overhead for some
workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com.
- e. If running on high-end powerpc servers, build with
+ d. If running on high-end powerpc servers, build with
CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS
daemon from running on each CPU every second or so.
(This will require editing Kconfig files and will defeat
@@ -282,12 +278,12 @@ To reduce its OS jitter, do any of the following:
due to the rtas_event_scan() function.
WARNING: Please check your CPU specifications to
make sure that this is safe on your particular system.
- f. If running on Cell Processor, build your kernel with
+ e. If running on Cell Processor, build your kernel with
CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
spu_gov_work().
WARNING: Please check your CPU specifications to
make sure that this is safe on your particular system.
- g. If running on PowerMAC, build your kernel with
+ f. If running on PowerMAC, build your kernel with
CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
avoiding OS jitter from rackmeter_do_timer().
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 396091651955..7250c0542828 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -206,6 +206,11 @@ Will increase power usage.
Default: 0 (off)
+mem_pcpu_rsv
+------------
+
+Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU.
+
rmem_default
------------
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 92a8a07f5c43..f92551539e8a 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -34,7 +34,7 @@ name of the command ('Comm:') that triggered the event::
You'll find a 'Not tainted: ' there if the kernel was not tainted at the
time of the event; if it was, then it will print 'Tainted: ' and characters
-either letters or blanks. In above example it looks like this::
+either letters or blanks. In the example above it looks like this::
Tainted: P W O
@@ -52,7 +52,7 @@ At runtime, you can query the tainted state by reading
tainted; any other number indicates the reasons why it is. The easiest way to
decode that number is the script ``tools/debugging/kernel-chktaint``, which your
distribution might ship as part of a package called ``linux-tools`` or
-``kernel-tools``; if it doesn't you can download the script from
+``kernel-tools``; if it doesn't, you can download the script from
`git.kernel.org <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/debugging/kernel-chktaint>`_
and execute it with ``sh kernel-chktaint``, which would print something like
this on the machine that had the statements in the logs that were quoted earlier::
diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
new file mode 100644
index 000000000000..58211840ac6f
--- /dev/null
+++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
@@ -0,0 +1,1952 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+=========================================
+How to verify bugs and bisect regressions
+=========================================
+
+This document describes how to check if some Linux kernel problem occurs in code
+currently supported by developers -- to then explain how to locate the change
+causing the issue, if it is a regression (e.g. did not happen with earlier
+versions).
+
+The text aims at people running kernels from mainstream Linux distributions on
+commodity hardware who want to report a kernel bug to the upstream Linux
+developers. Despite this intent, the instructions work just as well for users
+who are already familiar with building their own kernels: they help avoid
+mistakes occasionally made even by experienced developers.
+
+..
+ Note: if you see this note, you are reading the text's source file. You
+ might want to switch to a rendered version: it makes it a lot easier to
+ read and navigate this document -- especially when you want to look something
+ up in the reference section, then jump back to where you left off.
+..
+ Find the latest rendered version of this text here:
+ https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.rst.html
+
+The essence of the process (aka 'TL;DR')
+========================================
+
+*[If you are new to building or bisecting Linux, ignore this section and head
+over to the* ":ref:`step-by-step guide<introguide_bissbs>`" *below. It utilizes
+the same commands as this section while describing them in brief fashion. The
+steps are nevertheless easy to follow and together with accompanying entries
+in a reference section mention many alternatives, pitfalls, and additional
+aspects, all of which might be essential in your present case.]*
+
+**In case you want to check if a bug is present in code currently supported by
+developers**, execute just the *preparations* and *segment 1*; while doing so,
+consider the newest Linux kernel you regularly use to be the 'working' kernel.
+In the following example that's assumed to be 6.0.13, which is why the sources
+of v6.0 will be used to prepare the .config file.
+
+**In case you face a regression**, follow the steps at least till the end of
+*segment 2*. Then you can submit a preliminary report -- or continue with
+*segment 3*, which describes how to perform a bisection needed for a
+full-fledged regression report. In the following example 6.0.13 is assumed to be
+the 'working' kernel and 6.1.5 to be the first 'broken', which is why v6.0
+will be considered the 'good' release and used to prepare the .config file.
+
+* **Preparations**: set up everything to build your own kernels::
+
+ # * Remove any software that depends on externally maintained kernel modules
+ # or builds any automatically during bootup.
+ # * Ensure Secure Boot permits booting self-compiled Linux kernels.
+ # * If you are not already running the 'working' kernel, reboot into it.
+ # * Install compilers and everything else needed for building Linux.
+ # * Ensure to have 15 Gigabyte free space in your home directory.
+ git clone -o mainline --no-checkout \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+ git checkout --detach v6.0
+ # * Hint: if you used an existing clone, ensure no stale .config is around.
+ make olddefconfig
+ # * Ensure the former command picked the .config of the 'working' kernel.
+ # * Connect external hardware (USB keys, tokens, ...), start a VM, bring up
+ # VPNs, mount network shares, and briefly try the feature that is broken.
+ yes '' | make localmodconfig
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local'
+ ./scripts/config -e CONFIG_LOCALVERSION_AUTO
+ # * Note, when short on storage space, check the guide for an alternative:
+ ./scripts/config -d DEBUG_INFO_NONE -e KALLSYMS_ALL -e DEBUG_KERNEL \
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS
+ # * Hint: at this point you might want to adjust the build configuration;
+ # you'll have to, if you are running Debian.
+ make olddefconfig
+ cp .config ~/kernel-config-working
+
+* **Segment 1**: build a kernel from the latest mainline codebase.
+
+ This among others checks if the problem was fixed already and which developers
+ later need to be told about the problem; in case of a regression, this rules
+ out a .config change as root of the problem.
+
+ a) Checking out latest mainline code::
+
+ cd ~/linux/
+ git checkout --force --detach mainline/master
+
+ b) Build, install, and boot a kernel::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Make sure there is enough disk space to hold another kernel:
+ df -h /boot/ /lib/modules/
+ # * Note: on Arch Linux, its derivatives and a few other distributions
+ # the following commands will do nothing at all or only part of the
+ # job. See the step-by-step guide for further details.
+ command -v installkernel && sudo make modules_install install
+ # * Check how much space your self-built kernel actually needs, which
+ # enables you to make better estimates later:
+ du -ch /boot/*$(make -s kernelrelease)* | tail -n 1
+ du -sh /lib/modules/$(make -s kernelrelease)/
+ # * Hint: the output of the following command will help you pick the
+ # right kernel from the boot menu:
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+ # * Once booted, ensure you are running the kernel you just built by
+ # checking if the output of the next two commands matches:
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ c) Check if the problem occurs with this kernel as well.
+
+* **Segment 2**: ensure the 'good' kernel is also a 'working' kernel.
+
+ This among others verifies the trimmed .config file actually works well, as
+ bisecting with it otherwise would be a waste of time:
+
+ a) Start by checking out the sources of the 'good' version::
+
+ cd ~/linux/
+ git checkout --force --detach v6.0
+
+ b) Build, install, and boot a kernel as described earlier in *segment 1,
+ section b* -- just feel free to skip the 'du' commands, as you have a rough
+ estimate already.
+
+ c) Ensure the feature that regressed with the 'broken' kernel actually works
+ with this one.
+
+* **Segment 3**: perform and validate the bisection.
+
+ a) In case your 'broken' version is a stable/longterm release, add the Git
+ branch holding it::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
+
+ b) Initialize the bisection::
+
+ cd ~/linux/
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1.5
+
+ c) Build, install, and boot a kernel as described earlier in *segment 1,
+ section b*.
+
+ In case building or booting the kernel fails for unrelated reasons, run
+ ``git bisect skip``. In all other outcomes, check if the regressed feature
+ works with the newly built kernel. If it does, tell Git by executing
+ ``git bisect good``; if it does not, run ``git bisect bad`` instead.
+
+ All three commands will make Git checkout another commit; then re-execute
+ this step (e.g. build, install, boot, and test a kernel to then tell Git
+ the outcome). Do so again and again until Git shows which commit broke
+ things. If you run short of disk space during this process, check the
+ "Supplementary tasks" section below.
+
+ d) Once your finished the bisection, put a few things away::
+
+ cd ~/linux/
+ git bisect log > ~/bisect-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+ e) Try to verify the bisection result::
+
+ git checkout --force --detach mainline/master
+ git revert --no-edit cafec0cacaca0
+
+ This is optional, as some commits are impossible to revert. But if the
+ second command worked flawlessly, build, install, and boot one more kernel
+ kernel, which should not show the regression.
+
+* **Supplementary tasks**: cleanup during and after the process.
+
+ a) To avoid running out of disk space during a bisection, you might need to
+ remove some kernels you built earlier. You most likely want to keep those
+ you built during segment 1 and 2 around for a while, but you will most
+ likely no longer need kernels tested during the actual bisection
+ (Segment 3 c). You can list them in build order using::
+
+ ls -ltr /lib/modules/*-local*
+
+ To then for example erase a kernel that identifies itself as
+ '6.0-rc1-local-gcafec0cacaca0', use this::
+
+ sudo rm -rf /lib/modules/6.0-rc1-local-gcafec0cacaca0
+ sudo kernel-install -v remove 6.0-rc1-local-gcafec0cacaca0
+ # * Note, on some distributions kernel-install is missing
+ # or does only part of the job.
+
+ b) If you performed a bisection and successfully validated the result, feel
+ free to remove all kernels built during the actual bisection (Segment 3 c);
+ the kernels you built earlier and later you might want to keep around for
+ a week or two.
+
+.. _introguide_bissbs:
+
+Step-by-step guide on how to verify bugs and bisect regressions
+===============================================================
+
+This guide describes how to set up your own Linux kernels for investigating bugs
+or regressions you intent to report. How far you want to follow the instructions
+depends on your issue:
+
+Execute all steps till the end of *segment 1* to **verify if your kernel problem
+is present in code supported by Linux kernel developers**. If it is, you are all
+set to report the bug -- unless it did not happen with earlier kernel versions,
+as then your want to at least continue with *segment 2* to **check if the issue
+qualifies as regression** which receive priority treatment. Depending on the
+outcome you then are ready to report a bug or submit a preliminary regression
+report; instead of the latter your could also head straight on and follow
+*segment 3* to **perform a bisection** for a full-fledged regression report
+developers are obliged to act upon.
+
+ :ref:`Preparations: set up everything to build your own kernels.<introprep_bissbs>`
+
+ :ref:`Segment 1: try to reproduce the problem with the latest codebase.<introlatestcheck_bissbs>`
+
+ :ref:`Segment 2: check if the kernels you build work fine.<introworkingcheck_bissbs>`
+
+ :ref:`Segment 3: perform a bisection and validate the result.<introbisect_bissbs>`
+
+ :ref:`Supplementary tasks: cleanup during and after following this guide.<introclosure_bissbs>`
+
+The steps in each segment illustrate the important aspects of the process, while
+a comprehensive reference section holds additional details. The latter sometimes
+also outlines alternative approaches, pitfalls, as well as problems that might
+occur at the particular step -- and how to get things rolling again.
+
+For further details on how to report Linux kernel issues or regressions check
+out Documentation/admin-guide/reporting-issues.rst, which works in conjunction
+with this document. It among others explains why you need to verify bugs with
+the latest 'mainline' kernel, even if you face a problem with a kernel from a
+'stable/longterm' series; for users facing a regression it also explains that
+sending a preliminary report after finishing segment 2 might be wise, as the
+regression and its culprit might be known already. For further details on
+what actually qualifies as a regression check out
+Documentation/admin-guide/reporting-regressions.rst.
+
+.. _introprep_bissbs:
+
+Preparations: set up everything to build your own kernels
+---------------------------------------------------------
+
+.. _backup_bissbs:
+
+* Create a fresh backup and put system repair and restore tools at hand, just
+ to be prepared for the unlikely case of something going sideways.
+
+ [:ref:`details<backup_bisref>`]
+
+.. _vanilla_bissbs:
+
+* Remove all software that depends on externally developed kernel drivers or
+ builds them automatically. That includes but is not limited to DKMS, openZFS,
+ VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module).
+
+ [:ref:`details<vanilla_bisref>`]
+
+.. _secureboot_bissbs:
+
+* On platforms with 'Secure Boot' or similar solutions, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot. The
+ quickest and easiest way to achieve this on commodity x86 systems is to
+ disable such techniques in the BIOS setup utility; alternatively, remove
+ their restrictions through a process initiated by
+ ``mokutil --disable-validation``.
+
+ [:ref:`details<secureboot_bisref>`]
+
+.. _rangecheck_bissbs:
+
+* Determine the kernel versions considered 'good' and 'bad' throughout this
+ guide.
+
+ Do you follow this guide to verify if a bug is present in the code developers
+ care for? Then consider the mainline release your 'working' kernel (the newest
+ one you regularly use) is based on to be the 'good' version; if your 'working'
+ kernel for example is '6.0.11', then your 'good' kernel is 'v6.0'.
+
+ In case you face a regression, it depends on the version range where the
+ regression was introduced:
+
+ * Something which used to work in Linux 6.0 broke when switching to Linux
+ 6.1-rc1? Then henceforth regard 'v6.0' as the last known 'good' version
+ and 'v6.1-rc1' as the first 'bad' one.
+
+ * Some function stopped working when updating from 6.0.11 to 6.1.4? Then for
+ the time being consider 'v6.0' as the last 'good' version and 'v6.1.4' as
+ the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine;
+ this assumption will be checked in segment 2.
+
+ * A feature you used in 6.0.11 does not work at all or worse in 6.1.13? In
+ that case you want to bisect within a stable/longterm series: consider
+ 'v6.0.11' as the last known 'good' version and 'v6.0.13' as the first 'bad'
+ one. Note, in this case you still want to compile and test a mainline kernel
+ as explained in segment 1: the outcome will determine if you need to report
+ your issue to the regular developers or the stable team.
+
+ *Note, do not confuse 'good' version with 'working' kernel; the latter term
+ throughout this guide will refer to the last kernel that has been working
+ fine.*
+
+ [:ref:`details<rangecheck_bisref>`]
+
+.. _bootworking_bissbs:
+
+* Boot into the 'working' kernel and briefly use the apparently broken feature.
+
+ [:ref:`details<bootworking_bisref>`]
+
+.. _diskspace_bissbs:
+
+* Ensure to have enough free space for building Linux. 15 Gigabyte in your home
+ directory should typically suffice. If you have less available, be sure to pay
+ attention to later steps about retrieving the Linux sources and handling of
+ debug symbols: both explain approaches reducing the amount of space, which
+ should allow you to master these tasks with about 4 Gigabytes free space.
+
+ [:ref:`details<diskspace_bisref>`]
+
+.. _buildrequires_bissbs:
+
+* Install all software required to build a Linux kernel. Often you will need:
+ 'bc', 'binutils' ('ld' et al.), 'bison', 'flex', 'gcc', 'git', 'openssl',
+ 'pahole', 'perl', and the development headers for 'libelf' and 'openssl'. The
+ reference section shows how to quickly install those on various popular Linux
+ distributions.
+
+ [:ref:`details<buildrequires_bisref>`]
+
+.. _sources_bissbs:
+
+* Retrieve the mainline Linux sources; then change into the directory holding
+ them, as all further commands in this guide are meant to be executed from
+ there.
+
+ *Note, the following describe how to retrieve the sources using a full
+ mainline clone, which downloads about 2,75 GByte as of early 2024. The*
+ :ref:`reference section describes two alternatives <sources_bisref>` *:
+ one downloads less than 500 MByte, the other works better with unreliable
+ internet connections.*
+
+ Execute the following command to retrieve a fresh mainline codebase while
+ preparing things to add stable/longterm branches later::
+
+ git clone -o mainline --no-checkout \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+ [:ref:`details<sources_bisref>`]
+
+.. _oldconfig_bissbs:
+
+* Start preparing a kernel build configuration (the '.config' file).
+
+ Before doing so, ensure you are still running the 'working' kernel an earlier
+ step told you to boot; if you are unsure, check the current kernel release
+ identifier using ``uname -r``.
+
+ Afterwards check out the source code for the version earlier established as
+ 'good' (in this example this is assumed to be 6.0) and create a .config file::
+
+ git checkout --detach v6.0
+ make olddefconfig
+
+ The second command will try to locate the build configuration file for the
+ running kernel and then adjust it for the needs of the kernel sources you
+ checked out. While doing so, it will print a few lines you need to check.
+
+ Look out for a line starting with '# using defaults found in'. It should be
+ followed by a path to a file in '/boot/' that contains the release identifier
+ of your currently working kernel. If the line instead continues with something
+ like 'arch/x86/configs/x86_64_defconfig', then the build infra failed to find
+ the .config file for your running kernel -- in which case you have to put one
+ there manually, as explained in the reference section.
+
+ In case you can not find such a line, look for one containing '# configuration
+ written to .config'. If that's the case you have a stale build configuration
+ lying around. Unless you intend to use it, delete it; afterwards run
+ 'make olddefconfig' again and check if it now picked up the right config file
+ as base.
+
+ [:ref:`details<oldconfig_bisref>`]
+
+.. _localmodconfig_bissbs:
+
+* Disable any kernel modules apparently superfluous for your setup. This is
+ optional, but especially wise for bisections, as it speeds up the build
+ process enormously -- at least unless the .config file picked up in the
+ previous step was already tailored to your and your hardware needs, in which
+ case you should skip this step.
+
+ To prepare the trimming, connect external hardware you occasionally use (USB
+ keys, tokens, ...), quickly start a VM, and bring up VPNs. And if you rebooted
+ since you started that guide, ensure that you tried using the feature causing
+ trouble since you started the system. Only then trim your .config::
+
+ yes '' | make localmodconfig
+
+ There is a catch to this, as the 'apparently' in initial sentence of this step
+ and the preparation instructions already hinted at:
+
+ The 'localmodconfig' target easily disables kernel modules for features only
+ used occasionally -- like modules for external peripherals not yet connected
+ since booting, virtualization software not yet utilized, VPN tunnels, and a
+ few other things. That's because some tasks rely on kernel modules Linux only
+ loads when you execute tasks like the aforementioned ones for the first time.
+
+ This drawback of localmodconfig is nothing you should lose sleep over, but
+ something to keep in mind: if something is misbehaving with the kernels built
+ during this guide, this is most likely the reason. You can reduce or nearly
+ eliminate the risk with tricks outlined in the reference section; but when
+ building a kernel just for quick testing purposes this is usually not worth
+ spending much effort on, as long as it boots and allows to properly test the
+ feature that causes trouble.
+
+ [:ref:`details<localmodconfig_bisref>`]
+
+.. _tagging_bissbs:
+
+* Ensure all the kernels you will build are clearly identifiable using a special
+ tag and a unique version number::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local'
+ ./scripts/config -e CONFIG_LOCALVERSION_AUTO
+
+ [:ref:`details<tagging_bisref>`]
+
+.. _debugsymbols_bissbs:
+
+* Decide how to handle debug symbols.
+
+ In the context of this document it is often wise to enable them, as there is a
+ decent chance you will need to decode a stack trace from a 'panic', 'Oops',
+ 'warning', or 'BUG'::
+
+ ./scripts/config -d DEBUG_INFO_NONE -e KALLSYMS_ALL -e DEBUG_KERNEL \
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS
+
+ But if you are extremely short on storage space, you might want to disable
+ debug symbols instead::
+
+ ./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \
+ -d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
+
+ [:ref:`details<debugsymbols_bisref>`]
+
+.. _configmods_bissbs:
+
+* Check if you may want or need to adjust some other kernel configuration
+ options:
+
+ * Are you running Debian? Then you want to avoid known problems by performing
+ additional adjustments explained in the reference section.
+
+ [:ref:`details<configmods_distros_bisref>`].
+
+ * If you want to influence other aspects of the configuration, do so now using
+ your preferred tool. Note, to use make targets like 'menuconfig' or
+ 'nconfig', you will need to install the development files of ncurses; for
+ 'xconfig' you likewise need the Qt5 or Qt6 headers.
+
+ [:ref:`details<configmods_individual_bisref>`].
+
+.. _saveconfig_bissbs:
+
+* Reprocess the .config after the latest adjustments and store it in a safe
+ place::
+
+ make olddefconfig
+ cp .config ~/kernel-config-working
+
+ [:ref:`details<saveconfig_bisref>`]
+
+.. _introlatestcheck_bissbs:
+
+Segment 1: try to reproduce the problem with the latest codebase
+----------------------------------------------------------------
+
+The following steps verify if the problem occurs with the code currently
+supported by developers. In case you face a regression, it also checks that the
+problem is not caused by some .config change, as reporting the issue then would
+be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
+
+.. _checkoutmaster_bissbs:
+
+* Check out the latest Linux codebase::
+
+ cd ~/linux/
+ git checkout --force --detach mainline/master
+
+ [:ref:`details<checkoutmaster_bisref>`]
+
+.. _build_bissbs:
+
+* Build the image and the modules of your first kernel using the config file you
+ prepared::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+
+ If you want your kernel packaged up as deb, rpm, or tar file, see the
+ reference section for alternatives, which obviously will require other
+ steps to install as well.
+
+ [:ref:`details<build_bisref>`]
+
+.. _install_bissbs:
+
+* Install your newly built kernel.
+
+ Before doing so, consider checking if there is still enough room for it::
+
+ df -h /boot/ /lib/modules/
+
+ 150 MByte in /boot/ and 200 in /lib/modules/ usually suffice. Those are rough
+ estimates assuming the worst case. How much your kernels actually require will
+ be determined later.
+
+ Now install the kernel, which will be saved in parallel to the kernels from
+ your Linux distribution::
+
+ command -v installkernel && sudo make modules_install install
+
+ On many commodity Linux distributions this will take care of everything
+ required to boot your kernel. You might want to ensure that's the case by
+ checking if your boot loader's configuration was updated; furthermore ensure
+ an initramfs (also known as initrd) exists, which on many distributions can be
+ achieved by running ``ls -l /boot/init*$(make -s kernelrelease)*``. Those
+ steps are recommended, as there are quite a few Linux distribution where above
+ command is insufficient:
+
+ * On Arch Linux, its derivatives, many immutable Linux distributions, and a
+ few others the above command does nothing at, as they lack 'installkernel'
+ executable.
+
+ * Some distributions install the kernel, but don't add an entry for your
+ kernel in your boot loader's configuration -- the kernel thus won't show up
+ in the boot menu.
+
+ * Some distributions add a boot loader menu entry, but don't create an
+ initramfs on installation -- in that case your kernel most likely will be
+ unable to mount the root partition during bootup.
+
+ If any of that applies to you, see the reference section for further guidance.
+ Once you figured out what to do, consider writing down the necessary
+ installation steps: if you will build more kernels as described in
+ segment 2 and 3, you will have to execute these commands every time that
+ ``command -v installkernel [...]`` comes up again.
+
+ [:ref:`details<install_bisref>`]
+
+.. _storagespace_bissbs:
+
+* In case you plan to follow this guide further, check how much storage space
+ the kernel, its modules, and other related files like the initramfs consume::
+
+ du -ch /boot/*$(make -s kernelrelease)* | tail -n 1
+ du -sh /lib/modules/$(make -s kernelrelease)/
+
+ Write down or remember those two values for later: they enable you to prevent
+ running out of disk space accidentally during a bisection.
+
+ [:ref:`details<storagespace_bisref>`]
+
+.. _kernelrelease_bissbs:
+
+* Show and store the kernelrelease identifier of the kernel you just built::
+
+ make -s kernelrelease | tee -a ~/kernels-built
+
+ Remember the identifier momentarily, as it will help you pick the right kernel
+ from the boot menu upon restarting.
+
+.. _recheckbroken_bissbs:
+
+* Reboot into the kernel you just built and check if the feature that is
+ expected to be broken really is.
+
+ Start by making sure the kernel you booted is the one you just built. When
+ unsure, check if the output of these commands show the exact same release
+ identifier::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now verify if the feature that causes trouble works with your newly built
+ kernel. If things work while investigating a regression, check the reference
+ section for further details.
+
+ [:ref:`details<recheckbroken_bisref>`]
+
+.. _recheckstablebroken_bissbs:
+
+* Are you facing a problem within a stable/longterm release, but failed to
+ reproduce it with the mainline kernel you just built? Then check if the latest
+ codebase for the particular series might already fix the problem. To do so,
+ add the stable series Git branch for your 'good' kernel (again, this here is
+ assumed to be 6.0) and check out the latest version::
+
+ cd ~/linux/
+ git remote set-branches --add stable linux-6.0.y
+ git fetch stable
+ git checkout --force --detach linux-6.0.y
+
+ Now use the checked out code to build and install another kernel using the
+ commands the earlier steps already described in more detail::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ command -v installkernel && sudo make modules_install install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ Now verify if you booted the kernel you intended to start, to then check if
+ everything works fine with this kernel::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ [:ref:`details<recheckstablebroken_bisref>`]
+
+Do you follow this guide to verify if a problem is present in the code
+currently supported by Linux kernel developers? Then you are done at this
+point. If you later want to remove the kernel you just built, check out
+:ref:`Supplementary tasks: cleanup during and after following this guide.<introclosure_bissbs>`.
+
+In case you face a regression, move on and execute at least the next segment
+as well.
+
+.. _introworkingcheck_bissbs:
+
+Segment 2: check if the kernels you build work fine
+---------------------------------------------------
+
+In case of a regression, you now want to ensure the trimmed configuration file
+you created earlier works as expected; a bisection with the .config file
+otherwise would be a waste of time. [:ref:`details<introworkingcheck_bisref>`]
+
+.. _recheckworking_bissbs:
+
+* Build your own variant of the 'working' kernel and check if the feature that
+ regressed works as expected with it.
+
+ Start by checking out the sources for the version earlier established as
+ 'good' (once again assumed to be 6.0 here)::
+
+ cd ~/linux/
+ git checkout --detach v6.0
+
+ Now use the checked out code to configure, build, and install another kernel
+ using the commands the previous subsection explained in more detail::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ command -v installkernel && sudo make modules_install install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ When the system booted, you may want to verify once again that the
+ kernel you started is the one you just built:
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now check if this kernel works as expected; if not, consult the reference
+ section for further instructions.
+
+ [:ref:`details<recheckworking_bisref>`]
+
+.. _introbisect_bissbs:
+
+Segment 3: perform the bisection and validate the result
+--------------------------------------------------------
+
+With all the preparations and precaution builds taken care of, you are now ready
+to begin the bisection. This will make you build quite a few kernels -- usually
+about 15 in case you encountered a regression when updating to a newer series
+(say from 6.0.11 to 6.1.3). But do not worry, due to the trimmed build
+configuration created earlier this works a lot faster than many people assume:
+overall on average it will often just take about 10 to 15 minutes to compile
+each kernel on commodity x86 machines.
+
+* In case your 'bad' version is a stable/longterm release (say v6.1.5), add its
+ stable branch, unless you already did so earlier::
+
+ cd ~/linux/
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
+
+.. _bisectstart_bissbs:
+
+* Start the bisection and tell Git about the versions earlier established as
+ 'good' (6.0 in the following example command) and 'bad' (6.1.5)::
+
+ cd ~/linux/
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1.5
+
+ [:ref:`details<bisectstart_bisref>`]
+
+.. _bisectbuild_bissbs:
+
+* Now use the code Git checked out to build, install, and boot a kernel using
+ the commands introduced earlier::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ command -v installkernel && sudo make modules_install install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ If compilation fails for some reason, run ``git bisect skip`` and restart
+ executing the stack of commands from the beginning.
+
+ In case you skipped the "test latest codebase" step in the guide, check its
+ description as for why the 'df [...]' and 'make -s kernelrelease [...]'
+ commands are here.
+
+ Important note: the latter command from this point on will print release
+ identifiers that might look odd or wrong to you -- which they are not, as it's
+ totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0'
+ if you bisect between versions 6.1 and 6.2 for example.
+
+ [:ref:`details<bisectbuild_bisref>`]
+
+.. _bisecttest_bissbs:
+
+* Now check if the feature that regressed works in the kernel you just built.
+
+ You again might want to start by making sure the kernel you booted is the one
+ you just built::
+
+ cd ~/linux/
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now verify if the feature that regressed works at this kernel bisection point.
+ If it does, run this::
+
+ git bisect good
+
+ If it does not, run this::
+
+ git bisect bad
+
+ Be sure about what you tell Git, as getting this wrong just once will send the
+ rest of the bisection totally off course.
+
+ While the bisection is ongoing, Git will use the information you provided to
+ find and check out another bisection point for you to test. While doing so, it
+ will print something like 'Bisecting: 675 revisions left to test after this
+ (roughly 10 steps)' to indicate how many further changes it expects to be
+ tested. Now build and install another kernel using the instructions from the
+ previous step; afterwards follow the instructions in this step again.
+
+ Repeat this again and again until you finish the bisection -- that's the case
+ when Git after tagging a change as 'good' or 'bad' prints something like
+ 'cafecaca0c0dacafecaca0c0dacafecaca0c0da is the first bad commit'; right
+ afterwards it will show some details about the culprit including the patch
+ description of the change. The latter might fill your terminal screen, so you
+ might need to scroll up to see the message mentioning the culprit;
+ alternatively, run ``git bisect log > ~/bisection-log``.
+
+ [:ref:`details<bisecttest_bisref>`]
+
+.. _bisectlog_bissbs:
+
+* Store Git's bisection log and the current .config file in a safe place before
+ telling Git to reset the sources to the state before the bisection::
+
+ cd ~/linux/
+ git bisect log > ~/bisection-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+ [:ref:`details<bisectlog_bisref>`]
+
+.. _revert_bissbs:
+
+* Try reverting the culprit on top of latest mainline to see if this fixes your
+ regression.
+
+ This is optional, as it might be impossible or hard to realize. The former is
+ the case, if the bisection determined a merge commit as the culprit; the
+ latter happens if other changes depend on the culprit. But if the revert
+ succeeds, it is worth building another kernel, as it validates the result of
+ a bisection, which can easily deroute; it furthermore will let kernel
+ developers know, if they can resolve the regression with a quick revert.
+
+ Begin by checking out the latest codebase depending on the range you bisected:
+
+ * Did you face a regression within a stable/longterm series (say between
+ 6.0.11 and 6.0.13) that does not happen in mainline? Then check out the
+ latest codebase for the affected series like this::
+
+ git fetch stable
+ git checkout --force --detach linux-6.0.y
+
+ * In all other cases check out latest mainline::
+
+ git fetch mainline
+ git checkout --force --detach mainline/master
+
+ If you bisected a regression within a stable/longterm series that also
+ happens in mainline, there is one more thing to do: look up the mainline
+ commit-id. To do so, use a command like ``git show abcdcafecabcd`` to
+ view the patch description of the culprit. There will be a line near
+ the top which looks like 'commit cafec0cacaca0 upstream.' or
+ 'Upstream commit cafec0cacaca0'; use that commit-id in the next command
+ and not the one the bisection blamed.
+
+ Now try reverting the culprit by specifying its commit id::
+
+ git revert --no-edit cafec0cacaca0
+
+ If that fails, give up trying and move on to the next step. But if it works,
+ build a kernel again using the familiar command sequence::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig &&
+ make -j $(nproc --all) &&
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ command -v installkernel && sudo make modules_install install
+ Make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ Now check one last time if the feature that made you perform a bisection work
+ with that kernel.
+
+ [:ref:`details<revert_bisref>`]
+
+.. _introclosure_bissbs:
+
+Supplementary tasks: cleanup during and after the bisection
+-----------------------------------------------------------
+
+During and after following this guide you might want or need to remove some of
+the kernels you installed: the boot menu otherwise will become confusing or
+space might run out.
+
+.. _makeroom_bissbs:
+
+* To remove one of the kernels you installed, look up its 'kernelrelease'
+ identifier. This guide stores them in '~/kernels-built', but the following
+ command will print them as well::
+
+ ls -ltr /lib/modules/*-local*
+
+ You in most situations want to remove the oldest kernels built during the
+ actual bisection (e.g. segment 3 of this guide). The two ones you created
+ beforehand (e.g. to test the latest codebase and the version considered
+ 'good') might become handy to verify something later -- thus better keep them
+ around, unless you are really short on storage space.
+
+ To remove the modules of a kernel with the kernelrelease identifier
+ '*6.0-rc1-local-gcafec0cacaca0*', start by removing the directory holding its
+ modules::
+
+ sudo rm -rf /lib/modules/6.0-rc1-local-gcafec0cacaca0
+
+ Afterwards try the following command::
+
+ sudo kernel-install -v remove 6.0-rc1-local-gcafec0cacaca0
+
+ On quite a few distributions this will delete all other kernel files installed
+ while also removing the kernel's entry from the boot menu. But on some
+ distributions kernel-install does not exist or leaves boot-loader entries or
+ kernel image and related files behind; in that case remove them as described
+ in the reference section.
+
+ [:ref:`details<makeroom_bisref>`]
+
+.. _finishingtouch_bissbs:
+
+* Once you have finished the bisection, do not immediately remove anything you
+ set up, as you might need a few things again. What is safe to remove depends
+ on the outcome of the bisection:
+
+ * Could you initially reproduce the regression with the latest codebase and
+ after the bisection were able to fix the problem by reverting the culprit on
+ top of the latest codebase? Then you want to keep those two kernels around
+ for a while, but safely remove all others with a '-local' in the release
+ identifier.
+
+ * Did the bisection end on a merge-commit or seems questionable for other
+ reasons? Then you want to keep as many kernels as possible around for a few
+ days: it's pretty likely that you will be asked to recheck something.
+
+ * In other cases it likely is a good idea to keep the following kernels around
+ for some time: the one built from the latest codebase, the one created from
+ the version considered 'good', and the last three or four you compiled
+ during the actual bisection process.
+
+ [:ref:`details<finishingtouch_bisref>`]
+
+.. _submit_improvements:
+
+This concludes the step-by-step guide.
+
+Did you run into trouble following any of the above steps not cleared up by the
+reference section below? Did you spot errors? Or do you have ideas how to
+improve the guide? Then please take a moment and let the maintainer of this
+document know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while
+CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is
+vital to improve this document further, which is in everybody's interest, as it
+will enable more people to master the task described here -- and hopefully also
+improve similar guides inspired by this one.
+
+
+Reference section for the step-by-step guide
+============================================
+
+This section holds additional information for almost all the items in the above
+step-by-step guide.
+
+.. _backup_bisref:
+
+Prepare for emergencies
+-----------------------
+
+ *Create a fresh backup and put system repair and restore tools at hand.*
+ [:ref:`... <backup_bissbs>`]
+
+Remember, you are dealing with computers, which sometimes do unexpected things
+-- especially if you fiddle with crucial parts like the kernel of an operating
+system. That's what you are about to do in this process. Hence, better prepare
+for something going sideways, even if that should not happen.
+
+[:ref:`back to step-by-step guide <backup_bissbs>`]
+
+.. _vanilla_bisref:
+
+Remove anything related to externally maintained kernel modules
+---------------------------------------------------------------
+
+ *Remove all software that depends on externally developed kernel drivers or
+ builds them automatically.* [:ref:`...<vanilla_bissbs>`]
+
+Externally developed kernel modules can easily cause trouble during a bisection.
+
+But there is a more important reason why this guide contains this step: most
+kernel developers will not care about reports about regressions occurring with
+kernels that utilize such modules. That's because such kernels are not
+considered 'vanilla' anymore, as Documentation/admin-guide/reporting-issues.rst
+explains in more detail.
+
+[:ref:`back to step-by-step guide <vanilla_bissbs>`]
+
+.. _secureboot_bisref:
+
+Deal with techniques like Secure Boot
+-------------------------------------
+
+ *On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later.*
+ [:ref:`... <secureboot_bissbs>`]
+
+Many modern systems allow only certain operating systems to start; that's why
+they reject booting self-compiled kernels by default.
+
+You ideally deal with this by making your platform trust your self-built kernels
+with the help of a certificate. How to do that is not described
+here, as it requires various steps that would take the text too far away from
+its purpose; 'Documentation/admin-guide/module-signing.rst' and various web
+sides already explain everything needed in more detail.
+
+Temporarily disabling solutions like Secure Boot is another way to make your own
+Linux boot. On commodity x86 systems it is possible to do this in the BIOS Setup
+utility; the required steps vary a lot between machines and therefore cannot be
+described here.
+
+On mainstream x86 Linux distributions there is a third and universal option:
+disable all Secure Boot restrictions for your Linux environment. You can
+initiate this process by running ``mokutil --disable-validation``; this will
+tell you to create a one-time password, which is safe to write down. Now
+restart; right after your BIOS performed all self-tests the bootloader Shim will
+show a blue box with a message 'Press any key to perform MOK management'. Hit
+some key before the countdown exposes, which will open a menu. Choose 'Change
+Secure Boot state'. Shim's 'MokManager' will now ask you to enter three
+randomly chosen characters from the one-time password specified earlier. Once
+you provided them, confirm you really want to disable the validation.
+Afterwards, permit MokManager to reboot the machine.
+
+[:ref:`back to step-by-step guide <secureboot_bissbs>`]
+
+.. _bootworking_bisref:
+
+Boot the last kernel that was working
+-------------------------------------
+
+ *Boot into the last working kernel and briefly recheck if the feature that
+ regressed really works.* [:ref:`...<bootworking_bissbs>`]
+
+This will make later steps that cover creating and trimming the configuration do
+the right thing.
+
+[:ref:`back to step-by-step guide <bootworking_bissbs>`]
+
+.. _diskspace_bisref:
+
+Space requirements
+------------------
+
+ *Ensure to have enough free space for building Linux.*
+ [:ref:`... <diskspace_bissbs>`]
+
+The numbers mentioned are rough estimates with a big extra charge to be on the
+safe side, so often you will need less.
+
+If you have space constraints, be sure to hay attention to the :ref:`step about
+debug symbols' <debugsymbols_bissbs>` and its :ref:`accompanying reference
+section' <debugsymbols_bisref>`, as disabling then will reduce the consumed disk
+space by quite a few gigabytes.
+
+[:ref:`back to step-by-step guide <diskspace_bissbs>`]
+
+.. _rangecheck_bisref:
+
+Bisection range
+---------------
+
+ *Determine the kernel versions considered 'good' and 'bad' throughout this
+ guide.* [:ref:`...<rangecheck_bissbs>`]
+
+Establishing the range of commits to be checked is mostly straightforward,
+except when a regression occurred when switching from a release of one stable
+series to a release of a later series (e.g. from 6.0.11 to 6.1.4). In that case
+Git will need some hand holding, as there is no straight line of descent.
+
+That's because with the release of 6.0 mainline carried on to 6.1 while the
+stable series 6.0.y branched to the side. It's therefore theoretically possible
+that the issue you face with 6.1.4 only worked in 6.0.11, as it was fixed by a
+commit that went into one of the 6.0.y releases, but never hit mainline or the
+6.1.y series. Thankfully that normally should not happen due to the way the
+stable/longterm maintainers maintain the code. It's thus pretty safe to assume
+6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel
+will be built and tested in the segment '2' of this guide; Git would force you
+to do this as well, if you tried bisecting between 6.0.11 and 6.1.13.
+
+[:ref:`back to step-by-step guide <rangecheck_bissbs>`]
+
+.. _buildrequires_bisref:
+
+Install build requirements
+--------------------------
+
+ *Install all software required to build a Linux kernel.*
+ [:ref:`...<buildrequires_bissbs>`]
+
+The kernel is pretty stand-alone, but besides tools like the compiler you will
+sometimes need a few libraries to build one. How to install everything needed
+depends on your Linux distribution and the configuration of the kernel you are
+about to build.
+
+Here are a few examples what you typically need on some mainstream
+distributions:
+
+* Arch Linux and derivatives::
+
+ sudo pacman --needed -S bc binutils bison flex gcc git kmod libelf openssl \
+ pahole perl zlib ncurses qt6-base
+
+* Debian, Ubuntu, and derivatives::
+
+ sudo apt install bc binutils bison dwarves flex gcc git kmod libelf-dev \
+ libssl-dev make openssl pahole perl-base pkg-config zlib1g-dev \
+ libncurses-dev qt6-base-dev g++
+
+* Fedora and derivatives::
+
+ sudo dnf install binutils \
+ /usr/bin/{bc,bison,flex,gcc,git,openssl,make,perl,pahole,rpmbuild} \
+ /usr/include/{libelf.h,openssl/pkcs7.h,zlib.h,ncurses.h,qt6/QtGui/QAction}
+
+* openSUSE and derivatives::
+
+ sudo zypper install bc binutils bison dwarves flex gcc git \
+ kernel-install-tools libelf-devel make modutils openssl openssl-devel \
+ perl-base zlib-devel rpm-build ncurses-devel qt6-base-devel
+
+These commands install a few packages that are often, but not always needed. You
+for example might want to skip installing the development headers for ncurses,
+which you will only need in case you later might want to adjust the kernel build
+configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit
+the headers of Qt6 is you do not plan to adjust the .config using 'xconfig'.
+
+You furthermore might need additional libraries and their development headers
+for tasks not covered in this guide -- for example when building utilities from
+the kernel's tools/ directory.
+
+[:ref:`back to step-by-step guide <buildrequires_bissbs>`]
+
+.. _sources_bisref:
+
+Download the sources using Git
+------------------------------
+
+ *Retrieve the Linux mainline sources.*
+ [:ref:`...<sources_bissbs>`]
+
+The step-by-step guide outlines how to download the Linux sources using a full
+Git clone of Linus' mainline repository. There is nothing more to say about
+that -- but there are two alternatives ways to retrieve the sources that might
+work better for you:
+
+ * If you have an unreliable internet connection, consider
+ :ref:`using a 'Git bundle'<sources_bundle_bisref>`.
+
+ * If downloading the complete repository would take too long or requires too
+ much storage space, consider :ref:`using a 'shallow
+ clone'<sources_shallow_bisref>`.
+
+.. _sources_bundle_bisref:
+
+Downloading Linux mainline sources using a bundle
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the following commands to retrieve the Linux mainline sources using a
+bundle::
+
+ wget -c \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/clone.bundle
+ git clone --no-checkout clone.bundle ~/linux/
+ cd ~/linux/
+ git remote remove origin
+ git remote add mainline \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ git fetch mainline
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+In case the 'wget' command fails, just re-execute it, it will pick up where
+it left off.
+
+[:ref:`back to step-by-step guide <sources_bissbs>`]
+[:ref:`back to section intro <sources_bisref>`]
+
+.. _sources_shallow_bisref:
+
+Downloading Linux mainline sources using a shallow clone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First, execute the following command to retrieve the latest mainline codebase::
+
+ git clone -o mainline --no-checkout --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+Now deepen your clone's history to the second predecessor of the mainline
+release of your 'good' version. In case the latter are 6.0 or 6.0.11, 5.19 would
+be the first predecessor and 5.18 the second -- hence deepen the history up to
+that version::
+
+ git fetch --shallow-exclude=v5.18 mainline
+
+Afterwards add the stable Git repository as remote and all required stable
+branches as explained in the step-by-step guide.
+
+Note, shallow clones have a few peculiar characteristics:
+
+ * For bisections the history needs to be deepened a few mainline versions
+ farther than it seems necessary, as explained above already. That's because
+ Git otherwise will be unable to revert or describe most of the commits within
+ a range (say v6.1..v6.2), as they are internally based on earlier kernels
+ releases (like v6.0-rc2 or 5.19-rc3).
+
+ * This document in most places uses ``git fetch`` with ``--shallow-exclude=``
+ to specify the earliest version you care about (or to be precise: its git
+ tag). You alternatively can use the parameter ``--shallow-since=`` to specify
+ an absolute (say ``'2023-07-15'``) or relative (``'12 months'``) date to
+ define the depth of the history you want to download. When using them while
+ bisecting mainline, ensure to deepen the history to at least 7 months before
+ the release of the mainline release your 'good' kernel is based on.
+
+ * Be warned, when deepening your clone you might encounter an error like
+ 'fatal: error in object: unshallow cafecaca0c0dacafecaca0c0dacafecaca0c0da'.
+ In that case run ``git repack -d`` and try again.
+
+[:ref:`back to step-by-step guide <sources_bissbs>`]
+[:ref:`back to section intro <sources_bisref>`]
+
+.. _oldconfig_bisref:
+
+Start defining the build configuration for your kernel
+------------------------------------------------------
+
+ *Start preparing a kernel build configuration (the '.config' file).*
+ [:ref:`... <oldconfig_bissbs>`]
+
+*Note, this is the first of multiple steps in this guide that create or modify
+build artifacts. The commands used in this guide store them right in the source
+tree to keep things simple. In case you prefer storing the build artifacts
+separately, create a directory like '~/linux-builddir/' and add the parameter
+``O=~/linux-builddir/`` to all make calls used throughout this guide. You will
+have to point other commands there as well -- among them the ``./scripts/config
+[...]`` commands, which will require ``--file ~/linux-builddir/.config`` to
+locate the right build configuration.*
+
+Two things can easily go wrong when creating a .config file as advised:
+
+ * The oldconfig target will use a .config file from your build directory, if
+ one is already present there (e.g. '~/linux/.config'). That's totally fine if
+ that's what you intend (see next step), but in all other cases you want to
+ delete it. This for example is important in case you followed this guide
+ further, but due to problems come back here to redo the configuration from
+ scratch.
+
+ * Sometimes olddefconfig is unable to locate the .config file for your running
+ kernel and will use defaults, as briefly outlined in the guide. In that case
+ check if your distribution ships the configuration somewhere and manually put
+ it in the right place (e.g. '~/linux/.config') if it does. On distributions
+ where /proc/config.gz exists this can be achieved using this command::
+
+ zcat /proc/config.gz > .config
+
+ Once you put it there, run ``make olddefconfig`` again to adjust it to the
+ needs of the kernel about to be built.
+
+Note, the olddefconfig target will set any undefined build options to their
+default value. If you prefer to set such configuration options manually, use
+``make oldconfig`` instead. Then for each undefined configuration option you
+will be asked how to proceed; in case you are unsure what to answer, simply hit
+'enter' to apply the default value. Note though that for bisections you normally
+want to go with the defaults, as you otherwise might enable a new feature that
+causes a problem looking like regressions (for example due to security
+restrictions).
+
+Occasionally odd things happen when trying to use a config file prepared for one
+kernel (say 6.1) on an older mainline release -- especially if it is much older
+(say v5.15). That's one of the reasons why the previous step in the guide told
+you to boot the kernel where everything works. If you manually add a .config
+file you thus want to ensure it's from the working kernel and not from a one
+that shows the regression.
+
+In case you want to build kernels for another machine, locate its kernel build
+configuration; usually ``ls /boot/config-$(uname -r)`` will print its name. Copy
+that file to the build machine and store it as ~/linux/.config; afterwards run
+``make olddefconfig`` to adjust it.
+
+[:ref:`back to step-by-step guide <oldconfig_bissbs>`]
+
+.. _localmodconfig_bisref:
+
+Trim the build configuration for your kernel
+--------------------------------------------
+
+ *Disable any kernel modules apparently superfluous for your setup.*
+ [:ref:`... <localmodconfig_bissbs>`]
+
+As explained briefly in the step-by-step guide already: with localmodconfig it
+can easily happen that your self-built kernels will lack modules for tasks you
+did not perform at least once before utilizing this make target. That happens
+when a task requires kernel modules which are only autoloaded when you execute
+it for the first time. So when you never performed that task since starting your
+kernel the modules will not have been loaded -- and from localmodonfig's point
+of view look superfluous, which thus disables them to reduce the amount of code
+to be compiled.
+
+You can try to avoid this by performing typical tasks that often will autoload
+additional kernel modules: start a VM, establish VPN connections, loop-mount a
+CD/DVD ISO, mount network shares (CIFS, NFS, ...), and connect all external
+devices (2FA keys, headsets, webcams, ...) as well as storage devices with file
+systems you otherwise do not utilize (btrfs, ext4, FAT, NTFS, XFS, ...). But it
+is hard to think of everything that might be needed -- even kernel developers
+often forget one thing or another at this point.
+
+Do not let that risk bother you, especially when compiling a kernel only for
+testing purposes: everything typically crucial will be there. And if you forget
+something important you can turn on a missing feature manually later and quickly
+run the commands again to compile and install a kernel that has everything you
+need.
+
+But if you plan to build and use self-built kernels regularly, you might want to
+reduce the risk by recording which modules your system loads over the course of
+a few weeks. You can automate this with `modprobed-db
+<https://github.com/graysky2/modprobed-db>`_. Afterwards use ``LSMOD=<path>`` to
+point localmodconfig to the list of modules modprobed-db noticed being used::
+
+ yes '' | make LSMOD='${HOME}'/.config/modprobed.db localmodconfig
+
+That parameter also allows you to build trimmed kernels for another machine in
+case you copied a suitable .config over to use as base (see previous step). Just
+run ``lsmod > lsmod_foo-machine`` on that system and copy the generated file to
+your build's host home directory. Then run these commands instead of the one the
+step-by-step guide mentions::
+
+ yes '' | make LSMOD=~/lsmod_foo-machine localmodconfig
+
+[:ref:`back to step-by-step guide <localmodconfig_bissbs>`]
+
+.. _tagging_bisref:
+
+Tag the kernels about to be build
+---------------------------------
+
+ *Ensure all the kernels you will build are clearly identifiable using a
+ special tag and a unique version identifier.* [:ref:`... <tagging_bissbs>`]
+
+This allows you to differentiate your distribution's kernels from those created
+during this process, as the file or directories for the latter will contain
+'-local' in the name; it also helps picking the right entry in the boot menu and
+not lose track of you kernels, as their version numbers will look slightly
+confusing during the bisection.
+
+[:ref:`back to step-by-step guide <tagging_bissbs>`]
+
+.. _debugsymbols_bisref:
+
+Decide to enable or disable debug symbols
+-----------------------------------------
+
+ *Decide how to handle debug symbols.* [:ref:`... <debugsymbols_bissbs>`]
+
+Having debug symbols available can be important when your kernel throws a
+'panic', 'Oops', 'warning', or 'BUG' later when running, as then you will be
+able to find the exact place where the problem occurred in the code. But
+collecting and embedding the needed debug information takes time and consumes
+quite a bit of space: in late 2022 the build artifacts for a typical x86 kernel
+trimmed with localmodconfig consumed around 5 Gigabyte of space with debug
+symbols, but less than 1 when they were disabled. The resulting kernel image and
+modules are bigger as well, which increases storage requirements for /boot/ and
+load times.
+
+In case you want a small kernel and are unlikely to decode a stack trace later,
+you thus might want to disable debug symbols to avoid those downsides. If it
+later turns out that you need them, just enable them as shown and rebuild the
+kernel.
+
+You on the other hand definitely want to enable them for this process, if there
+is a decent chance that you need to decode a stack trace later. The section
+'Decode failure messages' in Documentation/admin-guide/reporting-issues.rst
+explains this process in more detail.
+
+[:ref:`back to step-by-step guide <debugsymbols_bissbs>`]
+
+.. _configmods_bisref:
+
+Adjust build configuration
+--------------------------
+
+ *Check if you may want or need to adjust some other kernel configuration
+ options:*
+
+Depending on your needs you at this point might want or have to adjust some
+kernel configuration options.
+
+.. _configmods_distros_bisref:
+
+Distro specific adjustments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Are you running* [:ref:`... <configmods_bissbs>`]
+
+The following sections help you to avoid build problems that are known to occur
+when following this guide on a few commodity distributions.
+
+**Debian:**
+
+ * Remove a stale reference to a certificate file that would cause your build to
+ fail::
+
+ ./scripts/config --set-str SYSTEM_TRUSTED_KEYS ''
+
+ Alternatively, download the needed certificate and make that configuration
+ option point to it, as `the Debian handbook explains in more detail
+ <https://debian-handbook.info/browse/stable/sect.kernel-compilation.html>`_
+ -- or generate your own, as explained in
+ Documentation/admin-guide/module-signing.rst.
+
+[:ref:`back to step-by-step guide <configmods_bissbs>`]
+
+.. _configmods_individual_bisref:
+
+Individual adjustments
+~~~~~~~~~~~~~~~~~~~~~~
+
+ *If you want to influence the other aspects of the configuration, do so
+ now.* [:ref:`... <configmods_bissbs>`]
+
+You at this point can use a command like ``make menuconfig`` to enable or
+disable certain features using a text-based user interface; to use a graphical
+configuration utility, call the make target ``xconfig`` or ``gconfig`` instead.
+All of them require development libraries from toolkits they are based on
+(ncurses, Qt5, Gtk2); an error message will tell you if something required is
+missing.
+
+[:ref:`back to step-by-step guide <configmods_bissbs>`]
+
+.. _saveconfig_bisref:
+
+Put the .config file aside
+--------------------------
+
+ *Reprocess the .config after the latest changes and store it in a safe place.*
+ [:ref:`... <saveconfig_bissbs>`]
+
+Put the .config you prepared aside, as you want to copy it back to the build
+directory every time during this guide before you start building another
+kernel. That's because going back and forth between different versions can alter
+.config files in odd ways; those occasionally cause side effects that could
+confuse testing or in some cases render the result of your bisection
+meaningless.
+
+[:ref:`back to step-by-step guide <saveconfig_bissbs>`]
+
+.. _introlatestcheck_bisref:
+
+Try to reproduce the regression
+-----------------------------------------
+
+ *Verify the regression is not caused by some .config change and check if it
+ still occurs with the latest codebase.* [:ref:`... <introlatestcheck_bissbs>`]
+
+For some readers it might seem unnecessary to check the latest codebase at this
+point, especially if you did that already with a kernel prepared by your
+distributor or face a regression within a stable/longterm series. But it's
+highly recommended for these reasons:
+
+* You will run into any problems caused by your setup before you actually begin
+ a bisection. That will make it a lot easier to differentiate between 'this
+ most likely is some problem in my setup' and 'this change needs to be skipped
+ during the bisection, as the kernel sources at that stage contain an unrelated
+ problem that causes building or booting to fail'.
+
+* These steps will rule out if your problem is caused by some change in the
+ build configuration between the 'working' and the 'broken' kernel. This for
+ example can happen when your distributor enabled an additional security
+ feature in the newer kernel which was disabled or not yet supported by the
+ older kernel. That security feature might get into the way of something you
+ do -- in which case your problem from the perspective of the Linux kernel
+ upstream developers is not a regression, as
+ Documentation/admin-guide/reporting-regressions.rst explains in more detail.
+ You thus would waste your time if you'd try to bisect this.
+
+* If the cause for your regression was already fixed in the latest mainline
+ codebase, you'd perform the bisection for nothing. This holds true for a
+ regression you encountered with a stable/longterm release as well, as they are
+ often caused by problems in mainline changes that were backported -- in which
+ case the problem will have to be fixed in mainline first. Maybe it already was
+ fixed there and the fix is already in the process of being backported.
+
+* For regressions within a stable/longterm series it's furthermore crucial to
+ know if the issue is specific to that series or also happens in the mainline
+ kernel, as the report needs to be sent to different people:
+
+ * Regressions specific to a stable/longterm series are the stable team's
+ responsibility; mainline Linux developers might or might not care.
+
+ * Regressions also happening in mainline are something the regular Linux
+ developers and maintainers have to handle; the stable team does not care
+ and does not need to be involved in the report, they just should be told
+ to backport the fix once it's ready.
+
+ Your report might be ignored if you send it to the wrong party -- and even
+ when you get a reply there is a decent chance that developers tell you to
+ evaluate which of the two cases it is before they take a closer look.
+
+[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
+
+.. _checkoutmaster_bisref:
+
+Checkout the latest Linux codebase
+----------------------------------
+
+ *Checkout the latest Linux codebase.*
+ [:ref:`... <introlatestcheck_bissbs>`]
+
+In case you later want to recheck if an ever newer codebase might fix the
+problem, remember to run that ``git fetch --shallow-exclude [...]`` command
+again mentioned earlier to update your local Git repository.
+
+[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
+
+.. _build_bisref:
+
+Build your kernel
+-----------------
+
+ *Build the image and the modules of your first kernel using the config file
+ you prepared.* [:ref:`... <build_bissbs>`]
+
+A lot can go wrong at this stage, but the instructions below will help you help
+yourself. Another subsection explains how to directly package your kernel up as
+deb, rpm or tar file.
+
+Dealing with build errors
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a build error occurs, it might be caused by some aspect of your machine's
+setup that often can be fixed quickly; other times though the problem lies in
+the code and can only be fixed by a developer. A close examination of the
+failure messages coupled with some research on the internet will often tell you
+which of the two it is. To perform such a investigation, restart the build
+process like this::
+
+ make V=1
+
+The ``V=1`` activates verbose output, which might be needed to see the actual
+error. To make it easier to spot, this command also omits the ``-j $(nproc
+--all)`` used earlier to utilize every CPU core in the system for the job -- but
+this parallelism also results in some clutter when failures occur.
+
+After a few seconds the build process should run into the error again. Now try
+to find the most crucial line describing the problem. Then search the internet
+for the most important and non-generic section of that line (say 4 to 8 words);
+avoid or remove anything that looks remotely system-specific, like your username
+or local path names like ``/home/username/linux/``. First try your regular
+internet search engine with that string, afterwards search Linux kernel mailing
+lists via `lore.kernel.org/all/ <https://lore.kernel.org/all/>`_.
+
+This most of the time will find something that will explain what is wrong; quite
+often one of the hits will provide a solution for your problem, too. If you
+do not find anything that matches your problem, try again from a different angle
+by modifying your search terms or using another line from the error messages.
+
+In the end, most trouble you are to run into has likely been encountered and
+reported by others already. That includes issues where the cause is not your
+system, but lies the code. If you run into one of those, you might thus find a
+solution (e.g. a patch) or workaround for your problem, too.
+
+Package your kernel up
+~~~~~~~~~~~~~~~~~~~~~~
+
+The step-by-step guide uses the default make targets (e.g. 'bzImage' and
+'modules' on x86) to build the image and the modules of your kernel, which later
+steps of the guide then install. You instead can also directly build everything
+and directly package it up by using one of the following targets:
+
+ * ``make -j $(nproc --all) bindeb-pkg`` to generate a deb package
+
+ * ``make -j $(nproc --all) binrpm-pkg`` to generate a rpm package
+
+ * ``make -j $(nproc --all) tarbz2-pkg`` to generate a bz2 compressed tarball
+
+This is just a selection of available make targets for this purpose, see
+``make help`` for others. You can also use these targets after running
+``make -j $(nproc --all)``, as they will pick up everything already built.
+
+If you employ the targets to generate deb or rpm packages, ignore the
+step-by-step guide's instructions on installing and removing your kernel;
+instead install and remove the packages using the package utility for the format
+(e.g. dpkg and rpm) or a package management utility build on top of them (apt,
+aptitude, dnf/yum, zypper, ...). Be aware that the packages generated using
+these two make targets are designed to work on various distributions utilizing
+those formats, they thus will sometimes behave differently than your
+distribution's kernel packages.
+
+[:ref:`back to step-by-step guide <build_bissbs>`]
+
+.. _install_bisref:
+
+Put the kernel in place
+-----------------------
+
+ *Install the kernel you just built.* [:ref:`... <install_bissbs>`]
+
+What you need to do after executing the command in the step-by-step guide
+depends on the existence and the implementation of an ``installkernel``
+executable. Many commodity Linux distributions ship such a kernel installer in
+'/sbin/' that does everything needed, hence there is nothing left for you
+except rebooting. But some distributions contain an installkernel that does
+only part of the job -- and a few lack it completely and leave all the work to
+you.
+
+If ``installkernel`` is found, the kernel's build system will delegate the
+actual installation of your kernel's image and related files to this executable.
+On almost all Linux distributions it will store the image as '/boot/vmlinuz-
+<kernelrelease identifier>' and put a 'System.map-<kernelrelease
+identifier>' alongside it. Your kernel will thus be installed in parallel to any
+existing ones, unless you already have one with exactly the same release name.
+
+Installkernel on many distributions will afterwards generate an 'initramfs'
+(often also called 'initrd'), which commodity distributions rely on for booting;
+hence be sure to keep the order of the two make targets used in the step-by-step
+guide, as things will go sideways if you install your kernel's image before its
+modules. Often installkernel will then add your kernel to the bootloader
+configuration, too. You have to take care of one or both of these tasks
+yourself, if your distributions installkernel doesn't handle them.
+
+A few distributions like Arch Linux and its derivatives totally lack an
+installkernel executable. On those just install the modules using the kernel's
+build system and then install the image and the System.map file manually::
+
+ sudo make modules_install
+ sudo install -m 0600 $(make -s image_name) /boot/vmlinuz-$(make -s kernelrelease)
+ sudo install -m 0600 System.map /boot/System.map-$(make -s kernelrelease)
+
+If your distribution boots with the help of an initramfs, now generate one for
+your kernel using the tools your distribution provides for this process.
+Afterwards add your kernel to your bootloader configuration and reboot.
+
+[:ref:`back to step-by-step guide <install_bissbs>`]
+
+.. _storagespace_bisref:
+
+Storage requirements per kernel
+-------------------------------
+
+ *Check how much storage space the kernel, its modules, and other related files
+ like the initramfs consume.* [:ref:`... <storagespace_bissbs>`]
+
+The kernels built during a bisection consume quite a bit of space in /boot/ and
+/lib/modules/, especially if you enabled debug symbols. That makes it easy to
+fill up volumes during a bisection -- and due to that even kernels which used to
+work earlier might fail to boot. To prevent that you will need to know how much
+space each installed kernel typically requires.
+
+Note, most of the time the pattern '/boot/*$(make -s kernelrelease)*' used in
+the guide will match all files needed to boot your kernel -- but neither the
+path nor the naming scheme are mandatory. On some distributions you thus will
+need to look in different places.
+
+[:ref:`back to step-by-step guide <storagespace_bissbs>`]
+
+.. _recheckbroken_bisref:
+
+Check the kernel built from the latest codebase
+-----------------------------------------------
+
+ *Reboot into the kernel you just built and check if the feature that regressed
+ is really broken there.* [:ref:`... <recheckbroken_bissbs>`]
+
+There are a couple of reasons why the regression you face might not show up with
+your own kernel built from the latest codebase. These are the most frequent:
+
+* The cause for the regression was fixed meanwhile.
+
+* The regression with the broken kernel was caused by a change in the build
+ configuration the provider of your kernel carried out.
+
+* Your problem might be a race condition that does not show up with your kernel;
+ the trimmed build configuration, a different setting for debug symbols, the
+ compiler used, and various other things can cause this.
+
+* In case you encountered the regression with a stable/longterm kernel it might
+ be a problem that is specific to that series; the next step in this guide will
+ check this.
+
+[:ref:`back to step-by-step guide <recheckbroken_bissbs>`]
+
+.. _recheckstablebroken_bisref:
+
+Check the kernel built from the latest stable/longterm codebase
+---------------------------------------------------------------
+
+ *Are you facing a regression within a stable/longterm release, but failed to
+ reproduce it with the kernel you just built using the latest mainline sources?
+ Then check if the latest codebase for the particular series might already fix
+ the problem.* [:ref:`... <recheckstablebroken_bissbs>`]
+
+If this kernel does not show the regression either, there most likely is no need
+for a bisection.
+
+[:ref:`back to step-by-step guide <recheckstablebroken_bissbs>`]
+
+.. _introworkingcheck_bisref:
+
+Ensure the 'good' version is really working well
+------------------------------------------------
+
+ *Check if the kernels you build work fine.*
+ [:ref:`... <introworkingcheck_bissbs>`]
+
+This section will reestablish a known working base. Skipping it might be
+appealing, but is usually a bad idea, as it does something important:
+
+It will ensure the .config file you prepared earlier actually works as expected.
+That is in your own interest, as trimming the configuration is not foolproof --
+and you might be building and testing ten or more kernels for nothing before
+starting to suspect something might be wrong with the build configuration.
+
+That alone is reason enough to spend the time on this, but not the only reason.
+
+Many readers of this guide normally run kernels that are patched, use add-on
+modules, or both. Those kernels thus are not considered 'vanilla' -- therefore
+it's possible that the thing that regressed might never have worked in vanilla
+builds of the 'good' version in the first place.
+
+There is a third reason for those that noticed a regression between
+stable/longterm kernels of different series (e.g. v6.0.13..v6.1.5): it will
+ensure the kernel version you assumed to be 'good' earlier in the process (e.g.
+v6.0) actually is working.
+
+[:ref:`back to step-by-step guide <introworkingcheck_bissbs>`]
+
+.. _recheckworking_bisref:
+
+Build your own version of the 'good' kernel
+-------------------------------------------
+
+ *Build your own variant of the working kernel and check if the feature that
+ regressed works as expected with it.* [:ref:`... <recheckworking_bissbs>`]
+
+In case the feature that broke with newer kernels does not work with your first
+self-built kernel, find and resolve the cause before moving on. There are a
+multitude of reasons why this might happen. Some ideas where to look:
+
+* Maybe localmodconfig did something odd and disabled the module required to
+ test the feature? Then you might want to recreate a .config file based on the
+ one from the last working kernel and skip trimming it down; manually disabling
+ some features in the .config might work as well to reduce the build time.
+
+* Maybe it's not a kernel regression and something that is caused by some fluke,
+ a broken initramfs (also known as initrd), new firmware files, or an updated
+ userland software?
+
+* Maybe it was a feature added to your distributor's kernel which vanilla Linux
+ at that point never supported?
+
+Note, if you found and fixed problems with the .config file, you want to use it
+to build another kernel from the latest codebase, as your earlier tests with
+mainline and the latest version from an affected stable/longterm series most
+likely has been flawed.
+
+[:ref:`back to step-by-step guide <recheckworking_bissbs>`]
+
+.. _bisectstart_bisref:
+
+Start the bisection
+-------------------
+
+ *Start the bisection and tell Git about the versions earlier established as
+ 'good' and 'bad'.* [:ref:`... <bisectstart_bissbs>`]
+
+This will start the bisection process; the last of the commands will make Git
+checkout a commit round about half-way between the 'good' and the 'bad' changes
+for your to test.
+
+[:ref:`back to step-by-step guide <bisectstart_bissbs>`]
+
+.. _bisectbuild_bisref:
+
+Build a kernel from the bisection point
+---------------------------------------
+
+ *Build, install, and boot a kernel from the code Git checked out using the
+ same commands you used earlier.* [:ref:`... <bisectbuild_bissbs>`]
+
+There are two things worth of note here:
+
+* Occasionally building the kernel will fail or it might not boot due some
+ problem in the code at the bisection point. In that case run this command::
+
+ git bisect skip
+
+ Git will then check out another commit nearby which with a bit of luck should
+ work better. Afterwards restart executing this step.
+
+* Those slightly odd looking version identifiers can happen during bisections,
+ because the Linux kernel subsystems prepare their changes for a new mainline
+ release (say 6.2) before its predecessor (e.g. 6.1) is finished. They thus
+ base them on a somewhat earlier point like v6.1-rc1 or even v6.0 -- and then
+ get merged for 6.2 without rebasing nor squashing them once 6.1 is out. This
+ leads to those slightly odd looking version identifiers coming up during
+ bisections.
+
+[:ref:`back to step-by-step guide <bisectbuild_bissbs>`]
+
+.. _bisecttest_bisref:
+
+Bisection checkpoint
+--------------------
+
+ *Check if the feature that regressed works in the kernel you just built.*
+ [:ref:`... <bisecttest_bissbs>`]
+
+Ensure what you tell Git is accurate: getting it wrong just one time will bring
+the rest of the bisection totally of course, hence all testing after that point
+will be for nothing.
+
+[:ref:`back to step-by-step guide <bisecttest_bissbs>`]
+
+.. _bisectlog_bisref:
+
+Put the bisection log away
+--------------------------
+
+ *Store Git's bisection log and the current .config file in a safe place.*
+ [:ref:`... <bisectlog_bissbs>`]
+
+As indicated above: declaring just one kernel wrongly as 'good' or 'bad' will
+render the end result of a bisection useless. In that case you'd normally have
+to restart the bisection from scratch. The log can prevent that, as it might
+allow someone to point out where a bisection likely went sideways -- and then
+instead of testing ten or more kernels you might only have to build a few to
+resolve things.
+
+The .config file is put aside, as there is a decent chance that developers might
+ask for it after you reported the regression.
+
+[:ref:`back to step-by-step guide <bisectlog_bissbs>`]
+
+.. _revert_bisref:
+
+Try reverting the culprit
+-------------------------
+
+ *Try reverting the culprit on top of the latest codebase to see if this fixes
+ your regression.* [:ref:`... <revert_bissbs>`]
+
+This is an optional step, but whenever possible one you should try: there is a
+decent chance that developers will ask you to perform this step when you bring
+the bisection result up. So give it a try, you are in the flow already, building
+one more kernel shouldn't be a big deal at this point.
+
+The step-by-step guide covers everything relevant already except one slightly
+rare thing: did you bisected a regression that also happened with mainline using
+a stable/longterm series, but Git failed to revert the commit in mainline? Then
+try to revert the culprit in the affected stable/longterm series -- and if that
+succeeds, test that kernel version instead.
+
+[:ref:`back to step-by-step guide <revert_bissbs>`]
+
+
+Supplementary tasks: cleanup during and after the bisection
+-----------------------------------------------------------
+
+.. _makeroom_bisref:
+
+Cleaning up during the bisection
+--------------------------------
+
+ *To remove one of the kernels you installed, look up its 'kernelrelease'
+ identifier.* [:ref:`... <makeroom_bissbs>`]
+
+The kernels you install during this process are easy to remove later, as its
+parts are only stored in two places and clearly identifiable. You thus do not
+need to worry to mess up your machine when you install a kernel manually (and
+thus bypass your distribution's packaging system): all parts of your kernels are
+relatively easy to remove later.
+
+One of the two places is a directory in /lib/modules/, which holds the modules
+for each installed kernel. This directory is named after the kernel's release
+identifier; hence, to remove all modules for one of the kernels you built,
+simply remove its modules directory in /lib/modules/.
+
+The other place is /boot/, where typically two up to five files will be placed
+during installation of a kernel. All of them usually contain the release name in
+their file name, but how many files and their exact name depends somewhat on
+your distribution's installkernel executable and its initramfs generator. On
+some distributions the ``kernel-install remove...`` command mentioned in the
+step-by-step guide will delete all of these files for you while also removing
+the menu entry for the kernel from your bootloader configuration. On others you
+have to take care of these two tasks yourself. The following command should
+interactively remove the three main files of a kernel with the release name
+'6.0-rc1-local-gcafec0cacaca0'::
+
+ rm -i /boot/{System.map,vmlinuz,initr}-6.0-rc1-local-gcafec0cacaca0
+
+Afterwards check for other files in /boot/ that have
+'6.0-rc1-local-gcafec0cacaca0' in their name and consider deleting them as well.
+Now remove the boot entry for the kernel from your bootloader's configuration;
+the steps to do that vary quite a bit between Linux distributions.
+
+Note, be careful with wildcards like '*' when deleting files or directories
+for kernels manually: you might accidentally remove files of a 6.0.11 kernel
+when all you want is to remove 6.0 or 6.0.1.
+
+[:ref:`back to step-by-step guide <makeroom_bissbs>`]
+
+Cleaning up after the bisection
+-------------------------------
+
+.. _finishingtouch_bisref:
+
+ *Once you have finished the bisection, do not immediately remove anything
+ you set up, as you might need a few things again.*
+ [:ref:`... <finishingtouch_bissbs>`]
+
+When you are really short of storage space removing the kernels as described in
+the step-by-step guide might not free as much space as you would like. In that
+case consider running ``rm -rf ~/linux/*`` as well now. This will remove the
+build artifacts and the Linux sources, but will leave the Git repository
+(~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the
+sources back.
+
+Removing the repository as well would likely be unwise at this point: there is a
+decent chance developers will ask you to build another kernel to perform
+additional tests. This is often required to debug an issue or check proposed
+fixes. Before doing so you want to run the ``git fetch mainline`` command again
+followed by ``git checkout mainline/master`` to bring your clone up to date and
+checkout the latest codebase. Then apply the patch using ``git apply
+<filename>`` or ``git am <filename>`` and build yet another kernel using the
+familiar commands.
+
+Additional tests are also the reason why you want to keep the
+~/kernel-config-working file around for a few weeks.
+
+[:ref:`back to step-by-step guide <finishingtouch_bissbs>`]
+
+
+Additional reading material
+===========================
+
+Further sources
+---------------
+
+* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
+ `fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
+ in the Git documentation.
+* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
+ from kernel developer Nathan Chancellor.
+* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
+* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index e8c2ce1f9df6..45a7f4932fe0 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -243,3 +243,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ASR | ASR8601 | #8601001 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst
index 07caa8fff852..414bc7402ae7 100644
--- a/Documentation/arch/x86/amd-memory-encryption.rst
+++ b/Documentation/arch/x86/amd-memory-encryption.rst
@@ -87,14 +87,14 @@ The state of SME in the Linux kernel can be documented as follows:
kernel is non-zero).
SME can also be enabled and activated in the BIOS. If SME is enabled and
-activated in the BIOS, then all memory accesses will be encrypted and it will
-not be necessary to activate the Linux memory encryption support. If the BIOS
-merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate
-memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or
-by supplying mem_encrypt=on on the kernel command line. However, if BIOS does
-not enable SME, then Linux will not be able to activate memory encryption, even
-if configured to do so by default or the mem_encrypt=on command line parameter
-is specified.
+activated in the BIOS, then all memory accesses will be encrypted and it
+will not be necessary to activate the Linux memory encryption support.
+
+If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG),
+then memory encryption can be enabled by supplying mem_encrypt=on on the
+kernel command line. However, if BIOS does not enable SME, then Linux
+will not be able to activate memory encryption, even if configured to do
+so by default or the mem_encrypt=on command line parameter is specified.
Secure Nested Paging (SNP)
==========================
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index c513855a54bb..4fd492cb4970 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -878,7 +878,8 @@ Protocol: 2.10+
address if possible.
A non-relocatable kernel will unconditionally move itself and to run
- at this address.
+ at this address. A relocatable kernel will move itself to this address if it
+ loaded below this address.
============ =======
Field name: init_size
diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst
index e73fdff62c0a..c58c72362911 100644
--- a/Documentation/arch/x86/mds.rst
+++ b/Documentation/arch/x86/mds.rst
@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
mds_clear_cpu_buffers()
+Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
+
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
(idle) transitions.
@@ -138,17 +141,30 @@ Mitigation points
When transitioning from kernel to user space the CPU buffers are flushed
on affected CPUs when the mitigation is not disabled on the kernel
- command line. The migitation is enabled through the static key
- mds_user_clear.
-
- The mitigation is invoked in prepare_exit_to_usermode() which covers
- all but one of the kernel to user space transitions. The exception
- is when we return from a Non Maskable Interrupt (NMI), which is
- handled directly in do_nmi().
-
- (The reason that NMI is special is that prepare_exit_to_usermode() can
- enable IRQs. In NMI context, NMIs are blocked, and we don't want to
- enable IRQs with NMIs blocked.)
+ command line. The mitigation is enabled through the feature flag
+ X86_FEATURE_CLEAR_CPU_BUF.
+
+ The mitigation is invoked just before transitioning to userspace after
+ user registers are restored. This is done to minimize the window in
+ which kernel data could be accessed after VERW e.g. via an NMI after
+ VERW.
+
+ **Corner case not handled**
+ Interrupts returning to kernel don't clear CPUs buffers since the
+ exit-to-user path is expected to do that anyways. But, there could be
+ a case when an NMI is generated in kernel after the exit-to-user path
+ has cleared the buffers. This case is not handled and NMI returning to
+ kernel don't clear CPU buffers because:
+
+ 1. It is rare to get an NMI after VERW, but before returning to userspace.
+ 2. For an unprivileged user, there is no known way to make that NMI
+ less rare or target it.
+ 3. It would take a large number of these precisely-timed NMIs to mount
+ an actual attack. There's presumably not enough bandwidth.
+ 4. The NMI in question occurs after a VERW, i.e. when user state is
+ restored and most interesting data is already scrubbed. Whats left
+ is only the data that NMI touches, and that may or may not be of
+ any interest.
2. C-State transition
diff --git a/Documentation/arch/x86/pti.rst b/Documentation/arch/x86/pti.rst
index e08d35177bc0..57e8392f61d3 100644
--- a/Documentation/arch/x86/pti.rst
+++ b/Documentation/arch/x86/pti.rst
@@ -26,9 +26,9 @@ comments in pti.c).
This approach helps to ensure that side-channel attacks leveraging
the paging structures do not function when PTI is enabled. It can be
-enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
-Once enabled at compile-time, it can be disabled at boot with the
-'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+enabled by setting CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y at compile
+time. Once enabled at compile-time, it can be disabled at boot with
+the 'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
Page Table Management
=====================
diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst
index 08ebf9edbfc1..7352ab89a55a 100644
--- a/Documentation/arch/x86/topology.rst
+++ b/Documentation/arch/x86/topology.rst
@@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'.
Package-related topology information in the kernel:
- - cpuinfo_x86.x86_max_cores:
+ - topology_num_threads_per_package()
- The number of cores in a package. This information is retrieved via CPUID.
+ The number of threads in a package.
- - cpuinfo_x86.x86_max_dies:
+ - topology_num_cores_per_package()
- The number of dies in a package. This information is retrieved via CPUID.
+ The number of cores in a package.
+
+ - topology_max_dies_per_package()
+
+ The maximum number of dies in a package.
- cpuinfo_x86.topo.die_id:
- The physical ID of the die. This information is retrieved via CPUID.
+ The physical ID of the die.
- cpuinfo_x86.topo.pkg_id:
@@ -96,16 +100,6 @@ are SMT- or CMT-type threads.
AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
"core".
-Core-related topology information in the kernel:
-
- - smp_num_siblings:
-
- The number of threads in a core. The number of threads in a package can be
- calculated by::
-
- threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
-
-
Threads
=======
A thread is a single scheduling unit. It's the equivalent to a logical Linux
diff --git a/Documentation/arch/x86/x86_64/fred.rst b/Documentation/arch/x86/x86_64/fred.rst
new file mode 100644
index 000000000000..9f57e7b91f7e
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/fred.rst
@@ -0,0 +1,96 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Flexible Return and Event Delivery (FRED)
+=========================================
+
+Overview
+========
+
+The FRED architecture defines simple new transitions that change
+privilege level (ring transitions). The FRED architecture was
+designed with the following goals:
+
+1) Improve overall performance and response time by replacing event
+ delivery through the interrupt descriptor table (IDT event
+ delivery) and event return by the IRET instruction with lower
+ latency transitions.
+
+2) Improve software robustness by ensuring that event delivery
+ establishes the full supervisor context and that event return
+ establishes the full user context.
+
+The new transitions defined by the FRED architecture are FRED event
+delivery and, for returning from events, two FRED return instructions.
+FRED event delivery can effect a transition from ring 3 to ring 0, but
+it is used also to deliver events incident to ring 0. One FRED
+instruction (ERETU) effects a return from ring 0 to ring 3, while the
+other (ERETS) returns while remaining in ring 0. Collectively, FRED
+event delivery and the FRED return instructions are FRED transitions.
+
+In addition to these transitions, the FRED architecture defines a new
+instruction (LKGS) for managing the state of the GS segment register.
+The LKGS instruction can be used by 64-bit operating systems that do
+not use the new FRED transitions.
+
+Furthermore, the FRED architecture is easy to extend for future CPU
+architectures.
+
+Software based event dispatching
+================================
+
+FRED operates differently from IDT in terms of event handling. Instead
+of directly dispatching an event to its handler based on the event
+vector, FRED requires the software to dispatch an event to its handler
+based on both the event's type and vector. Therefore, an event dispatch
+framework must be implemented to facilitate the event-to-handler
+dispatch process. The FRED event dispatch framework takes control
+once an event is delivered, and employs a two-level dispatch.
+
+The first level dispatching is event type based, and the second level
+dispatching is event vector based.
+
+Full supervisor/user context
+============================
+
+FRED event delivery atomically save and restore full supervisor/user
+context upon event delivery and return. Thus it avoids the problem of
+transient states due to %cr2 and/or %dr6, and it is no longer needed
+to handle all the ugly corner cases caused by half baked entry states.
+
+FRED allows explicit unblock of NMI with new event return instructions
+ERETS/ERETU, avoiding the mess caused by IRET which unconditionally
+unblocks NMI, e.g., when an exception happens during NMI handling.
+
+FRED always restores the full value of %rsp, thus ESPFIX is no longer
+needed when FRED is enabled.
+
+LKGS
+====
+
+LKGS behaves like the MOV to GS instruction except that it loads the
+base address into the IA32_KERNEL_GS_BASE MSR instead of the GS
+segment’s descriptor cache. With LKGS, it ends up with avoiding
+mucking with kernel GS, i.e., an operating system can always operate
+with its own GS base address.
+
+Because FRED event delivery from ring 3 and ERETU both swap the value
+of the GS base address and that of the IA32_KERNEL_GS_BASE MSR, plus
+the introduction of LKGS instruction, the SWAPGS instruction is no
+longer needed when FRED is enabled, thus is disallowed (#UD).
+
+Stack levels
+============
+
+4 stack levels 0~3 are introduced to replace the nonreentrant IST for
+event handling, and each stack level should be configured to use a
+dedicated stack.
+
+The current stack level could be unchanged or go higher upon FRED
+event delivery. If unchanged, the CPU keeps using the current event
+stack. If higher, the CPU switches to a new event stack specified by
+the MSR of the new stack level, i.e., MSR_IA32_FRED_RSP[123].
+
+Only execution of a FRED return instruction ERET[US], could lower the
+current stack level, causing the CPU to switch back to the stack it was
+on before a previous event delivery that promoted the stack level.
diff --git a/Documentation/arch/x86/x86_64/index.rst b/Documentation/arch/x86/x86_64/index.rst
index a56070fc8e77..ad15e9bd623f 100644
--- a/Documentation/arch/x86/x86_64/index.rst
+++ b/Documentation/arch/x86/x86_64/index.rst
@@ -15,3 +15,4 @@ x86_64 Support
cpu-hotplug-spec
machinecheck
fsgs
+ fred
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 7985c6615f3c..a8f5782bd833 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -177,10 +177,10 @@ In addition to kfuncs' arguments, verifier may need more information about the
type of kfunc(s) being registered with the BPF subsystem. To do so, we define
flags on a set of kfuncs as follows::
- BTF_SET8_START(bpf_task_set)
+ BTF_KFUNCS_START(bpf_task_set)
BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
- BTF_SET8_END(bpf_task_set)
+ BTF_KFUNCS_END(bpf_task_set)
This set encodes the BTF ID of each kfunc listed above, and encodes the flags
along with it. Ofcourse, it is also allowed to specify no flags.
@@ -347,10 +347,10 @@ Once the kfunc is prepared for use, the final step to making it visible is
registering it with the BPF subsystem. Registration is done per BPF program
type. An example is shown below::
- BTF_SET8_START(bpf_task_set)
+ BTF_KFUNCS_START(bpf_task_set)
BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
- BTF_SET8_END(bpf_task_set)
+ BTF_KFUNCS_END(bpf_task_set)
static const struct btf_kfunc_id_set bpf_task_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
index 74d64a30f500..f9cd579496c9 100644
--- a/Documentation/bpf/map_lpm_trie.rst
+++ b/Documentation/bpf/map_lpm_trie.rst
@@ -17,7 +17,7 @@ significant byte.
LPM tries may be created with a maximum prefix length that is a multiple
of 8, in the range from 8 to 2048. The key used for lookup and update
-operations is a ``struct bpf_lpm_trie_key``, extended by
+operations is a ``struct bpf_lpm_trie_key_u8``, extended by
``max_prefixlen/8`` bytes.
- For IPv4 addresses the data length is 4 bytes
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 245b6defc298..a5ab00ac0b14 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -1,11 +1,11 @@
.. contents::
.. sectnum::
-=======================================
-BPF Instruction Set Specification, v1.0
-=======================================
+======================================
+BPF Instruction Set Architecture (ISA)
+======================================
-This document specifies version 1.0 of the BPF instruction set.
+This document specifies the BPF instruction set architecture (ISA).
Documentation conventions
=========================
@@ -24,22 +24,22 @@ a type's signedness (`S`) and bit width (`N`), respectively.
.. table:: Meaning of signedness notation.
==== =========
- `S` Meaning
+ S Meaning
==== =========
- `u` unsigned
- `s` signed
+ u unsigned
+ s signed
==== =========
.. table:: Meaning of bit-width notation.
===== =========
- `N` Bit width
+ N Bit width
===== =========
- `8` 8 bits
- `16` 16 bits
- `32` 32 bits
- `64` 64 bits
- `128` 128 bits
+ 8 8 bits
+ 16 16 bits
+ 32 32 bits
+ 64 64 bits
+ 128 128 bits
===== =========
For example, `u32` is a type whose valid values are all the 32-bit unsigned
@@ -48,31 +48,31 @@ numbers.
Functions
---------
-* `htobe16`: Takes an unsigned 16-bit number in host-endian format and
+* htobe16: Takes an unsigned 16-bit number in host-endian format and
returns the equivalent number as an unsigned 16-bit number in big-endian
format.
-* `htobe32`: Takes an unsigned 32-bit number in host-endian format and
+* htobe32: Takes an unsigned 32-bit number in host-endian format and
returns the equivalent number as an unsigned 32-bit number in big-endian
format.
-* `htobe64`: Takes an unsigned 64-bit number in host-endian format and
+* htobe64: Takes an unsigned 64-bit number in host-endian format and
returns the equivalent number as an unsigned 64-bit number in big-endian
format.
-* `htole16`: Takes an unsigned 16-bit number in host-endian format and
+* htole16: Takes an unsigned 16-bit number in host-endian format and
returns the equivalent number as an unsigned 16-bit number in little-endian
format.
-* `htole32`: Takes an unsigned 32-bit number in host-endian format and
+* htole32: Takes an unsigned 32-bit number in host-endian format and
returns the equivalent number as an unsigned 32-bit number in little-endian
format.
-* `htole64`: Takes an unsigned 64-bit number in host-endian format and
+* htole64: Takes an unsigned 64-bit number in host-endian format and
returns the equivalent number as an unsigned 64-bit number in little-endian
format.
-* `bswap16`: Takes an unsigned 16-bit number in either big- or little-endian
+* bswap16: Takes an unsigned 16-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
-* `bswap32`: Takes an unsigned 32-bit number in either big- or little-endian
+* bswap32: Takes an unsigned 32-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
-* `bswap64`: Takes an unsigned 64-bit number in either big- or little-endian
+* bswap64: Takes an unsigned 64-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
@@ -97,40 +97,101 @@ Definitions
A: 10000110
B: 11111111 10000110
+Conformance groups
+------------------
+
+An implementation does not need to support all instructions specified in this
+document (e.g., deprecated instructions). Instead, a number of conformance
+groups are specified. An implementation must support the base32 conformance
+group and may support additional conformance groups, where supporting a
+conformance group means it must support all instructions in that conformance
+group.
+
+The use of named conformance groups enables interoperability between a runtime
+that executes instructions, and tools as such compilers that generate
+instructions for the runtime. Thus, capability discovery in terms of
+conformance groups might be done manually by users or automatically by tools.
+
+Each conformance group has a short ASCII label (e.g., "base32") that
+corresponds to a set of instructions that are mandatory. That is, each
+instruction has one or more conformance groups of which it is a member.
+
+This document defines the following conformance groups:
+
+* base32: includes all instructions defined in this
+ specification unless otherwise noted.
+* base64: includes base32, plus instructions explicitly noted
+ as being in the base64 conformance group.
+* atomic32: includes 32-bit atomic operation instructions (see `Atomic operations`_).
+* atomic64: includes atomic32, plus 64-bit atomic operation instructions.
+* divmul32: includes 32-bit division, multiplication, and modulo instructions.
+* divmul64: includes divmul32, plus 64-bit division, multiplication,
+ and modulo instructions.
+* packet: deprecated packet access instructions.
+
Instruction encoding
====================
BPF has two instruction encodings:
* the basic instruction encoding, which uses 64 bits to encode an instruction
-* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
- constant) value after the basic instruction for a total of 128 bits.
+* the wide instruction encoding, which appends a second 64 bits
+ after the basic instruction for a total of 128 bits.
-The fields conforming an encoded basic instruction are stored in the
-following order::
+Basic instruction encoding
+--------------------------
- opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF.
- opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF.
+A basic instruction is encoded as follows::
-**imm**
- signed integer immediate value
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | opcode | regs | offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-**offset**
- signed integer offset used with pointer arithmetic
+**opcode**
+ operation to perform, encoded as follows::
-**src_reg**
- the source register number (0-10), except where otherwise specified
- (`64-bit immediate instructions`_ reuse this field for other purposes)
+ +-+-+-+-+-+-+-+-+
+ |specific |class|
+ +-+-+-+-+-+-+-+-+
-**dst_reg**
- destination register number (0-10)
+ **specific**
+ The format of these bits varies by instruction class
-**opcode**
- operation to perform
+ **class**
+ The instruction class (see `Instruction classes`_)
+
+**regs**
+ The source and destination register numbers, encoded as follows
+ on a little-endian host::
+
+ +-+-+-+-+-+-+-+-+
+ |src_reg|dst_reg|
+ +-+-+-+-+-+-+-+-+
+
+ and as follows on a big-endian host::
+
+ +-+-+-+-+-+-+-+-+
+ |dst_reg|src_reg|
+ +-+-+-+-+-+-+-+-+
+
+ **src_reg**
+ the source register number (0-10), except where otherwise specified
+ (`64-bit immediate instructions`_ reuse this field for other purposes)
+
+ **dst_reg**
+ destination register number (0-10)
+
+**offset**
+ signed integer offset used with pointer arithmetic
+
+**imm**
+ signed integer immediate value
-Note that the contents of multi-byte fields ('imm' and 'offset') are
-stored using big-endian byte ordering in big-endian BPF and
-little-endian byte ordering in little-endian BPF.
+Note that the contents of multi-byte fields ('offset' and 'imm') are
+stored using big-endian byte ordering on big-endian hosts and
+little-endian byte ordering on little-endian hosts.
For example::
@@ -143,71 +204,83 @@ For example::
Note that most instructions do not use all of the fields.
Unused fields shall be cleared to zero.
-As discussed below in `64-bit immediate instructions`_, a 64-bit immediate
-instruction uses a 64-bit immediate value that is constructed as follows.
-The 64 bits following the basic instruction contain a pseudo instruction
-using the same format but with opcode, dst_reg, src_reg, and offset all set to zero,
-and imm containing the high 32 bits of the immediate value.
+Wide instruction encoding
+--------------------------
+
+Some instructions are defined to use the wide instruction encoding,
+which uses two 32-bit immediate values. The 64 bits following
+the basic instruction format contain a pseudo instruction
+with 'opcode', 'dst_reg', 'src_reg', and 'offset' all set to zero.
This is depicted in the following figure::
- basic_instruction
- .-----------------------------.
- | |
- code:8 regs:8 offset:16 imm:32 unused:32 imm:32
- | |
- '--------------'
- pseudo instruction
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | opcode | regs | offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | next_imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+**opcode**
+ operation to perform, encoded as explained above
-Thus the 64-bit immediate value is constructed as follows:
+**regs**
+ The source and destination register numbers, encoded as explained above
+
+**offset**
+ signed integer offset used with pointer arithmetic
+
+**imm**
+ signed integer immediate value
- imm64 = (next_imm << 32) | imm
+**reserved**
+ unused, set to zero
-where 'next_imm' refers to the imm value of the pseudo instruction
-following the basic instruction. The unused bytes in the pseudo
-instruction are reserved and shall be cleared to zero.
+**next_imm**
+ second signed integer immediate value
Instruction classes
-------------------
-The three LSB bits of the 'opcode' field store the instruction class:
-
-========= ===== =============================== ===================================
-class value description reference
-========= ===== =============================== ===================================
-BPF_LD 0x00 non-standard load operations `Load and store instructions`_
-BPF_LDX 0x01 load into register operations `Load and store instructions`_
-BPF_ST 0x02 store from immediate operations `Load and store instructions`_
-BPF_STX 0x03 store from register operations `Load and store instructions`_
-BPF_ALU 0x04 32-bit arithmetic operations `Arithmetic and jump instructions`_
-BPF_JMP 0x05 64-bit jump operations `Arithmetic and jump instructions`_
-BPF_JMP32 0x06 32-bit jump operations `Arithmetic and jump instructions`_
-BPF_ALU64 0x07 64-bit arithmetic operations `Arithmetic and jump instructions`_
-========= ===== =============================== ===================================
+The three least significant bits of the 'opcode' field store the instruction class:
+
+===== ===== =============================== ===================================
+class value description reference
+===== ===== =============================== ===================================
+LD 0x0 non-standard load operations `Load and store instructions`_
+LDX 0x1 load into register operations `Load and store instructions`_
+ST 0x2 store from immediate operations `Load and store instructions`_
+STX 0x3 store from register operations `Load and store instructions`_
+ALU 0x4 32-bit arithmetic operations `Arithmetic and jump instructions`_
+JMP 0x5 64-bit jump operations `Arithmetic and jump instructions`_
+JMP32 0x6 32-bit jump operations `Arithmetic and jump instructions`_
+ALU64 0x7 64-bit arithmetic operations `Arithmetic and jump instructions`_
+===== ===== =============================== ===================================
Arithmetic and jump instructions
================================
-For arithmetic and jump instructions (``BPF_ALU``, ``BPF_ALU64``, ``BPF_JMP`` and
-``BPF_JMP32``), the 8-bit 'opcode' field is divided into three parts:
+For arithmetic and jump instructions (``ALU``, ``ALU64``, ``JMP`` and
+``JMP32``), the 8-bit 'opcode' field is divided into three parts::
-============== ====== =================
-4 bits (MSB) 1 bit 3 bits (LSB)
-============== ====== =================
-code source instruction class
-============== ====== =================
+ +-+-+-+-+-+-+-+-+
+ | code |s|class|
+ +-+-+-+-+-+-+-+-+
**code**
the operation code, whose meaning varies by instruction class
-**source**
+**s (source)**
the source operand location, which unless otherwise specified is one of:
====== ===== ==============================================
source value description
====== ===== ==============================================
- BPF_K 0x00 use 32-bit 'imm' value as source operand
- BPF_X 0x08 use 'src_reg' register value as source operand
+ K 0 use 32-bit 'imm' value as source operand
+ X 1 use 'src_reg' register value as source operand
====== ===== ==============================================
**instruction class**
@@ -216,70 +289,75 @@ code source instruction class
Arithmetic instructions
-----------------------
-``BPF_ALU`` uses 32-bit wide operands while ``BPF_ALU64`` uses 64-bit wide operands for
-otherwise identical operations.
+``ALU`` uses 32-bit wide operands while ``ALU64`` uses 64-bit wide operands for
+otherwise identical operations. ``ALU64`` instructions belong to the
+base64 conformance group unless noted otherwise.
The 'code' field encodes the operation as below, where 'src' and 'dst' refer
to the values of the source and destination registers, respectively.
-========= ===== ======= ==========================================================
-code value offset description
-========= ===== ======= ==========================================================
-BPF_ADD 0x00 0 dst += src
-BPF_SUB 0x10 0 dst -= src
-BPF_MUL 0x20 0 dst \*= src
-BPF_DIV 0x30 0 dst = (src != 0) ? (dst / src) : 0
-BPF_SDIV 0x30 1 dst = (src != 0) ? (dst s/ src) : 0
-BPF_OR 0x40 0 dst \|= src
-BPF_AND 0x50 0 dst &= src
-BPF_LSH 0x60 0 dst <<= (src & mask)
-BPF_RSH 0x70 0 dst >>= (src & mask)
-BPF_NEG 0x80 0 dst = -dst
-BPF_MOD 0x90 0 dst = (src != 0) ? (dst % src) : dst
-BPF_SMOD 0x90 1 dst = (src != 0) ? (dst s% src) : dst
-BPF_XOR 0xa0 0 dst ^= src
-BPF_MOV 0xb0 0 dst = src
-BPF_MOVSX 0xb0 8/16/32 dst = (s8,s16,s32)src
-BPF_ARSH 0xc0 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask)
-BPF_END 0xd0 0 byte swap operations (see `Byte swap instructions`_ below)
-========= ===== ======= ==========================================================
+===== ===== ======= ==========================================================
+name code offset description
+===== ===== ======= ==========================================================
+ADD 0x0 0 dst += src
+SUB 0x1 0 dst -= src
+MUL 0x2 0 dst \*= src
+DIV 0x3 0 dst = (src != 0) ? (dst / src) : 0
+SDIV 0x3 1 dst = (src != 0) ? (dst s/ src) : 0
+OR 0x4 0 dst \|= src
+AND 0x5 0 dst &= src
+LSH 0x6 0 dst <<= (src & mask)
+RSH 0x7 0 dst >>= (src & mask)
+NEG 0x8 0 dst = -dst
+MOD 0x9 0 dst = (src != 0) ? (dst % src) : dst
+SMOD 0x9 1 dst = (src != 0) ? (dst s% src) : dst
+XOR 0xa 0 dst ^= src
+MOV 0xb 0 dst = src
+MOVSX 0xb 8/16/32 dst = (s8,s16,s32)src
+ARSH 0xc 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask)
+END 0xd 0 byte swap operations (see `Byte swap instructions`_ below)
+===== ===== ======= ==========================================================
Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
-If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
-the destination register is unchanged whereas for ``BPF_ALU`` the upper
+If execution would result in modulo by zero, for ``ALU64`` the value of
+the destination register is unchanged whereas for ``ALU`` the upper
32 bits of the destination register are zeroed.
-``BPF_ADD | BPF_X | BPF_ALU`` means::
+``{ADD, X, ALU}``, where 'code' = ``ADD``, 'source' = ``X``, and 'class' = ``ALU``, means::
dst = (u32) ((u32) dst + (u32) src)
where '(u32)' indicates that the upper 32 bits are zeroed.
-``BPF_ADD | BPF_X | BPF_ALU64`` means::
+``{ADD, X, ALU64}`` means::
dst = dst + src
-``BPF_XOR | BPF_K | BPF_ALU`` means::
+``{XOR, K, ALU}`` means::
- dst = (u32) dst ^ (u32) imm32
+ dst = (u32) dst ^ (u32) imm
-``BPF_XOR | BPF_K | BPF_ALU64`` means::
+``{XOR, K, ALU64}`` means::
- dst = dst ^ imm32
+ dst = dst ^ imm
Note that most instructions have instruction offset of 0. Only three instructions
-(``BPF_SDIV``, ``BPF_SMOD``, ``BPF_MOVSX``) have a non-zero offset.
+(``SDIV``, ``SMOD``, ``MOVSX``) have a non-zero offset.
+Division, multiplication, and modulo operations for ``ALU`` are part
+of the "divmul32" conformance group, and division, multiplication, and
+modulo operations for ``ALU64`` are part of the "divmul64" conformance
+group.
The division and modulo operations support both unsigned and signed flavors.
-For unsigned operations (``BPF_DIV`` and ``BPF_MOD``), for ``BPF_ALU``,
-'imm' is interpreted as a 32-bit unsigned value. For ``BPF_ALU64``,
+For unsigned operations (``DIV`` and ``MOD``), for ``ALU``,
+'imm' is interpreted as a 32-bit unsigned value. For ``ALU64``,
'imm' is first :term:`sign extended<Sign Extend>` from 32 to 64 bits, and then
interpreted as a 64-bit unsigned value.
-For signed operations (``BPF_SDIV`` and ``BPF_SMOD``), for ``BPF_ALU``,
-'imm' is interpreted as a 32-bit signed value. For ``BPF_ALU64``, 'imm'
+For signed operations (``SDIV`` and ``SMOD``), for ``ALU``,
+'imm' is interpreted as a 32-bit signed value. For ``ALU64``, 'imm'
is first :term:`sign extended<Sign Extend>` from 32 to 64 bits, and then
interpreted as a 64-bit signed value.
@@ -291,11 +369,15 @@ etc. This specification requires that signed modulo use truncated division
a % n = a - n * trunc(a / n)
-The ``BPF_MOVSX`` instruction does a move operation with sign extension.
-``BPF_ALU | BPF_MOVSX`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into 32
+The ``MOVSX`` instruction does a move operation with sign extension.
+``{MOVSX, X, ALU}`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into 32
bit operands, and zeroes the remaining upper 32 bits.
-``BPF_ALU64 | BPF_MOVSX`` :term:`sign extends<Sign Extend>` 8-bit, 16-bit, and 32-bit
-operands into 64 bit operands.
+``{MOVSX, X, ALU64}`` :term:`sign extends<Sign Extend>` 8-bit, 16-bit, and 32-bit
+operands into 64 bit operands. Unlike other arithmetic instructions,
+``MOVSX`` is only defined for register source operands (``X``).
+
+The ``NEG`` instruction is only defined when the source bit is clear
+(``K``).
Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31)
for 32-bit operations.
@@ -303,43 +385,45 @@ for 32-bit operations.
Byte swap instructions
----------------------
-The byte swap instructions use instruction classes of ``BPF_ALU`` and ``BPF_ALU64``
-and a 4-bit 'code' field of ``BPF_END``.
+The byte swap instructions use instruction classes of ``ALU`` and ``ALU64``
+and a 4-bit 'code' field of ``END``.
The byte swap instructions operate on the destination register
only and do not use a separate source register or immediate value.
-For ``BPF_ALU``, the 1-bit source operand field in the opcode is used to
+For ``ALU``, the 1-bit source operand field in the opcode is used to
select what byte order the operation converts from or to. For
-``BPF_ALU64``, the 1-bit source operand field in the opcode is reserved
+``ALU64``, the 1-bit source operand field in the opcode is reserved
and must be set to 0.
-========= ========= ===== =================================================
-class source value description
-========= ========= ===== =================================================
-BPF_ALU BPF_TO_LE 0x00 convert between host byte order and little endian
-BPF_ALU BPF_TO_BE 0x08 convert between host byte order and big endian
-BPF_ALU64 Reserved 0x00 do byte swap unconditionally
-========= ========= ===== =================================================
+===== ======== ===== =================================================
+class source value description
+===== ======== ===== =================================================
+ALU TO_LE 0 convert between host byte order and little endian
+ALU TO_BE 1 convert between host byte order and big endian
+ALU64 Reserved 0 do byte swap unconditionally
+===== ======== ===== =================================================
The 'imm' field encodes the width of the swap operations. The following widths
-are supported: 16, 32 and 64.
+are supported: 16, 32 and 64. Width 64 operations belong to the base64
+conformance group and other swap operations belong to the base32
+conformance group.
Examples:
-``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means::
+``{END, TO_LE, ALU}`` with imm = 16/32/64 means::
dst = htole16(dst)
dst = htole32(dst)
dst = htole64(dst)
-``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 16/32/64 means::
+``{END, TO_BE, ALU}`` with imm = 16/32/64 means::
dst = htobe16(dst)
dst = htobe32(dst)
dst = htobe64(dst)
-``BPF_ALU64 | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means::
+``{END, TO_LE, ALU64}`` with imm = 16/32/64 means::
dst = bswap16(dst)
dst = bswap32(dst)
@@ -348,56 +432,61 @@ Examples:
Jump instructions
-----------------
-``BPF_JMP32`` uses 32-bit wide operands while ``BPF_JMP`` uses 64-bit wide operands for
-otherwise identical operations.
+``JMP32`` uses 32-bit wide operands and indicates the base32
+conformance group, while ``JMP`` uses 64-bit wide operands for
+otherwise identical operations, and indicates the base64 conformance
+group unless otherwise specified.
The 'code' field encodes the operation as below:
-======== ===== === =========================================== =========================================
-code value src description notes
-======== ===== === =========================================== =========================================
-BPF_JA 0x0 0x0 PC += offset BPF_JMP class
-BPF_JA 0x0 0x0 PC += imm BPF_JMP32 class
-BPF_JEQ 0x1 any PC += offset if dst == src
-BPF_JGT 0x2 any PC += offset if dst > src unsigned
-BPF_JGE 0x3 any PC += offset if dst >= src unsigned
-BPF_JSET 0x4 any PC += offset if dst & src
-BPF_JNE 0x5 any PC += offset if dst != src
-BPF_JSGT 0x6 any PC += offset if dst > src signed
-BPF_JSGE 0x7 any PC += offset if dst >= src signed
-BPF_CALL 0x8 0x0 call helper function by address see `Helper functions`_
-BPF_CALL 0x8 0x1 call PC += imm see `Program-local functions`_
-BPF_CALL 0x8 0x2 call helper function by BTF ID see `Helper functions`_
-BPF_EXIT 0x9 0x0 return BPF_JMP only
-BPF_JLT 0xa any PC += offset if dst < src unsigned
-BPF_JLE 0xb any PC += offset if dst <= src unsigned
-BPF_JSLT 0xc any PC += offset if dst < src signed
-BPF_JSLE 0xd any PC += offset if dst <= src signed
-======== ===== === =========================================== =========================================
-
-The BPF program needs to store the return value into register R0 before doing a
-``BPF_EXIT``.
+======== ===== ======= =============================== ===================================================
+code value src_reg description notes
+======== ===== ======= =============================== ===================================================
+JA 0x0 0x0 PC += offset {JA, K, JMP} only
+JA 0x0 0x0 PC += imm {JA, K, JMP32} only
+JEQ 0x1 any PC += offset if dst == src
+JGT 0x2 any PC += offset if dst > src unsigned
+JGE 0x3 any PC += offset if dst >= src unsigned
+JSET 0x4 any PC += offset if dst & src
+JNE 0x5 any PC += offset if dst != src
+JSGT 0x6 any PC += offset if dst > src signed
+JSGE 0x7 any PC += offset if dst >= src signed
+CALL 0x8 0x0 call helper function by address {CALL, K, JMP} only, see `Helper functions`_
+CALL 0x8 0x1 call PC += imm {CALL, K, JMP} only, see `Program-local functions`_
+CALL 0x8 0x2 call helper function by BTF ID {CALL, K, JMP} only, see `Helper functions`_
+EXIT 0x9 0x0 return {CALL, K, JMP} only
+JLT 0xa any PC += offset if dst < src unsigned
+JLE 0xb any PC += offset if dst <= src unsigned
+JSLT 0xc any PC += offset if dst < src signed
+JSLE 0xd any PC += offset if dst <= src signed
+======== ===== ======= =============================== ===================================================
+
+The BPF program needs to store the return value into register R0 before doing an
+``EXIT``.
Example:
-``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means::
+``{JSGE, X, JMP32}`` means::
if (s32)dst s>= (s32)src goto +offset
where 's>=' indicates a signed '>=' comparison.
-``BPF_JA | BPF_K | BPF_JMP32`` (0x06) means::
+``{JA, K, JMP32}`` means::
gotol +imm
where 'imm' means the branch offset comes from insn 'imm' field.
-Note that there are two flavors of ``BPF_JA`` instructions. The
-``BPF_JMP`` class permits a 16-bit jump offset specified by the 'offset'
-field, whereas the ``BPF_JMP32`` class permits a 32-bit jump offset
+Note that there are two flavors of ``JA`` instructions. The
+``JMP`` class permits a 16-bit jump offset specified by the 'offset'
+field, whereas the ``JMP32`` class permits a 32-bit jump offset
specified by the 'imm' field. A > 16-bit conditional jump may be
converted to a < 16-bit conditional jump plus a 32-bit unconditional
jump.
+All ``CALL`` and ``JA`` instructions belong to the
+base32 conformance group.
+
Helper functions
~~~~~~~~~~~~~~~~
@@ -416,78 +505,83 @@ Program-local functions
~~~~~~~~~~~~~~~~~~~~~~~
Program-local functions are functions exposed by the same BPF program as the
caller, and are referenced by offset from the call instruction, similar to
-``BPF_JA``. The offset is encoded in the imm field of the call instruction.
-A ``BPF_EXIT`` within the program-local function will return to the caller.
+``JA``. The offset is encoded in the imm field of the call instruction.
+A ``EXIT`` within the program-local function will return to the caller.
Load and store instructions
===========================
-For load and store instructions (``BPF_LD``, ``BPF_LDX``, ``BPF_ST``, and ``BPF_STX``), the
-8-bit 'opcode' field is divided as:
-
-============ ====== =================
-3 bits (MSB) 2 bits 3 bits (LSB)
-============ ====== =================
-mode size instruction class
-============ ====== =================
-
-The mode modifier is one of:
-
- ============= ===== ==================================== =============
- mode modifier value description reference
- ============= ===== ==================================== =============
- BPF_IMM 0x00 64-bit immediate instructions `64-bit immediate instructions`_
- BPF_ABS 0x20 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_
- BPF_IND 0x40 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_
- BPF_MEM 0x60 regular load and store operations `Regular load and store operations`_
- BPF_MEMSX 0x80 sign-extension load operations `Sign-extension load operations`_
- BPF_ATOMIC 0xc0 atomic operations `Atomic operations`_
- ============= ===== ==================================== =============
-
-The size modifier is one of:
-
- ============= ===== =====================
- size modifier value description
- ============= ===== =====================
- BPF_W 0x00 word (4 bytes)
- BPF_H 0x08 half word (2 bytes)
- BPF_B 0x10 byte
- BPF_DW 0x18 double word (8 bytes)
- ============= ===== =====================
+For load and store instructions (``LD``, ``LDX``, ``ST``, and ``STX``), the
+8-bit 'opcode' field is divided as::
+
+ +-+-+-+-+-+-+-+-+
+ |mode |sz |class|
+ +-+-+-+-+-+-+-+-+
+
+**mode**
+ The mode modifier is one of:
+
+ ============= ===== ==================================== =============
+ mode modifier value description reference
+ ============= ===== ==================================== =============
+ IMM 0 64-bit immediate instructions `64-bit immediate instructions`_
+ ABS 1 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_
+ IND 2 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_
+ MEM 3 regular load and store operations `Regular load and store operations`_
+ MEMSX 4 sign-extension load operations `Sign-extension load operations`_
+ ATOMIC 6 atomic operations `Atomic operations`_
+ ============= ===== ==================================== =============
+
+**sz (size)**
+ The size modifier is one of:
+
+ ==== ===== =====================
+ size value description
+ ==== ===== =====================
+ W 0 word (4 bytes)
+ H 1 half word (2 bytes)
+ B 2 byte
+ DW 3 double word (8 bytes)
+ ==== ===== =====================
+
+ Instructions using ``DW`` belong to the base64 conformance group.
+
+**class**
+ The instruction class (see `Instruction classes`_)
Regular load and store operations
---------------------------------
-The ``BPF_MEM`` mode modifier is used to encode regular load and store
+The ``MEM`` mode modifier is used to encode regular load and store
instructions that transfer data between a register and memory.
-``BPF_MEM | <size> | BPF_STX`` means::
+``{MEM, <size>, STX}`` means::
*(size *) (dst + offset) = src
-``BPF_MEM | <size> | BPF_ST`` means::
+``{MEM, <size>, ST}`` means::
- *(size *) (dst + offset) = imm32
+ *(size *) (dst + offset) = imm
-``BPF_MEM | <size> | BPF_LDX`` means::
+``{MEM, <size>, LDX}`` means::
dst = *(unsigned size *) (src + offset)
-Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW`` and
-'unsigned size' is one of u8, u16, u32 or u64.
+Where '<size>' is one of: ``B``, ``H``, ``W``, or ``DW``, and
+'unsigned size' is one of: u8, u16, u32, or u64.
Sign-extension load operations
------------------------------
-The ``BPF_MEMSX`` mode modifier is used to encode :term:`sign-extension<Sign Extend>` load
+The ``MEMSX`` mode modifier is used to encode :term:`sign-extension<Sign Extend>` load
instructions that transfer data between a register and memory.
-``BPF_MEMSX | <size> | BPF_LDX`` means::
+``{MEMSX, <size>, LDX}`` means::
dst = *(signed size *) (src + offset)
-Where size is one of: ``BPF_B``, ``BPF_H`` or ``BPF_W``, and
-'signed size' is one of s8, s16 or s32.
+Where size is one of: ``B``, ``H``, or ``W``, and
+'signed size' is one of: s8, s16, or s32.
Atomic operations
-----------------
@@ -497,10 +591,12 @@ interrupted or corrupted by other access to the same memory region
by other BPF programs or means outside of this specification.
All atomic operations supported by BPF are encoded as store operations
-that use the ``BPF_ATOMIC`` mode modifier as follows:
+that use the ``ATOMIC`` mode modifier as follows:
-* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
-* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations
+* ``{ATOMIC, W, STX}`` for 32-bit operations, which are
+ part of the "atomic32" conformance group.
+* ``{ATOMIC, DW, STX}`` for 64-bit operations, which are
+ part of the "atomic64" conformance group.
* 8-bit and 16-bit wide atomic operations are not supported.
The 'imm' field is used to encode the actual atomic operation.
@@ -510,18 +606,18 @@ arithmetic operations in the 'imm' field to encode the atomic operation:
======== ===== ===========
imm value description
======== ===== ===========
-BPF_ADD 0x00 atomic add
-BPF_OR 0x40 atomic or
-BPF_AND 0x50 atomic and
-BPF_XOR 0xa0 atomic xor
+ADD 0x00 atomic add
+OR 0x40 atomic or
+AND 0x50 atomic and
+XOR 0xa0 atomic xor
======== ===== ===========
-``BPF_ATOMIC | BPF_W | BPF_STX`` with 'imm' = BPF_ADD means::
+``{ATOMIC, W, STX}`` with 'imm' = ADD means::
*(u32 *)(dst + offset) += src
-``BPF_ATOMIC | BPF_DW | BPF_STX`` with 'imm' = BPF ADD means::
+``{ATOMIC, DW, STX}`` with 'imm' = ADD means::
*(u64 *)(dst + offset) += src
@@ -531,20 +627,20 @@ two complex atomic operations:
=========== ================ ===========================
imm value description
=========== ================ ===========================
-BPF_FETCH 0x01 modifier: return old value
-BPF_XCHG 0xe0 | BPF_FETCH atomic exchange
-BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange
+FETCH 0x01 modifier: return old value
+XCHG 0xe0 | FETCH atomic exchange
+CMPXCHG 0xf0 | FETCH atomic compare and exchange
=========== ================ ===========================
-The ``BPF_FETCH`` modifier is optional for simple atomic operations, and
-always set for the complex atomic operations. If the ``BPF_FETCH`` flag
+The ``FETCH`` modifier is optional for simple atomic operations, and
+always set for the complex atomic operations. If the ``FETCH`` flag
is set, then the operation also overwrites ``src`` with the value that
was in memory before it was modified.
-The ``BPF_XCHG`` operation atomically exchanges ``src`` with the value
+The ``XCHG`` operation atomically exchanges ``src`` with the value
addressed by ``dst + offset``.
-The ``BPF_CMPXCHG`` operation atomically compares the value addressed by
+The ``CMPXCHG`` operation atomically compares the value addressed by
``dst + offset`` with ``R0``. If they match, the value addressed by
``dst + offset`` is replaced with ``src``. In either case, the
value that was at ``dst + offset`` before the operation is zero-extended
@@ -553,25 +649,25 @@ and loaded back to ``R0``.
64-bit immediate instructions
-----------------------------
-Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction
-encoding defined in `Instruction encoding`_, and use the 'src' field of the
+Instructions with the ``IMM`` 'mode' modifier use the wide instruction
+encoding defined in `Instruction encoding`_, and use the 'src_reg' field of the
basic instruction to hold an opcode subtype.
-The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions
-with opcode subtypes in the 'src' field, using new terms such as "map"
+The following table defines a set of ``{IMM, DW, LD}`` instructions
+with opcode subtypes in the 'src_reg' field, using new terms such as "map"
defined further below:
-========================= ====== === ========================================= =========== ==============
-opcode construction opcode src pseudocode imm type dst type
-========================= ====== === ========================================= =========== ==============
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer
-========================= ====== === ========================================= =========== ==============
+======= ========================================= =========== ==============
+src_reg pseudocode imm type dst type
+======= ========================================= =========== ==============
+0x0 dst = (next_imm << 32) | imm integer integer
+0x1 dst = map_by_fd(imm) map fd map
+0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer
+0x3 dst = var_addr(imm) variable id data pointer
+0x4 dst = code_addr(imm) integer code pointer
+0x5 dst = map_by_idx(imm) map index map
+0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer
+======= ========================================= =========== ==============
where
@@ -609,5 +705,9 @@ Legacy BPF Packet access instructions
-------------------------------------
BPF previously introduced special instructions for access to packet data that were
-carried over from classic BPF. However, these instructions are
-deprecated and should no longer be used.
+carried over from classic BPF. These instructions used an instruction
+class of ``LD``, a size modifier of ``W``, ``H``, or ``B``, and a
+mode modifier of ``ABS`` or ``IND``. The 'dst_reg' and 'offset' fields were
+set to zero, and 'src_reg' was set to zero for ``ABS``. However, these
+instructions are deprecated and should no longer be used. All legacy packet
+access instructions belong to the "packet" conformance group.
diff --git a/Documentation/bpf/verifier.rst b/Documentation/bpf/verifier.rst
index f0ec19db301c..356894399fbf 100644
--- a/Documentation/bpf/verifier.rst
+++ b/Documentation/bpf/verifier.rst
@@ -562,7 +562,7 @@ works::
* ``checkpoint[0].r1`` is marked as read;
* At instruction #5 exit is reached and ``checkpoint[0]`` can now be processed
- by ``clean_live_states()``. After this processing ``checkpoint[0].r0`` has a
+ by ``clean_live_states()``. After this processing ``checkpoint[0].r1`` has a
read mark and all other registers and stack slots are marked as ``NOT_INIT``
or ``STACK_INVALID``
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 5830b01c5642..d148f3e8dd57 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -346,9 +346,9 @@ sys.stderr.write("Using %s theme\n" % html_theme)
html_static_path = ['sphinx-static']
# If true, Docutils "smart quotes" will be used to convert quotes and dashes
-# to typographically correct entities. This will convert "--" to "—",
-# which is not always what we want, so disable it.
-smartquotes = False
+# to typographically correct entities. However, conversion of "--" to "—"
+# is not always what we want, so enable only quotes.
+smartquotes_action = 'q'
# Custom sidebar templates, maps document names to template names.
# Note that the RTD theme ignores this
@@ -388,6 +388,12 @@ latex_elements = {
verbatimhintsturnover=false,
''',
+ #
+ # Some of our authors are fond of deep nesting; tell latex to
+ # cope.
+ #
+ 'maxlistdepth': '10',
+
# For CJK One-half spacing, need to be in front of hyperref
'extrapackages': r'\usepackage{setspace}',
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index 3599cf9267b4..ed73c612174d 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -77,10 +77,12 @@ wants a function to be executed asynchronously it has to set up a work
item pointing to that function and queue that work item on a
workqueue.
-Special purpose threads, called worker threads, execute the functions
-off of the queue, one after the other. If no work is queued, the
-worker threads become idle. These worker threads are managed in so
-called worker-pools.
+A work item can be executed in either a thread or the BH (softirq) context.
+
+For threaded workqueues, special purpose threads, called [k]workers, execute
+the functions off of the queue, one after the other. If no work is queued,
+the worker threads become idle. These worker threads are managed in
+worker-pools.
The cmwq design differentiates between the user-facing workqueues that
subsystems and drivers queue work items on and the backend mechanism
@@ -91,6 +93,12 @@ for high priority ones, for each possible CPU and some extra
worker-pools to serve work items queued on unbound workqueues - the
number of these backing pools is dynamic.
+BH workqueues use the same framework. However, as there can only be one
+concurrent execution context, there's no need to worry about concurrency.
+Each per-CPU BH worker pool contains only one pseudo worker which represents
+the BH execution context. A BH workqueue can be considered a convenience
+interface to softirq.
+
Subsystems and drivers can create and queue work items through special
workqueue API functions as they see fit. They can influence some
aspects of the way the work items are executed by setting flags on the
@@ -106,7 +114,7 @@ unless specifically overridden, a work item of a bound workqueue will
be queued on the worklist of either normal or highpri worker-pool that
is associated to the CPU the issuer is running on.
-For any worker pool implementation, managing the concurrency level
+For any thread pool implementation, managing the concurrency level
(how many execution contexts are active) is an important issue. cmwq
tries to keep the concurrency at a minimal but sufficient level.
Minimal to save resources and sufficient in that the system is used at
@@ -164,6 +172,17 @@ resources, scheduled and executed.
``flags``
---------
+``WQ_BH``
+ BH workqueues can be considered a convenience interface to softirq. BH
+ workqueues are always per-CPU and all BH work items are executed in the
+ queueing CPU's softirq context in the queueing order.
+
+ All BH workqueues must have 0 ``max_active`` and ``WQ_HIGHPRI`` is the
+ only allowed additional flag.
+
+ BH work items cannot sleep. All other features such as delayed queueing,
+ flushing and canceling are supported.
+
``WQ_UNBOUND``
Work items queued to an unbound wq are served by the special
worker-pools which host workers which are not bound to any
@@ -237,15 +256,11 @@ may queue at the same time. Unless there is a specific need for
throttling the number of active work items, specifying '0' is
recommended.
-Some users depend on the strict execution ordering of ST wq. The
-combination of ``@max_active`` of 1 and ``WQ_UNBOUND`` used to
-achieve this behavior. Work items on such wq were always queued to the
-unbound worker-pools and only one work item could be active at any given
-time thus achieving the same ordering property as ST wq.
-
-In the current implementation the above configuration only guarantees
-ST behavior within a given NUMA node. Instead ``alloc_ordered_workqueue()`` should
-be used to achieve system-wide ST behavior.
+Some users depend on strict execution ordering where only one work item
+is in flight at any given time and the work items are processed in
+queueing order. While the combination of ``@max_active`` of 1 and
+``WQ_UNBOUND`` used to achieve this behavior, this is no longer the
+case. Use ``alloc_ordered_queue()`` instead.
Example Execution Scenarios
diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
index c3389c6f3838..127968995847 100644
--- a/Documentation/dev-tools/checkpatch.rst
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -168,7 +168,7 @@ Available options:
- --fix
- This is an EXPERIMENTAL feature. If correctable errors exists, a file
+ This is an EXPERIMENTAL feature. If correctable errors exist, a file
<inputfile>.EXPERIMENTAL-checkpatch-fixes is created which has the
automatically fixable errors corrected.
@@ -181,7 +181,7 @@ Available options:
- --ignore-perl-version
- Override checking of perl version. Runtime errors maybe encountered after
+ Override checking of perl version. Runtime errors may be encountered after
enabling this flag if the perl version does not meet the minimum specified.
- --codespell
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index 858c77fe7dc4..d56f298a9d7c 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -277,6 +277,27 @@ traces point to places in code that interacted with the object but that are not
directly present in the bad access stack trace. Currently, this includes
call_rcu() and workqueue queuing.
+CONFIG_KASAN_EXTRA_INFO
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Enabling CONFIG_KASAN_EXTRA_INFO allows KASAN to record and report more
+information. The extra information currently supported is the CPU number and
+timestamp at allocation and free. More information can help find the cause of
+the bug and correlate the error with other system events, at the cost of using
+extra memory to record more information (more cost details in the help text of
+CONFIG_KASAN_EXTRA_INFO).
+
+Here is the report with CONFIG_KASAN_EXTRA_INFO enabled (only the
+different parts are shown)::
+
+ ==================================================================
+ ...
+ Allocated by task 134 on cpu 5 at 229.133855s:
+ ...
+ Freed by task 136 on cpu 3 at 230.199335s:
+ ...
+ ==================================================================
+
Implementation details
----------------------
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index ab376b316c36..ff10dc6eef5d 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -245,6 +245,10 @@ Contributing new tests (details)
TEST_PROGS, TEST_GEN_PROGS mean it is the executable tested by
default.
+ TEST_GEN_MODS_DIR should be used by tests that require modules to be built
+ before the test starts. The variable will contain the name of the directory
+ containing the modules.
+
TEST_CUSTOM_PROGS should be used by tests that require custom build
rules and prevent common build rule use.
@@ -255,9 +259,21 @@ Contributing new tests (details)
TEST_PROGS_EXTENDED, TEST_GEN_PROGS_EXTENDED mean it is the
executable which is not tested by default.
+
TEST_FILES, TEST_GEN_FILES mean it is the file which is used by
test.
+ TEST_INCLUDES is similar to TEST_FILES, it lists files which should be
+ included when exporting or installing the tests, with the following
+ differences:
+
+ * symlinks to files in other directories are preserved
+ * the part of paths below tools/testing/selftests/ is preserved when
+ copying the files to the output directory
+
+ TEST_INCLUDES is meant to list dependencies located in other directories of
+ the selftests hierarchy.
+
* First use the headers inside the kernel source and/or git repo, and then the
system headers. Headers for the kernel release as opposed to headers
installed by the distro on the system should be the primary focus to be able
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index a9efab50eed8..22955d56b379 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -671,8 +671,23 @@ Testing Static Functions
------------------------
If we do not want to expose functions or variables for testing, one option is to
-conditionally ``#include`` the test file at the end of your .c file. For
-example:
+conditionally export the used symbol. For example:
+
+.. code-block:: c
+
+ /* In my_file.c */
+
+ VISIBLE_IF_KUNIT int do_interesting_thing();
+ EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing);
+
+ /* In my_file.h */
+
+ #if IS_ENABLED(CONFIG_KUNIT)
+ int do_interesting_thing(void);
+ #endif
+
+Alternatively, you could conditionally ``#include`` the test file at the end of
+your .c file. For example:
.. code-block:: c
diff --git a/Documentation/dev-tools/ubsan.rst b/Documentation/dev-tools/ubsan.rst
index 2de7c63415da..e3591f8e9d5b 100644
--- a/Documentation/dev-tools/ubsan.rst
+++ b/Documentation/dev-tools/ubsan.rst
@@ -49,34 +49,22 @@ Report example
Usage
-----
-To enable UBSAN configure kernel with::
+To enable UBSAN, configure the kernel with::
- CONFIG_UBSAN=y
+ CONFIG_UBSAN=y
-and to check the entire kernel::
-
- CONFIG_UBSAN_SANITIZE_ALL=y
-
-To enable instrumentation for specific files or directories, add a line
-similar to the following to the respective kernel Makefile:
-
-- For a single file (e.g. main.o)::
-
- UBSAN_SANITIZE_main.o := y
-
-- For all files in one directory::
-
- UBSAN_SANITIZE := y
-
-To exclude files from being instrumented even if
-``CONFIG_UBSAN_SANITIZE_ALL=y``, use::
+To exclude files from being instrumented use::
UBSAN_SANITIZE_main.o := n
-and::
+and to exclude all targets in one directory use::
UBSAN_SANITIZE := n
+When disabled for all targets, specific files can be enabled using::
+
+ UBSAN_SANITIZE_main.o := y
+
Detection of unaligned accesses controlled through the separate option -
CONFIG_UBSAN_ALIGNMENT. It's off by default on architectures that support
unaligned accesses (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y). One could
diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
index 2323fd5b7cda..129cf698fa8a 100644
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -28,7 +28,10 @@ $(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \
-name 'processed-schema*' \)
-find_cmd = $(find_all_cmd) | sed 's|^$(srctree)/$(src)/||' | grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | sed 's|^|$(srctree)/$(src)/|'
+find_cmd = $(find_all_cmd) | \
+ sed 's|^$(srctree)/||' | \
+ grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | \
+ sed 's|^|$(srctree)/|'
CHK_DT_DOCS := $(shell $(find_cmd))
quiet_cmd_yamllint = LINT $(src)
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
index caab7ceeda45..949537cea6be 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -7,19 +7,11 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Amlogic SoC based Platforms
maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ - Jerome Brunet <jbrunet@baylibre.com>
- Kevin Hilman <khilman@baylibre.com>
-description: |+
- Work in progress statement:
-
- Device tree files and bindings applying to Amlogic SoCs and boards are
- considered "unstable". Any Amlogic device tree binding may change at
- any time. Be sure to use a device tree binary and a kernel image
- generated from the same source tree.
-
- Please refer to Documentation/devicetree/bindings/ABI.rst for a definition of a
- stable binding/ABI.
-
properties:
$nodename:
const: '/'
@@ -146,6 +138,7 @@ properties:
- enum:
- amediatech,x96-max
- amlogic,u200
+ - freebox,fbx8am
- radxa,zero
- seirobotics,sei510
- const: amlogic,g12a
diff --git a/Documentation/devicetree/bindings/arm/arm,realview.yaml b/Documentation/devicetree/bindings/arm/arm,realview.yaml
index d1bdee98f9af..3c5f1688dbd7 100644
--- a/Documentation/devicetree/bindings/arm/arm,realview.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,realview.yaml
@@ -10,9 +10,9 @@ maintainers:
- Linus Walleij <linus.walleij@linaro.org>
description: |+
- The ARM RealView series of reference designs were built to explore the ARM
- 11, Cortex A-8 and Cortex A-9 CPUs. This included new features compared to
- the earlier CPUs such as TrustZone and multicore (MPCore).
+ The ARM RealView series of reference designs were built to explore the Arm11,
+ Cortex-A8, and Cortex-A9 CPUs. This included new features compared to the
+ earlier CPUs such as TrustZone and multicore (MPCore).
properties:
$nodename:
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
index 89d75fbb1de4..82f37328cc69 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.yaml
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
@@ -179,6 +179,12 @@ properties:
- const: microchip,sama7g5
- const: microchip,sama7
+ - description: Microchip SAMA7G54 Curiosity Board
+ items:
+ - const: microchip,sama7g54-curiosity
+ - const: microchip,sama7g5
+ - const: microchip,sama7
+
- description: Microchip LAN9662 Evaluation Boards.
items:
- enum:
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 228dcc5c7d6f..0027201e19f8 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -384,7 +384,8 @@ properties:
- toradex,apalis_imx6q-ixora # Apalis iMX6Q/D Module on Ixora Carrier Board
- toradex,apalis_imx6q-ixora-v1.1 # Apalis iMX6Q/D Module on Ixora V1.1 Carrier Board
- toradex,apalis_imx6q-ixora-v1.2 # Apalis iMX6Q/D Module on Ixora V1.2 Carrier Board
- - toradex,apalis_imx6q-eval # Apalis iMX6Q/D Module on Apalis Evaluation Board
+ - toradex,apalis_imx6q-eval # Apalis iMX6Q/D Module on Apalis Evaluation Board v1.0/v1.1
+ - toradex,apalis_imx6q-eval-v1.2 # Apalis iMX6Q/D Module on Apalis Evaluation Board v1.2
- const: toradex,apalis_imx6q
- const: fsl,imx6q
@@ -469,6 +470,7 @@ properties:
- prt,prtvt7 # Protonic VT7 board
- rex,imx6dl-rex-basic # Rex Basic i.MX6 Dual Lite Board
- riot,imx6s-riotboard # RIoTboard i.MX6S
+ - sielaff,imx6dl-board # Sielaff i.MX6 Solo Board
- skov,imx6dl-skov-revc-lt2 # SKOV IMX6 CPU SoloCore lt2
- skov,imx6dl-skov-revc-lt6 # SKOV IMX6 CPU SoloCore lt6
- solidrun,cubox-i/dl # SolidRun Cubox-i Solo/DualLite
@@ -708,6 +710,7 @@ properties:
- toradex,colibri-imx6ull # Colibri iMX6ULL Modules
- toradex,colibri-imx6ull-emmc # Colibri iMX6ULL 1GB (eMMC) Module
- toradex,colibri-imx6ull-wifi # Colibri iMX6ULL Wi-Fi / BT Modules
+ - uni-t,uti260b # UNI-T UTi260B Thermal Camera
- const: fsl,imx6ull
- description: i.MX6ULL Armadeus Systems OPOS6ULDev Board
@@ -1026,7 +1029,7 @@ properties:
items:
- enum:
- dimonoff,gateway-evk # i.MX8MN Dimonoff Gateway EVK Board
- - rve,rve-gateway # i.MX8MN RVE Gateway Board
+ - rve,gateway # i.MX8MN RVE Gateway Board
- variscite,var-som-mx8mn-symphony
- const: variscite,var-som-mx8mn
- const: fsl,imx8mn
@@ -1194,7 +1197,8 @@ properties:
- description: i.MX8QM Boards with Toradex Apalis iMX8 Modules
items:
- enum:
- - toradex,apalis-imx8-eval # Apalis iMX8 Module on Apalis Evaluation Board
+ - toradex,apalis-imx8-eval # Apalis iMX8 Module on Apalis Evaluation V1.0/V1.1 Board
+ - toradex,apalis-imx8-eval-v1.2 # Apalis iMX8 Module on Apalis Evaluation V1.2 Board
- toradex,apalis-imx8-ixora-v1.1 # Apalis iMX8 Module on Ixora V1.1 Carrier Board
- const: toradex,apalis-imx8
- const: fsl,imx8qm
@@ -1202,7 +1206,8 @@ properties:
- description: i.MX8QM Boards with Toradex Apalis iMX8 V1.1 Modules
items:
- enum:
- - toradex,apalis-imx8-v1.1-eval # Apalis iMX8 V1.1 Module on Apalis Eval. Board
+ - toradex,apalis-imx8-v1.1-eval # Apalis iMX8 V1.1 Module on Apalis Eval. V1.0/V1.1 Board
+ - toradex,apalis-imx8-v1.1-eval-v1.2 # Apalis iMX8 V1.1 Module on Apalis Eval. V1.2 Board
- toradex,apalis-imx8-v1.1-ixora-v1.1 # Apalis iMX8 V1.1 Module on Ixora V1.1 C. Board
- toradex,apalis-imx8-v1.1-ixora-v1.2 # Apalis iMX8 V1.1 Module on Ixora V1.2 C. Board
- const: toradex,apalis-imx8-v1.1
@@ -1232,6 +1237,22 @@ properties:
- const: toradex,colibri-imx8x
- const: fsl,imx8qxp
+ - description:
+ TQMa8Xx is a series of SOM featuring NXP i.MX8X system-on-chip
+ variants. It is designed to be clicked on different carrier boards
+ MBa8Xx is the starterkit
+ oneOf:
+ - items:
+ - enum:
+ - tq,imx8dxp-tqma8xdp-mba8xx # TQ-Systems GmbH TQMa8XDP SOM on MBa8Xx
+ - const: tq,imx8dxp-tqma8xdp # TQ-Systems GmbH TQMa8XDP SOM (with i.MX8DXP)
+ - const: fsl,imx8dxp
+ - items:
+ - enum:
+ - tq,imx8qxp-tqma8xqp-mba8xx # TQ-Systems GmbH TQMa8XQP SOM on MBa8Xx
+ - const: tq,imx8qxp-tqma8xqp # TQ-Systems GmbH TQMa8XQP SOM (with i.MX8QXP)
+ - const: fsl,imx8qxp
+
- description: i.MX8ULP based Boards
items:
- enum:
@@ -1275,6 +1296,18 @@ properties:
- const: tq,imx93-tqma9352 # TQ-Systems GmbH i.MX93 TQMa93xxCA/LA SOM
- const: fsl,imx93
+ - description: PHYTEC phyCORE-i.MX93 SoM based boards
+ items:
+ - const: phytec,imx93-phyboard-segin # phyBOARD-Segin with i.MX93
+ - const: phytec,imx93-phycore-som # phyCORE-i.MX93 SoM
+ - const: fsl,imx93
+
+ - description: Variscite VAR-SOM-MX93 based boards
+ items:
+ - const: variscite,var-som-mx93-symphony
+ - const: variscite,var-som-mx93
+ - const: fsl,imx93
+
- description:
Freescale Vybrid Platform Device Tree Bindings
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-38x.txt b/Documentation/devicetree/bindings/arm/marvell/armada-38x.txt
deleted file mode 100644
index 202953f1887e..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-38x.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Marvell Armada 38x Platforms Device Tree Bindings
--------------------------------------------------
-
-Boards with a SoC of the Marvell Armada 38x family shall have the
-following property:
-
-Required root node property:
-
- - compatible: must contain "marvell,armada380"
-
-In addition, boards using the Marvell Armada 385 SoC shall have the
-following property before the previous one:
-
-Required root node property:
-
-compatible: must contain "marvell,armada385"
-
-In addition, boards using the Marvell Armada 388 SoC shall have the
-following property before the previous one:
-
-Required root node property:
-
-compatible: must contain "marvell,armada388"
-
-Example:
-
-compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada380";
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml
new file mode 100644
index 000000000000..cdf805b5db95
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/armada-38x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 38x Platforms
+
+maintainers:
+ - Gregory CLEMENT <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description:
+ Netgear Armada 380 GS110EM Managed Switch.
+ items:
+ - const: netgear,gs110emx
+ - const: marvell,armada380
+
+ - description:
+ Marvell Armada 385 Development Boards.
+ items:
+ - enum:
+ - marvell,a385-db-amc
+ - marvell,a385-db-ap
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ SolidRun Armada 385 based single-board computers.
+ items:
+ - enum:
+ - solidrun,clearfog-gtr-l8
+ - solidrun,clearfog-gtr-s4
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ Kobol Armada 388 based Helios-4 NAS.
+ items:
+ - const: kobol,helios4
+ - const: marvell,armada388
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ Marvell Armada 388 Development Boards.
+ items:
+ - enum:
+ - marvell,a388-gp
+ - const: marvell,armada388
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ SolidRun Armada 388 clearfog family single-board computers.
+ items:
+ - enum:
+ - solidrun,clearfog-base-a1
+ - solidrun,clearfog-pro-a1
+ - const: solidrun,clearfog-a1
+ - const: marvell,armada388
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/mediatek.yaml b/Documentation/devicetree/bindings/arm/mediatek.yaml
index 6f2f64ae76fc..09f9ffd3ff7b 100644
--- a/Documentation/devicetree/bindings/arm/mediatek.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek.yaml
@@ -17,6 +17,7 @@ properties:
const: '/'
compatible:
oneOf:
+ # Sort by SoC (last) compatible, then board compatible
- items:
- enum:
- mediatek,mt2701-evb
@@ -84,6 +85,11 @@ properties:
- const: mediatek,mt7629
- items:
- enum:
+ - xiaomi,ax3000t
+ - const: mediatek,mt7981b
+ - items:
+ - enum:
+ - acelink,ew-7886cax
- bananapi,bpi-r3
- mediatek,mt7986a-rfb
- const: mediatek,mt7986a
@@ -93,6 +99,10 @@ properties:
- const: mediatek,mt7986b
- items:
- enum:
+ - bananapi,bpi-r4
+ - const: mediatek,mt7988a
+ - items:
+ - enum:
- mediatek,mt8127-moose
- const: mediatek,mt8127
- items:
@@ -129,75 +139,10 @@ properties:
- enum:
- mediatek,mt8173-evb
- const: mediatek,mt8173
- - items:
- - enum:
- - mediatek,mt8183-evb
- - const: mediatek,mt8183
- - description: Google Hayato rev5
- items:
- - const: google,hayato-rev5-sku2
- - const: google,hayato-sku2
- - const: google,hayato
- - const: mediatek,mt8192
- - description: Google Hayato
- items:
- - const: google,hayato-rev1
- - const: google,hayato
- - const: mediatek,mt8192
- - description: Google Spherion rev4 (Acer Chromebook 514)
- items:
- - const: google,spherion-rev4
- - const: google,spherion
- - const: mediatek,mt8192
- - description: Google Spherion (Acer Chromebook 514)
- items:
- - const: google,spherion-rev3
- - const: google,spherion-rev2
- - const: google,spherion-rev1
- - const: google,spherion-rev0
- - const: google,spherion
- - const: mediatek,mt8192
- - description: Acer Tomato (Acer Chromebook Spin 513 CP513-2H)
- items:
- - enum:
- - google,tomato-rev2
- - google,tomato-rev1
- - const: google,tomato
- - const: mediatek,mt8195
- - description: Acer Tomato rev3 - 4 (Acer Chromebook Spin 513 CP513-2H)
- items:
- - const: google,tomato-rev4
- - const: google,tomato-rev3
- - const: google,tomato
- - const: mediatek,mt8195
- - items:
- - enum:
- - mediatek,mt8186-evb
- - const: mediatek,mt8186
- - items:
- - enum:
- - mediatek,mt8188-evb
- - const: mediatek,mt8188
- - items:
- - enum:
- - mediatek,mt8192-evb
- - const: mediatek,mt8192
- - items:
- - enum:
- - mediatek,mt8195-demo
- - mediatek,mt8195-evb
- - const: mediatek,mt8195
- description: Google Burnet (HP Chromebook x360 11MK G3 EE)
items:
- const: google,burnet
- const: mediatek,mt8183
- - description: Google Krane (Lenovo IdeaPad Duet, 10e,...)
- items:
- - enum:
- - google,krane-sku0
- - google,krane-sku176
- - const: google,krane
- - const: mediatek,mt8183
- description: Google Cozmo (Acer Chromebook 314)
items:
- const: google,cozmo
@@ -255,6 +200,13 @@ properties:
- google,kodama-sku32
- const: google,kodama
- const: mediatek,mt8183
+ - description: Google Krane (Lenovo IdeaPad Duet, 10e,...)
+ items:
+ - enum:
+ - google,krane-sku0
+ - google,krane-sku176
+ - const: google,krane
+ - const: mediatek,mt8183
- description: Google Makomo (Lenovo 100e Chromebook 2nd Gen MTK 2)
items:
- enum:
@@ -278,8 +230,123 @@ properties:
- const: mediatek,mt8183
- items:
- enum:
+ - mediatek,mt8183-evb
+ - const: mediatek,mt8183
+ - items:
+ - enum:
- mediatek,mt8183-pumpkin
- const: mediatek,mt8183
+ - description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
+ items:
+ - const: google,steelix-sku393219
+ - const: google,steelix-sku393216
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
+ items:
+ - const: google,steelix-sku393220
+ - const: google,steelix-sku393217
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
+ items:
+ - const: google,steelix-sku393221
+ - const: google,steelix-sku393218
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Rusty (Lenovo 100e Chromebook Gen 4)
+ items:
+ - const: google,steelix-sku196609
+ - const: google,steelix-sku196608
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Steelix (Lenovo 300e Yoga Chromebook Gen 4)
+ items:
+ - enum:
+ - google,steelix-sku131072
+ - google,steelix-sku131073
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Tentacruel (ASUS Chromebook CM14 Flip CM1402F)
+ items:
+ - const: google,tentacruel-sku262147
+ - const: google,tentacruel-sku262146
+ - const: google,tentacruel-sku262145
+ - const: google,tentacruel-sku262144
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Tentacruel (ASUS Chromebook CM14 Flip CM1402F)
+ items:
+ - const: google,tentacruel-sku262151
+ - const: google,tentacruel-sku262150
+ - const: google,tentacruel-sku262149
+ - const: google,tentacruel-sku262148
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Tentacool (ASUS Chromebook CM14 CM1402C)
+ items:
+ - const: google,tentacruel-sku327681
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Tentacool (ASUS Chromebook CM14 CM1402C)
+ items:
+ - const: google,tentacruel-sku327683
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - items:
+ - enum:
+ - mediatek,mt8186-evb
+ - const: mediatek,mt8186
+ - items:
+ - enum:
+ - mediatek,mt8188-evb
+ - const: mediatek,mt8188
+ - description: Google Hayato
+ items:
+ - const: google,hayato-rev1
+ - const: google,hayato
+ - const: mediatek,mt8192
+ - description: Google Hayato rev5
+ items:
+ - const: google,hayato-rev5-sku2
+ - const: google,hayato-sku2
+ - const: google,hayato
+ - const: mediatek,mt8192
+ - description: Google Spherion (Acer Chromebook 514)
+ items:
+ - const: google,spherion-rev3
+ - const: google,spherion-rev2
+ - const: google,spherion-rev1
+ - const: google,spherion-rev0
+ - const: google,spherion
+ - const: mediatek,mt8192
+ - description: Google Spherion rev4 (Acer Chromebook 514)
+ items:
+ - const: google,spherion-rev4
+ - const: google,spherion
+ - const: mediatek,mt8192
+ - items:
+ - enum:
+ - mediatek,mt8192-evb
+ - const: mediatek,mt8192
+ - description: Acer Tomato (Acer Chromebook Spin 513 CP513-2H)
+ items:
+ - enum:
+ - google,tomato-rev2
+ - google,tomato-rev1
+ - const: google,tomato
+ - const: mediatek,mt8195
+ - description: Acer Tomato rev3 - 4 (Acer Chromebook Spin 513 CP513-2H)
+ items:
+ - const: google,tomato-rev4
+ - const: google,tomato-rev3
+ - const: google,tomato
+ - const: mediatek,mt8195
+ - items:
+ - enum:
+ - mediatek,mt8195-demo
+ - mediatek,mt8195-evb
+ - const: mediatek,mt8195
- items:
- enum:
- mediatek,mt8365-evk
@@ -287,6 +354,7 @@ properties:
- items:
- enum:
- mediatek,mt8395-evk
+ - radxa,nio-12l
- const: mediatek,mt8395
- const: mediatek,mt8195
- items:
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt b/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt
deleted file mode 100644
index c0e3c3a42bea..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-SPM AVS Wrapper 2 (SAW2)
-
-The SAW2 is a wrapper around the Subsystem Power Manager (SPM) and the
-Adaptive Voltage Scaling (AVS) hardware. The SPM is a programmable
-power-controller that transitions a piece of hardware (like a processor or
-subsystem) into and out of low power modes via a direct connection to
-the PMIC. It can also be wired up to interact with other processors in the
-system, notifying them when a low power state is entered or exited.
-
-Multiple revisions of the SAW hardware are supported using these Device Nodes.
-SAW2 revisions differ in the register offset and configuration data. Also, the
-same revision of the SAW in different SoCs may have different configuration
-data due the differences in hardware capabilities. Hence the SoC name, the
-version of the SAW hardware in that SoC and the distinction between cpu (big
-or Little) or cache, may be needed to uniquely identify the SAW register
-configuration and initialization data. The compatible string is used to
-indicate this parameter.
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: Must have
- "qcom,saw2"
- A more specific value could be one of:
- "qcom,apq8064-saw2-v1.1-cpu"
- "qcom,msm8226-saw2-v2.1-cpu"
- "qcom,msm8974-saw2-v2.1-cpu"
- "qcom,apq8084-saw2-v2.1-cpu"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: the first element specifies the base address and size of
- the register region. An optional second element specifies
- the base address and size of the alias register region.
-
-- regulator:
- Usage: optional
- Value type: boolean
- Definition: Indicates that this SPM device acts as a regulator device
- device for the core (CPU or Cache) the SPM is attached
- to.
-
-Example 1:
-
- power-controller@2099000 {
- compatible = "qcom,saw2";
- reg = <0x02099000 0x1000>, <0x02009000 0x1000>;
- regulator;
- };
-
-Example 2:
- saw0: power-controller@f9089000 {
- compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
- reg = <0xf9089000 0x1000>, <0xf9009000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
index 1a5fb889a444..66beaac60e1d 100644
--- a/Documentation/devicetree/bindings/arm/qcom.yaml
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -10,17 +10,10 @@ maintainers:
- Bjorn Andersson <bjorn.andersson@linaro.org>
description: |
- Some qcom based bootloaders identify the dtb blob based on a set of
- device properties like SoC and platform and revisions of those components.
- To support this scheme, we encode this information into the board compatible
- string.
-
- Each board must specify a top-level board compatible string with the following
- format:
-
- compatible = "qcom,<SoC>[-<soc_version>][-<foundry_id>]-<board>[/<subtype>][-<board_version>]"
-
- The 'SoC' and 'board' elements are required. All other elements are optional.
+ For devices using the Qualcomm SoC the "compatible" properties consists of
+ one or several "manufacturer,model" strings, describing the device itself,
+ followed by one or several "qcom,<SoC>" strings, describing the SoC used in
+ the device.
The 'SoC' element must be one of the following strings:
@@ -90,43 +83,9 @@ description: |
sm8650
x1e80100
- The 'board' element must be one of the following strings:
-
- adp
- cdp
- dragonboard
- idp
- liquid
- mtp
- qcp
- qrd
- rb2
- ride
- sbc
- x100
-
- The 'soc_version' and 'board_version' elements take the form of v<Major>.<Minor>
- where the minor number may be omitted when it's zero, i.e. v1.0 is the same
- as v1. If all versions of the 'board_version' elements match, then a
- wildcard '*' should be used, e.g. 'v*'.
-
- The 'foundry_id' and 'subtype' elements are one or more digits from 0 to 9.
-
- Examples:
-
- "qcom,msm8916-v1-cdp-pm8916-v2.1"
-
- A CDP board with an msm8916 SoC, version 1 paired with a pm8916 PMIC of version
- 2.1.
-
- "qcom,apq8074-v2.0-2-dragonboard/1-v0.1"
-
- A dragonboard board v0.1 of subtype 1 with an apq8074 SoC version 2, made in
- foundry 2.
-
There are many devices in the list below that run the standard ChromeOS
bootloader setup and use the open source depthcharge bootloader to boot the
- OS. These devices do not use the scheme described above. For details, see:
+ OS. These devices use the bootflow explained at
https://docs.kernel.org/arch/arm/google/chromebook-boot-flow.html
properties:
@@ -187,6 +146,7 @@ properties:
- microsoft,superman-lte
- microsoft,tesla
- motorola,peregrine
+ - samsung,matisselte
- const: qcom,msm8926
- const: qcom,msm8226
@@ -244,11 +204,15 @@ properties:
- samsung,a5u-eur
- samsung,e5
- samsung,e7
+ - samsung,fortuna3g
+ - samsung,gprimeltecan
- samsung,grandmax
+ - samsung,grandprimelte
- samsung,gt510
- samsung,gt58
- samsung,j5
- samsung,j5x
+ - samsung,rossa
- samsung,serranove
- thwc,uf896
- thwc,ufi001c
@@ -988,6 +952,7 @@ properties:
- items:
- enum:
+ - xiaomi,curtana
- xiaomi,joyeuse
- const: qcom,sm7125
@@ -1035,6 +1000,7 @@ properties:
- items:
- enum:
+ - qcom,sm8550-hdk
- qcom,sm8550-mtp
- qcom,sm8550-qrd
- const: qcom,sm8550
diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index 5cf5cbef2cf5..fcf7316ecd74 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -37,29 +37,16 @@ properties:
- anbernic,rg351v
- const: rockchip,rk3326
- - description: Anbernic RG353P
+ - description: Anbernic RK3566 Handheld Gaming Console
items:
- - const: anbernic,rg353p
- - const: rockchip,rk3566
-
- - description: Anbernic RG353PS
- items:
- - const: anbernic,rg353ps
- - const: rockchip,rk3566
-
- - description: Anbernic RG353V
- items:
- - const: anbernic,rg353v
- - const: rockchip,rk3566
-
- - description: Anbernic RG353VS
- items:
- - const: anbernic,rg353vs
- - const: rockchip,rk3566
-
- - description: Anbernic RG503
- items:
- - const: anbernic,rg503
+ - enum:
+ - anbernic,rg353p
+ - anbernic,rg353ps
+ - anbernic,rg353v
+ - anbernic,rg353vs
+ - anbernic,rg503
+ - anbernic,rg-arc-d
+ - anbernic,rg-arc-s
- const: rockchip,rk3566
- description: Asus Tinker board
@@ -237,6 +224,13 @@ properties:
- friendlyarm,nanopi-r5s
- const: rockchip,rk3568
+ - description: FriendlyElec NanoPi R6 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopi-r6c
+ - friendlyarm,nanopi-r6s
+ - const: rockchip,rk3588s
+
- description: FriendlyElec NanoPC T6
items:
- const: friendlyarm,nanopc-t6
@@ -626,9 +620,9 @@ properties:
- const: openailab,eaidk-610
- const: rockchip,rk3399
- - description: Orange Pi RK3399 board
+ - description: Xunlong Orange Pi RK3399 board
items:
- - const: rockchip,rk3399-orangepi
+ - const: xunlong,rk3399-orangepi
- const: rockchip,rk3399
- description: Phytec phyCORE-RK3288 Rapid Development Kit
@@ -655,6 +649,14 @@ properties:
- const: pine64,pinephone-pro
- const: rockchip,rk3399
+ - description: Pine64 PineTab2
+ items:
+ - enum:
+ - pine64,pinetab2-v0.1
+ - pine64,pinetab2-v2.0
+ - const: pine64,pinetab2
+ - const: rockchip,rk3566
+
- description: Pine64 Rock64
items:
- const: pine64,rock64
@@ -692,11 +694,17 @@ properties:
- description: Powkiddy RK3566 Handheld Gaming Console
items:
- enum:
+ - powkiddy,rgb10max3
- powkiddy,rgb30
- powkiddy,rk2023
- powkiddy,x55
- const: rockchip,rk3566
+ - description: QNAP TS-433-4G 4-Bay NAS
+ items:
+ - const: qnap,ts433
+ - const: rockchip,rk3568
+
- description: Radxa Compute Module 3(CM3)
items:
- enum:
@@ -878,6 +886,11 @@ properties:
- const: rockchip,rv1108-evb
- const: rockchip,rv1108
+ - description: Rockchip Toybrick TB-RK3588X board
+ items:
+ - const: rockchip,rk3588-toybrick-x0
+ - const: rockchip,rk3588
+
- description: Theobroma Systems PX30-uQ7 with Haikou baseboard
items:
- const: tsd,px30-ringneck-haikou
@@ -898,6 +911,12 @@ properties:
- const: tsd,rk3588-jaguar
- const: rockchip,rk3588
+ - description: Theobroma Systems RK3588-Q7 with Haikou baseboard
+ items:
+ - const: tsd,rk3588-tiger-haikou
+ - const: tsd,rk3588-tiger
+ - const: rockchip,rk3588
+
- description: Tronsmart Orion R68 Meta
items:
- const: tronsmart,orion-r68-meta
@@ -940,9 +959,9 @@ properties:
- const: rockchip,rk3568-evb1-v10
- const: rockchip,rk3568
- - description: Rockchip RK3568 Banana Pi R2 Pro
+ - description: Sinovoip RK3568 Banana Pi R2 Pro
items:
- - const: rockchip,rk3568-bpi-r2pro
+ - const: sinovoip,rk3568-bpi-r2pro
- const: rockchip,rk3568
- description: Sonoff iHost Smart Home Hub
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index a9d8e85565b8..09d835db6db5 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -815,6 +815,12 @@ properties:
- const: allwinner,r7-tv-dongle
- const: allwinner,sun5i-a10s
+ - description: Remix Mini PC
+ items:
+ - const: jide,remix-mini-pc
+ - const: allwinner,sun50i-h64
+ - const: allwinner,sun50i-a64
+
- description: RerVision H3-DVK
items:
- const: rervision,h3-dvk
@@ -835,6 +841,12 @@ properties:
- const: sinlinx,sina33
- const: allwinner,sun8i-a33
+ - description: Sipeed Longan Pi 3H board for the Sipeed Longan Module 3H
+ items:
+ - const: sipeed,longan-pi-3h
+ - const: sipeed,longan-module-3h
+ - const: allwinner,sun50i-h618
+
- description: SourceParts PopStick v1.1
items:
- const: sourceparts,popstick-v1.1
diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml
index fcf956406168..8fb4923517d0 100644
--- a/Documentation/devicetree/bindings/arm/tegra.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra.yaml
@@ -64,6 +64,14 @@ properties:
- items:
- const: asus,tf700t
- const: nvidia,tegra30
+ - description: LG Optimus 4X P880
+ items:
+ - const: lg,p880
+ - const: nvidia,tegra30
+ - description: LG Optimus Vu P895
+ items:
+ - const: lg,p895
+ - const: nvidia,tegra30
- items:
- const: toradex,apalis_t30-eval
- const: toradex,apalis_t30
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml
index 0faa403f68c8..ea4fbf655220 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml
@@ -27,7 +27,7 @@ properties:
- const: pmc
- const: wake
- const: aotag
- - const: scratch
+ - enum: [ scratch, misc ]
- const: misc
interrupt-controller: true
@@ -41,25 +41,43 @@ properties:
description: If present, inverts the PMU interrupt signal.
$ref: /schemas/types.yaml#/definitions/flag
-if:
- properties:
- compatible:
- contains:
- const: nvidia,tegra186-pmc
-then:
- properties:
- reg:
- maxItems: 4
-
- reg-names:
- maxItems: 4
-else:
- properties:
- reg:
- minItems: 5
-
- reg-names:
- minItems: 5
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra186-pmc
+ then:
+ properties:
+ reg:
+ maxItems: 4
+ reg-names:
+ maxItems: 4
+ contains:
+ const: scratch
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra194-pmc
+ then:
+ properties:
+ reg:
+ minItems: 5
+ reg-names:
+ minItems: 5
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra234-pmc
+ then:
+ properties:
+ reg-names:
+ contains:
+ const: misc
patternProperties:
"^[a-z0-9]+-[a-z0-9]+$":
diff --git a/Documentation/devicetree/bindings/arm/ti/k3.yaml b/Documentation/devicetree/bindings/arm/ti/k3.yaml
index c6506bccfe88..52b51fd7044e 100644
--- a/Documentation/devicetree/bindings/arm/ti/k3.yaml
+++ b/Documentation/devicetree/bindings/arm/ti/k3.yaml
@@ -87,12 +87,20 @@ properties:
- const: tq,am642-tqma6442l
- const: ti,am642
+ - description: K3 AM642 SoC SolidRun SoM based boards
+ items:
+ - enum:
+ - solidrun,am642-hummingboard-t
+ - const: solidrun,am642-sr-som
+ - const: ti,am642
+
- description: K3 AM654 SoC
items:
- enum:
- siemens,iot2050-advanced
- siemens,iot2050-advanced-m2
- siemens,iot2050-advanced-pg2
+ - siemens,iot2050-advanced-sm
- siemens,iot2050-basic
- siemens,iot2050-basic-pg2
- ti,am654-evm
@@ -123,6 +131,12 @@ properties:
- ti,j721s2-evm
- const: ti,j721s2
+ - description: K3 J722S SoC and Boards
+ items:
+ - enum:
+ - ti,j722s-evm
+ - const: ti,j722s
+
- description: K3 J784s4 SoC
items:
- enum:
diff --git a/Documentation/devicetree/bindings/ata/ahci-mtk.txt b/Documentation/devicetree/bindings/ata/ahci-mtk.txt
deleted file mode 100644
index d2aa696b161b..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-mtk.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-MediaTek Serial ATA controller
-
-Required properties:
- - compatible : Must be "mediatek,<chip>-ahci", "mediatek,mtk-ahci".
- When using "mediatek,mtk-ahci" compatible strings, you
- need SoC specific ones in addition, one of:
- - "mediatek,mt7622-ahci"
- - reg : Physical base addresses and length of register sets.
- - interrupts : Interrupt associated with the SATA device.
- - interrupt-names : Associated name must be: "hostc".
- - clocks : A list of phandle and clock specifier pairs, one for each
- entry in clock-names.
- - clock-names : Associated names must be: "ahb", "axi", "asic", "rbc", "pm".
- - phys : A phandle and PHY specifier pair for the PHY port.
- - phy-names : Associated name must be: "sata-phy".
- - ports-implemented : See ./ahci-platform.txt for details.
-
-Optional properties:
- - power-domains : A phandle and power domain specifier pair to the power
- domain which is responsible for collapsing and restoring
- power to the peripheral.
- - resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
- - reset-names : Associated names must be: "axi", "sw", "reg".
- - mediatek,phy-mode : A phandle to the system controller, used to enable
- SATA function.
-
-Example:
-
- sata: sata@1a200000 {
- compatible = "mediatek,mt7622-ahci",
- "mediatek,mtk-ahci";
- reg = <0 0x1a200000 0 0x1100>;
- interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hostc";
- clocks = <&pciesys CLK_SATA_AHB_EN>,
- <&pciesys CLK_SATA_AXI_EN>,
- <&pciesys CLK_SATA_ASIC_EN>,
- <&pciesys CLK_SATA_RBC_EN>,
- <&pciesys CLK_SATA_PM_EN>;
- clock-names = "ahb", "axi", "asic", "rbc", "pm";
- phys = <&u3port1 PHY_TYPE_SATA>;
- phy-names = "sata-phy";
- ports-implemented = <0x1>;
- power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
- resets = <&pciesys MT7622_SATA_AXI_BUS_RST>,
- <&pciesys MT7622_SATA_PHY_SW_RST>,
- <&pciesys MT7622_SATA_PHY_REG_RST>;
- reset-names = "axi", "sw", "reg";
- mediatek,phy-mode = <&pciesys>;
- };
diff --git a/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt b/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt
deleted file mode 100644
index c1d22b3ae134..000000000000
--- a/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Atmel AT91RM9200 CompactFlash
-
-Required properties:
-- compatible : "atmel,at91rm9200-cf".
-- reg : should specify localbus address and size used.
-- gpios : specifies the gpio pins to control the CF device. Detect
- and reset gpio's are mandatory while irq and vcc gpio's are
- optional and may be set to 0 if not present.
-
-Example:
-compact-flash@50000000 {
- compatible = "atmel,at91rm9200-cf";
- reg = <0x50000000 0x30000000>;
- gpios = <&pioC 13 0 /* irq */
- &pioC 15 0 /* detect */
- 0 /* vcc */
- &pioC 5 0 /* reset */
- >;
-};
diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
index b29ce598f9aa..9952e0ef7767 100644
--- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Ceva AHCI SATA Controller
maintainers:
- - Piyush Mehta <piyush.mehta@amd.com>
+ - Mubin Sayyed <mubin.sayyed@amd.com>
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
description: |
The Ceva SATA controller mostly conforms to the AHCI interface with some
diff --git a/Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml b/Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml
new file mode 100644
index 000000000000..a34bd2e9c352
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/mediatek,mtk-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Serial ATA controller
+
+maintainers:
+ - Ryder Lee <ryder.lee@mediatek.com>
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7622-ahci
+ - const: mediatek,mtk-ahci
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ const: hostc
+
+ clocks:
+ maxItems: 5
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: axi
+ - const: asic
+ - const: rbc
+ - const: pm
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 3
+
+ reset-names:
+ items:
+ - const: axi
+ - const: sw
+ - const: reg
+
+ mediatek,phy-mode:
+ description: System controller phandle, used to enable SATA function
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+ - reg
+ - interrupts
+ - interrupt-names
+ - clocks
+ - clock-names
+ - phys
+ - phy-names
+ - ports-implemented
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt7622-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/phy/phy.h>
+ #include <dt-bindings/power/mt7622-power.h>
+ #include <dt-bindings/reset/mt7622-reset.h>
+
+ sata@1a200000 {
+ compatible = "mediatek,mt7622-ahci", "mediatek,mtk-ahci";
+ reg = <0x1a200000 0x1100>;
+ interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "hostc";
+ clocks = <&pciesys CLK_SATA_AHB_EN>,
+ <&pciesys CLK_SATA_AXI_EN>,
+ <&pciesys CLK_SATA_ASIC_EN>,
+ <&pciesys CLK_SATA_RBC_EN>,
+ <&pciesys CLK_SATA_PM_EN>;
+ clock-names = "ahb", "axi", "asic", "rbc", "pm";
+ phys = <&u3port1 PHY_TYPE_SATA>;
+ phy-names = "sata-phy";
+ ports-implemented = <0x1>;
+ power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
+ resets = <&pciesys MT7622_SATA_AXI_BUS_RST>,
+ <&pciesys MT7622_SATA_PHY_SW_RST>,
+ <&pciesys MT7622_SATA_PHY_REG_RST>;
+ reset-names = "axi", "sw", "reg";
+ mediatek,phy-mode = <&pciesys>;
+ };
diff --git a/Documentation/devicetree/bindings/bus/imx-weim.txt b/Documentation/devicetree/bindings/bus/imx-weim.txt
deleted file mode 100644
index e7f502070d77..000000000000
--- a/Documentation/devicetree/bindings/bus/imx-weim.txt
+++ /dev/null
@@ -1,117 +0,0 @@
-Device tree bindings for i.MX Wireless External Interface Module (WEIM)
-
-The term "wireless" does not imply that the WEIM is literally an interface
-without wires. It simply means that this module was originally designed for
-wireless and mobile applications that use low-power technology.
-
-The actual devices are instantiated from the child nodes of a WEIM node.
-
-Required properties:
-
- - compatible: Should contain one of the following:
- "fsl,imx1-weim"
- "fsl,imx27-weim"
- "fsl,imx51-weim"
- "fsl,imx50-weim"
- "fsl,imx6q-weim"
- - reg: A resource specifier for the register space
- (see the example below)
- - clocks: the clock, see the example below.
- - #address-cells: Must be set to 2 to allow memory address translation
- - #size-cells: Must be set to 1 to allow CS address passing
- - ranges: Must be set up to reflect the memory layout with four
- integer values for each chip-select line in use:
-
- <cs-number> 0 <physical address of mapping> <size>
-
-Optional properties:
-
- - fsl,weim-cs-gpr: For "fsl,imx50-weim" and "fsl,imx6q-weim" type of
- devices, it should be the phandle to the system General
- Purpose Register controller that contains WEIM CS GPR
- register, e.g. IOMUXC_GPR1 on i.MX6Q. IOMUXC_GPR1[11:0]
- should be set up as one of the following 4 possible
- values depending on the CS space configuration.
-
- IOMUXC_GPR1[11:0] CS0 CS1 CS2 CS3
- ---------------------------------------------
- 05 128M 0M 0M 0M
- 033 64M 64M 0M 0M
- 0113 64M 32M 32M 0M
- 01111 32M 32M 32M 32M
-
- In case that the property is absent, the reset value or
- what bootloader sets up in IOMUXC_GPR1[11:0] will be
- used.
-
- - fsl,burst-clk-enable For "fsl,imx50-weim" and "fsl,imx6q-weim" type of
- devices, the presence of this property indicates that
- the weim bus should operate in Burst Clock Mode.
-
- - fsl,continuous-burst-clk Make Burst Clock to output continuous clock.
- Without this option Burst Clock will output clock
- only when necessary. This takes effect only if
- "fsl,burst-clk-enable" is set.
-
-Timing property for child nodes. It is mandatory, not optional.
-
- - fsl,weim-cs-timing: The timing array, contains timing values for the
- child node. We get the CS indexes from the address
- ranges in the child node's "reg" property.
- The number of registers depends on the selected chip:
- For i.MX1, i.MX21 ("fsl,imx1-weim") there are two
- registers: CSxU, CSxL.
- For i.MX25, i.MX27, i.MX31 and i.MX35 ("fsl,imx27-weim")
- there are three registers: CSCRxU, CSCRxL, CSCRxA.
- For i.MX50, i.MX53 ("fsl,imx50-weim"),
- i.MX51 ("fsl,imx51-weim") and i.MX6Q ("fsl,imx6q-weim")
- there are six registers: CSxGCR1, CSxGCR2, CSxRCR1,
- CSxRCR2, CSxWCR1, CSxWCR2.
-
-Example for an imx6q-sabreauto board, the NOR flash connected to the WEIM:
-
- weim: weim@21b8000 {
- compatible = "fsl,imx6q-weim";
- reg = <0x021b8000 0x4000>;
- clocks = <&clks 196>;
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <0 0 0x08000000 0x08000000>;
- fsl,weim-cs-gpr = <&gpr>;
-
- nor@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x02000000>;
- #address-cells = <1>;
- #size-cells = <1>;
- bank-width = <2>;
- fsl,weim-cs-timing = <0x00620081 0x00000001 0x1c022000
- 0x0000c000 0x1404a38e 0x00000000>;
- };
- };
-
-Example for an imx6q-based board, a multi-chipselect device connected to WEIM:
-
-In this case, both chip select 0 and 1 will be configured with the same timing
-array values.
-
- weim: weim@21b8000 {
- compatible = "fsl,imx6q-weim";
- reg = <0x021b8000 0x4000>;
- clocks = <&clks 196>;
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <0 0 0x08000000 0x02000000
- 1 0 0x0a000000 0x02000000
- 2 0 0x0c000000 0x02000000
- 3 0 0x0e000000 0x02000000>;
- fsl,weim-cs-gpr = <&gpr>;
-
- acme@0 {
- compatible = "acme,whatever";
- reg = <0 0 0x100>, <0 0x400000 0x800>,
- <1 0x400000 0x800>;
- fsl,weim-cs-timing = <0x024400b1 0x00001010 0x20081100
- 0x00000000 0xa0000240 0x00000000>;
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
index 3eebc03a309b..1d2bcea41c85 100644
--- a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
@@ -30,14 +30,16 @@ properties:
- google,gs101-cmu-top
- google,gs101-cmu-apm
- google,gs101-cmu-misc
+ - google,gs101-cmu-peric0
+ - google,gs101-cmu-peric1
clocks:
minItems: 1
- maxItems: 2
+ maxItems: 3
clock-names:
minItems: 1
- maxItems: 2
+ maxItems: 3
"#clock-cells":
const: 1
@@ -85,8 +87,30 @@ allOf:
clock-names:
items:
- - const: dout_cmu_misc_bus
- - const: dout_cmu_misc_sss
+ - const: bus
+ - const: sss
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - google,gs101-cmu-peric0
+ - google,gs101-cmu-peric1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24.576 MHz)
+ - description: Connectivity Peripheral 0/1 bus clock (from CMU_TOP)
+ - description: Connectivity Peripheral 0/1 IP clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: ip
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml
index 6c4846b34e4b..a1085ef4fd05 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml
@@ -31,10 +31,15 @@ properties:
- const: bi_tcxo_ao
- const: sleep_clk
+ power-domains:
+ items:
+ - description: CX domain
+
required:
- compatible
- clocks
- clock-names
+ - power-domains
allOf:
- $ref: qcom,gcc.yaml#
@@ -44,6 +49,7 @@ unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
clock-controller@100000 {
compatible = "qcom,gcc-sc8180x";
reg = <0x00100000 0x1f0000>;
@@ -51,6 +57,7 @@ examples:
<&rpmhcc RPMH_CXO_CLK_A>,
<&sleep_clk>;
clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ power-domains = <&rpmhpd SC8180X_CX>;
#clock-cells = <1>;
#reset-cells = <1>;
#power-domain-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml
index 48986460f994..fa0e5b6b02b8 100644
--- a/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml
@@ -17,6 +17,7 @@ description: |
include/dt-bindings/clock/qcom,sm8450-camcc.h
include/dt-bindings/clock/qcom,sm8550-camcc.h
include/dt-bindings/clock/qcom,sc8280xp-camcc.h
+ include/dt-bindings/clock/qcom,x1e80100-camcc.h
allOf:
- $ref: qcom,gcc.yaml#
@@ -27,6 +28,7 @@ properties:
- qcom,sc8280xp-camcc
- qcom,sm8450-camcc
- qcom,sm8550-camcc
+ - qcom,x1e80100-camcc
clocks:
items:
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml
index 1a384e8532a5..36974309cf69 100644
--- a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml
@@ -18,6 +18,7 @@ description: |
include/dt-bindings/clock/qcom,sm8550-gpucc.h
include/dt-bindings/reset/qcom,sm8450-gpucc.h
include/dt-bindings/reset/qcom,sm8650-gpucc.h
+ include/dt-bindings/reset/qcom,x1e80100-gpucc.h
properties:
compatible:
@@ -25,6 +26,7 @@ properties:
- qcom,sm8450-gpucc
- qcom,sm8550-gpucc
- qcom,sm8650-gpucc
+ - qcom,x1e80100-gpucc
clocks:
items:
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
index c129f8c16b50..bad0260764d4 100644
--- a/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
@@ -14,12 +14,17 @@ description: |
Qualcomm display clock control module provides the clocks, resets and power
domains on SM8550.
- See also:: include/dt-bindings/clock/qcom,sm8550-dispcc.h
+ See also:
+ - include/dt-bindings/clock/qcom,sm8550-dispcc.h
+ - include/dt-bindings/clock/qcom,sm8650-dispcc.h
+ - include/dt-bindings/clock/qcom,x1e80100-dispcc.h
properties:
compatible:
enum:
- qcom,sm8550-dispcc
+ - qcom,sm8650-dispcc
+ - qcom,x1e80100-dispcc
clocks:
items:
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml
index af16b05eac96..48fdd562d743 100644
--- a/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml
@@ -23,6 +23,7 @@ properties:
- enum:
- qcom,sm8550-tcsr
- qcom,sm8650-tcsr
+ - qcom,x1e80100-tcsr
- const: syscon
clocks:
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8650-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8650-dispcc.yaml
deleted file mode 100644
index 5e0c45c380f5..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,sm8650-dispcc.yaml
+++ /dev/null
@@ -1,106 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/clock/qcom,sm8650-dispcc.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Qualcomm Display Clock & Reset Controller for SM8650
-
-maintainers:
- - Bjorn Andersson <andersson@kernel.org>
- - Neil Armstrong <neil.armstrong@linaro.org>
-
-description: |
- Qualcomm display clock control module provides the clocks, resets and power
- domains on SM8650.
-
- See also:: include/dt-bindings/clock/qcom,sm8650-dispcc.h
-
-properties:
- compatible:
- enum:
- - qcom,sm8650-dispcc
-
- clocks:
- items:
- - description: Board XO source
- - description: Board Always On XO source
- - description: Display's AHB clock
- - description: sleep clock
- - description: Byte clock from DSI PHY0
- - description: Pixel clock from DSI PHY0
- - description: Byte clock from DSI PHY1
- - description: Pixel clock from DSI PHY1
- - description: Link clock from DP PHY0
- - description: VCO DIV clock from DP PHY0
- - description: Link clock from DP PHY1
- - description: VCO DIV clock from DP PHY1
- - description: Link clock from DP PHY2
- - description: VCO DIV clock from DP PHY2
- - description: Link clock from DP PHY3
- - description: VCO DIV clock from DP PHY3
-
- '#clock-cells':
- const: 1
-
- '#reset-cells':
- const: 1
-
- '#power-domain-cells':
- const: 1
-
- reg:
- maxItems: 1
-
- power-domains:
- description:
- A phandle and PM domain specifier for the MMCX power domain.
- maxItems: 1
-
- required-opps:
- description:
- A phandle to an OPP node describing required MMCX performance point.
- maxItems: 1
-
-required:
- - compatible
- - reg
- - clocks
- - '#clock-cells'
- - '#reset-cells'
- - '#power-domain-cells'
-
-additionalProperties: false
-
-examples:
- - |
- #include <dt-bindings/clock/qcom,sm8650-gcc.h>
- #include <dt-bindings/clock/qcom,rpmh.h>
- #include <dt-bindings/power/qcom-rpmpd.h>
- #include <dt-bindings/power/qcom,rpmhpd.h>
- clock-controller@af00000 {
- compatible = "qcom,sm8650-dispcc";
- reg = <0x0af00000 0x10000>;
- clocks = <&rpmhcc RPMH_CXO_CLK>,
- <&rpmhcc RPMH_CXO_CLK_A>,
- <&gcc GCC_DISP_AHB_CLK>,
- <&sleep_clk>,
- <&dsi0_phy 0>,
- <&dsi0_phy 1>,
- <&dsi1_phy 0>,
- <&dsi1_phy 1>,
- <&dp0_phy 0>,
- <&dp0_phy 1>,
- <&dp1_phy 0>,
- <&dp1_phy 1>,
- <&dp2_phy 0>,
- <&dp2_phy 1>,
- <&dp3_phy 0>,
- <&dp3_phy 1>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- #power-domain-cells = <1>;
- power-domains = <&rpmhpd RPMHPD_MMCX>;
- required-opps = <&rpmhpd_opp_low_svs>;
- };
-...
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml
index 9c3dc6c4fa94..084259d30232 100644
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml
@@ -50,6 +50,7 @@ properties:
- renesas,r8a779a0-cpg-mssr # R-Car V3U
- renesas,r8a779f0-cpg-mssr # R-Car S4-8
- renesas,r8a779g0-cpg-mssr # R-Car V4H
+ - renesas,r8a779h0-cpg-mssr # R-Car V4M
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
index 21d995f29a1e..b8e9cf6ce4e6 100644
--- a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
@@ -29,19 +29,22 @@ properties:
audio-ports:
description:
- Array of 8-bit values, 2 values per DAI (Documentation/sound/soc/dai.rst).
+ Array of 2 values per DAI (Documentation/sound/soc/dai.rst).
The implementation allows one or two DAIs.
If two DAIs are defined, they must be of different type.
$ref: /schemas/types.yaml#/definitions/uint32-matrix
+ minItems: 1
+ maxItems: 2
items:
- minItems: 1
items:
- description: |
The first value defines the DAI type: TDA998x_SPDIF or TDA998x_I2S
(see include/dt-bindings/display/tda998x.h).
+ enum: [ 1, 2 ]
- description:
The second value defines the tda998x AP_ENA reg content when the
DAI in question is used.
+ maximum: 0xff
'#sound-dai-cells':
enum: [ 0, 1 ]
diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml
index bc92928c805b..619edcf6c791 100644
--- a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml
+++ b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml
@@ -29,6 +29,7 @@ properties:
vddi-supply:
description: regulator that supplies the vddi voltage
backlight: true
+ port: true
required:
- compatible
diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml
index 25d53fde92e1..597c9cc6a312 100644
--- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml
+++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml
@@ -85,7 +85,7 @@ allOf:
clocks:
minItems: 6
maxItems: 6
- regs:
+ reg:
minItems: 2
maxItems: 2
@@ -99,7 +99,7 @@ allOf:
clocks:
minItems: 4
maxItems: 4
- regs:
+ reg:
minItems: 2
maxItems: 2
@@ -116,7 +116,7 @@ allOf:
clocks:
minItems: 3
maxItems: 3
- regs:
+ reg:
minItems: 1
maxItems: 1
diff --git a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml
index 8e584857ddd4..ab8f32c440df 100644
--- a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml
+++ b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml
@@ -26,6 +26,12 @@ properties:
- description: For implementations complying for Versal.
const: xlnx,versal-firmware
+ - description: For implementations complying for Versal NET.
+ items:
+ - enum:
+ - xlnx,versal-net-firmware
+ - const: xlnx,versal-firmware
+
method:
description: |
The method of calling the PM-API firmware layer.
@@ -41,7 +47,53 @@ properties:
"#power-domain-cells":
const: 1
- versal_fpga:
+ clock-controller:
+ $ref: /schemas/clock/xlnx,versal-clk.yaml#
+ description: The clock controller is a hardware block of Xilinx versal
+ clock tree. It reads required input clock frequencies from the devicetree
+ and acts as clock provider for all clock consumers of PS clocks.list of
+ clock specifiers which are external input clocks to the given clock
+ controller.
+ type: object
+
+ gpio:
+ $ref: /schemas/gpio/xlnx,zynqmp-gpio-modepin.yaml#
+ description: The gpio node describes connect to PS_MODE pins via firmware
+ interface.
+ type: object
+
+ soc-nvmem:
+ $ref: /schemas/nvmem/xlnx,zynqmp-nvmem.yaml#
+ description: The ZynqMP MPSoC provides access to the hardware related data
+ like SOC revision, IDCODE and specific purpose efuses.
+ type: object
+
+ pcap:
+ $ref: /schemas/fpga/xlnx,zynqmp-pcap-fpga.yaml
+ description: The ZynqMP SoC uses the PCAP (Processor Configuration Port) to
+ configure the Programmable Logic (PL). The configuration uses the
+ firmware interface.
+ type: object
+
+ pinctrl:
+ $ref: /schemas/pinctrl/xlnx,zynqmp-pinctrl.yaml#
+ description: The pinctrl node provides access to pinconfig and pincontrol
+ functionality available in firmware.
+ type: object
+
+ power-management:
+ $ref: /schemas/power/reset/xlnx,zynqmp-power.yaml#
+ description: The zynqmp-power node describes the power management
+ configurations. It will control remote suspend/shutdown interfaces.
+ type: object
+
+ reset-controller:
+ $ref: /schemas/reset/xlnx,zynqmp-reset.yaml#
+ description: The reset-controller node describes connection to the reset
+ functionality via firmware interface.
+ type: object
+
+ versal-fpga:
$ref: /schemas/fpga/xlnx,versal-fpga.yaml#
description: Compatible of the FPGA device.
type: object
@@ -53,15 +105,6 @@ properties:
vector.
type: object
- clock-controller:
- $ref: /schemas/clock/xlnx,versal-clk.yaml#
- description: The clock controller is a hardware block of Xilinx versal
- clock tree. It reads required input clock frequencies from the devicetree
- and acts as clock provider for all clock consumers of PS clocks.list of
- clock specifiers which are external input clocks to the given clock
- controller.
- type: object
-
required:
- compatible
@@ -73,7 +116,38 @@ examples:
firmware {
zynqmp_firmware: zynqmp-firmware {
#power-domain-cells = <1>;
+ soc-nvmem {
+ compatible = "xlnx,zynqmp-nvmem-fw";
+ nvmem-layout {
+ compatible = "fixed-layout";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ soc_revision: soc-revision@0 {
+ reg = <0x0 0x4>;
+ };
+ };
+ };
+ gpio {
+ compatible = "xlnx,zynqmp-gpio-modepin";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ pcap {
+ compatible = "xlnx,zynqmp-pcap-fpga";
};
+ pinctrl {
+ compatible = "xlnx,zynqmp-pinctrl";
+ };
+ power-management {
+ compatible = "xlnx,zynqmp-power";
+ interrupts = <0 35 4>;
+ };
+ reset-controller {
+ compatible = "xlnx,zynqmp-reset";
+ #reset-cells = <1>;
+ };
+ };
};
sata {
@@ -84,7 +158,7 @@ examples:
compatible = "xlnx,versal-firmware";
method = "smc";
- versal_fpga: versal_fpga {
+ versal_fpga: versal-fpga {
compatible = "xlnx,versal-fpga";
};
diff --git a/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml b/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml
index 26f18834caa3..80833462f620 100644
--- a/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml
+++ b/Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml
@@ -26,7 +26,7 @@ additionalProperties: false
examples:
- |
- versal_fpga: versal_fpga {
+ versal_fpga: versal-fpga {
compatible = "xlnx,versal-fpga";
};
diff --git a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
index b1fd632718d4..bb93baa88879 100644
--- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
+++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
@@ -12,7 +12,8 @@ description:
PS_MODE). Every pin can be configured as input/output.
maintainers:
- - Piyush Mehta <piyush.mehta@amd.com>
+ - Mubin Sayyed <mubin.sayyed@amd.com>
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/gpu/img,powervr.yaml b/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml
index a13298f1a182..256e252f8087 100644
--- a/Documentation/devicetree/bindings/gpu/img,powervr.yaml
+++ b/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml
@@ -2,10 +2,10 @@
# Copyright (c) 2023 Imagination Technologies Ltd.
%YAML 1.2
---
-$id: http://devicetree.org/schemas/gpu/img,powervr.yaml#
+$id: http://devicetree.org/schemas/gpu/img,powervr-rogue.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Imagination Technologies PowerVR and IMG GPU
+title: Imagination Technologies PowerVR and IMG Rogue GPUs
maintainers:
- Frank Binns <frank.binns@imgtec.com>
diff --git a/Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml b/Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml
new file mode 100644
index 000000000000..f5898b04381c
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2023 Imagination Technologies Ltd.
+# Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpu/img,powervr-sgx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Imagination Technologies PowerVR SGX GPUs
+
+maintainers:
+ - Frank Binns <frank.binns@imgtec.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,omap3430-gpu # Rev 121
+ - ti,omap3630-gpu # Rev 125
+ - const: img,powervr-sgx530
+ - items:
+ - enum:
+ - ingenic,jz4780-gpu # Rev 130
+ - ti,omap4430-gpu # Rev 120
+ - const: img,powervr-sgx540
+ - items:
+ - enum:
+ - allwinner,sun6i-a31-gpu # MP2 Rev 115
+ - ti,omap4470-gpu # MP1 Rev 112
+ - ti,omap5432-gpu # MP2 Rev 105
+ - ti,am5728-gpu # MP2 Rev 116
+ - ti,am6548-gpu # MP1 Rev 117
+ - const: img,powervr-sgx544
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: core
+ - const: mem
+ - const: sys
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,am6548-gpu
+ then:
+ required:
+ - power-domains
+ else:
+ properties:
+ power-domains: false
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun6i-a31-gpu
+ - ingenic,jz4780-gpu
+ then:
+ required:
+ - clocks
+ - clock-names
+ else:
+ properties:
+ clocks: false
+ clock-names: false
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun6i-a31-gpu
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ minItems: 2
+ maxItems: 2
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ingenic,jz4780-gpu
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+ gpu@7000000 {
+ compatible = "ti,am6548-gpu", "img,powervr-sgx544";
+ reg = <0x7000000 0x10000>;
+ interrupts = <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>;
+ power-domains = <&k3_pds 65 TI_SCI_PD_EXCLUSIVE>;
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ gpu: gpu@1c40000 {
+ compatible = "allwinner,sun6i-a31-gpu", "img,powervr-sgx544";
+ reg = <0x01c40000 0x10000>;
+ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu 1>, <&ccu 2>;
+ clock-names = "core", "mem";
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml
index df9c57bca2a8..cc8bba5537b9 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml
@@ -33,6 +33,7 @@ properties:
- const: samsung,exynos7-hsi2c
- items:
- enum:
+ - google,gs101-hsi2c
- samsung,exynos850-hsi2c
- const: samsung,exynosautov9-hsi2c
- const: samsung,exynos5-hsi2c # Exynos5250 and Exynos5420
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
index 3d06db98e978..a93744763787 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
@@ -36,6 +36,7 @@ properties:
- amlogic,meson-a1-gpio-intc
- amlogic,meson-s4-gpio-intc
- amlogic,c3-gpio-intc
+ - amlogic,t7-gpio-intc
- const: amlogic,meson-gpio-intc
reg:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml
new file mode 100644
index 000000000000..ada5788602d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/starfive,jh8100-intc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive External Interrupt Controller
+
+description:
+ StarFive SoC JH8100 contain a external interrupt controller. It can be used
+ to handle high-level input interrupt signals. It also send the output
+ interrupt signal to RISC-V PLIC.
+
+maintainers:
+ - Changhuang Liang <changhuang.liang@starfivetech.com>
+
+properties:
+ compatible:
+ const: starfive,jh8100-intc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ description: APB clock for the interrupt controller
+ maxItems: 1
+
+ resets:
+ description: APB reset for the interrupt controller
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - resets
+ - interrupts
+ - interrupt-controller
+ - "#interrupt-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ interrupt-controller@12260000 {
+ compatible = "starfive,jh8100-intc";
+ reg = <0x12260000 0x10000>;
+ clocks = <&syscrg_ne 76>;
+ resets = <&syscrg_ne 13>;
+ interrupts = <45>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index a4042ae24770..5c130cf06a21 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -83,6 +83,7 @@ properties:
- description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500"
items:
- enum:
+ - qcom,qcm2290-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8280xp-smmu-500
@@ -93,6 +94,7 @@ properties:
- qcom,sm8350-smmu-500
- qcom,sm8450-smmu-500
- qcom,sm8550-smmu-500
+ - qcom,sm8650-smmu-500
- const: qcom,adreno-smmu
- const: qcom,smmu-500
- const: arm,mmu-500
@@ -462,6 +464,7 @@ allOf:
compatible:
items:
- enum:
+ - qcom,qcm2290-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- const: qcom,adreno-smmu
@@ -484,7 +487,12 @@ allOf:
- if:
properties:
compatible:
- const: qcom,sm8450-smmu-500
+ items:
+ - const: qcom,sm8450-smmu-500
+ - const: qcom,adreno-smmu
+ - const: qcom,smmu-500
+ - const: arm,mmu-500
+
then:
properties:
clock-names:
@@ -508,7 +516,13 @@ allOf:
- if:
properties:
compatible:
- const: qcom,sm8550-smmu-500
+ items:
+ - enum:
+ - qcom,sm8550-smmu-500
+ - qcom,sm8650-smmu-500
+ - const: qcom,adreno-smmu
+ - const: qcom,smmu-500
+ - const: arm,mmu-500
then:
properties:
clock-names:
@@ -534,7 +548,6 @@ allOf:
- cavium,smmu-v2
- marvell,ap806-smmu-500
- nvidia,smmu-500
- - qcom,qcm2290-smmu-500
- qcom,qdu1000-smmu-500
- qcom,sc7180-smmu-500
- qcom,sc8180x-smmu-500
@@ -544,7 +557,6 @@ allOf:
- qcom,sdx65-smmu-500
- qcom,sm6350-smmu-500
- qcom,sm6375-smmu-500
- - qcom,sm8650-smmu-500
- qcom,x1e80100-smmu-500
then:
properties:
diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml
index 55a8d1385e21..8a3c2398b10c 100644
--- a/Documentation/devicetree/bindings/leds/common.yaml
+++ b/Documentation/devicetree/bindings/leds/common.yaml
@@ -200,6 +200,18 @@ properties:
#trigger-source-cells property in the source node.
$ref: /schemas/types.yaml#/definitions/phandle-array
+ active-low:
+ type: boolean
+ description:
+ Makes LED active low. To turn the LED ON, line needs to be
+ set to low voltage instead of high.
+
+ inactive-high-impedance:
+ type: boolean
+ description:
+ Set LED to high-impedance mode to turn the LED OFF. LED might also
+ describe this mode as tristate.
+
# Required properties for flash LED child nodes:
flash-max-microamp:
description:
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm63138.yaml b/Documentation/devicetree/bindings/leds/leds-bcm63138.yaml
index 52252fb6bb32..bb20394fca5c 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm63138.yaml
+++ b/Documentation/devicetree/bindings/leds/leds-bcm63138.yaml
@@ -52,10 +52,6 @@ patternProperties:
maxItems: 1
description: LED pin number
- active-low:
- type: boolean
- description: Makes LED active low
-
required:
- reg
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6328.yaml b/Documentation/devicetree/bindings/leds/leds-bcm6328.yaml
index 51cc0d82c12e..f3a3ef992929 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6328.yaml
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6328.yaml
@@ -78,10 +78,6 @@ patternProperties:
- maximum: 23
description: LED pin number (only LEDs 0 to 23 are valid).
- active-low:
- type: boolean
- description: Makes LED active low.
-
brcm,hardware-controlled:
type: boolean
description: Makes this LED hardware controlled.
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
index 6e51c6b91ee5..211ffc3c4a20 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
@@ -25,8 +25,6 @@ LED sub-node required properties:
LED sub-node optional properties:
- label : see Documentation/devicetree/bindings/leds/common.txt
- - active-low : Boolean, makes LED active low.
- Default : false
- default-state : see
Documentation/devicetree/bindings/leds/common.txt
- linux,default-trigger : see
diff --git a/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml b/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml
index bd6ec04a8727..a31a202afe5c 100644
--- a/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml
+++ b/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml
@@ -41,9 +41,7 @@ properties:
pwm-names: true
- active-low:
- description: For PWMs where the LED is wired to supply rather than ground.
- type: boolean
+ active-low: true
color: true
diff --git a/Documentation/devicetree/bindings/leds/leds-pwm.yaml b/Documentation/devicetree/bindings/leds/leds-pwm.yaml
index 7de6da58be3c..113b7c218303 100644
--- a/Documentation/devicetree/bindings/leds/leds-pwm.yaml
+++ b/Documentation/devicetree/bindings/leds/leds-pwm.yaml
@@ -34,11 +34,6 @@ patternProperties:
Maximum brightness possible for the LED
$ref: /schemas/types.yaml#/definitions/uint32
- active-low:
- description:
- For PWMs where the LED is wired to supply rather than ground.
- type: boolean
-
required:
- pwms
- max-brightness
diff --git a/Documentation/devicetree/bindings/media/cnm,wave521c.yaml b/Documentation/devicetree/bindings/media/cnm,wave521c.yaml
index 6d5569e77b7a..6a11c1d11fb5 100644
--- a/Documentation/devicetree/bindings/media/cnm,wave521c.yaml
+++ b/Documentation/devicetree/bindings/media/cnm,wave521c.yaml
@@ -17,7 +17,7 @@ properties:
compatible:
items:
- enum:
- - ti,k3-j721s2-wave521c
+ - ti,j721s2-wave521c
- const: cnm,wave521c
reg:
@@ -53,7 +53,7 @@ additionalProperties: false
examples:
- |
vpu: video-codec@12345678 {
- compatible = "ti,k3-j721s2-wave521c", "cnm,wave521c";
+ compatible = "ti,j721s2-wave521c", "cnm,wave521c";
reg = <0x12345678 0x1000>;
clocks = <&clks 42>;
interrupts = <42>;
diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
index a2051b31fa29..b45743d0a9ec 100644
--- a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
+++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
@@ -16,14 +16,18 @@ description: |+
properties:
compatible:
- enum:
- - mediatek,mt8173-vcodec-enc-vp8
- - mediatek,mt8173-vcodec-enc
- - mediatek,mt8183-vcodec-enc
- - mediatek,mt8188-vcodec-enc
- - mediatek,mt8192-vcodec-enc
- - mediatek,mt8195-vcodec-enc
-
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt8173-vcodec-enc-vp8
+ - mediatek,mt8173-vcodec-enc
+ - mediatek,mt8183-vcodec-enc
+ - mediatek,mt8188-vcodec-enc
+ - mediatek,mt8192-vcodec-enc
+ - mediatek,mt8195-vcodec-enc
+ - items:
+ - const: mediatek,mt8186-vcodec-enc
+ - const: mediatek,mt8183-vcodec-enc
reg:
maxItems: 1
@@ -109,10 +113,7 @@ allOf:
properties:
compatible:
enum:
- - mediatek,mt8173-vcodec-enc
- - mediatek,mt8188-vcodec-enc
- - mediatek,mt8192-vcodec-enc
- - mediatek,mt8195-vcodec-enc
+ - mediatek,mt8173-vcodec-enc-vp8
then:
properties:
@@ -122,8 +123,8 @@ allOf:
maxItems: 1
clock-names:
items:
- - const: venc_sel
- else: # for vp8 hw encoder
+ - const: venc_lt_sel
+ else:
properties:
clock:
items:
@@ -131,7 +132,7 @@ allOf:
maxItems: 1
clock-names:
items:
- - const: venc_lt_sel
+ - const: venc_sel
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml
index 37800e1908cc..83c020a673d6 100644
--- a/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml
+++ b/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml
@@ -38,7 +38,8 @@ properties:
maxItems: 1
iommus:
- maxItems: 2
+ minItems: 2
+ maxItems: 4
description: |
Points to the respective IOMMU block with master port as argument, see
Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml
new file mode 100644
index 000000000000..82fc5f4a1ed6
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: i.MX WEIM Bus Peripheral Nodes
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+ - Sascha Hauer <s.hauer@pengutronix.de>
+
+description:
+ This binding is meant for the child nodes of the WEIM node. The node
+ represents any device connected to the WEIM bus. It may be a Flash chip,
+ RAM chip or Ethernet controller, etc. These properties are meant for
+ configuring the WEIM settings/timings and will accompany the bindings
+ supported by the respective device.
+
+properties:
+ reg: true
+
+ fsl,weim-cs-timing:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description:
+ Timing values for the child node.
+ minItems: 2
+ maxItems: 6
+
+# the WEIM child will have its own native properties
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml
new file mode 100644
index 000000000000..3f40ca5b13f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/fsl/fsl,imx-weim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: i.MX Wireless External Interface Module (WEIM)
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+ - Sascha Hauer <s.hauer@pengutronix.de>
+
+description:
+ The term "wireless" does not imply that the WEIM is literally an interface
+ without wires. It simply means that this module was originally designed for
+ wireless and mobile applications that use low-power technology. The actual
+ devices are instantiated from the child nodes of a WEIM node.
+
+properties:
+ $nodename:
+ pattern: "^memory-controller@[0-9a-f]+$"
+
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx1-weim
+ - fsl,imx27-weim
+ - fsl,imx50-weim
+ - fsl,imx51-weim
+ - fsl,imx6q-weim
+ - items:
+ - enum:
+ - fsl,imx31-weim
+ - fsl,imx35-weim
+ - const: fsl,imx27-weim
+ - items:
+ - enum:
+ - fsl,imx6sx-weim
+ - fsl,imx6ul-weim
+ - const: fsl,imx6q-weim
+
+ "#address-cells":
+ const: 2
+
+ "#size-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ ranges: true
+
+ fsl,weim-cs-gpr:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ Phandle to the system General Purpose Register controller that contains
+ WEIM CS GPR register, e.g. IOMUXC_GPR1 on i.MX6Q. IOMUXC_GPR1[11:0]
+ should be set up as one of the following 4 possible values depending on
+ the CS space configuration.
+
+ IOMUXC_GPR1[11:0] CS0 CS1 CS2 CS3
+ ---------------------------------------------
+ 05 128M 0M 0M 0M
+ 033 64M 64M 0M 0M
+ 0113 64M 32M 32M 0M
+ 01111 32M 32M 32M 32M
+
+ In case that the property is absent, the reset value or what bootloader
+ sets up in IOMUXC_GPR1[11:0] will be used.
+
+ fsl,burst-clk-enable:
+ type: boolean
+ description:
+ The presence of this property indicates that the weim bus should operate
+ in Burst Clock Mode.
+
+ fsl,continuous-burst-clk:
+ type: boolean
+ description:
+ Make Burst Clock to output continuous clock. Without this option Burst
+ Clock will output clock only when necessary.
+
+patternProperties:
+ "^.*@[0-7],[0-9a-f]+$":
+ type: object
+ description: Devices attached to chip selects are represented as subnodes.
+ $ref: fsl,imx-weim-peripherals.yaml
+ additionalProperties: true
+ required:
+ - fsl,weim-cs-timing
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ not:
+ contains:
+ enum:
+ - fsl,imx50-weim
+ - fsl,imx6q-weim
+ then:
+ properties:
+ fsl,weim-cs-gpr: false
+ fsl,burst-clk-enable: false
+ - if:
+ not:
+ required:
+ - fsl,burst-clk-enable
+ then:
+ properties:
+ fsl,continuous-burst-clk: false
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,imx1-weim
+ then:
+ patternProperties:
+ "^.*@[0-7],[0-9a-f]+$":
+ properties:
+ fsl,weim-cs-timing:
+ items:
+ items:
+ - description: CSxU
+ - description: CSxL
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx27-weim
+ - fsl,imx31-weim
+ - fsl,imx35-weim
+ then:
+ patternProperties:
+ "^.*@[0-7],[0-9a-f]+$":
+ properties:
+ fsl,weim-cs-timing:
+ items:
+ items:
+ - description: CSCRxU
+ - description: CSCRxL
+ - description: CSCRxA
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx50-weim
+ - fsl,imx51-weim
+ - fsl,imx6q-weim
+ - fsl,imx6sx-weim
+ - fsl,imx6ul-weim
+ then:
+ patternProperties:
+ "^.*@[0-7],[0-9a-f]+$":
+ properties:
+ fsl,weim-cs-timing:
+ items:
+ items:
+ - description: CSxGCR1
+ - description: CSxGCR2
+ - description: CSxRCR1
+ - description: CSxRCR2
+ - description: CSxWCR1
+ - description: CSxWCR2
+
+additionalProperties: false
+
+examples:
+ - |
+ memory-controller@21b8000 {
+ compatible = "fsl,imx6q-weim";
+ reg = <0x021b8000 0x4000>;
+ clocks = <&clks 196>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x08000000>;
+ fsl,weim-cs-gpr = <&gpr>;
+
+ flash@0,0 {
+ compatible = "cfi-flash";
+ reg = <0 0 0x02000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ bank-width = <2>;
+ fsl,weim-cs-timing = <0x00620081 0x00000001 0x1c022000
+ 0x0000c000 0x1404a38e 0x00000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml b/Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml
index 8d9dae15ade0..00deeb09f87d 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml
@@ -37,5 +37,6 @@ allOf:
- $ref: ingenic,nemc-peripherals.yaml#
- $ref: intel,ixp4xx-expansion-peripheral-props.yaml#
- $ref: ti,gpmc-child.yaml#
+ - $ref: fsl/fsl,imx-weim-peripherals.yaml
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml
index f54e553e6c0e..71896cb10692 100644
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml
@@ -145,7 +145,7 @@ patternProperties:
"^emc-table@[0-9]+$":
$ref: "#/$defs/emc-table"
- "^emc-tables@[a-z0-9-]+$":
+ "^emc-tables@[a-f0-9-]+$":
type: object
properties:
reg:
diff --git a/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml b/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml
index 25f3bb9890ae..d7745dd53b51 100644
--- a/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml
@@ -45,6 +45,7 @@ properties:
- items:
- enum:
- renesas,r8a779g0-rpc-if # R-Car V4H
+ - renesas,r8a779h0-rpc-if # R-Car V4M
- const: renesas,rcar-gen4-rpc-if # a generic R-Car gen4 device
- items:
diff --git a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
index 14f1833d37c9..84ac6f50a6fc 100644
--- a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
@@ -23,7 +23,9 @@ maintainers:
properties:
compatible:
- const: st,stm32mp1-fmc2-ebi
+ enum:
+ - st,stm32mp1-fmc2-ebi
+ - st,stm32mp25-fmc2-ebi
reg:
maxItems: 1
@@ -34,6 +36,9 @@ properties:
resets:
maxItems: 1
+ power-domains:
+ maxItems: 1
+
"#address-cells":
const: 2
diff --git a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml
index 75d8138298fb..660e2ca42daf 100644
--- a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml
@@ -17,6 +17,10 @@ properties:
oneOf:
- items:
- enum:
+ - brcm,bcm74165b0-asp
+ - const: brcm,asp-v2.2
+ - items:
+ - enum:
- brcm,bcm74165-asp
- const: brcm,asp-v2.1
- items:
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
index 6684810fcbf0..23dfe0838dca 100644
--- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
@@ -24,6 +24,7 @@ properties:
- brcm,genet-mdio-v5
- brcm,asp-v2.0-mdio
- brcm,asp-v2.1-mdio
+ - brcm,asp-v2.2-mdio
- brcm,unimac-mdio
reg:
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
index 170e23f0610d..20c0572c9853 100644
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -28,6 +28,8 @@ Optional properties:
available with tcan4552/4553.
- device-wake-gpios: Wake up GPIO to wake up the TCAN device. Not
available with tcan4552/4553.
+ - wakeup-source: Leave the chip running when suspended, and configure
+ the RX interrupt to wake up the device.
Example:
tcan4x5x: tcan4x5x@0 {
@@ -42,4 +44,5 @@ tcan4x5x: tcan4x5x@0 {
device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+ wakeup-source;
};
diff --git a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
index 64d57c343e6f..8d4e5af6fd6c 100644
--- a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
+++ b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
@@ -49,6 +49,10 @@ properties:
resets:
maxItems: 1
+ xlnx,has-ecc:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: CAN TX_OL, TX_TL and RX FIFOs have ECC support(AXI CAN)
+
required:
- compatible
- reg
@@ -137,6 +141,7 @@ examples:
interrupts = <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>;
tx-fifo-depth = <0x40>;
rx-fifo-depth = <0x40>;
+ xlnx,has-ecc;
};
- |
diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml
index bf8894a0257e..2c71e2cf3a2f 100644
--- a/Documentation/devicetree/bindings/net/cdns,macb.yaml
+++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml
@@ -59,6 +59,11 @@ properties:
- cdns,gem # Generic
- cdns,macb # Generic
+ - items:
+ - enum:
+ - microchip,sam9x7-gem # Microchip SAM9X7 gigabit ethernet interface
+ - const: microchip,sama7g5-gem # Microchip SAMA7G5 gigabit ethernet interface
+
reg:
minItems: 1
items:
diff --git a/Documentation/devicetree/bindings/net/dsa/ar9331.txt b/Documentation/devicetree/bindings/net/dsa/ar9331.txt
deleted file mode 100644
index f824fdae0da2..000000000000
--- a/Documentation/devicetree/bindings/net/dsa/ar9331.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-Atheros AR9331 built-in switch
-=============================
-
-It is a switch built-in to Atheros AR9331 WiSoC and addressable over internal
-MDIO bus. All PHYs are built-in as well.
-
-Required properties:
-
- - compatible: should be: "qca,ar9331-switch"
- - reg: Address on the MII bus for the switch.
- - resets : Must contain an entry for each entry in reset-names.
- - reset-names : Must include the following entries: "switch"
- - interrupt-parent: Phandle to the parent interrupt controller
- - interrupts: IRQ line for the switch
- - interrupt-controller: Indicates the switch is itself an interrupt
- controller. This is used for the PHY interrupts.
- - #interrupt-cells: must be 1
- - mdio: Container of PHY and devices on the switches MDIO bus.
-
-See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
-required and optional properties.
-Examples:
-
-eth0: ethernet@19000000 {
- compatible = "qca,ar9330-eth";
- reg = <0x19000000 0x200>;
- interrupts = <4>;
-
- resets = <&rst 9>, <&rst 22>;
- reset-names = "mac", "mdio";
- clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
- clock-names = "eth", "mdio";
-
- phy-mode = "mii";
- phy-handle = <&phy_port4>;
-};
-
-eth1: ethernet@1a000000 {
- compatible = "qca,ar9330-eth";
- reg = <0x1a000000 0x200>;
- interrupts = <5>;
- resets = <&rst 13>, <&rst 23>;
- reset-names = "mac", "mdio";
- clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
- clock-names = "eth", "mdio";
-
- phy-mode = "gmii";
-
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
-
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
-
- switch10: switch@10 {
- #address-cells = <1>;
- #size-cells = <0>;
-
- compatible = "qca,ar9331-switch";
- reg = <0x10>;
- resets = <&rst 8>;
- reset-names = "switch";
-
- interrupt-parent = <&miscintc>;
- interrupts = <12>;
-
- interrupt-controller;
- #interrupt-cells = <1>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- switch_port0: port@0 {
- reg = <0x0>;
- ethernet = <&eth1>;
-
- phy-mode = "gmii";
-
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
-
- switch_port1: port@1 {
- reg = <0x1>;
- phy-handle = <&phy_port0>;
- phy-mode = "internal";
- };
-
- switch_port2: port@2 {
- reg = <0x2>;
- phy-handle = <&phy_port1>;
- phy-mode = "internal";
- };
-
- switch_port3: port@3 {
- reg = <0x3>;
- phy-handle = <&phy_port2>;
- phy-mode = "internal";
- };
-
- switch_port4: port@4 {
- reg = <0x4>;
- phy-handle = <&phy_port3>;
- phy-mode = "internal";
- };
- };
-
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
-
- interrupt-parent = <&switch10>;
-
- phy_port0: phy@0 {
- reg = <0x0>;
- interrupts = <0>;
- };
-
- phy_port1: phy@1 {
- reg = <0x1>;
- interrupts = <0>;
- };
-
- phy_port2: phy@2 {
- reg = <0x2>;
- interrupts = <0>;
- };
-
- phy_port3: phy@3 {
- reg = <0x3>;
- interrupts = <0>;
- };
-
- phy_port4: phy@4 {
- reg = <0x4>;
- interrupts = <0>;
- };
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
index c963dc09e8e1..52acc15ebcbf 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -31,6 +31,7 @@ properties:
- microchip,ksz9893
- microchip,ksz9563
- microchip,ksz8563
+ - microchip,ksz8567
reset-gpios:
description:
diff --git a/Documentation/devicetree/bindings/net/dsa/qca,ar9331.yaml b/Documentation/devicetree/bindings/net/dsa/qca,ar9331.yaml
new file mode 100644
index 000000000000..fd9ddc59d38c
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/qca,ar9331.yaml
@@ -0,0 +1,161 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/dsa/qca,ar9331.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Atheros AR9331 built-in switch
+
+maintainers:
+ - Oleksij Rempel <o.rempel@pengutronix.de>
+
+description:
+ Qualcomm Atheros AR9331 is a switch built-in to Atheros AR9331 WiSoC and
+ addressable over internal MDIO bus. All PHYs are built-in as well.
+
+properties:
+ compatible:
+ const: qca,ar9331-switch
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 1
+
+ mdio:
+ $ref: /schemas/net/mdio.yaml#
+ unevaluatedProperties: false
+ properties:
+ interrupt-parent: true
+
+ patternProperties:
+ '(ethernet-)?phy@[0-4]+$':
+ type: object
+ unevaluatedProperties: false
+
+ properties:
+ reg: true
+ interrupts:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ items:
+ - const: switch
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-controller
+ - '#interrupt-cells'
+ - mdio
+ - ports
+ - resets
+ - reset-names
+
+allOf:
+ - $ref: dsa.yaml#/$defs/ethernet-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch10: switch@10 {
+ compatible = "qca,ar9331-switch";
+ reg = <0x10>;
+
+ interrupt-parent = <&miscintc>;
+ interrupts = <12>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ resets = <&rst 8>;
+ reset-names = "switch";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0x0>;
+ ethernet = <&eth1>;
+
+ phy-mode = "gmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ port@1 {
+ reg = <0x1>;
+ phy-handle = <&phy_port0>;
+ phy-mode = "internal";
+ };
+
+ port@2 {
+ reg = <0x2>;
+ phy-handle = <&phy_port1>;
+ phy-mode = "internal";
+ };
+
+ port@3 {
+ reg = <0x3>;
+ phy-handle = <&phy_port2>;
+ phy-mode = "internal";
+ };
+
+ port@4 {
+ reg = <0x4>;
+ phy-handle = <&phy_port3>;
+ phy-mode = "internal";
+ };
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&switch10>;
+
+ phy_port0: ethernet-phy@0 {
+ reg = <0x0>;
+ interrupts = <0>;
+ };
+
+ phy_port1: ethernet-phy@1 {
+ reg = <0x1>;
+ interrupts = <0>;
+ };
+
+ phy_port2: ethernet-phy@2 {
+ reg = <0x2>;
+ interrupts = <0>;
+ };
+
+ phy_port3: ethernet-phy@3 {
+ reg = <0x3>;
+ interrupts = <0>;
+ };
+
+ phy_port4: ethernet-phy@4 {
+ reg = <0x4>;
+ interrupts = <0>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/dsa/realtek.yaml b/Documentation/devicetree/bindings/net/dsa/realtek.yaml
index cce692f57b08..70b6bda3cf98 100644
--- a/Documentation/devicetree/bindings/net/dsa/realtek.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/realtek.yaml
@@ -59,6 +59,9 @@ properties:
description: GPIO to be used to reset the whole device
maxItems: 1
+ resets:
+ maxItems: 1
+
realtek,disable-leds:
type: boolean
description: |
@@ -127,7 +130,6 @@ else:
- mdc-gpios
- mdio-gpios
- mdio
- - reset-gpios
required:
- compatible
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index d14d123ad7a0..b2785b03139f 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -14,7 +14,6 @@ properties:
pattern: "^ethernet(@.*)?$"
label:
- $ref: /schemas/types.yaml#/definitions/string
description: Human readable label on a port of a box.
local-mac-address:
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy-package.yaml b/Documentation/devicetree/bindings/net/ethernet-phy-package.yaml
new file mode 100644
index 000000000000..e567101e6f38
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ethernet-phy-package.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ethernet-phy-package.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ethernet PHY Package Common Properties
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description:
+ PHY packages are multi-port Ethernet PHY of the same family
+ and each Ethernet PHY is affected by the global configuration
+ of the PHY package.
+
+ Each reg of the PHYs defined in the PHY package node is
+ absolute and describe the real address of the Ethernet PHY on
+ the MDIO bus.
+
+properties:
+ $nodename:
+ pattern: "^ethernet-phy-package@[a-f0-9]+$"
+
+ reg:
+ minimum: 0
+ maximum: 31
+ description:
+ The base ID number for the PHY package.
+ Commonly the ID of the first PHY in the PHY package.
+
+ Some PHY in the PHY package might be not defined but
+ still occupy ID on the device (just not attached to
+ anything) hence the PHY package reg might correspond
+ to a not attached PHY (offset 0).
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+patternProperties:
+ ^ethernet-phy@[a-f0-9]+$:
+ $ref: ethernet-phy.yaml#
+
+required:
+ - reg
+ - '#address-cells'
+ - '#size-cells'
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
index 8948a11c994e..5536c06139ca 100644
--- a/Documentation/devicetree/bindings/net/fsl,fec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -224,6 +224,9 @@ properties:
Can be omitted thus no delay is observed. Delay is in range of 1ms to 1000ms.
Other delays are invalid.
+ iommus:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/marvell,prestera.yaml b/Documentation/devicetree/bindings/net/marvell,prestera.yaml
index 5ea8b73663a5..16ff892f7bbd 100644
--- a/Documentation/devicetree/bindings/net/marvell,prestera.yaml
+++ b/Documentation/devicetree/bindings/net/marvell,prestera.yaml
@@ -78,8 +78,8 @@ examples:
pcie@0 {
#address-cells = <3>;
#size-cells = <2>;
- ranges = <0x0 0x0 0x0 0x0 0x0 0x0>;
- reg = <0x0 0x0 0x0 0x0 0x0 0x0>;
+ ranges = <0x02000000 0x0 0x100000 0x10000000 0x0 0x0>;
+ reg = <0x0 0x1000>;
device_type = "pci";
switch@0,0 {
diff --git a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
index 9cc236ec42f2..d0332eb76ad2 100644
--- a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
@@ -73,7 +73,7 @@ examples:
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/irq.h>
- i2c {
+ spi {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/net/qca,qca808x.yaml b/Documentation/devicetree/bindings/net/qca,qca808x.yaml
new file mode 100644
index 000000000000..e2552655902a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,qca808x.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qca,qca808x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Atheros QCA808X PHY
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description:
+ QCA808X PHYs can have up to 3 LEDs attached.
+ All 3 LEDs are disabled by default.
+ 2 LEDs have dedicated pins with the 3rd LED having the
+ double function of Interrupt LEDs/GPIO or additional LED.
+
+ By default this special PIN is set to LED function.
+
+allOf:
+ - $ref: ethernet-phy.yaml#
+
+properties:
+ compatible:
+ enum:
+ - ethernet-phy-id004d.d101
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/leds/common.h>
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-phy@0 {
+ compatible = "ethernet-phy-id004d.d101";
+ reg = <0>;
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_WAN;
+ default-state = "keep";
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
index 7bdb412a0185..69a337c7e345 100644
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -37,12 +37,14 @@ properties:
items:
- description: Combined signal for various interrupt events
- description: The interrupt that occurs when Rx exits the LPI state
+ - description: The interrupt that occurs when HW safety error triggered
interrupt-names:
minItems: 1
items:
- const: macirq
- - const: eth_lpi
+ - enum: [eth_lpi, sfty]
+ - const: sfty
clocks:
maxItems: 4
@@ -89,8 +91,9 @@ examples:
<&gcc GCC_ETH_PTP_CLK>,
<&gcc GCC_ETH_RGMII_CLK>;
interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq", "eth_lpi";
+ <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 782 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi", "sfty";
rx-fifo-depth = <4096>;
tx-fifo-depth = <4096>;
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index c30218684cfe..53cae71d9957 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -159,7 +159,7 @@ properties:
when the AP (not the modem) performs early initialization.
firmware-name:
- $ref: /schemas/types.yaml#/definitions/string
+ maxItems: 1
description:
If present, name (or relative path) of the file within the
firmware search path containing the firmware image used when
diff --git a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
index 3407e909e8a7..0029e197a825 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
@@ -44,6 +44,21 @@ properties:
items:
- const: gcc_mdio_ahb_clk
+ clock-frequency:
+ description:
+ The MDIO bus clock that must be output by the MDIO bus hardware, if
+ absent, the default hardware values are used.
+
+ MDC rate is feed by an external clock (fixed 100MHz) and is divider
+ internally. The default divider is /256 resulting in the default rate
+ applied of 390KHz.
+
+ To follow 802.3 standard that instruct up to 2.5MHz by default, if
+ this property is not declared and the divider is set to /256, by
+ default 1.5625Mhz is select.
+ enum: [ 390625, 781250, 1562500, 3125000, 6250000, 12500000 ]
+ default: 1562500
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/qcom,qca807x.yaml b/Documentation/devicetree/bindings/net/qcom,qca807x.yaml
new file mode 100644
index 000000000000..7290024024f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom,qca807x.yaml
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qcom,qca807x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QCA807x Ethernet PHY
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+ - Robert Marko <robert.marko@sartura.hr>
+
+description: |
+ Qualcomm QCA8072/5 Ethernet PHY is PHY package of 2 or 5
+ IEEE 802.3 clause 22 compliant 10BASE-Te, 100BASE-TX and
+ 1000BASE-T PHY-s.
+
+ They feature 2 SerDes, one for PSGMII or QSGMII connection with
+ MAC, while second one is SGMII for connection to MAC or fiber.
+
+ Both models have a combo port that supports 1000BASE-X and
+ 100BASE-FX fiber.
+
+ Each PHY inside of QCA807x series has 4 digitally controlled
+ output only pins that natively drive LED-s for up to 2 attached
+ LEDs. Some vendor also use these 4 output for GPIO usage without
+ attaching LEDs.
+
+ Note that output pins can be set to drive LEDs OR GPIO, mixed
+ definition are not accepted.
+
+$ref: ethernet-phy-package.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,qca8072-package
+ - qcom,qca8075-package
+
+ qcom,package-mode:
+ description: |
+ PHY package can be configured in 3 mode following this table:
+
+ First Serdes mode Second Serdes mode
+ Option 1 PSGMII for copper Disabled
+ ports 0-4
+ Option 2 PSGMII for copper 1000BASE-X / 100BASE-FX
+ ports 0-4
+ Option 3 QSGMII for copper SGMII for
+ ports 0-3 copper port 4
+
+ PSGMII mode (option 1 or 2) is configured dynamically based on
+ the presence of a connected SFP device.
+ $ref: /schemas/types.yaml#/definitions/string
+ enum:
+ - qsgmii
+ - psgmii
+ default: psgmii
+
+ qcom,tx-drive-strength-milliwatt:
+ description: set the TX Amplifier value in mv.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [140, 160, 180, 200, 220,
+ 240, 260, 280, 300, 320,
+ 400, 500, 600]
+ default: 600
+
+patternProperties:
+ ^ethernet-phy@[a-f0-9]+$:
+ $ref: ethernet-phy.yaml#
+
+ properties:
+ qcom,dac-full-amplitude:
+ description:
+ Set Analog MDI driver amplitude to FULL.
+
+ With this not defined, amplitude is set to DSP.
+ (amplitude is adjusted based on cable length)
+
+ With this enabled and qcom,dac-full-bias-current
+ and qcom,dac-disable-bias-current-tweak disabled,
+ bias current is half.
+ type: boolean
+
+ qcom,dac-full-bias-current:
+ description:
+ Set Analog MDI driver bias current to FULL.
+
+ With this not defined, bias current is set to DSP.
+ (bias current is adjusted based on cable length)
+
+ Actual bias current might be different with
+ qcom,dac-disable-bias-current-tweak disabled.
+ type: boolean
+
+ qcom,dac-disable-bias-current-tweak:
+ description: |
+ Set Analog MDI driver bias current to disable tweak
+ to bias current.
+
+ With this not defined, bias current tweak are enabled
+ by default.
+
+ With this enabled the following tweak are NOT applied:
+ - With both FULL amplitude and FULL bias current: bias current
+ is set to half.
+ - With only DSP amplitude: bias current is set to half and
+ is set to 1/4 with cable < 10m.
+ - With DSP bias current (included both DSP amplitude and
+ DSP bias current): bias current is half the detected current
+ with cable < 10m.
+ type: boolean
+
+ gpio-controller: true
+
+ '#gpio-cells':
+ const: 2
+
+ if:
+ required:
+ - gpio-controller
+ then:
+ properties:
+ leds: false
+
+ unevaluatedProperties: false
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/leds/common.h>
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-phy-package@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "qcom,qca8075-package";
+ reg = <0>;
+
+ qcom,package-mode = "qsgmii";
+
+ ethernet-phy@0 {
+ reg = <0>;
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
+ };
+
+ ethernet-phy@1 {
+ reg = <1>;
+ };
+
+ ethernet-phy@2 {
+ reg = <2>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ ethernet-phy@3 {
+ reg = <3>;
+ };
+
+ ethernet-phy@4 {
+ reg = <4>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
index 890f7858d0dc..de7ba7f345a9 100644
--- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
@@ -46,6 +46,7 @@ properties:
- enum:
- renesas,etheravb-r8a779a0 # R-Car V3U
- renesas,etheravb-r8a779g0 # R-Car V4H
+ - renesas,etheravb-r8a779h0 # R-Car V4M
- const: renesas,etheravb-rcar-gen4 # R-Car Gen4
- items:
diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
index 475aff7714d6..ea35d19be829 100644
--- a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
@@ -65,9 +65,11 @@ properties:
rx-internal-delay-ps:
enum: [0, 1800]
+ default: 0
tx-internal-delay-ps:
enum: [0, 2000]
+ default: 0
'#address-cells':
const: 1
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 5c2769dc689a..6b0341a8e0ea 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -95,6 +95,7 @@ properties:
- snps,dwmac-5.20
- snps,dwxgmac
- snps,dwxgmac-2.10
+ - starfive,jh7100-dwmac
- starfive,jh7110-dwmac
reg:
@@ -107,13 +108,15 @@ properties:
- description: Combined signal for various interrupt events
- description: The interrupt to manage the remote wake-up packet detection
- description: The interrupt that occurs when Rx exits the LPI state
+ - description: The interrupt that occurs when HW safety error triggered
interrupt-names:
minItems: 1
items:
- const: macirq
- - enum: [eth_wake_irq, eth_lpi]
- - const: eth_lpi
+ - enum: [eth_wake_irq, eth_lpi, sfty]
+ - enum: [eth_wake_irq, eth_lpi, sfty]
+ - enum: [eth_wake_irq, eth_lpi, sfty]
clocks:
minItems: 1
@@ -144,10 +147,12 @@ properties:
- description: AHB reset
reset-names:
- minItems: 1
- items:
- - const: stmmaceth
- - const: ahb
+ oneOf:
+ - items:
+ - enum: [stmmaceth, ahb]
+ - items:
+ - const: stmmaceth
+ - const: ahb
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
index 5e7cfbbebce6..0d1962980f57 100644
--- a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -16,16 +16,20 @@ select:
compatible:
contains:
enum:
+ - starfive,jh7100-dwmac
- starfive,jh7110-dwmac
required:
- compatible
properties:
compatible:
- items:
- - enum:
- - starfive,jh7110-dwmac
- - const: snps,dwmac-5.20
+ oneOf:
+ - items:
+ - const: starfive,jh7100-dwmac
+ - const: snps,dwmac
+ - items:
+ - const: starfive,jh7110-dwmac
+ - const: snps,dwmac-5.20
reg:
maxItems: 1
@@ -46,24 +50,6 @@ properties:
- const: tx
- const: gtx
- interrupts:
- minItems: 3
- maxItems: 3
-
- interrupt-names:
- minItems: 3
- maxItems: 3
-
- resets:
- items:
- - description: MAC Reset signal.
- - description: AHB Reset signal.
-
- reset-names:
- items:
- - const: stmmaceth
- - const: ahb
-
starfive,tx-use-rgmii-clk:
description:
Tx clock is provided by external rgmii clock.
@@ -94,6 +80,48 @@ required:
allOf:
- $ref: snps,dwmac.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: starfive,jh7100-dwmac
+ then:
+ properties:
+ interrupts:
+ minItems: 2
+ maxItems: 2
+
+ interrupt-names:
+ minItems: 2
+ maxItems: 2
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: ahb
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: starfive,jh7110-dwmac
+ then:
+ properties:
+ interrupts:
+ minItems: 3
+ maxItems: 3
+
+ interrupt-names:
+ minItems: 3
+ maxItems: 3
+
+ resets:
+ minItems: 2
+
+ reset-names:
+ minItems: 2
+
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
index f07ae3173b03..d5bd93ee4dbb 100644
--- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
@@ -7,8 +7,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: TI SoC Ethernet Switch Controller (CPSW)
maintainers:
- - Grygorii Strashko <grygorii.strashko@ti.com>
- - Sekhar Nori <nsekhar@ti.com>
+ - Siddharth Vadapalli <s-vadapalli@ti.com>
+ - Ravi Gunasekaran <r-gunasekaran@ti.com>
+ - Roger Quadros <rogerq@kernel.org>
description:
The 3-port switch gigabit ethernet subsystem provides ethernet packet
diff --git a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
index db74474207ed..784866ea392b 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
@@ -62,6 +62,40 @@ properties:
for the PHY. The internal delay for the PHY is fixed to 3.5ns relative
to transmit data.
+ ti,cfg-dac-minus-one-bp:
+ description: |
+ DP83826 PHY only.
+ Sets the voltage ratio (with respect to the nominal value)
+ of the logical level -1 for the MLT-3 encoded TX data.
+ enum: [5000, 5625, 6250, 6875, 7500, 8125, 8750, 9375, 10000,
+ 10625, 11250, 11875, 12500, 13125, 13750, 14375, 15000]
+ default: 10000
+
+ ti,cfg-dac-plus-one-bp:
+ description: |
+ DP83826 PHY only.
+ Sets the voltage ratio (with respect to the nominal value)
+ of the logical level +1 for the MLT-3 encoded TX data.
+ enum: [5000, 5625, 6250, 6875, 7500, 8125, 8750, 9375, 10000,
+ 10625, 11250, 11875, 12500, 13125, 13750, 14375, 15000]
+ default: 10000
+
+ ti,rmii-mode:
+ description: |
+ If present, select the RMII operation mode. Two modes are
+ available:
+ - RMII master, where the PHY outputs a 50MHz reference clock which can
+ be connected to the MAC.
+ - RMII slave, where the PHY expects a 50MHz reference clock input
+ shared with the MAC.
+ The RMII operation mode can also be configured by its straps.
+ If the strap pin is not set correctly or not set at all, then this can be
+ used to configure it.
+ $ref: /schemas/types.yaml#/definitions/string
+ enum:
+ - master
+ - slave
+
required:
- reg
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index c9c25132d154..73ed5951d296 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -7,8 +7,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: The TI AM654x/J721E/AM642x SoC Gigabit Ethernet MAC (Media Access Controller)
maintainers:
- - Grygorii Strashko <grygorii.strashko@ti.com>
- - Sekhar Nori <nsekhar@ti.com>
+ - Siddharth Vadapalli <s-vadapalli@ti.com>
+ - Ravi Gunasekaran <r-gunasekaran@ti.com>
+ - Roger Quadros <rogerq@kernel.org>
description:
The TI AM654x/J721E SoC Gigabit Ethernet MAC (CPSW2G NUSS) has two ports
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
index 3e910d3b24a0..b1c875325776 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
@@ -7,8 +7,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: The TI AM654x/J721E Common Platform Time Sync (CPTS) module
maintainers:
- - Grygorii Strashko <grygorii.strashko@ti.com>
- - Sekhar Nori <nsekhar@ti.com>
+ - Siddharth Vadapalli <s-vadapalli@ti.com>
+ - Ravi Gunasekaran <r-gunasekaran@ti.com>
+ - Roger Quadros <rogerq@kernel.org>
description: |+
The TI AM654x/J721E CPTS module is used to facilitate host control of time
diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
index 252207adbc54..eabceb849537 100644
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
@@ -19,9 +19,6 @@ description: |
Alternatively, it can specify the wireless part of the MT7628/MT7688
or MT7622/MT7986 SoC.
-allOf:
- - $ref: ieee80211.yaml#
-
properties:
compatible:
enum:
@@ -38,7 +35,12 @@ properties:
MT7986 should contain 3 regions consys, dcm, and sku, in this order.
interrupts:
- maxItems: 1
+ minItems: 1
+ items:
+ - description: major interrupt for rings
+ - description: additional interrupt for ring 19
+ - description: additional interrupt for ring 4
+ - description: additional interrupt for ring 5
power-domains:
maxItems: 1
@@ -217,6 +219,24 @@ required:
- compatible
- reg
+allOf:
+ - $ref: ieee80211.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mediatek,mt7981-wmac
+ - mediatek,mt7986-wmac
+ then:
+ properties:
+ interrupts:
+ minItems: 4
+ else:
+ properties:
+ interrupts:
+ maxItems: 1
+
unevaluatedProperties: false
examples:
@@ -293,7 +313,10 @@ examples:
reg = <0x18000000 0x1000000>,
<0x10003000 0x1000>,
<0x11d10000 0x1000>;
- interrupts = <GIC_SPI 213 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 213 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 214 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&topckgen 50>,
<&topckgen 62>;
clock-names = "mcu", "ap2conn";
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
index 7758a55dd328..9b3ef4bc3732 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
@@ -8,6 +8,7 @@ title: Qualcomm Technologies ath10k wireless devices
maintainers:
- Kalle Valo <kvalo@kernel.org>
+ - Jeff Johnson <jjohnson@kernel.org>
description:
Qualcomm Technologies, Inc. IEEE 802.11ac devices.
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
index 817f02a8b481..41d023797d7d 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
@@ -9,6 +9,7 @@ title: Qualcomm Technologies ath11k wireless devices (PCIe)
maintainers:
- Kalle Valo <kvalo@kernel.org>
+ - Jeff Johnson <jjohnson@kernel.org>
description: |
Qualcomm Technologies IEEE 802.11ax PCIe devices
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
index 7d5f982a3d09..672282cdfc2f 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@@ -9,6 +9,7 @@ title: Qualcomm Technologies ath11k wireless devices
maintainers:
- Kalle Valo <kvalo@kernel.org>
+ - Jeff Johnson <jjohnson@kernel.org>
description: |
These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax
diff --git a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml
index 0720b54881c2..e76fb273490f 100644
--- a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml
+++ b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml
@@ -45,6 +45,7 @@ properties:
- renesas,r8a779a0-sysc # R-Car V3U
- renesas,r8a779f0-sysc # R-Car S4-8
- renesas,r8a779g0-sysc # R-Car V4H
+ - renesas,r8a779h0-sysc # R-Car V4M
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/reset/renesas,rst.yaml b/Documentation/devicetree/bindings/reset/renesas,rst.yaml
index e7e487247751..58b4a45d3380 100644
--- a/Documentation/devicetree/bindings/reset/renesas,rst.yaml
+++ b/Documentation/devicetree/bindings/reset/renesas,rst.yaml
@@ -50,6 +50,7 @@ properties:
- renesas,r8a779a0-rst # R-Car V3U
- renesas,r8a779f0-rst # R-Car S4-8
- renesas,r8a779g0-rst # R-Car V4H
+ - renesas,r8a779h0-rst # R-Car V4M
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
index 49db66801429..1f1b42dde94d 100644
--- a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Zynq UltraScale+ MPSoC and Versal reset
maintainers:
- - Piyush Mehta <piyush.mehta@amd.com>
+ - Mubin Sayyed <mubin.sayyed@amd.com>
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
description: |
The Zynq UltraScale+ MPSoC and Versal has several different resets.
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,pbs.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,pbs.yaml
new file mode 100644
index 000000000000..b502ca72266a
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,pbs.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soc/qcom/qcom,pbs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. Programmable Boot Sequencer
+
+maintainers:
+ - Anjelique Melendez <quic_amelende@quicinc.com>
+
+description: |
+ The Qualcomm Technologies, Inc. Programmable Boot Sequencer (PBS)
+ supports triggering power up and power down sequences for clients
+ upon request.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,pmi632-pbs
+ - const: qcom,pbs
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/spmi/spmi.h>
+
+ pmic@0 {
+ reg = <0x0 SPMI_USID>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pbs@7400 {
+ compatible = "qcom,pmi632-pbs", "qcom,pbs";
+ reg = <0x7400>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml
index 61df97ffe1e4..d3f3259ef77d 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml
@@ -32,6 +32,7 @@ properties:
- items:
- enum:
- qcom,sm8650-pmic-glink
+ - qcom,x1e80100-pmic-glink
- const: qcom,sm8550-pmic-glink
- const: qcom,pmic-glink
@@ -65,6 +66,7 @@ allOf:
enum:
- qcom,sm8450-pmic-glink
- qcom,sm8550-pmic-glink
+ - qcom,x1e80100-pmic-glink
then:
properties:
orientation-gpios: false
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml
index 031800985b5e..9410404f87f1 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml
@@ -35,6 +35,8 @@ properties:
description: Phandle to an RPM MSG RAM slice containing the master stats
minItems: 1
maxItems: 5
+ items:
+ maxItems: 1
qcom,master-names:
$ref: /schemas/types.yaml#/definitions/string-array
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,saw2.yaml
index 20c8cd38ff0d..ca4bce817273 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,saw2.yaml
@@ -1,23 +1,33 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: http://devicetree.org/schemas/soc/qcom/qcom,spm.yaml#
+$id: http://devicetree.org/schemas/soc/qcom/qcom,saw2.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Qualcomm Subsystem Power Manager
+title: Qualcomm Subsystem Power Manager / SPM AVS Wrapper 2 (SAW2)
maintainers:
- Andy Gross <agross@kernel.org>
- Bjorn Andersson <bjorn.andersson@linaro.org>
description: |
- This binding describes the Qualcomm Subsystem Power Manager, used to control
- the peripheral logic surrounding the application cores in Qualcomm platforms.
+ The Qualcomm Subsystem Power Manager is used to control the peripheral logic
+ surrounding the application cores in Qualcomm platforms.
+
+ The SAW2 is a wrapper around the Subsystem Power Manager (SPM) and the
+ Adaptive Voltage Scaling (AVS) hardware. The SPM is a programmable
+ power-controller that transitions a piece of hardware (like a processor or
+ subsystem) into and out of low power modes via a direct connection to
+ the PMIC. It can also be wired up to interact with other processors in the
+ system, notifying them when a low power state is entered or exited.
properties:
compatible:
items:
- enum:
+ - qcom,ipq4019-saw2-cpu
+ - qcom,ipq4019-saw2-l2
+ - qcom,ipq8064-saw2-cpu
- qcom,sdm660-gold-saw2-v4.1-l2
- qcom,sdm660-silver-saw2-v4.1-l2
- qcom,msm8998-gold-saw2-v4.1-l2
@@ -26,16 +36,27 @@ properties:
- qcom,msm8916-saw2-v3.0-cpu
- qcom,msm8939-saw2-v3.0-cpu
- qcom,msm8226-saw2-v2.1-cpu
+ - qcom,msm8226-saw2-v2.1-l2
+ - qcom,msm8960-saw2-cpu
- qcom,msm8974-saw2-v2.1-cpu
+ - qcom,msm8974-saw2-v2.1-l2
- qcom,msm8976-gold-saw2-v2.3-l2
- qcom,msm8976-silver-saw2-v2.3-l2
- qcom,apq8084-saw2-v2.1-cpu
+ - qcom,apq8084-saw2-v2.1-l2
- qcom,apq8064-saw2-v1.1-cpu
- const: qcom,saw2
reg:
- description: Base address and size of the SPM register region
- maxItems: 1
+ items:
+ - description: Base address and size of the SPM register region
+ - description: Base address and size of the alias register region
+ minItems: 1
+
+ regulator:
+ $ref: /schemas/regulator/regulator.yaml#
+ description: Indicates that this SPM device acts as a regulator device
+ device for the core (CPU or Cache) the SPM is attached to.
required:
- compatible
@@ -82,4 +103,17 @@ examples:
reg = <0x17912000 0x1000>;
};
+ - |
+ /*
+ * Example 3: SAW2 with the bundled regulator definition.
+ */
+ power-manager@2089000 {
+ compatible = "qcom,apq8064-saw2-v1.1-cpu", "qcom,saw2";
+ reg = <0x02089000 0x1000>, <0x02009000 0x1000>;
+
+ regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
+ };
...
diff --git a/Documentation/devicetree/bindings/soc/renesas/renesas-soc.yaml b/Documentation/devicetree/bindings/soc/renesas/renesas-soc.yaml
new file mode 100644
index 000000000000..5ddd31f30f26
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/renesas/renesas-soc.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soc/renesas/renesas-soc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas SoC compatibles naming convention
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+ - Niklas Söderlund <niklas.soderlund@ragnatech.se>
+
+description: |
+ Guidelines for new compatibles for SoC blocks/components.
+ When adding new compatibles in new bindings, use the format::
+ renesas,SoC-IP
+
+ For example::
+ renesas,r8a77965-csi2
+
+ When adding new compatibles to existing bindings, use the format in the
+ existing binding, even if it contradicts the above.
+
+select:
+ properties:
+ compatible:
+ contains:
+ pattern: "^renesas,.+-.+$"
+ required:
+ - compatible
+
+properties:
+ compatible:
+ minItems: 1
+ maxItems: 4
+ items:
+ anyOf:
+ # Preferred naming style for compatibles of SoC components
+ - pattern: "^renesas,(emev2|r(7s|8a|9a)[a-z0-9]+|rcar|rmobile|rz[a-z0-9]*|sh(7[a-z0-9]+)?|mobile)-[a-z0-9-]+$"
+ - pattern: "^renesas,(condor|falcon|gr-peach|gray-hawk|salvator|sk-rz|smar(c(2)?)?|spider|white-hawk)(.*)?$"
+
+ # Legacy compatibles
+ #
+ # New compatibles are not allowed.
+ - pattern: "^renesas,(can|cpg|dmac|du|(g)?ether(avb)?|gpio|hscif|(r)?i[i2]c|imr|intc|ipmmu|irqc|jpu|mmcif|msiof|mtu2|pci(e)?|pfc|pwm|[rq]spi|rcar_sound|sata|scif[ab]*|sdhi|thermal|tmu|tpu|usb(2|hs)?|vin|xhci)-[a-z0-9-]+$"
+ - pattern: "^renesas,(d|s)?bsc(3)?-(r8a73a4|r8a7740|sh73a0)$"
+ - pattern: "^renesas,em-(gio|sti|uart)$"
+ - pattern: "^renesas,fsi2-(r8a7740|sh73a0)$"
+ - pattern: "^renesas,hspi-r8a777[89]$"
+ - pattern: "^renesas,sysc-(r8a73a4|r8a7740|rmobile|sh73a0)$"
+ - enum:
+ - renesas,imr-lx4
+ - renesas,mtu2-r7s72100
+
+ # None SoC component compatibles
+ #
+ # Compatibles with the Renesas vendor prefix that do not relate to any SoC
+ # component are OK. New compatibles are allowed.
+ - enum:
+ - renesas,smp-sram
+
+ # Do not fail compatibles not matching the select pattern
+ #
+ # Some SoC components in addition to a Renesas compatible list
+ # compatibles not related to Renesas. The select pattern for this
+ # schema hits all compatibles that have at lest one Renesas compatible
+ # and try to validate all values in that compatible array, allow all
+ # that don't match the schema select pattern. For example,
+ #
+ # compatible = "renesas,r9a07g044-mali", "arm,mali-bifrost";
+ - pattern: "^(?!renesas,.+-.+).+$"
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/soc/renesas/renesas.yaml b/Documentation/devicetree/bindings/soc/renesas/renesas.yaml
index 16ca3ff7b1ae..c1ce4da2dc32 100644
--- a/Documentation/devicetree/bindings/soc/renesas/renesas.yaml
+++ b/Documentation/devicetree/bindings/soc/renesas/renesas.yaml
@@ -348,12 +348,25 @@ properties:
- renesas,white-hawk-cpu # White Hawk CPU board (RTP8A779G0ASKB0FC0SA000)
- const: renesas,r8a779g0
+ - description: R-Car V4H (R8A779G2)
+ items:
+ - enum:
+ - renesas,white-hawk-single # White Hawk Single board (RTP8A779G2ASKB0F10SA001)
+ - const: renesas,r8a779g2
+ - const: renesas,r8a779g0
+
- items:
- enum:
- renesas,white-hawk-breakout # White Hawk BreakOut board (RTP8A779G0ASKB0SB0SA000)
- const: renesas,white-hawk-cpu
- const: renesas,r8a779g0
+ - description: R-Car V4M (R8A779H0)
+ items:
+ - enum:
+ - renesas,gray-hawk-single # Gray Hawk Single board (RTP8A779H0ASKB0F10S)
+ - const: renesas,r8a779h0
+
- description: R-Car H3e (R8A779M0)
items:
- enum:
@@ -475,12 +488,6 @@ properties:
- renesas,r9a07g054l2 # Dual Cortex-A55 RZ/V2L
- const: renesas,r9a07g054
- - description: RZ/V2M (R9A09G011)
- items:
- - enum:
- - renesas,rzv2mevk2 # RZ/V2M Eval Board v2.0
- - const: renesas,r9a09g011
-
- description: RZ/G3S (R9A08G045)
items:
- enum:
@@ -500,6 +507,12 @@ properties:
- const: renesas,r9a08g045s33 # PCIe support
- const: renesas,r9a08g045
+ - description: RZ/V2M (R9A09G011)
+ items:
+ - enum:
+ - renesas,rzv2mevk2 # RZ/V2M Eval Board v2.0
+ - const: renesas,r9a09g011
+
additionalProperties: true
...
diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
index 9793ea6f0fe6..0b87c266760c 100644
--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
@@ -22,12 +22,15 @@ properties:
- rockchip,rk3568-usb2phy-grf
- rockchip,rk3588-bigcore0-grf
- rockchip,rk3588-bigcore1-grf
+ - rockchip,rk3588-hdptxphy-grf
- rockchip,rk3588-ioc
- rockchip,rk3588-php-grf
- rockchip,rk3588-pipe-phy-grf
- rockchip,rk3588-sys-grf
- rockchip,rk3588-pcie3-phy-grf
- rockchip,rk3588-pcie3-pipe-grf
+ - rockchip,rk3588-usb-grf
+ - rockchip,rk3588-usbdpphy-grf
- rockchip,rk3588-vo-grf
- rockchip,rk3588-vop-grf
- rockchip,rv1108-usbgrf
@@ -66,6 +69,9 @@ properties:
reg:
maxItems: 1
+ clocks:
+ maxItems: 1
+
"#address-cells":
const: 1
@@ -248,6 +254,22 @@ allOf:
unevaluatedProperties: false
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,rk3588-vo-grf
+
+ then:
+ required:
+ - clocks
+
+ else:
+ properties:
+ clocks: false
+
+
examples:
- |
#include <dt-bindings/clock/rk3399-cru.h>
diff --git a/Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml b/Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml
index 1794e3799f21..c0c6ce8fc786 100644
--- a/Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml
+++ b/Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml
@@ -72,6 +72,8 @@ allOf:
compatible:
contains:
enum:
+ - google,gs101-peric0-sysreg
+ - google,gs101-peric1-sysreg
- samsung,exynos850-cmgp-sysreg
- samsung,exynos850-peri-sysreg
- samsung,exynos850-sysreg
diff --git a/Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml b/Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml
index d4c0fe1fe435..131aba5ed9f4 100644
--- a/Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml
+++ b/Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml
@@ -117,20 +117,70 @@ properties:
- const: xlnx,zynqmp
- description: Xilinx Kria SOMs
+ minItems: 3
items:
- - const: xlnx,zynqmp-sm-k26-rev1
- - const: xlnx,zynqmp-sm-k26-revB
- - const: xlnx,zynqmp-sm-k26-revA
- - const: xlnx,zynqmp-sm-k26
- - const: xlnx,zynqmp
+ enum:
+ - xlnx,zynqmp-sm-k26-rev2
+ - xlnx,zynqmp-sm-k26-rev1
+ - xlnx,zynqmp-sm-k26-revB
+ - xlnx,zynqmp-sm-k26-revA
+ - xlnx,zynqmp-sm-k26
+ - xlnx,zynqmp
+ allOf:
+ - contains:
+ const: xlnx,zynqmp
+ - contains:
+ const: xlnx,zynqmp-sm-k26
- description: Xilinx Kria SOMs (starter)
+ minItems: 3
items:
- - const: xlnx,zynqmp-smk-k26-rev1
- - const: xlnx,zynqmp-smk-k26-revB
- - const: xlnx,zynqmp-smk-k26-revA
- - const: xlnx,zynqmp-smk-k26
- - const: xlnx,zynqmp
+ enum:
+ - xlnx,zynqmp-smk-k26-rev2
+ - xlnx,zynqmp-smk-k26-rev1
+ - xlnx,zynqmp-smk-k26-revB
+ - xlnx,zynqmp-smk-k26-revA
+ - xlnx,zynqmp-smk-k26
+ - xlnx,zynqmp
+ allOf:
+ - contains:
+ const: xlnx,zynqmp
+ - contains:
+ const: xlnx,zynqmp-smk-k26
+
+ - description: Xilinx Kria SOM KV260 revA/Y/Z
+ minItems: 3
+ items:
+ enum:
+ - xlnx,zynqmp-sk-kv260-revA
+ - xlnx,zynqmp-sk-kv260-revY
+ - xlnx,zynqmp-sk-kv260-revZ
+ - xlnx,zynqmp-sk-kv260
+ - xlnx,zynqmp
+ allOf:
+ - contains:
+ const: xlnx,zynqmp-sk-kv260-revA
+ - contains:
+ const: xlnx,zynqmp-sk-kv260
+ - contains:
+ const: xlnx,zynqmp
+
+ - description: Xilinx Kria SOM KV260 rev2/1/B
+ minItems: 3
+ items:
+ enum:
+ - xlnx,zynqmp-sk-kv260-rev2
+ - xlnx,zynqmp-sk-kv260-rev1
+ - xlnx,zynqmp-sk-kv260-revB
+ - xlnx,zynqmp-sk-kv260
+ - xlnx,zynqmp
+ allOf:
+ - contains:
+ const: xlnx,zynqmp-sk-kv260-revB
+ - contains:
+ const: xlnx,zynqmp-sk-kv260
+ - contains:
+ const: xlnx,zynqmp
- description: AMD MicroBlaze V (QEMU)
items:
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
index 8108c564dd78..aa32dc950e72 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -22,6 +22,7 @@ properties:
- const: allwinner,sun6i-a31-spdif
- const: allwinner,sun8i-h3-spdif
- const: allwinner,sun50i-h6-spdif
+ - const: allwinner,sun50i-h616-spdif
- items:
- const: allwinner,sun8i-a83t-spdif
- const: allwinner,sun8i-h3-spdif
@@ -62,6 +63,8 @@ allOf:
enum:
- allwinner,sun6i-a31-spdif
- allwinner,sun8i-h3-spdif
+ - allwinner,sun50i-h6-spdif
+ - allwinner,sun50i-h616-spdif
then:
required:
@@ -73,7 +76,7 @@ allOf:
contains:
enum:
- allwinner,sun8i-h3-spdif
- - allwinner,sun50i-h6-spdif
+ - allwinner,sun50i-h616-spdif
then:
properties:
diff --git a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml
index ec4b6e547ca6..cdcd7c6f21eb 100644
--- a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml
+++ b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Google SC7280-Herobrine ASoC sound card driver
maintainers:
- - Srinivasa Rao Mandadapu <srivasam@codeaurora.org>
- Judy Hsiao <judyhsiao@chromium.org>
description:
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml
index c29d7942915c..241d20f3aad0 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml
@@ -64,7 +64,7 @@ examples:
#include <dt-bindings/clock/tegra30-car.h>
#include <dt-bindings/soc/tegra-pmc.h>
sound {
- compatible = "lge,tegra-audio-max98089-p895",
+ compatible = "lg,tegra-audio-max98089-p895",
"nvidia,tegra-audio-max98089";
nvidia,model = "LG Optimus Vu MAX98089";
diff --git a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
index a1c96985951f..cf07b8f787a6 100644
--- a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
+++ b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
@@ -56,7 +56,7 @@ properties:
ranges: true
patternProperties:
- "^sram@[a-z0-9]+":
+ "^sram@[a-f0-9]+":
$ref: /schemas/sram/sram.yaml#
unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/tpm/tpm-common.yaml b/Documentation/devicetree/bindings/tpm/tpm-common.yaml
index 90390624a8be..3c1241b2a43f 100644
--- a/Documentation/devicetree/bindings/tpm/tpm-common.yaml
+++ b/Documentation/devicetree/bindings/tpm/tpm-common.yaml
@@ -42,7 +42,7 @@ properties:
resets:
description: Reset controller to reset the TPM
- $ref: /schemas/types.yaml#/definitions/phandle
+ maxItems: 1
reset-gpios:
description: Output GPIO pin to reset the TPM
diff --git a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml
index 88cc1e3a0c88..b2b509b3944d 100644
--- a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml
+++ b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml
@@ -55,9 +55,12 @@ properties:
samsung,sysreg:
$ref: /schemas/types.yaml#/definitions/phandle-array
- description: Should be phandle/offset pair. The phandle to the syscon node
- which indicates the FSYSx sysreg interface and the offset of
- the control register for UFS io coherency setting.
+ items:
+ - items:
+ - description: phandle to FSYSx sysreg node
+ - description: offset of the control register for UFS io coherency setting
+ description:
+ Phandle and offset to the FSYSx sysreg for UFS io coherency setting.
dma-coherent: true
diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
index bb373eb025a5..00f87a558c7d 100644
--- a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
+++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Xilinx SuperSpeed DWC3 USB SoC controller
maintainers:
- - Piyush Mehta <piyush.mehta@amd.com>
+ - Mubin Sayyed <mubin.sayyed@amd.com>
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
index 6d4cfd943f58..445183d9d6db 100644
--- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
+++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
@@ -16,8 +16,9 @@ description:
USB 2.0 traffic.
maintainers:
- - Piyush Mehta <piyush.mehta@amd.com>
- Michal Simek <michal.simek@amd.com>
+ - Mubin Sayyed <mubin.sayyed@amd.com>
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
index 868dffe314bc..a7f75fe36665 100644
--- a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
+++ b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Xilinx udc controller
maintainers:
- - Piyush Mehta <piyush.mehta@amd.com>
+ - Mubin Sayyed <mubin.sayyed@amd.com>
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 1a0dc04f1db4..d371eb572164 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -39,6 +39,8 @@ patternProperties:
description: ShenZhen Asia Better Technology Ltd.
"^acbel,.*":
description: Acbel Polytech Inc.
+ "^acelink,.*":
+ description: Acelink Technology Co., Ltd.
"^acer,.*":
description: Acer Inc.
"^acme,.*":
@@ -500,6 +502,8 @@ patternProperties:
description: FocalTech Systems Co.,Ltd
"^forlinx,.*":
description: Baoding Forlinx Embedded Technology Co., Ltd.
+ "^freebox,.*":
+ description: Freebox SAS
"^freecom,.*":
description: Freecom Gmbh
"^frida,.*":
@@ -719,6 +723,8 @@ patternProperties:
description: JetHome (IP Sokolov P.A.)
"^jianda,.*":
description: Jiandangjing Technology Co., Ltd.
+ "^jide,.*":
+ description: Jide Tech
"^joz,.*":
description: JOZ BV
"^kam,.*":
@@ -1484,6 +1490,8 @@ patternProperties:
description: Ufi Space Co., Ltd.
"^ugoos,.*":
description: Ugoos Industrial Co., Ltd.
+ "^uni-t,.*":
+ description: Uni-Trend Technology (China) Co., Ltd.
"^uniwest,.*":
description: United Western Technologies Corp (UniWest)
"^upisemi,.*":
diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst
index 6ad72ac6861b..d6f7efefea42 100644
--- a/Documentation/doc-guide/kernel-doc.rst
+++ b/Documentation/doc-guide/kernel-doc.rst
@@ -341,6 +341,51 @@ Typedefs with function prototypes can also be documented::
*/
typedef void (*type_name)(struct v4l2_ctrl *arg1, void *arg2);
+Object-like macro documentation
+-------------------------------
+
+Object-like macros are distinct from function-like macros. They are
+differentiated by whether the macro name is immediately followed by a
+left parenthesis ('(') for function-like macros or not followed by one
+for object-like macros.
+
+Function-like macros are handled like functions by ``scripts/kernel-doc``.
+They may have a parameter list. Object-like macros have do not have a
+parameter list.
+
+The general format of an object-like macro kernel-doc comment is::
+
+ /**
+ * define object_name - Brief description.
+ *
+ * Description of the object.
+ */
+
+Example::
+
+ /**
+ * define MAX_ERRNO - maximum errno value that is supported
+ *
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a normal
+ * pointer with the same return value.
+ */
+ #define MAX_ERRNO 4095
+
+Example::
+
+ /**
+ * define DRM_GEM_VRAM_PLANE_HELPER_FUNCS - \
+ * Initializes struct drm_plane_helper_funcs for VRAM handling
+ *
+ * This macro initializes struct drm_plane_helper_funcs to use the
+ * respective helper functions.
+ */
+ #define DRM_GEM_VRAM_PLANE_HELPER_FUNCS \
+ .prepare_fb = drm_gem_vram_plane_helper_prepare_fb, \
+ .cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb
+
+
Highlights and cross-references
-------------------------------
diff --git a/Documentation/doc-guide/maintainer-profile.rst b/Documentation/doc-guide/maintainer-profile.rst
index 755d39f0d407..db3636d0d71d 100644
--- a/Documentation/doc-guide/maintainer-profile.rst
+++ b/Documentation/doc-guide/maintainer-profile.rst
@@ -27,6 +27,13 @@ documentation and ensure that no new errors or warnings have been
introduced. Generating HTML documents and looking at the result will help
to avoid unsightly misunderstandings about how things will be rendered.
+All new documentation (including additions to existing documents) should
+ideally justify who the intended target audience is somewhere in the
+changelog; this way, we ensure that the documentation ends up in the correct
+place. Some possible categories are: kernel developers (experts or
+beginners), userspace programmers, end users and/or system administrators,
+and distributors.
+
Key cycle dates
---------------
diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index 3d125fb4139d..8081ebfe48bc 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -48,13 +48,14 @@ or ``virtualenv``, depending on how your distribution packaged Python 3.
on the Sphinx version, it should be installed separately,
with ``pip install sphinx_rtd_theme``.
-In summary, if you want to install Sphinx version 2.4.4, you should do::
+In summary, if you want to install the latest version of Sphinx, you
+should do::
- $ virtualenv sphinx_2.4.4
- $ . sphinx_2.4.4/bin/activate
- (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
+ $ virtualenv sphinx_latest
+ $ . sphinx_latest/bin/activate
+ (sphinx_latest) $ pip install -r Documentation/sphinx/requirements.txt
-After running ``. sphinx_2.4.4/bin/activate``, the prompt will change,
+After running ``. sphinx_latest/bin/activate``, the prompt will change,
in order to indicate that you're using the new environment. If you
open a new shell, you need to rerun this command to enter again at
the virtual environment before building the documentation.
@@ -63,8 +64,7 @@ Image output
------------
The kernel documentation build system contains an extension that
-handles images on both GraphViz and SVG formats (see
-:ref:`sphinx_kfigure`).
+handles images in both GraphViz and SVG formats (see :ref:`sphinx_kfigure`).
For it to work, you need to install both GraphViz and ImageMagick
packages. If those packages are not installed, the build system will
@@ -108,7 +108,7 @@ further info.
Checking for Sphinx dependencies
--------------------------------
-There's a script that automatically check for Sphinx dependencies. If it can
+There's a script that automatically checks for Sphinx dependencies. If it can
recognize your distribution, it will also give a hint about the install
command line options for your distro::
@@ -283,7 +283,7 @@ Here are some specific guidelines for the kernel documentation:
from highlighting. For a short snippet of code embedded in the text, use \`\`.
-the C domain
+The C domain
------------
The **Sphinx C Domain** (name c) is suited for documentation of C API. E.g. a
diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst
index e3d593841aa7..ea8d16600e16 100644
--- a/Documentation/driver-api/dpll.rst
+++ b/Documentation/driver-api/dpll.rst
@@ -545,7 +545,7 @@ In such scenario, dpll device input signal shall be also configurable
to drive dpll with signal recovered from the PHY netdevice.
This is done by exposing a pin to the netdevice - attaching pin to the
netdevice itself with
-``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``.
+``dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``.
Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user
as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in
nested attribute ``IFLA_DPLL_PIN``.
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index eba851605388..f10decc2c14b 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -9,110 +9,141 @@ of device drivers. This document is an only somewhat organized collection
of some of those interfaces — it will hopefully get better over time! The
available subsections can be seen below.
+
+General information for driver authors
+======================================
+
+This section contains documentation that should, at some point or other, be
+of interest to most developers working on device drivers.
+
.. toctree::
- :caption: Table of contents
- :maxdepth: 2
+ :maxdepth: 1
- driver-model/index
basics
+ driver-model/index
+ device_link
infrastructure
ioctl
- early-userspace/index
pm/index
- clk
+
+Useful support libraries
+========================
+
+This section contains documentation that should, at some point or other, be
+of interest to most developers working on device drivers.
+
+.. toctree::
+ :maxdepth: 1
+
+ early-userspace/index
+ connector
device-io
+ devfreq
dma-buf
- device_link
component
- message-based
- infiniband
- aperture
- frame-buffer
- regulator
- reset
- iio/index
- input
- usb/index
- firewire
- pci/index
+ io-mapping
+ io_ordering
+ uio-howto
+ vfio-mediated-device
+ vfio
+ vfio-pci-device-specific-driver-acceptance
+
+Bus-level documentation
+=======================
+
+.. toctree::
+ :maxdepth: 1
+
+ auxiliary_bus
cxl/index
- spi
- i2c
- ipmb
- ipmi
+ eisa
+ firewire
i3c/index
- interconnect
- devfreq
- hsi
- edac
- scsi
- libata
- target
- mailbox
- mtdnand
- miscellaneous
- mei/index
- mtd/index
- mmc/index
- nvdimm/index
- w1
+ isa
+ men-chameleon-bus
+ pci/index
rapidio/index
- s390-drivers
+ slimbus
+ usb/index
+ virtio/index
vme
+ w1
+ xillybus
+
+
+Subsystem-specific APIs
+=======================
+
+.. toctree::
+ :maxdepth: 1
+
80211/index
- uio-howto
+ acpi/index
+ backlight/lp855x-driver.rst
+ clk
+ console
+ crypto/index
+ dmaengine/index
+ dpll
+ edac
firmware/index
- pin-control
+ fpga/index
+ frame-buffer
+ aperture
+ generic-counter
gpio/index
+ hsi
+ hte/index
+ i2c
+ iio/index
+ infiniband
+ input
+ interconnect
+ ipmb
+ ipmi
+ libata
+ mailbox
md/index
media/index
+ mei/index
+ memory-devices/index
+ message-based
misc_devices
+ miscellaneous
+ mmc/index
+ mtd/index
+ mtdnand
nfc/index
- dmaengine/index
- slimbus
- soundwire/index
- thermal/index
- fpga/index
- acpi/index
- auxiliary_bus
- backlight/lp855x-driver.rst
- connector
- console
- eisa
- isa
- io-mapping
- io_ordering
- generic-counter
- memory-devices/index
- men-chameleon-bus
ntb
+ nvdimm/index
nvmem
parport-lowlevel
+ phy/index
+ pin-control
+ pldmfw/index
pps
ptp
- phy/index
pwm
- pldmfw/index
+ regulator
+ reset
rfkill
+ s390-drivers
+ scsi
serial/index
sm501
+ soundwire/index
+ spi
surface_aggregator/index
switchtec
sync_file
+ target
+ tee
+ thermal/index
tty/index
- vfio-mediated-device
- vfio
- vfio-pci-device-specific-driver-acceptance
- virtio/index
+ wbrf
+ wmi
xilinx/index
- xillybus
zorro
- hte/index
- wmi
- dpll
- wbrf
- crypto/index
- tee
.. only:: subproject and html
diff --git a/Documentation/fault-injection/index.rst b/Documentation/fault-injection/index.rst
index 8408a8a91b34..a6ea1d190222 100644
--- a/Documentation/fault-injection/index.rst
+++ b/Documentation/fault-injection/index.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
===============
-fault-injection
+Fault-injection
===============
.. toctree::
diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst
index 9e38e4c221ca..eb770f891b27 100644
--- a/Documentation/filesystems/files.rst
+++ b/Documentation/filesystems/files.rst
@@ -116,7 +116,7 @@ before and after the reference count increment. This pattern can be seen
in get_file_rcu() and __files_get_rcu().
In addition, it isn't possible to access or check fields in struct file
-without first aqcuiring a reference on it under rcu lookup. Not doing
+without first acquiring a reference on it under rcu lookup. Not doing
that was always very dodgy and it was only usable for non-pointer data
in struct file. With SLAB_TYPESAFE_BY_RCU it is necessary that callers
either first acquire a reference or they must hold the files_lock of the
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index e86b886b64d0..04eaab01314b 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -338,11 +338,14 @@ Supported modes
Currently, the following pairs of encryption modes are supported:
-- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
+- AES-256-XTS for contents and AES-256-CBC-CTS for filenames
- AES-256-XTS for contents and AES-256-HCTR2 for filenames
- Adiantum for both contents and filenames
-- AES-128-CBC-ESSIV for contents and AES-128-CTS-CBC for filenames
-- SM4-XTS for contents and SM4-CTS-CBC for filenames
+- AES-128-CBC-ESSIV for contents and AES-128-CBC-CTS for filenames
+- SM4-XTS for contents and SM4-CBC-CTS for filenames
+
+Note: in the API, "CBC" means CBC-ESSIV, and "CTS" means CBC-CTS.
+So, for example, FSCRYPT_MODE_AES_256_CTS means AES-256-CBC-CTS.
Authenticated encryption modes are not currently supported because of
the difficulty of dealing with ciphertext expansion. Therefore,
@@ -351,11 +354,11 @@ contents encryption uses a block cipher in `XTS mode
`CBC-ESSIV mode
<https://en.wikipedia.org/wiki/Disk_encryption_theory#Encrypted_salt-sector_initialization_vector_(ESSIV)>`_,
or a wide-block cipher. Filenames encryption uses a
-block cipher in `CTS-CBC mode
+block cipher in `CBC-CTS mode
<https://en.wikipedia.org/wiki/Ciphertext_stealing>`_ or a wide-block
cipher.
-The (AES-256-XTS, AES-256-CTS-CBC) pair is the recommended default.
+The (AES-256-XTS, AES-256-CBC-CTS) pair is the recommended default.
It is also the only option that is *guaranteed* to always be supported
if the kernel supports fscrypt at all; see `Kernel config options`_.
@@ -364,7 +367,7 @@ upgrades the filenames encryption to use a wide-block cipher. (A
*wide-block cipher*, also called a tweakable super-pseudorandom
permutation, has the property that changing one bit scrambles the
entire result.) As described in `Filenames encryption`_, a wide-block
-cipher is the ideal mode for the problem domain, though CTS-CBC is the
+cipher is the ideal mode for the problem domain, though CBC-CTS is the
"least bad" choice among the alternatives. For more information about
HCTR2, see `the HCTR2 paper <https://eprint.iacr.org/2021/1441.pdf>`_.
@@ -375,13 +378,13 @@ the work is done by XChaCha12, which is much faster than AES when AES
acceleration is unavailable. For more information about Adiantum, see
`the Adiantum paper <https://eprint.iacr.org/2018/720.pdf>`_.
-The (AES-128-CBC-ESSIV, AES-128-CTS-CBC) pair exists only to support
+The (AES-128-CBC-ESSIV, AES-128-CBC-CTS) pair exists only to support
systems whose only form of AES acceleration is an off-CPU crypto
accelerator such as CAAM or CESA that does not support XTS.
The remaining mode pairs are the "national pride ciphers":
-- (SM4-XTS, SM4-CTS-CBC)
+- (SM4-XTS, SM4-CBC-CTS)
Generally speaking, these ciphers aren't "bad" per se, but they
receive limited security review compared to the usual choices such as
@@ -393,7 +396,7 @@ Kernel config options
Enabling fscrypt support (CONFIG_FS_ENCRYPTION) automatically pulls in
only the basic support from the crypto API needed to use AES-256-XTS
-and AES-256-CTS-CBC encryption. For optimal performance, it is
+and AES-256-CBC-CTS encryption. For optimal performance, it is
strongly recommended to also enable any available platform-specific
kconfig options that provide acceleration for the algorithm(s) you
wish to use. Support for any "non-default" encryption modes typically
@@ -407,7 +410,7 @@ kernel crypto API (see `Inline encryption support`_); in that case,
the file contents mode doesn't need to supported in the kernel crypto
API, but the filenames mode still does.
-- AES-256-XTS and AES-256-CTS-CBC
+- AES-256-XTS and AES-256-CBC-CTS
- Recommended:
- arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
- x86: CONFIG_CRYPTO_AES_NI_INTEL
@@ -433,7 +436,7 @@ API, but the filenames mode still does.
- x86: CONFIG_CRYPTO_NHPOLY1305_SSE2
- x86: CONFIG_CRYPTO_NHPOLY1305_AVX2
-- AES-128-CBC-ESSIV and AES-128-CTS-CBC:
+- AES-128-CBC-ESSIV and AES-128-CBC-CTS:
- Mandatory:
- CONFIG_CRYPTO_ESSIV
- CONFIG_CRYPTO_SHA256 or another SHA-256 implementation
@@ -521,7 +524,7 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
Thus, IV reuse is limited to within a single directory.
-With CTS-CBC, the IV reuse means that when the plaintext filenames share a
+With CBC-CTS, the IV reuse means that when the plaintext filenames share a
common prefix at least as long as the cipher block size (16 bytes for AES), the
corresponding encrypted filenames will also share a common prefix. This is
undesirable. Adiantum and HCTR2 do not have this weakness, as they are
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index e18bc5ae3b35..0ea1e44fa028 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -98,7 +98,6 @@ Documentation for filesystem implementations.
isofs
nilfs2
nfs/index
- ntfs
ntfs3
ocfs2
ocfs2-online-filecheck
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index d5bf4b6b7509..e664061ed55d 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -29,7 +29,7 @@ prototypes::
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
struct vfsmount *(*d_automount)(struct path *path);
int (*d_manage)(const struct path *, bool);
- struct dentry *(*d_real)(struct dentry *, const struct inode *);
+ struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
locking rules:
diff --git a/Documentation/filesystems/ntfs.rst b/Documentation/filesystems/ntfs.rst
deleted file mode 100644
index 5bb093a26485..000000000000
--- a/Documentation/filesystems/ntfs.rst
+++ /dev/null
@@ -1,466 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-================================
-The Linux NTFS filesystem driver
-================================
-
-
-.. Table of contents
-
- - Overview
- - Web site
- - Features
- - Supported mount options
- - Known bugs and (mis-)features
- - Using NTFS volume and stripe sets
- - The Device-Mapper driver
- - The Software RAID / MD driver
- - Limitations when using the MD driver
-
-
-Overview
-========
-
-Linux-NTFS comes with a number of user-space programs known as ntfsprogs.
-These include mkntfs, a full-featured ntfs filesystem format utility,
-ntfsundelete used for recovering files that were unintentionally deleted
-from an NTFS volume and ntfsresize which is used to resize an NTFS partition.
-See the web site for more information.
-
-To mount an NTFS 1.2/3.x (Windows NT4/2000/XP/2003) volume, use the file
-system type 'ntfs'. The driver currently supports read-only mode (with no
-fault-tolerance, encryption or journalling) and very limited, but safe, write
-support.
-
-For fault tolerance and raid support (i.e. volume and stripe sets), you can
-use the kernel's Software RAID / MD driver. See section "Using Software RAID
-with NTFS" for details.
-
-
-Web site
-========
-
-There is plenty of additional information on the linux-ntfs web site
-at http://www.linux-ntfs.org/
-
-The web site has a lot of additional information, such as a comprehensive
-FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS
-userspace utilities, etc.
-
-
-Features
-========
-
-- This is a complete rewrite of the NTFS driver that used to be in the 2.4 and
- earlier kernels. This new driver implements NTFS read support and is
- functionally equivalent to the old ntfs driver and it also implements limited
- write support. The biggest limitation at present is that files/directories
- cannot be created or deleted. See below for the list of write features that
- are so far supported. Another limitation is that writing to compressed files
- is not implemented at all. Also, neither read nor write access to encrypted
- files is so far implemented.
-- The new driver has full support for sparse files on NTFS 3.x volumes which
- the old driver isn't happy with.
-- The new driver supports execution of binaries due to mmap() now being
- supported.
-- The new driver supports loopback mounting of files on NTFS which is used by
- some Linux distributions to enable the user to run Linux from an NTFS
- partition by creating a large file while in Windows and then loopback
- mounting the file while in Linux and creating a Linux filesystem on it that
- is used to install Linux on it.
-- A comparison of the two drivers using::
-
- time find . -type f -exec md5sum "{}" \;
-
- run three times in sequence with each driver (after a reboot) on a 1.4GiB
- NTFS partition, showed the new driver to be 20% faster in total time elapsed
- (from 9:43 minutes on average down to 7:53). The time spent in user space
- was unchanged but the time spent in the kernel was decreased by a factor of
- 2.5 (from 85 CPU seconds down to 33).
-- The driver does not support short file names in general. For backwards
- compatibility, we implement access to files using their short file names if
- they exist. The driver will not create short file names however, and a
- rename will discard any existing short file name.
-- The new driver supports exporting of mounted NTFS volumes via NFS.
-- The new driver supports async io (aio).
-- The new driver supports fsync(2), fdatasync(2), and msync(2).
-- The new driver supports readv(2) and writev(2).
-- The new driver supports access time updates (including mtime and ctime).
-- The new driver supports truncate(2) and open(2) with O_TRUNC. But at present
- only very limited support for highly fragmented files, i.e. ones which have
- their data attribute split across multiple extents, is included. Another
- limitation is that at present truncate(2) will never create sparse files,
- since to mark a file sparse we need to modify the directory entry for the
- file and we do not implement directory modifications yet.
-- The new driver supports write(2) which can both overwrite existing data and
- extend the file size so that you can write beyond the existing data. Also,
- writing into sparse regions is supported and the holes are filled in with
- clusters. But at present only limited support for highly fragmented files,
- i.e. ones which have their data attribute split across multiple extents, is
- included. Another limitation is that write(2) will never create sparse
- files, since to mark a file sparse we need to modify the directory entry for
- the file and we do not implement directory modifications yet.
-
-Supported mount options
-=======================
-
-In addition to the generic mount options described by the manual page for the
-mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the
-following mount options:
-
-======================= =======================================================
-iocharset=name Deprecated option. Still supported but please use
- nls=name in the future. See description for nls=name.
-
-nls=name Character set to use when returning file names.
- Unlike VFAT, NTFS suppresses names that contain
- unconvertible characters. Note that most character
- sets contain insufficient characters to represent all
- possible Unicode characters that can exist on NTFS.
- To be sure you are not missing any files, you are
- advised to use nls=utf8 which is capable of
- representing all Unicode characters.
-
-utf8=<bool> Option no longer supported. Currently mapped to
- nls=utf8 but please use nls=utf8 in the future and
- make sure utf8 is compiled either as module or into
- the kernel. See description for nls=name.
-
-uid=
-gid=
-umask= Provide default owner, group, and access mode mask.
- These options work as documented in mount(8). By
- default, the files/directories are owned by root and
- he/she has read and write permissions, as well as
- browse permission for directories. No one else has any
- access permissions. I.e. the mode on all files is by
- default rw------- and for directories rwx------, a
- consequence of the default fmask=0177 and dmask=0077.
- Using a umask of zero will grant all permissions to
- everyone, i.e. all files and directories will have mode
- rwxrwxrwx.
-
-fmask=
-dmask= Instead of specifying umask which applies both to
- files and directories, fmask applies only to files and
- dmask only to directories.
-
-sloppy=<BOOL> If sloppy is specified, ignore unknown mount options.
- Otherwise the default behaviour is to abort mount if
- any unknown options are found.
-
-show_sys_files=<BOOL> If show_sys_files is specified, show the system files
- in directory listings. Otherwise the default behaviour
- is to hide the system files.
- Note that even when show_sys_files is specified, "$MFT"
- will not be visible due to bugs/mis-features in glibc.
- Further, note that irrespective of show_sys_files, all
- files are accessible by name, i.e. you can always do
- "ls -l \$UpCase" for example to specifically show the
- system file containing the Unicode upcase table.
-
-case_sensitive=<BOOL> If case_sensitive is specified, treat all file names as
- case sensitive and create file names in the POSIX
- namespace. Otherwise the default behaviour is to treat
- file names as case insensitive and to create file names
- in the WIN32/LONG name space. Note, the Linux NTFS
- driver will never create short file names and will
- remove them on rename/delete of the corresponding long
- file name.
- Note that files remain accessible via their short file
- name, if it exists. If case_sensitive, you will need
- to provide the correct case of the short file name.
-
-disable_sparse=<BOOL> If disable_sparse is specified, creation of sparse
- regions, i.e. holes, inside files is disabled for the
- volume (for the duration of this mount only). By
- default, creation of sparse regions is enabled, which
- is consistent with the behaviour of traditional Unix
- filesystems.
-
-errors=opt What to do when critical filesystem errors are found.
- Following values can be used for "opt":
-
- ======== =========================================
- continue DEFAULT, try to clean-up as much as
- possible, e.g. marking a corrupt inode as
- bad so it is no longer accessed, and then
- continue.
- recover At present only supported is recovery of
- the boot sector from the backup copy.
- If read-only mount, the recovery is done
- in memory only and not written to disk.
- ======== =========================================
-
- Note that the options are additive, i.e. specifying::
-
- errors=continue,errors=recover
-
- means the driver will attempt to recover and if that
- fails it will clean-up as much as possible and
- continue.
-
-mft_zone_multiplier= Set the MFT zone multiplier for the volume (this
- setting is not persistent across mounts and can be
- changed from mount to mount but cannot be changed on
- remount). Values of 1 to 4 are allowed, 1 being the
- default. The MFT zone multiplier determines how much
- space is reserved for the MFT on the volume. If all
- other space is used up, then the MFT zone will be
- shrunk dynamically, so this has no impact on the
- amount of free space. However, it can have an impact
- on performance by affecting fragmentation of the MFT.
- In general use the default. If you have a lot of small
- files then use a higher value. The values have the
- following meaning:
-
- ===== =================================
- Value MFT zone size (% of volume size)
- ===== =================================
- 1 12.5%
- 2 25%
- 3 37.5%
- 4 50%
- ===== =================================
-
- Note this option is irrelevant for read-only mounts.
-======================= =======================================================
-
-
-Known bugs and (mis-)features
-=============================
-
-- The link count on each directory inode entry is set to 1, due to Linux not
- supporting directory hard links. This may well confuse some user space
- applications, since the directory names will have the same inode numbers.
- This also speeds up ntfs_read_inode() immensely. And we haven't found any
- problems with this approach so far. If you find a problem with this, please
- let us know.
-
-
-Please send bug reports/comments/feedback/abuse to the Linux-NTFS development
-list at sourceforge: linux-ntfs-dev@lists.sourceforge.net
-
-
-Using NTFS volume and stripe sets
-=================================
-
-For support of volume and stripe sets, you can either use the kernel's
-Device-Mapper driver or the kernel's Software RAID / MD driver. The former is
-the recommended one to use for linear raid. But the latter is required for
-raid level 5. For striping and mirroring, either driver should work fine.
-
-
-The Device-Mapper driver
-------------------------
-
-You will need to create a table of the components of the volume/stripe set and
-how they fit together and load this into the kernel using the dmsetup utility
-(see man 8 dmsetup).
-
-Linear volume sets, i.e. linear raid, has been tested and works fine. Even
-though untested, there is no reason why stripe sets, i.e. raid level 0, and
-mirrors, i.e. raid level 1 should not work, too. Stripes with parity, i.e.
-raid level 5, unfortunately cannot work yet because the current version of the
-Device-Mapper driver does not support raid level 5. You may be able to use the
-Software RAID / MD driver for raid level 5, see the next section for details.
-
-To create the table describing your volume you will need to know each of its
-components and their sizes in sectors, i.e. multiples of 512-byte blocks.
-
-For NT4 fault tolerant volumes you can obtain the sizes using fdisk. So for
-example if one of your partitions is /dev/hda2 you would do::
-
- $ fdisk -ul /dev/hda
-
- Disk /dev/hda: 81.9 GB, 81964302336 bytes
- 255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors
- Units = sectors of 1 * 512 = 512 bytes
-
- Device Boot Start End Blocks Id System
- /dev/hda1 * 63 4209029 2104483+ 83 Linux
- /dev/hda2 4209030 37768814 16779892+ 86 NTFS
- /dev/hda3 37768815 46170809 4200997+ 83 Linux
-
-And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 =
-33559785 sectors.
-
-For Win2k and later dynamic disks, you can for example use the ldminfo utility
-which is part of the Linux LDM tools (the latest version at the time of
-writing is linux-ldm-0.0.8.tar.bz2). You can download it from:
-
- http://www.linux-ntfs.org/
-
-Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go
-into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You
-will find the precompiled (i386) ldminfo utility there. NOTE: You will not be
-able to compile this yourself easily so use the binary version!
-
-Then you would use ldminfo in dump mode to obtain the necessary information::
-
- $ ./ldminfo --dump /dev/hda
-
-This would dump the LDM database found on /dev/hda which describes all of your
-dynamic disks and all the volumes on them. At the bottom you will see the
-VOLUME DEFINITIONS section which is all you really need. You may need to look
-further above to determine which of the disks in the volume definitions is
-which device in Linux. Hint: Run ldminfo on each of your dynamic disks and
-look at the Disk Id close to the top of the output for each (the PRIVATE HEADER
-section). You can then find these Disk Ids in the VBLK DATABASE section in the
-<Disk> components where you will get the LDM Name for the disk that is found in
-the VOLUME DEFINITIONS section.
-
-Note you will also need to enable the LDM driver in the Linux kernel. If your
-distribution did not enable it, you will need to recompile the kernel with it
-enabled. This will create the LDM partitions on each device at boot time. You
-would then use those devices (for /dev/hda they would be /dev/hda1, 2, 3, etc)
-in the Device-Mapper table.
-
-You can also bypass using the LDM driver by using the main device (e.g.
-/dev/hda) and then using the offsets of the LDM partitions into this device as
-the "Start sector of device" when creating the table. Once again ldminfo would
-give you the correct information to do this.
-
-Assuming you know all your devices and their sizes things are easy.
-
-For a linear raid the table would look like this (note all values are in
-512-byte sectors)::
-
- # Offset into Size of this Raid type Device Start sector
- # volume device of device
- 0 1028161 linear /dev/hda1 0
- 1028161 3903762 linear /dev/hdb2 0
- 4931923 2103211 linear /dev/hdc1 0
-
-For a striped volume, i.e. raid level 0, you will need to know the chunk size
-you used when creating the volume. Windows uses 64kiB as the default, so it
-will probably be this unless you changes the defaults when creating the array.
-
-For a raid level 0 the table would look like this (note all values are in
-512-byte sectors)::
-
- # Offset Size Raid Number Chunk 1st Start 2nd Start
- # into of the type of size Device in Device in
- # volume volume stripes device device
- 0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0
-
-If there are more than two devices, just add each of them to the end of the
-line.
-
-Finally, for a mirrored volume, i.e. raid level 1, the table would look like
-this (note all values are in 512-byte sectors)::
-
- # Ofs Size Raid Log Number Region Should Number Source Start Target Start
- # in of the type type of log size sync? of Device in Device in
- # vol volume params mirrors Device Device
- 0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0
-
-If you are mirroring to multiple devices you can specify further targets at the
-end of the line.
-
-Note the "Should sync?" parameter "nosync" means that the two mirrors are
-already in sync which will be the case on a clean shutdown of Windows. If the
-mirrors are not clean, you can specify the "sync" option instead of "nosync"
-and the Device-Mapper driver will then copy the entirety of the "Source Device"
-to the "Target Device" or if you specified multiple target devices to all of
-them.
-
-Once you have your table, save it in a file somewhere (e.g. /etc/ntfsvolume1),
-and hand it over to dmsetup to work with, like so::
-
- $ dmsetup create myvolume1 /etc/ntfsvolume1
-
-You can obviously replace "myvolume1" with whatever name you like.
-
-If it all worked, you will now have the device /dev/device-mapper/myvolume1
-which you can then just use as an argument to the mount command as usual to
-mount the ntfs volume. For example::
-
- $ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1
-
-(You need to create the directory /mnt/myvol1 first and of course you can use
-anything you like instead of /mnt/myvol1 as long as it is an existing
-directory.)
-
-It is advisable to do the mount read-only to see if the volume has been setup
-correctly to avoid the possibility of causing damage to the data on the ntfs
-volume.
-
-
-The Software RAID / MD driver
------------------------------
-
-An alternative to using the Device-Mapper driver is to use the kernel's
-Software RAID / MD driver. For which you need to set up your /etc/raidtab
-appropriately (see man 5 raidtab).
-
-Linear volume sets, i.e. linear raid, as well as stripe sets, i.e. raid level
-0, have been tested and work fine (though see section "Limitations when using
-the MD driver with NTFS volumes" especially if you want to use linear raid).
-Even though untested, there is no reason why mirrors, i.e. raid level 1, and
-stripes with parity, i.e. raid level 5, should not work, too.
-
-You have to use the "persistent-superblock 0" option for each raid-disk in the
-NTFS volume/stripe you are configuring in /etc/raidtab as the persistent
-superblock used by the MD driver would damage the NTFS volume.
-
-Windows by default uses a stripe chunk size of 64k, so you probably want the
-"chunk-size 64k" option for each raid-disk, too.
-
-For example, if you have a stripe set consisting of two partitions /dev/hda5
-and /dev/hdb1 your /etc/raidtab would look like this::
-
- raiddev /dev/md0
- raid-level 0
- nr-raid-disks 2
- nr-spare-disks 0
- persistent-superblock 0
- chunk-size 64k
- device /dev/hda5
- raid-disk 0
- device /dev/hdb1
- raid-disk 1
-
-For linear raid, just change the raid-level above to "raid-level linear", for
-mirrors, change it to "raid-level 1", and for stripe sets with parity, change
-it to "raid-level 5".
-
-Note for stripe sets with parity you will also need to tell the MD driver
-which parity algorithm to use by specifying the option "parity-algorithm
-which", where you need to replace "which" with the name of the algorithm to
-use (see man 5 raidtab for available algorithms) and you will have to try the
-different available algorithms until you find one that works. Make sure you
-are working read-only when playing with this as you may damage your data
-otherwise. If you find which algorithm works please let us know (email the
-linux-ntfs developers list linux-ntfs-dev@lists.sourceforge.net or drop in on
-IRC in channel #ntfs on the irc.freenode.net network) so we can update this
-documentation.
-
-Once the raidtab is setup, run for example raid0run -a to start all devices or
-raid0run /dev/md0 to start a particular md device, in this case /dev/md0.
-
-Then just use the mount command as usual to mount the ntfs volume using for
-example::
-
- mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume
-
-It is advisable to do the mount read-only to see if the md volume has been
-setup correctly to avoid the possibility of causing damage to the data on the
-ntfs volume.
-
-
-Limitations when using the Software RAID / MD driver
------------------------------------------------------
-
-Using the md driver will not work properly if any of your NTFS partitions have
-an odd number of sectors. This is especially important for linear raid as all
-data after the first partition with an odd number of sectors will be offset by
-one or more sectors so if you mount such a partition with write support you
-will cause massive damage to the data on the volume which will only become
-apparent when you try to use the volume again under Windows.
-
-So when using linear raid, make sure that all your partitions have an even
-number of sectors BEFORE attempting to use it. You have been warned!
-
-Even better is to simply use the Device-Mapper for linear raid and then you do
-not have this problem with odd numbers of sectors.
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 1c244866041a..165514401441 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs to record in the upper filesystem
that files have been removed. This is done using whiteouts and opaque
directories (non-directories are always opaque).
-A whiteout is created as a character device with 0/0 device number.
+A whiteout is created as a character device with 0/0 device number or
+as a zero-size regular file with the xattr "trusted.overlay.whiteout".
+
When a whiteout is found in the upper level of a merged directory, any
matching name in the lower level is ignored, and the whiteout itself
is also hidden.
@@ -154,6 +156,13 @@ A directory is made opaque by setting the xattr "trusted.overlay.opaque"
to "y". Where the upper filesystem contains an opaque directory, any
directory in the lower filesystem with the same name is ignored.
+An opaque directory should not conntain any whiteouts, because they do not
+serve any purpose. A merge directory containing regular files with the xattr
+"trusted.overlay.whiteout", should be additionally marked by setting the xattr
+"trusted.overlay.opaque" to "x" on the merge directory itself.
+This is needed to avoid the overhead of checking the "trusted.overlay.whiteout"
+on all entries during readdir in the common case.
+
readdir
-------
@@ -534,8 +543,9 @@ A lower dir with a regular whiteout will always be handled by the overlayfs
mount, so to support storing an effective whiteout file in an overlayfs mount an
alternative form of whiteout is supported. This form is a regular, zero-size
file with the "overlay.whiteout" xattr set, inside a directory with the
-"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs,
-but can be used by userspace tools (like containers) that generate lower layers.
+"overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_).
+These alternative whiteouts are never created by overlayfs, but can be used by
+userspace tools (like containers) that generate lower layers.
These alternative whiteouts can be escaped using the standard xattr escape
mechanism in order to properly nest to any depth.
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 104c6d047d9b..c6a6b9df2104 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -1899,8 +1899,8 @@ For more information on mount propagation see:
These files provide a method to access a task's comm value. It also allows for
a task to set its own or one of its thread siblings comm value. The comm value
is limited in size compared to the cmdline value, so writing anything longer
-then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
-comm value.
+then the kernel's TASK_COMM_LEN (currently 16 chars, including the NUL
+terminator) will result in a truncated comm value.
3.7 /proc/<pid>/task/<tid>/children - Information about task children
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index eebcc0f9e2bc..6e903a903f8f 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -1264,7 +1264,7 @@ defined:
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
- struct dentry *(*d_real)(struct dentry *, const struct inode *);
+ struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
};
``d_revalidate``
@@ -1419,16 +1419,14 @@ defined:
the dentry being transited from.
``d_real``
- overlay/union type filesystems implement this method to return
- one of the underlying dentries hidden by the overlay. It is
- used in two different modes:
+ overlay/union type filesystems implement this method to return one
+ of the underlying dentries of a regular file hidden by the overlay.
- Called from file_dentry() it returns the real dentry matching
- the inode argument. The real dentry may be from a lower layer
- already copied up, but still referenced from the file. This
- mode is selected with a non-NULL inode argument.
+ The 'type' argument takes the values D_REAL_DATA or D_REAL_METADATA
+ for returning the real underlying dentry that refers to the inode
+ hosting the file's data or metadata respectively.
- With NULL inode the topmost real underlying dentry is returned.
+ For non-regular files, the 'dentry' argument is returned.
Each dentry has a pointer to its parent dentry, as well as a hash list
of child dentries. Child dentries are basically like files in a
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 36e61783437c..5298611e00ee 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -22,10 +22,10 @@ community and getting your work upstream.
.. toctree::
:maxdepth: 1
- process/development-process
- process/submitting-patches
+ Development process <process/development-process>
+ Submitting patches <process/submitting-patches>
Code of conduct <process/code-of-conduct>
- maintainer/index
+ Maintainer handbook <maintainer/index>
All development-process docs <process/index>
@@ -38,10 +38,10 @@ kernel.
.. toctree::
:maxdepth: 1
- core-api/index
- driver-api/index
- subsystem-apis
- Locking in the kernel <locking/index>
+ Core API <core-api/index>
+ Driver APIs <driver-api/index>
+ Subsystems <subsystem-apis>
+ Locking <locking/index>
Development tools and processes
===============================
@@ -51,15 +51,15 @@ Various other manuals with useful information for all kernel developers.
.. toctree::
:maxdepth: 1
- process/license-rules
- doc-guide/index
- dev-tools/index
- dev-tools/testing-overview
- kernel-hacking/index
- trace/index
- fault-injection/index
- livepatch/index
- rust/index
+ Licensing rules <process/license-rules>
+ Writing documentation <doc-guide/index>
+ Development tools <dev-tools/index>
+ Testing guide <dev-tools/testing-overview>
+ Hacking guide <kernel-hacking/index>
+ Tracing <trace/index>
+ Fault injection <fault-injection/index>
+ Livepatching <livepatch/index>
+ Rust <rust/index>
User-oriented documentation
@@ -72,11 +72,11 @@ developers seeking information on the kernel's user-space APIs.
.. toctree::
:maxdepth: 1
- admin-guide/index
- The kernel build system <kbuild/index>
- admin-guide/reporting-issues.rst
- User-space tools <tools/index>
- userspace-api/index
+ Administration <admin-guide/index>
+ Build system <kbuild/index>
+ Reporting issues <admin-guide/reporting-issues.rst>
+ Userspace tools <tools/index>
+ Userspace API <userspace-api/index>
See also: the `Linux man pages <https://www.kernel.org/doc/man-pages/>`_,
which are kept separately from the kernel's own documentation.
@@ -89,8 +89,8 @@ platform firmwares.
.. toctree::
:maxdepth: 1
- firmware-guide/index
- devicetree/index
+ Firmware <firmware-guide/index>
+ Firmware and Devicetree <devicetree/index>
Architecture-specific documentation
@@ -99,7 +99,7 @@ Architecture-specific documentation
.. toctree::
:maxdepth: 2
- arch/index
+ CPU architectures <arch/index>
Other documentation
@@ -112,8 +112,7 @@ to ReStructured Text format, or are simply too old.
.. toctree::
:maxdepth: 1
- staging/index
- RAS/ras
+ Unsorted documentation <staging/index>
Translations
@@ -122,7 +121,7 @@ Translations
.. toctree::
:maxdepth: 2
- translations/index
+ Translations <translations/index>
Indices and tables
==================
diff --git a/Documentation/kbuild/Kconfig.recursion-issue-01 b/Documentation/kbuild/Kconfig.recursion-issue-01
index e8877db0461f..ac49836d8ecf 100644
--- a/Documentation/kbuild/Kconfig.recursion-issue-01
+++ b/Documentation/kbuild/Kconfig.recursion-issue-01
@@ -16,13 +16,13 @@
# that are possible for CORE. So for example if CORE_BELL_A_ADVANCED is 'y',
# CORE must be 'y' too.
#
-# * What influences CORE_BELL_A_ADVANCED ?
+# * What influences CORE_BELL_A_ADVANCED?
#
# As the name implies CORE_BELL_A_ADVANCED is an advanced feature of
# CORE_BELL_A so naturally it depends on CORE_BELL_A. So if CORE_BELL_A is 'y'
# we know CORE_BELL_A_ADVANCED can be 'y' too.
#
-# * What influences CORE_BELL_A ?
+# * What influences CORE_BELL_A?
#
# CORE_BELL_A depends on CORE, so CORE influences CORE_BELL_A.
#
@@ -34,7 +34,7 @@
# the "recursive dependency detected" error.
#
# Reading the Documentation/kbuild/Kconfig.recursion-issue-01 file it may be
-# obvious that an easy to solution to this problem should just be the removal
+# obvious that an easy solution to this problem should just be the removal
# of the "select CORE" from CORE_BELL_A_ADVANCED as that is implicit already
# since CORE_BELL_A depends on CORE. Recursive dependency issues are not always
# so trivial to resolve, we provide another example below of practical
diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst
index 18cee1edaecb..b49fb6dc4d0c 100644
--- a/Documentation/maintainer/maintainer-entry-profile.rst
+++ b/Documentation/maintainer/maintainer-entry-profile.rst
@@ -102,7 +102,10 @@ to do something different in the near future.
../doc-guide/maintainer-profile
../nvdimm/maintainer-entry-profile
../arch/riscv/patch-acceptance
+ ../process/maintainer-soc
+ ../process/maintainer-soc-clean-dts
../driver-api/media/maintainer-entry-profile
+ ../process/maintainer-netdev
../driver-api/vfio-pci-device-specific-driver-acceptance
../nvme/feature-and-quirk-policy
../filesystems/xfs/xfs-maintainer-entry-profile
diff --git a/Documentation/mm/slub.rst b/Documentation/mm/slub.rst
index be75971532f5..b517ee28a955 100644
--- a/Documentation/mm/slub.rst
+++ b/Documentation/mm/slub.rst
@@ -9,7 +9,7 @@ SLUB can enable debugging only for selected slabs in order to avoid
an impact on overall system performance which may make a bug more
difficult to find.
-In order to switch debugging on one can add an option ``slub_debug``
+In order to switch debugging on one can add an option ``slab_debug``
to the kernel command line. That will enable full debugging for
all slabs.
@@ -26,16 +26,16 @@ be enabled on the command line. F.e. no tracking information will be
available without debugging on and validation can only partially
be performed if debugging was not switched on.
-Some more sophisticated uses of slub_debug:
+Some more sophisticated uses of slab_debug:
-------------------------------------------
-Parameters may be given to ``slub_debug``. If none is specified then full
+Parameters may be given to ``slab_debug``. If none is specified then full
debugging is enabled. Format:
-slub_debug=<Debug-Options>
+slab_debug=<Debug-Options>
Enable options for all slabs
-slub_debug=<Debug-Options>,<slab name1>,<slab name2>,...
+slab_debug=<Debug-Options>,<slab name1>,<slab name2>,...
Enable options only for select slabs (no spaces
after a comma)
@@ -60,23 +60,23 @@ Possible debug options are::
F.e. in order to boot just with sanity checks and red zoning one would specify::
- slub_debug=FZ
+ slab_debug=FZ
Trying to find an issue in the dentry cache? Try::
- slub_debug=,dentry
+ slab_debug=,dentry
to only enable debugging on the dentry cache. You may use an asterisk at the
end of the slab name, in order to cover all slabs with the same prefix. For
example, here's how you can poison the dentry cache as well as all kmalloc
slabs::
- slub_debug=P,kmalloc-*,dentry
+ slab_debug=P,kmalloc-*,dentry
Red zoning and tracking may realign the slab. We can just apply sanity checks
to the dentry cache with::
- slub_debug=F,dentry
+ slab_debug=F,dentry
Debugging options may require the minimum possible slab order to increase as
a result of storing the metadata (for example, caches with PAGE_SIZE object
@@ -84,20 +84,20 @@ sizes). This has a higher liklihood of resulting in slab allocation errors
in low memory situations or if there's high fragmentation of memory. To
switch off debugging for such caches by default, use::
- slub_debug=O
+ slab_debug=O
You can apply different options to different list of slab names, using blocks
of options. This will enable red zoning for dentry and user tracking for
kmalloc. All other slabs will not get any debugging enabled::
- slub_debug=Z,dentry;U,kmalloc-*
+ slab_debug=Z,dentry;U,kmalloc-*
You can also enable options (e.g. sanity checks and poisoning) for all caches
except some that are deemed too performance critical and don't need to be
debugged by specifying global debug options followed by a list of slab names
with "-" as options::
- slub_debug=FZ;-,zs_handle,zspage
+ slab_debug=FZ;-,zs_handle,zspage
The state of each debug option for a slab can be found in the respective files
under::
@@ -105,7 +105,7 @@ under::
/sys/kernel/slab/<slab name>/
If the file contains 1, the option is enabled, 0 means disabled. The debug
-options from the ``slub_debug`` parameter translate to the following files::
+options from the ``slab_debug`` parameter translate to the following files::
F sanity_checks
Z red_zone
@@ -129,7 +129,7 @@ in order to reduce overhead and increase cache hotness of objects.
Slab validation
===============
-SLUB can validate all object if the kernel was booted with slub_debug. In
+SLUB can validate all object if the kernel was booted with slab_debug. In
order to do so you must have the ``slabinfo`` tool. Then you can do
::
@@ -150,29 +150,29 @@ list_lock once in a while to deal with partial slabs. That overhead is
governed by the order of the allocation for each slab. The allocations
can be influenced by kernel parameters:
-.. slub_min_objects=x (default 4)
-.. slub_min_order=x (default 0)
-.. slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
+.. slab_min_objects=x (default: automatically scaled by number of cpus)
+.. slab_min_order=x (default 0)
+.. slab_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
-``slub_min_objects``
+``slab_min_objects``
allows to specify how many objects must at least fit into one
slab in order for the allocation order to be acceptable. In
general slub will be able to perform this number of
allocations on a slab without consulting centralized resources
(list_lock) where contention may occur.
-``slub_min_order``
+``slab_min_order``
specifies a minimum order of slabs. A similar effect like
- ``slub_min_objects``.
+ ``slab_min_objects``.
-``slub_max_order``
- specified the order at which ``slub_min_objects`` should no
+``slab_max_order``
+ specified the order at which ``slab_min_objects`` should no
longer be checked. This is useful to avoid SLUB trying to
- generate super large order pages to fit ``slub_min_objects``
+ generate super large order pages to fit ``slab_min_objects``
of a slab cache with large object sizes into one high order
page. Setting command line parameter
``debug_guardpage_minorder=N`` (N > 0), forces setting
- ``slub_max_order`` to 0, what cause minimum possible order of
+ ``slab_max_order`` to 0, what cause minimum possible order of
slabs allocation.
SLUB Debug output
@@ -219,7 +219,7 @@ Here is a sample of slub debug output::
FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
If SLUB encounters a corrupted object (full detection requires the kernel
-to be booted with slub_debug) then the following output will be dumped
+to be booted with slab_debug) then the following output will be dumped
into the syslog:
1. Description of the problem encountered
@@ -239,7 +239,7 @@ into the syslog:
pid=<pid of the process>
(Object allocation / free information is only available if SLAB_STORE_USER is
- set for the slab. slub_debug sets that option)
+ set for the slab. slab_debug sets that option)
2. The object contents if an object was involved.
@@ -262,7 +262,7 @@ into the syslog:
the object boundary.
(Redzone information is only available if SLAB_RED_ZONE is set.
- slub_debug sets that option)
+ slab_debug sets that option)
Padding <address> : <bytes>
Unused data to fill up the space in order to get the next object
@@ -296,7 +296,7 @@ Emergency operations
Minimal debugging (sanity checks alone) can be enabled by booting with::
- slub_debug=F
+ slab_debug=F
This will be generally be enough to enable the resiliency features of slub
which will keep the system running even if a bad kernel component will
@@ -311,13 +311,13 @@ and enabling debugging only for that cache
I.e.::
- slub_debug=F,dentry
+ slab_debug=F,dentry
If the corruption occurs by writing after the end of the object then it
may be advisable to enable a Redzone to avoid corrupting the beginning
of other objects::
- slub_debug=FZ,dentry
+ slab_debug=FZ,dentry
Extended slabinfo mode and plotting
===================================
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index c58f7153fcf8..4dfd899a1661 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -11,7 +11,7 @@ $defs:
minimum: 0
len-or-define:
type: [ string, integer ]
- pattern: ^[0-9A-Za-z_]+( - 1)?$
+ pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
# literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
@@ -126,8 +126,9 @@ properties:
Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
type: string
enum-name:
- description: Name for the enum type of the attribute.
- type: string
+ description: |
+ Name for the enum type of the attribute, if empty no name will be used.
+ type: [ string, "null" ]
doc:
description: Documentation of the space.
type: string
@@ -208,6 +209,11 @@ properties:
exact-len:
description: Exact length for a string or a binary attribute.
$ref: '#/$defs/len-or-define'
+ unterminated-ok:
+ description: |
+ For string attributes, do not check whether attribute
+ contains the terminating null character.
+ type: boolean
sub-type: *attr-type
display-hint: &display-hint
description: |
@@ -261,14 +267,16 @@ properties:
the prefix with the upper case name of the command, with dashes replaced by underscores.
type: string
enum-name:
- description: Name for the enum type with commands.
- type: string
+ description: |
+ Name for the enum type with commands, if empty no name will be used.
+ type: [ string, "null" ]
async-prefix:
description: Same as name-prefix but used to render notifications and events to separate enum.
type: string
async-enum:
- description: Name for the enum type with notifications/events.
- type: string
+ description: |
+ Name for the enum type with commands, if empty no name will be used.
+ type: [ string, "null" ]
list:
description: List of commands
type: array
@@ -370,3 +378,22 @@ properties:
type: string
# End genetlink-c
flags: *cmd_flags
+
+ kernel-family:
+ description: Additional global attributes used for kernel C code generation.
+ type: object
+ additionalProperties: False
+ properties:
+ headers:
+ description: |
+ List of extra headers which should be included in the source
+ of the generated code.
+ type: array
+ items:
+ type: string
+ sock-priv:
+ description: |
+ Literal name of the type which is used within the kernel
+ to store the socket state. The type / structure is internal
+ to the kernel, and is not defined in the spec.
+ type: string
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 938703088306..b48ad3b1cc32 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -11,7 +11,7 @@ $defs:
minimum: 0
len-or-define:
type: [ string, integer ]
- pattern: ^[0-9A-Za-z_]+( - 1)?$
+ pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
# literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
@@ -168,8 +168,9 @@ properties:
Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
type: string
enum-name:
- description: Name for the enum type of the attribute.
- type: string
+ description: |
+ Name for the enum type of the attribute, if empty no name will be used.
+ type: [ string, "null" ]
doc:
description: Documentation of the space.
type: string
@@ -251,6 +252,11 @@ properties:
exact-len:
description: Exact length for a string or a binary attribute.
$ref: '#/$defs/len-or-define'
+ unterminated-ok:
+ description: |
+ For string attributes, do not check whether attribute
+ contains the terminating null character.
+ type: boolean
sub-type: *attr-type
display-hint: *display-hint
# Start genetlink-c
@@ -304,14 +310,16 @@ properties:
the prefix with the upper case name of the command, with dashes replaced by underscores.
type: string
enum-name:
- description: Name for the enum type with commands.
- type: string
+ description: |
+ Name for the enum type with commands, if empty no name will be used.
+ type: [ string, "null" ]
async-prefix:
description: Same as name-prefix but used to render notifications and events to separate enum.
type: string
async-enum:
- description: Name for the enum type with notifications/events.
- type: string
+ description: |
+ Name for the enum type with commands, if empty no name will be used.
+ type: [ string, "null" ]
# Start genetlink-legacy
fixed-header: &fixed-header
description: |
@@ -431,3 +439,22 @@ properties:
type: string
# End genetlink-c
flags: *cmd_flags
+
+ kernel-family:
+ description: Additional global attributes used for kernel C code generation.
+ type: object
+ additionalProperties: False
+ properties:
+ headers:
+ description: |
+ List of extra headers which should be included in the source
+ of the generated code.
+ type: array
+ items:
+ type: string
+ sock-priv:
+ description: |
+ Literal name of the type which is used within the kernel
+ to store the socket state. The type / structure is internal
+ to the kernel, and is not defined in the spec.
+ type: string
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index 3283bf458ff1..ebd6ee743fcc 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -11,7 +11,7 @@ $defs:
minimum: 0
len-or-define:
type: [ string, integer ]
- pattern: ^[0-9A-Za-z_]+( - 1)?$
+ pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
# literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
@@ -328,3 +328,22 @@ properties:
The name for the group, used to form the define and the value of the define.
type: string
flags: *cmd_flags
+
+ kernel-family:
+ description: Additional global attributes used for kernel C code generation.
+ type: object
+ additionalProperties: False
+ properties:
+ headers:
+ description: |
+ List of extra headers which should be included in the source
+ of the generated code.
+ type: array
+ items:
+ type: string
+ sock-priv:
+ description: |
+ Literal name of the type which is used within the kernel
+ to store the socket state. The type / structure is internal
+ to the kernel, and is not defined in the spec.
+ type: string
diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml
index 04b92f1a5cd6..a76e54cbadbc 100644
--- a/Documentation/netlink/netlink-raw.yaml
+++ b/Documentation/netlink/netlink-raw.yaml
@@ -11,7 +11,7 @@ $defs:
minimum: 0
len-or-define:
type: [ string, integer ]
- pattern: ^[0-9A-Za-z_]+( - 1)?$
+ pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
# Schema for specs
@@ -152,14 +152,23 @@ properties:
the right formatting mechanism when displaying values of this
type.
enum: [ hex, mac, fddi, ipv4, ipv6, uuid ]
+ struct:
+ description: Name of the nested struct type.
+ type: string
if:
properties:
type:
- oneOf:
- - const: binary
- - const: pad
+ const: pad
then:
required: [ len ]
+ if:
+ properties:
+ type:
+ const: binary
+ then:
+ oneOf:
+ - required: [ len ]
+ - required: [ struct ]
# End genetlink-legacy
attribute-sets:
@@ -180,8 +189,9 @@ properties:
Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
type: string
enum-name:
- description: Name for the enum type of the attribute.
- type: string
+ description: |
+ Name for the enum type of the attribute, if empty no name will be used.
+ type: [ string, "null" ]
doc:
description: Documentation of the space.
type: string
@@ -261,6 +271,11 @@ properties:
exact-len:
description: Exact length for a string or a binary attribute.
$ref: '#/$defs/len-or-define'
+ unterminated-ok:
+ description: |
+ For string attributes, do not check whether attribute
+ contains the terminating null character.
+ type: boolean
sub-type: *attr-type
display-hint: *display-hint
# Start genetlink-c
@@ -362,14 +377,16 @@ properties:
the prefix with the upper case name of the command, with dashes replaced by underscores.
type: string
enum-name:
- description: Name for the enum type with commands.
- type: string
+ description: |
+ Name for the enum type with commands, if empty no name will be used.
+ type: [ string, "null" ]
async-prefix:
description: Same as name-prefix but used to render notifications and events to separate enum.
type: string
async-enum:
- description: Name for the enum type with notifications/events.
- type: string
+ description: |
+ Name for the enum type with commands, if empty no name will be used.
+ type: [ string, "null" ]
# Start genetlink-legacy
fixed-header: &fixed-header
description: |
diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
index cf6eaa0da821..09fbb4c03fc8 100644
--- a/Documentation/netlink/specs/devlink.yaml
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -290,7 +290,7 @@ attribute-sets:
enum: eswitch-mode
-
name: eswitch-inline-mode
- type: u16
+ type: u8
enum: eswitch-inline-mode
-
name: dpipe-tables
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
index b14aed18065f..95b0eb1486bf 100644
--- a/Documentation/netlink/specs/dpll.yaml
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -52,6 +52,40 @@ definitions:
dpll's lock-state shall remain DPLL_LOCK_STATUS_UNLOCKED)
render-max: true
-
+ type: enum
+ name: lock-status-error
+ doc: |
+ if previous status change was done due to a failure, this provides
+ information of dpll device lock status error.
+ Valid values for DPLL_A_LOCK_STATUS_ERROR attribute
+ entries:
+ -
+ name: none
+ doc: |
+ dpll device lock status was changed without any error
+ value: 1
+ -
+ name: undefined
+ doc: |
+ dpll device lock status was changed due to undefined error.
+ Driver fills this value up in case it is not able
+ to obtain suitable exact error type.
+ -
+ name: media-down
+ doc: |
+ dpll device lock status was changed because of associated
+ media got down.
+ This may happen for example if dpll device was previously
+ locked on an input pin of type PIN_TYPE_SYNCE_ETH_PORT.
+ -
+ name: fractional-frequency-offset-too-high
+ doc: |
+ the FFO (Fractional Frequency Offset) between the RX and TX
+ symbol rate on the media got too high.
+ This may happen for example if dpll device was previously
+ locked on an input pin of type PIN_TYPE_SYNCE_ETH_PORT.
+ render-max: true
+ -
type: const
name: temp-divider
value: 1000
@@ -214,6 +248,10 @@ attribute-sets:
name: type
type: u32
enum: type
+ -
+ name: lock-status-error
+ type: u32
+ enum: lock-status-error
-
name: pin
enum-name: dpll_a_pin
@@ -274,6 +312,7 @@ attribute-sets:
-
name: capabilities
type: u32
+ enum: pin-capabilities
-
name: parent-device
type: nest
@@ -379,13 +418,12 @@ operations:
- mode
- mode-supported
- lock-status
+ - lock-status-error
- temp
- clock-id
- type
dump:
- pre: dpll-lock-dumpit
- post: dpll-unlock-dumpit
reply: *dev-attrs
-
@@ -473,8 +511,6 @@ operations:
- fractional-frequency-offset
dump:
- pre: dpll-lock-dumpit
- post: dpll-unlock-dumpit
request:
attributes:
- id
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index 49f90cfb4698..af525ed29792 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -292,13 +292,14 @@ operations:
-
name: get-addr
doc: Get endpoint information
- attribute-set: endpoint
+ attribute-set: attr
dont-validate: [ strict ]
flags: [ uns-admin-perm ]
do: &get-addr-attrs
request:
attributes:
- addr
+ - token
reply:
attributes:
- addr
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 3addac970680..76352dbd2be4 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -74,6 +74,10 @@ definitions:
name: queue-type
type: enum
entries: [ rx, tx ]
+ -
+ name: qstats-scope
+ type: flags
+ entries: [ queue ]
attribute-sets:
-
@@ -265,6 +269,73 @@ attribute-sets:
doc: ID of the NAPI instance which services this queue.
type: u32
+ -
+ name: qstats
+ doc: |
+ Get device statistics, scoped to a device or a queue.
+ These statistics extend (and partially duplicate) statistics available
+ in struct rtnl_link_stats64.
+ Value of the `scope` attribute determines how statistics are
+ aggregated. When aggregated for the entire device the statistics
+ represent the total number of events since last explicit reset of
+ the device (i.e. not a reconfiguration like changing queue count).
+ When reported per-queue, however, the statistics may not add
+ up to the total number of events, will only be reported for currently
+ active objects, and will likely report the number of events since last
+ reconfiguration.
+ attributes:
+ -
+ name: ifindex
+ doc: ifindex of the netdevice to which stats belong.
+ type: u32
+ checks:
+ min: 1
+ -
+ name: queue-type
+ doc: Queue type as rx, tx, for queue-id.
+ type: u32
+ enum: queue-type
+ -
+ name: queue-id
+ doc: Queue ID, if stats are scoped to a single queue instance.
+ type: u32
+ -
+ name: scope
+ doc: |
+ What object type should be used to iterate over the stats.
+ type: uint
+ enum: qstats-scope
+ -
+ name: rx-packets
+ doc: |
+ Number of wire packets successfully received and passed to the stack.
+ For drivers supporting XDP, XDP is considered the first layer
+ of the stack, so packets consumed by XDP are still counted here.
+ type: uint
+ value: 8 # reserve some attr ids in case we need more metadata later
+ -
+ name: rx-bytes
+ doc: Successfully received bytes, see `rx-packets`.
+ type: uint
+ -
+ name: tx-packets
+ doc: |
+ Number of wire packets successfully sent. Packet is considered to be
+ successfully sent once it is in device memory (usually this means
+ the device has issued a DMA completion for the packet).
+ type: uint
+ -
+ name: tx-bytes
+ doc: Successfully sent bytes, see `tx-packets`.
+ type: uint
+ -
+ name: rx-alloc-fail
+ doc: |
+ Number of times skb or buffer allocation failed on the Rx datapath.
+ Allocation failure may, or may not result in a packet drop, depending
+ on driver implementation and whether system recovers quickly.
+ type: uint
+
operations:
list:
-
@@ -405,6 +476,26 @@ operations:
attributes:
- ifindex
reply: *napi-get-op
+ -
+ name: qstats-get
+ doc: |
+ Get / dump fine grained statistics. Which statistics are reported
+ depends on the device and the driver, and whether the driver stores
+ software counters per-queue.
+ attribute-set: qstats
+ dump:
+ request:
+ attributes:
+ - scope
+ reply:
+ attributes:
+ - ifindex
+ - queue-type
+ - queue-id
+ - rx-packets
+ - rx-bytes
+ - tx-packets
+ - tx-bytes
mcast-groups:
list:
diff --git a/Documentation/netlink/specs/nlctrl.yaml b/Documentation/netlink/specs/nlctrl.yaml
new file mode 100644
index 000000000000..b1632b95f725
--- /dev/null
+++ b/Documentation/netlink/specs/nlctrl.yaml
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: nlctrl
+protocol: genetlink-legacy
+uapi-header: linux/genetlink.h
+
+doc: |
+ genetlink meta-family that exposes information about all genetlink
+ families registered in the kernel (including itself).
+
+definitions:
+ -
+ name: op-flags
+ type: flags
+ enum-name:
+ entries:
+ - admin-perm
+ - cmd-cap-do
+ - cmd-cap-dump
+ - cmd-cap-haspol
+ - uns-admin-perm
+ -
+ name: attr-type
+ enum-name: netlink-attribute-type
+ type: enum
+ entries:
+ - invalid
+ - flag
+ - u8
+ - u16
+ - u32
+ - u64
+ - s8
+ - s16
+ - s32
+ - s64
+ - binary
+ - string
+ - nul-string
+ - nested
+ - nested-array
+ - bitfield32
+ - sint
+ - uint
+
+attribute-sets:
+ -
+ name: ctrl-attrs
+ name-prefix: ctrl-attr-
+ attributes:
+ -
+ name: family-id
+ type: u16
+ -
+ name: family-name
+ type: string
+ -
+ name: version
+ type: u32
+ -
+ name: hdrsize
+ type: u32
+ -
+ name: maxattr
+ type: u32
+ -
+ name: ops
+ type: array-nest
+ nested-attributes: op-attrs
+ -
+ name: mcast-groups
+ type: array-nest
+ nested-attributes: mcast-group-attrs
+ -
+ name: policy
+ type: nest-type-value
+ type-value: [ policy-id, attr-id ]
+ nested-attributes: policy-attrs
+ -
+ name: op-policy
+ type: nest-type-value
+ type-value: [ op-id ]
+ nested-attributes: op-policy-attrs
+ -
+ name: op
+ type: u32
+ -
+ name: mcast-group-attrs
+ name-prefix: ctrl-attr-mcast-grp-
+ enum-name:
+ attributes:
+ -
+ name: name
+ type: string
+ -
+ name: id
+ type: u32
+ -
+ name: op-attrs
+ name-prefix: ctrl-attr-op-
+ enum-name:
+ attributes:
+ -
+ name: id
+ type: u32
+ -
+ name: flags
+ type: u32
+ enum: op-flags
+ enum-as-flags: true
+ -
+ name: policy-attrs
+ name-prefix: nl-policy-type-attr-
+ enum-name:
+ attributes:
+ -
+ name: type
+ type: u32
+ enum: attr-type
+ -
+ name: min-value-s
+ type: s64
+ -
+ name: max-value-s
+ type: s64
+ -
+ name: min-value-u
+ type: u64
+ -
+ name: max-value-u
+ type: u64
+ -
+ name: min-length
+ type: u32
+ -
+ name: max-length
+ type: u32
+ -
+ name: policy-idx
+ type: u32
+ -
+ name: policy-maxtype
+ type: u32
+ -
+ name: bitfield32-mask
+ type: u32
+ -
+ name: mask
+ type: u64
+ -
+ name: pad
+ type: pad
+ -
+ name: op-policy-attrs
+ name-prefix: ctrl-attr-policy-
+ enum-name:
+ attributes:
+ -
+ name: do
+ type: u32
+ -
+ name: dump
+ type: u32
+
+operations:
+ enum-model: directional
+ name-prefix: ctrl-cmd-
+ list:
+ -
+ name: getfamily
+ doc: Get / dump genetlink families
+ attribute-set: ctrl-attrs
+ do:
+ request:
+ value: 3
+ attributes:
+ - family-name
+ reply: &all-attrs
+ value: 1
+ attributes:
+ - family-id
+ - family-name
+ - hdrsize
+ - maxattr
+ - mcast-groups
+ - ops
+ - version
+ dump:
+ reply: *all-attrs
+ -
+ name: getpolicy
+ doc: Get / dump genetlink policies
+ attribute-set: ctrl-attrs
+ dump:
+ request:
+ value: 10
+ attributes:
+ - family-name
+ - family-id
+ - op
+ reply:
+ value: 10
+ attributes:
+ - family-id
+ - op-policy
+ - policy
diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
index 1ad01d52a863..8e4d19adee8c 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -942,6 +942,10 @@ attribute-sets:
-
name: gro-ipv4-max-size
type: u32
+ -
+ name: dpll-pin
+ type: nest
+ nested-attributes: link-dpll-pin-attrs
-
name: af-spec-attrs
attributes:
@@ -1627,6 +1631,12 @@ attribute-sets:
-
name: used
type: u8
+ -
+ name: link-dpll-pin-attrs
+ attributes:
+ -
+ name: id
+ type: u32
sub-messages:
-
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index 4346fa402fc9..324fa182cd14 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -48,21 +48,28 @@ definitions:
-
name: bytes
type: u64
+ doc: Number of enqueued bytes
-
name: packets
type: u32
+ doc: Number of enqueued packets
-
name: drops
type: u32
+ doc: Packets dropped because of lack of resources
-
name: overlimits
type: u32
+ doc: |
+ Number of throttle events when this flow goes out of allocated bandwidth
-
name: bps
type: u32
+ doc: Current flow byte rate
-
name: pps
type: u32
+ doc: Current flow packet rate
-
name: qlen
type: u32
@@ -112,6 +119,7 @@ definitions:
-
name: limit
type: u32
+ doc: Queue length; bytes for bfifo, packets for pfifo
-
name: tc-htb-opt
type: struct
@@ -119,11 +127,11 @@ definitions:
-
name: rate
type: binary
- len: 12
+ struct: tc-ratespec
-
name: ceil
type: binary
- len: 12
+ struct: tc-ratespec
-
name: buffer
type: u32
@@ -149,15 +157,19 @@ definitions:
-
name: rate2quantum
type: u32
+ doc: bps->quantum divisor
-
name: defcls
type: u32
+ doc: Default class number
-
name: debug
type: u32
+ doc: Debug flags
-
name: direct-pkts
type: u32
+ doc: Count of non shaped packets
-
name: tc-gred-qopt
type: struct
@@ -165,15 +177,19 @@ definitions:
-
name: limit
type: u32
+ doc: HARD maximal queue length in bytes
-
name: qth-min
type: u32
+ doc: Min average length threshold in bytes
-
name: qth-max
type: u32
+ doc: Max average length threshold in bytes
-
name: DP
type: u32
+ doc: Up to 2^32 DPs
-
name: backlog
type: u32
@@ -195,15 +211,19 @@ definitions:
-
name: Wlog
type: u8
+ doc: log(W)
-
name: Plog
type: u8
+ doc: log(P_max / (qth-max - qth-min))
-
name: Scell_log
type: u8
+ doc: cell size for idle damping
-
name: prio
type: u8
+ doc: Priority of this VQ
-
name: packets
type: u32
@@ -266,9 +286,11 @@ definitions:
-
name: bands
type: u16
+ doc: Number of bands
-
name: max-bands
type: u16
+ doc: Maximum number of queues
-
name: tc-netem-qopt
type: struct
@@ -276,21 +298,138 @@ definitions:
-
name: latency
type: u32
+ doc: Added delay in microseconds
-
name: limit
type: u32
+ doc: Fifo limit in packets
-
name: loss
type: u32
+ doc: Random packet loss (0=none, ~0=100%)
-
name: gap
type: u32
+ doc: Re-ordering gap (0 for none)
-
name: duplicate
type: u32
+ doc: Random packet duplication (0=none, ~0=100%)
-
name: jitter
type: u32
+ doc: Random jitter latency in microseconds
+ -
+ name: tc-netem-gimodel
+ doc: State transition probabilities for 4 state model
+ type: struct
+ members:
+ -
+ name: p13
+ type: u32
+ -
+ name: p31
+ type: u32
+ -
+ name: p32
+ type: u32
+ -
+ name: p14
+ type: u32
+ -
+ name: p23
+ type: u32
+ -
+ name: tc-netem-gemodel
+ doc: Gilbert-Elliot models
+ type: struct
+ members:
+ -
+ name: p
+ type: u32
+ -
+ name: r
+ type: u32
+ -
+ name: h
+ type: u32
+ -
+ name: k1
+ type: u32
+ -
+ name: tc-netem-corr
+ type: struct
+ members:
+ -
+ name: delay-corr
+ type: u32
+ doc: Delay correlation
+ -
+ name: loss-corr
+ type: u32
+ doc: Packet loss correlation
+ -
+ name: dup-corr
+ type: u32
+ doc: Duplicate correlation
+ -
+ name: tc-netem-reorder
+ type: struct
+ members:
+ -
+ name: probability
+ type: u32
+ -
+ name: correlation
+ type: u32
+ -
+ name: tc-netem-corrupt
+ type: struct
+ members:
+ -
+ name: probability
+ type: u32
+ -
+ name: correlation
+ type: u32
+ -
+ name: tc-netem-rate
+ type: struct
+ members:
+ -
+ name: rate
+ type: u32
+ -
+ name: packet-overhead
+ type: s32
+ -
+ name: cell-size
+ type: u32
+ -
+ name: cell-overhead
+ type: s32
+ -
+ name: tc-netem-slot
+ type: struct
+ members:
+ -
+ name: min-delay
+ type: s64
+ -
+ name: max-delay
+ type: s64
+ -
+ name: max-packets
+ type: s32
+ -
+ name: max-bytes
+ type: s32
+ -
+ name: dist-delay
+ type: s64
+ -
+ name: dist-jitter
+ type: s64
-
name: tc-plug-qopt
type: struct
@@ -307,11 +446,13 @@ definitions:
members:
-
name: bands
- type: u16
+ type: u32
+ doc: Number of bands
-
name: priomap
type: binary
len: 16
+ doc: Map of logical priority -> PRIO band
-
name: tc-red-qopt
type: struct
@@ -319,21 +460,27 @@ definitions:
-
name: limit
type: u32
+ doc: Hard queue length in packets
-
name: qth-min
type: u32
+ doc: Min average threshold in packets
-
name: qth-max
type: u32
+ doc: Max average threshold in packets
-
name: Wlog
type: u8
+ doc: log(W)
-
name: Plog
type: u8
+ doc: log(P_max / (qth-max - qth-min))
-
name: Scell-log
type: u8
+ doc: Cell size for idle damping
-
name: flags
type: u8
@@ -369,71 +516,128 @@ definitions:
name: penalty-burst
type: u32
-
- name: tc-sfq-qopt-v1 # TODO nested structs
+ name: tc-sfq-qopt
type: struct
members:
-
name: quantum
type: u32
+ doc: Bytes per round allocated to flow
-
name: perturb-period
type: s32
+ doc: Period of hash perturbation
-
name: limit
type: u32
+ doc: Maximal packets in queue
-
name: divisor
type: u32
+ doc: Hash divisor
-
name: flows
type: u32
+ doc: Maximal number of flows
+ -
+ name: tc-sfqred-stats
+ type: struct
+ members:
+ -
+ name: prob-drop
+ type: u32
+ doc: Early drops, below max threshold
+ -
+ name: forced-drop
+ type: u32
+ doc: Early drops, after max threshold
+ -
+ name: prob-mark
+ type: u32
+ doc: Marked packets, below max threshold
+ -
+ name: forced-mark
+ type: u32
+ doc: Marked packets, after max threshold
+ -
+ name: prob-mark-head
+ type: u32
+ doc: Marked packets, below max threshold
+ -
+ name: forced-mark-head
+ type: u32
+ doc: Marked packets, after max threshold
+ -
+ name: tc-sfq-qopt-v1
+ type: struct
+ members:
+ -
+ name: v0
+ type: binary
+ struct: tc-sfq-qopt
-
name: depth
type: u32
+ doc: Maximum number of packets per flow
-
name: headdrop
type: u32
-
name: limit
type: u32
+ doc: HARD maximal flow queue length in bytes
-
name: qth-min
type: u32
+ doc: Min average length threshold in bytes
-
- name: qth-mac
+ name: qth-max
type: u32
+ doc: Max average length threshold in bytes
-
name: Wlog
type: u8
+ doc: log(W)
-
name: Plog
type: u8
+ doc: log(P_max / (qth-max - qth-min))
-
name: Scell-log
type: u8
+ doc: Cell size for idle damping
-
name: flags
type: u8
-
name: max-P
type: u32
+ doc: probabilty, high resolution
-
- name: prob-drop
- type: u32
+ name: stats
+ type: binary
+ struct: tc-sfqred-stats
+ -
+ name: tc-ratespec
+ type: struct
+ members:
-
- name: forced-drop
- type: u32
+ name: cell-log
+ type: u8
-
- name: prob-mark
- type: u32
+ name: linklayer
+ type: u8
-
- name: forced-mark
- type: u32
+ name: overhead
+ type: u8
-
- name: prob-mark-head
- type: u32
+ name: cell-align
+ type: u8
-
- name: forced-mark-head
+ name: mpu
+ type: u8
+ -
+ name: rate
type: u32
-
name: tc-tbf-qopt
@@ -441,12 +645,12 @@ definitions:
members:
-
name: rate
- type: binary # TODO nested struct tc_ratespec
- len: 12
+ type: binary
+ struct: tc-ratespec
-
name: peakrate
- type: binary # TODO nested struct tc_ratespec
- len: 12
+ type: binary
+ struct: tc-ratespec
-
name: limit
type: u32
@@ -491,9 +695,663 @@ definitions:
-
name: interval
type: s8
+ doc: Sampling period
-
name: ewma-log
type: u8
+ doc: The log() of measurement window weight
+ -
+ name: tc-choke-xstats
+ type: struct
+ members:
+ -
+ name: early
+ type: u32
+ doc: Early drops
+ -
+ name: pdrop
+ type: u32
+ doc: Drops due to queue limits
+ -
+ name: other
+ type: u32
+ doc: Drops due to drop() calls
+ -
+ name: marked
+ type: u32
+ doc: Marked packets
+ -
+ name: matched
+ type: u32
+ doc: Drops due to flow match
+ -
+ name: tc-codel-xstats
+ type: struct
+ members:
+ -
+ name: maxpacket
+ type: u32
+ doc: Largest packet we've seen so far
+ -
+ name: count
+ type: u32
+ doc: How many drops we've done since the last time we entered dropping state
+ -
+ name: lastcount
+ type: u32
+ doc: Count at entry to dropping state
+ -
+ name: ldelay
+ type: u32
+ doc: in-queue delay seen by most recently dequeued packet
+ -
+ name: drop-next
+ type: s32
+ doc: Time to drop next packet
+ -
+ name: drop-overlimit
+ type: u32
+ doc: Number of times max qdisc packet limit was hit
+ -
+ name: ecn-mark
+ type: u32
+ doc: Number of packets we've ECN marked instead of dropped
+ -
+ name: dropping
+ type: u32
+ doc: Are we in a dropping state?
+ -
+ name: ce-mark
+ type: u32
+ doc: Number of CE marked packets because of ce-threshold
+ -
+ name: tc-fq-codel-xstats
+ type: struct
+ members:
+ -
+ name: type
+ type: u32
+ -
+ name: maxpacket
+ type: u32
+ doc: Largest packet we've seen so far
+ -
+ name: drop-overlimit
+ type: u32
+ doc: Number of times max qdisc packet limit was hit
+ -
+ name: ecn-mark
+ type: u32
+ doc: Number of packets we ECN marked instead of being dropped
+ -
+ name: new-flow-count
+ type: u32
+ doc: Number of times packets created a new flow
+ -
+ name: new-flows-len
+ type: u32
+ doc: Count of flows in new list
+ -
+ name: old-flows-len
+ type: u32
+ doc: Count of flows in old list
+ -
+ name: ce-mark
+ type: u32
+ doc: Packets above ce-threshold
+ -
+ name: memory-usage
+ type: u32
+ doc: Memory usage in bytes
+ -
+ name: drop-overmemory
+ type: u32
+ -
+ name: tc-fq-pie-xstats
+ type: struct
+ members:
+ -
+ name: packets-in
+ type: u32
+ doc: Total number of packets enqueued
+ -
+ name: dropped
+ type: u32
+ doc: Packets dropped due to fq_pie_action
+ -
+ name: overlimit
+ type: u32
+ doc: Dropped due to lack of space in queue
+ -
+ name: overmemory
+ type: u32
+ doc: Dropped due to lack of memory in queue
+ -
+ name: ecn-mark
+ type: u32
+ doc: Packets marked with ecn
+ -
+ name: new-flow-count
+ type: u32
+ doc: Count of new flows created by packets
+ -
+ name: new-flows-len
+ type: u32
+ doc: Count of flows in new list
+ -
+ name: old-flows-len
+ type: u32
+ doc: Count of flows in old list
+ -
+ name: memory-usage
+ type: u32
+ doc: Total memory across all queues
+ -
+ name: tc-fq-qd-stats
+ type: struct
+ members:
+ -
+ name: gc-flows
+ type: u64
+ -
+ name: highprio-packets
+ type: u64
+ doc: obsolete
+ -
+ name: tcp-retrans
+ type: u64
+ doc: obsolete
+ -
+ name: throttled
+ type: u64
+ -
+ name: flows-plimit
+ type: u64
+ -
+ name: pkts-too-long
+ type: u64
+ -
+ name: allocation-errors
+ type: u64
+ -
+ name: time-next-delayed-flow
+ type: s64
+ -
+ name: flows
+ type: u32
+ -
+ name: inactive-flows
+ type: u32
+ -
+ name: throttled-flows
+ type: u32
+ -
+ name: unthrottle-latency-ns
+ type: u32
+ -
+ name: ce-mark
+ type: u64
+ doc: Packets above ce-threshold
+ -
+ name: horizon-drops
+ type: u64
+ -
+ name: horizon-caps
+ type: u64
+ -
+ name: fastpath-packets
+ type: u64
+ -
+ name: band-drops
+ type: binary
+ len: 24
+ -
+ name: band-pkt-count
+ type: binary
+ len: 12
+ -
+ name: pad
+ type: pad
+ len: 4
+ -
+ name: tc-hhf-xstats
+ type: struct
+ members:
+ -
+ name: drop-overlimit
+ type: u32
+ doc: Number of times max qdisc packet limit was hit
+ -
+ name: hh-overlimit
+ type: u32
+ doc: Number of times max heavy-hitters was hit
+ -
+ name: hh-tot-count
+ type: u32
+ doc: Number of captured heavy-hitters so far
+ -
+ name: hh-cur-count
+ type: u32
+ doc: Number of current heavy-hitters
+ -
+ name: tc-pie-xstats
+ type: struct
+ members:
+ -
+ name: prob
+ type: u64
+ doc: Current probability
+ -
+ name: delay
+ type: u32
+ doc: Current delay in ms
+ -
+ name: avg-dq-rate
+ type: u32
+ doc: Current average dq rate in bits/pie-time
+ -
+ name: dq-rate-estimating
+ type: u32
+ doc: Is avg-dq-rate being calculated?
+ -
+ name: packets-in
+ type: u32
+ doc: Total number of packets enqueued
+ -
+ name: dropped
+ type: u32
+ doc: Packets dropped due to pie action
+ -
+ name: overlimit
+ type: u32
+ doc: Dropped due to lack of space in queue
+ -
+ name: maxq
+ type: u32
+ doc: Maximum queue size
+ -
+ name: ecn-mark
+ type: u32
+ doc: Packets marked with ecn
+ -
+ name: tc-red-xstats
+ type: struct
+ members:
+ -
+ name: early
+ type: u32
+ doc: Early drops
+ -
+ name: pdrop
+ type: u32
+ doc: Drops due to queue limits
+ -
+ name: other
+ type: u32
+ doc: Drops due to drop() calls
+ -
+ name: marked
+ type: u32
+ doc: Marked packets
+ -
+ name: tc-sfb-xstats
+ type: struct
+ members:
+ -
+ name: earlydrop
+ type: u32
+ -
+ name: penaltydrop
+ type: u32
+ -
+ name: bucketdrop
+ type: u32
+ -
+ name: queuedrop
+ type: u32
+ -
+ name: childdrop
+ type: u32
+ doc: drops in child qdisc
+ -
+ name: marked
+ type: u32
+ -
+ name: maxqlen
+ type: u32
+ -
+ name: maxprob
+ type: u32
+ -
+ name: avgprob
+ type: u32
+ -
+ name: tc-sfq-xstats
+ type: struct
+ members:
+ -
+ name: allot
+ type: s32
+ -
+ name: gnet-stats-basic
+ type: struct
+ members:
+ -
+ name: bytes
+ type: u64
+ -
+ name: packets
+ type: u32
+ -
+ name: gnet-stats-rate-est
+ type: struct
+ members:
+ -
+ name: bps
+ type: u32
+ -
+ name: pps
+ type: u32
+ -
+ name: gnet-stats-rate-est64
+ type: struct
+ members:
+ -
+ name: bps
+ type: u64
+ -
+ name: pps
+ type: u64
+ -
+ name: gnet-stats-queue
+ type: struct
+ members:
+ -
+ name: qlen
+ type: u32
+ -
+ name: backlog
+ type: u32
+ -
+ name: drops
+ type: u32
+ -
+ name: requeues
+ type: u32
+ -
+ name: overlimits
+ type: u32
+ -
+ name: tc-u32-key
+ type: struct
+ members:
+ -
+ name: mask
+ type: u32
+ byte-order: big-endian
+ -
+ name: val
+ type: u32
+ byte-order: big-endian
+ -
+ name: "off"
+ type: s32
+ -
+ name: offmask
+ type: s32
+ -
+ name: tc-u32-sel
+ type: struct
+ members:
+ -
+ name: flags
+ type: u8
+ -
+ name: offshift
+ type: u8
+ -
+ name: nkeys
+ type: u8
+ -
+ name: offmask
+ type: u16
+ byte-order: big-endian
+ -
+ name: "off"
+ type: u16
+ -
+ name: offoff
+ type: s16
+ -
+ name: hoff
+ type: s16
+ -
+ name: hmask
+ type: u32
+ byte-order: big-endian
+ -
+ name: keys
+ type: binary
+ struct: tc-u32-key # TODO: array
+ -
+ name: tc-u32-pcnt
+ type: struct
+ members:
+ -
+ name: rcnt
+ type: u64
+ -
+ name: rhit
+ type: u64
+ -
+ name: kcnts
+ type: u64 # TODO: array
+ -
+ name: tcf-t
+ type: struct
+ members:
+ -
+ name: install
+ type: u64
+ -
+ name: lastuse
+ type: u64
+ -
+ name: expires
+ type: u64
+ -
+ name: firstuse
+ type: u64
+ -
+ name: tc-gen
+ type: struct
+ members:
+ -
+ name: index
+ type: u32
+ -
+ name: capab
+ type: u32
+ -
+ name: action
+ type: s32
+ -
+ name: refcnt
+ type: s32
+ -
+ name: bindcnt
+ type: s32
+ -
+ name: tc-gact-p
+ type: struct
+ members:
+ -
+ name: ptype
+ type: u16
+ -
+ name: pval
+ type: u16
+ -
+ name: paction
+ type: s32
+ -
+ name: tcf-ematch-tree-hdr
+ type: struct
+ members:
+ -
+ name: nmatches
+ type: u16
+ -
+ name: progid
+ type: u16
+ -
+ name: tc-basic-pcnt
+ type: struct
+ members:
+ -
+ name: rcnt
+ type: u64
+ -
+ name: rhit
+ type: u64
+ -
+ name: tc-matchall-pcnt
+ type: struct
+ members:
+ -
+ name: rhit
+ type: u64
+ -
+ name: tc-mpls
+ type: struct
+ members:
+ -
+ name: index
+ type: u32
+ -
+ name: capab
+ type: u32
+ -
+ name: action
+ type: s32
+ -
+ name: refcnt
+ type: s32
+ -
+ name: bindcnt
+ type: s32
+ -
+ name: m-action
+ type: s32
+ -
+ name: tc-police
+ type: struct
+ members:
+ -
+ name: index
+ type: u32
+ -
+ name: action
+ type: s32
+ -
+ name: limit
+ type: u32
+ -
+ name: burst
+ type: u32
+ -
+ name: mtu
+ type: u32
+ -
+ name: rate
+ type: binary
+ struct: tc-ratespec
+ -
+ name: peakrate
+ type: binary
+ struct: tc-ratespec
+ -
+ name: refcnt
+ type: s32
+ -
+ name: bindcnt
+ type: s32
+ -
+ name: capab
+ type: u32
+ -
+ name: tc-pedit-sel
+ type: struct
+ members:
+ -
+ name: index
+ type: u32
+ -
+ name: capab
+ type: u32
+ -
+ name: action
+ type: s32
+ -
+ name: refcnt
+ type: s32
+ -
+ name: bindcnt
+ type: s32
+ -
+ name: nkeys
+ type: u8
+ -
+ name: flags
+ type: u8
+ -
+ name: keys
+ type: binary
+ struct: tc-pedit-key # TODO: array
+ -
+ name: tc-pedit-key
+ type: struct
+ members:
+ -
+ name: mask
+ type: u32
+ -
+ name: val
+ type: u32
+ -
+ name: "off"
+ type: u32
+ -
+ name: at
+ type: u32
+ -
+ name: offmask
+ type: u32
+ -
+ name: shift
+ type: u32
+ -
+ name: tc-vlan
+ type: struct
+ members:
+ -
+ name: index
+ type: u32
+ -
+ name: capab
+ type: u32
+ -
+ name: action
+ type: s32
+ -
+ name: refcnt
+ type: s32
+ -
+ name: bindcnt
+ type: s32
+ -
+ name: v-action
+ type: s32
attribute-sets:
-
name: tc-attrs
@@ -512,7 +1370,9 @@ attribute-sets:
struct: tc-stats
-
name: xstats
- type: binary
+ type: sub-message
+ sub-message: tca-stats-app-msg
+ selector: kind
-
name: rate
type: binary
@@ -553,6 +1413,582 @@ attribute-sets:
name: ext-warn-msg
type: string
-
+ name: tc-act-attrs
+ attributes:
+ -
+ name: kind
+ type: string
+ -
+ name: options
+ type: sub-message
+ sub-message: tc-act-options-msg
+ selector: kind
+ -
+ name: index
+ type: u32
+ -
+ name: stats
+ type: nest
+ nested-attributes: tc-act-stats-attrs
+ -
+ name: pad
+ type: pad
+ -
+ name: cookie
+ type: binary
+ -
+ name: flags
+ type: bitfield32
+ -
+ name: hw-stats
+ type: bitfield32
+ -
+ name: used-hw-stats
+ type: bitfield32
+ -
+ name: in-hw-count
+ type: u32
+ -
+ name: tc-act-stats-attrs
+ attributes:
+ -
+ name: basic
+ type: binary
+ struct: gnet-stats-basic
+ -
+ name: rate-est
+ type: binary
+ struct: gnet-stats-rate-est
+ -
+ name: queue
+ type: binary
+ struct: gnet-stats-queue
+ -
+ name: app
+ type: binary
+ -
+ name: rate-est64
+ type: binary
+ struct: gnet-stats-rate-est64
+ -
+ name: pad
+ type: pad
+ -
+ name: basic-hw
+ type: binary
+ struct: gnet-stats-basic
+ -
+ name: pkt64
+ type: u64
+ -
+ name: tc-act-bpf-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: ops-len
+ type: u16
+ -
+ name: ops
+ type: binary
+ -
+ name: fd
+ type: u32
+ -
+ name: name
+ type: string
+ -
+ name: pad
+ type: pad
+ -
+ name: tag
+ type: binary
+ -
+ name: id
+ type: binary
+ -
+ name: tc-act-connmark-attrs
+ attributes:
+ -
+ name: parms
+ type: binary
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-csum-attrs
+ attributes:
+ -
+ name: parms
+ type: binary
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-ct-attrs
+ attributes:
+ -
+ name: parms
+ type: binary
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: action
+ type: u16
+ -
+ name: zone
+ type: u16
+ -
+ name: mark
+ type: u32
+ -
+ name: mark-mask
+ type: u32
+ -
+ name: labels
+ type: binary
+ -
+ name: labels-mask
+ type: binary
+ -
+ name: nat-ipv4-min
+ type: u32
+ byte-order: big-endian
+ -
+ name: nat-ipv4-max
+ type: u32
+ byte-order: big-endian
+ -
+ name: nat-ipv6-min
+ type: binary
+ -
+ name: nat-ipv6-max
+ type: binary
+ -
+ name: nat-port-min
+ type: u16
+ byte-order: big-endian
+ -
+ name: nat-port-max
+ type: u16
+ byte-order: big-endian
+ -
+ name: pad
+ type: pad
+ -
+ name: helper-name
+ type: string
+ -
+ name: helper-family
+ type: u8
+ -
+ name: helper-proto
+ type: u8
+ -
+ name: tc-act-ctinfo-attrs
+ attributes:
+ -
+ name: pad
+ type: pad
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: act
+ type: binary
+ -
+ name: zone
+ type: u16
+ -
+ name: parms-dscp-mask
+ type: u32
+ -
+ name: parms-dscp-statemask
+ type: u32
+ -
+ name: parms-cpmark-mask
+ type: u32
+ -
+ name: stats-dscp-set
+ type: u64
+ -
+ name: stats-dscp-error
+ type: u64
+ -
+ name: stats-cpmark-set
+ type: u64
+ -
+ name: tc-act-gate-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: pad
+ type: pad
+ -
+ name: priority
+ type: s32
+ -
+ name: entry-list
+ type: binary
+ -
+ name: base-time
+ type: u64
+ -
+ name: cycle-time
+ type: u64
+ -
+ name: cycle-time-ext
+ type: u64
+ -
+ name: flags
+ type: u32
+ -
+ name: clockid
+ type: s32
+ -
+ name: tc-act-ife-attrs
+ attributes:
+ -
+ name: parms
+ type: binary
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: dmac
+ type: binary
+ -
+ name: smac
+ type: binary
+ -
+ name: type
+ type: u16
+ -
+ name: metalst
+ type: binary
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-mirred-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: pad
+ type: pad
+ -
+ name: blockid
+ type: binary
+ -
+ name: tc-act-mpls-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ struct: tc-mpls
+ -
+ name: pad
+ type: pad
+ -
+ name: proto
+ type: u16
+ byte-order: big-endian
+ -
+ name: label
+ type: u32
+ -
+ name: tc
+ type: u8
+ -
+ name: ttl
+ type: u8
+ -
+ name: bos
+ type: u8
+ -
+ name: tc-act-nat-attrs
+ attributes:
+ -
+ name: parms
+ type: binary
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-pedit-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ struct: tc-pedit-sel
+ -
+ name: pad
+ type: pad
+ -
+ name: parms-ex
+ type: binary
+ -
+ name: keys-ex
+ type: binary
+ -
+ name: key-ex
+ type: binary
+ -
+ name: tc-act-simple-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: data
+ type: binary
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-skbedit-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: priority
+ type: u32
+ -
+ name: queue-mapping
+ type: u16
+ -
+ name: mark
+ type: u32
+ -
+ name: pad
+ type: pad
+ -
+ name: ptype
+ type: u16
+ -
+ name: mask
+ type: u32
+ -
+ name: flags
+ type: u64
+ -
+ name: queue-mapping-max
+ type: u16
+ -
+ name: tc-act-skbmod-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: dmac
+ type: binary
+ -
+ name: smac
+ type: binary
+ -
+ name: etype
+ type: binary
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-tunnel-key-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ -
+ name: enc-ipv4-src
+ type: u32
+ byte-order: big-endian
+ -
+ name: enc-ipv4-dst
+ type: u32
+ byte-order: big-endian
+ -
+ name: enc-ipv6-src
+ type: binary
+ -
+ name: enc-ipv6-dst
+ type: binary
+ -
+ name: enc-key-id
+ type: u64
+ byte-order: big-endian
+ -
+ name: pad
+ type: pad
+ -
+ name: enc-dst-port
+ type: u16
+ byte-order: big-endian
+ -
+ name: no-csum
+ type: u8
+ -
+ name: enc-opts
+ type: binary
+ -
+ name: enc-tos
+ type: u8
+ -
+ name: enc-ttl
+ type: u8
+ -
+ name: no-frag
+ type: flag
+ -
+ name: tc-act-vlan-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ struct: tc-vlan
+ -
+ name: push-vlan-id
+ type: u16
+ -
+ name: push-vlan-protocol
+ type: u16
+ -
+ name: pad
+ type: pad
+ -
+ name: push-vlan-priority
+ type: u8
+ -
+ name: push-eth-dst
+ type: binary
+ -
+ name: push-eth-src
+ type: binary
+ -
+ name: tc-basic-attrs
+ attributes:
+ -
+ name: classid
+ type: u32
+ -
+ name: ematches
+ type: nest
+ nested-attributes: tc-ematch-attrs
+ -
+ name: act
+ type: array-nest
+ nested-attributes: tc-act-attrs
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: pcnt
+ type: binary
+ struct: tc-basic-pcnt
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-bpf-attrs
+ attributes:
+ -
+ name: act
+ type: nest
+ nested-attributes: tc-act-attrs
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: classid
+ type: u32
+ -
+ name: ops-len
+ type: u16
+ -
+ name: ops
+ type: binary
+ -
+ name: fd
+ type: u32
+ -
+ name: name
+ type: string
+ -
+ name: flags
+ type: u32
+ -
+ name: flags-gen
+ type: u32
+ -
+ name: tag
+ type: binary
+ -
+ name: id
+ type: u32
+ -
name: tc-cake-attrs
attributes:
-
@@ -641,7 +2077,8 @@ attribute-sets:
type: u32
-
name: tin-stats
- type: binary
+ type: array-nest
+ nested-attributes: tc-cake-tin-stats-attrs
-
name: deficit
type: s32
@@ -661,6 +2098,84 @@ attribute-sets:
name: blue-timer-us
type: s32
-
+ name: tc-cake-tin-stats-attrs
+ attributes:
+ -
+ name: pad
+ type: pad
+ -
+ name: sent-packets
+ type: u32
+ -
+ name: sent-bytes64
+ type: u64
+ -
+ name: dropped-packets
+ type: u32
+ -
+ name: dropped-bytes64
+ type: u64
+ -
+ name: acks-dropped-packets
+ type: u32
+ -
+ name: acks-dropped-bytes64
+ type: u64
+ -
+ name: ecn-marked-packets
+ type: u32
+ -
+ name: ecn-marked-bytes64
+ type: u64
+ -
+ name: backlog-packets
+ type: u32
+ -
+ name: backlog-bytes
+ type: u32
+ -
+ name: threshold-rate64
+ type: u64
+ -
+ name: target-us
+ type: u32
+ -
+ name: interval-us
+ type: u32
+ -
+ name: way-indirect-hits
+ type: u32
+ -
+ name: way-misses
+ type: u32
+ -
+ name: way-collisions
+ type: u32
+ -
+ name: peak-delay-us
+ type: u32
+ -
+ name: avg-delay-us
+ type: u32
+ -
+ name: base-delay-us
+ type: u32
+ -
+ name: sparse-flows
+ type: u32
+ -
+ name: bulk-flows
+ type: u32
+ -
+ name: unresponsive-flows
+ type: u32
+ -
+ name: max-skblen
+ type: u32
+ -
+ name: flow-quantum
+ type: u32
+ -
name: tc-cbs-attrs
attributes:
-
@@ -668,6 +2183,20 @@ attribute-sets:
type: binary
struct: tc-cbs-qopt
-
+ name: tc-cgroup-attrs
+ attributes:
+ -
+ name: act
+ type: nest
+ nested-attributes: tc-act-attrs
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: ematches
+ type: binary
+ -
name: tc-choke-attrs
attributes:
-
@@ -677,6 +2206,9 @@ attribute-sets:
-
name: stab
type: binary
+ checks:
+ min-len: 256
+ max-len: 256
-
name: max-p
type: u32
@@ -705,6 +2237,56 @@ attribute-sets:
name: quantum
type: u32
-
+ name: tc-ematch-attrs
+ attributes:
+ -
+ name: tree-hdr
+ type: binary
+ struct: tcf-ematch-tree-hdr
+ -
+ name: tree-list
+ type: binary
+ -
+ name: tc-flow-attrs
+ attributes:
+ -
+ name: keys
+ type: u32
+ -
+ name: mode
+ type: u32
+ -
+ name: baseclass
+ type: u32
+ -
+ name: rshift
+ type: u32
+ -
+ name: addend
+ type: u32
+ -
+ name: mask
+ type: u32
+ -
+ name: xor
+ type: u32
+ -
+ name: divisor
+ type: u32
+ -
+ name: act
+ type: binary
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: ematches
+ type: binary
+ -
+ name: perturb
+ type: u32
+ -
name: tc-flower-attrs
attributes:
-
@@ -953,15 +2535,19 @@ attribute-sets:
-
name: key-arp-sha
type: binary
+ display-hint: mac
-
name: key-arp-sha-mask
type: binary
+ display-hint: mac
-
name: key-arp-tha
type: binary
+ display-hint: mac
-
name: key-arp-tha-mask
type: binary
+ display-hint: mac
-
name: key-mpls-ttl
type: u8
@@ -1020,10 +2606,12 @@ attribute-sets:
type: u8
-
name: key-enc-opts
- type: binary
+ type: nest
+ nested-attributes: tc-flower-key-enc-opts-attrs
-
name: key-enc-opts-mask
- type: binary
+ type: nest
+ nested-attributes: tc-flower-key-enc-opts-attrs
-
name: in-hw-count
type: u32
@@ -1069,7 +2657,8 @@ attribute-sets:
type: binary
-
name: key-mpls-opts
- type: binary
+ type: nest
+ nested-attributes: tc-flower-key-mpls-opt-attrs
-
name: key-hash
type: u32
@@ -1091,6 +2680,129 @@ attribute-sets:
name: key-l2-tpv3-sid
type: u32
byte-order: big-endian
+ -
+ name: l2-miss
+ type: u8
+ -
+ name: key-cfm
+ type: nest
+ nested-attributes: tc-flower-key-cfm-attrs
+ -
+ name: key-spi
+ type: u32
+ byte-order: big-endian
+ -
+ name: key-spi-mask
+ type: u32
+ byte-order: big-endian
+ -
+ name: tc-flower-key-enc-opts-attrs
+ attributes:
+ -
+ name: geneve
+ type: nest
+ nested-attributes: tc-flower-key-enc-opt-geneve-attrs
+ -
+ name: vxlan
+ type: nest
+ nested-attributes: tc-flower-key-enc-opt-vxlan-attrs
+ -
+ name: erspan
+ type: nest
+ nested-attributes: tc-flower-key-enc-opt-erspan-attrs
+ -
+ name: gtp
+ type: nest
+ nested-attributes: tc-flower-key-enc-opt-gtp-attrs
+ -
+ name: tc-flower-key-enc-opt-geneve-attrs
+ attributes:
+ -
+ name: class
+ type: u16
+ -
+ name: type
+ type: u8
+ -
+ name: data
+ type: binary
+ -
+ name: tc-flower-key-enc-opt-vxlan-attrs
+ attributes:
+ -
+ name: gbp
+ type: u32
+ -
+ name: tc-flower-key-enc-opt-erspan-attrs
+ attributes:
+ -
+ name: ver
+ type: u8
+ -
+ name: index
+ type: u32
+ -
+ name: dir
+ type: u8
+ -
+ name: hwid
+ type: u8
+ -
+ name: tc-flower-key-enc-opt-gtp-attrs
+ attributes:
+ -
+ name: pdu-type
+ type: u8
+ -
+ name: qfi
+ type: u8
+ -
+ name: tc-flower-key-mpls-opt-attrs
+ attributes:
+ -
+ name: lse-depth
+ type: u8
+ -
+ name: lse-ttl
+ type: u8
+ -
+ name: lse-bos
+ type: u8
+ -
+ name: lse-tc
+ type: u8
+ -
+ name: lse-label
+ type: u32
+ -
+ name: tc-flower-key-cfm-attrs
+ attributes:
+ -
+ name: md-level
+ type: u8
+ -
+ name: opcode
+ type: u8
+ -
+ name: tc-fw-attrs
+ attributes:
+ -
+ name: classid
+ type: u32
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: indev
+ type: string
+ -
+ name: act
+ type: array-nest
+ nested-attributes: tc-act-attrs
+ -
+ name: mask
+ type: u32
-
name: tc-gred-attrs
attributes:
@@ -1135,7 +2847,7 @@ attribute-sets:
type: u32
-
name: stat-bytes
- type: u32
+ type: u64
-
name: stat-packets
type: u32
@@ -1232,40 +2944,25 @@ attribute-sets:
name: offload
type: flag
-
- name: tc-act-attrs
+ name: tc-matchall-attrs
attributes:
-
- name: kind
- type: string
+ name: classid
+ type: u32
-
- name: options
- type: sub-message
- sub-message: tc-act-options-msg
- selector: kind
+ name: act
+ type: array-nest
+ nested-attributes: tc-act-attrs
-
- name: index
+ name: flags
type: u32
-
- name: stats
+ name: pcnt
type: binary
+ struct: tc-matchall-pcnt
-
name: pad
type: pad
- -
- name: cookie
- type: binary
- -
- name: flags
- type: bitfield32
- -
- name: hw-stats
- type: bitfield32
- -
- name: used-hw-stats
- type: bitfield32
- -
- name: in-hw-count
- type: u32
-
name: tc-etf-attrs
attributes:
@@ -1304,48 +3001,71 @@ attribute-sets:
-
name: plimit
type: u32
+ doc: Limit of total number of packets in queue
-
name: flow-plimit
type: u32
+ doc: Limit of packets per flow
-
name: quantum
type: u32
+ doc: RR quantum
-
name: initial-quantum
type: u32
+ doc: RR quantum for new flow
-
name: rate-enable
type: u32
+ doc: Enable / disable rate limiting
-
name: flow-default-rate
type: u32
+ doc: Obsolete, do not use
-
name: flow-max-rate
type: u32
+ doc: Per flow max rate
-
name: buckets-log
type: u32
+ doc: log2(number of buckets)
-
name: flow-refill-delay
type: u32
+ doc: Flow credit refill delay in usec
-
name: orphan-mask
type: u32
+ doc: Mask applied to orphaned skb hashes
-
name: low-rate-threshold
type: u32
+ doc: Per packet delay under this rate
-
name: ce-threshold
type: u32
+ doc: DCTCP-like CE marking threshold
-
name: timer-slack
type: u32
-
name: horizon
type: u32
+ doc: Time horizon in usec
-
name: horizon-drop
type: u8
+ doc: Drop packets beyond horizon, or cap their EDT
+ -
+ name: priomap
+ type: binary
+ struct: tc-prio-qopt
+ -
+ name: weights
+ type: binary
+ sub-type: s32
+ doc: Weights for each band
-
name: tc-fq-codel-attrs
attributes:
@@ -1427,6 +3147,7 @@ attribute-sets:
-
name: corr
type: binary
+ struct: tc-netem-corr
-
name: delay-dist
type: binary
@@ -1434,15 +3155,19 @@ attribute-sets:
-
name: reorder
type: binary
+ struct: tc-netem-reorder
-
name: corrupt
type: binary
+ struct: tc-netem-corrupt
-
name: loss
- type: binary
+ type: nest
+ nested-attributes: tc-netem-loss-attrs
-
name: rate
type: binary
+ struct: tc-netem-rate
-
name: ecn
type: u32
@@ -1461,10 +3186,27 @@ attribute-sets:
-
name: slot
type: binary
+ struct: tc-netem-slot
-
name: slot-dist
type: binary
sub-type: s16
+ -
+ name: prng-seed
+ type: u64
+ -
+ name: tc-netem-loss-attrs
+ attributes:
+ -
+ name: gi
+ type: binary
+ doc: General Intuitive - 4 state model
+ struct: tc-netem-gimodel
+ -
+ name: ge
+ type: binary
+ doc: Gilbert Elliot models
+ struct: tc-netem-gemodel
-
name: tc-pie-attrs
attributes:
@@ -1493,6 +3235,44 @@ attribute-sets:
name: dq-rate-estimator
type: u32
-
+ name: tc-police-attrs
+ attributes:
+ -
+ name: tbf
+ type: binary
+ struct: tc-police
+ -
+ name: rate
+ type: binary
+ -
+ name: peakrate
+ type: binary
+ -
+ name: avrate
+ type: u32
+ -
+ name: result
+ type: u32
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: pad
+ type: pad
+ -
+ name: rate64
+ type: u64
+ -
+ name: peakrate64
+ type: u64
+ -
+ name: pktrate64
+ type: u64
+ -
+ name: pktburst64
+ type: u64
+ -
name: tc-qfq-attrs
attributes:
-
@@ -1516,7 +3296,7 @@ attribute-sets:
type: u32
-
name: flags
- type: binary
+ type: bitfield32
-
name: early-drop-block
type: u32
@@ -1524,6 +3304,29 @@ attribute-sets:
name: mark-block
type: u32
-
+ name: tc-route-attrs
+ attributes:
+ -
+ name: classid
+ type: u32
+ -
+ name: to
+ type: u32
+ -
+ name: from
+ type: u32
+ -
+ name: iif
+ type: u32
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: act
+ type: array-nest
+ nested-attributes: tc-act-attrs
+ -
name: tc-taprio-attrs
attributes:
-
@@ -1573,6 +3376,7 @@ attribute-sets:
name: entry
type: nest
nested-attributes: tc-taprio-sched-entry
+ multi-attr: true
-
name: tc-taprio-sched-entry
attributes:
@@ -1629,17 +3433,43 @@ attribute-sets:
name: pad
type: pad
-
- name: tca-gact-attrs
+ name: tc-act-sample-attrs
attributes:
-
name: tm
type: binary
+ struct: tcf-t
-
name: parms
type: binary
+ struct: tc-gen
+ -
+ name: rate
+ type: u32
+ -
+ name: trunc-size
+ type: u32
+ -
+ name: psample-group
+ type: u32
+ -
+ name: pad
+ type: pad
+ -
+ name: tc-act-gact-attrs
+ attributes:
+ -
+ name: tm
+ type: binary
+ struct: tcf-t
+ -
+ name: parms
+ type: binary
+ struct: tc-gen
-
name: prob
type: binary
+ struct: tc-gact-p
-
name: pad
type: pad
@@ -1659,35 +3489,90 @@ attribute-sets:
-
name: basic
type: binary
+ struct: gnet-stats-basic
-
name: rate-est
type: binary
+ struct: gnet-stats-rate-est
-
name: queue
type: binary
+ struct: gnet-stats-queue
-
name: app
- type: binary # TODO sub-message needs 2+ level deep lookup
+ type: sub-message
sub-message: tca-stats-app-msg
selector: kind
-
name: rate-est64
type: binary
+ struct: gnet-stats-rate-est64
-
name: pad
type: pad
-
name: basic-hw
type: binary
+ struct: gnet-stats-basic
-
name: pkt64
+ type: u64
+ -
+ name: tc-u32-attrs
+ attributes:
+ -
+ name: classid
+ type: u32
+ -
+ name: hash
+ type: u32
+ -
+ name: link
+ type: u32
+ -
+ name: divisor
+ type: u32
+ -
+ name: sel
type: binary
+ struct: tc-u32-sel
+ -
+ name: police
+ type: nest
+ nested-attributes: tc-police-attrs
+ -
+ name: act
+ type: array-nest
+ nested-attributes: tc-act-attrs
+ -
+ name: indev
+ type: string
+ -
+ name: pcnt
+ type: binary
+ struct: tc-u32-pcnt
+ -
+ name: mark
+ type: binary
+ struct: tc-u32-mark
+ -
+ name: flags
+ type: u32
+ -
+ name: pad
+ type: pad
sub-messages:
-
name: tc-options-msg
formats:
-
+ value: basic
+ attribute-set: tc-basic-attrs
+ -
+ value: bpf
+ attribute-set: tc-bpf-attrs
+ -
value: bfifo
fixed-header: tc-fifo-qopt
-
@@ -1697,6 +3582,9 @@ sub-messages:
value: cbs
attribute-set: tc-cbs-attrs
-
+ value: cgroup
+ attribute-set: tc-cgroup-attrs
+ -
value: choke
attribute-set: tc-choke-attrs
-
@@ -1714,6 +3602,12 @@ sub-messages:
value: ets
attribute-set: tc-ets-attrs
-
+ value: flow
+ attribute-set: tc-flow-attrs
+ -
+ value: flower
+ attribute-set: tc-flower-attrs
+ -
value: fq
attribute-set: tc-fq-attrs
-
@@ -1723,8 +3617,8 @@ sub-messages:
value: fq_pie
attribute-set: tc-fq-pie-attrs
-
- value: flower
- attribute-set: tc-flower-attrs
+ value: fw
+ attribute-set: tc-fw-attrs
-
value: gred
attribute-set: tc-gred-attrs
@@ -1740,6 +3634,9 @@ sub-messages:
-
value: ingress # no content
-
+ value: matchall
+ attribute-set: tc-matchall-attrs
+ -
value: mq # no content
-
value: mqprio
@@ -1776,6 +3673,9 @@ sub-messages:
value: red
attribute-set: tc-red-attrs
-
+ value: route
+ attribute-set: tc-route-attrs
+ -
value: sfb
fixed-header: tc-sfb-qopt
-
@@ -1787,88 +3687,105 @@ sub-messages:
-
value: tbf
attribute-set: tc-tbf-attrs
+ -
+ value: u32
+ attribute-set: tc-u32-attrs
-
name: tc-act-options-msg
formats:
-
- value: gact
- attribute-set: tca-gact-attrs
- -
- name: tca-stats-app-msg
- formats:
+ value: bpf
+ attribute-set: tc-act-bpf-attrs
-
- value: bfifo
+ value: connmark
+ attribute-set: tc-act-connmark-attrs
-
- value: blackhole
+ value: csum
+ attribute-set: tc-act-csum-attrs
-
- value: cake
- attribute-set: tc-cake-stats-attrs
+ value: ct
+ attribute-set: tc-act-ct-attrs
-
- value: cbs
+ value: ctinfo
+ attribute-set: tc-act-ctinfo-attrs
-
- value: choke
+ value: gact
+ attribute-set: tc-act-gact-attrs
-
- value: clsact
+ value: gate
+ attribute-set: tc-act-gate-attrs
-
- value: codel
+ value: ife
+ attribute-set: tc-act-ife-attrs
-
- value: drr
+ value: mirred
+ attribute-set: tc-act-mirred-attrs
-
- value: etf
+ value: mpls
+ attribute-set: tc-act-mpls-attrs
-
- value: ets
- -
- value: fq
- -
- value: fq_codel
+ value: nat
+ attribute-set: tc-act-nat-attrs
-
- value: fq_pie
+ value: pedit
+ attribute-set: tc-act-pedit-attrs
-
- value: flower
+ value: police
+ attribute-set: tc-act-police-attrs
-
- value: gred
+ value: sample
+ attribute-set: tc-act-sample-attrs
-
- value: hfsc
+ value: simple
+ attribute-set: tc-act-simple-attrs
-
- value: hhf
+ value: skbedit
+ attribute-set: tc-act-skbedit-attrs
-
- value: htb
+ value: skbmod
+ attribute-set: tc-act-skbmod-attrs
-
- value: ingress
+ value: tunnel_key
+ attribute-set: tc-act-tunnel-key-attrs
-
- value: mq
+ value: vlan
+ attribute-set: tc-act-vlan-attrs
+ -
+ name: tca-stats-app-msg
+ formats:
-
- value: mqprio
+ value: cake
+ attribute-set: tc-cake-stats-attrs
-
- value: multiq
+ value: choke
+ fixed-header: tc-choke-xstats
-
- value: netem
+ value: codel
+ fixed-header: tc-codel-xstats
-
- value: noqueue
+ value: fq
+ fixed-header: tc-fq-qd-stats
-
- value: pfifo
+ value: fq_codel
+ fixed-header: tc-fq-codel-xstats
-
- value: pfifo_fast
+ value: fq_pie
+ fixed-header: tc-fq-pie-xstats
-
- value: pfifo_head_drop
+ value: hhf
+ fixed-header: tc-hhf-xstats
-
value: pie
- -
- value: plug
- -
- value: prio
- -
- value: qfq
+ fixed-header: tc-pie-xstats
-
value: red
+ fixed-header: tc-red-xstats
-
value: sfb
+ fixed-header: tc-sfb-xstats
-
value: sfq
- -
- value: taprio
- -
- value: tbf
+ fixed-header: tc-sfq-xstats
operations:
enum-model: directional
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index dceeb0d763aa..72da7057e4cf 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -329,23 +329,24 @@ XDP_SHARED_UMEM option and provide the initial socket's fd in the
sxdp_shared_umem_fd field as you registered the UMEM on that
socket. These two sockets will now share one and the same UMEM.
-There is no need to supply an XDP program like the one in the previous
-case where sockets were bound to the same queue id and
-device. Instead, use the NIC's packet steering capabilities to steer
-the packets to the right queue. In the previous example, there is only
-one queue shared among sockets, so the NIC cannot do this steering. It
-can only steer between queues.
-
-In libbpf, you need to use the xsk_socket__create_shared() API as it
-takes a reference to a FILL ring and a COMPLETION ring that will be
-created for you and bound to the shared UMEM. You can use this
-function for all the sockets you create, or you can use it for the
-second and following ones and use xsk_socket__create() for the first
-one. Both methods yield the same result.
+In this case, it is possible to use the NIC's packet steering
+capabilities to steer the packets to the right queue. This is not
+possible in the previous example as there is only one queue shared
+among sockets, so the NIC cannot do this steering as it can only steer
+between queues.
+
+In libxdp (or libbpf prior to version 1.0), you need to use the
+xsk_socket__create_shared() API as it takes a reference to a FILL ring
+and a COMPLETION ring that will be created for you and bound to the
+shared UMEM. You can use this function for all the sockets you create,
+or you can use it for the second and following ones and use
+xsk_socket__create() for the first one. Both methods yield the same
+result.
Note that a UMEM can be shared between sockets on the same queue id
and device, as well as between queues on the same device and between
-devices at the same time.
+devices at the same time. It is also possible to redirect to any
+socket as long as it is bound to the same umem with XDP_SHARED_UMEM.
XDP_USE_NEED_WAKEUP bind flag
-----------------------------
@@ -822,6 +823,10 @@ A: The short answer is no, that is not supported at the moment. The
switch, or other distribution mechanism, in your NIC to direct
traffic to the correct queue id and socket.
+ Note that if you are using the XDP_SHARED_UMEM option, it is
+ possible to switch traffic between any socket bound to the same
+ umem.
+
Q: My packets are sometimes corrupted. What is wrong?
A: Care has to be taken not to feed the same buffer in the UMEM into
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index f7a73421eb76..e774b48de9f5 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -444,6 +444,18 @@ arp_missed_max
The default value is 2, and the allowable range is 1 - 255.
+coupled_control
+
+ Specifies whether the LACP state machine's MUX in the 802.3ad mode
+ should have separate Collecting and Distributing states.
+
+ This is by implementing the independent control state machine per
+ IEEE 802.1AX-2008 5.4.15 in addition to the existing coupled control
+ state machine.
+
+ The default value is 1. This setting does not separate the Collecting
+ and Distributing states, maintaining the bond in coupled control.
+
downdelay
Specifies the time, in milliseconds, to wait before disabling
diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst
index ba14e7b07869..ef8b73e157b2 100644
--- a/Documentation/networking/bridge.rst
+++ b/Documentation/networking/bridge.rst
@@ -324,7 +324,7 @@ Contact Info
The code is currently maintained by Roopa Prabhu <roopa@nvidia.com> and
Nikolay Aleksandrov <razor@blackwall.org>. Bridge bugs and enhancements
are discussed on the linux-netdev mailing list netdev@vger.kernel.org and
-bridge@lists.linux-foundation.org.
+bridge@lists.linux.dev.
The list is open to anyone interested: http://vger.kernel.org/vger-lists.html#netdev
diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index d7e1ada905b2..62519d38c58b 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -444,6 +444,24 @@ definitions are specified for CAN specific MTUs in include/linux/can.h:
#define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
+Returned Message Flags
+----------------------
+
+When using the system call recvmsg(2) on a RAW or a BCM socket, the
+msg->msg_flags field may contain the following flags:
+
+MSG_DONTROUTE:
+ set when the received frame was created on the local host.
+
+MSG_CONFIRM:
+ set when the frame was sent via the socket it is received on.
+ This flag can be interpreted as a 'transmission confirmation' when the
+ CAN driver supports the echo of frames on driver level, see
+ :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`.
+ (Note: In order to receive such messages on a RAW socket,
+ CAN_RAW_RECV_OWN_MSGS must be set.)
+
+
.. _socketcan-raw-sockets:
RAW Protocol Sockets with can_filters (SOCK_RAW)
@@ -693,22 +711,6 @@ where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
CAN ID ranges from the incoming traffic.
-RAW Socket Returned Message Flags
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-When using recvmsg() call, the msg->msg_flags may contain following flags:
-
-MSG_DONTROUTE:
- set when the received frame was created on the local host.
-
-MSG_CONFIRM:
- set when the frame was sent via the socket it is received on.
- This flag can be interpreted as a 'transmission confirmation' when the
- CAN driver supports the echo of frames on driver level, see
- :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`.
- In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
-
-
Broadcast Manager Protocol Sockets (SOCK_DGRAM)
-----------------------------------------------
diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
index b842bcb14255..a4c7d0c65fd7 100644
--- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -211,10 +211,16 @@ Documentation/networking/net_dim.rst
RX copybreak
============
+
The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
and can be configured by the ETHTOOL_STUNABLE command of the
SIOCETHTOOL ioctl.
+This option controls the maximum packet length for which the RX
+descriptor it was received on would be recycled. When a packet smaller
+than RX copybreak bytes is received, it is copied into a new memory
+buffer and the RX descriptor is returned to HW.
+
Statistics
==========
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 43de285b8a92..6932d8c043c2 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -42,6 +42,7 @@ Contents:
intel/ice
marvell/octeontx2
marvell/octeon_ep
+ marvell/octeon_ep_vf
mellanox/mlx5/index
microsoft/netvsc
neterion/s2io
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index 5038e54586af..934752f675ba 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -368,15 +368,28 @@ more options for Receive Side Scaling (RSS) hash byte configuration.
# ethtool -N <ethX> rx-flow-hash <type> <option>
Where <type> is:
- tcp4 signifying TCP over IPv4
- udp4 signifying UDP over IPv4
- tcp6 signifying TCP over IPv6
- udp6 signifying UDP over IPv6
+ tcp4 signifying TCP over IPv4
+ udp4 signifying UDP over IPv4
+ gtpc4 signifying GTP-C over IPv4
+ gtpc4t signifying GTP-C (include TEID) over IPv4
+ gtpu4 signifying GTP-U over IPV4
+ gtpu4e signifying GTP-U and Extension Header over IPV4
+ gtpu4u signifying GTP-U PSC Uplink over IPV4
+ gtpu4d signifying GTP-U PSC Downlink over IPV4
+ tcp6 signifying TCP over IPv6
+ udp6 signifying UDP over IPv6
+ gtpc6 signifying GTP-C over IPv6
+ gtpc6t signifying GTP-C (include TEID) over IPv6
+ gtpu6 signifying GTP-U over IPV6
+ gtpu6e signifying GTP-U and Extension Header over IPV6
+ gtpu6u signifying GTP-U PSC Uplink over IPV6
+ gtpu6d signifying GTP-U PSC Downlink over IPV6
And <option> is one or more of:
s Hash on the IP source address of the Rx packet.
d Hash on the IP destination address of the Rx packet.
f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
+ e Hash on GTP Packet on TEID (4bytes) of the Rx packet.
Accelerated Receive Flow Steering (aRFS)
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst
new file mode 100644
index 000000000000..603133d0b92f
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+=======================================================================
+Linux kernel networking driver for Marvell's Octeon PCI Endpoint NIC VF
+=======================================================================
+
+Network driver for Marvell's Octeon PCI EndPoint NIC VF.
+Copyright (c) 2020 Marvell International Ltd.
+
+Overview
+========
+This driver implements networking functionality of Marvell's Octeon PCI
+EndPoint NIC VF.
+
+Supported Devices
+=================
+Currently, this driver support following devices:
+ * Network controller: Cavium, Inc. Device b203
+ * Network controller: Cavium, Inc. Device b403
+ * Network controller: Cavium, Inc. Device b103
+ * Network controller: Cavium, Inc. Device b903
+ * Network controller: Cavium, Inc. Device ba03
+ * Network controller: Cavium, Inc. Device bc03
+ * Network controller: Cavium, Inc. Device bd03
diff --git a/Documentation/networking/device_drivers/wwan/t7xx.rst b/Documentation/networking/device_drivers/wwan/t7xx.rst
index dd5b731957ca..f346f5f85f15 100644
--- a/Documentation/networking/device_drivers/wwan/t7xx.rst
+++ b/Documentation/networking/device_drivers/wwan/t7xx.rst
@@ -39,6 +39,34 @@ command and receive response:
- open the AT control channel using a UART tool or a special user tool
+Sysfs
+=====
+The driver provides sysfs interfaces to userspace.
+
+t7xx_mode
+---------
+The sysfs interface provides userspace with access to the device mode, this interface
+supports read and write operations.
+
+Device mode:
+
+- ``unknown`` represents that device in unknown status
+- ``ready`` represents that device in ready status
+- ``reset`` represents that device in reset status
+- ``fastboot_switching`` represents that device in fastboot switching status
+- ``fastboot_download`` represents that device in fastboot download status
+- ``fastboot_dump`` represents that device in fastboot dump status
+
+Read from userspace to get the current device mode.
+
+::
+ $ cat /sys/bus/pci/devices/${bdf}/t7xx_mode
+
+Write from userspace to set the device mode.
+
+::
+ $ echo fastboot_switching > /sys/bus/pci/devices/${bdf}/t7xx_mode
+
Management application development
==================================
The driver and userspace interfaces are described below. The MBIM protocol is
@@ -97,6 +125,20 @@ The driver exposes an AT port by implementing AT WWAN Port.
The userspace end of the control port is a /dev/wwan0at0 character
device. Application shall use this interface to issue AT commands.
+fastboot port userspace ABI
+---------------------------
+
+/dev/wwan0fastboot0 character device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The driver exposes a fastboot protocol interface by implementing
+fastboot WWAN Port. The userspace end of the fastboot channel pipe is a
+/dev/wwan0fastboot0 character device. Application shall use this interface for
+fastboot protocol communication.
+
+Please note that driver needs to be reloaded to export /dev/wwan0fastboot0
+port, because device needs a cold reset after enter ``fastboot_switching``
+mode.
+
The MediaTek's T700 modem supports the 3GPP TS 27.007 [4] specification.
References
@@ -118,3 +160,7 @@ speak the Mobile Interface Broadband Model (MBIM) protocol"*
[4] *Specification # 27.007 - 3GPP*
- https://www.3gpp.org/DynaReport/27007.htm
+
+[5] *fastboot "a mechanism for communicating with bootloaders"*
+
+- https://android.googlesource.com/platform/system/core/+/refs/heads/main/fastboot/README.md
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index e33ad2401ad7..562f46b41274 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -126,7 +126,7 @@ Users may also set the RoCE capability of the function using
`devlink port function set roce` command.
Users may also set the function as migratable using
-'devlink port function set migratable' command.
+`devlink port function set migratable` command.
Users may also set the IPsec crypto capability of the function using
`devlink port function set ipsec_crypto` command.
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 702f204a3dbd..456985407475 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -97,6 +97,10 @@ parameters.
When metadata is disabled, the above use cases will fail to initialize if
users try to enable them.
+
+ Note: Setting this parameter does not take effect immediately. Setting
+ must happen in legacy mode and eswitch port metadata takes effect after
+ enabling switchdev mode.
* - ``hairpin_num_queues``
- u32
- driverinit
@@ -246,7 +250,7 @@ them in realtime.
Description of the vnic counters:
-- total_q_under_processor_handle
+- total_error_queues
number of queues in an error state due to
an async error or errored command.
- send_queue_priority_update_flow
@@ -255,7 +259,8 @@ Description of the vnic counters:
number of times CQ entered an error state due to an overflow.
- async_eq_overrun
number of times an EQ mapped to async events was overrun.
- comp_eq_overrun number of times an EQ mapped to completion events was
+- comp_eq_overrun
+ number of times an EQ mapped to completion events was
overrun.
- quota_exceeded_command
number of commands issued and failed due to quota exceeded.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 69f3d6dcd9fd..473d72c36d61 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -74,6 +74,7 @@ Contents:
mpls-sysctl
mptcp-sysctl
multiqueue
+ multi-pf-netdev
napi
net_cachelines/index
netconsole
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 7afff42612e9..bd50df6a5a42 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2503,7 +2503,7 @@ use_tempaddr - INTEGER
temp_valid_lft - INTEGER
valid lifetime (in seconds) for temporary addresses. If less than the
- minimum required lifetime (typically 5 seconds), temporary addresses
+ minimum required lifetime (typically 5-7 seconds), temporary addresses
will not be created.
Default: 172800 (2 days)
@@ -2511,7 +2511,7 @@ temp_valid_lft - INTEGER
temp_prefered_lft - INTEGER
Preferred lifetime (in seconds) for temporary addresses. If
temp_prefered_lft is less than the minimum required lifetime (typically
- 5 seconds), temporary addresses will not be created. If
+ 5-7 seconds), the preferred lifetime is the minimum required. If
temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime
is temp_valid_lft.
@@ -2535,6 +2535,16 @@ max_desync_factor - INTEGER
Default: 600
+regen_min_advance - INTEGER
+ How far in advance (in seconds), at minimum, to create a new temporary
+ address before the current one is deprecated. This value is added to
+ the amount of time that may be required for duplicate address detection
+ to determine when to create a new address. Linux permits setting this
+ value to less than the default of 2 seconds, but a value less than 2
+ does not conform to RFC 8981.
+
+ Default: 2
+
regen_max_retry - INTEGER
Number of attempts before give up attempting to generate
valid temporary addresses.
diff --git a/Documentation/networking/l2tp.rst b/Documentation/networking/l2tp.rst
index 7f383e99dbad..8496b467dea4 100644
--- a/Documentation/networking/l2tp.rst
+++ b/Documentation/networking/l2tp.rst
@@ -386,12 +386,19 @@ Sample userspace code:
- Create session PPPoX data socket::
+ /* Input: the L2TP tunnel UDP socket `tunnel_fd`, which needs to be
+ * bound already (both sockname and peername), otherwise it will not be
+ * ready.
+ */
+
struct sockaddr_pppol2tp sax;
- int fd;
+ int session_fd;
+ int ret;
+
+ session_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ if (session_fd < 0)
+ return -errno;
- /* Note, the tunnel socket must be bound already, else it
- * will not be ready
- */
sax.sa_family = AF_PPPOX;
sax.sa_protocol = PX_PROTO_OL2TP;
sax.pppol2tp.fd = tunnel_fd;
@@ -406,12 +413,128 @@ Sample userspace code:
/* session_fd is the fd of the session's PPPoL2TP socket.
* tunnel_fd is the fd of the tunnel UDP / L2TPIP socket.
*/
- fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
- if (fd < 0 ) {
+ ret = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
+ if (ret < 0 ) {
+ close(session_fd);
+ return -errno;
+ }
+
+ return session_fd;
+
+L2TP control packets will still be available for read on `tunnel_fd`.
+
+ - Create PPP channel::
+
+ /* Input: the session PPPoX data socket `session_fd` which was created
+ * as described above.
+ */
+
+ int ppp_chan_fd;
+ int chindx;
+ int ret;
+
+ ret = ioctl(session_fd, PPPIOCGCHAN, &chindx);
+ if (ret < 0)
+ return -errno;
+
+ ppp_chan_fd = open("/dev/ppp", O_RDWR);
+ if (ppp_chan_fd < 0)
+ return -errno;
+
+ ret = ioctl(ppp_chan_fd, PPPIOCATTCHAN, &chindx);
+ if (ret < 0) {
+ close(ppp_chan_fd);
return -errno;
}
+
+ return ppp_chan_fd;
+
+LCP PPP frames will be available for read on `ppp_chan_fd`.
+
+ - Create PPP interface::
+
+ /* Input: the PPP channel `ppp_chan_fd` which was created as described
+ * above.
+ */
+
+ int ifunit = -1;
+ int ppp_if_fd;
+ int ret;
+
+ ppp_if_fd = open("/dev/ppp", O_RDWR);
+ if (ppp_if_fd < 0)
+ return -errno;
+
+ ret = ioctl(ppp_if_fd, PPPIOCNEWUNIT, &ifunit);
+ if (ret < 0) {
+ close(ppp_if_fd);
+ return -errno;
+ }
+
+ ret = ioctl(ppp_chan_fd, PPPIOCCONNECT, &ifunit);
+ if (ret < 0) {
+ close(ppp_if_fd);
+ return -errno;
+ }
+
+ return ppp_if_fd;
+
+IPCP/IPv6CP PPP frames will be available for read on `ppp_if_fd`.
+
+The ppp<ifunit> interface can then be configured as usual with netlink's
+RTM_NEWLINK, RTM_NEWADDR, RTM_NEWROUTE, or ioctl's SIOCSIFMTU, SIOCSIFADDR,
+SIOCSIFDSTADDR, SIOCSIFNETMASK, SIOCSIFFLAGS, or with the `ip` command.
+
+ - Bridging L2TP sessions which have PPP pseudowire types (this is also called
+ L2TP tunnel switching or L2TP multihop) is supported by bridging the PPP
+ channels of the two L2TP sessions to be bridged::
+
+ /* Input: the session PPPoX data sockets `session_fd1` and `session_fd2`
+ * which were created as described further above.
+ */
+
+ int ppp_chan_fd;
+ int chindx1;
+ int chindx2;
+ int ret;
+
+ ret = ioctl(session_fd1, PPPIOCGCHAN, &chindx1);
+ if (ret < 0)
+ return -errno;
+
+ ret = ioctl(session_fd2, PPPIOCGCHAN, &chindx2);
+ if (ret < 0)
+ return -errno;
+
+ ppp_chan_fd = open("/dev/ppp", O_RDWR);
+ if (ppp_chan_fd < 0)
+ return -errno;
+
+ ret = ioctl(ppp_chan_fd, PPPIOCATTCHAN, &chindx1);
+ if (ret < 0) {
+ close(ppp_chan_fd);
+ return -errno;
+ }
+
+ ret = ioctl(ppp_chan_fd, PPPIOCBRIDGECHAN, &chindx2);
+ close(ppp_chan_fd);
+ if (ret < 0)
+ return -errno;
+
return 0;
+It can be noted that when bridging PPP channels, the PPP session is not locally
+terminated, and no local PPP interface is created. PPP frames arriving on one
+channel are directly passed to the other channel, and vice versa.
+
+The PPP channel does not need to be kept open. Only the session PPPoX data
+sockets need to be kept open.
+
+More generally, it is also possible in the same way to e.g. bridge a PPPoL2TP
+PPP channel with other types of PPP channels, such as PPPoE.
+
+See more details for the PPP side in ppp_generic.rst.
+
Old L2TPv2-only API
-------------------
diff --git a/Documentation/networking/multi-pf-netdev.rst b/Documentation/networking/multi-pf-netdev.rst
new file mode 100644
index 000000000000..be8e4bcadf11
--- /dev/null
+++ b/Documentation/networking/multi-pf-netdev.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===============
+Multi-PF Netdev
+===============
+
+Contents
+========
+
+- `Background`_
+- `Overview`_
+- `mlx5 implementation`_
+- `Channels distribution`_
+- `Observability`_
+- `Steering`_
+- `Mutually exclusive features`_
+
+Background
+==========
+
+The Multi-PF NIC technology enables several CPUs within a multi-socket server to connect directly to
+the network, each through its own dedicated PCIe interface. Through either a connection harness that
+splits the PCIe lanes between two cards or by bifurcating a PCIe slot for a single card. This
+results in eliminating the network traffic traversing over the internal bus between the sockets,
+significantly reducing overhead and latency, in addition to reducing CPU utilization and increasing
+network throughput.
+
+Overview
+========
+
+The feature adds support for combining multiple PFs of the same port in a Multi-PF environment under
+one netdev instance. It is implemented in the netdev layer. Lower-layer instances like pci func,
+sysfs entry, and devlink are kept separate.
+Passing traffic through different devices belonging to different NUMA sockets saves cross-NUMA
+traffic and allows apps running on the same netdev from different NUMAs to still feel a sense of
+proximity to the device and achieve improved performance.
+
+mlx5 implementation
+===================
+
+Multi-PF or Socket-direct in mlx5 is achieved by grouping PFs together which belong to the same
+NIC and has the socket-direct property enabled, once all PFs are probed, we create a single netdev
+to represent all of them, symmetrically, we destroy the netdev whenever any of the PFs is removed.
+
+The netdev network channels are distributed between all devices, a proper configuration would utilize
+the correct close NUMA node when working on a certain app/CPU.
+
+We pick one PF to be a primary (leader), and it fills a special role. The other devices
+(secondaries) are disconnected from the network at the chip level (set to silent mode). In silent
+mode, no south <-> north traffic flowing directly through a secondary PF. It needs the assistance of
+the leader PF (east <-> west traffic) to function. All Rx/Tx traffic is steered through the primary
+to/from the secondaries.
+
+Currently, we limit the support to PFs only, and up to two PFs (sockets).
+
+Channels distribution
+=====================
+
+We distribute the channels between the different PFs to achieve local NUMA node performance
+on multiple NUMA nodes.
+
+Each combined channel works against one specific PF, creating all its datapath queues against it. We
+distribute channels to PFs in a round-robin policy.
+
+::
+
+ Example for 2 PFs and 5 channels:
+ +--------+--------+
+ | ch idx | PF idx |
+ +--------+--------+
+ | 0 | 0 |
+ | 1 | 1 |
+ | 2 | 0 |
+ | 3 | 1 |
+ | 4 | 0 |
+ +--------+--------+
+
+
+The reason we prefer round-robin is, it is less influenced by changes in the number of channels. The
+mapping between a channel index and a PF is fixed, no matter how many channels the user configures.
+As the channel stats are persistent across channel's closure, changing the mapping every single time
+would turn the accumulative stats less representing of the channel's history.
+
+This is achieved by using the correct core device instance (mdev) in each channel, instead of them
+all using the same instance under "priv->mdev".
+
+Observability
+=============
+The relation between PF, irq, napi, and queue can be observed via netlink spec:
+
+$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump queue-get --json='{"ifindex": 13}'
+[{'id': 0, 'ifindex': 13, 'napi-id': 539, 'type': 'rx'},
+ {'id': 1, 'ifindex': 13, 'napi-id': 540, 'type': 'rx'},
+ {'id': 2, 'ifindex': 13, 'napi-id': 541, 'type': 'rx'},
+ {'id': 3, 'ifindex': 13, 'napi-id': 542, 'type': 'rx'},
+ {'id': 4, 'ifindex': 13, 'napi-id': 543, 'type': 'rx'},
+ {'id': 0, 'ifindex': 13, 'napi-id': 539, 'type': 'tx'},
+ {'id': 1, 'ifindex': 13, 'napi-id': 540, 'type': 'tx'},
+ {'id': 2, 'ifindex': 13, 'napi-id': 541, 'type': 'tx'},
+ {'id': 3, 'ifindex': 13, 'napi-id': 542, 'type': 'tx'},
+ {'id': 4, 'ifindex': 13, 'napi-id': 543, 'type': 'tx'}]
+
+$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump napi-get --json='{"ifindex": 13}'
+[{'id': 543, 'ifindex': 13, 'irq': 42},
+ {'id': 542, 'ifindex': 13, 'irq': 41},
+ {'id': 541, 'ifindex': 13, 'irq': 40},
+ {'id': 540, 'ifindex': 13, 'irq': 39},
+ {'id': 539, 'ifindex': 13, 'irq': 36}]
+
+Here you can clearly observe our channels distribution policy:
+
+$ ls /proc/irq/{36,39,40,41,42}/mlx5* -d -1
+/proc/irq/36/mlx5_comp1@pci:0000:08:00.0
+/proc/irq/39/mlx5_comp1@pci:0000:09:00.0
+/proc/irq/40/mlx5_comp2@pci:0000:08:00.0
+/proc/irq/41/mlx5_comp2@pci:0000:09:00.0
+/proc/irq/42/mlx5_comp3@pci:0000:08:00.0
+
+Steering
+========
+Secondary PFs are set to "silent" mode, meaning they are disconnected from the network.
+
+In Rx, the steering tables belong to the primary PF only, and it is its role to distribute incoming
+traffic to other PFs, via cross-vhca steering capabilities. Still maintain a single default RSS table,
+that is capable of pointing to the receive queues of a different PF.
+
+In Tx, the primary PF creates a new Tx flow table, which is aliased by the secondaries, so they can
+go out to the network through it.
+
+In addition, we set default XPS configuration that, based on the CPU, selects an SQ belonging to the
+PF on the same node as the CPU.
+
+XPS default config example:
+
+NUMA node(s): 2
+NUMA node0 CPU(s): 0-11
+NUMA node1 CPU(s): 12-23
+
+PF0 on node0, PF1 on node1.
+
+- /sys/class/net/eth2/queues/tx-0/xps_cpus:000001
+- /sys/class/net/eth2/queues/tx-1/xps_cpus:001000
+- /sys/class/net/eth2/queues/tx-2/xps_cpus:000002
+- /sys/class/net/eth2/queues/tx-3/xps_cpus:002000
+- /sys/class/net/eth2/queues/tx-4/xps_cpus:000004
+- /sys/class/net/eth2/queues/tx-5/xps_cpus:004000
+- /sys/class/net/eth2/queues/tx-6/xps_cpus:000008
+- /sys/class/net/eth2/queues/tx-7/xps_cpus:008000
+- /sys/class/net/eth2/queues/tx-8/xps_cpus:000010
+- /sys/class/net/eth2/queues/tx-9/xps_cpus:010000
+- /sys/class/net/eth2/queues/tx-10/xps_cpus:000020
+- /sys/class/net/eth2/queues/tx-11/xps_cpus:020000
+- /sys/class/net/eth2/queues/tx-12/xps_cpus:000040
+- /sys/class/net/eth2/queues/tx-13/xps_cpus:040000
+- /sys/class/net/eth2/queues/tx-14/xps_cpus:000080
+- /sys/class/net/eth2/queues/tx-15/xps_cpus:080000
+- /sys/class/net/eth2/queues/tx-16/xps_cpus:000100
+- /sys/class/net/eth2/queues/tx-17/xps_cpus:100000
+- /sys/class/net/eth2/queues/tx-18/xps_cpus:000200
+- /sys/class/net/eth2/queues/tx-19/xps_cpus:200000
+- /sys/class/net/eth2/queues/tx-20/xps_cpus:000400
+- /sys/class/net/eth2/queues/tx-21/xps_cpus:400000
+- /sys/class/net/eth2/queues/tx-22/xps_cpus:000800
+- /sys/class/net/eth2/queues/tx-23/xps_cpus:800000
+
+Mutually exclusive features
+===========================
+
+The nature of Multi-PF, where different channels work with different PFs, conflicts with
+stateful features where the state is maintained in one of the PFs.
+For example, in the TLS device-offload feature, special context objects are created per connection
+and maintained in the PF. Transitioning between different RQs/SQs would break the feature. Hence,
+we disable this combination for now.
diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst
index a2babd0d7954..595d7ef5fc8b 100644
--- a/Documentation/networking/net_cachelines/inet_sock.rst
+++ b/Documentation/networking/net_cachelines/inet_sock.rst
@@ -1,9 +1,9 @@
.. SPDX-License-Identifier: GPL-2.0
.. Copyright (C) 2023 Google LLC
-=====================================================
-inet_connection_sock struct fast path usage breakdown
-=====================================================
+==========================================
+inet_sock struct fast path usage breakdown
+==========================================
Type Name fastpath_tx_access fastpath_rx_access comment
..struct ..inet_sock
diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index e75a53593bb9..dceb49d56a91 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -136,8 +136,8 @@ struct_netpoll_info* npinfo -
possible_net_t nd_net - read_mostly (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish
void* ml_priv
enum_netdev_ml_priv_type ml_priv_type
-struct_pcpu_lstats__percpu* lstats
-struct_pcpu_sw_netstats__percpu* tstats
+struct_pcpu_lstats__percpu* lstats read_mostly dev_lstats_add()
+struct_pcpu_sw_netstats__percpu* tstats read_mostly dev_sw_netstats_tx_add()
struct_pcpu_dstats__percpu* dstats
struct_garp_port* garp_port
struct_mrp_port* mrp_port
diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst
index 97d7a5c8e01c..1c154cbd1848 100644
--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst
@@ -38,13 +38,13 @@ u32 max_window read_mostly -
u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx)
u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window
u32 rcv_ssthresh read_mostly - __tcp_select_window
-u82 scaling_ratio
+u8 scaling_ratio read_mostly read_mostly tcp_win_from_space
struct tcp_rack
u16 advmss - read_mostly tcp_rcv_space_adjust
u8 compressed_ack
u8:2 dup_ack_counter
u8:1 tlp_retrans
-u8:1 tcp_usec_ts
+u8:1 tcp_usec_ts read_mostly read_mostly
u32 chrono_start read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data)
u32[3] chrono_stat read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data)
u8:2 chrono_type read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data)
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index 390730a74332..d55c2a22ec7a 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -15,6 +15,8 @@ Extended console support by Tejun Heo <tj@kernel.org>, May 1 2015
Release prepend support by Breno Leitao <leitao@debian.org>, Jul 7 2023
+Userdata append support by Matthew Wood <thepacketgeek@gmail.com>, Jan 22 2024
+
Please send bug reports to Matt Mackall <mpm@selenic.com>
Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
@@ -171,6 +173,70 @@ You can modify these targets in runtime by creating the following targets::
cat cmdline1/remote_ip
10.0.0.3
+Append User Data
+----------------
+
+Custom user data can be appended to the end of messages with netconsole
+dynamic configuration enabled. User data entries can be modified without
+changing the "enabled" attribute of a target.
+
+Directories (keys) under `userdata` are limited to 53 character length, and
+data in `userdata/<key>/value` are limited to 200 bytes::
+
+ cd /sys/kernel/config/netconsole && mkdir cmdline0
+ cd cmdline0
+ mkdir userdata/foo
+ echo bar > userdata/foo/value
+ mkdir userdata/qux
+ echo baz > userdata/qux/value
+
+Messages will now include this additional user data::
+
+ echo "This is a message" > /dev/kmsg
+
+Sends::
+
+ 12,607,22085407756,-;This is a message
+ foo=bar
+ qux=baz
+
+Preview the userdata that will be appended with::
+
+ cd /sys/kernel/config/netconsole/cmdline0/userdata
+ for f in `ls userdata`; do echo $f=$(cat userdata/$f/value); done
+
+If a `userdata` entry is created but no data is written to the `value` file,
+the entry will be omitted from netconsole messages::
+
+ cd /sys/kernel/config/netconsole && mkdir cmdline0
+ cd cmdline0
+ mkdir userdata/foo
+ echo bar > userdata/foo/value
+ mkdir userdata/qux
+
+The `qux` key is omitted since it has no value::
+
+ echo "This is a message" > /dev/kmsg
+ 12,607,22085407756,-;This is a message
+ foo=bar
+
+Delete `userdata` entries with `rmdir`::
+
+ rmdir /sys/kernel/config/netconsole/cmdline0/userdata/qux
+
+.. warning::
+ When writing strings to user data values, input is broken up per line in
+ configfs store calls and this can cause confusing behavior::
+
+ mkdir userdata/testing
+ printf "val1\nval2" > userdata/testing/value
+ # userdata store value is called twice, first with "val1\n" then "val2"
+ # so "val2" is stored, being the last value stored
+ cat userdata/testing/value
+ val2
+
+ It is recommended to not write user data values with newlines.
+
Extended console:
=================
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 9e4cccb90b87..c2476917a6c3 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -252,8 +252,8 @@ ndo_eth_ioctl:
Context: process
ndo_get_stats:
- Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU.
- Context: atomic (can't sleep under rwlock or RCU)
+ Synchronization: rtnl_lock() semaphore, or RCU.
+ Context: atomic (can't sleep under RCU)
ndo_start_xmit:
Synchronization: __netif_tx_lock spinlock.
diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst
index 8054d33f449f..5bf285d73e8a 100644
--- a/Documentation/networking/sfp-phylink.rst
+++ b/Documentation/networking/sfp-phylink.rst
@@ -231,16 +231,136 @@ this documentation.
For further information on these methods, please see the inline
documentation in :c:type:`struct phylink_mac_ops <phylink_mac_ops>`.
-9. Remove calls to of_parse_phandle() for the PHY,
- of_phy_register_fixed_link() for fixed links etc. from the probe
- function, and replace with:
+9. Fill-in the :c:type:`struct phylink_config <phylink_config>` fields with
+ a reference to the :c:type:`struct device <device>` associated to your
+ :c:type:`struct net_device <net_device>`:
.. code-block:: c
- struct phylink *phylink;
priv->phylink_config.dev = &dev.dev;
priv->phylink_config.type = PHYLINK_NETDEV;
+ Fill-in the various speeds, pause and duplex modes your MAC can handle:
+
+ .. code-block:: c
+
+ priv->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
+
+10. Some Ethernet controllers work in pair with a PCS (Physical Coding Sublayer)
+ block, that can handle among other things the encoding/decoding, link
+ establishment detection and autonegotiation. While some MACs have internal
+ PCS whose operation is transparent, some other require dedicated PCS
+ configuration for the link to become functional. In that case, phylink
+ provides a PCS abstraction through :c:type:`struct phylink_pcs <phylink_pcs>`.
+
+ Identify if your driver has one or more internal PCS blocks, and/or if
+ your controller can use an external PCS block that might be internally
+ connected to your controller.
+
+ If your controller doesn't have any internal PCS, you can go to step 11.
+
+ If your Ethernet controller contains one or several PCS blocks, create
+ one :c:type:`struct phylink_pcs <phylink_pcs>` instance per PCS block within
+ your driver's private data structure:
+
+ .. code-block:: c
+
+ struct phylink_pcs pcs;
+
+ Populate the relevant :c:type:`struct phylink_pcs_ops <phylink_pcs_ops>` to
+ configure your PCS. Create a :c:func:`pcs_get_state` function that reports
+ the inband link state, a :c:func:`pcs_config` function to configure your
+ PCS according to phylink-provided parameters, and a :c:func:`pcs_validate`
+ function that report to phylink all accepted configuration parameters for
+ your PCS:
+
+ .. code-block:: c
+
+ struct phylink_pcs_ops foo_pcs_ops = {
+ .pcs_validate = foo_pcs_validate,
+ .pcs_get_state = foo_pcs_get_state,
+ .pcs_config = foo_pcs_config,
+ };
+
+ Arrange for PCS link state interrupts to be forwarded into
+ phylink, via:
+
+ .. code-block:: c
+
+ phylink_pcs_change(pcs, link_is_up);
+
+ where ``link_is_up`` is true if the link is currently up or false
+ otherwise. If a PCS is unable to provide these interrupts, then
+ it should set ``pcs->pcs_poll = true;`` when creating the PCS.
+
+11. If your controller relies on, or accepts the presence of an external PCS
+ controlled through its own driver, add a pointer to a phylink_pcs instance
+ in your driver private data structure:
+
+ .. code-block:: c
+
+ struct phylink_pcs *pcs;
+
+ The way of getting an instance of the actual PCS depends on the platform,
+ some PCS sit on an MDIO bus and are grabbed by passing a pointer to the
+ corresponding :c:type:`struct mii_bus <mii_bus>` and the PCS's address on
+ that bus. In this example, we assume the controller attaches to a Lynx PCS
+ instance:
+
+ .. code-block:: c
+
+ priv->pcs = lynx_pcs_create_mdiodev(bus, 0);
+
+ Some PCS can be recovered based on firmware information:
+
+ .. code-block:: c
+
+ priv->pcs = lynx_pcs_create_fwnode(of_fwnode_handle(node));
+
+12. Populate the :c:func:`mac_select_pcs` callback and add it to your
+ :c:type:`struct phylink_mac_ops <phylink_mac_ops>` set of ops. This function
+ must return a pointer to the relevant :c:type:`struct phylink_pcs <phylink_pcs>`
+ that will be used for the requested link configuration:
+
+ .. code-block:: c
+
+ static struct phylink_pcs *foo_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+ {
+ struct foo_priv *priv = container_of(config, struct foo_priv,
+ phylink_config);
+
+ if ( /* 'interface' needs a PCS to function */ )
+ return priv->pcs;
+
+ return NULL;
+ }
+
+ See :c:func:`mvpp2_select_pcs` for an example of a driver that has multiple
+ internal PCS.
+
+13. Fill-in all the :c:type:`phy_interface_t <phy_interface_t>` (i.e. all MAC to
+ PHY link modes) that your MAC can output. The following example shows a
+ configuration for a MAC that can handle all RGMII modes, SGMII and 1000BaseX.
+ You must adjust these according to what your MAC and all PCS associated
+ with this MAC are capable of, and not just the interface you wish to use:
+
+ .. code-block:: c
+
+ phy_interface_set_rgmii(priv->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ priv->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+ priv->phylink_config.supported_interfaces);
+
+14. Remove calls to of_parse_phandle() for the PHY,
+ of_phy_register_fixed_link() for fixed links etc. from the probe
+ function, and replace with:
+
+ .. code-block:: c
+
+ struct phylink *phylink;
+
phylink = phylink_create(&priv->phylink_config, node, phy_mode, &phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
@@ -249,14 +369,14 @@ this documentation.
priv->phylink = phylink;
- and arrange to destroy the phylink in the probe failure path as
- appropriate and the removal path too by calling:
+ and arrange to destroy the phylink in the probe failure path as
+ appropriate and the removal path too by calling:
- .. code-block:: c
+ .. code-block:: c
phylink_destroy(priv->phylink);
-10. Arrange for MAC link state interrupts to be forwarded into
+15. Arrange for MAC link state interrupts to be forwarded into
phylink, via:
.. code-block:: c
@@ -264,17 +384,16 @@ this documentation.
phylink_mac_change(priv->phylink, link_is_up);
where ``link_is_up`` is true if the link is currently up or false
- otherwise. If a MAC is unable to provide these interrupts, then
- it should set ``priv->phylink_config.pcs_poll = true;`` in step 9.
+ otherwise.
-11. Verify that the driver does not call::
+16. Verify that the driver does not call::
netif_carrier_on()
netif_carrier_off()
- as these will interfere with phylink's tracking of the link state,
- and cause phylink to omit calls via the :c:func:`mac_link_up` and
- :c:func:`mac_link_down` methods.
+ as these will interfere with phylink's tracking of the link state,
+ and cause phylink to omit calls via the :c:func:`mac_link_up` and
+ :c:func:`mac_link_down` methods.
Network drivers should call phylink_stop() and phylink_start() via their
suspend/resume paths, which ensures that the appropriate
diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst
index 551b3cc29a41..75e017dfa825 100644
--- a/Documentation/networking/statistics.rst
+++ b/Documentation/networking/statistics.rst
@@ -41,6 +41,15 @@ If `-s` is specified once the detailed errors won't be shown.
`ip` supports JSON formatting via the `-j` option.
+Queue statistics
+~~~~~~~~~~~~~~~~
+
+Queue statistics are accessible via the netdev netlink family.
+
+Currently no widely distributed CLI exists to access those statistics.
+Kernel development tools (ynl) can be used to experiment with them,
+see `Documentation/userspace-api/netlink/intro-specs.rst`.
+
Protocol-specific statistics
----------------------------
@@ -147,6 +156,12 @@ Statistics are reported both in the responses to link information
requests (`RTM_GETLINK`) and statistic requests (`RTM_GETSTATS`,
when `IFLA_STATS_LINK_64` bit is set in the `.filter_mask` of the request).
+netdev (netlink)
+~~~~~~~~~~~~~~~~
+
+`netdev` generic netlink family allows accessing page pool and per queue
+statistics.
+
ethtool
-------
diff --git a/Documentation/networking/xfrm_device.rst b/Documentation/networking/xfrm_device.rst
index 535077cbeb07..bfea9d8579ed 100644
--- a/Documentation/networking/xfrm_device.rst
+++ b/Documentation/networking/xfrm_device.rst
@@ -71,9 +71,9 @@ Callbacks to implement
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
+ void (*xdo_dev_state_update_stats) (struct xfrm_state *x);
/* Solely packet offload callbacks */
- void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
void (*xdo_dev_policy_free) (struct xfrm_policy *x);
@@ -191,6 +191,6 @@ xdo_dev_policy_free() on any remaining offloaded states.
Outcome of HW handling packets, the XFRM core can't count hard, soft limits.
The HW/driver are responsible to perform it and provide accurate data when
-xdo_dev_state_update_curlft() is called. In case of one of these limits
+xdo_dev_state_update_stats() is called. In case of one of these limits
occuried, the driver needs to call to xfrm_state_check_expire() to make sure
that XFRM performs rekeying sequence.
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 50b3d1cb1115..ca611c9c2d1e 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils.
====================== =============== ========================================
GNU C 5.1 gcc --version
Clang/LLVM (optional) 11.0.0 clang --version
-Rust (optional) 1.74.1 rustc --version
+Rust (optional) 1.76.0 rustc --version
bindgen (optional) 0.65.1 bindgen --version
GNU make 3.82 make --version
bash 4.2 bash --version
@@ -144,8 +144,8 @@ Bison
Since Linux 4.16, the build system generates parsers
during build. This requires bison 2.0 or later.
-pahole:
--------
+pahole
+------
Since Linux 5.2, if CONFIG_DEBUG_INFO_BTF is selected, the build system
generates BTF (BPF Type Format) from DWARF in vmlinux, a bit later from kernel
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index c48382c6b477..9c7cf7347394 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -203,7 +203,7 @@ Do not unnecessarily use braces where a single statement will do.
and
-.. code-block:: none
+.. code-block:: c
if (condition)
do_this();
@@ -586,9 +586,9 @@ fix for this is to split it up into two error labels ``err_free_bar:`` and
.. code-block:: c
- err_free_bar:
+ err_free_bar:
kfree(foo->bar);
- err_free_foo:
+ err_free_foo:
kfree(foo);
return ret;
@@ -660,7 +660,7 @@ make a good program).
So, you can either get rid of GNU emacs, or change it to use saner
values. To do the latter, you can stick the following in your .emacs file:
-.. code-block:: none
+.. code-block:: elisp
(defun c-lineup-arglist-tabs-only (ignored)
"Line up argument lists by tabs, not spaces"
@@ -679,7 +679,7 @@ values. To do the latter, you can stick the following in your .emacs file:
(c-offsets-alist . (
(arglist-close . c-lineup-arglist-tabs-only)
(arglist-cont-nonempty .
- (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
+ (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
(arglist-intro . +)
(brace-list-intro . +)
(c . c-lineup-C-comments)
@@ -899,7 +899,8 @@ which you should use to make sure messages are matched to the right device
and driver, and are tagged with the right level: dev_err(), dev_warn(),
dev_info(), and so forth. For messages that aren't associated with a
particular device, <linux/printk.h> defines pr_notice(), pr_info(),
-pr_warn(), pr_err(), etc.
+pr_warn(), pr_err(), etc. When drivers are working properly they are quiet,
+so prefer to use dev_dbg/pr_debug unless something is wrong.
Coming up with good debugging messages can be quite a challenge; and once
you have them, they can be a huge help for remote troubleshooting. However
diff --git a/Documentation/process/cve.rst b/Documentation/process/cve.rst
new file mode 100644
index 000000000000..5e2753eff729
--- /dev/null
+++ b/Documentation/process/cve.rst
@@ -0,0 +1,121 @@
+====
+CVEs
+====
+
+Common Vulnerabilities and Exposure (CVE®) numbers were developed as an
+unambiguous way to identify, define, and catalog publicly disclosed
+security vulnerabilities. Over time, their usefulness has declined with
+regards to the kernel project, and CVE numbers were very often assigned
+in inappropriate ways and for inappropriate reasons. Because of this,
+the kernel development community has tended to avoid them. However, the
+combination of continuing pressure to assign CVEs and other forms of
+security identifiers, and ongoing abuses by individuals and companies
+outside of the kernel community has made it clear that the kernel
+community should have control over those assignments.
+
+The Linux kernel developer team does have the ability to assign CVEs for
+potential Linux kernel security issues. This assignment is independent
+of the :doc:`normal Linux kernel security bug reporting
+process<../process/security-bugs>`.
+
+A list of all assigned CVEs for the Linux kernel can be found in the
+archives of the linux-cve mailing list, as seen on
+https://lore.kernel.org/linux-cve-announce/. To get notice of the
+assigned CVEs, please `subscribe
+<https://subspace.kernel.org/subscribing.html>`_ to that mailing list.
+
+Process
+=======
+
+As part of the normal stable release process, kernel changes that are
+potentially security issues are identified by the developers responsible
+for CVE number assignments and have CVE numbers automatically assigned
+to them. These assignments are published on the linux-cve-announce
+mailing list as announcements on a frequent basis.
+
+Note, due to the layer at which the Linux kernel is in a system, almost
+any bug might be exploitable to compromise the security of the kernel,
+but the possibility of exploitation is often not evident when the bug is
+fixed. Because of this, the CVE assignment team is overly cautious and
+assign CVE numbers to any bugfix that they identify. This
+explains the seemingly large number of CVEs that are issued by the Linux
+kernel team.
+
+If the CVE assignment team misses a specific fix that any user feels
+should have a CVE assigned to it, please email them at <cve@kernel.org>
+and the team there will work with you on it. Note that no potential
+security issues should be sent to this alias, it is ONLY for assignment
+of CVEs for fixes that are already in released kernel trees. If you
+feel you have found an unfixed security issue, please follow the
+:doc:`normal Linux kernel security bug reporting
+process<../process/security-bugs>`.
+
+No CVEs will be automatically assigned for unfixed security issues in
+the Linux kernel; assignment will only automatically happen after a fix
+is available and applied to a stable kernel tree, and it will be tracked
+that way by the git commit id of the original fix. If anyone wishes to
+have a CVE assigned before an issue is resolved with a commit, please
+contact the kernel CVE assignment team at <cve@kernel.org> to get an
+identifier assigned from their batch of reserved identifiers.
+
+No CVEs will be assigned for any issue found in a version of the kernel
+that is not currently being actively supported by the Stable/LTS kernel
+team. A list of the currently supported kernel branches can be found at
+https://kernel.org/releases.html
+
+Disputes of assigned CVEs
+=========================
+
+The authority to dispute or modify an assigned CVE for a specific kernel
+change lies solely with the maintainers of the relevant subsystem
+affected. This principle ensures a high degree of accuracy and
+accountability in vulnerability reporting. Only those individuals with
+deep expertise and intimate knowledge of the subsystem can effectively
+assess the validity and scope of a reported vulnerability and determine
+its appropriate CVE designation. Any attempt to modify or dispute a CVE
+outside of this designated authority could lead to confusion, inaccurate
+reporting, and ultimately, compromised systems.
+
+Invalid CVEs
+============
+
+If a security issue is found in a Linux kernel that is only supported by
+a Linux distribution due to the changes that have been made by that
+distribution, or due to the distribution supporting a kernel version
+that is no longer one of the kernel.org supported releases, then a CVE
+can not be assigned by the Linux kernel CVE team, and must be asked for
+from that Linux distribution itself.
+
+Any CVE that is assigned against the Linux kernel for an actively
+supported kernel version, by any group other than the kernel assignment
+CVE team should not be treated as a valid CVE. Please notify the
+kernel CVE assignment team at <cve@kernel.org> so that they can work to
+invalidate such entries through the CNA remediation process.
+
+Applicability of specific CVEs
+==============================
+
+As the Linux kernel can be used in many different ways, with many
+different ways of accessing it by external users, or no access at all,
+the applicability of any specific CVE is up to the user of Linux to
+determine, it is not up to the CVE assignment team. Please do not
+contact us to attempt to determine the applicability of any specific
+CVE.
+
+Also, as the source tree is so large, and any one system only uses a
+small subset of the source tree, any users of Linux should be aware that
+large numbers of assigned CVEs are not relevant for their systems.
+
+In short, we do not know your use case, and we do not know what portions
+of the kernel that you use, so there is no way for us to determine if a
+specific CVE is relevant for your system.
+
+As always, it is best to take all released kernel changes, as they are
+tested together in a unified whole by many community members, and not as
+individual cherry-picked changes. Also note that for many bugs, the
+solution to the overall problem is not found in a single change, but by
+the sum of many fixes on top of each other. Ideally CVEs will be
+assigned to all fixes for all issues, but sometimes we will fail to
+notice fixes, therefore assume that some changes without a CVE assigned
+might be relevant to take.
+
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index 31000f075707..bb2100228cc7 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -255,7 +255,7 @@ an involved disclosed party. The current ambassadors list:
IBM Power Anton Blanchard <anton@linux.ibm.com>
IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
Intel Tony Luck <tony.luck@intel.com>
- Qualcomm Trilok Soni <tsoni@codeaurora.org>
+ Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
RISC-V Palmer Dabbelt <palmer@dabbelt.com>
Samsung Javier González <javier.gonz@samsung.com>
diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst
index 6c73889c98fc..eebda4910a88 100644
--- a/Documentation/process/howto.rst
+++ b/Documentation/process/howto.rst
@@ -351,8 +351,8 @@ Managing bug reports
--------------------
One of the best ways to put into practice your hacking skills is by fixing
-bugs reported by other people. Not only you will help to make the kernel
-more stable, but you'll also learn to fix real world problems and you will
+bugs reported by other people. Not only will you help to make the kernel
+more stable, but you'll also learn to fix real-world problems and you will
improve your skills, and other developers will be aware of your presence.
Fixing bugs is one of the best ways to get merits among other developers,
because not many people like wasting time fixing other people's bugs.
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 6cb732dfcc72..de9cbb7bd7eb 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -81,6 +81,7 @@ of special classes of bugs: regressions and security problems.
handling-regressions
security-bugs
+ cve
embargoed-hardware-issues
Maintainer information
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index 84ee60fceef2..fd96e4a3cef9 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -431,7 +431,7 @@ patchwork checks
Checks in patchwork are mostly simple wrappers around existing kernel
scripts, the sources are available at:
-https://github.com/kuba-moo/nipa/tree/master/tests
+https://github.com/linux-netdev/nipa/tree/master/tests
**Do not** post your patches just to run them through the checks.
You must ensure that your patches are ready by testing them locally
diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index 08dd0f804410..497bb39727c8 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -304,13 +304,15 @@ following tag ordering scheme:
- Reported-by: ``Reporter <reporter@mail>``
+ - Closes: ``URL or Message-ID of the bug report this is fixing``
+
- Originally-by: ``Original author <original-author@mail>``
- Suggested-by: ``Suggester <suggester@mail>``
- Co-developed-by: ``Co-author <co-author@mail>``
- Signed-off: ``Co-author <co-author@mail>``
+ Signed-off-by: ``Co-author <co-author@mail>``
Note, that Co-developed-by and Signed-off-by of the co-author(s) must
come in pairs.
@@ -478,7 +480,7 @@ Multi-line comments::
* Larger multi-line comments should be split into paragraphs.
*/
-No tail comments:
+No tail comments (see below):
Please refrain from using tail comments. Tail comments disturb the
reading flow in almost all contexts, but especially in code::
@@ -499,6 +501,34 @@ No tail comments:
/* This magic initialization needs a comment. Maybe not? */
seed = MAGIC_CONSTANT;
+ Use C++ style, tail comments when documenting structs in headers to
+ achieve a more compact layout and better readability::
+
+ // eax
+ u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
+ // for the topology ID at the next level
+ : 27; // Reserved
+ // ebx
+ u32 num_processors : 16, // Number of processors at current level
+ : 16; // Reserved
+
+ versus::
+
+ /* eax */
+ /*
+ * Number of bits to shift APIC ID right for the topology ID
+ * at the next level
+ */
+ u32 x2apic_shift : 5,
+ /* Reserved */
+ : 27;
+
+ /* ebx */
+ /* Number of processors at current level */
+ u32 num_processors : 16,
+ /* Reserved */
+ : 16;
+
Comment the important things:
Comments should be added where the operation is not obvious. Documenting
diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst
index d159cd4f5e5b..beb484c5965d 100644
--- a/Documentation/process/researcher-guidelines.rst
+++ b/Documentation/process/researcher-guidelines.rst
@@ -167,4 +167,4 @@ If no one can be found to internally review patches and you need
help finding such a person, or if you have any other questions
related to this document and the developer community's expectations,
please reach out to the private Technical Advisory Board mailing list:
-<tech-board@lists.linux-foundation.org>.
+<tech-board@groups.linuxfoundation.org>.
diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst
index 692a3ba56cca..56c560a00b37 100644
--- a/Documentation/process/security-bugs.rst
+++ b/Documentation/process/security-bugs.rst
@@ -99,9 +99,8 @@ CVE assignment
The security team does not assign CVEs, nor do we require them for
reports or fixes, as this can needlessly complicate the process and may
delay the bug handling. If a reporter wishes to have a CVE identifier
-assigned, they should find one by themselves, for example by contacting
-MITRE directly. However under no circumstances will a patch inclusion
-be delayed to wait for a CVE identifier to arrive.
+assigned for a confirmed issue, they can contact the :doc:`kernel CVE
+assignment team<../process/cve>` to obtain one.
Non-disclosure agreements
-------------------------
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index b1bc2d37bd0a..e531dd504b6c 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -1,7 +1,8 @@
.. _submitchecklist:
+=======================================
Linux Kernel patch submission checklist
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+=======================================
Here are some basic things that developers should do if they want to see their
kernel patch submissions accepted more quickly.
@@ -10,111 +11,123 @@ These are all above and beyond the documentation that is provided in
:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
and elsewhere regarding submitting Linux kernel patches.
+Review your code
+================
1) If you use a facility then #include the file that defines/declares
that facility. Don't depend on other header files pulling in ones
that you use.
-2) Builds cleanly:
+2) Check your patch for general style as detailed in
+ :ref:`Documentation/process/coding-style.rst <codingstyle>`.
- a) with applicable or modified ``CONFIG`` options ``=y``, ``=m``, and
- ``=n``. No ``gcc`` warnings/errors, no linker warnings/errors.
+3) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
+ comment in the source code that explains the logic of what they are doing
+ and why.
- b) Passes ``allnoconfig``, ``allmodconfig``
+Review Kconfig changes
+======================
- c) Builds successfully when using ``O=builddir``
+1) Any new or modified ``CONFIG`` options do not muck up the config menu and
+ default to off unless they meet the exception criteria documented in
+ ``Documentation/kbuild/kconfig-language.rst`` Menu attributes: default value.
- d) Any Documentation/ changes build successfully without new warnings/errors.
- Use ``make htmldocs`` or ``make pdfdocs`` to check the build and
- fix any issues.
+2) All new ``Kconfig`` options have help text.
-3) Builds on multiple CPU architectures by using local cross-compile tools
- or some other build farm.
+3) Has been carefully reviewed with respect to relevant ``Kconfig``
+ combinations. This is very hard to get right with testing---brainpower
+ pays off here.
-4) ppc64 is a good architecture for cross-compilation checking because it
- tends to use ``unsigned long`` for 64-bit quantities.
+Provide documentation
+=====================
-5) Check your patch for general style as detailed in
- :ref:`Documentation/process/coding-style.rst <codingstyle>`.
- Check for trivial violations with the patch style checker prior to
- submission (``scripts/checkpatch.pl``).
- You should be able to justify all violations that remain in
- your patch.
+1) Include :ref:`kernel-doc <kernel_doc>` to document global kernel APIs.
+ (Not required for static functions, but OK there also.)
-6) Any new or modified ``CONFIG`` options do not muck up the config menu and
- default to off unless they meet the exception criteria documented in
- ``Documentation/kbuild/kconfig-language.rst`` Menu attributes: default value.
+2) All new ``/proc`` entries are documented under ``Documentation/``
-7) All new ``Kconfig`` options have help text.
+3) All new kernel boot parameters are documented in
+ ``Documentation/admin-guide/kernel-parameters.rst``.
-8) Has been carefully reviewed with respect to relevant ``Kconfig``
- combinations. This is very hard to get right with testing -- brainpower
- pays off here.
+4) All new module parameters are documented with ``MODULE_PARM_DESC()``
-9) Check cleanly with sparse.
+5) All new userspace interfaces are documented in ``Documentation/ABI/``.
+ See ``Documentation/ABI/README`` for more information.
+ Patches that change userspace interfaces should be CCed to
+ linux-api@vger.kernel.org.
-10) Use ``make checkstack`` and fix any problems that it finds.
+6) If any ioctl's are added by the patch, then also update
+ ``Documentation/userspace-api/ioctl/ioctl-number.rst``.
- .. note::
+Check your code with tools
+==========================
- ``checkstack`` does not point out problems explicitly,
- but any one function that uses more than 512 bytes on the stack is a
- candidate for change.
+1) Check for trivial violations with the patch style checker prior to
+ submission (``scripts/checkpatch.pl``).
+ You should be able to justify all violations that remain in
+ your patch.
-11) Include :ref:`kernel-doc <kernel_doc>` to document global kernel APIs.
- (Not required for static functions, but OK there also.) Use
- ``make htmldocs`` or ``make pdfdocs`` to check the
- :ref:`kernel-doc <kernel_doc>` and fix any issues.
+2) Check cleanly with sparse.
-12) Has been tested with ``CONFIG_PREEMPT``, ``CONFIG_DEBUG_PREEMPT``,
- ``CONFIG_DEBUG_SLAB``, ``CONFIG_DEBUG_PAGEALLOC``, ``CONFIG_DEBUG_MUTEXES``,
- ``CONFIG_DEBUG_SPINLOCK``, ``CONFIG_DEBUG_ATOMIC_SLEEP``,
- ``CONFIG_PROVE_RCU`` and ``CONFIG_DEBUG_OBJECTS_RCU_HEAD`` all
- simultaneously enabled.
+3) Use ``make checkstack`` and fix any problems that it finds.
+ Note that ``checkstack`` does not point out problems explicitly,
+ but any one function that uses more than 512 bytes on the stack is a
+ candidate for change.
-13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and
- ``CONFIG_PREEMPT.``
+Build your code
+===============
-14) All codepaths have been exercised with all lockdep features enabled.
+1) Builds cleanly:
-15) All new ``/proc`` entries are documented under ``Documentation/``
+ a) with applicable or modified ``CONFIG`` options ``=y``, ``=m``, and
+ ``=n``. No ``gcc`` warnings/errors, no linker warnings/errors.
-16) All new kernel boot parameters are documented in
- ``Documentation/admin-guide/kernel-parameters.rst``.
+ b) Passes ``allnoconfig``, ``allmodconfig``
+
+ c) Builds successfully when using ``O=builddir``
+
+ d) Any Documentation/ changes build successfully without new warnings/errors.
+ Use ``make htmldocs`` or ``make pdfdocs`` to check the build and
+ fix any issues.
-17) All new module parameters are documented with ``MODULE_PARM_DESC()``
+2) Builds on multiple CPU architectures by using local cross-compile tools
+ or some other build farm. Note that ppc64 is a good architecture for
+ cross-compilation checking because it tends to use ``unsigned long`` for
+ 64-bit quantities.
-18) All new userspace interfaces are documented in ``Documentation/ABI/``.
- See ``Documentation/ABI/README`` for more information.
- Patches that change userspace interfaces should be CCed to
- linux-api@vger.kernel.org.
+3) Newly-added code has been compiled with ``gcc -W`` (use
+ ``make KCFLAGS=-W``). This will generate lots of noise, but is good
+ for finding bugs like "warning: comparison between signed and unsigned".
-19) Has been checked with injection of at least slab and page-allocation
- failures. See ``Documentation/fault-injection/``.
+4) If your modified source code depends on or uses any of the kernel
+ APIs or features that are related to the following ``Kconfig`` symbols,
+ then test multiple builds with the related ``Kconfig`` symbols disabled
+ and/or ``=m`` (if that option is available) [not all of these at the
+ same time, just various/random combinations of them]:
- If the new code is substantial, addition of subsystem-specific fault
- injection might be appropriate.
+ ``CONFIG_SMP``, ``CONFIG_SYSFS``, ``CONFIG_PROC_FS``, ``CONFIG_INPUT``,
+ ``CONFIG_PCI``, ``CONFIG_BLOCK``, ``CONFIG_PM``, ``CONFIG_MAGIC_SYSRQ``,
+ ``CONFIG_NET``, ``CONFIG_INET=n`` (but latter with ``CONFIG_NET=y``).
-20) Newly-added code has been compiled with ``gcc -W`` (use
- ``make KCFLAGS=-W``). This will generate lots of noise, but is good
- for finding bugs like "warning: comparison between signed and unsigned".
+Test your code
+==============
-21) Tested after it has been merged into the -mm patchset to make sure
- that it still works with all of the other queued patches and various
- changes in the VM, VFS, and other subsystems.
+1) Has been tested with ``CONFIG_PREEMPT``, ``CONFIG_DEBUG_PREEMPT``,
+ ``CONFIG_SLUB_DEBUG``, ``CONFIG_DEBUG_PAGEALLOC``, ``CONFIG_DEBUG_MUTEXES``,
+ ``CONFIG_DEBUG_SPINLOCK``, ``CONFIG_DEBUG_ATOMIC_SLEEP``,
+ ``CONFIG_PROVE_RCU`` and ``CONFIG_DEBUG_OBJECTS_RCU_HEAD`` all
+ simultaneously enabled.
-22) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
- comment in the source code that explains the logic of what they are doing
- and why.
+2) Has been build- and runtime tested with and without ``CONFIG_SMP`` and
+ ``CONFIG_PREEMPT.``
-23) If any ioctl's are added by the patch, then also update
- ``Documentation/userspace-api/ioctl/ioctl-number.rst``.
+3) All codepaths have been exercised with all lockdep features enabled.
-24) If your modified source code depends on or uses any of the kernel
- APIs or features that are related to the following ``Kconfig`` symbols,
- then test multiple builds with the related ``Kconfig`` symbols disabled
- and/or ``=m`` (if that option is available) [not all of these at the
- same time, just various/random combinations of them]:
+4) Has been checked with injection of at least slab and page-allocation
+ failures. See ``Documentation/fault-injection/``.
+ If the new code is substantial, addition of subsystem-specific fault
+ injection might be appropriate.
- ``CONFIG_SMP``, ``CONFIG_SYSFS``, ``CONFIG_PROC_FS``, ``CONFIG_INPUT``, ``CONFIG_PCI``, ``CONFIG_BLOCK``, ``CONFIG_PM``, ``CONFIG_MAGIC_SYSRQ``,
- ``CONFIG_NET``, ``CONFIG_INET=n`` (but latter with ``CONFIG_NET=y``).
+5) Tested with the most recent tag of linux-next to make sure that it still
+ works with all of the other queued patches and various changes in the VM,
+ VFS, and other subsystems.
diff --git a/Documentation/rust/general-information.rst b/Documentation/rust/general-information.rst
index 236c6dd3c647..081397827a7e 100644
--- a/Documentation/rust/general-information.rst
+++ b/Documentation/rust/general-information.rst
@@ -77,27 +77,3 @@ configuration:
#[cfg(CONFIG_X="y")] // Enabled as a built-in (`y`)
#[cfg(CONFIG_X="m")] // Enabled as a module (`m`)
#[cfg(not(CONFIG_X))] // Disabled
-
-
-Testing
--------
-
-There are the tests that come from the examples in the Rust documentation
-and get transformed into KUnit tests. These can be run via KUnit. For example
-via ``kunit_tool`` (``kunit.py``) on the command line::
-
- ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --arch x86_64 --kconfig_add CONFIG_RUST=y
-
-Alternatively, KUnit can run them as kernel built-in at boot. Refer to
-Documentation/dev-tools/kunit/index.rst for the general KUnit documentation
-and Documentation/dev-tools/kunit/architecture.rst for the details of kernel
-built-in vs. command line testing.
-
-Additionally, there are the ``#[test]`` tests. These can be run using
-the ``rusttest`` Make target::
-
- make LLVM=1 rusttest
-
-This requires the kernel ``.config`` and downloads external repositories.
-It runs the ``#[test]`` tests on the host (currently) and thus is fairly
-limited in what these tests can test.
diff --git a/Documentation/rust/index.rst b/Documentation/rust/index.rst
index 965f2db529e0..46d35bd395cf 100644
--- a/Documentation/rust/index.rst
+++ b/Documentation/rust/index.rst
@@ -40,6 +40,7 @@ configurations.
general-information
coding-guidelines
arch-support
+ testing
.. only:: subproject and html
diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst
new file mode 100644
index 000000000000..6658998d1b6c
--- /dev/null
+++ b/Documentation/rust/testing.rst
@@ -0,0 +1,135 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Testing
+=======
+
+This document contains useful information how to test the Rust code in the
+kernel.
+
+There are two sorts of tests:
+
+- The KUnit tests.
+- The ``#[test]`` tests.
+
+The KUnit tests
+---------------
+
+These are the tests that come from the examples in the Rust documentation. They
+get transformed into KUnit tests.
+
+Usage
+*****
+
+These tests can be run via KUnit. For example via ``kunit_tool`` (``kunit.py``)
+on the command line::
+
+ ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --arch x86_64 --kconfig_add CONFIG_RUST=y
+
+Alternatively, KUnit can run them as kernel built-in at boot. Refer to
+Documentation/dev-tools/kunit/index.rst for the general KUnit documentation
+and Documentation/dev-tools/kunit/architecture.rst for the details of kernel
+built-in vs. command line testing.
+
+To use these KUnit doctests, the following must be enabled::
+
+ CONFIG_KUNIT
+ Kernel hacking -> Kernel Testing and Coverage -> KUnit - Enable support for unit tests
+ CONFIG_RUST_KERNEL_DOCTESTS
+ Kernel hacking -> Rust hacking -> Doctests for the `kernel` crate
+
+in the kernel config system.
+
+KUnit tests are documentation tests
+***********************************
+
+These documentation tests are typically examples of usage of any item (e.g.
+function, struct, module...).
+
+They are very convenient because they are just written alongside the
+documentation. For instance:
+
+.. code-block:: rust
+
+ /// Sums two numbers.
+ ///
+ /// ```
+ /// assert_eq!(mymod::f(10, 20), 30);
+ /// ```
+ pub fn f(a: i32, b: i32) -> i32 {
+ a + b
+ }
+
+In userspace, the tests are collected and run via ``rustdoc``. Using the tool
+as-is would be useful already, since it allows verifying that examples compile
+(thus enforcing they are kept in sync with the code they document) and as well
+as running those that do not depend on in-kernel APIs.
+
+For the kernel, however, these tests get transformed into KUnit test suites.
+This means that doctests get compiled as Rust kernel objects, allowing them to
+run against a built kernel.
+
+A benefit of this KUnit integration is that Rust doctests get to reuse existing
+testing facilities. For instance, the kernel log would look like::
+
+ KTAP version 1
+ 1..1
+ KTAP version 1
+ # Subtest: rust_doctests_kernel
+ 1..59
+ # rust_doctest_kernel_build_assert_rs_0.location: rust/kernel/build_assert.rs:13
+ ok 1 rust_doctest_kernel_build_assert_rs_0
+ # rust_doctest_kernel_build_assert_rs_1.location: rust/kernel/build_assert.rs:56
+ ok 2 rust_doctest_kernel_build_assert_rs_1
+ # rust_doctest_kernel_init_rs_0.location: rust/kernel/init.rs:122
+ ok 3 rust_doctest_kernel_init_rs_0
+ ...
+ # rust_doctest_kernel_types_rs_2.location: rust/kernel/types.rs:150
+ ok 59 rust_doctest_kernel_types_rs_2
+ # rust_doctests_kernel: pass:59 fail:0 skip:0 total:59
+ # Totals: pass:59 fail:0 skip:0 total:59
+ ok 1 rust_doctests_kernel
+
+Tests using the `? <https://doc.rust-lang.org/reference/expressions/operator-expr.html#the-question-mark-operator>`_
+operator are also supported as usual, e.g.:
+
+.. code-block:: rust
+
+ /// ```
+ /// # use kernel::{spawn_work_item, workqueue};
+ /// spawn_work_item!(workqueue::system(), || pr_info!("x"))?;
+ /// # Ok::<(), Error>(())
+ /// ```
+
+The tests are also compiled with Clippy under ``CLIPPY=1``, just like normal
+code, thus also benefitting from extra linting.
+
+In order for developers to easily see which line of doctest code caused a
+failure, a KTAP diagnostic line is printed to the log. This contains the
+location (file and line) of the original test (i.e. instead of the location in
+the generated Rust file)::
+
+ # rust_doctest_kernel_types_rs_2.location: rust/kernel/types.rs:150
+
+Rust tests appear to assert using the usual ``assert!`` and ``assert_eq!``
+macros from the Rust standard library (``core``). We provide a custom version
+that forwards the call to KUnit instead. Importantly, these macros do not
+require passing context, unlike those for KUnit testing (i.e.
+``struct kunit *``). This makes them easier to use, and readers of the
+documentation do not need to care about which testing framework is used. In
+addition, it may allow us to test third-party code more easily in the future.
+
+A current limitation is that KUnit does not support assertions in other tasks.
+Thus, we presently simply print an error to the kernel log if an assertion
+actually failed. Additionally, doctests are not run for nonpublic functions.
+
+The ``#[test]`` tests
+---------------------
+
+Additionally, there are the ``#[test]`` tests. These can be run using the
+``rusttest`` Make target::
+
+ make LLVM=1 rusttest
+
+This requires the kernel ``.config`` and downloads external repositories. It
+runs the ``#[test]`` tests on the host (currently) and thus is fairly limited in
+what these tests can test.
diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py
index b9df61eb4501..03ace5f01b5c 100644
--- a/Documentation/sphinx/kernel_feat.py
+++ b/Documentation/sphinx/kernel_feat.py
@@ -109,7 +109,7 @@ class KernelFeat(Directive):
else:
out_lines += line + "\n"
- nodeList = self.nestedParse(out_lines, fname)
+ nodeList = self.nestedParse(out_lines, self.arguments[0])
return nodeList
def nestedParse(self, lines, fname):
diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
index 9707e033c8c4..3092df051c95 100644
--- a/Documentation/sphinx/kerneldoc-preamble.sty
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -54,9 +54,7 @@
\renewcommand*\l@section{\@dottedtocline{1}{2.4em}{3.2em}}
\renewcommand*\l@subsection{\@dottedtocline{2}{5.6em}{4.3em}}
\makeatother
-%% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook
-\providecommand{\sphinxtableofcontentshook}{}
-%% Undefine it for compatibility with Sphinx 1.7.9
+%% Prevent default \sphinxtableofcontentshook from overwriting above tweaks.
\renewcommand{\sphinxtableofcontentshook}{} % Empty the hook
% Prevent column squeezing of tabulary. \tymin is set by Sphinx as:
@@ -136,9 +134,6 @@
}
\newCJKfontfamily[JPsans]\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
\newCJKfontfamily[JPmono]\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
- % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
- \providecommand{\onehalfspacing}{}
- \providecommand{\singlespacing}{}
% Define custom macros to on/off CJK
%% One and half spacing for CJK contents
\newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing}
diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py
index 7acf09963daa..ec1ddfff1863 100644
--- a/Documentation/sphinx/kerneldoc.py
+++ b/Documentation/sphinx/kerneldoc.py
@@ -61,9 +61,9 @@ class KernelDocDirective(Directive):
env = self.state.document.settings.env
cmd = [env.config.kerneldoc_bin, '-rst', '-enable-lineno']
- # Pass the version string to kernel-doc, as it needs to use a different
- # dialect, depending what the C domain supports for each specific
- # Sphinx versions
+ # Pass the version string to kernel-doc, as it needs to use a different
+ # dialect, depending what the C domain supports for each specific
+ # Sphinx versions
cmd += ['-sphinx-version', sphinx.__version__]
filename = env.config.kerneldoc_srctree + '/' + self.arguments[0]
diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt
index 5d47ed443949..5017f307c8a4 100644
--- a/Documentation/sphinx/requirements.txt
+++ b/Documentation/sphinx/requirements.txt
@@ -1,6 +1,3 @@
-# jinja2>=3.1 is not compatible with Sphinx<4.0
-jinja2<3.1
-# alabaster>=0.7.14 is not compatible with Sphinx<=3.3
-alabaster<0.7.14
-Sphinx==2.4.4
+alabaster
+Sphinx
pyyaml
diff --git a/Documentation/sphinx/templates/kernel-toc.html b/Documentation/sphinx/templates/kernel-toc.html
index b58efa99df52..41f1efbe64bb 100644
--- a/Documentation/sphinx/templates/kernel-toc.html
+++ b/Documentation/sphinx/templates/kernel-toc.html
@@ -12,5 +12,7 @@
<script type="text/javascript"> <!--
var sbar = document.getElementsByClassName("sphinxsidebar")[0];
let currents = document.getElementsByClassName("current")
- sbar.scrollTop = currents[currents.length - 1].offsetTop;
+ if (currents.length) {
+ sbar.scrollTop = currents[currents.length - 1].offsetTop;
+ }
--> </script>
diff --git a/Documentation/sphinx/translations.py b/Documentation/sphinx/translations.py
index 47161e6eba99..32c2b32b2b5e 100644
--- a/Documentation/sphinx/translations.py
+++ b/Documentation/sphinx/translations.py
@@ -29,10 +29,7 @@ all_languages = {
}
class LanguagesNode(nodes.Element):
- def __init__(self, current_language, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- self.current_language = current_language
+ pass
class TranslationsTransform(Transform):
default_priority = 900
@@ -49,7 +46,8 @@ class TranslationsTransform(Transform):
# normalize docname to be the untranslated one
docname = os.path.join(*components[2:])
- new_nodes = LanguagesNode(all_languages[this_lang_code])
+ new_nodes = LanguagesNode()
+ new_nodes['current_language'] = all_languages[this_lang_code]
for lang_code, lang_name in all_languages.items():
if lang_code == this_lang_code:
@@ -84,7 +82,7 @@ def process_languages(app, doctree, docname):
html_content = app.builder.templates.render('translations.html',
context={
- 'current_language': node.current_language,
+ 'current_language': node['current_language'],
'languages': languages,
})
diff --git a/Documentation/staging/rpmsg.rst b/Documentation/staging/rpmsg.rst
index dba3e5f65612..3713adaa1608 100644
--- a/Documentation/staging/rpmsg.rst
+++ b/Documentation/staging/rpmsg.rst
@@ -157,7 +157,7 @@ Returns 0 on success and an appropriate error value on failure.
int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
-sends a message across to the remote processor from a given endoint,
+sends a message across to the remote processor from a given endpoint,
to a destination address provided by the user.
The user should specify the channel, the data it wants to send,
diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst
index 2d353fb8ea26..74af50d2ef7f 100644
--- a/Documentation/subsystem-apis.rst
+++ b/Documentation/subsystem-apis.rst
@@ -61,6 +61,8 @@ Storage interfaces
scsi/index
target/index
+Other subsystems
+----------------
**Fixme**: much more organizational work is needed here.
.. toctree::
diff --git a/Documentation/translations/it_IT/RCU/index.rst b/Documentation/translations/it_IT/RCU/index.rst
new file mode 100644
index 000000000000..22adf1d58752
--- /dev/null
+++ b/Documentation/translations/it_IT/RCU/index.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _it_rcu_concepts:
+
+===============
+Concetti su RCU
+===============
+
+.. toctree::
+ :maxdepth: 3
+
+ torture
+
+.. only:: subproject and html
+
+ Indici
+ ======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/it_IT/RCU/torture.rst b/Documentation/translations/it_IT/RCU/torture.rst
new file mode 100644
index 000000000000..189f7c6caebc
--- /dev/null
+++ b/Documentation/translations/it_IT/RCU/torture.rst
@@ -0,0 +1,369 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-ita.rst
+
+=============================================
+Le operazioni RCU per le verifiche *torture*
+=============================================
+
+CONFIG_RCU_TORTURE_TEST
+=======================
+
+L'opzione CONFIG_RCU_TORTURE_TEST è disponibile per tutte le implementazione di
+RCU. L'opzione creerà un modulo rcutorture che potrete caricare per avviare le
+verifiche. La verifica userà printk() per riportare lo stato, dunque potrete
+visualizzarlo con dmesg (magari usate grep per filtrare "torture"). Le verifiche
+inizieranno al caricamento, e si fermeranno alla sua rimozione.
+
+I parametri di modulo hanno tutti il prefisso "rcutortute.", vedere
+Documentation/admin-guide/kernel-parameters.txt.
+
+Rapporto
+========
+
+Il rapporto sulle verifiche si presenta nel seguente modo::
+
+ rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+ rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
+ rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
+ rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
+ rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
+ rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+
+Sulla maggior parte dei sistemi questo rapporto si produce col comando "dmesg |
+grep torture:". Su configurazioni più esoteriche potrebbe essere necessario
+usare altri comandi per visualizzare i messaggi di printk(). La funzione
+printk() usa KERN_ALERT, dunque i messaggi dovrebbero essere ben visibili. ;-)
+
+La prima e l'ultima riga mostrano i parametri di module di rcutorture, e solo
+sull'ultima riga abbiamo il risultato finale delle verifiche effettuate che può
+essere "SUCCESS" (successo) or "FAILURE" (insuccesso).
+
+Le voci sono le seguenti:
+
+* "rtc": L'indirizzo in esadecimale della struttura attualmente visibile dai
+ lettori.
+
+* "ver": Il numero di volte dall'avvio che il processo scrittore di RCU ha
+ cambiato la struttura visible ai lettori.
+
+* "tfle": se non è zero, indica la lista di strutture "torture freelist" da
+ mettere in "rtc" è vuota. Questa condizione è importante perché potrebbe
+ illuderti che RCU stia funzionando mentre invece non è il caso. :-/
+
+* "rta": numero di strutture allocate dalla lista "torture freelist".
+
+* "rtaf": il numero di allocazioni fallite dalla lista "torture freelist" a
+ causa del fatto che fosse vuota. Non è inusuale che sia diverso da zero, ma è
+ un brutto segno se questo numero rappresenta una frazione troppo alta di
+ "rta".
+
+* "rtf": il numero di rilasci nella lista "torture freelist"
+
+* "rtmbe": Un valore diverso da zero indica che rcutorture crede che
+ rcu_assign_pointer() e rcu_dereference() non funzionino correttamente. Il
+ valore dovrebbe essere zero.
+
+* "rtbe": un valore diverso da zero indica che le funzioni della famiglia
+ rcu_barrier() non funzionano correttamente.
+
+* "rtbke": rcutorture è stato capace di creare dei kthread real-time per forzare
+ l'inversione di priorità di RCU. Il valore dovrebbe essere zero.
+
+* "rtbre": sebbene rcutorture sia riuscito a creare dei kthread capaci di
+ forzare l'inversione di priorità, non è riuscito però ad impostarne la
+ priorità real-time al livello 1. Il valore dovrebbe essere zero.
+
+* "rtbf": Il numero di volte che è fallita la promozione della priorità per
+ risolvere un'inversione.
+
+* "rtb": Il numero di volte che rcutorture ha provato a forzare l'inversione di
+ priorità. Il valore dovrebbe essere diverso da zero Se state verificando la
+ promozione della priorità col parametro "test_bootst".
+
+* "nt": il numero di volte che rcutorture ha eseguito codice lato lettura
+ all'interno di un gestore di *timer*. Questo valore dovrebbe essere diverso da
+ zero se avete specificato il parametro "irqreader".
+
+* "Reader Pipe": un istogramma dell'età delle strutture viste dai lettori. RCU
+ non funziona correttamente se una qualunque voce, dalla terza in poi, ha un
+ valore diverso da zero. Se dovesse succedere, rcutorture stampa la stringa
+ "!!!" per renderlo ben visibile. L'età di una struttura appena creata è zero,
+ diventerà uno quando sparisce dalla visibilità di un lettore, e incrementata
+ successivamente per ogni periodo di grazia; infine rilasciata dopo essere
+ passata per (RCU_TORTURE_PIPE_LEN-2) periodi di grazia.
+
+ L'istantanea qui sopra è stata presa da una corretta implementazione di RCU.
+ Se volete vedere come appare quando non funziona, sbizzarritevi nel romperla.
+ ;-)
+
+* "Reader Batch": un istogramma di età di strutture viste dai lettori, ma
+ conteggiata in termini di lotti piuttosto che periodi. Anche qui dalla terza
+ voce in poi devono essere zero. La ragione d'esistere di questo rapporto è che
+ a volte è più facile scatenare un terzo valore diverso da zero qui piuttosto
+ che nella lista "Reader Pipe".
+
+* "Free-Block Circulation": il numero di strutture *torture* che hanno raggiunto
+ un certo punto nella catena. Il primo numero dovrebbe corrispondere
+ strettamente al numero di strutture allocate; il secondo conta quelle rimosse
+ dalla vista dei lettori. Ad eccezione dell'ultimo valore, gli altri
+ corrispondono al numero di passaggi attraverso il periodo di grazia. L'ultimo
+ valore dovrebbe essere zero, perché viene incrementato solo se il contatore
+ della struttura torture viene in un qualche modo incrementato oltre il
+ normale.
+
+Una diversa implementazione di RCU potrebbe fornire informazioni aggiuntive. Per
+esempio, *Tree SRCU* fornisce anche la seguente riga::
+
+ srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
+
+Questa riga mostra lo stato dei contatori per processore, in questo caso per
+*Tree SRCU*, usando un'allocazione dinamica di srcu_struct (dunque "srcud-"
+piuttosto che "srcu-"). I numeri fra parentesi sono i valori del "vecchio"
+contatore e di quello "corrente" per ogni processore. Il valore "idx" mappa
+questi due valori nell'array, ed è utile per il *debug*. La "T" finale contiene
+il valore totale dei contatori.
+
+Uso su specifici kernel
+=======================
+
+A volte può essere utile eseguire RCU torture su un kernel già compilato, ad
+esempio quando lo si sta per mettere in proeduzione. In questo caso, il kernel
+dev'essere compilato con CONFIG_RCU_TORTURE_TEST=m, cosicché le verifiche possano
+essere avviate usano modprobe e terminate con rmmod.
+
+Per esempio, potreste usare questo script::
+
+ #!/bin/sh
+
+ modprobe rcutorture
+ sleep 3600
+ rmmod rcutorture
+ dmesg | grep torture:
+
+Potete controllare il rapporto verificando manualmente la presenza del marcatore
+di errore "!!!". Ovviamente, siete liberi di scriverne uno più elaborato che
+identifichi automaticamente gli errori. Il comando "rmmod" forza la stampa di
+"SUCCESS" (successo), "FAILURE" (fallimento), o "RCU_HOTPLUG". I primi due sono
+autoesplicativi; invece, l'ultimo indica che non son stati trovati problemi in
+RCU, tuttavia ci sono stati problemi con CPU-hotplug.
+
+
+Uso sul kernel di riferimento
+=============================
+
+Quando si usa rcutorture per verificare modifiche ad RCU stesso, spesso è
+necessario compilare un certo numero di kernel usando configurazioni diverse e
+con parametri d'avvio diversi. In questi casi, usare modprobe ed rmmod potrebbe
+richiedere molto tempo ed il processo essere suscettibile ad errori.
+
+Dunque, viene messo a disposizione il programma
+tools/testing/selftests/rcutorture/bin/kvm.sh per le architetture x86, arm64 e
+powerpc. Di base, eseguirà la serie di verifiche elencate in
+tools/testing/selftests/rcutorture/configs/rcu/CFLIST. Ognuna di queste verrà
+eseguita per 30 minuti in una macchina virtuale con uno spazio utente minimale
+fornito da un initrd generato automaticamente. Al completamento, gli artefatti
+prodotti e i messaggi vengono analizzati alla ricerca di errori, ed i risultati
+delle esecuzioni riassunti in un rapporto.
+
+Su grandi sistemi, le verifiche di rcutorture posso essere velocizzare passano a
+kvm.sh l'argomento --cpus. Per esempio, su un sistema a 64 processori, "--cpus
+43" userà fino a 43 processori per eseguire contemporaneamente le verifiche. Su
+un kernel v5.4 per eseguire tutti gli scenari in due serie, riduce il tempo
+d'esecuzione da otto ore a un'ora (senza contare il tempo per compilare sedici
+kernel). L'argomento "--dryrun sched" non eseguirà verifiche, piuttosto vi
+informerà su come queste verranno organizzate in serie. Questo può essere utile
+per capire quanti processori riservare per le verifiche in --cpus.
+
+Non serve eseguire tutti gli scenari di verifica per ogni modifica. Per esempio,
+per una modifica a Tree SRCU potete eseguire gli scenari SRCU-N e SRCU-P. Per
+farlo usate l'argomento --configs di kvm.sh in questo modo: "--configs 'SRCU-N
+SRCU-P'". Su grandi sistemi si possono eseguire più copie degli stessi scenari,
+per esempio, un hardware che permette di eseguire 448 thread, può eseguire 5
+istanze complete contemporaneamente. Per farlo::
+
+ kvm.sh --cpus 448 --configs '5*CFLIST'
+
+Oppure, lo stesso sistema, può eseguire contemporaneamente 56 istanze dello
+scenario su otto processori::
+
+ kvm.sh --cpus 448 --configs '56*TREE04'
+
+O ancora 28 istanze per ogni scenario su otto processori::
+
+ kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04'
+
+Ovviamente, ogni esecuzione utilizzerà della memoria. Potete limitarne l'uso con
+l'argomento --memory, che di base assume il valore 512M. Per poter usare valori
+piccoli dovrete disabilitare le verifiche *callback-flooding* usando il
+parametro --bootargs che vedremo in seguito.
+
+A volte è utile avere informazioni aggiuntive di debug, in questo caso potete
+usare il parametro --kconfig, per esempio, ``--kconfig
+'CONFIG_RCU_EQS_DEBUG=y'``. In aggiunta, ci sono i parametri --gdb, --kasan, and
+kcsan. Da notare che --gdb vi limiterà all'uso di un solo scenario per
+esecuzione di kvm.sh e richiede di avere anche un'altra finestra aperta dalla
+quale eseguire ``gdb`` come viene spiegato dal programma.
+
+Potete passare anche i parametri d'avvio del kernel, per esempio, per
+controllare i parametri del modulo rcutorture. Per esempio, per verificare
+modifiche del codice RCU CPU stall-warning, usate ``bootargs
+'rcutorture.stall_cpu=30``. Il programma riporterà un fallimento, ossia il
+risultato della verifica. Come visto in precedenza, ridurre la memoria richiede
+la disabilitazione delle verifiche *callback-flooding*::
+
+ kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
+ --bootargs 'rcutorture.fwd_progress=0'
+
+A volte tutto quello che serve è una serie completa di compilazioni del kernel.
+Questo si ottiene col parametro --buildonly.
+
+Il parametro --duration sovrascrive quello di base di 30 minuti. Per esempio,
+con ``--duration 2d`` l'esecuzione sarà di due giorni, ``--duraction 5min`` di
+cinque minuti, e ``--duration 45s`` di 45 secondi. L'ultimo può essere utile per
+scovare rari errori nella sequenza d'avvio.
+
+Infine, il parametro --trust-make permette ad ogni nuova compilazione del kernel
+di riutilizzare tutto il possibile da quelle precedenti. Da notare che senza il
+parametro --trust-make, i vostri file di *tag* potrebbero essere distrutti.
+
+Ci sono altri parametri più misteriosi che sono documentati nel codice sorgente
+dello programma kvm.sh.
+
+Se un'esecuzione contiene degli errori, il loro numero durante la compilazione e
+all'esecuzione verranno elencati alla fine fra i risultati di kvm.sh (che vi
+consigliamo caldamente di reindirizzare verso un file). I file prodotti dalla
+compilazione ed i risultati stampati vengono salvati, usando un riferimento
+temporale, nelle cartella tools/testing/selftests/rcutorture/res. Una cartella
+di queste cartelle può essere fornita a kvm-find-errors.sh per estrarne gli
+errori. Per esempio::
+
+ tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
+ tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23
+
+Tuttavia, molto spesso è più conveniente aprire i file direttamente. I file
+riguardanti tutti gli scenari di un'esecuzione di trovano nella cartella
+principale (2020.01.20-15.54.23 nell'esempio precedente), mentre quelli
+specifici per scenario si trovano in sotto cartelle che prendono il nome dello
+scenario stesso (per esempio, "TREE04"). Se un dato scenario viene eseguito più
+di una volta (come abbiamo visto con "--configs '56*TREE04'"), allora dalla
+seconda esecuzione in poi le sottocartelle includeranno un numero di
+progressione, per esempio "TREE04.2", "TREE04.3", e via dicendo.
+
+Il file solitamente più usato nella cartella principale è testid.txt. Se la
+verifica viene eseguita in un repositorio git, allora questo file conterrà il
+*commit* sul quale si basano le verifiche, mentre tutte le modifiche non
+registrare verranno mostrate in formato diff.
+
+I file solitamente più usati nelle cartelle di scenario sono:
+
+.config
+ Questo file contiene le opzioni di Kconfig
+
+Make.out
+ Questo file contiene il risultato di compilazione per uno specifico scenario
+
+console.log
+ Questo file contiene il risultato d'esecuzione per uno specifico scenario.
+ Questo file può essere esaminato una volta che il kernel è stato avviato,
+ ma potrebbe non esistere se l'avvia non è fallito.
+
+vmlinux
+ Questo file contiene il kernel, e potrebbe essere utile da esaminare con
+ programmi come pbjdump e gdb
+
+Ci sono altri file, ma vengono usati meno. Molti sono utili all'analisi di
+rcutorture stesso o dei suoi programmi.
+
+Nel kernel v5.4, su un sistema a 12 processori, un'esecuzione senza errori
+usando gli scenari di base produce il seguente risultato::
+
+ SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
+ SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
+ SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
+ SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
+ TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
+ TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
+ TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
+ TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
+ TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
+ TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
+ TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
+ TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
+ CPU count limited from 16 to 12
+ TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
+ TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
+ TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
+ CPU count limited from 16 to 12
+ TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
+
+Ripetizioni
+===========
+
+Immaginate di essere alla caccia di un raro problema che si verifica all'avvio.
+Potreste usare kvm.sh, tuttavia questo ricompilerebbe il kernel ad ogni
+esecuzione. Se avete bisogno di (diciamo) 1000 esecuzioni per essere sicuri di
+aver risolto il problema, allora queste inutili ricompilazioni possono diventare
+estremamente fastidiose.
+
+Per questo motivo esiste kvm-again.sh.
+
+Immaginate che un'esecuzione precedente di kvm.sh abbia lasciato i suoi
+artefatti nella cartella::
+
+ tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
+
+Questa esecuzione può essere rieseguita senza ricompilazioni::
+
+ kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
+
+Alcuni dei parametri originali di kvm.sh possono essere sovrascritti, in
+particolare --duration e --bootargs. Per esempio::
+
+ kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28 \
+ --duration 45s
+
+rieseguirebbe il test precedente, ma solo per 45 secondi, e quindi aiutando a
+trovare quel raro problema all'avvio sopracitato.
+
+Esecuzioni distribuite
+======================
+
+Sebbene kvm.sh sia utile, le sue verifiche sono limitate ad un singolo sistema.
+Non è poi così difficile usare un qualsiasi ambiente di sviluppo per eseguire
+(diciamo) 5 istanze di kvm.sh su altrettanti sistemi, ma questo avvierebbe
+inutili ricompilazioni del kernel. In aggiunta, il processo di distribuzione
+degli scenari di verifica per rcutorture sui sistemi disponibili richiede
+scrupolo perché soggetto ad errori.
+
+Per questo esiste kvm-remote.sh.
+
+Se il seguente comando funziona::
+
+ ssh system0 date
+
+e funziona anche per system1, system2, system3, system4, e system5, e tutti
+questi sistemi hanno 64 CPU, allora potere eseguire::
+
+ kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
+ --cpus 64 --duration 8h --configs "5*CFLIST"
+
+Questo compilerà lo scenario di base sul sistema locale, poi lo distribuirà agli
+altri cinque sistemi elencati fra i parametri, ed eseguirà ogni scenario per
+otto ore. Alla fine delle esecuzioni, i risultati verranno raccolti, registrati,
+e stampati. La maggior parte dei parametri di kvm.sh possono essere usati con
+kvm-remote.sh, tuttavia la lista dei sistemi deve venire sempre per prima.
+
+L'argomento di kvm.sh ``--dryrun scenarios`` può essere utile per scoprire
+quanti scenari potrebbero essere eseguiti in gruppo di sistemi.
+
+Potete rieseguire anche una precedente esecuzione remota come abbiamo già fatto
+per kvm.sh::
+
+ kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
+ tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28-remote \
+ --duration 24h
+
+In questo caso, la maggior parte dei parametri di kvm-again.sh possono essere
+usati dopo il percorso alla cartella contenente gli artefatti dell'esecuzione da
+ripetere.
diff --git a/Documentation/translations/it_IT/core-api/index.rst b/Documentation/translations/it_IT/core-api/index.rst
index cc4c4328ad03..dad20402d11b 100644
--- a/Documentation/translations/it_IT/core-api/index.rst
+++ b/Documentation/translations/it_IT/core-api/index.rst
@@ -10,6 +10,18 @@ Utilità di base
symbol-namespaces
+Primitive di sincronizzazione
+=============================
+
+Come Linux impedisce che tutto si verifichi contemporaneamente. Consultate
+Documentation/translations/it_IT/locking/index.rst per maggiorni informazioni
+sul tema.
+
+.. toctree::
+ :maxdepth: 1
+
+ ../RCU/index
+
.. only:: subproject and html
Indices
diff --git a/Documentation/translations/it_IT/i2c/i2c-protocol.rst b/Documentation/translations/it_IT/i2c/i2c-protocol.rst
new file mode 100644
index 000000000000..ba7c8cd8f560
--- /dev/null
+++ b/Documentation/translations/it_IT/i2c/i2c-protocol.rst
@@ -0,0 +1,99 @@
+=================
+Il protocollo I2C
+=================
+
+Questo documento è una panoramica delle transazioni di base I2C e delle API
+del kernel per eseguirli.
+
+Spiegazione dei simboli
+=======================
+
+=============== ===========================================================
+S Condizione di avvio
+P Condizione di stop
+Rd/Wr (1 bit) Bit di lettura/scrittura. Rd vale 1, Wr vale 0.
+A, NA (1 bit) Bit di riconoscimento (ACK) e di non riconoscimento (NACK).
+Addr (7 bit) Indirizzo I2C a 7 bit. Nota che questo può essere espanso
+ per ottenere un indirizzo I2C a 10 bit.
+Dati (8 bit) Un byte di dati.
+
+[..] Fra parentesi quadre i dati inviati da dispositivi I2C,
+ anziché dal master.
+=============== ===========================================================
+
+
+Transazione semplice di invio
+=============================
+
+Implementato da i2c_master_send()::
+
+ S Addr Wr [A] Dati [A] Dati [A] ... [A] Dati [A] P
+
+
+Transazione semplice di ricezione
+=================================
+
+Implementato da i2c_master_recv()::
+
+ S Addr Rd [A] [Dati] A [Dati] A ... A [Dati] NA P
+
+
+Transazioni combinate
+=====================
+
+Implementato da i2c_transfer().
+
+Sono come le transazioni di cui sopra, ma invece di uno condizione di stop P
+viene inviata una condizione di inizio S e la transazione continua.
+Un esempio di lettura di un byte, seguita da una scrittura di un byte::
+
+ S Addr Rd [A] [Dati] NA S Addr Wr [A] Dati [A] P
+
+
+Transazioni modificate
+======================
+
+Le seguenti modifiche al protocollo I2C possono essere generate
+impostando questi flag per i messaggi I2C. Ad eccezione di I2C_M_NOSTART, sono
+di solito necessari solo per risolvere problemi di un dispositivo:
+
+I2C_M_IGNORE_NAK:
+ Normalmente il messaggio viene interrotto immediatamente se il dispositivo
+ risponde con [NA]. Impostando questo flag, si considera qualsiasi [NA] come
+ [A] e tutto il messaggio viene inviato.
+ Questi messaggi potrebbero comunque non riuscire a raggiungere il timeout
+ SCL basso->alto.
+
+I2C_M_NO_RD_ACK:
+ In un messaggio di lettura, il bit A/NA del master viene saltato.
+
+I2C_M_NOSTART:
+ In una transazione combinata, potrebbe non essere generato alcun
+ "S Addr Wr/Rd [A]".
+ Ad esempio, impostando I2C_M_NOSTART sul secondo messaggio parziale
+ genera qualcosa del tipo::
+
+ S Addr Rd [A] [Dati] NA Dati [A] P
+
+ Se si imposta il flag I2C_M_NOSTART per il primo messaggio parziale,
+ non viene generato Addr, ma si genera la condizione di avvio S.
+ Questo probabilmente confonderà tutti gli altri dispositivi sul bus, quindi
+ meglio non usarlo.
+
+ Questo viene spesso utilizzato per raccogliere le trasmissioni da più
+ buffer di dati presenti nella memoria di sistema in qualcosa che appare
+ come un singolo trasferimento verso il dispositivo I2C. Inoltre, alcuni
+ dispositivi particolari lo utilizzano anche tra i cambi di direzione.
+
+I2C_M_REV_DIR_ADDR:
+ Questo inverte il flag Rd/Wr. Cioè, se si vuole scrivere, ma si ha bisogno
+ di emettere una Rd invece di una Wr, o viceversa, si può impostare questo
+ flag.
+ Per esempio::
+
+ S Addr Rd [A] Dati [A] Dati [A] ... [A] Dati [A] P
+
+I2C_M_STOP:
+ Forza una condizione di stop (P) dopo il messaggio. Alcuni protocolli
+ simili a I2C come SCCB lo richiedono. Normalmente, non si vuole essere
+ interrotti tra i messaggi di un trasferimento.
diff --git a/Documentation/translations/it_IT/i2c/index.rst b/Documentation/translations/it_IT/i2c/index.rst
new file mode 100644
index 000000000000..14fbe3d78299
--- /dev/null
+++ b/Documentation/translations/it_IT/i2c/index.rst
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Il sottosistema I2C/SMBus
+=========================
+
+Introduzione
+============
+
+.. toctree::
+ :maxdepth: 1
+
+ summary
+ i2c-protocol
+
+Scrivere un device driver
+=========================
+
+.. toctree::
+ :maxdepth: 1
+
+Debugging
+=========
+
+.. toctree::
+ :maxdepth: 1
+
+Slave I2C
+=========
+
+.. toctree::
+ :maxdepth: 1
+
+
+Argomenti avanzati
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+.. only:: subproject and html
+
+ Indici
+ ======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/it_IT/i2c/summary.rst b/Documentation/translations/it_IT/i2c/summary.rst
new file mode 100644
index 000000000000..1535e13a32e2
--- /dev/null
+++ b/Documentation/translations/it_IT/i2c/summary.rst
@@ -0,0 +1,64 @@
+==========================
+Introduzione a I2C e SMBus
+==========================
+
+I²C (letteralmente "I al quadrato C" e scritto I2C nella documentazione del
+kernel) è un protocollo sviluppato da Philips. É un protocollo lento a 2 fili
+(a velocità variabile, al massimo 400KHz), con un'estensione per le velocità
+elevate (3.4 MHz). Questo protocollo offre un bus a basso costo per collegare
+dispositivi di vario genere a cui si accede sporadicamente e utilizzando
+poca banda. Alcuni sistemi usano varianti che non rispettano i requisiti
+originali, per cui non sono indicati come I2C, ma hanno nomi diversi, per
+esempio TWI (Interfaccia a due fili), IIC.
+
+L'ultima specifica ufficiale I2C è la `"Specifica I2C-bus e manuale utente"
+(UM10204) <https://www.nxp.com/webapp/Download?colCode=UM10204>`_
+pubblicata da NXP Semiconductors. Tuttavia, è necessario effettuare il login
+al sito per accedere al PDF. Una versione precedente della specifica
+(revisione 6) è archiviata
+`qui <https://web.archive.org/web/20210813122132/
+https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_.
+
+SMBus (Bus per la gestione del sistema) si basa sul protocollo I2C ed è
+principalmente un sottoinsieme di protocolli e segnali I2C. Molti dispositivi
+I2C funzioneranno su SMBus, ma alcuni protocolli SMBus aggiungono semantica
+oltre quanto richiesto da I2C. Le moderne schede madri dei PC si affidano a
+SMBus. I più comuni dispositivi collegati tramite SMBus sono moduli RAM
+configurati utilizzando EEPROM I2C, e circuiti integrati di monitoraggio
+hardware.
+
+Poiché SMBus è principalmente un sottoinsieme del bus I2C,
+possiamo farne uso su molti sistemi I2C. Ci sono però sistemi che non
+soddisfano i vincoli elettrici sia di SMBus che di I2C; e altri che non possono
+implementare tutta la semantica o messaggi comuni del protocollo SMBus.
+
+
+Terminologia
+============
+
+Utilizzando la terminologia della documentazione ufficiale, il bus I2C connette
+uno o più circuiti integrati *master* e uno o più circuiti integrati *slave*.
+
+.. kernel-figure:: ../../../i2c/i2c_bus.svg
+ :alt: Un semplice bus I2C con un master e 3 slave
+
+ Un semplice Bus I2C
+
+Un circuito integrato **master** è un nodo che inizia le comunicazioni con gli
+slave. Nell'implementazione del kernel Linux è chiamato **adattatore** o bus. I
+driver degli adattatori si trovano nella sottocartella ``drivers/i2c/busses/``.
+
+Un **algoritmo** contiene codice generico che può essere utilizzato per
+implementare una intera classe di adattatori I2C. Ciascun driver dell'
+adattatore specifico dipende da un driver dell'algoritmo nella sottocartella
+``drivers/i2c/algos/`` o include la propria implementazione.
+
+Un circuito integrato **slave** è un nodo che risponde alle comunicazioni
+quando indirizzato dal master. In Linux è chiamato **client** (dispositivo). I
+driver dei dispositivi sono contenuti in una cartella specifica per la
+funzionalità che forniscono, ad esempio ``drivers/media/gpio/`` per espansori
+GPIO e ``drivers/media/i2c/`` per circuiti integrati relativi ai video.
+
+Per la configurazione di esempio in figura, avrai bisogno di un driver per il
+tuo adattatore I2C e driver per i tuoi dispositivi I2C (solitamente un driver
+per ciascuno dispositivo).
diff --git a/Documentation/translations/it_IT/index.rst b/Documentation/translations/it_IT/index.rst
index b95dfa1ded04..70ccd23b2cde 100644
--- a/Documentation/translations/it_IT/index.rst
+++ b/Documentation/translations/it_IT/index.rst
@@ -91,6 +91,8 @@ interfacciarsi con il resto del kernel.
:maxdepth: 1
core-api/index
+ Sincronizzazione nel kernel <locking/index>
+ subsystem-apis
Strumenti e processi per lo sviluppo
====================================
diff --git a/Documentation/translations/it_IT/locking/index.rst b/Documentation/translations/it_IT/locking/index.rst
new file mode 100644
index 000000000000..19963d33e84d
--- /dev/null
+++ b/Documentation/translations/it_IT/locking/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Sincronizzazione
+================
+
+.. toctree::
+ :maxdepth: 1
+
+ locktypes
+ lockdep-design
+ lockstat
+ locktorture
+
+.. only:: subproject and html
+
+ Indici
+ ======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/it_IT/locking/lockdep-design.rst b/Documentation/translations/it_IT/locking/lockdep-design.rst
new file mode 100644
index 000000000000..9ed00d8cf280
--- /dev/null
+++ b/Documentation/translations/it_IT/locking/lockdep-design.rst
@@ -0,0 +1,678 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-ita.rst
+
+Validatore di sincronizzazione durante l'esecuzione
+===================================================
+
+Classi di blocchi
+-----------------
+
+L'oggetto su cui il validatore lavora è una "classe" di blocchi.
+
+Una classe di blocchi è un gruppo di blocchi che seguono le stesse regole di
+sincronizzazione, anche quando i blocchi potrebbero avere più istanze (anche
+decine di migliaia). Per esempio un blocco nella struttura inode è una classe,
+mentre ogni inode sarà un'istanza di questa classe di blocco.
+
+Il validatore traccia lo "stato d'uso" di una classe di blocchi e le sue
+dipendenze con altre classi. L'uso di un blocco indica come quel blocco viene
+usato rispetto al suo contesto d'interruzione, mentre le dipendenze di un blocco
+possono essere interpretate come il loro ordine; per esempio L1 -> L2 suggerisce
+che un processo cerca di acquisire L2 mentre già trattiene L1. Dal punto di
+vista di lockdep, i due blocchi (L1 ed L2) non sono per forza correlati: quella
+dipendenza indica solamente l'ordine in cui sono successe le cose. Il validatore
+verifica permanentemente la correttezza dell'uso dei blocchi e delle loro
+dipendenze, altrimenti ritornerà un errore.
+
+Il comportamento di una classe di blocchi viene costruito dall'insieme delle sue
+istanze. Una classe di blocco viene registrata alla creazione della sua prima
+istanza, mentre tutte le successive istanze verranno mappate; dunque, il loro
+uso e le loro dipendenze contribuiranno a costruire quello della classe. Una
+classe di blocco non sparisce quando sparisce una sua istanza, ma può essere
+rimossa quando il suo spazio in memoria viene reclamato. Per esempio, questo
+succede quando si rimuove un modulo, o quando una *workqueue* viene eliminata.
+
+Stato
+-----
+
+Il validatore traccia l'uso cronologico delle classi di blocchi e ne divide
+l'uso in categorie (4 USI * n STATI + 1).
+
+I quattro USI possono essere:
+
+- 'sempre trattenuto nel contesto <STATO>'
+- 'sempre trattenuto come blocco di lettura nel contesto <STATO>'
+- 'sempre trattenuto con <STATO> abilitato'
+- 'sempre trattenuto come blocco di lettura con <STATO> abilitato'
+
+gli `n` STATI sono codificati in kernel/locking/lockdep_states.h, ad oggi
+includono:
+
+- hardirq
+- softirq
+
+infine l'ultima categoria è:
+
+- 'sempre trattenuto' [ == !unused ]
+
+Quando vengono violate le regole di sincronizzazione, questi bit di utilizzo
+vengono presentati nei messaggi di errore di sincronizzazione, fra parentesi
+graffe, per un totale di `2 * n` (`n`: bit STATO). Un esempio inventato::
+
+ modprobe/2287 is trying to acquire lock:
+ (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+ but task is already holding lock:
+ (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+Per un dato blocco, da sinistra verso destra, la posizione del bit indica l'uso
+del blocco e di un eventuale blocco di lettura, per ognuno degli `n` STATI elencati
+precedentemente. Il carattere mostrato per ogni bit indica:
+
+ === ===========================================================================
+ '.' acquisito con interruzioni disabilitate fuori da un contesto d'interruzione
+ '-' acquisito in contesto d'interruzione
+ '+' acquisito con interruzioni abilitate
+ '?' acquisito in contesto d'interruzione con interruzioni abilitate
+ === ===========================================================================
+
+Il seguente esempio mostra i bit::
+
+ (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+ ||||
+ ||| \-> softirq disabilitati e fuori da un contesto di softirq
+ || \--> acquisito in un contesto di softirq
+ | \---> hardirq disabilitati e fuori da un contesto di hardirq
+ \----> acquisito in un contesto di hardirq
+
+Per un dato STATO, che il blocco sia mai stato acquisito in quel contesto di
+STATO, o che lo STATO sia abilitato, ci lascia coi quattro possibili scenari
+mostrati nella seguente tabella. Il carattere associato al bit indica con
+esattezza in quale scenario ci si trova al momento del rapporto.
+
+ +---------------+---------------+------------------+
+ | | irq abilitati | irq disabilitati |
+ +---------------+---------------+------------------+
+ | sempre in irq | '?' | '-' |
+ +---------------+---------------+------------------+
+ | mai in irq | '+' | '.' |
+ +---------------+---------------+------------------+
+
+Il carattere '-' suggerisce che le interruzioni sono disabilitate perché
+altrimenti verrebbe mostrato il carattere '?'. Una deduzione simile può essere
+fatta anche per '+'
+
+I blocchi inutilizzati (ad esempio i mutex) non possono essere fra le cause di
+un errore.
+
+Regole dello stato per un blocco singolo
+----------------------------------------
+
+Avere un blocco sicuro in interruzioni (*irq-safe*) significa che è sempre stato
+usato in un contesto d'interruzione, mentre un blocco insicuro in interruzioni
+(*irq-unsafe*) significa che è sempre stato acquisito con le interruzioni
+abilitate.
+
+Una classe softirq insicura è automaticamente insicura anche per hardirq. I
+seguenti stati sono mutualmente esclusivi: solo una può essere vero quando viene
+usata una classe di blocco::
+
+ <hardirq-safe> o <hardirq-unsafe>
+ <softirq-safe> o <softirq-unsafe>
+
+Questo perché se un blocco può essere usato in un contesto di interruzioni
+(sicuro in interruzioni), allora non può mai essere acquisito con le
+interruzioni abilitate (insicuro in interruzioni). Altrimenti potrebbe
+verificarsi uno stallo. Per esempio, questo blocco viene acquisito, ma prima di
+essere rilasciato il contesto d'esecuzione viene interrotto nuovamente, e quindi
+si tenterà di acquisirlo nuovamente. Questo porterà ad uno stallo, in
+particolare uno stallo ricorsivo.
+
+Il validatore rileva e riporta gli usi di blocchi che violano queste regole per
+blocchi singoli.
+
+Regole per le dipendenze di blocchi multipli
+--------------------------------------------
+
+La stessa classe di blocco non deve essere acquisita due volte, questo perché
+potrebbe portare ad uno blocco ricorsivo e dunque ad uno stallo.
+
+Inoltre, due blocchi non possono essere trattenuti in ordine inverso::
+
+ <L1> -> <L2>
+ <L2> -> <L1>
+
+perché porterebbe ad uno stallo - chiamato stallo da blocco inverso - in cui si
+cerca di trattenere i due blocchi in un ciclo in cui entrambe i contesti
+aspettano per sempre che l'altro termini. Il validatore è in grado di trovare
+queste dipendenze cicliche di qualsiasi complessità, ovvero nel mezzo ci
+potrebbero essere altre sequenze di blocchi. Il validatore troverà se questi
+blocchi possono essere acquisiti circolarmente.
+
+In aggiunta, le seguenti sequenze di blocco nei contesti indicati non sono
+permesse, indipendentemente da quale che sia la classe di blocco::
+
+ <hardirq-safe> -> <hardirq-unsafe>
+ <softirq-safe> -> <softirq-unsafe>
+
+La prima regola deriva dal fatto che un blocco sicuro in interruzioni può essere
+trattenuto in un contesto d'interruzione che, per definizione, ha la possibilità
+di interrompere un blocco insicuro in interruzioni; questo porterebbe ad uno
+stallo da blocco inverso. La seconda, analogamente, ci dice che un blocco sicuro
+in interruzioni software potrebbe essere trattenuto in un contesto di
+interruzione software, dunque potrebbe interrompere un blocco insicuro in
+interruzioni software.
+
+Le suddette regole vengono applicate per qualsiasi sequenza di blocchi: quando
+si acquisiscono nuovi blocchi, il validatore verifica se vi è una violazione
+delle regole fra il nuovo blocco e quelli già trattenuti.
+
+Quando una classe di blocco cambia stato, applicheremo le seguenti regole:
+
+- se viene trovato un nuovo blocco sicuro in interruzioni, verificheremo se
+ abbia mai trattenuto dei blocchi insicuri in interruzioni.
+
+- se viene trovato un nuovo blocco sicuro in interruzioni software,
+ verificheremo se abbia trattenuto dei blocchi insicuri in interruzioni
+ software.
+
+- se viene trovato un nuovo blocco insicuro in interruzioni, verificheremo se
+ abbia trattenuto dei blocchi sicuri in interruzioni.
+
+- se viene trovato un nuovo blocco insicuro in interruzioni software,
+ verificheremo se abbia trattenuto dei blocchi sicuri in interruzioni
+ software.
+
+(Di nuovo, questi controlli vengono fatti perché un contesto d'interruzione
+potrebbe interrompere l'esecuzione di qualsiasi blocco insicuro portando ad uno
+stallo; questo anche se lo stallo non si verifica in pratica)
+
+Eccezione: dipendenze annidate sui dati portano a blocchi annidati
+------------------------------------------------------------------
+
+Ci sono alcuni casi in cui il kernel Linux acquisisce più volte la stessa
+istanza di una classe di blocco. Solitamente, questo succede quando esiste una
+gerarchia fra oggetti dello stesso tipo. In questi casi viene ereditato
+implicitamente l'ordine fra i due oggetti (definito dalle proprietà di questa
+gerarchia), ed il kernel tratterrà i blocchi in questo ordine prefissato per
+ognuno degli oggetti.
+
+Un esempio di questa gerarchia di oggetti che producono "blocchi annidati" sono
+i *block-dev* che rappresentano l'intero disco e quelli che rappresentano una
+sua partizione; la partizione è una parte del disco intero, e l'ordine dei
+blocchi sarà corretto fintantoche uno acquisisce il blocco del disco intero e
+poi quello della partizione. Il validatore non rileva automaticamente questo
+ordine implicito, perché queste regole di sincronizzazione non sono statiche.
+
+Per istruire il validatore riguardo a questo uso corretto dei blocchi sono stati
+introdotte nuove primitive per specificare i "livelli di annidamento". Per
+esempio, per i blocchi a mutua esclusione dei *block-dev* si avrebbe una
+chiamata simile a::
+
+ enum bdev_bd_mutex_lock_class
+ {
+ BD_MUTEX_NORMAL,
+ BD_MUTEX_WHOLE,
+ BD_MUTEX_PARTITION
+ };
+
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In questo caso la sincronizzazione viene fatta su un *block-dev* sapendo che si
+tratta di una partizione.
+
+Ai fini della validazione, il validatore lo considererà con una - sotto - classe
+di blocco separata.
+
+Nota: Prestate estrema attenzione che la vostra gerarchia sia corretta quando si
+vogliono usare le primitive _nested(); altrimenti potreste avere sia falsi
+positivi che falsi negativi.
+
+Annotazioni
+-----------
+
+Si possono utilizzare due costrutti per verificare ed annotare se certi blocchi
+devono essere trattenuti: lockdep_assert_held*(&lock) e
+lockdep_*pin_lock(&lock).
+
+Come suggerito dal nome, la famiglia di macro lockdep_assert_held* asseriscono
+che un dato blocco in un dato momento deve essere trattenuto (altrimenti, verrà
+generato un WARN()). Queste vengono usate abbondantemente nel kernel, per
+esempio in kernel/sched/core.c::
+
+ void update_rq_clock(struct rq *rq)
+ {
+ s64 delta;
+
+ lockdep_assert_held(&rq->lock);
+ [...]
+ }
+
+dove aver trattenuto rq->lock è necessario per aggiornare in sicurezza il clock
+rq.
+
+L'altra famiglia di macro è lockdep_*pin_lock(), che a dire il vero viene usata
+solo per rq->lock ATM. Se per caso un blocco non viene trattenuto, queste
+genereranno un WARN(). Questo si rivela particolarmente utile quando si deve
+verificare la correttezza di codice con *callback*, dove livelli superiori
+potrebbero assumere che un blocco rimanga trattenuto, ma livelli inferiori
+potrebbero invece pensare che il blocco possa essere rilasciato e poi
+riacquisito (involontariamente si apre una sezione critica). lockdep_pin_lock()
+restituisce 'struct pin_cookie' che viene usato da lockdep_unpin_lock() per
+verificare che nessuno abbia manomesso il blocco. Per esempio in
+kernel/sched/sched.h abbiamo::
+
+ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
+ {
+ rf->cookie = lockdep_pin_lock(&rq->lock);
+ [...]
+ }
+
+ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
+ {
+ [...]
+ lockdep_unpin_lock(&rq->lock, rf->cookie);
+ }
+
+I commenti riguardo alla sincronizzazione possano fornire informazioni utili,
+tuttavia sono le verifiche in esecuzione effettuate da queste macro ad essere
+vitali per scovare problemi di sincronizzazione, ed inoltre forniscono lo stesso
+livello di informazioni quando si ispeziona il codice. Nel dubbio, preferite
+queste annotazioni!
+
+Dimostrazione di correttezza al 100%
+------------------------------------
+
+Il validatore verifica la proprietà di chiusura in senso matematico. Ovvero, per
+ogni sequenza di sincronizzazione di un singolo processo che si verifichi almeno
+una volta nel kernel, il validatore dimostrerà con una certezza del 100% che
+nessuna combinazione e tempistica di queste sequenze possa causare uno stallo in
+una qualsiasi classe di blocco. [1]_
+
+In pratica, per dimostrare l'esistenza di uno stallo non servono complessi
+scenari di sincronizzazione multi-processore e multi-processo. Il validatore può
+dimostrare la correttezza basandosi sulla sola sequenza di sincronizzazione
+apparsa almeno una volta (in qualunque momento, in qualunque processo o
+contesto). Uno scenario complesso che avrebbe bisogno di 3 processori e una
+sfortunata presenza di processi, interruzioni, e pessimo tempismo, può essere
+riprodotto su un sistema a singolo processore.
+
+Questo riduce drasticamente la complessità del controllo di qualità della
+sincronizzazione nel kernel: quello che deve essere fatto è di innescare nel
+kernel quante più possibili "semplici" sequenze di sincronizzazione, almeno una
+volta, allo scopo di dimostrarne la correttezza. Questo al posto di innescare
+una verifica per ogni possibile combinazione di sincronizzazione fra processori,
+e differenti scenari con hardirq e softirq e annidamenti vari (nella pratica,
+impossibile da fare)
+
+.. [1]
+
+ assumendo che il validatore sia corretto al 100%, e che nessun altra parte
+ del sistema possa corromperne lo stato. Assumiamo anche che tutti i percorsi
+ MNI/SMM [potrebbero interrompere anche percorsi dove le interruzioni sono
+ disabilitate] sono corretti e non interferiscono con il validatore. Inoltre,
+ assumiamo che un hash a 64-bit sia unico per ogni sequenza di
+ sincronizzazione nel sistema. Infine, la ricorsione dei blocchi non deve
+ essere maggiore di 20.
+
+Prestazione
+-----------
+
+Le regole sopracitate hanno bisogno di una quantità **enorme** di verifiche
+durante l'esecuzione. Il sistema sarebbe diventato praticamente inutilizzabile
+per la sua lentezza se le avessimo fatte davvero per ogni blocco trattenuto e
+per ogni abilitazione delle interruzioni. La complessità della verifica è
+O(N^2), quindi avremmo dovuto fare decine di migliaia di verifiche per ogni
+evento, il tutto per poche centinaia di classi.
+
+Il problema è stato risolto facendo una singola verifica per ogni 'scenario di
+sincronizzazione' (una sequenza unica di blocchi trattenuti uno dopo l'altro).
+Per farlo, viene mantenuta una pila dei blocchi trattenuti, e viene calcolato un
+hash a 64-bit unico per ogni sequenza. Quando la sequenza viene verificata per
+la prima volta, l'hash viene inserito in una tabella hash. La tabella potrà
+essere verificata senza bisogno di blocchi. Se la sequenza dovesse ripetersi, la
+tabella ci dirà che non è necessario verificarla nuovamente.
+
+Risoluzione dei problemi
+------------------------
+
+Il massimo numero di classi di blocco che il validatore può tracciare è:
+MAX_LOCKDEP_KEYS. Oltrepassare questo limite indurrà lokdep a generare il
+seguente avviso::
+
+ (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+Di base questo valore è 8191, e un classico sistema da ufficio ha meno di 1000
+classi, dunque questo avviso è solitamente la conseguenza di un problema di
+perdita delle classi di blocco o d'inizializzazione dei blocchi. Di seguito una
+descrizione dei due problemi:
+
+1. caricare e rimuovere continuamente i moduli mentre il validatore è in
+ esecuzione porterà ad una perdita di classi di blocco. Il problema è che ogni
+ caricamento crea un nuovo insieme di classi di blocco per tutti i blocchi di
+ quel modulo. Tuttavia, la rimozione del modulo non rimuove le vecchie classi
+ (vedi dopo perché non le riusiamo). Dunque, il continuo caricamento e
+ rimozione di un modulo non fa altro che aumentare il contatore di classi fino
+ a raggiungere, eventualmente, il limite.
+
+2. Usare array con un gran numero di blocchi che non vengono esplicitamente
+ inizializzati. Per esempio, una tabella hash con 8192 *bucket* dove ognuno ha
+ il proprio spinlock_t consumerà 8192 classi di blocco a meno che non vengano
+ esplicitamente inizializzati in esecuzione usando spin_lock_init() invece
+ dell'inizializzazione durante la compilazione con __SPIN_LOCK_UNLOCKED().
+ Sbagliare questa inizializzazione garantisce un esaurimento di classi di
+ blocco. Viceversa, un ciclo che invoca spin_lock_init() su tutti i blocchi li
+ mapperebbe tutti alla stessa classe di blocco.
+
+ La morale della favola è che dovete sempre inizializzare esplicitamente i
+ vostri blocchi.
+
+Qualcuno potrebbe argomentare che il validatore debba permettere il riuso di
+classi di blocco. Tuttavia, se siete tentati dall'argomento, prima revisionate
+il codice e pensate alla modifiche necessarie, e tenendo a mente che le classi
+di blocco da rimuovere probabilmente sono legate al grafo delle dipendenze. Più
+facile a dirsi che a farsi.
+
+Ovviamente, se non esaurite le classi di blocco, la prossima cosa da fare è
+quella di trovare le classi non funzionanti. Per prima cosa, il seguente comando
+ritorna il numero di classi attualmente in uso assieme al valore massimo::
+
+ grep "lock-classes" /proc/lockdep_stats
+
+Questo comando produce il seguente messaggio::
+
+ lock-classes: 748 [max: 8191]
+
+Se il numero di assegnazioni (748 qui sopra) aumenta continuamente nel tempo,
+allora c'è probabilmente un problema da qualche parte. Il seguente comando può
+essere utilizzato per identificare le classi di blocchi problematiche::
+
+ grep "BD" /proc/lockdep
+
+Eseguite il comando e salvatene l'output, quindi confrontatelo con l'output di
+un'esecuzione successiva per identificare eventuali problemi. Questo stesso
+output può anche aiutarti a trovare situazioni in cui l'inizializzazione del
+blocco è stata omessa.
+
+Lettura ricorsiva dei blocchi
+-----------------------------
+
+Il resto di questo documento vuole dimostrare che certi cicli equivalgono ad una
+possibilità di stallo.
+
+Ci sono tre tipi di bloccatori: gli scrittori (bloccatori esclusivi, come
+spin_lock() o write_lock()), lettori non ricorsivi (bloccatori condivisi, come
+down_read()), e lettori ricorsivi (bloccatori condivisi ricorsivi, come
+rcu_read_lock()). D'ora in poi, per questi tipi di bloccatori, useremo la
+seguente notazione:
+
+ W o E: per gli scrittori (bloccatori esclusivi) (W dall'inglese per
+ *Writer*, ed E per *Exclusive*).
+
+ r: per i lettori non ricorsivi (r dall'inglese per *reader*).
+
+ R: per i lettori ricorsivi (R dall'inglese per *Reader*).
+
+ S: per qualsiasi lettore (non ricorsivi + ricorsivi), dato che entrambe
+ sono bloccatori condivisi (S dall'inglese per *Shared*).
+
+ N: per gli scrittori ed i lettori non ricorsivi, dato che entrambe sono
+ non ricorsivi.
+
+Ovviamente, N equivale a "r o W" ed S a "r o R".
+
+Come suggerisce il nome, i lettori ricorsivi sono dei bloccatori a cui è
+permesso di acquisire la stessa istanza di blocco anche all'interno della
+sezione critica di un altro lettore. In altre parole, permette di annidare la
+stessa istanza di blocco nelle sezioni critiche dei lettori.
+
+Dall'altro canto, lo stesso comportamento indurrebbe un lettore non ricorsivo ad
+auto infliggersi uno stallo.
+
+La differenza fra questi due tipi di lettori esiste perché: quelli ricorsivi
+vengono bloccati solo dal trattenimento di un blocco di scrittura, mentre quelli
+non ricorsivi possono essere bloccati dall'attesa di un blocco di scrittura.
+Consideriamo il seguente esempio::
+
+ TASK A: TASK B:
+
+ read_lock(X);
+ write_lock(X);
+ read_lock_2(X);
+
+L'attività A acquisisce il blocco di lettura X (non importa se di tipo ricorsivo
+o meno) usando read_lock(). Quando l'attività B tenterà di acquisire il blocco
+X, si fermerà e rimarrà in attesa che venga rilasciato. Ora se read_lock_2() è
+un tipo lettore ricorsivo, l'attività A continuerà perché gli scrittori in
+attesa non possono bloccare lettori ricorsivi, e non avremo alcuno stallo.
+Tuttavia, se read_lock_2() è un lettore non ricorsivo, allora verrà bloccato
+dall'attività B e si causerà uno stallo.
+
+Condizioni bloccanti per lettori/scrittori su uno stesso blocco
+---------------------------------------------------------------
+Essenzialmente ci sono quattro condizioni bloccanti:
+
+1. Uno scrittore blocca un altro scrittore.
+2. Un lettore blocca uno scrittore.
+3. Uno scrittore blocca sia i lettori ricorsivi che non ricorsivi.
+4. Un lettore (ricorsivo o meno) non blocca altri lettori ricorsivi ma potrebbe
+ bloccare quelli non ricorsivi (perché potrebbero esistere degli scrittori in
+ attesa).
+
+Di seguito le tabella delle condizioni bloccanti, Y (*Yes*) significa che il
+tipo in riga blocca quello in colonna, mentre N l'opposto.
+
+ +---+---+---+---+
+ | | W | r | R |
+ +---+---+---+---+
+ | W | Y | Y | Y |
+ +---+---+---+---+
+ | r | Y | Y | N |
+ +---+---+---+---+
+ | R | Y | Y | N |
+ +---+---+---+---+
+
+ (W: scrittori, r: lettori non ricorsivi, R: lettori ricorsivi)
+
+Al contrario dei blocchi per lettori non ricorsivi, quelli ricorsivi vengono
+trattenuti da chi trattiene il blocco di scrittura piuttosto che da chi ne
+attende il rilascio. Per esempio::
+
+ TASK A: TASK B:
+
+ read_lock(X);
+
+ write_lock(X);
+
+ read_lock(X);
+
+non produce uno stallo per i lettori ricorsivi, in quanto il processo B rimane
+in attesta del blocco X, mentre il secondo read_lock() non ha bisogno di
+aspettare perché si tratta di un lettore ricorsivo. Tuttavia, se read_lock()
+fosse un lettore non ricorsivo, questo codice produrrebbe uno stallo.
+
+Da notare che in funzione dell'operazione di blocco usate per l'acquisizione (in
+particolare il valore del parametro 'read' in lock_acquire()), un blocco può
+essere di scrittura (blocco esclusivo), di lettura non ricorsivo (blocco
+condiviso e non ricorsivo), o di lettura ricorsivo (blocco condiviso e
+ricorsivo). In altre parole, per un'istanza di blocco esistono tre tipi di
+acquisizione che dipendono dalla funzione di acquisizione usata: esclusiva, di
+lettura non ricorsiva, e di lettura ricorsiva.
+
+In breve, chiamiamo "non ricorsivi" blocchi di scrittura e quelli di lettura non
+ricorsiva, mentre "ricorsivi" i blocchi di lettura ricorsivi.
+
+I blocchi ricorsivi non si bloccano a vicenda, mentre quelli non ricorsivi sì
+(anche in lettura). Un blocco di lettura non ricorsivi può bloccare uno
+ricorsivo, e viceversa.
+
+Il seguente esempio mostra uno stallo con blocchi ricorsivi::
+
+ TASK A: TASK B:
+
+ read_lock(X);
+ read_lock(Y);
+ write_lock(Y);
+ write_lock(X);
+
+Il processo A attende che il processo B esegua read_unlock() so Y, mentre il
+processo B attende che A esegua read_unlock() su X.
+
+Tipi di dipendenze e percorsi forti
+-----------------------------------
+Le dipendenze fra blocchi tracciano l'ordine con cui una coppia di blocchi viene
+acquisita, e perché vi sono 3 tipi di bloccatori, allora avremo 9 tipi di
+dipendenze. Tuttavia, vi mostreremo che 4 sono sufficienti per individuare gli
+stalli.
+
+Per ogni dipendenza fra blocchi avremo::
+
+ L1 -> L2
+
+Questo significa che lockdep ha visto acquisire L1 prima di L2 nello stesso
+contesto di esecuzione. Per quanto riguarda l'individuazione degli stalli, ci
+interessa sapere se possiamo rimanere bloccati da L2 mentre L1 viene trattenuto.
+In altre parole, vogliamo sapere se esiste un bloccatore L3 che viene bloccato
+da L1 e un L2 che viene bloccato da L3. Dunque, siamo interessati a (1) quello
+che L1 blocca e (2) quello che blocca L2. Di conseguenza, possiamo combinare
+lettori ricorsivi e non per L1 (perché bloccano gli stessi tipi) e possiamo
+combinare scrittori e lettori non ricorsivi per L2 (perché vengono bloccati
+dagli stessi tipi).
+
+Con questa semplificazione, possiamo dedurre che ci sono 4 tipi di rami nel
+grafo delle dipendenze di lockdep:
+
+1) -(ER)->:
+ dipendenza da scrittore esclusivo a lettore ricorsivo. "X -(ER)-> Y"
+ significa X -> Y, dove X è uno scrittore e Y un lettore ricorsivo.
+
+2) -(EN)->:
+ dipendenza da scrittore esclusivo a bloccatore non ricorsivo.
+ "X -(EN)->" significa X-> Y, dove X è uno scrittore e Y può essere
+ o uno scrittore o un lettore non ricorsivo.
+
+3) -(SR)->:
+ dipendenza da lettore condiviso a lettore ricorsivo. "X -(SR)->"
+ significa X -> Y, dove X è un lettore (ricorsivo o meno) e Y è un
+ lettore ricorsivo.
+
+4) -(SN)->:
+ dipendenza da lettore condiviso a bloccatore non ricorsivo.
+ "X -(SN)-> Y" significa X -> Y , dove X è un lettore (ricorsivo
+ o meno) e Y può essere o uno scrittore o un lettore non ricorsivo.
+
+Da notare che presi due blocchi, questi potrebbero avere più dipendenza fra di
+loro. Per esempio::
+
+ TASK A:
+
+ read_lock(X);
+ write_lock(Y);
+ ...
+
+ TASK B:
+
+ write_lock(X);
+ write_lock(Y);
+
+Nel grafo delle dipendenze avremo sia X -(SN)-> Y che X -(EN)-> Y.
+
+Usiamo -(xN)-> per rappresentare i rami sia per -(EN)-> che -(SN)->, allo stesso
+modo -(Ex)->, -(xR)-> e -(Sx)->
+
+Un "percorso" in un grafo è una serie di nodi e degli archi che li congiungono.
+Definiamo un percorso "forte", come il percorso che non ha archi (dipendenze) di
+tipo -(xR)-> e -(Sx)->. In altre parole, un percorso "forte" è un percorso da un
+blocco ad un altro attraverso le varie dipendenze, e se sul percorso abbiamo X
+-> Y -> Z (dove X, Y, e Z sono blocchi), e da X a Y si ha una dipendenza -(SR)->
+o -(ER)->, allora fra Y e Z non deve esserci una dipendenza -(SN)-> o -(SR)->.
+
+Nella prossima sezione vedremo perché definiamo questo percorso "forte".
+
+Identificazione di stalli da lettura ricorsiva
+----------------------------------------------
+Ora vogliamo dimostrare altre due cose:
+
+Lemma 1:
+
+Se esiste un percorso chiuso forte (ciclo forte), allora esiste anche una
+combinazione di sequenze di blocchi che causa uno stallo. In altre parole,
+l'esistenza di un ciclo forte è sufficiente alla scoperta di uno stallo.
+
+Lemma 2:
+
+Se non esiste un percorso chiuso forte (ciclo forte), allora non esiste una
+combinazione di sequenze di blocchi che causino uno stallo. In altre parole, i
+cicli forti sono necessari alla rilevazione degli stallo.
+
+Con questi due lemmi possiamo facilmente affermare che un percorso chiuso forte
+è sia sufficiente che necessario per avere gli stalli, dunque averli equivale
+alla possibilità di imbattersi concretamente in uno stallo. Un percorso chiuso
+forte significa che può causare stalli, per questo lo definiamo "forte", ma ci
+sono anche cicli di dipendenze che non causeranno stalli.
+
+Dimostrazione di sufficienza (lemma 1):
+
+Immaginiamo d'avere un ciclo forte::
+
+ L1 -> L2 ... -> Ln -> L1
+
+Questo significa che abbiamo le seguenti dipendenze::
+
+ L1 -> L2
+ L2 -> L3
+ ...
+ Ln-1 -> Ln
+ Ln -> L1
+
+Ora possiamo costruire una combinazione di sequenze di blocchi che causano lo
+stallo.
+
+Per prima cosa facciamo sì che un processo/processore prenda L1 in L1 -> L2, poi
+un altro prende L2 in L2 -> L3, e così via. Alla fine, tutti i Lx in Lx -> Lx+1
+saranno trattenuti da processi/processori diversi.
+
+Poi visto che abbiamo L1 -> L2, chi trattiene L1 vorrà acquisire L2 in L1 -> L2,
+ma prima dovrà attendere che venga rilasciato da chi lo trattiene. Questo perché
+L2 è già trattenuto da un altro processo/processore, ed in più L1 -> L2 e L2 ->
+L3 non sono -(xR)-> né -(Sx)-> (la definizione di forte). Questo significa che L2
+in L1 -> L2 non è un bloccatore non ricorsivo (bloccabile da chiunque), e L2 in
+L2 -> L3 non è uno scrittore (che blocca chiunque).
+
+In aggiunta, possiamo trarre una simile conclusione per chi sta trattenendo L2:
+deve aspettare che L3 venga rilasciato, e così via. Ora possiamo dimostrare che
+chi trattiene Lx deve aspettare che Lx+1 venga rilasciato. Notiamo che Ln+1 è
+L1, dunque si è creato un ciclo dal quale non possiamo uscire, quindi si ha uno
+stallo.
+
+Dimostrazione della necessità (lemma 2):
+
+Questo lemma equivale a dire che: se siamo in uno scenario di stallo, allora
+deve esiste un ciclo forte nel grafo delle dipendenze.
+
+Secondo Wikipedia[1], se c'è uno stallo, allora deve esserci un ciclo di attese,
+ovvero ci sono N processi/processori dove P1 aspetta un blocco trattenuto da P2,
+e P2 ne aspetta uno trattenuto da P3, ... e Pn attende che il blocco P1 venga
+rilasciato. Chiamiamo Lx il blocco che attende Px, quindi P1 aspetta L1 e
+trattiene Ln. Quindi avremo Ln -> L1 nel grafo delle dipendenze. Similarmente,
+nel grafo delle dipendenze avremo L1 -> L2, L2 -> L3, ..., Ln-1 -> Ln, il che
+significa che abbiamo un ciclo::
+
+ Ln -> L1 -> L2 -> ... -> Ln
+
+, ed ora dimostriamo d'avere un ciclo forte.
+
+Per un blocco Lx, il processo Px contribuisce alla dipendenza Lx-1 -> Lx e Px+1
+contribuisce a quella Lx -> Lx+1. Visto che Px aspetta che Px+1 rilasci Lx, sarà
+impossibile che Lx in Px+1 sia un lettore e che Lx in Px sia un lettore
+ricorsivo. Questo perché i lettori (ricorsivi o meno) non bloccano lettori
+ricorsivi. Dunque, Lx-1 -> Lx e Lx -> Lx+1 non possono essere una coppia di
+-(xR)-> -(Sx)->. Questo è vero per ogni ciclo, dunque, questo è un ciclo forte.
+
+Riferimenti
+-----------
+
+[1]: https://it.wikipedia.org/wiki/Stallo_(informatica)
+
+[2]: Shibu, K. (2009). Intro To Embedded Systems (1st ed.). Tata McGraw-Hill
diff --git a/Documentation/translations/it_IT/locking/lockstat.rst b/Documentation/translations/it_IT/locking/lockstat.rst
new file mode 100644
index 000000000000..77972d971d7c
--- /dev/null
+++ b/Documentation/translations/it_IT/locking/lockstat.rst
@@ -0,0 +1,230 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-ita.rst
+
+=======================
+Statistiche sui blocchi
+=======================
+
+Cosa
+====
+
+Come suggerisce il nome, fornisce statistiche sui blocchi.
+
+
+Perché
+======
+
+Perché, tanto per fare un esempio, le contese sui blocchi possono influenzare
+significativamente le prestazioni.
+
+Come
+====
+
+*Lockdep* ha punti di collegamento nelle funzioni di blocco e inoltre
+mappa le istanze di blocco con le relative classi. Partiamo da questo punto
+(vedere Documentation/translations/it_IT/locking/lockdep-design.rst).
+Il grafico sottostante mostra la relazione che intercorre fra le
+funzioni di blocco e i vari punti di collegamenti che ci sono al loro
+interno::
+
+ __acquire
+ |
+ lock _____
+ | \
+ | __contended
+ | |
+ | <wait>
+ | _______/
+ |/
+ |
+ __acquired
+ |
+ .
+ <hold>
+ .
+ |
+ __release
+ |
+ unlock
+
+ lock, unlock - le classiche funzioni di blocco
+ __* - i punti di collegamento
+ <> - stati
+
+Grazie a questi punti di collegamento possiamo fornire le seguenti statistiche:
+
+con-bounces
+ - numero di contese su un blocco che riguarda dati di un processore
+
+contentions
+ - numero di acquisizioni di blocchi che hanno dovuto attendere
+
+wait time
+ min
+ - tempo minimo (diverso da zero) che sia mai stato speso in attesa di
+ un blocco
+
+ max
+ - tempo massimo che sia mai stato speso in attesa di un blocco
+
+ total
+ - tempo totale speso in attesa di un blocco
+
+ avg
+ - tempo medio speso in attesa di un blocco
+
+acq-bounces
+ - numero di acquisizioni di blocco che riguardavano i dati su un processore
+
+acquisitions
+ - numero di volte che un blocco è stato ottenuto
+
+hold time
+ min
+ - tempo minimo (diverso da zero) che sia mai stato speso trattenendo un blocco
+
+ max
+ - tempo massimo che sia mai stato speso trattenendo un blocco
+
+ total
+ - tempo totale di trattenimento di un blocco
+
+ avg
+ - tempo medio di trattenimento di un blocco
+
+Questi numeri vengono raccolti per classe di blocco, e per ogni stato di
+lettura/scrittura (quando applicabile).
+
+Inoltre, questa raccolta di statistiche tiene traccia di 4 punti di contesa
+per classe di blocco. Un punto di contesa è una chiamata che ha dovuto
+aspettare l'acquisizione di un blocco.
+
+Configurazione
+--------------
+
+Le statistiche sui blocchi si abilitano usando l'opzione di configurazione
+CONFIG_LOCK_STAT.
+
+Uso
+---
+
+Abilitare la raccolta di statistiche::
+
+ # echo 1 >/proc/sys/kernel/lock_stat
+
+Disabilitare la raccolta di statistiche::
+
+ # echo 0 >/proc/sys/kernel/lock_stat
+
+Per vedere le statistiche correnti sui blocchi::
+
+ ( i numeri di riga non fanno parte dell'output del comando, ma sono stati
+ aggiunti ai fini di questa spiegazione )
+
+ # less /proc/lock_stat
+
+ 01 lock_stat version 0.4
+ 02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg
+ 04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 05
+ 06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99
+ 07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77
+ 08 ---------------
+ 09 &mm->mmap_sem 1 [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
+ 10 &mm->mmap_sem 96 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+ 11 &mm->mmap_sem 34 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+ 12 &mm->mmap_sem 17 [<ffffffff81127e71>] vm_munmap+0x41/0x80
+ 13 ---------------
+ 14 &mm->mmap_sem 1 [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
+ 15 &mm->mmap_sem 60 [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
+ 16 &mm->mmap_sem 41 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+ 17 &mm->mmap_sem 68 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+ 18
+ 19.............................................................................................................................................................................................................................
+ 20
+ 21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48
+ 22 ---------------
+ 23 unix_table_lock 45 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+ 24 unix_table_lock 47 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+ 25 unix_table_lock 15 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+ 26 unix_table_lock 5 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+ 27 ---------------
+ 28 unix_table_lock 39 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+ 29 unix_table_lock 49 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+ 30 unix_table_lock 20 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+ 31 unix_table_lock 4 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+
+Questo estratto mostra le statistiche delle prime due classi di
+blocco. La riga 01 mostra la versione dell'output - la versione
+cambierà ogni volta che cambia il formato. Le righe dalla 02 alla 04
+rappresentano l'intestazione con la descrizione delle colonne. Le
+statistiche sono mostrate nelle righe dalla 05 alla 18 e dalla 20
+alla 31. Queste statistiche sono divise in due parti: le statistiche,
+seguite dai punti di contesa (righe 08 e 13) separati da un divisore.
+
+Le righe dalla 09 alla 12 mostrano i primi quattro punti di contesa
+registrati (il codice che tenta di acquisire un blocco) e le righe
+dalla 14 alla 17 mostrano i primi quattro punti contesi registrati
+(ovvero codice che ha acquisito un blocco). È possibile che nelle
+statistiche manchi il valore *max con-bounces*.
+
+Il primo blocco (righe dalla 05 alla 18) è di tipo lettura/scrittura e quindi
+mostra due righe prima del divisore. I punti di contesa non corrispondono alla
+descrizione delle colonne nell'intestazione; essi hanno due colonne: *punti di
+contesa* e *[<IP>] simboli*. Il secondo gruppo di punti di contesa sono i punti
+con cui si contende il blocco.
+
+La parte interna del tempo è espressa in us (microsecondi).
+
+Quando si ha a che fare con blocchi annidati si potrebbero vedere le
+sottoclassi di blocco::
+
+ 32...........................................................................................................................................................................................................................
+ 33
+ 34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82
+ 35 ---------
+ 36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+ 37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+ 38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+ 39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+ 40 ---------
+ 41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+ 42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+ 43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+ 44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8
+ 45
+ 46...........................................................................................................................................................................................................................
+ 47
+ 48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84
+ 49 -----------
+ 50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+ 51 -----------
+ 52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+ 53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8
+ 54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+ 55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+La riga 48 mostra le statistiche per la seconda sottoclasse (/1) della
+classe *&irq->lock* (le sottoclassi partono da 0); in questo caso,
+come suggerito dalla riga 50, ``double_rq_lock`` tenta di acquisire un blocco
+annidato di due spinlock.
+
+Per vedere i blocco più contesi::
+
+ # grep : /proc/lock_stat | head
+ clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71
+ tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59
+ &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59
+ &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69
+ &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93
+ tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72
+ tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89
+ rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89
+ &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27
+ rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76
+
+Per cancellare le statistiche::
+
+ # echo 0 > /proc/lock_stat
diff --git a/Documentation/translations/it_IT/locking/locktorture.rst b/Documentation/translations/it_IT/locking/locktorture.rst
new file mode 100644
index 000000000000..87a0dbeaca77
--- /dev/null
+++ b/Documentation/translations/it_IT/locking/locktorture.rst
@@ -0,0 +1,181 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-ita.rst
+
+============================================
+Funzionamento del test *Kernel Lock Torture*
+============================================
+
+CONFIG_LOCK_TORTURE_TEST
+========================
+
+L'opzione di configurazione CONFIG_LOCK_TORTURE_TEST fornisce un
+modulo kernel che esegue delle verifiche che *torturano* le primitive di
+sincronizzazione del kernel. Se dovesse servire, il modulo kernel,
+'locktorture', può essere generato successivamente su un kernel che
+volete verificare. Periodicamente le verifiche stampano messaggi tramite
+``printk()`` e che quindi possono essere letti tramite ``dmesg`` (magari
+filtrate l'output con ``grep "torture"``). La verifica inizia quando
+il modulo viene caricato e termina quando viene rimosso. Questo
+programma si basa sulle modalità di verifica di RCU tramite rcutorture.
+
+Questa verifica consiste nella creazione di un certo numero di thread
+del kernel che acquisiscono un blocco e lo trattengono per una certa
+quantità di tempo così da simulare diversi comportamenti nelle sezioni
+critiche. La quantità di contese su un blocco può essere simulata
+allargando la sezione critica e/o creando più thread.
+
+
+Parametri del modulo
+====================
+
+Questo modulo ha i seguenti parametri:
+
+
+Specifici di locktorture
+------------------------
+
+nwriters_stress
+ Numero di thread del kernel che stresseranno l'acquisizione
+ esclusiva dei blocchi (scrittori). Il valore di base è il
+ doppio del numero di processori attivi presenti.
+
+nreaders_stress
+ Numero di thread del kernel che stresseranno l'acquisizione
+ condivisa dei blocchi (lettori). Il valore di base è lo stesso
+ di nwriters_stress. Se l'utente non ha specificato
+ nwriters_stress, allora entrambe i valori corrisponderanno
+ al numero di processori attivi presenti.
+
+torture_type
+ Tipo di blocco da verificare. Di base, solo gli spinlock
+ verranno verificati. Questo modulo può verificare anche
+ i seguenti tipi di blocchi:
+
+ - "lock_busted":
+ Simula un'incorretta implementazione del
+ blocco.
+
+ - "spin_lock":
+ coppie di spin_lock() e spin_unlock().
+
+ - "spin_lock_irq":
+ coppie di spin_lock_irq() e spin_unlock_irq().
+
+ - "rw_lock":
+ coppie di rwlock read/write lock() e unlock().
+
+ - "rw_lock_irq":
+ copie di rwlock read/write lock_irq() e
+ unlock_irq().
+
+ - "mutex_lock":
+ coppie di mutex_lock() e mutex_unlock().
+
+ - "rtmutex_lock":
+ coppie di rtmutex_lock() e rtmutex_unlock().
+ Il kernel deve avere CONFIG_RT_MUTEXES=y.
+
+ - "rwsem_lock":
+ coppie di semafori read/write down() e up().
+
+
+Generici dell'ambiente di sviluppo 'torture' (RCU + locking)
+------------------------------------------------------------
+
+shutdown_secs
+ Numero di secondi prima che la verifica termini e il sistema
+ venga spento. Il valore di base è zero, il che disabilita
+ la possibilità di terminare e spegnere. Questa funzionalità
+ può essere utile per verifiche automatizzate.
+
+onoff_interval
+ Numero di secondi fra ogni tentativo di esecuzione di
+ un'operazione casuale di CPU-hotplug. Di base è zero, il
+ che disabilita la funzionalità di CPU-hotplug. Nei kernel
+ con CONFIG_HOTPLUG_CPU=n, locktorture si rifiuterà, senza
+ dirlo, di effettuare una qualsiasi operazione di
+ CPU-hotplug indipendentemente dal valore specificato in
+ onoff_interval.
+
+onoff_holdoff
+ Numero di secondi da aspettare prima di iniziare le
+ operazioni di CPU-hotplug. Normalmente questo verrebbe
+ usato solamente quando locktorture è compilato come parte
+ integrante del kernel ed eseguito automaticamente all'avvio,
+ in questo caso è utile perché permette di non confondere
+ l'avvio con i processori che vanno e vengono. Questo
+ parametro è utile sono se CONFIG_HOTPLUG_CPU è abilitato.
+
+stat_interval
+ Numero di secondi fra una stampa (printk()) delle
+ statistiche e l'altra. Di base, locktorture riporta le
+ statistiche ogni 60 secondi. Impostando l'intervallo a 0
+ ha l'effetto di stampare le statistiche -solo- quando il
+ modulo viene rimosso.
+
+stutter
+ Durata della verifica prima di effettuare una pausa di
+ eguale durata. Di base "stutter=5", quindi si eseguono
+ verifiche e pause di (circa) cinque secondi.
+ L'impostazione di "stutter=0" fa si che la verifica
+ venga eseguita continuamente senza fermarsi.
+
+shuffle_interval
+ Il numero di secondi per cui un thread debba mantenere
+ l'affinità con un sottoinsieme di processori, di base è
+ 3 secondi. Viene usato assieme a test_no_idle_hz.
+
+verbose
+ Abilita le stampe di debug, via printk(). Di base è
+ abilitato. Queste informazioni aggiuntive sono per la
+ maggior parte relative ad errori di alto livello e resoconti
+ da parte dell'struttura 'torture'.
+
+
+Statistiche
+===========
+
+Le statistiche vengono stampate secondo il seguente formato::
+
+ spin_lock-torture: Writes: Total: 93746064 Max/Min: 0/0 Fail: 0
+ (A) (B) (C) (D) (E)
+
+ (A): tipo di lock sotto verifica -- parametro torture_type.
+
+ (B): Numero di acquisizione del blocco in scrittura. Se si ha a che fare
+ con una primitiva di lettura/scrittura apparirà di seguito anche una
+ seconda voce "Reads"
+
+ (C): Numero di volte che il blocco è stato acquisito
+
+ (D): Numero minimo e massimo di volte che un thread ha fallito
+ nell'acquisire il blocco
+
+ (E): valori true/false nel caso di errori durante l'acquisizione del blocco.
+ Questo dovrebbe dare un riscontro positivo -solo- se c'è un baco
+ nell'implementazione delle primitive di sincronizzazione. Altrimenti un
+ blocco non dovrebbe mai fallire (per esempio, spin_lock()).
+ Ovviamente lo stesso si applica per (C). Un semplice esempio è il tipo
+ "lock_busted".
+
+Uso
+===
+
+Il seguente script può essere utilizzato per verificare i blocchi::
+
+ #!/bin/sh
+
+ modprobe locktorture
+ sleep 3600
+ rmmod locktorture
+ dmesg | grep torture:
+
+L'output può essere manualmente ispezionato cercando il marcatore d'errore
+"!!!". Ovviamente potreste voler creare degli script più elaborati che
+verificano automaticamente la presenza di errori. Il comando "rmmod" forza la
+stampa (usando printk()) di "SUCCESS", "FAILURE", oppure "RCU_HOTPLUG". I primi
+due si piegano da soli, mentre l'ultimo indica che non stati trovati problemi di
+sincronizzazione, tuttavia ne sono stati trovati in CPU-hotplug.
+
+Consultate anche: Documentation/translations/it_IT/RCU/torture.rst
diff --git a/Documentation/translations/it_IT/locking/locktypes.rst b/Documentation/translations/it_IT/locking/locktypes.rst
new file mode 100644
index 000000000000..1c7056283b9d
--- /dev/null
+++ b/Documentation/translations/it_IT/locking/locktypes.rst
@@ -0,0 +1,547 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-ita.rst
+
+.. _it_kernel_hacking_locktypes:
+
+========================================
+Tipologie di blocco e le loro istruzioni
+========================================
+
+Introduzione
+============
+
+Il kernel fornisce un certo numero di primitive di blocco che possiamo dividere
+in tre categorie:
+
+ - blocchi ad attesa con sospensione
+ - blocchi locali per CPU
+ - blocchi ad attesa attiva
+
+Questo documento descrive questi tre tipi e fornisce istruzioni su come
+annidarli, ed usarli su kernel PREEMPT_RT.
+
+Categorie di blocchi
+====================
+
+Blocchi ad attesa con sospensione
+---------------------------------
+
+I blocchi ad attesa con sospensione possono essere acquisiti solo in un contesti
+dov'è possibile la prelazione.
+
+Diverse implementazioni permettono di usare try_lock() anche in altri contesti,
+nonostante ciò è bene considerare anche la sicurezza dei corrispondenti
+unlock(). Inoltre, vanno prese in considerazione anche le varianti di *debug*
+di queste primitive. Insomma, non usate i blocchi ad attesa con sospensioni in
+altri contesti a meno che proprio non vi siano alternative.
+
+In questa categoria troviamo:
+
+ - mutex
+ - rt_mutex
+ - semaphore
+ - rw_semaphore
+ - ww_mutex
+ - percpu_rw_semaphore
+
+Nei kernel con PREEMPT_RT, i seguenti blocchi sono convertiti in blocchi ad
+attesa con sospensione:
+
+ - local_lock
+ - spinlock_t
+ - rwlock_t
+
+Blocchi locali per CPU
+----------------------
+
+ - local_lock
+
+Su kernel non-PREEMPT_RT, le funzioni local_lock gestiscono le primitive di
+disabilitazione di prelazione ed interruzioni. Al contrario di altri meccanismi,
+la disabilitazione della prelazione o delle interruzioni sono puri meccanismi
+per il controllo della concorrenza su una CPU e quindi non sono adatti per la
+gestione della concorrenza inter-CPU.
+
+Blocchi ad attesa attiva
+------------------------
+
+ - raw_spinlcok_t
+ - bit spinlocks
+
+ Nei kernel non-PREEMPT_RT, i seguenti blocchi sono ad attesa attiva:
+
+ - spinlock_t
+ - rwlock_t
+
+Implicitamente, i blocchi ad attesa attiva disabilitano la prelazione e le
+funzioni lock/unlock hanno anche dei suffissi per gestire il livello di
+protezione:
+
+ =================== =========================================================================
+ _bh() disabilita / abilita *bottom halves* (interruzioni software)
+ _irq() disabilita / abilita le interruzioni
+ _irqsave/restore() salva e disabilita le interruzioni / ripristina ed attiva le interruzioni
+ =================== =========================================================================
+
+Semantica del proprietario
+==========================
+
+Eccetto i semafori, i sopracitati tipi di blocchi hanno tutti una semantica
+molto stringente riguardo al proprietario di un blocco:
+
+ Il contesto (attività) che ha acquisito il blocco deve rilasciarlo
+
+I semafori rw_semaphores hanno un'interfaccia speciale che permette anche ai non
+proprietari del blocco di rilasciarlo per i lettori.
+
+rtmutex
+=======
+
+I blocchi a mutua esclusione RT (*rtmutex*) sono un sistema a mutua esclusione
+con supporto all'ereditarietà della priorità (PI).
+
+Questo meccanismo ha delle limitazioni sui kernel non-PREEMPT_RT dovuti alla
+prelazione e alle sezioni con interruzioni disabilitate.
+
+Chiaramente, questo meccanismo non può avvalersi della prelazione su una sezione
+dove la prelazione o le interruzioni sono disabilitate; anche sui kernel
+PREEMPT_RT. Tuttavia, i kernel PREEMPT_RT eseguono la maggior parte delle
+sezioni in contesti dov'è possibile la prelazione, specialmente in contesti
+d'interruzione (anche software). Questa conversione permette a spinlock_t e
+rwlock_t di essere implementati usando rtmutex.
+
+semaphore
+=========
+
+La primitiva semaphore implementa un semaforo con contatore.
+
+I semafori vengono spesso utilizzati per la serializzazione e l'attesa, ma per
+nuovi casi d'uso si dovrebbero usare meccanismi diversi, come mutex e
+completion.
+
+semaphore e PREEMPT_RT
+----------------------
+
+I kernel PREEMPT_RT non cambiano l'implementazione di semaphore perché non hanno
+un concetto di proprietario, dunque impediscono a PREEMPT_RT d'avere
+l'ereditarietà della priorità sui semafori. Un proprietario sconosciuto non può
+ottenere una priorità superiore. Di consequenza, bloccarsi sui semafori porta
+all'inversione di priorità.
+
+
+rw_semaphore
+============
+
+Il blocco rw_semaphore è un meccanismo che permette più lettori ma un solo scrittore.
+
+Sui kernel non-PREEMPT_RT l'implementazione è imparziale, quindi previene
+l'inedia dei processi scrittori.
+
+Questi blocchi hanno una semantica molto stringente riguardo il proprietario, ma
+offre anche interfacce speciali che permettono ai processi non proprietari di
+rilasciare un processo lettore. Queste interfacce funzionano indipendentemente
+dalla configurazione del kernel.
+
+rw_semaphore e PREEMPT_RT
+-------------------------
+
+I kernel PREEMPT_RT sostituiscono i rw_semaphore con un'implementazione basata
+su rt_mutex, e questo ne modifica l'imparzialità:
+
+ Dato che uno scrittore rw_semaphore non può assicurare la propria priorità ai
+ suoi lettori, un lettore con priorità più bassa che ha subito la prelazione
+ continuerà a trattenere il blocco, quindi porta all'inedia anche gli scrittori
+ con priorità più alta. Per contro, dato che i lettori possono garantire la
+ propria priorità agli scrittori, uno scrittore a bassa priorità che subisce la
+ prelazione vedrà la propria priorità alzata finché non rilascerà il blocco, e
+ questo preverrà l'inedia dei processi lettori a causa di uno scrittore.
+
+
+local_lock
+==========
+
+I local_lock forniscono nomi agli ambiti di visibilità delle sezioni critiche
+protette tramite la disattivazione della prelazione o delle interruzioni.
+
+Sui kernel non-PREEMPT_RT le operazioni local_lock si traducono
+nell'abilitazione o disabilitazione della prelazione o le interruzioni.
+
+ =============================== ======================
+ local_lock(&llock) preempt_disable()
+ local_unlock(&llock) preempt_enable()
+ local_lock_irq(&llock) local_irq_disable()
+ local_unlock_irq(&llock) local_irq_enable()
+ local_lock_irqsave(&llock) local_irq_save()
+ local_unlock_irqrestore(&llock) local_irq_restore()
+ =============================== ======================
+
+Gli ambiti di visibilità con nome hanno due vantaggi rispetto alle primitive di
+base:
+
+ - Il nome del blocco permette di fare un'analisi statica, ed è anche chiaro su
+ cosa si applichi la protezione cosa che invece non si può fare con le
+ classiche primitive in quanto sono opache e senza alcun ambito di
+ visibilità.
+
+ - Se viene abilitato lockdep, allora local_lock ottiene un lockmap che
+ permette di verificare la bontà della protezione. Per esempio, questo può
+ identificare i casi dove una funzione usa preempt_disable() come meccanismo
+ di protezione in un contesto d'interruzione (anche software). A parte
+ questo, lockdep_assert_held(&llock) funziona come tutte le altre primitive
+ di sincronizzazione.
+
+local_lock e PREEMPT_RT
+-------------------------
+
+I kernel PREEMPT_RT sostituiscono local_lock con uno spinlock_t per CPU, quindi
+ne cambia la semantica:
+
+ - Tutte le modifiche a spinlock_t si applicano anche a local_lock
+
+L'uso di local_lock
+-------------------
+
+I local_lock dovrebbero essere usati su kernel non-PREEMPT_RT quando la
+disabilitazione della prelazione o delle interruzioni è il modo più adeguato per
+gestire l'accesso concorrente a strutture dati per CPU.
+
+Questo meccanismo non è adatto alla protezione da prelazione o interruzione su
+kernel PREEMPT_RT dato che verrà convertito in spinlock_t.
+
+
+raw_spinlock_t e spinlock_t
+===========================
+
+raw_spinlock_t
+--------------
+
+I blocco raw_spinlock_t è un blocco ad attesa attiva su tutti i tipi di kernel,
+incluso quello PREEMPT_RT. Usate raw_spinlock_t solo in sezioni critiche nel
+cuore del codice, nella gestione delle interruzioni di basso livello, e in posti
+dove è necessario disabilitare la prelazione o le interruzioni. Per esempio, per
+accedere in modo sicuro lo stato dell'hardware. A volte, i raw_spinlock_t
+possono essere usati quando la sezione critica è minuscola, per evitare gli
+eccessi di un rtmutex.
+
+spinlock_t
+----------
+
+Il significato di spinlock_t cambia in base allo stato di PREEMPT_RT.
+
+Sui kernel non-PREEMPT_RT, spinlock_t si traduce in un raw_spinlock_t ed ha
+esattamente lo stesso significato.
+
+spinlock_t e PREEMPT_RT
+-----------------------
+
+Sui kernel PREEMPT_RT, spinlock_t ha un'implementazione dedicata che si basa
+sull'uso di rt_mutex. Questo ne modifica il significato:
+
+ - La prelazione non viene disabilitata.
+
+ - I suffissi relativi alla interruzioni (_irq, _irqsave / _irqrestore) per le
+ operazioni spin_lock / spin_unlock non hanno alcun effetto sullo stato delle
+ interruzioni della CPU.
+
+ - I suffissi relativi alle interruzioni software (_bh()) disabilitano i
+ relativi gestori d'interruzione.
+
+ I kernel non-PREEMPT_RT disabilitano la prelazione per ottenere lo stesso effetto.
+
+ I kernel PREEMPT_RT usano un blocco per CPU per la serializzazione, il che
+ permette di tenere attiva la prelazione. Il blocco disabilita i gestori
+ d'interruzione software e previene la rientranza vista la prelazione attiva.
+
+A parte quanto appena discusso, i kernel PREEMPT_RT preservano il significato
+di tutti gli altri aspetti di spinlock_t:
+
+ - Le attività che trattengono un blocco spinlock_t non migrano su altri
+ processori. Disabilitando la prelazione, i kernel non-PREEMPT_RT evitano la
+ migrazione. Invece, i kernel PREEMPT_RT disabilitano la migrazione per
+ assicurarsi che i puntatori a variabili per CPU rimangano validi anche
+ quando un'attività subisce la prelazione.
+
+ - Lo stato di un'attività si mantiene durante le acquisizioni del blocco al
+ fine di garantire che le regole basate sullo stato delle attività si possano
+ applicare a tutte le configurazioni del kernel. I kernel non-PREEMPT_RT
+ lasciano lo stato immutato. Tuttavia, la funzionalità PREEMPT_RT deve
+ cambiare lo stato se l'attività si blocca durante l'acquisizione. Dunque,
+ salva lo stato attuale prima di bloccarsi ed il rispettivo risveglio lo
+ ripristinerà come nell'esempio seguente::
+
+ task->state = TASK_INTERRUPTIBLE
+ lock()
+ block()
+ task->saved_state = task->state
+ task->state = TASK_UNINTERRUPTIBLE
+ schedule()
+ lock wakeup
+ task->state = task->saved_state
+
+ Altri tipi di risvegli avrebbero impostato direttamente lo stato a RUNNING,
+ ma in questo caso non avrebbe funzionato perché l'attività deve rimanere
+ bloccata fintanto che il blocco viene trattenuto. Quindi, lo stato salvato
+ viene messo a RUNNING quando il risveglio di un non-blocco cerca di
+ risvegliare un'attività bloccata in attesa del rilascio di uno spinlock. Poi,
+ quando viene completata l'acquisizione del blocco, il suo risveglio
+ ripristinerà lo stato salvato, in questo caso a RUNNING::
+
+ task->state = TASK_INTERRUPTIBLE
+ lock()
+ block()
+ task->saved_state = task->state
+ task->state = TASK_UNINTERRUPTIBLE
+ schedule()
+ non lock wakeup
+ task->saved_state = TASK_RUNNING
+
+ lock wakeup
+ task->state = task->saved_state
+
+ Questo garantisce che il vero risveglio non venga perso.
+
+rwlock_t
+========
+
+Il blocco rwlock_t è un meccanismo che permette più lettori ma un solo scrittore.
+
+Sui kernel non-PREEMPT_RT questo è un blocco ad attesa e per i suoi suffissi si
+applicano le stesse regole per spinlock_t. La sua implementazione è imparziale,
+quindi previene l'inedia dei processi scrittori.
+
+rwlock_t e PREEMPT_RT
+---------------------
+
+Sui kernel PREEMPT_RT rwlock_t ha un'implementazione dedicata che si basa
+sull'uso di rt_mutex. Questo ne modifica il significato:
+
+ - Tutte le modifiche fatte a spinlock_t si applicano anche a rwlock_t.
+
+ - Dato che uno scrittore rw_semaphore non può assicurare la propria priorità ai
+ suoi lettori, un lettore con priorità più bassa che ha subito la prelazione
+ continuerà a trattenere il blocco, quindi porta all'inedia anche gli
+ scrittori con priorità più alta. Per contro, dato che i lettori possono
+ garantire la propria priorità agli scrittori, uno scrittore a bassa priorità
+ che subisce la prelazione vedrà la propria priorità alzata finché non
+ rilascerà il blocco, e questo preverrà l'inedia dei processi lettori a causa
+ di uno scrittore.
+
+
+Precisazioni su PREEMPT_RT
+==========================
+
+local_lock su RT
+----------------
+
+Sui kernel PREEMPT_RT Ci sono alcune implicazioni dovute alla conversione di
+local_lock in un spinlock_t. Per esempio, su un kernel non-PREEMPT_RT il
+seguente codice funzionerà come ci si aspetta::
+
+ local_lock_irq(&local_lock);
+ raw_spin_lock(&lock);
+
+ed è equivalente a::
+
+ raw_spin_lock_irq(&lock);
+
+Ma su un kernel PREEMPT_RT questo codice non funzionerà perché local_lock_irq()
+si traduce in uno spinlock_t per CPU che non disabilita né le interruzioni né la
+prelazione. Il seguente codice funzionerà su entrambe i kernel con o senza
+PREEMPT_RT::
+
+ local_lock_irq(&local_lock);
+ spin_lock(&lock);
+
+Un altro dettaglio da tenere a mente con local_lock è che ognuno di loro ha un
+ambito di protezione ben preciso. Dunque, la seguente sostituzione è errate::
+
+
+ func1()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock_1, flags);
+ func3();
+ local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock_1, flags);
+ }
+
+ func2()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock_2, flags);
+ func3();
+ local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock_2, flags);
+ }
+
+ func3()
+ {
+ lockdep_assert_irqs_disabled();
+ access_protected_data();
+ }
+
+Questo funziona correttamente su un kernel non-PREEMPT_RT, ma su un kernel
+PREEMPT_RT local_lock_1 e local_lock_2 sono distinti e non possono serializzare
+i chiamanti di func3(). L'*assert* di lockdep verrà attivato su un kernel
+PREEMPT_RT perché local_lock_irqsave() non disabilita le interruzione a casa
+della specifica semantica di spinlock_t in PREEMPT_RT. La corretta sostituzione
+è::
+
+ func1()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock, flags);
+ func3();
+ local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock, flags);
+ }
+
+ func2()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock, flags);
+ func3();
+ local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock, flags);
+ }
+
+ func3()
+ {
+ lockdep_assert_held(&local_lock);
+ access_protected_data();
+ }
+
+spinlock_t e rwlock_t
+---------------------
+
+Ci sono alcune conseguenze di cui tener conto dal cambiamento di semantica di
+spinlock_t e rwlock_t sui kernel PREEMPT_RT. Per esempio, sui kernel non
+PREEMPT_RT il seguente codice funziona come ci si aspetta::
+
+ local_irq_disable();
+ spin_lock(&lock);
+
+ed è equivalente a::
+
+ spin_lock_irq(&lock);
+
+Lo stesso vale per rwlock_t e le varianti con _irqsave().
+
+Sui kernel PREEMPT_RT questo codice non funzionerà perché gli rtmutex richiedono
+un contesto con la possibilità di prelazione. Al suo posto, usate
+spin_lock_irq() o spin_lock_irqsave() e le loro controparti per il rilascio. I
+kernel PREEMPT_RT offrono un meccanismo local_lock per i casi in cui la
+disabilitazione delle interruzioni ed acquisizione di un blocco devono rimanere
+separati. Acquisire un local_lock àncora un processo ad una CPU permettendo cose
+come un'acquisizione di un blocco con interruzioni disabilitate per singola CPU.
+
+Il tipico scenario è quando si vuole proteggere una variabile di processore nel
+contesto di un thread::
+
+
+ struct foo *p = get_cpu_ptr(&var1);
+
+ spin_lock(&p->lock);
+ p->count += this_cpu_read(var2);
+
+Questo codice è corretto su un kernel non-PREEMPT_RT, ma non lo è su un
+PREEMPT_RT. La modifica della semantica di spinlock_t su PREEMPT_RT non permette
+di acquisire p->lock perché, implicitamente, get_cpu_ptr() disabilita la
+prelazione. La seguente sostituzione funzionerà su entrambe i kernel::
+
+ struct foo *p;
+
+ migrate_disable();
+ p = this_cpu_ptr(&var1);
+ spin_lock(&p->lock);
+ p->count += this_cpu_read(var2);
+
+La funzione migrate_disable() assicura che il processo venga tenuto sulla CPU
+corrente, e di conseguenza garantisce che gli accessi per-CPU alle variabili var1 e
+var2 rimangano sulla stessa CPU fintanto che il processo rimane prelabile.
+
+La sostituzione con migrate_disable() non funzionerà nel seguente scenario::
+
+ func()
+ {
+ struct foo *p;
+
+ migrate_disable();
+ p = this_cpu_ptr(&var1);
+ p->val = func2();
+
+Questo non funziona perché migrate_disable() non protegge dal ritorno da un
+processo che aveva avuto il diritto di prelazione. Una sostituzione più adatta
+per questo caso è::
+
+ func()
+ {
+ struct foo *p;
+
+ local_lock(&foo_lock);
+ p = this_cpu_ptr(&var1);
+ p->val = func2();
+
+Su un kernel non-PREEMPT_RT, questo codice protegge dal rientro disabilitando la
+prelazione. Su un kernel PREEMPT_RT si ottiene lo stesso risultato acquisendo lo
+spinlock di CPU.
+
+raw_spinlock_t su RT
+--------------------
+
+Acquisire un raw_spinlock_t disabilita la prelazione e possibilmente anche le
+interruzioni, quindi la sezione critica deve evitare di acquisire uno spinlock_t
+o rwlock_t. Per esempio, la sezione critica non deve fare allocazioni di
+memoria. Su un kernel non-PREEMPT_RT il seguente codice funziona perfettamente::
+
+ raw_spin_lock(&lock);
+ p = kmalloc(sizeof(*p), GFP_ATOMIC);
+
+Ma lo stesso codice non funziona su un kernel PREEMPT_RT perché l'allocatore di
+memoria può essere oggetto di prelazione e quindi non può essere chiamato in un
+contesto atomico. Tuttavia, si può chiamare l'allocatore di memoria quando si
+trattiene un blocco *non-raw* perché non disabilitano la prelazione sui kernel
+PREEMPT_RT::
+
+ spin_lock(&lock);
+ p = kmalloc(sizeof(*p), GFP_ATOMIC);
+
+
+bit spinlocks
+-------------
+
+I kernel PREEMPT_RT non possono sostituire i bit spinlock perché un singolo bit
+è troppo piccolo per farci stare un rtmutex. Dunque, la semantica dei bit
+spinlock è mantenuta anche sui kernel PREEMPT_RT. Quindi, le precisazioni fatte
+per raw_spinlock_t valgono anche qui.
+
+In PREEMPT_RT, alcuni bit spinlock sono sostituiti con normali spinlock_t usando
+condizioni di preprocessore in base a dove vengono usati. Per contro, questo non
+serve quando si sostituiscono gli spinlock_t. Invece, le condizioni poste sui
+file d'intestazione e sul cuore dell'implementazione della sincronizzazione
+permettono al compilatore di effettuare la sostituzione in modo trasparente.
+
+
+Regole d'annidamento dei tipi di blocchi
+========================================
+
+Le regole principali sono:
+
+ - I tipi di blocco appartenenti alla stessa categoria possono essere annidati
+ liberamente a patto che si rispetti l'ordine di blocco al fine di evitare
+ stalli.
+
+ - I blocchi con sospensione non possono essere annidati in blocchi del tipo
+ CPU locale o ad attesa attiva
+
+ - I blocchi ad attesa attiva e su CPU locale possono essere annidati nei
+ blocchi ad attesa con sospensione.
+
+ - I blocchi ad attesa attiva possono essere annidati in qualsiasi altro tipo.
+
+Queste limitazioni si applicano ad entrambe i kernel con o senza PREEMPT_RT.
+
+Il fatto che un kernel PREEMPT_RT cambi i blocchi spinlock_t e rwlock_t dal tipo
+ad attesa attiva a quello con sospensione, e che sostituisca local_lock con uno
+spinlock_t per CPU, significa che non possono essere acquisiti quando si è in un
+blocco raw_spinlock. Ne consegue il seguente ordine d'annidamento:
+
+ 1) blocchi ad attesa con sospensione
+ 2) spinlock_t, rwlock_t, local_lock
+ 3) raw_spinlock_t e bit spinlocks
+
+Se queste regole verranno violate, allora lockdep se ne accorgerà e questo sia
+con o senza PREEMPT_RT.
diff --git a/Documentation/translations/it_IT/networking/netdev-FAQ.rst b/Documentation/translations/it_IT/networking/netdev-FAQ.rst
deleted file mode 100644
index 8a1e049585c0..000000000000
--- a/Documentation/translations/it_IT/networking/netdev-FAQ.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-.. include:: ../disclaimer-ita.rst
-
-:Original: :ref:`Documentation/process/maintainer-netdev.rst <netdev-FAQ>`
-
-.. _it_netdev-FAQ:
-
-==========
-netdev FAQ
-==========
-
-.. warning::
-
- TODO ancora da tradurre
diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index 5f244e16f511..284a75ac19f8 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -575,9 +575,9 @@ due parti ``err_free_bar:`` e ``err_free_foo:``:
.. code-block:: c
- err_free_bar:
+ err_free_bar:
kfree(foo->bar);
- err_free_foo:
+ err_free_foo:
kfree(foo);
return ret;
@@ -671,7 +671,7 @@ segue nel vostro file .emacs:
(c-offsets-alist . (
(arglist-close . c-lineup-arglist-tabs-only)
(arglist-cont-nonempty .
- (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
+ (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
(arglist-intro . +)
(brace-list-intro . +)
(c . c-lineup-C-comments)
diff --git a/Documentation/translations/it_IT/subsystem-apis.rst b/Documentation/translations/it_IT/subsystem-apis.rst
new file mode 100644
index 000000000000..d179af60c26d
--- /dev/null
+++ b/Documentation/translations/it_IT/subsystem-apis.rst
@@ -0,0 +1,47 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+Documentazione dei sottosistemi del kernel
+==========================================
+
+In questa parte della documentazione si entra nel dettaglio di come funzionano
+i sottosistemi specifici del kernel dal punto di vista di uno sviluppatore del
+kernel. Molte delle informazioni qui contenute provengono direttamente dai
+sorgenti del kernel, con aggiunte di materiale dove è necessario (anche se
+talora *non* è stato aggiunto tutto ciò che era necessario).
+
+Sottosistemi principali
+-----------------------
+
+.. toctree::
+ :maxdepth: 1
+
+ core-api/index
+
+Interfacce uomo-macchina
+------------------------
+
+.. toctree::
+ :maxdepth: 1
+
+
+Interfacce di rete
+------------------
+
+.. toctree::
+ :maxdepth: 1
+
+Interfacce per l'archiviazione
+------------------------------
+
+.. toctree::
+ :maxdepth: 1
+
+
+Interfacce varie
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ i2c/index
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index 43b9fb7246d3..0b476b429e3b 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -11,7 +11,7 @@
.. toctree::
:maxdepth: 1
- howto
+ process/howto
.. raw:: latex
diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/process/howto.rst
index 8d856ebe873c..8d856ebe873c 100644
--- a/Documentation/translations/ja_JP/howto.rst
+++ b/Documentation/translations/ja_JP/process/howto.rst
diff --git a/Documentation/translations/sp_SP/process/coding-style.rst b/Documentation/translations/sp_SP/process/coding-style.rst
index a0261ba5b902..a37274764371 100644
--- a/Documentation/translations/sp_SP/process/coding-style.rst
+++ b/Documentation/translations/sp_SP/process/coding-style.rst
@@ -604,9 +604,9 @@ Normalmente la solución para esto es dividirlo en dos etiquetas de error
.. code-block:: c
- err_free_bar:
+ err_free_bar:
kfree(foo->bar);
- err_free_foo:
+ err_free_foo:
kfree(foo);
return ret;
@@ -698,7 +698,7 @@ sanos. Para hacer esto último, puede pegar lo siguiente en su archivo
(c-offsets-alist . (
(arglist-close . c-lineup-arglist-tabs-only)
(arglist-cont-nonempty .
- (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
+ (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
(arglist-intro . +)
(brace-list-intro . +)
(c . c-lineup-C-comments)
diff --git a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst
index c261b428b3f0..7d4d694967c7 100644
--- a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst
+++ b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst
@@ -273,7 +273,7 @@ revelada involucrada. La lista de embajadores actuales:
IBM Power Anton Blanchard <anton@linux.ibm.com>
IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
Intel Tony Luck <tony.luck@intel.com>
- Qualcomm Trilok Soni <tsoni@codeaurora.org>
+ Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
Samsung Javier González <javier.gonz@samsung.com>
Microsoft James Morris <jamorris@linux.microsoft.com>
diff --git a/Documentation/translations/sp_SP/process/researcher-guidelines.rst b/Documentation/translations/sp_SP/process/researcher-guidelines.rst
index 462b3290b7b8..deccc908a68d 100644
--- a/Documentation/translations/sp_SP/process/researcher-guidelines.rst
+++ b/Documentation/translations/sp_SP/process/researcher-guidelines.rst
@@ -147,4 +147,4 @@ Si no se puede encontrar a nadie para revisar internamente los parches y necesit
ayuda para encontrar a esa persona, o si tiene alguna otra pregunta relacionada
con este documento y las expectativas de la comunidad de desarrolladores, por
favor contacte con la lista de correo privada Technical Advisory Board:
-<tech-board@lists.linux-foundation.org>.
+<tech-board@groups.linuxfoundation.org>.
diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst
index fa28ef0a7fee..3bc2810b151d 100644
--- a/Documentation/translations/zh_CN/process/coding-style.rst
+++ b/Documentation/translations/zh_CN/process/coding-style.rst
@@ -523,9 +523,9 @@ Linux 里这是æ倡的åšæ³•ï¼Œå› ä¸ºè¿™æ ·å¯ä»¥å¾ˆç®€å•çš„给读者æä¾›æ›
.. code-block:: c
- err_free_bar:
+ err_free_bar:
kfree(foo->bar);
- err_free_foo:
+ err_free_foo:
kfree(foo);
return ret;
diff --git a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
index cf5f1fca3d92..c90ecb557811 100644
--- a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
+++ b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
@@ -177,7 +177,7 @@ CVE分é…
AMD Tom Lendacky <thomas.lendacky@amd.com>
IBM
Intel Tony Luck <tony.luck@intel.com>
- Qualcomm Trilok Soni <tsoni@codeaurora.org>
+ Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
Microsoft Sasha Levin <sashal@kernel.org>
VMware
diff --git a/Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst b/Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst
index 845b932bf935..aefad87e9099 100644
--- a/Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst
+++ b/Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst
@@ -53,7 +53,7 @@ OpenCAPI定义了一个在物ç†é“¾è·¯å±‚上实现的数æ®é“¾è·¯å±‚(TL)和ä
Processor:处ç†å™¨
Memory:内存
- Accelerated Function Unit:加速函数å•å…ƒ
+ Accelerated Function Unit:加速功能å•å…ƒ
@@ -97,7 +97,7 @@ OpenCAPI拥有AFUå‘主机进程å‘é€ä¸­æ–­çš„å¯èƒ½æ€§ã€‚它通过定义在传
========
驱动为æ¯ä¸ªåœ¨ç‰©ç†è®¾å¤‡ä¸Šå‘现的AFU创建一个字符设备。一个物ç†è®¾å¤‡å¯èƒ½æ‹¥æœ‰å¤šä¸ª
-函数,一个函数å¯ä»¥æ‹¥æœ‰å¤šä¸ªAFU。ä¸è¿‡ç¼–写这篇文档之时,åªå¯¹å¯¼å‡ºä¸€ä¸ªAFU的设备
+功能,一个功能å¯ä»¥æ‹¥æœ‰å¤šä¸ªAFU。ä¸è¿‡ç¼–写这篇文档之时,åªå¯¹å¯¼å‡ºä¸€ä¸ªAFU的设备
测试过。
字符设备å¯ä»¥åœ¨ /dev/ocxl/ 中被找到,其命å为:
diff --git a/Documentation/translations/zh_TW/process/coding-style.rst b/Documentation/translations/zh_TW/process/coding-style.rst
index f11dbb65ca21..c7ac504f6f40 100644
--- a/Documentation/translations/zh_TW/process/coding-style.rst
+++ b/Documentation/translations/zh_TW/process/coding-style.rst
@@ -526,9 +526,9 @@ Linux è£é€™æ˜¯æ倡的åšæ³•ï¼Œå› çˆ²é€™æ¨£å¯ä»¥å¾ˆç°¡å–®çš„給讀者æä¾›æ›
.. code-block:: c
- err_free_bar:
+ err_free_bar:
kfree(foo->bar);
- err_free_foo:
+ err_free_foo:
kfree(foo);
return ret;
diff --git a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
index 3cce7db2ab7e..93d21fd88910 100644
--- a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
+++ b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
@@ -180,7 +180,7 @@ CVE分é…
AMD Tom Lendacky <thomas.lendacky@amd.com>
IBM
Intel Tony Luck <tony.luck@intel.com>
- Qualcomm Trilok Soni <tsoni@codeaurora.org>
+ Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
Microsoft Sasha Levin <sashal@kernel.org>
VMware
diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst
index 8cd62c466d20..077dfac7ed98 100644
--- a/Documentation/usb/gadget-testing.rst
+++ b/Documentation/usb/gadget-testing.rst
@@ -448,17 +448,17 @@ Function-specific configfs interface
The function name to use when creating the function directory is "ncm".
The NCM function provides these attributes in its function directory:
- =============== ==================================================
- ifname network device interface name associated with this
- function instance
- qmult queue length multiplier for high and super speed
- host_addr MAC address of host's end of this
- Ethernet over USB link
- dev_addr MAC address of device's end of this
- Ethernet over USB link
- max_segment_size Segment size required for P2P connections. This
- will set MTU to (max_segment_size - 14 bytes)
- =============== ==================================================
+ ======================= ==================================================
+ ifname network device interface name associated with this
+ function instance
+ qmult queue length multiplier for high and super speed
+ host_addr MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr MAC address of device's end of this
+ Ethernet over USB link
+ max_segment_size Segment size required for P2P connections. This
+ will set MTU to 14 bytes
+ ======================= ==================================================
and after creating the functions/ncm.<instance name> they contain default
values: qmult is 5, dev_addr and host_addr are randomly selected.
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 09f61bd2ac2e..67d663cf2ff2 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -9,31 +9,58 @@ While much of the kernel's user-space API is documented elsewhere
also be found in the kernel tree itself. This manual is intended to be the
place where this information is gathered.
+
+System calls
+============
+
+.. toctree::
+ :maxdepth: 1
+
+ unshare
+ futex2
+ ebpf/index
+ ioctl/index
+
+Security-related interfaces
+===========================
+
.. toctree::
- :caption: Table of contents
- :maxdepth: 2
+ :maxdepth: 1
no_new_privs
seccomp_filter
landlock
- unshare
+ lsm
spec_ctrl
+ tee
+
+Devices and I/O
+===============
+
+.. toctree::
+ :maxdepth: 1
+
accelerators/ocxl
dma-buf-alloc-exchange
- ebpf/index
- ELF
- ioctl/index
iommu
iommufd
media/index
+ dcdbas
+ vduse
+ isapnp
+
+Everything else
+===============
+
+.. toctree::
+ :maxdepth: 1
+
+ ELF
netlink/index
sysfs-platform_profile
vduse
futex2
- lsm
- tee
- isapnp
- dcdbas
+ perf_ring_buffer
.. only:: subproject and html
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 457e16f06e04..c472423412bf 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -82,8 +82,9 @@ Code Seq# Include File Comments
0x10 00-0F drivers/char/s390/vmcp.h
0x10 10-1F arch/s390/include/uapi/sclp_ctl.h
0x10 20-2F arch/s390/include/uapi/asm/hypfs.h
-0x12 all linux/fs.h
+0x12 all linux/fs.h BLK* ioctls
linux/blkpg.h
+0x15 all linux/fs.h FS_IOC_* ioctls
0x1b all InfiniBand Subsystem
<http://infiniband.sourceforge.net/>
0x20 all drivers/cdrom/cm206.h
@@ -309,6 +310,7 @@ Code Seq# Include File Comments
0x89 0B-DF linux/sockios.h
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+0x8A 00-1F linux/eventpoll.h
0x8B all linux/wireless.h
0x8C 00-3F WiNRADiO driver
<http://www.winradio.com.au/>
diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst
index 1e14f5f22b8e..1990eea772d0 100644
--- a/Documentation/userspace-api/netlink/netlink-raw.rst
+++ b/Documentation/userspace-api/netlink/netlink-raw.rst
@@ -150,3 +150,45 @@ attributes from an ``attribute-set``. For example the following
Note that a selector attribute must appear in a netlink message before any
sub-message attributes that depend on it.
+
+If an attribute such as ``kind`` is defined at more than one nest level, then a
+sub-message selector will be resolved using the value 'closest' to the selector.
+For example, if the same attribute name is defined in a nested ``attribute-set``
+alongside a sub-message selector and also in a top level ``attribute-set``, then
+the selector will be resolved using the value 'closest' to the selector. If the
+value is not present in the message at the same level as defined in the spec
+then this is an error.
+
+Nested struct definitions
+-------------------------
+
+Many raw netlink families such as :doc:`tc<../../networking/netlink_spec/tc>`
+make use of nested struct definitions. The ``netlink-raw`` schema makes it
+possible to embed a struct within a struct definition using the ``struct``
+property. For example, the following struct definition embeds the
+``tc-ratespec`` struct definition for both the ``rate`` and the ``peakrate``
+members of ``struct tc-tbf-qopt``.
+
+.. code-block:: yaml
+
+ -
+ name: tc-tbf-qopt
+ type: struct
+ members:
+ -
+ name: rate
+ type: binary
+ struct: tc-ratespec
+ -
+ name: peakrate
+ type: binary
+ struct: tc-ratespec
+ -
+ name: limit
+ type: u32
+ -
+ name: buffer
+ type: u32
+ -
+ name: mtu
+ type: u32
diff --git a/Documentation/userspace-api/perf_ring_buffer.rst b/Documentation/userspace-api/perf_ring_buffer.rst
new file mode 100644
index 000000000000..bde9d8cbc106
--- /dev/null
+++ b/Documentation/userspace-api/perf_ring_buffer.rst
@@ -0,0 +1,830 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Perf ring buffer
+================
+
+.. CONTENTS
+
+ 1. Introduction
+
+ 2. Ring buffer implementation
+ 2.1 Basic algorithm
+ 2.2 Ring buffer for different tracing modes
+ 2.2.1 Default mode
+ 2.2.2 Per-thread mode
+ 2.2.3 Per-CPU mode
+ 2.2.4 System wide mode
+ 2.3 Accessing buffer
+ 2.3.1 Producer-consumer model
+ 2.3.2 Properties of the ring buffers
+ 2.3.3 Writing samples into buffer
+ 2.3.4 Reading samples from buffer
+ 2.3.5 Memory synchronization
+
+ 3. The mechanism of AUX ring buffer
+ 3.1 The relationship between AUX and regular ring buffers
+ 3.2 AUX events
+ 3.3 Snapshot mode
+
+
+1. Introduction
+===============
+
+The ring buffer is a fundamental mechanism for data transfer. perf uses
+ring buffers to transfer event data from kernel to user space, another
+kind of ring buffer which is so called auxiliary (AUX) ring buffer also
+plays an important role for hardware tracing with Intel PT, Arm
+CoreSight, etc.
+
+The ring buffer implementation is critical but it's also a very
+challenging work. On the one hand, the kernel and perf tool in the user
+space use the ring buffer to exchange data and stores data into data
+file, thus the ring buffer needs to transfer data with high throughput;
+on the other hand, the ring buffer management should avoid significant
+overload to distract profiling results.
+
+This documentation dives into the details for perf ring buffer with two
+parts: firstly it explains the perf ring buffer implementation, then the
+second part discusses the AUX ring buffer mechanism.
+
+2. Ring buffer implementation
+=============================
+
+2.1 Basic algorithm
+-------------------
+
+That said, a typical ring buffer is managed by a head pointer and a tail
+pointer; the head pointer is manipulated by a writer and the tail
+pointer is updated by a reader respectively.
+
+::
+
+ +---------------------------+
+ | | |***|***|***| | |
+ +---------------------------+
+ `-> Tail `-> Head
+
+ * : the data is filled by the writer.
+
+ Figure 1. Ring buffer
+
+Perf uses the same way to manage its ring buffer. In the implementation
+there are two key data structures held together in a set of consecutive
+pages, the control structure and then the ring buffer itself. The page
+with the control structure in is known as the "user page". Being held
+in continuous virtual addresses simplifies locating the ring buffer
+address, it is in the pages after the page with the user page.
+
+The control structure is named as ``perf_event_mmap_page``, it contains a
+head pointer ``data_head`` and a tail pointer ``data_tail``. When the
+kernel starts to fill records into the ring buffer, it updates the head
+pointer to reserve the memory so later it can safely store events into
+the buffer. On the other side, when the user page is a writable mapping,
+the perf tool has the permission to update the tail pointer after consuming
+data from the ring buffer. Yet another case is for the user page's
+read-only mapping, which is to be addressed in the section
+:ref:`writing_samples_into_buffer`.
+
+::
+
+ user page ring buffer
+ +---------+---------+ +---------------------------------------+
+ |data_head|data_tail|...| | |***|***|***|***|***| | | |
+ +---------+---------+ +---------------------------------------+
+ ` `----------------^ ^
+ `----------------------------------------------|
+
+ * : the data is filled by the writer.
+
+ Figure 2. Perf ring buffer
+
+When using the ``perf record`` tool, we can specify the ring buffer size
+with option ``-m`` or ``--mmap-pages=``, the given size will be rounded up
+to a power of two that is a multiple of a page size. Though the kernel
+allocates at once for all memory pages, it's deferred to map the pages
+to VMA area until the perf tool accesses the buffer from the user space.
+In other words, at the first time accesses the buffer's page from user
+space in the perf tool, a data abort exception for page fault is taken
+and the kernel uses this occasion to map the page into process VMA
+(see ``perf_mmap_fault()``), thus the perf tool can continue to access
+the page after returning from the exception.
+
+2.2 Ring buffer for different tracing modes
+-------------------------------------------
+
+The perf profiles programs with different modes: default mode, per thread
+mode, per cpu mode, and system wide mode. This section describes these
+modes and how the ring buffer meets requirements for them. At last we
+will review the race conditions caused by these modes.
+
+2.2.1 Default mode
+^^^^^^^^^^^^^^^^^^
+
+Usually we execute ``perf record`` command followed by a profiling program
+name, like below command::
+
+ perf record test_program
+
+This command doesn't specify any options for CPU and thread modes, the
+perf tool applies the default mode on the perf event. It maps all the
+CPUs in the system and the profiled program's PID on the perf event, and
+it enables inheritance mode on the event so that child tasks inherits
+the events. As a result, the perf event is attributed as::
+
+ evsel::cpus::map[] = { 0 .. _SC_NPROCESSORS_ONLN-1 }
+ evsel::threads::map[] = { pid }
+ evsel::attr::inherit = 1
+
+These attributions finally will be reflected on the deployment of ring
+buffers. As shown below, the perf tool allocates individual ring buffer
+for each CPU, but it only enables events for the profiled program rather
+than for all threads in the system. The *T1* thread represents the
+thread context of the 'test_program', whereas *T2* and *T3* are irrelevant
+threads in the system. The perf samples are exclusively collected for
+the *T1* thread and stored in the ring buffer associated with the CPU on
+which the *T1* thread is running.
+
+::
+
+ T1 T2 T1
+ +----+ +-----------+ +----+
+ CPU0 |xxxx| |xxxxxxxxxxx| |xxxx|
+ +----+--------------+-----------+----------+----+-------->
+ | |
+ v v
+ +-----------------------------------------------------+
+ | Ring buffer 0 |
+ +-----------------------------------------------------+
+
+ T1
+ +-----+
+ CPU1 |xxxxx|
+ -----+-----+--------------------------------------------->
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 1 |
+ +-----------------------------------------------------+
+
+ T1 T3
+ +----+ +-------+
+ CPU2 |xxxx| |xxxxxxx|
+ --------------------------+----+--------+-------+-------->
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 2 |
+ +-----------------------------------------------------+
+
+ T1
+ +--------------+
+ CPU3 |xxxxxxxxxxxxxx|
+ -----------+--------------+------------------------------>
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 3 |
+ +-----------------------------------------------------+
+
+ T1: Thread 1; T2: Thread 2; T3: Thread 3
+ x: Thread is in running state
+
+ Figure 3. Ring buffer for default mode
+
+2.2.2 Per-thread mode
+^^^^^^^^^^^^^^^^^^^^^
+
+By specifying option ``--per-thread`` in perf command, e.g.
+
+::
+
+ perf record --per-thread test_program
+
+The perf event doesn't map to any CPUs and is only bound to the
+profiled process, thus, the perf event's attributions are::
+
+ evsel::cpus::map[0] = { -1 }
+ evsel::threads::map[] = { pid }
+ evsel::attr::inherit = 0
+
+In this mode, a single ring buffer is allocated for the profiled thread;
+if the thread is scheduled on a CPU, the events on that CPU will be
+enabled; and if the thread is scheduled out from the CPU, the events on
+the CPU will be disabled. When the thread is migrated from one CPU to
+another, the events are to be disabled on the previous CPU and enabled
+on the next CPU correspondingly.
+
+::
+
+ T1 T2 T1
+ +----+ +-----------+ +----+
+ CPU0 |xxxx| |xxxxxxxxxxx| |xxxx|
+ +----+--------------+-----------+----------+----+-------->
+ | |
+ | T1 |
+ | +-----+ |
+ CPU1 | |xxxxx| |
+ --|--+-----+----------------------------------|---------->
+ | | |
+ | | T1 T3 |
+ | | +----+ +---+ |
+ CPU2 | | |xxxx| |xxx| |
+ --|-----|-----------------+----+--------+---+-|---------->
+ | | | |
+ | | T1 | |
+ | | +--------------+ | |
+ CPU3 | | |xxxxxxxxxxxxxx| | |
+ --|-----|--+--------------+-|-----------------|---------->
+ | | | | |
+ v v v v v
+ +-----------------------------------------------------+
+ | Ring buffer |
+ +-----------------------------------------------------+
+
+ T1: Thread 1
+ x: Thread is in running state
+
+ Figure 4. Ring buffer for per-thread mode
+
+When perf runs in per-thread mode, a ring buffer is allocated for the
+profiled thread *T1*. The ring buffer is dedicated for thread *T1*, if the
+thread *T1* is running, the perf events will be recorded into the ring
+buffer; when the thread is sleeping, all associated events will be
+disabled, thus no trace data will be recorded into the ring buffer.
+
+2.2.3 Per-CPU mode
+^^^^^^^^^^^^^^^^^^
+
+The option ``-C`` is used to collect samples on the list of CPUs, for
+example the below perf command receives option ``-C 0,2``::
+
+ perf record -C 0,2 test_program
+
+It maps the perf event to CPUs 0 and 2, and the event is not associated to any
+PID. Thus the perf event attributions are set as::
+
+ evsel::cpus::map[0] = { 0, 2 }
+ evsel::threads::map[] = { -1 }
+ evsel::attr::inherit = 0
+
+This results in the session of ``perf record`` will sample all threads on CPU0
+and CPU2, and be terminated until test_program exits. Even there have tasks
+running on CPU1 and CPU3, since the ring buffer is absent for them, any
+activities on these two CPUs will be ignored. A usage case is to combine the
+options for per-thread mode and per-CPU mode, e.g. the options ``–C 0,2`` and
+``––per–thread`` are specified together, the samples are recorded only when
+the profiled thread is scheduled on any of the listed CPUs.
+
+::
+
+ T1 T2 T1
+ +----+ +-----------+ +----+
+ CPU0 |xxxx| |xxxxxxxxxxx| |xxxx|
+ +----+--------------+-----------+----------+----+-------->
+ | | |
+ v v v
+ +-----------------------------------------------------+
+ | Ring buffer 0 |
+ +-----------------------------------------------------+
+
+ T1
+ +-----+
+ CPU1 |xxxxx|
+ -----+-----+--------------------------------------------->
+
+ T1 T3
+ +----+ +-------+
+ CPU2 |xxxx| |xxxxxxx|
+ --------------------------+----+--------+-------+-------->
+ | |
+ v v
+ +-----------------------------------------------------+
+ | Ring buffer 1 |
+ +-----------------------------------------------------+
+
+ T1
+ +--------------+
+ CPU3 |xxxxxxxxxxxxxx|
+ -----------+--------------+------------------------------>
+
+ T1: Thread 1; T2: Thread 2; T3: Thread 3
+ x: Thread is in running state
+
+ Figure 5. Ring buffer for per-CPU mode
+
+2.2.4 System wide mode
+^^^^^^^^^^^^^^^^^^^^^^
+
+By using option ``–a`` or ``––all–cpus``, perf collects samples on all CPUs
+for all tasks, we call it as the system wide mode, the command is::
+
+ perf record -a test_program
+
+Similar to the per-CPU mode, the perf event doesn't bind to any PID, and
+it maps to all CPUs in the system::
+
+ evsel::cpus::map[] = { 0 .. _SC_NPROCESSORS_ONLN-1 }
+ evsel::threads::map[] = { -1 }
+ evsel::attr::inherit = 0
+
+In the system wide mode, every CPU has its own ring buffer, all threads
+are monitored during the running state and the samples are recorded into
+the ring buffer belonging to the CPU which the events occurred on.
+
+::
+
+ T1 T2 T1
+ +----+ +-----------+ +----+
+ CPU0 |xxxx| |xxxxxxxxxxx| |xxxx|
+ +----+--------------+-----------+----------+----+-------->
+ | | |
+ v v v
+ +-----------------------------------------------------+
+ | Ring buffer 0 |
+ +-----------------------------------------------------+
+
+ T1
+ +-----+
+ CPU1 |xxxxx|
+ -----+-----+--------------------------------------------->
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 1 |
+ +-----------------------------------------------------+
+
+ T1 T3
+ +----+ +-------+
+ CPU2 |xxxx| |xxxxxxx|
+ --------------------------+----+--------+-------+-------->
+ | |
+ v v
+ +-----------------------------------------------------+
+ | Ring buffer 2 |
+ +-----------------------------------------------------+
+
+ T1
+ +--------------+
+ CPU3 |xxxxxxxxxxxxxx|
+ -----------+--------------+------------------------------>
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 3 |
+ +-----------------------------------------------------+
+
+ T1: Thread 1; T2: Thread 2; T3: Thread 3
+ x: Thread is in running state
+
+ Figure 6. Ring buffer for system wide mode
+
+2.3 Accessing buffer
+--------------------
+
+Based on the understanding of how the ring buffer is allocated in
+various modes, this section explains access the ring buffer.
+
+2.3.1 Producer-consumer model
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the Linux kernel, the PMU events can produce samples which are stored
+into the ring buffer; the perf command in user space consumes the
+samples by reading out data from the ring buffer and finally saves the
+data into the file for post analysis. It’s a typical producer-consumer
+model for using the ring buffer.
+
+The perf process polls on the PMU events and sleeps when no events are
+incoming. To prevent frequent exchanges between the kernel and user
+space, the kernel event core layer introduces a watermark, which is
+stored in the ``perf_buffer::watermark``. When a sample is recorded into
+the ring buffer, and if the used buffer exceeds the watermark, the
+kernel wakes up the perf process to read samples from the ring buffer.
+
+::
+
+ Perf
+ / | Read samples
+ Polling / `--------------| Ring buffer
+ v v ;---------------------v
+ +----------------+ +---------+---------+ +-------------------+
+ |Event wait queue| |data_head|data_tail| |***|***| | |***|
+ +----------------+ +---------+---------+ +-------------------+
+ ^ ^ `------------------------^
+ | Wake up tasks | Store samples
+ +-----------------------------+
+ | Kernel event core layer |
+ +-----------------------------+
+
+ * : the data is filled by the writer.
+
+ Figure 7. Writing and reading the ring buffer
+
+When the kernel event core layer notifies the user space, because
+multiple events might share the same ring buffer for recording samples,
+the core layer iterates every event associated with the ring buffer and
+wakes up tasks waiting on the event. This is fulfilled by the kernel
+function ``ring_buffer_wakeup()``.
+
+After the perf process is woken up, it starts to check the ring buffers
+one by one, if it finds any ring buffer containing samples it will read
+out the samples for statistics or saving into the data file. Given the
+perf process is able to run on any CPU, this leads to the ring buffer
+potentially being accessed from multiple CPUs simultaneously, which
+causes race conditions. The race condition handling is described in the
+section :ref:`memory_synchronization`.
+
+2.3.2 Properties of the ring buffers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Linux kernel supports two write directions for the ring buffer: forward and
+backward. The forward writing saves samples from the beginning of the ring
+buffer, the backward writing stores data from the end of the ring buffer with
+the reversed direction. The perf tool determines the writing direction.
+
+Additionally, the tool can map buffers in either read-write mode or read-only
+mode to the user space.
+
+The ring buffer in the read-write mode is mapped with the property
+``PROT_READ | PROT_WRITE``. With the write permission, the perf tool
+updates the ``data_tail`` to indicate the data start position. Combining
+with the head pointer ``data_head``, which works as the end position of
+the current data, the perf tool can easily know where read out the data
+from.
+
+Alternatively, in the read-only mode, only the kernel keeps to update
+the ``data_head`` while the user space cannot access the ``data_tail`` due
+to the mapping property ``PROT_READ``.
+
+As a result, the matrix below illustrates the various combinations of
+direction and mapping characteristics. The perf tool employs two of these
+combinations to support buffer types: the non-overwrite buffer and the
+overwritable buffer.
+
+.. list-table::
+ :widths: 1 1 1
+ :header-rows: 1
+
+ * - Mapping mode
+ - Forward
+ - Backward
+ * - read-write
+ - Non-overwrite ring buffer
+ - Not used
+ * - read-only
+ - Not used
+ - Overwritable ring buffer
+
+The non-overwrite ring buffer uses the read-write mapping with forward
+writing. It starts to save data from the beginning of the ring buffer
+and wrap around when overflow, which is used with the read-write mode in
+the normal ring buffer. When the consumer doesn't keep up with the
+producer, it would lose some data, the kernel keeps how many records it
+lost and generates the ``PERF_RECORD_LOST`` records in the next time
+when it finds a space in the ring buffer.
+
+The overwritable ring buffer uses the backward writing with the
+read-only mode. It saves the data from the end of the ring buffer and
+the ``data_head`` keeps the position of current data, the perf always
+knows where it starts to read and until the end of the ring buffer, thus
+it don't need the ``data_tail``. In this mode, it will not generate the
+``PERF_RECORD_LOST`` records.
+
+.. _writing_samples_into_buffer:
+
+2.3.3 Writing samples into buffer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When a sample is taken and saved into the ring buffer, the kernel
+prepares sample fields based on the sample type; then it prepares the
+info for writing ring buffer which is stored in the structure
+``perf_output_handle``. In the end, the kernel outputs the sample into
+the ring buffer and updates the head pointer in the user page so the
+perf tool can see the latest value.
+
+The structure ``perf_output_handle`` serves as a temporary context for
+tracking the information related to the buffer. The advantages of it is
+that it enables concurrent writing to the buffer by different events.
+For example, a software event and a hardware PMU event both are enabled
+for profiling, two instances of ``perf_output_handle`` serve as separate
+contexts for the software event and the hardware event respectively.
+This allows each event to reserve its own memory space for populating
+the record data.
+
+2.3.4 Reading samples from buffer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the user space, the perf tool utilizes the ``perf_event_mmap_page``
+structure to handle the head and tail of the buffer. It also uses
+``perf_mmap`` structure to keep track of a context for the ring buffer, this
+context includes information about the buffer's starting and ending
+addresses. Additionally, the mask value can be utilized to compute the
+circular buffer pointer even for an overflow.
+
+Similar to the kernel, the perf tool in the user space first reads out
+the recorded data from the ring buffer, and then updates the buffer's
+tail pointer ``perf_event_mmap_page::data_tail``.
+
+.. _memory_synchronization:
+
+2.3.5 Memory synchronization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The modern CPUs with relaxed memory model cannot promise the memory
+ordering, this means it’s possible to access the ring buffer and the
+``perf_event_mmap_page`` structure out of order. To assure the specific
+sequence for memory accessing perf ring buffer, memory barriers are
+used to assure the data dependency. The rationale for the memory
+synchronization is as below::
+
+ Kernel User space
+
+ if (LOAD ->data_tail) { LOAD ->data_head
+ (A) smp_rmb() (C)
+ STORE $data LOAD $data
+ smp_wmb() (B) smp_mb() (D)
+ STORE ->data_head STORE ->data_tail
+ }
+
+The comments in tools/include/linux/ring_buffer.h gives nice description
+for why and how to use memory barriers, here we will just provide an
+alternative explanation:
+
+(A) is a control dependency so that CPU assures order between checking
+pointer ``perf_event_mmap_page::data_tail`` and filling sample into ring
+buffer;
+
+(D) pairs with (A). (D) separates the ring buffer data reading from
+writing the pointer ``data_tail``, perf tool first consumes samples and then
+tells the kernel that the data chunk has been released. Since a reading
+operation is followed by a writing operation, thus (D) is a full memory
+barrier.
+
+(B) is a writing barrier in the middle of two writing operations, which
+makes sure that recording a sample must be prior to updating the head
+pointer.
+
+(C) pairs with (B). (C) is a read memory barrier to ensure the head
+pointer is fetched before reading samples.
+
+To implement the above algorithm, the ``perf_output_put_handle()`` function
+in the kernel and two helpers ``ring_buffer_read_head()`` and
+``ring_buffer_write_tail()`` in the user space are introduced, they rely
+on memory barriers as described above to ensure the data dependency.
+
+Some architectures support one-way permeable barrier with load-acquire
+and store-release operations, these barriers are more relaxed with less
+performance penalty, so (C) and (D) can be optimized to use barriers
+``smp_load_acquire()`` and ``smp_store_release()`` respectively.
+
+If an architecture doesn’t support load-acquire and store-release in its
+memory model, it will roll back to the old fashion of memory barrier
+operations. In this case, ``smp_load_acquire()`` encapsulates
+``READ_ONCE()`` + ``smp_mb()``, since ``smp_mb()`` is costly,
+``ring_buffer_read_head()`` doesn't invoke ``smp_load_acquire()`` and it uses
+the barriers ``READ_ONCE()`` + ``smp_rmb()`` instead.
+
+3. The mechanism of AUX ring buffer
+===================================
+
+In this chapter, we will explain the implementation of the AUX ring
+buffer. In the first part it will discuss the connection between the
+AUX ring buffer and the regular ring buffer, then the second part will
+examine how the AUX ring buffer co-works with the regular ring buffer,
+as well as the additional features introduced by the AUX ring buffer for
+the sampling mechanism.
+
+3.1 The relationship between AUX and regular ring buffers
+---------------------------------------------------------
+
+Generally, the AUX ring buffer is an auxiliary for the regular ring
+buffer. The regular ring buffer is primarily used to store the event
+samples and every event format complies with the definition in the
+union ``perf_event``; the AUX ring buffer is for recording the hardware
+trace data and the trace data format is hardware IP dependent.
+
+The general use and advantage of the AUX ring buffer is that it is
+written directly by hardware rather than by the kernel. For example,
+regular profile samples that write to the regular ring buffer cause an
+interrupt. Tracing execution requires a high number of samples and
+using interrupts would be overwhelming for the regular ring buffer
+mechanism. Having an AUX buffer allows for a region of memory more
+decoupled from the kernel and written to directly by hardware tracing.
+
+The AUX ring buffer reuses the same algorithm with the regular ring
+buffer for the buffer management. The control structure
+``perf_event_mmap_page`` extends the new fields ``aux_head`` and ``aux_tail``
+for the head and tail pointers of the AUX ring buffer.
+
+During the initialisation phase, besides the mmap()-ed regular ring
+buffer, the perf tool invokes a second syscall in the
+``auxtrace_mmap__mmap()`` function for the mmap of the AUX buffer with
+non-zero file offset; ``rb_alloc_aux()`` in the kernel allocates pages
+correspondingly, these pages will be deferred to map into VMA when
+handling the page fault, which is the same lazy mechanism with the
+regular ring buffer.
+
+AUX events and AUX trace data are two different things. Let's see an
+example::
+
+ perf record -a -e cycles -e cs_etm/@tmc_etr0/ -- sleep 2
+
+The above command enables two events: one is the event *cycles* from PMU
+and another is the AUX event *cs_etm* from Arm CoreSight, both are saved
+into the regular ring buffer while the CoreSight's AUX trace data is
+stored in the AUX ring buffer.
+
+As a result, we can see the regular ring buffer and the AUX ring buffer
+are allocated in pairs. The perf in default mode allocates the regular
+ring buffer and the AUX ring buffer per CPU-wise, which is the same as
+the system wide mode, however, the default mode records samples only for
+the profiled program, whereas the latter mode profiles for all programs
+in the system. For per-thread mode, the perf tool allocates only one
+regular ring buffer and one AUX ring buffer for the whole session. For
+the per-CPU mode, the perf allocates two kinds of ring buffers for
+selected CPUs specified by the option ``-C``.
+
+The below figure demonstrates the buffers' layout in the system wide
+mode; if there are any activities on one CPU, the AUX event samples and
+the hardware trace data will be recorded into the dedicated buffers for
+the CPU.
+
+::
+
+ T1 T2 T1
+ +----+ +-----------+ +----+
+ CPU0 |xxxx| |xxxxxxxxxxx| |xxxx|
+ +----+--------------+-----------+----------+----+-------->
+ | | |
+ v v v
+ +-----------------------------------------------------+
+ | Ring buffer 0 |
+ +-----------------------------------------------------+
+ | | |
+ v v v
+ +-----------------------------------------------------+
+ | AUX Ring buffer 0 |
+ +-----------------------------------------------------+
+
+ T1
+ +-----+
+ CPU1 |xxxxx|
+ -----+-----+--------------------------------------------->
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 1 |
+ +-----------------------------------------------------+
+ |
+ v
+ +-----------------------------------------------------+
+ | AUX Ring buffer 1 |
+ +-----------------------------------------------------+
+
+ T1 T3
+ +----+ +-------+
+ CPU2 |xxxx| |xxxxxxx|
+ --------------------------+----+--------+-------+-------->
+ | |
+ v v
+ +-----------------------------------------------------+
+ | Ring buffer 2 |
+ +-----------------------------------------------------+
+ | |
+ v v
+ +-----------------------------------------------------+
+ | AUX Ring buffer 2 |
+ +-----------------------------------------------------+
+
+ T1
+ +--------------+
+ CPU3 |xxxxxxxxxxxxxx|
+ -----------+--------------+------------------------------>
+ |
+ v
+ +-----------------------------------------------------+
+ | Ring buffer 3 |
+ +-----------------------------------------------------+
+ |
+ v
+ +-----------------------------------------------------+
+ | AUX Ring buffer 3 |
+ +-----------------------------------------------------+
+
+ T1: Thread 1; T2: Thread 2; T3: Thread 3
+ x: Thread is in running state
+
+ Figure 8. AUX ring buffer for system wide mode
+
+3.2 AUX events
+--------------
+
+Similar to ``perf_output_begin()`` and ``perf_output_end()``'s working for the
+regular ring buffer, ``perf_aux_output_begin()`` and ``perf_aux_output_end()``
+serve for the AUX ring buffer for processing the hardware trace data.
+
+Once the hardware trace data is stored into the AUX ring buffer, the PMU
+driver will stop hardware tracing by calling the ``pmu::stop()`` callback.
+Similar to the regular ring buffer, the AUX ring buffer needs to apply
+the memory synchronization mechanism as discussed in the section
+:ref:`memory_synchronization`. Since the AUX ring buffer is managed by the
+PMU driver, the barrier (B), which is a writing barrier to ensure the trace
+data is externally visible prior to updating the head pointer, is asked
+to be implemented in the PMU driver.
+
+Then ``pmu::stop()`` can safely call the ``perf_aux_output_end()`` function to
+finish two things:
+
+- It fills an event ``PERF_RECORD_AUX`` into the regular ring buffer, this
+ event delivers the information of the start address and data size for a
+ chunk of hardware trace data has been stored into the AUX ring buffer;
+
+- Since the hardware trace driver has stored new trace data into the AUX
+ ring buffer, the argument *size* indicates how many bytes have been
+ consumed by the hardware tracing, thus ``perf_aux_output_end()`` updates the
+ header pointer ``perf_buffer::aux_head`` to reflect the latest buffer usage.
+
+At the end, the PMU driver will restart hardware tracing. During this
+temporary suspending period, it will lose hardware trace data, which
+will introduce a discontinuity during decoding phase.
+
+The event ``PERF_RECORD_AUX`` presents an AUX event which is handled in the
+kernel, but it lacks the information for saving the AUX trace data in
+the perf file. When the perf tool copies the trace data from AUX ring
+buffer to the perf data file, it synthesizes a ``PERF_RECORD_AUXTRACE``
+event which is not a kernel ABI, it's defined by the perf tool to describe
+which portion of data in the AUX ring buffer is saved. Afterwards, the perf
+tool reads out the AUX trace data from the perf file based on the
+``PERF_RECORD_AUXTRACE`` events, and the ``PERF_RECORD_AUX`` event is used to
+decode a chunk of data by correlating with time order.
+
+3.3 Snapshot mode
+-----------------
+
+Perf supports snapshot mode for AUX ring buffer, in this mode, users
+only record AUX trace data at a specific time point which users are
+interested in. E.g. below gives an example of how to take snapshots
+with 1 second interval with Arm CoreSight::
+
+ perf record -e cs_etm/@tmc_etr0/u -S -a program &
+ PERFPID=$!
+ while true; do
+ kill -USR2 $PERFPID
+ sleep 1
+ done
+
+The main flow for snapshot mode is:
+
+- Before a snapshot is taken, the AUX ring buffer acts in free run mode.
+ During free run mode the perf doesn't record any of the AUX events and
+ trace data;
+
+- Once the perf tool receives the *USR2* signal, it triggers the callback
+ function ``auxtrace_record::snapshot_start()`` to deactivate hardware
+ tracing. The kernel driver then populates the AUX ring buffer with the
+ hardware trace data, and the event ``PERF_RECORD_AUX`` is stored in the
+ regular ring buffer;
+
+- Then perf tool takes a snapshot, ``record__read_auxtrace_snapshot()``
+ reads out the hardware trace data from the AUX ring buffer and saves it
+ into perf data file;
+
+- After the snapshot is finished, ``auxtrace_record::snapshot_finish()``
+ restarts the PMU event for AUX tracing.
+
+The perf only accesses the head pointer ``perf_event_mmap_page::aux_head``
+in snapshot mode and doesn’t touch tail pointer ``aux_tail``, this is
+because the AUX ring buffer can overflow in free run mode, the tail
+pointer is useless in this case. Alternatively, the callback
+``auxtrace_record::find_snapshot()`` is introduced for making the decision
+of whether the AUX ring buffer has been wrapped around or not, at the
+end it fixes up the AUX buffer's head which are used to calculate the
+trace data size.
+
+As we know, the buffers' deployment can be per-thread mode, per-CPU
+mode, or system wide mode, and the snapshot can be applied to any of
+these modes. Below is an example of taking snapshot with system wide
+mode.
+
+::
+
+ Snapshot is taken
+ |
+ v
+ +------------------------+
+ | AUX Ring buffer 0 | <- aux_head
+ +------------------------+
+ v
+ +--------------------------------+
+ | AUX Ring buffer 1 | <- aux_head
+ +--------------------------------+
+ v
+ +--------------------------------------------+
+ | AUX Ring buffer 2 | <- aux_head
+ +--------------------------------------------+
+ v
+ +---------------------------------------+
+ | AUX Ring buffer 3 | <- aux_head
+ +---------------------------------------+
+
+ Figure 9. Snapshot with system wide mode
diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst
index 68b0d2363af8..e1eaf6a830ce 100644
--- a/Documentation/virt/coco/sev-guest.rst
+++ b/Documentation/virt/coco/sev-guest.rst
@@ -67,6 +67,23 @@ counter (e.g. counter overflow), then -EIO will be returned.
};
};
+The host ioctls are issued to a file descriptor of the /dev/sev device.
+The ioctl accepts the command ID/input structure documented below.
+
+::
+
+ struct sev_issue_cmd {
+ /* Command ID */
+ __u32 cmd;
+
+ /* Command request structure */
+ __u64 data;
+
+ /* Firmware error code on failure (see psp-sev.h) */
+ __u32 error;
+ };
+
+
2.1 SNP_GET_REPORT
------------------
@@ -124,6 +141,41 @@ be updated with the expected value.
See GHCB specification for further detail on how to parse the certificate blob.
+2.4 SNP_PLATFORM_STATUS
+-----------------------
+:Technology: sev-snp
+:Type: hypervisor ioctl cmd
+:Parameters (out): struct sev_user_data_snp_status
+:Returns (out): 0 on success, -negative on error
+
+The SNP_PLATFORM_STATUS command is used to query the SNP platform status. The
+status includes API major, minor version and more. See the SEV-SNP
+specification for further details.
+
+2.5 SNP_COMMIT
+--------------
+:Technology: sev-snp
+:Type: hypervisor ioctl cmd
+:Returns (out): 0 on success, -negative on error
+
+SNP_COMMIT is used to commit the currently installed firmware using the
+SEV-SNP firmware SNP_COMMIT command. This prevents roll-back to a previously
+committed firmware version. This will also update the reported TCB to match
+that of the currently installed firmware.
+
+2.6 SNP_SET_CONFIG
+------------------
+:Technology: sev-snp
+:Type: hypervisor ioctl cmd
+:Parameters (in): struct sev_user_data_snp_config
+:Returns (out): 0 on success, -negative on error
+
+SNP_SET_CONFIG is used to set the system-wide configuration such as
+reported TCB version in the attestation report. The command is similar
+to SNP_CONFIG command defined in the SEV-SNP spec. The current values of
+the firmware parameters affected by this command can be queried via
+SNP_PLATFORM_STATUS.
+
3. SEV-SNP CPUID Enforcement
============================
diff --git a/Documentation/virt/hyperv/index.rst b/Documentation/virt/hyperv/index.rst
index 4a7a1b738bbe..de447e11b4a5 100644
--- a/Documentation/virt/hyperv/index.rst
+++ b/Documentation/virt/hyperv/index.rst
@@ -10,3 +10,4 @@ Hyper-V Enlightenments
overview
vmbus
clocks
+ vpci
diff --git a/Documentation/virt/hyperv/vpci.rst b/Documentation/virt/hyperv/vpci.rst
new file mode 100644
index 000000000000..b65b2126ede3
--- /dev/null
+++ b/Documentation/virt/hyperv/vpci.rst
@@ -0,0 +1,316 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+PCI pass-thru devices
+=========================
+In a Hyper-V guest VM, PCI pass-thru devices (also called
+virtual PCI devices, or vPCI devices) are physical PCI devices
+that are mapped directly into the VM's physical address space.
+Guest device drivers can interact directly with the hardware
+without intermediation by the host hypervisor. This approach
+provides higher bandwidth access to the device with lower
+latency, compared with devices that are virtualized by the
+hypervisor. The device should appear to the guest just as it
+would when running on bare metal, so no changes are required
+to the Linux device drivers for the device.
+
+Hyper-V terminology for vPCI devices is "Discrete Device
+Assignment" (DDA). Public documentation for Hyper-V DDA is
+available here: `DDA`_
+
+.. _DDA: https://learn.microsoft.com/en-us/windows-server/virtualization/hyper-v/plan/plan-for-deploying-devices-using-discrete-device-assignment
+
+DDA is typically used for storage controllers, such as NVMe,
+and for GPUs. A similar mechanism for NICs is called SR-IOV
+and produces the same benefits by allowing a guest device
+driver to interact directly with the hardware. See Hyper-V
+public documentation here: `SR-IOV`_
+
+.. _SR-IOV: https://learn.microsoft.com/en-us/windows-hardware/drivers/network/overview-of-single-root-i-o-virtualization--sr-iov-
+
+This discussion of vPCI devices includes DDA and SR-IOV
+devices.
+
+Device Presentation
+-------------------
+Hyper-V provides full PCI functionality for a vPCI device when
+it is operating, so the Linux device driver for the device can
+be used unchanged, provided it uses the correct Linux kernel
+APIs for accessing PCI config space and for other integration
+with Linux. But the initial detection of the PCI device and
+its integration with the Linux PCI subsystem must use Hyper-V
+specific mechanisms. Consequently, vPCI devices on Hyper-V
+have a dual identity. They are initially presented to Linux
+guests as VMBus devices via the standard VMBus "offer"
+mechanism, so they have a VMBus identity and appear under
+/sys/bus/vmbus/devices. The VMBus vPCI driver in Linux at
+drivers/pci/controller/pci-hyperv.c handles a newly introduced
+vPCI device by fabricating a PCI bus topology and creating all
+the normal PCI device data structures in Linux that would
+exist if the PCI device were discovered via ACPI on a bare-
+metal system. Once those data structures are set up, the
+device also has a normal PCI identity in Linux, and the normal
+Linux device driver for the vPCI device can function as if it
+were running in Linux on bare-metal. Because vPCI devices are
+presented dynamically through the VMBus offer mechanism, they
+do not appear in the Linux guest's ACPI tables. vPCI devices
+may be added to a VM or removed from a VM at any time during
+the life of the VM, and not just during initial boot.
+
+With this approach, the vPCI device is a VMBus device and a
+PCI device at the same time. In response to the VMBus offer
+message, the hv_pci_probe() function runs and establishes a
+VMBus connection to the vPCI VSP on the Hyper-V host. That
+connection has a single VMBus channel. The channel is used to
+exchange messages with the vPCI VSP for the purpose of setting
+up and configuring the vPCI device in Linux. Once the device
+is fully configured in Linux as a PCI device, the VMBus
+channel is used only if Linux changes the vCPU to be interrupted
+in the guest, or if the vPCI device is removed from
+the VM while the VM is running. The ongoing operation of the
+device happens directly between the Linux device driver for
+the device and the hardware, with VMBus and the VMBus channel
+playing no role.
+
+PCI Device Setup
+----------------
+PCI device setup follows a sequence that Hyper-V originally
+created for Windows guests, and that can be ill-suited for
+Linux guests due to differences in the overall structure of
+the Linux PCI subsystem compared with Windows. Nonetheless,
+with a bit of hackery in the Hyper-V virtual PCI driver for
+Linux, the virtual PCI device is setup in Linux so that
+generic Linux PCI subsystem code and the Linux driver for the
+device "just work".
+
+Each vPCI device is set up in Linux to be in its own PCI
+domain with a host bridge. The PCI domainID is derived from
+bytes 4 and 5 of the instance GUID assigned to the VMBus vPCI
+device. The Hyper-V host does not guarantee that these bytes
+are unique, so hv_pci_probe() has an algorithm to resolve
+collisions. The collision resolution is intended to be stable
+across reboots of the same VM so that the PCI domainIDs don't
+change, as the domainID appears in the user space
+configuration of some devices.
+
+hv_pci_probe() allocates a guest MMIO range to be used as PCI
+config space for the device. This MMIO range is communicated
+to the Hyper-V host over the VMBus channel as part of telling
+the host that the device is ready to enter d0. See
+hv_pci_enter_d0(). When the guest subsequently accesses this
+MMIO range, the Hyper-V host intercepts the accesses and maps
+them to the physical device PCI config space.
+
+hv_pci_probe() also gets BAR information for the device from
+the Hyper-V host, and uses this information to allocate MMIO
+space for the BARs. That MMIO space is then setup to be
+associated with the host bridge so that it works when generic
+PCI subsystem code in Linux processes the BARs.
+
+Finally, hv_pci_probe() creates the root PCI bus. At this
+point the Hyper-V virtual PCI driver hackery is done, and the
+normal Linux PCI machinery for scanning the root bus works to
+detect the device, to perform driver matching, and to
+initialize the driver and device.
+
+PCI Device Removal
+------------------
+A Hyper-V host may initiate removal of a vPCI device from a
+guest VM at any time during the life of the VM. The removal
+is instigated by an admin action taken on the Hyper-V host and
+is not under the control of the guest OS.
+
+A guest VM is notified of the removal by an unsolicited
+"Eject" message sent from the host to the guest over the VMBus
+channel associated with the vPCI device. Upon receipt of such
+a message, the Hyper-V virtual PCI driver in Linux
+asynchronously invokes Linux kernel PCI subsystem calls to
+shutdown and remove the device. When those calls are
+complete, an "Ejection Complete" message is sent back to
+Hyper-V over the VMBus channel indicating that the device has
+been removed. At this point, Hyper-V sends a VMBus rescind
+message to the Linux guest, which the VMBus driver in Linux
+processes by removing the VMBus identity for the device. Once
+that processing is complete, all vestiges of the device having
+been present are gone from the Linux kernel. The rescind
+message also indicates to the guest that Hyper-V has stopped
+providing support for the vPCI device in the guest. If the
+guest were to attempt to access that device's MMIO space, it
+would be an invalid reference. Hypercalls affecting the device
+return errors, and any further messages sent in the VMBus
+channel are ignored.
+
+After sending the Eject message, Hyper-V allows the guest VM
+60 seconds to cleanly shutdown the device and respond with
+Ejection Complete before sending the VMBus rescind
+message. If for any reason the Eject steps don't complete
+within the allowed 60 seconds, the Hyper-V host forcibly
+performs the rescind steps, which will likely result in
+cascading errors in the guest because the device is now no
+longer present from the guest standpoint and accessing the
+device MMIO space will fail.
+
+Because ejection is asynchronous and can happen at any point
+during the guest VM lifecycle, proper synchronization in the
+Hyper-V virtual PCI driver is very tricky. Ejection has been
+observed even before a newly offered vPCI device has been
+fully setup. The Hyper-V virtual PCI driver has been updated
+several times over the years to fix race conditions when
+ejections happen at inopportune times. Care must be taken when
+modifying this code to prevent re-introducing such problems.
+See comments in the code.
+
+Interrupt Assignment
+--------------------
+The Hyper-V virtual PCI driver supports vPCI devices using
+MSI, multi-MSI, or MSI-X. Assigning the guest vCPU that will
+receive the interrupt for a particular MSI or MSI-X message is
+complex because of the way the Linux setup of IRQs maps onto
+the Hyper-V interfaces. For the single-MSI and MSI-X cases,
+Linux calls hv_compse_msi_msg() twice, with the first call
+containing a dummy vCPU and the second call containing the
+real vCPU. Furthermore, hv_irq_unmask() is finally called
+(on x86) or the GICD registers are set (on arm64) to specify
+the real vCPU again. Each of these three calls interact
+with Hyper-V, which must decide which physical CPU should
+receive the interrupt before it is forwarded to the guest VM.
+Unfortunately, the Hyper-V decision-making process is a bit
+limited, and can result in concentrating the physical
+interrupts on a single CPU, causing a performance bottleneck.
+See details about how this is resolved in the extensive
+comment above the function hv_compose_msi_req_get_cpu().
+
+The Hyper-V virtual PCI driver implements the
+irq_chip.irq_compose_msi_msg function as hv_compose_msi_msg().
+Unfortunately, on Hyper-V the implementation requires sending
+a VMBus message to the Hyper-V host and awaiting an interrupt
+indicating receipt of a reply message. Since
+irq_chip.irq_compose_msi_msg can be called with IRQ locks
+held, it doesn't work to do the normal sleep until awakened by
+the interrupt. Instead hv_compose_msi_msg() must send the
+VMBus message, and then poll for the completion message. As
+further complexity, the vPCI device could be ejected/rescinded
+while the polling is in progress, so this scenario must be
+detected as well. See comments in the code regarding this
+very tricky area.
+
+Most of the code in the Hyper-V virtual PCI driver (pci-
+hyperv.c) applies to Hyper-V and Linux guests running on x86
+and on arm64 architectures. But there are differences in how
+interrupt assignments are managed. On x86, the Hyper-V
+virtual PCI driver in the guest must make a hypercall to tell
+Hyper-V which guest vCPU should be interrupted by each
+MSI/MSI-X interrupt, and the x86 interrupt vector number that
+the x86_vector IRQ domain has picked for the interrupt. This
+hypercall is made by hv_arch_irq_unmask(). On arm64, the
+Hyper-V virtual PCI driver manages the allocation of an SPI
+for each MSI/MSI-X interrupt. The Hyper-V virtual PCI driver
+stores the allocated SPI in the architectural GICD registers,
+which Hyper-V emulates, so no hypercall is necessary as with
+x86. Hyper-V does not support using LPIs for vPCI devices in
+arm64 guest VMs because it does not emulate a GICv3 ITS.
+
+The Hyper-V virtual PCI driver in Linux supports vPCI devices
+whose drivers create managed or unmanaged Linux IRQs. If the
+smp_affinity for an unmanaged IRQ is updated via the /proc/irq
+interface, the Hyper-V virtual PCI driver is called to tell
+the Hyper-V host to change the interrupt targeting and
+everything works properly. However, on x86 if the x86_vector
+IRQ domain needs to reassign an interrupt vector due to
+running out of vectors on a CPU, there's no path to inform the
+Hyper-V host of the change, and things break. Fortunately,
+guest VMs operate in a constrained device environment where
+using all the vectors on a CPU doesn't happen. Since such a
+problem is only a theoretical concern rather than a practical
+concern, it has been left unaddressed.
+
+DMA
+---
+By default, Hyper-V pins all guest VM memory in the host
+when the VM is created, and programs the physical IOMMU to
+allow the VM to have DMA access to all its memory. Hence
+it is safe to assign PCI devices to the VM, and allow the
+guest operating system to program the DMA transfers. The
+physical IOMMU prevents a malicious guest from initiating
+DMA to memory belonging to the host or to other VMs on the
+host. From the Linux guest standpoint, such DMA transfers
+are in "direct" mode since Hyper-V does not provide a virtual
+IOMMU in the guest.
+
+Hyper-V assumes that physical PCI devices always perform
+cache-coherent DMA. When running on x86, this behavior is
+required by the architecture. When running on arm64, the
+architecture allows for both cache-coherent and
+non-cache-coherent devices, with the behavior of each device
+specified in the ACPI DSDT. But when a PCI device is assigned
+to a guest VM, that device does not appear in the DSDT, so the
+Hyper-V VMBus driver propagates cache-coherency information
+from the VMBus node in the ACPI DSDT to all VMBus devices,
+including vPCI devices (since they have a dual identity as a VMBus
+device and as a PCI device). See vmbus_dma_configure().
+Current Hyper-V versions always indicate that the VMBus is
+cache coherent, so vPCI devices on arm64 always get marked as
+cache coherent and the CPU does not perform any sync
+operations as part of dma_map/unmap_*() calls.
+
+vPCI protocol versions
+----------------------
+As previously described, during vPCI device setup and teardown
+messages are passed over a VMBus channel between the Hyper-V
+host and the Hyper-v vPCI driver in the Linux guest. Some
+messages have been revised in newer versions of Hyper-V, so
+the guest and host must agree on the vPCI protocol version to
+be used. The version is negotiated when communication over
+the VMBus channel is first established. See
+hv_pci_protocol_negotiation(). Newer versions of the protocol
+extend support to VMs with more than 64 vCPUs, and provide
+additional information about the vPCI device, such as the
+guest virtual NUMA node to which it is most closely affined in
+the underlying hardware.
+
+Guest NUMA node affinity
+------------------------
+When the vPCI protocol version provides it, the guest NUMA
+node affinity of the vPCI device is stored as part of the Linux
+device information for subsequent use by the Linux driver. See
+hv_pci_assign_numa_node(). If the negotiated protocol version
+does not support the host providing NUMA affinity information,
+the Linux guest defaults the device NUMA node to 0. But even
+when the negotiated protocol version includes NUMA affinity
+information, the ability of the host to provide such
+information depends on certain host configuration options. If
+the guest receives NUMA node value "0", it could mean NUMA
+node 0, or it could mean "no information is available".
+Unfortunately it is not possible to distinguish the two cases
+from the guest side.
+
+PCI config space access in a CoCo VM
+------------------------------------
+Linux PCI device drivers access PCI config space using a
+standard set of functions provided by the Linux PCI subsystem.
+In Hyper-V guests these standard functions map to functions
+hv_pcifront_read_config() and hv_pcifront_write_config()
+in the Hyper-V virtual PCI driver. In normal VMs,
+these hv_pcifront_*() functions directly access the PCI config
+space, and the accesses trap to Hyper-V to be handled.
+But in CoCo VMs, memory encryption prevents Hyper-V
+from reading the guest instruction stream to emulate the
+access, so the hv_pcifront_*() functions must invoke
+hypercalls with explicit arguments describing the access to be
+made.
+
+Config Block back-channel
+-------------------------
+The Hyper-V host and Hyper-V virtual PCI driver in Linux
+together implement a non-standard back-channel communication
+path between the host and guest. The back-channel path uses
+messages sent over the VMBus channel associated with the vPCI
+device. The functions hyperv_read_cfg_blk() and
+hyperv_write_cfg_blk() are the primary interfaces provided to
+other parts of the Linux kernel. As of this writing, these
+interfaces are used only by the Mellanox mlx5 driver to pass
+diagnostic data to a Hyper-V host running in the Azure public
+cloud. The functions hyperv_read_cfg_blk() and
+hyperv_write_cfg_blk() are implemented in a separate module
+(pci-hyperv-intf.c, under CONFIG_PCI_HYPERV_INTERFACE) that
+effectively stubs them out when running in non-Hyper-V
+environments.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 3ec0b7a455a0..09c7e585ff58 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8791,6 +8791,11 @@ means the VM type with value @n is supported. Possible values of @n are::
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
+Note, KVM_X86_SW_PROTECTED_VM is currently only for development and testing.
+Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in
+production. The behavior and effective ABI for software-protected VMs is
+unstable.
+
9. Known KVM API problems
=========================
diff --git a/MAINTAINERS b/MAINTAINERS
index 8d1052fa6a69..8ea2cbfc52ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24,7 +24,7 @@ Descriptions of section entries and preferred order
filing info, a direct bug tracker link, or a mailto: URI.
C: URI for *chat* protocol, server and channel where developers
usually hang out, for example irc://server/channel.
- P: Subsystem Profile document for more details submitting
+ P: *Subsystem Profile* document for more details submitting
patches to the given subsystem. This is either an in-tree file,
or a URI. See Documentation/maintainer/maintainer-entry-profile.rst
for details.
@@ -897,6 +897,12 @@ Q: https://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/hw/efa/
F: include/uapi/rdma/efa-abi.h
+AMD ADDRESS TRANSLATION LIBRARY (ATL)
+M: Yazen Ghannam <Yazen.Ghannam@amd.com>
+L: linux-edac@vger.kernel.org
+S: Supported
+F: drivers/ras/amd/atl/*
+
AMD AXI W1 DRIVER
M: Kris Chaplin <kris.chaplin@amd.com>
R: Thomas Delev <thomas.delev@amd.com>
@@ -1395,6 +1401,7 @@ F: drivers/hwmon/max31760.c
ANALOGBITS PLL LIBRARIES
M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Samuel Holland <samuel.holland@sifive.com>
S: Supported
F: drivers/clk/analogbits/*
F: include/linux/clk/analogbits*
@@ -2156,7 +2163,7 @@ M: Shawn Guo <shawnguo@kernel.org>
M: Sascha Hauer <s.hauer@pengutronix.de>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
R: Fabio Estevam <festevam@gmail.com>
-R: NXP Linux Team <linux-imx@nxp.com>
+L: imx@lists.linux.dev
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2542,13 +2549,14 @@ F: drivers/*/*/*wpcm*
F: drivers/*/*wpcm*
ARM/NXP S32G ARCHITECTURE
-M: Chester Lin <chester62515@gmail.com>
-R: Andreas Färber <afaerber@suse.de>
+R: Chester Lin <chester62515@gmail.com>
R: Matthias Brugger <mbrugger@suse.com>
-R: NXP S32 Linux Team <s32@nxp.com>
+R: Ghennadi Procopciuc <ghennadi.procopciuc@oss.nxp.com>
+L: NXP S32 Linux Team <s32@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm64/boot/dts/freescale/s32g*.dts*
+F: drivers/pinctrl/nxp/
ARM/Orion SoC/Technologic Systems TS-78xx platform support
M: Alexander Clouter <alex@digriz.org.uk>
@@ -3168,10 +3176,10 @@ F: drivers/hwmon/asus-ec-sensors.c
ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS
M: Corentin Chary <corentin.chary@gmail.com>
-L: acpi4asus-user@lists.sourceforge.net
+M: Luke D. Jones <luke@ljones.dev>
L: platform-driver-x86@vger.kernel.org
S: Maintained
-W: http://acpi4asus.sf.net
+W: https://asus-linux.org/
F: drivers/platform/x86/asus*.c
F: drivers/platform/x86/eeepc*.c
@@ -3799,6 +3807,7 @@ M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Andrii Nakryiko <andrii@kernel.org>
R: Martin KaFai Lau <martin.lau@linux.dev>
+R: Eduard Zingerman <eddyz87@gmail.com>
R: Song Liu <song@kernel.org>
R: Yonghong Song <yonghong.song@linux.dev>
R: John Fastabend <john.fastabend@gmail.com>
@@ -3859,6 +3868,7 @@ F: net/unix/unix_bpf.c
BPF [LIBRARY] (libbpf)
M: Andrii Nakryiko <andrii@kernel.org>
+M: Eduard Zingerman <eddyz87@gmail.com>
L: bpf@vger.kernel.org
S: Maintained
F: tools/lib/bpf/
@@ -3916,6 +3926,7 @@ F: security/bpf/
BPF [SELFTESTS] (Test Runners & Infrastructure)
M: Andrii Nakryiko <andrii@kernel.org>
+M: Eduard Zingerman <eddyz87@gmail.com>
R: Mykola Lysenko <mykolal@fb.com>
L: bpf@vger.kernel.org
S: Maintained
@@ -4169,14 +4180,14 @@ F: drivers/firmware/broadcom/tee_bnxt_fw.c
F: drivers/net/ethernet/broadcom/bnxt/
F: include/linux/firmware/broadcom/tee_bnxt_fw.h
-BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
-M: Arend van Spriel <aspriel@gmail.com>
-M: Franky Lin <franky.lin@broadcom.com>
-M: Hante Meuleman <hante.meuleman@broadcom.com>
+BROADCOM BRCM80211 IEEE802.11 WIRELESS DRIVERS
+M: Arend van Spriel <arend.vanspriel@broadcom.com>
L: linux-wireless@vger.kernel.org
+L: brcm80211@lists.linux.dev
L: brcm80211-dev-list.pdl@broadcom.com
S: Supported
F: drivers/net/wireless/broadcom/brcm80211/
+F: include/linux/platform_data/brcmfmac.h
BROADCOM BRCMSTB GPIO DRIVER
M: Doug Berger <opendmb@gmail.com>
@@ -4547,7 +4558,7 @@ F: drivers/net/ieee802154/ca8210.c
CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
M: David Howells <dhowells@redhat.com>
-L: linux-cachefs@redhat.com (moderated for non-subscribers)
+L: netfs@lists.linux.dev
S: Supported
F: Documentation/filesystems/caching/cachefiles.rst
F: fs/cachefiles/
@@ -4629,8 +4640,8 @@ S: Maintained
F: net/sched/sch_cake.c
CAN NETWORK DRIVERS
-M: Wolfgang Grandegger <wg@grandegger.com>
M: Marc Kleine-Budde <mkl@pengutronix.de>
+M: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
L: linux-can@vger.kernel.org
S: Maintained
W: https://github.com/linux-can
@@ -5378,7 +5389,7 @@ CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
M: Johannes Weiner <hannes@cmpxchg.org>
M: Michal Hocko <mhocko@kernel.org>
M: Roman Gushchin <roman.gushchin@linux.dev>
-M: Shakeel Butt <shakeelb@google.com>
+M: Shakeel Butt <shakeel.butt@linux.dev>
R: Muchun Song <muchun.song@linux.dev>
L: cgroups@vger.kernel.org
L: linux-mm@kvack.org
@@ -5610,6 +5621,11 @@ S: Maintained
F: Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
F: drivers/net/can/ctucanfd/
+CVE ASSIGNMENT CONTACT
+M: CVE Assignment Team <cve@kernel.org>
+S: Maintained
+F: Documentation/process/cve.rst
+
CW1200 WLAN driver
S: Orphan
F: drivers/net/wireless/st/cw1200/
@@ -5958,7 +5974,6 @@ S: Maintained
F: drivers/platform/x86/dell/dell-wmi-descriptor.c
DELL WMI HARDWARE PRIVACY SUPPORT
-M: Perry Yuan <Perry.Yuan@dell.com>
L: Dell.Client.Kernel@dell.com
L: platform-driver-x86@vger.kernel.org
S: Maintained
@@ -6119,6 +6134,14 @@ F: include/linux/device-mapper.h
F: include/linux/dm-*.h
F: include/uapi/linux/dm-*.h
+DEVICE-MAPPER VDO TARGET
+M: Matthew Sakai <msakai@redhat.com>
+M: dm-devel@lists.linux.dev
+L: dm-devel@lists.linux.dev
+S: Maintained
+F: Documentation/admin-guide/device-mapper/vdo*.rst
+F: drivers/md/dm-vdo/
+
DEVLINK
M: Jiri Pirko <jiri@resnulli.us>
L: netdev@vger.kernel.org
@@ -6373,6 +6396,7 @@ L: linux-doc@vger.kernel.org
S: Maintained
F: Documentation/admin-guide/quickly-build-trimmed-linux.rst
F: Documentation/admin-guide/reporting-issues.rst
+F: Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
DOCUMENTATION SCRIPTS
M: Mauro Carvalho Chehab <mchehab@kernel.org>
@@ -7578,7 +7602,6 @@ R: Robert Richter <rric@kernel.org>
L: linux-edac@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
-F: Documentation/admin-guide/ras.rst
F: Documentation/driver-api/edac.rst
F: drivers/edac/
F: include/linux/edac.h
@@ -7885,6 +7908,13 @@ S: Maintained
F: include/linux/errseq.h
F: lib/errseq.c
+ESD CAN NETWORK DRIVERS
+M: Stefan Mätje <stefan.maetje@esd.eu>
+R: socketcan@esd.eu
+L: linux-can@vger.kernel.org
+S: Maintained
+F: drivers/net/can/esd/
+
ESD CAN/USB DRIVERS
M: Frank Jungclaus <frank.jungclaus@esd.eu>
R: socketcan@esd.eu
@@ -7955,12 +7985,13 @@ L: rust-for-linux@vger.kernel.org
S: Maintained
F: rust/kernel/net/phy.rs
-EXEC & BINFMT API
+EXEC & BINFMT API, ELF
R: Eric Biederman <ebiederm@xmission.com>
R: Kees Cook <keescook@chromium.org>
L: linux-mm@kvack.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
+F: Documentation/userspace-api/ELF.rst
F: fs/*binfmt_*.c
F: fs/exec.c
F: include/linux/binfmts.h
@@ -8160,6 +8191,7 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
M: Chuck Lever <chuck.lever@oracle.com>
+R: Alexander Aring <alex.aring@gmail.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c
@@ -8223,7 +8255,8 @@ F: include/linux/iomap.h
FILESYSTEMS [NETFS LIBRARY]
M: David Howells <dhowells@redhat.com>
-L: linux-cachefs@redhat.com (moderated for non-subscribers)
+R: Jeff Layton <jlayton@kernel.org>
+L: netfs@lists.linux.dev
L: linux-fsdevel@vger.kernel.org
S: Supported
F: Documentation/filesystems/caching/
@@ -8489,7 +8522,7 @@ FREESCALE IMX / MXC FEC DRIVER
M: Wei Fang <wei.fang@nxp.com>
R: Shenwei Wang <shenwei.wang@nxp.com>
R: Clark Wang <xiaoning.wang@nxp.com>
-R: NXP Linux Team <linux-imx@nxp.com>
+L: imx@lists.linux.dev
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -8524,7 +8557,7 @@ F: drivers/i2c/busses/i2c-imx.c
FREESCALE IMX LPI2C DRIVER
M: Dong Aisheng <aisheng.dong@nxp.com>
L: linux-i2c@vger.kernel.org
-L: linux-imx@nxp.com
+L: imx@lists.linux.dev
S: Maintained
F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
F: drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -8584,6 +8617,13 @@ F: Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml
F: drivers/soc/fsl/qe/qmc.c
F: include/soc/fsl/qe/qmc.h
+FREESCALE QUICC ENGINE QMC HDLC DRIVER
+M: Herve Codina <herve.codina@bootlin.com>
+L: netdev@vger.kernel.org
+L: linuxppc-dev@lists.ozlabs.org
+S: Maintained
+F: drivers/net/wan/fsl_qmc_hdlc.c
+
FREESCALE QUICC ENGINE TSA DRIVER
M: Herve Codina <herve.codina@bootlin.com>
L: linuxppc-dev@lists.ozlabs.org
@@ -8975,9 +9015,10 @@ F: include/linux/string.h
F: include/linux/string_choices.h
F: include/linux/string_helpers.h
F: lib/string.c
+F: lib/string_kunit.c
F: lib/string_helpers.c
-F: lib/test-string_helpers.c
-F: lib/test_string.c
+F: lib/string_helpers_kunit.c
+F: scripts/coccinelle/api/string_choices.cocci
GENERIC UIO DRIVER FOR PCI DEVICES
M: "Michael S. Tsirkin" <mst@redhat.com>
@@ -9079,6 +9120,7 @@ F: Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
F: arch/arm64/boot/dts/exynos/google/
F: drivers/clk/samsung/clk-gs101.c
F: include/dt-bindings/clock/google,gs101.h
+K: [gG]oogle.?[tT]ensor
GPD POCKET FAN DRIVER
M: Hans de Goede <hdegoede@redhat.com>
@@ -10090,7 +10132,7 @@ L: linux-i2c@vger.kernel.org
S: Maintained
W: https://i2c.wiki.kernel.org/
Q: https://patchwork.ozlabs.org/project/linux-i2c/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git
F: Documentation/devicetree/bindings/i2c/
F: drivers/i2c/algos/
F: drivers/i2c/busses/
@@ -10282,7 +10324,7 @@ F: drivers/scsi/ibmvscsi/ibmvscsi*
F: include/scsi/viosrp.h
IBM Power Virtual SCSI Device Target Driver
-M: Michael Cyr <mikecyr@linux.ibm.com>
+M: Tyrel Datwyler <tyreld@linux.ibm.com>
L: linux-scsi@vger.kernel.org
L: target-devel@vger.kernel.org
S: Supported
@@ -10470,7 +10512,8 @@ M: Donald Robson <donald.robson@imgtec.com>
M: Matt Coster <matt.coster@imgtec.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
-F: Documentation/devicetree/bindings/gpu/img,powervr.yaml
+F: Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml
+F: Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml
F: Documentation/gpu/imagination/
F: drivers/gpu/drm/imagination/
F: include/uapi/drm/pvr_drm.h
@@ -10728,7 +10771,7 @@ INTEL DRM I915 DRIVER (Meteor Lake, DG2 and older excluding Poulsbo, Moorestown
M: Jani Nikula <jani.nikula@linux.intel.com>
M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
-M: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
+M: Tvrtko Ursulin <tursulin@ursulin.net>
L: intel-gfx@lists.freedesktop.org
S: Supported
W: https://drm.pages.freedesktop.org/intel-docs/
@@ -10800,11 +10843,11 @@ F: drivers/gpio/gpio-tangier.h
INTEL GVT-g DRIVERS (Intel GPU Virtualization)
M: Zhenyu Wang <zhenyuw@linux.intel.com>
-M: Zhi Wang <zhi.a.wang@intel.com>
+M: Zhi Wang <zhi.wang.linux@gmail.com>
L: intel-gvt-dev@lists.freedesktop.org
L: intel-gfx@lists.freedesktop.org
S: Supported
-W: https://01.org/igvt-g
+W: https://github.com/intel/gvt-linux/wiki
T: git https://github.com/intel/gvt-linux.git
F: drivers/gpu/drm/i915/gvt/
@@ -11126,7 +11169,6 @@ S: Supported
F: drivers/net/wireless/intel/iwlegacy/
INTEL WIRELESS WIFI LINK (iwlwifi)
-M: Gregory Greenman <gregory.greenman@intel.com>
M: Miri Korenblit <miriam.rachel.korenblit@intel.com>
L: linux-wireless@vger.kernel.org
S: Supported
@@ -11151,6 +11193,16 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/wwan/iosm/
+INTEL(R) FLEXIBLE RETURN AND EVENT DELIVERY
+M: Xin Li <xin@zytor.com>
+M: "H. Peter Anvin" <hpa@zytor.com>
+S: Supported
+F: Documentation/arch/x86/x86_64/fred.rst
+F: arch/x86/entry/entry_64_fred.S
+F: arch/x86/entry/entry_fred.c
+F: arch/x86/include/asm/fred.h
+F: arch/x86/kernel/fred.c
+
INTEL(R) TRACE HUB
M: Alexander Shishkin <alexander.shishkin@linux.intel.com>
S: Supported
@@ -11239,7 +11291,6 @@ F: drivers/iommu/
F: include/linux/iommu.h
F: include/linux/iova.h
F: include/linux/of_iommu.h
-F: include/uapi/linux/iommu.h
IOMMUFD
M: Jason Gunthorpe <jgg@nvidia.com>
@@ -11724,6 +11775,7 @@ F: fs/smb/server/
KERNEL UNIT TESTING FRAMEWORK (KUnit)
M: Brendan Higgins <brendanhiggins@google.com>
M: David Gow <davidgow@google.com>
+R: Rae Moar <rmoar@google.com>
L: linux-kselftest@vger.kernel.org
L: kunit-dev@googlegroups.com
S: Maintained
@@ -12152,11 +12204,11 @@ F: Documentation/scsi/53c700.rst
F: drivers/scsi/53c700*
LEAKING_ADDRESSES
-M: Tobin C. Harding <me@tobin.cc>
M: Tycho Andersen <tycho@tycho.pizza>
+R: Kees Cook <keescook@chromium.org>
L: linux-hardening@vger.kernel.org
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/tobin/leaks.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
F: scripts/leaking_addresses.pl
LED SUBSYSTEM
@@ -12510,7 +12562,6 @@ F: arch/powerpc/include/asm/livepatch.h
F: include/linux/livepatch.h
F: kernel/livepatch/
F: kernel/module/livepatch.c
-F: lib/livepatch/
F: samples/livepatch/
F: tools/testing/selftests/livepatch/
@@ -12902,6 +12953,8 @@ M: Alejandro Colomar <alx@kernel.org>
L: linux-man@vger.kernel.org
S: Maintained
W: http://www.kernel.org/doc/man-pages
+T: git git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
+T: git git://www.alejandro-colomar.es/src/alx/linux/man-pages/man-pages.git
MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP)
M: Jeremy Kerr <jk@codeconstruct.com.au>
@@ -13053,6 +13106,15 @@ L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/marvell/octeon_ep
+MARVELL OCTEON ENDPOINT VF DRIVER
+M: Veerasenareddy Burru <vburru@marvell.com>
+M: Sathesh Edara <sedara@marvell.com>
+M: Shinas Rasheed <srasheed@marvell.com>
+M: Satananda Burla <sburla@marvell.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/marvell/octeon_ep_vf
+
MARVELL OCTEONTX2 PHYSICAL FUNCTION DRIVER
M: Sunil Goutham <sgoutham@marvell.com>
M: Geetha sowjanya <gakula@marvell.com>
@@ -14007,7 +14069,7 @@ F: include/uapi/rdma/mlx5-abi.h
MELLANOX MLX5 VDPA DRIVER
M: Dragos Tatulea <dtatulea@nvidia.com>
-L: virtualization@lists.linux-foundation.org
+L: virtualization@lists.linux.dev
S: Supported
F: drivers/vdpa/mlx5/
@@ -14103,6 +14165,17 @@ F: mm/
F: tools/mm/
F: tools/testing/selftests/mm/
+MEMORY MAPPING
+M: Andrew Morton <akpm@linux-foundation.org>
+R: Liam R. Howlett <Liam.Howlett@oracle.com>
+R: Vlastimil Babka <vbabka@suse.cz>
+R: Lorenzo Stoakes <lstoakes@gmail.com>
+L: linux-mm@kvack.org
+S: Maintained
+W: http://www.linux-mm.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F: mm/mmap.c
+
MEMORY TECHNOLOGY DEVICES (MTD)
M: Miquel Raynal <miquel.raynal@bootlin.com>
M: Richard Weinberger <richard@nod.at>
@@ -14361,7 +14434,7 @@ MICROCHIP MCP16502 PMIC DRIVER
M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
-F: Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
+F: Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml
F: drivers/regulator/mcp16502.c
MICROCHIP MCP3564 ADC DRIVER
@@ -15080,6 +15153,7 @@ NETDEVSIM
M: Jakub Kicinski <kuba@kernel.org>
S: Maintained
F: drivers/net/netdevsim/*
+F: tools/testing/selftests/drivers/net/netdevsim/*
NETEM NETWORK EMULATOR
M: Stephen Hemminger <stephen@networkplumber.org>
@@ -15177,6 +15251,7 @@ F: Documentation/networking/net_cachelines/net_device.rst
F: drivers/connector/
F: drivers/net/
F: include/dt-bindings/net/
+F: include/linux/cn_proc.h
F: include/linux/etherdevice.h
F: include/linux/fcdevice.h
F: include/linux/fddidevice.h
@@ -15184,6 +15259,7 @@ F: include/linux/hippidevice.h
F: include/linux/if_*
F: include/linux/inetdevice.h
F: include/linux/netdevice.h
+F: include/uapi/linux/cn_proc.h
F: include/uapi/linux/if_*
F: include/uapi/linux/netdevice.h
X: drivers/net/wireless/
@@ -15232,6 +15308,8 @@ F: Documentation/networking/
F: Documentation/networking/net_cachelines/
F: Documentation/process/maintainer-netdev.rst
F: Documentation/userspace-api/netlink/
+F: include/linux/framer/framer-provider.h
+F: include/linux/framer/framer.h
F: include/linux/in.h
F: include/linux/indirect_call_wrapper.h
F: include/linux/net.h
@@ -15319,7 +15397,7 @@ K: \bmdo_
NETWORKING [MPTCP]
M: Matthieu Baerts <matttbe@kernel.org>
M: Mat Martineau <martineau@kernel.org>
-R: Geliang Tang <geliang.tang@linux.dev>
+R: Geliang Tang <geliang@kernel.org>
L: netdev@vger.kernel.org
L: mptcp@lists.linux.dev
S: Maintained
@@ -15566,16 +15644,6 @@ W: https://github.com/davejiang/linux/wiki
T: git https://github.com/davejiang/linux.git
F: drivers/ntb/hw/intel/
-NTFS FILESYSTEM
-M: Anton Altaparmakov <anton@tuxera.com>
-R: Namjae Jeon <linkinjeon@kernel.org>
-L: linux-ntfs-dev@lists.sourceforge.net
-S: Supported
-W: http://www.tuxera.com/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git
-F: Documentation/filesystems/ntfs.rst
-F: fs/ntfs/
-
NTFS3 FILESYSTEM
M: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
L: ntfs3@lists.linux.dev
@@ -15704,7 +15772,7 @@ F: drivers/iio/gyro/fxas21002c_spi.c
NXP i.MX 7D/6SX/6UL/93 AND VF610 ADC DRIVER
M: Haibo Chen <haibo.chen@nxp.com>
L: linux-iio@vger.kernel.org
-L: linux-imx@nxp.com
+L: imx@lists.linux.dev
S: Maintained
F: Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml
F: Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
@@ -15741,7 +15809,7 @@ F: drivers/gpu/drm/imx/dcss/
NXP i.MX 8QXP ADC DRIVER
M: Cai Huoqing <cai.huoqing@linux.dev>
M: Haibo Chen <haibo.chen@nxp.com>
-L: linux-imx@nxp.com
+L: imx@lists.linux.dev
L: linux-iio@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
@@ -15749,7 +15817,7 @@ F: drivers/iio/adc/imx8qxp-adc.c
NXP i.MX 8QXP/8QM JPEG V4L2 DRIVER
M: Mirela Rabulea <mirela.rabulea@nxp.com>
-R: NXP Linux Team <linux-imx@nxp.com>
+L: imx@lists.linux.dev
L: linux-media@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/media/nxp,imx8-jpeg.yaml
@@ -15759,7 +15827,7 @@ NXP i.MX CLOCK DRIVERS
M: Abel Vesa <abelvesa@kernel.org>
R: Peng Fan <peng.fan@nxp.com>
L: linux-clk@vger.kernel.org
-L: linux-imx@nxp.com
+L: imx@lists.linux.dev
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelvesa/linux.git clk/imx
F: Documentation/devicetree/bindings/clock/imx*
@@ -16720,6 +16788,7 @@ F: drivers/pci/controller/dwc/*layerscape*
PCI DRIVER FOR FU740
M: Paul Walmsley <paul.walmsley@sifive.com>
M: Greentime Hu <greentime.hu@sifive.com>
+M: Samuel Holland <samuel.holland@sifive.com>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml
@@ -16832,6 +16901,7 @@ F: drivers/pci/controller/dwc/*designware*
PCI DRIVER FOR TI DRA7XX/J721E
M: Vignesh Raghavendra <vigneshr@ti.com>
+R: Siddharth Vadapalli <s-vadapalli@ti.com>
L: linux-omap@vger.kernel.org
L: linux-pci@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -16856,9 +16926,8 @@ F: Documentation/devicetree/bindings/pci/xilinx-versal-cpm.yaml
F: drivers/pci/controller/pcie-xilinx-cpm.c
PCI ENDPOINT SUBSYSTEM
-M: Lorenzo Pieralisi <lpieralisi@kernel.org>
+M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
M: Krzysztof Wilczyński <kw@linux.com>
-R: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
R: Kishon Vijay Abraham I <kishon@kernel.org>
L: linux-pci@vger.kernel.org
S: Supported
@@ -17178,7 +17247,7 @@ R: John Garry <john.g.garry@oracle.com>
R: Will Deacon <will@kernel.org>
R: James Clark <james.clark@arm.com>
R: Mike Leach <mike.leach@linaro.org>
-R: Leo Yan <leo.yan@linaro.org>
+R: Leo Yan <leo.yan@linux.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: tools/build/feature/test-libopencsd.c
@@ -17259,9 +17328,12 @@ M: Shawn Guo <shawnguo@kernel.org>
M: Jacky Bai <ping.bai@nxp.com>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
L: linux-gpio@vger.kernel.org
+L: NXP S32 Linux Team <s32@nxp.com>
S: Maintained
F: Documentation/devicetree/bindings/pinctrl/fsl,*
+F: Documentation/devicetree/bindings/pinctrl/nxp,s32*
F: drivers/pinctrl/freescale/
+F: drivers/pinctrl/nxp/
PIN CONTROLLER - INTEL
M: Mika Westerberg <mika.westerberg@linux.intel.com>
@@ -17315,14 +17387,6 @@ S: Supported
F: drivers/gpio/gpio-sama5d2-piobu.c
F: drivers/pinctrl/pinctrl-at91*
-PIN CONTROLLER - NXP S32
-M: Chester Lin <clin@suse.com>
-R: NXP S32 Linux Team <s32@nxp.com>
-L: linux-gpio@vger.kernel.org
-S: Maintained
-F: Documentation/devicetree/bindings/pinctrl/nxp,s32*
-F: drivers/pinctrl/nxp/
-
PIN CONTROLLER - QUALCOMM
M: Bjorn Andersson <andersson@kernel.org>
L: linux-arm-msm@vger.kernel.org
@@ -17339,7 +17403,6 @@ F: Documentation/devicetree/bindings/pinctrl/renesas,*
F: drivers/pinctrl/renesas/
PIN CONTROLLER - SAMSUNG
-M: Tomasz Figa <tomasz.figa@gmail.com>
M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
@@ -17489,6 +17552,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: fs/timerfd.c
F: include/linux/time_namespace.h
F: include/linux/timer*
+F: include/trace/events/timer*
F: kernel/time/*timer*
F: kernel/time/namespace.c
@@ -17525,6 +17589,7 @@ F: Documentation/devicetree/bindings/power/supply/
F: drivers/power/supply/
F: include/linux/power/
F: include/linux/power_supply.h
+F: tools/testing/selftests/power_supply/
POWERNV OPERATOR PANEL LCD DISPLAY DRIVER
M: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
@@ -17972,33 +18037,34 @@ F: drivers/media/tuners/qt1010*
QUALCOMM ATH12K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
-M: Jeff Johnson <quic_jjohnson@quicinc.com>
+M: Jeff Johnson <jjohnson@kernel.org>
L: ath12k@lists.infradead.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: drivers/net/wireless/ath/ath12k/
+N: ath12k
QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
-M: Jeff Johnson <quic_jjohnson@quicinc.com>
+M: Jeff Johnson <jjohnson@kernel.org>
L: ath10k@lists.infradead.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
-F: Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
F: drivers/net/wireless/ath/ath10k/
+N: ath10k
QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
-M: Jeff Johnson <quic_jjohnson@quicinc.com>
+M: Jeff Johnson <jjohnson@kernel.org>
L: ath11k@lists.infradead.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k
B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
-F: Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
F: drivers/net/wireless/ath/ath11k/
+N: ath11k
QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
M: Toke Høiland-Jørgensen <toke@toke.dk>
@@ -18009,6 +18075,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
F: drivers/net/wireless/ath/ath9k/
+QUALCOMM ATHEROS QCA7K ETHERNET DRIVER
+M: Stefan Wahren <wahrenst@gmx.net>
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/net/qca,qca7000.txt
+F: drivers/net/ethernet/qualcomm/qca*
+
QUALCOMM BAM-DMUX WWAN NETWORK DRIVER
M: Stephan Gerhold <stephan@gerhold.net>
L: netdev@vger.kernel.org
@@ -18081,7 +18154,6 @@ F: drivers/net/ethernet/qualcomm/emac/
QUALCOMM ETHQOS ETHERNET DRIVER
M: Vinod Koul <vkoul@kernel.org>
-R: Bhupesh Sharma <bhupesh.sharma@linaro.org>
L: netdev@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
@@ -18353,11 +18425,17 @@ M: Tony Luck <tony.luck@intel.com>
M: Borislav Petkov <bp@alien8.de>
L: linux-edac@vger.kernel.org
S: Maintained
-F: Documentation/admin-guide/ras.rst
+F: Documentation/admin-guide/RAS
F: drivers/ras/
F: include/linux/ras.h
F: include/ras/ras_event.h
+RAS FRU MEMORY POISON MANAGER (FMPM)
+M: Yazen Ghannam <Yazen.Ghannam@amd.com>
+L: linux-edac@vger.kernel.org
+S: Maintained
+F: drivers/ras/amd/fmpm.c
+
RC-CORE / LIRC FRAMEWORK
M: Sean Young <sean@mess.org>
L: linux-media@vger.kernel.org
@@ -18428,7 +18506,7 @@ S: Supported
F: drivers/infiniband/sw/rdmavt
RDS - RELIABLE DATAGRAM SOCKETS
-M: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+M: Allison Henderson <allison.henderson@oracle.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
L: rds-devel@oss.oracle.com (moderated for non-subscribers)
@@ -18847,6 +18925,7 @@ F: Documentation/devicetree/bindings/riscv/
F: arch/riscv/boot/dts/
X: arch/riscv/boot/dts/allwinner/
X: arch/riscv/boot/dts/renesas/
+X: arch/riscv/boot/dts/sophgo/
RISC-V PMU DRIVERS
M: Atish Patra <atishp@atishpatra.org>
@@ -19095,6 +19174,7 @@ F: Documentation/rust/
F: rust/
F: samples/rust/
F: scripts/*rust*
+F: tools/testing/selftests/rust/
K: \b(?i:rust)\b
RXRPC SOCKETS (AF_RXRPC)
@@ -19388,7 +19468,6 @@ F: drivers/media/platform/samsung/exynos4-is/
SAMSUNG SOC CLOCK DRIVERS
M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
-M: Tomasz Figa <tomasz.figa@gmail.com>
M: Chanwoo Choi <cw00.choi@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-samsung-soc@vger.kernel.org
@@ -19630,7 +19709,7 @@ F: drivers/mmc/host/sdhci-of-at91.c
SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) NXP i.MX DRIVER
M: Haibo Chen <haibo.chen@nxp.com>
-L: linux-imx@nxp.com
+L: imx@lists.linux.dev
L: linux-mmc@vger.kernel.org
S: Maintained
F: drivers/mmc/host/sdhci-esdhc-imx.c
@@ -19965,36 +20044,15 @@ S: Maintained
F: drivers/watchdog/simatic-ipc-wdt.c
SIFIVE DRIVERS
-M: Palmer Dabbelt <palmer@dabbelt.com>
M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Samuel Holland <samuel.holland@sifive.com>
L: linux-riscv@lists.infradead.org
S: Supported
+F: drivers/dma/sf-pdma/
N: sifive
+K: fu[57]40
K: [^@]sifive
-SIFIVE CACHE DRIVER
-M: Conor Dooley <conor@kernel.org>
-L: linux-riscv@lists.infradead.org
-S: Maintained
-F: Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
-F: drivers/cache/sifive_ccache.c
-
-SIFIVE FU540 SYSTEM-ON-CHIP
-M: Paul Walmsley <paul.walmsley@sifive.com>
-M: Palmer Dabbelt <palmer@dabbelt.com>
-L: linux-riscv@lists.infradead.org
-S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git
-N: fu540
-K: fu540
-
-SIFIVE PDMA DRIVER
-M: Green Wan <green.wan@sifive.com>
-S: Maintained
-F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
-F: drivers/dma/sf-pdma/
-
-
SILEAD TOUCHSCREEN DRIVER
M: Hans de Goede <hdegoede@redhat.com>
L: linux-input@vger.kernel.org
@@ -20203,8 +20261,8 @@ F: Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
F: drivers/net/ethernet/socionext/sni_ave.c
SOCIONEXT (SNI) NETSEC NETWORK DRIVER
-M: Jassi Brar <jaswinder.singh@linaro.org>
M: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+M: Masahisa Kojima <kojima.masahisa@socionext.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/socionext,synquacer-netsec.yaml
@@ -20445,12 +20503,13 @@ F: drivers/char/sonypi.c
F: drivers/platform/x86/sony-laptop.c
F: include/linux/sony-laptop.h
-SOPHGO DEVICETREES
-M: Chao Wei <chao.wei@sophgo.com>
+SOPHGO DEVICETREES and DRIVERS
M: Chen Wang <unicorn_wang@outlook.com>
+M: Inochi Amaoto <inochiama@outlook.com>
+T: git https://github.com/sophgo/linux.git
S: Maintained
-F: arch/riscv/boot/dts/sophgo/
-F: Documentation/devicetree/bindings/riscv/sophgo.yaml
+N: sophgo
+K: sophgo
SOUND
M: Jaroslav Kysela <perex@perex.cz>
@@ -20549,6 +20608,7 @@ F: Documentation/translations/sp_SP/
SPARC + UltraSPARC (sparc/sparc64)
M: "David S. Miller" <davem@davemloft.net>
+M: Andreas Larsson <andreas@gaisler.com>
L: sparclinux@vger.kernel.org
S: Maintained
Q: http://patchwork.ozlabs.org/project/sparclinux/list/
@@ -20956,6 +21016,12 @@ F: Documentation/devicetree/bindings/phy/starfive,jh7110-usb-phy.yaml
F: drivers/phy/starfive/phy-jh7110-pcie.c
F: drivers/phy/starfive/phy-jh7110-usb.c
+STARFIVE JH8100 EXTERNAL INTERRUPT CONTROLLER DRIVER
+M: Changhuang Liang <changhuang.liang@starfivetech.com>
+S: Supported
+F: Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml
+F: drivers/irqchip/irq-starfive-jh8100-intc.c
+
STATIC BRANCH/CALL
M: Peter Zijlstra <peterz@infradead.org>
M: Josh Poimboeuf <jpoimboe@kernel.org>
@@ -21321,6 +21387,7 @@ F: drivers/clk/clk-sc[mp]i.c
F: drivers/cpufreq/sc[mp]i-cpufreq.c
F: drivers/firmware/arm_scmi/
F: drivers/firmware/arm_scpi.c
+F: drivers/hwmon/scmi-hwmon.c
F: drivers/pmdomain/arm/
F: drivers/powercap/arm_scmi_powercap.c
F: drivers/regulator/scmi-regulator.c
@@ -21515,7 +21582,7 @@ F: tools/testing/selftests/drivers/net/team/
TECHNICAL ADVISORY BOARD PROCESS DOCS
M: "Theodore Ts'o" <tytso@mit.edu>
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L: tech-board-discuss@lists.linux-foundation.org
+L: tech-board-discuss@lists.linux.dev
S: Maintained
F: Documentation/process/contribution-maturity-model.rst
F: Documentation/process/researcher-guidelines.rst
@@ -22005,6 +22072,14 @@ F: Documentation/devicetree/bindings/media/i2c/ti,ds90*
F: drivers/media/i2c/ds90*
F: include/media/i2c/ds90*
+TI HDC302X HUMIDITY DRIVER
+M: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+M: Li peiyu <579lpy@gmail.com>
+L: linux-iio@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/iio/humidity/ti,hdc3020.yaml
+F: drivers/iio/humidity/hdc3020.c
+
TI ICSSG ETHERNET DRIVER (ICSSG)
R: MD Danish Anwar <danishanwar@ti.com>
R: Roger Quadros <rogerq@kernel.org>
@@ -22471,6 +22546,23 @@ F: Documentation/block/ublk.rst
F: drivers/block/ublk_drv.c
F: include/uapi/linux/ublk_cmd.h
+UBSAN
+M: Kees Cook <keescook@chromium.org>
+R: Marco Elver <elver@google.com>
+R: Andrey Konovalov <andreyknvl@gmail.com>
+R: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+L: kasan-dev@googlegroups.com
+L: linux-hardening@vger.kernel.org
+S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
+F: Documentation/dev-tools/ubsan.rst
+F: include/linux/ubsan.h
+F: lib/Kconfig.ubsan
+F: lib/test_ubsan.c
+F: lib/ubsan.c
+F: scripts/Makefile.ubsan
+K: \bARCH_HAS_UBSAN\b
+
UCLINUX (M68KNOMMU AND COLDFIRE)
M: Greg Ungerer <gerg@linux-m68k.org>
L: linux-m68k@lists.linux-m68k.org
@@ -22860,9 +22952,8 @@ S: Maintained
F: drivers/usb/typec/mux/pi3usb30532.c
USB TYPEC PORT CONTROLLER DRIVERS
-M: Guenter Roeck <linux@roeck-us.net>
L: linux-usb@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/usb/typec/tcpm/
USB UHCI DRIVER
@@ -23074,7 +23165,7 @@ F: drivers/vfio/pci/mlx5/
VFIO VIRTIO PCI DRIVER
M: Yishai Hadas <yishaih@nvidia.com>
L: kvm@vger.kernel.org
-L: virtualization@lists.linux-foundation.org
+L: virtualization@lists.linux.dev
S: Maintained
F: drivers/vfio/pci/virtio
@@ -24118,14 +24209,13 @@ F: drivers/net/ethernet/xilinx/xilinx_axienet*
XILINX CAN DRIVER
M: Appana Durga Kedareswara rao <appana.durga.rao@xilinx.com>
-R: Naga Sureshkumar Relli <naga.sureshkumar.relli@xilinx.com>
L: linux-can@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/can/xilinx,can.yaml
F: drivers/net/can/xilinx_can.c
XILINX EVENT MANAGEMENT DRIVER
-M: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
+M: Michal Simek <michal.simek@amd.com>
S: Maintained
F: drivers/soc/xilinx/xlnx_event_manager.c
F: include/linux/firmware/xlnx-event-manager.h
@@ -24339,13 +24429,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git
F: Documentation/filesystems/zonefs.rst
F: fs/zonefs/
-ZPOOL COMPRESSED PAGE STORAGE API
-M: Dan Streetman <ddstreet@ieee.org>
-L: linux-mm@kvack.org
-S: Maintained
-F: include/linux/zpool.h
-F: mm/zpool.c
-
ZR36067 VIDEO FOR LINUX DRIVER
M: Corentin Labbe <clabbe@baylibre.com>
L: mjpeg-users@lists.sourceforge.net
@@ -24397,7 +24480,9 @@ M: Nhat Pham <nphamcs@gmail.com>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/admin-guide/mm/zswap.rst
+F: include/linux/zpool.h
F: include/linux/zswap.h
+F: mm/zpool.c
F: mm/zswap.c
THE REST
diff --git a/Makefile b/Makefile
index 9869f57c3fb3..d18fa2a6240d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 8
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -294,15 +294,15 @@ may-sync-config := 1
single-build :=
ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),)
- ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),)
+ ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),)
need-config :=
- endif
+ endif
endif
ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),)
- ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),)
+ ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),)
may-sync-config :=
- endif
+ endif
endif
need-compiler := $(may-sync-config)
@@ -323,9 +323,9 @@ endif
# We cannot build single targets and the others at the same time
ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),)
single-build := 1
- ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),)
+ ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),)
mixed-build := 1
- endif
+ endif
endif
# For "make -j clean all", "make -j mrproper defconfig all", etc.
@@ -986,6 +986,10 @@ NOSTDINC_FLAGS += -nostdinc
# perform bounds checking.
KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3)
+#Currently, disable -Wstringop-overflow for GCC 11, globally.
+KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow)
+KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow
@@ -1197,7 +1201,7 @@ prepare0: archprepare
# All the preparing..
prepare: prepare0
ifdef CONFIG_RUST
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh
+ +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh
$(Q)$(MAKE) $(build)=rust
endif
@@ -1662,7 +1666,7 @@ help:
@echo ' (sparse by default)'
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
- @echo ' make W=n [targets] Enable extra build checks, n=1,2,3 where'
+ @echo ' make W=n [targets] Enable extra build checks, n=1,2,3,c,e where'
@echo ' 1: warnings which may be relevant and do not occur too often'
@echo ' 2: warnings which occur quite often but may still be relevant'
@echo ' 3: more obscure warnings, can most likely be ignored'
@@ -1707,7 +1711,7 @@ $(DOC_TARGETS):
# "Is Rust available?" target
PHONY += rustavailable
rustavailable:
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh && echo "Rust is available!"
+ +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh && echo "Rust is available!"
# Documentation target
#
diff --git a/README b/README
index 669ac7c32292..026eff0b8e06 100644
--- a/README
+++ b/README
@@ -11,7 +11,7 @@ In order to build the documentation, use ``make htmldocs`` or
https://www.kernel.org/doc/html/latest/
There are various text files in the Documentation/ subdirectory,
-several of them using the Restructured Text markup notation.
+several of them using the ReStructured Text markup notation.
Please read the Documentation/process/changes.rst file, as it contains the
requirements for building and running the kernel, and information about
diff --git a/arch/Kconfig b/arch/Kconfig
index c91917b50873..fd18b7db2c77 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -673,6 +673,7 @@ config SHADOW_CALL_STACK
bool "Shadow Call Stack"
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+ depends on MMU
help
This option enables the compiler's Shadow Call Stack, which
uses a shadow stack to protect function return addresses from
@@ -1077,17 +1078,107 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
and vice-versa 32-bit applications to call 64-bit mmap().
Required for applications doing different bitness syscalls.
+config HAVE_PAGE_SIZE_4KB
+ bool
+
+config HAVE_PAGE_SIZE_8KB
+ bool
+
+config HAVE_PAGE_SIZE_16KB
+ bool
+
+config HAVE_PAGE_SIZE_32KB
+ bool
+
+config HAVE_PAGE_SIZE_64KB
+ bool
+
+config HAVE_PAGE_SIZE_256KB
+ bool
+
+choice
+ prompt "MMU page size"
+
+config PAGE_SIZE_4KB
+ bool "4KiB pages"
+ depends on HAVE_PAGE_SIZE_4KB
+ help
+ This option select the standard 4KiB Linux page size and the only
+ available option on many architectures. Using 4KiB page size will
+ minimize memory consumption and is therefore recommended for low
+ memory systems.
+ Some software that is written for x86 systems makes incorrect
+ assumptions about the page size and only runs on 4KiB pages.
+
+config PAGE_SIZE_8KB
+ bool "8KiB pages"
+ depends on HAVE_PAGE_SIZE_8KB
+ help
+ This option is the only supported page size on a few older
+ processors, and can be slightly faster than 4KiB pages.
+
+config PAGE_SIZE_16KB
+ bool "16KiB pages"
+ depends on HAVE_PAGE_SIZE_16KB
+ help
+ This option is usually a good compromise between memory
+ consumption and performance for typical desktop and server
+ workloads, often saving a level of page table lookups compared
+ to 4KB pages as well as reducing TLB pressure and overhead of
+ per-page operations in the kernel at the expense of a larger
+ page cache.
+
+config PAGE_SIZE_32KB
+ bool "32KiB pages"
+ depends on HAVE_PAGE_SIZE_32KB
+ help
+ Using 32KiB page size will result in slightly higher performance
+ kernel at the price of higher memory consumption compared to
+ 16KiB pages. This option is available only on cnMIPS cores.
+ Note that you will need a suitable Linux distribution to
+ support this.
+
+config PAGE_SIZE_64KB
+ bool "64KiB pages"
+ depends on HAVE_PAGE_SIZE_64KB
+ help
+ Using 64KiB page size will result in slightly higher performance
+ kernel at the price of much higher memory consumption compared to
+ 4KiB or 16KiB pages.
+ This is not suitable for general-purpose workloads but the
+ better performance may be worth the cost for certain types of
+ supercomputing or database applications that work mostly with
+ large in-memory data rather than small files.
+
+config PAGE_SIZE_256KB
+ bool "256KiB pages"
+ depends on HAVE_PAGE_SIZE_256KB
+ help
+ 256KiB pages have little practical value due to their extreme
+ memory usage. The kernel will only be able to run applications
+ that have been compiled with '-zmax-page-size' set to 256KiB
+ (the default is 64KiB or 4KiB on most architectures).
+
+endchoice
+
config PAGE_SIZE_LESS_THAN_64KB
def_bool y
- depends on !ARM64_64K_PAGES
depends on !PAGE_SIZE_64KB
- depends on !PARISC_PAGE_SIZE_64KB
depends on PAGE_SIZE_LESS_THAN_256KB
config PAGE_SIZE_LESS_THAN_256KB
def_bool y
depends on !PAGE_SIZE_256KB
+config PAGE_SHIFT
+ int
+ default 12 if PAGE_SIZE_4KB
+ default 13 if PAGE_SIZE_8KB
+ default 14 if PAGE_SIZE_16KB
+ default 15 if PAGE_SIZE_32KB
+ default 16 if PAGE_SIZE_64KB
+ default 18 if PAGE_SIZE_256KB
+
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
# is not in legacy mode (compat task, unlimited stack size or
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index d6968d090d49..4f490250d323 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
select PCI_DOMAINS if PCI
select PCI_SYSCALL if PCI
select HAVE_ASM_MODVERSIONS
+ select HAVE_PAGE_SIZE_8KB
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select NEED_DMA_MAP_STATE
diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 4db1ebc0ed99..70419e6be1a3 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -6,7 +6,7 @@
#include <asm/pal.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 13
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 7439b2377df5..8e9dd63b220c 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -467,11 +467,6 @@ smp_prepare_cpus(unsigned int max_cpus)
smp_num_cpus = smp_num_probed;
}
-void
-smp_prepare_boot_cpu(void)
-{
-}
-
int
__cpu_up(unsigned int cpu, struct task_struct *tidle)
{
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 1b0483c51cc1..4092bec198be 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -284,14 +284,17 @@ choice
config ARC_PAGE_SIZE_8K
bool "8KB"
+ select HAVE_PAGE_SIZE_8KB
help
Choose between 8k vs 16k
config ARC_PAGE_SIZE_16K
+ select HAVE_PAGE_SIZE_16KB
bool "16KB"
config ARC_PAGE_SIZE_4K
bool "4KB"
+ select HAVE_PAGE_SIZE_4KB
depends on ARC_MMU_V3 || ARC_MMU_V4
endchoice
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
index 9d9618079739..a339223d9e05 100644
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -31,7 +31,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"nop \n"
".pushsection __jump_table, \"aw\" \n"
@@ -47,7 +47,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"b %l[l_yes] \n"
".pushsection __jump_table, \"aw\" \n"
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a4ad619abfb..7fd9e741b527 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -13,10 +13,8 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-#if defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define PAGE_SHIFT 14
-#elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define PAGE_SHIFT 12
+#ifdef __KERNEL__
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#else
/*
* Default 8k
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 8d9b188caa27..b2f2c59279a6 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -39,11 +39,6 @@ struct plat_smp_ops __weak plat_smp_ops;
/* XXX: per cpu ? Only needed once in early secondary boot */
struct task_struct *secondary_idle_tsk;
-/* Called from start_kernel */
-void __init smp_prepare_boot_cpu(void)
-{
-}
-
static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
{
unsigned long dt_root = of_get_flat_dt_root();
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0af6709570d1..c46ec54c5363 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -29,7 +29,7 @@ config ARM
select ARCH_HAVE_NMI_SAFE_CMPXCHG if CPU_V7 || CPU_V7M || CPU_V6K
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_KEEP_MEMBLOCK
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
@@ -116,6 +116,7 @@ config ARM
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPTPROBES if !THUMB2_KERNEL
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PCI if MMU
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 473280d5adce..d82908b1b1bb 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -158,9 +158,7 @@ textofs-$(CONFIG_ARCH_REALTEK) := 0x00108000
ifeq ($(CONFIG_ARCH_SA1100),y)
textofs-$(CONFIG_SA1111) := 0x00208000
endif
-textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000
-textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000
-textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000
+textofs-$(CONFIG_ARCH_QCOM_RESERVE_SMEM) := 0x00208000
textofs-$(CONFIG_ARCH_MESON) := 0x00208000
textofs-$(CONFIG_ARCH_AXXIA) := 0x00308000
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 6b4baa6a9a50..6c41b270560e 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -154,7 +154,7 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
putstr(" done, booting the kernel.\n");
}
-void fortify_panic(const char *name)
+void __fortify_panic(const u8 reason, size_t avail, size_t size)
{
error("detected buffer overflow");
}
diff --git a/arch/arm/boot/compressed/misc.h b/arch/arm/boot/compressed/misc.h
index 6da00a26ac08..8c73940b5fe4 100644
--- a/arch/arm/boot/compressed/misc.h
+++ b/arch/arm/boot/compressed/misc.h
@@ -10,7 +10,7 @@ void __div0(void);
void
decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
unsigned long free_mem_ptr_end_p, int arch_id);
-void fortify_panic(const char *name);
+void __fortify_panic(const u8 reason, size_t avail, size_t size);
int atags_to_fdt(void *atag_list, void *fdt, int total_space);
uint32_t fdt_check_mem_start(uint32_t mem_start, const void *fdt);
int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
diff --git a/arch/arm/boot/dts/allwinner/sun8i-r40-feta40i.dtsi b/arch/arm/boot/dts/allwinner/sun8i-r40-feta40i.dtsi
index 9f39b5a2bb35..c12361d0317f 100644
--- a/arch/arm/boot/dts/allwinner/sun8i-r40-feta40i.dtsi
+++ b/arch/arm/boot/dts/allwinner/sun8i-r40-feta40i.dtsi
@@ -42,6 +42,13 @@
vcc-pg-supply = <&reg_dldo1>;
};
+&reg_aldo1 {
+ regulator-always-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-3v3-tv-usb";
+};
+
&reg_aldo2 {
regulator-always-on;
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm/boot/dts/amazon/alpine.dtsi b/arch/arm/boot/dts/amazon/alpine.dtsi
index ff68dfb4eb78..90bd12feac01 100644
--- a/arch/arm/boot/dts/amazon/alpine.dtsi
+++ b/arch/arm/boot/dts/amazon/alpine.dtsi
@@ -167,7 +167,6 @@
msix: msix@fbe00000 {
compatible = "al,alpine-msix";
reg = <0x0 0xfbe00000 0x0 0x100000>;
- interrupt-controller;
msi-controller;
al,msi-base-spi = <96>;
al,msi-num-spis = <64>;
diff --git a/arch/arm/boot/dts/amlogic/meson.dtsi b/arch/arm/boot/dts/amlogic/meson.dtsi
index 8e3860d5d916..8cb0fc78b2af 100644
--- a/arch/arm/boot/dts/amlogic/meson.dtsi
+++ b/arch/arm/boot/dts/amlogic/meson.dtsi
@@ -23,7 +23,7 @@
#size-cells = <1>;
ranges;
- cbus: cbus@c1100000 {
+ cbus: bus@c1100000 {
compatible = "simple-bus";
reg = <0xc1100000 0x200000>;
#address-cells = <1>;
@@ -206,7 +206,7 @@
};
};
- aobus: aobus@c8100000 {
+ aobus: bus@c8100000 {
compatible = "simple-bus";
reg = <0xc8100000 0x100000>;
#address-cells = <1>;
@@ -302,7 +302,7 @@
reg = <0xd9040000 0x10000>;
};
- secbus: secbus@da000000 {
+ secbus: bus@da000000 {
compatible = "simple-bus";
reg = <0xda000000 0x6000>;
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/amlogic/meson8.dtsi b/arch/arm/boot/dts/amlogic/meson8.dtsi
index 59932fbfd5d5..f57be9ae150f 100644
--- a/arch/arm/boot/dts/amlogic/meson8.dtsi
+++ b/arch/arm/boot/dts/amlogic/meson8.dtsi
@@ -645,7 +645,6 @@
};
&hwrng {
- compatible = "amlogic,meson8-rng", "amlogic,meson-rng";
clocks = <&clkc CLKID_RNG0>;
clock-names = "core";
};
diff --git a/arch/arm/boot/dts/amlogic/meson8b.dtsi b/arch/arm/boot/dts/amlogic/meson8b.dtsi
index 5198f5177c2c..2d9d24d3a95d 100644
--- a/arch/arm/boot/dts/amlogic/meson8b.dtsi
+++ b/arch/arm/boot/dts/amlogic/meson8b.dtsi
@@ -620,7 +620,6 @@
};
&hwrng {
- compatible = "amlogic,meson8b-rng", "amlogic,meson-rng";
clocks = <&clkc CLKID_RNG0>;
clock-names = "core";
};
diff --git a/arch/arm/boot/dts/arm/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm/arm-realview-pb1176.dts
index efed325af88d..d99bac02232b 100644
--- a/arch/arm/boot/dts/arm/arm-realview-pb1176.dts
+++ b/arch/arm/boot/dts/arm/arm-realview-pb1176.dts
@@ -451,7 +451,7 @@
/* Direct-mapped development chip ROM */
pb1176_rom@10200000 {
- compatible = "direct-mapped";
+ compatible = "mtd-rom";
reg = <0x10200000 0x4000>;
bank-width = <1>;
};
diff --git a/arch/arm/boot/dts/arm/integratorap-im-pd1.dts b/arch/arm/boot/dts/arm/integratorap-im-pd1.dts
index 7072a70da00d..367850ea0912 100644
--- a/arch/arm/boot/dts/arm/integratorap-im-pd1.dts
+++ b/arch/arm/boot/dts/arm/integratorap-im-pd1.dts
@@ -129,8 +129,6 @@
bridge {
compatible = "ti,ths8134b", "ti,ths8134";
- #address-cells = <1>;
- #size-cells = <0>;
ports {
#address-cells = <1>;
@@ -154,6 +152,7 @@
vga {
compatible = "vga-connector";
+ label = "J30";
port {
vga_con_in: endpoint {
diff --git a/arch/arm/boot/dts/arm/versatile-ab.dts b/arch/arm/boot/dts/arm/versatile-ab.dts
index f31dcf7e5862..de45aa99e260 100644
--- a/arch/arm/boot/dts/arm/versatile-ab.dts
+++ b/arch/arm/boot/dts/arm/versatile-ab.dts
@@ -32,8 +32,6 @@
bridge {
compatible = "ti,ths8134b", "ti,ths8134";
- #address-cells = <1>;
- #size-cells = <0>;
ports {
#address-cells = <1>;
@@ -59,6 +57,7 @@
vga {
compatible = "vga-connector";
+ label = "J1";
port {
vga_con_in: endpoint {
diff --git a/arch/arm/boot/dts/arm/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/arm/vexpress-v2p-ca9.dts
index 5916e4877eac..8bf35666412b 100644
--- a/arch/arm/boot/dts/arm/vexpress-v2p-ca9.dts
+++ b/arch/arm/boot/dts/arm/vexpress-v2p-ca9.dts
@@ -20,7 +20,9 @@
#address-cells = <1>;
#size-cells = <1>;
- chosen { };
+ chosen {
+ stdout-path = &v2m_serial0;
+ };
aliases {
serial0 = &v2m_serial0;
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts
index e899de681f47..5be0e8fd2633 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts
@@ -45,8 +45,8 @@
num-chipselects = <1>;
cs-gpios = <&gpio0 ASPEED_GPIO(Z, 0) GPIO_ACTIVE_LOW>;
- tpmdev@0 {
- compatible = "tcg,tpm_tis-spi";
+ tpm@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
spi-max-frequency = <33000000>;
reg = <0>;
};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts
index a677c827e758..5a8169bbda87 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts
@@ -80,8 +80,8 @@
gpio-miso = <&gpio ASPEED_GPIO(R, 5) GPIO_ACTIVE_HIGH>;
num-chipselects = <1>;
- tpmdev@0 {
- compatible = "tcg,tpm_tis-spi";
+ tpm@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
spi-max-frequency = <33000000>;
reg = <0>;
};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts
index 3f6010ef2b86..213023bc5aec 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts
@@ -456,7 +456,7 @@
status = "okay";
tpm: tpm@2e {
- compatible = "tcg,tpm-tis-i2c";
+ compatible = "nuvoton,npct75x", "tcg,tpm-tis-i2c";
reg = <0x2e>;
};
};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi
index 530491ae5eb2..857cb26ed6d7 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi
@@ -466,7 +466,6 @@
i2c0: i2c-bus@40 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x40 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -482,7 +481,6 @@
i2c1: i2c-bus@80 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x80 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -498,7 +496,6 @@
i2c2: i2c-bus@c0 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0xc0 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -515,7 +512,6 @@
i2c3: i2c-bus@100 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x100 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -532,7 +528,6 @@
i2c4: i2c-bus@140 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x140 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -549,7 +544,6 @@
i2c5: i2c-bus@180 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x180 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -566,7 +560,6 @@
i2c6: i2c-bus@1c0 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x1c0 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -583,7 +576,6 @@
i2c7: i2c-bus@300 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x300 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -600,7 +592,6 @@
i2c8: i2c-bus@340 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x340 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -617,7 +608,6 @@
i2c9: i2c-bus@380 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x380 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -634,7 +624,6 @@
i2c10: i2c-bus@3c0 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x3c0 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -651,7 +640,6 @@
i2c11: i2c-bus@400 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x400 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -668,7 +656,6 @@
i2c12: i2c-bus@440 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x440 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
@@ -685,7 +672,6 @@
i2c13: i2c-bus@480 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x480 0x40>;
compatible = "aspeed,ast2400-i2c-bus";
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi
index 04f98d1dbb97..e6f3cf3c721e 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi
@@ -363,6 +363,7 @@
interrupts = <40>;
reg = <0x1e780200 0x0100>;
clocks = <&syscon ASPEED_CLK_APB>;
+ #interrupt-cells = <2>;
interrupt-controller;
bus-frequency = <12000000>;
pinctrl-names = "default";
@@ -594,7 +595,6 @@
i2c0: i2c-bus@40 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x40 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -610,7 +610,6 @@
i2c1: i2c-bus@80 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x80 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -626,7 +625,6 @@
i2c2: i2c-bus@c0 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0xc0 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -643,7 +641,6 @@
i2c3: i2c-bus@100 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x100 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -660,7 +657,6 @@
i2c4: i2c-bus@140 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x140 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -677,7 +673,6 @@
i2c5: i2c-bus@180 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x180 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -694,7 +689,6 @@
i2c6: i2c-bus@1c0 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x1c0 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -711,7 +705,6 @@
i2c7: i2c-bus@300 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x300 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -728,7 +721,6 @@
i2c8: i2c-bus@340 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x340 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -745,7 +737,6 @@
i2c9: i2c-bus@380 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x380 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -762,7 +753,6 @@
i2c10: i2c-bus@3c0 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x3c0 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -779,7 +769,6 @@
i2c11: i2c-bus@400 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x400 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -796,7 +785,6 @@
i2c12: i2c-bus@440 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x440 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
@@ -813,7 +801,6 @@
i2c13: i2c-bus@480 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x480 0x40>;
compatible = "aspeed,ast2500-i2c-bus";
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
index c4d1faade8be..29f94696d8b1 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
@@ -474,6 +474,7 @@
reg = <0x1e780500 0x100>;
interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&syscon ASPEED_CLK_APB2>;
+ #interrupt-cells = <2>;
interrupt-controller;
bus-frequency = <12000000>;
pinctrl-names = "default";
@@ -488,6 +489,7 @@
reg = <0x1e780600 0x100>;
interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&syscon ASPEED_CLK_APB2>;
+ #interrupt-cells = <2>;
interrupt-controller;
bus-frequency = <12000000>;
pinctrl-names = "default";
@@ -902,7 +904,6 @@
i2c0: i2c-bus@80 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x80 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -917,7 +918,6 @@
i2c1: i2c-bus@100 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x100 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -932,7 +932,6 @@
i2c2: i2c-bus@180 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x180 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -947,7 +946,6 @@
i2c3: i2c-bus@200 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x200 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -962,7 +960,6 @@
i2c4: i2c-bus@280 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x280 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -977,7 +974,6 @@
i2c5: i2c-bus@300 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x300 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -992,7 +988,6 @@
i2c6: i2c-bus@380 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x380 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1007,7 +1002,6 @@
i2c7: i2c-bus@400 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x400 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1022,7 +1016,6 @@
i2c8: i2c-bus@480 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x480 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1037,7 +1030,6 @@
i2c9: i2c-bus@500 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x500 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1052,7 +1044,6 @@
i2c10: i2c-bus@580 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x580 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1067,7 +1058,6 @@
i2c11: i2c-bus@600 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x600 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1082,7 +1072,6 @@
i2c12: i2c-bus@680 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x680 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1097,7 +1086,6 @@
i2c13: i2c-bus@700 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x700 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1112,7 +1100,6 @@
i2c14: i2c-bus@780 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x780 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1127,7 +1114,6 @@
i2c15: i2c-bus@800 {
#address-cells = <1>;
#size-cells = <0>;
- #interrupt-cells = <1>;
reg = <0x800 0x80>;
compatible = "aspeed,ast2600-i2c-bus";
clocks = <&syscon ASPEED_CLK_APB2>;
diff --git a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi
index 31590d3186a2..00e5887c926f 100644
--- a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi
+++ b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi
@@ -35,8 +35,8 @@
gpio-mosi = <&gpio0 ASPEED_GPIO(X, 4) GPIO_ACTIVE_HIGH>;
gpio-miso = <&gpio0 ASPEED_GPIO(X, 5) GPIO_ACTIVE_HIGH>;
- tpmdev@0 {
- compatible = "tcg,tpm_tis-spi";
+ tpm@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
spi-max-frequency = <33000000>;
reg = <0>;
};
diff --git a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi
index f9f79ed82518..07ca0d993c9f 100644
--- a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi
@@ -167,6 +167,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupt-parent = <&mailbox>;
interrupts = <0>;
};
@@ -247,6 +248,7 @@
gpio-controller;
interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
+ #interrupt-cells = <2>;
};
i2c1: i2c@1800b000 {
@@ -518,6 +520,7 @@
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
gpio-ranges = <&pinctrl 0 42 1>,
<&pinctrl 1 44 3>,
diff --git a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi
index 788a6806191a..75545b10ef2f 100644
--- a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi
@@ -200,6 +200,7 @@
gpio-controller;
ngpios = <4>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi
index 9d20ba3b1ffb..6a4482c93167 100644
--- a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi
@@ -180,6 +180,7 @@
gpio-controller;
ngpios = <32>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
gpio-ranges = <&pinctrl 0 0 32>;
};
@@ -352,6 +353,7 @@
gpio-controller;
ngpios = <4>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm/boot/dts/gemini/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini/gemini-dlink-dir-685.dts
index 396149664297..b4dbcf8f168e 100644
--- a/arch/arm/boot/dts/gemini/gemini-dlink-dir-685.dts
+++ b/arch/arm/boot/dts/gemini/gemini-dlink-dir-685.dts
@@ -27,10 +27,10 @@
gpio_keys {
compatible = "gpio-keys";
- button-esc {
+ button-reset {
debounce-interval = <100>;
wakeup-source;
- linux,code = <KEY_ESC>;
+ linux,code = <KEY_RESTART>;
label = "reset";
/* Collides with LPC_LAD[0], UART DCD, SSP 97RST */
gpios = <&gpio0 8 GPIO_ACTIVE_LOW>;
@@ -187,7 +187,7 @@
};
/* This is a RealTek RTL8366RB switch and PHY using SMI over GPIO */
- switch {
+ ethernet-switch {
compatible = "realtek,rtl8366rb";
/* 22 = MDIO (has input reads), 21 = MDC (clock, output only) */
mdc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>;
@@ -204,36 +204,36 @@
#interrupt-cells = <1>;
};
- ports {
+ ethernet-ports {
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
+ ethernet-port@0 {
reg = <0>;
label = "lan0";
phy-handle = <&phy0>;
};
- port@1 {
+ ethernet-port@1 {
reg = <1>;
label = "lan1";
phy-handle = <&phy1>;
};
- port@2 {
+ ethernet-port@2 {
reg = <2>;
label = "lan2";
phy-handle = <&phy2>;
};
- port@3 {
+ ethernet-port@3 {
reg = <3>;
label = "lan3";
phy-handle = <&phy3>;
};
- port@4 {
+ ethernet-port@4 {
reg = <4>;
label = "wan";
phy-handle = <&phy4>;
};
- rtl8366rb_cpu_port: port@5 {
+ rtl8366rb_cpu_port: ethernet-port@5 {
reg = <5>;
label = "cpu";
ethernet = <&gmac0>;
@@ -252,27 +252,27 @@
#address-cells = <1>;
#size-cells = <0>;
- phy0: phy@0 {
+ phy0: ethernet-phy@0 {
reg = <0>;
interrupt-parent = <&switch_intc>;
interrupts = <0>;
};
- phy1: phy@1 {
+ phy1: ethernet-phy@1 {
reg = <1>;
interrupt-parent = <&switch_intc>;
interrupts = <1>;
};
- phy2: phy@2 {
+ phy2: ethernet-phy@2 {
reg = <2>;
interrupt-parent = <&switch_intc>;
interrupts = <2>;
};
- phy3: phy@3 {
+ phy3: ethernet-phy@3 {
reg = <3>;
interrupt-parent = <&switch_intc>;
interrupts = <3>;
};
- phy4: phy@4 {
+ phy4: ethernet-phy@4 {
reg = <4>;
interrupt-parent = <&switch_intc>;
interrupts = <12>;
diff --git a/arch/arm/boot/dts/gemini/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini/gemini-dlink-dns-313.dts
index 138c47e1ac1b..8c54d3a5a721 100644
--- a/arch/arm/boot/dts/gemini/gemini-dlink-dns-313.dts
+++ b/arch/arm/boot/dts/gemini/gemini-dlink-dns-313.dts
@@ -33,10 +33,10 @@
gpio_keys {
compatible = "gpio-keys";
- button-esc {
+ button-reset {
debounce-interval = <100>;
wakeup-source;
- linux,code = <KEY_ESC>;
+ linux,code = <KEY_RESTART>;
label = "reset";
gpios = <&gpio1 31 GPIO_ACTIVE_LOW>;
};
diff --git a/arch/arm/boot/dts/gemini/gemini-sl93512r.dts b/arch/arm/boot/dts/gemini/gemini-sl93512r.dts
index 91c19e8ebfe8..4992ec276de9 100644
--- a/arch/arm/boot/dts/gemini/gemini-sl93512r.dts
+++ b/arch/arm/boot/dts/gemini/gemini-sl93512r.dts
@@ -43,7 +43,7 @@
button-setup {
debounce-interval = <50>;
wakeup-source;
- linux,code = <KEY_SETUP>;
+ linux,code = <KEY_RESTART>;
label = "factory reset";
/* Conflict with NAND flash */
gpios = <&gpio0 18 GPIO_ACTIVE_LOW>;
@@ -93,7 +93,7 @@
cs-gpios = <&gpio1 31 GPIO_ACTIVE_HIGH>;
num-chipselects = <1>;
- switch@0 {
+ ethernet-switch@0 {
compatible = "vitesse,vsc7385";
reg = <0>;
/* Specified for 2.5 MHz or below */
@@ -101,27 +101,27 @@
gpio-controller;
#gpio-cells = <2>;
- ports {
+ ethernet-ports {
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
+ ethernet-port@0 {
reg = <0>;
label = "lan1";
};
- port@1 {
+ ethernet-port@1 {
reg = <1>;
label = "lan2";
};
- port@2 {
+ ethernet-port@2 {
reg = <2>;
label = "lan3";
};
- port@3 {
+ ethernet-port@3 {
reg = <3>;
label = "lan4";
};
- vsc: port@6 {
+ vsc: ethernet-port@6 {
reg = <6>;
label = "cpu";
ethernet = <&gmac1>;
diff --git a/arch/arm/boot/dts/gemini/gemini-sq201.dts b/arch/arm/boot/dts/gemini/gemini-sq201.dts
index d0efd76695da..f8c6f6e5cdea 100644
--- a/arch/arm/boot/dts/gemini/gemini-sq201.dts
+++ b/arch/arm/boot/dts/gemini/gemini-sq201.dts
@@ -30,7 +30,7 @@
button-setup {
debounce-interval = <100>;
wakeup-source;
- linux,code = <KEY_SETUP>;
+ linux,code = <KEY_RESTART>;
label = "factory reset";
/* Conflict with NAND flash */
gpios = <&gpio0 18 GPIO_ACTIVE_LOW>;
@@ -78,7 +78,7 @@
cs-gpios = <&gpio1 31 GPIO_ACTIVE_HIGH>;
num-chipselects = <1>;
- switch@0 {
+ ethernet-switch@0 {
compatible = "vitesse,vsc7395";
reg = <0>;
/* Specified for 2.5 MHz or below */
@@ -86,27 +86,27 @@
gpio-controller;
#gpio-cells = <2>;
- ports {
+ ethernet-ports {
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
+ ethernet-port@0 {
reg = <0>;
label = "lan1";
};
- port@1 {
+ ethernet-port@1 {
reg = <1>;
label = "lan2";
};
- port@2 {
+ ethernet-port@2 {
reg = <2>;
label = "lan3";
};
- port@3 {
+ ethernet-port@3 {
reg = <3>;
label = "lan4";
};
- vsc: port@6 {
+ vsc: ethernet-port@6 {
reg = <6>;
label = "cpu";
ethernet = <&gmac1>;
diff --git a/arch/arm/boot/dts/gemini/gemini-wbd111.dts b/arch/arm/boot/dts/gemini/gemini-wbd111.dts
index 3c88c59ab481..6a0c89e0c918 100644
--- a/arch/arm/boot/dts/gemini/gemini-wbd111.dts
+++ b/arch/arm/boot/dts/gemini/gemini-wbd111.dts
@@ -10,7 +10,7 @@
/ {
model = "Wiliboard WBD-111";
- compatible = "wiliboard,wbd111", "cortina,gemini";
+ compatible = "wiligear,wiliboard-wbd111", "cortina,gemini";
#address-cells = <1>;
#size-cells = <1>;
@@ -28,10 +28,10 @@
gpio_keys {
compatible = "gpio-keys";
- button-setup {
+ button-reset {
debounce-interval = <100>;
wakeup-source;
- linux,code = <KEY_SETUP>;
+ linux,code = <KEY_RESTART>;
label = "reset";
/* Conflict with ICE */
gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm/boot/dts/gemini/gemini-wbd222.dts b/arch/arm/boot/dts/gemini/gemini-wbd222.dts
index ff72bbc4db3e..d8b34ebad4b0 100644
--- a/arch/arm/boot/dts/gemini/gemini-wbd222.dts
+++ b/arch/arm/boot/dts/gemini/gemini-wbd222.dts
@@ -10,7 +10,7 @@
/ {
model = "Wiliboard WBD-222";
- compatible = "wiliboard,wbd222", "cortina,gemini";
+ compatible = "wiligear,wiliboard-wbd222", "cortina,gemini";
#address-cells = <1>;
#size-cells = <1>;
@@ -27,10 +27,10 @@
gpio_keys {
compatible = "gpio-keys";
- button-setup {
+ button-reset {
debounce-interval = <100>;
wakeup-source;
- linux,code = <KEY_SETUP>;
+ linux,code = <KEY_RESTART>;
label = "reset";
/* Conflict with ICE */
gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts
index 4d70f6afd13a..6d5e69035f94 100644
--- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts
+++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts
@@ -60,6 +60,8 @@
* We have slots (IDSEL) 1 and 2 with one assigned IRQ
* each handling all IRQs.
*/
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0xf800 0 0 7>;
interrupt-map =
/* IDSEL 1 */
<0x0800 0 0 1 &gpio0 11 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 1 is irq 11 */
diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts
index 9ec0169bacf8..5f4c849915db 100644
--- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts
+++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts
@@ -89,6 +89,8 @@
* The slots have Ethernet, Ethernet, NEC and MPCI.
* The IDSELs are 11, 12, 13, 14.
*/
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0xf800 0 0 7>;
interrupt-map =
/* IDSEL 11 - Ethernet A */
<0x5800 0 0 1 &gpio0 4 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 11 is irq 4 */
diff --git a/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-l8.dts b/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-l8.dts
index 1707d1b01545..cb85f8e31dfc 100644
--- a/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-l8.dts
+++ b/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-l8.dts
@@ -4,6 +4,18 @@
/ {
model = "SolidRun Clearfog GTR L8";
+ compatible = "solidrun,clearfog-gtr-l8", "marvell,armada385",
+ "marvell,armada380";
+
+ /* CON25 */
+ sfp1: sfp-1 {
+ compatible = "sff,sfp";
+ pinctrl-0 = <&cf_gtr_sfp1_pins>;
+ pinctrl-names = "default";
+ i2c-bus = <&i2c0>;
+ mod-def0-gpio = <&gpio0 24 GPIO_ACTIVE_LOW>;
+ tx-disable-gpio = <&gpio1 22 GPIO_ACTIVE_HIGH>;
+ };
};
&mdio {
@@ -20,57 +32,65 @@
ethernet-port@1 {
reg = <1>;
- label = "lan8";
+ label = "lan1";
phy-handle = <&switch0phy0>;
};
ethernet-port@2 {
reg = <2>;
- label = "lan7";
+ label = "lan2";
phy-handle = <&switch0phy1>;
};
ethernet-port@3 {
reg = <3>;
- label = "lan6";
+ label = "lan3";
phy-handle = <&switch0phy2>;
};
ethernet-port@4 {
reg = <4>;
- label = "lan5";
+ label = "lan4";
phy-handle = <&switch0phy3>;
};
ethernet-port@5 {
reg = <5>;
- label = "lan4";
+ label = "lan5";
phy-handle = <&switch0phy4>;
};
ethernet-port@6 {
reg = <6>;
- label = "lan3";
+ label = "lan6";
phy-handle = <&switch0phy5>;
};
ethernet-port@7 {
reg = <7>;
- label = "lan2";
+ label = "lan7";
phy-handle = <&switch0phy6>;
};
ethernet-port@8 {
reg = <8>;
- label = "lan1";
+ label = "lan8";
phy-handle = <&switch0phy7>;
};
+ ethernet-port@9 {
+ reg = <9>;
+ label = "lan-sfp";
+ phy-mode = "sgmii";
+ sfp = <&sfp1>;
+ managed = "in-band-status";
+ };
+
ethernet-port@10 {
reg = <10>;
phy-mode = "2500base-x";
-
ethernet = <&eth1>;
+
fixed-link {
speed = <2500>;
full-duplex;
diff --git a/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-s4.dts b/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-s4.dts
index a7678a784c18..5f83d981449a 100644
--- a/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-s4.dts
+++ b/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr-s4.dts
@@ -4,6 +4,8 @@
/ {
model = "SolidRun Clearfog GTR S4";
+ compatible = "solidrun,clearfog-gtr-s4", "marvell,armada385",
+ "marvell,armada380";
};
&sfp0 {
diff --git a/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr.dtsi b/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr.dtsi
index d1452a04e904..f3a3cb6ac311 100644
--- a/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr.dtsi
+++ b/arch/arm/boot/dts/marvell/armada-385-clearfog-gtr.dtsi
@@ -141,18 +141,13 @@
};
pinctrl@18000 {
- cf_gtr_switch_reset_pins: cf-gtr-switch-reset-pins {
- marvell,pins = "mpp18";
- marvell,function = "gpio";
- };
-
- cf_gtr_usb3_con_vbus: cf-gtr-usb3-con-vbus {
- marvell,pins = "mpp22";
+ cf_gtr_fan_pwm: cf-gtr-fan-pwm {
+ marvell,pins = "mpp23";
marvell,function = "gpio";
};
- cf_gtr_fan_pwm: cf-gtr-fan-pwm {
- marvell,pins = "mpp23";
+ cf_gtr_front_button_pins: cf-gtr-front-button-pins {
+ marvell,pins = "mpp53";
marvell,function = "gpio";
};
@@ -162,6 +157,37 @@
marvell,function = "i2c1";
};
+ cf_gtr_isolation_pins: cf-gtr-isolation-pins {
+ marvell,pins = "mpp47";
+ marvell,function = "gpio";
+ };
+
+ cf_gtr_led_pins: led-pins {
+ marvell,pins = "mpp42", "mpp52";
+ marvell,function = "gpio";
+ };
+
+ cf_gtr_lte_disable_pins: lte-disable-pins {
+ marvell,pins = "mpp34";
+ marvell,function = "gpio";
+ };
+
+ cf_gtr_pci_pins: pci-pins {
+ // pci reset
+ marvell,pins = "mpp33", "mpp35", "mpp44";
+ marvell,function = "gpio";
+ };
+
+ cf_gtr_poe_reset_pins: cf-gtr-poe-reset-pins {
+ marvell,pins = "mpp48";
+ marvell,function = "gpio";
+ };
+
+ cf_gtr_rear_button_pins: cf-gtr-rear-button-pins {
+ marvell,pins = "mpp36";
+ marvell,function = "gpio";
+ };
+
cf_gtr_sdhci_pins: cf-gtr-sdhci-pins {
marvell,pins = "mpp21", "mpp28",
"mpp37", "mpp38",
@@ -169,13 +195,15 @@
marvell,function = "sd0";
};
- cf_gtr_isolation_pins: cf-gtr-isolation-pins {
- marvell,pins = "mpp47";
+ cf_gtr_sfp0_pins: sfp0-pins {
+ /* sfp modabs, txdisable */
+ marvell,pins = "mpp25", "mpp46";
marvell,function = "gpio";
};
- cf_gtr_poe_reset_pins: cf-gtr-poe-reset-pins {
- marvell,pins = "mpp48";
+ cf_gtr_sfp1_pins: sfp1-pins {
+ /* sfp modabs, txdisable */
+ marvell,pins = "mpp24", "mpp54";
marvell,function = "gpio";
};
@@ -184,13 +212,18 @@
marvell,function = "spi1";
};
- cf_gtr_front_button_pins: cf-gtr-front-button-pins {
- marvell,pins = "mpp53";
+ cf_gtr_switch_reset_pins: cf-gtr-switch-reset-pins {
+ marvell,pins = "mpp18";
marvell,function = "gpio";
};
- cf_gtr_rear_button_pins: cf-gtr-rear-button-pins {
- marvell,pins = "mpp36";
+ cf_gtr_usb3_con_vbus: cf-gtr-usb3-con-vbus {
+ marvell,pins = "mpp22";
+ marvell,function = "gpio";
+ };
+
+ cf_gtr_wifi_disable_pins: wifi-disable-pins {
+ marvell,pins = "mpp30", "mpp31";
marvell,function = "gpio";
};
};
@@ -221,21 +254,26 @@
};
pcie {
+ pinctrl-0 = <&cf_gtr_pci_pins>;
+ pinctrl-names = "default";
status = "okay";
/*
* The PCIe units are accessible through
* the mini-PCIe connectors on the board.
*/
+ /* CON3 - serdes 0 */
pcie@1,0 {
reset-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
status = "okay";
};
+ /* CON4 - serdes 2 */
pcie@2,0 {
reset-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
status = "okay";
};
+ /* CON2 - serdes 4 */
pcie@3,0 {
reset-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
status = "okay";
@@ -243,10 +281,12 @@
};
};
- sfp0: sfp {
+ /* CON5 */
+ sfp0: sfp-0 {
compatible = "sff,sfp";
+ pinctrl-0 = <&cf_gtr_sfp0_pins>;
+ pinctrl-names = "default";
i2c-bus = <&i2c1>;
- los-gpio = <&gpio1 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpio = <&gpio0 25 GPIO_ACTIVE_LOW>;
tx-disable-gpio = <&gpio1 14 GPIO_ACTIVE_HIGH>;
};
@@ -273,6 +313,8 @@
gpio-leds {
compatible = "gpio-leds";
+ pinctrl-0 = <&cf_gtr_led_pins>;
+ pinctrl-names = "default";
led1 {
function = LED_FUNCTION_CPU;
@@ -408,7 +450,7 @@
};
&gpio0 {
- pinctrl-0 = <&cf_gtr_fan_pwm>;
+ pinctrl-0 = <&cf_gtr_fan_pwm &cf_gtr_wifi_disable_pins>;
pinctrl-names = "default";
wifi-disable {
@@ -420,7 +462,7 @@
};
&gpio1 {
- pinctrl-0 = <&cf_gtr_isolation_pins &cf_gtr_poe_reset_pins>;
+ pinctrl-0 = <&cf_gtr_isolation_pins &cf_gtr_poe_reset_pins &cf_gtr_lte_disable_pins>;
pinctrl-names = "default";
lte-disable {
diff --git a/arch/arm/boot/dts/marvell/armada-388-clearfog.dts b/arch/arm/boot/dts/marvell/armada-388-clearfog.dts
index 3290ccad2374..09bf2e6d4ed0 100644
--- a/arch/arm/boot/dts/marvell/armada-388-clearfog.dts
+++ b/arch/arm/boot/dts/marvell/armada-388-clearfog.dts
@@ -10,8 +10,9 @@
/ {
model = "SolidRun Clearfog A1";
- compatible = "solidrun,clearfog-a1", "marvell,armada388",
- "marvell,armada385", "marvell,armada380";
+ compatible = "solidrun,clearfog-pro-a1", "solidrun,clearfog-a1",
+ "marvell,armada388", "marvell,armada385",
+ "marvell,armada380";
soc {
internal-regs {
diff --git a/arch/arm/boot/dts/marvell/dove-cubox.dts b/arch/arm/boot/dts/marvell/dove-cubox.dts
index bfde99486a87..bcaaf8320c45 100644
--- a/arch/arm/boot/dts/marvell/dove-cubox.dts
+++ b/arch/arm/boot/dts/marvell/dove-cubox.dts
@@ -101,7 +101,7 @@
/* connect xtal input as source of pll0 and pll1 */
silabs,pll-source = <0 0>, <1 0>;
- clkout0 {
+ clkout@0 {
reg = <0>;
silabs,drive-strength = <8>;
silabs,multisynth-source = <0>;
@@ -109,7 +109,7 @@
silabs,pll-master;
};
- clkout2 {
+ clkout@2 {
reg = <2>;
silabs,drive-strength = <8>;
silabs,multisynth-source = <1>;
diff --git a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts
index dffb9f84e67c..c841eb8e7fb1 100644
--- a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts
+++ b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts
@@ -65,6 +65,7 @@
gpio2: gpio-expander@20 {
#gpio-cells = <2>;
#interrupt-cells = <2>;
+ interrupt-controller;
compatible = "semtech,sx1505q";
reg = <0x20>;
@@ -79,6 +80,7 @@
gpio3: gpio-expander@21 {
#gpio-cells = <2>;
#interrupt-cells = <2>;
+ interrupt-controller;
compatible = "semtech,sx1505q";
reg = <0x21>;
diff --git a/arch/arm/boot/dts/marvell/mmp2-brownstone.dts b/arch/arm/boot/dts/marvell/mmp2-brownstone.dts
index 04f1ae1382e7..bc64348b8218 100644
--- a/arch/arm/boot/dts/marvell/mmp2-brownstone.dts
+++ b/arch/arm/boot/dts/marvell/mmp2-brownstone.dts
@@ -28,7 +28,7 @@
&twsi1 {
status = "okay";
pmic: max8925@3c {
- compatible = "maxium,max8925";
+ compatible = "maxim,max8925";
reg = <0x3c>;
interrupts = <1>;
interrupt-parent = <&intcmux4>;
diff --git a/arch/arm/boot/dts/microchip/Makefile b/arch/arm/boot/dts/microchip/Makefile
index efde9546c8f4..0c45c8d17468 100644
--- a/arch/arm/boot/dts/microchip/Makefile
+++ b/arch/arm/boot/dts/microchip/Makefile
@@ -11,6 +11,7 @@ DTC_FLAGS_at91-sama5d2_xplained := -@
DTC_FLAGS_at91-sama5d3_eds := -@
DTC_FLAGS_at91-sama5d3_xplained := -@
DTC_FLAGS_at91-sama5d4_xplained := -@
+DTC_FLAGS_at91-sama7g54_curiosity := -@
DTC_FLAGS_at91-sama7g5ek := -@
dtb-$(CONFIG_SOC_AT91RM9200) += \
at91rm9200ek.dtb \
@@ -87,6 +88,7 @@ dtb-$(CONFIG_SOC_SAM_V7) += \
at91-sama5d4ek.dtb \
at91-vinco.dtb
dtb-$(CONFIG_SOC_SAMA7G5) += \
+ at91-sama7g54_curiosity.dtb \
at91-sama7g5ek.dtb
dtb-$(CONFIG_SOC_LAN966) += \
diff --git a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts
new file mode 100644
index 000000000000..4f609e9e510e
--- /dev/null
+++ b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * at91-sama7g54_curiosity.dts - Device Tree file for SAMA7G54 Curiosity Board
+ *
+ * Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
+ *
+ * Author: Mihai Sain <mihai.sain@microchip.com>
+ *
+ */
+/dts-v1/;
+#include "sama7g5-pinfunc.h"
+#include "sama7g5.dtsi"
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/mfd/atmel-flexcom.h>
+#include <dt-bindings/pinctrl/at91.h>
+
+/ {
+ model = "Microchip SAMA7G54 Curiosity";
+ compatible = "microchip,sama7g54-curiosity", "microchip,sama7g5", "microchip,sama7";
+
+ aliases {
+ serial0 = &uart3;
+ i2c0 = &i2c10;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_key_gpio_default>;
+
+ button-user {
+ label = "user-button";
+ gpios = <&pioA PIN_PD19 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_PROG1>;
+ wakeup-source;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_led_gpio_default>;
+
+ led-red {
+ color = <LED_COLOR_ID_RED>;
+ function = LED_FUNCTION_POWER;
+ gpios = <&pioA PIN_PD13 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ led-green {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_BOOT;
+ gpios = <&pioA PIN_PD14 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+
+ led-blue {
+ color = <LED_COLOR_ID_BLUE>;
+ function = LED_FUNCTION_CPU;
+ gpios = <&pioA PIN_PB15 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ };
+ };
+
+ memory@60000000 {
+ device_type = "memory";
+ reg = <0x60000000 0x10000000>; /* 256 MiB DDR3L-1066 16-bit */
+ };
+};
+
+&adc {
+ vddana-supply = <&vddout25>;
+ vref-supply = <&vddout25>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mikrobus1_an_default &pinctrl_mikrobus2_an_default>;
+ status = "okay";
+};
+
+&cpu0 {
+ cpu-supply = <&vddcpu>;
+};
+
+&dma0 {
+ status = "okay";
+};
+
+&dma1 {
+ status = "okay";
+};
+
+&dma2 {
+ status = "okay";
+};
+
+&ebi {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_nand_default>;
+ status = "okay";
+
+ nand_controller: nand-controller {
+ status = "okay";
+
+ nand@3 {
+ reg = <0x3 0x0 0x800000>;
+ atmel,rb = <0>;
+ nand-bus-width = <8>;
+ nand-ecc-mode = "hw";
+ nand-ecc-strength = <8>;
+ nand-ecc-step-size = <512>;
+ nand-on-flash-bbt;
+ label = "nand";
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ at91bootstrap@0 {
+ label = "nand: at91bootstrap";
+ reg = <0x0 0x40000>;
+ };
+
+ bootloader@40000 {
+ label = "nand: u-boot";
+ reg = <0x40000 0x100000>;
+ };
+
+ bootloaderenv@140000 {
+ label = "nand: u-boot env";
+ reg = <0x140000 0x40000>;
+ };
+
+ dtb@180000 {
+ label = "nand: device tree";
+ reg = <0x180000 0x80000>;
+ };
+
+ kernel@200000 {
+ label = "nand: kernel";
+ reg = <0x200000 0x600000>;
+ };
+
+ rootfs@800000 {
+ label = "nand: rootfs";
+ reg = <0x800000 0x1f800000>;
+ };
+ };
+ };
+ };
+};
+
+&flx3 {
+ atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_USART>;
+ status = "okay";
+
+ uart3: serial@200 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flx3_default>;
+ status = "okay";
+ };
+};
+
+&flx10 {
+ atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_TWI>;
+ status = "okay";
+
+ i2c10: i2c@600 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flx10_default>;
+ i2c-analog-filter;
+ i2c-digital-filter;
+ i2c-digital-filter-width-ns = <35>;
+ status = "okay";
+
+ eeprom@51 {
+ compatible = "atmel,24c02";
+ reg = <0x51>;
+ pagesize = <16>;
+ size = <256>;
+ vcc-supply = <&vdd_3v3>;
+ };
+
+ pmic@5b {
+ compatible = "microchip,mcp16502";
+ reg = <0x5b>;
+
+ regulators {
+ vdd_3v3: VDD_IO {
+ regulator-name = "VDD_IO";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <3300000>;
+ regulator-mode = <4>;
+ };
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-mode = <4>;
+ };
+ };
+
+ vddioddr: VDD_DDR {
+ regulator-name = "VDD_DDR";
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1350000>;
+ regulator-mode = <4>;
+ };
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1350000>;
+ regulator-mode = <4>;
+ };
+ };
+
+ vddcore: VDD_CORE {
+ regulator-name = "VDD_CORE";
+ regulator-min-microvolt = <1150000>;
+ regulator-max-microvolt = <1150000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+ regulator-suspend-voltage = <1150000>;
+ regulator-mode = <4>;
+ };
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-mode = <4>;
+ };
+ };
+
+ vddcpu: VDD_OTHER {
+ regulator-name = "VDD_OTHER";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1250000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-ramp-delay = <3125>;
+ regulator-always-on;
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+ regulator-suspend-voltage = <1050000>;
+ regulator-mode = <4>;
+ };
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-mode = <4>;
+ };
+ };
+
+ vldo1: LDO1 {
+ regulator-name = "LDO1";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+
+ regulator-state-standby {
+ regulator-suspend-voltage = <1800000>;
+ regulator-on-in-suspend;
+ };
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vldo2: LDO2 {
+ regulator-name = "LDO2";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+
+ regulator-state-standby {
+ regulator-suspend-voltage = <3300000>;
+ regulator-on-in-suspend;
+ };
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+ };
+ };
+};
+
+&main_xtal {
+ clock-frequency = <24000000>;
+};
+
+&qspi1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_qspi1_default>;
+ status = "okay";
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0x0>;
+ spi-max-frequency = <100000000>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ };
+};
+
+&pioA {
+ pinctrl_flx3_default: flx3-default {
+ pinmux = <PIN_PD16__FLEXCOM3_IO0>,
+ <PIN_PD17__FLEXCOM3_IO1>;
+ bias-pull-up;
+ };
+
+ pinctrl_flx10_default: flx10-default {
+ pinmux = <PIN_PC30__FLEXCOM10_IO0>,
+ <PIN_PC31__FLEXCOM10_IO1>;
+ bias-pull-up;
+ };
+
+ pinctrl_key_gpio_default: key-gpio-default {
+ pinmux = <PIN_PD19__GPIO>;
+ bias-pull-up;
+ };
+
+ pinctrl_led_gpio_default: led-gpio-default {
+ pinmux = <PIN_PD13__GPIO>,
+ <PIN_PD14__GPIO>,
+ <PIN_PB15__GPIO>;
+ bias-pull-up;
+ };
+
+ pinctrl_mikrobus1_an_default: mikrobus1-an-default {
+ pinmux = <PIN_PC15__GPIO>;
+ bias-disable;
+ };
+
+ pinctrl_mikrobus2_an_default: mikrobus2-an-default {
+ pinmux = <PIN_PC13__GPIO>;
+ bias-disable;
+ };
+
+ pinctrl_nand_default: nand-default {
+ pinmux = <PIN_PD9__D0>,
+ <PIN_PD10__D1>,
+ <PIN_PD11__D2>,
+ <PIN_PC21__D3>,
+ <PIN_PC22__D4>,
+ <PIN_PC23__D5>,
+ <PIN_PC24__D6>,
+ <PIN_PD2__D7>,
+ <PIN_PD3__NANDRDY>,
+ <PIN_PD4__NCS3_NANDCS>,
+ <PIN_PD5__NWE_NWR0_NANDWE>,
+ <PIN_PD6__NRD_NANDOE>,
+ <PIN_PD7__A21_NANDALE>,
+ <PIN_PD8__A22_NANDCLE>;
+ bias-disable;
+ slew-rate = <0>;
+ };
+
+ pinctrl_qspi1_default: qspi1-default {
+ pinmux = <PIN_PB22__QSPI1_IO3>,
+ <PIN_PB23__QSPI1_IO2>,
+ <PIN_PB24__QSPI1_IO1>,
+ <PIN_PB25__QSPI1_IO0>,
+ <PIN_PB26__QSPI1_CS>,
+ <PIN_PB27__QSPI1_SCK>;
+ bias-pull-up;
+ slew-rate = <0>;
+ };
+
+ pinctrl_sdmmc0_default: sdmmc0-default {
+ pinmux = <PIN_PA0__SDMMC0_CK>,
+ <PIN_PA1__SDMMC0_CMD>,
+ <PIN_PA2__SDMMC0_RSTN>,
+ <PIN_PA3__SDMMC0_DAT0>,
+ <PIN_PA4__SDMMC0_DAT1>,
+ <PIN_PA5__SDMMC0_DAT2>,
+ <PIN_PA6__SDMMC0_DAT3>;
+ bias-pull-up;
+ slew-rate = <0>;
+ };
+
+ pinctrl_sdmmc1_default: sdmmc1-default {
+ pinmux = <PIN_PB29__SDMMC1_CMD>,
+ <PIN_PB30__SDMMC1_CK>,
+ <PIN_PB31__SDMMC1_DAT0>,
+ <PIN_PC0__SDMMC1_DAT1>,
+ <PIN_PC1__SDMMC1_DAT2>,
+ <PIN_PC2__SDMMC1_DAT3>,
+ <PIN_PC4__SDMMC1_CD>;
+ bias-pull-up;
+ slew-rate = <0>;
+ };
+};
+
+&rtt {
+ atmel,rtt-rtc-time-reg = <&gpbr 0x0>;
+};
+
+/* M.2 slot for wireless card */
+&sdmmc0 {
+ bus-width = <4>;
+ cd-gpios = <&pioA 31 GPIO_ACTIVE_LOW>;
+ disable-wp;
+ sdhci-caps-mask = <0x0 0x00200000>;
+ vmmc-supply = <&vdd_3v3>;
+ vqmmc-supply = <&vdd_3v3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sdmmc0_default>;
+ status = "okay";
+};
+
+/* micro SD socket */
+&sdmmc1 {
+ bus-width = <4>;
+ disable-wp;
+ sdhci-caps-mask = <0x0 0x00200000>;
+ vmmc-supply = <&vdd_3v3>;
+ vqmmc-supply = <&vdd_3v3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sdmmc1_default>;
+ status = "okay";
+};
+
+&slow_xtal {
+ clock-frequency = <32768>;
+};
+
+&shdwc {
+ debounce-delay-us = <976>;
+ status = "okay";
+
+ input@0 {
+ reg = <0>;
+ };
+};
+
+&tcb0 {
+ timer0: timer@0 {
+ compatible = "atmel,tcb-timer";
+ reg = <0>;
+ };
+
+ timer1: timer@1 {
+ compatible = "atmel,tcb-timer";
+ reg = <1>;
+ };
+};
+
+&trng {
+ status = "okay";
+};
+
+&vddout25 {
+ vin-supply = <&vdd_3v3>;
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/microchip/at91sam9g25-gardena-smart-gateway.dts b/arch/arm/boot/dts/microchip/at91sam9g25-gardena-smart-gateway.dts
index 92f2c05c873f..af70eb8a3a02 100644
--- a/arch/arm/boot/dts/microchip/at91sam9g25-gardena-smart-gateway.dts
+++ b/arch/arm/boot/dts/microchip/at91sam9g25-gardena-smart-gateway.dts
@@ -121,6 +121,8 @@
};
&usart3 {
+ atmel,use-dma-rx;
+ atmel,use-dma-tx;
status = "okay";
pinctrl-0 = <&pinctrl_usart3
diff --git a/arch/arm/boot/dts/microchip/at91sam9x5ek.dtsi b/arch/arm/boot/dts/microchip/at91sam9x5ek.dtsi
index 5f4eaa618ab4..9618b8d965b0 100644
--- a/arch/arm/boot/dts/microchip/at91sam9x5ek.dtsi
+++ b/arch/arm/boot/dts/microchip/at91sam9x5ek.dtsi
@@ -39,6 +39,8 @@
};
&dbgu {
+ atmel,use-dma-rx;
+ atmel,use-dma-tx;
status = "okay";
};
diff --git a/arch/arm/boot/dts/microchip/sam9x60.dtsi b/arch/arm/boot/dts/microchip/sam9x60.dtsi
index 73d570a17269..291540e5d81e 100644
--- a/arch/arm/boot/dts/microchip/sam9x60.dtsi
+++ b/arch/arm/boot/dts/microchip/sam9x60.dtsi
@@ -179,7 +179,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(8))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(9))>;
@@ -202,7 +202,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(8))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(9))>;
@@ -220,7 +220,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(8))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(9))>;
@@ -248,7 +248,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(10))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(11))>;
@@ -271,7 +271,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(10))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(11))>;
@@ -289,7 +289,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(10))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(11))>;
@@ -377,7 +377,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(22))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(23))>;
@@ -399,7 +399,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(22))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(23))>;
@@ -426,7 +426,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(24))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(25))>;
@@ -448,7 +448,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(24))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(25))>;
@@ -583,7 +583,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(12))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(13))>;
@@ -605,7 +605,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(12))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(13))>;
@@ -632,7 +632,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(14))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(15))>;
@@ -654,7 +654,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(14))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(15))>;
@@ -681,7 +681,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(16))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(17))>;
@@ -703,7 +703,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(16))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(17))>;
@@ -730,7 +730,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(0))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(1))>;
@@ -753,7 +753,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(0))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(1))>;
@@ -771,7 +771,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(0))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(1))>;
@@ -798,7 +798,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(2))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(3))>;
@@ -821,7 +821,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(2))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(3))>;
@@ -839,7 +839,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(2))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(3))>;
@@ -866,7 +866,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(4))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(5))>;
@@ -889,7 +889,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(4))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(5))>;
@@ -907,7 +907,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(4))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(5))>;
@@ -934,7 +934,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(6))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(7))>;
@@ -957,7 +957,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(6))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(7))>;
@@ -975,7 +975,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(6))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(7))>;
@@ -1057,7 +1057,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(18))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(19))>;
@@ -1079,7 +1079,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(18))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(19))>;
@@ -1106,7 +1106,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(20))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(21))>;
@@ -1128,7 +1128,7 @@
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(20))>,
- <&dma0
+ <&dma0
(AT91_XDMAC_DT_MEM_IF(0) |
AT91_XDMAC_DT_PER_IF(1) |
AT91_XDMAC_DT_PERID(21))>;
diff --git a/arch/arm/boot/dts/microchip/sama7g5.dtsi b/arch/arm/boot/dts/microchip/sama7g5.dtsi
index 269e0a3ca269..75778be126a3 100644
--- a/arch/arm/boot/dts/microchip/sama7g5.dtsi
+++ b/arch/arm/boot/dts/microchip/sama7g5.dtsi
@@ -698,7 +698,7 @@
};
flx0: flexcom@e1818000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe1818000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 38>;
#address-cells = <1>;
@@ -714,7 +714,7 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 38>;
clock-names = "usart";
dmas = <&dma1 AT91_XDMAC_DT_PERID(6)>,
- <&dma1 AT91_XDMAC_DT_PERID(5)>;
+ <&dma1 AT91_XDMAC_DT_PERID(5)>;
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
@@ -723,7 +723,7 @@
};
flx1: flexcom@e181c000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe181c000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 39>;
#address-cells = <1>;
@@ -740,14 +740,14 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 39>;
atmel,fifo-size = <32>;
dmas = <&dma0 AT91_XDMAC_DT_PERID(8)>,
- <&dma0 AT91_XDMAC_DT_PERID(7)>;
+ <&dma0 AT91_XDMAC_DT_PERID(7)>;
dma-names = "tx", "rx";
status = "disabled";
};
};
flx3: flexcom@e1824000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe1824000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 41>;
#address-cells = <1>;
@@ -763,7 +763,7 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 41>;
clock-names = "usart";
dmas = <&dma1 AT91_XDMAC_DT_PERID(12)>,
- <&dma1 AT91_XDMAC_DT_PERID(11)>;
+ <&dma1 AT91_XDMAC_DT_PERID(11)>;
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
@@ -791,7 +791,7 @@
};
flx4: flexcom@e2018000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe2018000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 42>;
#address-cells = <1>;
@@ -807,7 +807,7 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 42>;
clock-names = "usart";
dmas = <&dma1 AT91_XDMAC_DT_PERID(14)>,
- <&dma1 AT91_XDMAC_DT_PERID(13)>;
+ <&dma1 AT91_XDMAC_DT_PERID(13)>;
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
@@ -817,7 +817,7 @@
};
flx7: flexcom@e2024000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe2024000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 45>;
#address-cells = <1>;
@@ -833,7 +833,7 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 45>;
clock-names = "usart";
dmas = <&dma1 AT91_XDMAC_DT_PERID(20)>,
- <&dma1 AT91_XDMAC_DT_PERID(19)>;
+ <&dma1 AT91_XDMAC_DT_PERID(19)>;
dma-names = "tx", "rx";
atmel,use-dma-rx;
atmel,use-dma-tx;
@@ -911,7 +911,7 @@
};
flx8: flexcom@e2818000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe2818000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 46>;
#address-cells = <1>;
@@ -928,14 +928,14 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 46>;
atmel,fifo-size = <32>;
dmas = <&dma0 AT91_XDMAC_DT_PERID(22)>,
- <&dma0 AT91_XDMAC_DT_PERID(21)>;
+ <&dma0 AT91_XDMAC_DT_PERID(21)>;
dma-names = "tx", "rx";
status = "disabled";
};
};
flx9: flexcom@e281c000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe281c000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 47>;
#address-cells = <1>;
@@ -952,14 +952,38 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 47>;
atmel,fifo-size = <32>;
dmas = <&dma0 AT91_XDMAC_DT_PERID(24)>,
- <&dma0 AT91_XDMAC_DT_PERID(23)>;
+ <&dma0 AT91_XDMAC_DT_PERID(23)>;
+ dma-names = "tx", "rx";
+ status = "disabled";
+ };
+ };
+
+ flx10: flexcom@e2820000 {
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
+ reg = <0xe2820000 0x200>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 48>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0xe2820000 0x800>;
+ status = "disabled";
+
+ i2c10: i2c@600 {
+ compatible = "microchip,sama7g5-i2c", "microchip,sam9x60-i2c";
+ reg = <0x600 0x200>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 48>;
+ atmel,fifo-size = <32>;
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(26)>,
+ <&dma0 AT91_XDMAC_DT_PERID(25)>;
dma-names = "tx", "rx";
status = "disabled";
};
};
flx11: flexcom@e2824000 {
- compatible = "atmel,sama5d2-flexcom";
+ compatible = "microchip,sama7g5-flexcom", "atmel,sama5d2-flexcom";
reg = <0xe2824000 0x200>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 49>;
#address-cells = <1>;
@@ -977,7 +1001,7 @@
#size-cells = <0>;
atmel,fifo-size = <32>;
dmas = <&dma0 AT91_XDMAC_DT_PERID(28)>,
- <&dma0 AT91_XDMAC_DT_PERID(27)>;
+ <&dma0 AT91_XDMAC_DT_PERID(27)>;
dma-names = "tx", "rx";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi
index fd671c7a1e5d..6e1f0f164cb4 100644
--- a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi
+++ b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi
@@ -120,6 +120,7 @@
interrupts = <2 IRQ_TYPE_LEVEL_HIGH>,
<3 IRQ_TYPE_LEVEL_HIGH>,
<4 IRQ_TYPE_LEVEL_HIGH>;
+ #interrupt-cells = <2>;
interrupt-controller;
};
@@ -128,6 +129,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+ #interrupt-cells = <2>;
interrupt-controller;
};
diff --git a/arch/arm/boot/dts/nvidia/Makefile b/arch/arm/boot/dts/nvidia/Makefile
index 60091bf7e48b..96972559253c 100644
--- a/arch/arm/boot/dts/nvidia/Makefile
+++ b/arch/arm/boot/dts/nvidia/Makefile
@@ -39,5 +39,7 @@ dtb-$(CONFIG_ARCH_TEGRA_3x_SOC) += \
tegra30-cardhu-a02.dtb \
tegra30-cardhu-a04.dtb \
tegra30-colibri-eval-v3.dtb \
+ tegra30-lg-p880.dtb \
+ tegra30-lg-p895.dtb \
tegra30-ouya.dtb \
tegra30-pegatron-chagall.dtb
diff --git a/arch/arm/boot/dts/nvidia/tegra124-nyan.dtsi b/arch/arm/boot/dts/nvidia/tegra124-nyan.dtsi
index a2ee37180200..8125c1b3e8d7 100644
--- a/arch/arm/boot/dts/nvidia/tegra124-nyan.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra124-nyan.dtsi
@@ -338,6 +338,7 @@
interrupt-parent = <&gpio>;
interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
reg = <0>;
+ wakeup-source;
google,cros-ec-spi-msg-delay = <2000>;
diff --git a/arch/arm/boot/dts/nvidia/tegra124-venice2.dts b/arch/arm/boot/dts/nvidia/tegra124-venice2.dts
index 3924ee385dee..df98dc2a67b8 100644
--- a/arch/arm/boot/dts/nvidia/tegra124-venice2.dts
+++ b/arch/arm/boot/dts/nvidia/tegra124-venice2.dts
@@ -857,6 +857,7 @@
interrupt-parent = <&gpio>;
interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
reg = <0>;
+ wakeup-source;
google,cros-ec-spi-msg-delay = <2000>;
diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi
index 1640763fd4af..ff0d684622f7 100644
--- a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi
@@ -997,7 +997,6 @@
compatible = "st,stmpe811";
reg = <0x41>;
irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>;
- interrupt-controller;
id = <0>;
blocks = <0x5>;
irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi
index 3b6fad273cab..d38f1dd38a90 100644
--- a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi
@@ -980,7 +980,6 @@
compatible = "st,stmpe811";
reg = <0x41>;
irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>;
- interrupt-controller;
id = <0>;
blocks = <0x5>;
irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/nvidia/tegra30-asus-nexus7-grouper-common.dtsi b/arch/arm/boot/dts/nvidia/tegra30-asus-nexus7-grouper-common.dtsi
index a9342e04b14b..15f53babdc21 100644
--- a/arch/arm/boot/dts/nvidia/tegra30-asus-nexus7-grouper-common.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-asus-nexus7-grouper-common.dtsi
@@ -915,6 +915,9 @@
reg = <0x1c>;
realtek,dmic1-data-pin = <1>;
+
+ clocks = <&tegra_pmc TEGRA_PMC_CLK_OUT_1>;
+ clock-names = "mclk";
};
nct72: temperature-sensor@4c {
diff --git a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi
index 4eb526fe9c55..81c8a5fd92cc 100644
--- a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi
@@ -861,7 +861,6 @@
compatible = "st,stmpe811";
reg = <0x41>;
irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>;
- interrupt-controller;
id = <0>;
blocks = <0x5>;
irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/nvidia/tegra30-lg-p880.dts b/arch/arm/boot/dts/nvidia/tegra30-lg-p880.dts
new file mode 100644
index 000000000000..2f7754fd42a1
--- /dev/null
+++ b/arch/arm/boot/dts/nvidia/tegra30-lg-p880.dts
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "tegra30-lg-x3.dtsi"
+
+/ {
+ model = "LG Optimus 4X HD P880";
+ compatible = "lg,p880", "nvidia,tegra30";
+
+ aliases {
+ mmc1 = &sdmmc3; /* uSD slot */
+ mmc2 = &sdmmc1; /* WiFi */
+ };
+
+ pinmux@70000868 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&state_default>;
+
+ state_default: pinmux {
+ /* WLAN SDIO pinmux */
+ host-wlan-wake {
+ nvidia,pins = "pu4";
+ nvidia,function = "pwm1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* GNSS UART-B pinmux */
+ uartb-rxd {
+ nvidia,pins = "uart2_rxd_pc3";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ uartb-txd {
+ nvidia,pins = "uart2_txd_pc2";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ gps-reset {
+ nvidia,pins = "kb_row7_pr7";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* MicroSD pinmux */
+ sdmmc3-clk {
+ nvidia,pins = "sdmmc3_clk_pa6";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc3-data {
+ nvidia,pins = "sdmmc3_cmd_pa7",
+ "sdmmc3_dat0_pb7",
+ "sdmmc3_dat1_pb6",
+ "sdmmc3_dat2_pb5",
+ "sdmmc3_dat3_pb4";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ microsd-detect {
+ nvidia,pins = "clk2_out_pw5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* GPIO keys pinmux */
+ volume-up {
+ nvidia,pins = "ulpi_data6_po7";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Sensors pinmux */
+ current-alert-irq {
+ nvidia,pins = "uart2_rts_n_pj6";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* AUDIO pinmux */
+ sub-mic-ldo {
+ nvidia,pins = "gmi_cs7_n_pi6";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ };
+
+ i2c@7000c400 {
+ touchscreen@20 {
+ rmi4-f11@11 {
+ syna,clip-x-high = <1110>;
+ syna,clip-y-high = <1973>;
+
+ touchscreen-inverted-y;
+ };
+ };
+ };
+
+ memory-controller@7000f000 {
+ emc-timings-0 {
+ /* SAMSUNG 1GB K4P8G304EB FGC1 533MHz */
+ nvidia,ram-code = <0>;
+
+ timing-12750000 {
+ clock-frequency = <12750000>;
+
+ nvidia,emem-configuration = < 0x00050001 0xc0000010
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060402 0x77230303 0x001f0000 >;
+ };
+
+ timing-25500000 {
+ clock-frequency = <25500000>;
+
+ nvidia,emem-configuration = < 0x00020001 0xc0000010
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060402 0x73e30303 0x001f0000 >;
+ };
+
+ timing-51000000 {
+ clock-frequency = <51000000>;
+
+ nvidia,emem-configuration = < 0x00010001 0xc0000010
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060402 0x72c30303 0x001f0000 >;
+ };
+
+ timing-102000000 {
+ clock-frequency = <102000000>;
+
+ nvidia,emem-configuration = < 0x00000001 0xc0000018
+ 0x00000001 0x00000001 0x00000003 0x00000001
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060403 0x72430504 0x001f0000 >;
+ };
+
+ timing-204000000 {
+ clock-frequency = <204000000>;
+
+ nvidia,emem-configuration = < 0x00000003 0xc0000025
+ 0x00000001 0x00000001 0x00000006 0x00000003
+ 0x00000005 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000003 0x00000002
+ 0x02030001 0x00070506 0x71e40a07 0x001f0000 >;
+ };
+
+ timing-266500000 {
+ clock-frequency = <266500000>;
+
+ nvidia,emem-configuration = < 0x00000004 0xC0000030
+ 0x00000001 0x00000002 0x00000008 0x00000004
+ 0x00000006 0x00000001 0x00000002 0x00000005
+ 0x00000001 0x00000000 0x00000003 0x00000003
+ 0x03030001 0x00090608 0x70040c09 0x001f0000 >;
+ };
+
+ timing-533000000 {
+ clock-frequency = <533000000>;
+
+ nvidia,emem-configuration = < 0x00000008 0xC0000060
+ 0x00000003 0x00000004 0x00000010 0x0000000a
+ 0x0000000d 0x00000002 0x00000002 0x00000008
+ 0x00000002 0x00000000 0x00000004 0x00000005
+ 0x05040002 0x00110b10 0x70281811 0x001f0000 >;
+ };
+ };
+ };
+
+ memory-controller@7000f400 {
+ emc-timings-0 {
+ /* SAMSUNG 1GB K4P8G304EB FGC1 533MHz */
+ nvidia,ram-code = <0>;
+
+ timing-12750000 {
+ clock-frequency = <12750000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000009>;
+ nvidia,emc-cfg-dyn-self-ref;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000000
+ 0x00000001 0x00000002 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x0000002f 0x00000000 0x0000000b
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x00000002 0x00000002
+ 0x00000003 0x00000008 0x00000004 0x00000001
+ 0x00000002 0x00000036 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000009 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x80000164 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-25500000 {
+ clock-frequency = <25500000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000009>;
+ nvidia,emc-cfg-dyn-self-ref;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000001
+ 0x00000003 0x00000002 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x00000060 0x00000000 0x00000018
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x00000004 0x00000004
+ 0x00000003 0x00000008 0x00000004 0x00000001
+ 0x00000002 0x0000006b 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x0000000a 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-51000000 {
+ clock-frequency = <51000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000009>;
+ nvidia,emc-cfg-dyn-self-ref;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000003
+ 0x00000006 0x00000002 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x000000c0 0x00000000 0x00000030
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x00000008 0x00000008
+ 0x00000003 0x00000008 0x00000004 0x00000001
+ 0x00000002 0x000000d5 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000013 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x80000287 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-102000000 {
+ clock-frequency = <102000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x0000000a>;
+ nvidia,emc-cfg-dyn-self-ref;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000006
+ 0x0000000d 0x00000004 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x00000181 0x00000000 0x00000060
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x0000000f 0x0000000f
+ 0x00000003 0x00000008 0x00000004 0x00000001
+ 0x00000002 0x000001a9 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000025 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x8000040b 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-204000000 {
+ clock-frequency = <204000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010042>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000013>;
+ nvidia,emc-cfg-dyn-self-ref;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x0000000c
+ 0x0000001a 0x00000008 0x00000003 0x00000005
+ 0x00000004 0x00000001 0x00000006 0x00000003
+ 0x00000003 0x00000002 0x00000002 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000c
+ 0x0000000a 0x00000303 0x00000000 0x000000c0
+ 0x00000001 0x00000001 0x00000003 0x00000000
+ 0x00000001 0x00000007 0x0000001d 0x0000001d
+ 0x00000004 0x0000000b 0x00000005 0x00000001
+ 0x00000002 0x00000351 0x00000004 0x00000006
+ 0x00000000 0x00000000 0x00004282 0x004400a4
+ 0x00008000 0x00070000 0x00070000 0x00070000
+ 0x00070000 0x00070000 0x00070000 0x00070000
+ 0x00070000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00080000 0x00080000 0x00080000
+ 0x00080000 0x000e0220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x0000004a 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x80000713 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-266500000 {
+ clock-frequency = <266500000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010042>;
+ nvidia,emc-mode-2 = <0x00020002>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000018>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x0000000f
+ 0x00000022 0x0000000b 0x00000004 0x00000005
+ 0x00000005 0x00000001 0x00000007 0x00000004
+ 0x00000004 0x00000002 0x00000002 0x00000000
+ 0x00000002 0x00000005 0x00000002 0x0000000c
+ 0x0000000b 0x000003ef 0x00000000 0x000000fb
+ 0x00000001 0x00000001 0x00000004 0x00000000
+ 0x00000001 0x00000009 0x00000026 0x00000026
+ 0x00000004 0x0000000e 0x00000006 0x00000001
+ 0x00000002 0x00000455 0x00000000 0x00000004
+ 0x00000000 0x00000000 0x00006282 0x003200a4
+ 0x00008000 0x00050000 0x00050000 0x00050000
+ 0x00050000 0x00050000 0x00050000 0x00050000
+ 0x00050000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00060000 0x00060000 0x00060000
+ 0x00060000 0x000b0220 0x0800003d 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000060 0x000a000a 0xa0f10000 0x00000000
+ 0x00000000 0x800008ee 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-533000000 {
+ clock-frequency = <533000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x000100c2>;
+ nvidia,emc-mode-2 = <0x00020006>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000030>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x0000001f
+ 0x00000045 0x00000016 0x00000009 0x00000008
+ 0x00000009 0x00000003 0x0000000d 0x00000009
+ 0x00000009 0x00000005 0x00000003 0x00000000
+ 0x00000004 0x00000009 0x00000006 0x0000000d
+ 0x00000010 0x000007df 0x00000000 0x000001f7
+ 0x00000003 0x00000003 0x00000009 0x00000000
+ 0x00000001 0x0000000f 0x0000004b 0x0000004b
+ 0x00000008 0x0000001b 0x0000000c 0x00000001
+ 0x00000002 0x000008aa 0x00000000 0x00000006
+ 0x00000000 0x00000000 0x00006282 0xf0120091
+ 0x00008000 0x0000000a 0x0000000a 0x0000000a
+ 0x0000000a 0x0000000a 0x0000000a 0x0000000a
+ 0x0000000a 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x0000000a 0x0000000a 0x0000000a
+ 0x0000000a 0x00090220 0x0800003d 0x00000000
+ 0x77ffc004 0x01f1f408 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x000000c0 0x000e000e 0xa0f10000 0x00000000
+ 0x00000000 0x800010d9 0xe0000000 0xff00ff88 >;
+ };
+ };
+ };
+
+ sdmmc3: mmc@78000400 {
+ status = "okay";
+
+ cd-gpios = <&gpio TEGRA_GPIO(W, 5) GPIO_ACTIVE_LOW>;
+ bus-width = <4>;
+
+ vmmc-supply = <&vdd_usd>;
+ vqmmc-supply = <&vdd_1v8_vio>;
+ };
+
+ battery: battery-cell {
+ compatible = "simple-battery";
+ device-chemistry = "lithium-ion";
+ charge-full-design-microamp-hours = <2150000>;
+ energy-full-design-microwatt-hours = <8200000>;
+ operating-range-celsius = <0 45>;
+ };
+
+ gpio-keys {
+ key-volume-up {
+ label = "Volume Up";
+ gpios = <&gpio TEGRA_GPIO(O, 7) GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_VOLUMEUP>;
+ debounce-interval = <10>;
+ wakeup-event-action = <EV_ACT_ASSERTED>;
+ wakeup-source;
+ };
+ };
+
+ sound {
+ compatible = "lg,tegra-audio-max98089-p880",
+ "nvidia,tegra-audio-max98089";
+ nvidia,model = "LG Optimus 4X HD MAX98089";
+
+ nvidia,int-mic-en-gpios = <&gpio TEGRA_GPIO(I, 6) GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/arch/arm/boot/dts/nvidia/tegra30-lg-p895.dts b/arch/arm/boot/dts/nvidia/tegra30-lg-p895.dts
new file mode 100644
index 000000000000..e32fafc7f5e0
--- /dev/null
+++ b/arch/arm/boot/dts/nvidia/tegra30-lg-p895.dts
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "tegra30-lg-x3.dtsi"
+
+/ {
+ model = "LG Optimus Vu P895";
+ compatible = "lg,p895", "nvidia,tegra30";
+
+ pinmux@70000868 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&state_default>;
+
+ state_default: pinmux {
+ /* GNSS UART-B pinmux */
+ uartb-cts-rxd {
+ nvidia,pins = "uart2_cts_n_pj5",
+ "uart2_rxd_pc3";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ uartb-rts-txd {
+ nvidia,pins = "uart2_rts_n_pj6",
+ "uart2_txd_pc2";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ gps-reset {
+ nvidia,pins = "spdif_out_pk5";
+ nvidia,function = "spdif";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* GPIO keys pinmux */
+ memo-key {
+ nvidia,pins = "sdmmc3_dat1_pb6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ volume-up {
+ nvidia,pins = "gmi_cs7_n_pi6";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Sensors pinmux */
+ current-alert-irq {
+ nvidia,pins = "spi1_cs0_n_px6";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Panel pinmux */
+ panel-vdd {
+ nvidia,pins = "pbb0";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* AUDIO pinmux */
+ sub-mic-ldo {
+ nvidia,pins = "gmi_dqs_pi2";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* Modem pinmux */
+ usim-detect {
+ nvidia,pins = "clk2_out_pw5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* GPIO power/drive control */
+ drive-sdmmc4 {
+ nvidia,pins = "drive_gma",
+ "drive_gmb",
+ "drive_gmc",
+ "drive_gmd";
+ nvidia,pull-down-strength = <9>;
+ nvidia,pull-up-strength = <9>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_SLOWEST>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_SLOWEST>;
+ };
+ };
+ };
+
+ i2c@7000c400 {
+ touchscreen@20 {
+ rmi4-f11@11 {
+ syna,clip-x-high = <1535>;
+ syna,clip-y-high = <2047>;
+ };
+ };
+ };
+
+ memory-controller@7000f000 {
+ emc-timings-2 {
+ /* Hynix 1GB H9TCNNN8JDMMPR LPDDR2 533MHz */
+ nvidia,ram-code = <2>;
+
+ timing-12750000 {
+ clock-frequency = <12750000>;
+
+ nvidia,emem-configuration = < 0x00020001 0xc0000010
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060402 0x77230303 0x001f0000 >;
+ };
+
+ timing-25500000 {
+ clock-frequency = <25500000>;
+
+ nvidia,emem-configuration = < 0x00030003 0xc0000010
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060402 0x73e30303 0x001f0000 >;
+ };
+
+ timing-51000000 {
+ clock-frequency = <51000000>;
+
+ nvidia,emem-configuration = < 0x00010003 0xc0000010
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060402 0x72c30303 0x001f0000 >;
+ };
+
+ timing-102000000 {
+ clock-frequency = <102000000>;
+
+ nvidia,emem-configuration = < 0x00000003 0xc0000018
+ 0x00000001 0x00000001 0x00000003 0x00000001
+ 0x00000003 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000002 0x00000002
+ 0x02020001 0x00060403 0x72430504 0x001f0000 >;
+ };
+
+ timing-204000000 {
+ clock-frequency = <204000000>;
+
+ nvidia,emem-configuration = < 0x00000006 0xc0000025
+ 0x00000001 0x00000001 0x00000006 0x00000003
+ 0x00000005 0x00000001 0x00000002 0x00000004
+ 0x00000001 0x00000000 0x00000003 0x00000002
+ 0x02030001 0x00070506 0x71e40a07 0x001f0000 >;
+ };
+
+ timing-266500000 {
+ clock-frequency = <266500000>;
+
+ nvidia,emem-configuration = < 0x00000008 0xc0000030
+ 0x00000001 0x00000002 0x00000008 0x00000004
+ 0x00000006 0x00000001 0x00000002 0x00000005
+ 0x00000001 0x00000000 0x00000003 0x00000003
+ 0x03030001 0x00090608 0x70040c09 0x001f0000 >;
+ };
+
+ timing-533000000 {
+ clock-frequency = <533000000>;
+
+ nvidia,emem-configuration = < 0x0000000f 0xc0000060
+ 0x00000003 0x00000004 0x00000010 0x0000000a
+ 0x0000000d 0x00000002 0x00000002 0x00000008
+ 0x00000002 0x00000000 0x00000004 0x00000005
+ 0x05040002 0x00110b10 0x70281811 0x001f0000 >;
+ };
+ };
+ };
+
+ memory-controller@7000f400 {
+ emc-timings-2 {
+ /* Hynix 1GB H9TCNNN8JDMMPR LPDDR2 533MHz */
+ nvidia,ram-code = <2>;
+
+ timing-12750000 {
+ clock-frequency = <12750000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000009>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000000
+ 0x00000001 0x00000002 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x0000002f 0x00000000 0x0000000b
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x00000002 0x00000002
+ 0x00000003 0x00000008 0x00000004 0x00000004
+ 0x00000002 0x00000036 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000009 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x80000164 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-25500000 {
+ clock-frequency = <25500000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000009>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000001
+ 0x00000003 0x00000002 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x00000060 0x00000000 0x00000018
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x00000004 0x00000004
+ 0x00000003 0x00000008 0x00000004 0x00000004
+ 0x00000002 0x0000006b 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x0000000a 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x800001c5 0xd0000000 0xff00ff00 >;
+ };
+
+ timing-51000000 {
+ clock-frequency = <51000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000009>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000003
+ 0x00000006 0x00000002 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x000000c0 0x00000000 0x00000030
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x00000008 0x00000008
+ 0x00000003 0x00000008 0x00000004 0x00000004
+ 0x00000002 0x000000d5 0x00000004 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000013 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x80000287 0xd0000000 0xff00ff00 >;
+ };
+
+ timing-102000000 {
+ clock-frequency = <102000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010022>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x0000000a>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x00000006
+ 0x0000000d 0x00000004 0x00000002 0x00000004
+ 0x00000004 0x00000001 0x00000005 0x00000002
+ 0x00000002 0x00000001 0x00000001 0x00000000
+ 0x00000001 0x00000003 0x00000001 0x0000000b
+ 0x00000009 0x00000181 0x00000000 0x00000060
+ 0x00000001 0x00000001 0x00000002 0x00000000
+ 0x00000001 0x00000007 0x0000000f 0x0000000f
+ 0x00000003 0x00000008 0x00000004 0x00000004
+ 0x00000002 0x000001a9 0x00000004 0x00000006
+ 0x00000000 0x00000000 0x00004282 0x007800a4
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x000fc000 0x000fc000 0x000fc000
+ 0x000fc000 0x00100220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000025 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x8000040b 0xd0000000 0xff00ff00 >;
+ };
+
+ timing-204000000 {
+ clock-frequency = <204000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010042>;
+ nvidia,emc-mode-2 = <0x00020001>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000013>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x0000000c
+ 0x0000001a 0x00000008 0x00000003 0x00000005
+ 0x00000004 0x00000001 0x00000006 0x00000003
+ 0x00000003 0x00000002 0x00000002 0x00000000
+ 0x00000001 0x00000004 0x00000001 0x0000000c
+ 0x0000000a 0x00000303 0x00000000 0x000000c0
+ 0x00000001 0x00000001 0x00000003 0x00000000
+ 0x00000001 0x00000007 0x0000001d 0x0000001d
+ 0x00000004 0x0000000b 0x00000005 0x00000004
+ 0x00000002 0x00000351 0x00000005 0x00000004
+ 0x00000000 0x00000000 0x00004282 0x004400a4
+ 0x00008000 0x00080000 0x00080000 0x00080000
+ 0x00080000 0x00072000 0x00072000 0x00072000
+ 0x00072000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00080000 0x00080000 0x00080000
+ 0x00080000 0x000e0220 0x0800201c 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x0000004a 0x00090009 0xa0f10000 0x00000000
+ 0x00000000 0x80000713 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-266500000 {
+ clock-frequency = <266500000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x00010042>;
+ nvidia,emc-mode-2 = <0x00020002>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000018>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x0000000f
+ 0x00000022 0x0000000b 0x00000004 0x00000005
+ 0x00000005 0x00000001 0x00000007 0x00000004
+ 0x00000004 0x00000002 0x00000002 0x00000000
+ 0x00000002 0x00000005 0x00000002 0x0000000c
+ 0x0000000b 0x000003ef 0x00000000 0x000000fb
+ 0x00000001 0x00000001 0x00000004 0x00000000
+ 0x00000001 0x00000009 0x00000026 0x00000026
+ 0x00000004 0x0000000e 0x00000006 0x00000004
+ 0x00000002 0x00000455 0x00000000 0x00000004
+ 0x00000000 0x00000000 0x00006282 0x003200a4
+ 0x00008000 0x00070000 0x00070000 0x00070000
+ 0x00070000 0x00072000 0x00072000 0x00072000
+ 0x00072000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00080002 0x00080002 0x00080002
+ 0x00080002 0x000e0220 0x0800003d 0x00000000
+ 0x77ffc004 0x01f1f008 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x00000060 0x000a000a 0xa0f10000 0x00000000
+ 0x00000000 0x800008ee 0xe0000000 0xff00ff00 >;
+ };
+
+ timing-533000000 {
+ clock-frequency = <533000000>;
+
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-mode-1 = <0x000100c2>;
+ nvidia,emc-mode-2 = <0x00020006>;
+ nvidia,emc-mode-reset = <0x00000000>;
+ nvidia,emc-zcal-cnt-long = <0x00000030>;
+ nvidia,emc-cfg-periodic-qrst;
+
+ nvidia,emc-configuration = < 0x0000001f
+ 0x00000045 0x00000016 0x00000009 0x00000008
+ 0x00000009 0x00000003 0x0000000d 0x00000009
+ 0x00000009 0x00000005 0x00000003 0x00000000
+ 0x00000004 0x0000000a 0x00000006 0x0000000d
+ 0x00000010 0x000007df 0x00000000 0x000001f7
+ 0x00000003 0x00000003 0x00000009 0x00000000
+ 0x00000001 0x0000000f 0x0000004b 0x0000004b
+ 0x00000008 0x0000001b 0x0000000c 0x00000004
+ 0x00000002 0x000008aa 0x00000000 0x00000004
+ 0x00000000 0x00000000 0x00006282 0xf0120091
+ 0x00008000 0x0000000c 0x0000000c 0x0000000c
+ 0x0000000c 0x0000000a 0x0000000a 0x0000000a
+ 0x0000000a 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x00000000 0x0000000c 0x0000000c 0x0000000c
+ 0x0000000c 0x000c0220 0x0800003d 0x00000000
+ 0x77ffc004 0x01f1f408 0x00000000 0x00000007
+ 0x08000068 0x08000000 0x00000802 0x00064000
+ 0x000000c0 0x000e000e 0xa0f10000 0x00000000
+ 0x00000000 0x800010d9 0xe0000000 0xff00ff88 >;
+ };
+ };
+ };
+
+ battery: battery-cell {
+ compatible = "simple-battery";
+ device-chemistry = "lithium-ion";
+ charge-full-design-microamp-hours = <2080000>;
+ energy-full-design-microwatt-hours = <7700000>;
+ operating-range-celsius = <0 45>;
+ };
+
+ gpio-keys {
+ key-memo {
+ label = "Memo";
+ gpios = <&gpio TEGRA_GPIO(B, 6) GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_MEMO>;
+ debounce-interval = <10>;
+ wakeup-event-action = <EV_ACT_ASSERTED>;
+ wakeup-source;
+ };
+
+ key-volume-up {
+ label = "Volume Up";
+ gpios = <&gpio TEGRA_GPIO(I, 6) GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_VOLUMEUP>;
+ debounce-interval = <10>;
+ wakeup-event-action = <EV_ACT_ASSERTED>;
+ wakeup-source;
+ };
+ };
+
+ gpio-leds {
+ led-power {
+ label = "power::white";
+ gpios = <&gpio TEGRA_GPIO(R, 3) GPIO_ACTIVE_HIGH>;
+
+ linux,default-trigger = "battery-charging";
+
+ color = <LED_COLOR_ID_WHITE>;
+ function = LED_FUNCTION_CHARGING;
+ };
+ };
+
+ regulator-lcd3v {
+ gpio = <&gpio TEGRA_GPIO(BB, 0) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+
+ sound {
+ compatible = "lg,tegra-audio-max98089-p895",
+ "nvidia,tegra-audio-max98089";
+ nvidia,model = "LG Optimus Vu MAX98089";
+
+ nvidia,int-mic-en-gpios = <&gpio TEGRA_GPIO(I, 2) GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/arch/arm/boot/dts/nvidia/tegra30-lg-x3.dtsi b/arch/arm/boot/dts/nvidia/tegra30-lg-x3.dtsi
new file mode 100644
index 000000000000..909260a5d0fb
--- /dev/null
+++ b/arch/arm/boot/dts/nvidia/tegra30-lg-x3.dtsi
@@ -0,0 +1,1812 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <dt-bindings/input/gpio-keys.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/mfd/max77620.h>
+#include <dt-bindings/thermal/thermal.h>
+
+#include "tegra30.dtsi"
+#include "tegra30-cpu-opp.dtsi"
+#include "tegra30-cpu-opp-microvolt.dtsi"
+
+/ {
+ chassis-type = "handset";
+
+ aliases {
+ mmc0 = &sdmmc4; /* eMMC */
+ mmc1 = &sdmmc1; /* WiFi */
+
+ rtc0 = &pmic;
+ rtc1 = "/rtc@7000e000";
+
+ serial0 = &uartd; /* Console */
+ serial1 = &uartc; /* Bluetooth */
+ serial2 = &uartb; /* GPS */
+ };
+
+ /*
+ * The decompressor and also some bootloaders rely on a
+ * pre-existing /chosen node to be available to insert the
+ * command line and merge other ATAGS info.
+ */
+ chosen { };
+
+ firmware {
+ trusted-foundations {
+ compatible = "tlm,trusted-foundations";
+ tlm,version-major = <2>;
+ tlm,version-minor = <8>;
+ };
+ };
+
+ memory@80000000 {
+ reg = <0x80000000 0x40000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ linux,cma@80000000 {
+ compatible = "shared-dma-pool";
+ alloc-ranges = <0x80000000 0x30000000>;
+ size = <0x10000000>; /* 256MiB */
+ linux,cma-default;
+ reusable;
+ };
+
+ ramoops@bed00000 {
+ compatible = "ramoops";
+ reg = <0xbed00000 0x10000>; /* 64kB */
+ console-size = <0x8000>; /* 32kB */
+ record-size = <0x400>; /* 1kB */
+ ecc-size = <16>;
+ };
+
+ trustzone@bfe00000 {
+ reg = <0xbfe00000 0x200000>; /* 2MB */
+ no-map;
+ };
+ };
+
+ vde@6001a000 {
+ assigned-clocks = <&tegra_car TEGRA30_CLK_VDE>;
+ assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_P>;
+ assigned-clock-rates = <408000000>;
+ };
+
+ pinmux@70000868 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&state_default>;
+
+ state_default: pinmux {
+ /* WLAN SDIO pinmux */
+ sdmmc1-clk {
+ nvidia,pins = "sdmmc1_clk_pz0";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc1-cmd {
+ nvidia,pins = "sdmmc1_cmd_pz1",
+ "sdmmc1_dat3_py4",
+ "sdmmc1_dat2_py5",
+ "sdmmc1_dat1_py6",
+ "sdmmc1_dat0_py7";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ wlan-reset {
+ nvidia,pins = "pv3";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ wlan-host-wake {
+ nvidia,pins = "pu6";
+ nvidia,function = "pwm3";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* GNSS UART-B pinmux */
+ gps-pwr-en {
+ nvidia,pins = "kb_row6_pr6";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ gps-ldo-en {
+ nvidia,pins = "ulpi_dir_py1";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ gps-clk-ref {
+ nvidia,pins = "gmi_ad8_ph0";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* Bluetooth UART-C pinmux */
+ uartc-cts-rxd {
+ nvidia,pins = "uart3_cts_n_pa1",
+ "uart3_rxd_pw7";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ uartc-rts-txd {
+ nvidia,pins = "uart3_rts_n_pc0",
+ "uart3_txd_pw6";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ bt-reset {
+ nvidia,pins = "clk2_req_pcc5";
+ nvidia,function = "dap";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ bt-dev-wake {
+ nvidia,pins = "kb_row11_ps3";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ bt-host-wake {
+ nvidia,pins = "kb_row12_ps4";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ bt-pcm-dap4 {
+ nvidia,pins = "dap4_fs_pp4",
+ "dap4_din_pp5",
+ "dap4_dout_pp6",
+ "dap4_sclk_pp7";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* EMMC pinmux */
+ sdmmc4-clk {
+ nvidia,pins = "sdmmc4_clk_pcc4";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc4-data {
+ nvidia,pins = "sdmmc4_cmd_pt7",
+ "sdmmc4_dat0_paa0",
+ "sdmmc4_dat1_paa1",
+ "sdmmc4_dat2_paa2",
+ "sdmmc4_dat3_paa3",
+ "sdmmc4_dat4_paa4",
+ "sdmmc4_dat5_paa5",
+ "sdmmc4_dat6_paa6",
+ "sdmmc4_dat7_paa7";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc4-reset {
+ nvidia,pins = "sdmmc4_rst_n_pcc3";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* I2C pinmux */
+ gen1-i2c {
+ nvidia,pins = "gen1_i2c_scl_pc4",
+ "gen1_i2c_sda_pc5";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ };
+ gen2-i2c {
+ nvidia,pins = "gen2_i2c_scl_pt5",
+ "gen2_i2c_sda_pt6";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ };
+ cam-i2c {
+ nvidia,pins = "cam_i2c_scl_pbb1",
+ "cam_i2c_sda_pbb2";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ };
+ ddc-i2c {
+ nvidia,pins = "ddc_scl_pv4",
+ "ddc_sda_pv5";
+ nvidia,function = "i2c4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ };
+ pwr-i2c {
+ nvidia,pins = "pwr_i2c_scl_pz6",
+ "pwr_i2c_sda_pz7";
+ nvidia,function = "i2cpwr";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ };
+ mhl-i2c {
+ nvidia,pins = "kb_col6_pq6",
+ "kb_col7_pq7";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* GPIO keys pinmux */
+ power-key {
+ nvidia,pins = "gmi_wp_n_pc7";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ volume-down {
+ nvidia,pins = "ulpi_data3_po4";
+ nvidia,function = "spi3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Sensors pinmux */
+ sen-vdd {
+ nvidia,pins = "spi1_miso_px7";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ proxi-vdd {
+ nvidia,pins = "spi2_miso_px1";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ sen-vio {
+ nvidia,pins = "lcd_dc1_pd2";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ nct-irq {
+ nvidia,pins = "gmi_iordy_pi5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ bat-irq {
+ nvidia,pins = "kb_row8_ps0";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ charger-irq {
+ nvidia,pins = "gmi_cs1_n_pj2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ mpu-irq {
+ nvidia,pins = "gmi_ad12_ph4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ compass-irq {
+ nvidia,pins = "gmi_ad13_ph5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ light-irq {
+ nvidia,pins = "gmi_cs4_n_pk2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* LED pinmux */
+ backlight-en {
+ nvidia,pins = "lcd_dc0_pn6";
+ nvidia,function = "rsvd3";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ flash-led-en {
+ nvidia,pins = "pbb3";
+ nvidia,function = "vgp3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ keypad-led {
+ nvidia,pins = "kb_row2_pr2",
+ "kb_row3_pr3";
+ nvidia,function = "rsvd3";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* NFC pinmux */
+ nfc-irq {
+ nvidia,pins = "spi2_cs1_n_pw2";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ nfc-ven {
+ nvidia,pins = "spi1_sck_px5";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ nfc-firm {
+ nvidia,pins = "kb_row0_pr0";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* DC pinmux */
+ lcd-pwr {
+ nvidia,pins = "lcd_pwr0_pb2",
+ "lcd_pwr1_pc1";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ lcd-wr-n {
+ nvidia,pins = "lcd_wr_n_pz3";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ lcd-id {
+ nvidia,pins = "lcd_m1_pw1";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ lcd-pclk {
+ nvidia,pins = "lcd_pclk_pb3",
+ "lcd_de_pj1",
+ "lcd_hsync_pj3",
+ "lcd_vsync_pj4";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ lcd-rgb-blue {
+ nvidia,pins = "lcd_d0_pe0",
+ "lcd_d1_pe1",
+ "lcd_d2_pe2",
+ "lcd_d3_pe3",
+ "lcd_d4_pe4",
+ "lcd_d5_pe5",
+ "lcd_d18_pm2",
+ "lcd_d19_pm3";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ lcd-rgb-green {
+ nvidia,pins = "lcd_d6_pe6",
+ "lcd_d7_pe7",
+ "lcd_d8_pf0",
+ "lcd_d9_pf1",
+ "lcd_d10_pf2",
+ "lcd_d11_pf3",
+ "lcd_d20_pm4",
+ "lcd_d21_pm5";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ lcd-rgb-red {
+ nvidia,pins = "lcd_d12_pf4",
+ "lcd_d13_pf5",
+ "lcd_d14_pf6",
+ "lcd_d15_pf7",
+ "lcd_d16_pm0",
+ "lcd_d17_pm1",
+ "lcd_d22_pm6",
+ "lcd_d23_pm7";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Bridge pinmux */
+ bridge-reset {
+ nvidia,pins = "ulpi_data1_po2";
+ nvidia,function = "spi3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ rgb-ic-en {
+ nvidia,pins = "gmi_a18_pb1";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ bridge-clk {
+ nvidia,pins = "clk3_out_pee0";
+ nvidia,function = "extperiph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ rgb-bridge {
+ nvidia,pins = "lcd_sdin_pz2",
+ "lcd_sdout_pn5",
+ "lcd_cs0_n_pn4",
+ "lcd_sck_pz4";
+ nvidia,function = "spi5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Panel pinmux */
+ panel-reset {
+ nvidia,pins = "lcd_cs1_n_pw0";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ panel-vio {
+ nvidia,pins = "ulpi_clk_py0";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* Touchscreen pinmux */
+ touch-vdd {
+ nvidia,pins = "kb_col1_pq1";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ touch-vio {
+ nvidia,pins = "spi1_mosi_px4";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ touch-irq-n {
+ nvidia,pins = "kb_col3_pq3";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ touch-rst-n {
+ nvidia,pins = "ulpi_data0_po1";
+ nvidia,function = "spi3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ touch-maker-id {
+ nvidia,pins = "kb_col2_pq2";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* MHL pinmux */
+ mhl-vio {
+ nvidia,pins = "pv2";
+ nvidia,function = "owr";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ mhl-rst-n {
+ nvidia,pins = "clk3_req_pee1";
+ nvidia,function = "dev3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ mhl-irq {
+ nvidia,pins = "crt_vsync_pv7";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ mhl-sel {
+ nvidia,pins = "kb_row10_ps2";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ hdmi-hpd {
+ nvidia,pins = "hdmi_int_pn7";
+ nvidia,function = "hdmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* AUDIO pinmux */
+ hp-detect {
+ nvidia,pins = "pbb6";
+ nvidia,function = "vgp6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ hp-hook {
+ nvidia,pins = "ulpi_data4_po5";
+ nvidia,function = "ulpi";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ear-mic-en {
+ nvidia,pins = "spi2_mosi_px0";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ audio-irq {
+ nvidia,pins = "spi2_cs2_n_pw3";
+ nvidia,function = "spi3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ audio-mclk {
+ nvidia,pins = "clk1_out_pw4";
+ nvidia,function = "extperiph1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ dap-i2s0 {
+ nvidia,pins = "dap1_fs_pn0",
+ "dap1_din_pn1",
+ "dap1_dout_pn2",
+ "dap1_sclk_pn3";
+ nvidia,function = "i2s0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ dap-i2s1 {
+ nvidia,pins = "dap2_fs_pa2",
+ "dap2_sclk_pa3",
+ "dap2_din_pa4",
+ "dap2_dout_pa5";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* MUIC pinmux */
+ muic-irq {
+ nvidia,pins = "gmi_cs0_n_pj0";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ muic-dp2t {
+ nvidia,pins = "pcc2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ muic-usif {
+ nvidia,pins = "ulpi_stp_py3";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ ifx-usb-vbus-en {
+ nvidia,pins = "kb_row4_pr4";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ pcb-rev {
+ nvidia,pins = "gmi_wait_pi7",
+ "gmi_rst_n_pi4";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ jtag-rtck {
+ nvidia,pins = "jtag_rtck_pu7";
+ nvidia,function = "rtck";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* Camera pinmux */
+ cam-mclk {
+ nvidia,pins = "cam_mclk_pcc0";
+ nvidia,function = "vi_alt3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ cam-pmic-en {
+ nvidia,pins = "pbb4";
+ nvidia,function = "vgp4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ front-cam-rst {
+ nvidia,pins = "pbb5";
+ nvidia,function = "vgp5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ front-cam-vio {
+ nvidia,pins = "ulpi_nxt_py2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ rear-cam-rst {
+ nvidia,pins = "gmi_cs3_n_pk4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ rear-cam-eprom-pr {
+ nvidia,pins = "gmi_cs2_n_pk3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ rear-cam-vcm-pwdn {
+ nvidia,pins = "kb_row1_pr1";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* Haptic pinmux */
+ haptic-en {
+ nvidia,pins = "gmi_ad9_ph1";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ haptic-osc {
+ nvidia,pins = "gmi_ad11_ph3";
+ nvidia,function = "pwm3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+
+ /* Modem pinmux */
+ cp2ap-ack1-host-active {
+ nvidia,pins = "pu5";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ cp2ap-ack2-host-wakeup {
+ nvidia,pins = "pv0";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ap2cp-ack2-suspend-req {
+ nvidia,pins = "kb_row14_ps6";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ap2cp-ack1-slave-wakeup {
+ nvidia,pins = "kb_row15_ps7";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ cp-kkp {
+ nvidia,pins = "kb_col0_pq0";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ cp-crash-irq {
+ nvidia,pins = "kb_row13_ps5";
+ nvidia,function = "kbc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ap2cp-uarta-tx-ipc {
+ nvidia,pins = "pu0";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ ap2cp-uarta-rx-ipc {
+ nvidia,pins = "pu1";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ fota-ap-cts-cp-rts {
+ nvidia,pins = "pu2";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ fota-ap-rts-cp-cts {
+ nvidia,pins = "pu3";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ modem-enable {
+ nvidia,pins = "ulpi_data7_po0";
+ nvidia,function = "hsi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ modem-reset {
+ nvidia,pins = "pv1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ dap-i2s2 {
+ nvidia,pins = "dap3_fs_pp0",
+ "dap3_din_pp1",
+ "dap3_dout_pp2",
+ "dap3_sclk_pp3";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+
+ /* GPIO power/drive control */
+ drive-i2c {
+ nvidia,pins = "drive_dbg",
+ "drive_at5",
+ "drive_gme",
+ "drive_ddc",
+ "drive_ao1";
+ nvidia,high-speed-mode = <TEGRA_PIN_DISABLE>;
+ nvidia,schmitt = <TEGRA_PIN_ENABLE>;
+ nvidia,low-power-mode = <TEGRA_PIN_LP_DRIVE_DIV_1>;
+ nvidia,pull-down-strength = <31>;
+ nvidia,pull-up-strength = <31>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ };
+
+ drive-uart3 {
+ nvidia,pins = "drive_uart3";
+ nvidia,high-speed-mode = <TEGRA_PIN_DISABLE>;
+ nvidia,schmitt = <TEGRA_PIN_ENABLE>;
+ nvidia,low-power-mode = <TEGRA_PIN_LP_DRIVE_DIV_1>;
+ nvidia,pull-down-strength = <31>;
+ nvidia,pull-up-strength = <31>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ };
+
+ drive-gmi {
+ nvidia,pins = "drive_at3";
+ nvidia,high-speed-mode = <TEGRA_PIN_DISABLE>;
+ nvidia,schmitt = <TEGRA_PIN_ENABLE>;
+ nvidia,low-power-mode = <TEGRA_PIN_LP_DRIVE_DIV_1>;
+ nvidia,pull-down-strength = <31>;
+ nvidia,pull-up-strength = <31>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ };
+ };
+ };
+
+ uartb: serial@70006040 {
+ compatible = "nvidia,tegra30-hsuart";
+ reset-names = "serial";
+ /delete-property/ reg-shift;
+ status = "okay";
+
+ /* GNSS GSD5T */
+ };
+
+ uartc: serial@70006200 {
+ compatible = "nvidia,tegra30-hsuart";
+ reset-names = "serial";
+ /delete-property/ reg-shift;
+ status = "okay";
+
+ nvidia,adjust-baud-rates = <0 9600 100>,
+ <9600 115200 200>,
+ <1000000 4000000 136>;
+
+ /* BCM4330B1 37.4 MHz Class 1.5 ExtLNA */
+ bluetooth {
+ compatible = "brcm,bcm4330-bt";
+ max-speed = <4000000>;
+
+ clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>;
+ clock-names = "txco";
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(S, 4) IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "host-wakeup";
+
+ device-wakeup-gpios = <&gpio TEGRA_GPIO(S, 3) GPIO_ACTIVE_HIGH>;
+ shutdown-gpios = <&gpio TEGRA_GPIO(CC, 5) GPIO_ACTIVE_HIGH>;
+
+ vbat-supply = <&vdd_3v3_vbat>;
+ vddio-supply = <&vdd_1v8_vio>;
+ };
+ };
+
+ uartd: serial@70006300 {
+ /delete-property/ dmas;
+ /delete-property/ dma-names;
+ status = "okay";
+
+ /* Console */
+ };
+
+ pwm@7000a000 {
+ status = "okay";
+ };
+
+ gen1_i2c: i2c@7000c000 {
+ status = "okay";
+ clock-frequency = <400000>;
+
+ /* Aichi AMI306 digital compass */
+ magnetometer@e {
+ compatible = "asahi-kasei,ak8974";
+ reg = <0x0e>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(H, 5) IRQ_TYPE_EDGE_RISING>;
+
+ avdd-supply = <&vdd_3v0_sen>;
+ dvdd-supply = <&vdd_1v8_vio>;
+
+ mount-matrix = "-1", "0", "0",
+ "0", "1", "0",
+ "0", "0", "-1";
+ };
+
+ max98089: audio-codec@10 {
+ compatible = "maxim,max98089";
+ reg = <0x10>;
+
+ clocks = <&tegra_pmc TEGRA_PMC_CLK_OUT_1>;
+ clock-names = "mclk";
+
+ assigned-clocks = <&tegra_pmc TEGRA_PMC_CLK_OUT_1>;
+ assigned-clock-parents = <&tegra_car TEGRA30_CLK_EXTERN1>;
+ };
+
+ nfc@28 {
+ compatible = "nxp,pn544-i2c";
+ reg = <0x28>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(W, 2) IRQ_TYPE_EDGE_RISING>;
+
+ enable-gpios = <&gpio TEGRA_GPIO(X, 5) GPIO_ACTIVE_HIGH>;
+ firmware-gpios = <&gpio TEGRA_GPIO(R, 0) GPIO_ACTIVE_HIGH>;
+ };
+
+ imu@68 {
+ compatible = "invensense,mpu6050";
+ reg = <0x68>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(H, 4) IRQ_TYPE_EDGE_RISING>;
+
+ vdd-supply = <&vdd_3v0_sen>;
+ vddio-supply = <&vdd_1v8_sen>;
+
+ mount-matrix = "1", "0", "0",
+ "0", "1", "0",
+ "0", "0", "-1";
+ };
+ };
+
+ gen2_i2c: i2c@7000c400 {
+ status = "okay";
+ clock-frequency = <400000>;
+
+ /* Synaptics RMI4 S3203B touchcreen */
+ touchscreen@20 {
+ compatible = "syna,rmi4-i2c";
+ reg = <0x20>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(Q, 3) IRQ_TYPE_EDGE_FALLING>;
+
+ vdd-supply = <&vdd_3v0_touch>;
+ vio-supply = <&vdd_1v8_touch>;
+
+ syna,reset-delay-ms = <20>;
+ syna,startup-delay-ms = <200>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rmi4-f01@1 {
+ reg = <0x1>;
+ syna,nosleep-mode = <1>;
+ };
+
+ rmi4-f11@11 {
+ reg = <0x11>;
+ syna,sensor-type = <1>;
+
+ syna,clip-x-low = <0>;
+ syna,clip-y-low = <0>;
+ };
+ };
+ };
+
+ cam_i2c: i2c@7000c500 {
+ status = "okay";
+ clock-frequency = <400000>;
+
+ dw9714: coil@c {
+ compatible = "dongwoon,dw9714";
+ reg = <0x0c>;
+
+ enable-gpios = <&gpio TEGRA_GPIO(R, 1) GPIO_ACTIVE_HIGH>;
+
+ vcc-supply = <&vcc_focuser>;
+ };
+
+ camera-pmic@7d {
+ compatible = "ti,lp8720";
+ reg = <0x7d>;
+
+ enable-gpios = <&gpio TEGRA_GPIO(BB, 4) GPIO_ACTIVE_HIGH>;
+
+ vt_1v2_front: ldo1 {
+ regulator-name = "vt_1v2_dig";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ vt_2v7_front: ldo2 {
+ regulator-name = "vt_2v7_vana";
+ regulator-min-microvolt = <2700000>;
+ regulator-max-microvolt = <2700000>;
+ };
+
+ vdd_2v7_rear: ldo3 {
+ regulator-name = "8m_2v7_vana";
+ regulator-min-microvolt = <2700000>;
+ regulator-max-microvolt = <2800000>;
+ };
+
+ vio_1v8_rear: ldo4 {
+ regulator-name = "vio_1v8_cam";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ vcc_focuser: ldo5 {
+ regulator-name = "8m_2v8_vcm";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ };
+
+ vdd_1v2_rear: buck {
+ regulator-name = "8m_1v2_cam";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+ };
+ };
+
+ hdmi_ddc: i2c@7000c700 {
+ status = "okay";
+ clock-frequency = <100000>;
+ };
+
+ pwr_i2c: i2c@7000d000 {
+ status = "okay";
+ clock-frequency = <400000>;
+
+ pmic: max77663@1c {
+ compatible = "maxim,max77663";
+ reg = <0x1c>;
+
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+
+ #gpio-cells = <2>;
+ gpio-controller;
+
+ system-power-controller;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&max77663_default>;
+
+ max77663_default: pinmux {
+ gpio1 {
+ pins = "gpio1";
+ function = "gpio";
+ drive-open-drain = <1>;
+ };
+
+ gpio4 {
+ pins = "gpio4";
+ function = "32k-out1";
+ };
+ };
+
+ fps {
+ fps0 {
+ maxim,fps-event-source = <MAX77620_FPS_EVENT_SRC_EN0>;
+ };
+
+ fps1 {
+ maxim,fps-event-source = <MAX77620_FPS_EVENT_SRC_EN1>;
+ };
+
+ fps2 {
+ maxim,fps-event-source = <MAX77620_FPS_EVENT_SRC_EN0>;
+ };
+ };
+
+ regulators {
+ in-sd0-supply = <&vdd_5v0_vbus>;
+ in-sd1-supply = <&vdd_5v0_vbus>;
+ in-sd2-supply = <&vdd_5v0_vbus>;
+ in-sd3-supply = <&vdd_5v0_vbus>;
+
+ in-ldo0-1-supply = <&vdd_1v8_vio>;
+ in-ldo2-supply = <&vdd_3v3_vbat>;
+ in-ldo3-5-supply = <&vdd_3v3_vbat>;
+ in-ldo4-6-supply = <&vdd_3v3_vbat>;
+ in-ldo7-8-supply = <&vdd_1v8_vio>;
+
+ vdd_cpu: sd0 {
+ regulator-name = "vdd_cpu";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1250000>;
+ regulator-coupled-with = <&vdd_core>;
+ regulator-coupled-max-spread = <300000>;
+ regulator-max-step-microvolt = <100000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ nvidia,tegra-cpu-regulator;
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ vdd_core: sd1 {
+ regulator-name = "vdd_core";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-coupled-with = <&vdd_cpu>;
+ regulator-coupled-max-spread = <300000>;
+ regulator-max-step-microvolt = <100000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ nvidia,tegra-core-regulator;
+ maxim,active-fps-source = <MAX77620_FPS_SRC_1>;
+ };
+
+ vdd_1v8_vio: sd2 {
+ regulator-name = "vdd_1v8_gen";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ sd3 {
+ regulator-name = "vddio_ddr";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ ldo0 {
+ regulator-name = "avdd_pll";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_1>;
+ };
+
+ ldo1 {
+ regulator-name = "vdd_ddr_hs";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ avdd_3v3_periph: ldo2 {
+ regulator-name = "avdd_usb";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ vdd_usd: ldo3 {
+ regulator-name = "vdd_sdmmc3";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-always-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ ldo4 {
+ regulator-name = "vdd_rtc";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
+ };
+
+ vcore_emmc: ldo5 {
+ regulator-name = "vdd_ddr_rx";
+ regulator-min-microvolt = <2850000>;
+ regulator-max-microvolt = <2850000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
+ };
+
+ avdd_1v8_hdmi_pll: ldo6 {
+ regulator-name = "avdd_osc";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ vdd_1v2_mhl: ldo7 {
+ regulator-name = "vdd_1v2_mhl";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1250000>;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+
+ ldo8 {
+ regulator-name = "avdd_dsi_csi";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+
+ maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
+ };
+ };
+ };
+
+ fuel-gauge@36 {
+ compatible = "maxim,max17043";
+ reg = <0x36>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(S, 0) IRQ_TYPE_EDGE_FALLING>;
+
+ monitored-battery = <&battery>;
+
+ maxim,alert-low-soc-level = <10>;
+ wakeup-source;
+ };
+
+ power-sensor@40 {
+ compatible = "ti,ina230";
+ reg = <0x40>;
+
+ vs-supply = <&vdd_3v0_sen>;
+ };
+
+ nct72: temperature-sensor@4c {
+ compatible = "onnn,nct1008";
+ reg = <0x4c>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(I, 5) IRQ_TYPE_EDGE_FALLING>;
+
+ vcc-supply = <&vdd_3v0_sen>;
+ #thermal-sensor-cells = <1>;
+ };
+ };
+
+ i2c-mhl {
+ compatible = "i2c-gpio";
+
+ sda-gpios = <&gpio TEGRA_GPIO(Q, 7) (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ scl-gpios = <&gpio TEGRA_GPIO(Q, 6) (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+
+ i2c-gpio,delay-us = <5>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ spi@7000dc00 {
+ status = "okay";
+ spi-max-frequency = <25000000>;
+
+ /* DSI bridge */
+ };
+
+ pmc@7000e400 {
+ status = "okay";
+ nvidia,invert-interrupt;
+ nvidia,suspend-mode = <2>;
+ nvidia,cpu-pwr-good-time = <2000>;
+ nvidia,cpu-pwr-off-time = <200>;
+ nvidia,core-pwr-good-time = <3845 3845>;
+ nvidia,core-pwr-off-time = <0>;
+ nvidia,core-power-req-active-high;
+ nvidia,sys-clock-req-active-high;
+ core-supply = <&vdd_core>;
+
+ i2c-thermtrip {
+ nvidia,i2c-controller-id = <4>;
+ nvidia,bus-addr = <0x1c>;
+ nvidia,reg-addr = <0x41>;
+ nvidia,reg-data = <0x02>;
+ };
+ };
+
+ hda@70030000 {
+ status = "okay";
+ };
+
+ ahub@70080000 {
+ /* HIFI CODEC */
+ i2s@70080300 { /* i2s0 */
+ status = "okay";
+ };
+
+ /* BASEBAND */
+ i2s@70080500 { /* i2s2 */
+ status = "okay";
+ };
+
+ /* BT SCO */
+ i2s@70080600 { /* i2s3 */
+ status = "okay";
+ };
+ };
+
+ sdmmc1: mmc@78000000 {
+ status = "okay";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ assigned-clocks = <&tegra_car TEGRA30_CLK_SDMMC1>;
+ assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_C>;
+ assigned-clock-rates = <50000000>;
+
+ max-frequency = <50000000>;
+ keep-power-in-suspend;
+ bus-width = <4>;
+ non-removable;
+
+ mmc-pwrseq = <&brcm_wifi_pwrseq>;
+ vmmc-supply = <&vdd_3v3_vbat>;
+ vqmmc-supply = <&vdd_1v8_vio>;
+
+ /* BCM4330B1 37.4 MHz Class 1.5 ExtLNA */
+ wifi@1 {
+ compatible = "brcm,bcm4329-fmac";
+ reg = <1>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(U, 6) IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "host-wake";
+ };
+ };
+
+ sdmmc4: mmc@78000600 {
+ status = "okay";
+ bus-width = <8>;
+
+ non-removable;
+ mmc-ddr-1_8v;
+
+ vmmc-supply = <&vcore_emmc>;
+ vqmmc-supply = <&vdd_1v8_vio>;
+ };
+
+ /* Micro USB */
+ usb@7d000000 {
+ compatible = "nvidia,tegra30-udc";
+ status = "okay";
+ dr_mode = "peripheral";
+ };
+
+ usb-phy@7d000000 {
+ status = "okay";
+ dr_mode = "peripheral";
+ nvidia,hssync-start-delay = <0>;
+ nvidia,xcvr-lsfslew = <2>;
+ nvidia,xcvr-lsrslew = <2>;
+ vbus-supply = <&avdd_3v3_periph>;
+ };
+
+ /* PMIC has a built-in 32KHz oscillator which is used by PMC */
+ clk32k_in: clock-32k {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "pmic-oscillator";
+ };
+
+ gps_refclk: clock-gps {
+ compatible = "fixed-clock";
+ clock-frequency = <26000000>;
+ clock-accuracy = <100>;
+ #clock-cells = <0>;
+ };
+
+ gps_osc: clock-gps-osc-gate {
+ compatible = "gpio-gate-clock";
+ enable-gpios = <&gpio TEGRA_GPIO(H, 0) GPIO_ACTIVE_HIGH>;
+ clocks = <&gps_refclk>;
+ #clock-cells = <0>;
+ };
+
+ cpus {
+ cpu0: cpu@0 {
+ cpu-supply = <&vdd_cpu>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ #cooling-cells = <2>;
+ };
+ cpu1: cpu@1 {
+ cpu-supply = <&vdd_cpu>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ #cooling-cells = <2>;
+ };
+ cpu2: cpu@2 {
+ cpu-supply = <&vdd_cpu>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ #cooling-cells = <2>;
+ };
+ cpu3: cpu@3 {
+ cpu-supply = <&vdd_cpu>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ #cooling-cells = <2>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ key-power {
+ label = "Power";
+ gpios = <&gpio TEGRA_GPIO(C, 7) GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_POWER>;
+ debounce-interval = <10>;
+ wakeup-event-action = <EV_ACT_ASSERTED>;
+ wakeup-source;
+ };
+
+ key-volume-down {
+ label = "Volume Down";
+ gpios = <&gpio TEGRA_GPIO(O, 4) GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_VOLUMEDOWN>;
+ debounce-interval = <10>;
+ wakeup-event-action = <EV_ACT_ASSERTED>;
+ wakeup-source;
+ };
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led-keypad {
+ label = "keypad::white";
+ gpios = <&gpio TEGRA_GPIO(R, 2) GPIO_ACTIVE_HIGH>;
+
+ color = <LED_COLOR_ID_WHITE>;
+ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+ };
+
+ opp-table-actmon {
+ /delete-node/ opp-625000000;
+ /delete-node/ opp-667000000;
+ /delete-node/ opp-750000000;
+ /delete-node/ opp-800000000;
+ /delete-node/ opp-900000000;
+ };
+
+ opp-table-emc {
+ /delete-node/ opp-625000000-1200;
+ /delete-node/ opp-625000000-1250;
+ /delete-node/ opp-667000000-1200;
+ /delete-node/ opp-750000000-1300;
+ /delete-node/ opp-800000000-1300;
+ /delete-node/ opp-900000000-1350;
+ };
+
+ brcm_wifi_pwrseq: pwrseq-wifi {
+ compatible = "mmc-pwrseq-simple";
+
+ clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>;
+ clock-names = "ext_clock";
+
+ reset-gpios = <&gpio TEGRA_GPIO(V, 3) GPIO_ACTIVE_LOW>;
+ post-power-on-delay-ms = <300>;
+ power-off-delay-us = <300>;
+ };
+
+ vdd_5v0_vbus: regulator-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vdd_3v3_vbat: regulator-vbat {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_vbat";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ regulator-boot-on;
+ vin-supply = <&vdd_5v0_vbus>;
+ };
+
+ vdd_3v0_sen: regulator-sen3v {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_3v0_sensor";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(X, 7) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vdd_3v0_proxi: regulator-proxi {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_3v0_proxi";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(X, 1) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vdd_1v8_sen: regulator-sen1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_1v8_sensor";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(D, 2) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vcc_3v0_lcd: regulator-lcd3v {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_3v0_lcd";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ iovcc_1v8_lcd: regulator-lcd1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "iovcc_1v8_lcd";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(Y, 0) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vio_1v8_mhl: regulator-mhl1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "vio_1v8_mhl";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vdd_3v0_touch: regulator-touchpwr {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_3v0_touch";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(Q, 1) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vdd_1v8_touch: regulator-touchvio {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_1v8_touch";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(X, 4) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vcc_1v8_gps: regulator-gps {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_1v8_gps";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(Y, 1) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ vio_1v8_front: regulator-frontvio {
+ compatible = "regulator-fixed";
+ regulator-name = "vt_1v8_cam_vio";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ gpio = <&gpio TEGRA_GPIO(Y, 2) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_vbat>;
+ };
+
+ sound {
+ nvidia,audio-routing =
+ "Headphone Jack", "HPL",
+ "Headphone Jack", "HPR",
+ "Int Spk", "SPKL",
+ "Int Spk", "SPKR",
+ "Earpiece", "RECL",
+ "Earpiece", "RECR",
+ "INA1", "Mic Jack",
+ "MIC1", "MICBIAS",
+ "MICBIAS", "Internal Mic 1",
+ "MIC2", "Internal Mic 2";
+
+ nvidia,i2s-controller = <&tegra_i2s0>;
+ nvidia,audio-codec = <&max98089>;
+
+ nvidia,hp-det-gpios = <&gpio TEGRA_GPIO(BB, 6) GPIO_ACTIVE_LOW>;
+ nvidia,mic-det-gpios = <&gpio TEGRA_GPIO(O, 5) GPIO_ACTIVE_HIGH>;
+ nvidia,ext-mic-en-gpios = <&gpio TEGRA_GPIO(X, 0) GPIO_ACTIVE_HIGH>;
+ nvidia,coupled-mic-hp-det;
+
+ clocks = <&tegra_car TEGRA30_CLK_PLL_A>,
+ <&tegra_car TEGRA30_CLK_PLL_A_OUT0>,
+ <&tegra_pmc TEGRA_PMC_CLK_OUT_1>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+
+ assigned-clocks = <&tegra_car TEGRA30_CLK_EXTERN1>,
+ <&tegra_pmc TEGRA_PMC_CLK_OUT_1>;
+
+ assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_A_OUT0>,
+ <&tegra_car TEGRA30_CLK_EXTERN1>;
+ };
+
+ thermal-zones {
+ /*
+ * NCT72 has two sensors:
+ *
+ * 0: internal that monitors ambient/skin temperature
+ * 1: external that is connected to the CPU's diode
+ *
+ * Ideally we should use userspace thermal governor,
+ * but it's a much more complex solution. The "skin"
+ * zone exists as a simpler solution which prevents
+ * this device from getting too hot from a user's
+ * tactile perspective. The CPU zone is intended to
+ * protect silicon from damage.
+ */
+
+ skin-thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <5000>; /* milliseconds */
+
+ thermal-sensors = <&nct72 0>;
+
+ trips {
+ trip0: skin-alert {
+ /* throttle at 50C until temperature drops to 49.8C */
+ temperature = <50000>;
+ hysteresis = <200>;
+ type = "passive";
+ };
+
+ trip1: skin-crit {
+ /* shut down at 60C */
+ temperature = <60000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&trip0>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&actmon THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu-thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <5000>; /* milliseconds */
+
+ thermal-sensors = <&nct72 1>;
+
+ trips {
+ trip2: cpu-alert {
+ /* throttle at 75C until temperature drops to 74.8C */
+ temperature = <75000>;
+ hysteresis = <200>;
+ type = "passive";
+ };
+
+ trip3: cpu-crit {
+ /* shut down at 90C */
+ temperature = <90000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map1 {
+ trip = <&trip2>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&actmon THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/nxp/imx/Makefile b/arch/arm/boot/dts/nxp/imx/Makefile
index a724d1a7a9a0..4052cad859fa 100644
--- a/arch/arm/boot/dts/nxp/imx/Makefile
+++ b/arch/arm/boot/dts/nxp/imx/Makefile
@@ -45,7 +45,9 @@ dtb-$(CONFIG_SOC_IMX53) += \
imx53-mba53.dtb \
imx53-ppd.dtb \
imx53-qsb.dtb \
+ imx53-qsb-hdmi.dtb \
imx53-qsrb.dtb \
+ imx53-qsrb-hdmi.dtb \
imx53-sk-imx53.dtb \
imx53-sk-imx53-atm0700d4-lvds.dtb \
imx53-sk-imx53-atm0700d4-rgb.dtb \
@@ -54,6 +56,8 @@ dtb-$(CONFIG_SOC_IMX53) += \
imx53-tx53-x13x.dtb \
imx53-usbarmory.dtb \
imx53-voipac-bsb.dtb
+imx53-qsb-hdmi-dtbs := imx53-qsb.dtb imx53-qsb-hdmi.dtbo
+imx53-qsrb-hdmi-dtbs := imx53-qsrb.dtb imx53-qsb-hdmi.dtbo
dtb-$(CONFIG_SOC_IMX6Q) += \
imx6dl-alti6p.dtb \
imx6dl-apf6dev.dtb \
@@ -118,6 +122,7 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
imx6dl-sabrelite.dtb \
imx6dl-sabresd.dtb \
imx6dl-savageboard.dtb \
+ imx6dl-sielaff.dtb \
imx6dl-skov-revc-lt2.dtb \
imx6dl-skov-revc-lt6.dtb \
imx6dl-solidsense.dtb \
@@ -147,6 +152,7 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
imx6dl-yapp4-phoenix.dtb \
imx6dl-yapp4-ursa.dtb \
imx6q-apalis-eval.dtb \
+ imx6q-apalis-eval-v1.2.dtb \
imx6q-apalis-ixora.dtb \
imx6q-apalis-ixora-v1.1.dtb \
imx6q-apalis-ixora-v1.2.dtb \
diff --git a/arch/arm/boot/dts/nxp/imx/imx1-apf9328.dts b/arch/arm/boot/dts/nxp/imx/imx1-apf9328.dts
index e66eef87a7a4..058e9435524f 100644
--- a/arch/arm/boot/dts/nxp/imx/imx1-apf9328.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx1-apf9328.dts
@@ -54,7 +54,7 @@
#size-cells = <1>;
};
- eth: eth@4,c00000 {
+ eth: ethernet@4,c00000 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_eth>;
compatible = "davicom,dm9000";
diff --git a/arch/arm/boot/dts/nxp/imx/imx1.dtsi b/arch/arm/boot/dts/nxp/imx/imx1.dtsi
index 1ac10965fdfd..389ecb1ebf8f 100644
--- a/arch/arm/boot/dts/nxp/imx/imx1.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx1.dtsi
@@ -251,7 +251,7 @@
};
};
- weim: weim@220000 {
+ weim: memory-controller@220000 {
#address-cells = <2>;
#size-cells = <1>;
compatible = "fsl,imx1-weim";
diff --git a/arch/arm/boot/dts/nxp/imx/imx27.dtsi b/arch/arm/boot/dts/nxp/imx/imx27.dtsi
index ec472695c71e..ec3ccc8f4095 100644
--- a/arch/arm/boot/dts/nxp/imx/imx27.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx27.dtsi
@@ -568,7 +568,7 @@
status = "disabled";
};
- weim: weim@d8002000 {
+ weim: memory-controller@d8002000 {
#address-cells = <2>;
#size-cells = <1>;
compatible = "fsl,imx27-weim";
diff --git a/arch/arm/boot/dts/nxp/imx/imx31.dtsi b/arch/arm/boot/dts/nxp/imx/imx31.dtsi
index e1ae7c175f7d..00006c90d9a7 100644
--- a/arch/arm/boot/dts/nxp/imx/imx31.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx31.dtsi
@@ -352,7 +352,7 @@
status = "disabled";
};
- weim: weim@b8002000 {
+ weim: memory-controller@b8002000 {
compatible = "fsl,imx31-weim", "fsl,imx27-weim";
reg = <0xb8002000 0x1000>;
clocks = <&clks 56>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx35.dtsi b/arch/arm/boot/dts/nxp/imx/imx35.dtsi
index 2d20e5541acc..442dc15677b8 100644
--- a/arch/arm/boot/dts/nxp/imx/imx35.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx35.dtsi
@@ -374,7 +374,7 @@
status = "disabled";
};
- weim: weim@b8002000 {
+ weim: memory-controller@b8002000 {
#address-cells = <2>;
#size-cells = <1>;
clocks = <&clks 0>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx51.dtsi b/arch/arm/boot/dts/nxp/imx/imx51.dtsi
index c96d6311dfa7..4efce49022e4 100644
--- a/arch/arm/boot/dts/nxp/imx/imx51.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx51.dtsi
@@ -578,7 +578,7 @@
reg = <0x83fd8000 0x1000>;
};
- weim: weim@83fda000 {
+ weim: memory-controller@83fda000 {
#address-cells = <2>;
#size-cells = <1>;
compatible = "fsl,imx51-weim";
diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
new file mode 100644
index 000000000000..c84e9b052527
--- /dev/null
+++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * DT overlay for MCIMXHDMICARD as used with the iMX53 QSB or QSRB boards
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/gpio/gpio.h>
+
+/dts-v1/;
+/plugin/;
+
+&{/} {
+ /delete-node/ panel;
+
+ hdmi: connector-hdmi {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&sii9022_out>;
+ };
+ };
+ };
+
+ reg_1p2v: regulator-1p2v {
+ compatible = "regulator-fixed";
+ regulator-name = "1P2V";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ vin-supply = <&reg_3p2v>;
+ };
+};
+
+&display0 {
+ status = "okay";
+};
+
+&display0 {
+ port@1 {
+ display0_out: endpoint {
+ remote-endpoint = <&sii9022_in>;
+ };
+ };
+};
+
+&i2c2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sii9022: bridge-hdmi@39 {
+ compatible = "sil,sii9022";
+ reg = <0x39>;
+ reset-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
+ interrupts-extended = <&gpio3 31 IRQ_TYPE_LEVEL_LOW>;
+ iovcc-supply = <&reg_3p2v>;
+ #sound-dai-cells = <0>;
+ sil,i2s-data-lanes = <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ sii9022_in: endpoint {
+ remote-endpoint = <&display0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ sii9022_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
+};
+
+&tve {
+ status = "disabled";
+};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6dl-sielaff.dts b/arch/arm/boot/dts/nxp/imx/imx6dl-sielaff.dts
new file mode 100644
index 000000000000..7de8d5f26518
--- /dev/null
+++ b/arch/arm/boot/dts/nxp/imx/imx6dl-sielaff.dts
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ * Copyright (C) 2022 Kontron Electronics GmbH
+ */
+
+/dts-v1/;
+
+#include "imx6dl.dtsi"
+#include <dt-bindings/clock/imx6qdl-clock.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "Sielaff i.MX6 Solo";
+ compatible = "sielaff,imx6dl-board", "fsl,imx6dl";
+
+ chosen {
+ stdout-path = &uart2;
+ };
+
+ backlight: pwm-backlight {
+ compatible = "pwm-backlight";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_backlight>;
+ pwms = <&pwm3 0 50000 0>;
+ brightness-levels = <0 0 64 88 112 136 184 232 255>;
+ default-brightness-level = <4>;
+ enable-gpios = <&gpio6 16 GPIO_ACTIVE_HIGH>;
+ power-supply = <&reg_backlight>;
+ };
+
+ cec {
+ compatible = "cec-gpio";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_hdmi_cec>;
+ cec-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>;
+ hdmi-phandle = <&hdmi>;
+ };
+
+ enet_ref: clock-enet-ref {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <50000000>;
+ clock-output-names = "enet-ref";
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio_keys>;
+
+ key-0 {
+ gpios = <&gpio2 16 0>;
+ debounce-interval = <10>;
+ linux,code = <1>;
+ };
+
+ key-1 {
+ gpios = <&gpio3 27 0>;
+ debounce-interval = <10>;
+ linux,code = <2>;
+ };
+
+ key-2 {
+ gpios = <&gpio5 4 0>;
+ debounce-interval = <10>;
+ linux,code = <3>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio_leds>;
+
+ led-debug {
+ label = "debug-led";
+ gpios = <&gpio5 21 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ linux,default-trigger = "heartbeat";
+ };
+ };
+
+ memory@80000000 {
+ reg = <0x80000000 0x20000000>;
+ device_type = "memory";
+ };
+
+ osc_eth_phy: clock-osc-eth-phy {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "osc-eth-phy";
+ };
+
+ panel {
+ compatible = "lg,lb070wv8";
+ backlight = <&backlight>;
+ power-supply = <&reg_3v3>;
+
+ port {
+ panel_in_lvds: endpoint {
+ remote-endpoint = <&lvds_out>;
+ };
+ };
+ };
+
+ reg_3v3: regulator-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ reg_backlight: regulator-backlight {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_backlight>;
+ enable-active-high;
+ gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>;
+ regulator-name = "backlight";
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
+ };
+
+ reg_usb_otg_vbus: regulator-usb-otg-vbus {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usbotg_vbus>;
+ enable-active-high;
+ gpio = <&gpio4 15 GPIO_ACTIVE_HIGH>;
+ regulator-name = "usb_otg_vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+};
+
+&ecspi2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ecspi2>;
+ cs-gpios = <&gpio5 29 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <20000000>;
+ };
+};
+
+&fec {
+ /*
+ * Set PTP clock to external instead of internal reference, as the
+ * REF_CLK from the PHY is fed back into the i.MX6 and the GPR
+ * register needs to be set accordingly (see mach-imx6q.c).
+ */
+ clocks = <&clks IMX6QDL_CLK_ENET>,
+ <&clks IMX6QDL_CLK_ENET>,
+ <&enet_ref>,
+ <&clks IMX6QDL_CLK_ENET_REF>;
+ clock-names = "ipg", "ahb", "ptp", "enet_out";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enet>;
+ phy-connection-type = "rmii";
+ phy-handle = <&ethphy>;
+ status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy: ethernet-phy@1 {
+ reg = <1>;
+ clocks = <&osc_eth_phy>;
+ clock-names = "rmii-ref";
+ micrel,led-mode = <1>;
+ reset-assert-us = <500>;
+ reset-deassert-us = <100>;
+ reset-gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&gpio1 {
+ gpio-line-names =
+ "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "key-out", "key-in",
+ "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "";
+};
+
+&gpio2 {
+ gpio-line-names =
+ "", "", "", "", "", "", "", "",
+ "lan9500a-rst", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "";
+};
+
+&gpmi {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpmi_nand>;
+ status = "okay";
+};
+
+&hdmi {
+ ddc-i2c-bus = <&i2c4>;
+ status = "okay";
+};
+
+&i2c2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c2>;
+ clock-frequency = <100000>;
+ status = "okay";
+
+ rtc@51 {
+ compatible = "nxp,pcf8563";
+ reg = <0x51>;
+ };
+};
+
+&i2c3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c3>;
+ clock-frequency = <100000>;
+ status = "okay";
+
+ touchscreen@55 {
+ compatible = "sitronix,st1633";
+ reg = <0x55>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_touch>;
+ interrupts = <18 IRQ_TYPE_EDGE_FALLING>;
+ interrupt-parent = <&gpio5>;
+ gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
+ status = "disabled";
+ };
+
+ touchscreen@5d {
+ compatible = "goodix,gt928";
+ reg = <0x5d>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_touch>;
+ interrupts = <18 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&gpio5>;
+ irq-gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ status = "disabled";
+ };
+};
+
+&i2c4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c4>;
+ clock-frequency = <100000>;
+ status = "okay";
+};
+
+&ldb {
+ status = "okay";
+
+ lvds: lvds-channel@0 {
+ fsl,data-mapping = "spwg";
+ fsl,data-width = <24>;
+ status = "okay";
+
+ port@4 {
+ reg = <4>;
+
+ lvds_out: endpoint {
+ remote-endpoint = <&panel_in_lvds>;
+ };
+ };
+ };
+};
+
+&pwm3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm3>;
+ status = "okay";
+};
+
+&uart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart1>;
+ status = "okay";
+};
+
+&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart2>;
+ status = "okay";
+};
+
+&uart3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart3>;
+ status = "okay";
+};
+
+&usbh1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbh1>;
+ disable-over-current;
+ status = "okay";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ usb1@1 {
+ compatible = "usb4b4,6570";
+ reg = <1>;
+ clocks = <&clks IMX6QDL_CLK_CKO>;
+
+ assigned-clocks = <&clks IMX6QDL_CLK_CKO>,
+ <&clks IMX6QDL_CLK_CKO2_SEL>;
+ assigned-clock-parents = <&clks IMX6QDL_CLK_CKO2>,
+ <&clks IMX6QDL_CLK_OSC>;
+ assigned-clock-rates = <12000000 0>;
+ };
+};
+
+&usbotg {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg>;
+ dr_mode = "host";
+ over-current-active-low;
+ vbus-supply = <&reg_usb_otg_vbus>;
+ status = "okay";
+};
+
+&usdhc3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usdhc3>;
+ cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+ vmmc-supply = <&reg_3v3>;
+ voltage-ranges = <3300 3300>;
+ no-1-8-v;
+ status = "okay";
+};
+
+&wdog1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_wdog>;
+ fsl,ext-reset-output;
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_hog>;
+
+ pinctrl_hog: hoggrp {
+ fsl,pins = <
+ MX6QDL_PAD_RGMII_RD0__GPIO6_IO25 0x1b0b0 /* PMIC_IRQ */
+ MX6QDL_PAD_SD2_DAT3__GPIO1_IO12 0x1b0b0
+ MX6QDL_PAD_SD2_DAT1__GPIO1_IO14 0x1b0b0
+ MX6QDL_PAD_SD2_DAT0__GPIO1_IO15 0x1b0b0
+ MX6QDL_PAD_SD4_DAT0__GPIO2_IO08 0x1b0b0
+ MX6QDL_PAD_EIM_D29__GPIO3_IO29 0x1b0b0
+ >;
+ };
+
+ pinctrl_backlight: backlightgrp {
+ fsl,pins = <
+ MX6QDL_PAD_NANDF_CS3__GPIO6_IO16 0x100b1
+ >;
+ };
+
+ pinctrl_ecspi2: ecspi2grp {
+ fsl,pins = <
+ MX6QDL_PAD_CSI0_DAT10__ECSPI2_MISO 0x100b1
+ MX6QDL_PAD_CSI0_DAT9__ECSPI2_MOSI 0x100b1
+ MX6QDL_PAD_CSI0_DAT8__ECSPI2_SCLK 0x100b1
+ MX6QDL_PAD_CSI0_DAT11__GPIO5_IO29 0x100b1
+ >;
+ };
+
+ pinctrl_enet: enetgrp {
+ fsl,pins = <
+ MX6QDL_PAD_ENET_MDIO__ENET_MDIO 0x1b0b0
+ MX6QDL_PAD_ENET_MDC__ENET_MDC 0x1b0b0
+ MX6QDL_PAD_ENET_RXD0__ENET_RX_DATA0 0x1b0b0
+ MX6QDL_PAD_ENET_RXD1__ENET_RX_DATA1 0x1b0b0
+ MX6QDL_PAD_ENET_CRS_DV__ENET_RX_EN 0x1b0b0
+ MX6QDL_PAD_ENET_RX_ER__ENET_RX_ER 0x1b0b0
+ MX6QDL_PAD_ENET_TXD0__ENET_TX_DATA0 0x1b0b0
+ MX6QDL_PAD_ENET_TXD1__ENET_TX_DATA1 0x1b0b0
+ MX6QDL_PAD_ENET_TX_EN__ENET_TX_EN 0x1b0b0
+ MX6QDL_PAD_GPIO_16__ENET_REF_CLK 0x4001b0a8
+ MX6QDL_PAD_EIM_A25__GPIO5_IO02 0x100b1
+ >;
+ };
+
+ pinctrl_gpio_keys: gpiokeysgrp {
+ fsl,pins = <
+ MX6QDL_PAD_EIM_A22__GPIO2_IO16 0x1b080
+ MX6QDL_PAD_EIM_D27__GPIO3_IO27 0x1b080
+ MX6QDL_PAD_EIM_A24__GPIO5_IO04 0x1b080
+ >;
+ };
+
+ pinctrl_gpio_leds: gpioledsgrp {
+ fsl,pins = <
+ MX6QDL_PAD_CSI0_VSYNC__GPIO5_IO21 0x1b0b0
+ >;
+ };
+
+ pinctrl_gpmi_nand: gpminandgrp {
+ fsl,pins = <
+ MX6QDL_PAD_NANDF_CLE__NAND_CLE 0xb0b1
+ MX6QDL_PAD_NANDF_ALE__NAND_ALE 0xb0b1
+ MX6QDL_PAD_NANDF_WP_B__NAND_WP_B 0xb0b1
+ MX6QDL_PAD_NANDF_RB0__NAND_READY_B 0xb000
+ MX6QDL_PAD_NANDF_CS0__NAND_CE0_B 0xb0b1
+ MX6QDL_PAD_SD4_CMD__NAND_RE_B 0xb0b1
+ MX6QDL_PAD_SD4_CLK__NAND_WE_B 0xb0b1
+ MX6QDL_PAD_NANDF_D0__NAND_DATA00 0xb0b1
+ MX6QDL_PAD_NANDF_D1__NAND_DATA01 0xb0b1
+ MX6QDL_PAD_NANDF_D2__NAND_DATA02 0xb0b1
+ MX6QDL_PAD_NANDF_D3__NAND_DATA03 0xb0b1
+ MX6QDL_PAD_NANDF_D4__NAND_DATA04 0xb0b1
+ MX6QDL_PAD_NANDF_D5__NAND_DATA05 0xb0b1
+ MX6QDL_PAD_NANDF_D6__NAND_DATA06 0xb0b1
+ MX6QDL_PAD_NANDF_D7__NAND_DATA07 0xb0b1
+ >;
+ };
+
+ pinctrl_hdmi_cec: hdmicecgrp {
+ fsl,pins = <
+ MX6QDL_PAD_EIM_A21__GPIO2_IO17 0x1b8b1
+ >;
+ };
+
+ pinctrl_i2c2: i2c2grp {
+ fsl,pins = <
+ MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
+ MX6QDL_PAD_KEY_ROW3__I2C2_SDA 0x4001b8b1
+ >;
+ };
+
+ pinctrl_i2c3: i2c3grp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001f8b1
+ MX6QDL_PAD_GPIO_6__I2C3_SDA 0x4001f8b1
+ >;
+ };
+
+ pinctrl_i2c4: i2c4grp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_7__I2C4_SCL 0x4001b8b1
+ MX6QDL_PAD_GPIO_8__I2C4_SDA 0x4001b8b1
+ >;
+ };
+
+ pinctrl_pwm3: pwm3grp {
+ fsl,pins = <
+ MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1
+ >;
+ };
+
+ pinctrl_reg_backlight: regbacklightgrp {
+ fsl,pins = <
+ MX6QDL_PAD_ENET_REF_CLK__GPIO1_IO23 0x1b0b1
+ >;
+ };
+
+ pinctrl_reg_usbotg_vbus: regusbotgvbusgrp {
+ fsl,pins = <
+ MX6QDL_PAD_KEY_ROW4__GPIO4_IO15 0x1b0b1
+ >;
+ };
+
+ pinctrl_touch: touchgrp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_2__GPIO1_IO02 0x1b0b0
+ MX6QDL_PAD_CSI0_PIXCLK__GPIO5_IO18 0x1b0b0
+ >;
+ };
+
+ pinctrl_uart1: uart1grp {
+ fsl,pins = <
+ MX6QDL_PAD_SD3_DAT7__UART1_TX_DATA 0x1b0b1
+ MX6QDL_PAD_SD3_DAT6__UART1_RX_DATA 0x1b0b1
+ >;
+ };
+
+ pinctrl_uart2: uart2grp {
+ fsl,pins = <
+ MX6QDL_PAD_SD4_DAT7__UART2_TX_DATA 0x1b0b1
+ MX6QDL_PAD_SD4_DAT4__UART2_RX_DATA 0x1b0b1
+ >;
+ };
+
+ pinctrl_uart3: uart3grp {
+ fsl,pins = <
+ MX6QDL_PAD_EIM_D24__UART3_TX_DATA 0x1b0b0
+ MX6QDL_PAD_EIM_D25__UART3_RX_DATA 0x1b0b0
+ >;
+ };
+
+ pinctrl_usbh1: usbh1grp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_3__USB_H1_OC 0x1b0b1
+ MX6QDL_PAD_CSI0_MCLK__CCM_CLKO1 0x1b0b0
+ >;
+ };
+
+ pinctrl_usbotg: usbotggrp {
+ fsl,pins = <
+ MX6QDL_PAD_KEY_COL4__USB_OTG_OC 0x1b0b1
+ >;
+ };
+
+ pinctrl_usdhc3: usdhc3grp {
+ fsl,pins = <
+ MX6QDL_PAD_SD3_CMD__SD3_CMD 0x17059
+ MX6QDL_PAD_SD3_CLK__SD3_CLK 0x10059
+ MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x17059
+ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059
+ MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059
+ MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059
+ MX6QDL_PAD_GPIO_4__GPIO1_IO04 0x100b1
+ >;
+ };
+
+ pinctrl_wdog: wdoggrp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_9__WDOG1_B 0x1b0b0
+ >;
+ };
+};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx6dl-yapp4-common.dtsi
index 3be38a3c4bb1..c32ea040fecd 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6dl-yapp4-common.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6dl-yapp4-common.dtsi
@@ -117,17 +117,9 @@
#address-cells = <1>;
#size-cells = <0>;
- phy_port2: phy@1 {
- reg = <1>;
- };
-
- phy_port3: phy@2 {
- reg = <2>;
- };
-
switch@10 {
compatible = "qca,qca8334";
- reg = <10>;
+ reg = <0x10>;
reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
switch_ports: ports {
@@ -149,15 +141,30 @@
eth2: port@2 {
reg = <2>;
label = "eth2";
+ phy-mode = "internal";
phy-handle = <&phy_port2>;
};
eth1: port@3 {
reg = <3>;
label = "eth1";
+ phy-mode = "internal";
phy-handle = <&phy_port3>;
};
};
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy_port2: ethernet-phy@1 {
+ reg = <1>;
+ };
+
+ phy_port3: ethernet-phy@2 {
+ reg = <2>;
+ };
+ };
};
};
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval-v1.2.dts b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval-v1.2.dts
new file mode 100644
index 000000000000..15d4a98ee976
--- /dev/null
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval-v1.2.dts
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2024 Toradex
+ */
+
+/dts-v1/;
+
+#include "imx6q-apalis-eval.dtsi"
+
+/ {
+ model = "Toradex Apalis iMX6Q/D Module on Apalis Evaluation Board v1.2";
+ compatible = "toradex,apalis_imx6q-eval-v1.2", "toradex,apalis_imx6q",
+ "fsl,imx6q";
+
+ reg_3v3_mmc: regulator-3v3-mmc {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio2 0 GPIO_ACTIVE_HIGH>;
+ off-on-delay-us = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_3v3_mmc>;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-name = "3.3V_MMC";
+ startup-delay-us = <10000>;
+ };
+
+ reg_3v3_sd: regulator-3v3-sd {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio2 1 GPIO_ACTIVE_HIGH>;
+ off-on-delay-us = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_3v3_sd>;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-name = "3.3V_SD";
+ startup-delay-us = <10000>;
+ };
+
+ reg_can1: regulator-can1 {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio2 3 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_can1_power>;
+ regulator-name = "5V_SW_CAN1";
+ startup-delay-us = <10000>;
+ };
+
+ reg_can2: regulator-can2 {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio2 2 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_can2_power>;
+ regulator-name = "5V_SW_CAN2";
+ startup-delay-us = <10000>;
+ };
+
+ sound-carrier {
+ compatible = "simple-audio-card";
+ simple-audio-card,bitclock-master = <&codec_dai>;
+ simple-audio-card,format = "i2s";
+ simple-audio-card,frame-master = <&codec_dai>;
+ simple-audio-card,name = "apalis-nau8822";
+ simple-audio-card,routing =
+ "Headphones", "LHP",
+ "Headphones", "RHP",
+ "Speaker", "LSPK",
+ "Speaker", "RSPK",
+ "Line Out", "AUXOUT1",
+ "Line Out", "AUXOUT2",
+ "LAUX", "Line In",
+ "RAUX", "Line In",
+ "LMICP", "Mic In",
+ "RMICP", "Mic In";
+ simple-audio-card,widgets =
+ "Headphones", "Headphones",
+ "Line Out", "Line Out",
+ "Speaker", "Speaker",
+ "Microphone", "Mic In",
+ "Line", "Line In";
+
+ codec_dai: simple-audio-card,codec {
+ sound-dai = <&nau8822_1a>;
+ system-clock-frequency = <12288000>;
+ };
+
+ simple-audio-card,cpu {
+ sound-dai = <&ssi2>;
+ };
+ };
+};
+
+&can1 {
+ xceiver-supply = <&reg_can1>;
+ status = "okay";
+};
+
+&can2 {
+ xceiver-supply = <&reg_can2>;
+ status = "okay";
+};
+
+/* I2C1_SDA/SCL on MXM3 209/211 */
+&i2c1 {
+ /* Audio Codec */
+ nau8822_1a: audio-codec@1a {
+ compatible = "nuvoton,nau8822";
+ reg = <0x1a>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_nau8822>;
+ #sound-dai-cells = <0>;
+ };
+
+ /* Current measurement into module VCC */
+ hwmon@40 {
+ compatible = "ti,ina219";
+ reg = <0x40>;
+ shunt-resistor = <5000>;
+ };
+
+ /* Temperature Sensor */
+ temperature-sensor@4f {
+ compatible = "ti,tmp75c";
+ reg = <0x4f>;
+ };
+
+ /* EEPROM */
+ eeprom@57 {
+ compatible = "st,24c02", "atmel,24c02";
+ reg = <0x57>;
+ pagesize = <16>;
+ size = <256>;
+ };
+};
+
+&pcie {
+ status = "okay";
+};
+
+&ssi2 {
+ status = "okay";
+};
+
+/* MMC1 */
+&usdhc1 {
+ bus-width = <4>;
+ pinctrl-0 = <&pinctrl_usdhc1_4bit &pinctrl_mmc_cd>;
+ vmmc-supply = <&reg_3v3_mmc>;
+ status = "okay";
+};
+
+/* SD1 */
+&usdhc2 {
+ cd-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>;
+ pinctrl-0 = <&pinctrl_usdhc2 &pinctrl_sd_cd>;
+ vmmc-supply = <&reg_3v3_sd>;
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_enable_3v3_mmc: enable3v3mmcgrp {
+ fsl,pins = <
+ /* MMC1_PWR_CTRL */
+ MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x1b0b0
+ >;
+ };
+
+ pinctrl_enable_3v3_sd: enable3v3sdgrp {
+ fsl,pins = <
+ /* SD1_PWR_CTRL */
+ MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x1b0b0
+ >;
+ };
+
+ pinctrl_enable_can1_power: enablecan1powergrp {
+ fsl,pins = <
+ /* CAN1_PWR_EN */
+ MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x1b0b0
+ >;
+ };
+
+ pinctrl_enable_can2_power: enablecan2powergrp {
+ fsl,pins = <
+ /* CAN2_PWR_EN */
+ MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x1b0b0
+ >;
+ };
+
+ pinctrl_nau8822: nau8822grp {
+ fsl,pins = <
+ MX6QDL_PAD_DISP0_DAT16__AUD5_TXC 0x130b0
+ MX6QDL_PAD_DISP0_DAT17__AUD5_TXD 0x130b0
+ MX6QDL_PAD_DISP0_DAT18__AUD5_TXFS 0x130b0
+ MX6QDL_PAD_DISP0_DAT19__AUD5_RXD 0x130b0
+ >;
+ };
+};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dts b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dts
index 3fc079dfd61e..e1077e2da5f4 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dts
@@ -7,29 +7,13 @@
/dts-v1/;
-#include <dt-bindings/gpio/gpio.h>
-#include <dt-bindings/input/input.h>
-#include <dt-bindings/interrupt-controller/irq.h>
-#include "imx6q.dtsi"
-#include "imx6qdl-apalis.dtsi"
+#include "imx6q-apalis-eval.dtsi"
/ {
model = "Toradex Apalis iMX6Q/D Module on Apalis Evaluation Board";
compatible = "toradex,apalis_imx6q-eval", "toradex,apalis_imx6q",
"fsl,imx6q";
- aliases {
- i2c0 = &i2c1;
- i2c1 = &i2c3;
- i2c2 = &i2c2;
- rtc0 = &rtc_i2c;
- rtc1 = &snvs_rtc;
- };
-
- chosen {
- stdout-path = "serial0:115200n8";
- };
-
reg_pcie_switch: regulator-pcie-switch {
compatible = "regulator-fixed";
enable-active-high;
@@ -40,14 +24,6 @@
startup-delay-us = <100000>;
status = "okay";
};
-
- reg_3v3_sw: regulator-3v3-sw {
- compatible = "regulator-fixed";
- regulator-always-on;
- regulator-max-microvolt = <3300000>;
- regulator-min-microvolt = <3300000>;
- regulator-name = "3.3V_SW";
- };
};
&can1 {
@@ -62,102 +38,22 @@
/* I2C1_SDA/SCL on MXM3 209/211 (e.g. RTC on carrier board) */
&i2c1 {
- status = "okay";
-
+ /* PCIe Switch */
pcie-switch@58 {
compatible = "plx,pex8605";
reg = <0x58>;
};
-
- /* M41T0M6 real time clock on carrier board */
- rtc_i2c: rtc@68 {
- compatible = "st,m41t0";
- reg = <0x68>;
- };
-};
-
-/*
- * I2C3_SDA/SCL (CAM) on MXM3 pin 201/203 (e.g. camera sensor on carrier
- * board)
- */
-&i2c3 {
- status = "okay";
};
&pcie {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_reset_moci>;
- /* active-high meaning opposite of regular PERST# active-low polarity */
- reset-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
- reset-gpio-active-high;
vpcie-supply = <&reg_pcie_switch>;
status = "okay";
};
-&pwm1 {
- status = "okay";
-};
-
-&pwm2 {
- status = "okay";
-};
-
-&pwm3 {
- status = "okay";
-};
-
-&pwm4 {
- status = "okay";
-};
-
-&reg_usb_host_vbus {
- status = "okay";
-};
-
-&reg_usb_otg_vbus {
- status = "okay";
-};
-
-&sata {
- status = "okay";
-};
-
&sound_spdif {
status = "okay";
};
-&spdif {
- status = "okay";
-};
-
-&uart1 {
- status = "okay";
-};
-
-&uart2 {
- status = "okay";
-};
-
-&uart4 {
- status = "okay";
-};
-
-&uart5 {
- status = "okay";
-};
-
-&usbh1 {
- disable-over-current;
- vbus-supply = <&reg_usb_host_vbus>;
- status = "okay";
-};
-
-&usbotg {
- disable-over-current;
- vbus-supply = <&reg_usb_otg_vbus>;
- status = "okay";
-};
-
/* MMC1 */
&usdhc1 {
status = "okay";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dtsi b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dtsi
new file mode 100644
index 000000000000..b6c45ad3f430
--- /dev/null
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-eval.dtsi
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2014-2024 Toradex
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include "imx6q.dtsi"
+#include "imx6qdl-apalis.dtsi"
+
+/ {
+ aliases {
+ i2c0 = &i2c1;
+ i2c1 = &i2c3;
+ i2c2 = &i2c2;
+ rtc0 = &rtc_i2c;
+ rtc1 = &snvs_rtc;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ reg_3v3_sw: regulator-3v3-sw {
+ compatible = "regulator-fixed";
+ regulator-always-on;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-name = "3.3V_SW";
+ };
+};
+
+&i2c1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay";
+
+ /* M41T0M6 real time clock on carrier board */
+ rtc_i2c: rtc@68 {
+ compatible = "st,m41t0";
+ reg = <0x68>;
+ };
+};
+
+/*
+ * I2C3_SDA/SCL (CAM) on MXM3 pin 201/203 (e.g. camera sensor on carrier
+ * board)
+ */
+&i2c3 {
+ status = "okay";
+};
+
+&pcie {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reset_moci>;
+ /* active-high meaning opposite of regular PERST# active-low polarity */
+ reset-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ reset-gpio-active-high;
+};
+
+&pwm1 {
+ status = "okay";
+};
+
+&pwm2 {
+ status = "okay";
+};
+
+&pwm3 {
+ status = "okay";
+};
+
+&pwm4 {
+ status = "okay";
+};
+
+&reg_usb_host_vbus {
+ status = "okay";
+};
+
+&reg_usb_otg_vbus {
+ status = "okay";
+};
+
+&sata {
+ status = "okay";
+};
+
+&spdif {
+ status = "okay";
+};
+
+&uart1 {
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&uart4 {
+ status = "okay";
+};
+
+&uart5 {
+ status = "okay";
+};
+
+&usbh1 {
+ disable-over-current;
+ vbus-supply = <&reg_usb_host_vbus>;
+ status = "okay";
+};
+
+&usbotg {
+ disable-over-current;
+ vbus-supply = <&reg_usb_otg_vbus>;
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts
index db8c332df6a1..cad112e05475 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts
@@ -227,7 +227,6 @@
#address-cells = <3>;
#size-cells = <2>;
- #interrupt-cells = <1>;
bridge@2,1 {
compatible = "pci10b5,8605";
@@ -235,7 +234,6 @@
#address-cells = <3>;
#size-cells = <2>;
- #interrupt-cells = <1>;
/* Intel Corporation I210 Gigabit Network Connection */
ethernet@3,0 {
@@ -250,7 +248,6 @@
#address-cells = <3>;
#size-cells = <2>;
- #interrupt-cells = <1>;
/* Intel Corporation I210 Gigabit Network Connection */
switch_nic: ethernet@4,0 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi
index 99f4f6ac71d4..c1ae7c47b442 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi
@@ -245,6 +245,7 @@
reg = <0x74>;
gpio-controller;
#gpio-cells = <2>;
+ #interrupt-cells = <2>;
interrupt-controller;
interrupt-parent = <&gpio2>;
interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
@@ -390,7 +391,6 @@
#address-cells = <3>;
#size-cells = <2>;
- #interrupt-cells = <1>;
};
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
index 2ae93f57fe5a..ea40623d12e5 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
@@ -626,7 +626,6 @@
blocks = <0x5>;
id = <0>;
interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
- interrupt-controller;
interrupt-parent = <&gpio4>;
irq-trigger = <0x1>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi
index 55c90f6393ad..d3a7a6eeb8e0 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi
@@ -550,7 +550,6 @@
blocks = <0x5>;
interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
interrupt-parent = <&gpio6>;
- interrupt-controller;
id = <0>;
irq-trigger = <0x1>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi
index a63e73adc1fc..42b2ba23aefc 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi
@@ -225,7 +225,6 @@
pinctrl-0 = <&pinctrl_pmic>;
interrupt-parent = <&gpio2>;
interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
- interrupt-controller;
onkey {
compatible = "dlg,da9063-onkey";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard.dtsi
index bfade7149080..a955c77cd499 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard.dtsi
@@ -41,6 +41,11 @@
#include <dt-bindings/sound/fsl-imx-audmux.h>
/ {
+ aliases {
+ rtc0 = &carrier_rtc;
+ rtc1 = &snvs_rtc;
+ };
+
/* Will be filled by the bootloader */
memory@10000000 {
device_type = "memory";
@@ -187,7 +192,7 @@
status = "okay";
/* Pro baseboard model */
- rtc@68 {
+ carrier_rtc: rtc@68 {
compatible = "nxp,pcf8523";
reg = <0x68>;
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard2.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard2.dtsi
index 0883ef99cded..e6017f9bf640 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard2.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-hummingboard2.dtsi
@@ -41,6 +41,11 @@
#include <dt-bindings/sound/fsl-imx-audmux.h>
/ {
+ aliases {
+ rtc0 = &pcf8523;
+ rtc1 = &snvs_rtc;
+ };
+
/* Will be filled by the bootloader */
memory@10000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi
index 113974520d54..c0c47adc5866 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi
@@ -124,6 +124,7 @@
reg = <0x58>;
interrupt-parent = <&gpio2>;
interrupts = <9 IRQ_TYPE_LEVEL_LOW>; /* active-low GPIO2_9 */
+ #interrupt-cells = <2>;
interrupt-controller;
regulators {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi
index 86b4269e0e01..85e278eb2016 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi
@@ -100,6 +100,7 @@
interrupt-parent = <&gpio1>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
gpio-controller;
#gpio-cells = <2>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi
index 2731faede1cb..d59d5d0e1d19 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi
@@ -13,10 +13,14 @@
aliases {
can0 = &can1;
can1 = &can2;
+ ethernet0 = &fec;
+ ethernet1 = &lan1;
+ ethernet2 = &lan2;
mdio-gpio0 = &mdio;
nand = &gpmi;
rtc0 = &i2c_rtc;
rtc1 = &snvs;
+ switch0 = &switch;
usb0 = &usbh1;
usb1 = &usbotg;
};
@@ -60,7 +64,7 @@
gpios = <&gpio1 31 GPIO_ACTIVE_HIGH>,
<&gpio1 22 GPIO_ACTIVE_HIGH>;
- switch@0 {
+ switch: switch@0 {
compatible = "microchip,ksz8873";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_switch>;
@@ -73,13 +77,13 @@
#address-cells = <1>;
#size-cells = <0>;
- ports@0 {
+ lan1: ports@0 {
reg = <0>;
phy-mode = "internal";
label = "lan1";
};
- ports@1 {
+ lan2: ports@1 {
reg = <1>;
phy-mode = "internal";
label = "lan2";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl.dtsi
index 81142c523fa8..8431b8a994f4 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl.dtsi
@@ -1158,7 +1158,7 @@
status = "disabled";
};
- weim: weim@21b8000 {
+ weim: memory-controller@21b8000 {
#address-cells = <2>;
#size-cells = <1>;
compatible = "fsl,imx6q-weim";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6sl-tolino-shine2hd.dts b/arch/arm/boot/dts/nxp/imx/imx6sl-tolino-shine2hd.dts
index 815119c12bd4..5636fb3661e8 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6sl-tolino-shine2hd.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6sl-tolino-shine2hd.dts
@@ -141,8 +141,10 @@
interrupts = <6 IRQ_TYPE_EDGE_FALLING>;
vdd-supply = <&ldo1_reg>;
reset-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
- x-size = <1072>;
- y-size = <1448>;
+ touchscreen-size-x = <1072>;
+ touchscreen-size-y = <1448>;
+ touchscreen-swapped-x-y;
+ touchscreen-inverted-x;
};
/* TODO: TPS65185 PMIC for E Ink at 0x68 */
diff --git a/arch/arm/boot/dts/nxp/imx/imx6sl.dtsi b/arch/arm/boot/dts/nxp/imx/imx6sl.dtsi
index 28111efb19a6..6aa61235e39e 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6sl.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6sl.dtsi
@@ -949,7 +949,7 @@
clocks = <&clks IMX6SL_CLK_DUMMY>;
};
- weim: weim@21b8000 {
+ weim: memory-controller@21b8000 {
#address-cells = <2>;
#size-cells = <1>;
reg = <0x021b8000 0x4000>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi b/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
index df3a375f0a3e..0de359d62a47 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
@@ -1107,7 +1107,7 @@
status = "disabled";
};
- weim: weim@21b8000 {
+ weim: memory-controller@21b8000 {
#address-cells = <2>;
#size-cells = <1>;
compatible = "fsl,imx6sx-weim", "fsl,imx6q-weim";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-14x14-evk.dtsi
index 2ac40d69425b..f10f0525490b 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-14x14-evk.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-14x14-evk.dtsi
@@ -321,7 +321,7 @@
&tsc {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tsc>;
- xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+ xnur-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
measure-delay-time = <0xffff>;
pre-charge-time = <0xfff>;
status = "okay";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
index 875ae699c5cb..2ca18f3dad0a 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -203,7 +203,7 @@
&tsc {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tsc>;
- xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+ xnur-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
};
&sai2 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6uldev.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6uldev.dtsi
index 18cac19aa9b0..af337f18a266 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6uldev.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6uldev.dtsi
@@ -156,7 +156,7 @@
&tsc {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tsc>;
- xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+ xnur-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
measure-delay-time = <0xffff>;
pre-charge-time = <0xffff>;
status = "okay";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi
index a27a7554c2e7..235aa676618b 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi
@@ -370,7 +370,7 @@
};
};
- tsc: tsc@2040000 {
+ tsc: touchscreen@2040000 {
compatible = "fsl,imx6ul-tsc";
reg = <0x02040000 0x4000>, <0x0219c000 0x4000>;
interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
@@ -538,6 +538,8 @@
fsl,num-rx-queues = <1>;
fsl,stop-mode = <&gpr 0x10 4>;
fsl,magic-packet;
+ nvmem-cells = <&fec2_mac_addr>;
+ nvmem-cell-names = "mac-address";
status = "disabled";
};
@@ -638,6 +640,7 @@
nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>;
nvmem-cell-names = "calib", "temp_grade";
clocks = <&clks IMX6UL_CLK_PLL3_USB_OTG>;
+ #thermal-sensor-cells = <0>;
};
};
@@ -855,7 +858,6 @@
clocks = <&clks IMX6UL_CLK_USBOH3>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc 0>;
- fsl,anatop = <&anatop>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
@@ -897,6 +899,8 @@
fsl,num-rx-queues = <1>;
fsl,stop-mode = <&gpr 0x10 3>;
fsl,magic-packet;
+ nvmem-cells = <&fec1_mac_addr>;
+ nvmem-cell-names = "mac-address";
status = "disabled";
};
@@ -975,7 +979,7 @@
clocks = <&clks IMX6UL_CLK_MMDC_P0_IPG>;
};
- weim: weim@21b8000 {
+ weim: memory-controller@21b8000 {
#address-cells = <2>;
#size-cells = <1>;
compatible = "fsl,imx6ul-weim", "fsl,imx6q-weim";
@@ -1004,6 +1008,14 @@
cpu_speed_grade: speed-grade@10 {
reg = <0x10 4>;
};
+
+ fec1_mac_addr: mac-addr@88 {
+ reg = <0x88 6>;
+ };
+
+ fec2_mac_addr: mac-addr@8e {
+ reg = <0x8e 6>;
+ };
};
csi: csi@21c4000 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som-cfg-sdcard.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som-cfg-sdcard.dtsi
index 040421f9c970..5e39f8dc1351 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som-cfg-sdcard.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som-cfg-sdcard.dtsi
@@ -14,10 +14,12 @@
*/
/*
- * To use usdhc1 as SD card, the WiFi node must be deleted.
+ * To use usdhc1 as SD card, the WiFi node must be deleted. The associated
+ * pwrseq node is also deleted in order to ensure that GPIO H is released.
* BT is also not available, so remove BT from the UART node.
*/
/delete-node/ &brcmf;
+/delete-node/ &usdhc1_pwrseq;
/delete-node/ &bluetooth;
/ {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som.dtsi
index 830b5a5064f2..a74f5273f9b3 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull-dhcom-som.dtsi
@@ -52,7 +52,7 @@
};
/* SoM with WiFi/BT: WiFi pin WL_REG_ON is connected to a DHCOM GPIO */
- /omit-if-no-ref/ usdhc1_pwrseq: usdhc1-pwrseq {
+ usdhc1_pwrseq: usdhc1-pwrseq {
compatible = "mmc-pwrseq-simple";
reset-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; /* GPIO H */
};
@@ -273,7 +273,7 @@
pinctrl-names = "default";
pre-charge-time = <0xfff>;
touchscreen-average-samples = <32>;
- xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+ xnur-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
};
/* DHCOM UART1 */
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-dhcor-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-dhcor-som.dtsi
index 45315adfaa86..75486e1b0c15 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull-dhcor-som.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull-dhcor-som.dtsi
@@ -28,10 +28,14 @@
/*
* Due to the design as a solderable SOM, there are no capacitors
* below the SoC, therefore higher voltages are required.
+ * Due to CPU lifetime consideration of the SoC manufacturer and
+ * the preferred area of operation in the industrial related
+ * environment, set the maximum frequency for each DHCOM i.MX6ULL
+ * to 792MHz, as with the industrial type.
*/
+ clock-frequency = <792000000>;
operating-points = <
/* kHz uV */
- 900000 1275000
792000 1250000 /* Voltage increased */
528000 1175000
396000 1025000
@@ -39,7 +43,6 @@
>;
fsl,soc-operating-points = <
/* KHz uV */
- 900000 1250000
792000 1250000 /* Voltage increased */
528000 1175000
396000 1175000
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi
index 44cc4ff1d0df..d12fb44aeb14 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi
@@ -116,7 +116,7 @@
tpm_tis: tpm@1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tpm>;
- compatible = "tcg,tpm_tis-spi";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
reg = <1>;
spi-max-frequency = <20000000>;
interrupt-parent = <&gpio5>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull.dtsi
index 2bccd45e9fc2..8a1776067ecc 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull.dtsi
@@ -75,7 +75,7 @@
clocks = <&clks IMX6UL_CLK_DUMMY>;
};
- iomuxc_snvs: iomuxc-snvs@2290000 {
+ iomuxc_snvs: pinctrl@2290000 {
compatible = "fsl,imx6ull-iomuxc-snvs";
reg = <0x02290000 0x4000>;
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
index 3df6dff7734a..1235a71c6abe 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
@@ -18,6 +18,8 @@
mmc0 = &usdhc3;
mmc1 = &usdhc1;
/delete-property/ mmc2;
+ rtc0 = &ds1339;
+ rtc1 = &snvs_rtc;
};
beeper {
@@ -32,11 +34,18 @@
gpio_buttons: gpio-keys {
compatible = "gpio-keys";
+ /*
+ * NOTE: These buttons are attached to a GPIO-expander.
+ * Enabling wakeup-source, enables wakeup on all inputs.
+ * If PE_GPIO[3..6] are used as inputs, they cause a
+ * wakeup as well.
+ */
button-0 {
/* #SWITCH_A */
label = "S11";
linux,code = <KEY_1>;
gpios = <&pca9555 13 GPIO_ACTIVE_LOW>;
+ wakeup-source;
};
button-1 {
@@ -44,6 +53,7 @@
label = "S12";
linux,code = <KEY_2>;
gpios = <&pca9555 14 GPIO_ACTIVE_LOW>;
+ wakeup-source;
};
button-2 {
@@ -51,6 +61,7 @@
label = "S13";
linux,code = <KEY_3>;
gpios = <&pca9555 15 GPIO_ACTIVE_LOW>;
+ wakeup-source;
};
};
@@ -171,6 +182,14 @@
regulator-always-on;
};
+ reg_vcc_3v3: regulator-vcc-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "VCC3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
sound {
compatible = "fsl,imx-audio-tlv320aic32x4";
model = "imx-audio-tlv320aic32x4";
@@ -198,9 +217,9 @@
&ecspi1 {
pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_ecspi1>;
+ pinctrl-0 = <&pinctrl_ecspi1>, <&pinctrl_ecspi1_ss0>;
cs-gpios = <&gpio4 0 GPIO_ACTIVE_LOW>, <&gpio4 1 GPIO_ACTIVE_LOW>,
- <&gpio4 2 GPIO_ACTIVE_LOW>;
+ <&gpio4 2 GPIO_ACTIVE_LOW>, <&gpio4 19 GPIO_ACTIVE_LOW>;
status = "okay";
};
@@ -214,8 +233,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet1>;
phy-mode = "rgmii-id";
- phy-reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>;
- phy-reset-duration = <1>;
phy-supply = <&reg_fec1_pwdn>;
phy-handle = <&ethphy1_0>;
fsl,magic-packet;
@@ -228,10 +245,15 @@
ethphy1_0: ethernet-phy@0 {
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enet1_phy>;
ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_50_NS>;
ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_50_NS>;
ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
ti,clk-output-sel = <DP83867_CLK_O_SEL_OFF>;
+ reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <1000>;
+ reset-deassert-us = <500>;
};
};
};
@@ -290,13 +312,17 @@
lm75: temperature-sensor@49 {
compatible = "national,lm75";
reg = <0x49>;
+ vs-supply = <&reg_vcc_3v3>;
};
};
&i2c2 {
clock-frequency = <100000>;
- pinctrl-names = "default";
+ pinctrl-names = "default", "gpio";
pinctrl-0 = <&pinctrl_i2c2>;
+ pinctrl-1 = <&pinctrl_i2c2_recovery>;
+ scl-gpios = <&gpio4 10 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&gpio4 11 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
status = "okay";
tlv320aic32x4: audio-codec@18 {
@@ -319,13 +345,17 @@
interrupts = <12 IRQ_TYPE_EDGE_FALLING>;
interrupt-controller;
#interrupt-cells = <2>;
+ vcc-supply = <&reg_vcc_3v3>;
};
};
&i2c3 {
clock-frequency = <100000>;
- pinctrl-names = "default";
+ pinctrl-names = "default", "gpio";
pinctrl-0 = <&pinctrl_i2c3>;
+ pinctrl-1 = <&pinctrl_i2c3_recovery>;
+ scl-gpios = <&gpio4 12 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&gpio4 13 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
status = "okay";
};
@@ -334,213 +364,213 @@
pinctrl-0 = <&pinctrl_hog_mba7_1>;
pinctrl_ecspi1: ecspi1grp {
+ fsl,pins =
+ <MX7D_PAD_ECSPI1_MISO__ECSPI1_MISO 0x7c>,
+ <MX7D_PAD_ECSPI1_MOSI__ECSPI1_MOSI 0x74>,
+ <MX7D_PAD_ECSPI1_SCLK__ECSPI1_SCLK 0x74>,
+ <MX7D_PAD_UART1_RX_DATA__GPIO4_IO0 0x74>,
+ <MX7D_PAD_UART1_TX_DATA__GPIO4_IO1 0x74>,
+ <MX7D_PAD_UART2_RX_DATA__GPIO4_IO2 0x74>;
+ };
+
+ pinctrl_ecspi1_ss0: ecspi1ss0grp {
fsl,pins = <
- MX7D_PAD_ECSPI1_MISO__ECSPI1_MISO 0x7c
- MX7D_PAD_ECSPI1_MOSI__ECSPI1_MOSI 0x74
- MX7D_PAD_ECSPI1_SCLK__ECSPI1_SCLK 0x74
- MX7D_PAD_UART1_RX_DATA__GPIO4_IO0 0x74
- MX7D_PAD_UART1_TX_DATA__GPIO4_IO1 0x74
- MX7D_PAD_UART2_RX_DATA__GPIO4_IO2 0x74
+ MX7D_PAD_ECSPI1_SS0__GPIO4_IO19 0x74
>;
};
pinctrl_ecspi2: ecspi2grp {
- fsl,pins = <
- MX7D_PAD_ECSPI2_MISO__ECSPI2_MISO 0x7c
- MX7D_PAD_ECSPI2_MOSI__ECSPI2_MOSI 0x74
- MX7D_PAD_ECSPI2_SCLK__ECSPI2_SCLK 0x74
- MX7D_PAD_ECSPI2_SS0__ECSPI2_SS0 0x74
- >;
+ fsl,pins =
+ <MX7D_PAD_ECSPI2_MISO__ECSPI2_MISO 0x7c>,
+ <MX7D_PAD_ECSPI2_MOSI__ECSPI2_MOSI 0x74>,
+ <MX7D_PAD_ECSPI2_SCLK__ECSPI2_SCLK 0x74>,
+ <MX7D_PAD_ECSPI2_SS0__ECSPI2_SS0 0x74>;
};
pinctrl_enet1: enet1grp {
- fsl,pins = <
- MX7D_PAD_GPIO1_IO10__ENET1_MDIO 0x02
- MX7D_PAD_GPIO1_IO11__ENET1_MDC 0x00
- MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC 0x71
- MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0 0x71
- MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1 0x71
- MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2 0x71
- MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3 0x71
- MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL 0x71
- MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC 0x79
- MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x79
- MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x79
- MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2 0x79
- MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3 0x79
- MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x79
+ fsl,pins =
+ <MX7D_PAD_GPIO1_IO10__ENET1_MDIO 0x02>,
+ <MX7D_PAD_GPIO1_IO11__ENET1_MDC 0x00>,
+ <MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC 0x71>,
+ <MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0 0x71>,
+ <MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1 0x71>,
+ <MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2 0x71>,
+ <MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3 0x71>,
+ <MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL 0x71>,
+ <MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC 0x79>,
+ <MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x79>,
+ <MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x79>,
+ <MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2 0x79>,
+ <MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3 0x79>,
+ <MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x79>;
+ };
+
+ pinctrl_enet1_phy: enet1phygrp {
+ fsl,pins =
/* Reset: SION, 100kPU, SRE_FAST, DSE_X1 */
- MX7D_PAD_ENET1_COL__GPIO7_IO15 0x40000070
+ <MX7D_PAD_ENET1_COL__GPIO7_IO15 0x40000070>,
/* INT/PWDN: SION, 100kPU, HYS, SRE_FAST, DSE_X1 */
- MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x40000078
- >;
+ <MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x40000078>;
};
pinctrl_flexcan1: flexcan1grp {
- fsl,pins = <
- MX7D_PAD_GPIO1_IO12__FLEXCAN1_RX 0x5a
- MX7D_PAD_GPIO1_IO13__FLEXCAN1_TX 0x52
- >;
+ fsl,pins =
+ <MX7D_PAD_GPIO1_IO12__FLEXCAN1_RX 0x5a>,
+ <MX7D_PAD_GPIO1_IO13__FLEXCAN1_TX 0x52>;
};
pinctrl_flexcan2: flexcan2grp {
- fsl,pins = <
- MX7D_PAD_GPIO1_IO14__FLEXCAN2_RX 0x5a
- MX7D_PAD_GPIO1_IO15__FLEXCAN2_TX 0x52
- >;
+ fsl,pins =
+ <MX7D_PAD_GPIO1_IO14__FLEXCAN2_RX 0x5a>,
+ <MX7D_PAD_GPIO1_IO15__FLEXCAN2_TX 0x52>;
};
pinctrl_hog_mba7_1: hogmba71grp {
- fsl,pins = <
+ fsl,pins =
/* Limitation: WDOG2_B / WDOG2_RESET not usable */
- MX7D_PAD_ENET1_RX_CLK__GPIO7_IO13 0x4000007c
- MX7D_PAD_ENET1_CRS__GPIO7_IO14 0x40000074
+ <MX7D_PAD_ENET1_RX_CLK__GPIO7_IO13 0x4000007c>,
+ <MX7D_PAD_ENET1_CRS__GPIO7_IO14 0x40000074>,
/* #BOOT_EN */
- MX7D_PAD_UART2_TX_DATA__GPIO4_IO3 0x40000010
- >;
+ <MX7D_PAD_UART2_TX_DATA__GPIO4_IO3 0x40000010>;
};
pinctrl_i2c2: i2c2grp {
- fsl,pins = <
- MX7D_PAD_I2C2_SCL__I2C2_SCL 0x40000078
- MX7D_PAD_I2C2_SDA__I2C2_SDA 0x40000078
- >;
+ fsl,pins =
+ <MX7D_PAD_I2C2_SCL__I2C2_SCL 0x40000078>,
+ <MX7D_PAD_I2C2_SDA__I2C2_SDA 0x40000078>;
+ };
+
+ pinctrl_i2c2_recovery: i2c2recoverygrp {
+ fsl,pins =
+ <MX7D_PAD_I2C2_SCL__GPIO4_IO10 0x40000078>,
+ <MX7D_PAD_I2C2_SDA__GPIO4_IO11 0x40000078>;
};
pinctrl_i2c3: i2c3grp {
- fsl,pins = <
- MX7D_PAD_I2C3_SCL__I2C3_SCL 0x40000078
- MX7D_PAD_I2C3_SDA__I2C3_SDA 0x40000078
- >;
+ fsl,pins =
+ <MX7D_PAD_I2C3_SCL__I2C3_SCL 0x40000078>,
+ <MX7D_PAD_I2C3_SDA__I2C3_SDA 0x40000078>;
+ };
+
+ pinctrl_i2c3_recovery: i2c3recoverygrp {
+ fsl,pins =
+ <MX7D_PAD_I2C3_SCL__GPIO4_IO12 0x40000078>,
+ <MX7D_PAD_I2C3_SDA__GPIO4_IO13 0x40000078>;
};
pinctrl_pca9555: pca95550grp {
- fsl,pins = <
- MX7D_PAD_ENET1_TX_CLK__GPIO7_IO12 0x78
- >;
+ fsl,pins =
+ <MX7D_PAD_ENET1_TX_CLK__GPIO7_IO12 0x78>;
};
pinctrl_sai1: sai1grp {
- fsl,pins = <
- MX7D_PAD_SAI1_MCLK__SAI1_MCLK 0x11
- MX7D_PAD_SAI1_RX_BCLK__SAI1_RX_BCLK 0x1c
- MX7D_PAD_SAI1_RX_DATA__SAI1_RX_DATA0 0x1c
- MX7D_PAD_SAI1_RX_SYNC__SAI2_RX_SYNC 0x1c
-
- MX7D_PAD_SAI1_TX_BCLK__SAI1_TX_BCLK 0x1c
- MX7D_PAD_SAI1_TX_DATA__SAI1_TX_DATA0 0x14
- MX7D_PAD_SAI1_TX_SYNC__SAI1_TX_SYNC 0x14
- >;
+ fsl,pins =
+ <MX7D_PAD_SAI1_MCLK__SAI1_MCLK 0x11>,
+ <MX7D_PAD_SAI1_RX_BCLK__SAI1_RX_BCLK 0x1c>,
+ <MX7D_PAD_SAI1_RX_DATA__SAI1_RX_DATA0 0x1c>,
+ <MX7D_PAD_SAI1_RX_SYNC__SAI2_RX_SYNC 0x1c>,
+
+ <MX7D_PAD_SAI1_TX_BCLK__SAI1_TX_BCLK 0x1c>,
+ <MX7D_PAD_SAI1_TX_DATA__SAI1_TX_DATA0 0x14>,
+ <MX7D_PAD_SAI1_TX_SYNC__SAI1_TX_SYNC 0x14>;
};
pinctrl_uart3: uart3grp {
- fsl,pins = <
- MX7D_PAD_UART3_RX_DATA__UART3_DCE_RX 0x7e
- MX7D_PAD_UART3_TX_DATA__UART3_DCE_TX 0x76
- MX7D_PAD_UART3_CTS_B__UART3_DCE_CTS 0x76
- MX7D_PAD_UART3_RTS_B__UART3_DCE_RTS 0x7e
- >;
+ fsl,pins =
+ <MX7D_PAD_UART3_RX_DATA__UART3_DCE_RX 0x7e>,
+ <MX7D_PAD_UART3_TX_DATA__UART3_DCE_TX 0x76>,
+ <MX7D_PAD_UART3_CTS_B__UART3_DCE_CTS 0x76>,
+ <MX7D_PAD_UART3_RTS_B__UART3_DCE_RTS 0x7e>;
};
pinctrl_uart4: uart4grp {
- fsl,pins = <
- MX7D_PAD_SAI2_TX_SYNC__UART4_DCE_RX 0x7e
- MX7D_PAD_SAI2_TX_BCLK__UART4_DCE_TX 0x76
- MX7D_PAD_SAI2_RX_DATA__UART4_DCE_CTS 0x76
- MX7D_PAD_SAI2_TX_DATA__UART4_DCE_RTS 0x7e
- >;
+ fsl,pins =
+ <MX7D_PAD_SAI2_TX_SYNC__UART4_DCE_RX 0x7e>,
+ <MX7D_PAD_SAI2_TX_BCLK__UART4_DCE_TX 0x76>,
+ <MX7D_PAD_SAI2_RX_DATA__UART4_DCE_CTS 0x76>,
+ <MX7D_PAD_SAI2_TX_DATA__UART4_DCE_RTS 0x7e>;
};
pinctrl_uart5: uart5grp {
- fsl,pins = <
- MX7D_PAD_I2C4_SCL__UART5_DCE_RX 0x7e
- MX7D_PAD_I2C4_SDA__UART5_DCE_TX 0x76
- >;
+ fsl,pins =
+ <MX7D_PAD_I2C4_SCL__UART5_DCE_RX 0x7e>,
+ <MX7D_PAD_I2C4_SDA__UART5_DCE_TX 0x76>;
};
pinctrl_uart6: uart6grp {
- fsl,pins = <
- MX7D_PAD_EPDC_DATA08__UART6_DCE_RX 0x7d
- MX7D_PAD_EPDC_DATA09__UART6_DCE_TX 0x75
- MX7D_PAD_EPDC_DATA11__UART6_DCE_CTS 0x75
- MX7D_PAD_EPDC_DATA10__UART6_DCE_RTS 0x7d
- >;
+ fsl,pins =
+ <MX7D_PAD_EPDC_DATA08__UART6_DCE_RX 0x7d>,
+ <MX7D_PAD_EPDC_DATA09__UART6_DCE_TX 0x75>,
+ <MX7D_PAD_EPDC_DATA11__UART6_DCE_CTS 0x75>,
+ <MX7D_PAD_EPDC_DATA10__UART6_DCE_RTS 0x7d>;
};
pinctrl_uart7: uart7grp {
- fsl,pins = <
- MX7D_PAD_EPDC_DATA12__UART7_DCE_RX 0x7e
- MX7D_PAD_EPDC_DATA13__UART7_DCE_TX 0x76
- MX7D_PAD_EPDC_DATA15__UART7_DCE_CTS 0x76
+ fsl,pins =
+ <MX7D_PAD_EPDC_DATA12__UART7_DCE_RX 0x7e>,
+ <MX7D_PAD_EPDC_DATA13__UART7_DCE_TX 0x76>,
+ <MX7D_PAD_EPDC_DATA15__UART7_DCE_CTS 0x76>,
/* Limitation: RTS is not connected */
- MX7D_PAD_EPDC_DATA14__UART7_DCE_RTS 0x7e
- >;
+ <MX7D_PAD_EPDC_DATA14__UART7_DCE_RTS 0x7e>;
};
- pinctrl_usdhc1_gpio: usdhc1grp_gpio {
- fsl,pins = <
+ pinctrl_usdhc1_gpio: usdhc1_gpiogrp {
+ fsl,pins =
/* WP */
- MX7D_PAD_SD1_WP__GPIO5_IO1 0x7c
+ <MX7D_PAD_SD1_WP__GPIO5_IO1 0x7c>,
/* CD */
- MX7D_PAD_SD1_CD_B__GPIO5_IO0 0x7c
+ <MX7D_PAD_SD1_CD_B__GPIO5_IO0 0x7c>,
/* VSELECT */
- MX7D_PAD_GPIO1_IO08__SD1_VSELECT 0x59
- >;
+ <MX7D_PAD_GPIO1_IO08__SD1_VSELECT 0x59>;
};
pinctrl_usdhc1: usdhc1grp {
- fsl,pins = <
- MX7D_PAD_SD1_CMD__SD1_CMD 0x5e
- MX7D_PAD_SD1_CLK__SD1_CLK 0x57
- MX7D_PAD_SD1_DATA0__SD1_DATA0 0x5e
- MX7D_PAD_SD1_DATA1__SD1_DATA1 0x5e
- MX7D_PAD_SD1_DATA2__SD1_DATA2 0x5e
- MX7D_PAD_SD1_DATA3__SD1_DATA3 0x5e
- >;
- };
-
- pinctrl_usdhc1_100mhz: usdhc1grp_100mhz {
- fsl,pins = <
- MX7D_PAD_SD1_CMD__SD1_CMD 0x5a
- MX7D_PAD_SD1_CLK__SD1_CLK 0x57
- MX7D_PAD_SD1_DATA0__SD1_DATA0 0x5a
- MX7D_PAD_SD1_DATA1__SD1_DATA1 0x5a
- MX7D_PAD_SD1_DATA2__SD1_DATA2 0x5a
- MX7D_PAD_SD1_DATA3__SD1_DATA3 0x5a
- >;
- };
-
- pinctrl_usdhc1_200mhz: usdhc1grp_200mhz {
- fsl,pins = <
- MX7D_PAD_SD1_CMD__SD1_CMD 0x5b
- MX7D_PAD_SD1_CLK__SD1_CLK 0x57
- MX7D_PAD_SD1_DATA0__SD1_DATA0 0x5b
- MX7D_PAD_SD1_DATA1__SD1_DATA1 0x5b
- MX7D_PAD_SD1_DATA2__SD1_DATA2 0x5b
- MX7D_PAD_SD1_DATA3__SD1_DATA3 0x5b
- >;
+ fsl,pins =
+ <MX7D_PAD_SD1_CMD__SD1_CMD 0x5e>,
+ <MX7D_PAD_SD1_CLK__SD1_CLK 0x57>,
+ <MX7D_PAD_SD1_DATA0__SD1_DATA0 0x5e>,
+ <MX7D_PAD_SD1_DATA1__SD1_DATA1 0x5e>,
+ <MX7D_PAD_SD1_DATA2__SD1_DATA2 0x5e>,
+ <MX7D_PAD_SD1_DATA3__SD1_DATA3 0x5e>;
+ };
+
+ pinctrl_usdhc1_100mhz: usdhc1_100mhzgrp {
+ fsl,pins =
+ <MX7D_PAD_SD1_CMD__SD1_CMD 0x5a>,
+ <MX7D_PAD_SD1_CLK__SD1_CLK 0x57>,
+ <MX7D_PAD_SD1_DATA0__SD1_DATA0 0x5a>,
+ <MX7D_PAD_SD1_DATA1__SD1_DATA1 0x5a>,
+ <MX7D_PAD_SD1_DATA2__SD1_DATA2 0x5a>,
+ <MX7D_PAD_SD1_DATA3__SD1_DATA3 0x5a>;
+ };
+
+ pinctrl_usdhc1_200mhz: usdhc1_200mhzgrp {
+ fsl,pins =
+ <MX7D_PAD_SD1_CMD__SD1_CMD 0x5b>,
+ <MX7D_PAD_SD1_CLK__SD1_CLK 0x57>,
+ <MX7D_PAD_SD1_DATA0__SD1_DATA0 0x5b>,
+ <MX7D_PAD_SD1_DATA1__SD1_DATA1 0x5b>,
+ <MX7D_PAD_SD1_DATA2__SD1_DATA2 0x5b>,
+ <MX7D_PAD_SD1_DATA3__SD1_DATA3 0x5b>;
};
};
&iomuxc_lpsr {
pinctrl_pwm1: pwm1grp {
- fsl,pins = <
+ fsl,pins =
/* LCD_CONTRAST */
- MX7D_PAD_LPSR_GPIO1_IO01__PWM1_OUT 0x50
- >;
+ <MX7D_PAD_LPSR_GPIO1_IO01__PWM1_OUT 0x50>;
};
pinctrl_usbotg1: usbotg1grp {
- fsl,pins = <
- MX7D_PAD_LPSR_GPIO1_IO04__USB_OTG1_OC 0x5c
- MX7D_PAD_LPSR_GPIO1_IO05__GPIO1_IO5 0x59
- >;
+ fsl,pins =
+ <MX7D_PAD_LPSR_GPIO1_IO04__USB_OTG1_OC 0x5c>,
+ <MX7D_PAD_LPSR_GPIO1_IO05__GPIO1_IO5 0x59>;
};
pinctrl_wdog1: wdog1grp {
- fsl,pins = <
- MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B 0x30
- >;
+ fsl,pins =
+ <MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B 0x30>;
};
};
@@ -560,6 +590,10 @@
status = "okay";
};
+&snvs_pwrkey {
+ status = "okay";
+};
+
&uart3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart3>;
@@ -605,6 +639,7 @@
};
&usbh {
+ disable-over-current;
status = "okay";
};
@@ -630,6 +665,8 @@
vmmc-supply = <&reg_sd1_vmmc>;
bus-width = <4>;
no-1-8-v;
+ no-sdio;
+ no-emmc;
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-tqma7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-tqma7.dtsi
index 3fc3130f9def..028961eb7108 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7-tqma7.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7-tqma7.dtsi
@@ -30,8 +30,11 @@
};
&i2c1 {
- pinctrl-names = "default";
+ pinctrl-names = "default", "gpio";
pinctrl-0 = <&pinctrl_i2c1>;
+ pinctrl-1 = <&pinctrl_i2c1_recovery>;
+ scl-gpios = <&gpio4 8 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&gpio4 9 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
clock-frequency = <100000>;
status = "okay";
@@ -109,7 +112,7 @@
};
vgen4_reg: v33 {
- regulator-min-microvolt = <2850000>;
+ regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-always-on;
};
@@ -135,7 +138,7 @@
};
/* NXP SE97BTP with temperature sensor + eeprom, TQMa7x 02xx */
- se97b: temperature-sensor-eeprom@1e {
+ se97b: temperature-sensor@1e {
compatible = "nxp,se97b", "jedec,jc-42.4-temp";
reg = <0x1e>;
};
@@ -143,15 +146,18 @@
/* ST M24C64 */
m24c64: eeprom@50 {
compatible = "atmel,24c64";
+ read-only;
reg = <0x50>;
pagesize = <32>;
+ vcc-supply = <&vgen4_reg>;
status = "okay";
};
at24c02: eeprom@56 {
- compatible = "atmel,24c02";
+ compatible = "nxp,se97b", "atmel,24c02";
reg = <0x56>;
pagesize = <16>;
+ vcc-supply = <&vgen4_reg>;
status = "okay";
};
@@ -163,91 +169,89 @@
&iomuxc {
pinctrl_i2c1: i2c1grp {
- fsl,pins = <
- MX7D_PAD_I2C1_SDA__I2C1_SDA 0x40000078
- MX7D_PAD_I2C1_SCL__I2C1_SCL 0x40000078
- >;
+ fsl,pins =
+ <MX7D_PAD_I2C1_SDA__I2C1_SDA 0x40000078>,
+ <MX7D_PAD_I2C1_SCL__I2C1_SCL 0x40000078>;
+ };
+
+ pinctrl_i2c1_recovery: i2c1recoverygrp {
+ fsl,pins =
+ <MX7D_PAD_I2C1_SDA__GPIO4_IO9 0x40000078>,
+ <MX7D_PAD_I2C1_SCL__GPIO4_IO8 0x40000078>;
};
pinctrl_pmic1: pmic1grp {
- fsl,pins = <
- MX7D_PAD_SD2_RESET_B__GPIO5_IO11 0x4000005C
- >;
+ fsl,pins =
+ <MX7D_PAD_SD2_RESET_B__GPIO5_IO11 0x4000005C>;
};
pinctrl_qspi: qspigrp {
- fsl,pins = <
- MX7D_PAD_EPDC_DATA00__QSPI_A_DATA0 0x5A
- MX7D_PAD_EPDC_DATA01__QSPI_A_DATA1 0x5A
- MX7D_PAD_EPDC_DATA02__QSPI_A_DATA2 0x5A
- MX7D_PAD_EPDC_DATA03__QSPI_A_DATA3 0x5A
- MX7D_PAD_EPDC_DATA05__QSPI_A_SCLK 0x11
- MX7D_PAD_EPDC_DATA06__QSPI_A_SS0_B 0x54
- MX7D_PAD_EPDC_DATA07__QSPI_A_SS1_B 0x54
- >;
+ fsl,pins =
+ <MX7D_PAD_EPDC_DATA00__QSPI_A_DATA0 0x5A>,
+ <MX7D_PAD_EPDC_DATA01__QSPI_A_DATA1 0x5A>,
+ <MX7D_PAD_EPDC_DATA02__QSPI_A_DATA2 0x5A>,
+ <MX7D_PAD_EPDC_DATA03__QSPI_A_DATA3 0x5A>,
+ <MX7D_PAD_EPDC_DATA05__QSPI_A_SCLK 0x11>,
+ <MX7D_PAD_EPDC_DATA06__QSPI_A_SS0_B 0x54>,
+ <MX7D_PAD_EPDC_DATA07__QSPI_A_SS1_B 0x54>;
};
pinctrl_qspi_reset: qspi_resetgrp {
- fsl,pins = <
+ fsl,pins =
/* #QSPI_RESET */
- MX7D_PAD_EPDC_DATA04__GPIO2_IO4 0x52
- >;
+ <MX7D_PAD_EPDC_DATA04__GPIO2_IO4 0x52>;
};
pinctrl_usdhc3: usdhc3grp {
- fsl,pins = <
- MX7D_PAD_SD3_CMD__SD3_CMD 0x59
- MX7D_PAD_SD3_CLK__SD3_CLK 0x56
- MX7D_PAD_SD3_DATA0__SD3_DATA0 0x59
- MX7D_PAD_SD3_DATA1__SD3_DATA1 0x59
- MX7D_PAD_SD3_DATA2__SD3_DATA2 0x59
- MX7D_PAD_SD3_DATA3__SD3_DATA3 0x59
- MX7D_PAD_SD3_DATA4__SD3_DATA4 0x59
- MX7D_PAD_SD3_DATA5__SD3_DATA5 0x59
- MX7D_PAD_SD3_DATA6__SD3_DATA6 0x59
- MX7D_PAD_SD3_DATA7__SD3_DATA7 0x59
- MX7D_PAD_SD3_STROBE__SD3_STROBE 0x19
- >;
+ fsl,pins =
+ <MX7D_PAD_SD3_CMD__SD3_CMD 0x59>,
+ <MX7D_PAD_SD3_CLK__SD3_CLK 0x56>,
+ <MX7D_PAD_SD3_DATA0__SD3_DATA0 0x59>,
+ <MX7D_PAD_SD3_DATA1__SD3_DATA1 0x59>,
+ <MX7D_PAD_SD3_DATA2__SD3_DATA2 0x59>,
+ <MX7D_PAD_SD3_DATA3__SD3_DATA3 0x59>,
+ <MX7D_PAD_SD3_DATA4__SD3_DATA4 0x59>,
+ <MX7D_PAD_SD3_DATA5__SD3_DATA5 0x59>,
+ <MX7D_PAD_SD3_DATA6__SD3_DATA6 0x59>,
+ <MX7D_PAD_SD3_DATA7__SD3_DATA7 0x59>,
+ <MX7D_PAD_SD3_STROBE__SD3_STROBE 0x19>;
};
- pinctrl_usdhc3_100mhz: usdhc3grp_100mhz {
- fsl,pins = <
- MX7D_PAD_SD3_CMD__SD3_CMD 0x5a
- MX7D_PAD_SD3_CLK__SD3_CLK 0x51
- MX7D_PAD_SD3_DATA0__SD3_DATA0 0x5a
- MX7D_PAD_SD3_DATA1__SD3_DATA1 0x5a
- MX7D_PAD_SD3_DATA2__SD3_DATA2 0x5a
- MX7D_PAD_SD3_DATA3__SD3_DATA3 0x5a
- MX7D_PAD_SD3_DATA4__SD3_DATA4 0x5a
- MX7D_PAD_SD3_DATA5__SD3_DATA5 0x5a
- MX7D_PAD_SD3_DATA6__SD3_DATA6 0x5a
- MX7D_PAD_SD3_DATA7__SD3_DATA7 0x5a
- MX7D_PAD_SD3_STROBE__SD3_STROBE 0x1a
- >;
+ pinctrl_usdhc3_100mhz: usdhc3_100mhzgrp {
+ fsl,pins =
+ <MX7D_PAD_SD3_CMD__SD3_CMD 0x5a>,
+ <MX7D_PAD_SD3_CLK__SD3_CLK 0x51>,
+ <MX7D_PAD_SD3_DATA0__SD3_DATA0 0x5a>,
+ <MX7D_PAD_SD3_DATA1__SD3_DATA1 0x5a>,
+ <MX7D_PAD_SD3_DATA2__SD3_DATA2 0x5a>,
+ <MX7D_PAD_SD3_DATA3__SD3_DATA3 0x5a>,
+ <MX7D_PAD_SD3_DATA4__SD3_DATA4 0x5a>,
+ <MX7D_PAD_SD3_DATA5__SD3_DATA5 0x5a>,
+ <MX7D_PAD_SD3_DATA6__SD3_DATA6 0x5a>,
+ <MX7D_PAD_SD3_DATA7__SD3_DATA7 0x5a>,
+ <MX7D_PAD_SD3_STROBE__SD3_STROBE 0x1a>;
};
- pinctrl_usdhc3_200mhz: usdhc3grp_200mhz {
- fsl,pins = <
- MX7D_PAD_SD3_CMD__SD3_CMD 0x5b
- MX7D_PAD_SD3_CLK__SD3_CLK 0x51
- MX7D_PAD_SD3_DATA0__SD3_DATA0 0x5b
- MX7D_PAD_SD3_DATA1__SD3_DATA1 0x5b
- MX7D_PAD_SD3_DATA2__SD3_DATA2 0x5b
- MX7D_PAD_SD3_DATA3__SD3_DATA3 0x5b
- MX7D_PAD_SD3_DATA4__SD3_DATA4 0x5b
- MX7D_PAD_SD3_DATA5__SD3_DATA5 0x5b
- MX7D_PAD_SD3_DATA6__SD3_DATA6 0x5b
- MX7D_PAD_SD3_DATA7__SD3_DATA7 0x5b
- MX7D_PAD_SD3_STROBE__SD3_STROBE 0x1b
- >;
+ pinctrl_usdhc3_200mhz: usdhc3_200mhzgrp {
+ fsl,pins =
+ <MX7D_PAD_SD3_CMD__SD3_CMD 0x5b>,
+ <MX7D_PAD_SD3_CLK__SD3_CLK 0x51>,
+ <MX7D_PAD_SD3_DATA0__SD3_DATA0 0x5b>,
+ <MX7D_PAD_SD3_DATA1__SD3_DATA1 0x5b>,
+ <MX7D_PAD_SD3_DATA2__SD3_DATA2 0x5b>,
+ <MX7D_PAD_SD3_DATA3__SD3_DATA3 0x5b>,
+ <MX7D_PAD_SD3_DATA4__SD3_DATA4 0x5b>,
+ <MX7D_PAD_SD3_DATA5__SD3_DATA5 0x5b>,
+ <MX7D_PAD_SD3_DATA6__SD3_DATA6 0x5b>,
+ <MX7D_PAD_SD3_DATA7__SD3_DATA7 0x5b>,
+ <MX7D_PAD_SD3_STROBE__SD3_STROBE 0x1b>;
};
};
&iomuxc_lpsr {
pinctrl_wdog1: wdog1grp {
- fsl,pins = <
- MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B 0x30
- >;
+ fsl,pins =
+ <MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B 0x30>;
};
};
@@ -265,10 +269,6 @@
};
};
-&sdma {
- status = "okay";
-};
-
&usdhc3 {
pinctrl-names = "default", "state_100mhz", "state_200mhz";
pinctrl-0 = <&pinctrl_usdhc3>;
@@ -278,6 +278,8 @@
assigned-clock-rates = <400000000>;
bus-width = <8>;
non-removable;
+ no-sd;
+ no-sdio;
vmmc-supply = <&vgen4_reg>;
vqmmc-supply = <&sw2_reg>;
status = "okay";
diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts
index 3a723843d562..9984b343cdf0 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts
@@ -130,7 +130,7 @@
* TCG specification - Section 6.4.1 Clocking:
* TPM shall support a SPI clock frequency range of 10-24 MHz.
*/
- st33htph: tpm-tis@0 {
+ st33htph: tpm@0 {
compatible = "st,st33htpm-spi", "tcg,tpm_tis-spi";
reg = <0>;
spi-max-frequency = <24000000>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-mba7.dts b/arch/arm/boot/dts/nxp/imx/imx7d-mba7.dts
index 32bf9fa9d00e..0443faa3dfae 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7d-mba7.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7d-mba7.dts
@@ -21,8 +21,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet2>;
phy-mode = "rgmii-id";
- phy-reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>;
- phy-reset-duration = <1>;
phy-supply = <&reg_fec2_pwdn>;
phy-handle = <&ethphy2_0>;
fsl,magic-packet;
@@ -35,59 +33,85 @@
ethphy2_0: ethernet-phy@0 {
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enet2_phy>;
ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_50_NS>;
ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_50_NS>;
ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
ti,clk-output-sel = <DP83867_CLK_O_SEL_OFF>;
+ reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <1000>;
+ reset-deassert-us = <500>;
};
};
};
+&gpio2 {
+ pcie-dis-hog {
+ gpio-hog;
+ gpios = <29 GPIO_ACTIVE_HIGH>;
+ output-high;
+ line-name = "pcie-dis";
+ };
+
+ pcie-rst-hog {
+ gpio-hog;
+ gpios = <12 GPIO_ACTIVE_HIGH>;
+ output-high;
+ line-name = "pcie-rst";
+ };
+};
+
&iomuxc {
pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_hog_mba7_1>;
+ pinctrl-0 = <&pinctrl_hog_mba7_1>, <&pinctrl_hog_pcie>;
pinctrl_enet2: enet2grp {
- fsl,pins = <
- MX7D_PAD_SD2_CD_B__ENET2_MDIO 0x02
- MX7D_PAD_SD2_WP__ENET2_MDC 0x00
- MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC 0x71
- MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0 0x71
- MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1 0x71
- MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2 0x71
- MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3 0x71
- MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL 0x71
- MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC 0x79
- MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0 0x79
- MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1 0x79
- MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2 0x79
- MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3 0x79
- MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL 0x79
+ fsl,pins =
+ <MX7D_PAD_SD2_CD_B__ENET2_MDIO 0x02>,
+ <MX7D_PAD_SD2_WP__ENET2_MDC 0x00>,
+ <MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC 0x71>,
+ <MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0 0x71>,
+ <MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1 0x71>,
+ <MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2 0x71>,
+ <MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3 0x71>,
+ <MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL 0x71>,
+ <MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC 0x79>,
+ <MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0 0x79>,
+ <MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1 0x79>,
+ <MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2 0x79>,
+ <MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3 0x79>,
+ <MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL 0x79>;
+ };
+
+ pinctrl_enet2_phy: enet2phygrp {
+ fsl,pins =
/* Reset: SION, 100kPU, SRE_FAST, DSE_X1 */
- MX7D_PAD_EPDC_BDR0__GPIO2_IO28 0x40000070
+ <MX7D_PAD_EPDC_BDR0__GPIO2_IO28 0x40000070>,
/* INT/PWDN: SION, 100kPU, HYS, SRE_FAST, DSE_X1 */
- MX7D_PAD_EPDC_PWR_STAT__GPIO2_IO31 0x40000078
- >;
+ <MX7D_PAD_EPDC_PWR_STAT__GPIO2_IO31 0x40000078>;
};
- pinctrl_pcie: pciegrp {
- fsl,pins = <
- /* #pcie_wake */
- MX7D_PAD_EPDC_PWR_COM__GPIO2_IO30 0x70
+ pinctrl_hog_pcie: hogpciegrp {
+ fsl,pins =
/* #pcie_rst */
- MX7D_PAD_SD2_CLK__GPIO5_IO12 0x70
+ <MX7D_PAD_SD2_CLK__GPIO5_IO12 0x70>,
/* #pcie_dis */
- MX7D_PAD_EPDC_BDR1__GPIO2_IO29 0x70
- >;
+ <MX7D_PAD_EPDC_BDR1__GPIO2_IO29 0x70>;
+ };
+
+ pinctrl_pcie: pciegrp {
+ fsl,pins =
+ /* #pcie_wake */
+ <MX7D_PAD_EPDC_PWR_COM__GPIO2_IO30 0x70>;
};
};
&iomuxc_lpsr {
pinctrl_usbotg2: usbotg2grp {
- fsl,pins = <
- MX7D_PAD_LPSR_GPIO1_IO06__USB_OTG2_OC 0x5c
- MX7D_PAD_LPSR_GPIO1_IO07__GPIO1_IO7 0x59
- >;
+ fsl,pins =
+ <MX7D_PAD_LPSR_GPIO1_IO06__USB_OTG2_OC 0x5c>,
+ <MX7D_PAD_LPSR_GPIO1_IO07__GPIO1_IO7 0x59>;
};
};
@@ -98,16 +122,14 @@
/* probe deferral not supported */
/* pcie-bus-supply = <&reg_mpcie_1v5>; */
reset-gpio = <&gpio5 12 GPIO_ACTIVE_LOW>;
- status = "okay";
+ status = "disabled";
};
&usbotg2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usbotg2>;
vbus-supply = <&reg_usb_otg2_vbus>;
- srp-disable;
- hnp-disable;
- adp-disable;
+ disable-over-current;
dr_mode = "host";
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts
index 12361fcbe24a..1b965652291b 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts
@@ -63,6 +63,7 @@
gpio-controller;
#gpio-cells = <2>;
#interrupt-cells = <2>;
+ interrupt-controller;
reg = <0x25>;
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
index ebf7befcc11e..9c81c6baa2d3 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
@@ -834,16 +834,6 @@
<&clks IMX7D_LCDIF_PIXEL_ROOT_CLK>;
clock-names = "pix", "axi";
status = "disabled";
-
- port {
- #address-cells = <1>;
- #size-cells = <0>;
-
- lcdif_out_mipi_dsi: endpoint@0 {
- reg = <0>;
- remote-endpoint = <&mipi_dsi_in_lcdif>;
- };
- };
};
mipi_csi: mipi-csi@30750000 {
@@ -895,22 +885,6 @@
samsung,esc-clock-frequency = <20000000>;
samsung,pll-clock-frequency = <24000000>;
status = "disabled";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- mipi_dsi_in_lcdif: endpoint@0 {
- reg = <0>;
- remote-endpoint = <&lcdif_out_mipi_dsi>;
- };
- };
- };
};
};
diff --git a/arch/arm/boot/dts/nxp/ls/ls1021a.dtsi b/arch/arm/boot/dts/nxp/ls/ls1021a.dtsi
index d471cc5efa94..e86998ca77d6 100644
--- a/arch/arm/boot/dts/nxp/ls/ls1021a.dtsi
+++ b/arch/arm/boot/dts/nxp/ls/ls1021a.dtsi
@@ -808,7 +808,9 @@
dr_mode = "host";
snps,quirk-frame-length-adjustment = <0x20>;
snps,dis_rxdet_inp3_quirk;
+ usb3-lpm-capable;
snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>;
+ snps,host-vbus-glitches;
};
pcie@3400000 {
diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-evk.dts b/arch/arm/boot/dts/nxp/mxs/imx28-evk.dts
index 9ebb7371e235..330d3aff6b6c 100644
--- a/arch/arm/boot/dts/nxp/mxs/imx28-evk.dts
+++ b/arch/arm/boot/dts/nxp/mxs/imx28-evk.dts
@@ -198,7 +198,7 @@
clocks = <&saif0>;
};
- at24@51 {
+ eeprom@51 {
compatible = "atmel,24c32";
pagesize = <32>;
reg = <0x51>;
diff --git a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts
index b0ed68af0546..029f49be40e3 100644
--- a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts
+++ b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts
@@ -338,6 +338,7 @@
reg = <0x22>;
gpio-controller;
#gpio-cells = <2>;
+ #interrupt-cells = <2>;
interrupt-controller;
interrupt-parent = <&gpio3>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
diff --git a/arch/arm/boot/dts/qcom/Makefile b/arch/arm/boot/dts/qcom/Makefile
index 9cc1e14e6cd0..6478a39b3be5 100644
--- a/arch/arm/boot/dts/qcom/Makefile
+++ b/arch/arm/boot/dts/qcom/Makefile
@@ -36,6 +36,7 @@ dtb-$(CONFIG_ARCH_QCOM) += \
qcom-msm8926-microsoft-superman-lte.dtb \
qcom-msm8926-microsoft-tesla.dtb \
qcom-msm8926-motorola-peregrine.dtb \
+ qcom-msm8926-samsung-matisselte.dtb \
qcom-msm8960-cdp.dtb \
qcom-msm8960-samsung-expressatt.dtb \
qcom-msm8974-lge-nexus5-hammerhead.dtb \
diff --git a/arch/arm/boot/dts/qcom/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom/qcom-apq8026-lg-lenok.dts
index 0a1fd5eb3c6d..a70de21bf139 100644
--- a/arch/arm/boot/dts/qcom/qcom-apq8026-lg-lenok.dts
+++ b/arch/arm/boot/dts/qcom/qcom-apq8026-lg-lenok.dts
@@ -7,6 +7,7 @@
#include "qcom-msm8226.dtsi"
#include "pm8226.dtsi"
+#include <dt-bindings/clock/qcom,mmcc-msm8974.h>
/delete-node/ &adsp_region;
@@ -56,6 +57,29 @@
pinctrl-names = "default";
pinctrl-0 = <&wlan_regulator_default_state>;
};
+
+ pwm_vibrator: pwm {
+ compatible = "clk-pwm";
+ clocks = <&mmcc CAMSS_GP0_CLK>;
+
+ pinctrl-0 = <&vibrator_clk_default_state>;
+ pinctrl-names = "default";
+
+ #pwm-cells = <2>;
+ };
+
+ vibrator {
+ compatible = "pwm-vibrator";
+
+ pwms = <&pwm_vibrator 0 10000>;
+ pwm-names = "enable";
+
+ vcc-supply = <&pm8226_l28>;
+ enable-gpios = <&tlmm 62 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-0 = <&vibrator_en_default_state>;
+ pinctrl-names = "default";
+ };
};
&adsp {
@@ -330,6 +354,20 @@
};
};
+ vibrator_clk_default_state: vibrator-clk-default-state {
+ pins = "gpio33";
+ function = "gp0_clk";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ vibrator_en_default_state: vibrator-en-default-state {
+ pins = "gpio62";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
wlan_hostwake_default_state: wlan-hostwake-default-state {
pins = "gpio37";
function = "gpio";
diff --git a/arch/arm/boot/dts/qcom/qcom-apq8026-samsung-matisse-wifi.dts b/arch/arm/boot/dts/qcom/qcom-apq8026-samsung-matisse-wifi.dts
index cffc069712b2..da3be658e822 100644
--- a/arch/arm/boot/dts/qcom/qcom-apq8026-samsung-matisse-wifi.dts
+++ b/arch/arm/boot/dts/qcom/qcom-apq8026-samsung-matisse-wifi.dts
@@ -5,142 +5,13 @@
/dts-v1/;
-#include <dt-bindings/input/input.h>
-#include "qcom-msm8226.dtsi"
-#include "pm8226.dtsi"
-
-/delete-node/ &adsp_region;
-/delete-node/ &smem_region;
+#include "qcom-msm8226-samsung-matisse-common.dtsi"
/ {
model = "Samsung Galaxy Tab 4 10.1";
compatible = "samsung,matisse-wifi", "qcom,apq8026";
chassis-type = "tablet";
- aliases {
- mmc0 = &sdhc_1; /* SDC1 eMMC slot */
- mmc1 = &sdhc_2; /* SDC2 SD card slot */
- display0 = &framebuffer0;
- };
-
- chosen {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- stdout-path = "display0";
-
- framebuffer0: framebuffer@3200000 {
- compatible = "simple-framebuffer";
- reg = <0x03200000 0x800000>;
- width = <1280>;
- height = <800>;
- stride = <(1280 * 3)>;
- format = "r8g8b8";
- };
- };
-
- gpio-hall-sensor {
- compatible = "gpio-keys";
-
- event-hall-sensor {
- label = "Hall Effect Sensor";
- gpios = <&tlmm 110 GPIO_ACTIVE_LOW>;
- linux,input-type = <EV_SW>;
- linux,code = <SW_LID>;
- debounce-interval = <15>;
- linux,can-disable;
- wakeup-source;
- };
- };
-
- gpio-keys {
- compatible = "gpio-keys";
- autorepeat;
-
- key-home {
- label = "Home";
- gpios = <&tlmm 108 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_HOMEPAGE>;
- debounce-interval = <15>;
- };
-
- key-volume-down {
- label = "Volume Down";
- gpios = <&tlmm 107 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_VOLUMEDOWN>;
- debounce-interval = <15>;
- };
-
- key-volume-up {
- label = "Volume Up";
- gpios = <&tlmm 106 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_VOLUMEUP>;
- debounce-interval = <15>;
- };
- };
-
- i2c-backlight {
- compatible = "i2c-gpio";
- sda-gpios = <&tlmm 20 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
- scl-gpios = <&tlmm 21 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
-
- pinctrl-0 = <&backlight_i2c_default_state>;
- pinctrl-names = "default";
-
- i2c-gpio,delay-us = <4>;
-
- #address-cells = <1>;
- #size-cells = <0>;
-
- backlight@2c {
- compatible = "ti,lp8556";
- reg = <0x2c>;
-
- dev-ctrl = /bits/ 8 <0x80>;
- init-brt = /bits/ 8 <0x3f>;
-
- pwms = <&backlight_pwm 0 100000>;
- pwm-names = "lp8556";
-
- rom-a0h {
- rom-addr = /bits/ 8 <0xa0>;
- rom-val = /bits/ 8 <0x44>;
- };
-
- rom-a1h {
- rom-addr = /bits/ 8 <0xa1>;
- rom-val = /bits/ 8 <0x6c>;
- };
-
- rom-a5h {
- rom-addr = /bits/ 8 <0xa5>;
- rom-val = /bits/ 8 <0x24>;
- };
- };
- };
-
- backlight_pwm: pwm {
- compatible = "clk-pwm";
- #pwm-cells = <2>;
- clocks = <&mmcc CAMSS_GP0_CLK>;
- pinctrl-0 = <&backlight_pwm_default_state>;
- pinctrl-names = "default";
- };
-
- reg_tsp_1p8v: regulator-tsp-1p8v {
- compatible = "regulator-fixed";
- regulator-name = "tsp_1p8v";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
-
- gpio = <&tlmm 31 GPIO_ACTIVE_HIGH>;
- enable-active-high;
-
- pinctrl-names = "default";
- pinctrl-0 = <&tsp_en_default_state>;
- };
-
reg_tsp_3p3v: regulator-tsp-3p3v {
compatible = "regulator-fixed";
regulator-name = "tsp_3p3v";
@@ -153,74 +24,6 @@
pinctrl-names = "default";
pinctrl-0 = <&tsp_en1_default_state>;
};
-
- reserved-memory {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- framebuffer@3200000 {
- reg = <0x03200000 0x800000>;
- no-map;
- };
-
- mpss@8400000 {
- reg = <0x08400000 0x1f00000>;
- no-map;
- };
-
- mba@a300000 {
- reg = <0x0a300000 0x100000>;
- no-map;
- };
-
- reserved@cb00000 {
- reg = <0x0cb00000 0x700000>;
- no-map;
- };
-
- wcnss@d200000 {
- reg = <0x0d200000 0x700000>;
- no-map;
- };
-
- adsp_region: adsp@d900000 {
- reg = <0x0d900000 0x1800000>;
- no-map;
- };
-
- venus@f100000 {
- reg = <0x0f100000 0x500000>;
- no-map;
- };
-
- smem_region: smem@fa00000 {
- reg = <0x0fa00000 0x100000>;
- no-map;
- };
-
- reserved@fb00000 {
- reg = <0x0fb00000 0x260000>;
- no-map;
- };
-
- rfsa@fd60000 {
- reg = <0x0fd60000 0x20000>;
- no-map;
- };
-
- rmtfs@fd80000 {
- compatible = "qcom,rmtfs-mem";
- reg = <0x0fd80000 0x180000>;
- no-map;
-
- qcom,client-id = <1>;
- };
- };
-};
-
-&adsp {
- status = "okay";
};
&blsp1_i2c2 {
@@ -243,21 +46,6 @@
};
};
-&blsp1_i2c4 {
- status = "okay";
-
- muic: usb-switch@25 {
- compatible = "siliconmitus,sm5502-muic";
- reg = <0x25>;
-
- interrupt-parent = <&tlmm>;
- interrupts = <67 IRQ_TYPE_EDGE_FALLING>;
-
- pinctrl-names = "default";
- pinctrl-0 = <&muic_int_default_state>;
- };
-};
-
&blsp1_i2c5 {
status = "okay";
@@ -268,6 +56,13 @@
interrupt-parent = <&tlmm>;
interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
+ linux,keycodes = <KEY_RESERVED>,
+ <KEY_RESERVED>,
+ <KEY_RESERVED>,
+ <KEY_RESERVED>,
+ <KEY_APPSELECT>,
+ <KEY_BACK>;
+
pinctrl-names = "default";
pinctrl-0 = <&tsp_int_rst_default_state>;
@@ -278,242 +73,19 @@
};
};
-&rpm_requests {
- regulators {
- compatible = "qcom,rpm-pm8226-regulators";
-
- pm8226_s3: s3 {
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1300000>;
- };
-
- pm8226_s4: s4 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- };
-
- pm8226_s5: s5 {
- regulator-min-microvolt = <1150000>;
- regulator-max-microvolt = <1150000>;
- };
-
- pm8226_l1: l1 {
- regulator-min-microvolt = <1225000>;
- regulator-max-microvolt = <1225000>;
- };
-
- pm8226_l2: l2 {
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
- };
-
- pm8226_l3: l3 {
- regulator-min-microvolt = <750000>;
- regulator-max-microvolt = <1337500>;
- regulator-always-on;
- };
-
- pm8226_l4: l4 {
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
- };
-
- pm8226_l5: l5 {
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
- };
-
- pm8226_l6: l6 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-always-on;
- };
-
- pm8226_l7: l7 {
- regulator-min-microvolt = <1850000>;
- regulator-max-microvolt = <1850000>;
- };
-
- pm8226_l8: l8 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-always-on;
- };
-
- pm8226_l9: l9 {
- regulator-min-microvolt = <2050000>;
- regulator-max-microvolt = <2050000>;
- };
-
- pm8226_l10: l10 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- };
-
- pm8226_l12: l12 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- };
-
- pm8226_l14: l14 {
- regulator-min-microvolt = <2750000>;
- regulator-max-microvolt = <2750000>;
- };
-
- pm8226_l15: l15 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <3300000>;
- };
-
- pm8226_l16: l16 {
- regulator-min-microvolt = <3000000>;
- regulator-max-microvolt = <3350000>;
- };
-
- pm8226_l17: l17 {
- regulator-min-microvolt = <2950000>;
- regulator-max-microvolt = <2950000>;
-
- regulator-system-load = <200000>;
- regulator-allow-set-load;
- regulator-always-on;
- };
-
- pm8226_l18: l18 {
- regulator-min-microvolt = <2950000>;
- regulator-max-microvolt = <2950000>;
- };
-
- pm8226_l19: l19 {
- regulator-min-microvolt = <2850000>;
- regulator-max-microvolt = <3000000>;
- };
-
- pm8226_l20: l20 {
- regulator-min-microvolt = <3075000>;
- regulator-max-microvolt = <3075000>;
- };
-
- pm8226_l21: l21 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <2950000>;
- };
-
- pm8226_l22: l22 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <3000000>;
- };
-
- pm8226_l23: l23 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <3300000>;
- };
-
- pm8226_l24: l24 {
- regulator-min-microvolt = <1300000>;
- regulator-max-microvolt = <1350000>;
- };
-
- pm8226_l25: l25 {
- regulator-min-microvolt = <1775000>;
- regulator-max-microvolt = <2125000>;
- };
-
- pm8226_l26: l26 {
- regulator-min-microvolt = <1225000>;
- regulator-max-microvolt = <1300000>;
- };
-
- pm8226_l27: l27 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- };
-
- pm8226_l28: l28 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <2950000>;
- };
-
- pm8226_lvs1: lvs1 {};
- };
-};
-
-&sdhc_1 {
- vmmc-supply = <&pm8226_l17>;
- vqmmc-supply = <&pm8226_l6>;
-
- bus-width = <8>;
- non-removable;
-
- status = "okay";
+&pm8226_l3 {
+ regulator-max-microvolt = <1337500>;
};
-&sdhc_2 {
- vmmc-supply = <&pm8226_l18>;
- vqmmc-supply = <&pm8226_l21>;
-
- bus-width = <4>;
- cd-gpios = <&tlmm 38 GPIO_ACTIVE_LOW>;
-
- status = "okay";
+&pm8226_s4 {
+ regulator-max-microvolt = <1800000>;
};
&tlmm {
- accel_int_default_state: accel-int-default-state {
- pins = "gpio54";
- function = "gpio";
- drive-strength = <2>;
- bias-disable;
- };
-
- backlight_i2c_default_state: backlight-i2c-default-state {
- pins = "gpio20", "gpio21";
- function = "gpio";
- drive-strength = <2>;
- bias-disable;
- };
-
- backlight_pwm_default_state: backlight-pwm-default-state {
- pins = "gpio33";
- function = "gp0_clk";
- };
-
- muic_int_default_state: muic-int-default-state {
- pins = "gpio67";
- function = "gpio";
- drive-strength = <2>;
- bias-disable;
- };
-
- tsp_en_default_state: tsp-en-default-state {
- pins = "gpio31";
- function = "gpio";
- drive-strength = <2>;
- bias-disable;
- };
-
tsp_en1_default_state: tsp-en1-default-state {
pins = "gpio73";
function = "gpio";
drive-strength = <2>;
bias-disable;
};
-
- tsp_int_rst_default_state: tsp-int-rst-default-state {
- pins = "gpio17";
- function = "gpio";
- drive-strength = <10>;
- bias-pull-up;
- };
-};
-
-&usb {
- extcon = <&muic>, <&muic>;
- status = "okay";
-};
-
-&usb_hs_phy {
- extcon = <&muic>;
- v1p8-supply = <&pm8226_l10>;
- v3p3-supply = <&pm8226_l20>;
};
diff --git a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi
index 3faf57035d54..9a5ba978775a 100644
--- a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi
@@ -190,7 +190,7 @@
cpu-pmu {
compatible = "qcom,krait-pmu";
- interrupts = <1 10 0x304>;
+ interrupts = <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
};
clocks {
@@ -244,7 +244,7 @@
modem_smsm: modem@1 {
reg = <1>;
- interrupts = <0 38 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -252,7 +252,7 @@
q6_smsm: q6@2 {
reg = <2>;
- interrupts = <0 89 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -260,7 +260,7 @@
wcnss_smsm: wcnss@3 {
reg = <3>;
- interrupts = <0 204 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 204 IRQ_TYPE_EDGE_RISING>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -268,7 +268,7 @@
dsps_smsm: dsps@4 {
reg = <4>;
- interrupts = <0 137 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 137 IRQ_TYPE_EDGE_RISING>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -299,7 +299,7 @@
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
- interrupts = <0 16 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&ps_hold>;
@@ -321,9 +321,9 @@
timer@200a000 {
compatible = "qcom,kpss-wdt-apq8064", "qcom,kpss-timer",
"qcom,msm-timer";
- interrupts = <1 1 0x301>,
- <1 2 0x301>,
- <1 3 0x301>;
+ interrupts = <GIC_PPI 1 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+ <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+ <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>;
reg = <0x0200a000 0x100>;
clock-frequency = <27000000>;
cpu-offset = <0x80000>;
@@ -365,28 +365,44 @@
#clock-cells = <0>;
};
- saw0: power-controller@2089000 {
+ saw0: power-manager@2089000 {
compatible = "qcom,apq8064-saw2-v1.1-cpu", "qcom,saw2";
reg = <0x02089000 0x1000>, <0x02009000 0x1000>;
- regulator;
+
+ saw0_vreg: regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
};
- saw1: power-controller@2099000 {
+ saw1: power-manager@2099000 {
compatible = "qcom,apq8064-saw2-v1.1-cpu", "qcom,saw2";
reg = <0x02099000 0x1000>, <0x02009000 0x1000>;
- regulator;
+
+ saw1_vreg: regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
};
- saw2: power-controller@20a9000 {
+ saw2: power-manager@20a9000 {
compatible = "qcom,apq8064-saw2-v1.1-cpu", "qcom,saw2";
reg = <0x020a9000 0x1000>, <0x02009000 0x1000>;
- regulator;
+
+ saw2_vreg: regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
};
- saw3: power-controller@20b9000 {
+ saw3: power-manager@20b9000 {
compatible = "qcom,apq8064-saw2-v1.1-cpu", "qcom,saw2";
reg = <0x020b9000 0x1000>, <0x02009000 0x1000>;
- regulator;
+
+ saw3_vreg: regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
};
sps_sic_non_secure: sps-sic-non-secure@12100000 {
@@ -411,7 +427,7 @@
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x12450000 0x100>,
<0x12400000 0x03>;
- interrupts = <0 193 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI1_UART_CLK>, <&gcc GSBI1_H_CLK>;
clock-names = "core", "iface";
status = "disabled";
@@ -423,7 +439,7 @@
pinctrl-1 = <&i2c1_pins_sleep>;
pinctrl-names = "default", "sleep";
reg = <0x12460000 0x1000>;
- interrupts = <0 194 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI1_QUP_CLK>, <&gcc GSBI1_H_CLK>;
clock-names = "core", "iface";
#address-cells = <1>;
@@ -452,7 +468,7 @@
pinctrl-0 = <&i2c2_pins>;
pinctrl-1 = <&i2c2_pins_sleep>;
pinctrl-names = "default", "sleep";
- interrupts = <0 196 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 196 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI2_QUP_CLK>, <&gcc GSBI2_H_CLK>;
clock-names = "core", "iface";
#address-cells = <1>;
@@ -539,7 +555,7 @@
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x1a240000 0x100>,
<0x1a200000 0x03>;
- interrupts = <0 154 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI5_UART_CLK>, <&gcc GSBI5_H_CLK>;
clock-names = "core", "iface";
status = "disabled";
@@ -548,7 +564,7 @@
gsbi5_spi: spi@1a280000 {
compatible = "qcom,spi-qup-v1.1.1";
reg = <0x1a280000 0x1000>;
- interrupts = <0 155 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-0 = <&spi5_default>;
pinctrl-1 = <&spi5_sleep>;
pinctrl-names = "default", "sleep";
@@ -575,7 +591,7 @@
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x16540000 0x100>,
<0x16500000 0x03>;
- interrupts = <0 156 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI6_UART_CLK>, <&gcc GSBI6_H_CLK>;
clock-names = "core", "iface";
status = "disabled";
@@ -611,7 +627,7 @@
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x16640000 0x1000>,
<0x16600000 0x1000>;
- interrupts = <0 158 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI7_UART_CLK>, <&gcc GSBI7_H_CLK>;
clock-names = "core", "iface";
status = "disabled";
@@ -908,7 +924,7 @@
sdcc3bam: dma-controller@12182000 {
compatible = "qcom,bam-v1.3.0";
reg = <0x12182000 0x8000>;
- interrupts = <0 96 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc SDC3_H_CLK>;
clock-names = "bam_clk";
#dma-cells = <1>;
@@ -936,7 +952,7 @@
sdcc4bam: dma-controller@121c2000 {
compatible = "qcom,bam-v1.3.0";
reg = <0x121c2000 0x8000>;
- interrupts = <0 95 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc SDC4_H_CLK>;
clock-names = "bam_clk";
#dma-cells = <1>;
@@ -965,7 +981,7 @@
sdcc1bam: dma-controller@12402000 {
compatible = "qcom,bam-v1.3.0";
reg = <0x12402000 0x8000>;
- interrupts = <0 98 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc SDC1_H_CLK>;
clock-names = "bam_clk";
#dma-cells = <1>;
diff --git a/arch/arm/boot/dts/qcom/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom/qcom-apq8084.dtsi
index 2b1f9d0fb510..8204e64d9a97 100644
--- a/arch/arm/boot/dts/qcom/qcom-apq8084.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-apq8084.dtsi
@@ -629,30 +629,29 @@
};
};
- saw0: power-controller@f9089000 {
+ saw0: power-manager@f9089000 {
compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf9089000 0x1000>, <0xf9009000 0x1000>;
};
- saw1: power-controller@f9099000 {
+ saw1: power-manager@f9099000 {
compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf9099000 0x1000>, <0xf9009000 0x1000>;
};
- saw2: power-controller@f90a9000 {
+ saw2: power-manager@f90a9000 {
compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf90a9000 0x1000>, <0xf9009000 0x1000>;
};
- saw3: power-controller@f90b9000 {
+ saw3: power-manager@f90b9000 {
compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf90b9000 0x1000>, <0xf9009000 0x1000>;
};
- saw_l2: power-controller@f9012000 {
- compatible = "qcom,saw2";
+ saw_l2: power-manager@f9012000 {
+ compatible = "qcom,apq8084-saw2-v2.1-l2", "qcom,saw2";
reg = <0xf9012000 0x1000>;
- regulator;
};
acc0: power-manager@f9088000 {
diff --git a/arch/arm/boot/dts/qcom/qcom-ipq4019-ap.dk01.1.dtsi b/arch/arm/boot/dts/qcom/qcom-ipq4019-ap.dk01.1.dtsi
index 0505270cf508..f7ac8f9d0b6f 100644
--- a/arch/arm/boot/dts/qcom/qcom-ipq4019-ap.dk01.1.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-ipq4019-ap.dk01.1.dtsi
@@ -27,87 +27,83 @@
chosen {
stdout-path = "serial0:115200n8";
};
+};
- soc {
- rng@22000 {
- status = "okay";
- };
-
- pinctrl@1000000 {
- serial_pins: serial_pinmux {
- mux {
- pins = "gpio60", "gpio61";
- function = "blsp_uart0";
- bias-disable;
- };
- };
-
- spi_0_pins: spi_0_pinmux {
- pinmux {
- function = "blsp_spi0";
- pins = "gpio55", "gpio56", "gpio57";
- };
- pinmux_cs {
- function = "gpio";
- pins = "gpio54";
- };
- pinconf {
- pins = "gpio55", "gpio56", "gpio57";
- drive-strength = <12>;
- bias-disable;
- };
- pinconf_cs {
- pins = "gpio54";
- drive-strength = <2>;
- bias-disable;
- output-high;
- };
- };
- };
+&prng {
+ status = "okay";
+};
- blsp_dma: dma-controller@7884000 {
- status = "okay";
+&tlmm {
+ serial_pins: serial_pinmux {
+ mux {
+ pins = "gpio60", "gpio61";
+ function = "blsp_uart0";
+ bias-disable;
};
+ };
- spi@78b5000 {
- pinctrl-0 = <&spi_0_pins>;
- pinctrl-names = "default";
- status = "okay";
- cs-gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>;
-
- mx25l25635e@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0>;
- compatible = "mx25l25635e";
- spi-max-frequency = <24000000>;
- };
+ spi_0_pins: spi_0_pinmux {
+ pinmux {
+ function = "blsp_spi0";
+ pins = "gpio55", "gpio56", "gpio57";
};
-
- serial@78af000 {
- pinctrl-0 = <&serial_pins>;
- pinctrl-names = "default";
- status = "okay";
+ pinmux_cs {
+ function = "gpio";
+ pins = "gpio54";
};
-
- cryptobam: dma-controller@8e04000 {
- status = "okay";
+ pinconf {
+ pins = "gpio55", "gpio56", "gpio57";
+ drive-strength = <12>;
+ bias-disable;
};
-
- crypto@8e3a000 {
- status = "okay";
+ pinconf_cs {
+ pins = "gpio54";
+ drive-strength = <2>;
+ bias-disable;
+ output-high;
};
+ };
+};
- watchdog@b017000 {
- status = "okay";
- };
+&blsp_dma {
+ status = "okay";
+};
- wifi@a000000 {
- status = "okay";
- };
+&blsp1_spi1 {
+ pinctrl-0 = <&spi_0_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+ cs-gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>;
- wifi@a800000 {
- status = "okay";
- };
+ flash@0 {
+ reg = <0>;
+ compatible = "jedec,spi-nor";
+ spi-max-frequency = <24000000>;
};
};
+
+&blsp1_uart1 {
+ pinctrl-0 = <&serial_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&cryptobam {
+ status = "okay";
+};
+
+&crypto {
+ status = "okay";
+};
+
+&watchdog {
+ status = "okay";
+};
+
+&wifi0 {
+ status = "okay";
+};
+
+&wifi1 {
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/qcom/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom/qcom-ipq4019.dtsi
index f989bd741cd1..681cb3fc8085 100644
--- a/arch/arm/boot/dts/qcom/qcom-ipq4019.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-ipq4019.dtsi
@@ -162,10 +162,10 @@
timer {
compatible = "arm,armv7-timer";
- interrupts = <1 2 0xf08>,
- <1 3 0xf08>,
- <1 4 0xf08>,
- <1 1 0xf08>;
+ interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 4 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 1 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
clock-frequency = <48000000>;
always-on;
};
@@ -350,34 +350,29 @@
reg = <0x0b0b8000 0x1000>, <0xb008000 0x1000>;
};
- saw0: regulator@b089000 {
- compatible = "qcom,saw2";
+ saw0: power-manager@b089000 {
+ compatible = "qcom,ipq4019-saw2-cpu", "qcom,saw2";
reg = <0x0b089000 0x1000>, <0x0b009000 0x1000>;
- regulator;
};
- saw1: regulator@b099000 {
- compatible = "qcom,saw2";
+ saw1: power-manager@b099000 {
+ compatible = "qcom,ipq4019-saw2-cpu", "qcom,saw2";
reg = <0x0b099000 0x1000>, <0x0b009000 0x1000>;
- regulator;
};
- saw2: regulator@b0a9000 {
- compatible = "qcom,saw2";
+ saw2: power-manager@b0a9000 {
+ compatible = "qcom,ipq4019-saw2-cpu", "qcom,saw2";
reg = <0x0b0a9000 0x1000>, <0x0b009000 0x1000>;
- regulator;
};
- saw3: regulator@b0b9000 {
- compatible = "qcom,saw2";
+ saw3: power-manager@b0b9000 {
+ compatible = "qcom,ipq4019-saw2-cpu", "qcom,saw2";
reg = <0x0b0b9000 0x1000>, <0x0b009000 0x1000>;
- regulator;
};
- saw_l2: regulator@b012000 {
- compatible = "qcom,saw2";
+ saw_l2: power-manager@b012000 {
+ compatible = "qcom,ipq4019-saw2-l2", "qcom,saw2";
reg = <0xb012000 0x1000>;
- regulator;
};
blsp1_uart1: serial@78af000 {
@@ -684,7 +679,7 @@
clocks = <&gcc GCC_USB2_MASTER_CLK>,
<&gcc GCC_USB2_SLEEP_CLK>,
<&gcc GCC_USB2_MOCK_UTMI_CLK>;
- clock-names = "master", "sleep", "mock_utmi";
+ clock-names = "core", "sleep", "mock_utmi";
ranges;
status = "disabled";
diff --git a/arch/arm/boot/dts/qcom/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom/qcom-ipq8064.dtsi
index 6a7f4dd0f775..2eb6758b6a3a 100644
--- a/arch/arm/boot/dts/qcom/qcom-ipq8064.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-ipq8064.dtsi
@@ -586,10 +586,9 @@
#clock-cells = <0>;
};
- saw0: regulator@2089000 {
- compatible = "qcom,saw2";
+ saw0: power-manager@2089000 {
+ compatible = "qcom,ipq8064-saw2-cpu", "qcom,saw2";
reg = <0x02089000 0x1000>, <0x02009000 0x1000>;
- regulator;
};
acc1: clock-controller@2098000 {
@@ -601,10 +600,9 @@
#clock-cells = <0>;
};
- saw1: regulator@2099000 {
- compatible = "qcom,saw2";
+ saw1: power-manager@2099000 {
+ compatible = "qcom,ipq8064-saw2-cpu", "qcom,saw2";
reg = <0x02099000 0x1000>, <0x02009000 0x1000>;
- regulator;
};
nss_common: syscon@3000000 {
@@ -623,7 +621,6 @@
ranges;
resets = <&gcc USB30_0_MASTER_RESET>;
- reset-names = "master";
status = "disabled";
@@ -669,7 +666,6 @@
ranges;
resets = <&gcc USB30_1_MASTER_RESET>;
- reset-names = "master";
status = "disabled";
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8226-samsung-matisse-common.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8226-samsung-matisse-common.dtsi
new file mode 100644
index 000000000000..a15a44fc0181
--- /dev/null
+++ b/arch/arm/boot/dts/qcom/qcom-msm8226-samsung-matisse-common.dtsi
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2022, Matti Lehtimäki <matti.lehtimaki@gmail.com>
+ */
+
+#include <dt-bindings/input/input.h>
+#include "qcom-msm8226.dtsi"
+#include "pm8226.dtsi"
+
+/delete-node/ &adsp_region;
+/delete-node/ &smem_region;
+
+/ {
+ aliases {
+ mmc0 = &sdhc_1; /* SDC1 eMMC slot */
+ mmc1 = &sdhc_2; /* SDC2 SD card slot */
+ display0 = &framebuffer0;
+ };
+
+ chosen {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ stdout-path = "display0";
+
+ framebuffer0: framebuffer@3200000 {
+ compatible = "simple-framebuffer";
+ reg = <0x03200000 0x800000>;
+ width = <1280>;
+ height = <800>;
+ stride = <(1280 * 3)>;
+ format = "r8g8b8";
+ };
+ };
+
+ gpio-hall-sensor {
+ compatible = "gpio-keys";
+
+ event-hall-sensor {
+ label = "Hall Effect Sensor";
+ gpios = <&tlmm 110 GPIO_ACTIVE_LOW>;
+ linux,input-type = <EV_SW>;
+ linux,code = <SW_LID>;
+ debounce-interval = <15>;
+ linux,can-disable;
+ wakeup-source;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ autorepeat;
+
+ key-home {
+ label = "Home";
+ gpios = <&tlmm 108 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_HOMEPAGE>;
+ debounce-interval = <15>;
+ };
+
+ key-volume-down {
+ label = "Volume Down";
+ gpios = <&tlmm 107 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_VOLUMEDOWN>;
+ debounce-interval = <15>;
+ };
+
+ key-volume-up {
+ label = "Volume Up";
+ gpios = <&tlmm 106 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_VOLUMEUP>;
+ debounce-interval = <15>;
+ };
+ };
+
+ i2c-backlight {
+ compatible = "i2c-gpio";
+ sda-gpios = <&tlmm 20 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+ scl-gpios = <&tlmm 21 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+
+ pinctrl-0 = <&backlight_i2c_default_state>;
+ pinctrl-names = "default";
+
+ i2c-gpio,delay-us = <4>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ backlight@2c {
+ compatible = "ti,lp8556";
+ reg = <0x2c>;
+
+ dev-ctrl = /bits/ 8 <0x80>;
+ init-brt = /bits/ 8 <0x3f>;
+
+ pwms = <&backlight_pwm 0 100000>;
+ pwm-names = "lp8556";
+
+ rom-a0h {
+ rom-addr = /bits/ 8 <0xa0>;
+ rom-val = /bits/ 8 <0x44>;
+ };
+
+ rom-a1h {
+ rom-addr = /bits/ 8 <0xa1>;
+ rom-val = /bits/ 8 <0x6c>;
+ };
+
+ rom-a5h {
+ rom-addr = /bits/ 8 <0xa5>;
+ rom-val = /bits/ 8 <0x24>;
+ };
+ };
+ };
+
+ backlight_pwm: pwm {
+ compatible = "clk-pwm";
+ #pwm-cells = <2>;
+ clocks = <&mmcc CAMSS_GP0_CLK>;
+ pinctrl-0 = <&backlight_pwm_default_state>;
+ pinctrl-names = "default";
+ };
+
+ reg_tsp_1p8v: regulator-tsp-1p8v {
+ compatible = "regulator-fixed";
+ regulator-name = "tsp_1p8v";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ gpio = <&tlmm 31 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&tsp_en_default_state>;
+ };
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ framebuffer@3200000 {
+ reg = <0x03200000 0x800000>;
+ no-map;
+ };
+
+ mpss@8400000 {
+ reg = <0x08400000 0x1f00000>;
+ no-map;
+ };
+
+ mba@a300000 {
+ reg = <0x0a300000 0x100000>;
+ no-map;
+ };
+
+ reserved@cb00000 {
+ reg = <0x0cb00000 0x700000>;
+ no-map;
+ };
+
+ wcnss@d200000 {
+ reg = <0x0d200000 0x700000>;
+ no-map;
+ };
+
+ adsp_region: adsp@d900000 {
+ reg = <0x0d900000 0x1800000>;
+ no-map;
+ };
+
+ venus@f100000 {
+ reg = <0x0f100000 0x500000>;
+ no-map;
+ };
+
+ smem_region: smem@fa00000 {
+ reg = <0x0fa00000 0x100000>;
+ no-map;
+ };
+
+ reserved@fb00000 {
+ reg = <0x0fb00000 0x260000>;
+ no-map;
+ };
+
+ rfsa@fd60000 {
+ reg = <0x0fd60000 0x20000>;
+ no-map;
+ };
+
+ rmtfs@fd80000 {
+ compatible = "qcom,rmtfs-mem";
+ reg = <0x0fd80000 0x180000>;
+ no-map;
+
+ qcom,client-id = <1>;
+ };
+ };
+};
+
+&adsp {
+ status = "okay";
+};
+
+&blsp1_i2c4 {
+ status = "okay";
+
+ muic: usb-switch@25 {
+ compatible = "siliconmitus,sm5502-muic";
+ reg = <0x25>;
+
+ interrupt-parent = <&tlmm>;
+ interrupts = <67 IRQ_TYPE_EDGE_FALLING>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&muic_int_default_state>;
+ };
+};
+
+&blsp1_uart3 {
+ status = "okay";
+};
+
+&rpm_requests {
+ regulators {
+ compatible = "qcom,rpm-pm8226-regulators";
+
+ pm8226_s3: s3 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1300000>;
+ };
+
+ pm8226_s4: s4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2200000>;
+ };
+
+ pm8226_s5: s5 {
+ regulator-min-microvolt = <1150000>;
+ regulator-max-microvolt = <1150000>;
+ };
+
+ pm8226_l1: l1 {
+ regulator-min-microvolt = <1225000>;
+ regulator-max-microvolt = <1225000>;
+ };
+
+ pm8226_l2: l2 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ pm8226_l3: l3 {
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ };
+
+ pm8226_l4: l4 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ pm8226_l5: l5 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ pm8226_l6: l6 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+
+ pm8226_l7: l7 {
+ regulator-min-microvolt = <1850000>;
+ regulator-max-microvolt = <1850000>;
+ };
+
+ pm8226_l8: l8 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+
+ pm8226_l9: l9 {
+ regulator-min-microvolt = <2050000>;
+ regulator-max-microvolt = <2050000>;
+ };
+
+ pm8226_l10: l10 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ pm8226_l12: l12 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ pm8226_l14: l14 {
+ regulator-min-microvolt = <2750000>;
+ regulator-max-microvolt = <2750000>;
+ };
+
+ pm8226_l15: l15 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ pm8226_l16: l16 {
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3350000>;
+ };
+
+ pm8226_l17: l17 {
+ regulator-min-microvolt = <2950000>;
+ regulator-max-microvolt = <2950000>;
+
+ regulator-system-load = <200000>;
+ regulator-allow-set-load;
+ regulator-always-on;
+ };
+
+ pm8226_l18: l18 {
+ regulator-min-microvolt = <2950000>;
+ regulator-max-microvolt = <2950000>;
+ };
+
+ pm8226_l19: l19 {
+ regulator-min-microvolt = <2850000>;
+ regulator-max-microvolt = <3000000>;
+ };
+
+ pm8226_l20: l20 {
+ regulator-min-microvolt = <3075000>;
+ regulator-max-microvolt = <3075000>;
+ };
+
+ pm8226_l21: l21 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2950000>;
+ };
+
+ pm8226_l22: l22 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3000000>;
+ };
+
+ pm8226_l23: l23 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ pm8226_l24: l24 {
+ regulator-min-microvolt = <1300000>;
+ regulator-max-microvolt = <1350000>;
+ };
+
+ pm8226_l25: l25 {
+ regulator-min-microvolt = <1775000>;
+ regulator-max-microvolt = <2125000>;
+ };
+
+ pm8226_l26: l26 {
+ regulator-min-microvolt = <1225000>;
+ regulator-max-microvolt = <1300000>;
+ };
+
+ pm8226_l27: l27 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ pm8226_l28: l28 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2950000>;
+ };
+
+ pm8226_lvs1: lvs1 {};
+ };
+};
+
+&sdhc_1 {
+ vmmc-supply = <&pm8226_l17>;
+ vqmmc-supply = <&pm8226_l6>;
+
+ bus-width = <8>;
+ non-removable;
+
+ status = "okay";
+};
+
+&sdhc_2 {
+ vmmc-supply = <&pm8226_l18>;
+ vqmmc-supply = <&pm8226_l21>;
+
+ bus-width = <4>;
+ cd-gpios = <&tlmm 38 GPIO_ACTIVE_LOW>;
+
+ status = "okay";
+};
+
+&tlmm {
+ accel_int_default_state: accel-int-default-state {
+ pins = "gpio54";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ backlight_i2c_default_state: backlight-i2c-default-state {
+ pins = "gpio20", "gpio21";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ backlight_pwm_default_state: backlight-pwm-default-state {
+ pins = "gpio33";
+ function = "gp0_clk";
+ };
+
+ muic_int_default_state: muic-int-default-state {
+ pins = "gpio67";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ tsp_en_default_state: tsp-en-default-state {
+ pins = "gpio31";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ tsp_int_rst_default_state: tsp-int-rst-default-state {
+ pins = "gpio17";
+ function = "gpio";
+ drive-strength = <10>;
+ bias-pull-up;
+ };
+};
+
+&usb {
+ extcon = <&muic>, <&muic>;
+ status = "okay";
+};
+
+&usb_hs_phy {
+ extcon = <&muic>;
+ v1p8-supply = <&pm8226_l10>;
+ v3p3-supply = <&pm8226_l20>;
+};
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi
index b492c95e5d30..270973e85625 100644
--- a/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi
@@ -20,11 +20,6 @@
chosen { };
- memory@0 {
- device_type = "memory";
- reg = <0x0 0x0>;
- };
-
clocks {
xo_board: xo_board {
compatible = "fixed-clock";
@@ -39,6 +34,57 @@
};
};
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ CPU0: cpu@0 {
+ compatible = "arm,cortex-a7";
+ enable-method = "qcom,msm8226-smp";
+ device_type = "cpu";
+ reg = <0>;
+ next-level-cache = <&L2>;
+ qcom,acc = <&acc0>;
+ qcom,saw = <&saw0>;
+ };
+
+ CPU1: cpu@1 {
+ compatible = "arm,cortex-a7";
+ enable-method = "qcom,msm8226-smp";
+ device_type = "cpu";
+ reg = <1>;
+ next-level-cache = <&L2>;
+ qcom,acc = <&acc1>;
+ qcom,saw = <&saw1>;
+ };
+
+ CPU2: cpu@2 {
+ compatible = "arm,cortex-a7";
+ enable-method = "qcom,msm8226-smp";
+ device_type = "cpu";
+ reg = <2>;
+ next-level-cache = <&L2>;
+ qcom,acc = <&acc2>;
+ qcom,saw = <&saw2>;
+ };
+
+ CPU3: cpu@3 {
+ compatible = "arm,cortex-a7";
+ enable-method = "qcom,msm8226-smp";
+ device_type = "cpu";
+ reg = <3>;
+ next-level-cache = <&L2>;
+ qcom,acc = <&acc3>;
+ qcom,saw = <&saw3>;
+ };
+
+ L2: l2-cache {
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ };
+ };
+
firmware {
scm {
compatible = "qcom,scm-msm8226", "qcom,scm";
@@ -47,6 +93,11 @@
};
};
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x0>;
+ };
+
pmu {
compatible = "arm,cortex-a7-pmu";
interrupts = <GIC_PPI 7 (GIC_CPU_MASK_SIMPLE(4) |
@@ -185,6 +236,117 @@
reg = <0xf9011000 0x1000>;
};
+ saw_l2: power-manager@f9012000 {
+ compatible = "qcom,msm8226-saw2-v2.1-l2", "qcom,saw2";
+ reg = <0xf9012000 0x1000>;
+ };
+
+ watchdog@f9017000 {
+ compatible = "qcom,apss-wdt-msm8226", "qcom,kpss-wdt";
+ reg = <0xf9017000 0x1000>;
+ interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 4 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&sleep_clk>;
+ };
+
+ timer@f9020000 {
+ compatible = "arm,armv7-timer-mem";
+ reg = <0xf9020000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ frame@f9021000 {
+ frame-number = <0>;
+ interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9021000 0x1000>,
+ <0xf9022000 0x1000>;
+ };
+
+ frame@f9023000 {
+ frame-number = <1>;
+ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9023000 0x1000>;
+ status = "disabled";
+ };
+
+ frame@f9024000 {
+ frame-number = <2>;
+ interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9024000 0x1000>;
+ status = "disabled";
+ };
+
+ frame@f9025000 {
+ frame-number = <3>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9025000 0x1000>;
+ status = "disabled";
+ };
+
+ frame@f9026000 {
+ frame-number = <4>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9026000 0x1000>;
+ status = "disabled";
+ };
+
+ frame@f9027000 {
+ frame-number = <5>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9027000 0x1000>;
+ status = "disabled";
+ };
+
+ frame@f9028000 {
+ frame-number = <6>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xf9028000 0x1000>;
+ status = "disabled";
+ };
+ };
+
+ acc0: power-manager@f9088000 {
+ compatible = "qcom,kpss-acc-v2";
+ reg = <0xf9088000 0x1000>, <0xf9008000 0x1000>;
+ };
+
+ saw0: power-manager@f9089000 {
+ compatible = "qcom,msm8226-saw2-v2.1-cpu", "qcom,saw2";
+ reg = <0xf9089000 0x1000>;
+ };
+
+ acc1: power-manager@f9098000 {
+ compatible = "qcom,kpss-acc-v2";
+ reg = <0xf9098000 0x1000>, <0xf9008000 0x1000>;
+ };
+
+ saw1: power-manager@f9099000 {
+ compatible = "qcom,msm8226-saw2-v2.1-cpu", "qcom,saw2";
+ reg = <0xf9099000 0x1000>;
+ };
+
+ acc2: power-manager@f90a8000 {
+ compatible = "qcom,kpss-acc-v2";
+ reg = <0xf90a8000 0x1000>, <0xf9008000 0x1000>;
+ };
+
+ saw2: power-manager@f90a9000 {
+ compatible = "qcom,msm8226-saw2-v2.1-cpu", "qcom,saw2";
+ reg = <0xf90a9000 0x1000>;
+ };
+
+ acc3: power-manager@f90b8000 {
+ compatible = "qcom,kpss-acc-v2";
+ reg = <0xf90b8000 0x1000>, <0xf9008000 0x1000>;
+ };
+
+ saw3: power-manager@f90b9000 {
+ compatible = "qcom,msm8226-saw2-v2.1-cpu", "qcom,saw2";
+ reg = <0xf90b9000 0x1000>;
+ };
+
sdhc_1: mmc@f9824900 {
compatible = "qcom,msm8226-sdhci", "qcom,sdhci-msm-v4";
reg = <0xf9824900 0x11c>, <0xf9824000 0x800>;
@@ -201,35 +363,35 @@
status = "disabled";
};
- sdhc_2: mmc@f98a4900 {
+ sdhc_3: mmc@f9864900 {
compatible = "qcom,msm8226-sdhci", "qcom,sdhci-msm-v4";
- reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
+ reg = <0xf9864900 0x11c>, <0xf9864000 0x800>;
reg-names = "hc", "core";
- interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 224 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "hc_irq", "pwr_irq";
- clocks = <&gcc GCC_SDCC2_AHB_CLK>,
- <&gcc GCC_SDCC2_APPS_CLK>,
+ clocks = <&gcc GCC_SDCC3_AHB_CLK>,
+ <&gcc GCC_SDCC3_APPS_CLK>,
<&rpmcc RPM_SMD_XO_CLK_SRC>;
clock-names = "iface", "core", "xo";
pinctrl-names = "default";
- pinctrl-0 = <&sdhc2_default_state>;
+ pinctrl-0 = <&sdhc3_default_state>;
status = "disabled";
};
- sdhc_3: mmc@f9864900 {
+ sdhc_2: mmc@f98a4900 {
compatible = "qcom,msm8226-sdhci", "qcom,sdhci-msm-v4";
- reg = <0xf9864900 0x11c>, <0xf9864000 0x800>;
+ reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
reg-names = "hc", "core";
- interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 224 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "hc_irq", "pwr_irq";
- clocks = <&gcc GCC_SDCC3_AHB_CLK>,
- <&gcc GCC_SDCC3_APPS_CLK>,
+ clocks = <&gcc GCC_SDCC2_AHB_CLK>,
+ <&gcc GCC_SDCC2_APPS_CLK>,
<&rpmcc RPM_SMD_XO_CLK_SRC>;
clock-names = "iface", "core", "xo";
pinctrl-names = "default";
- pinctrl-0 = <&sdhc3_default_state>;
+ pinctrl-0 = <&sdhc2_default_state>;
status = "disabled";
};
@@ -272,7 +434,6 @@
};
blsp1_i2c1: i2c@f9923000 {
- status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9923000 0x1000>;
interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
@@ -282,10 +443,10 @@
pinctrl-0 = <&blsp1_i2c1_pins>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
blsp1_i2c2: i2c@f9924000 {
- status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9924000 0x1000>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
@@ -295,10 +456,10 @@
pinctrl-0 = <&blsp1_i2c2_pins>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
blsp1_i2c3: i2c@f9925000 {
- status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9925000 0x1000>;
interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
@@ -308,10 +469,10 @@
pinctrl-0 = <&blsp1_i2c3_pins>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
blsp1_i2c4: i2c@f9926000 {
- status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9926000 0x1000>;
interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
@@ -321,10 +482,10 @@
pinctrl-0 = <&blsp1_i2c4_pins>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
blsp1_i2c5: i2c@f9927000 {
- status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9927000 0x1000>;
interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
@@ -334,6 +495,7 @@
pinctrl-0 = <&blsp1_i2c5_pins>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
blsp1_i2c6: i2c@f9928000 {
@@ -351,33 +513,6 @@
status = "disabled";
};
- cci: cci@fda0c000 {
- compatible = "qcom,msm8226-cci";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0xfda0c000 0x1000>;
- interrupts = <GIC_SPI 50 IRQ_TYPE_EDGE_RISING>;
- clocks = <&mmcc CAMSS_TOP_AHB_CLK>,
- <&mmcc CAMSS_CCI_CCI_AHB_CLK>,
- <&mmcc CAMSS_CCI_CCI_CLK>;
- clock-names = "camss_top_ahb",
- "cci_ahb",
- "cci";
-
- pinctrl-names = "default", "sleep";
- pinctrl-0 = <&cci_default>;
- pinctrl-1 = <&cci_sleep>;
-
- status = "disabled";
-
- cci_i2c0: i2c-bus@0 {
- reg = <0>;
- clock-frequency = <400000>;
- #address-cells = <1>;
- #size-cells = <0>;
- };
- };
-
usb: usb@f9a55000 {
compatible = "qcom,ci-hdrc";
reg = <0xf9a55000 0x200>,
@@ -417,6 +552,18 @@
};
};
+ rng@f9bff000 {
+ compatible = "qcom,prng";
+ reg = <0xf9bff000 0x200>;
+ clocks = <&gcc GCC_PRNG_AHB_CLK>;
+ clock-names = "core";
+ };
+
+ sram@fc190000 {
+ compatible = "qcom,msm8226-rpm-stats";
+ reg = <0xfc190000 0x10000>;
+ };
+
gcc: clock-controller@fc400000 {
compatible = "qcom,gcc-msm8226";
reg = <0xfc400000 0x4000>;
@@ -430,146 +577,28 @@
"sleep_clk";
};
- mmcc: clock-controller@fd8c0000 {
- compatible = "qcom,mmcc-msm8226";
- reg = <0xfd8c0000 0x6000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- #power-domain-cells = <1>;
-
- clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
- <&gcc GCC_MMSS_GPLL0_CLK_SRC>,
- <&gcc GPLL0_VOTE>,
- <&gcc GPLL1_VOTE>,
- <&rpmcc RPM_SMD_GFX3D_CLK_SRC>,
- <&mdss_dsi0_phy 1>,
- <&mdss_dsi0_phy 0>;
- clock-names = "xo",
- "mmss_gpll0_vote",
- "gpll0_vote",
- "gpll1_vote",
- "gfx3d_clk_src",
- "dsi0pll",
- "dsi0pllbyte";
- };
-
- tlmm: pinctrl@fd510000 {
- compatible = "qcom,msm8226-pinctrl";
- reg = <0xfd510000 0x4000>;
- gpio-controller;
- #gpio-cells = <2>;
- gpio-ranges = <&tlmm 0 0 117>;
- interrupt-controller;
- #interrupt-cells = <2>;
- interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
-
- blsp1_i2c1_pins: blsp1-i2c1-state {
- pins = "gpio2", "gpio3";
- function = "blsp_i2c1";
- drive-strength = <2>;
- bias-disable;
- };
-
- blsp1_i2c2_pins: blsp1-i2c2-state {
- pins = "gpio6", "gpio7";
- function = "blsp_i2c2";
- drive-strength = <2>;
- bias-disable;
- };
-
- blsp1_i2c3_pins: blsp1-i2c3-state {
- pins = "gpio10", "gpio11";
- function = "blsp_i2c3";
- drive-strength = <2>;
- bias-disable;
- };
-
- blsp1_i2c4_pins: blsp1-i2c4-state {
- pins = "gpio14", "gpio15";
- function = "blsp_i2c4";
- drive-strength = <2>;
- bias-disable;
- };
-
- blsp1_i2c5_pins: blsp1-i2c5-state {
- pins = "gpio18", "gpio19";
- function = "blsp_i2c5";
- drive-strength = <2>;
- bias-disable;
- };
-
- blsp1_i2c6_pins: blsp1-i2c6-state {
- pins = "gpio22", "gpio23";
- function = "blsp_i2c6";
- drive-strength = <2>;
- bias-disable;
- };
-
- cci_default: cci-default-state {
- pins = "gpio29", "gpio30";
- function = "cci_i2c0";
-
- drive-strength = <2>;
- bias-disable;
- };
+ rpm_msg_ram: sram@fc428000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0xfc428000 0x4000>;
- cci_sleep: cci-sleep-state {
- pins = "gpio29", "gpio30";
- function = "gpio";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0xfc428000 0x4000>;
- drive-strength = <2>;
- bias-disable;
+ apss_master_stats: sram@150 {
+ reg = <0x150 0x14>;
};
- sdhc1_default_state: sdhc1-default-state {
- clk-pins {
- pins = "sdc1_clk";
- drive-strength = <10>;
- bias-disable;
- };
-
- cmd-data-pins {
- pins = "sdc1_cmd", "sdc1_data";
- drive-strength = <10>;
- bias-pull-up;
- };
+ mpss_master_stats: sram@b50 {
+ reg = <0xb50 0x14>;
};
- sdhc2_default_state: sdhc2-default-state {
- clk-pins {
- pins = "sdc2_clk";
- drive-strength = <10>;
- bias-disable;
- };
-
- cmd-data-pins {
- pins = "sdc2_cmd", "sdc2_data";
- drive-strength = <10>;
- bias-pull-up;
- };
+ lpss_master_stats: sram@1550 {
+ reg = <0x1550 0x14>;
};
- sdhc3_default_state: sdhc3-default-state {
- clk-pins {
- pins = "gpio44";
- function = "sdc3";
- drive-strength = <8>;
- bias-disable;
- };
-
- cmd-pins {
- pins = "gpio43";
- function = "sdc3";
- drive-strength = <8>;
- bias-pull-up;
- };
-
- data-pins {
- pins = "gpio39", "gpio40", "gpio41", "gpio42";
- function = "sdc3";
- drive-strength = <8>;
- bias-pull-up;
- };
+ pronto_master_stats: sram@1f50 {
+ reg = <0x1f50 0x14>;
};
};
@@ -714,170 +743,153 @@
#interrupt-cells = <4>;
};
- rng@f9bff000 {
- compatible = "qcom,prng";
- reg = <0xf9bff000 0x200>;
- clocks = <&gcc GCC_PRNG_AHB_CLK>;
- clock-names = "core";
+ tcsr_mutex: hwlock@fd484000 {
+ compatible = "qcom,msm8226-tcsr-mutex", "qcom,tcsr-mutex";
+ reg = <0xfd484000 0x1000>;
+ #hwlock-cells = <1>;
};
- timer@f9020000 {
- compatible = "arm,armv7-timer-mem";
- reg = <0xf9020000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- frame@f9021000 {
- frame-number = <0>;
- interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9021000 0x1000>,
- <0xf9022000 0x1000>;
- };
+ tlmm: pinctrl@fd510000 {
+ compatible = "qcom,msm8226-pinctrl";
+ reg = <0xfd510000 0x4000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&tlmm 0 0 117>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
- frame@f9023000 {
- frame-number = <1>;
- interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9023000 0x1000>;
- status = "disabled";
+ blsp1_i2c1_pins: blsp1-i2c1-state {
+ pins = "gpio2", "gpio3";
+ function = "blsp_i2c1";
+ drive-strength = <2>;
+ bias-disable;
};
- frame@f9024000 {
- frame-number = <2>;
- interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9024000 0x1000>;
- status = "disabled";
+ blsp1_i2c2_pins: blsp1-i2c2-state {
+ pins = "gpio6", "gpio7";
+ function = "blsp_i2c2";
+ drive-strength = <2>;
+ bias-disable;
};
- frame@f9025000 {
- frame-number = <3>;
- interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9025000 0x1000>;
- status = "disabled";
+ blsp1_i2c3_pins: blsp1-i2c3-state {
+ pins = "gpio10", "gpio11";
+ function = "blsp_i2c3";
+ drive-strength = <2>;
+ bias-disable;
};
- frame@f9026000 {
- frame-number = <4>;
- interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9026000 0x1000>;
- status = "disabled";
+ blsp1_i2c4_pins: blsp1-i2c4-state {
+ pins = "gpio14", "gpio15";
+ function = "blsp_i2c4";
+ drive-strength = <2>;
+ bias-disable;
};
- frame@f9027000 {
- frame-number = <5>;
- interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9027000 0x1000>;
- status = "disabled";
+ blsp1_i2c5_pins: blsp1-i2c5-state {
+ pins = "gpio18", "gpio19";
+ function = "blsp_i2c5";
+ drive-strength = <2>;
+ bias-disable;
};
- frame@f9028000 {
- frame-number = <6>;
- interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
- reg = <0xf9028000 0x1000>;
- status = "disabled";
+ blsp1_i2c6_pins: blsp1-i2c6-state {
+ pins = "gpio22", "gpio23";
+ function = "blsp_i2c6";
+ drive-strength = <2>;
+ bias-disable;
};
- };
-
- sram@fc190000 {
- compatible = "qcom,msm8226-rpm-stats";
- reg = <0xfc190000 0x10000>;
- };
-
- rpm_msg_ram: sram@fc428000 {
- compatible = "qcom,rpm-msg-ram";
- reg = <0xfc428000 0x4000>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0xfc428000 0x4000>;
-
- apss_master_stats: sram@150 {
- reg = <0x150 0x14>;
- };
+ cci_default: cci-default-state {
+ pins = "gpio29", "gpio30";
+ function = "cci_i2c0";
- mpss_master_stats: sram@b50 {
- reg = <0xb50 0x14>;
+ drive-strength = <2>;
+ bias-disable;
};
- lpss_master_stats: sram@1550 {
- reg = <0x1550 0x14>;
- };
+ cci_sleep: cci-sleep-state {
+ pins = "gpio29", "gpio30";
+ function = "gpio";
- pronto_master_stats: sram@1f50 {
- reg = <0x1f50 0x14>;
+ drive-strength = <2>;
+ bias-disable;
};
- };
-
- tcsr_mutex: hwlock@fd484000 {
- compatible = "qcom,msm8226-tcsr-mutex", "qcom,tcsr-mutex";
- reg = <0xfd484000 0x1000>;
- #hwlock-cells = <1>;
- };
- adsp: remoteproc@fe200000 {
- compatible = "qcom,msm8226-adsp-pil";
- reg = <0xfe200000 0x100>;
-
- interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>,
- <&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
- <&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
- <&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
- <&adsp_smp2p_in 3 IRQ_TYPE_EDGE_RISING>;
- interrupt-names = "wdog", "fatal", "ready", "handover", "stop-ack";
-
- power-domains = <&rpmpd MSM8226_VDDCX>;
- power-domain-names = "cx";
-
- clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>;
- clock-names = "xo";
-
- memory-region = <&adsp_region>;
-
- qcom,smem-states = <&adsp_smp2p_out 0>;
- qcom,smem-state-names = "stop";
-
- status = "disabled";
+ sdhc1_default_state: sdhc1-default-state {
+ clk-pins {
+ pins = "sdc1_clk";
+ drive-strength = <10>;
+ bias-disable;
+ };
- smd-edge {
- interrupts = <GIC_SPI 156 IRQ_TYPE_EDGE_RISING>;
+ cmd-data-pins {
+ pins = "sdc1_cmd", "sdc1_data";
+ drive-strength = <10>;
+ bias-pull-up;
+ };
+ };
- qcom,ipc = <&apcs 8 8>;
- qcom,smd-edge = <1>;
+ sdhc2_default_state: sdhc2-default-state {
+ clk-pins {
+ pins = "sdc2_clk";
+ drive-strength = <10>;
+ bias-disable;
+ };
- label = "lpass";
+ cmd-data-pins {
+ pins = "sdc2_cmd", "sdc2_data";
+ drive-strength = <10>;
+ bias-pull-up;
+ };
};
- };
- sram@fdd00000 {
- compatible = "qcom,msm8226-ocmem";
- reg = <0xfdd00000 0x2000>,
- <0xfec00000 0x20000>;
- reg-names = "ctrl", "mem";
- ranges = <0 0xfec00000 0x20000>;
- clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>;
- clock-names = "core";
+ sdhc3_default_state: sdhc3-default-state {
+ clk-pins {
+ pins = "gpio44";
+ function = "sdc3";
+ drive-strength = <8>;
+ bias-disable;
+ };
- #address-cells = <1>;
- #size-cells = <1>;
+ cmd-pins {
+ pins = "gpio43";
+ function = "sdc3";
+ drive-strength = <8>;
+ bias-pull-up;
+ };
- gmu_sram: gmu-sram@0 {
- reg = <0x0 0x20000>;
+ data-pins {
+ pins = "gpio39", "gpio40", "gpio41", "gpio42";
+ function = "sdc3";
+ drive-strength = <8>;
+ bias-pull-up;
+ };
};
};
- sram@fe805000 {
- compatible = "qcom,msm8226-imem", "syscon", "simple-mfd";
- reg = <0xfe805000 0x1000>;
-
- reboot-mode {
- compatible = "syscon-reboot-mode";
- offset = <0x65c>;
+ mmcc: clock-controller@fd8c0000 {
+ compatible = "qcom,mmcc-msm8226";
+ reg = <0xfd8c0000 0x6000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
- mode-bootloader = <0x77665500>;
- mode-normal = <0x77665501>;
- mode-recovery = <0x77665502>;
- };
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_MMSS_GPLL0_CLK_SRC>,
+ <&gcc GPLL0_VOTE>,
+ <&gcc GPLL1_VOTE>,
+ <&rpmcc RPM_SMD_GFX3D_CLK_SRC>,
+ <&mdss_dsi0_phy 1>,
+ <&mdss_dsi0_phy 0>;
+ clock-names = "xo",
+ "mmss_gpll0_vote",
+ "gpll0_vote",
+ "gpll1_vote",
+ "gfx3d_clk_src",
+ "dsi0pll",
+ "dsi0pllbyte";
};
mdss: display-subsystem@fd900000 {
@@ -1007,6 +1019,33 @@
};
};
+ cci: cci@fda0c000 {
+ compatible = "qcom,msm8226-cci";
+ reg = <0xfda0c000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 50 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&mmcc CAMSS_TOP_AHB_CLK>,
+ <&mmcc CAMSS_CCI_CCI_AHB_CLK>,
+ <&mmcc CAMSS_CCI_CCI_CLK>;
+ clock-names = "camss_top_ahb",
+ "cci_ahb",
+ "cci";
+
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&cci_default>;
+ pinctrl-1 = <&cci_sleep>;
+
+ status = "disabled";
+
+ cci_i2c0: i2c-bus@0 {
+ reg = <0>;
+ clock-frequency = <400000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
gpu: adreno@fdb00000 {
compatible = "qcom,adreno-305.18", "qcom,adreno";
reg = <0xfdb00000 0x10000>;
@@ -1046,6 +1085,71 @@
};
};
};
+
+ sram@fdd00000 {
+ compatible = "qcom,msm8226-ocmem";
+ reg = <0xfdd00000 0x2000>,
+ <0xfec00000 0x20000>;
+ reg-names = "ctrl", "mem";
+ ranges = <0 0xfec00000 0x20000>;
+ clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>;
+ clock-names = "core";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ gmu_sram: gmu-sram@0 {
+ reg = <0x0 0x20000>;
+ };
+ };
+
+ adsp: remoteproc@fe200000 {
+ compatible = "qcom,msm8226-adsp-pil";
+ reg = <0xfe200000 0x100>;
+
+ interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>,
+ <&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
+ <&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
+ <&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
+ <&adsp_smp2p_in 3 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "wdog", "fatal", "ready", "handover", "stop-ack";
+
+ power-domains = <&rpmpd MSM8226_VDDCX>;
+ power-domain-names = "cx";
+
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>;
+ clock-names = "xo";
+
+ memory-region = <&adsp_region>;
+
+ qcom,smem-states = <&adsp_smp2p_out 0>;
+ qcom,smem-state-names = "stop";
+
+ status = "disabled";
+
+ smd-edge {
+ interrupts = <GIC_SPI 156 IRQ_TYPE_EDGE_RISING>;
+
+ qcom,ipc = <&apcs 8 8>;
+ qcom,smd-edge = <1>;
+
+ label = "lpass";
+ };
+ };
+
+ sram@fe805000 {
+ compatible = "qcom,msm8226-imem", "syscon", "simple-mfd";
+ reg = <0xfe805000 0x1000>;
+
+ reboot-mode {
+ compatible = "syscon-reboot-mode";
+ offset = <0x65c>;
+
+ mode-bootloader = <0x77665500>;
+ mode-normal = <0x77665501>;
+ mode-recovery = <0x77665502>;
+ };
+ };
};
thermal-zones {
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8660.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8660.dtsi
index a7c245b9c8f9..455ba4bf1bf4 100644
--- a/arch/arm/boot/dts/qcom/qcom-msm8660.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-msm8660.dtsi
@@ -47,7 +47,7 @@
cpu-pmu {
compatible = "qcom,scorpion-mp-pmu";
- interrupts = <1 9 0x304>;
+ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
};
clocks {
@@ -89,12 +89,11 @@
timer@2000000 {
compatible = "qcom,scss-timer", "qcom,msm-timer";
- interrupts = <1 0 0x301>,
- <1 1 0x301>,
- <1 2 0x301>;
+ interrupts = <GIC_PPI 0 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+ <GIC_PPI 1 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+ <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>;
reg = <0x02000000 0x100>;
- clock-frequency = <27000000>,
- <32768>;
+ clock-frequency = <27000000>;
cpu-offset = <0x40000>;
};
@@ -105,7 +104,7 @@
gpio-controller;
gpio-ranges = <&tlmm 0 0 173>;
#gpio-cells = <2>;
- interrupts = <0 16 0x4>;
+ interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -283,7 +282,7 @@
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x19c40000 0x1000>,
<0x19c00000 0x1000>;
- interrupts = <0 195 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 195 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI12_UART_CLK>, <&gcc GSBI12_H_CLK>;
clock-names = "core", "iface";
status = "disabled";
@@ -292,7 +291,7 @@
gsbi12_i2c: i2c@19c80000 {
compatible = "qcom,i2c-qup-v1.1.1";
reg = <0x19c80000 0x1000>;
- interrupts = <0 196 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 196 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GSBI12_QUP_CLK>, <&gcc GSBI12_H_CLK>;
clock-names = "core", "iface";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8926-htc-memul.dts b/arch/arm/boot/dts/qcom/qcom-msm8926-htc-memul.dts
index ed328b24335f..3037344eb240 100644
--- a/arch/arm/boot/dts/qcom/qcom-msm8926-htc-memul.dts
+++ b/arch/arm/boot/dts/qcom/qcom-msm8926-htc-memul.dts
@@ -107,7 +107,20 @@
};
unknown@fb00000 {
- reg = <0x0fb00000 0x1b00000>;
+ reg = <0x0fb00000 0x280000>;
+ no-map;
+ };
+
+ rmtfs@fd80000 {
+ compatible = "qcom,rmtfs-mem";
+ reg = <0x0fd80000 0x180000>;
+ no-map;
+
+ qcom,client-id = <1>;
+ };
+
+ unknown@ff00000 {
+ reg = <0x0ff00000 0x1700000>;
no-map;
};
};
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8926-samsung-matisselte.dts b/arch/arm/boot/dts/qcom/qcom-msm8926-samsung-matisselte.dts
new file mode 100644
index 000000000000..d0e1bc39f8ef
--- /dev/null
+++ b/arch/arm/boot/dts/qcom/qcom-msm8926-samsung-matisselte.dts
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2022, Matti Lehtimäki <matti.lehtimaki@gmail.com>
+ * Copyright (c) 2023, Stefan Hansson <newbyte@postmarketos.org>
+ */
+
+/dts-v1/;
+
+#include "qcom-msm8226-samsung-matisse-common.dtsi"
+
+/ {
+ model = "Samsung Galaxy Tab 4 10.1 LTE";
+ compatible = "samsung,matisselte", "qcom,msm8926", "qcom,msm8226";
+ chassis-type = "tablet";
+
+ reg_tsp_3p3v: regulator-tsp-3p3v {
+ compatible = "regulator-fixed";
+ regulator-name = "tsp_3p3v";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ gpio = <&tlmm 32 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&tsp_en1_default_state>;
+ };
+};
+
+&tlmm {
+ tsp_en1_default_state: tsp-en1-default-state {
+ pins = "gpio32";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+};
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8960-pins.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8960-pins.dtsi
new file mode 100644
index 000000000000..4fa982771288
--- /dev/null
+++ b/arch/arm/boot/dts/qcom/qcom-msm8960-pins.dtsi
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+&msmgpio {
+ i2c3_default_state: i2c3-default-state {
+ i2c3-pins {
+ pins = "gpio16", "gpio17";
+ function = "gsbi3";
+ drive-strength = <8>;
+ bias-disable;
+ };
+ };
+
+ i2c3_sleep_state: i2c3-sleep-state {
+ i2c3-pins {
+ pins = "gpio16", "gpio17";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-bus-hold;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8960-samsung-expressatt.dts b/arch/arm/boot/dts/qcom/qcom-msm8960-samsung-expressatt.dts
index 1a5116336ff0..af6cc6393d74 100644
--- a/arch/arm/boot/dts/qcom/qcom-msm8960-samsung-expressatt.dts
+++ b/arch/arm/boot/dts/qcom/qcom-msm8960-samsung-expressatt.dts
@@ -4,6 +4,9 @@
#include "qcom-msm8960.dtsi"
#include "pm8921.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+#include <dt-bindings/input/gpio-keys.h>
/ {
model = "Samsung Galaxy Express SGH-I437";
@@ -19,6 +22,36 @@
chosen {
stdout-path = "serial0:115200n8";
};
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&gpio_keys_pin_a>;
+
+ key-home {
+ label = "Home";
+ gpios = <&msmgpio 40 GPIO_ACTIVE_LOW>;
+ debounce-interval = <5>;
+ linux,code = <KEY_HOMEPAGE>;
+ wakeup-event-action = <EV_ACT_ASSERTED>;
+ wakeup-source;
+ };
+
+ key-volume-up {
+ label = "Volume Up";
+ gpios = <&msmgpio 50 GPIO_ACTIVE_LOW>;
+ debounce-interval = <5>;
+ linux,code = <KEY_VOLUMEUP>;
+ };
+
+ key-volume-down {
+ label = "Volume Down";
+ gpios = <&msmgpio 81 GPIO_ACTIVE_LOW>;
+ debounce-interval = <5>;
+ linux,code = <KEY_VOLUMEDOWN>;
+ };
+ };
};
&gsbi5 {
@@ -52,6 +85,27 @@
status = "okay";
};
+&gsbi3 {
+ qcom,mode = <GSBI_PROT_I2C>;
+ status = "okay";
+};
+
+&gsbi3_i2c {
+ status = "okay";
+
+ // Atmel mXT224S touchscreen
+ touchscreen@4a {
+ compatible = "atmel,maxtouch";
+ reg = <0x4a>;
+ interrupt-parent = <&msmgpio>;
+ interrupts = <11 IRQ_TYPE_EDGE_FALLING>;
+ vdda-supply = <&pm8921_lvs6>;
+ vdd-supply = <&pm8921_l17>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&touchscreen>;
+ };
+};
+
&msmgpio {
spi1_default: spi1-default-state {
mosi-pins {
@@ -83,6 +137,21 @@
bias-disable;
};
};
+
+ gpio_keys_pin_a: gpio-keys-active-state {
+ pins = "gpio40", "gpio50", "gpio81";
+ function = "gpio";
+ drive-strength = <8>;
+ bias-disable;
+ };
+
+ touchscreen: touchscreen-int-state {
+ pins = "gpio11";
+ function = "gpio";
+ output-enable;
+ bias-disable;
+ drive-strength = <2>;
+ };
};
&pm8921 {
@@ -245,7 +314,7 @@
};
pm8921_l17: l17 {
- regulator-min-microvolt = <1800000>;
+ regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
bias-pull-down;
};
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8960.dtsi
index f420740e068e..922f9e49468a 100644
--- a/arch/arm/boot/dts/qcom/qcom-msm8960.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-msm8960.dtsi
@@ -220,16 +220,24 @@
#clock-cells = <0>;
};
- saw0: regulator@2089000 {
- compatible = "qcom,saw2";
+ saw0: power-manager@2089000 {
+ compatible = "qcom,msm8960-saw2-cpu", "qcom,saw2";
reg = <0x02089000 0x1000>, <0x02009000 0x1000>;
- regulator;
+
+ saw0_vreg: regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
};
- saw1: regulator@2099000 {
- compatible = "qcom,saw2";
+ saw1: power-manager@2099000 {
+ compatible = "qcom,msm8960-saw2-cpu", "qcom,saw2";
reg = <0x02099000 0x1000>, <0x02009000 0x1000>;
- regulator;
+
+ saw1_vreg: regulator {
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1300000>;
+ };
};
gsbi5: gsbi@16400000 {
@@ -359,5 +367,33 @@
};
};
};
+
+ gsbi3: gsbi@16200000 {
+ compatible = "qcom,gsbi-v1.0.0";
+ reg = <0x16200000 0x100>;
+ ranges;
+ cell-index = <3>;
+ clocks = <&gcc GSBI3_H_CLK>;
+ clock-names = "iface";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ status = "disabled";
+
+ gsbi3_i2c: i2c@16280000 {
+ compatible = "qcom,i2c-qup-v1.1.1";
+ reg = <0x16280000 0x1000>;
+ pinctrl-0 = <&i2c3_default_state>;
+ pinctrl-1 = <&i2c3_sleep_state>;
+ pinctrl-names = "default", "sleep";
+ interrupts = <GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GSBI3_QUP_CLK>,
+ <&gcc GSBI3_H_CLK>;
+ clock-names = "core", "iface";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ };
};
};
+#include "qcom-msm8960-pins.dtsi"
diff --git a/arch/arm/boot/dts/qcom/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8974.dtsi
index b1413983787c..5efc38d712cc 100644
--- a/arch/arm/boot/dts/qcom/qcom-msm8974.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-msm8974.dtsi
@@ -31,7 +31,7 @@
cpus {
#address-cells = <1>;
#size-cells = <0>;
- interrupts = <GIC_PPI 9 0xf04>;
+ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
CPU0: cpu@0 {
compatible = "qcom,krait";
@@ -110,7 +110,7 @@
pmu {
compatible = "qcom,krait-pmu";
- interrupts = <GIC_PPI 7 0xf04>;
+ interrupts = <GIC_PPI 7 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
};
rpm: remoteproc {
@@ -346,10 +346,9 @@
reg = <0xf9011000 0x1000>;
};
- saw_l2: power-controller@f9012000 {
- compatible = "qcom,saw2";
+ saw_l2: power-manager@f9012000 {
+ compatible = "qcom,msm8974-saw2-v2.1-l2", "qcom,saw2";
reg = <0xf9012000 0x1000>;
- regulator;
};
watchdog@f9017000 {
@@ -424,7 +423,7 @@
reg = <0xf9088000 0x1000>, <0xf9008000 0x1000>;
};
- saw0: power-controller@f9089000 {
+ saw0: power-manager@f9089000 {
compatible = "qcom,msm8974-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf9089000 0x1000>, <0xf9009000 0x1000>;
};
@@ -434,7 +433,7 @@
reg = <0xf9098000 0x1000>, <0xf9008000 0x1000>;
};
- saw1: power-controller@f9099000 {
+ saw1: power-manager@f9099000 {
compatible = "qcom,msm8974-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf9099000 0x1000>, <0xf9009000 0x1000>;
};
@@ -444,7 +443,7 @@
reg = <0xf90a8000 0x1000>, <0xf9008000 0x1000>;
};
- saw2: power-controller@f90a9000 {
+ saw2: power-manager@f90a9000 {
compatible = "qcom,msm8974-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf90a9000 0x1000>, <0xf9009000 0x1000>;
};
@@ -454,7 +453,7 @@
reg = <0xf90b8000 0x1000>, <0xf9008000 0x1000>;
};
- saw3: power-controller@f90b9000 {
+ saw3: power-manager@f90b9000 {
compatible = "qcom,msm8974-saw2-v2.1-cpu", "qcom,saw2";
reg = <0xf90b9000 0x1000>, <0xf9009000 0x1000>;
};
@@ -538,7 +537,7 @@
status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9923000 0x1000>;
- interrupts = <0 95 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP1_QUP1_I2C_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>;
clock-names = "core", "iface";
pinctrl-names = "default", "sleep";
@@ -566,7 +565,7 @@
status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9925000 0x1000>;
- interrupts = <0 97 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP1_QUP3_I2C_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>;
clock-names = "core", "iface";
pinctrl-names = "default", "sleep";
@@ -666,7 +665,7 @@
status = "disabled";
compatible = "qcom,i2c-qup-v2.1.1";
reg = <0xf9968000 0x1000>;
- interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP2_QUP6_I2C_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
pinctrl-names = "default", "sleep";
@@ -1234,7 +1233,7 @@
qfprom: qfprom@fc4bc000 {
compatible = "qcom,msm8974-qfprom", "qcom,qfprom";
- reg = <0xfc4bc000 0x1000>;
+ reg = <0xfc4bc000 0x2100>;
#address-cells = <1>;
#size-cells = <1>;
@@ -2403,10 +2402,10 @@
timer {
compatible = "arm,armv7-timer";
- interrupts = <GIC_PPI 2 0xf08>,
- <GIC_PPI 3 0xf08>,
- <GIC_PPI 4 0xf08>,
- <GIC_PPI 1 0xf08>;
+ interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 4 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 1 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
clock-frequency = <19200000>;
};
};
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
index 2045fc779f88..edc9aaf828c8 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -340,10 +340,10 @@
"msi8";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
- interrupt-map = <0 0 0 1 &intc 0 0 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
- <0 0 0 2 &intc 0 0 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
- <0 0 0 3 &intc 0 0 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
- <0 0 0 4 &intc 0 0 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
+ interrupt-map = <0 0 0 1 &intc 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
+ <0 0 0 2 &intc 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
+ <0 0 0 3 &intc 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
+ <0 0 0 4 &intc 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
clocks = <&gcc GCC_PCIE_PIPE_CLK>,
<&gcc GCC_PCIE_AUX_CLK>,
@@ -580,12 +580,16 @@
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 51 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 10 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc 51 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_GDSC>;
@@ -727,57 +731,57 @@
frame@17821000 {
frame-number = <0>;
- interrupts = <GIC_SPI 7 0x4>,
- <GIC_SPI 6 0x4>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17821000 0x1000>,
<0x17822000 0x1000>;
};
frame@17823000 {
frame-number = <1>;
- interrupts = <GIC_SPI 8 0x4>;
+ interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17823000 0x1000>;
status = "disabled";
};
frame@17824000 {
frame-number = <2>;
- interrupts = <GIC_SPI 9 0x4>;
+ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17824000 0x1000>;
status = "disabled";
};
frame@17825000 {
frame-number = <3>;
- interrupts = <GIC_SPI 10 0x4>;
+ interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17825000 0x1000>;
status = "disabled";
};
frame@17826000 {
frame-number = <4>;
- interrupts = <GIC_SPI 11 0x4>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17826000 0x1000>;
status = "disabled";
};
frame@17827000 {
frame-number = <5>;
- interrupts = <GIC_SPI 12 0x4>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17827000 0x1000>;
status = "disabled";
};
frame@17828000 {
frame-number = <6>;
- interrupts = <GIC_SPI 13 0x4>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17828000 0x1000>;
status = "disabled";
};
frame@17829000 {
frame-number = <7>;
- interrupts = <GIC_SPI 14 0x4>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17829000 0x1000>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx65.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx65.dtsi
index 40591a4da6a4..a949454212e9 100644
--- a/arch/arm/boot/dts/qcom/qcom-sdx65.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx65.dtsi
@@ -492,23 +492,25 @@
clocks = <&gcc GCC_USB30_SLV_AHB_CLK>,
<&gcc GCC_USB30_MASTER_CLK>,
<&gcc GCC_USB30_MSTR_AXI_CLK>,
- <&gcc GCC_USB30_MOCK_UTMI_CLK>,
- <&gcc GCC_USB30_SLEEP_CLK>;
- clock-names = "cfg_noc", "core", "iface", "mock_utmi",
- "sleep";
+ <&gcc GCC_USB30_SLEEP_CLK>,
+ <&gcc GCC_USB30_MOCK_UTMI_CLK>;
+ clock-names = "cfg_noc", "core", "iface", "sleep",
+ "mock_utmi";
assigned-clocks = <&gcc GCC_USB30_MOCK_UTMI_CLK>,
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 76 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 19 IRQ_TYPE_EDGE_BOTH>,
<&pdc 18 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 19 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 76 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_GDSC>;
@@ -667,57 +669,57 @@
frame@17821000 {
frame-number = <0>;
- interrupts = <GIC_SPI 7 0x4>,
- <GIC_SPI 6 0x4>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17821000 0x1000>,
<0x17822000 0x1000>;
};
frame@17823000 {
frame-number = <1>;
- interrupts = <GIC_SPI 8 0x4>;
+ interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17823000 0x1000>;
status = "disabled";
};
frame@17824000 {
frame-number = <2>;
- interrupts = <GIC_SPI 9 0x4>;
+ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17824000 0x1000>;
status = "disabled";
};
frame@17825000 {
frame-number = <3>;
- interrupts = <GIC_SPI 10 0x4>;
+ interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17825000 0x1000>;
status = "disabled";
};
frame@17826000 {
frame-number = <4>;
- interrupts = <GIC_SPI 11 0x4>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17826000 0x1000>;
status = "disabled";
};
frame@17827000 {
frame-number = <5>;
- interrupts = <GIC_SPI 12 0x4>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17827000 0x1000>;
status = "disabled";
};
frame@17828000 {
frame-number = <6>;
- interrupts = <GIC_SPI 13 0x4>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17828000 0x1000>;
status = "disabled";
};
frame@17829000 {
frame-number = <7>;
- interrupts = <GIC_SPI 14 0x4>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x17829000 0x1000>;
status = "disabled";
};
@@ -804,10 +806,10 @@
timer {
compatible = "arm,armv7-timer";
- interrupts = <1 13 0xf08>,
- <1 12 0xf08>,
- <1 10 0xf08>,
- <1 11 0xf08>;
+ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 12 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
clock-frequency = <19200000>;
};
};
diff --git a/arch/arm/boot/dts/renesas/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/renesas/r8a73a4-ape6evm.dts
index ed75c01dbee1..3d02f065f71c 100644
--- a/arch/arm/boot/dts/renesas/r8a73a4-ape6evm.dts
+++ b/arch/arm/boot/dts/renesas/r8a73a4-ape6evm.dts
@@ -209,6 +209,18 @@
status = "okay";
};
+&extal1_clk {
+ clock-frequency = <26000000>;
+};
+
+&extal2_clk {
+ clock-frequency = <48000000>;
+};
+
+&extalr_clk {
+ clock-frequency = <32768>;
+};
+
&pfc {
scifa0_pins: scifa0 {
groups = "scifa0_data";
diff --git a/arch/arm/boot/dts/renesas/r8a73a4.dtsi b/arch/arm/boot/dts/renesas/r8a73a4.dtsi
index c39066967053..ac654ff45d0e 100644
--- a/arch/arm/boot/dts/renesas/r8a73a4.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a73a4.dtsi
@@ -450,17 +450,20 @@
extalr_clk: extalr {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <32768>;
+ /* This value must be overridden by the board. */
+ clock-frequency = <0>;
};
extal1_clk: extal1 {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <25000000>;
+ /* This value must be overridden by the board. */
+ clock-frequency = <0>;
};
extal2_clk: extal2 {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <48000000>;
+ /* This value must be overridden by the board. */
+ clock-frequency = <0>;
};
fsiack_clk: fsiack {
compatible = "fixed-clock";
@@ -621,6 +624,13 @@
clock-div = <2>;
clock-mult = <1>;
};
+ cp_clk: cp {
+ compatible = "fixed-factor-clock";
+ clocks = <&main_div2_clk>;
+ #clock-cells = <0>;
+ clock-div = <1>;
+ clock-mult = <1>;
+ };
pll0_div2_clk: pll0_div2 {
compatible = "fixed-factor-clock";
clocks = <&cpg_clocks R8A73A4_CLK_PLL0>;
@@ -686,9 +696,8 @@
mstp4_clks: mstp4_clks@e6150140 {
compatible = "renesas,r8a73a4-mstp-clocks", "renesas,cpg-mstp-clocks";
reg = <0 0xe6150140 0 4>, <0 0xe615004c 0 4>;
- clocks = <&main_div2_clk>, <&cpg_clocks R8A73A4_CLK_ZS>,
- <&main_div2_clk>,
- <&cpg_clocks R8A73A4_CLK_HP>,
+ clocks = <&cp_clk>, <&cpg_clocks R8A73A4_CLK_ZS>,
+ <&cp_clk>, <&cpg_clocks R8A73A4_CLK_HP>,
<&cpg_clocks R8A73A4_CLK_HP>;
#clock-cells = <1>;
clock-indices = <
@@ -702,7 +711,7 @@
mstp5_clks: mstp5_clks@e6150144 {
compatible = "renesas,r8a73a4-mstp-clocks", "renesas,cpg-mstp-clocks";
reg = <0 0xe6150144 0 4>, <0 0xe615003c 0 4>;
- clocks = <&extal2_clk>, <&cpg_clocks R8A73A4_CLK_HP>;
+ clocks = <&cp_clk>, <&cpg_clocks R8A73A4_CLK_HP>;
#clock-cells = <1>;
clock-indices = <
R8A73A4_CLK_THERMAL R8A73A4_CLK_IIC8
diff --git a/arch/arm/boot/dts/renesas/r8a7740.dtsi b/arch/arm/boot/dts/renesas/r8a7740.dtsi
index 55884ec701f8..d13ab86c3ab4 100644
--- a/arch/arm/boot/dts/renesas/r8a7740.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7740.dtsi
@@ -459,6 +459,7 @@
interrupts = <GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 200 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&mstp1_clks R8A7740_CLK_TMU0>;
clock-names = "fck";
power-domains = <&pd_a4r>;
@@ -474,6 +475,7 @@
interrupts = <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&mstp1_clks R8A7740_CLK_TMU1>;
clock-names = "fck";
power-domains = <&pd_a4r>;
diff --git a/arch/arm/boot/dts/renesas/r8a7778.dtsi b/arch/arm/boot/dts/renesas/r8a7778.dtsi
index 8d4530ed2fc6..b80e832c9277 100644
--- a/arch/arm/boot/dts/renesas/r8a7778.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7778.dtsi
@@ -199,7 +199,9 @@
reg = <0xffd80000 0x30>;
interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&mstp0_clks R8A7778_CLK_TMU0>;
clock-names = "fck";
power-domains = <&cpg_clocks>;
@@ -214,7 +216,9 @@
reg = <0xffd81000 0x30>;
interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&mstp0_clks R8A7778_CLK_TMU1>;
clock-names = "fck";
power-domains = <&cpg_clocks>;
@@ -230,6 +234,7 @@
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&mstp0_clks R8A7778_CLK_TMU2>;
clock-names = "fck";
power-domains = <&cpg_clocks>;
@@ -250,6 +255,8 @@
reg = <0xffd90000 0x1000>, /* SRU */
<0xffd91000 0x240>, /* SSI */
<0xfffe0000 0x24>; /* ADG */
+ reg-names = "sru", "ssi", "adg";
+
clocks = <&mstp3_clks R8A7778_CLK_SSI8>,
<&mstp3_clks R8A7778_CLK_SSI7>,
<&mstp3_clks R8A7778_CLK_SSI6>,
diff --git a/arch/arm/boot/dts/renesas/r8a7779.dtsi b/arch/arm/boot/dts/renesas/r8a7779.dtsi
index 7743af5e2a6f..1944703cba4f 100644
--- a/arch/arm/boot/dts/renesas/r8a7779.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7779.dtsi
@@ -402,7 +402,9 @@
reg = <0xffd80000 0x30>;
interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&mstp0_clks R8A7779_CLK_TMU0>;
clock-names = "fck";
power-domains = <&sysc R8A7779_PD_ALWAYS_ON>;
@@ -417,7 +419,9 @@
reg = <0xffd81000 0x30>;
interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&mstp0_clks R8A7779_CLK_TMU1>;
clock-names = "fck";
power-domains = <&sysc R8A7779_PD_ALWAYS_ON>;
@@ -433,6 +437,7 @@
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&mstp0_clks R8A7779_CLK_TMU2>;
clock-names = "fck";
power-domains = <&sysc R8A7779_PD_ALWAYS_ON>;
diff --git a/arch/arm/boot/dts/renesas/r8a7790-lager.dts b/arch/arm/boot/dts/renesas/r8a7790-lager.dts
index 2fba4d084001..8590981245a6 100644
--- a/arch/arm/boot/dts/renesas/r8a7790-lager.dts
+++ b/arch/arm/boot/dts/renesas/r8a7790-lager.dts
@@ -447,6 +447,7 @@
interrupt-parent = <&irqc0>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
rtc {
compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7790-stout.dts b/arch/arm/boot/dts/renesas/r8a7790-stout.dts
index f9bc5b4f019d..683f7395fab0 100644
--- a/arch/arm/boot/dts/renesas/r8a7790-stout.dts
+++ b/arch/arm/boot/dts/renesas/r8a7790-stout.dts
@@ -347,6 +347,7 @@
interrupt-parent = <&irqc0>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
onkey {
compatible = "dlg,da9063-onkey";
diff --git a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts
index e9c13bb03772..0efd9f98c75a 100644
--- a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts
@@ -819,6 +819,7 @@
interrupt-parent = <&irqc0>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
rtc {
compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7791-porter.dts b/arch/arm/boot/dts/renesas/r8a7791-porter.dts
index 7e8bc06715f6..93c86e921645 100644
--- a/arch/arm/boot/dts/renesas/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/renesas/r8a7791-porter.dts
@@ -413,6 +413,7 @@
interrupt-parent = <&irqc0>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
watchdog {
compatible = "dlg,da9063-watchdog";
diff --git a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts
index 4f9838cf97ee..540a9ad28f28 100644
--- a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts
+++ b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts
@@ -381,6 +381,7 @@
interrupt-parent = <&irqc>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
rtc {
compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7793-gose.dts b/arch/arm/boot/dts/renesas/r8a7793-gose.dts
index 1744fdbf9e0c..1ea6c757893b 100644
--- a/arch/arm/boot/dts/renesas/r8a7793-gose.dts
+++ b/arch/arm/boot/dts/renesas/r8a7793-gose.dts
@@ -759,6 +759,7 @@
interrupt-parent = <&irqc0>;
interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
rtc {
compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7794-alt.dts b/arch/arm/boot/dts/renesas/r8a7794-alt.dts
index c0d067df22a0..b5ecafbb2e4d 100644
--- a/arch/arm/boot/dts/renesas/r8a7794-alt.dts
+++ b/arch/arm/boot/dts/renesas/r8a7794-alt.dts
@@ -453,6 +453,7 @@
interrupt-parent = <&gpio3>;
interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
rtc {
compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7794-silk.dts b/arch/arm/boot/dts/renesas/r8a7794-silk.dts
index 43d480a7f3ea..595e074085eb 100644
--- a/arch/arm/boot/dts/renesas/r8a7794-silk.dts
+++ b/arch/arm/boot/dts/renesas/r8a7794-silk.dts
@@ -439,6 +439,7 @@
interrupt-parent = <&gpio3>;
interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
onkey {
compatible = "dlg,da9063-onkey";
diff --git a/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts b/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts
index 03a97881519a..21c1678f4e91 100644
--- a/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts
+++ b/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts
@@ -47,6 +47,17 @@
regulator-boot-on;
};
+ hdmi-connnector {
+ compatible = "hdmi-connector";
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&hdmi_connector_out>;
+ };
+ };
+ };
+
/*
* This is a vbus-supply, which also supplies the GL852G usb hub,
* thus has to be always-on
@@ -239,6 +250,10 @@
cpu-supply = <&vdd_arm>;
};
+&display_subsystem {
+ status = "okay";
+};
+
&emmc {
bus-width = <8>;
vmmc-supply = <&vcc_io>;
@@ -328,6 +343,16 @@
status = "okay";
};
+&hdmi {
+ status = "okay";
+};
+
+&hdmi_out {
+ hdmi_connector_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+};
+
&mdio {
phy0: ethernet-phy@1 {
compatible = "ethernet-phy-ieee802.3-c22";
@@ -423,3 +448,7 @@
&usb2phy_otg {
status = "okay";
};
+
+&vop {
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi
index e2264c40b924..fb98873fd94e 100644
--- a/arch/arm/boot/dts/rockchip/rk3128.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi
@@ -115,6 +115,12 @@
};
};
+ display_subsystem: display-subsystem {
+ compatible = "rockchip,display-subsystem";
+ ports = <&vop_out>;
+ status = "disabled";
+ };
+
gpu_opp_table: opp-table-1 {
compatible = "operating-points-v2";
@@ -246,6 +252,32 @@
};
};
+ vop: vop@1010e000 {
+ compatible = "rockchip,rk3126-vop";
+ reg = <0x1010e000 0x300>;
+ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru ACLK_LCDC0>, <&cru DCLK_VOP>,
+ <&cru HCLK_LCDC0>;
+ clock-names = "aclk_vop", "dclk_vop",
+ "hclk_vop";
+ resets = <&cru SRST_VOP_A>, <&cru SRST_VOP_H>,
+ <&cru SRST_VOP_D>;
+ reset-names = "axi", "ahb",
+ "dclk";
+ power-domains = <&power RK3128_PD_VIO>;
+ status = "disabled";
+
+ vop_out: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vop_out_hdmi: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&hdmi_in_vop>;
+ };
+ };
+ };
+
qos_gpu: qos@1012d000 {
compatible = "rockchip,rk3128-qos", "syscon";
reg = <0x1012d000 0x20>;
@@ -436,6 +468,34 @@
};
};
+ hdmi: hdmi@20034000 {
+ compatible = "rockchip,rk3128-inno-hdmi";
+ reg = <0x20034000 0x4000>;
+ interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru PCLK_HDMI>, <&cru DCLK_VOP>;
+ clock-names = "pclk", "ref";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>;
+ power-domains = <&power RK3128_PD_VIO>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi_in: port@0 {
+ reg = <0>;
+ hdmi_in_vop: endpoint {
+ remote-endpoint = <&vop_out_hdmi>;
+ };
+ };
+
+ hdmi_out: port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
timer0: timer@20044000 {
compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer";
reg = <0x20044000 0x20>;
diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi
index 831561fc1814..96421355c274 100644
--- a/arch/arm/boot/dts/rockchip/rk322x.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi
@@ -736,14 +736,20 @@
status = "disabled";
ports {
- hdmi_in: port {
- #address-cells = <1>;
- #size-cells = <0>;
- hdmi_in_vop: endpoint@0 {
- reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi_in: port@0 {
+ reg = <0>;
+
+ hdmi_in_vop: endpoint {
remote-endpoint = <&vop_out_hdmi>;
};
};
+
+ hdmi_out: port@1 {
+ reg = <1>;
+ };
};
};
diff --git a/arch/arm/boot/dts/rockchip/rk3288.dtsi b/arch/arm/boot/dts/rockchip/rk3288.dtsi
index ead343dc3df1..3f1d640afafa 100644
--- a/arch/arm/boot/dts/rockchip/rk3288.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3288.dtsi
@@ -1240,27 +1240,37 @@
compatible = "rockchip,rk3288-dw-hdmi";
reg = <0x0 0xff980000 0x0 0x20000>;
reg-io-width = <4>;
- #sound-dai-cells = <0>;
- rockchip,grf = <&grf>;
interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>;
clock-names = "iahb", "isfr", "cec";
power-domains = <&power RK3288_PD_VIO>;
+ rockchip,grf = <&grf>;
+ #sound-dai-cells = <0>;
status = "disabled";
ports {
- hdmi_in: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi_in: port@0 {
+ reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
+
hdmi_in_vopb: endpoint@0 {
reg = <0>;
remote-endpoint = <&vopb_out_hdmi>;
};
+
hdmi_in_vopl: endpoint@1 {
reg = <1>;
remote-endpoint = <&vopl_out_hdmi>;
};
};
+
+ hdmi_out: port@1 {
+ reg = <1>;
+ };
};
};
diff --git a/arch/arm/boot/dts/rockchip/rv1108.dtsi b/arch/arm/boot/dts/rockchip/rv1108.dtsi
index abf3006f0a84..f3291f3bbc6f 100644
--- a/arch/arm/boot/dts/rockchip/rv1108.dtsi
+++ b/arch/arm/boot/dts/rockchip/rv1108.dtsi
@@ -196,7 +196,6 @@
pwm4: pwm@10280000 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x10280000 0x10>;
- interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -208,7 +207,6 @@
pwm5: pwm@10280010 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x10280010 0x10>;
- interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -220,7 +218,6 @@
pwm6: pwm@10280020 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x10280020 0x10>;
- interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -232,7 +229,6 @@
pwm7: pwm@10280030 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x10280030 0x10>;
- interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -386,7 +382,6 @@
pwm0: pwm@20040000 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x20040000 0x10>;
- interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -398,7 +393,6 @@
pwm1: pwm@20040010 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x20040010 0x10>;
- interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -410,7 +404,6 @@
pwm2: pwm@20040020 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x20040020 0x10>;
- interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -422,7 +415,6 @@
pwm3: pwm@20040030 {
compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
reg = <0x20040030 0x10>;
- interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/rockchip/rv1126-sonoff-ihost.dtsi b/arch/arm/boot/dts/rockchip/rv1126-sonoff-ihost.dtsi
index 32b329e87a0c..9a87dc0d5f66 100644
--- a/arch/arm/boot/dts/rockchip/rv1126-sonoff-ihost.dtsi
+++ b/arch/arm/boot/dts/rockchip/rv1126-sonoff-ihost.dtsi
@@ -8,6 +8,8 @@
aliases {
ethernet0 = &gmac;
mmc0 = &emmc;
+ mmc1 = &sdio;
+ mmc2 = &sdmmc;
};
chosen {
@@ -325,7 +327,7 @@
pmuio1-supply = <&vcc3v3_sys>;
vccio1-supply = <&vcc_1v8>;
vccio2-supply = <&vccio_sd>;
- vccio3-supply = <&vcc_1v8>;
+ vccio3-supply = <&vcc3v3_sd>;
vccio4-supply = <&vcc_dovdd>;
vccio5-supply = <&vcc_1v8>;
vccio6-supply = <&vcc_1v8>;
@@ -343,14 +345,14 @@
cap-sd-highspeed;
cap-sdio-irq;
keep-power-in-suspend;
- max-frequency = <100000000>;
+ max-frequency = <50000000>;
mmc-pwrseq = <&sdio_pwrseq>;
non-removable;
pinctrl-names = "default";
pinctrl-0 = <&sdmmc1_clk &sdmmc1_cmd &sdmmc1_bus4>;
rockchip,default-sample-phase = <90>;
- sd-uhs-sdr104;
- vmmc-supply = <&vcc3v3_sys>;
+ sd-uhs-sdr50;
+ vmmc-supply = <&vcc3v3_sd>;
vqmmc-supply = <&vcc_1v8>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi
index d7954ff466b4..e5254e32aa8f 100644
--- a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi
+++ b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi
@@ -434,6 +434,7 @@
};
&fimd {
+ samsung,invert-vclk;
status = "okay";
};
diff --git a/arch/arm/boot/dts/samsung/exynos4412-i9300.dts b/arch/arm/boot/dts/samsung/exynos4412-i9300.dts
index 61aca5798f38..b79d456e976d 100644
--- a/arch/arm/boot/dts/samsung/exynos4412-i9300.dts
+++ b/arch/arm/boot/dts/samsung/exynos4412-i9300.dts
@@ -18,7 +18,7 @@
memory@40000000 {
device_type = "memory";
- reg = <0x40000000 0x40000000>;
+ reg = <0x40000000 0x3fc00000>;
};
};
diff --git a/arch/arm/boot/dts/samsung/exynos4412-i9305.dts b/arch/arm/boot/dts/samsung/exynos4412-i9305.dts
index 77083f1a8273..1048ef5d9bc3 100644
--- a/arch/arm/boot/dts/samsung/exynos4412-i9305.dts
+++ b/arch/arm/boot/dts/samsung/exynos4412-i9305.dts
@@ -11,7 +11,7 @@
memory@40000000 {
device_type = "memory";
- reg = <0x40000000 0x80000000>;
+ reg = <0x40000000 0x7fc00000>;
};
};
diff --git a/arch/arm/boot/dts/samsung/exynos4412-n710x.dts b/arch/arm/boot/dts/samsung/exynos4412-n710x.dts
index 0a151437fc73..eee1000dea92 100644
--- a/arch/arm/boot/dts/samsung/exynos4412-n710x.dts
+++ b/arch/arm/boot/dts/samsung/exynos4412-n710x.dts
@@ -9,7 +9,7 @@
memory@40000000 {
device_type = "memory";
- reg = <0x40000000 0x80000000>;
+ reg = <0x40000000 0x7fc00000>;
};
/* bootargs are passed in by bootloader */
diff --git a/arch/arm/boot/dts/samsung/exynos4412-p4note.dtsi b/arch/arm/boot/dts/samsung/exynos4412-p4note.dtsi
index 0b89d5682f85..28a605802733 100644
--- a/arch/arm/boot/dts/samsung/exynos4412-p4note.dtsi
+++ b/arch/arm/boot/dts/samsung/exynos4412-p4note.dtsi
@@ -23,7 +23,7 @@
memory@40000000 {
device_type = "memory";
- reg = <0x40000000 0x80000000>;
+ reg = <0x40000000 0x7fc00000>;
};
aliases {
@@ -362,6 +362,39 @@
status = "okay";
};
+&i2c_1 {
+ samsung,i2c-sda-delay = <100>;
+ samsung,i2c-slave-addr = <0x10>;
+ samsung,i2c-max-bus-freq = <400000>;
+ pinctrl-0 = <&i2c1_bus>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ accelerometer@19 {
+ compatible = "st,lsm330dlc-accel";
+ reg = <0x19>;
+ interrupt-parent = <&gpx0>;
+ interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+ pinctrl-0 = <&accelerometer_irq>;
+ pinctrl-names = "default";
+ mount-matrix = "1", "0", "0",
+ "0", "-1", "0",
+ "0", "0", "-1";
+ };
+
+ gyro@6b {
+ compatible = "st,lsm330dlc-gyro";
+ reg = <0x6b>;
+ interrupt-parent = <&gpx0>;
+ interrupts = <6 IRQ_TYPE_EDGE_RISING>;
+ pinctrl-0 = <&gyro_data_enable &gyro_irq>;
+ pinctrl-names = "default";
+ mount-matrix = "1", "0", "0",
+ "0", "-1", "0",
+ "0", "0", "-1";
+ };
+};
+
&i2c_3 {
samsung,i2c-sda-delay = <100>;
samsung,i2c-slave-addr = <0x10>;
@@ -844,6 +877,12 @@
samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
};
+ gyro_data_enable: gyro-data-enable-pins {
+ samsung,pins = "gpl2-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_OUTPUT>;
+ samsung,pin-pud = <EXYNOS_PIN_PULL_DOWN>;
+ };
+
uart_sel: uart-sel-pins {
samsung,pins = "gpl2-7";
samsung,pin-function = <EXYNOS_PIN_FUNC_OUTPUT>;
@@ -894,12 +933,24 @@
samsung,pin-pud = <EXYNOS_PIN_PULL_DOWN>;
};
+ accelerometer_irq: accelerometer-irq-pins {
+ samsung,pins = "gpx0-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_INPUT>;
+ samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
+ };
+
stmpe_adc_irq: stmpe-adc-irq-pins {
samsung,pins = "gpx0-1";
samsung,pin-function = <EXYNOS_PIN_FUNC_INPUT>;
samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
};
+ gyro_irq: gyro-irq-pins {
+ samsung,pins = "gpx0-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_INPUT>;
+ samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
+ };
+
max77686_irq: max77686-irq-pins {
samsung,pins = "gpx0-7";
samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
diff --git a/arch/arm/boot/dts/samsung/exynos5420-galaxy-tab-common.dtsi b/arch/arm/boot/dts/samsung/exynos5420-galaxy-tab-common.dtsi
index f525b2f5e4e0..246040967082 100644
--- a/arch/arm/boot/dts/samsung/exynos5420-galaxy-tab-common.dtsi
+++ b/arch/arm/boot/dts/samsung/exynos5420-galaxy-tab-common.dtsi
@@ -30,6 +30,7 @@
aliases {
mmc0 = &mmc_0;
+ mmc1 = &mmc_1;
mmc2 = &mmc_2;
};
@@ -39,7 +40,7 @@
memory@20000000 {
device_type = "memory";
- reg = <0x20000000 0xc0000000>;
+ reg = <0x20000000 0xbfa00000>;
};
firmware@2073000 {
@@ -87,6 +88,13 @@
linux,code = <KEY_VOLUMEDOWN>;
};
};
+
+ mmc1_pwrseq: pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ reset-gpios = <&gpy7 7 GPIO_ACTIVE_LOW>;
+ clocks = <&s2mps11_osc S2MPS11_CLK_BT>;
+ clock-names = "ext_clock";
+ };
};
&cci {
@@ -620,6 +628,25 @@
vqmmc-supply = <&ldo3_reg>;
};
+/* WiFi */
+&mmc_1 {
+ bus-width = <4>;
+ cap-sd-highspeed;
+ cap-sdio-irq;
+ card-detect-delay = <200>;
+ keep-power-in-suspend;
+ mmc-pwrseq = <&mmc1_pwrseq>;
+ non-removable;
+ pinctrl-0 = <&sd1_clk>, <&sd1_cmd>, <&sd1_int>, <&sd1_bus1>,
+ <&sd1_bus4>, <&wifi_en>;
+ pinctrl-names = "default";
+ vqmmc-supply = <&ldo2_reg>;
+ samsung,dw-mshc-ciu-div = <1>;
+ samsung,dw-mshc-ddr-timing = <0 2>;
+ samsung,dw-mshc-sdr-timing = <0 1>;
+ status = "okay";
+};
+
/* External sdcard */
&mmc_2 {
status = "okay";
@@ -649,6 +676,11 @@
samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
samsung,pin-drv = <EXYNOS5420_PIN_DRV_LV1>;
};
+
+ wifi_en: wifi-en-pins {
+ samsung,pins = "gpy7-7";
+ samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
+ };
};
&rtc {
diff --git a/arch/arm/boot/dts/samsung/exynos5420-peach-pit.dts b/arch/arm/boot/dts/samsung/exynos5420-peach-pit.dts
index 4e757b6e28e1..3759742d38ca 100644
--- a/arch/arm/boot/dts/samsung/exynos5420-peach-pit.dts
+++ b/arch/arm/boot/dts/samsung/exynos5420-peach-pit.dts
@@ -967,6 +967,7 @@
reg = <0>;
spi-max-frequency = <3125000>;
google,has-vbc-nvram;
+ wakeup-source;
controller-data {
samsung,spi-feedback-delay = <1>;
diff --git a/arch/arm/boot/dts/samsung/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/samsung/exynos5422-odroidxu3-common.dtsi
index b4a851aa8881..4a4c55a4beb3 100644
--- a/arch/arm/boot/dts/samsung/exynos5422-odroidxu3-common.dtsi
+++ b/arch/arm/boot/dts/samsung/exynos5422-odroidxu3-common.dtsi
@@ -55,7 +55,7 @@
thermal-zones {
cpu0_thermal: cpu0-thermal {
thermal-sensors = <&tmu_cpu0>;
- polling-delay-passive = <250>;
+ polling-delay-passive = <0>;
polling-delay = <0>;
trips {
cpu0_alert0: cpu-alert-0 {
@@ -78,12 +78,6 @@
hysteresis = <0>; /* millicelsius */
type = "critical";
};
- /*
- * Exynos542x supports only 4 trip-points
- * so for these polling mode is required.
- * Start polling at temperature level of last
- * interrupt-driven trip: cpu0_alert2
- */
cpu0_alert3: cpu-alert-3 {
temperature = <70000>; /* millicelsius */
hysteresis = <10000>; /* millicelsius */
@@ -144,7 +138,7 @@
};
cpu1_thermal: cpu1-thermal {
thermal-sensors = <&tmu_cpu1>;
- polling-delay-passive = <250>;
+ polling-delay-passive = <0>;
polling-delay = <0>;
trips {
cpu1_alert0: cpu-alert-0 {
@@ -217,7 +211,7 @@
};
cpu2_thermal: cpu2-thermal {
thermal-sensors = <&tmu_cpu2>;
- polling-delay-passive = <250>;
+ polling-delay-passive = <0>;
polling-delay = <0>;
trips {
cpu2_alert0: cpu-alert-0 {
@@ -290,7 +284,7 @@
};
cpu3_thermal: cpu3-thermal {
thermal-sensors = <&tmu_cpu3>;
- polling-delay-passive = <250>;
+ polling-delay-passive = <0>;
polling-delay = <0>;
trips {
cpu3_alert0: cpu-alert-0 {
@@ -363,7 +357,7 @@
};
gpu_thermal: gpu-thermal {
thermal-sensors = <&tmu_gpu>;
- polling-delay-passive = <250>;
+ polling-delay-passive = <0>;
polling-delay = <0>;
trips {
gpu_alert0: gpu-alert-0 {
diff --git a/arch/arm/boot/dts/samsung/exynos5800-peach-pi.dts b/arch/arm/boot/dts/samsung/exynos5800-peach-pi.dts
index f91bc4ae008e..9bbbdce9103a 100644
--- a/arch/arm/boot/dts/samsung/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/samsung/exynos5800-peach-pi.dts
@@ -949,6 +949,7 @@
reg = <0>;
spi-max-frequency = <3125000>;
google,has-vbc-nvram;
+ wakeup-source;
controller-data {
samsung,spi-feedback-delay = <1>;
diff --git a/arch/arm/boot/dts/st/Makefile b/arch/arm/boot/dts/st/Makefile
index 7892ad69b441..9fedd6776208 100644
--- a/arch/arm/boot/dts/st/Makefile
+++ b/arch/arm/boot/dts/st/Makefile
@@ -23,6 +23,7 @@ dtb-$(CONFIG_ARCH_STM32) += \
stm32f469-disco.dtb \
stm32f746-disco.dtb \
stm32f769-disco.dtb \
+ stm32f769-disco-mb1166-reva09.dtb \
stm32429i-eval.dtb \
stm32746g-eval.dtb \
stm32h743i-eval.dtb \
diff --git a/arch/arm/boot/dts/st/stih407-pinctrl.dtsi b/arch/arm/boot/dts/st/stih407-pinctrl.dtsi
index 7815669fe813..dcb821f567fa 100644
--- a/arch/arm/boot/dts/st/stih407-pinctrl.dtsi
+++ b/arch/arm/boot/dts/st/stih407-pinctrl.dtsi
@@ -462,14 +462,14 @@
serial0 {
pinctrl_serial0: serial0-0 {
st,pins {
- tx = <&pio17 0 ALT1 OUT>;
- rx = <&pio17 1 ALT1 IN>;
+ tx = <&pio17 0 ALT1 OUT>;
+ rx = <&pio17 1 ALT1 IN>;
};
};
pinctrl_serial0_hw_flowctrl: serial0-0_hw_flowctrl {
st,pins {
- tx = <&pio17 0 ALT1 OUT>;
- rx = <&pio17 1 ALT1 IN>;
+ tx = <&pio17 0 ALT1 OUT>;
+ rx = <&pio17 1 ALT1 IN>;
cts = <&pio17 2 ALT1 IN>;
rts = <&pio17 3 ALT1 OUT>;
};
diff --git a/arch/arm/boot/dts/st/stm32429i-eval.dts b/arch/arm/boot/dts/st/stm32429i-eval.dts
index 576235ec3c51..afa417b34b25 100644
--- a/arch/arm/boot/dts/st/stm32429i-eval.dts
+++ b/arch/arm/boot/dts/st/stm32429i-eval.dts
@@ -222,7 +222,6 @@
reg = <0x42>;
interrupts = <8 3>;
interrupt-parent = <&gpioi>;
- interrupt-controller;
wakeup-source;
stmpegpio: stmpe_gpio {
diff --git a/arch/arm/boot/dts/st/stm32f769-disco-mb1166-reva09.dts b/arch/arm/boot/dts/st/stm32f769-disco-mb1166-reva09.dts
new file mode 100644
index 000000000000..ff7ff32371d0
--- /dev/null
+++ b/arch/arm/boot/dts/st/stm32f769-disco-mb1166-reva09.dts
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Dario Binacchi <dario.binacchi@amarulasolutions.com>
+ */
+
+#include "stm32f769-disco.dts"
+
+&panel0 {
+ compatible = "frida,frd400b25025", "novatek,nt35510";
+ vddi-supply = <&vcc_3v3>;
+ vdd-supply = <&vcc_3v3>;
+ /delete-property/power-supply;
+};
diff --git a/arch/arm/boot/dts/st/stm32f769-disco.dts b/arch/arm/boot/dts/st/stm32f769-disco.dts
index 5d12ae25b327..52c5baf58ab9 100644
--- a/arch/arm/boot/dts/st/stm32f769-disco.dts
+++ b/arch/arm/boot/dts/st/stm32f769-disco.dts
@@ -41,7 +41,7 @@
*/
/dts-v1/;
-#include "stm32f746.dtsi"
+#include "stm32f769.dtsi"
#include "stm32f769-pinctrl.dtsi"
#include <dt-bindings/input/input.h>
#include <dt-bindings/gpio/gpio.h>
@@ -60,6 +60,19 @@
reg = <0xC0000000 0x1000000>;
};
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ linux,dma {
+ compatible = "shared-dma-pool";
+ linux,dma-default;
+ no-map;
+ size = <0x100000>;
+ };
+ };
+
aliases {
serial0 = &usart1;
};
@@ -92,9 +105,9 @@
clock-names = "main_clk";
};
- mmc_vcard: mmc_vcard {
+ vcc_3v3: vcc-3v3 {
compatible = "regulator-fixed";
- regulator-name = "mmc_vcard";
+ regulator-name = "vcc_3v3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
@@ -114,6 +127,45 @@
clock-frequency = <25000000>;
};
+&dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi_in: endpoint {
+ remote-endpoint = <&ltdc_out_dsi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dsi_out: endpoint {
+ remote-endpoint = <&dsi_panel_in>;
+ };
+ };
+ };
+
+ panel0: panel@0 {
+ compatible = "orisetech,otm8009a";
+ reg = <0>; /* dsi virtual channel (0..3) */
+ reset-gpios = <&gpioj 15 GPIO_ACTIVE_LOW>;
+ power-supply = <&vcc_3v3>;
+ status = "okay";
+
+ port {
+ dsi_panel_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+ };
+};
+
&i2c1 {
pinctrl-0 = <&i2c1_pins_b>;
pinctrl-names = "default";
@@ -122,13 +174,23 @@
status = "okay";
};
+&ltdc {
+ status = "okay";
+
+ port {
+ ltdc_out_dsi: endpoint {
+ remote-endpoint = <&dsi_in>;
+ };
+ };
+};
+
&rtc {
status = "okay";
};
&sdio2 {
status = "okay";
- vmmc-supply = <&mmc_vcard>;
+ vmmc-supply = <&vcc_3v3>;
cd-gpios = <&gpioi 15 GPIO_ACTIVE_LOW>;
broken-cd;
pinctrl-names = "default", "opendrain", "sleep";
diff --git a/arch/arm/boot/dts/st/stm32f769.dtsi b/arch/arm/boot/dts/st/stm32f769.dtsi
new file mode 100644
index 000000000000..4e7d9032149c
--- /dev/null
+++ b/arch/arm/boot/dts/st/stm32f769.dtsi
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Dario Binacchi <dario.binacchi@amarulasolutions.com>
+ */
+
+#include "stm32f746.dtsi"
+
+/ {
+ soc {
+ dsi: dsi@40016c00 {
+ compatible = "st,stm32-dsi";
+ reg = <0x40016c00 0x800>;
+ clocks = <&rcc 1 CLK_F769_DSI>, <&clk_hse>;
+ clock-names = "pclk", "ref";
+ resets = <&rcc STM32F7_APB2_RESET(DSI)>;
+ reset-names = "apb";
+ status = "disabled";
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/st/stm32mp131.dtsi b/arch/arm/boot/dts/st/stm32mp131.dtsi
index b04d24c939c3..3900f32da797 100644
--- a/arch/arm/boot/dts/st/stm32mp131.dtsi
+++ b/arch/arm/boot/dts/st/stm32mp131.dtsi
@@ -1315,6 +1315,13 @@
status = "disabled";
};
+ crc1: crc@58009000 {
+ compatible = "st,stm32f7-crc";
+ reg = <0x58009000 0x400>;
+ clocks = <&rcc CRC1>;
+ status = "disabled";
+ };
+
usbh_ohci: usb@5800c000 {
compatible = "generic-ohci";
reg = <0x5800c000 0x1000>;
diff --git a/arch/arm/boot/dts/st/stm32mp135f-dk.dts b/arch/arm/boot/dts/st/stm32mp135f-dk.dts
index eea740d097c7..52171214a308 100644
--- a/arch/arm/boot/dts/st/stm32mp135f-dk.dts
+++ b/arch/arm/boot/dts/st/stm32mp135f-dk.dts
@@ -93,6 +93,14 @@
};
};
+&crc1 {
+ status = "okay";
+};
+
+&cryp {
+ status = "okay";
+};
+
&i2c1 {
pinctrl-names = "default", "sleep";
pinctrl-0 = <&i2c1_pins_a>;
diff --git a/arch/arm/boot/dts/st/stm32mp157.dtsi b/arch/arm/boot/dts/st/stm32mp157.dtsi
index 6197d878894d..97cd24227cef 100644
--- a/arch/arm/boot/dts/st/stm32mp157.dtsi
+++ b/arch/arm/boot/dts/st/stm32mp157.dtsi
@@ -20,7 +20,7 @@
dsi: dsi@5a000000 {
compatible = "st,stm32-dsi";
reg = <0x5a000000 0x800>;
- clocks = <&rcc DSI_K>, <&clk_hse>, <&rcc DSI_PX>;
+ clocks = <&rcc DSI>, <&clk_hse>, <&rcc DSI_PX>;
clock-names = "pclk", "ref", "px_clk";
phy-dsi-supply = <&reg18>;
resets = <&rcc DSI_R>;
diff --git a/arch/arm/boot/dts/st/stm32mp157a-dk1-scmi.dts b/arch/arm/boot/dts/st/stm32mp157a-dk1-scmi.dts
index ce5937270aa1..306e1bc2a514 100644
--- a/arch/arm/boot/dts/st/stm32mp157a-dk1-scmi.dts
+++ b/arch/arm/boot/dts/st/stm32mp157a-dk1-scmi.dts
@@ -30,7 +30,7 @@
};
&dsi {
- clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
+ clocks = <&rcc DSI>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
};
&gpioz {
diff --git a/arch/arm/boot/dts/st/stm32mp157c-dk2-scmi.dts b/arch/arm/boot/dts/st/stm32mp157c-dk2-scmi.dts
index c20a73841c1f..956da5f26c1c 100644
--- a/arch/arm/boot/dts/st/stm32mp157c-dk2-scmi.dts
+++ b/arch/arm/boot/dts/st/stm32mp157c-dk2-scmi.dts
@@ -36,7 +36,7 @@
&dsi {
phy-dsi-supply = <&scmi_reg18>;
- clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
+ clocks = <&rcc DSI>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
};
&gpioz {
diff --git a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts
index 510cca5acb79..7a701f7ef0c7 100644
--- a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts
+++ b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts
@@ -64,7 +64,6 @@
reg = <0x38>;
interrupts = <2 2>;
interrupt-parent = <&gpiof>;
- interrupt-controller;
touchscreen-size-x = <480>;
touchscreen-size-y = <800>;
status = "okay";
diff --git a/arch/arm/boot/dts/st/stm32mp157c-ed1-scmi.dts b/arch/arm/boot/dts/st/stm32mp157c-ed1-scmi.dts
index 5e2eaf57ce22..8e4b0db198c2 100644
--- a/arch/arm/boot/dts/st/stm32mp157c-ed1-scmi.dts
+++ b/arch/arm/boot/dts/st/stm32mp157c-ed1-scmi.dts
@@ -35,7 +35,7 @@
};
&dsi {
- clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
+ clocks = <&rcc DSI>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
};
&gpioz {
diff --git a/arch/arm/boot/dts/st/stm32mp157c-ev1-scmi.dts b/arch/arm/boot/dts/st/stm32mp157c-ev1-scmi.dts
index 3226fb945a8e..72b9cab2d990 100644
--- a/arch/arm/boot/dts/st/stm32mp157c-ev1-scmi.dts
+++ b/arch/arm/boot/dts/st/stm32mp157c-ev1-scmi.dts
@@ -36,7 +36,7 @@
&dsi {
phy-dsi-supply = <&scmi_reg18>;
- clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
+ clocks = <&rcc DSI>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
};
&gpioz {
diff --git a/arch/arm/boot/dts/st/stm32mp157c-lxa-tac-gen2.dts b/arch/arm/boot/dts/st/stm32mp157c-lxa-tac-gen2.dts
index 8a34d15e9005..4cc177031661 100644
--- a/arch/arm/boot/dts/st/stm32mp157c-lxa-tac-gen2.dts
+++ b/arch/arm/boot/dts/st/stm32mp157c-lxa-tac-gen2.dts
@@ -148,7 +148,7 @@
compatible = "ti,lmp92064";
reg = <0>;
- reset-gpios = <&gpioa 4 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpioa 4 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
shunt-resistor-micro-ohms = <15000>;
spi-max-frequency = <5000000>;
vdd-supply = <&reg_pb_3v3>;
diff --git a/arch/arm/boot/dts/st/stm32mp15xc-lxa-tac.dtsi b/arch/arm/boot/dts/st/stm32mp15xc-lxa-tac.dtsi
index fc3a2386dbb9..cfaf8adde319 100644
--- a/arch/arm/boot/dts/st/stm32mp15xc-lxa-tac.dtsi
+++ b/arch/arm/boot/dts/st/stm32mp15xc-lxa-tac.dtsi
@@ -409,7 +409,7 @@ baseboard_eeprom: &sip_eeprom {
&spi2 {
pinctrl-names = "default";
pinctrl-0 = <&spi2_pins_c>;
- cs-gpios = <&gpiof 12 GPIO_ACTIVE_LOW>;
+ cs-gpios = <&gpiof 12 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>;
status = "okay";
};
@@ -471,6 +471,10 @@ baseboard_eeprom: &sip_eeprom {
interrupt-parent = <&gpioa>;
interrupts = <6 IRQ_TYPE_EDGE_RISING>;
+ /* Reduce RGMII EMI emissions by reducing drive strength */
+ microchip,hi-drive-strength-microamp = <2000>;
+ microchip,lo-drive-strength-microamp = <8000>;
+
ports {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/ti/davinci/da850.dtsi b/arch/arm/boot/dts/ti/davinci/da850.dtsi
index f759fdfe1b10..1d3fb5397ce3 100644
--- a/arch/arm/boot/dts/ti/davinci/da850.dtsi
+++ b/arch/arm/boot/dts/ti/davinci/da850.dtsi
@@ -536,7 +536,7 @@
reg = <0x40000 0x1000>;
cap-sd-highspeed;
cap-mmc-highspeed;
- interrupts = <16>;
+ interrupts = <16>, <17>;
dmas = <&edma0 16 0>, <&edma0 17 0>;
dma-names = "rx", "tx";
clocks = <&psc0 5>;
@@ -566,7 +566,7 @@
reg = <0x21b000 0x1000>;
cap-sd-highspeed;
cap-mmc-highspeed;
- interrupts = <72>;
+ interrupts = <72>, <73>;
dmas = <&edma1 28 0>, <&edma1 29 0>;
dma-names = "rx", "tx";
clocks = <&psc1 18>;
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-clocks.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-clocks.dtsi
index 0397c3423d2d..20bab90ee0ba 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-clocks.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for Keystone 2 clock tree
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
clocks {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2e-clocks.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2e-clocks.dtsi
index cf30e007fea3..74720dbf3110 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2e-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2e-clocks.dtsi
@@ -2,7 +2,7 @@
/*
* Keystone 2 Edison SoC specific device tree
*
- * Copyright (C) 2014-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2014-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
clocks {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2e-evm.dts b/arch/arm/boot/dts/ti/keystone/keystone-k2e-evm.dts
index 6978d6a362f3..58099ce8d449 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2e-evm.dts
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2e-evm.dts
@@ -2,7 +2,7 @@
/*
* Keystone 2 Edison EVM device tree
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2e-netcp.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2e-netcp.dtsi
index 5c88a90903b8..e586350ae4dc 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2e-netcp.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2e-netcp.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for Keystone 2 Edison Netcp driver
*
- * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2015-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
qmss: qmss@2a40000 {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2e.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2e.dtsi
index 65c32946c522..662aa33cba11 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2e.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2e.dtsi
@@ -2,7 +2,7 @@
/*
* Keystone 2 Edison soc device tree
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/reset/ti-syscon.h>
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2g-evm.dts b/arch/arm/boot/dts/ti/keystone/keystone-k2g-evm.dts
index f0ddbbcdc972..bf5f67d70235 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2g-evm.dts
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2g-evm.dts
@@ -2,7 +2,7 @@
/*
* Device Tree Source for K2G EVM
*
- * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2016-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2g-ice.dts b/arch/arm/boot/dts/ti/keystone/keystone-k2g-ice.dts
index 6ceb0d5c6388..264e1e0d23c8 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2g-ice.dts
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2g-ice.dts
@@ -2,7 +2,7 @@
/*
* Device Tree Source for K2G Industrial Communication Engine EVM
*
- * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2g-netcp.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2g-netcp.dtsi
index 7109ca031617..974c8f2fa740 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2g-netcp.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2g-netcp.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for K2G Netcp driver
*
- * Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2018 Texas Instruments Incorporated - https://www.ti.com/
*/
qmss: qmss@4020000 {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2g.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2g.dtsi
index 102d59694d90..790b29ab0fa2 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2g.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2g.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for K2G SOC
*
- * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2016-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/interrupt-controller/arm-gic.h>
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2hk-clocks.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2hk-clocks.dtsi
index 4ba6912176ef..3ca4722087c9 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2hk-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2hk-clocks.dtsi
@@ -2,7 +2,7 @@
/*
* Keystone 2 Kepler/Hawking SoC clock nodes
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
clocks {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2hk-evm.dts b/arch/arm/boot/dts/ti/keystone/keystone-k2hk-evm.dts
index 8dfb54295027..b824fad9a4ec 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2hk-evm.dts
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2hk-evm.dts
@@ -2,7 +2,7 @@
/*
* Keystone 2 Kepler/Hawking EVM device tree
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2hk-netcp.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2hk-netcp.dtsi
index c2ee775eab6a..3ab1b5d6f9bc 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2hk-netcp.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2hk-netcp.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for Keystone 2 Hawking Netcp driver
*
- * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2015-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
qmss: qmss@2a40000 {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2hk.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2hk.dtsi
index da6d3934c2e8..4fdf4b30384f 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2hk.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2hk.dtsi
@@ -2,7 +2,7 @@
/*
* Keystone 2 Kepler/Hawking soc specific device tree
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/reset/ti-syscon.h>
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2l-clocks.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2l-clocks.dtsi
index 635528064dea..fcfc2fb6cc2d 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2l-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2l-clocks.dtsi
@@ -2,7 +2,7 @@
/*
* Keystone 2 lamarr SoC clock nodes
*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
clocks {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2l-evm.dts b/arch/arm/boot/dts/ti/keystone/keystone-k2l-evm.dts
index be619e39a16f..ccda63ab12fe 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2l-evm.dts
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2l-evm.dts
@@ -2,7 +2,7 @@
/*
* Keystone 2 Lamarr EVM device tree
*
- * Copyright (C) 2014-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2014-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2l-netcp.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2l-netcp.dtsi
index 1afebd7458c1..b8f880faaa31 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2l-netcp.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2l-netcp.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for Keystone 2 Lamarr Netcp driver
*
- * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2015-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
qmss: qmss@2a40000 {
diff --git a/arch/arm/boot/dts/ti/keystone/keystone-k2l.dtsi b/arch/arm/boot/dts/ti/keystone/keystone-k2l.dtsi
index 2062fe561642..330b437b667f 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone-k2l.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone-k2l.dtsi
@@ -2,7 +2,7 @@
/*
* Keystone 2 Lamarr SoC specific device tree
*
- * Copyright (C) 2014-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2014-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/reset/ti-syscon.h>
diff --git a/arch/arm/boot/dts/ti/keystone/keystone.dtsi b/arch/arm/boot/dts/ti/keystone/keystone.dtsi
index 1fd04bb37a15..ff16428860a9 100644
--- a/arch/arm/boot/dts/ti/keystone/keystone.dtsi
+++ b/arch/arm/boot/dts/ti/keystone/keystone.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/interrupt-controller/arm-gic.h>
diff --git a/arch/arm/boot/dts/ti/omap/am335x-baltos-ir2110.dts b/arch/arm/boot/dts/ti/omap/am335x-baltos-ir2110.dts
index ea5882ed7010..f82d2231dfaa 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-baltos-ir2110.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-baltos-ir2110.dts
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-baltos-ir3220.dts b/arch/arm/boot/dts/ti/omap/am335x-baltos-ir3220.dts
index ea4f8dde6424..74a2191af146 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-baltos-ir3220.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-baltos-ir3220.dts
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-baltos-ir5221.dts b/arch/arm/boot/dts/ti/omap/am335x-baltos-ir5221.dts
index ec914f27d11d..723ff88f76ac 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-baltos-ir5221.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-baltos-ir5221.dts
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-baltos-leds.dtsi b/arch/arm/boot/dts/ti/omap/am335x-baltos-leds.dtsi
index 6a52e42b9e81..049fd8e1b40f 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-baltos-leds.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-baltos-leds.dtsi
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/*#include "am33xx.dtsi"*/
diff --git a/arch/arm/boot/dts/ti/omap/am335x-baltos.dtsi b/arch/arm/boot/dts/ti/omap/am335x-baltos.dtsi
index c14d5b70c72f..a4beb718559c 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-baltos.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-baltos.dtsi
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
#include "am33xx.dtsi"
diff --git a/arch/arm/boot/dts/ti/omap/am335x-base0033.dts b/arch/arm/boot/dts/ti/omap/am335x-base0033.dts
index eba843e22ea1..46078af4b7a3 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-base0033.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-base0033.dts
@@ -2,7 +2,7 @@
/*
* am335x-base0033.dts - Device Tree file for IGEP AQUILA EXPANSION
*
- * Copyright (C) 2013 ISEE 2007 SL - http://www.isee.biz
+ * Copyright (C) 2013 ISEE 2007 SL - https://www.isee.biz
*/
#include "am335x-igep0033.dtsi"
diff --git a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
index 96451c8a815c..2d0216840ff5 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
@@ -289,8 +289,8 @@
* For details, see linux-omap mailing list May 2015 thread
* [PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller
* In particular, messages:
- * http://www.spinics.net/lists/linux-omap/msg118585.html
- * http://www.spinics.net/lists/linux-omap/msg118615.html
+ * https://www.spinics.net/lists/linux-omap/msg118585.html
+ * https://www.spinics.net/lists/linux-omap/msg118615.html
*
* You can override this later with
* &tps { /delete-property/ ti,pmic-shutdown-controller; }
diff --git a/arch/arm/boot/dts/ti/omap/am335x-cm-t335.dts b/arch/arm/boot/dts/ti/omap/am335x-cm-t335.dts
index 72990e7ffe10..06767ea164b5 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-cm-t335.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-cm-t335.dts
@@ -2,7 +2,7 @@
/*
* am335x-cm-t335.dts - Device Tree file for Compulab CM-T335
*
- * Copyright (C) 2014 - 2015 CompuLab Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2014 - 2015 CompuLab Ltd. - https://www.compulab.co.il/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-evmsk.dts b/arch/arm/boot/dts/ti/omap/am335x-evmsk.dts
index 57f78846c42d..eba888dcd60e 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-evmsk.dts
@@ -5,7 +5,7 @@
/*
* AM335x Starter Kit
- * http://www.ti.com/tool/tmdssk3358
+ * https://www.ti.com/tool/tmdssk3358
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-guardian.dts b/arch/arm/boot/dts/ti/omap/am335x-guardian.dts
index 205fe0ed7352..56e5d954a490 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-guardian.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-guardian.dts
@@ -303,8 +303,8 @@
* For details, see linux-omap mailing list May 2015 thread
* [PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller
* In particular, messages:
- * http://www.spinics.net/lists/linux-omap/msg118585.html
- * http://www.spinics.net/lists/linux-omap/msg118615.html
+ * https://www.spinics.net/lists/linux-omap/msg118585.html
+ * https://www.spinics.net/lists/linux-omap/msg118615.html
*
* You can override this later with
* &tps { /delete-property/ ti,pmic-shutdown-controller; }
diff --git a/arch/arm/boot/dts/ti/omap/am335x-icev2.dts b/arch/arm/boot/dts/ti/omap/am335x-icev2.dts
index 3c4228927f56..6f0f4fba043b 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-icev2.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-icev2.dts
@@ -5,7 +5,7 @@
/*
* AM335x ICE V2 board
- * http://www.ti.com/tool/tmdsice3359
+ * https://www.ti.com/tool/tmdsice3359
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-igep0033.dtsi b/arch/arm/boot/dts/ti/omap/am335x-igep0033.dtsi
index e85c33fd42f0..c7a4a5476489 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-igep0033.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-igep0033.dtsi
@@ -2,7 +2,7 @@
/*
* am335x-igep0033.dtsi - Device Tree file for IGEP COM AQUILA AM335x
*
- * Copyright (C) 2013 ISEE 2007 SL - http://www.isee.biz
+ * Copyright (C) 2013 ISEE 2007 SL - https://www.isee.biz
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi
index b8730aa52ce6..a59331aa58e5 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi
@@ -217,7 +217,7 @@
pinctrl-names = "default";
pinctrl-0 = <&spi1_pins>;
- tpm_spi_tis@0 {
+ tpm@0 {
compatible = "tcg,tpm_tis-spi";
reg = <0>;
spi-max-frequency = <500000>;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-myirtech-myc.dtsi b/arch/arm/boot/dts/ti/omap/am335x-myirtech-myc.dtsi
index 584599269217..9c9359844a20 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-myirtech-myc.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-myirtech-myc.dtsi
@@ -2,7 +2,7 @@
/* SPDX-FileCopyrightText: Alexander Shiyan, <shc_work@mail.ru> */
/* Based on code by myc_c335x.dts, MYiRtech.com */
-/* Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ */
+/* Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ */
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-myirtech-myd.dts b/arch/arm/boot/dts/ti/omap/am335x-myirtech-myd.dts
index d3bba79b9358..fd91a3c01a63 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-myirtech-myd.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-myirtech-myd.dts
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* SPDX-FileCopyrightText: Alexander Shiyan, <shc_work@mail.ru> */
/* Based on code by myd_c335x.dts, MYiRtech.com */
-/* Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ */
+/* Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ */
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-nano.dts b/arch/arm/boot/dts/ti/omap/am335x-nano.dts
index a475c0d91306..26b5510cb3d1 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-nano.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-nano.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2013 Newflow Ltd - http://www.newflow.co.uk/
+ * Copyright (C) 2013 Newflow Ltd - https://www.newflow.co.uk/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-netcan-plus-1xx.dts b/arch/arm/boot/dts/ti/omap/am335x-netcan-plus-1xx.dts
index f7fad48e36ed..546e88f8fbad 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-netcan-plus-1xx.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-netcan-plus-1xx.dts
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-2xx.dts b/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-2xx.dts
index 76751a324ad7..f66d57bb685e 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-2xx.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-2xx.dts
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-8xx.dts b/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-8xx.dts
index 5a9fcec040fa..5fb2c629f35c 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-8xx.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-netcom-plus-8xx.dts
@@ -5,7 +5,7 @@
/*
* VScom OnRISC
- * http://www.vscom.de
+ * https://www.vscom.de
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-pdu001.dts b/arch/arm/boot/dts/ti/omap/am335x-pdu001.dts
index 3c9444e98c14..f38f5bff2b96 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-pdu001.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-pdu001.dts
@@ -3,7 +3,7 @@
*
* EETS GmbH PDU001 board device tree file
*
- * Copyright (C) 2018 EETS GmbH - http://www.eets.ch/
+ * Copyright (C) 2018 EETS GmbH - https://www.eets.ch/
*
* Copyright (C) 2011, Texas Instruments, Incorporated - https://www.ti.com/
*
diff --git a/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-extended-wifi.dts b/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-extended-wifi.dts
index 5522759def26..7c9f65126c63 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-extended-wifi.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-extended-wifi.dts
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 Sancloud Ltd
- * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-lite.dts b/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-lite.dts
index b1b400226d83..c6c96f6182a8 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-lite.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-sancloud-bbe-lite.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/
* Copyright (C) 2021 SanCloud Ltd
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am335x-sbc-t335.dts b/arch/arm/boot/dts/ti/omap/am335x-sbc-t335.dts
index 596774c84744..2841e95d9a09 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-sbc-t335.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-sbc-t335.dts
@@ -2,7 +2,7 @@
/*
* am335x-sbc-t335.dts - Device Tree file for Compulab SBC-T335
*
- * Copyright (C) 2014 - 2015 CompuLab Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2014 - 2015 CompuLab Ltd. - https://www.compulab.co.il/
*/
#include "am335x-cm-t335.dts"
diff --git a/arch/arm/boot/dts/ti/omap/am335x-sl50.dts b/arch/arm/boot/dts/ti/omap/am335x-sl50.dts
index 1115c812f6c8..757ebd96b3f0 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-sl50.dts
+++ b/arch/arm/boot/dts/ti/omap/am335x-sl50.dts
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2015 Toby Churchill - http://www.toby-churchill.com/
+ * Copyright (C) 2015 Toby Churchill - https://www.toby-churchill.com/
+ * url above is defunct
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi
index 5b9e01a8aa5d..989d5a6edeed 100644
--- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi
@@ -640,10 +640,11 @@
#size-cells = <1>;
ranges = <0 0x56000000 0x1000000>;
- /*
- * Closed source PowerVR driver, no child device
- * binding or driver in mainline
- */
+ gpu@0 {
+ compatible = "ti,omap3630-gpu", "img,powervr-sgx530";
+ reg = <0x0 0x10000>; /* 64kB */
+ interrupts = <37>;
+ };
};
};
};
diff --git a/arch/arm/boot/dts/ti/omap/am3517.dtsi b/arch/arm/boot/dts/ti/omap/am3517.dtsi
index 77e58e686fb1..19aad715dff7 100644
--- a/arch/arm/boot/dts/ti/omap/am3517.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am3517.dtsi
@@ -162,12 +162,13 @@
clock-names = "fck", "ick";
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0 0x50000000 0x4000>;
+ ranges = <0 0x50000000 0x10000>;
- /*
- * Closed source PowerVR driver, no child device
- * binding or driver in mainline
- */
+ gpu@0 {
+ compatible = "ti,omap3430-gpu", "img,powervr-sgx530";
+ reg = <0x0 0x10000>; /* 64kB */
+ interrupts = <21>;
+ };
};
};
};
diff --git a/arch/arm/boot/dts/ti/omap/am4372.dtsi b/arch/arm/boot/dts/ti/omap/am4372.dtsi
index 9d2c064534f7..5fd1b380ece6 100644
--- a/arch/arm/boot/dts/ti/omap/am4372.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am4372.dtsi
@@ -719,6 +719,12 @@
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x56000000 0x1000000>;
+
+ gpu@0 {
+ compatible = "ti,omap3630-gpu", "img,powervr-sgx530";
+ reg = <0x0 0x10000>; /* 64kB */
+ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
};
};
diff --git a/arch/arm/boot/dts/ti/omap/am437x-cm-t43.dts b/arch/arm/boot/dts/ti/omap/am437x-cm-t43.dts
index 9ec75d03eaff..172516a7667e 100644
--- a/arch/arm/boot/dts/ti/omap/am437x-cm-t43.dts
+++ b/arch/arm/boot/dts/ti/omap/am437x-cm-t43.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2015 CompuLab, Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2015 CompuLab, Ltd. - https://www.compulab.co.il/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/am437x-sbc-t43.dts b/arch/arm/boot/dts/ti/omap/am437x-sbc-t43.dts
index 34a5407bee15..5ec57dcb0659 100644
--- a/arch/arm/boot/dts/ti/omap/am437x-sbc-t43.dts
+++ b/arch/arm/boot/dts/ti/omap/am437x-sbc-t43.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2015 CompuLab, Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2015 CompuLab, Ltd. - https://www.compulab.co.il/
*/
#include "am437x-cm-t43.dts"
diff --git a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts
index c8e55642f9c6..eb1ec85aba28 100644
--- a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts
+++ b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (C) 2014-2019 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2014-2019 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -415,7 +415,6 @@
reg = <0x41>;
interrupts = <30 IRQ_TYPE_LEVEL_LOW>;
interrupt-parent = <&gpio2>;
- interrupt-controller;
id = <0>;
blocks = <0x5>;
irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/ti/omap/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/ti/omap/am57xx-cl-som-am57x.dts
index 4fd831ff206f..d6e3152b02f7 100644
--- a/arch/arm/boot/dts/ti/omap/am57xx-cl-som-am57x.dts
+++ b/arch/arm/boot/dts/ti/omap/am57xx-cl-som-am57x.dts
@@ -2,7 +2,7 @@
/*
* Support for CompuLab CL-SOM-AM57x System-on-Module
*
- * Copyright (C) 2015 CompuLab Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2015 CompuLab Ltd. - https://www.compulab.co.il/
* Author: Dmitry Lifshitz <lifshitz@compulab.co.il>
*/
diff --git a/arch/arm/boot/dts/ti/omap/am57xx-sbc-am57x.dts b/arch/arm/boot/dts/ti/omap/am57xx-sbc-am57x.dts
index 363115afb0a4..64675f4edb60 100644
--- a/arch/arm/boot/dts/ti/omap/am57xx-sbc-am57x.dts
+++ b/arch/arm/boot/dts/ti/omap/am57xx-sbc-am57x.dts
@@ -2,7 +2,7 @@
/*
* Support for CompuLab SBC-AM57x single board computer
*
- * Copyright (C) 2015 CompuLab Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2015 CompuLab Ltd. - https://www.compulab.co.il/
* Author: Dmitry Lifshitz <lifshitz@compulab.co.il>
*/
diff --git a/arch/arm/boot/dts/ti/omap/compulab-sb-som.dtsi b/arch/arm/boot/dts/ti/omap/compulab-sb-som.dtsi
index f5e6216718d8..8a8fa1b2b26c 100644
--- a/arch/arm/boot/dts/ti/omap/compulab-sb-som.dtsi
+++ b/arch/arm/boot/dts/ti/omap/compulab-sb-som.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2015 CompuLab, Ltd. - http://www.compulab.co.il/
+ * Copyright (C) 2015 CompuLab, Ltd. - https://www.compulab.co.il/
*/
/ {
diff --git a/arch/arm/boot/dts/ti/omap/dra7-l4.dtsi b/arch/arm/boot/dts/ti/omap/dra7-l4.dtsi
index 5733e3a4ea8e..6e67d99832ac 100644
--- a/arch/arm/boot/dts/ti/omap/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/ti/omap/dra7-l4.dtsi
@@ -80,7 +80,7 @@
};
};
- phy_gmii_sel: phy-gmii-sel {
+ phy_gmii_sel: phy-gmii-sel@554 {
compatible = "ti,dra7xx-phy-gmii-sel";
reg = <0x554 0x4>;
#phy-cells = <1>;
diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi
index 6509c742fb58..164fa88c459e 100644
--- a/arch/arm/boot/dts/ti/omap/dra7.dtsi
+++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi
@@ -638,7 +638,7 @@
};
};
- abb_mpu: regulator-abb-mpu {
+ abb_mpu: regulator-abb-mpu@4ae07ddc {
compatible = "ti,abb-v3";
regulator-name = "abb_mpu";
#address-cells = <0>;
@@ -671,7 +671,7 @@
>;
};
- abb_ivahd: regulator-abb-ivahd {
+ abb_ivahd: regulator-abb-ivahd@4ae07e34 {
compatible = "ti,abb-v3";
regulator-name = "abb_ivahd";
#address-cells = <0>;
@@ -704,7 +704,7 @@
>;
};
- abb_dspeve: regulator-abb-dspeve {
+ abb_dspeve: regulator-abb-dspeve@4ae07e30 {
compatible = "ti,abb-v3";
regulator-name = "abb_dspeve";
#address-cells = <0>;
@@ -737,7 +737,7 @@
>;
};
- abb_gpu: regulator-abb-gpu {
+ abb_gpu: regulator-abb-gpu@4ae07de4 {
compatible = "ti,abb-v3";
regulator-name = "abb_gpu";
#address-cells = <0>;
@@ -850,12 +850,19 @@
<SYSC_IDLE_SMART>;
ti,sysc-sidle = <SYSC_IDLE_FORCE>,
<SYSC_IDLE_NO>,
- <SYSC_IDLE_SMART>;
+ <SYSC_IDLE_SMART>,
+ <SYSC_IDLE_SMART_WKUP>;
clocks = <&gpu_clkctrl DRA7_GPU_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x56000000 0x2000000>;
+
+ gpu@0 {
+ compatible = "ti,am5728-gpu", "img,powervr-sgx544";
+ reg = <0x0 0x10000>; /* 64kB */
+ interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
crossbar_mpu: crossbar@4a002a48 {
diff --git a/arch/arm/boot/dts/ti/omap/dra74x-p.dtsi b/arch/arm/boot/dts/ti/omap/dra74x-p.dtsi
index 006189dad7a7..bb5239ae164d 100644
--- a/arch/arm/boot/dts/ti/omap/dra74x-p.dtsi
+++ b/arch/arm/boot/dts/ti/omap/dra74x-p.dtsi
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/arch/arm/boot/dts/ti/omap/dra7xx-clocks.dtsi b/arch/arm/boot/dts/ti/omap/dra7xx-clocks.dtsi
index 04a7a6d1d529..06466d36caa9 100644
--- a/arch/arm/boot/dts/ti/omap/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/dra7xx-clocks.dtsi
@@ -1685,7 +1685,7 @@
reg = <0x0558>;
};
- sys_32k_ck: clock-sys-32k {
+ sys_32k_ck: clock-sys-32k@6c4 {
#clock-cells = <0>;
compatible = "ti,mux-clock";
clock-output-names = "sys_32k_ck";
diff --git a/arch/arm/boot/dts/ti/omap/omap34xx.dtsi b/arch/arm/boot/dts/ti/omap/omap34xx.dtsi
index fc7233ac183a..acdd0ee34421 100644
--- a/arch/arm/boot/dts/ti/omap/omap34xx.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap34xx.dtsi
@@ -164,12 +164,13 @@
clock-names = "fck", "ick";
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0 0x50000000 0x4000>;
+ ranges = <0 0x50000000 0x10000>;
- /*
- * Closed source PowerVR driver, no child device
- * binding or driver in mainline
- */
+ gpu@0 {
+ compatible = "ti,omap3430-gpu", "img,powervr-sgx530";
+ reg = <0x0 0x10000>; /* 64kB */
+ interrupts = <21>;
+ };
};
};
diff --git a/arch/arm/boot/dts/ti/omap/omap36xx.dtsi b/arch/arm/boot/dts/ti/omap/omap36xx.dtsi
index e6d8070c1bf8..c3d79ecd56e3 100644
--- a/arch/arm/boot/dts/ti/omap/omap36xx.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap36xx.dtsi
@@ -211,10 +211,11 @@
#size-cells = <1>;
ranges = <0 0x50000000 0x2000000>;
- /*
- * Closed source PowerVR driver, no child device
- * binding or driver in mainline
- */
+ gpu@0 {
+ compatible = "ti,omap3630-gpu", "img,powervr-sgx530";
+ reg = <0x0 0x2000000>; /* 32MB */
+ interrupts = <21>;
+ };
};
};
diff --git a/arch/arm/boot/dts/ti/omap/omap4-epson-embt2ws.dts b/arch/arm/boot/dts/ti/omap/omap4-epson-embt2ws.dts
index 24f7d0285f79..339e52ba3614 100644
--- a/arch/arm/boot/dts/ti/omap/omap4-epson-embt2ws.dts
+++ b/arch/arm/boot/dts/ti/omap/omap4-epson-embt2ws.dts
@@ -85,6 +85,7 @@
interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
interrupt-controller;
#interrupt-cells = <1>;
+ system-power-controller;
rtc {
compatible = "ti,twl4030-rtc";
diff --git a/arch/arm/boot/dts/ti/omap/omap4-panda-common.dtsi b/arch/arm/boot/dts/ti/omap/omap4-panda-common.dtsi
index f528511c2537..97706d6296a6 100644
--- a/arch/arm/boot/dts/ti/omap/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap4-panda-common.dtsi
@@ -408,6 +408,7 @@
reg = <0x48>;
/* IRQ# = 7 */
interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
+ system-power-controller;
};
twl6040: twl@4b {
diff --git a/arch/arm/boot/dts/ti/omap/omap4-sdp.dts b/arch/arm/boot/dts/ti/omap/omap4-sdp.dts
index b2cb93edbc3a..b535d24c6140 100644
--- a/arch/arm/boot/dts/ti/omap/omap4-sdp.dts
+++ b/arch/arm/boot/dts/ti/omap/omap4-sdp.dts
@@ -439,7 +439,7 @@
/*
* Ambient Light Sensor
- * http://www.rohm.com/products/databook/sensor/pdf/bh1780gli-e.pdf
+ * https://www.rohm.com/products/databook/sensor/pdf/bh1780gli-e.pdf (defunct)
*/
bh1780@29 {
compatible = "rohm,bh1780";
diff --git a/arch/arm/boot/dts/ti/omap/omap4.dtsi b/arch/arm/boot/dts/ti/omap/omap4.dtsi
index 2bbff9032be3..559b2bfe4ca7 100644
--- a/arch/arm/boot/dts/ti/omap/omap4.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap4.dtsi
@@ -501,10 +501,11 @@
#size-cells = <1>;
ranges = <0 0x56000000 0x2000000>;
- /*
- * Closed source PowerVR driver, no child device
- * binding or driver in mainline
- */
+ gpu@0 {
+ compatible = "ti,omap4430-gpu", "img,powervr-sgx540";
+ reg = <0x0 0x2000000>; /* 32MB */
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
/*
diff --git a/arch/arm/boot/dts/ti/omap/omap5-igep0050.dts b/arch/arm/boot/dts/ti/omap/omap5-igep0050.dts
index d4ca2e3a14dd..0368e32f67e7 100644
--- a/arch/arm/boot/dts/ti/omap/omap5-igep0050.dts
+++ b/arch/arm/boot/dts/ti/omap/omap5-igep0050.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2013 ISEE 2007 SL - http://www.isee.biz/
+ * Copyright (C) 2013 ISEE 2007 SL - https://www.isee.biz/
*/
/dts-v1/;
diff --git a/arch/arm/boot/dts/ti/omap/omap5.dtsi b/arch/arm/boot/dts/ti/omap/omap5.dtsi
index bac6fa838793..6a66214ad0e2 100644
--- a/arch/arm/boot/dts/ti/omap/omap5.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap5.dtsi
@@ -453,10 +453,11 @@
#size-cells = <1>;
ranges = <0 0x56000000 0x2000000>;
- /*
- * Closed source PowerVR driver, no child device
- * binding or driver in mainline
- */
+ gpu@0 {
+ compatible = "ti,omap5432-gpu", "img,powervr-sgx544";
+ reg = <0x0 0x2000000>; /* 32MB */
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ };
};
target-module@58000000 {
diff --git a/arch/arm/boot/dts/ti/omap/twl4030.dtsi b/arch/arm/boot/dts/ti/omap/twl4030.dtsi
index 93e07c18781b..a5d9c5738317 100644
--- a/arch/arm/boot/dts/ti/omap/twl4030.dtsi
+++ b/arch/arm/boot/dts/ti/omap/twl4030.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/
*/
/*
diff --git a/arch/arm/boot/dts/ti/omap/twl6030.dtsi b/arch/arm/boot/dts/ti/omap/twl6030.dtsi
index 9d588cfaa5cb..8da969035c41 100644
--- a/arch/arm/boot/dts/ti/omap/twl6030.dtsi
+++ b/arch/arm/boot/dts/ti/omap/twl6030.dtsi
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/
*/
/*
* Integrated Power Management Chip
- * http://www.ti.com/lit/ds/symlink/twl6030.pdf
+ * https://www.ti.com/lit/ds/symlink/twl6030.pdf
*/
&twl {
compatible = "ti,twl6030";
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index c98d5ff8a1ed..7ad48fdda1da 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -318,8 +318,11 @@ CONFIG_EXTCON_MAX77693=y
CONFIG_EXTCON_MAX8997=y
CONFIG_EXYNOS5422_DMC=y
CONFIG_IIO=y
+CONFIG_IIO_ST_ACCEL_3AXIS=m
+# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set
CONFIG_EXYNOS_ADC=y
CONFIG_STMPE_ADC=y
+CONFIG_IIO_ST_GYRO_3AXIS=m
CONFIG_CM36651=y
CONFIG_AK8975=y
CONFIG_SENSORS_ISL29018=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 0a90583f9f01..92d649a817b3 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -208,6 +208,7 @@ CONFIG_PINCTRL_IMX8MQ=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_MXC=y
CONFIG_GPIO_SIOX=m
+CONFIG_GPIO_VF610=y
CONFIG_GPIO_MAX732X=y
CONFIG_GPIO_PCA953X=y
CONFIG_GPIO_PCF857X=y
@@ -297,6 +298,7 @@ CONFIG_FB_MODE_HELPERS=y
CONFIG_LCD_CLASS_DEVICE=y
CONFIG_LCD_L4F00242T03=y
CONFIG_LCD_PLATFORM=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_PWM=y
CONFIG_BACKLIGHT_GPIO=y
CONFIG_FRAMEBUFFER_CONSOLE=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ecb3e286107a..b2955dcb5a53 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -183,6 +183,7 @@ CONFIG_PCIE_RCAR_HOST=y
CONFIG_PCI_RCAR_GEN2=y
CONFIG_PCI_LAYERSCAPE=y
CONFIG_PCI_DRA7XX_EP=y
+CONFIG_PCI_KEYSTONE_HOST=y
CONFIG_PCI_ENDPOINT=y
CONFIG_PCI_ENDPOINT_CONFIGFS=y
CONFIG_PCI_EPF_TEST=m
@@ -191,6 +192,7 @@ CONFIG_DEVTMPFS_MOUNT=y
CONFIG_OMAP_OCP2SCP=y
CONFIG_ARM_SCMI_PROTOCOL=y
CONFIG_RASPBERRYPI_FIRMWARE=y
+CONFIG_TI_SCI_PROTOCOL=y
CONFIG_TRUSTED_FOUNDATIONS=y
CONFIG_BCM47XX_NVRAM=y
CONFIG_BCM47XX_SPROM=y
@@ -280,6 +282,8 @@ CONFIG_DWMAC_DWC_QOS_ETH=y
CONFIG_TI_CPSW=y
CONFIG_TI_CPSW_SWITCHDEV=y
CONFIG_TI_CPTS=y
+CONFIG_TI_KEYSTONE_NETCP=y
+CONFIG_TI_KEYSTONE_NETCP_ETHSS=y
CONFIG_XILINX_EMACLITE=y
CONFIG_SFP=m
CONFIG_BROADCOM_PHY=y
@@ -292,6 +296,8 @@ CONFIG_CAN_AT91=m
CONFIG_CAN_FLEXCAN=m
CONFIG_CAN_SUN4I=y
CONFIG_CAN_XILINXCAN=y
+CONFIG_CAN_C_CAN=m
+CONFIG_CAN_C_CAN_PLATFORM=m
CONFIG_CAN_RCAR=m
CONFIG_CAN_MCP251X=y
CONFIG_MDIO_MSCC_MIIM=m
@@ -335,6 +341,7 @@ CONFIG_INPUT_MISC=y
CONFIG_INPUT_PM8941_PWRKEY=y
CONFIG_INPUT_MAX77693_HAPTIC=m
CONFIG_INPUT_MAX8997_HAPTIC=m
+CONFIG_INPUT_GPIO_DECODER=m
CONFIG_INPUT_CPCAP_PWRBUTTON=m
CONFIG_INPUT_AXP20X_PEK=m
CONFIG_INPUT_DA9063_ONKEY=m
@@ -436,6 +443,7 @@ CONFIG_SPI_ATMEL_QUADSPI=m
CONFIG_SPI_BCM2835=y
CONFIG_SPI_BCM2835AUX=y
CONFIG_SPI_CADENCE=y
+CONFIG_SPI_CADENCE_QUADSPI=y
CONFIG_SPI_DAVINCI=y
CONFIG_SPI_FSL_QUADSPI=m
CONFIG_SPI_GXP=m
@@ -769,6 +777,7 @@ CONFIG_FB_EFI=y
CONFIG_FB_WM8505=y
CONFIG_FB_SH_MOBILE_LCDC=y
CONFIG_FB_SIMPLE=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_PWM=y
CONFIG_BACKLIGHT_AS3711=y
CONFIG_BACKLIGHT_GPIO=y
@@ -1047,7 +1056,6 @@ CONFIG_KEYBOARD_NVEC=y
CONFIG_SERIO_NVEC_PS2=y
CONFIG_NVEC_POWER=y
CONFIG_NVEC_PAZ00=y
-CONFIG_STAGING_BOARD=y
CONFIG_CHROME_PLATFORMS=y
CONFIG_CROS_EC=m
CONFIG_CROS_EC_I2C=m
@@ -1073,6 +1081,7 @@ CONFIG_HWSPINLOCK_OMAP=y
CONFIG_HWSPINLOCK_QCOM=y
CONFIG_OMAP2PLUS_MBOX=y
CONFIG_BCM2835_MBOX=y
+CONFIG_TI_MESSAGE_MANAGER=y
CONFIG_QCOM_APCS_IPC=y
CONFIG_STM32_IPCC=m
CONFIG_QCOM_IPCC=y
@@ -1133,11 +1142,15 @@ CONFIG_ARCH_TEGRA_2x_SOC=y
CONFIG_ARCH_TEGRA_3x_SOC=y
CONFIG_ARCH_TEGRA_114_SOC=y
CONFIG_ARCH_TEGRA_124_SOC=y
+CONFIG_SOC_TI=y
+CONFIG_KEYSTONE_NAVIGATOR_QMSS=y
+CONFIG_KEYSTONE_NAVIGATOR_DMA=y
CONFIG_RASPBERRYPI_POWER=y
CONFIG_QCOM_CPR=y
CONFIG_QCOM_RPMHPD=y
CONFIG_QCOM_RPMPD=y
CONFIG_ROCKCHIP_PM_DOMAINS=y
+CONFIG_TI_SCI_PM_DOMAINS=y
CONFIG_ARM_EXYNOS_BUS_DEVFREQ=m
CONFIG_ARM_TEGRA_DEVFREQ=m
CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP=m
@@ -1150,6 +1163,8 @@ CONFIG_STM32_FMC2_EBI=y
CONFIG_EXYNOS5422_DMC=m
CONFIG_IIO=y
CONFIG_IIO_SW_TRIGGER=y
+CONFIG_IIO_ST_ACCEL_3AXIS=m
+# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set
CONFIG_ASPEED_ADC=m
CONFIG_AT91_ADC=m
CONFIG_AT91_SAMA5D2_ADC=m
@@ -1169,6 +1184,7 @@ CONFIG_IIO_CROS_EC_SENSORS_CORE=m
CONFIG_IIO_CROS_EC_SENSORS=m
CONFIG_STM32_DAC=m
CONFIG_MPU3050_I2C=y
+CONFIG_IIO_ST_GYRO_3AXIS=m
CONFIG_CM36651=m
CONFIG_IIO_CROS_EC_LIGHT_PROX=m
CONFIG_SENSORS_ISL29018=y
@@ -1193,10 +1209,13 @@ CONFIG_PWM_STM32=m
CONFIG_PWM_STM32_LP=m
CONFIG_PWM_SUN4I=y
CONFIG_PWM_TEGRA=y
+CONFIG_PWM_TIECAP=m
CONFIG_PWM_VT8500=y
CONFIG_KEYSTONE_IRQ=y
CONFIG_RESET_MCHP_SPARX5=y
CONFIG_RESET_SCMI=y
+CONFIG_RESET_TI_SCI=m
+CONFIG_RESET_TI_SYSCON=m
CONFIG_PHY_SUN4I_USB=y
CONFIG_PHY_SUN9I_USB=y
CONFIG_PHY_BRCM_USB=m
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index c47a638172a8..091e1840933c 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -191,8 +191,6 @@ CONFIG_DW_DMAC=y
CONFIG_RZN1_DMAMUX=y
CONFIG_RCAR_DMAC=y
CONFIG_RENESAS_USB_DMAC=y
-CONFIG_STAGING=y
-CONFIG_STAGING_BOARD=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_ARCH_EMEV2=y
CONFIG_ARCH_R8A7794=y
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index d68101655b74..9f21e170320f 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -4,7 +4,6 @@
#include <asm/auxvec.h>
#include <asm/hwcap.h>
-#include <asm/vdso_datapage.h>
/*
* ELF register definitions..
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index e12d7d096fc0..e4eb54f6cd9f 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -11,7 +11,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
WASM(nop) "\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@@ -25,7 +25,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
WASM(b) " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 119aa85d1feb..62af9f7f9e96 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -8,7 +8,7 @@
#define _ASMARM_PAGE_H
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h
deleted file mode 100644
index bef68f59928d..000000000000
--- a/arch/arm/include/asm/vdso_datapage.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Adapted from arm64 version.
- *
- * Copyright (C) 2012 ARM Limited
- */
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-#include <vdso/datapage.h>
-#include <asm/page.h>
-
-union vdso_data_store {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
index 352ab213520d..f9a3897b06e7 100644
--- a/arch/arm/include/asm/word-at-a-time.h
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -8,7 +8,8 @@
* Little-endian word-at-a-time zero byte handling.
* Heavily based on the x86 algorithm.
*/
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
struct word_at_a_time {
const unsigned long one_bits, high_bits;
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 219cbc7e5d13..4915662842ff 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -21,10 +21,12 @@
#include <asm/mpu.h>
#include <asm/procinfo.h>
#include <asm/suspend.h>
-#include <asm/vdso_datapage.h>
#include <asm/hardware/cache-l2x0.h>
#include <linux/kbuild.h>
#include <linux/arm-smccc.h>
+
+#include <vdso/datapage.h>
+
#include "signal.h"
/*
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index f297d66a8a76..d499ad461b00 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -21,7 +21,6 @@
#include <asm/cacheflush.h>
#include <asm/page.h>
#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>
#include <clocksource/arm_arch_timer.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>
@@ -35,9 +34,6 @@ extern char vdso_start[], vdso_end[];
/* Total number of pages needed for the data and text portions of the VDSO. */
unsigned int vdso_total_pages __ro_after_init;
-/*
- * The VDSO data page.
- */
static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 71b113976420..8b1ec60a9a46 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = {
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
GPIO_LOOKUP_IDX("G", 0, NULL, 1,
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+ { }
},
};
diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 444a7eaa320c..25893d109190 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -452,7 +452,7 @@ static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc,
.active_events = 0,
};
- pmu_mmdc->id = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
+ pmu_mmdc->id = ida_alloc(&mmdc_ida, GFP_KERNEL);
return pmu_mmdc->id;
}
@@ -461,7 +461,7 @@ static void imx_mmdc_remove(struct platform_device *pdev)
{
struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev);
- ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
+ ida_free(&mmdc_ida, pmu_mmdc->id);
cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
perf_pmu_unregister(&pmu_mmdc->pmu);
iounmap(pmu_mmdc->mmdc_base);
@@ -529,7 +529,7 @@ pmu_register_err:
cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
hrtimer_cancel(&pmu_mmdc->hrtimer);
pmu_release_id:
- ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
+ ida_free(&mmdc_ida, pmu_mmdc->id);
pmu_free:
kfree(pmu_mmdc);
return ret;
diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig
index cbf703f0d850..a643b71e30a3 100644
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -4,7 +4,6 @@ menuconfig ARCH_OMAP1
depends on ARCH_MULTI_V4T || ARCH_MULTI_V5
depends on CPU_LITTLE_ENDIAN
depends on ATAGS
- select ARCH_OMAP
select ARCH_HAS_HOLES_MEMORYMODEL
select ARCH_OMAP
select CLKSRC_MMIO
diff --git a/arch/arm/mach-omap2/am33xx-restart.c b/arch/arm/mach-omap2/am33xx-restart.c
index ef2f18a56b65..fcf3d557aa78 100644
--- a/arch/arm/mach-omap2/am33xx-restart.c
+++ b/arch/arm/mach-omap2/am33xx-restart.c
@@ -9,7 +9,7 @@
#include "prm.h"
/**
- * am3xx_restart - trigger a software restart of the SoC
+ * am33xx_restart - trigger a software restart of the SoC
* @mode: the "reboot mode", see arch/arm/kernel/{setup,process}.c
* @cmd: passed from the userspace program rebooting the system (if provided)
*
@@ -18,7 +18,8 @@
*/
void am33xx_restart(enum reboot_mode mode, const char *cmd)
{
- /* TODO: Handle mode and cmd if necessary */
+ /* TODO: Handle cmd if necessary */
+ prm_reboot_mode = mode;
omap_prm_reset_system();
}
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index fde6ccb3df6e..68e0baad2bbf 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -246,6 +246,12 @@ DT_MACHINE_START(AM33XX_DT, "Generic AM33XX (Flattened Device Tree)")
.init_time = omap_init_time_of,
.dt_compat = am33xx_boards_compat,
.restart = am33xx_restart,
+ /*
+ * Historically am33xx supported only REBOOT_WARM even though default
+ * reboot_mode was REBOOT_COLD. Reflect legacy de-facto behaviour in
+ * SYSFS.
+ */
+ .reboot_mode = REBOOT_WARM,
MACHINE_END
#endif
diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
index be4557d1fdac..011076a5952f 100644
--- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
+++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
@@ -162,7 +162,7 @@ static int omap2_select_table_rate(struct clk_hw *hw, unsigned long rate,
}
/**
- * omap2xxx_clkt_vps_check_bootloader_rate - determine which of the rate
+ * omap2xxx_clkt_vps_check_bootloader_rates - determine which of the rate
* table sets matches the current CORE DPLL hardware rate
*
* Check the MPU rate set by bootloader. Sets the 'curr_prcm_set'
diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c
index d145e7ac709b..69dc5b839335 100644
--- a/arch/arm/mach-omap2/clockdomain.c
+++ b/arch/arm/mach-omap2/clockdomain.c
@@ -990,7 +990,7 @@ void clkdm_allow_idle(struct clockdomain *clkdm)
}
/**
- * clkdm_deny_idle - disable hwsup idle transitions for clkdm
+ * clkdm_deny_idle_nolock - disable hwsup idle transitions for clkdm
* @clkdm: struct clockdomain *
*
* Prevent the hardware from automatically switching the clockdomain
@@ -1110,7 +1110,7 @@ void clkdm_del_autodeps(struct clockdomain *clkdm)
/**
* clkdm_clk_enable - add an enabled downstream clock to this clkdm
* @clkdm: struct clockdomain *
- * @clk: struct clk * of the enabled downstream clock
+ * @unused: struct clk * of the enabled downstream clock
*
* Increment the usecount of the clockdomain @clkdm and ensure that it
* is awake before @clk is enabled. Intended to be called by
diff --git a/arch/arm/mach-omap2/cm33xx.c b/arch/arm/mach-omap2/cm33xx.c
index c824d4e3db63..acdf72a541c0 100644
--- a/arch/arm/mach-omap2/cm33xx.c
+++ b/arch/arm/mach-omap2/cm33xx.c
@@ -357,7 +357,7 @@ static int am33xx_clkdm_save_context(struct clockdomain *clkdm)
}
/**
- * am33xx_restore_save_context - Restore the clockdomain transition context
+ * am33xx_clkdm_restore_context - Restore the clockdomain transition context
* @clkdm: The clockdomain pointer whose context needs to be restored
*
* Restore the clockdomain transition context.
diff --git a/arch/arm/mach-omap2/cminst44xx.c b/arch/arm/mach-omap2/cminst44xx.c
index 46670521b278..49483a888046 100644
--- a/arch/arm/mach-omap2/cminst44xx.c
+++ b/arch/arm/mach-omap2/cminst44xx.c
@@ -237,7 +237,7 @@ static void omap4_cminst_clkdm_disable_hwsup(u8 part, u16 inst, u16 cdoffs)
}
/**
- * omap4_cminst_clkdm_force_sleep - try to take a clockdomain out of idle
+ * omap4_cminst_clkdm_force_wakeup - try to take a clockdomain out of idle
* @part: PRCM partition ID that the clockdomain registers exist in
* @inst: CM instance register offset (*_INST macro)
* @cdoffs: Clockdomain register offset (*_CDOFFS macro)
diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c
index 29c7350b06ab..c2e1aecd07cc 100644
--- a/arch/arm/mach-omap2/omap-secure.c
+++ b/arch/arm/mach-omap2/omap-secure.c
@@ -47,7 +47,7 @@ static void __init omap_optee_init_check(void)
}
/**
- * omap_sec_dispatcher: Routine to dispatch low power secure
+ * omap_secure_dispatcher - Routine to dispatch low power secure
* service routines
* @idx: The HAL API index
* @flag: The flag indicating criticality of operation
@@ -183,7 +183,7 @@ static u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
/**
* rx51_secure_update_aux_cr: Routine to modify the contents of Auxiliary Control Register
* @set_bits: bits to set in ACR
- * @clr_bits: bits to clear in ACR
+ * @clear_bits: bits to clear in ACR
*
* Return the non-zero error value on failure.
*/
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index ba71928c0fcb..111677878d9c 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -900,7 +900,7 @@ static int _init_interface_clks(struct omap_hwmod *oh)
}
/**
- * _init_opt_clk - get a struct clk * for the hwmod's optional clocks
+ * _init_opt_clks - get a struct clk * for the hwmod's optional clocks
* @oh: struct omap_hwmod *
*
* Called from _init_clocks(). Populates the @oh omap_hwmod_opt_clk
@@ -2297,7 +2297,7 @@ static void __init parse_module_flags(struct omap_hwmod *oh,
/**
* _init - initialize internal data for the hwmod @oh
* @oh: struct omap_hwmod *
- * @n: (unused)
+ * @data: (unused)
*
* Look up the clocks and the address space used by the MPU to access
* registers belonging to the hwmod @oh. @oh must already be
@@ -2493,7 +2493,7 @@ static void _setup_postsetup(struct omap_hwmod *oh)
/**
* _setup - prepare IP block hardware for use
* @oh: struct omap_hwmod *
- * @n: (unused, pass NULL)
+ * @data: (unused, pass NULL)
*
* Configure the IP block represented by @oh. This may include
* enabling the IP block, resetting it, and placing it into a
@@ -3367,8 +3367,9 @@ static int omap_hwmod_check_module(struct device *dev,
* omap_hwmod_allocate_module - allocate new module
* @dev: struct device
* @oh: module
+ * @data: module data
* @sysc_fields: sysc register bits
- * @clockdomain: clockdomain
+ * @clkdm: clockdomain
* @rev_offs: revision register offset
* @sysc_offs: sysconfig register offset
* @syss_offs: sysstatus register offset
diff --git a/arch/arm/mach-omap2/omap_hwmod_common_data.c b/arch/arm/mach-omap2/omap_hwmod_common_data.c
index 246f1e5da99f..439232233c39 100644
--- a/arch/arm/mach-omap2/omap_hwmod_common_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_common_data.c
@@ -20,7 +20,7 @@
#include "omap_hwmod_common_data.h"
-/**
+/*
* struct omap_hwmod_sysc_type1 - TYPE1 sysconfig scheme.
*
* To be used by hwmod structure to specify the sysconfig offsets
@@ -36,7 +36,7 @@ struct sysc_regbits omap_hwmod_sysc_type1 = {
.autoidle_shift = SYSC_TYPE1_AUTOIDLE_SHIFT,
};
-/**
+/*
* struct omap_hwmod_sysc_type2 - TYPE2 sysconfig scheme.
*
* To be used by hwmod structure to specify the sysconfig offsets if the
@@ -50,7 +50,7 @@ struct sysc_regbits omap_hwmod_sysc_type2 = {
.dmadisable_shift = SYSC_TYPE2_DMADISABLE_SHIFT,
};
-/**
+/*
* struct omap_hwmod_sysc_type3 - TYPE3 sysconfig scheme.
* Used by some IPs on AM33xx
*/
diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c
index 668dc84fd31e..4f31e61c0c90 100644
--- a/arch/arm/mach-omap2/pmic-cpcap.c
+++ b/arch/arm/mach-omap2/pmic-cpcap.c
@@ -18,10 +18,10 @@
#include "vc.h"
/**
- * omap_cpcap_vsel_to_vdc - convert CPCAP VSEL value to microvolts DC
+ * omap_cpcap_vsel_to_uv - convert CPCAP VSEL value to microvolts DC
* @vsel: CPCAP VSEL value to convert
*
- * Returns the microvolts DC that the CPCAP PMIC should generate when
+ * Returns: the microvolts DC that the CPCAP PMIC should generate when
* programmed with @vsel.
*/
static unsigned long omap_cpcap_vsel_to_uv(unsigned char vsel)
@@ -35,7 +35,7 @@ static unsigned long omap_cpcap_vsel_to_uv(unsigned char vsel)
* omap_cpcap_uv_to_vsel - convert microvolts DC to CPCAP VSEL value
* @uv: microvolts DC to convert
*
- * Returns the VSEL value necessary for the CPCAP PMIC to
+ * Returns: the VSEL value necessary for the CPCAP PMIC to
* generate an output voltage equal to or greater than @uv microvolts DC.
*/
static unsigned char omap_cpcap_uv_to_vsel(unsigned long uv)
@@ -82,10 +82,10 @@ static struct omap_voltdm_pmic omap_cpcap_iva = {
};
/**
- * omap_max8952_vsel_to_vdc - convert MAX8952 VSEL value to microvolts DC
+ * omap_max8952_vsel_to_uv - convert MAX8952 VSEL value to microvolts DC
* @vsel: MAX8952 VSEL value to convert
*
- * Returns the microvolts DC that the MAX8952 Regulator should generate when
+ * Returns: the microvolts DC that the MAX8952 Regulator should generate when
* programmed with @vsel.
*/
static unsigned long omap_max8952_vsel_to_uv(unsigned char vsel)
@@ -99,7 +99,7 @@ static unsigned long omap_max8952_vsel_to_uv(unsigned char vsel)
* omap_max8952_uv_to_vsel - convert microvolts DC to MAX8952 VSEL value
* @uv: microvolts DC to convert
*
- * Returns the VSEL value necessary for the MAX8952 Regulator to
+ * Returns: the VSEL value necessary for the MAX8952 Regulator to
* generate an output voltage equal to or greater than @uv microvolts DC.
*/
static unsigned char omap_max8952_uv_to_vsel(unsigned long uv)
@@ -129,10 +129,10 @@ static struct omap_voltdm_pmic omap443x_max8952_mpu = {
};
/**
- * omap_fan5355_vsel_to_vdc - convert FAN535503 VSEL value to microvolts DC
+ * omap_fan535503_vsel_to_uv - convert FAN535503 VSEL value to microvolts DC
* @vsel: FAN535503 VSEL value to convert
*
- * Returns the microvolts DC that the FAN535503 Regulator should generate when
+ * Returns: the microvolts DC that the FAN535503 Regulator should generate when
* programmed with @vsel.
*/
static unsigned long omap_fan535503_vsel_to_uv(unsigned char vsel)
@@ -144,10 +144,10 @@ static unsigned long omap_fan535503_vsel_to_uv(unsigned char vsel)
}
/**
- * omap_fan535508_vsel_to_vdc - convert FAN535508 VSEL value to microvolts DC
+ * omap_fan535508_vsel_to_uv - convert FAN535508 VSEL value to microvolts DC
* @vsel: FAN535508 VSEL value to convert
*
- * Returns the microvolts DC that the FAN535508 Regulator should generate when
+ * Returns: the microvolts DC that the FAN535508 Regulator should generate when
* programmed with @vsel.
*/
static unsigned long omap_fan535508_vsel_to_uv(unsigned char vsel)
@@ -165,7 +165,7 @@ static unsigned long omap_fan535508_vsel_to_uv(unsigned char vsel)
* omap_fan535503_uv_to_vsel - convert microvolts DC to FAN535503 VSEL value
* @uv: microvolts DC to convert
*
- * Returns the VSEL value necessary for the MAX8952 Regulator to
+ * Returns: the VSEL value necessary for the MAX8952 Regulator to
* generate an output voltage equal to or greater than @uv microvolts DC.
*/
static unsigned char omap_fan535503_uv_to_vsel(unsigned long uv)
@@ -184,7 +184,7 @@ static unsigned char omap_fan535503_uv_to_vsel(unsigned long uv)
* omap_fan535508_uv_to_vsel - convert microvolts DC to FAN535508 VSEL value
* @uv: microvolts DC to convert
*
- * Returns the VSEL value necessary for the MAX8952 Regulator to
+ * Returns: the VSEL value necessary for the MAX8952 Regulator to
* generate an output voltage equal to or greater than @uv microvolts DC.
*/
static unsigned char omap_fan535508_uv_to_vsel(unsigned long uv)
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
index 5e05dd1324e7..2441d96b7144 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -1162,7 +1162,7 @@ static int pwrdm_save_context(struct powerdomain *pwrdm, void *unused)
}
/**
- * pwrdm_save_context - restore powerdomain registers
+ * pwrdm_restore_context - restore powerdomain registers
*
* Restore powerdomain control registers after a suspend or resume
* event.
diff --git a/arch/arm/mach-omap2/prm-regbits-33xx.h b/arch/arm/mach-omap2/prm-regbits-33xx.h
index 3748c5266ae1..9b97f8c76cd1 100644
--- a/arch/arm/mach-omap2/prm-regbits-33xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-33xx.h
@@ -15,6 +15,7 @@
#define AM33XX_GFX_MEM_STATEST_MASK (0x3 << 4)
#define AM33XX_GLOBAL_WARM_SW_RST_MASK (1 << 1)
#define AM33XX_RST_GLOBAL_WARM_SW_MASK (1 << 0)
+#define AM33XX_RST_GLOBAL_COLD_SW_MASK (1 << 1)
#define AM33XX_PRUSS_MEM_ONSTATE_MASK (0x3 << 5)
#define AM33XX_PRUSS_MEM_RETSTATE_MASK (1 << 7)
#define AM33XX_PRUSS_MEM_STATEST_MASK (0x3 << 23)
diff --git a/arch/arm/mach-omap2/prm.h b/arch/arm/mach-omap2/prm.h
index fc45a7ed09bb..fc53a27eed01 100644
--- a/arch/arm/mach-omap2/prm.h
+++ b/arch/arm/mach-omap2/prm.h
@@ -15,6 +15,7 @@
# ifndef __ASSEMBLER__
extern struct omap_domain_base prm_base;
extern u16 prm_features;
+extern enum reboot_mode prm_reboot_mode;
int omap_prcm_init(void);
int omap2_prcm_base_init(void);
# endif
diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c
index 4b65a0f9cf7d..505d685d6792 100644
--- a/arch/arm/mach-omap2/prm33xx.c
+++ b/arch/arm/mach-omap2/prm33xx.c
@@ -10,15 +10,12 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/io.h>
+#include <linux/reboot.h>
#include "powerdomain.h"
#include "prm33xx.h"
#include "prm-regbits-33xx.h"
-#define AM33XX_PRM_RSTCTRL_OFFSET 0x0000
-
-#define AM33XX_RST_GLOBAL_WARM_SW_MASK (1 << 0)
-
/* Read a register in a PRM instance */
static u32 am33xx_prm_read_reg(s16 inst, u16 idx)
{
@@ -322,10 +319,19 @@ static int am33xx_check_vcvp(void)
*
* Immediately reboots the device through warm reset.
*/
-static void am33xx_prm_global_warm_sw_reset(void)
+static void am33xx_prm_global_sw_reset(void)
{
- am33xx_prm_rmw_reg_bits(AM33XX_RST_GLOBAL_WARM_SW_MASK,
- AM33XX_RST_GLOBAL_WARM_SW_MASK,
+ /*
+ * Historically AM33xx performed warm reset for all requested reboot_mode.
+ * Keep this behaviour unchanged for all except newly added REBOOT_COLD.
+ */
+ u32 mask = AM33XX_RST_GLOBAL_WARM_SW_MASK;
+
+ if (prm_reboot_mode == REBOOT_COLD)
+ mask = AM33XX_RST_GLOBAL_COLD_SW_MASK;
+
+ am33xx_prm_rmw_reg_bits(mask,
+ mask,
AM33XX_PRM_DEVICE_MOD,
AM33XX_PRM_RSTCTRL_OFFSET);
@@ -386,7 +392,7 @@ static struct prm_ll_data am33xx_prm_ll_data = {
.assert_hardreset = am33xx_prm_assert_hardreset,
.deassert_hardreset = am33xx_prm_deassert_hardreset,
.is_hardreset_asserted = am33xx_prm_is_hardreset_asserted,
- .reset_system = am33xx_prm_global_warm_sw_reset,
+ .reset_system = am33xx_prm_global_sw_reset,
};
int __init am33xx_prm_init(const struct omap_prcm_init_data *data)
diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index 25093c1e5b9a..6c555438dd48 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -407,7 +407,7 @@ static bool omap44xx_prm_was_any_context_lost_old(u8 part, s16 inst, u16 idx)
}
/**
- * omap44xx_prm_clear_context_lost_flags_old - clear context loss flags
+ * omap44xx_prm_clear_context_loss_flags_old - clear context loss flags
* @part: PRM partition ID (e.g., OMAP4430_PRM_PARTITION)
* @inst: PRM instance offset (e.g., OMAP4430_PRM_MPU_INST)
* @idx: CONTEXT register offset
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index fd896f2295a1..ee4588acda50 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -67,6 +67,12 @@ struct omap_domain_base prm_base;
u16 prm_features;
/*
+ * Platforms that implement different reboot modes can store the requested
+ * mode here.
+ */
+enum reboot_mode prm_reboot_mode;
+
+/*
* prm_ll_data: function pointers to SoC-specific implementations of
* common PRM functions
*/
@@ -370,7 +376,7 @@ bool prm_was_any_context_lost_old(u8 part, s16 inst, u16 idx)
}
/**
- * prm_clear_context_lost_flags_old - clear context loss flags (old API)
+ * prm_clear_context_loss_flags_old - clear context loss flags (old API)
* @part: PRM partition ID (e.g., OMAP4430_PRM_PARTITION)
* @inst: PRM instance offset (e.g., OMAP4430_PRM_MPU_INST)
* @idx: CONTEXT register offset
@@ -497,6 +503,7 @@ int omap_prm_clear_mod_irqs(s16 module, u8 regs, u32 wkst_mask)
/**
* omap_prm_vp_check_txdone - check voltage processor TX done status
+ * @vp_id: unique VP instance ID
*
* Checks if voltage processor transmission has been completed.
* Returns non-zero if a transmission has completed, 0 otherwise.
@@ -514,6 +521,7 @@ u32 omap_prm_vp_check_txdone(u8 vp_id)
/**
* omap_prm_vp_clear_txdone - clears voltage processor TX done status
+ * @vp_id: unique VP instance ID
*
* Clears the status bit for completed voltage processor transmission
* returned by prm_vp_check_txdone.
diff --git a/arch/arm/mach-omap2/wd_timer.c b/arch/arm/mach-omap2/wd_timer.c
index d4ea56a5f75a..898e7e332981 100644
--- a/arch/arm/mach-omap2/wd_timer.c
+++ b/arch/arm/mach-omap2/wd_timer.c
@@ -57,7 +57,7 @@ int omap2_wd_timer_disable(struct omap_hwmod *oh)
}
/**
- * omap2_wdtimer_reset - reset and disable the WDTIMER IP block
+ * omap2_wd_timer_reset - reset and disable the WDTIMER IP block
* @oh: struct omap_hwmod *
*
* After the WDTIMER IP blocks are reset on OMAP2/3, we must also take
@@ -71,6 +71,8 @@ int omap2_wd_timer_disable(struct omap_hwmod *oh)
* during a normal merge window. omap_hwmod_softreset() should be
* renamed to omap_hwmod_set_ocp_softreset(), and omap_hwmod_softreset()
* should call the hwmod _ocp_softreset() code.
+ *
+ * Returns: %0 on success or -errno value on error.
*/
int omap2_wd_timer_reset(struct omap_hwmod *oh)
{
diff --git a/arch/arm/mach-qcom/Kconfig b/arch/arm/mach-qcom/Kconfig
index 12a812e61c16..f4765be1b2a0 100644
--- a/arch/arm/mach-qcom/Kconfig
+++ b/arch/arm/mach-qcom/Kconfig
@@ -4,46 +4,21 @@ menuconfig ARCH_QCOM
depends on ARCH_MULTI_V7
select ARM_GIC
select ARM_AMBA
+ select CLKSRC_QCOM
+ select HAVE_ARM_ARCH_TIMER
select PINCTRL
select QCOM_SCM if SMP
help
Support for Qualcomm's devicetree based systems.
+ This includes support for a few devices with ARM64 SoC, that have
+ ARM32 signed firmware that does not allow booting ARM64 kernels.
if ARCH_QCOM
-config ARCH_IPQ40XX
- bool "Enable support for IPQ40XX"
- select CLKSRC_QCOM
- select HAVE_ARM_ARCH_TIMER
-
-config ARCH_MSM8X60
- bool "Enable support for MSM8X60"
- select CLKSRC_QCOM
-
-config ARCH_MSM8909
- bool "Enable support for MSM8909"
- select HAVE_ARM_ARCH_TIMER
-
-config ARCH_MSM8916
- bool "Enable support for MSM8916"
- select HAVE_ARM_ARCH_TIMER
+config ARCH_QCOM_RESERVE_SMEM
+ bool "Reserve SMEM at the beginning of RAM"
help
- Enable support for the Qualcomm Snapdragon 410 (MSM8916/APQ8016).
-
- Note that ARM64 is the main supported architecture for MSM8916.
- The ARM32 option is intended for a few devices with signed firmware
- that does not allow booting ARM64 kernels.
-
-config ARCH_MSM8960
- bool "Enable support for MSM8960"
- select CLKSRC_QCOM
-
-config ARCH_MSM8974
- bool "Enable support for MSM8974"
- select HAVE_ARM_ARCH_TIMER
-
-config ARCH_MDM9615
- bool "Enable support for MDM9615"
- select CLKSRC_QCOM
+ Reserve 2MB at the beginning of the System RAM for shared mem.
+ This is required on IPQ40xx, MSM8x60 and MSM8960 platforms.
endif
diff --git a/arch/arm/mach-s3c/cpu.h b/arch/arm/mach-s3c/cpu.h
index d0adc9b40e25..a0187606b999 100644
--- a/arch/arm/mach-s3c/cpu.h
+++ b/arch/arm/mach-s3c/cpu.h
@@ -76,6 +76,6 @@ extern void s3c24xx_init_uartdevs(char *name,
struct s3c24xx_uart_resources *res,
struct s3c2410_uartcfg *cfg, int no);
-extern struct bus_type s3c6410_subsys;
+extern const struct bus_type s3c6410_subsys;
#endif
diff --git a/arch/arm/mach-s3c/s3c6410.c b/arch/arm/mach-s3c/s3c6410.c
index e79f18d0ca81..a29276a4fde5 100644
--- a/arch/arm/mach-s3c/s3c6410.c
+++ b/arch/arm/mach-s3c/s3c6410.c
@@ -57,7 +57,7 @@ void __init s3c6410_init_irq(void)
s3c64xx_init_irq(~0 & ~(1 << 7), ~0);
}
-struct bus_type s3c6410_subsys = {
+const struct bus_type s3c6410_subsys = {
.name = "s3c6410-core",
.dev_name = "s3c6410-core",
};
diff --git a/arch/arm/mach-s3c/s3c64xx.c b/arch/arm/mach-s3c/s3c64xx.c
index 9f9717874d67..6c70ea7f2931 100644
--- a/arch/arm/mach-s3c/s3c64xx.c
+++ b/arch/arm/mach-s3c/s3c64xx.c
@@ -149,7 +149,7 @@ static struct map_desc s3c_iodesc[] __initdata = {
},
};
-static struct bus_type s3c64xx_subsys = {
+static const struct bus_type s3c64xx_subsys = {
.name = "s3c64xx-core",
.dev_name = "s3c64xx-core",
};
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c
index d59c094cdea8..6fa70f787df4 100644
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -47,7 +47,7 @@ static void s3c_pm_do_save(struct sleep_save *ptr, int count)
}
/**
- * s3c_pm_do_restore() - restore register values from the save list.
+ * s3c_pm_do_restore_core() - restore register values from the save list.
* @ptr: Pointer to an array of registers.
* @count: Size of the ptr array.
*
diff --git a/arch/arm/mach-zynq/slcr.c b/arch/arm/mach-zynq/slcr.c
index 9765b3f4c2fc..6aae14b0736c 100644
--- a/arch/arm/mach-zynq/slcr.c
+++ b/arch/arm/mach-zynq/slcr.c
@@ -10,7 +10,6 @@
#include <linux/mfd/syscon.h>
#include <linux/of_address.h>
#include <linux/regmap.h>
-#include <linux/clk/zynq.h>
#include "common.h"
/* register offsets */
@@ -146,7 +145,7 @@ void zynq_slcr_cpu_stop(int cpu)
}
/**
- * zynq_slcr_cpu_state - Read/write cpu state
+ * zynq_slcr_cpu_state_read - Read cpu state
* @cpu: cpu number
*
* SLCR_REBOOT_STATUS save upper 2 bits (31/30 cpu states for cpu0 and cpu1)
@@ -165,7 +164,7 @@ bool zynq_slcr_cpu_state_read(int cpu)
}
/**
- * zynq_slcr_cpu_state - Read/write cpu state
+ * zynq_slcr_cpu_state_write - Write cpu state
* @cpu: cpu number
* @die: cpu state - true if cpu is going to die
*
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index e96fb40b9cc3..07565b593ed6 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -298,6 +298,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 2129070065c3..794cfea9f9d4 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -110,8 +110,8 @@ void __init add_static_vm_early(struct static_vm *svm)
int ioremap_page(unsigned long virt, unsigned long phys,
const struct mem_type *mtype)
{
- return ioremap_page_range(virt, virt + PAGE_SIZE, phys,
- __pgprot(mtype->prot_pte));
+ return vmap_page_range(virt, virt + PAGE_SIZE, phys,
+ __pgprot(mtype->prot_pte));
}
EXPORT_SYMBOL(ioremap_page);
@@ -466,8 +466,8 @@ int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
if (res->end > IO_SPACE_LIMIT)
return -EINVAL;
- return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
- __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
+ return vmap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
+ __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
}
EXPORT_SYMBOL(pci_remap_iospace);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index aa7c1d435139..ae0db7c97435 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -107,7 +107,7 @@ config ARM64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARM_AMBA
select ARM_ARCH_TIMER
select ARM_GIC
@@ -277,27 +277,21 @@ config 64BIT
config MMU
def_bool y
-config ARM64_PAGE_SHIFT
- int
- default 16 if ARM64_64K_PAGES
- default 14 if ARM64_16K_PAGES
- default 12
-
config ARM64_CONT_PTE_SHIFT
int
- default 5 if ARM64_64K_PAGES
- default 7 if ARM64_16K_PAGES
+ default 5 if PAGE_SIZE_64KB
+ default 7 if PAGE_SIZE_16KB
default 4
config ARM64_CONT_PMD_SHIFT
int
- default 5 if ARM64_64K_PAGES
- default 5 if ARM64_16K_PAGES
+ default 5 if PAGE_SIZE_64KB
+ default 5 if PAGE_SIZE_16KB
default 4
config ARCH_MMAP_RND_BITS_MIN
- default 14 if ARM64_64K_PAGES
- default 16 if ARM64_16K_PAGES
+ default 14 if PAGE_SIZE_64KB
+ default 16 if PAGE_SIZE_16KB
default 18
# max bits determined by the following formula:
@@ -1259,11 +1253,13 @@ choice
config ARM64_4K_PAGES
bool "4KB"
+ select HAVE_PAGE_SIZE_4KB
help
This feature enables 4KB pages support.
config ARM64_16K_PAGES
bool "16KB"
+ select HAVE_PAGE_SIZE_16KB
help
The system will use 16KB pages support. AArch32 emulation
requires applications compiled with 16K (or a multiple of 16K)
@@ -1271,6 +1267,7 @@ config ARM64_16K_PAGES
config ARM64_64K_PAGES
bool "64KB"
+ select HAVE_PAGE_SIZE_64KB
help
This feature enables 64KB pages support (4KB by default)
allowing only two levels of page tables and faster TLB
@@ -1291,19 +1288,19 @@ choice
config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
- depends on ARM64_16K_PAGES
+ depends on PAGE_SIZE_16KB
config ARM64_VA_BITS_39
bool "39-bit"
- depends on ARM64_4K_PAGES
+ depends on PAGE_SIZE_4KB
config ARM64_VA_BITS_42
bool "42-bit"
- depends on ARM64_64K_PAGES
+ depends on PAGE_SIZE_64KB
config ARM64_VA_BITS_47
bool "47-bit"
- depends on ARM64_16K_PAGES
+ depends on PAGE_SIZE_16KB
config ARM64_VA_BITS_48
bool "48-bit"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 47ecc4cff9d2..a88cdf910687 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -195,7 +195,7 @@ vdso_prepare: prepare0
include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so
ifdef CONFIG_COMPAT_VDSO
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \
- include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so
+ arch/arm64/kernel/vdso32/vdso.so
endif
endif
diff --git a/arch/arm64/boot/dts/allwinner/Makefile b/arch/arm64/boot/dts/allwinner/Makefile
index 91d505b385de..21149b346a60 100644
--- a/arch/arm64/boot/dts/allwinner/Makefile
+++ b/arch/arm64/boot/dts/allwinner/Makefile
@@ -16,6 +16,7 @@ dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinetab.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinetab-early-adopter.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-sopine-baseboard.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-teres-i.dtb
+dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h64-remix-mini-pc.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a100-allwinner-perf1.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h5-bananapi-m2-plus.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h5-bananapi-m2-plus-v1.2.dtb
@@ -42,5 +43,7 @@ dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-bigtreetech-cb1-manta.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-bigtreetech-pi.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-orangepi-zero2.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-x96-mate.dtb
+dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-longanpi-3h.dtb
+dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-orangepi-zero2w.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-orangepi-zero3.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-transpeed-8k618-t.dtb
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
index 9ec49ac2f6fd..381d58cea092 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
@@ -291,6 +291,8 @@
};
&spdif {
+ pinctrl-names = "default";
+ pinctrl-0 = <&spdif_tx_pin>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi
index 4903d6358112..855b7d43bc50 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi
@@ -166,6 +166,8 @@
};
&spdif {
+ pinctrl-names = "default";
+ pinctrl-0 = <&spdif_tx_pin>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
index ca1d287a0a01..d11e5041bae9 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
@@ -406,6 +406,7 @@
function = "spi1";
};
+ /omit-if-no-ref/
spdif_tx_pin: spdif-tx-pin {
pins = "PH7";
function = "spdif";
@@ -655,10 +656,8 @@
clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>;
clock-names = "apb", "spdif";
resets = <&ccu RST_BUS_SPDIF>;
- dmas = <&dma 2>;
- dma-names = "tx";
- pinctrl-names = "default";
- pinctrl-0 = <&spdif_tx_pin>;
+ dmas = <&dma 2>, <&dma 2>;
+ dma-names = "rx", "tx";
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1-manta.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1-manta.dts
index dbce61b355d6..4bfb52609c94 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1-manta.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1-manta.dts
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0+ or MIT)
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (C) 2023 Martin Botka <martin.botka@somainline.org>.
*/
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi
index 1fed2b46cfe8..af421ba24ce0 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0+ or MIT)
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (C) 2023 Martin Botka <martin.botka@somainline.org>.
*/
@@ -93,7 +93,7 @@
interrupt-controller;
#interrupt-cells = <1>;
- regulators{
+ regulators {
reg_dcdc1: dcdc1 {
regulator-name = "vdd-gpu-sys";
regulator-min-microvolt = <810000>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-pi.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-pi.dts
index 832f08b2b260..ff84a3794470 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-pi.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-pi.dts
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0+ or MIT)
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (C) 2023 Martin Botka <martin@biqu3d.com>.
*/
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi
index d549d277d972..b2e85e52d1a1 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi
@@ -9,6 +9,7 @@
#include <dt-bindings/clock/sun6i-rtc.h>
#include <dt-bindings/reset/sun50i-h616-ccu.h>
#include <dt-bindings/reset/sun50i-h6-r-ccu.h>
+#include <dt-bindings/thermal/thermal.h>
/ {
interrupt-parent = <&gic>;
@@ -133,11 +134,28 @@
#reset-cells = <1>;
};
+ dma: dma-controller@3002000 {
+ compatible = "allwinner,sun50i-h616-dma",
+ "allwinner,sun50i-a100-dma";
+ reg = <0x03002000 0x1000>;
+ interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_DMA>, <&ccu CLK_MBUS_DMA>;
+ clock-names = "bus", "mbus";
+ dma-channels = <16>;
+ dma-requests = <49>;
+ resets = <&ccu RST_BUS_DMA>;
+ #dma-cells = <1>;
+ };
+
sid: efuse@3006000 {
compatible = "allwinner,sun50i-h616-sid", "allwinner,sun50i-a64-sid";
reg = <0x03006000 0x1000>;
#address-cells = <1>;
#size-cells = <1>;
+
+ ths_calibration: thermal-sensor-calibration@14 {
+ reg = <0x14 0x8>;
+ };
};
watchdog: watchdog@30090a0 {
@@ -240,6 +258,11 @@
function = "spi1";
};
+ spdif_tx_pin: spdif-tx-pin {
+ pins = "PH4";
+ function = "spdif";
+ };
+
uart0_ph_pins: uart0-ph-pins {
pins = "PH0", "PH1";
function = "uart0";
@@ -256,6 +279,12 @@
pins = "PG8", "PG9";
function = "uart1";
};
+
+ /omit-if-no-ref/
+ x32clk_fanout_pin: x32clk-fanout-pin {
+ pins = "PG10";
+ function = "clock";
+ };
};
gic: interrupt-controller@3021000 {
@@ -339,6 +368,8 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&ccu CLK_BUS_UART0>;
+ dmas = <&dma 14>, <&dma 14>;
+ dma-names = "tx", "rx";
resets = <&ccu RST_BUS_UART0>;
status = "disabled";
};
@@ -350,6 +381,8 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&ccu CLK_BUS_UART1>;
+ dmas = <&dma 15>, <&dma 15>;
+ dma-names = "tx", "rx";
resets = <&ccu RST_BUS_UART1>;
status = "disabled";
};
@@ -361,6 +394,8 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&ccu CLK_BUS_UART2>;
+ dmas = <&dma 16>, <&dma 16>;
+ dma-names = "tx", "rx";
resets = <&ccu RST_BUS_UART2>;
status = "disabled";
};
@@ -372,6 +407,8 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&ccu CLK_BUS_UART3>;
+ dmas = <&dma 17>, <&dma 17>;
+ dma-names = "tx", "rx";
resets = <&ccu RST_BUS_UART3>;
status = "disabled";
};
@@ -383,6 +420,8 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&ccu CLK_BUS_UART4>;
+ dmas = <&dma 18>, <&dma 18>;
+ dma-names = "tx", "rx";
resets = <&ccu RST_BUS_UART4>;
status = "disabled";
};
@@ -394,6 +433,8 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&ccu CLK_BUS_UART5>;
+ dmas = <&dma 19>, <&dma 19>;
+ dma-names = "tx", "rx";
resets = <&ccu RST_BUS_UART5>;
status = "disabled";
};
@@ -405,6 +446,8 @@
reg = <0x05002000 0x400>;
interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_I2C0>;
+ dmas = <&dma 43>, <&dma 43>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_I2C0>;
pinctrl-names = "default";
pinctrl-0 = <&i2c0_pins>;
@@ -420,6 +463,8 @@
reg = <0x05002400 0x400>;
interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_I2C1>;
+ dmas = <&dma 44>, <&dma 44>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_I2C1>;
status = "disabled";
#address-cells = <1>;
@@ -433,6 +478,8 @@
reg = <0x05002800 0x400>;
interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_I2C2>;
+ dmas = <&dma 45>, <&dma 45>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_I2C2>;
status = "disabled";
#address-cells = <1>;
@@ -446,6 +493,8 @@
reg = <0x05002c00 0x400>;
interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_I2C3>;
+ dmas = <&dma 46>, <&dma 46>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_I2C3>;
status = "disabled";
#address-cells = <1>;
@@ -459,6 +508,8 @@
reg = <0x05003000 0x400>;
interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_I2C4>;
+ dmas = <&dma 47>, <&dma 47>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_I2C4>;
status = "disabled";
#address-cells = <1>;
@@ -472,6 +523,8 @@
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
clock-names = "ahb", "mod";
+ dmas = <&dma 22>, <&dma 22>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_SPI0>;
status = "disabled";
#address-cells = <1>;
@@ -485,6 +538,8 @@
interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_SPI1>, <&ccu CLK_SPI1>;
clock-names = "ahb", "mod";
+ dmas = <&dma 23>, <&dma 23>;
+ dma-names = "rx", "tx";
resets = <&ccu RST_BUS_SPI1>;
status = "disabled";
#address-cells = <1>;
@@ -511,6 +566,34 @@
};
};
+ spdif: spdif@5093000 {
+ compatible = "allwinner,sun50i-h616-spdif";
+ reg = <0x05093000 0x400>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>;
+ clock-names = "apb", "spdif";
+ resets = <&ccu RST_BUS_SPDIF>;
+ dmas = <&dma 2>;
+ dma-names = "tx";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spdif_tx_pin>;
+ #sound-dai-cells = <0>;
+ status = "disabled";
+ };
+
+ ths: thermal-sensor@5070400 {
+ compatible = "allwinner,sun50i-h616-ths";
+ reg = <0x05070400 0x400>;
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_THS>;
+ clock-names = "bus";
+ resets = <&ccu RST_BUS_THS>;
+ nvmem-cells = <&ths_calibration>;
+ nvmem-cell-names = "calibration";
+ allwinner,sram = <&syscon>;
+ #thermal-sensor-cells = <1>;
+ };
+
usbotg: usb@5100000 {
compatible = "allwinner,sun50i-h616-musb",
"allwinner,sun8i-h3-musb";
@@ -734,6 +817,8 @@
reg = <0x07081400 0x400>;
interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&r_ccu CLK_R_APB2_I2C>;
+ dmas = <&dma 48>, <&dma 48>;
+ dma-names = "rx", "tx";
resets = <&r_ccu RST_R_APB2_I2C>;
status = "disabled";
#address-cells = <1>;
@@ -755,4 +840,74 @@
#size-cells = <0>;
};
};
+
+ thermal-zones {
+ cpu-thermal {
+ polling-delay-passive = <500>;
+ polling-delay = <1000>;
+ thermal-sensors = <&ths 2>;
+ sustainable-power = <1000>;
+
+ trips {
+ cpu_threshold: cpu-trip-0 {
+ temperature = <60000>;
+ type = "passive";
+ hysteresis = <0>;
+ };
+ cpu_target: cpu-trip-1 {
+ temperature = <70000>;
+ type = "passive";
+ hysteresis = <0>;
+ };
+ cpu_critical: cpu-trip-2 {
+ temperature = <110000>;
+ type = "critical";
+ hysteresis = <0>;
+ };
+ };
+ };
+
+ gpu-thermal {
+ polling-delay-passive = <500>;
+ polling-delay = <1000>;
+ thermal-sensors = <&ths 0>;
+ sustainable-power = <1100>;
+
+ trips {
+ gpu_temp_critical: gpu-trip-0 {
+ temperature = <110000>;
+ type = "critical";
+ hysteresis = <0>;
+ };
+ };
+ };
+
+ ve-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&ths 1>;
+
+ trips {
+ ve_temp_critical: ve-trip-0 {
+ temperature = <110000>;
+ type = "critical";
+ hysteresis = <0>;
+ };
+ };
+ };
+
+ ddr-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&ths 3>;
+
+ trips {
+ ddr_temp_critical: ddr-trip-0 {
+ temperature = <110000>;
+ type = "critical";
+ hysteresis = <0>;
+ };
+ };
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi
new file mode 100644
index 000000000000..8c1263a3939e
--- /dev/null
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) Jisheng Zhang <jszhang@kernel.org>
+ */
+
+#include "sun50i-h616.dtsi"
+
+&mmc2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc2_pins>;
+ vmmc-supply = <&reg_dldo1>;
+ vqmmc-supply = <&reg_aldo1>;
+ bus-width = <8>;
+ non-removable;
+ cap-mmc-hw-reset;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ status = "okay";
+};
+
+&r_i2c {
+ status = "okay";
+
+ axp313: pmic@36 {
+ compatible = "x-powers,axp313a";
+ reg = <0x36>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+
+ regulators {
+ reg_aldo1: aldo1 {
+ regulator-always-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc-1v8-pll";
+ };
+
+ reg_dldo1: dldo1 {
+ regulator-always-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-3v3-io";
+ };
+
+ reg_dcdc1: dcdc1 {
+ regulator-always-on;
+ regulator-min-microvolt = <810000>;
+ regulator-max-microvolt = <990000>;
+ regulator-name = "vdd-gpu-sys";
+ };
+
+ reg_dcdc2: dcdc2 {
+ regulator-always-on;
+ regulator-min-microvolt = <810000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-name = "vdd-cpu";
+ };
+
+ reg_dcdc3: dcdc3 {
+ regulator-always-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-name = "vdd-dram";
+ };
+ };
+ };
+};
+
+&pio {
+ vcc-pc-supply = <&reg_dldo1>;
+ vcc-pf-supply = <&reg_dldo1>;
+ vcc-pg-supply = <&reg_aldo1>;
+ vcc-ph-supply = <&reg_dldo1>;
+ vcc-pi-supply = <&reg_dldo1>;
+};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-longanpi-3h.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-longanpi-3h.dts
new file mode 100644
index 000000000000..18b29c6b867f
--- /dev/null
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-longanpi-3h.dts
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) Jisheng Zhang <jszhang@kernel.org>
+ */
+
+/dts-v1/;
+
+#include "sun50i-h618-longan-module-3h.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "Sipeed Longan Pi 3H";
+ compatible = "sipeed,longan-pi-3h", "sipeed,longan-module-3h", "allwinner,sun50i-h618";
+
+ aliases {
+ ethernet0 = &emac0;
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ color = <LED_COLOR_ID_ORANGE>;
+ function = LED_FUNCTION_INDICATOR;
+ function-enumerator = <0>;
+ gpios = <&pio 6 2 GPIO_ACTIVE_LOW>; /* PG2 */
+ };
+
+ led-1 {
+ color = <LED_COLOR_ID_ORANGE>;
+ function = LED_FUNCTION_INDICATOR;
+ function-enumerator = <1>;
+ gpios = <&pio 6 4 GPIO_ACTIVE_LOW>; /* PG4 */
+ };
+ };
+
+ reg_vcc5v: regulator-vcc5v {
+ /* board wide 5V supply directly from the USB-C socket */
+ compatible = "regulator-fixed";
+ regulator-name = "vcc-5v";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ };
+
+ reg_vcc3v3: regulator-vcc3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ vin-supply = <&reg_vcc5v>;
+ };
+};
+
+&axp313 {
+ vin1-supply = <&reg_vcc5v>;
+ vin2-supply = <&reg_vcc5v>;
+ vin3-supply = <&reg_vcc5v>;
+};
+
+&ehci1 {
+ status = "okay";
+};
+
+&ohci1 {
+ status = "okay";
+};
+
+&ehci2 {
+ status = "okay";
+};
+
+&ohci2 {
+ status = "okay";
+};
+
+/* WiFi & BT combo module is connected to this Host */
+&ehci3 {
+ status = "okay";
+};
+
+&ohci3 {
+ status = "okay";
+};
+
+&emac0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&ext_rgmii_pins>;
+ phy-mode = "rgmii";
+ phy-handle = <&ext_rgmii_phy>;
+ allwinner,rx-delay-ps = <3100>;
+ allwinner,tx-delay-ps = <700>;
+ phy-supply = <&reg_vcc3v3>;
+ status = "okay";
+};
+
+&mdio0 {
+ ext_rgmii_phy: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <1>;
+ };
+};
+
+&mmc0 {
+ bus-width = <4>;
+ cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; /* PF6 */
+ vmmc-supply = <&reg_vcc3v3>;
+ status = "okay";
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&usbotg {
+ /*
+ * PHY0 pins are connected to a USB-C socket, but a role switch
+ * is not implemented: both CC pins are pulled to GND.
+ * The VBUS pins power the device, so a fixed peripheral mode
+ * is the best choice.
+ * The board can be powered via GPIOs, in this case port0 *can*
+ * act as a host (with a cable/adapter ignoring CC), as VBUS is
+ * then provided by the GPIOs. Any user of this setup would
+ * need to adjust the DT accordingly: dr_mode set to "host",
+ * enabling OHCI0 and EHCI0.
+ */
+ dr_mode = "peripheral";
+ status = "okay";
+};
+
+&usbphy {
+ usb1_vbus-supply = <&reg_vcc5v>;
+ usb2_vbus-supply = <&reg_vcc5v>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts
index 8ea1fd41aeba..ac0a2b7ea6f3 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts
@@ -15,6 +15,7 @@
compatible = "transpeed,8k618-t", "allwinner,sun50i-h618";
aliases {
+ ethernet1 = &sdio_wifi;
serial0 = &uart0;
};
@@ -39,6 +40,15 @@
regulator-max-microvolt = <3300000>;
regulator-always-on;
};
+
+ wifi_pwrseq: wifi_pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ clocks = <&rtc CLK_OSC32K_FANOUT>;
+ clock-names = "ext_clock";
+ pinctrl-0 = <&x32clk_fanout_pin>;
+ pinctrl-names = "default";
+ reset-gpios = <&pio 6 18 GPIO_ACTIVE_LOW>; /* PG18 */
+ };
};
&ehci0 {
@@ -60,6 +70,19 @@
status = "okay";
};
+&mmc1 {
+ vmmc-supply = <&reg_dldo1>;
+ vqmmc-supply = <&reg_aldo1>;
+ mmc-pwrseq = <&wifi_pwrseq>;
+ bus-width = <4>;
+ non-removable;
+ status = "okay";
+
+ sdio_wifi: wifi@1 {
+ reg = <1>;
+ };
+};
+
&mmc2 {
vmmc-supply = <&reg_dldo1>;
vqmmc-supply = <&reg_aldo1>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
new file mode 100644
index 000000000000..b6e3c169797f
--- /dev/null
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+// Copyright (c) 2023 ARM Ltd.
+
+/dts-v1/;
+
+#include "sun50i-a64.dtsi"
+#include "sun50i-a64-cpu-opp.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+ model = "Remix Mini PC";
+ compatible = "jide,remix-mini-pc", "allwinner,sun50i-h64",
+ "allwinner,sun50i-a64";
+
+ aliases {
+ ethernet1 = &rtl8723bs;
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ hdmi-connector {
+ compatible = "hdmi-connector";
+ type = "a";
+
+ port {
+ hdmi_con_in: endpoint {
+ remote-endpoint = <&hdmi_out_con>;
+ };
+ };
+ };
+
+ reg_vcc5v: regulator-5v {
+ /* board wide 5V supply directly from the DC input */
+ compatible = "regulator-fixed";
+ regulator-name = "vcc-5v";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ };
+
+ wifi_pwrseq: wifi_pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ reset-gpios = <&r_pio 0 2 GPIO_ACTIVE_LOW>; /* PL2 */
+ post-power-on-delay-ms = <200>;
+ };
+};
+
+&codec {
+ status = "okay";
+};
+
+&codec_analog {
+ cpvdd-supply = <&reg_eldo1>;
+ status = "okay";
+};
+
+&cpu0 {
+ cpu-supply = <&reg_dcdc2>;
+};
+
+&cpu1 {
+ cpu-supply = <&reg_dcdc2>;
+};
+
+&cpu2 {
+ cpu-supply = <&reg_dcdc2>;
+};
+
+&cpu3 {
+ cpu-supply = <&reg_dcdc2>;
+};
+
+&dai {
+ status = "okay";
+};
+
+&de {
+ status = "okay";
+};
+
+&ehci0 {
+ status = "okay";
+};
+
+&ehci1 {
+ status = "okay";
+};
+
+&hdmi {
+ hvcc-supply = <&reg_dldo1>;
+ status = "okay";
+};
+
+&hdmi_out {
+ hdmi_out_con: endpoint {
+ remote-endpoint = <&hdmi_con_in>;
+ };
+};
+
+/* Connects to the AC200 chip */
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_pins>;
+ status = "okay";
+};
+
+&i2c0_pins {
+ bias-pull-up;
+};
+
+&mmc0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc0_pins>;
+ vmmc-supply = <&reg_dcdc1>;
+ cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
+ disable-wp;
+ bus-width = <4>;
+ status = "okay";
+};
+
+&mmc1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc1_pins>;
+ vmmc-supply = <&reg_aldo1>;
+ vqmmc-supply = <&reg_dldo4>;
+ mmc-pwrseq = <&wifi_pwrseq>;
+ bus-width = <4>;
+ non-removable;
+ status = "okay";
+
+ rtl8723bs: wifi@1 {
+ reg = <1>;
+ interrupt-parent = <&r_pio>;
+ interrupts = <0 3 IRQ_TYPE_LEVEL_LOW>; /* PL3 */
+ interrupt-names = "host-wake";
+ };
+};
+
+&mmc2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc2_pins>, <&mmc2_ds_pin>;
+ vmmc-supply = <&reg_dcdc1>;
+ vqmmc-supply = <&reg_eldo1>;
+ bus-width = <8>;
+ non-removable;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ cap-mmc-hw-reset;
+ status = "okay";
+};
+
+&ohci0 {
+ status = "okay";
+};
+
+&ohci1 {
+ status = "okay";
+};
+
+&pio {
+ vcc-pb-supply = <&reg_dcdc1>;
+ vcc-pc-supply = <&reg_dcdc1>;
+ vcc-pd-supply = <&reg_dcdc1>;
+ vcc-pe-supply = <&reg_dcdc1>;
+ vcc-pf-supply = <&reg_dcdc1>;
+ vcc-pg-supply = <&reg_dldo4>;
+ vcc-ph-supply = <&reg_dcdc1>;
+};
+
+&r_ir {
+ status = "okay";
+};
+
+&r_pio {
+ /*
+ * We cannot add that supply for now since it would create a circular
+ * dependency between pinctrl, the regulator and the RSB Bus.
+ *
+ * vcc-pl-supply = <&reg_aldo2>;
+ */
+};
+
+&r_rsb {
+ status = "okay";
+
+ axp803: pmic@3a3 {
+ compatible = "x-powers,axp803";
+ reg = <0x3a3>;
+ interrupt-parent = <&r_intc>;
+ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_LOW>;
+ x-powers,drive-vbus-en;
+
+ vin1-supply = <&reg_vcc5v>;
+ vin2-supply = <&reg_vcc5v>;
+ vin3-supply = <&reg_vcc5v>;
+ vin5-supply = <&reg_vcc5v>;
+ vin6-supply = <&reg_vcc5v>;
+ aldoin-supply = <&reg_vcc5v>;
+ dldoin-supply = <&reg_vcc5v>;
+ eldoin-supply = <&reg_vcc5v>;
+ fldoin-supply = <&reg_vcc5v>;
+ drivevbus-supply = <&reg_vcc5v>;
+ ips-supply = <&reg_vcc5v>;
+
+ status = "okay";
+ };
+};
+
+#include "axp803.dtsi"
+
+&ac_power_supply {
+ status = "okay";
+};
+
+&reg_dcdc1 {
+ regulator-always-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-3v3";
+};
+
+&reg_dcdc2 {
+ regulator-always-on;
+ regulator-min-microvolt = <1040000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-name = "vdd-cpux";
+};
+
+/* DCDC3 is polyphased with DCDC2 */
+
+&reg_dcdc5 {
+ regulator-always-on;
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-name = "vcc-dram";
+};
+
+/* Deviates from the reset default of 1.1V. */
+&reg_dcdc6 {
+ regulator-always-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-name = "vdd-sys";
+};
+
+&reg_aldo1 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-wifi";
+};
+
+&reg_aldo2 {
+ /* Specifying R_PIO consumer would create circular dependency. */
+ regulator-always-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-pl";
+};
+
+&reg_aldo3 {
+ regulator-always-on;
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-name = "vcc-pll-avcc";
+};
+
+/* AC200 power supply */
+&reg_dldo1 {
+ regulator-always-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-ave-33";
+};
+
+&reg_dldo4 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc-wifi-io";
+};
+
+&reg_drivevbus {
+ regulator-name = "usb0-vbus";
+ status = "okay";
+};
+
+&reg_eldo1 {
+ regulator-always-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc-cpvdd-dram-emmc";
+};
+
+/* Supplies the arisc management core, needed by TF-A to power off cores. */
+&reg_fldo2 {
+ regulator-always-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-name = "vdd-cpus";
+};
+
+&reg_rtc_ldo {
+ regulator-name = "vcc-rtc";
+};
+
+&simplefb_hdmi {
+ vcc-hdmi-supply = <&reg_dcdc1>;
+};
+
+&sound {
+ simple-audio-card,aux-devs = <&codec_analog>;
+ simple-audio-card,widgets = "Microphone", "Microphone Jack",
+ "Headphone", "Headphone Jack";
+ simple-audio-card,routing =
+ "Left DAC", "DACL",
+ "Right DAC", "DACR",
+ "Headphone Jack", "HP",
+ "ADCL", "Left ADC",
+ "ADCR", "Right ADC",
+ "MIC2", "Microphone Jack";
+ status = "okay";
+};
+
+/* On the (unpopulated) UART pads. */
+&uart0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart0_pb_pins>;
+ status = "okay";
+};
+
+&uart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart1_pins>, <&uart1_rts_cts_pins>;
+ uart-has-rtscts;
+ status = "okay";
+
+ bluetooth {
+ compatible = "realtek,rtl8723bs-bt";
+ enable-gpios = <&r_pio 0 4 GPIO_ACTIVE_HIGH>; /* PL4 */
+ max-speed = <1500000>;
+ };
+};
+
+&usb_otg {
+ dr_mode = "host";
+ status = "okay";
+};
+
+&usbphy {
+ usb0_vbus-supply = <&reg_drivevbus>;
+ usb1_vbus-supply = <&reg_drivevbus>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
index dccbba6e7f98..dbf2dce8d1d6 100644
--- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
+++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
@@ -145,7 +145,6 @@
msix: msix@fbe00000 {
compatible = "al,alpine-msix";
reg = <0x0 0xfbe00000 0x0 0x100000>;
- interrupt-controller;
msi-controller;
al,msi-base-spi = <160>;
al,msi-num-spis = <160>;
diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
index 39481d7fd7d4..3ea178acdddf 100644
--- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
+++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
@@ -355,7 +355,6 @@
msix: msix@fbe00000 {
compatible = "al,alpine-msix";
reg = <0x0 0xfbe00000 0x0 0x100000>;
- interrupt-controller;
msi-controller;
al,msi-base-spi = <336>;
al,msi-num-spis = <959>;
diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile
index cc8b34bd583d..1ab160bf928a 100644
--- a/arch/arm64/boot/dts/amlogic/Makefile
+++ b/arch/arm64/boot/dts/amlogic/Makefile
@@ -8,6 +8,8 @@ dtb-$(CONFIG_ARCH_MESON) += meson-axg-jethome-jethub-j100.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-axg-jethome-jethub-j110-rev-2.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-axg-jethome-jethub-j110-rev-3.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-axg-s400.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12a-fbx8am-brcm.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12a-fbx8am-realtek.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-radxa-zero.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-sei510.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-u200.dtb
@@ -80,3 +82,7 @@ dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-hc4.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-sei610.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-x96-air-gbit.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-x96-air.dtb
+
+# Overlays
+meson-g12a-fbx8am-brcm-dtbs := meson-g12a-fbx8am.dtb meson-g12a-fbx8am-brcm.dtbo
+meson-g12a-fbx8am-realtek-dtbs := meson-g12a-fbx8am.dtb meson-g12a-fbx8am-realtek.dtbo
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi
index 2ad1f8eef199..32a754fe7990 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi
+++ b/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi
@@ -6,6 +6,7 @@
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/reset/amlogic,c3-reset.h>
/ {
cpus {
@@ -81,6 +82,12 @@
#size-cells = <2>;
ranges = <0x0 0x0 0x0 0xfe000000 0x0 0x480000>;
+ reset: reset-controller@2000 {
+ compatible = "amlogic,c3-reset";
+ reg = <0x0 0x2000 0x0 0x98>;
+ #reset-cells = <1>;
+ };
+
watchdog@2100 {
compatible = "amlogic,c3-wdt", "amlogic,t7-wdt";
reg = <0x0 0x2100 0x0 0x10>;
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
index a03c7667d2b6..5248bdf824ea 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
+++ b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
@@ -54,7 +54,7 @@
enable-method = "psci";
};
- cpu101: cpu@101{
+ cpu101: cpu@101 {
device_type = "cpu";
compatible = "arm,cortex-a53";
reg = <0x0 0x101>;
@@ -171,6 +171,16 @@
};
};
+ gpio_intc: interrupt-controller@4080 {
+ compatible = "amlogic,t7-gpio-intc",
+ "amlogic,meson-gpio-intc";
+ reg = <0x0 0x4080 0x0 0x20>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ amlogic,channel-interrupts =
+ <10 11 12 13 14 15 16 17 18 19 20 21>;
+ };
+
uart_a: serial@78000 {
compatible = "amlogic,t7-uart", "amlogic,meson-s4-uart";
reg = <0x0 0x78000 0x0 0x18>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-a1-ad402.dts b/arch/arm64/boot/dts/amlogic/meson-a1-ad402.dts
index 1c20516fa653..4bc30af05848 100644
--- a/arch/arm64/boot/dts/amlogic/meson-a1-ad402.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-a1-ad402.dts
@@ -106,7 +106,7 @@
pinctrl-0 = <&spifc_pins>;
pinctrl-names = "default";
- spi_nand@0 {
+ flash@0 {
compatible = "spi-nand";
status = "okay";
reg = <0>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
index 648e7f49424f..c03e207ea6c5 100644
--- a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
@@ -450,6 +450,8 @@
<&clkc_periphs CLKID_USB_BUS>,
<&clkc_periphs CLKID_USB_CTRL_IN>;
clock-names = "usb_ctrl", "usb_bus", "xtal_usb_ctrl";
+ assigned-clocks = <&clkc_periphs CLKID_USB_BUS>;
+ assigned-clock-rates = <64000000>;
resets = <&reset RESET_USBCTRL>;
reset-name = "usb_ctrl";
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j1xx.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j1xx.dtsi
index db605f3a22b4..9b65ae818e2f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j1xx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j1xx.dtsi
@@ -35,7 +35,7 @@
reset-gpios = <&gpio BOOT_9 GPIO_ACTIVE_LOW>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -44,7 +44,7 @@
regulator-always-on;
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "VCC5V";
regulator-min-microvolt = <5000000>;
@@ -52,7 +52,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -61,7 +61,7 @@
regulator-always-on;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -70,7 +70,7 @@
regulator-always-on;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <3300000>;
@@ -79,7 +79,7 @@
regulator-always-on;
};
- vccq_1v8: regulator-vccq_1v8 {
+ vccq_1v8: regulator-vccq-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCCQ_1V8";
regulator-min-microvolt = <1800000>;
@@ -88,7 +88,7 @@
regulator-always-on;
};
- usb_pwr: regulator-usb_pwr {
+ usb_pwr: regulator-usb-pwr {
compatible = "regulator-fixed";
regulator-name = "USB_PWR";
regulator-min-microvolt = <5000000>;
@@ -332,19 +332,3 @@
"", "", "", "", "", // 80 - 84
"", ""; // 85-86
};
-
-&cpu0 {
- #cooling-cells = <2>;
-};
-
-&cpu1 {
- #cooling-cells = <2>;
-};
-
-&cpu2 {
- #cooling-cells = <2>;
-};
-
-&cpu3 {
- #cooling-cells = <2>;
-};
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts
index c8905663bc75..7ed526f45175 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts
@@ -12,7 +12,7 @@
compatible = "amlogic,s400", "amlogic,a113d", "amlogic,meson-axg";
model = "Amlogic Meson AXG S400 Development Board";
- adc_keys {
+ keys {
compatible = "adc-keys";
io-channels = <&saradc 0>;
io-channel-names = "buttons";
@@ -111,7 +111,7 @@
reg = <0x0 0x0 0x0 0x40000000>;
};
- main_12v: regulator-main_12v {
+ main_12v: regulator-main-12v {
compatible = "regulator-fixed";
regulator-name = "12V";
regulator-min-microvolt = <12000000>;
@@ -119,7 +119,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -128,7 +128,7 @@
regulator-always-on;
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "VCC5V";
regulator-min-microvolt = <5000000>;
@@ -139,7 +139,7 @@
enable-active-high;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -148,7 +148,7 @@
regulator-always-on;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -157,7 +157,7 @@
regulator-always-on;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
@@ -166,7 +166,7 @@
regulator-always-on;
};
- usb_pwr: regulator-usb_pwr {
+ usb_pwr: regulator-usb-pwr {
compatible = "regulator-fixed";
regulator-name = "USB_PWR";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
index 7e5ac9db93f8..6d12b760b90f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
@@ -74,6 +74,8 @@
enable-method = "psci";
next-level-cache = <&l2>;
clocks = <&scpi_dvfs 0>;
+ dynamic-power-coefficient = <140>;
+ #cooling-cells = <2>;
};
cpu1: cpu@1 {
@@ -83,6 +85,8 @@
enable-method = "psci";
next-level-cache = <&l2>;
clocks = <&scpi_dvfs 0>;
+ dynamic-power-coefficient = <140>;
+ #cooling-cells = <2>;
};
cpu2: cpu@2 {
@@ -92,6 +96,8 @@
enable-method = "psci";
next-level-cache = <&l2>;
clocks = <&scpi_dvfs 0>;
+ dynamic-power-coefficient = <140>;
+ #cooling-cells = <2>;
};
cpu3: cpu@3 {
@@ -101,6 +107,8 @@
enable-method = "psci";
next-level-cache = <&l2>;
clocks = <&scpi_dvfs 0>;
+ dynamic-power-coefficient = <140>;
+ #cooling-cells = <2>;
};
l2: l2-cache0 {
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index ff68b911b729..9d5eab6595d0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -2502,6 +2502,9 @@
clocks = <&clkc CLKID_NNA_CORE_CLK>,
<&clkc CLKID_NNA_AXI_CLK>;
clock-names = "core", "bus";
+ assigned-clocks = <&clkc CLKID_NNA_CORE_CLK>,
+ <&clkc CLKID_NNA_AXI_CLK>;
+ assigned-clock-rates = <800000000>, <800000000>;
resets = <&reset RESET_NNA>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-brcm.dtso b/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-brcm.dtso
new file mode 100644
index 000000000000..9591fdc31ee0
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-brcm.dtso
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+// Copyright (c) 2024 Freebox SAS
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+
+&uart_A {
+ bluetooth {
+ compatible = "brcm,bcm43438-bt";
+ shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
+ max-speed = <2000000>;
+ clocks = <&wifi32k>;
+ clock-names = "lpo";
+ vbat-supply = <&vddao_3v3>;
+ vddio-supply = <&vddio_ao1v8>;
+ };
+};
+
+&sd_emmc_a {
+ /* Per mmc-controller.yaml */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ /* NB: may be either AP6398S or AP6398SR3 wifi module */
+ brcmf: wifi@1 {
+ reg = <1>;
+ compatible = "brcm,bcm4329-fmac";
+ };
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-realtek.dtso b/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-realtek.dtso
new file mode 100644
index 000000000000..55fff35b09ae
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am-realtek.dtso
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+// Copyright (c) 2024 Freebox SAS
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+
+&uart_A {
+ bluetooth {
+ compatible = "realtek,rtl8822cs-bt";
+ enable-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
+ host-wake-gpios = <&gpio GPIOX_19 GPIO_ACTIVE_HIGH>;
+ device-wake-gpios = <&gpio GPIOX_18 GPIO_ACTIVE_HIGH>;
+ };
+};
+
+&sd_emmc_a {
+ /* No explicit compatible for rtl8822cs sdio */
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am.dts
new file mode 100644
index 000000000000..af211d8f3952
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-fbx8am.dts
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+// Copyright (c) 2024 Freebox SAS
+
+/*
+ * SEI codename: SEI530FB (based on SEI510)
+ * Freebox codename: fbx8am
+ * Commercial names: Freebox Pop, Player TV Free 4K
+ */
+
+/dts-v1/;
+
+#include "meson-g12a.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/sound/meson-g12a-tohdmitx.h>
+
+/ {
+ compatible = "freebox,fbx8am", "amlogic,g12a";
+ model = "Freebox Player Pop";
+ chassis-type = "embedded";
+
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+ };
+
+ gpio-keys-polled {
+ compatible = "gpio-keys-polled";
+ poll-interval = <100>;
+
+ /* Physical user-accessible reset button near USB port */
+ power-button {
+ label = "Reset";
+ linux,code = <BTN_MISC>;
+ gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+ spdif_dit: audio-codec-2 {
+ #sound-dai-cells = <0>;
+ compatible = "linux,spdif-dit";
+ status = "okay";
+ sound-name-prefix = "DIT";
+ };
+
+ aliases {
+ serial0 = &uart_AO;
+ ethernet0 = &ethmac;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ emmc_pwrseq: emmc-pwrseq {
+ compatible = "mmc-pwrseq-emmc";
+ reset-gpios = <&gpio BOOT_12 GPIO_ACTIVE_LOW>;
+ };
+
+ hdmi-connector {
+ compatible = "hdmi-connector";
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&hdmi_tx_tmds_out>;
+ };
+ };
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x0 0x0 0x80000000>;
+ };
+
+ ao_5v: regulator-ao-5v {
+ compatible = "regulator-fixed";
+ regulator-name = "AO_5V";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&dc_in>;
+ regulator-always-on;
+ };
+
+ dc_in: regulator-dc-in {
+ compatible = "regulator-fixed";
+ regulator-name = "DC_IN";
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
+ regulator-always-on;
+ };
+
+ emmc_1v8: regulator-emmc-1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "EMMC_1V8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&vddao_3v3>;
+ regulator-always-on;
+ };
+
+ vddao_3v3: regulator-vddao-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "VDDAO_3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&ao_5v>;
+ regulator-always-on;
+ };
+
+ vddao_3v3_t: regulator-vddao-3v3-t {
+ compatible = "regulator-fixed";
+ regulator-name = "VDDAO_3V3_T";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vddao_3v3>;
+ gpio = <&gpio GPIOH_8 GPIO_OPEN_DRAIN>;
+ enable-active-high;
+ };
+
+ vddcpu: regulator-vddcpu {
+ /*
+ * SY8120B1ABC DC/DC Regulator.
+ */
+ compatible = "pwm-regulator";
+
+ regulator-name = "VDDCPU";
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+ pwm-supply = <&ao_5v>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vddio_ao1v8: regulator-vddio-ao1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "VDDIO_AO1V8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&vddao_3v3>;
+ regulator-always-on;
+ };
+
+ sdio_pwrseq: sdio-pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
+ post-power-on-delay-ms = <10>; /* required for 43752 */
+ clocks = <&wifi32k>;
+ clock-names = "ext_clock";
+ };
+
+ wifi32k: wifi32k {
+ compatible = "pwm-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ pwms = <&pwm_ef 0 30518 0>; /* PWM_E at 32.768KHz */
+ };
+
+ sound {
+ compatible = "amlogic,axg-sound-card";
+ model = "fbx8am";
+ audio-aux-devs = <&tdmout_b>;
+ audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1",
+ "TDMOUT_B IN 1", "FRDDR_B OUT 1",
+ "TDMOUT_B IN 2", "FRDDR_C OUT 1",
+ "TDM_B Playback", "TDMOUT_B OUT",
+ "SPDIFOUT_A IN 0", "FRDDR_A OUT 3",
+ "SPDIFOUT_A IN 1", "FRDDR_B OUT 3",
+ "SPDIFOUT_A IN 2", "FRDDR_C OUT 3";
+
+ assigned-clocks = <&clkc CLKID_MPLL2>,
+ <&clkc CLKID_MPLL0>,
+ <&clkc CLKID_MPLL1>;
+ assigned-clock-parents = <0>, <0>, <0>;
+ assigned-clock-rates = <294912000>,
+ <270950400>,
+ <393216000>;
+
+ dai-link-0 {
+ sound-dai = <&frddr_a>;
+ };
+
+ dai-link-1 {
+ sound-dai = <&frddr_b>;
+ };
+
+ dai-link-2 {
+ sound-dai = <&frddr_c>;
+ };
+
+ /* 8ch hdmi interface */
+ dai-link-3 {
+ sound-dai = <&tdmif_b>;
+ dai-format = "i2s";
+ dai-tdm-slot-tx-mask-0 = <1 1>;
+ dai-tdm-slot-tx-mask-1 = <1 1>;
+ dai-tdm-slot-tx-mask-2 = <1 1>;
+ dai-tdm-slot-tx-mask-3 = <1 1>;
+ mclk-fs = <256>;
+
+ codec {
+ sound-dai = <&tohdmitx TOHDMITX_I2S_IN_B>;
+ };
+ };
+
+ /* spdif hdmi or toslink interface */
+ dai-link-4 {
+ sound-dai = <&spdifout_a>;
+
+ codec-0 {
+ sound-dai = <&spdif_dit>;
+ };
+
+ codec-1 {
+ sound-dai = <&tohdmitx TOHDMITX_SPDIF_IN_A>;
+ };
+ };
+
+ /* spdif hdmi interface */
+ dai-link-5 {
+ sound-dai = <&spdifout_b>;
+
+ codec {
+ sound-dai = <&tohdmitx TOHDMITX_SPDIF_IN_B>;
+ };
+ };
+
+ /* hdmi glue */
+ dai-link-6 {
+ sound-dai = <&tohdmitx TOHDMITX_I2S_OUT>;
+
+ codec {
+ sound-dai = <&hdmi_tx>;
+ };
+ };
+ };
+};
+
+&arb {
+ status = "okay";
+};
+
+&cecb_AO {
+ pinctrl-0 = <&cec_ao_b_h_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+ hdmi-phandle = <&hdmi_tx>;
+};
+
+&clkc_audio {
+ status = "okay";
+};
+
+&cpu0 {
+ cpu-supply = <&vddcpu>;
+ operating-points-v2 = <&cpu_opp_table>;
+ clocks = <&clkc CLKID_CPU_CLK>;
+ clock-latency = <50000>;
+};
+
+&cpu1 {
+ cpu-supply = <&vddcpu>;
+ operating-points-v2 = <&cpu_opp_table>;
+ clocks = <&clkc CLKID_CPU_CLK>;
+ clock-latency = <50000>;
+};
+
+&cpu2 {
+ cpu-supply = <&vddcpu>;
+ operating-points-v2 = <&cpu_opp_table>;
+ clocks = <&clkc CLKID_CPU_CLK>;
+ clock-latency = <50000>;
+};
+
+&cpu3 {
+ cpu-supply = <&vddcpu>;
+ operating-points-v2 = <&cpu_opp_table>;
+ clocks = <&clkc CLKID_CPU_CLK>;
+ clock-latency = <50000>;
+};
+
+&ethmac {
+ status = "okay";
+ phy-handle = <&internal_ephy>;
+ phy-mode = "rmii";
+};
+
+&frddr_a {
+ status = "okay";
+};
+
+&frddr_b {
+ status = "okay";
+};
+
+&frddr_c {
+ status = "okay";
+};
+
+&spdifout_a {
+ pinctrl-0 = <&spdif_out_h_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&spdifout_b {
+ status = "okay";
+};
+
+&hdmi_tx {
+ status = "okay";
+ pinctrl-0 = <&hdmitx_hpd_pins>, <&hdmitx_ddc_pins>;
+ pinctrl-names = "default";
+};
+
+&hdmi_tx_tmds_port {
+ hdmi_tx_tmds_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+};
+
+&i2c3 {
+ status = "okay";
+ pinctrl-0 = <&i2c3_sda_a_pins>, <&i2c3_sck_a_pins>;
+ pinctrl-names = "default";
+};
+
+&ir {
+ status = "okay";
+ pinctrl-0 = <&remote_input_ao_pins>;
+ pinctrl-names = "default";
+};
+
+&pwm_AO_cd {
+ pinctrl-0 = <&pwm_ao_d_e_pins>;
+ pinctrl-names = "default";
+ clocks = <&xtal>;
+ clock-names = "clkin1";
+ status = "okay";
+};
+
+&pwm_ef {
+ status = "okay";
+ pinctrl-0 = <&pwm_e_pins>;
+ pinctrl-names = "default";
+ clocks = <&xtal>;
+ clock-names = "clkin0";
+};
+
+&pdm {
+ pinctrl-0 = <&pdm_din0_z_pins>, <&pdm_din1_z_pins>,
+ <&pdm_din2_z_pins>, <&pdm_din3_z_pins>,
+ <&pdm_dclk_z_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&saradc {
+ status = "okay";
+ vref-supply = <&vddio_ao1v8>;
+};
+
+/* SDIO */
+&sd_emmc_a {
+ status = "okay";
+ pinctrl-0 = <&sdio_pins>;
+ pinctrl-1 = <&sdio_clk_gate_pins>;
+ pinctrl-names = "default", "clk-gate";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bus-width = <4>;
+ cap-sd-highspeed;
+ sd-uhs-sdr50;
+ max-frequency = <100000000>;
+
+ non-removable;
+ disable-wp;
+
+ /* WiFi firmware requires power to be kept while in suspend */
+ keep-power-in-suspend;
+
+ mmc-pwrseq = <&sdio_pwrseq>;
+
+ vmmc-supply = <&vddao_3v3>;
+ vqmmc-supply = <&vddio_ao1v8>;
+};
+
+/* SD card */
+&sd_emmc_b {
+ status = "okay";
+ pinctrl-0 = <&sdcard_c_pins>;
+ pinctrl-1 = <&sdcard_clk_gate_c_pins>;
+ pinctrl-names = "default", "clk-gate";
+
+ bus-width = <4>;
+ cap-sd-highspeed;
+ max-frequency = <50000000>;
+ disable-wp;
+
+ cd-gpios = <&gpio GPIOC_6 GPIO_ACTIVE_LOW>;
+ vmmc-supply = <&vddao_3v3>;
+ vqmmc-supply = <&vddao_3v3>;
+};
+
+/* eMMC */
+&sd_emmc_c {
+ status = "okay";
+ pinctrl-0 = <&emmc_ctrl_pins>, <&emmc_data_8b_pins>, <&emmc_ds_pins>;
+ pinctrl-1 = <&emmc_clk_gate_pins>;
+ pinctrl-names = "default", "clk-gate";
+
+ bus-width = <8>;
+ cap-mmc-highspeed;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ max-frequency = <200000000>;
+ non-removable;
+ disable-wp;
+
+ mmc-pwrseq = <&emmc_pwrseq>;
+ vmmc-supply = <&vddao_3v3>;
+ vqmmc-supply = <&emmc_1v8>;
+};
+
+&tdmif_b {
+ status = "okay";
+};
+
+&tdmout_b {
+ status = "okay";
+};
+
+&tohdmitx {
+ status = "okay";
+};
+
+&uart_A {
+ status = "okay";
+ pinctrl-0 = <&uart_a_pins>, <&uart_a_cts_rts_pins>;
+ pinctrl-names = "default";
+ uart-has-rtscts;
+};
+
+&uart_AO {
+ status = "okay";
+ pinctrl-0 = <&uart_ao_a_pins>;
+ pinctrl-names = "default";
+};
+
+&usb {
+ status = "okay";
+ dr_mode = "host";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-radxa-zero.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-radxa-zero.dts
index fcd7e1d8e16f..15b9bc280706 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-radxa-zero.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-radxa-zero.dts
@@ -60,7 +60,7 @@
clock-names = "ext_clock";
};
- ao_5v: regulator-ao_5v {
+ ao_5v: regulator-ao-5v {
compatible = "regulator-fixed";
regulator-name = "AO_5V";
regulator-min-microvolt = <5000000>;
@@ -68,7 +68,7 @@
regulator-always-on;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -77,7 +77,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -86,7 +86,7 @@
regulator-always-on;
};
- hdmi_pw: regulator-hdmi_pw {
+ hdmi_pw: regulator-hdmi-pw {
compatible = "regulator-fixed";
regulator-name = "HDMI_PW";
regulator-min-microvolt = <5000000>;
@@ -95,7 +95,7 @@
regulator-always-on;
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_1V8";
regulator-min-microvolt = <1800000>;
@@ -104,7 +104,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
index 4c4550dd4711..61cb8135a392 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
@@ -15,7 +15,7 @@
compatible = "seirobotics,sei510", "amlogic,g12a";
model = "SEI Robotics SEI510";
- adc_keys {
+ keys {
compatible = "adc-keys";
io-channels = <&saradc 0>;
io-channel-names = "buttons";
@@ -83,7 +83,7 @@
reg = <0x0 0x0 0x0 0x40000000>;
};
- ao_5v: regulator-ao_5v {
+ ao_5v: regulator-ao-5v {
compatible = "regulator-fixed";
regulator-name = "AO_5V";
regulator-min-microvolt = <5000000>;
@@ -92,7 +92,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -100,7 +100,7 @@
regulator-always-on;
};
- emmc_1v8: regulator-emmc_1v8 {
+ emmc_1v8: regulator-emmc-1v8 {
compatible = "regulator-fixed";
regulator-name = "EMMC_1V8";
regulator-min-microvolt = <1800000>;
@@ -109,7 +109,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -118,7 +118,7 @@
regulator-always-on;
};
- vddao_3v3_t: regultor-vddao_3v3_t {
+ vddao_3v3_t: regulator-vddao-3v3-t {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3_T";
regulator-min-microvolt = <3300000>;
@@ -147,7 +147,7 @@
regulator-always-on;
};
- vddio_ao1v8: regulator-vddio_ao1v8 {
+ vddio_ao1v8: regulator-vddio-ao1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO1V8";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts
index 8355ddd7e9ae..3da7922d83f1 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts
@@ -75,7 +75,7 @@
reg = <0x0 0x0 0x0 0x40000000>;
};
- flash_1v8: regulator-flash_1v8 {
+ flash_1v8: regulator-flash-1v8 {
compatible = "regulator-fixed";
regulator-name = "FLASH_1V8";
regulator-min-microvolt = <1800000>;
@@ -84,7 +84,7 @@
regulator-always-on;
};
- main_12v: regulator-main_12v {
+ main_12v: regulator-main-12v {
compatible = "regulator-fixed";
regulator-name = "12V";
regulator-min-microvolt = <12000000>;
@@ -92,7 +92,7 @@
regulator-always-on;
};
- usb_pwr_en: regulator-usb_pwr_en {
+ usb_pwr_en: regulator-usb-pwr-en {
compatible = "regulator-fixed";
regulator-name = "USB_PWR_EN";
regulator-min-microvolt = <5000000>;
@@ -103,7 +103,7 @@
enable-active-high;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -112,7 +112,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -122,7 +122,7 @@
/* FIXME: actually controlled by VDDCPU_B_EN */
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "VCC_5V";
regulator-min-microvolt = <5000000>;
@@ -133,7 +133,7 @@
enable-active-high;
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_1V8";
regulator-min-microvolt = <1800000>;
@@ -142,7 +142,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
index 9b55982b6a6b..05c7a1e3f1b7 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
@@ -66,7 +66,7 @@
clock-names = "ext_clock";
};
- flash_1v8: regulator-flash_1v8 {
+ flash_1v8: regulator-flash-1v8 {
compatible = "regulator-fixed";
regulator-name = "FLASH_1V8";
regulator-min-microvolt = <1800000>;
@@ -75,7 +75,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -83,7 +83,7 @@
regulator-always-on;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -92,7 +92,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -102,7 +102,7 @@
/* FIXME: actually controlled by VDDCPU_B_EN */
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "VCC_5V";
regulator-min-microvolt = <5000000>;
@@ -112,7 +112,7 @@
gpio = <&gpio GPIOH_8 GPIO_OPEN_DRAIN>;
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_1V8";
regulator-min-microvolt = <1800000>;
@@ -121,7 +121,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
index 91c9769fda20..d80dd9a3da31 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
@@ -19,7 +19,7 @@
status = "okay";
};
- hub_5v: regulator-hub_5v {
+ hub_5v: regulator-hub-5v {
compatible = "regulator-fixed";
regulator-name = "HUB_5V";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid.dtsi
index 9e12a34b2840..09d959aefb18 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid.dtsi
@@ -48,7 +48,7 @@
};
};
- tflash_vdd: regulator-tflash_vdd {
+ tflash_vdd: regulator-tflash-vdd {
compatible = "regulator-fixed";
regulator-name = "TFLASH_VDD";
@@ -60,7 +60,7 @@
regulator-always-on;
};
- tf_io: gpio-regulator-tf_io {
+ tf_io: gpio-regulator-tf-io {
compatible = "regulator-gpio";
regulator-name = "TF_IO";
@@ -74,7 +74,7 @@
<1800000 1>;
};
- flash_1v8: regulator-flash_1v8 {
+ flash_1v8: regulator-flash-1v8 {
compatible = "regulator-fixed";
regulator-name = "FLASH_1V8";
regulator-min-microvolt = <1800000>;
@@ -83,7 +83,7 @@
regulator-always-on;
};
- main_12v: regulator-main_12v {
+ main_12v: regulator-main-12v {
compatible = "regulator-fixed";
regulator-name = "12V";
regulator-min-microvolt = <12000000>;
@@ -91,7 +91,7 @@
regulator-always-on;
};
- usb_pwr_en: regulator-usb_pwr_en {
+ usb_pwr_en: regulator-usb-pwr-en {
compatible = "regulator-fixed";
regulator-name = "USB_PWR_EN";
regulator-min-microvolt = <5000000>;
@@ -103,7 +103,7 @@
enable-active-high;
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "5V";
regulator-min-microvolt = <5000000>;
@@ -114,7 +114,7 @@
enable-active-high;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -123,7 +123,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -171,7 +171,7 @@
regulator-always-on;
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_1V8";
regulator-min-microvolt = <1800000>;
@@ -180,7 +180,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi
index ac8b7178257e..4cb6930ffb19 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi
@@ -39,7 +39,7 @@
clock-names = "ext_clock";
};
- flash_1v8: regulator-flash_1v8 {
+ flash_1v8: regulator-flash-1v8 {
compatible = "regulator-fixed";
regulator-name = "FLASH_1V8";
regulator-min-microvolt = <1800000>;
@@ -48,7 +48,7 @@
regulator-always-on;
};
- main_12v: regulator-main_12v {
+ main_12v: regulator-main-12v {
compatible = "regulator-fixed";
regulator-name = "12V";
regulator-min-microvolt = <12000000>;
@@ -56,7 +56,7 @@
regulator-always-on;
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "VCC_5V";
regulator-min-microvolt = <5000000>;
@@ -67,7 +67,7 @@
enable-active-high;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -76,7 +76,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi
index 5e7b9273b062..efd662a452e8 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi
@@ -84,7 +84,7 @@
reg = <0x0 0x0 0x0 0x80000000>;
};
- ao_5v: regulator-ao_5v {
+ ao_5v: regulator-ao-5v {
compatible = "regulator-fixed";
regulator-name = "AO_5V";
regulator-min-microvolt = <5000000>;
@@ -93,7 +93,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -120,7 +120,7 @@
};
};
- vcc_card: regulator-vcc_card {
+ vcc_card: regulator-vcc-card {
compatible = "regulator-fixed";
regulator-name = "VCC_CARD";
regulator-min-microvolt = <3300000>;
@@ -141,7 +141,7 @@
gpio = <&gpio GPIOH_3 GPIO_OPEN_DRAIN>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -150,7 +150,7 @@
regulator-always-on;
};
- vddio_ao3v3: regulator-vddio_ao3v3 {
+ vddio_ao3v3: regulator-vddio-ao3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO3V3";
regulator-min-microvolt = <3300000>;
@@ -159,7 +159,7 @@
regulator-always-on;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
index e59c3c92b1e7..08d6b69ba469 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
@@ -50,28 +50,28 @@
regulator-always-on;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
index 4aab1ab705b4..cca129ce2c58 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
@@ -78,21 +78,21 @@
<3300000 1>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
index e6d2de7c45a9..c431986e6a33 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
@@ -67,7 +67,7 @@
regulator-always-on;
};
- hdmi_p5v0: regulator-hdmi_p5v0 {
+ hdmi_p5v0: regulator-hdmi-p5v0 {
compatible = "regulator-fixed";
regulator-name = "HDMI_P5V0";
regulator-min-microvolt = <5000000>;
@@ -76,7 +76,7 @@
vin-supply = <&p5v0>;
};
- tflash_vdd: regulator-tflash_vdd {
+ tflash_vdd: regulator-tflash-vdd {
compatible = "regulator-fixed";
regulator-name = "TFLASH_VDD";
@@ -92,7 +92,7 @@
vin-supply = <&vddio_ao3v3>;
};
- tf_io: gpio-regulator-tf_io {
+ tf_io: gpio-regulator-tf-io {
compatible = "regulator-gpio";
regulator-name = "TF_IO";
@@ -148,7 +148,7 @@
vin-supply = <&p5v0>;
};
- ddr3_1v5: regulator-ddr3_1v5 {
+ ddr3_1v5: regulator-ddr3-1v5 {
compatible = "regulator-fixed";
regulator-name = "DDR3_1V5";
regulator-min-microvolt = <1500000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts
index 591455c50e88..7f94716876d3 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts
@@ -21,14 +21,14 @@
sound-name-prefix = "DIT";
};
- avdd18_usb_adc: regulator-avdd18_usb_adc {
+ avdd18_usb_adc: regulator-avdd18-usb-adc {
compatible = "regulator-fixed";
regulator-name = "AVDD18_USB_ADC";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- adc_keys {
+ keys {
compatible = "adc-keys";
io-channels = <&saradc 0>;
io-channel-names = "buttons";
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
index e803a466fe4e..52d57773a77f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
@@ -53,21 +53,21 @@
regulator-settling-time-down-us = <150000>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
index 74df32534231..255e93a0b36d 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
@@ -47,28 +47,28 @@
enable-active-high;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
index 94dafb955301..deb295227189 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
@@ -49,21 +49,21 @@
enable-active-high;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -71,7 +71,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts
index a29b49f051ae..90ef9c17d80b 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts
@@ -42,7 +42,7 @@
};
};
- dc_5v: regulator-dc_5v {
+ dc_5v: regulator-dc-5v {
compatible = "regulator-fixed";
regulator-name = "DC_5V";
regulator-min-microvolt = <5000000>;
@@ -89,7 +89,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -98,7 +98,7 @@
regulator-always-on;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -107,7 +107,7 @@
regulator-always-on;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts
index c0d6eb55100a..08a4718219b1 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts
@@ -64,28 +64,28 @@
reg = <0x0 0x0 0x0 0x20000000>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905w-jethome-jethub-j80.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905w-jethome-jethub-j80.dts
index a18d6d241a5a..2b94b6e5285e 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905w-jethome-jethub-j80.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905w-jethome-jethub-j80.dts
@@ -37,28 +37,28 @@
stdout-path = "serial0:115200n8";
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
index c8d74e61dec1..89fe5110f7a2 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
@@ -42,21 +42,21 @@
<3300000 1>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc-v2.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc-v2.dts
index 2825db91e462..63b20860067c 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc-v2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc-v2.dts
@@ -67,7 +67,7 @@
reg = <0x0 0x0 0x0 0x80000000>;
};
- ao_5v: regulator-ao_5v {
+ ao_5v: regulator-ao-5v {
compatible = "regulator-fixed";
regulator-name = "AO_5V";
regulator-min-microvolt = <5000000>;
@@ -76,7 +76,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -93,7 +93,7 @@
regulator-always-on;
};
- vcc_card: regulator-vcc_card {
+ vcc_card: regulator-vcc-card {
compatible = "regulator-fixed";
regulator-name = "VCC_CARD";
regulator-min-microvolt = <3300000>;
@@ -114,7 +114,7 @@
gpio = <&gpio GPIOH_3 GPIO_OPEN_DRAIN>;
};
- vddio_ao3v3: regulator-vddio_ao3v3 {
+ vddio_ao3v3: regulator-vddio-ao3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO3V3";
regulator-min-microvolt = <3300000>;
@@ -139,7 +139,7 @@
regulator-settling-time-down-us = <50000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -148,7 +148,7 @@
regulator-always-on;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC 1V8";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
index 27093e6ac9e2..8b26c9661be1 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
@@ -93,7 +93,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -117,7 +117,7 @@
regulator-settling-time-down-us = <50000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -125,7 +125,7 @@
};
/* This is provided by LDOs on the eMMC daugther card */
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
index f1acca5c4434..c79f9f2099bf 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
@@ -42,21 +42,21 @@
<3300000 1>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
index a150cc0e18ff..7e7dc87ede2d 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
@@ -39,28 +39,28 @@
regulator-always-on;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
index 860f307494c5..07e7c3bedea0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
@@ -112,28 +112,28 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddio_boot: regulator-vddio_boot {
+ vddio_boot: regulator-vddio-boot {
compatible = "regulator-fixed";
regulator-name = "VDDIO_BOOT";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-s912-libretech-pc.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-s912-libretech-pc.dts
index 4eda9f634c42..a66f19851ac9 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-s912-libretech-pc.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-s912-libretech-pc.dts
@@ -14,7 +14,7 @@
"amlogic,meson-gxm";
model = "Libre Computer AML-S912-PC";
- typec2_vbus: regulator-typec2_vbus {
+ typec2_vbus: regulator-typec2-vbus {
compatible = "regulator-fixed";
regulator-name = "TYPEC2_VBUS";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi
index 514a6dd4b124..e78cc9b577a0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi
@@ -80,7 +80,7 @@
clock-names = "ext_clock";
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -88,7 +88,7 @@
regulator-always-on;
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "VCC_5V";
regulator-min-microvolt = <5000000>;
@@ -99,7 +99,7 @@
enable-active-high;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -108,7 +108,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -118,7 +118,7 @@
/* FIXME: actually controlled by VDDCPU_B_EN */
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO1V8";
regulator-min-microvolt = <1800000>;
@@ -127,7 +127,7 @@
regulator-always-on;
};
- emmc_1v8: regulator-emmc_1v8 {
+ emmc_1v8: regulator-emmc-1v8 {
compatible = "regulator-fixed";
regulator-name = "EMMC_AO1V8";
regulator-min-microvolt = <1800000>;
@@ -136,7 +136,7 @@
regulator-always-on;
};
- vsys_3v3: regulator-vsys_3v3 {
+ vsys_3v3: regulator-vsys-3v3 {
compatible = "regulator-fixed";
regulator-name = "VSYS_3V3";
regulator-min-microvolt = <3300000>;
@@ -145,7 +145,7 @@
regulator-always-on;
};
- usb_pwr: regulator-usb_pwr {
+ usb_pwr: regulator-usb-pwr {
compatible = "regulator-fixed";
regulator-name = "USB_PWR";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-libretech-cottonwood.dtsi b/arch/arm64/boot/dts/amlogic/meson-libretech-cottonwood.dtsi
index 35e8f5bae990..082b72703cdf 100644
--- a/arch/arm64/boot/dts/amlogic/meson-libretech-cottonwood.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-libretech-cottonwood.dtsi
@@ -150,7 +150,7 @@
gpio-open-drain;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -171,7 +171,7 @@
pwm-dutycycle-range = <100 0>;
};
- vddio_ao18: regulator-vddio_ao18 {
+ vddio_ao18: regulator-vddio-ao18 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO18";
regulator-min-microvolt = <1800000>;
@@ -180,7 +180,7 @@
vin-supply = <&vddao_3v3>;
};
- vddio_c: regulator-vddio_c {
+ vddio_c: regulator-vddio-c {
compatible = "regulator-gpio";
regulator-name = "VDDIO_C";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-ac2xx.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-ac2xx.dtsi
index 46a34731f7e2..d1fa8b8bf795 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-ac2xx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-ac2xx.dtsi
@@ -54,7 +54,7 @@
reg = <0x0 0x0 0x0 0x40000000>;
};
- ao_5v: regulator-ao_5v {
+ ao_5v: regulator-ao-5v {
compatible = "regulator-fixed";
regulator-name = "AO_5V";
regulator-min-microvolt = <5000000>;
@@ -63,7 +63,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -71,7 +71,7 @@
regulator-always-on;
};
- emmc_1v8: regulator-emmc_1v8 {
+ emmc_1v8: regulator-emmc-1v8 {
compatible = "regulator-fixed";
regulator-name = "EMMC_1V8";
regulator-min-microvolt = <1800000>;
@@ -80,7 +80,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -105,7 +105,7 @@
regulator-always-on;
};
- vddio_ao1v8: regulator-vddio_ao1v8 {
+ vddio_ao1v8: regulator-vddio-ao1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO1V8";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi.dtsi
index 62404743e62d..81dce862902a 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi.dtsi
@@ -82,7 +82,7 @@
reg = <0x0 0x0 0x0 0x40000000>;
};
- emmc_1v8: regulator-emmc_1v8 {
+ emmc_1v8: regulator-emmc-1v8 {
compatible = "regulator-fixed";
regulator-name = "EMMC_1V8";
regulator-min-microvolt = <1800000>;
@@ -91,7 +91,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -99,7 +99,7 @@
regulator-always-on;
};
- vddio_c: regulator-vddio_c {
+ vddio_c: regulator-vddio-c {
compatible = "regulator-gpio";
regulator-name = "VDDIO_C";
regulator-min-microvolt = <1800000>;
@@ -116,7 +116,7 @@
<3300000 1>;
};
- tflash_vdd: regulator-tflash_vdd {
+ tflash_vdd: regulator-tflash-vdd {
compatible = "regulator-fixed";
regulator-name = "TFLASH_VDD";
regulator-min-microvolt = <3300000>;
@@ -127,7 +127,7 @@
regulator-always-on;
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_1V8";
regulator-min-microvolt = <1800000>;
@@ -136,7 +136,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -165,7 +165,7 @@
};
/* USB Hub Power Enable */
- vl_pwr_en: regulator-vl_pwr_en {
+ vl_pwr_en: regulator-vl-pwr-en {
compatible = "regulator-fixed";
regulator-name = "VL_PWR_EN";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
index 846a2d6c20e5..0170139b8d32 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
@@ -43,7 +43,7 @@
};
/* Powers the SATA Disk 0 regulator, which is enabled when a disk load is detected */
- p12v_0: regulator-p12v_0 {
+ p12v_0: regulator-p12v-0 {
compatible = "regulator-fixed";
regulator-name = "P12V_0";
regulator-min-microvolt = <12000000>;
@@ -56,7 +56,7 @@
};
/* Powers the SATA Disk 1 regulator, which is enabled when a disk load is detected */
- p12v_1: regulator-p12v_1 {
+ p12v_1: regulator-p12v-1 {
compatible = "regulator-fixed";
regulator-name = "P12V_1";
regulator-min-microvolt = <12000000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
index 1db2327bbd13..951eb8e3f0c0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
@@ -28,7 +28,7 @@
reset-gpios = <&gpio BOOT_12 GPIO_ACTIVE_LOW>;
};
- tflash_vdd: regulator-tflash_vdd {
+ tflash_vdd: regulator-tflash-vdd {
compatible = "regulator-fixed";
regulator-name = "TFLASH_VDD";
@@ -40,7 +40,7 @@
regulator-always-on;
};
- tf_io: gpio-regulator-tf_io {
+ tf_io: gpio-regulator-tf-io {
compatible = "regulator-gpio";
regulator-name = "TF_IO";
@@ -59,7 +59,7 @@
<1800000 1>;
};
- flash_1v8: regulator-flash_1v8 {
+ flash_1v8: regulator-flash-1v8 {
compatible = "regulator-fixed";
regulator-name = "FLASH_1V8";
regulator-min-microvolt = <1800000>;
@@ -68,7 +68,7 @@
regulator-always-on;
};
- main_12v: regulator-main_12v {
+ main_12v: regulator-main-12v {
compatible = "regulator-fixed";
regulator-name = "12V";
regulator-min-microvolt = <12000000>;
@@ -76,7 +76,7 @@
regulator-always-on;
};
- vcc_5v: regulator-vcc_5v {
+ vcc_5v: regulator-vcc-5v {
compatible = "regulator-fixed";
regulator-name = "5V";
regulator-min-microvolt = <5000000>;
@@ -87,7 +87,7 @@
enable-active-high;
};
- vcc_1v8: regulator-vcc_1v8 {
+ vcc_1v8: regulator-vcc-1v8 {
compatible = "regulator-fixed";
regulator-name = "VCC_1V8";
regulator-min-microvolt = <1800000>;
@@ -96,7 +96,7 @@
regulator-always-on;
};
- vcc_3v3: regulator-vcc_3v3 {
+ vcc_3v3: regulator-vcc-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
regulator-min-microvolt = <3300000>;
@@ -125,7 +125,7 @@
regulator-always-on;
};
- usb_pwr_en: regulator-usb_pwr_en {
+ usb_pwr_en: regulator-usb-pwr-en {
compatible = "regulator-fixed";
regulator-name = "USB_PWR_EN";
regulator-min-microvolt = <5000000>;
@@ -137,7 +137,7 @@
enable-active-high;
};
- vddao_1v8: regulator-vddao_1v8 {
+ vddao_1v8: regulator-vddao-1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_1V8";
regulator-min-microvolt = <1800000>;
@@ -146,7 +146,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
index 109932068dbe..3581e14cbf18 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
@@ -127,7 +127,7 @@
reg = <0x0 0x0 0x0 0x40000000>;
};
- ao_5v: regulator-ao_5v {
+ ao_5v: regulator-ao-5v {
compatible = "regulator-fixed";
regulator-name = "AO_5V";
regulator-min-microvolt = <5000000>;
@@ -136,7 +136,7 @@
regulator-always-on;
};
- dc_in: regulator-dc_in {
+ dc_in: regulator-dc-in {
compatible = "regulator-fixed";
regulator-name = "DC_IN";
regulator-min-microvolt = <5000000>;
@@ -144,7 +144,7 @@
regulator-always-on;
};
- emmc_1v8: regulator-emmc_1v8 {
+ emmc_1v8: regulator-emmc-1v8 {
compatible = "regulator-fixed";
regulator-name = "EMMC_1V8";
regulator-min-microvolt = <1800000>;
@@ -153,7 +153,7 @@
regulator-always-on;
};
- vddao_3v3: regulator-vddao_3v3 {
+ vddao_3v3: regulator-vddao-3v3 {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3";
regulator-min-microvolt = <3300000>;
@@ -163,7 +163,7 @@
};
/* Used by Tuner, RGB Led & IR Emitter LED array */
- vddao_3v3_t: regulator-vddao_3v3_t {
+ vddao_3v3_t: regulator-vddao-3v3-t {
compatible = "regulator-fixed";
regulator-name = "VDDAO_3V3_T";
regulator-min-microvolt = <3300000>;
@@ -192,7 +192,7 @@
regulator-always-on;
};
- vddio_ao1v8: regulator-vddio_ao1v8 {
+ vddio_ao1v8: regulator-vddio-ao1v8 {
compatible = "regulator-fixed";
regulator-name = "VDDIO_AO1V8";
regulator-min-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
index 9dcd25ec2c04..896d1f33b5b6 100644
--- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
@@ -586,6 +586,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupts = <GIC_SPI 400 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
index f049687d6b96..d8516ec0dae7 100644
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
@@ -450,6 +450,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>;
gpio-ranges = <&pinmux 0 0 16>,
<&pinmux 16 71 2>,
diff --git a/arch/arm64/boot/dts/exynos/exynos850.dtsi b/arch/arm64/boot/dts/exynos/exynos850.dtsi
index da3f4a791e68..2ba67c3d0681 100644
--- a/arch/arm64/boot/dts/exynos/exynos850.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos850.dtsi
@@ -184,6 +184,16 @@
clock-names = "fin_pll", "mct";
};
+ pdma0: dma-controller@120c0000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x120c0000 0x1000>;
+ clocks = <&cmu_core CLK_GOUT_PDMA_CORE_ACLK>;
+ clock-names = "apb_pclk";
+ #dma-cells = <1>;
+ interrupts = <GIC_SPI 479 IRQ_TYPE_LEVEL_HIGH>;
+ arm,pl330-broken-no-flushp;
+ };
+
gic: interrupt-controller@12a01000 {
compatible = "arm,gic-400";
#interrupt-cells = <3>;
@@ -728,6 +738,24 @@
<&cmu_peri CLK_GOUT_SPI0_IPCLK>;
clock-names = "pclk", "ipclk";
status = "disabled";
+
+ spi_0: spi@13940000 {
+ compatible = "samsung,exynos850-spi";
+ reg = <0x13940000 0x30>;
+ clocks = <&cmu_peri CLK_GOUT_SPI0_PCLK>,
+ <&cmu_peri CLK_GOUT_SPI0_IPCLK>;
+ clock-names = "spi", "spi_busclk0";
+ dmas = <&pdma0 5>, <&pdma0 4>;
+ dma-names = "tx", "rx";
+ interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
+ pinctrl-0 = <&spi0_pins>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ samsung,spi-src-clk = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
};
usi_cmgp0: usi@11d000c0 {
@@ -769,6 +797,24 @@
clock-names = "uart", "clk_uart_baud0";
status = "disabled";
};
+
+ spi_1: spi@11d00000 {
+ compatible = "samsung,exynos850-spi";
+ reg = <0x11d00000 0x30>;
+ clocks = <&cmu_cmgp CLK_GOUT_CMGP_USI0_PCLK>,
+ <&cmu_cmgp CLK_GOUT_CMGP_USI0_IPCLK>;
+ clock-names = "spi", "spi_busclk0";
+ dmas = <&pdma0 12>, <&pdma0 13>;
+ dma-names = "tx", "rx";
+ interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+ pinctrl-0 = <&spi1_pins>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ samsung,spi-src-clk = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
};
usi_cmgp1: usi@11d200c0 {
@@ -810,6 +856,24 @@
clock-names = "uart", "clk_uart_baud0";
status = "disabled";
};
+
+ spi_2: spi@11d20000 {
+ compatible = "samsung,exynos850-spi";
+ reg = <0x11d20000 0x30>;
+ clocks = <&cmu_cmgp CLK_GOUT_CMGP_USI1_PCLK>,
+ <&cmu_cmgp CLK_GOUT_CMGP_USI1_IPCLK>;
+ clock-names = "spi", "spi_busclk0";
+ dmas = <&pdma0 14>, <&pdma0 15>;
+ dma-names = "tx", "rx";
+ interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+ pinctrl-0 = <&spi2_pins>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ samsung,spi-src-clk = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
};
};
};
diff --git a/arch/arm64/boot/dts/exynos/google/gs101-oriole.dts b/arch/arm64/boot/dts/exynos/google/gs101-oriole.dts
index 4a71f752200d..6ccade2c8cb4 100644
--- a/arch/arm64/boot/dts/exynos/google/gs101-oriole.dts
+++ b/arch/arm64/boot/dts/exynos/google/gs101-oriole.dts
@@ -63,6 +63,20 @@
clock-frequency = <200000000>;
};
+&hsi2c_8 {
+ status = "okay";
+
+ eeprom: eeprom@50 {
+ compatible = "atmel,24c08";
+ reg = <0x50>;
+ };
+};
+
+&hsi2c_12 {
+ status = "okay";
+ /* TODO: add the devices once drivers exist */
+};
+
&pinctrl_far_alive {
key_voldown: key-voldown-pins {
samsung,pins = "gpa7-3";
@@ -99,6 +113,16 @@
status = "okay";
};
+&usi8 {
+ samsung,mode = <USI_V2_I2C>;
+ status = "okay";
+};
+
+&usi12 {
+ samsung,mode = <USI_V2_I2C>;
+ status = "okay";
+};
+
&watchdog_cl0 {
timeout-sec = <30>;
status = "okay";
diff --git a/arch/arm64/boot/dts/exynos/google/gs101-pinctrl.dtsi b/arch/arm64/boot/dts/exynos/google/gs101-pinctrl.dtsi
index e6a9776d4d62..a675f822acec 100644
--- a/arch/arm64/boot/dts/exynos/google/gs101-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/exynos/google/gs101-pinctrl.dtsi
@@ -251,7 +251,7 @@
#interrupt-cells = <2>;
};
- pcie0_clkreq: pcie0-clkreq-pins{
+ pcie0_clkreq: pcie0-clkreq-pins {
samsung,pins = "gph0-1";
samsung,pin-function = <GS101_PIN_FUNC_2>;
samsung,pin-pud = <GS101_PIN_PULL_UP>;
diff --git a/arch/arm64/boot/dts/exynos/google/gs101.dtsi b/arch/arm64/boot/dts/exynos/google/gs101.dtsi
index 9747cb3fa03a..55e6bcb3689e 100644
--- a/arch/arm64/boot/dts/exynos/google/gs101.dtsi
+++ b/arch/arm64/boot/dts/exynos/google/gs101.dtsi
@@ -73,7 +73,7 @@
compatible = "arm,cortex-a55";
reg = <0x0000>;
enable-method = "psci";
- cpu-idle-states = <&ANANKE_CPU_SLEEP>;
+ cpu-idle-states = <&ANANKE_CPU_SLEEP>;
capacity-dmips-mhz = <250>;
dynamic-power-coefficient = <70>;
};
@@ -83,7 +83,7 @@
compatible = "arm,cortex-a55";
reg = <0x0100>;
enable-method = "psci";
- cpu-idle-states = <&ANANKE_CPU_SLEEP>;
+ cpu-idle-states = <&ANANKE_CPU_SLEEP>;
capacity-dmips-mhz = <250>;
dynamic-power-coefficient = <70>;
};
@@ -93,7 +93,7 @@
compatible = "arm,cortex-a55";
reg = <0x0200>;
enable-method = "psci";
- cpu-idle-states = <&ANANKE_CPU_SLEEP>;
+ cpu-idle-states = <&ANANKE_CPU_SLEEP>;
capacity-dmips-mhz = <250>;
dynamic-power-coefficient = <70>;
};
@@ -103,7 +103,7 @@
compatible = "arm,cortex-a55";
reg = <0x0300>;
enable-method = "psci";
- cpu-idle-states = <&ANANKE_CPU_SLEEP>;
+ cpu-idle-states = <&ANANKE_CPU_SLEEP>;
capacity-dmips-mhz = <250>;
dynamic-power-coefficient = <70>;
};
@@ -113,7 +113,7 @@
compatible = "arm,cortex-a76";
reg = <0x0400>;
enable-method = "psci";
- cpu-idle-states = <&ENYO_CPU_SLEEP>;
+ cpu-idle-states = <&ENYO_CPU_SLEEP>;
capacity-dmips-mhz = <620>;
dynamic-power-coefficient = <284>;
};
@@ -123,7 +123,7 @@
compatible = "arm,cortex-a76";
reg = <0x0500>;
enable-method = "psci";
- cpu-idle-states = <&ENYO_CPU_SLEEP>;
+ cpu-idle-states = <&ENYO_CPU_SLEEP>;
capacity-dmips-mhz = <620>;
dynamic-power-coefficient = <284>;
};
@@ -133,7 +133,7 @@
compatible = "arm,cortex-x1";
reg = <0x0600>;
enable-method = "psci";
- cpu-idle-states = <&HERA_CPU_SLEEP>;
+ cpu-idle-states = <&HERA_CPU_SLEEP>;
capacity-dmips-mhz = <1024>;
dynamic-power-coefficient = <650>;
};
@@ -143,7 +143,7 @@
compatible = "arm,cortex-x1";
reg = <0x0700>;
enable-method = "psci";
- cpu-idle-states = <&HERA_CPU_SLEEP>;
+ cpu-idle-states = <&HERA_CPU_SLEEP>;
capacity-dmips-mhz = <1024>;
dynamic-power-coefficient = <650>;
};
@@ -180,14 +180,6 @@
};
};
- /* TODO replace with CCF clock */
- dummy_clk: clock-3 {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <12345>;
- clock-output-names = "pclk";
- };
-
/* ect node is required to be present by bootloader */
ect {
};
@@ -289,7 +281,27 @@
#clock-cells = <1>;
clocks = <&cmu_top CLK_DOUT_CMU_MISC_BUS>,
<&cmu_top CLK_DOUT_CMU_MISC_SSS>;
- clock-names = "dout_cmu_misc_bus", "dout_cmu_misc_sss";
+ clock-names = "bus", "sss";
+ };
+
+ timer@10050000 {
+ compatible = "google,gs101-mct",
+ "samsung,exynos4210-mct";
+ reg = <0x10050000 0x800>;
+ interrupts = <GIC_SPI 753 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 754 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 755 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 756 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 757 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 758 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 759 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 760 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 761 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 762 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 763 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 764 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&ext_24_5m>, <&cmu_misc CLK_GOUT_MISC_MCT_PCLK>;
+ clock-names = "fin_pll", "mct";
};
watchdog_cl0: watchdog@10060000 {
@@ -339,9 +351,20 @@
};
};
+ cmu_peric0: clock-controller@10800000 {
+ compatible = "google,gs101-cmu-peric0";
+ reg = <0x10800000 0x4000>;
+ #clock-cells = <1>;
+ clocks = <&ext_24_5m>,
+ <&cmu_top CLK_DOUT_CMU_PERIC0_BUS>,
+ <&cmu_top CLK_DOUT_CMU_PERIC0_IP>;
+ clock-names = "oscclk", "bus", "ip";
+ };
+
sysreg_peric0: syscon@10820000 {
compatible = "google,gs101-peric0-sysreg", "syscon";
reg = <0x10820000 0x10000>;
+ clocks = <&cmu_peric0 CLK_GOUT_PERIC0_SYSREG_PERIC0_PCLK>;
};
pinctrl_peric0: pinctrl@10840000 {
@@ -350,6 +373,35 @@
interrupts = <GIC_SPI 625 IRQ_TYPE_LEVEL_HIGH 0>;
};
+ usi8: usi@109700c0 {
+ compatible = "google,gs101-usi",
+ "samsung,exynos850-usi";
+ reg = <0x109700c0 0x20>;
+ ranges;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ clocks = <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_7>,
+ <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_7>;
+ clock-names = "pclk", "ipclk";
+ samsung,sysreg = <&sysreg_peric0 0x101c>;
+ status = "disabled";
+
+ hsi2c_8: i2c@10970000 {
+ compatible = "google,gs101-hsi2c",
+ "samsung,exynosautov9-hsi2c";
+ reg = <0x10970000 0xc0>;
+ interrupts = <GIC_SPI 642 IRQ_TYPE_LEVEL_HIGH 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&hsi2c8_bus>;
+ clocks = <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_7>,
+ <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_7>;
+ clock-names = "hsi2c", "hsi2c_pclk";
+ status = "disabled";
+ };
+ };
+
usi_uart: usi@10a000c0 {
compatible = "google,gs101-usi",
"samsung,exynos850-usi";
@@ -357,7 +409,8 @@
ranges;
#address-cells = <1>;
#size-cells = <1>;
- clocks = <&dummy_clk>, <&dummy_clk>;
+ clocks = <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP1_PCLK_0>,
+ <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP1_IPCLK_0>;
clock-names = "pclk", "ipclk";
samsung,sysreg = <&sysreg_peric0 0x1020>;
samsung,mode = <USI_V2_UART>;
@@ -366,19 +419,30 @@
serial_0: serial@10a00000 {
compatible = "google,gs101-uart";
reg = <0x10a00000 0xc0>;
- reg-io-width = <4>;
interrupts = <GIC_SPI 634
IRQ_TYPE_LEVEL_HIGH 0>;
- clocks = <&dummy_clk 0>, <&dummy_clk 0>;
+ clocks = <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP1_PCLK_0>,
+ <&cmu_peric0 CLK_GOUT_PERIC0_PERIC0_TOP1_IPCLK_0>;
clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <256>;
status = "disabled";
};
};
+ cmu_peric1: clock-controller@10c00000 {
+ compatible = "google,gs101-cmu-peric1";
+ reg = <0x10c00000 0x4000>;
+ #clock-cells = <1>;
+ clocks = <&ext_24_5m>,
+ <&cmu_top CLK_DOUT_CMU_PERIC1_BUS>,
+ <&cmu_top CLK_DOUT_CMU_PERIC1_IP>;
+ clock-names = "oscclk", "bus", "ip";
+ };
+
sysreg_peric1: syscon@10c20000 {
compatible = "google,gs101-peric1-sysreg", "syscon";
reg = <0x10c20000 0x10000>;
+ clocks = <&cmu_peric1 CLK_GOUT_PERIC1_SYSREG_PERIC1_PCLK>;
};
pinctrl_peric1: pinctrl@10c40000 {
@@ -387,6 +451,35 @@
interrupts = <GIC_SPI 644 IRQ_TYPE_LEVEL_HIGH 0>;
};
+ usi12: usi@10d500c0 {
+ compatible = "google,gs101-usi",
+ "samsung,exynos850-usi";
+ reg = <0x10d500c0 0x20>;
+ ranges;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ clocks = <&cmu_peric1 CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_5>,
+ <&cmu_peric1 CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_5>;
+ clock-names = "pclk", "ipclk";
+ samsung,sysreg = <&sysreg_peric1 0x1010>;
+ status = "disabled";
+
+ hsi2c_12: i2c@10d50000 {
+ compatible = "google,gs101-hsi2c",
+ "samsung,exynosautov9-hsi2c";
+ reg = <0x10d50000 0xc0>;
+ interrupts = <GIC_SPI 655 IRQ_TYPE_LEVEL_HIGH 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-0 = <&hsi2c12_bus>;
+ pinctrl-names = "default";
+ clocks = <&cmu_peric1 CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_5>,
+ <&cmu_peric1 CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_5>;
+ clock-names = "hsi2c", "hsi2c_pclk";
+ status = "disabled";
+ };
+ };
+
pinctrl_hsi1: pinctrl@11840000 {
compatible = "google,gs101-pinctrl";
reg = <0x11840000 0x00001000>;
diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile
index 2e027675d7bb..045250d0a040 100644
--- a/arch/arm64/boot/dts/freescale/Makefile
+++ b/arch/arm64/boot/dts/freescale/Makefile
@@ -20,23 +20,41 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-frwy.dtb
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-qds.dtb
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-rdb.dtb
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-tqmls1046a-mbls10xxa.dtb
+DTC_FLAGS_fsl-ls1088a-qds := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-qds.dtb
+DTC_FLAGS_fsl-ls1088a-rdb := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-rdb.dtb
+DTC_FLAGS_fsl-ls1088a-ten64 := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-ten64.dtb
+DTC_FLAGS_fsl-ls1088a-tqmls1088a-mbls10xxa := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-tqmls1088a-mbls10xxa.dtb
+DTC_FLAGS_fsl-ls2080a-qds := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-qds.dtb
+DTC_FLAGS_fsl-ls2080a-rdb := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-rdb.dtb
+DTC_FLAGS_fsl-ls2081a-rdb := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2081a-rdb.dtb
+DTC_FLAGS_fsl-ls2080a-simu := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-simu.dtb
+DTC_FLAGS_fsl-ls2088a-qds := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-qds.dtb
+DTC_FLAGS_fsl-ls2088a-rdb := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-rdb.dtb
+DTC_FLAGS_fsl-lx2160a-bluebox3 := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3.dtb
+DTC_FLAGS_fsl-lx2160a-bluebox3-rev-a := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3-rev-a.dtb
+DTC_FLAGS_fsl-lx2160a-clearfog-cx := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-clearfog-cx.dtb
+DTC_FLAGS_fsl-lx2160a-honeycomb := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-honeycomb.dtb
+DTC_FLAGS_fsl-lx2160a-qds := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-qds.dtb
+DTC_FLAGS_fsl-lx2160a-rdb := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-rdb.dtb
+DTC_FLAGS_fsl-lx2162a-clearfog := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-clearfog.dtb
+DTC_FLAGS_fsl-lx2162a-qds := -Wno-interrupt_map
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-qds.dtb
fsl-ls1028a-qds-13bb-dtbs := fsl-ls1028a-qds.dtb fsl-ls1028a-qds-13bb.dtbo
@@ -53,6 +71,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-85bb.dtb
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-899b.dtb
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-9999.dtb
+DTC_FLAGS_fsl-lx2160a-tqmlx2160a-mblx2160a := -Wno-interrupt_map
fsl-lx2160a-tqmlx2160a-mblx2160a-12-11-x-dtbs := fsl-lx2160a-tqmlx2160a-mblx2160a.dtb \
fsl-lx2160a-tqmlx2160a-mblx2160a_12_x_x.dtbo \
fsl-lx2160a-tqmlx2160a-mblx2160a_x_11_x.dtbo
@@ -80,6 +99,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-tqmlx2160a-mblx2160a-14-8-x.dtb
dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-tqmlx2160a-mblx2160a-14-7-x.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8dxl-evk.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8dxp-tqma8xdp-mba8xx.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8mm-beacon-kit.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8mm-data-modul-edm-sbc.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8mm-ddr4-evk.dtb
@@ -133,7 +153,9 @@ dtb-$(CONFIG_ARCH_MXC) += imx8mn-var-som-symphony.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8mn-venice-gw7902.dtb
imx8mn-tqma8mqnl-mba8mx-lvds-tm070jvhg33-dtbs += imx8mn-tqma8mqnl-mba8mx.dtb imx8mn-tqma8mqnl-mba8mx-lvds-tm070jvhg33.dtbo
+imx8mn-tqma8mqnl-mba8mx-usbotg-dtbs += imx8mn-tqma8mqnl-mba8mx.dtb imx8mn-tqma8mqnl-mba8mx-usbotg.dtbo
dtb-$(CONFIG_ARCH_MXC) += imx8mn-tqma8mqnl-mba8mx-lvds-tm070jvhg33.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8mn-tqma8mqnl-mba8mx-usbotg.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8mp-beacon-kit.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8mp-data-modul-edm-sbc.dtb
@@ -188,8 +210,10 @@ imx8mq-tqma8mq-mba8mx-lvds-tm070jvhg33-dtbs += imx8mq-tqma8mq-mba8mx.dtb imx8mq-
dtb-$(CONFIG_ARCH_MXC) += imx8mq-tqma8mq-mba8mx-lvds-tm070jvhg33.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-eval.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-eval-v1.2.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-ixora-v1.1.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-v1.1-eval.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-v1.1-eval-v1.2.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-v1.1-ixora-v1.1.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qm-apalis-v1.1-ixora-v1.2.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qm-mek.dtb
@@ -199,10 +223,13 @@ dtb-$(CONFIG_ARCH_MXC) += imx8qxp-colibri-eval-v3.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qxp-colibri-iris.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qxp-colibri-iris-v2.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8qxp-mek.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx8qxp-tqma8xqp-mba8xx.dtb
dtb-$(CONFIG_ARCH_MXC) += imx8ulp-evk.dtb
dtb-$(CONFIG_ARCH_MXC) += imx93-11x11-evk.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx93-phyboard-segin.dtb
dtb-$(CONFIG_ARCH_MXC) += imx93-tqma9352-mba93xxca.dtb
dtb-$(CONFIG_ARCH_MXC) += imx93-tqma9352-mba93xxla.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx93-var-som-symphony.dtb
imx8mm-venice-gw72xx-0x-imx219-dtbs := imx8mm-venice-gw72xx-0x.dtb imx8mm-venice-gw72xx-0x-imx219.dtbo
imx8mm-venice-gw72xx-0x-rpidsi-dtbs := imx8mm-venice-gw72xx-0x.dtb imx8mm-venice-gw72xx-0x-rpidsi.dtbo
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi
index 1e3fe3897b52..fe9093b3c02e 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi
@@ -290,7 +290,7 @@
dcfg: dcfg@1ee0000 {
compatible = "fsl,ls1012a-dcfg",
"syscon";
- reg = <0x0 0x1ee0000 0x0 0x10000>;
+ reg = <0x0 0x1ee0000 0x0 0x1000>;
big-endian;
};
@@ -351,24 +351,26 @@
};
i2c0: i2c@2180000 {
- compatible = "fsl,vf610-i2c";
+ compatible = "fsl,ls1012a-i2c", "fsl,vf610-i2c";
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x2180000 0x0 0x10000>;
interrupts = <0 56 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
QORIQ_CLK_PLL_DIV(4)>;
+ scl-gpios = <&gpio0 2 0>;
status = "disabled";
};
i2c1: i2c@2190000 {
- compatible = "fsl,vf610-i2c";
+ compatible = "fsl,ls1012a-i2c", "fsl,vf610-i2c";
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x2190000 0x0 0x10000>;
interrupts = <0 57 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
QORIQ_CLK_PLL_DIV(4)>;
+ scl-gpios = <&gpio0 13 0>;
status = "disabled";
};
@@ -499,6 +501,7 @@
snps,quirk-frame-length-adjustment = <0x20>;
snps,dis_rxdet_inp3_quirk;
snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>;
+ snps,host-vbus-glitches;
};
sata: sata@3200000 {
@@ -550,6 +553,7 @@
<0000 0 0 2 &gic 0 111 IRQ_TYPE_LEVEL_HIGH>,
<0000 0 0 3 &gic 0 112 IRQ_TYPE_LEVEL_HIGH>,
<0000 0 0 4 &gic 0 113 IRQ_TYPE_LEVEL_HIGH>;
+ big-endian;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index 1515cec23147..754a64be739c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -485,7 +485,6 @@
<0x00030005 0x00000042>,
<0x00030006 0x0000004c>,
<0x00030007 0x00000056>;
- big-endian;
#thermal-sensor-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index 8616d5e0c388..604bf88d70b3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -591,6 +591,8 @@
reg = <0x00 0x03400000 0x0 0x00100000>,
<0x20 0x00000000 0x8 0x00000000>;
reg-names = "regs", "addr_space";
+ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; /* PME interrupt */
+ interrupt-names = "pme";
num-ib-windows = <24>;
num-ob-windows = <256>;
max-functions = /bits/ 8 <2>;
@@ -628,6 +630,8 @@
reg = <0x00 0x03500000 0x0 0x00100000>,
<0x28 0x00000000 0x8 0x00000000>;
reg-names = "regs", "addr_space";
+ interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>; /* PME interrupt */
+ interrupt-names = "pme";
num-ib-windows = <6>;
num-ob-windows = <6>;
status = "disabled";
@@ -664,6 +668,8 @@
reg = <0x00 0x03600000 0x0 0x00100000>,
<0x30 0x00000000 0x8 0x00000000>;
reg-names = "regs", "addr_space";
+ interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>; /* PME interrupt */
+ interrupt-names = "pme";
num-ib-windows = <6>;
num-ob-windows = <6>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 6640b49670ae..e665c629e1a1 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -949,34 +949,50 @@
};
uart0: serial@21c0000 {
- compatible = "arm,sbsa-uart","arm,pl011";
+ compatible = "arm,pl011", "arm,primecell";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>,
+ <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>;
+ clock-names = "uartclk", "apb_pclk";
reg = <0x0 0x21c0000 0x0 0x1000>;
interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
- current-speed = <115200>;
status = "disabled";
};
uart1: serial@21d0000 {
- compatible = "arm,sbsa-uart","arm,pl011";
+ compatible = "arm,pl011", "arm,primecell";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>,
+ <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>;
+ clock-names = "uartclk", "apb_pclk";
reg = <0x0 0x21d0000 0x0 0x1000>;
interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
- current-speed = <115200>;
status = "disabled";
};
uart2: serial@21e0000 {
- compatible = "arm,sbsa-uart","arm,pl011";
+ compatible = "arm,pl011", "arm,primecell";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>,
+ <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>;
+ clock-names = "uartclk", "apb_pclk";
reg = <0x0 0x21e0000 0x0 0x1000>;
interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
- current-speed = <115200>;
status = "disabled";
};
uart3: serial@21f0000 {
- compatible = "arm,sbsa-uart","arm,pl011";
+ compatible = "arm,pl011", "arm,primecell";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>,
+ <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(8)>;
+ clock-names = "uartclk", "apb_pclk";
reg = <0x0 0x21f0000 0x0 0x1000>;
interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
- current-speed = <115200>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.1.dtsi
new file mode 100644
index 000000000000..0f77f78f4d96
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.1.dtsi
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2024 Toradex
+ */
+
+#include "imx8-apalis-eval.dtsi"
+
+/* Apalis CAN1 */
+&flexcan1 {
+ status = "okay";
+};
+
+/* Apalis CAN2 */
+&flexcan2 {
+ status = "okay";
+};
+
+/* Apalis MMC1 */
+&usdhc2 {
+ status = "okay";
+};
+
+/* Apalis SD1 */
+&usdhc3 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.2.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.2.dtsi
new file mode 100644
index 000000000000..f5c6a0164f36
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-eval-v1.2.dtsi
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2024 Toradex
+ */
+
+#include "imx8-apalis-eval.dtsi"
+
+/ {
+ reg_3v3_mmc: regulator-3v3-mmc {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_3v3_mmc>;
+ enable-active-high;
+ gpio = <&lsio_gpio5 19 GPIO_ACTIVE_HIGH>;
+ off-on-delay-us = <100000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-name = "3.3V_MMC";
+ startup-delay-us = <10000>;
+ };
+
+ reg_3v3_sd: regulator-3v3-sd {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_3v3_sd>;
+ enable-active-high;
+ gpio = <&lsio_gpio5 20 GPIO_ACTIVE_HIGH>;
+ off-on-delay-us = <100000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-name = "3.3V_SD";
+ startup-delay-us = <10000>;
+ };
+
+ reg_can1: regulator-can1 {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_can1_power>;
+ enable-active-high;
+ gpio = <&lsio_gpio5 22 GPIO_ACTIVE_HIGH>;
+ regulator-name = "5V_SW_CAN1";
+ startup-delay-us = <10000>;
+ };
+
+ reg_can2: regulator-can2 {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_enable_can2_power>;
+ enable-active-high;
+ gpio = <&lsio_gpio5 21 GPIO_ACTIVE_HIGH>;
+ regulator-name = "5V_SW_CAN2";
+ startup-delay-us = <10000>;
+ };
+};
+
+/* Apalis CAN1 */
+&flexcan1 {
+ xceiver-supply = <&reg_can1>;
+ status = "okay";
+};
+
+/* Apalis CAN2 */
+&flexcan2 {
+ xceiver-supply = <&reg_can2>;
+ status = "okay";
+};
+
+/* Apalis I2C1 */
+&i2c2 {
+ status = "okay";
+
+ /* Power/Current Measurement Sensor */
+ hwmon@40 {
+ compatible = "ti,ina219";
+ reg = <0x40>;
+ shunt-resistor = <5000>;
+ };
+
+ temperature-sensor@4f {
+ compatible = "ti,tmp75c";
+ reg = <0x4f>;
+ };
+
+ eeprom@57 {
+ compatible = "st,24c02", "atmel,24c02";
+ reg = <0x57>;
+ };
+};
+
+/* Apalis MMC1 */
+&usdhc2 {
+ pinctrl-0 = <&pinctrl_usdhc2_4bit>, <&pinctrl_mmc1_cd>;
+ pinctrl-1 = <&pinctrl_usdhc2_4bit_100mhz>, <&pinctrl_mmc1_cd>;
+ pinctrl-2 = <&pinctrl_usdhc2_4bit_200mhz>, <&pinctrl_mmc1_cd>;
+ pinctrl-3 = <&pinctrl_usdhc2_4bit_sleep>, <&pinctrl_mmc1_cd_sleep>;
+ bus-width = <4>;
+ vmmc-supply = <&reg_3v3_mmc>;
+ status = "okay";
+};
+
+/* Apalis SD1 */
+&usdhc3 {
+ vmmc-supply = <&reg_3v3_sd>;
+ status = "okay";
+};
+
+&iomuxc {
+
+ pinctrl_enable_3v3_mmc: enable3v3mmcgrp {
+ fsl,pins = <IMX8QM_USDHC1_DATA4_LSIO_GPIO5_IO19 0x00000021>; /* MXM3_148 */
+ };
+
+ pinctrl_enable_3v3_sd: enable3v3sdgrp {
+ fsl,pins = <IMX8QM_USDHC1_DATA5_LSIO_GPIO5_IO20 0x00000021>; /* MXM3_152 */
+ };
+
+ pinctrl_enable_can1_power: enablecan1powergrp {
+ fsl,pins = <IMX8QM_USDHC1_DATA7_LSIO_GPIO5_IO22 0x00000021>; /* MXM3_158 */
+ };
+
+ pinctrl_enable_can2_power: enablecan2powergrp {
+ fsl,pins = <IMX8QM_USDHC1_DATA6_LSIO_GPIO5_IO21 0x00000021>; /* MXM3_156 */
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-eval.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-eval.dtsi
index 685d4294f4f1..deecb96a1596 100644
--- a/arch/arm64/boot/dts/freescale/imx8-apalis-eval.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-eval.dtsi
@@ -35,18 +35,6 @@
status = "okay";
};
-/* Apalis CAN1 */
-&flexcan1 {
- status = "okay";
-};
-
-/* Apalis CAN2 */
-&flexcan2 {
- status = "okay";
-};
-
-/* TODO: GPU */
-
/* Apalis I2C1 */
&i2c2 {
status = "okay";
@@ -132,13 +120,3 @@
};
/* TODO: Apalis USBH4 SuperSpeed */
-
-/* Apalis MMC1 */
-&usdhc2 {
- status = "okay";
-};
-
-/* Apalis SD1 */
-&usdhc3 {
- status = "okay";
-};
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
index f69b0c17560a..160153853b68 100644
--- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
@@ -261,7 +261,6 @@
reset-assert-us = <2>;
reset-deassert-us = <2>;
reset-gpios = <&lsio_gpio1 11 GPIO_ACTIVE_LOW>;
- reset-names = "phy";
};
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi
index f057c6b21b30..07afeb78ed56 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi
@@ -4,6 +4,7 @@
* Dong Aisheng <aisheng.dong@nxp.com>
*/
+#include <dt-bindings/clock/imx8-clock.h>
#include <dt-bindings/clock/imx8-lpcg.h>
#include <dt-bindings/firmware/imx/rsrc.h>
@@ -14,12 +15,174 @@ audio_ipg_clk: clock-audio-ipg {
clock-output-names = "audio_ipg_clk";
};
+clk_ext_aud_mclk0: clock-ext-aud-mclk0 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "ext_aud_mclk0";
+};
+
+clk_ext_aud_mclk1: clock-ext-aud-mclk1 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "ext_aud_mclk1";
+};
+
+clk_esai0_rx_clk: clock-esai0-rx {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "esai0_rx_clk";
+};
+
+clk_esai0_rx_hf_clk: clock-esai0-rx-hf {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "esai0_rx_hf_clk";
+};
+
+clk_esai0_tx_clk: clock-esai0-tx {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "esai0_tx_clk";
+};
+
+clk_esai0_tx_hf_clk: clock-esai0-tx-hf {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "esai0_tx_hf_clk";
+};
+
+clk_spdif0_rx: clock-spdif0-rx {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "spdif0_rx";
+};
+
+clk_sai0_rx_bclk: clock-sai0-rx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai0_rx_bclk";
+};
+
+clk_sai0_tx_bclk: clock-sai0-tx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai0_tx_bclk";
+};
+
+clk_sai1_rx_bclk: clock-sai1-rx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai1_rx_bclk";
+};
+
+clk_sai1_tx_bclk: clock-sai1-tx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai1_tx_bclk";
+};
+
+clk_sai2_rx_bclk: clock-sai2-rx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai2_rx_bclk";
+};
+
+clk_sai3_rx_bclk: clock-sai3-rx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai3_rx_bclk";
+};
+
+clk_sai4_rx_bclk: clock-sai4-rx-bclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "sai4_rx_bclk";
+};
+
audio_subsys: bus@59000000 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x59000000 0x0 0x59000000 0x1000000>;
+ sai0: sai@59040000 {
+ compatible = "fsl,imx8qm-sai";
+ reg = <0x59040000 0x10000>;
+ interrupts = <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sai0_lpcg 1>,
+ <&clk_dummy>,
+ <&sai0_lpcg 0>,
+ <&clk_dummy>,
+ <&clk_dummy>;
+ clock-names = "bus", "mclk0", "mclk1", "mclk2", "mclk3";
+ dma-names = "rx", "tx";
+ dmas = <&edma0 12 0 1>, <&edma0 13 0 0>;
+ power-domains = <&pd IMX_SC_R_SAI_0>;
+ status = "disabled";
+ };
+
+ sai1: sai@59050000 {
+ compatible = "fsl,imx8qm-sai";
+ reg = <0x59050000 0x10000>;
+ interrupts = <GIC_SPI 316 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sai1_lpcg 1>,
+ <&clk_dummy>,
+ <&sai1_lpcg 0>,
+ <&clk_dummy>,
+ <&clk_dummy>;
+ clock-names = "bus", "mclk0", "mclk1", "mclk2", "mclk3";
+ dma-names = "rx", "tx";
+ dmas = <&edma0 14 0 1>, <&edma0 15 0 0>;
+ power-domains = <&pd IMX_SC_R_SAI_1>;
+ status = "disabled";
+ };
+
+ sai2: sai@59060000 {
+ compatible = "fsl,imx8qm-sai";
+ reg = <0x59060000 0x10000>;
+ interrupts = <GIC_SPI 318 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sai2_lpcg 1>,
+ <&clk_dummy>,
+ <&sai2_lpcg 0>,
+ <&clk_dummy>,
+ <&clk_dummy>;
+ clock-names = "bus", "mclk0", "mclk1", "mclk2", "mclk3";
+ dma-names = "rx";
+ dmas = <&edma0 16 0 1>;
+ power-domains = <&pd IMX_SC_R_SAI_2>;
+ status = "disabled";
+ };
+
+ sai3: sai@59070000 {
+ compatible = "fsl,imx8qm-sai";
+ reg = <0x59070000 0x10000>;
+ interrupts = <GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sai3_lpcg 1>,
+ <&clk_dummy>,
+ <&sai3_lpcg 0>,
+ <&clk_dummy>,
+ <&clk_dummy>;
+ clock-names = "bus", "mclk0", "mclk1", "mclk2", "mclk3";
+ dma-names = "rx";
+ dmas = <&edma0 17 0 1>;
+ power-domains = <&pd IMX_SC_R_SAI_3>;
+ status = "disabled";
+ };
+
edma0: dma-controller@591f0000 {
compatible = "fsl,imx8qm-edma";
reg = <0x591f0000 0x190000>;
@@ -76,6 +239,54 @@ audio_subsys: bus@59000000 {
<&pd IMX_SC_R_DMA_0_CH23>;
};
+ sai0_lpcg: clock-controller@59440000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59440000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&acm IMX_ADMA_ACM_SAI0_MCLK_SEL>,
+ <&audio_ipg_clk>;
+ clock-indices = <IMX_LPCG_CLK_0>, <IMX_LPCG_CLK_4>;
+ clock-output-names = "sai0_lpcg_mclk",
+ "sai0_lpcg_ipg_clk";
+ power-domains = <&pd IMX_SC_R_SAI_0>;
+ };
+
+ sai1_lpcg: clock-controller@59450000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59450000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&acm IMX_ADMA_ACM_SAI1_MCLK_SEL>,
+ <&audio_ipg_clk>;
+ clock-indices = <IMX_LPCG_CLK_0>, <IMX_LPCG_CLK_4>;
+ clock-output-names = "sai1_lpcg_mclk",
+ "sai1_lpcg_ipg_clk";
+ power-domains = <&pd IMX_SC_R_SAI_1>;
+ };
+
+ sai2_lpcg: clock-controller@59460000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59460000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&acm IMX_ADMA_ACM_SAI2_MCLK_SEL>,
+ <&audio_ipg_clk>;
+ clock-indices = <IMX_LPCG_CLK_0>, <IMX_LPCG_CLK_4>;
+ clock-output-names = "sai2_lpcg_mclk",
+ "sai2_lpcg_ipg_clk";
+ power-domains = <&pd IMX_SC_R_SAI_2>;
+ };
+
+ sai3_lpcg: clock-controller@59470000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59470000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&acm IMX_ADMA_ACM_SAI3_MCLK_SEL>,
+ <&audio_ipg_clk>;
+ clock-indices = <IMX_LPCG_CLK_0>, <IMX_LPCG_CLK_4>;
+ clock-output-names = "sai3_lpcg_mclk",
+ "sai3_lpcg_ipg_clk";
+ power-domains = <&pd IMX_SC_R_SAI_3>;
+ };
+
dsp_lpcg: clock-controller@59580000 {
compatible = "fsl,imx8qxp-lpcg";
reg = <0x59580000 0x10000>;
@@ -151,4 +362,123 @@ audio_subsys: bus@59000000 {
<&pd IMX_SC_R_DMA_1_CH9>,
<&pd IMX_SC_R_DMA_1_CH10>;
};
+
+ aud_rec0_lpcg: clock-controller@59d00000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59d00000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX_SC_R_AUDIO_PLL_0 IMX_SC_PM_CLK_MST_BUS>;
+ clock-indices = <IMX_LPCG_CLK_0>;
+ clock-output-names = "aud_rec_clk0_lpcg_clk";
+ power-domains = <&pd IMX_SC_R_AUDIO_PLL_0>;
+ };
+
+ aud_rec1_lpcg: clock-controller@59d10000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59d10000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX_SC_R_AUDIO_PLL_1 IMX_SC_PM_CLK_MST_BUS>;
+ clock-indices = <IMX_LPCG_CLK_0>;
+ clock-output-names = "aud_rec_clk1_lpcg_clk";
+ power-domains = <&pd IMX_SC_R_AUDIO_PLL_1>;
+ };
+
+ aud_pll_div0_lpcg: clock-controller@59d20000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59d20000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX_SC_R_AUDIO_PLL_0 IMX_SC_PM_CLK_SLV_BUS>;
+ clock-indices = <IMX_LPCG_CLK_0>;
+ clock-output-names = "aud_pll_div_clk0_lpcg_clk";
+ power-domains = <&pd IMX_SC_R_AUDIO_PLL_0>;
+ };
+
+ aud_pll_div1_lpcg: clock-controller@59d30000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59d30000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX_SC_R_AUDIO_PLL_1 IMX_SC_PM_CLK_SLV_BUS>;
+ clock-indices = <IMX_LPCG_CLK_0>;
+ clock-output-names = "aud_pll_div_clk1_lpcg_clk";
+ power-domains = <&pd IMX_SC_R_AUDIO_PLL_1>;
+ };
+
+ mclkout0_lpcg: clock-controller@59d50000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59d50000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&acm IMX_ADMA_ACM_MCLKOUT0_SEL>;
+ clock-indices = <IMX_LPCG_CLK_0>;
+ clock-output-names = "mclkout0_lpcg_clk";
+ power-domains = <&pd IMX_SC_R_MCLK_OUT_0>;
+ };
+
+ mclkout1_lpcg: clock-controller@59d60000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x59d60000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&acm IMX_ADMA_ACM_MCLKOUT1_SEL>;
+ clock-indices = <IMX_LPCG_CLK_0>;
+ clock-output-names = "mclkout1_lpcg_clk";
+ power-domains = <&pd IMX_SC_R_MCLK_OUT_1>;
+ };
+
+ acm: acm@59e00000 {
+ compatible = "fsl,imx8qxp-acm";
+ reg = <0x59e00000 0x1d0000>;
+ #clock-cells = <1>;
+ power-domains = <&pd IMX_SC_R_AUDIO_CLK_0>,
+ <&pd IMX_SC_R_AUDIO_CLK_1>,
+ <&pd IMX_SC_R_MCLK_OUT_0>,
+ <&pd IMX_SC_R_MCLK_OUT_1>,
+ <&pd IMX_SC_R_AUDIO_PLL_0>,
+ <&pd IMX_SC_R_AUDIO_PLL_1>,
+ <&pd IMX_SC_R_ASRC_0>,
+ <&pd IMX_SC_R_ASRC_1>,
+ <&pd IMX_SC_R_ESAI_0>,
+ <&pd IMX_SC_R_SAI_0>,
+ <&pd IMX_SC_R_SAI_1>,
+ <&pd IMX_SC_R_SAI_2>,
+ <&pd IMX_SC_R_SAI_3>,
+ <&pd IMX_SC_R_SAI_4>,
+ <&pd IMX_SC_R_SAI_5>,
+ <&pd IMX_SC_R_SPDIF_0>,
+ <&pd IMX_SC_R_MQS_0>;
+ clocks = <&aud_rec0_lpcg IMX_LPCG_CLK_0>,
+ <&aud_rec1_lpcg IMX_LPCG_CLK_0>,
+ <&aud_pll_div0_lpcg IMX_LPCG_CLK_0>,
+ <&aud_pll_div1_lpcg IMX_LPCG_CLK_0>,
+ <&clk_ext_aud_mclk0>,
+ <&clk_ext_aud_mclk1>,
+ <&clk_esai0_rx_clk>,
+ <&clk_esai0_rx_hf_clk>,
+ <&clk_esai0_tx_clk>,
+ <&clk_esai0_tx_hf_clk>,
+ <&clk_spdif0_rx>,
+ <&clk_sai0_rx_bclk>,
+ <&clk_sai0_tx_bclk>,
+ <&clk_sai1_rx_bclk>,
+ <&clk_sai1_tx_bclk>,
+ <&clk_sai2_rx_bclk>,
+ <&clk_sai3_rx_bclk>,
+ <&clk_sai4_rx_bclk>;
+ clock-names = "aud_rec_clk0_lpcg_clk",
+ "aud_rec_clk1_lpcg_clk",
+ "aud_pll_div_clk0_lpcg_clk",
+ "aud_pll_div_clk1_lpcg_clk",
+ "ext_aud_mclk0",
+ "ext_aud_mclk1",
+ "esai0_rx_clk",
+ "esai0_rx_hf_clk",
+ "esai0_tx_clk",
+ "esai0_tx_hf_clk",
+ "spdif0_rx",
+ "sai0_rx_bclk",
+ "sai0_tx_bclk",
+ "sai1_rx_bclk",
+ "sai1_tx_bclk",
+ "sai2_rx_bclk",
+ "sai3_rx_bclk",
+ "sai4_rx_bclk";
+ };
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index b0bb77150adc..cab3468b1875 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -5,6 +5,7 @@
*/
#include <dt-bindings/clock/imx8-lpcg.h>
+#include <dt-bindings/dma/fsl-edma.h>
#include <dt-bindings/firmware/imx/rsrc.h>
dma_ipg_clk: clock-dma-ipg {
@@ -93,8 +94,8 @@ dma_subsys: bus@5a000000 {
assigned-clocks = <&clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_0>;
- dma-names = "tx","rx";
- dmas = <&edma2 9 0 0>, <&edma2 8 0 1>;
+ dma-names = "rx", "tx";
+ dmas = <&edma2 8 0 FSL_EDMA_RX>, <&edma2 9 0 0>;
status = "disabled";
};
@@ -107,8 +108,8 @@ dma_subsys: bus@5a000000 {
assigned-clocks = <&clk IMX_SC_R_UART_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_1>;
- dma-names = "tx","rx";
- dmas = <&edma2 11 0 0>, <&edma2 10 0 1>;
+ dma-names = "rx", "tx";
+ dmas = <&edma2 10 0 FSL_EDMA_RX>, <&edma2 11 0 0>;
status = "disabled";
};
@@ -121,8 +122,8 @@ dma_subsys: bus@5a000000 {
assigned-clocks = <&clk IMX_SC_R_UART_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_2>;
- dma-names = "tx","rx";
- dmas = <&edma2 13 0 0>, <&edma2 12 0 1>;
+ dma-names = "rx", "tx";
+ dmas = <&edma2 12 0 FSL_EDMA_RX>, <&edma2 13 0 0>;
status = "disabled";
};
@@ -135,8 +136,8 @@ dma_subsys: bus@5a000000 {
assigned-clocks = <&clk IMX_SC_R_UART_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_3>;
- dma-names = "tx","rx";
- dmas = <&edma2 15 0 0>, <&edma2 14 0 1>;
+ dma-names = "rx", "tx";
+ dmas = <&edma2 14 0 FSL_EDMA_RX>, <&edma2 15 0 0>;
status = "disabled";
};
@@ -192,29 +193,6 @@ dma_subsys: bus@5a000000 {
<&pd IMX_SC_R_DMA_2_CH15>;
};
- edma3: dma-controller@5a9f0000 {
- compatible = "fsl,imx8qm-edma";
- reg = <0x5a9f0000 0x90000>;
- #dma-cells = <3>;
- dma-channels = <8>;
- interrupts = <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 426 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 427 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 428 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 431 IRQ_TYPE_LEVEL_HIGH>;
- power-domains = <&pd IMX_SC_R_DMA_3_CH0>,
- <&pd IMX_SC_R_DMA_3_CH1>,
- <&pd IMX_SC_R_DMA_3_CH2>,
- <&pd IMX_SC_R_DMA_3_CH3>,
- <&pd IMX_SC_R_DMA_3_CH4>,
- <&pd IMX_SC_R_DMA_3_CH5>,
- <&pd IMX_SC_R_DMA_3_CH6>,
- <&pd IMX_SC_R_DMA_3_CH7>;
- };
-
spi0_lpcg: clock-controller@5a400000 {
compatible = "fsl,imx8qxp-lpcg";
reg = <0x5a400000 0x10000>;
@@ -460,6 +438,29 @@ dma_subsys: bus@5a000000 {
status = "disabled";
};
+ edma3: dma-controller@5a9f0000 {
+ compatible = "fsl,imx8qm-edma";
+ reg = <0x5a9f0000 0x90000>;
+ #dma-cells = <3>;
+ dma-channels = <8>;
+ interrupts = <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 426 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 427 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 428 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 431 IRQ_TYPE_LEVEL_HIGH>;
+ power-domains = <&pd IMX_SC_R_DMA_3_CH0>,
+ <&pd IMX_SC_R_DMA_3_CH1>,
+ <&pd IMX_SC_R_DMA_3_CH2>,
+ <&pd IMX_SC_R_DMA_3_CH3>,
+ <&pd IMX_SC_R_DMA_3_CH4>,
+ <&pd IMX_SC_R_DMA_3_CH5>,
+ <&pd IMX_SC_R_DMA_3_CH6>,
+ <&pd IMX_SC_R_DMA_3_CH7>;
+ };
+
i2c0_lpcg: clock-controller@5ac00000 {
compatible = "fsl,imx8qxp-lpcg";
reg = <0x5ac00000 0x10000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-gpu0.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-gpu0.dtsi
new file mode 100644
index 000000000000..9b8a44aa63d6
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-gpu0.dtsi
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2019 NXP
+ * Dong Aisheng <aisheng.dong@nxp.com>
+ */
+
+#include <dt-bindings/firmware/imx/rsrc.h>
+
+gpu0_subsys: bus@53000000 {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x53000000 0x0 0x53000000 0x1000000>;
+
+ gpu_3d0: gpu@53100000 {
+ compatible = "vivante,gc";
+ reg = <0x53100000 0x40000>;
+ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX_SC_R_GPU_0_PID0 IMX_SC_PM_CLK_PER>,
+ <&clk IMX_SC_R_GPU_0_PID0 IMX_SC_PM_CLK_MISC>;
+ clock-names = "core", "shader";
+ assigned-clocks = <&clk IMX_SC_R_GPU_0_PID0 IMX_SC_PM_CLK_PER>,
+ <&clk IMX_SC_R_GPU_0_PID0 IMX_SC_PM_CLK_MISC>;
+ assigned-clock-rates = <700000000>, <850000000>;
+ power-domains = <&pd IMX_SC_R_GPU_0_PID0>;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts
index b972658efb17..2123d431e061 100644
--- a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts
@@ -81,6 +81,24 @@
status = "disabled";
};
+ reg_can0_stby: regulator-4 {
+ compatible = "regulator-fixed";
+ regulator-name = "can0-stby";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&pca6416_3 0 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+
+ reg_can1_stby: regulator-5 {
+ compatible = "regulator-fixed";
+ regulator-name = "can1-stby";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&pca6416_3 1 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+
reg_usdhc2_vmmc: regulator-3 {
compatible = "regulator-fixed";
regulator-name = "SD1_SPWR";
@@ -261,12 +279,81 @@
};
};
+&i2c3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c3>;
+ status = "okay";
+
+ pca6416_3: gpio@20 {
+ compatible = "ti,tca6416";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-parent = <&lsio_gpio2>;
+ interrupts = <5 IRQ_TYPE_EDGE_RISING>;
+ };
+
+ pca9548_2: i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0>;
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x1>;
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x2>;
+ };
+
+ i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x3>;
+ };
+
+ i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x4>;
+ };
+ };
+};
+
&lpuart0 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_lpuart0>;
status = "okay";
};
+&flexcan2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flexcan2>;
+ xceiver-supply = <&reg_can0_stby>;
+ status = "okay";
+};
+
+&flexcan3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flexcan3>;
+ xceiver-supply = <&reg_can1_stby>;
+ status = "okay";
+};
+
&lsio_gpio4 {
status = "okay";
};
@@ -436,6 +523,20 @@
>;
};
+ pinctrl_flexcan2: flexcan2grp {
+ fsl,pins = <
+ IMX8DXL_UART2_TX_ADMA_FLEXCAN1_TX 0x00000021
+ IMX8DXL_UART2_RX_ADMA_FLEXCAN1_RX 0x00000021
+ >;
+ };
+
+ pinctrl_flexcan3: flexcan3grp {
+ fsl,pins = <
+ IMX8DXL_FLEXCAN2_TX_ADMA_FLEXCAN2_TX 0x00000021
+ IMX8DXL_FLEXCAN2_RX_ADMA_FLEXCAN2_RX 0x00000021
+ >;
+ };
+
pinctrl_fec1: fec1grp {
fsl,pins = <
IMX8DXL_COMP_CTL_GPIO_1V8_3V3_ENET_ENETB0_PAD 0x000014a0
diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-ss-adma.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl-ss-adma.dtsi
index 0a477f6318f1..5d012c95222f 100644
--- a/arch/arm64/boot/dts/freescale/imx8dxl-ss-adma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8dxl-ss-adma.dtsi
@@ -15,6 +15,63 @@
interrupts = <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
};
+&edma0 {
+ reg = <0x591f0000 0x1a0000>;
+ #dma-cells = <3>;
+ dma-channels = <25>;
+ dma-channel-mask = <0x1c0cc0>;
+ interrupts = <GIC_SPI 262 IRQ_TYPE_LEVEL_HIGH>, /* asrc 0 */
+ <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 264 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 265 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH>, /* spdif0 */
+ <GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>, /* sai0 */
+ <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>, /* sai1 */
+ <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>, /* sai2 */
+ <GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH>, /* sai3 */
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>, /* gpt0 */
+ <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>, /* gpt1 */
+ <GIC_SPI 270 IRQ_TYPE_LEVEL_HIGH>, /* gpt2 */
+ <GIC_SPI 271 IRQ_TYPE_LEVEL_HIGH>; /* gpt3 */
+ power-domains = <&pd IMX_SC_R_DMA_0_CH0>,
+ <&pd IMX_SC_R_DMA_0_CH1>,
+ <&pd IMX_SC_R_DMA_0_CH2>,
+ <&pd IMX_SC_R_DMA_0_CH3>,
+ <&pd IMX_SC_R_DMA_0_CH4>,
+ <&pd IMX_SC_R_DMA_0_CH5>,
+ <&pd IMX_SC_R_DMA_0_CH6>,
+ <&pd IMX_SC_R_DMA_0_CH7>,
+ <&pd IMX_SC_R_DMA_0_CH8>,
+ <&pd IMX_SC_R_DMA_0_CH9>,
+ <&pd IMX_SC_R_DMA_0_CH10>,
+ <&pd IMX_SC_R_DMA_0_CH11>,
+ <&pd IMX_SC_R_DMA_0_CH12>,
+ <&pd IMX_SC_R_DMA_0_CH13>,
+ <&pd IMX_SC_R_DMA_0_CH14>,
+ <&pd IMX_SC_R_DMA_0_CH15>,
+ <&pd IMX_SC_R_DMA_0_CH16>,
+ <&pd IMX_SC_R_DMA_0_CH17>,
+ <&pd IMX_SC_R_DMA_0_CH18>,
+ <&pd IMX_SC_R_DMA_0_CH19>,
+ <&pd IMX_SC_R_DMA_0_CH20>,
+ <&pd IMX_SC_R_DMA_0_CH21>,
+ <&pd IMX_SC_R_DMA_0_CH22>,
+ <&pd IMX_SC_R_DMA_0_CH23>,
+ <&pd IMX_SC_R_DMA_0_CH24>;
+};
+
&edma2 {
interrupts = <GIC_SPI 288 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 289 IRQ_TYPE_LEVEL_HIGH>,
@@ -45,24 +102,44 @@
<GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>;
};
+&flexcan1 {
+ interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&flexcan2 {
+ interrupts = <GIC_SPI 239 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&flexcan3 {
+ interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
+};
+
&i2c0 {
compatible = "fsl,imx8dxl-lpi2c", "fsl,imx7ulp-lpi2c";
interrupts = <GIC_SPI 222 IRQ_TYPE_LEVEL_HIGH>;
+ dma-names = "tx","rx";
+ dmas = <&edma3 1 0 0>, <&edma3 0 0 FSL_EDMA_RX>;
};
&i2c1 {
compatible = "fsl,imx8dxl-lpi2c", "fsl,imx7ulp-lpi2c";
interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_HIGH>;
+ dma-names = "tx","rx";
+ dmas = <&edma3 3 0 0>, <&edma3 2 0 FSL_EDMA_RX>;
};
&i2c2 {
compatible = "fsl,imx8dxl-lpi2c", "fsl,imx7ulp-lpi2c";
interrupts = <GIC_SPI 224 IRQ_TYPE_LEVEL_HIGH>;
+ dma-names = "tx","rx";
+ dmas = <&edma3 5 0 0>, <&edma3 4 0 FSL_EDMA_RX>;
};
&i2c3 {
compatible = "fsl,imx8dxl-lpi2c", "fsl,imx7ulp-lpi2c";
interrupts = <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
+ dma-names = "tx","rx";
+ dmas = <&edma3 7 0 0>, <&edma3 6 0 FSL_EDMA_RX>;
};
&lpuart0 {
diff --git a/arch/arm64/boot/dts/freescale/imx8dxl.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl.dtsi
index f580eb6db9a6..a0674c5c5576 100644
--- a/arch/arm64/boot/dts/freescale/imx8dxl.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8dxl.dtsi
@@ -4,6 +4,7 @@
*/
#include <dt-bindings/clock/imx8-clock.h>
+#include <dt-bindings/dma/fsl-edma.h>
#include <dt-bindings/firmware/imx/rsrc.h>
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
diff --git a/arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp-mba8xx.dts b/arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp-mba8xx.dts
new file mode 100644
index 000000000000..f35514b7b338
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp-mba8xx.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR X11)
+/*
+ * Copyright 2018-2023 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+/dts-v1/;
+
+#include "imx8dxp-tqma8xdp.dtsi"
+#include "mba8xx.dtsi"
+
+/ {
+ model = "TQ-Systems i.MX8DXP TQMa8XDP on MBa8Xx";
+ compatible = "tq,imx8dxp-tqma8xdp-mba8xx", "tq,imx8dxp-tqma8xdp", "fsl,imx8dxp";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp.dtsi b/arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp.dtsi
new file mode 100644
index 000000000000..e2de8517aa0e
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8dxp-tqma8xdp.dtsi
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR X11)
+/*
+ * Copyright 2018-2023 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+#include "imx8dxp.dtsi"
+#include "tqma8xx.dtsi"
+
+/ {
+ model = "TQ-Systems i.MX8DXP TQMa8XDP";
+ compatible = "tq,imx8dxp-tqma8xdp", "fsl,imx8dxp";
+};
+
+&pmic_thermal {
+ cooling-maps {
+ map0 {
+ cooling-device =
+ <&A35_0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&A35_1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8dxp.dtsi b/arch/arm64/boot/dts/freescale/imx8dxp.dtsi
new file mode 100644
index 000000000000..a8f7352332c0
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8dxp.dtsi
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017-2019 NXP
+ */
+
+/dts-v1/;
+
+#include "imx8qxp.dtsi"
+
+/delete-node/ &A35_2;
+/delete-node/ &A35_3;
+
+&thermal_zones {
+ cpu0-thermal {
+ cooling-maps {
+ map0 {
+ cooling-device =
+ <&A35_0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&A35_1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
index b53104ed8919..bd5b365867fd 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
@@ -151,6 +151,28 @@
clocks = <&clk IMX8MM_CLK_SAI3_ROOT>;
};
};
+
+ sound-micfil {
+ compatible = "fsl,imx-audio-card";
+ model = "micfil-audio";
+
+ pri-dai-link {
+ link-name = "micfil hifi";
+ format = "i2s";
+
+ cpu {
+ sound-dai = <&micfil>;
+ };
+ };
+ };
+
+ sound-spdif {
+ compatible = "fsl,imx-audio-spdif";
+ model = "imx-spdif";
+ spdif-controller = <&spdif1>;
+ spdif-out;
+ spdif-in;
+ };
};
&A53_0 {
@@ -434,6 +456,16 @@
status = "okay";
};
+&micfil {
+ #sound-dai-cells = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pdm>;
+ assigned-clocks = <&clk IMX8MM_CLK_PDM>;
+ assigned-clock-parents = <&clk IMX8MM_AUDIO_PLL1_OUT>;
+ assigned-clock-rates = <196608000>;
+ status = "okay";
+};
+
&mipi_csi {
status = "okay";
@@ -509,6 +541,24 @@
status = "okay";
};
+&spdif1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spdif1>;
+ assigned-clocks = <&clk IMX8MM_CLK_SPDIF1>;
+ assigned-clock-parents = <&clk IMX8MM_AUDIO_PLL1_OUT>;
+ assigned-clock-rates = <24576000>;
+ clocks = <&clk IMX8MM_CLK_AUDIO_AHB>, <&clk IMX8MM_CLK_24M>,
+ <&clk IMX8MM_CLK_SPDIF1>, <&clk IMX8MM_CLK_DUMMY>,
+ <&clk IMX8MM_CLK_DUMMY>, <&clk IMX8MM_CLK_DUMMY>,
+ <&clk IMX8MM_CLK_AUDIO_AHB>, <&clk IMX8MM_CLK_DUMMY>,
+ <&clk IMX8MM_CLK_DUMMY>, <&clk IMX8MM_CLK_DUMMY>,
+ <&clk IMX8MM_AUDIO_PLL1_OUT>, <&clk IMX8MM_AUDIO_PLL2_OUT>;
+ clock-names = "core", "rxtx0", "rxtx1", "rxtx2", "rxtx3",
+ "rxtx4", "rxtx5", "rxtx6", "rxtx7", "spba",
+ "pll8k", "pll11k";
+ status = "okay";
+};
+
&uart2 { /* console */
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart2>;
@@ -636,6 +686,18 @@
>;
};
+ pinctrl_pdm: pdmgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI5_MCLK_SAI5_MCLK 0xd6
+ MX8MM_IOMUXC_SAI5_RXC_PDM_CLK 0xd6
+ MX8MM_IOMUXC_SAI5_RXFS_SAI5_RX_SYNC 0xd6
+ MX8MM_IOMUXC_SAI5_RXD0_PDM_DATA0 0xd6
+ MX8MM_IOMUXC_SAI5_RXD1_PDM_DATA1 0xd6
+ MX8MM_IOMUXC_SAI5_RXD2_PDM_DATA2 0xd6
+ MX8MM_IOMUXC_SAI5_RXD3_PDM_DATA3 0xd6
+ >;
+ };
+
pinctrl_pmic: pmicirqgrp {
fsl,pins = <
MX8MM_IOMUXC_GPIO1_IO03_GPIO1_IO3 0x141
@@ -666,6 +728,13 @@
>;
};
+ pinctrl_spdif1: spdif1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SPDIF_TX_SPDIF1_OUT 0xd6
+ MX8MM_IOMUXC_SPDIF_RX_SPDIF1_IN 0xd6
+ >;
+ };
+
pinctrl_typec1: typec1grp {
fsl,pins = <
MX8MM_IOMUXC_SD1_STROBE_GPIO2_IO11 0x159
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts
index 8b16bd68576c..33f8d7d1970e 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts
@@ -25,23 +25,21 @@
leds {
compatible = "gpio-leds";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_gpio_led>;
led1 {
label = "led1";
- gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
+ gpios = <&gpio1 5 GPIO_ACTIVE_LOW>;
linux,default-trigger = "heartbeat";
};
led2 {
label = "led2";
- gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
+ gpios = <&gpio3 8 GPIO_ACTIVE_LOW>;
};
led3 {
label = "led3";
- gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
+ gpios = <&gpio3 7 GPIO_ACTIVE_LOW>;
};
};
@@ -52,24 +50,12 @@
reg_rst_eth2: regulator-rst-eth2 {
compatible = "regulator-fixed";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_usb_eth2>;
- gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>;
+ gpio = <&gpio1 1 GPIO_ACTIVE_HIGH>;
enable-active-high;
regulator-always-on;
regulator-name = "rst-usb-eth2";
};
- reg_usb1_vbus: regulator-usb1-vbus {
- compatible = "regulator-fixed";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_reg_usb1_vbus>;
- gpio = <&gpio3 25 GPIO_ACTIVE_LOW>;
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- regulator-name = "usb1-vbus";
- };
-
reg_vdd_5v: regulator-5v {
compatible = "regulator-fixed";
regulator-always-on;
@@ -80,9 +66,6 @@
};
&ecspi2 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_ecspi2>;
- cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
status = "okay";
can@0 {
@@ -91,7 +74,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_can>;
clocks = <&osc_can>;
- interrupts-extended = <&gpio4 28 IRQ_TYPE_LEVEL_LOW>;
+ interrupts-extended = <&gpio5 1 IRQ_TYPE_LEVEL_LOW>;
/*
* Limit the SPI clock to 15 MHz to prevent issues
* with corrupted data due to chip errata.
@@ -103,9 +86,6 @@
};
&ecspi3 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_ecspi3>;
- cs-gpios = <&gpio5 25 GPIO_ACTIVE_LOW>;
status = "okay";
eeram@0 {
@@ -117,8 +97,8 @@
&fec1 {
pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_enet>;
- phy-connection-type = "rgmii-rxid";
+ pinctrl-0 = <&pinctrl_enet_rgmii>;
+ phy-connection-type = "rgmii-id";
phy-handle = <&ethphy>;
status = "okay";
@@ -127,55 +107,101 @@
#size-cells = <0>;
ethphy: ethernet-phy@0 {
+ compatible = "ethernet-phy-id4f51.e91b";
reg = <0>;
- reset-assert-us = <1>;
- reset-deassert-us = <15000>;
- reset-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <10000>;
+ reset-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
};
};
};
+/*
+ * Rename SoM signals according to board usage:
+ * GPIO_B_0 -> DIO1_OUT
+ * GPIO_B_1 -> DIO2_OUT
+ */
&gpio1 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_gpio1>;
- gpio-line-names = "", "", "", "dio1-out", "", "", "dio1-in", "dio2-out",
- "dio2-in", "dio3-out", "dio3-in", "dio4-out", "", "", "", "",
- "", "", "", "", "", "", "", "",
- "", "", "", "", "", "", "", "";
+ gpio-line-names = "", "GPIO_A_0", "", "GPIO_A_1",
+ "", "GPIO_A_2", "GPIO_A_3", "GPIO_A_4",
+ "GPIO_A_5", "GPIO_A_6", "GPIO_A_7", "DIO1_OUT",
+ "DIO2_OUT", "USB_A_OC#", "CAM_MCK", "USB_B_OC#",
+ "ETH_MDC", "ETH_MDIO", "ETH_A_(S)(R)(G)MII_TXD3",
+ "ETH_A_(S)(R)(G)MII_TXD2", "ETH_A_(S)(R)(G)MII_TXD1",
+ "ETH_A_(S)(R)(G)MII_TXD0", "ETH_A_(R)(G)MII_TX_EN(_ER)",
+ "ETH_A_(R)(G)MII_TX_CLK", "ETH_A_(R)(G)MII_RX_DV(_ER)",
+ "ETH_A_(R)(G)MII_RX_CLK", "ETH_A_(S)(R)(G)MII_RXD0",
+ "ETH_A_(S)(R)(G)MII_RXD1", "ETH_A_(R)(G)MII_RXD2",
+ "ETH_A_(R)(G)MII_RXD3";
};
-&gpio5 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_gpio5>;
- gpio-line-names = "", "", "dio4-in", "", "", "", "", "",
- "", "", "", "", "", "", "", "",
- "", "", "", "", "", "", "", "",
- "", "", "", "", "", "", "", "";
+/*
+ * Rename SoM signals according to board usage:
+ * GPIO_B_2 -> DIO3_OUT
+ * GPIO_B_3 -> DIO4_OUT
+ */
+&gpio3 {
+ gpio-line-names = "GPIO_C_5", "GPIO_C_4", "SDIO_B_CD#", "SDIO_B_D5",
+ "SDIO_B_D6", "SDIO_B_D7", "GPIO_C_0", "GPIO_C_1",
+ "GPIO_C_2", "GPIO_C_3", "SDIO_B_D0", "SDIO_B_D1",
+ "SDIO_B_D2", "SDIO_B_D3", "", "SDIO_B_D4",
+ "CARRIER_PWR_EN", "SDIO_B_CLK", "SDIO_B_CMD", "DIO3_OUT",
+ "USB_B_EN", "DIO4_OUT", "PCIe_CLKREQ#", "PCIe_A_PERST#",
+ "PCIe_WAKE#", "USB_A_EN";
};
-&i2c4 {
- clock-frequency = <100000>;
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_i2c4>;
+/*
+ * Rename SoM signals according to board usage:
+ * GPIO_B_4 -> DIO1_IN
+ * GPIO_B_5 -> DIO2_IN
+ * GPIO_B_6 -> DIO3_IN
+ * GPIO_B_7 -> DIO4_IN
+ */
+&gpio4 {
+ gpio-line-names = "GPIO_C_7", "", "I2S_A_DATA_IN", "I2S_B_DATA_IN",
+ "DIO1_IN", "BOOT_SEL0#", "BOOT_SEL1#", "",
+ "", "", "I2S_LRCLK", "I2S_BITCLK",
+ "I2S_A_DATA_OUT", "I2S_B_DATA_OUT", "DIO2_IN", "DIO3_IN",
+ "DIO4_IN", "SPI_A_/WP_(IO2)", "SPI_A_/HOLD_(IO3)", "GPIO_C_6",
+ "I2S_MCLK", "UART_A_TX", "UART_A_RX", "UART_A_CTS",
+ "UART_A_RTS", "", "", "",
+ "PCIe_SM_ALERT", "UART_B_RTS", "UART_B_CTS", "UART_B_RX";
+};
+
+&i2c3 {
status = "okay";
+
+ usb-hub@2c {
+ compatible = "microchip,usb2514b";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb_hub>;
+ reg = <0x2c>;
+ non-removable-ports = <0>, <3>;
+ reset-gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
+ };
};
&pwm2 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_pwm2>;
status = "okay";
};
+&reg_usb2_vbus {
+ status = "disabled";
+};
+
+&reg_usdhc2_vcc {
+ status = "disabled";
+};
+
+&reg_usdhc3_vcc {
+ status = "disabled";
+};
+
&uart1 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_uart1>;
uart-has-rtscts;
status = "okay";
};
&uart2 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_uart2>;
linux,rs485-enabled-at-boot-time;
uart-has-rtscts;
status = "okay";
@@ -183,8 +209,6 @@
&usbotg1 {
dr_mode = "otg";
- disable-over-current;
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
@@ -195,14 +219,17 @@
#size-cells = <0>;
status = "okay";
+ /* VBUS is controlled by the hub */
+ /delete-property/ vbus-supply;
+
usb1@1 {
- compatible = "usb424,9514";
+ compatible = "usb424,2514";
reg = <1>;
#address-cells = <1>;
#size-cells = <0>;
usbnet: ethernet@1 {
- compatible = "usb424,ec00";
+ compatible = "usbb95,772b";
reg = <1>;
local-mac-address = [ 00 00 00 00 00 00 ];
};
@@ -210,167 +237,20 @@
};
&usdhc2 {
- pinctrl-names = "default", "state_100mhz", "state_200mhz";
- pinctrl-0 = <&pinctrl_usdhc2>;
- pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
- pinctrl-2 = <&pinctrl_usdhc2_200mhz>;
vmmc-supply = <&reg_vdd_3v3>;
- vqmmc-supply = <&reg_nvcc_sd>;
- cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>;
status = "okay";
};
&iomuxc {
pinctrl_can: cangrp {
fsl,pins = <
- MX8MM_IOMUXC_SAI3_RXFS_GPIO4_IO28 0x19
- >;
- };
-
- pinctrl_ecspi2: ecspi2grp {
- fsl,pins = <
- MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82
- MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82
- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82
- MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x19
- >;
- };
-
- pinctrl_ecspi3: ecspi3grp {
- fsl,pins = <
- MX8MM_IOMUXC_UART2_RXD_ECSPI3_MISO 0x82
- MX8MM_IOMUXC_UART1_TXD_ECSPI3_MOSI 0x82
- MX8MM_IOMUXC_UART1_RXD_ECSPI3_SCLK 0x82
- MX8MM_IOMUXC_UART2_TXD_GPIO5_IO25 0x19
- >;
- };
-
- pinctrl_enet: enetgrp {
- fsl,pins = <
- MX8MM_IOMUXC_ENET_MDC_ENET1_MDC 0x3
- MX8MM_IOMUXC_ENET_MDIO_ENET1_MDIO 0x3
- MX8MM_IOMUXC_ENET_TD3_ENET1_RGMII_TD3 0x1f
- MX8MM_IOMUXC_ENET_TD2_ENET1_RGMII_TD2 0x1f
- MX8MM_IOMUXC_ENET_TD1_ENET1_RGMII_TD1 0x1f
- MX8MM_IOMUXC_ENET_TD0_ENET1_RGMII_TD0 0x1f
- MX8MM_IOMUXC_ENET_RD3_ENET1_RGMII_RD3 0x91
- MX8MM_IOMUXC_ENET_RD2_ENET1_RGMII_RD2 0x91
- MX8MM_IOMUXC_ENET_RD1_ENET1_RGMII_RD1 0x91
- MX8MM_IOMUXC_ENET_RD0_ENET1_RGMII_RD0 0x91
- MX8MM_IOMUXC_ENET_TXC_ENET1_RGMII_TXC 0x1f
- MX8MM_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91
- MX8MM_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91
- MX8MM_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f
- MX8MM_IOMUXC_GPIO1_IO01_GPIO1_IO1 0x19 /* PHY RST */
- MX8MM_IOMUXC_GPIO1_IO05_GPIO1_IO5 0x19 /* ETH IRQ */
- >;
- };
-
- pinctrl_gpio_led: gpioledgrp {
- fsl,pins = <
- MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x19
- MX8MM_IOMUXC_GPIO1_IO13_GPIO1_IO13 0x19
- MX8MM_IOMUXC_GPIO1_IO14_GPIO1_IO14 0x19
- >;
- };
-
- pinctrl_gpio1: gpio1grp {
- fsl,pins = <
- MX8MM_IOMUXC_GPIO1_IO03_GPIO1_IO3 0x19
- MX8MM_IOMUXC_GPIO1_IO07_GPIO1_IO7 0x19
- MX8MM_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19
- MX8MM_IOMUXC_GPIO1_IO11_GPIO1_IO11 0x19
- MX8MM_IOMUXC_GPIO1_IO06_GPIO1_IO6 0x19
- MX8MM_IOMUXC_GPIO1_IO08_GPIO1_IO8 0x19
- MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x19
- >;
- };
-
- pinctrl_gpio5: gpio5grp {
- fsl,pins = <
- MX8MM_IOMUXC_SAI3_MCLK_GPIO5_IO2 0x19
- >;
- };
-
- pinctrl_i2c4: i2c4grp {
- fsl,pins = <
- MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3
- MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3
- >;
- };
-
- pinctrl_pwm2: pwm2grp {
- fsl,pins = <
- MX8MM_IOMUXC_SPDIF_RX_PWM2_OUT 0x19
- >;
- };
-
- pinctrl_reg_usb1_vbus: regusb1vbusgrp {
- fsl,pins = <
- MX8MM_IOMUXC_SAI5_MCLK_GPIO3_IO25 0x19
- >;
- };
-
- pinctrl_uart1: uart1grp {
- fsl,pins = <
- MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140
- MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140
- MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140
- MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140
- >;
- };
-
- pinctrl_uart2: uart2grp {
- fsl,pins = <
- MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140
- MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140
- MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140
- MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140
- >;
- };
-
- pinctrl_usb_eth2: usbeth2grp {
- fsl,pins = <
- MX8MM_IOMUXC_NAND_CE1_B_GPIO3_IO2 0x19
- >;
- };
-
- pinctrl_usdhc2: usdhc2grp {
- fsl,pins = <
- MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190
- MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0
- MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0
- MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0
- MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0
- MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0
- MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019
- MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0
- >;
- };
-
- pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
- fsl,pins = <
- MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194
- MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4
- MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4
- MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4
- MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4
- MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4
- MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019
- MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0
+ MX8MM_IOMUXC_SAI3_TXD_GPIO5_IO1 0x19 /* SDIO_B_PWR_EN */
>;
};
- pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
+ pinctrl_usb_hub: usbhubgrp {
fsl,pins = <
- MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196
- MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6
- MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6
- MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6
- MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6
- MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6
- MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019
- MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0
+ MX8MM_IOMUXC_SAI3_MCLK_GPIO5_IO2 0x19 /* SDIO_B_WP */
>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts
index dcec57c20399..aab8e2421650 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts
@@ -279,8 +279,8 @@
pinctrl_i2c4: i2c4grp {
fsl,pins = <
- MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3
- MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3
+ MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083
+ MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083
>;
};
@@ -292,19 +292,19 @@
pinctrl_uart1: uart1grp {
fsl,pins = <
- MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140
- MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140
- MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140
- MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140
+ MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0
+ MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0
+ MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0
+ MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0
>;
};
pinctrl_uart2: uart2grp {
fsl,pins = <
- MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140
- MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140
- MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140
- MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140
+ MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0
+ MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0
+ MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0
+ MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0
>;
};
@@ -316,40 +316,40 @@
pinctrl_usdhc2: usdhc2grp {
fsl,pins = <
- MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190
+ MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90
MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0
MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0
MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0
MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0
MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0
- MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019
- MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0
+ MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19
+ MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0
>;
};
pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
fsl,pins = <
- MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194
+ MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94
MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4
MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4
MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4
MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4
MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4
- MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019
- MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0
+ MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19
+ MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0
>;
};
pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
fsl,pins = <
- MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196
+ MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96
MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6
MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6
MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6
MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6
MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6
- MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019
- MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0
+ MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19
+ MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0
>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi
index 6e75ab879bf5..663ae52b4852 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi
@@ -3,6 +3,7 @@
* Copyright (C) 2022 Kontron Electronics GmbH
*/
+#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include "imx8mm.dtsi"
@@ -28,6 +29,73 @@
chosen {
stdout-path = &uart3;
};
+
+ reg_vdd_carrier: regulator-vdd-carrier {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_vdd_carrier>;
+ gpio = <&gpio3 16 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "VDD_CARRIER";
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+ };
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+
+ regulator-state-disk {
+ regulator-off-in-suspend;
+ };
+ };
+
+ reg_usb1_vbus: regulator-usb1-vbus {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usb1_vbus>;
+ enable-active-high;
+ gpio = <&gpio3 25 GPIO_ACTIVE_HIGH>;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-name = "VBUS_USB1";
+ };
+
+ reg_usb2_vbus: regulator-usb2-vbus {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usb2_vbus>;
+ enable-active-high;
+ gpio = <&gpio3 20 GPIO_ACTIVE_HIGH>;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-name = "VBUS_USB2";
+ };
+
+ reg_usdhc2_vcc: regulator-usdhc2-vcc {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usdhc2_vcc>;
+ enable-active-high;
+ gpio = <&gpio2 19 GPIO_ACTIVE_HIGH>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "VCC_SDIO_A";
+ };
+
+ reg_usdhc3_vcc: regulator-usdhc3-vcc {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usdhc3_vcc>;
+ enable-active-high;
+ gpio = <&gpio5 1 GPIO_ACTIVE_HIGH>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "VCC_SDIO_B";
+ };
};
&A53_0 {
@@ -96,6 +164,79 @@
};
};
+&ecspi2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ecspi2>, <&pinctrl_ecspi2_gpio>;
+ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
+};
+
+&ecspi3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ecspi3>;
+ cs-gpios = <&gpio5 25 GPIO_ACTIVE_LOW>;
+};
+
+&gpio1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio1>;
+ gpio-line-names = "", "GPIO_A_0", "", "GPIO_A_1",
+ "", "GPIO_A_2", "GPIO_A_3", "GPIO_A_4",
+ "GPIO_A_5", "GPIO_A_6", "GPIO_A_7", "GPIO_B_0",
+ "GPIO_B_1", "USB_A_OC#", "CAM_MCK", "USB_B_OC#",
+ "ETH_MDC", "ETH_MDIO", "ETH_A_(S)(R)(G)MII_TXD3",
+ "ETH_A_(S)(R)(G)MII_TXD2", "ETH_A_(S)(R)(G)MII_TXD1",
+ "ETH_A_(S)(R)(G)MII_TXD0", "ETH_A_(R)(G)MII_TX_EN(_ER)",
+ "ETH_A_(R)(G)MII_TX_CLK", "ETH_A_(R)(G)MII_RX_DV(_ER)",
+ "ETH_A_(R)(G)MII_RX_CLK", "ETH_A_(S)(R)(G)MII_RXD0",
+ "ETH_A_(S)(R)(G)MII_RXD1", "ETH_A_(R)(G)MII_RXD2",
+ "ETH_A_(R)(G)MII_RXD3";
+};
+
+&gpio2 {
+ gpio-line-names = "", "", "", "",
+ "", "", "", "",
+ "", "", "", "",
+ "SDIO_A_CD#", "SDIO_A_CLK", "SDIO_A_CMD", "SDIO_A_D0",
+ "SDIO_A_D1", "SDIO_A_D2", "SDIO_A_D3", "SDIO_A_PWR_EN",
+ "SDIO_A_WP";
+};
+
+&gpio3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio3>;
+ gpio-line-names = "GPIO_C_5", "GPIO_C_4", "SDIO_B_CD#", "SDIO_B_D5",
+ "SDIO_B_D6", "SDIO_B_D7", "GPIO_C_0", "GPIO_C_1",
+ "GPIO_C_2", "GPIO_C_3", "SDIO_B_D0", "SDIO_B_D1",
+ "SDIO_B_D2", "SDIO_B_D3", "", "SDIO_B_D4",
+ "CARRIER_PWR_EN", "SDIO_B_CLK", "SDIO_B_CMD", "GPIO_B_2",
+ "USB_B_EN", "GPIO_B_3", "PCIe_CLKREQ#", "PCIe_A_PERST#",
+ "PCIe_WAKE#", "USB_A_EN";
+};
+
+&gpio4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio4>;
+ gpio-line-names = "GPIO_C_7", "", "I2S_A_DATA_IN", "I2S_B_DATA_IN",
+ "GPIO_B_4", "BOOT_SEL0#", "BOOT_SEL1#", "",
+ "", "", "I2S_LRCLK", "I2S_BITCLK",
+ "I2S_A_DATA_OUT", "I2S_B_DATA_OUT", "GPIO_B_5", "GPIO_B_6",
+ "GPIO_B_7", "SPI_A_/WP_(IO2)", "SPI_A_/HOLD_(IO3)", "GPIO_C_6",
+ "I2S_MCLK", "UART_A_TX", "UART_A_RX", "UART_A_CTS",
+ "UART_A_RTS", "", "", "",
+ "PCIe_SM_ALERT", "UART_B_RTS", "UART_B_CTS", "UART_B_RX";
+};
+
+&gpio5 {
+ gpio-line-names = "UART_B_TX", "SDIO_B_PWR_EN", "SDIO_B_WP", "PWM_2",
+ "PWM_1", "PWM_0", "", "",
+ "", "", "SPI_A_SCK", "SPI_A_SDO_(IO1)",
+ "SPI_A_SCK", "SPI_A_CS0#", "", "",
+ "I2C_A_SCL", "I2C_A_SDA", "I2C_B_SCL", "I2C_B_SDA",
+ "PCIe_SMCLK", "PCIe_SMDAT", "SPI_B_SCK", "SPI_B_SDO",
+ "SPI_B_SDI", "SPI_B_CS0#", "UART_CON_RX", "UART_CON_TX",
+ "UART_C_RX", "UART_C_TX";
+};
+
&i2c1 {
clock-frequency = <400000>;
pinctrl-names = "default";
@@ -205,22 +346,86 @@
};
};
+ eeprom: eeprom@50 {
+ compatible = "atmel,24c64";
+ reg = <0x50>;
+ address-width = <16>;
+ pagesize = <32>;
+ size = <8192>;
+ };
+
rv3028: rtc@52 {
compatible = "microcrystal,rv3028";
reg = <0x52>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_rtc>;
- interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_HIGH>;
- trickle-diode-disable;
+ interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_LOW>;
};
};
+&i2c2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c2>;
+};
+
+&i2c3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c3>;
+};
+
+&i2c4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c4>;
+};
+
+&pwm1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm1>;
+};
+
+&pwm2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm2>;
+};
+
+&pwm3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm3>;
+};
+
+&uart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart1>;
+};
+
+&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart2>;
+};
+
&uart3 { /* console */
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart3>;
status = "okay";
};
+&uart4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart4>;
+};
+
+&usbotg1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb1>;
+ vbus-supply = <&reg_usb1_vbus>;
+};
+
+&usbotg2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb2>;
+ vbus-supply = <&reg_usb2_vbus>;
+};
+
&usdhc1 {
pinctrl-names = "default", "state_100mhz", "state_200mhz";
pinctrl-0 = <&pinctrl_usdhc1>;
@@ -233,6 +438,26 @@
status = "okay";
};
+&usdhc2 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+ pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
+ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
+ vmmc-supply = <&reg_usdhc2_vcc>;
+ vqmmc-supply = <&reg_nvcc_sd>;
+ cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>;
+};
+
+&usdhc3 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc3>, <&pinctrl_usdhc3_gpio>;
+ pinctrl-1 = <&pinctrl_usdhc3_100mhz>, <&pinctrl_usdhc3_gpio>;
+ pinctrl-2 = <&pinctrl_usdhc3_200mhz>, <&pinctrl_usdhc3_gpio>;
+ vmmc-supply = <&reg_usdhc3_vcc>;
+ vqmmc-supply = <&reg_nvcc_sd>;
+ cd-gpios = <&gpio3 2 GPIO_ACTIVE_LOW>;
+};
+
&wdog1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_wdog>;
@@ -241,6 +466,12 @@
};
&iomuxc {
+ pinctrl_csi_mck: csimckgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_GPIO1_IO14_CCMSRCGPCMIX_CLKO1 0x59 /* CAM_MCK */
+ >;
+ };
+
pinctrl_ecspi1: ecspi1grp {
fsl,pins = <
MX8MM_IOMUXC_ECSPI1_MISO_ECSPI1_MISO 0x82
@@ -250,10 +481,140 @@
>;
};
+ pinctrl_ecspi2: ecspi2grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82 /* SPI_A_SDI_(IO0) */
+ MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82 /* SPI_A_SDO_(IO1) */
+ MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82 /* SPI_A_SCK */
+ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x19 /* SPI_A_CS0# */
+ >;
+ };
+
+ pinctrl_ecspi2_gpio: ecspi2gpiogrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI1_TXD5_GPIO4_IO17 0x19 /* SPI_A_/WP_(IO2) */
+ MX8MM_IOMUXC_SAI1_TXD6_GPIO4_IO18 0x19 /* SPI_A_/HOLD_(IO3) */
+ >;
+ };
+
+ pinctrl_ecspi3: ecspi3grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_UART2_RXD_ECSPI3_MISO 0x82 /* SPI_B_SDI */
+ MX8MM_IOMUXC_UART1_TXD_ECSPI3_MOSI 0x82 /* SPI_B_SDO */
+ MX8MM_IOMUXC_UART1_RXD_ECSPI3_SCLK 0x82 /* SPI_B_SCK */
+ MX8MM_IOMUXC_UART2_TXD_GPIO5_IO25 0x19 /* SPI_B_CS0# */
+ >;
+ };
+
+ pinctrl_enet_rgmii: enetrgmiigrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_ENET_MDC_ENET1_MDC 0x03 /* ETH_MDC */
+ MX8MM_IOMUXC_ENET_MDIO_ENET1_MDIO 0x03 /* ETH_MDIO */
+ MX8MM_IOMUXC_ENET_TD3_ENET1_RGMII_TD3 0x1f /* ETH_A_(S)(R)(G)MII_TXD3 */
+ MX8MM_IOMUXC_ENET_TD2_ENET1_RGMII_TD2 0x1f /* ETH_A_(S)(R)(G)MII_TXD2 */
+ MX8MM_IOMUXC_ENET_TD1_ENET1_RGMII_TD1 0x1f /* ETH_A_(S)(R)(G)MII_TXD1 */
+ MX8MM_IOMUXC_ENET_TD0_ENET1_RGMII_TD0 0x1f /* ETH_A_(S)(R)(G)MII_TXD0 */
+ MX8MM_IOMUXC_ENET_RD3_ENET1_RGMII_RD3 0x91 /* ETH_A_(R)(G)MII_RXD3 */
+ MX8MM_IOMUXC_ENET_RD2_ENET1_RGMII_RD2 0x91 /* ETH_A_(R)(G)MII_RXD2 */
+ MX8MM_IOMUXC_ENET_RD1_ENET1_RGMII_RD1 0x91 /* ETH_A_(S)(R)(G)MII_RXD1 */
+ MX8MM_IOMUXC_ENET_RD0_ENET1_RGMII_RD0 0x91 /* ETH_A_(S)(R)(G)MII_RXD0 */
+ MX8MM_IOMUXC_ENET_TXC_ENET1_RGMII_TXC 0x1f /* ETH_A_(R)(G)MII_TX_CLK */
+ MX8MM_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91 /* ETH_A_(R)(G)MII_RX_CLK */
+ MX8MM_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91 /* ETH_A_(R)(G)MII_RX_DV(_ER) */
+ MX8MM_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f /* ETH_A_(R)(G)MII_TX_EN(_ER) */
+ >;
+ };
+
+ pinctrl_enet_rmii: enetrmiigrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_ENET_MDC_ENET1_MDC 0x03 /* ETH_MDC */
+ MX8MM_IOMUXC_ENET_MDIO_ENET1_MDIO 0x03 /* ETH_MDIO */
+ MX8MM_IOMUXC_ENET_TD2_ENET1_TX_CLK 0x4000001f /* ETH_A_(S)(R)(G)MII_TXD2 */
+ MX8MM_IOMUXC_ENET_TD1_ENET1_RGMII_TD1 0x56 /* ETH_A_(S)(R)(G)MII_TXD1 */
+ MX8MM_IOMUXC_ENET_TD0_ENET1_RGMII_TD0 0x56 /* ETH_A_(S)(R)(G)MII_TXD0 */
+ MX8MM_IOMUXC_ENET_RD1_ENET1_RGMII_RD1 0x56 /* ETH_A_(S)(R)(G)MII_RXD1 */
+ MX8MM_IOMUXC_ENET_RD0_ENET1_RGMII_RD0 0x56 /* ETH_A_(S)(R)(G)MII_RXD0 */
+ MX8MM_IOMUXC_ENET_RXC_ENET1_RX_ER 0x56 /* ETH_A_(R)(G)MII_RX_CLK */
+ MX8MM_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x56 /* ETH_A_(R)(G)MII_RX_DV(_ER) */
+ MX8MM_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x56 /* ETH_A_(R)(G)MII_TX_EN(_ER) */
+ >;
+ };
+
+ pinctrl_gpio1: gpio1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_GPIO1_IO01_GPIO1_IO1 0x19 /* GPIO_A_0 */
+ MX8MM_IOMUXC_GPIO1_IO03_GPIO1_IO3 0x19 /* GPIO_A_1 */
+ MX8MM_IOMUXC_GPIO1_IO05_GPIO1_IO5 0x19 /* GPIO_A_2 */
+ MX8MM_IOMUXC_GPIO1_IO06_GPIO1_IO6 0x19 /* GPIO_A_3 */
+ MX8MM_IOMUXC_GPIO1_IO07_GPIO1_IO7 0x19 /* GPIO_A_4 */
+ MX8MM_IOMUXC_GPIO1_IO08_GPIO1_IO8 0x19 /* GPIO_A_5 */
+ MX8MM_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19 /* GPIO_A_6 */
+ MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x19 /* GPIO_A_7 */
+ MX8MM_IOMUXC_GPIO1_IO11_GPIO1_IO11 0x19 /* GPIO_B_0 */
+ MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x19 /* GPIO_B_1 */
+ >;
+ };
+
+ pinctrl_gpio3: gpio3grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_NAND_ALE_GPIO3_IO0 0x19 /* GPIO_C_5 */
+ MX8MM_IOMUXC_NAND_CE0_B_GPIO3_IO1 0x19 /* GPIO_C_4 */
+ MX8MM_IOMUXC_NAND_DATA00_GPIO3_IO6 0x19 /* GPIO_C_0 */
+ MX8MM_IOMUXC_NAND_DATA01_GPIO3_IO7 0x19 /* GPIO_C_1 */
+ MX8MM_IOMUXC_NAND_DATA02_GPIO3_IO8 0x19 /* GPIO_C_2 */
+ MX8MM_IOMUXC_NAND_DATA03_GPIO3_IO9 0x19 /* GPIO_C_3 */
+ MX8MM_IOMUXC_SAI5_RXFS_GPIO3_IO19 0x19 /* GPIO_B_2 */
+ MX8MM_IOMUXC_SAI5_RXD0_GPIO3_IO21 0x19 /* GPIO_B_3 */
+ >;
+ };
+
+ pinctrl_gpio4: gpio4grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI1_RXFS_GPIO4_IO0 0x19 /* GPIO_C_7 */
+ MX8MM_IOMUXC_SAI1_RXD2_GPIO4_IO4 0x19 /* GPIO_B_4 */
+ MX8MM_IOMUXC_SAI1_RXD3_GPIO4_IO5 0x19 /* BOOT_SEL0# */
+ MX8MM_IOMUXC_SAI1_RXD4_GPIO4_IO6 0x19 /* BOOT_SEL1# */
+ MX8MM_IOMUXC_SAI1_TXD2_GPIO4_IO14 0x19 /* GPIO_B_5 */
+ MX8MM_IOMUXC_SAI1_TXD3_GPIO4_IO15 0x19 /* GPIO_B_6 */
+ MX8MM_IOMUXC_SAI1_TXD4_GPIO4_IO16 0x19 /* GPIO_B_7 */
+ MX8MM_IOMUXC_SAI1_TXD7_GPIO4_IO19 0x19 /* GPIO_C_6 */
+ >;
+ };
+
pinctrl_i2c1: i2c1grp {
fsl,pins = <
- MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3
- MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3
+ MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083
+ MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083
+ >;
+ };
+
+ pinctrl_i2c2: i2c2grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_I2C2_SCL_I2C2_SCL 0x40000083 /* I2C_A_SCL */
+ MX8MM_IOMUXC_I2C2_SDA_I2C2_SDA 0x40000083 /* I2C_A_SDA */
+ >;
+ };
+
+ pinctrl_i2c3: i2c3grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_I2C3_SCL_I2C3_SCL 0x40000083 /* I2C_B_SCL */
+ MX8MM_IOMUXC_I2C3_SDA_I2C3_SDA 0x40000083 /* I2C_B_SDA */
+ >;
+ };
+
+ pinctrl_i2c4: i2c4grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 /* PCIe_SMCLK and I2C_CAM_SCL/CSI_TX_P */
+ MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 /* PCIe_SMDAT and I2C_CAM_SDA/CSI_TX_N */
+ >;
+ };
+
+ pinctrl_pcie: pciegrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI5_RXD1_GPIO3_IO22 0x19 /* PCIe_CLKREQ# */
+ MX8MM_IOMUXC_SAI5_RXD2_GPIO3_IO23 0x19 /* PCIe_A_PERST# */
+ MX8MM_IOMUXC_SAI5_RXD3_GPIO3_IO24 0x19 /* PCIe_WAKE# */
+ MX8MM_IOMUXC_SAI3_RXFS_GPIO4_IO28 0x19 /* PCIe_SM_ALERT */
>;
};
@@ -263,16 +624,113 @@
>;
};
+ pinctrl_pwm1: pwm1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SPDIF_EXT_CLK_PWM1_OUT 0x19 /* PWM_0 */
+ >;
+ };
+
+ pinctrl_pwm2: pwm2grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SPDIF_RX_PWM2_OUT 0x19 /* PWM_1 */
+ >;
+ };
+
+ pinctrl_pwm3: pwm3grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SPDIF_TX_PWM3_OUT 0x19 /* PWM_2 */
+ >;
+ };
+
+ pinctrl_reg_usb1_vbus: regusb1vbusgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI5_MCLK_GPIO3_IO25 0x19 /* USB_A_EN */
+ >;
+ };
+
+ pinctrl_reg_usb2_vbus: regusb2vbusgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI5_RXC_GPIO3_IO20 0x19 /* USB_B_EN */
+ >;
+ };
+
+ pinctrl_reg_usdhc2_vcc: regusdhc2vccgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SD2_RESET_B_GPIO2_IO19 0x19 /* SDIO_A_PWR_EN */
+ >;
+ };
+
+ pinctrl_reg_usdhc3_vcc: regusdhc3vccgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI3_TXD_GPIO5_IO1 0x19 /* SDIO_B_PWR_EN */
+ >;
+ };
+
+ pinctrl_reg_vdd_carrier: regvddcarriergrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_NAND_READY_B_GPIO3_IO16 0x19 /* CARRIER_PWR_EN */
+ >;
+ };
+
pinctrl_rtc: rtcgrp {
fsl,pins = <
MX8MM_IOMUXC_SAI1_RXC_GPIO4_IO1 0x19
>;
};
+ pinctrl_sai1: sai1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI1_RXD0_SAI1_RX_DATA0 0xd6 /* I2S_A_DATA_IN */
+ MX8MM_IOMUXC_SAI1_TXD0_SAI1_TX_DATA0 0xd6 /* I2S_A_DATA_OUT */
+ MX8MM_IOMUXC_SAI1_RXD1_SAI1_RX_DATA1 0xd6 /* I2S_B_DATA_IN */
+ MX8MM_IOMUXC_SAI1_TXD1_SAI1_TX_DATA1 0xd6 /* I2S_B_DATA_OUT */
+ MX8MM_IOMUXC_SAI1_MCLK_SAI1_MCLK 0xd6 /* I2S_MCLK */
+ MX8MM_IOMUXC_SAI1_TXFS_SAI1_TX_SYNC 0xd6 /* I2S_LRCLK */
+ MX8MM_IOMUXC_SAI1_TXC_SAI1_TX_BCLK 0xd6 /* I2S_BITCLK */
+ >;
+ };
+
+ pinctrl_uart1: uart1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 /* UART_A_RX */
+ MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 /* UART_A_TX */
+ MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 /* UART_A_CTS */
+ MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 /* UART_A_RTS */
+ >;
+ };
+
+ pinctrl_uart2: uart2grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 /* UART_B_RX */
+ MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 /* UART_B_TX */
+ MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 /* UART_B_CTS */
+ MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 /* UART_B_RTS */
+ >;
+ };
+
pinctrl_uart3: uart3grp {
fsl,pins = <
- MX8MM_IOMUXC_UART3_RXD_UART3_DCE_RX 0x140
- MX8MM_IOMUXC_UART3_TXD_UART3_DCE_TX 0x140
+ MX8MM_IOMUXC_UART3_RXD_UART3_DCE_RX 0x140 /* UART_CON_RX */
+ MX8MM_IOMUXC_UART3_TXD_UART3_DCE_TX 0x140 /* UART_CON_TX */
+ >;
+ };
+
+ pinctrl_uart4: uart4grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_UART4_RXD_UART4_DCE_RX 0x0 /* UART_C_RX */
+ MX8MM_IOMUXC_UART4_TXD_UART4_DCE_TX 0x0 /* UART_C_TX */
+ >;
+ };
+
+ pinctrl_usb1: usb1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x19 /* USB_A_OC# */
+ >;
+ };
+
+ pinctrl_usb2: usb2grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_GPIO1_IO15_USB2_OTG_OC 0x19 /* USB_B_OC# */
>;
};
@@ -327,6 +785,103 @@
>;
};
+ pinctrl_usdhc2: usdhc2grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 /* SDIO_A_CLK */
+ MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 /* SDIO_A_CMD */
+ MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 /* SDIO_A_D0 */
+ MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 /* SDIO_A_D1 */
+ MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 /* SDIO_A_D2 */
+ MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 /* SDIO_A_D3 */
+ MX8MM_IOMUXC_SD2_WP_USDHC2_WP 0x400000d6 /* SDIO_A_WP */
+ MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x90
+ >;
+ };
+
+ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 /* SDIO_A_CLK */
+ MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 /* SDIO_A_CMD */
+ MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 /* SDIO_A_D0 */
+ MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 /* SDIO_A_D1 */
+ MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 /* SDIO_A_D2 */
+ MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 /* SDIO_A_D3 */
+ MX8MM_IOMUXC_SD2_WP_USDHC2_WP 0x400000d6 /* SDIO_A_WP */
+ MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x90
+ >;
+ };
+
+ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 /* SDIO_A_CLK */
+ MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 /* SDIO_A_CMD */
+ MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 /* SDIO_A_D0 */
+ MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 /* SDIO_A_D1 */
+ MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 /* SDIO_A_D2 */
+ MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 /* SDIO_A_D3 */
+ MX8MM_IOMUXC_SD2_WP_USDHC2_WP 0x400000d6 /* SDIO_A_WP */
+ MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x90
+ >;
+ };
+
+ pinctrl_usdhc2_gpio: usdhc2gpiogrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 /* SDIO_A_CD# */
+ >;
+ };
+
+ pinctrl_usdhc3: usdhc3grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_NAND_WE_B_USDHC3_CLK 0x90 /* SDIO_B_CLK */
+ MX8MM_IOMUXC_NAND_WP_B_USDHC3_CMD 0x90 /* SDIO_B_CMD */
+ MX8MM_IOMUXC_NAND_DATA04_USDHC3_DATA0 0x90 /* SDIO_B_D0 */
+ MX8MM_IOMUXC_NAND_DATA05_USDHC3_DATA1 0x90 /* SDIO_B_D1 */
+ MX8MM_IOMUXC_NAND_DATA06_USDHC3_DATA2 0x90 /* SDIO_B_D2 */
+ MX8MM_IOMUXC_NAND_DATA07_USDHC3_DATA3 0x90 /* SDIO_B_D3 */
+ MX8MM_IOMUXC_NAND_RE_B_USDHC3_DATA4 0x90 /* SDIO_B_D4 */
+ MX8MM_IOMUXC_NAND_CE2_B_USDHC3_DATA5 0x90 /* SDIO_B_D5 */
+ MX8MM_IOMUXC_NAND_CE3_B_USDHC3_DATA6 0x90 /* SDIO_B_D6 */
+ MX8MM_IOMUXC_NAND_CLE_USDHC3_DATA7 0x90 /* SDIO_B_D7 */
+ >;
+ };
+
+ pinctrl_usdhc3_100mhz: usdhc3-100mhzgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_NAND_WE_B_USDHC3_CLK 0x94 /* SDIO_B_CLK */
+ MX8MM_IOMUXC_NAND_WP_B_USDHC3_CMD 0x94 /* SDIO_B_CMD */
+ MX8MM_IOMUXC_NAND_DATA04_USDHC3_DATA0 0x94 /* SDIO_B_D0 */
+ MX8MM_IOMUXC_NAND_DATA05_USDHC3_DATA1 0x94 /* SDIO_B_D1 */
+ MX8MM_IOMUXC_NAND_DATA06_USDHC3_DATA2 0x94 /* SDIO_B_D2 */
+ MX8MM_IOMUXC_NAND_DATA07_USDHC3_DATA3 0x94 /* SDIO_B_D3 */
+ MX8MM_IOMUXC_NAND_RE_B_USDHC3_DATA4 0x94 /* SDIO_B_D4 */
+ MX8MM_IOMUXC_NAND_CE2_B_USDHC3_DATA5 0x94 /* SDIO_B_D5 */
+ MX8MM_IOMUXC_NAND_CE3_B_USDHC3_DATA6 0x94 /* SDIO_B_D6 */
+ MX8MM_IOMUXC_NAND_CLE_USDHC3_DATA7 0x94 /* SDIO_B_D7 */
+ >;
+ };
+
+ pinctrl_usdhc3_200mhz: usdhc3-200mhzgrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_NAND_WE_B_USDHC3_CLK 0x96 /* SDIO_B_CLK */
+ MX8MM_IOMUXC_NAND_WP_B_USDHC3_CMD 0x96 /* SDIO_B_CMD */
+ MX8MM_IOMUXC_NAND_DATA04_USDHC3_DATA0 0x96 /* SDIO_B_D0 */
+ MX8MM_IOMUXC_NAND_DATA05_USDHC3_DATA1 0x96 /* SDIO_B_D1 */
+ MX8MM_IOMUXC_NAND_DATA06_USDHC3_DATA2 0x96 /* SDIO_B_D2 */
+ MX8MM_IOMUXC_NAND_DATA07_USDHC3_DATA3 0x96 /* SDIO_B_D3 */
+ MX8MM_IOMUXC_NAND_RE_B_USDHC3_DATA4 0x96 /* SDIO_B_D4 */
+ MX8MM_IOMUXC_NAND_CE2_B_USDHC3_DATA5 0x96 /* SDIO_B_D5 */
+ MX8MM_IOMUXC_NAND_CE3_B_USDHC3_DATA6 0x96 /* SDIO_B_D6 */
+ MX8MM_IOMUXC_NAND_CLE_USDHC3_DATA7 0x96 /* SDIO_B_D7 */
+ >;
+ };
+
+ pinctrl_usdhc3_gpio: usdhc3gpiogrp {
+ fsl,pins = <
+ MX8MM_IOMUXC_NAND_CE1_B_GPIO3_IO2 0x19 /* SDIO_B_CD# */
+ MX8MM_IOMUXC_SAI3_MCLK_GPIO5_IO2 0x19 /* SDIO_B_WP */
+ >;
+ };
+
pinctrl_wdog: wdoggrp {
fsl,pins = <
MX8MM_IOMUXC_GPIO1_IO02_WDOG1_WDOG_B 0xc6
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi
index 1f8326613ee9..2076148e0862 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi
@@ -237,8 +237,8 @@
pinctrl_i2c1: i2c1grp {
fsl,pins = <
- MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3
- MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3
+ MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083
+ MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts
index 968f475b9a96..27a902569e2a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts
@@ -120,7 +120,7 @@
};
tpm: tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
interrupt-parent = <&gpio2>;
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts
index ea6e8b85169f..01b632b220dc 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts
@@ -5,6 +5,8 @@
/dts-v1/;
+#include <dt-bindings/phy/phy-imx8-pcie.h>
+
#include "imx8mm-tqma8mqml.dtsi"
#include "mba8mx.dtsi"
@@ -74,19 +76,23 @@
};
&pcie_phy {
- clocks = <&pcie0_refclk>;
+ fsl,refclk-pad-mode = <IMX8_PCIE_REFCLK_PAD_INPUT>;
+ fsl,clkreq-unsupported;
+ clocks = <&pcieclk 2>;
+ clock-names = "ref";
status = "okay";
};
+/* PCIe slot on X36 */
&pcie0 {
reset-gpio = <&expander0 14 GPIO_ACTIVE_LOW>;
- clocks = <&clk IMX8MM_CLK_PCIE1_ROOT>, <&pcie0_refclk>,
+ clocks = <&clk IMX8MM_CLK_PCIE1_ROOT>, <&pcieclk 3>,
<&clk IMX8MM_CLK_PCIE1_AUX>;
assigned-clocks = <&clk IMX8MM_CLK_PCIE1_AUX>,
- <&clk IMX8MM_CLK_PCIE1_CTRL>;
+ <&clk IMX8MM_CLK_PCIE1_CTRL>;
assigned-clock-rates = <10000000>, <250000000>;
assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_50M>,
- <&clk IMX8MM_SYS_PLL2_250M>;
+ <&clk IMX8MM_SYS_PLL2_250M>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
index 6425773f68e0..41c966147b94 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
@@ -47,25 +47,20 @@
gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
status = "okay";
};
-
- reg_usb_otg1_vbus: regulator-usb-otg1 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_reg_usb1_en>;
- compatible = "regulator-fixed";
- regulator-name = "usb_otg1_vbus";
- gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>;
- enable-active-high;
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- };
};
-/* off-board header */
&ecspi2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_spi2>;
- cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
+ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>,
+ <&gpio1 10 GPIO_ACTIVE_LOW>;
status = "okay";
+
+ tpm@1 {
+ compatible = "tcg,tpm_tis-spi";
+ reg = <0x1>;
+ spi-max-frequency = <36000000>;
+ };
};
&gpio1 {
@@ -144,9 +139,10 @@
};
&usbotg1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg1>;
dr_mode = "otg";
over-current-active-low;
- vbus-supply = <&reg_usb_otg1_vbus>;
status = "okay";
};
@@ -204,20 +200,13 @@
>;
};
- pinctrl_reg_usb1_en: regusb1grp {
- fsl,pins = <
- MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x41
- MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141
- MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41
- >;
- };
-
pinctrl_spi2: spi2grp {
fsl,pins = <
MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
+ MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0xd6
>;
};
@@ -234,4 +223,11 @@
MX8MM_IOMUXC_UART3_TXD_UART3_DCE_TX 0x140
>;
};
+
+ pinctrl_usbotg1: usbotg1grp {
+ fsl,pins = <
+ MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141
+ MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41
+ >;
+ };
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
index 3f3f2a2c89cd..752caa38eb03 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
@@ -89,7 +89,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
index 06fed9376996..2aa6c1090fc7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
@@ -109,7 +109,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
index 87b80e2412cb..5e2cbaf27e0f 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
@@ -285,7 +285,8 @@
&ecspi1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_spi1>;
- cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
+ cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>,
+ <&gpio4 24 GPIO_ACTIVE_LOW>;
status = "okay";
flash@0 {
@@ -294,6 +295,12 @@
spi-max-frequency = <40000000>;
status = "okay";
};
+
+ tpm@1 {
+ compatible = "tcg,tpm_tis-spi";
+ reg = <0x1>;
+ spi-max-frequency = <36000000>;
+ };
};
&fec1 {
@@ -319,7 +326,7 @@
&gpio4 {
gpio-line-names = "", "", "", "",
- "", "", "uart3_rs232#", "uart3_rs422#",
+ "dig1_ctl", "dig2_ctl", "uart3_rs232#", "uart3_rs422#",
"uart3_rs485#", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "",
"", "", "", "uart4_rs485#", "", "sim1det#", "sim2det#", "";
@@ -842,6 +849,8 @@
pinctrl_hog: hoggrp {
fsl,pins = <
+ MX8MM_IOMUXC_SAI1_RXD2_GPIO4_IO4 0x40000041 /* DIG1_CTL */
+ MX8MM_IOMUXC_SAI1_RXD3_GPIO4_IO5 0x40000041 /* DIG2_CTL */
MX8MM_IOMUXC_SPDIF_TX_GPIO5_IO3 0x40000041 /* DIG2_OUT */
MX8MM_IOMUXC_SPDIF_RX_GPIO5_IO4 0x40000041 /* DIG2_IN */
MX8MM_IOMUXC_GPIO1_IO06_GPIO1_IO6 0x40000041 /* DIG1_IN */
@@ -987,6 +996,7 @@
MX8MM_IOMUXC_ECSPI1_MOSI_ECSPI1_MOSI 0x82
MX8MM_IOMUXC_ECSPI1_MISO_ECSPI1_MISO 0x82
MX8MM_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x140
+ MX8MM_IOMUXC_SAI2_TXFS_GPIO4_IO24 0x140
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts
index 35b8d2060cd9..bbd80896db96 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts
@@ -99,8 +99,6 @@
};
&lcdif {
- assigned-clocks = <&clk IMX8MN_VIDEO_PLL1>;
- assigned-clock-rates = <594000000>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi
index a0e13d3324ed..269e70f66a13 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dtsi
@@ -110,6 +110,20 @@
spdif-out;
spdif-in;
};
+
+ sound-micfil {
+ compatible = "fsl,imx-audio-card";
+ model = "micfil-audio";
+
+ pri-dai-link {
+ link-name = "micfil hifi";
+ format = "i2s";
+
+ cpu {
+ sound-dai = <&micfil>;
+ };
+ };
+ };
};
&easrc {
@@ -285,6 +299,16 @@
status = "okay";
};
+&micfil {
+ #sound-dai-cells = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pdm>;
+ assigned-clocks = <&clk IMX8MN_CLK_PDM>;
+ assigned-clock-parents = <&clk IMX8MN_AUDIO_PLL1_OUT>;
+ assigned-clock-rates = <196608000>;
+ status = "okay";
+};
+
&mipi_csi {
status = "okay";
@@ -522,6 +546,18 @@
>;
};
+ pinctrl_pdm: pdmgrp {
+ fsl,pins = <
+ MX8MN_IOMUXC_SAI5_MCLK_SAI5_MCLK 0xd6
+ MX8MN_IOMUXC_SAI5_RXC_PDM_CLK 0xd6
+ MX8MN_IOMUXC_SAI5_RXFS_SAI5_RX_SYNC 0xd6
+ MX8MN_IOMUXC_SAI5_RXD0_PDM_BIT_STREAM0 0xd6
+ MX8MN_IOMUXC_SAI5_RXD1_PDM_BIT_STREAM1 0xd6
+ MX8MN_IOMUXC_SAI5_RXD2_PDM_BIT_STREAM2 0xd6
+ MX8MN_IOMUXC_SAI5_RXD3_PDM_BIT_STREAM3 0xd6
+ >;
+ };
+
pinctrl_pmic: pmicirqgrp {
fsl,pins = <
MX8MN_IOMUXC_GPIO1_IO03_GPIO1_IO3 0x141
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-rve-gateway.dts b/arch/arm64/boot/dts/freescale/imx8mn-rve-gateway.dts
index 1b633bd1ebb6..ea1855171fb0 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-rve-gateway.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-rve-gateway.dts
@@ -10,7 +10,7 @@
/ {
model = "RVE gateway";
- compatible = "rve,rve-gateway", "variscite,var-som-mx8mn", "fsl,imx8mn";
+ compatible = "rve,gateway", "variscite,var-som-mx8mn", "fsl,imx8mn";
crystal_duart_24m: crystal-duart-24m {
compatible = "fixed-clock";
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx-usbotg.dtso b/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx-usbotg.dtso
new file mode 100644
index 000000000000..96db07fc9bec
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx-usbotg.dtso
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+/*
+ * Copyright (c) 2022-2024 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+
+#include "imx8mn-pinfunc.h"
+
+&{/} {
+ connector {
+ compatible = "gpio-usb-b-connector", "usb-b-connector";
+ type = "micro";
+ label = "X19";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb1_connector>;
+ id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>;
+
+ port {
+ usb_dr_connector: endpoint {
+ remote-endpoint = <&usb1_drd_sw>;
+ };
+ };
+ };
+};
+
+&rst_usb_hub_hog {
+ output-low;
+};
+
+&sel_usb_hub_hog {
+ output-low;
+};
+
+&usbotg1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg>;
+ dr_mode = "otg";
+ srp-disable;
+ hnp-disable;
+ adp-disable;
+ power-active-high;
+ /delete-property/ disable-over-current;
+ over-current-active-low;
+ usb-role-switch;
+ status = "okay";
+
+ port {
+ usb1_drd_sw: endpoint {
+ remote-endpoint = <&usb_dr_connector>;
+ };
+ };
+};
+
+&iomuxc {
+ pinctrl_usb1_connector: usb1-connectorgrp {
+ fsl,pins = <MX8MN_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x1c0>;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx.dts
index c07d59147ab5..433d8bba4425 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-tqma8mqnl-mba8mx.dts
@@ -41,7 +41,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usb0hub_sel>;
- sel-usb-hub-hog {
+ sel_usb_hub_hog: sel-usb-hub-hog {
gpio-hog;
gpios = <1 GPIO_ACTIVE_HIGH>;
output-high;
@@ -198,8 +198,7 @@
pinctrl_usbotg: usbotggrp {
fsl,pins = <MX8MN_IOMUXC_GPIO1_IO12_USB1_OTG_PWR 0x84>,
- <MX8MN_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x84>,
- <MX8MN_IOMUXC_GPIO1_IO10_USB1_OTG_ID 0x1C4>;
+ <MX8MN_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x84>;
};
pinctrl_usdhc2: usdhc2grp {
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts
index f38ee2266b25..a6b94d1957c9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts
@@ -128,14 +128,9 @@
pinctrl-0 = <&pinctrl_ptn5150>;
status = "okay";
- connector {
- compatible = "usb-c-connector";
- label = "USB-C";
-
- port {
- typec1_dr_sw: endpoint {
- remote-endpoint = <&usb1_drd_sw>;
- };
+ port {
+ typec1_dr_sw: endpoint {
+ remote-endpoint = <&usb1_drd_sw>;
};
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index 136e75c51251..932c8b05c75f 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -1168,7 +1168,7 @@
<&clk IMX8MN_SYS_PLL1_800M>;
assigned-clock-rates = <266000000>,
<24000000>,
- <594000000>,
+ <24000000>,
<500000000>,
<200000000>;
#power-domain-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
index feae77e03835..a08057410bde 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
@@ -234,7 +234,7 @@
status = "okay";
tpm: tpm@0 {
- compatible = "infineon,slb9670";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
reg = <0>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tpm>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi
index e5da90804780..8be251b69378 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi
@@ -50,6 +50,8 @@
phy-mode = "rgmii-id";
phy-handle = <&ethphy0>;
snps,force_thresh_dma_mode;
+ snps,mtl-rx-config = <&mtl_rx_setup>;
+ snps,mtl-tx-config = <&mtl_tx_setup>;
status = "okay";
mdio {
@@ -66,6 +68,71 @@
interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
};
};
+
+ mtl_rx_setup: rx-queues-config {
+ snps,rx-queues-to-use = <5>;
+ snps,rx-sched-sp;
+
+ queue0 {
+ snps,dcb-algorithm;
+ snps,priority = <0x1>;
+ snps,map-to-dma-channel = <0>;
+ };
+
+ queue1 {
+ snps,dcb-algorithm;
+ snps,priority = <0x2>;
+ snps,map-to-dma-channel = <1>;
+ };
+
+ queue2 {
+ snps,dcb-algorithm;
+ snps,priority = <0x4>;
+ snps,map-to-dma-channel = <2>;
+ };
+
+ queue3 {
+ snps,dcb-algorithm;
+ snps,priority = <0x8>;
+ snps,map-to-dma-channel = <3>;
+ };
+
+ queue4 {
+ snps,dcb-algorithm;
+ snps,priority = <0xf0>;
+ snps,map-to-dma-channel = <4>;
+ };
+ };
+
+ mtl_tx_setup: tx-queues-config {
+ snps,tx-queues-to-use = <5>;
+ snps,tx-sched-sp;
+
+ queue0 {
+ snps,dcb-algorithm;
+ snps,priority = <0x1>;
+ };
+
+ queue1 {
+ snps,dcb-algorithm;
+ snps,priority = <0x2>;
+ };
+
+ queue2 {
+ snps,dcb-algorithm;
+ snps,priority = <0x4>;
+ };
+
+ queue3 {
+ snps,dcb-algorithm;
+ snps,priority = <0x8>;
+ };
+
+ queue4 {
+ snps,dcb-algorithm;
+ snps,priority = <0xf0>;
+ };
+ };
};
&flexspi {
@@ -206,6 +273,10 @@
assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_80M>;
uart-has-rtscts;
status = "okay";
+
+ bluetooth {
+ compatible = "nxp,88w8997-bt";
+ };
};
&usdhc1 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts
index d98a040860a4..7e1b58dbe23a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts
@@ -6,6 +6,7 @@
/dts-v1/;
#include <dt-bindings/net/qca-ar803x.h>
+#include <dt-bindings/phy/phy-imx8-pcie.h>
#include "imx8mp.dtsi"
/ {
@@ -45,6 +46,19 @@
clock-frequency = <25000000>;
};
+ clk_pwm4: clock-pwm4 {
+ compatible = "pwm-clock";
+ #clock-cells = <0>;
+ clock-frequency = <12000000>;
+ clock-output-names = "codec-pwm4";
+ /*
+ * 1 / 83 ns ~= 12 MHz , but since the PWM input clock is 24 MHz
+ * and the calculated PWM period is 1 and duty cycle is 50%, the
+ * result is exactly 12 MHz, which is fine for SGTL5000 MCLK.
+ */
+ pwms = <&pwm4 0 83 0>;
+ };
+
panel: panel {
/* Compatible string is filled in by panel board DT Overlay. */
backlight = <&backlight>;
@@ -82,6 +96,24 @@
vin-supply = <&buck4>;
};
+ sound {
+ compatible = "simple-audio-card";
+ simple-audio-card,name = "SGTL5000-Card";
+ simple-audio-card,format = "i2s";
+ simple-audio-card,bitclock-master = <&codec_dai>;
+ simple-audio-card,frame-master = <&codec_dai>;
+ simple-audio-card,widgets = "Headphone", "Headphone Jack";
+ simple-audio-card,routing = "Headphone Jack", "HP_OUT";
+
+ cpu_dai: simple-audio-card,cpu {
+ sound-dai = <&sai3>;
+ };
+
+ codec_dai: simple-audio-card,codec {
+ sound-dai = <&sgtl5000>;
+ };
+ };
+
watchdog { /* TPS3813 */
compatible = "linux,wdt-gpio";
pinctrl-names = "default";
@@ -121,7 +153,7 @@
flash@0 { /* W25Q128JVEI */
compatible = "jedec,spi-nor";
reg = <0>;
- spi-max-frequency = <100000000>; /* Up to 133 MHz */
+ spi-max-frequency = <40000000>;
spi-tx-bus-width = <1>;
spi-rx-bus-width = <1>;
};
@@ -288,6 +320,15 @@
sda-gpios = <&gpio5 15 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
status = "okay";
+ sgtl5000: audio-codec@a {
+ compatible = "fsl,sgtl5000";
+ reg = <0x0a>;
+ #sound-dai-cells = <0>;
+ clocks = <&clk_pwm4>;
+ VDDA-supply = <&buck4>;
+ VDDIO-supply = <&buck4>;
+ };
+
usb-hub@2c {
compatible = "microchip,usb2514bi";
reg = <0x2c>;
@@ -429,6 +470,21 @@
status = "okay";
};
+&pcie_phy {
+ clocks = <&pcieclk 0>;
+ clock-names = "ref";
+ fsl,refclk-pad-mode = <IMX8_PCIE_REFCLK_PAD_INPUT>;
+ status = "okay";
+};
+
+&pcie {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pcie0>;
+ fsl,max-link-speed = <3>;
+ reset-gpio = <&gpio1 5 GPIO_ACTIVE_LOW>;
+ status = "okay";
+};
+
&pwm1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_panel_pwm>;
@@ -436,6 +492,23 @@
status = "disabled";
};
+&pwm4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm4>;
+ status = "okay";
+};
+
+&sai3 {
+ #clock-cells = <0>;
+ #sound-dai-cells = <0>;
+ assigned-clocks = <&clk IMX8MP_CLK_SAI3>;
+ assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL1_OUT>;
+ assigned-clock-rates = <12288000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sai3>;
+ status = "okay";
+};
+
/* SD slot */
&usdhc2 {
pinctrl-names = "default", "state_100mhz", "state_200mhz";
@@ -486,7 +559,7 @@
&uart4 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart4>;
- status = "okay";
+ status = "disabled";
};
&usb3_phy0 {
@@ -785,6 +858,12 @@
>;
};
+ pinctrl_pwm4: pwm4-grp {
+ fsl,pins = <
+ MX8MP_IOMUXC_SAI3_MCLK__PWM4_OUT 0xd6
+ >;
+ };
+
pinctrl_rtc: rtc-grp {
fsl,pins = <
/* RTC_IRQ# */
@@ -816,7 +895,6 @@
MX8MP_IOMUXC_SAI3_TXFS__AUDIOMIX_SAI3_TX_SYNC 0xd6
MX8MP_IOMUXC_SAI3_TXD__AUDIOMIX_SAI3_TX_DATA00 0xd6
MX8MP_IOMUXC_SAI3_TXC__AUDIOMIX_SAI3_TX_BCLK 0xd6
- MX8MP_IOMUXC_SAI3_MCLK__AUDIOMIX_SAI3_MCLK 0xd6
MX8MP_IOMUXC_SAI3_RXD__AUDIOMIX_SAI3_RX_DATA00 0xd6
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts
index fea67a9282f0..b749e28e5ede 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts
@@ -175,14 +175,10 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_ptn5150>;
- connector {
- compatible = "usb-c-connector";
- label = "USB-C";
-
- port {
- ptn5150_out_ep: endpoint {
- remote-endpoint = <&dwc3_0_ep>;
- };
+ port {
+
+ ptn5150_out_ep: endpoint {
+ remote-endpoint = <&dwc3_0_ep>;
};
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
index 4ae4fdab461e..43f1d45ccc96 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
@@ -255,7 +255,7 @@
<&clk IMX8MP_AUDIO_PLL2_OUT>;
assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>;
assigned-clock-rates = <13000000>, <13000000>, <156000000>;
- reset-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>;
status = "disabled";
ports {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
index f87fa5a948cc..9beba8d6a0df 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
@@ -23,7 +23,7 @@
port {
hdmi_connector_in: endpoint {
- remote-endpoint = <&adv7533_out>;
+ remote-endpoint = <&adv7535_out>;
};
};
};
@@ -107,6 +107,13 @@
enable-active-high;
};
+ reg_vext_3v3: regulator-vext-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "VEXT_3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
sound {
compatible = "simple-audio-card";
simple-audio-card,name = "wm8960-audio";
@@ -364,7 +371,7 @@
regulator-always-on;
};
- BUCK5 {
+ reg_buck5: BUCK5 {
regulator-name = "BUCK5";
regulator-min-microvolt = <1650000>;
regulator-max-microvolt = <1950000>;
@@ -415,14 +422,16 @@
hdmi@3d {
compatible = "adi,adv7535";
- reg = <0x3d>, <0x3c>, <0x3e>, <0x3f>;
- reg-names = "main", "cec", "edid", "packet";
+ reg = <0x3d>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <9 IRQ_TYPE_EDGE_FALLING>;
adi,dsi-lanes = <4>;
- adi,input-depth = <8>;
- adi,input-colorspace = "rgb";
- adi,input-clock = "1x";
- adi,input-style = <1>;
- adi,input-justification = "evenly";
+ avdd-supply = <&reg_buck5>;
+ dvdd-supply = <&reg_buck5>;
+ pvdd-supply = <&reg_buck5>;
+ a2vdd-supply = <&reg_buck5>;
+ v3p3-supply = <&reg_vext_3v3>;
+ v1p2-supply = <&reg_buck5>;
ports {
#address-cells = <1>;
@@ -431,7 +440,7 @@
port@0 {
reg = <0>;
- adv7533_in: endpoint {
+ adv7535_in: endpoint {
remote-endpoint = <&dsi_out>;
};
};
@@ -439,7 +448,7 @@
port@1 {
reg = <1>;
- adv7533_out: endpoint {
+ adv7535_out: endpoint {
remote-endpoint = <&hdmi_connector_in>;
};
};
@@ -524,7 +533,7 @@
reg = <1>;
dsi_out: endpoint {
- remote-endpoint = <&adv7533_in>;
+ remote-endpoint = <&adv7535_in>;
data-lanes = <1 2 3 4>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
index c8640cac3edc..00a240484c25 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
@@ -19,6 +19,30 @@
stdout-path = &uart1;
};
+ backlight_lvds: backlight {
+ compatible = "pwm-backlight";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lvds1>;
+ brightness-levels = <0 4 8 16 32 64 128 255>;
+ default-brightness-level = <11>;
+ enable-gpios = <&gpio2 20 GPIO_ACTIVE_LOW>;
+ num-interpolated-steps = <2>;
+ power-supply = <&reg_lvds1_reg_en>;
+ pwms = <&pwm3 0 50000 0>;
+ };
+
+ panel1_lvds: panel-lvds {
+ compatible = "edt,etml1010g3dra";
+ backlight = <&backlight_lvds>;
+ power-supply = <&reg_vcc_3v3_sw>;
+
+ port {
+ panel1_in: endpoint {
+ remote-endpoint = <&ldb_lvds_ch1>;
+ };
+ };
+ };
+
reg_can1_stby: regulator-can1-stby {
compatible = "regulator-fixed";
pinctrl-names = "default";
@@ -39,6 +63,15 @@
regulator-name = "can2-stby";
};
+ reg_lvds1_reg_en: regulator-lvds1 {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio1 9 GPIO_ACTIVE_HIGH>;
+ regulator-max-microvolt = <1200000>;
+ regulator-min-microvolt = <1200000>;
+ regulator-name = "lvds1_reg_en";
+ };
+
reg_usb1_vbus: regulator-usb1-vbus {
compatible = "regulator-fixed";
pinctrl-names = "default";
@@ -61,6 +94,13 @@
startup-delay-us = <100>;
off-on-delay-us = <12000>;
};
+
+ reg_vcc_3v3_sw: regulator-vcc-3v3-sw {
+ compatible = "regulator-fixed";
+ regulator-name = "VCC_3V3_SW";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
};
&eqos {
@@ -135,10 +175,41 @@
};
};
+&lcdif2 {
+ status = "okay";
+};
+
+&lvds_bridge {
+ status = "okay";
+
+ ports {
+ port@2 {
+ ldb_lvds_ch1: endpoint {
+ remote-endpoint = <&panel1_in>;
+ };
+ };
+ };
+};
+
&snvs_pwrkey {
status = "okay";
};
+&pwm3 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm3>;
+};
+
+&rv3028 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rtc>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <19 IRQ_TYPE_LEVEL_LOW>;
+ wakeup-source;
+ trickle-resistor-ohms = <3000>;
+};
+
/* debug console */
&uart1 {
pinctrl-names = "default";
@@ -239,12 +310,12 @@
MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
+ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x12
+ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x12
+ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x12
+ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x12
+ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x12
+ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x12
MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x10
>;
};
@@ -289,16 +360,34 @@
>;
};
+ pinctrl_lvds1: lvds1grp {
+ fsl,pins = <
+ MX8MP_IOMUXC_SD2_WP__GPIO2_IO20 0x12
+ >;
+ };
+
+ pinctrl_pwm3: pwm3grp {
+ fsl,pins = <
+ MX8MP_IOMUXC_SPDIF_TX__PWM3_OUT 0x12
+ >;
+ };
+
pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
fsl,pins = <
MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40
>;
};
+ pinctrl_rtc: rtcgrp {
+ fsl,pins = <
+ MX8MP_IOMUXC_SAI1_TXD7__GPIO4_IO19 0x1C0
+ >;
+ };
+
pinctrl_uart1: uart1grp {
fsl,pins = <
- MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x40
- MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x40
+ MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x140
+ MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x140
>;
};
@@ -319,7 +408,7 @@
pinctrl_usdhc2_pins: usdhc2-gpiogrp {
fsl,pins = <
- MX8MP_IOMUXC_SD2_CD_B__GPIO2_IO12 0x1c4
+ MX8MP_IOMUXC_SD2_CD_B__GPIO2_IO12 0x40
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
index c976c3b6cbc6..e6ffa6a6b68b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
@@ -175,7 +175,6 @@
rv3028: rtc@52 {
compatible = "microcrystal,rv3028";
reg = <0x52>;
- trickle-resistor-ohms = <3000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
index a2d5d19b2de0..86d3da36e4f3 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
@@ -184,6 +184,13 @@
enable-active-high;
};
+ reg_vcc_1v8: regulator-1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "VCC_1V8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
reg_vcc_3v3: regulator-3v3 {
compatible = "regulator-fixed";
regulator-name = "VCC_3V3";
@@ -480,7 +487,7 @@
clock-names = "mclk";
clocks = <&audio_blk_ctrl IMX8MP_CLK_AUDIOMIX_SAI3_MCLK1>;
reset-gpios = <&gpio4 29 GPIO_ACTIVE_LOW>;
- iov-supply = <&reg_vcc_3v3>;
+ iov-supply = <&reg_vcc_1v8>;
ldoin-supply = <&reg_vcc_3v3>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
index 0e8d0f3c7ea8..e7bf032265e0 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
@@ -63,8 +63,15 @@
&ecspi2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_spi2>;
- cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
+ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>,
+ <&gpio1 10 GPIO_ACTIVE_LOW>;
status = "okay";
+
+ tpm@1 {
+ compatible = "tcg,tpm_tis-spi";
+ reg = <0x1>;
+ spi-max-frequency = <36000000>;
+ };
};
&gpio4 {
@@ -228,6 +235,7 @@
MX8MP_IOMUXC_ECSPI2_MOSI__ECSPI2_MOSI 0x140
MX8MP_IOMUXC_ECSPI2_MISO__ECSPI2_MISO 0x140
MX8MP_IOMUXC_ECSPI2_SS0__GPIO5_IO13 0x140
+ MX8MP_IOMUXC_GPIO1_IO10__GPIO1_IO10 0x140
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index c24587c895e1..41c79d2ebdd6 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -103,7 +103,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index 628ffba69862..d5c400b355af 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -115,7 +115,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
index 9caf7ca25444..cae586cd45bd 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
@@ -196,7 +196,7 @@
status = "okay";
tpm@0 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x0>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
index c3305f0d4001..faa17cbbe2fd 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
@@ -552,7 +552,7 @@
regulator-name = "On-module +V3.3_ADC (LDO4)";
};
- LDO5 {
+ reg_vdd_sdio: LDO5 {
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <1800000>;
regulator-name = "On-module +V3.3_1.8_SD (LDO5)";
@@ -885,6 +885,7 @@
pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>;
pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>;
vmmc-supply = <&reg_usdhc2_vmmc>;
+ vqmmc-supply = <&reg_vdd_sdio>;
};
/* On-module eMMC */
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index 76c73daf546b..bfc5c81a5bd4 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1636,8 +1636,10 @@
<&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF_ROOT>,
<&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pclk", "wrap", "phy", "axi";
- assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>;
- assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>;
+ assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>,
+ <&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF>;
+ assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>,
+ <&clk IMX8MP_CLK_24M>;
assigned-clock-rates = <500000000>;
power-domains = <&media_blk_ctrl IMX8MP_MEDIABLK_PD_MIPI_CSI2_1>;
status = "disabled";
@@ -1670,8 +1672,10 @@
<&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF_ROOT>,
<&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pclk", "wrap", "phy", "axi";
- assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM2_PIX>;
- assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>;
+ assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>,
+ <&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF>;
+ assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>,
+ <&clk IMX8MP_CLK_24M>;
assigned-clock-rates = <266000000>;
power-domains = <&media_blk_ctrl IMX8MP_MEDIABLK_PD_MIPI_CSI2_2>;
status = "disabled";
@@ -1820,7 +1824,7 @@
compatible = "fsl,imx8mp-ldb";
reg = <0x5c 0x4>, <0x128 0x4>;
reg-names = "ldb", "lvds";
- clocks = <&clk IMX8MP_CLK_MEDIA_LDB>;
+ clocks = <&clk IMX8MP_CLK_MEDIA_LDB_ROOT>;
clock-names = "ldb";
assigned-clocks = <&clk IMX8MP_CLK_MEDIA_LDB>;
assigned-clock-parents = <&clk IMX8MP_VIDEO_PLL1_OUT>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
index 6376417e918c..d8cf1f27c3ec 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
@@ -65,7 +65,7 @@
status = "okay";
tpm@0 {
- compatible = "infineon,slb9670";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
reg = <0>;
spi-max-frequency = <43000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq-mba8mx.dts
index b302daca4ce6..0165f3a25985 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq-mba8mx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq-mba8mx.dts
@@ -28,18 +28,6 @@
id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>;
};
- pcie0_refclk: pcie0-refclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <100000000>;
- };
-
- pcie1_refclk: pcie1-refclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <100000000>;
- };
-
reg_otg_vbus: regulator-otg-vbus {
compatible = "regulator-fixed";
pinctrl-names = "default";
@@ -103,23 +91,24 @@
gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>;
};
+/* PCIe slot on X36 */
&pcie0 {
reset-gpio = <&expander0 14 GPIO_ACTIVE_LOW>;
clocks = <&clk IMX8MQ_CLK_PCIE1_ROOT>,
- <&pcie0_refclk>,
- <&clk IMX8MQ_CLK_PCIE1_PHY>,
+ <&pcieclk 3>,
+ <&pcieclk 2>,
<&clk IMX8MQ_CLK_PCIE1_AUX>;
status = "okay";
};
/*
- * miniPCIe, also usable for cards with USB. Therefore configure the reset as
+ * miniPCIe on X28, also usable for cards with USB. Therefore configure the reset as
* static gpio hog.
*/
&pcie1 {
clocks = <&clk IMX8MQ_CLK_PCIE2_ROOT>,
- <&pcie1_refclk>,
- <&clk IMX8MQ_CLK_PCIE2_PHY>,
+ <&pcieclk 1>,
+ <&pcieclk 0>,
<&clk IMX8MQ_CLK_PCIE2_AUX>;
status = "okay";
};
@@ -171,6 +160,7 @@
};
&usb3_phy1 {
+ vbus-supply = <&reg_hub_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval-v1.2.dts b/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval-v1.2.dts
new file mode 100644
index 000000000000..8466a8204ed0
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval-v1.2.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2024 Toradex
+ */
+
+/dts-v1/;
+
+#include "imx8qm-apalis.dtsi"
+#include "imx8-apalis-eval-v1.2.dtsi"
+
+/ {
+ model = "Toradex Apalis iMX8QM/QP on Apalis Evaluation Board V1.2";
+ compatible = "toradex,apalis-imx8-eval-v1.2",
+ "toradex,apalis-imx8",
+ "fsl,imx8qm";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval.dts b/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval.dts
index 5ab0921eb599..b0ebf6d05450 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qm-apalis-eval.dts
@@ -6,7 +6,7 @@
/dts-v1/;
#include "imx8qm-apalis.dtsi"
-#include "imx8-apalis-eval.dtsi"
+#include "imx8-apalis-eval-v1.1.dtsi"
/ {
model = "Toradex Apalis iMX8QM/QP on Apalis Evaluation Board";
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval-v1.2.dts b/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval-v1.2.dts
new file mode 100644
index 000000000000..92c0ae0c0337
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval-v1.2.dts
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright 2024 Toradex
+ */
+
+/dts-v1/;
+
+#include "imx8qm-apalis-v1.1.dtsi"
+#include "imx8-apalis-eval-v1.2.dtsi"
+
+/ {
+ model = "Toradex Apalis iMX8QM V1.1 on Apalis Evaluation Board V1.2";
+ compatible = "toradex,apalis-imx8-v1.1-eval-v1.2",
+ "toradex,apalis-imx8-v1.1",
+ "fsl,imx8qm";
+};
+
+/* Apalis MMC1 */
+&usdhc2 {
+ /delete-property/ no-1-8-v;
+};
+
+/* Apalis SD1 */
+&usdhc3 {
+ /delete-property/ no-1-8-v;
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval.dts b/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval.dts
index c8ff75831556..c998e542f93c 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qm-apalis-v1.1-eval.dts
@@ -6,7 +6,7 @@
/dts-v1/;
#include "imx8qm-apalis-v1.1.dtsi"
-#include "imx8-apalis-eval.dtsi"
+#include "imx8-apalis-eval-v1.1.dtsi"
/ {
model = "Toradex Apalis iMX8QM V1.1 on Apalis Evaluation Board";
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
index 6d50838ad17d..77ac0efdfaad 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
@@ -41,6 +41,18 @@
};
};
+&i2c1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default", "gpio";
+ pinctrl-0 = <&pinctrl_i2c1>;
+ pinctrl-1 = <&pinctrl_i2c1_gpio>;
+ scl-gpios = <&lsio_gpio0 14 GPIO_ACTIVE_HIGH>;
+ sda-gpios = <&lsio_gpio0 15 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
+
&lpuart0 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_lpuart0>;
@@ -104,6 +116,20 @@
};
&iomuxc {
+ pinctrl_i2c1: i2c1grp {
+ fsl,pins = <
+ IMX8QM_GPT0_CLK_DMA_I2C1_SCL 0x0600004c
+ IMX8QM_GPT0_CAPTURE_DMA_I2C1_SDA 0x0600004c
+ >;
+ };
+
+ pinctrl_i2c1_gpio: i2c1gpio-grp {
+ fsl,pins = <
+ IMX8QM_GPT0_CLK_LSIO_GPIO0_IO14 0xc600004c
+ IMX8QM_GPT0_CAPTURE_LSIO_GPIO0_IO15 0xc600004c
+ >;
+ };
+
pinctrl_fec1: fec1grp {
fsl,pins = <
IMX8QM_ENET0_MDC_CONN_ENET0_MDC 0x06000020
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-conn.dtsi
index ec1639174e2e..545e175c88b3 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-conn.dtsi
@@ -6,20 +6,25 @@
&fec1 {
compatible = "fsl,imx8qm-fec", "fsl,imx6sx-fec";
+ iommus = <&smmu 0x12 0x7f80>;
};
&fec2 {
compatible = "fsl,imx8qm-fec", "fsl,imx6sx-fec";
+ iommus = <&smmu 0x12 0x7f80>;
};
&usdhc1 {
compatible = "fsl,imx8qm-usdhc", "fsl,imx8qxp-usdhc", "fsl,imx7d-usdhc";
+ iommus = <&smmu 0x11 0x7f80>;
};
&usdhc2 {
compatible = "fsl,imx8qm-usdhc", "fsl,imx8qxp-usdhc", "fsl,imx7d-usdhc";
+ iommus = <&smmu 0x11 0x7f80>;
};
&usdhc3 {
compatible = "fsl,imx8qm-usdhc", "fsl,imx8qxp-usdhc", "fsl,imx7d-usdhc";
+ iommus = <&smmu 0x11 0x7f80>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 69cb8676732e..11626fae5f97 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -17,6 +17,32 @@
power-domains = <&pd IMX_SC_R_UART_4>;
};
+ i2c4: i2c@5a840000 {
+ compatible = "fsl,imx8qm-lpi2c", "fsl,imx7ulp-lpi2c";
+ reg = <0x5a840000 0x4000>;
+ interrupts = <GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&gic>;
+ clocks = <&i2c4_lpcg 0>,
+ <&i2c4_lpcg 1>;
+ clock-names = "per", "ipg";
+ assigned-clocks = <&clk IMX_SC_R_I2C_4 IMX_SC_PM_CLK_PER>;
+ assigned-clock-rates = <24000000>;
+ power-domains = <&pd IMX_SC_R_I2C_4>;
+ status = "disabled";
+ };
+
+ i2c4_lpcg: clock-controller@5ac40000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x5ac40000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX_SC_R_I2C_4 IMX_SC_PM_CLK_PER>,
+ <&dma_ipg_clk>;
+ clock-indices = <IMX_LPCG_CLK_0>, <IMX_LPCG_CLK_4>;
+ clock-output-names = "i2c4_lpcg_clk",
+ "i2c4_lpcg_ipg_clk";
+ power-domains = <&pd IMX_SC_R_I2C_4>;
+ };
+
can1_lpcg: clock-controller@5ace0000 {
compatible = "fsl,imx8qxp-lpcg";
reg = <0x5ace0000 0x10000>;
@@ -96,15 +122,30 @@
status = "okay";
};
+/* It is eDMA1 in 8QM RM, but 8QXP it is eDMA3 */
&edma3 {
+ reg = <0x5a9f0000 0x210000>;
+ dma-channels = <10>;
+ interrupts = <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 426 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 427 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 428 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 431 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 433 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&pd IMX_SC_R_DMA_1_CH0>,
- <&pd IMX_SC_R_DMA_1_CH1>,
- <&pd IMX_SC_R_DMA_1_CH2>,
- <&pd IMX_SC_R_DMA_1_CH3>,
- <&pd IMX_SC_R_DMA_1_CH4>,
- <&pd IMX_SC_R_DMA_1_CH5>,
- <&pd IMX_SC_R_DMA_1_CH6>,
- <&pd IMX_SC_R_DMA_1_CH7>;
+ <&pd IMX_SC_R_DMA_1_CH1>,
+ <&pd IMX_SC_R_DMA_1_CH2>,
+ <&pd IMX_SC_R_DMA_1_CH3>,
+ <&pd IMX_SC_R_DMA_1_CH4>,
+ <&pd IMX_SC_R_DMA_1_CH5>,
+ <&pd IMX_SC_R_DMA_1_CH6>,
+ <&pd IMX_SC_R_DMA_1_CH7>,
+ <&pd IMX_SC_R_DMA_1_CH8>,
+ <&pd IMX_SC_R_DMA_1_CH9>;
};
&flexcan1 {
diff --git a/arch/arm64/boot/dts/freescale/imx8qm.dtsi b/arch/arm64/boot/dts/freescale/imx8qm.dtsi
index 31744fc1ab08..b3d01677b70c 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm.dtsi
@@ -265,6 +265,47 @@
<GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>; /* Hypervisor */
};
+ smmu: iommu@51400000 {
+ compatible = "arm,mmu-500";
+ interrupt-parent = <&gic>;
+ reg = <0 0x51400000 0 0x40000>;
+ #global-interrupts = <1>;
+ #iommu-cells = <2>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
system-controller {
compatible = "fsl,imx-scu";
mbox-names = "tx0",
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp-mba8xx.dts b/arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp-mba8xx.dts
new file mode 100644
index 000000000000..7d2e98bf8bc5
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp-mba8xx.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR X11)
+/*
+ * Copyright 2018-2023 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+/dts-v1/;
+
+#include "imx8qxp-tqma8xqp.dtsi"
+#include "mba8xx.dtsi"
+
+/ {
+ model = "TQ-Systems i.MX8QXP TQMa8XQP on MBa8Xx";
+ compatible = "tq,imx8qxp-tqma8xqp-mba8xx", "tq,imx8qxp-tqma8xqp", "fsl,imx8qxp";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp.dtsi
new file mode 100644
index 000000000000..b14040bf4ddd
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-tqma8xqp.dtsi
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR X11)
+/*
+ * Copyright 2018-2023 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+#include "imx8qxp.dtsi"
+#include "tqma8xx.dtsi"
+
+/ {
+ model = "TQ-Systems i.MX8QXP TQMa8XQP";
+ compatible = "tq,imx8qxp-tqma8xqp", "fsl,imx8qxp";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
index 958267b33340..10e16d84c0c3 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
@@ -260,6 +260,13 @@
<GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>; /* Hypervisor */
};
+ clk_dummy: clock-dummy {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ clock-output-names = "clk_dummy";
+ };
+
xtal32k: clock-xtal32k {
compatible = "fixed-clock";
#clock-cells = <0>;
@@ -310,6 +317,7 @@
/* sorted in register address */
#include "imx8-ss-img.dtsi"
#include "imx8-ss-vpu.dtsi"
+ #include "imx8-ss-gpu0.dtsi"
#include "imx8-ss-adma.dtsi"
#include "imx8-ss-conn.dtsi"
#include "imx8-ss-ddr.dtsi"
diff --git a/arch/arm64/boot/dts/freescale/imx8ulp-evk.dts b/arch/arm64/boot/dts/freescale/imx8ulp-evk.dts
index 69dd8e31027c..24bb253b938d 100644
--- a/arch/arm64/boot/dts/freescale/imx8ulp-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8ulp-evk.dts
@@ -37,7 +37,7 @@
no-map;
};
- rsc_table: rsc-table@1fff8000{
+ rsc_table: rsc-table@1fff8000 {
reg = <0 0x1fff8000 0 0x1000>;
no-map;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93-phyboard-segin.dts b/arch/arm64/boot/dts/freescale/imx93-phyboard-segin.dts
new file mode 100644
index 000000000000..85fb188b057f
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx93-phyboard-segin.dts
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2023 PHYTEC Messtechnik GmbH
+ * Author: Wadim Egorov <w.egorov@phytec.de>, Christoph Stoidner <c.stoidner@phytec.de>
+ * Copyright (C) 2024 Mathieu Othacehe <m.othacehe@gmail.com>
+ *
+ * Product homepage:
+ * phyBOARD-Segin carrier board is reused for the i.MX93 design.
+ * https://www.phytec.eu/en/produkte/single-board-computer/phyboard-segin-imx6ul/
+ */
+/dts-v1/;
+
+#include "imx93-phycore-som.dtsi"
+
+/{
+ model = "PHYTEC phyBOARD-Segin-i.MX93";
+ compatible = "phytec,imx93-phyboard-segin", "phytec,imx93-phycore-som",
+ "fsl,imx93";
+
+ chosen {
+ stdout-path = &lpuart1;
+ };
+
+ reg_usdhc2_vmmc: regulator-usdhc2 {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio3 7 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usdhc2_vmmc>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "VCC_SD";
+ };
+};
+
+/* Console */
+&lpuart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart1>;
+ status = "okay";
+};
+
+/* eMMC */
+&usdhc1 {
+ no-1-8-v;
+};
+
+/* SD-Card */
+&usdhc2 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc2_default>, <&pinctrl_usdhc2_cd>;
+ pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_cd>;
+ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>;
+ bus-width = <4>;
+ cd-gpios = <&gpio3 0 GPIO_ACTIVE_LOW>;
+ no-mmc;
+ no-sdio;
+ vmmc-supply = <&reg_usdhc2_vmmc>;
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_uart1: uart1grp {
+ fsl,pins = <
+ MX93_PAD_UART1_RXD__LPUART1_RX 0x31e
+ MX93_PAD_UART1_TXD__LPUART1_TX 0x30e
+ >;
+ };
+
+ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
+ fsl,pins = <
+ MX93_PAD_SD2_RESET_B__GPIO3_IO07 0x31e
+ >;
+ };
+
+ pinctrl_usdhc2_cd: usdhc2cdgrp {
+ fsl,pins = <
+ MX93_PAD_SD2_CD_B__GPIO3_IO00 0x31e
+ >;
+ };
+
+ pinctrl_usdhc2_default: usdhc2grp {
+ fsl,pins = <
+ MX93_PAD_SD2_CLK__USDHC2_CLK 0x179e
+ MX93_PAD_SD2_CMD__USDHC2_CMD 0x139e
+ MX93_PAD_SD2_DATA0__USDHC2_DATA0 0x138e
+ MX93_PAD_SD2_DATA1__USDHC2_DATA1 0x138e
+ MX93_PAD_SD2_DATA2__USDHC2_DATA2 0x138e
+ MX93_PAD_SD2_DATA3__USDHC2_DATA3 0x139e
+ MX93_PAD_SD2_VSELECT__USDHC2_VSELECT 0x51e
+ >;
+ };
+
+ pinctrl_usdhc2_100mhz: usdhc2grp {
+ fsl,pins = <
+ MX93_PAD_SD2_CLK__USDHC2_CLK 0x179e
+ MX93_PAD_SD2_CMD__USDHC2_CMD 0x139e
+ MX93_PAD_SD2_DATA0__USDHC2_DATA0 0x138e
+ MX93_PAD_SD2_DATA1__USDHC2_DATA1 0x138e
+ MX93_PAD_SD2_DATA2__USDHC2_DATA2 0x139e
+ MX93_PAD_SD2_DATA3__USDHC2_DATA3 0x139e
+ MX93_PAD_SD2_VSELECT__USDHC2_VSELECT 0x51e
+ >;
+ };
+
+ pinctrl_usdhc2_200mhz: usdhc2grp {
+ fsl,pins = <
+ MX93_PAD_SD2_CLK__USDHC2_CLK 0x178e
+ MX93_PAD_SD2_CMD__USDHC2_CMD 0x139e
+ MX93_PAD_SD2_DATA0__USDHC2_DATA0 0x139e
+ MX93_PAD_SD2_DATA1__USDHC2_DATA1 0x139e
+ MX93_PAD_SD2_DATA2__USDHC2_DATA2 0x139e
+ MX93_PAD_SD2_DATA3__USDHC2_DATA3 0x139e
+ MX93_PAD_SD2_VSELECT__USDHC2_VSELECT 0x51e
+ >;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx93-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx93-phycore-som.dtsi
new file mode 100644
index 000000000000..88c2657b50e6
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx93-phycore-som.dtsi
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2023 PHYTEC Messtechnik GmbH
+ * Author: Wadim Egorov <w.egorov@phytec.de>, Christoph Stoidner <c.stoidner@phytec.de>
+ * Copyright (C) 2024 Mathieu Othacehe <m.othacehe@gmail.com>
+ *
+ * Product homepage:
+ * https://www.phytec.eu/en/produkte/system-on-modules/phycore-imx-91-93/
+ */
+
+#include <dt-bindings/leds/common.h>
+
+#include "imx93.dtsi"
+
+/{
+ model = "PHYTEC phyCORE-i.MX93";
+ compatible = "phytec,imx93-phycore-som", "fsl,imx93";
+
+ reserved-memory {
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ alloc-ranges = <0 0x80000000 0 0x40000000>;
+ size = <0 0x10000000>;
+ linux,cma-default;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_leds>;
+
+ led-0 {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_HEARTBEAT;
+ gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ };
+ };
+};
+
+/* Ethernet */
+&fec {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fec>;
+ phy-mode = "rmii";
+ phy-handle = <&ethphy1>;
+ fsl,magic-packet;
+ assigned-clocks = <&clk IMX93_CLK_ENET_TIMER1>,
+ <&clk IMX93_CLK_ENET_REF>,
+ <&clk IMX93_CLK_ENET_REF_PHY>;
+ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>,
+ <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>,
+ <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>;
+ assigned-clock-rates = <100000000>, <50000000>, <50000000>;
+ status = "okay";
+
+ mdio: mdio {
+ clock-frequency = <5000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy1: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <1>;
+ };
+ };
+};
+
+/* eMMC */
+&usdhc1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usdhc1>;
+ bus-width = <8>;
+ non-removable;
+ status = "okay";
+};
+
+/* Watchdog */
+&wdog3 {
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_fec: fecgrp {
+ fsl,pins = <
+ MX93_PAD_ENET2_MDC__ENET1_MDC 0x50e
+ MX93_PAD_ENET2_MDIO__ENET1_MDIO 0x502
+ MX93_PAD_ENET2_RD0__ENET1_RGMII_RD0 0x57e
+ MX93_PAD_ENET2_RD1__ENET1_RGMII_RD1 0x57e
+ MX93_PAD_ENET2_RXC__ENET1_RX_ER 0x5fe
+ MX93_PAD_ENET2_RX_CTL__ENET1_RGMII_RX_CTL 0x57e
+ MX93_PAD_ENET2_TD0__ENET1_RGMII_TD0 0x50e
+ MX93_PAD_ENET2_TD1__ENET1_RGMII_TD1 0x50e
+ MX93_PAD_ENET2_TX_CTL__ENET1_RGMII_TX_CTL 0x50e
+ MX93_PAD_ENET2_TD2__ENET1_TX_CLK 0x4000050e
+ >;
+ };
+
+ pinctrl_leds: ledsgrp {
+ fsl,pins = <
+ MX93_PAD_I2C1_SDA__GPIO1_IO01 0x31e
+ >;
+ };
+
+ pinctrl_usdhc1: usdhc1grp {
+ fsl,pins = <
+ MX93_PAD_SD1_CLK__USDHC1_CLK 0x179e
+ MX93_PAD_SD1_CMD__USDHC1_CMD 0x1386
+ MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x138e
+ MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x1386
+ MX93_PAD_SD1_DATA2__USDHC1_DATA2 0x138e
+ MX93_PAD_SD1_DATA3__USDHC1_DATA3 0x1386
+ MX93_PAD_SD1_DATA4__USDHC1_DATA4 0x1386
+ MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x1386
+ MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x1386
+ MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x1386
+ MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x179e
+ >;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
index f6e422dc2663..9d2328c185c9 100644
--- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
@@ -122,10 +122,8 @@
/* protectable identification memory (part of M24C64-D @57) */
eeprom@5f {
- compatible = "st,24c64", "atmel,24c64";
+ compatible = "atmel,24c64d-wl";
reg = <0x5f>;
- size = <32>;
- pagesize = <32>;
vcc-supply = <&reg_v3v3>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93-var-som-symphony.dts b/arch/arm64/boot/dts/freescale/imx93-var-som-symphony.dts
new file mode 100644
index 000000000000..576d6982a4a0
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx93-var-som-symphony.dts
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright 2021 NXP
+ * Copyright 2023 Variscite Ltd.
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/leds/common.h>
+#include "imx93-var-som.dtsi"
+
+/{
+ model = "Variscite VAR-SOM-MX93 on Symphony evaluation board";
+ compatible = "variscite,var-som-mx93-symphony",
+ "variscite,var-som-mx93", "fsl,imx93";
+
+ aliases {
+ ethernet0 = &eqos;
+ ethernet1 = &fec;
+ };
+
+ chosen {
+ stdout-path = &lpuart1;
+ };
+
+ /*
+ * Needed only for Symphony <= v1.5
+ */
+ reg_fec_phy: regulator-fec-phy {
+ compatible = "regulator-fixed";
+ regulator-name = "fec-phy";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-enable-ramp-delay = <20000>;
+ gpio = <&pca9534 7 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ regulator-always-on;
+ };
+
+ reg_usdhc2_vmmc: regulator-usdhc2 {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_usdhc2_vmmc>;
+ regulator-name = "VSD_3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio2 18 GPIO_ACTIVE_HIGH>;
+ off-on-delay-us = <20000>;
+ enable-active-high;
+ };
+
+ reg_vref_1v8: regulator-adc-vref {
+ compatible = "regulator-fixed";
+ regulator-name = "vref_1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ ethosu_mem: ethosu-region@88000000 {
+ compatible = "shared-dma-pool";
+ reusable;
+ reg = <0x0 0x88000000 0x0 0x8000000>;
+ };
+
+ vdev0vring0: vdev0vring0@87ee0000 {
+ reg = <0 0x87ee0000 0 0x8000>;
+ no-map;
+ };
+
+ vdev0vring1: vdev0vring1@87ee8000 {
+ reg = <0 0x87ee8000 0 0x8000>;
+ no-map;
+ };
+
+ vdev1vring0: vdev1vring0@87ef0000 {
+ reg = <0 0x87ef0000 0 0x8000>;
+ no-map;
+ };
+
+ vdev1vring1: vdev1vring1@87ef8000 {
+ reg = <0 0x87ef8000 0 0x8000>;
+ no-map;
+ };
+
+ rsc_table: rsc-table@2021f000 {
+ reg = <0 0x2021f000 0 0x1000>;
+ no-map;
+ };
+
+ vdevbuffer: vdevbuffer@87f00000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x87f00000 0 0x100000>;
+ no-map;
+ };
+
+ ele_reserved: ele-reserved@87de0000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x87de0000 0 0x100000>;
+ no-map;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ key-back {
+ label = "Back";
+ gpios = <&pca9534 1 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_BACK>;
+ };
+
+ key-home {
+ label = "Home";
+ gpios = <&pca9534 2 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_HOME>;
+ };
+
+ key-menu {
+ label = "Menu";
+ gpios = <&pca9534 3 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_MENU>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_GREEN>;
+ gpios = <&pca9534 0 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ };
+ };
+};
+
+/* Use external instead of internal RTC*/
+&bbnsm_rtc {
+ status = "disabled";
+};
+
+&eqos {
+ mdio {
+ ethphy1: ethernet-phy@5 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <5>;
+ qca,disable-smarteee;
+ eee-broken-1000t;
+ reset-gpios = <&pca9534 5 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <10000>;
+ reset-deassert-us = <20000>;
+ vddio-supply = <&vddio1>;
+
+ vddio1: vddio-regulator {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+ };
+ };
+};
+
+&fec {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fec>;
+ phy-mode = "rgmii";
+ phy-handle = <&ethphy1>;
+ phy-supply = <&reg_fec_phy>;
+ status = "okay";
+};
+
+&flexcan1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flexcan1>;
+ status = "okay";
+};
+
+&lpi2c1 {
+ clock-frequency = <400000>;
+ pinctrl-names = "default", "sleep", "gpio";
+ pinctrl-0 = <&pinctrl_lpi2c1>;
+ pinctrl-1 = <&pinctrl_lpi2c1_gpio>;
+ pinctrl-2 = <&pinctrl_lpi2c1_gpio>;
+ scl-gpios = <&gpio1 0 GPIO_ACTIVE_HIGH>;
+ sda-gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+
+ /* DS1337 RTC module */
+ rtc@68 {
+ compatible = "dallas,ds1337";
+ reg = <0x68>;
+ };
+};
+
+&lpi2c5 {
+ clock-frequency = <400000>;
+ pinctrl-names = "default", "sleep", "gpio";
+ pinctrl-0 = <&pinctrl_lpi2c5>;
+ pinctrl-1 = <&pinctrl_lpi2c5_gpio>;
+ pinctrl-2 = <&pinctrl_lpi2c5_gpio>;
+ scl-gpios = <&gpio2 23 GPIO_ACTIVE_HIGH>;
+ sda-gpios = <&gpio2 22 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+
+ pca9534: gpio@20 {
+ compatible = "nxp,pca9534";
+ reg = <0x20>;
+ gpio-controller;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pca9534>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+ #gpio-cells = <2>;
+ wakeup-source;
+ };
+};
+
+/* Console */
+&lpuart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart1>;
+ status = "okay";
+};
+
+/* J18.7, J18.9 */
+&lpuart6 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart6>;
+ status = "okay";
+};
+
+/* SD */
+&usdhc2 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+ pinctrl-1 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+ pinctrl-2 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+ cd-gpios = <&gpio3 00 GPIO_ACTIVE_LOW>;
+ vmmc-supply = <&reg_usdhc2_vmmc>;
+ bus-width = <4>;
+ status = "okay";
+ no-sdio;
+ no-mmc;
+};
+
+/* Watchdog */
+&wdog3 {
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_fec: fecgrp {
+ fsl,pins = <
+ MX93_PAD_ENET2_RD0__ENET1_RGMII_RD0 0x57e
+ MX93_PAD_ENET2_RD1__ENET1_RGMII_RD1 0x57e
+ MX93_PAD_ENET2_RD2__ENET1_RGMII_RD2 0x57e
+ MX93_PAD_ENET2_RD3__ENET1_RGMII_RD3 0x57e
+ MX93_PAD_ENET2_RXC__ENET1_RGMII_RXC 0x5fe
+ MX93_PAD_ENET2_RX_CTL__ENET1_RGMII_RX_CTL 0x57e
+ MX93_PAD_ENET2_TD0__ENET1_RGMII_TD0 0x57e
+ MX93_PAD_ENET2_TD1__ENET1_RGMII_TD1 0x57e
+ MX93_PAD_ENET2_TD2__ENET1_RGMII_TD2 0x57e
+ MX93_PAD_ENET2_TD3__ENET1_RGMII_TD3 0x57e
+ MX93_PAD_ENET2_TXC__ENET1_RGMII_TXC 0x5fe
+ MX93_PAD_ENET2_TX_CTL__ENET1_RGMII_TX_CTL 0x57e
+ >;
+ };
+
+ pinctrl_flexcan1: flexcan1grp {
+ fsl,pins = <
+ MX93_PAD_PDM_CLK__CAN1_TX 0x139e
+ MX93_PAD_PDM_BIT_STREAM0__CAN1_RX 0x139e
+ >;
+ };
+
+ pinctrl_lpi2c1: lpi2c1grp {
+ fsl,pins = <
+ MX93_PAD_I2C1_SCL__LPI2C1_SCL 0x40000b9e
+ MX93_PAD_I2C1_SDA__LPI2C1_SDA 0x40000b9e
+ >;
+ };
+
+ pinctrl_lpi2c1_gpio: lpi2c1gpiogrp {
+ fsl,pins = <
+ MX93_PAD_I2C1_SCL__GPIO1_IO00 0x31e
+ MX93_PAD_I2C1_SDA__GPIO1_IO01 0x31e
+ >;
+ };
+
+ pinctrl_lpi2c5: lpi2c5grp {
+ fsl,pins = <
+ MX93_PAD_GPIO_IO23__LPI2C5_SCL 0x40000b9e
+ MX93_PAD_GPIO_IO22__LPI2C5_SDA 0x40000b9e
+ >;
+ };
+
+ pinctrl_lpi2c5_gpio: lpi2c5gpiogrp {
+ fsl,pins = <
+ MX93_PAD_GPIO_IO23__GPIO2_IO23 0x31e
+ MX93_PAD_GPIO_IO22__GPIO2_IO22 0x31e
+ >;
+ };
+
+ pinctrl_pca9534: pca9534grp {
+ fsl,pins = <
+ MX93_PAD_CCM_CLKO1__GPIO3_IO26 0x31e
+ >;
+ };
+
+ pinctrl_uart1: uart1grp {
+ fsl,pins = <
+ MX93_PAD_UART1_RXD__LPUART1_RX 0x31e
+ MX93_PAD_UART1_TXD__LPUART1_TX 0x31e
+ >;
+ };
+
+ pinctrl_uart6: uart6grp {
+ fsl,pins = <
+ MX93_PAD_GPIO_IO05__LPUART6_RX 0x31e
+ MX93_PAD_GPIO_IO04__LPUART6_TX 0x31e
+ >;
+ };
+
+ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
+ fsl,pins = <
+ MX93_PAD_GPIO_IO18__GPIO2_IO18 0x31e
+ >;
+ };
+
+ pinctrl_usdhc2: usdhc2grp {
+ fsl,pins = <
+ MX93_PAD_SD2_CLK__USDHC2_CLK 0x15fe
+ MX93_PAD_SD2_CMD__USDHC2_CMD 0x13fe
+ MX93_PAD_SD2_DATA0__USDHC2_DATA0 0x13fe
+ MX93_PAD_SD2_DATA1__USDHC2_DATA1 0x13fe
+ MX93_PAD_SD2_DATA2__USDHC2_DATA2 0x13fe
+ MX93_PAD_SD2_DATA3__USDHC2_DATA3 0x13fe
+ MX93_PAD_SD2_VSELECT__USDHC2_VSELECT 0x51e
+ >;
+ };
+
+ pinctrl_usdhc2_gpio: usdhc2gpiogrp {
+ fsl,pins = <
+ MX93_PAD_SD2_CD_B__GPIO3_IO00 0x31e
+ >;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx93-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx93-var-som.dtsi
new file mode 100644
index 000000000000..783938245e4f
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx93-var-som.dtsi
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright 2022 NXP
+ * Copyright 2023 Variscite Ltd.
+ */
+
+/dts-v1/;
+
+#include "imx93.dtsi"
+
+/{
+ model = "Variscite VAR-SOM-MX93 module";
+ compatible = "variscite,var-som-mx93", "fsl,imx93";
+
+ mmc_pwrseq: mmc-pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ post-power-on-delay-ms = <100>;
+ power-off-delay-us = <10000>;
+ reset-gpios = <&gpio4 14 GPIO_ACTIVE_LOW>, /* WIFI_RESET */
+ <&gpio3 7 GPIO_ACTIVE_LOW>; /* WIFI_PWR_EN */
+ };
+
+ reg_eqos_phy: regulator-eqos-phy {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_eqos_phy>;
+ regulator-name = "eth_phy_pwr";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio1 7 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ startup-delay-us = <100000>;
+ regulator-always-on;
+ };
+};
+
+&eqos {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_eqos>;
+ phy-mode = "rgmii";
+ phy-handle = <&ethphy0>;
+ status = "okay";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <1000000>;
+
+ ethphy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ eee-broken-1000t;
+ };
+ };
+};
+
+/* eMMC */
+&usdhc1 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc1>;
+ pinctrl-1 = <&pinctrl_usdhc1>;
+ pinctrl-2 = <&pinctrl_usdhc1>;
+ bus-width = <8>;
+ non-removable;
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_eqos: eqosgrp {
+ fsl,pins = <
+ MX93_PAD_ENET1_MDC__ENET_QOS_MDC 0x57e
+ MX93_PAD_ENET1_MDIO__ENET_QOS_MDIO 0x57e
+ MX93_PAD_ENET1_RD0__ENET_QOS_RGMII_RD0 0x57e
+ MX93_PAD_ENET1_RD1__ENET_QOS_RGMII_RD1 0x57e
+ MX93_PAD_ENET1_RD2__ENET_QOS_RGMII_RD2 0x57e
+ MX93_PAD_ENET1_RD3__ENET_QOS_RGMII_RD3 0x57e
+ MX93_PAD_ENET1_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x5fe
+ MX93_PAD_ENET1_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x57e
+ MX93_PAD_ENET1_TD0__ENET_QOS_RGMII_TD0 0x57e
+ MX93_PAD_ENET1_TD1__ENET_QOS_RGMII_TD1 0x57e
+ MX93_PAD_ENET1_TD2__ENET_QOS_RGMII_TD2 0x57e
+ MX93_PAD_ENET1_TD3__ENET_QOS_RGMII_TD3 0x57e
+ MX93_PAD_ENET1_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x5fe
+ MX93_PAD_ENET1_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x57e
+ >;
+ };
+
+ pinctrl_reg_eqos_phy: regeqosgrp {
+ fsl,pins = <
+ MX93_PAD_UART2_TXD__GPIO1_IO07 0x51e
+ >;
+ };
+
+ pinctrl_usdhc1: usdhc1grp {
+ fsl,pins = <
+ MX93_PAD_SD1_CLK__USDHC1_CLK 0x15fe
+ MX93_PAD_SD1_CMD__USDHC1_CMD 0x13fe
+ MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x13fe
+ MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x13fe
+ MX93_PAD_SD1_DATA2__USDHC1_DATA2 0x13fe
+ MX93_PAD_SD1_DATA3__USDHC1_DATA3 0x13fe
+ MX93_PAD_SD1_DATA4__USDHC1_DATA4 0x13fe
+ MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x13fe
+ MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x13fe
+ MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x13fe
+ MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x15fe
+ >;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index 8f2e7c42ad6e..601c94e1fac8 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -294,7 +294,7 @@
status = "disabled";
};
- i3c1: i3c-master@44330000 {
+ i3c1: i3c@44330000 {
compatible = "silvaco,i3c-master-v1";
reg = <0x44330000 0x10000>;
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
@@ -671,7 +671,7 @@
status = "disabled";
};
- i3c2: i3c-master@42520000 {
+ i3c2: i3c@42520000 {
compatible = "silvaco,i3c-master-v1";
reg = <0x42520000 0x10000>;
interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm64/boot/dts/freescale/mba8mx.dtsi b/arch/arm64/boot/dts/freescale/mba8mx.dtsi
index e2bc53b8d39a..427467df42bf 100644
--- a/arch/arm64/boot/dts/freescale/mba8mx.dtsi
+++ b/arch/arm64/boot/dts/freescale/mba8mx.dtsi
@@ -29,6 +29,12 @@
stdout-path = &uart3;
};
+ clk_xtal25: clk-xtal25 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ };
+
gpio-keys {
compatible = "gpio-keys";
pinctrl-names = "default";
@@ -100,12 +106,6 @@
};
};
- pcie0_refclk: pcie0-refclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <100000000>;
- };
-
reg_12v: regulator-12v {
compatible = "regulator-fixed";
regulator-name = "MBA8MX_12V";
@@ -219,7 +219,7 @@
line-name = "BOOT_CFG_OE#";
};
- rst-usb-hub-hog {
+ rst_usb_hub_hog: rst-usb-hub-hog {
gpio-hog;
gpios = <13 0>;
output-high;
@@ -264,6 +264,13 @@
pagesize = <16>;
vcc-supply = <&reg_vcc_3v3>;
};
+
+ pcieclk: clk@68 {
+ compatible = "renesas,9fgv0441";
+ reg = <0x68>;
+ clocks = <&clk_xtal25>;
+ #clock-cells = <1>;
+ };
};
&i2c3 {
diff --git a/arch/arm64/boot/dts/freescale/mba8xx.dtsi b/arch/arm64/boot/dts/freescale/mba8xx.dtsi
new file mode 100644
index 000000000000..276d1683b03b
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/mba8xx.dtsi
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR X11)
+/*
+ * Copyright 2018-2023 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/net/ti-dp83867.h>
+
+/ {
+ adc {
+ compatible = "iio-hwmon";
+ io-channels = <&adc0 0>, <&adc0 1>, <&adc0 2>, <&adc0 3>;
+ };
+
+ aliases {
+ rtc0 = &pcf85063;
+ rtc1 = &rtc;
+ };
+
+ backlight_lvds: backlight-lvds {
+ compatible = "pwm-backlight";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_bl_lvds>;
+ pwms = <&adma_pwm 0 5000000 0>;
+ brightness-levels = <0 4 8 16 32 64 128 255>;
+ default-brightness-level = <7>;
+ power-supply = <&reg_12v0>;
+ enable-gpios = <&lsio_gpio1 30 GPIO_ACTIVE_HIGH>;
+ status = "disabled";
+ };
+
+ chosen {
+ stdout-path = &lpuart1;
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpiobuttons>;
+ autorepeat;
+
+ switch-a {
+ label = "switcha";
+ linux,code = <BTN_0>;
+ gpios = <&lsio_gpio1 13 GPIO_ACTIVE_LOW>;
+ };
+
+ switch-b {
+ label = "switchb";
+ linux,code = <BTN_1>;
+ gpios = <&lsio_gpio1 14 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led1 {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_STATUS;
+ gpios = <&expander 1 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "default-on";
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_HEARTBEAT;
+ gpios = <&expander 2 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ };
+ };
+
+ /* TODO LVDS panels */
+
+ reg_12v0: regulator-12v0 {
+ compatible = "regulator-fixed";
+ regulator-name = "V_12V";
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
+ gpio = <&expander 6 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+
+ reg_pcie_1v5: regulator-pcie-1v5 {
+ compatible = "regulator-fixed";
+ regulator-name = "MBA8XX_PCIE_1V5";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_pcie_1v5>;
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <1500000>;
+ gpio = <&lsio_gpio0 30 GPIO_ACTIVE_HIGH>;
+ startup-delay-us = <1000>;
+ enable-active-high;
+ };
+
+ reg_pcie_3v3: regulator-pcie-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "MBA8XX_PCIE_3V3";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_reg_pcie_3v3>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&lsio_gpio0 31 GPIO_ACTIVE_HIGH>;
+ startup-delay-us = <1000>;
+ enable-active-high;
+ regulator-always-on;
+ };
+
+ reg_3v3_mb: regulator-usdhc2-vmmc {
+ compatible = "regulator-fixed";
+ regulator-name = "V_3V3_MB";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ sound {
+ compatible = "fsl,imx-audio-tlv320aic32x4";
+ model = "tqm-tlv320aic32";
+ audio-codec = <&tlv320aic3x04>;
+ ssi-controller = <&sai1>;
+ };
+};
+
+&adc0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc0>;
+ vref-supply = <&reg_1v8>;
+ #io-channel-cells = <1>;
+ status = "okay";
+};
+
+&adma_pwm {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_admapwm>;
+};
+
+&fec1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fec1>;
+ phy-mode = "rgmii-id";
+ phy-handle = <&ethphy0>;
+ status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethphy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ethphy0>;
+ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
+ ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
+ ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+ ti,dp83867-rxctrl-strap-quirk;
+ ti,clk-output-sel = <DP83867_CLK_O_SEL_OFF>;
+ reset-gpios = <&lsio_gpio3 2 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <500000>;
+ reset-deassert-us = <50000>;
+ enet-phy-lane-no-swap;
+ interrupt-parent = <&lsio_gpio3>;
+ interrupts = <0 IRQ_TYPE_EDGE_FALLING>;
+ };
+
+ ethphy3: ethernet-phy@3 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ethphy3>;
+ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
+ ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
+ ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+ ti,dp83867-rxctrl-strap-quirk;
+ ti,clk-output-sel = <DP83867_CLK_O_SEL_OFF>;
+ reset-gpios = <&lsio_gpio3 3 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <500000>;
+ reset-deassert-us = <50000>;
+ enet-phy-lane-no-swap;
+ interrupt-parent = <&lsio_gpio3>;
+ interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
+ };
+ };
+};
+
+&fec2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fec2>;
+ phy-mode = "rgmii-id";
+ phy-handle = <&ethphy3>;
+ status = "okay";
+};
+
+&flexcan1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_can0>;
+ xceiver-supply = <&reg_3v3>;
+ status = "okay";
+};
+
+&flexcan2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_can1>;
+ xceiver-supply = <&reg_3v3>;
+ status = "okay";
+};
+
+&i2c1 {
+ tlv320aic3x04: audio-codec@18 {
+ compatible = "ti,tlv320aic32x4";
+ reg = <0x18>;
+ clocks = <&mclkout0_lpcg 0>;
+ clock-names = "mclk";
+ iov-supply = <&reg_1v8>;
+ ldoin-supply = <&reg_3v3>;
+ };
+
+ se97b_1c: temperature-sensor@1c {
+ compatible = "nxp,se97b", "jedec,jc-42.4-temp";
+ reg = <0x1c>;
+ };
+
+ at24c02_54: eeprom@54 {
+ compatible = "nxp,se97b", "atmel,24c02";
+ reg = <0x54>;
+ pagesize = <16>;
+ vcc-supply = <&reg_3v3>;
+ };
+
+ expander: gpio@70 {
+ compatible = "nxp,pca9538";
+ reg = <0x70>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pca9538>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-parent = <&lsio_gpio4>;
+ interrupts = <19 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ vcc-supply = <&reg_1v8>;
+
+ gpio-line-names = "", "LED_A",
+ "LED_B", "",
+ "DSI_EN", "USB_RESET#",
+ "V_12V_EN", "PCIE_DIS#";
+ };
+};
+
+&i2c2 {
+ clock-frequency = <100000>;
+ pinctrl-names = "default", "gpio";
+ pinctrl-0 = <&pinctrl_lpi2c2>;
+ pinctrl-1 = <&pinctrl_lpi2c2gpio>;
+ scl-gpios = <&lsio_gpio1 31 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&lsio_gpio2 0 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ status = "okay";
+};
+
+/* TODO LDB */
+
+&lpspi1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi1>;
+ cs-gpios = <&lsio_gpio0 27 GPIO_ACTIVE_LOW>, <&lsio_gpio0 29 GPIO_ACTIVE_LOW>;
+ status = "okay";
+};
+
+&lpspi2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi2>;
+ cs-gpios = <&lsio_gpio1 0 GPIO_ACTIVE_LOW>;
+ status = "okay";
+};
+
+&lpspi3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi3>;
+ num-cs = <2>;
+ cs-gpios = <&lsio_gpio0 16 GPIO_ACTIVE_LOW>;
+ status = "okay";
+};
+
+&lpuart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lpuart1>;
+ status = "okay";
+};
+
+&lpuart3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lpuart3>;
+ status = "okay";
+};
+
+&lsio_gpio3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lsgpio3>;
+ gpio-line-names = "", "", "", "",
+ "", "", "", "",
+ "", "", "", "",
+ "", "", "", "X4_15",
+ "", "", "", "",
+ "", "", "", "",
+ "", "", "", "",
+ "", "", "", "";
+};
+
+/* TODO: Mini-PCIe */
+
+&sai1 {
+ assigned-clocks = <&clk IMX_SC_R_AUDIO_PLL_0 IMX_SC_PM_CLK_PLL>,
+ <&clk IMX_SC_R_AUDIO_PLL_0 IMX_SC_PM_CLK_SLV_BUS>,
+ <&clk IMX_SC_R_AUDIO_PLL_0 IMX_SC_PM_CLK_MST_BUS>,
+ <&sai1_lpcg 0>;
+ assigned-clock-rates = <786432000>, <49152000>, <12288000>, <49152000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sai1>;
+ status = "okay";
+};
+
+&usbotg1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg1>;
+ srp-disable;
+ hnp-disable;
+ adp-disable;
+ power-active-high;
+ over-current-active-low;
+ dr_mode = "otg";
+ status = "okay";
+};
+
+&usbotg3 {
+ status = "okay";
+};
+
+&usbotg3_cdns3 {
+ dr_mode = "host";
+ status = "okay";
+};
+
+&usbphy1 {
+ status = "okay";
+};
+
+&usb3_phy {
+ status = "okay";
+};
+
+&usdhc2 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc2>, <&pinctrl_usdhc2_gpio>;
+ pinctrl-1 = <&pinctrl_usdhc2_100mhz>, <&pinctrl_usdhc2_gpio>;
+ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>;
+ bus-width = <4>;
+ cd-gpios = <&lsio_gpio4 22 GPIO_ACTIVE_LOW>;
+ wp-gpios = <&lsio_gpio4 21 GPIO_ACTIVE_HIGH>;
+ vmmc-supply = <&reg_3v3_mb>;
+ no-1-8-v;
+ no-sdio;
+ no-mmc;
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_adc0: adc0grp {
+ fsl,pins = <IMX8QXP_ADC_IN0_ADMA_ADC_IN0 0x02000060>,
+ <IMX8QXP_ADC_IN1_ADMA_ADC_IN1 0x02000060>,
+ <IMX8QXP_ADC_IN2_ADMA_ADC_IN2 0x02000060>,
+ <IMX8QXP_ADC_IN3_ADMA_ADC_IN3 0x02000060>;
+ };
+
+ pinctrl_admapwm: admapwmgrp {
+ fsl,pins = <IMX8QXP_SPI0_CS1_ADMA_LCD_PWM0_OUT 0x00000021>;
+ };
+
+ pinctrl_bl_lvds: bllvdsgrp {
+ fsl,pins = <IMX8QXP_MIPI_DSI1_I2C0_SDA_LSIO_GPIO1_IO30 0x00000021>;
+ };
+
+ pinctrl_can0: can0grp {
+ fsl,pins = <IMX8QXP_UART0_RX_ADMA_FLEXCAN0_RX 0x00000021>,
+ <IMX8QXP_UART0_TX_ADMA_FLEXCAN0_TX 0x00000021>;
+ };
+
+ pinctrl_can1: can1grp {
+ fsl,pins = <IMX8QXP_UART2_RX_ADMA_FLEXCAN1_RX 0x00000021>,
+ <IMX8QXP_UART2_TX_ADMA_FLEXCAN1_TX 0x00000021>;
+ };
+
+ pinctrl_ethphy0: ethphy0grp {
+ fsl,pins = <IMX8QXP_CSI_EN_LSIO_GPIO3_IO02 0x00000040>,
+ <IMX8QXP_CSI_PCLK_LSIO_GPIO3_IO00 0x00000040>;
+ };
+
+ pinctrl_ethphy3: ethphy3grp {
+ fsl,pins = <IMX8QXP_CSI_RESET_LSIO_GPIO3_IO03 0x00000040>,
+ <IMX8QXP_CSI_MCLK_LSIO_GPIO3_IO01 0x00000040>;
+ };
+
+ pinctrl_fec1: fec1grp {
+ fsl,pins = <IMX8QXP_ENET0_MDC_CONN_ENET0_MDC 0x06000041>,
+ <IMX8QXP_ENET0_MDIO_CONN_ENET0_MDIO 0x06000041>,
+ <IMX8QXP_ENET0_RGMII_TX_CTL_CONN_ENET0_RGMII_TX_CTL 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_TXC_CONN_ENET0_RGMII_TXC 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_TXD0_CONN_ENET0_RGMII_TXD0 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_TXD1_CONN_ENET0_RGMII_TXD1 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_TXD2_CONN_ENET0_RGMII_TXD2 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_TXD3_CONN_ENET0_RGMII_TXD3 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_RX_CTL_CONN_ENET0_RGMII_RX_CTL 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_RXC_CONN_ENET0_RGMII_RXC 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_RXD0_CONN_ENET0_RGMII_RXD0 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_RXD1_CONN_ENET0_RGMII_RXD1 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_RXD2_CONN_ENET0_RGMII_RXD2 0x00000040>,
+ <IMX8QXP_ENET0_RGMII_RXD3_CONN_ENET0_RGMII_RXD3 0x00000040>;
+ };
+
+ pinctrl_fec2: fec2grp {
+ fsl,pins = <IMX8QXP_ESAI0_SCKR_CONN_ENET1_RGMII_TX_CTL 0x00000040>,
+ <IMX8QXP_ESAI0_FSR_CONN_ENET1_RGMII_TXC 0x00000040>,
+ <IMX8QXP_ESAI0_TX4_RX1_CONN_ENET1_RGMII_TXD0 0x00000040>,
+ <IMX8QXP_ESAI0_TX5_RX0_CONN_ENET1_RGMII_TXD1 0x00000040>,
+ <IMX8QXP_ESAI0_FST_CONN_ENET1_RGMII_TXD2 0x00000040>,
+ <IMX8QXP_ESAI0_SCKT_CONN_ENET1_RGMII_TXD3 0x00000040>,
+ <IMX8QXP_ESAI0_TX0_CONN_ENET1_RGMII_RXC 0x00000040>,
+ <IMX8QXP_SPDIF0_TX_CONN_ENET1_RGMII_RX_CTL 0x00000040>,
+ <IMX8QXP_SPDIF0_RX_CONN_ENET1_RGMII_RXD0 0x00000040>,
+ <IMX8QXP_ESAI0_TX3_RX2_CONN_ENET1_RGMII_RXD1 0x00000040>,
+ <IMX8QXP_ESAI0_TX2_RX3_CONN_ENET1_RGMII_RXD2 0x00000040>,
+ <IMX8QXP_ESAI0_TX1_CONN_ENET1_RGMII_RXD3 0x00000040>;
+ };
+
+ pinctrl_gpiobuttons: gpiobuttonsgrp {
+ fsl,pins = <IMX8QXP_ADC_IN5_LSIO_GPIO1_IO13 0x00000020>,
+ <IMX8QXP_ADC_IN4_LSIO_GPIO1_IO14 0x00000020>;
+ };
+
+ pinctrl_lpi2c2: lpi2c2grp {
+ fsl,pins = <IMX8QXP_MIPI_DSI1_GPIO0_00_ADMA_I2C2_SCL 0x06000021>,
+ <IMX8QXP_MIPI_DSI1_GPIO0_01_ADMA_I2C2_SDA 0x06000021>;
+ };
+
+ pinctrl_lpi2c2gpio: lpi2c2gpiogrp {
+ fsl,pins = <IMX8QXP_MIPI_DSI1_GPIO0_00_LSIO_GPIO1_IO31 0x06000021>,
+ <IMX8QXP_MIPI_DSI1_GPIO0_01_LSIO_GPIO2_IO00 0x06000021>;
+ };
+
+ pinctrl_lpuart1: lpuart1grp {
+ fsl,pins = <IMX8QXP_UART1_RX_ADMA_UART1_RX 0x06000020>,
+ <IMX8QXP_UART1_TX_ADMA_UART1_TX 0x06000020>;
+ };
+
+ pinctrl_lpuart3: lpuart3grp {
+ fsl,pins = <IMX8QXP_FLEXCAN2_RX_ADMA_UART3_RX 0x06000020>,
+ <IMX8QXP_FLEXCAN2_TX_ADMA_UART3_TX 0x06000020>;
+ };
+
+ pinctrl_lsgpio3: lsgpio3grp {
+ fsl,pins = <IMX8QXP_QSPI0A_SS1_B_LSIO_GPIO3_IO15 0x00000021>;
+ };
+
+ pinctrl_pca9538: pca9538grp {
+ fsl,pins = <IMX8QXP_USDHC1_RESET_B_LSIO_GPIO4_IO19 0x00000020>;
+ };
+
+ pinctrl_pcieb: pcieagrp {
+ fsl,pins = <IMX8QXP_PCIE_CTRL0_PERST_B_LSIO_GPIO4_IO00 0x06000041>,
+ <IMX8QXP_PCIE_CTRL0_CLKREQ_B_LSIO_GPIO4_IO01 0x06000041>,
+ <IMX8QXP_PCIE_CTRL0_WAKE_B_LSIO_GPIO4_IO02 0x04000041>;
+ };
+
+ pinctrl_reg_pcie_1v5: regpcie1v5grp {
+ fsl,pins = <IMX8QXP_SAI1_RXC_LSIO_GPIO0_IO30 0x00000021>;
+ };
+
+ pinctrl_reg_pcie_3v3: regpcie3v3grp {
+ fsl,pins = <IMX8QXP_SAI1_RXFS_LSIO_GPIO0_IO31 0x00000021>;
+ };
+
+ pinctrl_sai1: sai1grp {
+ fsl,pins = <IMX8QXP_MCLK_OUT0_ADMA_ACM_MCLK_OUT0 0x06000041>,
+ <IMX8QXP_FLEXCAN0_RX_ADMA_SAI1_TXC 0x06000041>,
+ <IMX8QXP_FLEXCAN0_TX_ADMA_SAI1_TXFS 0x06000041>,
+ <IMX8QXP_FLEXCAN1_RX_ADMA_SAI1_TXD 0x06000041>,
+ <IMX8QXP_FLEXCAN1_TX_ADMA_SAI1_RXD 0x06000041>;
+ };
+
+ pinctrl_spi1: spi1grp {
+ fsl,pins = <IMX8QXP_SAI0_TXC_ADMA_SPI1_SDI 0x00000041>,
+ <IMX8QXP_SAI0_TXD_ADMA_SPI1_SDO 0x00000041>,
+ <IMX8QXP_SAI0_TXFS_ADMA_SPI1_SCK 0x00000041>,
+ <IMX8QXP_SAI0_RXD_LSIO_GPIO0_IO27 0x00000021>,
+ <IMX8QXP_SAI1_RXD_LSIO_GPIO0_IO29 0x00000021>;
+ };
+
+ pinctrl_spi2: spi2grp {
+ fsl,pins = <IMX8QXP_SPI2_SCK_ADMA_SPI2_SCK 0x00000041>,
+ <IMX8QXP_SPI2_SDI_ADMA_SPI2_SDI 0x00000041>,
+ <IMX8QXP_SPI2_SDO_ADMA_SPI2_SDO 0x00000041>,
+ <IMX8QXP_SPI2_CS0_LSIO_GPIO1_IO00 0x00000021>;
+ };
+
+ pinctrl_spi3: spi3grp {
+ fsl,pins = <IMX8QXP_SPI3_SCK_ADMA_SPI3_SCK 0x00000041>,
+ <IMX8QXP_SPI3_SDI_ADMA_SPI3_SDI 0x00000041>,
+ <IMX8QXP_SPI3_SDO_ADMA_SPI3_SDO 0x00000041>,
+ <IMX8QXP_SPI3_CS0_LSIO_GPIO0_IO16 0x00000021>,
+ <IMX8QXP_SPI3_CS1_ADMA_SPI3_CS1 0x00000021>;
+ };
+
+ pinctrl_usbotg1: usbotg1grp {
+ fsl,pins = <IMX8QXP_USB_SS3_TC0_CONN_USB_OTG1_PWR 0x00000021>,
+ <IMX8QXP_USB_SS3_TC2_CONN_USB_OTG1_OC 0x00000021>;
+ };
+
+ pinctrl_usdhc2_gpio: usdhc2gpiogrp {
+ fsl,pins = <IMX8QXP_USDHC1_WP_LSIO_GPIO4_IO21 0x00000021>,
+ <IMX8QXP_USDHC1_CD_B_LSIO_GPIO4_IO22 0x00000021>;
+ };
+
+ pinctrl_usdhc2: usdhc2grp {
+ fsl,pins = <IMX8QXP_USDHC1_CLK_CONN_USDHC1_CLK 0x06000041>,
+ <IMX8QXP_USDHC1_CMD_CONN_USDHC1_CMD 0x00000021>,
+ <IMX8QXP_USDHC1_DATA0_CONN_USDHC1_DATA0 0x00000021>,
+ <IMX8QXP_USDHC1_DATA1_CONN_USDHC1_DATA1 0x00000021>,
+ <IMX8QXP_USDHC1_DATA2_CONN_USDHC1_DATA2 0x00000021>,
+ <IMX8QXP_USDHC1_DATA3_CONN_USDHC1_DATA3 0x00000021>,
+ <IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT 0x00000021>;
+ };
+
+ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
+ fsl,pins = <IMX8QXP_USDHC1_CLK_CONN_USDHC1_CLK 0x06000040>,
+ <IMX8QXP_USDHC1_CMD_CONN_USDHC1_CMD 0x00000020>,
+ <IMX8QXP_USDHC1_DATA0_CONN_USDHC1_DATA0 0x00000020>,
+ <IMX8QXP_USDHC1_DATA1_CONN_USDHC1_DATA1 0x00000020>,
+ <IMX8QXP_USDHC1_DATA2_CONN_USDHC1_DATA2 0x00000020>,
+ <IMX8QXP_USDHC1_DATA3_CONN_USDHC1_DATA3 0x00000020>,
+ <IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT 0x00000020>;
+ };
+
+ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
+ fsl,pins = <IMX8QXP_USDHC1_CLK_CONN_USDHC1_CLK 0x06000040>,
+ <IMX8QXP_USDHC1_CMD_CONN_USDHC1_CMD 0x00000020>,
+ <IMX8QXP_USDHC1_DATA0_CONN_USDHC1_DATA0 0x00000020>,
+ <IMX8QXP_USDHC1_DATA1_CONN_USDHC1_DATA1 0x00000020>,
+ <IMX8QXP_USDHC1_DATA2_CONN_USDHC1_DATA2 0x00000020>,
+ <IMX8QXP_USDHC1_DATA3_CONN_USDHC1_DATA3 0x00000020>,
+ <IMX8QXP_USDHC1_VSELECT_CONN_USDHC1_VSELECT 0x00000020>;
+ };
+};
diff --git a/arch/arm64/boot/dts/freescale/tqma8xx.dtsi b/arch/arm64/boot/dts/freescale/tqma8xx.dtsi
new file mode 100644
index 000000000000..d98469a7c47c
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/tqma8xx.dtsi
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR X11)
+/*
+ * Copyright 2018-2023 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
+ */
+
+/ {
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0 0x40000000>;
+ };
+
+ reg_1v8: regulator-1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "V_1V8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ reg_3v3: regulator-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "V_3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ /*
+ * global autoconfigured region for contiguous allocations
+ * must not exceed memory size and region
+ */
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ size = <0 0x20000000>;
+ alloc-ranges = <0 0x96000000 0 0x30000000>;
+ linux,cma-default;
+ };
+ };
+};
+
+/* TQMa8Xx only uses industrial grade, reduce trip points accordingly */
+&cpu_alert0 {
+ temperature = <95000>;
+};
+
+&cpu_crit0 {
+ temperature = <100000>;
+};
+/* end of temperature grade adjustments */
+
+&flexspi0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flexspi0>;
+ status = "okay";
+
+ flash0: flash@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "jedec,spi-nor";
+ spi-max-frequency = <66000000>;
+ spi-tx-bus-width = <1>;
+ spi-rx-bus-width = <4>;
+ };
+};
+
+/* TODO GPU */
+
+&i2c1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default", "gpio";
+ pinctrl-0 = <&pinctrl_lpi2c1>;
+ pinctrl-1 = <&pinctrl_lpi2c1gpio>;
+ scl-gpios = <&lsio_gpio1 27 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&lsio_gpio1 28 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ status = "okay";
+
+ se97: temperature-sensor@1b {
+ compatible = "nxp,se97b", "jedec,jc-42.4-temp";
+ reg = <0x1b>;
+ };
+
+ pcf85063: rtc@51 {
+ compatible = "nxp,pcf85063a";
+ reg = <0x51>;
+ quartz-load-femtofarads = <7000>;
+ };
+
+ at24c02: eeprom@53 {
+ compatible = "nxp,se97b", "atmel,24c02";
+ reg = <0x53>;
+ pagesize = <16>;
+ read-only;
+ vcc-supply = <&reg_3v3>;
+ };
+
+ m24c64: eeprom@57 {
+ compatible = "atmel,24c64";
+ reg = <0x57>;
+ pagesize = <32>;
+ vcc-supply = <&reg_3v3>;
+ };
+};
+
+&mu_m0 {
+ status = "okay";
+};
+
+&mu1_m0 {
+ status = "okay";
+};
+
+&thermal_zones {
+ pmic_thermal: pmic-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <2000>;
+ thermal-sensors = <&tsens IMX_SC_R_PMIC_0>;
+
+ trips {
+ pmic_alert0: trip0 {
+ temperature = <110000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ pmic_crit0: trip1 {
+ temperature = <125000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&pmic_alert0>;
+ cooling-device =
+ <&A35_0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&A35_1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&A35_2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&A35_3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+};
+
+&usdhc1 {
+ pinctrl-names = "default", "state_100mhz", "state_200mhz";
+ pinctrl-0 = <&pinctrl_usdhc1>;
+ pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
+ pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
+ vqmmc-supply = <&reg_1v8>;
+ vmmc-supply = <&reg_3v3>;
+ bus-width = <8>;
+ non-removable;
+ no-sdio;
+ no-sd;
+ status = "okay";
+};
+
+&vpu {
+ compatible = "nxp,imx8qxp-vpu";
+ status = "okay";
+};
+
+&vpu_core0 {
+ memory-region = <&decoder_boot>, <&decoder_rpc>;
+ status = "okay";
+};
+
+&vpu_core1 {
+ memory-region = <&encoder_boot>, <&encoder_rpc>;
+ status = "okay";
+};
+
+&iomuxc {
+ pinctrl_flexspi0: flexspi0grp {
+ fsl,pins = <
+ IMX8QXP_QSPI0A_DATA0_LSIO_QSPI0A_DATA0 0x0600004d
+ IMX8QXP_QSPI0A_DATA1_LSIO_QSPI0A_DATA1 0x0600004d
+ IMX8QXP_QSPI0A_DATA2_LSIO_QSPI0A_DATA2 0x0600004d
+ IMX8QXP_QSPI0A_DATA3_LSIO_QSPI0A_DATA3 0x0600004d
+ IMX8QXP_QSPI0A_DQS_LSIO_QSPI0A_DQS 0x0600004d
+ IMX8QXP_QSPI0A_SS0_B_LSIO_QSPI0A_SS0_B 0x0600004d
+ IMX8QXP_QSPI0A_SCLK_LSIO_QSPI0A_SCLK 0x0600004d
+ IMX8QXP_QSPI0B_SCLK_LSIO_QSPI0B_SCLK 0x0600004d
+ IMX8QXP_QSPI0B_DATA0_LSIO_QSPI0B_DATA0 0x0600004d
+ IMX8QXP_QSPI0B_DATA1_LSIO_QSPI0B_DATA1 0x0600004d
+ IMX8QXP_QSPI0B_DATA2_LSIO_QSPI0B_DATA2 0x0600004d
+ IMX8QXP_QSPI0B_DATA3_LSIO_QSPI0B_DATA3 0x0600004d
+ IMX8QXP_QSPI0B_DQS_LSIO_QSPI0B_DQS 0x0600004d
+ IMX8QXP_QSPI0B_SS0_B_LSIO_QSPI0B_SS0_B 0x0600004d
+ IMX8QXP_QSPI0B_SS1_B_LSIO_QSPI0B_SS1_B 0x0600004d
+ >;
+ };
+
+ pinctrl_lpi2c1: lpi2c1grp {
+ fsl,pins = <
+ IMX8QXP_MIPI_DSI0_GPIO0_00_ADMA_I2C1_SCL 0x06000021
+ IMX8QXP_MIPI_DSI0_GPIO0_01_ADMA_I2C1_SDA 0x06000021
+ >;
+ };
+
+ pinctrl_lpi2c1gpio: lpi2c1gpiogrp {
+ fsl,pins = <
+ IMX8QXP_MIPI_DSI0_GPIO0_00_LSIO_GPIO1_IO27 0x06000021
+ IMX8QXP_MIPI_DSI0_GPIO0_01_LSIO_GPIO1_IO28 0x06000021
+ >;
+ };
+
+ pinctrl_usdhc1: usdhc1grp {
+ fsl,pins = <
+ IMX8QXP_EMMC0_CLK_CONN_EMMC0_CLK 0x06000041
+ IMX8QXP_EMMC0_CMD_CONN_EMMC0_CMD 0x00000021
+ IMX8QXP_EMMC0_DATA0_CONN_EMMC0_DATA0 0x00000021
+ IMX8QXP_EMMC0_DATA1_CONN_EMMC0_DATA1 0x00000021
+ IMX8QXP_EMMC0_DATA2_CONN_EMMC0_DATA2 0x00000021
+ IMX8QXP_EMMC0_DATA3_CONN_EMMC0_DATA3 0x00000021
+ IMX8QXP_EMMC0_DATA4_CONN_EMMC0_DATA4 0x00000021
+ IMX8QXP_EMMC0_DATA5_CONN_EMMC0_DATA5 0x00000021
+ IMX8QXP_EMMC0_DATA6_CONN_EMMC0_DATA6 0x00000021
+ IMX8QXP_EMMC0_DATA7_CONN_EMMC0_DATA7 0x00000021
+ IMX8QXP_EMMC0_STROBE_CONN_EMMC0_STROBE 0x00000041
+ >;
+ };
+
+ pinctrl_usdhc1_100mhz: usdhc1-100mhzgrp {
+ fsl,pins = <
+ IMX8QXP_EMMC0_CLK_CONN_EMMC0_CLK 0x06000040
+ IMX8QXP_EMMC0_CMD_CONN_EMMC0_CMD 0x00000020
+ IMX8QXP_EMMC0_DATA0_CONN_EMMC0_DATA0 0x00000020
+ IMX8QXP_EMMC0_DATA1_CONN_EMMC0_DATA1 0x00000020
+ IMX8QXP_EMMC0_DATA2_CONN_EMMC0_DATA2 0x00000020
+ IMX8QXP_EMMC0_DATA3_CONN_EMMC0_DATA3 0x00000020
+ IMX8QXP_EMMC0_DATA4_CONN_EMMC0_DATA4 0x00000020
+ IMX8QXP_EMMC0_DATA5_CONN_EMMC0_DATA5 0x00000020
+ IMX8QXP_EMMC0_DATA6_CONN_EMMC0_DATA6 0x00000020
+ IMX8QXP_EMMC0_DATA7_CONN_EMMC0_DATA7 0x00000020
+ IMX8QXP_EMMC0_STROBE_CONN_EMMC0_STROBE 0x00000040
+ >;
+ };
+
+ pinctrl_usdhc1_200mhz: usdhc1-200mhzgrp {
+ fsl,pins = <
+ IMX8QXP_EMMC0_CLK_CONN_EMMC0_CLK 0x06000040
+ IMX8QXP_EMMC0_CMD_CONN_EMMC0_CMD 0x00000020
+ IMX8QXP_EMMC0_DATA0_CONN_EMMC0_DATA0 0x00000020
+ IMX8QXP_EMMC0_DATA1_CONN_EMMC0_DATA1 0x00000020
+ IMX8QXP_EMMC0_DATA2_CONN_EMMC0_DATA2 0x00000020
+ IMX8QXP_EMMC0_DATA3_CONN_EMMC0_DATA3 0x00000020
+ IMX8QXP_EMMC0_DATA4_CONN_EMMC0_DATA4 0x00000020
+ IMX8QXP_EMMC0_DATA5_CONN_EMMC0_DATA5 0x00000020
+ IMX8QXP_EMMC0_DATA6_CONN_EMMC0_DATA6 0x00000020
+ IMX8QXP_EMMC0_DATA7_CONN_EMMC0_DATA7 0x00000020
+ IMX8QXP_EMMC0_STROBE_CONN_EMMC0_STROBE 0x00000040
+ >;
+ };
+};
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi
index d66d425e45b7..1162978329c1 100644
--- a/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi
@@ -202,7 +202,7 @@
status = "disabled";
};
- i3c0: i3c-master@10da0000 {
+ i3c0: i3c@10da0000 {
compatible = "snps,dw-i3c-master-1.00a";
reg = <0x10da0000 0x1000>;
#address-cells = <3>;
@@ -212,7 +212,7 @@
status = "disabled";
};
- i3c1: i3c-master@10da1000 {
+ i3c1: i3c@10da1000 {
compatible = "snps,dw-i3c-master-1.00a";
reg = <0x10da1000 0x1000>;
#address-cells = <3>;
diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi
index 48ec4ebec0a8..b864ffa74ea8 100644
--- a/arch/arm64/boot/dts/lg/lg1312.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1312.dtsi
@@ -126,7 +126,6 @@
amba {
#address-cells = <2>;
#size-cells = <1>;
- #interrupt-cells = <3>;
compatible = "simple-bus";
interrupt-parent = <&gic>;
diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi
index 3869460aa5dc..996fb39bb50c 100644
--- a/arch/arm64/boot/dts/lg/lg1313.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1313.dtsi
@@ -126,7 +126,6 @@
amba {
#address-cells = <2>;
#size-cells = <1>;
- #interrupt-cells = <3>;
compatible = "simple-bus";
interrupt-parent = <&gic>;
diff --git a/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi b/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi
index b5e042b8e929..5591939e057b 100644
--- a/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi
@@ -77,7 +77,6 @@
#address-cells = <2>;
#size-cells = <2>;
ranges;
- dma-ranges;
internal-regs@7f000000 {
#address-cells = <1>;
@@ -204,6 +203,30 @@
};
};
+ mmc_dma: bus@80500000 {
+ compatible = "simple-bus";
+ ranges;
+ #address-cells = <0x2>;
+ #size-cells = <0x2>;
+ reg = <0x0 0x80500000 0x0 0x100000>;
+ dma-ranges = <0x0 0x0 0x2 0x0 0x0 0x80000000>;
+ dma-coherent;
+
+ sdhci: mmc@805c0000 {
+ compatible = "marvell,ac5-sdhci",
+ "marvell,armada-ap806-sdhci";
+ reg = <0x0 0x805c0000 0x0 0x1000>;
+ interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&emmc_clock>, <&cnm_clock>;
+ clock-names = "core", "axi";
+ bus-width = <8>;
+ non-removable;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ };
+ };
+
/*
* Dedicated section for devices behind 32bit controllers so we
* can configure specific DMA mapping for them
@@ -335,5 +358,11 @@
#clock-cells = <0>;
clock-frequency = <400000000>;
};
+
+ emmc_clock: emmc-clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <400000000>;
+ };
};
};
diff --git a/arch/arm64/boot/dts/marvell/ac5-98dx35xx-rd.dts b/arch/arm64/boot/dts/marvell/ac5-98dx35xx-rd.dts
index f0ebdb84eec9..0c973d7a215a 100644
--- a/arch/arm64/boot/dts/marvell/ac5-98dx35xx-rd.dts
+++ b/arch/arm64/boot/dts/marvell/ac5-98dx35xx-rd.dts
@@ -99,3 +99,7 @@
};
};
};
+
+&sdhci {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index e300145ad1a6..1cc3fa1c354d 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -431,14 +431,14 @@
crypto: crypto@90000 {
compatible = "inside-secure,safexcel-eip97ies";
reg = <0x90000 0x20000>;
- interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "mem", "ring0", "ring1",
- "ring2", "ring3", "eip";
+ <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ring0", "ring1", "ring2",
+ "ring3", "eip", "mem";
clocks = <&nb_periph_clk 15>;
};
diff --git a/arch/arm64/boot/dts/marvell/armada-ap807.dtsi b/arch/arm64/boot/dts/marvell/armada-ap807.dtsi
index 4a23f65d475f..a3328d05fc94 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap807.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap807.dtsi
@@ -33,3 +33,6 @@
"marvell,armada-ap806-sdhci"; /* Backward compatibility */
};
+&ap_thermal {
+ compatible = "marvell,armada-ap807-thermal";
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
index 2c920e22cec2..7ec7c789d87e 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
@@ -138,7 +138,6 @@
odmi: odmi@300000 {
compatible = "marvell,odmi-controller";
- interrupt-controller;
msi-controller;
marvell,odmi-frames = <4>;
reg = <0x300000 0x4000>,
diff --git a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi
index 4ec1aae0a3a9..7e595ac80043 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi
@@ -511,14 +511,14 @@
CP11X_LABEL(crypto): crypto@800000 {
compatible = "inside-secure,safexcel-eip197b";
reg = <0x800000 0x200000>;
- interrupts = <87 IRQ_TYPE_LEVEL_HIGH>,
- <88 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <88 IRQ_TYPE_LEVEL_HIGH>,
<89 IRQ_TYPE_LEVEL_HIGH>,
<90 IRQ_TYPE_LEVEL_HIGH>,
<91 IRQ_TYPE_LEVEL_HIGH>,
- <92 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "mem", "ring0", "ring1",
- "ring2", "ring3", "eip";
+ <92 IRQ_TYPE_LEVEL_HIGH>,
+ <87 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ring0", "ring1", "ring2", "ring3",
+ "eip", "mem";
clock-names = "core", "reg";
clocks = <&CP11X_LABEL(clk) 1 26>,
<&CP11X_LABEL(clk) 1 17>;
diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index 1e6f91731e92..37b4ca3a87c9 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -8,6 +8,8 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt6797-evb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt6797-x20-dev.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7622-rfb1.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7622-bananapi-bpi-r64.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7981b-xiaomi-ax3000t.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-acelink-ew-7886cax.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nand.dtbo
@@ -15,6 +17,7 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nor.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-rfb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986b-rfb.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8167-pumpkin.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm-hana.dtb
@@ -49,6 +52,16 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt8183-kukui-kodama-sku32.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8183-kukui-krane-sku0.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8183-kukui-krane-sku176.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8183-pumpkin.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-magneton-sku393216.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-magneton-sku393217.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-magneton-sku393218.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-rusty-sku196608.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-steelix-sku131072.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-steelix-sku131073.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-tentacool-sku327681.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-tentacool-sku327683.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-tentacruel-sku262144.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-corsola-tentacruel-sku262148.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8186-evb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8188-evb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8192-asurada-hayato-r1.dtb
@@ -63,4 +76,5 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt8195-demo.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8195-evb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8365-evk.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-genio-1200-evk.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8516-pumpkin.dtb
diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
index fffdb7bbf889..0c38f7b51763 100644
--- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
@@ -43,12 +43,12 @@
extcon_usb: extcon_iddig {
compatible = "linux,extcon-usb-gpio";
- id-gpio = <&pio 12 GPIO_ACTIVE_HIGH>;
+ id-gpios = <&pio 12 GPIO_ACTIVE_HIGH>;
};
extcon_usb1: extcon_iddig1 {
compatible = "linux,extcon-usb-gpio";
- id-gpio = <&pio 14 GPIO_ACTIVE_HIGH>;
+ id-gpios = <&pio 14 GPIO_ACTIVE_HIGH>;
};
usb_p0_vbus: regulator-usb-p0-vbus {
diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
index ed1a9d319415..6d218caa198c 100644
--- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
@@ -261,7 +261,7 @@
#clock-cells = <1>;
};
- syscfg_pctl_a: syscfg_pctl_a@10005000 {
+ syscfg_pctl_a: syscon@10005000 {
compatible = "mediatek,mt2712-pctl-a-syscfg", "syscon";
reg = <0 0x10005000 0 0x1000>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt6797.dtsi b/arch/arm64/boot/dts/mediatek/mt6797.dtsi
index c3677d77e0a4..0e9d11b4585b 100644
--- a/arch/arm64/boot/dts/mediatek/mt6797.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt6797.dtsi
@@ -117,7 +117,7 @@
#clock-cells = <1>;
};
- infrasys: infracfg_ao@10001000 {
+ infrasys: syscon@10001000 {
compatible = "mediatek,mt6797-infracfg", "syscon";
reg = <0 0x10001000 0 0x1000>;
#clock-cells = <1>;
@@ -452,19 +452,19 @@
#clock-cells = <1>;
};
- imgsys: imgsys_config@15000000 {
+ imgsys: syscon@15000000 {
compatible = "mediatek,mt6797-imgsys", "syscon";
reg = <0 0x15000000 0 0x1000>;
#clock-cells = <1>;
};
- vdecsys: vdec_gcon@16000000 {
+ vdecsys: syscon@16000000 {
compatible = "mediatek,mt6797-vdecsys", "syscon";
reg = <0 0x16000000 0 0x10000>;
#clock-cells = <1>;
};
- vencsys: venc_gcon@17000000 {
+ vencsys: syscon@17000000 {
compatible = "mediatek,mt6797-vencsys", "syscon";
reg = <0 0x17000000 0 0x1000>;
#clock-cells = <1>;
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index a1f42048dcc7..224bb289660c 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -75,6 +75,7 @@
memory@40000000 {
reg = <0 0x40000000 0 0x40000000>;
+ device_type = "memory";
};
reg_1p8v: regulator-1p8v {
@@ -185,6 +186,18 @@
label = "lan3";
};
+ port@5 {
+ reg = <5>;
+ ethernet = <&gmac1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
port@6 {
reg = <6>;
label = "cpu";
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index 2dc1bdc74e21..41629769bdc8 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -57,6 +57,7 @@
memory@40000000 {
reg = <0 0x40000000 0 0x20000000>;
+ device_type = "memory";
};
reg_1p8v: regulator-1p8v {
@@ -117,6 +118,18 @@
};
};
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
mdio-bus {
#address-cells = <1>;
#size-cells = <0>;
@@ -155,6 +168,18 @@
label = "wan";
};
+ port@5 {
+ reg = <5>;
+ ethernet = <&gmac1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
port@6 {
reg = <6>;
label = "cpu";
diff --git a/arch/arm64/boot/dts/mediatek/mt7981b-xiaomi-ax3000t.dts b/arch/arm64/boot/dts/mediatek/mt7981b-xiaomi-ax3000t.dts
new file mode 100644
index 000000000000..a314c3e05e50
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt7981b-xiaomi-ax3000t.dts
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+/dts-v1/;
+
+#include "mt7981b.dtsi"
+
+/ {
+ compatible = "xiaomi,ax3000t", "mediatek,mt7981b";
+ model = "Xiaomi AX3000T";
+
+ memory@40000000 {
+ reg = <0 0x40000000 0 0x10000000>;
+ device_type = "memory";
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt7981b.dtsi b/arch/arm64/boot/dts/mediatek/mt7981b.dtsi
new file mode 100644
index 000000000000..4feff3d1c5f4
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt7981b.dtsi
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+#include <dt-bindings/clock/mediatek,mt7981-clk.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ compatible = "mediatek,mt7981b";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "arm,cortex-a53";
+ reg = <0x0>;
+ device_type = "cpu";
+ enable-method = "psci";
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a53";
+ reg = <0x1>;
+ device_type = "cpu";
+ enable-method = "psci";
+ };
+ };
+
+ oscillator-40m {
+ compatible = "fixed-clock";
+ clock-frequency = <40000000>;
+ clock-output-names = "clkxtal";
+ #clock-cells = <0>;
+ };
+
+ psci {
+ compatible = "arm,psci-1.0";
+ method = "smc";
+ };
+
+ soc {
+ compatible = "simple-bus";
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ gic: interrupt-controller@c000000 {
+ compatible = "arm,gic-v3";
+ reg = <0 0x0c000000 0 0x40000>, /* GICD */
+ <0 0x0c080000 0 0x200000>; /* GICR */
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ };
+
+ infracfg: clock-controller@10001000 {
+ compatible = "mediatek,mt7981-infracfg", "syscon";
+ reg = <0 0x10001000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ clock-controller@1001b000 {
+ compatible = "mediatek,mt7981-topckgen", "syscon";
+ reg = <0 0x1001b000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ clock-controller@1001e000 {
+ compatible = "mediatek,mt7981-apmixedsys";
+ reg = <0 0x1001e000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ pwm@10048000 {
+ compatible = "mediatek,mt7981-pwm";
+ reg = <0 0x10048000 0 0x1000>;
+ clocks = <&infracfg CLK_INFRA_PWM_STA>,
+ <&infracfg CLK_INFRA_PWM_HCK>,
+ <&infracfg CLK_INFRA_PWM1_CK>,
+ <&infracfg CLK_INFRA_PWM2_CK>,
+ <&infracfg CLK_INFRA_PWM3_CK>;
+ clock-names = "top", "main", "pwm1", "pwm2", "pwm3";
+ #pwm-cells = <2>;
+ };
+
+ clock-controller@15000000 {
+ compatible = "mediatek,mt7981-ethsys", "syscon";
+ reg = <0 0x15000000 0 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-acelink-ew-7886cax.dts b/arch/arm64/boot/dts/mediatek/mt7986a-acelink-ew-7886cax.dts
new file mode 100644
index 000000000000..08b3b0827436
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-acelink-ew-7886cax.dts
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+/dts-v1/;
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/leds/common.h>
+
+#include "mt7986a.dtsi"
+
+/ {
+ compatible = "acelink,ew-7886cax", "mediatek,mt7986a";
+ model = "Acelink EW-7886CAX";
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@40000000 {
+ reg = <0 0x40000000 0 0x20000000>;
+ device_type = "memory";
+ };
+
+ keys {
+ compatible = "gpio-keys";
+
+ key-restart {
+ label = "Reset";
+ gpios = <&pio 7 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_RESTART>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_RED>;
+ gpios = <&pio 18 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-1 {
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_GREEN>;
+ gpios = <&pio 19 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-2 {
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_BLUE>;
+ gpios = <&pio 20 GPIO_ACTIVE_HIGH>;
+ };
+ };
+};
+
+&crypto {
+ status = "okay";
+};
+
+&eth {
+ status = "okay";
+
+ mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-mode = "2500base-x";
+ phy-handle = <&phy6>;
+ nvmem-cells = <&macaddr>;
+ nvmem-cell-names = "mac-address";
+ };
+
+ mdio-bus {
+ reset-gpios = <&pio 6 GPIO_ACTIVE_LOW>;
+ reset-delay-us = <50000>;
+ reset-post-delay-us = <20000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy6: phy@6 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <6>;
+ };
+ };
+};
+
+&pcie_phy {
+ status = "okay";
+};
+
+&spi0 {
+ status = "okay";
+
+ flash@0 {
+ compatible = "spi-nand";
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ spi-max-frequency = <52000000>;
+ spi-rx-bus-width = <4>;
+ spi-tx-bus-width = <4>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ reg = <0x0 0x100000>;
+ label = "bootloader";
+ read-only;
+ };
+
+ partition@100000 {
+ reg = <0x100000 0x80000>;
+ label = "u-boot-env";
+ };
+
+ partition@180000 {
+ compatible = "nvmem-cells";
+ reg = <0x180000 0x200000>;
+ label = "factory";
+ read-only;
+
+ nvmem-layout {
+ compatible = "fixed-layout";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ eeprom: eeprom@0 {
+ reg = <0x0 0x1000>;
+ };
+
+ macaddr: macaddr@4 {
+ reg = <0x4 0x6>;
+ };
+ };
+ };
+
+ partition@380000 {
+ reg = <0x380000 0x200000>;
+ label = "fip";
+ };
+
+ partition@580000 {
+ reg = <0x580000 0x4000000>;
+ label = "ubi";
+ };
+ };
+ };
+};
+
+&trng {
+ status = "okay";
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&watchdog {
+ status = "okay";
+};
+
+&wifi {
+ nvmem-cells = <&eeprom>;
+ nvmem-cell-names = "eeprom";
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3-nand.dtso b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3-nand.dtso
index 543c13385d6e..7b97c5c91bd0 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3-nand.dtso
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3-nand.dtso
@@ -15,7 +15,7 @@
__overlay__ {
#address-cells = <1>;
#size-cells = <0>;
- spi_nand: spi_nand@0 {
+ spi_nand: flash@0 {
compatible = "spi-nand";
reg = <0>;
spi-max-frequency = <10000000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
index d06d4af43cbf..e04b1c0c0ebb 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
@@ -43,7 +43,7 @@
#cooling-cells = <2>;
/* cooling level (0, 1, 2) - pwm inverted */
cooling-levels = <255 96 0>;
- pwms = <&pwm 0 10000 0>;
+ pwms = <&pwm 0 10000>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-rfb.dts b/arch/arm64/boot/dts/mediatek/mt7986a-rfb.dts
index 3ef371ca254e..5d8e3d3f6c20 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-rfb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-rfb.dts
@@ -65,6 +65,18 @@
};
};
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
mdio: mdio-bus {
#address-cells = <1>;
#size-cells = <0>;
@@ -237,12 +249,13 @@
pinctrl-0 = <&spi_flash_pins>;
cs-gpios = <0>, <0>;
status = "okay";
- spi_nand: spi_nand@0 {
+
+ spi_nand: flash@0 {
compatible = "spi-nand";
reg = <0>;
spi-max-frequency = <10000000>;
- spi-tx-buswidth = <4>;
- spi-rx-buswidth = <4>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
};
};
@@ -287,6 +300,18 @@
label = "lan4";
};
+ port@5 {
+ reg = <5>;
+ ethernet = <&gmac1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
port@6 {
reg = <6>;
label = "cpu";
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
index fc751e049953..b3f416b9a7a4 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
@@ -16,49 +16,49 @@
#address-cells = <2>;
#size-cells = <2>;
- clk40m: oscillator-40m {
- compatible = "fixed-clock";
- clock-frequency = <40000000>;
- #clock-cells = <0>;
- clock-output-names = "clkxtal";
- };
-
cpus {
#address-cells = <1>;
#size-cells = <0>;
cpu0: cpu@0 {
- device_type = "cpu";
compatible = "arm,cortex-a53";
- enable-method = "psci";
reg = <0x0>;
+ device_type = "cpu";
+ enable-method = "psci";
#cooling-cells = <2>;
};
cpu1: cpu@1 {
- device_type = "cpu";
compatible = "arm,cortex-a53";
- enable-method = "psci";
reg = <0x1>;
+ device_type = "cpu";
+ enable-method = "psci";
#cooling-cells = <2>;
};
cpu2: cpu@2 {
- device_type = "cpu";
compatible = "arm,cortex-a53";
- enable-method = "psci";
reg = <0x2>;
+ device_type = "cpu";
+ enable-method = "psci";
#cooling-cells = <2>;
};
cpu3: cpu@3 {
- device_type = "cpu";
- enable-method = "psci";
compatible = "arm,cortex-a53";
reg = <0x3>;
+ device_type = "cpu";
+ enable-method = "psci";
#cooling-cells = <2>;
};
};
+ clk40m: oscillator-40m {
+ compatible = "fixed-clock";
+ clock-frequency = <40000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clkxtal";
+ };
+
psci {
compatible = "arm,psci-0.2";
method = "smc";
@@ -121,38 +121,30 @@
};
- timer {
- compatible = "arm,armv8-timer";
- interrupt-parent = <&gic>;
- interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
- <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
- <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
- <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
- };
-
soc {
- #address-cells = <2>;
- #size-cells = <2>;
compatible = "simple-bus";
ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
gic: interrupt-controller@c000000 {
compatible = "arm,gic-v3";
- #interrupt-cells = <3>;
- interrupt-parent = <&gic>;
- interrupt-controller;
reg = <0 0x0c000000 0 0x10000>, /* GICD */
<0 0x0c080000 0 0x80000>, /* GICR */
<0 0x0c400000 0 0x2000>, /* GICC */
<0 0x0c410000 0 0x1000>, /* GICH */
<0 0x0c420000 0 0x2000>; /* GICV */
+ interrupt-parent = <&gic>;
interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
};
infracfg: infracfg@10001000 {
compatible = "mediatek,mt7986-infracfg", "syscon";
reg = <0 0x10001000 0 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
wed_pcie: wed-pcie@10003000 {
@@ -202,6 +194,19 @@
#interrupt-cells = <2>;
};
+ pwm: pwm@10048000 {
+ compatible = "mediatek,mt7986-pwm";
+ reg = <0 0x10048000 0 0x1000>;
+ #pwm-cells = <2>;
+ interrupts = <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&topckgen CLK_TOP_PWM_SEL>,
+ <&infracfg CLK_INFRA_PWM_STA>,
+ <&infracfg CLK_INFRA_PWM1_CK>,
+ <&infracfg CLK_INFRA_PWM2_CK>;
+ clock-names = "top", "main", "pwm1", "pwm2";
+ status = "disabled";
+ };
+
sgmiisys0: syscon@10060000 {
compatible = "mediatek,mt7986-sgmiisys_0",
"syscon";
@@ -234,26 +239,11 @@
<GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "ring0", "ring1", "ring2", "ring3";
clocks = <&infracfg CLK_INFRA_EIP97_CK>;
- clock-names = "infra_eip97_ck";
assigned-clocks = <&topckgen CLK_TOP_EIP_B_SEL>;
assigned-clock-parents = <&apmixedsys CLK_APMIXED_NET2PLL>;
status = "disabled";
};
- pwm: pwm@10048000 {
- compatible = "mediatek,mt7986-pwm";
- reg = <0 0x10048000 0 0x1000>;
- #clock-cells = <1>;
- #pwm-cells = <2>;
- interrupts = <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&topckgen CLK_TOP_PWM_SEL>,
- <&infracfg CLK_INFRA_PWM_STA>,
- <&infracfg CLK_INFRA_PWM1_CK>,
- <&infracfg CLK_INFRA_PWM2_CK>;
- clock-names = "top", "main", "pwm1", "pwm2";
- status = "disabled";
- };
-
uart0: serial@11002000 {
compatible = "mediatek,mt7986-uart",
"mediatek,mt6577-uart";
@@ -311,9 +301,9 @@
spi0: spi@1100a000 {
compatible = "mediatek,mt7986-spi-ipm", "mediatek,spi-ipm";
+ reg = <0 0x1100a000 0 0x100>;
#address-cells = <1>;
#size-cells = <0>;
- reg = <0 0x1100a000 0 0x100>;
interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&topckgen CLK_TOP_MPLL_D2>,
<&topckgen CLK_TOP_SPI_SEL>,
@@ -325,9 +315,9 @@
spi1: spi@1100b000 {
compatible = "mediatek,mt7986-spi-ipm", "mediatek,spi-ipm";
+ reg = <0 0x1100b000 0 0x100>;
#address-cells = <1>;
#size-cells = <0>;
- reg = <0 0x1100b000 0 0x100>;
interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&topckgen CLK_TOP_MPLL_D2>,
<&topckgen CLK_TOP_SPIM_MST_SEL>,
@@ -337,6 +327,21 @@
status = "disabled";
};
+ thermal: thermal@1100c800 {
+ compatible = "mediatek,mt7986-thermal";
+ reg = <0 0x1100c800 0 0x800>;
+ interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&infracfg CLK_INFRA_THERM_CK>,
+ <&infracfg CLK_INFRA_ADC_26M_CK>,
+ <&infracfg CLK_INFRA_ADC_FRC_CK>;
+ clock-names = "therm", "auxadc", "adc_32k";
+ nvmem-cells = <&thermal_calibration>;
+ nvmem-cell-names = "calibration-data";
+ #thermal-sensor-cells = <1>;
+ mediatek,auxadc = <&auxadc>;
+ mediatek,apmixedsys = <&apmixedsys>;
+ };
+
auxadc: adc@1100d000 {
compatible = "mediatek,mt7986-auxadc";
reg = <0 0x1100d000 0 0x1000>;
@@ -388,39 +393,23 @@
status = "disabled";
};
- thermal: thermal@1100c800 {
- #thermal-sensor-cells = <1>;
- compatible = "mediatek,mt7986-thermal";
- reg = <0 0x1100c800 0 0x800>;
- interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&infracfg CLK_INFRA_THERM_CK>,
- <&infracfg CLK_INFRA_ADC_26M_CK>,
- <&infracfg CLK_INFRA_ADC_FRC_CK>;
- clock-names = "therm", "auxadc", "adc_32k";
- mediatek,auxadc = <&auxadc>;
- mediatek,apmixedsys = <&apmixedsys>;
- nvmem-cells = <&thermal_calibration>;
- nvmem-cell-names = "calibration-data";
- };
-
pcie: pcie@11280000 {
compatible = "mediatek,mt7986-pcie",
"mediatek,mt8192-pcie";
+ reg = <0x00 0x11280000 0x00 0x4000>;
+ reg-names = "pcie-mac";
+ ranges = <0x82000000 0x00 0x20000000 0x00
+ 0x20000000 0x00 0x10000000>;
device_type = "pci";
#address-cells = <3>;
#size-cells = <2>;
- reg = <0x00 0x11280000 0x00 0x4000>;
- reg-names = "pcie-mac";
interrupts = <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>;
bus-range = <0x00 0xff>;
- ranges = <0x82000000 0x00 0x20000000 0x00
- 0x20000000 0x00 0x10000000>;
clocks = <&infracfg CLK_INFRA_IPCIE_PIPE_CK>,
<&infracfg CLK_INFRA_IPCIE_CK>,
<&infracfg CLK_INFRA_IPCIER_CK>,
<&infracfg CLK_INFRA_IPCIEB_CK>;
clock-names = "pl_250m", "tl_26m", "peri_26m", "top_133m";
- status = "disabled";
phys = <&pcie_port PHY_TYPE_PCIE>;
phy-names = "pcie-phy";
@@ -431,6 +420,8 @@
<0 0 0 2 &pcie_intc 1>,
<0 0 0 3 &pcie_intc 2>,
<0 0 0 4 &pcie_intc 3>;
+ status = "disabled";
+
pcie_intc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
@@ -441,9 +432,9 @@
pcie_phy: t-phy {
compatible = "mediatek,mt7986-tphy",
"mediatek,generic-tphy-v2";
+ ranges;
#address-cells = <2>;
#size-cells = <2>;
- ranges;
status = "disabled";
pcie_port: pcie-phy@11c00000 {
@@ -468,9 +459,9 @@
usb_phy: t-phy@11e10000 {
compatible = "mediatek,mt7986-tphy",
"mediatek,generic-tphy-v2";
+ ranges = <0 0 0x11e10000 0x1700>;
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0 0 0x11e10000 0x1700>;
status = "disabled";
u2port0: usb-phy@0 {
@@ -498,11 +489,11 @@
};
ethsys: syscon@15000000 {
- #address-cells = <1>;
- #size-cells = <1>;
compatible = "mediatek,mt7986-ethsys",
"syscon";
reg = <0 0x15000000 0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
#clock-cells = <1>;
#reset-cells = <1>;
};
@@ -533,20 +524,6 @@
mediatek,wo-ccif = <&wo_ccif1>;
};
- wo_ccif0: syscon@151a5000 {
- compatible = "mediatek,mt7986-wo-ccif", "syscon";
- reg = <0 0x151a5000 0 0x1000>;
- interrupt-parent = <&gic>;
- interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>;
- };
-
- wo_ccif1: syscon@151ad000 {
- compatible = "mediatek,mt7986-wo-ccif", "syscon";
- reg = <0 0x151ad000 0 0x1000>;
- interrupt-parent = <&gic>;
- interrupts = <GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>;
- };
-
eth: ethernet@15100000 {
compatible = "mediatek,mt7986-eth";
reg = <0 0x15100000 0 0x80000>;
@@ -579,26 +556,40 @@
<&topckgen CLK_TOP_SGM_325M_SEL>;
assigned-clock-parents = <&apmixedsys CLK_APMIXED_NET2PLL>,
<&apmixedsys CLK_APMIXED_SGMPLL>;
+ #reset-cells = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
mediatek,ethsys = <&ethsys>;
mediatek,sgmiisys = <&sgmiisys0>, <&sgmiisys1>;
mediatek,wed-pcie = <&wed_pcie>;
mediatek,wed = <&wed0>, <&wed1>;
- #reset-cells = <1>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
};
+ wo_ccif0: syscon@151a5000 {
+ compatible = "mediatek,mt7986-wo-ccif", "syscon";
+ reg = <0 0x151a5000 0 0x1000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ wo_ccif1: syscon@151ad000 {
+ compatible = "mediatek,mt7986-wo-ccif", "syscon";
+ reg = <0 0x151ad000 0 0x1000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
wifi: wifi@18000000 {
compatible = "mediatek,mt7986-wmac";
+ reg = <0 0x18000000 0 0x1000000>,
+ <0 0x10003000 0 0x1000>,
+ <0 0x11d10000 0 0x1000>;
resets = <&watchdog MT7986_TOPRGU_CONSYS_SW_RST>;
reset-names = "consys";
clocks = <&topckgen CLK_TOP_CONN_MCUSYS_SEL>,
<&topckgen CLK_TOP_AP2CNN_HOST_SEL>;
clock-names = "mcu", "ap2conn";
- reg = <0 0x18000000 0 0x1000000>,
- <0 0x10003000 0 0x1000>,
- <0 0x11d10000 0 0x1000>;
interrupts = <GIC_SPI 213 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 214 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>,
@@ -646,4 +637,13 @@
};
};
};
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+ };
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986b-rfb.dts b/arch/arm64/boot/dts/mediatek/mt7986b-rfb.dts
index dde190442e38..58f77d932429 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986b-rfb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986b-rfb.dts
@@ -45,6 +45,18 @@
};
};
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
mdio: mdio-bus {
#address-cells = <1>;
#size-cells = <0>;
@@ -83,6 +95,18 @@
label = "lan4";
};
+ port@5 {
+ reg = <5>;
+ ethernet = <&gmac1>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
port@6 {
reg = <6>;
label = "cpu";
@@ -152,12 +176,13 @@
pinctrl-0 = <&spi_flash_pins>;
cs-gpios = <0>, <0>;
status = "okay";
- spi_nand: spi_nand@0 {
+
+ spi_nand: flash@0 {
compatible = "spi-nand";
reg = <0>;
spi-max-frequency = <10000000>;
- spi-tx-buswidth = <4>;
- spi-rx-buswidth = <4>;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7988a-bananapi-bpi-r4.dts b/arch/arm64/boot/dts/mediatek/mt7988a-bananapi-bpi-r4.dts
new file mode 100644
index 000000000000..efc4ad0b08b8
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt7988a-bananapi-bpi-r4.dts
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+/dts-v1/;
+
+#include "mt7988a.dtsi"
+
+/ {
+ compatible = "bananapi,bpi-r4", "mediatek,mt7988a";
+ model = "Banana Pi BPI-R4";
+ chassis-type = "embedded";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt7988a.dtsi b/arch/arm64/boot/dts/mediatek/mt7988a.dtsi
new file mode 100644
index 000000000000..bba97de4fb44
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt7988a.dtsi
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ compatible = "mediatek,mt7988a";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "arm,cortex-a73";
+ reg = <0x0>;
+ device_type = "cpu";
+ enable-method = "psci";
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a73";
+ reg = <0x1>;
+ device_type = "cpu";
+ enable-method = "psci";
+ };
+
+ cpu@2 {
+ compatible = "arm,cortex-a73";
+ reg = <0x2>;
+ device_type = "cpu";
+ enable-method = "psci";
+ };
+
+ cpu@3 {
+ compatible = "arm,cortex-a73";
+ reg = <0x3>;
+ device_type = "cpu";
+ enable-method = "psci";
+ };
+ };
+
+ oscillator-40m {
+ compatible = "fixed-clock";
+ clock-frequency = <40000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clkxtal";
+ };
+
+ pmu {
+ compatible = "arm,cortex-a73-pmu";
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+
+ soc {
+ compatible = "simple-bus";
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ gic: interrupt-controller@c000000 {
+ compatible = "arm,gic-v3";
+ reg = <0 0x0c000000 0 0x40000>, /* GICD */
+ <0 0x0c080000 0 0x200000>, /* GICR */
+ <0 0x0c400000 0 0x2000>, /* GICC */
+ <0 0x0c410000 0 0x1000>, /* GICH */
+ <0 0x0c420000 0 0x2000>; /* GICV */
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ };
+
+ clock-controller@10001000 {
+ compatible = "mediatek,mt7988-infracfg", "syscon";
+ reg = <0 0x10001000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ clock-controller@1001b000 {
+ compatible = "mediatek,mt7988-topckgen", "syscon";
+ reg = <0 0x1001b000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ watchdog: watchdog@1001c000 {
+ compatible = "mediatek,mt7988-wdt";
+ reg = <0 0x1001c000 0 0x1000>;
+ interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+ #reset-cells = <1>;
+ };
+
+ clock-controller@1001e000 {
+ compatible = "mediatek,mt7988-apmixedsys";
+ reg = <0 0x1001e000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ clock-controller@11f40000 {
+ compatible = "mediatek,mt7988-xfi-pll";
+ reg = <0 0x11f40000 0 0x1000>;
+ resets = <&watchdog 16>;
+ #clock-cells = <1>;
+ };
+
+ clock-controller@15000000 {
+ compatible = "mediatek,mt7988-ethsys", "syscon";
+ reg = <0 0x15000000 0 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ clock-controller@15031000 {
+ compatible = "mediatek,mt7988-ethwarp";
+ reg = <0 0x15031000 0 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm-hana-rev7.dts b/arch/arm64/boot/dts/mediatek/mt8173-elm-hana-rev7.dts
index 256f245ac01d..1c9fc791bdfc 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-elm-hana-rev7.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-elm-hana-rev7.dts
@@ -14,7 +14,7 @@
&cpu_thermal {
trips {
- cpu_crit: cpu_crit0 {
+ cpu_crit: cpu-crit0 {
temperature = <100000>;
type = "critical";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi
index 8d614ac2c58e..6d962d437e02 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi
@@ -1135,7 +1135,7 @@
compatible = "mediatek,mt6397-rtc";
};
- syscfg_pctl_pmic: syscfg_pctl_pmic@c000 {
+ syscfg_pctl_pmic: syscon@c000 {
compatible = "mediatek,mt6397-pctl-pmic-syscfg",
"syscon";
reg = <0 0x0000c000 0 0x0108>;
@@ -1155,6 +1155,7 @@
spi-max-frequency = <12000000>;
interrupts-extended = <&pio 0 IRQ_TYPE_LEVEL_LOW>;
google,cros-ec-spi-msg-delay = <500>;
+ wakeup-source;
i2c_tunnel: i2c-tunnel0 {
compatible = "google,cros-ec-i2c-tunnel";
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 0e5c628d1ec3..3fab21f59d18 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -41,7 +41,7 @@
extcon_usb: extcon_iddig {
compatible = "linux,extcon-usb-gpio";
- id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>;
+ id-gpios = <&pio 16 GPIO_ACTIVE_HIGH>;
};
usb_p1_vbus: regulator-usb-p1 {
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index cac4cd0a0320..3458be7f7f61 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -222,14 +222,14 @@
};
};
- pmu_a53 {
+ pmu-a53 {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 9 IRQ_TYPE_LEVEL_LOW>;
interrupt-affinity = <&cpu0>, <&cpu1>;
};
- pmu_a72 {
+ pmu-a72 {
compatible = "arm,cortex-a72-pmu";
interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 13 IRQ_TYPE_LEVEL_LOW>;
@@ -286,7 +286,7 @@
type = "passive";
};
- cpu_crit: cpu_crit0 {
+ cpu_crit: cpu-crit0 {
temperature = <115000>;
hysteresis = <2000>;
type = "critical";
@@ -318,7 +318,7 @@
#address-cells = <2>;
#size-cells = <2>;
ranges;
- vpu_dma_reserved: vpu_dma_mem_region@b7000000 {
+ vpu_dma_reserved: vpu-dma-mem@b7000000 {
compatible = "shared-dma-pool";
reg = <0 0xb7000000 0 0x500000>;
alignment = <0x1000>;
@@ -366,7 +366,7 @@
#reset-cells = <1>;
};
- syscfg_pctl_a: syscfg_pctl_a@10005000 {
+ syscfg_pctl_a: syscon@10005000 {
compatible = "mediatek,mt8173-pctl-a-syscfg", "syscon";
reg = <0 0x10005000 0 0x1000>;
};
@@ -590,6 +590,15 @@
reg = <0 0x10206000 0 0x1000>;
#address-cells = <1>;
#size-cells = <1>;
+
+ socinfo-data1@40 {
+ reg = <0x040 0x4>;
+ };
+
+ socinfo-data2@44 {
+ reg = <0x044 0x4>;
+ };
+
thermal_calibration: calib@528 {
reg = <0x528 0xc>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi
index b6a9830af269..bfb9e42c8aca 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi
@@ -360,6 +360,10 @@
};
&cros_ec {
+ cbas {
+ compatible = "google,cros-cbas";
+ };
+
keyboard-controller {
compatible = "google,cros-ec-keyb-switches";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi
index 306c95166f3f..5c1bf6a1e475 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi
@@ -339,6 +339,10 @@
};
&cros_ec {
+ cbas {
+ compatible = "google,cros-cbas";
+ };
+
keyboard-controller {
compatible = "google,cros-ec-keyb-switches";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi
index 382e4c6d7191..0f5fa893a774 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi
@@ -343,6 +343,10 @@
};
&cros_ec {
+ cbas {
+ compatible = "google,cros-cbas";
+ };
+
keyboard-controller {
compatible = "google,cros-ec-keyb-switches";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index 5506de83f61d..6bd7424ef66c 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -888,7 +888,7 @@
status = "okay";
cs-gpios = <&pio 86 GPIO_ACTIVE_LOW>;
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
spi-max-frequency = <1000000>;
@@ -924,6 +924,7 @@
interrupts-extended = <&pio 151 IRQ_TYPE_LEVEL_LOW>;
pinctrl-names = "default";
pinctrl-0 = <&ec_ap_int_odl>;
+ wakeup-source;
i2c_tunnel: i2c-tunnel {
compatible = "google,cros-ec-i2c-tunnel";
@@ -937,10 +938,6 @@
google,usb-port-id = <0>;
};
- cbas {
- compatible = "google,cros-cbas";
- };
-
typec {
compatible = "google,cros-ec-typec";
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts
index 76449b4cf236..333c516af490 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts
@@ -33,7 +33,7 @@
#size-cells = <2>;
ranges;
- scp_mem_reserved: scp_mem_region@50000000 {
+ scp_mem_reserved: scp-mem@50000000 {
compatible = "shared-dma-pool";
reg = <0 0x50000000 0 0x2900000>;
no-map;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 920ee415ef5f..93dfbf130231 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1585,6 +1585,15 @@
reg = <0 0x11f10000 0 0x1000>;
#address-cells = <1>;
#size-cells = <1>;
+
+ socinfo-data1@4c {
+ reg = <0x04c 0x4>;
+ };
+
+ socinfo-data2@60 {
+ reg = <0x060 0x4>;
+ };
+
thermal_calibration: calib@180 {
reg = <0x180 0xc>;
};
@@ -1955,7 +1964,7 @@
power-domains = <&spm MT8183_POWER_DOMAIN_VENC>;
};
- venc_jpg: venc_jpg@17030000 {
+ venc_jpg: jpeg-encoder@17030000 {
compatible = "mediatek,mt8183-jpgenc", "mediatek,mtk-jpgenc";
reg = <0 0x17030000 0 0x1000>;
interrupts = <GIC_SPI 249 IRQ_TYPE_LEVEL_LOW>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-krabby.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola-krabby.dtsi
new file mode 100644
index 000000000000..7c971198fa95
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-krabby.dtsi
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+ aliases {
+ i2c4 = &i2c4;
+ };
+};
+
+&dsi_out {
+ remote-endpoint = <&ps8640_in>;
+};
+
+&i2c0 {
+ clock-frequency = <400000>;
+
+ edp-bridge@8 {
+ compatible = "parade,ps8640";
+ reg = <0x8>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ps8640_pins>;
+ powerdown-gpios = <&pio 96 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&pio 98 GPIO_ACTIVE_LOW>;
+ vdd12-supply = <&mt6366_vrf12_reg>;
+ vdd33-supply = <&mt6366_vcn33_reg>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ ps8640_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ ps8640_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+ aux-bus {
+ panel {
+ compatible = "edp-panel";
+ power-supply = <&pp3300_disp_x>;
+ backlight = <&backlight_lcd0>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&ps8640_out>;
+ };
+ };
+ };
+ };
+ };
+};
+
+&i2c1 {
+ i2c-scl-internal-delay-ns = <10000>;
+
+ touchscreen: touchscreen@10 {
+ compatible = "hid-over-i2c";
+ reg = <0x10>;
+ interrupts-extended = <&pio 12 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&touchscreen_pins>;
+ post-power-on-delay-ms = <10>;
+ hid-descr-addr = <0x0001>;
+ vdd-supply = <&pp3300_s3>;
+ };
+};
+
+&i2c4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c4_pins>;
+ clock-frequency = <400000>;
+ status = "okay";
+
+ proximity@28 {
+ compatible = "semtech,sx9324";
+ reg = <0x28>;
+ #io-channel-cells = <1>;
+ interrupts-extended = <&pio 5 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sar_sensor_pins>;
+ vdd-supply = <&mt6366_vio18_reg>;
+ svdd-supply = <&mt6366_vio18_reg>;
+ };
+};
+
+&pio {
+ i2c4_pins: i2c4-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO136__FUNC_SDA4>,
+ <PINMUX_GPIO135__FUNC_SCL4>;
+ bias-disable;
+ drive-strength = <4>;
+ input-enable;
+ };
+ };
+
+ ps8640_pins: ps8640-pins {
+ pins-pwrdn-rst {
+ pinmux = <PINMUX_GPIO96__FUNC_GPIO96>,
+ <PINMUX_GPIO98__FUNC_GPIO98>;
+ output-low;
+ };
+ };
+
+ sar_sensor_pins: sar-sensor-pins {
+ pins-irq {
+ pinmux = <PINMUX_GPIO5__FUNC_GPIO5>;
+ input-enable;
+ bias-pull-up;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393216.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393216.dts
new file mode 100644
index 000000000000..c9673381ad3b
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393216.dts
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-steelix.dtsi"
+
+/ {
+ model = "Google Magneton board";
+ compatible = "google,steelix-sku393219", "google,steelix-sku393216",
+ "google,steelix", "mediatek,mt8186";
+ chassis-type = "laptop";
+};
+
+&gpio_keys {
+ status = "disabled";
+};
+
+&i2c1 {
+ touchscreen@10 {
+ compatible = "hid-over-i2c";
+ reg = <0x10>;
+ interrupts-extended = <&pio 12 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&touchscreen_pins>;
+ vdd-supply = <&pp3300_s3>;
+ post-power-on-delay-ms = <350>;
+ hid-descr-addr = <0x0001>;
+ };
+};
+
+&touchscreen {
+ status = "disabled";
+};
+
+&usb_c1 {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393217.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393217.dts
new file mode 100644
index 000000000000..28e3bbe56421
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393217.dts
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-steelix.dtsi"
+
+/ {
+ model = "Google Magneton board";
+ compatible = "google,steelix-sku393220", "google,steelix-sku393217",
+ "google,steelix", "mediatek,mt8186";
+ chassis-type = "laptop";
+};
+
+&gpio_keys {
+ status = "disabled";
+};
+
+&i2c1 {
+ touchscreen@40 {
+ compatible = "hid-over-i2c";
+ reg = <0x40>;
+ interrupts-extended = <&pio 12 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&touchscreen_pins>;
+ vdd-supply = <&pp3300_s3>;
+ post-power-on-delay-ms = <450>;
+ hid-descr-addr = <0x0001>;
+ };
+};
+
+&touchscreen {
+ status = "disabled";
+};
+
+&usb_c1 {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393218.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393218.dts
new file mode 100644
index 000000000000..332894218f07
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-magneton-sku393218.dts
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-steelix.dtsi"
+
+/ {
+ model = "Google Magneton board";
+ compatible = "google,steelix-sku393221", "google,steelix-sku393218",
+ "google,steelix", "mediatek,mt8186";
+ chassis-type = "laptop";
+};
+
+&gpio_keys {
+ status = "disabled";
+};
+
+&touchscreen {
+ status = "disabled";
+};
+
+&usb_c1 {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-rusty-sku196608.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-rusty-sku196608.dts
new file mode 100644
index 000000000000..731b0d60228d
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-rusty-sku196608.dts
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-steelix.dtsi"
+
+/ {
+ model = "Google Rusty board";
+ compatible = "google,steelix-sku196609", "google,steelix-sku196608",
+ "google,steelix", "mediatek,mt8186";
+ chassis-type = "laptop";
+};
+
+&gpio_keys {
+ status = "disabled";
+};
+
+&i2c1 {
+ status = "disabled";
+};
+
+&touchscreen {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131072.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131072.dts
new file mode 100644
index 000000000000..eae17bca8585
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131072.dts
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-steelix.dtsi"
+
+/ {
+ model = "Google Steelix board";
+ compatible = "google,steelix-sku131072", "google,steelix",
+ "mediatek,mt8186";
+ chassis-type = "convertible";
+};
+
+&mt6366codec {
+ mediatek,dmic-mode = <0>; /* two-wire */
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131073.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131073.dts
new file mode 100644
index 000000000000..a55375b95d0d
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix-sku131073.dts
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-steelix.dtsi"
+
+/ {
+ model = "Google Steelix board";
+ compatible = "google,steelix-sku131073", "google,steelix",
+ "mediatek,mt8186";
+ chassis-type = "convertible";
+};
+
+&mt6366codec {
+ mediatek,dmic-mode = <1>; /* one-wire */
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix.dtsi
new file mode 100644
index 000000000000..e74e886a00cb
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-steelix.dtsi
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/{
+ pp1000_edpbrdg: regulator-pp1000-edpbrdg {
+ compatible = "regulator-fixed";
+ regulator-name = "pp1000_edpbrdg";
+ pinctrl-names = "default";
+ pinctrl-0 = <&en_pp1000_edpbrdg>;
+ enable-active-high;
+ regulator-boot-on;
+ gpio = <&pio 29 GPIO_ACTIVE_HIGH>;
+ vin-supply = <&pp3300_z2>;
+ };
+
+ pp1800_edpbrdg_dx: regulator-pp1800-edpbrdg-dx {
+ compatible = "regulator-fixed";
+ regulator-name = "pp1800_edpbrdg_dx";
+ pinctrl-names = "default";
+ pinctrl-0 = <&en_pp1800_edpbrdg>;
+ enable-active-high;
+ regulator-boot-on;
+ gpio = <&pio 30 GPIO_ACTIVE_HIGH>;
+ vin-supply = <&mt6366_vio18_reg>;
+ };
+
+ pp3300_edp_dx: regulator-pp3300-edp-dx {
+ compatible = "regulator-fixed";
+ regulator-name = "pp3300_edp_dx";
+ pinctrl-names = "default";
+ pinctrl-0 = <&en_pp3300_edpbrdg>;
+ enable-active-high;
+ regulator-boot-on;
+ gpio = <&pio 31 GPIO_ACTIVE_HIGH>;
+ vin-supply = <&pp3300_z2>;
+ };
+};
+
+&dsi_out {
+ remote-endpoint = <&anx7625_in>;
+};
+
+&i2c0 {
+ clock-frequency = <400000>;
+
+ anx_bridge: anx7625@58 {
+ compatible = "analogix,anx7625";
+ reg = <0x58>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&anx7625_pins>;
+ enable-gpios = <&pio 96 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&pio 98 GPIO_ACTIVE_HIGH>;
+ vdd10-supply = <&pp1000_edpbrdg>;
+ vdd18-supply = <&pp1800_edpbrdg_dx>;
+ vdd33-supply = <&pp3300_edp_dx>;
+ analogix,lane0-swing = /bits/ 8 <0x70 0x30>;
+ analogix,lane1-swing = /bits/ 8 <0x70 0x30>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ anx7625_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ data-lanes = <0 1 2 3>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ anx7625_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+ aux-bus {
+ panel: panel {
+ compatible = "edp-panel";
+ power-supply = <&pp3300_disp_x>;
+ backlight = <&backlight_lcd0>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&anx7625_out>;
+ };
+ };
+ };
+ };
+ };
+};
+
+&i2c1 {
+ touchscreen: touchscreen@5d {
+ compatible = "goodix,gt7375p";
+ reg = <0x5d>;
+ interrupts-extended = <&pio 12 IRQ_TYPE_EDGE_FALLING>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&touchscreen_pins>;
+ reset-gpios = <&pio 60 GPIO_ACTIVE_LOW>;
+ vdd-supply = <&pp3300_s3>;
+ goodix,no-reset-during-suspend;
+ };
+};
+
+&i2c2 {
+ i2c-scl-internal-delay-ns = <22000>;
+
+ /* second source component */
+ trackpad@2c {
+ compatible = "hid-over-i2c";
+ reg = <0x2c>;
+ hid-descr-addr = <0x20>;
+ interrupts-extended = <&pio 11 IRQ_TYPE_LEVEL_LOW>;
+ vdd-supply = <&pp3300_s3>;
+ wakeup-source;
+ };
+};
+
+&keyboard_controller {
+ function-row-physmap = <
+ MATRIX_KEY(0x00, 0x02, 0) /* T1 */
+ MATRIX_KEY(0x03, 0x02, 0) /* T2 */
+ MATRIX_KEY(0x02, 0x02, 0) /* T3 */
+ MATRIX_KEY(0x01, 0x02, 0) /* T4 */
+ MATRIX_KEY(0x03, 0x04, 0) /* T5 */
+ MATRIX_KEY(0x02, 0x04, 0) /* T6 */
+ MATRIX_KEY(0x01, 0x04, 0) /* T7 */
+ MATRIX_KEY(0x02, 0x09, 0) /* T8 */
+ MATRIX_KEY(0x01, 0x09, 0) /* T9 */
+ MATRIX_KEY(0x00, 0x04, 0) /* T10 */
+ >;
+
+ linux,keymap = <
+ MATRIX_KEY(0x00, 0x02, KEY_BACK)
+ MATRIX_KEY(0x03, 0x02, KEY_REFRESH)
+ MATRIX_KEY(0x02, 0x02, KEY_ZOOM)
+ MATRIX_KEY(0x01, 0x02, KEY_SCALE)
+ MATRIX_KEY(0x03, 0x04, KEY_BRIGHTNESSDOWN)
+ MATRIX_KEY(0x02, 0x04, KEY_BRIGHTNESSUP)
+ MATRIX_KEY(0x01, 0x04, KEY_MICMUTE)
+ MATRIX_KEY(0x02, 0x09, KEY_MUTE)
+ MATRIX_KEY(0x01, 0x09, KEY_VOLUMEDOWN)
+ MATRIX_KEY(0x00, 0x04, KEY_VOLUMEUP)
+ CROS_STD_MAIN_KEYMAP
+ >;
+};
+
+&pio {
+ anx7625_pins: anx7625-pins {
+ pins-int {
+ pinmux = <PINMUX_GPIO9__FUNC_GPIO9>;
+ input-enable;
+ bias-disable;
+ };
+
+ pins-reset {
+ pinmux = <PINMUX_GPIO98__FUNC_GPIO98>;
+ output-low;
+ };
+
+ pins-power-en {
+ pinmux = <PINMUX_GPIO96__FUNC_GPIO96>;
+ output-low;
+ };
+ };
+
+ en_pp1000_edpbrdg: pp1000-edpbrdg-en-pins {
+ pins-vreg-en {
+ pinmux = <PINMUX_GPIO29__FUNC_GPIO29>;
+ output-low;
+ };
+ };
+
+ en_pp1800_edpbrdg: pp1800-edpbrdg-en-pins {
+ pins-vreg-en {
+ pinmux = <PINMUX_GPIO30__FUNC_GPIO30>;
+ output-low;
+ };
+ };
+
+ en_pp3300_edpbrdg: pp3300-edpbrdg-en-pins {
+ pins-vreg-en {
+ pinmux = <PINMUX_GPIO31__FUNC_GPIO31>;
+ output-low;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327681.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327681.dts
new file mode 100644
index 000000000000..9bb64353ca65
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327681.dts
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-krabby.dtsi"
+
+/ {
+ model = "Google Tentacool board";
+ compatible = "google,tentacruel-sku327681", "google,tentacruel", "mediatek,mt8186";
+ chassis-type = "laptop";
+};
+
+/* Tentacool omits the pen. */
+&gpio_keys {
+ status = "disabled";
+};
+
+/* Tentacool omits the touchscreen; nothing else is on i2c1. */
+&i2c1 {
+ status = "disabled";
+};
+
+&keyboard_controller {
+ function-row-physmap = <
+ MATRIX_KEY(0x00, 0x02, 0) /* T1 */
+ MATRIX_KEY(0x03, 0x02, 0) /* T2 */
+ MATRIX_KEY(0x02, 0x02, 0) /* T3 */
+ MATRIX_KEY(0x01, 0x02, 0) /* T4 */
+ MATRIX_KEY(0x03, 0x04, 0) /* T5 */
+ MATRIX_KEY(0x02, 0x04, 0) /* T6 */
+ MATRIX_KEY(0x01, 0x04, 0) /* T7 */
+ MATRIX_KEY(0x02, 0x09, 0) /* T8 */
+ MATRIX_KEY(0x01, 0x09, 0) /* T9 */
+ MATRIX_KEY(0x00, 0x04, 0) /* T10 */
+ >;
+
+ linux,keymap = <
+ MATRIX_KEY(0x00, 0x02, KEY_BACK)
+ MATRIX_KEY(0x03, 0x02, KEY_REFRESH)
+ MATRIX_KEY(0x02, 0x02, KEY_ZOOM)
+ MATRIX_KEY(0x01, 0x02, KEY_SCALE)
+ MATRIX_KEY(0x03, 0x04, KEY_SYSRQ)
+ MATRIX_KEY(0x02, 0x04, KEY_BRIGHTNESSDOWN)
+ MATRIX_KEY(0x01, 0x04, KEY_BRIGHTNESSUP)
+ MATRIX_KEY(0x02, 0x09, KEY_MUTE)
+ MATRIX_KEY(0x01, 0x09, KEY_VOLUMEDOWN)
+ MATRIX_KEY(0x00, 0x04, KEY_VOLUMEUP)
+ CROS_STD_MAIN_KEYMAP
+ >;
+};
+
+/* Tentacool omits the touchscreen. */
+&touchscreen {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327683.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327683.dts
new file mode 100644
index 000000000000..c3ae6f9616c8
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacool-sku327683.dts
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2023 Google LLC
+ */
+
+#include "mt8186-corsola-tentacool-sku327681.dts"
+
+/ {
+ compatible = "google,tentacruel-sku327683", "google,tentacruel", "mediatek,mt8186";
+};
+
+/* This variant replaces only the trackpad controller. */
+&i2c2 {
+ /delete-node/ trackpad@15;
+
+ trackpad@15 {
+ compatible = "hid-over-i2c";
+ reg = <0x15>;
+ interrupts-extended = <&pio 11 IRQ_TYPE_LEVEL_LOW>;
+ hid-descr-addr = <0x0001>;
+ vdd-supply = <&pp3300_s3>;
+ wakeup-source;
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262144.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262144.dts
new file mode 100644
index 000000000000..26d3451a5e47
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262144.dts
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2022 Google LLC
+ */
+
+/dts-v1/;
+#include "mt8186-corsola-krabby.dtsi"
+
+/ {
+ model = "Google Tentacruel board";
+ compatible = "google,tentacruel-sku262147", "google,tentacruel-sku262146",
+ "google,tentacruel-sku262145", "google,tentacruel-sku262144",
+ "google,tentacruel", "mediatek,mt8186";
+ chassis-type = "convertible";
+};
+
+&keyboard_controller {
+ function-row-physmap = <
+ MATRIX_KEY(0x00, 0x02, 0) /* T1 */
+ MATRIX_KEY(0x03, 0x02, 0) /* T2 */
+ MATRIX_KEY(0x02, 0x02, 0) /* T3 */
+ MATRIX_KEY(0x01, 0x02, 0) /* T4 */
+ MATRIX_KEY(0x03, 0x04, 0) /* T5 */
+ MATRIX_KEY(0x02, 0x04, 0) /* T6 */
+ MATRIX_KEY(0x01, 0x04, 0) /* T7 */
+ MATRIX_KEY(0x02, 0x09, 0) /* T8 */
+ MATRIX_KEY(0x01, 0x09, 0) /* T9 */
+ MATRIX_KEY(0x00, 0x04, 0) /* T10 */
+ >;
+
+ linux,keymap = <
+ MATRIX_KEY(0x00, 0x02, KEY_BACK)
+ MATRIX_KEY(0x03, 0x02, KEY_REFRESH)
+ MATRIX_KEY(0x02, 0x02, KEY_ZOOM)
+ MATRIX_KEY(0x01, 0x02, KEY_SCALE)
+ MATRIX_KEY(0x03, 0x04, KEY_SYSRQ)
+ MATRIX_KEY(0x02, 0x04, KEY_BRIGHTNESSDOWN)
+ MATRIX_KEY(0x01, 0x04, KEY_BRIGHTNESSUP)
+ MATRIX_KEY(0x02, 0x09, KEY_MUTE)
+ MATRIX_KEY(0x01, 0x09, KEY_VOLUMEDOWN)
+ MATRIX_KEY(0x00, 0x04, KEY_VOLUMEUP)
+ CROS_STD_MAIN_KEYMAP
+ >;
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262148.dts b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262148.dts
new file mode 100644
index 000000000000..447b57b12b41
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola-tentacruel-sku262148.dts
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright 2023 Google LLC
+ */
+
+#include "mt8186-corsola-tentacruel-sku262144.dts"
+
+/ {
+ compatible = "google,tentacruel-sku262151", "google,tentacruel-sku262150",
+ "google,tentacruel-sku262149", "google,tentacruel-sku262148",
+ "google,tentacruel", "mediatek,mt8186";
+};
+
+/* This variant replaces only the trackpad controller. */
+&i2c2 {
+ /delete-node/ trackpad@15;
+
+ trackpad@15 {
+ compatible = "hid-over-i2c";
+ reg = <0x15>;
+ interrupts-extended = <&pio 11 IRQ_TYPE_LEVEL_LOW>;
+ hid-descr-addr = <0x0001>;
+ vdd-supply = <&pp3300_s3>;
+ wakeup-source;
+ };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
new file mode 100644
index 000000000000..3dea28f1d806
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
@@ -0,0 +1,1681 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright (C) 2022 MediaTek Inc.
+ */
+/dts-v1/;
+#include "mt8186.dtsi"
+#include <dt-bindings/pinctrl/mt8186-pinfunc.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/input/gpio-keys.h>
+#include <dt-bindings/regulator/mediatek,mt6397-regulator.h>
+
+/ {
+ aliases {
+ i2c0 = &i2c0;
+ i2c1 = &i2c1;
+ i2c2 = &i2c2;
+ i2c3 = &i2c3;
+ i2c5 = &i2c5;
+ mmc0 = &mmc0;
+ mmc1 = &mmc1;
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@40000000 {
+ device_type = "memory";
+ /* The size should be filled in by the bootloader. */
+ reg = <0 0x40000000 0 0>;
+ };
+
+ backlight_lcd0: backlight-lcd0 {
+ compatible = "pwm-backlight";
+ pwms = <&pwm0 0 500000>;
+ power-supply = <&ppvar_sys>;
+ enable-gpios = <&pio 152 0>;
+ brightness-levels = <0 1023>;
+ num-interpolated-steps = <1023>;
+ default-brightness-level = <576>;
+ };
+
+ bt-sco-codec {
+ compatible = "linux,bt-sco";
+ #sound-dai-cells = <0>;
+ };
+
+ dmic-codec {
+ compatible = "dmic-codec";
+ #sound-dai-cells = <0>;
+ num-channels = <2>;
+ wakeup-delay-ms = <50>;
+ };
+
+ gpio_keys: gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pen_eject>;
+
+ pen_insert: pen-insert-switch {
+ label = "Pen Insert";
+ /* Insert = low, eject = high */
+ gpios = <&pio 18 GPIO_ACTIVE_LOW>;
+ wakeup-event-action = <EV_ACT_DEASSERTED>;
+ wakeup-source;
+ linux,code = <SW_PEN_INSERTED>;
+ linux,input-type = <EV_SW>;
+ };
+ };
+
+ pp1800_dpbrdg_dx: regulator-pp1800-dpbrdg-dx {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&en_pp1800_dpbrdg>;
+ gpios = <&pio 39 GPIO_ACTIVE_HIGH>;
+ regulator-name = "pp1800_dpbrdg_dx";
+ enable-active-high;
+ vin-supply = <&mt6366_vio18_reg>;
+ };
+
+ pp3300_disp_x: regulator-pp3300-disp-x {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&edp_panel_fixed_pins>;
+ gpios = <&pio 153 GPIO_ACTIVE_HIGH>;
+ regulator-name = "pp3300_disp_x";
+ enable-active-high;
+ regulator-boot-on;
+ vin-supply = <&pp3300_z2>;
+ };
+
+ /* system wide LDO 3.3V power rail */
+ pp3300_z5: regulator-pp3300-ldo-z5 {
+ compatible = "regulator-fixed";
+ regulator-name = "pp3300_ldo_z5";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&ppvar_sys>;
+ };
+
+ /* separately switched 3.3V power rail */
+ pp3300_s3: regulator-pp3300-s3 {
+ compatible = "regulator-fixed";
+ regulator-name = "pp3300_s3";
+ /* automatically sequenced by PMIC EXT_PMIC_EN2 */
+ regulator-always-on;
+ regulator-boot-on;
+ vin-supply = <&pp3300_z2>;
+ };
+
+ /* system wide 3.3V power rail */
+ pp3300_z2: regulator-pp3300-z2 {
+ compatible = "regulator-fixed";
+ regulator-name = "pp3300_z2";
+ /* EN pin tied to pp4200_z2, which is controlled by EC */
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&ppvar_sys>;
+ };
+
+ /* system wide 4.2V power rail */
+ pp4200_z2: regulator-pp4200-z2 {
+ compatible = "regulator-fixed";
+ regulator-name = "pp4200_z2";
+ /* controlled by EC */
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <4200000>;
+ regulator-max-microvolt = <4200000>;
+ vin-supply = <&ppvar_sys>;
+ };
+
+ /* system wide switching 5.0V power rail */
+ pp5000_z2: regulator-pp5000-z2 {
+ compatible = "regulator-fixed";
+ regulator-name = "pp5000_z2";
+ /* controlled by EC */
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&ppvar_sys>;
+ };
+
+ /* system wide semi-regulated power rail from battery or USB */
+ ppvar_sys: regulator-ppvar-sys {
+ compatible = "regulator-fixed";
+ regulator-name = "ppvar_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ reserved_memory: reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ adsp_dma_mem: memory@61000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x61000000 0 0x100000>;
+ no-map;
+ };
+
+ adsp_mem: memory@60000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x60000000 0 0xA00000>;
+ no-map;
+ };
+
+ scp_mem: memory@50000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x50000000 0 0x10a0000>;
+ no-map;
+ };
+ };
+
+ sound: sound {
+ compatible = "mediatek,mt8186-mt6366-rt1019-rt5682s-sound";
+ pinctrl-names = "aud_clk_mosi_off",
+ "aud_clk_mosi_on",
+ "aud_clk_miso_off",
+ "aud_clk_miso_on",
+ "aud_dat_miso_off",
+ "aud_dat_miso_on",
+ "aud_dat_mosi_off",
+ "aud_dat_mosi_on",
+ "aud_gpio_i2s0_off",
+ "aud_gpio_i2s0_on",
+ "aud_gpio_i2s1_off",
+ "aud_gpio_i2s1_on",
+ "aud_gpio_i2s2_off",
+ "aud_gpio_i2s2_on",
+ "aud_gpio_i2s3_off",
+ "aud_gpio_i2s3_on",
+ "aud_gpio_pcm_off",
+ "aud_gpio_pcm_on",
+ "aud_gpio_dmic_sec";
+ pinctrl-0 = <&aud_clk_mosi_off>;
+ pinctrl-1 = <&aud_clk_mosi_on>;
+ pinctrl-2 = <&aud_clk_miso_off>;
+ pinctrl-3 = <&aud_clk_miso_on>;
+ pinctrl-4 = <&aud_dat_miso_off>;
+ pinctrl-5 = <&aud_dat_miso_on>;
+ pinctrl-6 = <&aud_dat_mosi_off>;
+ pinctrl-7 = <&aud_dat_mosi_on>;
+ pinctrl-8 = <&aud_gpio_i2s0_off>;
+ pinctrl-9 = <&aud_gpio_i2s0_on>;
+ pinctrl-10 = <&aud_gpio_i2s1_off>;
+ pinctrl-11 = <&aud_gpio_i2s1_on>;
+ pinctrl-12 = <&aud_gpio_i2s2_off>;
+ pinctrl-13 = <&aud_gpio_i2s2_on>;
+ pinctrl-14 = <&aud_gpio_i2s3_off>;
+ pinctrl-15 = <&aud_gpio_i2s3_on>;
+ pinctrl-16 = <&aud_gpio_pcm_off>;
+ pinctrl-17 = <&aud_gpio_pcm_on>;
+ pinctrl-18 = <&aud_gpio_dmic_sec>;
+ mediatek,adsp = <&adsp>;
+ mediatek,platform = <&afe>;
+
+ playback-codecs {
+ sound-dai = <&it6505dptx>, <&rt1019p>;
+ };
+
+ headset-codec {
+ sound-dai = <&rt5682s 0>;
+ };
+ };
+
+ rt1019p: speaker-codec {
+ compatible = "realtek,rt1019p";
+ pinctrl-names = "default";
+ pinctrl-0 = <&rt1019p_pins_default>;
+ #sound-dai-cells = <0>;
+ sdb-gpios = <&pio 150 GPIO_ACTIVE_HIGH>;
+ };
+
+ usb_p1_vbus: regulator-usb-p1-vbus {
+ compatible = "regulator-fixed";
+ gpio = <&pio 148 GPIO_ACTIVE_HIGH>;
+ regulator-name = "vbus1";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ enable-active-high;
+ vin-supply = <&pp5000_z2>;
+ };
+
+ wifi_pwrseq: wifi-pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ pinctrl-names = "default";
+ pinctrl-0 = <&wifi_enable_pin>;
+ post-power-on-delay-ms = <50>;
+ reset-gpios = <&pio 54 GPIO_ACTIVE_LOW>;
+ };
+
+ wifi_wakeup: wifi-wakeup {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&wifi_wakeup_pin>;
+
+ wowlan-event {
+ label = "Wake on WiFi";
+ gpios = <&pio 7 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_WAKEUP>;
+ wakeup-source;
+ };
+ };
+};
+
+&adsp {
+ memory-region = <&adsp_dma_mem>, <&adsp_mem>;
+ status = "okay";
+};
+
+&afe {
+ status = "okay";
+};
+
+&cci {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu0 {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu1 {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu2 {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu3 {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu4 {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu5 {
+ proc-supply = <&mt6366_vproc12_reg>;
+};
+
+&cpu6 {
+ proc-supply = <&mt6366_vproc11_reg>;
+};
+
+&cpu7 {
+ proc-supply = <&mt6366_vproc11_reg>;
+};
+
+&dpi {
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&dpi_pins_default>;
+ pinctrl-1 = <&dpi_pins_sleep>;
+ status = "okay";
+};
+
+&dpi_out {
+ remote-endpoint = <&it6505_in>;
+};
+
+&dsi0 {
+ status = "okay";
+};
+
+&gic {
+ mediatek,broken-save-restore-fw;
+};
+
+&gpu {
+ mali-supply = <&mt6366_vgpu_reg>;
+ status = "okay";
+};
+
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_pins>;
+ status = "okay";
+};
+
+&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pins>;
+ clock-frequency = <400000>;
+ i2c-scl-internal-delay-ns = <8000>;
+ status = "okay";
+};
+
+&i2c2 {
+ pinctrl-names = "default";
+ /*
+ * Trackpad pin put here to work around second source components
+ * sharing the pinmux in steelix designs.
+ */
+ pinctrl-0 = <&i2c2_pins>, <&trackpad_pin>;
+ clock-frequency = <400000>;
+ i2c-scl-internal-delay-ns = <10000>;
+ status = "okay";
+
+ trackpad@15 {
+ compatible = "elan,ekth3000";
+ reg = <0x15>;
+ interrupts-extended = <&pio 11 IRQ_TYPE_LEVEL_LOW>;
+ vcc-supply = <&pp3300_s3>;
+ wakeup-source;
+ };
+};
+
+&i2c3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c3_pins>;
+ clock-frequency = <100000>;
+ status = "okay";
+
+ it6505dptx: dp-bridge@5c {
+ compatible = "ite,it6505";
+ reg = <0x5c>;
+ interrupts-extended = <&pio 8 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&it6505_pins>;
+ #sound-dai-cells = <0>;
+ ovdd-supply = <&mt6366_vsim2_reg>;
+ pwr18-supply = <&pp1800_dpbrdg_dx>;
+ reset-gpios = <&pio 177 GPIO_ACTIVE_HIGH>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ it6505_in: endpoint {
+ link-frequencies = /bits/ 64 <150000000>;
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+};
+
+&i2c5 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c5_pins>;
+ status = "okay";
+
+ rt5682s: codec@1a {
+ compatible = "realtek,rt5682s";
+ reg = <0x1a>;
+ interrupts-extended = <&pio 17 IRQ_TYPE_EDGE_BOTH>;
+ #sound-dai-cells = <1>;
+ AVDD-supply = <&mt6366_vio18_reg>;
+ DBVDD-supply = <&mt6366_vio18_reg>;
+ LDO1-IN-supply = <&mt6366_vio18_reg>;
+ MICVDD-supply = <&pp3300_z2>;
+ realtek,jd-src = <1>;
+ };
+};
+
+&mfg0 {
+ domain-supply = <&mt6366_vsram_gpu_reg>;
+};
+
+&mfg1 {
+ domain-supply = <&mt6366_vgpu_reg>;
+};
+
+&mipi_tx0 {
+ status = "okay";
+};
+
+&mmc0 {
+ pinctrl-names = "default", "state_uhs";
+ pinctrl-0 = <&mmc0_pins_default>;
+ pinctrl-1 = <&mmc0_pins_uhs>;
+ bus-width = <8>;
+ max-frequency = <200000000>;
+ non-removable;
+ cap-mmc-highspeed;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ supports-cqe;
+ no-sd;
+ no-sdio;
+ cap-mmc-hw-reset;
+ hs400-ds-delay = <0x11814>;
+ mediatek,hs400-ds-dly3 = <0x14>;
+ vmmc-supply = <&mt6366_vemc_reg>;
+ vqmmc-supply = <&mt6366_vio18_reg>;
+ status = "okay";
+};
+
+&mmc1 {
+ pinctrl-names = "default", "state_uhs", "state_eint";
+ pinctrl-0 = <&mmc1_pins_default>;
+ pinctrl-1 = <&mmc1_pins_uhs>;
+ pinctrl-2 = <&mmc1_pins_eint>;
+ /delete-property/ interrupts;
+ interrupt-names = "msdc", "sdio_wakeup";
+ interrupts-extended = <&gic GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH 0>,
+ <&pio 87 IRQ_TYPE_LEVEL_LOW>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ bus-width = <4>;
+ max-frequency = <200000000>;
+ cap-sd-highspeed;
+ sd-uhs-sdr104;
+ sd-uhs-sdr50;
+ keep-power-in-suspend;
+ wakeup-source;
+ cap-sdio-irq;
+ no-mmc;
+ no-sd;
+ non-removable;
+ vmmc-supply = <&pp3300_s3>;
+ vqmmc-supply = <&mt6366_vio18_reg>;
+ mmc-pwrseq = <&wifi_pwrseq>;
+ status = "okay";
+
+ bluetooth@2 {
+ compatible = "mediatek,mt7921s-bluetooth";
+ reg = <2>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_pins_reset>;
+ reset-gpios = <&pio 155 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&nor_flash {
+ assigned-clock-parents = <&topckgen CLK_TOP_MAINPLL_D7_D4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&nor_pins_default>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay";
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <39000000>;
+ };
+};
+
+&pio {
+ /* 185 lines */
+ gpio-line-names = "TP",
+ "TP",
+ "TP",
+ "I2S0_HP_DI",
+ "I2S3_DP_SPKR_DO",
+ "SAR_INT_ODL",
+ "BT_WAKE_AP_ODL",
+ "WIFI_INT_ODL",
+ "DPBRDG_INT_ODL",
+ "EDPBRDG_INT_ODL",
+ "EC_AP_HPD_OD",
+ "TCHPAD_INT_ODL",
+ "TCHSCR_INT_1V8_ODL",
+ "EC_AP_INT_ODL",
+ "EC_IN_RW_ODL",
+ "GSC_AP_INT_ODL",
+ /* AP_FLASH_WP_L is crossystem ABI. Rev1 schematics call it AP_WP_ODL. */
+ "AP_FLASH_WP_L",
+ "HP_INT_ODL",
+ "PEN_EJECT_OD",
+ "WCAM_PWDN_L",
+ "WCAM_RST_L",
+ "UCAM_SEN_EN",
+ "UCAM_RST_L",
+ "LTE_RESET_L",
+ "LTE_SAR_DETECT_L",
+ "I2S2_DP_SPK_MCK",
+ "I2S2_DP_SPKR_BCK",
+ "I2S2_DP_SPKR_LRCK",
+ "I2S2_DP_SPKR_DI (TP)",
+ "EN_PP1000_EDPBRDG",
+ "EN_PP1800_EDPBRDG",
+ "EN_PP3300_EDPBRDG",
+ "UART_GSC_TX_AP_RX",
+ "UART_AP_TX_GSC_RX",
+ "UART_DBGCON_TX_ADSP_RX",
+ "UART_ADSP_TX_DBGCON_RX",
+ "EN_PP1000_DPBRDG",
+ "TCHSCR_REPORT_DISABLE",
+ "EN_PP3300_DPBRDG",
+ "EN_PP1800_DPBRDG",
+ "SPI_AP_CLK_EC",
+ "SPI_AP_CS_EC_L",
+ "SPI_AP_DO_EC_DI",
+ "SPI_AP_DI_EC_DO",
+ "SPI_AP_CLK_GSC",
+ "SPI_AP_CS_GSC_L",
+ "SPI_AP_DO_GSC_DI",
+ "SPI_AP_DI_GSC_DO",
+ "UART_DBGCON_TX_SCP_RX",
+ "UART_SCP_TX_DBGCON_RX",
+ "EN_PP1200_CAM_X",
+ "EN_PP2800A_VCM_X",
+ "EN_PP2800A_UCAM_X",
+ "EN_PP2800A_WCAM_X",
+ "WLAN_MODULE_RST_L",
+ "EN_PP1200_UCAM_X",
+ "I2S1_HP_DO",
+ "I2S1_HP_BCK",
+ "I2S1_HP_LRCK",
+ "I2S1_HP_MCK",
+ "TCHSCR_RST_1V8_L",
+ "SPI_AP_CLK_ROM",
+ "SPI_AP_CS_ROM_L",
+ "SPI_AP_DO_ROM_DI",
+ "SPI_AP_DI_ROM_DO",
+ "NC",
+ "NC",
+ "EMMC_STRB",
+ "EMMC_CLK",
+ "EMMC_CMD",
+ "EMMC_RST_L",
+ "EMMC_DATA0",
+ "EMMC_DATA1",
+ "EMMC_DATA2",
+ "EMMC_DATA3",
+ "EMMC_DATA4",
+ "EMMC_DATA5",
+ "EMMC_DATA6",
+ "EMMC_DATA7",
+ "AP_KPCOL0",
+ "NC",
+ "NC",
+ "NC",
+ "TP",
+ "SDIO_CLK",
+ "SDIO_CMD",
+ "SDIO_DATA0",
+ "SDIO_DATA1",
+ "SDIO_DATA2",
+ "SDIO_DATA3",
+ "NC",
+ "NC",
+ "NC",
+ "NC",
+ "NC",
+ "NC",
+ "EDPBRDG_PWREN",
+ "BL_PWM_1V8",
+ "EDPBRDG_RST_L",
+ "MIPI_DPI_CLK",
+ "MIPI_DPI_VSYNC",
+ "MIPI_DPI_HSYNC",
+ "MIPI_DPI_DE",
+ "MIPI_DPI_D0",
+ "MIPI_DPI_D1",
+ "MIPI_DPI_D2",
+ "MIPI_DPI_D3",
+ "MIPI_DPI_D4",
+ "MIPI_DPI_D5",
+ "MIPI_DPI_D6",
+ "MIPI_DPI_DA7",
+ "MIPI_DPI_D8",
+ "MIPI_DPI_D9",
+ "MIPI_DPI_D10",
+ "MIPI_DPI_D11",
+ "PCM_BT_CLK",
+ "PCM_BT_SYNC",
+ "PCM_BT_DI",
+ "PCM_BT_DO",
+ "JTAG_TMS_TP",
+ "JTAG_TCK_TP",
+ "JTAG_TDI_TP",
+ "JTAG_TDO_TP",
+ "JTAG_TRSTN_TP",
+ "CLK_24M_WCAM",
+ "CLK_24M_UCAM",
+ "UCAM_DET_ODL",
+ "AP_I2C_EDPBRDG_SCL_1V8",
+ "AP_I2C_EDPBRDG_SDA_1V8",
+ "AP_I2C_TCHSCR_SCL_1V8",
+ "AP_I2C_TCHSCR_SDA_1V8",
+ "AP_I2C_TCHPAD_SCL_1V8",
+ "AP_I2C_TCHPAD_SDA_1V8",
+ "AP_I2C_DPBRDG_SCL_1V8",
+ "AP_I2C_DPBRDG_SDA_1V8",
+ "AP_I2C_WLAN_SCL_1V8",
+ "AP_I2C_WLAN_SDA_1V8",
+ "AP_I2C_AUD_SCL_1V8",
+ "AP_I2C_AUD_SDA_1V8",
+ "AP_I2C_TPM_SCL_1V8",
+ "AP_I2C_UCAM_SDA_1V8",
+ "AP_I2C_UCAM_SCL_1V8",
+ "AP_I2C_UCAM_SDA_1V8",
+ "AP_I2C_WCAM_SCL_1V8",
+ "AP_I2C_WCAM_SDA_1V8",
+ "SCP_I2C_SENSOR_SCL_1V8",
+ "SCP_I2C_SENSOR_SDA_1V8",
+ "AP_EC_WARM_RST_REQ",
+ "AP_XHCI_INIT_DONE",
+ "USB3_HUB_RST_L",
+ "EN_SPKR",
+ "BEEP_ON",
+ "AP_EDP_BKLTEN",
+ "EN_PP3300_DISP_X",
+ "EN_PP3300_SDBRDG_X",
+ "BT_KILL_1V8_L",
+ "WIFI_KILL_1V8_L",
+ "PWRAP_SPI0_CSN",
+ "PWRAP_SPI0_CK",
+ "PWRAP_SPI0_MO",
+ "PWRAP_SPI0_MI",
+ "SRCLKENA0",
+ "SRCLKENA1",
+ "SCP_VREQ_VAO",
+ "AP_RTC_CLK32K",
+ "AP_PMIC_WDTRST_L",
+ "AUD_CLK_MOSI",
+ "AUD_SYNC_MOSI",
+ "AUD_DAT_MOSI0",
+ "AUD_DAT_MOSI1",
+ "AUD_CLK_MISO",
+ "AUD_SYNC_MISO",
+ "AUD_DAT_MISO0",
+ "AUD_DAT_MISO1",
+ "NC",
+ "NC",
+ "DPBRDG_PWREN",
+ "DPBRDG_RST_L",
+ "LTE_W_DISABLE_L",
+ "LTE_SAR_DETECT_L",
+ "EN_PP3300_LTE_X",
+ "LTE_PWR_OFF_L",
+ "LTE_RESET_L",
+ "TP",
+ "TP";
+
+ aud_clk_mosi_off: aud-clk-mosi-off-pins {
+ pins-clk-sync {
+ pinmux = <PINMUX_GPIO166__FUNC_GPIO166>,
+ <PINMUX_GPIO167__FUNC_GPIO167>;
+ input-enable;
+ bias-pull-down;
+ };
+ };
+
+ aud_clk_mosi_on: aud-clk-mosi-on-pins {
+ pins-clk-sync {
+ pinmux = <PINMUX_GPIO166__FUNC_AUD_CLK_MOSI>,
+ <PINMUX_GPIO167__FUNC_AUD_SYNC_MOSI>;
+ };
+ };
+
+ aud_clk_miso_off: aud-clk-miso-off-pins {
+ pins-clk-sync {
+ pinmux = <PINMUX_GPIO170__FUNC_GPIO170>,
+ <PINMUX_GPIO171__FUNC_GPIO171>;
+ input-enable;
+ bias-pull-down;
+ };
+ };
+
+ aud_clk_miso_on: aud-clk-miso-on-pins {
+ pins-clk-sync {
+ pinmux = <PINMUX_GPIO170__FUNC_AUD_CLK_MISO>,
+ <PINMUX_GPIO171__FUNC_AUD_SYNC_MISO>;
+ };
+ };
+
+ aud_dat_mosi_off: aud-dat-mosi-off-pins {
+ pins-dat {
+ pinmux = <PINMUX_GPIO168__FUNC_GPIO168>,
+ <PINMUX_GPIO169__FUNC_GPIO169>;
+ input-enable;
+ bias-pull-down;
+ };
+ };
+
+ aud_dat_mosi_on: aud-dat-mosi-on-pins {
+ pins-dat {
+ pinmux = <PINMUX_GPIO168__FUNC_AUD_DAT_MOSI0>,
+ <PINMUX_GPIO169__FUNC_AUD_DAT_MOSI1>;
+ };
+ };
+
+ aud_dat_miso_off: aud-dat-miso-off-pins {
+ pins-dat {
+ pinmux = <PINMUX_GPIO172__FUNC_GPIO172>,
+ <PINMUX_GPIO173__FUNC_GPIO173>;
+ input-enable;
+ bias-pull-down;
+ };
+ };
+
+ aud_dat_miso_on: aud-dat-miso-on-pins {
+ pins-dat {
+ pinmux = <PINMUX_GPIO172__FUNC_AUD_DAT_MISO0>,
+ <PINMUX_GPIO173__FUNC_AUD_DAT_MISO1>;
+ input-schmitt-enable;
+ bias-disable;
+ };
+ };
+
+ aud_gpio_i2s0_off: aud-gpio-i2s0-off-pins {
+ pins-sdata {
+ pinmux = <PINMUX_GPIO3__FUNC_GPIO3>;
+ };
+ };
+
+ aud_gpio_i2s0_on: aud-gpio-i2s0-on-pins {
+ pins-sdata {
+ pinmux = <PINMUX_GPIO3__FUNC_I2S0_DI>;
+ };
+ };
+
+ aud_gpio_i2s1_off: aud-gpio-i2s-off-pins {
+ pins-clk-sdata {
+ pinmux = <PINMUX_GPIO56__FUNC_GPIO56>,
+ <PINMUX_GPIO57__FUNC_GPIO57>,
+ <PINMUX_GPIO58__FUNC_GPIO58>,
+ <PINMUX_GPIO59__FUNC_GPIO59>;
+ output-low;
+ };
+ };
+
+ aud_gpio_i2s1_on: aud-gpio-i2s1-on-pins {
+ pins-clk-sdata {
+ pinmux = <PINMUX_GPIO56__FUNC_I2S1_DO>,
+ <PINMUX_GPIO57__FUNC_I2S1_BCK>,
+ <PINMUX_GPIO58__FUNC_I2S1_LRCK>,
+ <PINMUX_GPIO59__FUNC_I2S1_MCK>;
+ };
+ };
+
+ aud_gpio_i2s2_off: aud-gpio-i2s2-off-pins {
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO26__FUNC_GPIO26>,
+ <PINMUX_GPIO27__FUNC_GPIO27>;
+ output-low;
+ };
+ };
+
+ aud_gpio_i2s2_on: aud-gpio-i2s2-on-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO26__FUNC_I2S2_BCK>,
+ <PINMUX_GPIO27__FUNC_I2S2_LRCK>;
+ drive-strength = <4>;
+ };
+ };
+
+ aud_gpio_i2s3_off: aud-gpio-i2s3-off-pins {
+ pins-sdata {
+ pinmux = <PINMUX_GPIO4__FUNC_GPIO4>;
+ output-low;
+ };
+ };
+
+ aud_gpio_i2s3_on: aud-gpio-i2s3-on-pins {
+ pins-sdata {
+ pinmux = <PINMUX_GPIO4__FUNC_I2S3_DO>;
+ drive-strength = <4>;
+ };
+ };
+
+ aud_gpio_pcm_off: aud-gpio-pcm-off-pins {
+ pins-clk-sdata {
+ pinmux = <PINMUX_GPIO115__FUNC_GPIO115>,
+ <PINMUX_GPIO116__FUNC_GPIO116>,
+ <PINMUX_GPIO117__FUNC_GPIO117>,
+ <PINMUX_GPIO118__FUNC_GPIO118>;
+ output-low;
+ };
+ };
+
+ aud_gpio_pcm_on: aud-gpio-pcm-on-pins {
+ pins-clk-sdata {
+ pinmux = <PINMUX_GPIO115__FUNC_PCM_CLK>,
+ <PINMUX_GPIO116__FUNC_PCM_SYNC>,
+ <PINMUX_GPIO117__FUNC_PCM_DI>,
+ <PINMUX_GPIO118__FUNC_PCM_DO>;
+ };
+ };
+
+ aud_gpio_dmic_sec: aud-gpio-dmic-sec-pins {
+ pins {
+ pinmux = <PINMUX_GPIO23__FUNC_GPIO23>;
+ output-low;
+ };
+ };
+
+ bt_pins_reset: bt-reset-pins {
+ pins-bt-reset {
+ pinmux = <PINMUX_GPIO155__FUNC_GPIO155>;
+ output-high;
+ };
+ };
+
+ dpi_pins_sleep: dpi-sleep-pins {
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO103__FUNC_GPIO103>,
+ <PINMUX_GPIO104__FUNC_GPIO104>,
+ <PINMUX_GPIO105__FUNC_GPIO105>,
+ <PINMUX_GPIO106__FUNC_GPIO106>,
+ <PINMUX_GPIO107__FUNC_GPIO107>,
+ <PINMUX_GPIO108__FUNC_GPIO108>,
+ <PINMUX_GPIO109__FUNC_GPIO109>,
+ <PINMUX_GPIO110__FUNC_GPIO110>,
+ <PINMUX_GPIO111__FUNC_GPIO111>,
+ <PINMUX_GPIO112__FUNC_GPIO112>,
+ <PINMUX_GPIO113__FUNC_GPIO113>,
+ <PINMUX_GPIO114__FUNC_GPIO114>,
+ <PINMUX_GPIO101__FUNC_GPIO101>,
+ <PINMUX_GPIO100__FUNC_GPIO100>,
+ <PINMUX_GPIO102__FUNC_GPIO102>,
+ <PINMUX_GPIO99__FUNC_GPIO99>;
+ drive-strength = <10>;
+ output-low;
+ };
+ };
+
+ dpi_pins_default: dpi-default-pins {
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO103__FUNC_DPI_DATA0>,
+ <PINMUX_GPIO104__FUNC_DPI_DATA1>,
+ <PINMUX_GPIO105__FUNC_DPI_DATA2>,
+ <PINMUX_GPIO106__FUNC_DPI_DATA3>,
+ <PINMUX_GPIO107__FUNC_DPI_DATA4>,
+ <PINMUX_GPIO108__FUNC_DPI_DATA5>,
+ <PINMUX_GPIO109__FUNC_DPI_DATA6>,
+ <PINMUX_GPIO110__FUNC_DPI_DATA7>,
+ <PINMUX_GPIO111__FUNC_DPI_DATA8>,
+ <PINMUX_GPIO112__FUNC_DPI_DATA9>,
+ <PINMUX_GPIO113__FUNC_DPI_DATA10>,
+ <PINMUX_GPIO114__FUNC_DPI_DATA11>,
+ <PINMUX_GPIO101__FUNC_DPI_HSYNC>,
+ <PINMUX_GPIO100__FUNC_DPI_VSYNC>,
+ <PINMUX_GPIO102__FUNC_DPI_DE>,
+ <PINMUX_GPIO99__FUNC_DPI_PCLK>;
+ drive-strength = <10>;
+ };
+ };
+
+ ec_ap_int: cros-ec-int-pins {
+ pins-ec-ap-int-odl {
+ pinmux = <PINMUX_GPIO13__FUNC_GPIO13>;
+ input-enable;
+ };
+ };
+
+ edp_panel_fixed_pins: edp-panel-fixed-pins {
+ pins-vreg-en {
+ pinmux = <PINMUX_GPIO153__FUNC_GPIO153>;
+ output-high;
+ };
+ };
+
+ en_pp1800_dpbrdg: en-pp1800-dpbrdg-pins {
+ pins-vreg-en {
+ pinmux = <PINMUX_GPIO39__FUNC_GPIO39>;
+ output-low;
+ };
+ };
+
+ gsc_int: gsc-int-pins {
+ pins-gsc-ap-int-odl {
+ pinmux = <PINMUX_GPIO15__FUNC_GPIO15>;
+ input-enable;
+ };
+ };
+
+ i2c0_pins: i2c0-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO128__FUNC_SDA0>,
+ <PINMUX_GPIO127__FUNC_SCL0>;
+ bias-disable;
+ drive-strength = <4>;
+ input-enable;
+ };
+ };
+
+ i2c1_pins: i2c1-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO130__FUNC_SDA1>,
+ <PINMUX_GPIO129__FUNC_SCL1>;
+ bias-disable;
+ drive-strength = <4>;
+ input-enable;
+ };
+ };
+
+ i2c2_pins: i2c2-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO132__FUNC_SDA2>,
+ <PINMUX_GPIO131__FUNC_SCL2>;
+ bias-disable;
+ drive-strength = <4>;
+ input-enable;
+ };
+ };
+
+ i2c3_pins: i2c3-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO134__FUNC_SDA3>,
+ <PINMUX_GPIO133__FUNC_SCL3>;
+ bias-disable;
+ drive-strength = <4>;
+ input-enable;
+ };
+ };
+
+ i2c5_pins: i2c5-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO138__FUNC_SDA5>,
+ <PINMUX_GPIO137__FUNC_SCL5>;
+ bias-disable;
+ drive-strength = <4>;
+ input-enable;
+ };
+ };
+
+ it6505_pins: it6505-pins {
+ pins-hpd {
+ pinmux = <PINMUX_GPIO10__FUNC_GPIO10>;
+ input-enable;
+ bias-pull-up;
+ };
+
+ pins-int {
+ pinmux = <PINMUX_GPIO8__FUNC_GPIO8>;
+ input-enable;
+ bias-pull-up;
+ };
+
+ pins-reset {
+ pinmux = <PINMUX_GPIO177__FUNC_GPIO177>;
+ output-low;
+ bias-pull-up;
+ };
+ };
+
+ mmc0_pins_default: mmc0-default-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO68__FUNC_MSDC0_CLK>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO71__FUNC_MSDC0_DAT0>,
+ <PINMUX_GPIO72__FUNC_MSDC0_DAT1>,
+ <PINMUX_GPIO73__FUNC_MSDC0_DAT2>,
+ <PINMUX_GPIO74__FUNC_MSDC0_DAT3>,
+ <PINMUX_GPIO75__FUNC_MSDC0_DAT4>,
+ <PINMUX_GPIO76__FUNC_MSDC0_DAT5>,
+ <PINMUX_GPIO77__FUNC_MSDC0_DAT6>,
+ <PINMUX_GPIO78__FUNC_MSDC0_DAT7>,
+ <PINMUX_GPIO69__FUNC_MSDC0_CMD>;
+ input-enable;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+
+ pins-rst {
+ pinmux = <PINMUX_GPIO70__FUNC_MSDC0_RSTB>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+ };
+
+ mmc0_pins_uhs: mmc0-uhs-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO68__FUNC_MSDC0_CLK>;
+ drive-strength = <6>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO71__FUNC_MSDC0_DAT0>,
+ <PINMUX_GPIO72__FUNC_MSDC0_DAT1>,
+ <PINMUX_GPIO73__FUNC_MSDC0_DAT2>,
+ <PINMUX_GPIO74__FUNC_MSDC0_DAT3>,
+ <PINMUX_GPIO75__FUNC_MSDC0_DAT4>,
+ <PINMUX_GPIO76__FUNC_MSDC0_DAT5>,
+ <PINMUX_GPIO77__FUNC_MSDC0_DAT6>,
+ <PINMUX_GPIO78__FUNC_MSDC0_DAT7>,
+ <PINMUX_GPIO69__FUNC_MSDC0_CMD>;
+ input-enable;
+ drive-strength = <6>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+
+ pins-ds {
+ pinmux = <PINMUX_GPIO67__FUNC_MSDC0_DSL>;
+ drive-strength = <6>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ };
+
+ pins-rst {
+ pinmux = <PINMUX_GPIO70__FUNC_MSDC0_RSTB>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+ };
+
+ mmc1_pins_default: mmc1-default-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO84__FUNC_MSDC1_CLK>;
+ drive-strength = <6>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO86__FUNC_MSDC1_DAT0>,
+ <PINMUX_GPIO87__FUNC_MSDC1_DAT1>,
+ <PINMUX_GPIO88__FUNC_MSDC1_DAT2>,
+ <PINMUX_GPIO89__FUNC_MSDC1_DAT3>,
+ <PINMUX_GPIO85__FUNC_MSDC1_CMD>;
+ input-enable;
+ drive-strength = <6>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+ };
+
+ mmc1_pins_uhs: mmc1-uhs-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO84__FUNC_MSDC1_CLK>;
+ drive-strength = <6>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO86__FUNC_MSDC1_DAT0>,
+ <PINMUX_GPIO87__FUNC_MSDC1_DAT1>,
+ <PINMUX_GPIO88__FUNC_MSDC1_DAT2>,
+ <PINMUX_GPIO89__FUNC_MSDC1_DAT3>,
+ <PINMUX_GPIO85__FUNC_MSDC1_CMD>;
+ input-enable;
+ drive-strength = <8>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+ };
+
+ mmc1_pins_eint: mmc1-eint-pins {
+ pins-dat1 {
+ pinmux = <PINMUX_GPIO87__FUNC_GPIO87>;
+ input-enable;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ };
+ };
+
+ nor_pins_default: nor-default-pins {
+ pins-clk-dat {
+ pinmux = <PINMUX_GPIO63__FUNC_SPINOR_IO0>,
+ <PINMUX_GPIO61__FUNC_SPINOR_CK>,
+ <PINMUX_GPIO64__FUNC_SPINOR_IO1>;
+ drive-strength = <6>;
+ bias-pull-down;
+ };
+
+ pins-cs-dat {
+ pinmux = <PINMUX_GPIO62__FUNC_SPINOR_CS>,
+ <PINMUX_GPIO65__FUNC_SPINOR_IO2>,
+ <PINMUX_GPIO66__FUNC_SPINOR_IO3>;
+ drive-strength = <6>;
+ bias-pull-up;
+ };
+ };
+
+ pen_eject: pen-eject-pins {
+ pins {
+ pinmux = <PINMUX_GPIO18__FUNC_GPIO18>;
+ input-enable;
+ /* External pull-up. */
+ bias-disable;
+ };
+ };
+
+ pwm0_pin: disp-pwm-pins {
+ pins {
+ pinmux = <PINMUX_GPIO97__FUNC_DISP_PWM>;
+ output-high;
+ };
+ };
+
+ rt1019p_pins_default: rt1019p-default-pins {
+ pins-sdb {
+ pinmux = <PINMUX_GPIO150__FUNC_GPIO150>;
+ output-low;
+ };
+ };
+
+ scp_pins: scp-default-pins {
+ pins-scp-uart {
+ pinmux = <PINMUX_GPIO48__FUNC_TP_URXD2_AO>,
+ <PINMUX_GPIO49__FUNC_TP_UTXD2_AO>;
+ };
+ };
+
+ spi1_pins: spi1-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO40__FUNC_SPI1_CLK_A>,
+ <PINMUX_GPIO41__FUNC_SPI1_CSB_A>,
+ <PINMUX_GPIO42__FUNC_SPI1_MO_A>,
+ <PINMUX_GPIO43__FUNC_SPI1_MI_A>;
+ bias-disable;
+ input-enable;
+ };
+ };
+
+ spi2_pins: spi2-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO44__FUNC_SPI2_CLK_A>,
+ <PINMUX_GPIO45__FUNC_GPIO45>,
+ <PINMUX_GPIO46__FUNC_SPI2_MO_A>,
+ <PINMUX_GPIO47__FUNC_SPI2_MI_A>;
+ bias-disable;
+ input-enable;
+ };
+ };
+
+ spmi_pins: spmi-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO183__FUNC_SPMI_SCL>,
+ <PINMUX_GPIO184__FUNC_SPMI_SDA>;
+ };
+ };
+
+ touchscreen_pins: touchscreen-pins {
+ pins-irq {
+ pinmux = <PINMUX_GPIO12__FUNC_GPIO12>;
+ input-enable;
+ bias-pull-up;
+ };
+
+ pins-reset {
+ pinmux = <PINMUX_GPIO60__FUNC_GPIO60>;
+ output-high;
+ };
+
+ pins-report-sw {
+ pinmux = <PINMUX_GPIO37__FUNC_GPIO37>;
+ output-low;
+ };
+ };
+
+ trackpad_pin: trackpad-default-pins {
+ pins-int-n {
+ pinmux = <PINMUX_GPIO11__FUNC_GPIO11>;
+ input-enable;
+ bias-disable; /* pulled externally */
+ };
+ };
+
+ wifi_enable_pin: wifi-enable-pins {
+ pins-wifi-enable {
+ pinmux = <PINMUX_GPIO54__FUNC_GPIO54>;
+ };
+ };
+
+ wifi_wakeup_pin: wifi-wakeup-pins {
+ pins-wifi-wakeup {
+ pinmux = <PINMUX_GPIO7__FUNC_GPIO7>;
+ input-enable;
+ };
+ };
+};
+
+&pwm0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pwm0_pin>;
+ status = "okay";
+};
+
+&pwrap {
+ pmic {
+ compatible = "mediatek,mt6366", "mediatek,mt6358";
+ interrupt-controller;
+ interrupts-extended = <&pio 201 IRQ_TYPE_LEVEL_HIGH>;
+ #interrupt-cells = <2>;
+
+ mt6366codec: codec {
+ compatible = "mediatek,mt6366-sound", "mediatek,mt6358-sound";
+ Avdd-supply = <&mt6366_vaud28_reg>;
+ mediatek,dmic-mode = <1>; /* one-wire */
+ };
+
+ mt6366_regulators: regulators {
+ compatible = "mediatek,mt6366-regulator", "mediatek,mt6358-regulator";
+ vsys-ldo1-supply = <&pp4200_z2>;
+ vsys-ldo2-supply = <&pp4200_z2>;
+ vsys-ldo3-supply = <&pp4200_z2>;
+ vsys-vcore-supply = <&pp4200_z2>;
+ vsys-vdram1-supply = <&pp4200_z2>;
+ vsys-vgpu-supply = <&pp4200_z2>;
+ vsys-vmodem-supply = <&pp4200_z2>;
+ vsys-vpa-supply = <&pp4200_z2>;
+ vsys-vproc11-supply = <&pp4200_z2>;
+ vsys-vproc12-supply = <&pp4200_z2>;
+ vsys-vs1-supply = <&pp4200_z2>;
+ vsys-vs2-supply = <&pp4200_z2>;
+ vs1-ldo1-supply = <&mt6366_vs1_reg>;
+ vs2-ldo1-supply = <&mt6366_vdram1_reg>;
+ vs2-ldo2-supply = <&mt6366_vs2_reg>;
+ vs2-ldo3-supply = <&mt6366_vs2_reg>;
+
+ vcore {
+ regulator-name = "pp0750_dvdd_core";
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <800000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <200>;
+ regulator-allowed-modes = <MT6397_BUCK_MODE_AUTO
+ MT6397_BUCK_MODE_FORCE_PWM>;
+ regulator-always-on;
+ };
+
+ mt6366_vdram1_reg: vdram1 {
+ regulator-name = "pp1125_emi_vdd2";
+ regulator-min-microvolt = <1125000>;
+ regulator-max-microvolt = <1125000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <0>;
+ regulator-allowed-modes = <MT6397_BUCK_MODE_AUTO
+ MT6397_BUCK_MODE_FORCE_PWM>;
+ regulator-always-on;
+ };
+
+ mt6366_vgpu_reg: vgpu {
+ /*
+ * Called "ppvar_dvdd_gpu" in the schematic.
+ * Called "ppvar_dvdd_vgpu" here to match
+ * regulator coupling requirements.
+ */
+ regulator-name = "ppvar_dvdd_vgpu";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <200>;
+ regulator-allowed-modes = <MT6397_BUCK_MODE_AUTO
+ MT6397_BUCK_MODE_FORCE_PWM>;
+ regulator-coupled-with = <&mt6366_vsram_gpu_reg>;
+ regulator-coupled-max-spread = <10000>;
+ };
+
+ mt6366_vproc11_reg: vproc11 {
+ regulator-name = "ppvar_dvdd_proc_bc_mt6366";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <200>;
+ regulator-allowed-modes = <MT6397_BUCK_MODE_AUTO
+ MT6397_BUCK_MODE_FORCE_PWM>;
+ regulator-always-on;
+ };
+
+ mt6366_vproc12_reg: vproc12 {
+ regulator-name = "ppvar_dvdd_proc_lc";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <200>;
+ regulator-allowed-modes = <MT6397_BUCK_MODE_AUTO
+ MT6397_BUCK_MODE_FORCE_PWM>;
+ regulator-always-on;
+ };
+
+ mt6366_vs1_reg: vs1 {
+ regulator-name = "pp2000_vs1";
+ regulator-min-microvolt = <2000000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <0>;
+ regulator-always-on;
+ };
+
+ mt6366_vs2_reg: vs2 {
+ regulator-name = "pp1350_vs2";
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <0>;
+ regulator-always-on;
+ };
+
+ va12 {
+ regulator-name = "pp1200_va12";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-enable-ramp-delay = <270>;
+ regulator-always-on;
+ };
+
+ mt6366_vaud28_reg: vaud28 {
+ regulator-name = "pp2800_vaud28";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vaux18_reg: vaux18 {
+ regulator-name = "pp1840_vaux18";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1840000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vbif28_reg: vbif28 {
+ regulator-name = "pp2800_vbif28";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vcn18_reg: vcn18 {
+ regulator-name = "pp1800_vcn18_x";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vcn28_reg: vcn28 {
+ regulator-name = "pp2800_vcn28_x";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vefuse_reg: vefuse {
+ regulator-name = "pp1800_vefuse";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vfe28_reg: vfe28 {
+ regulator-name = "pp2800_vfe28_x";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vemc_reg: vemc {
+ regulator-name = "pp3000_vemc";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-enable-ramp-delay = <60>;
+ };
+
+ mt6366_vibr_reg: vibr {
+ regulator-name = "pp2800_vibr_x";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <60>;
+ };
+
+ mt6366_vio18_reg: vio18 {
+ regulator-name = "pp1800_vio18_s3";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-enable-ramp-delay = <2700>;
+ regulator-always-on;
+ };
+
+ mt6366_vio28_reg: vio28 {
+ regulator-name = "pp2800_vio28_x";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ mt6366_vm18_reg: vm18 {
+ regulator-name = "pp1800_emi_vdd1";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1840000>;
+ regulator-enable-ramp-delay = <325>;
+ regulator-always-on;
+ };
+
+ mt6366_vmc_reg: vmc {
+ regulator-name = "pp3000_vmc";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-enable-ramp-delay = <60>;
+ };
+
+ mt6366_vmddr_reg: vmddr {
+ regulator-name = "pm0750_emi_vmddr";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <750000>;
+ regulator-enable-ramp-delay = <325>;
+ regulator-always-on;
+ };
+
+ mt6366_vmch_reg: vmch {
+ regulator-name = "pp3000_vmch";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-enable-ramp-delay = <60>;
+ };
+
+ mt6366_vcn33_reg: vcn33 {
+ regulator-name = "pp3300_vcn33_x";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <270>;
+ };
+
+ vdram2 {
+ regulator-name = "pp0600_emi_vddq";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <600000>;
+ regulator-enable-ramp-delay = <3300>;
+ regulator-always-on;
+ };
+
+ mt6366_vrf12_reg: vrf12 {
+ regulator-name = "pp1200_vrf12_x";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ mt6366_vrf18_reg: vrf18 {
+ regulator-name = "pp1800_vrf18_x";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ vsim1 {
+ regulator-name = "pp1860_vsim1_x";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1860000>;
+ regulator-enable-ramp-delay = <540>;
+ };
+
+ mt6366_vsim2_reg: vsim2 {
+ regulator-name = "pp2760_vsim2_x";
+ regulator-min-microvolt = <2700000>;
+ regulator-max-microvolt = <2760000>;
+ regulator-enable-ramp-delay = <540>;
+ };
+
+ mt6366_vsram_gpu_reg: vsram-gpu {
+ regulator-name = "pp0900_dvdd_sram_gpu";
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <240>;
+ regulator-coupled-with = <&mt6366_vgpu_reg>;
+ regulator-coupled-max-spread = <10000>;
+ };
+
+ mt6366_vsram_others_reg: vsram-others {
+ regulator-name = "pp0900_dvdd_sram_core";
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <240>;
+ regulator-always-on;
+ };
+
+ mt6366_vsram_proc11_reg: vsram-proc11 {
+ regulator-name = "pp0900_dvdd_sram_bc";
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1120000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <240>;
+ regulator-always-on;
+ };
+
+ mt6366_vsram_proc12_reg: vsram-proc12 {
+ regulator-name = "pp0900_dvdd_sram_lc";
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1120000>;
+ regulator-ramp-delay = <6250>;
+ regulator-enable-ramp-delay = <240>;
+ regulator-always-on;
+ };
+
+ vusb {
+ regulator-name = "pp3070_vusb";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3070000>;
+ regulator-enable-ramp-delay = <270>;
+ regulator-always-on;
+ };
+
+ vxo22 {
+ regulator-name = "pp2240_vxo22";
+ regulator-min-microvolt = <2200000>;
+ regulator-max-microvolt = <2240000>;
+ regulator-enable-ramp-delay = <120>;
+ /* Feeds DCXO internally */
+ regulator-always-on;
+ };
+ };
+
+ rtc {
+ compatible = "mediatek,mt6366-rtc", "mediatek,mt6358-rtc";
+ };
+ };
+};
+
+&scp {
+ pinctrl-names = "default";
+ pinctrl-0 = <&scp_pins>;
+ firmware-name = "mediatek/mt8186/scp.img";
+ memory-region = <&scp_mem>;
+ status = "okay";
+
+ cros-ec-rpmsg {
+ compatible = "google,cros-ec-rpmsg";
+ mediatek,rpmsg-name = "cros-ec-rpmsg";
+ };
+};
+
+&spi1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_pins>;
+ mediatek,pad-select = <0>;
+ status = "okay";
+
+ cros_ec: ec@0 {
+ compatible = "google,cros-ec-spi";
+ reg = <0>;
+ interrupts-extended = <&pio 13 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ec_ap_int>;
+ spi-max-frequency = <1000000>;
+
+ i2c_tunnel: i2c-tunnel {
+ compatible = "google,cros-ec-i2c-tunnel";
+ google,remote-bus = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ typec {
+ compatible = "google,cros-ec-typec";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ usb_c0: connector@0 {
+ compatible = "usb-c-connector";
+ reg = <0>;
+ label = "left";
+ power-role = "dual";
+ data-role = "host";
+ try-power-role = "source";
+ };
+
+ usb_c1: connector@1 {
+ compatible = "usb-c-connector";
+ reg = <1>;
+ label = "right";
+ power-role = "dual";
+ data-role = "host";
+ try-power-role = "source";
+ };
+ };
+ };
+};
+
+&spi2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2_pins>;
+ cs-gpios = <&pio 45 GPIO_ACTIVE_LOW>;
+ mediatek,pad-select = <0>;
+ status = "okay";
+
+ tpm@0 {
+ compatible = "google,cr50";
+ reg = <0>;
+ interrupts-extended = <&pio 15 IRQ_TYPE_EDGE_RISING>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&gsc_int>;
+ spi-max-frequency = <1000000>;
+ };
+};
+
+&ssusb0 {
+ status = "okay";
+};
+
+&ssusb1 {
+ status = "okay";
+};
+
+&u3phy0 {
+ status = "okay";
+};
+
+&u3phy1 {
+ status = "okay";
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&usb_host0 {
+ vbus-supply = <&pp3300_s3>;
+ status = "okay";
+};
+
+&usb_host1 {
+ vbus-supply = <&usb_p1_vbus>;
+ status = "okay";
+};
+
+&watchdog {
+ mediatek,reset-by-toprgu;
+};
+
+#include <arm/cros-ec-keyboard.dtsi>
+#include <arm/cros-ec-sbs.dtsi>
diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
index 2fec6fd1c1a7..4763ed5dc86c 100644
--- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
@@ -931,11 +931,17 @@
power-domain@MT8186_POWER_DOMAIN_SSUSB {
reg = <MT8186_POWER_DOMAIN_SSUSB>;
+ clocks = <&topckgen CLK_TOP_USB_TOP>,
+ <&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_REF>;
+ clock-names = "sys_ck", "ref_ck";
#power-domain-cells = <0>;
};
power-domain@MT8186_POWER_DOMAIN_SSUSB_P1 {
reg = <MT8186_POWER_DOMAIN_SSUSB_P1>;
+ clocks = <&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_P1_SYS>,
+ <&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_P1_REF>;
+ clock-names = "sys_ck", "ref_ck";
#power-domain-cells = <0>;
};
@@ -1061,7 +1067,7 @@
reg = <MT8186_POWER_DOMAIN_VENC>;
clocks = <&topckgen CLK_TOP_VENC>,
<&vencsys CLK_VENC_CKE1_VENC>;
- clock-names = "venc0", "larb";
+ clock-names = "venc0", "subsys-larb";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -1530,8 +1536,9 @@
clocks = <&topckgen CLK_TOP_USB_TOP>,
<&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_REF>,
<&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_HCLK>,
- <&infracfg_ao CLK_INFRA_AO_ICUSB>;
- clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck";
+ <&infracfg_ao CLK_INFRA_AO_ICUSB>,
+ <&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_XHCI>;
+ clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck", "xhci_ck";
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH 0>;
phys = <&u2port0 PHY_TYPE_USB2>;
power-domains = <&spm MT8186_POWER_DOMAIN_SSUSB>;
@@ -1595,8 +1602,9 @@
clocks = <&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_P1_SYS>,
<&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_P1_REF>,
<&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_P1_HCLK>,
- <&clk26m>;
- clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck";
+ <&clk26m>,
+ <&infracfg_ao CLK_INFRA_AO_SSUSB_TOP_P1_XHCI>;
+ clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck", "xhci_ck";
interrupts = <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH 0>;
phys = <&u2port1 PHY_TYPE_USB2>, <&u3port1 PHY_TYPE_USB3>;
power-domains = <&spm MT8186_POWER_DOMAIN_SSUSB_P1>;
@@ -1672,6 +1680,10 @@
reg = <0x59c 0x4>;
bits = <0 3>;
};
+
+ socinfo-data1@7a0 {
+ reg = <0x7a0 0x4>;
+ };
};
mipi_tx0: dsi-phy@11cc0000 {
@@ -1959,6 +1971,43 @@
power-domains = <&spm MT8186_POWER_DOMAIN_IMG2>;
};
+ video_decoder: video-decoder@16000000 {
+ compatible = "mediatek,mt8186-vcodec-dec";
+ reg = <0 0x16000000 0 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
+ iommus = <&iommu_mm IOMMU_PORT_L4_HW_VDEC_MC_EXT>;
+ mediatek,scp = <&scp>;
+
+ vcodec_core: video-codec@16025000 {
+ compatible = "mediatek,mtk-vcodec-core";
+ reg = <0 0x16025000 0 0x1000>;
+ interrupts = <GIC_SPI 343 IRQ_TYPE_LEVEL_HIGH 0>;
+ iommus = <&iommu_mm IOMMU_PORT_L4_HW_VDEC_MC_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_UFO_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_PP_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_PRED_RD_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_PRED_WR_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_PPWRAP_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_TILE_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_VLD_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_VLD2_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_AVC_MV_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_UFO_ENC_EXT>,
+ <&iommu_mm IOMMU_PORT_L4_HW_VDEC_RG_CTRL_DMA_EXT>;
+ clocks = <&topckgen CLK_TOP_VDEC>,
+ <&vdecsys CLK_VDEC_CKEN>,
+ <&vdecsys CLK_VDEC_LARB1_CKEN>,
+ <&topckgen CLK_TOP_UNIVPLL_D3>;
+ clock-names = "vdec-sel", "vdec-soc-vdec", "vdec", "vdec-top";
+ assigned-clocks = <&topckgen CLK_TOP_VDEC>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D3>;
+ power-domains = <&spm MT8186_POWER_DOMAIN_VDEC>;
+ };
+ };
+
larb4: smi@1602e000 {
compatible = "mediatek,mt8186-smi-larb";
reg = <0 0x1602e000 0 0x1000>;
@@ -1993,6 +2042,40 @@
power-domains = <&spm MT8186_POWER_DOMAIN_VENC>;
};
+ venc: video-encoder@17020000 {
+ compatible = "mediatek,mt8186-vcodec-enc", "mediatek,mt8183-vcodec-enc";
+ reg = <0 0x17020000 0 0x2000>;
+ interrupts = <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH 0>;
+ iommus = <&iommu_mm IOMMU_PORT_L7_VENC_RCPU>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_REC>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_BSDMA>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_SV_COMV>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_RD_COMV>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_CUR_LUMA>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_CUR_CHROMA>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_REF_LUMA>,
+ <&iommu_mm IOMMU_PORT_L7_VENC_REF_CHROMA>;
+ clocks = <&vencsys CLK_VENC_CKE1_VENC>;
+ clock-names = "venc_sel";
+ assigned-clocks = <&topckgen CLK_TOP_VENC>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D3>;
+ power-domains = <&spm MT8186_POWER_DOMAIN_VENC>;
+ mediatek,scp = <&scp>;
+ };
+
+ jpgenc: jpeg-encoder@17030000 {
+ compatible = "mediatek,mt8186-jpgenc", "mediatek,mtk-jpgenc";
+ reg = <0 0x17030000 0 0x10000>;
+ interrupts = <GIC_SPI 245 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vencsys CLK_VENC_CKE2_JPGENC>;
+ clock-names = "jpgenc";
+ iommus = <&iommu_mm IOMMU_PORT_L7_JPGENC_Y_RDMA>,
+ <&iommu_mm IOMMU_PORT_L7_JPGENC_C_RDMA>,
+ <&iommu_mm IOMMU_PORT_L7_JPGENC_Q_TABLE>,
+ <&iommu_mm IOMMU_PORT_L7_JPGENC_BSDMA>;
+ power-domains = <&spm MT8186_POWER_DOMAIN_VENC>;
+ };
+
camsys: clock-controller@1a000000 {
compatible = "mediatek,mt8186-camsys";
reg = <0 0x1a000000 0 0x1000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
index f2281250ac35..9b738f6a5d21 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
@@ -1332,14 +1332,11 @@
spi-max-frequency = <3000000>;
pinctrl-names = "default";
pinctrl-0 = <&cros_ec_int>;
+ wakeup-source;
#address-cells = <1>;
#size-cells = <0>;
- base_detection: cbas {
- compatible = "google,cros-cbas";
- };
-
cros_ec_pwm: pwm {
compatible = "google,cros-ec-pwm";
#pwm-cells = <1>;
@@ -1402,7 +1399,7 @@
pinctrl-names = "default";
pinctrl-0 = <&spi5_pins>;
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
interrupts-extended = <&pio 171 IRQ_TYPE_EDGE_RISING>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
index 6dd32dbfb832..05e401670bce 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
@@ -1164,6 +1164,14 @@
#address-cells = <1>;
#size-cells = <1>;
+ socinfo-data1@44 {
+ reg = <0x044 0x4>;
+ };
+
+ socinfo-data2@50 {
+ reg = <0x050 0x4>;
+ };
+
lvts_e_data1: data1@1c0 {
reg = <0x1c0 0x58>;
};
@@ -1814,7 +1822,7 @@
mediatek,scp = <&scp>;
power-domains = <&spm MT8192_POWER_DOMAIN_VENC>;
clocks = <&vencsys CLK_VENC_SET1_VENC>;
- clock-names = "venc-set1";
+ clock-names = "venc_sel";
assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>;
assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D4>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts
index 2d5e8f371b6d..a82d716f10d4 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts
@@ -23,3 +23,7 @@
&ts_10 {
status = "okay";
};
+
+&watchdog {
+ /delete-property/ mediatek,disable-extrst;
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts
index 2586c32ce6e6..2fe20e0dad83 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts
@@ -43,3 +43,7 @@
&ts_10 {
status = "okay";
};
+
+&watchdog {
+ /delete-property/ mediatek,disable-extrst;
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts
index f54f9477b99d..dd294ca98194 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts
@@ -44,3 +44,7 @@
&ts_10 {
status = "okay";
};
+
+&watchdog {
+ /delete-property/ mediatek,disable-extrst;
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index 3c6079edda19..f94c07f8b933 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -1149,6 +1149,7 @@
pinctrl-names = "default";
pinctrl-0 = <&cros_ec_int>;
spi-max-frequency = <3000000>;
+ wakeup-source;
keyboard-backlight {
compatible = "google,cros-kbd-led-backlight";
@@ -1291,11 +1292,32 @@
status = "okay";
};
+/*
+ * For the USB Type-C ports the role and alternate modes switching is
+ * done by the EC so we set dr_mode to host to avoid interfering.
+ */
+&ssusb0 {
+ dr_mode = "host";
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&ssusb2 {
+ dr_mode = "host";
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&ssusb3 {
+ dr_mode = "host";
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
&xhci0 {
status = "okay";
rx-fifo-depth = <3072>;
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&usb_vbus>;
};
@@ -1309,8 +1331,6 @@
&xhci2 {
status = "okay";
-
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&usb_vbus>;
};
@@ -1319,7 +1339,6 @@
/* MT7921's USB Bluetooth has issues with USB2 LPM */
usb2-lpm-disable;
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&usb_vbus>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
index 69c7f3954ae5..b82f7176b4a1 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
@@ -128,6 +128,7 @@
compatible = "mediatek,mt6360";
reg = <0x34>;
interrupt-controller;
+ #interrupt-cells = <1>;
interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>;
interrupt-names = "IRQB";
@@ -528,8 +529,22 @@
status = "okay";
};
-&xhci0 {
+&ssusb0 {
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&ssusb2 {
vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&ssusb3 {
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&xhci0 {
vbus-supply = <&otg_vbus_regulator>;
status = "okay";
};
@@ -540,11 +555,9 @@
};
&xhci2 {
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
status = "okay";
};
&xhci3 {
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-evb.dts b/arch/arm64/boot/dts/mediatek/mt8195-evb.dts
index 690dc7717f2c..341b6e074139 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-evb.dts
@@ -160,6 +160,18 @@
status = "okay";
};
+&ssusb0 {
+ status = "okay";
+};
+
+&ssusb2 {
+ status = "okay";
+};
+
+&ssusb3 {
+ status = "okay";
+};
+
&xhci0 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index b9101662ce40..ea6dc220e1cc 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -1347,29 +1347,40 @@
};
};
- xhci0: usb@11200000 {
- compatible = "mediatek,mt8195-xhci",
- "mediatek,mtk-xhci";
- reg = <0 0x11200000 0 0x1000>,
- <0 0x11203e00 0 0x0100>;
+ ssusb0: usb@11201000 {
+ compatible = "mediatek,mt8195-mtu3", "mediatek,mtu3";
+ reg = <0 0x11201000 0 0x2dff>, <0 0x11203e00 0 0x0100>;
reg-names = "mac", "ippc";
- interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH 0>;
- phys = <&u2port0 PHY_TYPE_USB2>,
- <&u3port0 PHY_TYPE_USB3>;
- assigned-clocks = <&topckgen CLK_TOP_USB_TOP>,
- <&topckgen CLK_TOP_SSUSB_XHCI>;
- assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>,
- <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
+ ranges = <0 0 0 0x11200000 0 0x3f00>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&infracfg_ao CLK_INFRA_AO_SSUSB>,
<&topckgen CLK_TOP_SSUSB_REF>,
- <&apmixedsys CLK_APMIXED_USB1PLL>,
- <&clk26m>,
<&infracfg_ao CLK_INFRA_AO_SSUSB_XHCI>;
- clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck",
- "xhci_ck";
- mediatek,syscon-wakeup = <&pericfg 0x400 103>;
+ clock-names = "sys_ck", "ref_ck", "mcu_ck";
+ phys = <&u2port0 PHY_TYPE_USB2>, <&u3port0 PHY_TYPE_USB3>;
wakeup-source;
+ mediatek,syscon-wakeup = <&pericfg 0x400 103>;
status = "disabled";
+
+ xhci0: usb@0 {
+ compatible = "mediatek,mt8195-xhci", "mediatek,mtk-xhci";
+ reg = <0 0 0 0x1000>;
+ reg-names = "mac";
+ interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH 0>;
+ assigned-clocks = <&topckgen CLK_TOP_USB_TOP>,
+ <&topckgen CLK_TOP_SSUSB_XHCI>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>,
+ <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
+ clocks = <&infracfg_ao CLK_INFRA_AO_SSUSB>,
+ <&topckgen CLK_TOP_SSUSB_REF>,
+ <&apmixedsys CLK_APMIXED_USB1PLL>,
+ <&clk26m>,
+ <&infracfg_ao CLK_INFRA_AO_SSUSB_XHCI>;
+ clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck", "xhci_ck";
+ status = "disabled";
+ };
};
mmc0: mmc@11230000 {
@@ -1450,52 +1461,68 @@
status = "disabled";
};
- xhci2: usb@112a0000 {
- compatible = "mediatek,mt8195-xhci",
- "mediatek,mtk-xhci";
- reg = <0 0x112a0000 0 0x1000>,
- <0 0x112a3e00 0 0x0100>;
+ ssusb2: usb@112a1000 {
+ compatible = "mediatek,mt8195-mtu3", "mediatek,mtu3";
+ reg = <0 0x112a1000 0 0x2dff>, <0 0x112a3e00 0 0x0100>;
reg-names = "mac", "ippc";
- interrupts = <GIC_SPI 533 IRQ_TYPE_LEVEL_HIGH 0>;
- phys = <&u2port2 PHY_TYPE_USB2>;
- assigned-clocks = <&topckgen CLK_TOP_USB_TOP_2P>,
- <&topckgen CLK_TOP_SSUSB_XHCI_2P>;
- assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>,
- <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
+ ranges = <0 0 0 0x112a0000 0 0x3f00>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupts = <GIC_SPI 532 IRQ_TYPE_LEVEL_HIGH 0>;
+ assigned-clocks = <&topckgen CLK_TOP_USB_TOP_2P>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
clocks = <&pericfg_ao CLK_PERI_AO_SSUSB_2P_BUS>,
<&topckgen CLK_TOP_SSUSB_P2_REF>,
- <&clk26m>,
- <&clk26m>,
<&pericfg_ao CLK_PERI_AO_SSUSB_2P_XHCI>;
- clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck",
- "xhci_ck";
- mediatek,syscon-wakeup = <&pericfg 0x400 105>;
+ clock-names = "sys_ck", "ref_ck", "mcu_ck";
+ phys = <&u2port2 PHY_TYPE_USB2>;
wakeup-source;
+ mediatek,syscon-wakeup = <&pericfg 0x400 105>;
status = "disabled";
+
+ xhci2: usb@0 {
+ compatible = "mediatek,mt8195-xhci", "mediatek,mtk-xhci";
+ reg = <0 0 0 0x1000>;
+ reg-names = "mac";
+ interrupts = <GIC_SPI 533 IRQ_TYPE_LEVEL_HIGH 0>;
+ assigned-clocks = <&topckgen CLK_TOP_SSUSB_XHCI_2P>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
+ clocks = <&pericfg_ao CLK_PERI_AO_SSUSB_2P_XHCI>;
+ clock-names = "sys_ck";
+ status = "disabled";
+ };
};
- xhci3: usb@112b0000 {
- compatible = "mediatek,mt8195-xhci",
- "mediatek,mtk-xhci";
- reg = <0 0x112b0000 0 0x1000>,
- <0 0x112b3e00 0 0x0100>;
+ ssusb3: usb@112b1000 {
+ compatible = "mediatek,mt8195-mtu3", "mediatek,mtu3";
+ reg = <0 0x112b1000 0 0x2dff>, <0 0x112b3e00 0 0x0100>;
reg-names = "mac", "ippc";
- interrupts = <GIC_SPI 536 IRQ_TYPE_LEVEL_HIGH 0>;
- phys = <&u2port3 PHY_TYPE_USB2>;
- assigned-clocks = <&topckgen CLK_TOP_USB_TOP_3P>,
- <&topckgen CLK_TOP_SSUSB_XHCI_3P>;
- assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>,
- <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
+ ranges = <0 0 0 0x112b0000 0 0x3f00>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupts = <GIC_SPI 535 IRQ_TYPE_LEVEL_HIGH 0>;
+ assigned-clocks = <&topckgen CLK_TOP_USB_TOP_3P>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
clocks = <&pericfg_ao CLK_PERI_AO_SSUSB_3P_BUS>,
<&topckgen CLK_TOP_SSUSB_P3_REF>,
- <&clk26m>,
- <&clk26m>,
<&pericfg_ao CLK_PERI_AO_SSUSB_3P_XHCI>;
- clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck",
- "xhci_ck";
- mediatek,syscon-wakeup = <&pericfg 0x400 106>;
+ clock-names = "sys_ck", "ref_ck", "mcu_ck";
+ phys = <&u2port3 PHY_TYPE_USB2>;
wakeup-source;
+ mediatek,syscon-wakeup = <&pericfg 0x400 106>;
status = "disabled";
+
+ xhci3: usb@0 {
+ compatible = "mediatek,mt8195-xhci", "mediatek,mtk-xhci";
+ reg = <0 0 0 0x1000>;
+ reg-names = "mac";
+ interrupts = <GIC_SPI 536 IRQ_TYPE_LEVEL_HIGH 0>;
+ assigned-clocks = <&topckgen CLK_TOP_SSUSB_XHCI_3P>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D5_D4>;
+ clocks = <&pericfg_ao CLK_PERI_AO_SSUSB_3P_XHCI>;
+ clock-names = "sys_ck";
+ status = "disabled";
+ };
};
pcie0: pcie@112f0000 {
@@ -1701,6 +1728,9 @@
svs_calib_data: svs-calib@580 {
reg = <0x580 0x64>;
};
+ socinfo-data1@7a0 {
+ reg = <0x7a0 0x4>;
+ };
};
u3phy2: t-phy@11c40000 {
diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
index 7fc515a07c65..1558649f633c 100644
--- a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
@@ -880,6 +880,21 @@
status = "disabled";
};
+&ssusb0 {
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&ssusb2 {
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&ssusb3 {
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
&xhci0 {
status = "okay";
};
@@ -890,11 +905,9 @@
};
&xhci2 {
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
status = "okay";
};
&xhci3 {
- vusb33-supply = <&mt6359_vusb_ldo_reg>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts b/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts
new file mode 100644
index 000000000000..e5d9b671a405
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8395-radxa-nio-12l.dts
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright (C) 2023 Radxa Limited
+ * Copyright (C) 2024 Collabora Ltd.
+ * AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#include "mt8195.dtsi"
+#include "mt6359.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/pinctrl/mt8195-pinfunc.h>
+#include <dt-bindings/regulator/mediatek,mt6360-regulator.h>
+#include <dt-bindings/spmi/spmi.h>
+#include <dt-bindings/usb/pd.h>
+
+/ {
+ model = "Radxa NIO 12L";
+ chassis-type = "embedded";
+ compatible = "radxa,nio-12l", "mediatek,mt8395", "mediatek,mt8195";
+
+ aliases {
+ i2c0 = &i2c2;
+ i2c1 = &i2c3;
+ i2c2 = &i2c4;
+ i2c3 = &i2c0;
+ i2c4 = &i2c1;
+ ethernet0 = &eth;
+ serial0 = &uart0;
+ serial1 = &uart1;
+ spi0 = &spi1;
+ spi1 = &spi2;
+ };
+
+ chosen {
+ stdout-path = "serial0:921600n8";
+ };
+
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+ };
+
+ memory@40000000 {
+ device_type = "memory";
+ reg = <0 0x40000000 0x1 0x0>;
+ };
+
+ wifi_vreg: regulator-wifi-3v3-en {
+ compatible = "regulator-fixed";
+ regulator-name = "wifi_3v3_en";
+ regulator-always-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ enable-active-high;
+ gpio = <&pio 67 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&wifi_vreg_pins>;
+ vin-supply = <&vsys>;
+ };
+
+ /* system wide switching 5.0V power rail */
+ vsys: regulator-vsys {
+ compatible = "regulator-fixed";
+ regulator-name = "vsys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_vsys>;
+ };
+
+ vsys_buck: regulator-vsys-buck {
+ compatible = "regulator-fixed";
+ regulator-name = "vsys_buck";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_vsys>;
+ };
+
+ /* Rail from power-only "TYPE C DC" port */
+ vcc5v0_vsys: regulator-vcc5v0-sys {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ /*
+ * 12 MiB reserved for OP-TEE (BL32)
+ * +-----------------------+ 0x43e0_0000
+ * | SHMEM 2MiB |
+ * +-----------------------+ 0x43c0_0000
+ * | | TA_RAM 8MiB |
+ * + TZDRAM +--------------+ 0x4340_0000
+ * | | TEE_RAM 2MiB |
+ * +-----------------------+ 0x4320_0000
+ */
+ optee_reserved: optee@43200000 {
+ reg = <0 0x43200000 0 0xc00000>;
+ no-map;
+ };
+
+ scp_mem: memory@50000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x50000000 0 0x2900000>;
+ no-map;
+ };
+
+ vpu_mem: memory@53000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x53000000 0 0x1400000>; /* 20 MB */
+ };
+
+ /* 2 MiB reserved for ARM Trusted Firmware (BL31) */
+ bl31_secmon_mem: memory@54600000 {
+ reg = <0 0x54600000 0x0 0x200000>;
+ no-map;
+ };
+
+ afe_mem: memory@60000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x60000000 0 0x1100000>;
+ no-map;
+ };
+
+ apu_mem: memory@62000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x62000000 0 0x1400000>; /* 20 MB */
+ };
+ };
+};
+
+&eth {
+ phy-mode = "rgmii-rxid";
+ phy-handle = <&rgmii_phy>;
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&eth_default_pins>;
+ pinctrl-1 = <&eth_sleep_pins>;
+ mediatek,tx-delay-ps = <2030>;
+ mediatek,mac-wol;
+ snps,reset-gpio = <&pio 93 GPIO_ACTIVE_HIGH>;
+ snps,reset-delays-us = <0 20000 100000>;
+ status = "okay";
+
+ mdio {
+ rgmii_phy: ethernet-phy@1 {
+ compatible = "ethernet-phy-id001c.c916";
+ reg = <0x1>;
+ };
+ };
+};
+
+&gpu {
+ mali-supply = <&mt6315_7_vbuck1>;
+ status = "okay";
+};
+
+&i2c2 {
+ clock-frequency = <400000>;
+ pinctrl-0 = <&i2c2_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ typec-mux@48 {
+ compatible = "ite,it5205";
+ reg = <0x48>;
+
+ mode-switch;
+ orientation-switch;
+
+ vcc-supply = <&mt6359_vibr_ldo_reg>;
+
+ port {
+ it5205_sbu_mux: endpoint {
+ remote-endpoint = <&typec_con_mux>;
+ };
+ };
+ };
+};
+
+&i2c4 {
+ clock-frequency = <400000>;
+ pinctrl-0 = <&i2c4_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ /* I2C4 exposed at 39-pins MIPI-LCD connector */
+};
+
+&i2c6 {
+ clock-frequency = <400000>;
+ pinctrl-0 = <&i2c6_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ mt6360: pmic@34 {
+ compatible = "mediatek,mt6360";
+ reg = <0x34>;
+ interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>;
+ interrupt-names = "IRQB";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ pinctrl-0 = <&mt6360_pins>;
+
+ charger {
+ compatible = "mediatek,mt6360-chg";
+ richtek,vinovp-microvolt = <14500000>;
+
+ otg_vbus_regulator: usb-otg-vbus-regulator {
+ regulator-name = "usb-otg-vbus";
+ regulator-min-microvolt = <4425000>;
+ regulator-max-microvolt = <5825000>;
+ };
+ };
+
+ regulator {
+ compatible = "mediatek,mt6360-regulator";
+ LDO_VIN1-supply = <&vsys_buck>;
+ LDO_VIN3-supply = <&mt6360_buck2>;
+
+ mt6360_buck1: buck1 {
+ regulator-name = "emi_vdd2";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP
+ MT6360_OPMODE_ULP>;
+ regulator-always-on;
+ };
+
+ mt6360_buck2: buck2 {
+ regulator-name = "emi_vddq";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP
+ MT6360_OPMODE_ULP>;
+ regulator-always-on;
+ };
+
+ mt6360_ldo1: ldo1 {
+ regulator-name = "ext_lcd_3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP>;
+ regulator-always-on;
+ };
+
+ mt6360_ldo2: ldo2 {
+ regulator-name = "panel1_p1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP>;
+ };
+
+ mt6360_ldo3: ldo3 {
+ regulator-name = "vmc_pmu";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3600000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP>;
+ };
+
+ mt6360_ldo5: ldo5 {
+ regulator-name = "vmch_pmu";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP>;
+ regulator-always-on;
+ };
+
+ mt6360_ldo6: ldo6 {
+ regulator-name = "mt6360_ldo6"; /* Test point */
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <2100000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP>;
+ };
+
+ mt6360_ldo7: ldo7 {
+ regulator-name = "emi_vmddr_en";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <2100000>;
+ regulator-allowed-modes = <MT6360_OPMODE_NORMAL
+ MT6360_OPMODE_LP>;
+ regulator-always-on;
+ };
+ };
+
+ typec {
+ compatible = "mediatek,mt6360-tcpc";
+ interrupts-extended = <&pio 100 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "PD_IRQB";
+
+ connector {
+ compatible = "usb-c-connector";
+ label = "USB-C";
+ data-role = "dual";
+ op-sink-microwatt = <10000000>;
+ power-role = "dual";
+ try-power-role = "sink";
+
+ source-pdos = <PDO_FIXED(5000, 1000,
+ PDO_FIXED_DUAL_ROLE |
+ PDO_FIXED_DATA_SWAP)>;
+ sink-pdos = <PDO_FIXED(5000, 3000,
+ PDO_FIXED_DUAL_ROLE |
+ PDO_FIXED_DATA_SWAP)>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ typec_con_hs: endpoint {
+ remote-endpoint = <&mtu3_hs0_role_sw>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+ typec_con_mux: endpoint {
+ remote-endpoint = <&it5205_sbu_mux>;
+ };
+ };
+ };
+ };
+ };
+ };
+};
+
+/* MMC0 Controller: eMMC (HS400). Power lines are shared with UFS! */
+&mmc0 {
+ pinctrl-names = "default", "state_uhs";
+ pinctrl-0 = <&mmc0_default_pins>;
+ pinctrl-1 = <&mmc0_uhs_pins>;
+ bus-width = <8>;
+ max-frequency = <200000000>;
+ hs400-ds-delay = <0x14c11>;
+ cap-mmc-highspeed;
+ cap-mmc-hw-reset;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ no-sdio;
+ no-sd;
+ non-removable;
+ vmmc-supply = <&mt6359_vemc_1_ldo_reg>;
+ vqmmc-supply = <&mt6359_vufs_ldo_reg>;
+ status = "okay";
+};
+
+/* MMC1 Controller: MicroSD card slot */
+&mmc1 {
+ pinctrl-names = "default", "state_uhs";
+ pinctrl-0 = <&mmc1_default_pins>, <&mmc1_pins_detect>;
+ pinctrl-1 = <&mmc1_default_pins>;
+ bus-width = <4>;
+ max-frequency = <200000000>;
+ cap-sd-highspeed;
+ cd-gpios = <&pio 129 GPIO_ACTIVE_LOW>;
+ no-mmc;
+ no-sdio;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ vmmc-supply = <&mt6360_ldo5>;
+ vqmmc-supply = <&mt6360_ldo3>;
+ status = "okay";
+};
+
+&mt6359_vaud18_ldo_reg {
+ regulator-always-on;
+};
+
+&mt6359_vbbck_ldo_reg {
+ regulator-always-on;
+};
+
+/* For USB Hub */
+&mt6359_vcamio_ldo_reg {
+ regulator-always-on;
+};
+
+&mt6359_vcn33_2_bt_ldo_reg {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+};
+
+&mt6359_vcore_buck_reg {
+ regulator-always-on;
+};
+
+&mt6359_vgpu11_buck_reg {
+ regulator-always-on;
+};
+
+&mt6359_vproc1_buck_reg {
+ regulator-always-on;
+};
+
+&mt6359_vproc2_buck_reg {
+ regulator-always-on;
+};
+
+&mt6359_vpu_buck_reg {
+ regulator-always-on;
+};
+
+&mt6359_vrf12_ldo_reg {
+ regulator-always-on;
+};
+
+&mt6359_vsram_md_ldo_reg {
+ regulator-always-on;
+};
+
+/* for GPU SRAM */
+&mt6359_vsram_others_ldo_reg {
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+};
+
+&pio {
+ eth_default_pins: eth-default-pins {
+ pins-cc {
+ pinmux = <PINMUX_GPIO85__FUNC_GBE_TXC>,
+ <PINMUX_GPIO86__FUNC_GBE_RXC>,
+ <PINMUX_GPIO87__FUNC_GBE_RXDV>,
+ <PINMUX_GPIO88__FUNC_GBE_TXEN>;
+ drive-strength = <8>;
+ };
+
+ pins-mdio {
+ pinmux = <PINMUX_GPIO89__FUNC_GBE_MDC>,
+ <PINMUX_GPIO90__FUNC_GBE_MDIO>;
+ input-enable;
+ };
+
+ pins-power {
+ pinmux = <PINMUX_GPIO91__FUNC_GPIO91>,
+ <PINMUX_GPIO92__FUNC_GPIO92>;
+ output-high;
+ };
+
+ pins-rst {
+ pinmux = <PINMUX_GPIO93__FUNC_GPIO93>;
+ };
+
+ pins-rxd {
+ pinmux = <PINMUX_GPIO81__FUNC_GBE_RXD3>,
+ <PINMUX_GPIO82__FUNC_GBE_RXD2>,
+ <PINMUX_GPIO83__FUNC_GBE_RXD1>,
+ <PINMUX_GPIO84__FUNC_GBE_RXD0>;
+ };
+
+ pins-txd {
+ pinmux = <PINMUX_GPIO77__FUNC_GBE_TXD3>,
+ <PINMUX_GPIO78__FUNC_GBE_TXD2>,
+ <PINMUX_GPIO79__FUNC_GBE_TXD1>,
+ <PINMUX_GPIO80__FUNC_GBE_TXD0>;
+ drive-strength = <8>;
+ };
+ };
+
+ eth_sleep_pins: eth-sleep-pins {
+ pins-cc {
+ pinmux = <PINMUX_GPIO85__FUNC_GPIO85>,
+ <PINMUX_GPIO86__FUNC_GPIO86>,
+ <PINMUX_GPIO87__FUNC_GPIO87>,
+ <PINMUX_GPIO88__FUNC_GPIO88>;
+ };
+
+ pins-mdio {
+ pinmux = <PINMUX_GPIO89__FUNC_GPIO89>,
+ <PINMUX_GPIO90__FUNC_GPIO90>;
+ bias-disable;
+ input-disable;
+ };
+
+ pins-rxd {
+ pinmux = <PINMUX_GPIO81__FUNC_GPIO81>,
+ <PINMUX_GPIO82__FUNC_GPIO82>,
+ <PINMUX_GPIO83__FUNC_GPIO83>,
+ <PINMUX_GPIO84__FUNC_GPIO84>;
+ };
+
+ pins-txd {
+ pinmux = <PINMUX_GPIO77__FUNC_GPIO77>,
+ <PINMUX_GPIO78__FUNC_GPIO78>,
+ <PINMUX_GPIO79__FUNC_GPIO79>,
+ <PINMUX_GPIO80__FUNC_GPIO80>;
+ };
+ };
+
+ i2c2_pins: i2c2-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO12__FUNC_SDA2>,
+ <PINMUX_GPIO13__FUNC_SCL2>;
+ bias-pull-up = <MTK_PULL_SET_RSEL_111>;
+ drive-strength = <6>;
+ drive-strength-microamp = <1000>;
+ };
+ };
+
+ i2c4_pins: i2c4-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO16__FUNC_SDA4>,
+ <PINMUX_GPIO17__FUNC_SCL4>;
+ bias-pull-up = <MTK_PULL_SET_RSEL_111>;
+ drive-strength-microamp = <1000>;
+ };
+ };
+
+ i2c6_pins: i2c6-pins {
+ pins {
+ pinmux = <PINMUX_GPIO25__FUNC_SDA6>,
+ <PINMUX_GPIO26__FUNC_SCL6>;
+ bias-pull-up = <MTK_PULL_SET_RSEL_111>;
+ };
+ };
+
+ mmc0_default_pins: mmc0-default-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO122__FUNC_MSDC0_CLK>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ drive-strength = <6>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO126__FUNC_MSDC0_DAT0>,
+ <PINMUX_GPIO125__FUNC_MSDC0_DAT1>,
+ <PINMUX_GPIO124__FUNC_MSDC0_DAT2>,
+ <PINMUX_GPIO123__FUNC_MSDC0_DAT3>,
+ <PINMUX_GPIO119__FUNC_MSDC0_DAT4>,
+ <PINMUX_GPIO118__FUNC_MSDC0_DAT5>,
+ <PINMUX_GPIO117__FUNC_MSDC0_DAT6>,
+ <PINMUX_GPIO116__FUNC_MSDC0_DAT7>,
+ <PINMUX_GPIO121__FUNC_MSDC0_CMD>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ drive-strength = <6>;
+ input-enable;
+ };
+
+ pins-rst {
+ pinmux = <PINMUX_GPIO120__FUNC_MSDC0_RSTB>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ drive-strength = <6>;
+ };
+ };
+
+ mmc0_uhs_pins: mmc0-uhs-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO122__FUNC_MSDC0_CLK>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ drive-strength = <8>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO126__FUNC_MSDC0_DAT0>,
+ <PINMUX_GPIO125__FUNC_MSDC0_DAT1>,
+ <PINMUX_GPIO124__FUNC_MSDC0_DAT2>,
+ <PINMUX_GPIO123__FUNC_MSDC0_DAT3>,
+ <PINMUX_GPIO119__FUNC_MSDC0_DAT4>,
+ <PINMUX_GPIO118__FUNC_MSDC0_DAT5>,
+ <PINMUX_GPIO117__FUNC_MSDC0_DAT6>,
+ <PINMUX_GPIO116__FUNC_MSDC0_DAT7>,
+ <PINMUX_GPIO121__FUNC_MSDC0_CMD>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ drive-strength = <8>;
+ input-enable;
+ };
+
+ pins-ds {
+ pinmux = <PINMUX_GPIO127__FUNC_MSDC0_DSL>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ drive-strength = <8>;
+ };
+
+ pins-rst {
+ pinmux = <PINMUX_GPIO120__FUNC_MSDC0_RSTB>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ drive-strength = <8>;
+ };
+ };
+
+ mmc1_default_pins: mmc1-default-pins {
+ pins-clk {
+ pinmux = <PINMUX_GPIO111__FUNC_MSDC1_CLK>;
+ bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
+ drive-strength = <8>;
+ };
+
+ pins-cmd-dat {
+ pinmux = <PINMUX_GPIO110__FUNC_MSDC1_CMD>,
+ <PINMUX_GPIO112__FUNC_MSDC1_DAT0>,
+ <PINMUX_GPIO113__FUNC_MSDC1_DAT1>,
+ <PINMUX_GPIO114__FUNC_MSDC1_DAT2>,
+ <PINMUX_GPIO115__FUNC_MSDC1_DAT3>;
+ bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+ drive-strength = <8>;
+ input-enable;
+ };
+ };
+
+ mmc1_pins_detect: mmc1-detect-pins {
+ pins-insert {
+ pinmux = <PINMUX_GPIO129__FUNC_GPIO129>;
+ bias-pull-up;
+ };
+ };
+
+ mt6360_pins: mt6360-pins {
+ pins-irq {
+ pinmux = <PINMUX_GPIO100__FUNC_GPIO100>,
+ <PINMUX_GPIO101__FUNC_GPIO101>;
+ input-enable;
+ bias-pull-up;
+ };
+ };
+
+ pcie0_default_pins: pcie0-default-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO19__FUNC_WAKEN>,
+ <PINMUX_GPIO20__FUNC_PERSTN>,
+ <PINMUX_GPIO21__FUNC_CLKREQN>;
+ bias-pull-up;
+ };
+ };
+
+ pcie1_default_pins: pcie1-default-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO0__FUNC_PERSTN_1>,
+ <PINMUX_GPIO1__FUNC_CLKREQN_1>,
+ <PINMUX_GPIO2__FUNC_WAKEN_1>;
+ bias-disable;
+ };
+ };
+
+ spi1_pins: spi1-default-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO136__FUNC_SPIM1_CSB>,
+ <PINMUX_GPIO137__FUNC_SPIM1_CLK>,
+ <PINMUX_GPIO138__FUNC_SPIM1_MO>,
+ <PINMUX_GPIO139__FUNC_SPIM1_MI>;
+ bias-disable;
+ };
+ };
+
+ spi2_pins: spi2-default-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO140__FUNC_SPIM2_CSB>,
+ <PINMUX_GPIO141__FUNC_SPIM2_CLK>,
+ <PINMUX_GPIO142__FUNC_SPIM2_MO>,
+ <PINMUX_GPIO143__FUNC_SPIM2_MI>;
+ bias-disable;
+ };
+ };
+
+ uart0_pins: uart0-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO98__FUNC_UTXD0>,
+ <PINMUX_GPIO99__FUNC_URXD0>;
+ };
+ };
+
+ uart1_pins: uart1-pins {
+ pins-bus {
+ pinmux = <PINMUX_GPIO102__FUNC_UTXD1>,
+ <PINMUX_GPIO103__FUNC_URXD1>;
+ };
+ };
+
+ wifi_vreg_pins: wifi-vreg-pins {
+ pins-wifi-pmu-en {
+ pinmux = <PINMUX_GPIO65__FUNC_GPIO65>;
+ output-high;
+ };
+
+ pins-wifi-vreg-en {
+ pinmux = <PINMUX_GPIO67__FUNC_GPIO67>;
+ };
+ };
+};
+
+&pcie0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie0_default_pins>;
+ status = "okay";
+};
+
+&pcie1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie1_default_pins>;
+ status = "okay";
+};
+
+&pmic {
+ interrupts-extended = <&pio 222 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&scp {
+ memory-region = <&scp_mem>;
+ status = "okay";
+};
+
+&spi1 {
+ /* Exposed at 40 pin connector */
+ pinctrl-0 = <&spi1_pins>;
+ pinctrl-names = "default";
+ mediatek,pad-select = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay";
+};
+
+&spi2 {
+ /* Exposed at 40 pin connector */
+ pinctrl-0 = <&spi2_pins>;
+ pinctrl-names = "default";
+ mediatek,pad-select = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay";
+};
+
+&spmi {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ mt6315_6: pmic@6 {
+ compatible = "mediatek,mt6315-regulator";
+ reg = <0x6 SPMI_USID>;
+
+ regulators {
+ mt6315_6_vbuck1: vbuck1 {
+ regulator-compatible = "vbuck1";
+ regulator-name = "Vbcpu";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1193750>;
+ regulator-enable-ramp-delay = <256>;
+ regulator-allowed-modes = <0 1 2>;
+ regulator-always-on;
+ };
+ };
+ };
+
+ mt6315_7: pmic@7 {
+ compatible = "mediatek,mt6315-regulator";
+ reg = <0x7 SPMI_USID>;
+
+ regulators {
+ mt6315_7_vbuck1: vbuck1 {
+ regulator-compatible = "vbuck1";
+ regulator-name = "Vgpu";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1193750>;
+ regulator-enable-ramp-delay = <256>;
+ regulator-allowed-modes = <0 1 2>;
+ };
+ };
+ };
+};
+
+&uart0 {
+ /* Exposed at 40 pin connector */
+ pinctrl-0 = <&uart0_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&uart1 {
+ /* Exposed at 40 pin connector */
+ pinctrl-0 = <&uart1_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&ssusb0 {
+ role-switch-default-mode = "host";
+ usb-role-switch;
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+
+ port {
+ mtu3_hs0_role_sw: endpoint {
+ remote-endpoint = <&typec_con_hs>;
+ };
+ };
+};
+
+&ssusb2 {
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ status = "okay";
+};
+
+&xhci0 {
+ vbus-supply = <&otg_vbus_regulator>;
+ status = "okay";
+};
+
+&xhci1 {
+ /* MT7921's USB Bluetooth has issues with USB2 LPM */
+ usb2-lpm-disable;
+ vusb33-supply = <&mt6359_vusb_ldo_reg>;
+ vbus-supply = <&vsys>;
+ status = "okay";
+};
+
+&xhci2 {
+ vbus-supply = <&vsys>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
index bbc2e9bef08d..14d58859bb55 100644
--- a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
@@ -762,6 +762,7 @@
interrupt-parent = <&gpio>;
interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
reg = <0>;
+ wakeup-source;
google,cros-ec-spi-msg-delay = <2000>;
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
index 5b59c1986e9b..e8b296d9e0d3 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -53,6 +53,56 @@
status = "okay";
};
+ i2c@c240000 {
+ status = "okay";
+
+ power-sensor@40 {
+ compatible = "ti,ina3221";
+ reg = <0x40>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ input@0 {
+ reg = <0x0>;
+ label = "GPU";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ input@1 {
+ reg = <0x1>;
+ label = "CPU";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ input@2 {
+ reg = <0x2>;
+ label = "SOC";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ };
+
+ power-sensor@41 {
+ compatible = "ti,ina3221";
+ reg = <0x41>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ input@0 {
+ reg = <0x0>;
+ label = "CV";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ input@1 {
+ reg = <0x1>;
+ label = "VDDRQ";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ input@2 {
+ reg = <0x2>;
+ label = "SYS5V";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ };
+ };
+
serial@3110000 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
index 64a3398fe7a6..c32876699a43 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
@@ -2062,8 +2062,15 @@
ports {
usb2-0 {
- mode = "host";
+ mode = "otg";
+ usb-role-switch;
status = "okay";
+
+ port {
+ hs_typec_p0: endpoint {
+ remote-endpoint = <&hs_ucsi_ccg_p0>;
+ };
+ };
};
usb2-1 {
@@ -2094,6 +2101,14 @@
};
};
+ usb@3550000 {
+ status = "okay";
+
+ phys = <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-0}>,
+ <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-2}>;
+ phy-names = "usb2-0", "usb3-0";
+ };
+
usb@3610000 {
status = "okay";
@@ -2106,6 +2121,40 @@
phy-names = "usb2-0", "usb2-1", "usb2-3", "usb3-0", "usb3-2", "usb3-3";
};
+ i2c@c240000 {
+ typec@8 {
+ compatible = "cypress,cypd4226";
+ reg = <0x08>;
+ interrupt-parent = <&gpio_aon>;
+ interrupts = <TEGRA194_AON_GPIO(BB, 2) IRQ_TYPE_LEVEL_LOW>;
+ firmware-name = "nvidia,jetson-agx-xavier";
+ status = "okay";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ccg_typec_con0: connector@0 {
+ compatible = "usb-c-connector";
+ reg = <0>;
+ label = "USB-C";
+ data-role = "dual";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ hs_ucsi_ccg_p0: endpoint {
+ remote-endpoint = <&hs_typec_p0>;
+ };
+ };
+ };
+ };
+ };
+ };
+
i2c@c250000 {
status = "okay";
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
index 58f190b0f868..59860d19f0f6 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
@@ -50,6 +50,33 @@
status = "okay";
};
+ i2c@c250000 {
+ status = "okay";
+
+ power-sensor@40 {
+ compatible = "ti,ina3221";
+ reg = <0x40>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ input@0 {
+ reg = <0x0>;
+ label = "VDD_IN";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ input@1 {
+ reg = <0x1>;
+ label = "VDD_CPU_GPU_CV";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ input@2 {
+ reg = <0x2>;
+ label = "VDD_SOC";
+ shunt-resistor-micro-ohms = <5000>;
+ };
+ };
+ };
+
serial@3100000 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3701.dtsi b/arch/arm64/boot/dts/nvidia/tegra234-p3701.dtsi
index db6ef711674a..320c8e9b06b4 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3701.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234-p3701.dtsi
@@ -3,6 +3,11 @@
/ {
compatible = "nvidia,p3701", "nvidia,tegra234";
+ aliases {
+ mmc0 = "/bus@0/mmc@3460000";
+ mmc1 = "/bus@0/mmc@3400000";
+ };
+
bus@0 {
aconnect@2900000 {
status = "okay";
@@ -12,1970 +17,22 @@
i2s@2901000 {
status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- i2s1_cif: endpoint {
- remote-endpoint = <&xbar_i2s1>;
- };
- };
-
- i2s1_port: port@1 {
- reg = <1>;
-
- i2s1_dap: endpoint {
- dai-format = "i2s";
- /* placeholder for external codec */
- };
- };
- };
};
i2s@2901100 {
status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- i2s2_cif: endpoint {
- remote-endpoint = <&xbar_i2s2>;
- };
- };
-
- i2s2_port: port@1 {
- reg = <1>;
-
- i2s2_dap: endpoint {
- dai-format = "i2s";
- /* placeholder for external codec */
- };
- };
- };
};
i2s@2901300 {
status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- i2s4_cif: endpoint {
- remote-endpoint = <&xbar_i2s4>;
- };
- };
-
- i2s4_port: port@1 {
- reg = <1>;
-
- i2s4_dap: endpoint {
- dai-format = "i2s";
- /* placeholder for external codec */
- };
- };
- };
};
i2s@2901500 {
status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- i2s6_cif: endpoint {
- remote-endpoint = <&xbar_i2s6>;
- };
- };
-
- i2s6_port: port@1 {
- reg = <1>;
-
- i2s6_dap: endpoint {
- dai-format = "i2s";
- /* placeholder for external codec */
- };
- };
- };
- };
-
- sfc@2902000 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- sfc1_cif_in: endpoint {
- remote-endpoint = <&xbar_sfc1_in>;
- };
- };
-
- sfc1_out_port: port@1 {
- reg = <1>;
-
- sfc1_cif_out: endpoint {
- remote-endpoint = <&xbar_sfc1_out>;
- };
- };
- };
- };
-
- sfc@2902200 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- sfc2_cif_in: endpoint {
- remote-endpoint = <&xbar_sfc2_in>;
- };
- };
-
- sfc2_out_port: port@1 {
- reg = <1>;
-
- sfc2_cif_out: endpoint {
- remote-endpoint = <&xbar_sfc2_out>;
- };
- };
- };
- };
-
- sfc@2902400 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- sfc3_cif_in: endpoint {
- remote-endpoint = <&xbar_sfc3_in>;
- };
- };
-
- sfc3_out_port: port@1 {
- reg = <1>;
-
- sfc3_cif_out: endpoint {
- remote-endpoint = <&xbar_sfc3_out>;
- };
- };
- };
- };
-
- sfc@2902600 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- sfc4_cif_in: endpoint {
- remote-endpoint = <&xbar_sfc4_in>;
- };
- };
-
- sfc4_out_port: port@1 {
- reg = <1>;
-
- sfc4_cif_out: endpoint {
- remote-endpoint = <&xbar_sfc4_out>;
- };
- };
- };
- };
-
- amx@2903000 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- amx1_in1: endpoint {
- remote-endpoint = <&xbar_amx1_in1>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- amx1_in2: endpoint {
- remote-endpoint = <&xbar_amx1_in2>;
- };
- };
-
- port@2 {
- reg = <2>;
-
- amx1_in3: endpoint {
- remote-endpoint = <&xbar_amx1_in3>;
- };
- };
-
- port@3 {
- reg = <3>;
-
- amx1_in4: endpoint {
- remote-endpoint = <&xbar_amx1_in4>;
- };
- };
-
- amx1_out_port: port@4 {
- reg = <4>;
-
- amx1_out: endpoint {
- remote-endpoint = <&xbar_amx1_out>;
- };
- };
- };
- };
-
- amx@2903100 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- amx2_in1: endpoint {
- remote-endpoint = <&xbar_amx2_in1>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- amx2_in2: endpoint {
- remote-endpoint = <&xbar_amx2_in2>;
- };
- };
-
- port@2 {
- reg = <2>;
-
- amx2_in3: endpoint {
- remote-endpoint = <&xbar_amx2_in3>;
- };
- };
-
- port@3 {
- reg = <3>;
-
- amx2_in4: endpoint {
- remote-endpoint = <&xbar_amx2_in4>;
- };
- };
-
- amx2_out_port: port@4 {
- reg = <4>;
-
- amx2_out: endpoint {
- remote-endpoint = <&xbar_amx2_out>;
- };
- };
- };
- };
-
- amx@2903200 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- amx3_in1: endpoint {
- remote-endpoint = <&xbar_amx3_in1>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- amx3_in2: endpoint {
- remote-endpoint = <&xbar_amx3_in2>;
- };
- };
-
- port@2 {
- reg = <2>;
-
- amx3_in3: endpoint {
- remote-endpoint = <&xbar_amx3_in3>;
- };
- };
-
- port@3 {
- reg = <3>;
-
- amx3_in4: endpoint {
- remote-endpoint = <&xbar_amx3_in4>;
- };
- };
-
- amx3_out_port: port@4 {
- reg = <4>;
-
- amx3_out: endpoint {
- remote-endpoint = <&xbar_amx3_out>;
- };
- };
- };
- };
-
- amx@2903300 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- amx4_in1: endpoint {
- remote-endpoint = <&xbar_amx4_in1>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- amx4_in2: endpoint {
- remote-endpoint = <&xbar_amx4_in2>;
- };
- };
-
- port@2 {
- reg = <2>;
-
- amx4_in3: endpoint {
- remote-endpoint = <&xbar_amx4_in3>;
- };
- };
-
- port@3 {
- reg = <3>;
-
- amx4_in4: endpoint {
- remote-endpoint = <&xbar_amx4_in4>;
- };
- };
-
- amx4_out_port: port@4 {
- reg = <4>;
-
- amx4_out: endpoint {
- remote-endpoint = <&xbar_amx4_out>;
- };
- };
- };
- };
-
- adx@2903800 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- adx1_in: endpoint {
- remote-endpoint = <&xbar_adx1_in>;
- };
- };
-
- adx1_out1_port: port@1 {
- reg = <1>;
-
- adx1_out1: endpoint {
- remote-endpoint = <&xbar_adx1_out1>;
- };
- };
-
- adx1_out2_port: port@2 {
- reg = <2>;
-
- adx1_out2: endpoint {
- remote-endpoint = <&xbar_adx1_out2>;
- };
- };
-
- adx1_out3_port: port@3 {
- reg = <3>;
-
- adx1_out3: endpoint {
- remote-endpoint = <&xbar_adx1_out3>;
- };
- };
-
- adx1_out4_port: port@4 {
- reg = <4>;
-
- adx1_out4: endpoint {
- remote-endpoint = <&xbar_adx1_out4>;
- };
- };
- };
- };
-
- adx@2903900 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- adx2_in: endpoint {
- remote-endpoint = <&xbar_adx2_in>;
- };
- };
-
- adx2_out1_port: port@1 {
- reg = <1>;
-
- adx2_out1: endpoint {
- remote-endpoint = <&xbar_adx2_out1>;
- };
- };
-
- adx2_out2_port: port@2 {
- reg = <2>;
-
- adx2_out2: endpoint {
- remote-endpoint = <&xbar_adx2_out2>;
- };
- };
-
- adx2_out3_port: port@3 {
- reg = <3>;
-
- adx2_out3: endpoint {
- remote-endpoint = <&xbar_adx2_out3>;
- };
- };
-
- adx2_out4_port: port@4 {
- reg = <4>;
-
- adx2_out4: endpoint {
- remote-endpoint = <&xbar_adx2_out4>;
- };
- };
- };
- };
-
- adx@2903a00 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- adx3_in: endpoint {
- remote-endpoint = <&xbar_adx3_in>;
- };
- };
-
- adx3_out1_port: port@1 {
- reg = <1>;
-
- adx3_out1: endpoint {
- remote-endpoint = <&xbar_adx3_out1>;
- };
- };
-
- adx3_out2_port: port@2 {
- reg = <2>;
-
- adx3_out2: endpoint {
- remote-endpoint = <&xbar_adx3_out2>;
- };
- };
-
- adx3_out3_port: port@3 {
- reg = <3>;
-
- adx3_out3: endpoint {
- remote-endpoint = <&xbar_adx3_out3>;
- };
- };
-
- adx3_out4_port: port@4 {
- reg = <4>;
-
- adx3_out4: endpoint {
- remote-endpoint = <&xbar_adx3_out4>;
- };
- };
- };
- };
-
- adx@2903b00 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- adx4_in: endpoint {
- remote-endpoint = <&xbar_adx4_in>;
- };
- };
-
- adx4_out1_port: port@1 {
- reg = <1>;
-
- adx4_out1: endpoint {
- remote-endpoint = <&xbar_adx4_out1>;
- };
- };
-
- adx4_out2_port: port@2 {
- reg = <2>;
-
- adx4_out2: endpoint {
- remote-endpoint = <&xbar_adx4_out2>;
- };
- };
-
- adx4_out3_port: port@3 {
- reg = <3>;
-
- adx4_out3: endpoint {
- remote-endpoint = <&xbar_adx4_out3>;
- };
- };
-
- adx4_out4_port: port@4 {
- reg = <4>;
-
- adx4_out4: endpoint {
- remote-endpoint = <&xbar_adx4_out4>;
- };
- };
- };
};
dmic@2904200 {
status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- dmic3_cif: endpoint {
- remote-endpoint = <&xbar_dmic3>;
- };
- };
-
- dmic3_port: port@1 {
- reg = <1>;
-
- dmic3_dap: endpoint {
- /* placeholder for external codec */
- };
- };
- };
- };
-
- processing-engine@2908000 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0x0>;
-
- ope1_cif_in_ep: endpoint {
- remote-endpoint = <&xbar_ope1_in_ep>;
- };
- };
-
- ope1_out_port: port@1 {
- reg = <0x1>;
-
- ope1_cif_out_ep: endpoint {
- remote-endpoint = <&xbar_ope1_out_ep>;
- };
- };
- };
- };
-
- mvc@290a000 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- mvc1_cif_in: endpoint {
- remote-endpoint = <&xbar_mvc1_in>;
- };
- };
-
- mvc1_out_port: port@1 {
- reg = <1>;
-
- mvc1_cif_out: endpoint {
- remote-endpoint = <&xbar_mvc1_out>;
- };
- };
- };
- };
-
- mvc@290a200 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- mvc2_cif_in: endpoint {
- remote-endpoint = <&xbar_mvc2_in>;
- };
- };
-
- mvc2_out_port: port@1 {
- reg = <1>;
-
- mvc2_cif_out: endpoint {
- remote-endpoint = <&xbar_mvc2_out>;
- };
- };
- };
- };
-
- amixer@290bb00 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0x0>;
-
- mix_in1: endpoint {
- remote-endpoint = <&xbar_mix_in1>;
- };
- };
-
- port@1 {
- reg = <0x1>;
-
- mix_in2: endpoint {
- remote-endpoint = <&xbar_mix_in2>;
- };
- };
-
- port@2 {
- reg = <0x2>;
-
- mix_in3: endpoint {
- remote-endpoint = <&xbar_mix_in3>;
- };
- };
-
- port@3 {
- reg = <0x3>;
-
- mix_in4: endpoint {
- remote-endpoint = <&xbar_mix_in4>;
- };
- };
-
- port@4 {
- reg = <0x4>;
-
- mix_in5: endpoint {
- remote-endpoint = <&xbar_mix_in5>;
- };
- };
-
- port@5 {
- reg = <0x5>;
-
- mix_in6: endpoint {
- remote-endpoint = <&xbar_mix_in6>;
- };
- };
-
- port@6 {
- reg = <0x6>;
-
- mix_in7: endpoint {
- remote-endpoint = <&xbar_mix_in7>;
- };
- };
-
- port@7 {
- reg = <0x7>;
-
- mix_in8: endpoint {
- remote-endpoint = <&xbar_mix_in8>;
- };
- };
-
- port@8 {
- reg = <0x8>;
-
- mix_in9: endpoint {
- remote-endpoint = <&xbar_mix_in9>;
- };
- };
-
- port@9 {
- reg = <0x9>;
-
- mix_in10: endpoint {
- remote-endpoint = <&xbar_mix_in10>;
- };
- };
-
- mix_out1_port: port@a {
- reg = <0xa>;
-
- mix_out1: endpoint {
- remote-endpoint = <&xbar_mix_out1>;
- };
- };
-
- mix_out2_port: port@b {
- reg = <0xb>;
-
- mix_out2: endpoint {
- remote-endpoint = <&xbar_mix_out2>;
- };
- };
-
- mix_out3_port: port@c {
- reg = <0xc>;
-
- mix_out3: endpoint {
- remote-endpoint = <&xbar_mix_out3>;
- };
- };
-
- mix_out4_port: port@d {
- reg = <0xd>;
-
- mix_out4: endpoint {
- remote-endpoint = <&xbar_mix_out4>;
- };
- };
-
- mix_out5_port: port@e {
- reg = <0xe>;
-
- mix_out5: endpoint {
- remote-endpoint = <&xbar_mix_out5>;
- };
- };
- };
- };
-
- admaif@290f000 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- admaif0_port: port@0 {
- reg = <0x0>;
-
- admaif0: endpoint {
- remote-endpoint = <&xbar_admaif0>;
- };
- };
-
- admaif1_port: port@1 {
- reg = <0x1>;
-
- admaif1: endpoint {
- remote-endpoint = <&xbar_admaif1>;
- };
- };
-
- admaif2_port: port@2 {
- reg = <0x2>;
-
- admaif2: endpoint {
- remote-endpoint = <&xbar_admaif2>;
- };
- };
-
- admaif3_port: port@3 {
- reg = <0x3>;
-
- admaif3: endpoint {
- remote-endpoint = <&xbar_admaif3>;
- };
- };
-
- admaif4_port: port@4 {
- reg = <0x4>;
-
- admaif4: endpoint {
- remote-endpoint = <&xbar_admaif4>;
- };
- };
-
- admaif5_port: port@5 {
- reg = <0x5>;
-
- admaif5: endpoint {
- remote-endpoint = <&xbar_admaif5>;
- };
- };
-
- admaif6_port: port@6 {
- reg = <0x6>;
-
- admaif6: endpoint {
- remote-endpoint = <&xbar_admaif6>;
- };
- };
-
- admaif7_port: port@7 {
- reg = <0x7>;
-
- admaif7: endpoint {
- remote-endpoint = <&xbar_admaif7>;
- };
- };
-
- admaif8_port: port@8 {
- reg = <0x8>;
-
- admaif8: endpoint {
- remote-endpoint = <&xbar_admaif8>;
- };
- };
-
- admaif9_port: port@9 {
- reg = <0x9>;
-
- admaif9: endpoint {
- remote-endpoint = <&xbar_admaif9>;
- };
- };
-
- admaif10_port: port@a {
- reg = <0xa>;
-
- admaif10: endpoint {
- remote-endpoint = <&xbar_admaif10>;
- };
- };
-
- admaif11_port: port@b {
- reg = <0xb>;
-
- admaif11: endpoint {
- remote-endpoint = <&xbar_admaif11>;
- };
- };
-
- admaif12_port: port@c {
- reg = <0xc>;
-
- admaif12: endpoint {
- remote-endpoint = <&xbar_admaif12>;
- };
- };
-
- admaif13_port: port@d {
- reg = <0xd>;
-
- admaif13: endpoint {
- remote-endpoint = <&xbar_admaif13>;
- };
- };
-
- admaif14_port: port@e {
- reg = <0xe>;
-
- admaif14: endpoint {
- remote-endpoint = <&xbar_admaif14>;
- };
- };
-
- admaif15_port: port@f {
- reg = <0xf>;
-
- admaif15: endpoint {
- remote-endpoint = <&xbar_admaif15>;
- };
- };
-
- admaif16_port: port@10 {
- reg = <0x10>;
-
- admaif16: endpoint {
- remote-endpoint = <&xbar_admaif16>;
- };
- };
-
- admaif17_port: port@11 {
- reg = <0x11>;
-
- admaif17: endpoint {
- remote-endpoint = <&xbar_admaif17>;
- };
- };
-
- admaif18_port: port@12 {
- reg = <0x12>;
-
- admaif18: endpoint {
- remote-endpoint = <&xbar_admaif18>;
- };
- };
-
- admaif19_port: port@13 {
- reg = <0x13>;
-
- admaif19: endpoint {
- remote-endpoint = <&xbar_admaif19>;
- };
- };
- };
- };
-
- asrc@2910000 {
- status = "okay";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0x0>;
-
- asrc_in1_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in1_ep>;
- };
- };
-
- port@1 {
- reg = <0x1>;
-
- asrc_in2_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in2_ep>;
- };
- };
-
- port@2 {
- reg = <0x2>;
-
- asrc_in3_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in3_ep>;
- };
- };
-
- port@3 {
- reg = <0x3>;
-
- asrc_in4_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in4_ep>;
- };
- };
-
- port@4 {
- reg = <0x4>;
-
- asrc_in5_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in5_ep>;
- };
- };
-
- port@5 {
- reg = <0x5>;
-
- asrc_in6_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in6_ep>;
- };
- };
-
- port@6 {
- reg = <0x6>;
-
- asrc_in7_ep: endpoint {
- remote-endpoint = <&xbar_asrc_in7_ep>;
- };
- };
-
- asrc_out1_port: port@7 {
- reg = <0x7>;
-
- asrc_out1_ep: endpoint {
- remote-endpoint = <&xbar_asrc_out1_ep>;
- };
- };
-
- asrc_out2_port: port@8 {
- reg = <0x8>;
-
- asrc_out2_ep: endpoint {
- remote-endpoint = <&xbar_asrc_out2_ep>;
- };
- };
-
- asrc_out3_port: port@9 {
- reg = <0x9>;
-
- asrc_out3_ep: endpoint {
- remote-endpoint = <&xbar_asrc_out3_ep>;
- };
- };
-
- asrc_out4_port: port@a {
- reg = <0xa>;
-
- asrc_out4_ep: endpoint {
- remote-endpoint = <&xbar_asrc_out4_ep>;
- };
- };
-
- asrc_out5_port: port@b {
- reg = <0xb>;
-
- asrc_out5_ep: endpoint {
- remote-endpoint = <&xbar_asrc_out5_ep>;
- };
- };
-
- asrc_out6_port: port@c {
- reg = <0xc>;
-
- asrc_out6_ep: endpoint {
- remote-endpoint = <&xbar_asrc_out6_ep>;
- };
- };
- };
- };
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0x0>;
-
- xbar_admaif0: endpoint {
- remote-endpoint = <&admaif0>;
- };
- };
-
- port@1 {
- reg = <0x1>;
-
- xbar_admaif1: endpoint {
- remote-endpoint = <&admaif1>;
- };
- };
-
- port@2 {
- reg = <0x2>;
-
- xbar_admaif2: endpoint {
- remote-endpoint = <&admaif2>;
- };
- };
-
- port@3 {
- reg = <0x3>;
-
- xbar_admaif3: endpoint {
- remote-endpoint = <&admaif3>;
- };
- };
-
- port@4 {
- reg = <0x4>;
-
- xbar_admaif4: endpoint {
- remote-endpoint = <&admaif4>;
- };
- };
-
- port@5 {
- reg = <0x5>;
-
- xbar_admaif5: endpoint {
- remote-endpoint = <&admaif5>;
- };
- };
-
- port@6 {
- reg = <0x6>;
-
- xbar_admaif6: endpoint {
- remote-endpoint = <&admaif6>;
- };
- };
-
- port@7 {
- reg = <0x7>;
-
- xbar_admaif7: endpoint {
- remote-endpoint = <&admaif7>;
- };
- };
-
- port@8 {
- reg = <0x8>;
-
- xbar_admaif8: endpoint {
- remote-endpoint = <&admaif8>;
- };
- };
-
- port@9 {
- reg = <0x9>;
-
- xbar_admaif9: endpoint {
- remote-endpoint = <&admaif9>;
- };
- };
-
- port@a {
- reg = <0xa>;
-
- xbar_admaif10: endpoint {
- remote-endpoint = <&admaif10>;
- };
- };
-
- port@b {
- reg = <0xb>;
-
- xbar_admaif11: endpoint {
- remote-endpoint = <&admaif11>;
- };
- };
-
- port@c {
- reg = <0xc>;
-
- xbar_admaif12: endpoint {
- remote-endpoint = <&admaif12>;
- };
- };
-
- port@d {
- reg = <0xd>;
-
- xbar_admaif13: endpoint {
- remote-endpoint = <&admaif13>;
- };
- };
-
- port@e {
- reg = <0xe>;
-
- xbar_admaif14: endpoint {
- remote-endpoint = <&admaif14>;
- };
- };
-
- port@f {
- reg = <0xf>;
-
- xbar_admaif15: endpoint {
- remote-endpoint = <&admaif15>;
- };
- };
-
- port@10 {
- reg = <0x10>;
-
- xbar_admaif16: endpoint {
- remote-endpoint = <&admaif16>;
- };
- };
-
- port@11 {
- reg = <0x11>;
-
- xbar_admaif17: endpoint {
- remote-endpoint = <&admaif17>;
- };
- };
-
- port@12 {
- reg = <0x12>;
-
- xbar_admaif18: endpoint {
- remote-endpoint = <&admaif18>;
- };
- };
-
- port@13 {
- reg = <0x13>;
-
- xbar_admaif19: endpoint {
- remote-endpoint = <&admaif19>;
- };
- };
-
- xbar_i2s1_port: port@14 {
- reg = <0x14>;
-
- xbar_i2s1: endpoint {
- remote-endpoint = <&i2s1_cif>;
- };
- };
-
- xbar_i2s2_port: port@15 {
- reg = <0x15>;
-
- xbar_i2s2: endpoint {
- remote-endpoint = <&i2s2_cif>;
- };
- };
-
- xbar_i2s4_port: port@17 {
- reg = <0x17>;
-
- xbar_i2s4: endpoint {
- remote-endpoint = <&i2s4_cif>;
- };
- };
-
- xbar_i2s6_port: port@19 {
- reg = <0x19>;
-
- xbar_i2s6: endpoint {
- remote-endpoint = <&i2s6_cif>;
- };
- };
-
- xbar_dmic3_port: port@1c {
- reg = <0x1c>;
-
- xbar_dmic3: endpoint {
- remote-endpoint = <&dmic3_cif>;
- };
- };
-
- xbar_sfc1_in_port: port@20 {
- reg = <0x20>;
-
- xbar_sfc1_in: endpoint {
- remote-endpoint = <&sfc1_cif_in>;
- };
- };
-
- port@21 {
- reg = <0x21>;
-
- xbar_sfc1_out: endpoint {
- remote-endpoint = <&sfc1_cif_out>;
- };
- };
-
- xbar_sfc2_in_port: port@22 {
- reg = <0x22>;
-
- xbar_sfc2_in: endpoint {
- remote-endpoint = <&sfc2_cif_in>;
- };
- };
-
- port@23 {
- reg = <0x23>;
-
- xbar_sfc2_out: endpoint {
- remote-endpoint = <&sfc2_cif_out>;
- };
- };
-
- xbar_sfc3_in_port: port@24 {
- reg = <0x24>;
-
- xbar_sfc3_in: endpoint {
- remote-endpoint = <&sfc3_cif_in>;
- };
- };
-
- port@25 {
- reg = <0x25>;
-
- xbar_sfc3_out: endpoint {
- remote-endpoint = <&sfc3_cif_out>;
- };
- };
-
- xbar_sfc4_in_port: port@26 {
- reg = <0x26>;
-
- xbar_sfc4_in: endpoint {
- remote-endpoint = <&sfc4_cif_in>;
- };
- };
-
- port@27 {
- reg = <0x27>;
-
- xbar_sfc4_out: endpoint {
- remote-endpoint = <&sfc4_cif_out>;
- };
- };
-
- xbar_mvc1_in_port: port@28 {
- reg = <0x28>;
-
- xbar_mvc1_in: endpoint {
- remote-endpoint = <&mvc1_cif_in>;
- };
- };
-
- port@29 {
- reg = <0x29>;
-
- xbar_mvc1_out: endpoint {
- remote-endpoint = <&mvc1_cif_out>;
- };
- };
-
- xbar_mvc2_in_port: port@2a {
- reg = <0x2a>;
-
- xbar_mvc2_in: endpoint {
- remote-endpoint = <&mvc2_cif_in>;
- };
- };
-
- port@2b {
- reg = <0x2b>;
-
- xbar_mvc2_out: endpoint {
- remote-endpoint = <&mvc2_cif_out>;
- };
- };
-
- xbar_amx1_in1_port: port@2c {
- reg = <0x2c>;
-
- xbar_amx1_in1: endpoint {
- remote-endpoint = <&amx1_in1>;
- };
- };
-
- xbar_amx1_in2_port: port@2d {
- reg = <0x2d>;
-
- xbar_amx1_in2: endpoint {
- remote-endpoint = <&amx1_in2>;
- };
- };
-
- xbar_amx1_in3_port: port@2e {
- reg = <0x2e>;
-
- xbar_amx1_in3: endpoint {
- remote-endpoint = <&amx1_in3>;
- };
- };
-
- xbar_amx1_in4_port: port@2f {
- reg = <0x2f>;
-
- xbar_amx1_in4: endpoint {
- remote-endpoint = <&amx1_in4>;
- };
- };
-
- port@30 {
- reg = <0x30>;
-
- xbar_amx1_out: endpoint {
- remote-endpoint = <&amx1_out>;
- };
- };
-
- xbar_amx2_in1_port: port@31 {
- reg = <0x31>;
-
- xbar_amx2_in1: endpoint {
- remote-endpoint = <&amx2_in1>;
- };
- };
-
- xbar_amx2_in2_port: port@32 {
- reg = <0x32>;
-
- xbar_amx2_in2: endpoint {
- remote-endpoint = <&amx2_in2>;
- };
- };
-
- xbar_amx2_in3_port: port@33 {
- reg = <0x33>;
-
- xbar_amx2_in3: endpoint {
- remote-endpoint = <&amx2_in3>;
- };
- };
-
- xbar_amx2_in4_port: port@34 {
- reg = <0x34>;
-
- xbar_amx2_in4: endpoint {
- remote-endpoint = <&amx2_in4>;
- };
- };
-
- port@35 {
- reg = <0x35>;
-
- xbar_amx2_out: endpoint {
- remote-endpoint = <&amx2_out>;
- };
- };
-
- xbar_amx3_in1_port: port@36 {
- reg = <0x36>;
-
- xbar_amx3_in1: endpoint {
- remote-endpoint = <&amx3_in1>;
- };
- };
-
- xbar_amx3_in2_port: port@37 {
- reg = <0x37>;
-
- xbar_amx3_in2: endpoint {
- remote-endpoint = <&amx3_in2>;
- };
- };
-
- xbar_amx3_in3_port: port@38 {
- reg = <0x38>;
-
- xbar_amx3_in3: endpoint {
- remote-endpoint = <&amx3_in3>;
- };
- };
-
- xbar_amx3_in4_port: port@39 {
- reg = <0x39>;
-
- xbar_amx3_in4: endpoint {
- remote-endpoint = <&amx3_in4>;
- };
- };
-
- port@3a {
- reg = <0x3a>;
-
- xbar_amx3_out: endpoint {
- remote-endpoint = <&amx3_out>;
- };
- };
-
- xbar_amx4_in1_port: port@3b {
- reg = <0x3b>;
-
- xbar_amx4_in1: endpoint {
- remote-endpoint = <&amx4_in1>;
- };
- };
-
- xbar_amx4_in2_port: port@3c {
- reg = <0x3c>;
-
- xbar_amx4_in2: endpoint {
- remote-endpoint = <&amx4_in2>;
- };
- };
-
- xbar_amx4_in3_port: port@3d {
- reg = <0x3d>;
-
- xbar_amx4_in3: endpoint {
- remote-endpoint = <&amx4_in3>;
- };
- };
-
- xbar_amx4_in4_port: port@3e {
- reg = <0x3e>;
-
- xbar_amx4_in4: endpoint {
- remote-endpoint = <&amx4_in4>;
- };
- };
-
- port@3f {
- reg = <0x3f>;
-
- xbar_amx4_out: endpoint {
- remote-endpoint = <&amx4_out>;
- };
- };
-
- xbar_adx1_in_port: port@40 {
- reg = <0x40>;
-
- xbar_adx1_in: endpoint {
- remote-endpoint = <&adx1_in>;
- };
- };
-
- port@41 {
- reg = <0x41>;
-
- xbar_adx1_out1: endpoint {
- remote-endpoint = <&adx1_out1>;
- };
- };
-
- port@42 {
- reg = <0x42>;
-
- xbar_adx1_out2: endpoint {
- remote-endpoint = <&adx1_out2>;
- };
- };
-
- port@43 {
- reg = <0x43>;
-
- xbar_adx1_out3: endpoint {
- remote-endpoint = <&adx1_out3>;
- };
- };
-
- port@44 {
- reg = <0x44>;
-
- xbar_adx1_out4: endpoint {
- remote-endpoint = <&adx1_out4>;
- };
- };
-
- xbar_adx2_in_port: port@45 {
- reg = <0x45>;
-
- xbar_adx2_in: endpoint {
- remote-endpoint = <&adx2_in>;
- };
- };
-
- port@46 {
- reg = <0x46>;
-
- xbar_adx2_out1: endpoint {
- remote-endpoint = <&adx2_out1>;
- };
- };
-
- port@47 {
- reg = <0x47>;
-
- xbar_adx2_out2: endpoint {
- remote-endpoint = <&adx2_out2>;
- };
- };
-
- port@48 {
- reg = <0x48>;
-
- xbar_adx2_out3: endpoint {
- remote-endpoint = <&adx2_out3>;
- };
- };
-
- port@49 {
- reg = <0x49>;
-
- xbar_adx2_out4: endpoint {
- remote-endpoint = <&adx2_out4>;
- };
- };
-
- xbar_adx3_in_port: port@4a {
- reg = <0x4a>;
-
- xbar_adx3_in: endpoint {
- remote-endpoint = <&adx3_in>;
- };
- };
-
- port@4b {
- reg = <0x4b>;
-
- xbar_adx3_out1: endpoint {
- remote-endpoint = <&adx3_out1>;
- };
- };
-
- port@4c {
- reg = <0x4c>;
-
- xbar_adx3_out2: endpoint {
- remote-endpoint = <&adx3_out2>;
- };
- };
-
- port@4d {
- reg = <0x4d>;
-
- xbar_adx3_out3: endpoint {
- remote-endpoint = <&adx3_out3>;
- };
- };
-
- port@4e {
- reg = <0x4e>;
-
- xbar_adx3_out4: endpoint {
- remote-endpoint = <&adx3_out4>;
- };
- };
-
- xbar_adx4_in_port: port@4f {
- reg = <0x4f>;
-
- xbar_adx4_in: endpoint {
- remote-endpoint = <&adx4_in>;
- };
- };
-
- port@50 {
- reg = <0x50>;
-
- xbar_adx4_out1: endpoint {
- remote-endpoint = <&adx4_out1>;
- };
- };
-
- port@51 {
- reg = <0x51>;
-
- xbar_adx4_out2: endpoint {
- remote-endpoint = <&adx4_out2>;
- };
- };
-
- port@52 {
- reg = <0x52>;
-
- xbar_adx4_out3: endpoint {
- remote-endpoint = <&adx4_out3>;
- };
- };
-
- port@53 {
- reg = <0x53>;
-
- xbar_adx4_out4: endpoint {
- remote-endpoint = <&adx4_out4>;
- };
- };
-
- xbar_mix_in1_port: port@54 {
- reg = <0x54>;
-
- xbar_mix_in1: endpoint {
- remote-endpoint = <&mix_in1>;
- };
- };
-
- xbar_mix_in2_port: port@55 {
- reg = <0x55>;
-
- xbar_mix_in2: endpoint {
- remote-endpoint = <&mix_in2>;
- };
- };
-
- xbar_mix_in3_port: port@56 {
- reg = <0x56>;
-
- xbar_mix_in3: endpoint {
- remote-endpoint = <&mix_in3>;
- };
- };
-
- xbar_mix_in4_port: port@57 {
- reg = <0x57>;
-
- xbar_mix_in4: endpoint {
- remote-endpoint = <&mix_in4>;
- };
- };
-
- xbar_mix_in5_port: port@58 {
- reg = <0x58>;
-
- xbar_mix_in5: endpoint {
- remote-endpoint = <&mix_in5>;
- };
- };
-
- xbar_mix_in6_port: port@59 {
- reg = <0x59>;
-
- xbar_mix_in6: endpoint {
- remote-endpoint = <&mix_in6>;
- };
- };
-
- xbar_mix_in7_port: port@5a {
- reg = <0x5a>;
-
- xbar_mix_in7: endpoint {
- remote-endpoint = <&mix_in7>;
- };
- };
-
- xbar_mix_in8_port: port@5b {
- reg = <0x5b>;
-
- xbar_mix_in8: endpoint {
- remote-endpoint = <&mix_in8>;
- };
- };
-
- xbar_mix_in9_port: port@5c {
- reg = <0x5c>;
-
- xbar_mix_in9: endpoint {
- remote-endpoint = <&mix_in9>;
- };
- };
-
- xbar_mix_in10_port: port@5d {
- reg = <0x5d>;
-
- xbar_mix_in10: endpoint {
- remote-endpoint = <&mix_in10>;
- };
- };
-
- port@5e {
- reg = <0x5e>;
-
- xbar_mix_out1: endpoint {
- remote-endpoint = <&mix_out1>;
- };
- };
-
- port@5f {
- reg = <0x5f>;
-
- xbar_mix_out2: endpoint {
- remote-endpoint = <&mix_out2>;
- };
- };
-
- port@60 {
- reg = <0x60>;
-
- xbar_mix_out3: endpoint {
- remote-endpoint = <&mix_out3>;
- };
- };
-
- port@61 {
- reg = <0x61>;
-
- xbar_mix_out4: endpoint {
- remote-endpoint = <&mix_out4>;
- };
- };
-
- port@62 {
- reg = <0x62>;
-
- xbar_mix_out5: endpoint {
- remote-endpoint = <&mix_out5>;
- };
- };
-
- xbar_asrc_in1_port: port@63 {
- reg = <0x63>;
-
- xbar_asrc_in1_ep: endpoint {
- remote-endpoint = <&asrc_in1_ep>;
- };
- };
-
- port@64 {
- reg = <0x64>;
-
- xbar_asrc_out1_ep: endpoint {
- remote-endpoint = <&asrc_out1_ep>;
- };
- };
-
- xbar_asrc_in2_port: port@65 {
- reg = <0x65>;
-
- xbar_asrc_in2_ep: endpoint {
- remote-endpoint = <&asrc_in2_ep>;
- };
- };
-
- port@66 {
- reg = <0x66>;
-
- xbar_asrc_out2_ep: endpoint {
- remote-endpoint = <&asrc_out2_ep>;
- };
- };
-
- xbar_asrc_in3_port: port@67 {
- reg = <0x67>;
-
- xbar_asrc_in3_ep: endpoint {
- remote-endpoint = <&asrc_in3_ep>;
- };
- };
-
- port@68 {
- reg = <0x68>;
-
- xbar_asrc_out3_ep: endpoint {
- remote-endpoint = <&asrc_out3_ep>;
- };
- };
-
- xbar_asrc_in4_port: port@69 {
- reg = <0x69>;
-
- xbar_asrc_in4_ep: endpoint {
- remote-endpoint = <&asrc_in4_ep>;
- };
- };
-
- port@6a {
- reg = <0x6a>;
-
- xbar_asrc_out4_ep: endpoint {
- remote-endpoint = <&asrc_out4_ep>;
- };
- };
-
- xbar_asrc_in5_port: port@6b {
- reg = <0x6b>;
-
- xbar_asrc_in5_ep: endpoint {
- remote-endpoint = <&asrc_in5_ep>;
- };
- };
-
- port@6c {
- reg = <0x6c>;
-
- xbar_asrc_out5_ep: endpoint {
- remote-endpoint = <&asrc_out5_ep>;
- };
- };
-
- xbar_asrc_in6_port: port@6d {
- reg = <0x6d>;
-
- xbar_asrc_in6_ep: endpoint {
- remote-endpoint = <&asrc_in6_ep>;
- };
- };
-
- port@6e {
- reg = <0x6e>;
-
- xbar_asrc_out6_ep: endpoint {
- remote-endpoint = <&asrc_out6_ep>;
- };
- };
-
- xbar_asrc_in7_port: port@6f {
- reg = <0x6f>;
-
- xbar_asrc_in7_ep: endpoint {
- remote-endpoint = <&asrc_in7_ep>;
- };
- };
-
- xbar_ope1_in_port: port@70 {
- reg = <0x70>;
-
- xbar_ope1_in_ep: endpoint {
- remote-endpoint = <&ope1_cif_in_ep>;
- };
- };
-
- port@71 {
- reg = <0x71>;
-
- xbar_ope1_out_ep: endpoint {
- remote-endpoint = <&ope1_cif_out_ep>;
- };
- };
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts
index ea13c4a7027c..69db584253da 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts
@@ -12,7 +12,6 @@
compatible = "nvidia,p3737-0000+p3701-0000", "nvidia,p3701-0000", "nvidia,tegra234";
aliases {
- mmc3 = "/bus@0/mmc@3460000";
serial0 = &tcu;
serial1 = &uarta;
};
@@ -175,7 +174,7 @@
status = "okay";
phy-handle = <&mgbe0_phy>;
- phy-mode = "usxgmii";
+ phy-mode = "10gbase-r";
mdio {
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3767-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra234-p3767-0000.dtsi
deleted file mode 100644
index baf4f69e410d..000000000000
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3767-0000.dtsi
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "tegra234-p3767.dtsi"
-
-/ {
- compatible = "nvidia,p3767-0000", "nvidia,tegra234";
- model = "NVIDIA Jetson Orin NX";
-
- bus@0 {
- hda@3510000 {
- nvidia,model = "NVIDIA Jetson Orin NX HDA";
- };
- };
-};
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3767-0005.dtsi b/arch/arm64/boot/dts/nvidia/tegra234-p3767-0005.dtsi
deleted file mode 100644
index 232fa95ef4ae..000000000000
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3767-0005.dtsi
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "tegra234-p3767.dtsi"
-
-/ {
- compatible = "nvidia,p3767-0005", "nvidia,tegra234";
- model = "NVIDIA Jetson Orin Nano";
-
- bus@0 {
- hda@3510000 {
- nvidia,model = "NVIDIA Jetson Orin Nano HDA";
- };
- };
-};
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3767.dtsi b/arch/arm64/boot/dts/nvidia/tegra234-p3767.dtsi
index 59c14ded5e9f..84db7132e8fc 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3767.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234-p3767.dtsi
@@ -5,7 +5,35 @@
/ {
compatible = "nvidia,p3767", "nvidia,tegra234";
+ aliases {
+ mmc0 = "/bus@0/mmc@3400000";
+ };
+
bus@0 {
+ aconnect@2900000 {
+ status = "okay";
+
+ ahub@2900800 {
+ status = "okay";
+
+ i2s@2901100 {
+ status = "okay";
+ };
+
+ i2s@2901300 {
+ status = "okay";
+ };
+ };
+
+ dma-controller@2930000 {
+ status = "okay";
+ };
+
+ interrupt-controller@2a40000 {
+ status = "okay";
+ };
+ };
+
i2c@3160000 {
status = "okay";
@@ -127,6 +155,64 @@
vin-supply = <&vdd_5v0_sys>;
};
+ sound {
+ compatible = "nvidia,tegra186-audio-graph-card";
+ status = "okay";
+
+ dais = /* ADMAIF (FE) Ports */
+ <&admaif0_port>, <&admaif1_port>, <&admaif2_port>, <&admaif3_port>,
+ <&admaif4_port>, <&admaif5_port>, <&admaif6_port>, <&admaif7_port>,
+ <&admaif8_port>, <&admaif9_port>, <&admaif10_port>, <&admaif11_port>,
+ <&admaif12_port>, <&admaif13_port>, <&admaif14_port>, <&admaif15_port>,
+ <&admaif16_port>, <&admaif17_port>, <&admaif18_port>, <&admaif19_port>,
+ /* XBAR Ports */
+ <&xbar_i2s2_port>, <&xbar_i2s4_port>,
+ <&xbar_sfc1_in_port>, <&xbar_sfc2_in_port>,
+ <&xbar_sfc3_in_port>, <&xbar_sfc4_in_port>,
+ <&xbar_mvc1_in_port>, <&xbar_mvc2_in_port>,
+ <&xbar_amx1_in1_port>, <&xbar_amx1_in2_port>,
+ <&xbar_amx1_in3_port>, <&xbar_amx1_in4_port>,
+ <&xbar_amx2_in1_port>, <&xbar_amx2_in2_port>,
+ <&xbar_amx2_in3_port>, <&xbar_amx2_in4_port>,
+ <&xbar_amx3_in1_port>, <&xbar_amx3_in2_port>,
+ <&xbar_amx3_in3_port>, <&xbar_amx3_in4_port>,
+ <&xbar_amx4_in1_port>, <&xbar_amx4_in2_port>,
+ <&xbar_amx4_in3_port>, <&xbar_amx4_in4_port>,
+ <&xbar_adx1_in_port>, <&xbar_adx2_in_port>,
+ <&xbar_adx3_in_port>, <&xbar_adx4_in_port>,
+ <&xbar_mix_in1_port>, <&xbar_mix_in2_port>,
+ <&xbar_mix_in3_port>, <&xbar_mix_in4_port>,
+ <&xbar_mix_in5_port>, <&xbar_mix_in6_port>,
+ <&xbar_mix_in7_port>, <&xbar_mix_in8_port>,
+ <&xbar_mix_in9_port>, <&xbar_mix_in10_port>,
+ <&xbar_asrc_in1_port>, <&xbar_asrc_in2_port>,
+ <&xbar_asrc_in3_port>, <&xbar_asrc_in4_port>,
+ <&xbar_asrc_in5_port>, <&xbar_asrc_in6_port>,
+ <&xbar_asrc_in7_port>,
+ <&xbar_ope1_in_port>,
+ /* HW accelerators */
+ <&sfc1_out_port>, <&sfc2_out_port>,
+ <&sfc3_out_port>, <&sfc4_out_port>,
+ <&mvc1_out_port>, <&mvc2_out_port>,
+ <&amx1_out_port>, <&amx2_out_port>,
+ <&amx3_out_port>, <&amx4_out_port>,
+ <&adx1_out1_port>, <&adx1_out2_port>,
+ <&adx1_out3_port>, <&adx1_out4_port>,
+ <&adx2_out1_port>, <&adx2_out2_port>,
+ <&adx2_out3_port>, <&adx2_out4_port>,
+ <&adx3_out1_port>, <&adx3_out2_port>,
+ <&adx3_out3_port>, <&adx3_out4_port>,
+ <&adx4_out1_port>, <&adx4_out2_port>,
+ <&adx4_out3_port>, <&adx4_out4_port>,
+ <&mix_out1_port>, <&mix_out2_port>, <&mix_out3_port>,
+ <&mix_out4_port>, <&mix_out5_port>,
+ <&asrc_out1_port>, <&asrc_out2_port>, <&asrc_out3_port>,
+ <&asrc_out4_port>, <&asrc_out5_port>, <&asrc_out6_port>,
+ <&ope1_out_port>,
+ /* BE I/O Ports */
+ <&i2s2_port>, <&i2s4_port>;
+ };
+
thermal-zones {
tj-thermal {
polling-delay = <1000>;
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0000.dts
index 61b0e69d3d20..1607ee14216f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0000.dts
@@ -4,7 +4,7 @@
#include <dt-bindings/input/linux-event-codes.h>
#include <dt-bindings/input/gpio-keys.h>
-#include "tegra234-p3767-0000.dtsi"
+#include "tegra234-p3767.dtsi"
#include "tegra234-p3768-0000.dtsi"
/ {
@@ -37,7 +37,6 @@
hda@3510000 {
nvidia,model = "NVIDIA Jetson Orin NX HDA";
- status = "okay";
};
padctl@3520000 {
@@ -85,6 +84,10 @@
enable-active-high;
};
+ sound {
+ label = "NVIDIA Jetson Orin NX APE";
+ };
+
thermal-zones {
tj-thermal {
cooling-maps {
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0005.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0005.dts
index 9e9bb9ca8be4..dc2d4bef1e83 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0005.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra234-p3768-0000+p3767-0005.dts
@@ -4,17 +4,27 @@
#include <dt-bindings/input/linux-event-codes.h>
#include <dt-bindings/input/gpio-keys.h>
-#include "tegra234-p3767-0005.dtsi"
+#include "tegra234-p3767.dtsi"
#include "tegra234-p3768-0000.dtsi"
/ {
compatible = "nvidia,p3768-0000+p3767-0005", "nvidia,p3767-0005", "nvidia,tegra234";
model = "NVIDIA Jetson Orin Nano Developer Kit";
+ bus@0 {
+ hda@3510000 {
+ nvidia,model = "NVIDIA Jetson Orin Nano HDA";
+ };
+ };
+
pwm-fan {
cooling-levels = <0 88 187 255>;
};
+ sound {
+ label = "NVIDIA Jetson Orin Nano APE";
+ };
+
thermal-zones {
tj-thermal {
cooling-maps {
diff --git a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
index 9f3e9f30c3f7..292e28376eec 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
@@ -8,7 +8,6 @@
compatible = "nvidia,tegra234-vdk", "nvidia,tegra234";
aliases {
- mmc3 = "/bus@0/mmc@3460000";
serial0 = &uarta;
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index 3f16595d099c..78cbfdd98dd1 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -200,6 +200,28 @@
assigned-clock-rates = <1536000>;
sound-name-prefix = "I2S1";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ i2s1_cif: endpoint {
+ remote-endpoint = <&xbar_i2s1>;
+ };
+ };
+
+ i2s1_port: port@1 {
+ reg = <1>;
+
+ i2s1_dap: endpoint {
+ dai-format = "i2s";
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_i2s2: i2s@2901100 {
@@ -214,6 +236,28 @@
assigned-clock-rates = <1536000>;
sound-name-prefix = "I2S2";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ i2s2_cif: endpoint {
+ remote-endpoint = <&xbar_i2s2>;
+ };
+ };
+
+ i2s2_port: port@1 {
+ reg = <1>;
+
+ i2s2_dap: endpoint {
+ dai-format = "i2s";
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_i2s3: i2s@2901200 {
@@ -228,6 +272,28 @@
assigned-clock-rates = <1536000>;
sound-name-prefix = "I2S3";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ i2s3_cif: endpoint {
+ remote-endpoint = <&xbar_i2s3>;
+ };
+ };
+
+ i2s3_port: port@1 {
+ reg = <1>;
+
+ i2s3_dap: endpoint {
+ dai-format = "i2s";
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_i2s4: i2s@2901300 {
@@ -242,6 +308,28 @@
assigned-clock-rates = <1536000>;
sound-name-prefix = "I2S4";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ i2s4_cif: endpoint {
+ remote-endpoint = <&xbar_i2s4>;
+ };
+ };
+
+ i2s4_port: port@1 {
+ reg = <1>;
+
+ i2s4_dap: endpoint {
+ dai-format = "i2s";
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_i2s5: i2s@2901400 {
@@ -256,6 +344,28 @@
assigned-clock-rates = <1536000>;
sound-name-prefix = "I2S5";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ i2s5_cif: endpoint {
+ remote-endpoint = <&xbar_i2s5>;
+ };
+ };
+
+ i2s5_port: port@1 {
+ reg = <1>;
+
+ i2s5_dap: endpoint {
+ dai-format = "i2s";
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_i2s6: i2s@2901500 {
@@ -270,6 +380,28 @@
assigned-clock-rates = <1536000>;
sound-name-prefix = "I2S6";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ i2s6_cif: endpoint {
+ remote-endpoint = <&xbar_i2s6>;
+ };
+ };
+
+ i2s6_port: port@1 {
+ reg = <1>;
+
+ i2s6_dap: endpoint {
+ dai-format = "i2s";
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_sfc1: sfc@2902000 {
@@ -277,7 +409,27 @@
"nvidia,tegra210-sfc";
reg = <0x0 0x2902000 0x0 0x200>;
sound-name-prefix = "SFC1";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ sfc1_cif_in: endpoint {
+ remote-endpoint = <&xbar_sfc1_in>;
+ };
+ };
+
+ sfc1_out_port: port@1 {
+ reg = <1>;
+
+ sfc1_cif_out: endpoint {
+ remote-endpoint = <&xbar_sfc1_out>;
+ };
+ };
+ };
};
tegra_sfc2: sfc@2902200 {
@@ -285,7 +437,27 @@
"nvidia,tegra210-sfc";
reg = <0x0 0x2902200 0x0 0x200>;
sound-name-prefix = "SFC2";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ sfc2_cif_in: endpoint {
+ remote-endpoint = <&xbar_sfc2_in>;
+ };
+ };
+
+ sfc2_out_port: port@1 {
+ reg = <1>;
+
+ sfc2_cif_out: endpoint {
+ remote-endpoint = <&xbar_sfc2_out>;
+ };
+ };
+ };
};
tegra_sfc3: sfc@2902400 {
@@ -293,7 +465,27 @@
"nvidia,tegra210-sfc";
reg = <0x0 0x2902400 0x0 0x200>;
sound-name-prefix = "SFC3";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ sfc3_cif_in: endpoint {
+ remote-endpoint = <&xbar_sfc3_in>;
+ };
+ };
+
+ sfc3_out_port: port@1 {
+ reg = <1>;
+
+ sfc3_cif_out: endpoint {
+ remote-endpoint = <&xbar_sfc3_out>;
+ };
+ };
+ };
};
tegra_sfc4: sfc@2902600 {
@@ -301,7 +493,27 @@
"nvidia,tegra210-sfc";
reg = <0x0 0x2902600 0x0 0x200>;
sound-name-prefix = "SFC4";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ sfc4_cif_in: endpoint {
+ remote-endpoint = <&xbar_sfc4_in>;
+ };
+ };
+
+ sfc4_out_port: port@1 {
+ reg = <1>;
+
+ sfc4_cif_out: endpoint {
+ remote-endpoint = <&xbar_sfc4_out>;
+ };
+ };
+ };
};
tegra_amx1: amx@2903000 {
@@ -309,7 +521,51 @@
"nvidia,tegra194-amx";
reg = <0x0 0x2903000 0x0 0x100>;
sound-name-prefix = "AMX1";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ amx1_in1: endpoint {
+ remote-endpoint = <&xbar_amx1_in1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ amx1_in2: endpoint {
+ remote-endpoint = <&xbar_amx1_in2>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ amx1_in3: endpoint {
+ remote-endpoint = <&xbar_amx1_in3>;
+ };
+ };
+
+ port@3 {
+ reg = <3>;
+
+ amx1_in4: endpoint {
+ remote-endpoint = <&xbar_amx1_in4>;
+ };
+ };
+
+ amx1_out_port: port@4 {
+ reg = <4>;
+
+ amx1_out: endpoint {
+ remote-endpoint = <&xbar_amx1_out>;
+ };
+ };
+ };
};
tegra_amx2: amx@2903100 {
@@ -317,7 +573,51 @@
"nvidia,tegra194-amx";
reg = <0x0 0x2903100 0x0 0x100>;
sound-name-prefix = "AMX2";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ amx2_in1: endpoint {
+ remote-endpoint = <&xbar_amx2_in1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ amx2_in2: endpoint {
+ remote-endpoint = <&xbar_amx2_in2>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ amx2_in3: endpoint {
+ remote-endpoint = <&xbar_amx2_in3>;
+ };
+ };
+
+ port@3 {
+ reg = <3>;
+
+ amx2_in4: endpoint {
+ remote-endpoint = <&xbar_amx2_in4>;
+ };
+ };
+
+ amx2_out_port: port@4 {
+ reg = <4>;
+
+ amx2_out: endpoint {
+ remote-endpoint = <&xbar_amx2_out>;
+ };
+ };
+ };
};
tegra_amx3: amx@2903200 {
@@ -325,7 +625,51 @@
"nvidia,tegra194-amx";
reg = <0x0 0x2903200 0x0 0x100>;
sound-name-prefix = "AMX3";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ amx3_in1: endpoint {
+ remote-endpoint = <&xbar_amx3_in1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ amx3_in2: endpoint {
+ remote-endpoint = <&xbar_amx3_in2>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ amx3_in3: endpoint {
+ remote-endpoint = <&xbar_amx3_in3>;
+ };
+ };
+
+ port@3 {
+ reg = <3>;
+
+ amx3_in4: endpoint {
+ remote-endpoint = <&xbar_amx3_in4>;
+ };
+ };
+
+ amx3_out_port: port@4 {
+ reg = <4>;
+
+ amx3_out: endpoint {
+ remote-endpoint = <&xbar_amx3_out>;
+ };
+ };
+ };
};
tegra_amx4: amx@2903300 {
@@ -333,7 +677,51 @@
"nvidia,tegra194-amx";
reg = <0x0 0x2903300 0x0 0x100>;
sound-name-prefix = "AMX4";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ amx4_in1: endpoint {
+ remote-endpoint = <&xbar_amx4_in1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ amx4_in2: endpoint {
+ remote-endpoint = <&xbar_amx4_in2>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ amx4_in3: endpoint {
+ remote-endpoint = <&xbar_amx4_in3>;
+ };
+ };
+
+ port@3 {
+ reg = <3>;
+
+ amx4_in4: endpoint {
+ remote-endpoint = <&xbar_amx4_in4>;
+ };
+ };
+
+ amx4_out_port: port@4 {
+ reg = <4>;
+
+ amx4_out: endpoint {
+ remote-endpoint = <&xbar_amx4_out>;
+ };
+ };
+ };
};
tegra_adx1: adx@2903800 {
@@ -341,7 +729,51 @@
"nvidia,tegra210-adx";
reg = <0x0 0x2903800 0x0 0x100>;
sound-name-prefix = "ADX1";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ adx1_in: endpoint {
+ remote-endpoint = <&xbar_adx1_in>;
+ };
+ };
+
+ adx1_out1_port: port@1 {
+ reg = <1>;
+
+ adx1_out1: endpoint {
+ remote-endpoint = <&xbar_adx1_out1>;
+ };
+ };
+
+ adx1_out2_port: port@2 {
+ reg = <2>;
+
+ adx1_out2: endpoint {
+ remote-endpoint = <&xbar_adx1_out2>;
+ };
+ };
+
+ adx1_out3_port: port@3 {
+ reg = <3>;
+
+ adx1_out3: endpoint {
+ remote-endpoint = <&xbar_adx1_out3>;
+ };
+ };
+
+ adx1_out4_port: port@4 {
+ reg = <4>;
+
+ adx1_out4: endpoint {
+ remote-endpoint = <&xbar_adx1_out4>;
+ };
+ };
+ };
};
tegra_adx2: adx@2903900 {
@@ -349,7 +781,51 @@
"nvidia,tegra210-adx";
reg = <0x0 0x2903900 0x0 0x100>;
sound-name-prefix = "ADX2";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ adx2_in: endpoint {
+ remote-endpoint = <&xbar_adx2_in>;
+ };
+ };
+
+ adx2_out1_port: port@1 {
+ reg = <1>;
+
+ adx2_out1: endpoint {
+ remote-endpoint = <&xbar_adx2_out1>;
+ };
+ };
+
+ adx2_out2_port: port@2 {
+ reg = <2>;
+
+ adx2_out2: endpoint {
+ remote-endpoint = <&xbar_adx2_out2>;
+ };
+ };
+
+ adx2_out3_port: port@3 {
+ reg = <3>;
+
+ adx2_out3: endpoint {
+ remote-endpoint = <&xbar_adx2_out3>;
+ };
+ };
+
+ adx2_out4_port: port@4 {
+ reg = <4>;
+
+ adx2_out4: endpoint {
+ remote-endpoint = <&xbar_adx2_out4>;
+ };
+ };
+ };
};
tegra_adx3: adx@2903a00 {
@@ -357,7 +833,51 @@
"nvidia,tegra210-adx";
reg = <0x0 0x2903a00 0x0 0x100>;
sound-name-prefix = "ADX3";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ adx3_in: endpoint {
+ remote-endpoint = <&xbar_adx3_in>;
+ };
+ };
+
+ adx3_out1_port: port@1 {
+ reg = <1>;
+
+ adx3_out1: endpoint {
+ remote-endpoint = <&xbar_adx3_out1>;
+ };
+ };
+
+ adx3_out2_port: port@2 {
+ reg = <2>;
+
+ adx3_out2: endpoint {
+ remote-endpoint = <&xbar_adx3_out2>;
+ };
+ };
+
+ adx3_out3_port: port@3 {
+ reg = <3>;
+
+ adx3_out3: endpoint {
+ remote-endpoint = <&xbar_adx3_out3>;
+ };
+ };
+
+ adx3_out4_port: port@4 {
+ reg = <4>;
+
+ adx3_out4: endpoint {
+ remote-endpoint = <&xbar_adx3_out4>;
+ };
+ };
+ };
};
tegra_adx4: adx@2903b00 {
@@ -365,7 +885,51 @@
"nvidia,tegra210-adx";
reg = <0x0 0x2903b00 0x0 0x100>;
sound-name-prefix = "ADX4";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ adx4_in: endpoint {
+ remote-endpoint = <&xbar_adx4_in>;
+ };
+ };
+
+ adx4_out1_port: port@1 {
+ reg = <1>;
+
+ adx4_out1: endpoint {
+ remote-endpoint = <&xbar_adx4_out1>;
+ };
+ };
+
+ adx4_out2_port: port@2 {
+ reg = <2>;
+
+ adx4_out2: endpoint {
+ remote-endpoint = <&xbar_adx4_out2>;
+ };
+ };
+
+ adx4_out3_port: port@3 {
+ reg = <3>;
+
+ adx4_out3: endpoint {
+ remote-endpoint = <&xbar_adx4_out3>;
+ };
+ };
+
+ adx4_out4_port: port@4 {
+ reg = <4>;
+
+ adx4_out4: endpoint {
+ remote-endpoint = <&xbar_adx4_out4>;
+ };
+ };
+ };
};
@@ -380,6 +944,27 @@
assigned-clock-rates = <3072000>;
sound-name-prefix = "DMIC1";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dmic1_cif: endpoint {
+ remote-endpoint = <&xbar_dmic1>;
+ };
+ };
+
+ dmic1_port: port@1 {
+ reg = <1>;
+
+ dmic1_dap: endpoint {
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_dmic2: dmic@2904100 {
@@ -393,6 +978,27 @@
assigned-clock-rates = <3072000>;
sound-name-prefix = "DMIC2";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dmic2_cif: endpoint {
+ remote-endpoint = <&xbar_dmic2>;
+ };
+ };
+
+ dmic2_port: port@1 {
+ reg = <1>;
+
+ dmic2_dap: endpoint {
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_dmic3: dmic@2904200 {
@@ -406,6 +1012,27 @@
assigned-clock-rates = <3072000>;
sound-name-prefix = "DMIC3";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dmic3_cif: endpoint {
+ remote-endpoint = <&xbar_dmic3>;
+ };
+ };
+
+ dmic3_port: port@1 {
+ reg = <1>;
+
+ dmic3_dap: endpoint {
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_dmic4: dmic@2904300 {
@@ -419,6 +1046,27 @@
assigned-clock-rates = <3072000>;
sound-name-prefix = "DMIC4";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dmic4_cif: endpoint {
+ remote-endpoint = <&xbar_dmic4>;
+ };
+ };
+
+ dmic4_port: port@1 {
+ reg = <1>;
+
+ dmic4_dap: endpoint {
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_dspk1: dspk@2905000 {
@@ -432,6 +1080,27 @@
assigned-clock-rates = <12288000>;
sound-name-prefix = "DSPK1";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dspk1_cif: endpoint {
+ remote-endpoint = <&xbar_dspk1>;
+ };
+ };
+
+ dspk1_port: port@1 {
+ reg = <1>;
+
+ dspk1_dap: endpoint {
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_dspk2: dspk@2905100 {
@@ -445,6 +1114,27 @@
assigned-clock-rates = <12288000>;
sound-name-prefix = "DSPK2";
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dspk2_cif: endpoint {
+ remote-endpoint = <&xbar_dspk2>;
+ };
+ };
+
+ dspk2_port: port@1 {
+ reg = <1>;
+
+ dspk2_dap: endpoint {
+ /* placeholder for external codec */
+ };
+ };
+ };
};
tegra_ope1: processing-engine@2908000 {
@@ -452,7 +1142,6 @@
"nvidia,tegra210-ope";
reg = <0x0 0x2908000 0x0 0x100>;
sound-name-prefix = "OPE1";
- status = "disabled";
#address-cells = <2>;
#size-cells = <2>;
@@ -469,6 +1158,29 @@
"nvidia,tegra210-mbdrc";
reg = <0x0 0x2908200 0x0 0x200>;
};
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0x0>;
+
+ ope1_cif_in_ep: endpoint {
+ remote-endpoint =
+ <&xbar_ope1_in_ep>;
+ };
+ };
+
+ ope1_out_port: port@1 {
+ reg = <0x1>;
+
+ ope1_cif_out_ep: endpoint {
+ remote-endpoint =
+ <&xbar_ope1_out_ep>;
+ };
+ };
+ };
};
tegra_mvc1: mvc@290a000 {
@@ -476,7 +1188,27 @@
"nvidia,tegra210-mvc";
reg = <0x0 0x290a000 0x0 0x200>;
sound-name-prefix = "MVC1";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ mvc1_cif_in: endpoint {
+ remote-endpoint = <&xbar_mvc1_in>;
+ };
+ };
+
+ mvc1_out_port: port@1 {
+ reg = <1>;
+
+ mvc1_cif_out: endpoint {
+ remote-endpoint = <&xbar_mvc1_out>;
+ };
+ };
+ };
};
tegra_mvc2: mvc@290a200 {
@@ -484,7 +1216,27 @@
"nvidia,tegra210-mvc";
reg = <0x0 0x290a200 0x0 0x200>;
sound-name-prefix = "MVC2";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ mvc2_cif_in: endpoint {
+ remote-endpoint = <&xbar_mvc2_in>;
+ };
+ };
+
+ mvc2_out_port: port@1 {
+ reg = <1>;
+
+ mvc2_cif_out: endpoint {
+ remote-endpoint = <&xbar_mvc2_out>;
+ };
+ };
+ };
};
tegra_amixer: amixer@290bb00 {
@@ -492,7 +1244,131 @@
"nvidia,tegra210-amixer";
reg = <0x0 0x290bb00 0x0 0x800>;
sound-name-prefix = "MIXER1";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0x0>;
+
+ mix_in1: endpoint {
+ remote-endpoint = <&xbar_mix_in1>;
+ };
+ };
+
+ port@1 {
+ reg = <0x1>;
+
+ mix_in2: endpoint {
+ remote-endpoint = <&xbar_mix_in2>;
+ };
+ };
+
+ port@2 {
+ reg = <0x2>;
+
+ mix_in3: endpoint {
+ remote-endpoint = <&xbar_mix_in3>;
+ };
+ };
+
+ port@3 {
+ reg = <0x3>;
+
+ mix_in4: endpoint {
+ remote-endpoint = <&xbar_mix_in4>;
+ };
+ };
+
+ port@4 {
+ reg = <0x4>;
+
+ mix_in5: endpoint {
+ remote-endpoint = <&xbar_mix_in5>;
+ };
+ };
+
+ port@5 {
+ reg = <0x5>;
+
+ mix_in6: endpoint {
+ remote-endpoint = <&xbar_mix_in6>;
+ };
+ };
+
+ port@6 {
+ reg = <0x6>;
+
+ mix_in7: endpoint {
+ remote-endpoint = <&xbar_mix_in7>;
+ };
+ };
+
+ port@7 {
+ reg = <0x7>;
+
+ mix_in8: endpoint {
+ remote-endpoint = <&xbar_mix_in8>;
+ };
+ };
+
+ port@8 {
+ reg = <0x8>;
+
+ mix_in9: endpoint {
+ remote-endpoint = <&xbar_mix_in9>;
+ };
+ };
+
+ port@9 {
+ reg = <0x9>;
+
+ mix_in10: endpoint {
+ remote-endpoint = <&xbar_mix_in10>;
+ };
+ };
+
+ mix_out1_port: port@a {
+ reg = <0xa>;
+
+ mix_out1: endpoint {
+ remote-endpoint = <&xbar_mix_out1>;
+ };
+ };
+
+ mix_out2_port: port@b {
+ reg = <0xb>;
+
+ mix_out2: endpoint {
+ remote-endpoint = <&xbar_mix_out2>;
+ };
+ };
+
+ mix_out3_port: port@c {
+ reg = <0xc>;
+
+ mix_out3: endpoint {
+ remote-endpoint = <&xbar_mix_out3>;
+ };
+ };
+
+ mix_out4_port: port@d {
+ reg = <0xd>;
+
+ mix_out4: endpoint {
+ remote-endpoint = <&xbar_mix_out4>;
+ };
+ };
+
+ mix_out5_port: port@e {
+ reg = <0xe>;
+
+ mix_out5: endpoint {
+ remote-endpoint = <&xbar_mix_out5>;
+ };
+ };
+ };
};
tegra_admaif: admaif@290f000 {
@@ -543,7 +1419,171 @@
<&mc TEGRA234_MEMORY_CLIENT_APEDMAW &emc>;
interconnect-names = "dma-mem", "write";
iommus = <&smmu_niso0 TEGRA234_SID_APE>;
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ admaif0_port: port@0 {
+ reg = <0x0>;
+
+ admaif0: endpoint {
+ remote-endpoint = <&xbar_admaif0>;
+ };
+ };
+
+ admaif1_port: port@1 {
+ reg = <0x1>;
+
+ admaif1: endpoint {
+ remote-endpoint = <&xbar_admaif1>;
+ };
+ };
+
+ admaif2_port: port@2 {
+ reg = <0x2>;
+
+ admaif2: endpoint {
+ remote-endpoint = <&xbar_admaif2>;
+ };
+ };
+
+ admaif3_port: port@3 {
+ reg = <0x3>;
+
+ admaif3: endpoint {
+ remote-endpoint = <&xbar_admaif3>;
+ };
+ };
+
+ admaif4_port: port@4 {
+ reg = <0x4>;
+
+ admaif4: endpoint {
+ remote-endpoint = <&xbar_admaif4>;
+ };
+ };
+
+ admaif5_port: port@5 {
+ reg = <0x5>;
+
+ admaif5: endpoint {
+ remote-endpoint = <&xbar_admaif5>;
+ };
+ };
+
+ admaif6_port: port@6 {
+ reg = <0x6>;
+
+ admaif6: endpoint {
+ remote-endpoint = <&xbar_admaif6>;
+ };
+ };
+
+ admaif7_port: port@7 {
+ reg = <0x7>;
+
+ admaif7: endpoint {
+ remote-endpoint = <&xbar_admaif7>;
+ };
+ };
+
+ admaif8_port: port@8 {
+ reg = <0x8>;
+
+ admaif8: endpoint {
+ remote-endpoint = <&xbar_admaif8>;
+ };
+ };
+
+ admaif9_port: port@9 {
+ reg = <0x9>;
+
+ admaif9: endpoint {
+ remote-endpoint = <&xbar_admaif9>;
+ };
+ };
+
+ admaif10_port: port@a {
+ reg = <0xa>;
+
+ admaif10: endpoint {
+ remote-endpoint = <&xbar_admaif10>;
+ };
+ };
+
+ admaif11_port: port@b {
+ reg = <0xb>;
+
+ admaif11: endpoint {
+ remote-endpoint = <&xbar_admaif11>;
+ };
+ };
+
+ admaif12_port: port@c {
+ reg = <0xc>;
+
+ admaif12: endpoint {
+ remote-endpoint = <&xbar_admaif12>;
+ };
+ };
+
+ admaif13_port: port@d {
+ reg = <0xd>;
+
+ admaif13: endpoint {
+ remote-endpoint = <&xbar_admaif13>;
+ };
+ };
+
+ admaif14_port: port@e {
+ reg = <0xe>;
+
+ admaif14: endpoint {
+ remote-endpoint = <&xbar_admaif14>;
+ };
+ };
+
+ admaif15_port: port@f {
+ reg = <0xf>;
+
+ admaif15: endpoint {
+ remote-endpoint = <&xbar_admaif15>;
+ };
+ };
+
+ admaif16_port: port@10 {
+ reg = <0x10>;
+
+ admaif16: endpoint {
+ remote-endpoint = <&xbar_admaif16>;
+ };
+ };
+
+ admaif17_port: port@11 {
+ reg = <0x11>;
+
+ admaif17: endpoint {
+ remote-endpoint = <&xbar_admaif17>;
+ };
+ };
+
+ admaif18_port: port@12 {
+ reg = <0x12>;
+
+ admaif18: endpoint {
+ remote-endpoint = <&xbar_admaif18>;
+ };
+ };
+
+ admaif19_port: port@13 {
+ reg = <0x13>;
+
+ admaif19: endpoint {
+ remote-endpoint = <&xbar_admaif19>;
+ };
+ };
+ };
};
tegra_asrc: asrc@2910000 {
@@ -551,7 +1591,1045 @@
"nvidia,tegra186-asrc";
reg = <0x0 0x2910000 0x0 0x2000>;
sound-name-prefix = "ASRC1";
- status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0x0>;
+
+ asrc_in1_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in1_ep>;
+ };
+ };
+
+ port@1 {
+ reg = <0x1>;
+
+ asrc_in2_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in2_ep>;
+ };
+ };
+
+ port@2 {
+ reg = <0x2>;
+
+ asrc_in3_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in3_ep>;
+ };
+ };
+
+ port@3 {
+ reg = <0x3>;
+
+ asrc_in4_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in4_ep>;
+ };
+ };
+
+ port@4 {
+ reg = <0x4>;
+
+ asrc_in5_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in5_ep>;
+ };
+ };
+
+ port@5 {
+ reg = <0x5>;
+
+ asrc_in6_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in6_ep>;
+ };
+ };
+
+ port@6 {
+ reg = <0x6>;
+
+ asrc_in7_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_in7_ep>;
+ };
+ };
+
+ asrc_out1_port: port@7 {
+ reg = <0x7>;
+
+ asrc_out1_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_out1_ep>;
+ };
+ };
+
+ asrc_out2_port: port@8 {
+ reg = <0x8>;
+
+ asrc_out2_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_out2_ep>;
+ };
+ };
+
+ asrc_out3_port: port@9 {
+ reg = <0x9>;
+
+ asrc_out3_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_out3_ep>;
+ };
+ };
+
+ asrc_out4_port: port@a {
+ reg = <0xa>;
+
+ asrc_out4_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_out4_ep>;
+ };
+ };
+
+ asrc_out5_port: port@b {
+ reg = <0xb>;
+
+ asrc_out5_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_out5_ep>;
+ };
+ };
+
+ asrc_out6_port: port@c {
+ reg = <0xc>;
+
+ asrc_out6_ep: endpoint {
+ remote-endpoint =
+ <&xbar_asrc_out6_ep>;
+ };
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0x0>;
+
+ xbar_admaif0: endpoint {
+ remote-endpoint = <&admaif0>;
+ };
+ };
+
+ port@1 {
+ reg = <0x1>;
+
+ xbar_admaif1: endpoint {
+ remote-endpoint = <&admaif1>;
+ };
+ };
+
+ port@2 {
+ reg = <0x2>;
+
+ xbar_admaif2: endpoint {
+ remote-endpoint = <&admaif2>;
+ };
+ };
+
+ port@3 {
+ reg = <0x3>;
+
+ xbar_admaif3: endpoint {
+ remote-endpoint = <&admaif3>;
+ };
+ };
+
+ port@4 {
+ reg = <0x4>;
+
+ xbar_admaif4: endpoint {
+ remote-endpoint = <&admaif4>;
+ };
+ };
+
+ port@5 {
+ reg = <0x5>;
+
+ xbar_admaif5: endpoint {
+ remote-endpoint = <&admaif5>;
+ };
+ };
+
+ port@6 {
+ reg = <0x6>;
+
+ xbar_admaif6: endpoint {
+ remote-endpoint = <&admaif6>;
+ };
+ };
+
+ port@7 {
+ reg = <0x7>;
+
+ xbar_admaif7: endpoint {
+ remote-endpoint = <&admaif7>;
+ };
+ };
+
+ port@8 {
+ reg = <0x8>;
+
+ xbar_admaif8: endpoint {
+ remote-endpoint = <&admaif8>;
+ };
+ };
+
+ port@9 {
+ reg = <0x9>;
+
+ xbar_admaif9: endpoint {
+ remote-endpoint = <&admaif9>;
+ };
+ };
+
+ port@a {
+ reg = <0xa>;
+
+ xbar_admaif10: endpoint {
+ remote-endpoint = <&admaif10>;
+ };
+ };
+
+ port@b {
+ reg = <0xb>;
+
+ xbar_admaif11: endpoint {
+ remote-endpoint = <&admaif11>;
+ };
+ };
+
+ port@c {
+ reg = <0xc>;
+
+ xbar_admaif12: endpoint {
+ remote-endpoint = <&admaif12>;
+ };
+ };
+
+ port@d {
+ reg = <0xd>;
+
+ xbar_admaif13: endpoint {
+ remote-endpoint = <&admaif13>;
+ };
+ };
+
+ port@e {
+ reg = <0xe>;
+
+ xbar_admaif14: endpoint {
+ remote-endpoint = <&admaif14>;
+ };
+ };
+
+ port@f {
+ reg = <0xf>;
+
+ xbar_admaif15: endpoint {
+ remote-endpoint = <&admaif15>;
+ };
+ };
+
+ port@10 {
+ reg = <0x10>;
+
+ xbar_admaif16: endpoint {
+ remote-endpoint = <&admaif16>;
+ };
+ };
+
+ port@11 {
+ reg = <0x11>;
+
+ xbar_admaif17: endpoint {
+ remote-endpoint = <&admaif17>;
+ };
+ };
+
+ port@12 {
+ reg = <0x12>;
+
+ xbar_admaif18: endpoint {
+ remote-endpoint = <&admaif18>;
+ };
+ };
+
+ port@13 {
+ reg = <0x13>;
+
+ xbar_admaif19: endpoint {
+ remote-endpoint = <&admaif19>;
+ };
+ };
+
+ xbar_i2s1_port: port@14 {
+ reg = <0x14>;
+
+ xbar_i2s1: endpoint {
+ remote-endpoint = <&i2s1_cif>;
+ };
+ };
+
+ xbar_i2s2_port: port@15 {
+ reg = <0x15>;
+
+ xbar_i2s2: endpoint {
+ remote-endpoint = <&i2s2_cif>;
+ };
+ };
+
+ xbar_i2s3_port: port@16 {
+ reg = <0x16>;
+
+ xbar_i2s3: endpoint {
+ remote-endpoint = <&i2s3_cif>;
+ };
+ };
+
+ xbar_i2s4_port: port@17 {
+ reg = <0x17>;
+
+ xbar_i2s4: endpoint {
+ remote-endpoint = <&i2s4_cif>;
+ };
+ };
+
+ xbar_i2s5_port: port@18 {
+ reg = <0x18>;
+
+ xbar_i2s5: endpoint {
+ remote-endpoint = <&i2s5_cif>;
+ };
+ };
+
+ xbar_i2s6_port: port@19 {
+ reg = <0x19>;
+
+ xbar_i2s6: endpoint {
+ remote-endpoint = <&i2s6_cif>;
+ };
+ };
+
+ xbar_dmic1_port: port@1a {
+ reg = <0x1a>;
+
+ xbar_dmic1: endpoint {
+ remote-endpoint = <&dmic1_cif>;
+ };
+ };
+
+ xbar_dmic2_port: port@1b {
+ reg = <0x1b>;
+
+ xbar_dmic2: endpoint {
+ remote-endpoint = <&dmic2_cif>;
+ };
+ };
+
+ xbar_dmic3_port: port@1c {
+ reg = <0x1c>;
+
+ xbar_dmic3: endpoint {
+ remote-endpoint = <&dmic3_cif>;
+ };
+ };
+
+ xbar_dmic4_port: port@1d {
+ reg = <0x1d>;
+
+ xbar_dmic4: endpoint {
+ remote-endpoint = <&dmic4_cif>;
+ };
+ };
+
+ xbar_dspk1_port: port@1e {
+ reg = <0x1e>;
+
+ xbar_dspk1: endpoint {
+ remote-endpoint = <&dspk1_cif>;
+ };
+ };
+
+ xbar_dspk2_port: port@1f {
+ reg = <0x1f>;
+
+ xbar_dspk2: endpoint {
+ remote-endpoint = <&dspk2_cif>;
+ };
+ };
+
+ xbar_sfc1_in_port: port@20 {
+ reg = <0x20>;
+
+ xbar_sfc1_in: endpoint {
+ remote-endpoint = <&sfc1_cif_in>;
+ };
+ };
+
+ port@21 {
+ reg = <0x21>;
+
+ xbar_sfc1_out: endpoint {
+ remote-endpoint = <&sfc1_cif_out>;
+ };
+ };
+
+ xbar_sfc2_in_port: port@22 {
+ reg = <0x22>;
+
+ xbar_sfc2_in: endpoint {
+ remote-endpoint = <&sfc2_cif_in>;
+ };
+ };
+
+ port@23 {
+ reg = <0x23>;
+
+ xbar_sfc2_out: endpoint {
+ remote-endpoint = <&sfc2_cif_out>;
+ };
+ };
+
+ xbar_sfc3_in_port: port@24 {
+ reg = <0x24>;
+
+ xbar_sfc3_in: endpoint {
+ remote-endpoint = <&sfc3_cif_in>;
+ };
+ };
+
+ port@25 {
+ reg = <0x25>;
+
+ xbar_sfc3_out: endpoint {
+ remote-endpoint = <&sfc3_cif_out>;
+ };
+ };
+
+ xbar_sfc4_in_port: port@26 {
+ reg = <0x26>;
+
+ xbar_sfc4_in: endpoint {
+ remote-endpoint = <&sfc4_cif_in>;
+ };
+ };
+
+ port@27 {
+ reg = <0x27>;
+
+ xbar_sfc4_out: endpoint {
+ remote-endpoint = <&sfc4_cif_out>;
+ };
+ };
+
+ xbar_mvc1_in_port: port@28 {
+ reg = <0x28>;
+
+ xbar_mvc1_in: endpoint {
+ remote-endpoint = <&mvc1_cif_in>;
+ };
+ };
+
+ port@29 {
+ reg = <0x29>;
+
+ xbar_mvc1_out: endpoint {
+ remote-endpoint = <&mvc1_cif_out>;
+ };
+ };
+
+ xbar_mvc2_in_port: port@2a {
+ reg = <0x2a>;
+
+ xbar_mvc2_in: endpoint {
+ remote-endpoint = <&mvc2_cif_in>;
+ };
+ };
+
+ port@2b {
+ reg = <0x2b>;
+
+ xbar_mvc2_out: endpoint {
+ remote-endpoint = <&mvc2_cif_out>;
+ };
+ };
+
+ xbar_amx1_in1_port: port@2c {
+ reg = <0x2c>;
+
+ xbar_amx1_in1: endpoint {
+ remote-endpoint = <&amx1_in1>;
+ };
+ };
+
+ xbar_amx1_in2_port: port@2d {
+ reg = <0x2d>;
+
+ xbar_amx1_in2: endpoint {
+ remote-endpoint = <&amx1_in2>;
+ };
+ };
+
+ xbar_amx1_in3_port: port@2e {
+ reg = <0x2e>;
+
+ xbar_amx1_in3: endpoint {
+ remote-endpoint = <&amx1_in3>;
+ };
+ };
+
+ xbar_amx1_in4_port: port@2f {
+ reg = <0x2f>;
+
+ xbar_amx1_in4: endpoint {
+ remote-endpoint = <&amx1_in4>;
+ };
+ };
+
+ port@30 {
+ reg = <0x30>;
+
+ xbar_amx1_out: endpoint {
+ remote-endpoint = <&amx1_out>;
+ };
+ };
+
+ xbar_amx2_in1_port: port@31 {
+ reg = <0x31>;
+
+ xbar_amx2_in1: endpoint {
+ remote-endpoint = <&amx2_in1>;
+ };
+ };
+
+ xbar_amx2_in2_port: port@32 {
+ reg = <0x32>;
+
+ xbar_amx2_in2: endpoint {
+ remote-endpoint = <&amx2_in2>;
+ };
+ };
+
+ xbar_amx2_in3_port: port@33 {
+ reg = <0x33>;
+
+ xbar_amx2_in3: endpoint {
+ remote-endpoint = <&amx2_in3>;
+ };
+ };
+
+ xbar_amx2_in4_port: port@34 {
+ reg = <0x34>;
+
+ xbar_amx2_in4: endpoint {
+ remote-endpoint = <&amx2_in4>;
+ };
+ };
+
+ port@35 {
+ reg = <0x35>;
+
+ xbar_amx2_out: endpoint {
+ remote-endpoint = <&amx2_out>;
+ };
+ };
+
+ xbar_amx3_in1_port: port@36 {
+ reg = <0x36>;
+
+ xbar_amx3_in1: endpoint {
+ remote-endpoint = <&amx3_in1>;
+ };
+ };
+
+ xbar_amx3_in2_port: port@37 {
+ reg = <0x37>;
+
+ xbar_amx3_in2: endpoint {
+ remote-endpoint = <&amx3_in2>;
+ };
+ };
+
+ xbar_amx3_in3_port: port@38 {
+ reg = <0x38>;
+
+ xbar_amx3_in3: endpoint {
+ remote-endpoint = <&amx3_in3>;
+ };
+ };
+
+ xbar_amx3_in4_port: port@39 {
+ reg = <0x39>;
+
+ xbar_amx3_in4: endpoint {
+ remote-endpoint = <&amx3_in4>;
+ };
+ };
+
+ port@3a {
+ reg = <0x3a>;
+
+ xbar_amx3_out: endpoint {
+ remote-endpoint = <&amx3_out>;
+ };
+ };
+
+ xbar_amx4_in1_port: port@3b {
+ reg = <0x3b>;
+
+ xbar_amx4_in1: endpoint {
+ remote-endpoint = <&amx4_in1>;
+ };
+ };
+
+ xbar_amx4_in2_port: port@3c {
+ reg = <0x3c>;
+
+ xbar_amx4_in2: endpoint {
+ remote-endpoint = <&amx4_in2>;
+ };
+ };
+
+ xbar_amx4_in3_port: port@3d {
+ reg = <0x3d>;
+
+ xbar_amx4_in3: endpoint {
+ remote-endpoint = <&amx4_in3>;
+ };
+ };
+
+ xbar_amx4_in4_port: port@3e {
+ reg = <0x3e>;
+
+ xbar_amx4_in4: endpoint {
+ remote-endpoint = <&amx4_in4>;
+ };
+ };
+
+ port@3f {
+ reg = <0x3f>;
+
+ xbar_amx4_out: endpoint {
+ remote-endpoint = <&amx4_out>;
+ };
+ };
+
+ xbar_adx1_in_port: port@40 {
+ reg = <0x40>;
+
+ xbar_adx1_in: endpoint {
+ remote-endpoint = <&adx1_in>;
+ };
+ };
+
+ port@41 {
+ reg = <0x41>;
+
+ xbar_adx1_out1: endpoint {
+ remote-endpoint = <&adx1_out1>;
+ };
+ };
+
+ port@42 {
+ reg = <0x42>;
+
+ xbar_adx1_out2: endpoint {
+ remote-endpoint = <&adx1_out2>;
+ };
+ };
+
+ port@43 {
+ reg = <0x43>;
+
+ xbar_adx1_out3: endpoint {
+ remote-endpoint = <&adx1_out3>;
+ };
+ };
+
+ port@44 {
+ reg = <0x44>;
+
+ xbar_adx1_out4: endpoint {
+ remote-endpoint = <&adx1_out4>;
+ };
+ };
+
+ xbar_adx2_in_port: port@45 {
+ reg = <0x45>;
+
+ xbar_adx2_in: endpoint {
+ remote-endpoint = <&adx2_in>;
+ };
+ };
+
+ port@46 {
+ reg = <0x46>;
+
+ xbar_adx2_out1: endpoint {
+ remote-endpoint = <&adx2_out1>;
+ };
+ };
+
+ port@47 {
+ reg = <0x47>;
+
+ xbar_adx2_out2: endpoint {
+ remote-endpoint = <&adx2_out2>;
+ };
+ };
+
+ port@48 {
+ reg = <0x48>;
+
+ xbar_adx2_out3: endpoint {
+ remote-endpoint = <&adx2_out3>;
+ };
+ };
+
+ port@49 {
+ reg = <0x49>;
+
+ xbar_adx2_out4: endpoint {
+ remote-endpoint = <&adx2_out4>;
+ };
+ };
+
+ xbar_adx3_in_port: port@4a {
+ reg = <0x4a>;
+
+ xbar_adx3_in: endpoint {
+ remote-endpoint = <&adx3_in>;
+ };
+ };
+
+ port@4b {
+ reg = <0x4b>;
+
+ xbar_adx3_out1: endpoint {
+ remote-endpoint = <&adx3_out1>;
+ };
+ };
+
+ port@4c {
+ reg = <0x4c>;
+
+ xbar_adx3_out2: endpoint {
+ remote-endpoint = <&adx3_out2>;
+ };
+ };
+
+ port@4d {
+ reg = <0x4d>;
+
+ xbar_adx3_out3: endpoint {
+ remote-endpoint = <&adx3_out3>;
+ };
+ };
+
+ port@4e {
+ reg = <0x4e>;
+
+ xbar_adx3_out4: endpoint {
+ remote-endpoint = <&adx3_out4>;
+ };
+ };
+
+ xbar_adx4_in_port: port@4f {
+ reg = <0x4f>;
+
+ xbar_adx4_in: endpoint {
+ remote-endpoint = <&adx4_in>;
+ };
+ };
+
+ port@50 {
+ reg = <0x50>;
+
+ xbar_adx4_out1: endpoint {
+ remote-endpoint = <&adx4_out1>;
+ };
+ };
+
+ port@51 {
+ reg = <0x51>;
+
+ xbar_adx4_out2: endpoint {
+ remote-endpoint = <&adx4_out2>;
+ };
+ };
+
+ port@52 {
+ reg = <0x52>;
+
+ xbar_adx4_out3: endpoint {
+ remote-endpoint = <&adx4_out3>;
+ };
+ };
+
+ port@53 {
+ reg = <0x53>;
+
+ xbar_adx4_out4: endpoint {
+ remote-endpoint = <&adx4_out4>;
+ };
+ };
+
+ xbar_mix_in1_port: port@54 {
+ reg = <0x54>;
+
+ xbar_mix_in1: endpoint {
+ remote-endpoint = <&mix_in1>;
+ };
+ };
+
+ xbar_mix_in2_port: port@55 {
+ reg = <0x55>;
+
+ xbar_mix_in2: endpoint {
+ remote-endpoint = <&mix_in2>;
+ };
+ };
+
+ xbar_mix_in3_port: port@56 {
+ reg = <0x56>;
+
+ xbar_mix_in3: endpoint {
+ remote-endpoint = <&mix_in3>;
+ };
+ };
+
+ xbar_mix_in4_port: port@57 {
+ reg = <0x57>;
+
+ xbar_mix_in4: endpoint {
+ remote-endpoint = <&mix_in4>;
+ };
+ };
+
+ xbar_mix_in5_port: port@58 {
+ reg = <0x58>;
+
+ xbar_mix_in5: endpoint {
+ remote-endpoint = <&mix_in5>;
+ };
+ };
+
+ xbar_mix_in6_port: port@59 {
+ reg = <0x59>;
+
+ xbar_mix_in6: endpoint {
+ remote-endpoint = <&mix_in6>;
+ };
+ };
+
+ xbar_mix_in7_port: port@5a {
+ reg = <0x5a>;
+
+ xbar_mix_in7: endpoint {
+ remote-endpoint = <&mix_in7>;
+ };
+ };
+
+ xbar_mix_in8_port: port@5b {
+ reg = <0x5b>;
+
+ xbar_mix_in8: endpoint {
+ remote-endpoint = <&mix_in8>;
+ };
+ };
+
+ xbar_mix_in9_port: port@5c {
+ reg = <0x5c>;
+
+ xbar_mix_in9: endpoint {
+ remote-endpoint = <&mix_in9>;
+ };
+ };
+
+ xbar_mix_in10_port: port@5d {
+ reg = <0x5d>;
+
+ xbar_mix_in10: endpoint {
+ remote-endpoint = <&mix_in10>;
+ };
+ };
+
+ port@5e {
+ reg = <0x5e>;
+
+ xbar_mix_out1: endpoint {
+ remote-endpoint = <&mix_out1>;
+ };
+ };
+
+ port@5f {
+ reg = <0x5f>;
+
+ xbar_mix_out2: endpoint {
+ remote-endpoint = <&mix_out2>;
+ };
+ };
+
+ port@60 {
+ reg = <0x60>;
+
+ xbar_mix_out3: endpoint {
+ remote-endpoint = <&mix_out3>;
+ };
+ };
+
+ port@61 {
+ reg = <0x61>;
+
+ xbar_mix_out4: endpoint {
+ remote-endpoint = <&mix_out4>;
+ };
+ };
+
+ port@62 {
+ reg = <0x62>;
+
+ xbar_mix_out5: endpoint {
+ remote-endpoint = <&mix_out5>;
+ };
+ };
+
+ xbar_asrc_in1_port: port@63 {
+ reg = <0x63>;
+
+ xbar_asrc_in1_ep: endpoint {
+ remote-endpoint = <&asrc_in1_ep>;
+ };
+ };
+
+ port@64 {
+ reg = <0x64>;
+
+ xbar_asrc_out1_ep: endpoint {
+ remote-endpoint = <&asrc_out1_ep>;
+ };
+ };
+
+ xbar_asrc_in2_port: port@65 {
+ reg = <0x65>;
+
+ xbar_asrc_in2_ep: endpoint {
+ remote-endpoint = <&asrc_in2_ep>;
+ };
+ };
+
+ port@66 {
+ reg = <0x66>;
+
+ xbar_asrc_out2_ep: endpoint {
+ remote-endpoint = <&asrc_out2_ep>;
+ };
+ };
+
+ xbar_asrc_in3_port: port@67 {
+ reg = <0x67>;
+
+ xbar_asrc_in3_ep: endpoint {
+ remote-endpoint = <&asrc_in3_ep>;
+ };
+ };
+
+ port@68 {
+ reg = <0x68>;
+
+ xbar_asrc_out3_ep: endpoint {
+ remote-endpoint = <&asrc_out3_ep>;
+ };
+ };
+
+ xbar_asrc_in4_port: port@69 {
+ reg = <0x69>;
+
+ xbar_asrc_in4_ep: endpoint {
+ remote-endpoint = <&asrc_in4_ep>;
+ };
+ };
+
+ port@6a {
+ reg = <0x6a>;
+
+ xbar_asrc_out4_ep: endpoint {
+ remote-endpoint = <&asrc_out4_ep>;
+ };
+ };
+
+ xbar_asrc_in5_port: port@6b {
+ reg = <0x6b>;
+
+ xbar_asrc_in5_ep: endpoint {
+ remote-endpoint = <&asrc_in5_ep>;
+ };
+ };
+
+ port@6c {
+ reg = <0x6c>;
+
+ xbar_asrc_out5_ep: endpoint {
+ remote-endpoint = <&asrc_out5_ep>;
+ };
+ };
+
+ xbar_asrc_in6_port: port@6d {
+ reg = <0x6d>;
+
+ xbar_asrc_in6_ep: endpoint {
+ remote-endpoint = <&asrc_in6_ep>;
+ };
+ };
+
+ port@6e {
+ reg = <0x6e>;
+
+ xbar_asrc_out6_ep: endpoint {
+ remote-endpoint = <&asrc_out6_ep>;
+ };
+ };
+
+ xbar_asrc_in7_port: port@6f {
+ reg = <0x6f>;
+
+ xbar_asrc_in7_ep: endpoint {
+ remote-endpoint = <&asrc_in7_ep>;
+ };
+ };
+
+ xbar_ope1_in_port: port@70 {
+ reg = <0x70>;
+
+ xbar_ope1_in_ep: endpoint {
+ remote-endpoint = <&ope1_cif_in_ep>;
+ };
+ };
+
+ port@71 {
+ reg = <0x71>;
+
+ xbar_ope1_out_ep: endpoint {
+ remote-endpoint = <&ope1_cif_out_ep>;
+ };
+ };
};
};
@@ -1459,8 +3537,16 @@
<&mc TEGRA234_MEMORY_CLIENT_MGBEAWR &emc>;
interconnect-names = "dma-mem", "write";
iommus = <&smmu_niso0 TEGRA234_SID_MGBE>;
- power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEA>;
+ power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEB>;
status = "disabled";
+
+ snps,axi-config = <&mgbe0_axi_setup>;
+
+ mgbe0_axi_setup: stmmac-axi-config {
+ snps,blen = <256 128 64 32>;
+ snps,rd_osr_lmt = <63>;
+ snps,wr_osr_lmt = <63>;
+ };
};
ethernet@6900000 {
@@ -1493,8 +3579,16 @@
<&mc TEGRA234_MEMORY_CLIENT_MGBEBWR &emc>;
interconnect-names = "dma-mem", "write";
iommus = <&smmu_niso0 TEGRA234_SID_MGBE_VF1>;
- power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEB>;
+ power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEC>;
status = "disabled";
+
+ snps,axi-config = <&mgbe1_axi_setup>;
+
+ mgbe1_axi_setup: stmmac-axi-config {
+ snps,blen = <256 128 64 32>;
+ snps,rd_osr_lmt = <63>;
+ snps,wr_osr_lmt = <63>;
+ };
};
ethernet@6a00000 {
@@ -1527,8 +3621,16 @@
<&mc TEGRA234_MEMORY_CLIENT_MGBECWR &emc>;
interconnect-names = "dma-mem", "write";
iommus = <&smmu_niso0 TEGRA234_SID_MGBE_VF2>;
- power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEC>;
+ power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBED>;
status = "disabled";
+
+ snps,axi-config = <&mgbe2_axi_setup>;
+
+ mgbe2_axi_setup: stmmac-axi-config {
+ snps,blen = <256 128 64 32>;
+ snps,rd_osr_lmt = <63>;
+ snps,wr_osr_lmt = <63>;
+ };
};
ethernet@6b00000 {
diff --git a/arch/arm64/boot/dts/qcom/Makefile b/arch/arm64/boot/dts/qcom/Makefile
index 39889d5f8e12..7d40ec5e7d21 100644
--- a/arch/arm64/boot/dts/qcom/Makefile
+++ b/arch/arm64/boot/dts/qcom/Makefile
@@ -23,6 +23,7 @@ dtb-$(CONFIG_ARCH_QCOM) += ipq9574-rdp433.dtb
dtb-$(CONFIG_ARCH_QCOM) += ipq9574-rdp449.dtb
dtb-$(CONFIG_ARCH_QCOM) += ipq9574-rdp453.dtb
dtb-$(CONFIG_ARCH_QCOM) += ipq9574-rdp454.dtb
+dtb-$(CONFIG_ARCH_QCOM) += msm8216-samsung-fortuna3g.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-acer-a1-724.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-alcatel-idol347.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-asus-z00l.dtb
@@ -35,11 +36,14 @@ dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-a3u-eur.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-a5u-eur.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-e5.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-e7.dtb
+dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-gprimeltecan.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-grandmax.dtb
+dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-grandprimelte.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-gt510.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-gt58.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-j5.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-j5x.dtb
+dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-rossa.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-samsung-serranove.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-thwc-uf896.dtb
dtb-$(CONFIG_ARCH_QCOM) += msm8916-thwc-ufi001c.dtb
@@ -210,6 +214,7 @@ dtb-$(CONFIG_ARCH_QCOM) += sm6125-sony-xperia-seine-pdx201.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm6125-xiaomi-laurel-sprout.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm6350-sony-xperia-lena-pdx213.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm6375-sony-xperia-murray-pdx225.dtb
+dtb-$(CONFIG_ARCH_QCOM) += sm7125-xiaomi-curtana.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm7125-xiaomi-joyeuse.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm7225-fairphone-fp4.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8150-hdk.dtb
@@ -233,6 +238,7 @@ dtb-$(CONFIG_ARCH_QCOM) += sm8450-hdk.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8450-qrd.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8450-sony-xperia-nagara-pdx223.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8450-sony-xperia-nagara-pdx224.dtb
+dtb-$(CONFIG_ARCH_QCOM) += sm8550-hdk.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8550-mtp.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8550-qrd.dtb
dtb-$(CONFIG_ARCH_QCOM) += sm8650-mtp.dtb
diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc-d3-camera-mezzanine.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc-d3-camera-mezzanine.dts
index c08b4be5cc7e..f9cbf8c1d689 100644
--- a/arch/arm64/boot/dts/qcom/apq8016-sbc-d3-camera-mezzanine.dts
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc-d3-camera-mezzanine.dts
@@ -9,7 +9,7 @@
#include "apq8016-sbc.dts"
/ {
- camera_vdddo_1v8: camera-vdddo-1v8 {
+ camera_vdddo_1v8: regulator-camera-vdddo {
compatible = "regulator-fixed";
regulator-name = "camera_vdddo";
regulator-min-microvolt = <1800000>;
@@ -17,7 +17,7 @@
regulator-always-on;
};
- camera_vdda_2v8: camera-vdda-2v8 {
+ camera_vdda_2v8: regulator-camera-vdda {
compatible = "regulator-fixed";
regulator-name = "camera_vdda";
regulator-min-microvolt = <2800000>;
@@ -25,7 +25,7 @@
regulator-always-on;
};
- camera_vddd_1v5: camera-vddd-1v5 {
+ camera_vddd_1v5: regulator-camera-vddd {
compatible = "regulator-fixed";
regulator-name = "camera_vddd";
regulator-min-microvolt = <1500000>;
@@ -53,7 +53,7 @@
};
&cci_i2c0 {
- camera_rear@3b {
+ camera@3b {
compatible = "ovti,ov5640";
reg = <0x3b>;
diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
index 42e2e48b2bc3..770d9c2fb456 100644
--- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
@@ -320,8 +320,12 @@
compatible = "qcom,ipq5332-dwc3", "qcom,dwc3";
reg = <0x08af8800 0x400>;
- interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq";
+ interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 53 IRQ_TYPE_EDGE_BOTH>,
+ <GIC_SPI 52 IRQ_TYPE_EDGE_BOTH>;
+ interrupt-names = "pwr_event",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq";
clocks = <&gcc GCC_USB0_MASTER_CLK>,
<&gcc GCC_SNOC_USB_CLK>,
diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
index 5e1277fea725..4e29adea570a 100644
--- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
@@ -9,6 +9,7 @@
#include <dt-bindings/clock/qcom,gcc-ipq6018.h>
#include <dt-bindings/reset/qcom,gcc-ipq6018.h>
#include <dt-bindings/clock/qcom,apss-ipq.h>
+#include <dt-bindings/thermal/thermal.h>
/ {
#address-cells = <2>;
@@ -43,6 +44,7 @@
clock-names = "cpu";
operating-points-v2 = <&cpu_opp_table>;
cpu-supply = <&ipq6018_s2>;
+ #cooling-cells = <2>;
};
CPU1: cpu@1 {
@@ -55,6 +57,7 @@
clock-names = "cpu";
operating-points-v2 = <&cpu_opp_table>;
cpu-supply = <&ipq6018_s2>;
+ #cooling-cells = <2>;
};
CPU2: cpu@2 {
@@ -67,6 +70,7 @@
clock-names = "cpu";
operating-points-v2 = <&cpu_opp_table>;
cpu-supply = <&ipq6018_s2>;
+ #cooling-cells = <2>;
};
CPU3: cpu@3 {
@@ -79,6 +83,7 @@
clock-names = "cpu";
operating-points-v2 = <&cpu_opp_table>;
cpu-supply = <&ipq6018_s2>;
+ #cooling-cells = <2>;
};
L2_0: l2-cache {
@@ -330,6 +335,16 @@
clock-names = "core";
};
+ tsens: thermal-sensor@4a9000 {
+ compatible = "qcom,ipq6018-tsens", "qcom,ipq8074-tsens";
+ reg = <0x0 0x004a9000 0x0 0x1000>,
+ <0x0 0x004a8000 0x0 0x1000>;
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "combined";
+ #qcom,sensors = <16>;
+ #thermal-sensor-cells = <1>;
+ };
+
cryptobam: dma-controller@704000 {
compatible = "qcom,bam-v1.7.0";
reg = <0x0 0x00704000 0x0 0x20000>;
@@ -418,6 +433,12 @@
<&gcc GCC_USB1_MOCK_UTMI_CLK>;
assigned-clock-rates = <133330000>,
<24000000>;
+
+ interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "qusb2_phy";
+
resets = <&gcc GCC_USB1_BCR>;
status = "disabled";
@@ -578,6 +599,21 @@
status = "disabled";
};
+ blsp1_i2c6: i2c@78ba000 {
+ compatible = "qcom,i2c-qup-v2.2.1";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x078ba000 0x0 0x600>;
+ interrupts = <GIC_SPI 300 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GCC_BLSP1_QUP6_I2C_APPS_CLK>,
+ <&gcc GCC_BLSP1_AHB_CLK>;
+ clock-names = "core", "iface";
+ clock-frequency = <400000>;
+ dmas = <&blsp_dma 22>, <&blsp_dma 23>;
+ dma-names = "tx", "rx";
+ status = "disabled";
+ };
+
qpic_bam: dma-controller@7984000 {
compatible = "qcom,bam-v1.7.0";
reg = <0x0 0x07984000 0x0 0x1a000>;
@@ -630,6 +666,13 @@
<133330000>,
<24000000>;
+ interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "ss_phy_irq";
+
resets = <&gcc GCC_USB0_BCR>;
status = "disabled";
@@ -830,10 +873,10 @@
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
- interrupt-map = <0 0 0 1 &intc 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
- <0 0 0 2 &intc 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
- <0 0 0 3 &intc 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
- <0 0 0 4 &intc 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
+ interrupt-map = <0 0 0 1 &intc 0 0 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
+ <0 0 0 2 &intc 0 0 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
+ <0 0 0 3 &intc 0 0 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
+ <0 0 0 4 &intc 0 0 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>,
<&gcc GCC_PCIE0_AXI_M_CLK>,
@@ -867,6 +910,122 @@
};
};
+ thermal-zones {
+ nss-top-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 4>;
+
+ trips {
+ nss-top-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+
+ nss-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 5>;
+
+ trips {
+ nss-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+
+ wcss-phya0-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 7>;
+
+ trips {
+ wcss-phya0-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+
+ wcss-phya1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 8>;
+
+ trips {
+ wcss-phya1-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+
+ cpu-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 13>;
+
+ trips {
+ cpu-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+
+ cpu_alert: cpu-passive {
+ temperature = <110000>;
+ hysteresis = <1000>;
+ type = "passive";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert>;
+ cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ lpass-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 14>;
+
+ trips {
+ lpass-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+
+ ddrss-top-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens 15>;
+
+ trips {
+ ddrss-top-critical {
+ temperature = <125000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+ };
+
timer {
compatible = "arm,armv8-timer";
interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
index cf295bed3299..e5b89753aa5c 100644
--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
@@ -252,6 +252,8 @@
clocks = <&gcc GCC_MDIO_AHB_CLK>;
clock-names = "gcc_mdio_ahb_clk";
+ clock-frequency = <6250000>;
+
status = "disabled";
};
@@ -627,6 +629,13 @@
<133330000>,
<19200000>;
+ interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "ss_phy_irq";
+
power-domains = <&gcc USB0_GDSC>;
resets = <&gcc GCC_USB0_BCR>;
@@ -669,6 +678,13 @@
<133330000>,
<19200000>;
+ interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "ss_phy_irq";
+
power-domains = <&gcc USB1_GDSC>;
resets = <&gcc GCC_USB1_BCR>;
@@ -814,13 +830,13 @@
interrupt-names = "msi";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
- interrupt-map = <0 0 0 1 &intc 0 142
+ interrupt-map = <0 0 0 1 &intc 0 0 142
IRQ_TYPE_LEVEL_HIGH>, /* int_a */
- <0 0 0 2 &intc 0 143
+ <0 0 0 2 &intc 0 0 143
IRQ_TYPE_LEVEL_HIGH>, /* int_b */
- <0 0 0 3 &intc 0 144
+ <0 0 0 3 &intc 0 0 144
IRQ_TYPE_LEVEL_HIGH>, /* int_c */
- <0 0 0 4 &intc 0 145
+ <0 0 0 4 &intc 0 0 145
IRQ_TYPE_LEVEL_HIGH>; /* int_d */
clocks = <&gcc GCC_SYS_NOC_PCIE1_AXI_CLK>,
@@ -876,13 +892,13 @@
interrupt-names = "msi";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
- interrupt-map = <0 0 0 1 &intc 0 75
+ interrupt-map = <0 0 0 1 &intc 0 0 75
IRQ_TYPE_LEVEL_HIGH>, /* int_a */
- <0 0 0 2 &intc 0 78
+ <0 0 0 2 &intc 0 0 78
IRQ_TYPE_LEVEL_HIGH>, /* int_b */
- <0 0 0 3 &intc 0 79
+ <0 0 0 3 &intc 0 0 79
IRQ_TYPE_LEVEL_HIGH>, /* int_c */
- <0 0 0 4 &intc 0 83
+ <0 0 0 4 &intc 0 0 83
IRQ_TYPE_LEVEL_HIGH>; /* int_d */
clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>,
diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi
index 5f83ee42a719..7f2e5cbf3bbb 100644
--- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi
@@ -321,8 +321,10 @@
sdhc_1: mmc@7804000 {
compatible = "qcom,ipq9574-sdhci", "qcom,sdhci-msm-v5";
- reg = <0x07804000 0x1000>, <0x07805000 0x1000>;
- reg-names = "hc", "cqhci";
+ reg = <0x07804000 0x1000>,
+ <0x07805000 0x1000>,
+ <0x07808000 0x2000>;
+ reg-names = "hc", "cqhci", "ice";
interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
@@ -330,9 +332,11 @@
clocks = <&gcc GCC_SDCC1_AHB_CLK>,
<&gcc GCC_SDCC1_APPS_CLK>,
- <&xo_board_clk>;
- clock-names = "iface", "core", "xo";
+ <&xo_board_clk>,
+ <&gcc GCC_SDCC1_ICE_CORE_CLK>;
+ clock-names = "iface", "core", "xo", "ice";
non-removable;
+ supports-cqe;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/msm8216-samsung-fortuna3g.dts b/arch/arm64/boot/dts/qcom/msm8216-samsung-fortuna3g.dts
new file mode 100644
index 000000000000..366914be7d53
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8216-samsung-fortuna3g.dts
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/dts-v1/;
+
+#include "msm8916-samsung-fortuna-common.dtsi"
+
+/ {
+ model = "Samsung Galaxy Grand Prime (SM-G530H)";
+ compatible = "samsung,fortuna3g", "qcom,msm8916";
+ chassis-type = "handset";
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-fortuna-common.dtsi b/arch/arm64/boot/dts/qcom/msm8916-samsung-fortuna-common.dtsi
new file mode 100644
index 000000000000..c2800ad2dd5b
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-fortuna-common.dtsi
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "msm8916-pm8916.dtsi"
+#include "msm8916-modem-qdsp6.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+ aliases {
+ mmc0 = &sdhc_1; /* eMMC */
+ mmc1 = &sdhc_2; /* SD card */
+ serial0 = &blsp_uart2;
+ };
+
+ chosen {
+ stdout-path = "serial0";
+ };
+
+ reserved-memory {
+ /* Additional memory used by Samsung firmware modifications */
+ tz-apps@85a00000 {
+ reg = <0x0 0x85a00000 0x0 0x600000>;
+ no-map;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ pinctrl-0 = <&gpio_keys_default>;
+ pinctrl-names = "default";
+
+ label = "GPIO Buttons";
+
+ button-volume-up {
+ label = "Volume Up";
+ gpios = <&tlmm 107 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_VOLUMEUP>;
+ };
+
+ button-home {
+ label = "Home";
+ gpios = <&tlmm 109 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_HOMEPAGE>;
+ };
+ };
+
+ haptic {
+ compatible = "regulator-haptic";
+ haptic-supply = <&reg_motor_vdd>;
+ min-microvolt = <3300000>;
+ max-microvolt = <3300000>;
+ };
+
+ reg_motor_vdd: regulator-motor-vdd {
+ compatible = "regulator-fixed";
+ regulator-name = "motor_vdd";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ gpio = <&tlmm 72 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+
+ pinctrl-0 = <&motor_en_default>;
+ pinctrl-names = "default";
+ };
+};
+
+&blsp_i2c1 {
+ status = "okay";
+
+ muic: extcon@25 {
+ compatible = "siliconmitus,sm5502-muic";
+ reg = <0x25>;
+ interrupts-extended = <&tlmm 12 IRQ_TYPE_EDGE_FALLING>;
+ pinctrl-0 = <&muic_int_default>;
+ pinctrl-names = "default";
+ };
+};
+
+&blsp_i2c4 {
+ status = "okay";
+
+ fuel-gauge@35 {
+ compatible = "richtek,rt5033-battery";
+ reg = <0x35>;
+
+ interrupts-extended = <&tlmm 121 IRQ_TYPE_EDGE_FALLING>;
+
+ pinctrl-0 = <&fg_alert_default>;
+ pinctrl-names = "default";
+ };
+};
+
+&blsp_uart2 {
+ status = "okay";
+};
+
+&mpss_mem {
+ reg = <0x0 0x86800000 0x0 0x5000000>;
+};
+
+&pm8916_resin {
+ linux,code = <KEY_VOLUMEDOWN>;
+ status = "okay";
+};
+
+&pm8916_rpm_regulators {
+ pm8916_l17: l17 {
+ regulator-min-microvolt = <2850000>;
+ regulator-max-microvolt = <2850000>;
+ };
+};
+
+&sdhc_1 {
+ status = "okay";
+};
+
+&sdhc_2 {
+ pinctrl-0 = <&sdc2_default &sdc2_cd_default>;
+ pinctrl-1 = <&sdc2_sleep &sdc2_cd_default>;
+ pinctrl-names = "default", "sleep";
+
+ cd-gpios = <&tlmm 38 GPIO_ACTIVE_LOW>;
+
+ status = "okay";
+};
+
+&sound {
+ model = "msm8916-1mic";
+ audio-routing =
+ "AMIC1", "MIC BIAS External1",
+ "AMIC2", "MIC BIAS Internal2",
+ "AMIC3", "MIC BIAS External1";
+};
+
+&usb {
+ extcon = <&muic>, <&muic>;
+ status = "okay";
+};
+
+&usb_hs_phy {
+ extcon = <&muic>;
+};
+
+&venus {
+ status = "okay";
+};
+
+&venus_mem {
+ status = "okay";
+};
+
+&wcnss {
+ status = "okay";
+};
+
+&wcnss_iris {
+ compatible = "qcom,wcn3620";
+};
+
+&wcnss_mem {
+ status = "okay";
+};
+
+&tlmm {
+ fg_alert_default: fg-alert-default-state {
+ pins = "gpio121";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ gpio_keys_default: gpio-keys-default-state {
+ pins = "gpio107", "gpio109";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ motor_en_default: motor-en-default-state {
+ pins = "gpio72";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ muic_int_default: muic-int-default-state {
+ pins = "gpio12";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ sdc2_cd_default: sdc2-cd-default-state {
+ pins = "gpio38";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-gprimeltecan.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-gprimeltecan.dts
new file mode 100644
index 000000000000..9d65fa58ba92
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-gprimeltecan.dts
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/dts-v1/;
+
+#include "msm8916-samsung-fortuna-common.dtsi"
+
+/ {
+ model = "Samsung Galaxy Grand Prime (SM-G530W)";
+ compatible = "samsung,gprimeltecan", "qcom,msm8916";
+ chassis-type = "handset";
+
+ reserved-memory {
+ /* Firmware for gprimeltecan needs more space */
+ /delete-node/ tz-apps@85a00000;
+
+ /* Additional memory used by Samsung firmware modifications */
+ tz-apps@85500000 {
+ reg = <0x0 0x85500000 0x0 0xb00000>;
+ no-map;
+ };
+ };
+};
+
+&mpss_mem {
+ /* Firmware for gprimeltecan needs more space */
+ reg = <0x0 0x86800000 0x0 0x5400000>;
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-grandprimelte.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-grandprimelte.dts
new file mode 100644
index 000000000000..a66ce4b13547
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-grandprimelte.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/dts-v1/;
+
+#include "msm8916-samsung-fortuna-common.dtsi"
+
+/ {
+ model = "Samsung Galaxy Grand Prime (SM-G530FZ)";
+ compatible = "samsung,grandprimelte", "qcom,msm8916";
+ chassis-type = "handset";
+};
+
+&mpss_mem {
+ /* Firmware for grandprimelte needs more space */
+ reg = <0x0 0x86800000 0x0 0x5400000>;
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-rossa-common.dtsi b/arch/arm64/boot/dts/qcom/msm8916-samsung-rossa-common.dtsi
new file mode 100644
index 000000000000..42843771ae2a
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-rossa-common.dtsi
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "msm8916-samsung-fortuna-common.dtsi"
+
+/* SM5504 MUIC instead of SM5502 */
+/delete-node/ &muic;
+
+&blsp_i2c1 {
+ muic: extcon@14 {
+ compatible = "siliconmitus,sm5504-muic";
+ reg = <0x14>;
+ interrupts-extended = <&tlmm 12 IRQ_TYPE_EDGE_FALLING>;
+ pinctrl-0 = <&muic_int_default>;
+ pinctrl-names = "default";
+ };
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-rossa.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-rossa.dts
new file mode 100644
index 000000000000..ebaa13c6b016
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-rossa.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/dts-v1/;
+
+#include "msm8916-samsung-rossa-common.dtsi"
+
+/ {
+ model = "Samsung Galaxy Core Prime LTE";
+ compatible = "samsung,rossa", "qcom,msm8916";
+ chassis-type = "handset";
+};
+
+&mpss_mem {
+ /* Firmware for rossa needs more space */
+ reg = <0x0 0x86800000 0x0 0x5800000>;
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index e423c57ddd41..cedff4166bfb 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -1785,6 +1785,8 @@
power-domains = <&gcc OXILI_GDSC>;
operating-points-v2 = <&gpu_opp_table>;
iommus = <&gpu_iommu 1>, <&gpu_iommu 2>;
+ #cooling-cells = <2>;
+
status = "disabled";
gpu_opp_table: opp-table {
@@ -2688,6 +2690,13 @@
thermal-sensors = <&tsens 2>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
gpu_alert0: trip-point0 {
temperature = <75000>;
diff --git a/arch/arm64/boot/dts/qcom/msm8939.dtsi b/arch/arm64/boot/dts/qcom/msm8939.dtsi
index 82d85ff61045..dd45975682b2 100644
--- a/arch/arm64/boot/dts/qcom/msm8939.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8939.dtsi
@@ -1427,6 +1427,8 @@
power-domains = <&gcc OXILI_GDSC>;
operating-points-v2 = <&opp_table>;
iommus = <&gpu_iommu 1>, <&gpu_iommu 2>;
+ #cooling-cells = <2>;
+
status = "disabled";
opp_table: opp-table {
@@ -2456,6 +2458,13 @@
thermal-sensors = <&tsens 3>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
gpu_alert0: trip-point0 {
temperature = <75000>;
@@ -2463,7 +2472,7 @@
type = "passive";
};
- gpu_crit: gpu_crit {
+ gpu_crit: gpu-crit {
temperature = <95000>;
hysteresis = <2000>;
type = "critical";
diff --git a/arch/arm64/boot/dts/qcom/msm8953.dtsi b/arch/arm64/boot/dts/qcom/msm8953.dtsi
index ad2f8cf9c966..f1011bb641c6 100644
--- a/arch/arm64/boot/dts/qcom/msm8953.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8953.dtsi
@@ -859,6 +859,8 @@
"vsync",
"core";
+ resets = <&gcc GCC_MDSS_BCR>;
+
#address-cells = <1>;
#size-cells = <1>;
ranges;
@@ -1044,6 +1046,125 @@
};
};
+ gpu: gpu@1c00000 {
+ compatible = "qcom,adreno-506.0", "qcom,adreno";
+ reg = <0x01c00000 0x40000>;
+ reg-names = "kgsl_3d0_reg_memory";
+ interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&gcc GCC_OXILI_GFX3D_CLK>,
+ <&gcc GCC_OXILI_AHB_CLK>,
+ <&gcc GCC_BIMC_GFX_CLK>,
+ <&gcc GCC_BIMC_GPU_CLK>,
+ <&gcc GCC_OXILI_TIMER_CLK>,
+ <&gcc GCC_OXILI_AON_CLK>;
+ clock-names = "core",
+ "iface",
+ "mem_iface",
+ "alt_mem_iface",
+ "rbbmtimer",
+ "alwayson";
+ power-domains = <&gcc OXILI_GX_GDSC>;
+
+ iommus = <&gpu_iommu 0>;
+ operating-points-v2 = <&gpu_opp_table>;
+
+ #cooling-cells = <2>;
+
+ status = "disabled";
+
+ zap-shader {
+ memory-region = <&zap_shader_region>;
+ };
+
+ gpu_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-19200000 {
+ opp-hz = /bits/ 64 <19200000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_min_svs>;
+ };
+
+ opp-133300000 {
+ opp-hz = /bits/ 64 <133300000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_min_svs>;
+ };
+
+ opp-216000000 {
+ opp-hz = /bits/ 64 <216000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_low_svs>;
+ };
+
+ opp-320000000 {
+ opp-hz = /bits/ 64 <320000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_svs>;
+ };
+
+ opp-400000000 {
+ opp-hz = /bits/ 64 <400000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_svs_plus>;
+ };
+
+ opp-510000000 {
+ opp-hz = /bits/ 64 <510000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_nom>;
+ };
+
+ opp-560000000 {
+ opp-hz = /bits/ 64 <560000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_nom_plus>;
+ };
+
+ /*
+ * This opp is only available on msm8953 and
+ * sdm632, the max for sdm450 is 600MHz.
+ */
+ opp-650000000 {
+ opp-hz = /bits/ 64 <650000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_turbo>;
+ };
+ };
+ };
+
+ gpu_iommu: iommu@1c48000 {
+ compatible = "qcom,msm8953-iommu", "qcom,msm-iommu-v2";
+ ranges = <0 0x01c48000 0x8000>;
+
+ clocks = <&gcc GCC_OXILI_AHB_CLK>,
+ <&gcc GCC_BIMC_GFX_CLK>;
+ clock-names = "iface", "bus";
+
+ power-domains = <&gcc OXILI_CX_GDSC>;
+
+ qcom,iommu-secure-id = <18>;
+
+ #address-cells = <1>;
+ #iommu-cells = <1>;
+ #size-cells = <1>;
+
+ /* gfx3d_user */
+ iommu-ctx@0 {
+ compatible = "qcom,msm-iommu-v2-ns";
+ reg = <0x0000 0x1000>;
+ interrupts = <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ /* gfx3d_secure */
+ iommu-ctx@2000 {
+ compatible = "qcom,msm-iommu-v2-sec";
+ reg = <0x2000 0x1000>;
+ interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
apps_iommu: iommu@1e20000 {
compatible = "qcom,msm8953-iommu", "qcom,msm-iommu-v1";
ranges = <0 0x01e20000 0x20000>;
@@ -1160,9 +1281,12 @@
#size-cells = <1>;
ranges;
- interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq";
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "ss_phy_irq";
clocks = <&gcc GCC_USB_PHY_CFG_AHB_CLK>,
<&gcc GCC_USB30_MASTER_CLK>,
@@ -2012,6 +2136,33 @@
};
};
};
+
+ gpu-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsens0 15>;
+
+ trips {
+ gpu_alert: trip-point0 {
+ temperature = <70000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ gpu_crit: crit {
+ temperature = <90000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&gpu_alert>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
};
timer {
diff --git a/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi b/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
index cbc84459a5ae..10cd244dea4f 100644
--- a/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
@@ -377,7 +377,7 @@
&blsp2_i2c1 {
status = "okay";
- sideinteraction: ad7147_captouch@2c {
+ sideinteraction: touch@2c {
compatible = "ad,ad7147_captouch";
reg = <0x2c>;
diff --git a/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi b/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi
index 9dbde79f26a2..0163d41f95f8 100644
--- a/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi
@@ -79,7 +79,7 @@
pmsg-size = <0x80000>;
};
- fb_region: fb_region@40000000 {
+ fb_region: fb@40000000 {
reg = <0 0x40000000 0 0x1000000>;
no-map;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi
index 8295bf1b219d..695e541832ad 100644
--- a/arch/arm64/boot/dts/qcom/msm8994.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi
@@ -233,7 +233,7 @@
#size-cells = <2>;
ranges;
- dfps_data_mem: dfps_data_mem@3400000 {
+ dfps_data_mem: dfps-data@3400000 {
reg = <0 0x03400000 0 0x1000>;
no-map;
};
@@ -243,7 +243,7 @@
no-map;
};
- smem_mem: smem_region@6a00000 {
+ smem_mem: smem@6a00000 {
reg = <0 0x06a00000 0 0x200000>;
no-map;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 8d41ed261adf..1601e46549e7 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -457,25 +457,6 @@
};
};
- mpm: interrupt-controller {
- compatible = "qcom,mpm";
- qcom,rpm-msg-ram = <&apss_mpm>;
- interrupts = <GIC_SPI 171 IRQ_TYPE_EDGE_RISING>;
- mboxes = <&apcs_glb 1>;
- interrupt-controller;
- #interrupt-cells = <2>;
- #power-domain-cells = <0>;
- interrupt-parent = <&intc>;
- qcom,mpm-pin-count = <96>;
- qcom,mpm-pin-map = <2 184>, /* TSENS1 upper_lower_int */
- <52 243>, /* DWC3_PRI ss_phy_irq */
- <79 347>, /* DWC3_PRI hs_phy_irq */
- <80 352>, /* DWC3_SEC hs_phy_irq */
- <81 347>, /* QUSB2_PHY_PRI DP+DM */
- <82 352>, /* QUSB2_PHY_SEC DP+DM */
- <87 326>; /* SPMI */
- };
-
psci {
compatible = "arm,psci-1.0";
method = "smc";
@@ -765,15 +746,8 @@
};
rpm_msg_ram: sram@68000 {
- compatible = "qcom,rpm-msg-ram", "mmio-sram";
+ compatible = "qcom,rpm-msg-ram";
reg = <0x00068000 0x6000>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x00068000 0x7000>;
-
- apss_mpm: sram@1b8 {
- reg = <0x1b8 0x48>;
- };
};
qfprom@74000 {
@@ -782,12 +756,12 @@
#address-cells = <1>;
#size-cells = <1>;
- qusb2p_hstx_trim: hstx_trim@24e {
+ qusb2p_hstx_trim: hstx-trim@24e {
reg = <0x24e 0x2>;
bits = <5 4>;
};
- qusb2s_hstx_trim: hstx_trim@24f {
+ qusb2s_hstx_trim: hstx-trim@24f {
reg = <0x24f 0x1>;
bits = <1 4>;
};
@@ -856,8 +830,8 @@
reg = <0x004ad000 0x1000>, /* TM */
<0x004ac000 0x1000>; /* SROT */
#qcom,sensors = <8>;
- interrupts-extended = <&mpm 2 IRQ_TYPE_LEVEL_HIGH>,
- <&intc GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "uplow", "critical";
#thermal-sensor-cells = <1>;
};
@@ -1363,7 +1337,6 @@
interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
gpio-controller;
gpio-ranges = <&tlmm 0 0 150>;
- wakeup-parent = <&mpm>;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -1891,7 +1864,7 @@
<0x0400a000 0x002100>;
reg-names = "core", "chnls", "obsrvr", "intr", "cnfg";
interrupt-names = "periph_irq";
- interrupts-extended = <&mpm 87 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 326 IRQ_TYPE_LEVEL_HIGH>;
qcom,ee = <0>;
qcom,channel = <0>;
#address-cells = <2>;
@@ -2104,7 +2077,7 @@
<0 0>,
<0 0>,
<150000000 300000000>,
- <0 0>,
+ <75000000 150000000>,
<0 0>,
<0 0>,
<0 0>,
@@ -2123,8 +2096,8 @@
compatible = "qcom,msm8996-qmp-ufs-phy";
reg = <0x00627000 0x1000>;
- clocks = <&gcc GCC_UFS_CLKREF_CLK>;
- clock-names = "ref";
+ clocks = <&rpmcc RPM_SMD_LN_BB_CLK>, <&gcc GCC_UFS_CLKREF_CLK>;
+ clock-names = "ref", "qref";
resets = <&ufshc 0>;
reset-names = "ufsphy";
@@ -3052,8 +3025,8 @@
#size-cells = <1>;
ranges;
- interrupts-extended = <&mpm 79 IRQ_TYPE_LEVEL_HIGH>,
- <&mpm 52 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq";
clocks = <&gcc GCC_SYS_NOC_USB3_AXI_CLK>,
@@ -3408,8 +3381,12 @@
#size-cells = <1>;
ranges;
- interrupts = <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq";
+ interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "hs_phy_irq";
clocks = <&gcc GCC_PERIPH_NOC_USB20_AHB_CLK>,
<&gcc GCC_USB20_MASTER_CLK>,
diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index 2793cc22d381..4dfe2d09ac28 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -1047,12 +1047,12 @@
compatible = "qcom,msm8998-qmp-ufs-phy";
reg = <0x01da7000 0x1000>;
- clock-names =
- "ref",
- "ref_aux";
- clocks =
- <&gcc GCC_UFS_CLKREF_CLK>,
- <&gcc GCC_UFS_PHY_AUX_CLK>;
+ clocks = <&rpmcc RPM_SMD_LN_BB_CLK1>,
+ <&gcc GCC_UFS_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_CLKREF_CLK>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
reset-names = "ufsphy";
resets = <&ufshc 0>;
@@ -1072,6 +1072,11 @@
reg = <0x01f60000 0x20000>;
};
+ tcsr_regs_2: syscon@1fc0000 {
+ compatible = "qcom,msm8998-tcsr", "syscon";
+ reg = <0x01fc0000 0x26000>;
+ };
+
tlmm: pinctrl@3400000 {
compatible = "qcom,msm8998-pinctrl";
reg = <0x03400000 0xc00000>;
@@ -2132,9 +2137,12 @@
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <120000000>;
- interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq";
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "ss_phy_irq";
power-domains = <&gcc USB_30_GDSC>;
@@ -2174,6 +2182,8 @@
reset-names = "phy",
"phy_phy";
+ qcom,tcsr-reg = <&tcsr_regs_2 0xb244>;
+
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/pm2250.dtsi b/arch/arm64/boot/dts/qcom/pm4125.dtsi
index 5f1d15db5c99..cf8c822e80ce 100644
--- a/arch/arm64/boot/dts/qcom/pm2250.dtsi
+++ b/arch/arm64/boot/dts/qcom/pm4125.dtsi
@@ -19,7 +19,7 @@
compatible = "qcom,pm8916-pon";
reg = <0x800>;
- pm2250_pwrkey: pwrkey {
+ pm4125_pwrkey: pwrkey {
compatible = "qcom,pm8941-pwrkey";
interrupts-extended = <&spmi_bus 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
linux,code = <KEY_POWER>;
@@ -27,7 +27,7 @@
bias-pull-up;
};
- pm2250_resin: resin {
+ pm4125_resin: resin {
compatible = "qcom,pm8941-resin";
interrupts-extended = <&spmi_bus 0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
debounce = <15625>;
@@ -36,6 +36,36 @@
};
};
+ pm4125_vbus: usb-vbus-regulator@1100 {
+ compatible = "qcom,pm4125-vbus-reg", "qcom,pm8150b-vbus-reg";
+ reg = <0x1100>;
+ status = "disabled";
+ };
+
+ pm4125_typec: typec@1500 {
+ compatible = "qcom,pm4125-typec", "qcom,pmi632-typec";
+ reg = <0x1500>;
+ interrupts = <0x0 0x15 0x00 IRQ_TYPE_EDGE_RISING>,
+ <0x0 0x15 0x01 IRQ_TYPE_EDGE_BOTH>,
+ <0x0 0x15 0x02 IRQ_TYPE_EDGE_RISING>,
+ <0x0 0x15 0x03 IRQ_TYPE_EDGE_BOTH>,
+ <0x0 0x15 0x04 IRQ_TYPE_EDGE_RISING>,
+ <0x0 0x15 0x05 IRQ_TYPE_EDGE_RISING>,
+ <0x0 0x15 0x06 IRQ_TYPE_EDGE_BOTH>,
+ <0x0 0x15 0x07 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "or-rid-detect-change",
+ "vpd-detect",
+ "cc-state-change",
+ "vconn-oc",
+ "vbus-change",
+ "attach-detach",
+ "legacy-cable-detect",
+ "try-snk-src-detect";
+ vdd-vbus-supply = <&pm4125_vbus>;
+
+ status = "disabled";
+ };
+
rtc@6000 {
compatible = "qcom,pm8941-rtc";
reg = <0x6000>, <0x6100>;
@@ -43,11 +73,11 @@
interrupts-extended = <&spmi_bus 0x0 0x61 0x1 IRQ_TYPE_EDGE_RISING>;
};
- pm2250_gpios: gpio@c000 {
+ pm4125_gpios: gpio@c000 {
compatible = "qcom,pm2250-gpio", "qcom,spmi-gpio";
reg = <0xc000>;
gpio-controller;
- gpio-ranges = <&pm2250_gpios 0 0 10>;
+ gpio-ranges = <&pm4125_gpios 0 0 10>;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
diff --git a/arch/arm64/boot/dts/qcom/pmi632.dtsi b/arch/arm64/boot/dts/qcom/pmi632.dtsi
index 4eb79e0ce40a..94d53b1cf6c8 100644
--- a/arch/arm64/boot/dts/qcom/pmi632.dtsi
+++ b/arch/arm64/boot/dts/qcom/pmi632.dtsi
@@ -45,6 +45,36 @@
#address-cells = <1>;
#size-cells = <0>;
+ pmi632_vbus: usb-vbus-regulator@1100 {
+ compatible = "qcom,pmi632-vbus-reg", "qcom,pm8150b-vbus-reg";
+ reg = <0x1100>;
+ status = "disabled";
+ };
+
+ pmi632_typec: typec@1500 {
+ compatible = "qcom,pmi632-typec";
+ reg = <0x1500>;
+ interrupts = <0x2 0x15 0x00 IRQ_TYPE_EDGE_RISING>,
+ <0x2 0x15 0x01 IRQ_TYPE_EDGE_BOTH>,
+ <0x2 0x15 0x02 IRQ_TYPE_EDGE_RISING>,
+ <0x2 0x15 0x03 IRQ_TYPE_EDGE_BOTH>,
+ <0x2 0x15 0x04 IRQ_TYPE_EDGE_RISING>,
+ <0x2 0x15 0x05 IRQ_TYPE_EDGE_RISING>,
+ <0x2 0x15 0x06 IRQ_TYPE_EDGE_BOTH>,
+ <0x2 0x15 0x07 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "or-rid-detect-change",
+ "vpd-detect",
+ "cc-state-change",
+ "vconn-oc",
+ "vbus-change",
+ "attach-detach",
+ "legacy-cable-detect",
+ "try-snk-src-detect";
+ vdd-vbus-supply = <&pmi632_vbus>;
+
+ status = "disabled";
+ };
+
pmi632_temp: temp-alarm@2400 {
compatible = "qcom,spmi-temp-alarm";
reg = <0x2400>;
@@ -127,6 +157,11 @@
status = "disabled";
};
+ pmi632_pbs_client3: pbs@7400 {
+ compatible = "qcom,pmi632-pbs", "qcom,pbs";
+ reg = <0x7400>;
+ };
+
pmi632_sdam_7: nvram@b600 {
compatible = "qcom,spmi-sdam";
reg = <0xb600>;
@@ -155,6 +190,10 @@
pmi632_lpg: pwm {
compatible = "qcom,pmi632-lpg";
+ nvmem = <&pmi632_sdam_7>;
+ nvmem-names = "lpg_chan_sdam";
+ qcom,pbs = <&pmi632_pbs_client3>;
+
#address-cells = <1>;
#size-cells = <0>;
#pwm-cells = <2>;
diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi
index 0911fb08ed63..89beac833d43 100644
--- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi
@@ -442,6 +442,11 @@
#hwlock-cells = <1>;
};
+ tcsr_regs: syscon@3c0000 {
+ compatible = "qcom,qcm2290-tcsr", "syscon";
+ reg = <0x0 0x003c0000 0x0 0x40000>;
+ };
+
tlmm: pinctrl@500000 {
compatible = "qcom,qcm2290-tlmm";
reg = <0x0 0x00500000 0x0 0x300000>;
@@ -690,6 +695,8 @@
#phy-cells = <0>;
+ qcom,tcsr-reg = <&tcsr_regs 0xb244>;
+
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/qcm6490-fairphone-fp5.dts b/arch/arm64/boot/dts/qcom/qcm6490-fairphone-fp5.dts
index 176898c9dbbd..4ff9fc24e50e 100644
--- a/arch/arm64/boot/dts/qcom/qcm6490-fairphone-fp5.dts
+++ b/arch/arm64/boot/dts/qcom/qcm6490-fairphone-fp5.dts
@@ -71,6 +71,41 @@
};
};
+ pmic-glink {
+ compatible = "qcom,qcm6490-pmic-glink", "qcom,pmic-glink";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ connector@0 {
+ compatible = "usb-c-connector";
+ reg = <0>;
+ power-role = "dual";
+ data-role = "dual";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ pmic_glink_hs_in: endpoint {
+ remote-endpoint = <&usb_1_dwc3_hs>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ pmic_glink_ss_in: endpoint {
+ remote-endpoint = <&usb_1_dwc3_ss>;
+ };
+ };
+ };
+ };
+ };
+
reserved-memory {
cont_splash_mem: cont-splash@e1000000 {
reg = <0x0 0xe1000000 0x0 0x2300000>;
@@ -82,6 +117,11 @@
no-map;
};
+ removed_mem: removed@c0000000 {
+ reg = <0x0 0xc0000000 0x0 0x5100000>;
+ no-map;
+ };
+
rmtfs_mem: memory@f8500000 {
compatible = "qcom,rmtfs-mem";
reg = <0x0 0xf8500000 0x0 0x600000>;
@@ -886,7 +926,16 @@
};
&usb_1_dwc3 {
- dr_mode = "peripheral";
+ dr_mode = "otg";
+ usb-role-switch;
+};
+
+&usb_1_dwc3_hs {
+ remote-endpoint = <&pmic_glink_hs_in>;
+};
+
+&usb_1_dwc3_ss {
+ remote-endpoint = <&pmic_glink_ss_in>;
};
&usb_1_hsphy {
@@ -915,6 +964,11 @@
status = "okay";
};
+&venus {
+ firmware-name = "qcom/qcm6490/fairphone5/venus.mbn";
+ status = "okay";
+};
+
&wifi {
qcom,ath11k-calibration-variant = "Fairphone_5";
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/qcm6490-idp.dts b/arch/arm64/boot/dts/qcom/qcm6490-idp.dts
index 03e97e27d16d..e4bfad50a669 100644
--- a/arch/arm64/boot/dts/qcom/qcm6490-idp.dts
+++ b/arch/arm64/boot/dts/qcom/qcm6490-idp.dts
@@ -5,8 +5,14 @@
/dts-v1/;
+/* PM7250B is configured to use SID8/9 */
+#define PM7250B_SID 8
+#define PM7250B_SID1 9
+
+#include <dt-bindings/leds/common.h>
#include <dt-bindings/regulator/qcom,rpmh-regulator.h>
#include "sc7280.dtsi"
+#include "pm7250b.dtsi"
#include "pm7325.dtsi"
#include "pm8350c.dtsi"
#include "pmk8350.dtsi"
@@ -109,7 +115,7 @@
no-map;
};
- trusted_apps_mem: trusted_apps@c1800000 {
+ trusted_apps_mem: trusted-apps@c1800000 {
reg = <0x0 0xc1800000 0x0 0x1c00000>;
no-map;
};
@@ -123,8 +129,8 @@
vph_pwr: vph-pwr-regulator {
compatible = "regulator-fixed";
regulator-name = "vph_pwr";
- regulator-min-microvolt = <2500000>;
- regulator-max-microvolt = <4350000>;
+ regulator-min-microvolt = <3700000>;
+ regulator-max-microvolt = <3700000>;
};
};
@@ -415,6 +421,33 @@
};
};
+&pm8350c_pwm {
+ status = "okay";
+
+ multi-led {
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_STATUS;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@1 {
+ reg = <1>;
+ color = <LED_COLOR_ID_RED>;
+ };
+
+ led@2 {
+ reg = <2>;
+ color = <LED_COLOR_ID_GREEN>;
+ };
+
+ led@3 {
+ reg = <3>;
+ color = <LED_COLOR_ID_BLUE>;
+ };
+ };
+};
+
&qupv3_id_0 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi
index 2f2eeaf2e945..a05d0234f7fc 100644
--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
@@ -675,6 +675,14 @@
assigned-clocks = <&gcc GCC_USB20_MOCK_UTMI_CLK>,
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
+
+ interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 319 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "qusb2_phy";
+
status = "disabled";
usb3_dwc3: usb@7580000 {
@@ -704,6 +712,14 @@
assigned-clocks = <&gcc GCC_USB20_MOCK_UTMI_CLK>,
<&gcc GCC_USB_HS_SYSTEM_CLK>;
assigned-clock-rates = <19200000>, <133333333>;
+
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 318 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "qusb2_phy";
+
status = "disabled";
usb@78c0000 {
diff --git a/arch/arm64/boot/dts/qcom/qcs6490-rb3gen2.dts b/arch/arm64/boot/dts/qcom/qcs6490-rb3gen2.dts
index 8bb7d13d85f6..97824c769ba3 100644
--- a/arch/arm64/boot/dts/qcom/qcs6490-rb3gen2.dts
+++ b/arch/arm64/boot/dts/qcom/qcs6490-rb3gen2.dts
@@ -110,7 +110,7 @@
no-map;
};
- trusted_apps_mem: trusted_apps@c1800000 {
+ trusted_apps_mem: trusted-apps@c1800000 {
reg = <0x0 0xc1800000 0x0 0x1c00000>;
no-map;
};
@@ -124,8 +124,8 @@
vph_pwr: vph-pwr-regulator {
compatible = "regulator-fixed";
regulator-name = "vph_pwr";
- regulator-min-microvolt = <2500000>;
- regulator-max-microvolt = <4350000>;
+ regulator-min-microvolt = <3700000>;
+ regulator-max-microvolt = <3700000>;
};
};
@@ -413,6 +413,23 @@
};
};
+&gcc {
+ protected-clocks = <GCC_CFG_NOC_LPASS_CLK>,
+ <GCC_MSS_CFG_AHB_CLK>,
+ <GCC_MSS_GPLL0_MAIN_DIV_CLK_SRC>,
+ <GCC_MSS_OFFLINE_AXI_CLK>,
+ <GCC_MSS_Q6SS_BOOT_CLK_SRC>,
+ <GCC_MSS_Q6_MEMNOC_AXI_CLK>,
+ <GCC_MSS_SNOC_AXI_CLK>,
+ <GCC_QSPI_CNOC_PERIPH_AHB_CLK>,
+ <GCC_QSPI_CORE_CLK>,
+ <GCC_QSPI_CORE_CLK_SRC>,
+ <GCC_SEC_CTRL_CLK_SRC>,
+ <GCC_WPSS_AHB_BDG_MST_CLK>,
+ <GCC_WPSS_AHB_CLK>,
+ <GCC_WPSS_RSCP_CLK>;
+};
+
&qupv3_id_0 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/qrb2210-rb1.dts b/arch/arm64/boot/dts/qcom/qrb2210-rb1.dts
index aa53b6af6d9c..6e9dd0312adc 100644
--- a/arch/arm64/boot/dts/qcom/qrb2210-rb1.dts
+++ b/arch/arm64/boot/dts/qcom/qrb2210-rb1.dts
@@ -7,7 +7,7 @@
#include <dt-bindings/leds/common.h>
#include "qcm2290.dtsi"
-#include "pm2250.dtsi"
+#include "pm4125.dtsi"
/ {
model = "Qualcomm Technologies, Inc. Robotics RB1";
@@ -177,6 +177,24 @@
};
};
+&CPU_PD0 {
+ /delete-property/ power-domains;
+};
+
+&CPU_PD1 {
+ /delete-property/ power-domains;
+};
+
+&CPU_PD2 {
+ /delete-property/ power-domains;
+};
+
+&CPU_PD3 {
+ /delete-property/ power-domains;
+};
+
+/delete-node/ &CLUSTER_PD;
+
&gpi_dma0 {
status = "okay";
};
@@ -226,7 +244,7 @@
};
&mdss_dsi0 {
- vdda-supply = <&pm2250_l5>;
+ vdda-supply = <&pm4125_l5>;
status = "okay";
};
@@ -239,7 +257,7 @@
status = "okay";
};
-&pm2250_resin {
+&pm4125_resin {
linux,code = <KEY_VOLUMEDOWN>;
status = "okay";
};
@@ -263,23 +281,23 @@
compatible = "qcom,rpm-pm2250-regulators";
vdd_s3-supply = <&vph_pwr>;
vdd_s4-supply = <&vph_pwr>;
- vdd_l1_l2_l3_l5_l6_l7_l8_l9_l10_l11_l12-supply = <&pm2250_s3>;
+ vdd_l1_l2_l3_l5_l6_l7_l8_l9_l10_l11_l12-supply = <&pm4125_s3>;
vdd_l4_l17_l18_l19_l20_l21_l22-supply = <&vph_pwr>;
- vdd_l13_l14_l15_l16-supply = <&pm2250_s4>;
+ vdd_l13_l14_l15_l16-supply = <&pm4125_s4>;
/*
* S1 - VDD_APC
* S2 - VDD_CX
*/
- pm2250_s3: s3 {
+ pm4125_s3: s3 {
/* 0.4V-1.6625V -> 1.3V (Power tree requirements) */
regulator-min-microvolt = <1352000>;
regulator-max-microvolt = <1352000>;
regulator-boot-on;
};
- pm2250_s4: s4 {
+ pm4125_s4: s4 {
/* 1.2V-2.35V -> 2.05V (Power tree requirements) */
regulator-min-microvolt = <2072000>;
regulator-max-microvolt = <2072000>;
@@ -288,7 +306,7 @@
/* L1 - VDD_MX */
- pm2250_l2: l2 {
+ pm4125_l2: l2 {
/* LPDDR4X VDD2 */
regulator-min-microvolt = <1136000>;
regulator-max-microvolt = <1136000>;
@@ -296,7 +314,7 @@
regulator-boot-on;
};
- pm2250_l3: l3 {
+ pm4125_l3: l3 {
/* LPDDR4X VDDQ */
regulator-min-microvolt = <616000>;
regulator-max-microvolt = <616000>;
@@ -304,14 +322,14 @@
regulator-boot-on;
};
- pm2250_l4: l4 {
+ pm4125_l4: l4 {
/* max = 3.05V -> max = 2.7 to disable 3V signaling (SDHCI2) */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <2700000>;
regulator-allow-set-load;
};
- pm2250_l5: l5 {
+ pm4125_l5: l5 {
/* CSI/DSI */
regulator-min-microvolt = <1232000>;
regulator-max-microvolt = <1232000>;
@@ -319,7 +337,7 @@
regulator-boot-on;
};
- pm2250_l6: l6 {
+ pm4125_l6: l6 {
/* DRAM PLL */
regulator-min-microvolt = <928000>;
regulator-max-microvolt = <928000>;
@@ -327,7 +345,7 @@
regulator-boot-on;
};
- pm2250_l7: l7 {
+ pm4125_l7: l7 {
/* Wi-Fi CX/MX */
regulator-min-microvolt = <664000>;
regulator-max-microvolt = <664000>;
@@ -338,20 +356,20 @@
* L9 - VDD_LPI_MX
*/
- pm2250_l10: l10 {
+ pm4125_l10: l10 {
/* Wi-Fi RFA */
regulator-min-microvolt = <1304000>;
regulator-max-microvolt = <1304000>;
};
- pm2250_l11: l11 {
+ pm4125_l11: l11 {
/* GPS RF1 */
regulator-min-microvolt = <1000000>;
regulator-max-microvolt = <1000000>;
regulator-boot-on;
};
- pm2250_l12: l12 {
+ pm4125_l12: l12 {
/* USB PHYs */
regulator-min-microvolt = <928000>;
regulator-max-microvolt = <928000>;
@@ -359,7 +377,7 @@
regulator-boot-on;
};
- pm2250_l13: l13 {
+ pm4125_l13: l13 {
/* USB/QFPROM/PLLs */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
@@ -367,7 +385,7 @@
regulator-boot-on;
};
- pm2250_l14: l14 {
+ pm4125_l14: l14 {
/* SDHCI1 VQMMC */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
@@ -376,7 +394,7 @@
regulator-always-on;
};
- pm2250_l15: l15 {
+ pm4125_l15: l15 {
/* WCD/DSI/BT VDDIO */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
@@ -385,38 +403,38 @@
regulator-boot-on;
};
- pm2250_l16: l16 {
+ pm4125_l16: l16 {
/* GPS RF2 */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-boot-on;
};
- pm2250_l17: l17 {
+ pm4125_l17: l17 {
regulator-min-microvolt = <3000000>;
regulator-max-microvolt = <3000000>;
};
- pm2250_l18: l18 {
+ pm4125_l18: l18 {
/* VDD_PXn */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- pm2250_l19: l19 {
+ pm4125_l19: l19 {
/* VDD_PXn */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
};
- pm2250_l20: l20 {
+ pm4125_l20: l20 {
/* SDHCI1 VMMC */
regulator-min-microvolt = <2400000>;
regulator-max-microvolt = <3600000>;
regulator-allow-set-load;
};
- pm2250_l21: l21 {
+ pm4125_l21: l21 {
/* SDHCI2 VMMC */
regulator-min-microvolt = <2960000>;
regulator-max-microvolt = <3300000>;
@@ -424,7 +442,7 @@
regulator-boot-on;
};
- pm2250_l22: l22 {
+ pm4125_l22: l22 {
/* Wi-Fi */
regulator-min-microvolt = <3312000>;
regulator-max-microvolt = <3312000>;
@@ -433,8 +451,8 @@
};
&sdhc_1 {
- vmmc-supply = <&pm2250_l20>;
- vqmmc-supply = <&pm2250_l14>;
+ vmmc-supply = <&pm4125_l20>;
+ vqmmc-supply = <&pm4125_l14>;
pinctrl-0 = <&sdc1_state_on>;
pinctrl-1 = <&sdc1_state_off>;
pinctrl-names = "default", "sleep";
@@ -446,8 +464,8 @@
};
&sdhc_2 {
- vmmc-supply = <&pm2250_l21>;
- vqmmc-supply = <&pm2250_l4>;
+ vmmc-supply = <&pm4125_l21>;
+ vqmmc-supply = <&pm4125_l4>;
cd-gpios = <&tlmm 88 GPIO_ACTIVE_LOW>;
pinctrl-0 = <&sdc2_state_on &sd_det_in_on>;
pinctrl-1 = <&sdc2_state_off &sd_det_in_off>;
@@ -518,8 +536,8 @@
};
&usb_qmpphy {
- vdda-phy-supply = <&pm2250_l12>;
- vdda-pll-supply = <&pm2250_l13>;
+ vdda-phy-supply = <&pm4125_l12>;
+ vdda-pll-supply = <&pm4125_l13>;
status = "okay";
};
@@ -528,17 +546,17 @@
};
&usb_hsphy {
- vdd-supply = <&pm2250_l12>;
- vdda-pll-supply = <&pm2250_l13>;
- vdda-phy-dpdm-supply = <&pm2250_l21>;
+ vdd-supply = <&pm4125_l12>;
+ vdda-pll-supply = <&pm4125_l13>;
+ vdda-phy-dpdm-supply = <&pm4125_l21>;
status = "okay";
};
&wifi {
- vdd-0.8-cx-mx-supply = <&pm2250_l7>;
- vdd-1.8-xo-supply = <&pm2250_l13>;
- vdd-1.3-rfa-supply = <&pm2250_l10>;
- vdd-3.3-ch0-supply = <&pm2250_l22>;
+ vdd-0.8-cx-mx-supply = <&pm4125_l7>;
+ vdd-1.8-xo-supply = <&pm4125_l13>;
+ vdd-1.3-rfa-supply = <&pm4125_l10>;
+ vdd-3.3-ch0-supply = <&pm4125_l22>;
qcom,ath10k-calibration-variant = "Thundercomm_RB1";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts b/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts
index 7c19f874fa71..696d6d43c56b 100644
--- a/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts
+++ b/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts
@@ -6,8 +6,10 @@
/dts-v1/;
#include <dt-bindings/leds/common.h>
+#include <dt-bindings/usb/pd.h>
#include "sm4250.dtsi"
#include "pm6125.dtsi"
+#include "pmi632.dtsi"
/ {
model = "Qualcomm Technologies, Inc. QRB4210 RB2";
@@ -256,6 +258,46 @@
};
};
+&pmi632_typec {
+ status = "okay";
+
+ connector {
+ compatible = "usb-c-connector";
+
+ power-role = "dual";
+ data-role = "dual";
+ self-powered;
+
+ typec-power-opmode = "default";
+ pd-disable;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ pmi632_hs_in: endpoint {
+ remote-endpoint = <&usb_dwc3_hs>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ pmi632_ss_in: endpoint {
+ remote-endpoint = <&usb_qmpphy_out>;
+ };
+ };
+ };
+ };
+};
+
+&pmi632_vbus {
+ regulator-min-microamp = <500000>;
+ regulator-max-microamp = <3000000>;
+ status = "okay";
+};
+
&pon_pwrkey {
status = "okay";
};
@@ -607,8 +649,8 @@
status = "okay";
};
-&usb_dwc3 {
- maximum-speed = "super-speed";
+&usb_dwc3_hs {
+ remote-endpoint = <&pmi632_hs_in>;
};
&usb_hsphy {
@@ -626,6 +668,10 @@
status = "okay";
};
+&usb_qmpphy_out {
+ remote-endpoint = <&pmi632_ss_in>;
+};
+
&wifi {
vdd-0.8-cx-mx-supply = <&vreg_l8a_0p664>;
vdd-1.8-xo-supply = <&vreg_l16a_1p3>;
diff --git a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts
index fd253942e5e5..78e933c42c31 100644
--- a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts
+++ b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts
@@ -108,6 +108,13 @@
};
};
};
+
+ reserved-memory {
+ gpu_mem: gpu-mem@8bf00000 {
+ reg = <0 0x8bf00000 0 0x2000>;
+ no-map;
+ };
+ };
};
&apps_rsc {
@@ -266,6 +273,48 @@
status = "okay";
};
+&i2c12 {
+ pinctrl-0 = <&qup1_i2c4_state>;
+ pinctrl-names = "default";
+
+ status = "okay";
+
+ vdd_gfx: regulator@39 {
+ compatible = "maxim,max20411";
+ reg = <0x39>;
+
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <800000>;
+
+ enable-gpios = <&pmm8540a_gpios 2 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-0 = <&max20411_en>;
+ pinctrl-names = "default";
+ };
+};
+
+&gpucc {
+ vdd-gfx-supply = <&vdd_gfx>;
+ status = "okay";
+};
+
+&gmu {
+ status = "okay";
+};
+
+&gpu {
+ status = "okay";
+
+ zap-shader {
+ memory-region = <&gpu_mem>;
+ firmware-name = "qcom/sa8295p/a690_zap.mbn";
+ };
+};
+
+&gpu_smmu {
+ status = "okay";
+};
+
&mdss0 {
status = "okay";
};
@@ -476,6 +525,10 @@
status = "okay";
};
+&qup1 {
+ status = "okay";
+};
+
&qup2 {
status = "okay";
};
@@ -636,6 +689,14 @@
/* PINCTRL */
+&pmm8540a_gpios {
+ max20411_en: max20411-en-state {
+ pins = "gpio2";
+ function = "normal";
+ output-enable;
+ };
+};
+
&tlmm {
pcie2a_default: pcie2a-default-state {
clkreq-n-pins {
@@ -728,4 +789,11 @@
bias-pull-up;
};
};
+
+ qup1_i2c4_state: qup1-i2c4-state {
+ pins = "gpio0", "gpio1";
+ function = "qup12";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
};
diff --git a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts
index b04f72ec097c..177b9dad6ff7 100644
--- a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts
+++ b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts
@@ -376,14 +376,14 @@
pinctrl-names = "default";
pinctrl-0 = <&pcie2a_default>;
- status = "okay";
+ status = "disabled";
};
&pcie2a_phy {
vdda-phy-supply = <&vreg_l11a>;
vdda-pll-supply = <&vreg_l3a>;
- status = "okay";
+ status = "disabled";
};
&pcie3a {
diff --git a/arch/arm64/boot/dts/qcom/sa8540p.dtsi b/arch/arm64/boot/dts/qcom/sa8540p.dtsi
index 96b2c59ad02b..23888029cc11 100644
--- a/arch/arm64/boot/dts/qcom/sa8540p.dtsi
+++ b/arch/arm64/boot/dts/qcom/sa8540p.dtsi
@@ -168,6 +168,9 @@
};
&gpucc {
+ /* SA8295P and SA8540P doesn't provide gfx.lvl */
+ /delete-property/ power-domains;
+
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
index a7eaca33d326..231cea1f0fa8 100644
--- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi
+++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
@@ -356,13 +356,18 @@
no-map;
};
+ ddr_training_checksum: ddr-training-checksum@908c0000 {
+ reg = <0x0 0x908c0000 0x0 0x1000>;
+ no-map;
+ };
+
reserved_mem: reserved@908f0000 {
- reg = <0x0 0x908f0000 0x0 0xf000>;
+ reg = <0x0 0x908f0000 0x0 0xe000>;
no-map;
};
- secdata_apss_mem: secdata-apss@908ff000 {
- reg = <0x0 0x908ff000 0x0 0x1000>;
+ secdata_apss_mem: secdata-apss@908fe000 {
+ reg = <0x0 0x908fe000 0x0 0x2000>;
no-map;
};
@@ -373,8 +378,43 @@
hwlocks = <&tcsr_mutex 3>;
};
- cpucp_fw_mem: cpucp-fw@90b00000 {
- reg = <0x0 0x90b00000 0x0 0x100000>;
+ tz_sail_mailbox_mem: tz-sail-mailbox@90c00000 {
+ reg = <0x0 0x90c00000 0x0 0x100000>;
+ no-map;
+ };
+
+ sail_mailbox_mem: sail-ss@90d00000 {
+ reg = <0x0 0x90d00000 0x0 0x100000>;
+ no-map;
+ };
+
+ sail_ota_mem: sail-ss@90e00000 {
+ reg = <0x0 0x90e00000 0x0 0x300000>;
+ no-map;
+ };
+
+ aoss_backup_mem: aoss-backup@91b00000 {
+ reg = <0x0 0x91b00000 0x0 0x40000>;
+ no-map;
+ };
+
+ cpucp_backup_mem: cpucp-backup@91b40000 {
+ reg = <0x0 0x91b40000 0x0 0x40000>;
+ no-map;
+ };
+
+ tz_config_backup_mem: tz-config-backup@91b80000 {
+ reg = <0x0 0x91b80000 0x0 0x10000>;
+ no-map;
+ };
+
+ ddr_training_data_mem: ddr-training-data@91b90000 {
+ reg = <0x0 0x91b90000 0x0 0x10000>;
+ no-map;
+ };
+
+ cdt_data_backup_mem: cdt-data-backup@91ba0000 {
+ reg = <0x0 0x91ba0000 0x0 0x1000>;
no-map;
};
@@ -433,13 +473,43 @@
no-map;
};
+ audio_mdf_mem: audio-mdf-region@ae000000 {
+ reg = <0x0 0xae000000 0x0 0x1000000>;
+ no-map;
+ };
+
+ firmware_mem: firmware-region@b0000000 {
+ reg = <0x0 0xb0000000 0x0 0x800000>;
+ no-map;
+ };
+
hyptz_reserved_mem: hyptz-reserved@beb00000 {
reg = <0x0 0xbeb00000 0x0 0x11500000>;
no-map;
};
- tz_stat_mem: tz-stat@d0000000 {
- reg = <0x0 0xd0000000 0x0 0x100000>;
+ scmi_mem: scmi-region@d0000000 {
+ reg = <0x0 0xd0000000 0x0 0x40000>;
+ no-map;
+ };
+
+ firmware_logs_mem: firmware-logs@d0040000 {
+ reg = <0x0 0xd0040000 0x0 0x10000>;
+ no-map;
+ };
+
+ firmware_audio_mem: firmware-audio@d0050000 {
+ reg = <0x0 0xd0050000 0x0 0x4000>;
+ no-map;
+ };
+
+ firmware_reserved_mem: firmware-reserved@d0054000 {
+ reg = <0x0 0xd0054000 0x0 0x9c000>;
+ no-map;
+ };
+
+ firmware_quantum_test_mem: firmware-quantum-test@d00f0000 {
+ reg = <0x0 0xd00f0000 0x0 0x10000>;
no-map;
};
@@ -453,8 +523,23 @@
no-map;
};
- trusted_apps_mem: trusted-apps@d1800000 {
- reg = <0x0 0xd1800000 0x0 0x3900000>;
+ deepsleep_backup_mem: deepsleep-backup@d1800000 {
+ reg = <0x0 0xd1800000 0x0 0x100000>;
+ no-map;
+ };
+
+ trusted_apps_mem: trusted-apps@d1900000 {
+ reg = <0x0 0xd1900000 0x0 0x3800000>;
+ no-map;
+ };
+
+ tz_stat_mem: tz-stat@db100000 {
+ reg = <0x0 0xdb100000 0x0 0x100000>;
+ no-map;
+ };
+
+ cpucp_fw_mem: cpucp-fw@db200000 {
+ reg = <0x0 0xdb200000 0x0 0x100000>;
no-map;
};
};
@@ -1615,10 +1700,12 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 287 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 261 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
<&pdc 12 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "pwr_event",
+ "hs_phy_irq",
"dp_hs_phy_irq",
"dm_hs_phy_irq",
"ss_phy_irq";
@@ -1702,10 +1789,12 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 8 IRQ_TYPE_EDGE_BOTH>,
<&pdc 7 IRQ_TYPE_EDGE_BOTH>,
<&pdc 13 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "pwr_event",
+ "hs_phy_irq",
"dp_hs_phy_irq",
"dm_hs_phy_irq",
"ss_phy_irq";
@@ -1765,9 +1854,11 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 444 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 443 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc 9 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "pwr_event",
+ "hs_phy_irq",
"dp_hs_phy_irq",
"dm_hs_phy_irq";
@@ -2394,8 +2485,9 @@
<0x0 0x23016000 0x0 0x100>;
reg-names = "stmmaceth", "rgmii";
- interrupts = <GIC_SPI 929 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
+ interrupts = <GIC_SPI 929 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 781 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "sfty";
clocks = <&gcc GCC_EMAC1_AXI_CLK>,
<&gcc GCC_EMAC1_SLV_AHB_CLK>,
@@ -2427,8 +2519,9 @@
<0x0 0x23056000 0x0 0x100>;
reg-names = "stmmaceth", "rgmii";
- interrupts = <GIC_SPI 946 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
+ interrupts = <GIC_SPI 946 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 782 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "sfty";
clocks = <&gcc GCC_EMAC0_AXI_CLK>,
<&gcc GCC_EMAC0_SLV_AHB_CLK>,
diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index 46aaeba28604..f3a6da8b2890 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -649,6 +649,7 @@ ap_ec_spi: &spi6 {
pinctrl-names = "default";
pinctrl-0 = <&ap_ec_int_l>;
spi-max-frequency = <3000000>;
+ wakeup-source;
cros_ec_pwm: pwm {
compatible = "google,cros-ec-pwm";
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 4dcaa15caef2..2b481e20ae38 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -817,7 +817,7 @@
bits = <1 3>;
};
- gpu_speed_bin: gpu_speed_bin@1d2 {
+ gpu_speed_bin: gpu-speed-bin@1d2 {
reg = <0x1d2 0x2>;
bits = <5 8>;
};
@@ -1532,6 +1532,76 @@
qcom,bcm-voters = <&apps_bcm_voter>;
};
+ ufs_mem_hc: ufshc@1d84000 {
+ compatible = "qcom,sc7180-ufshc", "qcom,ufshc",
+ "jedec,ufs-2.0";
+ reg = <0 0x01d84000 0 0x3000>;
+ interrupts = <GIC_SPI 265 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&ufs_mem_phy>;
+ phy-names = "ufsphy";
+ lanes-per-direction = <1>;
+ #reset-cells = <1>;
+ resets = <&gcc GCC_UFS_PHY_BCR>;
+ reset-names = "rst";
+
+ power-domains = <&gcc UFS_PHY_GDSC>;
+
+ iommus = <&apps_smmu 0xa0 0x0>;
+
+ clock-names = "core_clk",
+ "bus_aggr_clk",
+ "iface_clk",
+ "core_clk_unipro",
+ "ref_clk",
+ "tx_lane0_sync_clk",
+ "rx_lane0_sync_clk";
+ clocks = <&gcc GCC_UFS_PHY_AXI_CLK>,
+ <&gcc GCC_AGGRE_UFS_PHY_AXI_CLK>,
+ <&gcc GCC_UFS_PHY_AHB_CLK>,
+ <&gcc GCC_UFS_PHY_UNIPRO_CORE_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_TX_SYMBOL_0_CLK>,
+ <&gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>;
+ freq-table-hz = <50000000 200000000>,
+ <0 0>,
+ <0 0>,
+ <37500000 150000000>,
+ <0 0>,
+ <0 0>,
+ <0 0>;
+
+ interconnects = <&aggre1_noc MASTER_UFS_MEM QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_UFS_MEM_CFG QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "ufs-ddr", "cpu-ufs";
+
+ qcom,ice = <&ice>;
+
+ status = "disabled";
+ };
+
+ ufs_mem_phy: phy@1d87000 {
+ compatible = "qcom,sc7180-qmp-ufs-phy",
+ "qcom,sm7150-qmp-ufs-phy";
+ reg = <0 0x01d87000 0 0x1000>;
+ clocks = <&gcc GCC_UFS_MEM_CLKREF_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ clock-names = "ref", "ref_aux";
+ power-domains = <&gcc UFS_PHY_GDSC>;
+ resets = <&ufs_mem_hc 0>;
+ reset-names = "ufsphy";
+ #phy-cells = <0>;
+ status = "disabled";
+ };
+
+ ice: crypto@1d90000 {
+ compatible = "qcom,sc7180-inline-crypto-engine",
+ "qcom,inline-crypto-engine";
+ reg = <0 0x01d90000 0 0x8000>;
+ clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>;
+ };
+
ipa: ipa@1e40000 {
compatible = "qcom,sc7180-ipa";
@@ -2964,12 +3034,16 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <150000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 9 IRQ_TYPE_EDGE_BOTH>,
<&pdc 8 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 9 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc 6 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
required-opps = <&rpmhpd_opp_nom>;
diff --git a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
index c4d00a81da39..cecb3e89f7f7 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
@@ -18,6 +18,7 @@
*/
/delete-node/ &cdsp_mem;
+/delete-node/ &domain_idle_states;
/delete-node/ &gpu_zap_mem;
/delete-node/ &gpu_zap_shader;
/delete-node/ &hyp_mem;
@@ -26,6 +27,18 @@
/delete-node/ &sec_apps_mem;
/ {
+ cpus {
+ domain_idle_states: domain-idle-states {
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "domain-idle-state";
+ arm,psci-suspend-param = <0x40003444>;
+ entry-latency-us = <2752>;
+ exit-latency-us = <6562>;
+ min-residency-us = <9926>;
+ };
+ };
+ };
+
reserved-memory {
camera_mem: memory@8ad00000 {
reg = <0x0 0x8ad00000 0x0 0x500000>;
@@ -39,6 +52,10 @@
};
};
+&CLUSTER_PD {
+ domain-idle-states = <&CLUSTER_SLEEP_0>;
+};
+
&lpass_aon {
status = "okay";
};
@@ -119,6 +136,17 @@
dma-coherent;
};
+&venus {
+ iommus = <&apps_smmu 0x2180 0x20>,
+ <&apps_smmu 0x2184 0x20>;
+
+ status = "okay";
+
+ video-firmware {
+ iommus = <&apps_smmu 0x21a2 0x0>;
+ };
+};
+
&watchdog {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi
index 9ea6636125ad..2ba4ea60cb14 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi
@@ -548,6 +548,7 @@ ap_ec_spi: &spi10 {
pinctrl-names = "default";
pinctrl-0 = <&ap_ec_int_l>;
spi-max-frequency = <3000000>;
+ wakeup-source;
cros_ec_pwm: pwm {
compatible = "google,cros-ec-pwm";
diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp-ec-h1.dtsi b/arch/arm64/boot/dts/qcom/sc7280-idp-ec-h1.dtsi
index ebae545c587c..fbfac7534d3c 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-idp-ec-h1.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-idp-ec-h1.dtsi
@@ -19,6 +19,7 @@ ap_ec_spi: &spi10 {
pinctrl-names = "default";
pinctrl-0 = <&ap_ec_int_l>;
spi-max-frequency = <3000000>;
+ wakeup-source;
cros_ec_pwm: pwm {
compatible = "google,cros-ec-pwm";
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index 83b5b76ba179..7e7f0f0fb41b 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -202,6 +202,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_0>;
operating-points-v2 = <&cpu0_opp_table>;
+ capacity-dmips-mhz = <1024>;
+ dynamic-power-coefficient = <100>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 0>;
@@ -229,6 +231,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_100>;
operating-points-v2 = <&cpu0_opp_table>;
+ capacity-dmips-mhz = <1024>;
+ dynamic-power-coefficient = <100>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 0>;
@@ -251,6 +255,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_200>;
operating-points-v2 = <&cpu0_opp_table>;
+ capacity-dmips-mhz = <1024>;
+ dynamic-power-coefficient = <100>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 0>;
@@ -273,6 +279,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_300>;
operating-points-v2 = <&cpu0_opp_table>;
+ capacity-dmips-mhz = <1024>;
+ dynamic-power-coefficient = <100>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 0>;
@@ -295,6 +303,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_400>;
operating-points-v2 = <&cpu4_opp_table>;
+ capacity-dmips-mhz = <1946>;
+ dynamic-power-coefficient = <520>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 1>;
@@ -317,6 +327,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_500>;
operating-points-v2 = <&cpu4_opp_table>;
+ capacity-dmips-mhz = <1946>;
+ dynamic-power-coefficient = <520>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 1>;
@@ -339,6 +351,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_600>;
operating-points-v2 = <&cpu4_opp_table>;
+ capacity-dmips-mhz = <1946>;
+ dynamic-power-coefficient = <520>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 1>;
@@ -361,6 +375,8 @@
power-domain-names = "psci";
next-level-cache = <&L2_700>;
operating-points-v2 = <&cpu7_opp_table>;
+ capacity-dmips-mhz = <1985>;
+ dynamic-power-coefficient = <552>;
interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
<&epss_l3 MASTER_EPSS_L3_APPS &epss_l3 SLAVE_EPSS_L3_SHARED>;
qcom,freq-domain = <&cpufreq_hw 2>;
@@ -453,15 +469,29 @@
};
};
- domain-idle-states {
- CLUSTER_SLEEP_0: cluster-sleep-0 {
+ domain_idle_states: domain-idle-states {
+ CLUSTER_SLEEP_APSS_OFF: cluster-sleep-0 {
compatible = "domain-idle-state";
- idle-state-name = "cluster-power-down";
- arm,psci-suspend-param = <0x40003444>;
+ arm,psci-suspend-param = <0x41000044>;
+ entry-latency-us = <2752>;
+ exit-latency-us = <3048>;
+ min-residency-us = <6118>;
+ };
+
+ CLUSTER_SLEEP_CX_RET: cluster-sleep-1 {
+ compatible = "domain-idle-state";
+ arm,psci-suspend-param = <0x41001344>;
entry-latency-us = <3263>;
+ exit-latency-us = <4562>;
+ min-residency-us = <8467>;
+ };
+
+ CLUSTER_SLEEP_LLCC_OFF: cluster-sleep-2 {
+ compatible = "domain-idle-state";
+ arm,psci-suspend-param = <0x4100b344>;
+ entry-latency-us = <3638>;
exit-latency-us = <6562>;
- min-residency-us = <9926>;
- local-timer-stop;
+ min-residency-us = <9826>;
};
};
};
@@ -872,7 +902,7 @@
CLUSTER_PD: power-domain-cluster {
#power-domain-cells = <0>;
- domain-idle-states = <&CLUSTER_SLEEP_0>;
+ domain-idle-states = <&CLUSTER_SLEEP_APSS_OFF &CLUSTER_SLEEP_CX_RET &CLUSTER_SLEEP_LLCC_OFF>;
};
};
@@ -966,7 +996,7 @@
#address-cells = <1>;
#size-cells = <1>;
- gpu_speed_bin: gpu_speed_bin@1e9 {
+ gpu_speed_bin: gpu-speed-bin@1e9 {
reg = <0x1e9 0x2>;
bits = <5 8>;
};
@@ -2178,8 +2208,16 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0", "msi1", "msi2", "msi3",
+ "msi4", "msi5", "msi6", "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>,
@@ -2345,6 +2383,8 @@
<&apps_smmu 0x4e6 0x0011>;
qcom,ee = <0>;
qcom,controlled-remotely;
+ num-channels = <16>;
+ qcom,num-ees = <4>;
};
crypto: crypto@1dfa000 {
@@ -2648,6 +2688,31 @@
status = "disabled";
};
+ slimbam: dma-controller@3a84000 {
+ compatible = "qcom,bam-v1.7.0";
+ reg = <0 0x03a84000 0 0x20000>;
+ interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
+ #dma-cells = <1>;
+ qcom,controlled-remotely;
+ num-channels = <31>;
+ qcom,ee = <1>;
+ qcom,num-ees = <2>;
+ iommus = <&apps_smmu 0x1826 0x0>;
+ status = "disabled";
+ };
+
+ slim: slim-ngd@3ac0000 {
+ compatible = "qcom,slim-ngd-v1.5.0";
+ reg = <0 0x03ac0000 0 0x2c000>;
+ interrupts = <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&slimbam 3>, <&slimbam 4>;
+ dma-names = "rx", "tx";
+ iommus = <&apps_smmu 0x1826 0x0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
lpass_hm: clock-controller@3c00000 {
compatible = "qcom,sc7280-lpasshm";
reg = <0 0x03c00000 0 0x28>;
@@ -3582,10 +3647,12 @@
<&gcc GCC_USB30_SEC_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 12 IRQ_TYPE_EDGE_BOTH>,
<&pdc 13 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
"dp_hs_phy_irq",
"dm_hs_phy_irq";
@@ -4035,11 +4102,13 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
<&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq",
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
"dp_hs_phy_irq",
"dm_hs_phy_irq",
"ss_phy_irq";
@@ -4065,6 +4134,25 @@
phys = <&usb_1_hsphy>, <&usb_1_qmpphy QMP_USB43DP_USB3_PHY>;
phy-names = "usb2-phy", "usb3-phy";
maximum-speed = "super-speed";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ usb_1_dwc3_hs: endpoint {
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ usb_1_dwc3_ss: endpoint {
+ };
+ };
+ };
};
};
@@ -4091,10 +4179,11 @@
<&mmss_noc MASTER_VIDEO_P0 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "cpu-cfg", "video-mem";
- iommus = <&apps_smmu 0x2180 0x20>,
- <&apps_smmu 0x2184 0x20>;
+ iommus = <&apps_smmu 0x2180 0x20>;
memory-region = <&video_mem>;
+ status = "disabled";
+
video-decoder {
compatible = "venus-decoder";
};
@@ -4103,10 +4192,6 @@
compatible = "venus-encoder";
};
- video-firmware {
- iommus = <&apps_smmu 0x21a2 0x0>;
- };
-
venus_opp_table: opp-table {
compatible = "operating-points-v2";
diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
index 0430d99091e3..32afc78d5b76 100644
--- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
@@ -290,7 +290,7 @@
BIG_CPU_SLEEP_0: cpu-sleep-1-0 {
compatible = "arm,idle-state";
arm,psci-suspend-param = <0x40000004>;
- entry-latency-us = <241>;
+ entry-latency-us = <2411>;
exit-latency-us = <1461>;
min-residency-us = <4488>;
local-timer-stop;
@@ -298,7 +298,15 @@
};
domain-idle-states {
- CLUSTER_SLEEP_0: cluster-sleep-0 {
+ CLUSTER_SLEEP_APSS_OFF: cluster-sleep-0 {
+ compatible = "domain-idle-state";
+ arm,psci-suspend-param = <0x41000044>;
+ entry-latency-us = <3300>;
+ exit-latency-us = <3300>;
+ min-residency-us = <6000>;
+ };
+
+ CLUSTER_SLEEP_AOSS_SLEEP: cluster-sleep-1 {
compatible = "domain-idle-state";
arm,psci-suspend-param = <0x4100a344>;
entry-latency-us = <3263>;
@@ -582,7 +590,7 @@
CLUSTER_PD: power-domain-cpu-cluster0 {
#power-domain-cells = <0>;
- domain-idle-states = <&CLUSTER_SLEEP_0>;
+ domain-idle-states = <&CLUSTER_SLEEP_APSS_OFF &CLUSTER_SLEEP_AOSS_SLEEP>;
};
};
@@ -782,6 +790,7 @@
clock-names = "bi_tcxo",
"bi_tcxo_ao",
"sleep_clk";
+ power-domains = <&rpmhpd SC8180X_CX>;
};
qupv3_id_0: geniqup@8c0000 {
@@ -1708,8 +1717,22 @@
ranges = <0x01000000 0x0 0x60200000 0x0 0x60200000 0x0 0x100000>,
<0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>;
- interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1805,8 +1828,22 @@
ranges = <0x01000000 0x0 0x40200000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1903,8 +1940,22 @@
ranges = <0x01000000 0x0 0x68200000 0x0 0x68200000 0x0 0x100000>,
<0x02000000 0x0 0x68300000 0x0 0x68300000 0x0 0x3d00000>;
- interrupts = <GIC_SPI 755 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 756 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 755 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 754 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 753 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 752 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 751 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 750 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 749 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 747 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -2001,8 +2052,22 @@
ranges = <0x01000000 0x0 0x70200000 0x0 0x70200000 0x0 0x100000>,
<0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x3d00000>;
- interrupts = <GIC_SPI 671 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 672 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 671 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 670 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 669 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 668 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 667 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 666 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 665 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 663 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -2131,9 +2196,11 @@
reg = <0 0x01d87000 0 0x1000>;
clocks = <&rpmhcc RPMH_CXO_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_MEM_CLKREF_EN>;
clock-names = "ref",
- "ref_aux";
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -2173,6 +2240,8 @@
interconnect-names = "gfx-mem";
qcom,gmu = <&gmu>;
+ #cooling-cells = <2>;
+
status = "disabled";
gpu_opp_table: opp-table {
@@ -2692,9 +2761,15 @@
interrupt-controller;
#interrupt-cells = <1>;
- interconnects = <&mmss_noc MASTER_MDP_PORT0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&mmss_noc MASTER_MDP_PORT1 0 &mc_virt SLAVE_EBI_CH0 0>;
- interconnect-names = "mdp0-mem", "mdp1-mem";
+ interconnects = <&mmss_noc MASTER_MDP_PORT0 QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI_CH0 QCOM_ICC_TAG_ALWAYS>,
+ <&mmss_noc MASTER_MDP_PORT1 QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI_CH0 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_AMPSS_M0 QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "mdp0-mem",
+ "mdp1-mem",
+ "cpu-cfg";
iommus = <&apps_smmu 0x800 0x420>;
@@ -2723,10 +2798,8 @@
"rot",
"lut";
- assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>,
- <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
- assigned-clock-rates = <460000000>,
- <19200000>;
+ assigned-clocks = <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
+ assigned-clock-rates = <19200000>;
operating-points-v2 = <&mdp_opp_table>;
power-domains = <&rpmhpd SC8180X_MMCX>;
@@ -3184,7 +3257,7 @@
<&dispcc DISP_CC_MDSS_AHB_CLK>;
clock-names = "aux", "cfg_ahb";
- power-domains = <&dispcc MDSS_GDSC>;
+ power-domains = <&rpmhpd SC8180X_MX>;
#clock-cells = <1>;
#phy-cells = <0>;
@@ -3210,6 +3283,7 @@
"edp_phy_pll_link_clk",
"edp_phy_pll_vco_div_clk";
power-domains = <&rpmhpd SC8180X_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
#clock-cells = <1>;
#reset-cells = <1>;
#power-domain-cells = <1>;
@@ -3248,7 +3322,7 @@
aoss_qmp: power-controller@c300000 {
compatible = "qcom,sc8180x-aoss-qmp", "qcom,aoss-qmp";
- reg = <0x0 0x0c300000 0x0 0x100000>;
+ reg = <0x0 0x0c300000 0x0 0x400>;
interrupts = <GIC_SPI 389 IRQ_TYPE_EDGE_RISING>;
mboxes = <&apss_shared 0>;
@@ -3256,6 +3330,11 @@
#power-domain-cells = <1>;
};
+ sram@c3f0000 {
+ compatible = "qcom,rpmh-stats";
+ reg = <0x0 0x0c3f0000 0x0 0x400>;
+ };
+
spmi_bus: spmi@c440000 {
compatible = "qcom,spmi-pmic-arb";
reg = <0x0 0x0c440000 0x0 0x0001100>,
@@ -3880,8 +3959,15 @@
thermal-sensors = <&tsens0 15>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_top_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- trip-point0 {
+ gpu_top_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
@@ -4030,8 +4116,15 @@
thermal-sensors = <&tsens1 11>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_bottom_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- trip-point0 {
+ gpu_bottom_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
index ffc4406422ae..41215567b3ae 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
@@ -563,6 +563,8 @@
};
&pcie4 {
+ max-link-speed = <2>;
+
perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index def3976bd5bb..15ae94c1602d 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -6,10 +6,8 @@
/dts-v1/;
-#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h>
-#include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h>
-#include <dt-bindings/iio/qcom,spmi-adc7-pmr735a.h>
+#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/input/gpio-keys.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/leds/common.h>
@@ -722,6 +720,8 @@
};
&pcie4 {
+ max-link-speed = <2>;
+
perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>;
@@ -853,27 +853,6 @@
};
&pmk8280_vadc {
- status = "okay";
-
- channel@3 {
- reg = <PMK8350_ADC7_DIE_TEMP>;
- qcom,pre-scaling = <1 1>;
- label = "pmk8350_die_temp";
- };
-
- channel@44 {
- reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>;
- qcom,hw-settle-time = <200>;
- qcom,ratiometric;
- label = "pmk8350_xo_therm";
- };
-
- channel@103 {
- reg = <PM8350_ADC7_DIE_TEMP(1)>;
- qcom,pre-scaling = <1 1>;
- label = "pmc8280_1_die_temp";
- };
-
channel@144 {
reg = <PM8350_ADC7_AMUX_THM1_100K_PU(1)>;
qcom,hw-settle-time = <200>;
@@ -902,12 +881,6 @@
label = "sys_therm4";
};
- channel@303 {
- reg = <PM8350_ADC7_DIE_TEMP(3)>;
- qcom,pre-scaling = <1 1>;
- label = "pmc8280_2_die_temp";
- };
-
channel@344 {
reg = <PM8350_ADC7_AMUX_THM1_100K_PU(3)>;
qcom,hw-settle-time = <200>;
@@ -935,12 +908,6 @@
qcom,ratiometric;
label = "sys_therm8";
};
-
- channel@403 {
- reg = <PMR735A_ADC7_DIE_TEMP>;
- qcom,pre-scaling = <1 1>;
- label = "pmr735a_die_temp";
- };
};
&qup0 {
@@ -1202,7 +1169,7 @@
};
&vamacro {
- pinctrl-0 = <&dmic01_default>, <&dmic02_default>;
+ pinctrl-0 = <&dmic01_default>, <&dmic23_default>;
pinctrl-names = "default";
vdd-micb-supply = <&vreg_s10b>;
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi
index 80ee12ded4f4..945de77911de 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi
@@ -3,6 +3,9 @@
* Copyright (c) 2022, Linaro Limited
*/
+#include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h>
+#include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h>
+#include <dt-bindings/iio/qcom,spmi-adc7-pmr735a.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/spmi/spmi.h>
@@ -84,7 +87,37 @@
#address-cells = <1>;
#size-cells = <0>;
#io-channel-cells = <1>;
- status = "disabled";
+
+ channel@3 {
+ reg = <PMK8350_ADC7_DIE_TEMP>;
+ qcom,pre-scaling = <1 1>;
+ label = "pmk8350_die_temp";
+ };
+
+ channel@44 {
+ reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>;
+ qcom,hw-settle-time = <200>;
+ qcom,ratiometric;
+ label = "pmk8350_xo_therm";
+ };
+
+ channel@103 {
+ reg = <PM8350_ADC7_DIE_TEMP(1)>;
+ qcom,pre-scaling = <1 1>;
+ label = "pmc8280_1_die_temp";
+ };
+
+ channel@303 {
+ reg = <PM8350_ADC7_DIE_TEMP(3)>;
+ qcom,pre-scaling = <1 1>;
+ label = "pmc8280_2_die_temp";
+ };
+
+ channel@403 {
+ reg = <PMR735A_ADC7_DIE_TEMP>;
+ qcom,pre-scaling = <1 1>;
+ label = "pmr735a_die_temp";
+ };
};
pmk8280_adc_tm: adc-tm@3400 {
@@ -126,6 +159,8 @@
compatible = "qcom,spmi-temp-alarm";
reg = <0xa00>;
interrupts-extended = <&spmi_bus 0x1 0xa 0x0 IRQ_TYPE_EDGE_BOTH>;
+ io-channels = <&pmk8280_vadc PM8350_ADC7_DIE_TEMP(1)>;
+ io-channel-names = "thermal";
#thermal-sensor-cells = <0>;
};
@@ -178,6 +213,8 @@
compatible = "qcom,spmi-temp-alarm";
reg = <0xa00>;
interrupts-extended = <&spmi_bus 0x2 0xa 0x0 IRQ_TYPE_EDGE_BOTH>;
+ io-channels = <&pmk8280_vadc PM8350_ADC7_DIE_TEMP(3)>;
+ io-channel-names = "thermal";
#thermal-sensor-cells = <0>;
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
index febf28356ff8..a5b194813079 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
@@ -2257,9 +2257,12 @@
compatible = "qcom,sc8280xp-qmp-ufs-phy";
reg = <0 0x01d87000 0 0x1000>;
- clocks = <&gcc GCC_UFS_CARD_CLKREF_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
- clock-names = "ref", "ref_aux";
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_CARD_CLKREF_CLK>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
power-domains = <&gcc UFS_PHY_GDSC>;
@@ -2319,9 +2322,12 @@
compatible = "qcom,sc8280xp-qmp-ufs-phy";
reg = <0 0x01da7000 0 0x1000>;
- clocks = <&gcc GCC_UFS_1_CARD_CLKREF_CLK>,
- <&gcc GCC_UFS_CARD_PHY_AUX_CLK>;
- clock-names = "ref", "ref_aux";
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_CARD_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_1_CARD_CLKREF_CLK>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
power-domains = <&gcc UFS_CARD_GDSC>;
@@ -2978,7 +2984,7 @@
};
};
- dmic02_default: dmic02-default-state {
+ dmic23_default: dmic23-default-state {
clk-pins {
pins = "gpio8";
function = "dmic2_clk";
@@ -2994,7 +3000,7 @@
};
};
- dmic02_sleep: dmic02-sleep-state {
+ dmic23_sleep: dmic23-sleep-state {
clk-pins {
pins = "gpio8";
function = "dmic2_clk";
@@ -3451,6 +3457,404 @@
};
};
+ cci0: cci@ac4a000 {
+ compatible = "qcom,sc8280xp-cci", "qcom,msm8996-cci";
+ reg = <0 0x0ac4a000 0 0x1000>;
+
+ interrupts = <GIC_SPI 460 IRQ_TYPE_EDGE_RISING>;
+
+ clocks = <&camcc CAMCC_CAMNOC_AXI_CLK>,
+ <&camcc CAMCC_SLOW_AHB_CLK_SRC>,
+ <&camcc CAMCC_CPAS_AHB_CLK>,
+ <&camcc CAMCC_CCI_0_CLK>;
+ clock-names = "camnoc_axi",
+ "slow_ahb_src",
+ "cpas_ahb",
+ "cci";
+
+ power-domains = <&camcc TITAN_TOP_GDSC>;
+
+ pinctrl-0 = <&cci0_default>;
+ pinctrl-1 = <&cci0_sleep>;
+ pinctrl-names = "default", "sleep";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "disabled";
+
+ cci0_i2c0: i2c-bus@0 {
+ reg = <0>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ cci0_i2c1: i2c-bus@1 {
+ reg = <1>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ cci1: cci@ac4b000 {
+ compatible = "qcom,sc8280xp-cci", "qcom,msm8996-cci";
+ reg = <0 0x0ac4b000 0 0x1000>;
+
+ interrupts = <GIC_SPI 271 IRQ_TYPE_EDGE_RISING>;
+
+ clocks = <&camcc CAMCC_CAMNOC_AXI_CLK>,
+ <&camcc CAMCC_SLOW_AHB_CLK_SRC>,
+ <&camcc CAMCC_CPAS_AHB_CLK>,
+ <&camcc CAMCC_CCI_1_CLK>;
+ clock-names = "camnoc_axi",
+ "slow_ahb_src",
+ "cpas_ahb",
+ "cci";
+
+ power-domains = <&camcc TITAN_TOP_GDSC>;
+
+ pinctrl-0 = <&cci1_default>;
+ pinctrl-1 = <&cci1_sleep>;
+ pinctrl-names = "default", "sleep";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "disabled";
+
+ cci1_i2c0: i2c-bus@0 {
+ reg = <0>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ cci1_i2c1: i2c-bus@1 {
+ reg = <1>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ cci2: cci@ac4c000 {
+ compatible = "qcom,sc8280xp-cci", "qcom,msm8996-cci";
+ reg = <0 0x0ac4c000 0 0x1000>;
+
+ interrupts = <GIC_SPI 651 IRQ_TYPE_EDGE_RISING>;
+
+ clocks = <&camcc CAMCC_CAMNOC_AXI_CLK>,
+ <&camcc CAMCC_SLOW_AHB_CLK_SRC>,
+ <&camcc CAMCC_CPAS_AHB_CLK>,
+ <&camcc CAMCC_CCI_2_CLK>;
+ clock-names = "camnoc_axi",
+ "slow_ahb_src",
+ "cpas_ahb",
+ "cci";
+ power-domains = <&camcc TITAN_TOP_GDSC>;
+
+ pinctrl-0 = <&cci2_default>;
+ pinctrl-1 = <&cci2_sleep>;
+ pinctrl-names = "default", "sleep";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "disabled";
+
+ cci2_i2c0: i2c-bus@0 {
+ reg = <0>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ cci2_i2c1: i2c-bus@1 {
+ reg = <1>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ cci3: cci@ac4d000 {
+ compatible = "qcom,sc8280xp-cci", "qcom,msm8996-cci";
+ reg = <0 0x0ac4d000 0 0x1000>;
+
+ interrupts = <GIC_SPI 650 IRQ_TYPE_EDGE_RISING>;
+
+ clocks = <&camcc CAMCC_CAMNOC_AXI_CLK>,
+ <&camcc CAMCC_SLOW_AHB_CLK_SRC>,
+ <&camcc CAMCC_CPAS_AHB_CLK>,
+ <&camcc CAMCC_CCI_3_CLK>;
+ clock-names = "camnoc_axi",
+ "slow_ahb_src",
+ "cpas_ahb",
+ "cci";
+
+ power-domains = <&camcc TITAN_TOP_GDSC>;
+
+ pinctrl-0 = <&cci3_default>;
+ pinctrl-1 = <&cci3_sleep>;
+ pinctrl-names = "default", "sleep";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "disabled";
+
+ cci3_i2c0: i2c-bus@0 {
+ reg = <0>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ cci3_i2c1: i2c-bus@1 {
+ reg = <1>;
+ clock-frequency = <1000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ camss: camss@ac5a000 {
+ compatible = "qcom,sc8280xp-camss";
+
+ reg = <0 0x0ac5a000 0 0x2000>,
+ <0 0x0ac5c000 0 0x2000>,
+ <0 0x0ac65000 0 0x2000>,
+ <0 0x0ac67000 0 0x2000>,
+ <0 0x0acaf000 0 0x4000>,
+ <0 0x0acb3000 0 0x1000>,
+ <0 0x0acb6000 0 0x4000>,
+ <0 0x0acba000 0 0x1000>,
+ <0 0x0acbd000 0 0x4000>,
+ <0 0x0acc1000 0 0x1000>,
+ <0 0x0acc4000 0 0x4000>,
+ <0 0x0acc8000 0 0x1000>,
+ <0 0x0accb000 0 0x4000>,
+ <0 0x0accf000 0 0x1000>,
+ <0 0x0acd2000 0 0x4000>,
+ <0 0x0acd6000 0 0x1000>,
+ <0 0x0acd9000 0 0x4000>,
+ <0 0x0acdd000 0 0x1000>,
+ <0 0x0ace0000 0 0x4000>,
+ <0 0x0ace4000 0 0x1000>;
+ reg-names = "csiphy2",
+ "csiphy3",
+ "csiphy0",
+ "csiphy1",
+ "vfe0",
+ "csid0",
+ "vfe1",
+ "csid1",
+ "vfe2",
+ "csid2",
+ "vfe_lite0",
+ "csid0_lite",
+ "vfe_lite1",
+ "csid1_lite",
+ "vfe_lite2",
+ "csid2_lite",
+ "vfe_lite3",
+ "csid3_lite",
+ "vfe3",
+ "csid3";
+
+ interrupts = <GIC_SPI 359 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 448 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 464 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 465 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 466 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 467 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 468 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 477 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 478 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 479 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 640 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 641 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 758 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 759 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 760 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 761 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 762 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 764 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "csid1_lite",
+ "vfe_lite1",
+ "csiphy3",
+ "csid0",
+ "vfe0",
+ "csid1",
+ "vfe1",
+ "csid0_lite",
+ "vfe_lite0",
+ "csiphy0",
+ "csiphy1",
+ "csiphy2",
+ "csid2",
+ "vfe2",
+ "csid3_lite",
+ "csid2_lite",
+ "vfe_lite3",
+ "vfe_lite2",
+ "csid3",
+ "vfe3";
+
+ power-domains = <&camcc IFE_0_GDSC>,
+ <&camcc IFE_1_GDSC>,
+ <&camcc IFE_2_GDSC>,
+ <&camcc IFE_3_GDSC>,
+ <&camcc TITAN_TOP_GDSC>;
+ power-domain-names = "ife0",
+ "ife1",
+ "ife2",
+ "ife3",
+ "top";
+
+ clocks = <&camcc CAMCC_CAMNOC_AXI_CLK>,
+ <&camcc CAMCC_CPAS_AHB_CLK>,
+ <&camcc CAMCC_CSIPHY0_CLK>,
+ <&camcc CAMCC_CSI0PHYTIMER_CLK>,
+ <&camcc CAMCC_CSIPHY1_CLK>,
+ <&camcc CAMCC_CSI1PHYTIMER_CLK>,
+ <&camcc CAMCC_CSIPHY2_CLK>,
+ <&camcc CAMCC_CSI2PHYTIMER_CLK>,
+ <&camcc CAMCC_CSIPHY3_CLK>,
+ <&camcc CAMCC_CSI3PHYTIMER_CLK>,
+ <&camcc CAMCC_IFE_0_AXI_CLK>,
+ <&camcc CAMCC_IFE_0_CLK>,
+ <&camcc CAMCC_IFE_0_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_0_CSID_CLK>,
+ <&camcc CAMCC_IFE_1_AXI_CLK>,
+ <&camcc CAMCC_IFE_1_CLK>,
+ <&camcc CAMCC_IFE_1_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_1_CSID_CLK>,
+ <&camcc CAMCC_IFE_2_AXI_CLK>,
+ <&camcc CAMCC_IFE_2_CLK>,
+ <&camcc CAMCC_IFE_2_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_2_CSID_CLK>,
+ <&camcc CAMCC_IFE_3_AXI_CLK>,
+ <&camcc CAMCC_IFE_3_CLK>,
+ <&camcc CAMCC_IFE_3_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_3_CSID_CLK>,
+ <&camcc CAMCC_IFE_LITE_0_CLK>,
+ <&camcc CAMCC_IFE_LITE_0_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_LITE_0_CSID_CLK>,
+ <&camcc CAMCC_IFE_LITE_1_CLK>,
+ <&camcc CAMCC_IFE_LITE_1_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_LITE_1_CSID_CLK>,
+ <&camcc CAMCC_IFE_LITE_2_CLK>,
+ <&camcc CAMCC_IFE_LITE_2_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_LITE_2_CSID_CLK>,
+ <&camcc CAMCC_IFE_LITE_3_CLK>,
+ <&camcc CAMCC_IFE_LITE_3_CPHY_RX_CLK>,
+ <&camcc CAMCC_IFE_LITE_3_CSID_CLK>,
+ <&gcc GCC_CAMERA_HF_AXI_CLK>,
+ <&gcc GCC_CAMERA_SF_AXI_CLK>;
+ clock-names = "camnoc_axi",
+ "cpas_ahb",
+ "csiphy0",
+ "csiphy0_timer",
+ "csiphy1",
+ "csiphy1_timer",
+ "csiphy2",
+ "csiphy2_timer",
+ "csiphy3",
+ "csiphy3_timer",
+ "vfe0_axi",
+ "vfe0",
+ "vfe0_cphy_rx",
+ "vfe0_csid",
+ "vfe1_axi",
+ "vfe1",
+ "vfe1_cphy_rx",
+ "vfe1_csid",
+ "vfe2_axi",
+ "vfe2",
+ "vfe2_cphy_rx",
+ "vfe2_csid",
+ "vfe3_axi",
+ "vfe3",
+ "vfe3_cphy_rx",
+ "vfe3_csid",
+ "vfe_lite0",
+ "vfe_lite0_cphy_rx",
+ "vfe_lite0_csid",
+ "vfe_lite1",
+ "vfe_lite1_cphy_rx",
+ "vfe_lite1_csid",
+ "vfe_lite2",
+ "vfe_lite2_cphy_rx",
+ "vfe_lite2_csid",
+ "vfe_lite3",
+ "vfe_lite3_cphy_rx",
+ "vfe_lite3_csid",
+ "gcc_axi_hf",
+ "gcc_axi_sf";
+
+ iommus = <&apps_smmu 0x2000 0x4e0>,
+ <&apps_smmu 0x2020 0x4e0>,
+ <&apps_smmu 0x2040 0x4e0>,
+ <&apps_smmu 0x2060 0x4e0>,
+ <&apps_smmu 0x2080 0x4e0>,
+ <&apps_smmu 0x20e0 0x4e0>,
+ <&apps_smmu 0x20c0 0x4e0>,
+ <&apps_smmu 0x20a0 0x4e0>,
+ <&apps_smmu 0x2400 0x4e0>,
+ <&apps_smmu 0x2420 0x4e0>,
+ <&apps_smmu 0x2440 0x4e0>,
+ <&apps_smmu 0x2460 0x4e0>,
+ <&apps_smmu 0x2480 0x4e0>,
+ <&apps_smmu 0x24e0 0x4e0>,
+ <&apps_smmu 0x24c0 0x4e0>,
+ <&apps_smmu 0x24a0 0x4e0>;
+
+ interconnects = <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_CAMERA_CFG 0>,
+ <&mmss_noc MASTER_CAMNOC_HF 0 &mc_virt SLAVE_EBI1 0>,
+ <&mmss_noc MASTER_CAMNOC_SF 0 &mc_virt SLAVE_EBI1 0>,
+ <&mmss_noc MASTER_CAMNOC_ICP 0 &mc_virt SLAVE_EBI1 0>;
+ interconnect-names = "cam_ahb",
+ "cam_hf_mnoc",
+ "cam_sf_mnoc",
+ "cam_sf_icp_mnoc";
+
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ port@2 {
+ reg = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ port@3 {
+ reg = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+ };
+
camcc: clock-controller@ad00000 {
compatible = "qcom,sc8280xp-camcc";
reg = <0 0x0ad00000 0 0x20000>;
@@ -4011,6 +4415,28 @@
interrupt-controller;
};
+ tsens2: thermal-sensor@c251000 {
+ compatible = "qcom,sc8280xp-tsens", "qcom,tsens-v2";
+ reg = <0 0x0c251000 0 0x1ff>,
+ <0 0x0c224000 0 0x8>;
+ #qcom,sensors = <11>;
+ interrupts-extended = <&pdc 122 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 124 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "uplow", "critical";
+ #thermal-sensor-cells = <1>;
+ };
+
+ tsens3: thermal-sensor@c252000 {
+ compatible = "qcom,sc8280xp-tsens", "qcom,tsens-v2";
+ reg = <0 0x0c252000 0 0x1ff>,
+ <0 0x0c225000 0 0x8>;
+ #qcom,sensors = <5>;
+ interrupts-extended = <&pdc 123 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 125 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "uplow", "critical";
+ #thermal-sensor-cells = <1>;
+ };
+
tsens0: thermal-sensor@c263000 {
compatible = "qcom,sc8280xp-tsens", "qcom,tsens-v2";
reg = <0 0x0c263000 0 0x1ff>, /* TM */
@@ -4076,6 +4502,150 @@
#interrupt-cells = <2>;
gpio-ranges = <&tlmm 0 0 230>;
wakeup-parent = <&pdc>;
+
+ cci0_default: cci0-default-state {
+ cci0_i2c0_default: cci0-i2c0-default-pins {
+ /* cci_i2c_sda0, cci_i2c_scl0 */
+ pins = "gpio113", "gpio114";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ cci0_i2c1_default: cci0-i2c1-default-pins {
+ /* cci_i2c_sda1, cci_i2c_scl1 */
+ pins = "gpio115", "gpio116";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ cci0_sleep: cci0-sleep-state {
+ cci0_i2c0_sleep: cci0-i2c0-sleep-pins {
+ /* cci_i2c_sda0, cci_i2c_scl0 */
+ pins = "gpio113", "gpio114";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+
+ cci0_i2c1_sleep: cci0-i2c1-sleep-pins {
+ /* cci_i2c_sda1, cci_i2c_scl1 */
+ pins = "gpio115", "gpio116";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+ };
+
+ cci1_default: cci1-default-state {
+ cci1_i2c0_default: cci1-i2c0-default-pins {
+ /* cci_i2c_sda2, cci_i2c_scl2 */
+ pins = "gpio10","gpio11";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ cci1_i2c1_default: cci1-i2c1-default-pins {
+ /* cci_i2c_sda3, cci_i2c_scl3 */
+ pins = "gpio123","gpio124";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ cci1_sleep: cci1-sleep-state {
+ cci1_i2c0_sleep: cci1-i2c0-sleep-pins {
+ /* cci_i2c_sda2, cci_i2c_scl2 */
+ pins = "gpio10","gpio11";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+
+ cci1_i2c1_sleep: cci1-i2c1-sleep-pins {
+ /* cci_i2c_sda3, cci_i2c_scl3 */
+ pins = "gpio123","gpio124";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+ };
+
+ cci2_default: cci2-default-state {
+ cci2_i2c0_default: cci2-i2c0-default-pins {
+ /* cci_i2c_sda4, cci_i2c_scl4 */
+ pins = "gpio117","gpio118";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ cci2_i2c1_default: cci2-i2c1-default-pins {
+ /* cci_i2c_sda5, cci_i2c_scl5 */
+ pins = "gpio12","gpio13";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ cci2_sleep: cci2-sleep-state {
+ cci2_i2c0_sleep: cci2-i2c0-sleep-pins {
+ /* cci_i2c_sda4, cci_i2c_scl4 */
+ pins = "gpio117","gpio118";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+
+ cci2_i2c1_sleep: cci2-i2c1-sleep-pins {
+ /* cci_i2c_sda5, cci_i2c_scl5 */
+ pins = "gpio12","gpio13";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+ };
+
+ cci3_default: cci3-default-state {
+ cci3_i2c0_default: cci3-i2c0-default-pins {
+ /* cci_i2c_sda6, cci_i2c_scl6 */
+ pins = "gpio145","gpio146";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ cci3_i2c1_default: cci3-i2c1-default-pins {
+ /* cci_i2c_sda7, cci_i2c_scl7 */
+ pins = "gpio164","gpio165";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ cci3_sleep: cci3-sleep-state {
+ cci3_i2c0_sleep: cci3-i2c0-sleep-pins {
+ /* cci_i2c_sda6, cci_i2c_scl6 */
+ pins = "gpio145","gpio146";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+
+ cci3_i2c1_sleep: cci3-i2c1-sleep-pins {
+ /* cci_i2c_sda7, cci_i2c_scl7 */
+ pins = "gpio164","gpio165";
+ function = "cci_i2c";
+ drive-strength = <2>;
+ bias-pull-down;
+ };
+ };
};
apps_smmu: iommu@15000000 {
@@ -5212,6 +5782,21 @@
};
};
+ gpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens2 2>;
+
+ trips {
+ gpu-crit {
+ temperature = <110000>;
+ hysteresis = <1000>;
+ type = "critical";
+ };
+ };
+ };
+
mem-thermal {
polling-delay-passive = <250>;
polling-delay = <1000>;
diff --git a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts
index 2ed39d402d3f..702ab49bbc59 100644
--- a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts
+++ b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts
@@ -461,3 +461,8 @@
dr_mode = "peripheral";
extcon = <&extcon_usb>;
};
+
+&usb3_qmpphy {
+ vdda-phy-supply = <&vreg_l1b_0p925>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/qcom/sdm450-motorola-ali.dts b/arch/arm64/boot/dts/qcom/sdm450-motorola-ali.dts
index 362be5719dd2..e27f3c5d5bba 100644
--- a/arch/arm64/boot/dts/qcom/sdm450-motorola-ali.dts
+++ b/arch/arm64/boot/dts/qcom/sdm450-motorola-ali.dts
@@ -4,7 +4,7 @@
*/
/dts-v1/;
-#include "msm8953.dtsi"
+#include "sdm450.dtsi"
#include "pm8953.dtsi"
#include "pmi8950.dtsi"
diff --git a/arch/arm64/boot/dts/qcom/sdm450.dtsi b/arch/arm64/boot/dts/qcom/sdm450.dtsi
new file mode 100644
index 000000000000..b222aeb459a3
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/sdm450.dtsi
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright (c) 2023, Luca Weiss <luca@z3ntu.xyz> */
+
+#include "msm8953.dtsi"
+
+&gpu_opp_table {
+ /delete-node/ opp-650000000;
+
+ opp-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_turbo>;
+ };
+};
diff --git a/arch/arm64/boot/dts/qcom/sdm630-sony-xperia-nile.dtsi b/arch/arm64/boot/dts/qcom/sdm630-sony-xperia-nile.dtsi
index 87d0293c728d..819a5f8825e7 100644
--- a/arch/arm64/boot/dts/qcom/sdm630-sony-xperia-nile.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm630-sony-xperia-nile.dtsi
@@ -241,6 +241,16 @@
};
};
+&pm660l_wled {
+ status = "okay";
+
+ qcom,switching-freq = <800>;
+ qcom,ovp-millivolt = <29600>;
+ qcom,current-boost-limit = <970>;
+ qcom,current-limit-microamp = <17500>;
+ qcom,num-strings = <2>;
+};
+
&pon_pwrkey {
status = "okay";
};
@@ -658,10 +668,16 @@
};
&usb3 {
+ qcom,select-utmi-as-pipe-clk;
+
status = "okay";
};
&usb3_dwc3 {
+ maximum-speed = "high-speed";
+ phys = <&qusb2phy0>;
+ phy-names = "usb2-phy";
+
dr_mode = "peripheral";
extcon = <&extcon_usb>;
};
diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi
index 513fe5e76b68..f5921b80ef94 100644
--- a/arch/arm64/boot/dts/qcom/sdm630.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi
@@ -13,6 +13,7 @@
#include <dt-bindings/power/qcom-rpmpd.h>
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/thermal/thermal.h>
#include <dt-bindings/soc/qcom,apr.h>
/ {
@@ -1100,6 +1101,7 @@
interconnect-names = "gfx-mem";
operating-points-v2 = <&gpu_sdm630_opp_table>;
+ #cooling-cells = <2>;
status = "disabled";
@@ -1281,12 +1283,16 @@
<&gcc GCC_USB30_MASTER_CLK>;
assigned-clock-rates = <19200000>, <120000000>;
- interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq";
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB_30_GDSC>;
- qcom,select-utmi-as-pipe-clk;
resets = <&gcc GCC_USB_30_BCR>;
@@ -1297,17 +1303,38 @@
snps,dis_u2_susphy_quirk;
snps,dis_enblslpm_quirk;
- /*
- * SDM630 technically supports USB3 but I
- * haven't seen any devices making use of it.
- */
- maximum-speed = "high-speed";
- phys = <&qusb2phy0>;
- phy-names = "usb2-phy";
+ phys = <&qusb2phy0>, <&usb3_qmpphy>;
+ phy-names = "usb2-phy", "usb3-phy";
snps,hird-threshold = /bits/ 8 <0>;
};
};
+ usb3_qmpphy: phy@c010000 {
+ compatible = "qcom,sdm660-qmp-usb3-phy";
+ reg = <0x0c010000 0x1000>;
+
+ clocks = <&gcc GCC_USB3_PHY_AUX_CLK>,
+ <&gcc GCC_USB3_CLKREF_CLK>,
+ <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>,
+ <&gcc GCC_USB3_PHY_PIPE_CLK>;
+ clock-names = "aux",
+ "ref",
+ "cfg_ahb",
+ "pipe";
+ clock-output-names = "usb3_phy_pipe_clk_src";
+ #clock-cells = <0>;
+ #phy-cells = <0>;
+
+ resets = <&gcc GCC_USB3_PHY_BCR>,
+ <&gcc GCC_USB3PHY_PHY_BCR>;
+ reset-names = "phy",
+ "phy_phy";
+
+ qcom,tcsr-reg = <&tcsr_regs_1 0x6b244>;
+
+ status = "disabled";
+ };
+
qusb2phy0: phy@c012000 {
compatible = "qcom,sdm660-qusb2-phy";
reg = <0x0c012000 0x180>;
@@ -1463,8 +1490,12 @@
<&gcc GCC_USB20_MASTER_CLK>;
assigned-clock-rates = <19200000>, <60000000>;
- interrupts = <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq";
+ interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "hs_phy_irq";
qcom,select-utmi-as-pipe-clk;
@@ -2551,6 +2582,13 @@
thermal-sensors = <&tsens 8>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_alert0>;
+ cooling-device = <&adreno_gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
gpu_alert0: trip-point0 {
temperature = <90000>;
diff --git a/arch/arm64/boot/dts/qcom/sdm632.dtsi b/arch/arm64/boot/dts/qcom/sdm632.dtsi
index 645b9f6a801f..95b025ea260b 100644
--- a/arch/arm64/boot/dts/qcom/sdm632.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm632.dtsi
@@ -79,3 +79,11 @@
compatible = "qcom,kryo250";
capacity-dmips-mhz = <1980>;
};
+
+&gpu_opp_table {
+ opp-725000000 {
+ opp-hz = /bits/ 64 <725000000>;
+ opp-supported-hw = <0xff>;
+ required-opps = <&rpmpd_opp_turbo>;
+ };
+};
diff --git a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts
index 3c47410ba94c..7167f75bced3 100644
--- a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts
+++ b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts
@@ -413,10 +413,16 @@
};
&usb3 {
+ qcom,select-utmi-as-pipe-clk;
+
status = "okay";
};
&usb3_dwc3 {
+ maximum-speed = "high-speed";
+ phys = <&qusb2phy0>;
+ phy-names = "usb2-phy";
+
dr_mode = "peripheral";
extcon = <&extcon_usb>;
};
diff --git a/arch/arm64/boot/dts/qcom/sdm670.dtsi b/arch/arm64/boot/dts/qcom/sdm670.dtsi
index 4d7b77a23159..80e81c4233b3 100644
--- a/arch/arm64/boot/dts/qcom/sdm670.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm670.dtsi
@@ -1320,12 +1320,16 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <150000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 9 IRQ_TYPE_EDGE_BOTH>,
<&pdc 8 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 9 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc 6 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
index 0ab5e8f53ac9..e8276db9eabb 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
@@ -852,6 +852,7 @@ ap_ts_i2c: &i2c14 {
pinctrl-names = "default";
pinctrl-0 = <&ec_ap_int_l>;
spi-max-frequency = <3000000>;
+ wakeup-source;
cros_ec_pwm: pwm {
compatible = "google,cros-ec-pwm";
diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
index ab6220456513..1f517328199b 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
@@ -580,7 +580,7 @@
&pcie0 {
status = "okay";
perst-gpios = <&tlmm 35 GPIO_ACTIVE_LOW>;
- enable-gpio = <&tlmm 134 GPIO_ACTIVE_HIGH>;
+ wake-gpios = <&tlmm 134 GPIO_ACTIVE_HIGH>;
vddpe-3v3-supply = <&pcie0_3p3v_dual>;
diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
index e821103d49c0..46e25c53829a 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
@@ -508,13 +508,13 @@
};
&q6afedai {
- qi2s@22 {
- reg = <22>;
+ dai@22 {
+ reg = <QUATERNARY_MI2S_RX>;
qcom,sd-lines = <1>;
};
- qi2s@23 {
- reg = <23>;
+ dai@23 {
+ reg = <QUATERNARY_MI2S_TX>;
qcom,sd-lines = <0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
index fbb8655653fb..486ce175e6bc 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
@@ -60,7 +60,7 @@
};
reserved-memory {
- framebuffer_region@9d400000 {
+ framebuffer@9d400000 {
reg = <0x0 0x9d400000 0x0 (1080 * 2160 * 4)>;
no-map;
};
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index c2244824355a..2f20be99ee7e 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -2639,10 +2639,12 @@
compatible = "qcom,sdm845-qmp-ufs-phy";
reg = <0 0x01d87000 0 0x1000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_MEM_CLKREF_CLK>;
clock-names = "ref",
- "ref_aux";
- clocks = <&gcc GCC_UFS_MEM_CLKREF_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -3366,8 +3368,8 @@
qcom,qmp = <&aoss_qmp>;
- power-domains = <&rpmhpd SDM845_CX>,
- <&rpmhpd SDM845_MX>;
+ power-domains = <&rpmhpd SDM845_LCX>,
+ <&rpmhpd SDM845_LMX>;
power-domain-names = "lcx", "lmx";
memory-region = <&slpi_mem>;
@@ -4058,12 +4060,16 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <150000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>,
<&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>,
- <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
@@ -4109,12 +4115,16 @@
<&gcc GCC_USB30_SEC_MASTER_CLK>;
assigned-clock-rates = <19200000>, <150000000>;
- interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>,
<&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>,
- <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_SEC_GDSC>;
@@ -4760,6 +4770,7 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;
+ #cooling-cells = <2>;
interconnects = <&mem_noc MASTER_GFX3D 0 &mem_noc SLAVE_EBI1 0>;
interconnect-names = "gfx-mem";
@@ -5568,7 +5579,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster0_crit: cluster0_crit {
+ cluster0_crit: cluster0-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -5588,7 +5599,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster1_crit: cluster1_crit {
+ cluster1_crit: cluster1-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -5602,8 +5613,15 @@
thermal-sensors = <&tsens0 11>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_top_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu1_alert0: trip-point0 {
+ gpu_top_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
@@ -5617,8 +5635,15 @@
thermal-sensors = <&tsens0 12>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_bottom_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu2_alert0: trip-point0 {
+ gpu_bottom_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
diff --git a/arch/arm64/boot/dts/qcom/sm4450.dtsi b/arch/arm64/boot/dts/qcom/sm4450.dtsi
index 3e7ae3bebbe0..603c962661cc 100644
--- a/arch/arm64/boot/dts/qcom/sm4450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm4450.dtsi
@@ -17,7 +17,7 @@
chosen { };
- clocks{
+ clocks {
xo_board: xo-board {
compatible = "fixed-clock";
clock-frequency = <76800000>;
diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi
index 160e098f1075..aca0a87092e4 100644
--- a/arch/arm64/boot/dts/qcom/sm6115.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi
@@ -14,6 +14,7 @@
#include <dt-bindings/interconnect/qcom,sm6115.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/qcom-rpmpd.h>
+#include <dt-bindings/thermal/thermal.h>
/ {
interrupt-parent = <&intc>;
@@ -614,6 +615,11 @@
#hwlock-cells = <1>;
};
+ tcsr_regs: syscon@3c0000 {
+ compatible = "qcom,sm6115-tcsr", "syscon";
+ reg = <0x0 0x003c0000 0x0 0x40000>;
+ };
+
tlmm: pinctrl@500000 {
compatible = "qcom,sm6115-tlmm";
reg = <0x0 0x00500000 0x0 0x400000>,
@@ -878,8 +884,31 @@
clock-output-names = "usb3_phy_pipe_clk_src";
#phy-cells = <0>;
+ orientation-switch;
+
+ qcom,tcsr-reg = <&tcsr_regs 0xb244>;
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ usb_qmpphy_out: endpoint {
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ usb_qmpphy_usb_ss_in: endpoint {
+ remote-endpoint = <&usb_dwc3_ss>;
+ };
+ };
+ };
};
system_noc: interconnect@1880000 {
@@ -1194,8 +1223,12 @@
compatible = "qcom,sm6115-qmp-ufs-phy";
reg = <0x0 0x04807000 0x0 0x1000>;
- clocks = <&gcc GCC_UFS_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
- clock-names = "ref", "ref_aux";
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_CLKREF_CLK>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -1304,6 +1337,9 @@
&config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>,
<&system_noc MASTER_QUP_0 RPM_ALWAYS_TAG
&bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>;
+ interconnect-names = "qup-core",
+ "qup-config",
+ "qup-memory";
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -1586,9 +1622,14 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <66666667>;
- interrupts = <GIC_SPI 260 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 260 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 254 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq";
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "hs_phy_irq",
+ "ss_phy_irq";
resets = <&gcc GCC_USB30_PRIM_BCR>;
power-domains = <&gcc GCC_USB30_PRIM_GDSC>;
@@ -1600,7 +1641,6 @@
interconnect-names = "usb-ddr",
"apps-usb";
- qcom,select-utmi-as-pipe-clk;
status = "disabled";
usb_dwc3: usb@4e00000 {
@@ -1615,6 +1655,28 @@
snps,has-lpm-erratum;
snps,hird-threshold = /bits/ 8 <0x10>;
snps,usb3_lpm_capable;
+
+ usb-role-switch;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ usb_dwc3_hs: endpoint {
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ usb_dwc3_ss: endpoint {
+ remote-endpoint = <&usb_qmpphy_usb_ss_in>;
+ };
+ };
+ };
};
};
@@ -1646,6 +1708,7 @@
nvmem-cells = <&gpu_speed_bin>;
nvmem-cell-names = "speed_bin";
+ #cooling-cells = <2>;
status = "disabled";
@@ -3085,7 +3148,7 @@
type = "passive";
};
- cpu4_crit: cpu_crit {
+ cpu4_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3111,7 +3174,7 @@
type = "passive";
};
- cpu5_crit: cpu_crit {
+ cpu5_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3137,7 +3200,7 @@
type = "passive";
};
- cpu6_crit: cpu_crit {
+ cpu6_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3163,7 +3226,7 @@
type = "passive";
};
- cpu7_crit: cpu_crit {
+ cpu7_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3189,7 +3252,7 @@
type = "passive";
};
- cpu45_crit: cpu_crit {
+ cpu45_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3215,7 +3278,7 @@
type = "passive";
};
- cpu67_crit: cpu_crit {
+ cpu67_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3241,7 +3304,7 @@
type = "passive";
};
- cpu0123_crit: cpu_crit {
+ cpu0123_crit: cpu-crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
@@ -3294,8 +3357,15 @@
polling-delay = <0>;
thermal-sensors = <&tsens0 15>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- trip-point0 {
+ gpu_alert0: trip-point0 {
temperature = <115000>;
hysteresis = <5000>;
type = "passive";
@@ -3304,7 +3374,7 @@
trip-point1 {
temperature = <125000>;
hysteresis = <1000>;
- type = "passive";
+ type = "critical";
};
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi
index 1dd3a4056e26..98ab08356088 100644
--- a/arch/arm64/boot/dts/qcom/sm6125.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi
@@ -812,10 +812,12 @@
compatible = "qcom,sm6125-qmp-ufs-phy";
reg = <0x04807000 0xdb8>;
- clocks = <&gcc GCC_UFS_MEM_CLKREF_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_MEM_CLKREF_CLK>;
clock-names = "ref",
- "ref_aux";
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -1185,9 +1187,14 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <66666667>;
- interrupts = <GIC_SPI 260 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 260 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 254 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq";
+ interrupt-names = "pwr_event",
+ "qusb2_phy",
+ "hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
qcom,select-utmi-as-pipe-clk;
diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi
index 43cffe8e1247..24bcec3366ef 100644
--- a/arch/arm64/boot/dts/qcom/sm6350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi
@@ -19,6 +19,7 @@
#include <dt-bindings/phy/phy-qcom-qmp.h>
#include <dt-bindings/power/qcom-rpmpd.h>
#include <dt-bindings/soc/qcom,rpmh-rsc.h>
+#include <dt-bindings/thermal/thermal.h>
/ {
interrupt-parent = <&intc>;
@@ -1189,10 +1190,12 @@
compatible = "qcom,sm6350-qmp-ufs-phy";
reg = <0 0x01d87000 0 0x1000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_MEM_CLKREF_CLK>;
clock-names = "ref",
- "ref_aux";
- clocks = <&gcc GCC_UFS_MEM_CLKREF_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -1325,10 +1328,11 @@
qcom,gmu = <&gmu>;
nvmem-cells = <&gpu_speed_bin>;
nvmem-cell-names = "speed_bin";
+ #cooling-cells = <2>;
status = "disabled";
- zap-shader {
+ gpu_zap_shader: zap-shader {
memory-region = <&pil_gpu_mem>;
};
@@ -1439,8 +1443,6 @@
operating-points-v2 = <&gmu_opp_table>;
- status = "disabled";
-
gmu_opp_table: opp-table {
compatible = "operating-points-v2";
@@ -1830,12 +1832,15 @@
"mock_utmi";
interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 17 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 14 IRQ_TYPE_EDGE_BOTH>;
-
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
@@ -1966,6 +1971,13 @@
interrupt-controller;
#interrupt-cells = <1>;
+ interconnects = <&mmss_noc MASTER_MDP_PORT0 QCOM_ICC_TAG_ALWAYS
+ &clk_virt SLAVE_EBI_CH0 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_AMPSS_M0 QCOM_ICC_TAG_ACTIVE_ONLY
+ &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>;
+ interconnect-names = "mdp0-mem",
+ "cpu-cfg";
+
clocks = <&gcc GCC_DISP_AHB_CLK>,
<&gcc GCC_DISP_AXI_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>;
@@ -2698,6 +2710,569 @@
};
};
+ thermal-zones {
+ aoss0-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 0>;
+
+ trips {
+ aoss0-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ aoss1-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 0>;
+
+ trips {
+ aoss1-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ audio-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 2>;
+
+ trips {
+ audio-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ camera-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 5>;
+
+ trips {
+ camera-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ cpu0-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 1>;
+
+ trips {
+ cpu0_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu0-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu0_alert0>;
+ cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu1-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 2>;
+
+ trips {
+ cpu1_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu1-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu1_alert0>;
+ cooling-device = <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu2-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 3>;
+
+ trips {
+ cpu2_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu2-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu2_alert0>;
+ cooling-device = <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu3-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 4>;
+
+ trips {
+ cpu3_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu3-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu3_alert0>;
+ cooling-device = <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu4-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 5>;
+
+ trips {
+ cpu4_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu4-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu4_alert0>;
+ cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu5-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 6>;
+
+ trips {
+ cpu5_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu5-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu5_alert0>;
+ cooling-device = <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu6-left-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 9>;
+
+ trips {
+ cpu6_left_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu6-left-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu6_left_alert0>;
+ cooling-device = <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu6-right-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 10>;
+
+ trips {
+ cpu6_right_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu6-right-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu6_right_alert0>;
+ cooling-device = <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu7-left-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 11>;
+
+ trips {
+ cpu7_left_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu7-left-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu7_left_alert0>;
+ cooling-device = <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu7-right-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 12>;
+
+ trips {
+ cpu7_right_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu7-right-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu7_right_alert0>;
+ cooling-device = <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpuss0-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 7>;
+
+ trips {
+ cpuss0-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ cpuss1-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 8>;
+
+ trips {
+ cpuss1-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ cwlan-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 1>;
+
+ trips {
+ cwlan-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ ddr-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 3>;
+
+ trips {
+ ddr-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ gpuss0-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 13>;
+
+ trips {
+ gpuss0_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ gpuss0-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&gpuss0_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ gpuss1-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 14>;
+
+ trips {
+ gpuss1_alert0: trip-point0 {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ gpuss1-crit {
+ temperature = <115000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&gpuss1_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ modem-core0-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 6>;
+
+ trips {
+ modem-core0-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ modem-core1-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 7>;
+
+ trips {
+ modem-core1-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ modem-scl-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 9>;
+
+ trips {
+ modem-scl-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ modem-vec-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 8>;
+
+ trips {
+ modem-vec-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ npu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 10>;
+
+ trips {
+ npu-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ q6-hvx-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 4>;
+
+ trips {
+ q6-hvx-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+
+ video-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens1 11>;
+
+ trips {
+ video-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+ };
+ };
+
timer {
compatible = "arm,armv8-timer";
clock-frequency = <19200000>;
diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi
index 7ac8bf26dda3..4386f8a9c636 100644
--- a/arch/arm64/boot/dts/qcom/sm6375.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi
@@ -1431,13 +1431,15 @@
assigned-clock-rates = <19200000>, <133333333>;
interrupts-extended = <&intc GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
- <&mpm 12 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 254 IRQ_TYPE_LEVEL_HIGH>,
+ <&mpm 94 IRQ_TYPE_EDGE_BOTH>,
<&mpm 93 IRQ_TYPE_EDGE_BOTH>,
- <&mpm 94 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&mpm 12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
diff --git a/arch/arm64/boot/dts/qcom/sm7125-xiaomi-common.dtsi b/arch/arm64/boot/dts/qcom/sm7125-xiaomi-common.dtsi
index e55cd83c19b8..29289fa41b13 100644
--- a/arch/arm64/boot/dts/qcom/sm7125-xiaomi-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm7125-xiaomi-common.dtsi
@@ -152,6 +152,9 @@
regulator-min-microvolt = <824000>;
regulator-max-microvolt = <928000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
};
vreg_l5a_2p7: ldo5 {
@@ -188,6 +191,9 @@
regulator-min-microvolt = <1696000>;
regulator-max-microvolt = <1952000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
};
vreg_l13a_1p8: ldo13 {
@@ -230,6 +236,9 @@
regulator-min-microvolt = <2696000>;
regulator-max-microvolt = <3304000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
};
};
@@ -258,6 +267,9 @@
regulator-min-microvolt = <1144000>;
regulator-max-microvolt = <1304000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
};
vreg_l4c_1p8: ldo4 {
@@ -398,6 +410,20 @@
};
};
+&ufs_mem_hc {
+ vcc-supply = <&vreg_l19a_3p0>;
+ vcc-max-microamp = <600000>;
+ vccq2-supply = <&vreg_l12a_1p8>;
+ vccq2-max-microamp = <600000>;
+ status = "okay";
+};
+
+&ufs_mem_phy {
+ vdda-phy-supply = <&vreg_l4a_0p88>;
+ vdda-pll-supply = <&vreg_l3c_1p23>;
+ status = "okay";
+};
+
&usb_1 {
qcom,select-utmi-as-pipe-clk;
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sm7125-xiaomi-curtana.dts b/arch/arm64/boot/dts/qcom/sm7125-xiaomi-curtana.dts
new file mode 100644
index 000000000000..12f517a8492c
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/sm7125-xiaomi-curtana.dts
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023, Joe Mason <buddyjojo06@outlook.com>
+ */
+
+/dts-v1/;
+
+#include "sm7125-xiaomi-common.dtsi"
+
+/ {
+ model = "Xiaomi Redmi Note 9S";
+ compatible = "xiaomi,curtana", "qcom,sm7125";
+
+ /* required for bootloader to select correct board */
+ qcom,board-id = <0x20022 1>;
+};
diff --git a/arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts b/arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts
index ade619805519..bc67e8c1fe4d 100644
--- a/arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts
+++ b/arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts
@@ -68,6 +68,14 @@
};
};
+ /* Dummy regulator until PM6150L has LCDB VSP/VSN support */
+ lcdb_dummy: regulator-lcdb-dummy {
+ compatible = "regulator-fixed";
+ regulator-name = "lcdb_dummy";
+ regulator-min-microvolt = <5500000>;
+ regulator-max-microvolt = <5500000>;
+ };
+
reserved-memory {
/*
* The rmtfs memory region in downstream is 'dynamically allocated'
@@ -116,7 +124,7 @@
};
&adsp {
- firmware-name = "qcom/sm7225/fairphone4/adsp.mdt";
+ firmware-name = "qcom/sm7225/fairphone4/adsp.mbn";
status = "okay";
};
@@ -361,7 +369,7 @@
};
&cdsp {
- firmware-name = "qcom/sm7225/fairphone4/cdsp.mdt";
+ firmware-name = "qcom/sm7225/fairphone4/cdsp.mbn";
status = "okay";
};
@@ -373,6 +381,14 @@
status = "okay";
};
+&gpu {
+ status = "okay";
+};
+
+&gpu_zap_shader {
+ firmware-name = "qcom/sm7225/fairphone4/a615_zap.mbn";
+};
+
&i2c0 {
clock-frequency = <400000>;
status = "okay";
@@ -400,12 +416,49 @@
&ipa {
qcom,gsi-loader = "self";
memory-region = <&pil_ipa_fw_mem>;
- firmware-name = "qcom/sm7225/fairphone4/ipa_fws.mdt";
+ firmware-name = "qcom/sm7225/fairphone4/ipa_fws.mbn";
+ status = "okay";
+};
+
+&mdss {
+ status = "okay";
+};
+
+&mdss_dsi0 {
+ vdda-supply = <&vreg_l22a>;
+ status = "okay";
+
+ panel@0 {
+ compatible = "djn,9a-3r063-1102b";
+ reg = <0>;
+
+ backlight = <&pm6150l_wled>;
+ reset-gpios = <&pm6150l_gpios 9 GPIO_ACTIVE_LOW>;
+
+ vdd1-supply = <&vreg_l1e>;
+ vsn-supply = <&lcdb_dummy>;
+ vsp-supply = <&lcdb_dummy>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&mdss_dsi0_out>;
+ };
+ };
+ };
+};
+
+&mdss_dsi0_out {
+ data-lanes = <0 1 2 3>;
+ remote-endpoint = <&panel_in>;
+};
+
+&mdss_dsi0_phy {
+ vdds-supply = <&vreg_l18a>;
status = "okay";
};
&mpss {
- firmware-name = "qcom/sm7225/fairphone4/modem.mdt";
+ firmware-name = "qcom/sm7225/fairphone4/modem.mbn";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index 761a6757dc26..a35c0852b5a1 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -967,7 +967,7 @@
#address-cells = <1>;
#size-cells = <1>;
- gpu_speed_bin: gpu_speed_bin@133 {
+ gpu_speed_bin: gpu-speed-bin@133 {
reg = <0x133 0x1>;
bits = <5 3>;
};
@@ -1843,8 +1843,22 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>,
<0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>;
- interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1858,14 +1872,16 @@
<&gcc GCC_PCIE_0_MSTR_AXI_CLK>,
<&gcc GCC_PCIE_0_SLV_AXI_CLK>,
<&gcc GCC_PCIE_0_SLV_Q2A_AXI_CLK>,
- <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>;
+ <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
clock-names = "pipe",
"aux",
"cfg",
"bus_master",
"bus_slave",
"slave_q2a",
- "tbu";
+ "tbu",
+ "ref";
iommu-map = <0x0 &apps_smmu 0x1d80 0x1>,
<0x100 &apps_smmu 0x1d81 0x1>;
@@ -1879,7 +1895,7 @@
phy-names = "pciephy";
perst-gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>;
- enable-gpio = <&tlmm 37 GPIO_ACTIVE_HIGH>;
+ wake-gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&pcie0_default_state>;
@@ -1934,8 +1950,22 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_EDGE_RISING>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1949,14 +1979,16 @@
<&gcc GCC_PCIE_1_MSTR_AXI_CLK>,
<&gcc GCC_PCIE_1_SLV_AXI_CLK>,
<&gcc GCC_PCIE_1_SLV_Q2A_AXI_CLK>,
- <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>;
+ <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
clock-names = "pipe",
"aux",
"cfg",
"bus_master",
"bus_slave",
"slave_q2a",
- "tbu";
+ "tbu",
+ "ref";
assigned-clocks = <&gcc GCC_PCIE_1_AUX_CLK>;
assigned-clock-rates = <19200000>;
@@ -2063,10 +2095,12 @@
compatible = "qcom,sm8150-qmp-ufs-phy";
reg = <0 0x01d87000 0 0x1000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_MEM_CLKREF_CLK>;
clock-names = "ref",
- "ref_aux";
- clocks = <&gcc GCC_UFS_MEM_CLKREF_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ "ref_aux",
+ "qref";
power-domains = <&gcc UFS_PHY_GDSC>;
@@ -2198,6 +2232,7 @@
nvmem-cells = <&gpu_speed_bin>;
nvmem-cell-names = "speed_bin";
+ #cooling-cells = <2>;
status = "disabled";
@@ -2428,7 +2463,7 @@
bias-disable;
};
- qup_spi6_default: qup-spi6_default-state {
+ qup_spi6_default: qup-spi6-default-state {
pins = "gpio4", "gpio5", "gpio6", "gpio7";
function = "qup6";
drive-strength = <6>;
@@ -2442,7 +2477,7 @@
bias-disable;
};
- qup_spi7_default: qup-spi7_default-state {
+ qup_spi7_default: qup-spi7-default-state {
pins = "gpio98", "gpio99", "gpio100", "gpio101";
function = "qup7";
drive-strength = <6>;
@@ -3573,12 +3608,16 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 9 IRQ_TYPE_EDGE_BOTH>,
<&pdc 8 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 9 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc 6 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
@@ -3645,12 +3684,16 @@
<&gcc GCC_USB30_SEC_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 7 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 11 IRQ_TYPE_EDGE_BOTH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 11 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq", "ss_phy_irq",
- "dm_hs_phy_irq", "dp_hs_phy_irq";
+ <&pdc 7 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
power-domains = <&gcc USB30_SEC_GDSC>;
@@ -5067,7 +5110,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster0_crit: cluster0_crit {
+ cluster0_crit: cluster0-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -5087,7 +5130,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster1_crit: cluster1_crit {
+ cluster1_crit: cluster1-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -5101,8 +5144,15 @@
thermal-sensors = <&tsens0 15>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_top_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu1_alert0: trip-point0 {
+ gpu_top_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
@@ -5281,8 +5331,15 @@
thermal-sensors = <&tsens1 11>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_bottom_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu2_alert0: trip-point0 {
+ gpu_bottom_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-common.dtsi b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-common.dtsi
index 946365f15a59..6f54f50a70b0 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-common.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: BSD-3-Clause
/*
- * Copyright (c) 2022, 2023 Jianhua Lu <lujianhua000@gmail.com>
+ * Copyright (c) 2022-2024 Jianhua Lu <lujianhua000@gmail.com>
*/
#include <dt-bindings/arm/qcom,ids.h>
@@ -551,6 +551,7 @@
vddio-supply = <&vreg_l14a_1p88>;
reset-gpios = <&tlmm 75 GPIO_ACTIVE_LOW>;
backlight = <&backlight>;
+ rotation = <90>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index 760501c1301a..39bd8f0eba1e 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -975,7 +975,7 @@
#address-cells = <1>;
#size-cells = <1>;
- gpu_speed_bin: gpu_speed_bin@19b {
+ gpu_speed_bin: gpu-speed-bin@19b {
reg = <0x19b 0x1>;
bits = <5 3>;
};
@@ -2152,8 +2152,14 @@
<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi0", "msi1", "msi2", "msi3",
- "msi4", "msi5", "msi6", "msi7";
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -2248,8 +2254,22 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -2349,8 +2369,22 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x64200000 0x0 0x100000>,
<0x02000000 0x0 0x64300000 0x0 0x64300000 0x0 0x3d00000>;
- interrupts = <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 261 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 262 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 264 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 288 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 289 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 290 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -2506,10 +2540,12 @@
compatible = "qcom,sm8250-qmp-ufs-phy";
reg = <0 0x01d87000 0 0x1000>;
- clock-names = "ref",
- "ref_aux";
clocks = <&rpmhcc RPMH_CXO_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_1X_CLKREF_EN>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -2888,6 +2924,7 @@
nvmem-cells = <&gpu_speed_bin>;
nvmem-cell-names = "speed_bin";
+ #cooling-cells = <2>;
status = "disabled";
@@ -4128,14 +4165,16 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 17 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 14 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
wakeup-source;
@@ -4197,14 +4236,16 @@
<&gcc GCC_USB30_SEC_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 16 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 12 IRQ_TYPE_EDGE_BOTH>,
<&pdc 13 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 12 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 16 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_SEC_GDSC>;
wakeup-source;
@@ -6757,7 +6798,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster0_crit: cluster0_crit {
+ cluster0_crit: cluster0-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -6777,7 +6818,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster1_crit: cluster1_crit {
+ cluster1_crit: cluster1-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -6791,8 +6832,15 @@
thermal-sensors = <&tsens0 15>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_top_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu1_alert0: trip-point0 {
+ gpu_top_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
@@ -6926,8 +6974,15 @@
thermal-sensors = <&tsens1 8>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_bottom_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu2_alert0: trip-point0 {
+ gpu_bottom_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <2000>;
type = "hot";
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index e78c83a897c2..a5e7dbbd8c6c 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -1526,8 +1526,14 @@
<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi0", "msi1", "msi2", "msi3",
- "msi4", "msi5", "msi6", "msi7";
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1611,8 +1617,22 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1726,10 +1746,12 @@
compatible = "qcom,sm8350-qmp-ufs-phy";
reg = <0 0x01d87000 0 0x1000>;
- clock-names = "ref",
- "ref_aux";
clocks = <&rpmhcc RPMH_CXO_CLK>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&gcc GCC_UFS_1_CLKREF_EN>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -1847,6 +1869,7 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;
+ #cooling-cells = <2>;
status = "disabled";
@@ -2312,14 +2335,16 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 17 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 14 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
@@ -2385,14 +2410,16 @@
<&gcc GCC_USB30_SEC_MASTER_CLK>;
assigned-clock-rates = <19200000>, <200000000>;
- interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 16 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 12 IRQ_TYPE_EDGE_BOTH>,
<&pdc 13 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 12 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 16 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_SEC_GDSC>;
@@ -4165,7 +4192,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster0_crit: cluster0_crit {
+ cluster0_crit: cluster0-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -4185,7 +4212,7 @@
hysteresis = <2000>;
type = "hot";
};
- cluster1_crit: cluster1_crit {
+ cluster1_crit: cluster1-crit {
temperature = <110000>;
hysteresis = <2000>;
type = "critical";
@@ -4214,8 +4241,15 @@
thermal-sensors = <&tsens1 1>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_top_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu1_alert0: trip-point0 {
+ gpu_top_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <1000>;
type = "hot";
@@ -4229,8 +4263,15 @@
thermal-sensors = <&tsens1 2>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_bottom_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
- gpu2_alert0: trip-point0 {
+ gpu_bottom_alert0: trip-point0 {
temperature = <90000>;
hysteresis = <1000>;
type = "hot";
diff --git a/arch/arm64/boot/dts/qcom/sm8450-hdk.dts b/arch/arm64/boot/dts/qcom/sm8450-hdk.dts
index a20d5d76af35..0786cff07b89 100644
--- a/arch/arm64/boot/dts/qcom/sm8450-hdk.dts
+++ b/arch/arm64/boot/dts/qcom/sm8450-hdk.dts
@@ -938,8 +938,8 @@
"TX DMIC3", "MIC BIAS1",
"TX SWR_INPUT0", "ADC1_OUTPUT",
"TX SWR_INPUT1", "ADC2_OUTPUT",
- "TX SWR_INPUT2", "ADC3_OUTPUT",
- "TX SWR_INPUT3", "ADC4_OUTPUT";
+ "TX SWR_INPUT0", "ADC3_OUTPUT",
+ "TX SWR_INPUT1", "ADC4_OUTPUT";
wcd-playback-dai-link {
link-name = "WCD Playback";
@@ -1147,7 +1147,7 @@
};
&vamacro {
- pinctrl-0 = <&dmic01_default>, <&dmic02_default>;
+ pinctrl-0 = <&dmic01_default>, <&dmic23_default>;
pinctrl-names = "default";
vdd-micb-supply = <&vreg_s10b_1p8>;
qcom,dmic-sample-rate = <600000>;
diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi
index 01e4dfc4babd..b86be34a912b 100644
--- a/arch/arm64/boot/dts/qcom/sm8450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi
@@ -1028,6 +1028,12 @@
pinctrl-names = "default";
pinctrl-0 = <&qup_uart20_default>;
interrupts = <GIC_SPI 587 IRQ_TYPE_LEVEL_HIGH>;
+ interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
+ &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "qup-core",
+ "qup-config";
status = "disabled";
};
@@ -1420,6 +1426,12 @@
pinctrl-names = "default";
pinctrl-0 = <&qup_uart7_tx>, <&qup_uart7_rx>;
interrupts = <GIC_SPI 608 IRQ_TYPE_LEVEL_HIGH>;
+ interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
+ &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "qup-core",
+ "qup-config";
status = "disabled";
};
};
@@ -1772,8 +1784,22 @@
msi-map = <0x0 &gic_its 0x5981 0x1>,
<0x100 &gic_its 0x5980 0x1>;
msi-map-mask = <0xff00>;
- interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 0 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1881,8 +1907,22 @@
msi-map = <0x0 &gic_its 0x5a01 0x1>,
<0x100 &gic_its 0x5a00 0x1>;
msi-map-mask = <0xff00>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -2038,6 +2078,7 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;
+ #cooling-cells = <2>;
status = "disabled";
@@ -3934,7 +3975,7 @@
};
};
- dmic02_default: dmic02-default-state {
+ dmic23_default: dmic23-default-state {
clk-pins {
pins = "gpio8";
function = "dmic2_clk";
@@ -4485,13 +4526,15 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 17 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 14 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
@@ -4890,6 +4933,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens0 14>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_top_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -4909,7 +4959,7 @@
type = "passive";
};
- gpu0_tj_cfg: tj-cfg {
+ gpu_top_alert0: trip-point0 {
temperature = <95000>;
hysteresis = <5000>;
type = "passive";
@@ -4922,6 +4972,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens0 15>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu_bottom_alert0>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -4941,7 +4998,7 @@
type = "passive";
};
- gpu1_tj_cfg: tj-cfg {
+ gpu_bottom_alert0: trip-point0 {
temperature = <95000>;
hysteresis = <5000>;
type = "passive";
diff --git a/arch/arm64/boot/dts/qcom/sm8550-hdk.dts b/arch/arm64/boot/dts/qcom/sm8550-hdk.dts
new file mode 100644
index 000000000000..12d60a0ee095
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/sm8550-hdk.dts
@@ -0,0 +1,1306 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2024 Linaro Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/regulator/qcom,rpmh-regulator.h>
+#include "sm8550.dtsi"
+#include "pm8010.dtsi"
+#include "pm8550.dtsi"
+#include "pm8550b.dtsi"
+#define PMK8550VE_SID 5
+#include "pm8550ve.dtsi"
+#include "pm8550vs.dtsi"
+#include "pmk8550.dtsi"
+#include "pmr735d_a.dtsi"
+
+/ {
+ model = "Qualcomm Technologies, Inc. SM8550 HDK";
+ compatible = "qcom,sm8550-hdk", "qcom,sm8550";
+ chassis-type = "embedded";
+
+ aliases {
+ serial0 = &uart7;
+ serial1 = &uart14;
+ };
+
+ wcd938x: audio-codec {
+ compatible = "qcom,wcd9385-codec";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&wcd_default>;
+
+ qcom,micbias1-microvolt = <1800000>;
+ qcom,micbias2-microvolt = <1800000>;
+ qcom,micbias3-microvolt = <1800000>;
+ qcom,micbias4-microvolt = <1800000>;
+ qcom,mbhc-buttons-vthreshold-microvolt = <75000 150000 237000 500000 500000 500000 500000 500000>;
+ qcom,mbhc-headset-vthreshold-microvolt = <1700000>;
+ qcom,mbhc-headphone-vthreshold-microvolt = <50000>;
+ qcom,rx-device = <&wcd_rx>;
+ qcom,tx-device = <&wcd_tx>;
+
+ reset-gpios = <&tlmm 108 GPIO_ACTIVE_LOW>;
+
+ vdd-buck-supply = <&vreg_l15b_1p8>;
+ vdd-rxtx-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l15b_1p8>;
+ vdd-mic-bias-supply = <&vreg_bob1>;
+
+ #sound-dai-cells = <1>;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ hdmi-out {
+ compatible = "hdmi-connector";
+ type = "a";
+
+ port {
+ hdmi_connector_out: endpoint {
+ remote-endpoint = <&lt9611_out>;
+ };
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ pinctrl-0 = <&volume_up_n>;
+ pinctrl-names = "default";
+
+ key-volume-up {
+ label = "Volume Up";
+ linux,code = <KEY_VOLUMEUP>;
+ gpios = <&pm8550_gpios 6 GPIO_ACTIVE_LOW>;
+ debounce-interval = <15>;
+ linux,can-disable;
+ wakeup-source;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ function = LED_FUNCTION_BLUETOOTH;
+ color = <LED_COLOR_ID_BLUE>;
+ gpios = <&tlmm 159 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "bluetooth-power";
+ default-state = "off";
+ };
+
+ led-1 {
+ function = LED_FUNCTION_INDICATOR;
+ color = <LED_COLOR_ID_GREEN>;
+ gpios = <&tlmm 160 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ panic-indicator;
+ };
+
+ led-2 {
+ function = LED_FUNCTION_WLAN;
+ color = <LED_COLOR_ID_ORANGE>;
+ gpios = <&tlmm 162 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "phy0tx";
+ default-state = "off";
+ };
+ };
+
+ pmic-glink {
+ compatible = "qcom,sm8550-pmic-glink", "qcom,pmic-glink";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ orientation-gpios = <&tlmm 11 GPIO_ACTIVE_HIGH>;
+
+ connector@0 {
+ compatible = "usb-c-connector";
+ reg = <0>;
+ power-role = "dual";
+ data-role = "dual";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ pmic_glink_hs_in: endpoint {
+ remote-endpoint = <&usb_1_dwc3_hs>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ pmic_glink_ss_in: endpoint {
+ remote-endpoint = <&usb_dp_qmpphy_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ pmic_glink_sbu: endpoint {
+ remote-endpoint = <&fsa4480_sbu_mux>;
+ };
+ };
+ };
+ };
+ };
+
+ lt9611_1v2: regulator-lt9611-1v2 {
+ compatible = "regulator-fixed";
+
+ regulator-name = "LT9611_1V2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+
+ vin-supply = <&vph_pwr>;
+ gpio = <&tlmm 152 GPIO_ACTIVE_HIGH>;
+
+ enable-active-high;
+ };
+
+ lt9611_3v3: regulator-lt9611-3v3 {
+ compatible = "regulator-fixed";
+
+ regulator-name = "LT9611_3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ vin-supply = <&vreg_bob_3v3>;
+ gpio = <&tlmm 6 GPIO_ACTIVE_HIGH>;
+
+ enable-active-high;
+ };
+
+ vph_pwr: regulator-vph-pwr {
+ compatible = "regulator-fixed";
+
+ regulator-name = "vph_pwr";
+ regulator-min-microvolt = <3700000>;
+ regulator-max-microvolt = <3700000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vreg_bob_3v3: regulator-vreg-bob-3v3 {
+ compatible = "regulator-fixed";
+
+ regulator-name = "VREG_BOB_3P3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ vin-supply = <&vph_pwr>;
+ };
+
+ sound {
+ compatible = "qcom,sm8550-sndcard", "qcom,sm8450-sndcard";
+ model = "SM8550-HDK";
+ audio-routing = "SpkrLeft IN", "WSA_SPK1 OUT",
+ "SpkrRight IN", "WSA_SPK2 OUT",
+ "IN1_HPHL", "HPHL_OUT",
+ "IN2_HPHR", "HPHR_OUT",
+ "AMIC1", "MIC BIAS1",
+ "AMIC2", "MIC BIAS2",
+ "AMIC5", "MIC BIAS4",
+ "TX SWR_INPUT0", "ADC1_OUTPUT",
+ "TX SWR_INPUT1", "ADC2_OUTPUT",
+ "TX SWR_INPUT1", "ADC4_OUTPUT";
+
+ wcd-playback-dai-link {
+ link-name = "WCD Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai RX_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&wcd938x 0>, <&swr1 0>, <&lpass_rxmacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ wcd-capture-dai-link {
+ link-name = "WCD Capture";
+
+ cpu {
+ sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>;
+ };
+
+ codec {
+ sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ wsa-dai-link {
+ link-name = "WSA Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai WSA_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&north_spkr>, <&south_spkr>, <&swr0 0>, <&lpass_wsamacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ va-dai-link {
+ link-name = "VA Capture";
+
+ cpu {
+ sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>;
+ };
+
+ codec {
+ sound-dai = <&lpass_vamacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+ };
+};
+
+&apps_rsc {
+ regulators-0 {
+ compatible = "qcom,pm8550-rpmh-regulators";
+
+ vdd-bob1-supply = <&vph_pwr>;
+ vdd-bob2-supply = <&vph_pwr>;
+ vdd-l1-l4-l10-supply = <&vreg_s6g_1p86>;
+ vdd-l2-l13-l14-supply = <&vreg_bob1>;
+ vdd-l3-supply = <&vreg_s4g_1p25>;
+ vdd-l5-l16-supply = <&vreg_bob1>;
+ vdd-l6-l7-supply = <&vreg_bob1>;
+ vdd-l8-l9-supply = <&vreg_bob1>;
+ vdd-l11-supply = <&vreg_s4g_1p25>;
+ vdd-l12-supply = <&vreg_s6g_1p86>;
+ vdd-l15-supply = <&vreg_s6g_1p86>;
+ vdd-l17-supply = <&vreg_bob2>;
+
+ qcom,pmic-id = "b";
+
+ vreg_bob1: bob1 {
+ regulator-name = "vreg_bob1";
+ regulator-min-microvolt = <3296000>;
+ regulator-max-microvolt = <3960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_bob2: bob2 {
+ regulator-name = "vreg_bob2";
+ regulator-min-microvolt = <2720000>;
+ regulator-max-microvolt = <3960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l1b_1p8: ldo1 {
+ regulator-name = "vreg_l1b_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2b_3p0: ldo2 {
+ regulator-name = "vreg_l2b_3p0";
+ regulator-min-microvolt = <3008000>;
+ regulator-max-microvolt = <3008000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5b_3p1: ldo5 {
+ regulator-name = "vreg_l5b_3p1";
+ regulator-min-microvolt = <3104000>;
+ regulator-max-microvolt = <3104000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6b_1p8: ldo6 {
+ regulator-name = "vreg_l6b_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3008000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7b_1p8: ldo7 {
+ regulator-name = "vreg_l7b_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3008000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l8b_1p8: ldo8 {
+ regulator-name = "vreg_l8b_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3008000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l9b_2p9: ldo9 {
+ regulator-name = "vreg_l9b_2p9";
+ regulator-min-microvolt = <2960000>;
+ regulator-max-microvolt = <3008000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l11b_1p2: ldo11 {
+ regulator-name = "vreg_l11b_1p2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1504000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l12b_1p8: ldo12 {
+ regulator-name = "vreg_l12b_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l13b_3p0: ldo13 {
+ regulator-name = "vreg_l13b_3p0";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l14b_3p2: ldo14 {
+ regulator-name = "vreg_l14b_3p2";
+ regulator-min-microvolt = <3200000>;
+ regulator-max-microvolt = <3200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l15b_1p8: ldo15 {
+ regulator-name = "vreg_l15b_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l16b_2p8: ldo16 {
+ regulator-name = "vreg_l16b_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l17b_2p5: ldo17 {
+ regulator-name = "vreg_l17b_2p5";
+ regulator-min-microvolt = <2504000>;
+ regulator-max-microvolt = <2504000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-1 {
+ compatible = "qcom,pm8550vs-rpmh-regulators";
+
+ vdd-l1-supply = <&vreg_s4g_1p25>;
+ vdd-l2-supply = <&vreg_s4e_0p95>;
+ vdd-l3-supply = <&vreg_s4e_0p95>;
+
+ qcom,pmic-id = "c";
+
+ vreg_l3c_0p9: ldo3 {
+ regulator-name = "vreg_l3c_0p9";
+ regulator-min-microvolt = <880000>;
+ regulator-max-microvolt = <912000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-2 {
+ compatible = "qcom,pm8550vs-rpmh-regulators";
+
+ vdd-l1-supply = <&vreg_s4e_0p95>;
+ vdd-l2-supply = <&vreg_s4e_0p95>;
+ vdd-l3-supply = <&vreg_s4e_0p95>;
+
+ qcom,pmic-id = "d";
+
+ vreg_l1d_0p88: ldo1 {
+ regulator-name = "vreg_l1d_0p88";
+ regulator-min-microvolt = <880000>;
+ regulator-max-microvolt = <920000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ /* ldo2 supplies SM8550 VDD_LPI_MX */
+ };
+
+ regulators-3 {
+ compatible = "qcom,pm8550vs-rpmh-regulators";
+
+ vdd-l1-supply = <&vreg_s4e_0p95>;
+ vdd-l2-supply = <&vreg_s4e_0p95>;
+ vdd-l3-supply = <&vreg_s4g_1p25>;
+ vdd-s4-supply = <&vph_pwr>;
+ vdd-s5-supply = <&vph_pwr>;
+
+ qcom,pmic-id = "e";
+
+ vreg_s4e_0p95: smps4 {
+ regulator-name = "vreg_s4e_0p95";
+ regulator-min-microvolt = <904000>;
+ regulator-max-microvolt = <984000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_s5e_1p08: smps5 {
+ regulator-name = "vreg_s5e_1p08";
+ regulator-min-microvolt = <1080000>;
+ regulator-max-microvolt = <1120000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l1e_0p88: ldo1 {
+ regulator-name = "vreg_l1e_0p88";
+ regulator-min-microvolt = <880000>;
+ regulator-max-microvolt = <880000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2e_0p9: ldo2 {
+ regulator-name = "vreg_l2e_0p9";
+ regulator-min-microvolt = <904000>;
+ regulator-max-microvolt = <970000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3e_1p2: ldo3 {
+ regulator-name = "vreg_l3e_1p2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-4 {
+ compatible = "qcom,pm8550ve-rpmh-regulators";
+
+ vdd-l1-supply = <&vreg_s4e_0p95>;
+ vdd-l2-supply = <&vreg_s4e_0p95>;
+ vdd-l3-supply = <&vreg_s4e_0p95>;
+ vdd-s4-supply = <&vph_pwr>;
+
+ qcom,pmic-id = "f";
+
+ vreg_s4f_0p5: smps4 {
+ regulator-name = "vreg_s4f_0p5";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <700000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l1f_0p9: ldo1 {
+ regulator-name = "vreg_l1f_0p9";
+ regulator-min-microvolt = <912000>;
+ regulator-max-microvolt = <912000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2f_0p88: ldo2 {
+ regulator-name = "vreg_l2f_0p88";
+ regulator-min-microvolt = <880000>;
+ regulator-max-microvolt = <912000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3f_0p88: ldo3 {
+ regulator-name = "vreg_l3f_0p88";
+ regulator-min-microvolt = <880000>;
+ regulator-max-microvolt = <912000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-5 {
+ compatible = "qcom,pm8550vs-rpmh-regulators";
+
+ vdd-l1-supply = <&vreg_s4g_1p25>;
+ vdd-l2-supply = <&vreg_s4g_1p25>;
+ vdd-l3-supply = <&vreg_s4g_1p25>;
+ vdd-s1-supply = <&vph_pwr>;
+ vdd-s2-supply = <&vph_pwr>;
+ vdd-s3-supply = <&vph_pwr>;
+ vdd-s4-supply = <&vph_pwr>;
+ vdd-s5-supply = <&vph_pwr>;
+ vdd-s6-supply = <&vph_pwr>;
+
+ qcom,pmic-id = "g";
+
+ vreg_s1g_1p25: smps1 {
+ regulator-name = "vreg_s1g_1p25";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_s2g_0p85: smps2 {
+ regulator-name = "vreg_s2g_0p85";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_s3g_0p8: smps3 {
+ regulator-name = "vreg_s3g_0p8";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1004000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_s4g_1p25: smps4 {
+ regulator-name = "vreg_s4g_1p25";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1352000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_s5g_0p85: smps5 {
+ regulator-name = "vreg_s5g_0p85";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1004000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_s6g_1p86: smps6 {
+ regulator-name = "vreg_s6g_1p86";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l1g_1p2: ldo1 {
+ regulator-name = "vreg_l1g_1p2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3g_1p2: ldo3 {
+ regulator-name = "vreg_l3g_1p2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-6 {
+ compatible = "qcom,pm8010-rpmh-regulators";
+
+ vdd-l1-l2-supply = <&vreg_s4g_1p25>;
+ vdd-l3-l4-supply = <&vreg_bob2>;
+ vdd-l5-supply = <&vreg_s6g_1p86>;
+ vdd-l6-supply = <&vreg_s6g_1p86>;
+ vdd-l7-supply = <&vreg_bob1>;
+
+ qcom,pmic-id = "m";
+
+ vreg_l1m_1p056: ldo1 {
+ regulator-name = "vreg_l1m_1p056";
+ regulator-min-microvolt = <1056000>;
+ regulator-max-microvolt = <1056000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2m_1p056: ldo2 {
+ regulator-name = "vreg_l2m_1p056";
+ regulator-min-microvolt = <1056000>;
+ regulator-max-microvolt = <1056000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3m_2p8: ldo3 {
+ regulator-name = "vreg_l3m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l4m_2p8: ldo4 {
+ regulator-name = "vreg_l4m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5m_1p8: ldo5 {
+ regulator-name = "vreg_l5m_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6m_1p8: ldo6 {
+ regulator-name = "vreg_l6m_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7m_2p9: ldo7 {
+ regulator-name = "vreg_l7m_2p9";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2904000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-7 {
+ compatible = "qcom,pm8010-rpmh-regulators";
+
+ vdd-l1-l2-supply = <&vreg_s4g_1p25>;
+ vdd-l3-l4-supply = <&vreg_bob2>;
+ vdd-l5-supply = <&vreg_s6g_1p86>;
+ vdd-l6-supply = <&vreg_bob1>;
+ vdd-l7-supply = <&vreg_bob1>;
+
+ qcom,pmic-id = "n";
+
+ vreg_l1n_1p1: ldo1 {
+ regulator-name = "vreg_l1n_1p1";
+ regulator-min-microvolt = <1104000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2n_1p1: ldo2 {
+ regulator-name = "vreg_l2n_1p1";
+ regulator-min-microvolt = <1104000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3n_2p8: ldo3 {
+ regulator-name = "vreg_l3n_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l4n_2p8: ldo4 {
+ regulator-name = "vreg_l4n_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5n_1p8: ldo5 {
+ regulator-name = "vreg_l5n_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6n_3p3: ldo6 {
+ regulator-name = "vreg_l6n_3p3";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <3304000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7n_2p96: ldo7 {
+ regulator-name = "vreg_l7n_2p96";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+};
+
+&i2c0 {
+ clock-frequency = <400000>;
+ status = "okay";
+
+ lt9611_codec: hdmi-bridge@2b {
+ compatible = "lontium,lt9611uxc";
+ reg = <0x2b>;
+
+ interrupts-extended = <&tlmm 8 IRQ_TYPE_EDGE_FALLING>;
+
+ reset-gpios = <&tlmm 7 GPIO_ACTIVE_HIGH>;
+
+ vdd-supply = <&lt9611_1v2>;
+ vcc-supply = <&lt9611_3v3>;
+
+ pinctrl-0 = <&lt9611_irq_pin>, <&lt9611_rst_pin>;
+ pinctrl-names = "default";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lt9611_a: endpoint {
+ remote-endpoint = <&mdss_dsi0_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ lt9611_out: endpoint {
+ remote-endpoint = <&hdmi_connector_out>;
+ };
+ };
+ };
+ };
+};
+
+&i2c_hub_2 {
+ status = "okay";
+
+ typec-mux@42 {
+ compatible = "fcs,fsa4480";
+ reg = <0x42>;
+
+ vcc-supply = <&vreg_bob1>;
+
+ mode-switch;
+ orientation-switch;
+
+ port {
+ fsa4480_sbu_mux: endpoint {
+ remote-endpoint = <&pmic_glink_sbu>;
+ };
+ };
+ };
+};
+
+&i2c_master_hub_0 {
+ status = "okay";
+};
+
+&ipa {
+ qcom,gsi-loader = "self";
+ memory-region = <&ipa_fw_mem>;
+ firmware-name = "qcom/sm8550/ipa_fws.mbn";
+ status = "okay";
+};
+
+&gpi_dma1 {
+ status = "okay";
+};
+
+&gpu {
+ status = "okay";
+
+ zap-shader {
+ firmware-name = "qcom/sm8550/a740_zap.mbn";
+ };
+};
+
+&lpass_tlmm {
+ spkr_1_sd_n_active: spkr-1-sd-n-active-state {
+ pins = "gpio17";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+
+ spkr_2_sd_n_active: spkr-2-sd-n-active-state {
+ pins = "gpio18";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+};
+
+&mdss {
+ status = "okay";
+};
+
+&mdss_dsi0 {
+ vdda-supply = <&vreg_l3e_1p2>;
+ status = "okay";
+};
+
+&mdss_dsi0_out {
+ remote-endpoint = <&lt9611_a>;
+ data-lanes = <0 1 2 3>;
+};
+
+&mdss_dsi0_phy {
+ vdds-supply = <&vreg_l1e_0p88>;
+ status = "okay";
+};
+
+&mdss_dp0 {
+ status = "okay";
+};
+
+&mdss_dp0_out {
+ remote-endpoint = <&usb_dp_qmpphy_dp_in>;
+ data-lanes = <0 1>;
+};
+
+&pcie0 {
+ wake-gpios = <&tlmm 96 GPIO_ACTIVE_HIGH>;
+ perst-gpios = <&tlmm 94 GPIO_ACTIVE_LOW>;
+
+ pinctrl-0 = <&pcie0_default_state>;
+ pinctrl-names = "default";
+
+ status = "okay";
+};
+
+&pcie0_phy {
+ vdda-phy-supply = <&vreg_l1e_0p88>;
+ vdda-pll-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&pcie1 {
+ wake-gpios = <&tlmm 99 GPIO_ACTIVE_HIGH>;
+ perst-gpios = <&tlmm 97 GPIO_ACTIVE_LOW>;
+
+ pinctrl-0 = <&pcie1_default_state>;
+ pinctrl-names = "default";
+
+ status = "okay";
+};
+
+&pcie1_phy {
+ vdda-phy-supply = <&vreg_l3c_0p9>;
+ vdda-pll-supply = <&vreg_l3e_1p2>;
+ vdda-qref-supply = <&vreg_l1e_0p88>;
+
+ status = "okay";
+};
+
+&pcie_1_phy_aux_clk {
+ clock-frequency = <1000>;
+};
+
+&pm8550_gpios {
+ sdc2_card_det_n: sdc2-card-det-state {
+ pins = "gpio12";
+ function = "normal";
+ input-enable;
+ output-disable;
+ bias-pull-up;
+ power-source = <1>; /* 1.8 V */
+ };
+
+ volume_up_n: volume-up-n-state {
+ pins = "gpio6";
+ function = "normal";
+ power-source = <1>;
+ bias-pull-up;
+ input-enable;
+ };
+};
+
+/* The RGB signals are routed to 3 separate LEDs on the HDK8550 */
+&pm8550_pwm {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ status = "okay";
+
+ led@1 {
+ reg = <1>;
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_RED>;
+ default-state = "off";
+ };
+
+ led@2 {
+ reg = <2>;
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_GREEN>;
+ default-state = "off";
+ };
+
+ led@3 {
+ reg = <3>;
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_BLUE>;
+ default-state = "off";
+ };
+};
+
+&pm8550b_eusb2_repeater {
+ vdd18-supply = <&vreg_l15b_1p8>;
+ vdd3-supply = <&vreg_l5b_3p1>;
+};
+
+&pon_pwrkey {
+ status = "okay";
+};
+
+&pon_resin {
+ linux,code = <KEY_VOLUMEDOWN>;
+
+ status = "okay";
+};
+
+&qupv3_id_0 {
+ status = "okay";
+};
+
+&qupv3_id_1 {
+ status = "okay";
+};
+
+&remoteproc_adsp {
+ firmware-name = "qcom/sm8550/adsp.mbn",
+ "qcom/sm8550/adsp_dtb.mbn";
+ status = "okay";
+};
+
+&remoteproc_cdsp {
+ firmware-name = "qcom/sm8550/cdsp.mbn",
+ "qcom/sm8550/cdsp_dtb.mbn";
+ status = "okay";
+};
+
+&remoteproc_mpss {
+ firmware-name = "qcom/sm8550/modem.mbn",
+ "qcom/sm8550/modem_dtb.mbn";
+ status = "okay";
+};
+
+&sdhc_2 {
+ cd-gpios = <&pm8550_gpios 12 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-0 = <&sdc2_default>, <&sdc2_card_det_n>;
+ pinctrl-1 = <&sdc2_sleep>, <&sdc2_card_det_n>;
+ pinctrl-names = "default", "sleep";
+
+ vmmc-supply = <&vreg_l9b_2p9>;
+ vqmmc-supply = <&vreg_l8b_1p8>;
+
+ bus-width = <4>;
+ no-sdio;
+ no-mmc;
+
+ status = "okay";
+};
+
+&sleep_clk {
+ clock-frequency = <32000>;
+};
+
+&swr0 {
+ status = "okay";
+
+ /* WSA8845, Speaker North */
+ north_spkr: speaker@0,0 {
+ compatible = "sdw20217020400";
+ reg = <0 0>;
+
+ pinctrl-0 = <&spkr_1_sd_n_active>;
+ pinctrl-names = "default";
+
+ powerdown-gpios = <&lpass_tlmm 17 GPIO_ACTIVE_LOW>;
+
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l15b_1p8>;
+
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "SpkrLeft";
+ };
+
+ /* WSA8845, Speaker South */
+ south_spkr: speaker@0,1 {
+ compatible = "sdw20217020400";
+ reg = <0 1>;
+
+ pinctrl-0 = <&spkr_2_sd_n_active>;
+ pinctrl-names = "default";
+
+ powerdown-gpios = <&lpass_tlmm 18 GPIO_ACTIVE_LOW>;
+
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l15b_1p8>;
+
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "SpkrRight";
+ };
+};
+
+&swr1 {
+ status = "okay";
+
+ /* WCD9385 RX */
+ wcd_rx: codec@0,4 {
+ compatible = "sdw20217010d00";
+ reg = <0 4>;
+
+ /*
+ * WCD9385 RX Port 1 (HPH_L/R) <=> SWR1 Port 1 (HPH_L/R)
+ * WCD9385 RX Port 2 (CLSH) <=> SWR1 Port 2 (CLSH)
+ * WCD9385 RX Port 3 (COMP_L/R) <=> SWR1 Port 3 (COMP_L/R)
+ * WCD9385 RX Port 4 (LO) <=> SWR1 Port 4 (LO)
+ * WCD9385 RX Port 5 (DSD_L/R) <=> SWR1 Port 5 (DSD_L/R)
+ */
+ qcom,rx-port-mapping = <1 2 3 4 5>;
+ };
+};
+
+&swr2 {
+ status = "okay";
+
+ /* WCD9385 TX */
+ wcd_tx: codec@0,3 {
+ compatible = "sdw20217010d00";
+ reg = <0 3>;
+
+ /*
+ * WCD9385 TX Port 1 (ADC1,2) <=> SWR2 Port 2 (TX SWR_INPUT 0,1,2,3)
+ * WCD9385 TX Port 2 (ADC3,4) <=> SWR2 Port 2 (TX SWR_INPUT 0,1,2,3)
+ * WCD9385 TX Port 3 (DMIC0,1,2,3 & MBHC) <=> SWR2 Port 3 (TX SWR_INPUT 4,5,6,7)
+ * WCD9385 TX Port 4 (DMIC4,5,6,7) <=> SWR2 Port 4 (TX SWR_INPUT 8,9,10,11)
+ */
+ qcom,tx-port-mapping = <2 2 3 4>;
+ };
+};
+
+&tlmm {
+ /* Reserved I/Os for NFC */
+ gpio-reserved-ranges = <32 8>;
+
+ bt_default: bt-default-state {
+ bt-en-pins {
+ pins = "gpio81";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ };
+
+ sw-ctrl-pins {
+ pins = "gpio82";
+ function = "gpio";
+ bias-pull-down;
+ };
+ };
+
+ lt9611_irq_pin: lt9611-irq-state {
+ pins = "gpio8";
+ function = "gpio";
+ bias-disable;
+ };
+
+ lt9611_rst_pin: lt9611-rst-state {
+ pins = "gpio7";
+ function = "gpio";
+ output-high;
+ };
+
+ wcd_default: wcd-reset-n-active-state {
+ pins = "gpio108";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+};
+
+&uart7 {
+ status = "okay";
+};
+
+&uart14 {
+ status = "okay";
+
+ bluetooth {
+ compatible = "qcom,wcn7850-bt";
+
+ vddio-supply = <&vreg_l15b_1p8>;
+ vddaon-supply = <&vreg_s4e_0p95>;
+ vdddig-supply = <&vreg_s4e_0p95>;
+ vddrfa0p8-supply = <&vreg_s4e_0p95>;
+ vddrfa1p2-supply = <&vreg_s4g_1p25>;
+ vddrfa1p9-supply = <&vreg_s6g_1p86>;
+
+ max-speed = <3200000>;
+
+ enable-gpios = <&tlmm 81 GPIO_ACTIVE_HIGH>;
+ swctrl-gpios = <&tlmm 82 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-0 = <&bt_default>;
+ pinctrl-names = "default";
+ };
+};
+
+&ufs_mem_hc {
+ reset-gpios = <&tlmm 210 GPIO_ACTIVE_LOW>;
+
+ vcc-supply = <&vreg_l17b_2p5>;
+ vcc-max-microamp = <1300000>;
+ vccq-supply = <&vreg_l1g_1p2>;
+ vccq-max-microamp = <1200000>;
+ vdd-hba-supply = <&vreg_l3g_1p2>;
+
+ status = "okay";
+};
+
+&ufs_mem_phy {
+ vdda-phy-supply = <&vreg_l1d_0p88>;
+ vdda-pll-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1 {
+ status = "okay";
+};
+
+&usb_1_dwc3 {
+ dr_mode = "otg";
+ usb-role-switch;
+};
+
+&usb_1_dwc3_hs {
+ remote-endpoint = <&pmic_glink_hs_in>;
+};
+
+&usb_1_dwc3_ss {
+ remote-endpoint = <&usb_dp_qmpphy_usb_ss_in>;
+};
+
+&usb_1_hsphy {
+ vdd-supply = <&vreg_l1e_0p88>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ phys = <&pm8550b_eusb2_repeater>;
+
+ status = "okay";
+};
+
+&usb_dp_qmpphy {
+ vdda-phy-supply = <&vreg_l3e_1p2>;
+ vdda-pll-supply = <&vreg_l3f_0p88>;
+
+ orientation-switch;
+
+ status = "okay";
+};
+
+&usb_dp_qmpphy_dp_in {
+ remote-endpoint = <&mdss_dp0_out>;
+};
+
+&usb_dp_qmpphy_out {
+ remote-endpoint = <&pmic_glink_ss_in>;
+};
+
+&usb_dp_qmpphy_usb_ss_in {
+ remote-endpoint = <&usb_1_dwc3_ss>;
+};
+
+&xo_board {
+ clock-frequency = <76800000>;
+};
diff --git a/arch/arm64/boot/dts/qcom/sm8550-mtp.dts b/arch/arm64/boot/dts/qcom/sm8550-mtp.dts
index c1135ad5fa69..3d4ad5aac70f 100644
--- a/arch/arm64/boot/dts/qcom/sm8550-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sm8550-mtp.dts
@@ -106,14 +106,21 @@
"SpkrRight IN", "WSA_SPK2 OUT",
"IN1_HPHL", "HPHL_OUT",
"IN2_HPHR", "HPHR_OUT",
+ "AMIC1", "MIC BIAS1",
"AMIC2", "MIC BIAS2",
+ "AMIC3", "MIC BIAS3",
+ "AMIC4", "MIC BIAS3",
+ "AMIC5", "MIC BIAS4",
"VA DMIC0", "MIC BIAS1",
"VA DMIC1", "MIC BIAS1",
"VA DMIC2", "MIC BIAS3",
"TX DMIC0", "MIC BIAS1",
"TX DMIC1", "MIC BIAS2",
"TX DMIC2", "MIC BIAS3",
- "TX SWR_ADC1", "ADC2_OUTPUT";
+ "TX SWR_INPUT0", "ADC1_OUTPUT",
+ "TX SWR_INPUT1", "ADC2_OUTPUT",
+ "TX SWR_INPUT0", "ADC3_OUTPUT",
+ "TX SWR_INPUT1", "ADC4_OUTPUT";
wcd-playback-dai-link {
link-name = "WCD Playback";
@@ -874,7 +881,7 @@
wcd_tx: codec@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
- qcom,tx-port-mapping = <1 1 2 3>;
+ qcom,tx-port-mapping = <2 2 3 4>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8550-qrd.dts b/arch/arm64/boot/dts/qcom/sm8550-qrd.dts
index d401d63e5c4d..92f015017418 100644
--- a/arch/arm64/boot/dts/qcom/sm8550-qrd.dts
+++ b/arch/arm64/boot/dts/qcom/sm8550-qrd.dts
@@ -124,14 +124,21 @@
"SpkrRight IN", "WSA_SPK2 OUT",
"IN1_HPHL", "HPHL_OUT",
"IN2_HPHR", "HPHR_OUT",
+ "AMIC1", "MIC BIAS1",
"AMIC2", "MIC BIAS2",
+ "AMIC3", "MIC BIAS3",
+ "AMIC4", "MIC BIAS3",
+ "AMIC5", "MIC BIAS4",
"VA DMIC0", "MIC BIAS1",
"VA DMIC1", "MIC BIAS1",
"VA DMIC2", "MIC BIAS3",
"TX DMIC0", "MIC BIAS1",
"TX DMIC1", "MIC BIAS2",
"TX DMIC2", "MIC BIAS3",
- "TX SWR_ADC1", "ADC2_OUTPUT";
+ "TX SWR_INPUT0", "ADC1_OUTPUT",
+ "TX SWR_INPUT1", "ADC2_OUTPUT",
+ "TX SWR_INPUT0", "ADC3_OUTPUT",
+ "TX SWR_INPUT1", "ADC4_OUTPUT";
wcd-playback-dai-link {
link-name = "WCD Playback";
@@ -724,6 +731,10 @@
<&usb_dp_qmpphy QMP_USB43DP_USB3_PIPE_CLK>;
};
+&gpi_dma1 {
+ status = "okay";
+};
+
&gpu {
status = "okay";
@@ -960,6 +971,30 @@
};
};
+&spi4 {
+ status = "okay";
+
+ touchscreen@0 {
+ compatible = "goodix,gt9916";
+ reg = <0>;
+
+ interrupt-parent = <&tlmm>;
+ interrupts = <25 IRQ_TYPE_LEVEL_LOW>;
+
+ reset-gpios = <&tlmm 24 GPIO_ACTIVE_LOW>;
+
+ avdd-supply = <&vreg_l14b_3p2>;
+
+ spi-max-frequency = <1000000>;
+
+ touchscreen-size-x = <1080>;
+ touchscreen-size-y = <2400>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&ts_irq>, <&ts_reset>;
+ };
+};
+
&swr1 {
status = "okay";
@@ -978,7 +1013,7 @@
wcd_tx: codec@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
- qcom,tx-port-mapping = <1 1 2 3>;
+ qcom,tx-port-mapping = <2 2 3 4>;
};
};
@@ -1028,6 +1063,20 @@
bias-pull-down;
};
+ ts_irq: ts-irq-state {
+ pins = "gpio25";
+ function = "gpio";
+ drive-strength = <8>;
+ bias-pull-up;
+ };
+
+ ts_reset: ts-reset-state {
+ pins = "gpio24";
+ function = "gpio";
+ drive-strength = <8>;
+ bias-pull-up;
+ };
+
wcd_default: wcd-reset-n-active-state {
pins = "gpio108";
function = "gpio";
diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi
index ee1ba5a8c8fc..3904348075f6 100644
--- a/arch/arm64/boot/dts/qcom/sm8550.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi
@@ -1713,9 +1713,22 @@
linux,pci-domain = <0>;
num-lanes = <2>;
- interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
-
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 0 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1742,6 +1755,9 @@
<&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>;
interconnect-names = "pcie-mem", "cpu-pcie";
+ /* Entries are reversed due to the unusual ITS DeviceID encoding */
+ msi-map = <0x0 &gic_its 0x1401 0x1>,
+ <0x100 &gic_its 0x1400 0x1>;
iommu-map = <0x0 &apps_smmu 0x1400 0x1>,
<0x100 &apps_smmu 0x1401 0x1>;
@@ -1804,9 +1820,22 @@
linux,pci-domain = <1>;
num-lanes = <2>;
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
-
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
@@ -1838,6 +1867,9 @@
<&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>;
interconnect-names = "pcie-mem", "cpu-pcie";
+ /* Entries are reversed due to the unusual ITS DeviceID encoding */
+ msi-map = <0x0 &gic_its 0x1481 0x1>,
+ <0x100 &gic_its 0x1480 0x1>;
iommu-map = <0x0 &apps_smmu 0x1480 0x1>,
<0x100 &apps_smmu 0x1481 0x1>;
@@ -1907,9 +1939,12 @@
ufs_mem_phy: phy@1d80000 {
compatible = "qcom,sm8550-qmp-ufs-phy";
reg = <0x0 0x01d80000 0x0 0x2000>;
- clocks = <&tcsr TCSR_UFS_CLKREF_EN>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
- clock-names = "ref", "ref_aux";
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&tcsr TCSR_UFS_CLKREF_EN>;
+ clock-names = "ref",
+ "ref_aux",
+ "qref";
power-domains = <&gcc UFS_MEM_PHY_GDSC>;
@@ -1940,6 +1975,7 @@
iommus = <&apps_smmu 0x60 0x0>;
dma-coherent;
+ operating-points-v2 = <&ufs_opp_table>;
interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>,
<&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>;
@@ -1960,18 +1996,49 @@
<&gcc GCC_UFS_PHY_TX_SYMBOL_0_CLK>,
<&gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>,
<&gcc GCC_UFS_PHY_RX_SYMBOL_1_CLK>;
- freq-table-hz =
- <75000000 300000000>,
- <0 0>,
- <0 0>,
- <75000000 300000000>,
- <100000000 403000000>,
- <0 0>,
- <0 0>,
- <0 0>;
qcom,ice = <&ice>;
status = "disabled";
+
+ ufs_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-75000000 {
+ opp-hz = /bits/ 64 <75000000>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <75000000>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+
+ opp-150000000 {
+ opp-hz = /bits/ 64 <150000000>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <150000000>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>;
+ required-opps = <&rpmhpd_opp_svs>;
+ };
+
+ opp-300000000 {
+ opp-hz = /bits/ 64 <300000000>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <300000000>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>,
+ /bits/ 64 <0>;
+ required-opps = <&rpmhpd_opp_nom>;
+ };
+ };
};
ice: crypto@1d88000 {
@@ -2012,6 +2079,7 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;
+ #cooling-cells = <2>;
status = "disabled";
@@ -2507,7 +2575,7 @@
};
};
- dmic02_default: dmic02-default-state {
+ dmic23_default: dmic23-default-state {
clk-pins {
pins = "gpio8";
function = "dmic2_clk";
@@ -3133,13 +3201,15 @@
assigned-clock-rates = <19200000>, <200000000>;
interrupts-extended = <&intc GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 17 IRQ_TYPE_LEVEL_HIGH>,
+ <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 14 IRQ_TYPE_EDGE_BOTH>,
<&pdc 15 IRQ_TYPE_EDGE_BOTH>,
- <&pdc 14 IRQ_TYPE_EDGE_BOTH>;
- interrupt-names = "hs_phy_irq",
- "ss_phy_irq",
+ <&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "hs_phy_irq",
+ "dp_hs_phy_irq",
"dm_hs_phy_irq",
- "dp_hs_phy_irq";
+ "ss_phy_irq";
power-domains = <&gcc USB30_PRIM_GDSC>;
required-opps = <&rpmhpd_opp_nom>;
@@ -3248,7 +3318,7 @@
spmi_bus: spmi@c400000 {
compatible = "qcom,spmi-pmic-arb";
reg = <0 0x0c400000 0 0x3000>,
- <0 0x0c500000 0 0x4000000>,
+ <0 0x0c500000 0 0x400000>,
<0 0x0c440000 0 0x80000>,
<0 0x0c4c0000 0 0x20000>,
<0 0x0c42d000 0 0x4000>;
@@ -4254,6 +4324,7 @@
reg = <3>;
iommus = <&apps_smmu 0x1003 0x80>,
<&apps_smmu 0x1063 0x0>;
+ dma-coherent;
};
compute-cb@4 {
@@ -4261,6 +4332,7 @@
reg = <4>;
iommus = <&apps_smmu 0x1004 0x80>,
<&apps_smmu 0x1064 0x0>;
+ dma-coherent;
};
compute-cb@5 {
@@ -4268,6 +4340,7 @@
reg = <5>;
iommus = <&apps_smmu 0x1005 0x80>,
<&apps_smmu 0x1065 0x0>;
+ dma-coherent;
};
compute-cb@6 {
@@ -4275,6 +4348,7 @@
reg = <6>;
iommus = <&apps_smmu 0x1006 0x80>,
<&apps_smmu 0x1066 0x0>;
+ dma-coherent;
};
compute-cb@7 {
@@ -4282,6 +4356,7 @@
reg = <7>;
iommus = <&apps_smmu 0x1007 0x80>,
<&apps_smmu 0x1067 0x0>;
+ dma-coherent;
};
};
@@ -4388,6 +4463,7 @@
iommus = <&apps_smmu 0x1961 0x0>,
<&apps_smmu 0x0c01 0x20>,
<&apps_smmu 0x19c1 0x10>;
+ dma-coherent;
};
compute-cb@2 {
@@ -4396,6 +4472,7 @@
iommus = <&apps_smmu 0x1962 0x0>,
<&apps_smmu 0x0c02 0x20>,
<&apps_smmu 0x19c2 0x10>;
+ dma-coherent;
};
compute-cb@3 {
@@ -4404,6 +4481,7 @@
iommus = <&apps_smmu 0x1963 0x0>,
<&apps_smmu 0x0c03 0x20>,
<&apps_smmu 0x19c3 0x10>;
+ dma-coherent;
};
compute-cb@4 {
@@ -4412,6 +4490,7 @@
iommus = <&apps_smmu 0x1964 0x0>,
<&apps_smmu 0x0c04 0x20>,
<&apps_smmu 0x19c4 0x10>;
+ dma-coherent;
};
compute-cb@5 {
@@ -4420,6 +4499,7 @@
iommus = <&apps_smmu 0x1965 0x0>,
<&apps_smmu 0x0c05 0x20>,
<&apps_smmu 0x19c5 0x10>;
+ dma-coherent;
};
compute-cb@6 {
@@ -4428,6 +4508,7 @@
iommus = <&apps_smmu 0x1966 0x0>,
<&apps_smmu 0x0c06 0x20>,
<&apps_smmu 0x19c6 0x10>;
+ dma-coherent;
};
compute-cb@7 {
@@ -4436,6 +4517,7 @@
iommus = <&apps_smmu 0x1967 0x0>,
<&apps_smmu 0x0c07 0x20>,
<&apps_smmu 0x19c7 0x10>;
+ dma-coherent;
};
compute-cb@8 {
@@ -4444,6 +4526,7 @@
iommus = <&apps_smmu 0x1968 0x0>,
<&apps_smmu 0x0c08 0x20>,
<&apps_smmu 0x19c8 0x10>;
+ dma-coherent;
};
/* note: secure cb9 in downstream */
@@ -5304,6 +5387,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 1>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu0_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5336,6 +5426,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 2>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu1_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5368,6 +5465,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 3>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu2_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5400,6 +5504,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 4>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu3_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5432,6 +5543,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 5>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu4_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5464,6 +5582,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 6>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu5_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5496,6 +5621,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 7>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu6_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
@@ -5528,6 +5660,13 @@
polling-delay = <0>;
thermal-sensors = <&tsens2 8>;
+ cooling-maps {
+ map0 {
+ trip = <&gpu7_junction_config>;
+ cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+
trips {
thermal-engine-config {
temperature = <125000>;
diff --git a/arch/arm64/boot/dts/qcom/sm8650-mtp.dts b/arch/arm64/boot/dts/qcom/sm8650-mtp.dts
index 9d916edb1c73..4450273f9667 100644
--- a/arch/arm64/boot/dts/qcom/sm8650-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sm8650-mtp.dts
@@ -66,6 +66,29 @@
};
};
+ sound {
+ compatible = "qcom,sm8650-sndcard", "qcom,sm8450-sndcard";
+ model = "SM8650-MTP";
+ audio-routing = "SpkrLeft IN", "WSA_SPK1 OUT",
+ "SpkrRight IN", "WSA_SPK2 OUT";
+
+ wsa-dai-link {
+ link-name = "WSA Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai WSA_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&left_spkr>, <&right_spkr>, <&swr0 0>, <&lpass_wsamacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+ };
+
vph_pwr: vph-pwr-regulator {
compatible = "regulator-fixed";
@@ -428,6 +451,138 @@
RPMH_REGULATOR_MODE_HPM>;
};
};
+
+ regulators-6 {
+ compatible = "qcom,pm8010-rpmh-regulators";
+ qcom,pmic-id = "m";
+
+ vdd-l1-l2-supply = <&vreg_s1c_1p2>;
+ vdd-l3-l4-supply = <&vreg_bob2>;
+ vdd-l5-supply = <&vreg_s6c_1p8>;
+ vdd-l6-supply = <&vreg_bob1>;
+ vdd-l7-supply = <&vreg_bob1>;
+
+ vreg_l1m_1p1: ldo1 {
+ regulator-name = "vreg_l1m_1p1";
+ regulator-min-microvolt = <1104000>;
+ regulator-max-microvolt = <1104000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2m_1p056: ldo2 {
+ regulator-name = "vreg_l2m_1p056";
+ regulator-min-microvolt = <1056000>;
+ regulator-max-microvolt = <1056000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3m_2p8: ldo3 {
+ regulator-name = "vreg_l3m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l4m_2p8: ldo4 {
+ regulator-name = "vreg_l4m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5m_1p8: ldo5 {
+ regulator-name = "vreg_l5m_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6m_2p8: ldo6 {
+ regulator-name = "vreg_l6m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7m_2p96: ldo7 {
+ regulator-name = "vreg_l7m_2p96";
+ regulator-min-microvolt = <2960000>;
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-7 {
+ compatible = "qcom,pm8010-rpmh-regulators";
+ qcom,pmic-id = "n";
+
+ vdd-l1-l2-supply = <&vreg_s1c_1p2>;
+ vdd-l3-l4-supply = <&vreg_s6c_1p8>;
+ vdd-l5-supply = <&vreg_bob2>;
+ vdd-l6-supply = <&vreg_bob2>;
+ vdd-l7-supply = <&vreg_bob1>;
+
+ vreg_l1n_1p1: ldo1 {
+ regulator-name = "vreg_l1n_1p1";
+ regulator-min-microvolt = <1104000>;
+ regulator-max-microvolt = <1104000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2n_1p056: ldo2 {
+ regulator-name = "vreg_l2n_1p056";
+ regulator-min-microvolt = <1056000>;
+ regulator-max-microvolt = <1056000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3n_1p8: ldo3 {
+ regulator-name = "vreg_l3n_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l4n_1p8: ldo4 {
+ regulator-name = "vreg_l4n_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5n_2p8: ldo5 {
+ regulator-name = "vreg_l5n_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6n_2p8: ldo6 {
+ regulator-name = "vreg_l6n_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7n_3p3: ldo7 {
+ regulator-name = "vreg_l7n_3p3";
+ regulator-min-microvolt = <3304000>;
+ regulator-max-microvolt = <3304000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
};
&dispcc {
@@ -622,7 +777,7 @@
&tlmm {
/* Reserved I/Os for NFC */
- gpio-reserved-ranges = <32 8>;
+ gpio-reserved-ranges = <32 8>, <74 1>;
disp0_reset_n_active: disp0-reset-n-active-state {
pins = "gpio133";
diff --git a/arch/arm64/boot/dts/qcom/sm8650-qrd.dts b/arch/arm64/boot/dts/qcom/sm8650-qrd.dts
index 592a67a47c78..b07cac2e5bc8 100644
--- a/arch/arm64/boot/dts/qcom/sm8650-qrd.dts
+++ b/arch/arm64/boot/dts/qcom/sm8650-qrd.dts
@@ -77,9 +77,83 @@
reg = <1>;
pmic_glink_ss_in: endpoint {
- remote-endpoint = <&usb_1_dwc3_ss>;
+ remote-endpoint = <&redriver_ss_out>;
};
};
+
+ port@2 {
+ reg = <2>;
+
+ pmic_glink_sbu: endpoint {
+ remote-endpoint = <&wcd_usbss_sbu_mux>;
+ };
+ };
+ };
+ };
+ };
+
+ sound {
+ compatible = "qcom,sm8650-sndcard", "qcom,sm8450-sndcard";
+ model = "SM8650-QRD";
+ audio-routing = "SpkrLeft IN", "WSA_SPK1 OUT",
+ "SpkrRight IN", "WSA_SPK2 OUT",
+ "IN1_HPHL", "HPHL_OUT",
+ "IN2_HPHR", "HPHR_OUT",
+ "AMIC1", "MIC BIAS1",
+ "AMIC2", "MIC BIAS2",
+ "AMIC3", "MIC BIAS3",
+ "AMIC4", "MIC BIAS3",
+ "AMIC5", "MIC BIAS4",
+ "TX SWR_INPUT0", "ADC1_OUTPUT",
+ "TX SWR_INPUT1", "ADC2_OUTPUT",
+ "TX SWR_INPUT2", "ADC3_OUTPUT",
+ "TX SWR_INPUT3", "ADC4_OUTPUT";
+
+ wcd-playback-dai-link {
+ link-name = "WCD Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai RX_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&wcd939x 0>, <&swr1 0>, <&lpass_rxmacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ wcd-capture-dai-link {
+ link-name = "WCD Capture";
+
+ cpu {
+ sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>;
+ };
+
+ codec {
+ sound-dai = <&wcd939x 1>, <&swr2 0>, <&lpass_txmacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ wsa-dai-link {
+ link-name = "WSA Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai WSA_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&left_spkr>, <&right_spkr>, <&swr0 0>, <&lpass_wsamacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
};
};
};
@@ -94,6 +168,41 @@
regulator-always-on;
regulator-boot-on;
};
+
+ wcd939x: audio-codec {
+ compatible = "qcom,wcd9395-codec", "qcom,wcd9390-codec";
+
+ pinctrl-0 = <&wcd_default>;
+ pinctrl-names = "default";
+
+ qcom,micbias1-microvolt = <1800000>;
+ qcom,micbias2-microvolt = <1800000>;
+ qcom,micbias3-microvolt = <1800000>;
+ qcom,micbias4-microvolt = <1800000>;
+ qcom,mbhc-buttons-vthreshold-microvolt = <75000 150000 237000 500000 500000 500000 500000 500000>;
+ qcom,mbhc-headset-vthreshold-microvolt = <1700000>;
+ qcom,mbhc-headphone-vthreshold-microvolt = <50000>;
+ qcom,rx-device = <&wcd_rx>;
+ qcom,tx-device = <&wcd_tx>;
+
+ reset-gpios = <&tlmm 107 GPIO_ACTIVE_LOW>;
+
+ vdd-buck-supply = <&vreg_l15b_1p8>;
+ vdd-rxtx-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l15b_1p8>;
+ vdd-mic-bias-supply = <&vreg_bob1>;
+
+ #sound-dai-cells = <1>;
+
+ mode-switch;
+ orientation-switch;
+
+ port {
+ wcd_codec_headset_in: endpoint {
+ remote-endpoint = <&wcd_usbss_headset_out>;
+ };
+ };
+ };
};
&apps_rsc {
@@ -436,6 +545,138 @@
RPMH_REGULATOR_MODE_HPM>;
};
};
+
+ regulators-6 {
+ compatible = "qcom,pm8010-rpmh-regulators";
+ qcom,pmic-id = "m";
+
+ vdd-l1-l2-supply = <&vreg_s1c_1p2>;
+ vdd-l3-l4-supply = <&vreg_bob2>;
+ vdd-l5-supply = <&vreg_s6c_1p8>;
+ vdd-l6-supply = <&vreg_bob1>;
+ vdd-l7-supply = <&vreg_bob1>;
+
+ vreg_l1m_1p1: ldo1 {
+ regulator-name = "vreg_l1m_1p1";
+ regulator-min-microvolt = <1104000>;
+ regulator-max-microvolt = <1104000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2m_1p056: ldo2 {
+ regulator-name = "vreg_l2m_1p056";
+ regulator-min-microvolt = <1056000>;
+ regulator-max-microvolt = <1056000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3m_2p8: ldo3 {
+ regulator-name = "vreg_l3m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l4m_2p8: ldo4 {
+ regulator-name = "vreg_l4m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5m_1p8: ldo5 {
+ regulator-name = "vreg_l5m_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6m_2p8: ldo6 {
+ regulator-name = "vreg_l6m_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7m_2p96: ldo7 {
+ regulator-name = "vreg_l7m_2p96";
+ regulator-min-microvolt = <2960000>;
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+ regulators-7 {
+ compatible = "qcom,pm8010-rpmh-regulators";
+ qcom,pmic-id = "n";
+
+ vdd-l1-l2-supply = <&vreg_s1c_1p2>;
+ vdd-l3-l4-supply = <&vreg_s6c_1p8>;
+ vdd-l5-supply = <&vreg_bob2>;
+ vdd-l6-supply = <&vreg_bob2>;
+ vdd-l7-supply = <&vreg_bob1>;
+
+ vreg_l1n_1p1: ldo1 {
+ regulator-name = "vreg_l1n_1p1";
+ regulator-min-microvolt = <1104000>;
+ regulator-max-microvolt = <1104000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l2n_1p056: ldo2 {
+ regulator-name = "vreg_l2n_1p056";
+ regulator-min-microvolt = <1056000>;
+ regulator-max-microvolt = <1056000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
+ regulator-allowed-modes = <RPMH_REGULATOR_MODE_LPM
+ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l3n_1p8: ldo3 {
+ regulator-name = "vreg_l3n_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l4n_1p8: ldo4 {
+ regulator-name = "vreg_l4n_1p8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l5n_2p8: ldo5 {
+ regulator-name = "vreg_l5n_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6n_2p8: ldo6 {
+ regulator-name = "vreg_l6n_2p8";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7n_3p3: ldo7 {
+ regulator-name = "vreg_l7n_3p3";
+ regulator-min-microvolt = <3304000>;
+ regulator-max-microvolt = <3304000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
};
&dispcc {
@@ -446,6 +687,78 @@
status = "okay";
};
+&i2c3 {
+ status = "okay";
+
+ wcd_usbss: typec-mux@e {
+ compatible = "qcom,wcd9395-usbss", "qcom,wcd9390-usbss";
+ reg = <0xe>;
+
+ vdd-supply = <&vreg_l15b_1p8>;
+ reset-gpios = <&tlmm 152 GPIO_ACTIVE_HIGH>;
+
+ mode-switch;
+ orientation-switch;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ wcd_usbss_sbu_mux: endpoint {
+ remote-endpoint = <&pmic_glink_sbu>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ wcd_usbss_headset_out: endpoint {
+ remote-endpoint = <&wcd_codec_headset_in>;
+ };
+ };
+ };
+ };
+};
+
+&i2c6 {
+ status = "okay";
+
+ typec-mux@1c {
+ compatible = "onnn,nb7vpq904m";
+ reg = <0x1c>;
+
+ vcc-supply = <&vreg_l15b_1p8>;
+
+ retimer-switch;
+ orientation-switch;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ redriver_ss_out: endpoint {
+ remote-endpoint = <&pmic_glink_ss_in>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ redriver_ss_in: endpoint {
+ data-lanes = <3 2 1 0>;
+ remote-endpoint = <&usb_dp_qmpphy_out>;
+ };
+ };
+ };
+ };
+};
+
&ipa {
qcom,gsi-loader = "self";
memory-region = <&ipa_fw_mem>;
@@ -453,6 +766,16 @@
status = "okay";
};
+&lpass_tlmm {
+ spkr_1_sd_n_active: spkr-1-sd-n-active-state {
+ pins = "gpio21";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+};
+
&mdss {
status = "okay";
};
@@ -495,6 +818,15 @@
status = "okay";
};
+&mdss_dp0 {
+ status = "okay";
+};
+
+&mdss_dp0_out {
+ data-lanes = <0 1>;
+ remote-endpoint = <&usb_dp_qmpphy_dp_in>;
+};
+
&mdss_mdp {
status = "okay";
};
@@ -600,6 +932,11 @@
status = "okay";
};
+&qup_i2c3_data_clk {
+ /* Use internal I2C pull-up */
+ bias-pull-up = <2200>;
+};
+
&qupv3_id_0 {
status = "okay";
};
@@ -657,9 +994,77 @@
};
};
+&swr0 {
+ status = "okay";
+
+ /* WSA8845, Speaker Left */
+ left_spkr: speaker@0,0 {
+ compatible = "sdw20217020400";
+ reg = <0 0>;
+ pinctrl-0 = <&spkr_1_sd_n_active>;
+ pinctrl-names = "default";
+ powerdown-gpios = <&lpass_tlmm 21 GPIO_ACTIVE_LOW>;
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "SpkrLeft";
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l3c_1p2>;
+ };
+
+ /* WSA8845, Speaker Right */
+ right_spkr: speaker@0,1 {
+ compatible = "sdw20217020400";
+ reg = <0 1>;
+ pinctrl-0 = <&spkr_2_sd_n_active>;
+ pinctrl-names = "default";
+ powerdown-gpios = <&tlmm 77 GPIO_ACTIVE_LOW>;
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "SpkrRight";
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l3c_1p2>;
+ };
+};
+
+&swr1 {
+ status = "okay";
+
+ /* WCD9395 RX */
+ wcd_rx: codec@0,4 {
+ compatible = "sdw20217010e00";
+ reg = <0 4>;
+
+ /*
+ * WCD9395 RX Port 1 (HPH_L/R) <=> SWR1 Port 1 (HPH_L/R)
+ * WCD9395 RX Port 2 (CLSH) <=> SWR1 Port 2 (CLSH)
+ * WCD9395 RX Port 3 (COMP_L/R) <=> SWR1 Port 3 (COMP_L/R)
+ * WCD9395 RX Port 4 (LO) <=> SWR1 Port 4 (LO)
+ * WCD9395 RX Port 5 (DSD_L/R) <=> SWR1 Port 5 (DSD_L/R)
+ * WCD9395 RX Port 6 (HIFI_PCM_L/R) <=> SWR1 Port 9 (HIFI_PCM_L/R)
+ */
+ qcom,rx-port-mapping = <1 2 3 4 5 9>;
+ };
+};
+
+&swr2 {
+ status = "okay";
+
+ /* WCD9395 TX */
+ wcd_tx: codec@0,3 {
+ compatible = "sdw20217010e00";
+ reg = <0 3>;
+
+ /*
+ * WCD9395 TX Port 1 (ADC1,2,3,4) <=> SWR2 Port 2 (TX SWR_INPUT 0,1,2,3)
+ * WCD9395 TX Port 2 (ADC3,4 & DMIC0,1) <=> SWR2 Port 2 (TX SWR_INPUT 0,1,2,3)
+ * WCD9395 TX Port 3 (DMIC0,1,2,3 & MBHC) <=> SWR2 Port 3 (TX SWR_INPUT 4,5,6,7)
+ * WCD9395 TX Port 4 (DMIC4,5,6,7) <=> SWR2 Port 4 (TX SWR_INPUT 8,9,10,11)
+ */
+ qcom,tx-port-mapping = <2 2 3 4>;
+ };
+};
+
&tlmm {
/* Reserved I/Os for NFC */
- gpio-reserved-ranges = <32 8>;
+ gpio-reserved-ranges = <32 8>, <74 1>;
bt_default: bt-default-state {
bt-en-pins {
@@ -704,6 +1109,14 @@
bias-pull-down;
};
+ spkr_2_sd_n_active: spkr-2-sd-n-active-state {
+ pins = "gpio77";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+
ts_irq: ts-irq-state {
pins = "gpio161";
function = "gpio";
@@ -718,6 +1131,14 @@
drive-strength = <8>;
bias-pull-up;
};
+
+ wcd_default: wcd-reset-n-active-state {
+ pins = "gpio107";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
};
&uart14 {
@@ -787,7 +1208,7 @@
};
&usb_1_dwc3_ss {
- remote-endpoint = <&pmic_glink_ss_in>;
+ remote-endpoint = <&usb_dp_qmpphy_usb_ss_in>;
};
&usb_1_hsphy {
@@ -803,9 +1224,23 @@
vdda-phy-supply = <&vreg_l3i_1p2>;
vdda-pll-supply = <&vreg_l3g_0p91>;
+ orientation-switch;
+
status = "okay";
};
+&usb_dp_qmpphy_dp_in {
+ remote-endpoint = <&mdss_dp0_out>;
+};
+
+&usb_dp_qmpphy_out {
+ remote-endpoint = <&redriver_ss_in>;
+};
+
+&usb_dp_qmpphy_usb_ss_in {
+ remote-endpoint = <&usb_1_dwc3_ss>;
+};
+
&xo_board {
clock-frequency = <76800000>;
};
diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi
index 2df77123a8c7..ba72d8f38420 100644
--- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
@@ -525,6 +525,11 @@
no-map;
};
+ qlink_logging_mem: qlink-logging@84800000 {
+ reg = <0 0x84800000 0 0x200000>;
+ no-map;
+ };
+
mpss_dsm_mem: mpss-dsm@86b00000 {
reg = <0 0x86b00000 0 0x4900000>;
no-map;
@@ -1228,7 +1233,7 @@
clocks = <&gcc GCC_QUPV3_WRAP2_S6_CLK>;
clock-names = "se";
- interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
+ interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
&clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>,
<&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
&config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>;
@@ -1250,7 +1255,7 @@
clocks = <&gcc GCC_QUPV3_WRAP2_S7_CLK>;
clock-names = "se";
- interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
+ interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
&clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>,
<&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
&config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>;
@@ -2213,8 +2218,22 @@
<0 0x60100000 0 0x100000>;
reg-names = "parf", "dbi", "elbi", "atu", "config";
- interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
clocks = <&gcc GCC_PCIE_0_AUX_CLK>,
<&gcc GCC_PCIE_0_CFG_AHB_CLK>,
@@ -2255,6 +2274,11 @@
interrupt-map-mask = <0 0 0 0x7>;
#interrupt-cells = <1>;
+ /* Entries are reversed due to the unusual ITS DeviceID encoding */
+ msi-map = <0x0 &gic_its 0x1401 0x1>,
+ <0x100 &gic_its 0x1400 0x1>;
+ msi-map-mask = <0xff00>;
+
linux,pci-domain = <0>;
num-lanes = <2>;
bus-range = <0 0xff>;
@@ -2317,8 +2341,22 @@
"atu",
"config";
- interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "msi";
+ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
clocks = <&gcc GCC_PCIE_1_AUX_CLK>,
<&gcc GCC_PCIE_1_CFG_AHB_CLK>,
@@ -2364,6 +2402,11 @@
interrupt-map-mask = <0 0 0 0x7>;
#interrupt-cells = <1>;
+ /* Entries are reversed due to the unusual ITS DeviceID encoding */
+ msi-map = <0x0 &gic_its 0x1481 0x1>,
+ <0x100 &gic_its 0x1480 0x1>;
+ msi-map-mask = <0xff00>;
+
linux,pci-domain = <1>;
num-lanes = <2>;
bus-range = <0 0xff>;
@@ -2448,10 +2491,12 @@
compatible = "qcom,sm8650-qmp-ufs-phy";
reg = <0 0x01d80000 0 0x2000>;
- clocks = <&tcsr TCSR_UFS_CLKREF_EN>,
- <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+ <&tcsr TCSR_UFS_CLKREF_EN>;
clock-names = "ref",
- "ref_aux";
+ "ref_aux",
+ "qref";
resets = <&ufs_mem_hc 0>;
reset-names = "ufsphy";
@@ -2627,7 +2672,8 @@
"mss";
memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>,
- <&mpss_dsm_mem>, <&mpss_dsm_mem_2>;
+ <&mpss_dsm_mem>, <&mpss_dsm_mem_2>,
+ <&qlink_logging_mem>;
qcom,qmp = <&aoss_qmp>;
@@ -2919,7 +2965,7 @@
};
};
- dmic02_default: dmic02-default-state {
+ dmic23_default: dmic23-default-state {
clk-pins {
pins = "gpio8";
function = "dmic2_clk";
@@ -3703,7 +3749,7 @@
spmi_bus: spmi@c400000 {
compatible = "qcom,spmi-pmic-arb";
reg = <0 0x0c400000 0 0x3000>,
- <0 0x0c500000 0 0x4000000>,
+ <0 0x0c500000 0 0x400000>,
<0 0x0c440000 0 0x80000>,
<0 0x0c4c0000 0 0x20000>,
<0 0x0c42d000 0 0x4000>;
@@ -4808,6 +4854,7 @@
iommus = <&apps_smmu 0x1003 0x80>,
<&apps_smmu 0x1043 0x20>;
+ dma-coherent;
};
compute-cb@4 {
@@ -4816,6 +4863,7 @@
iommus = <&apps_smmu 0x1004 0x80>,
<&apps_smmu 0x1044 0x20>;
+ dma-coherent;
};
compute-cb@5 {
@@ -4824,6 +4872,7 @@
iommus = <&apps_smmu 0x1005 0x80>,
<&apps_smmu 0x1045 0x20>;
+ dma-coherent;
};
compute-cb@6 {
@@ -4832,6 +4881,7 @@
iommus = <&apps_smmu 0x1006 0x80>,
<&apps_smmu 0x1046 0x20>;
+ dma-coherent;
};
compute-cb@7 {
@@ -4841,6 +4891,7 @@
iommus = <&apps_smmu 0x1007 0x40>,
<&apps_smmu 0x1067 0x0>,
<&apps_smmu 0x1087 0x0>;
+ dma-coherent;
};
};
@@ -4961,6 +5012,7 @@
iommus = <&apps_smmu 0x1961 0x0>,
<&apps_smmu 0x0c01 0x20>,
<&apps_smmu 0x19c1 0x0>;
+ dma-coherent;
};
compute-cb@2 {
@@ -4970,6 +5022,7 @@
iommus = <&apps_smmu 0x1962 0x0>,
<&apps_smmu 0x0c02 0x20>,
<&apps_smmu 0x19c2 0x0>;
+ dma-coherent;
};
compute-cb@3 {
@@ -4979,6 +5032,7 @@
iommus = <&apps_smmu 0x1963 0x0>,
<&apps_smmu 0x0c03 0x20>,
<&apps_smmu 0x19c3 0x0>;
+ dma-coherent;
};
compute-cb@4 {
@@ -4988,6 +5042,7 @@
iommus = <&apps_smmu 0x1964 0x0>,
<&apps_smmu 0x0c04 0x20>,
<&apps_smmu 0x19c4 0x0>;
+ dma-coherent;
};
compute-cb@5 {
@@ -4997,6 +5052,7 @@
iommus = <&apps_smmu 0x1965 0x0>,
<&apps_smmu 0x0c05 0x20>,
<&apps_smmu 0x19c5 0x0>;
+ dma-coherent;
};
compute-cb@6 {
@@ -5006,6 +5062,7 @@
iommus = <&apps_smmu 0x1966 0x0>,
<&apps_smmu 0x0c06 0x20>,
<&apps_smmu 0x19c6 0x0>;
+ dma-coherent;
};
compute-cb@7 {
@@ -5015,6 +5072,7 @@
iommus = <&apps_smmu 0x1967 0x0>,
<&apps_smmu 0x0c07 0x20>,
<&apps_smmu 0x19c7 0x0>;
+ dma-coherent;
};
compute-cb@8 {
@@ -5024,6 +5082,7 @@
iommus = <&apps_smmu 0x1968 0x0>,
<&apps_smmu 0x0c08 0x20>,
<&apps_smmu 0x19c8 0x0>;
+ dma-coherent;
};
};
};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
index 7532d8eca2de..6a0a54532e5f 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
@@ -18,10 +18,124 @@
serial0 = &uart21;
};
+ wcd938x: audio-codec {
+ compatible = "qcom,wcd9385-codec";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&wcd_default>;
+
+ qcom,micbias1-microvolt = <1800000>;
+ qcom,micbias2-microvolt = <1800000>;
+ qcom,micbias3-microvolt = <1800000>;
+ qcom,micbias4-microvolt = <1800000>;
+ qcom,mbhc-buttons-vthreshold-microvolt = <75000 150000 237000 500000 500000 500000 500000 500000>;
+ qcom,mbhc-headset-vthreshold-microvolt = <1700000>;
+ qcom,mbhc-headphone-vthreshold-microvolt = <50000>;
+ qcom,rx-device = <&wcd_rx>;
+ qcom,tx-device = <&wcd_tx>;
+
+ reset-gpios = <&tlmm 191 GPIO_ACTIVE_LOW>;
+
+ vdd-buck-supply = <&vreg_l15b_1p8>;
+ vdd-rxtx-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l15b_1p8>;
+ vdd-mic-bias-supply = <&vreg_bob1>;
+
+ #sound-dai-cells = <1>;
+ };
+
chosen {
stdout-path = "serial0:115200n8";
};
+ sound {
+ compatible = "qcom,x1e80100-sndcard";
+ model = "X1E80100-CRD";
+ audio-routing = "WooferLeft IN", "WSA WSA_SPK1 OUT",
+ "TwitterLeft IN", "WSA WSA_SPK2 OUT",
+ "WooferRight IN", "WSA2 WSA_SPK2 OUT",
+ "TwitterRight IN", "WSA2 WSA_SPK2 OUT",
+ "IN1_HPHL", "HPHL_OUT",
+ "IN2_HPHR", "HPHR_OUT",
+ "AMIC2", "MIC BIAS2",
+ "VA DMIC0", "MIC BIAS3",
+ "VA DMIC1", "MIC BIAS3",
+ "VA DMIC2", "MIC BIAS1",
+ "VA DMIC3", "MIC BIAS1",
+ "VA DMIC0", "VA MIC BIAS3",
+ "VA DMIC1", "VA MIC BIAS3",
+ "VA DMIC2", "VA MIC BIAS1",
+ "VA DMIC3", "VA MIC BIAS1",
+ "TX SWR_INPUT1", "ADC2_OUTPUT";
+
+ wcd-playback-dai-link {
+ link-name = "WCD Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai RX_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&wcd938x 0>, <&swr1 0>, <&lpass_rxmacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ wcd-capture-dai-link {
+ link-name = "WCD Capture";
+
+ cpu {
+ sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>;
+ };
+
+ codec {
+ sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ wsa-dai-link {
+ link-name = "WSA Playback";
+
+ cpu {
+ sound-dai = <&q6apmbedai WSA_CODEC_DMA_RX_0>;
+ };
+
+ codec {
+ sound-dai = <&left_woofer>, <&left_tweeter>,
+ <&swr0 0>, <&lpass_wsamacro 0>,
+ <&right_woofer>, <&right_tweeter>,
+ <&swr3 0>, <&lpass_wsa2macro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+
+ va-dai-link {
+ link-name = "VA Capture";
+
+ cpu {
+ sound-dai = <&q6apmbedai VA_CODEC_DMA_TX_0>;
+ };
+
+ codec {
+ sound-dai = <&lpass_vamacro 0>;
+ };
+
+ platform {
+ sound-dai = <&q6apm>;
+ };
+ };
+ };
+
vph_pwr: vph-pwr-regulator {
compatible = "regulator-fixed";
@@ -401,10 +515,251 @@
};
};
+&i2c0 {
+ clock-frequency = <400000>;
+
+ status = "okay";
+
+ touchpad@15 {
+ compatible = "hid-over-i2c";
+ reg = <0x15>;
+
+ hid-descr-addr = <0x1>;
+ interrupts-extended = <&tlmm 3 IRQ_TYPE_LEVEL_LOW>;
+
+ pinctrl-0 = <&tpad_default>;
+ pinctrl-names = "default";
+
+ wakeup-source;
+ };
+
+ keyboard@3a {
+ compatible = "hid-over-i2c";
+ reg = <0x3a>;
+
+ hid-descr-addr = <0x1>;
+ interrupts-extended = <&tlmm 67 IRQ_TYPE_LEVEL_LOW>;
+
+ pinctrl-0 = <&kybd_default>;
+ pinctrl-names = "default";
+
+ wakeup-source;
+ };
+};
+
+&i2c8 {
+ clock-frequency = <400000>;
+
+ status = "okay";
+
+ touchscreen@10 {
+ compatible = "hid-over-i2c";
+ reg = <0x10>;
+
+ hid-descr-addr = <0x1>;
+ interrupts-extended = <&tlmm 51 IRQ_TYPE_LEVEL_LOW>;
+
+ pinctrl-0 = <&ts0_default>;
+ pinctrl-names = "default";
+ };
+};
+
+&lpass_tlmm {
+ spkr_01_sd_n_active: spkr-01-sd-n-active-state {
+ pins = "gpio12";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+
+ spkr_23_sd_n_active: spkr-23-sd-n-active-state {
+ pins = "gpio13";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
+};
+
+&lpass_vamacro {
+ pinctrl-0 = <&dmic01_default>, <&dmic23_default>;
+ pinctrl-names = "default";
+
+ vdd-micb-supply = <&vreg_l1b_1p8>;
+ qcom,dmic-sample-rate = <4800000>;
+};
+
+&mdss {
+ status = "okay";
+};
+
+&mdss_dp3 {
+ compatible = "qcom,x1e80100-dp";
+ /delete-property/ #sound-dai-cells;
+
+ data-lanes = <0 1 2 3>;
+
+ status = "okay";
+
+ aux-bus {
+ panel {
+ compatible = "edp-panel";
+ power-supply = <&vreg_edp_3p3>;
+
+ port {
+ edp_panel_in: endpoint {
+ remote-endpoint = <&mdss_dp3_out>;
+ };
+ };
+ };
+ };
+
+ ports {
+ port@1 {
+ reg = <1>;
+ mdss_dp3_out: endpoint {
+ remote-endpoint = <&edp_panel_in>;
+ };
+ };
+ };
+};
+
+&mdss_dp3_phy {
+ vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-pll-supply = <&vreg_l2j_1p2>;
+
+ status = "okay";
+};
+
+&pcie4 {
+ status = "okay";
+};
+
+&pcie4_phy {
+ vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-pll-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&pcie6a {
+ status = "okay";
+};
+
+&pcie6a_phy {
+ vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-pll-supply = <&vreg_l2j_1p2>;
+
+ status = "okay";
+};
+
+&qupv3_0 {
+ status = "okay";
+};
+
+&qupv3_1 {
+ status = "okay";
+};
+
&qupv3_2 {
status = "okay";
};
+&remoteproc_adsp {
+ firmware-name = "qcom/x1e80100/adsp.mbn",
+ "qcom/x1e80100/adsp_dtb.mbn";
+
+ status = "okay";
+};
+
+&remoteproc_cdsp {
+ firmware-name = "qcom/x1e80100/cdsp.mbn",
+ "qcom/x1e80100/cdsp_dtb.mbn";
+
+ status = "okay";
+};
+
+&swr0 {
+ status = "okay";
+
+ /* WSA8845, Left Woofer */
+ left_woofer: speaker@0,0 {
+ compatible = "sdw20217020400";
+ reg = <0 0>;
+ pinctrl-0 = <&spkr_01_sd_n_active>;
+ pinctrl-names = "default";
+ powerdown-gpios = <&lpass_tlmm 12 GPIO_ACTIVE_LOW>;
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "WooferLeft";
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l12b_1p2>;
+ };
+
+ /* WSA8845, Left Tweeter */
+ left_tweeter: speaker@0,1 {
+ compatible = "sdw20217020400";
+ reg = <0 1>;
+ /* pinctrl in left_woofer node because of sharing the GPIO*/
+ powerdown-gpios = <&lpass_tlmm 12 GPIO_ACTIVE_LOW>;
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "TwitterLeft";
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l12b_1p2>;
+ };
+};
+
+&swr1 {
+ status = "okay";
+
+ /* WCD9385 RX */
+ wcd_rx: codec@0,4 {
+ compatible = "sdw20217010d00";
+ reg = <0 4>;
+ qcom,rx-port-mapping = <1 2 3 4 5>;
+ };
+};
+
+&swr2 {
+ status = "okay";
+
+ /* WCD9385 TX */
+ wcd_tx: codec@0,3 {
+ compatible = "sdw20217010d00";
+ reg = <0 3>;
+ qcom,tx-port-mapping = <1 1 2 3>;
+ };
+};
+
+&swr3 {
+ status = "okay";
+
+ /* WSA8845, Right Woofer */
+ right_woofer: speaker@0,0 {
+ compatible = "sdw20217020400";
+ reg = <0 0>;
+ pinctrl-0 = <&spkr_23_sd_n_active>;
+ pinctrl-names = "default";
+ powerdown-gpios = <&lpass_tlmm 13 GPIO_ACTIVE_LOW>;
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "WooferRight";
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l12b_1p2>;
+ };
+
+ /* WSA8845, Right Tweeter */
+ right_tweeter: speaker@0,1 {
+ compatible = "sdw20217020400";
+ reg = <0 1>;
+ /* pinctrl in right_woofer node because of sharing the GPIO*/
+ powerdown-gpios = <&lpass_tlmm 13 GPIO_ACTIVE_LOW>;
+ #sound-dai-cells = <0>;
+ sound-name-prefix = "TwitterRight";
+ vdd-1p8-supply = <&vreg_l15b_1p8>;
+ vdd-io-supply = <&vreg_l12b_1p2>;
+ };
+};
+
&tlmm {
gpio-reserved-ranges = <34 2>, /* Unused */
<44 4>, /* SPI (TPM) */
@@ -416,9 +771,104 @@
drive-strength = <16>;
bias-disable;
};
+
+ kybd_default: kybd-default-state {
+ pins = "gpio67";
+ function = "gpio";
+ bias-disable;
+ };
+
+ tpad_default: tpad-default-state {
+ pins = "gpio3";
+ function = "gpio";
+ bias-disable;
+ };
+
+ ts0_default: ts0-default-state {
+ int-n-pins {
+ pins = "gpio51";
+ function = "gpio";
+ bias-disable;
+ };
+
+ reset-n-pins {
+ pins = "gpio48";
+ function = "gpio";
+ output-high;
+ drive-strength = <16>;
+ };
+ };
+
+ wcd_default: wcd-reset-n-active-state {
+ pins = "gpio191";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ output-low;
+ };
};
&uart21 {
compatible = "qcom,geni-debug-uart";
status = "okay";
};
+
+&usb_1_ss0_hsphy {
+ vdd-supply = <&vreg_l2e_0p8>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1_ss0_qmpphy {
+ status = "okay";
+};
+
+&usb_1_ss0 {
+ status = "okay";
+};
+
+&usb_1_ss0_dwc3 {
+ dr_mode = "host";
+ usb-role-switch;
+};
+
+&usb_1_ss1_hsphy {
+ vdd-supply = <&vreg_l2e_0p8>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1_ss1_qmpphy {
+ status = "okay";
+};
+
+&usb_1_ss1 {
+ status = "okay";
+};
+
+&usb_1_ss1_dwc3 {
+ dr_mode = "host";
+ usb-role-switch;
+};
+
+&usb_1_ss2_hsphy {
+ vdd-supply = <&vreg_l2e_0p8>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1_ss2_qmpphy {
+ status = "okay";
+};
+
+&usb_1_ss2 {
+ status = "okay";
+};
+
+&usb_1_ss2_dwc3 {
+ dr_mode = "host";
+ usb-role-switch;
+};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
index a37ad9475c90..e76d29053d79 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
@@ -5,6 +5,7 @@
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/regulator/qcom,rpmh-regulator.h>
#include "x1e80100.dtsi"
@@ -31,6 +32,23 @@
regulator-always-on;
regulator-boot-on;
};
+
+ vreg_edp_3p3: regulator-edp-3p3 {
+ compatible = "regulator-fixed";
+
+ regulator-name = "VREG_EDP_3P3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ gpio = <&tlmm 70 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+
+ pinctrl-0 = <&edp_reg_en>;
+ pinctrl-names = "default";
+
+ regulator-always-on;
+ regulator-boot-on;
+ };
};
&apps_rsc {
@@ -243,7 +261,7 @@
qcom,pmic-id = "e";
vdd-l2-supply = <&vreg_s1f_0p7>;
- vdd-l3-supply = <&vph_pwr>;
+ vdd-l3-supply = <&vreg_s5j_1p2>;
vreg_l2e_0p8: ldo2 {
regulator-name = "vreg_l2e_0p8";
@@ -349,7 +367,7 @@
qcom,pmic-id = "j";
vdd-l1-supply = <&vreg_s1f_0p7>;
- vdd-l2-supply = <&vph_pwr>;
+ vdd-l2-supply = <&vreg_s5j_1p2>;
vdd-l3-supply = <&vreg_s1f_0p7>;
vdd-s5-supply = <&vph_pwr>;
@@ -383,17 +401,170 @@
};
};
+&mdss {
+ status = "okay";
+};
+
+&mdss_dp3 {
+ compatible = "qcom,x1e80100-dp";
+ /delete-property/ #sound-dai-cells;
+
+ data-lanes = <0 1 2 3>;
+
+ status = "okay";
+
+ aux-bus {
+ panel {
+ compatible = "edp-panel";
+ power-supply = <&vreg_edp_3p3>;
+
+ port {
+ edp_panel_in: endpoint {
+ remote-endpoint = <&mdss_dp3_out>;
+ };
+ };
+ };
+ };
+
+ ports {
+ port@1 {
+ reg = <1>;
+ mdss_dp3_out: endpoint {
+ remote-endpoint = <&edp_panel_in>;
+ };
+ };
+ };
+};
+
+&mdss_dp3_phy {
+ vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-pll-supply = <&vreg_l2j_1p2>;
+
+ status = "okay";
+};
+
+&pcie4 {
+ status = "okay";
+};
+
+&pcie4_phy {
+ vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-pll-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&pcie6a {
+ status = "okay";
+};
+
+&pcie6a_phy {
+ vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-pll-supply = <&vreg_l2j_1p2>;
+
+ status = "okay";
+};
+
+&qupv3_0 {
+ status = "okay";
+};
+
+&qupv3_1 {
+ status = "okay";
+};
+
&qupv3_2 {
status = "okay";
};
+&remoteproc_adsp {
+ firmware-name = "qcom/x1e80100/adsp.mbn",
+ "qcom/x1e80100/adsp_dtb.mbn";
+
+ status = "okay";
+};
+
+&remoteproc_cdsp {
+ firmware-name = "qcom/x1e80100/cdsp.mbn",
+ "qcom/x1e80100/cdsp_dtb.mbn";
+
+ status = "okay";
+};
+
&tlmm {
gpio-reserved-ranges = <33 3>, /* Unused */
<44 4>, /* SPI (TPM) */
<238 1>; /* UFS Reset */
+
+ edp_reg_en: edp-reg-en-state {
+ pins = "gpio70";
+ function = "gpio";
+ drive-strength = <16>;
+ bias-disable;
+ };
};
&uart21 {
compatible = "qcom,geni-debug-uart";
status = "okay";
};
+
+&usb_1_ss0_hsphy {
+ vdd-supply = <&vreg_l2e_0p8>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1_ss0_qmpphy {
+ status = "okay";
+};
+
+&usb_1_ss0 {
+ status = "okay";
+};
+
+&usb_1_ss0_dwc3 {
+ dr_mode = "host";
+ usb-role-switch;
+};
+
+&usb_1_ss1_hsphy {
+ vdd-supply = <&vreg_l2e_0p8>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1_ss1_qmpphy {
+ status = "okay";
+};
+
+&usb_1_ss1 {
+ status = "okay";
+};
+
+&usb_1_ss1_dwc3 {
+ dr_mode = "host";
+ usb-role-switch;
+};
+
+&usb_1_ss2_hsphy {
+ vdd-supply = <&vreg_l2e_0p8>;
+ vdda12-supply = <&vreg_l3e_1p2>;
+
+ status = "okay";
+};
+
+&usb_1_ss2_qmpphy {
+ status = "okay";
+};
+
+&usb_1_ss2 {
+ status = "okay";
+};
+
+&usb_1_ss2_dwc3 {
+ dr_mode = "host";
+ usb-role-switch;
+};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 6f75fc342ceb..8e517f76189e 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -4,14 +4,20 @@
*/
#include <dt-bindings/clock/qcom,rpmh.h>
+#include <dt-bindings/clock/qcom,x1e80100-dispcc.h>
#include <dt-bindings/clock/qcom,x1e80100-gcc.h>
+#include <dt-bindings/clock/qcom,x1e80100-tcsr.h>
#include <dt-bindings/dma/qcom-gpi.h>
#include <dt-bindings/interconnect/qcom,icc.h>
#include <dt-bindings/interconnect/qcom,x1e80100-rpmh.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/mailbox/qcom-ipcc.h>
+#include <dt-bindings/phy/phy-qcom-qmp.h>
#include <dt-bindings/power/qcom,rpmhpd.h>
#include <dt-bindings/power/qcom-rpmpd.h>
+#include <dt-bindings/soc/qcom,gpr.h>
#include <dt-bindings/soc/qcom,rpmh-rsc.h>
+#include <dt-bindings/sound/qcom,q6dsp-lpass-ports.h>
/ {
interrupt-parent = <&intc>;
@@ -395,16 +401,24 @@
CLUSTER_PD0: power-domain-cpu-cluster0 {
#power-domain-cells = <0>;
domain-idle-states = <&CLUSTER_CL4>, <&CLUSTER_CL5>;
+ power-domains = <&SYSTEM_PD>;
};
CLUSTER_PD1: power-domain-cpu-cluster1 {
#power-domain-cells = <0>;
domain-idle-states = <&CLUSTER_CL4>, <&CLUSTER_CL5>;
+ power-domains = <&SYSTEM_PD>;
};
CLUSTER_PD2: power-domain-cpu-cluster2 {
#power-domain-cells = <0>;
domain-idle-states = <&CLUSTER_CL4>, <&CLUSTER_CL5>;
+ power-domains = <&SYSTEM_PD>;
+ };
+
+ SYSTEM_PD: power-domain-system {
+ #power-domain-cells = <0>;
+ /* TODO: system-wide idle states */
};
};
@@ -662,6 +676,58 @@
};
};
+ smp2p-adsp {
+ compatible = "qcom,smp2p";
+
+ interrupts-extended = <&ipcc IPCC_CLIENT_LPASS
+ IPCC_MPROC_SIGNAL_SMP2P
+ IRQ_TYPE_EDGE_RISING>;
+
+ mboxes = <&ipcc IPCC_CLIENT_LPASS
+ IPCC_MPROC_SIGNAL_SMP2P>;
+
+ qcom,smem = <443>, <429>;
+ qcom,local-pid = <0>;
+ qcom,remote-pid = <2>;
+
+ smp2p_adsp_out: master-kernel {
+ qcom,entry-name = "master-kernel";
+ #qcom,smem-state-cells = <1>;
+ };
+
+ smp2p_adsp_in: slave-kernel {
+ qcom,entry-name = "slave-kernel";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+ };
+
+ smp2p-cdsp {
+ compatible = "qcom,smp2p";
+
+ interrupts-extended = <&ipcc IPCC_CLIENT_CDSP
+ IPCC_MPROC_SIGNAL_SMP2P
+ IRQ_TYPE_EDGE_RISING>;
+
+ mboxes = <&ipcc IPCC_CLIENT_CDSP
+ IPCC_MPROC_SIGNAL_SMP2P>;
+
+ qcom,smem = <94>, <432>;
+ qcom,local-pid = <0>;
+ qcom,remote-pid = <5>;
+
+ smp2p_cdsp_out: master-kernel {
+ qcom,entry-name = "master-kernel";
+ #qcom,smem-state-cells = <1>;
+ };
+
+ smp2p_cdsp_in: slave-kernel {
+ qcom,entry-name = "slave-kernel";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+ };
+
soc: soc@0 {
compatible = "simple-bus";
@@ -677,13 +743,13 @@
clocks = <&bi_tcxo_div2>,
<&sleep_clk>,
<0>,
+ <&pcie4_phy>,
<0>,
+ <&pcie6a_phy>,
<0>,
- <0>,
- <0>,
- <0>,
- <0>,
- <0>;
+ <&usb_1_ss0_qmpphy QMP_USB43DP_USB3_PIPE_CLK>,
+ <&usb_1_ss1_qmpphy QMP_USB43DP_USB3_PIPE_CLK>,
+ <&usb_1_ss2_qmpphy QMP_USB43DP_USB3_PIPE_CLK>;
power-domains = <&rpmhpd RPMHPD_CX>;
#clock-cells = <1>;
@@ -691,6 +757,17 @@
#power-domain-cells = <1>;
};
+ ipcc: mailbox@408000 {
+ compatible = "qcom,x1e80100-ipcc", "qcom,ipcc";
+ reg = <0 0x00408000 0 0x1000>;
+
+ interrupts = <GIC_SPI 229 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+
+ #mbox-cells = <2>;
+ };
+
gpi_dma2: dma-controller@800000 {
compatible = "qcom,x1e80100-gpi-dma", "qcom,sm6350-gpi-dma";
reg = <0 0x00800000 0 0x60000>;
@@ -1139,7 +1216,7 @@
clocks = <&gcc GCC_QUPV3_WRAP2_S5_CLK>;
clock-names = "se";
- interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
+ interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS
&clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>,
<&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
&config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>;
@@ -2428,6 +2505,126 @@
};
};
+ usb_1_ss0_hsphy: phy@fd3000 {
+ compatible = "qcom,x1e80100-snps-eusb2-phy",
+ "qcom,sm8550-snps-eusb2-phy";
+ reg = <0 0x00fd3000 0 0x154>;
+ #phy-cells = <0>;
+
+ clocks = <&tcsr TCSR_USB2_1_CLKREF_EN>;
+ clock-names = "ref";
+
+ resets = <&gcc GCC_QUSB2PHY_PRIM_BCR>;
+
+ status = "disabled";
+ };
+
+ usb_1_ss0_qmpphy: phy@fd5000 {
+ compatible = "qcom,x1e80100-qmp-usb3-dp-phy";
+ reg = <0 0x00fd5000 0 0x4000>;
+
+ clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>,
+ <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>;
+ clock-names = "aux",
+ "ref",
+ "com_aux",
+ "usb3_pipe";
+
+ power-domains = <&gcc GCC_USB_0_PHY_GDSC>;
+
+ resets = <&gcc GCC_USB3_PHY_PRIM_BCR>,
+ <&gcc GCC_USB4_0_DP0_PHY_PRIM_BCR>;
+ reset-names = "phy",
+ "common";
+
+ #clock-cells = <1>;
+ #phy-cells = <1>;
+
+ status = "disabled";
+ };
+
+ usb_1_ss1_hsphy: phy@fd9000 {
+ compatible = "qcom,x1e80100-snps-eusb2-phy",
+ "qcom,sm8550-snps-eusb2-phy";
+ reg = <0 0x00fd9000 0 0x154>;
+ #phy-cells = <0>;
+
+ clocks = <&tcsr TCSR_USB2_1_CLKREF_EN>;
+ clock-names = "ref";
+
+ resets = <&gcc GCC_QUSB2PHY_SEC_BCR>;
+
+ status = "disabled";
+ };
+
+ usb_1_ss1_qmpphy: phy@fda000 {
+ compatible = "qcom,x1e80100-qmp-usb3-dp-phy";
+ reg = <0 0x00fda000 0 0x4000>;
+
+ clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>,
+ <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>;
+ clock-names = "aux",
+ "ref",
+ "com_aux",
+ "usb3_pipe";
+
+ power-domains = <&gcc GCC_USB_1_PHY_GDSC>;
+
+ resets = <&gcc GCC_USB3_PHY_SEC_BCR>,
+ <&gcc GCC_USB4_1_DP0_PHY_SEC_BCR>;
+ reset-names = "phy",
+ "common";
+
+ #clock-cells = <1>;
+ #phy-cells = <1>;
+
+ status = "disabled";
+ };
+
+ usb_1_ss2_hsphy: phy@fde000 {
+ compatible = "qcom,x1e80100-snps-eusb2-phy",
+ "qcom,sm8550-snps-eusb2-phy";
+ reg = <0 0x00fde000 0 0x154>;
+ #phy-cells = <0>;
+
+ clocks = <&tcsr TCSR_USB2_1_CLKREF_EN>;
+ clock-names = "ref";
+
+ resets = <&gcc GCC_QUSB2PHY_TERT_BCR>;
+
+ status = "disabled";
+ };
+
+ usb_1_ss2_qmpphy: phy@fdf000 {
+ compatible = "qcom,x1e80100-qmp-usb3-dp-phy";
+ reg = <0 0x00fdf000 0 0x4000>;
+
+ clocks = <&gcc GCC_USB3_TERT_PHY_AUX_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_USB3_TERT_PHY_COM_AUX_CLK>,
+ <&gcc GCC_USB3_TERT_PHY_PIPE_CLK>;
+ clock-names = "aux",
+ "ref",
+ "com_aux",
+ "usb3_pipe";
+
+ power-domains = <&gcc GCC_USB_2_PHY_GDSC>;
+
+ resets = <&gcc GCC_USB3_PHY_TERT_BCR>,
+ <&gcc GCC_USB4_2_DP0_PHY_TERT_BCR>;
+ reset-names = "phy",
+ "common";
+
+ #clock-cells = <1>;
+ #phy-cells = <1>;
+
+ status = "disabled";
+ };
+
cnoc_main: interconnect@1500000 {
compatible = "qcom,x1e80100-cnoc-main";
reg = <0 0x1500000 0 0x14400>;
@@ -2536,12 +2733,258 @@
#interconnect-cells = <2>;
};
+ pcie6a: pci@1bf8000 {
+ device_type = "pci";
+ compatible = "qcom,pcie-x1e80100";
+ reg = <0 0x01bf8000 0 0x3000>,
+ <0 0x70000000 0 0xf1d>,
+ <0 0x70000f20 0 0xa8>,
+ <0 0x70001000 0 0x1000>,
+ <0 0x70100000 0 0x100000>;
+ reg-names = "parf",
+ "dbi",
+ "elbi",
+ "atu",
+ "config";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges = <0x01000000 0 0x00000000 0 0x70200000 0 0x100000>,
+ <0x02000000 0 0x70300000 0 0x70300000 0 0x3d00000>;
+ bus-range = <0 0xff>;
+
+ dma-coherent;
+
+ linux,pci-domain = <7>;
+ num-lanes = <2>;
+
+ interrupts = <GIC_SPI 773 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 774 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 837 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 838 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 839 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 840 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 841 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 842 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0x7>;
+ interrupt-map = <0 0 0 1 &intc 0 0 0 843 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 2 &intc 0 0 0 844 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 3 &intc 0 0 0 845 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 4 &intc 0 0 0 772 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&gcc GCC_PCIE_6A_AUX_CLK>,
+ <&gcc GCC_PCIE_6A_CFG_AHB_CLK>,
+ <&gcc GCC_PCIE_6A_MSTR_AXI_CLK>,
+ <&gcc GCC_PCIE_6A_SLV_AXI_CLK>,
+ <&gcc GCC_PCIE_6A_SLV_Q2A_AXI_CLK>,
+ <&gcc GCC_CFG_NOC_PCIE_ANOC_SOUTH_AHB_CLK>,
+ <&gcc GCC_CNOC_PCIE_SOUTH_SF_AXI_CLK>;
+ clock-names = "aux",
+ "cfg",
+ "bus_master",
+ "bus_slave",
+ "slave_q2a",
+ "noc_aggr",
+ "cnoc_sf_axi";
+
+ assigned-clocks = <&gcc GCC_PCIE_6A_AUX_CLK>;
+ assigned-clock-rates = <19200000>;
+
+ interconnects = <&pcie_south_anoc MASTER_PCIE_6A QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &cnoc_main SLAVE_PCIE_6A QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "pcie-mem",
+ "cpu-pcie";
+
+ resets = <&gcc GCC_PCIE_6A_BCR>,
+ <&gcc GCC_PCIE_6A_LINK_DOWN_BCR>;
+ reset-names = "pci",
+ "link_down";
+
+ power-domains = <&gcc GCC_PCIE_6A_GDSC>;
+
+ phys = <&pcie6a_phy>;
+ phy-names = "pciephy";
+
+ status = "disabled";
+ };
+
+ pcie6a_phy: phy@1bfc000 {
+ compatible = "qcom,x1e80100-qmp-gen4x2-pcie-phy";
+ reg = <0 0x01bfc000 0 0x2000>;
+
+ clocks = <&gcc GCC_PCIE_6A_PHY_AUX_CLK>,
+ <&gcc GCC_PCIE_6A_CFG_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_PCIE_6A_PHY_RCHNG_CLK>,
+ <&gcc GCC_PCIE_6A_PIPE_CLK>;
+ clock-names = "aux",
+ "cfg_ahb",
+ "ref",
+ "rchng",
+ "pipe";
+
+ resets = <&gcc GCC_PCIE_6A_PHY_BCR>,
+ <&gcc GCC_PCIE_6A_NOCSR_COM_PHY_BCR>;
+ reset-names = "phy",
+ "phy_nocsr";
+
+ assigned-clocks = <&gcc GCC_PCIE_6A_PHY_RCHNG_CLK>;
+ assigned-clock-rates = <100000000>;
+
+ power-domains = <&gcc GCC_PCIE_6_PHY_GDSC>;
+
+ #clock-cells = <0>;
+ clock-output-names = "pcie6a_pipe_clk";
+
+ #phy-cells = <0>;
+
+ status = "disabled";
+ };
+
+ pcie4: pci@1c08000 {
+ device_type = "pci";
+ compatible = "qcom,pcie-x1e80100";
+ reg = <0 0x01c08000 0 0x3000>,
+ <0 0x7c000000 0 0xf1d>,
+ <0 0x7c000f40 0 0xa8>,
+ <0 0x7c001000 0 0x1000>,
+ <0 0x7c100000 0 0x100000>,
+ <0 0x01c0b000 0 0x1000>;
+ reg-names = "parf",
+ "dbi",
+ "elbi",
+ "atu",
+ "config",
+ "mhi";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges = <0x01000000 0 0x00000000 0 0x7c200000 0 0x100000>,
+ <0x02000000 0 0x7c300000 0 0x7c300000 0 0x3d00000>;
+ bus-range = <0x00 0xff>;
+
+ dma-coherent;
+
+ linux,pci-domain = <5>;
+ num-lanes = <2>;
+
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi0",
+ "msi1",
+ "msi2",
+ "msi3",
+ "msi4",
+ "msi5",
+ "msi6",
+ "msi7";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0x7>;
+ interrupt-map = <0 0 0 1 &intc 0 0 0 149 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 2 &intc 0 0 0 150 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 3 &intc 0 0 0 151 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 4 &intc 0 0 0 152 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&gcc GCC_PCIE_4_AUX_CLK>,
+ <&gcc GCC_PCIE_4_CFG_AHB_CLK>,
+ <&gcc GCC_PCIE_4_MSTR_AXI_CLK>,
+ <&gcc GCC_PCIE_4_SLV_AXI_CLK>,
+ <&gcc GCC_PCIE_4_SLV_Q2A_AXI_CLK>,
+ <&gcc GCC_CFG_NOC_PCIE_ANOC_NORTH_AHB_CLK>,
+ <&gcc GCC_CNOC_PCIE_NORTH_SF_AXI_CLK>;
+ clock-names = "aux",
+ "cfg",
+ "bus_master",
+ "bus_slave",
+ "slave_q2a",
+ "noc_aggr",
+ "cnoc_sf_axi";
+
+ assigned-clocks = <&gcc GCC_PCIE_4_AUX_CLK>;
+ assigned-clock-rates = <19200000>;
+
+ interconnects = <&pcie_south_anoc MASTER_PCIE_4 QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &cnoc_main SLAVE_PCIE_4 QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "pcie-mem",
+ "cpu-pcie";
+
+ resets = <&gcc GCC_PCIE_4_BCR>,
+ <&gcc GCC_PCIE_4_LINK_DOWN_BCR>;
+ reset-names = "pci",
+ "link_down";
+
+ power-domains = <&gcc GCC_PCIE_4_GDSC>;
+
+ phys = <&pcie4_phy>;
+ phy-names = "pciephy";
+
+ status = "disabled";
+ };
+
+ pcie4_phy: phy@1c0e000 {
+ compatible = "qcom,x1e80100-qmp-gen3x2-pcie-phy";
+ reg = <0 0x01c0e000 0 0x2000>;
+
+ clocks = <&gcc GCC_PCIE_4_AUX_CLK>,
+ <&gcc GCC_PCIE_4_CFG_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_PCIE_4_PHY_RCHNG_CLK>,
+ <&gcc GCC_PCIE_4_PIPE_CLK>;
+ clock-names = "aux",
+ "cfg_ahb",
+ "ref",
+ "rchng",
+ "pipe";
+
+ resets = <&gcc GCC_PCIE_4_PHY_BCR>;
+ reset-names = "phy";
+
+ assigned-clocks = <&gcc GCC_PCIE_4_PHY_RCHNG_CLK>;
+ assigned-clock-rates = <100000000>;
+
+ power-domains = <&gcc GCC_PCIE_4_PHY_GDSC>;
+
+ #clock-cells = <0>;
+ clock-output-names = "pcie4_pipe_clk";
+
+ #phy-cells = <0>;
+
+ status = "disabled";
+ };
+
tcsr_mutex: hwlock@1f40000 {
compatible = "qcom,tcsr-mutex";
reg = <0 0x01f40000 0 0x20000>;
#hwlock-cells = <1>;
};
+ tcsr: clock-controller@1fc0000 {
+ compatible = "qcom,x1e80100-tcsr", "syscon";
+ reg = <0 0x01fc0000 0 0x30000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
gem_noc: interconnect@26400000 {
compatible = "qcom,x1e80100-gem-noc";
reg = <0 0x26400000 0 0x311200>;
@@ -2560,6 +3003,331 @@
#interconnect-cells = <2>;
};
+ lpass_wsa2macro: codec@6aa0000 {
+ compatible = "qcom,x1e80100-lpass-wsa-macro", "qcom,sm8550-lpass-wsa-macro";
+ reg = <0 0x06aa0000 0 0x1000>;
+ clocks = <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&lpass_vamacro>;
+ clock-names = "mclk",
+ "macro",
+ "dcodec",
+ "fsgen";
+
+ #clock-cells = <0>;
+ clock-output-names = "wsa2-mclk";
+ #sound-dai-cells = <1>;
+ sound-name-prefix = "WSA2";
+ };
+
+ swr3: soundwire@6ab0000 {
+ compatible = "qcom,soundwire-v2.0.0";
+ reg = <0 0x06ab0000 0 0x10000>;
+ clocks = <&lpass_wsa2macro>;
+ clock-names = "iface";
+ interrupts = <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
+ label = "WSA2";
+
+ pinctrl-0 = <&wsa2_swr_active>;
+ pinctrl-names = "default";
+
+ qcom,din-ports = <4>;
+ qcom,dout-ports = <9>;
+
+ qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0xc8 0xff 0xff 0x0f 0x0f 0xff 0x31f>;
+ qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>;
+ qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>;
+ qcom,ports-hstop = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>;
+ qcom,ports-word-length = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x18>;
+ qcom,ports-block-pack-mode = /bits/ 8 <0x00 0x01 0x01 0x00 0x01 0x01 0x00 0x00 0x00 0x01 0x01 0x00 0x00>;
+ qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-lane-control = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+ #sound-dai-cells = <1>;
+ status = "disabled";
+ };
+
+ lpass_rxmacro: codec@6ac0000 {
+ compatible = "qcom,x1e80100-lpass-rx-macro", "qcom,sm8550-lpass-rx-macro";
+ reg = <0 0x06ac0000 0 0x1000>;
+ clocks = <&q6prmcc LPASS_CLK_ID_RX_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&lpass_vamacro>;
+ clock-names = "mclk",
+ "macro",
+ "dcodec",
+ "fsgen";
+
+ #clock-cells = <0>;
+ clock-output-names = "mclk";
+ #sound-dai-cells = <1>;
+ };
+
+ swr1: soundwire@6ad0000 {
+ compatible = "qcom,soundwire-v2.0.0";
+ reg = <0 0x06ad0000 0 0x10000>;
+ clocks = <&lpass_rxmacro>;
+ clock-names = "iface";
+ interrupts = <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
+ label = "RX";
+
+ pinctrl-0 = <&rx_swr_active>;
+ pinctrl-names = "default";
+
+ qcom,din-ports = <1>;
+ qcom,dout-ports = <11>;
+
+ qcom,ports-sinterval = /bits/ 16 <0x03 0x1f 0x1f 0x07 0x00 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0b 0x01 0x00 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0b 0x00 0x00 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-hstart = /bits/ 8 <0xff 0x03 0x00 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-hstop = /bits/ 8 <0xff 0x06 0x0f 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-word-length = /bits/ 8 <0x01 0x07 0x04 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-block-pack-mode = /bits/ 8 <0xff 0x00 0x01 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-lane-control = /bits/ 8 <0x01 0x00 0x00 0x00 0x00 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+ #sound-dai-cells = <1>;
+ status = "disabled";
+ };
+
+ lpass_txmacro: codec@6ae0000 {
+ compatible = "qcom,x1e80100-lpass-tx-macro", "qcom,sm8550-lpass-tx-macro";
+ reg = <0 0x06ae0000 0 0x1000>;
+ clocks = <&q6prmcc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&lpass_vamacro>;
+ clock-names = "mclk",
+ "macro",
+ "dcodec",
+ "fsgen";
+
+ #clock-cells = <0>;
+ clock-output-names = "mclk";
+ #sound-dai-cells = <1>;
+ };
+
+ lpass_wsamacro: codec@6b00000 {
+ compatible = "qcom,x1e80100-lpass-wsa-macro", "qcom,sm8550-lpass-wsa-macro";
+ reg = <0 0x06b00000 0 0x1000>;
+ clocks = <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&lpass_vamacro>;
+ clock-names = "mclk",
+ "macro",
+ "dcodec",
+ "fsgen";
+
+ #clock-cells = <0>;
+ clock-output-names = "mclk";
+ #sound-dai-cells = <1>;
+ sound-name-prefix = "WSA";
+ };
+
+ swr0: soundwire@6b10000 {
+ compatible = "qcom,soundwire-v2.0.0";
+ reg = <0 0x06b10000 0 0x10000>;
+ clocks = <&lpass_wsamacro>;
+ clock-names = "iface";
+ interrupts = <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>;
+ label = "WSA";
+
+ pinctrl-0 = <&wsa_swr_active>;
+ pinctrl-names = "default";
+
+ qcom,din-ports = <4>;
+ qcom,dout-ports = <9>;
+
+ qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0xc8 0xff 0xff 0x0f 0x0f 0xff 0x31f>;
+ qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>;
+ qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>;
+ qcom,ports-hstop = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>;
+ qcom,ports-word-length = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x18>;
+ qcom,ports-block-pack-mode = /bits/ 8 <0x00 0x01 0x01 0x00 0x01 0x01 0x00 0x00 0x00 0x01 0x01 0x00 0x00>;
+ qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-lane-control = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+ #sound-dai-cells = <1>;
+ status = "disabled";
+ };
+
+ swr2: soundwire@6d30000 {
+ compatible = "qcom,soundwire-v2.0.0";
+ reg = <0 0x06d30000 0 0x10000>;
+ clocks = <&lpass_txmacro>;
+ clock-names = "iface";
+ interrupts = <GIC_SPI 496 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 520 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "core", "wakeup";
+ label = "TX";
+
+ pinctrl-0 = <&tx_swr_active>;
+ pinctrl-names = "default";
+
+ qcom,din-ports = <4>;
+ qcom,dout-ports = <1>;
+
+ qcom,ports-sinterval-low = /bits/ 8 <0x00 0x01 0x03 0x03 0x00>;
+ qcom,ports-offset1 = /bits/ 8 <0x00 0x01 0x02 0x00 0x00>;
+ qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x00 0x00 0xff>;
+ qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-hstop = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-word-length = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>;
+ qcom,ports-lane-control = /bits/ 8 <0xff 0x00 0x00 0x01 0xff>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+ #sound-dai-cells = <1>;
+ status = "disabled";
+ };
+
+ lpass_vamacro: codec@6d44000 {
+ compatible = "qcom,x1e80100-lpass-va-macro", "qcom,sm8550-lpass-va-macro";
+ reg = <0 0x06d44000 0 0x1000>;
+ clocks = <&q6prmcc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+ clock-names = "mclk",
+ "macro",
+ "dcodec";
+
+ #clock-cells = <0>;
+ clock-output-names = "fsgen";
+ #sound-dai-cells = <1>;
+ };
+
+ lpass_tlmm: pinctrl@6e80000 {
+ compatible = "qcom,x1e80100-lpass-lpi-pinctrl", "qcom,sm8550-lpass-lpi-pinctrl";
+ reg = <0 0x06e80000 0 0x20000>,
+ <0 0x07250000 0 0x10000>;
+
+ clocks = <&q6prmcc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+ clock-names = "core", "audio";
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&lpass_tlmm 0 0 23>;
+
+ tx_swr_active: tx-swr-active-state {
+ clk-pins {
+ pins = "gpio0";
+ function = "swr_tx_clk";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-disable;
+ };
+
+ data-pins {
+ pins = "gpio1", "gpio2";
+ function = "swr_tx_data";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-bus-hold;
+ };
+ };
+
+ rx_swr_active: rx-swr-active-state {
+ clk-pins {
+ pins = "gpio3";
+ function = "swr_rx_clk";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-disable;
+ };
+
+ data-pins {
+ pins = "gpio4", "gpio5";
+ function = "swr_rx_data";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-bus-hold;
+ };
+ };
+
+ dmic01_default: dmic01-default-state {
+ clk-pins {
+ pins = "gpio6";
+ function = "dmic1_clk";
+ drive-strength = <8>;
+ output-high;
+ };
+
+ data-pins {
+ pins = "gpio7";
+ function = "dmic1_data";
+ drive-strength = <8>;
+ input-enable;
+ };
+ };
+
+ dmic23_default: dmic23-default-state {
+ clk-pins {
+ pins = "gpio8";
+ function = "dmic2_clk";
+ drive-strength = <8>;
+ output-high;
+ };
+
+ data-pins {
+ pins = "gpio9";
+ function = "dmic2_data";
+ drive-strength = <8>;
+ input-enable;
+ };
+ };
+
+ wsa_swr_active: wsa-swr-active-state {
+ clk-pins {
+ pins = "gpio10";
+ function = "wsa_swr_clk";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-disable;
+ };
+
+ data-pins {
+ pins = "gpio11";
+ function = "wsa_swr_data";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-bus-hold;
+ };
+ };
+
+ wsa2_swr_active: wsa2-swr-active-state {
+ clk-pins {
+ pins = "gpio15";
+ function = "wsa2_swr_clk";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-disable;
+ };
+
+ data-pins {
+ pins = "gpio16";
+ function = "wsa2_swr_data";
+ drive-strength = <2>;
+ slew-rate = <1>;
+ bias-bus-hold;
+ };
+ };
+ };
+
lpass_ag_noc: interconnect@7e40000 {
compatible = "qcom,x1e80100-lpass-ag-noc";
reg = <0 0x7e40000 0 0xE080>;
@@ -2587,6 +3355,849 @@
#interconnect-cells = <2>;
};
+ usb_2_hsphy: phy@88e0000 {
+ compatible = "qcom,x1e80100-snps-eusb2-phy",
+ "qcom,sm8550-snps-eusb2-phy";
+ reg = <0 0x088e0000 0 0x154>;
+ #phy-cells = <0>;
+
+ clocks = <&tcsr TCSR_USB2_2_CLKREF_EN>;
+ clock-names = "ref";
+
+ resets = <&gcc GCC_QUSB2PHY_USB20_HS_BCR>;
+
+ status = "disabled";
+ };
+
+ usb_1_ss2: usb@a0f8800 {
+ compatible = "qcom,x1e80100-dwc3", "qcom,dwc3";
+ reg = <0 0x0a0f8800 0 0x400>;
+
+ clocks = <&gcc GCC_CFG_NOC_USB3_TERT_AXI_CLK>,
+ <&gcc GCC_USB30_TERT_MASTER_CLK>,
+ <&gcc GCC_AGGRE_USB3_TERT_AXI_CLK>,
+ <&gcc GCC_USB30_TERT_SLEEP_CLK>,
+ <&gcc GCC_USB30_TERT_MOCK_UTMI_CLK>,
+ <&gcc GCC_AGGRE_USB_NOC_AXI_CLK>,
+ <&gcc GCC_AGGRE_NOC_USB_NORTH_AXI_CLK>,
+ <&gcc GCC_AGGRE_NOC_USB_SOUTH_AXI_CLK>,
+ <&gcc GCC_SYS_NOC_USB_AXI_CLK>;
+ clock-names = "cfg_noc",
+ "core",
+ "iface",
+ "sleep",
+ "mock_utmi",
+ "noc_aggr",
+ "noc_aggr_north",
+ "noc_aggr_south",
+ "noc_sys";
+
+ assigned-clocks = <&gcc GCC_USB30_TERT_MOCK_UTMI_CLK>,
+ <&gcc GCC_USB30_TERT_MASTER_CLK>;
+ assigned-clock-rates = <19200000>,
+ <200000000>;
+
+ interrupts-extended = <&intc GIC_SPI 370 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 58 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 57 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 10 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
+
+ power-domains = <&gcc GCC_USB30_TERT_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
+
+ resets = <&gcc GCC_USB30_TERT_BCR>;
+
+ interconnects = <&usb_south_anoc MASTER_USB3_2 QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_USB3_2 QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "usb-ddr",
+ "apps-usb";
+
+ wakeup-source;
+
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ status = "disabled";
+
+ usb_1_ss2_dwc3: usb@a000000 {
+ compatible = "snps,dwc3";
+ reg = <0 0x0a000000 0 0xcd00>;
+
+ interrupts = <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>;
+
+ iommus = <&apps_smmu 0x14a0 0x0>;
+
+ phys = <&usb_1_ss2_hsphy>,
+ <&usb_1_ss2_qmpphy QMP_USB43DP_USB3_PHY>;
+ phy-names = "usb2-phy",
+ "usb3-phy";
+
+ snps,dis_u2_susphy_quirk;
+ snps,dis_enblslpm_quirk;
+ snps,usb3_lpm_capable;
+
+ dma-coherent;
+
+ port {
+ usb_1_ss2_role_switch: endpoint {
+ };
+ };
+ };
+ };
+
+ usb_2: usb@a2f8800 {
+ compatible = "qcom,x1e80100-dwc3", "qcom,dwc3";
+ reg = <0 0x0a2f8800 0 0x400>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ clocks = <&gcc GCC_CFG_NOC_USB2_PRIM_AXI_CLK>,
+ <&gcc GCC_USB20_MASTER_CLK>,
+ <&gcc GCC_AGGRE_USB2_PRIM_AXI_CLK>,
+ <&gcc GCC_USB20_SLEEP_CLK>,
+ <&gcc GCC_USB20_MOCK_UTMI_CLK>,
+ <&gcc GCC_AGGRE_USB_NOC_AXI_CLK>,
+ <&gcc GCC_AGGRE_NOC_USB_NORTH_AXI_CLK>,
+ <&gcc GCC_AGGRE_NOC_USB_SOUTH_AXI_CLK>,
+ <&gcc GCC_SYS_NOC_USB_AXI_CLK>;
+ clock-names = "cfg_noc",
+ "core",
+ "iface",
+ "sleep",
+ "mock_utmi",
+ "noc_aggr",
+ "noc_aggr_north",
+ "noc_aggr_south",
+ "noc_sys";
+
+ assigned-clocks = <&gcc GCC_USB20_MOCK_UTMI_CLK>,
+ <&gcc GCC_USB20_MASTER_CLK>;
+ assigned-clock-rates = <19200000>, <200000000>;
+
+ interrupts-extended = <&intc GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 50 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 49 IRQ_TYPE_EDGE_BOTH>;
+ interrupt-names = "pwr_event",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq";
+
+ power-domains = <&gcc GCC_USB20_PRIM_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
+
+ resets = <&gcc GCC_USB20_PRIM_BCR>;
+
+ interconnects = <&usb_north_anoc MASTER_USB2 QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_USB2 QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "usb-ddr",
+ "apps-usb";
+
+ wakeup-source;
+
+ status = "disabled";
+
+ usb_2_dwc3: usb@a200000 {
+ compatible = "snps,dwc3";
+ reg = <0 0x0a200000 0 0xcd00>;
+ interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
+ iommus = <&apps_smmu 0x14e0 0x0>;
+ phys = <&usb_2_hsphy>;
+ phy-names = "usb2-phy";
+ maximum-speed = "high-speed";
+
+ port {
+ usb_2_role_switch: endpoint {
+ };
+ };
+ };
+ };
+
+ usb_1_ss0: usb@a6f8800 {
+ compatible = "qcom,x1e80100-dwc3", "qcom,dwc3";
+ reg = <0 0x0a6f8800 0 0x400>;
+
+ clocks = <&gcc GCC_CFG_NOC_USB3_PRIM_AXI_CLK>,
+ <&gcc GCC_USB30_PRIM_MASTER_CLK>,
+ <&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>,
+ <&gcc GCC_USB30_PRIM_SLEEP_CLK>,
+ <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
+ <&gcc GCC_AGGRE_USB_NOC_AXI_CLK>,
+ <&gcc GCC_CFG_NOC_USB_ANOC_NORTH_AHB_CLK>,
+ <&gcc GCC_CFG_NOC_USB_ANOC_SOUTH_AHB_CLK>,
+ <&gcc GCC_SYS_NOC_USB_AXI_CLK>;
+ clock-names = "cfg_noc",
+ "core",
+ "iface",
+ "sleep",
+ "mock_utmi",
+ "noc_aggr",
+ "noc_aggr_north",
+ "noc_aggr_south",
+ "noc_sys";
+
+ assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
+ <&gcc GCC_USB30_PRIM_MASTER_CLK>;
+ assigned-clock-rates = <19200000>,
+ <200000000>;
+
+ interrupts-extended = <&intc GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 61 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 15 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
+
+ power-domains = <&gcc GCC_USB30_PRIM_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
+
+ resets = <&gcc GCC_USB30_PRIM_BCR>;
+
+ wakeup-source;
+
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ status = "disabled";
+
+ usb_1_ss0_dwc3: usb@a600000 {
+ compatible = "snps,dwc3";
+ reg = <0 0x0a600000 0 0xcd00>;
+
+ interrupts = <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>;
+
+ iommus = <&apps_smmu 0x1420 0x0>;
+
+ phys = <&usb_1_ss0_hsphy>,
+ <&usb_1_ss0_qmpphy QMP_USB43DP_USB3_PHY>;
+ phy-names = "usb2-phy",
+ "usb3-phy";
+
+ snps,dis_u2_susphy_quirk;
+ snps,dis_enblslpm_quirk;
+ snps,usb3_lpm_capable;
+
+ dma-coherent;
+
+ port {
+ usb_1_ss0_role_switch: endpoint {
+ };
+ };
+ };
+ };
+
+ usb_1_ss1: usb@a8f8800 {
+ compatible = "qcom,x1e80100-dwc3", "qcom,dwc3";
+ reg = <0 0x0a8f8800 0 0x400>;
+
+ clocks = <&gcc GCC_CFG_NOC_USB3_SEC_AXI_CLK>,
+ <&gcc GCC_USB30_SEC_MASTER_CLK>,
+ <&gcc GCC_AGGRE_USB3_SEC_AXI_CLK>,
+ <&gcc GCC_USB30_SEC_SLEEP_CLK>,
+ <&gcc GCC_USB30_SEC_MOCK_UTMI_CLK>,
+ <&gcc GCC_AGGRE_USB_NOC_AXI_CLK>,
+ <&gcc GCC_AGGRE_NOC_USB_NORTH_AXI_CLK>,
+ <&gcc GCC_AGGRE_NOC_USB_SOUTH_AXI_CLK>,
+ <&gcc GCC_SYS_NOC_USB_AXI_CLK>;
+ clock-names = "cfg_noc",
+ "core",
+ "iface",
+ "sleep",
+ "mock_utmi",
+ "noc_aggr",
+ "noc_aggr_north",
+ "noc_aggr_south",
+ "noc_sys";
+
+ assigned-clocks = <&gcc GCC_USB30_SEC_MOCK_UTMI_CLK>,
+ <&gcc GCC_USB30_SEC_MASTER_CLK>;
+ assigned-clock-rates = <19200000>,
+ <200000000>;
+
+ interrupts-extended = <&intc GIC_SPI 372 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 60 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 11 IRQ_TYPE_EDGE_BOTH>,
+ <&pdc 47 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pwr_event",
+ "dp_hs_phy_irq",
+ "dm_hs_phy_irq",
+ "ss_phy_irq";
+
+ power-domains = <&gcc GCC_USB30_SEC_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
+
+ resets = <&gcc GCC_USB30_SEC_BCR>;
+
+ interconnects = <&usb_south_anoc MASTER_USB3_1 QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS
+ &config_noc SLAVE_USB3_1 QCOM_ICC_TAG_ALWAYS>;
+ interconnect-names = "usb-ddr",
+ "apps-usb";
+
+ wakeup-source;
+
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ status = "disabled";
+
+ usb_1_ss1_dwc3: usb@a800000 {
+ compatible = "snps,dwc3";
+ reg = <0 0x0a800000 0 0xcd00>;
+
+ interrupts = <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>;
+
+ iommus = <&apps_smmu 0x1460 0x0>;
+
+ phys = <&usb_1_ss1_hsphy>,
+ <&usb_1_ss1_qmpphy QMP_USB43DP_USB3_PHY>;
+ phy-names = "usb2-phy",
+ "usb3-phy";
+
+ snps,dis_u2_susphy_quirk;
+ snps,dis_enblslpm_quirk;
+ snps,usb3_lpm_capable;
+
+ dma-coherent;
+
+ port {
+ usb_1_ss1_role_switch: endpoint {
+ };
+ };
+ };
+ };
+
+ mdss: display-subsystem@ae00000 {
+ compatible = "qcom,x1e80100-mdss";
+ reg = <0 0x0ae00000 0 0x1000>;
+ reg-names = "mdss";
+
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&gcc GCC_DISP_HF_AXI_CLK>,
+ <&dispcc DISP_CC_MDSS_MDP_CLK>;
+
+ resets = <&dispcc DISP_CC_MDSS_CORE_BCR>;
+
+ interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS
+ &gem_noc SLAVE_LLCC QCOM_ICC_TAG_ALWAYS>,
+ <&mc_virt MASTER_LLCC QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
+ <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY
+ &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>;
+ interconnect-names = "mdp0-mem",
+ "mdp1-mem",
+ "cpu-cfg";
+
+ power-domains = <&dispcc MDSS_GDSC>;
+
+ iommus = <&apps_smmu 0x1c00 0x2>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ status = "disabled";
+
+ mdss_mdp: display-controller@ae01000 {
+ compatible = "qcom,x1e80100-dpu";
+ reg = <0 0x0ae01000 0 0x8f000>,
+ <0 0x0aeb0000 0 0x2008>;
+ reg-names = "mdp",
+ "vbif";
+
+ interrupts-extended = <&mdss 0>;
+
+ clocks = <&gcc GCC_DISP_HF_AXI_CLK>,
+ <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_MDP_LUT_CLK>,
+ <&dispcc DISP_CC_MDSS_MDP_CLK>,
+ <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
+ clock-names = "nrt_bus",
+ "iface",
+ "lut",
+ "core",
+ "vsync";
+
+ operating-points-v2 = <&mdp_opp_table>;
+
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ mdss_intf0_out: endpoint {
+ remote-endpoint = <&mdss_dp0_in>;
+ };
+ };
+
+ port@4 {
+ reg = <4>;
+
+ mdss_intf4_out: endpoint {
+ remote-endpoint = <&mdss_dp1_in>;
+ };
+ };
+
+ port@5 {
+ reg = <5>;
+
+ mdss_intf5_out: endpoint {
+ remote-endpoint = <&mdss_dp3_in>;
+ };
+ };
+
+ port@6 {
+ reg = <6>;
+
+ mdss_intf6_out: endpoint {
+ remote-endpoint = <&mdss_dp2_in>;
+ };
+ };
+ };
+
+ mdp_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+
+ opp-325000000 {
+ opp-hz = /bits/ 64 <325000000>;
+ required-opps = <&rpmhpd_opp_svs>;
+ };
+
+ opp-375000000 {
+ opp-hz = /bits/ 64 <375000000>;
+ required-opps = <&rpmhpd_opp_svs_l1>;
+ };
+
+ opp-514000000 {
+ opp-hz = /bits/ 64 <514000000>;
+ required-opps = <&rpmhpd_opp_nom>;
+ };
+
+ opp-575000000 {
+ opp-hz = /bits/ 64 <575000000>;
+ required-opps = <&rpmhpd_opp_nom_l1>;
+ };
+ };
+ };
+
+ mdss_dp0: displayport-controller@ae90000 {
+ compatible = "qcom,x1e80100-dp";
+ reg = <0 0xae90000 0 0x200>,
+ <0 0xae90200 0 0x200>,
+ <0 0xae90400 0 0x600>,
+ <0 0xae91000 0 0x400>,
+ <0 0xae91400 0 0x400>;
+
+ interrupts-extended = <&mdss 12>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX0_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX0_LINK_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX0_LINK_INTF_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX0_PIXEL0_CLK>;
+ clock-names = "core_iface",
+ "core_aux",
+ "ctrl_link",
+ "ctrl_link_iface",
+ "stream_pixel";
+
+ assigned-clocks = <&dispcc DISP_CC_MDSS_DPTX0_LINK_CLK_SRC>,
+ <&dispcc DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC>;
+ assigned-clock-parents = <&usb_1_ss0_qmpphy QMP_USB43DP_DP_LINK_CLK>,
+ <&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
+
+ operating-points-v2 = <&mdss_dp0_opp_table>;
+
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+ phys = <&usb_1_ss0_qmpphy QMP_USB43DP_DP_PHY>;
+ phy-names = "dp";
+
+ #sound-dai-cells = <0>;
+
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ mdss_dp0_in: endpoint {
+ remote-endpoint = <&mdss_intf0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ mdss_dp0_out: endpoint {
+ };
+ };
+ };
+
+ mdss_dp0_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-160000000 {
+ opp-hz = /bits/ 64 <160000000>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+
+ opp-270000000 {
+ opp-hz = /bits/ 64 <270000000>;
+ required-opps = <&rpmhpd_opp_svs>;
+ };
+
+ opp-540000000 {
+ opp-hz = /bits/ 64 <540000000>;
+ required-opps = <&rpmhpd_opp_svs_l1>;
+ };
+
+ opp-810000000 {
+ opp-hz = /bits/ 64 <810000000>;
+ required-opps = <&rpmhpd_opp_nom>;
+ };
+ };
+ };
+
+ mdss_dp1: displayport-controller@ae98000 {
+ compatible = "qcom,x1e80100-dp";
+ reg = <0 0xae98000 0 0x200>,
+ <0 0xae98200 0 0x200>,
+ <0 0xae98400 0 0x600>,
+ <0 0xae99000 0 0x400>,
+ <0 0xae99400 0 0x400>;
+
+ interrupts-extended = <&mdss 13>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX1_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX1_LINK_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX1_LINK_INTF_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX1_PIXEL0_CLK>;
+ clock-names = "core_iface",
+ "core_aux",
+ "ctrl_link",
+ "ctrl_link_iface",
+ "stream_pixel";
+
+ assigned-clocks = <&dispcc DISP_CC_MDSS_DPTX1_LINK_CLK_SRC>,
+ <&dispcc DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC>;
+ assigned-clock-parents = <&usb_1_ss1_qmpphy QMP_USB43DP_DP_LINK_CLK>,
+ <&usb_1_ss1_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
+
+ operating-points-v2 = <&mdss_dp1_opp_table>;
+
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+ phys = <&usb_1_ss1_qmpphy QMP_USB43DP_DP_PHY>;
+ phy-names = "dp";
+
+ #sound-dai-cells = <0>;
+
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ mdss_dp1_in: endpoint {
+ remote-endpoint = <&mdss_intf4_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ mdss_dp1_out: endpoint {
+ };
+ };
+ };
+
+ mdss_dp1_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-160000000 {
+ opp-hz = /bits/ 64 <160000000>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+
+ opp-270000000 {
+ opp-hz = /bits/ 64 <270000000>;
+ required-opps = <&rpmhpd_opp_svs>;
+ };
+
+ opp-540000000 {
+ opp-hz = /bits/ 64 <540000000>;
+ required-opps = <&rpmhpd_opp_svs_l1>;
+ };
+
+ opp-810000000 {
+ opp-hz = /bits/ 64 <810000000>;
+ required-opps = <&rpmhpd_opp_nom>;
+ };
+ };
+ };
+
+ mdss_dp2: displayport-controller@ae9a000 {
+ compatible = "qcom,x1e80100-dp";
+ reg = <0 0xae9a000 0 0x200>,
+ <0 0xae9a200 0 0x200>,
+ <0 0xae9a400 0 0x600>,
+ <0 0xae9b000 0 0x400>,
+ <0 0xae9b400 0 0x400>;
+
+ interrupts-extended = <&mdss 14>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX2_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX2_LINK_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX2_LINK_INTF_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX2_PIXEL0_CLK>;
+ clock-names = "core_iface",
+ "core_aux",
+ "ctrl_link",
+ "ctrl_link_iface",
+ "stream_pixel";
+
+ assigned-clocks = <&dispcc DISP_CC_MDSS_DPTX2_LINK_CLK_SRC>,
+ <&dispcc DISP_CC_MDSS_DPTX2_PIXEL0_CLK_SRC>;
+ assigned-clock-parents = <&mdss_dp2_phy 0>,
+ <&mdss_dp2_phy 1>;
+
+ operating-points-v2 = <&mdss_dp2_opp_table>;
+
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+ phys = <&mdss_dp2_phy>;
+ phy-names = "dp";
+
+ #sound-dai-cells = <0>;
+
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ mdss_dp2_in: endpoint {
+ remote-endpoint = <&mdss_intf6_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+
+ mdss_dp2_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-160000000 {
+ opp-hz = /bits/ 64 <160000000>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+
+ opp-270000000 {
+ opp-hz = /bits/ 64 <270000000>;
+ required-opps = <&rpmhpd_opp_svs>;
+ };
+
+ opp-540000000 {
+ opp-hz = /bits/ 64 <540000000>;
+ required-opps = <&rpmhpd_opp_svs_l1>;
+ };
+
+ opp-810000000 {
+ opp-hz = /bits/ 64 <810000000>;
+ required-opps = <&rpmhpd_opp_nom>;
+ };
+ };
+ };
+
+ mdss_dp3: displayport-controller@aea0000 {
+ compatible = "qcom,x1e80100-dp";
+ reg = <0 0xaea0000 0 0x200>,
+ <0 0xaea0200 0 0x200>,
+ <0 0xaea0400 0 0x600>,
+ <0 0xaea1000 0 0x400>,
+ <0 0xaea1400 0 0x400>;
+
+ interrupts-extended = <&mdss 15>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX3_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX3_LINK_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX3_LINK_INTF_CLK>,
+ <&dispcc DISP_CC_MDSS_DPTX3_PIXEL0_CLK>;
+ clock-names = "core_iface",
+ "core_aux",
+ "ctrl_link",
+ "ctrl_link_iface",
+ "stream_pixel";
+
+ assigned-clocks = <&dispcc DISP_CC_MDSS_DPTX3_LINK_CLK_SRC>,
+ <&dispcc DISP_CC_MDSS_DPTX3_PIXEL0_CLK_SRC>;
+ assigned-clock-parents = <&mdss_dp3_phy 0>,
+ <&mdss_dp3_phy 1>;
+
+ operating-points-v2 = <&mdss_dp3_opp_table>;
+
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+ phys = <&mdss_dp3_phy>;
+ phy-names = "dp";
+
+ #sound-dai-cells = <0>;
+
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ mdss_dp3_in: endpoint {
+ remote-endpoint = <&mdss_intf5_out>;
+
+ link-frequencies = /bits/ 64 <8100000000>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+
+ mdss_dp3_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-160000000 {
+ opp-hz = /bits/ 64 <160000000>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+
+ opp-270000000 {
+ opp-hz = /bits/ 64 <270000000>;
+ required-opps = <&rpmhpd_opp_svs>;
+ };
+
+ opp-540000000 {
+ opp-hz = /bits/ 64 <540000000>;
+ required-opps = <&rpmhpd_opp_svs_l1>;
+ };
+
+ opp-810000000 {
+ opp-hz = /bits/ 64 <810000000>;
+ required-opps = <&rpmhpd_opp_nom>;
+ };
+ };
+ };
+
+ };
+
+ mdss_dp2_phy: phy@aec2a00 {
+ compatible = "qcom,x1e80100-dp-phy";
+ reg = <0 0x0aec2a00 0 0x19c>,
+ <0 0x0aec2200 0 0xec>,
+ <0 0x0aec2600 0 0xec>,
+ <0 0x0aec2000 0 0x1c8>;
+
+ clocks = <&dispcc DISP_CC_MDSS_DPTX2_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_AHB_CLK>;
+ clock-names = "aux",
+ "cfg_ahb";
+
+ power-domains = <&rpmhpd RPMHPD_MX>;
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ status = "disabled";
+ };
+
+ mdss_dp3_phy: phy@aec5a00 {
+ compatible = "qcom,x1e80100-dp-phy";
+ reg = <0 0x0aec5a00 0 0x19c>,
+ <0 0x0aec5200 0 0xec>,
+ <0 0x0aec5600 0 0xec>,
+ <0 0x0aec5000 0 0x1c8>;
+
+ clocks = <&dispcc DISP_CC_MDSS_DPTX3_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_AHB_CLK>;
+ clock-names = "aux",
+ "cfg_ahb";
+
+ power-domains = <&rpmhpd RPMHPD_MX>;
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ status = "disabled";
+ };
+
+ dispcc: clock-controller@af00000 {
+ compatible = "qcom,x1e80100-dispcc";
+ reg = <0 0x0af00000 0 0x20000>;
+ clocks = <&bi_tcxo_div2>,
+ <&bi_tcxo_ao_div2>,
+ <&gcc GCC_DISP_AHB_CLK>,
+ <&sleep_clk>,
+ <0>, /* dsi0 */
+ <0>,
+ <0>, /* dsi1 */
+ <0>,
+ <&usb_1_ss0_qmpphy QMP_USB43DP_DP_LINK_CLK>, /* dp0 */
+ <&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>,
+ <&usb_1_ss1_qmpphy QMP_USB43DP_DP_LINK_CLK>, /* dp1 */
+ <&usb_1_ss1_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>,
+ <&mdss_dp2_phy 0>, /* dp2 */
+ <&mdss_dp2_phy 1>,
+ <&mdss_dp3_phy 0>, /* dp3 */
+ <&mdss_dp3_phy 1>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
pdc: interrupt-controller@b220000 {
compatible = "qcom,x1e80100-pdc", "qcom,pdc";
reg = <0 0x0b220000 0 0x30000>, <0 0x174000f0 0 0x64>;
@@ -2599,6 +4210,18 @@
interrupt-controller;
};
+ aoss_qmp: power-management@c300000 {
+ compatible = "qcom,x1e80100-aoss-qmp", "qcom,aoss-qmp";
+ reg = <0 0x0c300000 0 0x400>;
+ interrupt-parent = <&ipcc>;
+ interrupts-extended = <&ipcc IPCC_CLIENT_AOP IPCC_MPROC_SIGNAL_GLINK_QMP
+ IRQ_TYPE_EDGE_RISING>;
+ mboxes = <&ipcc IPCC_CLIENT_AOP IPCC_MPROC_SIGNAL_GLINK_QMP>;
+
+ #clock-cells = <0>;
+ };
+
+
tlmm: pinctrl@f100000 {
compatible = "qcom,x1e80100-tlmm";
reg = <0 0x0f100000 0 0xf00000>;
@@ -3170,7 +4793,7 @@
/* TX, RX */
pins = "gpio86", "gpio87";
function = "qup2_se5";
- drive-strength= <2>;
+ drive-strength = <2>;
bias-disable;
};
};
@@ -3315,7 +4938,6 @@
<0 0x17510000 0 0x10000>,
<0 0x17520000 0 0x10000>;
reg-names = "drv-0", "drv-1", "drv-2";
- qcom,drv-count = <3>;
interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
@@ -3326,6 +4948,7 @@
<WAKE_TCS 2>, <CONTROL_TCS 0>;
label = "apps_rsc";
+ power-domains = <&SYSTEM_PD>;
apps_bcm_voter: bcm-voter {
compatible = "qcom,bcm-voter";
@@ -3514,6 +5137,144 @@
"llcc_broadcast_base";
interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
};
+
+ remoteproc_adsp: remoteproc@30000000 {
+ compatible = "qcom,x1e80100-adsp-pas";
+ reg = <0 0x30000000 0 0x100>;
+
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_adsp_in 3 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "wdog",
+ "fatal",
+ "ready",
+ "handover",
+ "stop-ack";
+
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "xo";
+
+ power-domains = <&rpmhpd RPMHPD_LCX>,
+ <&rpmhpd RPMHPD_LMX>;
+ power-domain-names = "lcx",
+ "lmx";
+
+ interconnects = <&lpass_lpicx_noc MASTER_LPASS_PROC QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>;
+
+ memory-region = <&adspslpi_mem>,
+ <&q6_adsp_dtb_mem>;
+
+ qcom,qmp = <&aoss_qmp>;
+
+ qcom,smem-states = <&smp2p_adsp_out 0>;
+ qcom,smem-state-names = "stop";
+
+ status = "disabled";
+
+ glink-edge {
+ interrupts-extended = <&ipcc IPCC_CLIENT_LPASS
+ IPCC_MPROC_SIGNAL_GLINK_QMP
+ IRQ_TYPE_EDGE_RISING>;
+ mboxes = <&ipcc IPCC_CLIENT_LPASS
+ IPCC_MPROC_SIGNAL_GLINK_QMP>;
+
+ label = "lpass";
+ qcom,remote-pid = <2>;
+
+ gpr {
+ compatible = "qcom,gpr";
+ qcom,glink-channels = "adsp_apps";
+ qcom,domain = <GPR_DOMAIN_ID_ADSP>;
+ qcom,intents = <512 20>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ q6apm: service@1 {
+ compatible = "qcom,q6apm";
+ reg = <GPR_APM_MODULE_IID>;
+ #sound-dai-cells = <0>;
+ qcom,protection-domain = "avs/audio",
+ "msm/adsp/audio_pd";
+
+ q6apmbedai: bedais {
+ compatible = "qcom,q6apm-lpass-dais";
+ #sound-dai-cells = <1>;
+ };
+
+ q6apmdai: dais {
+ compatible = "qcom,q6apm-dais";
+ iommus = <&apps_smmu 0x1001 0x80>,
+ <&apps_smmu 0x1061 0x0>;
+ };
+ };
+
+ q6prm: service@2 {
+ compatible = "qcom,q6prm";
+ reg = <GPR_PRM_MODULE_IID>;
+ qcom,protection-domain = "avs/audio",
+ "msm/adsp/audio_pd";
+
+ q6prmcc: clock-controller {
+ compatible = "qcom,q6prm-lpass-clocks";
+ #clock-cells = <2>;
+ };
+ };
+ };
+ };
+ };
+
+ remoteproc_cdsp: remoteproc@32300000 {
+ compatible = "qcom,x1e80100-cdsp-pas";
+ reg = <0 0x32300000 0 0x1400000>;
+
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>,
+ <&smp2p_cdsp_in 3 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "wdog",
+ "fatal",
+ "ready",
+ "handover",
+ "stop-ack";
+
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "xo";
+
+ power-domains = <&rpmhpd RPMHPD_CX>,
+ <&rpmhpd RPMHPD_MXC>,
+ <&rpmhpd RPMHPD_NSP>;
+ power-domain-names = "cx",
+ "mxc",
+ "nsp";
+
+ interconnects = <&nsp_noc MASTER_CDSP_PROC QCOM_ICC_TAG_ALWAYS
+ &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>;
+
+ memory-region = <&cdsp_mem>,
+ <&q6_cdsp_dtb_mem>;
+
+ qcom,qmp = <&aoss_qmp>;
+
+ qcom,smem-states = <&smp2p_cdsp_out 0>;
+ qcom,smem-state-names = "stop";
+
+ status = "disabled";
+
+ glink-edge {
+ interrupts-extended = <&ipcc IPCC_CLIENT_CDSP
+ IPCC_MPROC_SIGNAL_GLINK_QMP
+ IRQ_TYPE_EDGE_RISING>;
+ mboxes = <&ipcc IPCC_CLIENT_CDSP
+ IPCC_MPROC_SIGNAL_GLINK_QMP>;
+
+ label = "cdsp";
+ qcom,remote-pid = <5>;
+ };
+ };
};
timer {
diff --git a/arch/arm64/boot/dts/renesas/Makefile b/arch/arm64/boot/dts/renesas/Makefile
index 8ea68d582710..5f3e0e61d78d 100644
--- a/arch/arm64/boot/dts/renesas/Makefile
+++ b/arch/arm64/boot/dts/renesas/Makefile
@@ -82,10 +82,15 @@ dtb-$(CONFIG_ARCH_R8A779F0) += r8a779f0-spider.dtb
dtb-$(CONFIG_ARCH_R8A779F0) += r8a779f4-s4sk.dtb
dtb-$(CONFIG_ARCH_R8A779G0) += r8a779g0-white-hawk.dtb
+dtb-$(CONFIG_ARCH_R8A779G0) += r8a779g0-white-hawk-cpu.dtb
dtb-$(CONFIG_ARCH_R8A779G0) += r8a779g0-white-hawk-ard-audio-da7212.dtbo
r8a779g0-white-hawk-ard-audio-da7212-dtbs := r8a779g0-white-hawk.dtb r8a779g0-white-hawk-ard-audio-da7212.dtbo
dtb-$(CONFIG_ARCH_R8A779G0) += r8a779g0-white-hawk-ard-audio-da7212.dtb
+dtb-$(CONFIG_ARCH_R8A779G0) += r8a779g2-white-hawk-single.dtb
+
+dtb-$(CONFIG_ARCH_R8A779H0) += r8a779h0-gray-hawk-single.dtb
+
dtb-$(CONFIG_ARCH_R8A77951) += r8a779m1-salvator-xs.dtb
r8a779m1-salvator-xs-panel-aa104xd12-dtbs := r8a779m1-salvator-xs.dtb salvator-panel-aa104xd12.dtbo
dtb-$(CONFIG_ARCH_R8A77951) += r8a779m1-salvator-xs-panel-aa104xd12.dtb
@@ -103,7 +108,10 @@ r8a779m5-salvator-xs-panel-aa104xd12-dtbs := r8a779m5-salvator-xs.dtb salvator-p
dtb-$(CONFIG_ARCH_R8A77965) += r8a779m5-salvator-xs-panel-aa104xd12.dtb
dtb-$(CONFIG_ARCH_R9A07G043) += r9a07g043u11-smarc.dtb
+dtb-$(CONFIG_ARCH_R9A07G043) += r9a07g043u11-smarc-cru-csi-ov5645.dtbo
dtb-$(CONFIG_ARCH_R9A07G043) += r9a07g043-smarc-pmod.dtbo
+r9a07g043u11-smarc-cru-csi-ov5645-dtbs := r9a07g043u11-smarc.dtb r9a07g043u11-smarc-cru-csi-ov5645.dtbo
+dtb-$(CONFIG_ARCH_R9A07G043) += r9a07g043u11-smarc-cru-csi-ov5645.dtb
r9a07g043u11-smarc-pmod-dtbs := r9a07g043u11-smarc.dtb r9a07g043-smarc-pmod.dtbo
dtb-$(CONFIG_ARCH_R9A07G043) += r9a07g043u11-smarc-pmod.dtb
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index 95b0a1f6debf..a8a44fe5e83b 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -529,6 +529,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
@@ -541,7 +542,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
@@ -554,7 +557,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
@@ -568,6 +573,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
@@ -581,6 +587,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
index 786660fcdea4..4fff511e994c 100644
--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -413,6 +413,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
@@ -425,7 +426,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
@@ -438,7 +441,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
@@ -452,6 +457,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
@@ -465,6 +471,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A774B1_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index eed94ffed7c1..1ef43d78c3a5 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -384,6 +384,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>;
@@ -396,7 +397,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>;
@@ -409,7 +412,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>;
@@ -423,6 +428,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>;
@@ -436,6 +442,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 175e5d296da6..be55ae83944c 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -593,6 +593,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A774E1_PD_ALWAYS_ON>;
@@ -605,7 +606,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A774E1_PD_ALWAYS_ON>;
@@ -618,7 +621,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A774E1_PD_ALWAYS_ON>;
@@ -632,6 +637,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A774E1_PD_ALWAYS_ON>;
@@ -645,6 +651,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A774E1_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
index a4260d9291ba..bea4edd17d53 100644
--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
@@ -614,6 +614,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
@@ -626,7 +627,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
@@ -639,7 +642,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
@@ -653,6 +658,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
@@ -666,6 +672,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
index a631ead171b2..7846fea8e40d 100644
--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
@@ -578,6 +578,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
@@ -590,7 +591,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
@@ -603,7 +606,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
@@ -617,6 +622,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
@@ -630,6 +636,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
index 7254912a241f..58f9286a5ab5 100644
--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -578,6 +578,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
@@ -590,7 +591,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
@@ -603,7 +606,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
@@ -617,6 +622,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
@@ -630,6 +636,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A77961_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index e57b9027066e..692940662d38 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -449,6 +449,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
@@ -461,7 +462,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
@@ -474,7 +477,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
@@ -488,6 +493,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
@@ -501,6 +507,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index ed6e2e47c604..d2d3cecc76d5 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -328,6 +328,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
@@ -340,7 +341,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
@@ -353,7 +356,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
@@ -367,6 +372,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
@@ -380,6 +386,7 @@
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A77970_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 5ed2daaca1f0..c0ba110c74d6 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -357,6 +357,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
@@ -369,7 +370,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
@@ -382,7 +385,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
@@ -395,7 +400,9 @@
reg = <0 0xe6fe0000 0 0x30>;
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
@@ -408,7 +415,9 @@
reg = <0 0xffc00000 0 0x30>;
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 369 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A77980_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 8c2b28342387..37063e3f4e1b 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -415,6 +415,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
@@ -427,7 +428,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
@@ -440,7 +443,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
@@ -454,6 +459,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
@@ -467,6 +473,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77995.dtsi b/arch/arm64/boot/dts/renesas/r8a77995.dtsi
index 8cf6473c63d3..89990dd8ebf7 100644
--- a/arch/arm64/boot/dts/renesas/r8a77995.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77995.dtsi
@@ -336,6 +336,7 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 125>;
clock-names = "fck";
power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
@@ -348,7 +349,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 124>;
clock-names = "fck";
power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
@@ -361,7 +364,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 123>;
clock-names = "fck";
power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
@@ -375,6 +380,7 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 122>;
clock-names = "fck";
power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
@@ -388,6 +394,7 @@
interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 121>;
clock-names = "fck";
power-domains = <&sysc R8A77995_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi
index 4e67a0356497..cfa70b441e32 100644
--- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi
@@ -370,6 +370,7 @@
interrupts = <GIC_SPI 512 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 513 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 514 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 713>;
clock-names = "fck";
power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
@@ -382,7 +383,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 504 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 505 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 507 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 714>;
clock-names = "fck";
power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
@@ -395,7 +398,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 508 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 509 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 510 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 510 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 511 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 715>;
clock-names = "fck";
power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
@@ -408,7 +413,9 @@
reg = <0 0xe6fe0000 0 0x30>;
interrupts = <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 716>;
clock-names = "fck";
power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
@@ -421,7 +428,9 @@
reg = <0 0xffc00000 0 0x30>;
interrupts = <GIC_SPI 476 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 477 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 478 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 478 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 479 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 717>;
clock-names = "fck";
power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
@@ -658,7 +667,7 @@
avb0: ethernet@e6800000 {
compatible = "renesas,etheravb-r8a779a0",
"renesas,etheravb-rcar-gen4";
- reg = <0 0xe6800000 0 0x800>;
+ reg = <0 0xe6800000 0 0x1000>;
interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 257 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 258 IRQ_TYPE_LEVEL_HIGH>,
@@ -706,7 +715,7 @@
avb1: ethernet@e6810000 {
compatible = "renesas,etheravb-r8a779a0",
"renesas,etheravb-rcar-gen4";
- reg = <0 0xe6810000 0 0x800>;
+ reg = <0 0xe6810000 0 0x1000>;
interrupts = <GIC_SPI 281 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 283 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/renesas/r8a779f0.dtsi b/arch/arm64/boot/dts/renesas/r8a779f0.dtsi
index 7fb4989cce8a..72cf30341fc4 100644
--- a/arch/arm64/boot/dts/renesas/r8a779f0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a779f0.dtsi
@@ -501,6 +501,7 @@
interrupts = <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 476 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 713>;
clock-names = "fck";
power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>;
@@ -513,7 +514,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 477 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 478 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 479 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 479 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 480 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 714>;
clock-names = "fck";
power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>;
@@ -526,7 +529,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 481 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 482 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 483 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 483 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 484 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 715>;
clock-names = "fck";
power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>;
@@ -539,7 +544,9 @@
reg = <0 0xe6fe0000 0 0x30>;
interrupts = <GIC_SPI 485 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 488 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 716>;
clock-names = "fck";
power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>;
@@ -552,7 +559,9 @@
reg = <0 0xffc00000 0 0x30>;
interrupts = <GIC_SPI 489 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 490 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 491 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 491 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 492 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 717>;
clock-names = "fck";
power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>;
diff --git a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dts b/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dts
new file mode 100644
index 000000000000..c8b1bb50a8cf
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dts
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the standalone R-Car V4H White Hawk CPU board
+ *
+ * Copyright (C) 2023 Glider bv
+ */
+
+/dts-v1/;
+#include "r8a779g0-white-hawk-cpu.dtsi"
+
+/ {
+ model = "Renesas White Hawk CPU board based on r8a779g0";
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dtsi b/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dtsi
index 913f70fe6c5c..b1fe1aedc27d 100644
--- a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-cpu.dtsi
@@ -1,378 +1,14 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/*
- * Device Tree Source for the White Hawk CPU board
+ * Device Tree Source for the R-Car V4H White Hawk CPU board
*
* Copyright (C) 2022 Renesas Electronics Corp.
*/
#include "r8a779g0.dtsi"
-
-#include <dt-bindings/gpio/gpio.h>
-#include <dt-bindings/input/input.h>
-#include <dt-bindings/leds/common.h>
+#include "white-hawk-cpu-common.dtsi"
/ {
model = "Renesas White Hawk CPU board";
compatible = "renesas,white-hawk-cpu", "renesas,r8a779g0";
-
- aliases {
- ethernet0 = &avb0;
- serial0 = &hscif0;
- };
-
- chosen {
- bootargs = "ignore_loglevel rw root=/dev/nfs ip=on";
- stdout-path = "serial0:921600n8";
- };
-
- keys {
- compatible = "gpio-keys";
-
- pinctrl-0 = <&keys_pins>;
- pinctrl-names = "default";
-
- key-1 {
- gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_1>;
- label = "SW47";
- wakeup-source;
- debounce-interval = <20>;
- };
-
- key-2 {
- gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_2>;
- label = "SW48";
- wakeup-source;
- debounce-interval = <20>;
- };
-
- key-3 {
- gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
- linux,code = <KEY_3>;
- label = "SW49";
- wakeup-source;
- debounce-interval = <20>;
- };
- };
-
- leds {
- compatible = "gpio-leds";
-
- led-1 {
- gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
- color = <LED_COLOR_ID_GREEN>;
- function = LED_FUNCTION_INDICATOR;
- function-enumerator = <1>;
- };
-
- led-2 {
- gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
- color = <LED_COLOR_ID_GREEN>;
- function = LED_FUNCTION_INDICATOR;
- function-enumerator = <2>;
- };
-
- led-3 {
- gpios = <&gpio7 2 GPIO_ACTIVE_HIGH>;
- color = <LED_COLOR_ID_GREEN>;
- function = LED_FUNCTION_INDICATOR;
- function-enumerator = <3>;
- };
- };
-
- memory@48000000 {
- device_type = "memory";
- /* first 128MB is reserved for secure area. */
- reg = <0x0 0x48000000 0x0 0x78000000>;
- };
-
- memory@480000000 {
- device_type = "memory";
- reg = <0x4 0x80000000 0x0 0x80000000>;
- };
-
- memory@600000000 {
- device_type = "memory";
- reg = <0x6 0x00000000 0x1 0x00000000>;
- };
-
- mini-dp-con {
- compatible = "dp-connector";
- label = "CN5";
- type = "mini";
-
- port {
- mini_dp_con_in: endpoint {
- remote-endpoint = <&sn65dsi86_out>;
- };
- };
- };
-
- reg_1p2v: regulator-1p2v {
- compatible = "regulator-fixed";
- regulator-name = "fixed-1.2V";
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
- regulator-boot-on;
- regulator-always-on;
- };
-
- reg_1p8v: regulator-1p8v {
- compatible = "regulator-fixed";
- regulator-name = "fixed-1.8V";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-boot-on;
- regulator-always-on;
- };
-
- reg_3p3v: regulator-3p3v {
- compatible = "regulator-fixed";
- regulator-name = "fixed-3.3V";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- regulator-boot-on;
- regulator-always-on;
- };
-
- sn65dsi86_refclk: clk-x6 {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <38400000>;
- };
-};
-
-&avb0 {
- pinctrl-0 = <&avb0_pins>;
- pinctrl-names = "default";
- phy-handle = <&phy0>;
- tx-internal-delay-ps = <2000>;
- status = "okay";
-
- phy0: ethernet-phy@0 {
- compatible = "ethernet-phy-id0022.1622",
- "ethernet-phy-ieee802.3-c22";
- rxc-skew-ps = <1500>;
- reg = <0>;
- interrupt-parent = <&gpio7>;
- interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
- reset-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>;
- };
-};
-
-&dsi0 {
- status = "okay";
-
- ports {
- port@1 {
- dsi0_out: endpoint {
- remote-endpoint = <&sn65dsi86_in>;
- data-lanes = <1 2 3 4>;
- };
- };
- };
-};
-
-&du {
- status = "okay";
-};
-
-&extal_clk {
- clock-frequency = <16666666>;
-};
-
-&extalr_clk {
- clock-frequency = <32768>;
-};
-
-&hscif0 {
- pinctrl-0 = <&hscif0_pins>;
- pinctrl-names = "default";
-
- status = "okay";
-};
-
-&i2c0 {
- pinctrl-0 = <&i2c0_pins>;
- pinctrl-names = "default";
-
- status = "okay";
- clock-frequency = <400000>;
-
- io_expander_a: gpio@20 {
- compatible = "onnn,pca9654";
- reg = <0x20>;
- interrupt-parent = <&gpio0>;
- interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
- gpio-controller;
- #gpio-cells = <2>;
- interrupt-controller;
- #interrupt-cells = <2>;
- };
-
- eeprom@50 {
- compatible = "rohm,br24g01", "atmel,24c01";
- label = "cpu-board";
- reg = <0x50>;
- pagesize = <8>;
- };
-};
-
-&i2c1 {
- pinctrl-0 = <&i2c1_pins>;
- pinctrl-names = "default";
-
- status = "okay";
- clock-frequency = <400000>;
-
- bridge@2c {
- compatible = "ti,sn65dsi86";
- reg = <0x2c>;
-
- clocks = <&sn65dsi86_refclk>;
- clock-names = "refclk";
-
- interrupt-parent = <&intc_ex>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
-
- enable-gpios = <&gpio1 26 GPIO_ACTIVE_HIGH>;
-
- vccio-supply = <&reg_1p8v>;
- vpll-supply = <&reg_1p8v>;
- vcca-supply = <&reg_1p2v>;
- vcc-supply = <&reg_1p2v>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- sn65dsi86_in: endpoint {
- remote-endpoint = <&dsi0_out>;
- };
- };
-
- port@1 {
- reg = <1>;
- sn65dsi86_out: endpoint {
- remote-endpoint = <&mini_dp_con_in>;
- };
- };
- };
- };
-};
-
-&mmc0 {
- pinctrl-0 = <&mmc_pins>;
- pinctrl-1 = <&mmc_pins>;
- pinctrl-names = "default", "state_uhs";
-
- vmmc-supply = <&reg_3p3v>;
- vqmmc-supply = <&reg_1p8v>;
- mmc-hs200-1_8v;
- mmc-hs400-1_8v;
- bus-width = <8>;
- no-sd;
- no-sdio;
- non-removable;
- full-pwr-cycle-in-suspend;
- status = "okay";
-};
-
-&pfc {
- pinctrl-0 = <&scif_clk_pins>;
- pinctrl-names = "default";
-
- avb0_pins: avb0 {
- mux {
- groups = "avb0_link", "avb0_mdio", "avb0_rgmii",
- "avb0_txcrefclk";
- function = "avb0";
- };
-
- pins_mdio {
- groups = "avb0_mdio";
- drive-strength = <21>;
- };
-
- pins_mii {
- groups = "avb0_rgmii";
- drive-strength = <21>;
- };
-
- };
- hscif0_pins: hscif0 {
- groups = "hscif0_data";
- function = "hscif0";
- };
-
- i2c0_pins: i2c0 {
- groups = "i2c0";
- function = "i2c0";
- };
-
- i2c1_pins: i2c1 {
- groups = "i2c1";
- function = "i2c1";
- };
-
- keys_pins: keys {
- pins = "GP_5_0", "GP_5_1", "GP_5_2";
- bias-pull-up;
- };
-
- mmc_pins: mmc {
- groups = "mmc_data8", "mmc_ctrl", "mmc_ds";
- function = "mmc";
- power-source = <1800>;
- };
-
- qspi0_pins: qspi0 {
- groups = "qspi0_ctrl", "qspi0_data4";
- function = "qspi0";
- };
-
- scif_clk_pins: scif_clk {
- groups = "scif_clk";
- function = "scif_clk";
- };
-};
-
-&rpc {
- pinctrl-0 = <&qspi0_pins>;
- pinctrl-names = "default";
-
- status = "okay";
-
- flash@0 {
- compatible = "spansion,s25fs512s", "jedec,spi-nor";
- reg = <0>;
- spi-max-frequency = <40000000>;
- spi-rx-bus-width = <4>;
-
- partitions {
- compatible = "fixed-partitions";
- #address-cells = <1>;
- #size-cells = <1>;
-
- boot@0 {
- reg = <0x0 0x1200000>;
- read-only;
- };
- user@1200000 {
- reg = <0x1200000 0x2e00000>;
- };
- };
- };
-};
-
-&rwdt {
- timeout-sec = <60>;
- status = "okay";
-};
-
-&scif_clk {
- clock-frequency = <24000000>;
};
diff --git a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk.dts b/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk.dts
index eff1ef6e2cc8..784d4e8b204c 100644
--- a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk.dts
+++ b/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk.dts
@@ -1,69 +1,15 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/*
- * Device Tree Source for the White Hawk CPU and BreakOut boards
+ * Device Tree Source for the R-Car V4H White Hawk CPU and BreakOut boards
*
* Copyright (C) 2022 Renesas Electronics Corp.
*/
/dts-v1/;
#include "r8a779g0-white-hawk-cpu.dtsi"
-#include "r8a779g0-white-hawk-csi-dsi.dtsi"
-#include "r8a779g0-white-hawk-ethernet.dtsi"
+#include "white-hawk-common.dtsi"
/ {
model = "Renesas White Hawk CPU and Breakout boards based on r8a779g0";
compatible = "renesas,white-hawk-breakout", "renesas,white-hawk-cpu", "renesas,r8a779g0";
-
- can_transceiver0: can-phy0 {
- compatible = "nxp,tjr1443";
- #phy-cells = <0>;
- enable-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
- max-bitrate = <5000000>;
- };
-};
-
-&can_clk {
- clock-frequency = <40000000>;
-};
-
-&canfd {
- pinctrl-0 = <&canfd0_pins>, <&canfd1_pins>, <&can_clk_pins>;
- pinctrl-names = "default";
-
- status = "okay";
-
- channel0 {
- status = "okay";
- phys = <&can_transceiver0>;
- };
-
- channel1 {
- status = "okay";
- };
-};
-
-&i2c0 {
- eeprom@51 {
- compatible = "rohm,br24g01", "atmel,24c01";
- label = "breakout-board";
- reg = <0x51>;
- pagesize = <8>;
- };
-};
-
-&pfc {
- can_clk_pins: can-clk {
- groups = "can_clk";
- function = "can_clk";
- };
-
- canfd0_pins: canfd0 {
- groups = "canfd0_data";
- function = "canfd0";
- };
-
- canfd1_pins: canfd1 {
- groups = "canfd1_data";
- function = "canfd1";
- };
};
diff --git a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi
index d3d25e077c5d..9bc542bc6169 100644
--- a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi
@@ -161,11 +161,6 @@
};
};
- psci {
- compatible = "arm,psci-1.0", "arm,psci-0.2";
- method = "smc";
- };
-
extal_clk: extal {
compatible = "fixed-clock";
#clock-cells = <0>;
@@ -185,13 +180,24 @@
interrupts-extended = <&gic GIC_PPI 7 IRQ_TYPE_LEVEL_LOW>;
};
- /* External SCIF clock - to be overridden by boards that provide it */
+ psci {
+ compatible = "arm,psci-1.0", "arm,psci-0.2";
+ method = "smc";
+ };
+
+ /* External SCIF clocks - to be overridden by boards that provide them */
scif_clk: scif {
compatible = "fixed-clock";
#clock-cells = <0>;
clock-frequency = <0>;
};
+ scif_clk2: scif2 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ };
+
soc: soc {
compatible = "simple-bus";
interrupt-parent = <&gic>;
@@ -479,6 +485,7 @@
interrupts = <GIC_SPI 289 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 290 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 291 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2";
clocks = <&cpg CPG_MOD 713>;
clock-names = "fck";
power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
@@ -491,7 +498,9 @@
reg = <0 0xe6fc0000 0 0x30>;
interrupts = <GIC_SPI 292 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 293 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 294 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 294 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 295 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 714>;
clock-names = "fck";
power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
@@ -504,7 +513,9 @@
reg = <0 0xe6fd0000 0 0x30>;
interrupts = <GIC_SPI 296 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 299 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 715>;
clock-names = "fck";
power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
@@ -517,7 +528,9 @@
reg = <0 0xe6fe0000 0 0x30>;
interrupts = <GIC_SPI 300 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 301 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 716>;
clock-names = "fck";
power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
@@ -530,7 +543,9 @@
reg = <0 0xffc00000 0 0x30>;
interrupts = <GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tuni0", "tuni1", "tuni2", "ticpi2";
clocks = <&cpg CPG_MOD 717>;
clock-names = "fck";
power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
@@ -681,7 +696,7 @@
interrupts = <GIC_SPI 248 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cpg CPG_MOD 516>,
<&cpg CPG_CORE R8A779G0_CLK_SASYNCPERD1>,
- <&scif_clk>;
+ <&scif_clk2>;
clock-names = "fck", "brg_int", "scif_clk";
dmas = <&dmac0 0x35>, <&dmac0 0x34>,
<&dmac1 0x35>, <&dmac1 0x34>;
@@ -761,7 +776,7 @@
avb0: ethernet@e6800000 {
compatible = "renesas,etheravb-r8a779g0",
"renesas,etheravb-rcar-gen4";
- reg = <0 0xe6800000 0 0x800>;
+ reg = <0 0xe6800000 0 0x1000>;
interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>,
@@ -808,7 +823,7 @@
avb1: ethernet@e6810000 {
compatible = "renesas,etheravb-r8a779g0",
"renesas,etheravb-rcar-gen4";
- reg = <0 0xe6810000 0 0x800>;
+ reg = <0 0xe6810000 0 0x1000>;
interrupts = <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 362 IRQ_TYPE_LEVEL_HIGH>,
@@ -1057,7 +1072,7 @@
interrupts = <GIC_SPI 254 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cpg CPG_MOD 705>,
<&cpg CPG_CORE R8A779G0_CLK_SASYNCPERD1>,
- <&scif_clk>;
+ <&scif_clk2>;
clock-names = "fck", "brg_int", "scif_clk";
dmas = <&dmac0 0x59>, <&dmac0 0x58>,
<&dmac1 0x59>, <&dmac1 0x58>;
@@ -1777,6 +1792,37 @@
};
};
+ mmc0: mmc@ee140000 {
+ compatible = "renesas,sdhi-r8a779g0",
+ "renesas,rcar-gen4-sdhi";
+ reg = <0 0xee140000 0 0x2000>;
+ interrupts = <GIC_SPI 440 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 706>,
+ <&cpg CPG_CORE R8A779G0_CLK_SD0H>;
+ clock-names = "core", "clkh";
+ power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
+ resets = <&cpg 706>;
+ max-frequency = <200000000>;
+ iommus = <&ipmmu_ds0 32>;
+ status = "disabled";
+ };
+
+ rpc: spi@ee200000 {
+ compatible = "renesas,r8a779g0-rpc-if",
+ "renesas,rcar-gen4-rpc-if";
+ reg = <0 0xee200000 0 0x200>,
+ <0 0x08000000 0 0x04000000>,
+ <0 0xee208000 0 0x100>;
+ reg-names = "regs", "dirmap", "wbuf";
+ interrupts = <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 629>;
+ power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
+ resets = <&cpg 629>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
ipmmu_rt0: iommu@ee480000 {
compatible = "renesas,ipmmu-r8a779g0",
"renesas,rcar-gen4-ipmmu-vmsa";
@@ -1886,37 +1932,6 @@
#iommu-cells = <1>;
};
- mmc0: mmc@ee140000 {
- compatible = "renesas,sdhi-r8a779g0",
- "renesas,rcar-gen4-sdhi";
- reg = <0 0xee140000 0 0x2000>;
- interrupts = <GIC_SPI 440 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cpg CPG_MOD 706>,
- <&cpg CPG_CORE R8A779G0_CLK_SD0H>;
- clock-names = "core", "clkh";
- power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
- resets = <&cpg 706>;
- max-frequency = <200000000>;
- iommus = <&ipmmu_ds0 32>;
- status = "disabled";
- };
-
- rpc: spi@ee200000 {
- compatible = "renesas,r8a779g0-rpc-if",
- "renesas,rcar-gen4-rpc-if";
- reg = <0 0xee200000 0 0x200>,
- <0 0x08000000 0 0x04000000>,
- <0 0xee208000 0 0x100>;
- reg-names = "regs", "dirmap", "wbuf";
- interrupts = <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cpg CPG_MOD 629>;
- power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
- resets = <&cpg 629>;
- #address-cells = <1>;
- #size-cells = <0>;
- status = "disabled";
- };
-
gic: interrupt-controller@f1000000 {
compatible = "arm,gic-v3";
#interrupt-cells = <3>;
diff --git a/arch/arm64/boot/dts/renesas/r8a779g2-white-hawk-single.dts b/arch/arm64/boot/dts/renesas/r8a779g2-white-hawk-single.dts
new file mode 100644
index 000000000000..2f79e5a61248
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a779g2-white-hawk-single.dts
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the R-Car V4H ES2.0 White Hawk Single board
+ *
+ * Copyright (C) 2023 Glider bv
+ */
+
+/dts-v1/;
+#include "r8a779g2.dtsi"
+#include "white-hawk-cpu-common.dtsi"
+#include "white-hawk-common.dtsi"
+
+/ {
+ model = "Renesas White Hawk Single board based on r8a779g2";
+ compatible = "renesas,white-hawk-single", "renesas,r8a779g2",
+ "renesas,r8a779g0";
+};
+
+&hscif0 {
+ uart-has-rtscts;
+};
+
+&hscif0_pins {
+ groups = "hscif0_data", "hscif0_ctrl";
+ function = "hscif0";
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a779g2.dtsi b/arch/arm64/boot/dts/renesas/r8a779g2.dtsi
new file mode 100644
index 000000000000..e08f531843e2
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a779g2.dtsi
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the R-Car V4H (R8A779G2) SoC
+ *
+ * Copyright (C) 2023 Glider bv
+ */
+
+#include "r8a779g0.dtsi"
+
+/ {
+ compatible = "renesas,r8a779g2", "renesas,r8a779g0";
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a779h0-gray-hawk-single.dts b/arch/arm64/boot/dts/renesas/r8a779h0-gray-hawk-single.dts
new file mode 100644
index 000000000000..bc8616a56c03
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a779h0-gray-hawk-single.dts
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the R-Car V4M Gray Hawk Single board
+ *
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ * Copyright (C) 2024 Glider bv
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+
+#include "r8a779h0.dtsi"
+
+/ {
+ model = "Renesas Gray Hawk Single board based on r8a779h0";
+ compatible = "renesas,gray-hawk-single", "renesas,r8a779h0";
+
+ aliases {
+ serial0 = &hscif0;
+ ethernet0 = &avb0;
+ };
+
+ chosen {
+ bootargs = "ignore_loglevel";
+ stdout-path = "serial0:921600n8";
+ };
+
+ memory@48000000 {
+ device_type = "memory";
+ /* first 128MB is reserved for secure area. */
+ reg = <0x0 0x48000000 0x0 0x78000000>;
+ };
+
+ memory@480000000 {
+ device_type = "memory";
+ reg = <0x4 0x80000000 0x1 0x80000000>;
+ };
+
+ reg_1p8v: regulator-1p8v {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed-1.8V";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ reg_3p3v: regulator-3p3v {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed-3.3V";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+};
+
+&avb0 {
+ pinctrl-0 = <&avb0_pins>;
+ pinctrl-names = "default";
+ phy-handle = <&phy0>;
+ tx-internal-delay-ps = <2000>;
+ status = "okay";
+
+ phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-id0022.1622",
+ "ethernet-phy-ieee802.3-c22";
+ rxc-skew-ps = <1500>;
+ reg = <0>;
+ interrupt-parent = <&gpio7>;
+ interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
+ reset-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&extal_clk {
+ clock-frequency = <16666666>;
+};
+
+&extalr_clk {
+ clock-frequency = <32768>;
+};
+
+&hscif0 {
+ pinctrl-0 = <&hscif0_pins>;
+ pinctrl-names = "default";
+
+ uart-has-rtscts;
+ status = "okay";
+};
+
+&i2c0 {
+ pinctrl-0 = <&i2c0_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+ clock-frequency = <400000>;
+
+ eeprom@50 {
+ compatible = "rohm,br24g01", "atmel,24c01";
+ label = "cpu-board";
+ reg = <0x50>;
+ pagesize = <8>;
+ };
+
+ eeprom@51 {
+ compatible = "rohm,br24g01", "atmel,24c01";
+ label = "breakout-board";
+ reg = <0x51>;
+ pagesize = <8>;
+ };
+
+ eeprom@52 {
+ compatible = "rohm,br24g01", "atmel,24c01";
+ label = "csi-dsi-sub-board-id";
+ reg = <0x52>;
+ pagesize = <8>;
+ };
+
+ eeprom@53 {
+ compatible = "rohm,br24g01", "atmel,24c01";
+ label = "ethernet-sub-board-id";
+ reg = <0x53>;
+ pagesize = <8>;
+ };
+};
+
+&mmc0 {
+ pinctrl-0 = <&mmc_pins>;
+ pinctrl-1 = <&mmc_pins>;
+ pinctrl-names = "default", "state_uhs";
+
+ vmmc-supply = <&reg_3p3v>;
+ vqmmc-supply = <&reg_1p8v>;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ bus-width = <8>;
+ no-sd;
+ no-sdio;
+ non-removable;
+ full-pwr-cycle-in-suspend;
+ status = "okay";
+};
+
+&pfc {
+ pinctrl-0 = <&scif_clk_pins>;
+ pinctrl-names = "default";
+
+ avb0_pins: avb0 {
+ mux {
+ groups = "avb0_link", "avb0_mdio", "avb0_rgmii",
+ "avb0_txcrefclk";
+ function = "avb0";
+ };
+
+ pins_mdio {
+ groups = "avb0_mdio";
+ drive-strength = <21>;
+ };
+
+ pins_mii {
+ groups = "avb0_rgmii";
+ drive-strength = <21>;
+ };
+ };
+
+ hscif0_pins: hscif0 {
+ groups = "hscif0_data", "hscif0_ctrl";
+ function = "hscif0";
+ };
+
+ i2c0_pins: i2c0 {
+ groups = "i2c0";
+ function = "i2c0";
+ };
+
+ mmc_pins: mmc {
+ groups = "mmc_data8", "mmc_ctrl", "mmc_ds";
+ function = "mmc";
+ power-source = <1800>;
+ };
+
+ qspi0_pins: qspi0 {
+ groups = "qspi0_ctrl", "qspi0_data4";
+ function = "qspi0";
+ };
+
+ scif_clk_pins: scif-clk {
+ groups = "scif_clk";
+ function = "scif_clk";
+ };
+};
+
+&rpc {
+ pinctrl-0 = <&qspi0_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+
+ flash@0 {
+ compatible = "spansion,s25fs512s", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>;
+ spi-rx-bus-width = <4>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ boot@0 {
+ reg = <0x0 0x1200000>;
+ read-only;
+ };
+ user@1200000 {
+ reg = <0x1200000 0x2e00000>;
+ };
+ };
+ };
+};
+
+&rwdt {
+ timeout-sec = <60>;
+ status = "okay";
+};
+
+&scif_clk {
+ clock-frequency = <24000000>;
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a779h0.dtsi b/arch/arm64/boot/dts/renesas/r8a779h0.dtsi
new file mode 100644
index 000000000000..11885729181b
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r8a779h0.dtsi
@@ -0,0 +1,664 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the R-Car V4M (R8A779H0) SoC
+ *
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ */
+
+#include <dt-bindings/clock/renesas,r8a779h0-cpg-mssr.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/power/renesas,r8a779h0-sysc.h>
+
+/ {
+ compatible = "renesas,r8a779h0";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cluster0_opp: opp-table-0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp-500000000 {
+ opp-hz = /bits/ 64 <500000000>;
+ opp-microvolt = <825000>;
+ clock-latency-ns = <500000>;
+ };
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <825000>;
+ clock-latency-ns = <500000>;
+ };
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&a76_0>;
+ };
+ core1 {
+ cpu = <&a76_1>;
+ };
+ core2 {
+ cpu = <&a76_2>;
+ };
+ core3 {
+ cpu = <&a76_3>;
+ };
+ };
+ };
+
+ a76_0: cpu@0 {
+ compatible = "arm,cortex-a76";
+ reg = <0>;
+ device_type = "cpu";
+ power-domains = <&sysc R8A779H0_PD_A1E0D0C0>;
+ next-level-cache = <&L3_CA76>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ clocks = <&cpg CPG_CORE R8A779H0_CLK_ZC0>;
+ operating-points-v2 = <&cluster0_opp>;
+ };
+
+ a76_1: cpu@100 {
+ compatible = "arm,cortex-a76";
+ reg = <0x100>;
+ device_type = "cpu";
+ power-domains = <&sysc R8A779H0_PD_A1E0D0C1>;
+ next-level-cache = <&L3_CA76>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ clocks = <&cpg CPG_CORE R8A779H0_CLK_ZC1>;
+ operating-points-v2 = <&cluster0_opp>;
+ };
+
+ a76_2: cpu@200 {
+ compatible = "arm,cortex-a76";
+ reg = <0x200>;
+ device_type = "cpu";
+ power-domains = <&sysc R8A779H0_PD_A1E0D0C2>;
+ next-level-cache = <&L3_CA76>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ clocks = <&cpg CPG_CORE R8A779H0_CLK_ZC2>;
+ operating-points-v2 = <&cluster0_opp>;
+ };
+
+ a76_3: cpu@300 {
+ compatible = "arm,cortex-a76";
+ reg = <0x300>;
+ device_type = "cpu";
+ power-domains = <&sysc R8A779H0_PD_A1E0D0C3>;
+ next-level-cache = <&L3_CA76>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ clocks = <&cpg CPG_CORE R8A779H0_CLK_ZC3>;
+ operating-points-v2 = <&cluster0_opp>;
+ };
+
+ idle-states {
+ entry-method = "psci";
+
+ CPU_SLEEP_0: cpu-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ local-timer-stop;
+ entry-latency-us = <400>;
+ exit-latency-us = <500>;
+ min-residency-us = <4000>;
+ };
+ };
+
+ L3_CA76: cache-controller {
+ compatible = "cache";
+ power-domains = <&sysc R8A779H0_PD_A2E0D0>;
+ cache-unified;
+ cache-level = <3>;
+ };
+ };
+
+ extal_clk: extal-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* This value must be overridden by the board */
+ clock-frequency = <0>;
+ };
+
+ extalr_clk: extalr-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* This value must be overridden by the board */
+ clock-frequency = <0>;
+ };
+
+ pmu-a76 {
+ compatible = "arm,cortex-a76-pmu";
+ interrupts-extended = <&gic GIC_PPI 7 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ psci {
+ compatible = "arm,psci-1.0", "arm,psci-0.2";
+ method = "smc";
+ };
+
+ /* External SCIF clock - to be overridden by boards that provide it */
+ scif_clk: scif-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <0>;
+ };
+
+ soc: soc {
+ compatible = "simple-bus";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ rwdt: watchdog@e6020000 {
+ compatible = "renesas,r8a779h0-wdt",
+ "renesas,rcar-gen4-wdt";
+ reg = <0 0xe6020000 0 0x0c>;
+ interrupts = <GIC_SPI 330 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 907>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 907>;
+ status = "disabled";
+ };
+
+ pfc: pinctrl@e6050000 {
+ compatible = "renesas,pfc-r8a779h0";
+ reg = <0 0xe6050000 0 0x16c>, <0 0xe6050800 0 0x16c>,
+ <0 0xe6058000 0 0x16c>, <0 0xe6058800 0 0x16c>,
+ <0 0xe6060000 0 0x16c>, <0 0xe6060800 0 0x16c>,
+ <0 0xe6061000 0 0x16c>, <0 0xe6061800 0 0x16c>;
+ };
+
+ gpio0: gpio@e6050180 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6050180 0 0x54>;
+ interrupts = <GIC_SPI 619 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 0 19>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 915>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 915>;
+ };
+
+ gpio1: gpio@e6050980 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6050980 0 0x54>;
+ interrupts = <GIC_SPI 623 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 32 30>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 915>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 915>;
+ };
+
+ gpio2: gpio@e6058180 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6058180 0 0x54>;
+ interrupts = <GIC_SPI 627 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 64 20>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 916>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 916>;
+ };
+
+ gpio3: gpio@e6058980 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6058980 0 0x54>;
+ interrupts = <GIC_SPI 631 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 96 32>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 916>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 916>;
+ };
+
+ gpio4: gpio@e6060180 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6060180 0 0x54>;
+ interrupts = <GIC_SPI 635 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 128 25>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 917>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 917>;
+ };
+
+ gpio5: gpio@e6060980 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6060980 0 0x54>;
+ interrupts = <GIC_SPI 639 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 160 21>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 917>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 917>;
+ };
+
+ gpio6: gpio@e6061180 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6061180 0 0x54>;
+ interrupts = <GIC_SPI 643 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 192 21>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 917>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 917>;
+ };
+
+ gpio7: gpio@e6061980 {
+ compatible = "renesas,gpio-r8a779h0",
+ "renesas,rcar-gen4-gpio";
+ reg = <0 0xe6061980 0 0x54>;
+ interrupts = <GIC_SPI 647 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 224 21>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ clocks = <&cpg CPG_MOD 917>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 917>;
+ };
+
+ cpg: clock-controller@e6150000 {
+ compatible = "renesas,r8a779h0-cpg-mssr";
+ reg = <0 0xe6150000 0 0x4000>;
+ clocks = <&extal_clk>, <&extalr_clk>;
+ clock-names = "extal", "extalr";
+ #clock-cells = <2>;
+ #power-domain-cells = <0>;
+ #reset-cells = <1>;
+ };
+
+ rst: reset-controller@e6160000 {
+ compatible = "renesas,r8a779h0-rst";
+ reg = <0 0xe6160000 0 0x4000>;
+ };
+
+ sysc: system-controller@e6180000 {
+ compatible = "renesas,r8a779h0-sysc";
+ reg = <0 0xe6180000 0 0x4000>;
+ #power-domain-cells = <1>;
+ };
+
+ i2c0: i2c@e6500000 {
+ compatible = "renesas,i2c-r8a779h0",
+ "renesas,rcar-gen4-i2c";
+ reg = <0 0xe6500000 0 0x40>;
+ interrupts = <GIC_SPI 610 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 518>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 518>;
+ dmas = <&dmac1 0x91>, <&dmac1 0x90>,
+ <&dmac2 0x91>, <&dmac2 0x90>;
+ dma-names = "tx", "rx", "tx", "rx";
+ i2c-scl-internal-delay-ns = <110>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@e6508000 {
+ compatible = "renesas,i2c-r8a779h0",
+ "renesas,rcar-gen4-i2c";
+ reg = <0 0xe6508000 0 0x40>;
+ interrupts = <GIC_SPI 611 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 519>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 519>;
+ dmas = <&dmac1 0x93>, <&dmac1 0x92>,
+ <&dmac2 0x93>, <&dmac2 0x92>;
+ dma-names = "tx", "rx", "tx", "rx";
+ i2c-scl-internal-delay-ns = <110>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@e6510000 {
+ compatible = "renesas,i2c-r8a779h0",
+ "renesas,rcar-gen4-i2c";
+ reg = <0 0xe6510000 0 0x40>;
+ interrupts = <GIC_SPI 612 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 520>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 520>;
+ dmas = <&dmac1 0x95>, <&dmac1 0x94>,
+ <&dmac2 0x95>, <&dmac2 0x94>;
+ dma-names = "tx", "rx", "tx", "rx";
+ i2c-scl-internal-delay-ns = <110>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c3: i2c@e66d0000 {
+ compatible = "renesas,i2c-r8a779h0",
+ "renesas,rcar-gen4-i2c";
+ reg = <0 0xe66d0000 0 0x40>;
+ interrupts = <GIC_SPI 613 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 521>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 521>;
+ dmas = <&dmac1 0x97>, <&dmac1 0x96>,
+ <&dmac2 0x97>, <&dmac2 0x96>;
+ dma-names = "tx", "rx", "tx", "rx";
+ i2c-scl-internal-delay-ns = <110>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ hscif0: serial@e6540000 {
+ compatible = "renesas,hscif-r8a779h0",
+ "renesas,rcar-gen4-hscif", "renesas,hscif";
+ reg = <0 0xe6540000 0 0x60>;
+ interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 514>,
+ <&cpg CPG_CORE R8A779H0_CLK_SASYNCPERD1>,
+ <&scif_clk>;
+ clock-names = "fck", "brg_int", "scif_clk";
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 514>;
+ dmas = <&dmac1 0x31>, <&dmac1 0x30>,
+ <&dmac2 0x31>, <&dmac2 0x30>;
+ dma-names = "tx", "rx", "tx", "rx";
+ status = "disabled";
+ };
+
+ avb0: ethernet@e6800000 {
+ compatible = "renesas,etheravb-r8a779h0",
+ "renesas,etheravb-rcar-gen4";
+ reg = <0 0xe6800000 0 0x1000>;
+ interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 343 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 345 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 346 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 349 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 350 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 356 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 358 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 359 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15",
+ "ch16", "ch17", "ch18", "ch19",
+ "ch20", "ch21", "ch22", "ch23",
+ "ch24";
+ clocks = <&cpg CPG_MOD 211>;
+ clock-names = "fck";
+ power-domains = <&sysc R8A779H0_PD_C4>;
+ resets = <&cpg 211>;
+ phy-mode = "rgmii";
+ rx-internal-delay-ps = <0>;
+ tx-internal-delay-ps = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ avb1: ethernet@e6810000 {
+ compatible = "renesas,etheravb-r8a779h0",
+ "renesas,etheravb-rcar-gen4";
+ reg = <0 0xe6810000 0 0x1000>;
+ interrupts = <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 362 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 363 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 364 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 365 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 366 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 367 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 368 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 369 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 370 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 372 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 373 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 376 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 377 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 378 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 379 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 380 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 381 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 382 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 383 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 384 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15",
+ "ch16", "ch17", "ch18", "ch19",
+ "ch20", "ch21", "ch22", "ch23",
+ "ch24";
+ clocks = <&cpg CPG_MOD 212>;
+ clock-names = "fck";
+ power-domains = <&sysc R8A779H0_PD_C4>;
+ resets = <&cpg 212>;
+ phy-mode = "rgmii";
+ rx-internal-delay-ps = <0>;
+ tx-internal-delay-ps = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ avb2: ethernet@e6820000 {
+ compatible = "renesas,etheravb-r8a779h0",
+ "renesas,etheravb-rcar-gen4";
+ reg = <0 0xe6820000 0 0x1000>;
+ interrupts = <GIC_SPI 385 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 386 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 387 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 388 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 389 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 390 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 391 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 392 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 393 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 394 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 395 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 396 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 397 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 398 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 399 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 400 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 401 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 402 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 403 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 404 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 405 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15",
+ "ch16", "ch17", "ch18", "ch19",
+ "ch20", "ch21", "ch22", "ch23",
+ "ch24";
+ clocks = <&cpg CPG_MOD 213>;
+ clock-names = "fck";
+ power-domains = <&sysc R8A779H0_PD_C4>;
+ resets = <&cpg 213>;
+ phy-mode = "rgmii";
+ rx-internal-delay-ps = <0>;
+ tx-internal-delay-ps = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ dmac1: dma-controller@e7350000 {
+ compatible = "renesas,dmac-r8a779h0",
+ "renesas,rcar-gen4-dmac";
+ reg = <0 0xe7350000 0 0x1000>,
+ <0 0xe7300000 0 0x10000>;
+ interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3", "ch4",
+ "ch5", "ch6", "ch7", "ch8", "ch9",
+ "ch10", "ch11", "ch12", "ch13",
+ "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 709>;
+ clock-names = "fck";
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 709>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ };
+
+ dmac2: dma-controller@e7351000 {
+ compatible = "renesas,dmac-r8a779h0",
+ "renesas,rcar-gen4-dmac";
+ reg = <0 0xe7351000 0 0x1000>,
+ <0 0xe7310000 0 0x10000>;
+ interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3", "ch4",
+ "ch5", "ch6", "ch7";
+ clocks = <&cpg CPG_MOD 710>;
+ clock-names = "fck";
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 710>;
+ #dma-cells = <1>;
+ dma-channels = <8>;
+ };
+
+ mmc0: mmc@ee140000 {
+ compatible = "renesas,sdhi-r8a779h0",
+ "renesas,rcar-gen4-sdhi";
+ reg = <0 0xee140000 0 0x2000>;
+ interrupts = <GIC_SPI 440 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 706>,
+ <&cpg CPG_CORE R8A779H0_CLK_SD0H>;
+ clock-names = "core", "clkh";
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 706>;
+ max-frequency = <200000000>;
+ status = "disabled";
+ };
+
+ rpc: spi@ee200000 {
+ compatible = "renesas,r8a779h0-rpc-if",
+ "renesas,rcar-gen4-rpc-if";
+ reg = <0 0xee200000 0 0x200>,
+ <0 0x08000000 0 0x04000000>,
+ <0 0xee208000 0 0x100>;
+ reg-names = "regs", "dirmap", "wbuf";
+ interrupts = <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 629>;
+ power-domains = <&sysc R8A779H0_PD_ALWAYS_ON>;
+ resets = <&cpg 629>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ gic: interrupt-controller@f1000000 {
+ compatible = "arm,gic-v3";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0xf1000000 0 0x20000>,
+ <0x0 0xf1060000 0 0x110000>;
+ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ prr: chipid@fff00044 {
+ compatible = "renesas,prr";
+ reg = <0 0xfff00044 0 4>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts-extended = <&gic GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+ <&gic GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+ <&gic GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+ <&gic GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>,
+ <&gic GIC_PPI 12 IRQ_TYPE_LEVEL_LOW>;
+ };
+};
diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi
index 2ab231572d95..964b0a475eee 100644
--- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi
@@ -61,6 +61,75 @@
&soc {
interrupt-parent = <&gic>;
+ cru: video@10830000 {
+ compatible = "renesas,r9a07g043-cru", "renesas,rzg2l-cru";
+ reg = <0 0x10830000 0 0x400>;
+ clocks = <&cpg CPG_MOD R9A07G043_CRU_VCLK>,
+ <&cpg CPG_MOD R9A07G043_CRU_PCLK>,
+ <&cpg CPG_MOD R9A07G043_CRU_ACLK>;
+ clock-names = "video", "apb", "axi";
+ interrupts = <SOC_PERIPHERAL_IRQ(167) IRQ_TYPE_LEVEL_HIGH>,
+ <SOC_PERIPHERAL_IRQ(168) IRQ_TYPE_LEVEL_HIGH>,
+ <SOC_PERIPHERAL_IRQ(169) IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "image_conv", "image_conv_err", "axi_mst_err";
+ resets = <&cpg R9A07G043_CRU_PRESETN>,
+ <&cpg R9A07G043_CRU_ARESETN>;
+ reset-names = "presetn", "aresetn";
+ power-domains = <&cpg>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <1>;
+ crucsi2: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&csi2cru>;
+ };
+ };
+ };
+ };
+
+ csi2: csi2@10830400 {
+ compatible = "renesas,r9a07g043-csi2", "renesas,rzg2l-csi2";
+ reg = <0 0x10830400 0 0xfc00>;
+ interrupts = <SOC_PERIPHERAL_IRQ(166) IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD R9A07G043_CRU_SYSCLK>,
+ <&cpg CPG_MOD R9A07G043_CRU_VCLK>,
+ <&cpg CPG_MOD R9A07G043_CRU_PCLK>;
+ clock-names = "system", "video", "apb";
+ resets = <&cpg R9A07G043_CRU_PRESETN>,
+ <&cpg R9A07G043_CRU_CMN_RSTB>;
+ reset-names = "presetn", "cmn-rstb";
+ power-domains = <&cpg>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ };
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ csi2cru: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&crucsi2>;
+ };
+ };
+ };
+ };
+
irqc: interrupt-controller@110a0000 {
compatible = "renesas,r9a07g043u-irqc",
"renesas,rzg2l-irqc";
@@ -109,7 +178,13 @@
<SOC_PERIPHERAL_IRQ(473) IRQ_TYPE_LEVEL_HIGH>,
<SOC_PERIPHERAL_IRQ(474) IRQ_TYPE_LEVEL_HIGH>,
<SOC_PERIPHERAL_IRQ(475) IRQ_TYPE_LEVEL_HIGH>,
- <SOC_PERIPHERAL_IRQ(25) IRQ_TYPE_EDGE_RISING>;
+ <SOC_PERIPHERAL_IRQ(25) IRQ_TYPE_EDGE_RISING>,
+ <SOC_PERIPHERAL_IRQ(34) IRQ_TYPE_EDGE_RISING>,
+ <SOC_PERIPHERAL_IRQ(35) IRQ_TYPE_EDGE_RISING>,
+ <SOC_PERIPHERAL_IRQ(36) IRQ_TYPE_EDGE_RISING>,
+ <SOC_PERIPHERAL_IRQ(37) IRQ_TYPE_EDGE_RISING>,
+ <SOC_PERIPHERAL_IRQ(38) IRQ_TYPE_EDGE_RISING>,
+ <SOC_PERIPHERAL_IRQ(39) IRQ_TYPE_EDGE_RISING>;
interrupt-names = "nmi",
"irq0", "irq1", "irq2", "irq3",
"irq4", "irq5", "irq6", "irq7",
@@ -121,7 +196,9 @@
"tint20", "tint21", "tint22", "tint23",
"tint24", "tint25", "tint26", "tint27",
"tint28", "tint29", "tint30", "tint31",
- "bus-err";
+ "bus-err", "ec7tie1-0", "ec7tie2-0",
+ "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1",
+ "ec7tiovf-1";
clocks = <&cpg CPG_MOD R9A07G043_IA55_CLK>,
<&cpg CPG_MOD R9A07G043_IA55_PCLK>;
clock-names = "clk", "pclk";
diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u11-smarc-cru-csi-ov5645.dtso b/arch/arm64/boot/dts/renesas/r9a07g043u11-smarc-cru-csi-ov5645.dtso
new file mode 100644
index 000000000000..b41bb4b31a26
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/r9a07g043u11-smarc-cru-csi-ov5645.dtso
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Tree overlay for the RZ/G2UL SMARC EVK with OV5645 camera
+ * connected to CSI and CRU enabled.
+ *
+ * Copyright (C) 2024 Renesas Electronics Corp.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/pinctrl/rzg2l-pinctrl.h>
+
+#define OV5645_PARENT_I2C i2c0
+#include "rz-smarc-cru-csi-ov5645.dtsi"
+
+&ov5645 {
+ enable-gpios = <&pinctrl RZG2L_GPIO(4, 4) GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&pinctrl RZG2L_GPIO(0, 1) GPIO_ACTIVE_LOW>;
+};
diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi
index 66f68fc2b241..9f00b75d2bd0 100644
--- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi
@@ -793,6 +793,22 @@
reset-names = "rst", "arst", "prst";
power-domains = <&cpg>;
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi0_in: endpoint {
+ remote-endpoint = <&du_out_dsi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
};
vspd: vsp@10870000 {
@@ -820,6 +836,36 @@
resets = <&cpg R9A07G044_LCDC_RESET_N>;
};
+ du: display@10890000 {
+ compatible = "renesas,r9a07g044-du";
+ reg = <0 0x10890000 0 0x10000>;
+ interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD R9A07G044_LCDC_CLK_A>,
+ <&cpg CPG_MOD R9A07G044_LCDC_CLK_P>,
+ <&cpg CPG_MOD R9A07G044_LCDC_CLK_D>;
+ clock-names = "aclk", "pclk", "vclk";
+ power-domains = <&cpg>;
+ resets = <&cpg R9A07G044_LCDC_RESET_N>;
+ renesas,vsps = <&vspd 0>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ du_out_dsi: endpoint {
+ remote-endpoint = <&dsi0_in>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
cpg: clock-controller@11010000 {
compatible = "renesas,r9a07g044-cpg";
reg = <0 0x11010000 0 0x10000>;
@@ -905,7 +951,27 @@
<GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3",
+ "irq4", "irq5", "irq6", "irq7",
+ "tint0", "tint1", "tint2", "tint3",
+ "tint4", "tint5", "tint6", "tint7",
+ "tint8", "tint9", "tint10", "tint11",
+ "tint12", "tint13", "tint14", "tint15",
+ "tint16", "tint17", "tint18", "tint19",
+ "tint20", "tint21", "tint22", "tint23",
+ "tint24", "tint25", "tint26", "tint27",
+ "tint28", "tint29", "tint30", "tint31",
+ "bus-err", "ec7tie1-0", "ec7tie2-0",
+ "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1",
+ "ec7tiovf-1";
clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>,
<&cpg CPG_MOD R9A07G044_IA55_PCLK>;
clock-names = "clk", "pclk";
diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi
index 1f1d481dc783..53d8905f367a 100644
--- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi
@@ -798,6 +798,22 @@
reset-names = "rst", "arst", "prst";
power-domains = <&cpg>;
status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi0_in: endpoint {
+ remote-endpoint = <&du_out_dsi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
};
vspd: vsp@10870000 {
@@ -826,6 +842,37 @@
resets = <&cpg R9A07G054_LCDC_RESET_N>;
};
+ du: display@10890000 {
+ compatible = "renesas,r9a07g054-du",
+ "renesas,r9a07g044-du";
+ reg = <0 0x10890000 0 0x10000>;
+ interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD R9A07G054_LCDC_CLK_A>,
+ <&cpg CPG_MOD R9A07G054_LCDC_CLK_P>,
+ <&cpg CPG_MOD R9A07G054_LCDC_CLK_D>;
+ clock-names = "aclk", "pclk", "vclk";
+ power-domains = <&cpg>;
+ resets = <&cpg R9A07G054_LCDC_RESET_N>;
+ renesas,vsps = <&vspd 0>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ du_out_dsi: endpoint {
+ remote-endpoint = <&dsi0_in>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
cpg: clock-controller@11010000 {
compatible = "renesas,r9a07g054-cpg";
reg = <0 0x11010000 0 0x10000>;
@@ -912,7 +959,27 @@
<GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3",
+ "irq4", "irq5", "irq6", "irq7",
+ "tint0", "tint1", "tint2", "tint3",
+ "tint4", "tint5", "tint6", "tint7",
+ "tint8", "tint9", "tint10", "tint11",
+ "tint12", "tint13", "tint14", "tint15",
+ "tint16", "tint17", "tint18", "tint19",
+ "tint20", "tint21", "tint22", "tint23",
+ "tint24", "tint25", "tint26", "tint27",
+ "tint28", "tint29", "tint30", "tint31",
+ "bus-err", "ec7tie1-0", "ec7tie2-0",
+ "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1",
+ "ec7tiovf-1";
clocks = <&cpg CPG_MOD R9A07G054_IA55_CLK>,
<&cpg CPG_MOD R9A07G054_IA55_PCLK>;
clock-names = "clk", "pclk";
diff --git a/arch/arm64/boot/dts/renesas/r9a08g045.dtsi b/arch/arm64/boot/dts/renesas/r9a08g045.dtsi
index 5facfad96158..f5f3f4f4c8d6 100644
--- a/arch/arm64/boot/dts/renesas/r9a08g045.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a08g045.dtsi
@@ -42,6 +42,11 @@
clock-frequency = <0>;
};
+ psci {
+ compatible = "arm,psci-1.0", "arm,psci-0.2";
+ method = "smc";
+ };
+
soc: soc {
compatible = "simple-bus";
interrupt-parent = <&gic>;
@@ -152,7 +157,10 @@
<GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 459 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 460 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>;
+ <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "nmi",
"irq0", "irq1", "irq2", "irq3",
"irq4", "irq5", "irq6", "irq7",
@@ -164,7 +172,8 @@
"tint20", "tint21", "tint22", "tint23",
"tint24", "tint25", "tint26", "tint27",
"tint28", "tint29", "tint30", "tint31",
- "bus-err";
+ "bus-err", "ec7tie1-0", "ec7tie2-0",
+ "ec7tiovf-0";
clocks = <&cpg CPG_MOD R9A08G045_IA55_CLK>,
<&cpg CPG_MOD R9A08G045_IA55_PCLK>;
clock-names = "clk", "pclk";
@@ -264,6 +273,20 @@
<0x0 0x12440000 0 0x60000>;
interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_LOW>;
};
+
+ wdt0: watchdog@12800800 {
+ compatible = "renesas,r9a08g045-wdt", "renesas,rzg2l-wdt";
+ reg = <0 0x12800800 0 0x400>;
+ clocks = <&cpg CPG_MOD R9A08G045_WDT0_PCLK>,
+ <&cpg CPG_MOD R9A08G045_WDT0_CLK>;
+ clock-names = "pclk", "oscclk";
+ interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "wdt", "perrout";
+ resets = <&cpg R9A08G045_WDT0_PRESETN>;
+ power-domains = <&cpg>;
+ status = "disabled";
+ };
};
timer {
diff --git a/arch/arm64/boot/dts/renesas/rzg2l-smarc.dtsi b/arch/arm64/boot/dts/renesas/rzg2l-smarc.dtsi
index 37807f1bda4d..887dffe14910 100644
--- a/arch/arm64/boot/dts/renesas/rzg2l-smarc.dtsi
+++ b/arch/arm64/boot/dts/renesas/rzg2l-smarc.dtsi
@@ -40,17 +40,7 @@
status = "okay";
ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- dsi0_in: endpoint {
- };
- };
-
port@1 {
- reg = <1>;
dsi0_out: endpoint {
data-lanes = <1 2 3 4>;
remote-endpoint = <&adv7535_in>;
@@ -59,6 +49,10 @@
};
};
+&du {
+ status = "okay";
+};
+
&i2c1 {
adv7535: hdmi@3d {
compatible = "adi,adv7535";
diff --git a/arch/arm64/boot/dts/renesas/rzg2lc-smarc.dtsi b/arch/arm64/boot/dts/renesas/rzg2lc-smarc.dtsi
index 859bc8745e66..f21508640b6e 100644
--- a/arch/arm64/boot/dts/renesas/rzg2lc-smarc.dtsi
+++ b/arch/arm64/boot/dts/renesas/rzg2lc-smarc.dtsi
@@ -56,17 +56,7 @@
status = "okay";
ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- dsi0_in: endpoint {
- };
- };
-
port@1 {
- reg = <1>;
dsi0_out: endpoint {
data-lanes = <1 2 3 4>;
remote-endpoint = <&adv7535_in>;
@@ -75,6 +65,10 @@
};
};
+&du {
+ status = "okay";
+};
+
&i2c1 {
adv7535: hdmi@3d {
compatible = "adi,adv7535";
diff --git a/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi b/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi
index f062d4ad78b7..acac4666ae59 100644
--- a/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi
+++ b/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi
@@ -193,12 +193,14 @@
#endif
&pinctrl {
+#if SW_CONFIG3 == SW_ON
eth0-phy-irq-hog {
gpio-hog;
gpios = <RZG2L_GPIO(12, 0) GPIO_ACTIVE_LOW>;
input;
line-name = "eth0-phy-irq";
};
+#endif
eth0_pins: eth0 {
txc {
@@ -234,12 +236,14 @@
};
};
+#if SW_CONFIG3 == SW_ON
eth1-phy-irq-hog {
gpio-hog;
gpios = <RZG2L_GPIO(12, 1) GPIO_ACTIVE_LOW>;
input;
line-name = "eth1-phy-irq";
};
+#endif
eth1_pins: eth1 {
txc {
@@ -336,3 +340,8 @@
};
};
};
+
+&wdt0 {
+ timeout-sec = <60>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/renesas/rzg3s-smarc.dtsi b/arch/arm64/boot/dts/renesas/rzg3s-smarc.dtsi
index 214520137230..deb2ad37bb2e 100644
--- a/arch/arm64/boot/dts/renesas/rzg3s-smarc.dtsi
+++ b/arch/arm64/boot/dts/renesas/rzg3s-smarc.dtsi
@@ -6,6 +6,7 @@
*/
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
#include <dt-bindings/pinctrl/rzg2l-pinctrl.h>
/ {
@@ -14,6 +15,37 @@
mmc1 = &sdhi1;
};
+ keys {
+ compatible = "gpio-keys";
+
+ key-1 {
+ interrupts = <RZG2L_GPIO(18, 0) IRQ_TYPE_EDGE_FALLING>;
+ interrupt-parent = <&pinctrl>;
+ linux,code = <KEY_1>;
+ label = "USER_SW1";
+ wakeup-source;
+ debounce-interval = <20>;
+ };
+
+ key-2 {
+ interrupts = <RZG2L_GPIO(0, 1) IRQ_TYPE_EDGE_FALLING>;
+ interrupt-parent = <&pinctrl>;
+ linux,code = <KEY_2>;
+ label = "USER_SW2";
+ wakeup-source;
+ debounce-interval = <20>;
+ };
+
+ key-3 {
+ interrupts = <RZG2L_GPIO(0, 3) IRQ_TYPE_EDGE_FALLING>;
+ interrupt-parent = <&pinctrl>;
+ linux,code = <KEY_3>;
+ label = "USER_SW3";
+ wakeup-source;
+ debounce-interval = <20>;
+ };
+ };
+
vcc_sdhi1: regulator-vcc-sdhi1 {
compatible = "regulator-fixed";
regulator-name = "SDHI1 Vcc";
@@ -35,6 +67,27 @@
};
&pinctrl {
+ key-1-gpio-hog {
+ gpio-hog;
+ gpios = <RZG2L_GPIO(18, 0) GPIO_ACTIVE_LOW>;
+ input;
+ line-name = "key-1-gpio-irq";
+ };
+
+ key-2-gpio-hog {
+ gpio-hog;
+ gpios = <RZG2L_GPIO(0, 1) GPIO_ACTIVE_LOW>;
+ input;
+ line-name = "key-2-gpio-irq";
+ };
+
+ key-3-gpio-hog {
+ gpio-hog;
+ gpios = <RZG2L_GPIO(0, 3) GPIO_ACTIVE_LOW>;
+ input;
+ line-name = "key-3-gpio-irq";
+ };
+
scif0_pins: scif0 {
pinmux = <RZG2L_PORT_PINMUX(6, 3, 1)>, /* RXD */
<RZG2L_PORT_PINMUX(6, 4, 1)>; /* TXD */
diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
index 3885ef3454ff..431b37bf5661 100644
--- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
+++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
@@ -32,39 +32,40 @@
};
};
- accel_3v3: regulator-acc-3v3 {
+ reg_t1p8v: regulator-t1p8v {
compatible = "regulator-fixed";
- regulator-name = "accel-3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- };
-
- hdmi_1v8: regulator-hdmi-1v8 {
- compatible = "regulator-fixed";
- regulator-name = "hdmi-1v8";
+ regulator-name = "T1.8V";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ regulator-always-on;
};
- hdmi_3v3: regulator-hdmi-3v3 {
+ pcie_1v5: regulator-pcie-1v5 {
compatible = "regulator-fixed";
- regulator-name = "hdmi-3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
+ regulator-name = "pcie-1v5";
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <1500000>;
+ gpio = <&gpio_exp_77 15 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
};
- snd_3p3v: regulator-snd_3p3v {
+ pcie_3v3: regulator-pcie-3v3 {
compatible = "regulator-fixed";
- regulator-name = "snd-3.3v";
+ regulator-name = "pcie-3v3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
+ gpio = <&gpio_exp_77 14 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
};
- snd_vcc5v: regulator-snd_vcc5v {
+ reg_5v: regulator-5v {
compatible = "regulator-fixed";
- regulator-name = "snd-vcc5v";
+ regulator-name = "fixed-5V";
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
+ regulator-boot-on;
+ regulator-always-on;
};
wlan_en: regulator-wlan_en {
@@ -157,11 +158,11 @@
pd-gpios = <&gpio_exp_75 5 GPIO_ACTIVE_LOW>;
- avdd-supply = <&hdmi_1v8>;
- dvdd-supply = <&hdmi_1v8>;
- pvdd-supply = <&hdmi_1v8>;
- dvdd-3v-supply = <&hdmi_3v3>;
- bgvdd-supply = <&hdmi_1v8>;
+ avdd-supply = <&reg_t1p8v>;
+ dvdd-supply = <&reg_t1p8v>;
+ pvdd-supply = <&reg_t1p8v>;
+ dvdd-3v-supply = <&reg_3p3v>;
+ bgvdd-supply = <&reg_t1p8v>;
adi,input-depth = <8>;
adi,input-colorspace = "rgb";
@@ -198,8 +199,8 @@
compatible = "st,lsm9ds0-imu";
reg = <0x1d>;
- vdd-supply = <&accel_3v3>;
- vddio-supply = <&accel_3v3>;
+ vdd-supply = <&reg_3p3v>;
+ vddio-supply = <&reg_3p3v>;
};
pcm3168a: audio-codec@44 {
@@ -209,20 +210,20 @@
clocks = <&clksndsel>;
clock-names = "scki";
- VDD1-supply = <&snd_3p3v>;
- VDD2-supply = <&snd_3p3v>;
- VCCAD1-supply = <&snd_vcc5v>;
- VCCAD2-supply = <&snd_vcc5v>;
- VCCDA1-supply = <&snd_vcc5v>;
- VCCDA2-supply = <&snd_vcc5v>;
+ VDD1-supply = <&reg_3p3v>;
+ VDD2-supply = <&reg_3p3v>;
+ VCCAD1-supply = <&reg_5v>;
+ VCCAD2-supply = <&reg_5v>;
+ VCCDA1-supply = <&reg_5v>;
+ VCCDA2-supply = <&reg_5v>;
};
gyroscope@6b {
compatible = "st,lsm9ds0-gyro";
reg = <0x6b>;
- vdd-supply = <&accel_3v3>;
- vddio-supply = <&accel_3v3>;
+ vdd-supply = <&reg_3p3v>;
+ vddio-supply = <&reg_3p3v>;
};
};
};
@@ -234,6 +235,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupt-parent = <&gpio6>;
interrupts = <8 IRQ_TYPE_EDGE_FALLING>;
@@ -294,6 +296,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupt-parent = <&gpio6>;
interrupts = <4 IRQ_TYPE_EDGE_FALLING>;
};
@@ -314,6 +317,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupt-parent = <&gpio7>;
interrupts = <3 IRQ_TYPE_EDGE_FALLING>;
};
@@ -324,6 +328,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-controller;
+ #interrupt-cells = <2>;
interrupt-parent = <&gpio5>;
interrupts = <9 IRQ_TYPE_EDGE_FALLING>;
};
@@ -344,6 +349,9 @@
&pciec1 {
status = "okay";
+
+ vpcie1v5-supply = <&pcie_1v5>;
+ vpcie3v3-supply = <&pcie_3v3>;
};
&pfc {
@@ -413,6 +421,13 @@
pinctrl-names = "default";
status = "okay";
+
+ gnss {
+ compatible = "u-blox,neo-m8";
+ reset-gpios = <&gpio_exp_75 6 GPIO_ACTIVE_LOW>;
+ vcc-supply = <&reg_3p3v>;
+ current-speed = <9600>;
+ };
};
&sdhi3 {
diff --git a/arch/arm64/boot/dts/renesas/white-hawk-common.dtsi b/arch/arm64/boot/dts/renesas/white-hawk-common.dtsi
new file mode 100644
index 000000000000..c99086edadca
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/white-hawk-common.dtsi
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the common parts shared by the White Hawk BreakOut
+ * and White Hawk Single boards
+ *
+ * Copyright (C) 2022 Renesas Electronics Corp.
+ */
+
+#include "white-hawk-csi-dsi.dtsi"
+#include "white-hawk-ethernet.dtsi"
+
+/ {
+ can_transceiver0: can-phy0 {
+ compatible = "nxp,tjr1443";
+ #phy-cells = <0>;
+ enable-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
+ max-bitrate = <5000000>;
+ };
+};
+
+&can_clk {
+ clock-frequency = <40000000>;
+};
+
+&canfd {
+ pinctrl-0 = <&canfd0_pins>, <&canfd1_pins>, <&can_clk_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+
+ channel0 {
+ status = "okay";
+ phys = <&can_transceiver0>;
+ };
+
+ channel1 {
+ status = "okay";
+ };
+};
+
+&i2c0 {
+ eeprom@51 {
+ compatible = "rohm,br24g01", "atmel,24c01";
+ label = "breakout-board";
+ reg = <0x51>;
+ pagesize = <8>;
+ };
+};
+
+&pfc {
+ can_clk_pins: can-clk {
+ groups = "can_clk";
+ function = "can_clk";
+ };
+
+ canfd0_pins: canfd0 {
+ groups = "canfd0_data";
+ function = "canfd0";
+ };
+
+ canfd1_pins: canfd1 {
+ groups = "canfd1_data";
+ function = "canfd1";
+ };
+};
diff --git a/arch/arm64/boot/dts/renesas/white-hawk-cpu-common.dtsi b/arch/arm64/boot/dts/renesas/white-hawk-cpu-common.dtsi
new file mode 100644
index 000000000000..8ac17370ff36
--- /dev/null
+++ b/arch/arm64/boot/dts/renesas/white-hawk-cpu-common.dtsi
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Device Tree Source for the common parts shared by the White Hawk CPU and
+ * White Hawk Single boards
+ *
+ * Copyright (C) 2022 Renesas Electronics Corp.
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ aliases {
+ ethernet0 = &avb0;
+ serial0 = &hscif0;
+ };
+
+ chosen {
+ bootargs = "ignore_loglevel rw root=/dev/nfs ip=on";
+ stdout-path = "serial0:921600n8";
+ };
+
+ sn65dsi86_refclk: clk-x6 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <38400000>;
+ };
+
+ keys {
+ compatible = "gpio-keys";
+
+ pinctrl-0 = <&keys_pins>;
+ pinctrl-names = "default";
+
+ key-1 {
+ gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_1>;
+ label = "SW47";
+ wakeup-source;
+ debounce-interval = <20>;
+ };
+
+ key-2 {
+ gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_2>;
+ label = "SW48";
+ wakeup-source;
+ debounce-interval = <20>;
+ };
+
+ key-3 {
+ gpios = <&gpio5 2 GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_3>;
+ label = "SW49";
+ wakeup-source;
+ debounce-interval = <20>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-1 {
+ gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_INDICATOR;
+ function-enumerator = <1>;
+ };
+
+ led-2 {
+ gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_INDICATOR;
+ function-enumerator = <2>;
+ };
+
+ led-3 {
+ gpios = <&gpio7 2 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_INDICATOR;
+ function-enumerator = <3>;
+ };
+ };
+
+ memory@48000000 {
+ device_type = "memory";
+ /* first 128MB is reserved for secure area. */
+ reg = <0x0 0x48000000 0x0 0x78000000>;
+ };
+
+ memory@480000000 {
+ device_type = "memory";
+ reg = <0x4 0x80000000 0x0 0x80000000>;
+ };
+
+ memory@600000000 {
+ device_type = "memory";
+ reg = <0x6 0x00000000 0x1 0x00000000>;
+ };
+
+ mini-dp-con {
+ compatible = "dp-connector";
+ label = "CN5";
+ type = "mini";
+
+ port {
+ mini_dp_con_in: endpoint {
+ remote-endpoint = <&sn65dsi86_out>;
+ };
+ };
+ };
+
+ reg_1p2v: regulator-1p2v {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed-1.2V";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ reg_1p8v: regulator-1p8v {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed-1.8V";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ reg_3p3v: regulator-3p3v {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed-3.3V";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+};
+
+&avb0 {
+ pinctrl-0 = <&avb0_pins>;
+ pinctrl-names = "default";
+ phy-handle = <&phy0>;
+ tx-internal-delay-ps = <2000>;
+ status = "okay";
+
+ phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-id0022.1622",
+ "ethernet-phy-ieee802.3-c22";
+ rxc-skew-ps = <1500>;
+ reg = <0>;
+ interrupt-parent = <&gpio7>;
+ interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
+ reset-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&dsi0 {
+ status = "okay";
+
+ ports {
+ port@1 {
+ dsi0_out: endpoint {
+ remote-endpoint = <&sn65dsi86_in>;
+ data-lanes = <1 2 3 4>;
+ };
+ };
+ };
+};
+
+&du {
+ status = "okay";
+};
+
+&extal_clk {
+ clock-frequency = <16666666>;
+};
+
+&extalr_clk {
+ clock-frequency = <32768>;
+};
+
+&hscif0 {
+ pinctrl-0 = <&hscif0_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+};
+
+&i2c0 {
+ pinctrl-0 = <&i2c0_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+ clock-frequency = <400000>;
+
+ io_expander_a: gpio@20 {
+ compatible = "onnn,pca9654";
+ reg = <0x20>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ eeprom@50 {
+ compatible = "rohm,br24g01", "atmel,24c01";
+ label = "cpu-board";
+ reg = <0x50>;
+ pagesize = <8>;
+ };
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+ clock-frequency = <400000>;
+
+ bridge@2c {
+ compatible = "ti,sn65dsi86";
+ reg = <0x2c>;
+
+ clocks = <&sn65dsi86_refclk>;
+ clock-names = "refclk";
+
+ interrupt-parent = <&intc_ex>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+ enable-gpios = <&gpio1 26 GPIO_ACTIVE_HIGH>;
+
+ vccio-supply = <&reg_1p8v>;
+ vpll-supply = <&reg_1p8v>;
+ vcca-supply = <&reg_1p2v>;
+ vcc-supply = <&reg_1p2v>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ sn65dsi86_in: endpoint {
+ remote-endpoint = <&dsi0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ sn65dsi86_out: endpoint {
+ remote-endpoint = <&mini_dp_con_in>;
+ };
+ };
+ };
+ };
+};
+
+&mmc0 {
+ pinctrl-0 = <&mmc_pins>;
+ pinctrl-1 = <&mmc_pins>;
+ pinctrl-names = "default", "state_uhs";
+
+ vmmc-supply = <&reg_3p3v>;
+ vqmmc-supply = <&reg_1p8v>;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ bus-width = <8>;
+ no-sd;
+ no-sdio;
+ non-removable;
+ full-pwr-cycle-in-suspend;
+ status = "okay";
+};
+
+&pfc {
+ pinctrl-0 = <&scif_clk_pins>;
+ pinctrl-names = "default";
+
+ avb0_pins: avb0 {
+ mux {
+ groups = "avb0_link", "avb0_mdio", "avb0_rgmii",
+ "avb0_txcrefclk";
+ function = "avb0";
+ };
+
+ pins_mdio {
+ groups = "avb0_mdio";
+ drive-strength = <21>;
+ };
+
+ pins_mii {
+ groups = "avb0_rgmii";
+ drive-strength = <21>;
+ };
+
+ };
+
+ hscif0_pins: hscif0 {
+ groups = "hscif0_data";
+ function = "hscif0";
+ };
+
+ i2c0_pins: i2c0 {
+ groups = "i2c0";
+ function = "i2c0";
+ };
+
+ i2c1_pins: i2c1 {
+ groups = "i2c1";
+ function = "i2c1";
+ };
+
+ keys_pins: keys {
+ pins = "GP_5_0", "GP_5_1", "GP_5_2";
+ bias-pull-up;
+ };
+
+ mmc_pins: mmc {
+ groups = "mmc_data8", "mmc_ctrl", "mmc_ds";
+ function = "mmc";
+ power-source = <1800>;
+ };
+
+ qspi0_pins: qspi0 {
+ groups = "qspi0_ctrl", "qspi0_data4";
+ function = "qspi0";
+ };
+
+ scif_clk_pins: scif_clk {
+ groups = "scif_clk";
+ function = "scif_clk";
+ };
+};
+
+&rpc {
+ pinctrl-0 = <&qspi0_pins>;
+ pinctrl-names = "default";
+
+ status = "okay";
+
+ flash@0 {
+ compatible = "spansion,s25fs512s", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>;
+ spi-rx-bus-width = <4>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ boot@0 {
+ reg = <0x0 0x1200000>;
+ read-only;
+ };
+ user@1200000 {
+ reg = <0x1200000 0x2e00000>;
+ };
+ };
+ };
+};
+
+&rwdt {
+ timeout-sec = <60>;
+ status = "okay";
+};
+
+&scif_clk {
+ clock-frequency = <24000000>;
+};
diff --git a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-csi-dsi.dtsi b/arch/arm64/boot/dts/renesas/white-hawk-csi-dsi.dtsi
index f8537f7ea4de..3006b0a64f41 100644
--- a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-csi-dsi.dtsi
+++ b/arch/arm64/boot/dts/renesas/white-hawk-csi-dsi.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/*
- * Device Tree Source for the R-Car V4H White Hawk CSI/DSI sub-board
+ * Device Tree Source for the White Hawk CSI/DSI sub-board
*
* Copyright (C) 2022 Glider bv
*/
diff --git a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-ethernet.dtsi b/arch/arm64/boot/dts/renesas/white-hawk-ethernet.dtsi
index 4f411f95c674..a218fda337cf 100644
--- a/arch/arm64/boot/dts/renesas/r8a779g0-white-hawk-ethernet.dtsi
+++ b/arch/arm64/boot/dts/renesas/white-hawk-ethernet.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/*
- * Device Tree Source for the R-Car V4H White Hawk RAVB/Ethernet(1000Base-T1)
+ * Device Tree Source for the White Hawk RAVB/Ethernet(1000Base-T1)
* sub-board
*
* Copyright (C) 2022 Glider bv
diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile
index a7b30e11beaf..f906a868b71a 100644
--- a/arch/arm64/boot/dts/rockchip/Makefile
+++ b/arch/arm64/boot/dts/rockchip/Makefile
@@ -71,6 +71,8 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rockpro64.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-sapphire.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-sapphire-excavator.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399pro-rock-pi-n10.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg-arc-d.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg-arc-s.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg353p.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg353ps.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg353v.dtb
@@ -78,6 +80,9 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg353vs.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-anbernic-rg503.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-pinenote-v1.1.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-pinenote-v1.2.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-pinetab2-v0.1.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-pinetab2-v2.0.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-powkiddy-rgb10max3.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-powkiddy-rgb30.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-powkiddy-rk2023.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3566-powkiddy-x55.dtb
@@ -98,11 +103,13 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-lubancat-2.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-nanopi-r5c.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-nanopi-r5s.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-odroid-m1.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-qnap-ts433.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-radxa-e25.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-roc-pc.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3568-rock-3a.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-coolpi-cm5-evb.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-edgeble-neu6a-io.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-edgeble-neu6a-wifi.dtbo
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-edgeble-neu6b-io.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-evb1-v10.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-jaguar.dtb
@@ -110,9 +117,13 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nanopc-t6.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-orangepi-5-plus.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-quartzpro64.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-rock-5b.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-tiger-haikou.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-toybrick-x0.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-turing-rk1.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-coolpi-4b.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-indiedroid-nova.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-khadas-edge2.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-nanopi-r6s.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-nanopi-r6c.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-rock-5a.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-orangepi-5.dtb
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
index 16798eb77077..ae398acdcf45 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
@@ -227,6 +227,7 @@
&uart5 {
pinctrl-0 = <&uart5_xfer>;
+ rts-gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
index 12397755830b..bb1aea82e666 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
@@ -347,6 +347,12 @@
};
};
+&pmu_io_domains {
+ pmuio1-supply = <&vcc_3v3>;
+ pmuio2-supply = <&vcc_3v3>;
+ status = "okay";
+};
+
&saradc {
vref-supply = <&vcc_1v8>;
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi
index d0905515399b..9137dd76e72c 100644
--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
@@ -631,6 +631,7 @@
clock-names = "spiclk", "apb_pclk";
dmas = <&dmac 12>, <&dmac 13>;
dma-names = "tx", "rx";
+ num-cs = <2>;
pinctrl-names = "default";
pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>;
#address-cells = <1>;
@@ -646,6 +647,7 @@
clock-names = "spiclk", "apb_pclk";
dmas = <&dmac 14>, <&dmac 15>;
dma-names = "tx", "rx";
+ num-cs = <2>;
pinctrl-names = "default";
pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
index 3cda6c627b68..f09d60bbe6c4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
@@ -148,7 +148,7 @@
assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
clock_in_out = "input";
- phy-handle = <&rtl8211e>;
+ phy-handle = <&rtl8211>;
phy-mode = "rgmii";
phy-supply = <&vcc_io>;
pinctrl-names = "default";
@@ -165,7 +165,7 @@
#address-cells = <1>;
#size-cells = <0>;
- rtl8211e: ethernet-phy@1 {
+ rtl8211: ethernet-phy@1 {
reg = <1>;
pinctrl-0 = <&eth_phy_int_pin>, <&eth_phy_reset_pin>;
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index fb5dcf6e9327..b6f045069ee2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -488,7 +488,6 @@
pwm3: pwm@ff1b0030 {
compatible = "rockchip,rk3328-pwm";
reg = <0x0 0xff1b0030 0x0 0x10>;
- interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
clock-names = "pwm", "pclk";
pinctrl-names = "default";
@@ -745,11 +744,20 @@
status = "disabled";
ports {
- hdmi_in: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi_in: port@0 {
+ reg = <0>;
+
hdmi_in_vop: endpoint {
remote-endpoint = <&vop_out_hdmi>;
};
};
+
+ hdmi_out: port@1 {
+ reg = <1>;
+ };
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
index 0f9cc042d9bf..1cba1d857c96 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
@@ -70,7 +70,7 @@
&spi0 {
status = "okay";
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
interrupt-parent = <&gpio0>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index c5e7de60c121..5846a11f0e84 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -706,7 +706,7 @@ camera: &i2c7 {
&spi2 {
status = "okay";
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
interrupt-parent = <&gpio1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
index 9e3aec4440bd..dfb2a0bdea5b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
@@ -22,9 +22,6 @@
ethernet0 = &gmac;
mmc0 = &sdmmc;
mmc1 = &sdhci;
- spi1 = &spi1;
- spi2 = &spi2;
- spi5 = &spi5;
};
avdd_0v9_s0: avdd-0v9-s0 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts
index e7551449e718..e26e2d86279c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts
@@ -14,7 +14,7 @@
/ {
model = "Orange Pi RK3399 Board";
- compatible = "rockchip,rk3399-orangepi", "rockchip,rk3399";
+ compatible = "xunlong,rk3399-orangepi", "rockchip,rk3399";
aliases {
ethernet0 = &gmac;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index 18a98c4648ea..2c3984a880af 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -273,11 +273,12 @@
&uart0 {
pinctrl-names = "default";
- pinctrl-0 = <&uart0_xfer &uart0_cts &uart0_rts>;
+ pinctrl-0 = <&uart0_xfer>;
status = "okay";
};
&uart2 {
+ rts-gpios = <&gpio2 RK_PC3 GPIO_ACTIVE_HIGH>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a.dts
index d5df8939a658..c68f45849c44 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a.dts
@@ -19,6 +19,6 @@
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
- spi-max-frequency = <10000000>;
+ spi-max-frequency = <108000000>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts
index bee6d7588302..6ea3180e57ca 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts
@@ -37,7 +37,7 @@
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
- spi-max-frequency = <10000000>;
+ spi-max-frequency = <108000000>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4c.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4c.dts
index de2ebe4cb4f3..5274938bf1b8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4c.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4c.dts
@@ -49,7 +49,7 @@
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
- spi-max-frequency = <10000000>;
+ spi-max-frequency = <108000000>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 6e12c5a920ca..9d5f5b083e3c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -38,6 +38,12 @@
serial2 = &uart2;
serial3 = &uart3;
serial4 = &uart4;
+ spi0 = &spi0;
+ spi1 = &spi1;
+ spi2 = &spi2;
+ spi3 = &spi3;
+ spi4 = &spi4;
+ spi5 = &spi5;
};
cpus {
@@ -45,7 +51,7 @@
#size-cells = <0>;
cpu-map {
- cluster0 {
+ cluster0 { /* Cortex-A53 */
core0 {
cpu = <&cpu_l0>;
};
@@ -60,7 +66,7 @@
};
};
- cluster1 {
+ cluster1 { /* Cortex-A72 */
core0 {
cpu = <&cpu_b0>;
};
@@ -80,6 +86,13 @@
#cooling-cells = <2>; /* min followed by max */
dynamic-power-coefficient = <100>;
cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ i-cache-size = <0x8000>;
+ i-cache-line-size = <64>;
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+ d-cache-sets = <128>;
+ next-level-cache = <&l2_cache_l>;
};
cpu_l1: cpu@1 {
@@ -92,6 +105,13 @@
#cooling-cells = <2>; /* min followed by max */
dynamic-power-coefficient = <100>;
cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ i-cache-size = <0x8000>;
+ i-cache-line-size = <64>;
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+ d-cache-sets = <128>;
+ next-level-cache = <&l2_cache_l>;
};
cpu_l2: cpu@2 {
@@ -104,6 +124,13 @@
#cooling-cells = <2>; /* min followed by max */
dynamic-power-coefficient = <100>;
cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ i-cache-size = <0x8000>;
+ i-cache-line-size = <64>;
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+ d-cache-sets = <128>;
+ next-level-cache = <&l2_cache_l>;
};
cpu_l3: cpu@3 {
@@ -116,6 +143,13 @@
#cooling-cells = <2>; /* min followed by max */
dynamic-power-coefficient = <100>;
cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ i-cache-size = <0x8000>;
+ i-cache-line-size = <64>;
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+ d-cache-sets = <128>;
+ next-level-cache = <&l2_cache_l>;
};
cpu_b0: cpu@100 {
@@ -128,6 +162,13 @@
#cooling-cells = <2>; /* min followed by max */
dynamic-power-coefficient = <436>;
cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ i-cache-size = <0xC000>;
+ i-cache-line-size = <64>;
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+ d-cache-sets = <256>;
+ next-level-cache = <&l2_cache_b>;
thermal-idle {
#cooling-cells = <2>;
@@ -146,6 +187,13 @@
#cooling-cells = <2>; /* min followed by max */
dynamic-power-coefficient = <436>;
cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ i-cache-size = <0xC000>;
+ i-cache-line-size = <64>;
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+ d-cache-sets = <256>;
+ next-level-cache = <&l2_cache_b>;
thermal-idle {
#cooling-cells = <2>;
@@ -154,6 +202,24 @@
};
};
+ l2_cache_l: l2-cache-cluster0 {
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ cache-size = <0x80000>;
+ cache-line-size = <64>;
+ cache-sets = <512>;
+ };
+
+ l2_cache_b: l2-cache-cluster1 {
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ cache-size = <0x100000>;
+ cache-line-size = <64>;
+ cache-sets = <1024>;
+ };
+
idle-states {
entry-method = "psci";
@@ -1956,6 +2022,7 @@
hdmi: hdmi@ff940000 {
compatible = "rockchip,rk3399-dw-hdmi";
reg = <0x0 0xff940000 0x0 0x20000>;
+ reg-io-width = <4>;
interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&cru PCLK_HDMI_CTRL>,
<&cru SCLK_HDMI_SFR>,
@@ -1964,13 +2031,16 @@
<&cru PLL_VPLL>;
clock-names = "iahb", "isfr", "cec", "grf", "ref";
power-domains = <&power RK3399_PD_HDCP>;
- reg-io-width = <4>;
rockchip,grf = <&grf>;
#sound-dai-cells = <0>;
status = "disabled";
ports {
- hdmi_in: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi_in: port@0 {
+ reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
@@ -1983,6 +2053,10 @@
remote-endpoint = <&vopl_out_hdmi>;
};
};
+
+ hdmi_out: port@1 {
+ reg = <1>;
+ };
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-d.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-d.dts
new file mode 100644
index 000000000000..ab83e8a61615
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-d.dts
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include "rk3566-anbernic-rg-arc.dtsi"
+
+/ {
+ model = "Anbernic RG ARC-D";
+ compatible = "anbernic,rg-arc-d", "rockchip,rk3566";
+
+ aliases {
+ mmc0 = &sdhci;
+ mmc1 = &sdmmc0;
+ mmc2 = &sdmmc1;
+ mmc3 = &sdmmc2;
+ };
+};
+
+&i2c2 {
+ pinctrl-0 = <&i2c2m1_xfer>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ touchscreen@14 {
+ compatible = "goodix,gt927";
+ reg = <0x14>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <RK_PB1 IRQ_TYPE_EDGE_FALLING>;
+ irq-gpios = <&gpio4 RK_PB1 GPIO_ACTIVE_HIGH>;
+ pinctrl-0 = <&touch_int>;
+ pinctrl-names = "default";
+ reset-gpios = <&gpio4 RK_PA6 GPIO_ACTIVE_HIGH>;
+ touchscreen-inverted-y;
+ touchscreen-size-x = <640>;
+ touchscreen-size-y = <480>;
+ };
+};
+
+&pinctrl {
+ touchscreen {
+ touch_int: touch_int {
+ rockchip,pins = <4 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+};
+
+&sdhci {
+ bus-width = <8>;
+ mmc-hs200-1_8v;
+ non-removable;
+ pinctrl-0 = <&emmc_bus8>, <&emmc_clk>, <&emmc_cmd>,
+ <&emmc_datastrobe>, <&emmc_rstnout>;
+ pinctrl-names = "default";
+ vmmc-supply = <&vcc_3v3>;
+ vqmmc-supply = <&vcc_1v8>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-s.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-s.dts
new file mode 100644
index 000000000000..6264a8c78d0b
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc-s.dts
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include "rk3566-anbernic-rg-arc.dtsi"
+
+/ {
+ model = "Anbernic RG ARC-S";
+ compatible = "anbernic,rg-arc-s", "rockchip,rk3566";
+
+ aliases {
+ mmc1 = &sdmmc0;
+ mmc2 = &sdmmc1;
+ mmc3 = &sdmmc2;
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc.dtsi
new file mode 100644
index 000000000000..a4a60e4a53d4
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg-arc.dtsi
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include "rk3566-anbernic-rgxx3.dtsi"
+
+/ {
+ backlight: backlight {
+ compatible = "pwm-backlight";
+ power-supply = <&vcc_sys>;
+ pwms = <&pwm4 0 25000 0>;
+ };
+
+ battery: battery {
+ compatible = "simple-battery";
+ charge-full-design-microamp-hours = <3472000>;
+ charge-term-current-microamp = <300000>;
+ constant-charge-current-max-microamp = <2000000>;
+ constant-charge-voltage-max-microvolt = <4200000>;
+ factory-internal-resistance-micro-ohms = <117000>;
+ voltage-max-design-microvolt = <4172000>;
+ voltage-min-design-microvolt = <3400000>;
+
+ ocv-capacity-celsius = <20>;
+ ocv-capacity-table-0 = <4172000 100>, <4054000 95>, <3984000 90>, <3926000 85>,
+ <3874000 80>, <3826000 75>, <3783000 70>, <3746000 65>,
+ <3714000 60>, <3683000 55>, <3650000 50>, <3628000 45>,
+ <3612000 40>, <3600000 35>, <3587000 30>, <3571000 25>,
+ <3552000 20>, <3525000 15>, <3492000 10>, <3446000 5>,
+ <3400000 0>;
+ };
+
+ /* Channels reversed for both headphones and speakers. */
+ sound {
+ compatible = "simple-audio-card";
+ pinctrl-0 = <&hp_det>;
+ pinctrl-names = "default";
+ simple-audio-card,name = "rk817_ext";
+ simple-audio-card,aux-devs = <&spk_amp>;
+ simple-audio-card,format = "i2s";
+ simple-audio-card,hp-det-gpio = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
+ simple-audio-card,mclk-fs = <256>;
+ simple-audio-card,widgets =
+ "Microphone", "Mic Jack",
+ "Headphone", "Headphones",
+ "Speaker", "Internal Speakers";
+ simple-audio-card,routing =
+ "MICL", "Mic Jack",
+ "Headphones", "HPOL",
+ "Headphones", "HPOR",
+ "Internal Speakers", "Speaker Amp OUTL",
+ "Internal Speakers", "Speaker Amp OUTR",
+ "Speaker Amp INL", "HPOL",
+ "Speaker Amp INR", "HPOR";
+ simple-audio-card,pin-switches = "Internal Speakers";
+
+ simple-audio-card,codec {
+ sound-dai = <&rk817>;
+ };
+
+ simple-audio-card,cpu {
+ sound-dai = <&i2s1_8ch>;
+ };
+ };
+
+ spk_amp: audio-amplifier {
+ compatible = "simple-audio-amplifier";
+ enable-gpios = <&gpio4 RK_PC2 GPIO_ACTIVE_HIGH>;
+ pinctrl-0 = <&spk_amp_enable_h>;
+ pinctrl-names = "default";
+ sound-name-prefix = "Speaker Amp";
+ };
+};
+
+&cru {
+ assigned-clocks = <&pmucru CLK_RTC_32K>, <&cru PLL_GPLL>,
+ <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
+ assigned-clock-rates = <32768>, <1200000000>,
+ <200000000>, <128000000>;
+};
+
+&dsi_dphy0 {
+ status = "okay";
+};
+
+&dsi0 {
+ status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ports {
+ dsi0_in: port@0 {
+ reg = <0>;
+ dsi0_in_vp1: endpoint {
+ remote-endpoint = <&vp1_out_dsi0>;
+ };
+ };
+
+ dsi0_out: port@1 {
+ reg = <1>;
+ mipi_out_panel: endpoint {
+ remote-endpoint = <&mipi_in_panel>;
+ };
+ };
+ };
+
+ panel: panel@0 {
+ compatible = "anbernic,rg-arc-panel", "sitronix,st7701";
+ reg = <0>;
+ backlight = <&backlight>;
+ IOVCC-supply = <&vcc3v3_lcd0_n>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&lcd_rst>;
+ reset-gpios = <&gpio4 RK_PA0 GPIO_ACTIVE_HIGH>;
+ rotation = <90>;
+ VCC-supply = <&vcc3v3_lcd0_n>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
+ };
+};
+
+/*
+ * Device uses a non-standard six button layout for a gamepad with X,
+ * Y, and Z on the top row of buttons and A, B, and C under the bottom
+ * row.
+ */
+&gpio_keys_control {
+ button-a {
+ gpios = <&gpio3 RK_PC3 GPIO_ACTIVE_LOW>;
+ label = "A";
+ linux,code = <BTN_A>;
+ };
+
+ button-b {
+ gpios = <&gpio3 RK_PC2 GPIO_ACTIVE_LOW>;
+ label = "B";
+ linux,code = <BTN_B>;
+ };
+
+ button-c {
+ gpios = <&gpio3 RK_PA2 GPIO_ACTIVE_LOW>;
+ label = "C";
+ linux,code = <BTN_C>;
+ };
+
+ button-left {
+ gpios = <&gpio3 RK_PA6 GPIO_ACTIVE_LOW>;
+ label = "DPAD-LEFT";
+ linux,code = <BTN_DPAD_LEFT>;
+ };
+
+ button-r1 {
+ gpios = <&gpio3 RK_PB4 GPIO_ACTIVE_LOW>;
+ label = "TR";
+ linux,code = <BTN_TR>;
+ };
+
+ button-r2 {
+ gpios = <&gpio3 RK_PB3 GPIO_ACTIVE_LOW>;
+ label = "TR2";
+ linux,code = <BTN_TR2>;
+ };
+
+ button-right {
+ gpios = <&gpio3 RK_PA5 GPIO_ACTIVE_LOW>;
+ label = "DPAD-RIGHT";
+ linux,code = <BTN_DPAD_RIGHT>;
+ };
+
+ button-x {
+ gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>;
+ label = "X";
+ linux,code = <BTN_X>;
+ };
+
+ button-y {
+ gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_LOW>;
+ label = "Y";
+ linux,code = <BTN_Y>;
+ };
+
+ button-z {
+ gpios = <&gpio3 RK_PA1 GPIO_ACTIVE_LOW>;
+ label = "Z";
+ linux,code = <BTN_Z>;
+ };
+};
+
+&pinctrl {
+ audio-amplifier {
+ spk_amp_enable_h: spk-amp-enable-h {
+ rockchip,pins =
+ <4 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ gpio-lcd {
+ lcd_rst: lcd-rst {
+ rockchip,pins =
+ <4 RK_PA0 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ headphone {
+ hp_det: hp-det {
+ rockchip,pins =
+ <4 RK_PC6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&pwm4 {
+ status = "okay";
+};
+
+&rk817 {
+ rk817_charger: charger {
+ monitored-battery = <&battery>;
+ rockchip,resistor-sense-micro-ohms = <10000>;
+ rockchip,sleep-enter-current-microamp = <300000>;
+ rockchip,sleep-filter-current-microamp = <100000>;
+ };
+};
+
+&vp1 {
+ vp1_out_dsi0: endpoint@ROCKCHIP_VOP2_EP_MIPI0 {
+ reg = <ROCKCHIP_VOP2_EP_MIPI0>;
+ remote-endpoint = <&dsi0_in_vp1>;
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi
index 2a2821f4c580..63a18ff36cea 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi
@@ -8,11 +8,73 @@
#include "rk3566-anbernic-rgxx3.dtsi"
/ {
+ adc-joystick {
+ compatible = "adc-joystick";
+ io-channels = <&adc_mux 0>,
+ <&adc_mux 1>,
+ <&adc_mux 2>,
+ <&adc_mux 3>;
+ pinctrl-0 = <&joy_mux_en>;
+ pinctrl-names = "default";
+ poll-interval = <60>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ axis@0 {
+ reg = <0>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <1023 15>;
+ linux,code = <ABS_X>;
+ };
+
+ axis@1 {
+ reg = <1>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <15 1023>;
+ linux,code = <ABS_RX>;
+ };
+
+ axis@2 {
+ reg = <2>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <15 1023>;
+ linux,code = <ABS_Y>;
+ };
+
+ axis@3 {
+ reg = <3>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <1023 15>;
+ linux,code = <ABS_RY>;
+ };
+ };
+
+ adc_mux: adc-mux {
+ compatible = "io-channel-mux";
+ channels = "left_x", "right_x", "left_y", "right_y";
+ #io-channel-cells = <1>;
+ io-channels = <&saradc 3>;
+ io-channel-names = "parent";
+ mux-controls = <&gpio_mux>;
+ settle-time-us = <100>;
+ };
+
backlight: backlight {
compatible = "pwm-backlight";
power-supply = <&vcc_sys>;
pwms = <&pwm4 0 25000 0>;
};
+
+ gpio_mux: mux-controller {
+ compatible = "gpio-mux";
+ mux-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_LOW>,
+ <&gpio0 RK_PB7 GPIO_ACTIVE_LOW>;
+ #mux-control-cells = <0>;
+ };
};
&cru {
@@ -83,6 +145,18 @@
linux,code = <BTN_DPAD_RIGHT>;
};
+ button-thumbl {
+ gpios = <&gpio3 RK_PA1 GPIO_ACTIVE_LOW>;
+ label = "THUMBL";
+ linux,code = <BTN_THUMBL>;
+ };
+
+ button-thumbr {
+ gpios = <&gpio3 RK_PA2 GPIO_ACTIVE_LOW>;
+ label = "THUMBR";
+ linux,code = <BTN_THUMBR>;
+ };
+
button-y {
gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_LOW>;
label = "WEST";
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts
index c763c7f3b1b3..94e6dd61a2db 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts
@@ -17,6 +17,61 @@
mmc2 = &sdmmc2;
};
+ adc-joystick {
+ compatible = "adc-joystick";
+ io-channels = <&adc_mux 0>,
+ <&adc_mux 1>,
+ <&adc_mux 2>,
+ <&adc_mux 3>;
+ pinctrl-0 = <&joy_mux_en>;
+ pinctrl-names = "default";
+ poll-interval = <60>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ axis@0 {
+ reg = <0>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <1023 15>;
+ linux,code = <ABS_X>;
+ };
+
+ axis@1 {
+ reg = <1>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <15 1023>;
+ linux,code = <ABS_RX>;
+ };
+
+ axis@2 {
+ reg = <2>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <15 1023>;
+ linux,code = <ABS_Y>;
+ };
+
+ axis@3 {
+ reg = <3>;
+ abs-flat = <32>;
+ abs-fuzz = <32>;
+ abs-range = <1023 15>;
+ linux,code = <ABS_RY>;
+ };
+ };
+
+ adc_mux: adc-mux {
+ compatible = "io-channel-mux";
+ channels = "left_x", "right_x", "left_y", "right_y";
+ #io-channel-cells = <1>;
+ io-channels = <&saradc 3>;
+ io-channel-names = "parent";
+ mux-controls = <&gpio_mux>;
+ settle-time-us = <100>;
+ };
+
battery: battery {
compatible = "simple-battery";
charge-full-design-microamp-hours = <3472000>;
@@ -36,6 +91,13 @@
<3400000 0>;
};
+ gpio_mux: mux-controller {
+ compatible = "gpio-mux";
+ mux-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_LOW>,
+ <&gpio0 RK_PB7 GPIO_ACTIVE_LOW>;
+ #mux-control-cells = <0>;
+ };
+
gpio_spi: spi {
compatible = "spi-gpio";
pinctrl-names = "default";
@@ -174,6 +236,18 @@
linux,code = <BTN_DPAD_RIGHT>;
};
+ button-thumbl {
+ gpios = <&gpio3 RK_PA1 GPIO_ACTIVE_LOW>;
+ label = "THUMBL";
+ linux,code = <BTN_THUMBL>;
+ };
+
+ button-thumbr {
+ gpios = <&gpio3 RK_PA2 GPIO_ACTIVE_LOW>;
+ label = "THUMBR";
+ linux,code = <BTN_THUMBR>;
+ };
+
button-y {
gpios = <&gpio3 RK_PC2 GPIO_ACTIVE_LOW>;
label = "WEST";
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
index 8cbf3d9a4f22..18b8c2e7befa 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
@@ -14,51 +14,6 @@
stdout-path = "serial2:1500000n8";
};
- adc-joystick {
- compatible = "adc-joystick";
- io-channels = <&adc_mux 0>,
- <&adc_mux 1>,
- <&adc_mux 2>,
- <&adc_mux 3>;
- pinctrl-0 = <&joy_mux_en>;
- pinctrl-names = "default";
- poll-interval = <60>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- axis@0 {
- reg = <0>;
- abs-flat = <32>;
- abs-fuzz = <32>;
- abs-range = <1023 15>;
- linux,code = <ABS_X>;
- };
-
- axis@1 {
- reg = <1>;
- abs-flat = <32>;
- abs-fuzz = <32>;
- abs-range = <15 1023>;
- linux,code = <ABS_RX>;
- };
-
- axis@2 {
- reg = <2>;
- abs-flat = <32>;
- abs-fuzz = <32>;
- abs-range = <15 1023>;
- linux,code = <ABS_Y>;
- };
-
- axis@3 {
- reg = <3>;
- abs-flat = <32>;
- abs-fuzz = <32>;
- abs-range = <1023 15>;
- linux,code = <ABS_RY>;
- };
- };
-
adc_keys: adc-keys {
compatible = "adc-keys";
io-channels = <&saradc 0>;
@@ -77,16 +32,6 @@
};
};
- adc_mux: adc-mux {
- compatible = "io-channel-mux";
- channels = "left_x", "right_x", "left_y", "right_y";
- #io-channel-cells = <1>;
- io-channels = <&saradc 3>;
- io-channel-names = "parent";
- mux-controls = <&gpio_mux>;
- settle-time-us = <100>;
- };
-
gpio_keys_control: gpio-keys-control {
compatible = "gpio-keys";
pinctrl-0 = <&btn_pins_ctrl>;
@@ -128,18 +73,6 @@
linux,code = <BTN_START>;
};
- button-thumbl {
- gpios = <&gpio3 RK_PA1 GPIO_ACTIVE_LOW>;
- label = "THUMBL";
- linux,code = <BTN_THUMBL>;
- };
-
- button-thumbr {
- gpios = <&gpio3 RK_PA2 GPIO_ACTIVE_LOW>;
- label = "THUMBR";
- linux,code = <BTN_THUMBR>;
- };
-
button-up {
gpios = <&gpio3 RK_PA3 GPIO_ACTIVE_LOW>;
label = "DPAD-UP";
@@ -172,13 +105,6 @@
};
};
- gpio_mux: mux-controller {
- compatible = "gpio-mux";
- mux-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_LOW>,
- <&gpio0 RK_PB7 GPIO_ACTIVE_LOW>;
- #mux-control-cells = <0>;
- };
-
hdmi-con {
compatible = "hdmi-connector";
ddc-i2c-bus = <&i2c5>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v0.1.dts b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v0.1.dts
new file mode 100644
index 000000000000..5fe6ca5da9d3
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v0.1.dts
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include "rk3566-pinetab2.dtsi"
+
+/ {
+ model = "Pine64 PineTab2 v0.1";
+ compatible = "pine64,pinetab2-v0.1", "pine64,pinetab2", "rockchip,rk3566";
+};
+
+&lcd {
+ reset-gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&lcd_pwren_h &lcd0_rst_l>;
+};
+
+&pinctrl {
+ lcd0 {
+ lcd0_rst_l: lcd0-rst-l {
+ rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&sdmmc1 {
+ vmmc-supply = <&vcc3v3_sys>;
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v2.0.dts b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v2.0.dts
new file mode 100644
index 000000000000..9349541cbbd0
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2-v2.0.dts
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include "rk3566-pinetab2.dtsi"
+
+/ {
+ model = "Pine64 PineTab2 v2.0";
+ compatible = "pine64,pinetab2-v2.0", "pine64,pinetab2", "rockchip,rk3566";
+};
+
+&gpio_keys {
+ pinctrl-0 = <&kb_id_det>, <&hall_int_l>;
+
+ event-hall-sensor {
+ debounce-interval = <20>;
+ gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_LOW>;
+ label = "Hall Sensor";
+ linux,code = <SW_LID>;
+ linux,input-type = <EV_SW>;
+ wakeup-event-action = <EV_ACT_DEASSERTED>;
+ wakeup-source;
+ };
+};
+
+&lcd {
+ reset-gpios = <&gpio0 RK_PC6 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&lcd_pwren_h &lcd0_rst_l>;
+};
+
+&pinctrl {
+ lcd0 {
+ lcd0_rst_l: lcd0-rst-l {
+ rockchip,pins = <0 RK_PC6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ hall {
+ hall_int_l: hall-int-l {
+ rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&sdmmc1 {
+ vmmc-supply = <&vcc_sys>;
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi
new file mode 100644
index 000000000000..db40281eafbe
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi
@@ -0,0 +1,943 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/gpio-keys.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include <dt-bindings/soc/rockchip,vop2.h>
+#include <dt-bindings/usb/pd.h>
+#include "rk3566.dtsi"
+
+/ {
+ chassis-type = "tablet";
+
+ aliases {
+ mmc0 = &sdhci;
+ mmc1 = &sdmmc0;
+ };
+
+ chosen {
+ stdout-path = "serial2:1500000n8";
+ };
+
+ adc-keys {
+ compatible = "adc-keys";
+ io-channels = <&saradc 0>;
+ io-channel-names = "buttons";
+ keyup-threshold-microvolt = <1800000>;
+ poll-interval = <25>;
+
+ button-vol-up {
+ label = "Volume Up";
+ linux,code = <KEY_VOLUMEUP>;
+ press-threshold-microvolt = <297500>;
+ };
+
+ button-vol-down {
+ label = "Volume Down";
+ linux,code = <KEY_VOLUMEDOWN>;
+ press-threshold-microvolt = <1750>;
+ };
+ };
+
+ backlight: backlight {
+ compatible = "pwm-backlight";
+ pwms = <&pwm4 0 25000 0>;
+ brightness-levels = <20 220>;
+ num-interpolated-steps = <200>;
+ default-brightness-level = <100>;
+ power-supply = <&vcc_sys>;
+ };
+
+ battery: battery {
+ compatible = "simple-battery";
+ charge-full-design-microamp-hours = <6000000>;
+ charge-term-current-microamp = <300000>;
+ constant-charge-current-max-microamp = <2000000>;
+ constant-charge-voltage-max-microvolt = <4300000>;
+ voltage-max-design-microvolt = <4350000>;
+ voltage-min-design-microvolt = <3400000>;
+
+ ocv-capacity-celsius = <20>;
+ ocv-capacity-table-0 = <4322000 100>, <4250000 95>, <4192000 90>, <4136000 85>,
+ <4080000 80>, <4022000 75>, <3972000 70>, <3928000 65>,
+ <3885000 60>, <3833000 55>, <3798000 50>, <3780000 45>,
+ <3776000 40>, <3773000 35>, <3755000 30>, <3706000 25>,
+ <3640000 20>, <3589000 15>, <3535000 10>, <3492000 5>,
+ <3400000 0>;
+ };
+
+ gpio_keys: gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&kb_id_det>;
+
+ tablet-mode-switch {
+ debounce-interval = <20>;
+ gpios = <&gpio4 RK_PA4 GPIO_ACTIVE_HIGH>;
+ label = "Tablet Mode";
+ linux,input-type = <EV_SW>;
+ linux,code = <SW_TABLET_MODE>;
+ };
+ };
+
+ hdmi-connector {
+ compatible = "hdmi-connector";
+ type = "d";
+
+ port {
+ hdmi_con_in: endpoint {
+ remote-endpoint = <&hdmi_out_con>;
+ };
+ };
+ };
+
+ led-0 {
+ compatible = "regulator-led";
+ vled-supply = <&vcc5v0_flashled>;
+ color = <LED_COLOR_ID_WHITE>;
+ function = LED_FUNCTION_FLASH;
+ };
+
+ rk817-sound {
+ compatible = "simple-audio-card";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hp_det_l>;
+ simple-audio-card,format = "i2s";
+ simple-audio-card,name = "rk817_ext";
+ simple-audio-card,mclk-fs = <256>;
+
+ simple-audio-card,widgets =
+ "Microphone", "Mic Jack",
+ "Headphone", "Headphones",
+ "Speaker", "Internal Speakers";
+
+ simple-audio-card,routing =
+ "MICR", "Mic Jack",
+ "Headphones", "HPOL",
+ "Headphones", "HPOR",
+ "Internal Speakers", "Speaker Amplifier OUTL",
+ "Internal Speakers", "Speaker Amplifier OUTR",
+ "Speaker Amplifier INL", "HPOL",
+ "Speaker Amplifier INR", "HPOR";
+ simple-audio-card,hp-det-gpio = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
+ simple-audio-card,aux-devs = <&speaker_amp>;
+ simple-audio-card,pin-switches = "Internal Speakers";
+
+ simple-audio-card,cpu {
+ sound-dai = <&i2s1_8ch>;
+ };
+
+ simple-audio-card,codec {
+ sound-dai = <&rk817>;
+ };
+ };
+
+ speaker_amp: speaker-amplifier {
+ compatible = "simple-audio-amplifier";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spk_ctl>;
+ enable-gpios = <&gpio4 RK_PC2 GPIO_ACTIVE_HIGH>;
+ sound-name-prefix = "Speaker Amplifier";
+ VCC-supply = <&vcc_bat>;
+ };
+
+ vcc_3v3: vcc-3v3-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_3v3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc3v3_sys>;
+ };
+
+ vcc3v3_minipcie: vcc3v3-minipcie-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio4 RK_PC3 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie_pwren_h>;
+ regulator-name = "vcc3v3_minipcie";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc_sys>;
+ };
+
+ vcc3v3_sd: vcc3v3-sd-regulator {
+ compatible = "regulator-fixed";
+ gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc_pwren_l>;
+ regulator-name = "vcc3v3_sd";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc3v3_sys>;
+ };
+
+ vcc5v0_flashled: vcc5v0-flashled-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio4 RK_PA5 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&flash_led_en_h>;
+ regulator-name = "vcc5v0_flashled";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v_midu>;
+ };
+
+ vcc5v0_usb_host0: vcc5v0-usb-host0-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio4 RK_PC4 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&usb_host_pwren1_h>;
+ regulator-name = "vcc5v0_usb_host0";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v_midu>;
+ };
+
+ vcc5v0_usb_host2: vcc5v0-usb-host2-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio4 RK_PC5 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&usb_host_pwren2_h>;
+ regulator-name = "vcc5v0_usb_host2";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v_midu>;
+ };
+
+ vcc_bat: vcc-bat-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_bat";
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vcc_sys: vcc-sys-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ vin-supply = <&vcc_bat>;
+ };
+
+ vdd1v2_dvp: vdd1v2-dvp-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd1v2_dvp";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ vin-supply = <&vcc_3v3>;
+ };
+};
+
+&combphy1 {
+ status = "okay";
+};
+
+&combphy2 {
+ status = "okay";
+};
+
+&cpu0 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&cpu1 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&cpu2 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&cpu3 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&cru {
+ assigned-clocks = <&pmucru CLK_RTC_32K>, <&cru PLL_GPLL>,
+ <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
+ assigned-clock-rates = <32768>, <1200000000>, <200000000>, <500000000>;
+ assigned-clock-parents = <&pmucru CLK_RTC32K_FRAC>;
+};
+
+&csi_dphy {
+ status = "okay";
+};
+
+&dsi0 {
+ status = "okay";
+ clock-master;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lcd: panel@0 {
+ compatible = "boe,th101mb31ig002-28a";
+ reg = <0>;
+ backlight = <&backlight>;
+ enable-gpios = <&gpio0 RK_PC7 GPIO_ACTIVE_HIGH>;
+ rotation = <90>;
+ power-supply = <&vcc_3v3>;
+
+ port@0 {
+ panel_in_dsi: endpoint@0 {
+ remote-endpoint = <&dsi0_out_con>;
+ };
+ };
+ };
+};
+
+&dsi0_in {
+ dsi0_in_vp1: endpoint {
+ remote-endpoint = <&vp1_out_dsi0>;
+ };
+};
+
+&dsi0_out {
+ dsi0_out_con: endpoint {
+ remote-endpoint = <&panel_in_dsi>;
+ };
+};
+
+&dsi_dphy0 {
+ status = "okay";
+};
+
+&gpu {
+ mali-supply = <&vdd_gpu_npu>;
+ status = "okay";
+};
+
+&hdmi {
+ avdd-0v9-supply = <&vdda_0v9_p>;
+ avdd-1v8-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&hdmi_in {
+ hdmi_in_vp0: endpoint {
+ remote-endpoint = <&vp0_out_hdmi>;
+ };
+};
+
+&hdmi_out {
+ hdmi_out_con: endpoint {
+ remote-endpoint = <&hdmi_con_in>;
+ };
+};
+
+&hdmi_sound {
+ status = "okay";
+};
+
+&i2c0 {
+ clock-frequency = <400000>;
+ status = "okay";
+
+ vdd_cpu: regulator@1c {
+ compatible = "tcs,tcs4525";
+ reg = <0x1c>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1150000>;
+ regulator-ramp-delay = <2300>;
+ regulator-always-on;
+ regulator-boot-on;
+ vin-supply = <&vcc_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ rk817: pmic@20 {
+ compatible = "rockchip,rk817";
+ reg = <0x20>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PA3 IRQ_TYPE_LEVEL_LOW>;
+ assigned-clocks = <&cru I2S1_MCLKOUT_TX>;
+ assigned-clock-parents = <&cru CLK_I2S1_8CH_TX>;
+ clock-names = "mclk";
+ clocks = <&cru I2S1_MCLKOUT_TX>;
+ clock-output-names = "rk808-clkout1", "rk808-clkout2";
+ #clock-cells = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_int_l>, <&i2s1m0_mclk>;
+ rockchip,system-power-controller;
+ #sound-dai-cells = <0>;
+ wakeup-source;
+
+ vcc1-supply = <&vcc_sys>;
+ vcc2-supply = <&vcc_sys>;
+ vcc3-supply = <&vcc_sys>;
+ vcc4-supply = <&vcc_sys>;
+ vcc5-supply = <&vcc_sys>;
+ vcc6-supply = <&vcc_sys>;
+ vcc7-supply = <&vcc_sys>;
+ vcc8-supply = <&vcc_sys>;
+ vcc9-supply = <&vcc5v_midu>;
+
+ regulators {
+ vdd_logic: DCDC_REG1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <6001>;
+ regulator-initial-mode = <0x2>;
+ regulator-name = "vdd_logic";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_gpu_npu: DCDC_REG2 {
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <6001>;
+ regulator-initial-mode = <0x2>;
+ regulator-name = "vdd_gpu_npu";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_ddr: DCDC_REG3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-initial-mode = <0x2>;
+ regulator-name = "vcc_ddr";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc3v3_sys: DCDC_REG4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-initial-mode = <0x2>;
+ regulator-name = "vcc3v3_sys";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcca1v8_pmu: LDO_REG1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcca1v8_pmu";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vdda_0v9_p: LDO_REG2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ regulator-name = "vdda_0v9_p";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdda0v9_pmu: LDO_REG3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ regulator-name = "vdda0v9_pmu";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vccio_acodec: LDO_REG4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vccio_acodec";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd: LDO_REG5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vccio_sd";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc3v3_pmu: LDO_REG6 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc3v3_pmu";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc_1v8: LDO_REG7 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc_1v8";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc1v8_dvp: LDO_REG8 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc1v8_dvp";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc2v8_dvp: LDO_REG9 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-name = "vcc2v8_dvp";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc5v_midu: BOOST {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-name = "boost";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vbus: OTG_SWITCH {
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-name = "otg_switch";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+
+ charger {
+ monitored-battery = <&battery>;
+ rockchip,resistor-sense-micro-ohms = <10000>;
+ rockchip,sleep-enter-current-microamp = <300000>;
+ rockchip,sleep-filter-current-microamp = <100000>;
+ };
+ };
+};
+
+&i2c1 {
+ clock-frequency = <400000>;
+ status = "okay";
+
+ touchscreen@5d {
+ compatible = "goodix,gt911";
+ reg = <0x5d>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PB0 IRQ_TYPE_EDGE_FALLING>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&tp_int_l_pmuio2>, <&tp_rst_l_pmuio2>;
+ AVDD28-supply = <&vcc3v3_pmu>;
+ VDDIO-supply = <&vcca1v8_pmu>;
+ irq-gpios = <&gpio0 RK_PB0 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>;
+ };
+};
+
+&i2c2 {
+ clock-frequency = <400000>;
+ pinctrl-0 = <&i2c2m1_xfer>;
+ status = "okay";
+
+ vcm@c {
+ compatible = "dongwoon,dw9714";
+ reg = <0x0c>;
+ vcc-supply = <&vcc1v8_dvp>;
+ };
+
+ camera@36 {
+ compatible = "ovti,ov5648";
+ reg = <0x36>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&camerab_pdn_l &camerab_rst_l>;
+
+ clocks = <&cru CLK_CIF_OUT>;
+ assigned-clocks = <&cru CLK_CIF_OUT>;
+ assigned-clock-rates = <24000000>;
+
+ avdd-supply = <&vcc2v8_dvp>;
+ dvdd-supply = <&vdd1v2_dvp>;
+ dovdd-supply = <&vcc1v8_dvp>;
+ powerdown-gpios = <&gpio4 RK_PB3 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&gpio4 RK_PB1 GPIO_ACTIVE_LOW>;
+
+ port {
+ endpoint {
+ data-lanes = <1 2>;
+ remote-endpoint = <0>;
+ link-frequencies = /bits/ 64 <210000000 168000000>;
+ };
+ };
+ };
+};
+
+&i2c5 {
+ clock-frequency = <400000>;
+ status = "okay";
+
+ accelerometer@18 {
+ compatible = "silan,sc7a20";
+ reg = <0x18>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <RK_PA2 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&gsensor_int_l>;
+ st,drdy-int-pin = <1>;
+ vdd-supply = <&vcc_1v8>;
+ vddio-supply = <&vcc_1v8>;
+ mount-matrix = "1", "0", "0",
+ "0", "0", "1",
+ "0", "1", "0";
+ };
+};
+
+&i2s0_8ch {
+ status = "okay";
+};
+
+&i2s1_8ch {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2s1m0_sclktx
+ &i2s1m0_lrcktx
+ &i2s1m0_sdi0
+ &i2s1m0_sdo0>;
+ rockchip,trcm-sync-tx-only;
+ status = "okay";
+};
+
+&pcie2x1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie_reset_h>;
+ reset-gpios = <&gpio1 RK_PB2 GPIO_ACTIVE_HIGH>;
+ vpcie3v3-supply = <&vcc3v3_minipcie>;
+ status = "okay";
+};
+
+&pinctrl {
+ camerab {
+ camerab_pdn_l: camerab-pdn-l {
+ rockchip,pins = <4 RK_PB3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ camerab_rst_l: camerab-rst-l {
+ rockchip,pins = <4 RK_PB1 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ cameraf {
+ cameraf_pdn_l: cameraf-pdn-l {
+ rockchip,pins = <4 RK_PB2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ cameraf_rst_l: cameraf-rst-l {
+ rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ flash {
+ flash_led_en_h: flash-led-en-h {
+ rockchip,pins = <4 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ fspi {
+ fspi_dual_io_pins: fspi-dual-io-pins {
+ rockchip,pins =
+ /* fspi_clk */
+ <1 RK_PD0 1 &pcfg_pull_none>,
+ /* fspi_cs0n */
+ <1 RK_PD3 1 &pcfg_pull_none>,
+ /* fspi_d0 */
+ <1 RK_PD1 1 &pcfg_pull_none>,
+ /* fspi_d1 */
+ <1 RK_PD2 1 &pcfg_pull_none>;
+ };
+ };
+
+ gsensor {
+ gsensor_int_l: gsensor-int-l {
+ rockchip,pins = <3 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ kb {
+ kb_id_det: kb-id-det {
+ rockchip,pins = <4 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ lcd {
+ lcd_pwren_h: lcd-pwren-h {
+ rockchip,pins = <0 RK_PC7 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ pcie {
+ pcie_pwren_h: pcie-pwren-h {
+ rockchip,pins = <4 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ pcie_reset_h: pcie-reset-h {
+ rockchip,pins = <1 RK_PB2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ pmic {
+ pmic_int_l: pmic-int-l {
+ rockchip,pins = <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ sdmmc {
+ sdmmc_pwren_l: sdmmc-pwren-l {
+ rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ sound {
+ hp_det_l: hp-det-l {
+ rockchip,pins = <4 RK_PC6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ spk_ctl: spk-ctl {
+ rockchip,pins = <4 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ tp {
+ tp_int_l_pmuio2: tp-int-l-pmuio2 {
+ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+
+ tp_rst_l_pmuio2: tp-rst-l-pmuio2 {
+ rockchip,pins = <0 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ usb {
+ usbcc_int_l: usbcc-int-l {
+ rockchip,pins = <0 RK_PC5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ usb_host_pwren1_h: usb-host-pwren1-h {
+ rockchip,pins = <4 RK_PC4 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ usb_host_pwren2_h: usb-host-pwren2-h {
+ rockchip,pins = <4 RK_PC5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ wifi {
+ host_wake_wl: host-wake-wl {
+ rockchip,pins = <0 RK_PB7 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ wifi_wake_host_h: wifi-wake-host-h {
+ rockchip,pins = <0 RK_PC4 RK_FUNC_GPIO &pcfg_pull_down>;
+ };
+ };
+};
+
+&pmu_io_domains {
+ pmuio1-supply = <&vcc3v3_pmu>;
+ pmuio2-supply = <&vcca1v8_pmu>;
+ vccio1-supply = <&vccio_acodec>;
+ vccio2-supply = <&vcc_1v8>;
+ vccio3-supply = <&vccio_sd>;
+ vccio4-supply = <&vcc_1v8>;
+ vccio5-supply = <&vcc_1v8>;
+ vccio6-supply = <&vcc1v8_dvp>;
+ vccio7-supply = <&vcc_3v3>;
+ status = "okay";
+};
+
+&pwm4 {
+ status = "okay";
+};
+
+&saradc {
+ vref-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&sdhci {
+ bus-width = <8>;
+ no-sdio;
+ no-sd;
+ non-removable;
+ max-frequency = <200000000>;
+ mmc-hs200-1_8v;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8
+ &emmc_clk
+ &emmc_cmd
+ &emmc_datastrobe
+ &emmc_rstnout>;
+ vmmc-supply = <&vcc_3v3>;
+ vqmmc-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&sdmmc0 {
+ bus-width = <4>;
+ cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
+ disable-wp;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc0_bus4
+ &sdmmc0_clk
+ &sdmmc0_cmd
+ &sdmmc0_det>;
+ sd-uhs-sdr104;
+ vmmc-supply = <&vcc3v3_sd>;
+ vqmmc-supply = <&vccio_sd>;
+ status = "okay";
+};
+
+&sdmmc1 {
+ bus-width = <4>;
+ cap-sd-highspeed;
+ cap-sdio-irq;
+ keep-power-in-suspend;
+ non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc1_bus4
+ &sdmmc1_cmd
+ &sdmmc1_clk>;
+ sd-uhs-sdr104;
+ vqmmc-supply = <&vcca1v8_pmu>;
+ status = "okay";
+};
+
+&sfc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&fspi_dual_io_pins>;
+ status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <100000000>;
+ spi-rx-bus-width = <2>;
+ spi-tx-bus-width = <1>;
+ };
+};
+
+&tsadc {
+ rockchip,hw-tshut-mode = <1>;
+ rockchip,hw-tshut-polarity = <0>;
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&usb_host0_ehci {
+ status = "okay";
+};
+
+&usb_host0_ohci {
+ status = "okay";
+};
+
+&usb_host0_xhci {
+ status = "okay";
+};
+
+&usb_host1_xhci {
+ status = "okay";
+};
+
+&usb2phy0 {
+ status = "okay";
+};
+
+&usb2phy0_host {
+ phy-supply = <&vcc5v0_usb_host0>;
+ status = "okay";
+};
+
+&usb2phy0_otg {
+ status = "okay";
+};
+
+&usb2phy1 {
+ status = "okay";
+};
+
+&usb2phy1_otg {
+ phy-supply = <&vcc5v0_usb_host2>;
+ status = "okay";
+};
+
+&vop {
+ assigned-clocks = <&cru DCLK_VOP0>, <&cru DCLK_VOP1>;
+ assigned-clock-parents = <&pmucru PLL_HPLL>, <&cru PLL_VPLL>;
+ status = "okay";
+};
+
+&vop_mmu {
+ status = "okay";
+};
+
+&vp0 {
+ vp0_out_hdmi: endpoint@ROCKCHIP_VOP2_EP_HDMI0 {
+ reg = <ROCKCHIP_VOP2_EP_HDMI0>;
+ remote-endpoint = <&hdmi_in_vp0>;
+ };
+};
+
+&vp1 {
+ vp1_out_dsi0: endpoint@ROCKCHIP_VOP2_EP_MIPI0 {
+ reg = <ROCKCHIP_VOP2_EP_MIPI0>;
+ remote-endpoint = <&dsi0_in_vp1>;
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb10max3.dts b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb10max3.dts
new file mode 100644
index 000000000000..e5a474e681dd
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb10max3.dts
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/linux-event-codes.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include "rk3566-powkiddy-rk2023.dtsi"
+
+/ {
+ model = "Powkiddy RGB10MAX3";
+ compatible = "powkiddy,rgb10max3", "rockchip,rk3566";
+};
+
+&bluetooth {
+ compatible = "realtek,rtl8723ds-bt";
+};
+
+&cru {
+ assigned-clocks = <&pmucru CLK_RTC_32K>, <&cru PLL_GPLL>,
+ <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
+ assigned-clock-rates = <32768>, <1200000000>,
+ <200000000>, <126400000>;
+};
+
+&dsi0 {
+ panel: panel@0 {
+ compatible = "powkiddy,rgb10max3-panel";
+ reg = <0>;
+ backlight = <&backlight>;
+ iovcc-supply = <&vcc3v3_lcd0_n>;
+ pinctrl-0 = <&lcd_rst>;
+ pinctrl-names = "default";
+ reset-gpios = <&gpio4 RK_PA0 GPIO_ACTIVE_LOW>;
+ rotation = <270>;
+ vcc-supply = <&vcc3v3_lcd0_n>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
+ };
+};
+
+&green_led {
+ default-state = "on";
+ function = LED_FUNCTION_POWER;
+};
+
+&i2c0 {
+ vdd_cpu: regulator@40 {
+ compatible = "fcs,fan53555";
+ reg = <0x40>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <712500>;
+ regulator-max-microvolt = <1390000>;
+ regulator-name = "vdd_cpu";
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc_sys>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&leds {
+ amber_led: led-2 {
+ color = <LED_COLOR_ID_AMBER>;
+ function = LED_FUNCTION_CHARGING;
+ max-brightness = <255>;
+ pwms = <&pwm0 0 25000 0>;
+ };
+};
+
+&pwm0 {
+ pinctrl-0 = <&pwm0m1_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&red_led {
+ default-state = "off";
+ function = LED_FUNCTION_STATUS;
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb30.dts b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb30.dts
index 0ac64f043b80..1f567a14ac84 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb30.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rgb30.dts
@@ -37,3 +37,21 @@
};
};
};
+
+&i2c0 {
+ vdd_cpu: regulator@1c {
+ compatible = "tcs,tcs4525";
+ reg = <0x1c>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <712500>;
+ regulator-max-microvolt = <1390000>;
+ regulator-name = "vdd_cpu";
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc_sys>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dts b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dts
index ba32d0793dca..bc9933d9e262 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dts
@@ -36,3 +36,21 @@
};
};
};
+
+&i2c0 {
+ vdd_cpu: regulator@1c {
+ compatible = "tcs,tcs4525";
+ reg = <0x1c>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <712500>;
+ regulator-max-microvolt = <1390000>;
+ regulator-name = "vdd_cpu";
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc_sys>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dtsi
index 0fa8f06f94cd..3ab751a01cb2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-powkiddy-rk2023.dtsi
@@ -614,22 +614,6 @@
rockchip,sleep-filter-current-microamp = <100000>;
};
};
-
- vdd_cpu: regulator@1c {
- compatible = "tcs,tcs4525";
- reg = <0x1c>;
- fcs,suspend-voltage-selector = <1>;
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <712500>;
- regulator-max-microvolt = <1390000>;
- regulator-name = "vdd_cpu";
- regulator-ramp-delay = <2300>;
- vin-supply = <&vcc_sys>;
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
};
&i2c5 {
@@ -805,7 +789,7 @@
uart-has-rtscts;
status = "okay";
- bluetooth {
+ bluetooth: bluetooth {
compatible = "realtek,rtl8821cs-bt", "realtek,rtl8723bs-bt";
device-wake-gpios = <&gpio4 4 GPIO_ACTIVE_HIGH>;
enable-gpios = <&gpio4 3 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
index f9127ddfbb7d..7b5f3904ef61 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
@@ -13,7 +13,7 @@
/ {
model = "Bananapi-R2 Pro (RK3568) DDR4 Board";
- compatible = "rockchip,rk3568-bpi-r2pro", "rockchip,rk3568";
+ compatible = "sinovoip,rk3568-bpi-r2pro", "rockchip,rk3568";
aliases {
ethernet0 = &gmac0;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts
new file mode 100644
index 000000000000..6a998166003c
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2021 Rockchip Electronics Co., Ltd.
+ * Copyright (c) 2024 Uwe Kleine-König
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include "rk3568.dtsi"
+
+/ {
+ model = "Qnap TS-433-4G NAS System 4-Bay";
+ compatible = "qnap,ts433", "rockchip,rk3568";
+};
+
+&gmac0 {
+ assigned-clocks = <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0>;
+ assigned-clock-parents = <&cru SCLK_GMAC0_RGMII_SPEED>, <&cru CLK_MAC0_2TOP>;
+ assigned-clock-rates = <0>, <125000000>;
+ clock_in_out = "output";
+ phy-handle = <&rgmii_phy0>;
+ phy-mode = "rgmii";
+ pinctrl-names = "default";
+ pinctrl-0 = <&gmac0_miim
+ &gmac0_tx_bus2
+ &gmac0_rx_bus2
+ &gmac0_rgmii_clk
+ &gmac0_rgmii_bus>;
+ rx_delay = <0x2f>;
+ tx_delay = <0x3c>;
+ status = "okay";
+};
+
+&i2c0 {
+ pmic@20 {
+ compatible = "rockchip,rk809";
+ reg = <0x20>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+ };
+};
+
+&i2c1 {
+ status = "okay";
+
+ rtc@51 {
+ compatible = "microcrystal,rv8263";
+ reg = <0x51>;
+ wakeup-source;
+ };
+};
+
+&mdio0 {
+ rgmii_phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0x0>;
+ };
+};
+
+&pcie30phy {
+ status = "okay";
+};
+
+&pcie3x1 {
+ /* The downstream dts has: rockchip,bifurcation, XXX: find out what this is about */
+ reset-gpios = <&gpio0 RK_PC7 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
+
+&sdhci {
+ bus-width = <8>;
+ max-frequency = <200000000>;
+ non-removable;
+ status = "okay";
+};
+
+/*
+ * Pins available on CN3 connector at TTL voltage level (3V3).
+ * ,_ _.
+ * |1234| 1=TX 2=VCC
+ * `----' 3=RX 4=GND
+ */
+&uart2 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index c19c0f1b3778..92f96ec01385 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -597,6 +597,7 @@
compatible = "rockchip,rk3568-vpu";
reg = <0x0 0xfdea0000 0x0 0x800>;
interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "vdpu";
clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>;
clock-names = "aclk", "hclk";
iommus = <&vdpu_mmu>;
@@ -1123,7 +1124,7 @@
dmas = <&dmac1 4>, <&dmac1 5>;
dma-names = "tx", "rx";
resets = <&cru SRST_M_I2S2_2CH>;
- reset-names = "m";
+ reset-names = "tx-m";
rockchip,grf = <&grf>;
pinctrl-names = "default";
pinctrl-0 = <&i2s2m0_sclktx
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts
index d4c70835e0fe..a4946cdc3bb3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts
@@ -72,7 +72,7 @@
vin-supply = <&vcc3v3_sys>;
};
- vcc5v0_usb30_host: vcc5v0-usb30-host-regulator {
+ vcc5v0_usb_host1: vcc5v0_usb_host2: vcc5v0-usb-host-regulator {
compatible = "regulator-fixed";
regulator-name = "vcc5v0_host";
regulator-boot-on;
@@ -114,6 +114,7 @@
status = "okay";
};
+/* Standard pcie */
&pcie3x2 {
reset-gpios = <&gpio3 RK_PB0 GPIO_ACTIVE_HIGH>;
vpcie3v3-supply = <&vcc3v3_sys>;
@@ -122,6 +123,7 @@
/* M.2 M-Key ssd */
&pcie3x4 {
+ num-lanes = <2>;
reset-gpios = <&gpio4 RK_PB6 GPIO_ACTIVE_HIGH>;
vpcie3v3-supply = <&vcc3v3_sys>;
status = "okay";
@@ -188,12 +190,12 @@
};
&u2phy2_host {
- phy-supply = <&vcc5v0_usb30_host>;
+ phy-supply = <&vcc5v0_usb_host1>;
status = "okay";
};
&u2phy3_host {
- phy-supply = <&vcc5v0_usb30_host>;
+ phy-supply = <&vcc5v0_usb_host2>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
index 0b02f4d6e003..cce1c8e83587 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
@@ -16,8 +16,8 @@
aliases {
mmc0 = &sdhci;
- mmc1 = &sdio;
- mmc2 = &sdmmc;
+ mmc1 = &sdmmc;
+ mmc2 = &sdio;
serial2 = &uart2;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-common.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-common.dtsi
new file mode 100644
index 000000000000..c0d4a15323e2
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-common.dtsi
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2023 Edgeble AI Technologies Pvt. Ltd.
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ aliases {
+ mmc0 = &sdhci;
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led_user: led-0 {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_HEARTBEAT;
+ gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ pinctrl-names = "default";
+ pinctrl-0 = <&led_user_en>;
+ };
+ };
+
+ vcc12v_dcin: vcc12v-dcin-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc12v_dcin";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
+ };
+
+ vcc5v0_sys: vcc5v0-sys-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc12v_dcin>;
+ };
+
+ vcc_1v1_nldo_s3: vcc-1v1-nldo-s3-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_1v1_nldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+};
+
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_l0 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0m2_xfer>;
+ status = "okay";
+
+ vdd_cpu_big0_s0: regulator@42 {
+ compatible = "rockchip,rk8602";
+ reg = <0x42>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big0_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_big1_s0: regulator@43 {
+ compatible = "rockchip,rk8603", "rockchip,rk8602";
+ reg = <0x43>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big1_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&pinctrl {
+ leds {
+ led_user_en: led_user_en {
+ rockchip,pins = <0 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&sdhci {
+ bus-width = <8>;
+ no-sdio;
+ no-sd;
+ non-removable;
+ mmc-hs400-1_8v;
+ mmc-hs400-enhanced-strobe;
+ status = "okay";
+};
+
+&spi2 {
+ status = "okay";
+ assigned-clocks = <&cru CLK_SPI2>;
+ assigned-clock-rates = <200000000>;
+ num-cs = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2m2_cs0 &spi2m2_pins>;
+
+ pmic@0 {
+ compatible = "rockchip,rk806";
+ spi-max-frequency = <1000000>;
+ reg = <0x0>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PA7 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
+ <&rk806_dvs2_null>, <&rk806_dvs3_null>;
+
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+ vcc4-supply = <&vcc5v0_sys>;
+ vcc5-supply = <&vcc5v0_sys>;
+ vcc6-supply = <&vcc5v0_sys>;
+ vcc7-supply = <&vcc5v0_sys>;
+ vcc8-supply = <&vcc5v0_sys>;
+ vcc9-supply = <&vcc5v0_sys>;
+ vcc10-supply = <&vcc5v0_sys>;
+ vcc11-supply = <&vcc_2v0_pldo_s3>;
+ vcc12-supply = <&vcc5v0_sys>;
+ vcc13-supply = <&vcc_1v1_nldo_s3>;
+ vcc14-supply = <&vcc_1v1_nldo_s3>;
+ vcca-supply = <&vcc5v0_sys>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ rk806_dvs1_null: dvs1-null-pins {
+ pins = "gpio_pwrctrl2";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs2_null: dvs2-null-pins {
+ pins = "gpio_pwrctrl2";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs3_null: dvs3-null-pins {
+ pins = "gpio_pwrctrl3";
+ function = "pin_fun0";
+ };
+
+ regulators {
+ vdd_gpu_s0: vdd_gpu_mem_s0: dcdc-reg1 {
+ regulator-name = "vdd_gpu_s0";
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <400>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_lit_s0: vdd_cpu_lit_mem_s0: dcdc-reg2 {
+ regulator-name = "vdd_cpu_lit_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_log_s0: dcdc-reg3 {
+ regulator-name = "vdd_log_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_vdenc_s0: vdd_vdenc_mem_s0: dcdc-reg4 {
+ regulator-name = "vdd_vdenc_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_ddr_s0: dcdc-reg5 {
+ regulator-name = "vdd_ddr_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <900000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ vdd2_ddr_s3: dcdc-reg6 {
+ regulator-name = "vdd2_ddr_s3";
+ regulator-always-on;
+ regulator-boot-on;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc_2v0_pldo_s3: dcdc-reg7 {
+ regulator-name = "vdd_2v0_pldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2000000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <2000000>;
+ };
+ };
+
+ vcc_3v3_s3: dcdc-reg8 {
+ regulator-name = "vcc_3v3_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <3300000>;
+ };
+ };
+
+ vddq_ddr_s0: dcdc-reg9 {
+ regulator-name = "vddq_ddr_s0";
+ regulator-always-on;
+ regulator-boot-on;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s3: dcdc-reg10 {
+ regulator-name = "vcc_1v8_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ avcc_1v8_s0: pldo-reg1 {
+ regulator-name = "avcc_1v8_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s0: pldo-reg2 {
+ regulator-name = "vcc_1v8_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ avdd_1v2_s0: pldo-reg3 {
+ regulator-name = "avdd_1v2_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_3v3_s0: pldo-reg4 {
+ regulator-name = "vcc_3v3_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd_s0: pldo-reg5 {
+ regulator-name = "vccio_sd_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ pldo6_s3: pldo-reg6 {
+ regulator-name = "pldo6_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdd_0v75_s3: nldo-reg1 {
+ regulator-name = "vdd_0v75_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_ddr_pll_s0: nldo-reg2 {
+ regulator-name = "vdd_ddr_pll_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ avdd_0v75_s0: nldo-reg3 {
+ regulator-name = "avdd_0v75_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v85_s0: nldo-reg4 {
+ regulator-name = "vdd_0v85_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v75_s0: nldo-reg5 {
+ regulator-name = "vdd_0v75_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dts b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dts
index be6a4f4f90f6..46d5e21d4d27 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dts
@@ -6,18 +6,10 @@
/dts-v1/;
#include "rk3588.dtsi"
#include "rk3588-edgeble-neu6a.dtsi"
+#include "rk3588-edgeble-neu6a-io.dtsi"
/ {
model = "Edgeble Neu6A IO Board";
compatible = "edgeble,neural-compute-module-6a-io",
"edgeble,neural-compute-module-6a", "rockchip,rk3588";
-
- chosen {
- stdout-path = "serial2:1500000n8";
- };
-};
-
-&uart2 {
- pinctrl-0 = <&uart2m0_xfer>;
- status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dtsi
new file mode 100644
index 000000000000..963e880ccc12
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-io.dtsi
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2023 Edgeble AI Technologies Pvt. Ltd.
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+ chosen {
+ stdout-path = "serial2:1500000n8";
+ };
+
+ vcc3v3_pcie2x1l0: vcc3v3-pcie2x1l0-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc3v3_pcie2x1l0";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ startup-delay-us = <5000>;
+ vin-supply = <&vcc_3v3_s3>;
+ };
+
+ vcc3v3_pcie3x2: vcc3v3-pcie3x2-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpios = <&gpio2 RK_PC4 GPIO_ACTIVE_HIGH>; /* PCIE_4G_PWEN */
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie3x2_vcc3v3_en>;
+ regulator-name = "vcc3v3_pcie3x2";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ startup-delay-us = <5000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+
+ vcc3v3_pcie3x4: vcc3v3-pcie3x4-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpios = <&gpio2 RK_PC5 GPIO_ACTIVE_HIGH>; /* PCIE30x4_PWREN_H */
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie3x4_vcc3v3_en>;
+ regulator-name = "vcc3v3_pcie3x4";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ startup-delay-us = <5000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+
+ vcc5v0_host: vcc5v0-host-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio3 RK_PC7 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&vcc5v0_host_en>;
+ regulator-name = "vcc5v0_host";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ vin-supply = <&vcc5v0_sys>;
+ };
+};
+
+&combphy0_ps {
+ status = "okay";
+};
+
+&combphy1_ps {
+ status = "okay";
+};
+
+&i2c6 {
+ status = "okay";
+
+ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PB0 IRQ_TYPE_LEVEL_LOW>;
+ #clock-cells = <0>;
+ clock-output-names = "hym8563";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hym8563_int>;
+ wakeup-source;
+ };
+};
+
+/* ETH */
+&pcie2x1l0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie2_0_rst>;
+ reset-gpios = <&gpio4 RK_PA5 GPIO_ACTIVE_HIGH>; /* PCIE20_1_PERST_L */
+ vpcie3v3-supply = <&vcc3v3_pcie2x1l0>;
+ status = "okay";
+};
+
+&pcie30phy {
+ status = "okay";
+};
+
+/* B-Key and E-Key */
+&pcie3x2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie3x2_rst>;
+ reset-gpios = <&gpio4 RK_PB6 GPIO_ACTIVE_HIGH>; /* PCIE30X4_PERSTn_M1_L */
+ vpcie3v3-supply = <&vcc3v3_pcie3x2>;
+ status = "okay";
+};
+
+/* M-Key */
+&pcie3x4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie3x4_rst>;
+ reset-gpios = <&gpio4 RK_PB0 GPIO_ACTIVE_HIGH>; /* PCIE30X2_PERSTn_M1_L */
+ vpcie3v3-supply = <&vcc3v3_pcie3x4>;
+ status = "okay";
+};
+
+&pinctrl {
+ pcie2 {
+ pcie2_0_rst: pcie2-0-rst {
+ rockchip,pins = <4 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ pcie3 {
+ pcie3x2_rst: pcie3x2-rst {
+ rockchip,pins = <4 RK_PB6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ pcie3x2_vcc3v3_en: pcie3x2-vcc3v3-en {
+ rockchip,pins = <2 RK_PC4 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ pcie3x4_rst: pcie3x4-rst {
+ rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ pcie3x4_vcc3v3_en: pcie3x4-vcc3v3-en {
+ rockchip,pins = <2 RK_PC5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ hym8563 {
+ hym8563_int: hym8563-int {
+ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ usb {
+ vcc5v0_host_en: vcc5v0-host-en {
+ rockchip,pins = <3 RK_PC7 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+/* FAN */
+&pwm2 {
+ pinctrl-0 = <&pwm2m1_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&sata0 {
+ status = "okay";
+};
+
+&sdmmc {
+ bus-width = <4>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
+ disable-wp;
+ no-sdio;
+ no-mmc;
+ sd-uhs-sdr104;
+ vmmc-supply = <&vcc_3v3_s3>;
+ vqmmc-supply = <&vccio_sd_s0>;
+ status = "okay";
+};
+
+&uart2 {
+ pinctrl-0 = <&uart2m0_xfer>;
+ status = "okay";
+};
+
+/* RS232 */
+&uart6 {
+ pinctrl-0 = <&uart6m0_xfer>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+/* RS485 */
+&uart7 {
+ pinctrl-0 = <&uart7m2_xfer>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&u2phy2 {
+ status = "okay";
+};
+
+&u2phy2_host {
+ /* connected to USB hub, which is powered by vcc5v0_sys */
+ phy-supply = <&vcc5v0_sys>;
+ status = "okay";
+};
+
+&u2phy3 {
+ status = "okay";
+};
+
+&u2phy3_host {
+ phy-supply = <&vcc5v0_host>;
+ status = "okay";
+};
+
+&usb_host0_ehci {
+ status = "okay";
+};
+
+&usb_host0_ohci {
+ status = "okay";
+};
+
+&usb_host1_ehci {
+ status = "okay";
+};
+
+&usb_host1_ohci {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-wifi.dtso b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-wifi.dtso
new file mode 100644
index 000000000000..e9a3855e8752
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a-wifi.dtso
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2022 Edgeble AI Technologies Pvt. Ltd.
+ *
+ * DT-overlay for Edgeble On-SoM WiFi6/BT M.2 1216 modules,
+ * - AW-XM548NF
+ * - Intel 8260D2W
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+
+&{/} {
+ vcc3v3_pcie2x1l1: vcc3v3-pcie2x1l1-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; /* WIFI_3V3_EN */
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie2_1_vcc3v3_en>;
+ regulator-name = "vcc3v3_pcie2x1l1";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ startup-delay-us = <50000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+};
+
+&combphy2_psu {
+ status = "okay";
+};
+
+/* WiFi6 */
+&pcie2x1l1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie2_1_rst>;
+ reset-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>; /* PCIE20_2_WIFI_PERSTn */
+ vpcie3v3-supply = <&vcc3v3_pcie2x1l1>;
+ status = "okay";
+};
+
+&pinctrl {
+ pcie2 {
+ pcie2_1_rst: pcie2-1-rst {
+ rockchip,pins = <4 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ pcie2_1_vcc3v3_en: pcie2-1-vcc-en {
+ rockchip,pins = <0 RK_PC4 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a.dtsi
index 727580aaa105..4c76a00b41eb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6a.dtsi
@@ -3,29 +3,8 @@
* Copyright (c) 2022 Edgeble AI Technologies Pvt. Ltd.
*/
+#include "rk3588-edgeble-neu6a-common.dtsi"
+
/ {
compatible = "edgeble,neural-compute-module-6a", "rockchip,rk3588";
-
- aliases {
- mmc0 = &sdhci;
- };
-
- vcc12v_dcin: vcc12v-dcin-regulator {
- compatible = "regulator-fixed";
- regulator-name = "vcc12v_dcin";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <12000000>;
- regulator-max-microvolt = <12000000>;
- };
-};
-
-&sdhci {
- bus-width = <8>;
- no-sdio;
- no-sd;
- non-removable;
- mmc-hs400-1_8v;
- mmc-hs400-enhanced-strobe;
- status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b-io.dts b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b-io.dts
index 070baeb63431..0d6f1be69ac8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b-io.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b-io.dts
@@ -6,84 +6,10 @@
/dts-v1/;
#include "rk3588j.dtsi"
#include "rk3588-edgeble-neu6b.dtsi"
+#include "rk3588-edgeble-neu6a-io.dtsi"
/ {
model = "Edgeble Neu6B IO Board";
compatible = "edgeble,neural-compute-module-6a-io",
"edgeble,neural-compute-module-6b", "rockchip,rk3588";
-
- chosen {
- stdout-path = "serial2:1500000n8";
- };
-};
-
-&combphy0_ps {
- status = "okay";
-};
-
-&i2c6 {
- status = "okay";
-
- hym8563: rtc@51 {
- compatible = "haoyu,hym8563";
- reg = <0x51>;
- interrupt-parent = <&gpio0>;
- interrupts = <RK_PB0 IRQ_TYPE_LEVEL_LOW>;
- #clock-cells = <0>;
- clock-output-names = "hym8563";
- pinctrl-names = "default";
- pinctrl-0 = <&hym8563_int>;
- wakeup-source;
- };
-};
-
-&pinctrl {
- hym8563 {
- hym8563_int: hym8563-int {
- rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
- };
- };
-};
-
-/* FAN */
-&pwm2 {
- pinctrl-0 = <&pwm2m1_pins>;
- pinctrl-names = "default";
- status = "okay";
-};
-
-&sata0 {
- status = "okay";
-};
-
-&sdmmc {
- bus-width = <4>;
- cap-mmc-highspeed;
- cap-sd-highspeed;
- disable-wp;
- no-sdio;
- no-mmc;
- sd-uhs-sdr104;
- vmmc-supply = <&vcc_3v3_s3>;
- vqmmc-supply = <&vccio_sd_s0>;
- status = "okay";
-};
-
-&uart2 {
- pinctrl-0 = <&uart2m0_xfer>;
- status = "okay";
-};
-
-/* RS232 */
-&uart6 {
- pinctrl-0 = <&uart6m0_xfer>;
- pinctrl-names = "default";
- status = "okay";
-};
-
-/* RS485 */
-&uart7 {
- pinctrl-0 = <&uart7m2_xfer>;
- pinctrl-names = "default";
- status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b.dtsi
index 017559bba37f..c4634bc09fb4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-edgeble-neu6b.dtsi
@@ -3,387 +3,8 @@
* Copyright (c) 2023 Edgeble AI Technologies Pvt. Ltd.
*/
+#include "rk3588-edgeble-neu6a-common.dtsi"
+
/ {
compatible = "edgeble,neural-compute-module-6b", "rockchip,rk3588";
-
- aliases {
- mmc0 = &sdhci;
- };
-
- vcc12v_dcin: vcc12v-dcin-regulator {
- compatible = "regulator-fixed";
- regulator-name = "vcc12v_dcin";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <12000000>;
- regulator-max-microvolt = <12000000>;
- };
-
- vcc5v0_sys: vcc5v0-sys-regulator {
- compatible = "regulator-fixed";
- regulator-name = "vcc5v0_sys";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- vin-supply = <&vcc12v_dcin>;
- };
-
- vcc_1v1_nldo_s3: vcc-1v1-nldo-s3-regulator {
- compatible = "regulator-fixed";
- regulator-name = "vcc_1v1_nldo_s3";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1100000>;
- regulator-max-microvolt = <1100000>;
- vin-supply = <&vcc5v0_sys>;
- };
-};
-
-&cpu_l0 {
- cpu-supply = <&vdd_cpu_lit_s0>;
-};
-
-&cpu_l1 {
- cpu-supply = <&vdd_cpu_lit_s0>;
-};
-
-&cpu_l2 {
- cpu-supply = <&vdd_cpu_lit_s0>;
-};
-
-&cpu_l3 {
- cpu-supply = <&vdd_cpu_lit_s0>;
-};
-
-&sdhci {
- bus-width = <8>;
- no-sdio;
- no-sd;
- non-removable;
- mmc-hs400-1_8v;
- mmc-hs400-enhanced-strobe;
- status = "okay";
-};
-
-&spi2 {
- status = "okay";
- assigned-clocks = <&cru CLK_SPI2>;
- assigned-clock-rates = <200000000>;
- num-cs = <1>;
- pinctrl-names = "default";
- pinctrl-0 = <&spi2m2_cs0 &spi2m2_pins>;
-
- pmic@0 {
- compatible = "rockchip,rk806";
- spi-max-frequency = <1000000>;
- reg = <0x0>;
- interrupt-parent = <&gpio0>;
- interrupts = <RK_PA7 IRQ_TYPE_LEVEL_LOW>;
- pinctrl-names = "default";
- pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
- <&rk806_dvs2_null>, <&rk806_dvs3_null>;
-
- vcc1-supply = <&vcc5v0_sys>;
- vcc2-supply = <&vcc5v0_sys>;
- vcc3-supply = <&vcc5v0_sys>;
- vcc4-supply = <&vcc5v0_sys>;
- vcc5-supply = <&vcc5v0_sys>;
- vcc6-supply = <&vcc5v0_sys>;
- vcc7-supply = <&vcc5v0_sys>;
- vcc8-supply = <&vcc5v0_sys>;
- vcc9-supply = <&vcc5v0_sys>;
- vcc10-supply = <&vcc5v0_sys>;
- vcc11-supply = <&vcc_2v0_pldo_s3>;
- vcc12-supply = <&vcc5v0_sys>;
- vcc13-supply = <&vcc_1v1_nldo_s3>;
- vcc14-supply = <&vcc_1v1_nldo_s3>;
- vcca-supply = <&vcc5v0_sys>;
-
- gpio-controller;
- #gpio-cells = <2>;
-
- rk806_dvs1_null: dvs1-null-pins {
- pins = "gpio_pwrctrl2";
- function = "pin_fun0";
- };
-
- rk806_dvs2_null: dvs2-null-pins {
- pins = "gpio_pwrctrl2";
- function = "pin_fun0";
- };
-
- rk806_dvs3_null: dvs3-null-pins {
- pins = "gpio_pwrctrl3";
- function = "pin_fun0";
- };
-
- regulators {
- vdd_gpu_s0: vdd_gpu_mem_s0: dcdc-reg1 {
- regulator-name = "vdd_gpu_s0";
- regulator-boot-on;
- regulator-min-microvolt = <550000>;
- regulator-max-microvolt = <950000>;
- regulator-ramp-delay = <12500>;
- regulator-enable-ramp-delay = <400>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vdd_cpu_lit_s0: vdd_cpu_lit_mem_s0: dcdc-reg2 {
- regulator-name = "vdd_cpu_lit_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <550000>;
- regulator-max-microvolt = <950000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vdd_log_s0: dcdc-reg3 {
- regulator-name = "vdd_log_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <675000>;
- regulator-max-microvolt = <750000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- regulator-suspend-microvolt = <750000>;
- };
- };
-
- vdd_vdenc_s0: vdd_vdenc_mem_s0: dcdc-reg4 {
- regulator-name = "vdd_vdenc_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <550000>;
- regulator-max-microvolt = <950000>;
- regulator-init-microvolt = <750000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vdd_ddr_s0: dcdc-reg5 {
- regulator-name = "vdd_ddr_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <675000>;
- regulator-max-microvolt = <900000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- regulator-suspend-microvolt = <850000>;
- };
- };
-
- vdd2_ddr_s3: dcdc-reg6 {
- regulator-name = "vdd2_ddr_s3";
- regulator-always-on;
- regulator-boot-on;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- };
- };
-
- vcc_2v0_pldo_s3: dcdc-reg7 {
- regulator-name = "vdd_2v0_pldo_s3";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <2000000>;
- regulator-max-microvolt = <2000000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- regulator-suspend-microvolt = <2000000>;
- };
- };
-
- vcc_3v3_s3: dcdc-reg8 {
- regulator-name = "vcc_3v3_s3";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- regulator-suspend-microvolt = <3300000>;
- };
- };
-
- vddq_ddr_s0: dcdc-reg9 {
- regulator-name = "vddq_ddr_s0";
- regulator-always-on;
- regulator-boot-on;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vcc_1v8_s3: dcdc-reg10 {
- regulator-name = "vcc_1v8_s3";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- regulator-suspend-microvolt = <1800000>;
- };
- };
-
- avcc_1v8_s0: pldo-reg1 {
- regulator-name = "avcc_1v8_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vcc_1v8_s0: pldo-reg2 {
- regulator-name = "vcc_1v8_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- regulator-suspend-microvolt = <1800000>;
- };
- };
-
- avdd_1v2_s0: pldo-reg3 {
- regulator-name = "avdd_1v2_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vcc_3v3_s0: pldo-reg4 {
- regulator-name = "vcc_3v3_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vccio_sd_s0: pldo-reg5 {
- regulator-name = "vccio_sd_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <3300000>;
- regulator-ramp-delay = <12500>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- pldo6_s3: pldo-reg6 {
- regulator-name = "pldo6_s3";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- regulator-suspend-microvolt = <1800000>;
- };
- };
-
- vdd_0v75_s3: nldo-reg1 {
- regulator-name = "vdd_0v75_s3";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <750000>;
- regulator-max-microvolt = <750000>;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- regulator-suspend-microvolt = <750000>;
- };
- };
-
- vdd_ddr_pll_s0: nldo-reg2 {
- regulator-name = "vdd_ddr_pll_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <850000>;
- regulator-max-microvolt = <850000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- regulator-suspend-microvolt = <850000>;
- };
- };
-
- avdd_0v75_s0: nldo-reg3 {
- regulator-name = "avdd_0v75_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <750000>;
- regulator-max-microvolt = <750000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vdd_0v85_s0: nldo-reg4 {
- regulator-name = "vdd_0v85_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <850000>;
- regulator-max-microvolt = <850000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
-
- vdd_0v75_s0: nldo-reg5 {
- regulator-name = "vdd_0v75_s0";
- regulator-always-on;
- regulator-boot-on;
- regulator-min-microvolt = <750000>;
- regulator-max-microvolt = <750000>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
- };
- };
- };
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index ac7c677b0fb9..de30c2632b8e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -448,6 +448,7 @@
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
pinctrl-names = "default";
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc5v0_sys>;
vcc2-supply = <&vcc5v0_sys>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts
index 4ce70fb75a30..39d65002add1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts
@@ -62,7 +62,6 @@
compatible = "gpio-leds";
pinctrl-names = "default";
pinctrl-0 = <&led1_pin>;
- status = "okay";
/* LED1 on PCB */
led-1 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts
index d7722772ecd8..ad8e36a339dc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts
@@ -159,6 +159,29 @@
regulator-max-microvolt = <3300000>;
vin-supply = <&vcc5v0_sys>;
};
+
+ vcc3v3_sd_s0: vcc3v3-sd-s0-regulator {
+ compatible = "regulator-fixed";
+ enable-active-low;
+ gpio = <&gpio4 RK_PA5 GPIO_ACTIVE_LOW>;
+ regulator-boot-on;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-name = "vcc3v3_sd_s0";
+ vin-supply = <&vcc_3v3_s3>;
+ };
+
+ vdd_4g_3v3: vdd-4g-3v3-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pin_4g_lte_pwren>;
+ regulator-name = "vdd_4g_3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
};
&combphy0_ps {
@@ -189,19 +212,19 @@
cpu-supply = <&vdd_cpu_lit_s0>;
};
-&cpu_b0{
+&cpu_b0 {
cpu-supply = <&vdd_cpu_big0_s0>;
};
-&cpu_b1{
+&cpu_b1 {
cpu-supply = <&vdd_cpu_big0_s0>;
};
-&cpu_b2{
+&cpu_b2 {
cpu-supply = <&vdd_cpu_big1_s0>;
};
-&cpu_b3{
+&cpu_b3 {
cpu-supply = <&vdd_cpu_big1_s0>;
};
@@ -504,6 +527,10 @@
};
usb {
+ pin_4g_lte_pwren: 4g-lte-pwren {
+ rockchip,pins = <4 RK_PC6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
typec5v_pwren: typec5v-pwren {
rockchip,pins = <1 RK_PD2 RK_FUNC_GPIO &pcfg_pull_none>;
};
@@ -539,11 +566,12 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
no-mmc;
no-sdio;
sd-uhs-sdr104;
- vmmc-supply = <&vcc_3v3_s3>;
+ vmmc-supply = <&vcc3v3_sd_s0>;
vqmmc-supply = <&vccio_sd_s0>;
status = "okay";
};
@@ -884,6 +912,7 @@
};
&u2phy2_host {
+ phy-supply = <&vdd_4g_3v3>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
index 3e660ff6cd5f..1b606ea5b6cf 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
@@ -444,7 +444,6 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
- cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
index 87a0abf95f7d..67414d72e2b6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
@@ -429,7 +429,6 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
- cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index a0e303c3a1dc..1fe8b2a0ed75 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -58,6 +58,13 @@
#cooling-cells = <2>;
};
+ rfkill {
+ compatible = "rfkill-gpio";
+ label = "rfkill-pcie-wlan";
+ radio-type = "wlan";
+ shutdown-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>;
+ };
+
vcc3v3_pcie2x1l0: vcc3v3-pcie2x1l0-regulator {
compatible = "regulator-fixed";
enable-active-high;
@@ -371,7 +378,6 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
- cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
sd-uhs-sdr104;
vmmc-supply = <&vcc_3v3_s3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3588-tiger-haikou.dts
new file mode 100644
index 000000000000..d672198c6b64
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger-haikou.dts
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2023 Theobroma Systems Design und Consulting GmbH
+ */
+
+/dts-v1/;
+#include <dt-bindings/input/input.h>
+#include "rk3588-tiger.dtsi"
+
+/ {
+ model = "Theobroma Systems RK3588-Q7 SoM on Haikou devkit";
+ compatible = "tsd,rk3588-tiger-haikou", "tsd,rk3588-tiger", "rockchip,rk3588";
+
+ aliases {
+ ethernet0 = &gmac0;
+ mmc1 = &sdmmc;
+ };
+
+ chosen {
+ stdout-path = "serial2:115200n8";
+ };
+
+ dc_12v: dc-12v-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "dc_12v";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&haikou_keys_pin>;
+
+ button-batlow-n {
+ label = "BATLOW#";
+ linux,code = <KEY_BATTERY>;
+ gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_LOW>;
+ };
+
+ button-slp-btn-n {
+ label = "SLP_BTN#";
+ linux,code = <KEY_SLEEP>;
+ gpios = <&gpio4 RK_PB3 GPIO_ACTIVE_LOW>;
+ };
+
+ button-wake-n {
+ label = "WAKE#";
+ linux,code = <KEY_WAKEUP>;
+ gpios = <&gpio3 RK_PC6 GPIO_ACTIVE_LOW>;
+ wakeup-source;
+ };
+
+ switch-lid-btn-n {
+ label = "LID_BTN#";
+ linux,code = <SW_LID>;
+ linux,input-type = <EV_SW>;
+ gpios = <&gpio3 RK_PD5 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ i2s3-sound {
+ compatible = "simple-audio-card";
+ simple-audio-card,format = "i2s";
+ simple-audio-card,name = "Haikou,I2S-codec";
+ simple-audio-card,mclk-fs = <512>;
+ simple-audio-card,frame-master = <&sgtl5000_codec>;
+ simple-audio-card,bitclock-master = <&sgtl5000_codec>;
+
+ sgtl5000_codec: simple-audio-card,codec {
+ sound-dai = <&sgtl5000>;
+ };
+
+ simple-audio-card,cpu {
+ sound-dai = <&i2s3_2ch>;
+ };
+ };
+
+ sgtl5000_clk: sgtl5000-oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24576000>;
+ };
+
+ vcc3v3_baseboard: vcc3v3-baseboard-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc3v3_baseboard";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&dc_12v>;
+ };
+
+ vcc3v3_low_noise: vcc3v3-low-noise-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc3v3_low_noise";
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc5v0_usb>;
+ };
+
+ vcc5v0_baseboard: vcc5v0-baseboard-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_baseboard";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&dc_12v>;
+ };
+
+ vcc5v0_usb: vcc5v0-usb-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_usb";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&dc_12v>;
+ };
+
+ vddd_audio_1v6: vddd-audio-1v6-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vddd_audio_1v6";
+ regulator-boot-on;
+ regulator-min-microvolt = <1600000>;
+ regulator-max-microvolt = <1600000>;
+ vin-supply = <&vcc5v0_usb>;
+ };
+};
+
+&combphy2_psu {
+ status = "okay";
+};
+
+&gmac0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+
+ eeprom@50 {
+ reg = <0x50>;
+ compatible = "atmel,24c01";
+ pagesize = <8>;
+ size = <128>;
+ vcc-supply = <&vcc3v3_baseboard>;
+ };
+};
+
+&i2c5 {
+ clock-frequency = <400000>;
+ status = "okay";
+
+ sgtl5000: codec@a {
+ compatible = "fsl,sgtl5000";
+ reg = <0x0a>;
+ clocks = <&sgtl5000_clk>;
+ #sound-dai-cells = <0>;
+ VDDA-supply = <&vcc3v3_low_noise>;
+ VDDIO-supply = <&vcc3v3_baseboard>;
+ VDDD-supply = <&vddd_audio_1v6>;
+ };
+};
+
+&i2c8 {
+ status = "okay";
+};
+
+&i2s3_2ch {
+ status = "okay";
+};
+
+&pcie30phy {
+ status = "okay";
+};
+
+&pcie3x4 {
+ vpcie3v3-supply = <&vcc3v3_baseboard>;
+ status = "okay";
+};
+
+&pinctrl {
+ haikou {
+ haikou_keys_pin: haikou-keys-pin {
+ rockchip,pins =
+ /* BATLOW# */
+ <3 RK_PB5 RK_FUNC_GPIO &pcfg_pull_up>,
+ /* SLP_BTN# */
+ <4 RK_PB3 RK_FUNC_GPIO &pcfg_pull_up>,
+ /* WAKE# */
+ <3 RK_PC6 RK_FUNC_GPIO &pcfg_pull_up>,
+ /* LID_BTN */
+ <3 RK_PD5 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+};
+
+&sdmmc {
+ /* while the same pin, sdmmc_det does not detect card changes */
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
+ disable-wp;
+ pinctrl-0 = <&sdmmc_bus4 &sdmmc_cmd &sdmmc_clk>;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ vmmc-supply = <&vcc3v3_baseboard>;
+ status = "okay";
+};
+
+&u2phy2 {
+ status = "okay";
+};
+
+&u2phy2_host {
+ status = "okay";
+};
+
+&u2phy3 {
+ status = "okay";
+};
+
+&u2phy3_host {
+ status = "okay";
+};
+
+&uart2 {
+ pinctrl-0 = <&uart2m2_xfer>;
+ status = "okay";
+};
+
+&uart5 {
+ rts-gpios = <&gpio3 RK_PB3 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
+
+/* host0 on Q7_USB_P2, lower usb3 port */
+&usb_host0_ehci {
+ status = "okay";
+};
+
+/* host0 on Q7_USB_P2, lower usb3 port */
+&usb_host0_ohci {
+ status = "okay";
+};
+
+/* host1 on Q7_USB_P3, usb2 port */
+&usb_host1_ehci {
+ status = "okay";
+};
+
+/* host1 on Q7_USB_P3, usb2 port */
+&usb_host1_ohci {
+ status = "okay";
+};
+
+/* host2 on Q7_USB_P2, lower usb3 port */
+&usb_host2_xhci {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
new file mode 100644
index 000000000000..1eb2543a5fde
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2023 Theobroma Systems Design und Consulting GmbH
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include "rk3588.dtsi"
+
+/ {
+ compatible = "tsd,rk3588-tiger", "rockchip,rk3588";
+
+ aliases {
+ mmc0 = &sdhci;
+ rtc0 = &rtc_twi;
+ };
+
+ emmc_pwrseq: emmc-pwrseq {
+ compatible = "mmc-pwrseq-emmc";
+ pinctrl-0 = <&emmc_reset>;
+ pinctrl-names = "default";
+ reset-gpios = <&gpio2 RK_PA3 GPIO_ACTIVE_HIGH>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&module_led_pin>;
+
+ /* Named LED1 on the board */
+ led-1 {
+ gpios = <&gpio1 RK_PD3 GPIO_ACTIVE_HIGH>;
+ function = LED_FUNCTION_HEARTBEAT;
+ linux,default-trigger = "heartbeat";
+ color = <LED_COLOR_ID_AMBER>;
+ };
+ };
+
+ /*
+ * 100MHz reference clock for PCIe peripherals from PI6C557-05BLE
+ * clock generator.
+ * The clock output is gated via the OE pin on the clock generator.
+ * This is modeled as a fixed-clock plus a gpio-gate-clock.
+ */
+ pcie_refclk_gen: pcie-refclk-gen-clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000000>;
+ };
+
+ pcie_refclk: pcie-refclk-clock {
+ compatible = "gpio-gate-clock";
+ clocks = <&pcie_refclk_gen>;
+ #clock-cells = <0>;
+ enable-gpios = <&gpio4 RK_PB4 GPIO_ACTIVE_HIGH>; /* PCIE30X4_CLKREQN_M1_L */
+ };
+
+ vcc_1v1_nldo_s3: vcc-1v1-nldo-s3-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_1v1_nldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+
+ vcc_1v2_s3: vcc-1v2-s3-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_1v2_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+
+ vcc5v0_sys: vcc5v0-sys-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_baseboard>;
+ };
+};
+
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_l0 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&gmac0 {
+ clock_in_out = "output";
+ phy-handle = <&rgmii_phy>;
+ phy-mode = "rgmii";
+ phy-supply = <&vcc_1v2_s3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&gmac0_miim
+ &gmac0_rx_bus2
+ &gmac0_tx_bus2
+ &gmac0_rgmii_clk
+ &gmac0_rgmii_bus
+ &eth0_pins
+ &eth_reset>;
+ tx_delay = <0x10>;
+ rx_delay = <0x10>;
+ snps,reset-gpio = <&gpio4 RK_PC3 GPIO_ACTIVE_LOW>;
+ snps,reset-active-low;
+ snps,reset-delays-us = <0 10000 100000>;
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1m0_xfer>;
+};
+
+&i2c1m0_xfer {
+ rockchip,pins =
+ /* i2c1_scl_m0 */
+ <0 RK_PB5 9 &pcfg_pull_none_drv_level_0>,
+ /* i2c1_sda_m0 */
+ <0 RK_PB6 9 &pcfg_pull_none_drv_level_0>;
+};
+
+&i2c2 {
+ pinctrl-0 = <&i2c2m3_xfer>;
+ status = "okay";
+};
+
+&i2c2m3_xfer {
+ rockchip,pins =
+ /* i2c2_scl_m3 */
+ <1 RK_PC5 9 &pcfg_pull_none_drv_level_0>,
+ /* i2c2_sda_m3 */
+ <1 RK_PC4 9 &pcfg_pull_none_drv_level_0>;
+};
+
+&i2c3 {
+ pinctrl-0 = <&i2c3m0_xfer>;
+};
+
+&i2c4 {
+ pinctrl-0 = <&i2c4m4_xfer>;
+ status = "okay";
+
+ vdd_npu_s0: regulator@42 {
+ compatible = "rockchip,rk8602";
+ reg = <0x42>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_npu_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&i2c5 {
+ pinctrl-0 = <&i2c5m1_xfer>;
+};
+
+&i2c5m1_xfer {
+ rockchip,pins =
+ /* i2c5_scl_m1 */
+ <4 RK_PB6 9 &pcfg_pull_none_drv_level_0>,
+ /* i2c5_sda_m1 */
+ <4 RK_PB7 9 &pcfg_pull_none_drv_level_0>;
+};
+
+&i2c6 {
+ /*
+ * Mule-ATtiny can handle up to Fast mode Plus (1MHz) on I2C bus,
+ * but SOC can handle only up to (400kHz).
+ */
+ clock-frequency = <400000>;
+ status = "okay";
+
+ fan@18 {
+ compatible = "ti,amc6821";
+ reg = <0x18>;
+ };
+
+ rtc_twi: rtc@6f {
+ compatible = "isil,isl1208";
+ reg = <0x6f>;
+ };
+};
+
+&i2c6m0_xfer {
+ rockchip,pins =
+ /* i2c6_scl_m0 */
+ <0 RK_PD0 9 &pcfg_pull_none_drv_level_0>,
+ /* i2c6_sda_m0 */
+ <0 RK_PC7 9 &pcfg_pull_none_drv_level_0>;
+};
+
+&i2c7 {
+ status = "okay";
+
+ vdd_cpu_big0_s0: regulator@42 {
+ compatible = "rockchip,rk8602";
+ reg = <0x42>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big0_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_big1_s0: regulator@43 {
+ compatible = "rockchip,rk8603", "rockchip,rk8602";
+ reg = <0x43>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big1_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&i2c7m0_xfer {
+ rockchip,pins =
+ /* i2c7_scl_m0 */
+ <1 RK_PD0 9 &pcfg_pull_none_drv_level_0>,
+ /* i2c7_sda_m0 */
+ <1 RK_PD1 9 &pcfg_pull_none_drv_level_0>;
+};
+
+&i2c8 {
+ pinctrl-0 = <&i2c8m2_xfer>;
+};
+
+&mdio0 {
+ rgmii_phy: ethernet-phy@6 {
+ /* KSZ9031 or KSZ9131 */
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0x6>;
+ clocks = <&cru REFCLKO25M_ETH0_OUT>;
+ };
+};
+
+&pcie3x4 {
+ /*
+ * The board has a gpio-controlled "pcie_refclk" generator,
+ * so add it to the list of clocks.
+ */
+ clocks = <&cru ACLK_PCIE_4L_MSTR>, <&cru ACLK_PCIE_4L_SLV>,
+ <&cru ACLK_PCIE_4L_DBI>, <&cru PCLK_PCIE_4L>,
+ <&cru CLK_PCIE_AUX0>, <&cru CLK_PCIE4L_PIPE>,
+ <&pcie_refclk>;
+ clock-names = "aclk_mst", "aclk_slv",
+ "aclk_dbi", "pclk",
+ "aux", "pipe",
+ "ref";
+ reset-gpios = <&gpio3 RK_PB6 GPIO_ACTIVE_HIGH>;
+};
+
+&pinctrl {
+ emmc {
+ emmc_reset: emmc-reset {
+ rockchip,pins = <2 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ ethernet {
+ eth_reset: eth-reset {
+ rockchip,pins = <4 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ leds {
+ module_led_pin: module-led-pin {
+ rockchip,pins = <1 RK_PD3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&saradc {
+ vref-supply = <&vcc_1v8_s0>;
+ status = "okay";
+};
+
+&sdhci {
+ bus-width = <8>;
+ cap-mmc-highspeed;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ mmc-hs400-enhanced-strobe;
+ mmc-pwrseq = <&emmc_pwrseq>;
+ no-sdio;
+ no-sd;
+ non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk &emmc_data_strobe>;
+ supports-cqe;
+ vmmc-supply = <&vcc_3v3_s3>;
+ vqmmc-supply = <&vcc_1v8_s3>;
+ status = "okay";
+};
+
+&sdmmc {
+ bus-width = <4>;
+ cap-sd-highspeed;
+ max-frequency = <150000000>;
+ vqmmc-supply = <&vccio_sd_s0>;
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0m1_cs0 &spi0m1_cs1 &spi0m3_pins>;
+};
+
+&spi2 {
+ assigned-clocks = <&cru CLK_SPI2>;
+ assigned-clock-rates = <200000000>;
+ num-cs = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2m2_cs0 &spi2m2_pins>;
+ status = "okay";
+
+ pmic@0 {
+ compatible = "rockchip,rk806";
+ reg = <0x0>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
+ <&rk806_dvs2_null>, <&rk806_dvs3_null>;
+ spi-max-frequency = <1000000>;
+ system-power-controller;
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+ vcc4-supply = <&vcc5v0_sys>;
+ vcc5-supply = <&vcc5v0_sys>;
+ vcc6-supply = <&vcc5v0_sys>;
+ vcc7-supply = <&vcc5v0_sys>;
+ vcc8-supply = <&vcc5v0_sys>;
+ vcc9-supply = <&vcc5v0_sys>;
+ vcc10-supply = <&vcc5v0_sys>;
+ vcc11-supply = <&vcc_2v0_pldo_s3>;
+ vcc12-supply = <&vcc5v0_sys>;
+ vcc13-supply = <&vcc_1v1_nldo_s3>;
+ vcc14-supply = <&vcc_1v1_nldo_s3>;
+ vcca-supply = <&vcc5v0_sys>;
+
+ rk806_dvs1_null: dvs1-null-pins {
+ pins = "gpio_pwrctrl2";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs2_null: dvs2-null-pins {
+ pins = "gpio_pwrctrl2";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs3_null: dvs3-null-pins {
+ pins = "gpio_pwrctrl3";
+ function = "pin_fun0";
+ };
+
+ regulators {
+ vdd_gpu_s0: dcdc-reg1 {
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_gpu_s0";
+ regulator-enable-ramp-delay = <400>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_lit_s0: dcdc-reg2 {
+ regulator-name = "vdd_cpu_lit_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_log_s0: dcdc-reg3 {
+ regulator-name = "vdd_log_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_vdenc_s0: dcdc-reg4 {
+ regulator-name = "vdd_vdenc_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_ddr_s0: dcdc-reg5 {
+ regulator-name = "vdd_ddr_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <900000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ vdd2_ddr_s3: dcdc-reg6 {
+ regulator-name = "vdd2_ddr_s3";
+ regulator-always-on;
+ regulator-boot-on;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc_2v0_pldo_s3: dcdc-reg7 {
+ regulator-name = "vcc_2v0_pldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2000000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <2000000>;
+ };
+ };
+
+ vcc_3v3_s3: dcdc-reg8 {
+ regulator-name = "vcc_3v3_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <3300000>;
+ };
+ };
+
+ vddq_ddr_s0: dcdc-reg9 {
+ regulator-name = "vddq_ddr_s0";
+ regulator-always-on;
+ regulator-boot-on;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s3: dcdc-reg10 {
+ regulator-name = "vcc_1v8_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vcca_1v8_s0: pldo-reg1 {
+ regulator-name = "vcca_1v8_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s0: pldo-reg2 {
+ regulator-name = "vcc_1v8_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdda_1v2_s0: pldo-reg3 {
+ regulator-name = "vdda_1v2_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcca_3v3_s0: pldo-reg4 {
+ regulator-name = "vcca_3v3_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd_s0: pldo-reg5 {
+ regulator-name = "vccio_sd_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ pldo6_s3: pldo-reg6 {
+ regulator-name = "pldo6_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdd_0v75_s3: nldo-reg1 {
+ regulator-name = "vdd_0v75_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdda_ddr_pll_s0: nldo-reg2 {
+ regulator-name = "vdda_ddr_pll_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ vdda_0v75_s0: nldo-reg3 {
+ regulator-name = "vdda_0v75_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdda_0v85_s0: nldo-reg4 {
+ regulator-name = "vdda_0v85_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v75_s0: nldo-reg5 {
+ regulator-name = "vdd_0v75_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+ };
+};
+
+&tsadc {
+ status = "okay";
+};
+
+/* Mule-ATtiny UPDI */
+&uart4 {
+ pinctrl-0 = <&uart4m2_xfer>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts
new file mode 100644
index 000000000000..9090c5c99f2a
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+#include "rk3588.dtsi"
+
+/ {
+ model = "Rockchip Toybrick TB-RK3588X Board";
+ compatible = "rockchip,rk3588-toybrick-x0", "rockchip,rk3588";
+
+ aliases {
+ mmc0 = &sdhci;
+ };
+
+ chosen {
+ stdout-path = "serial2:1500000n8";
+ };
+
+ adc-keys {
+ compatible = "adc-keys";
+ io-channels = <&saradc 1>;
+ io-channel-names = "buttons";
+ keyup-threshold-microvolt = <1800000>;
+ poll-interval = <100>;
+
+ button-vol-up {
+ label = "Volume Up";
+ linux,code = <KEY_VOLUMEUP>;
+ press-threshold-microvolt = <17000>;
+ };
+
+ button-vol-down {
+ label = "Volume Down";
+ linux,code = <KEY_VOLUMEDOWN>;
+ press-threshold-microvolt = <417000>;
+ };
+
+ button-menu {
+ label = "Menu";
+ linux,code = <KEY_MENU>;
+ press-threshold-microvolt = <890000>;
+ };
+
+ button-escape {
+ label = "Escape";
+ linux,code = <KEY_ESC>;
+ press-threshold-microvolt = <1235000>;
+ };
+ };
+
+ backlight: backlight {
+ compatible = "pwm-backlight";
+ power-supply = <&vcc12v_dcin>;
+ pwms = <&pwm2 0 25000 0>;
+ };
+
+ pcie20_avdd0v85: pcie20-avdd0v85-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "pcie20_avdd0v85";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+ vin-supply = <&vdd_0v85_s0>;
+ };
+
+ pcie20_avdd1v8: pcie20-avdd1v8-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "pcie20_avdd1v8";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&avcc_1v8_s0>;
+ };
+
+ pcie30_avdd0v75: pcie30-avdd0v75-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "pcie30_avdd0v75";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ vin-supply = <&avdd_0v75_s0>;
+ };
+
+ pcie30_avdd1v8: pcie30-avdd1v8-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "pcie30_avdd1v8";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&avcc_1v8_s0>;
+ };
+
+ vcc12v_dcin: vcc12v-dcin-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc12v_dcin";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
+ };
+
+ vcc5v0_host: vcc5v0-host-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpio = <&gpio4 RK_PB0 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&vcc5v0_host_en>;
+ regulator-name = "vcc5v0_host";
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_usb>;
+ };
+
+ vcc5v0_sys: vcc5v0-sys-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc12v_dcin>;
+ };
+
+ vcc5v0_usbdcin: vcc5v0-usbdcin-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_usbdcin";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc12v_dcin>;
+ };
+
+ vcc5v0_usb: vcc5v0-usb-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_usb";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_usbdcin>;
+ };
+
+ vcc_1v1_nldo_s3: vcc-1v1-nldo-s3-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_1v1_nldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+};
+
+&combphy0_ps {
+ status = "okay";
+};
+
+&combphy2_psu {
+ status = "okay";
+};
+
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_l0 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&gmac0 {
+ clock_in_out = "output";
+ phy-handle = <&rgmii_phy>;
+ phy-mode = "rgmii-rxid";
+ pinctrl-0 = <&gmac0_miim
+ &gmac0_tx_bus2
+ &gmac0_rx_bus2
+ &gmac0_rgmii_clk
+ &gmac0_rgmii_bus>;
+ pinctrl-names = "default";
+ rx_delay = <0x00>;
+ tx_delay = <0x43>;
+ status = "okay";
+};
+
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0m2_xfer>;
+ status = "okay";
+
+ vdd_cpu_big0_s0: regulator@42 {
+ compatible = "rockchip,rk8602";
+ reg = <0x42>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big0_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_big1_s0: regulator@43 {
+ compatible = "rockchip,rk8603", "rockchip,rk8602";
+ reg = <0x43>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big1_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+ clock-output-names = "hym8563";
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PD4 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&hym8563_int>;
+ wakeup-source;
+ };
+};
+
+&mdio0 {
+ rgmii_phy: ethernet-phy@1 {
+ /* RTL8211F */
+ compatible = "ethernet-phy-id001c.c916";
+ reg = <0x1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&rtl8211f_rst>;
+ reset-assert-us = <20000>;
+ reset-deassert-us = <100000>;
+ reset-gpios = <&gpio4 RK_PB3 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&pinctrl {
+ rtl8211f {
+ rtl8211f_rst: rtl8211f-rst {
+ rockchip,pins = <4 RK_PB3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ };
+
+ hym8563 {
+ hym8563_int: hym8563-int {
+ rockchip,pins = <0 RK_PD4 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ usb {
+ vcc5v0_host_en: vcc5v0-host-en {
+ rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&pwm2 {
+ status = "okay";
+};
+
+&saradc {
+ vref-supply = <&vcc_1v8_s0>;
+ status = "okay";
+};
+
+&sdhci {
+ bus-width = <8>;
+ mmc-hs400-1_8v;
+ mmc-hs400-enhanced-strobe;
+ no-sdio;
+ no-sd;
+ non-removable;
+ status = "okay";
+};
+
+&spi2 {
+ assigned-clocks = <&cru CLK_SPI2>;
+ assigned-clock-rates = <200000000>;
+ num-cs = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2m2_cs0 &spi2m2_pins>;
+ status = "okay";
+
+ pmic@0 {
+ compatible = "rockchip,rk806";
+ reg = <0x0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
+ <&rk806_dvs2_null>, <&rk806_dvs3_null>;
+ spi-max-frequency = <1000000>;
+ system-power-controller;
+
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+ vcc4-supply = <&vcc5v0_sys>;
+ vcc5-supply = <&vcc5v0_sys>;
+ vcc6-supply = <&vcc5v0_sys>;
+ vcc7-supply = <&vcc5v0_sys>;
+ vcc8-supply = <&vcc5v0_sys>;
+ vcc9-supply = <&vcc5v0_sys>;
+ vcc10-supply = <&vcc5v0_sys>;
+ vcc11-supply = <&vcc_2v0_pldo_s3>;
+ vcc12-supply = <&vcc5v0_sys>;
+ vcc13-supply = <&vcc_1v1_nldo_s3>;
+ vcc14-supply = <&vcc_1v1_nldo_s3>;
+ vcca-supply = <&vcc5v0_sys>;
+
+ rk806_dvs1_null: dvs1-null-pins {
+ pins = "gpio_pwrctrl1";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs2_null: dvs2-null-pins {
+ pins = "gpio_pwrctrl2";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs3_null: dvs3-null-pins {
+ pins = "gpio_pwrctrl3";
+ function = "pin_fun0";
+ };
+
+ regulators {
+ vdd_gpu_s0: vdd_gpu_mem_s0: dcdc-reg1 {
+ regulator-name = "vdd_gpu_s0";
+ regulator-boot-on;
+ regulator-enable-ramp-delay = <400>;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_lit_s0: vdd_cpu_lit_mem_s0: dcdc-reg2 {
+ regulator-name = "vdd_cpu_lit_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_log_s0: dcdc-reg3 {
+ regulator-name = "vdd_log_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_vdenc_s0: vdd_vdenc_mem_s0: dcdc-reg4 {
+ regulator-name = "vdd_vdenc_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-init-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_ddr_s0: dcdc-reg5 {
+ regulator-name = "vdd_ddr_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <900000>;
+ regulator-ramp-delay = <12500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ vdd2_ddr_s3: dcdc-reg6 {
+ regulator-name = "vdd2_ddr_s3";
+ regulator-always-on;
+ regulator-boot-on;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc_2v0_pldo_s3: dcdc-reg7 {
+ regulator-name = "vdd_2v0_pldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2000000>;
+ regulator-max-microvolt = <2000000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <2000000>;
+ };
+ };
+
+ vcc_3v3_s3: dcdc-reg8 {
+ regulator-name = "vcc_3v3_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <3300000>;
+ };
+ };
+
+ vddq_ddr_s0: dcdc-reg9 {
+ regulator-name = "vddq_ddr_s0";
+ regulator-always-on;
+ regulator-boot-on;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s3: dcdc-reg10 {
+ regulator-name = "vcc_1v8_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ avcc_1v8_s0: pldo-reg1 {
+ regulator-name = "avcc_1v8_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s0: pldo-reg2 {
+ regulator-name = "vcc_1v8_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ avdd_1v2_s0: pldo-reg3 {
+ regulator-name = "avdd_1v2_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_3v3_s0: pldo-reg4 {
+ regulator-name = "vcc_3v3_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd_s0: pldo-reg5 {
+ regulator-name = "vccio_sd_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ pldo6_s3: pldo-reg6 {
+ regulator-name = "pldo6_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdd_0v75_s3: nldo-reg1 {
+ regulator-name = "vdd_0v75_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_ddr_pll_s0: nldo-reg2 {
+ regulator-name = "vdd_ddr_pll_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ avdd_0v75_s0: nldo-reg3 {
+ regulator-name = "avdd_0v75_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <837500>;
+ regulator-max-microvolt = <837500>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v85_s0: nldo-reg4 {
+ regulator-name = "vdd_0v85_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v75_s0: nldo-reg5 {
+ regulator-name = "vdd_0v75_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+ };
+};
+
+&u2phy2 {
+ status = "okay";
+};
+
+&u2phy2_host {
+ phy-supply = <&vcc5v0_host>;
+ status = "okay";
+};
+
+&u2phy3 {
+ status = "okay";
+};
+
+&u2phy3_host {
+ phy-supply = <&vcc5v0_host>;
+ status = "okay";
+};
+
+&uart2 {
+ pinctrl-0 = <&uart2m0_xfer>;
+ status = "okay";
+};
+
+&usb_host0_ehci {
+ status = "okay";
+};
+
+&usb_host0_ohci {
+ status = "okay";
+};
+
+&usb_host1_ehci {
+ status = "okay";
+};
+
+&usb_host1_ohci {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index ef4f058c20ff..e037bf9db75a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -19,8 +19,8 @@
aliases {
mmc0 = &sdhci;
- mmc1 = &sdio;
- mmc2 = &sdmmc;
+ mmc1 = &sdmmc;
+ mmc2 = &sdio;
};
analog-sound {
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
index dc677f29a9c7..ce8119cbb824 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
@@ -141,6 +141,10 @@
status = "okay";
};
+&combphy2_psu {
+ status = "okay";
+};
+
&cpu_l0 {
cpu-supply = <&vdd_cpu_lit_s0>;
};
@@ -195,13 +199,13 @@
&gpio1 {
gpio-line-names = /* GPIO1 A0-A7 */
- "HEADER_27_3v3", "HEADER_28_3v3", "", "",
+ "HEADER_27_3v3", "", "", "",
"HEADER_29_1v8", "", "HEADER_7_1v8", "",
/* GPIO1 B0-B7 */
"", "HEADER_31_1v8", "HEADER_33_1v8", "",
"HEADER_11_1v8", "HEADER_13_1v8", "", "",
/* GPIO1 C0-C7 */
- "", "", "", "",
+ "", "HEADER_28_3v3", "", "",
"", "", "", "",
/* GPIO1 D0-D7 */
"", "", "", "",
@@ -225,11 +229,11 @@
&gpio4 {
gpio-line-names = /* GPIO4 A0-A7 */
- "", "", "HEADER_37_3v3", "HEADER_32_3v3",
- "HEADER_36_3v3", "", "HEADER_35_3v3", "HEADER_38_3v3",
+ "", "", "HEADER_37_3v3", "HEADER_8_3v3",
+ "HEADER_10_3v3", "", "HEADER_32_3v3", "HEADER_35_3v3",
/* GPIO4 B0-B7 */
"", "", "", "HEADER_40_3v3",
- "HEADER_8_3v3", "HEADER_10_3v3", "", "",
+ "HEADER_38_3v3", "HEADER_36_3v3", "", "",
/* GPIO4 C0-C7 */
"", "", "", "",
"", "", "", "",
@@ -842,3 +846,7 @@
&usb_host1_ohci {
status = "okay";
};
+
+&usb_host2_xhci {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6c.dts b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6c.dts
new file mode 100644
index 000000000000..497bbb57071f
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6c.dts
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include "rk3588s-nanopi-r6s.dts"
+
+/ {
+ model = "FriendlyElec NanoPi R6C";
+ compatible = "friendlyarm,nanopi-r6c", "rockchip,rk3588s";
+};
+
+&lan2_led {
+ label = "user_led";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6s.dts b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6s.dts
new file mode 100644
index 000000000000..4fa644ae510c
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6s.dts
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+/dts-v1/;
+
+#include <dt-bindings/pinctrl/rockchip.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include "rk3588s.dtsi"
+
+/ {
+ model = "FriendlyElec NanoPi R6S";
+ compatible = "friendlyarm,nanopi-r6s", "rockchip,rk3588s";
+
+ aliases {
+ ethernet0 = &gmac1;
+ mmc0 = &sdmmc;
+ mmc1 = &sdhci;
+ };
+
+ chosen {
+ stdout-path = "serial2:1500000n8";
+ };
+
+ adc-keys {
+ compatible = "adc-keys";
+ io-channels = <&saradc 0>;
+ io-channel-names = "buttons";
+ keyup-threshold-microvolt = <1800000>;
+ poll-interval = <100>;
+
+ button-maskrom {
+ label = "Maskrom";
+ linux,code = <KEY_VENDOR>;
+ press-threshold-microvolt = <1800>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ pinctrl-names = "default";
+ pinctrl-0 = <&key1_pin>;
+
+ button-user {
+ label = "User";
+ linux,code = <BTN_1>;
+ gpios = <&gpio1 RK_PC0 GPIO_ACTIVE_LOW>;
+ debounce-interval = <50>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ sys_led: led-0 {
+ label = "sys_led";
+ gpios = <&gpio1 RK_PC1 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ pinctrl-names = "default";
+ pinctrl-0 = <&sys_led_pin>;
+ };
+
+ wan_led: led-1 {
+ label = "wan_led";
+ gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&wan_led_pin>;
+ };
+
+ lan1_led: led-2 {
+ label = "lan1_led";
+ gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&lan1_led_pin>;
+ };
+
+ lan2_led: led-3 {
+ label = "lan2_led";
+ gpios = <&gpio1 RK_PC4 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&lan2_led_pin>;
+ };
+ };
+
+ vcc5v0_sys: vcc5v0-sys-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
+ vcc_1v1_nldo_s3: vcc-1v1-nldo-s3-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_1v1_nldo_s3";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+
+ vcc_3v3_s0: vcc-3v3-s0-regulator {
+ compatible = "regulator-fixed";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc_3v3_s0";
+ vin-supply = <&vcc_3v3_s3>;
+ };
+
+ vcc_3v3_sd_s0: vcc-3v3-sd-s0-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpios = <&gpio4 RK_PB4 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sd_s0_pwr>;
+ regulator-name = "vcc_3v3_sd_s0";
+ regulator-boot-on;
+ regulator-max-microvolt = <3000000>;
+ regulator-min-microvolt = <3000000>;
+ vin-supply = <&vcc_3v3_s3>;
+ };
+
+ vcc_3v3_pcie20: vcc3v3-pcie20-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_3v3_pcie20";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc_3v3_s3>;
+ };
+
+ vcc5v0_usb: vcc5v0-usb-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_usb";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_sys>;
+ };
+
+ vcc5v0_usb_otg0: vcc5v0-usb-otg0-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpios = <&gpio1 RK_PD2 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&typec5v_pwren>;
+ regulator-name = "vcc5v0_usb_otg0";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_usb>;
+ };
+
+ vcc5v0_host_20: vcc5v0-host-20-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ gpios = <&gpio4 RK_PB5 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&vcc5v0_host20_en>;
+ regulator-name = "vcc5v0_host_20";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vcc5v0_usb>;
+ };
+};
+
+&combphy0_ps {
+ status = "okay";
+};
+
+&combphy2_psu {
+ status = "okay";
+};
+
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big0_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big1_s0>;
+};
+
+&cpu_l0 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&gmac1 {
+ clock_in_out = "output";
+ phy-handle = <&rgmii_phy1>;
+ phy-mode = "rgmii-rxid";
+ pinctrl-0 = <&gmac1_miim
+ &gmac1_tx_bus2
+ &gmac1_rx_bus2
+ &gmac1_rgmii_clk
+ &gmac1_rgmii_bus>;
+ pinctrl-names = "default";
+ tx_delay = <0x42>;
+ status = "okay";
+};
+
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0m2_xfer>;
+ status = "okay";
+
+ vdd_cpu_big0_s0: regulator@42 {
+ compatible = "rockchip,rk8602";
+ reg = <0x42>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big0_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_big1_s0: regulator@43 {
+ compatible = "rockchip,rk8603", "rockchip,rk8602";
+ reg = <0x43>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_cpu_big1_s0";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-ramp-delay = <2300>;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ vdd_npu_s0: regulator@42 {
+ compatible = "rockchip,rk8602";
+ reg = <0x42>;
+ fcs,suspend-voltage-selector = <1>;
+ regulator-name = "vdd_npu_s0";
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <2300>;
+ regulator-boot-on;
+ regulator-always-on;
+ vin-supply = <&vcc5v0_sys>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+};
+
+&i2c6 {
+ clock-frequency = <200000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c6m0_xfer>;
+ status = "okay";
+
+ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+ clock-output-names = "hym8563";
+ pinctrl-names = "default";
+ pinctrl-0 = <&rtc_int>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PB0 IRQ_TYPE_LEVEL_LOW>;
+ wakeup-source;
+ };
+};
+
+&mdio1 {
+ rgmii_phy1: ethernet-phy@1 {
+ compatible = "ethernet-phy-id001c.c916";
+ reg = <0x1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&rtl8211f_rst>;
+ reset-assert-us = <20000>;
+ reset-deassert-us = <100000>;
+ reset-gpios = <&gpio3 RK_PB7 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&pcie2x1l1 {
+ reset-gpios = <&gpio1 RK_PA7 GPIO_ACTIVE_HIGH>;
+ vpcie3v3-supply = <&vcc_3v3_pcie20>;
+ status = "okay";
+};
+
+&pcie2x1l2 {
+ reset-gpios = <&gpio3 RK_PD1 GPIO_ACTIVE_HIGH>;
+ vpcie3v3-supply = <&vcc_3v3_pcie20>;
+ status = "okay";
+};
+
+&pinctrl {
+ gpio-key {
+ key1_pin: key1-pin {
+ rockchip,pins = <1 RK_PC0 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ gpio-leds {
+ sys_led_pin: sys-led-pin {
+ rockchip,pins =
+ <1 RK_PC1 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ wan_led_pin: wan-led-pin {
+ rockchip,pins =
+ <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ lan1_led_pin: lan1-led-pin {
+ rockchip,pins =
+ <1 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ lan2_led_pin: lan2-led-pin {
+ rockchip,pins =
+ <1 RK_PC4 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ hym8563 {
+ rtc_int: rtc-int {
+ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ sdmmc {
+ sd_s0_pwr: sd-s0-pwr {
+ rockchip,pins = <4 RK_PB4 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ usb {
+ typec5v_pwren: typec5v-pwren {
+ rockchip,pins = <1 RK_PD2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ vcc5v0_host20_en: vcc5v0-host20-en {
+ rockchip,pins = <4 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ rtl8211f {
+ rtl8211f_rst: rtl8211f-rst {
+ rockchip,pins = <3 RK_PB7 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&saradc {
+ vref-supply = <&avcc_1v8_s0>;
+ status = "okay";
+};
+
+&sdhci {
+ bus-width = <8>;
+ no-sdio;
+ no-sd;
+ non-removable;
+ mmc-hs200-1_8v;
+ status = "okay";
+};
+
+&sdmmc {
+ bus-width = <4>;
+ cap-sd-highspeed;
+ disable-wp;
+ max-frequency = <150000000>;
+ no-mmc;
+ no-sdio;
+ sd-uhs-sdr104;
+ vmmc-supply = <&vcc_3v3_sd_s0>;
+ vqmmc-supply = <&vccio_sd_s0>;
+ status = "okay";
+};
+
+&spi2 {
+ status = "okay";
+ assigned-clocks = <&cru CLK_SPI2>;
+ assigned-clock-rates = <200000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2m2_cs0 &spi2m2_pins>;
+ num-cs = <1>;
+
+ pmic@0 {
+ compatible = "rockchip,rk806";
+ spi-max-frequency = <1000000>;
+ reg = <0x0>;
+
+ interrupt-parent = <&gpio0>;
+ interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
+ <&rk806_dvs2_null>, <&rk806_dvs3_null>;
+
+ system-power-controller;
+
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+ vcc4-supply = <&vcc5v0_sys>;
+ vcc5-supply = <&vcc5v0_sys>;
+ vcc6-supply = <&vcc5v0_sys>;
+ vcc7-supply = <&vcc5v0_sys>;
+ vcc8-supply = <&vcc5v0_sys>;
+ vcc9-supply = <&vcc5v0_sys>;
+ vcc10-supply = <&vcc5v0_sys>;
+ vcc11-supply = <&vcc_2v0_pldo_s3>;
+ vcc12-supply = <&vcc5v0_sys>;
+ vcc13-supply = <&vcc_1v1_nldo_s3>;
+ vcc14-supply = <&vcc_1v1_nldo_s3>;
+ vcca-supply = <&vcc5v0_sys>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ rk806_dvs1_null: dvs1-null-pins {
+ pins = "gpio_pwrctrl1";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs2_null: dvs2-null-pins {
+ pins = "gpio_pwrctrl2";
+ function = "pin_fun0";
+ };
+
+ rk806_dvs3_null: dvs3-null-pins {
+ pins = "gpio_pwrctrl3";
+ function = "pin_fun0";
+ };
+
+ regulators {
+ vdd_gpu_s0: vdd_gpu_mem_s0: dcdc-reg1 {
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_gpu_s0";
+ regulator-enable-ramp-delay = <400>;
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_cpu_lit_s0: vdd_cpu_lit_mem_s0: dcdc-reg2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_cpu_lit_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_log_s0: dcdc-reg3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_log_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_vdenc_s0: vdd_vdenc_mem_s0: dcdc-reg4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_vdenc_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_ddr_s0: dcdc-reg5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <900000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_ddr_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ vdd2_ddr_s3: dcdc-reg6 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "vdd2_ddr_s3";
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc_2v0_pldo_s3: dcdc-reg7 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2000000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_2v0_pldo_s3";
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <2000000>;
+ };
+ };
+
+ vcc_3v3_s3: dcdc-reg8 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc_3v3_s3";
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <3300000>;
+ };
+ };
+
+ vddq_ddr_s0: dcdc-reg9 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "vddq_ddr_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_1v8_s3: dcdc-reg10 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc_1v8_s3";
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ avcc_1v8_s0: pldo-reg1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "avcc_1v8_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vcc_1v8_s0: pldo-reg2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc_1v8_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ avdd_1v2_s0: pldo-reg3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-name = "avdd_1v2_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ avcc_3v3_s0: pldo-reg4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "avcc_3v3_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd_s0: pldo-reg5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vccio_sd_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ pldo6_s3: pldo-reg6 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "pldo6_s3";
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdd_0v75_s3: nldo-reg1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ regulator-name = "vdd_0v75_s3";
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ avdd_ddr_pll_s0: nldo-reg2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+ regulator-name = "avdd_ddr_pll_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ regulator-suspend-microvolt = <850000>;
+ };
+ };
+
+ avdd_0v75_s0: nldo-reg3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ regulator-name = "avdd_0v75_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ avdd_0v85_s0: nldo-reg4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+ regulator-name = "avdd_0v85_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v75_s0: nldo-reg5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ regulator-name = "vdd_0v75_s0";
+
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+ };
+};
+
+&tsadc {
+ status = "okay";
+};
+
+&u2phy2 {
+ status = "okay";
+};
+
+&u2phy2_host {
+ phy-supply = <&vcc5v0_host_20>;
+ status = "okay";
+};
+
+&uart2 {
+ pinctrl-0 = <&uart2m0_xfer>;
+ status = "okay";
+};
+
+&usb_host0_ehci {
+ status = "okay";
+};
+
+&usb_host0_ohci {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
index 2002fd0221fa..00afb90d4eb1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
@@ -366,7 +366,6 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
- cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 36b1b7acfe6a..87b83c87bd55 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -519,6 +519,7 @@
vo1_grf: syscon@fd5a8000 {
compatible = "rockchip,rk3588-vo-grf", "syscon";
reg = <0x0 0xfd5a8000 0x0 0x100>;
+ clocks = <&cru PCLK_VO1GRF>;
};
php_grf: syscon@fd5b0000 {
@@ -586,6 +587,11 @@
};
};
+ hdptxphy0_grf: syscon@fd5e0000 {
+ compatible = "rockchip,rk3588-hdptxphy-grf", "syscon";
+ reg = <0x0 0xfd5e0000 0x0 0x100>;
+ };
+
ioc: syscon@fd5f0000 {
compatible = "rockchip,rk3588-ioc", "syscon";
reg = <0x0 0xfd5f0000 0x0 0x10000>;
@@ -1704,7 +1710,6 @@
dmas = <&dmac1 0>, <&dmac1 1>;
dma-names = "tx", "rx";
power-domains = <&power RK3588_PD_AUDIO>;
- rockchip,trcm-sync-tx-only;
pinctrl-names = "default";
pinctrl-0 = <&i2s2m1_lrck
&i2s2m1_sclk
@@ -1725,7 +1730,6 @@
dmas = <&dmac1 2>, <&dmac1 3>;
dma-names = "tx", "rx";
power-domains = <&power RK3588_PD_AUDIO>;
- rockchip,trcm-sync-tx-only;
pinctrl-names = "default";
pinctrl-0 = <&i2s3_lrck
&i2s3_sclk
@@ -2360,6 +2364,22 @@
#dma-cells = <1>;
};
+ hdptxphy_hdmi0: phy@fed60000 {
+ compatible = "rockchip,rk3588-hdptx-phy";
+ reg = <0x0 0xfed60000 0x0 0x2000>;
+ clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>;
+ clock-names = "ref", "apb";
+ #phy-cells = <0>;
+ resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>,
+ <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>,
+ <&cru SRST_HDPTX0_LANE>, <&cru SRST_HDPTX0_ROPLL>,
+ <&cru SRST_HDPTX0_LCPLL>;
+ reset-names = "phy", "apb", "init", "cmn", "lane", "ropll",
+ "lcpll";
+ rockchip,grf = <&hdptxphy0_grf>;
+ status = "disabled";
+ };
+
combphy0_ps: phy@fee00000 {
compatible = "rockchip,rk3588-naneng-combphy";
reg = <0x0 0xfee00000 0x0 0x100>;
diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi
index 96859d098ef8..5dd4f3580a60 100644
--- a/arch/arm64/boot/dts/st/stm32mp251.dtsi
+++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi
@@ -52,6 +52,18 @@
compatible = "fixed-clock";
clock-frequency = <200000000>;
};
+
+ ck_icn_p_vdec: ck-icn-p-vdec {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <200000000>;
+ };
+
+ ck_icn_p_venc: ck-icn-p-venc {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <200000000>;
+ };
};
firmware {
diff --git a/arch/arm64/boot/dts/st/stm32mp255.dtsi b/arch/arm64/boot/dts/st/stm32mp255.dtsi
index e6fa596211f5..17f197c5b22b 100644
--- a/arch/arm64/boot/dts/st/stm32mp255.dtsi
+++ b/arch/arm64/boot/dts/st/stm32mp255.dtsi
@@ -6,4 +6,21 @@
#include "stm32mp253.dtsi"
/ {
+ soc@0 {
+ rifsc: rifsc-bus@42080000 {
+ vdec: vdec@480d0000 {
+ compatible = "st,stm32mp25-vdec";
+ reg = <0x480d0000 0x3c8>;
+ interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ck_icn_p_vdec>;
+ };
+
+ venc: venc@480e0000 {
+ compatible = "st,stm32mp25-venc";
+ reg = <0x480e0000 0x800>;
+ interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ck_icn_ls_mcu>;
+ };
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/tesla/fsd.dtsi b/arch/arm64/boot/dts/tesla/fsd.dtsi
index aaffb50b8b60..047a83cee603 100644
--- a/arch/arm64/boot/dts/tesla/fsd.dtsi
+++ b/arch/arm64/boot/dts/tesla/fsd.dtsi
@@ -601,6 +601,7 @@
clocks = <&clock_peric PERIC_PCLK_UART0>,
<&clock_peric PERIC_SCLK_UART0>;
clock-names = "uart", "clk_uart_baud0";
+ samsung,uart-fifosize = <64>;
status = "disabled";
};
@@ -613,6 +614,7 @@
clocks = <&clock_peric PERIC_PCLK_UART1>,
<&clock_peric PERIC_SCLK_UART1>;
clock-names = "uart", "clk_uart_baud0";
+ samsung,uart-fifosize = <64>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/ti/Makefile b/arch/arm64/boot/dts/ti/Makefile
index 52c1dc910308..9a722c2473fb 100644
--- a/arch/arm64/boot/dts/ti/Makefile
+++ b/arch/arm64/boot/dts/ti/Makefile
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
#
# Make file to build device tree binaries for boards based on
# Texas Instruments Inc processors
@@ -22,6 +22,7 @@ dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-wifi-dahlia.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-wifi-dev.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-wifi-mallow.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-wifi-yavia.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-am62x-phyboard-lyra-gpio-fan.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-am62-lp-sk.dtb
# Boards with AM62Ax SoC
@@ -37,7 +38,15 @@ dtb-$(CONFIG_ARCH_K3) += k3-am62x-sk-csi2-imx219.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-am62x-sk-hdmi-audio.dtbo
# Boards with AM64x SoC
+k3-am642-hummingboard-t-pcie-dtbs := \
+ k3-am642-hummingboard-t.dtb k3-am642-hummingboard-t-pcie.dtbo
+k3-am642-hummingboard-t-usb3-dtbs := \
+ k3-am642-hummingboard-t.dtb k3-am642-hummingboard-t-usb3.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-am642-evm.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-am642-evm-icssg1-dualemac.dtbo
+dtb-$(CONFIG_ARCH_K3) += k3-am642-hummingboard-t.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-am642-hummingboard-t-pcie.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-am642-hummingboard-t-usb3.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am642-phyboard-electra-rdk.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am642-sk.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am642-tqma64xxl-mbax4xxl.dtb
@@ -45,18 +54,24 @@ dtb-$(CONFIG_ARCH_K3) += k3-am64-tqma64xxl-mbax4xxl-sdcard.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-am64-tqma64xxl-mbax4xxl-wlan.dtbo
# Boards with AM65x SoC
-k3-am654-gp-evm-dtbs := k3-am654-base-board.dtb k3-am654-base-board-rocktech-rk101-panel.dtbo
+k3-am654-gp-evm-dtbs := k3-am654-base-board.dtb \
+ k3-am654-base-board-rocktech-rk101-panel.dtbo \
+ k3-am654-pcie-usb3.dtbo
k3-am654-evm-dtbs := k3-am654-base-board.dtb k3-am654-icssg2.dtbo
-k3-am654-idk-dtbs := k3-am654-evm.dtb k3-am654-idk.dtbo
+k3-am654-idk-dtbs := k3-am654-evm.dtb k3-am654-idk.dtbo k3-am654-pcie-usb2.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-am6528-iot2050-basic.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am6528-iot2050-basic-pg2.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am6548-iot2050-advanced.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am6548-iot2050-advanced-m2.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am6548-iot2050-advanced-pg2.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-am6548-iot2050-advanced-sm.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am654-base-board.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am654-gp-evm.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am654-evm.dtb
dtb-$(CONFIG_ARCH_K3) += k3-am654-idk.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-am654-base-board-rocktech-rk101-panel.dtbo
+dtb-$(CONFIG_ARCH_K3) += k3-am654-pcie-usb2.dtbo
+dtb-$(CONFIG_ARCH_K3) += k3-am654-pcie-usb3.dtbo
# Boards with J7200 SoC
k3-j7200-evm-dtbs := k3-j7200-common-proc-board.dtb k3-j7200-evm-quad-port-eth-exp.dtbo
@@ -69,6 +84,7 @@ dtb-$(CONFIG_ARCH_K3) += k3-j721e-evm.dtb
dtb-$(CONFIG_ARCH_K3) += k3-j721e-evm-gesi-exp-board.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-j721e-evm-pcie0-ep.dtbo
dtb-$(CONFIG_ARCH_K3) += k3-j721e-sk.dtb
+dtb-$(CONFIG_ARCH_K3) += k3-j721e-sk-csi2-dual-imx219.dtbo
# Boards with J721s2 SoC
dtb-$(CONFIG_ARCH_K3) += k3-am68-sk-base-board.dtb
@@ -78,6 +94,9 @@ k3-j721s2-evm-dtbs := k3-j721s2-common-proc-board.dtb k3-j721s2-evm-gesi-exp-boa
dtb-$(CONFIG_ARCH_K3) += k3-j721s2-evm.dtb
dtb-$(CONFIG_ARCH_K3) += k3-j721s2-evm-pcie1-ep.dtbo
+# Boards with J722s SoC
+dtb-$(CONFIG_ARCH_K3) += k3-j722s-evm.dtb
+
# Boards with J784s4 SoC
dtb-$(CONFIG_ARCH_K3) += k3-am69-sk.dtb
dtb-$(CONFIG_ARCH_K3) += k3-j784s4-evm.dtb
@@ -87,6 +106,8 @@ k3-am625-beagleplay-csi2-ov5640-dtbs := k3-am625-beagleplay.dtb \
k3-am625-beagleplay-csi2-ov5640.dtbo
k3-am625-beagleplay-csi2-tevi-ov5640-dtbs := k3-am625-beagleplay.dtb \
k3-am625-beagleplay-csi2-tevi-ov5640.dtbo
+k3-am625-phyboard-lyra-gpio-fan-dtbs := k3-am625-phyboard-lyra-rdk.dtb \
+ k3-am62x-phyboard-lyra-gpio-fan.dtbo
k3-am625-sk-csi2-imx219-dtbs := k3-am625-sk.dtb \
k3-am62x-sk-csi2-imx219.dtbo
k3-am625-sk-csi2-ov5640-dtbs := k3-am625-sk.dtb \
@@ -101,12 +122,27 @@ k3-am62a7-sk-csi2-ov5640-dtbs := k3-am62a7-sk.dtb \
k3-am62x-sk-csi2-ov5640.dtbo
k3-am62a7-sk-csi2-tevi-ov5640-dtbs := k3-am62a7-sk.dtb \
k3-am62x-sk-csi2-tevi-ov5640.dtbo
+k3-am62a7-sk-hdmi-audio-dtbs := k3-am62a7-sk.dtb k3-am62x-sk-hdmi-audio.dtbo
+k3-am62p5-sk-csi2-imx219-dtbs := k3-am62p5-sk.dtb \
+ k3-am62x-sk-csi2-imx219.dtbo
+k3-am62p5-sk-csi2-ov5640-dtbs := k3-am62p5-sk.dtb \
+ k3-am62x-sk-csi2-ov5640.dtbo
+k3-am62p5-sk-csi2-tevi-ov5640-dtbs := k3-am62p5-sk.dtb \
+ k3-am62x-sk-csi2-tevi-ov5640.dtbo
+k3-am642-evm-icssg1-dualemac-dtbs := \
+ k3-am642-evm.dtb k3-am642-evm-icssg1-dualemac.dtbo
k3-am642-tqma64xxl-mbax4xxl-sdcard-dtbs := \
k3-am642-tqma64xxl-mbax4xxl.dtb k3-am64-tqma64xxl-mbax4xxl-sdcard.dtbo
k3-am642-tqma64xxl-mbax4xxl-wlan-dtbs := \
k3-am642-tqma64xxl-mbax4xxl.dtb k3-am64-tqma64xxl-mbax4xxl-wlan.dtbo
+k3-am68-sk-base-board-csi2-dual-imx219-dtbs := k3-am68-sk-base-board.dtb \
+ k3-j721e-sk-csi2-dual-imx219.dtbo
+k3-am69-sk-csi2-dual-imx219-dtbs := k3-am69-sk.dtb \
+ k3-j721e-sk-csi2-dual-imx219.dtbo
k3-j721e-evm-pcie0-ep-dtbs := k3-j721e-common-proc-board.dtb \
k3-j721e-evm-pcie0-ep.dtbo
+k3-j721e-sk-csi2-dual-imx219-dtbs := k3-j721e-sk.dtb \
+ k3-j721e-sk-csi2-dual-imx219.dtbo
k3-j721s2-evm-pcie1-ep-dtbs := k3-j721s2-common-proc-board.dtb \
k3-j721s2-evm-pcie1-ep.dtbo
dtb- += k3-am625-beagleplay-csi2-ov5640.dtb \
@@ -118,9 +154,17 @@ dtb- += k3-am625-beagleplay-csi2-ov5640.dtb \
k3-am62-lp-sk-hdmi-audio.dtb \
k3-am62a7-sk-csi2-imx219.dtb \
k3-am62a7-sk-csi2-ov5640.dtb \
+ k3-am62a7-sk-hdmi-audio.dtb \
+ k3-am62p5-sk-csi2-imx219.dtb \
+ k3-am62p5-sk-csi2-ov5640.dtb \
+ k3-am62p5-sk-csi2-tevi-ov5640.dtb \
+ k3-am642-evm-icssg1-dualemac.dtb \
k3-am642-tqma64xxl-mbax4xxl-sdcard.dtb \
k3-am642-tqma64xxl-mbax4xxl-wlan.dtb \
+ k3-am68-sk-base-board-csi2-dual-imx219-dtbs \
+ k3-am69-sk-csi2-dual-imx219-dtbs \
k3-j721e-evm-pcie0-ep.dtb \
+ k3-j721e-sk-csi2-dual-imx219-dtbs \
k3-j721s2-evm-pcie1-ep.dtb
# Enable support for device-tree overlays
@@ -128,7 +172,12 @@ DTC_FLAGS_k3-am625-beagleplay += -@
DTC_FLAGS_k3-am625-sk += -@
DTC_FLAGS_k3-am62-lp-sk += -@
DTC_FLAGS_k3-am62a7-sk += -@
+DTC_FLAGS_k3-am62p5-sk += -@
+DTC_FLAGS_k3-am642-evm += -@
DTC_FLAGS_k3-am642-tqma64xxl-mbax4xxl += -@
DTC_FLAGS_k3-am6548-iot2050-advanced-m2 += -@
+DTC_FLAGS_k3-am68-sk-base-board += -@
+DTC_FLAGS_k3-am69-sk += -@
DTC_FLAGS_k3-j721e-common-proc-board += -@
+DTC_FLAGS_k3-j721e-sk += -@
DTC_FLAGS_k3-j721s2-common-proc-board += -@
diff --git a/arch/arm64/boot/dts/ti/k3-am62-lp-sk.dts b/arch/arm64/boot/dts/ti/k3-am62-lp-sk.dts
index 5e6feb8cd125..c4149059a4c5 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-lp-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am62-lp-sk.dts
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* AM62x LP SK: https://www.ti.com/tool/SK-AM62-LP
*
- * Copyright (C) 2021-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi
index 464b7565d085..e9cffca073ef 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM625 SoC Family Main Domain peripherals
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_main {
@@ -42,9 +42,8 @@
};
};
- main_conf: syscon@100000 {
- compatible = "syscon", "simple-mfd";
- reg = <0x00 0x00100000 0x00 0x20000>;
+ main_conf: bus@100000 {
+ compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x00 0x00100000 0x20000>;
@@ -559,10 +558,9 @@
clock-names = "clk_ahb", "clk_xin";
assigned-clocks = <&k3_clks 57 6>;
assigned-clock-parents = <&k3_clks 57 8>;
+ bus-width = <8>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
- ti,trm-icp = <0x2>;
- bus-width = <8>;
ti,clkbuf-sel = <0x7>;
ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-mmc-hs = <0x0>;
@@ -580,7 +578,8 @@
power-domains = <&k3_pds 58 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 58 5>, <&k3_clks 58 6>;
clock-names = "clk_ahb", "clk_xin";
- ti,trm-icp = <0x2>;
+ bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
ti,otap-del-sel-legacy = <0x8>;
ti,otap-del-sel-sd-hs = <0x0>;
ti,otap-del-sel-sdr12 = <0x0>;
@@ -592,8 +591,6 @@
ti,itap-del-sel-sd-hs = <0x1>;
ti,itap-del-sel-sdr12 = <0xa>;
ti,itap-del-sel-sdr25 = <0x1>;
- ti,clkbuf-sel = <0x7>;
- bus-width = <4>;
status = "disabled";
};
@@ -604,7 +601,8 @@
power-domains = <&k3_pds 184 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 184 5>, <&k3_clks 184 6>;
clock-names = "clk_ahb", "clk_xin";
- ti,trm-icp = <0x2>;
+ bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
ti,otap-del-sel-legacy = <0x8>;
ti,otap-del-sel-sd-hs = <0x0>;
ti,otap-del-sel-sdr12 = <0x0>;
@@ -616,7 +614,6 @@
ti,itap-del-sel-sd-hs = <0xa>;
ti,itap-del-sel-sdr12 = <0xa>;
ti,itap-del-sel-sdr25 = <0x1>;
- ti,clkbuf-sel = <0x7>;
status = "disabled";
};
@@ -640,6 +637,8 @@
interrupt-names = "host", "peripheral";
maximum-speed = "high-speed";
dr_mode = "otg";
+ snps,usb2-gadget-lpm-disable;
+ snps,usb2-lpm-disable;
};
};
@@ -663,6 +662,8 @@
interrupt-names = "host", "peripheral";
maximum-speed = "high-speed";
dr_mode = "otg";
+ snps,usb2-gadget-lpm-disable;
+ snps,usb2-lpm-disable;
};
};
@@ -779,9 +780,10 @@
<0x00 0x30207000 0x00 0x1000>, /* ovr1 */
<0x00 0x30208000 0x00 0x1000>, /* ovr2 */
<0x00 0x3020a000 0x00 0x1000>, /* vp1: Used for OLDI */
- <0x00 0x3020b000 0x00 0x1000>; /* vp2: Used as DPI Out */
+ <0x00 0x3020b000 0x00 0x1000>, /* vp2: Used as DPI Out */
+ <0x00 0x30201000 0x00 0x1000>; /* common1 */
reg-names = "common", "vidl1", "vid",
- "ovr1", "ovr2", "vp1", "vp2";
+ "ovr1", "ovr2", "vp1", "vp2", "common1";
power-domains = <&k3_pds 186 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 186 6>,
<&dss_vp1_clk>,
diff --git a/arch/arm64/boot/dts/ti/k3-am62-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62-mcu.dtsi
index 0e0b234581c6..e66d486ef1f2 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-mcu.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM625 SoC Family MCU Domain peripherals
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu {
diff --git a/arch/arm64/boot/dts/ti/k3-am62-phycore-som.dtsi b/arch/arm64/boot/dts/ti/k3-am62-phycore-som.dtsi
index aa43e7407eee..43488cc8bcb1 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-phycore-som.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-phycore-som.dtsi
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2022 - 2023 PHYTEC Messtechnik GmbH
+ * Copyright (C) 2022-2024 PHYTEC Messtechnik GmbH
* Author: Wadim Egorov <w.egorov@phytec.de>
*
* Product homepage:
@@ -317,7 +317,6 @@
&sdhci0 {
pinctrl-names = "default";
pinctrl-0 = <&main_mmc0_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
non-removable;
status = "okay";
diff --git a/arch/arm64/boot/dts/ti/k3-am62-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-am62-thermal.dtsi
index a358757e26f0..12ba833002a1 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi
index bf6d27e70bc4..6c4cec8728e4 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi
@@ -185,7 +185,6 @@
/* Verdin SD_1 */
&sdhci1 {
- ti,driver-strength-ohm = <33>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dev.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dev.dtsi
index 680071688dcb..be62648e7818 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dev.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dev.dtsi
@@ -206,7 +206,6 @@
/* Verdin SD_1 */
&sdhci1 {
- ti,driver-strength-ohm = <33>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-mallow.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-mallow.dtsi
index 17b93534f658..77b1beb638ad 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin-mallow.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-mallow.dtsi
@@ -127,6 +127,16 @@
<&pinctrl_qspi1_cs2_gpio>;
cs-gpios = <0>, <&main_gpio0 12 GPIO_ACTIVE_LOW>;
status = "okay";
+
+ tpm@1 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
+ reg = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_qspi1_dqs_gpio>;
+ interrupt-parent = <&main_gpio1>;
+ interrupts = <18 IRQ_TYPE_EDGE_FALLING>;
+ spi-max-frequency = <18500000>;
+ };
};
/* Verdin UART_3 */
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-wifi.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-wifi.dtsi
index a6808b10c7b2..4768ef42c4fc 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin-wifi.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-wifi.dtsi
@@ -26,7 +26,6 @@
mmc-pwrseq = <&wifi_pwrseq>;
non-removable;
ti,fails-without-test-cd;
- ti,driver-strength-ohm = <50>;
vmmc-supply = <&reg_3v3>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
index 6a06724b6d16..e8d8857ad51f 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
@@ -42,6 +42,22 @@
usb1 = &usb1;
};
+ connector {
+ compatible = "gpio-usb-b-connector", "usb-b-connector";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb0_id>;
+ id-gpios = <&main_gpio1 19 GPIO_ACTIVE_HIGH>;
+ label = "USB_1";
+ self-powered;
+ vbus-supply = <&reg_usb0_vbus>;
+
+ port {
+ usb_dr_connector: endpoint {
+ remote-endpoint = <&usb0_ep>;
+ };
+ };
+ };
+
verdin_gpio_keys: gpio-keys {
compatible = "gpio-keys";
pinctrl-names = "default";
@@ -151,6 +167,18 @@
vin-supply = <&reg_sd_3v3_1v8>;
};
+ reg_usb0_vbus: regulator-usb0-vbus {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb0_en>;
+ enable-active-high;
+ /* Verdin USB_1_EN (SODIMM 155) */
+ gpio = <&main_gpio1 50 GPIO_ACTIVE_HIGH>;
+ regulator-max-microvolt = <5000000>;
+ regulator-min-microvolt = <5000000>;
+ regulator-name = "USB_1_EN";
+ };
+
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
@@ -436,6 +464,13 @@
>;
};
+ /* Verdin USB_1_EN */
+ pinctrl_usb0_en: main-gpio1-50-default-pins {
+ pinctrl-single,pins = <
+ AM62X_IOPAD(0x0254, PIN_INPUT, 7) /* (C20) USB0_DRVVBUS.GPIO1_50 */ /* SODIMM 155 */
+ >;
+ };
+
/* On-module I2C - PMIC_I2C */
pinctrl_i2c0: main-i2c0-default-pins {
pinctrl-single,pins = <
@@ -660,13 +695,6 @@
>;
};
- /* Verdin USB_1 */
- pinctrl_usb0: main-usb0-default-pins {
- pinctrl-single,pins = <
- AM62X_IOPAD(0x0254, PIN_OUTPUT, 0) /* (C20) USB0_DRVVBUS */ /* SODIMM 155 */
- >;
- };
-
/* Verdin USB_2 */
pinctrl_usb1: main-usb1-default-pins {
pinctrl-single,pins = <
@@ -1013,7 +1041,7 @@
"",
"",
"SODIMM_17",
- "", /* 50 */
+ "SODIMM_155", /* 50 */
"",
"",
"",
@@ -1118,7 +1146,7 @@
regulator-always-on;
regulator-boot-on;
regulator-max-microvolt = <850000>;
- regulator-min-microvolt = <850000>;
+ regulator-min-microvolt = <750000>;
regulator-name = "+VDD_CORE (PMIC BUCK1)";
};
@@ -1407,7 +1435,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_sdhci0>;
non-removable;
- ti,driver-strength-ohm = <50>;
status = "okay";
};
@@ -1416,7 +1443,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_sdhci1>;
disable-wp;
- ti,driver-strength-ohm = <50>;
vmmc-supply = <&reg_sdhc1_vmmc>;
vqmmc-supply = <&reg_sdhc1_vqmmc>;
status = "disabled";
@@ -1428,11 +1454,16 @@
status = "disabled";
};
-/* TODO: role swich using ID pin */
&usb0 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb0_id>;
+ adp-disable;
+ usb-role-switch;
status = "disabled";
+
+ port {
+ usb0_ep: endpoint {
+ remote-endpoint = <&usb_dr_connector>;
+ };
+ };
};
/* Verdin USB_2 */
diff --git a/arch/arm64/boot/dts/ti/k3-am62-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am62-wakeup.dtsi
index fef76f52a52e..23ce1bfda8d6 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-wakeup.dtsi
@@ -1,10 +1,12 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM625 SoC Family Wakeup Domain peripherals
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
+#include <dt-bindings/bus/ti-sysc.h>
+
&cbass_wakeup {
wkup_conf: syscon@43000000 {
bootph-all;
@@ -21,14 +23,34 @@
};
};
- wkup_uart0: serial@2b300000 {
- compatible = "ti,am64-uart", "ti,am654-uart";
- reg = <0x00 0x2b300000 0x00 0x100>;
- interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
+ target-module@2b300050 {
+ compatible = "ti,sysc-omap2", "ti,sysc";
+ reg = <0x00 0x2b300050 0x00 0x4>,
+ <0x00 0x2b300054 0x00 0x4>,
+ <0x00 0x2b300058 0x00 0x4>;
+ reg-names = "rev", "sysc", "syss";
+ ti,sysc-mask = <(SYSC_OMAP2_ENAWAKEUP |
+ SYSC_OMAP2_SOFTRESET |
+ SYSC_OMAP2_AUTOIDLE)>;
+ ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+ <SYSC_IDLE_NO>,
+ <SYSC_IDLE_SMART>,
+ <SYSC_IDLE_SMART_WKUP>;
+ ti,syss-mask = <1>;
+ ti,no-reset-on-init;
power-domains = <&k3_pds 114 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 114 0>;
- clock-names = "fclk";
- status = "disabled";
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x00 0x2b300000 0x100000>;
+
+ wkup_uart0: serial@0 {
+ compatible = "ti,am64-uart", "ti,am654-uart";
+ reg = <0x0 0x100>;
+ interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
};
wkup_i2c0: i2c@2b200000 {
diff --git a/arch/arm64/boot/dts/ti/k3-am62.dtsi b/arch/arm64/boot/dts/ti/k3-am62.dtsi
index f1e15206e1ce..f0781f2bea29 100644
--- a/arch/arm64/boot/dts/ti/k3-am62.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM62 SoC Family
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/gpio/gpio.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-ov5640.dtso
index 5e80ca7033ba..3b4643b7d19c 100644
--- a/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-ov5640.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-ov5640.dtso
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* ALINX AN5641 & Digilent PCam 5C - OV5640 camera module
- * Copyright (C) 2022-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-tevi-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-tevi-ov5640.dtso
index 5e1cbbc27c8f..81a2763d43c6 100644
--- a/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-tevi-ov5640.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am625-beagleplay-csi2-tevi-ov5640.dtso
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Technexion TEVI-OV5640-*-RPI - OV5640 camera module
- * Copyright (C) 2022-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts b/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts
index eadbdd9ffe37..a34e0df2ab86 100644
--- a/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts
+++ b/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* https://beagleplay.org/
*
- * Copyright (C) 2022-2023 Texas Instruments Incorporated - https://www.ti.com/
- * Copyright (C) 2022-2023 Robert Nelson, BeagleBoard.org Foundation
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Robert Nelson, BeagleBoard.org Foundation
*/
/dts-v1/;
@@ -29,7 +29,6 @@
i2c3 = &main_i2c3;
i2c4 = &wkup_i2c0;
i2c5 = &mcu_i2c0;
- mdio-gpio0 = &mdio0;
mmc0 = &sdhci0;
mmc1 = &sdhci1;
mmc2 = &sdhci2;
@@ -231,27 +230,6 @@
};
};
- /* Workaround for errata i2329 - just use mdio bitbang */
- mdio0: mdio {
- compatible = "virtual,mdio-gpio";
- pinctrl-names = "default";
- pinctrl-0 = <&mdio0_pins_default>;
- gpios = <&main_gpio0 86 GPIO_ACTIVE_HIGH>, /* MDC */
- <&main_gpio0 85 GPIO_ACTIVE_HIGH>; /* MDIO */
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpsw3g_phy0: ethernet-phy@0 {
- reg = <0>;
- };
-
- cpsw3g_phy1: ethernet-phy@1 {
- reg = <1>;
- reset-gpios = <&main_gpio1 5 GPIO_ACTIVE_LOW>;
- reset-assert-us = <25>;
- reset-deassert-us = <60000>; /* T2 */
- };
- };
};
&main_pmx0 {
@@ -312,8 +290,8 @@
mdio0_pins_default: mdio0-default-pins {
pinctrl-single,pins = <
- AM62X_IOPAD(0x0160, PIN_OUTPUT, 7) /* (AD24) MDIO0_MDC.GPIO0_86 */
- AM62X_IOPAD(0x015c, PIN_INPUT, 7) /* (AB22) MDIO0_MDIO.GPIO0_85 */
+ AM62X_IOPAD(0x0160, PIN_OUTPUT, 0) /* (AD24) MDIO0_MDC */
+ AM62X_IOPAD(0x015c, PIN_INPUT, 0) /* (AB22) MDIO0_MDIO */
>;
};
@@ -573,11 +551,13 @@
};
&usbss0 {
+ bootph-all;
ti,vbus-divider;
status = "okay";
};
&usb0 {
+ bootph-all;
dr_mode = "peripheral";
};
@@ -611,8 +591,20 @@
};
&cpsw3g_mdio {
- /* Workaround for errata i2329 - Use mdio bitbang */
- status = "disabled";
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mdio0_pins_default>;
+
+ cpsw3g_phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+
+ cpsw3g_phy1: ethernet-phy@1 {
+ reg = <1>;
+ reset-gpios = <&main_gpio1 5 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <25>;
+ reset-deassert-us = <60000>; /* T2 */
+ };
};
&main_gpio0 {
@@ -827,7 +819,6 @@
bootph-all;
pinctrl-names = "default";
pinctrl-0 = <&emmc_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
status = "okay";
};
@@ -840,7 +831,6 @@
vmmc-supply = <&vdd_3v3_sd>;
vqmmc-supply = <&vdd_sd_dv>;
- ti,driver-strength-ohm = <50>;
disable-wp;
cd-gpios = <&main_gpio1 48 GPIO_ACTIVE_LOW>;
cd-debounce-delay-ms = <100>;
@@ -852,12 +842,10 @@
vmmc-supply = <&wlan_en>;
pinctrl-names = "default";
pinctrl-0 = <&wifi_pins_default>, <&wifi_32k_clk>;
- bus-width = <4>;
non-removable;
ti,fails-without-test-cd;
cap-power-off-card;
keep-power-in-suspend;
- ti,driver-strength-ohm = <50>;
assigned-clocks = <&k3_clks 157 158>;
assigned-clock-parents = <&k3_clks 157 160>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/ti/k3-am625-phyboard-lyra-rdk.dts b/arch/arm64/boot/dts/ti/k3-am625-phyboard-lyra-rdk.dts
index 4bc0134c987d..a83a90497857 100644
--- a/arch/arm64/boot/dts/ti/k3-am625-phyboard-lyra-rdk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am625-phyboard-lyra-rdk.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2022 - 2023 PHYTEC Messtechnik GmbH
+ * Copyright (C) 2022-2024 PHYTEC Messtechnik GmbH
* Author: Wadim Egorov <w.egorov@phytec.de>
*
* Product homepage:
@@ -222,6 +222,7 @@
cpsw3g_phy3: ethernet-phy@3 {
compatible = "ethernet-phy-id2000.a231", "ethernet-phy-ieee802.3-c22";
reg = <3>;
+ ti,clk-output-sel = <DP83867_CLK_O_SEL_OFF>;
ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
};
@@ -333,7 +334,6 @@
vqmmc-supply = <&vddshv5_sdio>;
pinctrl-names = "default";
pinctrl-0 = <&main_mmc1_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
no-1-8-v;
status = "okay";
diff --git a/arch/arm64/boot/dts/ti/k3-am625-sk.dts b/arch/arm64/boot/dts/ti/k3-am625-sk.dts
index b18092497c9a..ae81ebb39d02 100644
--- a/arch/arm64/boot/dts/ti/k3-am625-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am625-sk.dts
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* AM625 SK: https://www.ti.com/lit/zip/sprr448
*
- * Copyright (C) 2021-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am625.dtsi b/arch/arm64/boot/dts/ti/k3-am625.dtsi
index 4193c2b3eed6..4014add6320d 100644
--- a/arch/arm64/boot/dts/ti/k3-am625.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am625.dtsi
@@ -1,10 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM625 SoC family in Quad core configuration
*
* TRM: https://www.ti.com/lit/pdf/spruiv7
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi
index f0b8c9ab1459..aa1e057082f0 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM62A SoC Family Main Domain peripherals
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_main {
@@ -42,9 +42,8 @@
};
};
- main_conf: syscon@100000 {
- compatible = "ti,j721e-system-controller", "syscon", "simple-mfd";
- reg = <0x00 0x00100000 0x00 0x20000>;
+ main_conf: bus@100000 {
+ compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x00 0x00 0x00100000 0x20000>;
@@ -536,6 +535,24 @@
status = "disabled";
};
+ sdhci0: mmc@fa10000 {
+ compatible = "ti,am62-sdhci";
+ reg = <0x00 0xfa10000 0x00 0x260>, <0x00 0xfa18000 0x00 0x134>;
+ interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
+ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 57 5>, <&k3_clks 57 6>;
+ clock-names = "clk_ahb", "clk_xin";
+ assigned-clocks = <&k3_clks 57 6>;
+ assigned-clock-parents = <&k3_clks 57 8>;
+ bus-width = <8>;
+ mmc-hs200-1_8v;
+ ti,clkbuf-sel = <0x7>;
+ ti,otap-del-sel-legacy = <0x0>;
+ ti,otap-del-sel-mmc-hs = <0x0>;
+ ti,otap-del-sel-hs200 = <0x6>;
+ status = "disabled";
+ };
+
sdhci1: mmc@fa00000 {
compatible = "ti,am62-sdhci";
reg = <0x00 0xfa00000 0x00 0x260>, <0x00 0xfa08000 0x00 0x134>;
@@ -543,7 +560,8 @@
power-domains = <&k3_pds 58 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 58 5>, <&k3_clks 58 6>;
clock-names = "clk_ahb", "clk_xin";
- ti,trm-icp = <0x2>;
+ bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-sd-hs = <0x0>;
ti,otap-del-sel-sdr12 = <0xf>;
@@ -555,8 +573,30 @@
ti,itap-del-sel-sd-hs = <0x0>;
ti,itap-del-sel-sdr12 = <0x0>;
ti,itap-del-sel-sdr25 = <0x0>;
- ti,clkbuf-sel = <0x7>;
+ no-1-8-v;
+ status = "disabled";
+ };
+
+ sdhci2: mmc@fa20000 {
+ compatible = "ti,am62-sdhci";
+ reg = <0x00 0xfa20000 0x00 0x260>, <0x00 0xfa28000 0x00 0x134>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ power-domains = <&k3_pds 184 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 184 5>, <&k3_clks 184 6>;
+ clock-names = "clk_ahb", "clk_xin";
bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
+ ti,otap-del-sel-legacy = <0x0>;
+ ti,otap-del-sel-sd-hs = <0x0>;
+ ti,otap-del-sel-sdr12 = <0xf>;
+ ti,otap-del-sel-sdr25 = <0xf>;
+ ti,otap-del-sel-sdr50 = <0xc>;
+ ti,otap-del-sel-sdr104 = <0x6>;
+ ti,otap-del-sel-ddr50 = <0x9>;
+ ti,itap-del-sel-legacy = <0x0>;
+ ti,itap-del-sel-sd-hs = <0x0>;
+ ti,itap-del-sel-sdr12 = <0x0>;
+ ti,itap-del-sel-sdr25 = <0x0>;
no-1-8-v;
status = "disabled";
};
@@ -985,4 +1025,30 @@
power-domains = <&k3_pds 185 TI_SCI_PD_EXCLUSIVE>;
status = "disabled";
};
+
+ dss: dss@30200000 {
+ compatible = "ti,am62a7-dss";
+ reg = <0x00 0x30200000 0x00 0x1000>, /* common */
+ <0x00 0x30202000 0x00 0x1000>, /* vidl1 */
+ <0x00 0x30206000 0x00 0x1000>, /* vid */
+ <0x00 0x30207000 0x00 0x1000>, /* ovr1 */
+ <0x00 0x30208000 0x00 0x1000>, /* ovr2 */
+ <0x00 0x3020a000 0x00 0x1000>, /* vp1: Tied OFF in the SoC */
+ <0x00 0x3020b000 0x00 0x1000>, /* vp2: Used as DPI Out */
+ <0x00 0x30201000 0x00 0x1000>; /* common1 */
+ reg-names = "common", "vidl1", "vid",
+ "ovr1", "ovr2", "vp1", "vp2", "common1";
+ power-domains = <&k3_pds 186 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 186 6>,
+ <&k3_clks 186 0>,
+ <&k3_clks 186 2>;
+ clock-names = "fck", "vp1", "vp2";
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+
+ dss_ports: ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/ti/k3-am62a-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62a-mcu.dtsi
index a6d16a94088c..8c36e56f4138 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62a-mcu.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM625 SoC Family MCU Domain peripherals
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu {
diff --git a/arch/arm64/boot/dts/ti/k3-am62a-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-am62a-thermal.dtsi
index 85ce545633ea..c7486fb2a5b4 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62a-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am62a-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am62a-wakeup.dtsi
index 4e8279fa01e1..f7bec484705a 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62a-wakeup.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM62A SoC Family Wakeup Domain peripherals
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_wakeup {
diff --git a/arch/arm64/boot/dts/ti/k3-am62a.dtsi b/arch/arm64/boot/dts/ti/k3-am62a.dtsi
index 61a210ecd5ff..b1b884600293 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62a.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM62A SoC Family
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/gpio/gpio.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am62a7-sk.dts b/arch/arm64/boot/dts/ti/k3-am62a7-sk.dts
index 7b7142586295..f241637a5642 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a7-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am62a7-sk.dts
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* AM62A SK: https://www.ti.com/lit/zip/sprr459
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -20,6 +20,7 @@
serial0 = &wkup_uart0;
serial2 = &main_uart0;
serial3 = &main_uart1;
+ mmc0 = &sdhci0;
mmc1 = &sdhci1;
};
@@ -132,6 +133,18 @@
clock-frequency = <12288000>;
};
+ hdmi0: connector-hdmi {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&sii9022_out>;
+ };
+ };
+ };
+
codec_audio: sound {
compatible = "simple-audio-card";
simple-audio-card,name = "AM62Ax-SKEVM";
@@ -181,6 +194,39 @@
};
&main_pmx0 {
+ main_dss0_pins_default: main-dss0-default-pins {
+ pinctrl-single,pins = <
+ AM62AX_IOPAD(0x100, PIN_OUTPUT, 0) /* (V17) VOUT0_VSYNC */
+ AM62AX_IOPAD(0x0f8, PIN_OUTPUT, 0) /* (T18) VOUT0_HSYNC */
+ AM62AX_IOPAD(0x104, PIN_OUTPUT, 0) /* (AA22) VOUT0_PCLK */
+ AM62AX_IOPAD(0x0fc, PIN_OUTPUT, 0) /* (U17) VOUT0_DE */
+ AM62AX_IOPAD(0x0b8, PIN_OUTPUT, 0) /* (U22) VOUT0_DATA0 */
+ AM62AX_IOPAD(0x0bc, PIN_OUTPUT, 0) /* (U21) VOUT0_DATA1 */
+ AM62AX_IOPAD(0x0c0, PIN_OUTPUT, 0) /* (U20) VOUT0_DATA2 */
+ AM62AX_IOPAD(0x0c4, PIN_OUTPUT, 0) /* (U19) VOUT0_DATA3 */
+ AM62AX_IOPAD(0x0c8, PIN_OUTPUT, 0) /* (T19) VOUT0_DATA4 */
+ AM62AX_IOPAD(0x0cc, PIN_OUTPUT, 0) /* (U18) VOUT0_DATA5 */
+ AM62AX_IOPAD(0x0d0, PIN_OUTPUT, 0) /* (V22) VOUT0_DATA6 */
+ AM62AX_IOPAD(0x0d4, PIN_OUTPUT, 0) /* (V21) VOUT0_DATA7 */
+ AM62AX_IOPAD(0x0d8, PIN_OUTPUT, 0) /* (V19) VOUT0_DATA8 */
+ AM62AX_IOPAD(0x0dc, PIN_OUTPUT, 0) /* (V18) VOUT0_DATA9 */
+ AM62AX_IOPAD(0x0e0, PIN_OUTPUT, 0) /* (W22) VOUT0_DATA10 */
+ AM62AX_IOPAD(0x0e4, PIN_OUTPUT, 0) /* (W21) VOUT0_DATA11 */
+ AM62AX_IOPAD(0x0e8, PIN_OUTPUT, 0) /* (W20) VOUT0_DATA12 */
+ AM62AX_IOPAD(0x0ec, PIN_OUTPUT, 0) /* (W19) VOUT0_DATA13 */
+ AM62AX_IOPAD(0x0f0, PIN_OUTPUT, 0) /* (Y21) VOUT0_DATA14 */
+ AM62AX_IOPAD(0x0f4, PIN_OUTPUT, 0) /* (Y22) VOUT0_DATA15 */
+ AM62AX_IOPAD(0x05c, PIN_OUTPUT, 1) /* (P22) GPMC0_AD8.VOUT0_DATA16 */
+ AM62AX_IOPAD(0x060, PIN_OUTPUT, 1) /* (R19) GPMC0_AD9.VOUT0_DATA17 */
+ AM62AX_IOPAD(0x064, PIN_OUTPUT, 1) /* (R20) GPMC0_AD10.VOUT0_DATA18 */
+ AM62AX_IOPAD(0x068, PIN_OUTPUT, 1) /* (R22) GPMC0_AD11.VOUT0_DATA19 */
+ AM62AX_IOPAD(0x06c, PIN_OUTPUT, 1) /* (T22) GPMC0_AD12.VOUT0_DATA20 */
+ AM62AX_IOPAD(0x070, PIN_OUTPUT, 1) /* (R21) GPMC0_AD13.VOUT0_DATA21 */
+ AM62AX_IOPAD(0x074, PIN_OUTPUT, 1) /* (T20) GPMC0_AD14.VOUT0_DATA22 */
+ AM62AX_IOPAD(0x078, PIN_OUTPUT, 1) /* (T21) GPMC0_AD15.VOUT0_DATA23 */
+ >;
+ };
+
main_uart0_pins_default: main-uart0-default-pins {
pinctrl-single,pins = <
AM62AX_IOPAD(0x1c8, PIN_INPUT, 0) /* (E14) UART0_RXD */
@@ -218,6 +264,22 @@
>;
};
+ main_mmc0_pins_default: main-mmc0-default-pins {
+ pinctrl-single,pins = <
+ AM62AX_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */
+ AM62AX_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLKLB */
+ AM62AX_IOPAD(0x21c, PIN_INPUT, 0) /* (AB1) MMC0_CLK */
+ AM62AX_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */
+ AM62AX_IOPAD(0x210, PIN_INPUT_PULLUP, 0) /* (AA1) MMC0_DAT1 */
+ AM62AX_IOPAD(0x20c, PIN_INPUT_PULLUP, 0) /* (AA3) MMC0_DAT2 */
+ AM62AX_IOPAD(0x208, PIN_INPUT_PULLUP, 0) /* (Y4) MMC0_DAT3 */
+ AM62AX_IOPAD(0x204, PIN_INPUT_PULLUP, 0) /* (AB2) MMC0_DAT4 */
+ AM62AX_IOPAD(0x200, PIN_INPUT_PULLUP, 0) /* (AC1) MMC0_DAT5 */
+ AM62AX_IOPAD(0x1fc, PIN_INPUT_PULLUP, 0) /* (AD2) MMC0_DAT6 */
+ AM62AX_IOPAD(0x1f8, PIN_INPUT_PULLUP, 0) /* (AC2) MMC0_DAT7 */
+ >;
+ };
+
main_mmc1_pins_default: main-mmc1-default-pins {
pinctrl-single,pins = <
AM62AX_IOPAD(0x23c, PIN_INPUT, 0) /* (A21) MMC1_CMD */
@@ -466,6 +528,36 @@
"CSI_EN", "AUTO_100M_1000M_CONFIG",
"CSI_VLDO_SEL", "SoC_WLAN_SDIO_RST";
};
+
+ sii9022: bridge-hdmi@3b {
+ compatible = "sil,sii9022";
+ reg = <0x3b>;
+ interrupt-parent = <&exp1>;
+ interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
+ #sound-dai-cells = <0>;
+ sil,i2s-data-lanes = < 0 >;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ sii9022_in: endpoint {
+ remote-endpoint = <&dpi1_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ sii9022_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
};
&main_i2c2 {
@@ -475,13 +567,21 @@
clock-frequency = <400000>;
};
+&sdhci0 {
+ /* eMMC */
+ status = "okay";
+ non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mmc0_pins_default>;
+ disable-wp;
+};
+
&sdhci1 {
/* SD/MMC */
status = "okay";
vmmc-supply = <&vdd_mmc1>;
pinctrl-names = "default";
pinctrl-0 = <&main_mmc1_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
};
@@ -583,3 +683,20 @@
tx-num-evt = <32>;
rx-num-evt = <32>;
};
+
+&dss {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_dss0_pins_default>;
+};
+
+&dss_ports {
+ /* VP2: DPI Output */
+ port@1 {
+ reg = <1>;
+
+ dpi1_out: endpoint {
+ remote-endpoint = <&sii9022_in>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am62a7.dtsi b/arch/arm64/boot/dts/ti/k3-am62a7.dtsi
index 58f1c43edcf8..f86a23404e6d 100644
--- a/arch/arm64/boot/dts/ti/k3-am62a7.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62a7.dtsi
@@ -1,10 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM62A7 SoC family in Quad core configuration
*
* TRM: https://www.ti.com/lit/zip/spruj16
*
- * Copyright (C) 2020-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi
index 4c51bae06b57..7337a9e13535 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree file for the AM62P main domain peripherals
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_main {
@@ -158,6 +158,43 @@
};
};
+ dmss_csi: bus@4e000000 {
+ compatible = "simple-bus";
+ ranges = <0x00 0x4e000000 0x00 0x4e000000 0x00 0x408000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dma-ranges;
+ ti,sci-dev-id = <198>;
+
+ inta_main_dmss_csi: interrupt-controller@4e400000 {
+ compatible = "ti,sci-inta";
+ reg = <0x00 0x4e400000 0x00 0x8000>;
+ #interrupt-cells = <0>;
+ interrupt-controller;
+ interrupt-parent = <&gic500>;
+ msi-controller;
+ power-domains = <&k3_pds 182 TI_SCI_PD_EXCLUSIVE>;
+ ti,sci = <&dmsc>;
+ ti,sci-dev-id = <200>;
+ ti,interrupt-ranges = <0 237 8>;
+ ti,unmapped-event-sources = <&main_bcdma_csi>;
+ };
+
+ main_bcdma_csi: dma-controller@4e230000 {
+ compatible = "ti,am62a-dmss-bcdma-csirx";
+ reg = <0x00 0x4e230000 0x00 0x100>,
+ <0x00 0x4e180000 0x00 0x8000>,
+ <0x00 0x4e100000 0x00 0x10000>;
+ reg-names = "gcfg", "rchanrt", "ringrt";
+ #dma-cells = <3>;
+ msi-parent = <&inta_main_dmss_csi>;
+ power-domains = <&k3_pds 182 TI_SCI_PD_EXCLUSIVE>;
+ ti,sci = <&dmsc>;
+ ti,sci-dev-id = <199>;
+ ti,sci-rm-range-rchan = <0x21>;
+ };
+ };
+
dmsc: system-controller@44043000 {
compatible = "ti,k2g-sci";
ti,host-id = <12>;
@@ -534,7 +571,21 @@
clock-names = "clk_ahb", "clk_xin";
assigned-clocks = <&k3_clks 57 2>;
assigned-clock-parents = <&k3_clks 57 4>;
- ti,otap-del-sel-legacy = <0x0>;
+ bus-width = <8>;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+ mmc-hs400-1_8v;
+ ti,clkbuf-sel = <0x7>;
+ ti,strobe-sel = <0x77>;
+ ti,trm-icp = <0x8>;
+ ti,otap-del-sel-legacy = <0x1>;
+ ti,otap-del-sel-mmc-hs = <0x1>;
+ ti,otap-del-sel-ddr52 = <0x6>;
+ ti,otap-del-sel-hs200 = <0x8>;
+ ti,otap-del-sel-hs400 = <0x5>;
+ ti,itap-del-sel-legacy = <0x10>;
+ ti,itap-del-sel-mmc-hs = <0xa>;
+ ti,itap-del-sel-ddr52 = <0x3>;
status = "disabled";
};
@@ -545,7 +596,19 @@
power-domains = <&k3_pds 58 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 58 5>, <&k3_clks 58 6>;
clock-names = "clk_ahb", "clk_xin";
- ti,otap-del-sel-legacy = <0x8>;
+ bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
+ ti,otap-del-sel-legacy = <0x0>;
+ ti,otap-del-sel-sd-hs = <0x0>;
+ ti,otap-del-sel-sdr12 = <0xf>;
+ ti,otap-del-sel-sdr25 = <0xf>;
+ ti,otap-del-sel-sdr50 = <0xc>;
+ ti,otap-del-sel-ddr50 = <0x9>;
+ ti,otap-del-sel-sdr104 = <0x6>;
+ ti,itap-del-sel-legacy = <0x0>;
+ ti,itap-del-sel-sd-hs = <0x0>;
+ ti,itap-del-sel-sdr12 = <0x0>;
+ ti,itap-del-sel-sdr25 = <0x0>;
status = "disabled";
};
@@ -556,7 +619,19 @@
power-domains = <&k3_pds 184 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 184 5>, <&k3_clks 184 6>;
clock-names = "clk_ahb", "clk_xin";
- ti,otap-del-sel-legacy = <0x8>;
+ bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
+ ti,otap-del-sel-legacy = <0x0>;
+ ti,otap-del-sel-sd-hs = <0x0>;
+ ti,otap-del-sel-sdr12 = <0xf>;
+ ti,otap-del-sel-sdr25 = <0xf>;
+ ti,otap-del-sel-sdr50 = <0xc>;
+ ti,otap-del-sel-ddr50 = <0x9>;
+ ti,otap-del-sel-sdr104 = <0x6>;
+ ti,itap-del-sel-legacy = <0x0>;
+ ti,itap-del-sel-sd-hs = <0x0>;
+ ti,itap-del-sel-sdr12 = <0x0>;
+ ti,itap-del-sel-sdr25 = <0x0>;
status = "disabled";
};
@@ -891,4 +966,73 @@
power-domains = <&k3_pds 192 TI_SCI_PD_EXCLUSIVE>;
status = "disabled";
};
+
+ ti_csi2rx0: ticsi2rx@30102000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x00 0x30102000 0x00 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_bcdma_csi 0 0x5000 0>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 182 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx0: csi-bridge@30101000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x00 0x30101000 0x00 0x1000>;
+ clocks = <&k3_clks 182 0>, <&k3_clks 182 3>, <&k3_clks 182 0>,
+ <&k3_clks 182 0>, <&k3_clks 182 4>, <&k3_clks 182 4>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy0>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi0_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi0_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi0_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi0_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi0_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ dphy0: phy@30110000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x00 0x30110000 0x00 0x1100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 185 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
+ vpu: video-codec@30210000 {
+ compatible = "ti,j721s2-wave521c", "cnm,wave521c";
+ reg = <0x00 0x30210000 0x00 0x10000>;
+ interrupts = <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&k3_clks 204 2>;
+ power-domains = <&k3_pds 204 TI_SCI_PD_EXCLUSIVE>;
+ };
};
diff --git a/arch/arm64/boot/dts/ti/k3-am62p-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-mcu.dtsi
index c4b0b91d70cf..b973b550eb9d 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62p-mcu.dtsi
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree file for the AM62P MCU domain peripherals
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu {
@@ -187,6 +187,8 @@
ranges = <0x79000000 0x00 0x79000000 0x8000>,
<0x79020000 0x00 0x79020000 0x8000>;
power-domains = <&k3_pds 7 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
mcu_r5fss0_core0: r5f@79000000 {
compatible = "ti,am62-r5f";
reg = <0x79000000 0x00008000>,
diff --git a/arch/arm64/boot/dts/ti/k3-am62p-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-thermal.dtsi
index 85ce545633ea..c7486fb2a5b4 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62p-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am62p-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-wakeup.dtsi
index 19f42b39394e..a84756c336d0 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62p-wakeup.dtsi
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree file for the AM62P wakeup domain peripherals
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_wakeup {
@@ -78,6 +78,7 @@
ranges = <0x78000000 0x00 0x78000000 0x8000>,
<0x78100000 0x00 0x78100000 0x8000>;
power-domains = <&k3_pds 119 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
wkup_r5fss0_core0: r5f@78000000 {
compatible = "ti,am62-r5f";
diff --git a/arch/arm64/boot/dts/ti/k3-am62p.dtsi b/arch/arm64/boot/dts/ti/k3-am62p.dtsi
index 84ffe7b9dcaf..94babc412575 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62p.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM62P SoC Family
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/gpio/gpio.h>
@@ -71,7 +71,7 @@
<0x00 0x43600000 0x00 0x43600000 0x00 0x00010000>, /* SA3 sproxy data */
<0x00 0x44043000 0x00 0x44043000 0x00 0x00000fe0>, /* TI SCI DEBUG */
<0x00 0x44860000 0x00 0x44860000 0x00 0x00040000>, /* SA3 sproxy config */
- <0x00 0x48000000 0x00 0x48000000 0x00 0x06400000>, /* DMSS */
+ <0x00 0x48000000 0x00 0x48000000 0x00 0x06408000>, /* DMSS */
<0x00 0x60000000 0x00 0x60000000 0x00 0x08000000>, /* FSS0 DAT1 */
<0x00 0x70000000 0x00 0x70000000 0x00 0x00010000>, /* OCSRAM */
<0x01 0x00000000 0x01 0x00000000 0x00 0x00310000>, /* A53 PERIPHBASE */
diff --git a/arch/arm64/boot/dts/ti/k3-am62p5-sk.dts b/arch/arm64/boot/dts/ti/k3-am62p5-sk.dts
index 1773c05f752c..e86f34e835c1 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p5-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am62p5-sk.dts
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree file for the AM62P5-SK
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* Schematics: https://www.ti.com/lit/zip/sprr487
*/
@@ -413,6 +413,7 @@
status = "okay";
ti,driver-strength-ohm = <50>;
disable-wp;
+ bootph-all;
};
&sdhci1 {
@@ -422,9 +423,7 @@
vqmmc-supply = <&vddshv_sdio>;
pinctrl-names = "default";
pinctrl-0 = <&main_mmc1_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
- no-1-8-v;
bootph-all;
};
@@ -445,6 +444,10 @@
};
&cpsw3g_mdio {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mdio1_pins_default>;
+ status = "okay";
+
cpsw3g_phy0: ethernet-phy@0 {
reg = <0>;
ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
diff --git a/arch/arm64/boot/dts/ti/k3-am62p5.dtsi b/arch/arm64/boot/dts/ti/k3-am62p5.dtsi
index 50147bb63e03..41f479dca455 100644
--- a/arch/arm64/boot/dts/ti/k3-am62p5.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62p5.dtsi
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree file for the AM62P5 SoC family (quad core)
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* TRM: https://www.ti.com/lit/pdf/spruj83
*/
diff --git a/arch/arm64/boot/dts/ti/k3-am62x-phyboard-lyra-gpio-fan.dtso b/arch/arm64/boot/dts/ti/k3-am62x-phyboard-lyra-gpio-fan.dtso
new file mode 100644
index 000000000000..f0b2fd4165a7
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am62x-phyboard-lyra-gpio-fan.dtso
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2024 PHYTEC America LLC
+ * Author: Garrett Giordano <ggiordano@phytec.com>
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/thermal/thermal.h>
+#include "k3-pinctrl.h"
+
+&{/} {
+ fan: gpio-fan {
+ compatible = "gpio-fan";
+ gpio-fan,speed-map = <0 0 8600 1>;
+ gpios = <&main_gpio0 40 GPIO_ACTIVE_LOW>;
+ #cooling-cells = <2>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&gpio_fan_pins_default>;
+ };
+};
+
+&main_pmx0 {
+ gpio_fan_pins_default: gpio-fan-default-pins {
+ pinctrl-single,pins = <
+ AM62X_IOPAD(0x0a4, PIN_OUTPUT, 7) /* (M22) GPMC0_DIR.GPIO0_40 */
+ >;
+ };
+};
+
+&thermal_zones {
+ main0_thermal: main0-thermal {
+ trips {
+ main0_thermal_trip0: main0-thermal-trip {
+ temperature = <65000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "active";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&main0_thermal_trip0>;
+ cooling-device = <&fan THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-common.dtsi b/arch/arm64/boot/dts/ti/k3-am62x-sk-common.dtsi
index 33768c02d8eb..3c45782ab2b7 100644
--- a/arch/arm64/boot/dts/ti/k3-am62x-sk-common.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-common.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Common dtsi for AM62x SK and derivatives
*
- * Copyright (C) 2021-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/leds/common.h>
@@ -411,7 +411,6 @@
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&main_mmc0_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
};
@@ -421,7 +420,6 @@
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&main_mmc1_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
};
@@ -460,6 +458,7 @@
};
&usbss0 {
+ bootph-all;
status = "okay";
ti,vbus-divider;
};
@@ -470,6 +469,7 @@
};
&usb0 {
+ bootph-all;
#address-cells = <1>;
#size-cells = <0>;
usb-role-switch;
diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso
index 6f4cd73c2f43..76ca02127f95 100644
--- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* IMX219 (RPi v2) Camera Module
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso
index 9323a4b38389..ccc7f5e43184 100644
--- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* ALINX AN5641 & Digilent PCam 5C - OV5640 camera module
- * Copyright (C) 2022-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso
index dcaa33a4c8d3..4eaf9d757dd0 100644
--- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Technexion TEVI-OV5640-*-RPI - OV5640 camera module
- * Copyright (C) 2022-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-hdmi-audio.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-hdmi-audio.dtso
index 43a0ddc123e5..18c3082f68e6 100644
--- a/arch/arm64/boot/dts/ti/k3-am62x-sk-hdmi-audio.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-hdmi-audio.dtso
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* Audio playback via HDMI for AM625-SK and AM62-LP SK.
*
@@ -6,7 +6,7 @@
* AM625 SK: https://www.ti.com/tool/SK-AM62
* AM62-LP SK: https://www.ti.com/tool/SK-AM62-LP
*
- * Copyright (C) 2023 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index e348114f42e0..6f9aa5e02138 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM642 SoC Family Main Domain peripherals
*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/phy/phy-cadence.h>
@@ -51,10 +51,11 @@
reg = <0x00000014 0x4>;
};
- serdes_ln_ctrl: mux-controller {
- compatible = "mmio-mux";
+ serdes_ln_ctrl: mux-controller@4080 {
+ compatible = "reg-mux";
+ reg = <0x4080 0x4>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4080 0x3>; /* SERDES0 lane0 select */
+ mux-reg-masks = <0x0 0x3>; /* SERDES0 lane0 select */
};
phy_gmii_sel: phy@4044 {
@@ -626,13 +627,18 @@
power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 57 0>, <&k3_clks 57 1>;
clock-names = "clk_ahb", "clk_xin";
+ bus-width = <8>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
+ ti,clkbuf-sel = <0x7>;
ti,trm-icp = <0x2>;
ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-mmc-hs = <0x0>;
ti,otap-del-sel-ddr52 = <0x6>;
ti,otap-del-sel-hs200 = <0x7>;
+ ti,itap-del-sel-legacy = <0x10>;
+ ti,itap-del-sel-mmc-hs = <0xa>;
+ ti,itap-del-sel-ddr52 = <0x3>;
status = "disabled";
};
@@ -643,15 +649,19 @@
power-domains = <&k3_pds 58 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 58 3>, <&k3_clks 58 4>;
clock-names = "clk_ahb", "clk_xin";
- ti,trm-icp = <0x2>;
+ bus-width = <4>;
+ ti,clkbuf-sel = <0x7>;
ti,otap-del-sel-legacy = <0x0>;
- ti,otap-del-sel-sd-hs = <0xf>;
+ ti,otap-del-sel-sd-hs = <0x0>;
ti,otap-del-sel-sdr12 = <0xf>;
ti,otap-del-sel-sdr25 = <0xf>;
ti,otap-del-sel-sdr50 = <0xc>;
ti,otap-del-sel-sdr104 = <0x6>;
ti,otap-del-sel-ddr50 = <0x9>;
- ti,clkbuf-sel = <0x7>;
+ ti,itap-del-sel-legacy = <0x0>;
+ ti,itap-del-sel-sd-hs = <0x0>;
+ ti,itap-del-sel-sdr12 = <0x0>;
+ ti,itap-del-sel-sdr25 = <0x0>;
status = "disabled";
};
@@ -1041,25 +1051,6 @@
status = "disabled";
};
- pcie0_ep: pcie-ep@f102000 {
- compatible = "ti,am64-pcie-ep", "ti,j721e-pcie-ep";
- reg = <0x00 0x0f102000 0x00 0x1000>,
- <0x00 0x0f100000 0x00 0x400>,
- <0x00 0x0d000000 0x00 0x00800000>,
- <0x00 0x68000000 0x00 0x08000000>;
- reg-names = "intd_cfg", "user_cfg", "reg", "mem";
- interrupt-names = "link_state";
- interrupts = <GIC_SPI 203 IRQ_TYPE_EDGE_RISING>;
- ti,syscon-pcie-ctrl = <&main_conf 0x4070>;
- max-link-speed = <2>;
- num-lanes = <1>;
- power-domains = <&k3_pds 114 TI_SCI_PD_EXCLUSIVE>;
- clocks = <&k3_clks 114 0>;
- clock-names = "fck";
- max-functions = /bits/ 8 <1>;
- status = "disabled";
- };
-
epwm0: pwm@23000000 {
compatible = "ti,am64-epwm", "ti,am3352-ehrpwm";
#pwm-cells = <3>;
@@ -1244,6 +1235,18 @@
};
};
+ icssg0_iep0: iep@2e000 {
+ compatible = "ti,am654-icss-iep";
+ reg = <0x2e000 0x1000>;
+ clocks = <&icssg0_iepclk_mux>;
+ };
+
+ icssg0_iep1: iep@2f000 {
+ compatible = "ti,am654-icss-iep";
+ reg = <0x2f000 0x1000>;
+ clocks = <&icssg0_iepclk_mux>;
+ };
+
icssg0_mii_rt: mii-rt@32000 {
compatible = "ti,pruss-mii", "syscon";
reg = <0x32000 0x100>;
@@ -1385,6 +1388,18 @@
};
};
+ icssg1_iep0: iep@2e000 {
+ compatible = "ti,am654-icss-iep";
+ reg = <0x2e000 0x1000>;
+ clocks = <&icssg1_iepclk_mux>;
+ };
+
+ icssg1_iep1: iep@2f000 {
+ compatible = "ti,am654-icss-iep";
+ reg = <0x2f000 0x1000>;
+ clocks = <&icssg1_iepclk_mux>;
+ };
+
icssg1_mii_rt: mii-rt@32000 {
compatible = "ti,pruss-mii", "syscon";
reg = <0x32000 0x100>;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
index b9508072bebb..ec17285869da 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM64 SoC Family MCU Domain peripherals
*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu {
diff --git a/arch/arm64/boot/dts/ti/k3-am64-phycore-som.dtsi b/arch/arm64/boot/dts/ti/k3-am64-phycore-som.dtsi
index 1678e74cb750..125e507966fb 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-phycore-som.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-phycore-som.dtsi
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2021 PHYTEC America, LLC - https://www.phytec.com
+ * Copyright (C) 2021-2024 PHYTEC America, LLC - https://www.phytec.com
* Author: Matt McKee <mmckee@phytec.com>
*
- * Copyright (C) 2022 PHYTEC Messtechnik GmbH
+ * Copyright (C) 2022-2024 PHYTEC Messtechnik GmbH
* Author: Wadim Egorov <w.egorov@phytec.de>
*
* Product homepage:
@@ -318,3 +318,10 @@
disable-wp;
keep-power-in-suspend;
};
+
+&tscadc0 {
+ status = "okay";
+ adc {
+ ti,adc-channels = <0 1 2 3 4 5 6 7>;
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am64-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-am64-thermal.dtsi
index 036db56ba797..b1cd5542428c 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am64.dtsi b/arch/arm64/boot/dts/ti/k3-am64.dtsi
index 0187c42aed4f..74e56cc68d46 100644
--- a/arch/arm64/boot/dts/ti/k3-am64.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM642 SoC Family
*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/gpio/gpio.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am642-evm-icssg1-dualemac.dtso b/arch/arm64/boot/dts/ti/k3-am642-evm-icssg1-dualemac.dtso
new file mode 100644
index 000000000000..af2fd3e7448b
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am642-evm-icssg1-dualemac.dtso
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/**
+ * DT overlay for enabling 2nd ICSSG1 port on AM642 EVM
+ *
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include "k3-pinctrl.h"
+
+&{/} {
+ aliases {
+ ethernet1 = "/icssg1-eth/ethernet-ports/port@1";
+ };
+
+ mdio-mux-2 {
+ compatible = "mdio-mux-multiplexer";
+ mux-controls = <&mdio_mux>;
+ mdio-parent-bus = <&icssg1_mdio>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ mdio@0 {
+ reg = <0x0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ icssg1_phy2: ethernet-phy@3 {
+ reg = <3>;
+ tx-internal-delay-ps = <250>;
+ rx-internal-delay-ps = <2000>;
+ };
+ };
+ };
+};
+
+&main_pmx0 {
+ icssg1_rgmii2_pins_default: icssg1-rgmii2-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0108, PIN_INPUT, 2) /* (W11) PRG1_PRU1_GPO0.RGMII2_RD0 */
+ AM64X_IOPAD(0x010c, PIN_INPUT, 2) /* (V11) PRG1_PRU1_GPO1.RGMII2_RD1 */
+ AM64X_IOPAD(0x0110, PIN_INPUT, 2) /* (AA12) PRG1_PRU1_GPO2.RGMII2_RD2 */
+ AM64X_IOPAD(0x0114, PIN_INPUT, 2) /* (Y12) PRG1_PRU1_GPO3.RGMII2_RD3 */
+ AM64X_IOPAD(0x0120, PIN_INPUT, 2) /* (U11) PRG1_PRU1_GPO6.RGMII2_RXC */
+ AM64X_IOPAD(0x0118, PIN_INPUT, 2) /* (W12) PRG1_PRU1_GPO4.RGMII2_RX_CTL */
+ AM64X_IOPAD(0x0134, PIN_OUTPUT, 2) /* (AA10) PRG1_PRU1_GPO11.RGMII2_TD0 */
+ AM64X_IOPAD(0x0138, PIN_OUTPUT, 2) /* (V10) PRG1_PRU1_GPO12.RGMII2_TD1 */
+ AM64X_IOPAD(0x013c, PIN_OUTPUT, 2) /* (U10) PRG1_PRU1_GPO13.RGMII2_TD2 */
+ AM64X_IOPAD(0x0140, PIN_OUTPUT, 2) /* (AA11) PRG1_PRU1_GPO14.RGMII2_TD3 */
+ AM64X_IOPAD(0x0148, PIN_OUTPUT, 2) /* (Y10) PRG1_PRU1_GPO16.RGMII2_TXC */
+ AM64X_IOPAD(0x0144, PIN_OUTPUT, 2) /* (Y11) PRG1_PRU1_GPO15.RGMII2_TX_CTL */
+ >;
+ };
+};
+
+&cpsw3g {
+ pinctrl-0 = <&rgmii1_pins_default>;
+};
+
+&cpsw_port2 {
+ status = "disabled";
+};
+
+&mdio_mux_1 {
+ status = "disabled";
+};
+
+&icssg1_eth {
+ pinctrl-0 = <&icssg1_rgmii1_pins_default>, <&icssg1_rgmii2_pins_default>;
+};
+
+&icssg1_emac1 {
+ status = "okay";
+ phy-handle = <&icssg1_phy2>;
+ phy-mode = "rgmii-id";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-evm.dts b/arch/arm64/boot/dts/ti/k3-am642-evm.dts
index 8c5651d2cf5d..53fe1d065ddb 100644
--- a/arch/arm64/boot/dts/ti/k3-am642-evm.dts
+++ b/arch/arm64/boot/dts/ti/k3-am642-evm.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -32,6 +32,7 @@
mmc1 = &sdhci1;
ethernet0 = &cpsw_port1;
ethernet1 = &cpsw_port2;
+ ethernet2 = &icssg1_emac0;
};
memory@80000000 {
@@ -198,7 +199,7 @@
mux-gpios = <&exp1 12 GPIO_ACTIVE_HIGH>;
};
- mdio-mux-1 {
+ mdio_mux_1: mdio-mux-1 {
compatible = "mdio-mux-multiplexer";
mux-controls = <&mdio_mux>;
mdio-parent-bus = <&cpsw3g_mdio>;
@@ -229,6 +230,64 @@
max-bitrate = <5000000>;
standby-gpios = <&exp1 9 GPIO_ACTIVE_HIGH>;
};
+
+ icssg1_eth: icssg1-eth {
+ compatible = "ti,am642-icssg-prueth";
+ pinctrl-names = "default";
+ pinctrl-0 = <&icssg1_rgmii1_pins_default>;
+ sram = <&oc_sram>;
+ ti,prus = <&pru1_0>, <&rtu1_0>, <&tx_pru1_0>, <&pru1_1>, <&rtu1_1>, <&tx_pru1_1>;
+ firmware-name = "ti-pruss/am64x-sr2-pru0-prueth-fw.elf",
+ "ti-pruss/am64x-sr2-rtu0-prueth-fw.elf",
+ "ti-pruss/am64x-sr2-txpru0-prueth-fw.elf",
+ "ti-pruss/am64x-sr2-pru1-prueth-fw.elf",
+ "ti-pruss/am64x-sr2-rtu1-prueth-fw.elf",
+ "ti-pruss/am64x-sr2-txpru1-prueth-fw.elf";
+
+ ti,pruss-gp-mux-sel = <2>, /* MII mode */
+ <2>,
+ <2>,
+ <2>, /* MII mode */
+ <2>,
+ <2>;
+ ti,mii-g-rt = <&icssg1_mii_g_rt>;
+ ti,mii-rt = <&icssg1_mii_rt>;
+ ti,iep = <&icssg1_iep0>, <&icssg1_iep1>;
+ interrupt-parent = <&icssg1_intc>;
+ interrupts = <24 0 2>, <25 1 3>;
+ interrupt-names = "tx_ts0", "tx_ts1";
+ dmas = <&main_pktdma 0xc200 15>, /* egress slice 0 */
+ <&main_pktdma 0xc201 15>, /* egress slice 0 */
+ <&main_pktdma 0xc202 15>, /* egress slice 0 */
+ <&main_pktdma 0xc203 15>, /* egress slice 0 */
+ <&main_pktdma 0xc204 15>, /* egress slice 1 */
+ <&main_pktdma 0xc205 15>, /* egress slice 1 */
+ <&main_pktdma 0xc206 15>, /* egress slice 1 */
+ <&main_pktdma 0xc207 15>, /* egress slice 1 */
+ <&main_pktdma 0x4200 15>, /* ingress slice 0 */
+ <&main_pktdma 0x4201 15>; /* ingress slice 1 */
+ dma-names = "tx0-0", "tx0-1", "tx0-2", "tx0-3",
+ "tx1-0", "tx1-1", "tx1-2", "tx1-3",
+ "rx0", "rx1";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ icssg1_emac0: port@0 {
+ reg = <0>;
+ phy-handle = <&icssg1_phy1>;
+ phy-mode = "rgmii-id";
+ /* Filled in by bootloader */
+ local-mac-address = [00 00 00 00 00 00];
+ };
+ icssg1_emac1: port@1 {
+ reg = <1>;
+ /* Filled in by bootloader */
+ local-mac-address = [00 00 00 00 00 00];
+ status = "disabled";
+ };
+ };
+ };
};
&main_pmx0 {
@@ -383,6 +442,30 @@
AM64X_IOPAD(0x0030, PIN_OUTPUT_PULLUP, 7) /* (L18) OSPI0_CSN1.GPIO0_12 */
>;
};
+
+ icssg1_mdio1_pins_default: icssg1-mdio1-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x015c, PIN_OUTPUT, 0) /* (Y6) PRG1_MDIO0_MDC */
+ AM64X_IOPAD(0x0158, PIN_INPUT, 0) /* (AA6) PRG1_MDIO0_MDIO */
+ >;
+ };
+
+ icssg1_rgmii1_pins_default: icssg1-rgmii1-default-pins{
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x00b8, PIN_INPUT, 2) /* (Y7) PRG1_PRU0_GPO0.PRG1_RGMII1_RD0 */
+ AM64X_IOPAD(0x00bc, PIN_INPUT, 2) /* (U8) PRG1_PRU0_GPO1.PRG1_RGMII1_RD1 */
+ AM64X_IOPAD(0x00c0, PIN_INPUT, 2) /* (W8) PRG1_PRU0_GPO2.PRG1_RGMII1_RD2 */
+ AM64X_IOPAD(0x00c4, PIN_INPUT, 2) /* (V8) PRG1_PRU0_GPO3.PRG1_RGMII1_RD3 */
+ AM64X_IOPAD(0x00d0, PIN_INPUT, 2) /* (AA7) PRG1_PRU0_GPO6.PRG1_RGMII1_RXC */
+ AM64X_IOPAD(0x00c8, PIN_INPUT, 2) /* (Y8) PRG1_PRU0_GPO4.PRG1_RGMII1_RX_CTL */
+ AM64X_IOPAD(0x00e4, PIN_INPUT, 2) /* (AA8) PRG1_PRU0_GPO11.PRG1_RGMII1_TD0 */
+ AM64X_IOPAD(0x00e8, PIN_INPUT, 2) /* (U9) PRG1_PRU0_GPO12.PRG1_RGMII1_TD1 */
+ AM64X_IOPAD(0x00ec, PIN_INPUT, 2) /* (W9) PRG1_PRU0_GPO13.PRG1_RGMII1_TD2 */
+ AM64X_IOPAD(0x00f0, PIN_INPUT, 2) /* (AA9) PRG1_PRU0_GPO14.PRG1_RGMII1_TD3 */
+ AM64X_IOPAD(0x00f8, PIN_INPUT, 2) /* (V9) PRG1_PRU0_GPO16.PRG1_RGMII1_TXC */
+ AM64X_IOPAD(0x00f4, PIN_INPUT, 2) /* (Y9) PRG1_PRU0_GPO15.PRG1_RGMII1_TX_CTL */
+ >;
+ };
};
&main_uart0 {
@@ -494,10 +577,10 @@
/* eMMC */
&sdhci0 {
status = "okay";
- bus-width = <8>;
non-removable;
ti,driver-strength-ohm = <50>;
disable-wp;
+ bootph-all;
};
/* SD/MMC */
@@ -506,9 +589,7 @@
status = "okay";
vmmc-supply = <&vdd_mmc1>;
pinctrl-names = "default";
- bus-width = <4>;
pinctrl-0 = <&main_mmc1_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
};
@@ -660,25 +741,25 @@
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster4>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster4 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster4>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster4 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
@@ -705,12 +786,6 @@
num-lanes = <1>;
};
-&pcie0_ep {
- phys = <&serdes0_pcie_link>;
- phy-names = "pcie-phy";
- num-lanes = <1>;
-};
-
&ecap0 {
status = "okay";
/* PWM is available on Pin 1 of header J12 */
@@ -731,3 +806,15 @@
pinctrl-0 = <&main_mcan1_pins_default>;
phys = <&transceiver2>;
};
+
+&icssg1_mdio {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&icssg1_mdio1_pins_default>;
+
+ icssg1_phy1: ethernet-phy@f {
+ reg = <0xf>;
+ tx-internal-delay-ps = <250>;
+ rx-internal-delay-ps = <2000>;
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-pcie.dtso b/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-pcie.dtso
new file mode 100644
index 000000000000..bd9a5caf20da
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-pcie.dtso
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2023 Josua Mayer <josua@solid-run.com>
+ *
+ * Overlay for SolidRun AM642 HummingBoard-T to enable PCI-E.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/phy/phy.h>
+
+#include "k3-serdes.h"
+
+&pcie0_rc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie0_default_pins>;
+ reset-gpios = <&main_gpio1 15 GPIO_ACTIVE_HIGH>;
+ phys = <&serdes0_link>;
+ phy-names = "pcie-phy";
+ num-lanes = <1>;
+ status = "okay";
+};
+
+&serdes0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ serdes0_link: phy@0 {
+ reg = <0>;
+ cdns,num-lanes = <1>;
+ cdns,phy-type = <PHY_TYPE_PCIE>;
+ #phy-cells = <0>;
+ resets = <&serdes_wiz0 1>;
+ };
+};
+
+&serdes_ln_ctrl {
+ idle-states = <AM64_SERDES0_LANE0_PCIE0>;
+};
+
+&serdes_mux {
+ idle-state = <1>;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-usb3.dtso b/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-usb3.dtso
new file mode 100644
index 000000000000..ffcc3bd3c7bc
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t-usb3.dtso
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2023 Josua Mayer <josua@solid-run.com>
+ *
+ * Overlay for SolidRun AM642 HummingBoard-T to enable USB-3.1.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/phy/phy.h>
+
+#include "k3-serdes.h"
+
+&serdes0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ serdes0_link: phy@0 {
+ reg = <0>;
+ cdns,num-lanes = <1>;
+ cdns,phy-type = <PHY_TYPE_USB3>;
+ #phy-cells = <0>;
+ resets = <&serdes_wiz0 1>;
+ };
+};
+
+&serdes_ln_ctrl {
+ idle-states = <AM64_SERDES0_LANE0_USB>;
+};
+
+&serdes_mux {
+ idle-state = <0>;
+};
+
+&usbss0 {
+ /delete-property/ ti,usb2-only;
+};
+
+&usb0 {
+ maximum-speed = "super-speed";
+ phys = <&serdes0_link>;
+ phy-names = "cdns3,usb3-phy";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t.dts b/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t.dts
new file mode 100644
index 000000000000..234d76e4e944
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am642-hummingboard-t.dts
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2023 Josua Mayer <josua@solid-run.com>
+ *
+ * DTS for SolidRun AM642 HummingBoard-T,
+ * running on Cortex A53.
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/phy/phy.h>
+
+#include "k3-am642.dtsi"
+#include "k3-am642-sr-som.dtsi"
+
+/ {
+ model = "SolidRun AM642 HummingBoard-T";
+ compatible = "solidrun,am642-hummingboard-t", "solidrun,am642-sr-som", "ti,am642";
+
+ aliases {
+ serial5 = &main_uart3;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&leds_default_pins>;
+
+ /* D24 */
+ led1: led-1 {
+ label = "led1";
+ gpios = <&main_gpio0 29 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ };
+
+ /* D25 */
+ led2: led-2 {
+ label = "led2";
+ gpios = <&main_gpio0 30 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ };
+
+ /* D26 */
+ led3: led-3 {
+ label = "led3";
+ gpios = <&main_gpio0 33 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ };
+ };
+
+ regulator-m2-3v3 {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&regulator_pcie_3v3_default_pins>;
+ regulator-name = "m2-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&main_gpio1 17 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ regulator-always-on;
+ };
+
+ regulator-vpp-1v8 {
+ compatible = "regulator-fixed";
+ pinctrl-names = "default";
+ pinctrl-0 = <&regulator_vpp_1v8_default_pins>;
+ regulator-name = "vpp-1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ gpio = <&main_gpio1 78 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+
+ serdes_mux: mux-controller {
+ compatible = "gpio-mux";
+ pinctrl-names = "default";
+ pinctrl-0 = <&serdes_mux_default_pins>;
+ #mux-control-cells = <0>;
+ /*
+ * Mux has 2 IOs:
+ * - select: 0 = USB-3 (M2); 1 = PCIE (M1)
+ * - shutdown: 0 = active; 1 = disabled (high impedance)
+ */
+ mux-gpios = <&main_gpio1 40 GPIO_ACTIVE_HIGH>, <&main_gpio1 41 GPIO_ACTIVE_HIGH>;
+ /* default disabled */
+ idle-state = <2>;
+ };
+};
+
+&main_gpio0 {
+ m2-reset-hog {
+ gpio-hog;
+ gpios = <12 GPIO_ACTIVE_LOW>;
+ output-low; /* deasserted */
+ line-name = "m2-reset";
+ };
+
+ m1-m2-w-disable1-hog {
+ gpio-hog;
+ gpios = <32 GPIO_ACTIVE_LOW>;
+ output-low; /* deasserted */
+ line-name = "m1-m2-pcie-w-disable1";
+ };
+
+ m1-m2-w-disable2-hog {
+ gpio-hog;
+ gpios = <34 GPIO_ACTIVE_LOW>;
+ output-low; /* deasserted */
+ line-name = "m1-m2-pcie-w-disable2";
+ };
+};
+
+&main_gpio1 {
+ m1-pcie-clkreq0-hog {
+ gpio-hog;
+ gpios = <11 GPIO_ACTIVE_LOW>;
+ input;
+ line-name = "m1-pcie-clkreq0";
+ };
+
+ m2-pcie-clkreq-hog {
+ gpio-hog;
+ gpios = <35 GPIO_ACTIVE_LOW>;
+ input;
+ line-name = "m2-pcie-clkreq";
+ };
+};
+
+&main_i2c0 {
+ pinctrl-0 = <&main_i2c0_default_pins>, <&main_i2c0_int_default_pins>;
+
+ humidity-sensor@41 {
+ compatible = "ti,hdc2010";
+ reg = <0x41>;
+ interrupt-parent = <&main_gpio0>;
+ interrupts = <37 IRQ_TYPE_EDGE_FALLING>;
+ };
+
+ light-sensor@44 {
+ compatible = "ti,opt3001";
+ reg = <0x44>;
+ interrupt-parent = <&main_gpio0>;
+ interrupts = <37 IRQ_TYPE_EDGE_FALLING>;
+ };
+
+ /* charger@6a */
+};
+
+&main_i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c1_default_pins>;
+ status = "okay";
+
+ rtc@69 {
+ compatible = "abracon,abx80x";
+ reg = <0x69>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&rtc_int_default_pins>;
+ abracon,tc-diode = "schottky";
+ abracon,tc-resistor = <3>;
+ interrupt-parent = <&main_gpio0>;
+ interrupts = <44 IRQ_TYPE_EDGE_FALLING>;
+ };
+};
+
+&main_mcan0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mcan0_default_pins>;
+ status = "okay";
+
+ can-transceiver {
+ max-bitrate = <8000000>;
+ };
+};
+
+&main_mcan1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mcan1_default_pins>;
+ status = "okay";
+
+ can-transceiver {
+ max-bitrate = <8000000>;
+ };
+};
+
+&main_pmx0 {
+ leds_default_pins: leds-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0074, PIN_OUTPUT, 7) /* GPMC0_AD14.GPIO0_29 */
+ AM64X_IOPAD(0x0078, PIN_OUTPUT, 7) /* GPMC0_AD15.GPIO0_30 */
+ AM64X_IOPAD(0x0088, PIN_OUTPUT, 7) /* GPMC0_OEn_REn.GPIO0_33 */
+ >;
+ };
+
+ main_i2c0_int_default_pins: main-i2c0-int-default-pins {
+ pinctrl-single,pins = <
+ /* external pull-up on Carrier */
+ AM64X_IOPAD(0x0098, PIN_INPUT, 7) /* GPMC0_WAIT0.GPIO0_37 */
+ >;
+ };
+
+ main_i2c1_default_pins: main-i2c1-default-pins {
+ pinctrl-single,pins = <
+ /* external pull-up on SoM */
+ AM64X_IOPAD(0x0268, PIN_INPUT, 0) /* I2C1_SCL.I2C1_SCL */
+ AM64X_IOPAD(0x026c, PIN_INPUT, 0) /* I2C1_SDA.I2C1_SDA */
+ >;
+ };
+
+ main_mcan0_default_pins: main-mcan0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0254, PIN_INPUT, 0) /* MCAN0_RX.MCAN0_RX */
+ AM64X_IOPAD(0x0250, PIN_OUTPUT, 0) /* MCAN0_TX.MCAN0_TX */
+ >;
+ };
+
+ main_mcan1_default_pins: main-mcan1-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x025c, PIN_INPUT, 0) /* MCAN1_RX.MCAN1_RX */
+ AM64X_IOPAD(0x0258, PIN_OUTPUT, 0) /* MCAN1_TX.MCAN1_TX */
+ >;
+ };
+
+ main_uart3_default_pins: main-uart3-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x016c, PIN_INPUT, 10) /* PRG0_PRU0_GPO3.UART3_CTSn */
+ AM64X_IOPAD(0x0170, PIN_OUTPUT, 10) /* PRG0_PRU0_GPO4.UART3_TXD */
+ AM64X_IOPAD(0x0174, PIN_OUTPUT, 10) /* PRG0_PRU0_GPO5.UART3_RTSn */
+ AM64X_IOPAD(0x01ac, PIN_INPUT, 10) /* PRG0_PRU0_GPO19.UART3_RXD */
+ >;
+ };
+
+ pcie0_default_pins: pcie0-default-pins {
+ pinctrl-single,pins = <
+ /* connector M2 RESET */
+ AM64X_IOPAD(0x0030, PIN_OUTPUT, 7) /* OSPI0_CSn1.GPIO0_12 */
+ /* connectors M1 & M2 W_DISABLE1 */
+ AM64X_IOPAD(0x0084, PIN_OUTPUT, 7) /* GPMC0_ADVN_ALE.GPIO0_32 */
+ /* connectors M1 & M2 W_DISABLE2 */
+ AM64X_IOPAD(0x008c, PIN_OUTPUT, 7) /* GPMC0_WEN.GPIO0_34 */
+ /* connectors M1 & M2 PERST0 (PCI Reset) */
+ AM64X_IOPAD(0x019c, PIN_OUTPUT, 7) /* PRG0_PRU0_GPO15.GPIO1_15 */
+ /* connector M1 CLKREQ0 */
+ AM64X_IOPAD(0x018c, PIN_INPUT, 7) /* PRG0_PRU0_GPO11.GPIO1_11 */
+ /* connector M2 CLKREQ0 */
+ AM64X_IOPAD(0x01ec, PIN_INPUT, 7) /* PRG0_PRU1_GPO15.GPIO1_35 */
+ >;
+ };
+
+ regulator_pcie_3v3_default_pins: regulator-pcie-3v3-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x01a4, PIN_OUTPUT, 7) /* PRG0_PRU0_GPO17.GPIO1_17 */
+ >;
+ };
+
+ regulator_vpp_1v8_default_pins: regulator-vpp-1v8-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x029c, PIN_OUTPUT, 7) /* MMC1_SDWP.GPIO1_78 */
+ >;
+ };
+
+ rtc_int_default_pins: rtc-int-default-pins {
+ pinctrl-single,pins = <
+ /* external pull-up on Carrier */
+ AM64X_IOPAD(0x00b4, PIN_INPUT, 7) /* GPMC0_CSn3.GPIO0_44 */
+ >;
+ };
+
+ serdes_mux_default_pins: serdes-mux-default-pins {
+ pinctrl-single,pins = <
+ /* SEL, 10k pull-down on carrier, 2.2k pullup on SoM */
+ AM64X_IOPAD(0x0200, PIN_OUTPUT, 7) /* PRG0_MDIO0_MDIO.GPIO1_40 */
+ /* EN */
+ AM64X_IOPAD(0x0204, PIN_OUTPUT, 7) /* PRG0_MDIO0_MDC.GPIO1_41 */
+ >;
+ };
+};
+
+&main_uart3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_uart3_default_pins>;
+ uart-has-rtscts;
+ rs485-rts-active-low;
+ linux,rs485-enabled-at-boot-time;
+ status = "okay";
+};
+
+&usb0 {
+ dr_mode = "host";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-phyboard-electra-rdk.dts b/arch/arm64/boot/dts/ti/k3-am642-phyboard-electra-rdk.dts
index 53b64e55413f..8237b8c815b8 100644
--- a/arch/arm64/boot/dts/ti/k3-am642-phyboard-electra-rdk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am642-phyboard-electra-rdk.dts
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2021 PHYTEC America, LLC - https://www.phytec.com
+ * Copyright (C) 2021-2024 PHYTEC America, LLC - https://www.phytec.com
* Author: Matt McKee <mmckee@phytec.com>
*
- * Copyright (C) 2022 PHYTEC Messtechnik GmbH
+ * Copyright (C) 2022-2024 PHYTEC Messtechnik GmbH
* Author: Wadim Egorov <w.egorov@phytec.de>
*
* Product homepage:
@@ -159,6 +159,15 @@
>;
};
+ main_spi0_pins_default: main-spi0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x020c, PIN_OUTPUT, 7) /* (C13) SPI0_CS1.GPIO1_43 */
+ AM64X_IOPAD(0x0210, PIN_INPUT, 0) /* (D13) SPI0_CLK */
+ AM64X_IOPAD(0x0214, PIN_OUTPUT, 0) /* (A13) SPI0_D0 */
+ AM64X_IOPAD(0x0218, PIN_INPUT, 0) /* (A14) SPI0_D1 */
+ >;
+ };
+
main_uart0_pins_default: main-uart0-default-pins {
pinctrl-single,pins = <
AM64X_IOPAD(0x0230, PIN_INPUT, 0) /* (D15) UART0_RXD */
@@ -248,6 +257,20 @@
phys = <&can_tc2>;
};
+&main_spi0 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_spi0_pins_default>;
+ cs-gpios = <0>, <&main_gpio1 43 GPIO_ACTIVE_LOW>;
+ ti,pindir-d0-out-d1-in;
+
+ tpm@1 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
+ reg = <1>;
+ spi-max-frequency = <10000000>;
+ };
+};
+
&main_uart0 {
status = "okay";
pinctrl-names = "default";
@@ -269,7 +292,6 @@
pinctrl-names = "default";
pinctrl-0 = <&main_mmc1_pins_default>;
bus-width = <4>;
- ti,driver-strength-ohm = <50>;
disable-wp;
no-1-8-v;
};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-sk.dts b/arch/arm64/boot/dts/ti/k3-am642-sk.dts
index 1dddd6fc1a0d..67cd41bf806e 100644
--- a/arch/arm64/boot/dts/ti/k3-am642-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am642-sk.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -469,9 +469,7 @@
status = "okay";
vmmc-supply = <&vdd_mmc1>;
pinctrl-names = "default";
- bus-width = <4>;
pinctrl-0 = <&main_mmc1_pins_default>;
- ti,driver-strength-ohm = <50>;
disable-wp;
};
@@ -646,25 +644,25 @@
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster4>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster4 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster4>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster4 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-sr-som.dtsi b/arch/arm64/boot/dts/ti/k3-am642-sr-som.dtsi
new file mode 100644
index 000000000000..c19d0b8bbf0f
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am642-sr-som.dtsi
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2023 Josua Mayer <josua@solid-run.com>
+ *
+ */
+
+#include <dt-bindings/net/ti-dp83869.h>
+
+/ {
+ model = "SolidRun AM642 SoM";
+ compatible = "solidrun,am642-sr-som", "ti,am642";
+
+ aliases {
+ ethernet0 = &cpsw_port1;
+ ethernet1 = &icssg1_emac0;
+ ethernet2 = &icssg1_emac1;
+ mmc0 = &sdhci0;
+ mmc1 = &sdhci1;
+ serial2 = &main_uart0;
+ };
+
+ chosen {
+ /* SoC default UART console */
+ stdout-path = "serial2:115200n8";
+ };
+
+ /* PRU Ethernet Controller */
+ ethernet {
+ compatible = "ti,am642-icssg-prueth";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pru_rgmii1_default_pins>, <&pru_rgmii2_default_pins>;
+
+ sram = <&oc_sram>;
+ ti,prus = <&pru1_0>, <&rtu1_0>, <&tx_pru1_0>, <&pru1_1>, <&rtu1_1>, <&tx_pru1_1>;
+ firmware-name = "ti-pruss/am65x-sr2-pru0-prueth-fw.elf",
+ "ti-pruss/am65x-sr2-rtu0-prueth-fw.elf",
+ "ti-pruss/am65x-sr2-txpru0-prueth-fw.elf",
+ "ti-pruss/am65x-sr2-pru1-prueth-fw.elf",
+ "ti-pruss/am65x-sr2-rtu1-prueth-fw.elf",
+ "ti-pruss/am65x-sr2-txpru1-prueth-fw.elf";
+
+ /* configure internal pinmux for mii mode */
+ ti,pruss-gp-mux-sel = <2>, <2>, <2>, <2>, <2>, <2>;
+
+ ti,mii-g-rt = <&icssg1_mii_g_rt>;
+ ti,mii-rt = <&icssg1_mii_rt>;
+ ti,iep = <&icssg1_iep0>, <&icssg1_iep1>;
+
+ /*
+ * Configure icssg interrupt controller to map pru-internal
+ * interrupts 8/9 via channels 0/1 to host interrupts 0/1.
+ *
+ * For details see interrupt controller documentation:
+ * Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml
+ */
+ interrupt-parent = <&icssg1_intc>;
+ interrupts = <24 0 2>, <25 1 3>;
+ interrupt-names = "tx_ts0", "tx_ts1";
+
+ dmas = <&main_pktdma 0xc200 15>, /* egress slice 0 */
+ <&main_pktdma 0xc201 15>, /* egress slice 0 */
+ <&main_pktdma 0xc202 15>, /* egress slice 0 */
+ <&main_pktdma 0xc203 15>, /* egress slice 0 */
+ <&main_pktdma 0xc204 15>, /* egress slice 1 */
+ <&main_pktdma 0xc205 15>, /* egress slice 1 */
+ <&main_pktdma 0xc206 15>, /* egress slice 1 */
+ <&main_pktdma 0xc207 15>, /* egress slice 1 */
+ <&main_pktdma 0x4200 15>, /* ingress slice 0 */
+ <&main_pktdma 0x4201 15>; /* ingress slice 1 */
+ dma-names = "tx0-0", "tx0-1", "tx0-2", "tx0-3",
+ "tx1-0", "tx1-1", "tx1-2", "tx1-3",
+ "rx0", "rx1";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ icssg1_emac0: port@0 {
+ reg = <0>;
+ ti,syscon-rgmii-delay = <&main_conf 0x4110>;
+ /* Filled in by bootloader */
+ local-mac-address = [00 00 00 00 00 00];
+ phy-handle = <&ethernet_phy2>;
+ phy-mode = "rgmii-id";
+ };
+
+ icssg1_emac1: port@1 {
+ reg = <1>;
+ ti,syscon-rgmii-delay = <&main_conf 0x4114>;
+ /* Filled in by bootloader */
+ local-mac-address = [00 00 00 00 00 00];
+ phy-handle = <&ethernet_phy1>;
+ phy-mode = "rgmii-id";
+ };
+ };
+ };
+
+ /* DDR16SS0:
+ * - Bank 1 @ 0x080000000-0x0FFFFFFFF: max. 2GB in 32-bit address space
+ * - Bank 2 @ 0x880000000-0x9FFFFFFFF: max. 6GB in 64-bit address space
+ */
+ memory@80000000 {
+ reg = <0x00000000 0x80000000 0x00000000 0x80000000>,
+ <0x00000008 0x80000000 0x00000001 0x80000000>;
+ device_type = "memory";
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ secure_ddr: optee@9e800000 {
+ reg = <0x00 0x9e800000 0x00 0x01800000>; /* for OP-TEE */
+ no-map;
+ };
+
+ main_r5fss0_core0_dma_memory_region: r5f-dma-memory@a0000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa0000000 0x00 0x100000>;
+ no-map;
+ };
+
+ main_r5fss0_core0_memory_region: r5f-memory@a0100000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa0100000 0x00 0xf00000>;
+ no-map;
+ };
+
+ main_r5fss0_core1_dma_memory_region: r5f-dma-memory@a1000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa1000000 0x00 0x100000>;
+ no-map;
+ };
+
+ main_r5fss0_core1_memory_region: r5f-memory@a1100000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa1100000 0x00 0xf00000>;
+ no-map;
+ };
+
+ main_r5fss1_core0_dma_memory_region: r5f-dma-memory@a2000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa2000000 0x00 0x100000>;
+ no-map;
+ };
+
+ main_r5fss1_core0_memory_region: r5f-memory@a2100000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa2100000 0x00 0xf00000>;
+ no-map;
+ };
+
+ main_r5fss1_core1_dma_memory_region: r5f-dma-memory@a3000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa3000000 0x00 0x100000>;
+ no-map;
+ };
+
+ main_r5fss1_core1_memory_region: r5f-memory@a3100000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa3100000 0x00 0xf00000>;
+ no-map;
+ };
+ };
+
+ vdd_mmc0: regulator-vdd-mmc0 {
+ compatible = "regulator-fixed";
+ regulator-name = "vdd-mmc0";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+};
+
+&cpsw3g {
+ pinctrl-names = "default";
+ pinctrl-0 = <&rgmii1_default_pins>;
+};
+
+&cpsw3g_mdio {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mdio0_default_pins>;
+ status = "okay";
+
+ ethernet_phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-id2000.a0f1";
+ reg = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ethernet_phy0_default_pins>;
+ ti,clk-output-sel = <DP83869_CLK_O_SEL_REF_CLK>;
+ ti,op-mode = <DP83869_RGMII_COPPER_ETHERNET>;
+ /*
+ * Disable interrupts because ISR never clears 0x0040
+ *
+ * interrupt-parent = <&main_gpio1>;
+ * interrupts = <70 IRQ_TYPE_LEVEL_LOW>;
+ */
+ /*
+ * Disable HW Reset because clock signal is daisy-chained
+ *
+ * reset-gpios = <&main_gpio0 84 GPIO_ACTIVE_LOW>;
+ * reset-assert-us = <1>;
+ * reset-deassert-us = <30>;
+ */
+ };
+};
+
+&cpsw_port1 {
+ phy-mode = "rgmii-id";
+ phy-handle = <&ethernet_phy0>;
+};
+
+&cpsw_port2 {
+ status = "disabled";
+};
+
+&icssg1_mdio {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pru1_mdio0_default_pins>;
+ status = "okay";
+
+ ethernet_phy1: ethernet-phy@3 {
+ compatible = "ethernet-phy-id2000.a0f1";
+ reg = <3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ethernet_phy1_default_pins>;
+ ti,clk-output-sel = <DP83869_CLK_O_SEL_REF_CLK>;
+ ti,op-mode = <DP83869_RGMII_COPPER_ETHERNET>;
+ /*
+ * Disable interrupts because ISR never clears 0x0040
+ *
+ * interrupt-parent = <&main_gpio1>;
+ * interrupts = <70 IRQ_TYPE_LEVEL_LOW>;
+ */
+ /*
+ * Disable HW Reset because clock signal is daisy-chained
+ *
+ * reset-gpios = <&main_gpio0 20 GPIO_ACTIVE_LOW>;
+ * reset-assert-us = <1>;
+ * reset-deassert-us = <30>;
+ */
+ };
+
+ ethernet_phy2: ethernet-phy@f {
+ compatible = "ethernet-phy-id2000.a0f1";
+ reg = <0xf>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ethernet_phy2_default_pins>;
+ ti,op-mode = <DP83869_RGMII_COPPER_ETHERNET>;
+ /*
+ * Disable interrupts because ISR never clears 0x0040
+ *
+ * interrupt-parent = <&main_gpio1>;
+ * interrupts = <70 IRQ_TYPE_LEVEL_LOW>;
+ */
+ /*
+ * Disable HW Reset because clock signal is daisy-chained
+ *
+ * reset-gpios = <&main_gpio0 52 GPIO_ACTIVE_LOW>;
+ * reset-assert-us = <1>;
+ * reset-deassert-us = <30>;
+ */
+ };
+};
+
+&mailbox0_cluster2 {
+ status = "okay";
+
+ mbox_main_r5fss0_core0: mbox-main-r5fss0-core0 {
+ ti,mbox-rx = <0 0 2>;
+ ti,mbox-tx = <1 0 2>;
+ };
+
+ mbox_main_r5fss0_core1: mbox-main-r5fss0-core1 {
+ ti,mbox-rx = <2 0 2>;
+ ti,mbox-tx = <3 0 2>;
+ };
+};
+
+&mailbox0_cluster4 {
+ status = "okay";
+
+ mbox_main_r5fss1_core0: mbox-main-r5fss1-core0 {
+ ti,mbox-rx = <0 0 2>;
+ ti,mbox-tx = <1 0 2>;
+ };
+
+ mbox_main_r5fss1_core1: mbox-main-r5fss1-core1 {
+ ti,mbox-rx = <2 0 2>;
+ ti,mbox-tx = <3 0 2>;
+ };
+};
+
+&main_i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c0_default_pins>;
+ status = "okay";
+
+ som_eeprom: eeprom@50 {
+ compatible = "atmel,24c01";
+ reg = <0x50>;
+ pagesize = <8>;
+ };
+};
+
+&main_pmx0 {
+ /* hog global functions */
+ pinctrl-names = "default";
+ pinctrl-0 = <&ethernet_phy_default_pins>;
+
+ ethernet_phy_default_pins: ethernet-phy-default-pins {
+ pinctrl-single,pins = <
+ /* interrupt / power-down, external pull-up on SoM */
+ AM64X_IOPAD(0x0278, PIN_INPUT, 7) /* EXTINTn.GPIO1_70 */
+ >;
+ };
+
+ ethernet_phy0_default_pins: ethernet-phy0-default-pins {
+ pinctrl-single,pins = <
+ /* reset */
+ AM64X_IOPAD(0x0154, PIN_OUTPUT, 7) /* PRG1_PRU1_GPO19.GPIO0_84 */
+ /* reference clock */
+ AM64X_IOPAD(0x0274, PIN_OUTPUT, 5) /* EXT_REFCLK1.CLKOUT0 */
+ >;
+ };
+
+ ethernet_phy1_default_pins: ethernet-phy1-default-pins {
+ pinctrl-single,pins = <
+ /* reset */
+ AM64X_IOPAD(0x0150, PIN_OUTPUT, 7) /* PRG1_PRU1_GPO18.GPIO0_20 */
+ /* led0, external pull-down on SoM */
+ AM64X_IOPAD(0x0128, PIN_INPUT, 7) /* PRG1_PRU1_GPO8.GPIO0_73 */
+ /* led1/rxer */
+ AM64X_IOPAD(0x011c, PIN_INPUT, 7) /* PRG1_PRU1_GPO5.GPIO0_70 */
+ >;
+ };
+
+ ethernet_phy2_default_pins: ethernet-phy2-default-pins {
+ pinctrl-single,pins = <
+ /* reset */
+ AM64X_IOPAD(0x00d4, PIN_OUTPUT, 7) /* PRG1_PRU0_GPO7.GPIO0_52 */
+ /* led0, external pull-down on SoM */
+ AM64X_IOPAD(0x00d8, PIN_INPUT, 7) /* PRG1_PRU0_GPO8.GPIO0_53 */
+ /* led1/rxer */
+ AM64X_IOPAD(0x00cc, PIN_INPUT, 7) /* PRG1_PRU0_GPO5.GPIO0_50 */
+ >;
+ };
+
+ main_i2c0_default_pins: main-i2c0-default-pins {
+ pinctrl-single,pins = <
+ /* external pull-up on SoM */
+ AM64X_IOPAD(0x0260, PIN_INPUT, 0) /* I2C0_SCL.I2C0_SCL */
+ AM64X_IOPAD(0x0264, PIN_INPUT, 0) /* I2C0_SDA.I2C0_SDA */
+ >;
+ };
+
+ /*
+ * main_mmc0_default_pins: main-mmc0-default-pins
+ *
+ * MMC0_CMD: no padconfig
+ * MMC0_CLK: no padconfig, external pull-up on SoM
+ * MMC0_DAT0: no padconfig
+ * MMC0_DAT1: no padconfig
+ * MMC0_DAT2: no padconfig
+ * MMC0_DAT3: no padconfig
+ * MMC0_DAT4: no padconfig
+ * MMC0_DAT5: no padconfig
+ * MMC0_DAT6: no padconfig
+ * MMC0_DAT7: no padconfig
+ * MMC0_DS: no padconfig, external pull-down on SoM
+ */
+
+ main_mmc1_default_pins: main-mmc1-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0294, PIN_INPUT_PULLUP, 0) /* (J19) MMC1_CMD */
+ AM64X_IOPAD(0x028c, PIN_INPUT, 0) /* MMC1_CLK.MMC1_CLK */
+ AM64X_IOPAD(0x0288, PIN_INPUT_PULLUP, 0) /* MMC1_DAT0.MMC1_DAT0 */
+ AM64X_IOPAD(0x0284, PIN_INPUT_PULLUP, 0) /* MMC1_DAT1.MMC1_DAT1 */
+ AM64X_IOPAD(0x0280, PIN_INPUT_PULLUP, 0) /* MMC1_DAT2.MMC1_DAT2 */
+ AM64X_IOPAD(0x027c, PIN_INPUT_PULLUP, 0) /* MMC1_DAT3.MMC1_DAT3 */
+ /* external pull-down on SoM & Carrier */
+ AM64X_IOPAD(0x0298, PIN_INPUT_PULLUP, 0) /* MMC1_SDCD.MMC1_SDCD */
+ AM64X_IOPAD(0x0290, PIN_INPUT, 0) /* MMC1_CLKLB: clock loopback */
+ >;
+ };
+
+ main_uart0_default_pins: main-uart0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0230, PIN_INPUT, 0) /* UART0_RXD.UART0_RXD */
+ AM64X_IOPAD(0x0234, PIN_OUTPUT, 0) /* UART0_TXD.UART0_TXD */
+ >;
+ };
+
+ mdio0_default_pins: mdio0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x01fc, PIN_OUTPUT, 4) /* PRG0_PRU1_GPO19.MDIO0_MDC */
+ AM64X_IOPAD(0x01f8, PIN_INPUT, 4) /* PRG0_PRU1_GPO18.MDIO0_MDIO */
+ >;
+ };
+
+ ospi0_default_pins: ospi0-default-pins {
+ pinctrl-single,pins = <
+ /* external pull-down on SoM */
+ AM64X_IOPAD(0x0000, PIN_OUTPUT, 0) /* OSPI0_CLK.OSPI0_CLK */
+ AM64X_IOPAD(0x0008, PIN_OUTPUT, 0) /* OSPI0_DQS.OSPI0_DQS */
+ /* external pull-up on SoM */
+ AM64X_IOPAD(0x002c, PIN_OUTPUT, 0) /* OSPI0_CSn0.OSPI0_CSn0 */
+ AM64X_IOPAD(0x000c, PIN_INPUT, 0) /* OSPI0_D0.OSPI0_D0 */
+ AM64X_IOPAD(0x0010, PIN_INPUT, 0) /* OSPI0_D1.OSPI0_D1 */
+ AM64X_IOPAD(0x0014, PIN_INPUT, 0) /* OSPI0_D2.OSPI0_D2 */
+ AM64X_IOPAD(0x0018, PIN_INPUT, 0) /* OSPI0_D3.OSPI0_D3 */
+ AM64X_IOPAD(0x001c, PIN_INPUT, 0) /* OSPI0_D4.OSPI0_D4 */
+ AM64X_IOPAD(0x0020, PIN_INPUT, 0) /* OSPI0_D5.OSPI0_D5 */
+ AM64X_IOPAD(0x0024, PIN_INPUT, 0) /* OSPI0_D6.OSPI0_D6 */
+ AM64X_IOPAD(0x0028, PIN_INPUT, 0) /* OSPI0_D7.OSPI0_D7 */
+ >;
+ };
+
+ ospi0_flash0_default_pins: ospi0-flash0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0034, PIN_OUTPUT, 7) /* OSPI0_CSn2.GPIO0_13 */
+ AM64X_IOPAD(0x0038, PIN_INPUT, 7) /* OSPI0_CSn3.GPIO0_14 */
+ >;
+ };
+
+ pru1_mdio0_default_pins: pru1-mdio0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x015c, PIN_OUTPUT, 0) /* PRG1_MDIO0_MDC.PRG1_MDIO0_MDC */
+ AM64X_IOPAD(0x0158, PIN_INPUT, 0) /* PRG1_MDIO0_MDIO.PRG1_MDIO0_MDIO */
+ >;
+ };
+
+ pru_rgmii1_default_pins: pru-rgmii1-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x00b8, PIN_INPUT, 2) /* (Y7) PRG1_PRU0_GPO0.PRG1_RGMII1_RD0 */
+ AM64X_IOPAD(0x00bc, PIN_INPUT, 2) /* (U8) PRG1_PRU0_GPO1.PRG1_RGMII1_RD1 */
+ AM64X_IOPAD(0x00c0, PIN_INPUT, 2) /* (W8) PRG1_PRU0_GPO2.PRG1_RGMII1_RD2 */
+ AM64X_IOPAD(0x00c4, PIN_INPUT, 2) /* (V8) PRG1_PRU0_GPO3.PRG1_RGMII1_RD3 */
+ AM64X_IOPAD(0x00d0, PIN_INPUT, 2) /* (AA7) PRG1_PRU0_GPO6.PRG1_RGMII1_RXC */
+ AM64X_IOPAD(0x00c8, PIN_INPUT, 2) /* (Y8) PRG1_PRU0_GPO4.PRG1_RGMII1_RX_CTL */
+ AM64X_IOPAD(0x00e4, PIN_OUTPUT, 2) /* (AA8) PRG1_PRU0_GPO11.PRG1_RGMII1_TD0 */
+ AM64X_IOPAD(0x00e8, PIN_OUTPUT, 2) /* (U9) PRG1_PRU0_GPO12.PRG1_RGMII1_TD1 */
+ AM64X_IOPAD(0x00ec, PIN_OUTPUT, 2) /* (W9) PRG1_PRU0_GPO13.PRG1_RGMII1_TD2 */
+ AM64X_IOPAD(0x00f0, PIN_OUTPUT, 2) /* (AA9) PRG1_PRU0_GPO14.PRG1_RGMII1_TD3 */
+ AM64X_IOPAD(0x00f8, PIN_INPUT, 2) /* (V9) PRG1_PRU0_GPO16.PRG1_RGMII1_TXC */
+ AM64X_IOPAD(0x00f4, PIN_OUTPUT, 2) /* (Y9) PRG1_PRU0_GPO15.PRG1_RGMII1_TX_CTL */
+ >;
+ };
+
+ pru_rgmii2_default_pins: pru-rgmii2-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x0108, PIN_INPUT, 2) /* PRG1_PRU1_GPO0.RGMII2_RD0 */
+ AM64X_IOPAD(0x010c, PIN_INPUT, 2) /* PRG1_PRU1_GPO1.RGMII2_RD1 */
+ AM64X_IOPAD(0x0110, PIN_INPUT, 2) /* PRG1_PRU1_GPO2.RGMII2_RD2 */
+ AM64X_IOPAD(0x0114, PIN_INPUT, 2) /* PRG1_PRU1_GPO3.RGMII2_RD3 */
+ AM64X_IOPAD(0x0120, PIN_INPUT, 2) /* PRG1_PRU1_GPO6.RGMII2_RXC */
+ AM64X_IOPAD(0x0118, PIN_INPUT, 2) /* PRG1_PRU1_GPO4.RGMII2_RX_CTL */
+ AM64X_IOPAD(0x0134, PIN_OUTPUT, 2) /* PRG1_PRU1_GPO11.RGMII2_TD0 */
+ AM64X_IOPAD(0x0138, PIN_OUTPUT, 2) /* PRG1_PRU1_GPO12.RGMII2_TD1 */
+ AM64X_IOPAD(0x013c, PIN_OUTPUT, 2) /* PRG1_PRU1_GPO13.RGMII2_TD2 */
+ AM64X_IOPAD(0x0140, PIN_OUTPUT, 2) /* PRG1_PRU1_GPO14.RGMII2_TD3 */
+ AM64X_IOPAD(0x0148, PIN_INPUT, 2) /* PRG1_PRU1_GPO16.RGMII2_TXC */
+ AM64X_IOPAD(0x0144, PIN_OUTPUT, 2) /* PRG1_PRU1_GPO15.RGMII2_TX_CTL */
+ >;
+ };
+
+ rgmii1_default_pins: rgmii1-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x01cc, PIN_INPUT, 4) /* PRG0_PRU1_GPO7.RGMII1_RD0 */
+ AM64X_IOPAD(0x01d4, PIN_INPUT, 4) /* PRG0_PRU1_GPO9.RGMII1_RD1 */
+ AM64X_IOPAD(0x01d8, PIN_INPUT, 4) /* PRG0_PRU1_GPO10.RGMII1_RD2 */
+ AM64X_IOPAD(0x01f4, PIN_INPUT, 4) /* PRG0_PRU1_GPO17.RGMII1_RD3 */
+ AM64X_IOPAD(0x0188, PIN_INPUT, 4) /* PRG0_PRU0_GPO10.RGMII1_RXC */
+ AM64X_IOPAD(0x0184, PIN_INPUT, 4) /* PRG0_PRU0_GPO9.RGMII1_RX_CTL */
+ AM64X_IOPAD(0x0124, PIN_OUTPUT, 4) /* PRG1_PRU1_GPO7.RGMII1_TD0 */
+ AM64X_IOPAD(0x012c, PIN_OUTPUT, 4) /* PRG1_PRU1_GPO9.RGMII1_TD1 */
+ AM64X_IOPAD(0x0130, PIN_OUTPUT, 4) /* PRG1_PRU1_GPO10.RGMII1_TD2 */
+ AM64X_IOPAD(0x014c, PIN_OUTPUT, 4) /* PRG1_PRU1_GPO17.RGMII1_TD3 */
+ AM64X_IOPAD(0x00e0, PIN_INPUT, 4) /* PRG1_PRU0_GPO10.RGMII1_TXC */
+ AM64X_IOPAD(0x00dc, PIN_OUTPUT, 4) /* PRG1_PRU0_GPO9.RGMII1_TX_CTL */
+ >;
+ };
+
+ usb0_default_pins: usb0-default-pins {
+ pinctrl-single,pins = <
+ AM64X_IOPAD(0x02a8, PIN_OUTPUT, 0) /* USB0_DRVVBUS.USB0_DRVVBUS */
+ >;
+ };
+};
+
+&main_r5fss0_core0 {
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss0_core0>;
+ memory-region = <&main_r5fss0_core0_dma_memory_region>,
+ <&main_r5fss0_core0_memory_region>;
+};
+
+&main_r5fss0_core1 {
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss0_core1>;
+ memory-region = <&main_r5fss0_core1_dma_memory_region>,
+ <&main_r5fss0_core1_memory_region>;
+};
+
+&main_r5fss1_core0 {
+ mboxes = <&mailbox0_cluster4 &mbox_main_r5fss1_core0>;
+ memory-region = <&main_r5fss1_core0_dma_memory_region>,
+ <&main_r5fss1_core0_memory_region>;
+};
+
+&main_r5fss1_core1 {
+ mboxes = <&mailbox0_cluster4 &mbox_main_r5fss1_core1>;
+ memory-region = <&main_r5fss1_core1_dma_memory_region>,
+ <&main_r5fss1_core1_memory_region>;
+};
+
+/* SoC default UART console */
+&main_uart0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_uart0_default_pins>;
+ status = "okay";
+};
+
+&ospi0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&ospi0_default_pins>;
+ num-cs = <1>;
+ status = "okay";
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ospi0_flash0_default_pins>;
+ spi-tx-bus-width = <8>;
+ spi-rx-bus-width = <8>;
+ spi-max-frequency = <200000000>;
+ cdns,tshsl-ns = <50>;
+ cdns,tsd2d-ns = <50>;
+ cdns,tchsh-ns = <4>;
+ cdns,tslch-ns = <4>;
+ cdns,read-delay = <0>;
+ interrupt-parent = <&main_gpio0>;
+ interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
+ reset-gpios = <&main_gpio0 13 GPIO_ACTIVE_LOW>;
+ };
+};
+
+&sdhci0 {
+ /* mmc0 pins have no padconfig */
+ bus-width = <8>;
+ ti,driver-strength-ohm = <50>;
+ disable-wp;
+ non-removable;
+ cap-mmc-hw-reset;
+ no-sd;
+ /*
+ * MMC controller supports switching between 1.8V and 3.3V signalling.
+ * However MMC0 (unlike MMC1) does not integrate an LDO.
+ * Explicitly link a regulator node for indicating to the driver which
+ * voltages are actually usable.
+ */
+ vqmmc-supply = <&vdd_mmc0>;
+ status = "okay";
+};
+
+/*
+ * microSD is on carrier - however since SoC can boot from it,
+ * configure it just in case.
+ */
+&sdhci1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mmc1_default_pins>;
+ bus-width = <4>;
+ ti,driver-strength-ohm = <50>;
+ disable-wp;
+ status = "okay";
+};
+
+/*
+ * USB settings are a carrier choice - however since SoC can boot from it,
+ * configure as USB-2.0 OTG here, keeping USB-3 serdes disabled.
+ */
+&usb0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&usb0_default_pins>;
+ dr_mode = "otg";
+ maximum-speed = "high-speed";
+};
+
+&usbss0 {
+ ti,vbus-divider;
+ ti,usb2-only;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am642-tqma64xxl-mbax4xxl.dts b/arch/arm64/boot/dts/ti/k3-am642-tqma64xxl-mbax4xxl.dts
index 55102d35cecc..1f4dc5ad1696 100644
--- a/arch/arm64/boot/dts/ti/k3-am642-tqma64xxl-mbax4xxl.dts
+++ b/arch/arm64/boot/dts/ti/k3-am642-tqma64xxl-mbax4xxl.dts
@@ -422,7 +422,6 @@
cd-gpios = <&main_gpio1 77 GPIO_ACTIVE_LOW>;
disable-wp;
no-mmc;
- ti,driver-strength-ohm = <50>;
ti,fails-without-test-cd;
/* Enabled by overlay */
};
diff --git a/arch/arm64/boot/dts/ti/k3-am642.dtsi b/arch/arm64/boot/dts/ti/k3-am642.dtsi
index 7a6eedea3aae..8589ee55ef09 100644
--- a/arch/arm64/boot/dts/ti/k3-am642.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am642.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM642 SoC family in Dual core configuration
*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-iot2050-arduino-connector.dtsi b/arch/arm64/boot/dts/ti/k3-am65-iot2050-arduino-connector.dtsi
new file mode 100644
index 000000000000..7ff0abd7c62e
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am65-iot2050-arduino-connector.dtsi
@@ -0,0 +1,768 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Siemens AG, 2018-2023
+ *
+ * Authors:
+ * Le Jin <le.jin@siemens.com>
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * Common bits for IOT2050 variants with Arduino connector
+ */
+
+&wkup_pmx0 {
+ pinctrl-names =
+ "default",
+ "d0-uart0-rxd", "d0-gpio", "d0-gpio-pullup", "d0-gpio-pulldown",
+ "d1-uart0-txd", "d1-gpio", "d1-gpio-pullup", "d1-gpio-pulldown",
+ "d2-uart0-ctsn", "d2-gpio", "d2-gpio-pullup", "d2-gpio-pulldown",
+ "d3-uart0-rtsn", "d3-gpio", "d3-gpio-pullup", "d3-gpio-pulldown",
+ "d10-spi0-cs0", "d10-gpio", "d10-gpio-pullup", "d10-gpio-pulldown",
+ "d11-spi0-d0", "d11-gpio", "d11-gpio-pullup", "d11-gpio-pulldown",
+ "d12-spi0-d1", "d12-gpio", "d12-gpio-pullup", "d12-gpio-pulldown",
+ "d13-spi0-clk", "d13-gpio", "d13-gpio-pullup", "d13-gpio-pulldown",
+ "a0-gpio", "a0-gpio-pullup", "a0-gpio-pulldown",
+ "a1-gpio", "a1-gpio-pullup", "a1-gpio-pulldown",
+ "a2-gpio", "a2-gpio-pullup", "a2-gpio-pulldown",
+ "a3-gpio", "a3-gpio-pullup", "a3-gpio-pulldown",
+ "a4-gpio", "a4-gpio-pullup", "a4-gpio-pulldown",
+ "a5-gpio", "a5-gpio-pullup", "a5-gpio-pulldown";
+
+ pinctrl-0 = <&d0_uart0_rxd>;
+ pinctrl-1 = <&d0_uart0_rxd>;
+ pinctrl-2 = <&d0_gpio>;
+ pinctrl-3 = <&d0_gpio_pullup>;
+ pinctrl-4 = <&d0_gpio_pulldown>;
+ pinctrl-5 = <&d1_uart0_txd>;
+ pinctrl-6 = <&d1_gpio>;
+ pinctrl-7 = <&d1_gpio_pullup>;
+ pinctrl-8 = <&d1_gpio_pulldown>;
+ pinctrl-9 = <&d2_uart0_ctsn>;
+ pinctrl-10 = <&d2_gpio>;
+ pinctrl-11 = <&d2_gpio_pullup>;
+ pinctrl-12 = <&d2_gpio_pulldown>;
+ pinctrl-13 = <&d3_uart0_rtsn>;
+ pinctrl-14 = <&d3_gpio>;
+ pinctrl-15 = <&d3_gpio_pullup>;
+ pinctrl-16 = <&d3_gpio_pulldown>;
+ pinctrl-17 = <&d10_spi0_cs0>;
+ pinctrl-18 = <&d10_gpio>;
+ pinctrl-19 = <&d10_gpio_pullup>;
+ pinctrl-20 = <&d10_gpio_pulldown>;
+ pinctrl-21 = <&d11_spi0_d0>;
+ pinctrl-22 = <&d11_gpio>;
+ pinctrl-23 = <&d11_gpio_pullup>;
+ pinctrl-24 = <&d11_gpio_pulldown>;
+ pinctrl-25 = <&d12_spi0_d1>;
+ pinctrl-26 = <&d12_gpio>;
+ pinctrl-27 = <&d12_gpio_pullup>;
+ pinctrl-28 = <&d12_gpio_pulldown>;
+ pinctrl-29 = <&d13_spi0_clk>;
+ pinctrl-30 = <&d13_gpio>;
+ pinctrl-31 = <&d13_gpio_pullup>;
+ pinctrl-32 = <&d13_gpio_pulldown>;
+ pinctrl-33 = <&a0_gpio>;
+ pinctrl-34 = <&a0_gpio_pullup>;
+ pinctrl-35 = <&a0_gpio_pulldown>;
+ pinctrl-36 = <&a1_gpio>;
+ pinctrl-37 = <&a1_gpio_pullup>;
+ pinctrl-38 = <&a1_gpio_pulldown>;
+ pinctrl-39 = <&a2_gpio>;
+ pinctrl-40 = <&a2_gpio_pullup>;
+ pinctrl-41 = <&a2_gpio_pulldown>;
+ pinctrl-42 = <&a3_gpio>;
+ pinctrl-43 = <&a3_gpio_pullup>;
+ pinctrl-44 = <&a3_gpio_pulldown>;
+ pinctrl-45 = <&a4_gpio>;
+ pinctrl-46 = <&a4_gpio_pullup>;
+ pinctrl-47 = <&a4_gpio_pulldown>;
+ pinctrl-48 = <&a5_gpio>;
+ pinctrl-49 = <&a5_gpio_pullup>;
+ pinctrl-50 = <&a5_gpio_pulldown>;
+
+ d0_uart0_rxd: d0-uart0-rxd-pins {
+ pinctrl-single,pins = <
+ /* (P4) MCU_UART0_RXD */
+ AM65X_WKUP_IOPAD(0x0044, PIN_INPUT, 4)
+ >;
+ };
+
+ d0_gpio: d0-gpio-pins {
+ pinctrl-single,pins = <
+ /* (P4) WKUP_GPIO0_29 */
+ AM65X_WKUP_IOPAD(0x0044, PIN_INPUT, 7)
+ >;
+ };
+
+ d0_gpio_pullup: d0-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (P4) WKUP_GPIO0_29 */
+ AM65X_WKUP_IOPAD(0x0044, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d0_gpio_pulldown: d0-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (P4) WKUP_GPIO0_29 */
+ AM65X_WKUP_IOPAD(0x0044, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d1_uart0_txd: d1-uart0-txd-pins {
+ pinctrl-single,pins = <
+ /* (P5) MCU_UART0_TXD */
+ AM65X_WKUP_IOPAD(0x0048, PIN_OUTPUT, 4)
+ >;
+ };
+
+ d1_gpio: d1-gpio-pins {
+ pinctrl-single,pins = <
+ /* (P5) WKUP_GPIO0_30 */
+ AM65X_WKUP_IOPAD(0x0048, PIN_INPUT, 7)
+ >;
+ };
+
+ d1_gpio_pullup: d1-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (P5) WKUP_GPIO0_30 */
+ AM65X_WKUP_IOPAD(0x0048, PIN_INPUT, 7)
+ >;
+ };
+
+ d1_gpio_pulldown: d1-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (P5) WKUP_GPIO0_30 */
+ AM65X_WKUP_IOPAD(0x0048, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d2_uart0_ctsn: d2-uart0-ctsn-pins {
+ pinctrl-single,pins = <
+ /* (P1) MCU_UART0_CTSn */
+ AM65X_WKUP_IOPAD(0x004C, PIN_INPUT, 4)
+ >;
+ };
+
+ d2_gpio: d2-gpio-pins {
+ pinctrl-single,pins = <
+ /* (P5) WKUP_GPIO0_31 */
+ AM65X_WKUP_IOPAD(0x004C, PIN_INPUT, 7)
+ >;
+ };
+
+ d2_gpio_pullup: d2-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (P5) WKUP_GPIO0_31 */
+ AM65X_WKUP_IOPAD(0x004C, PIN_INPUT, 7)
+ >;
+ };
+
+ d2_gpio_pulldown: d2-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (P5) WKUP_GPIO0_31 */
+ AM65X_WKUP_IOPAD(0x004C, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d3_uart0_rtsn: d3-uart0-rtsn-pins {
+ pinctrl-single,pins = <
+ /* (N3) MCU_UART0_RTSn */
+ AM65X_WKUP_IOPAD(0x0054, PIN_OUTPUT, 4)
+ >;
+ };
+
+ d3_gpio: d3-gpio-pins {
+ pinctrl-single,pins = <
+ /* (N3) WKUP_GPIO0_33 */
+ AM65X_WKUP_IOPAD(0x0054, PIN_INPUT, 7)
+ >;
+ };
+
+ d3_gpio_pullup: d3-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (N3) WKUP_GPIO0_33 */
+ AM65X_WKUP_IOPAD(0x0054, PIN_INPUT, 7)
+ >;
+ };
+
+ d3_gpio_pulldown: d3-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (N3) WKUP_GPIO0_33 */
+ AM65X_WKUP_IOPAD(0x0054, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d10_spi0_cs0: d10-spi0-cs0-pins {
+ pinctrl-single,pins = <
+ /* (Y4) MCU_SPI0_CS0 */
+ AM65X_WKUP_IOPAD(0x009c, PIN_OUTPUT, 0)
+ >;
+ };
+
+ d10_gpio: d10-gpio-pins {
+ pinctrl-single,pins = <
+ /* (Y4) WKUP_GPIO0_51 */
+ AM65X_WKUP_IOPAD(0x009c, PIN_INPUT, 7)
+ >;
+ };
+
+ d10_gpio_pullup: d10-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (Y4) WKUP_GPIO0_51 */
+ AM65X_WKUP_IOPAD(0x009c, PIN_INPUT, 7)
+ >;
+ };
+
+ d10_gpio_pulldown: d10-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (Y4) WKUP_GPIO0_51 */
+ AM65X_WKUP_IOPAD(0x009c, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d11_spi0_d0: d11-spi0-d0-pins {
+ pinctrl-single,pins = <
+ /* (Y3) MCU_SPI0_D0 */
+ AM65X_WKUP_IOPAD(0x0094, PIN_INPUT, 0)
+ >;
+ };
+
+ d11_gpio: d11-gpio-pins {
+ pinctrl-single,pins = <
+ /* (Y3) WKUP_GPIO0_49 */
+ AM65X_WKUP_IOPAD(0x0094, PIN_INPUT, 7)
+ >;
+ };
+
+ d11_gpio_pullup: d11-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (Y3) WKUP_GPIO0_49 */
+ AM65X_WKUP_IOPAD(0x0094, PIN_INPUT, 7)
+ >;
+ };
+
+ d11_gpio_pulldown: d11-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (Y3) WKUP_GPIO0_49 */
+ AM65X_WKUP_IOPAD(0x0094, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d12_spi0_d1: d12-spi0-d1-pins {
+ pinctrl-single,pins = <
+ /* (Y2) MCU_SPI0_D1 */
+ AM65X_WKUP_IOPAD(0x0098, PIN_INPUT, 0)
+ >;
+ };
+
+ d12_gpio: d12-gpio-pins {
+ pinctrl-single,pins = <
+ /* (Y2) WKUP_GPIO0_50 */
+ AM65X_WKUP_IOPAD(0x0098, PIN_INPUT, 7)
+ >;
+ };
+
+ d12_gpio_pullup: d12-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (Y2) WKUP_GPIO0_50 */
+ AM65X_WKUP_IOPAD(0x0098, PIN_INPUT, 7)
+ >;
+ };
+
+ d12_gpio_pulldown: d12-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (Y2) WKUP_GPIO0_50 */
+ AM65X_WKUP_IOPAD(0x0098, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d13_spi0_clk: d13-spi0-clk-pins {
+ pinctrl-single,pins = <
+ /* (Y1) MCU_SPI0_CLK */
+ AM65X_WKUP_IOPAD(0x0090, PIN_INPUT, 0)
+ >;
+ };
+
+ d13_gpio: d13-gpio-pins {
+ pinctrl-single,pins = <
+ /* (Y1) WKUP_GPIO0_48 */
+ AM65X_WKUP_IOPAD(0x0090, PIN_INPUT, 7)
+ >;
+ };
+
+ d13_gpio_pullup: d13-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (Y1) WKUP_GPIO0_48 */
+ AM65X_WKUP_IOPAD(0x0090, PIN_INPUT, 7)
+ >;
+ };
+
+ d13_gpio_pulldown: d13-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (Y1) WKUP_GPIO0_48 */
+ AM65X_WKUP_IOPAD(0x0090, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ a0_gpio: a0-gpio-pins {
+ pinctrl-single,pins = <
+ /* (L6) WKUP_GPIO0_45 */
+ AM65X_WKUP_IOPAD(0x0084, PIN_INPUT, 7)
+ >;
+ };
+
+ a0_gpio_pullup: a0-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (L6) WKUP_GPIO0_45 */
+ AM65X_WKUP_IOPAD(0x0084, PIN_INPUT, 7)
+ >;
+ };
+
+ a0_gpio_pulldown: a0-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (L6) WKUP_GPIO0_45 */
+ AM65X_WKUP_IOPAD(0x0084, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ a1_gpio: a1-gpio-pins {
+ pinctrl-single,pins = <
+ /* (M6) WKUP_GPIO0_44 */
+ AM65X_WKUP_IOPAD(0x0080, PIN_INPUT, 7)
+ >;
+ };
+
+ a1_gpio_pullup: a1-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (M6) WKUP_GPIO0_44 */
+ AM65X_WKUP_IOPAD(0x0080, PIN_INPUT, 7)
+ >;
+ };
+
+ a1_gpio_pulldown: a1-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (M6) WKUP_GPIO0_44 */
+ AM65X_WKUP_IOPAD(0x0080, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ a2_gpio: a2-gpio-pins {
+ pinctrl-single,pins = <
+ /* (L5) WKUP_GPIO0_43 */
+ AM65X_WKUP_IOPAD(0x007C, PIN_INPUT, 7)
+ >;
+ };
+
+ a2_gpio_pullup: a2-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (L5) WKUP_GPIO0_43 */
+ AM65X_WKUP_IOPAD(0x007C, PIN_INPUT, 7)
+ >;
+ };
+
+ a2_gpio_pulldown: a2-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (L5) WKUP_GPIO0_43 */
+ AM65X_WKUP_IOPAD(0x007C, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ a3_gpio: a3-gpio-pins {
+ pinctrl-single,pins = <
+ /* (M5) WKUP_GPIO0_39 */
+ AM65X_WKUP_IOPAD(0x006C, PIN_INPUT, 7)
+ >;
+ };
+
+ a3_gpio_pullup: a3-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (M5) WKUP_GPIO0_39 */
+ AM65X_WKUP_IOPAD(0x006C, PIN_INPUT, 7)
+ >;
+ };
+
+ a3_gpio_pulldown: a3-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (M5) WKUP_GPIO0_39 */
+ AM65X_WKUP_IOPAD(0x006C, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ a4_gpio: a4-gpio-pins {
+ pinctrl-single,pins = <
+ /* (L2) WKUP_GPIO0_42 */
+ AM65X_WKUP_IOPAD(0x0078, PIN_INPUT, 7)
+ >;
+ };
+
+ a4_gpio_pullup: a4-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (L2) WKUP_GPIO0_42 */
+ AM65X_WKUP_IOPAD(0x0078, PIN_INPUT, 7)
+ >;
+ };
+
+ a4_gpio_pulldown: a4-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (L2) WKUP_GPIO0_42 */
+ AM65X_WKUP_IOPAD(0x0078, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ a5_gpio: a5-gpio-pins {
+ pinctrl-single,pins = <
+ /* (N5) WKUP_GPIO0_35 */
+ AM65X_WKUP_IOPAD(0x005C, PIN_INPUT, 7)
+ >;
+ };
+
+ a5_gpio_pullup: a5-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (N5) WKUP_GPIO0_35 */
+ AM65X_WKUP_IOPAD(0x005C, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ a5_gpio_pulldown: a5-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (N5) WKUP_GPIO0_35 */
+ AM65X_WKUP_IOPAD(0x005C, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ wkup_i2c0_pins_default: wkup-i2c0-default-pins {
+ pinctrl-single,pins = <
+ /* (AC7) WKUP_I2C0_SCL */
+ AM65X_WKUP_IOPAD(0x00e0, PIN_INPUT, 0)
+ /* (AD6) WKUP_I2C0_SDA */
+ AM65X_WKUP_IOPAD(0x00e4, PIN_INPUT, 0)
+ >;
+ };
+
+ arduino_i2c_aio_switch_pins_default: arduino-i2c-aio-switch-default-pins {
+ pinctrl-single,pins = <
+ /* (R2) WKUP_GPIO0_21 */
+ AM65X_WKUP_IOPAD(0x0024, PIN_OUTPUT, 7)
+ >;
+ };
+
+ arduino_io_oe_pins_default: arduino-io-oe-default-pins {
+ pinctrl-single,pins = <
+ /* (N4) WKUP_GPIO0_34 */
+ AM65X_WKUP_IOPAD(0x0058, PIN_OUTPUT, 7)
+ /* (M2) WKUP_GPIO0_36 */
+ AM65X_WKUP_IOPAD(0x0060, PIN_OUTPUT, 7)
+ /* (M3) WKUP_GPIO0_37 */
+ AM65X_WKUP_IOPAD(0x0064, PIN_OUTPUT, 7)
+ /* (M4) WKUP_GPIO0_38 */
+ AM65X_WKUP_IOPAD(0x0068, PIN_OUTPUT, 7)
+ /* (M1) WKUP_GPIO0_41 */
+ AM65X_WKUP_IOPAD(0x0074, PIN_OUTPUT, 7)
+ >;
+ };
+};
+
+&main_pmx0 {
+ pinctrl-names =
+ "default",
+ "d4-ehrpwm0-a", "d4-gpio", "d4-gpio-pullup", "d4-gpio-pulldown",
+ "d5-ehrpwm1-a", "d5-gpio", "d5-gpio-pullup", "d5-gpio-pulldown",
+ "d6-ehrpwm2-a", "d6-gpio", "d6-gpio-pullup", "d6-gpio-pulldown",
+ "d7-ehrpwm3-a", "d7-gpio", "d7-gpio-pullup", "d7-gpio-pulldown",
+ "d8-ehrpwm4-a", "d8-gpio", "d8-gpio-pullup", "d8-gpio-pulldown",
+ "d9-ehrpwm5-a", "d9-gpio", "d9-gpio-pullup", "d9-gpio-pulldown";
+
+ pinctrl-0 = <&d4_ehrpwm0_a>;
+ pinctrl-1 = <&d4_ehrpwm0_a>;
+ pinctrl-2 = <&d4_gpio>;
+ pinctrl-3 = <&d4_gpio_pullup>;
+ pinctrl-4 = <&d4_gpio_pulldown>;
+
+ pinctrl-5 = <&d5_ehrpwm1_a>;
+ pinctrl-6 = <&d5_gpio>;
+ pinctrl-7 = <&d5_gpio_pullup>;
+ pinctrl-8 = <&d5_gpio_pulldown>;
+
+ pinctrl-9 = <&d6_ehrpwm2_a>;
+ pinctrl-10 = <&d6_gpio>;
+ pinctrl-11 = <&d6_gpio_pullup>;
+ pinctrl-12 = <&d6_gpio_pulldown>;
+
+ pinctrl-13 = <&d7_ehrpwm3_a>;
+ pinctrl-14 = <&d7_gpio>;
+ pinctrl-15 = <&d7_gpio_pullup>;
+ pinctrl-16 = <&d7_gpio_pulldown>;
+
+ pinctrl-17 = <&d8_ehrpwm4_a>;
+ pinctrl-18 = <&d8_gpio>;
+ pinctrl-19 = <&d8_gpio_pullup>;
+ pinctrl-20 = <&d8_gpio_pulldown>;
+
+ pinctrl-21 = <&d9_ehrpwm5_a>;
+ pinctrl-22 = <&d9_gpio>;
+ pinctrl-23 = <&d9_gpio_pullup>;
+ pinctrl-24 = <&d9_gpio_pulldown>;
+
+ d4_ehrpwm0_a: d4-ehrpwm0-a-pins {
+ pinctrl-single,pins = <
+ /* (AG18) EHRPWM0_A */
+ AM65X_IOPAD(0x0084, PIN_OUTPUT, 5)
+ >;
+ };
+
+ d4_gpio: d4-gpio-pins {
+ pinctrl-single,pins = <
+ /* (AG18) GPIO0_33 */
+ AM65X_IOPAD(0x0084, PIN_INPUT, 7)
+ >;
+ };
+
+ d4_gpio_pullup: d4-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (AG18) GPIO0_33 */
+ AM65X_IOPAD(0x0084, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d4_gpio_pulldown: d4-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (AG18) GPIO0_33 */
+ AM65X_IOPAD(0x0084, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d5_ehrpwm1_a: d5-ehrpwm1-a-pins {
+ pinctrl-single,pins = <
+ /* (AF17) EHRPWM1_A */
+ AM65X_IOPAD(0x008C, PIN_OUTPUT, 5)
+ >;
+ };
+
+ d5_gpio: d5-gpio-pins {
+ pinctrl-single,pins = <
+ /* (AF17) GPIO0_35 */
+ AM65X_IOPAD(0x008C, PIN_INPUT, 7)
+ >;
+ };
+
+ d5_gpio_pullup: d5-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (AF17) GPIO0_35 */
+ AM65X_IOPAD(0x008C, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d5_gpio_pulldown: d5-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (AF17) GPIO0_35 */
+ AM65X_IOPAD(0x008C, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d6_ehrpwm2_a: d6-ehrpwm2-a-pins {
+ pinctrl-single,pins = <
+ /* (AH16) EHRPWM2_A */
+ AM65X_IOPAD(0x0098, PIN_OUTPUT, 5)
+ >;
+ };
+
+ d6_gpio: d6-gpio-pins {
+ pinctrl-single,pins = <
+ /* (AH16) GPIO0_38 */
+ AM65X_IOPAD(0x0098, PIN_INPUT, 7)
+ >;
+ };
+
+ d6_gpio_pullup: d6-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (AH16) GPIO0_38 */
+ AM65X_IOPAD(0x0098, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d6_gpio_pulldown: d6-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (AH16) GPIO0_38 */
+ AM65X_IOPAD(0x0098, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d7_ehrpwm3_a: d7-ehrpwm3-a-pins {
+ pinctrl-single,pins = <
+ /* (AH15) EHRPWM3_A */
+ AM65X_IOPAD(0x00AC, PIN_OUTPUT, 5)
+ >;
+ };
+
+ d7_gpio: d7-gpio-pins {
+ pinctrl-single,pins = <
+ /* (AH15) GPIO0_43 */
+ AM65X_IOPAD(0x00AC, PIN_INPUT, 7)
+ >;
+ };
+
+ d7_gpio_pullup: d7-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (AH15) GPIO0_43 */
+ AM65X_IOPAD(0x00AC, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d7_gpio_pulldown: d7-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (AH15) GPIO0_43 */
+ AM65X_IOPAD(0x00AC, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d8_ehrpwm4_a: d8-ehrpwm4-a-pins {
+ pinctrl-single,pins = <
+ /* (AG15) EHRPWM4_A */
+ AM65X_IOPAD(0x00C0, PIN_OUTPUT, 5)
+ >;
+ };
+
+ d8_gpio: d8-gpio-pins {
+ pinctrl-single,pins = <
+ /* (AG15) GPIO0_48 */
+ AM65X_IOPAD(0x00C0, PIN_INPUT, 7)
+ >;
+ };
+
+ d8_gpio_pullup: d8-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (AG15) GPIO0_48 */
+ AM65X_IOPAD(0x00C0, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d8_gpio_pulldown: d8-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (AG15) GPIO0_48 */
+ AM65X_IOPAD(0x00C0, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+
+ d9_ehrpwm5_a: d9-ehrpwm5-a-pins {
+ pinctrl-single,pins = <
+ /* (AD15) EHRPWM5_A */
+ AM65X_IOPAD(0x00CC, PIN_OUTPUT, 5)
+ >;
+ };
+
+ d9_gpio: d9-gpio-pins {
+ pinctrl-single,pins = <
+ /* (AD15) GPIO0_51 */
+ AM65X_IOPAD(0x00CC, PIN_INPUT, 7)
+ >;
+ };
+
+ d9_gpio_pullup: d9-gpio-pullup-pins {
+ pinctrl-single,pins = <
+ /* (AD15) GPIO0_51 */
+ AM65X_IOPAD(0x00CC, PIN_INPUT_PULLUP, 7)
+ >;
+ };
+
+ d9_gpio_pulldown: d9-gpio-pulldown-pins {
+ pinctrl-single,pins = <
+ /* (AD15) GPIO0_51 */
+ AM65X_IOPAD(0x00CC, PIN_INPUT_PULLDOWN, 7)
+ >;
+ };
+};
+
+&main_gpio0 {
+ gpio-line-names =
+ "main_gpio0-base", "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "", "",
+ "", "", "", "IO4", "", "IO5", "", "", "IO6", "",
+ "", "", "", "IO7", "", "", "", "", "IO8", "",
+ "", "IO9";
+};
+
+&wkup_gpio0 {
+ pinctrl-names = "default";
+ pinctrl-0 =
+ <&arduino_i2c_aio_switch_pins_default>,
+ <&arduino_io_oe_pins_default>,
+ <&push_button_pins_default>,
+ <&db9_com_mode_pins_default>;
+ gpio-line-names =
+ /* 0..9 */
+ "wkup_gpio0-base", "", "", "", "UART0-mode1", "UART0-mode0",
+ "UART0-enable", "UART0-terminate", "", "WIFI-disable",
+ /* 10..19 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 20..29 */
+ "", "A4A5-I2C-mux", "", "", "", "USER-button", "", "", "","IO0",
+ /* 30..39 */
+ "IO1", "IO2", "", "IO3", "IO17-direction", "A5",
+ "IO16-direction", "IO15-direction", "IO14-direction", "A3",
+ /* 40..49 */
+ "", "IO18-direction", "A4", "A2", "A1", "A0", "", "", "IO13",
+ "IO11",
+ /* 50..51 */
+ "IO12", "IO10";
+};
+
+&wkup_i2c0 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&wkup_i2c0_pins_default>;
+ clock-frequency = <400000>;
+};
+
+&mcu_i2c0 {
+ /* D4200 */
+ pcal9535_1: gpio@20 {
+ compatible = "nxp,pcal9535";
+ reg = <0x20>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-line-names =
+ "A0-pull", "A1-pull", "A2-pull", "A3-pull", "A4-pull",
+ "A5-pull", "", "",
+ "IO14-enable", "IO15-enable", "IO16-enable",
+ "IO17-enable", "IO18-enable", "IO19-enable";
+ };
+
+ /* D4201 */
+ pcal9535_2: gpio@21 {
+ compatible = "nxp,pcal9535";
+ reg = <0x21>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-line-names =
+ "IO0-direction", "IO1-direction", "IO2-direction",
+ "IO3-direction", "IO4-direction", "IO5-direction",
+ "IO6-direction", "IO7-direction",
+ "IO8-direction", "IO9-direction", "IO10-direction",
+ "IO11-direction", "IO12-direction", "IO13-direction",
+ "IO19-direction";
+ };
+
+ /* D4202 */
+ pcal9535_3: gpio@25 {
+ compatible = "nxp,pcal9535";
+ reg = <0x25>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-line-names =
+ "IO0-pull", "IO1-pull", "IO2-pull", "IO3-pull",
+ "IO4-pull", "IO5-pull", "IO6-pull", "IO7-pull",
+ "IO8-pull", "IO9-pull", "IO10-pull", "IO11-pull",
+ "IO12-pull", "IO13-pull";
+ };
+};
+
+&mcu_uart0 {
+ status = "okay";
+};
+
+&tscadc1 {
+ status = "okay";
+ adc {
+ ti,adc-channels = <0 1 2 3 4 5>;
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi b/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi
index 1d1979859583..c50a585dd638 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2021-2023
*
@@ -8,10 +8,7 @@
* Common bits of the IOT2050 Basic and Advanced variants, PG1
*/
-&dss {
- assigned-clocks = <&k3_clks 67 2>;
- assigned-clock-parents = <&k3_clks 67 5>;
-};
+#include "k3-am65-iot2050-dp.dtsi"
&serdes0 {
status = "disabled";
diff --git a/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi b/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi
index e9b57b87e42e..e2584a5efe34 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) Siemens AG, 2021
+ * Copyright (c) Siemens AG, 2021-2023
*
* Authors:
* Chao Zeng <chao.zeng@siemens.com>
@@ -9,6 +9,11 @@
* Common bits of the IOT2050 Basic and Advanced variants, PG2
*/
+&mcu_r5fss0 {
+ /* lock-step mode not supported on PG2 boards */
+ ti,cluster-mode = <0>;
+};
+
&main_pmx0 {
cp2102n_reset_pin_default: cp2102n-reset-default-pins {
pinctrl-single,pins = <
@@ -33,21 +38,3 @@
/* Workaround needed to get DP clock of 154Mhz */
assigned-clocks = <&k3_clks 67 0>;
};
-
-&serdes0 {
- assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
- assigned-clock-parents = <&k3_clks 153 7>, <&k3_clks 153 4>;
-};
-
-&dwc3_0 {
- assigned-clock-parents = <&k3_clks 151 4>, /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */
- <&k3_clks 151 8>; /* set PIPE3_TXB_CLK to WIZ8B2M4VSB */
- phys = <&serdes0 PHY_TYPE_USB3 0>;
- phy-names = "usb3-phy";
-};
-
-&usb0 {
- maximum-speed = "super-speed";
- snps,dis-u1-entry-quirk;
- snps,dis-u2-entry-quirk;
-};
diff --git a/arch/arm64/boot/dts/ti/k3-am65-iot2050-common.dtsi b/arch/arm64/boot/dts/ti/k3-am65-iot2050-common.dtsi
index 61a634afaa4f..ef34b851e178 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-iot2050-common.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-iot2050-common.dtsi
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) Siemens AG, 2018-2021
+ * Copyright (c) Siemens AG, 2018-2024
*
* Authors:
* Le Jin <le.jin@siemens.com>
@@ -9,6 +9,7 @@
* Common bits of the IOT2050 Basic and Advanced variants, PG1 and PG2
*/
+#include <dt-bindings/leds/common.h>
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/net/ti-dp83867.h>
@@ -75,6 +76,12 @@
alignment = <0x1000>;
no-map;
};
+
+ /* To reserve the power-on(PON) reason for watchdog reset */
+ wdt_reset_memory_region: wdt-memory@a2200000 {
+ reg = <0x00 0xa2200000 0x00 0x1000>;
+ no-map;
+ };
};
leds {
@@ -82,28 +89,46 @@
pinctrl-names = "default";
pinctrl-0 = <&leds_pins_default>;
- status-led-red {
+ led-0 {
+ color = <LED_COLOR_ID_RED>;
+ function = LED_FUNCTION_STATUS;
+ label = "status-led-red";
gpios = <&wkup_gpio0 32 GPIO_ACTIVE_HIGH>;
panic-indicator;
};
- status-led-green {
+ led-1 {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_STATUS;
+ label = "status-led-green";
gpios = <&wkup_gpio0 24 GPIO_ACTIVE_HIGH>;
};
- user-led1-red {
+ led-2 {
+ color = <LED_COLOR_ID_RED>;
+ function = LED_FUNCTION_INDICATOR;
+ label = "user-led1-red";
gpios = <&pcal9535_3 14 GPIO_ACTIVE_HIGH>;
};
- user-led1-green {
+ led-3 {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_INDICATOR;
+ label = "user-led1-green";
gpios = <&pcal9535_2 15 GPIO_ACTIVE_HIGH>;
};
- user-led2-red {
+ led-4 {
+ color = <LED_COLOR_ID_RED>;
+ function = LED_FUNCTION_INDICATOR;
+ label = "user-led2-red";
gpios = <&wkup_gpio0 17 GPIO_ACTIVE_HIGH>;
};
- user-led2-green {
+ led-5 {
+ color = <LED_COLOR_ID_RED>;
+ function = LED_FUNCTION_INDICATOR;
+ label = "user-led2-green";
gpios = <&wkup_gpio0 22 GPIO_ACTIVE_HIGH>;
};
};
@@ -186,434 +211,6 @@
};
&wkup_pmx0 {
- pinctrl-names =
- "default",
- "d0-uart0-rxd", "d0-gpio", "d0-gpio-pullup", "d0-gpio-pulldown",
- "d1-uart0-txd", "d1-gpio", "d1-gpio-pullup", "d1-gpio-pulldown",
- "d2-uart0-ctsn", "d2-gpio", "d2-gpio-pullup", "d2-gpio-pulldown",
- "d3-uart0-rtsn", "d3-gpio", "d3-gpio-pullup", "d3-gpio-pulldown",
- "d10-spi0-cs0", "d10-gpio", "d10-gpio-pullup", "d10-gpio-pulldown",
- "d11-spi0-d0", "d11-gpio", "d11-gpio-pullup", "d11-gpio-pulldown",
- "d12-spi0-d1", "d12-gpio", "d12-gpio-pullup", "d12-gpio-pulldown",
- "d13-spi0-clk", "d13-gpio", "d13-gpio-pullup", "d13-gpio-pulldown",
- "a0-gpio", "a0-gpio-pullup", "a0-gpio-pulldown",
- "a1-gpio", "a1-gpio-pullup", "a1-gpio-pulldown",
- "a2-gpio", "a2-gpio-pullup", "a2-gpio-pulldown",
- "a3-gpio", "a3-gpio-pullup", "a3-gpio-pulldown",
- "a4-gpio", "a4-gpio-pullup", "a4-gpio-pulldown",
- "a5-gpio", "a5-gpio-pullup", "a5-gpio-pulldown";
-
- pinctrl-0 = <&d0_uart0_rxd>;
- pinctrl-1 = <&d0_uart0_rxd>;
- pinctrl-2 = <&d0_gpio>;
- pinctrl-3 = <&d0_gpio_pullup>;
- pinctrl-4 = <&d0_gpio_pulldown>;
- pinctrl-5 = <&d1_uart0_txd>;
- pinctrl-6 = <&d1_gpio>;
- pinctrl-7 = <&d1_gpio_pullup>;
- pinctrl-8 = <&d1_gpio_pulldown>;
- pinctrl-9 = <&d2_uart0_ctsn>;
- pinctrl-10 = <&d2_gpio>;
- pinctrl-11 = <&d2_gpio_pullup>;
- pinctrl-12 = <&d2_gpio_pulldown>;
- pinctrl-13 = <&d3_uart0_rtsn>;
- pinctrl-14 = <&d3_gpio>;
- pinctrl-15 = <&d3_gpio_pullup>;
- pinctrl-16 = <&d3_gpio_pulldown>;
- pinctrl-17 = <&d10_spi0_cs0>;
- pinctrl-18 = <&d10_gpio>;
- pinctrl-19 = <&d10_gpio_pullup>;
- pinctrl-20 = <&d10_gpio_pulldown>;
- pinctrl-21 = <&d11_spi0_d0>;
- pinctrl-22 = <&d11_gpio>;
- pinctrl-23 = <&d11_gpio_pullup>;
- pinctrl-24 = <&d11_gpio_pulldown>;
- pinctrl-25 = <&d12_spi0_d1>;
- pinctrl-26 = <&d12_gpio>;
- pinctrl-27 = <&d12_gpio_pullup>;
- pinctrl-28 = <&d12_gpio_pulldown>;
- pinctrl-29 = <&d13_spi0_clk>;
- pinctrl-30 = <&d13_gpio>;
- pinctrl-31 = <&d13_gpio_pullup>;
- pinctrl-32 = <&d13_gpio_pulldown>;
- pinctrl-33 = <&a0_gpio>;
- pinctrl-34 = <&a0_gpio_pullup>;
- pinctrl-35 = <&a0_gpio_pulldown>;
- pinctrl-36 = <&a1_gpio>;
- pinctrl-37 = <&a1_gpio_pullup>;
- pinctrl-38 = <&a1_gpio_pulldown>;
- pinctrl-39 = <&a2_gpio>;
- pinctrl-40 = <&a2_gpio_pullup>;
- pinctrl-41 = <&a2_gpio_pulldown>;
- pinctrl-42 = <&a3_gpio>;
- pinctrl-43 = <&a3_gpio_pullup>;
- pinctrl-44 = <&a3_gpio_pulldown>;
- pinctrl-45 = <&a4_gpio>;
- pinctrl-46 = <&a4_gpio_pullup>;
- pinctrl-47 = <&a4_gpio_pulldown>;
- pinctrl-48 = <&a5_gpio>;
- pinctrl-49 = <&a5_gpio_pullup>;
- pinctrl-50 = <&a5_gpio_pulldown>;
-
- d0_uart0_rxd: d0-uart0-rxd-pins {
- pinctrl-single,pins = <
- /* (P4) MCU_UART0_RXD */
- AM65X_WKUP_IOPAD(0x0044, PIN_INPUT, 4)
- >;
- };
-
- d0_gpio: d0-gpio-pins {
- pinctrl-single,pins = <
- /* (P4) WKUP_GPIO0_29 */
- AM65X_WKUP_IOPAD(0x0044, PIN_INPUT, 7)
- >;
- };
-
- d0_gpio_pullup: d0-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (P4) WKUP_GPIO0_29 */
- AM65X_WKUP_IOPAD(0x0044, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d0_gpio_pulldown: d0-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (P4) WKUP_GPIO0_29 */
- AM65X_WKUP_IOPAD(0x0044, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d1_uart0_txd: d1-uart0-txd-pins {
- pinctrl-single,pins = <
- /* (P5) MCU_UART0_TXD */
- AM65X_WKUP_IOPAD(0x0048, PIN_OUTPUT, 4)
- >;
- };
-
- d1_gpio: d1-gpio-pins {
- pinctrl-single,pins = <
- /* (P5) WKUP_GPIO0_30 */
- AM65X_WKUP_IOPAD(0x0048, PIN_INPUT, 7)
- >;
- };
-
- d1_gpio_pullup: d1-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (P5) WKUP_GPIO0_30 */
- AM65X_WKUP_IOPAD(0x0048, PIN_INPUT, 7)
- >;
- };
-
- d1_gpio_pulldown: d1-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (P5) WKUP_GPIO0_30 */
- AM65X_WKUP_IOPAD(0x0048, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d2_uart0_ctsn: d2-uart0-ctsn-pins {
- pinctrl-single,pins = <
- /* (P1) MCU_UART0_CTSn */
- AM65X_WKUP_IOPAD(0x004C, PIN_INPUT, 4)
- >;
- };
-
- d2_gpio: d2-gpio-pins {
- pinctrl-single,pins = <
- /* (P5) WKUP_GPIO0_31 */
- AM65X_WKUP_IOPAD(0x004C, PIN_INPUT, 7)
- >;
- };
-
- d2_gpio_pullup: d2-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (P5) WKUP_GPIO0_31 */
- AM65X_WKUP_IOPAD(0x004C, PIN_INPUT, 7)
- >;
- };
-
- d2_gpio_pulldown: d2-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (P5) WKUP_GPIO0_31 */
- AM65X_WKUP_IOPAD(0x004C, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d3_uart0_rtsn: d3-uart0-rtsn-pins {
- pinctrl-single,pins = <
- /* (N3) MCU_UART0_RTSn */
- AM65X_WKUP_IOPAD(0x0054, PIN_OUTPUT, 4)
- >;
- };
-
- d3_gpio: d3-gpio-pins {
- pinctrl-single,pins = <
- /* (N3) WKUP_GPIO0_33 */
- AM65X_WKUP_IOPAD(0x0054, PIN_INPUT, 7)
- >;
- };
-
- d3_gpio_pullup: d3-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (N3) WKUP_GPIO0_33 */
- AM65X_WKUP_IOPAD(0x0054, PIN_INPUT, 7)
- >;
- };
-
- d3_gpio_pulldown: d3-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (N3) WKUP_GPIO0_33 */
- AM65X_WKUP_IOPAD(0x0054, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d10_spi0_cs0: d10-spi0-cs0-pins {
- pinctrl-single,pins = <
- /* (Y4) MCU_SPI0_CS0 */
- AM65X_WKUP_IOPAD(0x009c, PIN_OUTPUT, 0)
- >;
- };
-
- d10_gpio: d10-gpio-pins {
- pinctrl-single,pins = <
- /* (Y4) WKUP_GPIO0_51 */
- AM65X_WKUP_IOPAD(0x009c, PIN_INPUT, 7)
- >;
- };
-
- d10_gpio_pullup: d10-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (Y4) WKUP_GPIO0_51 */
- AM65X_WKUP_IOPAD(0x009c, PIN_INPUT, 7)
- >;
- };
-
- d10_gpio_pulldown: d10-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (Y4) WKUP_GPIO0_51 */
- AM65X_WKUP_IOPAD(0x009c, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d11_spi0_d0: d11-spi0-d0-pins {
- pinctrl-single,pins = <
- /* (Y3) MCU_SPI0_D0 */
- AM65X_WKUP_IOPAD(0x0094, PIN_INPUT, 0)
- >;
- };
-
- d11_gpio: d11-gpio-pins {
- pinctrl-single,pins = <
- /* (Y3) WKUP_GPIO0_49 */
- AM65X_WKUP_IOPAD(0x0094, PIN_INPUT, 7)
- >;
- };
-
- d11_gpio_pullup: d11-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (Y3) WKUP_GPIO0_49 */
- AM65X_WKUP_IOPAD(0x0094, PIN_INPUT, 7)
- >;
- };
-
- d11_gpio_pulldown: d11-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (Y3) WKUP_GPIO0_49 */
- AM65X_WKUP_IOPAD(0x0094, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d12_spi0_d1: d12-spi0-d1-pins {
- pinctrl-single,pins = <
- /* (Y2) MCU_SPI0_D1 */
- AM65X_WKUP_IOPAD(0x0098, PIN_INPUT, 0)
- >;
- };
-
- d12_gpio: d12-gpio-pins {
- pinctrl-single,pins = <
- /* (Y2) WKUP_GPIO0_50 */
- AM65X_WKUP_IOPAD(0x0098, PIN_INPUT, 7)
- >;
- };
-
- d12_gpio_pullup: d12-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (Y2) WKUP_GPIO0_50 */
- AM65X_WKUP_IOPAD(0x0098, PIN_INPUT, 7)
- >;
- };
-
- d12_gpio_pulldown: d12-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (Y2) WKUP_GPIO0_50 */
- AM65X_WKUP_IOPAD(0x0098, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d13_spi0_clk: d13-spi0-clk-pins {
- pinctrl-single,pins = <
- /* (Y1) MCU_SPI0_CLK */
- AM65X_WKUP_IOPAD(0x0090, PIN_INPUT, 0)
- >;
- };
-
- d13_gpio: d13-gpio-pins {
- pinctrl-single,pins = <
- /* (Y1) WKUP_GPIO0_48 */
- AM65X_WKUP_IOPAD(0x0090, PIN_INPUT, 7)
- >;
- };
-
- d13_gpio_pullup: d13-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (Y1) WKUP_GPIO0_48 */
- AM65X_WKUP_IOPAD(0x0090, PIN_INPUT, 7)
- >;
- };
-
- d13_gpio_pulldown: d13-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (Y1) WKUP_GPIO0_48 */
- AM65X_WKUP_IOPAD(0x0090, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- a0_gpio: a0-gpio-pins {
- pinctrl-single,pins = <
- /* (L6) WKUP_GPIO0_45 */
- AM65X_WKUP_IOPAD(0x0084, PIN_INPUT, 7)
- >;
- };
-
- a0_gpio_pullup: a0-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (L6) WKUP_GPIO0_45 */
- AM65X_WKUP_IOPAD(0x0084, PIN_INPUT, 7)
- >;
- };
-
- a0_gpio_pulldown: a0-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (L6) WKUP_GPIO0_45 */
- AM65X_WKUP_IOPAD(0x0084, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- a1_gpio: a1-gpio-pins {
- pinctrl-single,pins = <
- /* (M6) WKUP_GPIO0_44 */
- AM65X_WKUP_IOPAD(0x0080, PIN_INPUT, 7)
- >;
- };
-
- a1_gpio_pullup: a1-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (M6) WKUP_GPIO0_44 */
- AM65X_WKUP_IOPAD(0x0080, PIN_INPUT, 7)
- >;
- };
-
- a1_gpio_pulldown: a1-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (M6) WKUP_GPIO0_44 */
- AM65X_WKUP_IOPAD(0x0080, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- a2_gpio: a2-gpio-pins {
- pinctrl-single,pins = <
- /* (L5) WKUP_GPIO0_43 */
- AM65X_WKUP_IOPAD(0x007C, PIN_INPUT, 7)
- >;
- };
-
- a2_gpio_pullup: a2-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (L5) WKUP_GPIO0_43 */
- AM65X_WKUP_IOPAD(0x007C, PIN_INPUT, 7)
- >;
- };
-
- a2_gpio_pulldown: a2-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (L5) WKUP_GPIO0_43 */
- AM65X_WKUP_IOPAD(0x007C, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- a3_gpio: a3-gpio-pins {
- pinctrl-single,pins = <
- /* (M5) WKUP_GPIO0_39 */
- AM65X_WKUP_IOPAD(0x006C, PIN_INPUT, 7)
- >;
- };
-
- a3_gpio_pullup: a3-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (M5) WKUP_GPIO0_39 */
- AM65X_WKUP_IOPAD(0x006C, PIN_INPUT, 7)
- >;
- };
-
- a3_gpio_pulldown: a3-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (M5) WKUP_GPIO0_39 */
- AM65X_WKUP_IOPAD(0x006C, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- a4_gpio: a4-gpio-pins {
- pinctrl-single,pins = <
- /* (L2) WKUP_GPIO0_42 */
- AM65X_WKUP_IOPAD(0x0078, PIN_INPUT, 7)
- >;
- };
-
- a4_gpio_pullup: a4-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (L2) WKUP_GPIO0_42 */
- AM65X_WKUP_IOPAD(0x0078, PIN_INPUT, 7)
- >;
- };
-
- a4_gpio_pulldown: a4-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (L2) WKUP_GPIO0_42 */
- AM65X_WKUP_IOPAD(0x0078, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- a5_gpio: a5-gpio-pins {
- pinctrl-single,pins = <
- /* (N5) WKUP_GPIO0_35 */
- AM65X_WKUP_IOPAD(0x005C, PIN_INPUT, 7)
- >;
- };
-
- a5_gpio_pullup: a5-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (N5) WKUP_GPIO0_35 */
- AM65X_WKUP_IOPAD(0x005C, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- a5_gpio_pulldown: a5-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (N5) WKUP_GPIO0_35 */
- AM65X_WKUP_IOPAD(0x005C, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- wkup_i2c0_pins_default: wkup-i2c0-default-pins {
- pinctrl-single,pins = <
- /* (AC7) WKUP_I2C0_SCL */
- AM65X_WKUP_IOPAD(0x00e0, PIN_INPUT, 0)
- /* (AD6) WKUP_I2C0_SDA */
- AM65X_WKUP_IOPAD(0x00e4, PIN_INPUT, 0)
- >;
- };
-
mcu_i2c0_pins_default: mcu-i2c0-default-pins {
pinctrl-single,pins = <
/* (AD8) MCU_I2C0_SCL */
@@ -623,13 +220,6 @@
>;
};
- arduino_i2c_aio_switch_pins_default: arduino-i2c-aio-switch-default-pins {
- pinctrl-single,pins = <
- /* (R2) WKUP_GPIO0_21 */
- AM65X_WKUP_IOPAD(0x0024, PIN_OUTPUT, 7)
- >;
- };
-
push_button_pins_default: push-button-default-pins {
pinctrl-single,pins = <
/* (T1) MCU_OSPI1_CLK.WKUP_GPIO0_25 */
@@ -637,22 +227,6 @@
>;
};
-
- arduino_io_oe_pins_default: arduino-io-oe-default-pins {
- pinctrl-single,pins = <
- /* (N4) WKUP_GPIO0_34 */
- AM65X_WKUP_IOPAD(0x0058, PIN_OUTPUT, 7)
- /* (M2) WKUP_GPIO0_36 */
- AM65X_WKUP_IOPAD(0x0060, PIN_OUTPUT, 7)
- /* (M3) WKUP_GPIO0_37 */
- AM65X_WKUP_IOPAD(0x0064, PIN_OUTPUT, 7)
- /* (M4) WKUP_GPIO0_38 */
- AM65X_WKUP_IOPAD(0x0068, PIN_OUTPUT, 7)
- /* (M1) WKUP_GPIO0_41 */
- AM65X_WKUP_IOPAD(0x0074, PIN_OUTPUT, 7)
- >;
- };
-
mcu_fss0_ospi0_pins_default: mcu-fss0-ospi0-default-pins {
pinctrl-single,pins = <
/* (V1) MCU_OSPI0_CLK */
@@ -716,214 +290,6 @@
};
&main_pmx0 {
- pinctrl-names =
- "default",
- "d4-ehrpwm0-a", "d4-gpio", "d4-gpio-pullup", "d4-gpio-pulldown",
- "d5-ehrpwm1-a", "d5-gpio", "d5-gpio-pullup", "d5-gpio-pulldown",
- "d6-ehrpwm2-a", "d6-gpio", "d6-gpio-pullup", "d6-gpio-pulldown",
- "d7-ehrpwm3-a", "d7-gpio", "d7-gpio-pullup", "d7-gpio-pulldown",
- "d8-ehrpwm4-a", "d8-gpio", "d8-gpio-pullup", "d8-gpio-pulldown",
- "d9-ehrpwm5-a", "d9-gpio", "d9-gpio-pullup", "d9-gpio-pulldown";
-
- pinctrl-0 = <&d4_ehrpwm0_a>;
- pinctrl-1 = <&d4_ehrpwm0_a>;
- pinctrl-2 = <&d4_gpio>;
- pinctrl-3 = <&d4_gpio_pullup>;
- pinctrl-4 = <&d4_gpio_pulldown>;
-
- pinctrl-5 = <&d5_ehrpwm1_a>;
- pinctrl-6 = <&d5_gpio>;
- pinctrl-7 = <&d5_gpio_pullup>;
- pinctrl-8 = <&d5_gpio_pulldown>;
-
- pinctrl-9 = <&d6_ehrpwm2_a>;
- pinctrl-10 = <&d6_gpio>;
- pinctrl-11 = <&d6_gpio_pullup>;
- pinctrl-12 = <&d6_gpio_pulldown>;
-
- pinctrl-13 = <&d7_ehrpwm3_a>;
- pinctrl-14 = <&d7_gpio>;
- pinctrl-15 = <&d7_gpio_pullup>;
- pinctrl-16 = <&d7_gpio_pulldown>;
-
- pinctrl-17 = <&d8_ehrpwm4_a>;
- pinctrl-18 = <&d8_gpio>;
- pinctrl-19 = <&d8_gpio_pullup>;
- pinctrl-20 = <&d8_gpio_pulldown>;
-
- pinctrl-21 = <&d9_ehrpwm5_a>;
- pinctrl-22 = <&d9_gpio>;
- pinctrl-23 = <&d9_gpio_pullup>;
- pinctrl-24 = <&d9_gpio_pulldown>;
-
- d4_ehrpwm0_a: d4-ehrpwm0-a-pins {
- pinctrl-single,pins = <
- /* (AG18) EHRPWM0_A */
- AM65X_IOPAD(0x0084, PIN_OUTPUT, 5)
- >;
- };
-
- d4_gpio: d4-gpio-pins {
- pinctrl-single,pins = <
- /* (AG18) GPIO0_33 */
- AM65X_IOPAD(0x0084, PIN_INPUT, 7)
- >;
- };
-
- d4_gpio_pullup: d4-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (AG18) GPIO0_33 */
- AM65X_IOPAD(0x0084, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d4_gpio_pulldown: d4-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (AG18) GPIO0_33 */
- AM65X_IOPAD(0x0084, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d5_ehrpwm1_a: d5-ehrpwm1-a-pins {
- pinctrl-single,pins = <
- /* (AF17) EHRPWM1_A */
- AM65X_IOPAD(0x008C, PIN_OUTPUT, 5)
- >;
- };
-
- d5_gpio: d5-gpio-pins {
- pinctrl-single,pins = <
- /* (AF17) GPIO0_35 */
- AM65X_IOPAD(0x008C, PIN_INPUT, 7)
- >;
- };
-
- d5_gpio_pullup: d5-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (AF17) GPIO0_35 */
- AM65X_IOPAD(0x008C, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d5_gpio_pulldown: d5-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (AF17) GPIO0_35 */
- AM65X_IOPAD(0x008C, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d6_ehrpwm2_a: d6-ehrpwm2-a-pins {
- pinctrl-single,pins = <
- /* (AH16) EHRPWM2_A */
- AM65X_IOPAD(0x0098, PIN_OUTPUT, 5)
- >;
- };
-
- d6_gpio: d6-gpio-pins {
- pinctrl-single,pins = <
- /* (AH16) GPIO0_38 */
- AM65X_IOPAD(0x0098, PIN_INPUT, 7)
- >;
- };
-
- d6_gpio_pullup: d6-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (AH16) GPIO0_38 */
- AM65X_IOPAD(0x0098, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d6_gpio_pulldown: d6-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (AH16) GPIO0_38 */
- AM65X_IOPAD(0x0098, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d7_ehrpwm3_a: d7-ehrpwm3-a-pins {
- pinctrl-single,pins = <
- /* (AH15) EHRPWM3_A */
- AM65X_IOPAD(0x00AC, PIN_OUTPUT, 5)
- >;
- };
-
- d7_gpio: d7-gpio-pins {
- pinctrl-single,pins = <
- /* (AH15) GPIO0_43 */
- AM65X_IOPAD(0x00AC, PIN_INPUT, 7)
- >;
- };
-
- d7_gpio_pullup: d7-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (AH15) GPIO0_43 */
- AM65X_IOPAD(0x00AC, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d7_gpio_pulldown: d7-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (AH15) GPIO0_43 */
- AM65X_IOPAD(0x00AC, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d8_ehrpwm4_a: d8-ehrpwm4-a-pins {
- pinctrl-single,pins = <
- /* (AG15) EHRPWM4_A */
- AM65X_IOPAD(0x00C0, PIN_OUTPUT, 5)
- >;
- };
-
- d8_gpio: d8-gpio-pins {
- pinctrl-single,pins = <
- /* (AG15) GPIO0_48 */
- AM65X_IOPAD(0x00C0, PIN_INPUT, 7)
- >;
- };
-
- d8_gpio_pullup: d8-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (AG15) GPIO0_48 */
- AM65X_IOPAD(0x00C0, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d8_gpio_pulldown: d8-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (AG15) GPIO0_48 */
- AM65X_IOPAD(0x00C0, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
- d9_ehrpwm5_a: d9-ehrpwm5-a-pins {
- pinctrl-single,pins = <
- /* (AD15) EHRPWM5_A */
- AM65X_IOPAD(0x00CC, PIN_OUTPUT, 5)
- >;
- };
-
- d9_gpio: d9-gpio-pins {
- pinctrl-single,pins = <
- /* (AD15) GPIO0_51 */
- AM65X_IOPAD(0x00CC, PIN_INPUT, 7)
- >;
- };
-
- d9_gpio_pullup: d9-gpio-pullup-pins {
- pinctrl-single,pins = <
- /* (AD15) GPIO0_51 */
- AM65X_IOPAD(0x00CC, PIN_INPUT_PULLUP, 7)
- >;
- };
-
- d9_gpio_pulldown: d9-gpio-pulldown-pins {
- pinctrl-single,pins = <
- /* (AD15) GPIO0_51 */
- AM65X_IOPAD(0x00CC, PIN_INPUT_PULLDOWN, 7)
- >;
- };
-
main_pcie_enable_pins_default: main-pcie-enable-default-pins {
pinctrl-single,pins = <
AM65X_IOPAD(0x01c4, PIN_INPUT_PULLUP, 7) /* (AH13) GPIO1_17 */
@@ -971,45 +337,6 @@
>;
};
- dss_vout1_pins_default: dss-vout1-default-pins {
- pinctrl-single,pins = <
- AM65X_IOPAD(0x0000, PIN_OUTPUT, 1) /* VOUT1_DATA0 */
- AM65X_IOPAD(0x0004, PIN_OUTPUT, 1) /* VOUT1_DATA1 */
- AM65X_IOPAD(0x0008, PIN_OUTPUT, 1) /* VOUT1_DATA2 */
- AM65X_IOPAD(0x000c, PIN_OUTPUT, 1) /* VOUT1_DATA3 */
- AM65X_IOPAD(0x0010, PIN_OUTPUT, 1) /* VOUT1_DATA4 */
- AM65X_IOPAD(0x0014, PIN_OUTPUT, 1) /* VOUT1_DATA5 */
- AM65X_IOPAD(0x0018, PIN_OUTPUT, 1) /* VOUT1_DATA6 */
- AM65X_IOPAD(0x001c, PIN_OUTPUT, 1) /* VOUT1_DATA7 */
- AM65X_IOPAD(0x0020, PIN_OUTPUT, 1) /* VOUT1_DATA8 */
- AM65X_IOPAD(0x0024, PIN_OUTPUT, 1) /* VOUT1_DATA9 */
- AM65X_IOPAD(0x0028, PIN_OUTPUT, 1) /* VOUT1_DATA10 */
- AM65X_IOPAD(0x002c, PIN_OUTPUT, 1) /* VOUT1_DATA11 */
- AM65X_IOPAD(0x0030, PIN_OUTPUT, 1) /* VOUT1_DATA12 */
- AM65X_IOPAD(0x0034, PIN_OUTPUT, 1) /* VOUT1_DATA13 */
- AM65X_IOPAD(0x0038, PIN_OUTPUT, 1) /* VOUT1_DATA14 */
- AM65X_IOPAD(0x003c, PIN_OUTPUT, 1) /* VOUT1_DATA15 */
- AM65X_IOPAD(0x0040, PIN_OUTPUT, 1) /* VOUT1_DATA16 */
- AM65X_IOPAD(0x0044, PIN_OUTPUT, 1) /* VOUT1_DATA17 */
- AM65X_IOPAD(0x0048, PIN_OUTPUT, 1) /* VOUT1_DATA18 */
- AM65X_IOPAD(0x004c, PIN_OUTPUT, 1) /* VOUT1_DATA19 */
- AM65X_IOPAD(0x0050, PIN_OUTPUT, 1) /* VOUT1_DATA20 */
- AM65X_IOPAD(0x0054, PIN_OUTPUT, 1) /* VOUT1_DATA21 */
- AM65X_IOPAD(0x0058, PIN_OUTPUT, 1) /* VOUT1_DATA22 */
- AM65X_IOPAD(0x005c, PIN_OUTPUT, 1) /* VOUT1_DATA23 */
- AM65X_IOPAD(0x0060, PIN_OUTPUT, 1) /* VOUT1_VSYNC */
- AM65X_IOPAD(0x0064, PIN_OUTPUT, 1) /* VOUT1_HSYNC */
- AM65X_IOPAD(0x0068, PIN_OUTPUT, 1) /* VOUT1_PCLK */
- AM65X_IOPAD(0x006c, PIN_OUTPUT, 1) /* VOUT1_DE */
- >;
- };
-
- dp_pins_default: dp-default-pins {
- pinctrl-single,pins = <
- AM65X_IOPAD(0x0078, PIN_OUTPUT, 7) /* (AF18) DP rst_n */
- >;
- };
-
main_i2c2_pins_default: main-i2c2-default-pins {
pinctrl-single,pins = <
AM65X_IOPAD(0x0074, PIN_INPUT, 5) /* (T27) I2C2_SCL */
@@ -1082,57 +409,11 @@
pinctrl-0 = <&main_uart1_pins_default>;
};
-&mcu_uart0 {
- status = "okay";
-};
-
-&main_gpio0 {
- gpio-line-names =
- "main_gpio0-base", "", "", "", "", "", "", "", "", "",
- "", "", "", "", "", "", "", "", "", "",
- "", "", "", "", "", "", "", "", "", "",
- "", "", "", "IO4", "", "IO5", "", "", "IO6", "",
- "", "", "", "IO7", "", "", "", "", "IO8", "",
- "", "IO9";
-};
-
&main_gpio1 {
pinctrl-names = "default";
pinctrl-0 = <&main_pcie_enable_pins_default>;
};
-&wkup_gpio0 {
- pinctrl-names = "default";
- pinctrl-0 =
- <&arduino_i2c_aio_switch_pins_default>,
- <&arduino_io_oe_pins_default>,
- <&push_button_pins_default>,
- <&db9_com_mode_pins_default>;
- gpio-line-names =
- /* 0..9 */
- "wkup_gpio0-base", "", "", "", "UART0-mode1", "UART0-mode0",
- "UART0-enable", "UART0-terminate", "", "WIFI-disable",
- /* 10..19 */
- "", "", "", "", "", "", "", "", "", "",
- /* 20..29 */
- "", "A4A5-I2C-mux", "", "", "", "USER-button", "", "", "","IO0",
- /* 30..39 */
- "IO1", "IO2", "", "IO3", "IO17-direction", "A5",
- "IO16-direction", "IO15-direction", "IO14-direction", "A3",
- /* 40..49 */
- "", "IO18-direction", "A4", "A2", "A1", "A0", "", "", "IO13",
- "IO11",
- /* 50..51 */
- "IO12", "IO10";
-};
-
-&wkup_i2c0 {
- status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&wkup_i2c0_pins_default>;
- clock-frequency = <400000>;
-};
-
&mcu_i2c0 {
status = "okay";
pinctrl-names = "default";
@@ -1150,47 +431,6 @@
ti,vsel1-state-high;
ti,enable-vout-discharge;
};
-
- /* D4200 */
- pcal9535_1: gpio@20 {
- compatible = "nxp,pcal9535";
- reg = <0x20>;
- #gpio-cells = <2>;
- gpio-controller;
- gpio-line-names =
- "A0-pull", "A1-pull", "A2-pull", "A3-pull", "A4-pull",
- "A5-pull", "", "",
- "IO14-enable", "IO15-enable", "IO16-enable",
- "IO17-enable", "IO18-enable", "IO19-enable";
- };
-
- /* D4201 */
- pcal9535_2: gpio@21 {
- compatible = "nxp,pcal9535";
- reg = <0x21>;
- #gpio-cells = <2>;
- gpio-controller;
- gpio-line-names =
- "IO0-direction", "IO1-direction", "IO2-direction",
- "IO3-direction", "IO4-direction", "IO5-direction",
- "IO6-direction", "IO7-direction",
- "IO8-direction", "IO9-direction", "IO10-direction",
- "IO11-direction", "IO12-direction", "IO13-direction",
- "IO19-direction";
- };
-
- /* D4202 */
- pcal9535_3: gpio@25 {
- compatible = "nxp,pcal9535";
- reg = <0x25>;
- #gpio-cells = <2>;
- gpio-controller;
- gpio-line-names =
- "IO0-pull", "IO1-pull", "IO2-pull", "IO3-pull",
- "IO4-pull", "IO5-pull", "IO6-pull", "IO7-pull",
- "IO8-pull", "IO9-pull", "IO10-pull", "IO11-pull",
- "IO12-pull", "IO13-pull";
- };
};
&main_i2c0 {
@@ -1233,32 +473,6 @@
#address-cells = <1>;
#size-cells = <0>;
-
- edp-bridge@f {
- compatible = "toshiba,tc358767";
- reg = <0x0f>;
- pinctrl-names = "default";
- pinctrl-0 = <&dp_pins_default>;
- reset-gpios = <&main_gpio0 30 GPIO_ACTIVE_HIGH>;
-
- clock-names = "ref";
- clocks = <&dp_refclk>;
-
- toshiba,hpd-pin = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@1 {
- reg = <1>;
-
- bridge_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
- };
- };
};
&mcu_cpsw {
@@ -1292,13 +506,6 @@
ti,pindir-d0-out-d1-in;
};
-&tscadc1 {
- status = "okay";
- adc {
- ti,adc-channels = <0 1 2 3 4 5>;
- };
-};
-
&ospi0 {
status = "okay";
pinctrl-names = "default";
@@ -1364,26 +571,6 @@
};
};
-&dss {
- pinctrl-names = "default";
- pinctrl-0 = <&dss_vout1_pins_default>;
-
- assigned-clocks = <&k3_clks 67 2>;
- assigned-clock-parents = <&k3_clks 67 5>;
-};
-
-&dss_ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@1 {
- reg = <1>;
-
- dpi_out: endpoint {
- remote-endpoint = <&bridge_in>;
- };
- };
-};
-
&pcie1_rc {
status = "okay";
pinctrl-names = "default";
@@ -1418,13 +605,17 @@
&mcu_r5fss0_core0 {
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
};
&mcu_r5fss0_core1 {
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
- mboxes = <&mailbox0_cluster1>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_mcu_r5fss0_core1>;
+};
+
+&mcu_rti1 {
+ memory-region = <&wdt_reset_memory_region>;
};
&icssg0_mdio {
diff --git a/arch/arm64/boot/dts/ti/k3-am65-iot2050-dp.dtsi b/arch/arm64/boot/dts/ti/k3-am65-iot2050-dp.dtsi
new file mode 100644
index 000000000000..984cc80913bc
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am65-iot2050-dp.dtsi
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Siemens AG, 2024
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * Common bits for IOT2050 variants with Display Port
+ */
+
+&main_pmx0 {
+ dss_vout1_pins_default: dss-vout1-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x0000, PIN_OUTPUT, 1) /* VOUT1_DATA0 */
+ AM65X_IOPAD(0x0004, PIN_OUTPUT, 1) /* VOUT1_DATA1 */
+ AM65X_IOPAD(0x0008, PIN_OUTPUT, 1) /* VOUT1_DATA2 */
+ AM65X_IOPAD(0x000c, PIN_OUTPUT, 1) /* VOUT1_DATA3 */
+ AM65X_IOPAD(0x0010, PIN_OUTPUT, 1) /* VOUT1_DATA4 */
+ AM65X_IOPAD(0x0014, PIN_OUTPUT, 1) /* VOUT1_DATA5 */
+ AM65X_IOPAD(0x0018, PIN_OUTPUT, 1) /* VOUT1_DATA6 */
+ AM65X_IOPAD(0x001c, PIN_OUTPUT, 1) /* VOUT1_DATA7 */
+ AM65X_IOPAD(0x0020, PIN_OUTPUT, 1) /* VOUT1_DATA8 */
+ AM65X_IOPAD(0x0024, PIN_OUTPUT, 1) /* VOUT1_DATA9 */
+ AM65X_IOPAD(0x0028, PIN_OUTPUT, 1) /* VOUT1_DATA10 */
+ AM65X_IOPAD(0x002c, PIN_OUTPUT, 1) /* VOUT1_DATA11 */
+ AM65X_IOPAD(0x0030, PIN_OUTPUT, 1) /* VOUT1_DATA12 */
+ AM65X_IOPAD(0x0034, PIN_OUTPUT, 1) /* VOUT1_DATA13 */
+ AM65X_IOPAD(0x0038, PIN_OUTPUT, 1) /* VOUT1_DATA14 */
+ AM65X_IOPAD(0x003c, PIN_OUTPUT, 1) /* VOUT1_DATA15 */
+ AM65X_IOPAD(0x0040, PIN_OUTPUT, 1) /* VOUT1_DATA16 */
+ AM65X_IOPAD(0x0044, PIN_OUTPUT, 1) /* VOUT1_DATA17 */
+ AM65X_IOPAD(0x0048, PIN_OUTPUT, 1) /* VOUT1_DATA18 */
+ AM65X_IOPAD(0x004c, PIN_OUTPUT, 1) /* VOUT1_DATA19 */
+ AM65X_IOPAD(0x0050, PIN_OUTPUT, 1) /* VOUT1_DATA20 */
+ AM65X_IOPAD(0x0054, PIN_OUTPUT, 1) /* VOUT1_DATA21 */
+ AM65X_IOPAD(0x0058, PIN_OUTPUT, 1) /* VOUT1_DATA22 */
+ AM65X_IOPAD(0x005c, PIN_OUTPUT, 1) /* VOUT1_DATA23 */
+ AM65X_IOPAD(0x0060, PIN_OUTPUT, 1) /* VOUT1_VSYNC */
+ AM65X_IOPAD(0x0064, PIN_OUTPUT, 1) /* VOUT1_HSYNC */
+ AM65X_IOPAD(0x0068, PIN_OUTPUT, 1) /* VOUT1_PCLK */
+ AM65X_IOPAD(0x006c, PIN_OUTPUT, 1) /* VOUT1_DE */
+ >;
+ };
+
+ dp_pins_default: dp-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x0078, PIN_OUTPUT, 7) /* (AF18) DP rst_n */
+ >;
+ };
+};
+
+&main_i2c3 {
+ edp-bridge@f {
+ compatible = "toshiba,tc358767";
+ reg = <0x0f>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&dp_pins_default>;
+ reset-gpios = <&main_gpio0 30 GPIO_ACTIVE_HIGH>;
+
+ clock-names = "ref";
+ clocks = <&dp_refclk>;
+
+ toshiba,hpd-pin = <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ reg = <1>;
+
+ bridge_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+ };
+ };
+};
+
+&dss {
+ pinctrl-names = "default";
+ pinctrl-0 = <&dss_vout1_pins_default>;
+
+ assigned-clocks = <&k3_clks 67 2>;
+ assigned-clock-parents = <&k3_clks 67 5>;
+};
+
+&dss_ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@1 {
+ reg = <1>;
+
+ dpi_out: endpoint {
+ remote-endpoint = <&bridge_in>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am65-iot2050-usb3.dtsi b/arch/arm64/boot/dts/ti/k3-am65-iot2050-usb3.dtsi
new file mode 100644
index 000000000000..e5bd7c301b12
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am65-iot2050-usb3.dtsi
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Siemens AG, 2024
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * Common bits for IOT2050 variants with USB3 support
+ */
+
+&serdes0 {
+ assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
+ assigned-clock-parents = <&k3_clks 153 7>, <&k3_clks 153 4>;
+};
+
+&dwc3_0 {
+ assigned-clock-parents = <&k3_clks 151 4>, /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */
+ <&k3_clks 151 8>; /* set PIPE3_TXB_CLK to WIZ8B2M4VSB */
+ phys = <&serdes0 PHY_TYPE_USB3 0>;
+ phy-names = "usb3-phy";
+};
+
+&usb0 {
+ maximum-speed = "super-speed";
+ snps,dis-u1-entry-quirk;
+ snps,dis-u2-entry-quirk;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index fcea54465636..ff857117d719 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM6 SoC Family Main Domain peripherals
*
- * Copyright (C) 2016-2018 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/phy/phy-am654-serdes.h>
@@ -886,20 +886,6 @@
status = "disabled";
};
- pcie0_ep: pcie-ep@5500000 {
- compatible = "ti,am654-pcie-ep";
- reg = <0x0 0x5500000 0x0 0x1000>, <0x0 0x5501000 0x0 0x1000>, <0x0 0x10000000 0x0 0x8000000>, <0x0 0x5506000 0x0 0x1000>;
- reg-names = "app", "dbics", "addr_space", "atu";
- power-domains = <&k3_pds 120 TI_SCI_PD_EXCLUSIVE>;
- ti,syscon-pcie-mode = <&scm_conf 0x4060>;
- num-ib-windows = <16>;
- num-ob-windows = <16>;
- max-link-speed = <2>;
- dma-coherent;
- interrupts = <GIC_SPI 340 IRQ_TYPE_EDGE_RISING>;
- status = "disabled";
- };
-
pcie1_rc: pcie@5600000 {
compatible = "ti,am654-pcie-rc";
reg = <0x0 0x5600000 0x0 0x1000>, <0x0 0x5601000 0x0 0x1000>, <0x0 0x18000000 0x0 0x2000>, <0x0 0x5606000 0x0 0x1000>;
@@ -921,20 +907,6 @@
status = "disabled";
};
- pcie1_ep: pcie-ep@5600000 {
- compatible = "ti,am654-pcie-ep";
- reg = <0x0 0x5600000 0x0 0x1000>, <0x0 0x5601000 0x0 0x1000>, <0x0 0x18000000 0x0 0x4000000>, <0x0 0x5606000 0x0 0x1000>;
- reg-names = "app", "dbics", "addr_space", "atu";
- power-domains = <&k3_pds 121 TI_SCI_PD_EXCLUSIVE>;
- ti,syscon-pcie-mode = <&scm_conf 0x4070>;
- num-ib-windows = <16>;
- num-ob-windows = <16>;
- max-link-speed = <2>;
- dma-coherent;
- interrupts = <GIC_SPI 355 IRQ_TYPE_EDGE_RISING>;
- status = "disabled";
- };
-
mcasp0: mcasp@2b00000 {
compatible = "ti,am33xx-mcasp-audio";
reg = <0x0 0x02b00000 0x0 0x2000>,
@@ -1019,9 +991,10 @@
<0x0 0x04a07000 0x0 0x1000>, /* ovr1 */
<0x0 0x04a08000 0x0 0x1000>, /* ovr2 */
<0x0 0x04a0a000 0x0 0x1000>, /* vp1 */
- <0x0 0x04a0b000 0x0 0x1000>; /* vp2 */
+ <0x0 0x04a0b000 0x0 0x1000>, /* vp2 */
+ <0x0 0x04a01000 0x0 0x1000>; /* common1 */
reg-names = "common", "vidl1", "vid",
- "ovr1", "ovr2", "vp1", "vp2";
+ "ovr1", "ovr2", "vp1", "vp2", "common1";
ti,am65x-oldi-io-ctrl = <&dss_oldi_io_ctrl>;
@@ -1050,6 +1023,13 @@
};
};
+ gpu: gpu@7000000 {
+ compatible = "ti,am6548-gpu", "img,powervr-sgx544";
+ reg = <0x0 0x7000000 0x0 0x10000>;
+ interrupts = <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>;
+ power-domains = <&k3_pds 65 TI_SCI_PD_EXCLUSIVE>;
+ };
+
ehrpwm0: pwm@3000000 {
compatible = "ti,am654-ehrpwm", "ti,am3352-ehrpwm";
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index ecd7356f3315..6ff3ccc39fb4 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM6 SoC Family MCU Domain peripherals
*
- * Copyright (C) 2016-2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu {
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index f037b36243ce..37527890ddea 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM6 SoC Family Wakeup Domain peripherals
*
- * Copyright (C) 2016-2018 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_wakeup {
diff --git a/arch/arm64/boot/dts/ti/k3-am65.dtsi b/arch/arm64/boot/dts/ti/k3-am65.dtsi
index 4d7b6155a76b..c59baebc5a25 100644
--- a/arch/arm64/boot/dts/ti/k3-am65.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM6 SoC Family
*
- * Copyright (C) 2016-2018 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/gpio/gpio.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am652.dtsi b/arch/arm64/boot/dts/ti/k3-am652.dtsi
index 0f22e00faa90..cbb3caaf82c3 100644
--- a/arch/arm64/boot/dts/ti/k3-am652.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am652.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM65 SoC family in Dual core configuration
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include "k3-am65.dtsi"
diff --git a/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-common.dtsi b/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-common.dtsi
index 1d6cddb11991..eed6fe70d297 100644
--- a/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-common.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-common.dtsi
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2018-2021
*
@@ -11,6 +11,7 @@
#include "k3-am652.dtsi"
#include "k3-am65-iot2050-common.dtsi"
+#include "k3-am65-iot2050-arduino-connector.dtsi"
/ {
memory@80000000 {
@@ -40,8 +41,3 @@
pinctrl-names = "default";
pinctrl-0 = <&main_uart0_pins_default>;
};
-
-&mcu_r5fss0 {
- /* lock-step mode not supported on Basic boards */
- ti,cluster-mode = <0>;
-};
diff --git a/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts b/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts
index c62549a4b436..c1faf9497b63 100644
--- a/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts
+++ b/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2018-2021
*
@@ -17,6 +17,8 @@
#include "k3-am6528-iot2050-basic-common.dtsi"
#include "k3-am65-iot2050-common-pg2.dtsi"
+#include "k3-am65-iot2050-dp.dtsi"
+#include "k3-am65-iot2050-usb3.dtsi"
/ {
compatible = "siemens,iot2050-basic-pg2", "ti,am654";
diff --git a/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic.dts b/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic.dts
index 87928ff28214..29a31891b3db 100644
--- a/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic.dts
+++ b/arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic.dts
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2018-2021
*
@@ -22,3 +22,8 @@
compatible = "siemens,iot2050-basic", "ti,am654";
model = "SIMATIC IOT2050 Basic";
};
+
+&mcu_r5fss0 {
+ /* lock-step mode not supported on this board */
+ ti,cluster-mode = <0>;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board-rocktech-rk101-panel.dtso b/arch/arm64/boot/dts/ti/k3-am654-base-board-rocktech-rk101-panel.dtso
index 3be92c39ecba..364c57b3b3a0 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board-rocktech-rk101-panel.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board-rocktech-rk101-panel.dtso
@@ -1,10 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* OLDI-LCD1EVM Rocktech integrated panel and touch DT overlay for AM654-EVM.
* Panel Link: https://www.digimax.it/en/tft-lcd/20881-RK101II01D-CT
* AM654 LCD EVM: https://www.ti.com/tool/TMDSLCD1EVM
*
- * Copyright (C) 2023 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
index 822c288d2797..aba0c52b1213 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2016-2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -531,13 +531,13 @@
&mcu_r5fss0_core0 {
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
};
&mcu_r5fss0_core1 {
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
- mboxes = <&mailbox0_cluster1>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_mcu_r5fss0_core1>;
};
&ospi0 {
diff --git a/arch/arm64/boot/dts/ti/k3-am654-icssg2.dtso b/arch/arm64/boot/dts/ti/k3-am654-icssg2.dtso
index ec8cf20ca3ac..0a6e75265ba9 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-icssg2.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am654-icssg2.dtso
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT overlay for IDK application board on AM654 EVM
*
- * Copyright (C) 2018-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2018-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am654-idk.dtso b/arch/arm64/boot/dts/ti/k3-am654-idk.dtso
index 150428dfce6f..8bdb87fcbde0 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-idk.dtso
+++ b/arch/arm64/boot/dts/ti/k3-am654-idk.dtso
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT overlay for IDK application board on AM654 EVM
*
- * Copyright (C) 2018-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2018-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-am654-industrial-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-am654-industrial-thermal.dtsi
index 9021c738056b..de5a2ed907a7 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-industrial-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am654-industrial-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-am654-pcie-usb2.dtso b/arch/arm64/boot/dts/ti/k3-am654-pcie-usb2.dtso
new file mode 100644
index 000000000000..c3cb752f8cd7
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am654-pcie-usb2.dtso
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/**
+ * DT overlay for SERDES personality card: 2lane PCIe + USB2.0 Host on AM654 EVM
+ *
+ * Copyright (C) 2018-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
+
+/dts-v1/;
+/plugin/;
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/phy/phy.h>
+#include <dt-bindings/phy/phy-am654-serdes.h>
+#include "k3-pinctrl.h"
+
+&serdes0 {
+ assigned-clocks = <&k3_clks 153 4>,
+ <&serdes0 AM654_SERDES_CMU_REFCLK>,
+ <&serdes0 AM654_SERDES_RO_REFCLK>;
+ assigned-clock-parents = <&k3_clks 153 8>,
+ <&k3_clks 153 4>,
+ <&k3_clks 153 4>;
+ status = "okay";
+};
+
+&serdes1 {
+ assigned-clocks = <&serdes1 AM654_SERDES_CMU_REFCLK>;
+ assigned-clock-parents = <&serdes0 AM654_SERDES_RO_REFCLK>;
+ status = "okay";
+};
+
+&pcie0_rc {
+ num-lanes = <2>;
+ phys = <&serdes0 PHY_TYPE_PCIE 1>, <&serdes1 PHY_TYPE_PCIE 1>;
+ phy-names = "pcie-phy0", "pcie-phy1";
+ reset-gpios = <&pca9555 5 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
+
+&main_pmx0 {
+ usb0_pins_default: usb0-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x02bc, PIN_OUTPUT, 0) /* (AD9) USB0_DRVVBUS */
+ >;
+ };
+};
+
+&dwc3_0 {
+ status = "okay";
+};
+
+&usb0_phy {
+ status = "okay";
+};
+
+&usb0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&usb0_pins_default>;
+ dr_mode = "host";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am654-pcie-usb3.dtso b/arch/arm64/boot/dts/ti/k3-am654-pcie-usb3.dtso
new file mode 100644
index 000000000000..333e423e8bb6
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am654-pcie-usb3.dtso
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/**
+ * DT overlay for SERDES personality card: 1lane PCIe + USB3.0 DRD on AM654 EVM
+ *
+ * Copyright (C) 2018-2024 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+/dts-v1/;
+/plugin/;
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/phy/phy.h>
+#include <dt-bindings/phy/phy-am654-serdes.h>
+
+#include "k3-pinctrl.h"
+
+&serdes1 {
+ status = "okay";
+};
+
+&pcie1_rc {
+ num-lanes = <1>;
+ phys = <&serdes1 PHY_TYPE_PCIE 0>;
+ phy-names = "pcie-phy0";
+ reset-gpios = <&pca9555 5 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
+
+&main_pmx0 {
+ usb0_pins_default: usb0-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x02bc, PIN_OUTPUT, 0) /* (AD9) USB0_DRVVBUS */
+ >;
+ };
+};
+
+&serdes0 {
+ status = "okay";
+ assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
+ assigned-clock-parents = <&k3_clks 153 7>, <&k3_clks 153 4>;
+};
+
+&dwc3_0 {
+ status = "okay";
+ assigned-clock-parents = <&k3_clks 151 4>, /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */
+ <&k3_clks 151 8>; /* set PIPE3_TXB_CLK to WIZ8B2M4VSB */
+ phys = <&serdes0 PHY_TYPE_USB3 0>;
+ phy-names = "usb3-phy";
+};
+
+&usb0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&usb0_pins_default>;
+ dr_mode = "host";
+ maximum-speed = "super-speed";
+ snps,dis-u1-entry-quirk;
+ snps,dis-u2-entry-quirk;
+};
+
+&usb0_phy {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am654.dtsi b/arch/arm64/boot/dts/ti/k3-am654.dtsi
index 888567b921f0..bb77c8454734 100644
--- a/arch/arm64/boot/dts/ti/k3-am654.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am654.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for AM6 SoC family in Quad core configuration
*
- * Copyright (C) 2016-2018 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include "k3-am65.dtsi"
diff --git a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-common.dtsi b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-common.dtsi
index 3864ec54e371..ae842b85b70d 100644
--- a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-common.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-common.dtsi
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2018-2021
*
diff --git a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-m2.dts b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-m2.dts
index bd6f2e696e94..cc619bbec181 100644
--- a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-m2.dts
+++ b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-m2.dts
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2018-2023
*
@@ -15,17 +15,14 @@
#include "k3-am6548-iot2050-advanced-common.dtsi"
#include "k3-am65-iot2050-common-pg2.dtsi"
+#include "k3-am65-iot2050-arduino-connector.dtsi"
+#include "k3-am65-iot2050-dp.dtsi"
/ {
compatible = "siemens,iot2050-advanced-m2", "ti,am654";
model = "SIMATIC IOT2050 Advanced M2";
};
-&mcu_r5fss0 {
- /* lock-step mode not supported on this board */
- ti,cluster-mode = <0>;
-};
-
&main_pmx0 {
main_bkey_pcie_reset: main-bkey-pcie-reset-default-pins {
pinctrl-single,pins = <
@@ -96,16 +93,3 @@
&pcie1_rc {
status = "disabled";
};
-
-&dwc3_0 {
- assigned-clock-parents = <&k3_clks 151 4>, /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */
- <&k3_clks 151 9>; /* set PIPE3_TXB_CLK to CLK_12M_RC/256 (for HS only) */
- /delete-property/ phys;
- /delete-property/ phy-names;
-};
-
-&usb0 {
- maximum-speed = "high-speed";
- /delete-property/ snps,dis-u1-entry-quirk;
- /delete-property/ snps,dis-u2-entry-quirk;
-};
diff --git a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts
index f00dc86d01b9..ec721275e8e2 100644
--- a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts
+++ b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) Siemens AG, 2018-2021
+ * Copyright (c) Siemens AG, 2018-2023
*
* Authors:
* Le Jin <le.jin@siemens.com>
@@ -17,13 +17,11 @@
#include "k3-am6548-iot2050-advanced-common.dtsi"
#include "k3-am65-iot2050-common-pg2.dtsi"
+#include "k3-am65-iot2050-arduino-connector.dtsi"
+#include "k3-am65-iot2050-dp.dtsi"
+#include "k3-am65-iot2050-usb3.dtsi"
/ {
compatible = "siemens,iot2050-advanced-pg2", "ti,am654";
model = "SIMATIC IOT2050 Advanced PG2";
};
-
-&mcu_r5fss0 {
- /* lock-step mode not supported on this board */
- ti,cluster-mode = <0>;
-};
diff --git a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-sm.dts b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-sm.dts
new file mode 100644
index 000000000000..b829f4bcab69
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-sm.dts
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Siemens AG, 2023
+ *
+ * Authors:
+ * Baocheng Su <baocheng.su@siemens.com>
+ * Chao Zeng <chao.zeng@siemens.com>
+ * Huaqian Li <huaqian.li@siemens.com>
+ *
+ * AM6548-based (quad-core) IOT2050 SM variant, Product Generation 2
+ * 4 GB RAM, 16 GB eMMC, USB-serial converter on connector X30
+ *
+ * Product homepage:
+ * https://new.siemens.com/global/en/products/automation/pc-based/iot-gateways/simatic-iot2050.html
+ */
+
+/dts-v1/;
+
+#include "k3-am6548-iot2050-advanced-common.dtsi"
+#include "k3-am65-iot2050-common-pg2.dtsi"
+
+/ {
+ compatible = "siemens,iot2050-advanced-sm", "ti,am654";
+ model = "SIMATIC IOT2050 Advanced SM";
+
+ memory@80000000 {
+ device_type = "memory";
+ /* 4G RAM */
+ reg = <0x00000000 0x80000000 0x00000000 0x80000000>,
+ <0x00000008 0x80000000 0x00000000 0x80000000>;
+ };
+
+ aliases {
+ spi1 = &main_spi0;
+ };
+
+ leds {
+ pinctrl-0 = <&leds_pins_default>, <&user1_led_pins>;
+
+ led-2 {
+ gpios = <&wkup_gpio0 52 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-3 {
+ gpios = <&wkup_gpio0 53 GPIO_ACTIVE_HIGH>;
+ };
+ };
+};
+
+&main_pmx0 {
+ main_pcie_enable_pins_default: main-pcie-enable-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x01d8, PIN_OUTPUT, 7) /* (AH12) GPIO1_22 */
+ >;
+ };
+
+ main_spi0_pins: main-spi0-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x01c4, PIN_INPUT, 0) /* (AH13) SPI0_CLK */
+ AM65X_IOPAD(0x01c8, PIN_INPUT, 0) /* (AE13) SPI0_D0 */
+ AM65X_IOPAD(0x01cc, PIN_INPUT, 0) /* (AD13) SPI0_D1 */
+ AM65X_IOPAD(0x01bc, PIN_OUTPUT, 0) /* (AG13) SPI0_CS0 */
+ >;
+ };
+};
+
+&main_pmx1 {
+ asic_spi_mux_ctrl_pin: asic-spi-mux-ctrl-default-pins {
+ pinctrl-single,pins = <
+ AM65X_IOPAD(0x0010, PIN_OUTPUT, 7) /* (D21) GPIO1_86 */
+ >;
+ };
+};
+
+&wkup_pmx0 {
+ user1_led_pins: user1-led-default-pins {
+ pinctrl-single,pins = <
+ /* (AB1) WKUP_UART0_RXD:WKUP_GPIO0_52, as USER 1 led red */
+ AM65X_WKUP_IOPAD(0x00a0, PIN_OUTPUT, 7)
+ /* (AB5) WKUP_UART0_TXD:WKUP_GPIO0_53, as USER 1 led green */
+ AM65X_WKUP_IOPAD(0x00a4, PIN_OUTPUT, 7)
+ >;
+ };
+
+ soc_asic_pins: soc-asic-default-pins {
+ pinctrl-single,pins = <
+ AM65X_WKUP_IOPAD(0x0044, PIN_INPUT, 7) /* (P4) WKUP_GPIO0_29 */
+ AM65X_WKUP_IOPAD(0x0048, PIN_INPUT, 7) /* (P5) WKUP_GPIO0_30 */
+ AM65X_WKUP_IOPAD(0x004c, PIN_INPUT, 7) /* (P1) WKUP_GPIO0_31 */
+ >;
+ };
+};
+
+&main_gpio0 {
+ gpio-line-names = "main_gpio0-base";
+};
+
+&main_gpio1 {
+ pinctrl-names = "default";
+ pinctrl-0 =
+ <&cp2102n_reset_pin_default>,
+ <&main_pcie_enable_pins_default>,
+ <&asic_spi_mux_ctrl_pin>;
+ gpio-line-names =
+ /* 0..9 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 10..19 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 20..29 */
+ "", "", "", "", "CP2102N-RESET", "", "", "", "", "",
+ /* 30..39 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 40..49 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 50..59 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 60..69 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 70..79 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 80..86 */
+ "", "", "", "", "", "", "ASIC-spi-mux-ctrl";
+};
+
+&wkup_gpio0 {
+ pinctrl-names = "default";
+ pinctrl-0 =
+ <&push_button_pins_default>,
+ <&db9_com_mode_pins_default>,
+ <&soc_asic_pins>;
+ gpio-line-names =
+ /* 0..9 */
+ "wkup_gpio0-base", "", "", "", "UART0-mode1", "UART0-mode0",
+ "UART0-enable", "UART0-terminate", "", "WIFI-disable",
+ /* 10..19 */
+ "", "", "", "", "", "", "", "", "", "",
+ /* 20..29 */
+ "", "", "", "", "", "USER-button", "", "", "","ASIC-gpio-0",
+ /* 30..31 */
+ "ASIC-gpio-1", "ASIC-gpio-2";
+};
+
+&main_spi0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_spi0_pins>;
+
+ #address-cells = <1>;
+ #size-cells= <0>;
+};
+
+&mcu_spi0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_spi0_pins_default>;
+};
+
+&main_i2c3 {
+ accelerometer: lsm6dso@6a {
+ compatible = "st,lsm6dso";
+ reg = <0x6a>;
+ };
+};
+
+&dss {
+ status = "disabled";
+};
+
+&serdes0 {
+ assigned-clocks = <&k3_clks 153 4>, <&serdes0 AM654_SERDES_CMU_REFCLK>;
+ assigned-clock-parents = <&k3_clks 153 8>, <&k3_clks 153 4>;
+};
+
+&serdes1 {
+ status = "disabled";
+};
+
+&pcie0_rc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&minipcie_pins_default>;
+
+ num-lanes = <1>;
+ phys = <&serdes0 PHY_TYPE_PCIE 1>;
+ phy-names = "pcie-phy0";
+ reset-gpios = <&wkup_gpio0 27 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
+
+&pcie1_rc {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced.dts b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced.dts
index 077f165bdc68..649652a540ef 100644
--- a/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced.dts
+++ b/arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced.dts
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Siemens AG, 2018-2021
*
@@ -17,6 +17,7 @@
#include "k3-am6548-iot2050-advanced-common.dtsi"
#include "k3-am65-iot2050-common-pg1.dtsi"
+#include "k3-am65-iot2050-arduino-connector.dtsi"
/ {
compatible = "siemens,iot2050-advanced", "ti,am654";
diff --git a/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts b/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts
index d0cfdeac21fb..d743f023cdd9 100644
--- a/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* Base Board: https://www.ti.com/lit/zip/SPRR463
*/
@@ -169,6 +169,13 @@
};
};
};
+
+ csi_mux: mux-controller {
+ compatible = "gpio-mux";
+ #mux-state-cells = <1>;
+ mux-gpios = <&exp3 1 GPIO_ACTIVE_HIGH>;
+ idle-state = <0>;
+ };
};
&main_pmx0 {
@@ -186,6 +193,13 @@
>;
};
+ main_i2c1_pins_default: main-i2c1-default-pins {
+ pinctrl-single,pins = <
+ J721S2_IOPAD(0x0ac, PIN_INPUT, 13) /* (AC25) MCASP0_AXR15.I2C1_SCL */
+ J721S2_IOPAD(0x0b0, PIN_INPUT, 13) /* (AD26) MCASP1_AXR3.I2C1_SDA */
+ >;
+ };
+
main_mmc1_pins_default: main-mmc1-default-pins {
pinctrl-single,pins = <
J721S2_IOPAD(0x104, PIN_INPUT, 0) /* (P23) MMC1_CLK */
@@ -431,6 +445,42 @@
};
};
+&main_i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c1_pins_default>;
+ status = "okay";
+
+ exp3: gpio@20 {
+ compatible = "ti,tca6408";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names = "CSI_VIO_SEL", "CSI_SEL_FPC_EXPn",
+ "IO_EXP_CSI2_EXP_RSTz","CSI0_B_GPIO1",
+ "CSI1_B_GPIO1";
+ };
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9543";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x70>;
+
+ cam0_i2c: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ cam1_i2c: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ };
+};
+
&main_i2c4 {
status = "okay";
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/ti/k3-am68-sk-som.dtsi b/arch/arm64/boot/dts/ti/k3-am68-sk-som.dtsi
index 20861a0a46b0..0f4a5da0ebc4 100644
--- a/arch/arm64/boot/dts/ti/k3-am68-sk-som.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am68-sk-som.dtsi
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -209,51 +209,51 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
&c71_0 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_0>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_0>;
memory-region = <&c71_0_dma_memory_region>,
<&c71_0_memory_region>;
};
&c71_1 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_1>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_1>;
memory-region = <&c71_1_dma_memory_region>,
<&c71_1_memory_region>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-am69-sk.dts b/arch/arm64/boot/dts/ti/k3-am69-sk.dts
index 8da591579868..50de2a448a3a 100644
--- a/arch/arm64/boot/dts/ti/k3-am69-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-am69-sk.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2022-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* Design Files: https://www.ti.com/lit/zip/SPRR466
* TRM: https://www.ti.com/lit/zip/spruj52
@@ -33,6 +33,7 @@
memory@80000000 {
device_type = "memory";
+ bootph-all;
/* 32G RAM */
reg = <0x00 0x80000000 0x00 0x80000000>,
<0x08 0x80000000 0x07 0x80000000>;
@@ -321,6 +322,38 @@
};
};
};
+
+ csi_mux: mux-controller {
+ compatible = "gpio-mux";
+ #mux-state-cells = <1>;
+ mux-gpios = <&exp2 1 GPIO_ACTIVE_HIGH>;
+ idle-state = <0>;
+ };
+
+ transceiver1: can-phy0 {
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ };
+
+ transceiver2: can-phy1 {
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ };
+
+ transceiver3: can-phy2 {
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ };
+
+ transceiver4: can-phy3 {
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ };
+
};
&main_pmx0 {
@@ -340,6 +373,13 @@
>;
};
+ main_i2c1_pins_default: main-i2c1-default-pins {
+ pinctrl-single,pins = <
+ J784S4_IOPAD(0x0ac, PIN_INPUT_PULLUP, 13) /* (AE34) MCASP0_AXR15.I2C1_SCL */
+ J784S4_IOPAD(0x0b0, PIN_INPUT_PULLUP, 13) /* (AL33) MCASP1_AXR3.I2C1_SDA */
+ >;
+ };
+
main_mmc1_pins_default: main-mmc1-default-pins {
bootph-all;
pinctrl-single,pins = <
@@ -429,6 +469,40 @@
J784S4_IOPAD(0x000, PIN_INPUT, 7) /* (AN35) EXTINTN.GPIO0_0 */
>;
};
+
+ main_mcan6_pins_default: main-mcan6-default-pins {
+ pinctrl-single,pins = <
+ J784S4_IOPAD(0x098, PIN_INPUT, 0) /* (AH36) MCAN6_RX */
+ J784S4_IOPAD(0x094, PIN_OUTPUT, 0) /* (AG35) MCAN6_TX */
+ >;
+ };
+
+ main_mcan7_pins_default: main-mcan7-default-pins {
+ pinctrl-single,pins = <
+ J784S4_IOPAD(0x0A0, PIN_INPUT, 0) /* (AD34) MCAN7_RX */
+ J784S4_IOPAD(0x09C, PIN_OUTPUT, 0) /* (AF35) MCAN7_TX */
+ >;
+ };
+
+};
+
+&wkup_pmx0 {
+ bootph-all;
+ mcu_fss0_ospi0_pins_default: mcu-fss0-ospi0-default-pins {
+ pinctrl-single,pins = <
+ J784S4_WKUP_IOPAD(0x000, PIN_OUTPUT, 0) /* (E32) MCU_OSPI0_CLK */
+ J784S4_WKUP_IOPAD(0x02c, PIN_OUTPUT, 0) /* (A32) MCU_OSPI0_CSn0 */
+ J784S4_WKUP_IOPAD(0x00c, PIN_INPUT, 0) /* (B33) MCU_OSPI0_D0 */
+ J784S4_WKUP_IOPAD(0x010, PIN_INPUT, 0) /* (B32) MCU_OSPI0_D1 */
+ J784S4_WKUP_IOPAD(0x014, PIN_INPUT, 0) /* (C33) MCU_OSPI0_D2 */
+ J784S4_WKUP_IOPAD(0x018, PIN_INPUT, 0) /* (C35) MCU_OSPI0_D3 */
+ J784S4_WKUP_IOPAD(0x01c, PIN_INPUT, 0) /* (D33) MCU_OSPI0_D4 */
+ J784S4_WKUP_IOPAD(0x020, PIN_INPUT, 0) /* (D34) MCU_OSPI0_D5 */
+ J784S4_WKUP_IOPAD(0x024, PIN_INPUT, 0) /* (E34) MCU_OSPI0_D6 */
+ J784S4_WKUP_IOPAD(0x028, PIN_INPUT, 0) /* (E33) MCU_OSPI0_D7 */
+ J784S4_WKUP_IOPAD(0x008, PIN_INPUT, 0) /* (C34) MCU_OSPI0_DQS */
+ >;
+ };
};
&wkup_pmx2 {
@@ -525,6 +599,21 @@
J784S4_WKUP_IOPAD(0x090, PIN_INPUT, 7) /* (H37) WKUP_GPIO0_14 */
>;
};
+
+ mcu_mcan0_pins_default: mcu-mcan0-default-pins {
+ pinctrl-single,pins = <
+ J784S4_WKUP_IOPAD(0x054, PIN_INPUT, 0) /* (F38) MCU_MCAN0_RX */
+ J784S4_WKUP_IOPAD(0x050, PIN_OUTPUT, 0) /* (K33) MCU_MCAN0_TX */
+ >;
+ };
+
+ mcu_mcan1_pins_default: mcu-mcan1-default-pins {
+ pinctrl-single,pins = <
+ J784S4_WKUP_IOPAD(0x06c, PIN_INPUT, 0) /* (K36) WKUP_GPIO0_5.MCU_MCAN1_RX */
+ J784S4_WKUP_IOPAD(0x068, PIN_OUTPUT, 0)/* (H35) WKUP_GPIO0_4.MCU_MCAN1_TX */
+ >;
+ };
+
};
&wkup_pmx3 {
@@ -646,7 +735,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pmic_irq_pins_default>;
interrupt-parent = <&wkup_gpio0>;
- interrupts = <39 IRQ_TYPE_EDGE_FALLING>;
+ interrupts = <83 IRQ_TYPE_EDGE_FALLING>;
gpio-controller;
#gpio-cells = <2>;
ti,primary-pmic;
@@ -774,6 +863,42 @@
};
};
+&main_i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c1_pins_default>;
+ clock-frequency = <400000>;
+ status = "okay";
+
+ exp2: gpio@21 {
+ compatible = "ti,tca6408";
+ reg = <0x21>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names = "CSI_VIO_SEL", "CSI_MUX_SEL_2", "CSI2_RSTz",
+ "IO_EXP_CAM0_GPIO1", "IO_EXP_CAM1_GPIO1";
+ };
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9543";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x70>;
+
+ cam0_i2c: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ cam1_i2c: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ };
+};
+
&main_sdhci0 {
bootph-all;
/* eMMC */
@@ -822,77 +947,77 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
&main_r5fss2_core0 {
- mboxes = <&mailbox0_cluster3>, <&mbox_main_r5fss2_core0>;
+ mboxes = <&mailbox0_cluster3 &mbox_main_r5fss2_core0>;
memory-region = <&main_r5fss2_core0_dma_memory_region>,
<&main_r5fss2_core0_memory_region>;
};
&main_r5fss2_core1 {
- mboxes = <&mailbox0_cluster3>, <&mbox_main_r5fss2_core1>;
+ mboxes = <&mailbox0_cluster3 &mbox_main_r5fss2_core1>;
memory-region = <&main_r5fss2_core1_dma_memory_region>,
<&main_r5fss2_core1_memory_region>;
};
&c71_0 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_0>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_0>;
memory-region = <&c71_0_dma_memory_region>,
<&c71_0_memory_region>;
};
&c71_1 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_1>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_1>;
memory-region = <&c71_1_dma_memory_region>,
<&c71_1_memory_region>;
};
&c71_2 {
status = "okay";
- mboxes = <&mailbox0_cluster5>, <&mbox_c71_2>;
+ mboxes = <&mailbox0_cluster5 &mbox_c71_2>;
memory-region = <&c71_2_dma_memory_region>,
<&c71_2_memory_region>;
};
&c71_3 {
status = "okay";
- mboxes = <&mailbox0_cluster5>, <&mbox_c71_3>;
+ mboxes = <&mailbox0_cluster5 &mbox_c71_3>;
memory-region = <&c71_3_dma_memory_region>,
<&c71_3_memory_region>;
};
@@ -918,13 +1043,9 @@
pinctrl-names = "default";
pinctrl-0 = <&dss_vout0_pins_default>;
assigned-clocks = <&k3_clks 218 2>,
- <&k3_clks 218 5>,
- <&k3_clks 218 14>,
- <&k3_clks 218 18>;
+ <&k3_clks 218 5>;
assigned-clock-parents = <&k3_clks 218 3>,
- <&k3_clks 218 7>,
- <&k3_clks 218 16>,
- <&k3_clks 218 22>;
+ <&k3_clks 218 7>;
};
&serdes_wiz4 {
@@ -992,3 +1113,93 @@
};
};
};
+
+&mcu_mcan0 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_mcan0_pins_default>;
+ phys = <&transceiver1>;
+};
+
+&mcu_mcan1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_mcan1_pins_default>;
+ phys = <&transceiver2>;
+};
+
+&main_mcan6 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mcan6_pins_default>;
+ phys = <&transceiver3>;
+};
+
+&main_mcan7 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mcan7_pins_default>;
+ phys = <&transceiver4>;
+};
+
+&ospi0 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_fss0_ospi0_pins_default>;
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0x0>;
+ spi-tx-bus-width = <8>;
+ spi-rx-bus-width = <8>;
+ spi-max-frequency = <25000000>;
+ cdns,tshsl-ns = <60>;
+ cdns,tsd2d-ns = <60>;
+ cdns,tchsh-ns = <60>;
+ cdns,tslch-ns = <60>;
+ cdns,read-delay = <4>;
+
+ partitions {
+ bootph-all;
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "ospi.tiboot3";
+ reg = <0x0 0x100000>;
+ };
+
+ partition@100000 {
+ label = "ospi.tispl";
+ reg = <0x100000 0x200000>;
+ };
+
+ partition@300000 {
+ label = "ospi.u-boot";
+ reg = <0x300000 0x400000>;
+ };
+
+ partition@700000 {
+ label = "ospi.env";
+ reg = <0x700000 0x40000>;
+ };
+
+ partition@740000 {
+ label = "ospi.env.backup";
+ reg = <0x740000 0x40000>;
+ };
+
+ partition@800000 {
+ label = "ospi.rootfs";
+ reg = <0x800000 0x37c0000>;
+ };
+
+ partition@3fc0000 {
+ bootph-pre-ram;
+ label = "ospi.phypattern";
+ reg = <0x3fc0000 0x40000>;
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts
index cee2b4b0eb87..6593c5da82c0 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -88,27 +88,56 @@
states = <1800000 0x0>,
<3300000 0x1>;
};
+
+ transceiver1: can-phy1 {
+ compatible = "ti,tcan1043";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_mcan0_gpio_pins_default>;
+ standby-gpios = <&wkup_gpio0 58 GPIO_ACTIVE_LOW>;
+ enable-gpios = <&wkup_gpio0 0 GPIO_ACTIVE_HIGH>;
+ };
+
+ transceiver2: can-phy2 {
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_mcan1_gpio_pins_default>;
+ standby-gpios = <&wkup_gpio0 2 GPIO_ACTIVE_HIGH>;
+ };
+
+ transceiver3: can-phy3 {
+ compatible = "ti,tcan1043";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ standby-gpios = <&exp2 7 GPIO_ACTIVE_LOW>;
+ enable-gpios = <&exp2 6 GPIO_ACTIVE_HIGH>;
+ mux-states = <&mux0 1>;
+ };
};
&wkup_pmx0 {
+};
+
+&wkup_pmx2 {
mcu_uart0_pins_default: mcu-uart0-default-pins {
pinctrl-single,pins = <
- J721E_WKUP_IOPAD(0xf4, PIN_INPUT, 0) /* (D20) MCU_UART0_RXD */
- J721E_WKUP_IOPAD(0xf0, PIN_OUTPUT, 0) /* (D19) MCU_UART0_TXD */
- J721E_WKUP_IOPAD(0xf8, PIN_INPUT, 0) /* (E20) MCU_UART0_CTSn */
- J721E_WKUP_IOPAD(0xfc, PIN_OUTPUT, 0) /* (E21) MCU_UART0_RTSn */
+ J721E_WKUP_IOPAD(0x90, PIN_INPUT, 0) /* (E20) MCU_UART0_CTSn */
+ J721E_WKUP_IOPAD(0x94, PIN_OUTPUT, 0) /* (E21) MCU_UART0_RTSn */
+ J721E_WKUP_IOPAD(0x8c, PIN_INPUT, 0) /* (D20) MCU_UART0_RXD */
+ J721E_WKUP_IOPAD(0x88, PIN_OUTPUT, 0) /* (D19) MCU_UART0_TXD */
>;
};
wkup_uart0_pins_default: wkup-uart0-default-pins {
pinctrl-single,pins = <
- J721E_WKUP_IOPAD(0xb0, PIN_INPUT, 0) /* (B14) WKUP_UART0_RXD */
- J721E_WKUP_IOPAD(0xb4, PIN_OUTPUT, 0) /* (A14) WKUP_UART0_TXD */
+ J721E_WKUP_IOPAD(0x48, PIN_INPUT, 0) /* (B14) WKUP_UART0_RXD */
+ J721E_WKUP_IOPAD(0x4c, PIN_OUTPUT, 0) /* (A14) WKUP_UART0_TXD */
>;
};
-};
-&wkup_pmx2 {
mcu_cpsw_pins_default: mcu-cpsw-default-pins {
pinctrl-single,pins = <
J721E_WKUP_IOPAD(0x0000, PIN_OUTPUT, 0) /* MCU_RGMII1_TX_CTL */
@@ -138,6 +167,33 @@
J721E_WKUP_IOPAD(0x0030, PIN_INPUT, 0) /* (L4) MCU_MDIO0_MDIO */
>;
};
+
+ mcu_mcan0_pins_default: mcu-mcan0-default-pins {
+ pinctrl-single,pins = <
+ J721E_WKUP_IOPAD(0x54, PIN_INPUT, 0) /* (A17) MCU_MCAN0_RX */
+ J721E_WKUP_IOPAD(0x50, PIN_OUTPUT, 0) /* (A16) MCU_MCAN0_TX */
+ >;
+ };
+
+ mcu_mcan1_pins_default: mcu-mcan1-default-pins {
+ pinctrl-single,pins = <
+ J721E_WKUP_IOPAD(0x6c, PIN_INPUT, 0) /* (B16) WKUP_GPIO0_5.MCU_MCAN1_RX */
+ J721E_WKUP_IOPAD(0x68, PIN_OUTPUT, 0) /* (D13) WKUP_GPIO0_4.MCU_MCAN1_TX */
+ >;
+ };
+
+ mcu_mcan0_gpio_pins_default: mcu-mcan0-gpio-default-pins {
+ pinctrl-single,pins = <
+ J721E_WKUP_IOPAD(0x58, PIN_INPUT, 7) /* (B18) WKUP_GPIO0_0 */
+ J721E_WKUP_IOPAD(0x40, PIN_INPUT, 7) /* (B17) MCU_SPI0_D1 */
+ >;
+ };
+
+ mcu_mcan1_gpio_pins_default: mcu-mcan1-gpio-default-pins {
+ pinctrl-single,pins = <
+ J721E_WKUP_IOPAD(0x60, PIN_INPUT, 7) /* (D14) WKUP_GPIO0_2 */
+ >;
+ };
};
&main_pmx0 {
@@ -189,6 +245,13 @@
J721E_IOPAD(0xd0, PIN_OUTPUT, 7) /* (T5) SPI0_D1.GPIO0_55 */
>;
};
+
+ main_mcan3_pins_default: main-mcan3-default-pins {
+ pinctrl-single,pins = <
+ J721E_IOPAD(0x3c, PIN_INPUT, 0) /* (W16) MCAN3_RX */
+ J721E_IOPAD(0x38, PIN_OUTPUT, 0) /* (Y21) MCAN3_TX */
+ >;
+ };
};
&main_pmx1 {
@@ -210,7 +273,6 @@
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&mcu_uart0_pins_default>;
- clock-frequency = <96000000>;
};
&main_uart0 {
@@ -382,15 +444,30 @@
};
&pcie1_rc {
+ status = "okay";
reset-gpios = <&exp1 2 GPIO_ACTIVE_HIGH>;
phys = <&serdes0_pcie_link>;
phy-names = "pcie-phy";
num-lanes = <2>;
};
-&pcie1_ep {
- phys = <&serdes0_pcie_link>;
- phy-names = "pcie-phy";
- num-lanes = <2>;
- status = "disabled";
+&mcu_mcan0 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_mcan0_pins_default>;
+ phys = <&transceiver1>;
+};
+
+&mcu_mcan1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcu_mcan1_pins_default>;
+ phys = <&transceiver2>;
+};
+
+&main_mcan3 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mcan3_pins_default>;
+ phys = <&transceiver3>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-evm-quad-port-eth-exp.dtso b/arch/arm64/boot/dts/ti/k3-j7200-evm-quad-port-eth-exp.dtso
index 32d905235ed7..6432ca08ee8e 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-evm-quad-port-eth-exp.dtso
+++ b/arch/arm64/boot/dts/ti/k3-j7200-evm-quad-port-eth-exp.dtso
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT Overlay for CPSW5G in QSGMII mode using J7 Quad Port ETH EXP Add-On Ethernet Card with
* J7200 board.
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
index da67bf8fe703..657f9cc9f4ea 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J7200 SoC Family Main Domain peripherals
*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/ {
@@ -33,10 +33,11 @@
ranges = <0x00 0x00 0x00100000 0x1c000>;
serdes_ln_ctrl: mux-controller@4080 {
- compatible = "mmio-mux";
+ compatible = "reg-mux";
+ reg = <0x4080 0x20>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4080 0x3>, <0x4084 0x3>, /* SERDES0 lane0/1 select */
- <0x4088 0x3>, <0x408c 0x3>; /* SERDES0 lane2/3 select */
+ mux-reg-masks = <0x0 0x3>, <0x4 0x3>, /* SERDES0 lane0/1 select */
+ <0x8 0x3>, <0xc 0x3>; /* SERDES0 lane2/3 select */
};
cpsw0_phy_gmii_sel: phy@4044 {
@@ -47,9 +48,10 @@
};
usb_serdes_mux: mux-controller@4000 {
- compatible = "mmio-mux";
+ compatible = "reg-mux";
+ reg = <0x4000 0x4>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4000 0x8000000>; /* USB0 to SERDES0 lane 1/3 mux */
+ mux-reg-masks = <0x0 0x8000000>; /* USB0 to SERDES0 lane 1/3 mux */
};
};
@@ -399,7 +401,7 @@
/* TIMERIO pad input CTRLMMR_TIMER*_CTRL registers */
main_timerio_input: pinctrl@104200 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
reg = <0x0 0x104200 0x0 0x50>;
#pinctrl-cells = <1>;
pinctrl-single,register-width = <32>;
@@ -408,7 +410,7 @@
/* TIMERIO pad output CTCTRLMMR_TIMERIO*_CTRL registers */
main_timerio_output: pinctrl@104280 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
reg = <0x0 0x104280 0x0 0x20>;
#pinctrl-cells = <1>;
pinctrl-single,register-width = <32>;
@@ -416,7 +418,7 @@
};
main_pmx0: pinctrl@11c000 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
/* Proxy 0 addressing */
reg = <0x00 0x11c000 0x00 0x10c>;
#pinctrl-cells = <1>;
@@ -425,7 +427,7 @@
};
main_pmx1: pinctrl@11c11c {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
/* Proxy 0 addressing */
reg = <0x00 0x11c11c 0x00 0xc>;
#pinctrl-cells = <1>;
@@ -770,26 +772,7 @@
ranges = <0x01000000 0x0 0x18001000 0x00 0x18001000 0x0 0x0010000>,
<0x02000000 0x0 0x18011000 0x00 0x18011000 0x0 0x7fef000>;
dma-ranges = <0x02000000 0x0 0x0 0x0 0x0 0x10000 0x0>;
- };
-
- pcie1_ep: pcie-ep@2910000 {
- compatible = "ti,j7200-pcie-ep", "ti,j721e-pcie-ep";
- reg = <0x00 0x02910000 0x00 0x1000>,
- <0x00 0x02917000 0x00 0x400>,
- <0x00 0x0d800000 0x00 0x00800000>,
- <0x00 0x18000000 0x00 0x08000000>;
- reg-names = "intd_cfg", "user_cfg", "reg", "mem";
- interrupt-names = "link_state";
- interrupts = <GIC_SPI 330 IRQ_TYPE_EDGE_RISING>;
- ti,syscon-pcie-ctrl = <&scm_conf 0x4074>;
- max-link-speed = <3>;
- num-lanes = <4>;
- power-domains = <&k3_pds 240 TI_SCI_PD_EXCLUSIVE>;
- clocks = <&k3_clks 240 6>;
- clock-names = "fck";
- max-functions = /bits/ 8 <6>;
- max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>;
- dma-coherent;
+ status = "disabled";
};
usbss0: cdns-usb@4104000 {
@@ -895,6 +878,276 @@
status = "disabled";
};
+ main_mcan0: can@2701000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02701000 0x00 0x200>,
+ <0x00 0x02708000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 156 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 156 0>, <&k3_clks 156 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan1: can@2711000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02711000 0x00 0x200>,
+ <0x00 0x02718000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 158 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 158 0>, <&k3_clks 158 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan2: can@2721000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02721000 0x00 0x200>,
+ <0x00 0x02728000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 160 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 160 0>, <&k3_clks 160 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan3: can@2731000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02731000 0x00 0x200>,
+ <0x00 0x02738000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 161 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 161 0>, <&k3_clks 161 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan4: can@2741000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02741000 0x00 0x200>,
+ <0x00 0x02748000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 162 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 162 0>, <&k3_clks 162 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan5: can@2751000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02751000 0x00 0x200>,
+ <0x00 0x02758000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 163 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 163 0>, <&k3_clks 163 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan6: can@2761000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02761000 0x00 0x200>,
+ <0x00 0x02768000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 164 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 164 0>, <&k3_clks 164 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan7: can@2771000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02771000 0x00 0x200>,
+ <0x00 0x02778000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 165 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 165 0>, <&k3_clks 165 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan8: can@2781000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02781000 0x00 0x200>,
+ <0x00 0x02788000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 166 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 166 0>, <&k3_clks 166 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 576 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 577 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan9: can@2791000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02791000 0x00 0x200>,
+ <0x00 0x02798000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 167 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 167 0>, <&k3_clks 167 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 579 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 580 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan10: can@27a1000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x027a1000 0x00 0x200>,
+ <0x00 0x027a8000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 168 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 168 0>, <&k3_clks 168 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 583 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan11: can@27b1000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x027b1000 0x00 0x200>,
+ <0x00 0x027b8000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 169 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 169 0>, <&k3_clks 169 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 585 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 586 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan12: can@27c1000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x027c1000 0x00 0x200>,
+ <0x00 0x027c8000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 170 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 170 0>, <&k3_clks 170 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 588 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 589 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan13: can@27d1000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x027d1000 0x00 0x200>,
+ <0x00 0x027d8000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 171 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 171 0>, <&k3_clks 171 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 591 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 592 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan14: can@2681000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02681000 0x00 0x200>,
+ <0x00 0x02688000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 150 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 150 0>, <&k3_clks 150 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 594 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 595 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan15: can@2691000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x02691000 0x00 0x200>,
+ <0x00 0x02698000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 151 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 151 0>, <&k3_clks 151 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 597 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 598 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan16: can@26a1000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x026a1000 0x00 0x200>,
+ <0x00 0x026a8000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 152 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 152 0>, <&k3_clks 152 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 784 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 785 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ main_mcan17: can@26b1000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x026b1000 0x00 0x200>,
+ <0x00 0x026b8000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 153 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 153 0>, <&k3_clks 153 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 787 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 788 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
main_spi0: spi@2100000 {
compatible = "ti,am654-mcspi","ti,omap4-mcspi";
reg = <0x00 0x02100000 0x00 0x400>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 60b26374ae0c..7cf21c99956e 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J7200 SoC Family MCU/WAKEUP Domain peripherals
*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu_wakeup {
@@ -192,7 +192,7 @@
/* MCU_TIMERIO pad input CTRLMMR_MCU_TIMER*_CTRL registers */
mcu_timerio_input: pinctrl@40f04200 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
reg = <0x0 0x40f04200 0x0 0x28>;
#pinctrl-cells = <1>;
pinctrl-single,register-width = <32>;
@@ -202,7 +202,7 @@
/* MCU_TIMERIO pad output CTRLMMR_MCU_TIMERIO*_CTRL registers */
mcu_timerio_output: pinctrl@40f04280 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
reg = <0x0 0x40f04280 0x0 0x28>;
#pinctrl-cells = <1>;
pinctrl-single,register-width = <32>;
@@ -211,7 +211,7 @@
};
wkup_pmx0: pinctrl@4301c000 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
/* Proxy 0 addressing */
reg = <0x00 0x4301c000 0x00 0x34>;
#pinctrl-cells = <1>;
@@ -220,7 +220,7 @@
};
wkup_pmx1: pinctrl@4301c038 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
/* Proxy 0 addressing */
reg = <0x00 0x4301c038 0x00 0x8>;
#pinctrl-cells = <1>;
@@ -229,7 +229,7 @@
};
wkup_pmx2: pinctrl@4301c068 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
/* Proxy 0 addressing */
reg = <0x00 0x4301c068 0x00 0xec>;
#pinctrl-cells = <1>;
@@ -238,7 +238,7 @@
};
wkup_pmx3: pinctrl@4301c174 {
- compatible = "pinctrl-single";
+ compatible = "ti,j7200-padconf", "pinctrl-single";
/* Proxy 0 addressing */
reg = <0x00 0x4301c174 0x00 0x20>;
#pinctrl-cells = <1>;
@@ -518,17 +518,18 @@
status = "disabled";
};
- fss: syscon@47000000 {
- compatible = "syscon", "simple-mfd";
+ fss: bus@47000000 {
+ compatible = "simple-bus";
reg = <0x00 0x47000000 0x00 0x100>;
#address-cells = <2>;
#size-cells = <2>;
ranges;
- hbmc_mux: hbmc-mux {
- compatible = "mmio-mux";
+ hbmc_mux: mux-controller@47000004 {
+ compatible = "reg-mux";
+ reg = <0x00 0x47000004 0x00 0x4>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4 0x2>; /* HBMC select */
+ mux-reg-masks = <0x0 0x2>; /* HBMC select */
};
hbmc: hyperbus@47034000 {
@@ -655,4 +656,34 @@
ti,esm-pins = <95>;
bootph-pre-ram;
};
+
+ mcu_mcan0: can@40528000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x40528000 0x00 0x200>,
+ <0x00 0x40500000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 172 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 172 0>, <&k3_clks 172 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 832 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 833 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
+
+ mcu_mcan1: can@40568000 {
+ compatible = "bosch,m_can";
+ reg = <0x00 0x40568000 0x00 0x200>,
+ <0x00 0x40540000 0x00 0x8000>;
+ reg-names = "m_can", "message_ram";
+ power-domains = <&k3_pds 173 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 173 0>, <&k3_clks 173 2>;
+ clock-names = "hclk", "cclk";
+ interrupts = <GIC_SPI 835 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 836 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "int0", "int1";
+ bosch,mram-cfg = <0x0 128 64 64 64 64 32 32>;
+ status = "disabled";
+ };
};
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi
index ea47f10d393a..7e6a584ac6f0 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi
@@ -1,10 +1,12 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
+
#include "k3-j7200.dtsi"
/ {
@@ -80,6 +82,25 @@
no-map;
};
};
+
+ mux0: mux-controller {
+ compatible = "gpio-mux";
+ #mux-state-cells = <1>;
+ mux-gpios = <&exp_som 1 GPIO_ACTIVE_HIGH>;
+ };
+
+ mux1: mux-controller {
+ compatible = "gpio-mux";
+ #mux-state-cells = <1>;
+ mux-gpios = <&exp_som 2 GPIO_ACTIVE_HIGH>;
+ };
+
+ transceiver0: can-phy0 {
+ /* standby pin has been grounded by default */
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ };
};
&wkup_pmx0 {
@@ -142,6 +163,13 @@
J721E_IOPAD(0xd8, PIN_INPUT_PULLUP, 0) /* (W2) I2C0_SDA */
>;
};
+
+ main_mcan0_pins_default: main-mcan0-default-pins {
+ pinctrl-single,pins = <
+ J721E_IOPAD(0x24, PIN_INPUT, 0) /* (V20) MCAN0_RX */
+ J721E_IOPAD(0x20, PIN_OUTPUT, 0) /* (V18) MCAN0_TX */
+ >;
+ };
};
&hbmc {
@@ -222,25 +250,25 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
@@ -478,3 +506,10 @@
};
};
};
+
+&main_mcan0 {
+ status = "okay";
+ pinctrl-0 = <&main_mcan0_pins_default>;
+ pinctrl-names = "default";
+ phys = <&transceiver0>;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-thermal.dtsi
index e7e3a643a6f0..2d22a95a6fdb 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-j7200.dtsi b/arch/arm64/boot/dts/ti/k3-j7200.dtsi
index ef73e6d7e858..d411911fdf71 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J7200 SoC Family
*
- * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2020-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/interrupt-controller/irq.h>
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts b/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts
index 2f954729f353..a2925555fe81 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* https://beagleboard.org/ai-64
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
- * Copyright (C) 2022 Jason Kridner, BeagleBoard.org Foundation
- * Copyright (C) 2022 Robert Nelson, BeagleBoard.org Foundation
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Jason Kridner, BeagleBoard.org Foundation
+ * Copyright (C) 2022-2024 Robert Nelson, BeagleBoard.org Foundation
*/
/dts-v1/;
@@ -936,58 +936,58 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
&c66_0 {
status = "okay";
- mboxes = <&mailbox0_cluster3>, <&mbox_c66_0>;
+ mboxes = <&mailbox0_cluster3 &mbox_c66_0>;
memory-region = <&c66_0_dma_memory_region>,
<&c66_0_memory_region>;
};
&c66_1 {
status = "okay";
- mboxes = <&mailbox0_cluster3>, <&mbox_c66_1>;
+ mboxes = <&mailbox0_cluster3 &mbox_c66_1>;
memory-region = <&c66_1_dma_memory_region>,
<&c66_1_memory_region>;
};
&c71_0 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_0>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_0>;
memory-region = <&c71_0_dma_memory_region>,
<&c71_0_memory_region>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
index fe5207ac7d85..8230d53cd696 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2019-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* Product Link: https://www.ti.com/tool/J721EXCPXEVM
*/
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-evm-gesi-exp-board.dtso b/arch/arm64/boot/dts/ti/k3-j721e-evm-gesi-exp-board.dtso
index 6a7d37575da1..f84aa9f94547 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-evm-gesi-exp-board.dtso
+++ b/arch/arm64/boot/dts/ti/k3-j721e-evm-gesi-exp-board.dtso
@@ -1,11 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT Overlay for CPSW9G in RGMII mode using J7 GESI EXP BRD board with
* J721E board.
*
* GESI Board Product Link: https://www.ti.com/tool/J7EXPCXEVM
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-evm-pcie0-ep.dtso b/arch/arm64/boot/dts/ti/k3-j721e-evm-pcie0-ep.dtso
index 0c82a13b65a4..4062709d6579 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-evm-pcie0-ep.dtso
+++ b/arch/arm64/boot/dts/ti/k3-j721e-evm-pcie0-ep.dtso
@@ -1,11 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT Overlay for enabling PCIE0 instance in Endpoint Configuration with the
* J7 common processor board.
*
* J7 Common Processor Board Product Link: https://www.ti.com/tool/J721EXCPXEVM
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-evm-quad-port-eth-exp.dtso b/arch/arm64/boot/dts/ti/k3-j721e-evm-quad-port-eth-exp.dtso
index d4c51ffc3d6b..8376fa4b6ee1 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-evm-quad-port-eth-exp.dtso
+++ b/arch/arm64/boot/dts/ti/k3-j721e-evm-quad-port-eth-exp.dtso
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT Overlay for CPSW9G in QSGMII mode using J7 Quad Port ETH EXP Add-On Ethernet Card with
* J721E board.
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index 2569b4c08ffb..c7eafbc862f9 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J721E SoC Family Main Domain peripherals
*
- * Copyright (C) 2016-2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/phy/phy-ti.h>
@@ -45,15 +45,15 @@
ranges = <0x0 0x0 0x00100000 0x1c000>;
serdes_ln_ctrl: mux-controller@4080 {
- compatible = "mmio-mux";
- reg = <0x00004080 0x50>;
+ compatible = "reg-mux";
+ reg = <0x4080 0x50>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4080 0x3>, <0x4084 0x3>, /* SERDES0 lane0/1 select */
- <0x4090 0x3>, <0x4094 0x3>, /* SERDES1 lane0/1 select */
- <0x40a0 0x3>, <0x40a4 0x3>, /* SERDES2 lane0/1 select */
- <0x40b0 0x3>, <0x40b4 0x3>, /* SERDES3 lane0/1 select */
- <0x40c0 0x3>, <0x40c4 0x3>, <0x40c8 0x3>, <0x40cc 0x3>;
- /* SERDES4 lane0/1/2/3 select */
+ mux-reg-masks = <0x0 0x3>, <0x4 0x3>, /* SERDES0 lane0/1 select */
+ <0x10 0x3>, <0x14 0x3>, /* SERDES1 lane0/1 select */
+ <0x20 0x3>, <0x24 0x3>, /* SERDES2 lane0/1 select */
+ <0x30 0x3>, <0x34 0x3>, /* SERDES3 lane0/1 select */
+ <0x40 0x3>, <0x44 0x3>, /* SERDES4 lane0/1 select */
+ <0x48 0x3>, <0x4c 0x3>; /* SERDES4 lane2/3 select */
idle-states = <J721E_SERDES0_LANE0_PCIE0_LANE0>, <J721E_SERDES0_LANE1_PCIE0_LANE1>,
<J721E_SERDES1_LANE0_PCIE1_LANE0>, <J721E_SERDES1_LANE1_PCIE1_LANE1>,
<J721E_SERDES2_LANE0_PCIE2_LANE0>, <J721E_SERDES2_LANE1_PCIE2_LANE1>,
@@ -70,10 +70,11 @@
};
usb_serdes_mux: mux-controller@4000 {
- compatible = "mmio-mux";
+ compatible = "reg-mux";
+ reg = <0x4000 0x20>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4000 0x8000000>, /* USB0 to SERDES0/3 mux */
- <0x4010 0x8000000>; /* USB1 to SERDES1/2 mux */
+ mux-reg-masks = <0x0 0x8000000>, /* USB0 to SERDES0/3 mux */
+ <0x10 0x8000000>; /* USB1 to SERDES1/2 mux */
};
ehrpwm_tbclk: clock-controller@4140 {
@@ -572,6 +573,128 @@
pinctrl-single,function-mask = <0x0000001f>;
};
+ ti_csi2rx0: ticsi2rx@4500000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x0 0x4500000 0x0 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_udmap 0x4940>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 26 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx0: csi-bridge@4504000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x0 0x4504000 0x0 0x1000>;
+ clocks = <&k3_clks 26 2>, <&k3_clks 26 0>, <&k3_clks 26 2>,
+ <&k3_clks 26 2>, <&k3_clks 26 3>, <&k3_clks 26 3>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy0>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi0_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi0_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi0_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi0_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi0_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ ti_csi2rx1: ticsi2rx@4510000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x0 0x4510000 0x0 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_udmap 0x4960>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 27 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx1: csi-bridge@4514000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x0 0x4514000 0x0 0x1000>;
+ clocks = <&k3_clks 27 2>, <&k3_clks 27 0>, <&k3_clks 27 2>,
+ <&k3_clks 27 2>, <&k3_clks 27 3>, <&k3_clks 27 3>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy1>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi1_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi1_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi1_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi1_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi1_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ dphy0: phy@4580000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x0 0x4580000 0x0 0x1100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 147 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
+ dphy1: phy@4590000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x0 0x4590000 0x0 0x1100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 148 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
serdes_wiz0: wiz@5000000 {
compatible = "ti,j721e-wiz-16g";
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index a74912d9e4da..4618b697fbc4 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J721E SoC Family MCU/WAKEUP Domain peripherals
*
- * Copyright (C) 2016-2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu_wakeup {
@@ -353,9 +353,9 @@
hbmc_mux: mux-controller@47000004 {
compatible = "reg-mux";
- reg = <0x00 0x47000004 0x00 0x2>;
+ reg = <0x00 0x47000004 0x00 0x4>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4 0x2>; /* HBMC select */
+ mux-reg-masks = <0x0 0x2>; /* HBMC select */
};
hbmc: hyperbus@47034000 {
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso
new file mode 100644
index 000000000000..47bb5480b5b0
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/**
+ * DT Overlay for dual RPi Camera V2.1 (Sony IMX219) interfaced with CSI2
+ * on J721E SK, AM68 SK or AM69-SK board.
+ * https://datasheets.raspberrypi.org/camera/camera-v2-schematic.pdf
+ *
+ * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include "k3-pinctrl.h"
+
+&{/} {
+ clk_imx219_fixed: imx219-xclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ };
+};
+
+&csi_mux {
+ idle-state = <1>;
+};
+
+/* CAM0 I2C */
+&cam0_i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ imx219_0: imx219-0@10 {
+ compatible = "sony,imx219";
+ reg = <0x10>;
+
+ clocks = <&clk_imx219_fixed>;
+ clock-names = "xclk";
+
+ port {
+ csi2_cam0: endpoint {
+ remote-endpoint = <&csi2rx0_in_sensor>;
+ link-frequencies = /bits/ 64 <456000000>;
+ clock-lanes = <0>;
+ data-lanes = <1 2>;
+ };
+ };
+ };
+};
+
+/* CAM1 I2C */
+&cam1_i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ imx219_1: imx219-1@10 {
+ compatible = "sony,imx219";
+ reg = <0x10>;
+
+ clocks = <&clk_imx219_fixed>;
+ clock-names = "xclk";
+
+ port {
+ csi2_cam1: endpoint {
+ remote-endpoint = <&csi2rx1_in_sensor>;
+ link-frequencies = /bits/ 64 <456000000>;
+ clock-lanes = <0>;
+ data-lanes = <1 2>;
+ };
+ };
+ };
+};
+
+
+&cdns_csi2rx0 {
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi0_port0: port@0 {
+ reg = <0>;
+ status = "okay";
+
+ csi2rx0_in_sensor: endpoint {
+ remote-endpoint = <&csi2_cam0>;
+ bus-type = <4>; /* CSI2 DPHY. */
+ clock-lanes = <0>;
+ data-lanes = <1 2>;
+ };
+ };
+
+ csi0_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi0_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi0_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi0_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+};
+
+&dphy0 {
+ status = "okay";
+};
+
+&ti_csi2rx0 {
+ status = "okay";
+};
+
+&cdns_csi2rx1 {
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi1_port0: port@0 {
+ reg = <0>;
+ status = "okay";
+
+ csi2rx1_in_sensor: endpoint {
+ remote-endpoint = <&csi2_cam1>;
+ bus-type = <4>; /* CSI2 DPHY. */
+ clock-lanes = <0>;
+ data-lanes = <1 2>;
+ };
+ };
+
+ csi1_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi1_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi1_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi1_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+};
+
+&dphy1 {
+ status = "okay";
+};
+
+&ti_csi2rx1 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts
index 188dfe291a32..0c4575ad8d7c 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* J721E SK URL: https://www.ti.com/tool/SK-TDA4VM
*/
@@ -286,6 +286,15 @@
};
};
};
+
+ csi_mux: mux-controller {
+ compatible = "gpio-mux";
+ #mux-state-cells = <1>;
+ mux-gpios = <&main_gpio0 88 GPIO_ACTIVE_HIGH>;
+ idle-state = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_csi_mux_sel_pins_default>;
+ };
};
&main_pmx0 {
@@ -352,6 +361,12 @@
>;
};
+ main_csi_mux_sel_pins_default: main-csi-mux-sel-default-pins {
+ pinctrl-single,pins = <
+ J721E_IOPAD(0x164, PIN_OUTPUT, 7) /* (V29) RGMII5_TD2 */
+ >;
+ };
+
dp0_pins_default: dp0-default-pins {
pinctrl-single,pins = <
J721E_IOPAD(0x1c4, PIN_INPUT, 5) /* SPI0_CS1.DP0_HPD */
@@ -574,7 +589,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pmic_irq_pins_default>;
interrupt-parent = <&wkup_gpio0>;
- interrupts = <9 IRQ_TYPE_EDGE_FALLING>;
+ interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
gpio-controller;
#gpio-cells = <2>;
ti,primary-pmic;
@@ -651,7 +666,7 @@
reg = <0x4c>;
system-power-controller;
interrupt-parent = <&wkup_gpio0>;
- interrupts = <9 IRQ_TYPE_EDGE_FALLING>;
+ interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
gpio-controller;
#gpio-cells = <2>;
buck1234-supply = <&vsys_3v3>;
@@ -858,14 +873,14 @@
reg = <0x70>;
/* CSI0 I2C */
- i2c@0 {
+ cam0_i2c: i2c@0 {
#address-cells = <1>;
#size-cells = <0>;
reg = <0>;
};
/* CSI1 I2C */
- i2c@1 {
+ cam1_i2c: i2c@1 {
#address-cells = <1>;
#size-cells = <0>;
reg = <1>;
@@ -1168,58 +1183,58 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
&c66_0 {
status = "okay";
- mboxes = <&mailbox0_cluster3>, <&mbox_c66_0>;
+ mboxes = <&mailbox0_cluster3 &mbox_c66_0>;
memory-region = <&c66_0_dma_memory_region>,
<&c66_0_memory_region>;
};
&c66_1 {
status = "okay";
- mboxes = <&mailbox0_cluster3>, <&mbox_c66_1>;
+ mboxes = <&mailbox0_cluster3 &mbox_c66_1>;
memory-region = <&c66_1_dma_memory_region>,
<&c66_1_memory_region>;
};
&c71_0 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_0>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_0>;
memory-region = <&c71_0_dma_memory_region>,
<&c71_0_memory_region>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-som-p0.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-som-p0.dtsi
index a75611eec791..1fae6495db07 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-som-p0.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-som-p0.dtsi
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2019-2020 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2019-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* Product Link: https://www.ti.com/tool/J721EXSOMXEVM
*/
@@ -549,58 +549,58 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
&c66_0 {
status = "okay";
- mboxes = <&mailbox0_cluster3>, <&mbox_c66_0>;
+ mboxes = <&mailbox0_cluster3 &mbox_c66_0>;
memory-region = <&c66_0_dma_memory_region>,
<&c66_0_memory_region>;
};
&c66_1 {
status = "okay";
- mboxes = <&mailbox0_cluster3>, <&mbox_c66_1>;
+ mboxes = <&mailbox0_cluster3 &mbox_c66_1>;
memory-region = <&c66_1_dma_memory_region>,
<&c66_1_memory_region>;
};
&c71_0 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_0>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_0>;
memory-region = <&c71_0_dma_memory_region>,
<&c71_0_memory_region>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-thermal.dtsi
index c2523279001b..927f7614ae7a 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi
index a200810df54a..5a72c518ceb6 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J721E SoC Family
*
- * Copyright (C) 2016-2019 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2016-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/interrupt-controller/irq.h>
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
index c6b85bbf9a17..c5a0b7cbb14f 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* Common Processor Board: https://www.ti.com/tool/J721EXCPXEVM
*/
@@ -147,6 +147,13 @@
>;
};
+ main_i2c5_pins_default: main-i2c5-default-pins {
+ pinctrl-single,pins = <
+ J721S2_IOPAD(0x01c, PIN_INPUT, 8) /* (Y24) MCAN15_TX.I2C5_SCL */
+ J721S2_IOPAD(0x018, PIN_INPUT, 8) /* (W23) MCAN14_RX.I2C5_SDA */
+ >;
+ };
+
main_mmc1_pins_default: main-mmc1-default-pins {
pinctrl-single,pins = <
J721S2_IOPAD(0x104, PIN_INPUT, 0) /* (P23) MMC1_CLK */
@@ -190,8 +197,6 @@
&wkup_pmx2 {
wkup_uart0_pins_default: wkup-uart0-default-pins {
pinctrl-single,pins = <
- J721S2_WKUP_IOPAD(0x070, PIN_INPUT, 0) /* (E25) WKUP_GPIO0_6.WKUP_UART0_CTSn */
- J721S2_WKUP_IOPAD(0x074, PIN_OUTPUT, 0) /* (F28) WKUP_GPIO0_7.WKUP_UART0_RTSn */
J721S2_WKUP_IOPAD(0x048, PIN_INPUT, 0) /* (D28) WKUP_UART0_RXD */
J721S2_WKUP_IOPAD(0x04c, PIN_OUTPUT, 0) /* (D27) WKUP_UART0_TXD */
>;
@@ -356,6 +361,24 @@
};
};
+&main_i2c5 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c5_pins_default>;
+ clock-frequency = <400000>;
+ status = "okay";
+
+ exp5: gpio@20 {
+ compatible = "ti,tca6408";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names = "CSI2_EXP_RSTZ", "CSI2_EXP_A_GPIO0",
+ "CSI2_EXP_A_GPIO1", "CSI2_EXP_A_GPIO2",
+ "CSI2_EXP_B_GPIO1", "CSI2_EXP_B_GPIO2",
+ "CSI2_EXP_B_GPIO3", "CSI2_EXP_B_GPIO4";
+ };
+};
+
&main_sdhci0 {
/* eMMC */
status = "okay";
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-evm-gesi-exp-board.dtso b/arch/arm64/boot/dts/ti/k3-j721s2-evm-gesi-exp-board.dtso
index b78feea31b54..1be28283c7d9 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-evm-gesi-exp-board.dtso
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-evm-gesi-exp-board.dtso
@@ -1,10 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT Overlay for MAIN CPSW2G using GESI Expansion Board with J7 common processor board.
*
* GESI Board Product Link: https://www.ti.com/tool/J7EXPCXEVM
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-evm-pcie1-ep.dtso b/arch/arm64/boot/dts/ti/k3-j721s2-evm-pcie1-ep.dtso
index 43568eb67d93..5ff390915b75 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-evm-pcie1-ep.dtso
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-evm-pcie1-ep.dtso
@@ -1,11 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/**
* DT Overlay for enabling PCIE1 instance in Endpoint Configuration with the
* J7 common processor board.
*
* J7 Common Processor Board Product Link: https://www.ti.com/tool/J721EXCPXEVM
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi
index ea7f2b2ab165..b70c8615e3c1 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J721S2 SoC Family Main Domain peripherals
*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/phy/phy-cadence.h>
@@ -45,7 +45,7 @@
ranges = <0x00 0x00 0x00104000 0x18000>;
usb_serdes_mux: mux-controller@0 {
- compatible = "mmio-mux";
+ compatible = "reg-mux";
reg = <0x0 0x4>;
#mux-control-cells = <1>;
mux-reg-masks = <0x0 0x8000000>; /* USB0 to SERDES0 lane 1/3 mux */
@@ -58,11 +58,11 @@
};
serdes_ln_ctrl: mux-controller@80 {
- compatible = "mmio-mux";
+ compatible = "reg-mux";
reg = <0x80 0x10>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x80 0x3>, <0x84 0x3>, /* SERDES0 lane0/1 select */
- <0x88 0x3>, <0x8c 0x3>; /* SERDES0 lane2/3 select */
+ mux-reg-masks = <0x0 0x3>, <0x4 0x3>, /* SERDES0 lane0/1 select */
+ <0x8 0x3>, <0xc 0x3>; /* SERDES0 lane2/3 select */
};
ehrpwm_tbclk: clock-controller@140 {
@@ -716,6 +716,14 @@
status = "disabled";
};
+ vpu: video-codec@4210000 {
+ compatible = "ti,j721s2-wave521c", "cnm,wave521c";
+ reg = <0x00 0x4210000 0x00 0x10000>;
+ interrupts = <GIC_SPI 182 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&k3_clks 179 2>;
+ power-domains = <&k3_pds 179 TI_SCI_PD_EXCLUSIVE>;
+ };
+
main_sdhci0: mmc@4f80000 {
compatible = "ti,j721e-sdhci-8bit";
reg = <0x00 0x04f80000 0x00 0x1000>,
@@ -1122,7 +1130,6 @@
ti,sci-dev-id = <225>;
ti,sci-rm-range-rchan = <0x21>;
ti,sci-rm-range-tchan = <0x22>;
- status = "disabled";
};
cpts@310d0000 {
@@ -1233,6 +1240,128 @@
};
};
+ ti_csi2rx0: ticsi2rx@4500000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x00 0x04500000 0x00 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_bcdma_csi 0 0x4940 0>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 38 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx0: csi-bridge@4504000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x00 0x04504000 0x00 0x1000>;
+ clocks = <&k3_clks 38 3>, <&k3_clks 38 1>, <&k3_clks 38 3>,
+ <&k3_clks 38 3>, <&k3_clks 38 4>, <&k3_clks 38 4>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy0>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi0_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi0_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi0_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi0_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi0_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ ti_csi2rx1: ticsi2rx@4510000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x00 0x04510000 0x00 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_bcdma_csi 0 0x4960 0>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 39 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx1: csi-bridge@4514000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x00 0x04514000 0x00 0x1000>;
+ clocks = <&k3_clks 39 3>, <&k3_clks 39 1>, <&k3_clks 39 3>,
+ <&k3_clks 39 3>, <&k3_clks 39 4>, <&k3_clks 39 4>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy1>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi1_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi1_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi1_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi1_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi1_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ dphy0: phy@4580000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x00 0x04580000 0x00 0x1100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 152 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
+ dphy1: phy@4590000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x00 0x04590000 0x00 0x1100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 153 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
serdes_wiz0: wiz@5060000 {
compatible = "ti,j721s2-wiz-10g";
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi
index 80aa33c58a45..eaf7f709440e 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J721S2 SoC Family MCU/WAKEUP Domain peripherals
*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu_wakeup {
@@ -663,7 +663,7 @@
compatible = "ti,j7200-vtm";
reg = <0x00 0x42040000 0x0 0x350>,
<0x00 0x42050000 0x0 0x350>;
- power-domains = <&k3_pds 154 TI_SCI_PD_SHARED>;
+ power-domains = <&k3_pds 180 TI_SCI_PD_SHARED>;
#thermal-sensor-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-som-p0.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-som-p0.dtsi
index da3237b23b63..623c8421525d 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-som-p0.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-som-p0.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* SoM: https://www.ti.com/lit/zip/sprr439
*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
/dts-v1/;
@@ -504,51 +504,51 @@
};
&mcu_r5fss0_core0 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core0>;
memory-region = <&mcu_r5fss0_core0_dma_memory_region>,
<&mcu_r5fss0_core0_memory_region>;
};
&mcu_r5fss0_core1 {
- mboxes = <&mailbox0_cluster0>, <&mbox_mcu_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster0 &mbox_mcu_r5fss0_core1>;
memory-region = <&mcu_r5fss0_core1_dma_memory_region>,
<&mcu_r5fss0_core1_memory_region>;
};
&main_r5fss0_core0 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core0>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core0>;
memory-region = <&main_r5fss0_core0_dma_memory_region>,
<&main_r5fss0_core0_memory_region>;
};
&main_r5fss0_core1 {
- mboxes = <&mailbox0_cluster1>, <&mbox_main_r5fss0_core1>;
+ mboxes = <&mailbox0_cluster1 &mbox_main_r5fss0_core1>;
memory-region = <&main_r5fss0_core1_dma_memory_region>,
<&main_r5fss0_core1_memory_region>;
};
&main_r5fss1_core0 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core0>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core0>;
memory-region = <&main_r5fss1_core0_dma_memory_region>,
<&main_r5fss1_core0_memory_region>;
};
&main_r5fss1_core1 {
- mboxes = <&mailbox0_cluster2>, <&mbox_main_r5fss1_core1>;
+ mboxes = <&mailbox0_cluster2 &mbox_main_r5fss1_core1>;
memory-region = <&main_r5fss1_core1_dma_memory_region>,
<&main_r5fss1_core1_memory_region>;
};
&c71_0 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_0>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_0>;
memory-region = <&c71_0_dma_memory_region>,
<&c71_0_memory_region>;
};
&c71_1 {
status = "okay";
- mboxes = <&mailbox0_cluster4>, <&mbox_c71_1>;
+ mboxes = <&mailbox0_cluster4 &mbox_c71_1>;
memory-region = <&c71_1_dma_memory_region>,
<&c71_1_memory_region>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-thermal.dtsi
index f7b1a15b8fa0..e3ef61c1658f 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
index 1f636acd4eee..be4502fe1c9d 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
@@ -1,10 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J721S2 SoC Family
*
* TRM (SPRUJ28 NOVEMBER 2021): https://www.ti.com/lit/pdf/spruj28
*
- * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2021-2024 Texas Instruments Incorporated - https://www.ti.com/
*
*/
diff --git a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts
new file mode 100644
index 000000000000..cee3a8661d5e
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Device Tree file for the J722S EVM
+ * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
+ *
+ * Schematics: https://www.ti.com/lit/zip/sprr495
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/net/ti-dp83867.h>
+#include "k3-j722s.dtsi"
+
+/ {
+ compatible = "ti,j722s-evm", "ti,j722s";
+ model = "Texas Instruments J722S EVM";
+
+ aliases {
+ serial0 = &wkup_uart0;
+ serial2 = &main_uart0;
+ mmc0 = &sdhci0;
+ mmc1 = &sdhci1;
+ };
+
+ chosen {
+ stdout-path = &main_uart0;
+ };
+
+ memory@80000000 {
+ /* 8G RAM */
+ reg = <0x00000000 0x80000000 0x00000000 0x80000000>,
+ <0x00000008 0x80000000 0x00000001 0x80000000>;
+ device_type = "memory";
+ bootph-pre-ram;
+ };
+
+ reserved_memory: reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ secure_tfa_ddr: tfa@9e780000 {
+ reg = <0x00 0x9e780000 0x00 0x80000>;
+ no-map;
+ };
+
+ secure_ddr: optee@9e800000 {
+ reg = <0x00 0x9e800000 0x00 0x01800000>;
+ no-map;
+ };
+
+ wkup_r5fss0_core0_memory_region: r5f-memory@a0100000 {
+ compatible = "shared-dma-pool";
+ reg = <0x00 0xa0100000 0x00 0xf00000>;
+ no-map;
+ };
+
+ };
+
+ vmain_pd: regulator-0 {
+ /* TPS65988 PD CONTROLLER OUTPUT */
+ compatible = "regulator-fixed";
+ regulator-name = "vmain_pd";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ bootph-all;
+ };
+
+ vsys_5v0: regulator-vsys5v0 {
+ /* Output of LM5140 */
+ compatible = "regulator-fixed";
+ regulator-name = "vsys_5v0";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vmain_pd>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vdd_mmc1: regulator-mmc1 {
+ /* TPS22918DBVR */
+ compatible = "regulator-fixed";
+ regulator-name = "vdd_mmc1";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ enable-active-high;
+ gpio = <&exp1 15 GPIO_ACTIVE_HIGH>;
+ bootph-all;
+ };
+
+ vdd_sd_dv: regulator-TLV71033 {
+ compatible = "regulator-gpio";
+ regulator-name = "tlv71033";
+ pinctrl-names = "default";
+ pinctrl-0 = <&vdd_sd_dv_pins_default>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ vin-supply = <&vsys_5v0>;
+ gpios = <&main_gpio0 70 GPIO_ACTIVE_HIGH>;
+ states = <1800000 0x0>,
+ <3300000 0x1>;
+ };
+
+ vsys_io_1v8: regulator-vsys-io-1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "vsys_io_1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vsys_io_1v2: regulator-vsys-io-1v2 {
+ compatible = "regulator-fixed";
+ regulator-name = "vsys_io_1v2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+};
+
+&main_pmx0 {
+
+ main_i2c0_pins_default: main-i2c0-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x01e0, PIN_INPUT_PULLUP, 0) /* (D23) I2C0_SCL */
+ J722S_IOPAD(0x01e4, PIN_INPUT_PULLUP, 0) /* (B22) I2C0_SDA */
+ >;
+ bootph-all;
+ };
+
+ main_uart0_pins_default: main-uart0-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x01c8, PIN_INPUT, 0) /* (A22) UART0_RXD */
+ J722S_IOPAD(0x01cc, PIN_OUTPUT, 0) /* (B22) UART0_TXD */
+ >;
+ bootph-all;
+ };
+
+ vdd_sd_dv_pins_default: vdd-sd-dv-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x0120, PIN_INPUT, 7) /* (F27) MMC2_CMD.GPIO0_70 */
+ >;
+ bootph-all;
+ };
+
+ main_mmc1_pins_default: main-mmc1-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x023c, PIN_INPUT, 0) /* (H22) MMC1_CMD */
+ J722S_IOPAD(0x0234, PIN_OUTPUT, 0) /* (H24) MMC1_CLK */
+ J722S_IOPAD(0x0230, PIN_INPUT, 0) /* (H23) MMC1_DAT0 */
+ J722S_IOPAD(0x022c, PIN_INPUT_PULLUP, 0) /* (H20) MMC1_DAT1 */
+ J722S_IOPAD(0x0228, PIN_INPUT_PULLUP, 0) /* (J23) MMC1_DAT2 */
+ J722S_IOPAD(0x0224, PIN_INPUT_PULLUP, 0) /* (H25) MMC1_DAT3 */
+ J722S_IOPAD(0x0240, PIN_INPUT, 0) /* (B24) MMC1_SDCD */
+ >;
+ bootph-all;
+ };
+
+ mdio_pins_default: mdio-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x0160, PIN_OUTPUT, 0) /* (AC24) MDIO0_MDC */
+ J722S_IOPAD(0x015c, PIN_INPUT, 0) /* (AD25) MDIO0_MDIO */
+ >;
+ };
+
+ ospi0_pins_default: ospi0-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x0000, PIN_OUTPUT, 0) /* (L24) OSPI0_CLK */
+ J722S_IOPAD(0x002c, PIN_OUTPUT, 0) /* (K26) OSPI0_CSn0 */
+ J722S_IOPAD(0x000c, PIN_INPUT, 0) /* (K27) OSPI0_D0 */
+ J722S_IOPAD(0x0010, PIN_INPUT, 0) /* (L27) OSPI0_D1 */
+ J722S_IOPAD(0x0014, PIN_INPUT, 0) /* (L26) OSPI0_D2 */
+ J722S_IOPAD(0x0018, PIN_INPUT, 0) /* (L25) OSPI0_D3 */
+ J722S_IOPAD(0x001c, PIN_INPUT, 0) /* (L21) OSPI0_D4 */
+ J722S_IOPAD(0x0020, PIN_INPUT, 0) /* (M26) OSPI0_D5 */
+ J722S_IOPAD(0x0024, PIN_INPUT, 0) /* (N27) OSPI0_D6 */
+ J722S_IOPAD(0x0028, PIN_INPUT, 0) /* (M27) OSPI0_D7 */
+ J722S_IOPAD(0x0008, PIN_INPUT, 0) /* (L22) OSPI0_DQS */
+ >;
+ bootph-all;
+ };
+
+ rgmii1_pins_default: rgmii1-default-pins {
+ pinctrl-single,pins = <
+ J722S_IOPAD(0x014c, PIN_INPUT, 0) /* (AC25) RGMII1_RD0 */
+ J722S_IOPAD(0x0150, PIN_INPUT, 0) /* (AD27) RGMII1_RD1 */
+ J722S_IOPAD(0x0154, PIN_INPUT, 0) /* (AE24) RGMII1_RD2 */
+ J722S_IOPAD(0x0158, PIN_INPUT, 0) /* (AE26) RGMII1_RD3 */
+ J722S_IOPAD(0x0148, PIN_INPUT, 0) /* (AE27) RGMII1_RXC */
+ J722S_IOPAD(0x0144, PIN_INPUT, 0) /* (AD23) RGMII1_RX_CTL */
+ J722S_IOPAD(0x0134, PIN_OUTPUT, 0) /* (AF27) RGMII1_TD0 */
+ J722S_IOPAD(0x0138, PIN_OUTPUT, 0) /* (AE23) RGMII1_TD1 */
+ J722S_IOPAD(0x013c, PIN_OUTPUT, 0) /* (AG25) RGMII1_TD2 */
+ J722S_IOPAD(0x0140, PIN_OUTPUT, 0) /* (AF24) RGMII1_TD3 */
+ J722S_IOPAD(0x0130, PIN_OUTPUT, 0) /* (AG26) RGMII1_TXC */
+ J722S_IOPAD(0x012c, PIN_OUTPUT, 0) /* (AF25) RGMII1_TX_CTL */
+ >;
+ };
+};
+
+&cpsw3g {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&rgmii1_pins_default>;
+};
+
+&cpsw3g_mdio {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mdio_pins_default>;
+
+ cpsw3g_phy0: ethernet-phy@0 {
+ reg = <0>;
+ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
+ ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+ ti,min-output-impedance;
+ };
+};
+
+&cpsw_port1 {
+ phy-mode = "rgmii-rxid";
+ phy-handle = <&cpsw3g_phy0>;
+};
+
+&cpsw_port2 {
+ status = "disabled";
+};
+
+&main_gpio1 {
+ status = "okay";
+};
+
+&main_uart0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_uart0_pins_default>;
+ status = "okay";
+ bootph-all;
+};
+
+&mcu_pmx0 {
+
+ wkup_uart0_pins_default: wkup-uart0-default-pins {
+ pinctrl-single,pins = <
+ J722S_MCU_IOPAD(0x02c, PIN_INPUT, 0) /* (C7) WKUP_UART0_CTSn */
+ J722S_MCU_IOPAD(0x030, PIN_OUTPUT, 0) /* (C6) WKUP_UART0_RTSn */
+ J722S_MCU_IOPAD(0x024, PIN_INPUT, 0) /* (D8) WKUP_UART0_RXD */
+ J722S_MCU_IOPAD(0x028, PIN_OUTPUT, 0) /* (D7) WKUP_UART0_TXD */
+ >;
+ bootph-all;
+ };
+
+ wkup_i2c0_pins_default: wkup-i2c0-default-pins {
+ pinctrl-single,pins = <
+ J722S_MCU_IOPAD(0x04c, PIN_INPUT_PULLUP, 0) /* (C7) WKUP_I2C0_SCL */
+ J722S_MCU_IOPAD(0x050, PIN_INPUT_PULLUP, 0) /* (C6) WKUP_I2C1_SDA */
+ >;
+ bootph-all;
+ };
+};
+
+&wkup_uart0 {
+ /* WKUP UART0 is used by Device Manager firmware */
+ pinctrl-names = "default";
+ pinctrl-0 = <&wkup_uart0_pins_default>;
+ status = "reserved";
+ bootph-all;
+};
+
+&wkup_i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&wkup_i2c0_pins_default>;
+ clock-frequency = <400000>;
+ status = "okay";
+ bootph-all;
+};
+
+&main_i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c0_pins_default>;
+ clock-frequency = <400000>;
+ status = "okay";
+ bootph-all;
+
+ exp1: gpio@23 {
+ compatible = "ti,tca6424";
+ reg = <0x23>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names = "TRC_MUX_SEL", "OSPI/ONAND_MUX_SEL",
+ "MCASP1_FET_SEL", "CTRL_PM_I2C_OE#",
+ "CSI_VIO_SEL", "USB2.0_MUX_SEL",
+ "CSI01_MUX_SEL_2", "CSI23_MUX_SEL_2",
+ "LMK1_OE1", "LMK1_OE0",
+ "LMK2_OE0", "LMK2_OE1",
+ "GPIO_RGMII1_RST#", "GPIO_AUD_RSTn",
+ "GPIO_eMMC_RSTn", "GPIO_uSD_PWR_EN",
+ "USER_LED2", "MCAN0_STB",
+ "PCIe0_1L_RC_RSTz", "PCIe0_1L_PRSNT#",
+ "ENET1_EXP_SPARE2", "ENET1_EXP_PWRDN",
+ "PD_I2ENET1_I2CMUX_SELC_IRQ", "ENET1_EXP_RESETZ";
+ };
+};
+
+&ospi0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&ospi0_pins_default>;
+ status = "okay";
+
+ flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0x0>;
+ spi-tx-bus-width = <8>;
+ spi-rx-bus-width = <8>;
+ spi-max-frequency = <25000000>;
+ cdns,tshsl-ns = <60>;
+ cdns,tsd2d-ns = <60>;
+ cdns,tchsh-ns = <60>;
+ cdns,tslch-ns = <60>;
+ cdns,read-delay = <4>;
+ bootph-all;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "ospi.tiboot3";
+ reg = <0x00 0x80000>;
+ };
+
+ partition@80000 {
+ label = "ospi.tispl";
+ reg = <0x80000 0x200000>;
+ };
+
+ partition@280000 {
+ label = "ospi.u-boot";
+ reg = <0x280000 0x400000>;
+ };
+
+ partition@680000 {
+ label = "ospi.env";
+ reg = <0x680000 0x40000>;
+ };
+
+ partition@6c0000 {
+ label = "ospi.env.backup";
+ reg = <0x6c0000 0x40000>;
+ };
+
+ partition@800000 {
+ label = "ospi.rootfs";
+ reg = <0x800000 0x37c0000>;
+ };
+
+ partition@3fc0000 {
+ label = "ospi.phypattern";
+ reg = <0x3fc0000 0x40000>;
+ };
+ };
+ };
+
+};
+
+&sdhci1 {
+ /* SD/MMC */
+ vmmc-supply = <&vdd_mmc1>;
+ vqmmc-supply = <&vdd_sd_dv>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_mmc1_pins_default>;
+ ti,driver-strength-ohm = <50>;
+ disable-wp;
+ no-1-8-v;
+ status = "okay";
+ bootph-all;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j722s.dtsi b/arch/arm64/boot/dts/ti/k3-j722s.dtsi
new file mode 100644
index 000000000000..c75744edb143
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-j722s.dtsi
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Device Tree Source for J722S SoC Family
+ *
+ * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+#include "k3-am62p5.dtsi"
+
+/ {
+ model = "Texas Instruments K3 J722S SoC";
+ compatible = "ti,j722s";
+
+ cbass_main: bus@f0000 {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ranges = <0x00 0x000f0000 0x00 0x000f0000 0x00 0x00030000>, /* Main MMRs */
+ <0x00 0x00420000 0x00 0x00420000 0x00 0x00001000>, /* ESM0 */
+ <0x00 0x00600000 0x00 0x00600000 0x00 0x00001100>, /* GPIO */
+ <0x00 0x00703000 0x00 0x00703000 0x00 0x00000200>, /* USB0 debug trace */
+ <0x00 0x0070c000 0x00 0x0070c000 0x00 0x00000200>, /* USB1 debug trace */
+ <0x00 0x00a40000 0x00 0x00a40000 0x00 0x00000800>, /* Timesync router */
+ <0x00 0x01000000 0x00 0x01000000 0x00 0x01b28400>, /* First peripheral window */
+ <0x00 0x08000000 0x00 0x08000000 0x00 0x00200000>, /* Main CPSW */
+ <0x00 0x0d000000 0x00 0x0d000000 0x00 0x00800000>, /* PCIE_0 */
+ <0x00 0x0e000000 0x00 0x0e000000 0x00 0x01d20000>, /* Second peripheral window */
+ <0x00 0x0fd80000 0x00 0x0fd80000 0x00 0x00080000>, /* GPU */
+ <0x00 0x0fd20000 0x00 0x0fd20000 0x00 0x00000100>, /* JPEGENC0_CORE */
+ <0x00 0x0fd20200 0x00 0x0fd20200 0x00 0x00000200>, /* JPEGENC0_CORE_MMU */
+ <0x00 0x20000000 0x00 0x20000000 0x00 0x0a008000>, /* Third peripheral window */
+ <0x00 0x30040000 0x00 0x30040000 0x00 0x00080000>, /* PRUSS-M */
+ <0x00 0x301C0000 0x00 0x301C0000 0x00 0x00001000>, /* DPHY-TX */
+ <0x00 0x30101000 0x00 0x30101000 0x00 0x00080100>, /* CSI window */
+ <0x00 0x30200000 0x00 0x30200000 0x00 0x00010000>, /* DSS */
+ <0x00 0x30210000 0x00 0x30210000 0x00 0x00010000>, /* VPU */
+ <0x00 0x30220000 0x00 0x30220000 0x00 0x00010000>, /* DSS1 */
+ <0x00 0x30270000 0x00 0x30270000 0x00 0x00010000>, /* DSI-base1 */
+ <0x00 0x30500000 0x00 0x30500000 0x00 0x00100000>, /* DSI-base2 */
+ <0x00 0x31000000 0x00 0x31000000 0x00 0x00050000>, /* USB0 DWC3 Core window */
+ <0x00 0x31200000 0x00 0x31200000 0x00 0x00040000>, /* USB1 DWC3 Core window */
+ <0x00 0x40900000 0x00 0x40900000 0x00 0x00030000>, /* SA3UL */
+ <0x00 0x43600000 0x00 0x43600000 0x00 0x00010000>, /* SA3 sproxy data */
+ <0x00 0x44043000 0x00 0x44043000 0x00 0x00000fe0>, /* TI SCI DEBUG */
+ <0x00 0x44860000 0x00 0x44860000 0x00 0x00040000>, /* SA3 sproxy config */
+ <0x00 0x48000000 0x00 0x48000000 0x00 0x06408000>, /* DMSS */
+ <0x00 0x60000000 0x00 0x60000000 0x00 0x08000000>, /* FSS0 DAT1 */
+ <0x00 0x68000000 0x00 0x68000000 0x00 0x08000000>, /* PCIe0 DAT0 */
+ <0x00 0x70000000 0x00 0x70000000 0x00 0x00040000>, /* OCSRAM */
+ <0x00 0x78400000 0x00 0x78400000 0x00 0x00008000>, /* MAIN R5FSS0 ATCM */
+ <0x00 0x78500000 0x00 0x78500000 0x00 0x00008000>, /* MAIN R5FSS0 BTCM */
+ <0x00 0x7e000000 0x00 0x7e000000 0x00 0x00200000>, /* C7X_0 L2SRAM */
+ <0x00 0x7e200000 0x00 0x7e200000 0x00 0x00200000>, /* C7X_1 L2SRAM */
+ <0x01 0x00000000 0x01 0x00000000 0x00 0x00310000>, /* A53 PERIPHBASE */
+ <0x05 0x00000000 0x05 0x00000000 0x01 0x00000000>, /* FSS0 DAT3 */
+ <0x06 0x00000000 0x06 0x00000000 0x01 0x00000000>, /* PCIe0 DAT1 */
+
+ /* MCU Domain Range */
+ <0x00 0x04000000 0x00 0x04000000 0x00 0x01ff1400>,
+ <0x00 0x79000000 0x00 0x79000000 0x00 0x00008000>,
+ <0x00 0x79020000 0x00 0x79020000 0x00 0x00008000>,
+ <0x00 0x79100000 0x00 0x79100000 0x00 0x00040000>,
+ <0x00 0x79140000 0x00 0x79140000 0x00 0x00040000>,
+
+ /* Wakeup Domain Range */
+ <0x00 0x00b00000 0x00 0x00b00000 0x00 0x00002400>,
+ <0x00 0x2b000000 0x00 0x2b000000 0x00 0x00300400>,
+ <0x00 0x43000000 0x00 0x43000000 0x00 0x00020000>,
+ <0x00 0x78000000 0x00 0x78000000 0x00 0x00008000>,
+ <0x00 0x78100000 0x00 0x78100000 0x00 0x00008000>;
+ };
+};
+
+/* Main domain overrides */
+
+&inta_main_dmss {
+ ti,interrupt-ranges = <7 71 21>;
+};
+
+&oc_sram {
+ reg = <0x00 0x70000000 0x00 0x40000>;
+ ranges = <0x00 0x00 0x70000000 0x40000>;
+};
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts
index f34b92acc56d..81fd7afac8c5 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts
+++ b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*
* EVM Board Schematics: https://www.ti.com/lit/zip/sprr458
*/
@@ -31,6 +31,7 @@
memory@80000000 {
device_type = "memory";
+ bootph-all;
/* 32G RAM */
reg = <0x00 0x80000000 0x00 0x80000000>,
<0x08 0x80000000 0x07 0x80000000>;
@@ -296,6 +297,13 @@
>;
};
+ main_i2c5_pins_default: main-i2c5-default-pins {
+ pinctrl-single,pins = <
+ J784S4_IOPAD(0x01c, PIN_INPUT, 8) /* (AG34) MCAN15_TX.I2C5_SCL */
+ J784S4_IOPAD(0x018, PIN_INPUT, 8) /* (AK36) MCAN14_RX.I2C5_SDA */
+ >;
+ };
+
main_mmc1_pins_default: main-mmc1-default-pins {
bootph-all;
pinctrl-single,pins = <
@@ -335,8 +343,6 @@
wkup_uart0_pins_default: wkup-uart0-default-pins {
bootph-all;
pinctrl-single,pins = <
- J721S2_WKUP_IOPAD(0x070, PIN_INPUT, 0) /* (L37) WKUP_GPIO0_6.WKUP_UART0_CTSn */
- J721S2_WKUP_IOPAD(0x074, PIN_INPUT, 0) /* (L36) WKUP_GPIO0_7.WKUP_UART0_RTSn */
J721S2_WKUP_IOPAD(0x048, PIN_INPUT, 0) /* (K35) WKUP_UART0_RXD */
J721S2_WKUP_IOPAD(0x04c, PIN_INPUT, 0) /* (K34) WKUP_UART0_TXD */
>;
@@ -760,6 +766,24 @@
};
};
+&main_i2c5 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&main_i2c5_pins_default>;
+ clock-frequency = <400000>;
+ status = "okay";
+
+ exp5: gpio@20 {
+ compatible = "ti,tca6408";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names = "CSI2_EXP_RSTZ", "CSI2_EXP_A_GPIO0",
+ "CSI2_EXP_A_GPIO1", "CSI2_EXP_A_GPIO3",
+ "CSI2_EXP_B_GPIO1", "CSI2_EXP_B_GPIO2",
+ "CSI2_EXP_B_GPIO3", "CSI2_EXP_B_GPIO4";
+ };
+};
+
&main_sdhci0 {
bootph-all;
/* eMMC */
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi
index f2b720ed1e4f..b67c37460a73 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J784S4 SoC Family Main Domain peripherals
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#include <dt-bindings/mux/mux.h>
@@ -52,12 +52,12 @@
compatible = "reg-mux";
reg = <0x00004080 0x30>;
#mux-control-cells = <1>;
- mux-reg-masks = <0x4080 0x3>, <0x4084 0x3>, /* SERDES0 lane0/1 select */
- <0x4088 0x3>, <0x408c 0x3>, /* SERDES0 lane2/3 select */
- <0x4090 0x3>, <0x4094 0x3>, /* SERDES1 lane0/1 select */
- <0x4098 0x3>, <0x409c 0x3>, /* SERDES1 lane2/3 select */
- <0x40a0 0x3>, <0x40a4 0x3>, /* SERDES2 lane0/1 select */
- <0x40a8 0x3>, <0x40ac 0x3>; /* SERDES2 lane2/3 select */
+ mux-reg-masks = <0x0 0x3>, <0x4 0x3>, /* SERDES0 lane0/1 select */
+ <0x8 0x3>, <0xc 0x3>, /* SERDES0 lane2/3 select */
+ <0x10 0x3>, <0x14 0x3>, /* SERDES1 lane0/1 select */
+ <0x18 0x3>, <0x1c 0x3>, /* SERDES1 lane2/3 select */
+ <0x20 0x3>, <0x24 0x3>, /* SERDES2 lane0/1 select */
+ <0x28 0x3>, <0x2c 0x3>; /* SERDES2 lane2/3 select */
idle-states = <J784S4_SERDES0_LANE0_PCIE1_LANE0>,
<J784S4_SERDES0_LANE1_PCIE1_LANE1>,
<J784S4_SERDES0_LANE2_IP3_UNUSED>,
@@ -662,6 +662,204 @@
status = "disabled";
};
+ ti_csi2rx0: ticsi2rx@4500000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x00 0x04500000 0x00 0x00001000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_bcdma_csi 0 0x4940 0>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 72 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx0: csi-bridge@4504000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x00 0x04504000 0x00 0x00001000>;
+ clocks = <&k3_clks 72 2>, <&k3_clks 72 0>, <&k3_clks 72 2>,
+ <&k3_clks 72 2>, <&k3_clks 72 3>, <&k3_clks 72 3>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy0>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi0_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi0_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi0_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi0_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi0_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ ti_csi2rx1: ticsi2rx@4510000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x00 0x04510000 0x00 0x1000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_bcdma_csi 0 0x4960 0>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 73 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx1: csi-bridge@4514000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x00 0x04514000 0x00 0x00001000>;
+ clocks = <&k3_clks 73 2>, <&k3_clks 73 0>, <&k3_clks 73 2>,
+ <&k3_clks 73 2>, <&k3_clks 73 3>, <&k3_clks 73 3>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy1>;
+ phy-names = "dphy";
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi1_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi1_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi1_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi1_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi1_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ ti_csi2rx2: ticsi2rx@4520000 {
+ compatible = "ti,j721e-csi2rx-shim";
+ reg = <0x00 0x04520000 0x00 0x00001000>;
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dmas = <&main_bcdma_csi 0 0x4980 0>;
+ dma-names = "rx0";
+ power-domains = <&k3_pds 74 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+
+ cdns_csi2rx2: csi-bridge@4524000 {
+ compatible = "ti,j721e-csi2rx", "cdns,csi2rx";
+ reg = <0x00 0x04524000 0x00 0x00001000>;
+ clocks = <&k3_clks 74 2>, <&k3_clks 74 0>, <&k3_clks 74 2>,
+ <&k3_clks 74 2>, <&k3_clks 74 3>, <&k3_clks 74 3>;
+ clock-names = "sys_clk", "p_clk", "pixel_if0_clk",
+ "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk";
+ phys = <&dphy2>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ csi2_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ csi2_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ csi2_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ csi2_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ csi2_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ dphy0: phy@4580000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x00 0x04580000 0x00 0x00001100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 212 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
+ dphy1: phy@4590000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x00 0x04590000 0x00 0x00001100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 213 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
+ dphy2: phy@45a0000 {
+ compatible = "cdns,dphy-rx";
+ reg = <0x00 0x045a0000 0x00 0x00001100>;
+ #phy-cells = <0>;
+ power-domains = <&k3_pds 214 TI_SCI_PD_EXCLUSIVE>;
+ status = "disabled";
+ };
+
+ vpu0: video-codec@4210000 {
+ compatible = "ti,j721s2-wave521c", "cnm,wave521c";
+ reg = <0x00 0x4210000 0x00 0x10000>;
+ interrupts = <GIC_SPI 182 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&k3_clks 241 2>;
+ power-domains = <&k3_pds 241 TI_SCI_PD_EXCLUSIVE>;
+ };
+
+ vpu1: video-codec@4220000 {
+ compatible = "ti,j721s2-wave521c", "cnm,wave521c";
+ reg = <0x00 0x4220000 0x00 0x10000>;
+ interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&k3_clks 242 2>;
+ power-domains = <&k3_pds 242 TI_SCI_PD_EXCLUSIVE>;
+ };
+
main_sdhci0: mmc@4f80000 {
compatible = "ti,j721e-sdhci-8bit";
reg = <0x00 0x04f80000 0x00 0x1000>,
@@ -1224,7 +1422,6 @@
ti,sci-dev-id = <281>;
ti,sci-rm-range-rchan = <0x21>;
ti,sci-rm-range-tchan = <0x22>;
- status = "disabled";
};
cpts@310d0000 {
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-mcu-wakeup.dtsi
index 3902a921d7e5..77a8d99139ec 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j784s4-mcu-wakeup.dtsi
@@ -1,8 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J784S4 SoC Family MCU/WAKEUP Domain peripherals
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
&cbass_mcu_wakeup {
@@ -628,7 +628,7 @@
compatible = "ti,j7200-vtm";
reg = <0x00 0x42040000 0x00 0x350>,
<0x00 0x42050000 0x00 0x350>;
- power-domains = <&k3_pds 154 TI_SCI_PD_SHARED>;
+ power-domains = <&k3_pds 243 TI_SCI_PD_SHARED>;
#thermal-sensor-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-thermal.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-thermal.dtsi
index f7b1a15b8fa0..e3ef61c1658f 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4-thermal.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j784s4-thermal.dtsi
@@ -1,4 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
+ */
#include <dt-bindings/thermal/thermal.h>
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4.dtsi
index 4398c3a463e1..6e2e92ffe745 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j784s4.dtsi
@@ -1,10 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
* Device Tree Source for J784S4 SoC Family
*
* TRM (SPRUJ43 JULY 2022): https://www.ti.com/lit/zip/spruj52
*
- * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2022-2024 Texas Instruments Incorporated - https://www.ti.com/
*
*/
@@ -235,6 +235,8 @@
ranges = <0x00 0x00100000 0x00 0x00100000 0x00 0x00020000>, /* ctrl mmr */
<0x00 0x00600000 0x00 0x00600000 0x00 0x00031100>, /* GPIO */
<0x00 0x01000000 0x00 0x01000000 0x00 0x0d000000>, /* Most peripherals */
+ <0x00 0x04210000 0x00 0x04210000 0x00 0x00010000>, /* VPU0 */
+ <0x00 0x04220000 0x00 0x04220000 0x00 0x00010000>, /* VPU1 */
<0x00 0x0d000000 0x00 0x0d000000 0x00 0x01000000>, /* PCIe Core*/
<0x00 0x10000000 0x00 0x10000000 0x00 0x08000000>, /* PCIe0 DAT0 */
<0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */
diff --git a/arch/arm64/boot/dts/ti/k3-pinctrl.h b/arch/arm64/boot/dts/ti/k3-pinctrl.h
index 2a4e0e084d69..4cd2df467d0b 100644
--- a/arch/arm64/boot/dts/ti/k3-pinctrl.h
+++ b/arch/arm64/boot/dts/ti/k3-pinctrl.h
@@ -1,9 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/*
* This header provides constants for pinctrl bindings for TI's K3 SoC
* family.
*
- * Copyright (C) 2018-2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2018-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#ifndef DTS_ARM64_TI_K3_PINCTRL_H
#define DTS_ARM64_TI_K3_PINCTRL_H
@@ -59,6 +59,9 @@
#define J721S2_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode))
#define J721S2_WKUP_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode))
+#define J722S_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode))
+#define J722S_MCU_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode))
+
#define J784S4_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode))
#define J784S4_WKUP_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode))
diff --git a/arch/arm64/boot/dts/ti/k3-serdes.h b/arch/arm64/boot/dts/ti/k3-serdes.h
index 21b4886c47ba..a011ad893b44 100644
--- a/arch/arm64/boot/dts/ti/k3-serdes.h
+++ b/arch/arm64/boot/dts/ti/k3-serdes.h
@@ -1,8 +1,8 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/*
* This header provides constants for SERDES MUX for TI SoCs
*
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ * Copyright (C) 2023-2024 Texas Instruments Incorporated - https://www.ti.com/
*/
#ifndef DTS_ARM64_TI_K3_SERDES_H
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-clk-ccf.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp-clk-ccf.dtsi
index ccaca29200bb..dd4569e7bd95 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-clk-ccf.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-clk-ccf.dtsi
@@ -230,18 +230,30 @@
&uart0 {
clocks = <&zynqmp_clk UART0_REF>, <&zynqmp_clk LPD_LSBUS>;
+ assigned-clocks = <&zynqmp_clk UART0_REF>;
};
&uart1 {
clocks = <&zynqmp_clk UART1_REF>, <&zynqmp_clk LPD_LSBUS>;
+ assigned-clocks = <&zynqmp_clk UART1_REF>;
};
-&dwc3_0 {
+&usb0 {
clocks = <&zynqmp_clk USB0_BUS_REF>, <&zynqmp_clk USB3_DUAL_REF>;
+ assigned-clocks = <&zynqmp_clk USB0_BUS_REF>, <&zynqmp_clk USB3_DUAL_REF>;
};
-&dwc3_1 {
+&dwc3_0 {
+ clocks = <&zynqmp_clk USB3_DUAL_REF>;
+};
+
+&usb1 {
clocks = <&zynqmp_clk USB1_BUS_REF>, <&zynqmp_clk USB3_DUAL_REF>;
+ assigned-clocks = <&zynqmp_clk USB1_BUS_REF>, <&zynqmp_clk USB3_DUAL_REF>;
+};
+
+&dwc3_1 {
+ clocks = <&zynqmp_clk USB3_DUAL_REF>;
};
&watchdog0 {
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dtso b/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dtso
index 92f4190d564d..d7535a77b45e 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dtso
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dtso
@@ -139,7 +139,7 @@
bus-width = <4>;
};
-&gem3 { /* required by spec */
+&gem3 {
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_gem3_default>;
@@ -166,9 +166,28 @@
};
};
-&pinctrl0 { /* required by spec */
+&pinctrl0 {
status = "okay";
+ pinctrl_gpio0_default: gpio0-default {
+ conf {
+ groups = "gpio0_38_grp";
+ bias-pull-up;
+ power-source = <IO_STANDARD_LVCMOS18>;
+ };
+
+ mux {
+ groups = "gpio0_38_grp";
+ function = "gpio0";
+ };
+
+ conf-tx {
+ pins = "MIO38";
+ bias-disable;
+ output-enable;
+ };
+ };
+
pinctrl_uart1_default: uart1-default {
conf {
groups = "uart1_9_grp";
@@ -185,6 +204,7 @@
conf-tx {
pins = "MIO36";
bias-disable;
+ output-enable;
};
mux {
@@ -207,7 +227,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
conf {
groups = "gpio0_24_grp", "gpio0_25_grp";
slew-rate = <SLEW_RATE_SLOW>;
@@ -236,6 +256,7 @@
conf-bootstrap {
pins = "MIO71", "MIO73", "MIO75";
bias-disable;
+ output-enable;
low-power-disable;
};
@@ -243,6 +264,7 @@
pins = "MIO64", "MIO65", "MIO66",
"MIO67", "MIO68", "MIO69";
bias-disable;
+ output-enable;
low-power-enable;
};
@@ -251,6 +273,7 @@
slew-rate = <SLEW_RATE_SLOW>;
power-source = <IO_STANDARD_LVCMOS18>;
bias-disable;
+ output-enable;
};
mux-mdio {
@@ -281,6 +304,7 @@
pins = "MIO54", "MIO56", "MIO57", "MIO58", "MIO59",
"MIO60", "MIO61", "MIO62", "MIO63";
bias-disable;
+ output-enable;
drive-strength = <4>;
slew-rate = <SLEW_RATE_SLOW>;
};
@@ -319,6 +343,12 @@
};
};
+&gpio {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio0_default>;
+};
+
&uart1 {
status = "okay";
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dtso b/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dtso
index f88b71f5b07a..a7b8fffad499 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dtso
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dtso
@@ -94,6 +94,7 @@
pinctrl-0 = <&pinctrl_usb0_default>;
phy-names = "usb3-phy";
phys = <&psgtr 2 PHY_TYPE_USB3 0 1>;
+ assigned-clock-rates = <250000000>, <20000000>;
};
&dwc3_0 {
@@ -122,7 +123,7 @@
bus-width = <4>;
};
-&gem3 { /* required by spec */
+&gem3 {
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_gem3_default>;
@@ -149,9 +150,28 @@
};
};
-&pinctrl0 { /* required by spec */
+&pinctrl0 {
status = "okay";
+ pinctrl_gpio0_default: gpio0-default {
+ conf {
+ groups = "gpio0_38_grp";
+ bias-pull-up;
+ power-source = <IO_STANDARD_LVCMOS18>;
+ };
+
+ mux {
+ groups = "gpio0_38_grp";
+ function = "gpio0";
+ };
+
+ conf-tx {
+ pins = "MIO38";
+ bias-disable;
+ output-enable;
+ };
+ };
+
pinctrl_uart1_default: uart1-default {
conf {
groups = "uart1_9_grp";
@@ -168,6 +188,7 @@
conf-tx {
pins = "MIO36";
bias-disable;
+ output-enable;
};
mux {
@@ -190,7 +211,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
conf {
groups = "gpio0_24_grp", "gpio0_25_grp";
slew-rate = <SLEW_RATE_SLOW>;
@@ -219,6 +240,7 @@
conf-bootstrap {
pins = "MIO71", "MIO73", "MIO75";
bias-disable;
+ output-enable;
low-power-disable;
};
@@ -226,6 +248,7 @@
pins = "MIO64", "MIO65", "MIO66",
"MIO67", "MIO68", "MIO69";
bias-disable;
+ output-enable;
low-power-enable;
};
@@ -234,6 +257,7 @@
slew-rate = <SLEW_RATE_SLOW>;
power-source = <IO_STANDARD_LVCMOS18>;
bias-disable;
+ output-enable;
};
mux-mdio {
@@ -264,6 +288,7 @@
pins = "MIO54", "MIO56", "MIO57", "MIO58", "MIO59",
"MIO60", "MIO61", "MIO62", "MIO63";
bias-disable;
+ output-enable;
drive-strength = <4>;
slew-rate = <SLEW_RATE_SLOW>;
};
@@ -302,6 +327,12 @@
};
};
+&gpio {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gpio0_default>;
+};
+
&uart1 {
status = "okay";
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm015-dc1.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm015-dc1.dts
index 73491626e01e..6aff22d43361 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm015-dc1.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm015-dc1.dts
@@ -148,7 +148,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_36_grp", "gpio0_37_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts
index f767708fb50d..1850325e1d6c 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts
@@ -219,7 +219,7 @@
};
};
- pinctrl_i2c0_gpio: i2c0-gpio {
+ pinctrl_i2c0_gpio: i2c0-gpio-grp {
mux {
groups = "gpio0_6_grp", "gpio0_7_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm019-dc5.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm019-dc5.dts
index b1857e17ab7e..53aa3dca1dca 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm019-dc5.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm019-dc5.dts
@@ -125,7 +125,7 @@
};
};
- pinctrl_i2c0_gpio: i2c0-gpio {
+ pinctrl_i2c0_gpio: i2c0-gpio-grp {
mux {
groups = "gpio0_74_grp", "gpio0_75_grp";
function = "gpio0";
@@ -152,7 +152,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_76_grp", "gpio0_77_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts
index 52f998c22538..c5945067cd57 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts
@@ -275,7 +275,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_4_grp", "gpio0_5_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revA.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revA.dts
index 84952c14f021..ad8f23a0ec67 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revA.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revA.dts
@@ -603,7 +603,7 @@
reg = <0x5d>;
temperature-stability = <50>; /* copy from zc702 */
factory-fout = <156250000>;
- clock-frequency = <148500000>;
+ clock-frequency = <156250000>;
clock-output-names = "si570_mgt";
};
};
@@ -689,7 +689,7 @@
};
};
- pinctrl_i2c0_gpio: i2c0-gpio {
+ pinctrl_i2c0_gpio: i2c0-gpio-grp {
mux {
groups = "gpio0_14_grp", "gpio0_15_grp";
function = "gpio0";
@@ -716,7 +716,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_16_grp", "gpio0_17_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revA.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revA.dts
index 5084ddcee00f..b1eca1bb6a63 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revA.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revA.dts
@@ -272,7 +272,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_16_grp", "gpio0_17_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revC.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revC.dts
index b273bd1d920a..ddc74d963a05 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revC.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revC.dts
@@ -284,7 +284,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_16_grp", "gpio0_17_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu106-revA.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu106-revA.dts
index 50c384aa253e..7beedd730f94 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu106-revA.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu106-revA.dts
@@ -605,7 +605,7 @@
reg = <0x5d>;
temperature-stability = <50>; /* copy from zc702 */
factory-fout = <156250000>;
- clock-frequency = <148500000>;
+ clock-frequency = <156250000>;
clock-output-names = "si570_mgt";
};
};
@@ -700,7 +700,7 @@
};
};
- pinctrl_i2c0_gpio: i2c0-gpio {
+ pinctrl_i2c0_gpio: i2c0-gpio-grp {
mux {
groups = "gpio0_14_grp", "gpio0_15_grp";
function = "gpio0";
@@ -727,7 +727,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_16_grp", "gpio0_17_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu111-revA.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu111-revA.dts
index 617cb0405a7d..b67ff7ecf3c3 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu111-revA.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu111-revA.dts
@@ -589,7 +589,7 @@
};
};
- pinctrl_i2c0_gpio: i2c0-gpio {
+ pinctrl_i2c0_gpio: i2c0-gpio-grp {
mux {
groups = "gpio0_14_grp", "gpio0_15_grp";
function = "gpio0";
@@ -616,7 +616,7 @@
};
};
- pinctrl_i2c1_gpio: i2c1-gpio {
+ pinctrl_i2c1_gpio: i2c1-gpio-grp {
mux {
groups = "gpio0_16_grp", "gpio0_17_grp";
function = "gpio0";
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu1275-revA.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu1275-revA.dts
index c406017b0348..a38c2baeba6c 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu1275-revA.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu1275-revA.dts
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+
/*
- * dts file for Xilinx ZynqMP ZC1275
+ * dts file for Xilinx ZynqMP ZCU1275
*
* (C) Copyright 2017 - 2021, Xilinx, Inc.
*
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index eaba466804bc..25d20d803230 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -24,6 +24,13 @@
#address-cells = <2>;
#size-cells = <2>;
+ options {
+ u-boot {
+ compatible = "u-boot,config";
+ bootscr-address = /bits/ 64 <0x20000000>;
+ };
+ };
+
cpus {
#address-cells = <1>;
#size-cells = <0>;
@@ -180,13 +187,18 @@
};
firmware {
+ optee: optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+
zynqmp_firmware: zynqmp-firmware {
compatible = "xlnx,zynqmp-firmware";
#power-domain-cells = <1>;
method = "smc";
bootph-all;
- zynqmp_power: zynqmp-power {
+ zynqmp_power: power-management {
bootph-all;
compatible = "xlnx,zynqmp-power";
interrupt-parent = <&gic>;
@@ -281,6 +293,7 @@
interrupt-parent = <&gic>;
tx-fifo-depth = <0x40>;
rx-fifo-depth = <0x40>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_CAN0>;
power-domains = <&zynqmp_firmware PD_CAN_0>;
};
@@ -293,6 +306,7 @@
interrupt-parent = <&gic>;
tx-fifo-depth = <0x40>;
rx-fifo-depth = <0x40>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_CAN1>;
power-domains = <&zynqmp_firmware PD_CAN_1>;
};
@@ -326,7 +340,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14e8>;
+ /* iommus = <&smmu 0x14e8>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -339,7 +353,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14e9>;
+ /* iommus = <&smmu 0x14e9>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -352,7 +366,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14ea>;
+ /* iommus = <&smmu 0x14ea>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -365,7 +379,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14eb>;
+ /* iommus = <&smmu 0x14eb>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -378,7 +392,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14ec>;
+ /* iommus = <&smmu 0x14ec>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -391,7 +405,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14ed>;
+ /* iommus = <&smmu 0x14ed>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -404,7 +418,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14ee>;
+ /* iommus = <&smmu 0x14ee>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -417,7 +431,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <128>;
- iommus = <&smmu 0x14ef>;
+ /* iommus = <&smmu 0x14ef>; */
power-domains = <&zynqmp_firmware PD_GDMA>;
};
@@ -462,7 +476,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x868>;
+ /* iommus = <&smmu 0x868>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -475,7 +489,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x869>;
+ /* iommus = <&smmu 0x869>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -488,7 +502,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x86a>;
+ /* iommus = <&smmu 0x86a>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -501,7 +515,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x86b>;
+ /* iommus = <&smmu 0x86b>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -514,7 +528,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x86c>;
+ /* iommus = <&smmu 0x86c>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -527,7 +541,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x86d>;
+ /* iommus = <&smmu 0x86d>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -540,7 +554,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x86e>;
+ /* iommus = <&smmu 0x86e>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -553,7 +567,7 @@
clock-names = "clk_main", "clk_apb";
#dma-cells = <1>;
xlnx,bus-width = <64>;
- iommus = <&smmu 0x86f>;
+ /* iommus = <&smmu 0x86f>; */
power-domains = <&zynqmp_firmware PD_ADMA>;
};
@@ -573,7 +587,7 @@
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
- iommus = <&smmu 0x872>;
+ /* iommus = <&smmu 0x872>; */
power-domains = <&zynqmp_firmware PD_NAND>;
};
@@ -585,7 +599,7 @@
<GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x0 0xff0b0000 0x0 0x1000>;
clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk";
- iommus = <&smmu 0x874>;
+ /* iommus = <&smmu 0x874>; */
power-domains = <&zynqmp_firmware PD_ETH_0>;
resets = <&zynqmp_reset ZYNQMP_RESET_GEM0>;
reset-names = "gem0_rst";
@@ -599,7 +613,7 @@
<GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x0 0xff0c0000 0x0 0x1000>;
clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk";
- iommus = <&smmu 0x875>;
+ /* iommus = <&smmu 0x875>; */
power-domains = <&zynqmp_firmware PD_ETH_1>;
resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>;
reset-names = "gem1_rst";
@@ -613,7 +627,7 @@
<GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x0 0xff0d0000 0x0 0x1000>;
clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk";
- iommus = <&smmu 0x876>;
+ /* iommus = <&smmu 0x876>; */
power-domains = <&zynqmp_firmware PD_ETH_2>;
resets = <&zynqmp_reset ZYNQMP_RESET_GEM2>;
reset-names = "gem2_rst";
@@ -627,7 +641,7 @@
<GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x0 0xff0e0000 0x0 0x1000>;
clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk";
- iommus = <&smmu 0x877>;
+ /* iommus = <&smmu 0x877>; */
power-domains = <&zynqmp_firmware PD_ETH_3>;
resets = <&zynqmp_reset ZYNQMP_RESET_GEM3>;
reset-names = "gem3_rst";
@@ -689,7 +703,7 @@
msi-parent = <&pcie>;
reg = <0x0 0xfd0e0000 0x0 0x1000>,
<0x0 0xfd480000 0x0 0x1000>,
- <0x80 0x00000000 0x0 0x1000000>;
+ <0x80 0x00000000 0x0 0x10000000>;
reg-names = "breg", "pcireg", "cfg";
ranges = <0x02000000 0x00000000 0xe0000000 0x00000000 0xe0000000 0x00000000 0x10000000>,/* non-prefetchable memory */
<0x43000000 0x00000006 0x00000000 0x00000006 0x00000000 0x00000002 0x00000000>;/* prefetchable memory */
@@ -699,7 +713,7 @@
<0x0 0x0 0x0 0x2 &pcie_intc 0x2>,
<0x0 0x0 0x0 0x3 &pcie_intc 0x3>,
<0x0 0x0 0x0 0x4 &pcie_intc 0x4>;
- iommus = <&smmu 0x4d0>;
+ /* iommus = <&smmu 0x4d0>; */
power-domains = <&zynqmp_firmware PD_PCIE>;
pcie_intc: legacy-interrupt-controller {
interrupt-controller;
@@ -720,7 +734,7 @@
<0x0 0xc0000000 0x0 0x8000000>;
#address-cells = <1>;
#size-cells = <0>;
- iommus = <&smmu 0x873>;
+ /* iommus = <&smmu 0x873>; */
power-domains = <&zynqmp_firmware PD_QSPI>;
};
@@ -752,8 +766,7 @@
interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&zynqmp_firmware PD_SATA>;
resets = <&zynqmp_reset ZYNQMP_RESET_SATA>;
- iommus = <&smmu 0x4c0>, <&smmu 0x4c1>,
- <&smmu 0x4c2>, <&smmu 0x4c3>;
+ /* iommus = <&smmu 0x4c0>, <&smmu 0x4c1>, <&smmu 0x4c2>, <&smmu 0x4c3>; */
};
sdhci0: mmc@ff160000 {
@@ -764,7 +777,7 @@
interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x0 0xff160000 0x0 0x1000>;
clock-names = "clk_xin", "clk_ahb";
- iommus = <&smmu 0x870>;
+ /* iommus = <&smmu 0x870>; */
#clock-cells = <1>;
clock-output-names = "clk_out_sd0", "clk_in_sd0";
power-domains = <&zynqmp_firmware PD_SD_0>;
@@ -779,7 +792,7 @@
interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x0 0xff170000 0x0 0x1000>;
clock-names = "clk_xin", "clk_ahb";
- iommus = <&smmu 0x871>;
+ /* iommus = <&smmu 0x871>; */
#clock-cells = <1>;
clock-output-names = "clk_out_sd1", "clk_in_sd1";
power-domains = <&zynqmp_firmware PD_SD_1>;
@@ -912,6 +925,7 @@
status = "disabled";
compatible = "xlnx,zynqmp-dwc3";
reg = <0x0 0xff9d0000 0x0 0x100>;
+ clock-names = "bus_clk", "ref_clk";
power-domains = <&zynqmp_firmware PD_USB_0>;
resets = <&zynqmp_reset ZYNQMP_RESET_USB0_CORERESET>,
<&zynqmp_reset ZYNQMP_RESET_USB0_HIBERRESET>,
@@ -922,14 +936,15 @@
dwc3_0: usb@fe200000 {
compatible = "snps,dwc3";
+ status = "disabled";
reg = <0x0 0xfe200000 0x0 0x40000>;
interrupt-parent = <&gic>;
interrupt-names = "host", "peripheral", "otg";
interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
- clock-names = "bus_early", "ref";
- iommus = <&smmu 0x860>;
+ clock-names = "ref";
+ /* iommus = <&smmu 0x860>; */
snps,quirk-frame-length-adjustment = <0x20>;
snps,resume-hs-terminations;
/* dma-coherent; */
@@ -942,6 +957,7 @@
status = "disabled";
compatible = "xlnx,zynqmp-dwc3";
reg = <0x0 0xff9e0000 0x0 0x100>;
+ clock-names = "bus_clk", "ref_clk";
power-domains = <&zynqmp_firmware PD_USB_1>;
resets = <&zynqmp_reset ZYNQMP_RESET_USB1_CORERESET>,
<&zynqmp_reset ZYNQMP_RESET_USB1_HIBERRESET>,
@@ -951,14 +967,15 @@
dwc3_1: usb@fe300000 {
compatible = "snps,dwc3";
+ status = "disabled";
reg = <0x0 0xfe300000 0x0 0x40000>;
interrupt-parent = <&gic>;
interrupt-names = "host", "peripheral", "otg";
interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
- clock-names = "bus_early", "ref";
- iommus = <&smmu 0x861>;
+ clock-names = "ref";
+ /* iommus = <&smmu 0x861>; */
snps,quirk-frame-length-adjustment = <0x20>;
snps,resume-hs-terminations;
/* dma-coherent; */
@@ -1018,6 +1035,7 @@
interrupt-parent = <&gic>;
clock-names = "axi_clk";
power-domains = <&zynqmp_firmware PD_DP>;
+ /* iommus = <&smmu 0xce4>; */
#dma-cells = <1>;
};
@@ -1032,6 +1050,7 @@
reg-names = "dp", "blend", "av_buf", "aud";
interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
+ /* iommus = <&smmu 0xce3>; */
clock-names = "dp_apb_clk", "dp_aud_clk",
"dp_vtc_pixel_clk_in";
power-domains = <&zynqmp_firmware PD_DP>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index e6cf3e5d63c3..ed69b4636d0f 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -251,6 +251,9 @@ CONFIG_RASPBERRYPI_FIRMWARE=y
CONFIG_INTEL_STRATIX10_SERVICE=y
CONFIG_INTEL_STRATIX10_RSU=m
CONFIG_MTK_ADSP_IPC=m
+CONFIG_GOOGLE_FIRMWARE=y
+CONFIG_GOOGLE_CBMEM=m
+CONFIG_GOOGLE_COREBOOT_TABLE=m
CONFIG_EFI_CAPSULE_LOADER=y
CONFIG_IMX_SCU=y
CONFIG_QCOM_QSEECOM=y
@@ -432,6 +435,7 @@ CONFIG_MOUSE_ELAN_I2C=m
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_TOUCHSCREEN_ATMEL_MXT=m
CONFIG_TOUCHSCREEN_GOODIX=m
+CONFIG_TOUCHSCREEN_GOODIX_BERLIN_SPI=m
CONFIG_TOUCHSCREEN_ELAN=m
CONFIG_TOUCHSCREEN_EDT_FT5X06=m
CONFIG_INPUT_MISC=y
@@ -620,6 +624,7 @@ CONFIG_PINCTRL_SM8350_LPASS_LPI=m
CONFIG_PINCTRL_SM8450_LPASS_LPI=m
CONFIG_PINCTRL_SC8280XP_LPASS_LPI=m
CONFIG_PINCTRL_SM8550_LPASS_LPI=m
+CONFIG_PINCTRL_SM8650_LPASS_LPI=m
CONFIG_GPIO_ALTERA=m
CONFIG_GPIO_DAVINCI=y
CONFIG_GPIO_DWAPB=y
@@ -632,6 +637,7 @@ CONFIG_GPIO_SYSCON=y
CONFIG_GPIO_UNIPHIER=y
CONFIG_GPIO_VISCONTI=y
CONFIG_GPIO_WCD934X=m
+CONFIG_GPIO_VF610=y
CONFIG_GPIO_XGENE=y
CONFIG_GPIO_XGENE_SB=y
CONFIG_GPIO_MAX732X=y
@@ -752,6 +758,7 @@ CONFIG_REGULATOR_HI6421V530=y
CONFIG_REGULATOR_HI655X=y
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX8973=y
+CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MP8859=y
CONFIG_REGULATOR_MT6315=m
CONFIG_REGULATOR_MT6357=y
@@ -792,13 +799,16 @@ CONFIG_USB_VIDEO_CLASS=m
CONFIG_V4L_PLATFORM_DRIVERS=y
CONFIG_SDR_PLATFORM_DRIVERS=y
CONFIG_V4L_MEM2MEM_DRIVERS=y
+CONFIG_VIDEO_AMPHION_VPU=m
CONFIG_VIDEO_CADENCE_CSI2RX=m
CONFIG_VIDEO_MEDIATEK_JPEG=m
CONFIG_VIDEO_MEDIATEK_VCODEC=m
+CONFIG_VIDEO_WAVE_VPU=m
CONFIG_VIDEO_IMX7_CSI=m
CONFIG_VIDEO_IMX_MIPI_CSIS=m
CONFIG_VIDEO_IMX8_ISI=m
CONFIG_VIDEO_IMX8_ISI_M2M=y
+CONFIG_VIDEO_IMX8_JPEG=m
CONFIG_VIDEO_QCOM_CAMSS=m
CONFIG_VIDEO_QCOM_VENUS=m
CONFIG_VIDEO_RCAR_ISP=m
@@ -861,6 +871,7 @@ CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
CONFIG_DRM_PANEL_SITRONIX_ST7703=m
CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m
CONFIG_DRM_PANEL_VISIONOX_VTDR6130=m
+CONFIG_DRM_FSL_LDB=m
CONFIG_DRM_LONTIUM_LT8912B=m
CONFIG_DRM_LONTIUM_LT9611=m
CONFIG_DRM_LONTIUM_LT9611UXC=m
@@ -992,6 +1003,8 @@ CONFIG_SND_SOC_TLV320AIC32X4_I2C=m
CONFIG_SND_SOC_TLV320AIC3X_I2C=m
CONFIG_SND_SOC_WCD9335=m
CONFIG_SND_SOC_WCD934X=m
+CONFIG_SND_SOC_WCD939X=m
+CONFIG_SND_SOC_WCD939X_SDW=m
CONFIG_SND_SOC_WM8524=m
CONFIG_SND_SOC_WM8904=m
CONFIG_SND_SOC_WM8960=m
@@ -1032,6 +1045,7 @@ CONFIG_USB_CDNS_SUPPORT=m
CONFIG_USB_CDNS3=m
CONFIG_USB_CDNS3_GADGET=y
CONFIG_USB_CDNS3_HOST=y
+CONFIG_USB_CDNS3_IMX=m
CONFIG_USB_MTU3=y
CONFIG_USB_MUSB_HDRC=y
CONFIG_USB_MUSB_SUNXI=y
@@ -1049,6 +1063,7 @@ CONFIG_USB_QCOM_EUD=m
CONFIG_USB_HSIC_USB3503=y
CONFIG_USB_ONBOARD_HUB=m
CONFIG_NOP_USB_XCEIV=y
+CONFIG_USB_MXS_PHY=m
CONFIG_USB_GADGET=y
CONFIG_USB_RENESAS_USBHS_UDC=m
CONFIG_USB_RZV2M_USB3DRD=y
@@ -1079,6 +1094,7 @@ CONFIG_TYPEC_HD3SS3220=m
CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
+CONFIG_TYPEC_MUX_WCD939X_USBSS=m
CONFIG_TYPEC_DP_ALTMODE=m
CONFIG_MMC=y
CONFIG_MMC_BLOCK_MINORS=32
@@ -1242,7 +1258,11 @@ CONFIG_COMMON_CLK_MT8192_SCP_ADSP=y
CONFIG_COMMON_CLK_MT8192_VDECSYS=y
CONFIG_COMMON_CLK_MT8192_VENCSYS=y
CONFIG_COMMON_CLK_QCOM=y
+CONFIG_CLK_X1E80100_CAMCC=m
+CONFIG_CLK_X1E80100_DISPCC=m
CONFIG_CLK_X1E80100_GCC=y
+CONFIG_CLK_X1E80100_GPUCC=m
+CONFIG_CLK_X1E80100_TCSRCC=y
CONFIG_QCOM_A53PLL=y
CONFIG_QCOM_CLK_APCS_MSM8916=y
CONFIG_QCOM_CLK_APCC_MSM8996=y
@@ -1265,6 +1285,7 @@ CONFIG_MSM_MMCC_8998=m
CONFIG_QCM_GCC_2290=y
CONFIG_QCM_DISPCC_2290=m
CONFIG_QCS_GCC_404=y
+CONFIG_QDU_GCC_1000=y
CONFIG_SC_CAMCC_8280XP=m
CONFIG_SC_DISPCC_8280XP=m
CONFIG_SA_GCC_8775P=y
@@ -1366,6 +1387,7 @@ CONFIG_QCOM_STATS=m
CONFIG_QCOM_WCNSS_CTRL=m
CONFIG_QCOM_APR=m
CONFIG_QCOM_ICC_BWMON=m
+CONFIG_QCOM_PBS=m
CONFIG_ARCH_R8A77995=y
CONFIG_ARCH_R8A77990=y
CONFIG_ARCH_R8A77951=y
@@ -1377,6 +1399,7 @@ CONFIG_ARCH_R8A77980=y
CONFIG_ARCH_R8A77970=y
CONFIG_ARCH_R8A779A0=y
CONFIG_ARCH_R8A779G0=y
+CONFIG_ARCH_R8A779H0=y
CONFIG_ARCH_R8A774C0=y
CONFIG_ARCH_R8A774E1=y
CONFIG_ARCH_R8A774A1=y
@@ -1410,6 +1433,7 @@ CONFIG_EXTCON_USBC_CROS_EC=y
CONFIG_RENESAS_RPCIF=m
CONFIG_IIO=y
CONFIG_EXYNOS_ADC=y
+CONFIG_IMX8QXP_ADC=m
CONFIG_IMX93_ADC=m
CONFIG_MAX9611=m
CONFIG_MEDIATEK_MT6577_AUXADC=m
@@ -1461,6 +1485,7 @@ CONFIG_PHY_SUN4I_USB=y
CONFIG_PHY_CADENCE_TORRENT=m
CONFIG_PHY_CADENCE_DPHY_RX=m
CONFIG_PHY_CADENCE_SIERRA=m
+CONFIG_PHY_CADENCE_SALVO=m
CONFIG_PHY_MIXEL_MIPI_DPHY=m
CONFIG_PHY_FSL_IMX8M_PCIE=y
CONFIG_PHY_HI6220_USB=y
@@ -1490,6 +1515,7 @@ CONFIG_PHY_ROCKCHIP_INNO_USB2=y
CONFIG_PHY_ROCKCHIP_INNO_DSIDPHY=m
CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY=m
CONFIG_PHY_ROCKCHIP_PCIE=m
+CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX=m
CONFIG_PHY_ROCKCHIP_SNPS_PCIE3=y
CONFIG_PHY_ROCKCHIP_TYPEC=y
CONFIG_PHY_SAMSUNG_UFS=y
@@ -1550,8 +1576,9 @@ CONFIG_INTERCONNECT_QCOM=y
CONFIG_INTERCONNECT_QCOM_MSM8916=m
CONFIG_INTERCONNECT_QCOM_MSM8996=m
CONFIG_INTERCONNECT_QCOM_OSM_L3=m
-CONFIG_INTERCONNECT_QCOM_QCM2290=m
+CONFIG_INTERCONNECT_QCOM_QCM2290=y
CONFIG_INTERCONNECT_QCOM_QCS404=m
+CONFIG_INTERCONNECT_QCOM_QDU1000=y
CONFIG_INTERCONNECT_QCOM_SA8775P=y
CONFIG_INTERCONNECT_QCOM_SC7180=y
CONFIG_INTERCONNECT_QCOM_SC7280=y
@@ -1560,7 +1587,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=y
CONFIG_INTERCONNECT_QCOM_SDM845=y
CONFIG_INTERCONNECT_QCOM_SDX75=y
CONFIG_INTERCONNECT_QCOM_SM8150=m
-CONFIG_INTERCONNECT_QCOM_SM8250=m
+CONFIG_INTERCONNECT_QCOM_SM8250=y
CONFIG_INTERCONNECT_QCOM_SM8350=m
CONFIG_INTERCONNECT_QCOM_SM8450=y
CONFIG_INTERCONNECT_QCOM_SM8550=y
@@ -1571,8 +1598,7 @@ CONFIG_RZ_MTU3_CNT=m
CONFIG_HTE=y
CONFIG_HTE_TEGRA194=y
CONFIG_HTE_TEGRA194_TEST=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
@@ -1590,6 +1616,7 @@ CONFIG_CONFIGFS_FS=y
CONFIG_EFIVAR_FS=y
CONFIG_UBIFS_FS=m
CONFIG_SQUASHFS=y
+CONFIG_PSTORE_RAM=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
diff --git a/arch/arm64/configs/virt.config b/arch/arm64/configs/virt.config
index c47c36f8f67b..2d1d2ce7ae2a 100644
--- a/arch/arm64/configs/virt.config
+++ b/arch/arm64/configs/virt.config
@@ -14,6 +14,7 @@
# CONFIG_ARCH_BERLIN is not set
# CONFIG_ARCH_BRCMSTB is not set
# CONFIG_ARCH_EXYNOS is not set
+# CONFIG_ARCH_SPARX5 is not set
# CONFIG_ARCH_K3 is not set
# CONFIG_ARCH_LAYERSCAPE is not set
# CONFIG_ARCH_LG1K is not set
@@ -23,14 +24,17 @@
# CONFIG_ARCH_MESON is not set
# CONFIG_ARCH_MVEBU is not set
# CONFIG_ARCH_NXP is not set
+# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MXC is not set
# CONFIG_ARCH_NPCM is not set
# CONFIG_ARCH_QCOM is not set
+# CONFIG_ARCH_REALTEK is not set
# CONFIG_ARCH_RENESAS is not set
# CONFIG_ARCH_ROCKCHIP is not set
# CONFIG_ARCH_S32 is not set
# CONFIG_ARCH_SEATTLE is not set
# CONFIG_ARCH_INTEL_SOCFPGA is not set
+# CONFIG_ARCH_STM32 is not set
# CONFIG_ARCH_SYNQUACER is not set
# CONFIG_ARCH_TEGRA is not set
# CONFIG_ARCH_TESLA_FSD is not set
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index bac4cabef607..467ac2f768ac 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req)
src += blocks * AES_BLOCK_SIZE;
}
if (nbytes && walk.nbytes == walk.total) {
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *d = dst;
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+
neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
nbytes, walk.iv);
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(d, dst, nbytes);
+
nbytes = 0;
}
kernel_neon_end();
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 210bb43cff2c..d328f549b1a6 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -229,7 +229,7 @@ alternative_has_cap_likely(const unsigned long cpucap)
if (!cpucap_is_possible(cpucap))
return false;
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
:
: [cpucap] "i" (cpucap)
@@ -247,7 +247,7 @@ alternative_has_cap_unlikely(const unsigned long cpucap)
if (!cpucap_is_possible(cpucap))
return false;
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
:
: [cpucap] "i" (cpucap)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 21c824edf8ce..bd8d4ca81a48 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -83,7 +83,7 @@ struct arm64_ftr_bits {
* to full-0 denotes that this field has no override
*
* A @mask field set to full-0 with the corresponding @val field set
- * to full-1 denotes thath this field has an invalid override.
+ * to full-1 denotes that this field has an invalid override.
*/
struct arm64_ftr_override {
u64 val;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7c7493cb571f..52f076afeb96 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_IMP_APPLE 0x61
#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -135,6 +136,8 @@
#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -193,6 +196,7 @@
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 50e5f25d3024..b67b89c54e1c 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -62,13 +62,13 @@ static inline void cpacr_restore(unsigned long cpacr)
* When we defined the maximum SVE vector length we defined the ABI so
* that the maximum vector length included all the reserved for future
* expansion bits in ZCR rather than those just currently defined by
- * the architecture. While SME follows a similar pattern the fact that
- * it includes a square matrix means that any allocations that attempt
- * to cover the maximum potential vector length (such as happen with
- * the regset used for ptrace) end up being extremely large. Define
- * the much lower actual limit for use in such situations.
+ * the architecture. Using this length to allocate worst size buffers
+ * results in excessively large allocations, and this effect is even
+ * more pronounced for SME due to ZA. Define more suitable VLs for
+ * these situations.
*/
-#define SME_VQ_MAX 16
+#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1)
+#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1)
struct task_struct;
@@ -386,6 +386,7 @@ extern void sme_alloc(struct task_struct *task, bool flush);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
+extern void sme_suspend_exit(void);
/*
* Return how many bytes of memory are required to store the full SME
@@ -421,6 +422,7 @@ static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
+static inline void sme_suspend_exit(void) { }
static inline size_t sme_state_size(struct task_struct const *task)
{
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 48ddc0f45d22..6aafbb789991 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,7 +18,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
@@ -35,7 +35,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: b %l[l_yes] \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
index 2403f7b4cdbf..792e9fe881dc 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -11,7 +11,7 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 68908b82b168..587bdb91ab7a 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -8,6 +8,8 @@ int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
int aarch64_insn_write_literal_u64(void *addr, u64 val);
+void *aarch64_insn_set(void *dst, u32 insn, size_t len);
+void *aarch64_insn_copy(void *dst, void *src, size_t len);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index b4ae32109932..4305995c8f82 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -17,9 +17,6 @@
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
-#ifdef CONFIG_COMPAT_VDSO
-#include <generated/vdso32-offsets.h>
-#endif
#define VDSO_SYMBOL(base, name) \
({ \
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index f3b151ed0d7a..14251abee23c 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -9,7 +9,8 @@
#ifndef __AARCH64EB__
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
struct word_at_a_time {
const unsigned long one_bits, high_bits;
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index e5d03a7039b4..467cb7117273 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -77,9 +77,9 @@ obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
# We need to prevent the SCS patching code from patching itself. Using
# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
# generated triggers an issue with full LTO on Clang, which stops emitting PAC
-# instructions altogether. So instead, omit the unwind tables used by the
-# patching code, so it will not be able to locate its own PAC instructions.
-CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables
+# instructions altogether. So disable LTO as well for the compilation unit.
+CFLAGS_patch-scs.o += -mbranch-protection=none
+CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO)
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 967c7c7a4e7d..76b8dd37092a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -374,6 +374,7 @@ static const struct midr_range erratum_1463225[] = {
static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2139208
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -387,6 +388,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
static const struct midr_range tsb_flush_fail_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2067961
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2054223
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -399,6 +401,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = {
static struct midr_range trbe_write_out_of_range_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2253138
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index a5dc6f764195..f27acca550d5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1311,6 +1311,22 @@ void __init sme_setup(void)
get_sme_default_vl());
}
+void sme_suspend_exit(void)
+{
+ u64 smcr = 0;
+
+ if (!system_supports_sme())
+ return;
+
+ if (system_supports_fa64())
+ smcr |= SMCR_ELx_FA64;
+ if (system_supports_sme2())
+ smcr |= SMCR_ELx_EZT0;
+
+ write_sysreg_s(smcr, SYS_SMCR_EL1);
+ write_sysreg_s(0, SYS_SMPRI_EL1);
+}
+
#endif /* CONFIG_ARM64_SME */
static void sve_init_regs(void)
@@ -1635,7 +1651,7 @@ void fpsimd_preserve_current_state(void)
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(current);
}
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..255534930368 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -105,6 +105,81 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
return ret;
}
+typedef void text_poke_f(void *dst, void *src, size_t patched, size_t len);
+
+static void *__text_poke(text_poke_f func, void *addr, void *src, size_t len)
+{
+ unsigned long flags;
+ size_t patched = 0;
+ size_t size;
+ void *waddr;
+ void *ptr;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+
+ while (patched < len) {
+ ptr = addr + patched;
+ size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
+ len - patched);
+
+ waddr = patch_map(ptr, FIX_TEXT_POKE0);
+ func(waddr, src, patched, size);
+ patch_unmap(FIX_TEXT_POKE0);
+
+ patched += size;
+ }
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
+
+ return addr;
+}
+
+static void text_poke_memcpy(void *dst, void *src, size_t patched, size_t len)
+{
+ copy_to_kernel_nofault(dst, src + patched, len);
+}
+
+static void text_poke_memset(void *dst, void *src, size_t patched, size_t len)
+{
+ u32 c = *(u32 *)src;
+
+ memset32(dst, c, len / 4);
+}
+
+/**
+ * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
+ * @dst: address to modify
+ * @src: source of the copy
+ * @len: length to copy
+ *
+ * Useful for JITs to dump new code blocks into unused regions of RX memory.
+ */
+noinstr void *aarch64_insn_copy(void *dst, void *src, size_t len)
+{
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memcpy, dst, src, len);
+}
+
+/**
+ * aarch64_insn_set - memset for RX memory regions.
+ * @dst: address to modify
+ * @insn: value to set
+ * @len: length of memory region.
+ *
+ * Useful for JITs to fill regions of RX memory with illegal instructions.
+ */
+noinstr void *aarch64_insn_set(void *dst, u32 insn, size_t len)
+{
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memset, dst, &insn, len);
+}
+
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
u32 *tp = addr;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index dc6cf0e37194..e3bef38fc2e2 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1500,7 +1500,8 @@ static const struct user_regset aarch64_regsets[] = {
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
.core_note_type = NT_ARM_SVE,
- .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+ .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX,
+ SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
.align = SVE_VQ_BYTES,
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 0e8beb3349ea..425b1bc17a3f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
vl = task_get_sme_vl(current);
vq = sve_vq_from_vl(vl);
flags |= SVE_SIG_FLAG_SM;
- } else if (test_thread_flag(TIF_SVE)) {
+ } else if (current->thread.fp_type == FP_STATE_SVE) {
vq = sve_vq_from_vl(vl);
}
@@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
- if (add_all || test_thread_flag(TIF_SVE) ||
+ if (add_all || current->thread.fp_type == FP_STATE_SVE ||
thread_sm_enabled(&current->thread)) {
int vl = max(sve_max_vl(), sme_max_vl());
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 7f88028a00c0..684c26511696 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/efi.h>
#include <linux/export.h>
+#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
@@ -247,7 +248,7 @@ struct kunwind_consume_entry_data {
void *cookie;
};
-static bool
+static __always_inline bool
arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
{
struct kunwind_consume_entry_data *data = cookie;
@@ -266,6 +267,31 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
+struct bpf_unwind_consume_entry_data {
+ bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
+ void *cookie;
+};
+
+static bool
+arch_bpf_unwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct bpf_unwind_consume_entry_data *data = cookie;
+
+ return data->consume_entry(data->cookie, state->common.pc, 0,
+ state->common.fp);
+}
+
+noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u64 ip, u64 sp,
+ u64 fp), void *cookie)
+{
+ struct bpf_unwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
+}
+
static bool dump_backtrace_entry(void *arg, unsigned long where)
{
char *loglvl = arg;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index eca4d0435211..eaaff94329cd 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -12,6 +12,7 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
+#include <asm/fpsimd.h>
#include <asm/mte.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
@@ -80,6 +81,8 @@ void notrace __cpu_suspend_exit(void)
*/
spectre_v4_enable_mitigation(NULL);
+ sme_suspend_exit();
+
/* Restore additional feature-specific configuration */
ptrauth_suspend_exit();
}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 5562daf38a22..89b6e7840002 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -69,10 +69,7 @@ static struct vdso_abi_info vdso_info[] __ro_after_init = {
/*
* The vDSO data page.
*/
-static union {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
+static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
static int vdso_mremap(const struct vm_special_mapping *sm,
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 2266fcdff78a..f5f80fdce0fe 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -127,9 +127,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
- $(call if_changed,vdsosym)
-
# Strip rule for vdso.so
$(obj)/vdso.so: OBJCOPYFLAGS := -S
$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
@@ -166,9 +163,3 @@ quiet_cmd_vdsoas = AS32 $@
quiet_cmd_vdsomunge = MUNGE $@
cmd_vdsomunge = $(obj)/$(munge) $< $@
-
-# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
-gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
-# The AArch64 nm should be able to read an AArch32 binary
- cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 6c3c8ca73e7f..27ca89b628a0 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -3,7 +3,6 @@
# KVM configuration
#
-source "virt/lib/Kconfig"
source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c651df904fe3..ab9d05fcf98b 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1419,7 +1419,6 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
level + 1);
if (ret) {
kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
- mm_ops->put_page(pgtable);
return ERR_PTR(ret);
}
@@ -1502,7 +1501,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_try_break_pte(ctx, mmu)) {
kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
- mm_ops->put_page(childp);
return -EAGAIN;
}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 8350fb8fee0b..b7be96a53597 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void)
hyp_mem_base);
}
+static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+{
+ if (host_kvm->arch.pkvm.handle) {
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
+ host_kvm->arch.pkvm.handle));
+ }
+
+ host_kvm->arch.pkvm.handle = 0;
+ free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+}
+
/*
* Allocates and donates memory for hypervisor VM structs at EL2.
*
@@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
return 0;
destroy_vm:
- pkvm_destroy_hyp_vm(host_kvm);
+ __pkvm_destroy_hyp_vm(host_kvm);
return ret;
free_vm:
free_pages_exact(hyp_vm, hyp_vm_sz);
@@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
int ret = 0;
- mutex_lock(&host_kvm->lock);
+ mutex_lock(&host_kvm->arch.config_lock);
if (!host_kvm->arch.pkvm.handle)
ret = __pkvm_create_hyp_vm(host_kvm);
- mutex_unlock(&host_kvm->lock);
+ mutex_unlock(&host_kvm->arch.config_lock);
return ret;
}
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
- if (host_kvm->arch.pkvm.handle) {
- WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
- host_kvm->arch.pkvm.handle));
- }
-
- host_kvm->arch.pkvm.handle = 0;
- free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+ mutex_lock(&host_kvm->arch.config_lock);
+ __pkvm_destroy_hyp_vm(host_kvm);
+ mutex_unlock(&host_kvm->arch.config_lock);
}
int pkvm_init_host_vm(struct kvm *host_kvm)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e2764d0ffa9f..28a93074eca1 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -468,6 +468,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
}
irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
+ if (!irq)
+ continue;
+
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
@@ -1432,6 +1435,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
for (i = 0; i < irq_count; i++) {
irq = vgic_get_irq(kvm, NULL, intids[i]);
+ if (!irq)
+ continue;
update_affinity(irq, vcpu2);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8955da5c47cf..c5b461dda438 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -76,6 +76,7 @@ struct jit_ctx {
int *offset;
int exentry_idx;
__le32 *image;
+ __le32 *ro_image;
u32 stack_size;
int fpb_offset;
};
@@ -205,6 +206,14 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+ if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
+ return -EINVAL;
+
+ return 0;
+}
+
static inline int epilogue_offset(const struct jit_ctx *ctx)
{
int to = ctx->epilogue_offset;
@@ -285,7 +294,8 @@ static bool is_lsi_offset(int offset, int scale)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
-static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
+ bool is_exception_cb)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
@@ -333,19 +343,34 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
emit(A64_NOP, ctx);
- /* Sign lr */
- if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
- emit(A64_PACIASP, ctx);
-
- /* Save FP and LR registers to stay align with ARM64 AAPCS */
- emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
- emit(A64_MOV(1, A64_FP, A64_SP), ctx);
-
- /* Save callee-saved registers */
- emit(A64_PUSH(r6, r7, A64_SP), ctx);
- emit(A64_PUSH(r8, r9, A64_SP), ctx);
- emit(A64_PUSH(fp, tcc, A64_SP), ctx);
- emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
+ if (!is_exception_cb) {
+ /* Sign lr */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
+ emit(A64_PACIASP, ctx);
+ /* Save FP and LR registers to stay align with ARM64 AAPCS */
+ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+
+ /* Save callee-saved registers */
+ emit(A64_PUSH(r6, r7, A64_SP), ctx);
+ emit(A64_PUSH(r8, r9, A64_SP), ctx);
+ emit(A64_PUSH(fp, tcc, A64_SP), ctx);
+ emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
+ } else {
+ /*
+ * Exception callback receives FP of Main Program as third
+ * parameter
+ */
+ emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
+ /*
+ * Main Program already pushed the frame record and the
+ * callee-saved registers. The exception callback will not push
+ * anything and re-use the main program's stack.
+ *
+ * 10 registers are on the stack
+ */
+ emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx);
+ }
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
@@ -365,6 +390,20 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit_bti(A64_BTI_J, ctx);
}
+ /*
+ * Program acting as exception boundary should save all ARM64
+ * Callee-saved registers as the exception callback needs to recover
+ * all ARM64 Callee-saved registers in its epilogue.
+ */
+ if (prog->aux->exception_boundary) {
+ /*
+ * As we are pushing two more registers, BPF_FP should be moved
+ * 16 bytes
+ */
+ emit(A64_SUB_I(1, fp, fp, 16), ctx);
+ emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
+ }
+
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
/* Stack must be multiples of 16B */
@@ -653,7 +692,7 @@ static void build_plt(struct jit_ctx *ctx)
plt->target = (u64)&dummy_tramp;
}
-static void build_epilogue(struct jit_ctx *ctx)
+static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
{
const u8 r0 = bpf2a64[BPF_REG_0];
const u8 r6 = bpf2a64[BPF_REG_6];
@@ -666,6 +705,15 @@ static void build_epilogue(struct jit_ctx *ctx)
/* We're done with BPF stack */
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+ /*
+ * Program acting as exception boundary pushes R23 and R24 in addition
+ * to BPF callee-saved registers. Exception callback uses the boundary
+ * program's stack frame, so recover these extra registers in the above
+ * two cases.
+ */
+ if (ctx->prog->aux->exception_boundary || is_exception_cb)
+ emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
+
/* Restore x27 and x28 */
emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
/* Restore fs (x25) and x26 */
@@ -707,7 +755,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
struct jit_ctx *ctx,
int dst_reg)
{
- off_t offset;
+ off_t ins_offset;
+ off_t fixup_offset;
unsigned long pc;
struct exception_table_entry *ex;
@@ -724,12 +773,17 @@ static int add_exception_handler(const struct bpf_insn *insn,
return -EINVAL;
ex = &ctx->prog->aux->extable[ctx->exentry_idx];
- pc = (unsigned long)&ctx->image[ctx->idx - 1];
+ pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
- offset = pc - (long)&ex->insn;
- if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ /*
+ * This is the relative offset of the instruction that may fault from
+ * the exception table itself. This will be written to the exception
+ * table and if this instruction faults, the destination register will
+ * be set to '0' and the execution will jump to the next instruction.
+ */
+ ins_offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
return -ERANGE;
- ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
@@ -738,12 +792,25 @@ static int add_exception_handler(const struct bpf_insn *insn,
* bits. We don't need to worry about buildtime or runtime sort
* modifying the upper bits because the table is already sorted, and
* isn't part of the main exception table.
+ *
+ * The fixup_offset is set to the next instruction from the instruction
+ * that may fault. The execution will jump to this after handling the
+ * fault.
*/
- offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
- if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+ fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
+ if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;
- ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+ /*
+ * The offsets above have been calculated using the RO buffer but we
+ * need to use the R/W buffer for writes.
+ * switch ex to rw buffer for writing.
+ */
+ ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
+
+ ex->insn = ins_offset;
+
+ ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->type = EX_TYPE_BPF;
@@ -1511,7 +1578,8 @@ static inline void bpf_flush_icache(void *start, void *end)
struct arm64_jit_data {
struct bpf_binary_header *header;
- u8 *image;
+ u8 *ro_image;
+ struct bpf_binary_header *ro_header;
struct jit_ctx ctx;
};
@@ -1520,12 +1588,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
int image_size, prog_size, extable_size, extable_align, extable_offset;
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
+ struct bpf_binary_header *ro_header;
struct arm64_jit_data *jit_data;
bool was_classic = bpf_prog_was_classic(prog);
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
u8 *image_ptr;
+ u8 *ro_image_ptr;
if (!prog->jit_requested)
return orig_prog;
@@ -1552,8 +1622,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
- image_ptr = jit_data->image;
+ ro_image_ptr = jit_data->ro_image;
+ ro_header = jit_data->ro_header;
header = jit_data->header;
+ image_ptr = (void *)header + ((void *)ro_image_ptr
+ - (void *)ro_header);
extra_pass = true;
prog_size = sizeof(u32) * ctx.idx;
goto skip_init_ctx;
@@ -1575,7 +1648,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* BPF line info needs ctx->offset[i] to be the offset of
* instruction[i] in jited image, so build prologue first.
*/
- if (build_prologue(&ctx, was_classic)) {
+ if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) {
prog = orig_prog;
goto out_off;
}
@@ -1586,7 +1659,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.epilogue_offset = ctx.idx;
- build_epilogue(&ctx);
+ build_epilogue(&ctx, prog->aux->exception_cb);
build_plt(&ctx);
extable_align = __alignof__(struct exception_table_entry);
@@ -1598,63 +1671,81 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* also allocate space for plt target */
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
- header = bpf_jit_binary_alloc(image_size, &image_ptr,
- sizeof(u32), jit_fill_hole);
- if (header == NULL) {
+ ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
+ sizeof(u32), &header, &image_ptr,
+ jit_fill_hole);
+ if (!ro_header) {
prog = orig_prog;
goto out_off;
}
/* 2. Now, the actual pass. */
+ /*
+ * Use the image(RW) for writing the JITed instructions. But also save
+ * the ro_image(RX) for calculating the offsets in the image. The RW
+ * image will be later copied to the RX image from where the program
+ * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
+ * final step.
+ */
ctx.image = (__le32 *)image_ptr;
+ ctx.ro_image = (__le32 *)ro_image_ptr;
if (extable_size)
- prog->aux->extable = (void *)image_ptr + extable_offset;
+ prog->aux->extable = (void *)ro_image_ptr + extable_offset;
skip_init_ctx:
ctx.idx = 0;
ctx.exentry_idx = 0;
- build_prologue(&ctx, was_classic);
+ build_prologue(&ctx, was_classic, prog->aux->exception_cb);
if (build_body(&ctx, extra_pass)) {
- bpf_jit_binary_free(header);
prog = orig_prog;
- goto out_off;
+ goto out_free_hdr;
}
- build_epilogue(&ctx);
+ build_epilogue(&ctx, prog->aux->exception_cb);
build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */
if (validate_ctx(&ctx)) {
- bpf_jit_binary_free(header);
prog = orig_prog;
- goto out_off;
+ goto out_free_hdr;
}
/* And we're done. */
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
- bpf_flush_icache(header, ctx.image + ctx.idx);
-
if (!prog->is_func || extra_pass) {
if (extra_pass && ctx.idx != jit_data->ctx.idx) {
pr_err_once("multi-func JIT bug %d != %d\n",
ctx.idx, jit_data->ctx.idx);
- bpf_jit_binary_free(header);
prog->bpf_func = NULL;
prog->jited = 0;
prog->jited_len = 0;
+ goto out_free_hdr;
+ }
+ if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header,
+ header))) {
+ /* ro_header has been freed */
+ ro_header = NULL;
+ prog = orig_prog;
goto out_off;
}
- bpf_jit_binary_lock_ro(header);
+ /*
+ * The instructions have now been copied to the ROX region from
+ * where they will execute. Now the data cache has to be cleaned to
+ * the PoU and the I-cache has to be invalidated for the VAs.
+ */
+ bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
} else {
jit_data->ctx = ctx;
- jit_data->image = image_ptr;
+ jit_data->ro_image = ro_image_ptr;
jit_data->header = header;
+ jit_data->ro_header = ro_header;
}
- prog->bpf_func = (void *)ctx.image;
+
+ prog->bpf_func = (void *)ctx.ro_image;
prog->jited = 1;
prog->jited_len = prog_size;
@@ -1675,6 +1766,14 @@ out:
bpf_jit_prog_release_other(prog, prog == orig_prog ?
tmp : orig_prog);
return prog;
+
+out_free_hdr:
+ if (header) {
+ bpf_arch_text_copy(&ro_header->size, &header->size,
+ sizeof(header->size));
+ bpf_jit_binary_pack_free(ro_header, header);
+ }
+ goto out_off;
}
bool bpf_jit_supports_kfunc_call(void)
@@ -1682,6 +1781,13 @@ bool bpf_jit_supports_kfunc_call(void)
return true;
}
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ if (!aarch64_insn_copy(dst, src, len))
+ return ERR_PTR(-EINVAL);
+ return dst;
+}
+
u64 bpf_jit_alloc_exec_limit(void)
{
return VMALLOC_END - VMALLOC_START;
@@ -1970,7 +2076,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
/* store return value */
emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
/* reserve a nop for bpf_tramp_image_put */
- im->ip_after_call = ctx->image + ctx->idx;
+ im->ip_after_call = ctx->ro_image + ctx->idx;
emit(A64_NOP, ctx);
}
@@ -1985,7 +2091,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
run_ctx_off, false);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = ctx->image + ctx->idx;
+ im->ip_epilogue = ctx->ro_image + ctx->idx;
emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
@@ -2018,9 +2124,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_RET(A64_R(10)), ctx);
}
- if (ctx->image)
- bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
-
kfree(branches);
return ctx->idx;
@@ -2063,14 +2166,43 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
}
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
- void *image_end, const struct btf_func_model *m,
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+ return bpf_prog_pack_alloc(size, jit_fill_hole);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+ bpf_prog_pack_free(image, size);
+}
+
+void arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
+ void *ro_image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
int ret, nregs;
+ void *image, *tmp;
+ u32 size = ro_image_end - ro_image;
+
+ /* image doesn't need to be in module memory range, so we can
+ * use kvmalloc.
+ */
+ image = kvmalloc(size, GFP_KERNEL);
+ if (!image)
+ return -ENOMEM;
+
struct jit_ctx ctx = {
.image = image,
+ .ro_image = ro_image,
.idx = 0,
};
@@ -2079,15 +2211,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
if (nregs > 8)
return -ENOTSUPP;
- jit_fill_hole(image, (unsigned int)(image_end - image));
+ jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
- if (ret > 0 && validate_code(&ctx) < 0)
+ if (ret > 0 && validate_code(&ctx) < 0) {
ret = -EINVAL;
+ goto out;
+ }
if (ret > 0)
ret *= AARCH64_INSN_SIZE;
+ tmp = bpf_arch_text_copy(ro_image, image, size);
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ goto out;
+ }
+
+ bpf_flush_icache(ro_image, ro_image + size);
+out:
+ kvfree(image);
return ret;
}
@@ -2305,3 +2448,42 @@ out:
return ret;
}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_exceptions(void)
+{
+ /* We unwind through both kernel frames starting from within bpf_throw
+ * call and BPF frames. Therefore we require FP unwinder to be enabled
+ * to walk kernel frames and reach BPF frames in the stack trace.
+ * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y
+ */
+ return true;
+}
+
+void bpf_jit_free(struct bpf_prog *prog)
+{
+ if (prog->jited) {
+ struct arm64_jit_data *jit_data = prog->aux->jit_data;
+ struct bpf_binary_header *hdr;
+
+ /*
+ * If we fail the final pass of JIT (from jit_subprogs),
+ * the program may not be finalized yet. Call finalize here
+ * before freeing it.
+ */
+ if (jit_data) {
+ bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
+ sizeof(jit_data->header->size));
+ kfree(jit_data);
+ }
+ hdr = bpf_jit_binary_pack_hdr(prog);
+ bpf_jit_binary_pack_free(hdr, NULL);
+ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
+ }
+
+ bpf_prog_unlock_free(prog);
+}
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index cf2a6fd7dff8..9c2723ab1c94 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -89,6 +89,7 @@ config CSKY
select HAVE_KPROBES if !CPU_CK610
select HAVE_KPROBES_ON_FTRACE if !CPU_CK610
select HAVE_KRETPROBES if !CPU_CK610
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h
index 98a3f4b168bd..ef2e37a10a0f 100644
--- a/arch/csky/include/asm/jump_label.h
+++ b/arch/csky/include/asm/jump_label.h
@@ -12,7 +12,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop32 \n"
" .pushsection __jump_table, \"aw\" \n"
" .align 2 \n"
@@ -29,7 +29,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: bsr32 %l[label] \n"
" .pushsection __jump_table, \"aw\" \n"
" .align 2 \n"
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index 4a0502e324a6..0ca6c408c07f 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -10,7 +10,7 @@
/*
* PAGE_SHIFT determines the page size: 4KB
*/
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define THREAD_SIZE (PAGE_SIZE * 2)
@@ -82,11 +82,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
return __pa(kaddr) >> PAGE_SHIFT;
}
-static inline void * pfn_to_virt(unsigned long pfn)
-{
- return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT);
-}
-
#define MAP_NR(x) PFN_DOWN((unsigned long)(x) - PAGE_OFFSET - \
PHYS_OFFSET_OFFSET)
#define virt_to_page(x) (mem_map + MAP_NR(x))
diff --git a/arch/csky/include/asm/vdso.h b/arch/csky/include/asm/vdso.h
index bdce581b5fcb..181a15edafe8 100644
--- a/arch/csky/include/asm/vdso.h
+++ b/arch/csky/include/asm/vdso.h
@@ -5,11 +5,6 @@
#include <linux/types.h>
-#ifndef GENERIC_TIME_VSYSCALL
-struct vdso_data {
-};
-#endif
-
/*
* The VDSO symbols are mapped into Linux so we can just use regular symbol
* addressing to get their offsets in userspace. The symbols are mapped at an
diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c
index 8e42352cbf12..92dbbf3e0205 100644
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -152,10 +152,6 @@ void arch_irq_work_raise(void)
}
#endif
-void __init smp_prepare_boot_cpu(void)
-{
-}
-
void __init smp_prepare_cpus(unsigned int max_cpus)
{
}
diff --git a/arch/csky/kernel/vdso.c b/arch/csky/kernel/vdso.c
index 16c20d64d165..2ca886e4a458 100644
--- a/arch/csky/kernel/vdso.c
+++ b/arch/csky/kernel/vdso.c
@@ -8,25 +8,15 @@
#include <linux/slab.h>
#include <asm/page.h>
-#ifdef GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
-#else
-#include <asm/vdso.h>
-#endif
extern char vdso_start[], vdso_end[];
static unsigned int vdso_pages;
static struct page **vdso_pagelist;
-/*
- * The vDSO data page.
- */
-static union {
- struct vdso_data data;
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+static union vdso_data_store vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = vdso_data_store.data;
static int __init vdso_init(void)
{
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index a880ee067d2e..1414052e7d6b 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -8,6 +8,10 @@ config HEXAGON
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_PREEMPT
select DMA_GLOBAL_POOL
+ select HAVE_PAGE_SIZE_4KB
+ select HAVE_PAGE_SIZE_16KB
+ select HAVE_PAGE_SIZE_64KB
+ select HAVE_PAGE_SIZE_256KB
# Other pending projects/to-do items.
# select HAVE_REGS_AND_STACK_ACCESS_API
# select HAVE_HW_BREAKPOINT if PERF_EVENTS
@@ -120,26 +124,6 @@ config NR_CPUS
This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image.
-choice
- prompt "Kernel page size"
- default PAGE_SIZE_4KB
- help
- Changes the default page size; use with caution.
-
-config PAGE_SIZE_4KB
- bool "4KB"
-
-config PAGE_SIZE_16KB
- bool "16KB"
-
-config PAGE_SIZE_64KB
- bool "64KB"
-
-config PAGE_SIZE_256KB
- bool "256KB"
-
-endchoice
-
source "kernel/Kconfig.hz"
endmenu
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
index 10f1bc07423c..8a6af57274c2 100644
--- a/arch/hexagon/include/asm/page.h
+++ b/arch/hexagon/include/asm/page.h
@@ -13,27 +13,22 @@
/* This is probably not the most graceful way to handle this. */
#ifdef CONFIG_PAGE_SIZE_4KB
-#define PAGE_SHIFT 12
#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_4KB
#endif
#ifdef CONFIG_PAGE_SIZE_16KB
-#define PAGE_SHIFT 14
#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_16KB
#endif
#ifdef CONFIG_PAGE_SIZE_64KB
-#define PAGE_SHIFT 16
#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_64KB
#endif
#ifdef CONFIG_PAGE_SIZE_256KB
-#define PAGE_SHIFT 18
#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_256KB
#endif
#ifdef CONFIG_PAGE_SIZE_1MB
-#define PAGE_SHIFT 20
#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB
#endif
@@ -50,6 +45,7 @@
#define HVM_HUGEPAGE_SIZE 0x5
#endif
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
@@ -133,12 +129,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
return __pa(kaddr) >> PAGE_SHIFT;
}
-static inline void *pfn_to_virt(unsigned long pfn)
-{
- return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT);
-}
-
-
#define page_to_virt(page) __va(page_to_phys(page))
#include <asm/mem-layout.h>
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index 608884bc3396..65e1fdf9fdb2 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -114,10 +114,6 @@ void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
local_irq_restore(flags);
}
-void __init smp_prepare_boot_cpu(void)
-{
-}
-
/*
* interrupts should already be disabled from the VM
* SP should already be correct; need to set THREADINFO_REG
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 10959e6c3583..b274784c2e26 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -12,6 +12,7 @@ config LOONGARCH
select ARCH_DISABLE_KASAN_INLINE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
@@ -99,6 +100,7 @@ config LOONGARCH
select HAVE_ARCH_KFENCE
select HAVE_ARCH_KGDB if PERF_EVENTS
select HAVE_ARCH_MMAP_RND_BITS if MMU
+ select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
@@ -225,15 +227,6 @@ config MACH_LOONGSON64
config FIX_EARLYCON_MEM
def_bool y
-config PAGE_SIZE_4KB
- bool
-
-config PAGE_SIZE_16KB
- bool
-
-config PAGE_SIZE_64KB
- bool
-
config PGTABLE_2LEVEL
bool
@@ -286,7 +279,7 @@ choice
config 4KB_3LEVEL
bool "4KB with 3 levels"
- select PAGE_SIZE_4KB
+ select HAVE_PAGE_SIZE_4KB
select PGTABLE_3LEVEL
help
This option selects 4KB page size with 3 level page tables, which
@@ -294,7 +287,7 @@ config 4KB_3LEVEL
config 4KB_4LEVEL
bool "4KB with 4 levels"
- select PAGE_SIZE_4KB
+ select HAVE_PAGE_SIZE_4KB
select PGTABLE_4LEVEL
help
This option selects 4KB page size with 4 level page tables, which
@@ -302,7 +295,7 @@ config 4KB_4LEVEL
config 16KB_2LEVEL
bool "16KB with 2 levels"
- select PAGE_SIZE_16KB
+ select HAVE_PAGE_SIZE_16KB
select PGTABLE_2LEVEL
help
This option selects 16KB page size with 2 level page tables, which
@@ -310,7 +303,7 @@ config 16KB_2LEVEL
config 16KB_3LEVEL
bool "16KB with 3 levels"
- select PAGE_SIZE_16KB
+ select HAVE_PAGE_SIZE_16KB
select PGTABLE_3LEVEL
help
This option selects 16KB page size with 3 level page tables, which
@@ -318,7 +311,7 @@ config 16KB_3LEVEL
config 64KB_2LEVEL
bool "64KB with 2 levels"
- select PAGE_SIZE_64KB
+ select HAVE_PAGE_SIZE_64KB
select PGTABLE_2LEVEL
help
This option selects 64KB page size with 2 level page tables, which
@@ -326,7 +319,7 @@ config 64KB_2LEVEL
config 64KB_3LEVEL
bool "64KB with 3 levels"
- select PAGE_SIZE_64KB
+ select HAVE_PAGE_SIZE_64KB
select PGTABLE_3LEVEL
help
This option selects 64KB page size with 3 level page tables, which
@@ -632,23 +625,6 @@ config RANDOMIZE_BASE_MAX_OFFSET
This is limited by the size of the lower address memory, 256MB.
-config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
- default y
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y. Only embedded should say N here.
-
endmenu
config ARCH_SELECT_MEMORY_MODEL
@@ -667,10 +643,6 @@ config ARCH_SPARSEMEM_ENABLE
or have huge holes in the physical address space for other reasons.
See <file:Documentation/mm/numa.rst> for more.
-config ARCH_ENABLE_THP_MIGRATION
- def_bool y
- depends on TRANSPARENT_HUGEPAGE
-
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
index b38071a4d0b0..8aefb0c12672 100644
--- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -60,7 +60,7 @@
#address-cells = <1>;
#size-cells = <0>;
- eeprom@57{
+ eeprom@57 {
compatible = "atmel,24c16";
reg = <0x57>;
pagesize = <16>;
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
index 132a2d1ea8bc..ed4d32434041 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -78,7 +78,7 @@
#address-cells = <1>;
#size-cells = <0>;
- eeprom@57{
+ eeprom@57 {
compatible = "atmel,24c16";
reg = <0x57>;
pagesize = <16>;
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 8de6c4b83a61..49e29b29996f 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -32,8 +32,10 @@ static inline bool acpi_has_cpu_in_madt(void)
return true;
}
+#define MAX_CORE_PIC 256
+
extern struct list_head acpi_wakeup_device_list;
-extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS];
+extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
extern int __init parse_acpi_topology(void);
diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h
index 3cea299a5ef5..29acfe3de3fa 100644
--- a/arch/loongarch/include/asm/jump_label.h
+++ b/arch/loongarch/include/asm/jump_label.h
@@ -22,7 +22,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: nop \n\t"
JUMP_TABLE_ENTRY
: : "i"(&((char *)key)[branch]) : : l_yes);
@@ -35,7 +35,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto(
+ asm goto(
"1: b %l[l_yes] \n\t"
JUMP_TABLE_ENTRY
: : "i"(&((char *)key)[branch]) : : l_yes);
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index e71ceb88f29e..0cb4fdb8a9b5 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -60,7 +60,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu);
void kvm_save_lsx(struct loongarch_fpu *fpu);
void kvm_restore_lsx(struct loongarch_fpu *fpu);
#else
-static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { }
+static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { return -EINVAL; }
static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { }
static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { }
#endif
@@ -70,7 +70,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu);
void kvm_save_lasx(struct loongarch_fpu *fpu);
void kvm_restore_lasx(struct loongarch_fpu *fpu);
#else
-static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { }
+static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { return -EINVAL; }
static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { }
static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { }
#endif
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 63f137ce82a4..afb6fa16b826 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -11,15 +11,7 @@
/*
* PAGE_SHIFT determines the page size
*/
-#ifdef CONFIG_PAGE_SIZE_4KB
-#define PAGE_SHIFT 12
-#endif
-#ifdef CONFIG_PAGE_SIZE_16KB
-#define PAGE_SHIFT 14
-#endif
-#ifdef CONFIG_PAGE_SIZE_64KB
-#define PAGE_SHIFT 16
-#endif
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index b6b097bbf866..5cf59c617126 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -29,11 +29,9 @@ int disabled_cpus;
u64 acpi_saved_sp;
-#define MAX_CORE_PIC 256
-
#define PREFIX "ACPI: "
-struct acpi_madt_core_pic acpi_core_pic[NR_CPUS];
+struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size)
{
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index edf2bba80130..fd915ad69c09 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -357,6 +357,8 @@ void __init platform_init(void)
acpi_gbl_use_default_register_widths = false;
acpi_boot_table_init();
#endif
+
+ early_init_fdt_scan_reserved_mem();
unflatten_and_copy_device_tree();
#ifdef CONFIG_NUMA
@@ -390,8 +392,6 @@ static void __init arch_mem_init(char **cmdline_p)
check_kernel_sections_mem();
- early_init_fdt_scan_reserved_mem();
-
/*
* In order to reduce the possibility of kernel panic when failed to
* get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
@@ -490,7 +490,7 @@ static int __init add_legacy_isa_io(struct fwnode_handle *fwnode,
}
vaddr = (unsigned long)(PCI_IOBASE + range->io_start);
- ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
+ vmap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
return 0;
}
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index a16e3dbe9f09..aabee0b280fe 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, int prec)
}
}
+static inline void set_cpu_core_map(int cpu)
+{
+ int i;
+
+ cpumask_set_cpu(cpu, &cpu_core_setup_map);
+
+ for_each_cpu(i, &cpu_core_setup_map) {
+ if (cpu_data[cpu].package == cpu_data[i].package) {
+ cpumask_set_cpu(i, &cpu_core_map[cpu]);
+ cpumask_set_cpu(cpu, &cpu_core_map[i]);
+ }
+ }
+}
+
+static inline void set_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+
+ for_each_cpu(i, &cpu_sibling_setup_map) {
+ if (cpus_are_siblings(cpu, i)) {
+ cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
+ cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
+ }
+ }
+}
+
+static inline void clear_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ for_each_cpu(i, &cpu_sibling_setup_map) {
+ if (cpus_are_siblings(cpu, i)) {
+ cpumask_clear_cpu(i, &cpu_sibling_map[cpu]);
+ cpumask_clear_cpu(cpu, &cpu_sibling_map[i]);
+ }
+ }
+
+ cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
+}
+
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+void calculate_cpu_foreign_map(void)
+{
+ int i, k, core_present;
+ cpumask_t temp_foreign_map;
+
+ /* Re-calculate the mask */
+ cpumask_clear(&temp_foreign_map);
+ for_each_online_cpu(i) {
+ core_present = 0;
+ for_each_cpu(k, &temp_foreign_map)
+ if (cpus_are_siblings(i, k))
+ core_present = 1;
+ if (!core_present)
+ cpumask_set_cpu(i, &temp_foreign_map);
+ }
+
+ for_each_online_cpu(i)
+ cpumask_andnot(&cpu_foreign_map[i],
+ &temp_foreign_map, &cpu_sibling_map[i]);
+}
+
/* Send mailbox buffer via Mail_Send */
static void csr_mail_send(uint64_t data, int cpu, int mailbox)
{
@@ -303,6 +370,7 @@ int loongson_cpu_disable(void)
numa_remove_cpu(cpu);
#endif
set_cpu_online(cpu, false);
+ clear_cpu_sibling_map(cpu);
calculate_cpu_foreign_map();
local_irq_save(flags);
irq_migrate_all_off_this_cpu();
@@ -337,6 +405,7 @@ void __noreturn arch_cpu_idle_dead(void)
addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
} while (addr == 0);
+ local_irq_disable();
init_fn = (void *)TO_CACHE(addr);
iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
@@ -379,59 +448,6 @@ static int __init ipi_pm_init(void)
core_initcall(ipi_pm_init);
#endif
-static inline void set_cpu_sibling_map(int cpu)
-{
- int i;
-
- cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
-
- for_each_cpu(i, &cpu_sibling_setup_map) {
- if (cpus_are_siblings(cpu, i)) {
- cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
- cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
- }
- }
-}
-
-static inline void set_cpu_core_map(int cpu)
-{
- int i;
-
- cpumask_set_cpu(cpu, &cpu_core_setup_map);
-
- for_each_cpu(i, &cpu_core_setup_map) {
- if (cpu_data[cpu].package == cpu_data[i].package) {
- cpumask_set_cpu(i, &cpu_core_map[cpu]);
- cpumask_set_cpu(cpu, &cpu_core_map[i]);
- }
- }
-}
-
-/*
- * Calculate a new cpu_foreign_map mask whenever a
- * new cpu appears or disappears.
- */
-void calculate_cpu_foreign_map(void)
-{
- int i, k, core_present;
- cpumask_t temp_foreign_map;
-
- /* Re-calculate the mask */
- cpumask_clear(&temp_foreign_map);
- for_each_online_cpu(i) {
- core_present = 0;
- for_each_cpu(k, &temp_foreign_map)
- if (cpus_are_siblings(i, k))
- core_present = 1;
- if (!core_present)
- cpumask_set_cpu(i, &temp_foreign_map);
- }
-
- for_each_online_cpu(i)
- cpumask_andnot(&cpu_foreign_map[i],
- &temp_foreign_map, &cpu_sibling_map[i]);
-}
-
/* Preload SMP state for boot cpu */
void smp_prepare_boot_cpu(void)
{
@@ -509,7 +525,6 @@ asmlinkage void start_secondary(void)
sync_counter();
cpu = raw_smp_processor_id();
set_my_cpu_offset(per_cpu_offset(cpu));
- rcutree_report_cpu_starting(cpu);
cpu_probe();
constant_clockevent_init();
diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
index 14941e4be66d..90dfccb41c14 100644
--- a/arch/loongarch/kernel/vdso.c
+++ b/arch/loongarch/kernel/vdso.c
@@ -21,15 +21,13 @@
#include <asm/vdso.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>
+#include <vdso/datapage.h>
#include <generated/vdso-offsets.h>
extern char vdso_start[], vdso_end[];
/* Kernel-provided data used by the VDSO. */
-static union {
- u8 page[PAGE_SIZE];
- struct vdso_data data[CS_BASES];
-} generic_vdso_data __page_aligned_data;
+static union vdso_data_store generic_vdso_data __page_aligned_data;
static union {
u8 page[LOONGARCH_VDSO_DATA_SIZE];
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
index 915f17527893..50a6acd7ffe4 100644
--- a/arch/loongarch/kvm/mmu.c
+++ b/arch/loongarch/kvm/mmu.c
@@ -675,7 +675,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
*
* There are several ways to safely use this helper:
*
- * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
+ * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
* consuming it. In this case, mmu_lock doesn't need to be held during the
* lookup, but it does need to be held while checking the MMU notifier.
*
@@ -855,7 +855,7 @@ retry:
/* Check if an invalidation has taken place since we got pfn */
spin_lock(&kvm->mmu_lock);
- if (mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) {
+ if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
/*
* This can happen when mappings are changed asynchronously, but
* also synchronously if a COW is triggered by
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 27701991886d..36106922b5d7 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -298,74 +298,73 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
return ret;
}
-static int _kvm_get_cpucfg(int id, u64 *v)
+static int _kvm_get_cpucfg_mask(int id, u64 *v)
{
- int ret = 0;
-
- if (id < 0 && id >= KVM_MAX_CPUCFG_REGS)
+ if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
return -EINVAL;
switch (id) {
case 2:
- /* Return CPUCFG2 features which have been supported by KVM */
+ /* CPUCFG2 features unconditionally supported by KVM */
*v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP |
CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
CPUCFG2_LAM;
/*
- * If LSX is supported by CPU, it is also supported by KVM,
- * as we implement it.
+ * For the ISA extensions listed below, if one is supported
+ * by the host, then it is also supported by KVM.
*/
if (cpu_has_lsx)
*v |= CPUCFG2_LSX;
- /*
- * if LASX is supported by CPU, it is also supported by KVM,
- * as we implement it.
- */
if (cpu_has_lasx)
*v |= CPUCFG2_LASX;
- break;
+ return 0;
default:
- ret = -EINVAL;
- break;
+ /*
+ * No restrictions on other valid CPUCFG IDs' values, but
+ * CPUCFG data is limited to 32 bits as the LoongArch ISA
+ * manual says (Volume 1, Section 2.2.10.5 "CPUCFG").
+ */
+ *v = U32_MAX;
+ return 0;
}
- return ret;
}
static int kvm_check_cpucfg(int id, u64 val)
{
- u64 mask;
- int ret = 0;
-
- if (id < 0 && id >= KVM_MAX_CPUCFG_REGS)
- return -EINVAL;
+ int ret;
+ u64 mask = 0;
- if (_kvm_get_cpucfg(id, &mask))
+ ret = _kvm_get_cpucfg_mask(id, &mask);
+ if (ret)
return ret;
+ if (val & ~mask)
+ /* Unsupported features and/or the higher 32 bits should not be set */
+ return -EINVAL;
+
switch (id) {
case 2:
- /* CPUCFG2 features checking */
- if (val & ~mask)
- /* The unsupported features should not be set */
- ret = -EINVAL;
- else if (!(val & CPUCFG2_LLFTP))
- /* The LLFTP must be set, as guest must has a constant timer */
- ret = -EINVAL;
- else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
- /* Single and double float point must both be set when enable FP */
- ret = -EINVAL;
- else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
- /* FP should be set when enable LSX */
- ret = -EINVAL;
- else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
- /* LSX, FP should be set when enable LASX, and FP has been checked before. */
- ret = -EINVAL;
- break;
+ if (!(val & CPUCFG2_LLFTP))
+ /* Guests must have a constant timer */
+ return -EINVAL;
+ if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
+ /* Single and double float point must both be set when FP is enabled */
+ return -EINVAL;
+ if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
+ /* LSX architecturally implies FP but val does not satisfy that */
+ return -EINVAL;
+ if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
+ /* LASX architecturally implies LSX and FP but val does not satisfy that */
+ return -EINVAL;
+ return 0;
default:
- break;
+ /*
+ * Values for the other CPUCFG IDs are not being further validated
+ * besides the mask check above.
+ */
+ return 0;
}
- return ret;
}
static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
@@ -566,7 +565,7 @@ static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu,
uint64_t val;
uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
- ret = _kvm_get_cpucfg(attr->attr, &val);
+ ret = _kvm_get_cpucfg_mask(attr->attr, &val);
if (ret)
return ret;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index cc3e81fe0186..c608adc99845 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -44,6 +44,9 @@ void *kasan_mem_to_shadow(const void *addr)
unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
unsigned long offset = 0;
+ if (maddr >= FIXADDR_START)
+ return (void *)(kasan_early_shadow_page);
+
maddr &= XRANGE_SHADOW_MASK;
switch (xrange) {
case XKPRANGE_CC_SEG:
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 2c0a411f23aa..0b95d32b30c9 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -284,12 +284,16 @@ static void setup_tlb_handler(int cpu)
set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
- }
+ } else {
+ int vec_sz __maybe_unused;
+ void *addr __maybe_unused;
+ struct page *page __maybe_unused;
+
+ /* Avoid lockdep warning */
+ rcutree_report_cpu_starting(cpu);
+
#ifdef CONFIG_NUMA
- else {
- void *addr;
- struct page *page;
- const int vec_sz = sizeof(exception_handlers);
+ vec_sz = sizeof(exception_handlers);
if (pcpu_handlers[cpu])
return;
@@ -305,8 +309,8 @@ static void setup_tlb_handler(int cpu)
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
- }
#endif
+ }
}
void tlb_init(int cpu)
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index c74c9921304f..f597cd08a96b 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -2,6 +2,7 @@
# Objects to go into the VDSO.
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Include the generic Makefile to check the built vdso.
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 4b3e93cac723..7b709453d5e7 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -84,12 +84,15 @@ config MMU
config MMU_MOTOROLA
bool
+ select HAVE_PAGE_SIZE_4KB
config MMU_COLDFIRE
+ select HAVE_PAGE_SIZE_8KB
bool
config MMU_SUN3
bool
+ select HAVE_PAGE_SIZE_8KB
depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE
config ARCH_SUPPORTS_KEXEC
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 9dcf245c9cbf..c777a129768a 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -30,6 +30,7 @@ config COLDFIRE
select GENERIC_CSUM
select GPIOLIB
select HAVE_LEGACY_CLK
+ select HAVE_PAGE_SIZE_8KB if !MMU
endchoice
@@ -45,6 +46,7 @@ config M68000
select GENERIC_CSUM
select CPU_NO_EFFICIENT_FFS
select HAVE_ARCH_HASH
+ select HAVE_PAGE_SIZE_4KB
select LEGACY_TIMER_TICK
help
The Freescale (was Motorola) 68000 CPU is the first generation of
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 43e39040d3ac..0abcf994ce55 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -15,10 +15,10 @@
KBUILD_DEFCONFIG := multi_defconfig
ifdef cross_compiling
- ifeq ($(CROSS_COMPILE),)
+ ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, \
m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
- endif
+ endif
endif
#
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index b4d71fea558f..a6f6607efe79 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -336,7 +336,6 @@ CONFIG_ATA=y
CONFIG_PATA_GAYLE=y
CONFIG_PATA_BUDDHA=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -579,12 +578,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 682d8cd3dd3c..0ca1f9f930bc 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -316,7 +316,6 @@ CONFIG_SCSI_SAS_ATTRS=m
CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -536,12 +535,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 15259ced8463..be030659d8d7 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -331,7 +331,6 @@ CONFIG_ATA=y
# CONFIG_ATA_BMDMA is not set
CONFIG_PATA_FALCON=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -556,12 +555,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 7395c12caef6..ad8f81fbb630 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -314,7 +314,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_BVME6000_SCSI=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -528,12 +527,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 92506bc7f78d..ff253b6accec 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -315,7 +315,6 @@ CONFIG_SCSI_SAS_ATTRS=m
CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -538,12 +537,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 144bc8c0d8b5..f92b866620a7 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -320,7 +320,6 @@ CONFIG_ATA=y
# CONFIG_ATA_BMDMA is not set
CONFIG_PATA_PLATFORM=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -555,12 +554,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 07594c729497..bd813beaa8a7 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -363,7 +363,6 @@ CONFIG_PATA_GAYLE=y
CONFIG_PATA_BUDDHA=y
CONFIG_PATA_PLATFORM=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -641,12 +640,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index c34de6c1de20..2237ee0fe433 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -313,7 +313,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MVME147_SCSI=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -527,12 +526,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 83bc029d1f33..afb5aa9c5012 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -314,7 +314,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MVME16x_SCSI=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -528,12 +527,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 4f551dac2ed7..e40f7a308966 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -320,7 +320,6 @@ CONFIG_ATA=y
# CONFIG_ATA_BMDMA is not set
CONFIG_PATA_FALCON=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -545,12 +544,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index b1bf01182bd3..4df397c0395f 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -310,7 +310,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_SUN3_SCSI=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -526,12 +525,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 5c9a3f71f036..aa7719b3947f 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -311,7 +311,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_SUN3X_ESP=y
CONFIG_MD=y
-CONFIG_MD_LINEAR=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
@@ -526,12 +525,10 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index a708fbd5a844..642fb80c5c4e 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -96,6 +96,9 @@ static const struct block_device_operations nfhd_ops = {
static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
{
+ struct queue_limits lim = {
+ .logical_block_size = bsize,
+ };
struct nfhd_device *dev;
int dev_id = id - NFHD_DEV_OFFSET;
int err = -ENOMEM;
@@ -117,9 +120,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
dev->bsize = bsize;
dev->bshift = ffs(bsize) - 10;
- dev->disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!dev->disk)
+ dev->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(dev->disk)) {
+ err = PTR_ERR(dev->disk);
goto free_dev;
+ }
dev->disk->major = major_num;
dev->disk->first_minor = dev_id * 16;
@@ -128,7 +133,6 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
dev->disk->private_data = dev;
sprintf(dev->disk->disk_name, "nfhd%u", dev_id);
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
- blk_queue_logical_block_size(dev->disk->queue, bsize);
err = add_disk(dev->disk);
if (err)
goto out_cleanup_disk;
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index a5993ad83ed8..8cfb84b49975 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -7,11 +7,7 @@
#include <asm/page_offset.h>
/* PAGE_SHIFT determines the page size */
-#if defined(CONFIG_SUN3) || defined(CONFIG_COLDFIRE)
-#define PAGE_SHIFT 13
-#else
-#define PAGE_SHIFT 12
-#endif
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PAGE_OFFSET (PAGE_OFFSET_RAW)
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 211f338d6235..f18ec02ddeb2 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -31,6 +31,7 @@ config MICROBLAZE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PCI
select IRQ_DOMAIN
select XILINX_INTC
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 86a4ce07c192..8810f4f1c3b0 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -20,7 +20,7 @@
#ifdef __KERNEL__
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 797ae590ebdb..6f251746777d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -14,7 +14,7 @@ config MIPS
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_KEEP_MEMBLOCK
select ARCH_USE_BUILTIN_BSWAP
@@ -81,6 +81,9 @@ config MIPS
select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
+ select HAVE_PAGE_SIZE_4KB if !CPU_LOONGSON2EF && !CPU_LOONGSON64
+ select HAVE_PAGE_SIZE_16KB if !CPU_R3000
+ select HAVE_PAGE_SIZE_64KB if !CPU_R3000
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
@@ -1608,6 +1611,8 @@ config CPU_CAVIUM_OCTEON
depends on SYS_HAS_CPU_CAVIUM_OCTEON
select CPU_HAS_PREFETCH
select CPU_SUPPORTS_64BIT_KERNEL
+ select HAVE_PAGE_SIZE_8KB if !MIPS_VA_BITS_48
+ select HAVE_PAGE_SIZE_32KB if !MIPS_VA_BITS_48
select WEAK_ORDERING
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
@@ -2029,59 +2034,6 @@ config ZBOOT_LOAD_ADDRESS
This is only used if non-zero.
-choice
- prompt "Kernel page size"
- default PAGE_SIZE_4KB
-
-config PAGE_SIZE_4KB
- bool "4kB"
- depends on !CPU_LOONGSON2EF && !CPU_LOONGSON64
- help
- This option select the standard 4kB Linux page size. On some
- R3000-family processors this is the only available page size. Using
- 4kB page size will minimize memory consumption and is therefore
- recommended for low memory systems.
-
-config PAGE_SIZE_8KB
- bool "8kB"
- depends on CPU_CAVIUM_OCTEON
- depends on !MIPS_VA_BITS_48
- help
- Using 8kB page size will result in higher performance kernel at
- the price of higher memory consumption. This option is available
- only on cnMIPS processors. Note that you will need a suitable Linux
- distribution to support this.
-
-config PAGE_SIZE_16KB
- bool "16kB"
- depends on !CPU_R3000
- help
- Using 16kB page size will result in higher performance kernel at
- the price of higher memory consumption. This option is available on
- all non-R3000 family processors. Note that you will need a suitable
- Linux distribution to support this.
-
-config PAGE_SIZE_32KB
- bool "32kB"
- depends on CPU_CAVIUM_OCTEON
- depends on !MIPS_VA_BITS_48
- help
- Using 32kB page size will result in higher performance kernel at
- the price of higher memory consumption. This option is available
- only on cnMIPS cores. Note that you will need a suitable Linux
- distribution to support this.
-
-config PAGE_SIZE_64KB
- bool "64kB"
- depends on !CPU_R3000
- help
- Using 64kB page size will result in higher performance kernel at
- the price of higher memory consumption. This option is available on
- all non-R3000 family processor. Not that at the time of this
- writing this option is still high experimental.
-
-endchoice
-
config ARCH_FORCE_MAX_ORDER
int "Maximum zone order"
default "13" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB
diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c
index b13d8adf3be4..20d30f6265cd 100644
--- a/arch/mips/alchemy/common/prom.c
+++ b/arch/mips/alchemy/common/prom.c
@@ -40,6 +40,7 @@
#include <linux/string.h>
#include <asm/bootinfo.h>
+#include <prom.h>
int prom_argc;
char **prom_argv;
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 2388d68786f4..a7a6d31a7a41 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -30,13 +30,11 @@
#include <linux/mm.h>
#include <linux/dma-map-ops.h> /* for dma_default_coherent */
+#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <au1000.h>
-extern void __init board_setup(void);
-extern void __init alchemy_set_lpj(void);
-
static bool alchemy_dma_coherent(void)
{
switch (alchemy_get_cputype()) {
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 01aff80a5967..99f321b6e417 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -702,7 +702,7 @@ static struct ssb_sprom bcm63xx_sprom = {
.boardflags_hi = 0x0000,
};
-int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+static int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
{
if (bus->bustype == SSB_BUSTYPE_PCI) {
memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom));
diff --git a/arch/mips/bcm63xx/dev-rng.c b/arch/mips/bcm63xx/dev-rng.c
index d277b4dc6c68..f94151f7c96f 100644
--- a/arch/mips/bcm63xx/dev-rng.c
+++ b/arch/mips/bcm63xx/dev-rng.c
@@ -26,7 +26,7 @@ static struct platform_device bcm63xx_rng_device = {
.resource = rng_resources,
};
-int __init bcm63xx_rng_register(void)
+static int __init bcm63xx_rng_register(void)
{
if (!BCMCPU_IS_6368())
return -ENODEV;
diff --git a/arch/mips/bcm63xx/dev-uart.c b/arch/mips/bcm63xx/dev-uart.c
index 3bc7f3bfc9ad..5d6bf0445b29 100644
--- a/arch/mips/bcm63xx/dev-uart.c
+++ b/arch/mips/bcm63xx/dev-uart.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <bcm63xx_cpu.h>
+#include <bcm63xx_dev_uart.h>
static struct resource uart0_resources[] = {
{
diff --git a/arch/mips/bcm63xx/dev-wdt.c b/arch/mips/bcm63xx/dev-wdt.c
index 42130914a3c2..302bf7ed5ad5 100644
--- a/arch/mips/bcm63xx/dev-wdt.c
+++ b/arch/mips/bcm63xx/dev-wdt.c
@@ -34,7 +34,7 @@ static struct platform_device bcm63xx_wdt_device = {
},
};
-int __init bcm63xx_wdt_register(void)
+static int __init bcm63xx_wdt_register(void)
{
wdt_resources[0].start = bcm63xx_regset_address(RSET_WDT);
wdt_resources[0].end = wdt_resources[0].start;
diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c
index 2548013442f6..6240a8f88ea3 100644
--- a/arch/mips/bcm63xx/irq.c
+++ b/arch/mips/bcm63xx/irq.c
@@ -72,7 +72,7 @@ static inline int enable_irq_for_cpu(int cpu, struct irq_data *d,
*/
#define BUILD_IPIC_INTERNAL(width) \
-void __dispatch_internal_##width(int cpu) \
+static void __dispatch_internal_##width(int cpu) \
{ \
u32 pending[width / 32]; \
unsigned int src, tgt; \
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
index d811e3e03f81..c13ddb544a23 100644
--- a/arch/mips/bcm63xx/setup.c
+++ b/arch/mips/bcm63xx/setup.c
@@ -159,7 +159,7 @@ void __init plat_mem_setup(void)
board_setup();
}
-int __init bcm63xx_register_devices(void)
+static int __init bcm63xx_register_devices(void)
{
/* register gpiochip */
bcm63xx_gpio_init();
diff --git a/arch/mips/bcm63xx/timer.c b/arch/mips/bcm63xx/timer.c
index a86065854c0c..74b83807df30 100644
--- a/arch/mips/bcm63xx/timer.c
+++ b/arch/mips/bcm63xx/timer.c
@@ -178,7 +178,7 @@ int bcm63xx_timer_set(int id, int monotonic, unsigned int countdown_us)
EXPORT_SYMBOL(bcm63xx_timer_set);
-int bcm63xx_timer_init(void)
+static int bcm63xx_timer_init(void)
{
int ret, irq;
u32 reg;
diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c
index 2e099d55a564..9a266bf78339 100644
--- a/arch/mips/cobalt/setup.c
+++ b/arch/mips/cobalt/setup.c
@@ -23,9 +23,6 @@
#include <cobalt.h>
-extern void cobalt_machine_restart(char *command);
-extern void cobalt_machine_halt(void);
-
const char *get_system_type(void)
{
switch (cobalt_board_id) {
diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index 66188739f54d..fb78e6fd5de4 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c
@@ -37,7 +37,7 @@ static unsigned int nr_prom_mem __initdata;
*/
#define ARC_PAGE_SHIFT 12
-struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current)
+static struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current)
{
return (struct linux_mdesc *) ARC_CALL1(get_mdesc, Current);
}
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 4044eaf989ac..0921ddda11a4 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
" .set pop"
: "=&r" (sum), "=&r" (tmp)
: "r" (saddr), "r" (daddr),
- "0" (htonl(len)), "r" (htonl(proto)), "r" (sum));
+ "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)
+ : "memory");
return csum_fold(sum);
}
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 081be98c71ef..ff5d388502d4 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -39,7 +39,7 @@ extern void jump_label_apply_nops(struct module *mod);
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\t" B_INSN " 2f\n\t"
+ asm goto("1:\t" B_INSN " 2f\n\t"
"2:\t.insn\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
@@ -53,7 +53,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t"
+ asm goto("1:\t" J_INSN " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index a7eec3364a64..41546777902b 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -597,6 +597,9 @@
#include <asm/cpu.h>
+void alchemy_set_lpj(void);
+void board_setup(void);
+
/* helpers to access the SYS_* registers */
static inline unsigned long alchemy_rdsys(int regofs)
{
diff --git a/arch/mips/include/asm/mach-cobalt/cobalt.h b/arch/mips/include/asm/mach-cobalt/cobalt.h
index 5b9fce73f11d..97f9d5e9446d 100644
--- a/arch/mips/include/asm/mach-cobalt/cobalt.h
+++ b/arch/mips/include/asm/mach-cobalt/cobalt.h
@@ -19,4 +19,7 @@ extern int cobalt_board_id;
#define COBALT_BRD_ID_QUBE2 0x5
#define COBALT_BRD_ID_RAQ2 0x6
+void cobalt_machine_halt(void);
+void cobalt_machine_restart(char *command);
+
#endif /* __ASM_COBALT_H */
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index ef9585d96f6b..4609cb0326cf 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -17,21 +17,7 @@
/*
* PAGE_SHIFT determines the page size
*/
-#ifdef CONFIG_PAGE_SIZE_4KB
-#define PAGE_SHIFT 12
-#endif
-#ifdef CONFIG_PAGE_SIZE_8KB
-#define PAGE_SHIFT 13
-#endif
-#ifdef CONFIG_PAGE_SIZE_16KB
-#define PAGE_SHIFT 14
-#endif
-#ifdef CONFIG_PAGE_SIZE_32KB
-#define PAGE_SHIFT 15
-#endif
-#ifdef CONFIG_PAGE_SIZE_64KB
-#define PAGE_SHIFT 16
-#endif
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index daf3cf244ea9..d14d0e37ad02 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -60,6 +60,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
regs->cp0_epc = val;
+ regs->cp0_cause &= ~CAUSEF_BD;
}
/* Query offset/name of register from its name/offset */
@@ -154,6 +155,8 @@ static inline long regs_return_value(struct pt_regs *regs)
}
#define instruction_pointer(regs) ((regs)->cp0_epc)
+extern unsigned long exception_ip(struct pt_regs *regs);
+#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)
extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h
index cc7b516129a8..afb03d45bcd0 100644
--- a/arch/mips/include/asm/vdso.h
+++ b/arch/mips/include/asm/vdso.h
@@ -50,9 +50,4 @@ extern struct mips_vdso_image vdso_image_o32;
extern struct mips_vdso_image vdso_image_n32;
#endif
-union mips_vdso_data {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-};
-
#endif /* __ASM_VDSO_H */
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 5582a4ca1e9e..7aa2c2360ff6 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -11,6 +11,7 @@
#include <asm/cpu-features.h>
#include <asm/cpu-info.h>
+#include <asm/fpu.h>
#ifdef CONFIG_MIPS_FP_SUPPORT
@@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state)
struct cpuinfo_mips *c = &boot_cpu_data;
struct task_struct *t = current;
+ /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case
+ * we are preempted before the lose_fpu(0) in start_thread.
+ */
+ lose_fpu(0);
+
t->thread.fpu.fcr31 = c->fpu_csr31;
switch (state->nan_2008) {
case 0:
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d9df543f7e2c..59288c13b581 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -31,6 +31,7 @@
#include <linux/seccomp.h>
#include <linux/ftrace.h>
+#include <asm/branch.h>
#include <asm/byteorder.h>
#include <asm/cpu.h>
#include <asm/cpu-info.h>
@@ -48,6 +49,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+unsigned long exception_ip(struct pt_regs *regs)
+{
+ return exception_epc(regs);
+}
+EXPORT_SYMBOL(exception_ip);
+
/*
* Called by kernel/ptrace.c when detaching..
*
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index dec6878b35f6..a1c1cb5de913 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64];
void reserve_exception_space(phys_addr_t addr, unsigned long size)
{
- memblock_reserve(addr, size);
+ /*
+ * reserve exception space on CPUs other than CPU0
+ * is too late, since memblock is unavailable when APs
+ * up
+ */
+ if (smp_processor_id() == 0)
+ memblock_reserve(addr, size);
}
void __init *set_except_vector(int n, void *addr)
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index f6d40e43f108..dda36fa26307 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -24,7 +24,7 @@
#include <vdso/vsyscall.h>
/* Kernel-provided data used by the VDSO. */
-static union mips_vdso_data mips_vdso_data __page_aligned_data;
+static union vdso_data_store mips_vdso_data __page_aligned_data;
struct vdso_data *vdso_data = mips_vdso_data.data;
/*
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index a3cf29365858..0c45767eacf6 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -108,10 +108,9 @@ void __init prom_init(void)
prom_init_cmdline();
#if defined(CONFIG_MIPS_MT_SMP)
- if (cpu_has_mipsmt) {
- lantiq_smp_ops = vsmp_smp_ops;
+ lantiq_smp_ops = vsmp_smp_ops;
+ if (cpu_has_mipsmt)
lantiq_smp_ops.init_secondary = lantiq_init_secondary;
- register_smp_ops(&lantiq_smp_ops);
- }
+ register_smp_ops(&lantiq_smp_ops);
#endif
}
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index f25caa6aa9d3..a35dd7311795 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -103,6 +103,9 @@ void __init szmem(unsigned int node)
if (loongson_sysconf.vgabios_addr)
memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr),
SZ_256K);
+ /* set nid for reserved memory */
+ memblock_set_node((u64)node << 44, (u64)(node + 1) << 44,
+ &memblock.reserved, node);
}
#ifndef CONFIG_NUMA
@@ -177,7 +180,7 @@ static int __init add_legacy_isa_io(struct fwnode_handle *fwnode, resource_size_
vaddr = PCI_IOBASE + range->io_start;
- ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
+ vmap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
return 0;
}
diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index 8f61e93c0c5b..68dafd6d3e25 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -132,6 +132,8 @@ static void __init node_mem_init(unsigned int node)
/* Reserve pfn range 0~node[0]->node_start_pfn */
memblock_reserve(0, PAGE_SIZE * start_pfn);
+ /* set nid for reserved memory on node 0 */
+ memblock_set_node(0, 1ULL << 44, &memblock.reserved, 0);
}
}
diff --git a/arch/mips/sgi-ip27/Makefile b/arch/mips/sgi-ip27/Makefile
index 27c14ede191e..9877fcc512b1 100644
--- a/arch/mips/sgi-ip27/Makefile
+++ b/arch/mips/sgi-ip27/Makefile
@@ -5,7 +5,7 @@
obj-y := ip27-berr.o ip27-irq.o ip27-init.o ip27-klconfig.o \
ip27-klnuma.o ip27-memory.o ip27-nmi.o ip27-reset.o ip27-timer.o \
- ip27-hubio.o ip27-xtalk.o
+ ip27-xtalk.o
obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o
obj-$(CONFIG_SMP) += ip27-smp.o
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 923a63a51cda..9eb497cb5d52 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -22,6 +22,8 @@
#include <asm/traps.h>
#include <linux/uaccess.h>
+#include "ip27-common.h"
+
static void dump_hub_information(unsigned long errst0, unsigned long errst1)
{
static char *err_type[2][8] = {
@@ -57,7 +59,7 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1)
[st0.pi_stat0_fmt.s0_err_type] ? : "invalid");
}
-int ip27_be_handler(struct pt_regs *regs, int is_fixup)
+static int ip27_be_handler(struct pt_regs *regs, int is_fixup)
{
unsigned long errst0, errst1;
int data = regs->cp0_cause & 4;
diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h
index ed008a08464c..a0059fa13934 100644
--- a/arch/mips/sgi-ip27/ip27-common.h
+++ b/arch/mips/sgi-ip27/ip27-common.h
@@ -10,6 +10,7 @@ extern void hub_rt_clock_event_init(void);
extern void hub_rtc_init(nasid_t nasid);
extern void install_cpu_nmi_handler(int slice);
extern void install_ipi(void);
+extern void ip27_be_init(void);
extern void ip27_reboot_setup(void);
extern const struct plat_smp_ops ip27_smp_ops;
extern unsigned long node_getfirstfree(nasid_t nasid);
@@ -17,4 +18,5 @@ extern void per_cpu_init(void);
extern void replicate_kernel_text(void);
extern void setup_replication_mask(void);
+
#endif /* __IP27_COMMON_H */
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
deleted file mode 100644
index c57f0d8f3218..000000000000
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1992-1997, 2000-2003 Silicon Graphics, Inc.
- * Copyright (C) 2004 Christoph Hellwig.
- *
- * Support functions for the HUB ASIC - mostly PIO mapping related.
- */
-
-#include <linux/bitops.h>
-#include <linux/string.h>
-#include <linux/mmzone.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/arch.h>
-#include <asm/sn/agent.h>
-#include <asm/sn/io.h>
-#include <asm/xtalk/xtalk.h>
-
-
-static int force_fire_and_forget = 1;
-
-/**
- * hub_pio_map - establish a HUB PIO mapping
- *
- * @nasid: nasid to perform PIO mapping on
- * @widget: widget ID to perform PIO mapping for
- * @xtalk_addr: xtalk_address that needs to be mapped
- * @size: size of the PIO mapping
- *
- **/
-unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
- unsigned long xtalk_addr, size_t size)
-{
- unsigned i;
-
- /* use small-window mapping if possible */
- if ((xtalk_addr % SWIN_SIZE) + size <= SWIN_SIZE)
- return NODE_SWIN_BASE(nasid, widget) + (xtalk_addr % SWIN_SIZE);
-
- if ((xtalk_addr % BWIN_SIZE) + size > BWIN_SIZE) {
- printk(KERN_WARNING "PIO mapping at hub %d widget %d addr 0x%lx"
- " too big (%ld)\n",
- nasid, widget, xtalk_addr, size);
- return 0;
- }
-
- xtalk_addr &= ~(BWIN_SIZE-1);
- for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) {
- if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used))
- continue;
-
- /*
- * The code below does a PIO write to setup an ITTE entry.
- *
- * We need to prevent other CPUs from seeing our updated
- * memory shadow of the ITTE (in the piomap) until the ITTE
- * entry is actually set up; otherwise, another CPU might
- * attempt a PIO prematurely.
- *
- * Also, the only way we can know that an entry has been
- * received by the hub and can be used by future PIO reads/
- * writes is by reading back the ITTE entry after writing it.
- *
- * For these two reasons, we PIO read back the ITTE entry
- * after we write it.
- */
- IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr);
- __raw_readq(IIO_ITTE_GET(nasid, i));
-
- return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE);
- }
-
- printk(KERN_WARNING "unable to establish PIO mapping for at"
- " hub %d widget %d addr 0x%lx\n",
- nasid, widget, xtalk_addr);
- return 0;
-}
-
-
-/*
- * hub_setup_prb(nasid, prbnum, credits, conveyor)
- *
- * Put a PRB into fire-and-forget mode if conveyor isn't set. Otherwise,
- * put it into conveyor belt mode with the specified number of credits.
- */
-static void hub_setup_prb(nasid_t nasid, int prbnum, int credits)
-{
- union iprb_u prb;
- int prb_offset;
-
- /*
- * Get the current register value.
- */
- prb_offset = IIO_IOPRB(prbnum);
- prb.iprb_regval = REMOTE_HUB_L(nasid, prb_offset);
-
- /*
- * Clear out some fields.
- */
- prb.iprb_ovflow = 1;
- prb.iprb_bnakctr = 0;
- prb.iprb_anakctr = 0;
-
- /*
- * Enable or disable fire-and-forget mode.
- */
- prb.iprb_ff = force_fire_and_forget ? 1 : 0;
-
- /*
- * Set the appropriate number of PIO credits for the widget.
- */
- prb.iprb_xtalkctr = credits;
-
- /*
- * Store the new value to the register.
- */
- REMOTE_HUB_S(nasid, prb_offset, prb.iprb_regval);
-}
-
-/**
- * hub_set_piomode - set pio mode for a given hub
- *
- * @nasid: physical node ID for the hub in question
- *
- * Put the hub into either "PIO conveyor belt" mode or "fire-and-forget" mode.
- * To do this, we have to make absolutely sure that no PIOs are in progress
- * so we turn off access to all widgets for the duration of the function.
- *
- * XXX - This code should really check what kind of widget we're talking
- * to. Bridges can only handle three requests, but XG will do more.
- * How many can crossbow handle to widget 0? We're assuming 1.
- *
- * XXX - There is a bug in the crossbow that link reset PIOs do not
- * return write responses. The easiest solution to this problem is to
- * leave widget 0 (xbow) in fire-and-forget mode at all times. This
- * only affects pio's to xbow registers, which should be rare.
- **/
-static void hub_set_piomode(nasid_t nasid)
-{
- u64 ii_iowa;
- union hubii_wcr_u ii_wcr;
- unsigned i;
-
- ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS);
- REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, 0);
-
- ii_wcr.wcr_reg_value = REMOTE_HUB_L(nasid, IIO_WCR);
-
- if (ii_wcr.iwcr_dir_con) {
- /*
- * Assume a bridge here.
- */
- hub_setup_prb(nasid, 0, 3);
- } else {
- /*
- * Assume a crossbow here.
- */
- hub_setup_prb(nasid, 0, 1);
- }
-
- /*
- * XXX - Here's where we should take the widget type into
- * when account assigning credits.
- */
- for (i = HUB_WIDGET_ID_MIN; i <= HUB_WIDGET_ID_MAX; i++)
- hub_setup_prb(nasid, i, 3);
-
- REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, ii_iowa);
-}
-
-/*
- * hub_pio_init - PIO-related hub initialization
- *
- * @hub: hubinfo structure for our hub
- */
-void hub_pio_init(nasid_t nasid)
-{
- unsigned i;
-
- /* initialize big window piomaps for this hub */
- bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW);
- for (i = 0; i < HUB_NUM_BIG_WINDOW; i++)
- IIO_ITTE_DISABLE(nasid, i);
-
- hub_set_piomode(nasid);
-}
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index a0dd3bd2b81b..8f5299b269e7 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -23,6 +23,8 @@
#include <asm/sn/intr.h>
#include <asm/sn/irq_alloc.h>
+#include "ip27-common.h"
+
struct hub_irq_data {
u64 *irq_mask[2];
cpuid_t cpu;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index f79c48393716..b8ca94cfb4fe 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -23,6 +23,7 @@
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
+#include <asm/sgialib.h>
#include <asm/sn/arch.h>
#include <asm/sn/agent.h>
diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c
index 84889b57d5ff..fc2816398d0c 100644
--- a/arch/mips/sgi-ip27/ip27-nmi.c
+++ b/arch/mips/sgi-ip27/ip27-nmi.c
@@ -11,6 +11,8 @@
#include <asm/sn/arch.h>
#include <asm/sn/agent.h>
+#include "ip27-common.h"
+
#if 0
#define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n)
#else
@@ -23,16 +25,7 @@
typedef unsigned long machreg_t;
static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-
-/*
- * Let's see what else we need to do here. Set up sp, gp?
- */
-void nmi_dump(void)
-{
- void cont_nmi_dump(void);
-
- cont_nmi_dump();
-}
+static void nmi_dump(void);
void install_cpu_nmi_handler(int slice)
{
@@ -53,7 +46,7 @@ void install_cpu_nmi_handler(int slice)
* into the eframe format for the node under consideration.
*/
-void nmi_cpu_eframe_save(nasid_t nasid, int slice)
+static void nmi_cpu_eframe_save(nasid_t nasid, int slice)
{
struct reg_struct *nr;
int i;
@@ -129,7 +122,7 @@ void nmi_cpu_eframe_save(nasid_t nasid, int slice)
pr_emerg("\n");
}
-void nmi_dump_hub_irq(nasid_t nasid, int slice)
+static void nmi_dump_hub_irq(nasid_t nasid, int slice)
{
u64 mask0, mask1, pend0, pend1;
@@ -153,7 +146,7 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice)
* Copy the cpu registers which have been saved in the IP27prom format
* into the eframe format for the node under consideration.
*/
-void nmi_node_eframe_save(nasid_t nasid)
+static void nmi_node_eframe_save(nasid_t nasid)
{
int slice;
@@ -170,8 +163,7 @@ void nmi_node_eframe_save(nasid_t nasid)
/*
* Save the nmi cpu registers for all cpus in the system.
*/
-void
-nmi_eframes_save(void)
+static void nmi_eframes_save(void)
{
nasid_t nasid;
@@ -179,8 +171,7 @@ nmi_eframes_save(void)
nmi_node_eframe_save(nasid);
}
-void
-cont_nmi_dump(void)
+static void nmi_dump(void)
{
#ifndef REAL_NMI_SIGNAL
static atomic_t nmied_cpus = ATOMIC_INIT(0);
diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c
index b91f8c4fdc78..7c6dcf6e73f7 100644
--- a/arch/mips/sgi-ip30/ip30-console.c
+++ b/arch/mips/sgi-ip30/ip30-console.c
@@ -3,6 +3,7 @@
#include <linux/io.h>
#include <asm/sn/ioc3.h>
+#include <asm/setup.h>
static inline struct ioc3_uartregs *console_uart(void)
{
diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c
index 75a34684e704..e8547636a748 100644
--- a/arch/mips/sgi-ip30/ip30-setup.c
+++ b/arch/mips/sgi-ip30/ip30-setup.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/memblock.h>
+#include <asm/bootinfo.h>
#include <asm/smp-ops.h>
#include <asm/sgialib.h>
#include <asm/time.h>
diff --git a/arch/mips/sgi-ip32/crime.c b/arch/mips/sgi-ip32/crime.c
index a8e0c776ca6c..b8a0e4cfa9ce 100644
--- a/arch/mips/sgi-ip32/crime.c
+++ b/arch/mips/sgi-ip32/crime.c
@@ -18,6 +18,8 @@
#include <asm/ip32/crime.h>
#include <asm/ip32/mace.h>
+#include "ip32-common.h"
+
struct sgi_crime __iomem *crime;
struct sgi_mace __iomem *mace;
@@ -39,7 +41,7 @@ void __init crime_init(void)
id, rev, field, (unsigned long) CRIME_BASE);
}
-irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id)
+irqreturn_t crime_memerr_intr(int irq, void *dev_id)
{
unsigned long stat, addr;
int fatal = 0;
@@ -90,7 +92,7 @@ irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id)
return IRQ_HANDLED;
}
-irqreturn_t crime_cpuerr_intr(unsigned int irq, void *dev_id)
+irqreturn_t crime_cpuerr_intr(int irq, void *dev_id)
{
unsigned long stat = crime->cpu_error_stat & CRIME_CPU_ERROR_MASK;
unsigned long addr = crime->cpu_error_addr & CRIME_CPU_ERROR_ADDR_MASK;
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index 478b63b4c808..7cbc27941f92 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -18,6 +18,8 @@
#include <asm/ptrace.h>
#include <asm/tlbdebug.h>
+#include "ip32-common.h"
+
static int ip32_be_handler(struct pt_regs *regs, int is_fixup)
{
int data = regs->cp0_cause & 4;
diff --git a/arch/mips/sgi-ip32/ip32-common.h b/arch/mips/sgi-ip32/ip32-common.h
new file mode 100644
index 000000000000..cfc0225b1419
--- /dev/null
+++ b/arch/mips/sgi-ip32/ip32-common.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IP32_COMMON_H
+#define __IP32_COMMON_H
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+
+void __init crime_init(void);
+irqreturn_t crime_memerr_intr(int irq, void *dev_id);
+irqreturn_t crime_cpuerr_intr(int irq, void *dev_id);
+void __init ip32_be_init(void);
+void ip32_prepare_poweroff(void);
+
+#endif /* __IP32_COMMON_H */
diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c
index e21ea1de05e3..29d04468a06b 100644
--- a/arch/mips/sgi-ip32/ip32-irq.c
+++ b/arch/mips/sgi-ip32/ip32-irq.c
@@ -28,6 +28,8 @@
#include <asm/ip32/mace.h>
#include <asm/ip32/ip32_ints.h>
+#include "ip32-common.h"
+
/* issue a PIO read to make sure no PIO writes are pending */
static inline void flush_crime_bus(void)
{
@@ -107,10 +109,6 @@ static inline void flush_mace_bus(void)
* is quite different anyway.
*/
-/* Some initial interrupts to set up */
-extern irqreturn_t crime_memerr_intr(int irq, void *dev_id);
-extern irqreturn_t crime_cpuerr_intr(int irq, void *dev_id);
-
/*
* This is for pure CRIME interrupts - ie not MACE. The advantage?
* We get to split the register in half and do faster lookups.
diff --git a/arch/mips/sgi-ip32/ip32-memory.c b/arch/mips/sgi-ip32/ip32-memory.c
index 3fc8d0a0bdfa..5fee33744f67 100644
--- a/arch/mips/sgi-ip32/ip32-memory.c
+++ b/arch/mips/sgi-ip32/ip32-memory.c
@@ -15,6 +15,7 @@
#include <asm/ip32/crime.h>
#include <asm/bootinfo.h>
#include <asm/page.h>
+#include <asm/sgialib.h>
extern void crime_init(void);
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 18d1c115cd53..6bdc1421cda4 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -29,6 +29,8 @@
#include <asm/ip32/crime.h>
#include <asm/ip32/ip32_ints.h>
+#include "ip32-common.h"
+
#define POWERDOWN_TIMEOUT 120
/*
* Blink frequency during reboot grace period and when panicked.
diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c
index 8019dae1721a..aeb0805aae57 100644
--- a/arch/mips/sgi-ip32/ip32-setup.c
+++ b/arch/mips/sgi-ip32/ip32-setup.c
@@ -26,8 +26,7 @@
#include <asm/ip32/mace.h>
#include <asm/ip32/ip32_ints.h>
-extern void ip32_be_init(void);
-extern void crime_init(void);
+#include "ip32-common.h"
#ifdef CONFIG_SGI_O2MACE_ETH
/*
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 58d9565dc2c7..79d3039b29f1 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -15,6 +15,7 @@ config NIOS2
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_KGDB
+ select HAVE_PAGE_SIZE_4KB
select IRQ_DOMAIN
select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h
index 0ae7d9ce369b..0722f88e63cc 100644
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -21,7 +21,7 @@
/*
* PAGE_SHIFT determines the page size
*/
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index fd9bb76a610b..3586cda55bde 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -25,6 +25,7 @@ config OPENRISC
select GENERIC_CPU_DEVICES
select HAVE_PCI
select HAVE_UID16
+ select HAVE_PAGE_SIZE_8KB
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index 44fc1fd56717..1d5913f67c31 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -18,7 +18,7 @@
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 13
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#ifdef __ASSEMBLY__
#define PAGE_SIZE (1 << PAGE_SHIFT)
#else
@@ -77,11 +77,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
return __pa(kaddr) >> PAGE_SHIFT;
}
-static inline void * pfn_to_virt(unsigned long pfn)
-{
- return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT);
-}
-
#define virt_to_page(addr) \
(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
index 1c5a2d71d675..86da4bc5ee0b 100644
--- a/arch/openrisc/kernel/smp.c
+++ b/arch/openrisc/kernel/smp.c
@@ -57,10 +57,6 @@ static void boot_secondary(unsigned int cpu, struct task_struct *idle)
spin_unlock(&boot_lock);
}
-void __init smp_prepare_boot_cpu(void)
-{
-}
-
void __init smp_init_cpus(void)
{
struct device_node *cpu;
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index d14ccc948a29..052d27fbea15 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -12,7 +12,7 @@ config PARISC
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN
select ARCH_SUPPORTS_HUGETLBFS if PA20
@@ -25,7 +25,6 @@ config PARISC
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
select BUG
- select BUILDTIME_TABLE_SORT
select HAVE_KERNEL_UNCOMPRESSED
select HAVE_PCI
select HAVE_PERF_EVENTS
@@ -274,6 +273,7 @@ choice
config PARISC_PAGE_SIZE_4KB
bool "4KB"
+ select HAVE_PAGE_SIZE_4KB
help
This lets you select the page size of the kernel. For best
performance, a page size of 16KB is recommended. For best
@@ -289,10 +289,12 @@ config PARISC_PAGE_SIZE_4KB
config PARISC_PAGE_SIZE_16KB
bool "16KB"
+ select HAVE_PAGE_SIZE_16KB
depends on PA8X00 && BROKEN && !KFENCE
config PARISC_PAGE_SIZE_64KB
bool "64KB"
+ select HAVE_PAGE_SIZE_64KB
depends on PA8X00 && BROKEN && !KFENCE
endchoice
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 920db57b6b4c..316f84f1d15c 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -50,12 +50,12 @@ export CROSS32CC
# Set default cross compiler for kernel build
ifdef cross_compiling
- ifeq ($(CROSS_COMPILE),)
+ ifeq ($(CROSS_COMPILE),)
CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux
CROSS_COMPILE := $(call cc-cross-prefix, \
$(foreach a,$(CC_ARCHES), \
$(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
- endif
+ endif
endif
ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 74d17d7e759d..5937d5edaba1 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -576,6 +576,7 @@
.section __ex_table,"aw" ! \
.align 4 ! \
.word (fault_addr - .), (except_addr - .) ! \
+ or %r0,%r0,%r0 ! \
.previous
diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h
new file mode 100644
index 000000000000..4ea23e3d79dc
--- /dev/null
+++ b/arch/parisc/include/asm/extable.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_EXTABLE_H
+#define __PARISC_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
+/*
+ * The exception table consists of three addresses:
+ *
+ * - A relative address to the instruction that is allowed to fault.
+ * - A relative address at which the program should continue (fixup routine)
+ * - An asm statement which specifies which CPU register will
+ * receive -EFAULT when an exception happens if the lowest bit in
+ * the fixup address is set.
+ *
+ * Note: The register specified in the err_opcode instruction will be
+ * modified at runtime if a fault happens. Register %r0 will be ignored.
+ *
+ * Since relative addresses are used, 32bit values are sufficient even on
+ * 64bit kernel.
+ */
+
+struct pt_regs;
+int fixup_exception(struct pt_regs *regs);
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+struct exception_table_entry {
+ int insn; /* relative address of insn that is allowed to fault. */
+ int fixup; /* relative address of fixup routine */
+ int err_opcode; /* sample opcode with register which holds error code */
+};
+
+#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\
+ ".section __ex_table,\"aw\"\n" \
+ ".align 4\n" \
+ ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \
+ opcode "\n" \
+ ".previous\n"
+
+/*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT on fault into the register specified by the err_opcode instruction,
+ * and zeroes the target register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_VAR(__err_var) \
+ int __err_var = 0
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\
+ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register)
+
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+ struct exception_table_entry *b,
+ struct exception_table_entry tmp,
+ int delta)
+{
+ a->fixup = b->fixup + delta;
+ b->fixup = tmp.fixup - delta;
+ a->err_opcode = b->err_opcode;
+ b->err_opcode = tmp.err_opcode;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+#endif
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index 94428798b6aa..317ebc5edc9f 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -12,7 +12,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align %1\n\t"
@@ -29,7 +29,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b,n %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".align %1\n\t"
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
index 0a175ac87698..0f42f5c8e3b6 100644
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -10,9 +10,10 @@
#ifndef _PARISC_KPROBES_H
#define _PARISC_KPROBES_H
+#include <asm-generic/kprobes.h>
+
#ifdef CONFIG_KPROBES
-#include <asm-generic/kprobes.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 667e703c0e8f..ad4e15d12ed1 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -4,15 +4,7 @@
#include <linux/const.h>
-#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-# define PAGE_SHIFT 12
-#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
-# define PAGE_SHIFT 14
-#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
-# define PAGE_SHIFT 16
-#else
-# error "unknown default kernel page size"
-#endif
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index c822bd0c0e3c..51f40eaf7780 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -8,7 +8,8 @@
"copy %%r0,%0\n" \
"8:\tlpa %%r0(%1),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
: "=&r" (pa) \
: "r" (va) \
: "memory" \
@@ -22,7 +23,8 @@
"copy %%r0,%0\n" \
"8:\tlpa %%r0(%%sr3,%1),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
: "=&r" (pa) \
: "r" (va) \
: "memory" \
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 4165079898d9..88d0ae5769dd 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
*/
#include <asm/page.h>
#include <asm/cache.h>
+#include <asm/extable.h>
#include <linux/bug.h>
#include <linux/string.h>
@@ -26,37 +27,6 @@
#define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr)
#endif
-/*
- * The exception table contains two values: the first is the relative offset to
- * the address of the instruction that is allowed to fault, and the second is
- * the relative offset to the address of the fixup routine. Since relative
- * addresses are used, 32bit values are sufficient even on 64bit kernel.
- */
-
-#define ARCH_HAS_RELATIVE_EXTABLE
-struct exception_table_entry {
- int insn; /* relative address of insn that is allowed to fault. */
- int fixup; /* relative address of fixup routine */
-};
-
-#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
- ".section __ex_table,\"aw\"\n" \
- ".align 4\n" \
- ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
- ".previous\n"
-
-/*
- * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
- * (with lowest bit set) for which the fault handler in fixup_exception() will
- * load -EFAULT into %r29 for a read or write fault, and zeroes the target
- * register in case of a read fault in get_user().
- */
-#define ASM_EXCEPTIONTABLE_REG 29
-#define ASM_EXCEPTIONTABLE_VAR(__variable) \
- register long __variable __asm__ ("r29") = 0
-#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
- ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
-
#define __get_user_internal(sr, val, ptr) \
({ \
ASM_EXCEPTIONTABLE_VAR(__gu_err); \
@@ -83,7 +53,7 @@ struct exception_table_entry {
\
__asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \
: "=r"(__gu_val), "+r"(__gu_err) \
: "i"(sr), "r"(ptr)); \
\
@@ -115,8 +85,8 @@ struct exception_table_entry {
"1: ldw 0(%%sr%2,%3),%0\n" \
"2: ldw 4(%%sr%2,%3),%R0\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \
: "=&r"(__gu_tmp.l), "+r"(__gu_err) \
: "i"(sr), "r"(ptr)); \
\
@@ -174,7 +144,7 @@ struct exception_table_entry {
__asm__ __volatile__ ( \
"1: " stx " %1,0(%%sr%2,%3)\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \
: "+r"(__pu_err) \
: "r"(x), "i"(sr), "r"(ptr))
@@ -186,15 +156,14 @@ struct exception_table_entry {
"1: stw %1,0(%%sr%2,%3)\n" \
"2: stw %R1,4(%%sr%2,%3)\n" \
"9:\n" \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \
: "+r"(__pu_err) \
: "r"(__val), "i"(sr), "r"(ptr)); \
} while (0)
#endif /* !defined(CONFIG_64BIT) */
-
/*
* Complex access routines -- external declarations
*/
@@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src,
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
-struct pt_regs;
-int fixup_exception(struct pt_regs *regs);
-
#endif /* __PARISC_UACCESS_H */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 268d90a9325b..422f3e1e6d9c 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init;
struct pdc_cache_info cache_info __ro_after_init;
#ifndef CONFIG_PA20
-struct pdc_btlb_info btlb_info __ro_after_init;
+struct pdc_btlb_info btlb_info;
#endif
DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
@@ -264,6 +264,10 @@ parisc_cache_init(void)
icache_stride = CAFL_STRIDE(cache_info.ic_conf);
#undef CAFL_STRIDE
+ /* stride needs to be non-zero, otherwise cache flushes will not work */
+ WARN_ON(cache_info.dc_size && dcache_stride == 0);
+ WARN_ON(cache_info.ic_size && icache_stride == 0);
+
if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) ==
PDC_MODEL_NVA_UNSUPPORTED) {
printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n");
@@ -850,7 +854,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
#endif
" fic,m %3(%4,%0)\n"
"2: sync\n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1")
: "+r" (start), "+r" (error)
: "r" (end), "r" (dcache_stride), "i" (SR_USER));
}
@@ -865,7 +869,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
#endif
" fdc,m %3(%4,%0)\n"
"2: sync\n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1")
: "+r" (start), "+r" (error)
: "r" (end), "r" (icache_stride), "i" (SR_USER));
}
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 25f9b9e9d6df..c7ff339732ba 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -742,7 +742,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath)
};
if (device_for_each_child(parent, &recurse_data, descend_children))
- { /* nothing */ };
+ { /* nothing */ }
return d.dev;
}
@@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
pr_info("\n");
+ /* Prevent hung task messages when printing on serial console */
+ cond_resched();
+
pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n",
hpa, parisc_hardware_description(&dev->id));
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index d1defb9ede70..621a4b386ae4 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -78,7 +78,7 @@ asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent,
#endif
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
int ftrace_enable_ftrace_graph_caller(void)
{
static_key_enable(&ftrace_graph_enable.key);
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index e95a977ba5f3..bf73562706b2 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -172,7 +172,6 @@ static int __init processor_probe(struct parisc_device *dev)
p->cpu_num = cpu_info.cpu_num;
p->cpu_loc = cpu_info.cpu_loc;
- set_cpu_possible(cpuid, true);
store_cpu_topology(cpuid);
#ifdef CONFIG_SMP
@@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver __refdata = {
*/
void __init processor_init(void)
{
- unsigned int cpu;
-
reset_cpu_topology();
-
- /* reset possible mask. We will mark those which are possible. */
- for_each_possible_cpu(cpu)
- set_cpu_possible(cpu, false);
-
register_parisc_driver(&cpu_driver);
}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index ce25acfe4889..c520e551a165 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
"2: ldbs 1(%%sr1,%3), %0\n"
" depw %2, 23, 24, %0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "+r" (val), "+r" (ret), "=&r" (temp1)
: "r" (saddr), "r" (regs->isr) );
@@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
" mtctl %2,11\n"
" vshd %0,%3,%0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
: "r" (saddr), "r" (regs->isr) );
@@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" mtsar %%r19\n"
" shrpd %0,%%r20,%%sar,%0\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
: "=r" (val), "+r" (ret)
: "0" (val), "r" (saddr), "r" (regs->isr)
: "r19", "r20" );
@@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" vshd %0,%R0,%0\n"
" vshd %R0,%4,%R0\n"
"4: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
: "r" (regs->isr) );
}
@@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
"1: stb %1, 0(%%sr1, %3)\n"
"2: stb %2, 1(%%sr1, %3)\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
: "+r" (ret), "=&r" (temp1)
: "r" (val), "r" (regs->ior), "r" (regs->isr) );
@@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
" stw %%r20,0(%%sr1,%2)\n"
" stw %%r21,4(%%sr1,%2)\n"
"3: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1" );
@@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
"3: std %%r20,0(%%sr1,%2)\n"
"4: std %%r21,8(%%sr1,%2)\n"
"5: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1" );
@@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
"4: stw %%r1,4(%%sr1,%2)\n"
"5: stw %R1,8(%%sr1,%2)\n"
"6: \n"
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
- ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0")
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0")
: "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r1" );
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 27ae40a443b8..f7e0fee5ee55 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -228,10 +228,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
#ifdef CONFIG_IRQSTACKS
extern void * const _call_on_stack;
#endif /* CONFIG_IRQSTACKS */
- void *ptr;
- ptr = dereference_kernel_function_descriptor(&handle_interruption);
- if (pc_is_kernel_fn(pc, ptr)) {
+ if (pc_is_kernel_fn(pc, handle_interruption)) {
struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN);
dbg("Unwinding through handle_interruption()\n");
info->prev_sp = regs->gr[30];
@@ -239,13 +237,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
return 1;
}
- if (pc_is_kernel_fn(pc, ret_from_kernel_thread) ||
- pc_is_kernel_fn(pc, syscall_exit)) {
+ if (pc == (unsigned long)&ret_from_kernel_thread ||
+ pc == (unsigned long)&syscall_exit) {
info->prev_sp = info->prev_ip = 0;
return 1;
}
- if (pc_is_kernel_fn(pc, intr_return)) {
+ if (pc == (unsigned long)&intr_return) {
struct pt_regs *regs;
dbg("Found intr_return()\n");
@@ -257,14 +255,14 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
}
if (pc_is_kernel_fn(pc, _switch_to) ||
- pc_is_kernel_fn(pc, _switch_to_ret)) {
+ pc == (unsigned long)&_switch_to_ret) {
info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE;
info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET);
return 1;
}
#ifdef CONFIG_IRQSTACKS
- if (pc_is_kernel_fn(pc, _call_on_stack)) {
+ if (pc == (unsigned long)&_call_on_stack) {
info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ);
info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET);
return 1;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 548051b0b4af..b445e47903cf 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -127,7 +127,7 @@ SECTIONS
}
#endif
- RO_DATA(8)
+ RO_DATA(PAGE_SIZE)
/* unwind info */
. = ALIGN(4);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 2fe5b44986e0..c39de84e98b0 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs)
* Fix up get_user() and put_user().
* ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
* bit in the relative address of the fixup routine to indicate
- * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with
- * -EFAULT to report a userspace access error.
+ * that the register encoded in the "or %r0,%r0,register"
+ * opcode should be loaded with -EFAULT to report a userspace
+ * access error.
*/
if (fix->fixup & 1) {
- regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT;
+ int fault_error_reg = fix->err_opcode & 0x1f;
+ if (!WARN_ON(!fault_error_reg))
+ regs->gr[fault_error_reg] = -EFAULT;
+ pr_debug("Unalignment fixup of register %d at %pS\n",
+ fault_error_reg, (void*)regs->iaoq[0]);
/* zero target register for get_user() */
if (parisc_acctyp(0, regs->iir) == VM_READ) {
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b9fc064d38d2..a91cb070ca4a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -154,7 +154,7 @@ config PPC
select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE && !COMPAT
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
@@ -212,7 +212,7 @@ config PPC
select HAVE_ARCH_HUGE_VMAP if PPC_RADIX_MMU || PPC_8xx
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
- select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
+ select HAVE_ARCH_KASAN if PPC32 && PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN if PPC_RADIX_MMU
select HAVE_ARCH_KASAN if PPC_BOOK3E_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
@@ -809,19 +809,23 @@ choice
config PPC_4K_PAGES
bool "4k page size"
select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
+ select HAVE_PAGE_SIZE_4KB
config PPC_16K_PAGES
bool "16k page size"
depends on 44x || PPC_8xx
+ select HAVE_PAGE_SIZE_16KB
config PPC_64K_PAGES
bool "64k page size"
depends on 44x || PPC_BOOK3S_64
select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
+ select HAVE_PAGE_SIZE_64KB
config PPC_256K_PAGES
bool "256k page size (Requires non-standard binutils settings)"
depends on 44x && !PPC_47x
+ select HAVE_PAGE_SIZE_256KB
help
Make the page size 256k.
@@ -832,29 +836,6 @@ config PPC_256K_PAGES
endchoice
-config PAGE_SIZE_4KB
- def_bool y
- depends on PPC_4K_PAGES
-
-config PAGE_SIZE_16KB
- def_bool y
- depends on PPC_16K_PAGES
-
-config PAGE_SIZE_64KB
- def_bool y
- depends on PPC_64K_PAGES
-
-config PAGE_SIZE_256KB
- def_bool y
- depends on PPC_256K_PAGES
-
-config PPC_PAGE_SHIFT
- int
- default 18 if PPC_256K_PAGES
- default 16 if PPC_64K_PAGES
- default 14 if PPC_16K_PAGES
- default 12
-
config THREAD_SHIFT
int "Thread shift" if EXPERT
range 13 15
@@ -891,7 +872,7 @@ config DATA_SHIFT
default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
default 24 if STRICT_KERNEL_RWX && PPC_85xx
- default PPC_PAGE_SHIFT
+ default PAGE_SHIFT
help
On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
Smaller is the alignment, greater is the number of necessary DBATs.
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 1ebd2ca97f12..107fc5a48456 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -20,14 +20,6 @@
#ifndef __ASSEMBLY__
extern void _mcount(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
- if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
- addr += MCOUNT_INSN_SIZE;
-
- return addr;
-}
-
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
unsigned long sp);
@@ -142,8 +134,10 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
#ifdef CONFIG_FUNCTION_TRACER
extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
void ftrace_free_init_tramp(void);
+unsigned long ftrace_call_adjust(unsigned long addr);
#else
static inline void ftrace_free_init_tramp(void) { }
+static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; }
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 93ce3ec25387..2f2a86ed2280 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
@@ -32,7 +32,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e5fcc79b5bfb..e411e5a70ea3 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -21,7 +21,7 @@
* page size. When using 64K pages however, whether we are really supporting
* 64K pages in HW or not is irrelevant to those definitions.
*/
-#define PAGE_SHIFT CONFIG_PPC_PAGE_SHIFT
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h
index 0dbbff59101d..c3cd5b131033 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -32,7 +32,7 @@ typedef struct {
*/
struct papr_sysparm_buf {
__be16 len;
- char val[PAPR_SYSPARM_MAX_OUTPUT];
+ u8 val[PAPR_SYSPARM_MAX_OUTPUT];
};
struct papr_sysparm_buf *papr_sysparm_buf_alloc(void);
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index ce2b1b5eebdd..a8b7e8682f5b 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -30,6 +30,16 @@ void *pci_traverse_device_nodes(struct device_node *start,
void *data);
extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
+#if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \
+ defined(CONFIG_PPC_POWERNV))
+extern void ppc_iommu_register_device(struct pci_controller *phb);
+extern void ppc_iommu_unregister_device(struct pci_controller *phb);
+#else
+static inline void ppc_iommu_register_device(struct pci_controller *phb) { }
+static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { }
+#endif
+
+
/* From rtas_pci.h */
extern void init_pci_config_tokens (void);
extern unsigned long get_phb_buid (struct device_node *);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7fd09f25452d..bb47af9054a9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -617,6 +617,8 @@
#endif
#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */
#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */
+#define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */
+#define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */
#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */
#define SPRN_IABR2 0x3FA /* 83xx */
#define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9bb2210c8d44..065ffd1b2f8a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -69,7 +69,7 @@ enum rtas_function_index {
RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE,
RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2,
RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW,
- RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS,
+ RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW,
RTAS_FNIDX__IBM_SCAN_LOG_DUMP,
RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR,
RTAS_FNIDX__IBM_SET_EEH_OPTION,
@@ -164,7 +164,7 @@ typedef struct {
#define RTAS_FN_IBM_READ_SLOT_RESET_STATE rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE)
#define RTAS_FN_IBM_READ_SLOT_RESET_STATE2 rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2)
#define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW)
-#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS)
+#define RTAS_FN_IBM_RESET_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW)
#define RTAS_FN_IBM_SCAN_LOG_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP)
#define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR)
#define RTAS_FN_IBM_SET_EEH_OPTION rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION)
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index ea26665f82cf..f43f3a6b0051 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -14,6 +14,7 @@ typedef struct func_desc func_desc_t;
extern char __head_end[];
extern char __srwx_boundary[];
+extern char __exittext_begin[], __exittext_end[];
/* Patch sites */
extern s32 patch__call_flush_branch_caches1;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index bf5dde1a4114..15c5691dd218 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -14,7 +14,7 @@
#ifdef __KERNEL__
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15
#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
#else
#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index f1f9890f50d3..de10437fd206 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -74,7 +74,7 @@ __pu_failed: \
/* -mprefixed can generate offsets beyond range, fall back hack */
#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __put_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm goto( \
"1: " op " %0,0(%1) # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@@ -83,7 +83,7 @@ __pu_failed: \
: label)
#else
#define __put_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@@ -97,7 +97,7 @@ __pu_failed: \
__put_user_asm_goto(x, ptr, label, "std")
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
- asm_volatile_goto( \
+ asm goto( \
"1: stw%X1 %0, %1\n" \
"2: stw%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
@@ -146,7 +146,7 @@ do { \
/* -mprefixed can generate offsets beyond range, fall back hack */
#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __get_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: "op" %0,0(%1) # get_user\n" \
EX_TABLE(1b, %l2) \
: "=r" (x) \
@@ -155,7 +155,7 @@ do { \
: label)
#else
#define __get_user_asm_goto(x, addr, label, op) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: "op"%U1%X1 %0, %1 # get_user\n" \
EX_TABLE(1b, %l2) \
: "=r" (x) \
@@ -169,7 +169,7 @@ do { \
__get_user_asm_goto(x, addr, label, "ld")
#else /* __powerpc64__ */
#define __get_user_asm2_goto(x, addr, label) \
- asm_volatile_goto( \
+ asm_goto_output( \
"1: lwz%X1 %0, %1\n" \
"2: lwz%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 30a12d208687..54653a863414 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -4,8 +4,8 @@
/*
* Word-at-a-time interfaces for PowerPC.
*/
-
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
#include <asm/asm-compat.h>
#include <asm/extable.h>
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h
index 9f9a0f267ea5..f733467b1534 100644
--- a/arch/powerpc/include/uapi/asm/papr-sysparm.h
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -14,7 +14,7 @@ enum {
struct papr_sysparm_io_block {
__u32 parameter;
__u16 length;
- char data[PAPR_SYSPARM_MAX_OUTPUT];
+ __u8 data[PAPR_SYSPARM_MAX_OUTPUT];
};
/**
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f29ce3dd6140..bfd3f442e5eb 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -26,6 +26,15 @@ BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
bl setup_common_caches
+
+ /*
+ * This assumes that all cores using __setup_cpu_603 with
+ * MMU_FTR_USE_HIGH_BATS are G2_LE compatible
+ */
+BEGIN_MMU_FTR_SECTION
+ bl setup_g2_le_hid2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
mtlr r5
blr
_GLOBAL(__setup_cpu_604)
@@ -115,6 +124,16 @@ SYM_FUNC_START_LOCAL(setup_604_hid0)
blr
SYM_FUNC_END(setup_604_hid0)
+/* Enable high BATs for G2_LE and derivatives like e300cX */
+SYM_FUNC_START_LOCAL(setup_g2_le_hid2)
+ mfspr r11,SPRN_HID2_G2_LE
+ oris r11,r11,HID2_G2_LE_HBE@h
+ mtspr SPRN_HID2_G2_LE,r11
+ sync
+ isync
+ blr
+SYM_FUNC_END(setup_g2_le_hid2)
+
/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
* erratas we work around here.
* Moto MPC710CE.pdf describes them, those are errata
@@ -495,4 +514,3 @@ _GLOBAL(__restore_cpu_setup)
mtcr r7
blr
_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
-
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
index ceb06b109f83..2ae8e9a7b461 100644
--- a/arch/powerpc/kernel/cpu_specs_e500mc.h
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -8,7 +8,8 @@
#ifdef CONFIG_PPC64
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
- PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+ PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \
+ PPC_FEATURE_BOOKE)
#else
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
PPC_FEATURE_BOOKE)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index bd863702d812..1ad059a9e2fe 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
- std r11,_NIP(r1)
+ std r11,_LINK(r1)
+ std r11,_NIP(r1) /* Saved LR is also the next instruction */
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
@@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
std r9,GPR13(r1)
SAVE_NVGPRS(r1)
std r11,_XER(r1)
- std r11,_LINK(r1)
std r11,_CTR(r1)
li r11,\trapnr
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ebe259bdd462..1185efebf032 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1287,20 +1287,22 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_group *grp = iommu_group_get(dev);
struct iommu_table_group *table_group;
- int ret = -EINVAL;
/* At first attach the ownership is already set */
- if (!domain)
+ if (!domain) {
+ iommu_group_put(grp);
return 0;
-
- if (!grp)
- return -ENODEV;
+ }
table_group = iommu_group_get_iommudata(grp);
- ret = table_group->ops->take_ownership(table_group);
+ /*
+ * The domain being set to PLATFORM from earlier
+ * BLOCKED. The table_group ownership has to be released.
+ */
+ table_group->ops->release_ownership(table_group);
iommu_group_put(grp);
- return ret;
+ return 0;
}
static const struct iommu_domain_ops spapr_tce_platform_domain_ops = {
@@ -1312,13 +1314,32 @@ static struct iommu_domain spapr_tce_platform_domain = {
.ops = &spapr_tce_platform_domain_ops,
};
-static struct iommu_domain spapr_tce_blocked_domain = {
- .type = IOMMU_DOMAIN_BLOCKED,
+static int
+spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev)
+{
+ struct iommu_group *grp = iommu_group_get(dev);
+ struct iommu_table_group *table_group;
+ int ret = -EINVAL;
+
/*
* FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain
* also sets the dma_api ops
*/
- .ops = &spapr_tce_platform_domain_ops,
+ table_group = iommu_group_get_iommudata(grp);
+ ret = table_group->ops->take_ownership(table_group);
+ iommu_group_put(grp);
+
+ return ret;
+}
+
+static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = {
+ .attach_dev = spapr_tce_blocked_iommu_attach_dev,
+};
+
+static struct iommu_domain spapr_tce_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &spapr_tce_blocked_domain_ops,
};
static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
@@ -1339,7 +1360,7 @@ static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
struct pci_controller *hose;
if (!dev_is_pci(dev))
- return ERR_PTR(-EPERM);
+ return ERR_PTR(-ENODEV);
pdev = to_pci_dev(dev);
hose = pdev->bus->sysdata;
@@ -1388,6 +1409,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = {
NULL,
};
+void ppc_iommu_register_device(struct pci_controller *phb)
+{
+ iommu_device_sysfs_add(&phb->iommu, phb->parent,
+ spapr_tce_iommu_groups, "iommu-phb%04x",
+ phb->global_number);
+ iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops,
+ phb->parent);
+}
+
+void ppc_iommu_unregister_device(struct pci_controller *phb)
+{
+ iommu_device_unregister(&phb->iommu);
+ iommu_device_sysfs_remove(&phb->iommu);
+}
+
/*
* This registers IOMMU devices of PHBs. This needs to happen
* after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
@@ -1398,11 +1434,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void)
struct pci_controller *hose;
list_for_each_entry(hose, &hose_list, list_node) {
- iommu_device_sysfs_add(&hose->iommu, hose->parent,
- spapr_tce_iommu_groups, "iommu-phb%04x",
- hose->global_number);
- iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
- hose->parent);
+ ppc_iommu_register_device(hose);
}
return 0;
}
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index 938e66829eae..d5c48d1b0a31 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -230,7 +230,7 @@ again:
* This allows interrupts to be unmasked without hard disabling, and
* also without new hard interrupts coming in ahead of pending ones.
*/
- asm_volatile_goto(
+ asm goto(
"1: \n"
" lbz 9,%0(13) \n"
" cmpwi 9,0 \n"
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 48e0eaf1ad61..5c064485197a 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -46,8 +46,8 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size)
WARN_ON_ONCE(size & ~PAGE_MASK);
if (slab_is_available()) {
- if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
- pgprot_noncached(PAGE_KERNEL)))
+ if (vmap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+ pgprot_noncached(PAGE_KERNEL)))
vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size);
} else {
early_ioremap_range(ISA_IO_BASE, pa, size,
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 7e793b503e29..8064d9c3de86 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -375,8 +375,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
.name = "ibm,remove-pe-dma-window",
},
- [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = {
- .name = "ibm,reset-pe-dma-windows",
+ [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = {
+ /*
+ * Note: PAPR+ v2.13 7.3.31.4.1 spells this as
+ * "ibm,reset-pe-dma-windows" (plural), but RTAS
+ * implementations use the singular form in practice.
+ */
+ .name = "ibm,reset-pe-dma-window",
},
[RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
.name = "ibm,scan-log-dump",
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 693334c20d07..a60e4139214b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -984,7 +984,7 @@ static bool shared_caches __ro_after_init;
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
@@ -1010,9 +1010,9 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
static int powerpc_shared_cache_flags(void)
{
if (static_branch_unlikely(&splpar_asym_pack))
- return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+ return SD_SHARE_LLC | SD_ASYM_PACKING;
- return SD_SHARE_PKG_RESOURCES;
+ return SD_SHARE_LLC;
}
static int powerpc_shared_proc_flags(void)
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 82010629cf88..d8d6b4fd9a14 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -27,10 +27,22 @@
#include <asm/ftrace.h>
#include <asm/syscall.h>
#include <asm/inst.h>
+#include <asm/sections.h>
#define NUM_FTRACE_TRAMPS 2
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
+ addr += MCOUNT_INSN_SIZE;
+
+ return addr;
+}
+
static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
{
ppc_inst_t op;
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index 7b85c3b460a3..12fab1803bcf 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -37,6 +37,11 @@
#define NUM_FTRACE_TRAMPS 8
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
static ppc_inst_t
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 1c5970df3233..f420df7888a7 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -281,7 +281,9 @@ SECTIONS
* to deal with references from __bug_table
*/
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+ __exittext_begin = .;
EXIT_TEXT
+ __exittext_end = .;
}
. = ALIGN(PAGE_SIZE);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 52427fc2a33f..0b921704da45 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -391,6 +391,24 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
/* Dummy value used in computing PCR value below */
#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
+static inline unsigned long map_pcr_to_cap(unsigned long pcr)
+{
+ unsigned long cap = 0;
+
+ switch (pcr) {
+ case PCR_ARCH_300:
+ cap = H_GUEST_CAP_POWER9;
+ break;
+ case PCR_ARCH_31:
+ cap = H_GUEST_CAP_POWER10;
+ break;
+ default:
+ break;
+ }
+
+ return cap;
+}
+
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0;
@@ -424,11 +442,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
break;
case PVR_ARCH_300:
guest_pcr_bit = PCR_ARCH_300;
- cap = H_GUEST_CAP_POWER9;
break;
case PVR_ARCH_31:
guest_pcr_bit = PCR_ARCH_31;
- cap = H_GUEST_CAP_POWER10;
break;
default:
return -EINVAL;
@@ -440,6 +456,12 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
return -EINVAL;
if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) {
+ /*
+ * 'arch_compat == 0' would mean the guest should default to
+ * L1's compatibility. In this case, the guest would pick
+ * host's PCR and evaluate the corresponding capabilities.
+ */
+ cap = map_pcr_to_cap(guest_pcr_bit);
if (!(cap & nested_capabilities))
return -EINVAL;
}
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
index 5378eb40b162..8e6f5355f08b 100644
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -138,6 +138,7 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb,
vector128 v;
int rc, i;
u16 iden;
+ u32 arch_compat = 0;
vcpu = gsm->data;
@@ -347,8 +348,23 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb,
break;
}
case KVMPPC_GSID_LOGICAL_PVR:
- rc = kvmppc_gse_put_u32(gsb, iden,
- vcpu->arch.vcore->arch_compat);
+ /*
+ * Though 'arch_compat == 0' would mean the default
+ * compatibility, arch_compat, being a Guest Wide
+ * Element, cannot be filled with a value of 0 in GSB
+ * as this would result into a kernel trap.
+ * Hence, when `arch_compat == 0`, arch_compat should
+ * default to L1's PVR.
+ */
+ if (!vcpu->arch.vcore->arch_compat) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ arch_compat = PVR_ARCH_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
+ arch_compat = PVR_ARCH_300;
+ } else {
+ arch_compat = vcpu->arch.vcore->arch_compat;
+ }
+ rc = kvmppc_gse_put_u32(gsb, iden, arch_compat);
break;
}
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index a70828a6d935..aa9aa11927b2 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size)
if (ret)
return ret;
+ k_start = k_start & PAGE_MASK;
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
if (!block)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index e966b2ad8ecd..b3327a358eb4 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -27,7 +27,7 @@
#include "mpc85xx.h"
-void __init mpc8536_ds_pic_init(void)
+static void __init mpc8536_ds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
0, 256, " OpenPIC ");
diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c
index 1b59e45a0c64..19122daadb55 100644
--- a/arch/powerpc/platforms/85xx/mvme2500.c
+++ b/arch/powerpc/platforms/85xx/mvme2500.c
@@ -21,7 +21,7 @@
#include "mpc85xx.h"
-void __init mvme2500_pic_init(void)
+static void __init mvme2500_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0,
MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index 10d6f1fa3327..491895ac8bcf 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -24,7 +24,7 @@
#include "mpc85xx.h"
-void __init p1010_rdb_pic_init(void)
+static void __init p1010_rdb_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 0dd786a061a6..adc3a2ee1415 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -370,7 +370,7 @@ exit:
*
* @pixclock: the wavelength, in picoseconds, of the clock
*/
-void p1022ds_set_pixel_clock(unsigned int pixclock)
+static void p1022ds_set_pixel_clock(unsigned int pixclock)
{
struct device_node *guts_np = NULL;
struct ccsr_guts __iomem *guts;
@@ -418,7 +418,7 @@ void p1022ds_set_pixel_clock(unsigned int pixclock)
/**
* p1022ds_valid_monitor_port: set the monitor port for sysfs
*/
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
{
switch (port) {
@@ -432,7 +432,7 @@ p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
#endif
-void __init p1022_ds_pic_init(void)
+static void __init p1022_ds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index 25ab6e9c1470..6198299d95b1 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -40,7 +40,7 @@
*
* @pixclock: the wavelength, in picoseconds, of the clock
*/
-void p1022rdk_set_pixel_clock(unsigned int pixclock)
+static void p1022rdk_set_pixel_clock(unsigned int pixclock)
{
struct device_node *guts_np = NULL;
struct ccsr_guts __iomem *guts;
@@ -88,7 +88,7 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock)
/**
* p1022rdk_valid_monitor_port: set the monitor port for sysfs
*/
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
{
return FSL_DIU_PORT_DVI;
@@ -96,7 +96,7 @@ p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
#endif
-void __init p1022_rdk_pic_init(void)
+static void __init p1022_rdk_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index baa12eff6d5d..60e0b8947ce6 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -8,6 +8,8 @@
#include <linux/of_irq.h>
#include <linux/io.h>
+#include "socrates_fpga_pic.h"
+
/*
* The FPGA supports 9 interrupt sources, which can be routed to 3
* interrupt request lines of the MPIC. The line to be used can be
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 45f257fc1ade..2582427d8d01 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -37,7 +37,7 @@
#define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */
#define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */
-void __init xes_mpc85xx_pic_init(void)
+static void __init xes_mpc85xx_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
0, 256, " OpenPIC ");
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 496e16c588aa..e8c4129697b1 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -574,29 +574,6 @@ static void iommu_table_setparms(struct pci_controller *phb,
struct iommu_table_ops iommu_table_lpar_multi_ops;
-/*
- * iommu_table_setparms_lpar
- *
- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
- */
-static void iommu_table_setparms_lpar(struct pci_controller *phb,
- struct device_node *dn,
- struct iommu_table *tbl,
- struct iommu_table_group *table_group,
- const __be32 *dma_window)
-{
- unsigned long offset, size, liobn;
-
- of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
-
- iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
- &iommu_table_lpar_multi_ops);
-
-
- table_group->tce32_start = offset;
- table_group->tce32_size = size;
-}
-
struct iommu_table_ops iommu_table_pseries_ops = {
.set = tce_build_pSeries,
.clear = tce_free_pSeries,
@@ -724,26 +701,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
* dynamic 64bit DMA window, walking up the device tree.
*/
static struct device_node *pci_dma_find(struct device_node *dn,
- const __be32 **dma_window)
+ struct dynamic_dma_window_prop *prop)
{
- const __be32 *dw = NULL;
+ const __be32 *default_prop = NULL;
+ const __be32 *ddw_prop = NULL;
+ struct device_node *rdn = NULL;
+ bool default_win = false, ddw_win = false;
for ( ; dn && PCI_DN(dn); dn = dn->parent) {
- dw = of_get_property(dn, "ibm,dma-window", NULL);
- if (dw) {
- if (dma_window)
- *dma_window = dw;
- return dn;
+ default_prop = of_get_property(dn, "ibm,dma-window", NULL);
+ if (default_prop) {
+ rdn = dn;
+ default_win = true;
+ }
+ ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
+ }
+ ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
}
- dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
- if (dw)
- return dn;
- dw = of_get_property(dn, DMA64_PROPNAME, NULL);
- if (dw)
- return dn;
+
+ /* At least found default window, which is the case for normal boot */
+ if (default_win)
+ break;
}
- return NULL;
+ /* For PCI devices there will always be a DMA window, either on the device
+ * or parent bus
+ */
+ WARN_ON(!(default_win | ddw_win));
+
+ /* caller doesn't want to get DMA window property */
+ if (!prop)
+ return rdn;
+
+ /* parse DMA window property. During normal system boot, only default
+ * DMA window is passed in OF. But, for kdump, a dedicated adapter might
+ * have both default and DDW in FDT. In this scenario, DDW takes precedence
+ * over default window.
+ */
+ if (ddw_win) {
+ struct dynamic_dma_window_prop *p;
+
+ p = (struct dynamic_dma_window_prop *)ddw_prop;
+ prop->liobn = p->liobn;
+ prop->dma_base = p->dma_base;
+ prop->tce_shift = p->tce_shift;
+ prop->window_shift = p->window_shift;
+ } else if (default_win) {
+ unsigned long offset, size, liobn;
+
+ of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);
+
+ prop->liobn = cpu_to_be32((u32)liobn);
+ prop->dma_base = cpu_to_be64(offset);
+ prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
+ prop->window_shift = cpu_to_be32(order_base_2(size));
+ }
+
+ return rdn;
}
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
@@ -751,17 +773,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
struct iommu_table *tbl;
struct device_node *dn, *pdn;
struct pci_dn *ppci;
- const __be32 *dma_window = NULL;
+ struct dynamic_dma_window_prop prop;
dn = pci_bus_to_OF_node(bus);
pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
dn);
- pdn = pci_dma_find(dn, &dma_window);
+ pdn = pci_dma_find(dn, &prop);
- if (dma_window == NULL)
- pr_debug(" no ibm,dma-window property !\n");
+ /* In PPC architecture, there will always be DMA window on bus or one of the
+ * parent bus. During reboot, there will be ibm,dma-window property to
+ * define DMA window. For kdump, there will at least be default window or DDW
+ * or both.
+ */
ppci = PCI_DN(pdn);
@@ -771,13 +796,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
tbl = ppci->table_group->tables[0];
- if (dma_window) {
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
- ppci->table_group, dma_window);
- if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
- panic("Failed to initialize iommu table");
- }
+ iommu_table_setparms_common(tbl, ppci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only for normal boot with default window. Doesn't matter even
+ * if we set these with DDW which is 64bit during kdump, since
+ * these will not be used during kdump.
+ */
+ ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
+
+ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
+
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -968,6 +1004,12 @@ static void find_existing_ddw_windows_named(const char *name)
continue;
}
+ /* If at the time of system initialization, there are DDWs in OF,
+ * it means this is during kexec. DDW could be direct or dynamic.
+ * We will just mark DDWs as "dynamic" since this is kdump path,
+ * no need to worry about perforance. ddw_list_new_entry() will
+ * set window->direct = false.
+ */
window = ddw_list_new_entry(pdn, dma64);
if (!window) {
of_node_put(pdn);
@@ -1524,8 +1566,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
struct device_node *pdn, *dn;
struct iommu_table *tbl;
- const __be32 *dma_window = NULL;
struct pci_dn *pci;
+ struct dynamic_dma_window_prop prop;
pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
@@ -1538,7 +1580,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %pOF\n", dn);
- pdn = pci_dma_find(dn, &dma_window);
+ pdn = pci_dma_find(dn, &prop);
if (!pdn || !PCI_DN(pdn)) {
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
"no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1551,8 +1593,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
if (!pci->table_group) {
pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
tbl = pci->table_group->tables[0];
- iommu_table_setparms_lpar(pci->phb, pdn, tbl,
- pci->table_group, dma_window);
+
+ iommu_table_setparms_common(tbl, pci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only for normal boot with default window. Doesn't matter even
+ * if we set these with DDW which is 64bit during kdump, since
+ * these will not be used during kdump.
+ */
+ pci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 4561667832ed..4e9916bb03d7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -662,8 +662,12 @@ u64 pseries_paravirt_steal_clock(int cpu)
{
struct lppaca *lppaca = &lppaca_of(cpu);
- return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
- be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+ /*
+ * VPA steal time counters are reported at TB frequency. Hence do a
+ * conversion to ns before returning
+ */
+ return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+ be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)));
}
#endif
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 4ba824568119..4448386268d9 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
pseries_msi_allocate_domains(phb);
+ ppc_iommu_register_device(phb);
+
/* Create EEH devices for the PHB */
eeh_phb_pe_create(phb);
@@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
}
}
+ ppc_iommu_unregister_device(phb);
+
pseries_msi_free_domains(phb);
/* Keep a reference so phb isn't freed yet */
diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c
index 5020044400dc..4de57ba52236 100644
--- a/arch/powerpc/sysdev/udbg_memcons.c
+++ b/arch/powerpc/sysdev/udbg_memcons.c
@@ -41,7 +41,7 @@ struct memcons memcons = {
.input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE],
};
-void memcons_putc(char c)
+static void memcons_putc(char c)
{
char *new_output_pos;
@@ -54,7 +54,7 @@ void memcons_putc(char c)
memcons.output_pos = new_output_pos;
}
-int memcons_getc_poll(void)
+static int memcons_getc_poll(void)
{
char c;
char *new_input_pos;
@@ -77,7 +77,7 @@ int memcons_getc_poll(void)
return -1;
}
-int memcons_getc(void)
+static int memcons_getc(void)
{
int c;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index bffbd869a068..b50896734a91 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -37,7 +37,7 @@ config RISCV
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_HAS_VDSO_DATA
select ARCH_KEEP_MEMBLOCK if ACPI
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
@@ -136,6 +136,7 @@ config RISCV
select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -315,7 +316,6 @@ config AS_HAS_OPTION_ARCH
# https://reviews.llvm.org/D123515
def_bool y
depends on $(as-instr, .option arch$(comma) +m)
- depends on !$(as-instr, .option arch$(comma) -i)
source "arch/riscv/Kconfig.socs"
source "arch/riscv/Kconfig.errata"
@@ -983,7 +983,19 @@ config RISCV_ISA_FALLBACK
config BUILTIN_DTB
bool "Built-in device tree"
depends on OF && NONPORTABLE
- default y if XIP_KERNEL
+ help
+ Build a device tree into the Linux image.
+ This option should be selected if no bootloader is being used.
+ If unsure, say N.
+
+
+config BUILTIN_DTB_SOURCE
+ string "Built-in device tree source"
+ depends on BUILTIN_DTB
+ help
+ DTS file path (without suffix, relative to arch/riscv/boot/dts)
+ for the DTS file that will be used to produce the DTB linked into the
+ kernel.
endmenu # "Boot options"
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index e08e91c49abe..623de5f8a208 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -84,36 +84,4 @@ config SOC_CANAAN
help
This enables support for Canaan Kendryte K210 SoC platform hardware.
-if ARCH_CANAAN
-
-config ARCH_CANAAN_K210_DTB_BUILTIN
- def_bool SOC_CANAAN_K210_DTB_BUILTIN
-
-config SOC_CANAAN_K210_DTB_BUILTIN
- bool "Builtin device tree for the Canaan Kendryte K210"
- depends on ARCH_CANAAN
- default y
- select OF
- select BUILTIN_DTB
- help
- Build a device tree for the Kendryte K210 into the Linux image.
- This option should be selected if no bootloader is being used.
- If unsure, say Y.
-
-config ARCH_CANAAN_K210_DTB_SOURCE
- string
- default SOC_CANAAN_K210_DTB_SOURCE
-
-config SOC_CANAAN_K210_DTB_SOURCE
- string "Source file for the Canaan Kendryte K210 builtin DTB"
- depends on ARCH_CANAAN
- depends on ARCH_CANAAN_K210_DTB_BUILTIN
- default "k210_generic"
- help
- Base name (without suffix, relative to arch/riscv/boot/dts/canaan)
- for the DTS file that will be used to produce the DTB linked into the
- kernel.
-
-endif # ARCH_CANAAN
-
endmenu # "SoC selection"
diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
index 72030fd727af..fdae05bbf556 100644
--- a/arch/riscv/boot/dts/Makefile
+++ b/arch/riscv/boot/dts/Makefile
@@ -8,4 +8,4 @@ subdir-y += sophgo
subdir-y += starfive
subdir-y += thead
-obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
+obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE))
diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile
index 520623264c87..987d1f0c41f0 100644
--- a/arch/riscv/boot/dts/canaan/Makefile
+++ b/arch/riscv/boot/dts/canaan/Makefile
@@ -5,5 +5,3 @@ dtb-$(CONFIG_ARCH_CANAAN) += sipeed_maix_bit.dtb
dtb-$(CONFIG_ARCH_CANAAN) += sipeed_maix_dock.dtb
dtb-$(CONFIG_ARCH_CANAAN) += sipeed_maix_go.dtb
dtb-$(CONFIG_ARCH_CANAAN) += sipeed_maixduino.dtb
-
-obj-$(CONFIG_ARCH_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .dtb.o, $(CONFIG_ARCH_CANAAN_K210_DTB_SOURCE))
diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile
index 45adc4926e79..e177815bf1a2 100644
--- a/arch/riscv/boot/dts/microchip/Makefile
+++ b/arch/riscv/boot/dts/microchip/Makefile
@@ -4,4 +4,3 @@ dtb-$(CONFIG_ARCH_MICROCHIP_POLARFIRE) += mpfs-m100pfsevp.dtb
dtb-$(CONFIG_ARCH_MICROCHIP_POLARFIRE) += mpfs-polarberry.dtb
dtb-$(CONFIG_ARCH_MICROCHIP_POLARFIRE) += mpfs-sev-kit.dtb
dtb-$(CONFIG_ARCH_MICROCHIP_POLARFIRE) += mpfs-tysom-m.dtb
-obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile
index 6a5fbd4ed96a..495bf760a909 100644
--- a/arch/riscv/boot/dts/sifive/Makefile
+++ b/arch/riscv/boot/dts/sifive/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
dtb-$(CONFIG_ARCH_SIFIVE) += hifive-unleashed-a00.dtb \
hifive-unmatched-a00.dtb
-obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index 07387f9c135c..72b87b08ab44 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -123,6 +123,7 @@
interrupt-parent = <&gpio>;
interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
+ #interrupt-cells = <2>;
onkey {
compatible = "dlg,da9063-onkey";
diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi
index 93256540d078..81fda312f988 100644
--- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi
+++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi
@@ -6,6 +6,8 @@
/dts-v1/;
#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/reset/sophgo,sg2042-reset.h>
+
#include "sg2042-cpus.dtsi"
/ {
@@ -93,144 +95,160 @@
<&cpu63_intc 3>;
};
- clint_mtimer0: timer@70ac000000 {
+ clint_mtimer0: timer@70ac004000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac000000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac004000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu0_intc 7>,
<&cpu1_intc 7>,
<&cpu2_intc 7>,
<&cpu3_intc 7>;
};
- clint_mtimer1: timer@70ac010000 {
+ clint_mtimer1: timer@70ac014000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac010000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac014000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu4_intc 7>,
<&cpu5_intc 7>,
<&cpu6_intc 7>,
<&cpu7_intc 7>;
};
- clint_mtimer2: timer@70ac020000 {
+ clint_mtimer2: timer@70ac024000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac020000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac024000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu8_intc 7>,
<&cpu9_intc 7>,
<&cpu10_intc 7>,
<&cpu11_intc 7>;
};
- clint_mtimer3: timer@70ac030000 {
+ clint_mtimer3: timer@70ac034000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac030000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac034000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu12_intc 7>,
<&cpu13_intc 7>,
<&cpu14_intc 7>,
<&cpu15_intc 7>;
};
- clint_mtimer4: timer@70ac040000 {
+ clint_mtimer4: timer@70ac044000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac040000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac044000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu16_intc 7>,
<&cpu17_intc 7>,
<&cpu18_intc 7>,
<&cpu19_intc 7>;
};
- clint_mtimer5: timer@70ac050000 {
+ clint_mtimer5: timer@70ac054000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac050000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac054000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu20_intc 7>,
<&cpu21_intc 7>,
<&cpu22_intc 7>,
<&cpu23_intc 7>;
};
- clint_mtimer6: timer@70ac060000 {
+ clint_mtimer6: timer@70ac064000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac060000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac064000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu24_intc 7>,
<&cpu25_intc 7>,
<&cpu26_intc 7>,
<&cpu27_intc 7>;
};
- clint_mtimer7: timer@70ac070000 {
+ clint_mtimer7: timer@70ac074000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac070000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac074000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu28_intc 7>,
<&cpu29_intc 7>,
<&cpu30_intc 7>,
<&cpu31_intc 7>;
};
- clint_mtimer8: timer@70ac080000 {
+ clint_mtimer8: timer@70ac084000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac080000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac084000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu32_intc 7>,
<&cpu33_intc 7>,
<&cpu34_intc 7>,
<&cpu35_intc 7>;
};
- clint_mtimer9: timer@70ac090000 {
+ clint_mtimer9: timer@70ac094000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac090000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac094000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu36_intc 7>,
<&cpu37_intc 7>,
<&cpu38_intc 7>,
<&cpu39_intc 7>;
};
- clint_mtimer10: timer@70ac0a0000 {
+ clint_mtimer10: timer@70ac0a4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0a0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0a4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu40_intc 7>,
<&cpu41_intc 7>,
<&cpu42_intc 7>,
<&cpu43_intc 7>;
};
- clint_mtimer11: timer@70ac0b0000 {
+ clint_mtimer11: timer@70ac0b4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0b0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0b4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu44_intc 7>,
<&cpu45_intc 7>,
<&cpu46_intc 7>,
<&cpu47_intc 7>;
};
- clint_mtimer12: timer@70ac0c0000 {
+ clint_mtimer12: timer@70ac0c4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0c0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0c4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu48_intc 7>,
<&cpu49_intc 7>,
<&cpu50_intc 7>,
<&cpu51_intc 7>;
};
- clint_mtimer13: timer@70ac0d0000 {
+ clint_mtimer13: timer@70ac0d4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0d0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0d4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu52_intc 7>,
<&cpu53_intc 7>,
<&cpu54_intc 7>,
<&cpu55_intc 7>;
};
- clint_mtimer14: timer@70ac0e0000 {
+ clint_mtimer14: timer@70ac0e4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0e0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0e4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu56_intc 7>,
<&cpu57_intc 7>,
<&cpu58_intc 7>,
<&cpu59_intc 7>;
};
- clint_mtimer15: timer@70ac0f0000 {
+ clint_mtimer15: timer@70ac0f4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0f0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0f4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu60_intc 7>,
<&cpu61_intc 7>,
<&cpu62_intc 7>,
@@ -311,6 +329,12 @@
riscv,ndev = <224>;
};
+ rstgen: reset-controller@7030013000 {
+ compatible = "sophgo,sg2042-reset";
+ reg = <0x00000070 0x30013000 0x00000000 0x0000000c>;
+ #reset-cells = <1>;
+ };
+
uart0: serial@7040000000 {
compatible = "snps,dw-apb-uart";
reg = <0x00000070 0x40000000 0x00000000 0x00001000>;
@@ -319,6 +343,7 @@
clock-frequency = <500000000>;
reg-shift = <2>;
reg-io-width = <4>;
+ resets = <&rstgen RST_UART0>;
status = "disabled";
};
};
diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi
index c216aaecac53..5d499d8aa804 100644
--- a/arch/riscv/boot/dts/starfive/jh7100.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi
@@ -96,14 +96,14 @@
thermal-sensors = <&sfctemp>;
trips {
- cpu_alert0 {
+ cpu-alert0 {
/* milliCelsius */
temperature = <75000>;
hysteresis = <2000>;
type = "passive";
};
- cpu_crit {
+ cpu-crit {
/* milliCelsius */
temperature = <90000>;
hysteresis = <2000>;
@@ -113,30 +113,34 @@
};
};
- osc_sys: osc_sys {
+ osc_sys: osc-sys {
compatible = "fixed-clock";
#clock-cells = <0>;
+ clock-output-names = "osc_sys";
/* This value must be overridden by the board */
clock-frequency = <0>;
};
- osc_aud: osc_aud {
+ osc_aud: osc-aud {
compatible = "fixed-clock";
#clock-cells = <0>;
+ clock-output-names = "osc_aud";
/* This value must be overridden by the board */
clock-frequency = <0>;
};
- gmac_rmii_ref: gmac_rmii_ref {
+ gmac_rmii_ref: gmac-rmii-ref {
compatible = "fixed-clock";
#clock-cells = <0>;
+ clock-output-names = "gmac_rmii_ref";
/* Should be overridden by the board when needed */
clock-frequency = <0>;
};
- gmac_gr_mii_rxclk: gmac_gr_mii_rxclk {
+ gmac_gr_mii_rxclk: gmac-gr-mii-rxclk {
compatible = "fixed-clock";
#clock-cells = <0>;
+ clock-output-names = "gmac_gr_mii_rxclk";
/* Should be overridden by the board when needed */
clock-frequency = <0>;
};
diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi
index 45213cdf50dc..74ed3b9264d8 100644
--- a/arch/riscv/boot/dts/starfive/jh7110.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi
@@ -237,14 +237,14 @@
};
trips {
- cpu_alert0: cpu_alert0 {
+ cpu_alert0: cpu-alert0 {
/* milliCelsius */
temperature = <85000>;
hysteresis = <2000>;
type = "passive";
};
- cpu_crit {
+ cpu-crit {
/* milliCelsius */
temperature = <100000>;
hysteresis = <2000>;
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index 146c46d0525b..7e75200543f4 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -33,6 +33,8 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
CONFIG_CMDLINE_FORCE=y
+CONFIG_BUILTIN_DTB=y
+CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic"
# CONFIG_SECCOMP is not set
# CONFIG_STACKPROTECTOR is not set
# CONFIG_GCC_PLUGINS is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 95d8d1808f19..0ba353e9ca71 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -25,6 +25,8 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro"
CONFIG_CMDLINE_FORCE=y
+CONFIG_BUILTIN_DTB=y
+CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic"
# CONFIG_SECCOMP is not set
# CONFIG_STACKPROTECTOR is not set
# CONFIG_GCC_PLUGINS is not set
diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h
index c20236a0725b..85b2c443823e 100644
--- a/arch/riscv/include/asm/arch_hweight.h
+++ b/arch/riscv/include/asm/arch_hweight.h
@@ -20,7 +20,7 @@
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
#ifdef CONFIG_RISCV_ISA_ZBB
- asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
@@ -51,7 +51,7 @@ static inline unsigned int __arch_hweight8(unsigned int w)
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
# ifdef CONFIG_RISCV_ISA_ZBB
- asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 9ffc35537024..329d8244a9b3 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -39,7 +39,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
{
int num;
- asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
@@ -95,7 +95,7 @@ static __always_inline unsigned long variable__fls(unsigned long word)
{
int num;
- asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
@@ -154,7 +154,7 @@ static __always_inline int variable_ffs(int x)
if (!x)
return 0;
- asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
@@ -209,7 +209,7 @@ static __always_inline int variable_fls(unsigned int x)
if (!x)
return 0;
- asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h
index 8f7a62257044..fb9696d7a3f2 100644
--- a/arch/riscv/include/asm/cfi.h
+++ b/arch/riscv/include/asm/cfi.h
@@ -13,11 +13,28 @@ struct pt_regs;
#ifdef CONFIG_CFI_CLANG
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+static inline int cfi_get_offset(void)
+{
+ return 4;
+}
+
+#define cfi_get_offset cfi_get_offset
+extern u32 cfi_bpf_hash;
+extern u32 cfi_bpf_subprog_hash;
+extern u32 cfi_get_func_hash(void *func);
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
return BUG_TRAP_TYPE_NONE;
}
+
+#define cfi_bpf_hash 0U
+#define cfi_bpf_subprog_hash 0U
+static inline u32 cfi_get_func_hash(void *func)
+{
+ return 0;
+}
#endif /* CONFIG_CFI_CLANG */
#endif /* _ASM_RISCV_CFI_H */
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
index a5b60b54b101..88e6f1499e88 100644
--- a/arch/riscv/include/asm/checksum.h
+++ b/arch/riscv/include/asm/checksum.h
@@ -53,7 +53,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
- asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 5a626ed2c47a..0bd11862b760 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -80,7 +80,7 @@ riscv_has_extension_likely(const unsigned long ext)
"ext must be < RISCV_ISA_EXT_MAX");
if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
:
: [ext] "i" (ext)
@@ -103,7 +103,7 @@ riscv_has_extension_unlikely(const unsigned long ext)
"ext must be < RISCV_ISA_EXT_MAX");
if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1)
:
: [ext] "i" (ext)
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 510014051f5d..2468c55933cd 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -424,6 +424,7 @@
# define CSR_STATUS CSR_MSTATUS
# define CSR_IE CSR_MIE
# define CSR_TVEC CSR_MTVEC
+# define CSR_ENVCFG CSR_MENVCFG
# define CSR_SCRATCH CSR_MSCRATCH
# define CSR_EPC CSR_MEPC
# define CSR_CAUSE CSR_MCAUSE
@@ -448,6 +449,7 @@
# define CSR_STATUS CSR_SSTATUS
# define CSR_IE CSR_SIE
# define CSR_TVEC CSR_STVEC
+# define CSR_ENVCFG CSR_SENVCFG
# define CSR_SCRATCH CSR_SSCRATCH
# define CSR_EPC CSR_SEPC
# define CSR_CAUSE CSR_SCAUSE
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 329172122952..15055f9df4da 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -25,6 +25,11 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#ifndef __ASSEMBLY__
+
+extern void *return_address(unsigned int level);
+
+#define ftrace_return_address(n) return_address(n)
+
void MCOUNT_NAME(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index 4c5b0e929890..22deb7a2a6ec 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -11,6 +11,11 @@ static inline void arch_clear_hugepage_flags(struct page *page)
}
#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h);
+#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported
+#endif
+
#ifdef CONFIG_RISCV_ISA_SVNAPOT
#define __HAVE_ARCH_HUGE_PTE_CLEAR
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5340f818746b..1f2d2599c655 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,8 @@
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
+#define RISCV_ISA_EXT_XLINUXENVCFG 127
+
#define RISCV_ISA_EXT_MAX 128
#define RISCV_ISA_EXT_INVALID U32_MAX
diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h
index 14a5ea8d8ef0..4a35d787c019 100644
--- a/arch/riscv/include/asm/jump_label.h
+++ b/arch/riscv/include/asm/jump_label.h
@@ -17,7 +17,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
" .align 2 \n\t"
" .option push \n\t"
" .option norelax \n\t"
@@ -39,7 +39,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key * const key,
const bool branch)
{
- asm_volatile_goto(
+ asm goto(
" .align 2 \n\t"
" .option push \n\t"
" .option norelax \n\t"
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 57e887bfa34c..2947423b5082 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -12,7 +12,7 @@
#include <linux/pfn.h>
#include <linux/const.h>
-#define PAGE_SHIFT (12)
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index d169a4f41a2e..c80bb9990d32 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
__pud_free(mm, pud);
}
-#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
+#define __pud_free_tlb(tlb, pud, addr) \
+do { \
+ if (pgtable_l4_enabled) { \
+ pagetable_pud_dtor(virt_to_ptdesc(pud)); \
+ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \
+ } \
+} while (0)
#define p4d_alloc_one p4d_alloc_one
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
__p4d_free(mm, p4d);
}
-#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
+#define __p4d_free_tlb(tlb, p4d, addr) \
+do { \
+ if (pgtable_l5_enabled) \
+ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d)); \
+} while (0)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline void sync_kernel_mappings(pgd_t *pgd)
@@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
#ifndef __PAGETABLE_PMD_FOLDED
-#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
+#define __pmd_free_tlb(tlb, pmd, addr) \
+do { \
+ pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \
+ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \
+} while (0)
#endif /* __PAGETABLE_PMD_FOLDED */
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index b42017d76924..b99bd66107a6 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -136,7 +136,7 @@ enum napot_cont_order {
* 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable
*/
#define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60))
-#define _PAGE_NOCACHE_THEAD ((1UL < 61) | (1UL << 60))
+#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60))
#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60))
#define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 0c94260b5d0c..6066822e7396 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -84,7 +84,7 @@
* Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
* is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
*/
-#define vmemmap ((struct page *)VMEMMAP_START)
+#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
#define PCI_IO_SIZE SZ_16M
#define PCI_IO_END VMEMMAP_START
@@ -439,6 +439,10 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+#define pte_leaf_size(pte) (pte_napot(pte) ? \
+ napot_cont_size(napot_cont_order(pte)) :\
+ PAGE_SIZE)
+
#ifdef CONFIG_NUMA_BALANCING
/*
* See the comment in include/asm-generic/pgtable.h
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
index f7e8ef2418b9..b1495a7e06ce 100644
--- a/arch/riscv/include/asm/stacktrace.h
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -21,4 +21,9 @@ static inline bool on_thread_stack(void)
return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
}
+
+#ifdef CONFIG_VMAP_STACK
+DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+#endif /* CONFIG_VMAP_STACK */
+
#endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
index 02f87867389a..491296a335d0 100644
--- a/arch/riscv/include/asm/suspend.h
+++ b/arch/riscv/include/asm/suspend.h
@@ -14,6 +14,7 @@ struct suspend_context {
struct pt_regs regs;
/* Saved and restored by high-level functions */
unsigned long scratch;
+ unsigned long envcfg;
unsigned long tvec;
unsigned long ie;
#ifdef CONFIG_MMU
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 1eb5682b2af6..50b63b5c15bd 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -16,7 +16,7 @@ static void tlb_flush(struct mmu_gather *tlb);
static inline void tlb_flush(struct mmu_gather *tlb)
{
#ifdef CONFIG_MMU
- if (tlb->fullmm || tlb->need_flush_all)
+ if (tlb->fullmm || tlb->need_flush_all || tlb->freed_tables)
flush_tlb_mm(tlb->mm);
else
flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 928f096dca21..4112cc8d1d69 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -75,6 +75,7 @@ static inline void flush_tlb_kernel_range(unsigned long start,
#define flush_tlb_mm(mm) flush_tlb_all()
#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
+#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
#endif /* !CONFIG_SMP || !CONFIG_MMU */
#endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
index 924d01b56c9a..51f6dfe19745 100644
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -19,65 +19,6 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
return true;
}
-#ifdef CONFIG_RISCV_ISA_SVNAPOT
-#include <linux/pgtable.h>
+#endif
-#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
-static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
- u64 pfn, unsigned int max_page_shift)
-{
- unsigned long map_size = PAGE_SIZE;
- unsigned long size, order;
-
- if (!has_svnapot())
- return map_size;
-
- for_each_napot_order_rev(order) {
- if (napot_cont_shift(order) > max_page_shift)
- continue;
-
- size = napot_cont_size(order);
- if (end - addr < size)
- continue;
-
- if (!IS_ALIGNED(addr, size))
- continue;
-
- if (!IS_ALIGNED(PFN_PHYS(pfn), size))
- continue;
-
- map_size = size;
- break;
- }
-
- return map_size;
-}
-
-#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
-static inline int arch_vmap_pte_supported_shift(unsigned long size)
-{
- int shift = PAGE_SHIFT;
- unsigned long order;
-
- if (!has_svnapot())
- return shift;
-
- WARN_ON_ONCE(size >= PMD_SIZE);
-
- for_each_napot_order_rev(order) {
- if (napot_cont_size(order) > size)
- continue;
-
- if (!IS_ALIGNED(size, napot_cont_size(order)))
- continue;
-
- shift = napot_cont_shift(order);
- break;
- }
-
- return shift;
-}
-
-#endif /* CONFIG_RISCV_ISA_SVNAPOT */
-#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
#endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h
index f3f031e34191..3802cda71ab7 100644
--- a/arch/riscv/include/asm/word-at-a-time.h
+++ b/arch/riscv/include/asm/word-at-a-time.h
@@ -10,7 +10,8 @@
#include <asm/asm-extable.h>
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
struct word_at_a_time {
const unsigned long one_bits, high_bits;
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index d6b7a5b95874..7499e88a947c 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -139,6 +139,33 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZIHPM,
KVM_RISCV_ISA_EXT_SMSTATEEN,
KVM_RISCV_ISA_EXT_ZICOND,
+ KVM_RISCV_ISA_EXT_ZBC,
+ KVM_RISCV_ISA_EXT_ZBKB,
+ KVM_RISCV_ISA_EXT_ZBKC,
+ KVM_RISCV_ISA_EXT_ZBKX,
+ KVM_RISCV_ISA_EXT_ZKND,
+ KVM_RISCV_ISA_EXT_ZKNE,
+ KVM_RISCV_ISA_EXT_ZKNH,
+ KVM_RISCV_ISA_EXT_ZKR,
+ KVM_RISCV_ISA_EXT_ZKSED,
+ KVM_RISCV_ISA_EXT_ZKSH,
+ KVM_RISCV_ISA_EXT_ZKT,
+ KVM_RISCV_ISA_EXT_ZVBB,
+ KVM_RISCV_ISA_EXT_ZVBC,
+ KVM_RISCV_ISA_EXT_ZVKB,
+ KVM_RISCV_ISA_EXT_ZVKG,
+ KVM_RISCV_ISA_EXT_ZVKNED,
+ KVM_RISCV_ISA_EXT_ZVKNHA,
+ KVM_RISCV_ISA_EXT_ZVKNHB,
+ KVM_RISCV_ISA_EXT_ZVKSED,
+ KVM_RISCV_ISA_EXT_ZVKSH,
+ KVM_RISCV_ISA_EXT_ZVKT,
+ KVM_RISCV_ISA_EXT_ZFH,
+ KVM_RISCV_ISA_EXT_ZFHMIN,
+ KVM_RISCV_ISA_EXT_ZIHINTNTL,
+ KVM_RISCV_ISA_EXT_ZVFH,
+ KVM_RISCV_ISA_EXT_ZVFHMIN,
+ KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f71910718053..604d6bf7e476 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
endif
CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
@@ -46,6 +47,7 @@ obj-y += irq.o
obj-y += process.o
obj-y += ptrace.o
obj-y += reset.o
+obj-y += return_address.o
obj-y += setup.o
obj-y += signal.o
obj-y += syscall_table.o
diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c
index 6ec9dbd7292e..64bdd3e1ab8c 100644
--- a/arch/riscv/kernel/cfi.c
+++ b/arch/riscv/kernel/cfi.c
@@ -75,3 +75,56 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
return report_cfi_failure(regs, regs->epc, &target, type);
}
+
+#ifdef CONFIG_CFI_CLANG
+struct bpf_insn;
+
+/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */
+extern unsigned int __bpf_prog_runX(const void *ctx,
+ const struct bpf_insn *insn);
+
+/*
+ * Force a reference to the external symbol so the compiler generates
+ * __kcfi_typid.
+ */
+__ADDRESSABLE(__bpf_prog_runX);
+
+/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_hash,@object \n"
+" .globl cfi_bpf_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_hash: \n"
+" .word __kcfi_typeid___bpf_prog_runX \n"
+" .size cfi_bpf_hash, 4 \n"
+" .popsection \n"
+);
+
+/* Must match bpf_callback_t */
+extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
+
+__ADDRESSABLE(__bpf_callback_fn);
+
+/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_subprog_hash,@object \n"
+" .globl cfi_bpf_subprog_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_subprog_hash: \n"
+" .word __kcfi_typeid___bpf_callback_fn \n"
+" .size cfi_bpf_subprog_hash, 4 \n"
+" .popsection \n"
+);
+
+u32 cfi_get_func_hash(void *func)
+{
+ u32 hash;
+
+ if (get_kernel_nofault(hash, func - cfi_get_offset()))
+ return 0;
+
+ return hash;
+}
+#endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 89920f84d0a3..79a5a35fab96 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -24,6 +24,7 @@
#include <asm/hwprobe.h>
#include <asm/patch.h>
#include <asm/processor.h>
+#include <asm/sbi.h>
#include <asm/vector.h>
#include "copy-unaligned.h"
@@ -202,6 +203,16 @@ static const unsigned int riscv_zvbb_exts[] = {
};
/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+ RISCV_ISA_EXT_XLINUXENVCFG
+};
+
+/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
*
@@ -250,8 +261,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
__RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
- __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
- __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
+ __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
@@ -539,6 +550,20 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
}
/*
+ * "V" in ISA strings is ambiguous in practice: it should mean
+ * just the standard V-1.0 but vendors aren't well behaved.
+ * Many vendors with T-Head CPU cores which implement the 0.7.1
+ * version of the vector specification put "v" into their DTs.
+ * CPU cores with the ratified spec will contain non-zero
+ * marchid.
+ */
+ if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID &&
+ riscv_cached_marchid(cpu) == 0x0) {
+ this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v];
+ clear_bit(RISCV_ISA_EXT_v, isainfo->isa);
+ }
+
+ /*
* All "okay" hart should have same isa. Set HWCAP based on
* common capabilities of every "okay" hart, in case they don't
* have.
@@ -950,7 +975,7 @@ arch_initcall(check_unaligned_access_all_cpus);
void riscv_user_isa_enable(void)
{
if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
- csr_set(CSR_SENVCFG, ENVCFG_CBZE);
+ csr_set(CSR_ENVCFG, ENVCFG_CBZE);
}
#ifdef CONFIG_RISCV_ALTERNATIVE
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
index 8e114f5930ce..0d6225fd3194 100644
--- a/arch/riscv/kernel/paravirt.c
+++ b/arch/riscv/kernel/paravirt.c
@@ -41,7 +41,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
-DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
+static DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
static bool __init has_pv_steal_clock(void)
{
@@ -91,8 +91,8 @@ static int pv_time_cpu_down_prepare(unsigned int cpu)
static u64 pv_time_steal_clock(int cpu)
{
struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
- u32 sequence;
- u64 steal;
+ __le32 sequence;
+ __le64 steal;
/*
* Check the sequence field before and after reading the steal
diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c
new file mode 100644
index 000000000000..c8115ec8fb30
--- /dev/null
+++ b/arch/riscv/kernel/return_address.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code come from arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2023 SiFive.
+ */
+
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static bool save_return_addr(void *d, unsigned long pc)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)pc;
+ return false;
+ }
+
+ --data->level;
+
+ return true;
+}
+NOKPROBE_SYMBOL(save_return_addr);
+
+noinline void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+
+ data.level = level + 3;
+ data.addr = NULL;
+
+ arch_stack_walk(save_return_addr, &data, current, NULL);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+
+}
+EXPORT_SYMBOL_GPL(return_address);
+NOKPROBE_SYMBOL(return_address);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 519b6bd946e5..c4ed7d977f57 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -42,10 +42,6 @@
static DECLARE_COMPLETION(cpu_running);
-void __init smp_prepare_boot_cpu(void)
-{
-}
-
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int cpuid;
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 239509367e42..299795341e8a 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -15,6 +15,8 @@
void suspend_save_csrs(struct suspend_context *context)
{
context->scratch = csr_read(CSR_SCRATCH);
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ context->envcfg = csr_read(CSR_ENVCFG);
context->tvec = csr_read(CSR_TVEC);
context->ie = csr_read(CSR_IE);
@@ -36,6 +38,8 @@ void suspend_save_csrs(struct suspend_context *context)
void suspend_restore_csrs(struct suspend_context *context)
{
csr_write(CSR_SCRATCH, context->scratch);
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ csr_write(CSR_ENVCFG, context->envcfg);
csr_write(CSR_TVEC, context->tvec);
csr_write(CSR_IE, context->ie);
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 2cf76218a5bd..98315b98256d 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -30,14 +30,8 @@ enum rv_vdso_map {
#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT)
-/*
- * The vDSO data page.
- */
-static union {
- struct vdso_data data;
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+static union vdso_data_store vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = vdso_data_store.data;
struct __vdso_info {
const char *name;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index fc34557f5356..5f7355e96008 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -42,15 +42,42 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBC),
+ KVM_ISA_EXT_ARR(ZBKB),
+ KVM_ISA_EXT_ARR(ZBKC),
+ KVM_ISA_EXT_ARR(ZBKX),
KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFH),
+ KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICNTR),
KVM_ISA_EXT_ARR(ZICOND),
KVM_ISA_EXT_ARR(ZICSR),
KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZKND),
+ KVM_ISA_EXT_ARR(ZKNE),
+ KVM_ISA_EXT_ARR(ZKNH),
+ KVM_ISA_EXT_ARR(ZKR),
+ KVM_ISA_EXT_ARR(ZKSED),
+ KVM_ISA_EXT_ARR(ZKSH),
+ KVM_ISA_EXT_ARR(ZKT),
+ KVM_ISA_EXT_ARR(ZVBB),
+ KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFH),
+ KVM_ISA_EXT_ARR(ZVFHMIN),
+ KVM_ISA_EXT_ARR(ZVKB),
+ KVM_ISA_EXT_ARR(ZVKG),
+ KVM_ISA_EXT_ARR(ZVKNED),
+ KVM_ISA_EXT_ARR(ZVKNHA),
+ KVM_ISA_EXT_ARR(ZVKNHB),
+ KVM_ISA_EXT_ARR(ZVKSED),
+ KVM_ISA_EXT_ARR(ZVKSH),
+ KVM_ISA_EXT_ARR(ZVKT),
};
static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
@@ -92,13 +119,40 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_RISCV_ISA_EXT_ZBA:
case KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_RISCV_ISA_EXT_ZBC:
+ case KVM_RISCV_ISA_EXT_ZBKB:
+ case KVM_RISCV_ISA_EXT_ZBKC:
+ case KVM_RISCV_ISA_EXT_ZBKX:
case KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_RISCV_ISA_EXT_ZFH:
+ case KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_RISCV_ISA_EXT_ZICNTR:
case KVM_RISCV_ISA_EXT_ZICOND:
case KVM_RISCV_ISA_EXT_ZICSR:
case KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_RISCV_ISA_EXT_ZKND:
+ case KVM_RISCV_ISA_EXT_ZKNE:
+ case KVM_RISCV_ISA_EXT_ZKNH:
+ case KVM_RISCV_ISA_EXT_ZKR:
+ case KVM_RISCV_ISA_EXT_ZKSED:
+ case KVM_RISCV_ISA_EXT_ZKSH:
+ case KVM_RISCV_ISA_EXT_ZKT:
+ case KVM_RISCV_ISA_EXT_ZVBB:
+ case KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_RISCV_ISA_EXT_ZVFH:
+ case KVM_RISCV_ISA_EXT_ZVFHMIN:
+ case KVM_RISCV_ISA_EXT_ZVKB:
+ case KVM_RISCV_ISA_EXT_ZVKG:
+ case KVM_RISCV_ISA_EXT_ZVKNED:
+ case KVM_RISCV_ISA_EXT_ZVKNHA:
+ case KVM_RISCV_ISA_EXT_ZVKNHB:
+ case KVM_RISCV_ISA_EXT_ZVKSED:
+ case KVM_RISCV_ISA_EXT_ZVKSH:
+ case KVM_RISCV_ISA_EXT_ZVKT:
return false;
/* Extensions which can be disabled using Smstateen */
case KVM_RISCV_ISA_EXT_SSAIA:
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index 01f09fe8c3b0..d8cf9ca28c61 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -26,8 +26,12 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
{
gpa_t shmem = vcpu->arch.sta.shmem;
u64 last_steal = vcpu->arch.sta.last_steal;
- u32 *sequence_ptr, sequence;
- u64 *steal_ptr, steal;
+ __le32 __user *sequence_ptr;
+ __le64 __user *steal_ptr;
+ __le32 sequence_le;
+ __le64 steal_le;
+ u32 sequence;
+ u64 steal;
unsigned long hva;
gfn_t gfn;
@@ -47,22 +51,22 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
return;
}
- sequence_ptr = (u32 *)(hva + offset_in_page(shmem) +
+ sequence_ptr = (__le32 __user *)(hva + offset_in_page(shmem) +
offsetof(struct sbi_sta_struct, sequence));
- steal_ptr = (u64 *)(hva + offset_in_page(shmem) +
+ steal_ptr = (__le64 __user *)(hva + offset_in_page(shmem) +
offsetof(struct sbi_sta_struct, steal));
- if (WARN_ON(get_user(sequence, sequence_ptr)))
+ if (WARN_ON(get_user(sequence_le, sequence_ptr)))
return;
- sequence = le32_to_cpu(sequence);
+ sequence = le32_to_cpu(sequence_le);
sequence += 1;
if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)))
return;
- if (!WARN_ON(get_user(steal, steal_ptr))) {
- steal = le64_to_cpu(steal);
+ if (!WARN_ON(get_user(steal_le, steal_ptr))) {
+ steal = le64_to_cpu(steal_le);
vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay);
steal += vcpu->arch.sta.last_steal - last_steal;
WARN_ON(put_user(cpu_to_le64(steal), steal_ptr));
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index af3df5274ccb..74af3ab520b6 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -53,7 +53,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
- asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
@@ -170,7 +170,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
- asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
@@ -178,7 +178,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
: no_zbb);
#ifdef CONFIG_32BIT
- asm_volatile_goto(".option push \n\
+ asm_goto_output(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 16 \n\
andi %[offset], %[offset], 1 \n\
@@ -193,7 +193,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
return (unsigned short)csum;
#else /* !CONFIG_32BIT */
- asm_volatile_goto(".option push \n\
+ asm_goto_output(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 32 \n\
add %[csum], %[fold_temp], %[csum] \n\
@@ -257,7 +257,7 @@ do_csum_no_alignment(const unsigned char *buff, int len)
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
- asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 431596c0e20e..5ef2a6891158 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -125,6 +125,26 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return pte;
}
+unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+ unsigned long hp_size = huge_page_size(h);
+
+ switch (hp_size) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ return P4D_SIZE - PUD_SIZE;
+#endif
+ case PMD_SIZE:
+ return PUD_SIZE - PMD_SIZE;
+ case napot_cont_size(NAPOT_CONT64KB_ORDER):
+ return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER);
+ default:
+ break;
+ }
+
+ return 0UL;
+}
+
static pte_t get_clear_contig(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
@@ -177,13 +197,36 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
return entry;
}
+static void clear_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pgsize,
+ unsigned long ncontig)
+{
+ struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+ unsigned long i, saddr = addr;
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+ ptep_get_and_clear(mm, addr, ptep);
+
+ flush_tlb_range(&vma, saddr, addr);
+}
+
+/*
+ * When dealing with NAPOT mappings, the privileged specification indicates that
+ * "if an update needs to be made, the OS generally should first mark all of the
+ * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions
+ * within the range, [...] then update the PTE(s), as described in Section
+ * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by
+ * arm64.
+ */
void set_huge_pte_at(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
pte_t pte,
unsigned long sz)
{
- unsigned long hugepage_shift;
+ unsigned long hugepage_shift, pgsize;
int i, pte_num;
if (sz >= PGDIR_SIZE)
@@ -198,7 +241,22 @@ void set_huge_pte_at(struct mm_struct *mm,
hugepage_shift = PAGE_SHIFT;
pte_num = sz >> hugepage_shift;
- for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift))
+ pgsize = 1 << hugepage_shift;
+
+ if (!pte_present(pte)) {
+ for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
+ set_ptes(mm, addr, ptep, pte, 1);
+ return;
+ }
+
+ if (!pte_napot(pte)) {
+ set_ptes(mm, addr, ptep, pte, 1);
+ return;
+ }
+
+ clear_flush(mm, addr, ptep, pgsize, pte_num);
+
+ for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
set_pte_at(mm, addr, ptep, pte);
}
@@ -306,7 +364,7 @@ void huge_pte_clear(struct mm_struct *mm,
pte_clear(mm, addr, ptep);
}
-static __init bool is_napot_size(unsigned long size)
+static bool is_napot_size(unsigned long size)
{
unsigned long order;
@@ -334,7 +392,7 @@ arch_initcall(napot_hugetlbpages_init);
#else
-static __init bool is_napot_size(unsigned long size)
+static bool is_napot_size(unsigned long size)
{
return false;
}
@@ -351,7 +409,7 @@ int pmd_huge(pmd_t pmd)
return pmd_leaf(pmd);
}
-bool __init arch_hugetlb_valid_size(unsigned long size)
+static bool __hugetlb_valid_size(unsigned long size)
{
if (size == HPAGE_SIZE)
return true;
@@ -363,6 +421,18 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
return false;
}
+bool __init arch_hugetlb_valid_size(unsigned long size)
+{
+ return __hugetlb_valid_size(size);
+}
+
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h)
+{
+ return __hugetlb_valid_size(huge_page_size(h));
+}
+#endif
+
#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 32cad6a65ccd..fa34cf55037b 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1385,6 +1385,10 @@ void __init misc_mem_init(void)
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
arch_numa_init();
sparse_init();
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /* The entire VMEMMAP region has been populated. Flush TLB for this region */
+ local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
+#endif
zone_sizes_init();
arch_reserve_crashkernel();
memblock_dump_all();
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 8d12b26f5ac3..893566e004b7 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -66,9 +66,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
local_flush_tlb_range_threshold_asid(start, size, stride, asid);
}
+/* Flush a range of kernel pages without broadcasting */
void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID);
+ local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID);
}
static void __ipi_flush_tlb_all(void *info)
@@ -233,4 +234,5 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+ cpumask_clear(&batch->cpumask);
}
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index a5ce1ab76ece..f4b6b3b9edda 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -18,6 +18,11 @@ static inline bool rvc_enabled(void)
return IS_ENABLED(CONFIG_RISCV_ISA_C);
}
+static inline bool rvzbb_enabled(void)
+{
+ return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBB);
+}
+
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
@@ -730,6 +735,33 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2)
return rv_css_insn(0x6, imm, rs2, 0x2);
}
+/* RVZBB instrutions. */
+static inline u32 rvzbb_sextb(u8 rd, u8 rs1)
+{
+ return rv_i_insn(0x604, rs1, 1, rd, 0x13);
+}
+
+static inline u32 rvzbb_sexth(u8 rd, u8 rs1)
+{
+ return rv_i_insn(0x605, rs1, 1, rd, 0x13);
+}
+
+static inline u32 rvzbb_zexth(u8 rd, u8 rs)
+{
+ if (IS_ENABLED(CONFIG_64BIT))
+ return rv_i_insn(0x80, rs, 4, rd, 0x3b);
+
+ return rv_i_insn(0x80, rs, 4, rd, 0x33);
+}
+
+static inline u32 rvzbb_rev8(u8 rd, u8 rs)
+{
+ if (IS_ENABLED(CONFIG_64BIT))
+ return rv_i_insn(0x6b8, rs, 5, rd, 0x13);
+
+ return rv_i_insn(0x698, rs, 5, rd, 0x13);
+}
+
/*
* RV64-only instructions.
*
@@ -1087,9 +1119,111 @@ static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
emit(rv_subw(rd, rs1, rs2), ctx);
}
+static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ emit(rvzbb_sextb(rd, rs), ctx);
+ return;
+ }
+
+ emit_slli(rd, rs, 56, ctx);
+ emit_srai(rd, rd, 56, ctx);
+}
+
+static inline void emit_sexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ emit(rvzbb_sexth(rd, rs), ctx);
+ return;
+ }
+
+ emit_slli(rd, rs, 48, ctx);
+ emit_srai(rd, rd, 48, ctx);
+}
+
+static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ emit_addiw(rd, rs, 0, ctx);
+}
+
+static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ emit(rvzbb_zexth(rd, rs), ctx);
+ return;
+ }
+
+ emit_slli(rd, rs, 48, ctx);
+ emit_srli(rd, rd, 48, ctx);
+}
+
+static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ emit_slli(rd, rs, 32, ctx);
+ emit_srli(rd, rd, 32, ctx);
+}
+
+static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ int bits = 64 - imm;
+
+ emit(rvzbb_rev8(rd, rd), ctx);
+ if (bits)
+ emit_srli(rd, rd, bits, ctx);
+ return;
+ }
+
+ emit_li(RV_REG_T2, 0, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+ if (imm == 16)
+ goto out_be;
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+ if (imm == 32)
+ goto out_be;
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+out_be:
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+
+ emit_mv(rd, RV_REG_T2, ctx);
+}
+
#endif /* __riscv_xlen == 64 */
-void bpf_jit_build_prologue(struct rv_jit_context *ctx);
+void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog);
void bpf_jit_build_epilogue(struct rv_jit_context *ctx);
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index 529a83b85c1c..f5ba73bb153d 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -1301,7 +1301,7 @@ notsupported:
return 0;
}
-void bpf_jit_build_prologue(struct rv_jit_context *ctx)
+void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
{
const s8 *fp = bpf2rv32[BPF_REG_FP];
const s8 *r1 = bpf2rv32[BPF_REG_1];
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 58dc64dd94a8..aac190085472 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -11,6 +11,7 @@
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <asm/patch.h>
+#include <asm/cfi.h>
#include "bpf_jit.h"
#define RV_FENTRY_NINSNS 2
@@ -141,6 +142,19 @@ static bool in_auipc_jalr_range(s64 val)
val < ((1L << 31) - (1L << 11));
}
+/* Modify rd pointer to alternate reg to avoid corrupting original reg */
+static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
+{
+ emit_sextw(ra, *rd, ctx);
+ *rd = ra;
+}
+
+static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
+{
+ emit_zextw(ra, *rd, ctx);
+ *rd = ra;
+}
+
/* Emit fixed-length instructions for address */
static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
{
@@ -326,12 +340,6 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}
-static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
-{
- emit_slli(reg, reg, 32, ctx);
- emit_srli(reg, reg, 32, ctx);
-}
-
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
{
int tc_ninsn, off, start_insn = ctx->ninsns;
@@ -346,7 +354,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
*/
tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
ctx->offset[0];
- emit_zext_32(RV_REG_A2, ctx);
+ emit_zextw(RV_REG_A2, RV_REG_A2, ctx);
off = offsetof(struct bpf_array, map.max_entries);
if (is_12b_check(off, insn))
@@ -405,38 +413,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}
-static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
-{
- emit_mv(RV_REG_T2, *rd, ctx);
- emit_zext_32(RV_REG_T2, ctx);
- emit_mv(RV_REG_T1, *rs, ctx);
- emit_zext_32(RV_REG_T1, ctx);
- *rd = RV_REG_T2;
- *rs = RV_REG_T1;
-}
-
-static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
-{
- emit_addiw(RV_REG_T2, *rd, 0, ctx);
- emit_addiw(RV_REG_T1, *rs, 0, ctx);
- *rd = RV_REG_T2;
- *rs = RV_REG_T1;
-}
-
-static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
-{
- emit_mv(RV_REG_T2, *rd, ctx);
- emit_zext_32(RV_REG_T2, ctx);
- emit_zext_32(RV_REG_T1, ctx);
- *rd = RV_REG_T2;
-}
-
-static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
-{
- emit_addiw(RV_REG_T2, *rd, 0, ctx);
- *rd = RV_REG_T2;
-}
-
static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
struct rv_jit_context *ctx)
{
@@ -480,6 +456,12 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
}
+static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
+{
+ if (IS_ENABLED(CONFIG_CFI_CLANG))
+ emit(hash, ctx);
+}
+
static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
struct rv_jit_context *ctx)
{
@@ -519,32 +501,32 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
case BPF_AND | BPF_FETCH:
emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
rv_amoand_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
case BPF_OR | BPF_FETCH:
emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
rv_amoor_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
case BPF_XOR | BPF_FETCH:
emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
case BPF_XCHG:
emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
/* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
case BPF_CMPXCHG:
@@ -795,6 +777,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
bool save_ret;
u32 insn;
@@ -878,7 +861,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
stack_size = round_up(stack_size, 16);
- if (func_addr) {
+ if (!is_struct_ops) {
/* For the trampoline called from function entry,
* the frame of traced function and the frame of
* trampoline need to be considered.
@@ -893,6 +876,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
} else {
+ /* emit kcfi hash */
+ emit_kcfi(cfi_get_func_hash(func_addr), ctx);
/* For the trampoline called directly, just handle
* the frame of trampoline.
*/
@@ -998,7 +983,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
- if (func_addr) {
+ if (!is_struct_ops) {
/* trampoline called from function entry */
emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
@@ -1090,7 +1075,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_MOV | BPF_X:
if (imm == 1) {
/* Special mov32 for zext */
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
}
switch (insn->off) {
@@ -1098,16 +1083,17 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_mv(rd, rs, ctx);
break;
case 8:
+ emit_sextb(rd, rs, ctx);
+ break;
case 16:
- emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx);
- emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx);
+ emit_sexth(rd, rs, ctx);
break;
case 32:
- emit_addiw(rd, rs, 0, ctx);
+ emit_sextw(rd, rs, ctx);
break;
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = dst OP src */
@@ -1115,7 +1101,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_ADD | BPF_X:
emit_add(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
@@ -1125,31 +1111,31 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_subw(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
emit_and(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
emit_or(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
emit_xor(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_X:
emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1158,7 +1144,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
else
emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
@@ -1167,25 +1153,25 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
else
emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_RSH | BPF_X:
emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X:
emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = -dst */
@@ -1193,73 +1179,27 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_NEG:
emit_sub(rd, RV_REG_ZERO, rd, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = BSWAP##imm(dst) */
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
- emit_slli(rd, rd, 48, ctx);
- emit_srli(rd, rd, 48, ctx);
+ emit_zexth(rd, rd, ctx);
break;
case 32:
if (!aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case 64:
/* Do nothing */
break;
}
break;
-
case BPF_ALU | BPF_END | BPF_FROM_BE:
case BPF_ALU64 | BPF_END | BPF_FROM_LE:
- emit_li(RV_REG_T2, 0, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
- if (imm == 16)
- goto out_be;
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
- if (imm == 32)
- goto out_be;
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-out_be:
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
-
- emit_mv(rd, RV_REG_T2, ctx);
+ emit_bswap(rd, imm, ctx);
break;
/* dst = imm */
@@ -1267,7 +1207,7 @@ out_be:
case BPF_ALU64 | BPF_MOV | BPF_K:
emit_imm(rd, imm, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = dst OP imm */
@@ -1280,7 +1220,7 @@ out_be:
emit_add(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_K:
case BPF_ALU64 | BPF_SUB | BPF_K:
@@ -1291,7 +1231,7 @@ out_be:
emit_sub(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_K:
case BPF_ALU64 | BPF_AND | BPF_K:
@@ -1302,7 +1242,7 @@ out_be:
emit_and(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_K:
case BPF_ALU64 | BPF_OR | BPF_K:
@@ -1313,7 +1253,7 @@ out_be:
emit_or(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_K:
case BPF_ALU64 | BPF_XOR | BPF_K:
@@ -1324,7 +1264,7 @@ out_be:
emit_xor(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_K:
@@ -1332,7 +1272,7 @@ out_be:
emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
rv_mulw(rd, rd, RV_REG_T1), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_K:
@@ -1344,7 +1284,7 @@ out_be:
emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
rv_divuw(rd, rd, RV_REG_T1), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_K:
@@ -1356,14 +1296,14 @@ out_be:
emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
rv_remuw(rd, rd, RV_REG_T1), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
emit_slli(rd, rd, imm, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU64 | BPF_RSH | BPF_K:
@@ -1373,7 +1313,7 @@ out_be:
emit(rv_srliw(rd, rd, imm), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU64 | BPF_ARSH | BPF_K:
@@ -1383,7 +1323,7 @@ out_be:
emit(rv_sraiw(rd, rd, imm), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* JUMP off */
@@ -1424,10 +1364,13 @@ out_be:
rvoff = rv_offset(i, off, ctx);
if (!is64) {
s = ctx->ninsns;
- if (is_signed_bpf_cond(BPF_OP(code)))
- emit_sext_32_rd_rs(&rd, &rs, ctx);
- else
- emit_zext_32_rd_rs(&rd, &rs, ctx);
+ if (is_signed_bpf_cond(BPF_OP(code))) {
+ emit_sextw_alt(&rs, RV_REG_T1, ctx);
+ emit_sextw_alt(&rd, RV_REG_T2, ctx);
+ } else {
+ emit_zextw_alt(&rs, RV_REG_T1, ctx);
+ emit_zextw_alt(&rd, RV_REG_T2, ctx);
+ }
e = ctx->ninsns;
/* Adjust for extra insns */
@@ -1438,8 +1381,7 @@ out_be:
/* Adjust for and */
rvoff -= 4;
emit_and(RV_REG_T1, rd, rs, ctx);
- emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
- ctx);
+ emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
} else {
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
}
@@ -1468,18 +1410,18 @@ out_be:
case BPF_JMP32 | BPF_JSLE | BPF_K:
rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns;
- if (imm) {
+ if (imm)
emit_imm(RV_REG_T1, imm, ctx);
- rs = RV_REG_T1;
- } else {
- /* If imm is 0, simply use zero register. */
- rs = RV_REG_ZERO;
- }
+ rs = imm ? RV_REG_T1 : RV_REG_ZERO;
if (!is64) {
- if (is_signed_bpf_cond(BPF_OP(code)))
- emit_sext_32_rd(&rd, ctx);
- else
- emit_zext_32_rd_t1(&rd, ctx);
+ if (is_signed_bpf_cond(BPF_OP(code))) {
+ emit_sextw_alt(&rd, RV_REG_T2, ctx);
+ /* rs has been sign extended */
+ } else {
+ emit_zextw_alt(&rd, RV_REG_T2, ctx);
+ if (imm)
+ emit_zextw(rs, rs, ctx);
+ }
}
e = ctx->ninsns;
@@ -1503,7 +1445,7 @@ out_be:
* as t1 is used only in comparison against zero.
*/
if (!is64 && imm < 0)
- emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
+ emit_sextw(RV_REG_T1, RV_REG_T1, ctx);
e = ctx->ninsns;
rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
@@ -1778,7 +1720,7 @@ out_be:
return 0;
}
-void bpf_jit_build_prologue(struct rv_jit_context *ctx)
+void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
{
int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
@@ -1807,6 +1749,9 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
store_offset = stack_adjust - 8;
+ /* emit kcfi type preamble immediately before the first insn */
+ emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx);
+
/* nops reserved for auipc+jalr pair */
for (i = 0; i < RV_FENTRY_NINSNS; i++)
emit(rv_nop(), ctx);
@@ -1873,3 +1818,8 @@ bool bpf_jit_supports_kfunc_call(void)
{
return true;
}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+ return true;
+}
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 7b70ccb7fec3..6b3acac30c06 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -10,6 +10,7 @@
#include <linux/filter.h>
#include <linux/memory.h>
#include <asm/patch.h>
+#include <asm/cfi.h>
#include "bpf_jit.h"
/* Number of iterations to try until offsets converge. */
@@ -100,7 +101,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
pass++;
ctx->ninsns = 0;
- bpf_jit_build_prologue(ctx);
+ bpf_jit_build_prologue(ctx, bpf_is_subprog(prog));
ctx->prologue_len = ctx->ninsns;
if (build_body(ctx, extra_pass, ctx->offset)) {
@@ -160,7 +161,7 @@ skip_init_ctx:
ctx->ninsns = 0;
ctx->nexentries = 0;
- bpf_jit_build_prologue(ctx);
+ bpf_jit_build_prologue(ctx, bpf_is_subprog(prog));
if (build_body(ctx, extra_pass, NULL)) {
prog = orig_prog;
goto out_free_hdr;
@@ -170,9 +171,9 @@ skip_init_ctx:
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
- prog->bpf_func = (void *)ctx->ro_insns;
+ prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset();
prog->jited = 1;
- prog->jited_len = prog_size;
+ prog->jited_len = prog_size - cfi_get_offset();
if (!prog->is_func || extra_pass) {
if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header,
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index fe565f3a3a91..9e52461f35cb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -82,7 +82,7 @@ config S390
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYSCALL_WRAPPER
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_HAS_VDSO_DATA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
@@ -127,6 +127,7 @@ config S390
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_KERNEL_PMD_MKWRITE
+ select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
@@ -199,6 +200,7 @@ config S390
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_NOP_MCOUNT
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -448,7 +450,7 @@ config COMPAT
select COMPAT_OLD_SIGACTION
select HAVE_UID16
depends on MULTIUSER
- depends on !CC_IS_CLANG
+ depends on !CC_IS_CLANG && !LD_IS_LLD
help
Select this option if you want to enable your system kernel to
handle system-calls from ELF binaries for 31 bit ESA. This option
@@ -582,14 +584,23 @@ config RELOCATABLE
help
This builds a kernel image that retains relocation information
so it can be loaded at an arbitrary address.
- The kernel is linked as a position-independent executable (PIE)
- and contains dynamic relocations which are processed early in the
- bootup process.
The relocations make the kernel image about 15% larger (compressed
10%), but are discarded at runtime.
Note: this option exists only for documentation purposes, please do
not remove it.
+config PIE_BUILD
+ def_bool CC_IS_CLANG && !$(cc-option,-munaligned-symbols)
+ help
+ If the compiler is unable to generate code that can manage unaligned
+ symbols, the kernel is linked as a position-independent executable
+ (PIE) and includes dynamic relocations that are processed early
+ during bootup.
+
+ For kpatch functionality, it is recommended to build the kernel
+ without the PIE_BUILD option. PIE_BUILD is only enabled when the
+ compiler lacks proper support for handling unaligned symbols.
+
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image (KASLR)"
default y
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 73873e451686..2a58e1864931 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,8 +14,14 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
+ifdef CONFIG_PIE_BUILD
KBUILD_CFLAGS += -fPIE
-LDFLAGS_vmlinux := -pie
+LDFLAGS_vmlinux := -pie -z notext
+else
+KBUILD_CFLAGS += $(call cc-option,-munaligned-symbols,)
+LDFLAGS_vmlinux := --emit-relocs --discard-none
+extra_tools := relocs
+endif
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
@@ -143,7 +149,7 @@ archheaders:
archprepare:
$(Q)$(MAKE) $(build)=$(syscalls) kapi
- $(Q)$(MAKE) $(build)=$(tools) kapi
+ $(Q)$(MAKE) $(build)=$(tools) kapi $(extra_tools)
ifeq ($(KBUILD_EXTMOD),)
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
# In order to do that, we should use the archprepare target, but we can't since
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index f56591bc0897..f5ef099e2fd3 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
image
bzImage
+relocs.S
section_cmp.*
vmlinux
vmlinux.lds
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index c7c81e5f9218..294f08a8811a 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,7 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
+obj-y += $(if $(CONFIG_PIE_BUILD),machine_kexec_reloc.o,relocs.o)
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
@@ -48,6 +49,9 @@ targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y
targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
+ifndef CONFIG_PIE_BUILD
+targets += relocs.S
+endif
OBJECTS := $(addprefix $(obj)/,$(obj-y))
OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
@@ -56,9 +60,9 @@ clean-files += vmlinux.map
quiet_cmd_section_cmp = SECTCMP $*
define cmd_section_cmp
- s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \
+ s1=`$(OBJDUMP) -t "$<" | grep "\s$*\s\+" | sort | \
sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
- s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \
+ s2=`$(OBJDUMP) -t "$(word 2,$^)" | grep "\s$*\s\+" | sort | \
sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
if [ "$$s1" != "$$s2" ]; then \
echo "error: section $* differs between $< and $(word 2,$^)" >&2; \
@@ -73,11 +77,12 @@ $(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.b
$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
$(call if_changed,section_cmp)
-LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
+LDFLAGS_vmlinux-$(CONFIG_LD_ORPHAN_WARN) := --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
+LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
$(call if_changed,ld)
-LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
+LDFLAGS_vmlinux.syms := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup -T
$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
$(call if_changed,ld)
@@ -93,7 +98,7 @@ OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .
$(obj)/syms.o: $(obj)/syms.bin FORCE
$(call if_changed,objcopy)
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=alloc,load
$(obj)/info.bin: vmlinux FORCE
$(call if_changed,objcopy)
@@ -105,6 +110,14 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
+ifndef CONFIG_PIE_BUILD
+CMD_RELOCS=arch/s390/tools/relocs
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(CMD_RELOCS) $< > $@
+$(obj)/relocs.S: vmlinux FORCE
+ $(call if_changed,relocs)
+endif
+
suffix-$(CONFIG_KERNEL_GZIP) := .gz
suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
suffix-$(CONFIG_KERNEL_LZ4) := .lz4
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 222c6886acf6..567d60f78bbc 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -25,9 +25,14 @@ struct vmlinux_info {
unsigned long bootdata_size;
unsigned long bootdata_preserved_off;
unsigned long bootdata_preserved_size;
+#ifdef CONFIG_PIE_BUILD
unsigned long dynsym_start;
unsigned long rela_dyn_start;
unsigned long rela_dyn_end;
+#else
+ unsigned long got_start;
+ unsigned long got_end;
+#endif
unsigned long amode31_size;
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
@@ -83,6 +88,7 @@ extern unsigned long vmalloc_size;
extern int vmalloc_size_set;
extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
+extern char __vmlinux_relocs_64_start[], __vmlinux_relocs_64_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[];
extern char _end[], _decompressor_end[];
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 9cc76e631759..6cf89314209a 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -141,7 +141,8 @@ static void copy_bootdata(void)
memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}
-static void handle_relocs(unsigned long offset)
+#ifdef CONFIG_PIE_BUILD
+static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
{
Elf64_Rela *rela_start, *rela_end, *rela;
int r_type, r_sym, rc;
@@ -172,6 +173,54 @@ static void handle_relocs(unsigned long offset)
}
}
+static void kaslr_adjust_got(unsigned long offset) {}
+static void rescue_relocs(void) {}
+static void free_relocs(void) {}
+#else
+static int *vmlinux_relocs_64_start;
+static int *vmlinux_relocs_64_end;
+
+static void rescue_relocs(void)
+{
+ unsigned long size = __vmlinux_relocs_64_end - __vmlinux_relocs_64_start;
+
+ vmlinux_relocs_64_start = (void *)physmem_alloc_top_down(RR_RELOC, size, 0);
+ vmlinux_relocs_64_end = (void *)vmlinux_relocs_64_start + size;
+ memmove(vmlinux_relocs_64_start, __vmlinux_relocs_64_start, size);
+}
+
+static void free_relocs(void)
+{
+ physmem_free(RR_RELOC);
+}
+
+static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
+{
+ int *reloc;
+ long loc;
+
+ /* Adjust R_390_64 relocations */
+ for (reloc = vmlinux_relocs_64_start; reloc < vmlinux_relocs_64_end; reloc++) {
+ loc = (long)*reloc + offset;
+ if (loc < min_addr || loc > max_addr)
+ error("64-bit relocation outside of kernel!\n");
+ *(u64 *)loc += offset;
+ }
+}
+
+static void kaslr_adjust_got(unsigned long offset)
+{
+ u64 *entry;
+
+ /*
+ * Even without -fPIE, Clang still uses a global offset table for some
+ * reason. Adjust the GOT entries.
+ */
+ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
+ *entry += offset;
+}
+#endif
+
/*
* Merge information from several sources into a single ident_map_size value.
* "ident_map_size" represents the upper limit of physical memory we may ever
@@ -299,14 +348,19 @@ static void setup_vmalloc_size(void)
vmalloc_size = max(size, vmalloc_size);
}
-static void offset_vmlinux_info(unsigned long offset)
+static void kaslr_adjust_vmlinux_info(unsigned long offset)
{
*(unsigned long *)(&vmlinux.entry) += offset;
vmlinux.bootdata_off += offset;
vmlinux.bootdata_preserved_off += offset;
+#ifdef CONFIG_PIE_BUILD
vmlinux.rela_dyn_start += offset;
vmlinux.rela_dyn_end += offset;
vmlinux.dynsym_start += offset;
+#else
+ vmlinux.got_start += offset;
+ vmlinux.got_end += offset;
+#endif
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
@@ -361,6 +415,7 @@ void startup_kernel(void)
detect_physmem_online_ranges(max_physmem_end);
save_ipl_cert_comp_list();
rescue_initrd(safe_addr, ident_map_size);
+ rescue_relocs();
if (kaslr_enabled()) {
vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size,
@@ -368,7 +423,7 @@ void startup_kernel(void)
ident_map_size);
if (vmlinux_lma) {
__kaslr_offset = vmlinux_lma - vmlinux.default_lma;
- offset_vmlinux_info(__kaslr_offset);
+ kaslr_adjust_vmlinux_info(__kaslr_offset);
}
}
vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
@@ -393,18 +448,20 @@ void startup_kernel(void)
/*
* The order of the following operations is important:
*
- * - handle_relocs() must follow clear_bss_section() to establish static
- * memory references to data in .bss to be used by setup_vmem()
+ * - kaslr_adjust_relocs() must follow clear_bss_section() to establish
+ * static memory references to data in .bss to be used by setup_vmem()
* (i.e init_mm.pgd)
*
- * - setup_vmem() must follow handle_relocs() to be able using
+ * - setup_vmem() must follow kaslr_adjust_relocs() to be able using
* static memory references to data in .bss (i.e init_mm.pgd)
*
- * - copy_bootdata() must follow setup_vmem() to propagate changes to
- * bootdata made by setup_vmem()
+ * - copy_bootdata() must follow setup_vmem() to propagate changes
+ * to bootdata made by setup_vmem()
*/
clear_bss_section(vmlinux_lma);
- handle_relocs(__kaslr_offset);
+ kaslr_adjust_relocs(vmlinux_lma, vmlinux_lma + vmlinux.image_size, __kaslr_offset);
+ kaslr_adjust_got(__kaslr_offset);
+ free_relocs();
setup_vmem(asce_limit);
copy_bootdata();
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 389df0e0d9e5..3d7ea585ab99 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -31,6 +31,7 @@ SECTIONS
_text = .; /* Text */
*(.text)
*(.text.*)
+ INIT_TEXT
_etext = . ;
}
.rodata : {
@@ -39,6 +40,9 @@ SECTIONS
*(.rodata.*)
_erodata = . ;
}
+ .got : {
+ *(.got)
+ }
NOTES
.data : {
_data = . ;
@@ -106,6 +110,24 @@ SECTIONS
_compressed_end = .;
}
+#ifndef CONFIG_PIE_BUILD
+ /*
+ * When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
+ * uncompressed vmlinux.bin is positioned in the bzImage decompressor
+ * image at the default kernel LMA of 0x100000, enabling it to be
+ * executed in-place. However, the size of .vmlinux.relocs could be
+ * large enough to cause an overlap with the uncompressed kernel at the
+ * address 0x100000. To address this issue, .vmlinux.relocs is
+ * positioned after the .rodata.compressed.
+ */
+ . = ALIGN(4);
+ .vmlinux.relocs : {
+ __vmlinux_relocs_64_start = .;
+ *(.vmlinux.relocs_64)
+ __vmlinux_relocs_64_end = .;
+ }
+#endif
+
#define SB_TRAILER_SIZE 32
/* Trailer needed for Secure Boot */
. += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
@@ -118,8 +140,34 @@ SECTIONS
}
_end = .;
+ DWARF_DEBUG
+ ELF_DETAILS
+
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the three reserved double words.
+ */
+ .got.plt : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
/* Sections to be discarded */
/DISCARD/ : {
+ COMMON_DISCARDS
*(.eh_frame)
*(__ex_table)
*(*__ksymtab*)
diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config
new file mode 100644
index 000000000000..6fd051453ae8
--- /dev/null
+++ b/arch/s390/configs/compat.config
@@ -0,0 +1,3 @@
+# Help: Enable compat support
+CONFIG_COMPAT=y
+CONFIG_COMPAT_32BIT_TIME=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index cae2dd34fbb4..4032e6e136ac 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -118,7 +118,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
@@ -374,6 +373,7 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -436,9 +436,6 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -637,7 +634,6 @@ CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_NETFS_SUPPORT=m
CONFIG_NETFS_STATS=y
CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
@@ -709,7 +705,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
-CONFIG_INIT_STACK_NONE=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
@@ -739,7 +734,6 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
@@ -830,6 +824,8 @@ CONFIG_TEST_LOCKUP=m
CONFIG_DEBUG_PREEMPT=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
+CONFIG_LOCKDEP_BITS=16
+CONFIG_LOCKDEP_CHAINS_BITS=17
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_DEBUG_IRQFLAGS=y
@@ -886,4 +882,3 @@ CONFIG_ATOMIC64_SELFTEST=y
CONFIG_STRING_SELFTEST=y
CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
-CONFIG_TEST_LIVEPATCH=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 42b988873e54..d33f814f78b2 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -109,7 +109,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
@@ -364,6 +363,7 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -426,9 +426,6 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -622,7 +619,6 @@ CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_NETFS_SUPPORT=m
CONFIG_NETFS_STATS=y
CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
@@ -693,7 +689,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
-CONFIG_INIT_STACK_NONE=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
@@ -724,11 +719,9 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
@@ -815,4 +808,3 @@ CONFIG_KPROBES_SANITY_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
-CONFIG_TEST_LIVEPATCH=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 30d2a1687665..c51f3ec4eb28 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=2
@@ -64,7 +65,6 @@ CONFIG_ZFCP=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
-CONFIG_INIT_STACK_NONE=y
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index ed9959e6f714..f8b0c52e77a4 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -15,14 +15,14 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#include "chacha-s390.h"
static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
unsigned int nbytes, const u32 *key,
u32 *counter)
{
- struct kernel_fpu vxstate;
+ DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
kernel_fpu_begin(&vxstate, KERNEL_VXR);
chacha20_vx(dst, src, nbytes, key, counter);
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
index 37cb63f25b17..63f3102678c0 100644
--- a/arch/s390/crypto/chacha-s390.S
+++ b/arch/s390/crypto/chacha-s390.S
@@ -8,7 +8,7 @@
#include <linux/linkage.h>
#include <asm/nospec-insn.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu-insn.h>
#define SP %r15
#define FRAME (16 * 8 + 4 * 8)
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 017143e9cef7..74f17c905d12 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -13,8 +13,8 @@
#include <linux/cpufeature.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
-#include <asm/fpu/api.h>
-
+#include <asm/fpu.h>
+#include "crc32-vx.h"
#define CRC32_BLOCK_SIZE 1
#define CRC32_DIGEST_SIZE 4
@@ -31,11 +31,6 @@ struct crc_desc_ctx {
u32 crc;
};
-/* Prototypes for functions in assembly files */
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-
/*
* DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
*
@@ -49,8 +44,8 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
static u32 __pure ___fname(u32 crc, \
unsigned char const *data, size_t datalen) \
{ \
- struct kernel_fpu vxstate; \
unsigned long prealign, aligned, remaining; \
+ DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
\
if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
return ___crc32_sw(crc, data, datalen); \
diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/crypto/crc32-vx.h
new file mode 100644
index 000000000000..652c96e1a822
--- /dev/null
+++ b/arch/s390/crypto/crc32-vx.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CRC32_VX_S390_H
+#define _CRC32_VX_S390_H
+
+#include <linux/types.h>
+
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+
+#endif /* _CRC32_VX_S390_H */
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.c
index 34ee47926891..fed7c9c70d05 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.c
@@ -12,20 +12,17 @@
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/vx-insn.h>
+#include <linux/types.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
/* Vector register range containing CRC-32 constants */
-#define CONST_R1R2 %v9
-#define CONST_R3R4 %v10
-#define CONST_R5 %v11
-#define CONST_R6 %v12
-#define CONST_RU_POLY %v13
-#define CONST_CRC_POLY %v14
-
- .data
- .balign 8
+#define CONST_R1R2 9
+#define CONST_R3R4 10
+#define CONST_R5 11
+#define CONST_R6 12
+#define CONST_RU_POLY 13
+#define CONST_CRC_POLY 14
/*
* The CRC-32 constant block contains reduction constants to fold and
@@ -58,105 +55,74 @@
* P'(x) = 0xEDB88320
*/
-SYM_DATA_START_LOCAL(constants_CRC_32_BE)
- .quad 0x08833794c, 0x0e6228b11 # R1, R2
- .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4
- .quad 0x0f200aa66, 1 << 32 # R5, x32
- .quad 0x0490d678d, 1 # R6, 1
- .quad 0x104d101df, 0 # u
- .quad 0x104C11DB7, 0 # P(x)
-SYM_DATA_END(constants_CRC_32_BE)
-
- .previous
-
- GEN_BR_THUNK %r14
-
- .text
-/*
- * The CRC-32 function(s) use these calling conventions:
- *
- * Parameters:
- *
- * %r2: Initial CRC value, typically ~0; and final CRC (return) value.
- * %r3: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * %r4: Length of the buffer, must be 64 bytes or greater.
+static unsigned long constants_CRC_32_BE[] = {
+ 0x08833794c, 0x0e6228b11, /* R1, R2 */
+ 0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */
+ 0x0f200aa66, 1UL << 32, /* R5, x32 */
+ 0x0490d678d, 1, /* R6, 1 */
+ 0x104d101df, 0, /* u */
+ 0x104C11DB7, 0, /* P(x) */
+};
+
+/**
+ * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
+ * @crc: Initial CRC value, typically ~0.
+ * @buf: Input buffer pointer, performance might be improved if the
+ * buffer is on a doubleword boundary.
+ * @size: Size of the buffer, must be 64 bytes or greater.
*
* Register usage:
- *
- * %r5: CRC-32 constant pool base pointer.
* V0: Initial CRC value and intermediate constants and results.
* V1..V4: Data for CRC computation.
* V5..V8: Next data chunks that are fetched from the input buffer.
- *
* V9..V14: CRC-32 constants.
*/
-SYM_FUNC_START(crc32_be_vgfm_16)
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
+{
/* Load CRC-32 constants */
- larl %r5,constants_CRC_32_BE
- VLM CONST_R1R2,CONST_CRC_POLY,0,%r5
+ fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
+ fpu_vzero(0);
/* Load the initial CRC value into the leftmost word of V0. */
- VZERO %v0
- VLVGF %v0,%r2,0
+ fpu_vlvgf(0, crc, 0);
/* Load a 64-byte data chunk and XOR with CRC */
- VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
- VX %v1,%v0,%v1 /* V1 ^= CRC */
- aghi %r3,64 /* BUF = BUF + 64 */
- aghi %r4,-64 /* LEN = LEN - 64 */
-
- /* Check remaining buffer size and jump to proper folding method */
- cghi %r4,64
- jl .Lless_than_64bytes
-
-.Lfold_64bytes_loop:
- /* Load the next 64-byte data chunk into V5 to V8 */
- VLM %v5,%v8,0,%r3
+ fpu_vlm(1, 4, buf);
+ fpu_vx(1, 0, 1);
+ buf += 64;
+ size -= 64;
+
+ while (size >= 64) {
+ /* Load the next 64-byte data chunk into V5 to V8 */
+ fpu_vlm(5, 8, buf);
+
+ /*
+ * Perform a GF(2) multiplication of the doublewords in V1 with
+ * the reduction constants in V0. The intermediate result is
+ * then folded (accumulated) with the next data chunk in V5 and
+ * stored in V1. Repeat this step for the register contents
+ * in V2, V3, and V4 respectively.
+ */
+ fpu_vgfmag(1, CONST_R1R2, 1, 5);
+ fpu_vgfmag(2, CONST_R1R2, 2, 6);
+ fpu_vgfmag(3, CONST_R1R2, 3, 7);
+ fpu_vgfmag(4, CONST_R1R2, 4, 8);
+ buf += 64;
+ size -= 64;
+ }
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the reduction constants in V0. The intermediate result is
- * then folded (accumulated) with the next data chunk in V5 and
- * stored in V1. Repeat this step for the register contents
- * in V2, V3, and V4 respectively.
- */
- VGFMAG %v1,CONST_R1R2,%v1,%v5
- VGFMAG %v2,CONST_R1R2,%v2,%v6
- VGFMAG %v3,CONST_R1R2,%v3,%v7
- VGFMAG %v4,CONST_R1R2,%v4,%v8
-
- /* Adjust buffer pointer and length for next loop */
- aghi %r3,64 /* BUF = BUF + 64 */
- aghi %r4,-64 /* LEN = LEN - 64 */
-
- cghi %r4,64
- jnl .Lfold_64bytes_loop
-
-.Lless_than_64bytes:
/* Fold V1 to V4 into a single 128-bit value in V1 */
- VGFMAG %v1,CONST_R3R4,%v1,%v2
- VGFMAG %v1,CONST_R3R4,%v1,%v3
- VGFMAG %v1,CONST_R3R4,%v1,%v4
-
- /* Check whether to continue with 64-bit folding */
- cghi %r4,16
- jl .Lfinal_fold
+ fpu_vgfmag(1, CONST_R3R4, 1, 2);
+ fpu_vgfmag(1, CONST_R3R4, 1, 3);
+ fpu_vgfmag(1, CONST_R3R4, 1, 4);
-.Lfold_16bytes_loop:
+ while (size >= 16) {
+ fpu_vl(2, buf);
+ fpu_vgfmag(1, CONST_R3R4, 1, 2);
+ buf += 16;
+ size -= 16;
+ }
- VL %v2,0,,%r3 /* Load next data chunk */
- VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */
-
- /* Adjust buffer pointer and size for folding next data chunk */
- aghi %r3,16
- aghi %r4,-16
-
- /* Process remaining data chunks */
- cghi %r4,16
- jnl .Lfold_16bytes_loop
-
-.Lfinal_fold:
/*
* The R5 constant is used to fold a 128-bit value into an 96-bit value
* that is XORed with the next 96-bit input data chunk. To use a single
@@ -164,7 +130,7 @@ SYM_FUNC_START(crc32_be_vgfm_16)
* form an intermediate 96-bit value (with appended zeros) which is then
* XORed with the intermediate reduction result.
*/
- VGFMG %v1,CONST_R5,%v1
+ fpu_vgfmg(1, CONST_R5, 1);
/*
* Further reduce the remaining 96-bit value to a 64-bit value using a
@@ -173,7 +139,7 @@ SYM_FUNC_START(crc32_be_vgfm_16)
* doubleword with R6. The result is a 64-bit value and is subject to
* the Barret reduction.
*/
- VGFMG %v1,CONST_R6,%v1
+ fpu_vgfmg(1, CONST_R6, 1);
/*
* The input values to the Barret reduction are the degree-63 polynomial
@@ -194,20 +160,15 @@ SYM_FUNC_START(crc32_be_vgfm_16)
*/
/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- VUPLLF %v2,%v1
- VGFMG %v2,CONST_RU_POLY,%v2
+ fpu_vupllf(2, 1);
+ fpu_vgfmg(2, CONST_RU_POLY, 2);
/*
* Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
* V2 and XOR the intermediate result, T2(x), with the value in V1.
* The final result is in the rightmost word of V2.
*/
- VUPLLF %v2,%v2
- VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
-
-.Ldone:
- VLGVF %r2,%v2,3
- BR_EX %r14
-SYM_FUNC_END(crc32_be_vgfm_16)
-
-.previous
+ fpu_vupllf(2, 2);
+ fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
+ return fpu_vlgvf(2, 3);
+}
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.c
index 5a819ae09a0b..2f629f394df7 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.c
@@ -13,20 +13,17 @@
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/vx-insn.h>
+#include <linux/types.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
/* Vector register range containing CRC-32 constants */
-#define CONST_PERM_LE2BE %v9
-#define CONST_R2R1 %v10
-#define CONST_R4R3 %v11
-#define CONST_R5 %v12
-#define CONST_RU_POLY %v13
-#define CONST_CRC_POLY %v14
-
- .data
- .balign 8
+#define CONST_PERM_LE2BE 9
+#define CONST_R2R1 10
+#define CONST_R4R3 11
+#define CONST_R5 12
+#define CONST_RU_POLY 13
+#define CONST_CRC_POLY 14
/*
* The CRC-32 constant block contains reduction constants to fold and
@@ -59,64 +56,43 @@
* P'(x) = 0x82F63B78
*/
-SYM_DATA_START_LOCAL(constants_CRC_32_LE)
- .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask
- .quad 0x1c6e41596, 0x154442bd4 # R2, R1
- .quad 0x0ccaa009e, 0x1751997d0 # R4, R3
- .octa 0x163cd6124 # R5
- .octa 0x1F7011641 # u'
- .octa 0x1DB710641 # P'(x) << 1
-SYM_DATA_END(constants_CRC_32_LE)
-
-SYM_DATA_START_LOCAL(constants_CRC_32C_LE)
- .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask
- .quad 0x09e4addf8, 0x740eef02 # R2, R1
- .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3
- .octa 0x0dd45aab8 # R5
- .octa 0x0dea713f1 # u'
- .octa 0x105ec76f0 # P'(x) << 1
-SYM_DATA_END(constants_CRC_32C_LE)
-
- .previous
-
- GEN_BR_THUNK %r14
-
- .text
-
-/*
- * The CRC-32 functions use these calling conventions:
- *
- * Parameters:
- *
- * %r2: Initial CRC value, typically ~0; and final CRC (return) value.
- * %r3: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * %r4: Length of the buffer, must be 64 bytes or greater.
+static unsigned long constants_CRC_32_LE[] = {
+ 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
+ 0x1c6e41596, 0x154442bd4, /* R2, R1 */
+ 0x0ccaa009e, 0x1751997d0, /* R4, R3 */
+ 0x0, 0x163cd6124, /* R5 */
+ 0x0, 0x1f7011641, /* u' */
+ 0x0, 0x1db710641 /* P'(x) << 1 */
+};
+
+static unsigned long constants_CRC_32C_LE[] = {
+ 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
+ 0x09e4addf8, 0x740eef02, /* R2, R1 */
+ 0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */
+ 0x0, 0x0dd45aab8, /* R5 */
+ 0x0, 0x0dea713f1, /* u' */
+ 0x0, 0x105ec76f0 /* P'(x) << 1 */
+};
+
+/**
+ * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
+ * @crc: Initial CRC value, typically ~0.
+ * @buf: Input buffer pointer, performance might be improved if the
+ * buffer is on a doubleword boundary.
+ * @size: Size of the buffer, must be 64 bytes or greater.
+ * @constants: CRC-32 constant pool base pointer.
*
* Register usage:
- *
- * %r5: CRC-32 constant pool base pointer.
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9: Constant for BE->LE conversion and shift operations
- *
+ * V0: Initial CRC value and intermediate constants and results.
+ * V1..V4: Data for CRC computation.
+ * V5..V8: Next data chunks that are fetched from the input buffer.
+ * V9: Constant for BE->LE conversion and shift operations
* V10..V14: CRC-32 constants.
*/
-
-SYM_FUNC_START(crc32_le_vgfm_16)
- larl %r5,constants_CRC_32_LE
- j crc32_le_vgfm_generic
-SYM_FUNC_END(crc32_le_vgfm_16)
-
-SYM_FUNC_START(crc32c_le_vgfm_16)
- larl %r5,constants_CRC_32C_LE
- j crc32_le_vgfm_generic
-SYM_FUNC_END(crc32c_le_vgfm_16)
-
-SYM_FUNC_START(crc32_le_vgfm_generic)
+static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
+{
/* Load CRC-32 constants */
- VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
+ fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
/*
* Load the initial CRC value.
@@ -125,90 +101,73 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
* vector register and is later XORed with the LSB portion
* of the loaded input data.
*/
- VZERO %v0 /* Clear V0 */
- VLVGF %v0,%r2,3 /* Load CRC into rightmost word */
+ fpu_vzero(0); /* Clear V0 */
+ fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */
/* Load a 64-byte data chunk and XOR with CRC */
- VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
- VPERM %v1,%v1,%v1,CONST_PERM_LE2BE
- VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
- VPERM %v3,%v3,%v3,CONST_PERM_LE2BE
- VPERM %v4,%v4,%v4,CONST_PERM_LE2BE
+ fpu_vlm(1, 4, buf);
+ fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
+ fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
+ fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
+ fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
+
+ fpu_vx(1, 0, 1); /* V1 ^= CRC */
+ buf += 64;
+ size -= 64;
+
+ while (size >= 64) {
+ fpu_vlm(5, 8, buf);
+ fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
+ fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
+ fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
+ fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
+ /*
+ * Perform a GF(2) multiplication of the doublewords in V1 with
+ * the R1 and R2 reduction constants in V0. The intermediate
+ * result is then folded (accumulated) with the next data chunk
+ * in V5 and stored in V1. Repeat this step for the register
+ * contents in V2, V3, and V4 respectively.
+ */
+ fpu_vgfmag(1, CONST_R2R1, 1, 5);
+ fpu_vgfmag(2, CONST_R2R1, 2, 6);
+ fpu_vgfmag(3, CONST_R2R1, 3, 7);
+ fpu_vgfmag(4, CONST_R2R1, 4, 8);
+ buf += 64;
+ size -= 64;
+ }
- VX %v1,%v0,%v1 /* V1 ^= CRC */
- aghi %r3,64 /* BUF = BUF + 64 */
- aghi %r4,-64 /* LEN = LEN - 64 */
-
- cghi %r4,64
- jl .Lless_than_64bytes
-
-.Lfold_64bytes_loop:
- /* Load the next 64-byte data chunk into V5 to V8 */
- VLM %v5,%v8,0,%r3
- VPERM %v5,%v5,%v5,CONST_PERM_LE2BE
- VPERM %v6,%v6,%v6,CONST_PERM_LE2BE
- VPERM %v7,%v7,%v7,CONST_PERM_LE2BE
- VPERM %v8,%v8,%v8,CONST_PERM_LE2BE
-
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the R1 and R2 reduction constants in V0. The intermediate result
- * is then folded (accumulated) with the next data chunk in V5 and
- * stored in V1. Repeat this step for the register contents
- * in V2, V3, and V4 respectively.
- */
- VGFMAG %v1,CONST_R2R1,%v1,%v5
- VGFMAG %v2,CONST_R2R1,%v2,%v6
- VGFMAG %v3,CONST_R2R1,%v3,%v7
- VGFMAG %v4,CONST_R2R1,%v4,%v8
-
- aghi %r3,64 /* BUF = BUF + 64 */
- aghi %r4,-64 /* LEN = LEN - 64 */
-
- cghi %r4,64
- jnl .Lfold_64bytes_loop
-
-.Lless_than_64bytes:
/*
* Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
* and R4 and accumulating the next 128-bit chunk until a single 128-bit
* value remains.
*/
- VGFMAG %v1,CONST_R4R3,%v1,%v2
- VGFMAG %v1,CONST_R4R3,%v1,%v3
- VGFMAG %v1,CONST_R4R3,%v1,%v4
-
- cghi %r4,16
- jl .Lfinal_fold
-
-.Lfold_16bytes_loop:
-
- VL %v2,0,,%r3 /* Load next data chunk */
- VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
- VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */
+ fpu_vgfmag(1, CONST_R4R3, 1, 2);
+ fpu_vgfmag(1, CONST_R4R3, 1, 3);
+ fpu_vgfmag(1, CONST_R4R3, 1, 4);
+
+ while (size >= 16) {
+ fpu_vl(2, buf);
+ fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
+ fpu_vgfmag(1, CONST_R4R3, 1, 2);
+ buf += 16;
+ size -= 16;
+ }
- aghi %r3,16
- aghi %r4,-16
-
- cghi %r4,16
- jnl .Lfold_16bytes_loop
-
-.Lfinal_fold:
/*
* Set up a vector register for byte shifts. The shift value must
* be loaded in bits 1-4 in byte element 7 of a vector register.
* Shift by 8 bytes: 0x40
* Shift by 4 bytes: 0x20
*/
- VLEIB %v9,0x40,7
+ fpu_vleib(9, 0x40, 7);
/*
* Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
* to move R4 into the rightmost doubleword and set the leftmost
* doubleword to 0x1.
*/
- VSRLB %v0,CONST_R4R3,%v9
- VLEIG %v0,1,0
+ fpu_vsrlb(0, CONST_R4R3, 9);
+ fpu_vleig(0, 1, 0);
/*
* Compute GF(2) product of V1 and V0. The rightmost doubleword
@@ -216,7 +175,7 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
* multiplied by 0x1 and is then XORed with rightmost product.
* Implicitly, the intermediate leftmost product becomes padded
*/
- VGFMG %v1,%v0,%v1
+ fpu_vgfmg(1, 0, 1);
/*
* Now do the final 32-bit fold by multiplying the rightmost word
@@ -231,10 +190,10 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
* rightmost doubleword and the leftmost doubleword is zero to ignore
* the leftmost product of V1.
*/
- VLEIB %v9,0x20,7 /* Shift by words */
- VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */
- VUPLLF %v1,%v1 /* Split rightmost doubleword */
- VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */
+ fpu_vleib(9, 0x20, 7); /* Shift by words */
+ fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */
+ fpu_vupllf(1, 1); /* Split rightmost doubleword */
+ fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */
/*
* Apply a Barret reduction to compute the final 32-bit CRC value.
@@ -256,20 +215,26 @@ SYM_FUNC_START(crc32_le_vgfm_generic)
*/
/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- VUPLLF %v2,%v1
- VGFMG %v2,CONST_RU_POLY,%v2
+ fpu_vupllf(2, 1);
+ fpu_vgfmg(2, CONST_RU_POLY, 2);
/*
* Compute the GF(2) product of the CRC polynomial with T1(x) in
* V2 and XOR the intermediate result, T2(x), with the value in V1.
* The final result is stored in word element 2 of V2.
*/
- VUPLLF %v2,%v2
- VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
+ fpu_vupllf(2, 2);
+ fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
+
+ return fpu_vlgvf(2, 2);
+}
-.Ldone:
- VLGVF %r2,%v2,2
- BR_EX %r14
-SYM_FUNC_END(crc32_le_vgfm_generic)
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
+{
+ return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
+}
-.previous
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
+{
+ return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
+}
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 55ee5567a5ea..99f7e1f2b70a 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -125,20 +125,8 @@ struct s390_pxts_ctx {
static inline int __paes_keyblob2pkey(struct key_blob *kb,
struct pkey_protkey *pk)
{
- int i, ret;
-
- /* try three times in case of failure */
- for (i = 0; i < 3; i++) {
- if (i > 0 && ret == -EAGAIN && in_task())
- if (msleep_interruptible(1000))
- return -EINTR;
- ret = pkey_keyblob2pkey(kb->key, kb->keylen,
- pk->protkey, &pk->len, &pk->type);
- if (ret == 0)
- break;
- }
-
- return ret;
+ return pkey_keyblob2pkey(kb->key, kb->keylen,
+ pk->protkey, &pk->len, &pk->type);
}
static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 9a2786079e3a..4131f0daa5ea 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -20,8 +20,7 @@
*/
static void diag0c_fn(void *data)
{
- diag_stat_inc(DIAG_STAT_X00C);
- diag_amode31_ops.diag0c(((void **)data)[smp_processor_id()]);
+ diag0c(((void **)data)[smp_processor_id()]);
}
/*
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index f5f7e78ddc0c..9fc3f0dae8f0 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -25,7 +25,7 @@
static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
{
- union register_pair r1 = { .even = (unsigned long)data, };
+ union register_pair r1 = { .even = virt_to_phys(data), };
asm volatile("diag %[r1],%[r3],0x304\n"
: [r1] "+&d" (r1.pair)
@@ -74,7 +74,7 @@ static int __hypfs_sprp_ioctl(void __user *user_area)
int rc;
rc = -ENOMEM;
- data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ data = (void *)get_zeroed_page(GFP_KERNEL);
diag304 = kzalloc(sizeof(*diag304), GFP_KERNEL);
if (!data || !diag304)
goto out;
diff --git a/arch/s390/include/asm/access-regs.h b/arch/s390/include/asm/access-regs.h
new file mode 100644
index 000000000000..1a6412d9f5ad
--- /dev/null
+++ b/arch/s390/include/asm/access-regs.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2024
+ */
+
+#ifndef __ASM_S390_ACCESS_REGS_H
+#define __ASM_S390_ACCESS_REGS_H
+
+#include <linux/instrumented.h>
+#include <asm/sigcontext.h>
+
+struct access_regs {
+ unsigned int regs[NUM_ACRS];
+};
+
+static inline void save_access_regs(unsigned int *acrs)
+{
+ struct access_regs *regs = (struct access_regs *)acrs;
+
+ instrument_write(regs, sizeof(*regs));
+ asm volatile("stamy 0,15,%[regs]"
+ : [regs] "=QS" (*regs)
+ :
+ : "memory");
+}
+
+static inline void restore_access_regs(unsigned int *acrs)
+{
+ struct access_regs *regs = (struct access_regs *)acrs;
+
+ instrument_read(regs, sizeof(*regs));
+ asm volatile("lamy 0,15,%[regs]"
+ :
+ : [regs] "QS" (*regs)
+ : "memory");
+}
+
+#endif /* __ASM_S390_ACCESS_REGS_H */
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index f2240392c708..a92ebbc7aa7a 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -54,13 +54,13 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list,
parm_list->function = fn;
parm_list->parlist_length = sizeof(*parm_list);
parm_list->buffer_length = length;
- parm_list->product_id_addr = (unsigned long) id;
+ parm_list->product_id_addr = virt_to_phys(id);
parm_list->buffer_addr = virt_to_phys(buffer);
diag_stat_inc(DIAG_STAT_X0DC);
asm volatile(
" diag %1,%0,0xdc"
: "=d" (ry)
- : "d" (parm_list), "m" (*parm_list), "m" (*id)
+ : "d" (virt_to_phys(parm_list)), "m" (*parm_list), "m" (*id)
: "cc");
return ry;
}
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
index a873e873e1ee..56096ae26f29 100644
--- a/arch/s390/include/asm/asm-prototypes.h
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -3,7 +3,7 @@
#include <linux/kvm_host.h>
#include <linux/ftrace.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#include <asm-generic/asm-prototypes.h>
__int128_t __ashlti3(__int128_t a, int b);
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index aebe1e22c7be..c500d45fb465 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -14,7 +14,7 @@
".section .rodata.str,\"aMS\",@progbits,1\n" \
"1: .asciz \""__FILE__"\"\n" \
".previous\n" \
- ".section __bug_table,\"awM\",@progbits,%2\n" \
+ ".section __bug_table,\"aw\"\n" \
"2: .long 0b-.\n" \
" .long 1b-.\n" \
" .short %0,%1\n" \
@@ -30,7 +30,7 @@
#define __EMIT_BUG(x) do { \
asm_inline volatile( \
"0: mc 0,0\n" \
- ".section __bug_table,\"awM\",@progbits,%1\n" \
+ ".section __bug_table,\"aw\"\n" \
"1: .long 0b-.\n" \
" .short %0\n" \
" .org 1b+%1\n" \
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 69837eec2ff5..b89159591ca0 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -12,36 +12,29 @@
#ifndef _S390_CHECKSUM_H
#define _S390_CHECKSUM_H
-#include <linux/kasan-checks.h>
+#include <linux/instrumented.h>
#include <linux/in6.h>
-/*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
- *
- * Returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic.
- *
- * This function must be called with even lengths, except
- * for the last fragment, which may be odd.
- *
- * It's best to have buff aligned on a 32-bit boundary.
- */
-static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
+static inline __wsum cksm(const void *buff, int len, __wsum sum)
{
union register_pair rp = {
- .even = (unsigned long) buff,
- .odd = (unsigned long) len,
+ .even = (unsigned long)buff,
+ .odd = (unsigned long)len,
};
- kasan_check_read(buff, len);
- asm volatile(
+ instrument_read(buff, len);
+ asm volatile("\n"
"0: cksm %[sum],%[rp]\n"
" jo 0b\n"
: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");
return sum;
}
+__wsum csum_partial(const void *buff, int len, __wsum sum);
+
+#define _HAVE_ARCH_CSUM_AND_COPY
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
+
/*
* Fold a partial checksum without adding pseudo headers.
*/
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index bed804137537..20b94220113b 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -44,6 +44,13 @@ enum diag_stat_enum {
void diag_stat_inc(enum diag_stat_enum nr);
void diag_stat_inc_norecursion(enum diag_stat_enum nr);
+struct hypfs_diag0c_entry;
+
+/*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data);
+
/*
* Diagnose 10: Release page range
*/
@@ -331,10 +338,10 @@ struct hypfs_diag0c_entry;
*/
struct diag_ops {
int (*diag210)(struct diag210 *addr);
- int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
+ int (*diag26c)(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode);
int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
- void (*diag0c)(struct hypfs_diag0c_entry *entry);
+ void (*diag0c)(unsigned long rx);
void (*diag308_reset)(void);
};
@@ -342,9 +349,9 @@ extern struct diag_ops diag_amode31_ops;
extern struct diag210 *__diag210_tmp_amode31;
int _diag210_amode31(struct diag210 *addr);
-int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
+int _diag26c_amode31(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode);
int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
-void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
+void _diag0c_amode31(unsigned long rx);
void _diag308_reset_amode31(void);
int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index fdd319a622b0..7f5004065e8a 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -8,7 +8,7 @@
#include <linux/processor.h>
#include <linux/uaccess.h>
#include <asm/timex.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#include <asm/pai.h>
#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
@@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
- if (test_cpu_flag(CIF_FPU))
- __load_fpu_regs();
+ load_user_fpu_regs();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
debug_user_asce(1);
diff --git a/arch/s390/include/asm/vx-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h
index 360f8b36d962..02ccfe46050a 100644
--- a/arch/s390/include/asm/vx-insn-asm.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -9,11 +9,11 @@
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#ifndef __ASM_S390_VX_INSN_INTERNAL_H
-#define __ASM_S390_VX_INSN_INTERNAL_H
+#ifndef __ASM_S390_FPU_INSN_ASM_H
+#define __ASM_S390_FPU_INSN_ASM_H
-#ifndef __ASM_S390_VX_INSN_H
-#error only <asm/vx-insn.h> can be included directly
+#ifndef __ASM_S390_FPU_INSN_H
+#error only <asm/fpu-insn.h> can be included directly
#endif
#ifdef __ASSEMBLY__
@@ -195,10 +195,26 @@
/* RXB - Compute most significant bit used vector registers
*
* @rxb: Operand to store computed RXB value
- * @v1: First vector register designated operand
- * @v2: Second vector register designated operand
- * @v3: Third vector register designated operand
- * @v4: Fourth vector register designated operand
+ * @v1: Vector register designated operand whose MSB is stored in
+ * RXB bit 0 (instruction bit 36) and whose remaining bits
+ * are stored in instruction bits 8-11.
+ * @v2: Vector register designated operand whose MSB is stored in
+ * RXB bit 1 (instruction bit 37) and whose remaining bits
+ * are stored in instruction bits 12-15.
+ * @v3: Vector register designated operand whose MSB is stored in
+ * RXB bit 2 (instruction bit 38) and whose remaining bits
+ * are stored in instruction bits 16-19.
+ * @v4: Vector register designated operand whose MSB is stored in
+ * RXB bit 3 (instruction bit 39) and whose remaining bits
+ * are stored in instruction bits 32-35.
+ *
+ * Note: In most vector instruction formats [1] V1, V2, V3, and V4 directly
+ * correspond to @v1, @v2, @v3, and @v4. But there are exceptions, such as but
+ * not limited to the vector instruction formats VRR-g, VRR-h, VRS-a, VRS-d,
+ * and VSI.
+ *
+ * [1] IBM z/Architecture Principles of Operation, chapter "Program
+ * Execution, section "Instructions", subsection "Instruction Formats".
*/
.macro RXB rxb v1 v2=0 v3=0 v4=0
\rxb = 0
@@ -223,6 +239,9 @@
* @v2: Second vector register designated operand (for RXB)
* @v3: Third vector register designated operand (for RXB)
* @v4: Fourth vector register designated operand (for RXB)
+ *
+ * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro
+ * description for further details.
*/
.macro MRXB m v1 v2=0 v3=0 v4=0
rxb = 0
@@ -238,6 +257,9 @@
* @v2: Second vector register designated operand (for RXB)
* @v3: Third vector register designated operand (for RXB)
* @v4: Fourth vector register designated operand (for RXB)
+ *
+ * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro
+ * description for further details.
*/
.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0
MRXB \m, \v1, \v2, \v3, \v4
@@ -350,7 +372,7 @@
VX_NUM v3, \vr
.word 0xE700 | (r1 << 4) | (v3&15)
.word (b2 << 12) | (\disp)
- MRXBOPC \m, 0x21, v3
+ MRXBOPC \m, 0x21, 0, v3
.endm
.macro VLGVB gr, vr, disp, base="%r0"
VLGV \gr, \vr, \disp, \base, 0
@@ -499,6 +521,25 @@
VMRL \vr1, \vr2, \vr3, 3
.endm
+/* VECTOR LOAD WITH LENGTH */
+.macro VLL v, gr, disp, base
+ VX_NUM v1, \v
+ GR_NUM b2, \base
+ GR_NUM r3, \gr
+ .word 0xE700 | ((v1&15) << 4) | r3
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x37, v1
+.endm
+
+/* VECTOR STORE WITH LENGTH */
+.macro VSTL v, gr, disp, base
+ VX_NUM v1, \v
+ GR_NUM b2, \base
+ GR_NUM r3, \gr
+ .word 0xE700 | ((v1&15) << 4) | r3
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x3f, v1
+.endm
/* Vector integer instructions */
@@ -512,6 +553,16 @@
MRXBOPC 0, 0x68, v1, v2, v3
.endm
+/* VECTOR CHECKSUM */
+.macro VCKSM vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC 0, 0x66, v1, v2, v3
+.endm
+
/* VECTOR EXCLUSIVE OR */
.macro VX vr1, vr2, vr3
VX_NUM v1, \vr1
@@ -678,4 +729,4 @@
.endm
#endif /* __ASSEMBLY__ */
-#endif /* __ASM_S390_VX_INSN_INTERNAL_H */
+#endif /* __ASM_S390_FPU_INSN_ASM_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
new file mode 100644
index 000000000000..c1e2e521d9af
--- /dev/null
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -0,0 +1,486 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for Floating Point and Vector Instructions
+ *
+ */
+
+#ifndef __ASM_S390_FPU_INSN_H
+#define __ASM_S390_FPU_INSN_H
+
+#include <asm/fpu-insn-asm.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/instrumented.h>
+#include <asm/asm-extable.h>
+
+asm(".include \"asm/fpu-insn-asm.h\"\n");
+
+/*
+ * Various small helper functions, which can and should be used within
+ * kernel fpu code sections. Each function represents only one floating
+ * point or vector instruction (except for helper functions which require
+ * exception handling).
+ *
+ * This allows to use floating point and vector instructions like C
+ * functions, which has the advantage that all supporting code, like
+ * e.g. loops, can be written in easy to read C code.
+ *
+ * Each of the helper functions provides support for code instrumentation,
+ * like e.g. KASAN. Therefore instrumentation is also covered automatically
+ * when using these functions.
+ *
+ * In order to ensure that code generated with the helper functions stays
+ * within kernel fpu sections, which are guarded with kernel_fpu_begin()
+ * and kernel_fpu_end() calls, each function has a mandatory "memory"
+ * barrier.
+ */
+
+static __always_inline void fpu_cefbr(u8 f1, s32 val)
+{
+ asm volatile("cefbr %[f1],%[val]\n"
+ :
+ : [f1] "I" (f1), [val] "d" (val)
+ : "memory");
+}
+
+static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
+{
+ unsigned long val;
+
+ asm volatile("cgebr %[val],%[mode],%[f2]\n"
+ : [val] "=d" (val)
+ : [f2] "I" (f2), [mode] "I" (mode)
+ : "memory");
+ return val;
+}
+
+static __always_inline void fpu_debr(u8 f1, u8 f2)
+{
+ asm volatile("debr %[f1],%[f2]\n"
+ :
+ : [f1] "I" (f1), [f2] "I" (f2)
+ : "memory");
+}
+
+static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
+{
+ instrument_read(reg, sizeof(*reg));
+ asm volatile("ld %[fpr],%[reg]\n"
+ :
+ : [fpr] "I" (fpr), [reg] "Q" (reg->ui)
+ : "memory");
+}
+
+static __always_inline void fpu_ldgr(u8 f1, u32 val)
+{
+ asm volatile("ldgr %[f1],%[val]\n"
+ :
+ : [f1] "I" (f1), [val] "d" (val)
+ : "memory");
+}
+
+static __always_inline void fpu_lfpc(unsigned int *fpc)
+{
+ instrument_read(fpc, sizeof(*fpc));
+ asm volatile("lfpc %[fpc]"
+ :
+ : [fpc] "Q" (*fpc)
+ : "memory");
+}
+
+/**
+ * fpu_lfpc_safe - Load floating point control register safely.
+ * @fpc: new value for floating point control register
+ *
+ * Load floating point control register. This may lead to an exception,
+ * since a saved value may have been modified by user space (ptrace,
+ * signal return, kvm registers) to an invalid value. In such a case
+ * set the floating point control register to zero.
+ */
+static inline void fpu_lfpc_safe(unsigned int *fpc)
+{
+ u32 tmp;
+
+ instrument_read(fpc, sizeof(*fpc));
+ asm volatile("\n"
+ "0: lfpc %[fpc]\n"
+ "1: nopr %%r7\n"
+ ".pushsection .fixup, \"ax\"\n"
+ "2: lghi %[tmp],0\n"
+ " sfpc %[tmp]\n"
+ " jg 1b\n"
+ ".popsection\n"
+ EX_TABLE(1b, 2b)
+ : [tmp] "=d" (tmp)
+ : [fpc] "Q" (*fpc)
+ : "memory");
+}
+
+static __always_inline void fpu_std(unsigned short fpr, freg_t *reg)
+{
+ instrument_write(reg, sizeof(*reg));
+ asm volatile("std %[fpr],%[reg]\n"
+ : [reg] "=Q" (reg->ui)
+ : [fpr] "I" (fpr)
+ : "memory");
+}
+
+static __always_inline void fpu_sfpc(unsigned int fpc)
+{
+ asm volatile("sfpc %[fpc]"
+ :
+ : [fpc] "d" (fpc)
+ : "memory");
+}
+
+static __always_inline void fpu_stfpc(unsigned int *fpc)
+{
+ instrument_write(fpc, sizeof(*fpc));
+ asm volatile("stfpc %[fpc]"
+ : [fpc] "=Q" (*fpc)
+ :
+ : "memory");
+}
+
+static __always_inline void fpu_vab(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VAB %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VCKSM %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+static __always_inline void fpu_vesravb(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VESRAVB %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+static __always_inline void fpu_vgfmag(u8 v1, u8 v2, u8 v3, u8 v4)
+{
+ asm volatile("VGFMAG %[v1],%[v2],%[v3],%[v4]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4)
+ : "memory");
+}
+
+static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VGFMG %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+ instrument_read(vxr, sizeof(__vector128));
+ asm volatile("\n"
+ " la 1,%[vxr]\n"
+ " VL %[v1],0,,1\n"
+ :
+ : [vxr] "R" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+ instrument_read(vxr, sizeof(__vector128));
+ asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
+{
+ asm volatile("VLEIB %[v],%[val],%[index]"
+ :
+ : [v] "I" (v), [val] "K" (val), [index] "I" (index)
+ : "memory");
+}
+
+static __always_inline void fpu_vleig(u8 v, s16 val, u8 index)
+{
+ asm volatile("VLEIG %[v],%[val],%[index]"
+ :
+ : [v] "I" (v), [val] "K" (val), [index] "I" (index)
+ : "memory");
+}
+
+static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
+{
+ u64 val;
+
+ asm volatile("VLGVF %[val],%[v],%[index]"
+ : [val] "=d" (val)
+ : [v] "I" (v), [index] "L" (index)
+ : "memory");
+ return val;
+}
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
+{
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_read(vxr, size);
+ asm volatile("\n"
+ " la 1,%[vxr]\n"
+ " VLL %[v1],%[index],0,1\n"
+ :
+ : [vxr] "R" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
+{
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_read(vxr, size);
+ asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vlm(_v1, _v3, _vxrs) \
+({ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); \
+ \
+ instrument_read(_v, size); \
+ asm volatile("\n" \
+ " la 1,%[vxrs]\n" \
+ " VLM %[v1],%[v3],0,1\n" \
+ : \
+ : [vxrs] "R" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
+ (_v3) - (_v1) + 1; \
+})
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vlm(_v1, _v3, _vxrs) \
+({ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); \
+ \
+ instrument_read(_v, size); \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : \
+ : [vxrs] "Q" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
+ (_v3) - (_v1) + 1; \
+})
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vlr(u8 v1, u8 v2)
+{
+ asm volatile("VLR %[v1],%[v2]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2)
+ : "memory");
+}
+
+static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
+{
+ asm volatile("VLVGF %[v],%[val],%[index]"
+ :
+ : [v] "I" (v), [val] "d" (val), [index] "L" (index)
+ : "memory");
+}
+
+static __always_inline void fpu_vn(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VN %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+static __always_inline void fpu_vperm(u8 v1, u8 v2, u8 v3, u8 v4)
+{
+ asm volatile("VPERM %[v1],%[v2],%[v3],%[v4]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4)
+ : "memory");
+}
+
+static __always_inline void fpu_vrepib(u8 v1, s16 i2)
+{
+ asm volatile("VREPIB %[v1],%[i2]"
+ :
+ : [v1] "I" (v1), [i2] "K" (i2)
+ : "memory");
+}
+
+static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VSRLB %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+ instrument_write(vxr, sizeof(__vector128));
+ asm volatile("\n"
+ " la 1,%[vxr]\n"
+ " VST %[v1],0,,1\n"
+ : [vxr] "=R" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+ instrument_write(vxr, sizeof(__vector128));
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
+ : [vxr] "=Q" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_write(vxr, size);
+ asm volatile("\n"
+ " la 1,%[vxr]\n"
+ " VSTL %[v1],%[index],0,1\n"
+ : [vxr] "=R" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_write(vxr, size);
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ : [vxr] "=Q" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vstm(_v1, _v3, _vxrs) \
+({ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); \
+ \
+ instrument_write(_v, size); \
+ asm volatile("\n" \
+ " la 1,%[vxrs]\n" \
+ " VSTM %[v1],%[v3],0,1\n" \
+ : [vxrs] "=R" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
+ (_v3) - (_v1) + 1; \
+})
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vstm(_v1, _v3, _vxrs) \
+({ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); \
+ \
+ instrument_write(_v, size); \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : [vxrs] "=Q" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
+ (_v3) - (_v1) + 1; \
+})
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vupllf(u8 v1, u8 v2)
+{
+ asm volatile("VUPLLF %[v1],%[v2]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2)
+ : "memory");
+}
+
+static __always_inline void fpu_vx(u8 v1, u8 v2, u8 v3)
+{
+ asm volatile("VX %[v1],%[v2],%[v3]"
+ :
+ : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+ : "memory");
+}
+
+static __always_inline void fpu_vzero(u8 v)
+{
+ asm volatile("VZERO %[v]"
+ :
+ : [v] "I" (v)
+ : "memory");
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_FPU_INSN_H */
diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h
new file mode 100644
index 000000000000..8d58d5a95399
--- /dev/null
+++ b/arch/s390/include/asm/fpu-types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+ u32 fpc;
+ __vector128 vxrs[__NUM_VXRS] __aligned(8);
+};
+
+struct kernel_fpu_hdr {
+ int mask;
+ u32 fpc;
+};
+
+struct kernel_fpu {
+ struct kernel_fpu_hdr hdr;
+ __vector128 vxrs[] __aligned(8);
+};
+
+#define KERNEL_FPU_STRUCT(vxr_size) \
+struct kernel_fpu_##vxr_size { \
+ struct kernel_fpu_hdr hdr; \
+ __vector128 vxrs[vxr_size] __aligned(8); \
+}
+
+KERNEL_FPU_STRUCT(8);
+KERNEL_FPU_STRUCT(16);
+KERNEL_FPU_STRUCT(32);
+
+#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name) \
+ struct kernel_fpu_##vxr_size name __uninitialized
+
+#define DECLARE_KERNEL_FPU_ONSTACK8(name) \
+ DECLARE_KERNEL_FPU_ONSTACK(8, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK16(name) \
+ DECLARE_KERNEL_FPU_ONSTACK(16, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK32(name) \
+ DECLARE_KERNEL_FPU_ONSTACK(32, name)
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
new file mode 100644
index 000000000000..c84cb33913e2
--- /dev/null
+++ b/arch/s390/include/asm/fpu.h
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In-kernel FPU support functions
+ *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ * use of floating-point or vector registers and instructions.
+ *
+ * 2. For kernel_fpu_begin(), specify the vector register range you want to
+ * use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ * a) If your function typically runs in process-context, use the lower
+ * half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ * b) If your function typically runs in soft-irq or hard-irq context,
+ * prefer using the upper half of the vector registers, for example,
+ * specify KERNEL_VXR_HIGH.
+ *
+ * If you adhere to these guidelines, an interrupted process context
+ * does not require to save and restore vector registers because of
+ * disjoint register ranges.
+ *
+ * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ * includes logic to save and restore up to 16 vector registers at once.
+ *
+ * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ * struct kernel_fpu states. Vector registers that are in use by outer
+ * levels are saved and restored. You can minimize the save and restore
+ * effort by choosing disjoint vector register ranges.
+ *
+ * 5. To use vector floating-point instructions, specify the KERNEL_FPC
+ * flag to save and restore floating-point controls in addition to any
+ * vector register range.
+ *
+ * 6. To use floating-point registers and instructions only, specify the
+ * KERNEL_FPR flag. This flag triggers a save and restore of vector
+ * registers V0 to V15 and floating-point controls.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_H
+#define _ASM_S390_FPU_H
+
+#include <linux/processor.h>
+#include <linux/preempt.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <asm/sigcontext.h>
+#include <asm/fpu-types.h>
+#include <asm/fpu-insn.h>
+#include <asm/facility.h>
+
+static inline bool cpu_has_vx(void)
+{
+ return likely(test_facility(129));
+}
+
+enum {
+ KERNEL_FPC_BIT = 0,
+ KERNEL_VXR_V0V7_BIT,
+ KERNEL_VXR_V8V15_BIT,
+ KERNEL_VXR_V16V23_BIT,
+ KERNEL_VXR_V24V31_BIT,
+};
+
+#define KERNEL_FPC BIT(KERNEL_FPC_BIT)
+#define KERNEL_VXR_V0V7 BIT(KERNEL_VXR_V0V7_BIT)
+#define KERNEL_VXR_V8V15 BIT(KERNEL_VXR_V8V15_BIT)
+#define KERNEL_VXR_V16V23 BIT(KERNEL_VXR_V16V23_BIT)
+#define KERNEL_VXR_V24V31 BIT(KERNEL_VXR_V24V31_BIT)
+
+#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)
+
+#define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH)
+#define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW)
+
+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
+void __kernel_fpu_end(struct kernel_fpu *state, int flags);
+
+static __always_inline void save_vx_regs(__vector128 *vxrs)
+{
+ fpu_vstm(0, 15, &vxrs[0]);
+ fpu_vstm(16, 31, &vxrs[16]);
+}
+
+static __always_inline void load_vx_regs(__vector128 *vxrs)
+{
+ fpu_vlm(0, 15, &vxrs[0]);
+ fpu_vlm(16, 31, &vxrs[16]);
+}
+
+static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
+{
+ fpu_std(0, &fprs[0 * offset]);
+ fpu_std(1, &fprs[1 * offset]);
+ fpu_std(2, &fprs[2 * offset]);
+ fpu_std(3, &fprs[3 * offset]);
+ fpu_std(4, &fprs[4 * offset]);
+ fpu_std(5, &fprs[5 * offset]);
+ fpu_std(6, &fprs[6 * offset]);
+ fpu_std(7, &fprs[7 * offset]);
+ fpu_std(8, &fprs[8 * offset]);
+ fpu_std(9, &fprs[9 * offset]);
+ fpu_std(10, &fprs[10 * offset]);
+ fpu_std(11, &fprs[11 * offset]);
+ fpu_std(12, &fprs[12 * offset]);
+ fpu_std(13, &fprs[13 * offset]);
+ fpu_std(14, &fprs[14 * offset]);
+ fpu_std(15, &fprs[15 * offset]);
+}
+
+static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
+{
+ fpu_ld(0, &fprs[0 * offset]);
+ fpu_ld(1, &fprs[1 * offset]);
+ fpu_ld(2, &fprs[2 * offset]);
+ fpu_ld(3, &fprs[3 * offset]);
+ fpu_ld(4, &fprs[4 * offset]);
+ fpu_ld(5, &fprs[5 * offset]);
+ fpu_ld(6, &fprs[6 * offset]);
+ fpu_ld(7, &fprs[7 * offset]);
+ fpu_ld(8, &fprs[8 * offset]);
+ fpu_ld(9, &fprs[9 * offset]);
+ fpu_ld(10, &fprs[10 * offset]);
+ fpu_ld(11, &fprs[11 * offset]);
+ fpu_ld(12, &fprs[12 * offset]);
+ fpu_ld(13, &fprs[13 * offset]);
+ fpu_ld(14, &fprs[14 * offset]);
+ fpu_ld(15, &fprs[15 * offset]);
+}
+
+static __always_inline void save_fp_regs(freg_t *fprs)
+{
+ __save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs(freg_t *fprs)
+{
+ __load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
+{
+ freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+ __save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
+{
+ freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+ __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static inline void load_user_fpu_regs(void)
+{
+ struct thread_struct *thread = &current->thread;
+
+ if (!thread->ufpu_flags)
+ return;
+ load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+ thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
+{
+ save_fpu_state(&thread->ufpu, flags);
+ __atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+ struct thread_struct *thread = &current->thread;
+ int mask, flags;
+
+ mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+ flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+ if (flags)
+ __save_user_fpu_regs(thread, flags);
+ barrier();
+ WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+ struct thread_struct *thread = &current->thread;
+ int mask, uflags;
+
+ mask = __atomic_or(flags, &thread->kfpu_flags);
+ state->hdr.mask = mask;
+ uflags = READ_ONCE(thread->ufpu_flags);
+ if ((uflags & flags) != flags)
+ __save_user_fpu_regs(thread, ~uflags & flags);
+ if (mask & flags)
+ __kernel_fpu_begin(state, flags);
+}
+
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+{
+ int mask = state->hdr.mask;
+
+ if (mask & flags)
+ __kernel_fpu_end(state, flags);
+ barrier();
+ WRITE_ONCE(current->thread.kfpu_flags, mask);
+}
+
+void __kernel_fpu_invalid_size(void);
+
+static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
+{
+ unsigned int cnt = 0;
+
+ if (flags & KERNEL_VXR_V0V7)
+ cnt += 8;
+ if (flags & KERNEL_VXR_V8V15)
+ cnt += 8;
+ if (flags & KERNEL_VXR_V16V23)
+ cnt += 8;
+ if (flags & KERNEL_VXR_V24V31)
+ cnt += 8;
+ if (cnt != size)
+ __kernel_fpu_invalid_size();
+}
+
+#define kernel_fpu_begin(state, flags) \
+{ \
+ typeof(state) s = (state); \
+ int _flags = (flags); \
+ \
+ kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \
+ _kernel_fpu_begin((struct kernel_fpu *)s, _flags); \
+}
+
+#define kernel_fpu_end(state, flags) \
+{ \
+ typeof(state) s = (state); \
+ int _flags = (flags); \
+ \
+ kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \
+ _kernel_fpu_end((struct kernel_fpu *)s, _flags); \
+}
+
+static inline void save_kernel_fpu_regs(struct thread_struct *thread)
+{
+ if (!thread->kfpu_flags)
+ return;
+ save_fpu_state(&thread->kfpu, thread->kfpu_flags);
+}
+
+static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
+{
+ if (!thread->kfpu_flags)
+ return;
+ load_fpu_state(&thread->kfpu, thread->kfpu_flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ fprs[i].ui = vxrs[i].high;
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ vxrs[i].high = fprs[i].ui;
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpregs->pad = 0;
+ fpregs->fpc = fpu->fpc;
+ convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpu->fpc = fpregs->fpc;
+ convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+}
+
+#endif /* _ASM_S390_FPU_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
deleted file mode 100644
index d6ca8bc6ca68..000000000000
--- a/arch/s390/include/asm/fpu/api.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * In-kernel FPU support functions
- *
- *
- * Consider these guidelines before using in-kernel FPU functions:
- *
- * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
- * use of floating-point or vector registers and instructions.
- *
- * 2. For kernel_fpu_begin(), specify the vector register range you want to
- * use with the KERNEL_VXR_* constants. Consider these usage guidelines:
- *
- * a) If your function typically runs in process-context, use the lower
- * half of the vector registers, for example, specify KERNEL_VXR_LOW.
- * b) If your function typically runs in soft-irq or hard-irq context,
- * prefer using the upper half of the vector registers, for example,
- * specify KERNEL_VXR_HIGH.
- *
- * If you adhere to these guidelines, an interrupted process context
- * does not require to save and restore vector registers because of
- * disjoint register ranges.
- *
- * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
- * includes logic to save and restore up to 16 vector registers at once.
- *
- * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
- * struct kernel_fpu states. Vector registers that are in use by outer
- * levels are saved and restored. You can minimize the save and restore
- * effort by choosing disjoint vector register ranges.
- *
- * 5. To use vector floating-point instructions, specify the KERNEL_FPC
- * flag to save and restore floating-point controls in addition to any
- * vector register range.
- *
- * 6. To use floating-point registers and instructions only, specify the
- * KERNEL_FPR flag. This flag triggers a save and restore of vector
- * registers V0 to V15 and floating-point controls.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_API_H
-#define _ASM_S390_FPU_API_H
-
-#include <linux/preempt.h>
-#include <asm/asm-extable.h>
-#include <asm/fpu/internal.h>
-
-void save_fpu_regs(void);
-void load_fpu_regs(void);
-void __load_fpu_regs(void);
-
-/**
- * sfpc_safe - Set floating point control register safely.
- * @fpc: new value for floating point control register
- *
- * Set floating point control register. This may lead to an exception,
- * since a saved value may have been modified by user space (ptrace,
- * signal return, kvm registers) to an invalid value. In such a case
- * set the floating point control register to zero.
- */
-static inline void sfpc_safe(u32 fpc)
-{
- asm volatile("\n"
- "0: sfpc %[fpc]\n"
- "1: nopr %%r7\n"
- ".pushsection .fixup, \"ax\"\n"
- "2: lghi %[fpc],0\n"
- " jg 0b\n"
- ".popsection\n"
- EX_TABLE(1b, 2b)
- : [fpc] "+d" (fpc)
- : : "memory");
-}
-
-#define KERNEL_FPC 1
-#define KERNEL_VXR_V0V7 2
-#define KERNEL_VXR_V8V15 4
-#define KERNEL_VXR_V16V23 8
-#define KERNEL_VXR_V24V31 16
-
-#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
-#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
-#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
-
-#define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
-#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW)
-
-struct kernel_fpu;
-
-/*
- * Note the functions below must be called with preemption disabled.
- * Do not enable preemption before calling __kernel_fpu_end() to prevent
- * an corruption of an existing kernel FPU state.
- *
- * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
- */
-void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
-
-
-static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
-{
- preempt_disable();
- state->mask = S390_lowcore.fpu_flags;
- if (!test_cpu_flag(CIF_FPU))
- /* Save user space FPU state and register contents */
- save_fpu_regs();
- else if (state->mask & flags)
- /* Save FPU/vector register in-use by the kernel */
- __kernel_fpu_begin(state, flags);
- S390_lowcore.fpu_flags |= flags;
-}
-
-static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
-{
- S390_lowcore.fpu_flags = state->mask;
- if (state->mask & flags)
- /* Restore FPU/vector register in-use by the kernel */
- __kernel_fpu_end(state, flags);
- preempt_enable();
-}
-
-#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
deleted file mode 100644
index d511c4cf5afb..000000000000
--- a/arch/s390/include/asm/fpu/internal.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * FPU state and register content conversion primitives
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_INTERNAL_H
-#define _ASM_S390_FPU_INTERNAL_H
-
-#include <linux/string.h>
-#include <asm/facility.h>
-#include <asm/fpu/types.h>
-
-static inline bool cpu_has_vx(void)
-{
- return likely(test_facility(129));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
- asm volatile(
- " la 1,%0\n"
- " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
- " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
- : "=Q" (*(struct vx_array *) vxrs) : : "1");
-}
-
-static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
-{
- int i;
-
- for (i = 0; i < __NUM_FPRS; i++)
- fprs[i].ui = vxrs[i].high;
-}
-
-static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
-{
- int i;
-
- for (i = 0; i < __NUM_FPRS; i++)
- vxrs[i].high = fprs[i].ui;
-}
-
-static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
- fpregs->pad = 0;
- fpregs->fpc = fpu->fpc;
- if (cpu_has_vx())
- convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
- else
- memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
- sizeof(fpregs->fprs));
-}
-
-static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
- fpu->fpc = fpregs->fpc;
- if (cpu_has_vx())
- convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
- else
- memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
- sizeof(fpregs->fprs));
-}
-
-#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
deleted file mode 100644
index d889e9436865..000000000000
--- a/arch/s390/include/asm/fpu/types.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * FPU data structures
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_TYPES_H
-#define _ASM_S390_FPU_TYPES_H
-
-#include <asm/sigcontext.h>
-
-struct fpu {
- __u32 fpc; /* Floating-point control */
- void *regs; /* Pointer to the current save area */
- union {
- /* Floating-point register save area */
- freg_t fprs[__NUM_FPRS];
- /* Vector register save area */
- __vector128 vxrs[__NUM_VXRS];
- };
-};
-
-/* VX array structure for address operand constraints in inline assemblies */
-struct vx_array { __vector128 _[__NUM_VXRS]; };
-
-/* In-kernel FPU state structure */
-struct kernel_fpu {
- u32 mask;
- u32 fpc;
- union {
- freg_t fprs[__NUM_FPRS];
- __vector128 vxrs[__NUM_VXRS];
- };
-};
-
-#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 895f774bbcc5..bf78cf381dfc 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -25,7 +25,7 @@
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,%l[label]\n"
+ asm goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
@@ -39,7 +39,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 15,%l[label]\n"
+ asm goto("0: brcl 15,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 52664105a473..95990461888f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -23,7 +23,7 @@
#include <linux/mmu_notifier.h>
#include <asm/debug.h>
#include <asm/cpu.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#include <asm/isc.h>
#include <asm/guarded_storage.h>
@@ -743,7 +743,6 @@ struct kvm_vcpu_arch {
struct kvm_s390_sie_block *vsie_block;
unsigned int host_acrs[NUM_ACRS];
struct gs_cb *host_gscb;
- struct fpu host_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
@@ -765,6 +764,8 @@ struct kvm_vcpu_arch {
__u64 cputm_start;
bool gs_enabled;
bool skey_enabled;
+ /* Indicator if the access registers have been loaded from guest */
+ bool acrs_loaded;
struct kvm_s390_pv_vcpu pv;
union diag318_info diag318_info;
};
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5dc1b6345006..8c5f16857539 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,7 +157,7 @@ struct lowcore {
__s32 preempt_count; /* 0x03a8 */
__u32 spinlock_lockval; /* 0x03ac */
__u32 spinlock_index; /* 0x03b0 */
- __u32 fpu_flags; /* 0x03b4 */
+ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
__u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */
__u64 machine_flags; /* 0x03c8 */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 73b9c3bf377f..ded9548d11d9 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -11,7 +11,7 @@
#include <linux/const.h>
#include <asm/types.h>
-#define _PAGE_SHIFT 12
+#define _PAGE_SHIFT CONFIG_PAGE_SHIFT
#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
#define _PAGE_MASK (~(_PAGE_SIZE - 1))
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 7d1888e3dee6..3f609565734b 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -16,7 +16,7 @@ struct qpaci_info_block {
u64 header;
struct {
u64 : 8;
- u64 num_cc : 8; /* # of supported crypto counters */
+ u64 num_cc : 8; /* # of supported crypto counters */
u64 : 9;
u64 num_nnpa : 7; /* # of supported NNPA counters */
u64 : 32;
@@ -81,4 +81,5 @@ enum paievt_mode {
PAI_MODE_COUNTING,
};
+#define PAI_SAVE_AREA(x) ((x)->hw.event_base)
#endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index e91cd6bbc330..30820a649e6e 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -122,6 +122,7 @@ struct zpci_dev {
struct rcu_head rcu;
struct hotplug_slot hotplug_slot;
+ struct mutex state_lock; /* protect state changes */
enum zpci_state state;
u32 fid; /* function ID, used by sclp */
u32 fh; /* function handle, used by insn's */
@@ -142,7 +143,6 @@ struct zpci_dev {
u8 reserved : 2;
unsigned int devfn; /* DEVFN part of the RID*/
- struct mutex lock;
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
@@ -170,6 +170,7 @@ struct zpci_dev {
u64 dma_mask; /* DMA address space mask */
/* Function measurement block */
+ struct mutex fmb_lock;
struct zpci_fmb *fmb;
u16 fmb_update; /* update interval */
u16 fmb_length;
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 9e41a74fce9a..e747b067f8db 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -22,6 +22,7 @@ enum reserved_range_type {
RR_DECOMPRESSOR,
RR_INITRD,
RR_VMLINUX,
+ RR_RELOC,
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c0b6e74d899a..7cf00cf8fb0b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -15,13 +15,11 @@
#include <linux/bits.h>
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
-#define CIF_FPU 3 /* restore FPU registers */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
-#define _CIF_FPU BIT(CIF_FPU)
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
#define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU)
@@ -33,13 +31,12 @@
#include <linux/cpumask.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
+#include <asm/fpu-types.h>
#include <asm/cpu.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/setup.h>
#include <asm/runtime_instr.h>
-#include <asm/fpu/types.h>
-#include <asm/fpu/internal.h>
#include <asm/irqflags.h>
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
@@ -169,6 +166,8 @@ struct thread_struct {
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
+ int ufpu_flags; /* user fpu flags */
+ int kfpu_flags; /* kernel fpu flags */
/* Per-thread information related to debugging */
struct per_regs per_user; /* User specified PER registers */
@@ -184,7 +183,8 @@ struct thread_struct {
struct gs_cb *gs_cb; /* Current guarded storage cb */
struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
struct pgm_tdb trap_tdb; /* Transaction abort diagnose block */
- struct fpu fpu; /* FP and VX register save area */
+ struct fpu ufpu; /* User FP and VX register save area */
+ struct fpu kfpu; /* Kernel FP and VX register save area */
};
/* Flag to disable transactions. */
@@ -203,7 +203,6 @@ typedef struct thread_struct thread_struct;
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
- .fpu.regs = (void *) init_task.thread.fpu.fprs, \
.last_break = 1, \
}
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index d28bf8fb2799..788bc4467445 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -203,6 +203,10 @@ static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag)
return ret;
}
+struct task_struct;
+
+void update_cr_regs(struct task_struct *task);
+
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 31ec4f545e03..433fde85b14e 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -4,7 +4,6 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
-#include <asm/switch_to.h>
struct stack_frame_user {
unsigned long back_chain;
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
deleted file mode 100644
index c61b2cc1a8a8..000000000000
--- a/arch/s390/include/asm/switch_to.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 1999, 2009
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef __ASM_SWITCH_TO_H
-#define __ASM_SWITCH_TO_H
-
-#include <linux/thread_info.h>
-#include <asm/fpu/api.h>
-#include <asm/ptrace.h>
-#include <asm/guarded_storage.h>
-
-extern struct task_struct *__switch_to(void *, void *);
-extern void update_cr_regs(struct task_struct *task);
-
-static inline void save_access_regs(unsigned int *acrs)
-{
- typedef struct { int _[NUM_ACRS]; } acrstype;
-
- asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
-}
-
-static inline void restore_access_regs(unsigned int *acrs)
-{
- typedef struct { int _[NUM_ACRS]; } acrstype;
-
- asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
-}
-
-#define switch_to(prev, next, last) do { \
- /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
- * a restore of the floating point / vector registers as \
- * soon as the next task returns to user space \
- */ \
- save_fpu_regs(); \
- save_access_regs(&prev->thread.acrs[0]); \
- save_ri_cb(prev->thread.ri_cb); \
- save_gs_cb(prev->thread.gs_cb); \
- update_cr_regs(next); \
- restore_access_regs(&next->thread.acrs[0]); \
- restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- restore_gs_cb(next->thread.gs_cb); \
- prev = __switch_to(prev, next); \
-} while (0)
-
-#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
index 73ee89142666..0e2b40ef69b0 100644
--- a/arch/s390/include/asm/vdso/data.h
+++ b/arch/s390/include/asm/vdso/data.h
@@ -3,7 +3,6 @@
#define __S390_ASM_VDSO_DATA_H
#include <linux/types.h>
-#include <vdso/datapage.h>
struct arch_vdso_data {
__s64 tod_steering_delta;
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
deleted file mode 100644
index 8c188f1c6d27..000000000000
--- a/arch/s390/include/asm/vx-insn.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Support for Vector Instructions
- *
- * This wrapper header file allows to use the vector instruction macros in
- * both assembler files as well as in inline assemblies in C files.
- */
-
-#ifndef __ASM_S390_VX_INSN_H
-#define __ASM_S390_VX_INSN_H
-
-#include <asm/vx-insn-asm.h>
-
-#ifndef __ASSEMBLY__
-
-asm(".include \"asm/vx-insn-asm.h\"\n");
-
-#endif /* __ASSEMBLY__ */
-#endif /* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h
index 2579f1694b82..203acd6e431b 100644
--- a/arch/s390/include/asm/word-at-a-time.h
+++ b/arch/s390/include/asm/word-at-a-time.h
@@ -2,7 +2,8 @@
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
#include <asm/asm-extable.h>
#include <asm/bitsperlong.h>
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 56254fa06f99..4f2669030220 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu)
ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
}
}
+ this_cpu_ci->cpu_map_populated = true;
return 0;
}
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index f8fc6c25d051..1942e2a9f8db 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -24,12 +24,12 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <asm/access-regs.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
#include <asm/lowcore.h>
-#include <asm/switch_to.h>
#include <asm/vdso.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
#include "entry.h"
@@ -56,7 +56,7 @@ typedef struct
static void store_sigregs(void)
{
save_access_regs(current->thread.acrs);
- save_fpu_regs();
+ save_user_fpu_regs();
}
/* Load registers after signal return */
@@ -79,7 +79,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+ fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.ufpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
return -EFAULT;
return 0;
@@ -113,7 +113,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+ fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, &current->thread.ufpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -136,11 +136,11 @@ static int save_sigregs_ext32(struct pt_regs *regs,
/* Save vector registers to signal stack */
if (cpu_has_vx()) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = current->thread.fpu.vxrs[i].low;
+ vxrs[i] = current->thread.ufpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -165,12 +165,12 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
if (cpu_has_vx()) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- current->thread.fpu.vxrs[i].low = vxrs[i];
+ current->thread.ufpu.vxrs[i].low = vxrs[i];
}
return 0;
}
@@ -184,7 +184,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask))
goto badframe;
set_current_blocked(&set);
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -207,7 +207,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 5c46c2659305..d09ebb6f5262 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -22,7 +22,7 @@
#include <asm/ipl.h>
#include <asm/sclp.h>
#include <asm/maccess.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 92fdc35f028c..8dee9aa0ec95 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -147,11 +147,40 @@ void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
EXPORT_SYMBOL(diag_stat_inc_norecursion);
/*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data)
+{
+ diag_stat_inc(DIAG_STAT_X00C);
+ diag_amode31_ops.diag0c(virt_to_phys(data));
+}
+
+/*
* Diagnose 14: Input spool file manipulation
+ *
+ * The subcode parameter determines the type of the first parameter rx.
+ * Currently used are the following 3 subcommands:
+ * 0x0: Read the Next Spool File Buffer (Data Record)
+ * 0x28: Position a Spool File to the Designated Record
+ * 0xfff: Retrieve Next File Descriptor
+ *
+ * For subcommands 0x0 and 0xfff, the value of the first parameter is
+ * a virtual address of a memory buffer and needs virtual to physical
+ * address translation. For other subcommands the rx parameter is not
+ * a virtual address.
*/
int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
{
diag_stat_inc(DIAG_STAT_X014);
+ switch (subcode) {
+ case 0x0:
+ case 0xfff:
+ rx = virt_to_phys((void *)rx);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
return diag_amode31_ops.diag14(rx, ry1, subcode);
}
EXPORT_SYMBOL(diag14);
@@ -265,6 +294,6 @@ EXPORT_SYMBOL(diag224);
int diag26c(void *req, void *resp, enum diag26c_sc subcode)
{
diag_stat_inc(DIAG_STAT_X26C);
- return diag_amode31_ops.diag26c(req, resp, subcode);
+ return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
}
EXPORT_SYMBOL(diag26c);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 2345ea332b97..c666271433fb 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -19,8 +19,10 @@
#include <linux/kernel.h>
#include <asm/asm-extable.h>
#include <linux/memblock.h>
+#include <asm/access-regs.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
+#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
@@ -31,7 +33,6 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
-#include <asm/switch_to.h>
#include "entry.h"
#define decompressor_handled_param(param) \
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 49a11f6dd7ae..fc5277eab554 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -24,7 +24,7 @@
#include <asm/page.h>
#include <asm/sigp.h>
#include <asm/irq.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu-insn.h>
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
@@ -171,13 +171,13 @@ _LPP_OFFSET = __LC_LPP
nop 0
/*
- * Scheduler resume function, called by switch_to
- * gpr2 = (task_struct *) prev
- * gpr3 = (task_struct *) next
+ * Scheduler resume function, called by __switch_to
+ * gpr2 = (task_struct *)prev
+ * gpr3 = (task_struct *)next
* Returns:
* gpr2 = prev
*/
-SYM_FUNC_START(__switch_to)
+SYM_FUNC_START(__switch_to_asm)
stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
lghi %r4,__TASK_stack
lghi %r1,__TASK_thread
@@ -193,7 +193,7 @@ SYM_FUNC_START(__switch_to)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
-SYM_FUNC_END(__switch_to)
+SYM_FUNC_END(__switch_to_asm)
#if IS_ENABLED(CONFIG_KVM)
/*
@@ -220,8 +220,6 @@ SYM_FUNC_START(__sie64a)
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
tm __SIE_PROG20+3(%r14),3 # last exit...
jnz .Lsie_skip
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- jo .Lsie_skip # exit if fp/vx regs changed
lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr
BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
.Lsie_entry:
@@ -489,16 +487,11 @@ SYM_FUNC_END(psw_idle)
*/
SYM_CODE_START(mcck_int_handler)
BPOFF
- la %r1,4095 # validate r1
- spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
- LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear
- lmg %r0,%r15,__LC_GPREGS_SAVE_AREA # validate gprs
lmg %r8,%r9,__LC_MCK_OLD_PSW
TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
- lctlg %c0,%c15,__LC_CREGS_SAVE_AREA # validate ctl regs
ptlb
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 9f41853f36b9..21969520f947 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -19,6 +19,7 @@ void mcck_int_handler(void);
void restart_int_handler(void);
void early_pgm_check_handler(void);
+struct task_struct *__switch_to_asm(struct task_struct *prev, struct task_struct *next);
void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
void __do_pgm_check(struct pt_regs *regs);
void __do_syscall(struct pt_regs *regs, int per_trap);
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index a4f3449cc814..fa90bbdc5ef9 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -8,256 +8,186 @@
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/sched.h>
-#include <asm/fpu/types.h>
-#include <asm/fpu/api.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu.h>
-void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
{
+ __vector128 *vxrs = state->vxrs;
+ int mask;
+
/*
* Limit the save to the FPU/vector registers already
- * in use by the previous context
+ * in use by the previous context.
*/
- flags &= state->mask;
-
+ flags &= state->hdr.mask;
if (flags & KERNEL_FPC)
- /* Save floating point control */
- asm volatile("stfpc %0" : "=Q" (state->fpc));
-
+ fpu_stfpc(&state->hdr.fpc);
if (!cpu_has_vx()) {
- if (flags & KERNEL_VXR_V0V7) {
- /* Save floating-point registers */
- asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
- asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
- asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
- asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
- asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
- asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
- asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
- asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
- asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
- asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
- asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
- asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
- asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
- asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
- asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
- asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
- }
+ if (flags & KERNEL_VXR_LOW)
+ save_fp_regs_vx(vxrs);
return;
}
-
- /* Test and save vector registers */
- asm volatile (
- /*
- * Test if any vector register must be saved and, if so,
- * test if all register can be saved.
- */
- " la 1,%[vxrs]\n" /* load save area */
- " tmll %[m],30\n" /* KERNEL_VXR */
- " jz 7f\n" /* no work -> done */
- " jo 5f\n" /* -> save V0..V31 */
- /*
- * Test for special case KERNEL_FPU_MID only. In this
- * case a vstm V8..V23 is the best instruction
- */
- " chi %[m],12\n" /* KERNEL_VXR_MID */
- " jne 0f\n" /* -> save V8..V23 */
- " VSTM 8,23,128,1\n" /* vstm %v8,%v23,128(%r1) */
- " j 7f\n"
- /* Test and save the first half of 16 vector registers */
- "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */
- " jz 3f\n" /* -> KERNEL_VXR_HIGH */
- " jo 2f\n" /* 11 -> save V0..V15 */
- " brc 2,1f\n" /* 10 -> save V8..V15 */
- " VSTM 0,7,0,1\n" /* vstm %v0,%v7,0(%r1) */
- " j 3f\n"
- "1: VSTM 8,15,128,1\n" /* vstm %v8,%v15,128(%r1) */
- " j 3f\n"
- "2: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */
- /* Test and save the second half of 16 vector registers */
- "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */
- " jz 7f\n"
- " jo 6f\n" /* 11 -> save V16..V31 */
- " brc 2,4f\n" /* 10 -> save V24..V31 */
- " VSTM 16,23,256,1\n" /* vstm %v16,%v23,256(%r1) */
- " j 7f\n"
- "4: VSTM 24,31,384,1\n" /* vstm %v24,%v31,384(%r1) */
- " j 7f\n"
- "5: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */
- "6: VSTM 16,31,256,1\n" /* vstm %v16,%v31,256(%r1) */
- "7:"
- : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
- : [m] "d" (flags)
- : "1", "cc");
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ vxrs += fpu_vstm(0, 15, vxrs);
+ vxrs += fpu_vstm(16, 31, vxrs);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ vxrs += fpu_vstm(8, 23, vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ vxrs += fpu_vstm(0, 15, vxrs);
+ else if (mask == KERNEL_VXR_V0V7)
+ vxrs += fpu_vstm(0, 7, vxrs);
+ else
+ vxrs += fpu_vstm(8, 15, vxrs);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ vxrs += fpu_vstm(16, 31, vxrs);
+ else if (mask == KERNEL_VXR_V16V23)
+ vxrs += fpu_vstm(16, 23, vxrs);
+ else
+ vxrs += fpu_vstm(24, 31, vxrs);
+ }
}
EXPORT_SYMBOL(__kernel_fpu_begin);
-void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+void __kernel_fpu_end(struct kernel_fpu *state, int flags)
{
+ __vector128 *vxrs = state->vxrs;
+ int mask;
+
/*
* Limit the restore to the FPU/vector registers of the
- * previous context that have been overwritte by the
- * current context
+ * previous context that have been overwritten by the
+ * current context.
*/
- flags &= state->mask;
-
+ flags &= state->hdr.mask;
if (flags & KERNEL_FPC)
- /* Restore floating-point controls */
- asm volatile("lfpc %0" : : "Q" (state->fpc));
-
+ fpu_lfpc(&state->hdr.fpc);
if (!cpu_has_vx()) {
- if (flags & KERNEL_VXR_V0V7) {
- /* Restore floating-point registers */
- asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
- asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
- asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
- asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
- asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
- asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
- asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
- asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
- asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
- asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
- asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
- asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
- asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
- asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
- asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
- asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
- }
+ if (flags & KERNEL_VXR_LOW)
+ load_fp_regs_vx(vxrs);
return;
}
-
- /* Test and restore (load) vector registers */
- asm volatile (
- /*
- * Test if any vector register must be loaded and, if so,
- * test if all registers can be loaded at once.
- */
- " la 1,%[vxrs]\n" /* load restore area */
- " tmll %[m],30\n" /* KERNEL_VXR */
- " jz 7f\n" /* no work -> done */
- " jo 5f\n" /* -> restore V0..V31 */
- /*
- * Test for special case KERNEL_FPU_MID only. In this
- * case a vlm V8..V23 is the best instruction
- */
- " chi %[m],12\n" /* KERNEL_VXR_MID */
- " jne 0f\n" /* -> restore V8..V23 */
- " VLM 8,23,128,1\n" /* vlm %v8,%v23,128(%r1) */
- " j 7f\n"
- /* Test and restore the first half of 16 vector registers */
- "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */
- " jz 3f\n" /* -> KERNEL_VXR_HIGH */
- " jo 2f\n" /* 11 -> restore V0..V15 */
- " brc 2,1f\n" /* 10 -> restore V8..V15 */
- " VLM 0,7,0,1\n" /* vlm %v0,%v7,0(%r1) */
- " j 3f\n"
- "1: VLM 8,15,128,1\n" /* vlm %v8,%v15,128(%r1) */
- " j 3f\n"
- "2: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */
- /* Test and restore the second half of 16 vector registers */
- "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */
- " jz 7f\n"
- " jo 6f\n" /* 11 -> restore V16..V31 */
- " brc 2,4f\n" /* 10 -> restore V24..V31 */
- " VLM 16,23,256,1\n" /* vlm %v16,%v23,256(%r1) */
- " j 7f\n"
- "4: VLM 24,31,384,1\n" /* vlm %v24,%v31,384(%r1) */
- " j 7f\n"
- "5: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */
- "6: VLM 16,31,256,1\n" /* vlm %v16,%v31,256(%r1) */
- "7:"
- : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
- : [m] "d" (flags)
- : "1", "cc");
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ vxrs += fpu_vlm(0, 15, vxrs);
+ vxrs += fpu_vlm(16, 31, vxrs);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ vxrs += fpu_vlm(8, 23, vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ vxrs += fpu_vlm(0, 15, vxrs);
+ else if (mask == KERNEL_VXR_V0V7)
+ vxrs += fpu_vlm(0, 7, vxrs);
+ else
+ vxrs += fpu_vlm(8, 15, vxrs);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ vxrs += fpu_vlm(16, 31, vxrs);
+ else if (mask == KERNEL_VXR_V16V23)
+ vxrs += fpu_vlm(16, 23, vxrs);
+ else
+ vxrs += fpu_vlm(24, 31, vxrs);
+ }
}
EXPORT_SYMBOL(__kernel_fpu_end);
-void __load_fpu_regs(void)
+void load_fpu_state(struct fpu *state, int flags)
{
- unsigned long *regs = current->thread.fpu.regs;
- struct fpu *state = &current->thread.fpu;
+ __vector128 *vxrs = &state->vxrs[0];
+ int mask;
- sfpc_safe(state->fpc);
- if (likely(cpu_has_vx())) {
- asm volatile("lgr 1,%0\n"
- "VLM 0,15,0,1\n"
- "VLM 16,31,256,1\n"
- :
- : "d" (regs)
- : "1", "cc", "memory");
- } else {
- asm volatile("ld 0,%0" : : "Q" (regs[0]));
- asm volatile("ld 1,%0" : : "Q" (regs[1]));
- asm volatile("ld 2,%0" : : "Q" (regs[2]));
- asm volatile("ld 3,%0" : : "Q" (regs[3]));
- asm volatile("ld 4,%0" : : "Q" (regs[4]));
- asm volatile("ld 5,%0" : : "Q" (regs[5]));
- asm volatile("ld 6,%0" : : "Q" (regs[6]));
- asm volatile("ld 7,%0" : : "Q" (regs[7]));
- asm volatile("ld 8,%0" : : "Q" (regs[8]));
- asm volatile("ld 9,%0" : : "Q" (regs[9]));
- asm volatile("ld 10,%0" : : "Q" (regs[10]));
- asm volatile("ld 11,%0" : : "Q" (regs[11]));
- asm volatile("ld 12,%0" : : "Q" (regs[12]));
- asm volatile("ld 13,%0" : : "Q" (regs[13]));
- asm volatile("ld 14,%0" : : "Q" (regs[14]));
- asm volatile("ld 15,%0" : : "Q" (regs[15]));
+ if (flags & KERNEL_FPC)
+ fpu_lfpc(&state->fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_V0V7)
+ load_fp_regs_vx(state->vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ fpu_vlm(0, 15, &vxrs[0]);
+ fpu_vlm(16, 31, &vxrs[16]);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ fpu_vlm(8, 23, &vxrs[8]);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ fpu_vlm(0, 15, &vxrs[0]);
+ else if (mask == KERNEL_VXR_V0V7)
+ fpu_vlm(0, 7, &vxrs[0]);
+ else
+ fpu_vlm(8, 15, &vxrs[8]);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ fpu_vlm(16, 31, &vxrs[16]);
+ else if (mask == KERNEL_VXR_V16V23)
+ fpu_vlm(16, 23, &vxrs[16]);
+ else
+ fpu_vlm(24, 31, &vxrs[24]);
}
- clear_cpu_flag(CIF_FPU);
-}
-
-void load_fpu_regs(void)
-{
- raw_local_irq_disable();
- __load_fpu_regs();
- raw_local_irq_enable();
}
-EXPORT_SYMBOL(load_fpu_regs);
-void save_fpu_regs(void)
+void save_fpu_state(struct fpu *state, int flags)
{
- unsigned long flags, *regs;
- struct fpu *state;
-
- local_irq_save(flags);
+ __vector128 *vxrs = &state->vxrs[0];
+ int mask;
- if (test_cpu_flag(CIF_FPU))
- goto out;
-
- state = &current->thread.fpu;
- regs = current->thread.fpu.regs;
-
- asm volatile("stfpc %0" : "=Q" (state->fpc));
- if (likely(cpu_has_vx())) {
- asm volatile("lgr 1,%0\n"
- "VSTM 0,15,0,1\n"
- "VSTM 16,31,256,1\n"
- :
- : "d" (regs)
- : "1", "cc", "memory");
- } else {
- asm volatile("std 0,%0" : "=Q" (regs[0]));
- asm volatile("std 1,%0" : "=Q" (regs[1]));
- asm volatile("std 2,%0" : "=Q" (regs[2]));
- asm volatile("std 3,%0" : "=Q" (regs[3]));
- asm volatile("std 4,%0" : "=Q" (regs[4]));
- asm volatile("std 5,%0" : "=Q" (regs[5]));
- asm volatile("std 6,%0" : "=Q" (regs[6]));
- asm volatile("std 7,%0" : "=Q" (regs[7]));
- asm volatile("std 8,%0" : "=Q" (regs[8]));
- asm volatile("std 9,%0" : "=Q" (regs[9]));
- asm volatile("std 10,%0" : "=Q" (regs[10]));
- asm volatile("std 11,%0" : "=Q" (regs[11]));
- asm volatile("std 12,%0" : "=Q" (regs[12]));
- asm volatile("std 13,%0" : "=Q" (regs[13]));
- asm volatile("std 14,%0" : "=Q" (regs[14]));
- asm volatile("std 15,%0" : "=Q" (regs[15]));
+ if (flags & KERNEL_FPC)
+ fpu_stfpc(&state->fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ save_fp_regs_vx(state->vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ fpu_vstm(0, 15, &vxrs[0]);
+ fpu_vstm(16, 31, &vxrs[16]);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ fpu_vstm(8, 23, &vxrs[8]);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ fpu_vstm(0, 15, &vxrs[0]);
+ else if (mask == KERNEL_VXR_V0V7)
+ fpu_vstm(0, 7, &vxrs[0]);
+ else
+ fpu_vstm(8, 15, &vxrs[8]);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ fpu_vstm(16, 31, &vxrs[16]);
+ else if (mask == KERNEL_VXR_V16V23)
+ fpu_vstm(16, 23, &vxrs[16]);
+ else
+ fpu_vstm(24, 31, &vxrs[24]);
}
- set_cpu_flag(CIF_FPU);
-out:
- local_irq_restore(flags);
}
-EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(save_fpu_state);
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index ba75f6bee774..1486350a4177 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1941,8 +1941,7 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
reipl_type == IPL_TYPE_UNKNOWN)
os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
- csum = (__force unsigned int)
- csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
abs_lc->ipib = __pa(reipl_block_actual);
abs_lc->ipib_checksum = csum;
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index aa22ffc16bcd..c5d0c1cf984b 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,8 +13,10 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
+#include <asm/guarded_storage.h>
#include <asm/pfault.h>
#include <asm/cio.h>
+#include <asm/fpu.h>
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/ipl.h>
@@ -26,7 +28,6 @@
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
-#include <asm/switch_to.h>
#include <asm/nmi.h>
#include <asm/sclp.h>
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9ad44c26d1a2..c77382a67325 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -23,16 +23,14 @@
#include <linux/export.h>
#include <asm/lowcore.h>
#include <asm/ctlreg.h>
+#include <asm/fpu.h>
#include <asm/smp.h>
#include <asm/stp.h>
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
-#include <asm/switch_to.h>
#include <asm/asm-offsets.h>
#include <asm/pai.h>
-#include <asm/vx-insn.h>
-#include <asm/fpu/api.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -204,133 +202,63 @@ void s390_handle_mcck(void)
}
}
-/*
- * returns 0 if register contents could be validated
- * returns 1 otherwise
+/**
+ * nmi_registers_valid - verify if registers are valid
+ * @mci: machine check interruption code
+ *
+ * Inspect a machine check interruption code and verify if all required
+ * registers are valid. For some registers the corresponding validity bit is
+ * ignored and the registers are set to the expected value.
+ * Returns true if all registers are valid, otherwise false.
*/
-static int notrace s390_validate_registers(union mci mci)
+static bool notrace nmi_registers_valid(union mci mci)
{
- struct mcesa *mcesa;
- void *fpt_save_area;
union ctlreg2 cr2;
- int kill_task;
- u64 zero;
-
- kill_task = 0;
- zero = 0;
-
- if (!mci.gr || !mci.fp)
- kill_task = 1;
- fpt_save_area = &S390_lowcore.floating_pt_save_area;
- if (!mci.fc) {
- kill_task = 1;
- asm volatile(
- " lfpc %0\n"
- :
- : "Q" (zero));
- } else {
- asm volatile(
- " lfpc %0\n"
- :
- : "Q" (S390_lowcore.fpt_creg_save_area));
- }
- mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
- if (!cpu_has_vx()) {
- /* Validate floating point registers */
- asm volatile(
- " ld 0,0(%0)\n"
- " ld 1,8(%0)\n"
- " ld 2,16(%0)\n"
- " ld 3,24(%0)\n"
- " ld 4,32(%0)\n"
- " ld 5,40(%0)\n"
- " ld 6,48(%0)\n"
- " ld 7,56(%0)\n"
- " ld 8,64(%0)\n"
- " ld 9,72(%0)\n"
- " ld 10,80(%0)\n"
- " ld 11,88(%0)\n"
- " ld 12,96(%0)\n"
- " ld 13,104(%0)\n"
- " ld 14,112(%0)\n"
- " ld 15,120(%0)\n"
- :
- : "a" (fpt_save_area)
- : "memory");
- } else {
- /* Validate vector registers */
- union ctlreg0 cr0;
-
- /*
- * The vector validity must only be checked if not running a
- * KVM guest. For KVM guests the machine check is forwarded by
- * KVM and it is the responsibility of the guest to take
- * appropriate actions. The host vector or FPU values have been
- * saved by KVM and will be restored by KVM.
- */
- if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
- kill_task = 1;
- cr0.reg = S390_lowcore.cregs_save_area[0];
- cr0.afp = cr0.vx = 1;
- local_ctl_load(0, &cr0.reg);
- asm volatile(
- " la 1,%0\n"
- " VLM 0,15,0,1\n"
- " VLM 16,31,256,1\n"
- :
- : "Q" (*(struct vx_array *)mcesa->vector_save_area)
- : "1");
- local_ctl_load(0, &S390_lowcore.cregs_save_area[0]);
- }
- /* Validate access registers */
- asm volatile(
- " lam 0,15,0(%0)\n"
- :
- : "a" (&S390_lowcore.access_regs_save_area)
- : "memory");
- if (!mci.ar)
- kill_task = 1;
- /* Validate guarded storage registers */
- cr2.reg = S390_lowcore.cregs_save_area[2];
- if (cr2.gse) {
- if (!mci.gs) {
- /*
- * 2 cases:
- * - machine check in kernel or userspace
- * - machine check while running SIE (KVM guest)
- * For kernel or userspace the userspace values of
- * guarded storage control can not be recreated, the
- * process must be terminated.
- * For SIE the guest values of guarded storage can not
- * be recreated. This is either due to a bug or due to
- * GS being disabled in the guest. The guest will be
- * notified by KVM code and the guests machine check
- * handling must take care of this. The host values
- * are saved by KVM and are not affected.
- */
- if (!test_cpu_flag(CIF_MCCK_GUEST))
- kill_task = 1;
- } else {
- load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
- }
- }
/*
- * The getcpu vdso syscall reads CPU number from the programmable
+ * The getcpu vdso syscall reads the CPU number from the programmable
* field of the TOD clock. Disregard the TOD programmable register
- * validity bit and load the CPU number into the TOD programmable
- * field unconditionally.
+ * validity bit and load the CPU number into the TOD programmable field
+ * unconditionally.
*/
set_tod_programmable_field(raw_smp_processor_id());
- /* Validate clock comparator register */
+ /*
+ * Set the clock comparator register to the next expected value.
+ */
set_clock_comparator(S390_lowcore.clock_comparator);
-
+ if (!mci.gr || !mci.fp || !mci.fc)
+ return false;
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
+ return false;
+ if (!mci.ar)
+ return false;
+ /*
+ * Two cases for guarded storage registers:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of guarded storage
+ * control can not be recreated, the process must be terminated.
+ * For SIE the guest values of guarded storage can not be recreated.
+ * This is either due to a bug or due to GS being disabled in the
+ * guest. The guest will be notified by KVM code and the guests machine
+ * check handling must take care of this. The host values are saved by
+ * KVM and are not affected.
+ */
+ cr2.reg = S390_lowcore.cregs_save_area[2];
+ if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
+ return false;
if (!mci.ms || !mci.pm || !mci.ia)
- kill_task = 1;
-
- return kill_task;
+ return false;
+ return true;
}
-NOKPROBE_SYMBOL(s390_validate_registers);
+NOKPROBE_SYMBOL(nmi_registers_valid);
/*
* Backup the guest's machine check info to its description block
@@ -428,7 +356,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
s390_handle_damage();
}
}
- if (s390_validate_registers(mci)) {
+ if (!nmi_registers_valid(mci)) {
if (!user_mode(regs))
s390_handle_damage();
/*
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 6e1824141b29..a801e6bd5341 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -29,7 +29,7 @@ static struct os_info os_info __page_aligned_data;
u32 os_info_csum(struct os_info *os_info)
{
int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
- return (__force u32)csum_partial(&os_info->version_major, size, 0);
+ return (__force u32)cksm(&os_info->version_major, size, 0);
}
/*
@@ -49,7 +49,7 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
{
os_info.entry[nr].addr = __pa(ptr);
os_info.entry[nr].size = size;
- os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
+ os_info.entry[nr].csum = (__force u32)cksm(ptr, size, 0);
os_info.csum = os_info_csum(&os_info);
}
@@ -98,7 +98,7 @@ static void os_info_old_alloc(int nr, int align)
msg = "copy failed";
goto fail_free;
}
- csum = (__force u32)csum_partial(buf_align, size, 0);
+ csum = (__force u32)cksm(buf_align, size, 0);
if (csum != os_info_old->entry[nr].csum) {
msg = "checksum failed";
goto fail_free;
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index bf8a672b15a4..823d652e3917 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -98,6 +98,7 @@ static void paicrypt_event_destroy(struct perf_event *event)
event->attr.config, event->cpu,
cpump->active_events, cpump->mode,
refcount_read(&cpump->refcnt));
+ free_page(PAI_SAVE_AREA(event));
if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
@@ -260,6 +261,7 @@ static int paicrypt_event_init(struct perf_event *event)
{
struct perf_event_attr *a = &event->attr;
struct paicrypt_map *cpump;
+ int rc = 0;
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
@@ -274,10 +276,21 @@ static int paicrypt_event_init(struct perf_event *event)
/* Allow only CRYPTO_ALL for sampling. */
if (a->sample_period && a->config != PAI_CRYPTO_BASE)
return -EINVAL;
+ /* Get a page to store last counter values for sampling */
+ if (a->sample_period) {
+ PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+ if (!PAI_SAVE_AREA(event)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
cpump = paicrypt_busy(event);
- if (IS_ERR(cpump))
- return PTR_ERR(cpump);
+ if (IS_ERR(cpump)) {
+ free_page(PAI_SAVE_AREA(event));
+ rc = PTR_ERR(cpump);
+ goto out;
+ }
event->destroy = paicrypt_event_destroy;
@@ -293,7 +306,8 @@ static int paicrypt_event_init(struct perf_event *event)
}
static_branch_inc(&pai_key);
- return 0;
+out:
+ return rc;
}
static void paicrypt_read(struct perf_event *event)
@@ -310,20 +324,15 @@ static void paicrypt_read(struct perf_event *event)
static void paicrypt_start(struct perf_event *event, int flags)
{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
u64 sum;
- /* Event initialization sets last_tag to 0. When later on the events
- * are deleted and re-added, do not reset the event count value to zero.
- * Events are added, deleted and re-added when 2 or more events
- * are active at the same time.
- */
if (!event->attr.sample_period) { /* Counting */
- if (!event->hw.last_tag) {
- event->hw.last_tag = 1;
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- }
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
} else { /* Sampling */
+ cpump->event = event;
perf_sched_cb_inc(event->pmu);
}
}
@@ -339,7 +348,6 @@ static int paicrypt_add(struct perf_event *event, int flags)
WRITE_ONCE(S390_lowcore.ccd, ccd);
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
- cpump->event = event;
if (flags & PERF_EF_START)
paicrypt_start(event, PERF_EF_RELOAD);
event->hw.state = 0;
@@ -367,23 +375,34 @@ static void paicrypt_del(struct perf_event *event, int flags)
}
}
-/* Create raw data and save it in buffer. Returns number of bytes copied.
- * Saves only positive counter entries of the form
+/* Create raw data and save it in buffer. Calculate the delta for each
+ * counter between this invocation and the last invocation.
+ * Returns number of bytes copied.
+ * Saves only entries with positive counter difference of the form
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
- bool exclude_user, bool exclude_kernel)
+ unsigned long *page_old, bool exclude_user,
+ bool exclude_kernel)
{
int i, outidx = 0;
for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = 0;
+ u64 val = 0, val_old = 0;
- if (!exclude_kernel)
+ if (!exclude_kernel) {
val += paicrypt_getctr(page, i, true);
- if (!exclude_user)
+ val_old += paicrypt_getctr(page_old, i, true);
+ }
+ if (!exclude_user) {
val += paicrypt_getctr(page, i, false);
+ val_old += paicrypt_getctr(page_old, i, false);
+ }
+ if (val >= val_old)
+ val -= val_old;
+ else
+ val = (~0ULL - val_old) + val + 1;
if (val) {
userdata[outidx].num = i;
userdata[outidx].value = val;
@@ -426,8 +445,8 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
- /* Clear lowcore page after read */
- memset(cpump->page, 0, PAGE_SIZE);
+ /* Save crypto counter lowcore page after reading event data. */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
return overflow;
}
@@ -443,6 +462,7 @@ static int paicrypt_have_sample(void)
if (!event) /* No event active */
return 0;
rawsize = paicrypt_copy(cpump->save, cpump->page,
+ (unsigned long *)PAI_SAVE_AREA(event),
cpump->event->attr.exclude_user,
cpump->event->attr.exclude_kernel);
if (rawsize) /* No incremented counters */
@@ -694,6 +714,12 @@ static int __init attr_event_init_one(struct attribute **attrs, int num)
{
struct perf_pmu_events_attr *pa;
+ /* Index larger than array_size, no counter name available */
+ if (num >= ARRAY_SIZE(paicrypt_ctrnames)) {
+ attrs[num] = NULL;
+ return 0;
+ }
+
pa = kzalloc(sizeof(*pa), GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -714,14 +740,13 @@ static int __init attr_event_init(void)
struct attribute **attrs;
int ret, i;
- attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
- GFP_KERNEL);
+ attrs = kmalloc_array(paicrypt_cnt + 2, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+ for (i = 0; i <= paicrypt_cnt; i++) {
ret = attr_event_init_one(attrs, i);
if (ret) {
- attr_event_free(attrs, i - 1);
+ attr_event_free(attrs, i);
return ret;
}
}
@@ -742,8 +767,10 @@ static int __init paicrypt_init(void)
paicrypt_cnt = ib.num_cc;
if (paicrypt_cnt == 0)
return 0;
- if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
- paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+ if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) {
+ pr_err("Too many PMU pai_crypto counters %d\n", paicrypt_cnt);
+ return -E2BIG;
+ }
rc = attr_event_init(); /* Export known PAI crypto events */
if (rc) {
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index af7f2b538c8f..616a25606cd6 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -120,6 +120,7 @@ static void paiext_event_destroy(struct perf_event *event)
struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
struct paiext_map *cpump = mp->mapptr;
+ free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
@@ -202,7 +203,6 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
}
rc = 0;
- cpump->event = event;
undo:
if (rc) {
@@ -256,10 +256,18 @@ static int paiext_event_init(struct perf_event *event)
/* Prohibit exclude_user event selection */
if (a->exclude_user)
return -EINVAL;
+ /* Get a page to store last counter values for sampling */
+ if (a->sample_period) {
+ PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+ if (!PAI_SAVE_AREA(event))
+ return -ENOMEM;
+ }
rc = paiext_alloc(a, event);
- if (rc)
+ if (rc) {
+ free_page(PAI_SAVE_AREA(event));
return rc;
+ }
event->destroy = paiext_event_destroy;
if (a->sample_period) {
@@ -319,15 +327,15 @@ static void paiext_read(struct perf_event *event)
static void paiext_start(struct perf_event *event, int flags)
{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
u64 sum;
if (!event->attr.sample_period) { /* Counting */
- if (!event->hw.last_tag) {
- event->hw.last_tag = 1;
- sum = paiext_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- }
+ sum = paiext_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
} else { /* Sampling */
+ cpump->event = event;
perf_sched_cb_inc(event->pmu);
}
}
@@ -346,7 +354,6 @@ static int paiext_add(struct perf_event *event, int flags)
debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
__func__, S390_lowcore.aicd, pcb->acc);
}
- cpump->event = event;
if (flags & PERF_EF_START)
paiext_start(event, PERF_EF_RELOAD);
event->hw.state = 0;
@@ -384,13 +391,19 @@ static void paiext_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
-static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area)
+static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area,
+ unsigned long *area_old)
{
int i, outidx = 0;
for (i = 1; i <= paiext_cnt; i++) {
u64 val = paiext_getctr(area, i);
+ u64 val_old = paiext_getctr(area_old, i);
+ if (val >= val_old)
+ val -= val_old;
+ else
+ val = (~0ULL - val_old) + val + 1;
if (val) {
userdata[outidx].num = i;
userdata[outidx].value = val;
@@ -446,8 +459,9 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
- /* Clear lowcore area after read */
- memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
+ /* Save NNPA lowcore area after read in event */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+ PAIE1_CTRBLOCK_SZ);
return overflow;
}
@@ -462,7 +476,8 @@ static int paiext_have_sample(void)
if (!event)
return 0;
- rawsize = paiext_copy(cpump->save, cpump->area);
+ rawsize = paiext_copy(cpump->save, cpump->area,
+ (unsigned long *)PAI_SAVE_AREA(event));
if (rawsize) /* Incremented counters */
rc = paiext_push_sample(rawsize, cpump, event);
return rc;
@@ -584,6 +599,12 @@ static int __init attr_event_init_one(struct attribute **attrs, int num)
{
struct perf_pmu_events_attr *pa;
+ /* Index larger than array_size, no counter name available */
+ if (num >= ARRAY_SIZE(paiext_ctrnames)) {
+ attrs[num] = NULL;
+ return 0;
+ }
+
pa = kzalloc(sizeof(*pa), GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -604,14 +625,13 @@ static int __init attr_event_init(void)
struct attribute **attrs;
int ret, i;
- attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
- GFP_KERNEL);
+ attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
+ for (i = 0; i <= paiext_cnt; i++) {
ret = attr_event_init_one(attrs, i);
if (ret) {
- attr_event_free(attrs, i - 1);
+ attr_event_free(attrs, i);
return ret;
}
}
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 3d93656bd948..a6b058ee4a36 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -5,8 +5,7 @@
#include <linux/errno.h>
#include <linux/bug.h>
#include <asm/ptrace.h>
-#include <asm/fpu/api.h>
-#include <asm/fpu/types.h>
+#include <asm/fpu.h>
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
@@ -20,10 +19,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return 0;
idx -= PERF_REG_S390_FP0;
- if (cpu_has_vx())
- fp = *(freg_t *)(current->thread.fpu.vxrs + idx);
- else
- fp = current->thread.fpu.fprs[idx];
+ fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
return fp.ui;
}
@@ -65,6 +61,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
*/
regs_user->regs = task_pt_regs(current);
if (user_mode(regs_user->regs))
- save_fpu_regs();
+ save_user_fpu_regs();
regs_user->abi = perf_reg_abi(current);
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 4e3b366589fb..dd456b475861 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -31,15 +31,19 @@
#include <linux/init_task.h>
#include <linux/entry-common.h>
#include <linux/io.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
+#include <asm/switch_to.h>
#include <asm/cpu_mf.h>
#include <asm/processor.h>
+#include <asm/ptrace.h>
#include <asm/vtimer.h>
#include <asm/exec.h>
+#include <asm/fpu.h>
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/stacktrace.h>
-#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include <asm/unwind.h>
#include "entry.h"
@@ -84,13 +88,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
/*
* Save the floating-point or vector register state of the current
- * task and set the CIF_FPU flag to lazy restore the FPU register
+ * task and set the TIF_FPU flag to lazy restore the FPU register
* state when returning to user space.
*/
- save_fpu_regs();
+ save_user_fpu_regs();
*dst = *src;
- dst->thread.fpu.regs = dst->thread.fpu.fprs;
+ dst->thread.kfpu_flags = 0;
/*
* Don't transfer over the runtime instrumentation or the guarded
@@ -186,8 +190,23 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
void execve_tail(void)
{
- current->thread.fpu.fpc = 0;
- asm volatile("sfpc %0" : : "d" (0));
+ current->thread.ufpu.fpc = 0;
+ fpu_sfpc(0);
+}
+
+struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next)
+{
+ save_user_fpu_regs();
+ save_kernel_fpu_regs(&prev->thread);
+ save_access_regs(&prev->thread.acrs[0]);
+ save_ri_cb(prev->thread.ri_cb);
+ save_gs_cb(prev->thread.gs_cb);
+ update_cr_regs(next);
+ restore_kernel_fpu_regs(&next->thread);
+ restore_access_regs(&next->thread.acrs[0]);
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);
+ restore_gs_cb(next->thread.gs_cb);
+ return __switch_to_asm(prev, next);
}
unsigned long __get_wchan(struct task_struct *p)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index f1897a8bb221..1cfed8b710b8 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -24,13 +24,14 @@
#include <linux/seccomp.h>
#include <linux/compat.h>
#include <trace/syscall.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
#include <asm/page.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
-#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include <asm/facility.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
#include "entry.h"
@@ -246,22 +247,15 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fpu.fpc;
+ tmp = child->thread.ufpu.fpc;
tmp <<= BITS_PER_LONG - 32;
} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
- if (cpu_has_vx())
- tmp = *(addr_t *)
- ((addr_t) child->thread.fpu.vxrs + 2*offset);
- else
- tmp = *(addr_t *)
- ((addr_t) child->thread.fpu.fprs + offset);
-
+ tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
@@ -395,21 +389,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
*/
if ((unsigned int)data != 0)
return -EINVAL;
- child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
+ child->thread.ufpu.fpc = data >> (BITS_PER_LONG - 32);
} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
- if (cpu_has_vx())
- *(addr_t *)((addr_t)
- child->thread.fpu.vxrs + 2*offset) = data;
- else
- *(addr_t *)((addr_t)
- child->thread.fpu.fprs + offset) = data;
-
+ *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data;
} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
@@ -622,21 +609,14 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fpu.fpc;
+ tmp = child->thread.ufpu.fpc;
} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- if (cpu_has_vx())
- tmp = *(__u32 *)
- ((addr_t) child->thread.fpu.vxrs + 2*offset);
- else
- tmp = *(__u32 *)
- ((addr_t) child->thread.fpu.fprs + offset);
-
+ tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
@@ -748,21 +728,14 @@ static int __poke_user_compat(struct task_struct *child,
/*
* floating point control reg. is in the thread structure
*/
- child->thread.fpu.fpc = data;
+ child->thread.ufpu.fpc = data;
} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- if (cpu_has_vx())
- *(__u32 *)((addr_t)
- child->thread.fpu.vxrs + 2*offset) = tmp;
- else
- *(__u32 *)((addr_t)
- child->thread.fpu.fprs + offset) = tmp;
-
+ *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
@@ -893,10 +866,10 @@ static int s390_fpregs_get(struct task_struct *target,
_s390_fp_regs fp_regs;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
- fp_regs.fpc = target->thread.fpu.fpc;
- fpregs_store(&fp_regs, &target->thread.fpu);
+ fp_regs.fpc = target->thread.ufpu.fpc;
+ fpregs_store(&fp_regs, &target->thread.ufpu);
return membuf_write(&to, &fp_regs, sizeof(fp_regs));
}
@@ -910,22 +883,17 @@ static int s390_fpregs_set(struct task_struct *target,
freg_t fprs[__NUM_FPRS];
if (target == current)
- save_fpu_regs();
-
- if (cpu_has_vx())
- convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
- else
- memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
-
+ save_user_fpu_regs();
+ convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
- u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
+ u32 ufpc[2] = { target->thread.ufpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
if (ufpc[1] != 0)
return -EINVAL;
- target->thread.fpu.fpc = ufpc[0];
+ target->thread.ufpu.fpc = ufpc[0];
}
if (rc == 0 && count > 0)
@@ -933,12 +901,7 @@ static int s390_fpregs_set(struct task_struct *target,
fprs, offsetof(s390_fp_regs, fprs), -1);
if (rc)
return rc;
-
- if (cpu_has_vx())
- convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
- else
- memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
-
+ convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
return rc;
}
@@ -988,9 +951,9 @@ static int s390_vxrs_low_get(struct task_struct *target,
if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = target->thread.fpu.vxrs[i].low;
+ vxrs[i] = target->thread.ufpu.vxrs[i].low;
return membuf_write(&to, vxrs, sizeof(vxrs));
}
@@ -1005,15 +968,15 @@ static int s390_vxrs_low_set(struct task_struct *target,
if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = target->thread.fpu.vxrs[i].low;
+ vxrs[i] = target->thread.ufpu.vxrs[i].low;
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
if (rc == 0)
for (i = 0; i < __NUM_VXRS_LOW; i++)
- target->thread.fpu.vxrs[i].low = vxrs[i];
+ target->thread.ufpu.vxrs[i].low = vxrs[i];
return rc;
}
@@ -1025,8 +988,8 @@ static int s390_vxrs_high_get(struct task_struct *target,
if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
- return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ save_user_fpu_regs();
+ return membuf_write(&to, target->thread.ufpu.vxrs + __NUM_VXRS_LOW,
__NUM_VXRS_HIGH * sizeof(__vector128));
}
@@ -1040,10 +1003,10 @@ static int s390_vxrs_high_set(struct task_struct *target,
if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
+ target->thread.ufpu.vxrs + __NUM_VXRS_LOW, 0, -1);
return rc;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index d1f3b56e7afc..24ed33f044ec 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -504,12 +504,12 @@ static void __init setup_resources(void)
int j;
u64 i;
- code_resource.start = (unsigned long) _text;
- code_resource.end = (unsigned long) _etext - 1;
- data_resource.start = (unsigned long) _etext;
- data_resource.end = (unsigned long) _edata - 1;
- bss_resource.start = (unsigned long) __bss_start;
- bss_resource.end = (unsigned long) __bss_stop - 1;
+ code_resource.start = __pa_symbol(_text);
+ code_resource.end = __pa_symbol(_etext) - 1;
+ data_resource.start = __pa_symbol(_etext);
+ data_resource.end = __pa_symbol(_edata) - 1;
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop) - 1;
for_each_mem_range(i, &start, &end) {
res = memblock_alloc(sizeof(*res), 8);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 43e9661cd715..6c2cb345402f 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -30,8 +30,8 @@
#include <linux/compat.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
+#include <asm/access-regs.h>
#include <asm/lowcore.h>
-#include <asm/switch_to.h>
#include <asm/vdso.h>
#include "entry.h"
@@ -109,7 +109,7 @@ struct rt_sigframe
static void store_sigregs(void)
{
save_access_regs(current->thread.acrs);
- save_fpu_regs();
+ save_user_fpu_regs();
}
/* Load registers after signal return */
@@ -131,7 +131,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
+ fpregs_store(&user_sregs.fpregs, &current->thread.ufpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
return -EFAULT;
return 0;
@@ -165,7 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
+ fpregs_load(&user_sregs.fpregs, &current->thread.ufpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -181,11 +181,11 @@ static int save_sigregs_ext(struct pt_regs *regs,
/* Save vector registers to signal stack */
if (cpu_has_vx()) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = current->thread.fpu.vxrs[i].low;
+ vxrs[i] = current->thread.ufpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -202,12 +202,12 @@ static int restore_sigregs_ext(struct pt_regs *regs,
if (cpu_has_vx()) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- current->thread.fpu.vxrs[i].low = vxrs[i];
+ current->thread.ufpu.vxrs[i].low = vxrs[i];
}
return 0;
}
@@ -222,7 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
set_current_blocked(&set);
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -246,7 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index c39d9f0d4b1c..0324649aae0a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -36,12 +36,13 @@
#include <linux/sched/task_stack.h>
#include <linux/crash_dump.h>
#include <linux/kprobes.h>
+#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/ctlreg.h>
#include <asm/pfault.h>
#include <asm/diag.h>
-#include <asm/switch_to.h>
#include <asm/facility.h>
+#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/setup.h>
#include <asm/irq.h>
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index f6f8f498c9be..1b1be3110cfc 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -20,7 +20,7 @@
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/topology.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
int topology_max_mnest;
@@ -426,9 +426,9 @@ subsys_initcall(create_proc_service_level);
*/
void s390_adjust_jiffies(void)
{
+ DECLARE_KERNEL_FPU_ONSTACK16(fpu);
struct sysinfo_1_2_2 *info;
unsigned long capability;
- struct kernel_fpu fpu;
info = (void *) get_zeroed_page(GFP_KERNEL);
if (!info)
@@ -447,21 +447,14 @@ void s390_adjust_jiffies(void)
* point division ..
*/
kernel_fpu_begin(&fpu, KERNEL_FPR);
- asm volatile(
- " sfpc %3\n"
- " l %0,%1\n"
- " tmlh %0,0xff80\n"
- " jnz 0f\n"
- " cefbr %%f2,%0\n"
- " j 1f\n"
- "0: le %%f2,%1\n"
- "1: cefbr %%f0,%2\n"
- " debr %%f0,%%f2\n"
- " cgebr %0,5,%%f0\n"
- : "=&d" (capability)
- : "Q" (info->capability), "d" (10000000), "d" (0)
- : "cc"
- );
+ fpu_sfpc(0);
+ if (info->capability & 0xff800000)
+ fpu_ldgr(2, info->capability);
+ else
+ fpu_cefbr(2, info->capability);
+ fpu_cefbr(0, 10000000);
+ fpu_debr(0, 2);
+ capability = fpu_cgebr(0, 5);
kernel_fpu_end(&fpu, KERNEL_FPR);
} else
/*
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index 14c6d25c035f..c0a70efa2426 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -90,7 +90,7 @@ SYM_FUNC_START(_diag26c_amode31)
SYM_FUNC_END(_diag26c_amode31)
/*
- * void _diag0c_amode31(struct hypfs_diag0c_entry *entry)
+ * void _diag0c_amode31(unsigned long rx)
*/
SYM_FUNC_START(_diag0c_amode31)
sam31
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 14abad953c02..fb9f31f36628 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -251,8 +251,8 @@ static struct clocksource clocksource_tod = {
.rating = 400,
.read = read_tod_clock,
.mask = CLOCKSOURCE_MASK(64),
- .mult = 1000,
- .shift = 12,
+ .mult = 4096000,
+ .shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
};
@@ -716,7 +716,7 @@ out_unlock:
/*
* STP subsys sysfs interface functions
*/
-static struct bus_type stp_subsys = {
+static const struct bus_type stp_subsys = {
.name = "stp",
.dev_name = "stp",
};
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 46dac4540ca8..52578b5cecbd 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -28,8 +28,8 @@
#include <linux/cpu.h>
#include <linux/entry-common.h>
#include <asm/asm-extable.h>
-#include <asm/fpu/api.h>
#include <asm/vtime.h>
+#include <asm/fpu.h>
#include "entry.h"
static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -201,8 +201,8 @@ static void vector_exception(struct pt_regs *regs)
}
/* get vector interrupt code from fpc */
- save_fpu_regs();
- vic = (current->thread.fpu.fpc & 0xf00) >> 8;
+ save_user_fpu_regs();
+ vic = (current->thread.ufpu.fpc & 0xf00) >> 8;
switch (vic) {
case 1: /* invalid vector operation */
si_code = FPE_FLTINV;
@@ -227,9 +227,9 @@ static void vector_exception(struct pt_regs *regs)
static void data_exception(struct pt_regs *regs)
{
- save_fpu_regs();
- if (current->thread.fpu.fpc & FPC_DXC_MASK)
- do_fp_trap(regs, current->thread.fpu.fpc);
+ save_user_fpu_regs();
+ if (current->thread.ufpu.fpc & FPC_DXC_MASK)
+ do_fp_trap(regs, current->thread.ufpu.fpc);
else
do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
}
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index b88345ef8bd9..5b0633ea8d93 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -12,7 +12,6 @@
#include <linux/kdebug.h>
#include <linux/sched/task_stack.h>
-#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/kprobes.h>
#include <asm/dis.h>
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index bbaefd84f15e..a45b3a4c91db 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -25,10 +25,7 @@ extern char vdso32_start[], vdso32_end[];
static struct vm_special_mapping vvar_mapping;
-static union {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
+static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index caec7db6f966..b12a274cbb47 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -22,7 +22,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
-LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
+LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
--hash-style=both --build-id=sha1 -melf_s390 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index edf5ff1debe1..65b9513a5a0e 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -9,7 +9,6 @@
OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
SECTIONS
{
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index e3c9085f8fa7..ef9832726097 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -25,8 +25,9 @@ KBUILD_AFLAGS_64 += -m64
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin
-ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
+ldflags-y := -shared -soname=linux-vdso64.so.1 \
--hash-style=both --build-id=sha1 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 4461ea151e49..37e2a505e81d 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -9,7 +9,6 @@
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
-ENTRY(_start)
SECTIONS
{
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index e32ef446f451..48de296e8905 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -59,6 +59,14 @@ SECTIONS
} :text = 0x0700
RO_DATA(PAGE_SIZE)
+ .data.rel.ro : {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+ .got : {
+ __got_start = .;
+ *(.got)
+ __got_end = .;
+ }
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -73,6 +81,9 @@ SECTIONS
__end_ro_after_init = .;
RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
+ .data.rel : {
+ *(.data.rel*)
+ }
BOOT_DATA_PRESERVED
. = ALIGN(8);
@@ -181,6 +192,7 @@ SECTIONS
PERCPU_SECTION(0x100)
+#ifdef CONFIG_PIE_BUILD
.dynsym ALIGN(8) : {
__dynsym_start = .;
*(.dynsym)
@@ -191,6 +203,19 @@ SECTIONS
*(.rela*)
__rela_dyn_end = .;
}
+ .dynamic ALIGN(8) : {
+ *(.dynamic)
+ }
+ .dynstr ALIGN(8) : {
+ *(.dynstr)
+ }
+#endif
+ .hash ALIGN(8) : {
+ *(.hash)
+ }
+ .gnu.hash ALIGN(8) : {
+ *(.gnu.hash)
+ }
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
@@ -214,9 +239,14 @@ SECTIONS
QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
QUAD(__boot_data_preserved_end -
__boot_data_preserved_start) /* bootdata_preserved_size */
+#ifdef CONFIG_PIE_BUILD
QUAD(__dynsym_start) /* dynsym_start */
QUAD(__rela_dyn_start) /* rela_dyn_start */
QUAD(__rela_dyn_end) /* rela_dyn_end */
+#else
+ QUAD(__got_start) /* got_start */
+ QUAD(__got_end) /* got_end */
+#endif
QUAD(_eamode31 - _samode31) /* amode31_size */
QUAD(init_mm)
QUAD(swapper_pg_dir)
@@ -235,6 +265,30 @@ SECTIONS
DWARF_DEBUG
ELF_DETAILS
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the three reserved double words.
+ */
+ .got.plt : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+#ifndef CONFIG_PIE_BUILD
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+#endif
+
/* Sections to be discarded */
DISCARDS
/DISCARD/ : {
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 5bfcc50c1a68..ee863566910b 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -11,11 +11,11 @@
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
+#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
-#include <asm/switch_to.h>
union asce {
unsigned long val;
@@ -391,7 +391,8 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
if (ar >= NUM_ACRS)
return -EINVAL;
- save_access_regs(vcpu->run->s.regs.acrs);
+ if (vcpu->arch.acrs_loaded)
+ save_access_regs(vcpu->run->s.regs.acrs);
alet.val = vcpu->run->s.regs.acrs[ar];
if (ar == 0 || alet.val == 0) {
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fc4007cc067a..dc721d50a942 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -19,13 +19,13 @@
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/vmalloc.h>
+#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/dis.h>
#include <linux/uaccess.h>
#include <asm/sclp.h>
#include <asm/isc.h>
#include <asm/gmap.h>
-#include <asm/switch_to.h>
#include <asm/nmi.h>
#include <asm/airq.h>
#include <asm/tpi.h>
@@ -584,7 +584,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
mci.val = mchk->mcic;
/* take care of lazy register loading */
- save_fpu_regs();
+ kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs);
if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
save_gs_cb(current->thread.gs_cb);
@@ -648,7 +648,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
}
rc |= write_guest_lc(vcpu, __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
- rc |= put_guest_lc(vcpu, current->thread.fpu.fpc,
+ rc |= put_guest_lc(vcpu, vcpu->run->s.regs.fpc,
(u32 __user *) __LC_FP_CREG_SAVE_AREA);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->todpr,
(u32 __user *) __LC_TOD_PROGREG_SAVE_AREA);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ea63ac769889..b11bb8e780a1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -33,19 +33,19 @@
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>
+#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
-#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
+#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
-#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
@@ -3951,6 +3951,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT |
KVM_SYNC_DIAG318;
+ vcpu->arch.acrs_loaded = false;
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
@@ -4829,8 +4830,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->run->s.regs.gprs,
sizeof(sie_page->pv_grregs));
}
- if (test_cpu_flag(CIF_FPU))
- load_fpu_regs();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
@@ -4951,16 +4950,8 @@ static void sync_regs(struct kvm_vcpu *vcpu)
}
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
- /* save host (userspace) fprs/vrs */
- save_fpu_regs();
- vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
- vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
- if (cpu_has_vx())
- current->thread.fpu.regs = vcpu->run->s.regs.vrs;
- else
- current->thread.fpu.regs = vcpu->run->s.regs.fprs;
- current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-
+ vcpu->arch.acrs_loaded = true;
+ kvm_s390_fpu_load(vcpu->run);
/* Sync fmt2 only data */
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
sync_regs_fmt2(vcpu);
@@ -5021,12 +5012,8 @@ static void store_regs(struct kvm_vcpu *vcpu)
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
- /* Save guest register state */
- save_fpu_regs();
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- /* Restore will be done lazily at return */
- current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
- current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+ vcpu->arch.acrs_loaded = false;
+ kvm_s390_fpu_store(vcpu->run);
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
store_regs_fmt2(vcpu);
}
@@ -5034,6 +5021,7 @@ static void store_regs(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
+ DECLARE_KERNEL_FPU_ONSTACK32(fpu);
int rc;
/*
@@ -5075,6 +5063,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
goto out;
}
+ kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
sync_regs(vcpu);
enable_cpu_timer_accounting(vcpu);
@@ -5098,6 +5087,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
disable_cpu_timer_accounting(vcpu);
store_regs(vcpu);
+ kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
kvm_sigset_deactivate(vcpu);
@@ -5172,8 +5162,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* switch in the run ioctl. Let's update our copies before we save
* it into the save area
*/
- save_fpu_regs();
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+ kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index a7ea80cfa445..111eb5c74784 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,6 +20,24 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+static inline void kvm_s390_fpu_store(struct kvm_run *run)
+{
+ fpu_stfpc(&run->s.regs.fpc);
+ if (cpu_has_vx())
+ save_vx_regs((__vector128 *)&run->s.regs.vrs);
+ else
+ save_fp_regs((freg_t *)&run->s.regs.fprs);
+}
+
+static inline void kvm_s390_fpu_load(struct kvm_run *run)
+{
+ fpu_lfpc_safe(&run->s.regs.fpc);
+ if (cpu_has_vx())
+ load_vx_regs((__vector128 *)&run->s.regs.vrs);
+ else
+ load_fp_regs((freg_t *)&run->s.regs.fprs);
+}
+
/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
#define TDB_FORMAT1 1
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 621a17fd1a1b..f875a404a0a0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -676,8 +676,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.crypto.pqap_hook) {
pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;
ret = pqap_hook(vcpu);
- if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
- kvm_s390_set_psw_cc(vcpu, 3);
+ if (!ret) {
+ if (vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+ kvm_s390_set_psw_cc(vcpu, 3);
+ else
+ kvm_s390_set_psw_cc(vcpu, 0);
+ }
up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
return ret;
}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index fef42e2a80a2..b2c9f010f0fe 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -18,7 +18,6 @@
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
-#include <asm/fpu/api.h>
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -1149,8 +1148,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
*/
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
barrier();
- if (test_cpu_flag(CIF_FPU))
- load_fpu_regs();
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
barrier();
@@ -1235,7 +1232,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
if (IS_ERR(gmap))
return PTR_ERR(gmap);
- gmap->private = vcpu->kvm;
vcpu->kvm->stat.gmap_shadow_create++;
WRITE_ONCE(vsie_page->gmap, gmap);
return 0;
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 7c50eca85ca4..90eac15ea62a 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,7 @@
#
lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
+lib-y += csum-partial.o
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c
new file mode 100644
index 000000000000..458abd9bac70
--- /dev/null
+++ b/arch/s390/lib/csum-partial.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <asm/checksum.h>
+#include <asm/fpu.h>
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit). If copy is true copies to dst.
+ *
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
+ *
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
+ *
+ * It's best to have src and dst aligned on a 64-bit boundary.
+ */
+static __always_inline __wsum csum_copy(void *dst, const void *src, int len, __wsum sum, bool copy)
+{
+ DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
+
+ if (!cpu_has_vx()) {
+ if (copy)
+ memcpy(dst, src, len);
+ return cksm(dst, len, sum);
+ }
+ kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
+ fpu_vlvgf(16, (__force u32)sum, 1);
+ fpu_vzero(17);
+ fpu_vzero(18);
+ fpu_vzero(19);
+ while (len >= 64) {
+ fpu_vlm(20, 23, src);
+ if (copy) {
+ fpu_vstm(20, 23, dst);
+ dst += 64;
+ }
+ fpu_vcksm(16, 20, 16);
+ fpu_vcksm(17, 21, 17);
+ fpu_vcksm(18, 22, 18);
+ fpu_vcksm(19, 23, 19);
+ src += 64;
+ len -= 64;
+ }
+ while (len >= 32) {
+ fpu_vlm(20, 21, src);
+ if (copy) {
+ fpu_vstm(20, 21, dst);
+ dst += 32;
+ }
+ fpu_vcksm(16, 20, 16);
+ fpu_vcksm(17, 21, 17);
+ src += 32;
+ len -= 32;
+ }
+ while (len >= 16) {
+ fpu_vl(20, src);
+ if (copy) {
+ fpu_vst(20, dst);
+ dst += 16;
+ }
+ fpu_vcksm(16, 20, 16);
+ src += 16;
+ len -= 16;
+ }
+ if (len) {
+ fpu_vll(20, len - 1, src);
+ if (copy)
+ fpu_vstl(20, len - 1, dst);
+ fpu_vcksm(16, 20, 16);
+ }
+ fpu_vcksm(18, 19, 18);
+ fpu_vcksm(16, 17, 16);
+ fpu_vcksm(16, 18, 16);
+ sum = (__force __wsum)fpu_vlgvf(16, 1);
+ kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
+ return sum;
+}
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+ return csum_copy(NULL, buff, len, sum, false);
+}
+EXPORT_SYMBOL(csum_partial);
+
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+ return csum_copy(dst, src, len, 0, true);
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index e41869f5cc95..282fefe107a2 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -136,7 +136,7 @@ dcss_diag(int *func, void *parameter,
unsigned long rx, ry;
int rc;
- rx = (unsigned long) parameter;
+ rx = virt_to_phys(parameter);
ry = (unsigned long) *func;
diag_stat_inc(DIAG_STAT_X064);
@@ -178,7 +178,7 @@ query_segment_type (struct dcss_segment *seg)
/* initialize diag input parameters */
qin->qopcode = DCSS_FINDSEGA;
- qin->qoutptr = (unsigned long) qout;
+ qin->qoutptr = virt_to_phys(qout);
qin->qoutlen = sizeof(struct qout64);
memcpy (qin->qname, seg->dcss_name, 8);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 6f96b5a71c63..8da39deb56ca 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1691,6 +1691,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
return ERR_PTR(-ENOMEM);
new->mm = parent->mm;
new->parent = gmap_get(parent);
+ new->private = parent->private;
new->orig_asce = asce;
new->edat_level = edat_level;
new->initialized = false;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index fc9a7dc26c5e..b14fc0887654 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -71,6 +71,15 @@ static inline unsigned long mmap_base(unsigned long rnd,
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
+static int get_align_mask(struct file *filp, unsigned long flags)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ if (filp || (flags & MAP_SHARED))
+ return MMAP_ALIGN_MASK << PAGE_SHIFT;
+ return 0;
+}
+
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
@@ -97,10 +106,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- if (filp || (flags & MAP_SHARED))
- info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
- else
- info.align_mask = 0;
+ info.align_mask = get_align_mask(filp, flags);
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
if (offset_in_page(addr))
@@ -138,10 +144,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
- if (filp || (flags & MAP_SHARED))
- info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
- else
- info.align_mask = 0;
+ info.align_mask = get_align_mask(filp, flags);
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 676ac74026a8..26afde0d1ed3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -28,6 +28,7 @@
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/lockdep.h>
#include <asm/isc.h>
#include <asm/airq.h>
@@ -252,7 +253,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
/* combine single writes by using store-block insn */
void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
- zpci_memcpy_toio(to, from, count);
+ zpci_memcpy_toio(to, from, count * 8);
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
@@ -730,12 +731,12 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
* equivalent to its state during boot when first probing a driver.
* Consequently after reset the PCI function requires re-initialization via the
* common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
- * and enabling the function via e.g.pci_enablde_device_flags().The caller
+ * and enabling the function via e.g. pci_enable_device_flags(). The caller
* must guard against concurrent reset attempts.
*
* In most cases this function should not be called directly but through
* pci_reset_function() or pci_reset_bus() which handle the save/restore and
- * locking.
+ * locking - asserted by lockdep.
*
* Return: 0 on success and an error value otherwise
*/
@@ -744,6 +745,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
u8 status;
int rc;
+ lockdep_assert_held(&zdev->state_lock);
zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
if (zdev_enabled(zdev)) {
/* Disables device access, DMAs and IRQs (reset state) */
@@ -806,7 +808,8 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
zdev->state = state;
kref_init(&zdev->kref);
- mutex_init(&zdev->lock);
+ mutex_init(&zdev->state_lock);
+ mutex_init(&zdev->fmb_lock);
mutex_init(&zdev->kzdev_lock);
rc = zpci_init_iommu(zdev);
@@ -870,6 +873,10 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
{
int rc;
+ lockdep_assert_held(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ return 0;
+
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
@@ -889,7 +896,7 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
}
/**
- * zpci_device_reserved() - Mark device as resverved
+ * zpci_device_reserved() - Mark device as reserved
* @zdev: the zpci_dev that was reserved
*
* Handle the case that a given zPCI function was reserved by another system.
@@ -899,8 +906,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
*/
void zpci_device_reserved(struct zpci_dev *zdev)
{
- if (zdev->has_hp_slot)
- zpci_exit_slot(zdev);
/*
* Remove device from zpci_list as it is going away. This also
* makes sure we ignore subsequent zPCI events for this device.
@@ -918,6 +923,9 @@ void zpci_release_device(struct kref *kref)
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
int ret;
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
+
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 6dde2263c79d..2cb5043a997d 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -91,9 +91,9 @@ static int pci_perf_show(struct seq_file *m, void *v)
if (!zdev)
return 0;
- mutex_lock(&zdev->lock);
+ mutex_lock(&zdev->fmb_lock);
if (!zdev->fmb) {
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
seq_puts(m, "FMB statistics disabled\n");
return 0;
}
@@ -130,7 +130,7 @@ static int pci_perf_show(struct seq_file *m, void *v)
}
pci_sw_counter_show(m);
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
return 0;
}
@@ -148,7 +148,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
if (rc)
return rc;
- mutex_lock(&zdev->lock);
+ mutex_lock(&zdev->fmb_lock);
switch (val) {
case 0:
rc = zpci_fmb_disable_device(zdev);
@@ -157,7 +157,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
rc = zpci_fmb_enable_device(zdev);
break;
}
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
return rc ? rc : count;
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 4d9773ef9e0a..dbe95ec5917e 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -267,6 +267,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
zpci_err_hex(ccdf, sizeof(*ccdf));
if (zdev) {
+ mutex_lock(&zdev->state_lock);
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -294,6 +295,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
}
pci_dev_put(pdev);
no_pdev:
+ if (zdev)
+ mutex_unlock(&zdev->state_lock);
zpci_zdev_put(zdev);
}
@@ -326,6 +329,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
+
+ if (existing_zdev)
+ mutex_lock(&zdev->state_lock);
+
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
@@ -348,7 +355,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
case 0x0303: /* Deconfiguration requested */
if (zdev) {
- /* The event may have been queued before we confirgured
+ /* The event may have been queued before we configured
* the device.
*/
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
@@ -359,7 +366,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
case 0x0304: /* Configured -> Standby|Reserved */
if (zdev) {
- /* The event may have been queued before we confirgured
+ /* The event may have been queued before we configured
* the device.:
*/
if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
@@ -383,8 +390,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default:
break;
}
- if (existing_zdev)
+ if (existing_zdev) {
+ mutex_unlock(&zdev->state_lock);
zpci_zdev_put(zdev);
+ }
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 8a7abac51816..a0b872b74fe3 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -49,6 +49,39 @@ static ssize_t mio_enabled_show(struct device *dev,
}
static DEVICE_ATTR_RO(mio_enabled);
+static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
+{
+ u8 status;
+ int ret;
+
+ pci_stop_and_remove_bus_device(pdev);
+ if (zdev_enabled(zdev)) {
+ ret = zpci_disable_device(zdev);
+ /*
+ * Due to a z/VM vs LPAR inconsistency in the error
+ * state the FH may indicate an enabled device but
+ * disable says the device is already disabled don't
+ * treat it as an error here.
+ */
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ return ret;
+ }
+
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ return ret;
+
+ if (zdev->dma_table) {
+ ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table), &status);
+ if (ret)
+ zpci_disable_device(zdev);
+ }
+ return ret;
+}
+
static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -56,7 +89,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
- u8 status;
/* Can't use device_remove_self() here as that would lead us to lock
* the pci_rescan_remove_lock while holding the device' kernfs lock.
@@ -70,6 +102,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
*/
kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
WARN_ON_ONCE(!kn);
+
+ /* Device needs to be configured and state must not change */
+ mutex_lock(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ goto out;
+
/* device_remove_file() serializes concurrent calls ignoring all but
* the first
*/
@@ -82,35 +120,13 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
*/
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
- pci_stop_and_remove_bus_device(pdev);
- if (zdev_enabled(zdev)) {
- ret = zpci_disable_device(zdev);
- /*
- * Due to a z/VM vs LPAR inconsistency in the error
- * state the FH may indicate an enabled device but
- * disable says the device is already disabled don't
- * treat it as an error here.
- */
- if (ret == -EINVAL)
- ret = 0;
- if (ret)
- goto out;
- }
-
- ret = zpci_enable_device(zdev);
- if (ret)
- goto out;
-
- if (zdev->dma_table) {
- ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (ret)
- zpci_disable_device(zdev);
- }
+ ret = _do_recover(pdev, zdev);
}
-out:
pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
+
+out:
+ mutex_unlock(&zdev->state_lock);
if (kn)
sysfs_unbreak_active_protection(kn);
return ret ? ret : count;
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
index ea62f37b79ef..e6af51d9d183 100644
--- a/arch/s390/tools/.gitignore
+++ b/arch/s390/tools/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
gen_facilities
gen_opcode_table
+relocs
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index f9dd47ff9ac4..f2862364fb42 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -25,3 +25,8 @@ $(kapi)/facility-defs.h: $(obj)/gen_facilities FORCE
$(kapi)/dis-defs.h: $(obj)/gen_opcode_table FORCE
$(call filechk,dis-defs.h)
+
+hostprogs += relocs
+PHONY += relocs
+relocs: $(obj)/relocs
+ @:
diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c
new file mode 100644
index 000000000000..30a732c808f3
--- /dev/null
+++ b/arch/s390/tools/relocs.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+
+#define ELF_BITS 64
+
+#define ELF_MACHINE EM_S390
+#define ELF_MACHINE_NAME "IBM S/390"
+#define SHT_REL_TYPE SHT_RELA
+#define Elf_Rel Elf64_Rela
+
+#define ELF_CLASS ELFCLASS64
+#define ELF_ENDIAN ELFDATA2MSB
+#define ELF_R_SYM(val) ELF64_R_SYM(val)
+#define ELF_R_TYPE(val) ELF64_R_TYPE(val)
+#define ELF_ST_TYPE(o) ELF64_ST_TYPE(o)
+#define ELF_ST_BIND(o) ELF64_ST_BIND(o)
+#define ELF_ST_VISIBILITY(o) ELF64_ST_VISIBILITY(o)
+
+#define ElfW(type) _ElfW(ELF_BITS, type)
+#define _ElfW(bits, type) __ElfW(bits, type)
+#define __ElfW(bits, type) Elf##bits##_##type
+
+#define Elf_Addr ElfW(Addr)
+#define Elf_Ehdr ElfW(Ehdr)
+#define Elf_Phdr ElfW(Phdr)
+#define Elf_Shdr ElfW(Shdr)
+#define Elf_Sym ElfW(Sym)
+
+static Elf_Ehdr ehdr;
+static unsigned long shnum;
+static unsigned int shstrndx;
+
+struct relocs {
+ uint32_t *offset;
+ unsigned long count;
+ unsigned long size;
+};
+
+static struct relocs relocs64;
+#define FMT PRIu64
+
+struct section {
+ Elf_Shdr shdr;
+ struct section *link;
+ Elf_Rel *reltab;
+};
+
+static struct section *secs;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#define le64_to_cpu(val) (val)
+#define be16_to_cpu(val) bswap_16(val)
+#define be32_to_cpu(val) bswap_32(val)
+#define be64_to_cpu(val) bswap_64(val)
+#endif
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#define le64_to_cpu(val) bswap_64(val)
+#define be16_to_cpu(val) (val)
+#define be32_to_cpu(val) (val)
+#define be64_to_cpu(val) (val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+ if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+ return le16_to_cpu(val);
+ else
+ return be16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+ if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+ return le32_to_cpu(val);
+ else
+ return be32_to_cpu(val);
+}
+
+#define elf_half_to_cpu(x) elf16_to_cpu(x)
+#define elf_word_to_cpu(x) elf32_to_cpu(x)
+
+static uint64_t elf64_to_cpu(uint64_t val)
+{
+ return be64_to_cpu(val);
+}
+
+#define elf_addr_to_cpu(x) elf64_to_cpu(x)
+#define elf_off_to_cpu(x) elf64_to_cpu(x)
+#define elf_xword_to_cpu(x) elf64_to_cpu(x)
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
+static void read_ehdr(FILE *fp)
+{
+ if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+ die("Cannot read ELF header: %s\n", strerror(errno));
+ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
+ die("No ELF magic\n");
+ if (ehdr.e_ident[EI_CLASS] != ELF_CLASS)
+ die("Not a %d bit executable\n", ELF_BITS);
+ if (ehdr.e_ident[EI_DATA] != ELF_ENDIAN)
+ die("ELF endian mismatch\n");
+ if (ehdr.e_ident[EI_VERSION] != EV_CURRENT)
+ die("Unknown ELF version\n");
+
+ /* Convert the fields to native endian */
+ ehdr.e_type = elf_half_to_cpu(ehdr.e_type);
+ ehdr.e_machine = elf_half_to_cpu(ehdr.e_machine);
+ ehdr.e_version = elf_word_to_cpu(ehdr.e_version);
+ ehdr.e_entry = elf_addr_to_cpu(ehdr.e_entry);
+ ehdr.e_phoff = elf_off_to_cpu(ehdr.e_phoff);
+ ehdr.e_shoff = elf_off_to_cpu(ehdr.e_shoff);
+ ehdr.e_flags = elf_word_to_cpu(ehdr.e_flags);
+ ehdr.e_ehsize = elf_half_to_cpu(ehdr.e_ehsize);
+ ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize);
+ ehdr.e_phnum = elf_half_to_cpu(ehdr.e_phnum);
+ ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize);
+ ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum);
+ ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx);
+
+ shnum = ehdr.e_shnum;
+ shstrndx = ehdr.e_shstrndx;
+
+ if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN))
+ die("Unsupported ELF header type\n");
+ if (ehdr.e_machine != ELF_MACHINE)
+ die("Not for %s\n", ELF_MACHINE_NAME);
+ if (ehdr.e_version != EV_CURRENT)
+ die("Unknown ELF version\n");
+ if (ehdr.e_ehsize != sizeof(Elf_Ehdr))
+ die("Bad Elf header size\n");
+ if (ehdr.e_phentsize != sizeof(Elf_Phdr))
+ die("Bad program header entry\n");
+ if (ehdr.e_shentsize != sizeof(Elf_Shdr))
+ die("Bad section header entry\n");
+
+ if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) {
+ Elf_Shdr shdr;
+
+ if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+ die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno));
+
+ if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+ die("Cannot read initial ELF section header: %s\n", strerror(errno));
+
+ if (shnum == SHN_UNDEF)
+ shnum = elf_xword_to_cpu(shdr.sh_size);
+
+ if (shstrndx == SHN_XINDEX)
+ shstrndx = elf_word_to_cpu(shdr.sh_link);
+ }
+
+ if (shstrndx >= shnum)
+ die("String table index out of bounds\n");
+}
+
+static void read_shdrs(FILE *fp)
+{
+ Elf_Shdr shdr;
+ int i;
+
+ secs = calloc(shnum, sizeof(struct section));
+ if (!secs)
+ die("Unable to allocate %ld section headers\n", shnum);
+
+ if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+ die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno));
+
+ for (i = 0; i < shnum; i++) {
+ struct section *sec = &secs[i];
+
+ if (fread(&shdr, sizeof(shdr), 1, fp) != 1) {
+ die("Cannot read ELF section headers %d/%ld: %s\n",
+ i, shnum, strerror(errno));
+ }
+
+ sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name);
+ sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type);
+ sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags);
+ sec->shdr.sh_addr = elf_addr_to_cpu(shdr.sh_addr);
+ sec->shdr.sh_offset = elf_off_to_cpu(shdr.sh_offset);
+ sec->shdr.sh_size = elf_xword_to_cpu(shdr.sh_size);
+ sec->shdr.sh_link = elf_word_to_cpu(shdr.sh_link);
+ sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info);
+ sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign);
+ sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize);
+
+ if (sec->shdr.sh_link < shnum)
+ sec->link = &secs[sec->shdr.sh_link];
+ }
+
+}
+
+static void read_relocs(FILE *fp)
+{
+ int i, j;
+
+ for (i = 0; i < shnum; i++) {
+ struct section *sec = &secs[i];
+
+ if (sec->shdr.sh_type != SHT_REL_TYPE)
+ continue;
+
+ sec->reltab = malloc(sec->shdr.sh_size);
+ if (!sec->reltab)
+ die("malloc of %" FMT " bytes for relocs failed\n", sec->shdr.sh_size);
+
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+ die("Seek to %" FMT " failed: %s\n", sec->shdr.sh_offset, strerror(errno));
+
+ if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size)
+ die("Cannot read symbol table: %s\n", strerror(errno));
+
+ for (j = 0; j < sec->shdr.sh_size / sizeof(Elf_Rel); j++) {
+ Elf_Rel *rel = &sec->reltab[j];
+
+ rel->r_offset = elf_addr_to_cpu(rel->r_offset);
+ rel->r_info = elf_xword_to_cpu(rel->r_info);
+#if (SHT_REL_TYPE == SHT_RELA)
+ rel->r_addend = elf_xword_to_cpu(rel->r_addend);
+#endif
+ }
+ }
+}
+
+static void add_reloc(struct relocs *r, uint32_t offset)
+{
+ if (r->count == r->size) {
+ unsigned long newsize = r->size + 50000;
+ void *mem = realloc(r->offset, newsize * sizeof(r->offset[0]));
+
+ if (!mem)
+ die("realloc of %ld entries for relocs failed\n", newsize);
+
+ r->offset = mem;
+ r->size = newsize;
+ }
+ r->offset[r->count++] = offset;
+}
+
+static int do_reloc(struct section *sec, Elf_Rel *rel)
+{
+ unsigned int r_type = ELF64_R_TYPE(rel->r_info);
+ ElfW(Addr) offset = rel->r_offset;
+
+ switch (r_type) {
+ case R_390_NONE:
+ case R_390_PC32:
+ case R_390_PC64:
+ case R_390_PC16DBL:
+ case R_390_PC32DBL:
+ case R_390_PLT32DBL:
+ case R_390_GOTENT:
+ case R_390_GOTPCDBL:
+ case R_390_GOTOFF64:
+ break;
+ case R_390_64:
+ add_reloc(&relocs64, offset);
+ break;
+ default:
+ die("Unsupported relocation type: %d\n", r_type);
+ break;
+ }
+
+ return 0;
+}
+
+static void walk_relocs(void)
+{
+ int i;
+
+ /* Walk through the relocations */
+ for (i = 0; i < shnum; i++) {
+ struct section *sec_applies;
+ int j;
+ struct section *sec = &secs[i];
+
+ if (sec->shdr.sh_type != SHT_REL_TYPE)
+ continue;
+
+ sec_applies = &secs[sec->shdr.sh_info];
+ if (!(sec_applies->shdr.sh_flags & SHF_ALLOC))
+ continue;
+
+ for (j = 0; j < sec->shdr.sh_size / sizeof(Elf_Rel); j++) {
+ Elf_Rel *rel = &sec->reltab[j];
+
+ do_reloc(sec, rel);
+ }
+ }
+}
+
+static int cmp_relocs(const void *va, const void *vb)
+{
+ const uint32_t *a, *b;
+
+ a = va; b = vb;
+ return (*a == *b) ? 0 : (*a > *b) ? 1 : -1;
+}
+
+static void sort_relocs(struct relocs *r)
+{
+ qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+}
+
+static int print_reloc(uint32_t v)
+{
+ return fprintf(stdout, "\t.long 0x%08"PRIx32"\n", v) > 0 ? 0 : -1;
+}
+
+static void emit_relocs(void)
+{
+ int i;
+
+ walk_relocs();
+ sort_relocs(&relocs64);
+
+ printf(".section \".vmlinux.relocs_64\",\"a\"\n");
+ for (i = 0; i < relocs64.count; i++)
+ print_reloc(relocs64.offset[i]);
+}
+
+static void process(FILE *fp)
+{
+ read_ehdr(fp);
+ read_shdrs(fp);
+ read_relocs(fp);
+ emit_relocs();
+}
+
+static void usage(void)
+{
+ die("relocs vmlinux\n");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned char e_ident[EI_NIDENT];
+ const char *fname;
+ FILE *fp;
+
+ fname = NULL;
+
+ if (argc != 2)
+ usage();
+
+ fname = argv[1];
+
+ fp = fopen(fname, "r");
+ if (!fp)
+ die("Cannot open %s: %s\n", fname, strerror(errno));
+
+ if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT)
+ die("Cannot read %s: %s", fname, strerror(errno));
+
+ rewind(fp);
+
+ process(fp);
+
+ fclose(fp);
+ return 0;
+}
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index b5e29f99c02c..6c6c791a1d06 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -12,6 +12,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo $(OBJECTS)
GCOV_PROFILE := n
+UBSAN_SANITIZE := n
#
# IMAGE_OFFSET is the load offset of the compression loader
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 62f4b9edcb98..f780b467e75d 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -9,18 +9,7 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-#if defined(CONFIG_PAGE_SIZE_4KB)
-# define PAGE_SHIFT 12
-#elif defined(CONFIG_PAGE_SIZE_8KB)
-# define PAGE_SHIFT 13
-#elif defined(CONFIG_PAGE_SIZE_16KB)
-# define PAGE_SHIFT 14
-#elif defined(CONFIG_PAGE_SIZE_64KB)
-# define PAGE_SHIFT 16
-#else
-# error "Bogus kernel page size?"
-#endif
-
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PTE_MASK PAGE_MASK
diff --git a/arch/sh/include/asm/word-at-a-time.h b/arch/sh/include/asm/word-at-a-time.h
index 4aa398455b94..95100ce128d6 100644
--- a/arch/sh/include/asm/word-at-a-time.h
+++ b/arch/sh/include/asm/word-at-a-time.h
@@ -5,6 +5,8 @@
#ifdef CONFIG_CPU_BIG_ENDIAN
# include <asm-generic/word-at-a-time.h>
#else
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
/*
* Little-endian version cribbed from x86.
*/
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 455311d9a5e9..f32a1963ff0c 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -4,6 +4,9 @@ menu "Memory management options"
config MMU
bool "Support for memory management hardware"
depends on !CPU_SH2
+ select HAVE_PAGE_SIZE_4KB
+ select HAVE_PAGE_SIZE_8KB if X2TLB
+ select HAVE_PAGE_SIZE_64KB if CPU_SH4
default y
help
Some SH processors (such as SH-2/SH-2A) lack an MMU. In order to
@@ -13,6 +16,15 @@ config MMU
turning this off will boot the kernel on these machines with the
MMU implicitly switched off.
+config NOMMU
+ def_bool !MMU
+ select HAVE_PAGE_SIZE_4KB
+ select HAVE_PAGE_SIZE_8KB
+ select HAVE_PAGE_SIZE_16KB
+ select HAVE_PAGE_SIZE_64KB
+ help
+ On MMU-less systems, any of these page sizes can be selected
+
config PAGE_OFFSET
hex
default "0x80000000" if MMU
@@ -148,36 +160,6 @@ config HAVE_SRAM_POOL
select GENERIC_ALLOCATOR
choice
- prompt "Kernel page size"
- default PAGE_SIZE_4KB
-
-config PAGE_SIZE_4KB
- bool "4kB"
- help
- This is the default page size used by all SuperH CPUs.
-
-config PAGE_SIZE_8KB
- bool "8kB"
- depends on !MMU || X2TLB
- help
- This enables 8kB pages as supported by SH-X2 and later MMUs.
-
-config PAGE_SIZE_16KB
- bool "16kB"
- depends on !MMU
- help
- This enables 16kB pages on MMU-less SH systems.
-
-config PAGE_SIZE_64KB
- bool "64kB"
- depends on !MMU || CPU_SH4
- help
- This enables support for 64kB pages, possible on all SH-4
- CPUs and later.
-
-endchoice
-
-choice
prompt "HugeTLB page size"
depends on HUGETLB_PAGE
default HUGETLB_PAGE_SIZE_1MB if PAGE_SIZE_64KB
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 204c43cb3d43..7e6bc6fff76b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -58,6 +58,7 @@ config SPARC32
select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64
select HAVE_UID16
+ select HAVE_PAGE_SIZE_4KB
select LOCK_MM_AND_FIND_VMA
select OLD_SIGACTION
select ZONE_DMA
@@ -75,6 +76,7 @@ config SPARC64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_PAGE_SIZE_8KB
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_CONTEXT_TRACKING_USER
select HAVE_TIF_NOHZ
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 5f6035936131..2a03daa68f28 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -60,7 +60,7 @@ libs-y += arch/sparc/prom/
libs-y += arch/sparc/lib/
drivers-$(CONFIG_PM) += arch/sparc/power/
-drivers-$(CONFIG_FB) += arch/sparc/video/
+drivers-$(CONFIG_FB_CORE) += arch/sparc/video/
boot := arch/sparc/boot
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 94eb529dcb77..2718cbea826a 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -10,7 +10,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"nop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
@@ -26,7 +26,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"b %l[l_yes]\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
index 6be6f683f98f..9977c77374cd 100644
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -11,7 +11,7 @@
#include <linux/const.h>
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 254dffd85fb1..e9bd24821c93 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -4,8 +4,7 @@
#include <linux/const.h>
-#define PAGE_SHIFT 13
-
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index f3969a3600db..a0cc9bb41a92 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1206,10 +1206,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{
}
-void smp_prepare_boot_cpu(void)
-{
-}
-
void __init smp_setup_processor_id(void)
{
if (tlb_type == spitfire)
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 7f5eedf1f5e0..e8aef2c8ae99 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -2,6 +2,7 @@
#
# Building vDSO images for sparc.
#
+UBSAN_SANITIZE := n
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o
diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile
index 6baddbd58e4d..d4d83f1702c6 100644
--- a/arch/sparc/video/Makefile
+++ b/arch/sparc/video/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_FB) += fbdev.o
+obj-$(CONFIG_FB_CORE) += fbdev.o
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index b5e179360534..93a5a8999b07 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -20,6 +20,7 @@ config UML
select HAVE_UID16
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_BUGVERBOSE
+ select HAVE_PAGE_SIZE_4KB
select NO_DMA if !UML_DMA_EMULATION
select OF_EARLY_FLATTREE if OF
select GENERIC_IRQ_SHOW
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 82f05f250634..34957dcb88b9 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -115,7 +115,9 @@ archprepare:
$(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
+ifdef CONFIG_LD_SCRIPT_DYN
+LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie
+endif
LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index cabcc501b448..77c4afb8ab90 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -265,7 +265,7 @@ static void uml_net_poll_controller(struct net_device *dev)
static void uml_net_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strscpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+ strscpy(info->driver, DRIVER_NAME);
}
static const struct ethtool_ops uml_net_ethtool_ops = {
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 92ee2697ff39..63fc062add70 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -108,8 +108,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
-static int ubd_open(struct gendisk *disk, blk_mode_t mode);
-static void ubd_release(struct gendisk *disk);
static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -118,16 +116,11 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
static const struct block_device_operations ubd_blops = {
.owner = THIS_MODULE,
- .open = ubd_open,
- .release = ubd_release,
.ioctl = ubd_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.getgeo = ubd_getgeo,
};
-/* Protected by ubd_lock */
-static struct gendisk *ubd_gendisk[MAX_DEV];
-
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
.cl = 1 })
@@ -155,7 +148,6 @@ struct ubd {
* backing or the cow file. */
char *file;
char *serial;
- int count;
int fd;
__u64 size;
struct openflags boot_openflags;
@@ -165,7 +157,7 @@ struct ubd {
unsigned no_trim:1;
struct cow cow;
struct platform_device pdev;
- struct request_queue *queue;
+ struct gendisk *disk;
struct blk_mq_tag_set tag_set;
spinlock_t lock;
};
@@ -181,7 +173,6 @@ struct ubd {
#define DEFAULT_UBD { \
.file = NULL, \
.serial = NULL, \
- .count = 0, \
.fd = -1, \
.size = -1, \
.boot_openflags = OPEN_FLAGS, \
@@ -774,8 +765,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
ubd_dev->fd = fd;
if(ubd_dev->cow.file != NULL){
- blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
-
err = -ENOMEM;
ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
if(ubd_dev->cow.bitmap == NULL){
@@ -797,11 +786,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
if(err < 0) goto error;
ubd_dev->cow.fd = err;
}
- if (ubd_dev->no_trim == 0) {
- blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
- blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
- }
- blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
return 0;
error:
os_close_file(ubd_dev->fd);
@@ -851,27 +835,6 @@ static const struct attribute_group *ubd_attr_groups[] = {
NULL,
};
-static int ubd_disk_register(int major, u64 size, int unit,
- struct gendisk *disk)
-{
- disk->major = major;
- disk->first_minor = unit << UBD_SHIFT;
- disk->minors = 1 << UBD_SHIFT;
- disk->fops = &ubd_blops;
- set_capacity(disk, size / 512);
- sprintf(disk->disk_name, "ubd%c", 'a' + unit);
-
- ubd_devs[unit].pdev.id = unit;
- ubd_devs[unit].pdev.name = DRIVER_NAME;
- ubd_devs[unit].pdev.dev.release = ubd_device_release;
- dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
- platform_device_register(&ubd_devs[unit].pdev);
-
- disk->private_data = &ubd_devs[unit];
- disk->queue = ubd_devs[unit].queue;
- return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
-}
-
#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
static const struct blk_mq_ops ubd_mq_ops = {
@@ -881,18 +844,36 @@ static const struct blk_mq_ops ubd_mq_ops = {
static int ubd_add(int n, char **error_out)
{
struct ubd *ubd_dev = &ubd_devs[n];
+ struct queue_limits lim = {
+ .max_segments = MAX_SG,
+ .seg_boundary_mask = PAGE_SIZE - 1,
+ };
struct gendisk *disk;
int err = 0;
if(ubd_dev->file == NULL)
goto out;
+ if (ubd_dev->cow.file)
+ lim.max_hw_sectors = 8 * sizeof(long);
+ if (!ubd_dev->no_trim) {
+ lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
+ lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
+ }
+
err = ubd_file_size(ubd_dev, &ubd_dev->size);
if(err < 0){
*error_out = "Couldn't determine size of device's file";
goto out;
}
+ err = ubd_open_dev(ubd_dev);
+ if (err) {
+ pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
+ 'a' + n, ubd_dev->file, -err);
+ goto out;
+ }
+
ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
ubd_dev->tag_set.ops = &ubd_mq_ops;
@@ -904,29 +885,43 @@ static int ubd_add(int n, char **error_out)
err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
if (err)
- goto out;
+ goto out_close;
- disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
+ disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_cleanup_tags;
}
- ubd_dev->queue = disk->queue;
- blk_queue_write_cache(ubd_dev->queue, true, false);
- blk_queue_max_segments(ubd_dev->queue, MAX_SG);
- blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
- err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+ blk_queue_write_cache(disk->queue, true, false);
+ disk->major = UBD_MAJOR;
+ disk->first_minor = n << UBD_SHIFT;
+ disk->minors = 1 << UBD_SHIFT;
+ disk->fops = &ubd_blops;
+ set_capacity(disk, ubd_dev->size / 512);
+ sprintf(disk->disk_name, "ubd%c", 'a' + n);
+ disk->private_data = ubd_dev;
+ set_disk_ro(disk, !ubd_dev->openflags.w);
+
+ ubd_dev->pdev.id = n;
+ ubd_dev->pdev.name = DRIVER_NAME;
+ ubd_dev->pdev.dev.release = ubd_device_release;
+ dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
+ platform_device_register(&ubd_dev->pdev);
+
+ err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
if (err)
goto out_cleanup_disk;
- ubd_gendisk[n] = disk;
return 0;
out_cleanup_disk:
put_disk(disk);
out_cleanup_tags:
blk_mq_free_tag_set(&ubd_dev->tag_set);
+out_close:
+ ubd_close_dev(ubd_dev);
out:
return err;
}
@@ -1012,7 +1007,6 @@ static int ubd_id(char **str, int *start_out, int *end_out)
static int ubd_remove(int n, char **error_out)
{
- struct gendisk *disk = ubd_gendisk[n];
struct ubd *ubd_dev;
int err = -ENODEV;
@@ -1023,15 +1017,15 @@ static int ubd_remove(int n, char **error_out)
if(ubd_dev->file == NULL)
goto out;
- /* you cannot remove a open disk */
- err = -EBUSY;
- if(ubd_dev->count > 0)
- goto out;
+ if (ubd_dev->disk) {
+ /* you cannot remove a open disk */
+ err = -EBUSY;
+ if (disk_openers(ubd_dev->disk))
+ goto out;
- ubd_gendisk[n] = NULL;
- if(disk != NULL){
- del_gendisk(disk);
- put_disk(disk);
+ del_gendisk(ubd_dev->disk);
+ ubd_close_dev(ubd_dev);
+ put_disk(ubd_dev->disk);
}
err = 0;
@@ -1153,37 +1147,6 @@ static int __init ubd_driver_init(void){
device_initcall(ubd_driver_init);
-static int ubd_open(struct gendisk *disk, blk_mode_t mode)
-{
- struct ubd *ubd_dev = disk->private_data;
- int err = 0;
-
- mutex_lock(&ubd_mutex);
- if(ubd_dev->count == 0){
- err = ubd_open_dev(ubd_dev);
- if(err){
- printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
- disk->disk_name, ubd_dev->file, -err);
- goto out;
- }
- }
- ubd_dev->count++;
- set_disk_ro(disk, !ubd_dev->openflags.w);
-out:
- mutex_unlock(&ubd_mutex);
- return err;
-}
-
-static void ubd_release(struct gendisk *disk)
-{
- struct ubd *ubd_dev = disk->private_data;
-
- mutex_lock(&ubd_mutex);
- if(--ubd_dev->count == 0)
- ubd_close_dev(ubd_dev);
- mutex_unlock(&ubd_mutex);
-}
-
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
__u64 *cow_offset, unsigned long *bitmap,
__u64 bitmap_offset, unsigned long *bitmap_words,
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 131b7cb29576..dc2feae789cb 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1373,7 +1373,7 @@ static void vector_net_poll_controller(struct net_device *dev)
static void vector_net_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strscpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+ strscpy(info->driver, DRIVER_NAME);
}
static int vector_net_load_bpf_flash(struct net_device *dev,
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index c719e1ec4645..b16a5e5619d3 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -141,7 +141,7 @@ static int create_tap_fd(char *iface)
}
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
- strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
+ strscpy(ifr.ifr_name, iface);
err = ioctl(fd, TUNSETIFF, (void *) &ifr);
if (err != 0) {
@@ -171,7 +171,7 @@ static int create_raw_fd(char *iface, int flags, int proto)
goto raw_fd_cleanup;
}
memset(&ifr, 0, sizeof(ifr));
- strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
+ strscpy(ifr.ifr_name, iface);
if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
err = -errno;
goto raw_fd_cleanup;
diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
index 4b6d1b526bc1..66fe06db872f 100644
--- a/arch/um/include/asm/cpufeature.h
+++ b/arch/um/include/asm/cpufeature.h
@@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
+ asm goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 84866127d074..9ef9a8aedfa6 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -10,7 +10,7 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 981e11d8e025..326e52450e41 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -51,7 +51,8 @@ static inline int printk(const char *fmt, ...)
extern int in_aton(char *str);
extern size_t strlcat(char *, const char *, size_t);
-extern size_t strscpy(char *, const char *, size_t);
+extern size_t sized_strscpy(char *, const char *, size_t);
+#define strscpy(dst, src) sized_strscpy(dst, src, sizeof(dst))
/* Copied from linux/compiler-gcc.h since we can't include it directly */
#define barrier() __asm__ __volatile__("": : :"memory")
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index 3363851a4ae8..bdf215c0eca7 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -105,7 +105,7 @@ static int etap_tramp(char *dev, char *gate, int control_me,
sprintf(data_fd_buf, "%d", data_remote);
sprintf(version_buf, "%d", UML_NET_VERSION);
if (gate != NULL) {
- strscpy(gate_buf, gate, sizeof(gate_buf));
+ strscpy(gate_buf, gate);
args = setup_args;
}
else args = nosetup_args;
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index 2284e9c1cbbb..91f0e27ca3a6 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -146,7 +146,7 @@ static int tuntap_open(void *data)
}
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strscpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
+ strscpy(ifr.ifr_name, pri->dev_name);
if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
err = -errno;
printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 288c422bfa96..e09d65b05d1c 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -40,7 +40,7 @@ static int __init make_uml_dir(void)
__func__);
goto err;
}
- strscpy(dir, home, sizeof(dir));
+ strscpy(dir, home);
uml_dir++;
}
strlcat(dir, uml_dir, sizeof(dir));
@@ -243,7 +243,7 @@ int __init set_umid(char *name)
if (strlen(name) > UMID_LEN - 1)
return -E2BIG;
- strscpy(umid, name, sizeof(umid));
+ strscpy(umid, name);
return 0;
}
@@ -262,7 +262,7 @@ static int __init make_umid(void)
make_uml_dir();
if (*umid == '\0') {
- strscpy(tmp, uml_dir, sizeof(tmp));
+ strscpy(tmp, uml_dir);
strlcat(tmp, "XXXXXX", sizeof(tmp));
fd = mkstemp(tmp);
if (fd < 0) {
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 5a83da703e87..6a1f36df6a18 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -28,5 +28,7 @@ obj-y += net/
obj-$(CONFIG_KEXEC_FILE) += purgatory/
+obj-y += virt/svm/
+
# for cleaning
subdir- += boot tools
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5edec175b9bf..89afacce6951 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,7 +100,7 @@ config X86
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
- select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_UBSAN
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -147,6 +147,7 @@ config X86
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
+ select GENERIC_CLOCKEVENTS_BROADCAST_IDLE if GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
@@ -255,6 +256,7 @@ config X86
select HAVE_NOINSTR_VALIDATION if HAVE_OBJTOOL
select HAVE_OBJTOOL if X86_64
select HAVE_OPTPROBES
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
@@ -496,6 +498,15 @@ config X86_CPU_RESCTRL
Say N if unsure.
+config X86_FRED
+ bool "Flexible Return and Event Delivery"
+ depends on X86_64
+ help
+ When enabled, try to use Flexible Return and Event Delivery
+ instead of the legacy SYSCALL/SYSENTER/IDT architecture for
+ ring transitions and exception/interrupt handling if the
+ system supports.
+
if X86_32
config X86_BIGSMP
bool "Support for big SMP systems with more than 8 CPUs"
@@ -1539,19 +1550,6 @@ config AMD_MEM_ENCRYPT
This requires an AMD processor that supports Secure Memory
Encryption (SME).
-config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
- bool "Activate AMD Secure Memory Encryption (SME) by default"
- depends on AMD_MEM_ENCRYPT
- help
- Say yes to have system memory encrypted by default if running on
- an AMD processor that supports Secure Memory Encryption (SME).
-
- If set to Y, then the encryption of system memory can be
- deactivated with the mem_encrypt=off command line option.
-
- If set to N, then the encryption of system memory can be
- activated with the mem_encrypt=on command line option.
-
# Common NUMA Features
config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
@@ -2114,11 +2112,11 @@ config PHYSICAL_START
help
This gives the physical address where the kernel is loaded.
- If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
- bzImage will decompress itself to above physical address and
- run from there. Otherwise, bzImage will run from the address where
- it has been loaded by the boot loader and will ignore above physical
- address.
+ If the kernel is not relocatable (CONFIG_RELOCATABLE=n) then bzImage
+ will decompress itself to above physical address and run from there.
+ Otherwise, bzImage will run from the address where it has been loaded
+ by the boot loader. The only exception is if it is loaded below the
+ above physical address, in which case it will relocate itself there.
In normal kdump cases one does not have to set/change this option
as now bzImage can be compiled as a completely relocatable image
@@ -2434,6 +2432,18 @@ source "kernel/livepatch/Kconfig"
endmenu
+config CC_HAS_NAMED_AS
+ def_bool CC_IS_GCC && GCC_VERSION >= 120100
+
+config USE_X86_SEG_SUPPORT
+ def_bool y
+ depends on CC_HAS_NAMED_AS
+ #
+ # -fsanitize=kernel-address (KASAN) is at the moment incompatible
+ # with named address spaces - see GCC PR sanitizer/111736.
+ #
+ depends on !KASAN
+
config CC_HAS_SLS
def_bool $(cc-option,-mharden-sls=all)
@@ -2465,12 +2475,12 @@ config CALL_PADDING
config FINEIBT
def_bool y
- depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
+ depends on X86_KERNEL_IBT && CFI_CLANG && MITIGATION_RETPOLINE
select CALL_PADDING
config HAVE_CALL_THUNKS
def_bool y
- depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
+ depends on CC_HAS_ENTRY_PADDING && MITIGATION_RETHUNK && OBJTOOL
config CALL_THUNKS
def_bool n
@@ -2492,7 +2502,7 @@ menuconfig SPECULATION_MITIGATIONS
if SPECULATION_MITIGATIONS
-config PAGE_TABLE_ISOLATION
+config MITIGATION_PAGE_TABLE_ISOLATION
bool "Remove the kernel mapping in user mode"
default y
depends on (X86_64 || X86_PAE)
@@ -2503,7 +2513,7 @@ config PAGE_TABLE_ISOLATION
See Documentation/arch/x86/pti.rst for more details.
-config RETPOLINE
+config MITIGATION_RETPOLINE
bool "Avoid speculative indirect branches in kernel"
select OBJTOOL if HAVE_OBJTOOL
default y
@@ -2513,9 +2523,9 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.
-config RETHUNK
+config MITIGATION_RETHUNK
bool "Enable return-thunks"
- depends on RETPOLINE && CC_HAS_RETURN_THUNK
+ depends on MITIGATION_RETPOLINE && CC_HAS_RETURN_THUNK
select OBJTOOL if HAVE_OBJTOOL
default y if X86_64
help
@@ -2524,14 +2534,14 @@ config RETHUNK
Requires a compiler with -mfunction-return=thunk-extern
support for full protection. The kernel may run slower.
-config CPU_UNRET_ENTRY
+config MITIGATION_UNRET_ENTRY
bool "Enable UNRET on kernel entry"
- depends on CPU_SUP_AMD && RETHUNK && X86_64
+ depends on CPU_SUP_AMD && MITIGATION_RETHUNK && X86_64
default y
help
Compile the kernel with support for the retbleed=unret mitigation.
-config CALL_DEPTH_TRACKING
+config MITIGATION_CALL_DEPTH_TRACKING
bool "Mitigate RSB underflow with call depth tracking"
depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
@@ -2551,7 +2561,7 @@ config CALL_DEPTH_TRACKING
config CALL_THUNKS_DEBUG
bool "Enable call thunks and call depth tracking debugging"
- depends on CALL_DEPTH_TRACKING
+ depends on MITIGATION_CALL_DEPTH_TRACKING
select FUNCTION_ALIGNMENT_32B
default n
help
@@ -2562,14 +2572,14 @@ config CALL_THUNKS_DEBUG
Only enable this when you are debugging call thunks as this
creates a noticeable runtime overhead. If unsure say N.
-config CPU_IBPB_ENTRY
+config MITIGATION_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
depends on CPU_SUP_AMD && X86_64
default y
help
Compile the kernel with support for the retbleed=ibpb mitigation.
-config CPU_IBRS_ENTRY
+config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
depends on CPU_SUP_INTEL && X86_64
default y
@@ -2578,14 +2588,14 @@ config CPU_IBRS_ENTRY
This mitigates both spectre_v2 and retbleed at great cost to
performance.
-config CPU_SRSO
+config MITIGATION_SRSO
bool "Mitigate speculative RAS overflow on AMD"
- depends on CPU_SUP_AMD && X86_64 && RETHUNK
+ depends on CPU_SUP_AMD && X86_64 && MITIGATION_RETHUNK
default y
help
Enable the SRSO mitigation needed on AMD Zen1-4 machines.
-config SLS
+config MITIGATION_SLS
bool "Mitigate Straight-Line-Speculation"
depends on CC_HAS_SLS && X86_64
select OBJTOOL if HAVE_OBJTOOL
@@ -2595,7 +2605,7 @@ config SLS
against straight line speculation. The kernel image might be slightly
larger.
-config GDS_FORCE_MITIGATION
+config MITIGATION_GDS_FORCE
bool "Force GDS Mitigation"
depends on CPU_SUP_INTEL
default n
@@ -2614,6 +2624,17 @@ config GDS_FORCE_MITIGATION
If in doubt, say N.
+config MITIGATION_RFDS
+ bool "RFDS Mitigation"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable mitigation for Register File Data Sampling (RFDS) by default.
+ RFDS is a hardware vulnerability which affects Intel Atom CPUs. It
+ allows unprivileged speculative access to stale data previously
+ stored in floating point, vector and integer registers.
+ See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b9224cf2ee4d..2a7279d80460 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -379,7 +379,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1a068de12a56..1eccc2ee45fb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -22,7 +22,7 @@ RETPOLINE_VDSO_CFLAGS := -mretpoline
endif
RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix)
-ifdef CONFIG_RETHUNK
+ifdef CONFIG_MITIGATION_RETHUNK
RETHUNK_CFLAGS := -mfunction-return=thunk-extern
RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
endif
@@ -53,6 +53,9 @@ REALMODE_CFLAGS += -fno-stack-protector
REALMODE_CFLAGS += -Wno-address-of-packed-member
REALMODE_CFLAGS += $(cc_stack_align4)
REALMODE_CFLAGS += $(CLANG_FLAGS)
+ifdef CONFIG_CC_IS_CLANG
+REALMODE_CFLAGS += -Wno-gnu
+endif
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
@@ -112,13 +115,13 @@ ifeq ($(CONFIG_X86_32),y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
- ifeq ($(CONFIG_STACKPROTECTOR),y)
- ifeq ($(CONFIG_SMP),y)
+ ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
- else
+ else
KBUILD_CFLAGS += -mstack-protector-guard=global
- endif
- endif
+ endif
+ endif
else
BITS := 64
UTS_MACHINE := x86_64
@@ -192,7 +195,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
# Additionally, avoid generating expensive indirect jumps which
# are subject to retpolines for small number of switch cases.
@@ -205,7 +208,7 @@ ifdef CONFIG_RETPOLINE
endif
endif
-ifdef CONFIG_SLS
+ifdef CONFIG_MITIGATION_SLS
KBUILD_CFLAGS += -mharden-sls=all
endif
@@ -296,12 +299,11 @@ install:
vdso-install-$(CONFIG_X86_64) += arch/x86/entry/vdso/vdso64.so.dbg
vdso-install-$(CONFIG_X86_X32_ABI) += arch/x86/entry/vdso/vdsox32.so.dbg
-vdso-install-$(CONFIG_X86_32) += arch/x86/entry/vdso/vdso32.so.dbg
-vdso-install-$(CONFIG_IA32_EMULATION) += arch/x86/entry/vdso/vdso32.so.dbg
+vdso-install-$(CONFIG_COMPAT_32) += arch/x86/entry/vdso/vdso32.so.dbg
archprepare: checkbin
checkbin:
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
ifeq ($(RETPOLINE_CFLAGS),)
@echo "You are building kernel with non-retpoline compiler." >&2
@echo "Please update your compiler." >&2
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 18d15d1ce87d..f196b1d1ddf8 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -5,6 +5,8 @@
#include "../string.h"
#include "efi.h"
+#include <asm/bootparam.h>
+
#include <linux/numa.h>
/*
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index c1bb180973ea..e162d7f59cc5 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
+#include <asm/bootparam.h>
+
static unsigned long fs;
static inline void set_fs(unsigned long seg)
{
diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c
index 6edd034b0b30..f2e50f9758e6 100644
--- a/arch/x86/boot/compressed/efi.c
+++ b/arch/x86/boot/compressed/efi.c
@@ -7,6 +7,8 @@
#include "misc.h"
+#include <asm/bootparam.h>
+
/**
* efi_get_type - Given a pointer to boot_params, determine the type of EFI environment.
*
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
index 866c0af8b5b9..b22300970f97 100644
--- a/arch/x86/boot/compressed/efi.h
+++ b/arch/x86/boot/compressed/efi.h
@@ -97,15 +97,6 @@ typedef struct {
u32 tables;
} efi_system_table_32_t;
-/* kexec external ABI */
-struct efi_setup_data {
- u64 fw_vendor;
- u64 __unused;
- u64 tables;
- u64 smbios;
- u64 reserved[8];
-};
-
struct efi_unaccepted_memory {
u32 version;
u32 unit_size;
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index d040080d7edb..909f2a35b60c 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -8,8 +8,8 @@
* Copyright (C) 2016 Kees Cook
*/
-/* No PAGE_TABLE_ISOLATION support needed either: */
-#undef CONFIG_PAGE_TABLE_ISOLATION
+/* No MITIGATION_PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
#include "error.h"
#include "misc.h"
@@ -389,5 +389,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code)
{
- /* Empty handler to ignore NMI during early boot */
+ spurious_nmi_count++;
}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b99e08e6815b..4535242cc1b1 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -52,6 +52,7 @@ struct port_io_ops pio_ops;
memptr free_mem_ptr;
memptr free_mem_end_ptr;
+int spurious_nmi_count;
static char *vidmem;
static int vidport;
@@ -164,21 +165,34 @@ void __putstr(const char *s)
outb(0xff & (pos >> 1), vidport+1);
}
-void __puthex(unsigned long value)
+static noinline void __putnum(unsigned long value, unsigned int base,
+ int mindig)
{
- char alpha[2] = "0";
- int bits;
+ char buf[8*sizeof(value)+1];
+ char *p;
- for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) {
- unsigned long digit = (value >> bits) & 0xf;
+ p = buf + sizeof(buf);
+ *--p = '\0';
- if (digit < 0xA)
- alpha[0] = '0' + digit;
- else
- alpha[0] = 'a' + (digit - 0xA);
+ while (mindig-- > 0 || value) {
+ unsigned char digit = value % base;
+ digit += (digit >= 10) ? ('a'-10) : '0';
+ *--p = digit;
- __putstr(alpha);
+ value /= base;
}
+
+ __putstr(p);
+}
+
+void __puthex(unsigned long value)
+{
+ __putnum(value, 16, sizeof(value)*2);
+}
+
+void __putdec(unsigned long value)
+{
+ __putnum(value, 10, 1);
}
#ifdef CONFIG_X86_NEED_RELOCS
@@ -358,6 +372,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
}
/*
+ * Set the memory encryption xloadflag based on the mem_encrypt= command line
+ * parameter, if provided.
+ */
+static void parse_mem_encrypt(struct setup_header *hdr)
+{
+ int on = cmdline_find_option_bool("mem_encrypt=on");
+ int off = cmdline_find_option_bool("mem_encrypt=off");
+
+ if (on > off)
+ hdr->xloadflags |= XLF_MEM_ENCRYPTION;
+}
+
+/*
* The compressed kernel image (ZO), has been moved so that its position
* is against the end of the buffer used to hold the uncompressed kernel
* image (VO) and the execution environment (.bss, .brk), which makes sure
@@ -387,6 +414,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
/* Clear flags intended for solely in-kernel use. */
boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG;
+ parse_mem_encrypt(&boot_params_ptr->hdr);
+
sanitize_boot_params(boot_params_ptr);
if (boot_params_ptr->screen_info.orig_video_mode == 7) {
@@ -493,10 +522,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
/* Disable exception handling before booting the kernel */
cleanup_exception_handling();
+ if (spurious_nmi_count) {
+ error_putstr("Spurious early NMIs ignored: ");
+ error_putdec(spurious_nmi_count);
+ error_putstr("\n");
+ }
+
return output + entry_offset;
}
-void fortify_panic(const char *name)
+void __fortify_panic(const u8 reason, size_t avail, size_t size)
{
error("detected buffer overflow");
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index bc2f0f17fb90..b353a7be380c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -59,12 +59,15 @@ extern char _head[], _end[];
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
+extern int spurious_nmi_count;
void *malloc(int size);
void free(void *where);
void __putstr(const char *s);
void __puthex(unsigned long value);
+void __putdec(unsigned long value);
#define error_putstr(__x) __putstr(__x)
#define error_puthex(__x) __puthex(__x)
+#define error_putdec(__x) __putdec(__x)
#ifdef CONFIG_X86_VERBOSE_BOOTUP
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 51f957b24ba7..c882e1f67af0 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
+#include <asm/bootparam.h>
#include <asm/e820/types.h>
#include <asm/processor.h>
#include "pgtable.h"
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 454acd7a2daf..ec71846d28c9 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -12,6 +12,7 @@
*/
#include "misc.h"
+#include <asm/bootparam.h>
#include <asm/pgtable_types.h>
#include <asm/sev.h>
#include <asm/trapnr.h>
@@ -116,6 +117,9 @@ static bool fault_in_kernel_space(unsigned long address)
#undef __init
#define __init
+#undef __head
+#define __head
+
#define __BOOT_COMPRESSED
/* Basic instruction decoding support needed */
@@ -304,6 +308,10 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
if (result != ES_OK)
goto finish;
+ result = vc_check_opcode_bytes(&ctxt, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
switch (exit_code) {
case SVM_EXIT_RDTSC:
case SVM_EXIT_RDTSCP:
@@ -365,7 +373,7 @@ static void enforce_vmpl0(void)
MSR_AMD64_SNP_VMPL_SSS | \
MSR_AMD64_SNP_SECURE_TSC | \
MSR_AMD64_SNP_VMGEXIT_PARAM | \
- MSR_AMD64_SNP_VMSA_REG_PROTECTION | \
+ MSR_AMD64_SNP_VMSA_REG_PROT | \
MSR_AMD64_SNP_RESERVED_BIT13 | \
MSR_AMD64_SNP_RESERVED_BIT15 | \
MSR_AMD64_SNP_RESERVED_MASK)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b2771710ed98..b5c79f43359b 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -106,17 +106,12 @@ extra_header_fields:
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue
- .long setup_size + ZO__end + pecompat_vsize
- # SizeOfImage
+ .long setup_size + ZO__end # SizeOfImage
.long salign # SizeOfHeaders
.long 0 # CheckSum
.word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
-#ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES
.word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics
-#else
- .word 0 # DllCharacteristics
-#endif
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
.long 0 # SizeOfStackCommit
@@ -143,7 +138,7 @@ section_table:
.ascii ".setup"
.byte 0
.byte 0
- .long setup_size - salign # VirtualSize
+ .long pecompat_fstart - salign # VirtualSize
.long salign # VirtualAddress
.long pecompat_fstart - salign # SizeOfRawData
.long salign # PointerToRawData
@@ -156,8 +151,8 @@ section_table:
#ifdef CONFIG_EFI_MIXED
.asciz ".compat"
- .long 8 # VirtualSize
- .long setup_size + ZO__end # VirtualAddress
+ .long pecompat_fsize # VirtualSize
+ .long pecompat_fstart # VirtualAddress
.long pecompat_fsize # SizeOfRawData
.long pecompat_fstart # PointerToRawData
@@ -172,17 +167,16 @@ section_table:
* modes this image supports.
*/
.pushsection ".pecompat", "a", @progbits
- .balign falign
- .set pecompat_vsize, salign
+ .balign salign
.globl pecompat_fstart
pecompat_fstart:
.byte 0x1 # Version
.byte 8 # Size
.word IMAGE_FILE_MACHINE_I386 # PE machine type
.long setup_size + ZO_efi32_pe_entry # Entrypoint
+ .byte 0x0 # Sentinel
.popsection
#else
- .set pecompat_vsize, 0
.set pecompat_fstart, setup_size
#endif
.ascii ".text"
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 83bb7efad8ae..3a2d1360abb0 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -24,6 +24,9 @@ SECTIONS
.text : { *(.text .text.*) }
.text32 : { *(.text32) }
+ .pecompat : { *(.pecompat) }
+ PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
+
. = ALIGN(16);
.rodata : { *(.rodata*) }
@@ -36,9 +39,6 @@ SECTIONS
. = ALIGN(16);
.data : { *(.data*) }
- .pecompat : { *(.pecompat) }
- PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
-
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index eeec9986570e..d07be9d05cd0 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -14,7 +14,7 @@
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
-static u64 cc_mask __ro_after_init;
+u64 cc_mask __ro_after_init;
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
@@ -148,8 +148,3 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
-
-__init void cc_set_mask(u64 mask)
-{
- cc_mask = mask;
-}
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 73abbbdd26f8..91801138b10b 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -42,7 +42,7 @@ CONFIG_EFI_STUB=y
CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
-# CONFIG_RETHUNK is not set
+# CONFIG_MITIGATION_RETHUNK is not set
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index ca2fe186994b..c93e7f5c2a06 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -18,6 +18,9 @@ obj-y += vdso/
obj-y += vsyscall/
obj-$(CONFIG_PREEMPTION) += thunk_$(BITS).o
+CFLAGS_entry_fred.o += -fno-stack-protector
+CFLAGS_REMOVE_entry_fred.o += -pg $(CC_FLAGS_FTRACE)
+obj-$(CONFIG_X86_FRED) += entry_64_fred.o entry_fred.o
+
obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o
-
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9f1d94790a54..ea81770629ee 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -65,7 +65,7 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
-.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 unwind_hint=1
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -87,14 +87,17 @@ For 32-bit we have the following conventions - kernel is built with
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+
+ .if \unwind_hint
UNWIND_HINT_REGS
+ .endif
.if \save_ret
pushq %rsi /* return address on top of stack */
.endif
.endm
-.macro CLEAR_REGS
+.macro CLEAR_REGS clear_bp=1
/*
* Sanitize registers of values that a speculation attack might
* otherwise want to exploit. The lower registers are likely clobbered
@@ -109,7 +112,9 @@ For 32-bit we have the following conventions - kernel is built with
xorl %r10d, %r10d /* nospec r10 */
xorl %r11d, %r11d /* nospec r11 */
xorl %ebx, %ebx /* nospec rbx */
+ .if \clear_bp
xorl %ebp, %ebp /* nospec rbp */
+ .endif
xorl %r12d, %r12d /* nospec r12 */
xorl %r13d, %r13d /* nospec r13 */
xorl %r14d, %r14d /* nospec r14 */
@@ -117,9 +122,9 @@ For 32-bit we have the following conventions - kernel is built with
.endm
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
- PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
- CLEAR_REGS
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1
+ PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint
+ CLEAR_REGS clear_bp=\clear_bp
.endm
.macro POP_REGS pop_rdi=1
@@ -142,10 +147,10 @@ For 32-bit we have the following conventions - kernel is built with
.endif
.endm
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
- * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
+ * MITIGATION_PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
* halves:
*/
#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
@@ -160,7 +165,7 @@ For 32-bit we have the following conventions - kernel is built with
.macro ADJUST_KERNEL_CR3 reg:req
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
- /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+ /* Clear PCID and "MITIGATION_PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm
@@ -173,7 +178,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm
#define THIS_CPU_user_pcid_flush_mask \
- PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+ PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
mov %cr3, \scratch_reg
@@ -239,17 +244,19 @@ For 32-bit we have the following conventions - kernel is built with
.Ldone_\@:
.endm
-.macro RESTORE_CR3 scratch_reg:req save_reg:req
+/* Restore CR3 from a kernel context. May restore a user CR3 value. */
+.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
- ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
-
/*
- * KERNEL pages can always resume with NOFLUSH as we do
- * explicit flushes.
+ * If CR3 contained the kernel page tables at the paranoid exception
+ * entry, then there is nothing to restore as CR3 is not modified while
+ * handling the exception.
*/
bt $PTI_USER_PGTABLE_BIT, \save_reg
- jnc .Lnoflush_\@
+ jnc .Lend_\@
+
+ ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* Check if there's a pending flush for the user ASID we're
@@ -257,25 +264,17 @@ For 32-bit we have the following conventions - kernel is built with
*/
movq \save_reg, \scratch_reg
andq $(0x7FF), \scratch_reg
- bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
- jnc .Lnoflush_\@
-
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
- jmp .Lwrcr3_\@
+ jc .Lwrcr3_\@
-.Lnoflush_\@:
SET_NOFLUSH_BIT \save_reg
.Lwrcr3_\@:
- /*
- * The CR3 write could be avoided when not changing its value,
- * but would require a CR3 read *and* a scratch register.
- */
movq \save_reg, %cr3
.Lend_\@:
.endm
-#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=n: */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
@@ -285,7 +284,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
-.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
.endm
#endif
@@ -303,7 +302,7 @@ For 32-bit we have the following conventions - kernel is built with
* Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
*/
.macro IBRS_ENTER save_reg
-#ifdef CONFIG_CPU_IBRS_ENTRY
+#ifdef CONFIG_MITIGATION_IBRS_ENTRY
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
movl $MSR_IA32_SPEC_CTRL, %ecx
@@ -332,7 +331,7 @@ For 32-bit we have the following conventions - kernel is built with
* regs. Must be called after the last RET.
*/
.macro IBRS_EXIT save_reg
-#ifdef CONFIG_CPU_IBRS_ENTRY
+#ifdef CONFIG_MITIGATION_IBRS_ENTRY
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
movl $MSR_IA32_SPEC_CTRL, %ecx
@@ -426,3 +425,63 @@ For 32-bit we have the following conventions - kernel is built with
.endm
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_X86_64
+
+/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
+.macro THUNK name, func
+SYM_FUNC_START(\name)
+ pushq %rbp
+ movq %rsp, %rbp
+
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+
+ call \func
+
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ popq %rbp
+ RET
+SYM_FUNC_END(\name)
+ _ASM_NOKPROBE(\name)
+.endm
+
+#else /* CONFIG_X86_32 */
+
+/* put return address in eax (arg1) */
+.macro THUNK name, func, put_ret_addr_in_eax=0
+SYM_CODE_START_NOALIGN(\name)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ .if \put_ret_addr_in_eax
+ /* Place EIP in the arg1 */
+ movl 3*4(%esp), %eax
+ .endif
+
+ call \func
+ popl %edx
+ popl %ecx
+ popl %eax
+ RET
+ _ASM_NOKPROBE(\name)
+SYM_CODE_END(\name)
+ .endm
+
+#endif
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f0cb1d..d9feadffa972 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,11 @@
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
+
+#include "calling.h"
.pushsection .noinstr.text, "ax"
@@ -20,3 +25,24 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
+THUNK warn_thunk_thunk, __warn_thunk
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index c73047bf9f4b..d3a814efbff6 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -305,7 +305,7 @@
.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page + GDT_ESPFIX_OFFSET)
ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
@@ -649,10 +649,6 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
SYM_CODE_END(asm_\cfunc)
.endm
-.macro idtentry_sysvec vector cfunc
- idtentry \vector asm_\cfunc \cfunc has_error_code=0
-.endm
-
/*
* Include the defines which emit the idt entries which are shared
* shared between 32 and 64 bit and emit the __irqentry_text_* markers
@@ -885,6 +881,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
+ CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -954,6 +951,7 @@ restore_all_switch_stack:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
+ CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1146,6 +1144,7 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
+ CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c40f89ab1b4c..8af2a26b24f6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -161,6 +161,7 @@ syscall_return_via_sysret:
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
swapgs
+ CLEAR_CPU_BUFFERS
sysretq
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -190,7 +191,7 @@ SYM_FUNC_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
+ movq %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
#endif
/*
@@ -247,7 +248,13 @@ SYM_CODE_START(ret_from_fork_asm)
* and unwind should work normally.
*/
UNWIND_HINT_REGS
+
+#ifdef CONFIG_X86_FRED
+ ALTERNATIVE "jmp swapgs_restore_regs_and_return_to_usermode", \
+ "jmp asm_fred_exit_user", X86_FEATURE_FRED
+#else
jmp swapgs_restore_regs_and_return_to_usermode
+#endif
SYM_CODE_END(ret_from_fork_asm)
.popsection
@@ -370,14 +377,6 @@ SYM_CODE_END(\asmsym)
idtentry \vector asm_\cfunc \cfunc has_error_code=1
.endm
-/*
- * System vectors which invoke their handlers directly and are not
- * going through the regular common device interrupt handling code.
- */
-.macro idtentry_sysvec vector cfunc
- idtentry \vector asm_\cfunc \cfunc has_error_code=0
-.endm
-
/**
* idtentry_mce_db - Macro to generate entry stubs for #MC and #DB
* @vector: Vector number
@@ -562,7 +561,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_XEN_PV
ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
#endif
@@ -573,12 +572,13 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
.Lswapgs_and_iret:
swapgs
+ CLEAR_CPU_BUFFERS
/* Assert that the IRET frame indicates user mode. */
testb $3, 8(%rsp)
jnz .Lnative_iret
ud2
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
.Lpti_restore_regs_and_return_to_usermode:
POP_REGS pop_rdi=0
@@ -723,6 +723,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */
+ CLEAR_CPU_BUFFERS
+
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@@ -968,14 +970,14 @@ SYM_CODE_START_LOCAL(paranoid_exit)
IBRS_EXIT save_reg=%r15
/*
- * The order of operations is important. RESTORE_CR3 requires
+ * The order of operations is important. PARANOID_RESTORE_CR3 requires
* kernel GSBASE.
*
* NB to anyone to try to optimize this code: this code does
* not execute at all for exceptions from user mode. Those
* exceptions go through error_return instead.
*/
- RESTORE_CR3 scratch_reg=%rax save_reg=%r14
+ PARANOID_RESTORE_CR3 scratch_reg=%rax save_reg=%r14
/* Handle the three GSBASE cases */
ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
@@ -1096,7 +1098,7 @@ SYM_CODE_END(error_return)
*
* Registers:
* %r14: Used to save/restore the CR3 of the interrupted context
- * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
+ * when MITIGATION_PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
SYM_CODE_START(asm_exc_nmi)
UNWIND_HINT_IRET_ENTRY
@@ -1404,8 +1406,7 @@ end_repeat_nmi:
/* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
IBRS_EXIT save_reg=%r15
- /* Always restore stashed CR3 value (see paranoid_entry) */
- RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+ PARANOID_RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
/*
* The above invocation of paranoid_entry stored the GSBASE
@@ -1450,6 +1451,12 @@ nmi_restore:
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
+ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+ * NMI in kernel after user state is restored. For an unprivileged user
+ * these conditions are hard to meet.
+ */
+
+ /*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
* cannot result in a fault. Similarly, we don't need to worry
@@ -1466,6 +1473,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
UNWIND_HINT_END_OF_STACK
ENDBR
mov $-ENOSYS, %eax
+ CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(entry_SYSCALL32_ignore)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index de94e2e84ecc..eabf48c4d4b4 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
+ CLEAR_CPU_BUFFERS
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
new file mode 100644
index 000000000000..a02bc6f3d2e6
--- /dev/null
+++ b/arch/x86/entry/entry_64_fred.S
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The actual FRED entry points.
+ */
+
+#include <linux/export.h>
+
+#include <asm/asm.h>
+#include <asm/fred.h>
+#include <asm/segment.h>
+
+#include "calling.h"
+
+ .code64
+ .section .noinstr.text, "ax"
+
+.macro FRED_ENTER
+ UNWIND_HINT_END_OF_STACK
+ ENDBR
+ PUSH_AND_CLEAR_REGS
+ movq %rsp, %rdi /* %rdi -> pt_regs */
+.endm
+
+.macro FRED_EXIT
+ UNWIND_HINT_REGS
+ POP_REGS
+.endm
+
+/*
+ * The new RIP value that FRED event delivery establishes is
+ * IA32_FRED_CONFIG & ~FFFH for events that occur in ring 3.
+ * Thus the FRED ring 3 entry point must be 4K page aligned.
+ */
+ .align 4096
+
+SYM_CODE_START_NOALIGN(asm_fred_entrypoint_user)
+ FRED_ENTER
+ call fred_entry_from_user
+SYM_INNER_LABEL(asm_fred_exit_user, SYM_L_GLOBAL)
+ FRED_EXIT
+1: ERETU
+
+ _ASM_EXTABLE_TYPE(1b, asm_fred_entrypoint_user, EX_TYPE_ERETU)
+SYM_CODE_END(asm_fred_entrypoint_user)
+
+/*
+ * The new RIP value that FRED event delivery establishes is
+ * (IA32_FRED_CONFIG & ~FFFH) + 256 for events that occur in
+ * ring 0, i.e., asm_fred_entrypoint_user + 256.
+ */
+ .org asm_fred_entrypoint_user + 256, 0xcc
+SYM_CODE_START_NOALIGN(asm_fred_entrypoint_kernel)
+ FRED_ENTER
+ call fred_entry_from_kernel
+ FRED_EXIT
+ ERETS
+SYM_CODE_END(asm_fred_entrypoint_kernel)
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_FUNC_START(asm_fred_entry_from_kvm)
+ push %rbp
+ mov %rsp, %rbp
+
+ UNWIND_HINT_SAVE
+
+ /*
+ * Both IRQ and NMI from VMX can be handled on current task stack
+ * because there is no need to protect from reentrancy and the call
+ * stack leading to this helper is effectively constant and shallow
+ * (relatively speaking). Do the same when FRED is active, i.e., no
+ * need to check current stack level for a stack switch.
+ *
+ * Emulate the FRED-defined redzone and stack alignment.
+ */
+ sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+ and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+ /*
+ * Start to push a FRED stack frame, which is always 64 bytes:
+ *
+ * +--------+-----------------+
+ * | Bytes | Usage |
+ * +--------+-----------------+
+ * | 63:56 | Reserved |
+ * | 55:48 | Event Data |
+ * | 47:40 | SS + Event Info |
+ * | 39:32 | RSP |
+ * | 31:24 | RFLAGS |
+ * | 23:16 | CS + Aux Info |
+ * | 15:8 | RIP |
+ * | 7:0 | Error Code |
+ * +--------+-----------------+
+ */
+ push $0 /* Reserved, must be 0 */
+ push $0 /* Event data, 0 for IRQ/NMI */
+ push %rdi /* fred_ss handed in by the caller */
+ push %rbp
+ pushf
+ mov $__KERNEL_CS, %rax
+ push %rax
+
+ /*
+ * Unlike the IDT event delivery, FRED _always_ pushes an error code
+ * after pushing the return RIP, thus the CALL instruction CANNOT be
+ * used here to push the return RIP, otherwise there is no chance to
+ * push an error code before invoking the IRQ/NMI handler.
+ *
+ * Use LEA to get the return RIP and push it, then push an error code.
+ */
+ lea 1f(%rip), %rax
+ push %rax /* Return RIP */
+ push $0 /* Error code, 0 for IRQ/NMI */
+
+ PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
+ movq %rsp, %rdi /* %rdi -> pt_regs */
+ call __fred_entry_from_kvm /* Call the C entry point */
+ POP_REGS
+ ERETS
+1:
+ /*
+ * Objtool doesn't understand what ERETS does, this hint tells it that
+ * yes, we'll reach here and with what stack state. A save/restore pair
+ * isn't strictly needed, but it's the simplest form.
+ */
+ UNWIND_HINT_RESTORE
+ pop %rbp
+ RET
+
+SYM_FUNC_END(asm_fred_entry_from_kvm)
+EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm);
+#endif
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
new file mode 100644
index 000000000000..ac120cbdaaf2
--- /dev/null
+++ b/arch/x86/entry/entry_fred.c
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The FRED specific kernel/user entry functions which are invoked from
+ * assembly code and dispatch to the associated handlers.
+ */
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/nospec.h>
+
+#include <asm/desc.h>
+#include <asm/fred.h>
+#include <asm/idtentry.h>
+#include <asm/syscall.h>
+#include <asm/trapnr.h>
+#include <asm/traps.h>
+
+/* FRED EVENT_TYPE_OTHER vector numbers */
+#define FRED_SYSCALL 1
+#define FRED_SYSENTER 2
+
+static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code)
+{
+ irqentry_state_t irq_state = irqentry_nmi_enter(regs);
+
+ instrumentation_begin();
+
+ /* Panic on events from a high stack level */
+ if (regs->fred_cs.sl > 0) {
+ pr_emerg("PANIC: invalid or fatal FRED event; event type %u "
+ "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
+ regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ fred_event_data(regs), regs->cs, regs->ip);
+ die("invalid or fatal FRED event", regs, regs->orig_ax);
+ panic("invalid or fatal FRED event");
+ } else {
+ unsigned long flags = oops_begin();
+ int sig = SIGKILL;
+
+ pr_alert("BUG: invalid or fatal FRED event; event type %u "
+ "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
+ regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ fred_event_data(regs), regs->cs, regs->ip);
+
+ if (__die("Invalid or fatal FRED event", regs, regs->orig_ax))
+ sig = 0;
+
+ oops_end(flags, regs, sig);
+ }
+
+ instrumentation_end();
+ irqentry_nmi_exit(regs, irq_state);
+}
+
+static noinstr void fred_intx(struct pt_regs *regs)
+{
+ switch (regs->fred_ss.vector) {
+ /* Opcode 0xcd, 0x3, NOT INT3 (opcode 0xcc) */
+ case X86_TRAP_BP:
+ return exc_int3(regs);
+
+ /* Opcode 0xcd, 0x4, NOT INTO (opcode 0xce) */
+ case X86_TRAP_OF:
+ return exc_overflow(regs);
+
+#ifdef CONFIG_IA32_EMULATION
+ /* INT80 */
+ case IA32_SYSCALL_VECTOR:
+ if (ia32_enabled())
+ return int80_emulation(regs);
+ fallthrough;
+#endif
+
+ default:
+ return exc_general_protection(regs, 0);
+ }
+}
+
+static __always_inline void fred_other(struct pt_regs *regs)
+{
+ /* The compiler can fold these conditions into a single test */
+ if (likely(regs->fred_ss.vector == FRED_SYSCALL && regs->fred_ss.lm)) {
+ regs->orig_ax = regs->ax;
+ regs->ax = -ENOSYS;
+ do_syscall_64(regs, regs->orig_ax);
+ return;
+ } else if (ia32_enabled() &&
+ likely(regs->fred_ss.vector == FRED_SYSENTER && !regs->fred_ss.lm)) {
+ regs->orig_ax = regs->ax;
+ regs->ax = -ENOSYS;
+ do_fast_syscall_32(regs);
+ return;
+ } else {
+ exc_invalid_op(regs);
+ return;
+ }
+}
+
+#define SYSVEC(_vector, _function) [_vector - FIRST_SYSTEM_VECTOR] = fred_sysvec_##_function
+
+static idtentry_t sysvec_table[NR_SYSTEM_VECTORS] __ro_after_init = {
+ SYSVEC(ERROR_APIC_VECTOR, error_interrupt),
+ SYSVEC(SPURIOUS_APIC_VECTOR, spurious_apic_interrupt),
+ SYSVEC(LOCAL_TIMER_VECTOR, apic_timer_interrupt),
+ SYSVEC(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi),
+
+ SYSVEC(RESCHEDULE_VECTOR, reschedule_ipi),
+ SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, call_function_single),
+ SYSVEC(CALL_FUNCTION_VECTOR, call_function),
+ SYSVEC(REBOOT_VECTOR, reboot),
+
+ SYSVEC(THRESHOLD_APIC_VECTOR, threshold),
+ SYSVEC(DEFERRED_ERROR_VECTOR, deferred_error),
+ SYSVEC(THERMAL_APIC_VECTOR, thermal),
+
+ SYSVEC(IRQ_WORK_VECTOR, irq_work),
+
+ SYSVEC(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
+ SYSVEC(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
+ SYSVEC(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
+};
+
+static bool fred_setup_done __initdata;
+
+void __init fred_install_sysvec(unsigned int sysvec, idtentry_t handler)
+{
+ if (WARN_ON_ONCE(sysvec < FIRST_SYSTEM_VECTOR))
+ return;
+
+ if (WARN_ON_ONCE(fred_setup_done))
+ return;
+
+ if (!WARN_ON_ONCE(sysvec_table[sysvec - FIRST_SYSTEM_VECTOR]))
+ sysvec_table[sysvec - FIRST_SYSTEM_VECTOR] = handler;
+}
+
+static noinstr void fred_handle_spurious_interrupt(struct pt_regs *regs)
+{
+ spurious_interrupt(regs, regs->fred_ss.vector);
+}
+
+void __init fred_complete_exception_setup(void)
+{
+ unsigned int vector;
+
+ for (vector = 0; vector < FIRST_EXTERNAL_VECTOR; vector++)
+ set_bit(vector, system_vectors);
+
+ for (vector = 0; vector < NR_SYSTEM_VECTORS; vector++) {
+ if (sysvec_table[vector])
+ set_bit(vector + FIRST_SYSTEM_VECTOR, system_vectors);
+ else
+ sysvec_table[vector] = fred_handle_spurious_interrupt;
+ }
+ fred_setup_done = true;
+}
+
+static noinstr void fred_extint(struct pt_regs *regs)
+{
+ unsigned int vector = regs->fred_ss.vector;
+ unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
+ NR_SYSTEM_VECTORS);
+
+ if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR))
+ return;
+
+ if (likely(vector >= FIRST_SYSTEM_VECTOR)) {
+ irqentry_state_t state = irqentry_enter(regs);
+
+ instrumentation_begin();
+ sysvec_table[index](regs);
+ instrumentation_end();
+ irqentry_exit(regs, state);
+ } else {
+ common_interrupt(regs, vector);
+ }
+}
+
+static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
+{
+ /* Optimize for #PF. That's the only exception which matters performance wise */
+ if (likely(regs->fred_ss.vector == X86_TRAP_PF))
+ return exc_page_fault(regs, error_code);
+
+ switch (regs->fred_ss.vector) {
+ case X86_TRAP_DE: return exc_divide_error(regs);
+ case X86_TRAP_DB: return fred_exc_debug(regs);
+ case X86_TRAP_BR: return exc_bounds(regs);
+ case X86_TRAP_UD: return exc_invalid_op(regs);
+ case X86_TRAP_NM: return exc_device_not_available(regs);
+ case X86_TRAP_DF: return exc_double_fault(regs, error_code);
+ case X86_TRAP_TS: return exc_invalid_tss(regs, error_code);
+ case X86_TRAP_NP: return exc_segment_not_present(regs, error_code);
+ case X86_TRAP_SS: return exc_stack_segment(regs, error_code);
+ case X86_TRAP_GP: return exc_general_protection(regs, error_code);
+ case X86_TRAP_MF: return exc_coprocessor_error(regs);
+ case X86_TRAP_AC: return exc_alignment_check(regs, error_code);
+ case X86_TRAP_XF: return exc_simd_coprocessor_error(regs);
+
+#ifdef CONFIG_X86_MCE
+ case X86_TRAP_MC: return fred_exc_machine_check(regs);
+#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+ case X86_TRAP_VE: return exc_virtualization_exception(regs);
+#endif
+#ifdef CONFIG_X86_CET
+ case X86_TRAP_CP: return exc_control_protection(regs, error_code);
+#endif
+ default: return fred_bad_type(regs, error_code);
+ }
+
+}
+
+static noinstr void fred_swexc(struct pt_regs *regs, unsigned long error_code)
+{
+ switch (regs->fred_ss.vector) {
+ case X86_TRAP_BP: return exc_int3(regs);
+ case X86_TRAP_OF: return exc_overflow(regs);
+ default: return fred_bad_type(regs, error_code);
+ }
+}
+
+__visible noinstr void fred_entry_from_user(struct pt_regs *regs)
+{
+ unsigned long error_code = regs->orig_ax;
+
+ /* Invalidate orig_ax so that syscall_get_nr() works correctly */
+ regs->orig_ax = -1;
+
+ switch (regs->fred_ss.type) {
+ case EVENT_TYPE_EXTINT:
+ return fred_extint(regs);
+ case EVENT_TYPE_NMI:
+ if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
+ return fred_exc_nmi(regs);
+ break;
+ case EVENT_TYPE_HWEXC:
+ return fred_hwexc(regs, error_code);
+ case EVENT_TYPE_SWINT:
+ return fred_intx(regs);
+ case EVENT_TYPE_PRIV_SWEXC:
+ if (likely(regs->fred_ss.vector == X86_TRAP_DB))
+ return fred_exc_debug(regs);
+ break;
+ case EVENT_TYPE_SWEXC:
+ return fred_swexc(regs, error_code);
+ case EVENT_TYPE_OTHER:
+ return fred_other(regs);
+ default: break;
+ }
+
+ return fred_bad_type(regs, error_code);
+}
+
+__visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
+{
+ unsigned long error_code = regs->orig_ax;
+
+ /* Invalidate orig_ax so that syscall_get_nr() works correctly */
+ regs->orig_ax = -1;
+
+ switch (regs->fred_ss.type) {
+ case EVENT_TYPE_EXTINT:
+ return fred_extint(regs);
+ case EVENT_TYPE_NMI:
+ if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
+ return fred_exc_nmi(regs);
+ break;
+ case EVENT_TYPE_HWEXC:
+ return fred_hwexc(regs, error_code);
+ case EVENT_TYPE_PRIV_SWEXC:
+ if (likely(regs->fred_ss.vector == X86_TRAP_DB))
+ return fred_exc_debug(regs);
+ break;
+ case EVENT_TYPE_SWEXC:
+ return fred_swexc(regs, error_code);
+ default: break;
+ }
+
+ return fred_bad_type(regs, error_code);
+}
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+__visible noinstr void __fred_entry_from_kvm(struct pt_regs *regs)
+{
+ switch (regs->fred_ss.type) {
+ case EVENT_TYPE_EXTINT:
+ return fred_extint(regs);
+ case EVENT_TYPE_NMI:
+ return fred_exc_nmi(regs);
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+#endif
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index 0103e103a657..da37f42f4549 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -4,33 +4,15 @@
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
* (inspired by Andi Kleen's thunk_64.S)
*/
- #include <linux/export.h>
- #include <linux/linkage.h>
- #include <asm/asm.h>
- /* put return address in eax (arg1) */
- .macro THUNK name, func, put_ret_addr_in_eax=0
-SYM_CODE_START_NOALIGN(\name)
- pushl %eax
- pushl %ecx
- pushl %edx
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>
- .if \put_ret_addr_in_eax
- /* Place EIP in the arg1 */
- movl 3*4(%esp), %eax
- .endif
+#include "calling.h"
- call \func
- popl %edx
- popl %ecx
- popl %eax
- RET
- _ASM_NOKPROBE(\name)
-SYM_CODE_END(\name)
- .endm
-
- THUNK preempt_schedule_thunk, preempt_schedule
- THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
- EXPORT_SYMBOL(preempt_schedule_thunk)
- EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
+THUNK preempt_schedule_thunk, preempt_schedule
+THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+EXPORT_SYMBOL(preempt_schedule_thunk)
+EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 416b400f39db..119ebdc3d362 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -9,39 +9,6 @@
#include "calling.h"
#include <asm/asm.h>
- /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
- .macro THUNK name, func
-SYM_FUNC_START(\name)
- pushq %rbp
- movq %rsp, %rbp
-
- pushq %rdi
- pushq %rsi
- pushq %rdx
- pushq %rcx
- pushq %rax
- pushq %r8
- pushq %r9
- pushq %r10
- pushq %r11
-
- call \func
-
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
- popq %rdi
- popq %rbp
- RET
-SYM_FUNC_END(\name)
- _ASM_NOKPROBE(\name)
- .endm
-
THUNK preempt_schedule_thunk, preempt_schedule
THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
EXPORT_SYMBOL(preempt_schedule_thunk)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index b1b8dd1608f7..620f6257bbe9 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -3,7 +3,7 @@
# Building vDSO images for x86.
#
-# Include the generic Makefile to check the built vdso.
+# Include the generic Makefile to check the built vDSO:
include $(srctree)/lib/vdso/Makefile
# Sanitizer runtimes are unavailable and cannot be linked here.
@@ -18,48 +18,39 @@ OBJECT_FILES_NON_STANDARD := y
# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
KCOV_INSTRUMENT := n
-VDSO64-$(CONFIG_X86_64) := y
-VDSOX32-$(CONFIG_X86_X32_ABI) := y
-VDSO32-$(CONFIG_X86_32) := y
-VDSO32-$(CONFIG_IA32_EMULATION) := y
-
-# files to link into the vdso
+# Files to link into the vDSO:
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o
vobjs-$(CONFIG_X86_SGX) += vsgx.o
-# files to link into kernel
-obj-y += vma.o extable.o
-KASAN_SANITIZE_vma.o := y
-UBSAN_SANITIZE_vma.o := y
-KCSAN_SANITIZE_vma.o := y
-OBJECT_FILES_NON_STANDARD_vma.o := n
-OBJECT_FILES_NON_STANDARD_extable.o := n
+# Files to link into the kernel:
+obj-y += vma.o extable.o
+KASAN_SANITIZE_vma.o := y
+UBSAN_SANITIZE_vma.o := y
+KCSAN_SANITIZE_vma.o := y
+
+OBJECT_FILES_NON_STANDARD_vma.o := n
+OBJECT_FILES_NON_STANDARD_extable.o := n
-# vDSO images to build
-vdso_img-$(VDSO64-y) += 64
-vdso_img-$(VDSOX32-y) += x32
-vdso_img-$(VDSO32-y) += 32
+# vDSO images to build:
+obj-$(CONFIG_X86_64) += vdso-image-64.o
+obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o
+obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o
-obj-$(VDSO32-y) += vdso32-setup.o
-OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n
+OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
-vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
+vobjs := $(addprefix $(obj)/, $(vobjs-y))
+vobjs32 := $(addprefix $(obj)/, $(vobjs32-y))
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.lds $(vobjs-y)
targets += vdso32/vdso32.lds $(vobjs32-y)
-# Build the vDSO image C files and link them in.
-vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
-vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
-vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
-obj-y += $(vdso_img_objs)
-targets += $(vdso_img_cfiles)
-targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
+targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg)
CPPFLAGS_vdso.lds += -P -C
@@ -87,7 +78,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
ifneq ($(RETPOLINE_VDSO_CFLAGS),)
CFL += $(RETPOLINE_VDSO_CFLAGS)
endif
@@ -123,7 +114,7 @@ VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
vobjx32s-y := $(vobjs-y:.o=-x32.o)
# same thing, but in the output directory
-vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
+vobjx32s := $(addprefix $(obj)/, $(vobjx32s-y))
# Convert 64bit object file to x32 for x32 vDSO.
quiet_cmd_x32 = X32 $@
@@ -164,7 +155,7 @@ KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
ifneq ($(RETPOLINE_VDSO_CFLAGS),)
KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
endif
@@ -190,5 +181,3 @@ GCOV_PROFILE := n
quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
-
-clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 7645730dc228..6d83ceb7f1ba 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -274,59 +274,6 @@ up_fail:
return ret;
}
-#ifdef CONFIG_X86_64
-/*
- * Put the vdso above the (randomized) stack with another randomized
- * offset. This way there is no hole in the middle of address space.
- * To save memory make sure it is still in the same PTE as the stack
- * top. This doesn't give that many random bits.
- *
- * Note that this algorithm is imperfect: the distribution of the vdso
- * start address within a PMD is biased toward the end.
- *
- * Only used for the 64-bit and x32 vdsos.
- */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
- unsigned long addr, end;
- unsigned offset;
-
- /*
- * Round up the start address. It can start out unaligned as a result
- * of stack start randomization.
- */
- start = PAGE_ALIGN(start);
-
- /* Round the lowest possible end address up to a PMD boundary. */
- end = (start + len + PMD_SIZE - 1) & PMD_MASK;
- if (end >= DEFAULT_MAP_WINDOW)
- end = DEFAULT_MAP_WINDOW;
- end -= len;
-
- if (end > start) {
- offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
- addr = start + (offset << PAGE_SHIFT);
- } else {
- addr = start;
- }
-
- /*
- * Forcibly align the final address in case we have a hardware
- * issue that requires alignment for performance reasons.
- */
- addr = align_vdso_addr(addr);
-
- return addr;
-}
-
-static int map_vdso_randomized(const struct vdso_image *image)
-{
- unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start);
-
- return map_vdso(image, addr);
-}
-#endif
-
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
@@ -369,7 +316,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!vdso64_enabled)
return 0;
- return map_vdso_randomized(&vdso_image_64);
+ return map_vdso(&vdso_image_64, 0);
}
#ifdef CONFIG_COMPAT
@@ -380,7 +327,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
if (x32) {
if (!vdso64_enabled)
return 0;
- return map_vdso_randomized(&vdso_image_x32);
+ return map_vdso(&vdso_image_x32, 0);
}
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e0ca8120aea8..a3c0df11d0e6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
if (!show_unhandled_signals)
return;
- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 81f6d8275b6b..69a3b02e50bb 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -579,7 +579,7 @@ static void amd_pmu_cpu_starting(int cpu)
if (!x86_pmu.amd_nb_constraints)
return;
- nb_id = topology_die_id(cpu);
+ nb_id = topology_amd_node_id(cpu);
WARN_ON_ONCE(nb_id == BAD_APICID);
for_each_online_cpu(i) {
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 5bf03c575812..4ccb8fa483e6 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -71,7 +71,7 @@ union amd_uncore_info {
};
struct amd_uncore {
- union amd_uncore_info * __percpu info;
+ union amd_uncore_info __percpu *info;
struct amd_uncore_pmu *pmus;
unsigned int num_pmus;
bool init_done;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3804f21ab049..768d1414897f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include <asm/cpufeature.h>
+#include <asm/debugreg.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/intel_pt.h>
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4b50a3a9818a..326c8cd5aa2d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -834,7 +834,7 @@ static int __init cstate_init(void)
}
if (has_cstate_pkg) {
- if (topology_max_die_per_package() > 1) {
+ if (topology_max_dies_per_package() > 1) {
err = perf_pmu_register(&cstate_pkg_pmu,
"cstate_die", -1);
} else {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index d49d661ec0a7..2641ba620f12 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,6 +5,7 @@
#include <linux/sched/clock.h>
#include <asm/cpu_entry_area.h>
+#include <asm/debugreg.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7927c0b832fa..258e2cdf28fa 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1893,7 +1893,7 @@ static int __init intel_uncore_init(void)
return -ENODEV;
__uncore_max_dies =
- topology_max_packages() * topology_max_die_per_package();
+ topology_max_packages() * topology_max_dies_per_package();
id = x86_match_cpu(intel_uncore_match);
if (!id) {
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 56eea2c66cfb..92da8aaa5966 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void)
uncore_nhmex = true;
else
nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
- if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = nhmex_msr_uncores;
}
/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 7fd4334e12a1..9462fd9f3b7a 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr_uncores[] = {
void snb_uncore_cpu_init(void)
{
uncore_msr_uncores = snb_msr_uncores;
- if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ snb_uncore_cbox.num_boxes = topology_num_cores_per_package();
}
static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
@@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr_uncores[] = {
void skl_uncore_cpu_init(void)
{
uncore_msr_uncores = skl_msr_uncores;
- if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (skl_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ skl_uncore_cbox.num_boxes = topology_num_cores_per_package();
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index a96496bef678..2eaf0f339849 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1172,8 +1172,8 @@ static struct intel_uncore_type *snbep_msr_uncores[] = {
void snbep_uncore_cpu_init(void)
{
- if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ snbep_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = snbep_msr_uncores;
}
@@ -1406,7 +1406,7 @@ static int topology_gidnid_map(int nodeid, u32 gidnid)
*/
for (i = 0; i < 8; i++) {
if (nodeid == GIDNIDMAP(gidnid, i)) {
- if (topology_max_die_per_package() > 1)
+ if (topology_max_dies_per_package() > 1)
die_id = i;
else
die_id = topology_phys_to_logical_pkg(i);
@@ -1845,8 +1845,8 @@ static struct intel_uncore_type *ivbep_msr_uncores[] = {
void ivbep_uncore_cpu_init(void)
{
- if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = ivbep_msr_uncores;
}
@@ -2917,8 +2917,8 @@ static bool hswep_has_limit_sbox(unsigned int device)
void hswep_uncore_cpu_init(void)
{
- if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (hswep_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ hswep_uncore_cbox.num_boxes = topology_num_cores_per_package();
/* Detect 6-8 core systems with only two SBOXes */
if (hswep_has_limit_sbox(HSWEP_PCU_DID))
@@ -3280,8 +3280,8 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
void bdx_uncore_cpu_init(void)
{
- if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ bdx_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = bdx_msr_uncores;
/* Detect systems with no SBOXes */
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 8d98d468b976..fb2b1961e5a3 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -674,7 +674,7 @@ static const struct attribute_group *rapl_attr_update[] = {
static int __init init_rapl_pmus(void)
{
- int maxdie = topology_max_packages() * topology_max_die_per_package();
+ int maxdie = topology_max_packages() * topology_max_dies_per_package();
size_t size;
size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 96e6c51515f5..edd2f35b2a5e 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -16,6 +16,11 @@
extern struct boot_params boot_params;
static struct real_mode_header hv_vtl_real_mode_header;
+static bool __init hv_vtl_msi_ext_dest_id(void)
+{
+ return true;
+}
+
void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
@@ -26,8 +31,9 @@ void __init hv_vtl_init_platform(void)
x86_init.timers.timer_init = x86_init_noop;
/* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+ x86_init.mpparse.parse_smp_cfg = x86_init_noop;
x86_platform.get_wallclock = get_rtc_noop;
x86_platform.set_wallclock = set_rtc_noop;
@@ -38,6 +44,8 @@ void __init hv_vtl_init_platform(void)
x86_platform.legacy.warm_reset = 0;
x86_platform.legacy.reserve_bios_regions = 0;
x86_platform.legacy.devices.pnpbios = 0;
+
+ x86_init.hyper.msi_ext_dest_id = hv_vtl_msi_ext_dest_id;
}
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 7dcbf153ad72..768d73de0d09 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -15,6 +15,7 @@
#include <asm/io.h>
#include <asm/coco.h>
#include <asm/mem_encrypt.h>
+#include <asm/set_memory.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
@@ -503,6 +504,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
}
/*
+ * When transitioning memory between encrypted and decrypted, the caller
+ * of set_memory_encrypted() or set_memory_decrypted() is responsible for
+ * ensuring that the memory isn't in use and isn't referenced while the
+ * transition is in progress. The transition has multiple steps, and the
+ * memory is in an inconsistent state until all steps are complete. A
+ * reference while the state is inconsistent could result in an exception
+ * that can't be cleanly fixed up.
+ *
+ * But the Linux kernel load_unaligned_zeropad() mechanism could cause a
+ * stray reference that can't be prevented by the caller, so Linux has
+ * specific code to handle this case. But when the #VC and #VE exceptions
+ * routed to a paravisor, the specific code doesn't work. To avoid this
+ * problem, mark the pages as "not present" while the transition is in
+ * progress. If load_unaligned_zeropad() causes a stray reference, a normal
+ * page fault is generated instead of #VC or #VE, and the page-fault-based
+ * handlers for load_unaligned_zeropad() resolve the reference. When the
+ * transition is complete, hv_vtom_set_host_visibility() marks the pages
+ * as "present" again.
+ */
+static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
+{
+ return !set_memory_np(kbuffer, pagecount);
+}
+
+/*
* hv_vtom_set_host_visibility - Set specified memory visible to host.
*
* In Isolation VM, all guest memory is encrypted from host and guest
@@ -515,16 +541,28 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
enum hv_mem_host_visibility visibility = enc ?
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
u64 *pfn_array;
+ phys_addr_t paddr;
+ void *vaddr;
int ret = 0;
bool result = true;
int i, pfn;
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
- if (!pfn_array)
- return false;
+ if (!pfn_array) {
+ result = false;
+ goto err_set_memory_p;
+ }
for (i = 0, pfn = 0; i < pagecount; i++) {
- pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE);
+ /*
+ * Use slow_virt_to_phys() because the PRESENT bit has been
+ * temporarily cleared in the PTEs. slow_virt_to_phys() works
+ * without the PRESENT bit while virt_to_hvpfn() or similar
+ * does not.
+ */
+ vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
+ paddr = slow_virt_to_phys(vaddr);
+ pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
pfn++;
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
@@ -538,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
}
}
- err_free_pfn_array:
+err_free_pfn_array:
kfree(pfn_array);
+
+err_set_memory_p:
+ /*
+ * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
+ * did. Do this even if there is an error earlier in this function in
+ * order to avoid leaving the memory range in a "broken" state. Setting
+ * the PRESENT bits shouldn't fail, but return an error if it does.
+ */
+ if (set_memory_p(kbuffer, pagecount))
+ result = false;
+
return result;
}
static bool hv_vtom_tlb_flush_required(bool private)
{
- return true;
+ /*
+ * Since hv_vtom_clear_present() marks the PTEs as "not present"
+ * and flushes the TLB, they can't be in the TLB. That makes the
+ * flush controlled by this function redundant, so return "false".
+ */
+ return false;
}
static bool hv_vtom_cache_flush_required(void)
@@ -608,6 +662,7 @@ void __init hv_vtom_init(void)
x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
+ x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
/* Set WB as the default cache mode. */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9d159b771dc8..94ce0f7c9d3a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -46,6 +46,10 @@ extern void x86_32_probe_apic(void);
static inline void x86_32_probe_apic(void) { }
#endif
+extern u32 cpuid_to_apicid[];
+
+#define CPU_ACPIID_INVALID U32_MAX
+
#ifdef CONFIG_X86_LOCAL_APIC
extern int apic_verbosity;
@@ -54,8 +58,6 @@ extern int local_apic_timer_c2_ok;
extern bool apic_is_disabled;
extern unsigned int lapic_timer_period;
-extern u32 cpuid_to_apicid[];
-
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
APIC_PIC,
@@ -169,6 +171,14 @@ extern bool apic_needs_pit(void);
extern void apic_send_IPI_allbutself(unsigned int vector);
+extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present);
+extern void topology_register_boot_apic(u32 apic_id);
+extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id);
+extern void topology_hotunplug_apic(unsigned int cpu);
+extern void topology_apply_cmdline_limits_early(void);
+extern void topology_init_possible_cpus(void);
+extern void topology_reset_possible_cpus_up(void);
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
@@ -183,6 +193,8 @@ static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
static inline bool apic_needs_pit(void) { return true; }
+static inline void topology_apply_cmdline_limits_early(void) { }
+static inline void topology_init_possible_cpus(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
@@ -289,16 +301,11 @@ struct apic {
/* Probe, setup and smpboot functions */
int (*probe)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
- bool (*apic_id_registered)(void);
- bool (*check_apicid_used)(physid_mask_t *map, u32 apicid);
void (*init_apic_ldr)(void);
- void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
u32 (*cpu_present_to_apicid)(int mps_cpu);
- u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb);
u32 (*get_apic_id)(u32 id);
- u32 (*set_apic_id)(u32 apicid);
/* wakeup_secondary_cpu */
int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip);
@@ -527,7 +534,6 @@ extern int default_apic_id_valid(u32 apicid);
extern u32 apic_default_calc_apicid(unsigned int cpu);
extern u32 apic_flat_calc_apicid(unsigned int cpu);
-extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
extern u32 default_cpu_present_to_apicid(int mps_cpu);
void apic_send_nmi_to_offline_cpu(unsigned int cpu);
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index b1a98fa38828..076bf8dee702 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -12,6 +12,7 @@
#include <asm/special_insns.h>
#include <asm/preempt.h>
#include <asm/asm.h>
+#include <asm/fred.h>
#include <asm/gsseg.h>
#ifndef CONFIG_X86_CMPXCHG64
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index fbcfec4dc4cc..ca8eed1d496a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -113,6 +113,20 @@
#endif
+#ifndef __ASSEMBLY__
+#ifndef __pic__
+static __always_inline __pure void *rip_rel_ptr(void *p)
+{
+ asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
+
+ return p;
+}
+#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var)))
+#else
+#define RIP_REL_REF(var) (var)
+#endif
+#endif
+
/*
* Macros to generate condition code outputs from inline assembly,
* The output operand must be type "bool".
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0216f63a366b..fe1e7e3cc844 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -33,7 +33,7 @@
* Returns:
* 0 - (index < size)
*/
-static inline unsigned long array_index_mask_nospec(unsigned long index,
+static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
unsigned long size)
{
unsigned long mask;
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 6ae2d16a7613..fb7388bbc212 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_COCO_H
#define _ASM_X86_COCO_H
+#include <asm/asm.h>
#include <asm/types.h>
enum cc_vendor {
@@ -10,13 +11,20 @@ enum cc_vendor {
CC_VENDOR_INTEL,
};
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
extern enum cc_vendor cc_vendor;
+extern u64 cc_mask;
+
+static inline void cc_set_mask(u64 mask)
+{
+ RIP_REL_REF(cc_mask) = mask;
+}
-#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
-void cc_set_mask(u64 mask);
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
#else
+#define cc_vendor (CC_VENDOR_NONE)
+
static inline u64 cc_mkenc(u64 val)
{
return val;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index f8f9a9b79395..aa30fd8cad7f 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -9,18 +9,10 @@
#include <linux/percpu.h>
#include <asm/ibt.h>
-#ifdef CONFIG_SMP
-
-extern void prefill_possible_map(void);
-
-#else /* CONFIG_SMP */
-
-static inline void prefill_possible_map(void) {}
-
+#ifndef CONFIG_SMP
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
#define cpu_acpi_id(cpu) 0
#define safe_smp_processor_id() 0
-
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a26bebbdff87..a1273698fc43 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 29cb275a219d..f0337f7bcf16 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -81,10 +81,8 @@
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-/* CPU types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -97,7 +95,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
@@ -326,7 +324,9 @@
#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
@@ -442,6 +442,7 @@
#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
@@ -505,4 +506,5 @@
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
index 9bee3e7bf973..6b122a31da06 100644
--- a/arch/x86/include/asm/cpuid.h
+++ b/arch/x86/include/asm/cpuid.h
@@ -127,6 +127,42 @@ static inline unsigned int cpuid_edx(unsigned int op)
return edx;
}
+static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs)
+{
+ regs[CPUID_EAX] = leaf;
+ regs[CPUID_ECX] = subleaf;
+ __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
+}
+
+#define cpuid_subleaf(leaf, subleaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
+}
+
+#define cpuid_leaf(leaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, 0, (u32 *)(regs)); \
+}
+
+static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf,
+ enum cpuid_regs_idx regidx, u32 *reg)
+{
+ u32 regs[4];
+
+ __cpuid_read(leaf, subleaf, regs);
+ *reg = regs[regidx];
+}
+
+#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
+}
+
+#define cpuid_leaf_reg(leaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
+}
+
static __always_inline bool cpuid_function_is_indexed(u32 function)
{
switch (function) {
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index dd4b67101bb7..bf5953883ec3 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -18,7 +18,7 @@ struct pcpu_hot {
struct task_struct *current_task;
int preempt_count;
int cpu_number;
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
u64 call_depth;
#endif
unsigned long top_of_stack;
@@ -37,8 +37,15 @@ static_assert(sizeof(struct pcpu_hot) == 64);
DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
+/* const-qualified alias to pcpu_hot, aliased by linker. */
+DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
+ const_pcpu_hot);
+
static __always_inline struct task_struct *get_current(void)
{
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_pcpu_hot.current_task);
+
return this_cpu_read_stable(pcpu_hot.current_task);
}
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 0cec92c430cc..fdbbbfec745a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -5,7 +5,9 @@
#include <linux/bug.h>
#include <linux/percpu.h>
#include <uapi/asm/debugreg.h>
+
#include <asm/cpufeature.h>
+#include <asm/msr.h>
DECLARE_PER_CPU(unsigned long, cpu_dr7);
@@ -159,4 +161,26 @@ static inline unsigned long amd_get_dr_addr_mask(unsigned int dr)
}
#endif
+static inline unsigned long get_debugctlmsr(void)
+{
+ unsigned long debugctlmsr = 0;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return 0;
+#endif
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+
+ return debugctlmsr;
+}
+
+static inline void update_debugctlmsr(unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return;
+#endif
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+}
+
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ab97b22ac04a..62dc9f59ea76 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -46,6 +46,7 @@ struct gdt_page {
} __attribute__((aligned(PAGE_SIZE)));
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+DECLARE_INIT_PER_CPU(gdt_page);
/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
@@ -402,8 +403,6 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit1 = (limit >> 16) & 0xf;
}
-void alloc_intr_gate(unsigned int n, const void *addr);
-
static inline void init_idt_data(struct idt_data *data, unsigned int n,
const void *addr)
{
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 702d93fdd10e..da4054fbf533 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -44,32 +44,32 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define DISABLE_PTI 0
#else
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
# define DISABLE_RETPOLINE 0
#else
# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
(1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
#endif
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
# define DISABLE_RETHUNK 0
#else
# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
#endif
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
# define DISABLE_UNRET 0
#else
# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
#endif
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
# define DISABLE_CALL_DEPTH_TRACKING 0
#else
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
@@ -117,6 +117,18 @@
#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
#endif
+#ifdef CONFIG_X86_FRED
+# define DISABLE_FRED 0
+#else
+# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
+#endif
+
+#ifdef CONFIG_KVM_AMD_SEV
+#define DISABLE_SEV_SNP 0
+#else
+#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -133,7 +145,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_LAM)
+#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
@@ -141,7 +153,7 @@
DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 0
+#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index c4555b269a1b..1dc600fa3ba5 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -143,15 +143,6 @@ extern void efi_free_boot_services(void);
void arch_efi_call_virt_setup(void);
void arch_efi_call_virt_teardown(void);
-/* kexec external ABI */
-struct efi_setup_data {
- u64 fw_vendor;
- u64 __unused;
- u64 tables;
- u64 smbios;
- u64 reserved[8];
-};
-
extern u64 efi_setup;
#ifdef CONFIG_EFI
@@ -418,8 +409,9 @@ extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
void *buf, struct efi_mem_range *mem);
-#define arch_ima_efi_boot_mode \
- ({ extern struct boot_params boot_params; boot_params.secure_boot; })
+extern enum efi_secureboot_mode __x86_ima_efi_boot_mode(void);
+
+#define arch_ima_efi_boot_mode __x86_ima_efi_boot_mode()
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_get_runtime_map_size(void);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1e16bd5ac781..1fb83d47711f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -392,5 +392,4 @@ struct va_alignment {
} ____cacheline_aligned;
extern struct va_alignment va_align;
-extern unsigned long align_vdso_addr(unsigned long);
#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce8f50192ae3..7e523bb3d2d3 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
- mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index fe6312045042..7acf0383be80 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -64,6 +64,8 @@
#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4))
#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8))
-#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
+#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
+
+#define EX_TYPE_ERETU 21
#endif
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index ca6e5e5f16b2..c485f1944c5f 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -37,10 +37,12 @@ extern void fpu_flush_thread(void);
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
-static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
- !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct fpu *old_fpu = &old->thread.fpu;
+
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
@@ -60,10 +62,10 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
* Delay loading of the complete FPU state until the return to userland.
* PKRU is handled separately.
*/
-static inline void switch_fpu_finish(void)
+static inline void switch_fpu_finish(struct task_struct *new)
{
if (cpu_feature_enabled(X86_FEATURE_FPU))
- set_thread_flag(TIF_NEED_FPU_LOAD);
+ set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
}
#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
new file mode 100644
index 000000000000..e86c7ba32435
--- /dev/null
+++ b/arch/x86/include/asm/fred.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Macros for Flexible Return and Event Delivery (FRED)
+ */
+
+#ifndef ASM_X86_FRED_H
+#define ASM_X86_FRED_H
+
+#include <linux/const.h>
+
+#include <asm/asm.h>
+#include <asm/trapnr.h>
+
+/*
+ * FRED event return instruction opcodes for ERET{S,U}; supported in
+ * binutils >= 2.41.
+ */
+#define ERETS _ASM_BYTES(0xf2,0x0f,0x01,0xca)
+#define ERETU _ASM_BYTES(0xf3,0x0f,0x01,0xca)
+
+/*
+ * RSP is aligned to a 64-byte boundary before used to push a new stack frame
+ */
+#define FRED_STACK_FRAME_RSP_MASK _AT(unsigned long, (~0x3f))
+
+/*
+ * Used for the return address for call emulation during code patching,
+ * and measured in 64-byte cache lines.
+ */
+#define FRED_CONFIG_REDZONE_AMOUNT 1
+#define FRED_CONFIG_REDZONE (_AT(unsigned long, FRED_CONFIG_REDZONE_AMOUNT) << 6)
+#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9)
+#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p))
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_X86_FRED
+#include <linux/kernel.h>
+
+#include <asm/ptrace.h>
+
+struct fred_info {
+ /* Event data: CR2, DR6, ... */
+ unsigned long edata;
+ unsigned long resv;
+};
+
+/* Full format of the FRED stack frame */
+struct fred_frame {
+ struct pt_regs regs;
+ struct fred_info info;
+};
+
+static __always_inline struct fred_info *fred_info(struct pt_regs *regs)
+{
+ return &container_of(regs, struct fred_frame, regs)->info;
+}
+
+static __always_inline unsigned long fred_event_data(struct pt_regs *regs)
+{
+ return fred_info(regs)->edata;
+}
+
+void asm_fred_entrypoint_user(void);
+void asm_fred_entrypoint_kernel(void);
+void asm_fred_entry_from_kvm(struct fred_ss);
+
+__visible void fred_entry_from_user(struct pt_regs *regs);
+__visible void fred_entry_from_kernel(struct pt_regs *regs);
+__visible void __fred_entry_from_kvm(struct pt_regs *regs);
+
+/* Can be called from noinstr code, thus __always_inline */
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector)
+{
+ struct fred_ss ss = {
+ .ss =__KERNEL_DS,
+ .type = type,
+ .vector = vector,
+ .nmi = type == EVENT_TYPE_NMI,
+ .lm = 1,
+ };
+
+ asm_fred_entry_from_kvm(ss);
+}
+
+void cpu_init_fred_exceptions(void);
+void fred_complete_exception_setup(void);
+
+#else /* CONFIG_X86_FRED */
+static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
+static inline void cpu_init_fred_exceptions(void) { }
+static inline void fred_complete_exception_setup(void) { }
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+#endif /* CONFIG_X86_FRED */
+#endif /* !__ASSEMBLY__ */
+
+#endif /* ASM_X86_FRED_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 35cff5f2becf..9e7e8ca8e299 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -6,7 +6,7 @@
#ifdef CONFIG_X86_64
-#include <asm/msr-index.h>
+#include <asm/msr.h>
/*
* Read/write a task's FSBASE or GSBASE. This returns the value that
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b02c3cd3c0f6..edebf1020e04 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -16,8 +16,6 @@
#include <asm/irq_vectors.h>
-#define IRQ_MATRIX_BITS NR_VECTORS
-
#ifndef __ASSEMBLY__
#include <linux/percpu.h>
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index c7ef6ea2fa99..4212c00c9708 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -69,7 +69,7 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm);
extern bool __ia32_enabled;
-static inline bool ia32_enabled(void)
+static __always_inline bool ia32_enabled(void)
{
return __ia32_enabled;
}
@@ -81,7 +81,7 @@ static inline void ia32_disable(void)
#else /* !CONFIG_IA32_EMULATION */
-static inline bool ia32_enabled(void)
+static __always_inline bool ia32_enabled(void)
{
return IS_ENABLED(CONFIG_X86_32);
}
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 13639e57e1f8..47d4c04d103d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -13,15 +13,18 @@
#include <asm/irq_stack.h>
+typedef void (*idtentry_t)(struct pt_regs *regs);
+
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware
* @vector: Vector number (ignored for C)
* @func: Function name of the entry point
*
- * Declares three functions:
+ * Declares four functions:
* - The ASM entry point: asm_##func
* - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the FRED event dispatcher (maybe unused)
* - The C handler called from the ASM entry point
*
* Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it
@@ -31,6 +34,7 @@
#define DECLARE_IDTENTRY(vector, func) \
asmlinkage void asm_##func(void); \
asmlinkage void xen_asm_##func(void); \
+ void fred_##func(struct pt_regs *regs); \
__visible void func(struct pt_regs *regs)
/**
@@ -138,6 +142,17 @@ static __always_inline void __##func(struct pt_regs *regs, \
__visible noinstr void func(struct pt_regs *regs)
/**
+ * DEFINE_FREDENTRY_RAW - Emit code for raw FRED entry points
+ * @func: Function name of the entry point
+ *
+ * @func is called from the FRED event dispatcher with interrupts disabled.
+ *
+ * See @DEFINE_IDTENTRY_RAW for further details.
+ */
+#define DEFINE_FREDENTRY_RAW(func) \
+noinstr void fred_##func(struct pt_regs *regs)
+
+/**
* DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points
* Error code pushed by hardware
* @vector: Vector number (ignored for C)
@@ -233,17 +248,27 @@ static noinline void __##func(struct pt_regs *regs, u32 vector)
#define DEFINE_IDTENTRY_SYSVEC(func) \
static void __##func(struct pt_regs *regs); \
\
+static __always_inline void instr_##func(struct pt_regs *regs) \
+{ \
+ kvm_set_cpu_l1tf_flush_l1d(); \
+ run_sysvec_on_irqstack_cond(__##func, regs); \
+} \
+ \
__visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
- run_sysvec_on_irqstack_cond(__##func, regs); \
+ instr_##func (regs); \
instrumentation_end(); \
irqentry_exit(regs, state); \
} \
\
+void fred_##func(struct pt_regs *regs) \
+{ \
+ instr_##func (regs); \
+} \
+ \
static noinline void __##func(struct pt_regs *regs)
/**
@@ -260,19 +285,29 @@ static noinline void __##func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \
static __always_inline void __##func(struct pt_regs *regs); \
\
-__visible noinstr void func(struct pt_regs *regs) \
+static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
- irqentry_state_t state = irqentry_enter(regs); \
- \
- instrumentation_begin(); \
__irq_enter_raw(); \
kvm_set_cpu_l1tf_flush_l1d(); \
__##func (regs); \
__irq_exit_raw(); \
+} \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ irqentry_state_t state = irqentry_enter(regs); \
+ \
+ instrumentation_begin(); \
+ instr_##func (regs); \
instrumentation_end(); \
irqentry_exit(regs, state); \
} \
\
+void fred_##func(struct pt_regs *regs) \
+{ \
+ instr_##func (regs); \
+} \
+ \
static __always_inline void __##func(struct pt_regs *regs)
/**
@@ -410,17 +445,35 @@ __visible noinstr void func(struct pt_regs *regs, \
/* C-Code mapping */
#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
+#define DEFINE_FREDENTRY_NMI DEFINE_FREDENTRY_RAW
#ifdef CONFIG_X86_64
#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_MCE DEFINE_FREDENTRY_RAW
#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW
+#endif
+
+void idt_install_sysvec(unsigned int n, const void *function);
+
+#ifdef CONFIG_X86_FRED
+void fred_install_sysvec(unsigned int vector, const idtentry_t function);
+#else
+static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { }
#endif
+#define sysvec_install(vector, function) { \
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) \
+ fred_install_sysvec(vector, function); \
+ else \
+ idt_install_sysvec(vector, asm_##function); \
+}
+
#else /* !__ASSEMBLY__ */
/*
@@ -447,7 +500,7 @@ __visible noinstr void func(struct pt_regs *regs, \
/* System vector entries */
#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
- idtentry_sysvec vector func
+ DECLARE_IDTENTRY(vector, func)
#ifdef CONFIG_X86_64
# define DECLARE_IDTENTRY_MCE(vector, func) \
@@ -655,23 +708,36 @@ DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi);
DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
+#else
+# define fred_sysvec_reschedule_ipi NULL
+# define fred_sysvec_reboot NULL
+# define fred_sysvec_call_function_single NULL
+# define fred_sysvec_call_function NULL
#endif
#ifdef CONFIG_X86_LOCAL_APIC
# ifdef CONFIG_X86_MCE_THRESHOLD
DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold);
+# else
+# define fred_sysvec_threshold NULL
# endif
# ifdef CONFIG_X86_MCE_AMD
DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error);
+# else
+# define fred_sysvec_deferred_error NULL
# endif
# ifdef CONFIG_X86_THERMAL_VECTOR
DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal);
+# else
+# define fred_sysvec_thermal NULL
# endif
# ifdef CONFIG_IRQ_WORK
DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work);
+# else
+# define fred_sysvec_irq_work NULL
# endif
#endif
@@ -679,12 +745,16 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
+#else
+# define fred_sysvec_kvm_posted_intr_ipi NULL
+# define fred_sysvec_kvm_posted_intr_wakeup_ipi NULL
+# define fred_sysvec_kvm_posted_intr_nested_ipi NULL
#endif
#if IS_ENABLED(CONFIG_HYPERV)
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
-DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 197316121f04..b65e9c46b922 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -162,6 +162,8 @@
#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
+
/* Xeon Phi */
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 3814a9263d64..294cd2a40818 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -379,7 +379,7 @@ static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
const u8 *end = from + count * 64;
while (from < end) {
- movdir64b(dst, from);
+ movdir64b_io(dst, from);
from += 64;
}
}
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 51c782600e02..0d806513c4b3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -140,7 +140,6 @@ extern void mask_ioapic_entries(void);
extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
-extern void setup_ioapic_ids_from_mpc_nocheck(void);
extern int mp_find_ioapic(u32 gsi);
extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 2fd52b65deac..3be2451e7bc8 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -10,6 +10,7 @@ extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_merge;
extern int panic_on_overflow;
+extern bool amd_iommu_snp_en;
#ifdef CONFIG_SWIOTLB
extern bool x86_swiotlb_enable;
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 071572e23d3a..cbbef32517f0 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -24,7 +24,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes] # objtool NOPs this \n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (2 | branch) : : l_yes);
@@ -38,7 +38,7 @@ l_yes:
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -52,7 +52,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes]\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index c9f6a6c5de3c..91ca9a9ee3a2 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -25,7 +25,6 @@
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/bootparam.h>
struct kimage;
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
index 8fa6ac0e2d76..d91b37f5b4bb 100644
--- a/arch/x86/include/asm/kmsan.h
+++ b/arch/x86/include/asm/kmsan.h
@@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr)
{
unsigned long x = (unsigned long)addr;
unsigned long y = x - __START_KERNEL_map;
+ bool ret;
/* use the carry flag to determine if x was < __START_KERNEL_map */
if (unlikely(x > y)) {
@@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr)
return false;
}
- return pfn_valid(x >> PAGE_SHIFT);
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note, this sacrifices occasionally breaking scheduling guarantees.
+ * Although, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = pfn_valid(x >> PAGE_SHIFT);
+ preempt_enable_no_resched();
+
+ return ret;
}
#endif /* !MODULE */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 378ed944b849..ab24ce207988 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -138,6 +138,7 @@ KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
+KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b5b2d0fde579..18cbde14cf81 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1145,6 +1145,8 @@ struct kvm_hv {
unsigned int synic_auto_eoi_used;
struct kvm_hv_syndbg hv_syndbg;
+
+ bool xsaves_xsavec_checked;
};
#endif
@@ -1794,6 +1796,7 @@ struct kvm_x86_ops {
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+ void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h
index 511b35069187..f163176d6f7f 100644
--- a/arch/x86/include/asm/kvmclock.h
+++ b/arch/x86/include/asm/kvmclock.h
@@ -4,8 +4,6 @@
#include <linux/percpu.h>
-extern struct clocksource kvm_clock;
-
DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 571fe4d2d232..dc31b13b87a0 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -40,27 +40,27 @@
#ifdef __ASSEMBLY__
-#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define RET jmp __x86_return_thunk
-#else /* CONFIG_RETPOLINE */
-#ifdef CONFIG_SLS
+#else /* CONFIG_MITIGATION_RETPOLINE */
+#ifdef CONFIG_MITIGATION_SLS
#define RET ret; int3
#else
#define RET ret
#endif
-#endif /* CONFIG_RETPOLINE */
+#endif /* CONFIG_MITIGATION_RETPOLINE */
#else /* __ASSEMBLY__ */
-#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define ASM_RET "jmp __x86_return_thunk\n\t"
-#else /* CONFIG_RETPOLINE */
-#ifdef CONFIG_SLS
+#else /* CONFIG_MITIGATION_RETPOLINE */
+#ifdef CONFIG_MITIGATION_SLS
#define ASM_RET "ret; int3\n\t"
#else
#define ASM_RET "ret\n\t"
#endif
-#endif /* CONFIG_RETPOLINE */
+#endif /* CONFIG_MITIGATION_RETPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 73dba8b94443..59aa966dc212 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -131,8 +131,20 @@ static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
(typeof(l->a.counter) *) old, new);
}
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+/*
+ * Implement local_xchg using CMPXCHG instruction without the LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
+ */
+static __always_inline long
+local_xchg(local_t *l, long n)
+{
+ long c = local_read(l);
+
+ do { } while (!local_try_cmpxchg(l, &c, n));
+
+ return c;
+}
/**
* local_add_unless - add unless the number is already a given value
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 359ada486fa9..f922b682b9b4 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -15,7 +15,8 @@
#include <linux/init.h>
#include <linux/cc_platform.h>
-#include <asm/bootparam.h>
+#include <asm/asm.h>
+struct boot_params;
#ifdef CONFIG_X86_MEM_ENCRYPT
void __init mem_encrypt_init(void);
@@ -46,8 +47,8 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
-void __init sme_encrypt_kernel(struct boot_params *bp);
-void __init sme_enable(struct boot_params *bp);
+void sme_encrypt_kernel(struct boot_params *bp);
+void sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
@@ -58,6 +59,11 @@ void __init mem_encrypt_free_decrypted_mem(void);
void __init sev_es_init_vc_handling(void);
+static inline u64 sme_get_me_mask(void)
+{
+ return RIP_REL_REF(sme_me_mask);
+}
+
#define __bss_decrypted __section(".bss..decrypted")
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -75,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
-static inline void __init sme_enable(struct boot_params *bp) { }
+static inline void sme_encrypt_kernel(struct boot_params *bp) { }
+static inline void sme_enable(struct boot_params *bp) { }
static inline void sev_es_init_vc_handling(void) { }
@@ -89,6 +95,8 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool en
static inline void mem_encrypt_free_decrypted_mem(void) { }
+static inline u64 sme_get_me_mask(void) { return 0; }
+
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -106,11 +114,6 @@ void add_encrypt_protection_map(void);
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-static inline u64 sme_get_me_mask(void)
-{
- return sme_me_mask;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 4b0f98a8d338..c72c7ff78fcd 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_MPSPEC_H
#define _ASM_X86_MPSPEC_H
+#include <linux/types.h>
#include <asm/mpspec_def.h>
#include <asm/x86_init.h>
@@ -46,70 +47,31 @@ extern int smp_found_config;
# define smp_found_config 0
#endif
-static inline void get_smp_config(void)
-{
- x86_init.mpparse.get_smp_config(0);
-}
-
-static inline void early_get_smp_config(void)
-{
- x86_init.mpparse.get_smp_config(1);
-}
-
-static inline void find_smp_config(void)
-{
- x86_init.mpparse.find_smp_config();
-}
-
#ifdef CONFIG_X86_MPPARSE
extern void e820__memblock_alloc_reserved_mpc_new(void);
extern int enable_update_mptable;
-extern void default_find_smp_config(void);
-extern void default_get_smp_config(unsigned int early);
+extern void mpparse_find_mptable(void);
+extern void mpparse_parse_early_smp_config(void);
+extern void mpparse_parse_smp_config(void);
#else
static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
-#define enable_update_mptable 0
-#define default_find_smp_config x86_init_noop
-#define default_get_smp_config x86_init_uint_noop
+#define enable_update_mptable 0
+#define mpparse_find_mptable x86_init_noop
+#define mpparse_parse_early_smp_config x86_init_noop
+#define mpparse_parse_smp_config x86_init_noop
#endif
-int generic_processor_info(int apicid);
+extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC);
-#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
-
-struct physid_mask {
- unsigned long mask[PHYSID_ARRAY_SIZE];
-};
-
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map) set_bit(physid, (map).mask)
-#define physid_isset(physid, map) test_bit(physid, (map).mask)
-
-#define physids_or(dst, src1, src2) \
- bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
-
-#define physids_clear(map) \
- bitmap_zero((map).mask, MAX_LOCAL_APIC)
-
-#define physids_empty(map) \
- bitmap_empty((map).mask, MAX_LOCAL_APIC)
-
-static inline void physids_promote(unsigned long physids, physid_mask_t *map)
+static inline void reset_phys_cpu_present_map(u32 apicid)
{
- physids_clear(*map);
- map->mask[0] = physids;
+ bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+ set_bit(apicid, phys_cpu_present_map);
}
-static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
+static inline void copy_phys_cpu_present_map(unsigned long *dst)
{
- physids_clear(*map);
- physid_set(physid, *map);
+ bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC);
}
-#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
#endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f1bd7b91b3c6..05956bd8bacf 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,8 +36,19 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
-/* Intel MSRs. Some also available on other CPUs */
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
+#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
+#define MSR_IA32_FRED_RSP2 0x1ce /* Level 2 stack pointer */
+#define MSR_IA32_FRED_RSP3 0x1cf /* Level 3 stack pointer */
+#define MSR_IA32_FRED_STKLVLS 0x1d0 /* Exception stack levels */
+#define MSR_IA32_FRED_SSP0 MSR_IA32_PL0_SSP /* Level 0 shadow stack pointer */
+#define MSR_IA32_FRED_SSP1 0x1d1 /* Level 1 shadow stack pointer */
+#define MSR_IA32_FRED_SSP2 0x1d2 /* Level 2 shadow stack pointer */
+#define MSR_IA32_FRED_SSP3 0x1d3 /* Level 3 shadow stack pointer */
+#define MSR_IA32_FRED_CONFIG 0x1d4 /* Entrypoint and interrupt stack level */
+/* Intel MSRs. Some also available on other CPUs */
#define MSR_TEST_CTRL 0x00000033
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
@@ -165,6 +176,14 @@
* CPU is not vulnerable to Gather
* Data Sampling (GDS).
*/
+#define ARCH_CAP_RFDS_NO BIT(27) /*
+ * Not susceptible to Register
+ * File Data Sampling.
+ */
+#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
+ * VERW clears CPU Register
+ * File.
+ */
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
* IA32_XAPIC_DISABLE_STATUS MSR
@@ -594,34 +613,47 @@
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
-#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
-#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
-
-/* SNP feature bits enabled by the hypervisor */
-#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
-#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
-#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
-#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
-#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
-#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
-#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
-#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
-#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
-#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
-#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
-#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
-#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
-
-/* SNP feature bits reserved for future use. */
-#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
-#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
-#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
+#define MSR_AMD64_SNP_VTOM_BIT 3
+#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT)
+#define MSR_AMD64_SNP_REFLECT_VC_BIT 4
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT)
+#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT)
+#define MSR_AMD64_SNP_ALT_INJ_BIT 6
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT)
+#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT)
+#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT)
+#define MSR_AMD64_SNP_VMPL_SSS_BIT 10
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT)
+#define MSR_AMD64_SNP_SECURE_TSC_BIT 11
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_IBS_VIRT_BIT 14
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16
+#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
+#define MSR_AMD64_SNP_SMT_PROT_BIT 17
+#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_RMP_BASE 0xc0010132
+#define MSR_AMD64_RMP_END 0xc0010133
+
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
@@ -708,8 +740,15 @@
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_AMD64_SYSCFG 0xc0010010
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
+#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT 19
+#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 65ec1965cd28..d642037f9ed5 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -12,11 +12,13 @@
#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
+#include <linux/percpu.h>
+
struct msr_info {
- u32 msr_no;
- struct msr reg;
- struct msr *msrs;
- int err;
+ u32 msr_no;
+ struct msr reg;
+ struct msr __percpu *msrs;
+ int err;
};
struct msr_regs_info {
@@ -97,6 +99,19 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
+/*
+ * WRMSRNS behaves exactly like WRMSR with the only difference being
+ * that it is not a serializing instruction by default.
+ */
+static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
+{
+ /* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
+ asm volatile("1: .byte 0x0f,0x01,0xc6\n"
+ "2:\n"
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a"(low), "d" (high));
+}
+
#define native_rdmsr(msr, val1, val2) \
do { \
u64 __val = __rdmsr((msr)); \
@@ -297,6 +312,11 @@ do { \
#endif /* !CONFIG_PARAVIRT_XXL */
+static __always_inline void wrmsrns(u32 msr, u64 val)
+{
+ __wrmsrns(msr, val, val >> 32);
+}
+
/*
* 64-bit version of wrmsr_safe():
*/
@@ -305,8 +325,8 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
}
-struct msr *msrs_alloc(void);
-void msrs_free(struct msr *msrs);
+struct msr __percpu *msrs_alloc(void);
+void msrs_free(struct msr __percpu *msrs);
int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
@@ -315,8 +335,8 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
@@ -345,14 +365,14 @@ static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
return 0;
}
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+ rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h));
}
static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+ wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h));
}
static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
u32 *l, u32 *h)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5c5f1e56c404..41a0ebb699ec 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -14,9 +14,6 @@ extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int);
-struct ctl_table;
-extern int proc_nmi_enabled(struct ctl_table *, int ,
- void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
#endif /* CONFIG_X86_LOCAL_APIC */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 262e65539f83..fc3a8a3c7ffe 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -59,13 +59,13 @@
#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS \
- incq %gs:__x86_call_count;
+ incq PER_CPU_VAR(__x86_call_count);
# define CALL_THUNKS_DEBUG_INC_RETS \
- incq %gs:__x86_ret_count;
+ incq PER_CPU_VAR(__x86_ret_count);
# define CALL_THUNKS_DEBUG_INC_STUFFS \
- incq %gs:__x86_stuffs_count;
+ incq PER_CPU_VAR(__x86_stuffs_count);
# define CALL_THUNKS_DEBUG_INC_CTXSW \
- incq %gs:__x86_ctxsw_count;
+ incq PER_CPU_VAR(__x86_ctxsw_count);
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
@@ -73,16 +73,13 @@
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif
-#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
+#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
#include <asm/asm-offsets.h>
#define CREDIT_CALL_DEPTH \
movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
-#define ASM_CREDIT_CALL_DEPTH \
- movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
-
#define RESET_CALL_DEPTH \
xor %eax, %eax; \
bts $63, %rax; \
@@ -95,20 +92,14 @@
CALL_THUNKS_DEBUG_INC_CALLS
#define INCREMENT_CALL_DEPTH \
- sarq $5, %gs:pcpu_hot + X86_call_depth; \
- CALL_THUNKS_DEBUG_INC_CALLS
-
-#define ASM_INCREMENT_CALL_DEPTH \
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#else
#define CREDIT_CALL_DEPTH
-#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
-#define INCREMENT_CALL_DEPTH
-#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
+#define INCREMENT_CALL_DEPTH
#endif
/*
@@ -158,7 +149,7 @@
jnz 771b; \
/* barrier for jnz misprediction */ \
lfence; \
- ASM_CREDIT_CALL_DEPTH \
+ CREDIT_CALL_DEPTH \
CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
@@ -212,7 +203,7 @@
*/
.macro VALIDATE_UNRET_END
#if defined(CONFIG_NOINSTR_VALIDATION) && \
- (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
ANNOTATE_RETPOLINE_SAFE
nop
#endif
@@ -241,7 +232,7 @@
* instruction irrespective of kCFI.
*/
.macro JMP_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
__CS_PREFIX \reg
jmp __x86_indirect_thunk_\reg
#else
@@ -251,7 +242,7 @@
.endm
.macro CALL_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
__CS_PREFIX \reg
call __x86_indirect_thunk_\reg
#else
@@ -271,7 +262,7 @@
.Lskip_rsb_\@:
.endm
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
#define CALL_UNTRAIN_RET "call entry_untrain_ret"
#else
#define CALL_UNTRAIN_RET ""
@@ -289,7 +280,7 @@
* where we have a stack but before any RET instruction.
*/
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
-#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
+#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
VALIDATE_UNRET_END
ALTERNATIVE_3 "", \
CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
@@ -309,12 +300,23 @@
.macro CALL_DEPTH_ACCOUNT
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
ALTERNATIVE "", \
- __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+ __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -328,19 +330,19 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
#else
static inline void __x86_return_thunk(void) {}
#endif
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
extern void retbleed_return_thunk(void);
#else
static inline void retbleed_return_thunk(void) {}
#endif
-#ifdef CONFIG_CPU_SRSO
+#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
#else
@@ -357,7 +359,9 @@ extern void entry_ibpb(void);
extern void (*x86_return_thunk)(void);
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+extern void __warn_thunk(void);
+
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
extern void call_depth_return_thunk(void);
#define CALL_DEPTH_ACCOUNT \
@@ -371,14 +375,14 @@ DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
-#else /* !CONFIG_CALL_DEPTH_TRACKING */
+#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
static inline void call_depth_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""
-#endif /* CONFIG_CALL_DEPTH_TRACKING */
+#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
@@ -399,7 +403,7 @@ static inline void call_depth_return_thunk(void) {}
/*
* Inline asm uses the %V modifier which is only in newer GCC
- * which is ensured when CONFIG_RETPOLINE is defined.
+ * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
ALTERNATIVE_2( \
@@ -529,13 +533,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 mds_verw_sel;
+
#include <asm/segment.h>
/**
@@ -562,17 +567,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
-/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index d18e5c332cb9..1b93ff80b43b 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -66,10 +66,14 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
* virt_addr_valid(kaddr) returns true.
*/
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
extern bool __virt_addr_valid(unsigned long kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr))
+static __always_inline void *pfn_to_kaddr(unsigned long pfn)
+{
+ return __va(pfn << PAGE_SHIFT);
+}
+
static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
{
return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 86bd4311daf8..9da9c8a2f1df 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -7,7 +7,7 @@
#include <linux/mem_encrypt.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b40c462b4af3..b3ab80a03365 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -10,7 +10,6 @@
#include <linux/numa.h>
#include <asm/io.h>
#include <asm/memtype.h>
-#include <asm/x86_init.h>
struct pci_sysdata {
int domain; /* PCI domain */
@@ -124,16 +123,4 @@ cpumask_of_pcibus(const struct pci_bus *bus)
}
#endif
-struct pci_setup_rom {
- struct setup_data data;
- uint16_t vendor;
- uint16_t devid;
- uint64_t pcilen;
- unsigned long segment;
- unsigned long bus;
- unsigned long device;
- unsigned long function;
- uint8_t romdata[];
-};
-
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 5e01883eb51e..44958ebaf626 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,17 +4,21 @@
#ifdef CONFIG_X86_64
#define __percpu_seg gs
+#define __percpu_rel (%rip)
#else
#define __percpu_seg fs
+#define __percpu_rel
#endif
#ifdef __ASSEMBLY__
#ifdef CONFIG_SMP
-#define PER_CPU_VAR(var) %__percpu_seg:var
-#else /* ! SMP */
-#define PER_CPU_VAR(var) var
-#endif /* SMP */
+#define __percpu %__percpu_seg:
+#else
+#define __percpu
+#endif
+
+#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
#ifdef CONFIG_X86_64_SMP
#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
@@ -24,30 +28,84 @@
#else /* ...!ASSEMBLY */
+#include <linux/build_bug.h>
#include <linux/stringify.h>
#include <asm/asm.h>
#ifdef CONFIG_SMP
+
+#ifdef CONFIG_CC_HAS_NAMED_AS
+
+#ifdef __CHECKER__
+#define __seg_gs __attribute__((address_space(__seg_gs)))
+#define __seg_fs __attribute__((address_space(__seg_fs)))
+#endif
+
+#ifdef CONFIG_X86_64
+#define __percpu_seg_override __seg_gs
+#else
+#define __percpu_seg_override __seg_fs
+#endif
+
+#define __percpu_prefix ""
+
+#else /* CONFIG_CC_HAS_NAMED_AS */
+
+#define __percpu_seg_override
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
+
+#endif /* CONFIG_CC_HAS_NAMED_AS */
+
+#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
#define __my_cpu_offset this_cpu_read(this_cpu_off)
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+/*
+ * Efficient implementation for cases in which the compiler supports
+ * named address spaces. Allows the compiler to perform additional
+ * optimizations that can save more instructions.
+ */
+#define arch_raw_cpu_ptr(ptr) \
+({ \
+ unsigned long tcp_ptr__; \
+ tcp_ptr__ = __raw_cpu_read(, this_cpu_off); \
+ \
+ tcp_ptr__ += (unsigned long)(ptr); \
+ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
+})
+#else /* CONFIG_USE_X86_SEG_SUPPORT */
/*
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
*/
-#define arch_raw_cpu_ptr(ptr) \
-({ \
- unsigned long tcp_ptr__; \
- asm ("add " __percpu_arg(1) ", %0" \
- : "=r" (tcp_ptr__) \
- : "m" (this_cpu_off), "0" (ptr)); \
- (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
+#define arch_raw_cpu_ptr(ptr) \
+({ \
+ unsigned long tcp_ptr__; \
+ asm ("mov " __percpu_arg(1) ", %0" \
+ : "=r" (tcp_ptr__) \
+ : "m" (__my_cpu_var(this_cpu_off))); \
+ \
+ tcp_ptr__ += (unsigned long)(ptr); \
+ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
})
-#else
+#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+
+#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel
+
+#else /* CONFIG_SMP */
+#define __percpu_seg_override
#define __percpu_prefix ""
-#endif
+#define __force_percpu_prefix ""
+
+#define PER_CPU_VAR(var) (var)__percpu_rel
+#endif /* CONFIG_SMP */
+
+#define __my_cpu_type(var) typeof(var) __percpu_seg_override
+#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
+#define __my_cpu_var(var) (*__my_cpu_ptr(&var))
#define __percpu_arg(x) __percpu_prefix "%" #x
+#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
/*
* Initialized pointers to per-cpu variables needed for the boot
@@ -107,14 +165,14 @@ do { \
(void)pto_tmp__; \
} \
asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
- : [var] "+m" (_var) \
+ : [var] "+m" (__my_cpu_var(_var)) \
: [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
#define percpu_unary_op(size, qual, op, _var) \
({ \
asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
- : [var] "+m" (_var)); \
+ : [var] "+m" (__my_cpu_var(_var))); \
})
/*
@@ -144,16 +202,16 @@ do { \
__pcpu_type_##size pfo_val__; \
asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \
: [val] __pcpu_reg_##size("=", pfo_val__) \
- : [var] "m" (_var)); \
+ : [var] "m" (__my_cpu_var(_var))); \
(typeof(_var))(unsigned long) pfo_val__; \
})
#define percpu_stable_op(size, op, _var) \
({ \
__pcpu_type_##size pfo_val__; \
- asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
+ asm(__pcpu_op2_##size(op, __force_percpu_arg(a[var]), "%[val]") \
: [val] __pcpu_reg_##size("=", pfo_val__) \
- : [var] "p" (&(_var))); \
+ : [var] "i" (&(_var))); \
(typeof(_var))(unsigned long) pfo_val__; \
})
@@ -166,7 +224,7 @@ do { \
asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
__percpu_arg([var])) \
: [tmp] __pcpu_reg_##size("+", paro_tmp__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: : "memory"); \
(typeof(_var))(unsigned long) (paro_tmp__ + _val); \
})
@@ -187,7 +245,7 @@ do { \
__percpu_arg([var])) \
"\n\tjnz 1b" \
: [oval] "=&a" (pxo_old__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pxo_new__) \
: "memory"); \
(typeof(_var))(unsigned long) pxo_old__; \
@@ -204,7 +262,7 @@ do { \
asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
__percpu_arg([var])) \
: [oval] "+a" (pco_old__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
: "memory"); \
(typeof(_var))(unsigned long) pco_old__; \
@@ -221,7 +279,7 @@ do { \
CC_SET(z) \
: CC_OUT(z) (success), \
[oval] "+a" (pco_old__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
: "memory"); \
if (unlikely(!success)) \
@@ -244,7 +302,7 @@ do { \
\
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- : [var] "+m" (_var), \
+ : [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -276,7 +334,7 @@ do { \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
CC_SET(z) \
: CC_OUT(z) (success), \
- [var] "+m" (_var), \
+ [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -313,7 +371,7 @@ do { \
\
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- : [var] "+m" (_var), \
+ : [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -345,7 +403,7 @@ do { \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
CC_SET(z) \
: CC_OUT(z) (success), \
- [var] "+m" (_var), \
+ [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -366,9 +424,9 @@ do { \
* accessed while this_cpu_read_stable() allows the value to be cached.
* this_cpu_read_stable() is more efficient and can be used if its value
* is guaranteed to be valid across cpus. The current users include
- * get_current() and get_thread_info() both of which are actually
- * per-thread variables implemented as per-cpu variables and thus
- * stable for the duration of the respective task.
+ * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are
+ * actually per-thread variables implemented as per-CPU variables and
+ * thus stable for the duration of the respective task.
*/
#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
@@ -376,13 +434,72 @@ do { \
#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+
+#define __raw_cpu_read(qual, pcp) \
+({ \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \
+})
+
+#define __raw_cpu_write(qual, pcp, val) \
+do { \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \
+} while (0)
+
+#define raw_cpu_read_1(pcp) __raw_cpu_read(, pcp)
+#define raw_cpu_read_2(pcp) __raw_cpu_read(, pcp)
+#define raw_cpu_read_4(pcp) __raw_cpu_read(, pcp)
+#define raw_cpu_write_1(pcp, val) __raw_cpu_write(, pcp, val)
+#define raw_cpu_write_2(pcp, val) __raw_cpu_write(, pcp, val)
+#define raw_cpu_write_4(pcp, val) __raw_cpu_write(, pcp, val)
+
+#define this_cpu_read_1(pcp) __raw_cpu_read(volatile, pcp)
+#define this_cpu_read_2(pcp) __raw_cpu_read(volatile, pcp)
+#define this_cpu_read_4(pcp) __raw_cpu_read(volatile, pcp)
+#define this_cpu_write_1(pcp, val) __raw_cpu_write(volatile, pcp, val)
+#define this_cpu_write_2(pcp, val) __raw_cpu_write(volatile, pcp, val)
+#define this_cpu_write_4(pcp, val) __raw_cpu_write(volatile, pcp, val)
+
+#ifdef CONFIG_X86_64
+#define raw_cpu_read_8(pcp) __raw_cpu_read(, pcp)
+#define raw_cpu_write_8(pcp, val) __raw_cpu_write(, pcp, val)
+
+#define this_cpu_read_8(pcp) __raw_cpu_read(volatile, pcp)
+#define this_cpu_write_8(pcp, val) __raw_cpu_write(volatile, pcp, val)
+#endif
+
+#define this_cpu_read_const(pcp) __raw_cpu_read(, pcp)
+#else /* CONFIG_USE_X86_SEG_SUPPORT */
+
#define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp)
#define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp)
#define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp)
-
#define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val)
#define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val)
#define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val)
+
+#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
+#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
+#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
+
+#ifdef CONFIG_X86_64
+#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
+#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
+
+#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
+#endif
+
+/*
+ * The generic per-cpu infrastrucutre is not suitable for
+ * reading const-qualified variables.
+ */
+#define this_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
+#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+
#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
@@ -408,12 +525,6 @@ do { \
#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
-#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
@@ -452,8 +563,6 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
#define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
@@ -462,8 +571,6 @@ do { \
#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
-#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
@@ -494,7 +601,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
asm volatile("btl "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
- : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
+ : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
return oldbit;
}
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 94de1a05aeba..d65e338b6a5f 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config)
static inline int p4_ht_active(void)
{
#ifdef CONFIG_SMP
- return smp_num_siblings > 1;
+ return __max_threads_per_core > 1;
#endif
return 0;
}
@@ -189,7 +189,7 @@ static inline int p4_ht_active(void)
static inline int p4_ht_thread(int cpu)
{
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
+ if (__max_threads_per_core == 2)
return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
#endif
return 0;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index c7ec5bb88334..dcd836b59beb 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/
extern gfp_t __userpte_alloc_gfp;
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Instead of one PGD, we acquire two PGDs. Being order-1, it is
* both 8k in size and 8k-aligned. That lets us just flip bit 12
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 9e7c0b719c3c..dabafba957ea 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -52,7 +52,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
#endif
pxx_xchg64(pud, pudp, native_pud_val(pud));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 9d077bca6a10..df0f7d4a96f3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -909,7 +909,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
pmd_t *populate_extra_pmd(unsigned long vaddr);
pte_t *populate_extra_pte(unsigned long vaddr);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd);
/*
@@ -923,12 +923,12 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
return pgd;
return __pti_set_user_pgtbl(pgdp, pgd);
}
-#else /* CONFIG_PAGE_TABLE_ISOLATION */
+#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
#endif /* __ASSEMBLY__ */
@@ -1131,7 +1131,7 @@ static inline int p4d_bad(p4d_t p4d)
{
unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
- if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (p4d_flags(p4d) & ~ignore_flags) != 0;
@@ -1177,7 +1177,7 @@ static inline int pgd_bad(pgd_t pgd)
if (!pgtable_l5_enabled())
return 0;
- if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
@@ -1422,9 +1422,9 @@ static inline bool pgdp_maps_userspace(void *__ptr)
#define pgd_leaf pgd_large
static inline int pgd_large(pgd_t pgd) { return 0; }
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
- * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages
* (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
* the user one is in the last 4k. To switch between them, you
* just need to flip the 12th bit in their addresses.
@@ -1469,7 +1469,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
{
return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
@@ -1484,7 +1484,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/* Clone the user space pgd as well */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 24af25b1551a..7e9db77231ac 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,7 +143,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
- if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ if (pgtable_l5_enabled() ||
+ !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) {
WRITE_ONCE(*p4dp, p4d);
return;
}
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 38b54b992f32..9053dfe9fa03 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -21,9 +21,9 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
-#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
+#ifdef CONFIG_X86_5LEVEL
#ifdef USE_EARLY_PGTABLE_L5
/*
* cpu_feature_enabled() is not available in early boot code.
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index af77235fded6..919909d8cb77 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -91,7 +91,7 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
+ return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
__percpu_arg([var]));
}
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index d8cccadc83a6..e5f204b9b33d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -51,7 +51,7 @@
#define CR3_NOFLUSH 0
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 26620d7642a9..811548f131f4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -20,7 +20,6 @@ struct vm86;
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
-#include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
@@ -100,6 +99,9 @@ struct cpuinfo_topology {
u32 logical_pkg_id;
u32 logical_die_id;
+ // AMD Node ID and Nodes per Package info
+ u32 amd_node_id;
+
// Cache level topology IDs
u32 llc_id;
u32 l2c_id;
@@ -119,8 +121,6 @@ struct cpuinfo_x86 {
#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
- /* CPUID returned core id bits: */
- __u8 x86_coreid_bits;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
@@ -148,8 +148,6 @@ struct cpuinfo_x86 {
unsigned long loops_per_jiffy;
/* protected processor identification number */
u64 ppin;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
u16 x86_clflush_size;
/* number of cores as seen by the OS: */
u16 booted_cores;
@@ -186,13 +184,8 @@ extern struct cpuinfo_x86 new_cpu_data;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
-#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#else
-#define cpu_info boot_cpu_data
-#define cpu_data(cpu) boot_cpu_data
-#endif
extern const struct seq_operations cpuinfo_op;
@@ -533,6 +526,9 @@ static __always_inline unsigned long current_top_of_stack(void)
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_pcpu_hot.top_of_stack);
+
return this_cpu_read_stable(pcpu_hot.top_of_stack);
}
@@ -555,7 +551,7 @@ static inline void load_sp0(unsigned long sp0)
unsigned long __get_wchan(struct task_struct *p);
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
+extern void select_idle_routine(void);
extern void amd_e400_c1e_apic_setup(void);
extern unsigned long boot_option_idle_override;
@@ -576,28 +572,6 @@ extern void cpu_init(void);
extern void cpu_init_exception_handling(void);
extern void cr4_init(void);
-static inline unsigned long get_debugctlmsr(void)
-{
- unsigned long debugctlmsr = 0;
-
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return 0;
-#endif
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-
- return debugctlmsr;
-}
-
-static inline void update_debugctlmsr(unsigned long debugctlmsr)
-{
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return;
-#endif
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-}
-
extern void set_task_blockstep(struct task_struct *task, bool on);
/* Boot loader type from the setup header: */
@@ -664,8 +638,10 @@ static __always_inline void prefetchw(const void *x)
#else
extern unsigned long __end_init_task[];
-#define INIT_THREAD { \
- .sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__end_init_task - \
+ TOP_OF_KERNEL_STACK_PADDING - \
+ sizeof(struct pt_regs), \
}
extern unsigned long KSTK_ESP(struct task_struct *task);
@@ -704,12 +680,10 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
}
#ifdef CONFIG_CPU_SUP_AMD
-extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
extern void amd_clear_divider(void);
extern void amd_check_microcode(void);
#else
-static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 65dee2420624..043758a2e627 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -23,11 +23,11 @@ extern int of_ioapic;
extern u64 initial_dtb;
extern void add_dtb(u64 data);
void x86_of_pci_init(void);
-void x86_dtb_init(void);
+void x86_dtb_parse_smp_config(void);
#else
static inline void add_dtb(u64 data) { }
static inline void x86_of_pci_init(void) { }
-static inline void x86_dtb_init(void) { }
+static inline void x86_dtb_parse_smp_config(void) { }
#define of_ioapic 0
#endif
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 07375b476c4f..ab167c96b9ab 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -3,7 +3,7 @@
#define _ASM_X86_PTI_H
#ifndef __ASSEMBLY__
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
extern void pti_init(void);
extern void pti_check_boottime_disable(void);
extern void pti_finalize(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b09c8f..5a83fbd9bc0b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -56,18 +56,64 @@ struct pt_regs {
#else /* __i386__ */
+struct fred_cs {
+ /* CS selector */
+ u64 cs : 16,
+ /* Stack level at event time */
+ sl : 2,
+ /* IBT in WAIT_FOR_ENDBRANCH state */
+ wfe : 1,
+ : 45;
+};
+
+struct fred_ss {
+ /* SS selector */
+ u64 ss : 16,
+ /* STI state */
+ sti : 1,
+ /* Set if syscall, sysenter or INT n */
+ swevent : 1,
+ /* Event is NMI type */
+ nmi : 1,
+ : 13,
+ /* Event vector */
+ vector : 8,
+ : 8,
+ /* Event type */
+ type : 4,
+ : 4,
+ /* Event was incident to enclave execution */
+ enclave : 1,
+ /* CPU was in long mode */
+ lm : 1,
+ /*
+ * Nested exception during FRED delivery, not set
+ * for #DF.
+ */
+ nested : 1,
+ : 1,
+ /*
+ * The length of the instruction causing the event.
+ * Only set for INTO, INT1, INT3, INT n, SYSCALL
+ * and SYSENTER. 0 otherwise.
+ */
+ insnlen : 4;
+};
+
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -77,18 +123,50 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ *
+ * A FRED stack frame starts here:
+ * 1) It _always_ includes an error code;
+ *
+ * 2) The return frame for ERET[US] starts here, but
+ * the content of orig_ax is ignored.
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* CS selector */
+ u16 cs;
+ /* The extended 64-bit data slot containing CS */
+ u64 csx;
+ /* The FRED CS extension */
+ struct fred_cs fred_cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* SS selector */
+ u16 ss;
+ /* The extended 64-bit data slot containing SS */
+ u64 ssx;
+ /* The FRED SS extension */
+ struct fred_ss fred_ss;
+ };
+
+ /*
+ * Top of stack on IDT systems, while FRED systems have extra fields
+ * defined above for storing exception related information, e.g. CR2 or
+ * DR6.
+ */
};
#endif /* !__i386__ */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 255a78d9d906..12dbd2588ca7 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -7,6 +7,13 @@
#include <linux/sched.h>
#include <linux/jump_label.h>
+/*
+ * This value can never be a valid CLOSID, and is used when mapping a
+ * (closid, rmid) pair to an index and back. On x86 only the RMID is
+ * needed. The index is a software defined value.
+ */
+#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0)
+
/**
* struct resctrl_pqr_state - State cache for the PQR MSR
* @cur_rmid: The cached Resource Monitoring ID
@@ -31,10 +38,47 @@ struct resctrl_pqr_state {
DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+static inline bool resctrl_arch_alloc_capable(void)
+{
+ return rdt_alloc_capable;
+}
+
+static inline void resctrl_arch_enable_alloc(void)
+{
+ static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_alloc(void)
+{
+ static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
+static inline bool resctrl_arch_mon_capable(void)
+{
+ return rdt_mon_capable;
+}
+
+static inline void resctrl_arch_enable_mon(void)
+{
+ static_branch_enable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_mon(void)
+{
+ static_branch_disable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
/*
* __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
@@ -88,12 +132,58 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
return val * scale;
}
+static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
+ u32 closid, u32 rmid)
+{
+ WRITE_ONCE(tsk->closid, closid);
+ WRITE_ONCE(tsk->rmid, rmid);
+}
+
+static inline bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
+{
+ return READ_ONCE(tsk->closid) == closid;
+}
+
+static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
+ u32 rmid)
+{
+ return READ_ONCE(tsk->rmid) == rmid;
+}
+
static inline void resctrl_sched_in(struct task_struct *tsk)
{
if (static_branch_likely(&rdt_enable_key))
__resctrl_sched_in(tsk);
}
+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+ return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = X86_RESCTRL_EMPTY_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
+{
+ return rmid;
+}
+
+/* x86 can always read an rmid, nothing needs allocating */
+struct rdt_resource;
+static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
+{
+ might_sleep();
+ return NULL;
+};
+
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
+ void *ctx) { };
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 4b081e0d3306..363266cbcada 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -13,7 +13,7 @@
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c = false; \
- asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
+ asm goto (fullop "; j" #cc " %l[cc_label]" \
: : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
if (0) { \
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index a5e89641bd2d..9aee31862b4a 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -47,6 +47,7 @@ int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
+int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 5c83729c8e71..e61e68d71cba 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -48,7 +48,7 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern void startup_64_setup_env(unsigned long physbase);
+extern void startup_64_setup_gdt_idt(void);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h
new file mode 100644
index 000000000000..77c51111a893
--- /dev/null
+++ b/arch/x86/include/asm/setup_data.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SETUP_DATA_H
+#define _ASM_X86_SETUP_DATA_H
+
+#include <uapi/asm/setup_data.h>
+
+#ifndef __ASSEMBLY__
+
+struct pci_setup_rom {
+ struct setup_data data;
+ uint16_t vendor;
+ uint16_t devid;
+ uint64_t pcilen;
+ unsigned long segment;
+ unsigned long bus;
+ unsigned long device;
+ unsigned long function;
+ uint8_t romdata[];
+};
+
+/* kexec external ABI */
+struct efi_setup_data {
+ u64 fw_vendor;
+ u64 __unused;
+ u64 tables;
+ u64 smbios;
+ u64 reserved[8];
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5b4a1ce3d368..9477b4053bce 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -13,7 +13,6 @@
#include <asm/insn.h>
#include <asm/sev-common.h>
-#include <asm/bootparam.h>
#include <asm/coco.h>
#define GHCB_PROTOCOL_MIN 1ULL
@@ -22,6 +21,8 @@
#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
+struct boot_params;
+
enum es_result {
ES_OK, /* All good */
ES_UNSUPPORTED, /* Requested operation not supported */
@@ -87,9 +88,23 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
/* Software defined (when rFlags.CF = 1) */
#define PVALIDATE_FAIL_NOUPDATE 255
+/* RMUPDATE detected 4K page and 2MB page overlap. */
+#define RMPUPDATE_FAIL_OVERLAP 4
+
/* RMP page size */
#define RMP_PG_SIZE_4K 0
#define RMP_PG_SIZE_2M 1
+#define RMP_TO_PG_LEVEL(level) (((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
+#define PG_LEVEL_TO_RMP(level) (((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
+
+struct rmp_state {
+ u64 gpa;
+ u8 assigned;
+ u8 pagesize;
+ u8 immutable;
+ u8 rsvd;
+ u32 asid;
+} __packed;
#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
@@ -199,20 +214,22 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
struct snp_guest_request_ioctl;
void setup_ghcb(void);
-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
- unsigned long npages);
-void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
- unsigned long npages);
+void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
+void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void __init __noreturn snp_abort(void);
+void __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
+void kdump_sev_callback(void);
+void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -241,6 +258,30 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
+static inline void kdump_sev_callback(void) { }
+static inline void sev_show_status(void) { }
+#endif
+
+#ifdef CONFIG_KVM_AMD_SEV
+bool snp_probe_rmptable_info(void);
+int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
+void snp_dump_hva_rmpentry(unsigned long address);
+int psmash(u64 pfn);
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
+int rmp_make_shared(u64 pfn, enum pg_level level);
+void snp_leak_pages(u64 pfn, unsigned int npages);
+#else
+static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
+static inline void snp_dump_hva_rmpentry(unsigned long address) {}
+static inline int psmash(u64 pfn) { return -ENODEV; }
+static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid,
+ bool immutable)
+{
+ return -ENODEV;
+}
+static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
#endif
#endif
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4fab2ed454f3..a35936b512fe 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -8,9 +8,6 @@
#include <asm/current.h>
#include <asm/thread_info.h>
-extern int smp_num_siblings;
-extern unsigned int num_processors;
-
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
@@ -59,11 +56,6 @@ static inline void stop_other_cpus(void)
smp_ops.stop_other_cpus(1);
}
-static inline void smp_prepare_boot_cpu(void)
-{
- smp_ops.smp_prepare_boot_cpu();
-}
-
static inline void smp_prepare_cpus(unsigned int max_cpus)
{
smp_ops.smp_prepare_cpus(max_cpus);
@@ -110,7 +102,6 @@ void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
-void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
@@ -174,8 +165,6 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
}
#endif /* CONFIG_SMP */
-extern unsigned disabled_cpus;
-
#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
#else
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index c648502e4535..658b690b2ccb 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -96,4 +96,6 @@ static inline void speculative_store_bypass_ht_init(void) { }
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);
+extern bool itlb_multihit_kvm_mitigation;
+
#endif
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d6cd9344f6c7..2e9fc5c400cd 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -2,11 +2,11 @@
#ifndef _ASM_X86_SPECIAL_INSNS_H
#define _ASM_X86_SPECIAL_INSNS_H
-
#ifdef __KERNEL__
-
#include <asm/nops.h>
#include <asm/processor-flags.h>
+
+#include <linux/errno.h>
#include <linux/irqflags.h>
#include <linux/jump_label.h>
@@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p)
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+ asm goto("1: wrussq %[val], (%[addr])\n"
_ASM_EXTABLE(1b, %l[fail])
:: [addr] "r" (addr), [val] "r" (val)
:: fail);
@@ -224,10 +224,10 @@ static inline void serialize(void)
}
/* The dst parameter must be 64-bytes aligned */
-static inline void movdir64b(void __iomem *dst, const void *src)
+static inline void movdir64b(void *dst, const void *src)
{
const struct { char _[64]; } *__src = src;
- struct { char _[64]; } __iomem *__dst = dst;
+ struct { char _[64]; } *__dst = dst;
/*
* MOVDIR64B %(rdx), rax.
@@ -245,6 +245,11 @@ static inline void movdir64b(void __iomem *dst, const void *src)
: "m" (*__src), "a" (__dst), "d" (__src));
}
+static inline void movdir64b_io(void __iomem *dst, const void *src)
+{
+ movdir64b((void __force *)dst, src);
+}
+
/**
* enqcmds - Enqueue a command in supervisor (CPL0) mode
* @dst: destination, in MMIO space (must be 512-bit aligned)
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 343b722ccaf2..125c407e2abe 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -46,7 +46,7 @@
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
#else
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index f42dbf17f52b..c3bd0c0758c9 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -70,9 +70,13 @@ static inline void update_task_stack(struct task_struct *task)
#ifdef CONFIG_X86_32
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- /* Xen PV enters the kernel on the thread stack. */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+ /* WRMSRNS is a baseline feature for FRED. */
+ wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
+ } else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
+ /* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
+ }
#endif
}
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 21f9407be5d3..7e88705e907f 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
,,regs->di,,regs->si,,regs->dx \
,,regs->r10,,regs->r8,,regs->r9) \
+
+/* SYSCALL_PT_ARGS is Adapted from s390x */
+#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \
+ SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp))
+#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \
+ SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di))
+#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \
+ SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si))
+#define SYSCALL_PT_ARG3(m, t1, t2, t3) \
+ SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx))
+#define SYSCALL_PT_ARG2(m, t1, t2) \
+ SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx))
+#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx))
+#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+
+#define __SC_COMPAT_CAST(t, a) \
+ (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \
+ (unsigned int)a
+
/* Mapping of registers to parameters for syscalls on i386 */
#define SC_IA32_REGS_TO_ARGS(x, ...) \
- __MAP(x,__SC_ARGS \
- ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
- ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
- ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+ SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \
+ __MAP(x, __SC_TYPE, __VA_ARGS__)) \
#define __SYS_STUB0(abi, name) \
long __##abi##_##name(const struct pt_regs *regs); \
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 0b70653a98c1..345aafbc1964 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -15,6 +15,8 @@
extern void text_poke_early(void *addr, const void *opcode, size_t len);
+extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len);
+
/*
* Clear and restore the kernel write-protection flag on the local CPU.
* Allows the kernel to edit read-only pages.
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d63b02940747..12da7dfd5ef1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,9 @@
* In vm86 mode, the hardware frame is much longer still, so add 16
* bytes to make room for the real-mode segments.
*
- * x86_64 has a fixed-length stack frame.
+ * x86-64 has a fixed-length stack frame, but it depends on whether
+ * or not FRED is enabled. Future versions of FRED might make this
+ * dynamic, but for now it is always 2 words longer.
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_VM86
@@ -39,8 +41,12 @@
# else
# define TOP_OF_KERNEL_STACK_PADDING 8
# endif
-#else
-# define TOP_OF_KERNEL_STACK_PADDING 0
+#else /* x86-64 */
+# ifdef CONFIG_X86_FRED
+# define TOP_OF_KERNEL_STACK_PADDING (2 * 8)
+# else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+# endif
#endif
/*
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 5f87f6b9b09e..abe3a8f22cbd 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -102,6 +102,35 @@ static inline void setup_node_to_cpumask_map(void) { }
#include <asm-generic/topology.h>
+/* Topology information */
+enum x86_topology_domains {
+ TOPO_SMT_DOMAIN,
+ TOPO_CORE_DOMAIN,
+ TOPO_MODULE_DOMAIN,
+ TOPO_TILE_DOMAIN,
+ TOPO_DIE_DOMAIN,
+ TOPO_DIEGRP_DOMAIN,
+ TOPO_PKG_DOMAIN,
+ TOPO_MAX_DOMAIN,
+};
+
+struct x86_topology_system {
+ unsigned int dom_shifts[TOPO_MAX_DOMAIN];
+ unsigned int dom_size[TOPO_MAX_DOMAIN];
+};
+
+extern struct x86_topology_system x86_topo_system;
+
+static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom)
+{
+ return x86_topo_system.dom_size[dom];
+}
+
+static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom)
+{
+ return dom == TOPO_SMT_DOMAIN ? 0 : x86_topo_system.dom_shifts[dom - 1];
+}
+
extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern const struct cpumask *cpu_clustergroup_mask(int cpu);
@@ -112,7 +141,42 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
-extern unsigned int __max_die_per_package;
+#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id)
+
+extern unsigned int __max_dies_per_package;
+extern unsigned int __max_logical_packages;
+extern unsigned int __max_threads_per_core;
+extern unsigned int __num_threads_per_package;
+extern unsigned int __num_cores_per_package;
+
+static inline unsigned int topology_max_packages(void)
+{
+ return __max_logical_packages;
+}
+
+static inline unsigned int topology_max_dies_per_package(void)
+{
+ return __max_dies_per_package;
+}
+
+static inline unsigned int topology_num_cores_per_package(void)
+{
+ return __num_cores_per_package;
+}
+
+static inline unsigned int topology_num_threads_per_package(void)
+{
+ return __num_threads_per_package;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level);
+#else
+static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_SMP
#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id)
@@ -121,12 +185,11 @@ extern unsigned int __max_die_per_package;
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
-extern unsigned int __max_logical_packages;
-#define topology_max_packages() (__max_logical_packages)
-static inline int topology_max_die_per_package(void)
+static inline int topology_phys_to_logical_pkg(unsigned int pkg)
{
- return __max_die_per_package;
+ return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN],
+ TOPO_PKG_DOMAIN);
}
extern int __max_smt_threads;
@@ -138,9 +201,12 @@ static inline int topology_max_smt_threads(void)
#include <linux/cpu_smt.h>
-int topology_update_package_map(unsigned int apicid, unsigned int cpu);
-int topology_update_die_map(unsigned int dieid, unsigned int cpu);
-int topology_phys_to_logical_pkg(unsigned int pkg);
+extern unsigned int __amd_nodes_per_pkg;
+
+static inline unsigned int topology_amd_nodes_per_pkg(void)
+{
+ return __amd_nodes_per_pkg;
+}
extern struct cpumask __cpu_primary_thread_mask;
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -153,16 +219,12 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
}
+
#else /* CONFIG_SMP */
-#define topology_max_packages() (1)
-static inline int
-topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
-static inline int
-topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
#endif /* !CONFIG_SMP */
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index afa524325e55..a23a7b707b64 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_TRAP_PF_H
#define _ASM_X86_TRAP_PF_H
+#include <linux/bits.h>
+
/*
* Page fault error code bits:
*
@@ -13,16 +15,18 @@
* bit 5 == 1: protection keys block access
* bit 6 == 1: shadow stack access fault
* bit 15 == 1: SGX MMU page-fault
+ * bit 31 == 1: fault was due to RMP violation
*/
enum x86_pf_error_code {
- X86_PF_PROT = 1 << 0,
- X86_PF_WRITE = 1 << 1,
- X86_PF_USER = 1 << 2,
- X86_PF_RSVD = 1 << 3,
- X86_PF_INSTR = 1 << 4,
- X86_PF_PK = 1 << 5,
- X86_PF_SHSTK = 1 << 6,
- X86_PF_SGX = 1 << 15,
+ X86_PF_PROT = BIT(0),
+ X86_PF_WRITE = BIT(1),
+ X86_PF_USER = BIT(2),
+ X86_PF_RSVD = BIT(3),
+ X86_PF_INSTR = BIT(4),
+ X86_PF_PK = BIT(5),
+ X86_PF_SHSTK = BIT(6),
+ X86_PF_SGX = BIT(15),
+ X86_PF_RMP = BIT(31),
};
#endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h
index f5d2325aa0b7..8d1154cdf787 100644
--- a/arch/x86/include/asm/trapnr.h
+++ b/arch/x86/include/asm/trapnr.h
@@ -2,6 +2,18 @@
#ifndef _ASM_X86_TRAPNR_H
#define _ASM_X86_TRAPNR_H
+/*
+ * Event type codes used by FRED, Intel VT-x and AMD SVM
+ */
+#define EVENT_TYPE_EXTINT 0 // External interrupt
+#define EVENT_TYPE_RESERVED 1
+#define EVENT_TYPE_NMI 2 // NMI
+#define EVENT_TYPE_HWEXC 3 // Hardware originated traps, exceptions
+#define EVENT_TYPE_SWINT 4 // INT n
+#define EVENT_TYPE_PRIV_SWEXC 5 // INT1
+#define EVENT_TYPE_SWEXC 6 // INTO, INT3
+#define EVENT_TYPE_OTHER 7 // FRED SYSCALL/SYSENTER, VT-x MTF
+
/* Interrupts/Exceptions */
#define X86_TRAP_DE 0 /* Divide-by-zero */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 594fce0ca744..405efb3e4996 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -5,8 +5,9 @@
#ifndef _ASM_X86_TSC_H
#define _ASM_X86_TSC_H
-#include <asm/processor.h>
#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
/*
* Standard way to access the cycle counter.
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5c367c1290c3..237dc8cdd12b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -133,7 +133,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@@ -295,7 +295,7 @@ do { \
} while (0)
#define __get_user_asm(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: mov"itype" %[umem],%[output]\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: [output] ltype(x) \
@@ -375,7 +375,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -394,7 +394,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_goto(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: mov"itype" %0,%1\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f2c02e4469cc..04789f45ab2b 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -11,6 +11,7 @@
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
+#include <asm/percpu.h>
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -18,14 +19,10 @@
*/
static inline unsigned long __untagged_addr(unsigned long addr)
{
- /*
- * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation
- * in alternative instructions. The relocation gets wrong when gets
- * copied to the target place.
- */
asm (ALTERNATIVE("",
- "and %%gs:tlbstate_untag_mask, %[addr]\n\t", X86_FEATURE_LAM)
- : [addr] "+r" (addr) : "m" (tlbstate_untag_mask));
+ "and " __percpu_arg([mask]) ", %[addr]", X86_FEATURE_LAM)
+ : [addr] "+r" (addr)
+ : [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
return addr;
}
@@ -54,7 +51,7 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
* half and a user half. When cast to a signed type, user pointers
* are positive and kernel pointers are negative.
*/
-#define valid_user_address(x) ((long)(x) >= 0)
+#define valid_user_address(x) ((__force long)(x) >= 0)
/*
* User pointers can have tag bits on x86-64. This scheme tolerates
@@ -87,8 +84,9 @@ static inline bool __access_ok(const void __user *ptr, unsigned long size)
if (__builtin_constant_p(size <= PAGE_SIZE) && size <= PAGE_SIZE) {
return valid_user_address(ptr);
} else {
- unsigned long sum = size + (unsigned long)ptr;
- return valid_user_address(sum) && sum >= (unsigned long)ptr;
+ unsigned long sum = size + (__force unsigned long)ptr;
+
+ return valid_user_address(sum) && sum >= (__force unsigned long)ptr;
}
}
#define __access_ok __access_ok
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0e73616b82f3..4dba17363008 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <uapi/asm/vmx.h>
+#include <asm/trapnr.h>
#include <asm/vmxfeatures.h>
#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f)
@@ -374,14 +375,14 @@ enum vmcs_field {
#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
-#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
-#define INTR_TYPE_RESERVED (1 << 8) /* reserved */
-#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
-#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
-#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
-#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
-#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */
+#define INTR_TYPE_EXT_INTR (EVENT_TYPE_EXTINT << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED (EVENT_TYPE_RESERVED << 8) /* reserved */
+#define INTR_TYPE_NMI_INTR (EVENT_TYPE_NMI << 8) /* NMI */
+#define INTR_TYPE_HARD_EXCEPTION (EVENT_TYPE_HWEXC << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR (EVENT_TYPE_SWINT << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION (EVENT_TYPE_PRIV_SWEXC << 8) /* ICE breakpoint */
+#define INTR_TYPE_SOFT_EXCEPTION (EVENT_TYPE_SWEXC << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT (EVENT_TYPE_OTHER << 8) /* other event */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index ab60a71a8dcb..472f0263dbc6 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
@@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code,
}
#endif
+/*
+ * The (legacy) vsyscall page is the long page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+ return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 46b4f1f7f354..e8d7d4941c4c 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -2,7 +2,8 @@
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
/*
* This is largely generic for little-endian machines, but the
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c878616a18b8..b89b40f250e6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -2,8 +2,6 @@
#ifndef _ASM_X86_PLATFORM_H
#define _ASM_X86_PLATFORM_H
-#include <asm/bootparam.h>
-
struct ghcb;
struct mpc_bus;
struct mpc_cpu;
@@ -15,13 +13,15 @@ struct irq_domain;
/**
* struct x86_init_mpparse - platform specific mpparse ops
* @setup_ioapic_ids: platform specific ioapic id override
- * @find_smp_config: find the smp configuration
- * @get_smp_config: get the smp configuration
+ * @find_mptable: Find MPTABLE early to reserve the memory region
+ * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init()
+ * @parse_smp_cfg: Parse the SMP configuration data
*/
struct x86_init_mpparse {
void (*setup_ioapic_ids)(void);
- void (*find_smp_config)(void);
- void (*get_smp_config)(unsigned int early);
+ void (*find_mptable)(void);
+ void (*early_parse_smp_cfg)(void);
+ void (*parse_smp_cfg)(void);
};
/**
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 01d19fc22346..9b82eebd7add 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -2,21 +2,7 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H
-/* setup_data/setup_indirect types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-#define SETUP_PCI 3
-#define SETUP_EFI 4
-#define SETUP_APPLE_PROPERTIES 5
-#define SETUP_JAILHOUSE 6
-#define SETUP_CC_BLOB 7
-#define SETUP_IMA 8
-#define SETUP_RNG_SEED 9
-#define SETUP_ENUM_MAX SETUP_RNG_SEED
-
-#define SETUP_INDIRECT (1<<31)
-#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
+#include <asm/setup_data.h>
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -38,6 +24,7 @@
#define XLF_EFI_KEXEC (1<<4)
#define XLF_5LEVEL (1<<5)
#define XLF_5LEVEL_ENABLED (1<<6)
+#define XLF_MEM_ENCRYPTION (1<<7)
#ifndef __ASSEMBLY__
@@ -48,22 +35,6 @@
#include <asm/ist.h>
#include <video/edid.h>
-/* extensible setup data list node */
-struct setup_data {
- __u64 next;
- __u32 type;
- __u32 len;
- __u8 data[];
-};
-
-/* extensible setup indirect data node */
-struct setup_indirect {
- __u32 type;
- __u32 reserved; /* Reserved, must be set to zero. */
- __u64 len;
- __u64 addr;
-};
-
struct setup_header {
__u8 setup_sects;
__u16 root_flags;
@@ -137,50 +108,10 @@ struct efi_info {
#define E820_MAX_ENTRIES_ZEROPAGE 128
/*
- * The E820 memory region entry of the boot protocol ABI:
- */
-struct boot_e820_entry {
- __u64 addr;
- __u64 size;
- __u32 type;
-} __attribute__((packed));
-
-/*
* Smallest compatible version of jailhouse_setup_data required by this kernel.
*/
#define JAILHOUSE_SETUP_REQUIRED_VERSION 1
-/*
- * The boot loader is passing platform information via this Jailhouse-specific
- * setup data structure.
- */
-struct jailhouse_setup_data {
- struct {
- __u16 version;
- __u16 compatible_version;
- } __attribute__((packed)) hdr;
- struct {
- __u16 pm_timer_address;
- __u16 num_cpus;
- __u64 pci_mmconfig_base;
- __u32 tsc_khz;
- __u32 apic_khz;
- __u8 standard_ioapic;
- __u8 cpu_ids[255];
- } __attribute__((packed)) v1;
- struct {
- __u32 flags;
- } __attribute__((packed)) v2;
-} __attribute__((packed));
-
-/*
- * IMA buffer setup data information from the previous kernel during kexec
- */
-struct ima_setup_data {
- __u64 addr;
- __u64 size;
-} __attribute__((packed));
-
/* The so-called "zeropage" */
struct boot_params {
struct screen_info screen_info; /* 0x000 */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index d898432947ff..f1a4adc78272 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -139,6 +139,13 @@
#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */
#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT)
+#ifdef __x86_64__
+#define X86_CR4_FRED_BIT 32 /* enable FRED kernel entry */
+#define X86_CR4_FRED _BITUL(X86_CR4_FRED_BIT)
+#else
+#define X86_CR4_FRED (0)
+#endif
+
/*
* x86-64 Task Priority Register, CR8
*/
diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h
new file mode 100644
index 000000000000..b111b0c18544
--- /dev/null
+++ b/arch/x86/include/uapi/asm/setup_data.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_SETUP_DATA_H
+#define _UAPI_ASM_X86_SETUP_DATA_H
+
+/* setup_data/setup_indirect types */
+#define SETUP_NONE 0
+#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
+#define SETUP_PCI 3
+#define SETUP_EFI 4
+#define SETUP_APPLE_PROPERTIES 5
+#define SETUP_JAILHOUSE 6
+#define SETUP_CC_BLOB 7
+#define SETUP_IMA 8
+#define SETUP_RNG_SEED 9
+#define SETUP_ENUM_MAX SETUP_RNG_SEED
+
+#define SETUP_INDIRECT (1<<31)
+#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/* extensible setup data list node */
+struct setup_data {
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[];
+};
+
+/* extensible setup indirect data node */
+struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+};
+
+/*
+ * The E820 memory region entry of the boot protocol ABI:
+ */
+struct boot_e820_entry {
+ __u64 addr;
+ __u64 size;
+ __u32 type;
+} __attribute__((packed));
+
+/*
+ * The boot loader is passing platform information via this Jailhouse-specific
+ * setup data structure.
+ */
+struct jailhouse_setup_data {
+ struct {
+ __u16 version;
+ __u16 compatible_version;
+ } __attribute__((packed)) hdr;
+ struct {
+ __u16 pm_timer_address;
+ __u16 num_cpus;
+ __u64 pci_mmconfig_base;
+ __u32 tsc_khz;
+ __u32 apic_khz;
+ __u8 standard_ioapic;
+ __u8 cpu_ids[255];
+ } __attribute__((packed)) v1;
+ struct {
+ __u32 flags;
+ } __attribute__((packed)) v2;
+} __attribute__((packed));
+
+/*
+ * IMA buffer setup data information from the previous kernel during kexec
+ */
+struct ima_setup_data {
+ __u64 addr;
+ __u64 size;
+} __attribute__((packed));
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0000325ab98f..d0c744cb2a0e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -33,6 +33,7 @@ KASAN_SANITIZE_sev.o := n
KCSAN_SANITIZE := n
KMSAN_SANITIZE_head$(BITS).o := n
KMSAN_SANITIZE_nmi.o := n
+KMSAN_SANITIZE_sev.o := n
# If instrumentation of the following files is enabled, boot hangs during
# first second.
@@ -48,6 +49,7 @@ obj-y += platform-quirks.o
obj-y += process_$(BITS).o signal.o signal_$(BITS).o
obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o dumpstack.o nmi.o
+obj-$(CONFIG_X86_FRED) += fred.o
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
obj-$(CONFIG_X86_KERNEL_IBT) += ibt_selftest.o
obj-y += setup.o x86_init.o i8259.o irqinit.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 85a3ce2a3666..4bf82dbd2a6b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -164,35 +164,6 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
return 0;
}
-/**
- * acpi_register_lapic - register a local apic and generates a logic cpu number
- * @id: local apic id to register
- * @acpiid: ACPI id to register
- * @enabled: this cpu is enabled or not
- *
- * Returns the logic cpu number which maps to the local apic
- */
-static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
-{
- int cpu;
-
- if (id >= MAX_LOCAL_APIC) {
- pr_info("skipped apicid that is too big\n");
- return -EINVAL;
- }
-
- if (!enabled) {
- ++disabled_cpus;
- return -EINVAL;
- }
-
- cpu = generic_processor_info(id);
- if (cpu >= 0)
- early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
-
- return cpu;
-}
-
static bool __init acpi_is_processor_usable(u32 lapic_flags)
{
if (lapic_flags & ACPI_MADT_ENABLED)
@@ -254,7 +225,7 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
return 0;
}
- acpi_register_lapic(apic_id, processor->uid, enabled);
+ topology_register_apic(apic_id, processor->uid, enabled);
#else
pr_warn("x2apic entry ignored\n");
#endif
@@ -289,9 +260,9 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
- acpi_register_lapic(processor->id, /* APIC ID */
- processor->processor_id, /* ACPI ID */
- processor->lapic_flags & ACPI_MADT_ENABLED);
+ topology_register_apic(processor->id, /* APIC ID */
+ processor->processor_id, /* ACPI ID */
+ processor->lapic_flags & ACPI_MADT_ENABLED);
has_lapic_cpus = true;
return 0;
@@ -309,9 +280,9 @@ acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end)
acpi_table_print_madt_entry(&header->common);
- acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
- processor->processor_id, /* ACPI ID */
- processor->lapic_flags & ACPI_MADT_ENABLED);
+ topology_register_apic((processor->id << 8) | processor->eid,/* APIC ID */
+ processor->processor_id, /* ACPI ID */
+ processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
}
@@ -844,12 +815,10 @@ static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
return 0;
}
-int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
- int *pcpu)
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu)
{
- int cpu;
+ int cpu = topology_hotplug_apic(physid, acpi_id);
- cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED);
if (cpu < 0) {
pr_info("Unable to map lapic to logical cpu number\n");
return cpu;
@@ -868,15 +837,11 @@ int acpi_unmap_cpu(int cpu)
#ifdef CONFIG_ACPI_NUMA
set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
#endif
-
- per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
- set_cpu_present(cpu, false);
- num_processors--;
-
- return (0);
+ topology_hotunplug_apic(cpu);
+ return 0;
}
EXPORT_SYMBOL(acpi_unmap_cpu);
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
{
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index d5d8a352eafa..94ff83f3d3fe 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -17,7 +17,7 @@
* Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
SYM_FUNC_START(wakeup_long64)
- movq saved_magic, %rax
+ movq saved_magic(%rip), %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
je 2f
@@ -33,14 +33,14 @@ SYM_FUNC_START(wakeup_long64)
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movq saved_rsp, %rsp
+ movq saved_rsp(%rip), %rsp
- movq saved_rbx, %rbx
- movq saved_rdi, %rdi
- movq saved_rsi, %rsi
- movq saved_rbp, %rbp
+ movq saved_rbx(%rip), %rbx
+ movq saved_rdi(%rip), %rdi
+ movq saved_rsi(%rip), %rsi
+ movq saved_rbp(%rip), %rbp
- movq saved_rip, %rax
+ movq saved_rip(%rip), %rax
ANNOTATE_RETPOLINE_SAFE
jmp *%rax
SYM_FUNC_END(wakeup_long64)
@@ -72,11 +72,11 @@ SYM_FUNC_START(do_suspend_lowlevel)
movq $.Lresume_point, saved_rip(%rip)
- movq %rsp, saved_rsp
- movq %rbp, saved_rbp
- movq %rbx, saved_rbx
- movq %rdi, saved_rdi
- movq %rsi, saved_rsi
+ movq %rsp, saved_rsp(%rip)
+ movq %rbp, saved_rbp(%rip)
+ movq %rbx, saved_rbx(%rip)
+ movq %rdi, saved_rdi(%rip)
+ movq %rsi, saved_rsi(%rip)
addq $8, %rsp
movl $3, %edi
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index cc130b57542a..ff6e32ec8259 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(alternatives_patched);
#define DA_ENDBR 0x08
#define DA_SMP 0x10
-static unsigned int __initdata_or_module debug_alternative;
+static unsigned int debug_alternative;
static int __init debug_alt(char *str)
{
@@ -133,7 +133,7 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
* each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
* *jump* over instead of executing long and daft NOPs.
*/
-static void __init_or_module add_nop(u8 *instr, unsigned int len)
+static void add_nop(u8 *instr, unsigned int len)
{
u8 *target = instr + len;
@@ -206,7 +206,7 @@ static int skip_nops(u8 *instr, int offset, int len)
* Optimize a sequence of NOPs, possibly preceded by an unconditional jump
* to the end of the NOP sequence into a single NOP.
*/
-static bool __init_or_module
+static bool
__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
{
int i = *next - insn->length;
@@ -335,8 +335,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
return (target < src || target > src + src_len);
}
-static void __init_or_module noinline
-apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
{
int prev, target = 0;
@@ -403,7 +402,7 @@ noinstr void BUG_func(void)
{
BUG();
}
-EXPORT_SYMBOL_GPL(BUG_func);
+EXPORT_SYMBOL(BUG_func);
#define CALL_RIP_REL_OPCODE 0xff
#define CALL_RIP_REL_MODRM 0x15
@@ -545,7 +544,7 @@ static inline bool is_jcc32(struct insn *insn)
return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
}
-#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL)
+#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL)
/*
* CALL/JMP *%\reg
@@ -709,8 +708,8 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
/*
* The compiler is supposed to EMIT an INT3 after every unconditional
* JMP instruction due to AMD BTC. However, if the compiler is too old
- * or SLS isn't enabled, we still need an INT3 after indirect JMPs
- * even on Intel.
+ * or MITIGATION_SLS isn't enabled, we still need an INT3 after
+ * indirect JMPs even on Intel.
*/
if (op == JMP32_INSN_OPCODE && i < insn->length)
bytes[i++] = INT3_INSN_OPCODE;
@@ -770,7 +769,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
}
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
/*
* Rewrite the compiler generated return thunk tail-calls.
@@ -843,14 +842,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
}
#else
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
-#endif /* CONFIG_RETHUNK */
+#endif /* CONFIG_MITIGATION_RETHUNK */
-#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
+#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
-#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
+#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */
#ifdef CONFIG_X86_KERNEL_IBT
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 053f6dcc6b2c..5bf5f9fc5753 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -386,7 +386,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
int amd_get_subcaches(int cpu)
{
- struct pci_dev *link = node_to_amd_nb(topology_die_id(cpu))->link;
+ struct pci_dev *link = node_to_amd_nb(topology_amd_node_id(cpu))->link;
unsigned int mask;
if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
@@ -400,7 +400,7 @@ int amd_get_subcaches(int cpu)
int amd_set_subcaches(int cpu, unsigned long mask)
{
static unsigned int reset, ban;
- struct amd_northbridge *nb = node_to_amd_nb(topology_die_id(cpu));
+ struct amd_northbridge *nb = node_to_amd_nb(topology_amd_node_id(cpu));
unsigned int reg;
int cuid;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4667bc4b00ab..a42d8a6f7149 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -19,6 +19,7 @@
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
+#include <linux/bitmap.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/memblock.h>
@@ -67,10 +68,6 @@
#include "local.h"
-unsigned int num_processors;
-
-unsigned disabled_cpus;
-
/* Processor that is doing the boot up */
u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
@@ -78,18 +75,6 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
u8 boot_cpu_apic_version __ro_after_init;
/*
- * Bitmask of physically existing CPUs:
- */
-physid_mask_t phys_cpu_present_map;
-
-/*
- * Processor to be disabled specified by kernel parameter
- * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
- * avoid undefined behaviour caused by sending INIT from AP to BSP.
- */
-static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID;
-
-/*
* This variable controls which CPUs receive external NMIs. By default,
* external NMIs are delivered only to the BSP.
*/
@@ -108,14 +93,6 @@ static inline bool apic_accessible(void)
return x2apic_mode || apic_mmio_base;
}
-/*
- * Map cpu index to physical APIC ID
- */
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
-
#ifdef CONFIG_X86_32
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase __ro_after_init;
@@ -261,16 +238,6 @@ u64 native_apic_icr_read(void)
return icr1 | ((u64)icr2 << 32);
}
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
- return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
/**
* lapic_get_maxlvt - get the maximum number of local vector table entries
*/
@@ -1549,9 +1516,6 @@ static void setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
- /* Validate that the APIC is registered if required */
- BUG_ON(apic->apic_id_registered && !apic->apic_id_registered());
-
/*
* Intel recommends to set DFR, LDR and TPR before enabling
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
@@ -1690,8 +1654,6 @@ void apic_ap_setup(void)
end_local_APIC_setup();
}
-static __init void cpu_set_boot_apic(void);
-
static __init void apic_read_boot_cpu_id(bool x2apic)
{
/*
@@ -1706,7 +1668,8 @@ static __init void apic_read_boot_cpu_id(bool x2apic)
boot_cpu_physical_apicid = read_apic_id();
boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
}
- cpu_set_boot_apic();
+ topology_register_boot_apic(boot_cpu_physical_apicid);
+ x86_32_probe_bigsmp_early();
}
#ifdef CONFIG_X86_X2APIC
@@ -2091,7 +2054,6 @@ void __init init_apic_mappings(void)
pr_info("APIC: disable apic facility\n");
apic_disable();
}
- num_processors = 1;
}
}
@@ -2305,155 +2267,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)
apic_write(APIC_LVT1, value);
}
-/*
- * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
- * contiguously, it equals to current allocated max logical CPU ID plus 1.
- * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
- * so the maximum of nr_logical_cpuids is nr_cpu_ids.
- *
- * NOTE: Reserve 0 for BSP.
- */
-static int nr_logical_cpuids = 1;
-
-/*
- * Used to store mapping between logical CPU IDs and APIC IDs.
- */
-u32 cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = BAD_APICID, };
-
-bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
-{
- return phys_id == (u64)cpuid_to_apicid[cpu];
-}
-
-#ifdef CONFIG_SMP
-static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
-{
- /* Isolate the SMT bit(s) in the APICID and check for 0 */
- u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
-
- if (smp_num_siblings == 1 || !(apicid & mask))
- cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
-}
-
-/*
- * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid
- * during early boot. Initialize the primary thread mask before SMP
- * bringup.
- */
-static int __init smp_init_primary_thread_mask(void)
-{
- unsigned int cpu;
-
- /*
- * XEN/PV provides either none or useless topology information.
- * Pretend that all vCPUs are primary threads.
- */
- if (xen_pv_domain()) {
- cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask);
- return 0;
- }
-
- for (cpu = 0; cpu < nr_logical_cpuids; cpu++)
- cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]);
- return 0;
-}
-early_initcall(smp_init_primary_thread_mask);
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
-
-/*
- * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
- * and cpuid_to_apicid[] synchronized.
- */
-static int allocate_logical_cpuid(int apicid)
-{
- int i;
-
- /*
- * cpuid <-> apicid mapping is persistent, so when a cpu is up,
- * check if the kernel has allocated a cpuid for it.
- */
- for (i = 0; i < nr_logical_cpuids; i++) {
- if (cpuid_to_apicid[i] == apicid)
- return i;
- }
-
- /* Allocate a new cpuid. */
- if (nr_logical_cpuids >= nr_cpu_ids) {
- WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
- "Processor %d/0x%x and the rest are ignored.\n",
- nr_cpu_ids, nr_logical_cpuids, apicid);
- return -EINVAL;
- }
-
- cpuid_to_apicid[nr_logical_cpuids] = apicid;
- return nr_logical_cpuids++;
-}
-
-static void cpu_update_apic(int cpu, u32 apicid)
-{
-#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
- early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-#endif
- set_cpu_possible(cpu, true);
- physid_set(apicid, phys_cpu_present_map);
- set_cpu_present(cpu, true);
- num_processors++;
-
- if (system_state != SYSTEM_BOOTING)
- cpu_mark_primary_thread(cpu, apicid);
-}
-
-static __init void cpu_set_boot_apic(void)
-{
- cpuid_to_apicid[0] = boot_cpu_physical_apicid;
- cpu_update_apic(0, boot_cpu_physical_apicid);
- x86_32_probe_bigsmp_early();
-}
-
-int generic_processor_info(int apicid)
-{
- int cpu, max = nr_cpu_ids;
-
- /* The boot CPU must be set before MADT/MPTABLE parsing happens */
- if (cpuid_to_apicid[0] == BAD_APICID)
- panic("Boot CPU APIC not registered yet\n");
-
- if (apicid == boot_cpu_physical_apicid)
- return 0;
-
- if (disabled_cpu_apicid == apicid) {
- int thiscpu = num_processors + disabled_cpus;
-
- pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n",
- thiscpu, apicid);
-
- disabled_cpus++;
- return -ENODEV;
- }
-
- if (num_processors >= nr_cpu_ids) {
- int thiscpu = max + disabled_cpus;
-
- pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
- "Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
-
- disabled_cpus++;
- return -EINVAL;
- }
-
- cpu = allocate_logical_cpuid(apicid);
- if (cpu < 0) {
- disabled_cpus++;
- return -EINVAL;
- }
-
- cpu_update_apic(cpu, apicid);
- return cpu;
-}
-
-
void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
bool dmar)
{
@@ -2496,10 +2309,7 @@ EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
static void __init apic_bsp_up_setup(void)
{
-#ifdef CONFIG_X86_64
- apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
-#endif
- physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+ reset_phys_cpu_present_map(boot_cpu_physical_apicid);
}
/**
@@ -2845,15 +2655,6 @@ static int __init lapic_insert_resource(void)
*/
late_initcall(lapic_insert_resource);
-static int __init apic_set_disabled_cpu_apicid(char *arg)
-{
- if (!arg || !get_option(&arg, &disabled_cpu_apicid))
- return -EINVAL;
-
- return 0;
-}
-early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
-
static int __init apic_set_extnmi(char *arg)
{
if (!arg)
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
index 8a00141073ea..9ef3be866832 100644
--- a/arch/x86/kernel/apic/apic_common.c
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -18,16 +18,6 @@ u32 apic_flat_calc_apicid(unsigned int cpu)
return 1U << cpu;
}
-bool default_check_apicid_used(physid_mask_t *map, u32 apicid)
-{
- return physid_isset(apicid, *map);
-}
-
-void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- *retmap = *phys_map;
-}
-
u32 default_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -37,11 +27,6 @@ u32 default_cpu_present_to_apicid(int mps_cpu)
}
EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid);
-bool default_apic_id_registered(void)
-{
- return physid_isset(read_apic_id(), phys_cpu_present_map);
-}
-
/*
* Set up the logical destination ID when the APIC operates in logical
* destination mode.
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index b295a056a4fc..f37ad3392fec 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -61,16 +61,6 @@ static u32 flat_get_apic_id(u32 x)
return (x >> 24) & 0xFF;
}
-static u32 set_apic_id(u32 id)
-{
- return (id & 0xFF) << 24;
-}
-
-static u32 flat_phys_pkg_id(u32 initial_apic_id, int index_msb)
-{
- return initial_apic_id >> index_msb;
-}
-
static int flat_probe(void)
{
return 1;
@@ -80,7 +70,6 @@ static struct apic apic_flat __ro_after_init = {
.name = "flat",
.probe = flat_probe,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
- .apic_id_registered = default_apic_id_registered,
.dest_mode_logical = true,
@@ -88,11 +77,9 @@ static struct apic apic_flat __ro_after_init = {
.init_apic_ldr = default_init_apic_ldr,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = flat_phys_pkg_id,
.max_apic_id = 0xFE,
.get_apic_id = flat_get_apic_id,
- .set_apic_id = set_apic_id,
.calc_dest_apicid = apic_flat_calc_apicid,
@@ -151,18 +138,15 @@ static struct apic apic_physflat __ro_after_init = {
.name = "physical flat",
.probe = physflat_probe,
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
- .apic_id_registered = default_apic_id_registered,
.dest_mode_logical = false,
.disable_esr = 0,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = flat_phys_pkg_id,
.max_apic_id = 0xFE,
.get_apic_id = flat_get_apic_id,
- .set_apic_id = set_apic_id,
.calc_dest_apicid = apic_default_calc_apicid,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 9f1d553eb48f..b5bb7a2e8340 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -29,7 +29,6 @@ static void noop_send_IPI_self(int vector) { }
static void noop_apic_icr_write(u32 low, u32 id) { }
static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { return -1; }
static u64 noop_apic_icr_read(void) { return 0; }
-static u32 noop_phys_pkg_id(u32 cpuid_apic, int index_msb) { return 0; }
static u32 noop_get_apic_id(u32 apicid) { return 0; }
static void noop_apic_eoi(void) { }
@@ -51,12 +50,8 @@ struct apic apic_noop __ro_after_init = {
.disable_esr = 0,
- .check_apicid_used = default_check_apicid_used,
- .ioapic_phys_id_map = default_ioapic_phys_id_map,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = noop_phys_pkg_id,
-
.max_apic_id = 0xFE,
.get_apic_id = noop_get_apic_id,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 7d0c51b9d3bc..16410f087b7a 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -38,11 +38,6 @@ static u32 numachip1_get_apic_id(u32 x)
return id;
}
-static u32 numachip1_set_apic_id(u32 id)
-{
- return (id & 0xff) << 24;
-}
-
static u32 numachip2_get_apic_id(u32 x)
{
u64 mcfg;
@@ -51,16 +46,6 @@ static u32 numachip2_get_apic_id(u32 x)
return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
}
-static u32 numachip2_set_apic_id(u32 id)
-{
- return id << 24;
-}
-
-static u32 numachip_phys_pkg_id(u32 initial_apic_id, int index_msb)
-{
- return initial_apic_id >> index_msb;
-}
-
static void numachip1_apic_icr_write(int apicid, unsigned int val)
{
write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val);
@@ -227,11 +212,9 @@ static const struct apic apic_numachip1 __refconst = {
.disable_esr = 0,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = numachip_phys_pkg_id,
.max_apic_id = UINT_MAX,
.get_apic_id = numachip1_get_apic_id,
- .set_apic_id = numachip1_set_apic_id,
.calc_dest_apicid = apic_default_calc_apicid,
@@ -263,11 +246,9 @@ static const struct apic apic_numachip2 __refconst = {
.disable_esr = 0,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = numachip_phys_pkg_id,
.max_apic_id = UINT_MAX,
.get_apic_id = numachip2_get_apic_id,
- .set_apic_id = numachip2_set_apic_id,
.calc_dest_apicid = apic_default_calc_apicid,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 5a0d60b38e6b..9285d500d5b4 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -18,22 +18,6 @@ static u32 bigsmp_get_apic_id(u32 x)
return (x >> 24) & 0xFF;
}
-static bool bigsmp_check_apicid_used(physid_mask_t *map, u32 apicid)
-{
- return false;
-}
-
-static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- physids_promote(0xFFL, retmap);
-}
-
-static u32 bigsmp_phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
static void bigsmp_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -84,14 +68,10 @@ static struct apic apic_bigsmp __ro_after_init = {
.disable_esr = 1,
- .check_apicid_used = bigsmp_check_apicid_used,
- .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = bigsmp_phys_pkg_id,
.max_apic_id = 0xFE,
.get_apic_id = bigsmp_get_apic_id,
- .set_apic_id = NULL,
.calc_dest_apicid = apic_default_calc_apicid,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 40c7cf180c20..477b740b2f26 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1458,20 +1458,20 @@ void restore_boot_irq_mode(void)
*
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
-void __init setup_ioapic_ids_from_mpc_nocheck(void)
+static void __init setup_ioapic_ids_from_mpc_nocheck(void)
{
+ DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC);
+ const u32 broadcast_id = 0xF;
union IO_APIC_reg_00 reg_00;
- physid_mask_t phys_id_present_map;
- int ioapic_idx;
- int i;
unsigned char old_id;
unsigned long flags;
+ int ioapic_idx, i;
/*
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
- apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
+ copy_phys_cpu_present_map(phys_id_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
@@ -1484,11 +1484,10 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
old_id = mpc_ioapic_id(ioapic_idx);
- if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- ioapic_idx, mpc_ioapic_id(ioapic_idx));
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- reg_00.bits.ID);
+ if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) {
+ pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n",
+ ioapic_idx, mpc_ioapic_id(ioapic_idx));
+ pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID);
ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
}
@@ -1497,23 +1496,21 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(&phys_id_present_map,
- mpc_ioapic_id(ioapic_idx))) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- ioapic_idx, mpc_ioapic_id(ioapic_idx));
- for (i = 0; i < get_physical_broadcast(); i++)
- if (!physid_isset(i, phys_id_present_map))
+ if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) {
+ pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n",
+ ioapic_idx, mpc_ioapic_id(ioapic_idx));
+ for (i = 0; i < broadcast_id; i++)
+ if (!test_bit(i, phys_id_present_map))
break;
- if (i >= get_physical_broadcast())
+ if (i >= broadcast_id)
panic("Max APIC ID exceeded!\n");
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- i);
- physid_set(i, phys_id_present_map);
+ pr_err("... fixing up to %d. (tell your hw vendor)\n", i);
+ set_bit(i, phys_id_present_map);
ioapics[ioapic_idx].mp_config.apicid = i;
} else {
apic_printk(APIC_VERBOSE, "Setting %d in the phys_id_present_map\n",
mpc_ioapic_id(ioapic_idx));
- physid_set(mpc_ioapic_id(ioapic_idx), phys_id_present_map);
+ set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map);
}
/*
@@ -2209,7 +2206,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
+ panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -2354,7 +2351,7 @@ static int mp_irqdomain_create(int ioapic)
fwspec.param_count = 1;
fwspec.param[0] = mpc_ioapic_id(ioapic);
- parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI);
if (!parent) {
if (!cfg->dev)
irq_domain_free_fwnode(fn);
@@ -2494,56 +2491,41 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
#ifdef CONFIG_X86_32
static int io_apic_get_unique_id(int ioapic, int apic_id)
{
+ static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC);
+ const u32 broadcast_id = 0xF;
union IO_APIC_reg_00 reg_00;
- static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
- physid_mask_t tmp;
unsigned long flags;
int i = 0;
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (physids_empty(apic_id_map))
- apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
+ /* Initialize the ID map */
+ if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC))
+ copy_phys_cpu_present_map(apic_id_map);
raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- if (apic_id >= get_physical_broadcast()) {
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ if (apic_id >= broadcast_id) {
+ pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n",
+ ioapic, apic_id, reg_00.bits.ID);
apic_id = reg_00.bits.ID;
}
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (apic->check_apicid_used(&apic_id_map, apic_id)) {
-
- for (i = 0; i < get_physical_broadcast(); i++) {
- if (!apic->check_apicid_used(&apic_id_map, i))
+ /* Every APIC in a system must have a unique ID */
+ if (test_bit(apic_id, apic_id_map)) {
+ for (i = 0; i < broadcast_id; i++) {
+ if (!test_bit(i, apic_id_map))
break;
}
- if (i == get_physical_broadcast())
+ if (i == broadcast_id)
panic("Max apic_id exceeded!\n");
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
-
+ pr_warn("IOAPIC[%d]: apic_id %d already used, trying %d\n", ioapic, apic_id, i);
apic_id = i;
}
- physid_set_mask_of_physid(apic_id, &tmp);
- physids_or(apic_id_map, apic_id_map, tmp);
+ set_bit(apic_id, apic_id_map);
if (reg_00.bits.ID != apic_id) {
reg_00.bits.ID = apic_id;
@@ -2569,11 +2551,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id)
static u8 io_apic_unique_id(int idx, u8 id)
{
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(boot_cpu_apic_version))
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(boot_cpu_apic_version))
return io_apic_get_unique_id(idx, id);
- else
- return id;
+ return id;
}
#else
static u8 io_apic_unique_id(int idx, u8 id)
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index 9ea6186ea88c..842fe28496be 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -16,8 +16,6 @@
/* X2APIC */
void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
u32 x2apic_get_apic_id(u32 id);
-u32 x2apic_set_apic_id(u32 id);
-u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb);
void x2apic_send_IPI_all(int vector);
void x2apic_send_IPI_allbutself(int vector);
@@ -63,9 +61,6 @@ void default_send_IPI_allbutself(int vector);
void default_send_IPI_all(int vector);
void default_send_IPI_self(int vector);
-bool default_apic_id_registered(void);
-bool default_check_apicid_used(physid_mask_t *map, u32 apicid);
-
#ifdef CONFIG_X86_32
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector);
void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector);
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index c0f78059f06a..f75ee345c02d 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -18,11 +18,6 @@
#include "local.h"
-static u32 default_phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
static u32 default_get_apic_id(u32 x)
{
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -43,17 +38,13 @@ static struct apic apic_default __ro_after_init = {
.name = "default",
.probe = probe_default,
- .apic_id_registered = default_apic_id_registered,
.dest_mode_logical = true,
.disable_esr = 0,
- .check_apicid_used = default_check_apicid_used,
.init_apic_ldr = default_init_apic_ldr,
- .ioapic_phys_id_map = default_ioapic_phys_id_map,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = default_phys_pkg_id,
.max_apic_id = 0xFE,
.get_apic_id = default_get_apic_id,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 28a7d3f2312d..567dbd2fe4b6 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -231,16 +231,12 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.disable_esr = 0,
- .check_apicid_used = NULL,
.init_apic_ldr = init_x2apic_ldr,
- .ioapic_phys_id_map = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = x2apic_phys_pkg_id,
.max_apic_id = UINT_MAX,
.x2apic_set_max_apicid = true,
.get_apic_id = x2apic_get_apic_id,
- .set_apic_id = x2apic_set_apic_id,
.calc_dest_apicid = x2apic_calc_apicid,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 409815a40668..12d4c35547a6 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -129,16 +129,6 @@ u32 x2apic_get_apic_id(u32 id)
return id;
}
-u32 x2apic_set_apic_id(u32 id)
-{
- return id;
-}
-
-u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb)
-{
- return initial_apicid >> index_msb;
-}
-
static struct apic apic_x2apic_phys __ro_after_init = {
.name = "physical x2apic",
@@ -150,12 +140,10 @@ static struct apic apic_x2apic_phys __ro_after_init = {
.disable_esr = 0,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = x2apic_phys_pkg_id,
.max_apic_id = UINT_MAX,
.x2apic_set_max_apicid = true,
.get_apic_id = x2apic_get_apic_id,
- .set_apic_id = x2apic_set_apic_id,
.calc_dest_apicid = apic_default_calc_apicid,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f1766b18dcd0..7fef504ca508 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -241,54 +241,20 @@ static void __init uv_tsc_check_sync(void)
is_uv(UV3) ? sname.s3.field : \
undef)
-/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
-
-#define SMT_LEVEL 0 /* Leaf 0xb SMT level */
-#define INVALID_TYPE 0 /* Leaf 0xb sub-leaf types */
-#define SMT_TYPE 1
-#define CORE_TYPE 2
-#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
-#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
-
-static void set_x2apic_bits(void)
-{
- unsigned int eax, ebx, ecx, edx, sub_index;
- unsigned int sid_shift;
-
- cpuid(0, &eax, &ebx, &ecx, &edx);
- if (eax < 0xb) {
- pr_info("UV: CPU does not have CPUID.11\n");
- return;
- }
-
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
- if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) {
- pr_info("UV: CPUID.11 not implemented\n");
- return;
- }
-
- sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
- sub_index = 1;
- do {
- cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
- if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
- sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
- break;
- }
- sub_index++;
- } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
-
- uv_cpuid.apicid_shift = 0;
- uv_cpuid.apicid_mask = (~(-1 << sid_shift));
- uv_cpuid.socketid_shift = sid_shift;
-}
-
static void __init early_get_apic_socketid_shift(void)
{
+ unsigned int sid_shift = topology_get_domain_shift(TOPO_PKG_DOMAIN);
+
if (is_uv2_hub() || is_uv3_hub())
uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
- set_x2apic_bits();
+ if (sid_shift) {
+ uv_cpuid.apicid_shift = 0;
+ uv_cpuid.apicid_mask = (~(-1 << sid_shift));
+ uv_cpuid.socketid_shift = sid_shift;
+ } else {
+ pr_info("UV: CPU does not have valid CPUID.11\n");
+ }
pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
@@ -779,21 +745,6 @@ static void uv_send_IPI_all(int vector)
uv_send_IPI_mask(cpu_online_mask, vector);
}
-static u32 set_apic_id(u32 id)
-{
- return id;
-}
-
-static unsigned int uv_read_apic_id(void)
-{
- return x2apic_get_apic_id(apic_read(APIC_ID));
-}
-
-static u32 uv_phys_pkg_id(u32 initial_apicid, int index_msb)
-{
- return uv_read_apic_id() >> index_msb;
-}
-
static int uv_probe(void)
{
return apic == &apic_x2apic_uv_x;
@@ -810,11 +761,9 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.disable_esr = 0,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .phys_pkg_id = uv_phys_pkg_id,
.max_apic_id = UINT_MAX,
.get_apic_id = x2apic_get_apic_id,
- .set_apic_id = set_apic_id,
.calc_dest_apicid = apic_default_calc_apicid,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 76a5ced278c2..b37ab1095707 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1055,35 +1055,6 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
return APM_SUCCESS;
}
-#if 0
-static int apm_get_battery_status(u_short which, u_short *status,
- u_short *bat, u_short *life, u_short *nbat)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
-
- if (apm_info.connection_version < 0x0102) {
- /* pretend we only have one battery. */
- if (which != 1)
- return APM_BAD_DEVICE;
- *nbat = 1;
- return apm_get_power_status(status, bat, life);
- }
-
- if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
- &ebx, &ecx, &edx, &esi))
- return (eax >> 8) & 0xff;
- *status = ebx;
- *bat = ecx;
- *life = edx;
- *nbat = esi;
- return APM_SUCCESS;
-}
-#endif
-
/**
* apm_engage_power_management - enable PM on a device
* @device: identity of device
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6913b372ccf7..a98020bf31bb 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -109,7 +109,7 @@ static void __used common(void)
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
OFFSET(X86_current_task, pcpu_hot, current_task);
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64)
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 64ad2ddea121..30335182b6b0 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -24,6 +24,8 @@
static int __initdata_or_module debug_callthunks;
+#define MAX_PATCH_LEN (255-1)
+
#define prdbg(fmt, args...) \
do { \
if (debug_callthunks) \
@@ -42,8 +44,8 @@ DEFINE_PER_CPU(u64, __x86_call_count);
DEFINE_PER_CPU(u64, __x86_ret_count);
DEFINE_PER_CPU(u64, __x86_stuffs_count);
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
-EXPORT_SYMBOL_GPL(__x86_ctxsw_count);
-EXPORT_SYMBOL_GPL(__x86_call_count);
+EXPORT_PER_CPU_SYMBOL_GPL(__x86_ctxsw_count);
+EXPORT_PER_CPU_SYMBOL_GPL(__x86_call_count);
#endif
extern s32 __call_sites[], __call_sites_end[];
@@ -179,10 +181,15 @@ static const u8 nops[] = {
static void *patch_dest(void *dest, bool direct)
{
unsigned int tsize = SKL_TMPL_SIZE;
+ u8 insn_buff[MAX_PATCH_LEN];
u8 *pad = dest - tsize;
+ memcpy(insn_buff, skl_call_thunk_template, tsize);
+ apply_relocation(insn_buff, tsize, pad,
+ skl_call_thunk_template, tsize);
+
/* Already patched? */
- if (!bcmp(pad, skl_call_thunk_template, tsize))
+ if (!bcmp(pad, insn_buff, tsize))
return pad;
/* Ensure there are nops */
@@ -192,9 +199,9 @@ static void *patch_dest(void *dest, bool direct)
}
if (direct)
- memcpy(pad, skl_call_thunk_template, tsize);
+ memcpy(pad, insn_buff, tsize);
else
- text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
+ text_poke_copy_locked(pad, insn_buff, tsize, true);
return pad;
}
@@ -290,20 +297,27 @@ void *callthunks_translate_call_dest(void *dest)
static bool is_callthunk(void *addr)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
unsigned long dest;
+ u8 *pad;
dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
if (!thunks_initialized || skip_addr((void *)dest))
return false;
- return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+ pad = (void *)(dest - tmpl_size);
+
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, pad,
+ skl_call_thunk_template, tmpl_size);
+
+ return !bcmp(pad, insn_buff, tmpl_size);
}
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
if (!thunks_initialized)
return 0;
@@ -312,7 +326,11 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
if (func && is_callthunk(func))
return 0;
- memcpy(*pprog, tmpl, tmpl_size);
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, *pprog,
+ skl_call_thunk_template, tmpl_size);
+
+ memcpy(*pprog, insn_buff, tmpl_size);
*pprog += tmpl_size;
return tmpl_size;
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 93eabf544031..eb4dbcdf41f1 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,7 +17,8 @@ KMSAN_SANITIZE_common.o := n
# As above, instrumenting secondary CPU boot code causes boot hangs.
KCSAN_SANITIZE_common.o := n
-obj-y := cacheinfo.o scattered.o topology.o
+obj-y := cacheinfo.o scattered.o
+obj-y += topology_common.o topology_ext.o topology_amd.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
@@ -25,14 +26,16 @@ obj-y += bugs.o
obj-y += aperfmperf.o
obj-y += cpuid-deps.o
obj-y += umwait.o
+obj-y += capflags.o powerflags.o
-obj-$(CONFIG_PROC_FS) += proc.o
-obj-y += capflags.o powerflags.o
+obj-$(CONFIG_X86_LOCAL_APIC) += topology.o
-obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o
+obj-$(CONFIG_PROC_FS) += proc.o
+
+obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o
ifdef CONFIG_CPU_SUP_INTEL
-obj-y += intel.o intel_pconfig.o tsx.o
-obj-$(CONFIG_PM) += intel_epb.o
+obj-y += intel.o intel_pconfig.o tsx.o
+obj-$(CONFIG_PM) += intel_epb.o
endif
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
index bfeb18fad63f..2c5b51aad91a 100644
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -26,8 +26,8 @@ static u32 __init acrn_detect(void)
static void __init acrn_init_platform(void)
{
- /* Setup the IDT for ACRN hypervisor callback */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback);
+ /* Install system interrupt handler for ACRN hypervisor callback */
+ sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
x86_platform.calibrate_tsc = acrn_get_tsc_khz;
x86_platform.calibrate_cpu = acrn_get_tsc_khz;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9f42d1c59e09..3282a747b645 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -20,6 +20,7 @@
#include <asm/delay.h>
#include <asm/debugreg.h>
#include <asm/resctrl.h>
+#include <asm/sev.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -27,13 +28,6 @@
#include "cpu.h"
-/*
- * nodes_per_socket: Stores the number of nodes per socket.
- * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
- * Node Identifiers[10:8]
- */
-static u32 nodes_per_socket = 1;
-
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -300,97 +294,6 @@ static int nearby_node(int apicid)
}
#endif
-/*
- * Fix up topo::core_id for pre-F17h systems to be in the
- * [0 .. cores_per_node - 1] range. Not really needed but
- * kept so as not to break existing setups.
- */
-static void legacy_fixup_core_id(struct cpuinfo_x86 *c)
-{
- u32 cus_per_node;
-
- if (c->x86 >= 0x17)
- return;
-
- cus_per_node = c->x86_max_cores / nodes_per_socket;
- c->topo.core_id %= cus_per_node;
-}
-
-/*
- * Fixup core topology information for
- * (1) AMD multi-node processors
- * Assumption: Number of cores in each internal node is the same.
- * (2) AMD processors supporting compute units
- */
-static void amd_get_topology(struct cpuinfo_x86 *c)
-{
- /* get information required for multi-node processors */
- if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- int err;
- u32 eax, ebx, ecx, edx;
-
- cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-
- c->topo.die_id = ecx & 0xff;
-
- if (c->x86 == 0x15)
- c->topo.cu_id = ebx & 0xff;
-
- if (c->x86 >= 0x17) {
- c->topo.core_id = ebx & 0xff;
-
- if (smp_num_siblings > 1)
- c->x86_max_cores /= smp_num_siblings;
- }
-
- /*
- * In case leaf B is available, use it to derive
- * topology information.
- */
- err = detect_extended_topology(c);
- if (!err)
- c->x86_coreid_bits = get_count_order(c->x86_max_cores);
-
- cacheinfo_amd_init_llc_id(c);
-
- } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
- u64 value;
-
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- c->topo.die_id = value & 7;
- c->topo.llc_id = c->topo.die_id;
- } else
- return;
-
- if (nodes_per_socket > 1) {
- set_cpu_cap(c, X86_FEATURE_AMD_DCM);
- legacy_fixup_core_id(c);
- }
-}
-
-/*
- * On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
- * Assumes number of cores is a power of two.
- */
-static void amd_detect_cmp(struct cpuinfo_x86 *c)
-{
- unsigned bits;
-
- bits = c->x86_coreid_bits;
- /* Low order bits define the core id (index of core in socket) */
- c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1);
- /* Convert the initial APIC ID into the socket ID */
- c->topo.pkg_id = c->topo.initial_apicid >> bits;
- /* use socket ID also for last level cache */
- c->topo.llc_id = c->topo.die_id = c->topo.pkg_id;
-}
-
-u32 amd_get_nodes_per_socket(void)
-{
- return nodes_per_socket;
-}
-EXPORT_SYMBOL_GPL(amd_get_nodes_per_socket);
-
static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
@@ -442,32 +345,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-static void early_init_amd_mc(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- unsigned bits, ecx;
-
- /* Multi core CPU? */
- if (c->extended_cpuid_level < 0x80000008)
- return;
-
- ecx = cpuid_ecx(0x80000008);
-
- c->x86_max_cores = (ecx & 0xff) + 1;
-
- /* CPU telling us the core id bits shift? */
- bits = (ecx >> 12) & 0xF;
-
- /* Otherwise recompute */
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
-
- c->x86_coreid_bits = bits;
-#endif
-}
-
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -500,18 +377,6 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
- if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- u32 ecx;
-
- ecx = cpuid_ecx(0x8000001e);
- __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1;
- } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
- u64 value;
-
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1;
- }
-
if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
!boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
c->x86 >= 0x15 && c->x86 <= 0x17) {
@@ -538,7 +403,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
/* Figure out Zen generations: */
switch (c->x86) {
- case 0x17: {
+ case 0x17:
switch (c->x86_model) {
case 0x00 ... 0x2f:
case 0x50 ... 0x5f:
@@ -554,8 +419,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
goto warn;
}
break;
- }
- case 0x19: {
+
+ case 0x19:
switch (c->x86_model) {
case 0x00 ... 0x0f:
case 0x20 ... 0x5f:
@@ -569,11 +434,39 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
goto warn;
}
break;
- }
+
+ case 0x1a:
+ switch (c->x86_model) {
+ case 0x00 ... 0x0f:
+ case 0x20 ... 0x2f:
+ case 0x40 ... 0x4f:
+ case 0x70 ... 0x7f:
+ setup_force_cpu_cap(X86_FEATURE_ZEN5);
+ break;
+ default:
+ goto warn;
+ }
+ break;
+
default:
break;
}
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and it can vary by processor
+ * and is defined by the per-processor PPR. Restrict SNP support on the
+ * known CPU model and family for which the RMP table entry format is
+ * currently defined for.
+ */
+ if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
+ !boot_cpu_has(X86_FEATURE_ZEN4) &&
+ !boot_cpu_has(X86_FEATURE_ZEN5))
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ else if (!snp_probe_rmptable_info())
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ }
+
return;
warn:
@@ -592,8 +485,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
* SME feature (set in scattered.c).
* If the kernel has not enabled SME via any means then
* don't advertise the SME feature.
- * For SEV: If BIOS has not enabled SEV then don't advertise the
- * SEV and SEV_ES feature (set in scattered.c).
+ * For SEV: If BIOS has not enabled SEV then don't advertise SEV and
+ * any additional functionality based on it.
*
* In all cases, since support for SME and SEV requires long mode,
* don't advertise the feature under CONFIG_X86_32.
@@ -628,6 +521,7 @@ clear_all:
clear_sev:
setup_clear_cpu_cap(X86_FEATURE_SEV);
setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
}
}
@@ -636,8 +530,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
u64 value;
u32 dummy;
- early_init_amd_mc(c);
-
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
@@ -717,9 +609,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
}
}
- if (cpu_has(c, X86_FEATURE_TOPOEXT))
- smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
@@ -928,7 +817,7 @@ static void fix_erratum_1386(struct cpuinfo_x86 *c)
void init_spectral_chicken(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
u64 value;
/*
@@ -956,7 +845,6 @@ static void init_amd_zen_common(void)
static void init_amd_zen1(struct cpuinfo_x86 *c)
{
- init_amd_zen_common();
fix_erratum_1386(c);
/* Fix up CPUID bits, but only if not virtualised. */
@@ -1010,7 +898,6 @@ static void zen2_zenbleed_check(struct cpuinfo_x86 *c)
static void init_amd_zen2(struct cpuinfo_x86 *c)
{
- init_amd_zen_common();
init_spectral_chicken(c);
fix_erratum_1386(c);
zen2_zenbleed_check(c);
@@ -1018,8 +905,6 @@ static void init_amd_zen2(struct cpuinfo_x86 *c)
static void init_amd_zen3(struct cpuinfo_x86 *c)
{
- init_amd_zen_common();
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
/*
* Zen3 (Fam19 model < 0x10) parts are not susceptible to
@@ -1033,12 +918,14 @@ static void init_amd_zen3(struct cpuinfo_x86 *c)
static void init_amd_zen4(struct cpuinfo_x86 *c)
{
- init_amd_zen_common();
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR))
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
+static void init_amd_zen5(struct cpuinfo_x86 *c)
+{
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
u64 vm_cr;
@@ -1058,9 +945,6 @@ static void init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_FSRM))
set_cpu_cap(c, X86_FEATURE_FSRS);
- /* get apicid instead of initial apic id from cpuid */
- c->topo.apicid = read_apic_id();
-
/* K6s reports MCEs but don't actually have all the MSRs */
if (c->x86 < 6)
clear_cpu_cap(c, X86_FEATURE_MCE);
@@ -1076,6 +960,13 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x16: init_amd_jg(c); break;
}
+ /*
+ * Save up on some future enablement work and do common Zen
+ * settings.
+ */
+ if (c->x86 >= 0x17)
+ init_amd_zen_common();
+
if (boot_cpu_has(X86_FEATURE_ZEN1))
init_amd_zen1(c);
else if (boot_cpu_has(X86_FEATURE_ZEN2))
@@ -1084,6 +975,8 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_zen3(c);
else if (boot_cpu_has(X86_FEATURE_ZEN4))
init_amd_zen4(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN5))
+ init_amd_zen5(c);
/*
* Enable workaround for FXSAVE leak on CPUs
@@ -1094,8 +987,6 @@ static void init_amd(struct cpuinfo_x86 *c)
cpu_detect_cache_sizes(c);
- amd_detect_cmp(c);
- amd_get_topology(c);
srat_detect_node(c);
init_amd_cacheinfo(c);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bb0ab8466b91..e7ba936d798b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -56,7 +56,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
/* The current value of the SPEC_CTRL MSR with task-specific bits set */
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
-EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
+EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -424,7 +421,14 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ /*
+ * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based
+ * mitigations, disable KVM-only mitigation in that case.
+ */
+ if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
+ static_branch_disable(&mmio_stale_data_clear);
else
static_branch_enable(&mmio_stale_data_clear);
@@ -477,6 +481,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str)
early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "Register File Data Sampling: " fmt
+
+enum rfds_mitigations {
+ RFDS_MITIGATION_OFF,
+ RFDS_MITIGATION_VERW,
+ RFDS_MITIGATION_UCODE_NEEDED,
+};
+
+/* Default mitigation for Register File Data Sampling */
+static enum rfds_mitigations rfds_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF;
+
+static const char * const rfds_strings[] = {
+ [RFDS_MITIGATION_OFF] = "Vulnerable",
+ [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File",
+ [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+};
+
+static void __init rfds_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) {
+ rfds_mitigation = RFDS_MITIGATION_OFF;
+ return;
+ }
+ if (rfds_mitigation == RFDS_MITIGATION_OFF)
+ return;
+
+ if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ else
+ rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
+}
+
+static __init int rfds_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!boot_cpu_has_bug(X86_BUG_RFDS))
+ return 0;
+
+ if (!strcmp(str, "off"))
+ rfds_mitigation = RFDS_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ rfds_mitigation = RFDS_MITIGATION_VERW;
+
+ return 0;
+}
+early_param("reg_file_data_sampling", rfds_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "" fmt
static void __init md_clear_update_mitigation(void)
@@ -484,12 +539,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;
- if (!static_key_enabled(&mds_user_clear))
+ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;
/*
- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
- * mitigation, if necessary.
+ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+ * Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
@@ -501,11 +556,19 @@ static void __init md_clear_update_mitigation(void)
taa_mitigation = TAA_MITIGATION_VERW;
taa_select_mitigation();
}
- if (mmio_mitigation == MMIO_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
+ /*
+ * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear
+ * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state.
+ */
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
mmio_mitigation = MMIO_MITIGATION_VERW;
mmio_select_mitigation();
}
+ if (rfds_mitigation == RFDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_RFDS)) {
+ rfds_mitigation = RFDS_MITIGATION_VERW;
+ rfds_select_mitigation();
+ }
out:
if (boot_cpu_has_bug(X86_BUG_MDS))
pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
@@ -515,6 +578,8 @@ out:
pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
pr_info("MMIO Stale Data: Unknown: No mitigations\n");
+ if (boot_cpu_has_bug(X86_BUG_RFDS))
+ pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]);
}
static void __init md_clear_select_mitigation(void)
@@ -522,11 +587,12 @@ static void __init md_clear_select_mitigation(void)
mds_select_mitigation();
taa_select_mitigation();
mmio_select_mitigation();
+ rfds_select_mitigation();
/*
- * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
- * and print their mitigation after MDS, TAA and MMIO Stale Data
- * mitigation selection is done.
+ * As these mitigations are inter-related and rely on VERW instruction
+ * to clear the microarchitural buffers, update and print their status
+ * after mitigation selection is done for each of these vulnerabilities.
*/
md_clear_update_mitigation();
}
@@ -671,7 +737,7 @@ enum gds_mitigations {
GDS_MITIGATION_HYPERVISOR,
};
-#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+#if IS_ENABLED(CONFIG_MITIGATION_GDS_FORCE)
static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
#else
static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
@@ -982,10 +1048,10 @@ static void __init retbleed_select_mitigation(void)
return;
case RETBLEED_CMD_UNRET:
- if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
+ if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) {
retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
} else {
- pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
+ pr_err("WARNING: kernel not compiled with MITIGATION_UNRET_ENTRY.\n");
goto do_cmd_auto;
}
break;
@@ -994,24 +1060,24 @@ static void __init retbleed_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_IBPB)) {
pr_err("WARNING: CPU does not support IBPB.\n");
goto do_cmd_auto;
- } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ } else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
} else {
- pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
goto do_cmd_auto;
}
break;
case RETBLEED_CMD_STUFF:
- if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) &&
+ if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) &&
spectre_v2_enabled == SPECTRE_V2_RETPOLINE) {
retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
} else {
- if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING))
+ if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING))
pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n");
else
- pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n");
+ pr_err("WARNING: kernel not compiled with MITIGATION_CALL_DEPTH_TRACKING.\n");
goto do_cmd_auto;
}
@@ -1021,9 +1087,10 @@ do_cmd_auto:
case RETBLEED_CMD_AUTO:
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+ if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY))
retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
- else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+ else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY) &&
+ boot_cpu_has(X86_FEATURE_IBPB))
retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
}
@@ -1102,7 +1169,7 @@ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
SPECTRE_V2_USER_NONE;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
static bool spectre_v2_bad_module;
bool retpoline_module_ok(bool has_retpoline)
@@ -1415,7 +1482,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
- !IS_ENABLED(CONFIG_RETPOLINE)) {
+ !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) {
pr_err("%s selected but not compiled in. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
@@ -1438,7 +1505,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
- if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
+ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY)) {
pr_err("%s selected but not compiled in. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
@@ -1469,7 +1536,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
{
- if (!IS_ENABLED(CONFIG_RETPOLINE)) {
+ if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) {
pr_err("Kernel not compiled with retpoline; no mitigation available!");
return SPECTRE_V2_NONE;
}
@@ -1564,7 +1631,7 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
- if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
+ if (IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY) &&
boot_cpu_has_bug(X86_BUG_RETBLEED) &&
retbleed_cmd != RETBLEED_CMD_OFF &&
retbleed_cmd != RETBLEED_CMD_STUFF &&
@@ -2457,7 +2524,7 @@ static void __init srso_select_mitigation(void)
break;
case SRSO_CMD_SAFE_RET:
- if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+ if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
/*
* Enable the return thunk for generated code
* like ftrace, static_call, etc.
@@ -2477,29 +2544,29 @@ static void __init srso_select_mitigation(void)
else
srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED;
} else {
- pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+ pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
}
break;
case SRSO_CMD_IBPB:
- if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
srso_mitigation = SRSO_MITIGATION_IBPB;
}
} else {
- pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
}
break;
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+ if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
}
} else {
- pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+ pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
}
break;
}
@@ -2615,6 +2682,11 @@ static ssize_t mmio_stale_data_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t rfds_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]);
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
@@ -2774,6 +2846,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_GDS:
return gds_show_state(buf);
+ case X86_BUG_RFDS:
+ return rfds_show_state(buf);
+
default:
break;
}
@@ -2848,4 +2923,14 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu
{
return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
}
+
+ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_RFDS);
+}
#endif
+
+void __warn_thunk(void)
+{
+ WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n");
+}
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index c131c412db89..392d09c936d6 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
eax->split.type = types[leaf];
eax->split.level = levels[leaf];
eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
+ eax->split.num_cores_on_die = topology_num_cores_per_package();
if (assoc == 0xffff)
@@ -595,7 +595,7 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
if (index < 3)
return;
- node = topology_die_id(smp_processor_id());
+ node = topology_amd_node_id(smp_processor_id());
this_leaf->nb = node_to_amd_nb(node);
if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
amd_calc_l3_indices(this_leaf->nb);
@@ -661,7 +661,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
-void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c)
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
@@ -672,7 +672,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c)
if (c->x86 < 0x17) {
/* LLC is at the node level. */
- c->topo.llc_id = c->topo.die_id;
+ c->topo.llc_id = die_id;
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
@@ -1118,15 +1118,16 @@ static void cache_cpu_init(void)
unsigned long flags;
local_irq_save(flags);
- cache_disable();
- if (memory_caching_control & CACHE_MTRR)
+ if (memory_caching_control & CACHE_MTRR) {
+ cache_disable();
mtrr_generic_set_state();
+ cache_enable();
+ }
if (memory_caching_control & CACHE_PAT)
pat_cpu_init();
- cache_enable();
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 345f7d905db6..a3b55db35c96 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -128,10 +128,6 @@ static void init_centaur(struct cpuinfo_x86 *c)
#endif
early_init_centaur(c);
init_intel_cacheinfo(c);
- detect_num_cpu_cores(c);
-#ifdef CONFIG_X86_32
- detect_ht(c);
-#endif
if (c->cpuid_level > 9) {
unsigned int eax = cpuid_eax(10);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0b97bcde70c6..ba8cf5e9ce56 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -61,6 +61,7 @@
#include <asm/microcode.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
+#include <asm/fred.h>
#include <asm/uv/uv.h>
#include <asm/ia32.h>
#include <asm/set_memory.h>
@@ -70,11 +71,26 @@
#include "cpu.h"
+DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
+
u32 elf_hwcap2 __read_mostly;
/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
+unsigned int __max_threads_per_core __ro_after_init = 1;
+EXPORT_SYMBOL(__max_threads_per_core);
+
+unsigned int __max_dies_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__max_dies_per_package);
+
+unsigned int __max_logical_packages __ro_after_init = 1;
+EXPORT_SYMBOL(__max_logical_packages);
+
+unsigned int __num_cores_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__num_cores_per_package);
+
+unsigned int __num_threads_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__num_threads_per_package);
static struct ppin_info {
int feature;
@@ -382,9 +398,8 @@ out:
}
/* These bits should not change their value after CPU init is finished. */
-static const unsigned long cr4_pinned_mask =
- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- X86_CR4_FSGSBASE | X86_CR4_CET;
+static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
+ X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -790,19 +805,6 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
-void detect_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- c->x86_max_cores = 1;
- if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- return;
-
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- c->x86_max_cores = (eax >> 26) + 1;
-}
-
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -864,51 +866,6 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
}
-int detect_ht_early(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- u32 eax, ebx, ecx, edx;
-
- if (!cpu_has(c, X86_FEATURE_HT))
- return -1;
-
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
- return -1;
-
- if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
- return -1;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
-
- smp_num_siblings = (ebx & 0xff0000) >> 16;
- if (smp_num_siblings == 1)
- pr_info_once("CPU0: Hyper-Threading is disabled\n");
-#endif
- return 0;
-}
-
-void detect_ht(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- int index_msb, core_bits;
-
- if (detect_ht_early(c) < 0)
- return;
-
- index_msb = get_count_order(smp_num_siblings);
- c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb);
-
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
- index_msb = get_count_order(smp_num_siblings);
-
- core_bits = get_count_order(c->x86_max_cores);
-
- c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) &
- ((1 << core_bits) - 1);
-#endif
-}
-
static void get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
@@ -1267,6 +1224,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define SRSO BIT(5)
/* CPU is affected by GDS */
#define GDS BIT(6)
+/* CPU is affected by Register File Data Sampling */
+#define RFDS BIT(7)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1294,9 +1253,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
@@ -1330,6 +1298,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap)
ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
}
+static bool __init vulnerable_to_rfds(u64 ia32_cap)
+{
+ /* The "immunity" bit trumps everything else: */
+ if (ia32_cap & ARCH_CAP_RFDS_NO)
+ return false;
+
+ /*
+ * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to
+ * indicate that mitigation is needed because guest is running on a
+ * vulnerable hardware or may migrate to such hardware:
+ */
+ if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ return true;
+
+ /* Only consult the blacklist when there is no enumeration: */
+ return cpu_matches(cpu_vuln_blacklist, RFDS);
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
@@ -1355,8 +1341,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
/*
* AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
* flag and protect from vendor-specific bugs via the whitelist.
+ *
+ * Don't use AutoIBRS when SNP is enabled because it degrades host
+ * userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
+ if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ (cpu_has(c, X86_FEATURE_AUTOIBRS) &&
+ !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
@@ -1441,6 +1432,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
+ if (vulnerable_to_rfds(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_RFDS);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1589,8 +1583,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
get_cpu_vendor(c);
get_cpu_cap(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
cpu_parse_early_param();
+ cpu_init_topology(c);
+
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
@@ -1601,10 +1598,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_bsp_init(c);
} else {
setup_clear_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
+ cpu_init_topology(c);
}
- get_cpu_address_sizes(c);
-
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
cpu_set_bug_bits(c);
@@ -1748,18 +1745,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_cpu_address_sizes(c);
- if (c->cpuid_level >= 0x00000001) {
- c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
-#ifdef CONFIG_X86_32
-# ifdef CONFIG_SMP
- c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
-# else
- c->topo.apicid = c->topo.initial_apicid;
-# endif
-#endif
- c->topo.pkg_id = c->topo.initial_apicid;
- }
-
get_model_name(c); /* Default name */
/*
@@ -1781,29 +1766,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
}
/*
- * Validate that ACPI/mptables have the same information about the
- * effective APIC id and update the package map.
- */
-static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- unsigned int cpu = smp_processor_id();
- u32 apicid;
-
- apicid = apic->cpu_present_to_apicid(cpu);
-
- if (apicid != c->topo.apicid) {
- pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
- cpu, apicid, c->topo.initial_apicid);
- }
- BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu));
- BUG_ON(topology_update_die_map(c->topo.die_id, cpu));
-#else
- c->topo.logical_pkg_id = 0;
-#endif
-}
-
-/*
* This does the hard work of actually picking apart the CPU stuff...
*/
static void identify_cpu(struct cpuinfo_x86 *c)
@@ -1816,11 +1778,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_max_cores = 1;
- c->x86_coreid_bits = 0;
- c->topo.cu_id = 0xff;
- c->topo.llc_id = BAD_APICID;
- c->topo.l2c_id = BAD_APICID;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
@@ -1839,17 +1796,14 @@ static void identify_cpu(struct cpuinfo_x86 *c)
generic_identify(c);
+ cpu_parse_topology(c);
+
if (this_cpu->c_identify)
this_cpu->c_identify(c);
/* Clear/Set all flags overridden by options, after probe */
apply_forced_caps(c);
-#ifdef CONFIG_X86_64
- c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
-#endif
-
-
/*
* Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
* Hygon will clear it in ->c_init() below.
@@ -1903,10 +1857,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->x86, c->x86_model);
}
-#ifdef CONFIG_X86_64
- detect_ht(c);
-#endif
-
x86_init_rdrand(c);
setup_pku(c);
setup_cet(c);
@@ -1938,8 +1888,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Init Machine Check Exception if available. */
mcheck_cpu_init(c);
- select_idle_routine(c);
-
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
@@ -1998,7 +1946,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_32
enable_sep_cpu();
#endif
- validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
if (boot_cpu_has_bug(X86_BUG_GDS))
@@ -2050,6 +1997,7 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
.top_of_stack = TOP_OF_INIT_STACK,
};
EXPORT_PER_CPU_SYMBOL(pcpu_hot);
+EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
@@ -2067,10 +2015,8 @@ static void wrmsrl_cstar(unsigned long val)
wrmsrl(MSR_CSTAR, val);
}
-/* May not be marked __init: used by software suspend */
-void syscall_init(void)
+static inline void idt_syscall_init(void)
{
- wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
if (ia32_enabled()) {
@@ -2104,6 +2050,23 @@ void syscall_init(void)
X86_EFLAGS_AC|X86_EFLAGS_ID);
}
+/* May not be marked __init: used by software suspend */
+void syscall_init(void)
+{
+ /* The default user and kernel segments */
+ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+
+ /*
+ * Except the IA32_STAR MSR, there is NO need to setup SYSCALL and
+ * SYSENTER MSRs for FRED, because FRED uses the ring 3 FRED
+ * entrypoint for SYSCALL and SYSENTER, and ERETU is the only legit
+ * instruction to return to ring 3 (both sysexit and sysret cause
+ * #UD when FRED is enabled).
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ idt_syscall_init();
+}
+
#else /* CONFIG_X86_64 */
#ifdef CONFIG_STACKPROTECTOR
@@ -2207,8 +2170,9 @@ void cpu_init_exception_handling(void)
/* paranoid_entry() gets the CPU number from the GDT */
setup_getcpu(cpu);
- /* IST vectors need TSS to be set up. */
- tss_setup_ist(tss);
+ /* For IDT mode, IST vectors need to be set in TSS. */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ tss_setup_ist(tss);
tss_setup_io_bitmap(tss);
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
@@ -2217,8 +2181,10 @@ void cpu_init_exception_handling(void)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
- /* Finally load the IDT */
- load_current_idt();
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ cpu_init_fred_exceptions();
+ else
+ load_current_idt();
}
/*
@@ -2343,11 +2309,13 @@ void __init arch_cpu_finalize_init(void)
{
identify_boot_cpu();
+ select_idle_routine();
+
/*
* identify_boot_cpu() initialized SMT support information, let the
* core code know.
*/
- cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings);
+ cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core);
if (!IS_ENABLED(CONFIG_SMP)) {
pr_info("CPU: ");
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 885281ae79a5..ea9e07d57c8d 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -2,6 +2,11 @@
#ifndef ARCH_X86_CPU_H
#define ARCH_X86_CPU_H
+#include <asm/cpu.h>
+#include <asm/topology.h>
+
+#include "topology.h"
+
/* attempt to consolidate cpu attributes */
struct cpu_dev {
const char *c_vendor;
@@ -71,14 +76,9 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c);
-extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
-extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
-extern int detect_extended_topology(struct cpuinfo_x86 *c);
-extern int detect_ht_early(struct cpuinfo_x86 *c);
-extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
-void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c);
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id);
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
@@ -96,4 +96,5 @@ static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
mode == SPECTRE_V2_EIBRS_RETPOLINE ||
mode == SPECTRE_V2_EIBRS_LFENCE;
}
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index e462c1d3800a..b7174209d855 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -82,6 +82,8 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
+ { X86_FEATURE_FRED, X86_FEATURE_LKGS },
+ { X86_FEATURE_FRED, X86_FEATURE_WRMSRNS },
{}
};
diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
index 0c179d684b3b..3baf3e435834 100644
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -5,6 +5,8 @@
#include <asm/apic.h>
#include <asm/processor.h>
+#include "cpu.h"
+
static int cpu_debug_show(struct seq_file *m, void *p)
{
unsigned long cpu = (unsigned long)m->private;
@@ -24,9 +26,12 @@ static int cpu_debug_show(struct seq_file *m, void *p)
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id);
- seq_printf(m, "max_cores: %u\n", c->x86_max_cores);
- seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package);
- seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings);
+ seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
+ seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg());
+ seq_printf(m, "num_threads: %u\n", __num_threads_per_package);
+ seq_printf(m, "num_cores: %u\n", __num_cores_per_package);
+ seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package);
+ seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core);
return 0;
}
@@ -42,12 +47,48 @@ static const struct file_operations dfs_cpu_ops = {
.release = single_release,
};
+static int dom_debug_show(struct seq_file *m, void *p)
+{
+ static const char *domain_names[TOPO_MAX_DOMAIN] = {
+ [TOPO_SMT_DOMAIN] = "Thread",
+ [TOPO_CORE_DOMAIN] = "Core",
+ [TOPO_MODULE_DOMAIN] = "Module",
+ [TOPO_TILE_DOMAIN] = "Tile",
+ [TOPO_DIE_DOMAIN] = "Die",
+ [TOPO_DIEGRP_DOMAIN] = "DieGrp",
+ [TOPO_PKG_DOMAIN] = "Package",
+ };
+ unsigned int dom, nthreads = 1;
+
+ for (dom = 0; dom < TOPO_MAX_DOMAIN; dom++) {
+ nthreads *= x86_topo_system.dom_size[dom];
+ seq_printf(m, "domain: %-10s shift: %u dom_size: %5u max_threads: %5u\n",
+ domain_names[dom], x86_topo_system.dom_shifts[dom],
+ x86_topo_system.dom_size[dom], nthreads);
+ }
+ return 0;
+}
+
+static int dom_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dom_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_dom_ops = {
+ .open = dom_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static __init int cpu_init_debugfs(void)
{
struct dentry *dir, *base = debugfs_create_dir("topo", arch_debugfs_dir);
unsigned long id;
char name[24];
+ debugfs_create_file("domains", 0444, base, NULL, &dfs_dom_ops);
+
dir = debugfs_create_dir("cpus", base);
for_each_possible_cpu(id) {
sprintf(name, "%lu", id);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index f0cd95502faa..c5191b06f9f2 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -18,14 +18,6 @@
#include "cpu.h"
-#define APICID_SOCKET_ID_BIT 6
-
-/*
- * nodes_per_socket: Stores the number of nodes per socket.
- * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8]
- */
-static u32 nodes_per_socket = 1;
-
#ifdef CONFIG_NUMA
/*
* To workaround broken NUMA config. Read the comment in
@@ -49,80 +41,6 @@ static int nearby_node(int apicid)
}
#endif
-static void hygon_get_topology_early(struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_TOPOEXT))
- smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
-}
-
-/*
- * Fixup core topology information for
- * (1) Hygon multi-node processors
- * Assumption: Number of cores in each internal node is the same.
- * (2) Hygon processors supporting compute units
- */
-static void hygon_get_topology(struct cpuinfo_x86 *c)
-{
- /* get information required for multi-node processors */
- if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- int err;
- u32 eax, ebx, ecx, edx;
-
- cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-
- c->topo.die_id = ecx & 0xff;
-
- c->topo.core_id = ebx & 0xff;
-
- if (smp_num_siblings > 1)
- c->x86_max_cores /= smp_num_siblings;
-
- /*
- * In case leaf B is available, use it to derive
- * topology information.
- */
- err = detect_extended_topology(c);
- if (!err)
- c->x86_coreid_bits = get_count_order(c->x86_max_cores);
-
- /*
- * Socket ID is ApicId[6] for the processors with model <= 0x3
- * when running on host.
- */
- if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3)
- c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT;
-
- cacheinfo_hygon_init_llc_id(c);
- } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
- u64 value;
-
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- c->topo.die_id = value & 7;
- c->topo.llc_id = c->topo.die_id;
- } else
- return;
-
- if (nodes_per_socket > 1)
- set_cpu_cap(c, X86_FEATURE_AMD_DCM);
-}
-
-/*
- * On Hygon setup the lower bits of the APIC id distinguish the cores.
- * Assumes number of cores is a power of two.
- */
-static void hygon_detect_cmp(struct cpuinfo_x86 *c)
-{
- unsigned int bits;
-
- bits = c->x86_coreid_bits;
- /* Low order bits define the core id (index of core in socket) */
- c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1);
- /* Convert the initial APIC ID into the socket ID */
- c->topo.pkg_id = c->topo.initial_apicid >> bits;
- /* Use package ID also for last level cache */
- c->topo.llc_id = c->topo.die_id = c->topo.pkg_id;
-}
-
static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
@@ -173,32 +91,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-static void early_init_hygon_mc(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- unsigned int bits, ecx;
-
- /* Multi core CPU? */
- if (c->extended_cpuid_level < 0x80000008)
- return;
-
- ecx = cpuid_ecx(0x80000008);
-
- c->x86_max_cores = (ecx & 0xff) + 1;
-
- /* CPU telling us the core id bits shift? */
- bits = (ecx >> 12) & 0xF;
-
- /* Otherwise recompute */
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
-
- c->x86_coreid_bits = bits;
-#endif
-}
-
static void bsp_init_hygon(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -212,18 +104,6 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
- if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- u32 ecx;
-
- ecx = cpuid_ecx(0x8000001e);
- __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1;
- } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
- u64 value;
-
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1;
- }
-
if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
!boot_cpu_has(X86_FEATURE_VIRT_SSBD)) {
/*
@@ -242,8 +122,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c)
{
u32 dummy;
- early_init_hygon_mc(c);
-
set_cpu_cap(c, X86_FEATURE_K8);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
@@ -284,8 +162,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c)
* we can set it unconditionally.
*/
set_cpu_cap(c, X86_FEATURE_VMMCALL);
-
- hygon_get_topology_early(c);
}
static void init_hygon(struct cpuinfo_x86 *c)
@@ -302,9 +178,6 @@ static void init_hygon(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
- /* get apicid instead of initial apic id from cpuid */
- c->topo.apicid = read_apic_id();
-
/*
* XXX someone from Hygon needs to confirm this DTRT
*
@@ -316,8 +189,6 @@ static void init_hygon(struct cpuinfo_x86 *c)
cpu_detect_cache_sizes(c);
- hygon_detect_cmp(c);
- hygon_get_topology(c);
srat_detect_node(c);
init_hygon_cacheinfo(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a927a8fc9624..be30d7fa2e66 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
return false;
}
+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -317,11 +401,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
check_memory_type_self_snoop_errata(c);
/*
- * Get the number of SMT siblings early from the extended topology
- * leaf, if available. Otherwise try the legacy SMT detection.
+ * Adjust the number of physical bits early because it affects the
+ * valid bits of the MTRR mask registers.
*/
- if (detect_extended_topology_early(c) < 0)
- detect_ht_early(c);
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme_early(c);
}
static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -482,90 +566,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -603,24 +603,6 @@ static void init_intel(struct cpuinfo_x86 *c)
intel_workarounds(c);
- /*
- * Detect the extended topology information if available. This
- * will reinitialise the initial_apicid which will be used
- * in init_intel_cacheinfo()
- */
- detect_extended_topology(c);
-
- if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
- /*
- * let's use the legacy cpuid vector 0x1 and 0x4 for topology
- * detection.
- */
- detect_num_cpu_cores(c);
-#ifdef CONFIG_X86_32
- detect_ht(c);
-#endif
- }
-
init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
@@ -702,9 +684,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_ia32_feat_ctl(c);
- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);
split_lock_init();
diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c
index 0771a905b286..5be2b1790282 100644
--- a/arch/x86/kernel/cpu/intel_pconfig.c
+++ b/arch/x86/kernel/cpu/intel_pconfig.c
@@ -7,6 +7,8 @@
* Author:
* Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
*/
+#include <linux/bug.h>
+#include <linux/limits.h>
#include <asm/cpufeature.h>
#include <asm/intel_pconfig.h>
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 2b46eb0fdf3a..9a0133ef7e20 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1231,7 +1231,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
return -ENODEV;
if (is_shared_bank(bank)) {
- nb = node_to_amd_nb(topology_die_id(cpu));
+ nb = node_to_amd_nb(topology_amd_node_id(cpu));
/* threshold descriptor already initialized on this node? */
if (nb && nb->bank4) {
@@ -1335,7 +1335,7 @@ static void threshold_remove_bank(struct threshold_bank *bank)
* The last CPU on this node using the shared bank is going
* away, remove that bank now.
*/
- nb = node_to_amd_nb(topology_die_id(smp_processor_id()));
+ nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id()));
nb->bank4 = NULL;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index bc39252bc54f..b5cc557cfc37 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -46,6 +46,7 @@
#include <linux/hardirq.h>
#include <linux/kexec.h>
+#include <asm/fred.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -2166,6 +2167,31 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
exc_machine_check_user(regs);
local_db_restore(dr7);
}
+
+#ifdef CONFIG_X86_FRED
+/*
+ * When occurred on different ring level, i.e., from user or kernel
+ * context, #MCE needs to be handled on different stack: User #MCE
+ * on current task stack, while kernel #MCE on a dedicated stack.
+ *
+ * This is exactly how FRED event delivery invokes an exception
+ * handler: ring 3 event on level 0 stack, i.e., current task stack;
+ * ring 0 event on the #MCE dedicated stack specified in the
+ * IA32_FRED_STKLVLS MSR. So unlike IDT, the FRED machine check entry
+ * stub doesn't do stack switch.
+ */
+DEFINE_FREDENTRY_MCE(exc_machine_check)
+{
+ unsigned long dr7;
+
+ dr7 = local_db_save();
+ if (user_mode(regs))
+ exc_machine_check_user(regs);
+ else
+ exc_machine_check_kernel(regs);
+ local_db_restore(dr7);
+}
+#endif
#else
/* 32bit unified entry point */
DEFINE_IDTENTRY_RAW(exc_machine_check)
@@ -2431,7 +2457,7 @@ static void mce_enable_ce(void *all)
__mcheck_cpu_init_timer();
}
-static struct bus_type mce_subsys = {
+static const struct bus_type mce_subsys = {
.name = "machinecheck",
.dev_name = "machinecheck",
};
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 72f0695c3dc1..94953d749475 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -430,11 +430,9 @@ static void trigger_thr_int(void *info)
static u32 get_nbc_for_node(int node_id)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
u32 cores_per_node;
- cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
-
+ cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg();
return cores_per_node * node_id;
}
@@ -543,8 +541,8 @@ static void do_inject(void)
if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
b == 4 &&
boot_cpu_data.x86 < 0x17) {
- toggle_nb_mca_mst_cpu(topology_die_id(cpu));
- cpu = get_nbc_for_node(topology_die_id(cpu));
+ toggle_nb_mca_mst_cpu(topology_amd_node_id(cpu));
+ cpu = get_nbc_for_node(topology_amd_node_id(cpu));
}
cpus_read_lock();
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 857e608af641..5f0414452b67 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -641,7 +641,7 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c)
{
u64 llc_size = c->x86_cache_size * 1024ULL;
- do_div(llc_size, c->x86_max_cores);
+ do_div(llc_size, topology_num_cores_per_package());
llc_size_per_core = (unsigned int)llc_size;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 01fa06dd06b6..45e0e70e238c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -539,19 +539,18 @@ static void __init ms_hyperv_init_platform(void)
*/
x86_platform.apic_post_init = hyperv_init;
hyperv_setup_mmu_ops();
- /* Setup the IDT for hypervisor callback */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);
- /* Setup the IDT for reenlightenment notifications */
+ /* Install system interrupt handler for hypervisor callback */
+ sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
+
+ /* Install system interrupt handler for reenlightenment notifications */
if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) {
- alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
- asm_sysvec_hyperv_reenlightenment);
+ sysvec_install(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
}
- /* Setup the IDT for stimer0 */
+ /* Install system interrupt handler for stimer0 */
if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
- alloc_intr_gate(HYPERV_STIMER0_VECTOR,
- asm_sysvec_hyperv_stimer0);
+ sysvec_install(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
}
# ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d3524778a545..422a4ddc2ab7 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -108,6 +108,9 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
+ if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return;
+
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 26a427fa84ea..eeac00d20926 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -6,6 +6,7 @@
* Authors: Fenghua Yu <fenghua.yu@intel.com>,
* H. Peter Anvin <hpa@linux.intel.com>
*/
+#include <linux/printk.h>
#include <asm/processor.h>
#include <asm/archrandom.h>
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 19e0681f0435..83e40341583e 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -16,6 +16,7 @@
#define pr_fmt(fmt) "resctrl: " fmt
+#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
@@ -25,8 +26,15 @@
#include <asm/resctrl.h>
#include "internal.h"
-/* Mutex to protect rdtgroup access. */
-DEFINE_MUTEX(rdtgroup_mutex);
+/*
+ * rdt_domain structures are kfree()d when their last CPU goes offline,
+ * and allocated when the first CPU in a new domain comes online.
+ * The rdt_resource's domain list is updated when this happens. Readers of
+ * the domain list must either take cpus_read_lock(), or rely on an RCU
+ * read-side critical section, to avoid observing concurrent modification.
+ * All writers take this mutex:
+ */
+static DEFINE_MUTEX(domain_list_lock);
/*
* The cached resctrl_pqr_state is strictly per CPU and can never be
@@ -136,15 +144,15 @@ static inline void cache_alloc_hsw_probe(void)
{
struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
struct rdt_resource *r = &hw_res->r_resctrl;
- u32 l, h, max_cbm = BIT_MASK(20) - 1;
+ u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;
- if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
+ if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
return;
- rdmsr(MSR_IA32_L3_CBM_BASE, l, h);
+ rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
/* If all the bits were set in MSR, return success */
- if (l != max_cbm)
+ if (l3_cbm_0 != max_cbm)
return;
hw_res->num_closid = 4;
@@ -231,9 +239,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- union cpuid_0x10_3_eax eax;
- union cpuid_0x10_x_edx edx;
- u32 ebx, ecx, subleaf;
+ u32 eax, ebx, ecx, edx, subleaf;
/*
* Query CPUID_Fn80000020_EDX_x01 for MBA and
@@ -241,9 +247,9 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
*/
subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;
- cpuid_count(0x80000020, subleaf, &eax.full, &ebx, &ecx, &edx.full);
- hw_res->num_closid = edx.split.cos_max + 1;
- r->default_ctrl = MAX_MBA_BW_AMD;
+ cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
+ hw_res->num_closid = edx + 1;
+ r->default_ctrl = 1 << eax;
/* AMD does not use delay */
r->membw.delay_linear = false;
@@ -512,6 +518,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
struct rdt_domain *d;
int err;
+ lockdep_assert_held(&domain_list_lock);
+
d = rdt_find_domain(r, id, &add_pos);
if (IS_ERR(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -545,11 +553,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
}
- list_add_tail(&d->list, add_pos);
+ list_add_tail_rcu(&d->list, add_pos);
err = resctrl_online_domain(r, d);
if (err) {
- list_del(&d->list);
+ list_del_rcu(&d->list);
+ synchronize_rcu();
domain_free(hw_dom);
}
}
@@ -560,6 +569,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
+ lockdep_assert_held(&domain_list_lock);
+
d = rdt_find_domain(r, id, NULL);
if (IS_ERR_OR_NULL(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -570,7 +581,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
resctrl_offline_domain(r, d);
- list_del(&d->list);
+ list_del_rcu(&d->list);
+ synchronize_rcu();
/*
* rdt_domain "d" is going to be freed below, so clear
@@ -582,73 +594,47 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
return;
}
-
- if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
- if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
- cancel_delayed_work(&d->mbm_over);
- mbm_setup_overflow_handler(d, 0);
- }
- if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
- has_busy_rmid(r, d)) {
- cancel_delayed_work(&d->cqm_limbo);
- cqm_setup_limbo_handler(d, 0);
- }
- }
}
static void clear_closid_rmid(int cpu)
{
struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
- state->default_closid = 0;
- state->default_rmid = 0;
- state->cur_closid = 0;
- state->cur_rmid = 0;
- wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
+ state->default_closid = RESCTRL_RESERVED_CLOSID;
+ state->default_rmid = RESCTRL_RESERVED_RMID;
+ state->cur_closid = RESCTRL_RESERVED_CLOSID;
+ state->cur_rmid = RESCTRL_RESERVED_RMID;
+ wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
+ RESCTRL_RESERVED_CLOSID);
}
-static int resctrl_online_cpu(unsigned int cpu)
+static int resctrl_arch_online_cpu(unsigned int cpu)
{
struct rdt_resource *r;
- mutex_lock(&rdtgroup_mutex);
+ mutex_lock(&domain_list_lock);
for_each_capable_rdt_resource(r)
domain_add_cpu(cpu, r);
- /* The cpu is set in default rdtgroup after online. */
- cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+ mutex_unlock(&domain_list_lock);
+
clear_closid_rmid(cpu);
- mutex_unlock(&rdtgroup_mutex);
+ resctrl_online_cpu(cpu);
return 0;
}
-static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
-{
- struct rdtgroup *cr;
-
- list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
- if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
- break;
- }
- }
-}
-
-static int resctrl_offline_cpu(unsigned int cpu)
+static int resctrl_arch_offline_cpu(unsigned int cpu)
{
- struct rdtgroup *rdtgrp;
struct rdt_resource *r;
- mutex_lock(&rdtgroup_mutex);
+ resctrl_offline_cpu(cpu);
+
+ mutex_lock(&domain_list_lock);
for_each_capable_rdt_resource(r)
domain_remove_cpu(cpu, r);
- list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
- clear_childcpus(rdtgrp, cpu);
- break;
- }
- }
+ mutex_unlock(&domain_list_lock);
+
clear_closid_rmid(cpu);
- mutex_unlock(&rdtgroup_mutex);
return 0;
}
@@ -968,7 +954,8 @@ static int __init resctrl_late_init(void)
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"x86/resctrl/cat:online:",
- resctrl_online_cpu, resctrl_offline_cpu);
+ resctrl_arch_online_cpu,
+ resctrl_arch_offline_cpu);
if (state < 0)
return state;
@@ -992,8 +979,14 @@ late_initcall(resctrl_late_init);
static void __exit resctrl_exit(void)
{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
cpuhp_remove_state(rdt_online);
+
rdtgroup_exit();
+
+ if (r->mon_capable)
+ rdt_put_mon_l3_config();
}
__exitcall(resctrl_exit);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index beccb0e87ba7..7997b47743a2 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -19,6 +19,8 @@
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/tick.h>
+
#include "internal.h"
/*
@@ -210,6 +212,9 @@ static int parse_line(char *line, struct resctrl_schema *s,
struct rdt_domain *d;
unsigned long dom_id;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
(r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
@@ -314,6 +319,9 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
struct rdt_domain *d;
u32 idx;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
@@ -379,11 +387,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
return -EINVAL;
buf[nbytes - 1] = '\0';
- cpus_read_lock();
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
rdtgroup_kn_unlock(of->kn);
- cpus_read_unlock();
return -ENOENT;
}
rdt_last_cmd_clear();
@@ -445,7 +451,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
out:
rdt_staged_configs_clear();
rdtgroup_kn_unlock(of->kn);
- cpus_read_unlock();
return ret ?: nbytes;
}
@@ -465,6 +470,9 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
bool sep = false;
u32 ctrl_val;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
seq_printf(s, "%*s:", max_name_width, schema->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
@@ -522,12 +530,24 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
return ret;
}
+static int smp_mon_event_count(void *arg)
+{
+ mon_event_count(arg);
+
+ return 0;
+}
+
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first)
{
+ int cpu;
+
+ /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
/*
- * setup the parameters to send to the IPI to read the data.
+ * Setup the parameters to pass to mon_event_count() to read the data.
*/
rr->rgrp = rdtgrp;
rr->evtid = evtid;
@@ -535,8 +555,26 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
rr->d = d;
rr->val = 0;
rr->first = first;
+ rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
+ if (IS_ERR(rr->arch_mon_ctx)) {
+ rr->err = -EINVAL;
+ return;
+ }
+
+ cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
+
+ /*
+ * cpumask_any_housekeeping() prefers housekeeping CPUs, but
+ * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
+ * MPAM's resctrl_arch_rmid_read() is unable to read the
+ * counters on some platforms if its called in IRQ context.
+ */
+ if (tick_nohz_full_cpu(cpu))
+ smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+ else
+ smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
- smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+ resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
}
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index a4f1aa15f0a2..c99f26ebe7a6 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -7,6 +7,9 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h>
+#include <linux/tick.h>
+
+#include <asm/resctrl.h>
#define L3_QOS_CDP_ENABLE 0x01ULL
@@ -18,7 +21,6 @@
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
-#define MAX_MBA_BW_AMD 0x800
#define MBM_CNTR_WIDTH_OFFSET_AMD 20
#define RMID_VAL_ERROR BIT_ULL(63)
@@ -54,6 +56,46 @@
/* Max event bits supported */
#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
+/**
+ * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
+ * aren't marked nohz_full
+ * @mask: The mask to pick a CPU from.
+ * @exclude_cpu:The CPU to avoid picking.
+ *
+ * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred. Pass
+ * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
+ *
+ * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
+ */
+static inline unsigned int
+cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
+{
+ unsigned int cpu, hk_cpu;
+
+ if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+ cpu = cpumask_any(mask);
+ else
+ cpu = cpumask_any_but(mask, exclude_cpu);
+
+ if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+ return cpu;
+
+ /* If the CPU picked isn't marked nohz_full nothing more needs doing. */
+ if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
+ return cpu;
+
+ /* Try to find a CPU that isn't nohz_full to use in preference */
+ hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+ if (hk_cpu == exclude_cpu)
+ hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
+
+ if (hk_cpu < nr_cpu_ids)
+ cpu = hk_cpu;
+
+ return cpu;
+}
+
struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
@@ -69,9 +111,6 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
return container_of(kfc, struct rdt_fs_context, kfc);
}
-DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
-DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
-
/**
* struct mon_evt - Entry in the event list of a resource
* @evtid: event id
@@ -112,12 +151,12 @@ struct rmid_read {
bool first;
int err;
u64 val;
+ void *arch_mon_ctx;
};
-extern bool rdt_alloc_capable;
-extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;
extern struct list_head resctrl_schema_all;
+extern bool resctrl_mounted;
enum rdt_group_type {
RDTCTRL_GROUP = 0,
@@ -296,14 +335,10 @@ struct rftype {
* struct mbm_state - status for each MBM counter in each domain
* @prev_bw_bytes: Previous bytes value read for bandwidth calculation
* @prev_bw: The most recent bandwidth in MBps
- * @delta_bw: Difference between the current and previous bandwidth
- * @delta_comp: Indicates whether to compute the delta_bw
*/
struct mbm_state {
u64 prev_bw_bytes;
u32 prev_bw;
- u32 delta_bw;
- bool delta_comp;
};
/**
@@ -395,6 +430,8 @@ struct rdt_parse_data {
* @msr_update: Function pointer to update QOS MSRs
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
+ * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth
+ * Monitoring Event Configuration (BMEC) is supported.
* @cdp_enabled: CDP state of this resource
*
* Members of this structure are either private to the architecture
@@ -409,6 +446,7 @@ struct rdt_hw_resource {
struct rdt_resource *r);
unsigned int mon_scale;
unsigned int mbm_width;
+ unsigned int mbm_cfg_mask;
bool cdp_enabled;
};
@@ -426,8 +464,6 @@ extern struct mutex rdtgroup_mutex;
extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
-DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
-
extern struct dentry *debugfs_resctrl;
enum resctrl_res_level {
@@ -543,9 +579,10 @@ void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
-int alloc_rmid(void);
-void free_rmid(u32 rmid);
+int alloc_rmid(u32 closid);
+void free_rmid(u32 closid, u32 rmid);
int rdt_get_mon_l3_config(struct rdt_resource *r);
+void __exit rdt_put_mon_l3_config(void);
bool __init rdt_cpu_has(int flag);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
@@ -553,17 +590,21 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_domain *dom,
- unsigned long delay_ms);
+ unsigned long delay_ms,
+ int exclude_cpu);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+ int exclude_cpu);
void cqm_handle_limbo(struct work_struct *work);
-bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+bool has_busy_rmid(struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
void __init mbm_config_rftype_init(const char *config);
void rdt_staged_configs_clear(void);
+bool closid_allocated(unsigned int closid);
+int resctrl_find_cleanest_closid(void);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f136ac046851..c34a35ec0f03 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,7 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -24,7 +25,20 @@
#include "internal.h"
+/**
+ * struct rmid_entry - dirty tracking for all RMID.
+ * @closid: The CLOSID for this entry.
+ * @rmid: The RMID for this entry.
+ * @busy: The number of domains with cached data using this RMID.
+ * @list: Member of the rmid_free_lru list when busy == 0.
+ *
+ * Depending on the architecture the correct monitor is accessed using
+ * both @closid and @rmid, or @rmid only.
+ *
+ * Take the rdtgroup_mutex when accessing.
+ */
struct rmid_entry {
+ u32 closid;
u32 rmid;
int busy;
struct list_head list;
@@ -38,6 +52,13 @@ struct rmid_entry {
static LIST_HEAD(rmid_free_lru);
/*
+ * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
+ * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
+ * Indexed by CLOSID. Protected by rdtgroup_mutex.
+ */
+static u32 *closid_num_dirty_rmid;
+
+/*
* @rmid_limbo_count - count of currently unused but (potentially)
* dirty RMIDs.
* This counts RMIDs that no one is currently using but that
@@ -136,12 +157,29 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
return val;
}
-static inline struct rmid_entry *__rmid_entry(u32 rmid)
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are independent numbers, there is only one source of traffic
+ * with an RMID value of '1'.
+ * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
+ * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
+ * value is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
{
struct rmid_entry *entry;
+ u32 closid, rmid;
+
+ entry = &rmid_ptrs[idx];
+ resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
- entry = &rmid_ptrs[rmid];
- WARN_ON(entry->rmid != rmid);
+ WARN_ON_ONCE(entry->closid != closid);
+ WARN_ON_ONCE(entry->rmid != rmid);
return entry;
}
@@ -190,7 +228,8 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
}
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid)
+ u32 unused, u32 rmid,
+ enum resctrl_event_id eventid)
{
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct arch_mbm_state *am;
@@ -230,7 +269,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
}
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid, u64 *val)
+ u32 unused, u32 rmid, enum resctrl_event_id eventid,
+ u64 *val, void *ignored)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
@@ -238,6 +278,8 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
u64 msr_val, chunks;
int ret;
+ resctrl_arch_rmid_read_context_check();
+
if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
return -EINVAL;
@@ -260,6 +302,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
return 0;
}
+static void limbo_release_entry(struct rmid_entry *entry)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ rmid_limbo_count--;
+ list_add_tail(&entry->list, &rmid_free_lru);
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+ closid_num_dirty_rmid[entry->closid]--;
+}
+
/*
* Check the RMIDs that are marked as busy for this domain. If the
* reported LLC occupancy is below the threshold clear the busy bit and
@@ -269,11 +322,20 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
void __check_limbo(struct rdt_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
- u32 crmid = 1, nrmid;
+ u32 idx, cur_idx = 1;
+ void *arch_mon_ctx;
bool rmid_dirty;
u64 val = 0;
+ arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+ if (IS_ERR(arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(arch_mon_ctx));
+ return;
+ }
+
/*
* Skip RMID 0 and start from RMID 1 and check all the RMIDs that
* are marked as busy for occupancy < threshold. If the occupancy
@@ -281,53 +343,125 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* RMID and move it to the free list when the counter reaches 0.
*/
for (;;) {
- nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
- if (nrmid >= r->num_rmid)
+ idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+ if (idx >= idx_limit)
break;
- entry = __rmid_entry(nrmid);
-
- if (resctrl_arch_rmid_read(r, d, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, &val)) {
+ entry = __rmid_entry(idx);
+ if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+ QOS_L3_OCCUP_EVENT_ID, &val,
+ arch_mon_ctx)) {
rmid_dirty = true;
} else {
rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
}
if (force_free || !rmid_dirty) {
- clear_bit(entry->rmid, d->rmid_busy_llc);
- if (!--entry->busy) {
- rmid_limbo_count--;
- list_add_tail(&entry->list, &rmid_free_lru);
- }
+ clear_bit(idx, d->rmid_busy_llc);
+ if (!--entry->busy)
+ limbo_release_entry(entry);
}
- crmid = nrmid + 1;
+ cur_idx = idx + 1;
}
+
+ resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}
-bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
+bool has_busy_rmid(struct rdt_domain *d)
{
- return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+ return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
+}
+
+static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
+{
+ struct rmid_entry *itr;
+ u32 itr_idx, cmp_idx;
+
+ if (list_empty(&rmid_free_lru))
+ return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
+
+ list_for_each_entry(itr, &rmid_free_lru, list) {
+ /*
+ * Get the index of this free RMID, and the index it would need
+ * to be if it were used with this CLOSID.
+ * If the CLOSID is irrelevant on this architecture, the two
+ * index values are always the same on every entry and thus the
+ * very first entry will be returned.
+ */
+ itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
+ cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
+
+ if (itr_idx == cmp_idx)
+ return itr;
+ }
+
+ return ERR_PTR(-ENOSPC);
+}
+
+/**
+ * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
+ * RMID are clean, or the CLOSID that has
+ * the most clean RMID.
+ *
+ * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
+ * may not be able to allocate clean RMID. To avoid this the allocator will
+ * choose the CLOSID with the most clean RMID.
+ *
+ * When the CLOSID and RMID are independent numbers, the first free CLOSID will
+ * be returned.
+ */
+int resctrl_find_cleanest_closid(void)
+{
+ u32 cleanest_closid = ~0;
+ int i = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+ return -EIO;
+
+ for (i = 0; i < closids_supported(); i++) {
+ int num_dirty;
+
+ if (closid_allocated(i))
+ continue;
+
+ num_dirty = closid_num_dirty_rmid[i];
+ if (num_dirty == 0)
+ return i;
+
+ if (cleanest_closid == ~0)
+ cleanest_closid = i;
+
+ if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
+ cleanest_closid = i;
+ }
+
+ if (cleanest_closid == ~0)
+ return -ENOSPC;
+
+ return cleanest_closid;
}
/*
- * As of now the RMIDs allocation is global.
- * However we keep track of which packages the RMIDs
- * are used to optimize the limbo list management.
+ * For MPAM the RMID value is not unique, and has to be considered with
+ * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
+ * allows all domains to be managed by a single free list.
+ * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
*/
-int alloc_rmid(void)
+int alloc_rmid(u32 closid)
{
struct rmid_entry *entry;
lockdep_assert_held(&rdtgroup_mutex);
- if (list_empty(&rmid_free_lru))
- return rmid_limbo_count ? -EBUSY : -ENOSPC;
+ entry = resctrl_find_free_rmid(closid);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
- entry = list_first_entry(&rmid_free_lru,
- struct rmid_entry, list);
list_del(&entry->list);
-
return entry->rmid;
}
@@ -335,47 +469,50 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_domain *d;
- int cpu, err;
- u64 val = 0;
+ u32 idx;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
+ idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
entry->busy = 0;
- cpu = get_cpu();
list_for_each_entry(d, &r->domains, list) {
- if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
- err = resctrl_arch_rmid_read(r, d, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID,
- &val);
- if (err || val <= resctrl_rmid_realloc_threshold)
- continue;
- }
-
/*
* For the first limbo RMID in the domain,
* setup up the limbo worker.
*/
- if (!has_busy_rmid(r, d))
- cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
- set_bit(entry->rmid, d->rmid_busy_llc);
+ if (!has_busy_rmid(d))
+ cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
+ set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
- put_cpu();
- if (entry->busy)
- rmid_limbo_count++;
- else
- list_add_tail(&entry->list, &rmid_free_lru);
+ rmid_limbo_count++;
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+ closid_num_dirty_rmid[entry->closid]++;
}
-void free_rmid(u32 rmid)
+void free_rmid(u32 closid, u32 rmid)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct rmid_entry *entry;
- if (!rmid)
- return;
-
lockdep_assert_held(&rdtgroup_mutex);
- entry = __rmid_entry(rmid);
+ /*
+ * Do not allow the default rmid to be free'd. Comparing by index
+ * allows architectures that ignore the closid parameter to avoid an
+ * unnecessary check.
+ */
+ if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ RESCTRL_RESERVED_RMID))
+ return;
+
+ entry = __rmid_entry(idx);
if (is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
@@ -383,33 +520,36 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
-static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 rmid,
- enum resctrl_event_id evtid)
+static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
+ u32 rmid, enum resctrl_event_id evtid)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+
switch (evtid) {
case QOS_L3_MBM_TOTAL_EVENT_ID:
- return &d->mbm_total[rmid];
+ return &d->mbm_total[idx];
case QOS_L3_MBM_LOCAL_EVENT_ID:
- return &d->mbm_local[rmid];
+ return &d->mbm_local[idx];
default:
return NULL;
}
}
-static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
struct mbm_state *m;
u64 tval = 0;
if (rr->first) {
- resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid);
- m = get_mbm_state(rr->d, rmid, rr->evtid);
+ resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
+ m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
if (m)
memset(m, 0, sizeof(struct mbm_state));
return 0;
}
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval);
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
+ &tval, rr->arch_mon_ctx);
if (rr->err)
return rr->err;
@@ -421,6 +561,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
/*
* mbm_bw_count() - Update bw count from values previously read by
* __mon_event_count().
+ * @closid: The closid used to identify the cached mbm_state.
* @rmid: The rmid used to identify the cached mbm_state.
* @rr: The struct rmid_read populated by __mon_event_count().
*
@@ -429,9 +570,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
* __mon_event_count() is compared with the chunks value from the previous
* invocation. This must be called once per second to maintain values in MBps.
*/
-static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;
cur_bytes = rr->val;
@@ -440,14 +582,11 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
cur_bw = bytes / SZ_1M;
- if (m->delta_comp)
- m->delta_bw = abs(cur_bw - m->prev_bw);
- m->delta_comp = false;
m->prev_bw = cur_bw;
}
/*
- * This is called via IPI to read the CQM/MBM counters
+ * This is scheduled by mon_event_read() to read the CQM/MBM counters
* on a domain.
*/
void mon_event_count(void *info)
@@ -459,7 +598,7 @@ void mon_event_count(void *info)
rdtgrp = rr->rgrp;
- ret = __mon_event_count(rdtgrp->mon.rmid, rr);
+ ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
/*
* For Ctrl groups read data from child monitor groups and
@@ -470,7 +609,8 @@ void mon_event_count(void *info)
if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->mon.rmid, rr) == 0)
+ if (__mon_event_count(entry->closid, entry->mon.rmid,
+ rr) == 0)
ret = 0;
}
}
@@ -520,9 +660,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
- u32 cur_bw, delta_bw, user_bw;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
+ u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
@@ -533,7 +673,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ pmbm_data = &dom_mbm->mbm_local[idx];
dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -543,7 +684,6 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
cur_bw = pmbm_data->prev_bw;
user_bw = dom_mba->mbps_val[closid];
- delta_bw = pmbm_data->delta_bw;
/* MBA resource doesn't support CDP */
cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
@@ -555,52 +695,35 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
list_for_each_entry(entry, head, mon.crdtgrp_list) {
cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
cur_bw += cmbm_data->prev_bw;
- delta_bw += cmbm_data->delta_bw;
}
/*
* Scale up/down the bandwidth linearly for the ctrl group. The
* bandwidth step is the bandwidth granularity specified by the
* hardware.
- *
- * The delta_bw is used when increasing the bandwidth so that we
- * dont alternately increase and decrease the control values
- * continuously.
- *
- * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if
- * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep
- * switching between 90 and 110 continuously if we only check
- * cur_bw < user_bw.
+ * Always increase throttling if current bandwidth is above the
+ * target set by user.
+ * But avoid thrashing up and down on every poll by checking
+ * whether a decrease in throttling is likely to push the group
+ * back over target. E.g. if currently throttling to 30% of bandwidth
+ * on a system with 10% granularity steps, check whether moving to
+ * 40% would go past the limit by multiplying current bandwidth by
+ * "(30 + 10) / 30".
*/
if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
} else if (cur_msr_val < MAX_MBA_BW &&
- (user_bw > (cur_bw + delta_bw))) {
+ (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
} else {
return;
}
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
-
- /*
- * Delta values are updated dynamically package wise for each
- * rdtgrp every time the throttle MSR changes value.
- *
- * This is because (1)the increase in bandwidth is not perfectly
- * linear and only "approximately" linear even when the hardware
- * says it is linear.(2)Also since MBA is a core specific
- * mechanism, the delta values vary based on number of cores used
- * by the rdtgrp.
- */
- pmbm_data->delta_comp = true;
- list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
- cmbm_data->delta_comp = true;
- }
}
-static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
+static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
+ u32 closid, u32 rmid)
{
struct rmid_read rr;
@@ -615,12 +738,28 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
if (is_mbm_total_enabled()) {
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
- __mon_event_count(rmid, &rr);
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (IS_ERR(rr.arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(rr.arch_mon_ctx));
+ return;
+ }
+
+ __mon_event_count(closid, rmid, &rr);
+
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
- __mon_event_count(rmid, &rr);
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (IS_ERR(rr.arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(rr.arch_mon_ctx));
+ return;
+ }
+
+ __mon_event_count(closid, rmid, &rr);
/*
* Call the MBA software controller only for the
@@ -628,7 +767,9 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
* the software controller explicitly.
*/
if (is_mba_sc(NULL))
- mbm_bw_count(rmid, &rr);
+ mbm_bw_count(closid, rmid, &rr);
+
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
}
@@ -639,106 +780,193 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
void cqm_handle_limbo(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- int cpu = smp_processor_id();
- struct rdt_resource *r;
struct rdt_domain *d;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
d = container_of(work, struct rdt_domain, cqm_limbo.work);
__check_limbo(d, false);
- if (has_busy_rmid(r, d))
- schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+ if (has_busy_rmid(d)) {
+ d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ RESCTRL_PICK_ANY_CPU);
+ schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
+ delay);
+ }
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
}
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
+ * domain.
+ * @dom: The domain the limbo handler should run for.
+ * @delay_ms: How far in the future the handler should run.
+ * @exclude_cpu: Which CPU the handler should not run on,
+ * RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+ int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;
- cpu = cpumask_any(&dom->cpu_mask);
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
dom->cqm_work_cpu = cpu;
- schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+ if (cpu < nr_cpu_ids)
+ schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}
void mbm_handle_overflow(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
struct rdtgroup *prgrp, *crgrp;
- int cpu = smp_processor_id();
struct list_head *head;
struct rdt_resource *r;
struct rdt_domain *d;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- if (!static_branch_likely(&rdt_mon_enable_key))
+ /*
+ * If the filesystem has been unmounted this work no longer needs to
+ * run.
+ */
+ if (!resctrl_mounted || !resctrl_arch_mon_capable())
goto out_unlock;
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
d = container_of(work, struct rdt_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(r, d, prgrp->mon.rmid);
+ mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(r, d, crgrp->mon.rmid);
+ mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
}
- schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+ /*
+ * Re-check for housekeeping CPUs. This allows the overflow handler to
+ * move off a nohz_full CPU quickly.
+ */
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ RESCTRL_PICK_ANY_CPU);
+ schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
out_unlock:
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
}
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
+ * domain.
+ * @dom: The domain the overflow handler should run for.
+ * @delay_ms: How far in the future the handler should run.
+ * @exclude_cpu: Which CPU the handler should not run on,
+ * RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+ int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;
- if (!static_branch_likely(&rdt_mon_enable_key))
+ /*
+ * When a domain comes online there is no guarantee the filesystem is
+ * mounted. If not, there is no need to catch counter overflow.
+ */
+ if (!resctrl_mounted || !resctrl_arch_mon_capable())
return;
- cpu = cpumask_any(&dom->cpu_mask);
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
dom->mbm_work_cpu = cpu;
- schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+
+ if (cpu < nr_cpu_ids)
+ schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}
static int dom_data_init(struct rdt_resource *r)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+ u32 num_closid = resctrl_arch_get_num_closid(r);
struct rmid_entry *entry = NULL;
- int i, nr_rmids;
+ int err = 0, i;
+ u32 idx;
+
+ mutex_lock(&rdtgroup_mutex);
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ u32 *tmp;
+
+ /*
+ * If the architecture hasn't provided a sanitised value here,
+ * this may result in larger arrays than necessary. Resctrl will
+ * use a smaller system wide value based on the resources in
+ * use.
+ */
+ tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
- nr_rmids = r->num_rmid;
- rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
- if (!rmid_ptrs)
- return -ENOMEM;
+ closid_num_dirty_rmid = tmp;
+ }
+
+ rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
+ if (!rmid_ptrs) {
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ kfree(closid_num_dirty_rmid);
+ closid_num_dirty_rmid = NULL;
+ }
+ err = -ENOMEM;
+ goto out_unlock;
+ }
- for (i = 0; i < nr_rmids; i++) {
+ for (i = 0; i < idx_limit; i++) {
entry = &rmid_ptrs[i];
INIT_LIST_HEAD(&entry->list);
- entry->rmid = i;
+ resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
list_add_tail(&entry->list, &rmid_free_lru);
}
/*
- * RMID 0 is special and is always allocated. It's used for all
- * tasks that are not monitored.
+ * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
+ * are always allocated. These are used for the rdtgroup_default
+ * control group, which will be setup later in rdtgroup_init().
*/
- entry = __rmid_entry(0);
+ idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ RESCTRL_RESERVED_RMID);
+ entry = __rmid_entry(idx);
list_del(&entry->list);
- return 0;
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+static void __exit dom_data_exit(void)
+{
+ mutex_lock(&rdtgroup_mutex);
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ kfree(closid_num_dirty_rmid);
+ closid_num_dirty_rmid = NULL;
+ }
+
+ kfree(rmid_ptrs);
+ rmid_ptrs = NULL;
+
+ mutex_unlock(&rdtgroup_mutex);
}
static struct mon_evt llc_occupancy_event = {
@@ -813,6 +1041,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
return ret;
if (rdt_cpu_has(X86_FEATURE_BMEC)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* Detect list of bandwidth sources that can be tracked */
+ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
+ hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
mbm_total_event.configurable = true;
mbm_config_rftype_init("mbm_total_bytes_config");
@@ -830,6 +1064,11 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
return 0;
}
+void __exit rdt_put_mon_l3_config(void)
+{
+ dom_data_exit();
+}
+
void __init intel_rdt_mbm_apply_quirk(void)
{
int cf_index;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 8f559eeae08e..884b88e25141 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -581,7 +581,7 @@ static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
if (ret)
goto err_cpus;
- if (rdt_mon_capable) {
+ if (resctrl_arch_mon_capable()) {
ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
if (ret)
goto err_cpus_list;
@@ -628,7 +628,7 @@ static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
if (ret)
goto err_cpus;
- if (rdt_mon_capable) {
+ if (resctrl_arch_mon_capable()) {
ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
if (ret)
goto err_cpus_list;
@@ -752,7 +752,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
* anymore when this group would be used for pseudo-locking. This
* is safe to call on platforms not capable of monitoring.
*/
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
ret = 0;
goto out;
@@ -776,8 +776,8 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
int ret;
- if (rdt_mon_capable) {
- ret = alloc_rmid();
+ if (resctrl_arch_mon_capable()) {
+ ret = alloc_rmid(rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("Out of RMIDs\n");
return ret;
@@ -787,7 +787,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
ret = rdtgroup_locksetup_user_restore(rdtgrp);
if (ret) {
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
return ret;
}
@@ -844,6 +844,9 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
struct rdt_domain *d_i;
bool ret = false;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
return true;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 69a1de92384a..011e17efb1a6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -35,6 +35,10 @@
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);
@@ -42,6 +46,9 @@ LIST_HEAD(rdt_all_groups);
/* list of entries for the schemata file */
LIST_HEAD(resctrl_schema_all);
+/* The filesystem can only be mounted once. */
+bool resctrl_mounted;
+
/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
@@ -102,7 +109,7 @@ void rdt_staged_configs_clear(void)
*
* Using a global CLOSID across all resources has some advantages and
* some drawbacks:
- * + We can simply set "current->closid" to assign a task to a resource
+ * + We can simply set current's closid to assign a task to a resource
* group.
* + Context switch code can avoid extra memory references deciding which
* CLOSID to load into the PQR_ASSOC MSR
@@ -111,7 +118,7 @@ void rdt_staged_configs_clear(void)
* - Our choices on how to configure each resource become progressively more
* limited as the number of resources grows.
*/
-static int closid_free_map;
+static unsigned long closid_free_map;
static int closid_free_map_len;
int closids_supported(void)
@@ -130,26 +137,39 @@ static void closid_init(void)
closid_free_map = BIT_MASK(rdt_min_closid) - 1;
- /* CLOSID 0 is always reserved for the default group */
- closid_free_map &= ~1;
+ /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
+ __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
closid_free_map_len = rdt_min_closid;
}
static int closid_alloc(void)
{
- u32 closid = ffs(closid_free_map);
+ int cleanest_closid;
+ u32 closid;
- if (closid == 0)
- return -ENOSPC;
- closid--;
- closid_free_map &= ~(1 << closid);
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ cleanest_closid = resctrl_find_cleanest_closid();
+ if (cleanest_closid < 0)
+ return cleanest_closid;
+ closid = cleanest_closid;
+ } else {
+ closid = ffs(closid_free_map);
+ if (closid == 0)
+ return -ENOSPC;
+ closid--;
+ }
+ __clear_bit(closid, &closid_free_map);
return closid;
}
void closid_free(int closid)
{
- closid_free_map |= 1 << closid;
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ __set_bit(closid, &closid_free_map);
}
/**
@@ -159,9 +179,11 @@ void closid_free(int closid)
* Return: true if @closid is currently associated with a resource group,
* false if @closid is free
*/
-static bool closid_allocated(unsigned int closid)
+bool closid_allocated(unsigned int closid)
{
- return (closid_free_map & (1 << closid)) == 0;
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ return !test_bit(closid, &closid_free_map);
}
/**
@@ -559,14 +581,26 @@ static void update_task_closid_rmid(struct task_struct *t)
_update_task_closid_rmid(t);
}
+static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
+{
+ u32 closid, rmid = rdtgrp->mon.rmid;
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ closid = rdtgrp->closid;
+ else if (rdtgrp->type == RDTMON_GROUP)
+ closid = rdtgrp->mon.parent->closid;
+ else
+ return false;
+
+ return resctrl_arch_match_closid(tsk, closid) &&
+ resctrl_arch_match_rmid(tsk, closid, rmid);
+}
+
static int __rdtgroup_move_task(struct task_struct *tsk,
struct rdtgroup *rdtgrp)
{
/* If the task is already in rdtgrp, no need to move the task. */
- if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
- tsk->rmid == rdtgrp->mon.rmid) ||
- (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
- tsk->closid == rdtgrp->mon.parent->closid))
+ if (task_in_rdtgroup(tsk, rdtgrp))
return 0;
/*
@@ -577,19 +611,19 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
* For monitor groups, can move the tasks only from
* their parent CTRL group.
*/
-
- if (rdtgrp->type == RDTCTRL_GROUP) {
- WRITE_ONCE(tsk->closid, rdtgrp->closid);
- WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
- } else if (rdtgrp->type == RDTMON_GROUP) {
- if (rdtgrp->mon.parent->closid == tsk->closid) {
- WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
- } else {
- rdt_last_cmd_puts("Can't move task to different control group\n");
- return -EINVAL;
- }
+ if (rdtgrp->type == RDTMON_GROUP &&
+ !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
+ return -EINVAL;
}
+ if (rdtgrp->type == RDTMON_GROUP)
+ resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
+ rdtgrp->mon.rmid);
+ else
+ resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
+ rdtgrp->mon.rmid);
+
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
@@ -611,14 +645,15 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
{
- return (rdt_alloc_capable &&
- (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
+ return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
+ resctrl_arch_match_closid(t, r->closid));
}
static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
{
- return (rdt_mon_capable &&
- (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
+ return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
+ resctrl_arch_match_rmid(t, r->mon.parent->closid,
+ r->mon.rmid));
}
/**
@@ -853,7 +888,7 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
mutex_lock(&rdtgroup_mutex);
/* Return empty if resctrl has not been mounted. */
- if (!static_branch_unlikely(&rdt_enable_key)) {
+ if (!resctrl_mounted) {
seq_puts(s, "res:\nmon:\n");
goto unlock;
}
@@ -869,7 +904,7 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
rdtg->mode != RDT_MODE_EXCLUSIVE)
continue;
- if (rdtg->closid != tsk->closid)
+ if (!resctrl_arch_match_closid(tsk, rdtg->closid))
continue;
seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
@@ -877,7 +912,8 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
seq_puts(s, "mon:");
list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
mon.crdtgrp_list) {
- if (tsk->rmid != crg->mon.rmid)
+ if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
+ crg->mon.rmid))
continue;
seq_printf(s, "%s", crg->kn->name);
break;
@@ -982,6 +1018,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
bool sep = false;
u32 ctrl_val;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
list_for_each_entry(dom, &r->domains, list) {
@@ -1042,6 +1079,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
}
seq_putc(seq, '\n');
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return 0;
}
@@ -1297,6 +1335,9 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
struct rdt_domain *d;
u32 ctrl;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
@@ -1561,6 +1602,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
struct rdt_domain *dom;
bool sep = false;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
list_for_each_entry(dom, &r->domains, list) {
@@ -1577,6 +1619,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
seq_puts(s, "\n");
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return 0;
}
@@ -1614,17 +1657,10 @@ static void mon_event_config_write(void *info)
wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
}
-static int mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_domain *d, u32 evtid, u32 val)
+static void mbm_config_write_domain(struct rdt_resource *r,
+ struct rdt_domain *d, u32 evtid, u32 val)
{
struct mon_config_info mon_info = {0};
- int ret = 0;
-
- /* mon_config cannot be more than the supported set of events */
- if (val > MAX_EVT_CONFIG_BITS) {
- rdt_last_cmd_puts("Invalid event configuration\n");
- return -EINVAL;
- }
/*
* Read the current config value first. If both are the same then
@@ -1633,7 +1669,7 @@ static int mbm_config_write_domain(struct rdt_resource *r,
mon_info.evtid = evtid;
mondata_config_read(d, &mon_info);
if (mon_info.mon_config == val)
- goto out;
+ return;
mon_info.mon_config = val;
@@ -1656,17 +1692,17 @@ static int mbm_config_write_domain(struct rdt_resource *r,
* mbm_local and mbm_total counts for all the RMIDs.
*/
resctrl_arch_reset_rmid_all(r, d);
-
-out:
- return ret;
}
static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
struct rdt_domain *d;
- int ret = 0;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
next:
if (!tok || tok[0] == '\0')
@@ -1686,11 +1722,16 @@ next:
return -EINVAL;
}
+ /* Value from user cannot be more than the supported set of events */
+ if ((val & hw_res->mbm_cfg_mask) != val) {
+ rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
+ hw_res->mbm_cfg_mask);
+ return -EINVAL;
+ }
+
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
- ret = mbm_config_write_domain(r, d, evtid, val);
- if (ret)
- return -EINVAL;
+ mbm_config_write_domain(r, d, evtid, val);
goto next;
}
}
@@ -1709,6 +1750,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
rdt_last_cmd_clear();
@@ -1718,6 +1760,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return ret ?: nbytes;
}
@@ -1733,6 +1776,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
rdt_last_cmd_clear();
@@ -1742,6 +1786,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return ret ?: nbytes;
}
@@ -2218,6 +2263,9 @@ static int set_cache_qos_cfg(int level, bool enable)
struct rdt_domain *d;
int cpu;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (level == RDT_RESOURCE_L3)
update = l3_qos_cfg_update;
else if (level == RDT_RESOURCE_L2)
@@ -2417,6 +2465,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
rdtgroup_kn_get(rdtgrp, kn);
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
/* Was this group deleted while we waited? */
@@ -2434,6 +2483,8 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
return;
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
rdtgroup_kn_put(rdtgrp, kn);
}
@@ -2584,7 +2635,7 @@ static int rdt_get_tree(struct fs_context *fc)
/*
* resctrl file system can only be mounted once.
*/
- if (static_branch_unlikely(&rdt_enable_key)) {
+ if (resctrl_mounted) {
ret = -EBUSY;
goto out;
}
@@ -2605,7 +2656,7 @@ static int rdt_get_tree(struct fs_context *fc)
closid_init();
- if (rdt_mon_capable)
+ if (resctrl_arch_mon_capable())
flags |= RFTYPE_MON;
ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
@@ -2618,7 +2669,7 @@ static int rdt_get_tree(struct fs_context *fc)
if (ret < 0)
goto out_schemata_free;
- if (rdt_mon_capable) {
+ if (resctrl_arch_mon_capable()) {
ret = mongroup_create_dir(rdtgroup_default.kn,
&rdtgroup_default, "mon_groups",
&kn_mongrp);
@@ -2640,18 +2691,19 @@ static int rdt_get_tree(struct fs_context *fc)
if (ret < 0)
goto out_psl;
- if (rdt_alloc_capable)
- static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
- if (rdt_mon_capable)
- static_branch_enable_cpuslocked(&rdt_mon_enable_key);
+ if (resctrl_arch_alloc_capable())
+ resctrl_arch_enable_alloc();
+ if (resctrl_arch_mon_capable())
+ resctrl_arch_enable_mon();
- if (rdt_alloc_capable || rdt_mon_capable)
- static_branch_enable_cpuslocked(&rdt_enable_key);
+ if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
+ resctrl_mounted = true;
if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
list_for_each_entry(dom, &r->domains, list)
- mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+ mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
}
goto out;
@@ -2659,10 +2711,10 @@ static int rdt_get_tree(struct fs_context *fc)
out_psl:
rdt_pseudo_lock_release();
out_mondata:
- if (rdt_mon_capable)
+ if (resctrl_arch_mon_capable())
kernfs_remove(kn_mondata);
out_mongrp:
- if (rdt_mon_capable)
+ if (resctrl_arch_mon_capable())
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
@@ -2765,6 +2817,9 @@ static int reset_all_ctrls(struct rdt_resource *r)
struct rdt_domain *d;
int i;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
@@ -2810,8 +2865,8 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
for_each_process_thread(p, t) {
if (!from || is_closid_match(t, from) ||
is_rmid_match(t, from)) {
- WRITE_ONCE(t->closid, to->closid);
- WRITE_ONCE(t->rmid, to->mon.rmid);
+ resctrl_arch_set_closid_rmid(t, to->closid,
+ to->mon.rmid);
/*
* Order the closid/rmid stores above before the loads
@@ -2842,7 +2897,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
head = &rdtgrp->mon.crdtgrp_list;
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
- free_rmid(sentry->mon.rmid);
+ free_rmid(sentry->closid, sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
if (atomic_read(&sentry->waitcount) != 0)
@@ -2882,7 +2937,7 @@ static void rmdir_all_sub(void)
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
@@ -2917,9 +2972,11 @@ static void rdt_kill_sb(struct super_block *sb)
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
schemata_list_destroy();
rdtgroup_destroy_root();
- static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
- static_branch_disable_cpuslocked(&rdt_mon_enable_key);
- static_branch_disable_cpuslocked(&rdt_enable_key);
+ if (resctrl_arch_alloc_capable())
+ resctrl_arch_disable_alloc();
+ if (resctrl_arch_mon_capable())
+ resctrl_arch_disable_mon();
+ resctrl_mounted = false;
kernfs_kill_sb(sb);
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
@@ -3047,6 +3104,9 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_domain *dom;
int ret;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
list_for_each_entry(dom, &r->domains, list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
@@ -3293,6 +3353,36 @@ out:
return ret;
}
+static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ if (!resctrl_arch_mon_capable())
+ return 0;
+
+ ret = alloc_rmid(rdtgrp->closid);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Out of RMIDs\n");
+ return ret;
+ }
+ rdtgrp->mon.rmid = ret;
+
+ ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
+{
+ if (resctrl_arch_mon_capable())
+ free_rmid(rgrp->closid, rgrp->mon.rmid);
+}
+
static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
const char *name, umode_t mode,
enum rdt_group_type rtype, struct rdtgroup **r)
@@ -3353,7 +3443,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
if (rtype == RDTCTRL_GROUP) {
files = RFTYPE_BASE | RFTYPE_CTRL;
- if (rdt_mon_capable)
+ if (resctrl_arch_mon_capable())
files |= RFTYPE_MON;
} else {
files = RFTYPE_BASE | RFTYPE_MON;
@@ -3365,29 +3455,11 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
goto out_destroy;
}
- if (rdt_mon_capable) {
- ret = alloc_rmid();
- if (ret < 0) {
- rdt_last_cmd_puts("Out of RMIDs\n");
- goto out_destroy;
- }
- rdtgrp->mon.rmid = ret;
-
- ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
- if (ret) {
- rdt_last_cmd_puts("kernfs subdir error\n");
- goto out_idfree;
- }
- }
- kernfs_activate(kn);
-
/*
* The caller unlocks the parent_kn upon success.
*/
return 0;
-out_idfree:
- free_rmid(rdtgrp->mon.rmid);
out_destroy:
kernfs_put(rdtgrp->kn);
kernfs_remove(rdtgrp->kn);
@@ -3401,7 +3473,6 @@ out_unlock:
static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
{
kernfs_remove(rgrp->kn);
- free_rmid(rgrp->mon.rmid);
rdtgroup_remove(rgrp);
}
@@ -3423,12 +3494,21 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
prgrp = rdtgrp->mon.parent;
rdtgrp->closid = prgrp->closid;
+ ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+ if (ret) {
+ mkdir_rdt_prepare_clean(rdtgrp);
+ goto out_unlock;
+ }
+
+ kernfs_activate(rdtgrp->kn);
+
/*
* Add the rdtgrp to the list of rdtgrps the parent
* ctrl_mon group has to track.
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
+out_unlock:
rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -3459,13 +3539,20 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
ret = 0;
rdtgrp->closid = closid;
+
+ ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+ if (ret)
+ goto out_closid_free;
+
+ kernfs_activate(rdtgrp->kn);
+
ret = rdtgroup_init_alloc(rdtgrp);
if (ret < 0)
- goto out_id_free;
+ goto out_rmid_free;
list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
- if (rdt_mon_capable) {
+ if (resctrl_arch_mon_capable()) {
/*
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
@@ -3481,7 +3568,9 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
out_del_list:
list_del(&rdtgrp->rdtgroup_list);
-out_id_free:
+out_rmid_free:
+ mkdir_rdt_prepare_rmid_free(rdtgrp);
+out_closid_free:
closid_free(closid);
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
@@ -3518,14 +3607,14 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
* allocation is supported, add a control and monitoring
* subdirectory
*/
- if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
+ if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
/*
* If RDT monitoring is supported and the parent directory is a valid
* "mon_groups" directory, add a monitoring subdirectory.
*/
- if (rdt_mon_capable && is_mon_groups(parent_kn, name))
+ if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
return rdtgroup_mkdir_mon(parent_kn, name, mode);
return -EPERM;
@@ -3550,7 +3639,7 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
update_closid_rmid(tmpmask, NULL);
rdtgrp->flags = RDT_DELETED;
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
/*
* Remove the rdtgrp from the parent ctrl_mon group's list
@@ -3596,8 +3685,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
update_closid_rmid(tmpmask, NULL);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
closid_free(rdtgrp->closid);
- free_rmid(rdtgrp->mon.rmid);
rdtgroup_ctrl_remove(rdtgrp);
@@ -3829,8 +3918,8 @@ static void __init rdtgroup_setup_default(void)
{
mutex_lock(&rdtgroup_mutex);
- rdtgroup_default.closid = 0;
- rdtgroup_default.mon.rmid = 0;
+ rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
+ rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
rdtgroup_default.type = RDTCTRL_GROUP;
INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
@@ -3848,24 +3937,24 @@ static void domain_destroy_mon_state(struct rdt_domain *d)
void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
{
- lockdep_assert_held(&rdtgroup_mutex);
+ mutex_lock(&rdtgroup_mutex);
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);
if (!r->mon_capable)
- return;
+ goto out_unlock;
/*
* If resctrl is mounted, remove all the
* per domain monitor data directories.
*/
- if (static_branch_unlikely(&rdt_mon_enable_key))
+ if (resctrl_mounted && resctrl_arch_mon_capable())
rmdir_mondata_subdir_allrdtgrp(r, d->id);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
- if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
+ if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
/*
* When a package is going down, forcefully
* decrement rmid->ebusy. There is no way to know
@@ -3879,20 +3968,24 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
}
domain_destroy_mon_state(d);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
}
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
if (is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
@@ -3900,7 +3993,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
}
if (is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
@@ -3913,34 +4006,97 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
{
- int err;
+ int err = 0;
- lockdep_assert_held(&rdtgroup_mutex);
+ mutex_lock(&rdtgroup_mutex);
- if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
+ if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
- return mba_sc_domain_allocate(r, d);
+ err = mba_sc_domain_allocate(r, d);
+ goto out_unlock;
+ }
if (!r->mon_capable)
- return 0;
+ goto out_unlock;
err = domain_setup_mon_state(r, d);
if (err)
- return err;
+ goto out_unlock;
if (is_mbm_enabled()) {
INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
- mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+ mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
}
if (is_llc_occupancy_enabled())
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
- /* If resctrl is mounted, add per domain monitor data directories. */
- if (static_branch_unlikely(&rdt_mon_enable_key))
+ /*
+ * If the filesystem is not mounted then only the default resource group
+ * exists. Creation of its directories is deferred until mount time
+ * by rdt_get_tree() calling mkdir_mondata_all().
+ * If resctrl is mounted, add per domain monitor data directories.
+ */
+ if (resctrl_mounted && resctrl_arch_mon_capable())
mkdir_mondata_subdir_allrdtgrp(r, d);
- return 0;
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+void resctrl_online_cpu(unsigned int cpu)
+{
+ mutex_lock(&rdtgroup_mutex);
+ /* The CPU is set in default rdtgroup after online. */
+ cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+ struct rdtgroup *cr;
+
+ list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
+ break;
+ }
+}
+
+void resctrl_offline_cpu(unsigned int cpu)
+{
+ struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdtgroup *rdtgrp;
+ struct rdt_domain *d;
+
+ mutex_lock(&rdtgroup_mutex);
+ list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+ clear_childcpus(rdtgrp, cpu);
+ break;
+ }
+ }
+
+ if (!l3->mon_capable)
+ goto out_unlock;
+
+ d = get_domain_from_cpu(cpu, l3);
+ if (d) {
+ if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0, cpu);
+ }
+ if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+ has_busy_rmid(d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0, cpu);
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
}
/*
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index dc136703566f..3259b1d4fefe 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -1,167 +1,510 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * Check for extended topology enumeration cpuid leaf 0xb and if it
- * exists, use it for populating initial_apicid and cpu topology
- * detection.
+ * CPU/APIC topology
+ *
+ * The APIC IDs describe the system topology in multiple domain levels.
+ * The CPUID topology parser provides the information which part of the
+ * APIC ID is associated to the individual levels:
+ *
+ * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
+ *
+ * The root space contains the package (socket) IDs.
+ *
+ * Not enumerated levels consume 0 bits space, but conceptually they are
+ * always represented. If e.g. only CORE and THREAD levels are enumerated
+ * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE.
+ *
+ * If SMT is not supported, then the THREAD domain is still used. It then
+ * has the same physical ID as the CORE domain and is the only child of
+ * the core domain.
+ *
+ * This allows a unified view on the system independent of the enumerated
+ * domain levels without requiring any conditionals in the code.
*/
-
+#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>
+
+#include <xen/xen.h>
+
#include <asm/apic.h>
-#include <asm/memtype.h>
-#include <asm/processor.h>
+#include <asm/hypervisor.h>
+#include <asm/io_apic.h>
+#include <asm/mpspec.h>
+#include <asm/smp.h>
#include "cpu.h"
-/* leaf 0xb SMT level */
-#define SMT_LEVEL 0
+/*
+ * Map cpu index to physical APIC ID
+ */
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
-/* extended topology sub-leaf types */
-#define INVALID_TYPE 0
-#define SMT_TYPE 1
-#define CORE_TYPE 2
-#define DIE_TYPE 5
+/* Bitmap of physically present CPUs. */
+DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
-#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
-#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
-#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
+/* Used for CPU number allocation and parallel CPU bringup */
+u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
+
+/* Bitmaps to mark registered APICs at each topology domain */
+static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
+
+/*
+ * Keep track of assigned, disabled and rejected CPUs. Present assigned
+ * with 1 as CPU #0 is reserved for the boot CPU.
+ */
+static struct {
+ unsigned int nr_assigned_cpus;
+ unsigned int nr_disabled_cpus;
+ unsigned int nr_rejected_cpus;
+ u32 boot_cpu_apic_id;
+ u32 real_bsp_apic_id;
+} topo_info __ro_after_init = {
+ .nr_assigned_cpus = 1,
+ .boot_cpu_apic_id = BAD_APICID,
+ .real_bsp_apic_id = BAD_APICID,
+};
-unsigned int __max_die_per_package __read_mostly = 1;
-EXPORT_SYMBOL(__max_die_per_package);
+#define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)
+
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return phys_id == (u64)cpuid_to_apicid[cpu];
+}
#ifdef CONFIG_SMP
+static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
+{
+ if (!(apicid & (__max_threads_per_core - 1)))
+ cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
+}
+#else
+static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
+#endif
+
/*
- * Check if given CPUID extended topology "leaf" is implemented
+ * Convert the APIC ID to a domain level ID by masking out the low bits
+ * below the domain level @dom.
*/
-static int check_extended_topology_leaf(int leaf)
+static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
+{
+ if (dom == TOPO_SMT_DOMAIN)
+ return apicid;
+ return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
+}
+
+static int topo_lookup_cpuid(u32 apic_id)
{
- unsigned int eax, ebx, ecx, edx;
+ int i;
- cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ /* CPU# to APICID mapping is persistent once it is established */
+ for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
+ if (cpuid_to_apicid[i] == apic_id)
+ return i;
+ }
+ return -ENODEV;
+}
- if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
- return -1;
+static __init int topo_get_cpunr(u32 apic_id)
+{
+ int cpu = topo_lookup_cpuid(apic_id);
- return 0;
+ if (cpu >= 0)
+ return cpu;
+
+ return topo_info.nr_assigned_cpus++;
}
-/*
- * Return best CPUID Extended Topology Leaf supported
+
+static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
+{
+#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
+ early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
+ early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
+#endif
+ set_cpu_possible(cpu, true);
+ set_cpu_present(cpu, true);
+}
+
+static __init bool check_for_real_bsp(u32 apic_id)
+{
+ /*
+ * There is no real good way to detect whether this a kdump()
+ * kernel, but except on the Voyager SMP monstrosity which is not
+ * longer supported, the real BSP APIC ID is the first one which is
+ * enumerated by firmware. That allows to detect whether the boot
+ * CPU is the real BSP. If it is not, then do not register the APIC
+ * because sending INIT to the real BSP would reset the whole
+ * system.
+ *
+ * The first APIC ID which is enumerated by firmware is detectable
+ * because the boot CPU APIC ID is registered before that without
+ * invoking this code.
+ */
+ if (topo_info.real_bsp_apic_id != BAD_APICID)
+ return false;
+
+ if (apic_id == topo_info.boot_cpu_apic_id) {
+ topo_info.real_bsp_apic_id = apic_id;
+ return false;
+ }
+
+ pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
+ topo_info.boot_cpu_apic_id, apic_id);
+ pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
+
+ topo_info.real_bsp_apic_id = apic_id;
+ return true;
+}
+
+static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
+ unsigned long *map)
+{
+ unsigned int id, end, cnt = 0;
+
+ /* Calculate the exclusive end */
+ end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);
+
+ /* Unfortunately there is no bitmap_weight_range() */
+ for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
+ cnt++;
+ return cnt;
+}
+
+static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
+{
+ int cpu, dom;
+
+ if (present) {
+ set_bit(apic_id, phys_cpu_present_map);
+
+ /*
+ * Double registration is valid in case of the boot CPU
+ * APIC because that is registered before the enumeration
+ * of the APICs via firmware parsers or VM guest
+ * mechanisms.
+ */
+ if (apic_id == topo_info.boot_cpu_apic_id)
+ cpu = 0;
+ else
+ cpu = topo_get_cpunr(apic_id);
+
+ cpuid_to_apicid[cpu] = apic_id;
+ topo_set_cpuids(cpu, apic_id, acpi_id);
+ } else {
+ u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
+
+ /*
+ * Check for present APICs in the same package when running
+ * on bare metal. Allow the bogosity in a guest.
+ */
+ if (hypervisor_is_type(X86_HYPER_NATIVE) &&
+ topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
+ pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
+ apic_id);
+ topo_info.nr_rejected_cpus++;
+ return;
+ }
+
+ topo_info.nr_disabled_cpus++;
+ }
+
+ /* Register present and possible CPUs in the domain maps */
+ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
+ set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
+}
+
+/**
+ * topology_register_apic - Register an APIC in early topology maps
+ * @apic_id: The APIC ID to set up
+ * @acpi_id: The ACPI ID associated to the APIC
+ * @present: True if the corresponding CPU is present
*/
-static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
+void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
- if (c->cpuid_level >= 0x1f) {
- if (check_extended_topology_leaf(0x1f) == 0)
- return 0x1f;
+ if (apic_id >= MAX_LOCAL_APIC) {
+ pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
+ topo_info.nr_rejected_cpus++;
+ return;
+ }
+
+ if (check_for_real_bsp(apic_id)) {
+ topo_info.nr_rejected_cpus++;
+ return;
}
- if (c->cpuid_level >= 0xb) {
- if (check_extended_topology_leaf(0xb) == 0)
- return 0xb;
+ /* CPU numbers exhausted? */
+ if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
+ pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
+ topo_info.nr_rejected_cpus++;
+ return;
}
- return -1;
+ topo_register_apic(apic_id, acpi_id, present);
+}
+
+/**
+ * topology_register_boot_apic - Register the boot CPU APIC
+ * @apic_id: The APIC ID to set up
+ *
+ * Separate so CPU #0 can be assigned
+ */
+void __init topology_register_boot_apic(u32 apic_id)
+{
+ WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);
+
+ topo_info.boot_cpu_apic_id = apic_id;
+ topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
+}
+
+/**
+ * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
+ * @apicid: The APIC ID for which to lookup the logical ID
+ * @at_level: The topology domain level to use
+ *
+ * @apicid must be a full APIC ID, not the normalized variant. It's valid to have
+ * all bits below the domain level specified by @at_level to be clear. So both
+ * real APIC IDs and backshifted normalized APIC IDs work correctly.
+ *
+ * Returns:
+ * - >= 0: The requested logical ID
+ * - -ERANGE: @apicid is out of range
+ * - -ENODEV: @apicid is not registered
+ */
+int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
+{
+ /* Remove the bits below @at_level to get the proper level ID of @apicid */
+ unsigned int lvlid = topo_apicid(apicid, at_level);
+
+ if (lvlid >= MAX_LOCAL_APIC)
+ return -ERANGE;
+ if (!test_bit(lvlid, apic_maps[at_level].map))
+ return -ENODEV;
+ /* Get the number of set bits before @lvlid. */
+ return bitmap_weight(apic_maps[at_level].map, lvlid);
+}
+EXPORT_SYMBOL_GPL(topology_get_logical_id);
+
+/**
+ * topology_unit_count - Retrieve the count of specified units at a given topology domain level
+ * @apicid: The APIC ID which specifies the search range
+ * @which_units: The domain level specifying the units to count
+ * @at_level: The domain level at which @which_units have to be counted
+ *
+ * This returns the number of possible units according to the enumerated
+ * information.
+ *
+ * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
+ * counts the number of possible cores in the package to which @apicid
+ * belongs.
+ *
+ * @at_level must obviously be greater than @which_level to produce useful
+ * results. If @at_level is equal to @which_units the result is
+ * unsurprisingly 1. If @at_level is less than @which_units the results
+ * is by definition undefined and the function returns 0.
+ */
+unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
+ enum x86_topology_domains at_level)
+{
+ /* Remove the bits below @at_level to get the proper level ID of @apicid */
+ unsigned int lvlid = topo_apicid(apicid, at_level);
+
+ if (lvlid >= MAX_LOCAL_APIC)
+ return 0;
+ if (!test_bit(lvlid, apic_maps[at_level].map))
+ return 0;
+ if (which_units > at_level)
+ return 0;
+ if (which_units == at_level)
+ return 1;
+ return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
+}
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+/**
+ * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
+ * @apic_id: The APIC ID to set up
+ * @acpi_id: The ACPI ID associated to the APIC
+ */
+int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
+{
+ int cpu;
+
+ if (apic_id >= MAX_LOCAL_APIC)
+ return -EINVAL;
+
+ /* Reject if the APIC ID was not registered during enumeration. */
+ if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
+ return -ENODEV;
+
+ cpu = topo_lookup_cpuid(apic_id);
+ if (cpu < 0)
+ return -ENOSPC;
+
+ set_bit(apic_id, phys_cpu_present_map);
+ topo_set_cpuids(cpu, apic_id, acpi_id);
+ cpu_mark_primary_thread(cpu, apic_id);
+ return cpu;
+}
+
+/**
+ * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
+ * @cpu: The CPU number for which the APIC ID is removed
+ */
+void topology_hotunplug_apic(unsigned int cpu)
+{
+ u32 apic_id = cpuid_to_apicid[cpu];
+
+ if (apic_id == BAD_APICID)
+ return;
+
+ per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
+ clear_bit(apic_id, phys_cpu_present_map);
+ set_cpu_present(cpu, false);
}
#endif
-int detect_extended_topology_early(struct cpuinfo_x86 *c)
+#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned int max_possible_cpus __initdata = NR_CPUS;
+
+/**
+ * topology_apply_cmdline_limits_early - Apply topology command line limits early
+ *
+ * Ensure that command line limits are in effect before firmware parsing
+ * takes place.
+ */
+void __init topology_apply_cmdline_limits_early(void)
{
-#ifdef CONFIG_SMP
- unsigned int eax, ebx, ecx, edx;
- int leaf;
+ unsigned int possible = nr_cpu_ids;
- leaf = detect_extended_topology_leaf(c);
- if (leaf < 0)
- return -1;
+ /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
+ if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+ possible = 1;
- set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
+ /* 'possible_cpus=N' */
+ possible = min_t(unsigned int, max_possible_cpus, possible);
+
+ if (possible < nr_cpu_ids) {
+ pr_info("Limiting to %u possible CPUs\n", possible);
+ set_nr_cpu_ids(possible);
+ }
+}
- cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+static __init bool restrict_to_up(void)
+{
+ if (!smp_found_config || ioapic_is_disabled)
+ return true;
/*
- * initial apic id, which also represents 32-bit extended x2apic id.
+ * XEN PV is special as it does not advertise the local APIC
+ * properly, but provides a fake topology for it so that the
+ * infrastructure works. So don't apply the restrictions vs. APIC
+ * here.
*/
- c->topo.initial_apicid = edx;
- smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
-#endif
- return 0;
+ if (xen_pv_domain())
+ return false;
+
+ return apic_is_disabled;
}
-/*
- * Check for extended topology enumeration cpuid leaf, and if it
- * exists, use it for populating initial_apicid and cpu topology
- * detection.
- */
-int detect_extended_topology(struct cpuinfo_x86 *c)
+void __init topology_init_possible_cpus(void)
{
-#ifdef CONFIG_SMP
- unsigned int eax, ebx, ecx, edx, sub_index;
- unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
- unsigned int core_select_mask, core_level_siblings;
- unsigned int die_select_mask, die_level_siblings;
- unsigned int pkg_mask_width;
- bool die_level_present = false;
- int leaf;
-
- leaf = detect_extended_topology_leaf(c);
- if (leaf < 0)
- return -1;
+ unsigned int assigned = topo_info.nr_assigned_cpus;
+ unsigned int disabled = topo_info.nr_disabled_cpus;
+ unsigned int cnta, cntb, cpu, allowed = 1;
+ unsigned int total = assigned + disabled;
+ u32 apicid, firstid;
+
+ if (!restrict_to_up()) {
+ if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
+ disabled += assigned - nr_cpu_ids;
+ assigned = nr_cpu_ids;
+ }
+ allowed = min_t(unsigned int, total, nr_cpu_ids);
+ }
+
+ if (total > allowed)
+ pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
+
+ assigned = min_t(unsigned int, allowed, assigned);
+ disabled = allowed - assigned;
+ topo_info.nr_assigned_cpus = assigned;
+ topo_info.nr_disabled_cpus = disabled;
+
+ total_cpus = allowed;
+ set_nr_cpu_ids(allowed);
+
+ cnta = domain_weight(TOPO_PKG_DOMAIN);
+ cntb = domain_weight(TOPO_DIE_DOMAIN);
+ __max_logical_packages = cnta;
+ __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
+
+ pr_info("Max. logical packages: %3u\n", cnta);
+ pr_info("Max. logical dies: %3u\n", cntb);
+ pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
+
+ cnta = domain_weight(TOPO_CORE_DOMAIN);
+ cntb = domain_weight(TOPO_SMT_DOMAIN);
/*
- * Populate HT related information from sub-leaf level 0.
+ * Can't use order delta here as order(cnta) can be equal
+ * order(cntb) even if cnta != cntb.
*/
- cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
- c->topo.initial_apicid = edx;
- core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
- smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
- core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
- pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
-
- sub_index = 1;
- while (true) {
- cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
+ __max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
+ pr_info("Max. threads per core: %3u\n", __max_threads_per_core);
- /*
- * Check for the Core type in the implemented sub leaves.
- */
- if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
- core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
- core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- die_level_siblings = core_level_siblings;
- die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- }
- if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
- die_level_present = true;
- die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
- die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- }
+ firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
+ __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
+ pr_info("Num. cores per package: %3u\n", __num_cores_per_package);
+ __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
+ pr_info("Num. threads per package: %3u\n", __num_threads_per_package);
- if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE)
- pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- else
- break;
+ pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
+ if (topo_info.nr_rejected_cpus)
+ pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
- sub_index++;
+ init_cpu_present(cpumask_of(0));
+ init_cpu_possible(cpumask_of(0));
+
+ /* Assign CPU numbers to non-present CPUs */
+ for (apicid = 0; disabled; disabled--, apicid++) {
+ apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
+ MAX_LOCAL_APIC, apicid);
+ if (apicid >= MAX_LOCAL_APIC)
+ break;
+ cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
}
- core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width;
- die_select_mask = (~(-1 << die_plus_mask_width)) >>
- core_plus_mask_width;
+ for (cpu = 0; cpu < allowed; cpu++) {
+ apicid = cpuid_to_apicid[cpu];
- c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid,
- ht_mask_width) & core_select_mask;
+ set_cpu_possible(cpu, true);
- if (die_level_present) {
- c->topo.die_id = apic->phys_pkg_id(c->topo.initial_apicid,
- core_plus_mask_width) & die_select_mask;
+ if (apicid == BAD_APICID)
+ continue;
+
+ cpu_mark_primary_thread(cpu, apicid);
+ set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
}
+}
- c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, pkg_mask_width);
- /*
- * Reinit the apicid, now that we have extended initial_apicid.
- */
- c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
+/*
+ * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
+ */
+void __init topology_reset_possible_cpus_up(void)
+{
+ init_cpu_present(cpumask_of(0));
+ init_cpu_possible(cpumask_of(0));
- c->x86_max_cores = (core_level_siblings / smp_num_siblings);
- __max_die_per_package = (die_level_siblings / core_level_siblings);
-#endif
+ bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+ if (topo_info.boot_cpu_apic_id != BAD_APICID)
+ set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
+}
+
+static int __init setup_possible_cpus(char *str)
+{
+ get_option(&str, &max_possible_cpus);
return 0;
}
+early_param("possible_cpus", setup_possible_cpus);
+#endif
diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h
new file mode 100644
index 000000000000..37326297f80c
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_X86_TOPOLOGY_H
+#define ARCH_X86_TOPOLOGY_H
+
+struct topo_scan {
+ struct cpuinfo_x86 *c;
+ unsigned int dom_shifts[TOPO_MAX_DOMAIN];
+ unsigned int dom_ncpus[TOPO_MAX_DOMAIN];
+
+ /* Legacy CPUID[1]:EBX[23:16] number of logical processors */
+ unsigned int ebx1_nproc_shift;
+
+ /* AMD specific node ID which cannot be mapped into APIC space. */
+ u16 amd_nodes_per_pkg;
+ u16 amd_node_id;
+};
+
+void cpu_init_topology(struct cpuinfo_x86 *c);
+void cpu_parse_topology(struct cpuinfo_x86 *c);
+void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+ unsigned int shift, unsigned int ncpus);
+bool cpu_parse_topology_ext(struct topo_scan *tscan);
+void cpu_parse_topology_amd(struct topo_scan *tscan);
+void cpu_topology_fixup_amd(struct topo_scan *tscan);
+
+static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
+{
+ if (dom == TOPO_SMT_DOMAIN)
+ return apicid;
+ return apicid >> x86_topo_system.dom_shifts[dom - 1];
+}
+
+static inline u32 topo_relative_domain_id(u32 apicid, enum x86_topology_domains dom)
+{
+ if (dom != TOPO_SMT_DOMAIN)
+ apicid >>= x86_topo_system.dom_shifts[dom - 1];
+ return apicid & (x86_topo_system.dom_size[dom] - 1);
+}
+
+static inline u32 topo_domain_mask(enum x86_topology_domains dom)
+{
+ return (1U << x86_topo_system.dom_shifts[dom]) - 1;
+}
+
+/*
+ * Update a domain level after the fact without propagating. Used to fixup
+ * broken CPUID enumerations.
+ */
+static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+ unsigned int shift, unsigned int ncpus)
+{
+ tscan->dom_shifts[dom] = shift;
+ tscan->dom_ncpus[dom] = ncpus;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
+ enum x86_topology_domains at_level);
+#else
+static inline unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
+ enum x86_topology_domains at_level)
+{
+ return 1;
+}
+#endif
+
+#endif /* ARCH_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
new file mode 100644
index 000000000000..1a8b3ad493af
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/apic.h>
+#include <asm/memtype.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+static bool parse_8000_0008(struct topo_scan *tscan)
+{
+ struct {
+ // ecx
+ u32 cpu_nthreads : 8, // Number of physical threads - 1
+ : 4, // Reserved
+ apicid_coreid_len : 4, // Number of thread core ID bits (shift) in APIC ID
+ perf_tsc_len : 2, // Performance time-stamp counter size
+ : 14; // Reserved
+ } ecx;
+ unsigned int sft;
+
+ if (tscan->c->extended_cpuid_level < 0x80000008)
+ return false;
+
+ cpuid_leaf_reg(0x80000008, CPUID_ECX, &ecx);
+
+ /* If the thread bits are 0, then get the shift value from ecx.cpu_nthreads */
+ sft = ecx.apicid_coreid_len;
+ if (!sft)
+ sft = get_count_order(ecx.cpu_nthreads + 1);
+
+ topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+ return true;
+}
+
+static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+{
+ /*
+ * Starting with Fam 17h the DIE domain could probably be used to
+ * retrieve the node info on AMD/HYGON. Analysis of CPUID dumps
+ * suggests it's the topmost bit(s) of the CPU cores area, but
+ * that's guess work and neither enumerated nor documented.
+ *
+ * Up to Fam 16h this does not work at all and the legacy node ID
+ * has to be used.
+ */
+ tscan->amd_nodes_per_pkg = nr_nodes;
+ tscan->amd_node_id = node_id;
+}
+
+static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
+{
+ struct {
+ // eax
+ u32 ext_apic_id : 32; // Extended APIC ID
+ // ebx
+ u32 core_id : 8, // Unique per-socket logical core unit ID
+ core_nthreads : 8, // #Threads per core (zero-based)
+ : 16; // Reserved
+ // ecx
+ u32 node_id : 8, // Node (die) ID of invoking logical CPU
+ nnodes_per_socket : 3, // #nodes in invoking logical CPU's package/socket
+ : 21; // Reserved
+ // edx
+ u32 : 32; // Reserved
+ } leaf;
+
+ if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
+ return false;
+
+ cpuid_leaf(0x8000001e, &leaf);
+
+ tscan->c->topo.initial_apicid = leaf.ext_apic_id;
+
+ /*
+ * If leaf 0xb is available, then SMT shift is set already. If not
+ * take it from ecx.threads_per_core and use topo_update_dom() -
+ * topology_set_dom() would propagate and overwrite the already
+ * propagated CORE level.
+ */
+ if (!has_0xb) {
+ unsigned int nthreads = leaf.core_nthreads + 1;
+
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
+ }
+
+ store_node(tscan, leaf.nnodes_per_socket + 1, leaf.node_id);
+
+ if (tscan->c->x86_vendor == X86_VENDOR_AMD) {
+ if (tscan->c->x86 == 0x15)
+ tscan->c->topo.cu_id = leaf.core_id;
+
+ cacheinfo_amd_init_llc_id(tscan->c, leaf.node_id);
+ } else {
+ /*
+ * Package ID is ApicId[6..] on certain Hygon CPUs. See
+ * commit e0ceeae708ce for explanation. The topology info
+ * is screwed up: The package shift is always 6 and the
+ * node ID is bit [4:5].
+ */
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && tscan->c->x86_model <= 0x3) {
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, 6,
+ tscan->dom_ncpus[TOPO_CORE_DOMAIN]);
+ }
+ cacheinfo_hygon_init_llc_id(tscan->c);
+ }
+ return true;
+}
+
+static bool parse_fam10h_node_id(struct topo_scan *tscan)
+{
+ struct {
+ union {
+ u64 node_id : 3,
+ nodes_per_pkg : 3,
+ unused : 58;
+ u64 msr;
+ };
+ } nid;
+
+ if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
+ return false;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, nid.msr);
+ store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id);
+ tscan->c->topo.llc_id = nid.node_id;
+ return true;
+}
+
+static void legacy_set_llc(struct topo_scan *tscan)
+{
+ unsigned int apicid = tscan->c->topo.initial_apicid;
+
+ /* parse_8000_0008() set everything up except llc_id */
+ tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
+}
+
+static void parse_topology_amd(struct topo_scan *tscan)
+{
+ bool has_0xb = false;
+
+ /*
+ * If the extended topology leaf 0x8000_001e is available
+ * try to get SMT and CORE shift from leaf 0xb first, then
+ * try to get the CORE shift from leaf 0x8000_0008.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_TOPOEXT))
+ has_0xb = cpu_parse_topology_ext(tscan);
+
+ if (!has_0xb && !parse_8000_0008(tscan))
+ return;
+
+ /* Prefer leaf 0x8000001e if available */
+ if (parse_8000_001e(tscan, has_0xb))
+ return;
+
+ /* Try the NODEID MSR */
+ if (parse_fam10h_node_id(tscan))
+ return;
+
+ legacy_set_llc(tscan);
+}
+
+void cpu_parse_topology_amd(struct topo_scan *tscan)
+{
+ tscan->amd_nodes_per_pkg = 1;
+ parse_topology_amd(tscan);
+
+ if (tscan->amd_nodes_per_pkg > 1)
+ set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM);
+}
+
+void cpu_topology_fixup_amd(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+
+ /*
+ * Adjust the core_id relative to the node when there is more than
+ * one node.
+ */
+ if (tscan->c->x86 < 0x17 && tscan->amd_nodes_per_pkg > 1)
+ c->topo.core_id %= tscan->dom_ncpus[TOPO_CORE_DOMAIN] / tscan->amd_nodes_per_pkg;
+}
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
new file mode 100644
index 000000000000..a50ae8d63d1c
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <xen/xen.h>
+
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+#include "cpu.h"
+
+struct x86_topology_system x86_topo_system __ro_after_init;
+EXPORT_SYMBOL_GPL(x86_topo_system);
+
+unsigned int __amd_nodes_per_pkg __ro_after_init;
+EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg);
+
+void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+ unsigned int shift, unsigned int ncpus)
+{
+ topology_update_dom(tscan, dom, shift, ncpus);
+
+ /* Propagate to the upper levels */
+ for (dom++; dom < TOPO_MAX_DOMAIN; dom++) {
+ tscan->dom_shifts[dom] = tscan->dom_shifts[dom - 1];
+ tscan->dom_ncpus[dom] = tscan->dom_ncpus[dom - 1];
+ }
+}
+
+static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c)
+{
+ struct {
+ u32 cache_type : 5,
+ unused : 21,
+ ncores : 6;
+ } eax;
+
+ if (c->cpuid_level < 4)
+ return 1;
+
+ cpuid_subleaf_reg(4, 0, CPUID_EAX, &eax);
+ if (!eax.cache_type)
+ return 1;
+
+ return eax.ncores + 1;
+}
+
+static void parse_legacy(struct topo_scan *tscan)
+{
+ unsigned int cores, core_shift, smt_shift = 0;
+ struct cpuinfo_x86 *c = tscan->c;
+
+ cores = parse_num_cores_legacy(c);
+ core_shift = get_count_order(cores);
+
+ if (cpu_has(c, X86_FEATURE_HT)) {
+ if (!WARN_ON_ONCE(tscan->ebx1_nproc_shift < core_shift))
+ smt_shift = tscan->ebx1_nproc_shift - core_shift;
+ /*
+ * The parser expects leaf 0xb/0x1f format, which means
+ * the number of logical processors at core level is
+ * counting threads.
+ */
+ core_shift += smt_shift;
+ cores <<= smt_shift;
+ }
+
+ topology_set_dom(tscan, TOPO_SMT_DOMAIN, smt_shift, 1U << smt_shift);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores);
+}
+
+static bool fake_topology(struct topo_scan *tscan)
+{
+ /*
+ * Preset the CORE level shift for CPUID less systems and XEN_PV,
+ * which has useless CPUID information.
+ */
+ topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, 0, 1);
+
+ return tscan->c->cpuid_level < 1;
+}
+
+static void parse_topology(struct topo_scan *tscan, bool early)
+{
+ const struct cpuinfo_topology topo_defaults = {
+ .cu_id = 0xff,
+ .llc_id = BAD_APICID,
+ .l2c_id = BAD_APICID,
+ };
+ struct cpuinfo_x86 *c = tscan->c;
+ struct {
+ u32 unused0 : 16,
+ nproc : 8,
+ apicid : 8;
+ } ebx;
+
+ c->topo = topo_defaults;
+
+ if (fake_topology(tscan))
+ return;
+
+ /* Preset Initial APIC ID from CPUID leaf 1 */
+ cpuid_leaf_reg(1, CPUID_EBX, &ebx);
+ c->topo.initial_apicid = ebx.apicid;
+
+ /*
+ * The initial invocation from early_identify_cpu() happens before
+ * the APIC is mapped or X2APIC enabled. For establishing the
+ * topology, that's not required. Use the initial APIC ID.
+ */
+ if (early)
+ c->topo.apicid = c->topo.initial_apicid;
+ else
+ c->topo.apicid = read_apic_id();
+
+ /* The above is sufficient for UP */
+ if (!IS_ENABLED(CONFIG_SMP))
+ return;
+
+ tscan->ebx1_nproc_shift = get_count_order(ebx.nproc);
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (IS_ENABLED(CONFIG_CPU_SUP_AMD))
+ cpu_parse_topology_amd(tscan);
+ break;
+ case X86_VENDOR_CENTAUR:
+ case X86_VENDOR_ZHAOXIN:
+ parse_legacy(tscan);
+ break;
+ case X86_VENDOR_INTEL:
+ if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan))
+ parse_legacy(tscan);
+ break;
+ case X86_VENDOR_HYGON:
+ if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))
+ cpu_parse_topology_amd(tscan);
+ break;
+ }
+}
+
+static void topo_set_ids(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+ u32 apicid = c->topo.apicid;
+
+ c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN);
+ c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN);
+
+ c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
+ c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+
+ /* Package relative core ID */
+ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >>
+ x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN];
+
+ c->topo.amd_node_id = tscan->amd_node_id;
+
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ cpu_topology_fixup_amd(tscan);
+}
+
+void cpu_parse_topology(struct cpuinfo_x86 *c)
+{
+ unsigned int dom, cpu = smp_processor_id();
+ struct topo_scan tscan = { .c = c, };
+
+ parse_topology(&tscan, false);
+
+ if (IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
+ if (c->topo.initial_apicid != c->topo.apicid) {
+ pr_err(FW_BUG "CPU%4u: APIC ID mismatch. CPUID: 0x%04x APIC: 0x%04x\n",
+ cpu, c->topo.initial_apicid, c->topo.apicid);
+ }
+
+ if (c->topo.apicid != cpuid_to_apicid[cpu]) {
+ pr_err(FW_BUG "CPU%4u: APIC ID mismatch. Firmware: 0x%04x APIC: 0x%04x\n",
+ cpu, cpuid_to_apicid[cpu], c->topo.apicid);
+ }
+ }
+
+ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) {
+ if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom])
+ continue;
+ pr_err(FW_BUG "CPU%d: Topology domain %u shift %u != %u\n", cpu, dom,
+ tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]);
+ }
+
+ topo_set_ids(&tscan);
+}
+
+void __init cpu_init_topology(struct cpuinfo_x86 *c)
+{
+ struct topo_scan tscan = { .c = c, };
+ unsigned int dom, sft;
+
+ parse_topology(&tscan, true);
+
+ /* Copy the shift values and calculate the unit sizes. */
+ memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts));
+
+ dom = TOPO_SMT_DOMAIN;
+ x86_topo_system.dom_size[dom] = 1U << x86_topo_system.dom_shifts[dom];
+
+ for (dom++; dom < TOPO_MAX_DOMAIN; dom++) {
+ sft = x86_topo_system.dom_shifts[dom] - x86_topo_system.dom_shifts[dom - 1];
+ x86_topo_system.dom_size[dom] = 1U << sft;
+ }
+
+ topo_set_ids(&tscan);
+
+ /*
+ * AMD systems have Nodes per package which cannot be mapped to
+ * APIC ID.
+ */
+ __amd_nodes_per_pkg = tscan.amd_nodes_per_pkg;
+}
diff --git a/arch/x86/kernel/cpu/topology_ext.c b/arch/x86/kernel/cpu/topology_ext.c
new file mode 100644
index 000000000000..e477228cd5b2
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_ext.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/apic.h>
+#include <asm/memtype.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+enum topo_types {
+ INVALID_TYPE = 0,
+ SMT_TYPE = 1,
+ CORE_TYPE = 2,
+ MAX_TYPE_0B = 3,
+ MODULE_TYPE = 3,
+ TILE_TYPE = 4,
+ DIE_TYPE = 5,
+ DIEGRP_TYPE = 6,
+ MAX_TYPE_1F = 7,
+};
+
+/*
+ * Use a lookup table for the case that there are future types > 6 which
+ * describe an intermediate domain level which does not exist today.
+ */
+static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = {
+ [SMT_TYPE] = TOPO_SMT_DOMAIN,
+ [CORE_TYPE] = TOPO_CORE_DOMAIN,
+ [MODULE_TYPE] = TOPO_MODULE_DOMAIN,
+ [TILE_TYPE] = TOPO_TILE_DOMAIN,
+ [DIE_TYPE] = TOPO_DIE_DOMAIN,
+ [DIEGRP_TYPE] = TOPO_DIEGRP_DOMAIN,
+};
+
+static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
+ unsigned int *last_dom)
+{
+ unsigned int dom, maxtype;
+ const unsigned int *map;
+ struct {
+ // eax
+ u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
+ // for the topology ID at the next level
+ : 27; // Reserved
+ // ebx
+ u32 num_processors : 16, // Number of processors at current level
+ : 16; // Reserved
+ // ecx
+ u32 level : 8, // Current topology level. Same as sub leaf number
+ type : 8, // Level type. If 0, invalid
+ : 16; // Reserved
+ // edx
+ u32 x2apic_id : 32; // X2APIC ID of the current logical processor
+ } sl;
+
+ switch (leaf) {
+ case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break;
+ case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break;
+ default: return false;
+ }
+
+ cpuid_subleaf(leaf, subleaf, &sl);
+
+ if (!sl.num_processors || sl.type == INVALID_TYPE)
+ return false;
+
+ if (sl.type >= maxtype) {
+ pr_err_once("Topology: leaf 0x%x:%d Unknown domain type %u\n",
+ leaf, subleaf, sl.type);
+ /*
+ * It really would have been too obvious to make the domain
+ * type space sparse and leave a few reserved types between
+ * the points which might change instead of following the
+ * usual "this can be fixed in software" principle.
+ */
+ dom = *last_dom + 1;
+ } else {
+ dom = map[sl.type];
+ *last_dom = dom;
+ }
+
+ if (!dom) {
+ tscan->c->topo.initial_apicid = sl.x2apic_id;
+ } else if (tscan->c->topo.initial_apicid != sl.x2apic_id) {
+ pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf %d APIC ID mismatch %x != %x\n",
+ leaf, subleaf, tscan->c->topo.initial_apicid, sl.x2apic_id);
+ }
+
+ topology_set_dom(tscan, dom, sl.x2apic_shift, sl.num_processors);
+ return true;
+}
+
+static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf)
+{
+ unsigned int last_dom;
+ u32 subleaf;
+
+ /* Read all available subleafs and populate the levels */
+ for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++);
+
+ /* If subleaf 0 failed to parse, give up */
+ if (!subleaf)
+ return false;
+
+ /*
+ * There are machines in the wild which have shift 0 in the subleaf
+ * 0, but advertise 2 logical processors at that level. They are
+ * truly SMT.
+ */
+ if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
+ unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+
+ pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs. Fixing it up.\n",
+ leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+ }
+
+ set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY);
+ return true;
+}
+
+bool cpu_parse_topology_ext(struct topo_scan *tscan)
+{
+ /* Intel: Try leaf 0x1F first. */
+ if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f))
+ return true;
+
+ /* Intel/AMD: Fall back to leaf 0xB if available */
+ return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b);
+}
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
index 415564a6523b..90eba7eb5335 100644
--- a/arch/x86/kernel/cpu/zhaoxin.c
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -71,10 +71,6 @@ static void init_zhaoxin(struct cpuinfo_x86 *c)
{
early_init_zhaoxin(c);
init_intel_cacheinfo(c);
- detect_num_cpu_cores(c);
-#ifdef CONFIG_X86_32
- detect_ht(c);
-#endif
if (c->cpuid_level > 9) {
unsigned int eax = cpuid_eax(10);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index b6b044356f1b..e74d0c4286c1 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
#include <linux/vmalloc.h>
#include <linux/memblock.h>
+#include <asm/bootparam.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
#include <asm/nmi.h>
@@ -40,6 +41,7 @@
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
+#include <asm/sev.h>
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -59,6 +61,8 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_emergency_stop_pt();
+ kdump_sev_callback();
+
disable_local_APIC();
}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index afd09924094e..4aeafe63521b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -136,7 +136,7 @@ static void __init dtb_cpu_setup(void)
pr_warn("%pOF: missing local APIC ID\n", dn);
continue;
}
- generic_processor_info(apic_id);
+ topology_register_apic(apic_id, CPU_ACPIID_INVALID, true);
}
}
@@ -302,7 +302,7 @@ void __init x86_flattree_get_config(void)
}
#endif
-void __init x86_dtb_init(void)
+void __init x86_dtb_parse_smp_config(void)
{
if (!of_have_populated_dt())
return;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f18ca44c904b..44a91ef5a23b 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -410,7 +410,7 @@ static void __die_header(const char *str, struct pt_regs *regs, long err)
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
- IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+ IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
}
NOKPROBE_SYMBOL(__die_header);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index fb8cf953380d..b66f540de054 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
- * to be reserved.
+ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
+ * kexec and do not need to be reserved.
*/
- if (data->type != SETUP_EFI && data->type != SETUP_IMA)
+ if (data->type != SETUP_EFI &&
+ data->type != SETUP_IMA &&
+ data->type != SETUP_RNG_SEED)
e820__range_update_kexec(pa_data,
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 16f9814c9be0..6726e0473d0b 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -106,6 +106,10 @@ void __init init_espfix_bsp(void)
pgd_t *pgd;
p4d_t *p4d;
+ /* FRED systems always restore the full value of %rsp */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ return;
+
/* Install the espfix pud into the kernel page directory */
pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)];
p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
@@ -129,6 +133,10 @@ void init_espfix_ap(int cpu)
void *stack_page;
pteval_t ptemask;
+ /* FRED systems always restore the full value of %rsp */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ return;
+
/* We only have to do this once... */
if (likely(per_cpu(espfix_stack, cpu)))
return; /* Already initialized */
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index a06b876bbf2d..edbafc5940e3 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -2,6 +2,8 @@
/*
* x86 FPU bug checks:
*/
+#include <linux/printk.h>
+
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 558076dbde5b..247f2225aa9f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
+ /* Restore enabled features only. */
+ xrestore &= fpu->fpstate->user_xfeatures;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ retry:
if (ret != X86_TRAP_PF)
return false;
- if (!fault_in_readable(buf, size))
+ if (!fault_in_readable(buf, fpu->fpstate->user_size))
goto retry;
return false;
}
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
- unsigned int state_size;
u64 user_xfeatures = 0;
if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
return false;
fx_only = !fx_sw_user.magic1;
- state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
- state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
/* Restore the FPU registers directly from user memory. */
- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
- state_size);
+ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
}
/*
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
new file mode 100644
index 000000000000..4bcd8791ad96
--- /dev/null
+++ b/arch/x86/kernel/fred.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+
+#include <asm/desc.h>
+#include <asm/fred.h>
+#include <asm/tlbflush.h>
+#include <asm/traps.h>
+
+/* #DB in the kernel would imply the use of a kernel debugger. */
+#define FRED_DB_STACK_LEVEL 1UL
+#define FRED_NMI_STACK_LEVEL 2UL
+#define FRED_MC_STACK_LEVEL 2UL
+/*
+ * #DF is the highest level because a #DF means "something went wrong
+ * *while delivering an exception*." The number of cases for which that
+ * can happen with FRED is drastically reduced and basically amounts to
+ * "the stack you pointed me to is broken." Thus, always change stacks
+ * on #DF, which means it should be at the highest level.
+ */
+#define FRED_DF_STACK_LEVEL 3UL
+
+#define FRED_STKLVL(vector, lvl) ((lvl) << (2 * (vector)))
+
+void cpu_init_fred_exceptions(void)
+{
+ /* When FRED is enabled by default, remove this log message */
+ pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
+
+ wrmsrl(MSR_IA32_FRED_CONFIG,
+ /* Reserve for CALL emulation */
+ FRED_CONFIG_REDZONE |
+ FRED_CONFIG_INT_STKLVL(0) |
+ FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
+
+ /*
+ * The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
+ * (remember that user space faults are always taken on stack level 0)
+ * is to avoid overflowing the kernel stack.
+ */
+ wrmsrl(MSR_IA32_FRED_STKLVLS,
+ FRED_STKLVL(X86_TRAP_DB, FRED_DB_STACK_LEVEL) |
+ FRED_STKLVL(X86_TRAP_NMI, FRED_NMI_STACK_LEVEL) |
+ FRED_STKLVL(X86_TRAP_MC, FRED_MC_STACK_LEVEL) |
+ FRED_STKLVL(X86_TRAP_DF, FRED_DF_STACK_LEVEL));
+
+ /* The FRED equivalents to IST stacks... */
+ wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
+ wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
+ wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
+
+ /* Enable FRED */
+ cr4_set_bits(X86_CR4_FRED);
+ /* Any further IDT use is a bug */
+ idt_invalidate();
+
+ /* Use int $0x80 for 32-bit system calls in FRED mode */
+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 12df54ff0e81..70139d9d2e01 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -307,7 +307,8 @@ union ftrace_op_code_union {
} __attribute__((packed));
};
-#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+#define RET_SIZE \
+ (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_MITIGATION_SLS))
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index dc0956067944..212e8e06aeba 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -22,6 +22,8 @@
#include <linux/cc_platform.h>
#include <linux/pgtable.h>
+#include <asm/asm.h>
+#include <asm/page_64.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
@@ -67,42 +69,11 @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif
-/*
- * GDT used on the boot CPU before switching to virtual addresses.
- */
-static struct desc_struct startup_gdt[GDT_ENTRIES] __initdata = {
- [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff),
-};
-
-/*
- * Address needs to be set at runtime because it references the startup_gdt
- * while the kernel still uses a direct mapping.
- */
-static struct desc_ptr startup_gdt_descr __initdata = {
- .size = sizeof(startup_gdt)-1,
- .address = 0,
-};
-
-static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
-{
- return ptr - (void *)_text + (void *)physaddr;
-}
-
-static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
-{
- return fixup_pointer(ptr, physaddr);
-}
-
-#ifdef CONFIG_X86_5LEVEL
-static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
+static inline bool check_la57_support(void)
{
- return fixup_pointer(ptr, physaddr);
-}
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return false;
-static bool __head check_la57_support(unsigned long physaddr)
-{
/*
* 5-level paging is detected and enabled at kernel decompression
* stage. Only check if it has been enabled there.
@@ -110,21 +81,8 @@ static bool __head check_la57_support(unsigned long physaddr)
if (!(native_read_cr4() & X86_CR4_LA57))
return false;
- *fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
- *fixup_int(&pgdir_shift, physaddr) = 48;
- *fixup_int(&ptrs_per_p4d, physaddr) = 512;
- *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
- *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
- *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
-
return true;
}
-#else
-static bool __head check_la57_support(unsigned long physaddr)
-{
- return false;
-}
-#endif
static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
{
@@ -173,23 +131,22 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
* doesn't have to generate PC-relative relocations when accessing globals from
* that function. Clang actually does not generate them, which leads to
* boot-time crashes. To work around this problem, every global pointer must
- * be adjusted using fixup_pointer().
+ * be accessed using RIP_REL_REF().
*/
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
- unsigned long load_delta, *p;
+ pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
unsigned long pgtable_flags;
+ unsigned long load_delta;
pgdval_t *pgd;
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
- pteval_t *mask_ptr;
bool la57;
int i;
- unsigned int *next_pgt_ptr;
- la57 = check_la57_support(physaddr);
+ la57 = check_la57_support();
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
@@ -200,6 +157,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* and the address I am actually running at.
*/
load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+ RIP_REL_REF(phys_base) = load_delta;
/* Is the address not 2M aligned? */
if (load_delta & ~PMD_MASK)
@@ -210,26 +168,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Fixup the physical addresses in the page table */
- pgd = fixup_pointer(early_top_pgt, physaddr);
- p = pgd + pgd_index(__START_KERNEL_map);
- if (la57)
- *p = (unsigned long)level4_kernel_pgt;
- else
- *p = (unsigned long)level3_kernel_pgt;
- *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
+ pgd = &RIP_REL_REF(early_top_pgt)->pgd;
+ pgd[pgd_index(__START_KERNEL_map)] += load_delta;
if (la57) {
- p4d = fixup_pointer(level4_kernel_pgt, physaddr);
- p4d[511] += load_delta;
+ p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
+ p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
+
+ pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC;
}
- pud = fixup_pointer(level3_kernel_pgt, physaddr);
- pud[510] += load_delta;
- pud[511] += load_delta;
+ RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
+ RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;
- pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
- pmd[i] += load_delta;
+ RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;
/*
* Set up the identity mapping for the switchover. These
@@ -238,15 +191,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
* it avoids problems around wraparound.
*/
- next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
- pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
- pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
+ pud = &early_pgts[0]->pmd;
+ pmd = &early_pgts[1]->pmd;
+ RIP_REL_REF(next_early_pgt) = 2;
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (la57) {
- p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
- physaddr);
+ p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
@@ -267,8 +219,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
- mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
- pmd_entry &= *mask_ptr;
+ pmd_entry &= RIP_REL_REF(__supported_pte_mask);
pmd_entry += sme_get_me_mask();
pmd_entry += physaddr;
@@ -294,7 +245,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* error, causing the BIOS to halt the system.
*/
- pmd = fixup_pointer(level2_kernel_pgt, physaddr);
+ pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd;
/* invalidate pages before the kernel image */
for (i = 0; i < pmd_index((unsigned long)_text); i++)
@@ -309,12 +260,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
for (; i < PTRS_PER_PMD; i++)
pmd[i] &= ~_PAGE_PRESENT;
- /*
- * Fixup phys_base - remove the memory encryption mask to obtain
- * the true physical address.
- */
- *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
-
return sme_postprocess_startup(bp, pmd);
}
@@ -486,6 +431,15 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
+ if (check_la57_support()) {
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ page_offset_base = __PAGE_OFFSET_BASE_L5;
+ vmalloc_base = __VMALLOC_BASE_L5;
+ vmemmap_base = __VMEMMAP_BASE_L5;
+ }
+
cr4_init_shadow();
/* Kill off the identity-map trampoline */
@@ -569,62 +523,52 @@ void __init __noreturn x86_64_start_reservations(char *real_mode_data)
*/
static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
-static struct desc_ptr bringup_idt_descr = {
- .size = (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1,
- .address = 0, /* Set at runtime */
-};
-
-static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
+/* This may run while still in the direct mapping */
+static void __head startup_64_load_idt(void *vc_handler)
{
-#ifdef CONFIG_AMD_MEM_ENCRYPT
+ struct desc_ptr desc = {
+ .address = (unsigned long)&RIP_REL_REF(bringup_idt_table),
+ .size = sizeof(bringup_idt_table) - 1,
+ };
struct idt_data data;
- gate_desc desc;
-
- init_idt_data(&data, n, handler);
- idt_init_desc(&desc, &data);
- native_write_idt_entry(idt, n, &desc);
-#endif
-}
+ gate_desc idt_desc;
-/* This runs while still in the direct mapping */
-static void __head startup_64_load_idt(unsigned long physbase)
-{
- struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
- gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);
-
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
- void *handler;
-
- /* VMM Communication Exception */
- handler = fixup_pointer(vc_no_ghcb, physbase);
- set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
+ /* @vc_handler is set only for a VMM Communication Exception */
+ if (vc_handler) {
+ init_idt_data(&data, X86_TRAP_VC, vc_handler);
+ idt_init_desc(&idt_desc, &data);
+ native_write_idt_entry((gate_desc *)desc.address, X86_TRAP_VC, &idt_desc);
}
- desc->address = (unsigned long)idt;
- native_load_idt(desc);
+ native_load_idt(&desc);
}
/* This is used when running on kernel addresses */
void early_setup_idt(void)
{
- /* VMM Communication Exception */
+ void *handler = NULL;
+
if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
setup_ghcb();
- set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
+ handler = vc_boot_ghcb;
}
- bringup_idt_descr.address = (unsigned long)bringup_idt_table;
- native_load_idt(&bringup_idt_descr);
+ startup_64_load_idt(handler);
}
/*
* Setup boot CPU state needed before kernel switches to virtual addresses.
*/
-void __head startup_64_setup_env(unsigned long physbase)
+void __head startup_64_setup_gdt_idt(void)
{
+ void *handler = NULL;
+
+ struct desc_ptr startup_gdt_descr = {
+ .address = (unsigned long)&RIP_REL_REF(init_per_cpu_var(gdt_page.gdt)),
+ .size = GDT_SIZE - 1,
+ };
+
/* Load GDT */
- startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase);
native_load_gdt(&startup_gdt_descr);
/* New GDT is live - reload data segment registers */
@@ -632,5 +576,8 @@ void __head startup_64_setup_env(unsigned long physbase)
"movl %%eax, %%ss\n"
"movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
- startup_64_load_idt(physbase);
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ handler = &RIP_REL_REF(vc_no_ghcb);
+
+ startup_64_load_idt(handler);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 487ac57e2c81..b50f3641c4d6 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -414,7 +414,7 @@ __REFDATA
.align 4
SYM_DATA(initial_code, .long i386_start_kernel)
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
#define PGD_ALIGN (2 * PAGE_SIZE)
#define PTI_USER_PGD_FILL 1024
#else
@@ -474,7 +474,7 @@ SYM_DATA_START(initial_page_table)
# endif
.align PAGE_SIZE /* needs to be page-sized too */
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* PTI needs another page so sync_initial_pagetable() works correctly
* and does not scribble over the data which is placed behind the
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d4918d03efb4..d8198fbd70e5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -26,6 +26,7 @@
#include <asm/apicdef.h>
#include <asm/fixmap.h>
#include <asm/smp.h>
+#include <asm/thread_info.h>
/*
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -39,7 +40,6 @@ L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
- .text
__HEAD
.code64
SYM_CODE_START_NOALIGN(startup_64)
@@ -66,9 +66,7 @@ SYM_CODE_START_NOALIGN(startup_64)
mov %rsi, %r15
/* Set up the stack for verify_cpu() */
- leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
-
- leaq _text(%rip), %rdi
+ leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
/* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
@@ -77,7 +75,7 @@ SYM_CODE_START_NOALIGN(startup_64)
shrq $32, %rdx
wrmsr
- call startup_64_setup_env
+ call startup_64_setup_gdt_idt
/* Now switch to __KERNEL_CS so IRET works reliably */
pushq $__KERNEL_CS
@@ -113,13 +111,11 @@ SYM_CODE_START_NOALIGN(startup_64)
call __startup_64
/* Form the CR3 value being sure to include the CR3 modifier */
- addq $(early_top_pgt - __START_KERNEL_map), %rax
+ leaq early_top_pgt(%rip), %rcx
+ addq %rcx, %rax
#ifdef CONFIG_AMD_MEM_ENCRYPT
mov %rax, %rdi
- mov %rax, %r14
-
- addq phys_base(%rip), %rdi
/*
* For SEV guests: Verify that the C-bit is correct. A malicious
@@ -128,17 +124,23 @@ SYM_CODE_START_NOALIGN(startup_64)
* the next RET instruction.
*/
call sev_verify_cbit
+#endif
/*
- * Restore CR3 value without the phys_base which will be added
- * below, before writing %cr3.
+ * Switch to early_top_pgt which still has the identity mappings
+ * present.
*/
- mov %r14, %rax
-#endif
+ movq %rax, %cr3
- jmp 1f
+ /* Branch to the common startup code at its kernel virtual address */
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *0f(%rip)
SYM_CODE_END(startup_64)
+ __INITRODATA
+0: .quad common_startup_64
+
+ .text
SYM_CODE_START(secondary_startup_64)
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR
@@ -171,22 +173,39 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
/* Clear %R15 which holds the boot_params pointer on the boot CPU */
- xorq %r15, %r15
+ xorl %r15d, %r15d
+
+ /* Derive the runtime physical address of init_top_pgt[] */
+ movq phys_base(%rip), %rax
+ addq $(init_top_pgt - __START_KERNEL_map), %rax
/*
* Retrieve the modifier (SME encryption mask if SME is active) to be
* added to the initial pgdir entry that will be programmed into CR3.
*/
#ifdef CONFIG_AMD_MEM_ENCRYPT
- movq sme_me_mask, %rax
-#else
- xorq %rax, %rax
+ addq sme_me_mask(%rip), %rax
#endif
+ /*
+ * Switch to the init_top_pgt here, away from the trampoline_pgd and
+ * unmap the identity mapped ranges.
+ */
+ movq %rax, %cr3
- /* Form the CR3 value being sure to include the CR3 modifier */
- addq $(init_top_pgt - __START_KERNEL_map), %rax
-1:
+SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
+ UNWIND_HINT_END_OF_STACK
+ ANNOTATE_NOENDBR
+ /*
+ * Create a mask of CR4 bits to preserve. Omit PGE in order to flush
+ * global 1:1 translations from the TLBs.
+ *
+ * From the SDM:
+ * "If CR4.PGE is changing from 0 to 1, there were no global TLB
+ * entries before the execution; if CR4.PGE is changing from 1 to 0,
+ * there will be no global TLB entries after the execution."
+ */
+ movl $(X86_CR4_PAE | X86_CR4_LA57), %edx
#ifdef CONFIG_X86_MCE
/*
* Preserve CR4.MCE if the kernel will enable #MC support.
@@ -195,52 +214,20 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* configured will crash the system regardless of the CR4.MCE value set
* here.
*/
- movq %cr4, %rcx
- andl $X86_CR4_MCE, %ecx
-#else
- movl $0, %ecx
+ orl $X86_CR4_MCE, %edx
#endif
+ movq %cr4, %rcx
+ andl %edx, %ecx
- /* Enable PAE mode, PSE, PGE and LA57 */
- orl $(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx
-#ifdef CONFIG_X86_5LEVEL
- testb $1, __pgtable_l5_enabled(%rip)
- jz 1f
- orl $X86_CR4_LA57, %ecx
-1:
-#endif
+ /* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
+ btsl $X86_CR4_PSE_BIT, %ecx
movq %rcx, %cr4
- /* Setup early boot stage 4-/5-level pagetables. */
- addq phys_base(%rip), %rax
-
/*
- * Switch to new page-table
- *
- * For the boot CPU this switches to early_top_pgt which still has the
- * identity mappings present. The secondary CPUs will switch to the
- * init_top_pgt here, away from the trampoline_pgd and unmap the
- * identity mapped ranges.
+ * Set CR4.PGE to re-enable global translations.
*/
- movq %rax, %cr3
-
- /*
- * Do a global TLB flush after the CR3 switch to make sure the TLB
- * entries from the identity mapping are flushed.
- */
- movq %cr4, %rcx
- movq %rcx, %rax
- xorq $X86_CR4_PGE, %rcx
+ btsl $X86_CR4_PGE_BIT, %ecx
movq %rcx, %cr4
- movq %rax, %cr4
-
- /* Ensure I am executing from virtual addresses */
- movq $1f, %rax
- ANNOTATE_RETPOLINE_SAFE
- jmp *%rax
-1:
- UNWIND_HINT_END_OF_STACK
- ANNOTATE_NOENDBR // above
#ifdef CONFIG_SMP
/*
@@ -297,7 +284,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
.Llookup_AP:
/* EAX contains the APIC ID of the current CPU */
- xorq %rcx, %rcx
+ xorl %ecx, %ecx
leaq cpuid_to_apicid(%rip), %rbx
.Lfind_cpunr:
@@ -428,39 +415,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movq %r15, %rdi
.Ljump_to_C_code:
- /*
- * Jump to run C code and to be on a real kernel address.
- * Since we are running on identity-mapped space we have to jump
- * to the full 64bit address, this is only possible as indirect
- * jump. In addition we need to ensure %cs is set so we make this
- * a far return.
- *
- * Note: do not change to far jump indirect with 64bit offset.
- *
- * AMD does not support far jump indirect with 64bit offset.
- * AMD64 Architecture Programmer's Manual, Volume 3: states only
- * JMP FAR mem16:16 FF /5 Far jump indirect,
- * with the target specified by a far pointer in memory.
- * JMP FAR mem16:32 FF /5 Far jump indirect,
- * with the target specified by a far pointer in memory.
- *
- * Intel64 does support 64bit offset.
- * Software Developer Manual Vol 2: states:
- * FF /5 JMP m16:16 Jump far, absolute indirect,
- * address given in m16:16
- * FF /5 JMP m16:32 Jump far, absolute indirect,
- * address given in m16:32.
- * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
- * address given in m16:64.
- */
- pushq $.Lafter_lret # put return address on stack for unwinder
xorl %ebp, %ebp # clear frame pointer
- movq initial_code(%rip), %rax
- pushq $__KERNEL_CS # set correct cs
- pushq %rax # target address in negative space
- lretq
-.Lafter_lret:
- ANNOTATE_NOENDBR
+ ANNOTATE_RETPOLINE_SAFE
+ callq *initial_code(%rip)
+ ud2
SYM_CODE_END(secondary_startup_64)
#include "verify_cpu.S"
@@ -477,7 +435,7 @@ SYM_CODE_START(soft_restart_cpu)
UNWIND_HINT_END_OF_STACK
/* Find the idle task stack */
- movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
+ movq PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx
movq TASK_threadsp(%rcx), %rsp
jmp .Ljump_to_C_code
@@ -622,7 +580,7 @@ SYM_CODE_END(vc_no_ghcb)
#define SYM_DATA_START_PAGE_ALIGNED(name) \
SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Each PGD needs to be 8k long and 8k aligned. We do not
* ever go out to userspace with these, so we do not
@@ -655,7 +613,8 @@ SYM_CODE_END(vc_no_ghcb)
.balign 4
SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
- .fill 512,8,0
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(early_top_pgt)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a38d0c93a66e..c96ae8fee95e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -568,7 +568,7 @@ static struct irq_domain *hpet_create_irq_domain(int hpet_id)
fwspec.param_count = 1;
fwspec.param[0] = hpet_id;
- parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI);
if (!parent) {
irq_domain_free_fwnode(fn);
kfree(domain_info);
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 660b601f1d6c..0cd53fa8c65d 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -337,7 +337,7 @@ void idt_invalidate(void)
load_idt(&idt);
}
-void __init alloc_intr_gate(unsigned int n, const void *addr)
+void __init idt_install_sysvec(unsigned int n, const void *function)
{
if (WARN_ON(n < FIRST_SYSTEM_VECTOR))
return;
@@ -346,5 +346,5 @@ void __init alloc_intr_gate(unsigned int n, const void *addr)
return;
if (!WARN_ON(test_and_set_bit(n, system_vectors)))
- set_intr_gate(n, addr);
+ set_intr_gate(n, function);
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c683666876f1..f79c5edc0b89 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -28,6 +28,7 @@
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
+#include <asm/fred.h>
#include <asm/prom.h>
/*
@@ -96,7 +97,11 @@ void __init native_init_IRQ(void)
/* Execute any quirks before the call gates are initialised: */
x86_init.irqs.pre_vector_init();
- idt_setup_apic_and_irq_gates();
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_complete_exception_setup();
+ else
+ idt_setup_apic_and_irq_gates();
+
lapic_assign_system_vectors();
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 578d16fc040f..df337860612d 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -89,7 +89,7 @@ static void __init jailhouse_x2apic_init(void)
#endif
}
-static void __init jailhouse_get_smp_config(unsigned int early)
+static void __init jailhouse_parse_smp_config(void)
{
struct ioapic_domain_cfg ioapic_cfg = {
.type = IOAPIC_DOMAIN_STRICT,
@@ -102,7 +102,7 @@ static void __init jailhouse_get_smp_config(unsigned int early)
register_lapic_address(0xfee00000);
for (cpu = 0; cpu < setup_data.v1.num_cpus; cpu++)
- generic_processor_info(setup_data.v1.cpu_ids[cpu]);
+ topology_register_apic(setup_data.v1.cpu_ids[cpu], CPU_ACPIID_INVALID, true);
smp_found_config = 1;
@@ -201,21 +201,23 @@ static void __init jailhouse_init_platform(void)
struct setup_data header;
void *mapping;
- x86_init.irqs.pre_vector_init = x86_init_noop;
- x86_init.timers.timer_init = jailhouse_timer_init;
- x86_init.mpparse.get_smp_config = jailhouse_get_smp_config;
- x86_init.pci.arch_init = jailhouse_pci_arch_init;
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+ x86_init.timers.timer_init = jailhouse_timer_init;
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+ x86_init.mpparse.parse_smp_cfg = jailhouse_parse_smp_config;
+ x86_init.pci.arch_init = jailhouse_pci_arch_init;
- x86_platform.calibrate_cpu = jailhouse_get_tsc;
- x86_platform.calibrate_tsc = jailhouse_get_tsc;
- x86_platform.get_wallclock = jailhouse_get_wallclock;
- x86_platform.legacy.rtc = 0;
- x86_platform.legacy.warm_reset = 0;
- x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
+ x86_platform.calibrate_cpu = jailhouse_get_tsc;
+ x86_platform.calibrate_tsc = jailhouse_get_tsc;
+ x86_platform.get_wallclock = jailhouse_get_wallclock;
+ x86_platform.legacy.rtc = 0;
+ x86_platform.legacy.warm_reset = 0;
+ x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
- legacy_pic = &null_legacy_pic;
+ legacy_pic = &null_legacy_pic;
- machine_ops.emergency_restart = jailhouse_no_restart;
+ machine_ops.emergency_restart = jailhouse_no_restart;
while (pa_data) {
mapping = early_memremap(pa_data, sizeof(header));
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 2a422e00ed4b..cde167b0ea92 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -503,7 +503,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz = kernel_len - kern16_size;
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
- kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+ if (header->pref_address < MIN_KERNEL_LOAD_ADDR)
+ kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+ else
+ kbuf.buf_min = header->pref_address;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 517821b48391..36d6809c6c9e 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -324,7 +324,7 @@ static int can_optimize(unsigned long paddr)
* However, the kernel built with retpolines or IBT has jump
* tables disabled so the check can be skipped altogether.
*/
- if (!IS_ENABLED(CONFIG_RETPOLINE) &&
+ if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) &&
!IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
insn_is_indirect_jump(&insn))
return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dfe9945b9bec..101a7c1bf200 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -434,7 +434,8 @@ static void __init sev_map_percpu_data(void)
{
int cpu;
- if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ if (cc_vendor != CC_VENDOR_AMD ||
+ !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
for_each_possible_cpu(cpu) {
@@ -829,7 +830,7 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
static_branch_enable(&kvm_async_pf_enabled);
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
+ sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt);
}
#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 5bb395551c44..5b2c15214a6b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -154,15 +154,15 @@ static int kvm_cs_enable(struct clocksource *cs)
return 0;
}
-struct clocksource kvm_clock = {
+static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .id = CSID_X86_KVM_CLK,
.enable = kvm_cs_enable,
};
-EXPORT_SYMBOL_GPL(kvm_clock);
static void kvm_register_clock(char *txt)
{
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 7a814b41402d..0f19ef355f5f 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -184,7 +184,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
return new_ldt;
}
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
static void do_sanity_check(struct mm_struct *mm,
bool had_kernel_mapping,
@@ -377,7 +377,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
}
-#else /* !CONFIG_PAGE_TABLE_ISOLATION */
+#else /* !CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
@@ -388,11 +388,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
static void free_ldt_pgtables(struct mm_struct *mm)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
struct mmu_gather tlb;
unsigned long start = LDT_BASE_ADDR;
unsigned long end = LDT_END_ADDR;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b223922248e9..1ccd30c8246f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -36,6 +36,8 @@
* Checksum an MP configuration block.
*/
+static unsigned int num_procs __initdata;
+
static int __init mpf_checksum(unsigned char *mp, int len)
{
int sum = 0;
@@ -50,16 +52,15 @@ static void __init MP_processor_info(struct mpc_cpu *m)
{
char *bootup_cpu = "";
- if (!(m->cpuflag & CPU_ENABLED)) {
- disabled_cpus++;
+ topology_register_apic(m->apicid, CPU_ACPIID_INVALID, m->cpuflag & CPU_ENABLED);
+ if (!(m->cpuflag & CPU_ENABLED))
return;
- }
if (m->cpuflag & CPU_BOOTPROCESSOR)
bootup_cpu = " (Bootup-CPU)";
pr_info("Processor #%d%s\n", m->apicid, bootup_cpu);
- generic_processor_info(m->apicid);
+ num_procs++;
}
#ifdef CONFIG_X86_IO_APIC
@@ -236,9 +237,9 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
}
}
- if (!num_processors)
+ if (!num_procs && !acpi_lapic)
pr_err("MPTABLE: no processors registered!\n");
- return num_processors;
+ return num_procs || acpi_lapic;
}
#ifdef CONFIG_X86_IO_APIC
@@ -473,7 +474,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
/*
* Scan the memory blocks for an SMP configuration block.
*/
-void __init default_get_smp_config(unsigned int early)
+static __init void mpparse_get_smp_config(unsigned int early)
{
struct mpf_intel *mpf;
@@ -529,8 +530,8 @@ void __init default_get_smp_config(unsigned int early)
} else
BUG();
- if (!early)
- pr_info("Processors: %d\n", num_processors);
+ if (!early && !acpi_lapic)
+ pr_info("Processors: %d\n", num_procs);
/*
* Only use the first configuration found.
*/
@@ -538,6 +539,16 @@ out:
early_memunmap(mpf, sizeof(*mpf));
}
+void __init mpparse_parse_early_smp_config(void)
+{
+ mpparse_get_smp_config(true);
+}
+
+void __init mpparse_parse_smp_config(void)
+{
+ mpparse_get_smp_config(false);
+}
+
static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
@@ -587,7 +598,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
return ret;
}
-void __init default_find_smp_config(void)
+void __init mpparse_find_mptable(void)
{
unsigned int address;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 17e955ab69fe..9a5b372c706f 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -35,6 +35,7 @@
#include <asm/nospec-branch.h>
#include <asm/microcode.h>
#include <asm/sev.h>
+#include <asm/fred.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -303,13 +304,13 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
__this_cpu_add(nmi_stats.unknown, 1);
- pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
- pr_emerg("Dazed and confused, but trying to continue\n");
+ pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);
@@ -502,7 +503,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
raw_atomic_long_inc(&nsp->idt_calls);
- if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) {
+ if (arch_cpu_is_offline(smp_processor_id())) {
if (microcode_nmi_handler_enabled())
microcode_offline_nmi_handler();
return;
@@ -563,9 +564,6 @@ nmi_restart:
}
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
-
- if (user_mode(regs))
- mds_user_clear_cpu_buffers();
}
#if IS_ENABLED(CONFIG_KVM_INTEL)
@@ -639,7 +637,7 @@ void nmi_backtrace_stall_check(const struct cpumask *btp)
msgp = nmi_check_stall_msg[idx];
if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
modp = ", but OK because ignore_nmis was set";
- if (nmi_seq & ~0x1)
+ if (nmi_seq & 0x1)
msghp = " (CPU currently in NMI handler function)";
else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
msghp = " (CPU exited one NMI handler function)";
@@ -651,6 +649,47 @@ void nmi_backtrace_stall_check(const struct cpumask *btp)
#endif
+#ifdef CONFIG_X86_FRED
+/*
+ * With FRED, CR2/DR6 is pushed to #PF/#DB stack frame during FRED
+ * event delivery, i.e., there is no problem of transient states.
+ * And NMI unblocking only happens when the stack frame indicates
+ * that so should happen.
+ *
+ * Thus, the NMI entry stub for FRED is really straightforward and
+ * as simple as most exception handlers. As such, #DB is allowed
+ * during NMI handling.
+ */
+DEFINE_FREDENTRY_NMI(exc_nmi)
+{
+ irqentry_state_t irq_state;
+
+ if (arch_cpu_is_offline(smp_processor_id())) {
+ if (microcode_nmi_handler_enabled())
+ microcode_offline_nmi_handler();
+ return;
+ }
+
+ /*
+ * Save CR2 for eventual restore to cover the case where the NMI
+ * hits the VMENTER/VMEXIT region where guest CR2 is life. This
+ * prevents guest state corruption in case that the NMI handler
+ * takes a page fault.
+ */
+ this_cpu_write(nmi_cr2, read_cr2());
+
+ irq_state = irqentry_nmi_enter(regs);
+
+ inc_irq_stat(__nmi_count);
+ default_do_nmi(regs);
+
+ irqentry_nmi_exit(regs, irq_state);
+
+ if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+ write_cr2(this_cpu_read(nmi_cr2));
+}
+#endif
+
void stop_nmi(void)
{
ignore_nmis++;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ab49ade31b0d..b8441147eb5e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -846,31 +846,6 @@ void __noreturn stop_this_cpu(void *dummy)
}
/*
- * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
- * states (local apic timer and TSC stop).
- *
- * XXX this function is completely buggered vs RCU and tracing.
- */
-static void amd_e400_idle(void)
-{
- /*
- * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
- * gets set after static_cpu_has() places have been converted via
- * alternatives.
- */
- if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
- default_idle();
- return;
- }
-
- tick_broadcast_enter();
-
- default_idle();
-
- tick_broadcast_exit();
-}
-
-/*
* Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf
* exists and whenever MONITOR/MWAIT extensions are present there is at
* least one C1 substate.
@@ -878,21 +853,22 @@ static void amd_e400_idle(void)
* Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait
* is passed to kernel commandline parameter.
*/
-static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+static __init bool prefer_mwait_c1_over_halt(void)
{
+ const struct cpuinfo_x86 *c = &boot_cpu_data;
u32 eax, ebx, ecx, edx;
- /* User has disallowed the use of MWAIT. Fallback to HALT */
- if (boot_option_idle_override == IDLE_NOMWAIT)
- return 0;
+ /* If override is enforced on the command line, fall back to HALT. */
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+ return false;
/* MWAIT is not supported on this platform. Fallback to HALT */
if (!cpu_has(c, X86_FEATURE_MWAIT))
- return 0;
+ return false;
- /* Monitor has a bug. Fallback to HALT */
- if (boot_cpu_has_bug(X86_BUG_MONITOR))
- return 0;
+ /* Monitor has a bug or APIC stops in C1E. Fallback to HALT */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E))
+ return false;
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
@@ -901,13 +877,13 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
* with EAX=0, ECX=0.
*/
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED))
- return 1;
+ return true;
/*
* If MWAIT extensions are available, there should be at least one
* MWAIT C1 substate present.
*/
- return (edx & MWAIT_C1_SUBSTATE_MASK);
+ return !!(edx & MWAIT_C1_SUBSTATE_MASK);
}
/*
@@ -933,26 +909,27 @@ static __cpuidle void mwait_idle(void)
__current_clr_polling();
}
-void select_idle_routine(const struct cpuinfo_x86 *c)
+void __init select_idle_routine(void)
{
-#ifdef CONFIG_SMP
- if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
- pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
-#endif
- if (x86_idle_set() || boot_option_idle_override == IDLE_POLL)
+ if (boot_option_idle_override == IDLE_POLL) {
+ if (IS_ENABLED(CONFIG_SMP) && __max_threads_per_core > 1)
+ pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
+ return;
+ }
+
+ /* Required to guard against xen_set_default_idle() */
+ if (x86_idle_set())
return;
- if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
- pr_info("using AMD E400 aware idle routine\n");
- static_call_update(x86_idle, amd_e400_idle);
- } else if (prefer_mwait_c1_over_halt(c)) {
+ if (prefer_mwait_c1_over_halt()) {
pr_info("using mwait in idle threads\n");
static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
static_call_update(x86_idle, tdx_safe_halt);
- } else
+ } else {
static_call_update(x86_idle, default_idle);
+ }
}
void amd_e400_c1e_apic_setup(void)
@@ -985,7 +962,10 @@ void __init arch_post_acpi_subsys_init(void)
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
- pr_info("System has AMD C1E enabled\n");
+
+ if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE))
+ static_branch_enable(&arch_needs_tick_broadcast);
+ pr_info("System has AMD C1E erratum E400. Workaround enabled.\n");
}
static int __init idle_setup(char *str)
@@ -998,24 +978,14 @@ static int __init idle_setup(char *str)
boot_option_idle_override = IDLE_POLL;
cpu_idle_poll_ctrl(true);
} else if (!strcmp(str, "halt")) {
- /*
- * When the boot option of idle=halt is added, halt is
- * forced to be used for CPU idle. In such case CPU C2/C3
- * won't be used again.
- * To continue to load the CPU idle driver, don't touch
- * the boot_option_idle_override.
- */
- static_call_update(x86_idle, default_idle);
+ /* 'idle=halt' HALT for idle. C-states are disabled. */
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
- /*
- * If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C1/C2/C3
- * states.
- */
+ /* 'idle=nomwait' disables MWAIT for idle */
boot_option_idle_override = IDLE_NOMWAIT;
- } else
- return -1;
+ } else {
+ return -EINVAL;
+ }
return 0;
}
@@ -1030,7 +1000,10 @@ unsigned long arch_align_stack(unsigned long sp)
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- return randomize_page(mm->brk, 0x02000000);
+ if (mmap_is_ia32())
+ return randomize_page(mm->brk, SZ_32M);
+
+ return randomize_page(mm->brk, SZ_1G);
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 708c87b88cc1..0917c7f25720 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -156,13 +156,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
- struct fpu *prev_fpu = &prev->fpu;
int cpu = smp_processor_id();
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_p, cpu);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -209,7 +208,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(pcpu_hot.current_task, next_p);
- switch_fpu_finish();
+ switch_fpu_finish(next_p);
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in(next_p);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 33b268747bb7..7062b84dd467 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -56,6 +56,7 @@
#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
+#include <asm/fred.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
@@ -117,7 +118,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
log_lvl, fs, fsindex, gs, gsindex, shadowgs);
- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
log_lvl, regs->cs, ds, es, cr0);
printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
log_lvl, cr2, cr3, cr4);
@@ -166,7 +167,29 @@ static noinstr unsigned long __rdgsbase_inactive(void)
lockdep_assert_irqs_disabled();
- if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
+ /*
+ * SWAPGS is no longer needed thus NOT allowed with FRED because
+ * FRED transitions ensure that an operating system can _always_
+ * operate with its own GS base address:
+ * - For events that occur in ring 3, FRED event delivery swaps
+ * the GS base address with the IA32_KERNEL_GS_BASE MSR.
+ * - ERETU (the FRED transition that returns to ring 3) also swaps
+ * the GS base address with the IA32_KERNEL_GS_BASE MSR.
+ *
+ * And the operating system can still setup the GS segment for a
+ * user thread without the need of loading a user thread GS with:
+ * - Using LKGS, available with FRED, to modify other attributes
+ * of the GS segment without compromising its ability always to
+ * operate with its own GS base address.
+ * - Accessing the GS segment base address for a user thread as
+ * before using RDMSR or WRMSR on the IA32_KERNEL_GS_BASE MSR.
+ *
+ * Note, LKGS loads the GS base address into the IA32_KERNEL_GS_BASE
+ * MSR instead of the GS segment’s descriptor cache. As such, the
+ * operating system never changes its runtime GS base address.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) &&
+ !cpu_feature_enabled(X86_FEATURE_XENPV)) {
native_swapgs();
gsbase = rdgsbase();
native_swapgs();
@@ -191,7 +214,8 @@ static noinstr void __wrgsbase_inactive(unsigned long gsbase)
{
lockdep_assert_irqs_disabled();
- if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) &&
+ !cpu_feature_enabled(X86_FEATURE_XENPV)) {
native_swapgs();
wrgsbase(gsbase);
native_swapgs();
@@ -505,7 +529,7 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp,
- unsigned int _cs, unsigned int _ss, unsigned int _ds)
+ u16 _cs, u16 _ss, u16 _ds)
{
WARN_ON_ONCE(regs != current_pt_regs());
@@ -522,11 +546,36 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(ds, _ds);
load_gs_index(0);
- regs->ip = new_ip;
- regs->sp = new_sp;
- regs->cs = _cs;
- regs->ss = _ss;
- regs->flags = X86_EFLAGS_IF;
+ regs->ip = new_ip;
+ regs->sp = new_sp;
+ regs->csx = _cs;
+ regs->ssx = _ss;
+ /*
+ * Allow single-step trap and NMI when starting a new task, thus
+ * once the new task enters user space, single-step trap and NMI
+ * are both enabled immediately.
+ *
+ * Entering a new task is logically speaking a return from a
+ * system call (exec, fork, clone, etc.). As such, if ptrace
+ * enables single stepping a single step exception should be
+ * allowed to trigger immediately upon entering user space.
+ * This is not optional.
+ *
+ * NMI should *never* be disabled in user space. As such, this
+ * is an optional, opportunistic way to catch errors.
+ *
+ * Paranoia: High-order 48 bits above the lowest 16 bit SS are
+ * discarded by the legacy IRET instruction on all Intel, AMD,
+ * and Cyrix/Centaur/VIA CPUs, thus can be set unconditionally,
+ * even when FRED is not enabled. But we choose the safer side
+ * to use these bits only when FRED is enabled.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+ regs->fred_ss.swevent = true;
+ regs->fred_ss.nmi = true;
+ }
+
+ regs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
}
void
@@ -562,14 +611,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
- struct fpu *prev_fpu = &prev->fpu;
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(pcpu_hot.hardirq_stack_inuse));
- if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_p, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -623,7 +671,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(pcpu_hot.current_task, next_p);
raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
- switch_fpu_finish();
+ switch_fpu_finish(next_p);
/* Reload sp0. */
update_task_stack(next_p);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 84201071dfac..46d5a8c520ad 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -970,10 +970,8 @@ void __init setup_arch(char **cmdline_p)
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
+ /* Find and reserve MPTABLE area */
+ x86_init.mpparse.find_mptable();
early_alloc_pgt_buf();
@@ -1090,7 +1088,9 @@ void __init setup_arch(char **cmdline_p)
early_platform_quirks();
+ /* Some platforms need the APIC registered for NUMA configuration */
early_acpi_boot_init();
+ x86_init.mpparse.early_parse_smp_cfg();
x86_flattree_get_config();
@@ -1131,24 +1131,19 @@ void __init setup_arch(char **cmdline_p)
early_quirks();
- /*
- * Read APIC and some other early information from ACPI tables.
- */
- acpi_boot_init();
- x86_dtb_init();
+ topology_apply_cmdline_limits_early();
/*
- * get boot-time SMP configuration:
+ * Parse SMP configuration. Try ACPI first and then the platform
+ * specific parser.
*/
- get_smp_config();
+ acpi_boot_init();
+ x86_init.mpparse.parse_smp_cfg();
- /*
- * Systems w/o ACPI and mptables might not have it mapped the local
- * APIC yet, but prefill_possible_map() might need to access it.
- */
+ /* Last opportunity to detect and map the local APIC */
init_apic_mappings();
- prefill_possible_map();
+ topology_init_possible_cpus();
init_cpu_to_node();
init_gi_nodes();
@@ -1211,6 +1206,16 @@ void __init i386_reserve_resources(void)
#endif /* CONFIG_X86_32 */
+#ifndef CONFIG_SMP
+void __init smp_prepare_boot_cpu(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ *c = boot_cpu_data;
+ c->initialized = true;
+}
+#endif
+
static struct notifier_block kernel_offset_notifier = {
.notifier_call = dump_kernel_offset
};
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 1d24ec679915..8b04958da5e7 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -9,12 +9,18 @@
* and is included directly into both code-bases.
*/
+#include <asm/setup_data.h>
+
#ifndef __BOOT_COMPRESSED
-#define error(v) pr_err(v)
-#define has_cpuflag(f) boot_cpu_has(f)
+#define error(v) pr_err(v)
+#define has_cpuflag(f) boot_cpu_has(f)
+#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__)
+#define sev_printk_rtl(fmt, ...) printk_ratelimited(fmt, ##__VA_ARGS__)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
+#define sev_printk(fmt, ...)
+#define sev_printk_rtl(fmt, ...)
#endif
/* I/O parameters for CPUID-related helpers */
@@ -89,7 +95,8 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}
-static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
+static void __head __noreturn
+sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;
@@ -326,13 +333,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
*/
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
- void *ptr;
-
- asm ("lea cpuid_table_copy(%%rip), %0"
- : "=r" (ptr)
- : "p" (&cpuid_table_copy));
-
- return ptr;
+ return &RIP_REL_REF(cpuid_table_copy);
}
/*
@@ -391,7 +392,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
return xsave_size;
}
-static bool
+static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -528,7 +529,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+static int __head
+snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -556,9 +558,9 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le
leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
/* Skip post-processing for out-of-range zero leafs. */
- if (!(leaf->fn <= cpuid_std_range_max ||
- (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
- (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
+ if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) ||
+ (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) ||
+ (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max))))
return 0;
}
@@ -570,10 +572,11 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le
* page yet, so it only supports the MSR based communication with the
* hypervisor and only the CPUID exit-code.
*/
-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
+ u16 opcode = *(unsigned short *)regs->ip;
struct cpuid_leaf leaf;
int ret;
@@ -581,6 +584,10 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
if (exit_code != SVM_EXIT_CPUID)
goto fail;
+ /* Is it really a CPUID insn? */
+ if (opcode != 0xa20f)
+ goto fail;
+
leaf.fn = fn;
leaf.subfn = subfn;
@@ -1016,7 +1023,8 @@ struct cc_setup_data {
* Search for a Confidential Computing blob passed in as a setup_data entry
* via the Linux Boot Protocol.
*/
-static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+static __head
+struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
struct cc_setup_data *sd = NULL;
struct setup_data *hdr;
@@ -1043,7 +1051,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
* mapping needs to be updated in sync with all the changes to virtual memory
* layout and related mapping facilities throughout the boot process.
*/
-static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
int i;
@@ -1063,11 +1071,11 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
if (fn->eax_in == 0x0)
- cpuid_std_range_max = fn->eax;
+ RIP_REL_REF(cpuid_std_range_max) = fn->eax;
else if (fn->eax_in == 0x40000000)
- cpuid_hyp_range_max = fn->eax;
+ RIP_REL_REF(cpuid_hyp_range_max) = fn->eax;
else if (fn->eax_in == 0x80000000)
- cpuid_ext_range_max = fn->eax;
+ RIP_REL_REF(cpuid_ext_range_max) = fn->eax;
}
}
@@ -1170,3 +1178,92 @@ static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
out:
return ret;
}
+
+static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
+ unsigned long exit_code)
+{
+ unsigned int opcode = (unsigned int)ctxt->insn.opcode.value;
+ u8 modrm = ctxt->insn.modrm.value;
+
+ switch (exit_code) {
+
+ case SVM_EXIT_IOIO:
+ case SVM_EXIT_NPF:
+ /* handled separately */
+ return ES_OK;
+
+ case SVM_EXIT_CPUID:
+ if (opcode == 0xa20f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_INVD:
+ if (opcode == 0x080f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MONITOR:
+ if (opcode == 0x010f && modrm == 0xc8)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MWAIT:
+ if (opcode == 0x010f && modrm == 0xc9)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MSR:
+ /* RDMSR */
+ if (opcode == 0x320f ||
+ /* WRMSR */
+ opcode == 0x300f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDPMC:
+ if (opcode == 0x330f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDTSC:
+ if (opcode == 0x310f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDTSCP:
+ if (opcode == 0x010f && modrm == 0xf9)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_READ_DR7:
+ if (opcode == 0x210f &&
+ X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_VMMCALL:
+ if (opcode == 0x010f && modrm == 0xd9)
+ return ES_OK;
+
+ break;
+
+ case SVM_EXIT_WRITE_DR7:
+ if (opcode == 0x230f &&
+ X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_WBINVD:
+ if (opcode == 0x90f)
+ return ES_OK;
+ break;
+
+ default:
+ break;
+ }
+
+ sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n",
+ opcode, exit_code, ctxt->regs->ip);
+
+ return ES_UNSUPPORTED;
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c67285824e82..b59b09c2f284 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -25,6 +25,7 @@
#include <linux/psp-sev.h>
#include <uapi/linux/sev-guest.h>
+#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
@@ -59,6 +60,25 @@
#define AP_INIT_CR0_DEFAULT 0x60000010
#define AP_INIT_MXCSR_DEFAULT 0x1f80
+static const char * const sev_status_feat_names[] = {
+ [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
+ [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
+ [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
+ [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
+ [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
+ [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
+ [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
+ [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
+ [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
+ [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
+ [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
+ [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
+ [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
+ [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
+ [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
+ [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
+};
+
/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
@@ -682,8 +702,9 @@ static u64 __init get_jump_table_addr(void)
return ret;
}
-static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op)
+static void __head
+early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages, enum psc_op op)
{
unsigned long paddr_end;
u64 val;
@@ -739,7 +760,7 @@ e_term:
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
/*
@@ -748,7 +769,7 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
* This eliminates worries about jump tables or checking boot_cpu_data
* in the cc_platform_has() function.
*/
- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
return;
/*
@@ -767,7 +788,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
* This eliminates worries about jump tables or checking boot_cpu_data
* in the cc_platform_has() function.
*/
- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
return;
/* Ask hypervisor to mark the memory pages shared in the RMP table. */
@@ -1752,7 +1773,10 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
struct ghcb *ghcb,
unsigned long exit_code)
{
- enum es_result result;
+ enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
+
+ if (result != ES_OK)
+ return result;
switch (exit_code) {
case SVM_EXIT_READ_DR7:
@@ -2059,7 +2083,7 @@ fail:
*
* Scan for the blob in that order.
*/
-static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -2085,7 +2109,7 @@ found_cc_info:
return cc_info;
}
-bool __init snp_init(struct boot_params *bp)
+bool __head snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -2107,7 +2131,7 @@ bool __init snp_init(struct boot_params *bp)
return true;
}
-void __init __noreturn snp_abort(void)
+void __head __noreturn snp_abort(void)
{
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
@@ -2262,3 +2286,29 @@ static int __init snp_init_platform_device(void)
return 0;
}
device_initcall(snp_init_platform_device);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ wbinvd();
+}
+
+void sev_show_status(void)
+{
+ int i;
+
+ pr_info("Status: ");
+ for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
+ if (sev_status & BIT_ULL(i)) {
+ if (!sev_status_feat_names[i])
+ continue;
+
+ pr_cont("%s ", sev_status_feat_names[i]);
+ }
+ }
+ pr_cont("\n");
+}
diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S
index 3355e27c69eb..1ab65f6c6ae7 100644
--- a/arch/x86/kernel/sev_verify_cbit.S
+++ b/arch/x86/kernel/sev_verify_cbit.S
@@ -77,7 +77,7 @@ SYM_FUNC_START(sev_verify_cbit)
* The check failed, prevent any forward progress to prevent ROP
* attacks, invalidate the stack and go into a hlt loop.
*/
- xorq %rsp, %rsp
+ xorl %esp, %esp
subq $0x1000, %rsp
2: hlt
jmp 2b
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 96a771f9f930..2908e063d7d8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -148,14 +148,16 @@ static int register_stop_handler(void)
static void native_stop_other_cpus(int wait)
{
- unsigned int cpu = smp_processor_id();
+ unsigned int old_cpu, this_cpu;
unsigned long flags, timeout;
if (reboot_force)
return;
/* Only proceed if this is the first CPU to reach this code */
- if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1)
+ old_cpu = -1;
+ this_cpu = smp_processor_id();
+ if (!atomic_try_cmpxchg(&stopping_cpu, &old_cpu, this_cpu))
return;
/* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
@@ -186,7 +188,7 @@ static void native_stop_other_cpus(int wait)
* NMIs.
*/
cpumask_copy(&cpus_stop_mask, cpu_online_mask);
- cpumask_clear_cpu(cpu, &cpus_stop_mask);
+ cpumask_clear_cpu(this_cpu, &cpus_stop_mask);
if (!cpumask_empty(&cpus_stop_mask)) {
apic_send_IPI_allbutself(REBOOT_VECTOR);
@@ -210,6 +212,8 @@ static void native_stop_other_cpus(int wait)
* CPUs to stop.
*/
if (!smp_no_nmi_ipi && !register_stop_handler()) {
+ unsigned int cpu;
+
pr_emerg("Shutting down cpus with NMI\n");
for_each_cpu(cpu, &cpus_stop_mask)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3f57ce68a3f1..fe355c89f6c1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,10 +101,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
-/* Per CPU bogomips and other parameters */
-DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
-EXPORT_PER_CPU_SYMBOL(cpu_info);
-
/* CPUs which are the primary SMT threads */
struct cpumask __cpu_primary_thread_mask __read_mostly;
@@ -125,25 +121,6 @@ struct mwait_cpu_dead {
*/
static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
-/* Logical package management. */
-struct logical_maps {
- u32 phys_pkg_id;
- u32 phys_die_id;
- u32 logical_pkg_id;
- u32 logical_die_id;
-};
-
-/* Temporary workaround until the full topology mechanics is in place */
-static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = {
- .phys_pkg_id = U32_MAX,
- .phys_die_id = U32_MAX,
-};
-
-unsigned int __max_logical_packages __read_mostly;
-EXPORT_SYMBOL(__max_logical_packages);
-static unsigned int logical_packages __read_mostly;
-static unsigned int logical_die __read_mostly;
-
/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;
@@ -336,103 +313,11 @@ static void notrace start_secondary(void *unused)
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- * @phys_pkg: The physical package id to map
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg)
- return per_cpu(logical_maps.logical_pkg_id, cpu);
- }
- return -1;
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-/**
- * topology_phys_to_logical_die - Map a physical die id to logical
- * @die_id: The physical die id to map
- * @cur_cpu: The CPU for which the mapping is done
- *
- * Returns logical die id or -1 if not found
- */
-static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
-{
- int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id;
-
- for_each_possible_cpu(cpu) {
- if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id &&
- per_cpu(logical_maps.phys_die_id, cpu) == die_id)
- return per_cpu(logical_maps.logical_die_id, cpu);
- }
- return -1;
-}
-
-/**
- * topology_update_package_map - Update the physical to logical package map
- * @pkg: The physical package id as retrieved via CPUID
- * @cpu: The cpu for which this is updated
- */
-int topology_update_package_map(unsigned int pkg, unsigned int cpu)
-{
- int new;
-
- /* Already available somewhere? */
- new = topology_phys_to_logical_pkg(pkg);
- if (new >= 0)
- goto found;
-
- new = logical_packages++;
- if (new != pkg) {
- pr_info("CPU %u Converting physical %u to logical package %u\n",
- cpu, pkg, new);
- }
-found:
- per_cpu(logical_maps.phys_pkg_id, cpu) = pkg;
- per_cpu(logical_maps.logical_pkg_id, cpu) = new;
- cpu_data(cpu).topo.logical_pkg_id = new;
- return 0;
-}
-/**
- * topology_update_die_map - Update the physical to logical die map
- * @die: The die id as retrieved via CPUID
- * @cpu: The cpu for which this is updated
- */
-int topology_update_die_map(unsigned int die, unsigned int cpu)
-{
- int new;
-
- /* Already available somewhere? */
- new = topology_phys_to_logical_die(die, cpu);
- if (new >= 0)
- goto found;
-
- new = logical_die++;
- if (new != die) {
- pr_info("CPU %u Converting physical %u to logical die %u\n",
- cpu, die, new);
- }
-found:
- per_cpu(logical_maps.phys_die_id, cpu) = die;
- per_cpu(logical_maps.logical_die_id, cpu) = new;
- cpu_data(cpu).topo.logical_die_id = new;
- return 0;
-}
-
static void __init smp_store_boot_cpu_info(void)
{
- int id = 0; /* CPU 0 */
- struct cpuinfo_x86 *c = &cpu_data(id);
+ struct cpuinfo_x86 *c = &cpu_data(0);
*c = boot_cpu_data;
- c->cpu_index = id;
- topology_update_package_map(c->topo.pkg_id, id);
- topology_update_die_map(c->topo.die_id, id);
c->initialized = true;
}
@@ -488,6 +373,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
if (c->topo.pkg_id == o->topo.pkg_id &&
c->topo.die_id == o->topo.die_id &&
+ c->topo.amd_node_id == o->topo.amd_node_id &&
per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) {
if (c->topo.core_id == o->topo.core_id)
return topology_sane(c, o, "smt");
@@ -509,10 +395,13 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (c->topo.pkg_id == o->topo.pkg_id &&
- c->topo.die_id == o->topo.die_id)
- return true;
- return false;
+ if (c->topo.pkg_id != o->topo.pkg_id || c->topo.die_id != o->topo.die_id)
+ return false;
+
+ if (cpu_feature_enabled(X86_FEATURE_TOPOEXT) && topology_amd_nodes_per_pkg() > 1)
+ return c->topo.amd_node_id == o->topo.amd_node_id;
+
+ return true;
}
static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
@@ -670,8 +559,8 @@ static void __init build_sched_topology(void)
void set_cpu_sibling_map(int cpu)
{
- bool has_smt = smp_num_siblings > 1;
- bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
+ bool has_smt = __max_threads_per_core > 1;
+ bool has_mp = has_smt || topology_num_cores_per_package() > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i, threads;
@@ -1068,9 +957,13 @@ int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
- if (apicid == BAD_APICID || !physid_isset(apicid, phys_cpu_present_map) ||
- !apic_id_valid(apicid)) {
- pr_err("%s: bad cpu %d\n", __func__, cpu);
+ if (apicid == BAD_APICID || !apic_id_valid(apicid)) {
+ pr_err("CPU %u has invalid APIC ID %x. Aborting bringup\n", cpu, apicid);
+ return -EINVAL;
+ }
+
+ if (!test_bit(apicid, phys_cpu_present_map)) {
+ pr_err("CPU %u APIC ID %x is not present. Aborting bringup\n", cpu, apicid);
return -EINVAL;
}
@@ -1139,14 +1032,8 @@ static __init void disable_smp(void)
pr_info("SMP disabled\n");
disable_ioapic_support();
+ topology_reset_possible_cpus_up();
- init_cpu_present(cpumask_of(0));
- init_cpu_possible(cpumask_of(0));
-
- if (smp_found_config)
- physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
- else
- physid_set_mask_of_physid(0, &phys_cpu_present_map);
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));
cpumask_set_cpu(0, topology_die_cpumask(0));
@@ -1187,6 +1074,11 @@ void __init smp_prepare_cpus_common(void)
set_cpu_sibling_map(0);
}
+void __init smp_prepare_boot_cpu(void)
+{
+ smp_ops.smp_prepare_boot_cpu();
+}
+
#ifdef CONFIG_X86_64
/* Establish whether parallel bringup can be supported. */
bool __init arch_cpuhp_init_parallel_bringup(void)
@@ -1265,102 +1157,16 @@ void __init native_smp_prepare_boot_cpu(void)
native_pv_lock_init();
}
-void __init calculate_max_logical_packages(void)
-{
- int ncpus;
-
- /*
- * Today neither Intel nor AMD support heterogeneous systems so
- * extrapolate the boot cpu's data to all packages.
- */
- ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
- __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- pr_info("Max logical packages: %u\n", __max_logical_packages);
-}
-
void __init native_smp_cpus_done(unsigned int max_cpus)
{
pr_debug("Boot done\n");
- calculate_max_logical_packages();
build_sched_topology();
nmi_selftest();
impress_friends();
cache_aps_init();
}
-static int __initdata setup_possible_cpus = -1;
-static int __init _setup_possible_cpus(char *str)
-{
- get_option(&str, &setup_possible_cpus);
- return 0;
-}
-early_param("possible_cpus", _setup_possible_cpus);
-
-
-/*
- * cpu_possible_mask should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and don't expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_mask on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with possible_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
- int i, possible;
-
- i = setup_max_cpus ?: 1;
- if (setup_possible_cpus == -1) {
- possible = num_processors;
-#ifdef CONFIG_HOTPLUG_CPU
- if (setup_max_cpus)
- possible += disabled_cpus;
-#else
- if (possible > i)
- possible = i;
-#endif
- } else
- possible = setup_possible_cpus;
-
- total_cpus = max_t(int, possible, num_processors + disabled_cpus);
-
- /* nr_cpu_ids could be reduced via nr_cpus= */
- if (possible > nr_cpu_ids) {
- pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
- possible, nr_cpu_ids);
- possible = nr_cpu_ids;
- }
-
-#ifdef CONFIG_HOTPLUG_CPU
- if (!setup_max_cpus)
-#endif
- if (possible > i) {
- pr_warn("%d Processors exceeds max_cpus limit of %u\n",
- possible, setup_max_cpus);
- possible = i;
- }
-
- set_nr_cpu_ids(possible);
-
- pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
- possible, max_t(int, possible - num_processors, 0));
-
- reset_cpu_possible_mask();
-
- for (i = 0; i < possible; i++)
- set_cpu_possible(i, true);
-}
-
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 77a9316da435..4eefaac64c6c 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -172,7 +172,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
/*
* This is called by apply_returns() to fix up static call trampolines,
* specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 8e2b2552b5ee..3e2952679b88 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -6,7 +6,9 @@
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
+
#include <asm/desc.h>
+#include <asm/debugreg.h>
#include <asm/mmu_context.h>
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index c783aeb37dce..cb9fa1d5c66f 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -52,13 +52,6 @@ static unsigned long get_align_bits(void)
return va_align.bits & get_align_mask();
}
-unsigned long align_vdso_addr(unsigned long addr)
-{
- unsigned long align_mask = get_align_mask();
- addr = (addr + align_mask) & ~align_mask;
- return addr | get_align_bits();
-}
-
static int __init control_va_addr_alignment(char *str)
{
/* guard against enabling this on other CPU families */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c3b2f863acf0..4fa0b17e5043 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
+#include <asm/fred.h>
#include <asm/fpu/api.h>
#include <asm/cpu.h>
#include <asm/cpu_entry_area.h>
@@ -773,7 +774,7 @@ DEFINE_IDTENTRY_RAW(exc_int3)
*/
asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
{
- struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
+ struct pt_regs *regs = (struct pt_regs *)current_top_of_stack() - 1;
if (regs != eregs)
*regs = *eregs;
return regs;
@@ -791,7 +792,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
* trust it and switch to the current kernel stack
*/
if (ip_within_syscall_gap(regs)) {
- sp = this_cpu_read(pcpu_hot.top_of_stack);
+ sp = current_top_of_stack();
goto sync;
}
@@ -935,8 +936,7 @@ static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
return false;
}
-static __always_inline void exc_debug_kernel(struct pt_regs *regs,
- unsigned long dr6)
+static noinstr void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6)
{
/*
* Disable breakpoints during exception handling; recursive exceptions
@@ -948,6 +948,11 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
*
* Entry text is excluded for HW_BP_X and cpu_entry_area, which
* includes the entry stack is excluded for everything.
+ *
+ * For FRED, nested #DB should just work fine. But when a watchpoint or
+ * breakpoint is set in the code path which is executed by #DB handler,
+ * it results in an endless recursion and stack overflow. Thus we stay
+ * with the IDT approach, i.e., save DR7 and disable #DB.
*/
unsigned long dr7 = local_db_save();
irqentry_state_t irq_state = irqentry_nmi_enter(regs);
@@ -977,7 +982,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
* Catch SYSENTER with TF set and clear DR_STEP. If this hit a
* watchpoint at the same time then that will still be handled.
*/
- if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) &&
+ (dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;
/*
@@ -1009,8 +1015,7 @@ out:
local_db_restore(dr7);
}
-static __always_inline void exc_debug_user(struct pt_regs *regs,
- unsigned long dr6)
+static noinstr void exc_debug_user(struct pt_regs *regs, unsigned long dr6)
{
bool icebp;
@@ -1094,6 +1099,34 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
exc_debug_user(regs, debug_read_clear_dr6());
}
+
+#ifdef CONFIG_X86_FRED
+/*
+ * When occurred on different ring level, i.e., from user or kernel
+ * context, #DB needs to be handled on different stack: User #DB on
+ * current task stack, while kernel #DB on a dedicated stack.
+ *
+ * This is exactly how FRED event delivery invokes an exception
+ * handler: ring 3 event on level 0 stack, i.e., current task stack;
+ * ring 0 event on the #DB dedicated stack specified in the
+ * IA32_FRED_STKLVLS MSR. So unlike IDT, the FRED debug exception
+ * entry stub doesn't do stack switch.
+ */
+DEFINE_FREDENTRY_DEBUG(exc_debug)
+{
+ /*
+ * FRED #DB stores DR6 on the stack in the format which
+ * debug_read_clear_dr6() returns for the IDT entry points.
+ */
+ unsigned long dr6 = fred_event_data(regs);
+
+ if (user_mode(regs))
+ exc_debug_user(regs, dr6);
+ else
+ exc_debug_kernel(regs, dr6);
+}
+#endif /* CONFIG_X86_FRED */
+
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
@@ -1369,8 +1402,34 @@ DEFINE_IDTENTRY_SW(iret_error)
}
#endif
+/* Do not enable FRED by default yet. */
+static bool enable_fred __ro_after_init = false;
+
+#ifdef CONFIG_X86_FRED
+static int __init fred_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ return 0;
+
+ if (!strcmp(str, "on"))
+ enable_fred = true;
+ else if (!strcmp(str, "off"))
+ enable_fred = false;
+ else
+ pr_warn("invalid FRED option: 'fred=%s'\n", str);
+ return 0;
+}
+early_param("fred", fred_setup);
+#endif
+
void __init trap_init(void)
{
+ if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
+ setup_clear_cpu_cap(X86_FEATURE_FRED);
+
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
@@ -1379,7 +1438,10 @@ void __init trap_init(void)
/* Initialize TSS before setting up traps so ISTs work */
cpu_init_exception_handling();
+
/* Setup traps as cpu_init() might #GP */
- idt_setup_traps();
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ idt_setup_traps();
+
cpu_init();
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 15f97c0abc9d..5a69a49acc96 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -53,7 +53,7 @@ static int __read_mostly tsc_force_recalibrate;
static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
-static struct clocksource *art_related_clocksource;
+static bool have_art;
struct cyc2ns {
struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
@@ -652,7 +652,7 @@ success:
}
/**
- * native_calibrate_tsc
+ * native_calibrate_tsc - determine TSC frequency
* Determine TSC frequency via CPUID, else return 0.
*/
unsigned long native_calibrate_tsc(void)
@@ -1168,6 +1168,7 @@ static struct clocksource clocksource_tsc_early = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
+ .id = CSID_X86_TSC_EARLY,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
@@ -1190,6 +1191,7 @@ static struct clocksource clocksource_tsc = {
CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY |
CLOCK_SOURCE_VERIFY_PERCPU,
+ .id = CSID_X86_TSC,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
@@ -1309,8 +1311,10 @@ struct system_counterval_t convert_art_to_tsc(u64 art)
do_div(tmp, art_to_tsc_denominator);
res += tmp + art_to_tsc_offset;
- return (struct system_counterval_t) {.cs = art_related_clocksource,
- .cycles = res};
+ return (struct system_counterval_t) {
+ .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC,
+ .cycles = res,
+ };
}
EXPORT_SYMBOL(convert_art_to_tsc);
@@ -1327,12 +1331,10 @@ EXPORT_SYMBOL(convert_art_to_tsc);
* that this flag is set before conversion to TSC is attempted.
*
* Return:
- * struct system_counterval_t - system counter value with the pointer to the
- * corresponding clocksource
- * @cycles: System counter value
- * @cs: Clocksource corresponding to system counter value. Used
- * by timekeeping code to verify comparability of two cycle
- * values.
+ * struct system_counterval_t - system counter value with the ID of the
+ * corresponding clocksource:
+ * cycles: System counter value
+ * cs_id: The clocksource ID for validating comparability
*/
struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
@@ -1347,8 +1349,10 @@ struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
do_div(tmp, USEC_PER_SEC);
res += tmp;
- return (struct system_counterval_t) { .cs = art_related_clocksource,
- .cycles = res};
+ return (struct system_counterval_t) {
+ .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC,
+ .cycles = res,
+ };
}
EXPORT_SYMBOL(convert_art_ns_to_tsc);
@@ -1357,7 +1361,7 @@ static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
/**
* tsc_refine_calibration_work - Further refine tsc freq calibration
- * @work - ignored.
+ * @work: ignored.
*
* This functions uses delayed work over a period of a
* second to further refine the TSC freq value. Since this is
@@ -1455,7 +1459,7 @@ out:
goto unreg;
if (boot_cpu_has(X86_FEATURE_ART))
- art_related_clocksource = &clocksource_tsc;
+ have_art = true;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
clocksource_unregister(&clocksource_tsc_early);
@@ -1481,7 +1485,7 @@ static int __init init_tsc_clocksource(void)
*/
if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
if (boot_cpu_has(X86_FEATURE_ART))
- art_related_clocksource = &clocksource_tsc;
+ have_art = true;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
clocksource_unregister(&clocksource_tsc_early);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a349dbfc6d5a..56451fd2099e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ ENTRY(phys_startup_64)
#endif
jiffies = jiffies_64;
+const_pcpu_hot = pcpu_hot;
#if defined(CONFIG_X86_64)
/*
@@ -132,7 +133,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
SOFTIRQENTRY_TEXT
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
*(.text..__x86.indirect_thunk)
*(.text..__x86.return_thunk)
#endif
@@ -142,7 +143,7 @@ SECTIONS
*(.text..__x86.rethunk_untrain)
ENTRY_TEXT
-#ifdef CONFIG_CPU_SRSO
+#ifdef CONFIG_MITIGATION_SRSO
/*
* See the comment above srso_alias_untrain_ret()'s
* definition.
@@ -267,7 +268,7 @@ SECTIONS
}
#endif
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
/*
* List of instructions that call/jmp/jcc to retpoline thunks
* __x86_indirect_thunk_*(). These instructions can be patched along
@@ -504,11 +505,11 @@ INIT_PER_CPU(irq_stack_backing_store);
"fixed_percpu_data is not at start of per-cpu area");
#endif
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
#endif
-#ifdef CONFIG_CPU_SRSO
+#ifdef CONFIG_MITIGATION_SRSO
. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
/*
* GNU ld cannot do XOR until 2.41.
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index d3fc01770558..73511332bb67 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -127,25 +127,12 @@ static void __init vsmp_cap_cpus(void)
#endif
}
-static u32 apicid_phys_pkg_id(u32 initial_apic_id, int index_msb)
-{
- return read_apic_id() >> index_msb;
-}
-
-static void vsmp_apic_post_init(void)
-{
- /* need to update phys_pkg_id */
- apic->phys_pkg_id = apicid_phys_pkg_id;
-}
-
void __init vsmp_init(void)
{
detect_vsmp_box();
if (!is_vsmp_box())
return;
- x86_platform.apic_post_init = vsmp_apic_post_init;
-
vsmp_cap_cpus();
set_vsmp_ctl();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a37ebd3b4773..a42830dc151b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -70,8 +70,9 @@ struct x86_init_ops x86_init __initdata = {
.mpparse = {
.setup_ioapic_ids = x86_init_noop,
- .find_smp_config = default_find_smp_config,
- .get_smp_config = default_get_smp_config,
+ .find_mptable = mpparse_find_mptable,
+ .early_parse_smp_cfg = mpparse_parse_early_smp_config,
+ .parse_smp_cfg = mpparse_parse_smp_config,
},
.irqs = {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 87e3da7b0439..65ed14b6540b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,9 +80,10 @@ config KVM_SW_PROTECTED_VM
depends on KVM && X86_64
select KVM_GENERIC_PRIVATE_MEM
help
- Enable support for KVM software-protected VMs. Currently "protected"
- means the VM can be backed with memory provided by
- KVM_CREATE_GUEST_MEMFD.
+ Enable support for KVM software-protected VMs. Currently, software-
+ protected VMs are purely a development and testing vehicle for
+ KVM_CREATE_GUEST_MEMFD. Attempting to run a "real" VM workload as a
+ software-protected VM will fail miserably.
If unsure, say "N".
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4943f6b2bbee..8a47f8541eab 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1322,6 +1322,56 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
return false;
}
+#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839
+#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */
+
+/*
+ * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC
+ * configuration.
+ * Such configuration can result from, for example, AMD Erratum 1386 workaround.
+ *
+ * Print a notice so users aren't left wondering what's suddenly gone wrong.
+ */
+static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_hv *hv = to_kvm_hv(kvm);
+
+ /* Check again under the hv_lock. */
+ if (hv->xsaves_xsavec_checked)
+ return;
+
+ if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) !=
+ KVM_HV_WIN2016_GUEST_ID)
+ return;
+
+ hv->xsaves_xsavec_checked = true;
+
+ /* UP configurations aren't affected */
+ if (atomic_read(&kvm->online_vcpus) < 2)
+ return;
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
+ return;
+
+ pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
+ "If it fails to boot try disabling XSAVEC in the VM config.\n");
+}
+
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+ struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
+
+ if (!vcpu->arch.hyperv_enabled ||
+ hv->xsaves_xsavec_checked)
+ return;
+
+ mutex_lock(&hv->hv_lock);
+ __kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+ mutex_unlock(&hv->hv_lock);
+}
+
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
bool host)
{
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 1dc0b6604526..923e64903da9 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -182,6 +182,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
void kvm_hv_request_tsc_page_update(struct kvm *kvm);
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu);
+
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
@@ -267,6 +269,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock) {}
static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {}
+static inline void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) {}
static inline void kvm_hv_init_vm(struct kvm *kvm) {}
static inline void kvm_hv_destroy_vm(struct kvm *kvm) {}
static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..1edf93ee3395 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2815,7 +2815,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
vcpu->arch.apic = apic;
- apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+ if (kvm_x86_ops.alloc_apic_backing_page)
+ apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
+ else
+ apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!apic->regs) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
vcpu->vcpu_id);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2d6cdeab1f8a..2b515acd8e72 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -47,18 +47,18 @@
#include <linux/kern_levels.h>
#include <linux/kstrtox.h>
#include <linux/kthread.h>
+#include <linux/wordpart.h>
#include <asm/page.h>
#include <asm/memtype.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/set_memory.h>
+#include <asm/spec-ctrl.h>
#include <asm/vmx.h>
#include "trace.h"
-extern bool itlb_multihit_kvm_mitigation;
-
static bool nx_hugepage_mitigation_hard_disabled;
int __read_mostly nx_huge_pages = -1;
@@ -263,7 +263,7 @@ static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu)
{
- if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
+ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
return kvm_read_cr3(vcpu);
return mmu->get_guest_pgd(vcpu);
@@ -4405,6 +4405,31 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
smp_rmb();
+ /*
+ * Check for a relevant mmu_notifier invalidation event before getting
+ * the pfn from the primary MMU, and before acquiring mmu_lock.
+ *
+ * For mmu_lock, if there is an in-progress invalidation and the kernel
+ * allows preemption, the invalidation task may drop mmu_lock and yield
+ * in response to mmu_lock being contended, which is *very* counter-
+ * productive as this vCPU can't actually make forward progress until
+ * the invalidation completes.
+ *
+ * Retrying now can also avoid unnessary lock contention in the primary
+ * MMU, as the primary MMU doesn't necessarily hold a single lock for
+ * the duration of the invalidation, i.e. faulting in a conflicting pfn
+ * can cause the invalidation to take longer by holding locks that are
+ * needed to complete the invalidation.
+ *
+ * Do the pre-check even for non-preemtible kernels, i.e. even if KVM
+ * will never yield mmu_lock in response to contention, as this vCPU is
+ * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
+ * to detect retry guarantees the worst case latency for the vCPU.
+ */
+ if (fault->slot &&
+ mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
+ return RET_PF_RETRY;
+
ret = __kvm_faultin_pfn(vcpu, fault);
if (ret != RET_PF_CONTINUE)
return ret;
@@ -4415,6 +4440,18 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
if (unlikely(!fault->slot))
return kvm_handle_noslot_fault(vcpu, fault, access);
+ /*
+ * Check again for a relevant mmu_notifier invalidation event purely to
+ * avoid contending mmu_lock. Most invalidations will be detected by
+ * the previous check, but checking is extremely cheap relative to the
+ * overall cost of failing to detect the invalidation until after
+ * mmu_lock is acquired.
+ */
+ if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
+ kvm_release_pfn_clean(fault->pfn);
+ return RET_PF_RETRY;
+ }
+
return RET_PF_CONTINUE;
}
@@ -4442,6 +4479,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
return true;
+ /*
+ * Check for a relevant mmu_notifier invalidation event one last time
+ * now that mmu_lock is held, as the "unsafe" checks performed without
+ * holding mmu_lock can get false negatives.
+ */
return fault->slot &&
mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
}
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 0669a8a668ca..5390a591a571 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -315,7 +315,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (!prefetch)
vcpu->stat.pf_taken++;
- if (IS_ENABLED(CONFIG_RETPOLINE) && fault.is_tdp)
+ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
r = kvm_tdp_page_fault(vcpu, &fault);
else
r = vcpu->arch.mmu->page_fault(vcpu, &fault);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dee62362a360..55b9a6d96bcf 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1181,7 +1181,7 @@ int svm_allocate_nested(struct vcpu_svm *svm)
if (svm->nested.initialized)
return 0;
- vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmcb02_page = snp_safe_alloc_page(&svm->vcpu);
if (!vmcb02_page)
return -ENOMEM;
svm->nested.vmcb02.ptr = page_address(vmcb02_page);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f760106c31f8..ae0ac12382b9 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -57,7 +57,7 @@ static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
/* enable/disable SEV-ES DebugSwap support */
-static bool sev_es_debug_swap_enabled = true;
+static bool sev_es_debug_swap_enabled = false;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
@@ -246,6 +246,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_platform_init_args init_args = {0};
int asid, ret;
if (kvm->created_vcpus)
@@ -262,7 +263,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_no_asid;
sev->asid = asid;
- ret = sev_platform_init(&argp->error);
+ init_args.probe = false;
+ ret = sev_platform_init(&init_args);
if (ret)
goto e_free;
@@ -274,6 +276,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
return 0;
e_free:
+ argp->error = init_args.error;
sev_asid_free(sev);
sev->asid = 0;
e_no_asid:
@@ -612,8 +615,11 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
- if (sev_es_debug_swap_enabled)
+ if (sev_es_debug_swap_enabled) {
save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+ pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. "
+ "This will not work starting with Linux 6.10\n");
+ }
pr_debug("Virtual Machine Save Area (VMSA):\n");
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
@@ -1975,20 +1981,22 @@ int sev_mem_enc_register_region(struct kvm *kvm,
goto e_free;
}
- region->uaddr = range->addr;
- region->size = range->size;
-
- list_add_tail(&region->list, &sev->regions_list);
- mutex_unlock(&kvm->lock);
-
/*
* The guest may change the memory encryption attribute from C=0 -> C=1
* or vice versa for this memory range. Lets make sure caches are
* flushed to ensure that guest data gets written into memory with
- * correct C-bit.
+ * correct C-bit. Note, this must be done before dropping kvm->lock,
+ * as region and its array of pages can be freed by a different task
+ * once kvm->lock is released.
*/
sev_clflush_pages(region->pages, region->npages);
+ region->uaddr = range->addr;
+ region->size = range->size;
+
+ list_add_tail(&region->list, &sev->regions_list);
+ mutex_unlock(&kvm->lock);
+
return ret;
e_free:
@@ -3160,3 +3168,35 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
+
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
+{
+ unsigned long pfn;
+ struct page *p;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+
+ /*
+ * Allocate an SNP-safe page to workaround the SNP erratum where
+ * the CPU will incorrectly signal an RMP violation #PF if a
+ * hugepage (2MB or 1GB) collides with the RMP entry of a
+ * 2MB-aligned VMCB, VMSA, or AVIC backing page.
+ *
+ * Allocate one extra page, choose a page which is not
+ * 2MB-aligned, and free the other.
+ */
+ p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+ if (!p)
+ return NULL;
+
+ split_page(p, 1);
+
+ pfn = page_to_pfn(p);
+ if (IS_ALIGNED(pfn, PTRS_PER_PMD))
+ __free_page(p++);
+ else
+ __free_page(p + 1);
+
+ return p;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e90b429c84f1..272d5ed37ce7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -703,7 +703,7 @@ static int svm_cpu_init(int cpu)
int ret = -ENOMEM;
memset(sd, 0, sizeof(struct svm_cpu_data));
- sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ sd->save_area = snp_safe_alloc_page(NULL);
if (!sd->save_area)
return ret;
@@ -1421,7 +1421,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
err = -ENOMEM;
- vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmcb01_page = snp_safe_alloc_page(vcpu);
if (!vmcb01_page)
goto out;
@@ -1430,7 +1430,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
* SEV-ES guests require a separate VMSA page used to contain
* the encrypted register state of the guest.
*/
- vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmsa_page = snp_safe_alloc_page(vcpu);
if (!vmsa_page)
goto error_free_vmcb_page;
@@ -3455,7 +3455,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
if (!svm_check_exit_valid(exit_code))
return svm_handle_invalid_exit(vcpu, exit_code);
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
if (exit_code == SVM_EXIT_MSR)
return msr_interception(vcpu);
else if (exit_code == SVM_EXIT_VINTR)
@@ -4900,6 +4900,16 @@ static int svm_vm_init(struct kvm *kvm)
return 0;
}
+static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
+{
+ struct page *page = snp_safe_alloc_page(vcpu);
+
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
static struct kvm_x86_ops svm_x86_ops __initdata = {
.name = KBUILD_MODNAME,
@@ -5031,6 +5041,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
+ .alloc_apic_backing_page = svm_alloc_apic_backing_page,
};
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8ef95139cd24..7f1fbd874c45 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -694,6 +694,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
/* vmenter.S */
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a707..4e725854c63a 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -8,7 +8,7 @@
#define svm_asm(insn, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) "\n\t" \
+ asm goto("1: " __stringify(insn) "\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
::: clobber : fault); \
return; \
@@ -18,7 +18,7 @@ fault: \
#define svm_asm1(insn, op1, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1 : clobber : fault); \
return; \
@@ -28,7 +28,7 @@ fault: \
#define svm_asm2(insn, op1, op2, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1, op2 : clobber : fault); \
return; \
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 9499f9c6b077..187018c424bf 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -207,7 +207,7 @@ SYM_FUNC_START(__svm_vcpu_run)
7: vmload %_ASM_AX
8:
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
@@ -344,7 +344,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
/* Pop @svm to RDI, guest registers have been saved already. */
pop %_ASM_DI
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a6216c874729..315c7c2ba89b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -71,7 +71,7 @@ static int fixed_pmc_events[] = {
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
struct kvm_pmc *pmc;
- u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+ u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
int i;
pmu->fixed_ctr_ctrl = data;
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index edc3f16cc189..6a9bfdfbb6e5 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 906ecd001511..2bfbf758d061 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- test $VMX_RUN_VMRESUME, %ebx
+ bt $VMX_RUN_VMRESUME_SHIFT, %ebx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */
- jz .Lvmlaunch
+ /* Clobbers EFLAGS.ZF */
+ CLEAR_CPU_BUFFERS
+
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch
/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e262bc2ba4e5..305237dcba88 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -38,6 +38,7 @@
#include <asm/desc.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
+#include <asm/fred.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
@@ -388,7 +389,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ /*
+ * Disable VERW's behavior of clearing CPU buffers for the guest if the
+ * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
+ * the mitigation. Disabling the clearing behavior provides a
+ * performance boost for guests that aren't aware that manually clearing
+ * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
+ * and VM-Exit.
+ */
+ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+ (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA);
@@ -738,7 +748,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
*/
static int kvm_cpu_vmxoff(void)
{
- asm_volatile_goto("1: vmxoff\n\t"
+ asm goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
@@ -2784,7 +2794,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
cr4_set_bits(X86_CR4_VMXE);
- asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
_ASM_EXTABLE(1b, %l[fault])
: : [vmxon_pointer] "m"(vmxon_pointer)
: : fault);
@@ -6543,7 +6553,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (exit_reason.basic >= kvm_vmx_max_exit_handlers)
goto unexpected_vmexit;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
return kvm_emulate_wrmsr(vcpu);
else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
@@ -6960,14 +6970,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
u32 intr_info = vmx_get_intr_info(vcpu);
unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
- gate_desc *desc = (gate_desc *)host_idt_base + vector;
if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- vmx_do_interrupt_irqoff(gate_offset(desc));
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
+ else
+ vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
@@ -7224,11 +7236,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_enter_irqoff();
- /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ /*
+ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+ * mitigation for MDS is done late in VMentry and is still
+ * executed in spite of L1D Flush. This is because an extra VERW
+ * should not matter much after the big hammer L1D Flush.
+ */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
@@ -7260,7 +7275,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(vmx_get_intr_info(vcpu))) {
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
+ else
+ vmx_do_nmi_irqoff();
kvm_after_interrupt(vcpu);
}
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index f41ce3c24123..8060e5fc6dbd 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
- asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+ asm_goto_output("1: vmread %[field], %[output]\n\t"
"jna %l[do_fail]\n\t"
_ASM_EXTABLE(1b, %l[do_exception])
@@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
#define vmx_asm1(insn, op1, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
@@ -205,7 +205,7 @@ fault: \
#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 363b1c080205..ffe580169c93 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1623,7 +1623,8 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
- ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1655,6 +1656,8 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_SSB_NO;
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
+ if (!boot_cpu_has_bug(X86_BUG_RFDS))
+ data |= ARCH_CAP_RFDS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) {
/*
@@ -1704,22 +1707,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
struct kvm_msr_entry msr;
int r;
+ /* Unconditionally clear the output for simplicity */
+ msr.data = 0;
msr.index = index;
r = kvm_get_msr_feature(&msr);
- if (r == KVM_MSR_RET_INVALID) {
- /* Unconditionally clear the output for simplicity */
- *data = 0;
- if (kvm_msr_ignored_check(index, 0, false))
- r = 0;
- }
-
- if (r)
- return r;
+ if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
+ r = 0;
*data = msr.data;
- return 0;
+ return r;
}
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1782,6 +1780,10 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
+ if (!static_cpu_has(X86_FEATURE_XSAVES) &&
+ (efer & EFER_SVME))
+ kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+
return 0;
}
@@ -2507,7 +2509,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
}
#ifdef CONFIG_X86_64
-static inline int gtod_is_based_on_tsc(int mode)
+static inline bool gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
@@ -4581,7 +4583,7 @@ static bool kvm_is_vm_type_supported(unsigned long type)
{
return type == KVM_X86_DEFAULT_VM ||
(type == KVM_X86_SW_PROTECTED_VM &&
- IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
+ IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -5454,7 +5456,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
vcpu->arch.nmi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
- kvm_make_request(KVM_REQ_NMI, vcpu);
+ if (events->nmi.pending)
+ kvm_make_request(KVM_REQ_NMI, vcpu);
}
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
@@ -7016,6 +7019,9 @@ set_identity_unlock:
r = -EEXIST;
if (kvm->arch.vpit)
goto create_pit_unlock;
+ r = -ENOENT;
+ if (!pic_in_kernel(kvm))
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
if (kvm->arch.vpit)
@@ -8004,6 +8010,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (r < 0)
return X86EMUL_UNHANDLEABLE;
+
+ /*
+ * Mark the page dirty _before_ checking whether or not the CMPXCHG was
+ * successful, as the old value is written back on failure. Note, for
+ * live migration, this is unnecessarily conservative as CMPXCHG writes
+ * back the original value and the access is atomic, but KVM's ABI is
+ * that all writes are dirty logged, regardless of the value written.
+ */
+ kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa));
+
if (r)
return X86EMUL_CMPXCHG_FAILED;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index ea3a28e7b613..6da73513f026 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN
CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
endif
-# Early boot use of cmdline; don't instrument it
-ifdef CONFIG_AMD_MEM_ENCRYPT
-KCOV_INSTRUMENT_cmdline.o := n
-KASAN_SANITIZE_cmdline.o := n
-KCSAN_SANITIZE_cmdline.o := n
-
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_cmdline.o = -pg
-endif
-
-CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
-endif
-
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
@@ -49,7 +36,7 @@ lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
-lib-$(CONFIG_RETPOLINE) += retpoline.o
+lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 6962df315793..4fb44894ad87 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
cli
/* if (*ptr == old) */
- cmpq PER_CPU_VAR(0(%rsi)), %rax
+ cmpq __percpu (%rsi), %rax
jne .Lnot_same
- cmpq PER_CPU_VAR(8(%rsi)), %rdx
+ cmpq __percpu 8(%rsi), %rdx
jne .Lnot_same
/* *ptr = new */
- movq %rbx, PER_CPU_VAR(0(%rsi))
- movq %rcx, PER_CPU_VAR(8(%rsi))
+ movq %rbx, __percpu (%rsi)
+ movq %rcx, __percpu 8(%rsi)
/* set ZF in EFLAGS to indicate success */
orl $X86_EFLAGS_ZF, (%rsp)
@@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
/* *ptr != old */
/* old = *ptr */
- movq PER_CPU_VAR(0(%rsi)), %rax
- movq PER_CPU_VAR(8(%rsi)), %rdx
+ movq __percpu (%rsi), %rax
+ movq __percpu 8(%rsi), %rdx
/* clear ZF in EFLAGS to indicate failure */
andl $(~X86_EFLAGS_ZF), (%rsp)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 873e4ef23e49..1c96be769adc 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu)
pushfl
cli
- cmpl 0(%esi), %eax
+ cmpl (%esi), %eax
jne .Lnot_same
cmpl 4(%esi), %edx
jne .Lnot_same
- movl %ebx, 0(%esi)
+ movl %ebx, (%esi)
movl %ecx, 4(%esi)
orl $X86_EFLAGS_ZF, (%esp)
@@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
RET
.Lnot_same:
- movl 0(%esi), %eax
+ movl (%esi), %eax
movl 4(%esi), %edx
andl $(~X86_EFLAGS_ZF), (%esp)
@@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu)
#ifndef CONFIG_UML
+/*
+ * Emulate 'cmpxchg8b %fs:(%rsi)'
+ *
+ * Inputs:
+ * %esi : memory location to compare
+ * %eax : low 32 bits of old value
+ * %edx : high 32 bits of old value
+ * %ebx : low 32 bits of new value
+ * %ecx : high 32 bits of new value
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
+ */
SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
pushfl
cli
- cmpl PER_CPU_VAR(0(%esi)), %eax
+ cmpl __percpu (%esi), %eax
jne .Lnot_same2
- cmpl PER_CPU_VAR(4(%esi)), %edx
+ cmpl __percpu 4(%esi), %edx
jne .Lnot_same2
- movl %ebx, PER_CPU_VAR(0(%esi))
- movl %ecx, PER_CPU_VAR(4(%esi))
+ movl %ebx, __percpu (%esi)
+ movl %ecx, __percpu 4(%esi)
orl $X86_EFLAGS_ZF, (%esp)
@@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
RET
.Lnot_same2:
- movl PER_CPU_VAR(0(%esi)), %eax
- movl PER_CPU_VAR(4(%esi)), %edx
+ movl __percpu (%esi), %eax
+ movl __percpu 4(%esi), %edx
andl $(~X86_EFLAGS_ZF), (%esp)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 20ef350a60fb..10d5ed8b5990 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -163,23 +163,23 @@ SYM_CODE_END(__get_user_8_handle_exception)
#endif
/* get_user */
- _ASM_EXTABLE(1b, __get_user_handle_exception)
- _ASM_EXTABLE(2b, __get_user_handle_exception)
- _ASM_EXTABLE(3b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(1b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(2b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(3b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(4b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(4b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(4b, __get_user_8_handle_exception)
- _ASM_EXTABLE(5b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception)
#endif
/* __get_user */
- _ASM_EXTABLE(6b, __get_user_handle_exception)
- _ASM_EXTABLE(7b, __get_user_handle_exception)
- _ASM_EXTABLE(8b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(6b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(7b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(8b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(9b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(9b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(9b, __get_user_8_handle_exception)
- _ASM_EXTABLE(10b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception)
#endif
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 558a605929db..98631c0e7a11 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1129,15 +1129,15 @@ static int get_eff_addr_modrm_16(struct insn *insn, struct pt_regs *regs,
* get_eff_addr_sib() - Obtain referenced effective address via SIB
* @insn: Instruction. Must be valid.
* @regs: Register values as seen when entering kernel mode
- * @regoff: Obtained operand offset, in pt_regs, associated with segment
+ * @base_offset: Obtained operand offset, in pt_regs, associated with segment
* @eff_addr: Obtained effective address
*
* Obtain the effective address referenced by the SIB byte of @insn. After
* identifying the registers involved in the indexed, register-indirect memory
* reference, its value is obtained from the operands in @regs. The computed
* address is stored @eff_addr. Also, the register operand that indicates the
- * associated segment is stored in @regoff, this parameter can later be used to
- * determine such segment.
+ * associated segment is stored in @base_offset; this parameter can later be
+ * used to determine such segment.
*
* Returns:
*
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 55e371cc69fd..1bb155a0955b 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -71,7 +71,7 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
insn->next_byte = kaddr;
- insn->x86_64 = x86_64 ? 1 : 0;
+ insn->x86_64 = x86_64;
insn->opnd_bytes = 4;
if (x86_64)
insn->addr_bytes = 8;
@@ -268,11 +268,9 @@ int insn_get_opcode(struct insn *insn)
if (opcode->got)
return 0;
- if (!insn->prefixes.got) {
- ret = insn_get_prefixes(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_prefixes(insn);
+ if (ret)
+ return ret;
/* Get first opcode */
op = get_next(insn_byte_t, insn);
@@ -339,11 +337,9 @@ int insn_get_modrm(struct insn *insn)
if (modrm->got)
return 0;
- if (!insn->opcode.got) {
- ret = insn_get_opcode(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_opcode(insn);
+ if (ret)
+ return ret;
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
@@ -386,11 +382,9 @@ int insn_rip_relative(struct insn *insn)
if (!insn->x86_64)
return 0;
- if (!modrm->got) {
- ret = insn_get_modrm(insn);
- if (ret)
- return 0;
- }
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return 0;
/*
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -417,11 +411,9 @@ int insn_get_sib(struct insn *insn)
if (insn->sib.got)
return 0;
- if (!insn->modrm.got) {
- ret = insn_get_modrm(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
if (insn->modrm.nbytes) {
modrm = insn->modrm.bytes[0];
@@ -460,11 +452,9 @@ int insn_get_displacement(struct insn *insn)
if (insn->displacement.got)
return 0;
- if (!insn->sib.got) {
- ret = insn_get_sib(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_sib(insn);
+ if (ret)
+ return ret;
if (insn->modrm.nbytes) {
/*
@@ -628,11 +618,9 @@ int insn_get_immediate(struct insn *insn)
if (insn->immediate.got)
return 0;
- if (!insn->displacement.got) {
- ret = insn_get_displacement(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_displacement(insn);
+ if (ret)
+ return ret;
if (inat_has_moffset(insn->attr)) {
if (!__get_moffset(insn))
@@ -703,11 +691,9 @@ int insn_get_length(struct insn *insn)
if (insn->length)
return 0;
- if (!insn->immediate.got) {
- ret = insn_get_immediate(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_immediate(insn);
+ if (ret)
+ return ret;
insn->length = (unsigned char)((unsigned long)insn->next_byte
- (unsigned long)insn->kaddr);
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 40bbe56bde32..acd463d887e1 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -9,10 +9,9 @@ static void __rdmsr_on_cpu(void *info)
{
struct msr_info *rv = info;
struct msr *reg;
- int this_cpu = raw_smp_processor_id();
if (rv->msrs)
- reg = per_cpu_ptr(rv->msrs, this_cpu);
+ reg = this_cpu_ptr(rv->msrs);
else
reg = &rv->reg;
@@ -23,10 +22,9 @@ static void __wrmsr_on_cpu(void *info)
{
struct msr_info *rv = info;
struct msr *reg;
- int this_cpu = raw_smp_processor_id();
if (rv->msrs)
- reg = per_cpu_ptr(rv->msrs, this_cpu);
+ reg = this_cpu_ptr(rv->msrs);
else
reg = &rv->reg;
@@ -97,7 +95,7 @@ int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
EXPORT_SYMBOL(wrmsrl_on_cpu);
static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
- struct msr *msrs,
+ struct msr __percpu *msrs,
void (*msr_func) (void *info))
{
struct msr_info rv;
@@ -124,7 +122,7 @@ static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
* @msrs: array of MSR values
*
*/
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs)
{
__rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
}
@@ -138,7 +136,7 @@ EXPORT_SYMBOL(rdmsr_on_cpus);
* @msrs: array of MSR values
*
*/
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs)
{
__rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
}
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 47fd9bd6b91d..4bf4fad5b148 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -6,9 +6,9 @@
#define CREATE_TRACE_POINTS
#include <asm/msr-trace.h>
-struct msr *msrs_alloc(void)
+struct msr __percpu *msrs_alloc(void)
{
- struct msr *msrs = NULL;
+ struct msr __percpu *msrs = NULL;
msrs = alloc_percpu(struct msr);
if (!msrs) {
@@ -20,7 +20,7 @@ struct msr *msrs_alloc(void)
}
EXPORT_SYMBOL(msrs_alloc);
-void msrs_free(struct msr *msrs)
+void msrs_free(struct msr __percpu *msrs)
{
free_percpu(msrs);
}
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 2877f5934177..975c9c18263d 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -133,15 +133,15 @@ SYM_CODE_START_LOCAL(__put_user_handle_exception)
RET
SYM_CODE_END(__put_user_handle_exception)
- _ASM_EXTABLE(1b, __put_user_handle_exception)
- _ASM_EXTABLE(2b, __put_user_handle_exception)
- _ASM_EXTABLE(3b, __put_user_handle_exception)
- _ASM_EXTABLE(4b, __put_user_handle_exception)
- _ASM_EXTABLE(5b, __put_user_handle_exception)
- _ASM_EXTABLE(6b, __put_user_handle_exception)
- _ASM_EXTABLE(7b, __put_user_handle_exception)
- _ASM_EXTABLE(9b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(1b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(2b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(3b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(4b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(5b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(6b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(7b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(9b, __put_user_handle_exception)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE(8b, __put_user_handle_exception)
- _ASM_EXTABLE(10b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(8b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(10b, __put_user_handle_exception)
#endif
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 7b2589877d06..721b528da9ac 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -71,7 +71,7 @@ SYM_CODE_END(__x86_indirect_thunk_array)
#include <asm/GEN-for-each-reg.h>
#undef GEN
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
.macro CALL_THUNK reg
.align RETPOLINE_THUNK_SIZE
@@ -127,7 +127,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
#undef GEN
#endif
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
/*
* Be careful here: that label cannot really be removed because in
@@ -138,7 +138,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
*/
.section .text..__x86.return_thunk
-#ifdef CONFIG_CPU_SRSO
+#ifdef CONFIG_MITIGATION_SRSO
/*
* srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
@@ -225,12 +225,12 @@ SYM_CODE_END(srso_return_thunk)
#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret"
-#else /* !CONFIG_CPU_SRSO */
+#else /* !CONFIG_MITIGATION_SRSO */
#define JMP_SRSO_UNTRAIN_RET "ud2"
#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2"
-#endif /* CONFIG_CPU_SRSO */
+#endif /* CONFIG_MITIGATION_SRSO */
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
/*
* Some generic notes on the untraining sequences:
@@ -312,11 +312,11 @@ SYM_CODE_END(retbleed_return_thunk)
SYM_FUNC_END(retbleed_untrain_ret)
#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret"
-#else /* !CONFIG_CPU_UNRET_ENTRY */
+#else /* !CONFIG_MITIGATION_UNRET_ENTRY */
#define JMP_RETBLEED_UNTRAIN_RET "ud2"
-#endif /* CONFIG_CPU_UNRET_ENTRY */
+#endif /* CONFIG_MITIGATION_UNRET_ENTRY */
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \
@@ -325,9 +325,9 @@ SYM_FUNC_START(entry_untrain_ret)
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
-#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */
+#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_MITIGATION_SRSO */
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
.align 64
SYM_FUNC_START(call_depth_return_thunk)
@@ -359,7 +359,7 @@ SYM_FUNC_START(call_depth_return_thunk)
int3
SYM_FUNC_END(call_depth_return_thunk)
-#endif /* CONFIG_CALL_DEPTH_TRACKING */
+#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
@@ -369,21 +369,18 @@ SYM_FUNC_END(call_depth_return_thunk)
* 'JMP __x86_return_thunk' sites are changed to something else by
* apply_returns().
*
- * This should be converted eventually to call a warning function which
- * should scream loudly when the default return thunk is called after
- * alternatives have been applied.
- *
- * That warning function cannot BUG() because the bug splat cannot be
- * displayed in all possible configurations, leading to users not really
- * knowing why the machine froze.
+ * The ALTERNATIVE below adds a really loud warning to catch the case
+ * where the insufficient default return thunk ends up getting used for
+ * whatever reason like miscompilation or failure of
+ * objtool/alternatives/etc to patch all the return sites.
*/
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
- ANNOTATE_UNRET_SAFE
- ret
+ ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
+ "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
-#endif /* CONFIG_RETHUNK */
+#endif /* CONFIG_MITIGATION_RETHUNK */
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5168ee0360b2..12af572201a2 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1051,8 +1051,8 @@ GrpTable: Grp6
EndTable
GrpTable: Grp7
-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) | WRMSRNS (110),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) | ERETU (F3),(010),(11B) | ERETS (F2),(010),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c80febc44cd2..428048e73bd2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -16,6 +16,7 @@ KASAN_SANITIZE_pgprot.o := n
KCSAN_SANITIZE := n
# Avoid recursion by not calling KMSAN hooks for CEA code.
KMSAN_SANITIZE_cpu_entry_area.o := n
+KMSAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_mem_encrypt.o = -pg
@@ -60,7 +61,7 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
-obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
+obj-$(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_X86_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index b3ca7d23e4b0..9332b36a1091 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -54,13 +54,11 @@ static __init int find_northbridge(void)
int __init amd_numa_init(void)
{
- u64 start = PFN_PHYS(0);
+ unsigned int numnodes, cores, apicid;
+ u64 prevbase, start = PFN_PHYS(0);
u64 end = PFN_PHYS(max_pfn);
- unsigned numnodes;
- u64 prevbase;
- int i, j, nb;
u32 nodeid, reg;
- unsigned int bits, cores, apicid_base;
+ int i, j, nb;
if (!early_pci_allowed())
return -EINVAL;
@@ -158,26 +156,18 @@ int __init amd_numa_init(void)
return -ENOENT;
/*
- * We seem to have valid NUMA configuration. Map apicids to nodes
- * using the coreid bits from early_identify_cpu.
+ * We seem to have valid NUMA configuration. Map apicids to nodes
+ * using the size of the core domain in the APIC space.
*/
- bits = boot_cpu_data.x86_coreid_bits;
- cores = 1 << bits;
- apicid_base = 0;
+ cores = topology_get_domain_size(TOPO_CORE_DOMAIN);
- /*
- * get boot-time SMP configuration:
- */
- early_get_smp_config();
+ apicid = boot_cpu_physical_apicid;
+ if (apicid > 0)
+ pr_info("BSP APIC ID: %02x\n", apicid);
- if (boot_cpu_physical_apicid > 0) {
- pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
- apicid_base = boot_cpu_physical_apicid;
+ for_each_node_mask(i, numa_nodes_parsed) {
+ for (j = 0; j < cores; j++, apicid++)
+ set_apicid_to_node(apicid, i);
}
-
- for_each_node_mask(i, numa_nodes_parsed)
- for (j = apicid_base; j < cores + apicid_base; j++)
- set_apicid_to_node((i << bits) + j, i);
-
return 0;
}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index b43301cb2a80..ae5c213a1cb0 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -22,7 +22,7 @@ static int ptdump_curknl_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump_curknl);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
static int ptdump_curusr_show(struct seq_file *m, void *v)
{
if (current->mm->pgd)
@@ -54,7 +54,7 @@ static int __init pt_dump_debug_init(void)
debugfs_create_file("current_kernel", 0400, dir, NULL,
&ptdump_curknl_fops);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
debugfs_create_file("current_user", 0400, dir, NULL,
&ptdump_curusr_fops);
#endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e1b599ecbbc2..b7b88c1d91ec 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -408,7 +408,7 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
bool user)
{
pgd_t *pgd = mm->pgd;
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
if (user && boot_cpu_has(X86_FEATURE_PTI))
pgd = kernel_to_user_pgdp(pgd);
#endif
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
void ptdump_walk_user_pgd_level_checkwx(void)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pgd_t *pgd = INIT_PGD;
if (!(__supported_pte_mask & _PAGE_NX) ||
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 271dcb2deabc..b522933bfa56 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -6,6 +6,7 @@
#include <xen/xen.h>
#include <asm/fpu/api.h>
+#include <asm/fred.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
@@ -223,6 +224,79 @@ static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup,
return ex_handler_uaccess(fixup, regs, trapnr, fault_address);
}
+#ifdef CONFIG_X86_FRED
+static bool ex_handler_eretu(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, unsigned long error_code)
+{
+ struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, orig_ax));
+ unsigned short ss = uregs->ss;
+ unsigned short cs = uregs->cs;
+
+ /*
+ * Move the NMI bit from the invalid stack frame, which caused ERETU
+ * to fault, to the fault handler's stack frame, thus to unblock NMI
+ * with the fault handler's ERETS instruction ASAP if NMI is blocked.
+ */
+ regs->fred_ss.nmi = uregs->fred_ss.nmi;
+
+ /*
+ * Sync event information to uregs, i.e., the ERETU return frame, but
+ * is it safe to write to the ERETU return frame which is just above
+ * current event stack frame?
+ *
+ * The RSP used by FRED to push a stack frame is not the value in %rsp,
+ * it is calculated from %rsp with the following 2 steps:
+ * 1) RSP = %rsp - (IA32_FRED_CONFIG & 0x1c0) // Reserve N*64 bytes
+ * 2) RSP = RSP & ~0x3f // Align to a 64-byte cache line
+ * when an event delivery doesn't trigger a stack level change.
+ *
+ * Here is an example with N*64 (N=1) bytes reserved:
+ *
+ * 64-byte cache line ==> ______________
+ * |___Reserved___|
+ * |__Event_data__|
+ * |_____SS_______|
+ * |_____RSP______|
+ * |_____FLAGS____|
+ * |_____CS_______|
+ * |_____IP_______|
+ * 64-byte cache line ==> |__Error_code__| <== ERETU return frame
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * 64-byte cache line ==> |______________| <== RSP after step 1) and 2)
+ * |___Reserved___|
+ * |__Event_data__|
+ * |_____SS_______|
+ * |_____RSP______|
+ * |_____FLAGS____|
+ * |_____CS_______|
+ * |_____IP_______|
+ * 64-byte cache line ==> |__Error_code__| <== ERETS return frame
+ *
+ * Thus a new FRED stack frame will always be pushed below a previous
+ * FRED stack frame ((N*64) bytes may be reserved between), and it is
+ * safe to write to a previous FRED stack frame as they never overlap.
+ */
+ fred_info(uregs)->edata = fred_event_data(regs);
+ uregs->ssx = regs->ssx;
+ uregs->fred_ss.ss = ss;
+ /* The NMI bit was moved away above */
+ uregs->fred_ss.nmi = 0;
+ uregs->csx = regs->csx;
+ uregs->fred_cs.sl = 0;
+ uregs->fred_cs.wfe = 0;
+ uregs->cs = cs;
+ uregs->orig_ax = error_code;
+
+ return ex_handler_default(fixup, regs);
+}
+#endif
+
int ex_get_fixup_type(unsigned long ip)
{
const struct exception_table_entry *e = search_exception_tables(ip);
@@ -300,6 +374,10 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
return ex_handler_ucopy_len(e, regs, trapnr, fault_addr, reg, imm);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(e, regs, fault_addr);
+#ifdef CONFIG_X86_FRED
+ case EX_TYPE_ERETU:
+ return ex_handler_eretu(e, regs, error_code);
+#endif
}
BUG();
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..402e08f6b7ec 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -34,6 +34,8 @@
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
#include <asm/irq_stack.h>
+#include <asm/fred.h>
+#include <asm/sev.h> /* snp_dump_hva_rmpentry() */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -547,6 +549,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
!(error_code & X86_PF_PROT) ? "not-present page" :
(error_code & X86_PF_RSVD) ? "reserved bit violation" :
(error_code & X86_PF_PK) ? "protection keys violation" :
+ (error_code & X86_PF_RMP) ? "RMP violation" :
"permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
@@ -579,6 +582,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
}
dump_pagetable(address);
+
+ if (error_code & X86_PF_RMP)
+ snp_dump_hva_rmpentry(address);
}
static noinline void
@@ -798,15 +804,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl);
}
-/*
- * The (legacy) vsyscall page is the long page in the kernel portion
- * of the address space that has user-accessible permissions.
- */
-static bool is_vsyscall_vaddr(unsigned long vaddr)
-{
- return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
-}
-
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)
@@ -1302,21 +1299,14 @@ void do_user_addr_fault(struct pt_regs *regs,
return;
}
- /*
- * It's safe to allow irq's after cr2 has been saved and the
- * vmalloc fault has been handled.
- *
- * User-mode registers count as a user access even for any
- * potential system fault or CPU buglet:
- */
- if (user_mode(regs)) {
- local_irq_enable();
- flags |= FAULT_FLAG_USER;
- } else {
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
+ /* Legacy check - remove this after verifying that it doesn't trigger */
+ if (WARN_ON_ONCE(!(regs->flags & X86_EFLAGS_IF))) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
}
+ local_irq_enable();
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
@@ -1332,6 +1322,14 @@ void do_user_addr_fault(struct pt_regs *regs,
if (error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;
+ /*
+ * We set FAULT_FLAG_USER based on the register state, not
+ * based on X86_PF_USER. User space accesses that cause
+ * system page faults are still user accesses.
+ */
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
#ifdef CONFIG_X86_64
/*
* Faults in the vsyscall page might need emulation. The
@@ -1518,8 +1516,10 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
{
- unsigned long address = read_cr2();
irqentry_state_t state;
+ unsigned long address;
+
+ address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2();
prefetchw(&current->mm->mmap_lock);
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..f50cc210a981 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the begining: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 6993f026adec..42115ac079cf 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -3,6 +3,8 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+
#ifdef CONFIG_X86_64
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
@@ -16,6 +18,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return false;
/*
+ * Reading from the vsyscall page may cause an unhandled fault in
+ * certain cases. Though it is at an address above TASK_SIZE_MAX, it is
+ * usually considered as a user space address.
+ */
+ if (is_vsyscall_vaddr(vaddr))
+ return false;
+
+ /*
* Allow everything during early boot before 'x86_virt_bits'
* is initialized. Needed for instruction decoding in early
* exception handlers.
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index c290c55b632b..6f3b3e028718 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -14,6 +14,8 @@
#include <linux/mem_encrypt.h>
#include <linux/virtio_anchor.h>
+#include <asm/sev.h>
+
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{
@@ -42,38 +44,45 @@ bool force_dma_unencrypted(struct device *dev)
static void print_mem_encrypt_feature_info(void)
{
- pr_info("Memory Encryption Features active:");
-
- if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
- pr_cont(" Intel TDX\n");
- return;
- }
+ pr_info("Memory Encryption Features active: ");
- pr_cont(" AMD");
+ switch (cc_vendor) {
+ case CC_VENDOR_INTEL:
+ pr_cont("Intel TDX\n");
+ break;
+ case CC_VENDOR_AMD:
+ pr_cont("AMD");
- /* Secure Memory Encryption */
- if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
+ /* Secure Memory Encryption */
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
/*
* SME is mutually exclusive with any of the SEV
* features below.
- */
- pr_cont(" SME\n");
- return;
- }
+ */
+ pr_cont(" SME\n");
+ return;
+ }
- /* Secure Encrypted Virtualization */
- if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
- pr_cont(" SEV");
+ /* Secure Encrypted Virtualization */
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ pr_cont(" SEV");
+
+ /* Encrypted Register State */
+ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ pr_cont(" SEV-ES");
- /* Encrypted Register State */
- if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
- pr_cont(" SEV-ES");
+ /* Secure Nested Paging */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ pr_cont(" SEV-SNP");
- /* Secure Nested Paging */
- if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
- pr_cont(" SEV-SNP");
+ pr_cont("\n");
- pr_cont("\n");
+ sev_show_status();
+
+ break;
+ default:
+ pr_cont("Unknown\n");
+ }
}
/* Architecture __weak replacement functions */
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index d73aeb16417f..64b5005d49e5 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -41,9 +41,9 @@
#include <linux/mem_encrypt.h>
#include <linux/cc_platform.h>
+#include <asm/init.h>
#include <asm/setup.h>
#include <asm/sections.h>
-#include <asm/cmdline.h>
#include <asm/coco.h>
#include <asm/sev.h>
@@ -95,11 +95,7 @@ struct sme_populate_pgd_data {
*/
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
-static char sme_cmdline_arg[] __initdata = "mem_encrypt";
-static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
-
-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
@@ -114,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
memset(pgd_p, 0, pgd_size);
}
-static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -151,7 +147,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
return pud;
}
-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -167,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
}
-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -193,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}
-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd_large(ppd);
@@ -203,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
}
}
-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd(ppd);
@@ -213,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
}
}
-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
pmdval_t pmd_flags, pteval_t pte_flags)
{
unsigned long vaddr_end;
@@ -237,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
__sme_map_range_pte(ppd);
}
-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}
-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}
-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
-static unsigned long __init sme_pgtable_calc(unsigned long len)
+static unsigned long __head sme_pgtable_calc(unsigned long len)
{
unsigned long entries = 0, tables = 0;
@@ -289,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return entries + tables;
}
-void __init sme_encrypt_kernel(struct boot_params *bp)
+void __head sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
@@ -305,7 +301,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* instrumentation or checking boot_cpu_data in the cc_platform_has()
* function.
*/
- if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
+ if (!sme_get_me_mask() ||
+ RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
return;
/*
@@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* memory from being cached.
*/
- /* Physical addresses gives us the identity mapped virtual addresses */
- kernel_start = __pa_symbol(_text);
- kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
+ kernel_start = (unsigned long)RIP_REL_REF(_text);
+ kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
kernel_len = kernel_end - kernel_start;
initrd_start = 0;
@@ -343,14 +339,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
#endif
/*
- * We're running identity mapped, so we must obtain the address to the
- * SME encryption workarea using rip-relative addressing.
- */
- asm ("lea sme_workarea(%%rip), %0"
- : "=r" (workarea_start)
- : "p" (sme_workarea));
-
- /*
* Calculate required number of workarea bytes needed:
* executable encryption area size:
* stack page (PAGE_SIZE)
@@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* pagetable structures for the encryption of the kernel
* pagetable structures for workarea (in case not currently mapped)
*/
- execute_start = workarea_start;
+ execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
execute_len = execute_end - execute_start;
@@ -502,14 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
native_write_cr3(__native_read_cr3());
}
-void __init sme_enable(struct boot_params *bp)
+void __head sme_enable(struct boot_params *bp)
{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
- bool active_by_default;
unsigned long me_mask;
- char buffer[16];
bool snp;
u64 msr;
@@ -543,15 +528,18 @@ void __init sme_enable(struct boot_params *bp)
me_mask = 1UL << (ebx & 0x3f);
/* Check the SEV MSR whether SEV or SME is enabled */
- sev_status = __rdmsr(MSR_AMD64_SEV);
- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+ RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
- if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED))
snp_abort();
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
+ if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION))
+ return;
+
/*
* No SME if Hypervisor bit is set. This check is here to
* prevent a guest from trying to enable SME. For running as a
@@ -571,48 +559,10 @@ void __init sme_enable(struct boot_params *bp)
msr = __rdmsr(MSR_AMD64_SYSCFG);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
- } else {
- /* SEV state cannot be controlled by a command line option */
- sme_me_mask = me_mask;
- goto out;
}
- /*
- * Fixups have not been applied to phys_base yet and we're running
- * identity mapped, so we must obtain the address to the SME command
- * line argument data using rip-relative addressing.
- */
- asm ("lea sme_cmdline_arg(%%rip), %0"
- : "=r" (cmdline_arg)
- : "p" (sme_cmdline_arg));
- asm ("lea sme_cmdline_on(%%rip), %0"
- : "=r" (cmdline_on)
- : "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
- : "=r" (cmdline_off)
- : "p" (sme_cmdline_off));
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
- active_by_default = true;
- else
- active_by_default = false;
-
- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
- ((u64)bp->ext_cmd_line_ptr << 32));
-
- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
- return;
-
- if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
- sme_me_mask = me_mask;
- else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
- sme_me_mask = 0;
- else
- sme_me_mask = active_by_default ? me_mask : 0;
-out:
- if (sme_me_mask) {
- physical_mask &= ~sme_me_mask;
- cc_vendor = CC_VENDOR_AMD;
- cc_set_mask(sme_me_mask);
- }
+ RIP_REL_REF(sme_me_mask) = me_mask;
+ physical_mask &= ~me_mask;
+ cc_vendor = CC_VENDOR_AMD;
+ cc_set_mask(me_mask);
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index adc497b93f03..65e9a6e391c0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b)
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
const struct numa_memblk *mb = *(const struct numa_memblk **)b;
- return ma->start - mb->start;
+ return (ma->start > mb->start) - (ma->start < mb->start);
}
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
@@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
* @start: address to begin fill
* @end: address to end fill
*
- * Find and extend numa_meminfo memblks to cover the @start-@end
- * physical address range, such that the first memblk includes
- * @start, the last memblk includes @end, and any gaps in between
- * are filled.
+ * Find and extend numa_meminfo memblks to cover the physical
+ * address range @start-@end
*
* RETURNS:
* 0 : Success
- * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
*/
int __init numa_fill_memblks(u64 start, u64 end)
@@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end)
/*
* Create a list of pointers to numa_meminfo memblks that
- * overlap start, end. Exclude (start == bi->end) since
- * end addresses in both a CFMWS range and a memblk range
- * are exclusive.
- *
- * This list of pointers is used to make in-place changes
- * that fill out the numa_meminfo memblks.
+ * overlap start, end. The list is used to make in-place
+ * changes that fill out the numa_meminfo memblks.
*/
for (int i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
- if (start < bi->end && end >= bi->start) {
+ if (memblock_addrs_overlap(start, end - start, bi->start,
+ bi->end - bi->start)) {
blk[count] = &mi->blk[i];
count++;
}
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0904d7e8e126..0d72183b5dd0 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -240,6 +240,8 @@ void pat_cpu_init(void)
}
wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+
+ __flush_tlb_all();
}
/**
@@ -296,13 +298,8 @@ void __init pat_bp_init(void)
/*
* Xen PV doesn't allow to set PAT MSR, but all cache modes are
* supported.
- * When running as TDX guest setting the PAT MSR won't work either
- * due to the requirement to set CR0.CD when doing so. Rely on
- * firmware to have set the PAT MSR correctly.
*/
- if (pat_disabled ||
- cpu_feature_enabled(X86_FEATURE_XENPV) ||
- cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ if (pat_disabled || cpu_feature_enabled(X86_FEATURE_XENPV)) {
init_cache_modes(pat_msr_val);
return;
}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index e9b448d1b1b7..e5b2985a7c51 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address)
* areas on 32-bit NUMA systems. The percpu areas can
* end up in this kind of memory, for instance.
*
- * This could be optimized, but it is only intended to be
- * used at initialization time, and keeping it
- * unoptimized should increase the testing coverage for
- * the more obscure platforms.
+ * Note that as long as the PTEs are well-formed with correct PFNs, this
+ * works without checking the PRESENT bit in the leaf PTE. This is unlike
+ * the similar vmalloc_to_page() and derivatives. Callers may depend on
+ * this behavior.
+ *
+ * This could be optimized, but it is only used in paths that are not perf
+ * sensitive, and keeping it unoptimized should increase the testing coverage
+ * for the more obscure platforms.
*/
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
@@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
-static int set_memory_p(unsigned long *addr, int numpages)
-{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
-}
-
/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
- return set_memory_p(&addr, 1);
+ return set_memory_p(addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
@@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages)
CPA_NO_CHECK_ALIAS, NULL);
}
+int set_memory_p(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
+}
+
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
@@ -2153,7 +2157,7 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
/* Notify hypervisor that we are about to set/clr encryption attribute. */
if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
- return -EIO;
+ goto vmm_fail;
ret = __change_page_attr_set_clr(&cpa, 1);
@@ -2166,13 +2170,20 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
*/
cpa_flush(&cpa, 0);
+ if (ret)
+ return ret;
+
/* Notify hypervisor that we have successfully set/clr encryption attribute. */
- if (!ret) {
- if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
- ret = -EIO;
- }
+ if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
+ goto vmm_fail;
- return ret;
+ return 0;
+
+vmm_fail:
+ WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s.\n",
+ (void *)addr, numpages, enc ? "private" : "shared");
+
+ return -EIO;
}
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 0cbc1b8e8e3d..cceb779d882d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -293,7 +293,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
for (i = 0; i < PREALLOCATED_PMDS; i++)
mop_up_one_pmd(mm, &pgdp[i]);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
if (!boot_cpu_has(X86_FEATURE_PTI))
return;
@@ -325,7 +325,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
}
}
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
pgd_t *k_pgd, pmd_t *pmds[])
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5768d386efab..4af930947380 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -89,10 +89,10 @@
#define CR3_HW_ASID_BITS 12
/*
- * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * When enabled, MITIGATION_PAGE_TABLE_ISOLATION consumes a single bit for
* user/kernel switches
*/
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS 1
#else
# define PTI_CONSUMED_PCID_BITS 0
@@ -114,7 +114,7 @@ static inline u16 kern_pcid(u16 asid)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Make sure that the dynamic ASID space does not conflict with the
* bit we are using to switch between user and kernel ASIDs.
@@ -149,7 +149,7 @@ static inline u16 kern_pcid(u16 asid)
static inline u16 user_pcid(u16 asid)
{
u16 ret = kern_pcid(asid);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
return ret;
@@ -262,7 +262,7 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
static inline void invalidate_user_asid(u16 asid)
{
/* There is no user ASID if address space separation is off */
- if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (!IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
return;
/*
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 919f647c740f..a7ba8e178645 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
+#define X86_REG_R12 (MAX_BPF_JIT_REG + 3)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
[X86_REG_R9] = 1, /* R9 register, 6th function argument */
+ [X86_REG_R12] = 4, /* R12 callee saved */
};
static const int reg2pt_regs[] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
BIT(X86_REG_R9) |
+ BIT(X86_REG_R12) |
BIT(BPF_REG_AX));
}
@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
return byte;
}
+static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
+{
+ if (is_ereg(r1))
+ byte |= 1;
+ if (is_ereg(index))
+ byte |= 2;
+ if (is_ereg(r2))
+ byte |= 4;
+ return byte;
+}
+
/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
@@ -553,7 +567,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else {
EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
- if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
+ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS))
EMIT1(0xCC); /* int3 */
}
@@ -568,7 +582,7 @@ static void emit_return(u8 **pprog, u8 *ip)
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
- if (IS_ENABLED(CONFIG_SLS))
+ if (IS_ENABLED(CONFIG_MITIGATION_SLS))
EMIT1(0xCC); /* int3 */
}
@@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+ pop_r12(&prog);
}
EMIT1(0x58); /* pop rax */
@@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+ pop_r12(&prog);
}
EMIT1(0x58); /* pop rax */
@@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
*pprog = prog;
}
+static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ if (is_imm8(off)) {
+ EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+ } else {
+ EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+ }
+ *pprog = prog;
+}
+
/*
* Emit a REX byte if it will be necessary to address these registers
*/
@@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* movzx rax, byte ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* movzx rax, word ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* mov eax, dword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B);
+ break;
+ case BPF_DW:
+ /* mov rax, qword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+ *pprog = prog;
+}
+
+static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
+static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* mov byte ptr [rax + r12 + off], al */
+ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
+ break;
+ case BPF_H:
+ /* mov word ptr [rax + r12 + off], ax */
+ EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ case BPF_W:
+ /* mov dword ptr [rax + r12 + 1], eax */
+ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ case BPF_DW:
+ /* mov qword ptr [rax + r12 + 1], rax */
+ EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+ *pprog = prog;
+}
+
+static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
+/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
+static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* mov byte ptr [rax + r12 + off], imm8 */
+ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
+ break;
+ case BPF_H:
+ /* mov word ptr [rax + r12 + off], imm16 */
+ EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+ break;
+ case BPF_W:
+ /* mov dword ptr [rax + r12 + 1], imm32 */
+ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+ break;
+ case BPF_DW:
+ /* mov qword ptr [rax + r12 + 1], imm32 */
+ EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
+ EMIT(imm, bpf_size_to_x86_bytes(size));
+ *pprog = prog;
+}
+
+static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
+{
+ emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
+}
+
static int emit_atomic(u8 **pprog, u8 atomic_op,
u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
@@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
+#define DONT_CLEAR 1
+
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
u32 reg = x->fixup >> 8;
/* jump over faulting load and clear dest register */
- *(unsigned long *)((void *)regs + reg) = 0;
+ if (reg != DONT_CLEAR)
+ *(unsigned long *)((void *)regs + reg) = 0;
regs->ip += x->fixup & 0xff;
return true;
}
@@ -1147,11 +1276,15 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ u64 arena_vm_start, user_vm_start;
int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
int err;
+ arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
+ user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
+
detect_reg_usage(insn, insn_cnt, callee_regs_used,
&tail_call_seen);
@@ -1172,8 +1305,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
push_r12(&prog);
push_callee_regs(&prog, all_callee_regs_used);
} else {
+ if (arena_vm_start)
+ push_r12(&prog);
push_callee_regs(&prog, callee_regs_used);
}
+ if (arena_vm_start)
+ emit_mov_imm64(&prog, X86_REG_R12,
+ arena_vm_start >> 32, (u32) arena_vm_start);
ilen = prog - temp;
if (rw_image)
@@ -1213,6 +1351,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
+ if (insn->off == BPF_ADDR_SPACE_CAST &&
+ insn->imm == 1U << 16) {
+ if (dst_reg != src_reg)
+ /* 32-bit mov */
+ emit_mov_reg(&prog, false, dst_reg, src_reg);
+ /* shl dst_reg, 32 */
+ maybe_emit_1mod(&prog, dst_reg, true);
+ EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32);
+
+ /* or dst_reg, user_vm_start */
+ maybe_emit_1mod(&prog, dst_reg, true);
+ if (is_axreg(dst_reg))
+ EMIT1_off32(0x0D, user_vm_start >> 32);
+ else
+ EMIT2_off32(0x81, add_1reg(0xC8, dst_reg), user_vm_start >> 32);
+
+ /* rol dst_reg, 32 */
+ maybe_emit_1mod(&prog, dst_reg, true);
+ EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32);
+
+ /* xor r11, r11 */
+ EMIT3(0x4D, 0x31, 0xDB);
+
+ /* test dst_reg32, dst_reg32; check if lower 32-bit are zero */
+ maybe_emit_mod(&prog, dst_reg, dst_reg, false);
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+
+ /* cmove r11, dst_reg; if so, set dst_reg to zero */
+ /* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
+ maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
+ EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
+ break;
+ }
+ fallthrough;
case BPF_ALU | BPF_MOV | BPF_X:
if (insn->off == 0)
emit_mov_reg(&prog,
@@ -1564,6 +1736,56 @@ st: if (is_imm8(insn->off))
emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+ start_of_ldx = prog;
+ emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
+ goto populate_extable;
+
+ /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ start_of_ldx = prog;
+ if (BPF_CLASS(insn->code) == BPF_LDX)
+ emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ else
+ emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+populate_extable:
+ {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen + (start_of_ldx - temp);
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("mem32 extable bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ /* switch ex to rw buffer for writes */
+ ex = (void *)rw_image + ((void *)ex - (void *)image);
+
+ ex->insn = delta;
+
+ ex->data = EX_TYPE_BPF;
+
+ ex->fixup = (prog - start_of_ldx) |
+ ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+ }
+ break;
+
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
@@ -2036,6 +2258,8 @@ emit_jmp:
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (arena_vm_start)
+ pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
@@ -3242,3 +3466,13 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
BUG_ON(ret < 0);
}
}
+
+bool bpf_jit_supports_arena(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+ return true;
+}
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index b18ce19981ec..c10083a8e68e 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1273,7 +1273,7 @@ static int emit_jmp_edx(u8 **pprog, u8 *ip)
u8 *prog = *pprog;
int cnt = 0;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
#else
EMIT2(0xFF, 0xE2);
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 40745664d92f..f32451bdcfdd 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -135,13 +135,13 @@ static void sdv_pci_init(void)
*/
void __init x86_ce4100_early_setup(void)
{
- x86_init.oem.arch_setup = sdv_arch_setup;
- x86_init.resources.probe_roms = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
- x86_init.pci.init = ce4100_pci_init;
- x86_init.pci.init_irq = sdv_pci_init;
+ x86_init.oem.arch_setup = sdv_arch_setup;
+ x86_init.resources.probe_roms = x86_init_noop;
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+ x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config;
+ x86_init.pci.init = ce4100_pci_init;
+ x86_init.pci.init_irq = sdv_pci_init;
/*
* By default, the reboot method is ACPI which is supported by the
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e9f99c56f3ce..f090ec972d7b 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -950,3 +950,8 @@ umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n)
}
return attr->mode;
}
+
+enum efi_secureboot_mode __x86_ima_efi_boot_mode(void)
+{
+ return boot_params.secure_boot;
+}
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index f4592dc7a1c1..7be71c2cdc83 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -118,7 +118,8 @@ void __init x86_intel_mid_early_setup(void)
machine_ops.emergency_restart = intel_mid_reboot;
/* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+ x86_init.mpparse.parse_smp_cfg = x86_init_noop;
set_bit(MP_BUS_ISA, mp_bus_not_pci);
}
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 00a92cb2c814..944e0290f2c0 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -3,6 +3,7 @@
#include <xen/hvc-console.h>
+#include <asm/bootparam.h>
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 08aa0f25f12a..bc31863c5ee6 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -61,7 +61,7 @@ ifdef CONFIG_STACKPROTECTOR_STRONG
PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
endif
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS)
endif
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index c9f76fae902e..14d9c7daf90f 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,13 +37,15 @@
.text
.code16
-.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0
+.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 lock_rip=0
/*
* Make sure only one CPU fiddles with the realmode stack
*/
.Llock_rm\@:
.if \lock_pa
lock btsl $0, pa_tr_lock
+ .elseif \lock_rip
+ lock btsl $0, tr_lock(%rip)
.else
lock btsl $0, tr_lock
.endif
@@ -220,6 +222,35 @@ SYM_CODE_START(trampoline_start64)
lidt tr_idt(%rip)
lgdt tr_gdt64(%rip)
+ /* Check if paging mode has to be changed */
+ movq %cr4, %rax
+ xorl tr_cr4(%rip), %eax
+ testl $X86_CR4_LA57, %eax
+ jnz .L_switch_paging
+
+ /* Paging mode is correct proceed in 64-bit mode */
+
+ LOCK_AND_LOAD_REALMODE_ESP lock_rip=1
+
+ movw $__KERNEL_DS, %dx
+ movl %edx, %ss
+ addl $pa_real_mode_base, %esp
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+
+ movl $pa_trampoline_pgd, %eax
+ movq %rax, %cr3
+
+ pushq $__KERNEL_CS
+ pushq tr_start(%rip)
+ lretq
+.L_switch_paging:
+ /*
+ * To switch between 4- and 5-level paging modes, it is necessary
+ * to disable paging. This must be done in the compatibility mode.
+ */
ljmpl *tr_compat(%rip)
SYM_CODE_END(trampoline_start64)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index a3bae2b24626..b029fb81ebee 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -653,6 +653,14 @@ static void print_absolute_relocs(void)
if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
continue;
}
+ /*
+ * Do not perform relocations in .notes section; any
+ * values there are meant for pre-boot consumption (e.g.
+ * startup_xen).
+ */
+ if (sec_applies->shdr.sh_type == SHT_NOTE) {
+ continue;
+ }
sh_symtab = sec_symtab->symtab;
sym_strtab = sec_symtab->link->strtab;
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {
diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
new file mode 100644
index 000000000000..ef2a31bdcc70
--- /dev/null
+++ b/arch/x86/virt/svm/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_KVM_AMD_SEV) += sev.o
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
new file mode 100644
index 000000000000..cffe1157a90a
--- /dev/null
+++ b/arch/x86/virt/svm/sev.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM-SEV Host Support.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ *
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/iommu.h>
+#include <linux/amd-iommu.h>
+
+#include <asm/sev.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
+#include <asm/iommu.h>
+
+/*
+ * The RMP entry format is not architectural. The format is defined in PPR
+ * Family 19h Model 01h, Rev B1 processor.
+ */
+struct rmpentry {
+ union {
+ struct {
+ u64 assigned : 1,
+ pagesize : 1,
+ immutable : 1,
+ rsvd1 : 9,
+ gpa : 39,
+ asid : 10,
+ vmsa : 1,
+ validated : 1,
+ rsvd2 : 1;
+ };
+ u64 lo;
+ };
+ u64 hi;
+} __packed;
+
+/*
+ * The first 16KB from the RMP_BASE is used by the processor for the
+ * bookkeeping, the range needs to be added during the RMP entry lookup.
+ */
+#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
+
+/* Mask to apply to a PFN to get the first PFN of a 2MB page */
+#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
+
+static u64 probed_rmp_base, probed_rmp_size;
+static struct rmpentry *rmptable __ro_after_init;
+static u64 rmptable_max_pfn __ro_after_init;
+
+static LIST_HEAD(snp_leaked_pages_list);
+static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
+
+static unsigned long snp_nr_leaked_pages;
+
+#undef pr_fmt
+#define pr_fmt(fmt) "SEV-SNP: " fmt
+
+static int __mfd_enable(unsigned int cpu)
+{
+ u64 val;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return 0;
+
+ rdmsrl(MSR_AMD64_SYSCFG, val);
+
+ val |= MSR_AMD64_SYSCFG_MFDM;
+
+ wrmsrl(MSR_AMD64_SYSCFG, val);
+
+ return 0;
+}
+
+static __init void mfd_enable(void *arg)
+{
+ __mfd_enable(smp_processor_id());
+}
+
+static int __snp_enable(unsigned int cpu)
+{
+ u64 val;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return 0;
+
+ rdmsrl(MSR_AMD64_SYSCFG, val);
+
+ val |= MSR_AMD64_SYSCFG_SNP_EN;
+ val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
+
+ wrmsrl(MSR_AMD64_SYSCFG, val);
+
+ return 0;
+}
+
+static __init void snp_enable(void *arg)
+{
+ __snp_enable(smp_processor_id());
+}
+
+#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
+
+bool snp_probe_rmptable_info(void)
+{
+ u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
+
+ rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+ if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ if (rmp_base > rmp_end) {
+ pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+ return false;
+ }
+
+ rmp_sz = rmp_end - rmp_base + 1;
+
+ /*
+ * Calculate the amount the memory that must be reserved by the BIOS to
+ * address the whole RAM, including the bookkeeping area. The RMP itself
+ * must also be covered.
+ */
+ max_rmp_pfn = max_pfn;
+ if (PHYS_PFN(rmp_end) > max_pfn)
+ max_rmp_pfn = PHYS_PFN(rmp_end);
+
+ calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ if (calc_rmp_sz > rmp_sz) {
+ pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+ calc_rmp_sz, rmp_sz);
+ return false;
+ }
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = rmp_sz;
+
+ pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
+ probed_rmp_base, probed_rmp_base + probed_rmp_size - 1);
+
+ return true;
+}
+
+/*
+ * Do the necessary preparations which are verified by the firmware as
+ * described in the SNP_INIT_EX firmware command description in the SNP
+ * firmware ABI spec.
+ */
+static int __init snp_rmptable_init(void)
+{
+ void *rmptable_start;
+ u64 rmptable_size;
+ u64 val;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return 0;
+
+ if (!amd_iommu_snp_en)
+ return 0;
+
+ if (!probed_rmp_size)
+ goto nosnp;
+
+ rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
+ if (!rmptable_start) {
+ pr_err("Failed to map RMP table\n");
+ return 1;
+ }
+
+ /*
+ * Check if SEV-SNP is already enabled, this can happen in case of
+ * kexec boot.
+ */
+ rdmsrl(MSR_AMD64_SYSCFG, val);
+ if (val & MSR_AMD64_SYSCFG_SNP_EN)
+ goto skip_enable;
+
+ memset(rmptable_start, 0, probed_rmp_size);
+
+ /* Flush the caches to ensure that data is written before SNP is enabled. */
+ wbinvd_on_all_cpus();
+
+ /* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. */
+ on_each_cpu(mfd_enable, NULL, 1);
+
+ on_each_cpu(snp_enable, NULL, 1);
+
+skip_enable:
+ rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ rmptable = (struct rmpentry *)rmptable_start;
+ rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
+
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
+
+ /*
+ * Setting crash_kexec_post_notifiers to 'true' to ensure that SNP panic
+ * notifier is invoked to do SNP IOMMU shutdown before kdump.
+ */
+ crash_kexec_post_notifiers = true;
+
+ return 0;
+
+nosnp:
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ return -ENOSYS;
+}
+
+/*
+ * This must be called after the IOMMU has been initialized.
+ */
+device_initcall(snp_rmptable_init);
+
+static struct rmpentry *get_rmpentry(u64 pfn)
+{
+ if (WARN_ON_ONCE(pfn > rmptable_max_pfn))
+ return ERR_PTR(-EFAULT);
+
+ return &rmptable[pfn];
+}
+
+static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
+{
+ struct rmpentry *large_entry, *entry;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return ERR_PTR(-ENODEV);
+
+ entry = get_rmpentry(pfn);
+ if (IS_ERR(entry))
+ return entry;
+
+ /*
+ * Find the authoritative RMP entry for a PFN. This can be either a 4K
+ * RMP entry or a special large RMP entry that is authoritative for a
+ * whole 2M area.
+ */
+ large_entry = get_rmpentry(pfn & PFN_PMD_MASK);
+ if (IS_ERR(large_entry))
+ return large_entry;
+
+ *level = RMP_TO_PG_LEVEL(large_entry->pagesize);
+
+ return entry;
+}
+
+int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
+{
+ struct rmpentry *e;
+
+ e = __snp_lookup_rmpentry(pfn, level);
+ if (IS_ERR(e))
+ return PTR_ERR(e);
+
+ *assigned = !!e->assigned;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
+
+/*
+ * Dump the raw RMP entry for a particular PFN. These bits are documented in the
+ * PPR for a particular CPU model and provide useful information about how a
+ * particular PFN is being utilized by the kernel/firmware at the time certain
+ * unexpected events occur, such as RMP faults.
+ */
+static void dump_rmpentry(u64 pfn)
+{
+ u64 pfn_i, pfn_end;
+ struct rmpentry *e;
+ int level;
+
+ e = __snp_lookup_rmpentry(pfn, &level);
+ if (IS_ERR(e)) {
+ pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
+ pfn, PTR_ERR(e));
+ return;
+ }
+
+ if (e->assigned) {
+ pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n",
+ pfn, e->lo, e->hi);
+ return;
+ }
+
+ /*
+ * If the RMP entry for a particular PFN is not in an assigned state,
+ * then it is sometimes useful to get an idea of whether or not any RMP
+ * entries for other PFNs within the same 2MB region are assigned, since
+ * those too can affect the ability to access a particular PFN in
+ * certain situations, such as when the PFN is being accessed via a 2MB
+ * mapping in the host page table.
+ */
+ pfn_i = ALIGN_DOWN(pfn, PTRS_PER_PMD);
+ pfn_end = pfn_i + PTRS_PER_PMD;
+
+ pr_info("PFN 0x%llx unassigned, dumping non-zero entries in 2M PFN region: [0x%llx - 0x%llx]\n",
+ pfn, pfn_i, pfn_end);
+
+ while (pfn_i < pfn_end) {
+ e = __snp_lookup_rmpentry(pfn_i, &level);
+ if (IS_ERR(e)) {
+ pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
+ PTR_ERR(e), pfn_i);
+ pfn_i++;
+ continue;
+ }
+
+ if (e->lo || e->hi)
+ pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
+ pfn_i++;
+ }
+}
+
+void snp_dump_hva_rmpentry(unsigned long hva)
+{
+ unsigned long paddr;
+ unsigned int level;
+ pgd_t *pgd;
+ pte_t *pte;
+
+ pgd = __va(read_cr3_pa());
+ pgd += pgd_index(hva);
+ pte = lookup_address_in_pgd(pgd, hva, &level);
+
+ if (!pte) {
+ pr_err("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
+ return;
+ }
+
+ paddr = PFN_PHYS(pte_pfn(*pte)) | (hva & ~page_level_mask(level));
+ dump_rmpentry(PHYS_PFN(paddr));
+}
+
+/*
+ * PSMASH a 2MB aligned page into 4K pages in the RMP table while preserving the
+ * Validated bit.
+ */
+int psmash(u64 pfn)
+{
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return -ENODEV;
+
+ if (!pfn_valid(pfn))
+ return -EINVAL;
+
+ /* Binutils version 2.36 supports the PSMASH mnemonic. */
+ asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
+ : "=a" (ret)
+ : "a" (paddr)
+ : "memory", "cc");
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(psmash);
+
+/*
+ * If the kernel uses a 2MB or larger directmap mapping to write to an address,
+ * and that mapping contains any 4KB pages that are set to private in the RMP
+ * table, an RMP #PF will trigger and cause a host crash. Hypervisor code that
+ * owns the PFNs being transitioned will never attempt such a write, but other
+ * kernel tasks writing to other PFNs in the range may trigger these checks
+ * inadvertently due a large directmap mapping that happens to overlap such a
+ * PFN.
+ *
+ * Prevent this by splitting any 2MB+ mappings that might end up containing a
+ * mix of private/shared PFNs as a result of a subsequent RMPUPDATE for the
+ * PFN/rmp_level passed in.
+ *
+ * Note that there is no attempt here to scan all the RMP entries for the 2MB
+ * physical range, since it would only be worthwhile in determining if a
+ * subsequent RMPUPDATE for a 4KB PFN would result in all the entries being of
+ * the same shared/private state, thus avoiding the need to split the mapping.
+ * But that would mean the entries are currently in a mixed state, and so the
+ * mapping would have already been split as a result of prior transitions.
+ * And since the 4K split is only done if the mapping is 2MB+, and there isn't
+ * currently a mechanism in place to restore 2MB+ mappings, such a check would
+ * not provide any usable benefit.
+ *
+ * More specifics on how these checks are carried out can be found in APM
+ * Volume 2, "RMP and VMPL Access Checks".
+ */
+static int adjust_direct_map(u64 pfn, int rmp_level)
+{
+ unsigned long vaddr;
+ unsigned int level;
+ int npages, ret;
+ pte_t *pte;
+
+ /*
+ * pfn_to_kaddr() will return a vaddr only within the direct
+ * map range.
+ */
+ vaddr = (unsigned long)pfn_to_kaddr(pfn);
+
+ /* Only 4KB/2MB RMP entries are supported by current hardware. */
+ if (WARN_ON_ONCE(rmp_level > PG_LEVEL_2M))
+ return -EINVAL;
+
+ if (!pfn_valid(pfn))
+ return -EINVAL;
+
+ if (rmp_level == PG_LEVEL_2M &&
+ (!IS_ALIGNED(pfn, PTRS_PER_PMD) || !pfn_valid(pfn + PTRS_PER_PMD - 1)))
+ return -EINVAL;
+
+ /*
+ * If an entire 2MB physical range is being transitioned, then there is
+ * no risk of RMP #PFs due to write accesses from overlapping mappings,
+ * since even accesses from 1GB mappings will be treated as 2MB accesses
+ * as far as RMP table checks are concerned.
+ */
+ if (rmp_level == PG_LEVEL_2M)
+ return 0;
+
+ pte = lookup_address(vaddr, &level);
+ if (!pte || pte_none(*pte))
+ return 0;
+
+ if (level == PG_LEVEL_4K)
+ return 0;
+
+ npages = page_level_size(rmp_level) / PAGE_SIZE;
+ ret = set_memory_4k(vaddr, npages);
+ if (ret)
+ pr_warn("Failed to split direct map for PFN 0x%llx, ret: %d\n",
+ pfn, ret);
+
+ return ret;
+}
+
+/*
+ * It is expected that those operations are seldom enough so that no mutual
+ * exclusion of updaters is needed and thus the overlap error condition below
+ * should happen very rarely and would get resolved relatively quickly by
+ * the firmware.
+ *
+ * If not, one could consider introducing a mutex or so here to sync concurrent
+ * RMP updates and thus diminish the amount of cases where firmware needs to
+ * lock 2M ranges to protect against concurrent updates.
+ *
+ * The optimal solution would be range locking to avoid locking disjoint
+ * regions unnecessarily but there's no support for that yet.
+ */
+static int rmpupdate(u64 pfn, struct rmp_state *state)
+{
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ int ret, level;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return -ENODEV;
+
+ level = RMP_TO_PG_LEVEL(state->pagesize);
+
+ if (adjust_direct_map(pfn, level))
+ return -EFAULT;
+
+ do {
+ /* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
+ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
+ : "=a" (ret)
+ : "a" (paddr), "c" ((unsigned long)state)
+ : "memory", "cc");
+ } while (ret == RMPUPDATE_FAIL_OVERLAP);
+
+ if (ret) {
+ pr_err("RMPUPDATE failed for PFN %llx, pg_level: %d, ret: %d\n",
+ pfn, level, ret);
+ dump_rmpentry(pfn);
+ dump_stack();
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/* Transition a page to guest-owned/private state in the RMP table. */
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable)
+{
+ struct rmp_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.assigned = 1;
+ state.asid = asid;
+ state.immutable = immutable;
+ state.gpa = gpa;
+ state.pagesize = PG_LEVEL_TO_RMP(level);
+
+ return rmpupdate(pfn, &state);
+}
+EXPORT_SYMBOL_GPL(rmp_make_private);
+
+/* Transition a page to hypervisor-owned/shared state in the RMP table. */
+int rmp_make_shared(u64 pfn, enum pg_level level)
+{
+ struct rmp_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.pagesize = PG_LEVEL_TO_RMP(level);
+
+ return rmpupdate(pfn, &state);
+}
+EXPORT_SYMBOL_GPL(rmp_make_shared);
+
+void snp_leak_pages(u64 pfn, unsigned int npages)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ pr_warn("Leaking PFN range 0x%llx-0x%llx\n", pfn, pfn + npages);
+
+ spin_lock(&snp_leaked_pages_list_lock);
+ while (npages--) {
+
+ /*
+ * Reuse the page's buddy list for chaining into the leaked
+ * pages list. This page should not be on a free list currently
+ * and is also unsafe to be added to a free list.
+ */
+ if (likely(!PageCompound(page)) ||
+
+ /*
+ * Skip inserting tail pages of compound page as
+ * page->buddy_list of tail pages is not usable.
+ */
+ (PageHead(page) && compound_nr(page) <= npages))
+ list_add_tail(&page->buddy_list, &snp_leaked_pages_list);
+
+ dump_rmpentry(pfn);
+ snp_nr_leaked_pages++;
+ pfn++;
+ page++;
+ }
+ spin_unlock(&snp_leaked_pages_list_lock);
+}
+EXPORT_SYMBOL_GPL(snp_leak_pages);
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 9dd5490b3318..8b045dd25196 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -33,12 +33,6 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
return 0xfd;
}
-static u32 xen_set_apic_id(u32 x)
-{
- WARN_ON(1);
- return x;
-}
-
static u32 xen_get_apic_id(u32 x)
{
return ((x)>>24) & 0xFFu;
@@ -49,20 +43,20 @@ static u32 xen_apic_read(u32 reg)
struct xen_platform_op op = {
.cmd = XENPF_get_cpuinfo,
.interface_version = XENPF_INTERFACE_VERSION,
- .u.pcpu_info.xen_cpuid = 0,
};
- int ret;
-
- /* Shouldn't need this as APIC is turned off for PV, and we only
- * get called on the bootup processor. But just in case. */
- if (!xen_initial_domain() || smp_processor_id())
- return 0;
+ int ret, cpu;
if (reg == APIC_LVR)
return 0x14;
if (reg != APIC_ID)
return 0;
+ cpu = smp_processor_id();
+ if (!xen_initial_domain())
+ return cpu ? cpuid_to_apicid[cpu] << 24 : 0;
+
+ op.u.pcpu_info.xen_cpuid = cpu;
+
ret = HYPERVISOR_platform_op(&op);
if (ret)
op.u.pcpu_info.apic_id = BAD_APICID;
@@ -110,11 +104,6 @@ static int xen_madt_oem_check(char *oem_id, char *oem_table_id)
return xen_pv_domain();
}
-static u32 xen_phys_pkg_id(u32 initial_apic_id, int index_msb)
-{
- return initial_apic_id >> index_msb;
-}
-
static u32 xen_cpu_present_to_apicid(int cpu)
{
if (cpu_present(cpu))
@@ -133,11 +122,9 @@ static struct apic xen_pv_apic __ro_after_init = {
.disable_esr = 0,
.cpu_present_to_apicid = xen_cpu_present_to_apicid,
- .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */
.max_apic_id = UINT_MAX,
.get_apic_id = xen_get_apic_id,
- .set_apic_id = xen_set_apic_id,
.calc_dest_apicid = apic_flat_calc_apicid,
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 3f8c34707c50..99a68fa71dbe 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -168,7 +168,7 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
*/
xen_uninit_lock_cpu(cpu);
- if (cpu_acpi_id(cpu) != U32_MAX)
+ if (cpu_acpi_id(cpu) != CPU_ACPIID_INVALID)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
else
per_cpu(xen_vcpu_id, cpu) = cpu;
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index aeb33e0a3f76..ace2eb054053 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -200,6 +200,9 @@ static void __init xen_pv_init_platform(void)
xen_set_mtrr_data();
else
mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+
+ /* Adjust nr_cpu_ids before "enumeration" happens */
+ xen_smp_count_cpus();
}
static void __init xen_pv_guest_late_init(void)
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index ada3868c02c2..9e9db601bd52 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -4,6 +4,7 @@
#include <xen/hvc-console.h>
+#include <asm/bootparam.h>
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4b0d6fff88de..935771726f9c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu)
char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+ if (!resched_name)
+ goto fail_mem;
per_cpu(xen_resched_irq, cpu).name = resched_name;
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
cpu,
@@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_resched_irq, cpu).irq = rc;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
cpu,
@@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu)
if (!xen_fifo_events) {
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ if (!debug_name)
+ goto fail_mem;
+
per_cpu(xen_debug_irq, cpu).name = debug_name;
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
xen_debug_interrupt,
@@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu)
}
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
+
per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
cpu,
@@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu)
return 0;
+ fail_mem:
+ rc = -ENOMEM;
fail:
xen_smp_intr_free(cpu);
return rc;
@@ -123,8 +135,6 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
{
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
- else
- calculate_max_logical_packages();
}
void xen_smp_send_reschedule(int cpu)
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c20cbb14c82b..b8efdbc693f7 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -19,6 +19,7 @@ extern void xen_smp_intr_free(unsigned int cpu);
int xen_smp_intr_init_pv(unsigned int cpu);
void xen_smp_intr_free_pv(unsigned int cpu);
+void xen_smp_count_cpus(void);
void xen_smp_cpus_done(unsigned int max_cpus);
void xen_smp_send_reschedule(int cpu);
@@ -44,6 +45,7 @@ static inline int xen_smp_intr_init_pv(unsigned int cpu)
return 0;
}
static inline void xen_smp_intr_free_pv(unsigned int cpu) {}
+static inline void xen_smp_count_cpus(void) { }
#endif /* CONFIG_SMP */
#endif
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index a0f07bbfcd6e..27d1a5b7f571 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -29,6 +29,7 @@
#include <asm/idtentry.h>
#include <asm/desc.h>
#include <asm/cpu.h>
+#include <asm/apic.h>
#include <asm/io_apic.h>
#include <xen/interface/xen.h>
@@ -73,7 +74,6 @@ static void cpu_bringup(void)
}
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
- cpu_data(cpu).x86_max_cores = 1;
set_cpu_sibling_map(cpu);
speculative_store_bypass_ht_init();
@@ -149,39 +149,16 @@ int xen_smp_intr_init_pv(unsigned int cpu)
return rc;
}
-static void __init _get_smp_config(unsigned int early)
+static void __init xen_pv_smp_config(void)
{
- int i, rc;
- unsigned int subtract = 0;
-
- if (early)
- return;
-
- num_processors = 0;
- disabled_cpus = 0;
- for (i = 0; i < nr_cpu_ids; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0) {
- num_processors++;
- set_cpu_possible(i, true);
- } else {
- set_cpu_possible(i, false);
- set_cpu_present(i, false);
- subtract++;
- }
- }
-#ifdef CONFIG_HOTPLUG_CPU
- /* This is akin to using 'nr_cpus' on the Linux command line.
- * Which is OK as when we use 'dom0_max_vcpus=X' we can only
- * have up to X, while nr_cpu_ids is greater than X. This
- * normally is not a problem, except when CPU hotplugging
- * is involved and then there might be more than X CPUs
- * in the guest - which will not work as there is no
- * hypercall to expand the max number of VCPUs an already
- * running guest has. So cap it up to X. */
- if (subtract)
- set_nr_cpu_ids(nr_cpu_ids - subtract);
-#endif
+ u32 apicid = 0;
+ int i;
+
+ topology_register_boot_apic(apicid++);
+
+ for (i = 1; i < nr_cpu_ids; i++)
+ topology_register_apic(apicid++, CPU_ACPIID_INVALID, true);
+
/* Pretend to be a proper enumerated system */
smp_found_config = 1;
}
@@ -224,8 +201,6 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
smp_prepare_cpus_common();
- cpu_data(0).x86_max_cores = 1;
-
speculative_store_bypass_ht_init();
xen_pmu_init(0);
@@ -434,6 +409,20 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+void __init xen_smp_count_cpus(void)
+{
+ unsigned int cpus;
+
+ for (cpus = 0; cpus < nr_cpu_ids; cpus++) {
+ if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpus, NULL) < 0)
+ break;
+ }
+
+ pr_info("Xen PV: Detected %u vCPUS\n", cpus);
+ if (cpus < nr_cpu_ids)
+ set_nr_cpu_ids(cpus);
+}
+
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_pv_smp_prepare_cpus,
@@ -458,6 +447,12 @@ void __init xen_smp_init(void)
smp_ops = xen_smp_ops;
/* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = _get_smp_config;
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+
+ /* XEN/PV Dom0 has halfways sane topology information via CPUID/MADT */
+ if (xen_initial_domain())
+ x86_init.mpparse.parse_smp_cfg = x86_init_noop;
+ else
+ x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config;
}
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index d97adab8420f..f7547807b0bd 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -2,7 +2,6 @@
#include <linux/screen_info.h>
#include <linux/init.h>
-#include <asm/bootparam.h>
#include <asm/setup.h>
#include <xen/interface/xen.h>
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1a9cd18dfbd3..83189cf5cdce 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -28,7 +28,7 @@
* non-zero.
*/
SYM_FUNC_START(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
RET
SYM_FUNC_END(xen_irq_disable_direct)
@@ -69,7 +69,7 @@ SYM_FUNC_END(check_events)
SYM_FUNC_START(xen_irq_enable_direct)
FRAME_BEGIN
/* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
/*
* Preempt here doesn't matter because that will deal with any
@@ -78,7 +78,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
*/
/* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
jz 1f
call check_events
@@ -97,7 +97,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
* x86 use opposite senses (mask vs enable).
*/
SYM_FUNC_START(xen_save_fl_direct)
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
setz %ah
addb %ah, %ah
RET
@@ -113,7 +113,7 @@ SYM_FUNC_END(xen_read_cr2);
SYM_FUNC_START(xen_read_cr2_direct)
FRAME_BEGIN
- _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
FRAME_END
RET
SYM_FUNC_END(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index a0ea285878db..04101b984f24 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
ANNOTATE_NOENDBR
cld
- leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
+ leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
/* Set up %gs.
*
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 6f248d87e496..87ec35b3363b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -44,6 +44,7 @@ config XTENSA
select HAVE_GCC_PLUGINS if GCC_VERSION >= 120000
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_PAGE_SIZE_4KB
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_STACKPROTECTOR
diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h
index c812bf85021c..46c8596259d2 100644
--- a/arch/xtensa/include/asm/jump_label.h
+++ b/arch/xtensa/include/asm/jump_label.h
@@ -13,7 +13,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
"_nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key,
* make it reachable and wrap both into a no-transform block
* to avoid any assembler interference with this.
*/
- asm_volatile_goto("1:\n\t"
+ asm goto("1:\n\t"
".begin no-transform\n\t"
"_j %l[l_yes]\n\t"
"2:\n\t"
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index a77d04972eb9..4db56ef052d2 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -22,7 +22,7 @@
* PAGE_SHIFT determines the page size
*/
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#define PAGE_SIZE (__XTENSA_UL_CONST(1) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 178cf96ca10a..defc67909a9c 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -264,16 +264,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
struct proc_dir_entry *procdir)
{
char tmp[2] = { '0' + which, 0 };
- int err = -ENOMEM;
+ int err;
dev->fd = -1;
dev->filename = NULL;
spin_lock_init(&dev->lock);
dev->users = 0;
- dev->gd = blk_alloc_disk(NUMA_NO_NODE);
- if (!dev->gd)
+ dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE);
+ if (IS_ERR(dev->gd)) {
+ err = PTR_ERR(dev->gd);
goto out;
+ }
dev->gd->major = simdisk_major;
dev->gd->first_minor = which;
dev->gd->minors = SIMDISK_MINORS;
diff --git a/block/bdev.c b/block/bdev.c
index e9f1b12bd75c..e7adaaf1c219 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -49,6 +49,12 @@ struct block_device *I_BDEV(struct inode *inode)
}
EXPORT_SYMBOL(I_BDEV);
+struct block_device *file_bdev(struct file *bdev_file)
+{
+ return I_BDEV(bdev_file->f_mapping->host);
+}
+EXPORT_SYMBOL(file_bdev);
+
static void bdev_write_inode(struct block_device *bdev)
{
struct inode *inode = bdev->bd_inode;
@@ -368,24 +374,24 @@ static struct file_system_type bd_type = {
};
struct super_block *blockdev_superblock __ro_after_init;
+struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);
void __init bdev_cache_init(void)
{
int err;
- static struct vfsmount *bd_mnt __ro_after_init;
bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
+ SLAB_ACCOUNT|SLAB_PANIC),
init_once);
err = register_filesystem(&bd_type);
if (err)
panic("Cannot register bdev pseudo-fs");
- bd_mnt = kern_mount(&bd_type);
- if (IS_ERR(bd_mnt))
+ blockdev_mnt = kern_mount(&bd_type);
+ if (IS_ERR(blockdev_mnt))
panic("Cannot create bdev pseudo-fs");
- blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
+ blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */
}
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
@@ -696,6 +702,31 @@ out_blkdev_put:
return ret;
}
+int bdev_permission(dev_t dev, blk_mode_t mode, void *holder)
+{
+ int ret;
+
+ ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+ MAJOR(dev), MINOR(dev),
+ ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
+ if (ret)
+ return ret;
+
+ /* Blocking writes requires exclusive opener */
+ if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
+ return -EINVAL;
+
+ /*
+ * We're using error pointers to indicate to ->release() when we
+ * failed to open that block device. Also this doesn't make sense.
+ */
+ if (WARN_ON_ONCE(IS_ERR(holder)))
+ return -EINVAL;
+
+ return 0;
+}
+
static void blkdev_put_part(struct block_device *part)
{
struct block_device *whole = bdev_whole(part);
@@ -775,83 +806,55 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
bdev->bd_writers++;
}
-static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode)
+static void bdev_yield_write_access(struct file *bdev_file)
{
+ struct block_device *bdev;
+
if (bdev_allow_write_mounted)
return;
+ bdev = file_bdev(bdev_file);
/* Yield exclusive or shared write access. */
- if (mode & BLK_OPEN_RESTRICT_WRITES)
- bdev_unblock_writes(bdev);
- else if (mode & BLK_OPEN_WRITE)
- bdev->bd_writers--;
+ if (bdev_file->f_mode & FMODE_WRITE) {
+ if (bdev_writes_blocked(bdev))
+ bdev_unblock_writes(bdev);
+ else
+ bdev->bd_writers--;
+ }
}
/**
- * bdev_open_by_dev - open a block device by device number
- * @dev: device number of block device to open
+ * bdev_open - open a block device
+ * @bdev: block device to open
* @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
* @hops: holder operations
+ * @bdev_file: file for the block device
*
- * Open the block device described by device number @dev. If @holder is not
- * %NULL, the block device is opened with exclusive access. Exclusive opens may
- * nest for the same @holder.
- *
- * Use this interface ONLY if you really do not have anything better - i.e. when
- * you are behind a truly sucky interface and all you are given is a device
- * number. Everything else should use bdev_open_by_path().
+ * Open the block device. If @holder is not %NULL, the block device is opened
+ * with exclusive access. Exclusive opens may nest for the same @holder.
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * Handle with a reference to the block_device on success, ERR_PTR(-errno) on
- * failure.
+ * zero on success, -errno on failure.
*/
-struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
- const struct blk_holder_ops *hops)
+int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops, struct file *bdev_file)
{
- struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle),
- GFP_KERNEL);
- struct block_device *bdev;
bool unblock_events = true;
- struct gendisk *disk;
+ struct gendisk *disk = bdev->bd_disk;
int ret;
- if (!handle)
- return ERR_PTR(-ENOMEM);
-
- ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
- MAJOR(dev), MINOR(dev),
- ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
- ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
- if (ret)
- goto free_handle;
-
- /* Blocking writes requires exclusive opener */
- if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) {
- ret = -EINVAL;
- goto free_handle;
- }
-
- bdev = blkdev_get_no_open(dev);
- if (!bdev) {
- ret = -ENXIO;
- goto free_handle;
- }
- disk = bdev->bd_disk;
-
if (holder) {
mode |= BLK_OPEN_EXCL;
ret = bd_prepare_to_claim(bdev, holder, hops);
if (ret)
- goto put_blkdev;
+ return ret;
} else {
- if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) {
- ret = -EIO;
- goto put_blkdev;
- }
+ if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL))
+ return -EIO;
}
disk_block_events(disk);
@@ -892,10 +895,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
if (unblock_events)
disk_unblock_events(disk);
- handle->bdev = bdev;
- handle->holder = holder;
- handle->mode = mode;
- return handle;
+
+ bdev_file->f_flags |= O_LARGEFILE;
+ bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+ if (bdev_nowait(bdev))
+ bdev_file->f_mode |= FMODE_NOWAIT;
+ bdev_file->f_mapping = bdev->bd_inode->i_mapping;
+ bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
+ bdev_file->private_data = holder;
+
+ return 0;
put_module:
module_put(disk->fops->owner);
abort_claiming:
@@ -903,36 +912,80 @@ abort_claiming:
bd_abort_claiming(bdev, holder);
mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
-put_blkdev:
- blkdev_put_no_open(bdev);
-free_handle:
- kfree(handle);
- return ERR_PTR(ret);
+ return ret;
}
-EXPORT_SYMBOL(bdev_open_by_dev);
-/**
- * bdev_open_by_path - open a block device by name
- * @path: path to the block device to open
- * @mode: open mode (BLK_OPEN_*)
- * @holder: exclusive holder identifier
- * @hops: holder operations
- *
- * Open the block device described by the device file at @path. If @holder is
- * not %NULL, the block device is opened with exclusive access. Exclusive opens
- * may nest for the same @holder.
- *
- * CONTEXT:
- * Might sleep.
+/*
+ * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk
+ * associated with the floppy driver where it has allowed ioctls if the
+ * file was opened for writing, but does not allow reads or writes.
+ * Make sure that this quirk is reflected in @f_flags.
*
- * RETURNS:
- * Handle with a reference to the block_device on success, ERR_PTR(-errno) on
- * failure.
+ * It can also happen if a block device is opened as O_RDWR | O_WRONLY.
*/
-struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
- void *holder, const struct blk_holder_ops *hops)
+static unsigned blk_to_file_flags(blk_mode_t mode)
+{
+ unsigned int flags = 0;
+
+ if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) ==
+ (BLK_OPEN_READ | BLK_OPEN_WRITE))
+ flags |= O_RDWR;
+ else if (mode & BLK_OPEN_WRITE_IOCTL)
+ flags |= O_RDWR | O_WRONLY;
+ else if (mode & BLK_OPEN_WRITE)
+ flags |= O_WRONLY;
+ else if (mode & BLK_OPEN_READ)
+ flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */
+ else
+ WARN_ON_ONCE(true);
+
+ if (mode & BLK_OPEN_NDELAY)
+ flags |= O_NDELAY;
+
+ return flags;
+}
+
+struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops)
{
- struct bdev_handle *handle;
+ struct file *bdev_file;
+ struct block_device *bdev;
+ unsigned int flags;
+ int ret;
+
+ ret = bdev_permission(dev, mode, holder);
+ if (ret)
+ return ERR_PTR(ret);
+
+ bdev = blkdev_get_no_open(dev);
+ if (!bdev)
+ return ERR_PTR(-ENXIO);
+
+ flags = blk_to_file_flags(mode);
+ bdev_file = alloc_file_pseudo_noaccount(bdev->bd_inode,
+ blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops);
+ if (IS_ERR(bdev_file)) {
+ blkdev_put_no_open(bdev);
+ return bdev_file;
+ }
+ ihold(bdev->bd_inode);
+
+ ret = bdev_open(bdev, mode, holder, hops, bdev_file);
+ if (ret) {
+ /* We failed to open the block device. Let ->release() know. */
+ bdev_file->private_data = ERR_PTR(ret);
+ fput(bdev_file);
+ return ERR_PTR(ret);
+ }
+ return bdev_file;
+}
+EXPORT_SYMBOL(bdev_file_open_by_dev);
+
+struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
+ void *holder,
+ const struct blk_holder_ops *hops)
+{
+ struct file *file;
dev_t dev;
int error;
@@ -940,22 +993,28 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
if (error)
return ERR_PTR(error);
- handle = bdev_open_by_dev(dev, mode, holder, hops);
- if (!IS_ERR(handle) && (mode & BLK_OPEN_WRITE) &&
- bdev_read_only(handle->bdev)) {
- bdev_release(handle);
- return ERR_PTR(-EACCES);
+ file = bdev_file_open_by_dev(dev, mode, holder, hops);
+ if (!IS_ERR(file) && (mode & BLK_OPEN_WRITE)) {
+ if (bdev_read_only(file_bdev(file))) {
+ fput(file);
+ file = ERR_PTR(-EACCES);
+ }
}
- return handle;
+ return file;
}
-EXPORT_SYMBOL(bdev_open_by_path);
+EXPORT_SYMBOL(bdev_file_open_by_path);
-void bdev_release(struct bdev_handle *handle)
+void bdev_release(struct file *bdev_file)
{
- struct block_device *bdev = handle->bdev;
+ struct block_device *bdev = file_bdev(bdev_file);
+ void *holder = bdev_file->private_data;
struct gendisk *disk = bdev->bd_disk;
+ /* We failed to open that block device. */
+ if (IS_ERR(holder))
+ goto put_no_open;
+
/*
* Sync early if it looks like we're the last one. If someone else
* opens the block device between now and the decrement of bd_openers
@@ -967,10 +1026,10 @@ void bdev_release(struct bdev_handle *handle)
sync_blockdev(bdev);
mutex_lock(&disk->open_mutex);
- bdev_yield_write_access(bdev, handle->mode);
+ bdev_yield_write_access(bdev_file);
- if (handle->holder)
- bd_end_claim(bdev, handle->holder);
+ if (holder)
+ bd_end_claim(bdev, holder);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -986,10 +1045,9 @@ void bdev_release(struct bdev_handle *handle)
mutex_unlock(&disk->open_mutex);
module_put(disk->fops->owner);
+put_no_open:
blkdev_put_no_open(bdev);
- kfree(handle);
}
-EXPORT_SYMBOL(bdev_release);
/**
* lookup_bdev() - Look up a struct block_device by name.
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 2c90e5de0acd..d442ee358fc2 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -127,7 +127,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
if (!bfqg_stats_waiting(stats))
return;
- now = ktime_get_ns();
+ now = blk_time_get_ns();
if (now > stats->start_group_wait_time)
bfq_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
@@ -144,7 +144,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
return;
if (bfqg == curr_bfqg)
return;
- stats->start_group_wait_time = ktime_get_ns();
+ stats->start_group_wait_time = blk_time_get_ns();
bfqg_stats_mark_waiting(stats);
}
@@ -156,7 +156,7 @@ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
if (!bfqg_stats_empty(stats))
return;
- now = ktime_get_ns();
+ now = blk_time_get_ns();
if (now > stats->start_empty_time)
bfq_stat_add(&stats->empty_time,
now - stats->start_empty_time);
@@ -183,7 +183,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
if (bfqg_stats_empty(stats))
return;
- stats->start_empty_time = ktime_get_ns();
+ stats->start_empty_time = blk_time_get_ns();
bfqg_stats_mark_empty(stats);
}
@@ -192,7 +192,7 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
struct bfqg_stats *stats = &bfqg->stats;
if (bfqg_stats_idling(stats)) {
- u64 now = ktime_get_ns();
+ u64 now = blk_time_get_ns();
if (now > stats->start_idle_time)
bfq_stat_add(&stats->idle_time,
@@ -205,7 +205,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
struct bfqg_stats *stats = &bfqg->stats;
- stats->start_idle_time = ktime_get_ns();
+ stats->start_idle_time = blk_time_get_ns();
bfqg_stats_mark_idling(stats);
}
@@ -242,7 +242,7 @@ void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, blk_opf_t opf)
{
struct bfqg_stats *stats = &bfqg->stats;
- u64 now = ktime_get_ns();
+ u64 now = blk_time_get_ns();
if (now > io_start_time_ns)
blkg_rwstat_add(&stats->service_time, opf,
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3cce6de464a7..4b88a54a9b76 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1005,7 +1005,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
rq = rq_entry_fifo(bfqq->fifo.next);
- if (rq == last || ktime_get_ns() < rq->fifo_time)
+ if (rq == last || blk_time_get_ns() < rq->fifo_time)
return NULL;
bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq);
@@ -1829,7 +1829,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
* bfq_bfqq_update_budg_for_activation for
* details on the usage of the next variable.
*/
- arrived_in_time = ktime_get_ns() <=
+ arrived_in_time = blk_time_get_ns() <=
bfqq->ttime.last_end_request +
bfqd->bfq_slice_idle * 3;
unsigned int act_idx = bfq_actuator_index(bfqd, rq->bio);
@@ -2208,7 +2208,7 @@ static void bfq_add_request(struct request *rq)
struct request *next_rq, *prev;
unsigned int old_wr_coeff = bfqq->wr_coeff;
bool interactive = false;
- u64 now_ns = ktime_get_ns();
+ u64 now_ns = blk_time_get_ns();
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
bfqq->queued[rq_is_sync(rq)]++;
@@ -2262,7 +2262,7 @@ static void bfq_add_request(struct request *rq)
bfqd->rqs_injected && bfqd->tot_rq_in_driver > 0)) &&
time_is_before_eq_jiffies(bfqq->decrease_time_jif +
msecs_to_jiffies(10))) {
- bfqd->last_empty_occupied_ns = ktime_get_ns();
+ bfqd->last_empty_occupied_ns = blk_time_get_ns();
/*
* Start the state machine for measuring the
* total service time of rq: setting
@@ -3294,7 +3294,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd,
else
timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
- bfqd->last_budget_start = ktime_get();
+ bfqd->last_budget_start = blk_time_get();
bfqq->budget_timeout = jiffies +
bfqd->bfq_timeout * timeout_coeff;
@@ -3394,7 +3394,7 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
else if (bfqq->wr_coeff > 1)
sl = max_t(u32, sl, 20ULL * NSEC_PER_MSEC);
- bfqd->last_idling_start = ktime_get();
+ bfqd->last_idling_start = blk_time_get();
bfqd->last_idling_start_jiffies = jiffies;
hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
@@ -3433,7 +3433,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd,
struct request *rq)
{
if (rq != NULL) { /* new rq dispatch now, reset accordingly */
- bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns();
+ bfqd->last_dispatch = bfqd->first_dispatch = blk_time_get_ns();
bfqd->peak_rate_samples = 1;
bfqd->sequential_samples = 0;
bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size =
@@ -3590,7 +3590,7 @@ reset_computation:
*/
static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
{
- u64 now_ns = ktime_get_ns();
+ u64 now_ns = blk_time_get_ns();
if (bfqd->peak_rate_samples == 0) { /* first dispatch */
bfq_log(bfqd, "update_peak_rate: goto reset, samples %d",
@@ -4162,7 +4162,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (compensate)
delta_ktime = bfqd->last_idling_start;
else
- delta_ktime = ktime_get();
+ delta_ktime = blk_time_get();
delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
delta_usecs = ktime_to_us(delta_ktime);
@@ -5591,7 +5591,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_io_cq *bic, pid_t pid, int is_sync,
unsigned int act_idx)
{
- u64 now_ns = ktime_get_ns();
+ u64 now_ns = blk_time_get_ns();
bfqq->actuator_idx = act_idx;
RB_CLEAR_NODE(&bfqq->entity.rb_node);
@@ -5903,7 +5903,7 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
*/
if (bfqq->dispatched || bfq_bfqq_busy(bfqq))
return;
- elapsed = ktime_get_ns() - bfqq->ttime.last_end_request;
+ elapsed = blk_time_get_ns() - bfqq->ttime.last_end_request;
elapsed = min_t(u64, elapsed, 2ULL * bfqd->bfq_slice_idle);
ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
@@ -6194,7 +6194,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bfq_add_request(rq);
idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
- rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ rq->fifo_time = blk_time_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo);
bfq_rq_enqueued(bfqd, bfqq, rq);
@@ -6370,7 +6370,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_weights_tree_remove(bfqq);
}
- now_ns = ktime_get_ns();
+ now_ns = blk_time_get_ns();
bfqq->ttime.last_end_request = now_ns;
@@ -6585,7 +6585,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
static void bfq_update_inject_limit(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
- u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns;
+ u64 tot_time_ns = blk_time_get_ns() - bfqd->last_empty_occupied_ns;
unsigned int old_limit = bfqq->inject_limit;
if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) {
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index c9a16fba58b9..2e3e8e04961e 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -395,6 +395,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
iter.tuple_size = bi->tuple_size;
iter.seed = proc_iter->bi_sector;
iter.prot_buf = bvec_virt(bip->bip_vec);
+ iter.pi_offset = bi->pi_offset;
__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
void *kaddr = bvec_kmap_local(&bv);
diff --git a/block/bio.c b/block/bio.c
index b9642a41f286..d24420ed1c4c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -16,7 +16,6 @@
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/highmem.h>
-#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>
@@ -251,6 +250,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
bio->bi_opf = opf;
bio->bi_flags = 0;
bio->bi_ioprio = 0;
+ bio->bi_write_hint = 0;
bio->bi_status = 0;
bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0;
@@ -762,29 +762,31 @@ static inline void bio_put_percpu_cache(struct bio *bio)
struct bio_alloc_cache *cache;
cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
- if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) {
- put_cpu();
- bio_free(bio);
- return;
- }
+ if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX)
+ goto out_free;
- bio_uninit(bio);
-
- if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) {
+ if (in_task()) {
+ bio_uninit(bio);
bio->bi_next = cache->free_list;
+ /* Not necessary but helps not to iopoll already freed bios */
bio->bi_bdev = NULL;
cache->free_list = bio;
cache->nr++;
- } else {
- unsigned long flags;
+ } else if (in_hardirq()) {
+ lockdep_assert_irqs_disabled();
- local_irq_save(flags);
+ bio_uninit(bio);
bio->bi_next = cache->free_list_irq;
cache->free_list_irq = bio;
cache->nr_irq++;
- local_irq_restore(flags);
+ } else {
+ goto out_free;
}
put_cpu();
+ return;
+out_free:
+ put_cpu();
+ bio_free(bio);
}
/**
@@ -813,6 +815,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
{
bio_set_flag(bio, BIO_CLONED);
bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
if (bio->bi_bdev) {
@@ -1152,7 +1155,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
bio_for_each_folio_all(fi, bio) {
struct page *page;
- size_t done = 0;
+ size_t nr_pages;
if (mark_dirty) {
folio_lock(fi.folio);
@@ -1160,10 +1163,11 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
folio_unlock(fi.folio);
}
page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+ nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE -
+ fi.offset / PAGE_SIZE + 1;
do {
bio_release_page(bio, page++);
- done += PAGE_SIZE;
- } while (done < fi.length);
+ } while (--nr_pages != 0);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1369,21 +1373,12 @@ int submit_bio_wait(struct bio *bio)
{
DECLARE_COMPLETION_ONSTACK_MAP(done,
bio->bi_bdev->bd_disk->lockdep_map);
- unsigned long hang_check;
bio->bi_private = &done;
bio->bi_end_io = submit_bio_wait_endio;
bio->bi_opf |= REQ_SYNC;
submit_bio(bio);
-
- /* Prevent hang_check timer from firing at us during very long I/O */
- hang_check = sysctl_hung_task_timeout_secs;
- if (hang_check)
- while (!wait_for_completion_io_timeout(&done,
- hang_check * (HZ/2)))
- ;
- else
- wait_for_completion_io(&done);
+ blk_wait_io(&done);
return blk_status_to_errno(bio->bi_status);
}
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ff93c385ba5a..bdbb557feb5a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1846,7 +1846,7 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
{
unsigned long pflags;
bool clamp;
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = blk_time_get_ns();
u64 exp;
u64 delay_nsec = 0;
int tok;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index b927a4a0ad03..78b74106bf10 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -19,6 +19,7 @@
#include <linux/kthread.h>
#include <linux/blk-mq.h>
#include <linux/llist.h>
+#include "blk.h"
struct blkcg_gq;
struct blkg_policy_data;
diff --git a/block/blk-core.c b/block/blk-core.c
index 11342af420d0..a16b5abdbbf5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -49,6 +49,7 @@
#include "blk-pm.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
+#include "blk-ioprio.h"
struct dentry *blk_debugfs_root;
@@ -393,24 +394,34 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-struct request_queue *blk_alloc_queue(int node_id)
+struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
{
struct request_queue *q;
+ int error;
q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
node_id);
if (!q)
- return NULL;
+ return ERR_PTR(-ENOMEM);
q->last_merge = NULL;
q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
- if (q->id < 0)
+ if (q->id < 0) {
+ error = q->id;
goto fail_q;
+ }
q->stats = blk_alloc_queue_stats();
- if (!q->stats)
+ if (!q->stats) {
+ error = -ENOMEM;
goto fail_id;
+ }
+
+ error = blk_set_default_limits(lim);
+ if (error)
+ goto fail_stats;
+ q->limits = *lim;
q->node = node_id;
@@ -424,6 +435,7 @@ struct request_queue *blk_alloc_queue(int node_id)
mutex_init(&q->debugfs_mutex);
mutex_init(&q->sysfs_lock);
mutex_init(&q->sysfs_dir_lock);
+ mutex_init(&q->limits_lock);
mutex_init(&q->rq_qos_mutex);
spin_lock_init(&q->queue_lock);
@@ -434,12 +446,12 @@ struct request_queue *blk_alloc_queue(int node_id)
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
*/
- if (percpu_ref_init(&q->q_usage_counter,
+ error = percpu_ref_init(&q->q_usage_counter,
blk_queue_usage_counter_release,
- PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
+ PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
+ if (error)
goto fail_stats;
- blk_set_default_limits(&q->limits);
q->nr_requests = BLKDEV_DEFAULT_RQ;
return q;
@@ -450,7 +462,7 @@ fail_id:
ida_free(&blk_queue_ida, q->id);
fail_q:
kmem_cache_free(blk_requestq_cachep, q);
- return NULL;
+ return ERR_PTR(error);
}
/**
@@ -833,6 +845,14 @@ end_io:
}
EXPORT_SYMBOL(submit_bio_noacct);
+static void bio_set_ioprio(struct bio *bio)
+{
+ /* Nobody set ioprio so far? Initialize it based on task's nice value */
+ if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
+ bio->bi_ioprio = get_current_ioprio();
+ blkcg_set_ioprio(bio);
+}
+
/**
* submit_bio - submit a bio to the block device layer for I/O
* @bio: The &struct bio which describes the I/O
@@ -855,6 +875,7 @@ void submit_bio(struct bio *bio)
count_vm_events(PGPGOUT, bio_sectors(bio));
}
+ bio_set_ioprio(bio);
submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio);
@@ -1073,6 +1094,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
if (tsk->plug)
return;
+ plug->cur_ktime = 0;
plug->mq_list = NULL;
plug->cached_rq = NULL;
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
@@ -1172,6 +1194,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
*/
if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
+
+ current->flags &= ~PF_BLOCK_TS;
}
/**
@@ -1219,8 +1243,7 @@ int __init blk_dev_init(void)
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
- blk_requestq_cachep = kmem_cache_create("request_queue",
- sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
+ blk_requestq_cachep = KMEM_CACHE(request_queue, SLAB_PANIC);
blk_debugfs_root = debugfs_create_dir("block", NULL);
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index e6468eab2681..b1e7415f8439 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3f4d41952ef2..b0f314f4bc14 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -143,7 +143,7 @@ static void blk_account_io_flush(struct request *rq)
part_stat_lock();
part_stat_inc(part, ios[STAT_FLUSH]);
part_stat_add(part, nsecs[STAT_FLUSH],
- ktime_get_ns() - rq->start_time_ns);
+ blk_time_get_ns() - rq->start_time_ns);
part_stat_unlock();
}
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index d4e9b4556d14..ccbeb6dfa87a 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -370,6 +370,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
bi->profile = template->profile ? template->profile : &nop_profile;
bi->tuple_size = template->tuple_size;
bi->tag_size = template->tag_size;
+ bi->pi_offset = template->pi_offset;
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index c8beec6d7df0..9a85bfbbc45a 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -829,7 +829,7 @@ static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
/* step up/down based on the vrate */
vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC);
- now_ns = ktime_get_ns();
+ now_ns = blk_time_get_ns();
if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) {
if (!ioc->autop_too_fast_at)
@@ -1044,7 +1044,7 @@ static void ioc_now(struct ioc *ioc, struct ioc_now *now)
unsigned seq;
u64 vrate;
- now->now_ns = ktime_get();
+ now->now_ns = blk_time_get_ns();
now->now = ktime_to_us(now->now_ns);
vrate = atomic64_read(&ioc->vtime_rate);
@@ -1353,6 +1353,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
lockdep_assert_held(&iocg->waitq.lock);
+ /*
+ * If the delay is set by another CPU, we may be in the past. No need to
+ * change anything if so. This avoids decay calculation underflow.
+ */
+ if (time_before64(now->now, iocg->delay_at))
+ return false;
+
/* calculate the current delay in effect - 1/2 every second */
tdelta = now->now - iocg->delay_at;
if (iocg->delay)
@@ -2810,7 +2817,7 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
return;
}
- on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
+ on_q_ns = blk_time_get_ns() - rq->alloc_time_ns;
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
@@ -2893,7 +2900,7 @@ static int blk_iocost_init(struct gendisk *disk)
ioc->vtime_base_rate = VTIME_PER_USEC;
atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
- ioc->period_at = ktime_to_us(ktime_get());
+ ioc->period_at = ktime_to_us(blk_time_get());
atomic64_set(&ioc->cur_period, 0);
atomic_set(&ioc->hweight_gen, 0);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index c1a6aba1d59e..ebb522788d97 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -609,7 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
if (!iolat->blkiolat->enabled)
return;
- now = ktime_to_ns(ktime_get());
+ now = blk_time_get_ns();
while (blkg && blkg->parent) {
iolat = blkg_to_lat(blkg);
if (!iolat) {
@@ -661,7 +661,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
struct blkcg_gq *blkg;
struct cgroup_subsys_state *pos_css;
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = blk_time_get_ns();
rcu_read_lock();
blkg_for_each_descendant_pre(blkg, pos_css,
@@ -985,7 +985,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
struct blkcg_gq *blkg = lat_to_blkg(iolat);
struct rq_qos *rqos = iolat_rq_qos(blkg->q);
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = blk_time_get_ns();
int cpu;
if (blk_queue_nonrot(blkg->q))
diff --git a/block/blk-lib.c b/block/blk-lib.c
index e59c3069e835..dc8e35d0a51d 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -35,6 +35,26 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
}
+static void await_bio_endio(struct bio *bio)
+{
+ complete(bio->bi_private);
+ bio_put(bio);
+}
+
+/*
+ * await_bio_chain - ends @bio and waits for every chained bio to complete
+ */
+static void await_bio_chain(struct bio *bio)
+{
+ DECLARE_COMPLETION_ONSTACK_MAP(done,
+ bio->bi_bdev->bd_disk->lockdep_map);
+
+ bio->bi_private = &done;
+ bio->bi_end_io = await_bio_endio;
+ bio_endio(bio);
+ blk_wait_io(&done);
+}
+
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
@@ -77,6 +97,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* is disabled.
*/
cond_resched();
+ if (fatal_signal_pending(current)) {
+ await_bio_chain(bio);
+ return -EINTR;
+ }
}
*biop = bio;
@@ -120,32 +144,33 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
struct bio **biop, unsigned flags)
{
struct bio *bio = *biop;
- unsigned int max_write_zeroes_sectors;
+ unsigned int max_sectors;
if (bdev_read_only(bdev))
return -EPERM;
- /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
- max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
+ /* Ensure that max_sectors doesn't overflow bi_size */
+ max_sectors = bdev_write_zeroes_sectors(bdev);
- if (max_write_zeroes_sectors == 0)
+ if (max_sectors == 0)
return -EOPNOTSUPP;
while (nr_sects) {
+ unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
- if (nr_sects > max_write_zeroes_sectors) {
- bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
- nr_sects -= max_write_zeroes_sectors;
- sector += max_write_zeroes_sectors;
- } else {
- bio->bi_iter.bi_size = nr_sects << 9;
- nr_sects = 0;
- }
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+ nr_sects -= len;
+ sector += len;
cond_resched();
+ if (fatal_signal_pending(current)) {
+ await_bio_chain(bio);
+ return -EINTR;
+ }
}
*biop = bio;
@@ -190,6 +215,10 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
break;
}
cond_resched();
+ if (fatal_signal_pending(current)) {
+ await_bio_chain(bio);
+ return -EINTR;
+ }
}
*biop = bio;
@@ -280,7 +309,7 @@ retry:
bio_put(bio);
}
blk_finish_plug(&plug);
- if (ret && try_write_zeroes) {
+ if (ret && ret != -EINTR && try_write_zeroes) {
if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
try_write_zeroes = false;
goto retry;
@@ -322,7 +351,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
return -EPERM;
blk_start_plug(&plug);
- for (;;) {
+ while (nr_sects) {
unsigned int len = min_t(sector_t, nr_sects, max_sectors);
bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
@@ -331,12 +360,17 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
sector += len;
nr_sects -= len;
- if (!nr_sects) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
+ cond_resched();
+ if (fatal_signal_pending(current)) {
+ await_bio_chain(bio);
+ ret = -EINTR;
+ bio = NULL;
break;
}
- cond_resched();
+ }
+ if (bio) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
}
blk_finish_plug(&plug);
diff --git a/block/blk-map.c b/block/blk-map.c
index 8584babf3ea0..71210cdb3442 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -205,12 +205,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
/*
* success
*/
- if ((iov_iter_rw(iter) == WRITE &&
- (!map_data || !map_data->null_mapped)) ||
- (map_data && map_data->from_user)) {
+ if (iov_iter_rw(iter) == WRITE &&
+ (!map_data || !map_data->null_mapped)) {
ret = bio_copy_from_iter(bio, iter);
if (ret)
goto cleanup;
+ } else if (map_data && map_data->from_user) {
+ struct iov_iter iter2 = *iter;
+
+ /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
+ iter2.data_source = ITER_SOURCE;
+ ret = bio_copy_from_iter(bio, &iter2);
+ if (ret)
+ goto cleanup;
} else {
if (bmd->is_our_pages)
zero_fill_bio(bio);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2d470cf2173e..2a06fd33039d 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q,
if (rq_data_dir(req) != rq_data_dir(next))
return NULL;
+ /* Don't merge requests with different write hints. */
+ if (req->write_hint != next->write_hint)
+ return NULL;
+
if (req->ioprio != next->ioprio)
return NULL;
@@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (!bio_crypt_rq_ctx_compatible(rq, bio))
return false;
+ /* Don't merge requests with different write hints. */
+ if (rq->write_hint != bio->bi_write_hint)
+ return false;
+
if (rq->ioprio != bio_prio(bio))
return false;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa87fcfda1ec..555ada922cf0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -21,7 +21,6 @@
#include <linux/llist.h>
#include <linux/cpu.h>
#include <linux/cache.h>
-#include <linux/sched/sysctl.h>
#include <linux/sched/topology.h>
#include <linux/sched/signal.h>
#include <linux/delay.h>
@@ -40,7 +39,6 @@
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
-#include "blk-ioprio.h"
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
@@ -323,7 +321,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
RB_CLEAR_NODE(&rq->rb_node);
rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = BLK_MQ_NO_TAG;
- rq->start_time_ns = ktime_get_ns();
+ rq->start_time_ns = blk_time_get_ns();
rq->part = NULL;
blk_crypto_rq_set_defaults(rq);
}
@@ -333,7 +331,7 @@ EXPORT_SYMBOL(blk_rq_init);
static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
{
if (blk_mq_need_time_stamp(rq))
- rq->start_time_ns = ktime_get_ns();
+ rq->start_time_ns = blk_time_get_ns();
else
rq->start_time_ns = 0;
@@ -444,7 +442,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
/* alloc_time includes depth and tag waits */
if (blk_queue_rq_alloc_time(q))
- alloc_time_ns = ktime_get_ns();
+ alloc_time_ns = blk_time_get_ns();
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
@@ -629,7 +627,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
/* alloc_time includes depth and tag waits */
if (blk_queue_rq_alloc_time(q))
- alloc_time_ns = ktime_get_ns();
+ alloc_time_ns = blk_time_get_ns();
/*
* If the tag allocator sleeps we could get an allocation for a
@@ -1042,7 +1040,7 @@ static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
if (blk_mq_need_time_stamp(rq))
- __blk_mq_end_request_acct(rq, ktime_get_ns());
+ __blk_mq_end_request_acct(rq, blk_time_get_ns());
blk_mq_finish_request(rq);
@@ -1085,7 +1083,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
u64 now = 0;
if (iob->need_ts)
- now = ktime_get_ns();
+ now = blk_time_get_ns();
while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
prefetch(rq->bio);
@@ -1168,10 +1166,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
if (force_irqthreads())
return false;
- /* same CPU or cache domain? Complete locally */
+ /* same CPU or cache domain and capacity? Complete locally */
if (cpu == rq->mq_ctx->cpu ||
(!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
- cpus_share_cache(cpu, rq->mq_ctx->cpu)))
+ cpus_share_cache(cpu, rq->mq_ctx->cpu) &&
+ cpus_equal_capacity(cpu, rq->mq_ctx->cpu)))
return false;
/* don't try to IPI to an offline CPU */
@@ -1255,7 +1254,7 @@ void blk_mq_start_request(struct request *rq)
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) &&
!blk_rq_is_passthrough(rq)) {
- rq->io_start_time_ns = ktime_get_ns();
+ rq->io_start_time_ns = blk_time_get_ns();
rq->stats_sectors = blk_rq_sectors(rq);
rq->rq_flags |= RQF_STATS;
rq_qos_issue(q, rq);
@@ -1410,22 +1409,10 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
blk_mq_run_hw_queue(hctx, false);
- if (blk_rq_is_poll(rq)) {
+ if (blk_rq_is_poll(rq))
blk_rq_poll_completion(rq, &wait.done);
- } else {
- /*
- * Prevent hang_check timer from firing at us during very long
- * I/O
- */
- unsigned long hang_check = sysctl_hung_task_timeout_secs;
-
- if (hang_check)
- while (!wait_for_completion_io_timeout(&wait.done,
- hang_check * (HZ/2)))
- ;
- else
- wait_for_completion_io(&wait.done);
- }
+ else
+ blk_wait_io(&wait.done);
return wait.ret;
}
@@ -2585,6 +2572,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
rq->cmd_flags |= REQ_FAILFAST_MASK;
rq->__sector = bio->bi_iter.bi_sector;
+ rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs);
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
@@ -2892,9 +2880,6 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
};
struct request *rq;
- if (blk_mq_attempt_bio_merge(q, bio, nsegs))
- return NULL;
-
rq_qos_throttle(q, bio);
if (plug) {
@@ -2913,22 +2898,31 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
}
/*
- * Check if we can use the passed on request for submitting the passed in bio,
- * and remove it from the request list if it can be used.
+ * Check if there is a suitable cached request and return it.
*/
-static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
- struct bio *bio)
+static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
+ struct request_queue *q, blk_opf_t opf)
{
- enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
- enum hctx_type hctx_type = rq->mq_hctx->type;
+ enum hctx_type type = blk_mq_get_hctx_type(opf);
+ struct request *rq;
- WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
+ if (!plug)
+ return NULL;
+ rq = rq_list_peek(&plug->cached_rq);
+ if (!rq || rq->q != q)
+ return NULL;
+ if (type != rq->mq_hctx->type &&
+ (type != HCTX_TYPE_READ || rq->mq_hctx->type != HCTX_TYPE_DEFAULT))
+ return NULL;
+ if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
+ return NULL;
+ return rq;
+}
- if (type != hctx_type &&
- !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
- return false;
- if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
- return false;
+static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
+ struct bio *bio)
+{
+ WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
/*
* If any qos ->throttle() end up blocking, we will have flushed the
@@ -2941,15 +2935,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
blk_mq_rq_time_init(rq, 0);
rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
- return true;
-}
-
-static void bio_set_ioprio(struct bio *bio)
-{
- /* Nobody set ioprio so far? Initialize it based on task's nice value */
- if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
- bio->bi_ioprio = get_current_ioprio();
- blkcg_set_ioprio(bio);
}
/**
@@ -2971,51 +2956,43 @@ void blk_mq_submit_bio(struct bio *bio)
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
- struct request *rq = NULL;
unsigned int nr_segs = 1;
+ struct request *rq;
blk_status_t ret;
bio = blk_queue_bounce(bio, q);
- bio_set_ioprio(bio);
- if (plug) {
- rq = rq_list_peek(&plug->cached_rq);
- if (rq && rq->q != q)
- rq = NULL;
- }
- if (rq) {
- if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
- bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
- if (!bio)
- return;
- }
- if (!bio_integrity_prep(bio))
- return;
- if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
- return;
- if (blk_mq_use_cached_rq(rq, plug, bio))
- goto done;
- percpu_ref_get(&q->q_usage_counter);
- } else {
+ /*
+ * If the plug has a cached request for this queue, try use it.
+ *
+ * The cached request already holds a q_usage_counter reference and we
+ * don't have to acquire a new one if we use it.
+ */
+ rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
+ if (!rq) {
if (unlikely(bio_queue_enter(bio)))
return;
- if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
- bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
- if (!bio)
- goto fail;
- }
- if (!bio_integrity_prep(bio))
- goto fail;
}
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq)) {
-fail:
- blk_queue_exit(q);
- return;
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ goto queue_exit;
+ }
+ if (!bio_integrity_prep(bio))
+ goto queue_exit;
+
+ if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+ goto queue_exit;
+
+ if (!rq) {
+ rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+ if (unlikely(!rq))
+ goto queue_exit;
+ } else {
+ blk_mq_use_cached_rq(rq, plug, bio);
}
-done:
trace_block_getrq(bio);
rq_qos_track(q, rq, bio);
@@ -3046,6 +3023,15 @@ done:
} else {
blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
}
+ return;
+
+queue_exit:
+ /*
+ * Don't drop the queue reference if we were trying to use a cached
+ * request and thus didn't acquire one.
+ */
+ if (!rq)
+ blk_queue_exit(q);
}
#ifdef CONFIG_BLK_MQ_STACKING
@@ -3107,7 +3093,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
blk_mq_run_dispatch_ops(q,
ret = blk_mq_request_issue_directly(rq, true));
if (ret)
- blk_account_io_done(rq, ktime_get_ns());
+ blk_account_io_done(rq, blk_time_get_ns());
return ret;
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
@@ -3185,6 +3171,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
rq->ioprio = rq_src->ioprio;
+ rq->write_hint = rq_src->write_hint;
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
goto free_and_out;
@@ -4086,15 +4073,16 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q);
}
-static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
- void *queuedata)
+struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
+ struct queue_limits *lim, void *queuedata)
{
+ struct queue_limits default_lim = { };
struct request_queue *q;
int ret;
- q = blk_alloc_queue(set->numa_node);
- if (!q)
- return ERR_PTR(-ENOMEM);
+ q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node);
+ if (IS_ERR(q))
+ return q;
q->queuedata = queuedata;
ret = blk_mq_init_allocated_queue(set, q);
if (ret) {
@@ -4103,20 +4091,15 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
}
return q;
}
-
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
-{
- return blk_mq_init_queue_data(set, NULL);
-}
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(blk_mq_alloc_queue);
/**
* blk_mq_destroy_queue - shutdown a request queue
* @q: request queue to shutdown
*
- * This shuts down a request queue allocated by blk_mq_init_queue(). All future
+ * This shuts down a request queue allocated by blk_mq_alloc_queue(). All future
* requests will be failed with -ENODEV. The caller is responsible for dropping
- * the reference from blk_mq_init_queue() by calling blk_put_queue().
+ * the reference from blk_mq_alloc_queue() by calling blk_put_queue().
*
* Context: can sleep
*/
@@ -4137,13 +4120,14 @@ void blk_mq_destroy_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_mq_destroy_queue);
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
+ struct queue_limits *lim, void *queuedata,
struct lock_class_key *lkclass)
{
struct request_queue *q;
struct gendisk *disk;
- q = blk_mq_init_queue_data(set, queuedata);
+ q = blk_mq_alloc_queue(set, lim, queuedata);
if (IS_ERR(q))
return ERR_CAST(q);
@@ -4397,7 +4381,7 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
if (set->nr_maps == 1)
set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
- if (set->ops->map_queues && !is_kdump_kernel()) {
+ if (set->ops->map_queues) {
int i;
/*
@@ -4496,14 +4480,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
/*
* If a crashdump is active, then we are potentially in a very
- * memory constrained environment. Limit us to 1 queue and
- * 64 tags to prevent using too much memory.
+ * memory constrained environment. Limit us to 64 tags to prevent
+ * using too much memory.
*/
- if (is_kdump_kernel()) {
- set->nr_hw_queues = 1;
- set->nr_maps = 1;
+ if (is_kdump_kernel())
set->queue_depth = min(64U, set->queue_depth);
- }
+
/*
* There is no use for more h/w queues than cpus if we just have
* a single map
@@ -4533,7 +4515,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
GFP_KERNEL, set->numa_node);
if (!set->map[i].mq_map)
goto out_free_mq_map;
- set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
+ set->map[i].nr_queues = set->nr_hw_queues;
}
blk_mq_update_queue_map(set);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 06ea91e51b8b..e160d56e8eda 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -26,52 +26,21 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
/**
- * blk_set_default_limits - reset limits to default values
- * @lim: the queue_limits structure to reset
- *
- * Description:
- * Returns a queue_limit struct to its default state.
- */
-void blk_set_default_limits(struct queue_limits *lim)
-{
- lim->max_segments = BLK_MAX_SEGMENTS;
- lim->max_discard_segments = 1;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->virt_boundary_mask = 0;
- lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
- lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
- lim->max_user_sectors = lim->max_dev_sectors = 0;
- lim->chunk_sectors = 0;
- lim->max_write_zeroes_sectors = 0;
- lim->max_zone_append_sectors = 0;
- lim->max_discard_sectors = 0;
- lim->max_hw_discard_sectors = 0;
- lim->max_secure_erase_sectors = 0;
- lim->discard_granularity = 512;
- lim->discard_alignment = 0;
- lim->discard_misaligned = 0;
- lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
- lim->bounce = BLK_BOUNCE_NONE;
- lim->alignment_offset = 0;
- lim->io_opt = 0;
- lim->misaligned = 0;
- lim->zoned = false;
- lim->zone_write_granularity = 0;
- lim->dma_alignment = 511;
-}
-
-/**
* blk_set_stacking_limits - set default limits for stacking devices
* @lim: the queue_limits structure to reset
*
- * Description:
- * Returns a queue_limit struct to its default state. Should be used
- * by stacking drivers like DM that have no internal limits.
+ * Prepare queue limits for applying limits from underlying devices using
+ * blk_stack_limits().
*/
void blk_set_stacking_limits(struct queue_limits *lim)
{
- blk_set_default_limits(lim);
+ memset(lim, 0, sizeof(*lim));
+ lim->logical_block_size = SECTOR_SIZE;
+ lim->physical_block_size = SECTOR_SIZE;
+ lim->io_min = SECTOR_SIZE;
+ lim->discard_granularity = SECTOR_SIZE;
+ lim->dma_alignment = SECTOR_SIZE - 1;
+ lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
/* Inherit limits from component devices */
lim->max_segments = USHRT_MAX;
@@ -82,9 +51,239 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_dev_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
lim->max_zone_append_sectors = UINT_MAX;
+ lim->max_user_discard_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
+static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+ struct queue_limits *lim)
+{
+ /*
+ * For read-ahead of large files to be effective, we need to read ahead
+ * at least twice the optimal I/O size.
+ */
+ bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
+ bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
+}
+
+static int blk_validate_zoned_limits(struct queue_limits *lim)
+{
+ if (!lim->zoned) {
+ if (WARN_ON_ONCE(lim->max_open_zones) ||
+ WARN_ON_ONCE(lim->max_active_zones) ||
+ WARN_ON_ONCE(lim->zone_write_granularity) ||
+ WARN_ON_ONCE(lim->max_zone_append_sectors))
+ return -EINVAL;
+ return 0;
+ }
+
+ if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
+ return -EINVAL;
+
+ if (lim->zone_write_granularity < lim->logical_block_size)
+ lim->zone_write_granularity = lim->logical_block_size;
+
+ if (lim->max_zone_append_sectors) {
+ /*
+ * The Zone Append size is limited by the maximum I/O size
+ * and the zone size given that it can't span zones.
+ */
+ lim->max_zone_append_sectors =
+ min3(lim->max_hw_sectors,
+ lim->max_zone_append_sectors,
+ lim->chunk_sectors);
+ }
+
+ return 0;
+}
+
+/*
+ * Check that the limits in lim are valid, initialize defaults for unset
+ * values, and cap values based on others where needed.
+ */
+static int blk_validate_limits(struct queue_limits *lim)
+{
+ unsigned int max_hw_sectors;
+
+ /*
+ * Unless otherwise specified, default to 512 byte logical blocks and a
+ * physical block size equal to the logical block size.
+ */
+ if (!lim->logical_block_size)
+ lim->logical_block_size = SECTOR_SIZE;
+ if (lim->physical_block_size < lim->logical_block_size)
+ lim->physical_block_size = lim->logical_block_size;
+
+ /*
+ * The minimum I/O size defaults to the physical block size unless
+ * explicitly overridden.
+ */
+ if (lim->io_min < lim->physical_block_size)
+ lim->io_min = lim->physical_block_size;
+
+ /*
+ * max_hw_sectors has a somewhat weird default for historical reason,
+ * but driver really should set their own instead of relying on this
+ * value.
+ *
+ * The block layer relies on the fact that every driver can
+ * handle at lest a page worth of data per I/O, and needs the value
+ * aligned to the logical block size.
+ */
+ if (!lim->max_hw_sectors)
+ lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+ if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
+ return -EINVAL;
+ lim->max_hw_sectors = round_down(lim->max_hw_sectors,
+ lim->logical_block_size >> SECTOR_SHIFT);
+
+ /*
+ * The actual max_sectors value is a complex beast and also takes the
+ * max_dev_sectors value (set by SCSI ULPs) and a user configurable
+ * value into account. The ->max_sectors value is always calculated
+ * from these, so directly setting it won't have any effect.
+ */
+ max_hw_sectors = min_not_zero(lim->max_hw_sectors,
+ lim->max_dev_sectors);
+ if (lim->max_user_sectors) {
+ if (lim->max_user_sectors > max_hw_sectors ||
+ lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+ return -EINVAL;
+ lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
+ } else {
+ lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
+ }
+ lim->max_sectors = round_down(lim->max_sectors,
+ lim->logical_block_size >> SECTOR_SHIFT);
+
+ /*
+ * Random default for the maximum number of segments. Driver should not
+ * rely on this and set their own.
+ */
+ if (!lim->max_segments)
+ lim->max_segments = BLK_MAX_SEGMENTS;
+
+ lim->max_discard_sectors =
+ min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors);
+
+ if (!lim->max_discard_segments)
+ lim->max_discard_segments = 1;
+
+ if (lim->discard_granularity < lim->physical_block_size)
+ lim->discard_granularity = lim->physical_block_size;
+
+ /*
+ * By default there is no limit on the segment boundary alignment,
+ * but if there is one it can't be smaller than the page size as
+ * that would break all the normal I/O patterns.
+ */
+ if (!lim->seg_boundary_mask)
+ lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+ if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
+ return -EINVAL;
+
+ /*
+ * Devices that require a virtual boundary do not support scatter/gather
+ * I/O natively, but instead require a descriptor list entry for each
+ * page (which might not be identical to the Linux PAGE_SIZE). Because
+ * of that they are not limited by our notion of "segment size".
+ */
+ if (lim->virt_boundary_mask) {
+ if (WARN_ON_ONCE(lim->max_segment_size &&
+ lim->max_segment_size != UINT_MAX))
+ return -EINVAL;
+ lim->max_segment_size = UINT_MAX;
+ } else {
+ /*
+ * The maximum segment size has an odd historic 64k default that
+ * drivers probably should override. Just like the I/O size we
+ * require drivers to at least handle a full page per segment.
+ */
+ if (!lim->max_segment_size)
+ lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+ if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
+ return -EINVAL;
+ }
+
+ /*
+ * We require drivers to at least do logical block aligned I/O, but
+ * historically could not check for that due to the separate calls
+ * to set the limits. Once the transition is finished the check
+ * below should be narrowed down to check the logical block size.
+ */
+ if (!lim->dma_alignment)
+ lim->dma_alignment = SECTOR_SIZE - 1;
+ if (WARN_ON_ONCE(lim->dma_alignment > PAGE_SIZE))
+ return -EINVAL;
+
+ if (lim->alignment_offset) {
+ lim->alignment_offset &= (lim->physical_block_size - 1);
+ lim->misaligned = 0;
+ }
+
+ return blk_validate_zoned_limits(lim);
+}
+
+/*
+ * Set the default limits for a newly allocated queue. @lim contains the
+ * initial limits set by the driver, which could be no limit in which case
+ * all fields are cleared to zero.
+ */
+int blk_set_default_limits(struct queue_limits *lim)
+{
+ /*
+ * Most defaults are set by capping the bounds in blk_validate_limits,
+ * but max_user_discard_sectors is special and needs an explicit
+ * initialization to the max value here.
+ */
+ lim->max_user_discard_sectors = UINT_MAX;
+ return blk_validate_limits(lim);
+}
+
+/**
+ * queue_limits_commit_update - commit an atomic update of queue limits
+ * @q: queue to update
+ * @lim: limits to apply
+ *
+ * Apply the limits in @lim that were obtained from queue_limits_start_update()
+ * and updated by the caller to @q.
+ *
+ * Returns 0 if successful, else a negative error code.
+ */
+int queue_limits_commit_update(struct request_queue *q,
+ struct queue_limits *lim)
+ __releases(q->limits_lock)
+{
+ int error = blk_validate_limits(lim);
+
+ if (!error) {
+ q->limits = *lim;
+ if (q->disk)
+ blk_apply_bdi_limits(q->disk->bdi, lim);
+ }
+ mutex_unlock(&q->limits_lock);
+ return error;
+}
+EXPORT_SYMBOL_GPL(queue_limits_commit_update);
+
+/**
+ * queue_limits_commit_set - apply queue limits to queue
+ * @q: queue to update
+ * @lim: limits to apply
+ *
+ * Apply the limits in @lim that were freshly initialized to @q.
+ * To update existing limits use queue_limits_start_update() and
+ * queue_limits_commit_update() instead.
+ *
+ * Returns 0 if successful, else a negative error code.
+ */
+int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
+{
+ mutex_lock(&q->limits_lock);
+ return queue_limits_commit_update(q, lim);
+}
+EXPORT_SYMBOL_GPL(queue_limits_set);
+
/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
@@ -177,8 +376,11 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors);
void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors)
{
- q->limits.max_hw_discard_sectors = max_discard_sectors;
- q->limits.max_discard_sectors = max_discard_sectors;
+ struct queue_limits *lim = &q->limits;
+
+ lim->max_hw_discard_sectors = max_discard_sectors;
+ lim->max_discard_sectors =
+ min(max_discard_sectors, lim->max_user_discard_sectors);
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
@@ -393,15 +595,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
void disk_update_readahead(struct gendisk *disk)
{
- struct request_queue *q = disk->queue;
-
- /*
- * For read-ahead of large files to be effective, we need to read ahead
- * at least twice the optimal I/O size.
- */
- disk->bdi->ra_pages =
- max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
- disk->bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
+ blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
}
EXPORT_SYMBOL_GPL(disk_update_readahead);
@@ -689,33 +883,38 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->zone_write_granularity = max(t->zone_write_granularity,
b->zone_write_granularity);
t->zoned = max(t->zoned, b->zoned);
+ if (!t->zoned) {
+ t->zone_write_granularity = 0;
+ t->max_zone_append_sectors = 0;
+ }
return ret;
}
EXPORT_SYMBOL(blk_stack_limits);
/**
- * disk_stack_limits - adjust queue limits for stacked drivers
- * @disk: MD/DM gendisk (top)
+ * queue_limits_stack_bdev - adjust queue_limits for stacked devices
+ * @t: the stacking driver limits (top device)
* @bdev: the underlying block device (bottom)
* @offset: offset to beginning of data within component device
+ * @pfx: prefix to use for warnings logged
*
* Description:
- * Merges the limits for a top level gendisk and a bottom level
- * block_device.
+ * This function is used by stacking drivers like MD and DM to ensure
+ * that all component devices have compatible block sizes and
+ * alignments. The stacking driver must provide a queue_limits
+ * struct (top) and then iteratively call the stacking function for
+ * all component (bottom) devices. The stacking function will
+ * attempt to combine the values and ensure proper alignment.
*/
-void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
- sector_t offset)
+void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
+ sector_t offset, const char *pfx)
{
- struct request_queue *t = disk->queue;
-
- if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
- get_start_sect(bdev) + (offset >> 9)) < 0)
+ if (blk_stack_limits(t, &bdev_get_queue(bdev)->limits,
+ get_start_sect(bdev) + offset))
pr_notice("%s: Warning: Device %pg is misaligned\n",
- disk->disk_name, bdev);
-
- disk_update_readahead(disk);
+ pfx, bdev);
}
-EXPORT_SYMBOL(disk_stack_limits);
+EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);
/**
* blk_queue_update_dma_pad - update pad mask
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 7ff76ae6c76a..e42c263e53fb 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -27,7 +27,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat)
/* src is a per-cpu stat, mean isn't initialized */
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
- if (!src->nr_samples)
+ if (dst->nr_samples + src->nr_samples <= dst->nr_samples)
return;
dst->min = min(dst->min, src->min);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6b2429cad81a..8c8f69d8ba48 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -174,23 +174,29 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
static ssize_t queue_discard_max_store(struct request_queue *q,
const char *page, size_t count)
{
- unsigned long max_discard;
- ssize_t ret = queue_var_store(&max_discard, page, count);
+ unsigned long max_discard_bytes;
+ struct queue_limits lim;
+ ssize_t ret;
+ int err;
+ ret = queue_var_store(&max_discard_bytes, page, count);
if (ret < 0)
return ret;
- if (max_discard & (q->limits.discard_granularity - 1))
+ if (max_discard_bytes & (q->limits.discard_granularity - 1))
return -EINVAL;
- max_discard >>= 9;
- if (max_discard > UINT_MAX)
+ if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
return -EINVAL;
- if (max_discard > q->limits.max_hw_discard_sectors)
- max_discard = q->limits.max_hw_discard_sectors;
+ blk_mq_freeze_queue(q);
+ lim = queue_limits_start_update(q);
+ lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
+ err = queue_limits_commit_update(q, &lim);
+ blk_mq_unfreeze_queue(q);
- q->limits.max_discard_sectors = max_discard;
+ if (err)
+ return err;
return ret;
}
@@ -226,35 +232,22 @@ static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
- unsigned long var;
- unsigned int max_sectors_kb,
- max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
- page_kb = 1 << (PAGE_SHIFT - 10);
- ssize_t ret = queue_var_store(&var, page, count);
+ unsigned long max_sectors_kb;
+ struct queue_limits lim;
+ ssize_t ret;
+ int err;
+ ret = queue_var_store(&max_sectors_kb, page, count);
if (ret < 0)
return ret;
- max_sectors_kb = (unsigned int)var;
- max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb,
- q->limits.max_dev_sectors >> 1);
- if (max_sectors_kb == 0) {
- q->limits.max_user_sectors = 0;
- max_sectors_kb = min(max_hw_sectors_kb,
- BLK_DEF_MAX_SECTORS_CAP >> 1);
- } else {
- if (max_sectors_kb > max_hw_sectors_kb ||
- max_sectors_kb < page_kb)
- return -EINVAL;
- q->limits.max_user_sectors = max_sectors_kb << 1;
- }
-
- spin_lock_irq(&q->queue_lock);
- q->limits.max_sectors = max_sectors_kb << 1;
- if (q->disk)
- q->disk->bdi->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
- spin_unlock_irq(&q->queue_lock);
-
+ blk_mq_freeze_queue(q);
+ lim = queue_limits_start_update(q);
+ lim.max_user_sectors = max_sectors_kb << 1;
+ err = queue_limits_commit_update(q, &lim);
+ blk_mq_unfreeze_queue(q);
+ if (err)
+ return err;
return ret;
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 16f5766620a4..f4850a6f860b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1098,7 +1098,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
tg_may_dispatch(tg, bio, NULL)) {
- tg_dispatch_one_bio(tg, bio_data_dir(bio));
+ tg_dispatch_one_bio(tg, READ);
nr_reads++;
if (nr_reads >= max_nr_reads)
@@ -1108,7 +1108,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
tg_may_dispatch(tg, bio, NULL)) {
- tg_dispatch_one_bio(tg, bio_data_dir(bio));
+ tg_dispatch_one_bio(tg, WRITE);
nr_writes++;
if (nr_writes >= max_nr_writes)
@@ -1815,7 +1815,7 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
ret = tg->latency_target == DFL_LATENCY_TARGET ||
tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
- (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+ (blk_time_get_ns() >> 10) - tg->last_finish_time > time ||
tg->avg_idletime > tg->idletime_threshold ||
(tg->latency_target && tg->bio_cnt &&
tg->bad_bio_cnt * 5 < tg->bio_cnt);
@@ -2060,7 +2060,7 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
if (last_finish_time == 0)
return;
- now = ktime_get_ns() >> 10;
+ now = blk_time_get_ns() >> 10;
if (now <= last_finish_time ||
last_finish_time == tg->checked_last_finish_time)
return;
@@ -2327,7 +2327,7 @@ void blk_throtl_bio_endio(struct bio *bio)
if (!tg->td->limit_valid[LIMIT_LOW])
return;
- finish_time_ns = ktime_get_ns();
+ finish_time_ns = blk_time_get_ns();
tg->last_finish_time = finish_time_ns >> 10;
start_time = bio_issue_time(&bio->bi_issue) >> 10;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 5ba3cd574eac..64472134dd26 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -29,6 +29,7 @@
#include "blk-wbt.h"
#include "blk-rq-qos.h"
#include "elevator.h"
+#include "blk.h"
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
@@ -163,9 +164,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
- return time_before(jiffies, wb->dirty_sleep + HZ);
+ return time_before(jiffies, bdi->last_bdp_sleep + HZ);
}
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
@@ -274,13 +275,12 @@ static inline bool stat_sample_valid(struct blk_rq_stat *stat)
static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
- u64 now, issue = READ_ONCE(rwb->sync_issue);
+ u64 issue = READ_ONCE(rwb->sync_issue);
if (!issue || !rwb->sync_cookie)
return 0;
- now = ktime_to_ns(ktime_get());
- return now - issue;
+ return blk_time_get_ns() - issue;
}
static inline unsigned int wbt_inflight(struct rq_wb *rwb)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index d343e5756a9c..da0f4b2a8fa0 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
@@ -177,8 +176,7 @@ static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
}
}
-static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
- gfp_t gfp_mask)
+static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
sector_t capacity = bdev_nr_sectors(bdev);
@@ -205,7 +203,7 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
}
bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
- gfp_mask);
+ GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
@@ -223,7 +221,7 @@ out_free_need_reset:
return ret;
}
-static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
+static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;
@@ -238,7 +236,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
* @sector: Start sector of the first zone to operate on
* @nr_sectors: Number of sectors, should be at least the length of one zone and
* must be zone size aligned.
- * @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
* Perform the specified operation on the range of zones specified by
@@ -248,7 +245,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
* or finish request.
*/
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
- sector_t sector, sector_t nr_sectors, gfp_t gfp_mask)
+ sector_t sector, sector_t nr_sectors)
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
@@ -285,12 +282,12 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
*/
if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
if (!blk_queue_zone_resetall(q))
- return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
- return blkdev_zone_reset_all(bdev, gfp_mask);
+ return blkdev_zone_reset_all_emulated(bdev);
+ return blkdev_zone_reset_all(bdev);
}
while (sector < end_sector) {
- bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
+ bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
@@ -419,8 +416,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
return -ENOTTY;
}
- ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
- GFP_KERNEL);
+ ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
fail:
if (cmd == BLKRESETZONE)
diff --git a/block/blk.h b/block/blk.h
index 1ef920f72e0f..a19b7b42e650 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -4,6 +4,8 @@
#include <linux/blk-crypto.h>
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
+#include <linux/sched/sysctl.h>
+#include <linux/timekeeping.h>
#include <xen/xen.h>
#include "blk-crypto-internal.h"
@@ -70,6 +72,18 @@ static inline int bio_queue_enter(struct bio *bio)
return __bio_queue_enter(q, bio);
}
+static inline void blk_wait_io(struct completion *done)
+{
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
+
+ if (timeout)
+ while (!wait_for_completion_io_timeout(done, timeout))
+ ;
+ else
+ wait_for_completion_io(done);
+}
+
#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);
@@ -329,7 +343,7 @@ void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
-void blk_set_default_limits(struct queue_limits *lim);
+int blk_set_default_limits(struct queue_limits *lim);
int blk_dev_init(void);
/*
@@ -447,7 +461,7 @@ static inline void bio_release_page(struct bio *bio, struct page *page)
unpin_user_page(page);
}
-struct request_queue *blk_alloc_queue(int node_id);
+struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id);
int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode);
@@ -516,4 +530,75 @@ static inline int req_ref_read(struct request *req)
return atomic_read(&req->ref);
}
+static inline u64 blk_time_get_ns(void)
+{
+ struct blk_plug *plug = current->plug;
+
+ if (!plug)
+ return ktime_get_ns();
+
+ /*
+ * 0 could very well be a valid time, but rather than flag "this is
+ * a valid timestamp" separately, just accept that we'll do an extra
+ * ktime_get_ns() if we just happen to get 0 as the current time.
+ */
+ if (!plug->cur_ktime) {
+ plug->cur_ktime = ktime_get_ns();
+ current->flags |= PF_BLOCK_TS;
+ }
+ return plug->cur_ktime;
+}
+
+static inline ktime_t blk_time_get(void)
+{
+ return ns_to_ktime(blk_time_get_ns());
+}
+
+/*
+ * From most significant bit:
+ * 1 bit: reserved for other usage, see below
+ * 12 bits: original size of bio
+ * 51 bits: issue time of bio
+ */
+#define BIO_ISSUE_RES_BITS 1
+#define BIO_ISSUE_SIZE_BITS 12
+#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS)
+#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
+#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
+#define BIO_ISSUE_SIZE_MASK \
+ (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
+#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))
+
+/* Reserved bit for blk-throtl */
+#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)
+
+static inline u64 __bio_issue_time(u64 time)
+{
+ return time & BIO_ISSUE_TIME_MASK;
+}
+
+static inline u64 bio_issue_time(struct bio_issue *issue)
+{
+ return __bio_issue_time(issue->value);
+}
+
+static inline sector_t bio_issue_size(struct bio_issue *issue)
+{
+ return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
+}
+
+static inline void bio_issue_init(struct bio_issue *issue,
+ sector_t size)
+{
+ size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
+ issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
+ (blk_time_get_ns() & BIO_ISSUE_TIME_MASK) |
+ ((u64)size << BIO_ISSUE_SIZE_SHIFT));
+}
+
+void bdev_release(struct file *bdev_file);
+int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops, struct file *bdev_file);
+int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 7cfcb242f9a1..d6a5219f29dd 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index b3acdbdb6e7e..bcc7dee6abce 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -383,7 +383,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
if (blk_mq_alloc_tag_set(set))
goto out_tag_set;
- q = blk_mq_init_queue(set);
+ q = blk_mq_alloc_queue(set, NULL, NULL);
if (IS_ERR(q)) {
ret = PTR_ERR(q);
goto out_queue;
diff --git a/block/fops.c b/block/fops.c
index 0cf8cf72cdfa..679d9b752fe8 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
for (;;) {
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
@@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
dio->flags = 0;
dio->iocb = iocb;
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio->bi_end_io = blkdev_bio_end_io_async;
bio->bi_ioprio = iocb->ki_ioprio;
@@ -482,7 +485,7 @@ static void blkdev_readahead(struct readahead_control *rac)
}
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
- struct inode *inode, loff_t offset)
+ struct inode *inode, loff_t offset, unsigned int len)
{
loff_t isize = i_size_read(inode);
@@ -569,18 +572,17 @@ static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
blk_mode_t file_to_blk_mode(struct file *file)
{
blk_mode_t mode = 0;
- struct bdev_handle *handle = file->private_data;
if (file->f_mode & FMODE_READ)
mode |= BLK_OPEN_READ;
if (file->f_mode & FMODE_WRITE)
mode |= BLK_OPEN_WRITE;
/*
- * do_dentry_open() clears O_EXCL from f_flags, use handle->mode to
- * determine whether the open was exclusive for already open files.
+ * do_dentry_open() clears O_EXCL from f_flags, use file->private_data
+ * to determine whether the open was exclusive for already open files.
*/
- if (handle)
- mode |= handle->mode & BLK_OPEN_EXCL;
+ if (file->private_data)
+ mode |= BLK_OPEN_EXCL;
else if (file->f_flags & O_EXCL)
mode |= BLK_OPEN_EXCL;
if (file->f_flags & O_NDELAY)
@@ -599,36 +601,31 @@ blk_mode_t file_to_blk_mode(struct file *file)
static int blkdev_open(struct inode *inode, struct file *filp)
{
- struct bdev_handle *handle;
+ struct block_device *bdev;
blk_mode_t mode;
-
- /*
- * Preserve backwards compatibility and allow large file access
- * even if userspace doesn't ask for it explicitly. Some mkfs
- * binary needs it. We might want to drop this workaround
- * during an unstable branch.
- */
- filp->f_flags |= O_LARGEFILE;
- filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+ int ret;
mode = file_to_blk_mode(filp);
- handle = bdev_open_by_dev(inode->i_rdev, mode,
- mode & BLK_OPEN_EXCL ? filp : NULL, NULL);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ /* Use the file as the holder. */
+ if (mode & BLK_OPEN_EXCL)
+ filp->private_data = filp;
+ ret = bdev_permission(inode->i_rdev, mode, filp->private_data);
+ if (ret)
+ return ret;
- if (bdev_nowait(handle->bdev))
- filp->f_mode |= FMODE_NOWAIT;
+ bdev = blkdev_get_no_open(inode->i_rdev);
+ if (!bdev)
+ return -ENXIO;
- filp->f_mapping = handle->bdev->bd_inode->i_mapping;
- filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
- filp->private_data = handle;
- return 0;
+ ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
+ if (ret)
+ blkdev_put_no_open(bdev);
+ return ret;
}
static int blkdev_release(struct inode *inode, struct file *filp)
{
- bdev_release(filp->private_data);
+ bdev_release(filp);
return 0;
}
diff --git a/block/genhd.c b/block/genhd.c
index d74fb5b4ae68..bb29a68e1d67 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(disk_uevent);
int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
{
- struct bdev_handle *handle;
+ struct file *file;
int ret = 0;
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
@@ -366,12 +366,12 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
}
set_bit(GD_NEED_PART_SCAN, &disk->state);
- handle = bdev_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, NULL,
- NULL);
- if (IS_ERR(handle))
- ret = PTR_ERR(handle);
+ file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL,
+ NULL, NULL);
+ if (IS_ERR(file))
+ ret = PTR_ERR(file);
else
- bdev_release(handle);
+ fput(file);
/*
* If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
@@ -1201,7 +1201,7 @@ static int block_uevent(const struct device *dev, struct kobj_uevent_env *env)
return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
}
-struct class block_class = {
+const struct class block_class = {
.name = "block",
.dev_uevent = block_uevent,
};
@@ -1391,19 +1391,21 @@ out_free_disk:
return NULL;
}
-struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
+struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node,
+ struct lock_class_key *lkclass)
{
+ struct queue_limits default_lim = { };
struct request_queue *q;
struct gendisk *disk;
- q = blk_alloc_queue(node);
- if (!q)
- return NULL;
+ q = blk_alloc_queue(lim ? lim : &default_lim, node);
+ if (IS_ERR(q))
+ return ERR_CAST(q);
disk = __alloc_disk_node(q, node, lkclass);
if (!disk) {
blk_put_queue(q);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
set_bit(GD_OWNS_QUEUE, &disk->state);
return disk;
diff --git a/block/holder.c b/block/holder.c
index 37d18c13d958..791091a7eac2 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -8,6 +8,8 @@ struct bd_holder_disk {
int refcnt;
};
+static DEFINE_MUTEX(blk_holder_mutex);
+
static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
struct gendisk *disk)
{
@@ -80,7 +82,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
kobject_get(bdev->bd_holder_dir);
mutex_unlock(&bdev->bd_disk->open_mutex);
- mutex_lock(&disk->open_mutex);
+ mutex_lock(&blk_holder_mutex);
WARN_ON_ONCE(!bdev->bd_holder);
holder = bd_find_holder_disk(bdev, disk);
@@ -108,7 +110,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
goto out_del_symlink;
list_add(&holder->list, &disk->slave_bdevs);
- mutex_unlock(&disk->open_mutex);
+ mutex_unlock(&blk_holder_mutex);
return 0;
out_del_symlink:
@@ -116,7 +118,7 @@ out_del_symlink:
out_free_holder:
kfree(holder);
out_unlock:
- mutex_unlock(&disk->open_mutex);
+ mutex_unlock(&blk_holder_mutex);
if (ret)
kobject_put(bdev->bd_holder_dir);
return ret;
@@ -140,7 +142,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
if (WARN_ON_ONCE(!disk->slave_dir))
return;
- mutex_lock(&disk->open_mutex);
+ mutex_lock(&blk_holder_mutex);
holder = bd_find_holder_disk(bdev, disk);
if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
del_symlink(disk->slave_dir, bdev_kobj(bdev));
@@ -149,6 +151,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
list_del_init(&holder->list);
kfree(holder);
}
- mutex_unlock(&disk->open_mutex);
+ mutex_unlock(&blk_holder_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
diff --git a/block/ioctl.c b/block/ioctl.c
index 9c73a763ef88..0c76137adcaa 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -18,10 +18,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
{
struct gendisk *disk = bdev->bd_disk;
struct blkpg_partition p;
- sector_t start, length;
+ sector_t start, length, capacity, end;
- if (disk->flags & GENHD_FL_NO_PART)
- return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
@@ -43,6 +41,13 @@ static int blkpg_do_ioctl(struct block_device *bdev,
start = p.start >> SECTOR_SHIFT;
length = p.length >> SECTOR_SHIFT;
+ capacity = get_capacity(disk);
+
+ if (check_add_overflow(start, length, &end))
+ return -EINVAL;
+
+ if (start >= capacity || end > capacity)
+ return -EINVAL;
switch (op) {
case BLKPG_ADD_PARTITION:
@@ -471,7 +476,7 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode,
int __user *argp)
{
int ret, n;
- struct bdev_handle *handle;
+ struct file *file;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -483,12 +488,11 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode,
if (mode & BLK_OPEN_EXCL)
return set_blocksize(bdev, n);
- handle = bdev_open_by_dev(bdev->bd_dev, mode, &bdev, NULL);
- if (IS_ERR(handle))
+ file = bdev_file_open_by_dev(bdev->bd_dev, mode, &bdev, NULL);
+ if (IS_ERR(file))
return -EBUSY;
ret = set_blocksize(bdev, n);
- bdev_release(handle);
-
+ fput(file);
return ret;
}
diff --git a/block/opal_proto.h b/block/opal_proto.h
index dec7ce3a3edb..d247a457bf6e 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -71,6 +71,7 @@ enum opal_response_token {
#define SHORT_ATOM_BYTE 0xBF
#define MEDIUM_ATOM_BYTE 0xDF
#define LONG_ATOM_BYTE 0xE3
+#define EMPTY_ATOM_BYTE 0xFF
#define OPAL_INVAL_PARAM 12
#define OPAL_MANUFACTURED_INACTIVE 0x08
diff --git a/block/partitions/core.c b/block/partitions/core.c
index cab0d76a828e..b11e88c82c8c 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -419,26 +419,20 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
sector_t length)
{
- sector_t capacity = get_capacity(disk), end;
struct block_device *part;
int ret;
mutex_lock(&disk->open_mutex);
- if (check_add_overflow(start, length, &end)) {
- ret = -EINVAL;
+ if (!disk_live(disk)) {
+ ret = -ENXIO;
goto out;
}
- if (start >= capacity || end > capacity) {
+ if (disk->flags & GENHD_FL_NO_PART) {
ret = -EINVAL;
goto out;
}
- if (!disk_live(disk)) {
- ret = -ENXIO;
- goto out;
- }
-
if (partition_overlaps(disk, start, length, -1)) {
ret = -EBUSY;
goto out;
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index 7b521df00a39..c80183156d68 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -20,6 +20,7 @@ extern void note_bootable_part(dev_t dev, int part, int goodness);
* Code to understand MacOS partition tables.
*/
+#ifdef CONFIG_PPC_PMAC
static inline void mac_fix_string(char *stg, int len)
{
int i;
@@ -27,6 +28,7 @@ static inline void mac_fix_string(char *stg, int len)
for (i = len - 1; i >= 0 && stg[i] == ' '; i--)
stg[i] = 0;
}
+#endif
int mac_partition(struct parsed_partitions *state)
{
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 3d9e9cd250bd..14fe0fef811c 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1056,16 +1056,20 @@ static int response_parse(const u8 *buf, size_t length,
token_length = response_parse_medium(iter, pos);
else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */
token_length = response_parse_long(iter, pos);
+ else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */
+ token_length = 1;
else /* TOKEN */
token_length = response_parse_token(iter, pos);
if (token_length < 0)
return token_length;
+ if (pos[0] != EMPTY_ATOM_BYTE)
+ num_entries++;
+
pos += token_length;
total -= token_length;
iter++;
- num_entries++;
}
resp->num = num_entries;
@@ -1208,7 +1212,7 @@ static int cmd_start(struct opal_dev *dev, const u8 *uid, const u8 *method)
static int start_opal_session_cont(struct opal_dev *dev)
{
u32 hsn, tsn;
- int error = 0;
+ int error;
error = parse_and_check_status(dev);
if (error)
@@ -1350,7 +1354,7 @@ static int get_active_key_cont(struct opal_dev *dev)
{
const char *activekey;
size_t keylen;
- int error = 0;
+ int error;
error = parse_and_check_status(dev);
if (error)
@@ -2153,7 +2157,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
u8 lr_buffer[OPAL_UID_LENGTH];
struct opal_lock_unlock *lkul = data;
u8 read_locked = 1, write_locked = 1;
- int err = 0;
+ int err;
if (build_locking_range(lr_buffer, sizeof(lr_buffer),
lkul->session.opal_key.lr) < 0)
@@ -2576,7 +2580,7 @@ static int opal_get_discv(struct opal_dev *dev, struct opal_discovery *discv)
const struct opal_step discovery0_step = {
opal_discovery0, discv
};
- int ret = 0;
+ int ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
@@ -3065,7 +3069,7 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
{
struct opal_suspend_data *suspend;
bool was_failure = false;
- int ret = 0;
+ int ret;
if (!dev)
return false;
@@ -3108,10 +3112,9 @@ static int opal_read_table(struct opal_dev *dev,
{ read_table_data, rw_tbl },
{ end_opal_session, }
};
- int ret = 0;
if (!rw_tbl->size)
- return ret;
+ return 0;
return execute_steps(dev, read_table_steps,
ARRAY_SIZE(read_table_steps));
@@ -3125,10 +3128,9 @@ static int opal_write_table(struct opal_dev *dev,
{ write_table_data, rw_tbl },
{ end_opal_session, }
};
- int ret = 0;
if (!rw_tbl->size)
- return ret;
+ return 0;
return execute_steps(dev, write_table_steps,
ARRAY_SIZE(write_table_steps));
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 914d8cddd43a..d90892fd6f2a 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -12,14 +12,14 @@
#include <net/checksum.h>
#include <asm/unaligned.h>
-typedef __be16 (csum_fn) (void *, unsigned int);
+typedef __be16 (csum_fn) (__be16, void *, unsigned int);
-static __be16 t10_pi_crc_fn(void *data, unsigned int len)
+static __be16 t10_pi_crc_fn(__be16 crc, void *data, unsigned int len)
{
- return cpu_to_be16(crc_t10dif(data, len));
+ return cpu_to_be16(crc_t10dif_update(be16_to_cpu(crc), data, len));
}
-static __be16 t10_pi_ip_fn(void *data, unsigned int len)
+static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len)
{
return (__force __be16)ip_compute_csum(data, len);
}
@@ -32,12 +32,16 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
csum_fn *fn, enum t10_dif_type type)
{
+ u8 offset = iter->pi_offset;
unsigned int i;
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
- struct t10_pi_tuple *pi = iter->prot_buf;
+ struct t10_pi_tuple *pi = iter->prot_buf + offset;
- pi->guard_tag = fn(iter->data_buf, iter->interval);
+ pi->guard_tag = fn(0, iter->data_buf, iter->interval);
+ if (offset)
+ pi->guard_tag = fn(pi->guard_tag, iter->prot_buf,
+ offset);
pi->app_tag = 0;
if (type == T10_PI_TYPE1_PROTECTION)
@@ -56,12 +60,13 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
csum_fn *fn, enum t10_dif_type type)
{
+ u8 offset = iter->pi_offset;
unsigned int i;
BUG_ON(type == T10_PI_TYPE0_PROTECTION);
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
- struct t10_pi_tuple *pi = iter->prot_buf;
+ struct t10_pi_tuple *pi = iter->prot_buf + offset;
__be16 csum;
if (type == T10_PI_TYPE1_PROTECTION ||
@@ -83,7 +88,9 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
goto next;
}
- csum = fn(iter->data_buf, iter->interval);
+ csum = fn(0, iter->data_buf, iter->interval);
+ if (offset)
+ csum = fn(csum, iter->prot_buf, offset);
if (pi->guard_tag != csum) {
pr_err("%s: guard tag error at sector %llu " \
@@ -134,8 +141,10 @@ static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
*/
static void t10_pi_type1_prepare(struct request *rq)
{
- const int tuple_sz = rq->q->integrity.tuple_size;
+ struct blk_integrity *bi = &rq->q->integrity;
+ const int tuple_sz = bi->tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
+ u8 offset = bi->pi_offset;
struct bio *bio;
__rq_for_each_bio(bio, rq) {
@@ -154,7 +163,7 @@ static void t10_pi_type1_prepare(struct request *rq)
p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len; j += tuple_sz) {
- struct t10_pi_tuple *pi = p;
+ struct t10_pi_tuple *pi = p + offset;
if (be32_to_cpu(pi->ref_tag) == virt)
pi->ref_tag = cpu_to_be32(ref_tag);
@@ -183,9 +192,11 @@ static void t10_pi_type1_prepare(struct request *rq)
*/
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
- unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
- const int tuple_sz = rq->q->integrity.tuple_size;
+ struct blk_integrity *bi = &rq->q->integrity;
+ unsigned intervals = nr_bytes >> bi->interval_exp;
+ const int tuple_sz = bi->tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
+ u8 offset = bi->pi_offset;
struct bio *bio;
__rq_for_each_bio(bio, rq) {
@@ -200,7 +211,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
- struct t10_pi_tuple *pi = p;
+ struct t10_pi_tuple *pi = p + offset;
if (be32_to_cpu(pi->ref_tag) == ref_tag)
pi->ref_tag = cpu_to_be32(virt);
@@ -280,20 +291,24 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
};
EXPORT_SYMBOL(t10_pi_type3_ip);
-static __be64 ext_pi_crc64(void *data, unsigned int len)
+static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
{
- return cpu_to_be64(crc64_rocksoft(data, len));
+ return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
}
static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter,
enum t10_dif_type type)
{
+ u8 offset = iter->pi_offset;
unsigned int i;
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
- struct crc64_pi_tuple *pi = iter->prot_buf;
+ struct crc64_pi_tuple *pi = iter->prot_buf + offset;
- pi->guard_tag = ext_pi_crc64(iter->data_buf, iter->interval);
+ pi->guard_tag = ext_pi_crc64(0, iter->data_buf, iter->interval);
+ if (offset)
+ pi->guard_tag = ext_pi_crc64(be64_to_cpu(pi->guard_tag),
+ iter->prot_buf, offset);
pi->app_tag = 0;
if (type == T10_PI_TYPE1_PROTECTION)
@@ -319,10 +334,11 @@ static bool ext_pi_ref_escape(u8 *ref_tag)
static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
enum t10_dif_type type)
{
+ u8 offset = iter->pi_offset;
unsigned int i;
for (i = 0; i < iter->data_size; i += iter->interval) {
- struct crc64_pi_tuple *pi = iter->prot_buf;
+ struct crc64_pi_tuple *pi = iter->prot_buf + offset;
u64 ref, seed;
__be64 csum;
@@ -343,7 +359,11 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
goto next;
}
- csum = ext_pi_crc64(iter->data_buf, iter->interval);
+ csum = ext_pi_crc64(0, iter->data_buf, iter->interval);
+ if (offset)
+ csum = ext_pi_crc64(be64_to_cpu(csum), iter->prot_buf,
+ offset);
+
if (pi->guard_tag != csum) {
pr_err("%s: guard tag error at sector %llu " \
"(rcvd %016llx, want %016llx)\n",
@@ -373,8 +393,10 @@ static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter)
static void ext_pi_type1_prepare(struct request *rq)
{
- const int tuple_sz = rq->q->integrity.tuple_size;
+ struct blk_integrity *bi = &rq->q->integrity;
+ const int tuple_sz = bi->tuple_size;
u64 ref_tag = ext_pi_ref_tag(rq);
+ u8 offset = bi->pi_offset;
struct bio *bio;
__rq_for_each_bio(bio, rq) {
@@ -393,7 +415,7 @@ static void ext_pi_type1_prepare(struct request *rq)
p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len; j += tuple_sz) {
- struct crc64_pi_tuple *pi = p;
+ struct crc64_pi_tuple *pi = p + offset;
u64 ref = get_unaligned_be48(pi->ref_tag);
if (ref == virt)
@@ -411,9 +433,11 @@ static void ext_pi_type1_prepare(struct request *rq)
static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
- unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
- const int tuple_sz = rq->q->integrity.tuple_size;
+ struct blk_integrity *bi = &rq->q->integrity;
+ unsigned intervals = nr_bytes >> bi->interval_exp;
+ const int tuple_sz = bi->tuple_size;
u64 ref_tag = ext_pi_ref_tag(rq);
+ u8 offset = bi->pi_offset;
struct bio *bio;
__rq_for_each_bio(bio, rq) {
@@ -428,7 +452,7 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
p = bvec_kmap_local(&iv);
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
- struct crc64_pi_tuple *pi = p;
+ struct crc64_pi_tuple *pi = p + offset;
u64 ref = get_unaligned_be48(pi->ref_tag);
if (ref == ref_tag)
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 82c44d4899b9..e24c829d7a01 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -91,13 +91,13 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
if (!(msg->msg_flags & MSG_MORE)) {
err = hash_alloc_result(sk, ctx);
if (err)
- goto unlock_free;
+ goto unlock_free_result;
ahash_request_set_crypt(&ctx->req, NULL,
ctx->result, 0);
err = crypto_wait_req(crypto_ahash_final(&ctx->req),
&ctx->wait);
if (err)
- goto unlock_free;
+ goto unlock_free_result;
}
goto done_more;
}
@@ -170,6 +170,7 @@ unlock:
unlock_free:
af_alg_free_sg(&ctx->sgl);
+unlock_free_result:
hash_free_result(sk, ctx);
ctx->more = false;
goto unlock;
diff --git a/crypto/cbc.c b/crypto/cbc.c
index eedddef9ce40..e81918ca68b7 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -148,6 +148,9 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
if (!is_power_of_2(inst->alg.co.base.cra_blocksize))
goto out_free_inst;
+ if (inst->alg.co.statesize)
+ goto out_free_inst;
+
inst->alg.encrypt = crypto_cbc_encrypt;
inst->alg.decrypt = crypto_cbc_decrypt;
diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c
index 0b6dd8aa21f2..0f1bd7dcde24 100644
--- a/crypto/lskcipher.c
+++ b/crypto/lskcipher.c
@@ -212,13 +212,12 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req,
ivsize = crypto_lskcipher_ivsize(tfm);
ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(skcipher) + 1);
+ memcpy(ivs, req->iv, ivsize);
flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
if (req->base.flags & CRYPTO_SKCIPHER_REQ_CONT)
flags |= CRYPTO_LSKCIPHER_FLAG_CONT;
- else
- memcpy(ivs, req->iv, ivsize);
if (!(req->base.flags & CRYPTO_SKCIPHER_REQ_NOTFINAL))
flags |= CRYPTO_LSKCIPHER_FLAG_FINAL;
@@ -234,8 +233,7 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req,
flags |= CRYPTO_LSKCIPHER_FLAG_CONT;
}
- if (flags & CRYPTO_LSKCIPHER_FLAG_FINAL)
- memcpy(req->iv, ivs, ivsize);
+ memcpy(req->iv, ivs, ivsize);
return err;
}
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 19035230563d..7cb962e21453 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -102,7 +102,7 @@ static int reset_pending_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = seq_to_ivpu(s);
- seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
+ seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending));
return 0;
}
@@ -130,7 +130,9 @@ dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size
fw->dvfs_mode = dvfs_mode;
- ivpu_pm_schedule_recovery(vdev);
+ ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+ if (ret)
+ return ret;
return size;
}
@@ -190,7 +192,10 @@ fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
return ret;
ivpu_hw_profiling_freq_drive(vdev, enable);
- ivpu_pm_schedule_recovery(vdev);
+
+ ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+ if (ret)
+ return ret;
return size;
}
@@ -301,11 +306,18 @@ static ssize_t
ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
+ int ret;
if (!size)
return -EINVAL;
- ivpu_pm_schedule_recovery(vdev);
+ ret = ivpu_rpm_get(vdev);
+ if (ret)
+ return ret;
+
+ ivpu_pm_trigger_recovery(vdev, "debugfs");
+ flush_work(&vdev->pm->recovery_work);
+ ivpu_rpm_put(vdev);
return size;
}
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 64927682161b..4b0640226986 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -6,6 +6,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_accel.h>
#include <drm/drm_file.h>
@@ -17,6 +18,7 @@
#include "ivpu_debugfs.h"
#include "ivpu_drv.h"
#include "ivpu_fw.h"
+#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
@@ -65,22 +67,20 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
return file_priv;
}
-struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id)
+static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv)
{
- struct ivpu_file_priv *file_priv;
-
- xa_lock_irq(&vdev->context_xa);
- file_priv = xa_load(&vdev->context_xa, id);
- /* file_priv may still be in context_xa during file_priv_release() */
- if (file_priv && !kref_get_unless_zero(&file_priv->ref))
- file_priv = NULL;
- xa_unlock_irq(&vdev->context_xa);
-
- if (file_priv)
- ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n",
- file_priv->ctx.id, kref_read(&file_priv->ref));
-
- return file_priv;
+ mutex_lock(&file_priv->lock);
+ if (file_priv->bound) {
+ ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id);
+
+ ivpu_cmdq_release_all_locked(file_priv);
+ ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+ ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx);
+ ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+ file_priv->bound = false;
+ drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id));
+ }
+ mutex_unlock(&file_priv->lock);
}
static void file_priv_release(struct kref *ref)
@@ -88,13 +88,15 @@ static void file_priv_release(struct kref *ref)
struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref);
struct ivpu_device *vdev = file_priv->vdev;
- ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
+ ivpu_dbg(vdev, FILE, "file_priv release: ctx %u bound %d\n",
+ file_priv->ctx.id, (bool)file_priv->bound);
+
+ pm_runtime_get_sync(vdev->drm.dev);
+ mutex_lock(&vdev->context_list_lock);
+ file_priv_unbind(vdev, file_priv);
+ mutex_unlock(&vdev->context_list_lock);
+ pm_runtime_put_autosuspend(vdev->drm.dev);
- ivpu_cmdq_release_all(file_priv);
- ivpu_jsm_context_release(vdev, file_priv->ctx.id);
- ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx);
- ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
- drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
mutex_destroy(&file_priv->lock);
kfree(file_priv);
}
@@ -176,9 +178,6 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
args->value = vdev->hw->ranges.user.start;
break;
- case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
- args->value = file_priv->priority;
- break;
case DRM_IVPU_PARAM_CONTEXT_ID:
args->value = file_priv->ctx.id;
break;
@@ -218,17 +217,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
- struct ivpu_file_priv *file_priv = file->driver_priv;
struct drm_ivpu_param *args = data;
int ret = 0;
switch (args->param) {
- case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
- if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME)
- file_priv->priority = args->value;
- else
- ret = -EINVAL;
- break;
default:
ret = -EINVAL;
}
@@ -241,50 +233,53 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
struct ivpu_device *vdev = to_ivpu_device(dev);
struct ivpu_file_priv *file_priv;
u32 ctx_id;
- void *old;
- int ret;
+ int idx, ret;
- ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate context id: %d\n", ret);
- return ret;
- }
+ if (!drm_dev_enter(dev, &idx))
+ return -ENODEV;
file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
if (!file_priv) {
ret = -ENOMEM;
- goto err_xa_erase;
+ goto err_dev_exit;
}
file_priv->vdev = vdev;
- file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL;
+ file_priv->bound = true;
kref_init(&file_priv->ref);
mutex_init(&file_priv->lock);
+ mutex_lock(&vdev->context_list_lock);
+
+ ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, file_priv,
+ vdev->context_xa_limit, GFP_KERNEL);
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate context id: %d\n", ret);
+ goto err_unlock;
+ }
+
ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
if (ret)
- goto err_mutex_destroy;
+ goto err_xa_erase;
- old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL);
- if (xa_is_err(old)) {
- ret = xa_err(old);
- ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret);
- goto err_ctx_fini;
- }
+ mutex_unlock(&vdev->context_list_lock);
+ drm_dev_exit(idx);
+
+ file->driver_priv = file_priv;
ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n",
ctx_id, current->comm, task_pid_nr(current));
- file->driver_priv = file_priv;
return 0;
-err_ctx_fini:
- ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
-err_mutex_destroy:
- mutex_destroy(&file_priv->lock);
- kfree(file_priv);
err_xa_erase:
xa_erase_irq(&vdev->context_xa, ctx_id);
+err_unlock:
+ mutex_unlock(&vdev->context_list_lock);
+ mutex_destroy(&file_priv->lock);
+ kfree(file_priv);
+err_dev_exit:
+ drm_dev_exit(idx);
return ret;
}
@@ -340,8 +335,6 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
if (!ret)
ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
- else
- ivpu_hw_diagnose_failure(vdev);
return ret;
}
@@ -369,6 +362,9 @@ int ivpu_boot(struct ivpu_device *vdev)
ret = ivpu_wait_for_ready(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
+ ivpu_hw_diagnose_failure(vdev);
+ ivpu_mmu_evtq_dump(vdev);
+ ivpu_fw_log_dump(vdev);
return ret;
}
@@ -484,9 +480,8 @@ static int ivpu_pci_init(struct ivpu_device *vdev)
/* Clear any pending errors */
pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f);
- /* VPU 37XX does not require 10m D3hot delay */
- if (ivpu_hw_gen(vdev) == IVPU_HW_37XX)
- pdev->d3hot_delay = 0;
+ /* NPU does not require 10m D3hot delay */
+ pdev->d3hot_delay = 0;
ret = pcim_enable_device(pdev);
if (ret) {
@@ -540,6 +535,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
+ ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
+ if (ret)
+ goto err_xa_destroy;
+
ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
if (ret)
goto err_xa_destroy;
@@ -611,14 +610,30 @@ err_xa_destroy:
return ret;
}
+static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
+{
+ struct ivpu_file_priv *file_priv;
+ unsigned long ctx_id;
+
+ mutex_lock(&vdev->context_list_lock);
+
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv)
+ file_priv_unbind(vdev, file_priv);
+
+ mutex_unlock(&vdev->context_list_lock);
+}
+
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
ivpu_pm_disable(vdev);
ivpu_shutdown(vdev);
if (IVPU_WA(d3hot_after_power_off))
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
+ ivpu_jobs_abort_all(vdev);
ivpu_job_done_consumer_fini(vdev);
ivpu_pm_cancel_recovery(vdev);
+ ivpu_bo_unbind_all_user_contexts(vdev);
ivpu_ipc_fini(vdev);
ivpu_fw_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index ebc4b84f27b2..069ace4adb2d 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -56,6 +56,7 @@
#define IVPU_DBG_JSM BIT(10)
#define IVPU_DBG_KREF BIT(11)
#define IVPU_DBG_RPM BIT(12)
+#define IVPU_DBG_MMU_MAP BIT(13)
#define ivpu_err(vdev, fmt, ...) \
drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
@@ -114,6 +115,7 @@ struct ivpu_device {
struct ivpu_mmu_context gctx;
struct ivpu_mmu_context rctx;
+ struct mutex context_list_lock; /* Protects user context addition/removal */
struct xarray context_xa;
struct xa_limit context_xa_limit;
@@ -145,8 +147,8 @@ struct ivpu_file_priv {
struct mutex lock; /* Protects cmdq */
struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES];
struct ivpu_mmu_context ctx;
- u32 priority;
bool has_mmu_faults;
+ bool bound;
};
extern int ivpu_dbg_mask;
@@ -162,7 +164,6 @@ extern bool ivpu_disable_mmu_cont_pages;
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
-struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id);
void ivpu_file_priv_put(struct ivpu_file_priv **link);
int ivpu_boot(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 6576232f3e67..5fa8bd4603d5 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -222,7 +222,6 @@ ivpu_fw_init_wa(struct ivpu_device *vdev)
const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
- (ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
(ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
vdev->wa.disable_d0i3_msg = true;
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 1dda4f38ea25..e9ddbe9f50eb 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -24,14 +24,11 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs;
static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
- if (bo->ctx)
- ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n",
- action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
- bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
- else
- ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n",
- action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
- bo->handle);
+ ivpu_dbg(vdev, BO,
+ "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
+ action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
+ (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
+ (bool)bo->base.base.import_attach);
}
/*
@@ -49,12 +46,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
mutex_lock(&bo->lock);
ivpu_dbg_bo(vdev, bo, "pin");
-
- if (!bo->ctx) {
- ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle);
- ret = -EINVAL;
- goto unlock;
- }
+ drm_WARN_ON(&vdev->drm, !bo->ctx);
if (!bo->mmu_mapped) {
struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
@@ -85,7 +77,10 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
const struct ivpu_addr_range *range)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- int ret;
+ int idx, ret;
+
+ if (!drm_dev_enter(&vdev->drm, &idx))
+ return -ENODEV;
mutex_lock(&bo->lock);
@@ -101,6 +96,8 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
mutex_unlock(&bo->lock);
+ drm_dev_exit(idx);
+
return ret;
}
@@ -108,11 +105,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- lockdep_assert_held(&bo->lock);
-
- ivpu_dbg_bo(vdev, bo, "unbind");
-
- /* TODO: dma_unmap */
+ lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount));
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->ctx);
@@ -124,19 +117,23 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
if (bo->ctx) {
ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node);
- bo->vpu_addr = 0;
bo->ctx = NULL;
}
-}
-static void ivpu_bo_unbind(struct ivpu_bo *bo)
-{
- mutex_lock(&bo->lock);
- ivpu_bo_unbind_locked(bo);
- mutex_unlock(&bo->lock);
+ if (bo->base.base.import_attach)
+ return;
+
+ dma_resv_lock(bo->base.base.resv, NULL);
+ if (bo->base.sgt) {
+ dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(bo->base.sgt);
+ kfree(bo->base.sgt);
+ bo->base.sgt = NULL;
+ }
+ dma_resv_unlock(bo->base.base.resv);
}
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
struct ivpu_bo *bo;
@@ -146,8 +143,10 @@ void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
mutex_lock(&bo->lock);
- if (bo->ctx == ctx)
+ if (bo->ctx == ctx) {
+ ivpu_dbg_bo(vdev, bo, "unbind");
ivpu_bo_unbind_locked(bo);
+ }
mutex_unlock(&bo->lock);
}
mutex_unlock(&vdev->bo_list_lock);
@@ -199,9 +198,6 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
list_add_tail(&bo->bo_list_node, &vdev->bo_list);
mutex_unlock(&vdev->bo_list_lock);
- ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n",
- bo->vpu_addr, bo->base.base.size, flags);
-
return bo;
}
@@ -212,6 +208,12 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
struct ivpu_bo *bo = to_ivpu_bo(obj);
struct ivpu_addr_range *range;
+ if (bo->ctx) {
+ ivpu_warn(vdev, "Can't add BO to ctx %u: already in ctx %u\n",
+ file_priv->ctx.id, bo->ctx->id);
+ return -EALREADY;
+ }
+
if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
range = &vdev->hw->ranges.shave;
else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
@@ -227,62 +229,24 @@ static void ivpu_bo_free(struct drm_gem_object *obj)
struct ivpu_device *vdev = to_ivpu_device(obj->dev);
struct ivpu_bo *bo = to_ivpu_bo(obj);
+ ivpu_dbg_bo(vdev, bo, "free");
+
mutex_lock(&vdev->bo_list_lock);
list_del(&bo->bo_list_node);
mutex_unlock(&vdev->bo_list_lock);
drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
- ivpu_dbg_bo(vdev, bo, "free");
-
- ivpu_bo_unbind(bo);
+ ivpu_bo_unbind_locked(bo);
mutex_destroy(&bo->lock);
drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
drm_gem_shmem_free(&bo->base);
}
-static const struct dma_buf_ops ivpu_bo_dmabuf_ops = {
- .cache_sgt_mapping = true,
- .attach = drm_gem_map_attach,
- .detach = drm_gem_map_detach,
- .map_dma_buf = drm_gem_map_dma_buf,
- .unmap_dma_buf = drm_gem_unmap_dma_buf,
- .release = drm_gem_dmabuf_release,
- .mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
-};
-
-static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags)
-{
- struct drm_device *dev = obj->dev;
- struct dma_buf_export_info exp_info = {
- .exp_name = KBUILD_MODNAME,
- .owner = dev->driver->fops->owner,
- .ops = &ivpu_bo_dmabuf_ops,
- .size = obj->size,
- .flags = flags,
- .priv = obj,
- .resv = obj->resv,
- };
- void *sgt;
-
- /*
- * Make sure that pages are allocated and dma-mapped before exporting the bo.
- * DMA-mapping is required if the bo will be imported to the same device.
- */
- sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj));
- if (IS_ERR(sgt))
- return sgt;
-
- return drm_gem_dmabuf_export(dev, &exp_info);
-}
-
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_bo_free,
.open = ivpu_bo_open,
- .export = ivpu_bo_export,
.print_info = drm_gem_shmem_object_print_info,
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
@@ -315,11 +279,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
return PTR_ERR(bo);
}
- ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle);
- if (!ret) {
+ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
+ if (!ret)
args->vpu_addr = bo->vpu_addr;
- args->handle = bo->handle;
- }
drm_gem_object_put(&bo->base.base);
@@ -361,7 +323,9 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
if (ret)
goto err_put;
+ dma_resv_lock(bo->base.base.resv, NULL);
ret = drm_gem_shmem_vmap(&bo->base, &map);
+ dma_resv_unlock(bo->base.base.resv);
if (ret)
goto err_put;
@@ -376,7 +340,10 @@ void ivpu_bo_free_internal(struct ivpu_bo *bo)
{
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
+ dma_resv_lock(bo->base.base.resv, NULL);
drm_gem_shmem_vunmap(&bo->base, &map);
+ dma_resv_unlock(bo->base.base.resv);
+
drm_gem_object_put(&bo->base.base);
}
@@ -432,19 +399,11 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
- unsigned long dma_refcount = 0;
-
mutex_lock(&bo->lock);
- if (bo->base.base.dma_buf && bo->base.base.dma_buf->file)
- dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count);
-
- drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu",
- bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size,
- bo->flags, kref_read(&bo->base.base.refcount), dma_refcount);
-
- if (bo->base.base.import_attach)
- drm_printf(p, " imported");
+ drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
+ bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size,
+ bo->flags, kref_read(&bo->base.base.refcount));
if (bo->base.pages)
drm_printf(p, " has_pages");
@@ -452,6 +411,9 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
if (bo->mmu_mapped)
drm_printf(p, " mmu_mapped");
+ if (bo->base.base.import_attach)
+ drm_printf(p, " imported");
+
drm_printf(p, "\n");
mutex_unlock(&bo->lock);
@@ -462,8 +424,8 @@ void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
struct ivpu_device *vdev = to_ivpu_device(dev);
struct ivpu_bo *bo;
- drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n",
- "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs");
+ drm_printf(p, "%-9s %-3s %-14s %-10s %-10s %-4s %s\n",
+ "bo", "ctx", "vpu_addr", "size", "flags", "refs", "attribs");
mutex_lock(&vdev->bo_list_lock);
list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index d75cad0d3c74..a8559211c70d 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -19,14 +19,13 @@ struct ivpu_bo {
struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
u64 vpu_addr;
- u32 handle;
u32 flags;
u32 job_status; /* Valid only for command buffer */
bool mmu_mapped;
};
int ivpu_bo_pin(struct ivpu_bo *bo);
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
+void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 574cdeefb66b..89af1006df55 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -510,22 +510,12 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
return ret;
}
-static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
-{
- ivpu_boot_dpu_active_drive(vdev, false);
- ivpu_boot_pwr_island_isolation_drive(vdev, true);
- ivpu_boot_pwr_island_trickle_drive(vdev, false);
- ivpu_boot_pwr_island_drive(vdev, false);
-
- return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
-}
-
static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
{
u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val);
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val);
REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val);
@@ -616,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
return 0;
}
+static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev)
+{
+ int ret;
+ u32 val;
+
+ if (IVPU_WA(punit_disabled))
+ return 0;
+
+ ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
+ return ret;
+ }
+
+ val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
+ val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
+ REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
+
+ ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+ if (ret)
+ ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+ return ret;
+}
+
static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
{
int ret = 0;
- if (ivpu_boot_pwr_domain_disable(vdev)) {
- ivpu_err(vdev, "Failed to disable power domain\n");
+ if (ivpu_hw_37xx_ip_reset(vdev)) {
+ ivpu_err(vdev, "Failed to reset NPU\n");
ret = -EIO;
}
@@ -661,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
{
int ret;
+ /* PLL requests may fail when powering down, so issue WP 0 here */
+ ret = ivpu_pll_disable(vdev);
+ if (ret)
+ ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret);
+
ret = ivpu_hw_37xx_d0i3_disable(vdev);
if (ret)
ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -875,24 +895,18 @@ static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev)
static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
{
- ivpu_err_ratelimited(vdev, "WDT NCE irq\n");
-
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
}
static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
{
- ivpu_err_ratelimited(vdev, "WDT MSS irq\n");
-
ivpu_hw_wdt_disable(vdev);
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
}
static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
{
- ivpu_err_ratelimited(vdev, "NOC Firewall irq\n");
-
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
}
/* Handler for IRQs from VPU core (irqV) */
@@ -970,7 +984,7 @@ static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status);
if (schedule_recovery)
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
return true;
}
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index eba2fdef2ace..a1523d0b1ef3 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -24,7 +24,7 @@
#define SKU_HW_ID_SHIFT 16u
#define SKU_HW_ID_MASK 0xffff0000u
-#define PLL_CONFIG_DEFAULT 0x1
+#define PLL_CONFIG_DEFAULT 0x0
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_REF_CLK_FREQ (50 * 1000000)
@@ -530,7 +530,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES);
val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val);
REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val);
@@ -704,7 +704,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
{
struct ivpu_hw_info *hw = vdev->hw;
u32 tile_disable;
- u32 tile_enable;
u32 fuse;
fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE);
@@ -725,10 +724,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
else
ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM);
- tile_enable = (~tile_disable) & TILE_MAX_MASK;
-
- hw->sku = REG_SET_FLD_NUM(SKU, HW_ID, LNL_HW_ID, hw->sku);
- hw->sku = REG_SET_FLD_NUM(SKU, TILE, tile_enable, hw->sku);
hw->tile_fuse = tile_disable;
hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
@@ -746,7 +741,7 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
return 0;
}
-static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
+static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev)
{
int ret;
u32 val;
@@ -768,6 +763,23 @@ static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
return ret;
}
+static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
+{
+ int ret = 0;
+
+ if (ivpu_hw_40xx_ip_reset(vdev)) {
+ ivpu_err(vdev, "Failed to reset VPU IP\n");
+ ret = -EIO;
+ }
+
+ if (ivpu_pll_disable(vdev)) {
+ ivpu_err(vdev, "Failed to disable PLL\n");
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev)
{
int ret;
@@ -913,7 +925,7 @@ static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
- if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev))
+ if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev))
ivpu_warn(vdev, "Failed to reset the VPU\n");
if (ivpu_pll_disable(vdev)) {
@@ -1032,18 +1044,18 @@ static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev)
static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
{
/* TODO: For LNN hang consider engine reset instead of full recovery */
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
}
static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
{
ivpu_hw_wdt_disable(vdev);
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
}
static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
{
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
}
/* Handler for IRQs from VPU core (irqV) */
@@ -1137,7 +1149,7 @@ static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
if (schedule_recovery)
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
return true;
}
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index e86621f16f85..fa66c39b57ec 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -343,10 +343,8 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
vdev->timeout.jsm);
- if (hb_ret == -ETIMEDOUT) {
- ivpu_hw_diagnose_failure(vdev);
- ivpu_pm_schedule_recovery(vdev);
- }
+ if (hb_ret == -ETIMEDOUT)
+ ivpu_pm_trigger_recovery(vdev, "IPC timeout");
return ret;
}
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 7206cf9cdb4a..e70cfb859339 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -112,22 +112,20 @@ static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engin
}
}
-void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv)
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
{
int i;
- mutex_lock(&file_priv->lock);
+ lockdep_assert_held(&file_priv->lock);
for (i = 0; i < IVPU_NUM_ENGINES; i++)
ivpu_cmdq_release_locked(file_priv, i);
-
- mutex_unlock(&file_priv->lock);
}
/*
* Mark the doorbell as unregistered and reset job queue pointers.
* This function needs to be called when the VPU hardware is restarted
- * and FW looses job queue state. The next time job queue is used it
+ * and FW loses job queue state. The next time job queue is used it
* will be registered again.
*/
static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
@@ -161,15 +159,13 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
struct ivpu_file_priv *file_priv;
unsigned long ctx_id;
- xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
- file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
- if (!file_priv)
- continue;
+ mutex_lock(&vdev->context_list_lock);
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv)
ivpu_cmdq_reset_all(file_priv);
- ivpu_file_priv_put(&file_priv);
- }
+ mutex_unlock(&vdev->context_list_lock);
+
}
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -243,60 +239,32 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
return &fence->base;
}
-static void job_get(struct ivpu_job *job, struct ivpu_job **link)
+static void ivpu_job_destroy(struct ivpu_job *job)
{
struct ivpu_device *vdev = job->vdev;
-
- kref_get(&job->ref);
- *link = job;
-
- ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
-}
-
-static void job_release(struct kref *ref)
-{
- struct ivpu_job *job = container_of(ref, struct ivpu_job, ref);
- struct ivpu_device *vdev = job->vdev;
u32 i;
+ ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
+ job->job_id, job->file_priv->ctx.id, job->engine_idx);
+
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
drm_gem_object_put(&job->bos[i]->base.base);
dma_fence_put(job->done_fence);
ivpu_file_priv_put(&job->file_priv);
-
- ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id);
kfree(job);
-
- /* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */
- ivpu_rpm_put(vdev);
-}
-
-static void job_put(struct ivpu_job *job)
-{
- struct ivpu_device *vdev = job->vdev;
-
- ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
- kref_put(&job->ref, job_release);
}
static struct ivpu_job *
-ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
+ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_job *job;
- int ret;
-
- ret = ivpu_rpm_get(vdev);
- if (ret < 0)
- return NULL;
job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL);
if (!job)
- goto err_rpm_put;
-
- kref_init(&job->ref);
+ return NULL;
job->vdev = vdev;
job->engine_idx = engine_idx;
@@ -310,17 +278,14 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
job->file_priv = ivpu_file_priv_get(file_priv);
ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
-
return job;
err_free_job:
kfree(job);
-err_rpm_put:
- ivpu_rpm_put(vdev);
return NULL;
}
-static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
struct ivpu_job *job;
@@ -329,7 +294,7 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
return -ENOENT;
if (job->file_priv->has_mmu_faults)
- job_status = VPU_JSM_STATUS_ABORTED;
+ job_status = DRM_IVPU_JOB_STATUS_ABORTED;
job->bos[CMD_BUF_IDX]->job_status = job_status;
dma_fence_signal(job->done_fence);
@@ -337,9 +302,10 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
- job_put(job);
+ ivpu_rpm_put(vdev);
return 0;
}
@@ -349,10 +315,10 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
unsigned long id;
xa_for_each(&vdev->submitted_jobs_xa, id, job)
- ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED);
+ ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
}
-static int ivpu_direct_job_submission(struct ivpu_job *job)
+static int ivpu_job_submit(struct ivpu_job *job)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = job->vdev;
@@ -360,53 +326,65 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
struct ivpu_cmdq *cmdq;
int ret;
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&file_priv->lock);
cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
if (!cmdq) {
- ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n",
- file_priv->ctx.id, job->engine_idx);
+ ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n",
+ file_priv->ctx.id, job->engine_idx);
ret = -EINVAL;
- goto err_unlock;
+ goto err_unlock_file_priv;
}
job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
- job_get(job, &job);
- ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
+ xa_lock(&vdev->submitted_jobs_xa);
+ ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
if (ret) {
- ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret);
- goto err_job_put;
+ ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
+ file_priv->ctx.id);
+ ret = -EBUSY;
+ goto err_unlock_submitted_jobs_xa;
}
ret = ivpu_cmdq_push_job(cmdq, job);
if (ret)
- goto err_xa_erase;
+ goto err_erase_xa;
ivpu_start_job_timeout_detection(vdev);
- ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
- job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
- job->engine_idx, cmdq->jobq->header.tail);
-
- if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) {
- ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
cmdq->jobq->header.head = cmdq->jobq->header.tail;
wmb(); /* Flush WC buffer for jobq header */
} else {
ivpu_cmdq_ring_db(vdev, cmdq);
}
+ ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n",
+ job->job_id, file_priv->ctx.id, job->engine_idx,
+ job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
+
+ xa_unlock(&vdev->submitted_jobs_xa);
+
mutex_unlock(&file_priv->lock);
+
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
+ ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+
return 0;
-err_xa_erase:
- xa_erase(&vdev->submitted_jobs_xa, job->job_id);
-err_job_put:
- job_put(job);
-err_unlock:
+err_erase_xa:
+ __xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_unlock_submitted_jobs_xa:
+ xa_unlock(&vdev->submitted_jobs_xa);
+err_unlock_file_priv:
mutex_unlock(&file_priv->lock);
+ ivpu_rpm_put(vdev);
return ret;
}
@@ -488,6 +466,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (params->engine > DRM_IVPU_ENGINE_COPY)
return -EINVAL;
+ if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
+ return -EINVAL;
+
if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
return -EINVAL;
@@ -509,44 +490,49 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
params->buffer_count * sizeof(u32));
if (ret) {
ret = -EFAULT;
- goto free_handles;
+ goto err_free_handles;
}
if (!drm_dev_enter(&vdev->drm, &idx)) {
ret = -ENODEV;
- goto free_handles;
+ goto err_free_handles;
}
ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
file_priv->ctx.id, params->buffer_count);
- job = ivpu_create_job(file_priv, params->engine, params->buffer_count);
+ job = ivpu_job_create(file_priv, params->engine, params->buffer_count);
if (!job) {
ivpu_err(vdev, "Failed to create job\n");
ret = -ENOMEM;
- goto dev_exit;
+ goto err_exit_dev;
}
ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
params->commands_offset);
if (ret) {
- ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret);
- goto job_put;
+ ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
+ goto err_destroy_job;
}
- ret = ivpu_direct_job_submission(job);
- if (ret) {
- dma_fence_signal(job->done_fence);
- ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret);
- }
+ down_read(&vdev->pm->reset_lock);
+ ret = ivpu_job_submit(job);
+ up_read(&vdev->pm->reset_lock);
+ if (ret)
+ goto err_signal_fence;
-job_put:
- job_put(job);
-dev_exit:
drm_dev_exit(idx);
-free_handles:
kfree(buf_handles);
+ return ret;
+err_signal_fence:
+ dma_fence_signal(job->done_fence);
+err_destroy_job:
+ ivpu_job_destroy(job);
+err_exit_dev:
+ drm_dev_exit(idx);
+err_free_handles:
+ kfree(buf_handles);
return ret;
}
@@ -568,7 +554,7 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
}
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
- ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+ ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
ivpu_start_job_timeout_detection(vdev);
}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 45a2f2ec82e5..ca4984071cc7 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -43,7 +43,6 @@ struct ivpu_cmdq {
will update the job status
*/
struct ivpu_job {
- struct kref ref;
struct ivpu_device *vdev;
struct ivpu_file_priv *file_priv;
struct dma_fence *done_fence;
@@ -56,7 +55,7 @@ struct ivpu_job {
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
-void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 2228c44b115f..91bd640655ab 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -7,6 +7,7 @@
#include <linux/highmem.h>
#include "ivpu_drv.h"
+#include "ivpu_hw.h"
#include "ivpu_hw_reg_io.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
@@ -71,10 +72,10 @@
#define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */
#define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2)
-#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1)
-#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1)
-#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1)
+#define IVPU_MMU_Q_WRAP_MASK GENMASK(IVPU_MMU_Q_COUNT_LOG2, 0)
+#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1)
#define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK)
+#define IVPU_MMU_Q_WRP(val) ((val) & IVPU_MMU_Q_COUNT)
#define IVPU_MMU_CMDQ_CMD_SIZE 16
#define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE)
@@ -474,20 +475,32 @@ static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
return 0;
}
+static bool ivpu_mmu_queue_is_full(struct ivpu_mmu_queue *q)
+{
+ return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+ (IVPU_MMU_Q_WRP(q->prod) != IVPU_MMU_Q_WRP(q->cons)));
+}
+
+static bool ivpu_mmu_queue_is_empty(struct ivpu_mmu_queue *q)
+{
+ return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+ (IVPU_MMU_Q_WRP(q->prod) == IVPU_MMU_Q_WRP(q->cons)));
+}
+
static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
{
- struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
- u64 *queue_buffer = q->base;
- int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
+ struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+ u64 *queue_buffer = cmdq->base;
+ int idx = IVPU_MMU_Q_IDX(cmdq->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
- if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) {
+ if (ivpu_mmu_queue_is_full(cmdq)) {
ivpu_err(vdev, "Failed to write MMU CMD %s\n", name);
return -EBUSY;
}
queue_buffer[idx] = data0;
queue_buffer[idx + 1] = data1;
- q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
+ cmdq->prod = (cmdq->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1);
@@ -518,6 +531,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret,
ivpu_mmu_cmdq_err_to_str(err));
+ ivpu_hw_diagnose_failure(vdev);
}
return ret;
@@ -558,7 +572,6 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
mmu->cmdq.cons = 0;
memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE);
- clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE);
mmu->evtq.prod = 0;
mmu->evtq.cons = 0;
@@ -872,20 +885,15 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
- if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
+ if (ivpu_mmu_queue_is_empty(evtq))
return NULL;
- clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);
-
evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
- REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons);
-
return evt;
}
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
{
- bool schedule_recovery = false;
u32 *event;
u32 ssid;
@@ -895,14 +903,22 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
ivpu_mmu_dump_event(vdev, event);
ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
- schedule_recovery = true;
- else
- ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+ ivpu_pm_trigger_recovery(vdev, "MMU event");
+ return;
+ }
+
+ ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+ REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
}
+}
- if (schedule_recovery)
- ivpu_pm_schedule_recovery(vdev);
+void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
+{
+ u32 *event;
+
+ while ((event = ivpu_mmu_get_event(vdev)) != NULL)
+ ivpu_mmu_dump_event(vdev, event);
}
void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h
index cb551126806b..6fa35c240710 100644
--- a/drivers/accel/ivpu/ivpu_mmu.h
+++ b/drivers/accel/ivpu/ivpu_mmu.h
@@ -46,5 +46,6 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev);
+void ivpu_mmu_evtq_dump(struct ivpu_device *vdev);
#endif /* __IVPU_MMU_H__ */
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 12a8c09d4547..fe6161299236 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -355,6 +355,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
+ ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n",
+ ctx->id, dma_addr, vpu_addr, size);
+
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
if (ret) {
ivpu_err(vdev, "Failed to map context pages\n");
@@ -366,6 +369,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
/* Ensure page table modifications are flushed from wc buffers to memory */
wmb();
+
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -388,14 +392,19 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
+ dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
+ ivpu_dbg(vdev, MMU_MAP, "Unmap ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n",
+ ctx->id, dma_addr, vpu_addr, size);
+
ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
vpu_addr += size;
}
/* Ensure page table modifications are flushed from wc buffers to memory */
wmb();
+
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 0af8864cb3b5..5f73854234ba 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -13,6 +13,7 @@
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_fw.h"
+#include "ivpu_fw_log.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
@@ -57,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev)
{
int ret;
+ /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+ pci_save_state(to_pci_dev(vdev->drm.dev));
+
ret = ivpu_shutdown(vdev);
- if (ret) {
+ if (ret)
ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
- return ret;
- }
+
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
return ret;
}
@@ -70,6 +74,9 @@ static int ivpu_resume(struct ivpu_device *vdev)
{
int ret;
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+ pci_restore_state(to_pci_dev(vdev->drm.dev));
+
retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
@@ -111,22 +118,37 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
int ret;
-retry:
- ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
- if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
- cond_resched();
- goto retry;
- }
+ ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
- if (ret && ret != -EAGAIN)
- ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
+ ret = pm_runtime_resume_and_get(vdev->drm.dev);
+ if (ret)
+ ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);
+
+ ivpu_fw_log_dump(vdev);
+
+ atomic_inc(&vdev->pm->reset_counter);
+ atomic_set(&vdev->pm->reset_pending, 1);
+ down_write(&vdev->pm->reset_lock);
+
+ ivpu_suspend(vdev);
+ ivpu_pm_prepare_cold_boot(vdev);
+ ivpu_jobs_abort_all(vdev);
+
+ ret = ivpu_resume(vdev);
+ if (ret)
+ ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
+
+ up_write(&vdev->pm->reset_lock);
+ atomic_set(&vdev->pm->reset_pending, 0);
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
+ pm_runtime_mark_last_busy(vdev->drm.dev);
+ pm_runtime_put_autosuspend(vdev->drm.dev);
}
-void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
+void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
{
- struct ivpu_pm_info *pm = vdev->pm;
+ ivpu_err(vdev, "Recovery triggered by %s\n", reason);
if (ivpu_disable_recovery) {
ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
@@ -138,10 +160,11 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
return;
}
- /* Schedule recovery if it's not in progress */
- if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
- ivpu_hw_irq_disable(vdev);
- queue_work(system_long_wq, &pm->recovery_work);
+ /* Trigger recovery if it's not in progress */
+ if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
+ ivpu_hw_diagnose_failure(vdev);
+ ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
+ queue_work(system_long_wq, &vdev->pm->recovery_work);
}
}
@@ -149,12 +172,8 @@ static void ivpu_job_timeout_work(struct work_struct *work)
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
- unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
-
- ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms);
- ivpu_hw_diagnose_failure(vdev);
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "TDR");
}
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
@@ -192,9 +211,6 @@ int ivpu_pm_suspend_cb(struct device *dev)
ivpu_suspend(vdev);
ivpu_pm_prepare_warm_boot(vdev);
- pci_save_state(to_pci_dev(dev));
- pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
-
ivpu_dbg(vdev, PM, "Suspend done.\n");
return 0;
@@ -208,9 +224,6 @@ int ivpu_pm_resume_cb(struct device *dev)
ivpu_dbg(vdev, PM, "Resume..\n");
- pci_set_power_state(to_pci_dev(dev), PCI_D0);
- pci_restore_state(to_pci_dev(dev));
-
ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to resume: %d\n", ret);
@@ -227,6 +240,9 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
bool hw_is_idle = true;
int ret;
+ drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+ drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));
+
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
@@ -247,7 +263,8 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
if (!hw_is_idle) {
- ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
+ ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n");
+ ivpu_fw_log_dump(vdev);
ivpu_pm_prepare_cold_boot(vdev);
} else {
ivpu_pm_prepare_warm_boot(vdev);
@@ -308,11 +325,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
- pm_runtime_get_sync(vdev->drm.dev);
-
ivpu_dbg(vdev, PM, "Pre-reset..\n");
atomic_inc(&vdev->pm->reset_counter);
- atomic_set(&vdev->pm->in_reset, 1);
+ atomic_set(&vdev->pm->reset_pending, 1);
+
+ pm_runtime_get_sync(vdev->drm.dev);
+ down_write(&vdev->pm->reset_lock);
ivpu_prepare_for_reset(vdev);
ivpu_hw_reset(vdev);
ivpu_pm_prepare_cold_boot(vdev);
@@ -329,9 +347,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
- atomic_set(&vdev->pm->in_reset, 0);
+ up_write(&vdev->pm->reset_lock);
+ atomic_set(&vdev->pm->reset_pending, 0);
ivpu_dbg(vdev, PM, "Post-reset done.\n");
+ pm_runtime_mark_last_busy(vdev->drm.dev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
@@ -344,7 +364,10 @@ void ivpu_pm_init(struct ivpu_device *vdev)
pm->vdev = vdev;
pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
- atomic_set(&pm->in_reset, 0);
+ init_rwsem(&pm->reset_lock);
+ atomic_set(&pm->reset_pending, 0);
+ atomic_set(&pm->reset_counter, 0);
+
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index 97c6e0b0aa42..ec60fbeefefc 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -6,6 +6,7 @@
#ifndef __IVPU_PM_H__
#define __IVPU_PM_H__
+#include <linux/rwsem.h>
#include <linux/types.h>
struct ivpu_device;
@@ -14,8 +15,9 @@ struct ivpu_pm_info {
struct ivpu_device *vdev;
struct delayed_work job_timeout_work;
struct work_struct recovery_work;
- atomic_t in_reset;
+ struct rw_semaphore reset_lock;
atomic_t reset_counter;
+ atomic_t reset_pending;
bool is_warmboot;
u32 suspend_reschedule_counter;
};
@@ -37,7 +39,7 @@ int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
-void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason);
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 7b7c605166e0..ab2a82cb1b0b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,7 +26,6 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
-#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@@ -674,78 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
-/*
- * Only a single callback can be registered for CXL CPER events.
- */
-static DECLARE_RWSEM(cxl_cper_rw_sem);
-static cxl_cper_callback cper_callback;
-
-/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
-
-/*
- * General Media Event Record
- * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
- */
-#define CPER_SEC_CXL_GEN_MEDIA_GUID \
- GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \
- 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
-
-/*
- * DRAM Event Record
- * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
- */
-#define CPER_SEC_CXL_DRAM_GUID \
- GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \
- 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
-
-/*
- * Memory Module Event Record
- * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
- */
-#define CPER_SEC_CXL_MEM_MODULE_GUID \
- GUID_INIT(0xfe927475, 0xdd59, 0x4339, \
- 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
-
-static void cxl_cper_post_event(enum cxl_event_type event_type,
- struct cxl_cper_event_rec *rec)
-{
- if (rec->hdr.length <= sizeof(rec->hdr) ||
- rec->hdr.length > sizeof(*rec)) {
- pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
- rec->hdr.length);
- return;
- }
-
- if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
- pr_err(FW_WARN "CXL CPER invalid event\n");
- return;
- }
-
- guard(rwsem_read)(&cxl_cper_rw_sem);
- if (cper_callback)
- cper_callback(event_type, rec);
-}
-
-int cxl_cper_register_callback(cxl_cper_callback callback)
-{
- guard(rwsem_write)(&cxl_cper_rw_sem);
- if (cper_callback)
- return -EINVAL;
- cper_callback = callback;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
-
-int cxl_cper_unregister_callback(cxl_cper_callback callback)
-{
- guard(rwsem_write)(&cxl_cper_rw_sem);
- if (callback != cper_callback)
- return -EINVAL;
- cper_callback = NULL;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
-
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -780,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
- } else if (guid_equal(sec_type,
- &CPER_SEC_CXL_MEM_MODULE_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index dbdee2924594..02255795b800 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -525,10 +525,12 @@ static void acpi_ec_clear(struct acpi_ec *ec)
static void acpi_ec_enable_event(struct acpi_ec *ec)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
if (acpi_ec_started(ec))
__acpi_ec_enable_event(ec);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
/* Drain additional events if hardware requires that */
if (EC_FLAGS_CLEAR_ON_RESUME)
@@ -544,9 +546,11 @@ static void __acpi_ec_flush_work(void)
static void acpi_ec_disable_event(struct acpi_ec *ec)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
__acpi_ec_disable_event(ec);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
/*
* When ec_freeze_events is true, we need to flush events in
@@ -567,9 +571,10 @@ void acpi_ec_flush_work(void)
static bool acpi_ec_guard_event(struct acpi_ec *ec)
{
+ unsigned long flags;
bool guarded;
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
/*
* If firmware SCI_EVT clearing timing is "event", we actually
* don't know when the SCI_EVT will be cleared by firmware after
@@ -585,29 +590,31 @@ static bool acpi_ec_guard_event(struct acpi_ec *ec)
guarded = ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT &&
ec->event_state != EC_EVENT_READY &&
(!ec->curr || ec->curr->command != ACPI_EC_COMMAND_QUERY);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
return guarded;
}
static int ec_transaction_polled(struct acpi_ec *ec)
{
+ unsigned long flags;
int ret = 0;
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_POLL))
ret = 1;
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
return ret;
}
static int ec_transaction_completed(struct acpi_ec *ec)
{
+ unsigned long flags;
int ret = 0;
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE))
ret = 1;
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
return ret;
}
@@ -749,6 +756,7 @@ static int ec_guard(struct acpi_ec *ec)
static int ec_poll(struct acpi_ec *ec)
{
+ unsigned long flags;
int repeat = 5; /* number of command restarts */
while (repeat--) {
@@ -757,14 +765,14 @@ static int ec_poll(struct acpi_ec *ec)
do {
if (!ec_guard(ec))
return 0;
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
advance_transaction(ec, false);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
} while (time_before(jiffies, delay));
pr_debug("controller reset, restart transaction\n");
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
start_transaction(ec);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
}
return -ETIME;
}
@@ -772,10 +780,11 @@ static int ec_poll(struct acpi_ec *ec)
static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
struct transaction *t)
{
+ unsigned long tmp;
int ret = 0;
/* start transaction */
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, tmp);
/* Enable GPE for command processing (IBF=0/OBF=1) */
if (!acpi_ec_submit_flushable_request(ec)) {
ret = -EINVAL;
@@ -786,11 +795,11 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
ec->curr = t;
ec_dbg_req("Command(%s) started", acpi_ec_cmd_string(t->command));
start_transaction(ec);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, tmp);
ret = ec_poll(ec);
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, tmp);
if (t->irq_count == ec_storm_threshold)
acpi_ec_unmask_events(ec);
ec_dbg_req("Command(%s) stopped", acpi_ec_cmd_string(t->command));
@@ -799,7 +808,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
acpi_ec_complete_request(ec);
ec_dbg_ref(ec, "Decrease command");
unlock:
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, tmp);
return ret;
}
@@ -927,7 +936,9 @@ EXPORT_SYMBOL(ec_get_handle);
static void acpi_ec_start(struct acpi_ec *ec, bool resuming)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
if (!test_and_set_bit(EC_FLAGS_STARTED, &ec->flags)) {
ec_dbg_drv("Starting EC");
/* Enable GPE for event processing (SCI_EVT=1) */
@@ -937,28 +948,31 @@ static void acpi_ec_start(struct acpi_ec *ec, bool resuming)
}
ec_log_drv("EC started");
}
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
}
static bool acpi_ec_stopped(struct acpi_ec *ec)
{
+ unsigned long flags;
bool flushed;
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
flushed = acpi_ec_flushed(ec);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
return flushed;
}
static void acpi_ec_stop(struct acpi_ec *ec, bool suspending)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
if (acpi_ec_started(ec)) {
ec_dbg_drv("Stopping EC");
set_bit(EC_FLAGS_STOPPED, &ec->flags);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
wait_event(ec->wait, acpi_ec_stopped(ec));
- spin_lock(&ec->lock);
+ spin_lock_irqsave(&ec->lock, flags);
/* Disable GPE for event processing (SCI_EVT=1) */
if (!suspending) {
acpi_ec_complete_request(ec);
@@ -969,25 +983,29 @@ static void acpi_ec_stop(struct acpi_ec *ec, bool suspending)
clear_bit(EC_FLAGS_STOPPED, &ec->flags);
ec_log_drv("EC stopped");
}
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
}
static void acpi_ec_enter_noirq(struct acpi_ec *ec)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
ec->busy_polling = true;
ec->polling_guard = 0;
ec_log_drv("interrupt blocked");
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
}
static void acpi_ec_leave_noirq(struct acpi_ec *ec)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
ec->busy_polling = ec_busy_polling;
ec->polling_guard = ec_polling_guard;
ec_log_drv("interrupt unblocked");
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
}
void acpi_ec_block_transactions(void)
@@ -1119,9 +1137,9 @@ static void acpi_ec_event_processor(struct work_struct *work)
ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit);
- spin_lock(&ec->lock);
+ spin_lock_irq(&ec->lock);
ec->queries_in_progress--;
- spin_unlock(&ec->lock);
+ spin_unlock_irq(&ec->lock);
acpi_ec_put_query_handler(handler);
kfree(q);
@@ -1184,12 +1202,12 @@ static int acpi_ec_submit_query(struct acpi_ec *ec)
*/
ec_dbg_evt("Query(0x%02x) scheduled", value);
- spin_lock(&ec->lock);
+ spin_lock_irq(&ec->lock);
ec->queries_in_progress++;
queue_work(ec_query_wq, &q->work);
- spin_unlock(&ec->lock);
+ spin_unlock_irq(&ec->lock);
return 0;
@@ -1205,14 +1223,14 @@ static void acpi_ec_event_handler(struct work_struct *work)
ec_dbg_evt("Event started");
- spin_lock(&ec->lock);
+ spin_lock_irq(&ec->lock);
while (ec->events_to_process) {
- spin_unlock(&ec->lock);
+ spin_unlock_irq(&ec->lock);
acpi_ec_submit_query(ec);
- spin_lock(&ec->lock);
+ spin_lock_irq(&ec->lock);
ec->events_to_process--;
}
@@ -1229,11 +1247,11 @@ static void acpi_ec_event_handler(struct work_struct *work)
ec_dbg_evt("Event stopped");
- spin_unlock(&ec->lock);
+ spin_unlock_irq(&ec->lock);
guard_timeout = !!ec_guard(ec);
- spin_lock(&ec->lock);
+ spin_lock_irq(&ec->lock);
/* Take care of SCI_EVT unless someone else is doing that. */
if (guard_timeout && !ec->curr)
@@ -1246,7 +1264,7 @@ static void acpi_ec_event_handler(struct work_struct *work)
ec->events_in_progress--;
- spin_unlock(&ec->lock);
+ spin_unlock_irq(&ec->lock);
}
static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt)
@@ -1271,11 +1289,13 @@ static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt
static void acpi_ec_handle_interrupt(struct acpi_ec *ec)
{
- spin_lock(&ec->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
clear_gpe_and_advance_transaction(ec, true);
- spin_unlock(&ec->lock);
+ spin_unlock_irqrestore(&ec->lock, flags);
}
static u32 acpi_ec_gpe_handler(acpi_handle gpe_device,
@@ -2085,7 +2105,7 @@ bool acpi_ec_dispatch_gpe(void)
* Dispatch the EC GPE in-band, but do not report wakeup in any case
* to allow the caller to process events properly after that.
*/
- spin_lock(&first_ec->lock);
+ spin_lock_irq(&first_ec->lock);
if (acpi_ec_gpe_status_set(first_ec)) {
pm_pr_dbg("ACPI EC GPE status set\n");
@@ -2094,7 +2114,7 @@ bool acpi_ec_dispatch_gpe(void)
work_in_progress = acpi_ec_work_in_progress(first_ec);
}
- spin_unlock(&first_ec->lock);
+ spin_unlock_irq(&first_ec->lock);
if (!work_in_progress)
return false;
@@ -2107,11 +2127,11 @@ bool acpi_ec_dispatch_gpe(void)
pm_pr_dbg("ACPI EC work flushed\n");
- spin_lock(&first_ec->lock);
+ spin_lock_irq(&first_ec->lock);
work_in_progress = acpi_ec_work_in_progress(first_ec);
- spin_unlock(&first_ec->lock);
+ spin_unlock_irq(&first_ec->lock);
} while (work_in_progress && !pm_wakeup_pending());
return false;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 8dd23b19e997..eca24f41556d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -478,6 +478,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
{
WARN_ON(!list_empty(&thread->waiting_thread_node));
binder_enqueue_work_ilocked(work, &thread->todo);
+
+ /* (e)poll-based threads require an explicit wakeup signal when
+ * queuing their own work; they rely on these events to consume
+ * messages without I/O block. Without it, threads risk waiting
+ * indefinitely without handling the work.
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL &&
+ thread->pid == current->pid && !thread->process_todo)
+ wake_up_interruptible_sync(&thread->wait);
+
thread->process_todo = true;
}
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 42b51c9812a0..928ec93c6b45 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -116,15 +116,14 @@ config SATA_AHCI
If unsure, say N.
config SATA_MOBILE_LPM_POLICY
- int "Default SATA Link Power Management policy for low power chipsets"
+ int "Default SATA Link Power Management policy"
range 0 4
default 0
depends on SATA_AHCI
help
Select the Default SATA Link Power Management (LPM) policy to use
for chipsets / "South Bridges" supporting low-power modes. Such
- chipsets are typically found on most laptops but desktops and
- servers now also widely use chipsets supporting low power modes.
+ chipsets are ubiquitous across laptops, desktops and servers.
The value set has the following meanings:
0 => Keep firmware settings
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 3a5f3255f51b..78570684ff68 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -48,12 +48,20 @@ enum {
enum board_ids {
/* board IDs by feature in alphabetical order */
board_ahci,
+ board_ahci_43bit_dma,
board_ahci_ign_iferr,
- board_ahci_low_power,
board_ahci_no_debounce_delay,
- board_ahci_nomsi,
- board_ahci_noncq,
- board_ahci_nosntf,
+ board_ahci_no_msi,
+ /*
+ * board_ahci_pcs_quirk is for legacy Intel platforms.
+ * Modern Intel platforms should use board_ahci instead.
+ * (Some modern Intel platforms might have been added with
+ * board_ahci_pcs_quirk, however, we cannot change them to board_ahci
+ * without testing that the platform actually works without the quirk.)
+ */
+ board_ahci_pcs_quirk,
+ board_ahci_pcs_quirk_no_devslp,
+ board_ahci_pcs_quirk_no_sntf,
board_ahci_yes_fbs,
/* board IDs for specific chipsets in alphabetical order */
@@ -67,12 +75,6 @@ enum board_ids {
board_ahci_sb700, /* for SB700 and SB800 */
board_ahci_vt8251,
- /*
- * board IDs for Intel chipsets that support more than 6 ports
- * *and* end up needing the PCS quirk.
- */
- board_ahci_pcs7,
-
/* aliases */
board_ahci_mcp_linux = board_ahci_mcp65,
board_ahci_mcp67 = board_ahci_mcp65,
@@ -128,15 +130,15 @@ static const struct ata_port_info ahci_port_info[] = {
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
- [board_ahci_ign_iferr] = {
- AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR),
+ [board_ahci_43bit_dma] = {
+ AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY),
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
- [board_ahci_low_power] = {
- AHCI_HFLAGS (AHCI_HFLAG_USE_LPM_POLICY),
+ [board_ahci_ign_iferr] = {
+ AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR),
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
@@ -149,22 +151,31 @@ static const struct ata_port_info ahci_port_info[] = {
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
- [board_ahci_nomsi] = {
+ [board_ahci_no_msi] = {
AHCI_HFLAGS (AHCI_HFLAG_NO_MSI),
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
- [board_ahci_noncq] = {
- AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ),
+ [board_ahci_pcs_quirk] = {
+ AHCI_HFLAGS (AHCI_HFLAG_INTEL_PCS_QUIRK),
+ .flags = AHCI_FLAG_COMMON,
+ .pio_mask = ATA_PIO4,
+ .udma_mask = ATA_UDMA6,
+ .port_ops = &ahci_ops,
+ },
+ [board_ahci_pcs_quirk_no_devslp] = {
+ AHCI_HFLAGS (AHCI_HFLAG_INTEL_PCS_QUIRK |
+ AHCI_HFLAG_NO_DEVSLP),
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
- [board_ahci_nosntf] = {
- AHCI_HFLAGS (AHCI_HFLAG_NO_SNTF),
+ [board_ahci_pcs_quirk_no_sntf] = {
+ AHCI_HFLAGS (AHCI_HFLAG_INTEL_PCS_QUIRK |
+ AHCI_HFLAG_NO_SNTF),
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
@@ -186,6 +197,7 @@ static const struct ata_port_info ahci_port_info[] = {
.port_ops = &ahci_ops,
},
[board_ahci_avn] = {
+ AHCI_HFLAGS (AHCI_HFLAG_INTEL_PCS_QUIRK),
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
@@ -244,119 +256,113 @@ static const struct ata_port_info ahci_port_info[] = {
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_vt8251_ops,
},
- [board_ahci_pcs7] = {
- .flags = AHCI_FLAG_COMMON,
- .pio_mask = ATA_PIO4,
- .udma_mask = ATA_UDMA6,
- .port_ops = &ahci_ops,
- },
};
static const struct pci_device_id ahci_pci_tbl[] = {
/* Intel */
- { PCI_VDEVICE(INTEL, 0x06d6), board_ahci }, /* Comet Lake PCH-H RAID */
- { PCI_VDEVICE(INTEL, 0x2652), board_ahci }, /* ICH6 */
- { PCI_VDEVICE(INTEL, 0x2653), board_ahci }, /* ICH6M */
- { PCI_VDEVICE(INTEL, 0x27c1), board_ahci }, /* ICH7 */
- { PCI_VDEVICE(INTEL, 0x27c5), board_ahci }, /* ICH7M */
- { PCI_VDEVICE(INTEL, 0x27c3), board_ahci }, /* ICH7R */
+ { PCI_VDEVICE(INTEL, 0x06d6), board_ahci_pcs_quirk }, /* Comet Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x2652), board_ahci_pcs_quirk }, /* ICH6 */
+ { PCI_VDEVICE(INTEL, 0x2653), board_ahci_pcs_quirk }, /* ICH6M */
+ { PCI_VDEVICE(INTEL, 0x27c1), board_ahci_pcs_quirk }, /* ICH7 */
+ { PCI_VDEVICE(INTEL, 0x27c5), board_ahci_pcs_quirk }, /* ICH7M */
+ { PCI_VDEVICE(INTEL, 0x27c3), board_ahci_pcs_quirk }, /* ICH7R */
{ PCI_VDEVICE(AL, 0x5288), board_ahci_ign_iferr }, /* ULi M5288 */
- { PCI_VDEVICE(INTEL, 0x2681), board_ahci }, /* ESB2 */
- { PCI_VDEVICE(INTEL, 0x2682), board_ahci }, /* ESB2 */
- { PCI_VDEVICE(INTEL, 0x2683), board_ahci }, /* ESB2 */
- { PCI_VDEVICE(INTEL, 0x27c6), board_ahci }, /* ICH7-M DH */
- { PCI_VDEVICE(INTEL, 0x2821), board_ahci }, /* ICH8 */
- { PCI_VDEVICE(INTEL, 0x2822), board_ahci_nosntf }, /* ICH8/Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0x2824), board_ahci }, /* ICH8 */
- { PCI_VDEVICE(INTEL, 0x2829), board_ahci }, /* ICH8M */
- { PCI_VDEVICE(INTEL, 0x282a), board_ahci }, /* ICH8M */
- { PCI_VDEVICE(INTEL, 0x2922), board_ahci }, /* ICH9 */
- { PCI_VDEVICE(INTEL, 0x2923), board_ahci }, /* ICH9 */
- { PCI_VDEVICE(INTEL, 0x2924), board_ahci }, /* ICH9 */
- { PCI_VDEVICE(INTEL, 0x2925), board_ahci }, /* ICH9 */
- { PCI_VDEVICE(INTEL, 0x2927), board_ahci }, /* ICH9 */
- { PCI_VDEVICE(INTEL, 0x2929), board_ahci_low_power }, /* ICH9M */
- { PCI_VDEVICE(INTEL, 0x292a), board_ahci_low_power }, /* ICH9M */
- { PCI_VDEVICE(INTEL, 0x292b), board_ahci_low_power }, /* ICH9M */
- { PCI_VDEVICE(INTEL, 0x292c), board_ahci_low_power }, /* ICH9M */
- { PCI_VDEVICE(INTEL, 0x292f), board_ahci_low_power }, /* ICH9M */
- { PCI_VDEVICE(INTEL, 0x294d), board_ahci }, /* ICH9 */
- { PCI_VDEVICE(INTEL, 0x294e), board_ahci_low_power }, /* ICH9M */
- { PCI_VDEVICE(INTEL, 0x502a), board_ahci }, /* Tolapai */
- { PCI_VDEVICE(INTEL, 0x502b), board_ahci }, /* Tolapai */
- { PCI_VDEVICE(INTEL, 0x3a05), board_ahci }, /* ICH10 */
- { PCI_VDEVICE(INTEL, 0x3a22), board_ahci }, /* ICH10 */
- { PCI_VDEVICE(INTEL, 0x3a25), board_ahci }, /* ICH10 */
- { PCI_VDEVICE(INTEL, 0x3b22), board_ahci }, /* PCH AHCI */
- { PCI_VDEVICE(INTEL, 0x3b23), board_ahci }, /* PCH AHCI */
- { PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */
- { PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */
- { PCI_VDEVICE(INTEL, 0x3b29), board_ahci_low_power }, /* PCH M AHCI */
- { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */
- { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci_low_power }, /* PCH M RAID */
- { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */
- { PCI_VDEVICE(INTEL, 0x19b0), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b1), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b2), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b3), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b4), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b5), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b6), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19b7), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19bE), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19bF), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c0), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c1), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c2), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c3), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c4), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c5), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c6), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19c7), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19cE), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x19cF), board_ahci_pcs7 }, /* DNV AHCI */
- { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */
- { PCI_VDEVICE(INTEL, 0x1c03), board_ahci_low_power }, /* CPT M AHCI */
- { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */
- { PCI_VDEVICE(INTEL, 0x1c05), board_ahci_low_power }, /* CPT M RAID */
- { PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */
- { PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */
- { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
- { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */
- { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
- { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
- { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */
- { PCI_VDEVICE(INTEL, 0x1e03), board_ahci_low_power }, /* Panther M AHCI */
- { PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */
- { PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */
- { PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */
- { PCI_VDEVICE(INTEL, 0x1e07), board_ahci_low_power }, /* Panther M RAID */
- { PCI_VDEVICE(INTEL, 0x1e0e), board_ahci }, /* Panther Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c02), board_ahci }, /* Lynx Point AHCI */
- { PCI_VDEVICE(INTEL, 0x8c03), board_ahci_low_power }, /* Lynx M AHCI */
- { PCI_VDEVICE(INTEL, 0x8c04), board_ahci }, /* Lynx Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c05), board_ahci_low_power }, /* Lynx M RAID */
- { PCI_VDEVICE(INTEL, 0x8c06), board_ahci }, /* Lynx Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c07), board_ahci_low_power }, /* Lynx M RAID */
- { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci_low_power }, /* Lynx M RAID */
- { PCI_VDEVICE(INTEL, 0x9c02), board_ahci_low_power }, /* Lynx LP AHCI */
- { PCI_VDEVICE(INTEL, 0x9c03), board_ahci_low_power }, /* Lynx LP AHCI */
- { PCI_VDEVICE(INTEL, 0x9c04), board_ahci_low_power }, /* Lynx LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c05), board_ahci_low_power }, /* Lynx LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c06), board_ahci_low_power }, /* Lynx LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c07), board_ahci_low_power }, /* Lynx LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c0e), board_ahci_low_power }, /* Lynx LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c0f), board_ahci_low_power }, /* Lynx LP RAID */
- { PCI_VDEVICE(INTEL, 0x9dd3), board_ahci_low_power }, /* Cannon Lake PCH-LP AHCI */
- { PCI_VDEVICE(INTEL, 0x1f22), board_ahci }, /* Avoton AHCI */
- { PCI_VDEVICE(INTEL, 0x1f23), board_ahci }, /* Avoton AHCI */
- { PCI_VDEVICE(INTEL, 0x1f24), board_ahci }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0x1f25), board_ahci }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0x1f26), board_ahci }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0x1f27), board_ahci }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0x1f2e), board_ahci }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0x1f2f), board_ahci }, /* Avoton RAID */
+ { PCI_VDEVICE(INTEL, 0x2681), board_ahci_pcs_quirk }, /* ESB2 */
+ { PCI_VDEVICE(INTEL, 0x2682), board_ahci_pcs_quirk }, /* ESB2 */
+ { PCI_VDEVICE(INTEL, 0x2683), board_ahci_pcs_quirk }, /* ESB2 */
+ { PCI_VDEVICE(INTEL, 0x27c6), board_ahci_pcs_quirk }, /* ICH7-M DH */
+ { PCI_VDEVICE(INTEL, 0x2821), board_ahci_pcs_quirk }, /* ICH8 */
+ { PCI_VDEVICE(INTEL, 0x2822), board_ahci_pcs_quirk_no_sntf }, /* ICH8/Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0x2824), board_ahci_pcs_quirk }, /* ICH8 */
+ { PCI_VDEVICE(INTEL, 0x2829), board_ahci_pcs_quirk }, /* ICH8M */
+ { PCI_VDEVICE(INTEL, 0x282a), board_ahci_pcs_quirk }, /* ICH8M */
+ { PCI_VDEVICE(INTEL, 0x2922), board_ahci_pcs_quirk }, /* ICH9 */
+ { PCI_VDEVICE(INTEL, 0x2923), board_ahci_pcs_quirk }, /* ICH9 */
+ { PCI_VDEVICE(INTEL, 0x2924), board_ahci_pcs_quirk }, /* ICH9 */
+ { PCI_VDEVICE(INTEL, 0x2925), board_ahci_pcs_quirk }, /* ICH9 */
+ { PCI_VDEVICE(INTEL, 0x2927), board_ahci_pcs_quirk }, /* ICH9 */
+ { PCI_VDEVICE(INTEL, 0x2929), board_ahci_pcs_quirk }, /* ICH9M */
+ { PCI_VDEVICE(INTEL, 0x292a), board_ahci_pcs_quirk }, /* ICH9M */
+ { PCI_VDEVICE(INTEL, 0x292b), board_ahci_pcs_quirk }, /* ICH9M */
+ { PCI_VDEVICE(INTEL, 0x292c), board_ahci_pcs_quirk }, /* ICH9M */
+ { PCI_VDEVICE(INTEL, 0x292f), board_ahci_pcs_quirk }, /* ICH9M */
+ { PCI_VDEVICE(INTEL, 0x294d), board_ahci_pcs_quirk }, /* ICH9 */
+ { PCI_VDEVICE(INTEL, 0x294e), board_ahci_pcs_quirk }, /* ICH9M */
+ { PCI_VDEVICE(INTEL, 0x502a), board_ahci_pcs_quirk }, /* Tolapai */
+ { PCI_VDEVICE(INTEL, 0x502b), board_ahci_pcs_quirk }, /* Tolapai */
+ { PCI_VDEVICE(INTEL, 0x3a05), board_ahci_pcs_quirk }, /* ICH10 */
+ { PCI_VDEVICE(INTEL, 0x3a22), board_ahci_pcs_quirk }, /* ICH10 */
+ { PCI_VDEVICE(INTEL, 0x3a25), board_ahci_pcs_quirk }, /* ICH10 */
+ { PCI_VDEVICE(INTEL, 0x3b22), board_ahci_pcs_quirk }, /* PCH AHCI */
+ { PCI_VDEVICE(INTEL, 0x3b23), board_ahci_pcs_quirk }, /* PCH AHCI */
+ { PCI_VDEVICE(INTEL, 0x3b24), board_ahci_pcs_quirk }, /* PCH RAID */
+ { PCI_VDEVICE(INTEL, 0x3b25), board_ahci_pcs_quirk }, /* PCH RAID */
+ { PCI_VDEVICE(INTEL, 0x3b29), board_ahci_pcs_quirk }, /* PCH M AHCI */
+ { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci_pcs_quirk }, /* PCH RAID */
+ { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci_pcs_quirk }, /* PCH M RAID */
+ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci_pcs_quirk }, /* PCH AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b2), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b3), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b4), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b5), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b6), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19b7), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19bE), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19bF), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c0), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c1), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c2), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c3), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c4), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c5), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c6), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19c7), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */
+ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci_pcs_quirk }, /* CPT AHCI */
+ { PCI_VDEVICE(INTEL, 0x1c03), board_ahci_pcs_quirk }, /* CPT M AHCI */
+ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci_pcs_quirk }, /* CPT RAID */
+ { PCI_VDEVICE(INTEL, 0x1c05), board_ahci_pcs_quirk }, /* CPT M RAID */
+ { PCI_VDEVICE(INTEL, 0x1c06), board_ahci_pcs_quirk }, /* CPT RAID */
+ { PCI_VDEVICE(INTEL, 0x1c07), board_ahci_pcs_quirk }, /* CPT RAID */
+ { PCI_VDEVICE(INTEL, 0x1d02), board_ahci_pcs_quirk }, /* PBG AHCI */
+ { PCI_VDEVICE(INTEL, 0x1d04), board_ahci_pcs_quirk }, /* PBG RAID */
+ { PCI_VDEVICE(INTEL, 0x1d06), board_ahci_pcs_quirk }, /* PBG RAID */
+ { PCI_VDEVICE(INTEL, 0x2323), board_ahci_pcs_quirk }, /* DH89xxCC AHCI */
+ { PCI_VDEVICE(INTEL, 0x1e02), board_ahci_pcs_quirk }, /* Panther Point AHCI */
+ { PCI_VDEVICE(INTEL, 0x1e03), board_ahci_pcs_quirk }, /* Panther M AHCI */
+ { PCI_VDEVICE(INTEL, 0x1e04), board_ahci_pcs_quirk }, /* Panther Point RAID */
+ { PCI_VDEVICE(INTEL, 0x1e05), board_ahci_pcs_quirk }, /* Panther Point RAID */
+ { PCI_VDEVICE(INTEL, 0x1e06), board_ahci_pcs_quirk }, /* Panther Point RAID */
+ { PCI_VDEVICE(INTEL, 0x1e07), board_ahci_pcs_quirk }, /* Panther M RAID */
+ { PCI_VDEVICE(INTEL, 0x1e0e), board_ahci_pcs_quirk }, /* Panther Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c02), board_ahci_pcs_quirk }, /* Lynx Point AHCI */
+ { PCI_VDEVICE(INTEL, 0x8c03), board_ahci_pcs_quirk }, /* Lynx M AHCI */
+ { PCI_VDEVICE(INTEL, 0x8c04), board_ahci_pcs_quirk }, /* Lynx Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c05), board_ahci_pcs_quirk }, /* Lynx M RAID */
+ { PCI_VDEVICE(INTEL, 0x8c06), board_ahci_pcs_quirk }, /* Lynx Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c07), board_ahci_pcs_quirk }, /* Lynx M RAID */
+ { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci_pcs_quirk }, /* Lynx Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci_pcs_quirk }, /* Lynx M RAID */
+ { PCI_VDEVICE(INTEL, 0x9c02), board_ahci_pcs_quirk }, /* Lynx LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x9c03), board_ahci_pcs_quirk }, /* Lynx LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x9c04), board_ahci_pcs_quirk }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c05), board_ahci_pcs_quirk }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c06), board_ahci_pcs_quirk }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c07), board_ahci_pcs_quirk }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c0e), board_ahci_pcs_quirk }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c0f), board_ahci_pcs_quirk }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9dd3), board_ahci_pcs_quirk }, /* Cannon Lake PCH-LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x1f22), board_ahci_pcs_quirk }, /* Avoton AHCI */
+ { PCI_VDEVICE(INTEL, 0x1f23), board_ahci_pcs_quirk }, /* Avoton AHCI */
+ { PCI_VDEVICE(INTEL, 0x1f24), board_ahci_pcs_quirk }, /* Avoton RAID */
+ { PCI_VDEVICE(INTEL, 0x1f25), board_ahci_pcs_quirk }, /* Avoton RAID */
+ { PCI_VDEVICE(INTEL, 0x1f26), board_ahci_pcs_quirk }, /* Avoton RAID */
+ { PCI_VDEVICE(INTEL, 0x1f27), board_ahci_pcs_quirk }, /* Avoton RAID */
+ { PCI_VDEVICE(INTEL, 0x1f2e), board_ahci_pcs_quirk }, /* Avoton RAID */
+ { PCI_VDEVICE(INTEL, 0x1f2f), board_ahci_pcs_quirk }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x1f32), board_ahci_avn }, /* Avoton AHCI */
{ PCI_VDEVICE(INTEL, 0x1f33), board_ahci_avn }, /* Avoton AHCI */
{ PCI_VDEVICE(INTEL, 0x1f34), board_ahci_avn }, /* Avoton RAID */
@@ -365,65 +371,65 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x1f37), board_ahci_avn }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg/Lewisburg AHCI*/
- { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* *burg SATA0 'RAID' */
- { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* *burg SATA1 'RAID' */
- { PCI_VDEVICE(INTEL, 0x282f), board_ahci }, /* *burg SATA2 'RAID' */
- { PCI_VDEVICE(INTEL, 0x43d4), board_ahci }, /* Rocket Lake PCH-H RAID */
- { PCI_VDEVICE(INTEL, 0x43d5), board_ahci }, /* Rocket Lake PCH-H RAID */
- { PCI_VDEVICE(INTEL, 0x43d6), board_ahci }, /* Rocket Lake PCH-H RAID */
- { PCI_VDEVICE(INTEL, 0x43d7), board_ahci }, /* Rocket Lake PCH-H RAID */
- { PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */
- { PCI_VDEVICE(INTEL, 0x8d04), board_ahci }, /* Wellsburg RAID */
- { PCI_VDEVICE(INTEL, 0x8d06), board_ahci }, /* Wellsburg RAID */
- { PCI_VDEVICE(INTEL, 0x8d0e), board_ahci }, /* Wellsburg RAID */
- { PCI_VDEVICE(INTEL, 0x8d62), board_ahci }, /* Wellsburg AHCI */
- { PCI_VDEVICE(INTEL, 0x8d64), board_ahci }, /* Wellsburg RAID */
- { PCI_VDEVICE(INTEL, 0x8d66), board_ahci }, /* Wellsburg RAID */
- { PCI_VDEVICE(INTEL, 0x8d6e), board_ahci }, /* Wellsburg RAID */
- { PCI_VDEVICE(INTEL, 0x23a3), board_ahci }, /* Coleto Creek AHCI */
- { PCI_VDEVICE(INTEL, 0x9c83), board_ahci_low_power }, /* Wildcat LP AHCI */
- { PCI_VDEVICE(INTEL, 0x9c85), board_ahci_low_power }, /* Wildcat LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c87), board_ahci_low_power }, /* Wildcat LP RAID */
- { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci_low_power }, /* Wildcat LP RAID */
- { PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */
- { PCI_VDEVICE(INTEL, 0x8c83), board_ahci_low_power }, /* 9 Series M AHCI */
- { PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */
- { PCI_VDEVICE(INTEL, 0x8c85), board_ahci_low_power }, /* 9 Series M RAID */
- { PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */
- { PCI_VDEVICE(INTEL, 0x8c87), board_ahci_low_power }, /* 9 Series M RAID */
- { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */
- { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci_low_power }, /* 9 Series M RAID */
- { PCI_VDEVICE(INTEL, 0x9d03), board_ahci_low_power }, /* Sunrise LP AHCI */
- { PCI_VDEVICE(INTEL, 0x9d05), board_ahci_low_power }, /* Sunrise LP RAID */
- { PCI_VDEVICE(INTEL, 0x9d07), board_ahci_low_power }, /* Sunrise LP RAID */
- { PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */
- { PCI_VDEVICE(INTEL, 0xa103), board_ahci_low_power }, /* Sunrise M AHCI */
- { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */
- { PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */
- { PCI_VDEVICE(INTEL, 0xa107), board_ahci_low_power }, /* Sunrise M RAID */
- { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
- { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
- { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
- { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */
- { PCI_VDEVICE(INTEL, 0x06d7), board_ahci }, /* Comet Lake-H RAID */
- { PCI_VDEVICE(INTEL, 0xa386), board_ahci }, /* Comet Lake PCH-V RAID */
- { PCI_VDEVICE(INTEL, 0x0f22), board_ahci_low_power }, /* Bay Trail AHCI */
- { PCI_VDEVICE(INTEL, 0x0f23), board_ahci_low_power }, /* Bay Trail AHCI */
- { PCI_VDEVICE(INTEL, 0x22a3), board_ahci_low_power }, /* Cherry Tr. AHCI */
- { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci_low_power }, /* ApolloLake AHCI */
- { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
- { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
- { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
+ { PCI_VDEVICE(INTEL, 0x2823), board_ahci_pcs_quirk }, /* Wellsburg/Lewisburg AHCI*/
+ { PCI_VDEVICE(INTEL, 0x2826), board_ahci_pcs_quirk }, /* *burg SATA0 'RAID' */
+ { PCI_VDEVICE(INTEL, 0x2827), board_ahci_pcs_quirk }, /* *burg SATA1 'RAID' */
+ { PCI_VDEVICE(INTEL, 0x282f), board_ahci_pcs_quirk }, /* *burg SATA2 'RAID' */
+ { PCI_VDEVICE(INTEL, 0x43d4), board_ahci_pcs_quirk }, /* Rocket Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x43d5), board_ahci_pcs_quirk }, /* Rocket Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x43d6), board_ahci_pcs_quirk }, /* Rocket Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x43d7), board_ahci_pcs_quirk }, /* Rocket Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x8d02), board_ahci_pcs_quirk }, /* Wellsburg AHCI */
+ { PCI_VDEVICE(INTEL, 0x8d04), board_ahci_pcs_quirk }, /* Wellsburg RAID */
+ { PCI_VDEVICE(INTEL, 0x8d06), board_ahci_pcs_quirk }, /* Wellsburg RAID */
+ { PCI_VDEVICE(INTEL, 0x8d0e), board_ahci_pcs_quirk }, /* Wellsburg RAID */
+ { PCI_VDEVICE(INTEL, 0x8d62), board_ahci_pcs_quirk }, /* Wellsburg AHCI */
+ { PCI_VDEVICE(INTEL, 0x8d64), board_ahci_pcs_quirk }, /* Wellsburg RAID */
+ { PCI_VDEVICE(INTEL, 0x8d66), board_ahci_pcs_quirk }, /* Wellsburg RAID */
+ { PCI_VDEVICE(INTEL, 0x8d6e), board_ahci_pcs_quirk }, /* Wellsburg RAID */
+ { PCI_VDEVICE(INTEL, 0x23a3), board_ahci_pcs_quirk }, /* Coleto Creek AHCI */
+ { PCI_VDEVICE(INTEL, 0x9c83), board_ahci_pcs_quirk }, /* Wildcat LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x9c85), board_ahci_pcs_quirk }, /* Wildcat LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c87), board_ahci_pcs_quirk }, /* Wildcat LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci_pcs_quirk }, /* Wildcat LP RAID */
+ { PCI_VDEVICE(INTEL, 0x8c82), board_ahci_pcs_quirk }, /* 9 Series AHCI */
+ { PCI_VDEVICE(INTEL, 0x8c83), board_ahci_pcs_quirk }, /* 9 Series M AHCI */
+ { PCI_VDEVICE(INTEL, 0x8c84), board_ahci_pcs_quirk }, /* 9 Series RAID */
+ { PCI_VDEVICE(INTEL, 0x8c85), board_ahci_pcs_quirk }, /* 9 Series M RAID */
+ { PCI_VDEVICE(INTEL, 0x8c86), board_ahci_pcs_quirk }, /* 9 Series RAID */
+ { PCI_VDEVICE(INTEL, 0x8c87), board_ahci_pcs_quirk }, /* 9 Series M RAID */
+ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci_pcs_quirk }, /* 9 Series RAID */
+ { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci_pcs_quirk }, /* 9 Series M RAID */
+ { PCI_VDEVICE(INTEL, 0x9d03), board_ahci_pcs_quirk }, /* Sunrise LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x9d05), board_ahci_pcs_quirk }, /* Sunrise LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9d07), board_ahci_pcs_quirk }, /* Sunrise LP RAID */
+ { PCI_VDEVICE(INTEL, 0xa102), board_ahci_pcs_quirk }, /* Sunrise Point-H AHCI */
+ { PCI_VDEVICE(INTEL, 0xa103), board_ahci_pcs_quirk }, /* Sunrise M AHCI */
+ { PCI_VDEVICE(INTEL, 0xa105), board_ahci_pcs_quirk }, /* Sunrise Point-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa106), board_ahci_pcs_quirk }, /* Sunrise Point-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa107), board_ahci_pcs_quirk }, /* Sunrise M RAID */
+ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci_pcs_quirk }, /* Sunrise Point-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa182), board_ahci_pcs_quirk }, /* Lewisburg AHCI*/
+ { PCI_VDEVICE(INTEL, 0xa186), board_ahci_pcs_quirk }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci_pcs_quirk }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci_pcs_quirk }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa202), board_ahci_pcs_quirk }, /* Lewisburg AHCI*/
+ { PCI_VDEVICE(INTEL, 0xa206), board_ahci_pcs_quirk }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa252), board_ahci_pcs_quirk }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa256), board_ahci_pcs_quirk }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa356), board_ahci_pcs_quirk }, /* Cannon Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x06d7), board_ahci_pcs_quirk }, /* Comet Lake-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa386), board_ahci_pcs_quirk }, /* Comet Lake PCH-V RAID */
+ { PCI_VDEVICE(INTEL, 0x0f22), board_ahci_pcs_quirk }, /* Bay Trail AHCI */
+ { PCI_VDEVICE(INTEL, 0x0f23), board_ahci_pcs_quirk_no_devslp }, /* Bay Trail AHCI */
+ { PCI_VDEVICE(INTEL, 0x22a3), board_ahci_pcs_quirk }, /* Cherry Tr. AHCI */
+ { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci_pcs_quirk }, /* ApolloLake AHCI */
+ { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_pcs_quirk }, /* Ice Lake LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_pcs_quirk }, /* Comet Lake PCH-U AHCI */
+ { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_pcs_quirk }, /* Comet Lake PCH RAID */
/* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */
- { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */
- { PCI_VDEVICE(INTEL, 0x7ae2), board_ahci_low_power }, /* Alder Lake-P AHCI */
+ { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_pcs_quirk }, /* Elkhart Lake AHCI */
+ { PCI_VDEVICE(INTEL, 0x7ae2), board_ahci_pcs_quirk }, /* Alder Lake-P AHCI */
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
@@ -451,14 +457,14 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
{ PCI_VDEVICE(AMD, 0x7801), board_ahci_no_debounce_delay }, /* AMD Hudson-2 (AHCI mode) */
{ PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */
- { PCI_VDEVICE(AMD, 0x7901), board_ahci_low_power }, /* AMD Green Sardine */
+ { PCI_VDEVICE(AMD, 0x7901), board_ahci }, /* AMD Green Sardine */
/* AMD is using RAID class only for ahci controllers */
{ PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
/* Dell S140/S150 */
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_SUBVENDOR_ID_DELL, PCI_ANY_ID,
- PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
+ PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci_pcs_quirk },
/* VIA */
{ PCI_VDEVICE(VIA, 0x3349), board_ahci_vt8251 }, /* VIA VT8251 */
@@ -597,14 +603,14 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */
{ PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */
- /* Asmedia */
- { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */
- { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */
- { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */
- { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */
- { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */
- { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */
- { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */
+ /* ASMedia */
+ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */
+ { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */
+ { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */
+ { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */
+ { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */
+ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */
+ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */
{ PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */
{ PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */
{ PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */
@@ -615,8 +621,8 @@ static const struct pci_device_id ahci_pci_tbl[] = {
* Samsung SSDs found on some macbooks. NCQ times out if MSI is
* enabled. https://bugzilla.kernel.org/show_bug.cgi?id=60731
*/
- { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi },
- { PCI_VDEVICE(SAMSUNG, 0xa800), board_ahci_nomsi },
+ { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_no_msi },
+ { PCI_VDEVICE(SAMSUNG, 0xa800), board_ahci_no_msi },
/* Enmotus */
{ PCI_DEVICE(0x1c44, 0x8000), board_ahci },
@@ -663,6 +669,19 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
static void ahci_pci_save_initial_config(struct pci_dev *pdev,
struct ahci_host_priv *hpriv)
{
+ if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) {
+ switch (pdev->device) {
+ case 0x1166:
+ dev_info(&pdev->dev, "ASM1166 has only six ports\n");
+ hpriv->saved_port_map = 0x3f;
+ break;
+ case 0x1064:
+ dev_info(&pdev->dev, "ASM1064 has only four ports\n");
+ hpriv->saved_port_map = 0xf;
+ break;
+ }
+ }
+
if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) {
dev_info(&pdev->dev, "JMB361 has only one port\n");
hpriv->saved_port_map = 1;
@@ -949,11 +968,20 @@ static int ahci_pci_device_resume(struct device *dev)
#endif /* CONFIG_PM */
-static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac)
+static int ahci_configure_dma_masks(struct pci_dev *pdev,
+ struct ahci_host_priv *hpriv)
{
- const int dma_bits = using_dac ? 64 : 32;
+ int dma_bits;
int rc;
+ if (hpriv->cap & HOST_CAP_64) {
+ dma_bits = 64;
+ if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY)
+ dma_bits = 43;
+ } else {
+ dma_bits = 32;
+ }
+
/*
* If the device fixup already set the dma_mask to some non-standard
* value, don't extend it here. This happens on STA2X11, for example.
@@ -1401,17 +1429,6 @@ static bool ahci_broken_online(struct pci_dev *pdev)
return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff);
}
-static bool ahci_broken_devslp(struct pci_dev *pdev)
-{
- /* device with broken DEVSLP but still showing SDS capability */
- static const struct pci_device_id ids[] = {
- { PCI_VDEVICE(INTEL, 0x0f23)}, /* Valleyview SoC */
- {}
- };
-
- return pci_match_id(ids, pdev);
-}
-
#ifdef CONFIG_ATA_ACPI
static void ahci_gtf_filter_workaround(struct ata_host *host)
{
@@ -1620,14 +1637,31 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports,
return pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
}
-static void ahci_update_initial_lpm_policy(struct ata_port *ap,
- struct ahci_host_priv *hpriv)
+static void ahci_mark_external_port(struct ata_port *ap)
{
- int policy = CONFIG_SATA_MOBILE_LPM_POLICY;
+ struct ahci_host_priv *hpriv = ap->host->private_data;
+ void __iomem *port_mmio = ahci_port_base(ap);
+ u32 tmp;
+
+ /* mark external ports (hotplug-capable, eSATA) */
+ tmp = readl(port_mmio + PORT_CMD);
+ if (((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)) ||
+ (tmp & PORT_CMD_HPCP))
+ ap->pflags |= ATA_PFLAG_EXTERNAL;
+}
+static void ahci_update_initial_lpm_policy(struct ata_port *ap)
+{
+ struct ahci_host_priv *hpriv = ap->host->private_data;
+ int policy = CONFIG_SATA_MOBILE_LPM_POLICY;
- /* Ignore processing for chipsets that don't use policy */
- if (!(hpriv->flags & AHCI_HFLAG_USE_LPM_POLICY))
+ /*
+ * AHCI contains a known incompatibility between LPM and hot-plug
+ * removal events, see 7.3.1 Hot Plug Removal Detection and Power
+ * Management Interaction in AHCI 1.3.1. Therefore, do not enable
+ * LPM if the port advertises itself as an external port.
+ */
+ if (ap->pflags & ATA_PFLAG_EXTERNAL)
return;
/* user modified policy via module param */
@@ -1650,17 +1684,9 @@ update_policy:
static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hpriv)
{
- const struct pci_device_id *id = pci_match_id(ahci_pci_tbl, pdev);
u16 tmp16;
- /*
- * Only apply the 6-port PCS quirk for known legacy platforms.
- */
- if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
- return;
-
- /* Skip applying the quirk on Denverton and beyond */
- if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
+ if (!(hpriv->flags & AHCI_HFLAG_INTEL_PCS_QUIRK))
return;
/*
@@ -1795,10 +1821,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
&dev_attr_remapped_nvme.attr,
NULL);
- /* must set flag prior to save config in order to take effect */
- if (ahci_broken_devslp(pdev))
- hpriv->flags |= AHCI_HFLAG_NO_DEVSLP;
-
#ifdef CONFIG_ARM64
if (pdev->vendor == PCI_VENDOR_ID_HUAWEI &&
pdev->device == 0xa235 &&
@@ -1912,7 +1934,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ap->flags & ATA_FLAG_EM)
ap->em_message_type = hpriv->em_msg_type;
- ahci_update_initial_lpm_policy(ap, hpriv);
+ ahci_mark_external_port(ap);
+
+ ahci_update_initial_lpm_policy(ap);
/* disabled/not-implemented port */
if (!(hpriv->port_map & (1 << i)))
@@ -1926,7 +1950,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ahci_gtf_filter_workaround(host);
/* initialize adapter */
- rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64);
+ rc = ahci_configure_dma_masks(pdev, hpriv);
if (rc)
return rc;
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 4bae95b06ae3..344c87210d8f 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -241,12 +241,11 @@ enum {
AHCI_HFLAG_YES_ALPM = BIT(23), /* force ALPM cap on */
AHCI_HFLAG_NO_WRITE_TO_RO = BIT(24), /* don't write to read
only registers */
- AHCI_HFLAG_USE_LPM_POLICY = BIT(25), /* chipset that should use
- SATA_MOBILE_LPM_POLICY
- as default lpm_policy */
- AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during
+ AHCI_HFLAG_SUSPEND_PHYS = BIT(25), /* handle PHYs during
suspend/resume */
- AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */
+ AHCI_HFLAG_NO_SXS = BIT(26), /* SXS not supported */
+ AHCI_HFLAG_43BIT_ONLY = BIT(27), /* 43bit DMA addr limit */
+ AHCI_HFLAG_INTEL_PCS_QUIRK = BIT(28), /* apply Intel PCS quirk */
/* ap->flags bits */
diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c
index 64f7f7d6ba84..11a2c199a7c2 100644
--- a/drivers/ata/ahci_ceva.c
+++ b/drivers/ata/ahci_ceva.c
@@ -88,7 +88,6 @@ struct ceva_ahci_priv {
u32 axicc;
bool is_cci_enabled;
int flags;
- struct reset_control *rst;
};
static unsigned int ceva_ahci_read_id(struct ata_device *dev,
@@ -189,6 +188,60 @@ static const struct scsi_host_template ahci_platform_sht = {
AHCI_SHT(DRV_NAME),
};
+static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv)
+{
+ int rc, i;
+
+ rc = ahci_platform_enable_regulators(hpriv);
+ if (rc)
+ return rc;
+
+ rc = ahci_platform_enable_clks(hpriv);
+ if (rc)
+ goto disable_regulator;
+
+ /* Assert the controller reset */
+ rc = ahci_platform_assert_rsts(hpriv);
+ if (rc)
+ goto disable_clks;
+
+ for (i = 0; i < hpriv->nports; i++) {
+ rc = phy_init(hpriv->phys[i]);
+ if (rc)
+ goto disable_rsts;
+ }
+
+ /* De-assert the controller reset */
+ ahci_platform_deassert_rsts(hpriv);
+
+ for (i = 0; i < hpriv->nports; i++) {
+ rc = phy_power_on(hpriv->phys[i]);
+ if (rc) {
+ phy_exit(hpriv->phys[i]);
+ goto disable_phys;
+ }
+ }
+
+ return 0;
+
+disable_rsts:
+ ahci_platform_deassert_rsts(hpriv);
+
+disable_phys:
+ while (--i >= 0) {
+ phy_power_off(hpriv->phys[i]);
+ phy_exit(hpriv->phys[i]);
+ }
+
+disable_clks:
+ ahci_platform_disable_clks(hpriv);
+
+disable_regulator:
+ ahci_platform_disable_regulators(hpriv);
+
+ return rc;
+}
+
static int ceva_ahci_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev)
return -ENOMEM;
cevapriv->ahci_pdev = pdev;
-
- cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev,
- NULL);
- if (IS_ERR(cevapriv->rst))
- dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst),
- "failed to get reset\n");
-
hpriv = ahci_platform_get_resources(pdev, 0);
if (IS_ERR(hpriv))
return PTR_ERR(hpriv);
- if (!cevapriv->rst) {
- rc = ahci_platform_enable_resources(hpriv);
- if (rc)
- return rc;
- } else {
- int i;
+ hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev,
+ NULL);
+ if (IS_ERR(hpriv->rsts))
+ return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts),
+ "failed to get reset\n");
- rc = ahci_platform_enable_clks(hpriv);
- if (rc)
- return rc;
- /* Assert the controller reset */
- reset_control_assert(cevapriv->rst);
-
- for (i = 0; i < hpriv->nports; i++) {
- rc = phy_init(hpriv->phys[i]);
- if (rc)
- return rc;
- }
-
- /* De-assert the controller reset */
- reset_control_deassert(cevapriv->rst);
-
- for (i = 0; i < hpriv->nports; i++) {
- rc = phy_power_on(hpriv->phys[i]);
- if (rc) {
- phy_exit(hpriv->phys[i]);
- return rc;
- }
- }
- }
+ rc = ceva_ahci_platform_enable_resources(hpriv);
+ if (rc)
+ return rc;
if (of_property_read_bool(np, "ceva,broken-gen2"))
cevapriv->flags = CEVA_FLAG_BROKEN_GEN2;
@@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev)
if (of_property_read_u8_array(np, "ceva,p0-cominit-params",
(u8 *)&cevapriv->pp2c[0], 4) < 0) {
dev_warn(dev, "ceva,p0-cominit-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
if (of_property_read_u8_array(np, "ceva,p1-cominit-params",
(u8 *)&cevapriv->pp2c[1], 4) < 0) {
dev_warn(dev, "ceva,p1-cominit-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
/* Read OOB timing value for COMWAKE from device-tree*/
if (of_property_read_u8_array(np, "ceva,p0-comwake-params",
(u8 *)&cevapriv->pp3c[0], 4) < 0) {
dev_warn(dev, "ceva,p0-comwake-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
if (of_property_read_u8_array(np, "ceva,p1-comwake-params",
(u8 *)&cevapriv->pp3c[1], 4) < 0) {
dev_warn(dev, "ceva,p1-comwake-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
/* Read phy BURST timing value from device-tree */
if (of_property_read_u8_array(np, "ceva,p0-burst-params",
(u8 *)&cevapriv->pp4c[0], 4) < 0) {
dev_warn(dev, "ceva,p0-burst-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
if (of_property_read_u8_array(np, "ceva,p1-burst-params",
(u8 *)&cevapriv->pp4c[1], 4) < 0) {
dev_warn(dev, "ceva,p1-burst-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
/* Read phy RETRY interval timing value from device-tree */
if (of_property_read_u16_array(np, "ceva,p0-retry-params",
(u16 *)&cevapriv->pp5c[0], 2) < 0) {
dev_warn(dev, "ceva,p0-retry-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
if (of_property_read_u16_array(np, "ceva,p1-retry-params",
(u16 *)&cevapriv->pp5c[1], 2) < 0) {
dev_warn(dev, "ceva,p1-retry-params property not defined\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto disable_resources;
}
/*
@@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev)
struct ahci_host_priv *hpriv = host->private_data;
int rc;
- rc = ahci_platform_enable_resources(hpriv);
+ rc = ceva_ahci_platform_enable_resources(hpriv);
if (rc)
return rc;
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 1a63200ea437..83431aae74d8 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1280,10 +1280,8 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
int port_no, void __iomem *mmio,
void __iomem *port_mmio)
{
- struct ahci_host_priv *hpriv = ap->host->private_data;
const char *emsg = NULL;
int rc;
- u32 tmp;
/* make sure port is not active */
rc = ahci_deinit_port(ap, &emsg);
@@ -1291,11 +1289,6 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
dev_warn(dev, "%s (%d)\n", emsg, rc);
ahci_port_clear_pending_irq(ap);
-
- /* mark esata ports */
- tmp = readl(port_mmio + PORT_CMD);
- if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))
- ap->pflags |= ATA_PFLAG_EXTERNAL;
}
void ahci_init_controller(struct ata_host *host)
@@ -2627,8 +2620,8 @@ void ahci_print_info(struct ata_host *host, const char *scc_s)
speed_s = "?";
dev_info(host->dev,
- "AHCI %02x%02x.%02x%02x "
- "%u slots %u ports %s Gbps 0x%x impl %s mode\n"
+ "AHCI vers %02x%02x.%02x%02x, "
+ "%u command slots, %s Gbps, %s mode\n"
,
(vers >> 24) & 0xff,
@@ -2637,12 +2630,18 @@ void ahci_print_info(struct ata_host *host, const char *scc_s)
vers & 0xff,
((cap >> 8) & 0x1f) + 1,
- (cap & 0x1f) + 1,
speed_s,
- impl,
scc_s);
dev_info(host->dev,
+ "%u/%u ports implemented (port mask 0x%x)\n"
+ ,
+
+ hweight32(impl),
+ (cap & 0x1f) + 1,
+ impl);
+
+ dev_info(host->dev,
"flags: "
"%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s"
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 09ed67772fae..be3412cdb22e 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2001,6 +2001,33 @@ bool ata_dev_power_init_tf(struct ata_device *dev, struct ata_taskfile *tf,
return true;
}
+static bool ata_dev_power_is_active(struct ata_device *dev)
+{
+ struct ata_taskfile tf;
+ unsigned int err_mask;
+
+ ata_tf_init(dev, &tf);
+ tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+ tf.protocol = ATA_PROT_NODATA;
+ tf.command = ATA_CMD_CHK_POWER;
+
+ err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+ if (err_mask) {
+ ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n",
+ err_mask);
+ /*
+ * Assume we are in standby mode so that we always force a
+ * spinup in ata_dev_power_set_active().
+ */
+ return false;
+ }
+
+ ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect);
+
+ /* Active or idle */
+ return tf.nsect == 0xff;
+}
+
/**
* ata_dev_power_set_standby - Set a device power mode to standby
* @dev: target device
@@ -2017,6 +2044,11 @@ void ata_dev_power_set_standby(struct ata_device *dev)
struct ata_taskfile tf;
unsigned int err_mask;
+ /* If the device is already sleeping or in standby, do nothing. */
+ if ((dev->flags & ATA_DFLAG_SLEEPING) ||
+ !ata_dev_power_is_active(dev))
+ return;
+
/*
* Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5)
* causing some drives to spin up and down again. For these, do nothing
@@ -2042,33 +2074,6 @@ void ata_dev_power_set_standby(struct ata_device *dev)
err_mask);
}
-static bool ata_dev_power_is_active(struct ata_device *dev)
-{
- struct ata_taskfile tf;
- unsigned int err_mask;
-
- ata_tf_init(dev, &tf);
- tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
- tf.protocol = ATA_PROT_NODATA;
- tf.command = ATA_CMD_CHK_POWER;
-
- err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
- if (err_mask) {
- ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n",
- err_mask);
- /*
- * Assume we are in standby mode so that we always force a
- * spinup in ata_dev_power_set_active().
- */
- return false;
- }
-
- ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect);
-
- /* Active or idle */
- return tf.nsect == 0xff;
-}
-
/**
* ata_dev_power_set_active - Set a device power mode to active
* @dev: target device
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
index b6656c287175..0fb1934875f2 100644
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@@ -784,7 +784,7 @@ bool sata_lpm_ignore_phy_events(struct ata_link *link)
EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events);
static const char *ata_lpm_policy_names[] = {
- [ATA_LPM_UNKNOWN] = "max_performance",
+ [ATA_LPM_UNKNOWN] = "keep_firmware_settings",
[ATA_LPM_MAX_POWER] = "max_performance",
[ATA_LPM_MED_POWER] = "medium_power",
[ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm",
diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c
index a7adfdcb5e27..9a2cb9ca9d1d 100644
--- a/drivers/ata/pata_parport/pata_parport.c
+++ b/drivers/ata/pata_parport/pata_parport.c
@@ -464,7 +464,7 @@ static void pata_parport_bus_release(struct device *dev)
/* nothing to do here but required to avoid warning on device removal */
}
-static struct bus_type pata_parport_bus_type = {
+static const struct bus_type pata_parport_bus_type = {
.name = DRV_NAME,
};
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 50d8ce20ae5b..9fb1575f8d88 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2550,14 +2550,12 @@ static int fore200e_sba_probe(struct platform_device *op)
return 0;
}
-static int fore200e_sba_remove(struct platform_device *op)
+static void fore200e_sba_remove(struct platform_device *op)
{
struct fore200e *fore200e = dev_get_drvdata(&op->dev);
fore200e_shutdown(fore200e);
kfree(fore200e);
-
- return 0;
}
static const struct of_device_id fore200e_sba_match[] = {
@@ -2574,7 +2572,7 @@ static struct platform_driver fore200e_sba_driver = {
.of_match_table = fore200e_sba_match,
},
.probe = fore200e_sba_probe,
- .remove = fore200e_sba_remove,
+ .remove_new = fore200e_sba_remove,
};
#endif
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index e327a0229dc1..e7f713cd70d3 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card)
vc->scq = alloc_scq(card, vc->class);
if (!vc->scq) {
printk("%s: can't get SCQ.\n", card->name);
+ kfree(card->vcs[0]);
+ card->vcs[0] = NULL;
return -ENOMEM;
}
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 018ac202de34..024b78a0cfc1 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -431,9 +431,6 @@ init_cpu_capacity_callback(struct notifier_block *nb,
struct cpufreq_policy *policy = data;
int cpu;
- if (!raw_capacity)
- return 0;
-
if (val != CPUFREQ_CREATE_POLICY)
return 0;
@@ -450,9 +447,11 @@ init_cpu_capacity_callback(struct notifier_block *nb,
}
if (cpumask_empty(cpus_to_visit)) {
- topology_normalize_cpu_scale();
- schedule_work(&update_topology_flags_work);
- free_raw_capacity();
+ if (raw_capacity) {
+ topology_normalize_cpu_scale();
+ schedule_work(&update_topology_flags_work);
+ free_raw_capacity();
+ }
pr_debug("cpu_capacity: parsing done\n");
schedule_work(&parsing_done_work);
}
@@ -472,7 +471,7 @@ static int __init register_cpufreq_notifier(void)
* On ACPI-based systems skip registering cpufreq notifier as cpufreq
* information is not needed for cpu capacity initialization.
*/
- if (!acpi_disabled || !raw_capacity)
+ if (!acpi_disabled)
return -EINVAL;
if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
diff --git a/drivers/base/base.h b/drivers/base/base.h
index eb4c0ace9242..0738ccad08b2 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -207,7 +207,7 @@ static inline int devtmpfs_init(void) { return 0; }
#endif
#ifdef CONFIG_BLOCK
-extern struct class block_class;
+extern const struct class block_class;
static inline bool is_blockdev(struct device *dev)
{
return dev->class == &block_class;
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 14d46af40f9a..9828da9b933c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -125,7 +125,7 @@ static void __fwnode_link_del(struct fwnode_link *link)
*/
static void __fwnode_link_cycle(struct fwnode_link *link)
{
- pr_debug("%pfwf: Relaxing link with %pfwf\n",
+ pr_debug("%pfwf: cycle: depends on %pfwf\n",
link->consumer, link->supplier);
link->flags |= FWLINK_FLAG_CYCLE;
}
@@ -284,10 +284,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target)
return false;
}
+#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \
+ DL_FLAG_CYCLE | \
+ DL_FLAG_MANAGED)
static inline bool device_link_flag_is_sync_state_only(u32 flags)
{
- return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) ==
- (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED);
+ return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY;
}
/**
@@ -1943,6 +1945,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
/* Termination condition. */
if (sup_dev == con) {
+ pr_debug("----- cycle: start -----\n");
ret = true;
goto out;
}
@@ -1974,8 +1977,11 @@ static bool __fw_devlink_relax_cycles(struct device *con,
else
par_dev = fwnode_get_next_parent_dev(sup_handle);
- if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode))
+ if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) {
+ pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle,
+ par_dev->fwnode);
ret = true;
+ }
if (!sup_dev)
goto out;
@@ -1991,6 +1997,8 @@ static bool __fw_devlink_relax_cycles(struct device *con,
if (__fw_devlink_relax_cycles(con,
dev_link->supplier->fwnode)) {
+ pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle,
+ dev_link->supplier->fwnode);
fw_devlink_relax_link(dev_link);
dev_link->flags |= DL_FLAG_CYCLE;
ret = true;
@@ -2058,13 +2066,19 @@ static int fw_devlink_create_devlink(struct device *con,
/*
* SYNC_STATE_ONLY device links don't block probing and supports cycles.
- * So cycle detection isn't necessary and shouldn't be done.
+ * So, one might expect that cycle detection isn't necessary for them.
+ * However, if the device link was marked as SYNC_STATE_ONLY because
+ * it's part of a cycle, then we still need to do cycle detection. This
+ * is because the consumer and supplier might be part of multiple cycles
+ * and we need to detect all those cycles.
*/
- if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) {
+ if (!device_link_flag_is_sync_state_only(flags) ||
+ flags & DL_FLAG_CYCLE) {
device_links_write_lock();
if (__fw_devlink_relax_cycles(con, sup_handle)) {
__fwnode_link_cycle(link);
flags = fw_devlink_get_flags(link->flags);
+ pr_debug("----- cycle: end -----\n");
dev_info(con, "Fixed dependency cycle(s) with %pfwf\n",
sup_handle);
}
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 47de0f140ba6..0b33e81f9c9b 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -588,6 +588,7 @@ CPU_SHOW_VULN_FALLBACK(mmio_stale_data);
CPU_SHOW_VULN_FALLBACK(retbleed);
CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow);
CPU_SHOW_VULN_FALLBACK(gds);
+CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling);
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -602,6 +603,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
+static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -617,6 +619,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_retbleed.attr,
&dev_attr_spec_rstack_overflow.attr,
&dev_attr_gather_data_sampling.attr,
+ &dev_attr_reg_file_data_sampling.attr,
NULL
};
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index f37ad34c80ec..0d01890160f3 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -13,6 +13,8 @@
#include <linux/msi.h>
#include <linux/slab.h>
+/* Begin of removal area. Once everything is converted over. Cleanup the includes too! */
+
#define DEV_ID_SHIFT 21
#define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT))
@@ -204,8 +206,8 @@ static void platform_msi_free_priv_data(struct device *dev)
* Returns:
* Zero for success, or an error code in case of failure
*/
-int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
- irq_write_msi_msg_t write_msi_msg)
+static int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg)
{
int err;
@@ -219,18 +221,6 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
return err;
}
-EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
-
-/**
- * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
- * @dev: The device for which to free interrupts
- */
-void platform_msi_domain_free_irqs(struct device *dev)
-{
- msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
- platform_msi_free_priv_data(dev);
-}
-EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
/**
* platform_msi_get_host_data - Query the private data associated with
@@ -350,3 +340,104 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
return msi_domain_populate_irqs(domain->parent, dev, virq, nr_irqs, &data->arg);
}
+
+/* End of removal area */
+
+/* Real per device domain interfaces */
+
+/*
+ * This indirection can go when platform_device_msi_init_and_alloc_irqs()
+ * is switched to a proper irq_chip::irq_write_msi_msg() callback. Keep it
+ * simple for now.
+ */
+static void platform_msi_write_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ irq_write_msi_msg_t cb = d->chip_data;
+
+ cb(irq_data_get_msi_desc(d), msg);
+}
+
+static void platform_msi_set_desc_byindex(msi_alloc_info_t *arg, struct msi_desc *desc)
+{
+ arg->desc = desc;
+ arg->hwirq = desc->msi_index;
+}
+
+static const struct msi_domain_template platform_msi_template = {
+ .chip = {
+ .name = "pMSI",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_write_msi_msg = platform_msi_write_msi_msg,
+ /* The rest is filled in by the platform MSI parent */
+ },
+
+ .ops = {
+ .set_desc = platform_msi_set_desc_byindex,
+ },
+
+ .info = {
+ .bus_token = DOMAIN_BUS_DEVICE_MSI,
+ },
+};
+
+/**
+ * platform_device_msi_init_and_alloc_irqs - Initialize platform device MSI
+ * and allocate interrupts for @dev
+ * @dev: The device for which to allocate interrupts
+ * @nvec: The number of interrupts to allocate
+ * @write_msi_msg: Callback to write an interrupt message for @dev
+ *
+ * Returns:
+ * Zero for success, or an error code in case of failure
+ *
+ * This creates a MSI domain on @dev which has @dev->msi.domain as
+ * parent. The parent domain sets up the new domain. The domain has
+ * a fixed size of @nvec. The domain is managed by devres and will
+ * be removed when the device is removed.
+ *
+ * Note: For migration purposes this falls back to the original platform_msi code
+ * up to the point where all platforms have been converted to the MSI
+ * parent model.
+ */
+int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg)
+{
+ struct irq_domain *domain = dev->msi.domain;
+
+ if (!domain || !write_msi_msg)
+ return -EINVAL;
+
+ /* Migration support. Will go away once everything is converted */
+ if (!irq_domain_is_msi_parent(domain))
+ return platform_msi_domain_alloc_irqs(dev, nvec, write_msi_msg);
+
+ /*
+ * @write_msi_msg is stored in the resulting msi_domain_info::data.
+ * The underlying domain creation mechanism will assign that
+ * callback to the resulting irq chip.
+ */
+ if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN,
+ &platform_msi_template,
+ nvec, NULL, write_msi_msg))
+ return -ENODEV;
+
+ return msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, nvec - 1);
+}
+EXPORT_SYMBOL_GPL(platform_device_msi_init_and_alloc_irqs);
+
+/**
+ * platform_device_msi_free_irqs_all - Free all interrupts for @dev
+ * @dev: The device for which to free interrupts
+ */
+void platform_device_msi_free_irqs_all(struct device *dev)
+{
+ struct irq_domain *domain = dev->msi.domain;
+
+ msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
+
+ /* Migration support. Will go away once everything is converted */
+ if (!irq_domain_is_msi_parent(domain))
+ platform_msi_free_priv_data(dev);
+}
+EXPORT_SYMBOL_GPL(platform_device_msi_free_irqs_all);
diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c
index 026bdcb45127..0d957c5f1bcc 100644
--- a/drivers/base/regmap/regmap-kunit.c
+++ b/drivers/base/regmap/regmap-kunit.c
@@ -9,6 +9,23 @@
#define BLOCK_TEST_SIZE 12
+static void get_changed_bytes(void *orig, void *new, size_t size)
+{
+ char *o = orig;
+ char *n = new;
+ int i;
+
+ get_random_bytes(new, size);
+
+ /*
+ * This could be nicer and more efficient but we shouldn't
+ * super care.
+ */
+ for (i = 0; i < size; i++)
+ while (n[i] == o[i])
+ get_random_bytes(&n[i], 1);
+}
+
static const struct regmap_config test_regmap_config = {
.max_register = BLOCK_TEST_SIZE,
.reg_stride = 1,
@@ -1202,7 +1219,8 @@ static void raw_noinc_write(struct kunit *test)
struct regmap *map;
struct regmap_config config;
struct regmap_ram_data *data;
- unsigned int val, val_test, val_last;
+ unsigned int val;
+ u16 val_test, val_last;
u16 val_array[BLOCK_TEST_SIZE];
config = raw_regmap_config;
@@ -1251,7 +1269,7 @@ static void raw_sync(struct kunit *test)
struct regmap *map;
struct regmap_config config;
struct regmap_ram_data *data;
- u16 val[2];
+ u16 val[3];
u16 *hw_buf;
unsigned int rval;
int i;
@@ -1265,17 +1283,13 @@ static void raw_sync(struct kunit *test)
hw_buf = (u16 *)data->vals;
- get_random_bytes(&val, sizeof(val));
+ get_changed_bytes(&hw_buf[2], &val[0], sizeof(val));
/* Do a regular write and a raw write in cache only mode */
regcache_cache_only(map, true);
- KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, sizeof(val)));
- if (config.val_format_endian == REGMAP_ENDIAN_BIG)
- KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6,
- be16_to_cpu(val[0])));
- else
- KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6,
- le16_to_cpu(val[0])));
+ KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val,
+ sizeof(u16) * 2));
+ KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 4, val[2]));
/* We should read back the new values, and defaults for the rest */
for (i = 0; i < config.max_register + 1; i++) {
@@ -1284,24 +1298,34 @@ static void raw_sync(struct kunit *test)
switch (i) {
case 2:
case 3:
- case 6:
if (config.val_format_endian == REGMAP_ENDIAN_BIG) {
KUNIT_EXPECT_EQ(test, rval,
- be16_to_cpu(val[i % 2]));
+ be16_to_cpu(val[i - 2]));
} else {
KUNIT_EXPECT_EQ(test, rval,
- le16_to_cpu(val[i % 2]));
+ le16_to_cpu(val[i - 2]));
}
break;
+ case 4:
+ KUNIT_EXPECT_EQ(test, rval, val[i - 2]);
+ break;
default:
KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval);
break;
}
}
+
+ /*
+ * The value written via _write() was translated by the core,
+ * translate the original copy for comparison purposes.
+ */
+ if (config.val_format_endian == REGMAP_ENDIAN_BIG)
+ val[2] = cpu_to_be16(val[2]);
+ else
+ val[2] = cpu_to_le16(val[2]);
/* The values should not appear in the "hardware" */
- KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], val, sizeof(val));
- KUNIT_EXPECT_MEMNEQ(test, &hw_buf[6], val, sizeof(u16));
+ KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], &val[0], sizeof(val));
for (i = 0; i < config.max_register + 1; i++)
data->written[i] = false;
@@ -1312,8 +1336,7 @@ static void raw_sync(struct kunit *test)
KUNIT_EXPECT_EQ(test, 0, regcache_sync(map));
/* The values should now appear in the "hardware" */
- KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], val, sizeof(val));
- KUNIT_EXPECT_MEMEQ(test, &hw_buf[6], val, sizeof(u16));
+ KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], &val[0], sizeof(val));
regmap_exit(map);
}
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 7061d3ee836a..6b5d34919c72 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -68,7 +68,7 @@ static struct attribute *bcma_device_attrs[] = {
};
ATTRIBUTE_GROUPS(bcma_device);
-static struct bus_type bcma_bus_type = {
+static const struct bus_type bcma_bus_type = {
.name = "bcma",
.match = bcma_bus_match,
.probe = bcma_device_probe,
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 2b98114a9fe0..a25414228e47 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1779,7 +1779,7 @@ static int fd_alloc_disk(int drive, int system)
struct gendisk *disk;
int err;
- disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
+ disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index d2dbf8aaccb5..b6dac8cee70f 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -24,8 +24,8 @@ static DEFINE_MUTEX(aoeblk_mutex);
static struct kmem_cache *buf_pool_cache;
static struct dentry *aoe_debugfs_dir;
-/* GPFS needs a larger value than the default. */
-static int aoe_maxsectors;
+/* random default picked from the historic block max_sectors cap */
+static int aoe_maxsectors = 2560;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
"When nonzero, set the maximum number of sectors per I/O request");
@@ -333,6 +333,11 @@ aoeblk_gdalloc(void *vp)
struct gendisk *gd;
mempool_t *mp;
struct blk_mq_tag_set *set;
+ sector_t ssize;
+ struct queue_limits lim = {
+ .max_hw_sectors = aoe_maxsectors,
+ .io_opt = SZ_2M,
+ };
ulong flags;
int late = 0;
int err;
@@ -370,7 +375,7 @@ aoeblk_gdalloc(void *vp)
goto err_mempool;
}
- gd = blk_mq_alloc_disk(set, d);
+ gd = blk_mq_alloc_disk(set, &lim, d);
if (IS_ERR(gd)) {
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
d->aoemajor, d->aoeminor);
@@ -383,20 +388,15 @@ aoeblk_gdalloc(void *vp)
WARN_ON(d->flags & DEVFL_TKILL);
WARN_ON(d->gd);
WARN_ON(d->flags & DEVFL_UP);
- /* random number picked from the history block max_sectors cap */
- blk_queue_max_hw_sectors(gd->queue, 2560u);
- blk_queue_io_opt(gd->queue, SZ_2M);
d->bufpool = mp;
d->blkq = gd->queue;
d->gd = gd;
- if (aoe_maxsectors)
- blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors);
gd->major = AOE_MAJOR;
gd->first_minor = d->sysminor;
gd->minors = AOE_PARTITIONS;
gd->fops = &aoe_bdops;
gd->private_data = d;
- set_capacity(gd, d->ssize);
+ ssize = d->ssize;
snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
d->aoemajor, d->aoeminor);
@@ -405,6 +405,8 @@ aoeblk_gdalloc(void *vp)
spin_unlock_irqrestore(&d->lock, flags);
+ set_capacity(gd, ssize);
+
err = device_add_disk(NULL, gd, aoe_attr_groups);
if (err)
goto out_disk_cleanup;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d7317425be51..cc9077b588d7 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu
rcu_read_lock();
for_each_netdev_rcu(&init_net, ifp) {
dev_hold(ifp);
- if (!is_aoe_netif(ifp))
- goto cont;
+ if (!is_aoe_netif(ifp)) {
+ dev_put(ifp);
+ continue;
+ }
skb = new_skb(sizeof *h + sizeof *ch);
if (skb == NULL) {
printk(KERN_INFO "aoe: skb alloc failure\n");
- goto cont;
+ dev_put(ifp);
+ continue;
}
skb_put(skb, sizeof *h + sizeof *ch);
skb->dev = ifp;
@@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu
h->major = cpu_to_be16(aoemajor);
h->minor = aoeminor;
h->cmd = AOECMD_CFG;
-
-cont:
- dev_put(ifp);
}
rcu_read_unlock();
}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index c51ea95bc2ce..923a134fd766 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -63,6 +63,7 @@ tx(int id) __must_hold(&txlock)
pr_warn("aoe: packet could not be sent on %s. %s\n",
ifp ? ifp->name : "netif",
"consider increasing tx_queue_len");
+ dev_put(ifp);
spin_lock_irq(&txlock);
}
return 0;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 50949207798d..cacc4ba942a8 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1994,7 +1994,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
{
struct gendisk *disk;
- disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
+ disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 970bd6ff38c4..e322cef6596b 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -318,6 +318,16 @@ static int brd_alloc(int i)
struct gendisk *disk;
char buf[DISK_NAME_LEN];
int err = -ENOMEM;
+ struct queue_limits lim = {
+ /*
+ * This is so fdisk will align partitions on 4k, because of
+ * direct_access API needing 4k alignment, returning a PFN
+ * (This is only a problem on very small devices <= 4M,
+ * otherwise fdisk will align on 1M. Regardless this call
+ * is harmless)
+ */
+ .physical_block_size = PAGE_SIZE,
+ };
list_for_each_entry(brd, &brd_devices, brd_list)
if (brd->brd_number == i)
@@ -335,10 +345,11 @@ static int brd_alloc(int i)
debugfs_create_u64(buf, 0444, brd_debugfs_dir,
&brd->brd_nr_pages);
- disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ err = PTR_ERR(disk);
goto out_free_dev;
-
+ }
disk->major = RAMDISK_MAJOR;
disk->first_minor = i * max_part;
disk->minors = max_part;
@@ -347,15 +358,6 @@ static int brd_alloc(int i)
strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
- /*
- * This is so fdisk will align partitions on 4k, because of
- * direct_access API needing 4k alignment, returning a PFN
- * (This is only a problem on very small devices <= 4M,
- * otherwise fdisk will align on 1M. Regardless this call
- * is harmless)
- */
- blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
-
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c21e3732759e..94dc0a235919 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -524,9 +524,9 @@ struct drbd_md {
struct drbd_backing_dev {
struct block_device *backing_bdev;
- struct bdev_handle *backing_bdev_handle;
+ struct file *backing_bdev_file;
struct block_device *md_bdev;
- struct bdev_handle *md_bdev_handle;
+ struct file *f_md_bdev;
struct drbd_md md;
struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */
sector_t known_size; /* last known size of that backing device */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6bc86106c7b2..113b441d4d36 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
int id;
int vnr = adm_ctx->volume;
enum drbd_ret_code err = ERR_NOMEM;
+ struct queue_limits lim = {
+ /*
+ * Setting the max_hw_sectors to an odd value of 8kibyte here.
+ * This triggers a max_bio_size message upon first attach or
+ * connect.
+ */
+ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
+ };
device = minor_to_device(minor);
if (device)
@@ -2708,9 +2716,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ err = PTR_ERR(disk);
goto out_no_disk;
+ }
device->vdisk = disk;
device->rq_queue = disk->queue;
@@ -2727,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
blk_queue_write_cache(disk->queue, true, true);
- /* Setting the max_hw_sectors to an odd value of 8kibyte here
- This triggers a max_bio_size message upon first attach or connect */
- blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8);
device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 43747a1aae43..5d65c9754d83 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1189,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
return 0;
}
-static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
+static unsigned int drbd_max_peer_bio_size(struct drbd_device *device)
{
- q->limits.discard_granularity = granularity;
+ /*
+ * We may ignore peer limits if the peer is modern enough. From 8.3.8
+ * onwards the peer can use multiple BIOs for a single peer_request.
+ */
+ if (device->state.conn < C_WF_REPORT_PARAMS)
+ return device->peer_max_bio_size;
+
+ if (first_peer_device(device)->connection->agreed_pro_version < 94)
+ return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+
+ /*
+ * Correct old drbd (up to 8.3.7) if it believes it can do more than
+ * 32KiB.
+ */
+ if (first_peer_device(device)->connection->agreed_pro_version == 94)
+ return DRBD_MAX_SIZE_H80_PACKET;
+
+ /*
+ * drbd 8.3.8 onwards, before 8.4.0
+ */
+ if (first_peer_device(device)->connection->agreed_pro_version < 100)
+ return DRBD_MAX_BIO_SIZE_P95;
+ return DRBD_MAX_BIO_SIZE;
}
static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
@@ -1204,149 +1226,119 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
return AL_EXTENT_SIZE >> 9;
}
-static void decide_on_discard_support(struct drbd_device *device,
+static bool drbd_discard_supported(struct drbd_connection *connection,
struct drbd_backing_dev *bdev)
{
- struct drbd_connection *connection =
- first_peer_device(device)->connection;
- struct request_queue *q = device->rq_queue;
- unsigned int max_discard_sectors;
-
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
- goto not_supported;
+ return false;
if (connection->cstate >= C_CONNECTED &&
!(connection->agreed_features & DRBD_FF_TRIM)) {
drbd_info(connection,
"peer DRBD too old, does not support TRIM: disabling discards\n");
- goto not_supported;
+ return false;
}
- /*
- * We don't care for the granularity, really.
- *
- * Stacking limits below should fix it for the local device. Whether or
- * not it is a suitable granularity on the remote device is not our
- * problem, really. If you care, you need to use devices with similar
- * topology on all peers.
- */
- blk_queue_discard_granularity(q, 512);
- max_discard_sectors = drbd_max_discard_sectors(connection);
- blk_queue_max_discard_sectors(q, max_discard_sectors);
- blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- return;
-
-not_supported:
- blk_queue_discard_granularity(q, 0);
- blk_queue_max_discard_sectors(q, 0);
+ return true;
}
-static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
+/* This is the workaround for "bio would need to, but cannot, be split" */
+static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device)
{
- /* Fixup max_write_zeroes_sectors after blk_stack_limits():
- * if we can handle "zeroes" efficiently on the protocol,
- * we want to do that, even if our backend does not announce
- * max_write_zeroes_sectors itself. */
- struct drbd_connection *connection = first_peer_device(device)->connection;
- /* If the peer announces WZEROES support, use it. Otherwise, rather
- * send explicit zeroes than rely on some discard-zeroes-data magic. */
- if (connection->agreed_features & DRBD_FF_WZEROES)
- q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
- else
- q->limits.max_write_zeroes_sectors = 0;
-}
+ unsigned int max_segments;
-static void fixup_discard_support(struct drbd_device *device, struct request_queue *q)
-{
- unsigned int max_discard = device->rq_queue->limits.max_discard_sectors;
- unsigned int discard_granularity =
- device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT;
+ rcu_read_lock();
+ max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
+ rcu_read_unlock();
- if (discard_granularity > max_discard) {
- blk_queue_discard_granularity(q, 0);
- blk_queue_max_discard_sectors(q, 0);
- }
+ if (!max_segments)
+ return BLK_MAX_SEGMENTS;
+ return max_segments;
}
-static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
- unsigned int max_bio_size, struct o_qlim *o)
+void drbd_reconsider_queue_parameters(struct drbd_device *device,
+ struct drbd_backing_dev *bdev, struct o_qlim *o)
{
+ struct drbd_connection *connection =
+ first_peer_device(device)->connection;
struct request_queue * const q = device->rq_queue;
- unsigned int max_hw_sectors = max_bio_size >> 9;
- unsigned int max_segments = 0;
+ unsigned int now = queue_max_hw_sectors(q) << 9;
+ struct queue_limits lim;
struct request_queue *b = NULL;
- struct disk_conf *dc;
+ unsigned int new;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
- max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
- rcu_read_lock();
- dc = rcu_dereference(device->ldev->disk_conf);
- max_segments = dc->max_bio_bvecs;
- rcu_read_unlock();
-
- blk_set_stacking_limits(&q->limits);
+ device->local_max_bio_size =
+ queue_max_hw_sectors(b) << SECTOR_SHIFT;
}
- blk_queue_max_hw_sectors(q, max_hw_sectors);
- /* This is the workaround for "bio would need to, but cannot, be split" */
- blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
- blk_queue_segment_boundary(q, PAGE_SIZE-1);
- decide_on_discard_support(device, bdev);
-
- if (b) {
- blk_stack_limits(&q->limits, &b->limits, 0);
- disk_update_readahead(device->vdisk);
+ /*
+ * We may later detach and re-attach on a disconnected Primary. Avoid
+ * decreasing the value in this case.
+ *
+ * We want to store what we know the peer DRBD can handle, not what the
+ * peer IO backend can handle.
+ */
+ new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size,
+ max(drbd_max_peer_bio_size(device), device->peer_max_bio_size));
+ if (new != now) {
+ if (device->state.role == R_PRIMARY && new < now)
+ drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n",
+ new, now);
+ drbd_info(device, "max BIO size = %u\n", new);
}
- fixup_write_zeroes(device, q);
- fixup_discard_support(device, q);
-}
-
-void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
-{
- unsigned int now, new, local, peer;
-
- now = queue_max_hw_sectors(device->rq_queue) << 9;
- local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
- peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
+ lim = queue_limits_start_update(q);
if (bdev) {
- local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
- device->local_max_bio_size = local;
+ blk_set_stacking_limits(&lim);
+ lim.max_segments = drbd_backing_dev_max_segments(device);
+ } else {
+ lim.max_segments = BLK_MAX_SEGMENTS;
}
- local = min(local, DRBD_MAX_BIO_SIZE);
- /* We may ignore peer limits if the peer is modern enough.
- Because new from 8.3.8 onwards the peer can use multiple
- BIOs for a single peer_request */
- if (device->state.conn >= C_WF_REPORT_PARAMS) {
- if (first_peer_device(device)->connection->agreed_pro_version < 94)
- peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
- /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
- else if (first_peer_device(device)->connection->agreed_pro_version == 94)
- peer = DRBD_MAX_SIZE_H80_PACKET;
- else if (first_peer_device(device)->connection->agreed_pro_version < 100)
- peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
- else
- peer = DRBD_MAX_BIO_SIZE;
+ lim.max_hw_sectors = new >> SECTOR_SHIFT;
+ lim.seg_boundary_mask = PAGE_SIZE - 1;
- /* We may later detach and re-attach on a disconnected Primary.
- * Avoid this setting to jump back in that case.
- * We want to store what we know the peer DRBD can handle,
- * not what the peer IO backend can handle. */
- if (peer > device->peer_max_bio_size)
- device->peer_max_bio_size = peer;
+ /*
+ * We don't care for the granularity, really.
+ *
+ * Stacking limits below should fix it for the local device. Whether or
+ * not it is a suitable granularity on the remote device is not our
+ * problem, really. If you care, you need to use devices with similar
+ * topology on all peers.
+ */
+ if (drbd_discard_supported(connection, bdev)) {
+ lim.discard_granularity = 512;
+ lim.max_hw_discard_sectors =
+ drbd_max_discard_sectors(connection);
+ } else {
+ lim.discard_granularity = 0;
+ lim.max_hw_discard_sectors = 0;
}
- new = min(local, peer);
- if (device->state.role == R_PRIMARY && new < now)
- drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
+ if (bdev)
+ blk_stack_limits(&lim, &b->limits, 0);
- if (new != now)
- drbd_info(device, "max BIO size = %u\n", new);
+ /*
+ * If we can handle "zeroes" efficiently on the protocol, we want to do
+ * that, even if our backend does not announce max_write_zeroes_sectors
+ * itself.
+ */
+ if (connection->agreed_features & DRBD_FF_WZEROES)
+ lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
+ else
+ lim.max_write_zeroes_sectors = 0;
+
+ if ((lim.discard_granularity >> SECTOR_SHIFT) >
+ lim.max_hw_discard_sectors) {
+ lim.discard_granularity = 0;
+ lim.max_hw_discard_sectors = 0;
+ }
- drbd_setup_queue_param(device, bdev, new, o);
+ if (queue_limits_commit_update(q, &lim))
+ drbd_err(device, "setting new queue limits failed\n");
}
/* Starts the worker thread */
@@ -1635,45 +1627,45 @@ success:
return 0;
}
-static struct bdev_handle *open_backing_dev(struct drbd_device *device,
+static struct file *open_backing_dev(struct drbd_device *device,
const char *bdev_path, void *claim_ptr, bool do_bd_link)
{
- struct bdev_handle *handle;
+ struct file *file;
int err = 0;
- handle = bdev_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
- claim_ptr, NULL);
- if (IS_ERR(handle)) {
+ file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ claim_ptr, NULL);
+ if (IS_ERR(file)) {
drbd_err(device, "open(\"%s\") failed with %ld\n",
- bdev_path, PTR_ERR(handle));
- return handle;
+ bdev_path, PTR_ERR(file));
+ return file;
}
if (!do_bd_link)
- return handle;
+ return file;
- err = bd_link_disk_holder(handle->bdev, device->vdisk);
+ err = bd_link_disk_holder(file_bdev(file), device->vdisk);
if (err) {
- bdev_release(handle);
+ fput(file);
drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
bdev_path, err);
- handle = ERR_PTR(err);
+ file = ERR_PTR(err);
}
- return handle;
+ return file;
}
static int open_backing_devices(struct drbd_device *device,
struct disk_conf *new_disk_conf,
struct drbd_backing_dev *nbc)
{
- struct bdev_handle *handle;
+ struct file *file;
- handle = open_backing_dev(device, new_disk_conf->backing_dev, device,
+ file = open_backing_dev(device, new_disk_conf->backing_dev, device,
true);
- if (IS_ERR(handle))
+ if (IS_ERR(file))
return ERR_OPEN_DISK;
- nbc->backing_bdev = handle->bdev;
- nbc->backing_bdev_handle = handle;
+ nbc->backing_bdev = file_bdev(file);
+ nbc->backing_bdev_file = file;
/*
* meta_dev_idx >= 0: external fixed size, possibly multiple
@@ -1683,7 +1675,7 @@ static int open_backing_devices(struct drbd_device *device,
* should check it for you already; but if you don't, or
* someone fooled it, we need to double check here)
*/
- handle = open_backing_dev(device, new_disk_conf->meta_dev,
+ file = open_backing_dev(device, new_disk_conf->meta_dev,
/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
* if potentially shared with other drbd minors */
(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
@@ -1691,21 +1683,21 @@ static int open_backing_devices(struct drbd_device *device,
* as would happen with internal metadata. */
(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
- if (IS_ERR(handle))
+ if (IS_ERR(file))
return ERR_OPEN_MD_DISK;
- nbc->md_bdev = handle->bdev;
- nbc->md_bdev_handle = handle;
+ nbc->md_bdev = file_bdev(file);
+ nbc->f_md_bdev = file;
return NO_ERROR;
}
static void close_backing_dev(struct drbd_device *device,
- struct bdev_handle *handle, bool do_bd_unlink)
+ struct file *bdev_file, bool do_bd_unlink)
{
- if (!handle)
+ if (!bdev_file)
return;
if (do_bd_unlink)
- bd_unlink_disk_holder(handle->bdev, device->vdisk);
- bdev_release(handle);
+ bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk);
+ fput(bdev_file);
}
void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
@@ -1713,9 +1705,9 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *
if (ldev == NULL)
return;
- close_backing_dev(device, ldev->md_bdev_handle,
+ close_backing_dev(device, ldev->f_md_bdev,
ldev->md_bdev != ldev->backing_bdev);
- close_backing_dev(device, ldev->backing_bdev_handle, true);
+ close_backing_dev(device, ldev->backing_bdev_file, true);
kfree(ldev->disk_conf);
kfree(ldev);
@@ -2131,9 +2123,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
fail:
conn_reconfig_done(connection);
if (nbc) {
- close_backing_dev(device, nbc->md_bdev_handle,
+ close_backing_dev(device, nbc->f_md_bdev,
nbc->md_bdev != nbc->backing_bdev);
- close_backing_dev(device, nbc->backing_bdev_handle, true);
+ close_backing_dev(device, nbc->backing_bdev_file, true);
kfree(nbc);
}
kfree(new_disk_conf);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 287a8d1d3f70..e858e7e0383f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
int notify_resource_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_resource_state_change *resource_state_change,
+ void *state_change,
enum drbd_notification_type type)
{
+ struct drbd_resource_state_change *resource_state_change = state_change;
struct drbd_resource *resource = resource_state_change->resource;
struct resource_info resource_info = {
.res_role = resource_state_change->role[NEW],
@@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb,
int notify_connection_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_connection_state_change *connection_state_change,
+ void *state_change,
enum drbd_notification_type type)
{
- struct drbd_connection *connection = connection_state_change->connection;
+ struct drbd_connection_state_change *p = state_change;
+ struct drbd_connection *connection = p->connection;
struct connection_info connection_info = {
- .conn_connection_state = connection_state_change->cstate[NEW],
- .conn_role = connection_state_change->peer_role[NEW],
+ .conn_connection_state = p->cstate[NEW],
+ .conn_role = p->peer_role[NEW],
};
return notify_connection_state(skb, seq, connection, &connection_info, type);
@@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb,
int notify_device_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_device_state_change *device_state_change,
+ void *state_change,
enum drbd_notification_type type)
{
+ struct drbd_device_state_change *device_state_change = state_change;
struct drbd_device *device = device_state_change->device;
struct device_info device_info = {
.dev_disk_state = device_state_change->disk_state[NEW],
@@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb,
int notify_peer_device_state_change(struct sk_buff *skb,
unsigned int seq,
- struct drbd_peer_device_state_change *p,
+ void *state_change,
enum drbd_notification_type type)
{
+ struct drbd_peer_device_state_change *p = state_change;
struct drbd_peer_device *peer_device = p->peer_device;
struct peer_device_info peer_device_info = {
.peer_repl_state = p->repl_state[NEW],
@@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
bool resource_state_has_changed;
unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
- int (*last_func)(struct sk_buff *, unsigned int, void *,
- enum drbd_notification_type) = NULL;
+ int (*last_func)(struct sk_buff *, unsigned int,
+ void *, enum drbd_notification_type) = NULL;
void *last_arg = NULL;
#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
@@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
})
#define REMEMBER_STATE_CHANGE(func, arg, type) \
({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
- last_func = (typeof(last_func))func; \
+ last_func = func; \
last_arg = arg; \
})
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
index 9d78d8e3912e..a56a57d67686 100644
--- a/drivers/block/drbd/drbd_state_change.h
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *);
extern int notify_resource_state_change(struct sk_buff *,
unsigned int,
- struct drbd_resource_state_change *,
+ void *,
enum drbd_notification_type type);
extern int notify_connection_state_change(struct sk_buff *,
unsigned int,
- struct drbd_connection_state_change *,
+ void *,
enum drbd_notification_type type);
extern int notify_device_state_change(struct sk_buff *,
unsigned int,
- struct drbd_device_state_change *,
+ void *,
enum drbd_notification_type type);
extern int notify_peer_device_state_change(struct sk_buff *,
unsigned int,
- struct drbd_peer_device_state_change *,
+ void *,
enum drbd_notification_type type);
#endif /* DRBD_STATE_CHANGE_H */
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index d0e41d52d6a9..1b399ec8c07d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -530,14 +530,13 @@ static struct format_descr format_req;
static char *floppy_track_buffer;
static int max_buffer_sectors;
-typedef void (*done_f)(int);
static const struct cont_t {
void (*interrupt)(void);
/* this is called after the interrupt of the
* main command */
void (*redo)(void); /* this is called to retry the operation */
void (*error)(void); /* this is called to tally an error */
- done_f done; /* this is called to say if the operation has
+ void (*done)(int); /* this is called to say if the operation has
* succeeded/failed */
} *cont;
@@ -985,6 +984,10 @@ static void empty(void)
{
}
+static void empty_done(int result)
+{
+}
+
static void (*floppy_work_fn)(void);
static void floppy_work_workfn(struct work_struct *work)
@@ -1998,14 +2001,14 @@ static const struct cont_t wakeup_cont = {
.interrupt = empty,
.redo = do_wakeup,
.error = empty,
- .done = (done_f)empty
+ .done = empty_done,
};
static const struct cont_t intr_cont = {
.interrupt = empty,
.redo = process_fd_request,
.error = empty,
- .done = (done_f)empty
+ .done = empty_done,
};
/* schedules handler, waiting for completion. May be interrupted, will then
@@ -4513,13 +4516,15 @@ static bool floppy_available(int drive)
static int floppy_alloc_disk(unsigned int drive, unsigned int type)
{
+ struct queue_limits lim = {
+ .max_hw_sectors = 64,
+ };
struct gendisk *disk;
- disk = blk_mq_alloc_disk(&tag_sets[drive], NULL);
+ disk = blk_mq_alloc_disk(&tag_sets[drive], &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
- blk_queue_max_hw_sectors(disk->queue, 64);
disk->major = FLOPPY_MAJOR;
disk->first_minor = TOMINOR(drive) | (type << 2);
disk->minors = 1;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f8145499da38..28a95fd366fe 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -750,12 +750,13 @@ static void loop_sysfs_exit(struct loop_device *lo)
&loop_attribute_group);
}
-static void loop_config_discard(struct loop_device *lo)
+static void loop_config_discard(struct loop_device *lo,
+ struct queue_limits *lim)
{
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
- struct request_queue *q = lo->lo_queue;
- u32 granularity, max_discard_sectors;
+ u32 granularity = 0, max_discard_sectors = 0;
+ struct kstatfs sbuf;
/*
* If the backing device is a block device, mirror its zeroing
@@ -775,29 +776,17 @@ static void loop_config_discard(struct loop_device *lo)
* We use punch hole to reclaim the free space used by the
* image a.k.a. discard.
*/
- } else if (!file->f_op->fallocate) {
- max_discard_sectors = 0;
- granularity = 0;
-
- } else {
- struct kstatfs sbuf;
-
+ } else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) {
max_discard_sectors = UINT_MAX >> 9;
- if (!vfs_statfs(&file->f_path, &sbuf))
- granularity = sbuf.f_bsize;
- else
- max_discard_sectors = 0;
+ granularity = sbuf.f_bsize;
}
- if (max_discard_sectors) {
- q->limits.discard_granularity = granularity;
- blk_queue_max_discard_sectors(q, max_discard_sectors);
- blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- } else {
- q->limits.discard_granularity = 0;
- blk_queue_max_discard_sectors(q, 0);
- blk_queue_max_write_zeroes_sectors(q, 0);
- }
+ lim->max_hw_discard_sectors = max_discard_sectors;
+ lim->max_write_zeroes_sectors = max_discard_sectors;
+ if (max_discard_sectors)
+ lim->discard_granularity = granularity;
+ else
+ lim->discard_granularity = 0;
}
struct loop_worker {
@@ -986,6 +975,20 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
+static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize,
+ bool update_discard_settings)
+{
+ struct queue_limits lim;
+
+ lim = queue_limits_start_update(lo->lo_queue);
+ lim.logical_block_size = bsize;
+ lim.physical_block_size = bsize;
+ lim.io_min = bsize;
+ if (update_discard_settings)
+ loop_config_discard(lo, &lim);
+ return queue_limits_commit_update(lo->lo_queue, &lim);
+}
+
static int loop_configure(struct loop_device *lo, blk_mode_t mode,
struct block_device *bdev,
const struct loop_config *config)
@@ -1083,11 +1086,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
else
bsize = 512;
- blk_queue_logical_block_size(lo->lo_queue, bsize);
- blk_queue_physical_block_size(lo->lo_queue, bsize);
- blk_queue_io_min(lo->lo_queue, bsize);
+ error = loop_reconfigure_limits(lo, bsize, true);
+ if (WARN_ON_ONCE(error))
+ goto out_unlock;
- loop_config_discard(lo);
loop_update_rotational(lo);
loop_update_dio(lo);
loop_sysfs_init(lo);
@@ -1154,9 +1156,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
lo->lo_offset = 0;
lo->lo_sizelimit = 0;
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
- blk_queue_logical_block_size(lo->lo_queue, 512);
- blk_queue_physical_block_size(lo->lo_queue, 512);
- blk_queue_io_min(lo->lo_queue, 512);
+ loop_reconfigure_limits(lo, 512, false);
invalidate_disk(lo->lo_disk);
loop_sysfs_exit(lo);
/* let user-space know about this change */
@@ -1488,9 +1488,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
- blk_queue_logical_block_size(lo->lo_queue, arg);
- blk_queue_physical_block_size(lo->lo_queue, arg);
- blk_queue_io_min(lo->lo_queue, arg);
+ err = loop_reconfigure_limits(lo, arg, false);
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
@@ -1982,6 +1980,12 @@ static const struct blk_mq_ops loop_mq_ops = {
static int loop_add(int i)
{
+ struct queue_limits lim = {
+ /*
+ * Random number picked from the historic block max_sectors cap.
+ */
+ .max_hw_sectors = 2560u,
+ };
struct loop_device *lo;
struct gendisk *disk;
int err;
@@ -2025,16 +2029,13 @@ static int loop_add(int i)
if (err)
goto out_free_idr;
- disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo);
+ disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_cleanup_tags;
}
lo->lo_queue = lo->lo_disk->queue;
- /* random number picked from the history block max_sectors cap */
- blk_queue_max_hw_sectors(lo->lo_queue, 2560u);
-
/*
* By default, we do buffer IO, so it doesn't make sense to enable
* merge because the I/O submitted to backing file is handled page by
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index b200950e8fb5..43a187609ef7 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3401,6 +3401,12 @@ static const struct blk_mq_ops mtip_mq_ops = {
*/
static int mtip_block_initialize(struct driver_data *dd)
{
+ struct queue_limits lim = {
+ .physical_block_size = 4096,
+ .max_hw_sectors = 0xffff,
+ .max_segments = MTIP_MAX_SG,
+ .max_segment_size = 0x400000,
+ };
int rv = 0, wait_for_rebuild = 0;
sector_t capacity;
unsigned int index = 0;
@@ -3431,7 +3437,7 @@ static int mtip_block_initialize(struct driver_data *dd)
goto block_queue_alloc_tag_error;
}
- dd->disk = blk_mq_alloc_disk(&dd->tags, dd);
+ dd->disk = blk_mq_alloc_disk(&dd->tags, &lim, dd);
if (IS_ERR(dd->disk)) {
dev_err(&dd->pdev->dev,
"Unable to allocate request queue\n");
@@ -3481,12 +3487,7 @@ skip_create_disk:
/* Set device limits. */
blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
- blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
- blk_queue_physical_block_size(dd->queue, 4096);
- blk_queue_max_hw_sectors(dd->queue, 0xffff);
- blk_queue_max_segment_size(dd->queue, 0x400000);
dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
- blk_queue_io_min(dd->queue, 4096);
/* Set the capacity of the device in 512 byte sectors. */
if (!(mtip_hw_get_capacity(dd, &capacity))) {
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index d914156db2d8..27b2187e7a6d 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -114,6 +114,10 @@ static const struct block_device_operations n64cart_fops = {
*/
static int __init n64cart_probe(struct platform_device *pdev)
{
+ struct queue_limits lim = {
+ .physical_block_size = 4096,
+ .logical_block_size = 4096,
+ };
struct gendisk *disk;
int err = -ENOMEM;
@@ -131,9 +135,11 @@ static int __init n64cart_probe(struct platform_device *pdev)
if (IS_ERR(reg_base))
return PTR_ERR(reg_base);
- disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ err = PTR_ERR(disk);
goto out;
+ }
disk->first_minor = 0;
disk->flags = GENHD_FL_NO_PART;
@@ -145,8 +151,6 @@ static int __init n64cart_probe(struct platform_device *pdev)
set_disk_ro(disk, 1);
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_physical_block_size(disk->queue, 4096);
- blk_queue_logical_block_size(disk->queue, 4096);
err = add_disk(disk);
if (err)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 33a8f37bb6a1..9d4ec9273bf9 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -316,9 +316,12 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
nsock->sent = 0;
}
-static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
+static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
loff_t blksize)
{
+ struct queue_limits lim;
+ int error;
+
if (!blksize)
blksize = 1u << NBD_DEF_BLKSIZE_BITS;
@@ -334,10 +337,16 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
if (!nbd->pid)
return 0;
+ lim = queue_limits_start_update(nbd->disk->queue);
if (nbd->config->flags & NBD_FLAG_SEND_TRIM)
- blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
- blk_queue_logical_block_size(nbd->disk->queue, blksize);
- blk_queue_physical_block_size(nbd->disk->queue, blksize);
+ lim.max_hw_discard_sectors = UINT_MAX;
+ else
+ lim.max_hw_discard_sectors = 0;
+ lim.logical_block_size = blksize;
+ lim.physical_block_size = blksize;
+ error = queue_limits_commit_update(nbd->disk->queue, &lim);
+ if (error)
+ return error;
if (max_part)
set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
@@ -346,6 +355,18 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
return 0;
}
+static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
+ loff_t blksize)
+{
+ int error;
+
+ blk_mq_freeze_queue(nbd->disk->queue);
+ error = __nbd_set_size(nbd, bytesize, blksize);
+ blk_mq_unfreeze_queue(nbd->disk->queue);
+
+ return error;
+}
+
static void nbd_complete_rq(struct request *req)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
@@ -1351,7 +1372,6 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->config = NULL;
nbd->tag_set.timeout = 0;
- blk_queue_max_discard_sectors(nbd->disk->queue, 0);
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
@@ -1783,6 +1803,12 @@ static const struct blk_mq_ops nbd_mq_ops = {
static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
+ struct queue_limits lim = {
+ .max_hw_sectors = 65536,
+ .max_user_sectors = 256,
+ .max_segments = USHRT_MAX,
+ .max_segment_size = UINT_MAX,
+ };
struct nbd_device *nbd;
struct gendisk *disk;
int err = -ENOMEM;
@@ -1823,7 +1849,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
if (err < 0)
goto out_free_tags;
- disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
+ disk = blk_mq_alloc_disk(&nbd->tag_set, &lim, NULL);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_free_idr;
@@ -1843,11 +1869,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
* Tell the block layer that we are not a rotational device
*/
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_max_discard_sectors(disk->queue, 0);
- blk_queue_max_segment_size(disk->queue, UINT_MAX);
- blk_queue_max_segments(disk->queue, USHRT_MAX);
- blk_queue_max_hw_sectors(disk->queue, 65536);
- disk->queue->limits.max_sectors = 256;
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
@@ -2433,6 +2454,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
}
dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
+ if (!dev_list) {
+ nlmsg_free(reply);
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
if (index == -1) {
ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
if (ret) {
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 36755f263e8e..71c39bcd872c 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
#endif
+/*
+ * Historic queue modes.
+ *
+ * These days nothing but NULL_Q_MQ is actually supported, but we keep it the
+ * enum for error reporting.
+ */
+enum {
+ NULL_Q_BIO = 0,
+ NULL_Q_RQ = 1,
+ NULL_Q_MQ = 2,
+};
+
static int g_queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
@@ -165,8 +177,8 @@ static bool g_blocking;
module_param_named(blocking, g_blocking, bool, 0444);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
-static bool shared_tags;
-module_param(shared_tags, bool, 0444);
+static bool g_shared_tags;
+module_param_named(shared_tags, g_shared_tags, bool, 0444);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
static bool g_shared_tag_bitmap;
@@ -426,6 +438,7 @@ NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
+NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
@@ -571,6 +584,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_offline,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
+ &nullb_device_attr_shared_tags,
&nullb_device_attr_shared_tag_bitmap,
NULL,
};
@@ -653,10 +667,11 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
"badblocks,blocking,blocksize,cache_size,"
"completion_nsec,discard,home_node,hw_queue_depth,"
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
- "poll_queues,power,queue_mode,shared_tag_bitmap,size,"
- "submit_queues,use_per_node_hctx,virt_boundary,zoned,"
- "zone_capacity,zone_max_active,zone_max_open,"
- "zone_nr_conv,zone_offline,zone_readonly,zone_size\n");
+ "poll_queues,power,queue_mode,shared_tag_bitmap,"
+ "shared_tags,size,submit_queues,use_per_node_hctx,"
+ "virt_boundary,zoned,zone_capacity,zone_max_active,"
+ "zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
+ "zone_size\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -738,6 +753,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_active = g_zone_max_active;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
+ dev->shared_tags = g_shared_tags;
dev->shared_tag_bitmap = g_shared_tag_bitmap;
return dev;
}
@@ -752,98 +768,11 @@ static void null_free_dev(struct nullb_device *dev)
kfree(dev);
}
-static void put_tag(struct nullb_queue *nq, unsigned int tag)
-{
- clear_bit_unlock(tag, nq->tag_map);
-
- if (waitqueue_active(&nq->wait))
- wake_up(&nq->wait);
-}
-
-static unsigned int get_tag(struct nullb_queue *nq)
-{
- unsigned int tag;
-
- do {
- tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
- if (tag >= nq->queue_depth)
- return -1U;
- } while (test_and_set_bit_lock(tag, nq->tag_map));
-
- return tag;
-}
-
-static void free_cmd(struct nullb_cmd *cmd)
-{
- put_tag(cmd->nq, cmd->tag);
-}
-
-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
-
-static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
-{
- struct nullb_cmd *cmd;
- unsigned int tag;
-
- tag = get_tag(nq);
- if (tag != -1U) {
- cmd = &nq->cmds[tag];
- cmd->tag = tag;
- cmd->error = BLK_STS_OK;
- cmd->nq = nq;
- if (nq->dev->irqmode == NULL_IRQ_TIMER) {
- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- cmd->timer.function = null_cmd_timer_expired;
- }
- return cmd;
- }
-
- return NULL;
-}
-
-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
-{
- struct nullb_cmd *cmd;
- DEFINE_WAIT(wait);
-
- do {
- /*
- * This avoids multiple return statements, multiple calls to
- * __alloc_cmd() and a fast path call to prepare_to_wait().
- */
- cmd = __alloc_cmd(nq);
- if (cmd) {
- cmd->bio = bio;
- return cmd;
- }
- prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
- io_schedule();
- finish_wait(&nq->wait, &wait);
- } while (1);
-}
-
-static void end_cmd(struct nullb_cmd *cmd)
-{
- int queue_mode = cmd->nq->dev->queue_mode;
-
- switch (queue_mode) {
- case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, cmd->error);
- return;
- case NULL_Q_BIO:
- cmd->bio->bi_status = cmd->error;
- bio_endio(cmd->bio);
- break;
- }
-
- free_cmd(cmd);
-}
-
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
- end_cmd(container_of(timer, struct nullb_cmd, timer));
+ struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
+ blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
return HRTIMER_NORESTART;
}
@@ -856,7 +785,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
static void null_complete_rq(struct request *rq)
{
- end_cmd(blk_mq_rq_to_pdu(rq));
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+ blk_mq_end_request(rq, cmd->error);
}
static struct nullb_page *null_alloc_page(void)
@@ -1273,7 +1204,7 @@ static int null_transfer(struct nullb *nullb, struct page *page,
static int null_handle_rq(struct nullb_cmd *cmd)
{
- struct request *rq = cmd->rq;
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
int err;
unsigned int len;
@@ -1298,63 +1229,21 @@ static int null_handle_rq(struct nullb_cmd *cmd)
return 0;
}
-static int null_handle_bio(struct nullb_cmd *cmd)
-{
- struct bio *bio = cmd->bio;
- struct nullb *nullb = cmd->nq->dev->nullb;
- int err;
- unsigned int len;
- sector_t sector = bio->bi_iter.bi_sector;
- struct bio_vec bvec;
- struct bvec_iter iter;
-
- spin_lock_irq(&nullb->lock);
- bio_for_each_segment(bvec, bio, iter) {
- len = bvec.bv_len;
- err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
- op_is_write(bio_op(bio)), sector,
- bio->bi_opf & REQ_FUA);
- if (err) {
- spin_unlock_irq(&nullb->lock);
- return err;
- }
- sector += len >> SECTOR_SHIFT;
- }
- spin_unlock_irq(&nullb->lock);
- return 0;
-}
-
-static void null_stop_queue(struct nullb *nullb)
-{
- struct request_queue *q = nullb->q;
-
- if (nullb->dev->queue_mode == NULL_Q_MQ)
- blk_mq_stop_hw_queues(q);
-}
-
-static void null_restart_queue_async(struct nullb *nullb)
-{
- struct request_queue *q = nullb->q;
-
- if (nullb->dev->queue_mode == NULL_Q_MQ)
- blk_mq_start_stopped_hw_queues(q, true);
-}
-
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
blk_status_t sts = BLK_STS_OK;
- struct request *rq = cmd->rq;
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
if (!hrtimer_active(&nullb->bw_timer))
hrtimer_restart(&nullb->bw_timer);
if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
- null_stop_queue(nullb);
+ blk_mq_stop_hw_queues(nullb->q);
/* race with timer */
if (atomic_long_read(&nullb->cur_bytes) > 0)
- null_restart_queue_async(nullb);
+ blk_mq_start_stopped_hw_queues(nullb->q, true);
/* requeue request */
sts = BLK_STS_DEV_RESOURCE;
}
@@ -1381,37 +1270,29 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
sector_t nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
- int err;
if (op == REQ_OP_DISCARD)
return null_handle_discard(dev, sector, nr_sectors);
+ return errno_to_blk_status(null_handle_rq(cmd));
- if (dev->queue_mode == NULL_Q_BIO)
- err = null_handle_bio(cmd);
- else
- err = null_handle_rq(cmd);
-
- return errno_to_blk_status(err);
}
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb_device *dev = cmd->nq->dev;
struct bio *bio;
- if (dev->memory_backed)
- return;
-
- if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
- zero_fill_bio(cmd->bio);
- } else if (req_op(cmd->rq) == REQ_OP_READ) {
- __rq_for_each_bio(bio, cmd->rq)
+ if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
+ __rq_for_each_bio(bio, rq)
zero_fill_bio(bio);
}
}
static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+
/*
* Since root privileges are required to configure the null_blk
* driver, it is fine that this driver does not initialize the
@@ -1425,20 +1306,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
/* Complete IO by inline, softirq or timer */
switch (cmd->nq->dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
- switch (cmd->nq->dev->queue_mode) {
- case NULL_Q_MQ:
- blk_mq_complete_request(cmd->rq);
- break;
- case NULL_Q_BIO:
- /*
- * XXX: no proper submitting cpu information available.
- */
- end_cmd(cmd);
- break;
- }
+ blk_mq_complete_request(rq);
break;
case NULL_IRQ_NONE:
- end_cmd(cmd);
+ blk_mq_end_request(rq, cmd->error);
break;
case NULL_IRQ_TIMER:
null_cmd_end_timer(cmd);
@@ -1499,7 +1370,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
- null_restart_queue_async(nullb);
+ blk_mq_start_stopped_hw_queues(nullb->q, true);
hrtimer_forward_now(&nullb->bw_timer, timer_interval);
@@ -1516,26 +1387,6 @@ static void nullb_setup_bwtimer(struct nullb *nullb)
hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
}
-static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
-{
- int index = 0;
-
- if (nullb->nr_queues != 1)
- index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
-
- return &nullb->queues[index];
-}
-
-static void null_submit_bio(struct bio *bio)
-{
- sector_t sector = bio->bi_iter.bi_sector;
- sector_t nr_sectors = bio_sectors(bio);
- struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
- struct nullb_queue *nq = nullb_to_queue(nullb);
-
- null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
-}
-
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static bool should_timeout_request(struct request *rq)
@@ -1655,7 +1506,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
blk_rq_sectors(req));
if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
blk_mq_end_request_batch))
- end_cmd(cmd);
+ blk_mq_end_request(req, cmd->error);
nr++;
}
@@ -1711,7 +1562,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
- cmd->rq = rq;
cmd->error = BLK_STS_OK;
cmd->nq = nq;
cmd->fake_timeout = should_timeout_request(rq) ||
@@ -1770,34 +1620,8 @@ static void null_queue_rqs(struct request **rqlist)
*rqlist = requeue_list;
}
-static void cleanup_queue(struct nullb_queue *nq)
-{
- bitmap_free(nq->tag_map);
- kfree(nq->cmds);
-}
-
-static void cleanup_queues(struct nullb *nullb)
-{
- int i;
-
- for (i = 0; i < nullb->nr_queues; i++)
- cleanup_queue(&nullb->queues[i]);
-
- kfree(nullb->queues);
-}
-
-static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
- struct nullb_queue *nq = hctx->driver_data;
- struct nullb *nullb = nq->dev->nullb;
-
- nullb->nr_queues--;
-}
-
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
nq->dev = nullb->dev;
INIT_LIST_HEAD(&nq->poll_list);
spin_lock_init(&nq->poll_lock);
@@ -1815,7 +1639,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
nq = &nullb->queues[hctx_idx];
hctx->driver_data = nq;
null_init_queue(nullb, nq);
- nullb->nr_queues++;
return 0;
}
@@ -1828,7 +1651,6 @@ static const struct blk_mq_ops null_mq_ops = {
.poll = null_poll,
.map_queues = null_map_queues,
.init_hctx = null_init_hctx,
- .exit_hctx = null_exit_hctx,
};
static void null_del_dev(struct nullb *nullb)
@@ -1849,21 +1671,20 @@ static void null_del_dev(struct nullb *nullb)
if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
hrtimer_cancel(&nullb->bw_timer);
atomic_long_set(&nullb->cur_bytes, LONG_MAX);
- null_restart_queue_async(nullb);
+ blk_mq_start_stopped_hw_queues(nullb->q, true);
}
put_disk(nullb->disk);
- if (dev->queue_mode == NULL_Q_MQ &&
- nullb->tag_set == &nullb->__tag_set)
+ if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
- cleanup_queues(nullb);
+ kfree(nullb->queues);
if (null_cache_active(nullb))
null_free_device_storage(nullb->dev, true);
kfree(nullb);
dev->nullb = NULL;
}
-static void null_config_discard(struct nullb *nullb)
+static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
{
if (nullb->dev->discard == false)
return;
@@ -1880,43 +1701,14 @@ static void null_config_discard(struct nullb *nullb)
return;
}
- blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+ lim->max_hw_discard_sectors = UINT_MAX >> 9;
}
-static const struct block_device_operations null_bio_ops = {
- .owner = THIS_MODULE,
- .submit_bio = null_submit_bio,
- .report_zones = null_report_zones,
-};
-
-static const struct block_device_operations null_rq_ops = {
+static const struct block_device_operations null_ops = {
.owner = THIS_MODULE,
.report_zones = null_report_zones,
};
-static int setup_commands(struct nullb_queue *nq)
-{
- struct nullb_cmd *cmd;
- int i;
-
- nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
- if (!nq->cmds)
- return -ENOMEM;
-
- nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
- if (!nq->tag_map) {
- kfree(nq->cmds);
- return -ENOMEM;
- }
-
- for (i = 0; i < nq->queue_depth; i++) {
- cmd = &nq->cmds[i];
- cmd->tag = -1U;
- }
-
- return 0;
-}
-
static int setup_queues(struct nullb *nullb)
{
int nqueues = nr_cpu_ids;
@@ -1929,101 +1721,66 @@ static int setup_queues(struct nullb *nullb)
if (!nullb->queues)
return -ENOMEM;
- nullb->queue_depth = nullb->dev->hw_queue_depth;
return 0;
}
-static int init_driver_queues(struct nullb *nullb)
+static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
{
- struct nullb_queue *nq;
- int i, ret = 0;
-
- for (i = 0; i < nullb->dev->submit_queues; i++) {
- nq = &nullb->queues[i];
-
- null_init_queue(nullb, nq);
-
- ret = setup_commands(nq);
- if (ret)
- return ret;
- nullb->nr_queues++;
+ set->ops = &null_mq_ops;
+ set->cmd_size = sizeof(struct nullb_cmd);
+ set->timeout = 5 * HZ;
+ set->nr_maps = 1;
+ if (poll_queues) {
+ set->nr_hw_queues += poll_queues;
+ set->nr_maps += 2;
}
- return 0;
+ return blk_mq_alloc_tag_set(set);
}
-static int null_gendisk_register(struct nullb *nullb)
+static int null_init_global_tag_set(void)
{
- sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
- struct gendisk *disk = nullb->disk;
+ int error;
- set_capacity(disk, size);
-
- disk->major = null_major;
- disk->first_minor = nullb->index;
- disk->minors = 1;
- if (queue_is_mq(nullb->q))
- disk->fops = &null_rq_ops;
- else
- disk->fops = &null_bio_ops;
- disk->private_data = nullb;
- strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+ if (tag_set.ops)
+ return 0;
- if (nullb->dev->zoned) {
- int ret = null_register_zoned_dev(nullb);
+ tag_set.nr_hw_queues = g_submit_queues;
+ tag_set.queue_depth = g_hw_queue_depth;
+ tag_set.numa_node = g_home_node;
+ tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ if (g_no_sched)
+ tag_set.flags |= BLK_MQ_F_NO_SCHED;
+ if (g_shared_tag_bitmap)
+ tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (g_blocking)
+ tag_set.flags |= BLK_MQ_F_BLOCKING;
- if (ret)
- return ret;
- }
-
- return add_disk(disk);
+ error = null_init_tag_set(&tag_set, g_poll_queues);
+ if (error)
+ tag_set.ops = NULL;
+ return error;
}
-static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
+static int null_setup_tagset(struct nullb *nullb)
{
- unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
- int hw_queues, numa_node;
- unsigned int queue_depth;
- int poll_queues;
-
- if (nullb) {
- hw_queues = nullb->dev->submit_queues;
- poll_queues = nullb->dev->poll_queues;
- queue_depth = nullb->dev->hw_queue_depth;
- numa_node = nullb->dev->home_node;
- if (nullb->dev->no_sched)
- flags |= BLK_MQ_F_NO_SCHED;
- if (nullb->dev->shared_tag_bitmap)
- flags |= BLK_MQ_F_TAG_HCTX_SHARED;
- if (nullb->dev->blocking)
- flags |= BLK_MQ_F_BLOCKING;
- } else {
- hw_queues = g_submit_queues;
- poll_queues = g_poll_queues;
- queue_depth = g_hw_queue_depth;
- numa_node = g_home_node;
- if (g_no_sched)
- flags |= BLK_MQ_F_NO_SCHED;
- if (g_shared_tag_bitmap)
- flags |= BLK_MQ_F_TAG_HCTX_SHARED;
- if (g_blocking)
- flags |= BLK_MQ_F_BLOCKING;
- }
-
- set->ops = &null_mq_ops;
- set->cmd_size = sizeof(struct nullb_cmd);
- set->flags = flags;
- set->driver_data = nullb;
- set->nr_hw_queues = hw_queues;
- set->queue_depth = queue_depth;
- set->numa_node = numa_node;
- if (poll_queues) {
- set->nr_hw_queues += poll_queues;
- set->nr_maps = 3;
- } else {
- set->nr_maps = 1;
+ if (nullb->dev->shared_tags) {
+ nullb->tag_set = &tag_set;
+ return null_init_global_tag_set();
}
- return blk_mq_alloc_tag_set(set);
+ nullb->tag_set = &nullb->__tag_set;
+ nullb->tag_set->driver_data = nullb;
+ nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
+ nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
+ nullb->tag_set->numa_node = nullb->dev->home_node;
+ nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
+ if (nullb->dev->no_sched)
+ nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED;
+ if (nullb->dev->shared_tag_bitmap)
+ nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (nullb->dev->blocking)
+ nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
+ return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
}
static int null_validate_conf(struct nullb_device *dev)
@@ -2032,11 +1789,15 @@ static int null_validate_conf(struct nullb_device *dev)
pr_err("legacy IO path is no longer available\n");
return -EINVAL;
}
+ if (dev->queue_mode == NULL_Q_BIO) {
+ pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
+ dev->queue_mode = NULL_Q_MQ;
+ }
dev->blocksize = round_down(dev->blocksize, 512);
dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
- if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+ if (dev->use_per_node_hctx) {
if (dev->submit_queues != nr_online_nodes)
dev->submit_queues = nr_online_nodes;
} else if (dev->submit_queues > nr_cpu_ids)
@@ -2048,8 +1809,6 @@ static int null_validate_conf(struct nullb_device *dev)
if (dev->poll_queues > g_poll_queues)
dev->poll_queues = g_poll_queues;
dev->prev_poll_queues = dev->poll_queues;
-
- dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
/* Do memory allocation, so set blocking */
@@ -2060,9 +1819,6 @@ static int null_validate_conf(struct nullb_device *dev)
dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
dev->cache_size);
dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
- /* can not stop a queue */
- if (dev->queue_mode == NULL_Q_BIO)
- dev->mbps = 0;
if (dev->zoned &&
(!dev->zone_size || !is_power_of_2(dev->zone_size))) {
@@ -2102,6 +1858,12 @@ static bool null_setup_fault(void)
static int null_add_dev(struct nullb_device *dev)
{
+ struct queue_limits lim = {
+ .logical_block_size = dev->blocksize,
+ .physical_block_size = dev->blocksize,
+ .max_hw_sectors = dev->max_sectors,
+ };
+
struct nullb *nullb;
int rv;
@@ -2123,36 +1885,25 @@ static int null_add_dev(struct nullb_device *dev)
if (rv)
goto out_free_nullb;
- if (dev->queue_mode == NULL_Q_MQ) {
- if (shared_tags) {
- nullb->tag_set = &tag_set;
- rv = 0;
- } else {
- nullb->tag_set = &nullb->__tag_set;
- rv = null_init_tag_set(nullb, nullb->tag_set);
- }
+ rv = null_setup_tagset(nullb);
+ if (rv)
+ goto out_cleanup_queues;
+ if (dev->virt_boundary)
+ lim.virt_boundary_mask = PAGE_SIZE - 1;
+ null_config_discard(nullb, &lim);
+ if (dev->zoned) {
+ rv = null_init_zoned_dev(dev, &lim);
if (rv)
- goto out_cleanup_queues;
-
- nullb->tag_set->timeout = 5 * HZ;
- nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
- if (IS_ERR(nullb->disk)) {
- rv = PTR_ERR(nullb->disk);
goto out_cleanup_tags;
- }
- nullb->q = nullb->disk->queue;
- } else if (dev->queue_mode == NULL_Q_BIO) {
- rv = -ENOMEM;
- nullb->disk = blk_alloc_disk(nullb->dev->home_node);
- if (!nullb->disk)
- goto out_cleanup_queues;
+ }
- nullb->q = nullb->disk->queue;
- rv = init_driver_queues(nullb);
- if (rv)
- goto out_cleanup_disk;
+ nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
+ if (IS_ERR(nullb->disk)) {
+ rv = PTR_ERR(nullb->disk);
+ goto out_cleanup_zone;
}
+ nullb->q = nullb->disk->queue;
if (dev->mbps) {
set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
@@ -2164,12 +1915,6 @@ static int null_add_dev(struct nullb_device *dev)
blk_queue_write_cache(nullb->q, true, true);
}
- if (dev->zoned) {
- rv = null_init_zoned_dev(dev, nullb->q);
- if (rv)
- goto out_cleanup_disk;
- }
-
nullb->q->queuedata = nullb;
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
@@ -2177,22 +1922,12 @@ static int null_add_dev(struct nullb_device *dev)
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
if (rv < 0) {
mutex_unlock(&lock);
- goto out_cleanup_zone;
+ goto out_cleanup_disk;
}
nullb->index = rv;
dev->index = rv;
mutex_unlock(&lock);
- blk_queue_logical_block_size(nullb->q, dev->blocksize);
- blk_queue_physical_block_size(nullb->q, dev->blocksize);
- if (dev->max_sectors)
- blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
-
- if (dev->virt_boundary)
- blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
-
- null_config_discard(nullb);
-
if (config_item_name(&dev->group.cg_item)) {
/* Use configfs dir name as the device name */
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
@@ -2201,7 +1936,22 @@ static int null_add_dev(struct nullb_device *dev)
sprintf(nullb->disk_name, "nullb%d", nullb->index);
}
- rv = null_gendisk_register(nullb);
+ set_capacity(nullb->disk,
+ ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT);
+ nullb->disk->major = null_major;
+ nullb->disk->first_minor = nullb->index;
+ nullb->disk->minors = 1;
+ nullb->disk->fops = &null_ops;
+ nullb->disk->private_data = nullb;
+ strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+
+ if (nullb->dev->zoned) {
+ rv = null_register_zoned_dev(nullb);
+ if (rv)
+ goto out_ida_free;
+ }
+
+ rv = add_disk(nullb->disk);
if (rv)
goto out_ida_free;
@@ -2220,10 +1970,10 @@ out_cleanup_zone:
out_cleanup_disk:
put_disk(nullb->disk);
out_cleanup_tags:
- if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
- cleanup_queues(nullb);
+ kfree(nullb->queues);
out_free_nullb:
kfree(nullb);
dev->nullb = NULL;
@@ -2299,7 +2049,7 @@ static int __init null_init(void)
return -EINVAL;
}
- if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+ if (g_use_per_node_hctx) {
if (g_submit_queues != nr_online_nodes) {
pr_warn("submit_queues param is set to %u.\n",
nr_online_nodes);
@@ -2311,18 +2061,12 @@ static int __init null_init(void)
g_submit_queues = 1;
}
- if (g_queue_mode == NULL_Q_MQ && shared_tags) {
- ret = null_init_tag_set(NULL, &tag_set);
- if (ret)
- return ret;
- }
-
config_group_init(&nullb_subsys.su_group);
mutex_init(&nullb_subsys.su_mutex);
ret = configfs_register_subsystem(&nullb_subsys);
if (ret)
- goto err_tagset;
+ return ret;
mutex_init(&lock);
@@ -2349,9 +2093,6 @@ err_dev:
unregister_blkdev(null_major, "nullb");
err_conf:
configfs_unregister_subsystem(&nullb_subsys);
-err_tagset:
- if (g_queue_mode == NULL_Q_MQ && shared_tags)
- blk_mq_free_tag_set(&tag_set);
return ret;
}
@@ -2370,7 +2111,7 @@ static void __exit null_exit(void)
}
mutex_unlock(&lock);
- if (g_queue_mode == NULL_Q_MQ && shared_tags)
+ if (tag_set.ops)
blk_mq_free_tag_set(&tag_set);
}
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 929f659dd255..477b97746823 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -16,11 +16,6 @@
#include <linux/mutex.h>
struct nullb_cmd {
- union {
- struct request *rq;
- struct bio *bio;
- };
- unsigned int tag;
blk_status_t error;
bool fake_timeout;
struct nullb_queue *nq;
@@ -28,16 +23,11 @@ struct nullb_cmd {
};
struct nullb_queue {
- unsigned long *tag_map;
- wait_queue_head_t wait;
- unsigned int queue_depth;
struct nullb_device *dev;
unsigned int requeue_selection;
struct list_head poll_list;
spinlock_t poll_lock;
-
- struct nullb_cmd *cmds;
};
struct nullb_zone {
@@ -60,13 +50,6 @@ struct nullb_zone {
unsigned int capacity;
};
-/* Queue modes */
-enum {
- NULL_Q_BIO = 0,
- NULL_Q_RQ = 1,
- NULL_Q_MQ = 2,
-};
-
struct nullb_device {
struct nullb *nullb;
struct config_group group;
@@ -119,6 +102,7 @@ struct nullb_device {
bool zoned; /* if device is zoned */
bool virt_boundary; /* virtual boundary on/off for the device */
bool no_sched; /* no IO scheduler for the device */
+ bool shared_tags; /* share tag set between devices for blk-mq */
bool shared_tag_bitmap; /* use hostwide shared tags */
};
@@ -130,14 +114,12 @@ struct nullb {
struct gendisk *disk;
struct blk_mq_tag_set *tag_set;
struct blk_mq_tag_set __tag_set;
- unsigned int queue_depth;
atomic_long_t cur_bytes;
struct hrtimer bw_timer;
unsigned long cache_flush_pos;
spinlock_t lock;
struct nullb_queue *queues;
- unsigned int nr_queues;
char disk_name[DISK_NAME_LEN];
};
@@ -147,7 +129,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, unsigned int nr_sectors);
#ifdef CONFIG_BLK_DEV_ZONED
-int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
+int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
int null_register_zoned_dev(struct nullb *nullb);
void null_free_zoned_dev(struct nullb_device *dev);
int null_report_zones(struct gendisk *disk, sector_t sector,
@@ -160,7 +142,7 @@ ssize_t zone_cond_store(struct nullb_device *dev, const char *page,
size_t count, enum blk_zone_cond cond);
#else
static inline int null_init_zoned_dev(struct nullb_device *dev,
- struct request_queue *q)
+ struct queue_limits *lim)
{
pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
return -EINVAL;
diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h
index 6b2b370e786f..ef2d05d5f0df 100644
--- a/drivers/block/null_blk/trace.h
+++ b/drivers/block/null_blk/trace.h
@@ -41,10 +41,11 @@ TRACE_EVENT(nullb_zone_op,
__field(unsigned int, zone_cond)
),
TP_fast_assign(
- __entry->op = req_op(cmd->rq);
+ __entry->op = req_op(blk_mq_rq_from_pdu(cmd));
__entry->zone_no = zone_no;
__entry->zone_cond = zone_cond;
- __assign_disk_name(__entry->disk, cmd->rq->q->disk);
+ __assign_disk_name(__entry->disk,
+ blk_mq_rq_from_pdu(cmd)->q->disk);
),
TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
__print_disk_name(__entry->disk),
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 6f5e0994862e..1689e2584104 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -58,7 +58,8 @@ static inline void null_unlock_zone(struct nullb_device *dev,
mutex_unlock(&zone->mutex);
}
-int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
+int null_init_zoned_dev(struct nullb_device *dev,
+ struct queue_limits *lim)
{
sector_t dev_capacity_sects, zone_capacity_sects;
struct nullb_zone *zone;
@@ -151,27 +152,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
sector += dev->zone_size_sects;
}
+ lim->zoned = true;
+ lim->chunk_sectors = dev->zone_size_sects;
+ lim->max_zone_append_sectors = dev->zone_size_sects;
+ lim->max_open_zones = dev->zone_max_open;
+ lim->max_active_zones = dev->zone_max_active;
return 0;
}
int null_register_zoned_dev(struct nullb *nullb)
{
- struct nullb_device *dev = nullb->dev;
struct request_queue *q = nullb->q;
- disk_set_zoned(nullb->disk);
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
- blk_queue_chunk_sectors(q, dev->zone_size_sects);
nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
- blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
- disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
- disk_set_max_active_zones(nullb->disk, dev->zone_max_active);
-
- if (queue_is_mq(q))
- return blk_revalidate_disk_zones(nullb->disk, NULL);
-
- return 0;
+ return blk_revalidate_disk_zones(nullb->disk, NULL);
}
void null_free_zoned_dev(struct nullb_device *dev)
@@ -394,10 +390,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
*/
if (append) {
sector = zone->wp;
- if (dev->queue_mode == NULL_Q_MQ)
- cmd->rq->__sector = sector;
- else
- cmd->bio->bi_iter.bi_sector = sector;
+ blk_mq_rq_from_pdu(cmd)->__sector = sector;
} else if (sector != zone->wp) {
ret = BLK_STS_IOERR;
goto unlock;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d56d972aadb3..21728e9ea5c3 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -340,8 +340,8 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
n += sysfs_emit_at(data, n, "%s %u:%u %u:%u\n",
pd->disk->disk_name,
MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
- MAJOR(pd->bdev_handle->bdev->bd_dev),
- MINOR(pd->bdev_handle->bdev->bd_dev));
+ MAJOR(file_bdev(pd->bdev_file)->bd_dev),
+ MINOR(file_bdev(pd->bdev_file)->bd_dev));
}
mutex_unlock(&ctl_mutex);
return n;
@@ -438,7 +438,7 @@ static int pkt_seq_show(struct seq_file *m, void *p)
int states[PACKET_NUM_STATES];
seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name,
- pd->bdev_handle->bdev);
+ file_bdev(pd->bdev_file));
seq_printf(m, "\nSettings:\n");
seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
@@ -715,7 +715,7 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
*/
static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
{
- struct request_queue *q = bdev_get_queue(pd->bdev_handle->bdev);
+ struct request_queue *q = bdev_get_queue(file_bdev(pd->bdev_file));
struct scsi_cmnd *scmd;
struct request *rq;
int ret = 0;
@@ -828,6 +828,12 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
*/
static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
{
+ /*
+ * Some CDRW drives can not handle writes larger than one packet,
+ * even if the size is a multiple of the packet size.
+ */
+ bio->bi_opf |= REQ_NOMERGE;
+
spin_lock(&pd->iosched.lock);
if (bio_data_dir(bio) == READ)
bio_list_add(&pd->iosched.read_queue, bio);
@@ -1048,7 +1054,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
continue;
bio = pkt->r_bios[f];
- bio_init(bio, pd->bdev_handle->bdev, bio->bi_inline_vecs, 1,
+ bio_init(bio, file_bdev(pd->bdev_file), bio->bi_inline_vecs, 1,
REQ_OP_READ);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio->bi_end_io = pkt_end_io_read;
@@ -1264,7 +1270,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
struct device *ddev = disk_to_dev(pd->disk);
int f;
- bio_init(pkt->w_bio, pd->bdev_handle->bdev, pkt->w_bio->bi_inline_vecs,
+ bio_init(pkt->w_bio, file_bdev(pd->bdev_file), pkt->w_bio->bi_inline_vecs,
pkt->frames, REQ_OP_WRITE);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
@@ -2162,20 +2168,20 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
int ret;
long lba;
struct request_queue *q;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
/*
* We need to re-open the cdrom device without O_NONBLOCK to be able
* to read/write from/to it. It is already opened in O_NONBLOCK mode
* so open should not fail.
*/
- bdev_handle = bdev_open_by_dev(pd->bdev_handle->bdev->bd_dev,
+ bdev_file = bdev_file_open_by_dev(file_bdev(pd->bdev_file)->bd_dev,
BLK_OPEN_READ, pd, NULL);
- if (IS_ERR(bdev_handle)) {
- ret = PTR_ERR(bdev_handle);
+ if (IS_ERR(bdev_file)) {
+ ret = PTR_ERR(bdev_file);
goto out;
}
- pd->open_bdev_handle = bdev_handle;
+ pd->f_open_bdev = bdev_file;
ret = pkt_get_last_written(pd, &lba);
if (ret) {
@@ -2184,18 +2190,13 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
}
set_capacity(pd->disk, lba << 2);
- set_capacity_and_notify(pd->bdev_handle->bdev->bd_disk, lba << 2);
+ set_capacity_and_notify(file_bdev(pd->bdev_file)->bd_disk, lba << 2);
- q = bdev_get_queue(pd->bdev_handle->bdev);
+ q = bdev_get_queue(file_bdev(pd->bdev_file));
if (write) {
ret = pkt_open_write(pd);
if (ret)
goto out_putdev;
- /*
- * Some CDRW drives can not handle writes larger than one packet,
- * even if the size is a multiple of the packet size.
- */
- blk_queue_max_hw_sectors(q, pd->settings.size);
set_bit(PACKET_WRITABLE, &pd->flags);
} else {
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
@@ -2218,7 +2219,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
return 0;
out_putdev:
- bdev_release(bdev_handle);
+ fput(bdev_file);
out:
return ret;
}
@@ -2237,8 +2238,8 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
pkt_lock_door(pd, 0);
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
- bdev_release(pd->open_bdev_handle);
- pd->open_bdev_handle = NULL;
+ fput(pd->f_open_bdev);
+ pd->f_open_bdev = NULL;
pkt_shrink_pktlist(pd);
}
@@ -2326,7 +2327,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_alloc_clone(pd->bdev_handle->bdev, bio,
+ struct bio *cloned_bio = bio_alloc_clone(file_bdev(pd->bdev_file), bio,
GFP_NOIO, &pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
@@ -2338,9 +2339,9 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
pkt_queue_bio(pd, cloned_bio);
}
-static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+static void pkt_make_request_write(struct bio *bio)
{
- struct pktcdvd_device *pd = q->queuedata;
+ struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data;
sector_t zone;
struct packet_data *pkt;
int was_empty, blocked_bio;
@@ -2432,7 +2433,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
static void pkt_submit_bio(struct bio *bio)
{
- struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
+ struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data;
struct device *ddev = disk_to_dev(pd->disk);
struct bio *split;
@@ -2476,7 +2477,7 @@ static void pkt_submit_bio(struct bio *bio)
split = bio;
}
- pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
+ pkt_make_request_write(split);
} while (split != bio);
return;
@@ -2484,20 +2485,11 @@ end_io:
bio_io_error(bio);
}
-static void pkt_init_queue(struct pktcdvd_device *pd)
-{
- struct request_queue *q = pd->disk->queue;
-
- blk_queue_logical_block_size(q, CD_FRAMESIZE);
- blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
- q->queuedata = pd;
-}
-
static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
{
struct device *ddev = disk_to_dev(pd->disk);
int i;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct scsi_device *sdev;
if (pd->pkt_dev == dev) {
@@ -2508,9 +2500,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
struct pktcdvd_device *pd2 = pkt_devs[i];
if (!pd2)
continue;
- if (pd2->bdev_handle->bdev->bd_dev == dev) {
+ if (file_bdev(pd2->bdev_file)->bd_dev == dev) {
dev_err(ddev, "%pg already setup\n",
- pd2->bdev_handle->bdev);
+ file_bdev(pd2->bdev_file));
return -EBUSY;
}
if (pd2->pkt_dev == dev) {
@@ -2519,13 +2511,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
}
}
- bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY,
+ bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY,
NULL, NULL);
- if (IS_ERR(bdev_handle))
- return PTR_ERR(bdev_handle);
- sdev = scsi_device_from_queue(bdev_handle->bdev->bd_disk->queue);
+ if (IS_ERR(bdev_file))
+ return PTR_ERR(bdev_file);
+ sdev = scsi_device_from_queue(file_bdev(bdev_file)->bd_disk->queue);
if (!sdev) {
- bdev_release(bdev_handle);
+ fput(bdev_file);
return -EINVAL;
}
put_device(&sdev->sdev_gendev);
@@ -2533,10 +2525,8 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
- pd->bdev_handle = bdev_handle;
- set_blocksize(bdev_handle->bdev, CD_FRAMESIZE);
-
- pkt_init_queue(pd);
+ pd->bdev_file = bdev_file;
+ set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE);
atomic_set(&pd->cdrw.pending_bios, 0);
pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name);
@@ -2546,11 +2536,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
}
proc_create_single_data(pd->disk->disk_name, 0, pkt_proc, pkt_seq_show, pd);
- dev_notice(ddev, "writer mapped to %pg\n", bdev_handle->bdev);
+ dev_notice(ddev, "writer mapped to %pg\n", file_bdev(bdev_file));
return 0;
out_mem:
- bdev_release(bdev_handle);
+ fput(bdev_file);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return -ENOMEM;
@@ -2605,9 +2595,9 @@ static unsigned int pkt_check_events(struct gendisk *disk,
if (!pd)
return 0;
- if (!pd->bdev_handle)
+ if (!pd->bdev_file)
return 0;
- attached_disk = pd->bdev_handle->bdev->bd_disk;
+ attached_disk = file_bdev(pd->bdev_file)->bd_disk;
if (!attached_disk || !attached_disk->fops->check_events)
return 0;
return attached_disk->fops->check_events(attached_disk, clearing);
@@ -2634,6 +2624,10 @@ static const struct block_device_operations pktcdvd_ops = {
*/
static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
{
+ struct queue_limits lim = {
+ .max_hw_sectors = PACKET_MAX_SECTORS,
+ .logical_block_size = CD_FRAMESIZE,
+ };
int idx;
int ret = -ENOMEM;
struct pktcdvd_device *pd;
@@ -2673,10 +2667,11 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
pd->write_congestion_on = write_congestion_on;
pd->write_congestion_off = write_congestion_off;
- ret = -ENOMEM;
- disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ ret = PTR_ERR(disk);
goto out_mem;
+ }
pd->disk = disk;
disk->major = pktdev_major;
disk->first_minor = idx;
@@ -2692,7 +2687,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
goto out_mem2;
/* inherit events of the host device */
- disk->events = pd->bdev_handle->bdev->bd_disk->events;
+ disk->events = file_bdev(pd->bdev_file)->bd_disk->events;
ret = add_disk(disk);
if (ret)
@@ -2757,7 +2752,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
pkt_debugfs_dev_remove(pd);
pkt_sysfs_dev_remove(pd);
- bdev_release(pd->bdev_handle);
+ fput(pd->bdev_file);
remove_proc_entry(pd->disk->disk_name, pkt_proc);
dev_notice(ddev, "writer unmapped\n");
@@ -2784,7 +2779,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
if (pd) {
- ctrl_cmd->dev = new_encode_dev(pd->bdev_handle->bdev->bd_dev);
+ ctrl_cmd->dev = new_encode_dev(file_bdev(pd->bdev_file)->bd_dev);
ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
} else {
ctrl_cmd->dev = 0;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 36d7b36c60c7..b810ac0a5c4b 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -382,6 +382,14 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
struct ps3disk_private *priv;
int error;
unsigned int devidx;
+ struct queue_limits lim = {
+ .logical_block_size = dev->blk_size,
+ .max_hw_sectors = dev->bounce_size >> 9,
+ .max_segments = -1,
+ .max_segment_size = dev->bounce_size,
+ .dma_alignment = dev->blk_size - 1,
+ };
+
struct request_queue *queue;
struct gendisk *gendisk;
@@ -431,7 +439,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
if (error)
goto fail_teardown;
- gendisk = blk_mq_alloc_disk(&priv->tag_set, dev);
+ gendisk = blk_mq_alloc_disk(&priv->tag_set, &lim, dev);
if (IS_ERR(gendisk)) {
dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n",
__func__, __LINE__);
@@ -441,15 +449,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
queue = gendisk->queue;
- blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
- blk_queue_dma_alignment(queue, dev->blk_size-1);
- blk_queue_logical_block_size(queue, dev->blk_size);
-
blk_queue_write_cache(queue, true, false);
- blk_queue_max_segments(queue, -1);
- blk_queue_max_segment_size(queue, dev->bounce_size);
-
priv->gendisk = gendisk;
gendisk->major = ps3disk_major;
gendisk->first_minor = devidx * PS3DISK_MINORS;
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 38d42af01b25..bdcf083b45e2 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -730,10 +730,10 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
ps3vram_proc_init(dev);
- gendisk = blk_alloc_disk(NUMA_NO_NODE);
- if (!gendisk) {
+ gendisk = blk_alloc_disk(NULL, NUMA_NO_NODE);
+ if (IS_ERR(gendisk)) {
dev_err(&dev->core, "blk_alloc_disk failed\n");
- error = -ENOMEM;
+ error = PTR_ERR(gendisk);
goto out_cache_cleanup;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index a999b698b131..26ff5cd2bf0a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -575,7 +575,7 @@ static const struct attribute_group rbd_bus_group = {
};
__ATTRIBUTE_GROUPS(rbd_bus);
-static struct bus_type rbd_bus_type = {
+static const struct bus_type rbd_bus_type = {
.name = "rbd",
.bus_groups = rbd_bus_groups,
};
@@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req)
static void rbd_lock_del_request(struct rbd_img_request *img_req)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
- bool need_wakeup;
+ bool need_wakeup = false;
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
- rbd_assert(!list_empty(&img_req->lock_item));
- list_del_init(&img_req->lock_item);
- need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
- list_empty(&rbd_dev->running_list));
+ if (!list_empty(&img_req->lock_item)) {
+ list_del_init(&img_req->lock_item);
+ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+ list_empty(&rbd_dev->running_list));
+ }
spin_unlock(&rbd_dev->lock_lists_lock);
if (need_wakeup)
complete(&rbd_dev->releasing_wait);
@@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
return;
}
- list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
+ while (!list_empty(&rbd_dev->acquiring_list)) {
+ img_req = list_first_entry(&rbd_dev->acquiring_list,
+ struct rbd_img_request, lock_item);
mutex_lock(&img_req->state_mutex);
rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
+ if (!result)
+ list_move_tail(&img_req->lock_item,
+ &rbd_dev->running_list);
+ else
+ list_del_init(&img_req->lock_item);
rbd_img_schedule(img_req, result);
mutex_unlock(&img_req->state_mutex);
}
-
- list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
}
static bool locker_equal(const struct ceph_locker *lhs,
@@ -4946,6 +4952,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
struct request_queue *q;
unsigned int objset_bytes =
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
+ struct queue_limits lim = {
+ .max_hw_sectors = objset_bytes >> SECTOR_SHIFT,
+ .max_user_sectors = objset_bytes >> SECTOR_SHIFT,
+ .io_min = rbd_dev->opts->alloc_size,
+ .io_opt = rbd_dev->opts->alloc_size,
+ .max_segments = USHRT_MAX,
+ .max_segment_size = UINT_MAX,
+ };
int err;
memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
@@ -4960,7 +4974,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (err)
return err;
- disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev);
+ if (rbd_dev->opts->trim) {
+ lim.discard_granularity = rbd_dev->opts->alloc_size;
+ lim.max_hw_discard_sectors = objset_bytes >> SECTOR_SHIFT;
+ lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT;
+ }
+
+ disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_tag_set;
@@ -4981,19 +5001,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
- blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
- q->limits.max_sectors = queue_max_hw_sectors(q);
- blk_queue_max_segments(q, USHRT_MAX);
- blk_queue_max_segment_size(q, UINT_MAX);
- blk_queue_io_min(q, rbd_dev->opts->alloc_size);
- blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
-
- if (rbd_dev->opts->trim) {
- q->limits.discard_granularity = rbd_dev->opts->alloc_size;
- blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
- blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
- }
-
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
@@ -5326,7 +5333,7 @@ static void rbd_dev_release(struct device *dev)
if (need_put) {
destroy_workqueue(rbd_dev->task_wq);
- ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+ ida_free(&rbd_dev_id_ida, rbd_dev->dev_id);
}
rbd_dev_free(rbd_dev);
@@ -5402,9 +5409,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
return NULL;
/* get an id and fill in device name */
- rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
- minor_to_rbd_dev_id(1 << MINORBITS),
- GFP_KERNEL);
+ rbd_dev->dev_id = ida_alloc_max(&rbd_dev_id_ida,
+ minor_to_rbd_dev_id(1 << MINORBITS) - 1,
+ GFP_KERNEL);
if (rbd_dev->dev_id < 0)
goto fail_rbd_dev;
@@ -5425,7 +5432,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
return rbd_dev;
fail_dev_id:
- ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+ ida_free(&rbd_dev_id_ida, rbd_dev->dev_id);
fail_rbd_dev:
rbd_dev_free(rbd_dev);
return NULL;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 4044c369d22a..b7ffe03c6160 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1329,43 +1329,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
}
}
-static void setup_request_queue(struct rnbd_clt_dev *dev,
- struct rnbd_msg_open_rsp *rsp)
-{
- blk_queue_logical_block_size(dev->queue,
- le16_to_cpu(rsp->logical_block_size));
- blk_queue_physical_block_size(dev->queue,
- le16_to_cpu(rsp->physical_block_size));
- blk_queue_max_hw_sectors(dev->queue,
- dev->sess->max_io_size / SECTOR_SIZE);
-
- /*
- * we don't support discards to "discontiguous" segments
- * in on request
- */
- blk_queue_max_discard_segments(dev->queue, 1);
-
- blk_queue_max_discard_sectors(dev->queue,
- le32_to_cpu(rsp->max_discard_sectors));
- dev->queue->limits.discard_granularity =
- le32_to_cpu(rsp->discard_granularity);
- dev->queue->limits.discard_alignment =
- le32_to_cpu(rsp->discard_alignment);
- if (le16_to_cpu(rsp->secure_discard))
- blk_queue_max_secure_erase_sectors(dev->queue,
- le32_to_cpu(rsp->max_discard_sectors));
- blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
- blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
- blk_queue_max_segments(dev->queue, dev->sess->max_segments);
- blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
- blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
- blk_queue_write_cache(dev->queue,
- !!(rsp->cache_policy & RNBD_WRITEBACK),
- !!(rsp->cache_policy & RNBD_FUA));
- blk_queue_max_write_zeroes_sectors(dev->queue,
- le32_to_cpu(rsp->max_write_zeroes_sectors));
-}
-
static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
struct rnbd_msg_open_rsp *rsp, int idx)
{
@@ -1403,18 +1366,41 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
struct rnbd_msg_open_rsp *rsp)
{
+ struct queue_limits lim = {
+ .logical_block_size = le16_to_cpu(rsp->logical_block_size),
+ .physical_block_size = le16_to_cpu(rsp->physical_block_size),
+ .io_opt = dev->sess->max_io_size,
+ .max_hw_sectors = dev->sess->max_io_size / SECTOR_SIZE,
+ .max_hw_discard_sectors = le32_to_cpu(rsp->max_discard_sectors),
+ .discard_granularity = le32_to_cpu(rsp->discard_granularity),
+ .discard_alignment = le32_to_cpu(rsp->discard_alignment),
+ .max_segments = dev->sess->max_segments,
+ .virt_boundary_mask = SZ_4K - 1,
+ .max_write_zeroes_sectors =
+ le32_to_cpu(rsp->max_write_zeroes_sectors),
+ };
int idx = dev->clt_device_id;
dev->size = le64_to_cpu(rsp->nsectors) *
le16_to_cpu(rsp->logical_block_size);
- dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
+ if (rsp->secure_discard) {
+ lim.max_secure_erase_sectors =
+ le32_to_cpu(rsp->max_discard_sectors);
+ }
+
+ dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev);
if (IS_ERR(dev->gd))
return PTR_ERR(dev->gd);
dev->queue = dev->gd->queue;
rnbd_init_mq_hw_queues(dev);
- setup_request_queue(dev, rsp);
+ blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
+ blk_queue_write_cache(dev->queue,
+ !!(rsp->cache_policy & RNBD_WRITEBACK),
+ !!(rsp->cache_policy & RNBD_FUA));
+
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
}
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 3a0d5dcec6f2..f6e3a3c4b76c 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -145,7 +145,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;
- bio = bio_alloc(sess_dev->bdev_handle->bdev, 1,
+ bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1,
rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
if (bio_add_page(bio, virt_to_page(data), datalen,
offset_in_page(data)) != datalen) {
@@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
rnbd_put_sess_dev(sess_dev);
wait_for_completion(&dc); /* wait for inflights to drop to zero */
- bdev_release(sess_dev->bdev_handle);
+ fput(sess_dev->bdev_file);
mutex_lock(&sess_dev->dev->lock);
list_del(&sess_dev->dev_list);
if (!sess_dev->readonly)
@@ -534,7 +534,7 @@ rnbd_srv_get_or_create_srv_dev(struct block_device *bdev,
static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
struct rnbd_srv_sess_dev *sess_dev)
{
- struct block_device *bdev = sess_dev->bdev_handle->bdev;
+ struct block_device *bdev = file_bdev(sess_dev->bdev_file);
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
rsp->device_id = cpu_to_le32(sess_dev->device_id);
@@ -560,7 +560,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
static struct rnbd_srv_sess_dev *
rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
const struct rnbd_msg_open *open_msg,
- struct bdev_handle *handle, bool readonly,
+ struct file *bdev_file, bool readonly,
struct rnbd_srv_dev *srv_dev)
{
struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess);
@@ -572,7 +572,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));
- sdev->bdev_handle = handle;
+ sdev->bdev_file = bdev_file;
sdev->sess = srv_sess;
sdev->dev = srv_dev;
sdev->readonly = readonly;
@@ -678,7 +678,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_dev *srv_dev;
struct rnbd_srv_sess_dev *srv_sess_dev;
const struct rnbd_msg_open *open_msg = msg;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
blk_mode_t open_flags = BLK_OPEN_READ;
char *full_path;
struct rnbd_msg_open_rsp *rsp = data;
@@ -716,15 +716,15 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
- bdev_handle = bdev_open_by_path(full_path, open_flags, NULL, NULL);
- if (IS_ERR(bdev_handle)) {
- ret = PTR_ERR(bdev_handle);
+ bdev_file = bdev_file_open_by_path(full_path, open_flags, NULL, NULL);
+ if (IS_ERR(bdev_file)) {
+ ret = PTR_ERR(bdev_file);
pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %pe\n",
- full_path, srv_sess->sessname, bdev_handle);
+ full_path, srv_sess->sessname, bdev_file);
goto free_path;
}
- srv_dev = rnbd_srv_get_or_create_srv_dev(bdev_handle->bdev, srv_sess,
+ srv_dev = rnbd_srv_get_or_create_srv_dev(file_bdev(bdev_file), srv_sess,
open_msg->access_mode);
if (IS_ERR(srv_dev)) {
pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %pe\n",
@@ -734,7 +734,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
}
srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg,
- bdev_handle,
+ bdev_file,
open_msg->access_mode == RNBD_ACCESS_RO,
srv_dev);
if (IS_ERR(srv_sess_dev)) {
@@ -750,7 +750,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
*/
mutex_lock(&srv_dev->lock);
if (!srv_dev->dev_kobj.state_in_sysfs) {
- ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev_handle->bdev);
+ ret = rnbd_srv_create_dev_sysfs(srv_dev, file_bdev(bdev_file));
if (ret) {
mutex_unlock(&srv_dev->lock);
rnbd_srv_err(srv_sess_dev,
@@ -793,7 +793,7 @@ srv_dev_put:
}
rnbd_put_srv_dev(srv_dev);
blkdev_put:
- bdev_release(bdev_handle);
+ fput(bdev_file);
free_path:
kfree(full_path);
reject:
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index 343cc682b617..18d873808b8d 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -46,7 +46,7 @@ struct rnbd_srv_dev {
struct rnbd_srv_sess_dev {
/* Entry inside rnbd_srv_dev struct */
struct list_head dev_list;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct rnbd_srv_session *sess;
struct rnbd_srv_dev *dev;
struct kobject kobj;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 7bf4b48e2282..c99dd6698977 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -784,6 +784,14 @@ static const struct blk_mq_ops vdc_mq_ops = {
static int probe_disk(struct vdc_port *port)
{
+ struct queue_limits lim = {
+ .physical_block_size = port->vdisk_phys_blksz,
+ .max_hw_sectors = port->max_xfer_size,
+ /* Each segment in a request is up to an aligned page in size. */
+ .seg_boundary_mask = PAGE_SIZE - 1,
+ .max_segment_size = PAGE_SIZE,
+ .max_segments = port->ring_cookies,
+ };
struct request_queue *q;
struct gendisk *g;
int err;
@@ -824,7 +832,7 @@ static int probe_disk(struct vdc_port *port)
if (err)
return err;
- g = blk_mq_alloc_disk(&port->tag_set, port);
+ g = blk_mq_alloc_disk(&port->tag_set, &lim, port);
if (IS_ERR(g)) {
printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
port->vio.name);
@@ -835,12 +843,6 @@ static int probe_disk(struct vdc_port *port)
port->disk = g;
q = g->queue;
- /* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(q, PAGE_SIZE - 1);
- blk_queue_max_segment_size(q, PAGE_SIZE);
-
- blk_queue_max_segments(q, port->ring_cookies);
- blk_queue_max_hw_sectors(q, port->max_xfer_size);
g->major = vdc_major;
g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
g->minors = 1 << PARTITION_SHIFT;
@@ -872,8 +874,6 @@ static int probe_disk(struct vdc_port *port)
}
}
- blk_queue_physical_block_size(q, port->vdisk_phys_blksz);
-
pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
g->disk_name,
port->vdisk_size, (port->vdisk_size >> (20 - 9)),
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index f85b6af414b4..6731678f3a41 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -820,7 +820,7 @@ static int swim_floppy_init(struct swim_priv *swd)
goto exit_put_disks;
swd->unit[drive].disk =
- blk_mq_alloc_disk(&swd->unit[drive].tag_set,
+ blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL,
&swd->unit[drive]);
if (IS_ERR(swd->unit[drive].disk)) {
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
@@ -916,7 +916,7 @@ out:
return ret;
}
-static int swim_remove(struct platform_device *dev)
+static void swim_remove(struct platform_device *dev)
{
struct swim_priv *swd = platform_get_drvdata(dev);
int drive;
@@ -937,13 +937,11 @@ static int swim_remove(struct platform_device *dev)
release_mem_region(res->start, resource_size(res));
kfree(swd);
-
- return 0;
}
static struct platform_driver swim_driver = {
.probe = swim_probe,
- .remove = swim_remove,
+ .remove_new = swim_remove,
.driver = {
.name = CARDNAME,
},
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index c2bc85826358..a04756ac778e 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1210,7 +1210,7 @@ static int swim3_attach(struct macio_dev *mdev,
if (rc)
goto out_unregister;
- disk = blk_mq_alloc_disk(&fs->tag_set, fs);
+ disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs);
if (IS_ERR(disk)) {
rc = PTR_ERR(disk);
goto out_free_tag_set;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 1dfb2e77898b..bea3d5cf8a83 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -246,21 +246,12 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
return 0;
}
-static int ublk_dev_param_zoned_apply(struct ublk_device *ub)
+static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
- const struct ublk_param_zoned *p = &ub->params.zoned;
-
- disk_set_zoned(ub->ub_disk);
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
blk_queue_required_elevator_features(ub->ub_disk->queue,
ELEVATOR_F_ZBD_SEQ_WRITE);
- disk_set_max_active_zones(ub->ub_disk, p->max_active_zones);
- disk_set_max_open_zones(ub->ub_disk, p->max_open_zones);
- blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors);
-
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
-
- return 0;
}
/* Based on virtblk_alloc_report_buffer */
@@ -432,9 +423,8 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
return -EOPNOTSUPP;
}
-static int ublk_dev_param_zoned_apply(struct ublk_device *ub)
+static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
- return -EOPNOTSUPP;
}
static int ublk_revalidate_disk_zones(struct ublk_device *ub)
@@ -498,11 +488,6 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
struct request_queue *q = ub->ub_disk->queue;
const struct ublk_param_basic *p = &ub->params.basic;
- blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
- blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
- blk_queue_io_min(q, 1 << p->io_min_shift);
- blk_queue_io_opt(q, 1 << p->io_opt_shift);
-
blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
p->attrs & UBLK_ATTR_FUA);
if (p->attrs & UBLK_ATTR_ROTATIONAL)
@@ -510,29 +495,12 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
else
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- blk_queue_max_hw_sectors(q, p->max_sectors);
- blk_queue_chunk_sectors(q, p->chunk_sectors);
- blk_queue_virt_boundary(q, p->virt_boundary_mask);
-
if (p->attrs & UBLK_ATTR_READ_ONLY)
set_disk_ro(ub->ub_disk, true);
set_capacity(ub->ub_disk, p->dev_sectors);
}
-static void ublk_dev_param_discard_apply(struct ublk_device *ub)
-{
- struct request_queue *q = ub->ub_disk->queue;
- const struct ublk_param_discard *p = &ub->params.discard;
-
- q->limits.discard_alignment = p->discard_alignment;
- q->limits.discard_granularity = p->discard_granularity;
- blk_queue_max_discard_sectors(q, p->max_discard_sectors);
- blk_queue_max_write_zeroes_sectors(q,
- p->max_write_zeroes_sectors);
- blk_queue_max_discard_segments(q, p->max_discard_segments);
-}
-
static int ublk_validate_params(const struct ublk_device *ub)
{
/* basic param is the only one which must be set */
@@ -576,20 +544,12 @@ static int ublk_validate_params(const struct ublk_device *ub)
return 0;
}
-static int ublk_apply_params(struct ublk_device *ub)
+static void ublk_apply_params(struct ublk_device *ub)
{
- if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
- return -EINVAL;
-
ublk_dev_param_basic_apply(ub);
- if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
- ublk_dev_param_discard_apply(ub);
-
if (ub->params.types & UBLK_PARAM_TYPE_ZONED)
- return ublk_dev_param_zoned_apply(ub);
-
- return 0;
+ ublk_dev_param_zoned_apply(ub);
}
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
@@ -645,14 +605,16 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
return ubq->flags & UBLK_F_NEED_GET_DATA;
}
-static struct ublk_device *ublk_get_device(struct ublk_device *ub)
+/* Called in slow path only, keep it noinline for trace purpose */
+static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub)
{
if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
return ub;
return NULL;
}
-static void ublk_put_device(struct ublk_device *ub)
+/* Called in slow path only, keep it noinline for trace purpose */
+static noinline void ublk_put_device(struct ublk_device *ub)
{
put_device(&ub->cdev_dev);
}
@@ -711,7 +673,7 @@ static void ublk_free_disk(struct gendisk *disk)
struct ublk_device *ub = disk->private_data;
clear_bit(UB_STATE_USED, &ub->state);
- put_device(&ub->cdev_dev);
+ ublk_put_device(ub);
}
static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
@@ -2182,7 +2144,7 @@ static void ublk_remove(struct ublk_device *ub)
cancel_work_sync(&ub->stop_work);
cancel_work_sync(&ub->quiesce_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
- put_device(&ub->cdev_dev);
+ ublk_put_device(ub);
ublks_added--;
}
@@ -2205,12 +2167,47 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
{
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
+ const struct ublk_param_basic *p = &ub->params.basic;
int ublksrv_pid = (int)header->data[0];
+ struct queue_limits lim = {
+ .logical_block_size = 1 << p->logical_bs_shift,
+ .physical_block_size = 1 << p->physical_bs_shift,
+ .io_min = 1 << p->io_min_shift,
+ .io_opt = 1 << p->io_opt_shift,
+ .max_hw_sectors = p->max_sectors,
+ .chunk_sectors = p->chunk_sectors,
+ .virt_boundary_mask = p->virt_boundary_mask,
+
+ };
struct gendisk *disk;
int ret = -EINVAL;
if (ublksrv_pid <= 0)
return -EINVAL;
+ if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
+ return -EINVAL;
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
+ const struct ublk_param_discard *pd = &ub->params.discard;
+
+ lim.discard_alignment = pd->discard_alignment;
+ lim.discard_granularity = pd->discard_granularity;
+ lim.max_hw_discard_sectors = pd->max_discard_sectors;
+ lim.max_write_zeroes_sectors = pd->max_write_zeroes_sectors;
+ lim.max_discard_segments = pd->max_discard_segments;
+ }
+
+ if (ub->params.types & UBLK_PARAM_TYPE_ZONED) {
+ const struct ublk_param_zoned *p = &ub->params.zoned;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ return -EOPNOTSUPP;
+
+ lim.zoned = true;
+ lim.max_active_zones = p->max_active_zones;
+ lim.max_open_zones = p->max_open_zones;
+ lim.max_zone_append_sectors = p->max_zone_append_sectors;
+ }
if (wait_for_completion_interruptible(&ub->completion) != 0)
return -EINTR;
@@ -2222,7 +2219,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
goto out_unlock;
}
- disk = blk_mq_alloc_disk(&ub->tag_set, NULL);
+ disk = blk_mq_alloc_disk(&ub->tag_set, &lim, NULL);
if (IS_ERR(disk)) {
ret = PTR_ERR(disk);
goto out_unlock;
@@ -2234,15 +2231,13 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
ub->dev_info.ublksrv_pid = ublksrv_pid;
ub->ub_disk = disk;
- ret = ublk_apply_params(ub);
- if (ret)
- goto out_put_disk;
+ ublk_apply_params(ub);
/* don't probe partitions if any one ubq daemon is un-trusted */
if (ub->nr_privileged_daemon != ub->nr_queues_ready)
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
- get_device(&ub->cdev_dev);
+ ublk_get_device(ub);
ub->dev_info.state = UBLK_S_DEV_LIVE;
if (ublk_dev_is_zoned(ub)) {
@@ -2262,7 +2257,6 @@ out_put_cdev:
ub->dev_info.state = UBLK_S_DEV_DEAD;
ublk_put_device(ub);
}
-out_put_disk:
if (ret)
put_disk(disk);
out_unlock:
@@ -2474,7 +2468,7 @@ static inline bool ublk_idr_freed(int id)
return ptr == NULL;
}
-static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
+static int ublk_ctrl_del_dev(struct ublk_device **p_ub, bool wait)
{
struct ublk_device *ub = *p_ub;
int idx = ub->ub_number;
@@ -2508,7 +2502,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
* - the device number is freed already, we will not find this
* device via ublk_get_device_from_id()
*/
- if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
+ if (wait && wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
return -EINTR;
return 0;
}
@@ -2907,7 +2901,10 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
ret = ublk_ctrl_add_dev(cmd);
break;
case UBLK_CMD_DEL_DEV:
- ret = ublk_ctrl_del_dev(&ub);
+ ret = ublk_ctrl_del_dev(&ub, true);
+ break;
+ case UBLK_U_CMD_DEL_DEV_ASYNC:
+ ret = ublk_ctrl_del_dev(&ub, false);
break;
case UBLK_CMD_GET_QUEUE_AFFINITY:
ret = ublk_ctrl_get_queue_affinity(ub, cmd);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5bf98fd6a651..42dea7601d87 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -720,25 +720,24 @@ fail_report:
return ret;
}
-static int virtblk_probe_zoned_device(struct virtio_device *vdev,
- struct virtio_blk *vblk,
- struct request_queue *q)
+static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
+ struct queue_limits *lim)
{
+ struct virtio_device *vdev = vblk->vdev;
u32 v, wg;
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
- disk_set_zoned(vblk->disk);
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+ lim->zoned = true;
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_open_zones, &v);
- disk_set_max_open_zones(vblk->disk, v);
+ lim->max_open_zones = v;
dev_dbg(&vdev->dev, "max open zones = %u\n", v);
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_active_zones, &v);
- disk_set_max_active_zones(vblk->disk, v);
+ lim->max_active_zones = v;
dev_dbg(&vdev->dev, "max active zones = %u\n", v);
virtio_cread(vdev, struct virtio_blk_config,
@@ -747,8 +746,8 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
dev_warn(&vdev->dev, "zero write granularity reported\n");
return -ENODEV;
}
- blk_queue_physical_block_size(q, wg);
- blk_queue_io_min(q, wg);
+ lim->physical_block_size = wg;
+ lim->io_min = wg;
dev_dbg(&vdev->dev, "write granularity = %u\n", wg);
@@ -764,13 +763,13 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
vblk->zone_sectors);
return -ENODEV;
}
- blk_queue_chunk_sectors(q, vblk->zone_sectors);
+ lim->chunk_sectors = vblk->zone_sectors;
dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
dev_warn(&vblk->vdev->dev,
"ignoring negotiated F_DISCARD for zoned device\n");
- blk_queue_max_discard_sectors(q, 0);
+ lim->max_hw_discard_sectors = 0;
}
virtio_cread(vdev, struct virtio_blk_config,
@@ -785,25 +784,21 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
wg, v);
return -ENODEV;
}
- blk_queue_max_zone_append_sectors(q, v);
+ lim->max_zone_append_sectors = v;
dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
- return blk_revalidate_disk_zones(vblk->disk, NULL);
+ return 0;
}
-
#else
-
/*
- * Zoned block device support is not configured in this kernel.
- * Host-managed zoned devices can't be supported, but others are
- * good to go as regular block devices.
+ * Zoned block device support is not configured in this kernel, host-managed
+ * zoned devices can't be supported.
*/
#define virtblk_report_zones NULL
-
-static inline int virtblk_probe_zoned_device(struct virtio_device *vdev,
- struct virtio_blk *vblk, struct request_queue *q)
+static inline int virtblk_read_zoned_limits(struct virtio_blk *vblk,
+ struct queue_limits *lim)
{
- dev_err(&vdev->dev,
+ dev_err(&vblk->vdev->dev,
"virtio_blk: zoned devices are not supported");
return -EOPNOTSUPP;
}
@@ -1248,31 +1243,17 @@ static const struct blk_mq_ops virtio_mq_ops = {
static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
-static int virtblk_probe(struct virtio_device *vdev)
+static int virtblk_read_limits(struct virtio_blk *vblk,
+ struct queue_limits *lim)
{
- struct virtio_blk *vblk;
- struct request_queue *q;
- int err, index;
-
+ struct virtio_device *vdev = vblk->vdev;
u32 v, blk_size, max_size, sg_elems, opt_io_size;
u32 max_discard_segs = 0;
u32 discard_granularity = 0;
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
- unsigned int queue_depth;
size_t max_dma_size;
-
- if (!vdev->config->get) {
- dev_err(&vdev->dev, "%s failure: config access disabled\n",
- __func__);
- return -EINVAL;
- }
-
- err = ida_alloc_range(&vd_index_ida, 0,
- minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
- if (err < 0)
- goto out;
- index = err;
+ int err;
/* We need to know how many segments before we allocate. */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
@@ -1286,78 +1267,11 @@ static int virtblk_probe(struct virtio_device *vdev)
/* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
- vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
- if (!vblk) {
- err = -ENOMEM;
- goto out_free_index;
- }
-
- mutex_init(&vblk->vdev_mutex);
-
- vblk->vdev = vdev;
-
- INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
-
- err = init_vq(vblk);
- if (err)
- goto out_free_vblk;
-
- /* Default queue sizing is to fill the ring. */
- if (!virtblk_queue_depth) {
- queue_depth = vblk->vqs[0].vq->num_free;
- /* ... but without indirect descs, we use 2 descs per req */
- if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
- queue_depth /= 2;
- } else {
- queue_depth = virtblk_queue_depth;
- }
-
- memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
- vblk->tag_set.ops = &virtio_mq_ops;
- vblk->tag_set.queue_depth = queue_depth;
- vblk->tag_set.numa_node = NUMA_NO_NODE;
- vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
- vblk->tag_set.cmd_size =
- sizeof(struct virtblk_req) +
- sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
- vblk->tag_set.driver_data = vblk;
- vblk->tag_set.nr_hw_queues = vblk->num_vqs;
- vblk->tag_set.nr_maps = 1;
- if (vblk->io_queues[HCTX_TYPE_POLL])
- vblk->tag_set.nr_maps = 3;
-
- err = blk_mq_alloc_tag_set(&vblk->tag_set);
- if (err)
- goto out_free_vq;
-
- vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
- if (IS_ERR(vblk->disk)) {
- err = PTR_ERR(vblk->disk);
- goto out_free_tags;
- }
- q = vblk->disk->queue;
-
- virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
-
- vblk->disk->major = major;
- vblk->disk->first_minor = index_to_minor(index);
- vblk->disk->minors = 1 << PART_BITS;
- vblk->disk->private_data = vblk;
- vblk->disk->fops = &virtblk_fops;
- vblk->index = index;
-
- /* configure queue flush support */
- virtblk_update_cache_mode(vdev);
-
- /* If disk is read-only in the host, the guest should obey */
- if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
- set_disk_ro(vblk->disk, 1);
-
/* We can handle whatever the host told us to handle. */
- blk_queue_max_segments(q, sg_elems);
+ lim->max_segments = sg_elems;
/* No real sector limit. */
- blk_queue_max_hw_sectors(q, UINT_MAX);
+ lim->max_hw_sectors = UINT_MAX;
max_dma_size = virtio_max_dma_size(vdev);
max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size;
@@ -1369,7 +1283,7 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!err)
max_size = min(max_size, v);
- blk_queue_max_segment_size(q, max_size);
+ lim->max_segment_size = max_size;
/* Host can optionally specify the block size of the device */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
@@ -1381,38 +1295,37 @@ static int virtblk_probe(struct virtio_device *vdev)
dev_err(&vdev->dev,
"virtio_blk: invalid block size: 0x%x\n",
blk_size);
- goto out_cleanup_disk;
+ return err;
}
- blk_queue_logical_block_size(q, blk_size);
+ lim->logical_block_size = blk_size;
} else
- blk_size = queue_logical_block_size(q);
+ blk_size = lim->logical_block_size;
/* Use topology information if available */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, physical_block_exp,
&physical_block_exp);
if (!err && physical_block_exp)
- blk_queue_physical_block_size(q,
- blk_size * (1 << physical_block_exp));
+ lim->physical_block_size = blk_size * (1 << physical_block_exp);
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, alignment_offset,
&alignment_offset);
if (!err && alignment_offset)
- blk_queue_alignment_offset(q, blk_size * alignment_offset);
+ lim->alignment_offset = blk_size * alignment_offset;
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, min_io_size,
&min_io_size);
if (!err && min_io_size)
- blk_queue_io_min(q, blk_size * min_io_size);
+ lim->io_min = blk_size * min_io_size;
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, opt_io_size,
&opt_io_size);
if (!err && opt_io_size)
- blk_queue_io_opt(q, blk_size * opt_io_size);
+ lim->io_opt = blk_size * opt_io_size;
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
virtio_cread(vdev, struct virtio_blk_config,
@@ -1420,7 +1333,7 @@ static int virtblk_probe(struct virtio_device *vdev)
virtio_cread(vdev, struct virtio_blk_config,
max_discard_sectors, &v);
- blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
+ lim->max_hw_discard_sectors = v ? v : UINT_MAX;
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
&max_discard_segs);
@@ -1429,7 +1342,7 @@ static int virtblk_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
virtio_cread(vdev, struct virtio_blk_config,
max_write_zeroes_sectors, &v);
- blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
+ lim->max_write_zeroes_sectors = v ? v : UINT_MAX;
}
/* The discard and secure erase limits are combined since the Linux
@@ -1455,8 +1368,7 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!v) {
dev_err(&vdev->dev,
"virtio_blk: secure_erase_sector_alignment can't be 0\n");
- err = -EINVAL;
- goto out_cleanup_disk;
+ return -EINVAL;
}
discard_granularity = min_not_zero(discard_granularity, v);
@@ -1470,11 +1382,10 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!v) {
dev_err(&vdev->dev,
"virtio_blk: max_secure_erase_sectors can't be 0\n");
- err = -EINVAL;
- goto out_cleanup_disk;
+ return -EINVAL;
}
- blk_queue_max_secure_erase_sectors(q, v);
+ lim->max_secure_erase_sectors = v;
virtio_cread(vdev, struct virtio_blk_config,
max_secure_erase_seg, &v);
@@ -1485,8 +1396,7 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!v) {
dev_err(&vdev->dev,
"virtio_blk: max_secure_erase_seg can't be 0\n");
- err = -EINVAL;
- goto out_cleanup_disk;
+ return -EINVAL;
}
max_discard_segs = min_not_zero(max_discard_segs, v);
@@ -1502,45 +1412,142 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!max_discard_segs)
max_discard_segs = sg_elems;
- blk_queue_max_discard_segments(q,
- min(max_discard_segs, MAX_DISCARD_SEGMENTS));
+ lim->max_discard_segments =
+ min(max_discard_segs, MAX_DISCARD_SEGMENTS);
if (discard_granularity)
- q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT;
+ lim->discard_granularity =
+ discard_granularity << SECTOR_SHIFT;
else
- q->limits.discard_granularity = blk_size;
+ lim->discard_granularity = blk_size;
}
- virtblk_update_capacity(vblk, false);
- virtio_device_ready(vdev);
-
- /*
- * All steps that follow use the VQs therefore they need to be
- * placed after the virtio_device_ready() call above.
- */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
u8 model;
- virtio_cread(vdev, struct virtio_blk_config, zoned.model,
- &model);
+ virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model);
switch (model) {
case VIRTIO_BLK_Z_NONE:
case VIRTIO_BLK_Z_HA:
- /* Present the host-aware device as non-zoned */
- break;
+ /* treat host-aware devices as non-zoned */
+ return 0;
case VIRTIO_BLK_Z_HM:
- err = virtblk_probe_zoned_device(vdev, vblk, q);
+ err = virtblk_read_zoned_limits(vblk, lim);
if (err)
- goto out_cleanup_disk;
+ return err;
break;
default:
- dev_err(&vdev->dev, "unsupported zone model %d\n",
- model);
- err = -EINVAL;
- goto out_cleanup_disk;
+ dev_err(&vdev->dev, "unsupported zone model %d\n", model);
+ return -EINVAL;
}
}
+ return 0;
+}
+
+static int virtblk_probe(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk;
+ struct queue_limits lim = { };
+ int err, index;
+ unsigned int queue_depth;
+
+ if (!vdev->config->get) {
+ dev_err(&vdev->dev, "%s failure: config access disabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ err = ida_alloc_range(&vd_index_ida, 0,
+ minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
+ if (err < 0)
+ goto out;
+ index = err;
+
+ vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
+ if (!vblk) {
+ err = -ENOMEM;
+ goto out_free_index;
+ }
+
+ mutex_init(&vblk->vdev_mutex);
+
+ vblk->vdev = vdev;
+
+ INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
+
+ err = init_vq(vblk);
+ if (err)
+ goto out_free_vblk;
+
+ /* Default queue sizing is to fill the ring. */
+ if (!virtblk_queue_depth) {
+ queue_depth = vblk->vqs[0].vq->num_free;
+ /* ... but without indirect descs, we use 2 descs per req */
+ if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
+ queue_depth /= 2;
+ } else {
+ queue_depth = virtblk_queue_depth;
+ }
+
+ memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
+ vblk->tag_set.ops = &virtio_mq_ops;
+ vblk->tag_set.queue_depth = queue_depth;
+ vblk->tag_set.numa_node = NUMA_NO_NODE;
+ vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ vblk->tag_set.cmd_size =
+ sizeof(struct virtblk_req) +
+ sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
+ vblk->tag_set.driver_data = vblk;
+ vblk->tag_set.nr_hw_queues = vblk->num_vqs;
+ vblk->tag_set.nr_maps = 1;
+ if (vblk->io_queues[HCTX_TYPE_POLL])
+ vblk->tag_set.nr_maps = 3;
+
+ err = blk_mq_alloc_tag_set(&vblk->tag_set);
+ if (err)
+ goto out_free_vq;
+
+ err = virtblk_read_limits(vblk, &lim);
+ if (err)
+ goto out_free_tags;
+
+ vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk);
+ if (IS_ERR(vblk->disk)) {
+ err = PTR_ERR(vblk->disk);
+ goto out_free_tags;
+ }
+
+ virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
+
+ vblk->disk->major = major;
+ vblk->disk->first_minor = index_to_minor(index);
+ vblk->disk->minors = 1 << PART_BITS;
+ vblk->disk->private_data = vblk;
+ vblk->disk->fops = &virtblk_fops;
+ vblk->index = index;
+
+ /* configure queue flush support */
+ virtblk_update_cache_mode(vdev);
+
+ /* If disk is read-only in the host, the guest should obey */
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
+ set_disk_ro(vblk->disk, 1);
+
+ virtblk_update_capacity(vblk, false);
+ virtio_device_ready(vdev);
+
+ /*
+ * All steps that follow use the VQs therefore they need to be
+ * placed after the virtio_device_ready() call above.
+ */
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
+ err = blk_revalidate_disk_zones(vblk->disk, NULL);
+ if (err)
+ goto out_cleanup_disk;
+ }
+
err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
if (err)
goto out_cleanup_disk;
@@ -1593,14 +1600,15 @@ static int virtblk_freeze(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
+ /* Ensure no requests in virtqueues before deleting vqs. */
+ blk_mq_freeze_queue(vblk->disk->queue);
+
/* Ensure we don't receive any more interrupts */
virtio_reset_device(vdev);
/* Make sure no work handler is accessing the device. */
flush_work(&vblk->config_work);
- blk_mq_quiesce_queue(vblk->disk->queue);
-
vdev->config->del_vqs(vdev);
kfree(vblk->vqs);
@@ -1618,7 +1626,7 @@ static int virtblk_restore(struct virtio_device *vdev)
virtio_device_ready(vdev);
- blk_mq_unquiesce_queue(vblk->disk->queue);
+ blk_mq_unfreeze_queue(vblk->disk->queue);
return 0;
}
#endif
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4defd7f387c7..944576d582fb 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -465,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
}
req->dev = vbd->pdevice;
- req->bdev = vbd->bdev_handle->bdev;
+ req->bdev = file_bdev(vbd->bdev_file);
rc = 0;
out:
@@ -969,7 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
int err = 0;
int status = BLKIF_RSP_OKAY;
struct xen_blkif *blkif = ring->blkif;
- struct block_device *bdev = blkif->vbd.bdev_handle->bdev;
+ struct block_device *bdev = file_bdev(blkif->vbd.bdev_file);
struct phys_req preq;
xen_blkif_get(blkif);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 1432c83183d0..b427d54bc120 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -221,7 +221,7 @@ struct xen_vbd {
unsigned char type;
/* phys device that this vbd maps to. */
u32 pdevice;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
/* Cached size parameter. */
sector_t size;
unsigned int flush_support:1;
@@ -360,7 +360,7 @@ struct pending_req {
};
-#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev_handle->bdev)
+#define vbd_sz(_v) bdev_nr_sectors(file_bdev((_v)->bdev_file))
#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b) \
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..0621878940ae 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -81,7 +81,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
int i;
/* Not ready to connect? */
- if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_handle)
+ if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_file)
return;
/* Already connected? */
@@ -99,13 +99,12 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
return;
}
- err = sync_blockdev(blkif->vbd.bdev_handle->bdev);
+ err = sync_blockdev(file_bdev(blkif->vbd.bdev_file));
if (err) {
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
- invalidate_inode_pages2(
- blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+ invalidate_inode_pages2(blkif->vbd.bdev_file->f_mapping);
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
@@ -473,9 +472,9 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)
static void xen_vbd_free(struct xen_vbd *vbd)
{
- if (vbd->bdev_handle)
- bdev_release(vbd->bdev_handle);
- vbd->bdev_handle = NULL;
+ if (vbd->bdev_file)
+ fput(vbd->bdev_file);
+ vbd->bdev_file = NULL;
}
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
@@ -483,7 +482,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
int cdrom)
{
struct xen_vbd *vbd;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
vbd = &blkif->vbd;
vbd->handle = handle;
@@ -492,17 +491,17 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
vbd->pdevice = MKDEV(major, minor);
- bdev_handle = bdev_open_by_dev(vbd->pdevice, vbd->readonly ?
+ bdev_file = bdev_file_open_by_dev(vbd->pdevice, vbd->readonly ?
BLK_OPEN_READ : BLK_OPEN_WRITE, NULL, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
pr_warn("xen_vbd_create: device %08x could not be opened\n",
vbd->pdevice);
return -ENOENT;
}
- vbd->bdev_handle = bdev_handle;
- if (vbd->bdev_handle->bdev->bd_disk == NULL) {
+ vbd->bdev_file = bdev_file;
+ if (file_bdev(vbd->bdev_file)->bd_disk == NULL) {
pr_warn("xen_vbd_create: device %08x doesn't exist\n",
vbd->pdevice);
xen_vbd_free(vbd);
@@ -510,14 +509,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
}
vbd->size = vbd_sz(vbd);
- if (cdrom || disk_to_cdi(vbd->bdev_handle->bdev->bd_disk))
+ if (cdrom || disk_to_cdi(file_bdev(vbd->bdev_file)->bd_disk))
vbd->type |= VDISK_CDROM;
- if (vbd->bdev_handle->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+ if (file_bdev(vbd->bdev_file)->bd_disk->flags & GENHD_FL_REMOVABLE)
vbd->type |= VDISK_REMOVABLE;
- if (bdev_write_cache(bdev_handle->bdev))
+ if (bdev_write_cache(file_bdev(bdev_file)))
vbd->flush_support = true;
- if (bdev_max_secure_erase_sectors(bdev_handle->bdev))
+ if (bdev_max_secure_erase_sectors(file_bdev(bdev_file)))
vbd->discard_secure = true;
pr_debug("Successful creation of handle=%04x (dom=%u)\n",
@@ -570,7 +569,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
struct xen_blkif *blkif = be->blkif;
int err;
int state = 0;
- struct block_device *bdev = be->blkif->vbd.bdev_handle->bdev;
+ struct block_device *bdev = file_bdev(be->blkif->vbd.bdev_file);
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
return;
@@ -932,7 +931,7 @@ again:
}
err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
(unsigned long)bdev_logical_block_size(
- be->blkif->vbd.bdev_handle->bdev));
+ file_bdev(be->blkif->vbd.bdev_file)));
if (err) {
xenbus_dev_fatal(dev, err, "writing %s/sector-size",
dev->nodename);
@@ -940,7 +939,7 @@ again:
}
err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
bdev_physical_block_size(
- be->blkif->vbd.bdev_handle->bdev));
+ file_bdev(be->blkif->vbd.bdev_file)));
if (err)
xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
dev->nodename);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 434fab306777..fd7c0ff2139c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -941,39 +941,35 @@ static const struct blk_mq_ops blkfront_mq_ops = {
.complete = blkif_complete_rq,
};
-static void blkif_set_queue_limits(struct blkfront_info *info)
+static void blkif_set_queue_limits(const struct blkfront_info *info,
+ struct queue_limits *lim)
{
- struct request_queue *rq = info->rq;
- struct gendisk *gd = info->gd;
unsigned int segments = info->max_indirect_segments ? :
BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
-
if (info->feature_discard) {
- blk_queue_max_discard_sectors(rq, get_capacity(gd));
- rq->limits.discard_granularity = info->discard_granularity ?:
- info->physical_sector_size;
- rq->limits.discard_alignment = info->discard_alignment;
+ lim->max_hw_discard_sectors = UINT_MAX;
+ if (info->discard_granularity)
+ lim->discard_granularity = info->discard_granularity;
+ lim->discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
- blk_queue_max_secure_erase_sectors(rq,
- get_capacity(gd));
+ lim->max_secure_erase_sectors = UINT_MAX;
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_logical_block_size(rq, info->sector_size);
- blk_queue_physical_block_size(rq, info->physical_sector_size);
- blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+ lim->logical_block_size = info->sector_size;
+ lim->physical_block_size = info->physical_sector_size;
+ lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512;
/* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
- blk_queue_max_segment_size(rq, PAGE_SIZE);
+ lim->seg_boundary_mask = PAGE_SIZE - 1;
+ lim->max_segment_size = PAGE_SIZE;
/* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+ lim->max_segments = segments / GRANTS_PER_PSEG;
/* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(rq, 511);
+ lim->dma_alignment = 511;
}
static const char *flush_info(struct blkfront_info *info)
@@ -1070,6 +1066,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
struct blkfront_info *info, u16 sector_size,
unsigned int physical_sector_size)
{
+ struct queue_limits lim = {};
struct gendisk *gd;
int nr_minors = 1;
int err;
@@ -1136,11 +1133,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (err)
goto out_release_minors;
- gd = blk_mq_alloc_disk(&info->tag_set, info);
+ blkif_set_queue_limits(info, &lim);
+ gd = blk_mq_alloc_disk(&info->tag_set, &lim, info);
if (IS_ERR(gd)) {
err = PTR_ERR(gd);
goto out_free_tag_set;
}
+ blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue);
strcpy(gd->disk_name, DEV_NAME);
ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
@@ -1162,7 +1161,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->gd = gd;
info->sector_size = sector_size;
info->physical_sector_size = physical_sector_size;
- blkif_set_queue_limits(info);
xlvbd_flush(info);
@@ -2006,18 +2004,19 @@ static int blkfront_probe(struct xenbus_device *dev,
static int blkif_recover(struct blkfront_info *info)
{
+ struct queue_limits lim;
unsigned int r_index;
struct request *req, *n;
int rc;
struct bio *bio;
- unsigned int segs;
struct blkfront_ring_info *rinfo;
+ lim = queue_limits_start_update(info->rq);
blkfront_gather_backend_features(info);
- /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
- blkif_set_queue_limits(info);
- segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
+ blkif_set_queue_limits(info, &lim);
+ rc = queue_limits_commit_update(info->rq, &lim);
+ if (rc)
+ return rc;
for_each_rinfo(info, rinfo, r_index) {
rc = blkfront_setup_indirect(rinfo);
@@ -2037,7 +2036,9 @@ static int blkif_recover(struct blkfront_info *info)
list_for_each_entry_safe(req, n, &info->requests, queuelist) {
/* Requeue pending requests (flush or discard) */
list_del_init(&req->queuelist);
- BUG_ON(req->nr_phys_segments > segs);
+ BUG_ON(req->nr_phys_segments >
+ (info->max_indirect_segments ? :
+ BLKIF_MAX_SEGMENTS_PER_REQUEST));
blk_mq_requeue_request(req, false);
}
blk_mq_start_stopped_hw_queues(info->rq, true);
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 11493167b0a8..7c5f4e4d9b50 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -318,7 +318,7 @@ static int z2ram_register_disk(int minor)
struct gendisk *disk;
int err;
- disk = blk_mq_alloc_disk(&tag_set, NULL);
+ disk = blk_mq_alloc_disk(&tag_set, NULL, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 6772e0c654fa..da7a20fa6152 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -426,11 +426,11 @@ static void reset_bdev(struct zram *zram)
if (!zram->backing_dev)
return;
- bdev_release(zram->bdev_handle);
+ fput(zram->bdev_file);
/* hope filp_close flush all of IO */
filp_close(zram->backing_dev, NULL);
zram->backing_dev = NULL;
- zram->bdev_handle = NULL;
+ zram->bdev_file = NULL;
zram->disk->fops = &zram_devops;
kvfree(zram->bitmap);
zram->bitmap = NULL;
@@ -476,7 +476,7 @@ static ssize_t backing_dev_store(struct device *dev,
struct address_space *mapping;
unsigned int bitmap_sz;
unsigned long nr_pages, *bitmap = NULL;
- struct bdev_handle *bdev_handle = NULL;
+ struct file *bdev_file = NULL;
int err;
struct zram *zram = dev_to_zram(dev);
@@ -513,11 +513,11 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- bdev_handle = bdev_open_by_dev(inode->i_rdev,
+ bdev_file = bdev_file_open_by_dev(inode->i_rdev,
BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL);
- if (IS_ERR(bdev_handle)) {
- err = PTR_ERR(bdev_handle);
- bdev_handle = NULL;
+ if (IS_ERR(bdev_file)) {
+ err = PTR_ERR(bdev_file);
+ bdev_file = NULL;
goto out;
}
@@ -531,7 +531,7 @@ static ssize_t backing_dev_store(struct device *dev,
reset_bdev(zram);
- zram->bdev_handle = bdev_handle;
+ zram->bdev_file = bdev_file;
zram->backing_dev = backing_dev;
zram->bitmap = bitmap;
zram->nr_pages = nr_pages;
@@ -544,8 +544,8 @@ static ssize_t backing_dev_store(struct device *dev,
out:
kvfree(bitmap);
- if (bdev_handle)
- bdev_release(bdev_handle);
+ if (bdev_file)
+ fput(bdev_file);
if (backing_dev)
filp_close(backing_dev, NULL);
@@ -587,7 +587,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
{
struct bio *bio;
- bio = bio_alloc(zram->bdev_handle->bdev, 1, parent->bi_opf, GFP_NOIO);
+ bio = bio_alloc(file_bdev(zram->bdev_file), 1, parent->bi_opf, GFP_NOIO);
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
__bio_add_page(bio, page, PAGE_SIZE, 0);
bio_chain(bio, parent);
@@ -703,7 +703,7 @@ static ssize_t writeback_store(struct device *dev,
continue;
}
- bio_init(&bio, zram->bdev_handle->bdev, &bio_vec, 1,
+ bio_init(&bio, file_bdev(zram->bdev_file), &bio_vec, 1,
REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
__bio_add_page(&bio, page, PAGE_SIZE, 0);
@@ -785,7 +785,7 @@ static void zram_sync_read(struct work_struct *work)
struct bio_vec bv;
struct bio bio;
- bio_init(&bio, zw->zram->bdev_handle->bdev, &bv, 1, REQ_OP_READ);
+ bio_init(&bio, file_bdev(zw->zram->bdev_file), &bv, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
zw->error = submit_bio_wait(&bio);
@@ -2177,6 +2177,28 @@ ATTRIBUTE_GROUPS(zram_disk);
*/
static int zram_add(void)
{
+ struct queue_limits lim = {
+ .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
+ /*
+ * To ensure that we always get PAGE_SIZE aligned and
+ * n*PAGE_SIZED sized I/O requests.
+ */
+ .physical_block_size = PAGE_SIZE,
+ .io_min = PAGE_SIZE,
+ .io_opt = PAGE_SIZE,
+ .max_hw_discard_sectors = UINT_MAX,
+ /*
+ * zram_bio_discard() will clear all logical blocks if logical
+ * block size is identical with physical block size(PAGE_SIZE).
+ * But if it is different, we will skip discarding some parts of
+ * logical blocks in the part of the request range which isn't
+ * aligned to physical block size. So we can't ensure that all
+ * discarded logical blocks are zeroed.
+ */
+#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
+ .max_write_zeroes_sectors = UINT_MAX,
+#endif
+ };
struct zram *zram;
int ret, device_id;
@@ -2195,11 +2217,11 @@ static int zram_add(void)
#endif
/* gendisk structure */
- zram->disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!zram->disk) {
+ zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(zram->disk)) {
pr_err("Error allocating disk structure for device %d\n",
device_id);
- ret = -ENOMEM;
+ ret = PTR_ERR(zram->disk);
goto out_free_idr;
}
@@ -2216,29 +2238,6 @@ static int zram_add(void)
/* zram devices sort of resembles non-rotational disks */
blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
-
- /*
- * To ensure that we always get PAGE_SIZE aligned
- * and n*PAGE_SIZED sized I/O requests.
- */
- blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
- blk_queue_logical_block_size(zram->disk->queue,
- ZRAM_LOGICAL_BLOCK_SIZE);
- blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
- blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
- blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
-
- /*
- * zram_bio_discard() will clear all logical blocks if logical block
- * size is identical with physical block size(PAGE_SIZE). But if it is
- * different, we will skip discarding some parts of logical blocks in
- * the part of the request range which isn't aligned to physical block
- * size. So we can't ensure that all discarded logical blocks are
- * zeroed.
- */
- if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
- blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
-
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 3b94d12f41b4..37bf29f34d26 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -132,7 +132,7 @@ struct zram {
spinlock_t wb_limit_lock;
bool wb_limit_enable;
u64 bd_wb_limit;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
unsigned long *bitmap;
unsigned long nr_pages;
#endif
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 0a5445ac5e1b..f9a7c790d7e2 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -11,6 +11,7 @@
#include <linux/firmware.h>
#include <linux/dmi.h>
#include <linux/of.h>
+#include <linux/string.h>
#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
@@ -543,8 +544,6 @@ static const char *btbcm_get_board_name(struct device *dev)
struct device_node *root;
char *board_type;
const char *tmp;
- int len;
- int i;
root = of_find_node_by_path("/");
if (!root)
@@ -554,13 +553,8 @@ static const char *btbcm_get_board_name(struct device *dev)
return NULL;
/* get rid of any '/' in the compatible string */
- len = strlen(tmp) + 1;
- board_type = devm_kzalloc(dev, len, GFP_KERNEL);
- strscpy(board_type, tmp, len);
- for (i = 0; i < len; i++) {
- if (board_type[i] == '/')
- board_type[i] = '-';
- }
+ board_type = devm_kstrdup(dev, tmp, GFP_KERNEL);
+ strreplace(board_type, '/', '-');
of_node_put(root);
return board_type;
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index cdc5c08824a0..6ba7f5d1b837 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -441,7 +441,7 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver)
return PTR_ERR(skb);
}
- if (skb->len != sizeof(*ver)) {
+ if (!skb || skb->len != sizeof(*ver)) {
bt_dev_err(hdev, "Intel version event size mismatch");
kfree_skb(skb);
return -EILSEQ;
@@ -2670,6 +2670,119 @@ static void btintel_set_msft_opcode(struct hci_dev *hdev, u8 hw_variant)
}
}
+static void btintel_print_fseq_info(struct hci_dev *hdev)
+{
+ struct sk_buff *skb;
+ u8 *p;
+ u32 val;
+ const char *str;
+
+ skb = __hci_cmd_sync(hdev, 0xfcb3, 0, NULL, HCI_CMD_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_dbg(hdev, "Reading fseq status command failed (%ld)",
+ PTR_ERR(skb));
+ return;
+ }
+
+ if (skb->len < (sizeof(u32) * 16 + 2)) {
+ bt_dev_dbg(hdev, "Malformed packet of length %u received",
+ skb->len);
+ kfree_skb(skb);
+ return;
+ }
+
+ p = skb_pull_data(skb, 1);
+ if (*p) {
+ bt_dev_dbg(hdev, "Failed to get fseq status (0x%2.2x)", *p);
+ kfree_skb(skb);
+ return;
+ }
+
+ p = skb_pull_data(skb, 1);
+ switch (*p) {
+ case 0:
+ str = "Success";
+ break;
+ case 1:
+ str = "Fatal error";
+ break;
+ case 2:
+ str = "Semaphore acquire error";
+ break;
+ default:
+ str = "Unknown error";
+ break;
+ }
+
+ if (*p) {
+ bt_dev_err(hdev, "Fseq status: %s (0x%2.2x)", str, *p);
+ kfree_skb(skb);
+ return;
+ }
+
+ bt_dev_info(hdev, "Fseq status: %s (0x%2.2x)", str, *p);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Reason: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Global version: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Installed version: 0x%8.8x", val);
+
+ p = skb->data;
+ skb_pull_data(skb, 4);
+ bt_dev_info(hdev, "Fseq executed: %2.2u.%2.2u.%2.2u.%2.2u", p[0], p[1],
+ p[2], p[3]);
+
+ p = skb->data;
+ skb_pull_data(skb, 4);
+ bt_dev_info(hdev, "Fseq BT Top: %2.2u.%2.2u.%2.2u.%2.2u", p[0], p[1],
+ p[2], p[3]);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq Top init version: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq Cnvio init version: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq MBX Wifi file version: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq BT version: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq Top reset address: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq MBX timeout: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq MBX ack: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq CNVi id: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq CNVr id: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq Error handle: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq Magic noalive indication: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq OTP version: 0x%8.8x", val);
+
+ val = get_unaligned_le32(skb_pull_data(skb, 4));
+ bt_dev_dbg(hdev, "Fseq MBX otp version: 0x%8.8x", val);
+
+ kfree_skb(skb);
+}
+
static int btintel_setup_combined(struct hci_dev *hdev)
{
const u8 param[1] = { 0xFF };
@@ -2902,6 +3015,7 @@ static int btintel_setup_combined(struct hci_dev *hdev)
err = btintel_bootloader_setup_tlv(hdev, &ver_tlv);
btintel_register_devcoredump_support(hdev);
+ btintel_print_fseq_info(hdev);
break;
default:
bt_dev_err(hdev, "Unsupported Intel hw variant (%u)",
diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c
index aaabb732082c..ac8ebccd3507 100644
--- a/drivers/bluetooth/btmtk.c
+++ b/drivers/bluetooth/btmtk.c
@@ -372,8 +372,10 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb)
struct btmediatek_data *data = hci_get_priv(hdev);
int err;
- if (!IS_ENABLED(CONFIG_DEV_COREDUMP))
+ if (!IS_ENABLED(CONFIG_DEV_COREDUMP)) {
+ kfree_skb(skb);
return 0;
+ }
switch (data->cd_info.state) {
case HCI_DEVCOREDUMP_IDLE:
@@ -420,5 +422,6 @@ MODULE_LICENSE("GPL");
MODULE_FIRMWARE(FIRMWARE_MT7622);
MODULE_FIRMWARE(FIRMWARE_MT7663);
MODULE_FIRMWARE(FIRMWARE_MT7668);
+MODULE_FIRMWARE(FIRMWARE_MT7922);
MODULE_FIRMWARE(FIRMWARE_MT7961);
MODULE_FIRMWARE(FIRMWARE_MT7925);
diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h
index 56f5502baadf..cbcdb99a22e6 100644
--- a/drivers/bluetooth/btmtk.h
+++ b/drivers/bluetooth/btmtk.h
@@ -4,6 +4,7 @@
#define FIRMWARE_MT7622 "mediatek/mt7622pr2h.bin"
#define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin"
#define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin"
+#define FIRMWARE_MT7922 "mediatek/BT_RAM_CODE_MT7922_1_1_hdr.bin"
#define FIRMWARE_MT7961 "mediatek/BT_RAM_CODE_MT7961_1_2_hdr.bin"
#define FIRMWARE_MT7925 "mediatek/mt7925/BT_RAM_CODE_MT7925_1_1_hdr.bin"
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 1d592ac413d1..0b93c2ff29e4 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -126,6 +126,7 @@ struct ps_data {
struct hci_dev *hdev;
struct work_struct work;
struct timer_list ps_timer;
+ struct mutex ps_lock;
};
struct wakeup_cmd_payload {
@@ -317,6 +318,9 @@ static void ps_start_timer(struct btnxpuart_dev *nxpdev)
if (psdata->cur_psmode == PS_MODE_ENABLE)
mod_timer(&psdata->ps_timer, jiffies + msecs_to_jiffies(psdata->h2c_ps_interval));
+
+ if (psdata->ps_state == PS_STATE_AWAKE && psdata->ps_cmd == PS_CMD_ENTER_PS)
+ cancel_work_sync(&psdata->work);
}
static void ps_cancel_timer(struct btnxpuart_dev *nxpdev)
@@ -337,6 +341,7 @@ static void ps_control(struct hci_dev *hdev, u8 ps_state)
!test_bit(BTNXPUART_SERDEV_OPEN, &nxpdev->tx_state))
return;
+ mutex_lock(&psdata->ps_lock);
switch (psdata->cur_h2c_wakeupmode) {
case WAKEUP_METHOD_DTR:
if (ps_state == PS_STATE_AWAKE)
@@ -350,12 +355,15 @@ static void ps_control(struct hci_dev *hdev, u8 ps_state)
status = serdev_device_break_ctl(nxpdev->serdev, 0);
else
status = serdev_device_break_ctl(nxpdev->serdev, -1);
+ msleep(20); /* Allow chip to detect UART-break and enter sleep */
bt_dev_dbg(hdev, "Set UART break: %s, status=%d",
str_on_off(ps_state == PS_STATE_SLEEP), status);
break;
}
if (!status)
psdata->ps_state = ps_state;
+ mutex_unlock(&psdata->ps_lock);
+
if (ps_state == PS_STATE_AWAKE)
btnxpuart_tx_wakeup(nxpdev);
}
@@ -391,17 +399,25 @@ static void ps_setup(struct hci_dev *hdev)
psdata->hdev = hdev;
INIT_WORK(&psdata->work, ps_work_func);
+ mutex_init(&psdata->ps_lock);
timer_setup(&psdata->ps_timer, ps_timeout_func, 0);
}
-static void ps_wakeup(struct btnxpuart_dev *nxpdev)
+static bool ps_wakeup(struct btnxpuart_dev *nxpdev)
{
struct ps_data *psdata = &nxpdev->psdata;
+ u8 ps_state;
+
+ mutex_lock(&psdata->ps_lock);
+ ps_state = psdata->ps_state;
+ mutex_unlock(&psdata->ps_lock);
- if (psdata->ps_state != PS_STATE_AWAKE) {
+ if (ps_state != PS_STATE_AWAKE) {
psdata->ps_cmd = PS_CMD_EXIT_PS;
schedule_work(&psdata->work);
+ return true;
}
+ return false;
}
static int send_ps_cmd(struct hci_dev *hdev, void *data)
@@ -1171,7 +1187,6 @@ static struct sk_buff *nxp_dequeue(void *data)
{
struct btnxpuart_dev *nxpdev = (struct btnxpuart_dev *)data;
- ps_wakeup(nxpdev);
ps_start_timer(nxpdev);
return skb_dequeue(&nxpdev->txq);
}
@@ -1186,6 +1201,9 @@ static void btnxpuart_tx_work(struct work_struct *work)
struct sk_buff *skb;
int len;
+ if (ps_wakeup(nxpdev))
+ return;
+
while ((skb = nxp_dequeue(nxpdev))) {
len = serdev_device_write_buf(serdev, skb->data, skb->len);
hdev->stat.byte_tx += len;
@@ -1234,6 +1252,9 @@ static int btnxpuart_close(struct hci_dev *hdev)
ps_wakeup(nxpdev);
serdev_device_close(nxpdev->serdev);
+ skb_queue_purge(&nxpdev->txq);
+ kfree_skb(nxpdev->rx_skb);
+ nxpdev->rx_skb = NULL;
clear_bit(BTNXPUART_SERDEV_OPEN, &nxpdev->tx_state);
return 0;
}
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index fdb0fae88d1c..b40b32fa7f1c 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev)
bt_dev_dbg(hdev, "QCA Patch config");
skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd),
- cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ cmd, 0, HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err);
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 277d039ecbb4..cc50de69e8dc 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -69,6 +69,7 @@ enum btrtl_chip_id {
CHIP_ID_8852B = 20,
CHIP_ID_8852C = 25,
CHIP_ID_8851B = 36,
+ CHIP_ID_8852BT = 47,
};
struct id_table {
@@ -307,6 +308,15 @@ static const struct id_table ic_id_table[] = {
.fw_name = "rtl_bt/rtl8851bu_fw",
.cfg_name = "rtl_bt/rtl8851bu_config",
.hw_info = "rtl8851bu" },
+
+ /* 8852BT/8852BE-VT */
+ { IC_INFO(RTL_ROM_LMP_8852A, 0x87, 0xc, HCI_USB),
+ .config_needed = false,
+ .has_rom_version = true,
+ .has_msft_ext = true,
+ .fw_name = "rtl_bt/rtl8852btu_fw",
+ .cfg_name = "rtl_bt/rtl8852btu_config",
+ .hw_info = "rtl8852btu" },
};
static const struct id_table *btrtl_match_ic(u16 lmp_subver, u16 hci_rev,
@@ -645,6 +655,7 @@ static int rtlbt_parse_firmware(struct hci_dev *hdev,
{ RTL_ROM_LMP_8852A, 20 }, /* 8852B */
{ RTL_ROM_LMP_8852A, 25 }, /* 8852C */
{ RTL_ROM_LMP_8851B, 36 }, /* 8851B */
+ { RTL_ROM_LMP_8852A, 47 }, /* 8852BT */
};
if (btrtl_dev->fw_len <= 8)
@@ -1275,6 +1286,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev)
case CHIP_ID_8852B:
case CHIP_ID_8852C:
case CHIP_ID_8851B:
+ case CHIP_ID_8852BT:
set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
@@ -1505,6 +1517,8 @@ MODULE_FIRMWARE("rtl_bt/rtl8852bs_fw.bin");
MODULE_FIRMWARE("rtl_bt/rtl8852bs_config.bin");
MODULE_FIRMWARE("rtl_bt/rtl8852bu_fw.bin");
MODULE_FIRMWARE("rtl_bt/rtl8852bu_config.bin");
+MODULE_FIRMWARE("rtl_bt/rtl8852btu_fw.bin");
+MODULE_FIRMWARE("rtl_bt/rtl8852btu_config.bin");
MODULE_FIRMWARE("rtl_bt/rtl8852cu_fw.bin");
MODULE_FIRMWARE("rtl_bt/rtl8852cu_fw_v2.bin");
MODULE_FIRMWARE("rtl_bt/rtl8852cu_config.bin");
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index d31edad7a056..06e915b57283 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -553,6 +553,9 @@ static const struct usb_device_id quirks_table[] = {
{ USB_DEVICE(0x13d3, 0x3572), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
+ /* Realtek 8852BT/8852BE-VT Bluetooth devices */
+ { USB_DEVICE(0x0bda, 0x8520), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
/* Realtek Bluetooth devices */
{ USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
.driver_info = BTUSB_REALTEK },
@@ -655,6 +658,11 @@ static const struct usb_device_id quirks_table[] = {
BTUSB_WIDEBAND_SPEECH |
BTUSB_VALID_LE_STATES },
+ /* Additional MediaTek MT7925 Bluetooth devices */
+ { USB_DEVICE(0x13d3, 0x3602), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
+
/* Additional Realtek 8723AE Bluetooth devices */
{ USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3394), .driver_info = BTUSB_REALTEK },
@@ -3080,7 +3088,7 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
int err, status;
u32 dev_id = 0;
char fw_bin_name[64];
- u32 fw_version = 0;
+ u32 fw_version = 0, fw_flavor = 0;
u8 param;
struct btmediatek_data *mediatek;
@@ -3103,6 +3111,11 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
bt_dev_err(hdev, "Failed to get fw version (%d)", err);
return err;
}
+ err = btusb_mtk_id_get(data, 0x70010020, &fw_flavor);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to get fw flavor (%d)", err);
+ return err;
+ }
}
mediatek = hci_get_priv(hdev);
@@ -3127,6 +3140,10 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
snprintf(fw_bin_name, sizeof(fw_bin_name),
"mediatek/mt%04x/BT_RAM_CODE_MT%04x_1_%x_hdr.bin",
dev_id & 0xffff, dev_id & 0xffff, (fw_version & 0xff) + 1);
+ else if (dev_id == 0x7961 && fw_flavor)
+ snprintf(fw_bin_name, sizeof(fw_bin_name),
+ "mediatek/BT_RAM_CODE_MT%04x_1a_%x_hdr.bin",
+ dev_id & 0xffff, (fw_version & 0xff) + 1);
else
snprintf(fw_bin_name, sizeof(fw_bin_name),
"mediatek/BT_RAM_CODE_MT%04x_1_%x_hdr.bin",
@@ -3273,7 +3290,6 @@ static int btusb_recv_acl_mtk(struct hci_dev *hdev, struct sk_buff *skb)
{
struct btusb_data *data = hci_get_drvdata(hdev);
u16 handle = le16_to_cpu(hci_acl_hdr(skb)->handle);
- struct sk_buff *skb_cd;
switch (handle) {
case 0xfc6f: /* Firmware dump from device */
@@ -3286,9 +3302,12 @@ static int btusb_recv_acl_mtk(struct hci_dev *hdev, struct sk_buff *skb)
* for backward compatibility, so we have to clone the packet
* extraly for the in-kernel coredump support.
*/
- skb_cd = skb_clone(skb, GFP_ATOMIC);
- if (skb_cd)
- btmtk_process_coredump(hdev, skb_cd);
+ if (IS_ENABLED(CONFIG_DEV_COREDUMP)) {
+ struct sk_buff *skb_cd = skb_clone(skb, GFP_ATOMIC);
+
+ if (skb_cd)
+ btmtk_process_coredump(hdev, skb_cd);
+ }
fallthrough;
case 0x05ff: /* Firmware debug logging 1 */
@@ -4481,6 +4500,7 @@ static int btusb_probe(struct usb_interface *intf,
set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks);
set_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks);
set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks);
+ set_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks);
}
if (!reset)
diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c
index a61757835695..9a7243d5db71 100644
--- a/drivers/bluetooth/hci_bcm4377.c
+++ b/drivers/bluetooth/hci_bcm4377.c
@@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377)
bda = (struct hci_rp_read_bd_addr *)skb->data;
if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr))
- set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks);
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks);
kfree_skb(skb);
return 0;
@@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hdev->set_bdaddr = bcm4377_hci_set_bdaddr;
hdev->setup = bcm4377_hci_setup;
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
if (bcm4377->hw->broken_mws_transport_config)
set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks);
if (bcm4377->hw->broken_ext_scan)
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index 71e748a9477e..c0436881a533 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -113,6 +113,7 @@ struct h5_vnd {
int (*suspend)(struct h5 *h5);
int (*resume)(struct h5 *h5);
const struct acpi_gpio_mapping *acpi_gpio_map;
+ int sizeof_priv;
};
struct h5_device_data {
@@ -863,7 +864,8 @@ static int h5_serdev_probe(struct serdev_device *serdev)
if (IS_ERR(h5->device_wake_gpio))
return PTR_ERR(h5->device_wake_gpio);
- return hci_uart_register_device(&h5->serdev_hu, &h5p);
+ return hci_uart_register_device_priv(&h5->serdev_hu, &h5p,
+ h5->vnd->sizeof_priv);
}
static void h5_serdev_remove(struct serdev_device *serdev)
@@ -1070,6 +1072,7 @@ static struct h5_vnd rtl_vnd = {
.suspend = h5_btrtl_suspend,
.resume = h5_btrtl_resume,
.acpi_gpio_map = acpi_btrtl_gpios,
+ .sizeof_priv = sizeof(struct btrealtek_data),
};
static const struct h5_device_data h5_data_rtl8822cs = {
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 94b8c406f0c0..8a60ad7acd70 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -7,6 +7,7 @@
*
* Copyright (C) 2007 Texas Instruments, Inc.
* Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Acknowledgements:
* This file is based on hci_ll.c, which was...
@@ -1806,13 +1807,12 @@ static int qca_power_on(struct hci_dev *hdev)
static void hci_coredump_qca(struct hci_dev *hdev)
{
+ int err;
static const u8 param[] = { 0x26 };
- struct sk_buff *skb;
- skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT);
- if (IS_ERR(skb))
- bt_dev_err(hdev, "%s: trigger crash failed (%ld)", __func__, PTR_ERR(skb));
- kfree_skb(skb);
+ err = __hci_cmd_send(hdev, 0xfc0c, 1, param);
+ if (err < 0)
+ bt_dev_err(hdev, "%s: trigger crash failed (%d)", __func__, err);
}
static int qca_get_data_path_id(struct hci_dev *hdev, __u8 *data_path_id)
@@ -1904,7 +1904,17 @@ retry:
case QCA_WCN6750:
case QCA_WCN6855:
case QCA_WCN7850:
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+
+ /* Set BDA quirk bit for reading BDA value from fwnode property
+ * only if that property exist in DT.
+ */
+ if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
+ } else {
+ bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
+ }
+
hci_set_aosp_capable(hdev);
ret = qca_read_soc_version(hdev, &ver, soc_type);
@@ -2316,7 +2326,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
- if (IS_ERR_OR_NULL(qcadev->bt_en) &&
+ if (IS_ERR(qcadev->bt_en) &&
(data->soc_type == QCA_WCN6750 ||
data->soc_type == QCA_WCN6855)) {
dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
@@ -2325,7 +2335,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
GPIOD_IN);
- if (IS_ERR_OR_NULL(qcadev->sw_ctrl) &&
+ if (IS_ERR(qcadev->sw_ctrl) &&
(data->soc_type == QCA_WCN6750 ||
data->soc_type == QCA_WCN6855 ||
data->soc_type == QCA_WCN7850))
@@ -2347,7 +2357,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
default:
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
- if (IS_ERR_OR_NULL(qcadev->bt_en)) {
+ if (IS_ERR(qcadev->bt_en)) {
dev_warn(&serdev->dev, "failed to acquire enable gpio\n");
power_ctrl_enabled = false;
}
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 39c8b567da3c..214fff876eae 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -300,8 +300,9 @@ static const struct serdev_device_ops hci_serdev_client_ops = {
.write_wakeup = hci_uart_write_wakeup,
};
-int hci_uart_register_device(struct hci_uart *hu,
- const struct hci_uart_proto *p)
+int hci_uart_register_device_priv(struct hci_uart *hu,
+ const struct hci_uart_proto *p,
+ int sizeof_priv)
{
int err;
struct hci_dev *hdev;
@@ -325,7 +326,7 @@ int hci_uart_register_device(struct hci_uart *hu,
set_bit(HCI_UART_PROTO_READY, &hu->flags);
/* Initialize and register HCI device */
- hdev = hci_alloc_dev();
+ hdev = hci_alloc_dev_priv(sizeof_priv);
if (!hdev) {
BT_ERR("Can't allocate HCI device");
err = -ENOMEM;
@@ -394,7 +395,7 @@ err_rwsem:
percpu_free_rwsem(&hu->proto_lock);
return err;
}
-EXPORT_SYMBOL_GPL(hci_uart_register_device);
+EXPORT_SYMBOL_GPL(hci_uart_register_device_priv);
void hci_uart_unregister_device(struct hci_uart *hu)
{
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index fb4a2d0d8cc8..68c8c7e95d64 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -97,7 +97,17 @@ struct hci_uart {
int hci_uart_register_proto(const struct hci_uart_proto *p);
int hci_uart_unregister_proto(const struct hci_uart_proto *p);
-int hci_uart_register_device(struct hci_uart *hu, const struct hci_uart_proto *p);
+
+int hci_uart_register_device_priv(struct hci_uart *hu,
+ const struct hci_uart_proto *p,
+ int sizeof_priv);
+
+static inline int hci_uart_register_device(struct hci_uart *hu,
+ const struct hci_uart_proto *p)
+{
+ return hci_uart_register_device_priv(hu, p, 0);
+}
+
void hci_uart_unregister_device(struct hci_uart *hu);
int hci_uart_tx_wakeup(struct hci_uart *hu);
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index e6742998f372..d5e7fa9173a1 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -186,11 +186,12 @@ config SUNXI_RSB
config TEGRA_ACONNECT
tristate "Tegra ACONNECT Bus Driver"
- depends on ARCH_TEGRA_210_SOC
+ depends on ARCH_TEGRA
depends on OF && PM
help
Driver for the Tegra ACONNECT bus which is used to interface with
- the devices inside the Audio Processing Engine (APE) for Tegra210.
+ the devices inside the Audio Processing Engine (APE) for
+ Tegra210 and later.
config TEGRA_GMI
tristate "Tegra Generic Memory Interface bus driver"
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index 6b5da73c8541..837bf9d51c6e 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -120,7 +120,7 @@ static int imx_weim_gpr_setup(struct platform_device *pdev)
i++;
}
- if (i == 0 || i % 4)
+ if (i == 0)
goto err;
for (i = 0; i < ARRAY_SIZE(gprvals); i++) {
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index fd3e9d82340a..1e29ba76615d 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -128,7 +128,7 @@ struct sunxi_rsb {
};
/* bus / slave device related functions */
-static struct bus_type sunxi_rsb_bus;
+static const struct bus_type sunxi_rsb_bus;
static int sunxi_rsb_device_match(struct device *dev, struct device_driver *drv)
{
@@ -177,7 +177,7 @@ static int sunxi_rsb_device_modalias(const struct device *dev, struct kobj_ueven
return of_device_uevent_modalias(dev, env);
}
-static struct bus_type sunxi_rsb_bus = {
+static const struct bus_type sunxi_rsb_bus = {
.name = RSB_CTRL_NAME,
.match = sunxi_rsb_device_match,
.probe = sunxi_rsb_device_probe,
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 245e5e827d0d..41d33f39efe5 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -2400,7 +2400,7 @@ static int sysc_child_add_clocks(struct sysc *ddata,
return 0;
}
-static struct device_type sysc_device_type = {
+static const struct device_type sysc_device_type = {
};
static struct sysc *sysc_child_to_parent(struct device *dev)
diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c
index 57186c58dc84..1d7dd3d2c101 100644
--- a/drivers/cache/ax45mp_cache.c
+++ b/drivers/cache/ax45mp_cache.c
@@ -129,8 +129,12 @@ static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size)
unsigned long line_size;
unsigned long flags;
+ if (unlikely(start == end))
+ return;
+
line_size = ax45mp_priv.ax45mp_cache_line_size;
start = start & (~(line_size - 1));
+ end = ((end + line_size - 1) & (~(line_size - 1)));
local_irq_save(flags);
ax45mp_cpu_dcache_wb_range(start, end);
local_irq_restore(flags);
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index d668b174ace9..eefdd422ad8e 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -724,11 +724,6 @@ static void probe_gdrom_setupdisk(void)
static int probe_gdrom_setupqueue(void)
{
- blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
- /* using DMA so memory will need to be contiguous */
- blk_queue_max_segments(gd.gdrom_rq, 1);
- /* set a large max size to get most from DMA */
- blk_queue_max_segment_size(gd.gdrom_rq, 0x40000);
gd.disk->queue = gd.gdrom_rq;
return gdrom_init_dma_mode();
}
@@ -743,6 +738,13 @@ static const struct blk_mq_ops gdrom_mq_ops = {
*/
static int probe_gdrom(struct platform_device *devptr)
{
+ struct queue_limits lim = {
+ .logical_block_size = GDROM_HARD_SECTOR,
+ /* using DMA so memory will need to be contiguous */
+ .max_segments = 1,
+ /* set a large max size to get most from DMA */
+ .max_segment_size = 0x40000,
+ };
int err;
/*
@@ -778,7 +780,7 @@ static int probe_gdrom(struct platform_device *devptr)
if (err)
goto probe_fail_free_cd_info;
- gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL);
+ gd.disk = blk_mq_alloc_disk(&gd.tag_set, &lim, NULL);
if (IS_ERR(gd.disk)) {
err = PTR_ERR(gd.disk);
goto probe_fail_free_tag_set;
@@ -829,7 +831,7 @@ probe_fail_no_mem:
return err;
}
-static int remove_gdrom(struct platform_device *devptr)
+static void remove_gdrom(struct platform_device *devptr)
{
blk_mq_free_tag_set(&gd.tag_set);
free_irq(HW_EVENT_GDROM_CMD, &gd);
@@ -840,13 +842,11 @@ static int remove_gdrom(struct platform_device *devptr)
unregister_cdrom(gd.cd_info);
kfree(gd.cd_info);
kfree(gd.toc);
-
- return 0;
}
static struct platform_driver gdrom_driver = {
.probe = probe_gdrom,
- .remove = remove_gdrom,
+ .remove_new = remove_gdrom,
.driver = {
.name = GDROM_DEV_NAME,
},
diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index 6994165e0395..0b60ae78f9d8 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -2458,15 +2458,18 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
static void __init rk3588_clk_init(struct device_node *np)
{
struct rockchip_clk_provider *ctx;
+ unsigned long clk_nr_clks;
void __iomem *reg_base;
+ clk_nr_clks = rockchip_clk_find_max_clk_id(rk3588_clk_branches,
+ ARRAY_SIZE(rk3588_clk_branches)) + 1;
reg_base = of_iomap(np, 0);
if (!reg_base) {
pr_err("%s: could not map cru region\n", __func__);
return;
}
- ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
+ ctx = rockchip_clk_init(np, reg_base, clk_nr_clks);
if (IS_ERR(ctx)) {
pr_err("%s: rockchip clk init failed\n", __func__);
iounmap(reg_base);
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 4059d9365ae6..73d2cbdc716b 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -429,6 +429,23 @@ void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx,
}
EXPORT_SYMBOL_GPL(rockchip_clk_register_plls);
+unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list,
+ unsigned int nr_clk)
+{
+ unsigned long max = 0;
+ unsigned int idx;
+
+ for (idx = 0; idx < nr_clk; idx++, list++) {
+ if (list->id > max)
+ max = list->id;
+ if (list->child && list->child->id > max)
+ max = list->id;
+ }
+
+ return max;
+}
+EXPORT_SYMBOL_GPL(rockchip_clk_find_max_clk_id);
+
void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
struct rockchip_clk_branch *list,
unsigned int nr_clk)
diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index 758ebaf2236b..fd3b476dedda 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -973,6 +973,8 @@ struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np,
void __iomem *base, unsigned long nr_clks);
void rockchip_clk_of_add_provider(struct device_node *np,
struct rockchip_clk_provider *ctx);
+unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list,
+ unsigned int nr_clk);
void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
struct rockchip_clk_branch *list,
unsigned int nr_clk);
diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c
index 0964bb11657f..782993951fff 100644
--- a/drivers/clk/samsung/clk-gs101.c
+++ b/drivers/clk/samsung/clk-gs101.c
@@ -2475,7 +2475,7 @@ static const struct samsung_cmu_info misc_cmu_info __initconst = {
.nr_clk_ids = CLKS_NR_MISC,
.clk_regs = misc_clk_regs,
.nr_clk_regs = ARRAY_SIZE(misc_clk_regs),
- .clk_name = "dout_cmu_misc_bus",
+ .clk_name = "bus",
};
/* ---- platform_driver ----------------------------------------------------- */
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index e054de92de91..8d4a52056684 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1807,7 +1807,7 @@ TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
#endif
int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts,
- struct clocksource **cs)
+ enum clocksource_ids *cs_id)
{
struct arm_smccc_res hvc_res;
u32 ptp_counter;
@@ -1831,8 +1831,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts,
*ts = ktime_to_timespec64(ktime);
if (cycle)
*cycle = (u64)hvc_res.a2 << 32 | hvc_res.a3;
- if (cs)
- *cs = &clocksource_counter;
+ if (cs_id)
+ *cs_id = CSID_ARM_ARCH_COUNTER;
return 0;
}
diff --git a/drivers/comedi/drivers/comedi_8255.c b/drivers/comedi/drivers/comedi_8255.c
index e4974b508328..a933ef53845a 100644
--- a/drivers/comedi/drivers/comedi_8255.c
+++ b/drivers/comedi/drivers/comedi_8255.c
@@ -159,6 +159,7 @@ static int __subdev_8255_init(struct comedi_device *dev,
return -ENOMEM;
spriv->context = context;
+ spriv->io = io;
s->type = COMEDI_SUBD_DIO;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c
index 30ea8b53ebf8..05ae9122823f 100644
--- a/drivers/comedi/drivers/comedi_test.c
+++ b/drivers/comedi/drivers/comedi_test.c
@@ -87,6 +87,8 @@ struct waveform_private {
struct comedi_device *dev; /* parent comedi device */
u64 ao_last_scan_time; /* time of previous AO scan in usec */
unsigned int ao_scan_period; /* AO scan period in usec */
+ bool ai_timer_enable:1; /* should AI timer be running? */
+ bool ao_timer_enable:1; /* should AO timer be running? */
unsigned short ao_loopbacks[N_CHANS];
};
@@ -236,8 +238,12 @@ static void waveform_ai_timer(struct timer_list *t)
time_increment = devpriv->ai_convert_time - now;
else
time_increment = 1;
- mod_timer(&devpriv->ai_timer,
- jiffies + usecs_to_jiffies(time_increment));
+ spin_lock(&dev->spinlock);
+ if (devpriv->ai_timer_enable) {
+ mod_timer(&devpriv->ai_timer,
+ jiffies + usecs_to_jiffies(time_increment));
+ }
+ spin_unlock(&dev->spinlock);
}
overrun:
@@ -393,9 +399,12 @@ static int waveform_ai_cmd(struct comedi_device *dev,
* Seem to need an extra jiffy here, otherwise timer expires slightly
* early!
*/
+ spin_lock_bh(&dev->spinlock);
+ devpriv->ai_timer_enable = true;
devpriv->ai_timer.expires =
jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1;
add_timer(&devpriv->ai_timer);
+ spin_unlock_bh(&dev->spinlock);
return 0;
}
@@ -404,6 +413,9 @@ static int waveform_ai_cancel(struct comedi_device *dev,
{
struct waveform_private *devpriv = dev->private;
+ spin_lock_bh(&dev->spinlock);
+ devpriv->ai_timer_enable = false;
+ spin_unlock_bh(&dev->spinlock);
if (in_softirq()) {
/* Assume we were called from the timer routine itself. */
del_timer(&devpriv->ai_timer);
@@ -495,8 +507,12 @@ static void waveform_ao_timer(struct timer_list *t)
unsigned int time_inc = devpriv->ao_last_scan_time +
devpriv->ao_scan_period - now;
- mod_timer(&devpriv->ao_timer,
- jiffies + usecs_to_jiffies(time_inc));
+ spin_lock(&dev->spinlock);
+ if (devpriv->ao_timer_enable) {
+ mod_timer(&devpriv->ao_timer,
+ jiffies + usecs_to_jiffies(time_inc));
+ }
+ spin_unlock(&dev->spinlock);
}
underrun:
@@ -517,9 +533,12 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev,
async->inttrig = NULL;
devpriv->ao_last_scan_time = ktime_to_us(ktime_get());
+ spin_lock_bh(&dev->spinlock);
+ devpriv->ao_timer_enable = true;
devpriv->ao_timer.expires =
jiffies + usecs_to_jiffies(devpriv->ao_scan_period);
add_timer(&devpriv->ao_timer);
+ spin_unlock_bh(&dev->spinlock);
return 1;
}
@@ -604,6 +623,9 @@ static int waveform_ao_cancel(struct comedi_device *dev,
struct waveform_private *devpriv = dev->private;
s->async->inttrig = NULL;
+ spin_lock_bh(&dev->spinlock);
+ devpriv->ao_timer_enable = false;
+ spin_unlock_bh(&dev->spinlock);
if (in_softirq()) {
/* Assume we were called from the timer routine itself. */
del_timer(&devpriv->ao_timer);
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 3d5e6d705fc6..44b19e696176 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -108,9 +108,8 @@ static inline void send_msg(struct cn_msg *msg)
filter_data[1] = 0;
}
- if (cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
- cn_filter, (void *)filter_data) == -ESRCH)
- atomic_set(&proc_event_num_listeners, 0);
+ cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
+ cn_filter, (void *)filter_data);
local_unlock(&local_event.lock);
}
diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
index 09c77afb33ca..3f24481fc04a 100644
--- a/drivers/counter/counter-core.c
+++ b/drivers/counter/counter-core.c
@@ -31,10 +31,11 @@ struct counter_device_allochelper {
struct counter_device counter;
/*
- * This is cache line aligned to ensure private data behaves like if it
- * were kmalloced separately.
+ * This ensures private data behaves like if it were kmalloced
+ * separately. Also ensures the minimum alignment for safe DMA
+ * operations (which may or may not mean cache alignment).
*/
- unsigned long privdata[] ____cacheline_aligned;
+ unsigned long privdata[] __aligned(ARCH_DMA_MINALIGN);
};
static void counter_device_release(struct device *dev)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 1f6186475715..1791d37fbc53 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1232,14 +1232,13 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
+ WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
+
max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
cpudata->max_limit_perf);
min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
cpudata->max_limit_perf);
-
- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
-
value = READ_ONCE(cpudata->cppc_req_cached);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2ca70b0b5fdc..79619227ea51 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -529,6 +529,30 @@ static int intel_pstate_cppc_get_scaling(int cpu)
}
#endif /* CONFIG_ACPI_CPPC_LIB */
+static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq,
+ unsigned int relation)
+{
+ if (freq == cpu->pstate.turbo_freq)
+ return cpu->pstate.turbo_pstate;
+
+ if (freq == cpu->pstate.max_freq)
+ return cpu->pstate.max_pstate;
+
+ switch (relation) {
+ case CPUFREQ_RELATION_H:
+ return freq / cpu->pstate.scaling;
+ case CPUFREQ_RELATION_C:
+ return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling);
+ }
+
+ return DIV_ROUND_UP(freq, cpu->pstate.scaling);
+}
+
+static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq)
+{
+ return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L);
+}
+
/**
* intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
* @cpu: Target CPU.
@@ -546,6 +570,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu);
int scaling = cpu->pstate.scaling;
+ int freq;
pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
@@ -559,16 +584,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
perf_ctl_scaling);
- cpu->pstate.max_pstate_physical =
- DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
- scaling);
+ freq = perf_ctl_max_phys * perf_ctl_scaling;
+ cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq);
- cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+ freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+ cpu->pstate.min_freq = freq;
/*
* Cast the min P-state value retrieved via pstate_funcs.get_min() to
* the effective range of HWP performance levels.
*/
- cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
+ cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
}
static inline void update_turbo_state(void)
@@ -2528,13 +2553,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
* abstract values to represent performance rather than pure ratios.
*/
if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) {
- int scaling = cpu->pstate.scaling;
int freq;
freq = max_policy_perf * perf_ctl_scaling;
- max_policy_perf = DIV_ROUND_UP(freq, scaling);
+ max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
freq = min_policy_perf * perf_ctl_scaling;
- min_policy_perf = DIV_ROUND_UP(freq, scaling);
+ min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
}
pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
@@ -2908,18 +2932,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
cpufreq_freq_transition_begin(policy, &freqs);
- switch (relation) {
- case CPUFREQ_RELATION_L:
- target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
- break;
- case CPUFREQ_RELATION_H:
- target_pstate = freqs.new / cpu->pstate.scaling;
- break;
- default:
- target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
- break;
- }
-
+ target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation);
target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false);
freqs.new = target_pstate * cpu->pstate.scaling;
@@ -2937,7 +2950,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
update_turbo_state();
- target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+ target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq);
target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
@@ -2974,6 +2987,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
if (min_pstate < cpu->min_perf_ratio)
min_pstate = cpu->min_perf_ratio;
+ if (min_pstate > cpu->max_perf_ratio)
+ min_pstate = cpu->max_perf_ratio;
+
max_pstate = min(cap_pstate, cpu->max_perf_ratio);
if (max_pstate < min_pstate)
max_pstate = min_pstate;
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 1262a7773ef3..de50c00ba218 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -299,22 +299,6 @@ theend:
return err;
}
-static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
-{
- struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq);
- struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
- struct sun8i_ce_dev *ce = op->ce;
- struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq);
- int flow, err;
-
- flow = rctx->flow;
- err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm));
- local_bh_disable();
- crypto_finalize_skcipher_request(engine, breq, err);
- local_bh_enable();
-}
-
static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine,
void *async_req)
{
@@ -360,6 +344,23 @@ static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine,
dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
}
+static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
+{
+ struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq);
+ struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct sun8i_ce_dev *ce = op->ce;
+ struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq);
+ int flow, err;
+
+ flow = rctx->flow;
+ err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm));
+ sun8i_ce_cipher_unprepare(engine, areq);
+ local_bh_disable();
+ crypto_finalize_skcipher_request(engine, breq, err);
+ local_bh_enable();
+}
+
int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq)
{
int err = sun8i_ce_cipher_prepare(engine, areq);
@@ -368,7 +369,6 @@ int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq)
return err;
sun8i_ce_cipher_run(engine, areq);
- sun8i_ce_cipher_unprepare(engine, areq);
return 0;
}
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index a148ff1f0872..a4f6884416a0 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -4545,6 +4545,7 @@ struct caam_hash_alg {
struct list_head entry;
struct device *dev;
int alg_type;
+ bool is_hmac;
struct ahash_alg ahash_alg;
};
@@ -4571,7 +4572,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
ctx->dev = caam_hash->dev;
- if (alg->setkey) {
+ if (caam_hash->is_hmac) {
ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key,
ARRAY_SIZE(ctx->key),
DMA_TO_DEVICE,
@@ -4611,7 +4612,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
* For keyed hash algorithms shared descriptors
* will be created later in setkey() callback
*/
- return alg->setkey ? 0 : ahash_set_sh_desc(ahash);
+ return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash);
}
static void caam_hash_cra_exit(struct crypto_tfm *tfm)
@@ -4646,12 +4647,14 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev,
template->hmac_name);
snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
template->hmac_driver_name);
+ t_alg->is_hmac = true;
} else {
snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
template->name);
snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
template->driver_name);
t_alg->ahash_alg.setkey = NULL;
+ t_alg->is_hmac = false;
}
alg->cra_module = THIS_MODULE;
alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 290c8500c247..fdd724228c2f 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1753,6 +1753,7 @@ static struct caam_hash_template driver_hash[] = {
struct caam_hash_alg {
struct list_head entry;
int alg_type;
+ bool is_hmac;
struct ahash_engine_alg ahash_alg;
};
@@ -1804,7 +1805,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
} else {
if (priv->era >= 6) {
ctx->dir = DMA_BIDIRECTIONAL;
- ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE;
+ ctx->key_dir = caam_hash->is_hmac ? DMA_TO_DEVICE : DMA_NONE;
} else {
ctx->dir = DMA_TO_DEVICE;
ctx->key_dir = DMA_NONE;
@@ -1862,7 +1863,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
* For keyed hash algorithms shared descriptors
* will be created later in setkey() callback
*/
- return alg->setkey ? 0 : ahash_set_sh_desc(ahash);
+ return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash);
}
static void caam_hash_cra_exit(struct crypto_tfm *tfm)
@@ -1915,12 +1916,14 @@ caam_hash_alloc(struct caam_hash_template *template,
template->hmac_name);
snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
template->hmac_driver_name);
+ t_alg->is_hmac = true;
} else {
snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
template->name);
snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
template->driver_name);
halg->setkey = NULL;
+ t_alg->is_hmac = false;
}
alg->cra_module = THIS_MODULE;
alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 32268e239bf1..f394e45e11ab 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -38,7 +38,7 @@ config CRYPTO_DEV_CCP_CRYPTO
config CRYPTO_DEV_SP_PSP
bool "Platform Security Processor (PSP) device"
default y
- depends on CRYPTO_DEV_CCP_DD && X86_64
+ depends on CRYPTO_DEV_CCP_DD && X86_64 && AMD_IOMMU
help
Provide support for the AMD Platform Security Processor (PSP).
The PSP is a dedicated processor that provides support for key
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index e4d3f45242f6..f44efbb89c34 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -21,14 +21,18 @@
#include <linux/hw_random.h>
#include <linux/ccp.h>
#include <linux/firmware.h>
+#include <linux/panic_notifier.h>
#include <linux/gfp.h>
#include <linux/cpufeature.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <linux/psp.h>
+#include <linux/amd-iommu.h>
#include <asm/smp.h>
#include <asm/cacheflush.h>
+#include <asm/e820/types.h>
+#include <asm/sev.h>
#include "psp-dev.h"
#include "sev-dev.h"
@@ -37,6 +41,19 @@
#define SEV_FW_FILE "amd/sev.fw"
#define SEV_FW_NAME_SIZE 64
+/* Minimum firmware version required for the SEV-SNP support */
+#define SNP_MIN_API_MAJOR 1
+#define SNP_MIN_API_MINOR 51
+
+/*
+ * Maximum number of firmware-writable buffers that might be specified
+ * in the parameters of a legacy SEV command buffer.
+ */
+#define CMD_BUF_FW_WRITABLE_MAX 2
+
+/* Leave room in the descriptor array for an end-of-list indicator. */
+#define CMD_BUF_DESC_MAX (CMD_BUF_FW_WRITABLE_MAX + 1)
+
static DEFINE_MUTEX(sev_cmd_mutex);
static struct sev_misc_dev *misc_dev;
@@ -68,9 +85,14 @@ static int psp_timeout;
* The TMR is a 1MB area that must be 1MB aligned. Use the page allocator
* to allocate the memory, which will return aligned memory for the specified
* allocation order.
+ *
+ * When SEV-SNP is enabled the TMR needs to be 2MB aligned and 2MB sized.
*/
-#define SEV_ES_TMR_SIZE (1024 * 1024)
+#define SEV_TMR_SIZE (1024 * 1024)
+#define SNP_TMR_SIZE (2 * 1024 * 1024)
+
static void *sev_es_tmr;
+static size_t sev_es_tmr_size = SEV_TMR_SIZE;
/* INIT_EX NV Storage:
* The NV Storage is a 32Kb area and must be 4Kb page aligned. Use the page
@@ -80,6 +102,13 @@ static void *sev_es_tmr;
#define NV_LENGTH (32 * 1024)
static void *sev_init_ex_buffer;
+/*
+ * SEV_DATA_RANGE_LIST:
+ * Array containing range of pages that firmware transitions to HV-fixed
+ * page state.
+ */
+static struct sev_data_range_list *snp_range_list;
+
static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
{
struct sev_device *sev = psp_master->sev_data;
@@ -115,6 +144,25 @@ static int sev_wait_cmd_ioc(struct sev_device *sev,
{
int ret;
+ /*
+ * If invoked during panic handling, local interrupts are disabled,
+ * so the PSP command completion interrupt can't be used. Poll for
+ * PSP command completion instead.
+ */
+ if (irqs_disabled()) {
+ unsigned long timeout_usecs = (timeout * USEC_PER_SEC) / 10;
+
+ /* Poll for SEV command completion: */
+ while (timeout_usecs--) {
+ *reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg);
+ if (*reg & PSP_CMDRESP_RESP)
+ return 0;
+
+ udelay(10);
+ }
+ return -ETIMEDOUT;
+ }
+
ret = wait_event_timeout(sev->int_queue,
sev->int_rcvd, timeout * HZ);
if (!ret)
@@ -130,6 +178,8 @@ static int sev_cmd_buffer_len(int cmd)
switch (cmd) {
case SEV_CMD_INIT: return sizeof(struct sev_data_init);
case SEV_CMD_INIT_EX: return sizeof(struct sev_data_init_ex);
+ case SEV_CMD_SNP_SHUTDOWN_EX: return sizeof(struct sev_data_snp_shutdown_ex);
+ case SEV_CMD_SNP_INIT_EX: return sizeof(struct sev_data_snp_init_ex);
case SEV_CMD_PLATFORM_STATUS: return sizeof(struct sev_user_data_status);
case SEV_CMD_PEK_CSR: return sizeof(struct sev_data_pek_csr);
case SEV_CMD_PEK_CERT_IMPORT: return sizeof(struct sev_data_pek_cert_import);
@@ -158,23 +208,27 @@ static int sev_cmd_buffer_len(int cmd)
case SEV_CMD_GET_ID: return sizeof(struct sev_data_get_id);
case SEV_CMD_ATTESTATION_REPORT: return sizeof(struct sev_data_attestation_report);
case SEV_CMD_SEND_CANCEL: return sizeof(struct sev_data_send_cancel);
+ case SEV_CMD_SNP_GCTX_CREATE: return sizeof(struct sev_data_snp_addr);
+ case SEV_CMD_SNP_LAUNCH_START: return sizeof(struct sev_data_snp_launch_start);
+ case SEV_CMD_SNP_LAUNCH_UPDATE: return sizeof(struct sev_data_snp_launch_update);
+ case SEV_CMD_SNP_ACTIVATE: return sizeof(struct sev_data_snp_activate);
+ case SEV_CMD_SNP_DECOMMISSION: return sizeof(struct sev_data_snp_addr);
+ case SEV_CMD_SNP_PAGE_RECLAIM: return sizeof(struct sev_data_snp_page_reclaim);
+ case SEV_CMD_SNP_GUEST_STATUS: return sizeof(struct sev_data_snp_guest_status);
+ case SEV_CMD_SNP_LAUNCH_FINISH: return sizeof(struct sev_data_snp_launch_finish);
+ case SEV_CMD_SNP_DBG_DECRYPT: return sizeof(struct sev_data_snp_dbg);
+ case SEV_CMD_SNP_DBG_ENCRYPT: return sizeof(struct sev_data_snp_dbg);
+ case SEV_CMD_SNP_PAGE_UNSMASH: return sizeof(struct sev_data_snp_page_unsmash);
+ case SEV_CMD_SNP_PLATFORM_STATUS: return sizeof(struct sev_data_snp_addr);
+ case SEV_CMD_SNP_GUEST_REQUEST: return sizeof(struct sev_data_snp_guest_request);
+ case SEV_CMD_SNP_CONFIG: return sizeof(struct sev_user_data_snp_config);
+ case SEV_CMD_SNP_COMMIT: return sizeof(struct sev_data_snp_commit);
default: return 0;
}
return 0;
}
-static void *sev_fw_alloc(unsigned long len)
-{
- struct page *page;
-
- page = alloc_pages(GFP_KERNEL, get_order(len));
- if (!page)
- return NULL;
-
- return page_address(page);
-}
-
static struct file *open_file_as_root(const char *filename, int flags, umode_t mode)
{
struct file *fp;
@@ -305,13 +359,485 @@ static int sev_write_init_ex_file_if_required(int cmd_id)
return sev_write_init_ex_file();
}
+/*
+ * snp_reclaim_pages() needs __sev_do_cmd_locked(), and __sev_do_cmd_locked()
+ * needs snp_reclaim_pages(), so a forward declaration is needed.
+ */
+static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret);
+
+static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked)
+{
+ int ret, err, i;
+
+ paddr = __sme_clr(ALIGN_DOWN(paddr, PAGE_SIZE));
+
+ for (i = 0; i < npages; i++, paddr += PAGE_SIZE) {
+ struct sev_data_snp_page_reclaim data = {0};
+
+ data.paddr = paddr;
+
+ if (locked)
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err);
+ else
+ ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err);
+
+ if (ret)
+ goto cleanup;
+
+ ret = rmp_make_shared(__phys_to_pfn(paddr), PG_LEVEL_4K);
+ if (ret)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ /*
+ * If there was a failure reclaiming the page then it is no longer safe
+ * to release it back to the system; leak it instead.
+ */
+ snp_leak_pages(__phys_to_pfn(paddr), npages - i);
+ return ret;
+}
+
+static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked)
+{
+ unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT;
+ int rc, i;
+
+ for (i = 0; i < npages; i++, pfn++) {
+ rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true);
+ if (rc)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ /*
+ * Try unrolling the firmware state changes by
+ * reclaiming the pages which were already changed to the
+ * firmware state.
+ */
+ snp_reclaim_pages(paddr, i, locked);
+
+ return rc;
+}
+
+static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order)
+{
+ unsigned long npages = 1ul << order, paddr;
+ struct sev_device *sev;
+ struct page *page;
+
+ if (!psp_master || !psp_master->sev_data)
+ return NULL;
+
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
+ return NULL;
+
+ /* If SEV-SNP is initialized then add the page in RMP table. */
+ sev = psp_master->sev_data;
+ if (!sev->snp_initialized)
+ return page;
+
+ paddr = __pa((unsigned long)page_address(page));
+ if (rmp_mark_pages_firmware(paddr, npages, false))
+ return NULL;
+
+ return page;
+}
+
+void *snp_alloc_firmware_page(gfp_t gfp_mask)
+{
+ struct page *page;
+
+ page = __snp_alloc_firmware_pages(gfp_mask, 0);
+
+ return page ? page_address(page) : NULL;
+}
+EXPORT_SYMBOL_GPL(snp_alloc_firmware_page);
+
+static void __snp_free_firmware_pages(struct page *page, int order, bool locked)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ unsigned long paddr, npages = 1ul << order;
+
+ if (!page)
+ return;
+
+ paddr = __pa((unsigned long)page_address(page));
+ if (sev->snp_initialized &&
+ snp_reclaim_pages(paddr, npages, locked))
+ return;
+
+ __free_pages(page, order);
+}
+
+void snp_free_firmware_page(void *addr)
+{
+ if (!addr)
+ return;
+
+ __snp_free_firmware_pages(virt_to_page(addr), 0, false);
+}
+EXPORT_SYMBOL_GPL(snp_free_firmware_page);
+
+static void *sev_fw_alloc(unsigned long len)
+{
+ struct page *page;
+
+ page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len));
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
+/**
+ * struct cmd_buf_desc - descriptors for managing legacy SEV command address
+ * parameters corresponding to buffers that may be written to by firmware.
+ *
+ * @paddr_ptr: pointer to the address parameter in the command buffer which may
+ * need to be saved/restored depending on whether a bounce buffer
+ * is used. In the case of a bounce buffer, the command buffer
+ * needs to be updated with the address of the new bounce buffer
+ * snp_map_cmd_buf_desc() has allocated specifically for it. Must
+ * be NULL if this descriptor is only an end-of-list indicator.
+ *
+ * @paddr_orig: storage for the original address parameter, which can be used to
+ * restore the original value in @paddr_ptr in cases where it is
+ * replaced with the address of a bounce buffer.
+ *
+ * @len: length of buffer located at the address originally stored at @paddr_ptr
+ *
+ * @guest_owned: true if the address corresponds to guest-owned pages, in which
+ * case bounce buffers are not needed.
+ */
+struct cmd_buf_desc {
+ u64 *paddr_ptr;
+ u64 paddr_orig;
+ u32 len;
+ bool guest_owned;
+};
+
+/*
+ * If a legacy SEV command parameter is a memory address, those pages in
+ * turn need to be transitioned to/from firmware-owned before/after
+ * executing the firmware command.
+ *
+ * Additionally, in cases where those pages are not guest-owned, a bounce
+ * buffer is needed in place of the original memory address parameter.
+ *
+ * A set of descriptors are used to keep track of this handling, and
+ * initialized here based on the specific commands being executed.
+ */
+static void snp_populate_cmd_buf_desc_list(int cmd, void *cmd_buf,
+ struct cmd_buf_desc *desc_list)
+{
+ switch (cmd) {
+ case SEV_CMD_PDH_CERT_EXPORT: {
+ struct sev_data_pdh_cert_export *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->pdh_cert_address;
+ desc_list[0].len = data->pdh_cert_len;
+ desc_list[1].paddr_ptr = &data->cert_chain_address;
+ desc_list[1].len = data->cert_chain_len;
+ break;
+ }
+ case SEV_CMD_GET_ID: {
+ struct sev_data_get_id *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_PEK_CSR: {
+ struct sev_data_pek_csr *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_LAUNCH_UPDATE_DATA: {
+ struct sev_data_launch_update_data *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_LAUNCH_UPDATE_VMSA: {
+ struct sev_data_launch_update_vmsa *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_LAUNCH_MEASURE: {
+ struct sev_data_launch_measure *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_LAUNCH_UPDATE_SECRET: {
+ struct sev_data_launch_secret *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->guest_address;
+ desc_list[0].len = data->guest_len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_DBG_DECRYPT: {
+ struct sev_data_dbg *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->dst_addr;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_DBG_ENCRYPT: {
+ struct sev_data_dbg *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->dst_addr;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_ATTESTATION_REPORT: {
+ struct sev_data_attestation_report *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_SEND_START: {
+ struct sev_data_send_start *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->session_address;
+ desc_list[0].len = data->session_len;
+ break;
+ }
+ case SEV_CMD_SEND_UPDATE_DATA: {
+ struct sev_data_send_update_data *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->hdr_address;
+ desc_list[0].len = data->hdr_len;
+ desc_list[1].paddr_ptr = &data->trans_address;
+ desc_list[1].len = data->trans_len;
+ break;
+ }
+ case SEV_CMD_SEND_UPDATE_VMSA: {
+ struct sev_data_send_update_vmsa *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->hdr_address;
+ desc_list[0].len = data->hdr_len;
+ desc_list[1].paddr_ptr = &data->trans_address;
+ desc_list[1].len = data->trans_len;
+ break;
+ }
+ case SEV_CMD_RECEIVE_UPDATE_DATA: {
+ struct sev_data_receive_update_data *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->guest_address;
+ desc_list[0].len = data->guest_len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_RECEIVE_UPDATE_VMSA: {
+ struct sev_data_receive_update_vmsa *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->guest_address;
+ desc_list[0].len = data->guest_len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static int snp_map_cmd_buf_desc(struct cmd_buf_desc *desc)
+{
+ unsigned int npages;
+
+ if (!desc->len)
+ return 0;
+
+ /* Allocate a bounce buffer if this isn't a guest owned page. */
+ if (!desc->guest_owned) {
+ struct page *page;
+
+ page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(desc->len));
+ if (!page) {
+ pr_warn("Failed to allocate bounce buffer for SEV legacy command.\n");
+ return -ENOMEM;
+ }
+
+ desc->paddr_orig = *desc->paddr_ptr;
+ *desc->paddr_ptr = __psp_pa(page_to_virt(page));
+ }
+
+ npages = PAGE_ALIGN(desc->len) >> PAGE_SHIFT;
+
+ /* Transition the buffer to firmware-owned. */
+ if (rmp_mark_pages_firmware(*desc->paddr_ptr, npages, true)) {
+ pr_warn("Error moving pages to firmware-owned state for SEV legacy command.\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int snp_unmap_cmd_buf_desc(struct cmd_buf_desc *desc)
+{
+ unsigned int npages;
+
+ if (!desc->len)
+ return 0;
+
+ npages = PAGE_ALIGN(desc->len) >> PAGE_SHIFT;
+
+ /* Transition the buffers back to hypervisor-owned. */
+ if (snp_reclaim_pages(*desc->paddr_ptr, npages, true)) {
+ pr_warn("Failed to reclaim firmware-owned pages while issuing SEV legacy command.\n");
+ return -EFAULT;
+ }
+
+ /* Copy data from bounce buffer and then free it. */
+ if (!desc->guest_owned) {
+ void *bounce_buf = __va(__sme_clr(*desc->paddr_ptr));
+ void *dst_buf = __va(__sme_clr(desc->paddr_orig));
+
+ memcpy(dst_buf, bounce_buf, desc->len);
+ __free_pages(virt_to_page(bounce_buf), get_order(desc->len));
+
+ /* Restore the original address in the command buffer. */
+ *desc->paddr_ptr = desc->paddr_orig;
+ }
+
+ return 0;
+}
+
+static int snp_map_cmd_buf_desc_list(int cmd, void *cmd_buf, struct cmd_buf_desc *desc_list)
+{
+ int i;
+
+ snp_populate_cmd_buf_desc_list(cmd, cmd_buf, desc_list);
+
+ for (i = 0; i < CMD_BUF_DESC_MAX; i++) {
+ struct cmd_buf_desc *desc = &desc_list[i];
+
+ if (!desc->paddr_ptr)
+ break;
+
+ if (snp_map_cmd_buf_desc(desc))
+ goto err_unmap;
+ }
+
+ return 0;
+
+err_unmap:
+ for (i--; i >= 0; i--)
+ snp_unmap_cmd_buf_desc(&desc_list[i]);
+
+ return -EFAULT;
+}
+
+static int snp_unmap_cmd_buf_desc_list(struct cmd_buf_desc *desc_list)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < CMD_BUF_DESC_MAX; i++) {
+ struct cmd_buf_desc *desc = &desc_list[i];
+
+ if (!desc->paddr_ptr)
+ break;
+
+ if (snp_unmap_cmd_buf_desc(&desc_list[i]))
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+static bool sev_cmd_buf_writable(int cmd)
+{
+ switch (cmd) {
+ case SEV_CMD_PLATFORM_STATUS:
+ case SEV_CMD_GUEST_STATUS:
+ case SEV_CMD_LAUNCH_START:
+ case SEV_CMD_RECEIVE_START:
+ case SEV_CMD_LAUNCH_MEASURE:
+ case SEV_CMD_SEND_START:
+ case SEV_CMD_SEND_UPDATE_DATA:
+ case SEV_CMD_SEND_UPDATE_VMSA:
+ case SEV_CMD_PEK_CSR:
+ case SEV_CMD_PDH_CERT_EXPORT:
+ case SEV_CMD_GET_ID:
+ case SEV_CMD_ATTESTATION_REPORT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* After SNP is INIT'ed, the behavior of legacy SEV commands is changed. */
+static bool snp_legacy_handling_needed(int cmd)
+{
+ struct sev_device *sev = psp_master->sev_data;
+
+ return cmd < SEV_CMD_SNP_INIT && sev->snp_initialized;
+}
+
+static int snp_prep_cmd_buf(int cmd, void *cmd_buf, struct cmd_buf_desc *desc_list)
+{
+ if (!snp_legacy_handling_needed(cmd))
+ return 0;
+
+ if (snp_map_cmd_buf_desc_list(cmd, cmd_buf, desc_list))
+ return -EFAULT;
+
+ /*
+ * Before command execution, the command buffer needs to be put into
+ * the firmware-owned state.
+ */
+ if (sev_cmd_buf_writable(cmd)) {
+ if (rmp_mark_pages_firmware(__pa(cmd_buf), 1, true))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int snp_reclaim_cmd_buf(int cmd, void *cmd_buf)
+{
+ if (!snp_legacy_handling_needed(cmd))
+ return 0;
+
+ /*
+ * After command completion, the command buffer needs to be put back
+ * into the hypervisor-owned state.
+ */
+ if (sev_cmd_buf_writable(cmd))
+ if (snp_reclaim_pages(__pa(cmd_buf), 1, true))
+ return -EFAULT;
+
+ return 0;
+}
+
static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
{
+ struct cmd_buf_desc desc_list[CMD_BUF_DESC_MAX] = {0};
struct psp_device *psp = psp_master;
struct sev_device *sev;
unsigned int cmdbuff_hi, cmdbuff_lo;
unsigned int phys_lsb, phys_msb;
unsigned int reg, ret = 0;
+ void *cmd_buf;
int buf_len;
if (!psp || !psp->sev_data)
@@ -331,12 +857,47 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
* work for some memory, e.g. vmalloc'd addresses, and @data may not be
* physically contiguous.
*/
- if (data)
- memcpy(sev->cmd_buf, data, buf_len);
+ if (data) {
+ /*
+ * Commands are generally issued one at a time and require the
+ * sev_cmd_mutex, but there could be recursive firmware requests
+ * due to SEV_CMD_SNP_PAGE_RECLAIM needing to be issued while
+ * preparing buffers for another command. This is the only known
+ * case of nesting in the current code, so exactly one
+ * additional command buffer is available for that purpose.
+ */
+ if (!sev->cmd_buf_active) {
+ cmd_buf = sev->cmd_buf;
+ sev->cmd_buf_active = true;
+ } else if (!sev->cmd_buf_backup_active) {
+ cmd_buf = sev->cmd_buf_backup;
+ sev->cmd_buf_backup_active = true;
+ } else {
+ dev_err(sev->dev,
+ "SEV: too many firmware commands in progress, no command buffers available.\n");
+ return -EBUSY;
+ }
+
+ memcpy(cmd_buf, data, buf_len);
+
+ /*
+ * The behavior of the SEV-legacy commands is altered when the
+ * SNP firmware is in the INIT state.
+ */
+ ret = snp_prep_cmd_buf(cmd, cmd_buf, desc_list);
+ if (ret) {
+ dev_err(sev->dev,
+ "SEV: failed to prepare buffer for legacy command 0x%x. Error: %d\n",
+ cmd, ret);
+ return ret;
+ }
+ } else {
+ cmd_buf = sev->cmd_buf;
+ }
/* Get the physical address of the command buffer */
- phys_lsb = data ? lower_32_bits(__psp_pa(sev->cmd_buf)) : 0;
- phys_msb = data ? upper_32_bits(__psp_pa(sev->cmd_buf)) : 0;
+ phys_lsb = data ? lower_32_bits(__psp_pa(cmd_buf)) : 0;
+ phys_msb = data ? upper_32_bits(__psp_pa(cmd_buf)) : 0;
dev_dbg(sev->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
cmd, phys_msb, phys_lsb, psp_timeout);
@@ -390,20 +951,41 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
ret = sev_write_init_ex_file_if_required(cmd);
}
- print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
- buf_len, false);
-
/*
* Copy potential output from the PSP back to data. Do this even on
* failure in case the caller wants to glean something from the error.
*/
- if (data)
- memcpy(data, sev->cmd_buf, buf_len);
+ if (data) {
+ int ret_reclaim;
+ /*
+ * Restore the page state after the command completes.
+ */
+ ret_reclaim = snp_reclaim_cmd_buf(cmd, cmd_buf);
+ if (ret_reclaim) {
+ dev_err(sev->dev,
+ "SEV: failed to reclaim buffer for legacy command %#x. Error: %d\n",
+ cmd, ret_reclaim);
+ return ret_reclaim;
+ }
+
+ memcpy(data, cmd_buf, buf_len);
+
+ if (sev->cmd_buf_backup_active)
+ sev->cmd_buf_backup_active = false;
+ else
+ sev->cmd_buf_active = false;
+
+ if (snp_unmap_cmd_buf_desc_list(desc_list))
+ return -EFAULT;
+ }
+
+ print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
+ buf_len, false);
return ret;
}
-static int sev_do_cmd(int cmd, void *data, int *psp_ret)
+int sev_do_cmd(int cmd, void *data, int *psp_ret)
{
int rc;
@@ -413,6 +995,7 @@ static int sev_do_cmd(int cmd, void *data, int *psp_ret)
return rc;
}
+EXPORT_SYMBOL_GPL(sev_do_cmd);
static int __sev_init_locked(int *error)
{
@@ -427,7 +1010,7 @@ static int __sev_init_locked(int *error)
data.tmr_address = __pa(sev_es_tmr);
data.flags |= SEV_INIT_FLAGS_SEV_ES;
- data.tmr_len = SEV_ES_TMR_SIZE;
+ data.tmr_len = sev_es_tmr_size;
}
return __sev_do_cmd_locked(SEV_CMD_INIT, &data, error);
@@ -450,7 +1033,7 @@ static int __sev_init_ex_locked(int *error)
data.tmr_address = __pa(sev_es_tmr);
data.flags |= SEV_INIT_FLAGS_SEV_ES;
- data.tmr_len = SEV_ES_TMR_SIZE;
+ data.tmr_len = sev_es_tmr_size;
}
return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error);
@@ -464,25 +1047,217 @@ static inline int __sev_do_init_locked(int *psp_ret)
return __sev_init_locked(psp_ret);
}
-static int __sev_platform_init_locked(int *error)
+static void snp_set_hsave_pa(void *arg)
+{
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+}
+
+static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)
+{
+ struct sev_data_range_list *range_list = arg;
+ struct sev_data_range *range = &range_list->ranges[range_list->num_elements];
+ size_t size;
+
+ /*
+ * Ensure the list of HV_FIXED pages that will be passed to firmware
+ * do not exceed the page-sized argument buffer.
+ */
+ if ((range_list->num_elements * sizeof(struct sev_data_range) +
+ sizeof(struct sev_data_range_list)) > PAGE_SIZE)
+ return -E2BIG;
+
+ switch (rs->desc) {
+ case E820_TYPE_RESERVED:
+ case E820_TYPE_PMEM:
+ case E820_TYPE_ACPI:
+ range->base = rs->start & PAGE_MASK;
+ size = PAGE_ALIGN((rs->end + 1) - rs->start);
+ range->page_count = size >> PAGE_SHIFT;
+ range_list->num_elements++;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int __sev_snp_init_locked(int *error)
{
- int rc = 0, psp_ret = SEV_RET_NO_FW_CALL;
struct psp_device *psp = psp_master;
+ struct sev_data_snp_init_ex data;
struct sev_device *sev;
+ void *arg = &data;
+ int cmd, rc = 0;
- if (!psp || !psp->sev_data)
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
return -ENODEV;
sev = psp->sev_data;
- if (sev->state == SEV_STATE_INIT)
+ if (sev->snp_initialized)
return 0;
- if (sev_init_ex_buffer) {
- rc = sev_read_init_ex_file();
- if (rc)
+ if (!sev_version_greater_or_equal(SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR)) {
+ dev_dbg(sev->dev, "SEV-SNP support requires firmware version >= %d:%d\n",
+ SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR);
+ return 0;
+ }
+
+ /* SNP_INIT requires MSR_VM_HSAVE_PA to be cleared on all CPUs. */
+ on_each_cpu(snp_set_hsave_pa, NULL, 1);
+
+ /*
+ * Starting in SNP firmware v1.52, the SNP_INIT_EX command takes a list
+ * of system physical address ranges to convert into HV-fixed page
+ * states during the RMP initialization. For instance, the memory that
+ * UEFI reserves should be included in the that list. This allows system
+ * components that occasionally write to memory (e.g. logging to UEFI
+ * reserved regions) to not fail due to RMP initialization and SNP
+ * enablement.
+ *
+ */
+ if (sev_version_greater_or_equal(SNP_MIN_API_MAJOR, 52)) {
+ /*
+ * Firmware checks that the pages containing the ranges enumerated
+ * in the RANGES structure are either in the default page state or in the
+ * firmware page state.
+ */
+ snp_range_list = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!snp_range_list) {
+ dev_err(sev->dev,
+ "SEV: SNP_INIT_EX range list memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Retrieve all reserved memory regions from the e820 memory map
+ * to be setup as HV-fixed pages.
+ */
+ rc = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM, 0, ~0,
+ snp_range_list, snp_filter_reserved_mem_regions);
+ if (rc) {
+ dev_err(sev->dev,
+ "SEV: SNP_INIT_EX walk_iomem_res_desc failed rc = %d\n", rc);
return rc;
+ }
+
+ memset(&data, 0, sizeof(data));
+ data.init_rmp = 1;
+ data.list_paddr_en = 1;
+ data.list_paddr = __psp_pa(snp_range_list);
+ cmd = SEV_CMD_SNP_INIT_EX;
+ } else {
+ cmd = SEV_CMD_SNP_INIT;
+ arg = NULL;
+ }
+
+ /*
+ * The following sequence must be issued before launching the first SNP
+ * guest to ensure all dirty cache lines are flushed, including from
+ * updates to the RMP table itself via the RMPUPDATE instruction:
+ *
+ * - WBINVD on all running CPUs
+ * - SEV_CMD_SNP_INIT[_EX] firmware command
+ * - WBINVD on all running CPUs
+ * - SEV_CMD_SNP_DF_FLUSH firmware command
+ */
+ wbinvd_on_all_cpus();
+
+ rc = __sev_do_cmd_locked(cmd, arg, error);
+ if (rc)
+ return rc;
+
+ /* Prepare for first SNP guest launch after INIT. */
+ wbinvd_on_all_cpus();
+ rc = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, error);
+ if (rc)
+ return rc;
+
+ sev->snp_initialized = true;
+ dev_dbg(sev->dev, "SEV-SNP firmware initialized\n");
+
+ sev_es_tmr_size = SNP_TMR_SIZE;
+
+ return rc;
+}
+
+static void __sev_platform_init_handle_tmr(struct sev_device *sev)
+{
+ if (sev_es_tmr)
+ return;
+
+ /* Obtain the TMR memory area for SEV-ES use */
+ sev_es_tmr = sev_fw_alloc(sev_es_tmr_size);
+ if (sev_es_tmr) {
+ /* Must flush the cache before giving it to the firmware */
+ if (!sev->snp_initialized)
+ clflush_cache_range(sev_es_tmr, sev_es_tmr_size);
+ } else {
+ dev_warn(sev->dev, "SEV: TMR allocation failed, SEV-ES support unavailable\n");
}
+}
+
+/*
+ * If an init_ex_path is provided allocate a buffer for the file and
+ * read in the contents. Additionally, if SNP is initialized, convert
+ * the buffer pages to firmware pages.
+ */
+static int __sev_platform_init_handle_init_ex_path(struct sev_device *sev)
+{
+ struct page *page;
+ int rc;
+
+ if (!init_ex_path)
+ return 0;
+
+ if (sev_init_ex_buffer)
+ return 0;
+
+ page = alloc_pages(GFP_KERNEL, get_order(NV_LENGTH));
+ if (!page) {
+ dev_err(sev->dev, "SEV: INIT_EX NV memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ sev_init_ex_buffer = page_address(page);
+
+ rc = sev_read_init_ex_file();
+ if (rc)
+ return rc;
+
+ /* If SEV-SNP is initialized, transition to firmware page. */
+ if (sev->snp_initialized) {
+ unsigned long npages;
+
+ npages = 1UL << get_order(NV_LENGTH);
+ if (rmp_mark_pages_firmware(__pa(sev_init_ex_buffer), npages, false)) {
+ dev_err(sev->dev, "SEV: INIT_EX NV memory page state change failed.\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static int __sev_platform_init_locked(int *error)
+{
+ int rc, psp_ret = SEV_RET_NO_FW_CALL;
+ struct sev_device *sev;
+
+ if (!psp_master || !psp_master->sev_data)
+ return -ENODEV;
+
+ sev = psp_master->sev_data;
+
+ if (sev->state == SEV_STATE_INIT)
+ return 0;
+
+ __sev_platform_init_handle_tmr(sev);
+
+ rc = __sev_platform_init_handle_init_ex_path(sev);
+ if (rc)
+ return rc;
rc = __sev_do_init_locked(&psp_ret);
if (rc && psp_ret == SEV_RET_SECURE_DATA_INVALID) {
@@ -520,12 +1295,46 @@ static int __sev_platform_init_locked(int *error)
return 0;
}
-int sev_platform_init(int *error)
+static int _sev_platform_init_locked(struct sev_platform_init_args *args)
+{
+ struct sev_device *sev;
+ int rc;
+
+ if (!psp_master || !psp_master->sev_data)
+ return -ENODEV;
+
+ sev = psp_master->sev_data;
+
+ if (sev->state == SEV_STATE_INIT)
+ return 0;
+
+ /*
+ * Legacy guests cannot be running while SNP_INIT(_EX) is executing,
+ * so perform SEV-SNP initialization at probe time.
+ */
+ rc = __sev_snp_init_locked(&args->error);
+ if (rc && rc != -ENODEV) {
+ /*
+ * Don't abort the probe if SNP INIT failed,
+ * continue to initialize the legacy SEV firmware.
+ */
+ dev_err(sev->dev, "SEV-SNP: failed to INIT rc %d, error %#x\n",
+ rc, args->error);
+ }
+
+ /* Defer legacy SEV/SEV-ES support if allowed by caller/module. */
+ if (args->probe && !psp_init_on_probe)
+ return 0;
+
+ return __sev_platform_init_locked(&args->error);
+}
+
+int sev_platform_init(struct sev_platform_init_args *args)
{
int rc;
mutex_lock(&sev_cmd_mutex);
- rc = __sev_platform_init_locked(error);
+ rc = _sev_platform_init_locked(args);
mutex_unlock(&sev_cmd_mutex);
return rc;
@@ -534,10 +1343,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init);
static int __sev_platform_shutdown_locked(int *error)
{
- struct sev_device *sev = psp_master->sev_data;
+ struct psp_device *psp = psp_master;
+ struct sev_device *sev;
int ret;
- if (!sev || sev->state == SEV_STATE_UNINIT)
+ if (!psp || !psp->sev_data)
+ return 0;
+
+ sev = psp->sev_data;
+
+ if (sev->state == SEV_STATE_UNINIT)
return 0;
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
@@ -550,17 +1365,6 @@ static int __sev_platform_shutdown_locked(int *error)
return ret;
}
-static int sev_platform_shutdown(int *error)
-{
- int rc;
-
- mutex_lock(&sev_cmd_mutex);
- rc = __sev_platform_shutdown_locked(NULL);
- mutex_unlock(&sev_cmd_mutex);
-
- return rc;
-}
-
static int sev_get_platform_state(int *state, int *error)
{
struct sev_user_data_status data;
@@ -836,6 +1640,72 @@ fw_err:
return ret;
}
+static int __sev_snp_shutdown_locked(int *error, bool panic)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_data_snp_shutdown_ex data;
+ int ret;
+
+ if (!sev->snp_initialized)
+ return 0;
+
+ memset(&data, 0, sizeof(data));
+ data.len = sizeof(data);
+ data.iommu_snp_shutdown = 1;
+
+ /*
+ * If invoked during panic handling, local interrupts are disabled
+ * and all CPUs are stopped, so wbinvd_on_all_cpus() can't be called.
+ * In that case, a wbinvd() is done on remote CPUs via the NMI
+ * callback, so only a local wbinvd() is needed here.
+ */
+ if (!panic)
+ wbinvd_on_all_cpus();
+ else
+ wbinvd();
+
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error);
+ /* SHUTDOWN may require DF_FLUSH */
+ if (*error == SEV_RET_DFFLUSH_REQUIRED) {
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, NULL);
+ if (ret) {
+ dev_err(sev->dev, "SEV-SNP DF_FLUSH failed\n");
+ return ret;
+ }
+ /* reissue the shutdown command */
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data,
+ error);
+ }
+ if (ret) {
+ dev_err(sev->dev, "SEV-SNP firmware shutdown failed\n");
+ return ret;
+ }
+
+ /*
+ * SNP_SHUTDOWN_EX with IOMMU_SNP_SHUTDOWN set to 1 disables SNP
+ * enforcement by the IOMMU and also transitions all pages
+ * associated with the IOMMU to the Reclaim state.
+ * Firmware was transitioning the IOMMU pages to Hypervisor state
+ * before version 1.53. But, accounting for the number of assigned
+ * 4kB pages in a 2M page was done incorrectly by not transitioning
+ * to the Reclaim state. This resulted in RMP #PF when later accessing
+ * the 2M page containing those pages during kexec boot. Hence, the
+ * firmware now transitions these pages to Reclaim state and hypervisor
+ * needs to transition these pages to shared state. SNP Firmware
+ * version 1.53 and above are needed for kexec boot.
+ */
+ ret = amd_iommu_snp_disable();
+ if (ret) {
+ dev_err(sev->dev, "SNP IOMMU shutdown failed\n");
+ return ret;
+ }
+
+ sev->snp_initialized = false;
+ dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
+
+ return ret;
+}
+
static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
@@ -1078,6 +1948,85 @@ e_free_pdh:
return ret;
}
+static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_data_snp_addr buf;
+ struct page *status_page;
+ void *data;
+ int ret;
+
+ if (!sev->snp_initialized || !argp->data)
+ return -EINVAL;
+
+ status_page = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!status_page)
+ return -ENOMEM;
+
+ data = page_address(status_page);
+
+ /*
+ * Firmware expects status page to be in firmware-owned state, otherwise
+ * it will report firmware error code INVALID_PAGE_STATE (0x1A).
+ */
+ if (rmp_mark_pages_firmware(__pa(data), 1, true)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+
+ buf.address = __psp_pa(data);
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &argp->error);
+
+ /*
+ * Status page will be transitioned to Reclaim state upon success, or
+ * left in Firmware state in failure. Use snp_reclaim_pages() to
+ * transition either case back to Hypervisor-owned state.
+ */
+ if (snp_reclaim_pages(__pa(data), 1, true))
+ return -EFAULT;
+
+ if (ret)
+ goto cleanup;
+
+ if (copy_to_user((void __user *)argp->data, data,
+ sizeof(struct sev_user_data_snp_status)))
+ ret = -EFAULT;
+
+cleanup:
+ __free_pages(status_page, 0);
+ return ret;
+}
+
+static int sev_ioctl_do_snp_commit(struct sev_issue_cmd *argp)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_data_snp_commit buf;
+
+ if (!sev->snp_initialized)
+ return -EINVAL;
+
+ buf.len = sizeof(buf);
+
+ return __sev_do_cmd_locked(SEV_CMD_SNP_COMMIT, &buf, &argp->error);
+}
+
+static int sev_ioctl_do_snp_set_config(struct sev_issue_cmd *argp, bool writable)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_user_data_snp_config config;
+
+ if (!sev->snp_initialized || !argp->data)
+ return -EINVAL;
+
+ if (!writable)
+ return -EPERM;
+
+ if (copy_from_user(&config, (void __user *)argp->data, sizeof(config)))
+ return -EFAULT;
+
+ return __sev_do_cmd_locked(SEV_CMD_SNP_CONFIG, &config, &argp->error);
+}
+
static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
{
void __user *argp = (void __user *)arg;
@@ -1129,6 +2078,15 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
case SEV_GET_ID2:
ret = sev_ioctl_do_get_id2(&input);
break;
+ case SNP_PLATFORM_STATUS:
+ ret = sev_ioctl_do_snp_platform_status(&input);
+ break;
+ case SNP_COMMIT:
+ ret = sev_ioctl_do_snp_commit(&input);
+ break;
+ case SNP_SET_CONFIG:
+ ret = sev_ioctl_do_snp_set_config(&input, writable);
+ break;
default:
ret = -EINVAL;
goto out;
@@ -1239,10 +2197,12 @@ int sev_dev_init(struct psp_device *psp)
if (!sev)
goto e_err;
- sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 0);
+ sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 1);
if (!sev->cmd_buf)
goto e_sev;
+ sev->cmd_buf_backup = (uint8_t *)sev->cmd_buf + PAGE_SIZE;
+
psp->sev_data = sev;
sev->dev = dev;
@@ -1281,24 +2241,51 @@ e_err:
return ret;
}
-static void sev_firmware_shutdown(struct sev_device *sev)
+static void __sev_firmware_shutdown(struct sev_device *sev, bool panic)
{
- sev_platform_shutdown(NULL);
+ int error;
+
+ __sev_platform_shutdown_locked(NULL);
if (sev_es_tmr) {
- /* The TMR area was encrypted, flush it from the cache */
- wbinvd_on_all_cpus();
+ /*
+ * The TMR area was encrypted, flush it from the cache.
+ *
+ * If invoked during panic handling, local interrupts are
+ * disabled and all CPUs are stopped, so wbinvd_on_all_cpus()
+ * can't be used. In that case, wbinvd() is done on remote CPUs
+ * via the NMI callback, and done for this CPU later during
+ * SNP shutdown, so wbinvd_on_all_cpus() can be skipped.
+ */
+ if (!panic)
+ wbinvd_on_all_cpus();
- free_pages((unsigned long)sev_es_tmr,
- get_order(SEV_ES_TMR_SIZE));
+ __snp_free_firmware_pages(virt_to_page(sev_es_tmr),
+ get_order(sev_es_tmr_size),
+ true);
sev_es_tmr = NULL;
}
if (sev_init_ex_buffer) {
- free_pages((unsigned long)sev_init_ex_buffer,
- get_order(NV_LENGTH));
+ __snp_free_firmware_pages(virt_to_page(sev_init_ex_buffer),
+ get_order(NV_LENGTH),
+ true);
sev_init_ex_buffer = NULL;
}
+
+ if (snp_range_list) {
+ kfree(snp_range_list);
+ snp_range_list = NULL;
+ }
+
+ __sev_snp_shutdown_locked(&error, panic);
+}
+
+static void sev_firmware_shutdown(struct sev_device *sev)
+{
+ mutex_lock(&sev_cmd_mutex);
+ __sev_firmware_shutdown(sev, false);
+ mutex_unlock(&sev_cmd_mutex);
}
void sev_dev_destroy(struct psp_device *psp)
@@ -1316,6 +2303,29 @@ void sev_dev_destroy(struct psp_device *psp)
psp_clear_sev_irq_handler(psp);
}
+static int snp_shutdown_on_panic(struct notifier_block *nb,
+ unsigned long reason, void *arg)
+{
+ struct sev_device *sev = psp_master->sev_data;
+
+ /*
+ * If sev_cmd_mutex is already acquired, then it's likely
+ * another PSP command is in flight and issuing a shutdown
+ * would fail in unexpected ways. Rather than create even
+ * more confusion during a panic, just bail out here.
+ */
+ if (mutex_is_locked(&sev_cmd_mutex))
+ return NOTIFY_DONE;
+
+ __sev_firmware_shutdown(sev, true);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block snp_panic_notifier = {
+ .notifier_call = snp_shutdown_on_panic,
+};
+
int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
void *data, int *error)
{
@@ -1329,7 +2339,8 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
void sev_pci_init(void)
{
struct sev_device *sev = psp_master->sev_data;
- int error, rc;
+ struct sev_platform_init_args args = {0};
+ int rc;
if (!sev)
return;
@@ -1342,36 +2353,18 @@ void sev_pci_init(void)
if (sev_update_firmware(sev->dev) == 0)
sev_get_api_version();
- /* If an init_ex_path is provided rely on INIT_EX for PSP initialization
- * instead of INIT.
- */
- if (init_ex_path) {
- sev_init_ex_buffer = sev_fw_alloc(NV_LENGTH);
- if (!sev_init_ex_buffer) {
- dev_err(sev->dev,
- "SEV: INIT_EX NV memory allocation failed\n");
- goto err;
- }
- }
-
- /* Obtain the TMR memory area for SEV-ES use */
- sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE);
- if (sev_es_tmr)
- /* Must flush the cache before giving it to the firmware */
- clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE);
- else
- dev_warn(sev->dev,
- "SEV: TMR allocation failed, SEV-ES support unavailable\n");
-
- if (!psp_init_on_probe)
- return;
-
/* Initialize the platform */
- rc = sev_platform_init(&error);
+ args.probe = true;
+ rc = sev_platform_init(&args);
if (rc)
dev_err(sev->dev, "SEV: failed to INIT error %#x, rc %d\n",
- error, rc);
+ args.error, rc);
+ dev_info(sev->dev, "SEV%s API:%d.%d build:%d\n", sev->snp_initialized ?
+ "-SNP" : "", sev->api_major, sev->api_minor, sev->build);
+
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &snp_panic_notifier);
return;
err:
@@ -1386,4 +2379,7 @@ void sev_pci_exit(void)
return;
sev_firmware_shutdown(sev);
+
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &snp_panic_notifier);
}
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
index 778c95155e74..3e4e5574e88a 100644
--- a/drivers/crypto/ccp/sev-dev.h
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -52,6 +52,11 @@ struct sev_device {
u8 build;
void *cmd_buf;
+ void *cmd_buf_backup;
+ bool cmd_buf_active;
+ bool cmd_buf_backup_active;
+
+ bool snp_initialized;
};
int sev_dev_init(struct psp_device *psp);
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 479062aa5e6b..94a0ebb03d8c 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -463,6 +463,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->fw_name = ADF_402XX_FW;
hw_data->fw_mmp_name = ADF_402XX_MMP;
hw_data->uof_get_name = uof_get_name_402xx;
+ hw_data->get_ena_thd_mask = get_ena_thd_mask;
break;
case ADF_401XX_PCI_DEVICE_ID:
hw_data->fw_name = ADF_4XXX_FW;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 1b13b4aa16ec..a235e6c300f1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -332,12 +332,12 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
theend:
pm_runtime_put_autosuspend(rkc->dev);
+ rk_hash_unprepare(engine, breq);
+
local_bh_disable();
crypto_finalize_hash_request(engine, breq, err);
local_bh_enable();
- rk_hash_unprepare(engine, breq);
-
return 0;
}
diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
index 2621ff8a9376..de53eddf6796 100644
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request *
}
static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx,
- struct virtio_crypto_ctrl_header *header, void *para,
+ struct virtio_crypto_ctrl_header *header,
+ struct virtio_crypto_akcipher_session_para *para,
const uint8_t *key, unsigned int keylen)
{
struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3];
@@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher
ctrl = &vc_ctrl_req->ctrl;
memcpy(&ctrl->header, header, sizeof(ctrl->header));
- memcpy(&ctrl->u, para, sizeof(ctrl->u));
+ memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para));
input = &vc_ctrl_req->input;
input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR);
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index dcf2b39e1048..1a3e6aafbdcc 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = {
.qos_class = cxl_acpi_qos_class,
};
-static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
- const unsigned long end)
+static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
+ struct cxl_cfmws_context *ctx)
{
int target_map[CXL_DECODER_MAX_INTERLEAVE];
- struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct resource *cxl_res = ctx->cxl_res;
struct cxl_cxims_context cxims_ctx;
struct cxl_root_decoder *cxlrd;
struct device *dev = ctx->dev;
- struct acpi_cedt_cfmws *cfmws;
cxl_calc_hb_fn cxl_calc_hb;
struct cxl_decoder *cxld;
unsigned int ways, i, ig;
struct resource *res;
int rc;
- cfmws = (struct acpi_cedt_cfmws *) header;
-
rc = cxl_acpi_cfmws_verify(dev, cfmws);
if (rc) {
dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1);
- return 0;
+ return rc;
}
rc = eiw_to_ways(cfmws->interleave_ways, &ways);
@@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
if (IS_ERR(cxlrd))
- return 0;
+ return PTR_ERR(cxlrd);
cxld = &cxlrd->cxlsd.cxld;
cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
@@ -420,16 +416,7 @@ err_xormap:
put_device(&cxld->dev);
else
rc = cxl_decoder_autoremove(dev, cxld);
- if (rc) {
- dev_err(dev, "Failed to add decode range: %pr", res);
- return rc;
- }
- dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
- dev_name(&cxld->dev),
- phys_to_target_node(cxld->hpa_range.start),
- cxld->hpa_range.start, cxld->hpa_range.end);
-
- return 0;
+ return rc;
err_insert:
kfree(res->name);
@@ -438,6 +425,29 @@ err_name:
return -ENOMEM;
}
+static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
+ const unsigned long end)
+{
+ struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header;
+ struct cxl_cfmws_context *ctx = arg;
+ struct device *dev = ctx->dev;
+ int rc;
+
+ rc = __cxl_parse_cfmws(cfmws, ctx);
+ if (rc)
+ dev_err(dev,
+ "Failed to add decode range: [%#llx - %#llx] (%d)\n",
+ cfmws->base_hpa,
+ cfmws->base_hpa + cfmws->window_size - 1, rc);
+ else
+ dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n",
+ phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa,
+ cfmws->base_hpa + cfmws->window_size - 1);
+
+ /* never fail cxl_acpi load for a single window failure */
+ return 0;
+}
+
__mock struct acpi_device *to_cxl_host_bridge(struct device *host,
struct device *dev)
{
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 6fe11546889f..08fd0baea7a0 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -210,19 +210,12 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
return 0;
}
-static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
- struct list_head *list)
+static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
+ struct cxl_dpa_perf *dpa_perf)
{
- struct cxl_dpa_perf *dpa_perf;
-
- dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL);
- if (!dpa_perf)
- return;
-
dpa_perf->dpa_range = dent->dpa_range;
dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
- list_add_tail(&dpa_perf->list, list);
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
@@ -230,20 +223,6 @@ static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
dent->coord.read_latency, dent->coord.write_latency);
}
-static void free_perf_ents(void *data)
-{
- struct cxl_memdev_state *mds = data;
- struct cxl_dpa_perf *dpa_perf, *n;
- LIST_HEAD(discard);
-
- list_splice_tail_init(&mds->ram_perf_list, &discard);
- list_splice_tail_init(&mds->pmem_perf_list, &discard);
- list_for_each_entry_safe(dpa_perf, n, &discard, list) {
- list_del(&dpa_perf->list);
- kfree(dpa_perf);
- }
-}
-
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
struct xarray *dsmas_xa)
{
@@ -263,16 +242,14 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
xa_for_each(dsmas_xa, index, dent) {
if (resource_size(&cxlds->ram_res) &&
range_contains(&ram_range, &dent->dpa_range))
- add_perf_entry(dev, dent, &mds->ram_perf_list);
+ update_perf_entry(dev, dent, &mds->ram_perf);
else if (resource_size(&cxlds->pmem_res) &&
range_contains(&pmem_range, &dent->dpa_range))
- add_perf_entry(dev, dent, &mds->pmem_perf_list);
+ update_perf_entry(dev, dent, &mds->pmem_perf);
else
dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
dent->dpa_range.start);
}
-
- devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds);
}
static int match_cxlrd_qos_class(struct device *dev, void *data)
@@ -293,24 +270,24 @@ static int match_cxlrd_qos_class(struct device *dev, void *data)
return 0;
}
-static void cxl_qos_match(struct cxl_port *root_port,
- struct list_head *work_list,
- struct list_head *discard_list)
+static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
{
- struct cxl_dpa_perf *dpa_perf, *n;
+ *dpa_perf = (struct cxl_dpa_perf) {
+ .qos_class = CXL_QOS_CLASS_INVALID,
+ };
+}
- list_for_each_entry_safe(dpa_perf, n, work_list, list) {
- int rc;
+static bool cxl_qos_match(struct cxl_port *root_port,
+ struct cxl_dpa_perf *dpa_perf)
+{
+ if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
+ return false;
- if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
- return;
+ if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class,
+ match_cxlrd_qos_class))
+ return false;
- rc = device_for_each_child(&root_port->dev,
- (void *)&dpa_perf->qos_class,
- match_cxlrd_qos_class);
- if (!rc)
- list_move_tail(&dpa_perf->list, discard_list);
- }
+ return true;
}
static int match_cxlrd_hb(struct device *dev, void *data)
@@ -334,23 +311,10 @@ static int match_cxlrd_hb(struct device *dev, void *data)
return 0;
}
-static void discard_dpa_perf(struct list_head *list)
-{
- struct cxl_dpa_perf *dpa_perf, *n;
-
- list_for_each_entry_safe(dpa_perf, n, list, list) {
- list_del(&dpa_perf->list);
- kfree(dpa_perf);
- }
-}
-DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T))
-
static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- LIST_HEAD(__discard);
- struct list_head *discard __free(dpa_perf) = &__discard;
struct cxl_port *root_port;
int rc;
@@ -363,16 +327,17 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
root_port = &cxl_root->port;
/* Check that the QTG IDs are all sane between end device and root decoders */
- cxl_qos_match(root_port, &mds->ram_perf_list, discard);
- cxl_qos_match(root_port, &mds->pmem_perf_list, discard);
+ if (!cxl_qos_match(root_port, &mds->ram_perf))
+ reset_dpa_perf(&mds->ram_perf);
+ if (!cxl_qos_match(root_port, &mds->pmem_perf))
+ reset_dpa_perf(&mds->pmem_perf);
/* Check to make sure that the device's host bridge is under a root decoder */
rc = device_for_each_child(&root_port->dev,
- (void *)cxlmd->endpoint->host_bridge,
- match_cxlrd_hb);
+ cxlmd->endpoint->host_bridge, match_cxlrd_hb);
if (!rc) {
- list_splice_tail_init(&mds->ram_perf_list, discard);
- list_splice_tail_init(&mds->pmem_perf_list, discard);
+ reset_dpa_perf(&mds->ram_perf);
+ reset_dpa_perf(&mds->pmem_perf);
}
return rc;
@@ -417,6 +382,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port)
cxl_memdev_set_qos_class(cxlds, dsmas_xa);
cxl_qos_class_verify(cxlmd);
+ cxl_memdev_update_perf(cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 27166a411705..9adda4795eb7 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1391,8 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mds->cxlds.reg_map.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
- INIT_LIST_HEAD(&mds->ram_perf_list);
- INIT_LIST_HEAD(&mds->pmem_perf_list);
+ mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
+ mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
return mds;
}
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index dae8802ecdb0..d4e259f3a7e9 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -447,13 +447,41 @@ static struct attribute *cxl_memdev_attributes[] = {
NULL,
};
+static ssize_t pmem_qos_class_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+ return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class);
+}
+
+static struct device_attribute dev_attr_pmem_qos_class =
+ __ATTR(qos_class, 0444, pmem_qos_class_show, NULL);
+
static struct attribute *cxl_memdev_pmem_attributes[] = {
&dev_attr_pmem_size.attr,
+ &dev_attr_pmem_qos_class.attr,
NULL,
};
+static ssize_t ram_qos_class_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+ return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
+}
+
+static struct device_attribute dev_attr_ram_qos_class =
+ __ATTR(qos_class, 0444, ram_qos_class_show, NULL);
+
static struct attribute *cxl_memdev_ram_attributes[] = {
&dev_attr_ram_size.attr,
+ &dev_attr_ram_qos_class.attr,
NULL,
};
@@ -477,14 +505,42 @@ static struct attribute_group cxl_memdev_attribute_group = {
.is_visible = cxl_memdev_visible,
};
+static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+ if (a == &dev_attr_ram_qos_class.attr)
+ if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
+ return 0;
+
+ return a->mode;
+}
+
static struct attribute_group cxl_memdev_ram_attribute_group = {
.name = "ram",
.attrs = cxl_memdev_ram_attributes,
+ .is_visible = cxl_ram_visible,
};
+static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+ if (a == &dev_attr_pmem_qos_class.attr)
+ if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
+ return 0;
+
+ return a->mode;
+}
+
static struct attribute_group cxl_memdev_pmem_attribute_group = {
.name = "pmem",
.attrs = cxl_memdev_pmem_attributes,
+ .is_visible = cxl_pmem_visible,
};
static umode_t cxl_memdev_security_visible(struct kobject *kobj,
@@ -519,6 +575,13 @@ static const struct attribute_group *cxl_memdev_attribute_groups[] = {
NULL,
};
+void cxl_memdev_update_perf(struct cxl_memdev *cxlmd)
+{
+ sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_ram_attribute_group);
+ sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_pmem_attribute_group);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_memdev_update_perf, CXL);
+
static const struct device_type cxl_memdev_type = {
.name = "cxl_memdev",
.release = cxl_memdev_release,
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 6c9c8d92f8f7..e9e6c81ce034 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -477,9 +477,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
allowed++;
}
- if (!allowed) {
- cxl_set_mem_enable(cxlds, 0);
- info->mem_enabled = 0;
+ if (!allowed && info->mem_enabled) {
+ dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
+ return -ENXIO;
}
/*
@@ -932,11 +932,21 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct device *dev = &cxlds->cxlmd->dev;
+
+ scoped_guard(device, dev) {
+ if (!dev->driver) {
+ dev_warn(&pdev->dev,
+ "%s: memdev disabled, abort error handling\n",
+ dev_name(dev));
+ return;
+ }
- if (cxlds->rcd)
- cxl_handle_rdport_errors(cxlds);
+ if (cxlds->rcd)
+ cxl_handle_rdport_errors(cxlds);
- cxl_handle_endpoint_cor_ras(cxlds);
+ cxl_handle_endpoint_cor_ras(cxlds);
+ }
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
@@ -948,16 +958,25 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
struct device *dev = &cxlmd->dev;
bool ue;
- if (cxlds->rcd)
- cxl_handle_rdport_errors(cxlds);
+ scoped_guard(device, dev) {
+ if (!dev->driver) {
+ dev_warn(&pdev->dev,
+ "%s: memdev disabled, abort error handling\n",
+ dev_name(dev));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ if (cxlds->rcd)
+ cxl_handle_rdport_errors(cxlds);
+ /*
+ * A frozen channel indicates an impending reset which is fatal to
+ * CXL.mem operation, and will likely crash the system. On the off
+ * chance the situation is recoverable dump the status of the RAS
+ * capability registers and bounce the active state of the memdev.
+ */
+ ue = cxl_handle_endpoint_ras(cxlds);
+ }
- /*
- * A frozen channel indicates an impending reset which is fatal to
- * CXL.mem operation, and will likely crash the system. On the off
- * chance the situation is recoverable dump the status of the RAS
- * capability registers and bounce the active state of the memdev.
- */
- ue = cxl_handle_endpoint_ras(cxlds);
switch (state) {
case pci_channel_io_normal:
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 0f05692bfec3..4c7fd2d5cccb 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -525,7 +525,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_region_params *p = &cxlr->params;
struct resource *res;
- u32 remainder = 0;
+ u64 remainder = 0;
lockdep_assert_held_write(&cxl_region_rwsem);
@@ -545,7 +545,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
(cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
return -ENXIO;
- div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
+ div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
if (remainder)
return -EINVAL;
@@ -730,12 +730,17 @@ static int match_auto_decoder(struct device *dev, void *data)
return 0;
}
-static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
- struct cxl_region *cxlr)
+static struct cxl_decoder *
+cxl_region_find_decoder(struct cxl_port *port,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region *cxlr)
{
struct device *dev;
int id = 0;
+ if (port == cxled_to_port(cxled))
+ return &cxled->cxld;
+
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
dev = device_find_child(&port->dev, &cxlr->params,
match_auto_decoder);
@@ -753,8 +758,31 @@ static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
return to_cxl_decoder(dev);
}
-static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
- struct cxl_region *cxlr)
+static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
+ struct cxl_decoder *cxld)
+{
+ struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
+ struct cxl_decoder *cxld_iter = rr->decoder;
+
+ /*
+ * Allow the out of order assembly of auto-discovered regions.
+ * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
+ * in HPA order. Confirm that the decoder with the lesser HPA
+ * starting address has the lesser id.
+ */
+ dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
+ dev_name(&cxld->dev), cxld->id,
+ dev_name(&cxld_iter->dev), cxld_iter->id);
+
+ if (cxld_iter->id > cxld->id)
+ return true;
+
+ return false;
+}
+
+static struct cxl_region_ref *
+alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_region_ref *cxl_rr, *iter;
@@ -764,16 +792,21 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
xa_for_each(&port->regions, index, iter) {
struct cxl_region_params *ip = &iter->region->params;
- if (!ip->res)
+ if (!ip->res || ip->res->start < p->res->start)
continue;
- if (ip->res->start > p->res->start) {
- dev_dbg(&cxlr->dev,
- "%s: HPA order violation %s:%pr vs %pr\n",
- dev_name(&port->dev),
- dev_name(&iter->region->dev), ip->res, p->res);
- return ERR_PTR(-EBUSY);
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ struct cxl_decoder *cxld;
+
+ cxld = cxl_region_find_decoder(port, cxled, cxlr);
+ if (auto_order_ok(port, iter->region, cxld))
+ continue;
}
+ dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
+ dev_name(&port->dev),
+ dev_name(&iter->region->dev), ip->res, p->res);
+
+ return ERR_PTR(-EBUSY);
}
cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
@@ -853,10 +886,7 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
{
struct cxl_decoder *cxld;
- if (port == cxled_to_port(cxled))
- cxld = &cxled->cxld;
- else
- cxld = cxl_region_find_decoder(port, cxlr);
+ cxld = cxl_region_find_decoder(port, cxled, cxlr);
if (!cxld) {
dev_dbg(&cxlr->dev, "%s: no decoder available\n",
dev_name(&port->dev));
@@ -953,7 +983,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
nr_targets_inc = true;
}
} else {
- cxl_rr = alloc_region_ref(port, cxlr);
+ cxl_rr = alloc_region_ref(port, cxlr, cxled);
if (IS_ERR(cxl_rr)) {
dev_dbg(&cxlr->dev,
"%s: failed to allocate region reference\n",
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 89445435303a..bdf117a33744 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -338,7 +338,7 @@ TRACE_EVENT(cxl_general_media,
TP_fast_assign(
CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
- memcpy(&__entry->hdr_uuid, &CXL_EVENT_GEN_MEDIA_UUID, sizeof(uuid_t));
+ __entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID;
/* General Media */
__entry->dpa = le64_to_cpu(rec->phys_addr);
@@ -425,7 +425,7 @@ TRACE_EVENT(cxl_dram,
TP_fast_assign(
CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
- memcpy(&__entry->hdr_uuid, &CXL_EVENT_DRAM_UUID, sizeof(uuid_t));
+ __entry->hdr_uuid = CXL_EVENT_DRAM_UUID;
/* DRAM */
__entry->dpa = le64_to_cpu(rec->phys_addr);
@@ -573,7 +573,7 @@ TRACE_EVENT(cxl_memory_module,
TP_fast_assign(
CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
- memcpy(&__entry->hdr_uuid, &CXL_EVENT_MEM_MODULE_UUID, sizeof(uuid_t));
+ __entry->hdr_uuid = CXL_EVENT_MEM_MODULE_UUID;
/* Memory Module Event */
__entry->event_type = rec->event_type;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b6017c0c57b4..003feebab79b 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -880,6 +880,8 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
+void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
+
/*
* Unit test builds overrides this to __weak, find the 'strong' version
* of these symbols in tools/testing/cxl/.
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 5303d6942b88..20fb3b35e89e 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -395,13 +395,11 @@ enum cxl_devtype {
/**
* struct cxl_dpa_perf - DPA performance property entry
- * @list - list entry
* @dpa_range - range for DPA address
* @coord - QoS performance data (i.e. latency, bandwidth)
* @qos_class - QoS Class cookies
*/
struct cxl_dpa_perf {
- struct list_head list;
struct range dpa_range;
struct access_coordinate coord;
int qos_class;
@@ -471,8 +469,8 @@ struct cxl_dev_state {
* @security: security driver state info
* @fw: firmware upload / activation state
* @mbox_send: @dev specific transport for transmitting mailbox commands
- * @ram_perf_list: performance data entries matched to RAM
- * @pmem_perf_list: performance data entries matched to PMEM
+ * @ram_perf: performance data entry matched to RAM partition
+ * @pmem_perf: performance data entry matched to PMEM partition
*
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
* details on capacity parameters.
@@ -494,8 +492,8 @@ struct cxl_memdev_state {
u64 next_volatile_bytes;
u64 next_persistent_bytes;
- struct list_head ram_perf_list;
- struct list_head pmem_perf_list;
+ struct cxl_dpa_perf ram_perf;
+ struct cxl_dpa_perf pmem_perf;
struct cxl_event_state event;
struct cxl_poison_state poison;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index c5c9d8e0d88d..0c79d9ce877c 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -215,52 +215,6 @@ static ssize_t trigger_poison_list_store(struct device *dev,
}
static DEVICE_ATTR_WO(trigger_poison_list);
-static ssize_t ram_qos_class_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- struct cxl_dpa_perf *dpa_perf;
-
- if (!dev->driver)
- return -ENOENT;
-
- if (list_empty(&mds->ram_perf_list))
- return -ENOENT;
-
- dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf,
- list);
-
- return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
-}
-
-static struct device_attribute dev_attr_ram_qos_class =
- __ATTR(qos_class, 0444, ram_qos_class_show, NULL);
-
-static ssize_t pmem_qos_class_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- struct cxl_dpa_perf *dpa_perf;
-
- if (!dev->driver)
- return -ENOENT;
-
- if (list_empty(&mds->pmem_perf_list))
- return -ENOENT;
-
- dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf,
- list);
-
- return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
-}
-
-static struct device_attribute dev_attr_pmem_qos_class =
- __ATTR(qos_class, 0444, pmem_qos_class_show, NULL);
-
static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
@@ -272,21 +226,11 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
mds->poison.enabled_cmds))
return 0;
- if (a == &dev_attr_pmem_qos_class.attr)
- if (list_empty(&mds->pmem_perf_list))
- return 0;
-
- if (a == &dev_attr_ram_qos_class.attr)
- if (list_empty(&mds->ram_perf_list))
- return 0;
-
return a->mode;
}
static struct attribute *cxl_mem_attrs[] = {
&dev_attr_trigger_poison_list.attr,
- &dev_attr_ram_qos_class.attr,
- &dev_attr_pmem_qos_class.attr,
NULL
};
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4fd1f207c84e..2ff361e756d6 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -382,7 +382,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
return rc;
}
-static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
+static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
@@ -441,7 +441,7 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
/* background command interrupts are optional */
- if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
+ if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail)
return 0;
msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
@@ -588,7 +588,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
}
-static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
+static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
int nvecs;
@@ -605,9 +605,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
PCI_IRQ_MSIX | PCI_IRQ_MSI);
if (nvecs < 1) {
dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
- return -ENXIO;
+ return false;
}
- return 0;
+ return true;
}
static irqreturn_t cxl_event_thread(int irq, void *id)
@@ -743,7 +743,7 @@ static bool cxl_event_int_is_fw(u8 setting)
}
static int cxl_event_config(struct pci_host_bridge *host_bridge,
- struct cxl_memdev_state *mds)
+ struct cxl_memdev_state *mds, bool irq_avail)
{
struct cxl_event_interrupt_policy policy;
int rc;
@@ -755,6 +755,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
if (!host_bridge->native_cxl_error)
return 0;
+ if (!irq_avail) {
+ dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n");
+ return 0;
+ }
+
rc = cxl_mem_alloc_event_buf(mds);
if (rc)
return rc;
@@ -789,6 +794,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
int i, rc, pmu_count;
+ bool irq_avail;
/*
* Double check the anonymous union trickery in struct cxl_regs
@@ -846,11 +852,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
else
dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
- rc = cxl_alloc_irq_vectors(pdev);
- if (rc)
- return rc;
+ irq_avail = cxl_alloc_irq_vectors(pdev);
- rc = cxl_pci_setup_mailbox(mds);
+ rc = cxl_pci_setup_mailbox(mds, irq_avail);
if (rc)
return rc;
@@ -909,7 +913,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
}
- rc = cxl_event_config(host_bridge, mds);
+ rc = cxl_event_config(host_bridge, mds, irq_avail);
if (rc)
return rc;
@@ -970,61 +974,6 @@ static struct pci_driver cxl_pci_driver = {
},
};
-#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
-static void cxl_cper_event_call(enum cxl_event_type ev_type,
- struct cxl_cper_event_rec *rec)
-{
- struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
- struct pci_dev *pdev __free(pci_dev_put) = NULL;
- enum cxl_event_log_type log_type;
- struct cxl_dev_state *cxlds;
- unsigned int devfn;
- u32 hdr_flags;
-
- devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
- pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
- device_id->bus_num, devfn);
- if (!pdev)
- return;
-
- guard(pci_dev)(pdev);
- if (pdev->driver != &cxl_pci_driver)
- return;
-
- cxlds = pci_get_drvdata(pdev);
- if (!cxlds)
- return;
-
- /* Fabricate a log type */
- hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
- log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
-
- cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
- &uuid_null, &rec->event);
-}
-
-static int __init cxl_pci_driver_init(void)
-{
- int rc;
-
- rc = cxl_cper_register_callback(cxl_cper_event_call);
- if (rc)
- return rc;
-
- rc = pci_register_driver(&cxl_pci_driver);
- if (rc)
- cxl_cper_unregister_callback(cxl_cper_event_call);
-
- return rc;
-}
-
-static void __exit cxl_pci_driver_exit(void)
-{
- pci_unregister_driver(&cxl_pci_driver);
- cxl_cper_unregister_callback(cxl_cper_event_call);
-}
-
-module_init(cxl_pci_driver_init);
-module_exit(cxl_pci_driver_exit);
+module_pci_driver(cxl_pci_driver);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index f4b635526345..a0244f6bb44b 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -532,8 +532,7 @@ static int dax_fs_init(void)
int rc;
dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
- (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
init_once);
if (!dax_cache)
return -ENOMEM;
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index ee899f8e6721..4a63567e93ba 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -168,10 +168,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf)
if (vmf->pgoff > buffer->pagecount)
return VM_FAULT_SIGBUS;
- vmf->page = buffer->pages[vmf->pgoff];
- get_page(vmf->page);
-
- return 0;
+ return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff]));
}
static const struct vm_operations_struct dma_heap_vm_ops = {
@@ -185,6 +182,8 @@ static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
return -EINVAL;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
vma->vm_ops = &dma_heap_vm_ops;
vma->vm_private_data = buffer;
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fb89ecbf0cc5..40052d1bd0b5 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -222,8 +222,14 @@ struct atdma_sg {
* @vd: pointer to the virtual dma descriptor.
* @atchan: pointer to the atmel dma channel.
* @total_len: total transaction byte count
- * @sg_len: number of sg entries.
+ * @sglen: number of sg entries.
* @sg: array of sgs.
+ * @boundary: number of transfers to perform before the automatic address increment operation
+ * @dst_hole: value to add to the destination address when the boundary has been reached
+ * @src_hole: value to add to the source address when the boundary has been reached
+ * @memset_buffer: buffer used for the memset operation
+ * @memset_paddr: physical address of the buffer used for the memset operation
+ * @memset_vaddr: virtual address of the buffer used for the memset operation
*/
struct at_desc {
struct virt_dma_desc vd;
@@ -245,7 +251,10 @@ struct at_desc {
/*-- Channels --------------------------------------------------------*/
/**
- * atc_status - information bits stored in channel status flag
+ * enum atc_status - information bits stored in channel status flag
+ *
+ * @ATC_IS_PAUSED: If channel is pauses
+ * @ATC_IS_CYCLIC: If channel is cyclic
*
* Manipulated with atomic operations.
*/
@@ -282,7 +291,6 @@ struct at_dma_chan {
u32 save_cfg;
u32 save_dscr;
struct dma_slave_config dma_sconfig;
- bool cyclic;
struct at_desc *desc;
};
@@ -328,12 +336,12 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
/**
* struct at_dma - internal representation of an Atmel HDMA Controller
* @dma_device: dmaengine dma_device object members
- * @atdma_devtype: identifier of DMA controller compatibility
- * @ch_regs: memory mapped register base
+ * @regs: memory mapped register base
* @clk: dma controller clock
* @save_imr: interrupt mask register that is saved on suspend/resume cycle
* @all_chan_mask: all channels availlable in a mask
* @lli_pool: hw lli table
+ * @memset_pool: hw memset pool
* @chan: channels table to store at_dma_chan structures
*/
struct at_dma {
@@ -626,6 +634,9 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
/**
* atc_get_llis_residue - Get residue for a hardware linked list transfer
+ * @atchan: pointer to an atmel hdmac channel.
+ * @desc: pointer to the descriptor for which the residue is calculated.
+ * @residue: residue to be set to dma_tx_state.
*
* Calculate the residue by removing the length of the Linked List Item (LLI)
* already transferred from the total length. To get the current LLI we can use
@@ -661,10 +672,8 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
* two DSCR values are different, we read again the CTRLA then the DSCR till two
* consecutive read values from DSCR are equal or till the maximum trials is
* reach. This algorithm is very unlikely not to find a stable value for DSCR.
- * @atchan: pointer to an atmel hdmac channel.
- * @desc: pointer to the descriptor for which the residue is calculated.
- * @residue: residue to be set to dma_tx_state.
- * Returns 0 on success, -errno otherwise.
+ *
+ * Returns: %0 on success, -errno otherwise.
*/
static int atc_get_llis_residue(struct at_dma_chan *atchan,
struct at_desc *desc, u32 *residue)
@@ -731,7 +740,8 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan,
* @chan: DMA channel
* @cookie: transaction identifier to check status of
* @residue: residue to be updated.
- * Return 0 on success, -errono otherwise.
+ *
+ * Return: %0 on success, -errno otherwise.
*/
static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie,
u32 *residue)
@@ -1710,7 +1720,7 @@ static void atc_issue_pending(struct dma_chan *chan)
* atc_alloc_chan_resources - allocate resources for DMA channel
* @chan: allocate descriptor resources for this channel
*
- * return - the number of allocated descriptors
+ * Return: the number of allocated descriptors
*/
static int atc_alloc_chan_resources(struct dma_chan *chan)
{
diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c
index b38786f0ad79..b75fdaffad9a 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
@@ -346,6 +346,20 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
dw_edma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr);
}
+static void dw_edma_v0_sync_ll_data(struct dw_edma_chunk *chunk)
+{
+ /*
+ * In case of remote eDMA engine setup, the DW PCIe RP/EP internal
+ * configuration registers and application memory are normally accessed
+ * over different buses. Ensure LL-data reaches the memory before the
+ * doorbell register is toggled by issuing the dummy-read from the remote
+ * LL memory in a hope that the MRd TLP will return only after the
+ * last MWr TLP is completed
+ */
+ if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+ readl(chunk->ll_region.vaddr.io);
+}
+
static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
{
struct dw_edma_chan *chan = chunk->chan;
@@ -412,6 +426,9 @@ static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
SET_CH_32(dw, chan->dir, chan->id, llp.msb,
upper_32_bits(chunk->ll_region.paddr));
}
+
+ dw_edma_v0_sync_ll_data(chunk);
+
/* Doorbell */
SET_RW_32(dw, chan->dir, doorbell,
FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id));
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c
index 00b735a0202a..10e8f0715114 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -65,18 +65,12 @@ static void dw_hdma_v0_core_off(struct dw_edma *dw)
static u16 dw_hdma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir)
{
- u32 num_ch = 0;
- int id;
-
- for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) {
- if (GET_CH_32(dw, id, dir, ch_en) & BIT(0))
- num_ch++;
- }
-
- if (num_ch > HDMA_V0_MAX_NR_CH)
- num_ch = HDMA_V0_MAX_NR_CH;
-
- return (u16)num_ch;
+ /*
+ * The HDMA IP have no way to know the number of hardware channels
+ * available, we set it to maximum channels and let the platform
+ * set the right number of channels.
+ */
+ return HDMA_V0_MAX_NR_CH;
}
static enum dma_status dw_hdma_v0_core_ch_status(struct dw_edma_chan *chan)
@@ -228,6 +222,20 @@ static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
dw_hdma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr);
}
+static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk)
+{
+ /*
+ * In case of remote HDMA engine setup, the DW PCIe RP/EP internal
+ * configuration registers and application memory are normally accessed
+ * over different buses. Ensure LL-data reaches the memory before the
+ * doorbell register is toggled by issuing the dummy-read from the remote
+ * LL memory in a hope that the MRd TLP will return only after the
+ * last MWr TLP is completed
+ */
+ if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+ readl(chunk->ll_region.vaddr.io);
+}
+
static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
{
struct dw_edma_chan *chan = chunk->chan;
@@ -242,7 +250,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
/* Interrupt enable&unmask - done, abort */
tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) |
HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK |
- HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_STOP_INT_EN;
+ HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN;
+ if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+ tmp |= HDMA_V0_REMOTE_STOP_INT_EN | HDMA_V0_REMOTE_ABORT_INT_EN;
SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp);
/* Channel control */
SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN);
@@ -256,6 +266,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
/* Set consumer cycle */
SET_CH_32(dw, chan->dir, chan->id, cycle_sync,
HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT);
+
+ dw_hdma_v0_sync_ll_data(chunk);
+
/* Doorbell */
SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START);
}
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h
index a974abdf8aaf..eab5fd7177e5 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-regs.h
+++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h
@@ -15,7 +15,7 @@
#define HDMA_V0_LOCAL_ABORT_INT_EN BIT(6)
#define HDMA_V0_REMOTE_ABORT_INT_EN BIT(5)
#define HDMA_V0_LOCAL_STOP_INT_EN BIT(4)
-#define HDMA_V0_REMOTEL_STOP_INT_EN BIT(3)
+#define HDMA_V0_REMOTE_STOP_INT_EN BIT(3)
#define HDMA_V0_ABORT_INT_MASK BIT(2)
#define HDMA_V0_STOP_INT_MASK BIT(0)
#define HDMA_V0_LINKLIST_EN BIT(0)
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
index 7958ac33e36c..5a8061a307cd 100644
--- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan)
if (!dpaa2_chan->fd_pool)
goto err;
- dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev,
- sizeof(struct dpaa2_fl_entry),
- sizeof(struct dpaa2_fl_entry), 0);
+ dpaa2_chan->fl_pool =
+ dma_pool_create("fl_pool", dev,
+ sizeof(struct dpaa2_fl_entry) * 3,
+ sizeof(struct dpaa2_fl_entry), 0);
+
if (!dpaa2_chan->fl_pool)
goto err_fd;
dpaa2_chan->sdd_pool =
dma_pool_create("sdd_pool", dev,
- sizeof(struct dpaa2_qdma_sd_d),
+ sizeof(struct dpaa2_qdma_sd_d) * 2,
sizeof(struct dpaa2_qdma_sd_d), 0);
if (!dpaa2_chan->sdd_pool)
goto err_fl;
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index b53f46245c37..793f1a7ad5e3 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -503,7 +503,7 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
if (fsl_chan->is_multi_fifo) {
/* set mloff to support multiple fifo */
burst = cfg->direction == DMA_DEV_TO_MEM ?
- cfg->src_addr_width : cfg->dst_addr_width;
+ cfg->src_maxburst : cfg->dst_maxburst;
nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4));
/* enable DMLOE/SMLOE */
if (cfg->direction == DMA_MEM_TO_DEV) {
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index bb5221158a77..f5e216b157c7 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
-#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK)
#define EDMA_TCD_CSR_START BIT(0)
#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 45cc419b1b4a..d36e28b9c767 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
*/
#include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
DMAENGINE_ALIGN_32_BYTES;
/* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
- dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+ FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
index a1d0aa63142a..5005e138fc23 100644
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -109,6 +109,7 @@
#define FSL_QDMA_CMD_WTHROTL_OFFSET 20
#define FSL_QDMA_CMD_DSEN_OFFSET 19
#define FSL_QDMA_CMD_LWC_OFFSET 16
+#define FSL_QDMA_CMD_PF BIT(17)
/* Field definition for Descriptor status */
#define QDMA_CCDF_STATUS_RTE BIT(5)
@@ -160,6 +161,10 @@ struct fsl_qdma_format {
u8 __reserved1[2];
u8 cfg8b_w1;
} __packed;
+ struct {
+ __le32 __reserved2;
+ __le32 cmd;
+ } __packed;
__le64 data;
};
} __packed;
@@ -354,7 +359,6 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan)
static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
dma_addr_t dst, dma_addr_t src, u32 len)
{
- u32 cmd;
struct fsl_qdma_format *sdf, *ddf;
struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest;
@@ -383,14 +387,11 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
/* This entry is the last entry. */
qdma_csgf_set_f(csgf_dest, len);
/* Descriptor Buffer */
- cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
- FSL_QDMA_CMD_RWTTYPE_OFFSET);
- sdf->data = QDMA_SDDF_CMD(cmd);
-
- cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
- FSL_QDMA_CMD_RWTTYPE_OFFSET);
- cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET);
- ddf->data = QDMA_SDDF_CMD(cmd);
+ sdf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) |
+ FSL_QDMA_CMD_PF);
+
+ ddf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) |
+ (FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET));
}
/*
@@ -514,11 +515,11 @@ static struct fsl_qdma_queue
queue_temp = queue_head + i + (j * queue_num);
queue_temp->cq =
- dma_alloc_coherent(&pdev->dev,
- sizeof(struct fsl_qdma_format) *
- queue_size[i],
- &queue_temp->bus_addr,
- GFP_KERNEL);
+ dmam_alloc_coherent(&pdev->dev,
+ sizeof(struct fsl_qdma_format) *
+ queue_size[i],
+ &queue_temp->bus_addr,
+ GFP_KERNEL);
if (!queue_temp->cq)
return NULL;
queue_temp->block_base = fsl_qdma->block_base +
@@ -563,15 +564,14 @@ static struct fsl_qdma_queue
/*
* Buffer for queue command
*/
- status_head->cq = dma_alloc_coherent(&pdev->dev,
- sizeof(struct fsl_qdma_format) *
- status_size,
- &status_head->bus_addr,
- GFP_KERNEL);
- if (!status_head->cq) {
- devm_kfree(&pdev->dev, status_head);
+ status_head->cq = dmam_alloc_coherent(&pdev->dev,
+ sizeof(struct fsl_qdma_format) *
+ status_size,
+ &status_head->bus_addr,
+ GFP_KERNEL);
+ if (!status_head->cq)
return NULL;
- }
+
status_head->n_cq = status_size;
status_head->virt_head = status_head->cq;
status_head->virt_tail = status_head->cq;
@@ -625,7 +625,7 @@ static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma)
static int
fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma,
- void *block,
+ __iomem void *block,
int id)
{
bool duplicate;
@@ -1197,10 +1197,6 @@ static int fsl_qdma_probe(struct platform_device *pdev)
if (!fsl_qdma->queue)
return -ENOMEM;
- ret = fsl_qdma_irq_init(pdev, fsl_qdma);
- if (ret)
- return ret;
-
fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
if (fsl_qdma->irq_base < 0)
return fsl_qdma->irq_base;
@@ -1239,16 +1235,19 @@ static int fsl_qdma_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, fsl_qdma);
- ret = dma_async_device_register(&fsl_qdma->dma_dev);
+ ret = fsl_qdma_reg_init(fsl_qdma);
if (ret) {
- dev_err(&pdev->dev,
- "Can't register NXP Layerscape qDMA engine.\n");
+ dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
return ret;
}
- ret = fsl_qdma_reg_init(fsl_qdma);
+ ret = fsl_qdma_irq_init(pdev, fsl_qdma);
+ if (ret)
+ return ret;
+
+ ret = dma_async_device_register(&fsl_qdma->dma_dev);
if (ret) {
- dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
+ dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n");
return ret;
}
@@ -1268,8 +1267,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev)
static void fsl_qdma_remove(struct platform_device *pdev)
{
- int i;
- struct fsl_qdma_queue *status;
struct device_node *np = pdev->dev.of_node;
struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev);
@@ -1277,12 +1274,6 @@ static void fsl_qdma_remove(struct platform_device *pdev)
fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev);
of_dma_controller_free(np);
dma_async_device_unregister(&fsl_qdma->dma_dev);
-
- for (i = 0; i < fsl_qdma->block_number; i++) {
- status = fsl_qdma->status[i];
- dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) *
- status->n_cq, status->cq, status->bus_addr);
- }
}
static const struct of_device_id fsl_qdma_dt_ids[] = {
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 77f8885cf407..e5a94a93a3cc 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -345,7 +345,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
spin_lock(&evl->lock);
status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
t = status.tail;
- h = evl->head;
+ h = status.head;
size = evl->size;
while (h != t) {
diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c
index 9cfbd9b14c4c..f3f25ee676f3 100644
--- a/drivers/dma/idxd/debugfs.c
+++ b/drivers/dma/idxd/debugfs.c
@@ -68,9 +68,9 @@ static int debugfs_evl_show(struct seq_file *s, void *d)
spin_lock(&evl->lock);
- h = evl->head;
evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
t = evl_status.tail;
+ h = evl_status.head;
evl_size = evl->size;
seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n",
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 47de3f93ff1e..d0f5db6cf1ed 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -300,7 +300,6 @@ struct idxd_evl {
unsigned int log_size;
/* The number of entries in the event log. */
u16 size;
- u16 head;
unsigned long *bmap;
bool batch_fail[IDXD_MAX_BATCH_IDENT];
};
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 14df1f1347a8..4954adc6bb60 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -343,7 +343,9 @@ static void idxd_cleanup_internals(struct idxd_device *idxd)
static int idxd_init_evl(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
+ unsigned int evl_cache_size;
struct idxd_evl *evl;
+ const char *idxd_name;
if (idxd->hw.gen_cap.evl_support == 0)
return 0;
@@ -355,9 +357,16 @@ static int idxd_init_evl(struct idxd_device *idxd)
spin_lock_init(&evl->lock);
evl->size = IDXD_EVL_SIZE_MIN;
- idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)),
- sizeof(struct idxd_evl_fault) + evl_ent_size(idxd),
- 0, 0, NULL);
+ idxd_name = dev_name(idxd_confdev(idxd));
+ evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd);
+ /*
+ * Since completion record in evl_cache will be copied to user
+ * when handling completion record page fault, need to create
+ * the cache suitable for user copy.
+ */
+ idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size,
+ 0, 0, 0, evl_cache_size,
+ NULL);
if (!idxd->evl_cache) {
kfree(evl);
return -ENOMEM;
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index c8a0aa874b11..348aa21389a9 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -367,9 +367,9 @@ static void process_evl_entries(struct idxd_device *idxd)
/* Clear interrupt pending bit */
iowrite32(evl_status.bits_upper32,
idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32));
- h = evl->head;
evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
t = evl_status.tail;
+ h = evl_status.head;
size = idxd->evl->size;
while (h != t) {
@@ -378,7 +378,6 @@ static void process_evl_entries(struct idxd_device *idxd)
h = (h + 1) % size;
}
- evl->head = h;
evl_status.head = h;
iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
spin_unlock(&evl->lock);
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index 1ebfbe88e733..97ebc791a30b 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -747,8 +747,8 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
if (IS_ERR(xor_dev->clk))
return PTR_ERR(xor_dev->clk);
- ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
- mv_xor_v2_set_msi_msg);
+ ret = platform_device_msi_init_and_alloc_irqs(&pdev->dev, 1,
+ mv_xor_v2_set_msi_msg);
if (ret)
return ret;
@@ -851,7 +851,7 @@ free_hw_desq:
xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
xor_dev->hw_desq_virt, xor_dev->hw_desq);
free_msi_irqs:
- platform_msi_domain_free_irqs(&pdev->dev);
+ platform_device_msi_free_irqs_all(&pdev->dev);
return ret;
}
@@ -867,7 +867,7 @@ static void mv_xor_v2_remove(struct platform_device *pdev)
devm_free_irq(&pdev->dev, xor_dev->irq, xor_dev);
- platform_msi_domain_free_irqs(&pdev->dev);
+ platform_device_msi_free_irqs_all(&pdev->dev);
tasklet_kill(&xor_dev->irq_tasklet);
}
diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
index 1aa65e5de0f3..f79240734807 100644
--- a/drivers/dma/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/ptdma/ptdma-dmaengine.c
@@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt)
chan->vc.desc_free = pt_do_cleanup;
vchan_init(&chan->vc, dma_dev);
- dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
-
ret = dma_async_device_register(dma_dev);
if (ret)
goto err_reg;
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
index d63b93dc7047..202ac95227cb 100644
--- a/drivers/dma/qcom/hidma.c
+++ b/drivers/dma/qcom/hidma.c
@@ -696,7 +696,7 @@ static void hidma_free_msis(struct hidma_dev *dmadev)
devm_free_irq(dev, virq, &dmadev->lldev);
}
- platform_msi_domain_free_irqs(dev);
+ platform_device_msi_free_irqs_all(dev);
#endif
}
@@ -706,8 +706,8 @@ static int hidma_request_msi(struct hidma_dev *dmadev,
#ifdef CONFIG_GENERIC_MSI_IRQ
int rc, i, virq;
- rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS,
- hidma_write_msi_msg);
+ rc = platform_device_msi_init_and_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS,
+ hidma_write_msi_msg);
if (rc)
return rc;
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index f1f920861fa9..5f8d2e93ff3f 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -2404,6 +2404,11 @@ static int edma_probe(struct platform_device *pdev)
if (irq > 0) {
irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint",
dev_name(dev));
+ if (!irq_name) {
+ ret = -ENOMEM;
+ goto err_disable_pm;
+ }
+
ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name,
ecc);
if (ret) {
@@ -2420,6 +2425,11 @@ static int edma_probe(struct platform_device *pdev)
if (irq > 0) {
irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint",
dev_name(dev));
+ if (!irq_name) {
+ ret = -ENOMEM;
+ goto err_disable_pm;
+ }
+
ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name,
ecc);
if (ret) {
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 2841a539c264..6400d06588a2 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -3968,6 +3968,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc,
{
struct udma_chan *uc = to_udma_chan(&vc->chan);
struct udma_desc *d;
+ u8 status;
if (!vd)
return;
@@ -3977,12 +3978,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc,
if (d->metadata_size)
udma_fetch_epib(uc, d);
- /* Provide residue information for the client */
if (result) {
void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx);
if (cppi5_desc_get_type(desc_vaddr) ==
CPPI5_INFO0_DESC_TYPE_VAL_HOST) {
+ /* Provide residue information for the client */
result->residue = d->residue -
cppi5_hdesc_get_pktlen(desc_vaddr);
if (result->residue)
@@ -3991,7 +3992,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc,
result->result = DMA_TRANS_NOERROR;
} else {
result->residue = 0;
- result->result = DMA_TRANS_NOERROR;
+ /* Propagate TR Response errors to the client */
+ status = d->hwdesc[0].tr_resp_base->status;
+ if (status)
+ result->result = DMA_TRANS_ABORTED;
+ else
+ result->result = DMA_TRANS_NOERROR;
}
}
}
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index 1eca8cc271f8..64eaca80d736 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -29,7 +29,7 @@ static u32 dpll_pin_xa_id;
WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
#define ASSERT_DPLL_NOT_REGISTERED(d) \
WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
-#define ASSERT_PIN_REGISTERED(p) \
+#define ASSERT_DPLL_PIN_REGISTERED(p) \
WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED))
struct dpll_device_registration {
@@ -131,9 +131,9 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
reg = dpll_pin_registration_find(ref, ops, priv);
if (WARN_ON(!reg))
return -EINVAL;
+ list_del(&reg->list);
+ kfree(reg);
if (refcount_dec_and_test(&ref->refcount)) {
- list_del(&reg->list);
- kfree(reg);
xa_erase(xa_pins, i);
WARN_ON(!list_empty(&ref->registration_list));
kfree(ref);
@@ -211,9 +211,9 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
reg = dpll_pin_registration_find(ref, ops, priv);
if (WARN_ON(!reg))
return;
+ list_del(&reg->list);
+ kfree(reg);
if (refcount_dec_and_test(&ref->refcount)) {
- list_del(&reg->list);
- kfree(reg);
xa_erase(xa_dplls, i);
WARN_ON(!list_empty(&ref->registration_list));
kfree(ref);
@@ -425,6 +425,53 @@ void dpll_device_unregister(struct dpll_device *dpll,
}
EXPORT_SYMBOL_GPL(dpll_device_unregister);
+static void dpll_pin_prop_free(struct dpll_pin_properties *prop)
+{
+ kfree(prop->package_label);
+ kfree(prop->panel_label);
+ kfree(prop->board_label);
+ kfree(prop->freq_supported);
+}
+
+static int dpll_pin_prop_dup(const struct dpll_pin_properties *src,
+ struct dpll_pin_properties *dst)
+{
+ memcpy(dst, src, sizeof(*dst));
+ if (src->freq_supported && src->freq_supported_num) {
+ size_t freq_size = src->freq_supported_num *
+ sizeof(*src->freq_supported);
+ dst->freq_supported = kmemdup(src->freq_supported,
+ freq_size, GFP_KERNEL);
+ if (!src->freq_supported)
+ return -ENOMEM;
+ }
+ if (src->board_label) {
+ dst->board_label = kstrdup(src->board_label, GFP_KERNEL);
+ if (!dst->board_label)
+ goto err_board_label;
+ }
+ if (src->panel_label) {
+ dst->panel_label = kstrdup(src->panel_label, GFP_KERNEL);
+ if (!dst->panel_label)
+ goto err_panel_label;
+ }
+ if (src->package_label) {
+ dst->package_label = kstrdup(src->package_label, GFP_KERNEL);
+ if (!dst->package_label)
+ goto err_package_label;
+ }
+
+ return 0;
+
+err_package_label:
+ kfree(dst->panel_label);
+err_panel_label:
+ kfree(dst->board_label);
+err_board_label:
+ kfree(dst->freq_supported);
+ return -ENOMEM;
+}
+
static struct dpll_pin *
dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
const struct dpll_pin_properties *prop)
@@ -441,24 +488,48 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX ||
prop->type > DPLL_PIN_TYPE_MAX)) {
ret = -EINVAL;
- goto err;
+ goto err_pin_prop;
}
- pin->prop = prop;
+ ret = dpll_pin_prop_dup(prop, &pin->prop);
+ if (ret)
+ goto err_pin_prop;
refcount_set(&pin->refcount, 1);
xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC);
xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC);
ret = xa_alloc_cyclic(&dpll_pin_xa, &pin->id, pin, xa_limit_32b,
&dpll_pin_xa_id, GFP_KERNEL);
if (ret)
- goto err;
+ goto err_xa_alloc;
return pin;
-err:
+err_xa_alloc:
xa_destroy(&pin->dpll_refs);
xa_destroy(&pin->parent_refs);
+ dpll_pin_prop_free(&pin->prop);
+err_pin_prop:
kfree(pin);
return ERR_PTR(ret);
}
+static void dpll_netdev_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+ rtnl_lock();
+ rcu_assign_pointer(dev->dpll_pin, dpll_pin);
+ rtnl_unlock();
+}
+
+void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+ WARN_ON(!dpll_pin);
+ dpll_netdev_pin_assign(dev, dpll_pin);
+}
+EXPORT_SYMBOL(dpll_netdev_pin_set);
+
+void dpll_netdev_pin_clear(struct net_device *dev)
+{
+ dpll_netdev_pin_assign(dev, NULL);
+}
+EXPORT_SYMBOL(dpll_netdev_pin_clear);
+
/**
* dpll_pin_get - find existing or create new dpll pin
* @clock_id: clock_id of creator
@@ -511,10 +582,11 @@ void dpll_pin_put(struct dpll_pin *pin)
{
mutex_lock(&dpll_lock);
if (refcount_dec_and_test(&pin->refcount)) {
+ xa_erase(&dpll_pin_xa, pin->id);
xa_destroy(&pin->dpll_refs);
xa_destroy(&pin->parent_refs);
- xa_erase(&dpll_pin_xa, pin->id);
- kfree(pin);
+ dpll_pin_prop_free(&pin->prop);
+ kfree_rcu(pin, rcu);
}
mutex_unlock(&dpll_lock);
}
@@ -564,8 +636,6 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
WARN_ON(!ops->state_on_dpll_get) ||
WARN_ON(!ops->direction_get))
return -EINVAL;
- if (ASSERT_DPLL_REGISTERED(dpll))
- return -EINVAL;
mutex_lock(&dpll_lock);
if (WARN_ON(!(dpll->module == pin->module &&
@@ -583,6 +653,7 @@ static void
__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
const struct dpll_pin_ops *ops, void *priv)
{
+ ASSERT_DPLL_PIN_REGISTERED(pin);
dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv);
if (xa_empty(&pin->dpll_refs))
@@ -636,15 +707,13 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
unsigned long i, stop;
int ret;
- if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX))
+ if (WARN_ON(parent->prop.type != DPLL_PIN_TYPE_MUX))
return -EINVAL;
if (WARN_ON(!ops) ||
WARN_ON(!ops->state_on_pin_get) ||
WARN_ON(!ops->direction_get))
return -EINVAL;
- if (ASSERT_PIN_REGISTERED(parent))
- return -EINVAL;
mutex_lock(&dpll_lock);
ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h
index 5585873c5c1b..2b6d8ef1cdf3 100644
--- a/drivers/dpll/dpll_core.h
+++ b/drivers/dpll/dpll_core.h
@@ -44,9 +44,10 @@ struct dpll_device {
* @module: module of creator
* @dpll_refs: hold referencees to dplls pin was registered with
* @parent_refs: hold references to parent pins pin was registered with
- * @prop: pointer to pin properties given by registerer
+ * @prop: pin properties copied from the registerer
* @rclk_dev_name: holds name of device when pin can recover clock from it
* @refcount: refcount
+ * @rcu: rcu_head for kfree_rcu()
**/
struct dpll_pin {
u32 id;
@@ -55,8 +56,9 @@ struct dpll_pin {
struct module *module;
struct xarray dpll_refs;
struct xarray parent_refs;
- const struct dpll_pin_properties *prop;
+ struct dpll_pin_properties prop;
refcount_t refcount;
+ struct rcu_head rcu;
};
/**
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index 3370dbddb86b..98e6ad8528d3 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -8,6 +8,7 @@
*/
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/netdevice.h>
#include <net/genetlink.h>
#include "dpll_core.h"
#include "dpll_netlink.h"
@@ -48,18 +49,6 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id)
}
/**
- * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin
- * @pin: pin pointer
- *
- * Return: byte size of pin handle attribute for given pin.
- */
-size_t dpll_msg_pin_handle_size(struct dpll_pin *pin)
-{
- return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */
-}
-EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size);
-
-/**
* dpll_msg_add_pin_handle - attach pin handle attribute to a given message
* @msg: pointer to sk_buff message to attach a pin handle
* @pin: pin pointer
@@ -68,7 +57,7 @@ EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size);
* * 0 - success
* * -EMSGSIZE - no space in message to attach pin handle
*/
-int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
+static int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
{
if (!pin)
return 0;
@@ -76,7 +65,28 @@ int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
return -EMSGSIZE;
return 0;
}
-EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle);
+
+static struct dpll_pin *dpll_netdev_pin(const struct net_device *dev)
+{
+ return rcu_dereference_rtnl(dev->dpll_pin);
+}
+
+/**
+ * dpll_netdev_pin_handle_size - get size of pin handle attribute of a netdev
+ * @dev: netdev from which to get the pin
+ *
+ * Return: byte size of pin handle attribute, or 0 if @dev has no pin.
+ */
+size_t dpll_netdev_pin_handle_size(const struct net_device *dev)
+{
+ return dpll_netdev_pin(dev) ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */
+}
+
+int dpll_netdev_add_pin_handle(struct sk_buff *msg,
+ const struct net_device *dev)
+{
+ return dpll_msg_add_pin_handle(msg, dpll_netdev_pin(dev));
+}
static int
dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll,
@@ -121,14 +131,21 @@ dpll_msg_add_lock_status(struct sk_buff *msg, struct dpll_device *dpll,
struct netlink_ext_ack *extack)
{
const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+ enum dpll_lock_status_error status_error = 0;
enum dpll_lock_status status;
int ret;
- ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status, extack);
+ ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status,
+ &status_error, extack);
if (ret)
return ret;
if (nla_put_u32(msg, DPLL_A_LOCK_STATUS, status))
return -EMSGSIZE;
+ if (status_error &&
+ (status == DPLL_LOCK_STATUS_UNLOCKED ||
+ status == DPLL_LOCK_STATUS_HOLDOVER) &&
+ nla_put_u32(msg, DPLL_A_LOCK_STATUS_ERROR, status_error))
+ return -EMSGSIZE;
return 0;
}
@@ -303,17 +320,17 @@ dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq,
DPLL_A_PIN_PAD))
return -EMSGSIZE;
- for (fs = 0; fs < pin->prop->freq_supported_num; fs++) {
+ for (fs = 0; fs < pin->prop.freq_supported_num; fs++) {
nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED);
if (!nest)
return -EMSGSIZE;
- freq = pin->prop->freq_supported[fs].min;
+ freq = pin->prop.freq_supported[fs].min;
if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq),
&freq, DPLL_A_PIN_PAD)) {
nla_nest_cancel(msg, nest);
return -EMSGSIZE;
}
- freq = pin->prop->freq_supported[fs].max;
+ freq = pin->prop.freq_supported[fs].max;
if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq),
&freq, DPLL_A_PIN_PAD)) {
nla_nest_cancel(msg, nest);
@@ -329,9 +346,9 @@ static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq)
{
int fs;
- for (fs = 0; fs < pin->prop->freq_supported_num; fs++)
- if (freq >= pin->prop->freq_supported[fs].min &&
- freq <= pin->prop->freq_supported[fs].max)
+ for (fs = 0; fs < pin->prop.freq_supported_num; fs++)
+ if (freq >= pin->prop.freq_supported[fs].min &&
+ freq <= pin->prop.freq_supported[fs].max)
return true;
return false;
}
@@ -421,7 +438,7 @@ static int
dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
struct netlink_ext_ack *extack)
{
- const struct dpll_pin_properties *prop = pin->prop;
+ const struct dpll_pin_properties *prop = &pin->prop;
struct dpll_pin_ref *ref;
int ret;
@@ -553,6 +570,24 @@ __dpll_device_change_ntf(struct dpll_device *dpll)
return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll);
}
+static bool dpll_pin_available(struct dpll_pin *pin)
+{
+ struct dpll_pin_ref *par_ref;
+ unsigned long i;
+
+ if (!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED))
+ return false;
+ xa_for_each(&pin->parent_refs, i, par_ref)
+ if (xa_get_mark(&dpll_pin_xa, par_ref->pin->id,
+ DPLL_REGISTERED))
+ return true;
+ xa_for_each(&pin->dpll_refs, i, par_ref)
+ if (xa_get_mark(&dpll_device_xa, par_ref->dpll->id,
+ DPLL_REGISTERED))
+ return true;
+ return false;
+}
+
/**
* dpll_device_change_ntf - notify that the dpll device has been changed
* @dpll: registered dpll pointer
@@ -579,7 +614,7 @@ dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin)
int ret = -ENOMEM;
void *hdr;
- if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED)))
+ if (!dpll_pin_available(pin))
return -ENODEV;
msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
@@ -717,7 +752,7 @@ dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx,
int ret;
if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
- pin->prop->capabilities)) {
+ pin->prop.capabilities)) {
NL_SET_ERR_MSG(extack, "state changing is not allowed");
return -EOPNOTSUPP;
}
@@ -753,7 +788,7 @@ dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin,
int ret;
if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
- pin->prop->capabilities)) {
+ pin->prop.capabilities)) {
NL_SET_ERR_MSG(extack, "state changing is not allowed");
return -EOPNOTSUPP;
}
@@ -780,7 +815,7 @@ dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin,
int ret;
if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE &
- pin->prop->capabilities)) {
+ pin->prop.capabilities)) {
NL_SET_ERR_MSG(extack, "prio changing is not allowed");
return -EOPNOTSUPP;
}
@@ -808,7 +843,7 @@ dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll,
int ret;
if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE &
- pin->prop->capabilities)) {
+ pin->prop.capabilities)) {
NL_SET_ERR_MSG(extack, "direction changing is not allowed");
return -EOPNOTSUPP;
}
@@ -838,8 +873,8 @@ dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr,
int ret;
phase_adj = nla_get_s32(phase_adj_attr);
- if (phase_adj > pin->prop->phase_range.max ||
- phase_adj < pin->prop->phase_range.min) {
+ if (phase_adj > pin->prop.phase_range.max ||
+ phase_adj < pin->prop.phase_range.min) {
NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr,
"phase adjust value not supported");
return -EINVAL;
@@ -1023,7 +1058,7 @@ dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr,
unsigned long i;
xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) {
- prop = pin->prop;
+ prop = &pin->prop;
cid_match = clock_id ? pin->clock_id == clock_id : true;
mod_match = mod_name_attr && module_name(pin->module) ?
!nla_strcmp(mod_name_attr,
@@ -1130,6 +1165,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info)
}
pin = dpll_pin_find_from_nlattr(info);
if (!IS_ERR(pin)) {
+ if (!dpll_pin_available(pin)) {
+ nlmsg_free(msg);
+ return -ENODEV;
+ }
ret = dpll_msg_add_pin_handle(msg, pin);
if (ret) {
nlmsg_free(msg);
@@ -1177,8 +1216,11 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
unsigned long i;
int ret = 0;
+ mutex_lock(&dpll_lock);
xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED,
ctx->idx) {
+ if (!dpll_pin_available(pin))
+ continue;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
&dpll_nl_family, NLM_F_MULTI,
@@ -1194,6 +1236,8 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
genlmsg_end(skb, hdr);
}
+ mutex_unlock(&dpll_lock);
+
if (ret == -EMSGSIZE) {
ctx->idx = i;
return skb->len;
@@ -1349,6 +1393,7 @@ int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
unsigned long i;
int ret = 0;
+ mutex_lock(&dpll_lock);
xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED,
ctx->idx) {
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
@@ -1365,6 +1410,8 @@ int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
genlmsg_end(skb, hdr);
}
+ mutex_unlock(&dpll_lock);
+
if (ret == -EMSGSIZE) {
ctx->idx = i;
return skb->len;
@@ -1415,20 +1462,6 @@ dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
mutex_unlock(&dpll_lock);
}
-int dpll_lock_dumpit(struct netlink_callback *cb)
-{
- mutex_lock(&dpll_lock);
-
- return 0;
-}
-
-int dpll_unlock_dumpit(struct netlink_callback *cb)
-{
- mutex_unlock(&dpll_lock);
-
- return 0;
-}
-
int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
@@ -1441,7 +1474,8 @@ int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
}
info->user_ptr[0] = xa_load(&dpll_pin_xa,
nla_get_u32(info->attrs[DPLL_A_PIN_ID]));
- if (!info->user_ptr[0]) {
+ if (!info->user_ptr[0] ||
+ !dpll_pin_available(info->user_ptr[0])) {
NL_SET_ERR_MSG(info->extack, "pin not found");
ret = -ENODEV;
goto unlock_dev;
diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c
index eaee5be7aa64..1e95f5397cfc 100644
--- a/drivers/dpll/dpll_nl.c
+++ b/drivers/dpll/dpll_nl.c
@@ -95,9 +95,7 @@ static const struct genl_split_ops dpll_nl_ops[] = {
},
{
.cmd = DPLL_CMD_DEVICE_GET,
- .start = dpll_lock_dumpit,
.dumpit = dpll_nl_device_get_dumpit,
- .done = dpll_unlock_dumpit,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
},
{
@@ -129,9 +127,7 @@ static const struct genl_split_ops dpll_nl_ops[] = {
},
{
.cmd = DPLL_CMD_PIN_GET,
- .start = dpll_lock_dumpit,
.dumpit = dpll_nl_pin_get_dumpit,
- .done = dpll_unlock_dumpit,
.policy = dpll_pin_get_dump_nl_policy,
.maxattr = DPLL_A_PIN_ID,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h
index 92d4c9c4f788..f491262bee4f 100644
--- a/drivers/dpll/dpll_nl.h
+++ b/drivers/dpll/dpll_nl.h
@@ -30,8 +30,6 @@ dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
void
dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
-int dpll_lock_dumpit(struct netlink_callback *cb);
-int dpll_unlock_dumpit(struct netlink_callback *cb);
int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info);
int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info);
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 5a7f3fabee22..16c8de5050e5 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -78,6 +78,7 @@ config EDAC_GHES
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64)"
depends on AMD_NB && EDAC_DECODE_MCE
+ imply AMD_ATL
help
Support for error detection and correction of DRAM ECC errors on
the AMD64 families (>= K8) of memory controllers.
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 537b9987a431..1f3520d76861 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/ras.h>
#include "amd64_edac.h"
#include <asm/amd_nb.h>
@@ -1051,281 +1052,6 @@ static int fixup_node_id(int node_id, struct mce *m)
return nid - gpu_node_map.base_node_id + 1;
}
-/* Protect the PCI config register pairs used for DF indirect access. */
-static DEFINE_MUTEX(df_indirect_mutex);
-
-/*
- * Data Fabric Indirect Access uses FICAA/FICAD.
- *
- * Fabric Indirect Configuration Access Address (FICAA): Constructed based
- * on the device's Instance Id and the PCI function and register offset of
- * the desired register.
- *
- * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
- * and FICAD HI registers but so far we only need the LO register.
- *
- * Use Instance Id 0xFF to indicate a broadcast read.
- */
-#define DF_BROADCAST 0xFF
-static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
-{
- struct pci_dev *F4;
- u32 ficaa;
- int err = -ENODEV;
-
- if (node >= amd_nb_num())
- goto out;
-
- F4 = node_to_amd_nb(node)->link;
- if (!F4)
- goto out;
-
- ficaa = (instance_id == DF_BROADCAST) ? 0 : 1;
- ficaa |= reg & 0x3FC;
- ficaa |= (func & 0x7) << 11;
- ficaa |= instance_id << 16;
-
- mutex_lock(&df_indirect_mutex);
-
- err = pci_write_config_dword(F4, 0x5C, ficaa);
- if (err) {
- pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
- goto out_unlock;
- }
-
- err = pci_read_config_dword(F4, 0x98, lo);
- if (err)
- pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
-
-out_unlock:
- mutex_unlock(&df_indirect_mutex);
-
-out:
- return err;
-}
-
-static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
-{
- return __df_indirect_read(node, func, reg, instance_id, lo);
-}
-
-static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo)
-{
- return __df_indirect_read(node, func, reg, DF_BROADCAST, lo);
-}
-
-struct addr_ctx {
- u64 ret_addr;
- u32 tmp;
- u16 nid;
- u8 inst_id;
-};
-
-static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
-{
- u64 dram_base_addr, dram_limit_addr, dram_hole_base;
-
- u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
- u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
- u8 intlv_addr_sel, intlv_addr_bit;
- u8 num_intlv_bits, hashed_bit;
- u8 lgcy_mmio_hole_en, base = 0;
- u8 cs_mask, cs_id = 0;
- bool hash_enabled = false;
-
- struct addr_ctx ctx;
-
- memset(&ctx, 0, sizeof(ctx));
-
- /* Start from the normalized address */
- ctx.ret_addr = norm_addr;
-
- ctx.nid = nid;
- ctx.inst_id = umc;
-
- /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
- if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp))
- goto out_err;
-
- /* Remove HiAddrOffset from normalized address, if enabled: */
- if (ctx.tmp & BIT(0)) {
- u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8;
-
- if (norm_addr >= hi_addr_offset) {
- ctx.ret_addr -= hi_addr_offset;
- base = 1;
- }
- }
-
- /* Read D18F0x110 (DramBaseAddress). */
- if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp))
- goto out_err;
-
- /* Check if address range is valid. */
- if (!(ctx.tmp & BIT(0))) {
- pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
- __func__, ctx.tmp);
- goto out_err;
- }
-
- lgcy_mmio_hole_en = ctx.tmp & BIT(1);
- intlv_num_chan = (ctx.tmp >> 4) & 0xF;
- intlv_addr_sel = (ctx.tmp >> 8) & 0x7;
- dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16;
-
- /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
- if (intlv_addr_sel > 3) {
- pr_err("%s: Invalid interleave address select %d.\n",
- __func__, intlv_addr_sel);
- goto out_err;
- }
-
- /* Read D18F0x114 (DramLimitAddress). */
- if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp))
- goto out_err;
-
- intlv_num_sockets = (ctx.tmp >> 8) & 0x1;
- intlv_num_dies = (ctx.tmp >> 10) & 0x3;
- dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
-
- intlv_addr_bit = intlv_addr_sel + 8;
-
- /* Re-use intlv_num_chan by setting it equal to log2(#channels) */
- switch (intlv_num_chan) {
- case 0: intlv_num_chan = 0; break;
- case 1: intlv_num_chan = 1; break;
- case 3: intlv_num_chan = 2; break;
- case 5: intlv_num_chan = 3; break;
- case 7: intlv_num_chan = 4; break;
-
- case 8: intlv_num_chan = 1;
- hash_enabled = true;
- break;
- default:
- pr_err("%s: Invalid number of interleaved channels %d.\n",
- __func__, intlv_num_chan);
- goto out_err;
- }
-
- num_intlv_bits = intlv_num_chan;
-
- if (intlv_num_dies > 2) {
- pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
- __func__, intlv_num_dies);
- goto out_err;
- }
-
- num_intlv_bits += intlv_num_dies;
-
- /* Add a bit if sockets are interleaved. */
- num_intlv_bits += intlv_num_sockets;
-
- /* Assert num_intlv_bits <= 4 */
- if (num_intlv_bits > 4) {
- pr_err("%s: Invalid interleave bits %d.\n",
- __func__, num_intlv_bits);
- goto out_err;
- }
-
- if (num_intlv_bits > 0) {
- u64 temp_addr_x, temp_addr_i, temp_addr_y;
- u8 die_id_bit, sock_id_bit, cs_fabric_id;
-
- /*
- * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
- * This is the fabric id for this coherent slave. Use
- * umc/channel# as instance id of the coherent slave
- * for FICAA.
- */
- if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp))
- goto out_err;
-
- cs_fabric_id = (ctx.tmp >> 8) & 0xFF;
- die_id_bit = 0;
-
- /* If interleaved over more than 1 channel: */
- if (intlv_num_chan) {
- die_id_bit = intlv_num_chan;
- cs_mask = (1 << die_id_bit) - 1;
- cs_id = cs_fabric_id & cs_mask;
- }
-
- sock_id_bit = die_id_bit;
-
- /* Read D18F1x208 (SystemFabricIdMask). */
- if (intlv_num_dies || intlv_num_sockets)
- if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp))
- goto out_err;
-
- /* If interleaved over more than 1 die. */
- if (intlv_num_dies) {
- sock_id_bit = die_id_bit + intlv_num_dies;
- die_id_shift = (ctx.tmp >> 24) & 0xF;
- die_id_mask = (ctx.tmp >> 8) & 0xFF;
-
- cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
- }
-
- /* If interleaved over more than 1 socket. */
- if (intlv_num_sockets) {
- socket_id_shift = (ctx.tmp >> 28) & 0xF;
- socket_id_mask = (ctx.tmp >> 16) & 0xFF;
-
- cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
- }
-
- /*
- * The pre-interleaved address consists of XXXXXXIIIYYYYY
- * where III is the ID for this CS, and XXXXXXYYYYY are the
- * address bits from the post-interleaved address.
- * "num_intlv_bits" has been calculated to tell us how many "I"
- * bits there are. "intlv_addr_bit" tells us how many "Y" bits
- * there are (where "I" starts).
- */
- temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0);
- temp_addr_i = (cs_id << intlv_addr_bit);
- temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
- ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
- }
-
- /* Add dram base address */
- ctx.ret_addr += dram_base_addr;
-
- /* If legacy MMIO hole enabled */
- if (lgcy_mmio_hole_en) {
- if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp))
- goto out_err;
-
- dram_hole_base = ctx.tmp & GENMASK(31, 24);
- if (ctx.ret_addr >= dram_hole_base)
- ctx.ret_addr += (BIT_ULL(32) - dram_hole_base);
- }
-
- if (hash_enabled) {
- /* Save some parentheses and grab ls-bit at the end. */
- hashed_bit = (ctx.ret_addr >> 12) ^
- (ctx.ret_addr >> 18) ^
- (ctx.ret_addr >> 21) ^
- (ctx.ret_addr >> 30) ^
- cs_id;
-
- hashed_bit &= BIT(0);
-
- if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0)))
- ctx.ret_addr ^= BIT(intlv_addr_bit);
- }
-
- /* Is calculated system address is above DRAM limit address? */
- if (ctx.ret_addr > dram_limit_addr)
- goto out_err;
-
- *sys_addr = ctx.ret_addr;
- return 0;
-
-out_err:
- return -EINVAL;
-}
-
static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
/*
@@ -1915,7 +1641,7 @@ ddr3:
/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
{
- u16 mce_nid = topology_die_id(m->extcpu);
+ u16 mce_nid = topology_amd_node_id(m->extcpu);
struct mem_ctl_info *mci;
u8 start_bit = 1;
u8 end_bit = 47;
@@ -3073,9 +2799,10 @@ static void decode_umc_error(int node_id, struct mce *m)
{
u8 ecc_type = (m->status >> 45) & 0x3;
struct mem_ctl_info *mci;
+ unsigned long sys_addr;
struct amd64_pvt *pvt;
+ struct atl_err a_err;
struct err_info err;
- u64 sys_addr;
node_id = fixup_node_id(node_id, m);
@@ -3106,7 +2833,12 @@ static void decode_umc_error(int node_id, struct mce *m)
pvt->ops->get_err_info(m, &err);
- if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+ a_err.addr = m->addr;
+ a_err.ipid = m->ipid;
+ a_err.cpu = m->extcpu;
+
+ sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
+ if (IS_ERR_VALUE(sys_addr)) {
err.err_code = ERR_NORM_ADDR;
goto log_error;
}
@@ -3446,7 +3178,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
int cpu;
for_each_online_cpu(cpu)
- if (topology_die_id(cpu) == nid)
+ if (topology_amd_node_id(cpu) == nid)
cpumask_set_cpu(cpu, mask);
}
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 2b83d6de9352..3fd22a1eb1a9 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -951,6 +951,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = {
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 2b0ecdeba5cd..cdd8480e7368 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -238,6 +238,7 @@ static struct work_struct ecclog_work;
#define DID_ADL_N_SKU9 0x4678
#define DID_ADL_N_SKU10 0x4679
#define DID_ADL_N_SKU11 0x467c
+#define DID_ADL_N_SKU12 0x4632
/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1 0xa706
@@ -583,6 +584,7 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ec8b6c9fedfd..8130c3dc64da 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -584,7 +584,7 @@ static void decode_mc3_mce(struct mce *m)
static void decode_mc4_mce(struct mce *m)
{
unsigned int fam = x86_family(m->cpuid);
- int node_id = topology_die_id(m->extcpu);
+ int node_id = topology_amd_node_id(m->extcpu);
u16 ec = EC(m->status);
u8 xec = XEC(m->status, 0x1f);
u8 offset = 0;
@@ -746,7 +746,7 @@ static void decode_smca_error(struct mce *m)
if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
xec == 0 && decode_dram_ecc)
- decode_dram_ecc(topology_die_id(m->extcpu), m);
+ decode_dram_ecc(topology_amd_node_id(m->extcpu), m);
}
static inline void amd_decode_err_code(u16 ec)
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 709babce43ba..5527055b0964 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -1324,11 +1324,9 @@ static int mc_probe(struct platform_device *pdev)
struct synps_edac_priv *priv;
struct mem_ctl_info *mci;
void __iomem *baseaddr;
- struct resource *res;
int rc;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- baseaddr = devm_ioremap_resource(&pdev->dev, res);
+ baseaddr = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(baseaddr))
return PTR_ERR(baseaddr);
diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c
index 62caf454b567..1688a5050f63 100644
--- a/drivers/edac/versal_edac.c
+++ b/drivers/edac/versal_edac.c
@@ -42,8 +42,11 @@
#define ECCW0_FLIP_CTRL 0x109C
#define ECCW0_FLIP0_OFFSET 0x10A0
+#define ECCW0_FLIP0_BITS 31
+#define ECCW0_FLIP1_OFFSET 0x10A4
#define ECCW1_FLIP_CTRL 0x10AC
#define ECCW1_FLIP0_OFFSET 0x10B0
+#define ECCW1_FLIP1_OFFSET 0x10B4
#define ECCR0_CERR_STAT_OFFSET 0x10BC
#define ECCR0_CE_ADDR_LO_OFFSET 0x10C0
#define ECCR0_CE_ADDR_HI_OFFSET 0x10C4
@@ -116,9 +119,6 @@
#define XDDR_BUS_WIDTH_32 1
#define XDDR_BUS_WIDTH_16 2
-#define ECC_CEPOISON_MASK 0x1
-#define ECC_UEPOISON_MASK 0x3
-
#define XDDR_MAX_ROW_CNT 18
#define XDDR_MAX_COL_CNT 10
#define XDDR_MAX_RANK_CNT 2
@@ -133,6 +133,7 @@
* https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register
*/
#define PCSR_UNLOCK_VAL 0xF9E8D7C6
+#define PCSR_LOCK_VAL 1
#define XDDR_ERR_TYPE_CE 0
#define XDDR_ERR_TYPE_UE 1
@@ -142,6 +143,7 @@
#define XILINX_DRAM_SIZE_12G 3
#define XILINX_DRAM_SIZE_16G 4
#define XILINX_DRAM_SIZE_32G 5
+#define NUM_UE_BITPOS 2
/**
* struct ecc_error_info - ECC error log information.
@@ -479,7 +481,7 @@ static void err_callback(const u32 *payload, void *data)
writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET);
/* Lock the PCSR registers */
- writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
edac_dbg(3, "Total error count CE %d UE %d\n",
priv->ce_cnt, priv->ue_cnt);
}
@@ -650,7 +652,7 @@ static void enable_intr(struct edac_priv *priv)
writel(XDDR_IRQ_UE_MASK,
priv->ddrmc_baseaddr + XDDR_IRQ1_EN_OFFSET);
/* Lock the PCSR registers */
- writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
}
static void disable_intr(struct edac_priv *priv)
@@ -663,7 +665,7 @@ static void disable_intr(struct edac_priv *priv)
priv->ddrmc_baseaddr + XDDR_IRQ_DIS_OFFSET);
/* Lock the PCSR registers */
- writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
}
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
@@ -734,38 +736,63 @@ static void poison_setup(struct edac_priv *priv)
writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC15_OFFSET);
}
-static ssize_t xddr_inject_data_poison_store(struct mem_ctl_info *mci,
- const char __user *data)
+static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos)
{
+ u32 ecc0_flip0, ecc1_flip0, ecc0_flip1, ecc1_flip1;
struct edac_priv *priv = mci->pvt_info;
- writel(0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
- writel(0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
-
- if (strncmp(data, "CE", 2) == 0) {
- writel(ECC_CEPOISON_MASK, priv->ddrmc_baseaddr +
- ECCW0_FLIP0_OFFSET);
- writel(ECC_CEPOISON_MASK, priv->ddrmc_baseaddr +
- ECCW1_FLIP0_OFFSET);
+ if (ce_bitpos < ECCW0_FLIP0_BITS) {
+ ecc0_flip0 = BIT(ce_bitpos);
+ ecc1_flip0 = BIT(ce_bitpos);
+ ecc0_flip1 = 0;
+ ecc1_flip1 = 0;
} else {
- writel(ECC_UEPOISON_MASK, priv->ddrmc_baseaddr +
- ECCW0_FLIP0_OFFSET);
- writel(ECC_UEPOISON_MASK, priv->ddrmc_baseaddr +
- ECCW1_FLIP0_OFFSET);
+ ce_bitpos = ce_bitpos - ECCW0_FLIP0_BITS;
+ ecc0_flip1 = BIT(ce_bitpos);
+ ecc1_flip1 = BIT(ce_bitpos);
+ ecc0_flip0 = 0;
+ ecc1_flip0 = 0;
}
- /* Lock the PCSR registers */
- writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
-
- return 0;
+ writel(ecc0_flip0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+ writel(ecc1_flip0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
+ writel(ecc0_flip1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+ writel(ecc1_flip1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
}
-static ssize_t inject_data_poison_store(struct file *file, const char __user *data,
- size_t count, loff_t *ppos)
+/*
+ * To inject a correctable error, the following steps are needed:
+ *
+ * - Write the correctable error bit position value:
+ * echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ce
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 to make sure default
+ * configuration is there and no addresses are masked.
+ *
+ * The row, column, bank, group and rank registers are written to the
+ * match ADEC bit to generate errors at the particular address. ADEC14
+ * and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ce_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit position given by the user.
+ *
+ * Upon doing a read to the address the errors are injected.
+ */
+static ssize_t inject_data_ce_store(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
{
struct device *dev = file->private_data;
struct mem_ctl_info *mci = to_mci(dev);
struct edac_priv *priv = mci->pvt_info;
+ u8 ce_bitpos;
+ int ret;
+
+ ret = kstrtou8_from_user(data, count, 0, &ce_bitpos);
+ if (ret)
+ return ret;
/* Unlock the PCSR registers */
writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
@@ -773,17 +800,110 @@ static ssize_t inject_data_poison_store(struct file *file, const char __user *da
poison_setup(priv);
+ xddr_inject_data_ce_store(mci, ce_bitpos);
+ ret = count;
+
/* Lock the PCSR registers */
- writel(1, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+ return ret;
+}
+
+static const struct file_operations xddr_inject_ce_fops = {
+ .open = simple_open,
+ .write = inject_data_ce_store,
+ .llseek = generic_file_llseek,
+};
+
+static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1)
+{
+ struct edac_priv *priv = mci->pvt_info;
+
+ writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+ writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+ writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
+ writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
+}
+
+/*
+ * To inject an uncorrectable error, the following steps are needed:
+ * echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ue
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 so that none of the
+ * addresses are masked. The row, column, bank, group and rank registers
+ * are written to the match ADEC bit to generate errors at the
+ * particular address. ADEC14 and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ue_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit position given by the user. For
+ * uncorrectable errors
+ * 2 bit errors are injected.
+ *
+ * Upon doing a read to the address the errors are injected.
+ */
+static ssize_t inject_data_ue_store(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct device *dev = file->private_data;
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct edac_priv *priv = mci->pvt_info;
+ char buf[6], *pbuf, *token[2];
+ u32 val0 = 0, val1 = 0;
+ u8 len, ue0, ue1;
+ int i, ret;
+
+ len = min_t(size_t, count, sizeof(buf));
+ if (copy_from_user(buf, data, len))
+ return -EFAULT;
+
+ buf[len] = '\0';
+ pbuf = &buf[0];
+ for (i = 0; i < NUM_UE_BITPOS; i++)
+ token[i] = strsep(&pbuf, ",");
+
+ ret = kstrtou8(token[0], 0, &ue0);
+ if (ret)
+ return ret;
+
+ ret = kstrtou8(token[1], 0, &ue1);
+ if (ret)
+ return ret;
+
+ if (ue0 < ECCW0_FLIP0_BITS) {
+ val0 = BIT(ue0);
+ } else {
+ ue0 = ue0 - ECCW0_FLIP0_BITS;
+ val1 = BIT(ue0);
+ }
+
+ if (ue1 < ECCW0_FLIP0_BITS) {
+ val0 |= BIT(ue1);
+ } else {
+ ue1 = ue1 - ECCW0_FLIP0_BITS;
+ val1 |= BIT(ue1);
+ }
- xddr_inject_data_poison_store(mci, data);
+ /* Unlock the PCSR registers */
+ writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+ poison_setup(priv);
+
+ xddr_inject_data_ue_store(mci, val0, val1);
+
+ /* Lock the PCSR registers */
+ writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
return count;
}
-static const struct file_operations xddr_inject_enable_fops = {
+static const struct file_operations xddr_inject_ue_fops = {
.open = simple_open,
- .write = inject_data_poison_store,
+ .write = inject_data_ue_store,
.llseek = generic_file_llseek,
};
@@ -795,8 +915,17 @@ static void create_debugfs_attributes(struct mem_ctl_info *mci)
if (!priv->debugfs)
return;
- edac_debugfs_create_file("inject_error", 0200, priv->debugfs,
- &mci->dev, &xddr_inject_enable_fops);
+ if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs,
+ &mci->dev, &xddr_inject_ce_fops)) {
+ debugfs_remove_recursive(priv->debugfs);
+ return;
+ }
+
+ if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs,
+ &mci->dev, &xddr_inject_ue_fops)) {
+ debugfs_remove_recursive(priv->debugfs);
+ return;
+ }
debugfs_create_x64("address", 0600, priv->debugfs,
&priv->err_inject_addr);
mci->debugfs = priv->debugfs;
@@ -1031,7 +1160,7 @@ free_edac_mc:
return rc;
}
-static int mc_remove(struct platform_device *pdev)
+static void mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
struct edac_priv *priv = mci->pvt_info;
@@ -1049,8 +1178,6 @@ static int mc_remove(struct platform_device *pdev)
XPM_EVENT_ERROR_MASK_DDRMC_NCR, err_callback, mci);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
-
- return 0;
}
static struct platform_driver xilinx_ddr_edac_mc_driver = {
@@ -1059,7 +1186,7 @@ static struct platform_driver xilinx_ddr_edac_mc_driver = {
.of_match_table = xlnx_edac_match,
},
.probe = mc_probe,
- .remove = mc_remove,
+ .remove_new = mc_remove,
};
module_platform_driver(xilinx_ddr_edac_mc_driver);
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 6ac5ff20a2fe..401a77e3b5fa 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work)
*/
card->bm_generation = generation;
- if (root_device == NULL) {
+ if (card->gap_count == 0) {
+ /*
+ * If self IDs have inconsistent gap counts, do a
+ * bus reset ASAP. The config rom read might never
+ * complete, so don't wait for it. However, still
+ * send a PHY configuration packet prior to the
+ * bus reset. The PHY configuration packet might
+ * fail, but 1394-2008 8.4.5.2 explicitly permits
+ * it in this case, so it should be safe to try.
+ */
+ new_root_id = local_id;
+ /*
+ * We must always send a bus reset if the gap count
+ * is inconsistent, so bypass the 5-reset limit.
+ */
+ card->bm_retries = 0;
+ } else if (root_device == NULL) {
/*
* Either link_on is false, or we failed to read the
* config rom. In either case, pick another root.
@@ -484,7 +500,19 @@ static void bm_work(struct work_struct *work)
fw_notice(card, "phy config: new root=%x, gap_count=%d\n",
new_root_id, gap_count);
fw_send_phy_config(card, new_root_id, generation, gap_count);
- reset_bus(card, true);
+ /*
+ * Where possible, use a short bus reset to minimize
+ * disruption to isochronous transfers. But in the event
+ * of a gap count inconsistency, use a long bus reset.
+ *
+ * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus
+ * may set different gap counts after a bus reset. On a mixed
+ * 1394/1394a bus, a short bus reset can get doubled. Some
+ * nodes may treat the double reset as one bus reset and others
+ * may treat it as two, causing a gap count inconsistency
+ * again. Using a long bus reset prevents this.
+ */
+ reset_bus(card, card->gap_count != 0);
/* Will allocate broadcast channel after the reset. */
goto out;
}
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 0547253d16fe..7d3346b3a2bf 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -118,10 +118,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size)
* @buf: where to put the string
* @size: size of @buf, in bytes
*
- * The string is taken from a minimal ASCII text descriptor leaf after
- * the immediate entry with @key. The string is zero-terminated.
- * An overlong string is silently truncated such that it and the
- * zero byte fit into @size.
+ * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the
+ * @key. The string is zero-terminated. An overlong string is silently truncated such that it
+ * and the zero byte fit into @size.
*
* Returns strlen(buf) or a negative error code.
*/
@@ -368,8 +367,17 @@ static ssize_t show_text_leaf(struct device *dev,
for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) {
int result = fw_csr_string(directories[i], attr->key, buf, bufsize);
// Detected.
- if (result >= 0)
+ if (result >= 0) {
ret = result;
+ } else if (i == 0 && attr->key == CSR_VENDOR) {
+ // Sony DVMC-DA1 has configuration ROM such that the descriptor leaf entry
+ // in the root directory follows to the directory entry for vendor ID
+ // instead of the immediate value for vendor ID.
+ result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf,
+ bufsize);
+ if (result >= 0)
+ ret = result;
+ }
}
if (ret >= 0) {
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 9db9290c3269..7bc71f4be64a 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -3773,6 +3773,7 @@ static int pci_probe(struct pci_dev *dev,
return 0;
fail_msi:
+ devm_free_irq(&dev->dev, dev->irq, ohci);
pci_disable_msi(dev);
return err;
@@ -3800,6 +3801,7 @@ static void pci_remove(struct pci_dev *dev)
software_reset(ohci);
+ devm_free_irq(&dev->dev, dev->irq, ohci);
pci_disable_msi(dev);
dev_notice(&dev->dev, "removing fw-ohci device\n");
diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
index 1c7940ba5539..2f557e90f2eb 100644
--- a/drivers/firmware/arm_ffa/bus.c
+++ b/drivers/firmware/arm_ffa/bus.c
@@ -105,7 +105,7 @@ static struct attribute *ffa_device_attributes_attrs[] = {
};
ATTRIBUTE_GROUPS(ffa_device_attributes);
-struct bus_type ffa_bus_type = {
+const struct bus_type ffa_bus_type = {
.name = "arm_ffa",
.match = ffa_device_match,
.probe = ffa_device_probe,
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index 6146b2927d5c..f2556a8e9401 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -107,12 +107,12 @@ struct ffa_drv_info {
struct work_struct notif_pcpu_work;
struct work_struct irq_work;
struct xarray partition_info;
- unsigned int partition_count;
DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS));
struct mutex notify_lock; /* lock to protect notifier hashtable */
};
static struct ffa_drv_info *drv_info;
+static void ffa_partitions_cleanup(void);
/*
* The driver must be able to support all the versions from the earliest
@@ -733,6 +733,11 @@ static void __do_sched_recv_cb(u16 part_id, u16 vcpu, bool is_per_vcpu)
void *cb_data;
partition = xa_load(&drv_info->partition_info, part_id);
+ if (!partition) {
+ pr_err("%s: Invalid partition ID 0x%x\n", __func__, part_id);
+ return;
+ }
+
read_lock(&partition->rw_lock);
callback = partition->callback;
cb_data = partition->cb_data;
@@ -915,6 +920,11 @@ static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback,
return -EOPNOTSUPP;
partition = xa_load(&drv_info->partition_info, part_id);
+ if (!partition) {
+ pr_err("%s: Invalid partition ID 0x%x\n", __func__, part_id);
+ return -EINVAL;
+ }
+
write_lock(&partition->rw_lock);
cb_valid = !!partition->callback;
@@ -1186,9 +1196,9 @@ void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid)
kfree(pbuf);
}
-static void ffa_setup_partitions(void)
+static int ffa_setup_partitions(void)
{
- int count, idx;
+ int count, idx, ret;
uuid_t uuid;
struct ffa_device *ffa_dev;
struct ffa_dev_part_info *info;
@@ -1197,7 +1207,7 @@ static void ffa_setup_partitions(void)
count = ffa_partition_probe(&uuid_null, &pbuf);
if (count <= 0) {
pr_info("%s: No partitions found, error %d\n", __func__, count);
- return;
+ return -EINVAL;
}
xa_init(&drv_info->partition_info);
@@ -1226,40 +1236,53 @@ static void ffa_setup_partitions(void)
ffa_device_unregister(ffa_dev);
continue;
}
- xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL);
+ rwlock_init(&info->rw_lock);
+ ret = xa_insert(&drv_info->partition_info, tpbuf->id,
+ info, GFP_KERNEL);
+ if (ret) {
+ pr_err("%s: failed to save partition ID 0x%x - ret:%d\n",
+ __func__, tpbuf->id, ret);
+ ffa_device_unregister(ffa_dev);
+ kfree(info);
+ }
}
- drv_info->partition_count = count;
kfree(pbuf);
/* Allocate for the host */
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return;
- xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL);
- drv_info->partition_count++;
+ if (!info) {
+ pr_err("%s: failed to alloc Host partition ID 0x%x. Abort.\n",
+ __func__, drv_info->vm_id);
+ /* Already registered devices are freed on bus_exit */
+ ffa_partitions_cleanup();
+ return -ENOMEM;
+ }
+
+ rwlock_init(&info->rw_lock);
+ ret = xa_insert(&drv_info->partition_info, drv_info->vm_id,
+ info, GFP_KERNEL);
+ if (ret) {
+ pr_err("%s: failed to save Host partition ID 0x%x - ret:%d. Abort.\n",
+ __func__, drv_info->vm_id, ret);
+ kfree(info);
+ /* Already registered devices are freed on bus_exit */
+ ffa_partitions_cleanup();
+ }
+
+ return ret;
}
static void ffa_partitions_cleanup(void)
{
- struct ffa_dev_part_info **info;
- int idx, count = drv_info->partition_count;
-
- if (!count)
- return;
-
- info = kcalloc(count, sizeof(*info), GFP_KERNEL);
- if (!info)
- return;
-
- xa_extract(&drv_info->partition_info, (void **)info, 0, VM_ID_MASK,
- count, XA_PRESENT);
+ struct ffa_dev_part_info *info;
+ unsigned long idx;
- for (idx = 0; idx < count; idx++)
- kfree(info[idx]);
- kfree(info);
+ xa_for_each(&drv_info->partition_info, idx, info) {
+ xa_erase(&drv_info->partition_info, idx);
+ kfree(info);
+ }
- drv_info->partition_count = 0;
xa_destroy(&drv_info->partition_info);
}
@@ -1508,7 +1531,11 @@ static int __init ffa_init(void)
ffa_notifications_setup();
- ffa_setup_partitions();
+ ret = ffa_setup_partitions();
+ if (ret) {
+ pr_err("failed to setup partitions\n");
+ goto cleanup_notifs;
+ }
ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle,
drv_info, true);
@@ -1516,6 +1543,9 @@ static int __init ffa_init(void)
pr_info("Failed to register driver sched callback %d\n", ret);
return 0;
+
+cleanup_notifs:
+ ffa_notifications_cleanup();
free_pages:
if (drv_info->tx_buffer)
free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE);
@@ -1535,7 +1565,6 @@ static void __exit ffa_exit(void)
ffa_rxtx_unmap(drv_info->vm_id);
free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE);
free_pages_exact(drv_info->rx_buffer, RXTX_BUFFER_SIZE);
- xa_destroy(&drv_info->partition_info);
kfree(drv_info);
arm_ffa_bus_exit();
}
diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c
index c15928b8c5cc..77c78be6e79c 100644
--- a/drivers/firmware/arm_scmi/bus.c
+++ b/drivers/firmware/arm_scmi/bus.c
@@ -141,6 +141,17 @@ out:
return ret;
}
+static int scmi_protocol_table_register(const struct scmi_device_id *id_table)
+{
+ int ret = 0;
+ const struct scmi_device_id *entry;
+
+ for (entry = id_table; entry->name && ret == 0; entry++)
+ ret = scmi_protocol_device_request(entry);
+
+ return ret;
+}
+
/**
* scmi_protocol_device_unrequest - Helper to unrequest a device
*
@@ -186,6 +197,15 @@ static void scmi_protocol_device_unrequest(const struct scmi_device_id *id_table
mutex_unlock(&scmi_requested_devices_mtx);
}
+static void
+scmi_protocol_table_unregister(const struct scmi_device_id *id_table)
+{
+ const struct scmi_device_id *entry;
+
+ for (entry = id_table; entry->name; entry++)
+ scmi_protocol_device_unrequest(entry);
+}
+
static const struct scmi_device_id *
scmi_dev_match_id(struct scmi_device *scmi_dev, struct scmi_driver *scmi_drv)
{
@@ -263,7 +283,7 @@ static void scmi_dev_remove(struct device *dev)
scmi_drv->remove(scmi_dev);
}
-struct bus_type scmi_bus_type = {
+const struct bus_type scmi_bus_type = {
.name = "scmi_protocol",
.match = scmi_dev_match,
.probe = scmi_dev_probe,
@@ -279,7 +299,7 @@ int scmi_driver_register(struct scmi_driver *driver, struct module *owner,
if (!driver->probe)
return -EINVAL;
- retval = scmi_protocol_device_request(driver->id_table);
+ retval = scmi_protocol_table_register(driver->id_table);
if (retval)
return retval;
@@ -299,7 +319,7 @@ EXPORT_SYMBOL_GPL(scmi_driver_register);
void scmi_driver_unregister(struct scmi_driver *driver)
{
driver_unregister(&driver->driver);
- scmi_protocol_device_unrequest(driver->id_table);
+ scmi_protocol_table_unregister(driver->id_table);
}
EXPORT_SYMBOL_GPL(scmi_driver_unregister);
diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c
index c0644558042a..134019297d08 100644
--- a/drivers/firmware/arm_scmi/clock.c
+++ b/drivers/firmware/arm_scmi/clock.c
@@ -13,7 +13,7 @@
#include "notify.h"
/* Updated only after ALL the mandatory features for that version are merged */
-#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x20001
+#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x30000
enum scmi_clock_protocol_cmd {
CLOCK_ATTRIBUTES = 0x3,
@@ -28,8 +28,13 @@ enum scmi_clock_protocol_cmd {
CLOCK_POSSIBLE_PARENTS_GET = 0xC,
CLOCK_PARENT_SET = 0xD,
CLOCK_PARENT_GET = 0xE,
+ CLOCK_GET_PERMISSIONS = 0xF,
};
+#define CLOCK_STATE_CONTROL_ALLOWED BIT(31)
+#define CLOCK_PARENT_CONTROL_ALLOWED BIT(30)
+#define CLOCK_RATE_CONTROL_ALLOWED BIT(29)
+
enum clk_state {
CLK_STATE_DISABLE,
CLK_STATE_ENABLE,
@@ -49,6 +54,8 @@ struct scmi_msg_resp_clock_attributes {
#define SUPPORTS_RATE_CHANGE_REQUESTED_NOTIF(x) ((x) & BIT(30))
#define SUPPORTS_EXTENDED_NAMES(x) ((x) & BIT(29))
#define SUPPORTS_PARENT_CLOCK(x) ((x) & BIT(28))
+#define SUPPORTS_EXTENDED_CONFIG(x) ((x) & BIT(27))
+#define SUPPORTS_GET_PERMISSIONS(x) ((x) & BIT(1))
u8 name[SCMI_SHORT_NAME_MAX_SIZE];
__le32 clock_enable_latency;
};
@@ -152,14 +159,18 @@ struct clock_info {
u32 version;
int num_clocks;
int max_async_req;
+ bool notify_rate_changed_cmd;
+ bool notify_rate_change_requested_cmd;
atomic_t cur_async_req;
struct scmi_clock_info *clk;
int (*clock_config_set)(const struct scmi_protocol_handle *ph,
u32 clk_id, enum clk_state state,
- u8 oem_type, u32 oem_val, bool atomic);
+ enum scmi_clock_oem_config oem_type,
+ u32 oem_val, bool atomic);
int (*clock_config_get)(const struct scmi_protocol_handle *ph,
- u32 clk_id, u8 oem_type, u32 *attributes,
- bool *enabled, u32 *oem_val, bool atomic);
+ u32 clk_id, enum scmi_clock_oem_config oem_type,
+ u32 *attributes, bool *enabled, u32 *oem_val,
+ bool atomic);
};
static enum scmi_clock_protocol_cmd evt_2_cmd[] = {
@@ -167,6 +178,15 @@ static enum scmi_clock_protocol_cmd evt_2_cmd[] = {
CLOCK_RATE_CHANGE_REQUESTED_NOTIFY,
};
+static inline struct scmi_clock_info *
+scmi_clock_domain_lookup(struct clock_info *ci, u32 clk_id)
+{
+ if (clk_id >= ci->num_clocks)
+ return ERR_PTR(-EINVAL);
+
+ return ci->clk + clk_id;
+}
+
static int
scmi_clock_protocol_attributes_get(const struct scmi_protocol_handle *ph,
struct clock_info *ci)
@@ -189,6 +209,17 @@ scmi_clock_protocol_attributes_get(const struct scmi_protocol_handle *ph,
}
ph->xops->xfer_put(ph, t);
+
+ if (!ret) {
+ if (!ph->hops->protocol_msg_check(ph, CLOCK_RATE_NOTIFY, NULL))
+ ci->notify_rate_changed_cmd = true;
+
+ if (!ph->hops->protocol_msg_check(ph,
+ CLOCK_RATE_CHANGE_REQUESTED_NOTIFY,
+ NULL))
+ ci->notify_rate_change_requested_cmd = true;
+ }
+
return ret;
}
@@ -284,14 +315,44 @@ static int scmi_clock_possible_parents(const struct scmi_protocol_handle *ph, u3
return ret;
}
+static int
+scmi_clock_get_permissions(const struct scmi_protocol_handle *ph, u32 clk_id,
+ struct scmi_clock_info *clk)
+{
+ struct scmi_xfer *t;
+ u32 perm;
+ int ret;
+
+ ret = ph->xops->xfer_get_init(ph, CLOCK_GET_PERMISSIONS,
+ sizeof(clk_id), sizeof(perm), &t);
+ if (ret)
+ return ret;
+
+ put_unaligned_le32(clk_id, t->tx.buf);
+
+ ret = ph->xops->do_xfer(ph, t);
+ if (!ret) {
+ perm = get_unaligned_le32(t->rx.buf);
+
+ clk->state_ctrl_forbidden = !(perm & CLOCK_STATE_CONTROL_ALLOWED);
+ clk->rate_ctrl_forbidden = !(perm & CLOCK_RATE_CONTROL_ALLOWED);
+ clk->parent_ctrl_forbidden = !(perm & CLOCK_PARENT_CONTROL_ALLOWED);
+ }
+
+ ph->xops->xfer_put(ph, t);
+
+ return ret;
+}
+
static int scmi_clock_attributes_get(const struct scmi_protocol_handle *ph,
- u32 clk_id, struct scmi_clock_info *clk,
+ u32 clk_id, struct clock_info *cinfo,
u32 version)
{
int ret;
u32 attributes;
struct scmi_xfer *t;
struct scmi_msg_resp_clock_attributes *attr;
+ struct scmi_clock_info *clk = cinfo->clk + clk_id;
ret = ph->xops->xfer_get_init(ph, CLOCK_ATTRIBUTES,
sizeof(clk_id), sizeof(*attr), &t);
@@ -324,12 +385,20 @@ static int scmi_clock_attributes_get(const struct scmi_protocol_handle *ph,
NULL, clk->name,
SCMI_MAX_STR_SIZE);
- if (SUPPORTS_RATE_CHANGED_NOTIF(attributes))
+ if (cinfo->notify_rate_changed_cmd &&
+ SUPPORTS_RATE_CHANGED_NOTIF(attributes))
clk->rate_changed_notifications = true;
- if (SUPPORTS_RATE_CHANGE_REQUESTED_NOTIF(attributes))
+ if (cinfo->notify_rate_change_requested_cmd &&
+ SUPPORTS_RATE_CHANGE_REQUESTED_NOTIF(attributes))
clk->rate_change_requested_notifications = true;
- if (SUPPORTS_PARENT_CLOCK(attributes))
- scmi_clock_possible_parents(ph, clk_id, clk);
+ if (PROTOCOL_REV_MAJOR(version) >= 0x3) {
+ if (SUPPORTS_PARENT_CLOCK(attributes))
+ scmi_clock_possible_parents(ph, clk_id, clk);
+ if (SUPPORTS_GET_PERMISSIONS(attributes))
+ scmi_clock_get_permissions(ph, clk_id, clk);
+ if (SUPPORTS_EXTENDED_CONFIG(attributes))
+ clk->extended_config = true;
+ }
}
return ret;
@@ -502,6 +571,14 @@ static int scmi_clock_rate_set(const struct scmi_protocol_handle *ph,
struct scmi_xfer *t;
struct scmi_clock_set_rate *cfg;
struct clock_info *ci = ph->get_priv(ph);
+ struct scmi_clock_info *clk;
+
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ if (clk->rate_ctrl_forbidden)
+ return -EACCES;
ret = ph->xops->xfer_get_init(ph, CLOCK_RATE_SET, sizeof(*cfg), 0, &t);
if (ret)
@@ -543,7 +620,8 @@ static int scmi_clock_rate_set(const struct scmi_protocol_handle *ph,
static int
scmi_clock_config_set(const struct scmi_protocol_handle *ph, u32 clk_id,
- enum clk_state state, u8 __unused0, u32 __unused1,
+ enum clk_state state,
+ enum scmi_clock_oem_config __unused0, u32 __unused1,
bool atomic)
{
int ret;
@@ -580,14 +658,16 @@ scmi_clock_set_parent(const struct scmi_protocol_handle *ph, u32 clk_id,
struct clock_info *ci = ph->get_priv(ph);
struct scmi_clock_info *clk;
- if (clk_id >= ci->num_clocks)
- return -EINVAL;
-
- clk = ci->clk + clk_id;
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
if (parent_id >= clk->num_parents)
return -EINVAL;
+ if (clk->parent_ctrl_forbidden)
+ return -EACCES;
+
ret = ph->xops->xfer_get_init(ph, CLOCK_PARENT_SET,
sizeof(*cfg), 0, &t);
if (ret)
@@ -628,10 +708,11 @@ scmi_clock_get_parent(const struct scmi_protocol_handle *ph, u32 clk_id,
return ret;
}
-/* For SCMI clock v2.1 and onwards */
+/* For SCMI clock v3.0 and onwards */
static int
scmi_clock_config_set_v2(const struct scmi_protocol_handle *ph, u32 clk_id,
- enum clk_state state, u8 oem_type, u32 oem_val,
+ enum clk_state state,
+ enum scmi_clock_oem_config oem_type, u32 oem_val,
bool atomic)
{
int ret;
@@ -671,6 +752,14 @@ static int scmi_clock_enable(const struct scmi_protocol_handle *ph, u32 clk_id,
bool atomic)
{
struct clock_info *ci = ph->get_priv(ph);
+ struct scmi_clock_info *clk;
+
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ if (clk->state_ctrl_forbidden)
+ return -EACCES;
return ci->clock_config_set(ph, clk_id, CLK_STATE_ENABLE,
NULL_OEM_TYPE, 0, atomic);
@@ -680,16 +769,24 @@ static int scmi_clock_disable(const struct scmi_protocol_handle *ph, u32 clk_id,
bool atomic)
{
struct clock_info *ci = ph->get_priv(ph);
+ struct scmi_clock_info *clk;
+
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ if (clk->state_ctrl_forbidden)
+ return -EACCES;
return ci->clock_config_set(ph, clk_id, CLK_STATE_DISABLE,
NULL_OEM_TYPE, 0, atomic);
}
-/* For SCMI clock v2.1 and onwards */
+/* For SCMI clock v3.0 and onwards */
static int
scmi_clock_config_get_v2(const struct scmi_protocol_handle *ph, u32 clk_id,
- u8 oem_type, u32 *attributes, bool *enabled,
- u32 *oem_val, bool atomic)
+ enum scmi_clock_oem_config oem_type, u32 *attributes,
+ bool *enabled, u32 *oem_val, bool atomic)
{
int ret;
u32 flags;
@@ -730,8 +827,8 @@ scmi_clock_config_get_v2(const struct scmi_protocol_handle *ph, u32 clk_id,
static int
scmi_clock_config_get(const struct scmi_protocol_handle *ph, u32 clk_id,
- u8 oem_type, u32 *attributes, bool *enabled,
- u32 *oem_val, bool atomic)
+ enum scmi_clock_oem_config oem_type, u32 *attributes,
+ bool *enabled, u32 *oem_val, bool atomic)
{
int ret;
struct scmi_xfer *t;
@@ -768,20 +865,38 @@ static int scmi_clock_state_get(const struct scmi_protocol_handle *ph,
}
static int scmi_clock_config_oem_set(const struct scmi_protocol_handle *ph,
- u32 clk_id, u8 oem_type, u32 oem_val,
- bool atomic)
+ u32 clk_id,
+ enum scmi_clock_oem_config oem_type,
+ u32 oem_val, bool atomic)
{
struct clock_info *ci = ph->get_priv(ph);
+ struct scmi_clock_info *clk;
+
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ if (!clk->extended_config)
+ return -EOPNOTSUPP;
return ci->clock_config_set(ph, clk_id, CLK_STATE_UNCHANGED,
oem_type, oem_val, atomic);
}
static int scmi_clock_config_oem_get(const struct scmi_protocol_handle *ph,
- u32 clk_id, u8 oem_type, u32 *oem_val,
- u32 *attributes, bool atomic)
+ u32 clk_id,
+ enum scmi_clock_oem_config oem_type,
+ u32 *oem_val, u32 *attributes, bool atomic)
{
struct clock_info *ci = ph->get_priv(ph);
+ struct scmi_clock_info *clk;
+
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ if (!clk->extended_config)
+ return -EOPNOTSUPP;
return ci->clock_config_get(ph, clk_id, oem_type, attributes,
NULL, oem_val, atomic);
@@ -800,10 +915,10 @@ scmi_clock_info_get(const struct scmi_protocol_handle *ph, u32 clk_id)
struct scmi_clock_info *clk;
struct clock_info *ci = ph->get_priv(ph);
- if (clk_id >= ci->num_clocks)
+ clk = scmi_clock_domain_lookup(ci, clk_id);
+ if (IS_ERR(clk))
return NULL;
- clk = ci->clk + clk_id;
if (!clk->name[0])
return NULL;
@@ -824,6 +939,28 @@ static const struct scmi_clk_proto_ops clk_proto_ops = {
.parent_get = scmi_clock_get_parent,
};
+static bool scmi_clk_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ bool supported;
+ struct scmi_clock_info *clk;
+ struct clock_info *ci = ph->get_priv(ph);
+
+ if (evt_id >= ARRAY_SIZE(evt_2_cmd))
+ return false;
+
+ clk = scmi_clock_domain_lookup(ci, src_id);
+ if (IS_ERR(clk))
+ return false;
+
+ if (evt_id == SCMI_EVENT_CLOCK_RATE_CHANGED)
+ supported = clk->rate_changed_notifications;
+ else
+ supported = clk->rate_change_requested_notifications;
+
+ return supported;
+}
+
static int scmi_clk_rate_notify(const struct scmi_protocol_handle *ph,
u32 clk_id, int message_id, bool enable)
{
@@ -908,6 +1045,7 @@ static const struct scmi_event clk_events[] = {
};
static const struct scmi_event_ops clk_event_ops = {
+ .is_notify_supported = scmi_clk_notify_supported,
.get_num_sources = scmi_clk_get_num_sources,
.set_notify_enabled = scmi_clk_set_notify_enabled,
.fill_custom_report = scmi_clk_fill_custom_report,
@@ -949,13 +1087,12 @@ static int scmi_clock_protocol_init(const struct scmi_protocol_handle *ph)
for (clkid = 0; clkid < cinfo->num_clocks; clkid++) {
struct scmi_clock_info *clk = cinfo->clk + clkid;
- ret = scmi_clock_attributes_get(ph, clkid, clk, version);
+ ret = scmi_clock_attributes_get(ph, clkid, cinfo, version);
if (!ret)
scmi_clock_describe_rates_get(ph, clkid, clk);
}
- if (PROTOCOL_REV_MAJOR(version) >= 0x2 &&
- PROTOCOL_REV_MINOR(version) >= 0x1) {
+ if (PROTOCOL_REV_MAJOR(version) >= 0x3) {
cinfo->clock_config_set = scmi_clock_config_set_v2;
cinfo->clock_config_get = scmi_clock_config_get_v2;
} else {
diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h
index c46dc5215af7..6affbfdd1dec 100644
--- a/drivers/firmware/arm_scmi/common.h
+++ b/drivers/firmware/arm_scmi/common.h
@@ -141,7 +141,7 @@ scmi_revision_area_get(const struct scmi_protocol_handle *ph);
void scmi_setup_protocol_implemented(const struct scmi_protocol_handle *ph,
u8 *prot_imp);
-extern struct bus_type scmi_bus_type;
+extern const struct bus_type scmi_bus_type;
#define SCMI_BUS_NOTIFY_DEVICE_REQUEST 0
#define SCMI_BUS_NOTIFY_DEVICE_UNREQUEST 1
@@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem,
void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem);
bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem,
struct scmi_xfer *xfer);
+bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem);
/* declarations for message passing transports */
struct scmi_msg_payld;
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 3ea64b22cf0d..34d77802c990 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -86,6 +86,12 @@ struct scmi_xfers_info {
* @users: A refcount to track effective users of this protocol.
* @priv: Reference for optional protocol private data.
* @version: Protocol version supported by the platform as detected at runtime.
+ * @negotiated_version: When the platform supports a newer protocol version,
+ * the agent will try to negotiate with the platform the
+ * usage of the newest version known to it, since
+ * backward compatibility is NOT automatically assured.
+ * This field is NON-zero when a successful negotiation
+ * has completed.
* @ph: An embedded protocol handle that will be passed down to protocol
* initialization code to identify this instance.
*
@@ -99,6 +105,7 @@ struct scmi_protocol_instance {
refcount_t users;
void *priv;
unsigned int version;
+ unsigned int negotiated_version;
struct scmi_protocol_handle ph;
};
@@ -1754,10 +1761,44 @@ static void scmi_common_fastchannel_db_ring(struct scmi_fc_db_info *db)
#endif
}
+/**
+ * scmi_protocol_msg_check - Check protocol message attributes
+ *
+ * @ph: A reference to the protocol handle.
+ * @message_id: The ID of the message to check.
+ * @attributes: A parameter to optionally return the retrieved message
+ * attributes, in case of Success.
+ *
+ * An helper to check protocol message attributes for a specific protocol
+ * and message pair.
+ *
+ * Return: 0 on SUCCESS
+ */
+static int scmi_protocol_msg_check(const struct scmi_protocol_handle *ph,
+ u32 message_id, u32 *attributes)
+{
+ int ret;
+ struct scmi_xfer *t;
+
+ ret = xfer_get_init(ph, PROTOCOL_MESSAGE_ATTRIBUTES,
+ sizeof(__le32), 0, &t);
+ if (ret)
+ return ret;
+
+ put_unaligned_le32(message_id, t->tx.buf);
+ ret = do_xfer(ph, t);
+ if (!ret && attributes)
+ *attributes = get_unaligned_le32(t->rx.buf);
+ xfer_put(ph, t);
+
+ return ret;
+}
+
static const struct scmi_proto_helpers_ops helpers_ops = {
.extended_name_get = scmi_common_extended_name_get,
.iter_response_init = scmi_iterator_init,
.iter_response_run = scmi_iterator_run,
+ .protocol_msg_check = scmi_protocol_msg_check,
.fastchannel_init = scmi_common_fastchannel_init,
.fastchannel_db_ring = scmi_common_fastchannel_db_ring,
};
@@ -1782,6 +1823,44 @@ scmi_revision_area_get(const struct scmi_protocol_handle *ph)
}
/**
+ * scmi_protocol_version_negotiate - Negotiate protocol version
+ *
+ * @ph: A reference to the protocol handle.
+ *
+ * An helper to negotiate a protocol version different from the latest
+ * advertised as supported from the platform: on Success backward
+ * compatibility is assured by the platform.
+ *
+ * Return: 0 on Success
+ */
+static int scmi_protocol_version_negotiate(struct scmi_protocol_handle *ph)
+{
+ int ret;
+ struct scmi_xfer *t;
+ struct scmi_protocol_instance *pi = ph_to_pi(ph);
+
+ /* At first check if NEGOTIATE_PROTOCOL_VERSION is supported ... */
+ ret = scmi_protocol_msg_check(ph, NEGOTIATE_PROTOCOL_VERSION, NULL);
+ if (ret)
+ return ret;
+
+ /* ... then attempt protocol version negotiation */
+ ret = xfer_get_init(ph, NEGOTIATE_PROTOCOL_VERSION,
+ sizeof(__le32), 0, &t);
+ if (ret)
+ return ret;
+
+ put_unaligned_le32(pi->proto->supported_version, t->tx.buf);
+ ret = do_xfer(ph, t);
+ if (!ret)
+ pi->negotiated_version = pi->proto->supported_version;
+
+ xfer_put(ph, t);
+
+ return ret;
+}
+
+/**
* scmi_alloc_init_protocol_instance - Allocate and initialize a protocol
* instance descriptor.
* @info: The reference to the related SCMI instance.
@@ -1853,11 +1932,21 @@ scmi_alloc_init_protocol_instance(struct scmi_info *info,
devres_close_group(handle->dev, pi->gid);
dev_dbg(handle->dev, "Initialized protocol: 0x%X\n", pi->proto->id);
- if (pi->version > proto->supported_version)
- dev_warn(handle->dev,
- "Detected UNSUPPORTED higher version 0x%X for protocol 0x%X."
- "Backward compatibility is NOT assured.\n",
- pi->version, pi->proto->id);
+ if (pi->version > proto->supported_version) {
+ ret = scmi_protocol_version_negotiate(&pi->ph);
+ if (!ret) {
+ dev_info(handle->dev,
+ "Protocol 0x%X successfully negotiated version 0x%X\n",
+ proto->id, pi->negotiated_version);
+ } else {
+ dev_warn(handle->dev,
+ "Detected UNSUPPORTED higher version 0x%X for protocol 0x%X.\n",
+ pi->version, pi->proto->id);
+ dev_warn(handle->dev,
+ "Trying version 0x%X. Backward compatibility is NOT assured.\n",
+ pi->proto->supported_version);
+ }
+ }
return pi;
diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
index 19246ed1f01f..b8d470417e8f 100644
--- a/drivers/firmware/arm_scmi/mailbox.c
+++ b/drivers/firmware/arm_scmi/mailbox.c
@@ -45,6 +45,20 @@ static void rx_callback(struct mbox_client *cl, void *m)
{
struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl);
+ /*
+ * An A2P IRQ is NOT valid when received while the platform still has
+ * the ownership of the channel, because the platform at first releases
+ * the SMT channel and then sends the completion interrupt.
+ *
+ * This addresses a possible race condition in which a spurious IRQ from
+ * a previous timed-out reply which arrived late could be wrongly
+ * associated with the next pending transaction.
+ */
+ if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) {
+ dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n");
+ return;
+ }
+
scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL);
}
diff --git a/drivers/firmware/arm_scmi/notify.c b/drivers/firmware/arm_scmi/notify.c
index 0efd20cd9d69..27c52531194d 100644
--- a/drivers/firmware/arm_scmi/notify.c
+++ b/drivers/firmware/arm_scmi/notify.c
@@ -99,6 +99,7 @@
#define PROTO_ID_MASK GENMASK(31, 24)
#define EVT_ID_MASK GENMASK(23, 16)
#define SRC_ID_MASK GENMASK(15, 0)
+#define NOTIF_UNSUPP -1
/*
* Builds an unsigned 32bit key from the given input tuple to be used
@@ -788,6 +789,7 @@ int scmi_register_protocol_events(const struct scmi_handle *handle, u8 proto_id,
pd->ph = ph;
for (i = 0; i < ee->num_events; i++, evt++) {
+ int id;
struct scmi_registered_event *r_evt;
r_evt = devm_kzalloc(ni->handle->dev, sizeof(*r_evt),
@@ -809,6 +811,11 @@ int scmi_register_protocol_events(const struct scmi_handle *handle, u8 proto_id,
if (!r_evt->report)
return -ENOMEM;
+ for (id = 0; id < r_evt->num_sources; id++)
+ if (ee->ops->is_notify_supported &&
+ !ee->ops->is_notify_supported(ph, r_evt->evt->id, id))
+ refcount_set(&r_evt->sources[id], NOTIF_UNSUPP);
+
pd->registered_events[i] = r_evt;
/* Ensure events are updated */
smp_wmb();
@@ -1166,7 +1173,13 @@ static inline int __scmi_enable_evt(struct scmi_registered_event *r_evt,
int ret = 0;
sid = &r_evt->sources[src_id];
- if (refcount_read(sid) == 0) {
+ if (refcount_read(sid) == NOTIF_UNSUPP) {
+ dev_dbg(r_evt->proto->ph->dev,
+ "Notification NOT supported - proto_id:%d evt_id:%d src_id:%d",
+ r_evt->proto->id, r_evt->evt->id,
+ src_id);
+ ret = -EOPNOTSUPP;
+ } else if (refcount_read(sid) == 0) {
ret = REVT_NOTIFY_ENABLE(r_evt, r_evt->evt->id,
src_id);
if (!ret)
@@ -1179,6 +1192,8 @@ static inline int __scmi_enable_evt(struct scmi_registered_event *r_evt,
} else {
for (; num_sources; src_id++, num_sources--) {
sid = &r_evt->sources[src_id];
+ if (refcount_read(sid) == NOTIF_UNSUPP)
+ continue;
if (refcount_dec_and_test(sid))
REVT_NOTIFY_DISABLE(r_evt,
r_evt->evt->id, src_id);
diff --git a/drivers/firmware/arm_scmi/notify.h b/drivers/firmware/arm_scmi/notify.h
index 4e9b627edfef..76758a736cf4 100644
--- a/drivers/firmware/arm_scmi/notify.h
+++ b/drivers/firmware/arm_scmi/notify.h
@@ -35,6 +35,8 @@ struct scmi_protocol_handle;
/**
* struct scmi_event_ops - Protocol helpers called by the notification core.
+ * @is_notify_supported: Return 0 if the specified notification for the
+ * specified resource (src_id) is supported.
* @get_num_sources: Returns the number of possible events' sources for this
* protocol
* @set_notify_enabled: Enable/disable the required evt_id/src_id notifications
@@ -50,6 +52,8 @@ struct scmi_protocol_handle;
* process context.
*/
struct scmi_event_ops {
+ bool (*is_notify_supported)(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id);
int (*get_num_sources)(const struct scmi_protocol_handle *ph);
int (*set_notify_enabled)(const struct scmi_protocol_handle *ph,
u8 evt_id, u32 src_id, bool enabled);
diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c
index 25bfb465484d..4e7944b91e38 100644
--- a/drivers/firmware/arm_scmi/optee.c
+++ b/drivers/firmware/arm_scmi/optee.c
@@ -109,8 +109,10 @@ enum scmi_optee_pta_cmd {
* @rx_len: Response size
* @mu: Mutex protection on channel access
* @cinfo: SCMI channel information
- * @shmem: Virtual base address of the shared memory
- * @req: Shared memory protocol handle for SCMI request and synchronous response
+ * @req: union for SCMI interface
+ * @req.shmem: Virtual base address of the shared memory
+ * @req.msg: Shared memory protocol handle for SCMI request and
+ * synchronous response
* @tee_shm: TEE shared memory handle @req or NULL if using IOMEM shmem
* @link: Reference in agent's channel list
*/
diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 8ea2a7b3d35d..981e327e63e3 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -182,6 +182,8 @@ struct scmi_perf_info {
enum scmi_power_scale power_scale;
u64 stats_addr;
u32 stats_size;
+ bool notify_lvl_cmd;
+ bool notify_lim_cmd;
struct perf_dom_info *dom_info;
};
@@ -222,6 +224,15 @@ static int scmi_perf_attributes_get(const struct scmi_protocol_handle *ph,
}
ph->xops->xfer_put(ph, t);
+
+ if (!ret) {
+ if (!ph->hops->protocol_msg_check(ph, PERF_NOTIFY_LEVEL, NULL))
+ pi->notify_lvl_cmd = true;
+
+ if (!ph->hops->protocol_msg_check(ph, PERF_NOTIFY_LIMITS, NULL))
+ pi->notify_lim_cmd = true;
+ }
+
return ret;
}
@@ -239,6 +250,7 @@ static void scmi_perf_xa_destroy(void *data)
static int
scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
struct perf_dom_info *dom_info,
+ bool notify_lim_cmd, bool notify_lvl_cmd,
u32 version)
{
int ret;
@@ -260,8 +272,12 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
dom_info->set_limits = SUPPORTS_SET_LIMITS(flags);
dom_info->info.set_perf = SUPPORTS_SET_PERF_LVL(flags);
- dom_info->perf_limit_notify = SUPPORTS_PERF_LIMIT_NOTIFY(flags);
- dom_info->perf_level_notify = SUPPORTS_PERF_LEVEL_NOTIFY(flags);
+ if (notify_lim_cmd)
+ dom_info->perf_limit_notify =
+ SUPPORTS_PERF_LIMIT_NOTIFY(flags);
+ if (notify_lvl_cmd)
+ dom_info->perf_level_notify =
+ SUPPORTS_PERF_LEVEL_NOTIFY(flags);
dom_info->perf_fastchannels = SUPPORTS_PERF_FASTCHANNELS(flags);
if (PROTOCOL_REV_MAJOR(version) >= 0x4)
dom_info->level_indexing_mode =
@@ -270,15 +286,30 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
le32_to_cpu(attr->sustained_freq_khz);
dom_info->sustained_perf_level =
le32_to_cpu(attr->sustained_perf_level);
+ /*
+ * sustained_freq_khz = mult_factor * sustained_perf_level
+ * mult_factor must be non zero positive integer(not fraction)
+ */
if (!dom_info->sustained_freq_khz ||
!dom_info->sustained_perf_level ||
- dom_info->level_indexing_mode)
+ dom_info->level_indexing_mode) {
/* CPUFreq converts to kHz, hence default 1000 */
dom_info->mult_factor = 1000;
- else
+ } else {
dom_info->mult_factor =
(dom_info->sustained_freq_khz * 1000UL)
/ dom_info->sustained_perf_level;
+ if ((dom_info->sustained_freq_khz * 1000UL) %
+ dom_info->sustained_perf_level)
+ dev_warn(ph->dev,
+ "multiplier for domain %d rounded\n",
+ dom_info->id);
+ }
+ if (!dom_info->mult_factor)
+ dev_warn(ph->dev,
+ "Wrong sustained perf/frequency(domain %d)\n",
+ dom_info->id);
+
strscpy(dom_info->info.name, attr->name,
SCMI_SHORT_NAME_MAX_SIZE);
}
@@ -295,9 +326,9 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
dom_info->id, NULL, dom_info->info.name,
SCMI_MAX_STR_SIZE);
+ xa_init(&dom_info->opps_by_lvl);
if (dom_info->level_indexing_mode) {
xa_init(&dom_info->opps_by_idx);
- xa_init(&dom_info->opps_by_lvl);
hash_init(dom_info->opps_by_freq);
}
@@ -340,32 +371,52 @@ static int iter_perf_levels_update_state(struct scmi_iterator_state *st,
}
static inline void
-process_response_opp(struct scmi_opp *opp, unsigned int loop_idx,
+process_response_opp(struct device *dev, struct perf_dom_info *dom,
+ struct scmi_opp *opp, unsigned int loop_idx,
const struct scmi_msg_resp_perf_describe_levels *r)
{
+ int ret;
+
opp->perf = le32_to_cpu(r->opp[loop_idx].perf_val);
opp->power = le32_to_cpu(r->opp[loop_idx].power);
opp->trans_latency_us =
le16_to_cpu(r->opp[loop_idx].transition_latency_us);
+
+ ret = xa_insert(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL);
+ if (ret)
+ dev_warn(dev, "Failed to add opps_by_lvl at %d - ret:%d\n",
+ opp->perf, ret);
}
static inline void
-process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp,
- unsigned int loop_idx,
+process_response_opp_v4(struct device *dev, struct perf_dom_info *dom,
+ struct scmi_opp *opp, unsigned int loop_idx,
const struct scmi_msg_resp_perf_describe_levels_v4 *r)
{
+ int ret;
+
opp->perf = le32_to_cpu(r->opp[loop_idx].perf_val);
opp->power = le32_to_cpu(r->opp[loop_idx].power);
opp->trans_latency_us =
le16_to_cpu(r->opp[loop_idx].transition_latency_us);
+ ret = xa_insert(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL);
+ if (ret)
+ dev_warn(dev, "Failed to add opps_by_lvl at %d - ret:%d\n",
+ opp->perf, ret);
+
/* Note that PERF v4 reports always five 32-bit words */
opp->indicative_freq = le32_to_cpu(r->opp[loop_idx].indicative_freq);
if (dom->level_indexing_mode) {
opp->level_index = le32_to_cpu(r->opp[loop_idx].level_index);
- xa_store(&dom->opps_by_idx, opp->level_index, opp, GFP_KERNEL);
- xa_store(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL);
+ ret = xa_insert(&dom->opps_by_idx, opp->level_index, opp,
+ GFP_KERNEL);
+ if (ret)
+ dev_warn(dev,
+ "Failed to add opps_by_idx at %d - ret:%d\n",
+ opp->level_index, ret);
+
hash_add(dom->opps_by_freq, &opp->hash, opp->indicative_freq);
}
}
@@ -380,9 +431,10 @@ iter_perf_levels_process_response(const struct scmi_protocol_handle *ph,
opp = &p->perf_dom->opp[st->desc_index + st->loop_idx];
if (PROTOCOL_REV_MAJOR(p->version) <= 0x3)
- process_response_opp(opp, st->loop_idx, response);
+ process_response_opp(ph->dev, p->perf_dom, opp, st->loop_idx,
+ response);
else
- process_response_opp_v4(p->perf_dom, opp, st->loop_idx,
+ process_response_opp_v4(ph->dev, p->perf_dom, opp, st->loop_idx,
response);
p->perf_dom->opp_count++;
@@ -965,6 +1017,27 @@ static const struct scmi_perf_proto_ops perf_proto_ops = {
.power_scale_get = scmi_power_scale_get,
};
+static bool scmi_perf_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ bool supported;
+ struct perf_dom_info *dom;
+
+ if (evt_id >= ARRAY_SIZE(evt_2_cmd))
+ return false;
+
+ dom = scmi_perf_domain_lookup(ph, src_id);
+ if (IS_ERR(dom))
+ return false;
+
+ if (evt_id == SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED)
+ supported = dom->perf_limit_notify;
+ else
+ supported = dom->perf_level_notify;
+
+ return supported;
+}
+
static int scmi_perf_set_notify_enabled(const struct scmi_protocol_handle *ph,
u8 evt_id, u32 src_id, bool enable)
{
@@ -982,18 +1055,47 @@ static int scmi_perf_set_notify_enabled(const struct scmi_protocol_handle *ph,
return ret;
}
+static int
+scmi_perf_xlate_opp_to_freq(struct perf_dom_info *dom,
+ unsigned int index, unsigned long *freq)
+{
+ struct scmi_opp *opp;
+
+ if (!dom || !freq)
+ return -EINVAL;
+
+ if (!dom->level_indexing_mode) {
+ opp = xa_load(&dom->opps_by_lvl, index);
+ if (!opp)
+ return -ENODEV;
+
+ *freq = opp->perf * dom->mult_factor;
+ } else {
+ opp = xa_load(&dom->opps_by_idx, index);
+ if (!opp)
+ return -ENODEV;
+
+ *freq = opp->indicative_freq * dom->mult_factor;
+ }
+
+ return 0;
+}
+
static void *scmi_perf_fill_custom_report(const struct scmi_protocol_handle *ph,
u8 evt_id, ktime_t timestamp,
const void *payld, size_t payld_sz,
void *report, u32 *src_id)
{
+ int ret;
void *rep = NULL;
+ struct perf_dom_info *dom;
switch (evt_id) {
case SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED:
{
const struct scmi_perf_limits_notify_payld *p = payld;
struct scmi_perf_limits_report *r = report;
+ unsigned long freq_min, freq_max;
if (sizeof(*p) != payld_sz)
break;
@@ -1003,14 +1105,36 @@ static void *scmi_perf_fill_custom_report(const struct scmi_protocol_handle *ph,
r->domain_id = le32_to_cpu(p->domain_id);
r->range_max = le32_to_cpu(p->range_max);
r->range_min = le32_to_cpu(p->range_min);
+ /* Check if the reported domain exist at all */
+ dom = scmi_perf_domain_lookup(ph, r->domain_id);
+ if (IS_ERR(dom))
+ break;
+ /*
+ * Event will be reported from this point on...
+ * ...even if, later, xlated frequencies were not retrieved.
+ */
*src_id = r->domain_id;
rep = r;
+
+ ret = scmi_perf_xlate_opp_to_freq(dom, r->range_max, &freq_max);
+ if (ret)
+ break;
+
+ ret = scmi_perf_xlate_opp_to_freq(dom, r->range_min, &freq_min);
+ if (ret)
+ break;
+
+ /* Report translated freqs ONLY if both available */
+ r->range_max_freq = freq_max;
+ r->range_min_freq = freq_min;
+
break;
}
case SCMI_EVENT_PERFORMANCE_LEVEL_CHANGED:
{
const struct scmi_perf_level_notify_payld *p = payld;
struct scmi_perf_level_report *r = report;
+ unsigned long freq;
if (sizeof(*p) != payld_sz)
break;
@@ -1018,9 +1142,27 @@ static void *scmi_perf_fill_custom_report(const struct scmi_protocol_handle *ph,
r->timestamp = timestamp;
r->agent_id = le32_to_cpu(p->agent_id);
r->domain_id = le32_to_cpu(p->domain_id);
+ /* Report translated freqs ONLY if available */
r->performance_level = le32_to_cpu(p->performance_level);
+ /* Check if the reported domain exist at all */
+ dom = scmi_perf_domain_lookup(ph, r->domain_id);
+ if (IS_ERR(dom))
+ break;
+ /*
+ * Event will be reported from this point on...
+ * ...even if, later, xlated frequencies were not retrieved.
+ */
*src_id = r->domain_id;
rep = r;
+
+ /* Report translated freqs ONLY if available */
+ ret = scmi_perf_xlate_opp_to_freq(dom, r->performance_level,
+ &freq);
+ if (ret)
+ break;
+
+ r->performance_level_freq = freq;
+
break;
}
default:
@@ -1054,6 +1196,7 @@ static const struct scmi_event perf_events[] = {
};
static const struct scmi_event_ops perf_event_ops = {
+ .is_notify_supported = scmi_perf_notify_supported,
.get_num_sources = scmi_perf_get_num_sources,
.set_notify_enabled = scmi_perf_set_notify_enabled,
.fill_custom_report = scmi_perf_fill_custom_report,
@@ -1098,7 +1241,8 @@ static int scmi_perf_protocol_init(const struct scmi_protocol_handle *ph)
struct perf_dom_info *dom = pinfo->dom_info + domain;
dom->id = domain;
- scmi_perf_domain_attributes_get(ph, dom, version);
+ scmi_perf_domain_attributes_get(ph, dom, pinfo->notify_lim_cmd,
+ pinfo->notify_lvl_cmd, version);
scmi_perf_describe_levels_get(ph, dom, version);
if (dom->perf_fastchannels)
diff --git a/drivers/firmware/arm_scmi/power.c b/drivers/firmware/arm_scmi/power.c
index c2e6b9b4d941..49666bd1d8ac 100644
--- a/drivers/firmware/arm_scmi/power.c
+++ b/drivers/firmware/arm_scmi/power.c
@@ -68,6 +68,7 @@ struct power_dom_info {
struct scmi_power_info {
u32 version;
+ bool notify_state_change_cmd;
int num_domains;
u64 stats_addr;
u32 stats_size;
@@ -97,13 +98,18 @@ static int scmi_power_attributes_get(const struct scmi_protocol_handle *ph,
}
ph->xops->xfer_put(ph, t);
+
+ if (!ret)
+ if (!ph->hops->protocol_msg_check(ph, POWER_STATE_NOTIFY, NULL))
+ pi->notify_state_change_cmd = true;
+
return ret;
}
static int
scmi_power_domain_attributes_get(const struct scmi_protocol_handle *ph,
u32 domain, struct power_dom_info *dom_info,
- u32 version)
+ u32 version, bool notify_state_change_cmd)
{
int ret;
u32 flags;
@@ -122,7 +128,9 @@ scmi_power_domain_attributes_get(const struct scmi_protocol_handle *ph,
if (!ret) {
flags = le32_to_cpu(attr->flags);
- dom_info->state_set_notify = SUPPORTS_STATE_SET_NOTIFY(flags);
+ if (notify_state_change_cmd)
+ dom_info->state_set_notify =
+ SUPPORTS_STATE_SET_NOTIFY(flags);
dom_info->state_set_async = SUPPORTS_STATE_SET_ASYNC(flags);
dom_info->state_set_sync = SUPPORTS_STATE_SET_SYNC(flags);
strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE);
@@ -231,6 +239,20 @@ static int scmi_power_request_notify(const struct scmi_protocol_handle *ph,
return ret;
}
+static bool scmi_power_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ struct power_dom_info *dom;
+ struct scmi_power_info *pinfo = ph->get_priv(ph);
+
+ if (evt_id != SCMI_EVENT_POWER_STATE_CHANGED ||
+ src_id >= pinfo->num_domains)
+ return false;
+
+ dom = pinfo->dom_info + src_id;
+ return dom->state_set_notify;
+}
+
static int scmi_power_set_notify_enabled(const struct scmi_protocol_handle *ph,
u8 evt_id, u32 src_id, bool enable)
{
@@ -285,6 +307,7 @@ static const struct scmi_event power_events[] = {
};
static const struct scmi_event_ops power_event_ops = {
+ .is_notify_supported = scmi_power_notify_supported,
.get_num_sources = scmi_power_get_num_sources,
.set_notify_enabled = scmi_power_set_notify_enabled,
.fill_custom_report = scmi_power_fill_custom_report,
@@ -326,7 +349,8 @@ static int scmi_power_protocol_init(const struct scmi_protocol_handle *ph)
for (domain = 0; domain < pinfo->num_domains; domain++) {
struct power_dom_info *dom = pinfo->dom_info + domain;
- scmi_power_domain_attributes_get(ph, domain, dom, version);
+ scmi_power_domain_attributes_get(ph, domain, dom, version,
+ pinfo->notify_state_change_cmd);
}
pinfo->version = version;
diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c
index a4c6cd4716fe..2fab92367e42 100644
--- a/drivers/firmware/arm_scmi/powercap.c
+++ b/drivers/firmware/arm_scmi/powercap.c
@@ -124,6 +124,8 @@ struct scmi_powercap_state {
struct powercap_info {
u32 version;
int num_domains;
+ bool notify_cap_cmd;
+ bool notify_measurements_cmd;
struct scmi_powercap_state *states;
struct scmi_powercap_info *powercaps;
};
@@ -157,6 +159,18 @@ scmi_powercap_attributes_get(const struct scmi_protocol_handle *ph,
}
ph->xops->xfer_put(ph, t);
+
+ if (!ret) {
+ if (!ph->hops->protocol_msg_check(ph,
+ POWERCAP_CAP_NOTIFY, NULL))
+ pi->notify_cap_cmd = true;
+
+ if (!ph->hops->protocol_msg_check(ph,
+ POWERCAP_MEASUREMENTS_NOTIFY,
+ NULL))
+ pi->notify_measurements_cmd = true;
+ }
+
return ret;
}
@@ -200,10 +214,12 @@ scmi_powercap_domain_attributes_get(const struct scmi_protocol_handle *ph,
flags = le32_to_cpu(resp->attributes);
dom_info->id = domain;
- dom_info->notify_powercap_cap_change =
- SUPPORTS_POWERCAP_CAP_CHANGE_NOTIFY(flags);
- dom_info->notify_powercap_measurement_change =
- SUPPORTS_POWERCAP_MEASUREMENTS_CHANGE_NOTIFY(flags);
+ if (pinfo->notify_cap_cmd)
+ dom_info->notify_powercap_cap_change =
+ SUPPORTS_POWERCAP_CAP_CHANGE_NOTIFY(flags);
+ if (pinfo->notify_measurements_cmd)
+ dom_info->notify_powercap_measurement_change =
+ SUPPORTS_POWERCAP_MEASUREMENTS_CHANGE_NOTIFY(flags);
dom_info->async_powercap_cap_set =
SUPPORTS_ASYNC_POWERCAP_CAP_SET(flags);
dom_info->powercap_cap_config =
@@ -788,6 +804,26 @@ static int scmi_powercap_notify(const struct scmi_protocol_handle *ph,
return ret;
}
+static bool
+scmi_powercap_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ bool supported = false;
+ const struct scmi_powercap_info *dom_info;
+ struct powercap_info *pi = ph->get_priv(ph);
+
+ if (evt_id >= ARRAY_SIZE(evt_2_cmd) || src_id >= pi->num_domains)
+ return false;
+
+ dom_info = pi->powercaps + src_id;
+ if (evt_id == SCMI_EVENT_POWERCAP_CAP_CHANGED)
+ supported = dom_info->notify_powercap_cap_change;
+ else if (evt_id == SCMI_EVENT_POWERCAP_MEASUREMENTS_CHANGED)
+ supported = dom_info->notify_powercap_measurement_change;
+
+ return supported;
+}
+
static int
scmi_powercap_set_notify_enabled(const struct scmi_protocol_handle *ph,
u8 evt_id, u32 src_id, bool enable)
@@ -904,6 +940,7 @@ static const struct scmi_event powercap_events[] = {
};
static const struct scmi_event_ops powercap_event_ops = {
+ .is_notify_supported = scmi_powercap_notify_supported,
.get_num_sources = scmi_powercap_get_num_sources,
.set_notify_enabled = scmi_powercap_set_notify_enabled,
.fill_custom_report = scmi_powercap_fill_custom_report,
diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h
index e683c26f24eb..693019fff0f6 100644
--- a/drivers/firmware/arm_scmi/protocols.h
+++ b/drivers/firmware/arm_scmi/protocols.h
@@ -33,6 +33,7 @@ enum scmi_common_cmd {
PROTOCOL_VERSION = 0x0,
PROTOCOL_ATTRIBUTES = 0x1,
PROTOCOL_MESSAGE_ATTRIBUTES = 0x2,
+ NEGOTIATE_PROTOCOL_VERSION = 0x10,
};
/**
@@ -251,6 +252,8 @@ struct scmi_fc_info {
* provided in @ops.
* @iter_response_run: A common helper to trigger the run of a previously
* initialized iterator.
+ * @protocol_msg_check: A common helper to check is a specific protocol message
+ * is supported.
* @fastchannel_init: A common helper used to initialize FC descriptors by
* gathering FC descriptions from the SCMI platform server.
* @fastchannel_db_ring: A common helper to ring a FC doorbell.
@@ -264,6 +267,8 @@ struct scmi_proto_helpers_ops {
unsigned int max_resources, u8 msg_id,
size_t tx_size, void *priv);
int (*iter_response_run)(void *iter);
+ int (*protocol_msg_check)(const struct scmi_protocol_handle *ph,
+ u32 message_id, u32 *attributes);
void (*fastchannel_init)(const struct scmi_protocol_handle *ph,
u8 describe_id, u32 message_id,
u32 valid_size, u32 domain,
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 0493aa3c12bf..350573518503 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -1111,7 +1111,6 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw,
int i;
for (i = 0; i < num_chans; i++) {
- void *xret;
struct scmi_raw_queue *q;
q = scmi_raw_queue_init(raw);
@@ -1120,13 +1119,12 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw,
goto err_xa;
}
- xret = xa_store(&raw->chans_q, channels[i], q,
+ ret = xa_insert(&raw->chans_q, channels[i], q,
GFP_KERNEL);
- if (xa_err(xret)) {
+ if (ret) {
dev_err(dev,
"Fail to allocate Raw queue 0x%02X\n",
channels[i]);
- ret = xa_err(xret);
goto err_xa;
}
}
@@ -1322,6 +1320,12 @@ void scmi_raw_message_report(void *r, struct scmi_xfer *xfer,
dev = raw->handle->dev;
q = scmi_raw_queue_select(raw, idx,
SCMI_XFER_IS_CHAN_SET(xfer) ? chan_id : 0);
+ if (!q) {
+ dev_warn(dev,
+ "RAW[%d] - NO queue for chan 0x%X. Dropping report.\n",
+ idx, chan_id);
+ return;
+ }
/*
* Grab the msg_q_lock upfront to avoid a possible race between
diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c
index 19970d9f9e36..1b318316535e 100644
--- a/drivers/firmware/arm_scmi/reset.c
+++ b/drivers/firmware/arm_scmi/reset.c
@@ -67,6 +67,7 @@ struct reset_dom_info {
struct scmi_reset_info {
u32 version;
int num_domains;
+ bool notify_reset_cmd;
struct reset_dom_info *dom_info;
};
@@ -89,18 +90,24 @@ static int scmi_reset_attributes_get(const struct scmi_protocol_handle *ph,
}
ph->xops->xfer_put(ph, t);
+
+ if (!ret)
+ if (!ph->hops->protocol_msg_check(ph, RESET_NOTIFY, NULL))
+ pi->notify_reset_cmd = true;
+
return ret;
}
static int
scmi_reset_domain_attributes_get(const struct scmi_protocol_handle *ph,
- u32 domain, struct reset_dom_info *dom_info,
- u32 version)
+ struct scmi_reset_info *pinfo,
+ u32 domain, u32 version)
{
int ret;
u32 attributes;
struct scmi_xfer *t;
struct scmi_msg_resp_reset_domain_attributes *attr;
+ struct reset_dom_info *dom_info = pinfo->dom_info + domain;
ret = ph->xops->xfer_get_init(ph, RESET_DOMAIN_ATTRIBUTES,
sizeof(domain), sizeof(*attr), &t);
@@ -115,7 +122,9 @@ scmi_reset_domain_attributes_get(const struct scmi_protocol_handle *ph,
attributes = le32_to_cpu(attr->attributes);
dom_info->async_reset = SUPPORTS_ASYNC_RESET(attributes);
- dom_info->reset_notify = SUPPORTS_NOTIFY_RESET(attributes);
+ if (pinfo->notify_reset_cmd)
+ dom_info->reset_notify =
+ SUPPORTS_NOTIFY_RESET(attributes);
dom_info->latency_us = le32_to_cpu(attr->latency);
if (dom_info->latency_us == U32_MAX)
dom_info->latency_us = 0;
@@ -226,6 +235,20 @@ static const struct scmi_reset_proto_ops reset_proto_ops = {
.deassert = scmi_reset_domain_deassert,
};
+static bool scmi_reset_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ struct reset_dom_info *dom;
+ struct scmi_reset_info *pi = ph->get_priv(ph);
+
+ if (evt_id != SCMI_EVENT_RESET_ISSUED || src_id >= pi->num_domains)
+ return false;
+
+ dom = pi->dom_info + src_id;
+
+ return dom->reset_notify;
+}
+
static int scmi_reset_notify(const struct scmi_protocol_handle *ph,
u32 domain_id, bool enable)
{
@@ -301,6 +324,7 @@ static const struct scmi_event reset_events[] = {
};
static const struct scmi_event_ops reset_event_ops = {
+ .is_notify_supported = scmi_reset_notify_supported,
.get_num_sources = scmi_reset_get_num_sources,
.set_notify_enabled = scmi_reset_set_notify_enabled,
.fill_custom_report = scmi_reset_fill_custom_report,
@@ -339,11 +363,8 @@ static int scmi_reset_protocol_init(const struct scmi_protocol_handle *ph)
if (!pinfo->dom_info)
return -ENOMEM;
- for (domain = 0; domain < pinfo->num_domains; domain++) {
- struct reset_dom_info *dom = pinfo->dom_info + domain;
-
- scmi_reset_domain_attributes_get(ph, domain, dom, version);
- }
+ for (domain = 0; domain < pinfo->num_domains; domain++)
+ scmi_reset_domain_attributes_get(ph, pinfo, domain, version);
pinfo->version = version;
return ph->set_priv(ph, pinfo, version);
diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c
index 311149965370..7fc5535ca34c 100644
--- a/drivers/firmware/arm_scmi/sensors.c
+++ b/drivers/firmware/arm_scmi/sensors.c
@@ -215,6 +215,8 @@ struct scmi_sensor_update_notify_payld {
struct sensors_info {
u32 version;
+ bool notify_trip_point_cmd;
+ bool notify_continuos_update_cmd;
int num_sensors;
int max_requests;
u64 reg_addr;
@@ -246,6 +248,18 @@ static int scmi_sensor_attributes_get(const struct scmi_protocol_handle *ph,
}
ph->xops->xfer_put(ph, t);
+
+ if (!ret) {
+ if (!ph->hops->protocol_msg_check(ph,
+ SENSOR_TRIP_POINT_NOTIFY, NULL))
+ si->notify_trip_point_cmd = true;
+
+ if (!ph->hops->protocol_msg_check(ph,
+ SENSOR_CONTINUOUS_UPDATE_NOTIFY,
+ NULL))
+ si->notify_continuos_update_cmd = true;
+ }
+
return ret;
}
@@ -594,7 +608,8 @@ iter_sens_descr_process_response(const struct scmi_protocol_handle *ph,
* Such bitfields are assumed to be zeroed on non
* relevant fw versions...assuming fw not buggy !
*/
- s->update = SUPPORTS_UPDATE_NOTIFY(attrl);
+ if (si->notify_continuos_update_cmd)
+ s->update = SUPPORTS_UPDATE_NOTIFY(attrl);
s->timestamped = SUPPORTS_TIMESTAMP(attrl);
if (s->timestamped)
s->tstamp_scale = S32_EXT(SENSOR_TSTAMP_EXP(attrl));
@@ -988,6 +1003,25 @@ static const struct scmi_sensor_proto_ops sensor_proto_ops = {
.config_set = scmi_sensor_config_set,
};
+static bool scmi_sensor_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ bool supported = false;
+ const struct scmi_sensor_info *s;
+ struct sensors_info *sinfo = ph->get_priv(ph);
+
+ s = scmi_sensor_info_get(ph, src_id);
+ if (!s)
+ return false;
+
+ if (evt_id == SCMI_EVENT_SENSOR_TRIP_POINT_EVENT)
+ supported = sinfo->notify_trip_point_cmd;
+ else if (evt_id == SCMI_EVENT_SENSOR_UPDATE)
+ supported = s->update;
+
+ return supported;
+}
+
static int scmi_sensor_set_notify_enabled(const struct scmi_protocol_handle *ph,
u8 evt_id, u32 src_id, bool enable)
{
@@ -1099,6 +1133,7 @@ static const struct scmi_event sensor_events[] = {
};
static const struct scmi_event_ops sensor_event_ops = {
+ .is_notify_supported = scmi_sensor_notify_supported,
.get_num_sources = scmi_sensor_get_num_sources,
.set_notify_enabled = scmi_sensor_set_notify_enabled,
.fill_custom_report = scmi_sensor_fill_custom_report,
diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c
index 87b4f4d35f06..8bf495bcad09 100644
--- a/drivers/firmware/arm_scmi/shmem.c
+++ b/drivers/firmware/arm_scmi/shmem.c
@@ -10,7 +10,7 @@
#include <linux/processor.h>
#include <linux/types.h>
-#include <asm-generic/bug.h>
+#include <linux/bug.h>
#include "common.h"
@@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem,
(SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR |
SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
}
+
+bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem)
+{
+ return (ioread32(&shmem->channel_status) &
+ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
+}
diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c
index 7611e9665038..39936e1dd30e 100644
--- a/drivers/firmware/arm_scmi/smc.c
+++ b/drivers/firmware/arm_scmi/smc.c
@@ -214,6 +214,13 @@ static int smc_chan_free(int id, void *p, void *data)
struct scmi_chan_info *cinfo = p;
struct scmi_smc *scmi_info = cinfo->transport_info;
+ /*
+ * Different protocols might share the same chan info, so a previous
+ * smc_chan_free call might have already freed the structure.
+ */
+ if (!scmi_info)
+ return 0;
+
/* Ignore any possible further reception on the IRQ path */
if (scmi_info->irq > 0)
free_irq(scmi_info->irq, scmi_info);
diff --git a/drivers/firmware/arm_scmi/system.c b/drivers/firmware/arm_scmi/system.c
index 1621da97bcbb..b6358c155f7f 100644
--- a/drivers/firmware/arm_scmi/system.c
+++ b/drivers/firmware/arm_scmi/system.c
@@ -36,8 +36,20 @@ struct scmi_system_power_state_notifier_payld {
struct scmi_system_info {
u32 version;
bool graceful_timeout_supported;
+ bool power_state_notify_cmd;
};
+static bool scmi_system_notify_supported(const struct scmi_protocol_handle *ph,
+ u8 evt_id, u32 src_id)
+{
+ struct scmi_system_info *pinfo = ph->get_priv(ph);
+
+ if (evt_id != SCMI_EVENT_SYSTEM_POWER_STATE_NOTIFIER)
+ return false;
+
+ return pinfo->power_state_notify_cmd;
+}
+
static int scmi_system_request_notify(const struct scmi_protocol_handle *ph,
bool enable)
{
@@ -114,6 +126,7 @@ static const struct scmi_event system_events[] = {
};
static const struct scmi_event_ops system_event_ops = {
+ .is_notify_supported = scmi_system_notify_supported,
.set_notify_enabled = scmi_system_set_notify_enabled,
.fill_custom_report = scmi_system_fill_custom_report,
};
@@ -147,6 +160,9 @@ static int scmi_system_protocol_init(const struct scmi_protocol_handle *ph)
if (PROTOCOL_REV_MAJOR(pinfo->version) >= 0x2)
pinfo->graceful_timeout_supported = true;
+ if (!ph->hops->protocol_msg_check(ph, SYSTEM_POWER_STATE_NOTIFY, NULL))
+ pinfo->power_state_notify_cmd = true;
+
return ph->set_priv(ph, pinfo, version);
}
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 83f5bb57fa4c..83092d93f36a 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void)
efi_memory_desc_t *md;
for_each_efi_memory_desc(md) {
- int md_size = md->num_pages << EFI_PAGE_SHIFT;
+ u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
struct resource *res;
if (!(md->attribute & EFI_MEMORY_SP))
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index 3e8d4b51a814..97bafb5f7038 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
return -ENOMEM;
}
- cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+ cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL);
if (!cap_info->phys) {
kfree(cap_info->pages);
kfree(cap_info);
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 35c37f667781..9b3884ff81e6 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -523,6 +523,17 @@ static void cper_print_tstamp(const char *pfx,
}
}
+struct ignore_section {
+ guid_t guid;
+ const char *name;
+};
+
+static const struct ignore_section ignore_sections[] = {
+ { .guid = CPER_SEC_CXL_GEN_MEDIA_GUID, .name = "CXL General Media Event" },
+ { .guid = CPER_SEC_CXL_DRAM_GUID, .name = "CXL DRAM Event" },
+ { .guid = CPER_SEC_CXL_MEM_MODULE_GUID, .name = "CXL Memory Module Event" },
+};
+
static void
cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
int sec_no)
@@ -543,6 +554,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
+
+ for (int i = 0; i < ARRAY_SIZE(ignore_sections); i++) {
+ if (guid_equal(sec_type, &ignore_sections[i].guid)) {
+ printk("%ssection_type: %s\n", newpfx, ignore_sections[i].name);
+ return;
+ }
+ }
+
if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c
index d4987d013080..a00e07b853f2 100644
--- a/drivers/firmware/efi/efi-init.c
+++ b/drivers/firmware/efi/efi-init.c
@@ -144,15 +144,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md)
case EFI_CONVENTIONAL_MEMORY:
case EFI_PERSISTENT_MEMORY:
/*
- * Special purpose memory is 'soft reserved', which means it
- * is set aside initially, but can be hotplugged back in or
- * be assigned to the dax driver after boot.
- */
- if (efi_soft_reserve_enabled() &&
- (md->attribute & EFI_MEMORY_SP))
- return false;
-
- /*
* According to the spec, these regions are no longer reserved
* after calling ExitBootServices(). However, we can only use
* them as System RAM if they can be mapped writeback cacheable.
@@ -196,6 +187,16 @@ static __init void reserve_regions(void)
size = npages << PAGE_SHIFT;
if (is_memory(md)) {
+ /*
+ * Special purpose memory is 'soft reserved', which
+ * means it is set aside initially. Don't add a memblock
+ * for it now so that it can be hotplugged back in or
+ * be assigned to the dax driver after boot.
+ */
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ continue;
+
early_init_dt_add_memory_arch(paddr, size);
if (!is_usable_memory(md))
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index e7b9ec6f8a86..833cbb995dd3 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -14,16 +14,43 @@ static unsigned int record_size = 1024;
module_param(record_size, uint, 0444);
MODULE_PARM_DESC(record_size, "size of each pstore UEFI var (in bytes, min/default=1024)");
-static bool efivars_pstore_disable =
- IS_ENABLED(CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE);
-
-module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
-
#define PSTORE_EFI_ATTRIBUTES \
(EFI_VARIABLE_NON_VOLATILE | \
EFI_VARIABLE_BOOTSERVICE_ACCESS | \
EFI_VARIABLE_RUNTIME_ACCESS)
+static bool pstore_disable = IS_ENABLED(CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE);
+
+static int efivars_pstore_init(void);
+static void efivars_pstore_exit(void);
+
+static int efi_pstore_disable_set(const char *val, const struct kernel_param *kp)
+{
+ int err;
+ bool old_pstore_disable = pstore_disable;
+
+ err = param_set_bool(val, kp);
+ if (err)
+ return err;
+
+ if (old_pstore_disable != pstore_disable) {
+ if (pstore_disable)
+ efivars_pstore_exit();
+ else
+ efivars_pstore_init();
+ }
+
+ return 0;
+}
+
+static const struct kernel_param_ops pstore_disable_ops = {
+ .set = efi_pstore_disable_set,
+ .get = param_get_bool,
+};
+
+module_param_cb(pstore_disable, &pstore_disable_ops, &pstore_disable, 0644);
+__MODULE_PARM_TYPE(pstore_disable, "bool");
+
static int efi_pstore_open(struct pstore_info *psi)
{
int err;
@@ -218,12 +245,12 @@ static struct pstore_info efi_pstore_info = {
.erase = efi_pstore_erase,
};
-static __init int efivars_pstore_init(void)
+static int efivars_pstore_init(void)
{
if (!efivar_supports_writes())
return 0;
- if (efivars_pstore_disable)
+ if (pstore_disable)
return 0;
/*
@@ -250,7 +277,7 @@ static __init int efivars_pstore_init(void)
return 0;
}
-static __exit void efivars_pstore_exit(void)
+static void efivars_pstore_exit(void)
{
if (!efi_pstore_info.bufsize)
return;
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 06964a3c130f..73f4810f6db3 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
-DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
-DEFI_HAVE_STRCMP -fno-builtin -fpic \
$(call cc-option,-mno-single-pic-base)
-cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE
+cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax
cflags-$(CONFIG_LOONGARCH) += -fpie
cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt
@@ -143,7 +143,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
# exist.
STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_
-STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
+STUBCOPY_RELOC-$(CONFIG_RISCV) := -E R_RISCV_HI20\|R_RISCV_$(BITS)\|R_RISCV_RELAX
# For LoongArch, keep all the symbols in .init section and make sure that no
# absolute symbols references exist.
diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c
index 6b83c492c3b8..31928bd87e0f 100644
--- a/drivers/firmware/efi/libstub/alignedmem.c
+++ b/drivers/firmware/efi/libstub/alignedmem.c
@@ -14,6 +14,7 @@
* @max: the address that the last allocated memory page shall not
* exceed
* @align: minimum alignment of the base of the allocation
+ * @memory_type: the type of memory to allocate
*
* Allocate pages as EFI_LOADER_DATA. The allocated pages are aligned according
* to @align, which should be >= EFI_ALLOC_ALIGN. The last allocated page will
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index bfa30625f5d0..3dc2f9aaf08d 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -24,6 +24,8 @@ static bool efi_noinitrd;
static bool efi_nosoftreserve;
static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
+int efi_mem_encrypt;
+
bool __pure __efi_soft_reserve_enabled(void)
{
return !efi_nosoftreserve;
@@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_noinitrd = true;
} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
efi_no5lvl = true;
+ } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) &&
+ !strcmp(param, "mem_encrypt") && val) {
+ if (parse_option_str(val, "on"))
+ efi_mem_encrypt = 1;
+ else if (parse_option_str(val, "off"))
+ efi_mem_encrypt = -1;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 212687c30d79..fc18fd649ed7 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -37,8 +37,8 @@ extern bool efi_no5lvl;
extern bool efi_nochunk;
extern bool efi_nokaslr;
extern int efi_loglevel;
+extern int efi_mem_encrypt;
extern bool efi_novamap;
-
extern const efi_system_table_t *efi_system_table;
typedef union efi_dxe_services_table efi_dxe_services_table_t;
@@ -956,7 +956,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
unsigned long *addr, unsigned long random_seed,
- int memory_type, unsigned long alloc_limit);
+ int memory_type, unsigned long alloc_min,
+ unsigned long alloc_max);
efi_status_t efi_random_get_seed(void);
diff --git a/drivers/firmware/efi/libstub/kaslr.c b/drivers/firmware/efi/libstub/kaslr.c
index 62d63f7a2645..1a9808012abd 100644
--- a/drivers/firmware/efi/libstub/kaslr.c
+++ b/drivers/firmware/efi/libstub/kaslr.c
@@ -119,7 +119,7 @@ efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr,
*/
status = efi_random_alloc(*reserve_size, min_kimg_align,
reserve_addr, phys_seed,
- EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
+ EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT);
if (status != EFI_SUCCESS)
efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
} else {
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 674a064b8f7a..4e96a855fdf4 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -17,7 +17,7 @@
static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
unsigned long size,
unsigned long align_shift,
- u64 alloc_limit)
+ u64 alloc_min, u64 alloc_max)
{
unsigned long align = 1UL << align_shift;
u64 first_slot, last_slot, region_end;
@@ -30,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
return 0;
region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
- alloc_limit);
+ alloc_max);
if (region_end < size)
return 0;
- first_slot = round_up(md->phys_addr, align);
+ first_slot = round_up(max(md->phys_addr, alloc_min), align);
last_slot = round_down(region_end - size + 1, align);
if (first_slot > last_slot)
@@ -56,7 +56,8 @@ efi_status_t efi_random_alloc(unsigned long size,
unsigned long *addr,
unsigned long random_seed,
int memory_type,
- unsigned long alloc_limit)
+ unsigned long alloc_min,
+ unsigned long alloc_max)
{
unsigned long total_slots = 0, target_slot;
unsigned long total_mirrored_slots = 0;
@@ -78,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size,
efi_memory_desc_t *md = (void *)map->map + map_offset;
unsigned long slots;
- slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit);
+ slots = get_entry_num_slots(md, size, ilog2(align), alloc_min,
+ alloc_max);
MD_NUM_SLOTS(md) = slots;
total_slots += slots;
if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 0d510c9a06a4..0336ed175e67 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -223,8 +223,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
}
}
-void efi_adjust_memory_range_protection(unsigned long start,
- unsigned long size)
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+ unsigned long size)
{
efi_status_t status;
efi_gcd_memory_space_desc_t desc;
@@ -236,13 +236,17 @@ void efi_adjust_memory_range_protection(unsigned long start,
rounded_end = roundup(start + size, EFI_PAGE_SIZE);
if (memattr != NULL) {
- efi_call_proto(memattr, clear_memory_attributes, rounded_start,
- rounded_end - rounded_start, EFI_MEMORY_XP);
- return;
+ status = efi_call_proto(memattr, clear_memory_attributes,
+ rounded_start,
+ rounded_end - rounded_start,
+ EFI_MEMORY_XP);
+ if (status != EFI_SUCCESS)
+ efi_warn("Failed to clear EFI_MEMORY_XP attribute\n");
+ return status;
}
if (efi_dxe_table == NULL)
- return;
+ return EFI_SUCCESS;
/*
* Don't modify memory region attributes, they are
@@ -255,7 +259,7 @@ void efi_adjust_memory_range_protection(unsigned long start,
status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
if (status != EFI_SUCCESS)
- return;
+ break;
next = desc.base_address + desc.length;
@@ -280,8 +284,10 @@ void efi_adjust_memory_range_protection(unsigned long start,
unprotect_start,
unprotect_start + unprotect_size,
status);
+ break;
}
}
+ return EFI_SUCCESS;
}
static void setup_unaccepted_memory(void)
@@ -793,6 +799,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr,
seed[0], EFI_LOADER_CODE,
+ LOAD_PHYSICAL_ADDR,
EFI_X86_KERNEL_ALLOC_LIMIT);
if (status != EFI_SUCCESS)
return status;
@@ -805,9 +812,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
*kernel_entry = addr + entry;
- efi_adjust_memory_range_protection(addr, kernel_total_size);
-
- return EFI_SUCCESS;
+ return efi_adjust_memory_range_protection(addr, kernel_total_size);
}
static void __noreturn enter_kernel(unsigned long kernel_addr,
@@ -879,6 +884,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
}
}
+ if (efi_mem_encrypt > 0)
+ hdr->xloadflags |= XLF_MEM_ENCRYPTION;
+
status = efi_decompress_kernel(&kernel_entry);
if (status != EFI_SUCCESS) {
efi_err("Failed to decompress kernel\n");
diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h
index 37c5a36b9d8c..1c20e99a6494 100644
--- a/drivers/firmware/efi/libstub/x86-stub.h
+++ b/drivers/firmware/efi/libstub/x86-stub.h
@@ -5,8 +5,8 @@
extern void trampoline_32bit_src(void *, bool);
extern const u16 trampoline_ljmp_imm_offset;
-void efi_adjust_memory_range_protection(unsigned long start,
- unsigned long size);
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+ unsigned long size);
#ifdef CONFIG_X86_64
efi_status_t efi_setup_5level_paging(void);
diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c
index bdb17eac0cb4..1ceace956758 100644
--- a/drivers/firmware/efi/libstub/zboot.c
+++ b/drivers/firmware/efi/libstub/zboot.c
@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
}
status = efi_random_alloc(alloc_size, min_kimg_align, &image_base,
- seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
+ seed, EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT);
if (status != EFI_SUCCESS) {
efi_err("Failed to allocate memory\n");
goto free_cmdline;
diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c
index 09525fb5c240..01f0f90ea418 100644
--- a/drivers/firmware/efi/riscv-runtime.c
+++ b/drivers/firmware/efi/riscv-runtime.c
@@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void)
efi_memory_desc_t *md;
for_each_efi_memory_desc(md) {
- int md_size = md->num_pages << EFI_PAGE_SHIFT;
+ u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
struct resource *res;
if (!(md->attribute & EFI_MEMORY_SP))
diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c
index 81f5f62e34fc..fbeeaee4ac85 100644
--- a/drivers/firmware/microchip/mpfs-auto-update.c
+++ b/drivers/firmware/microchip/mpfs-auto-update.c
@@ -167,7 +167,7 @@ static int mpfs_auto_update_verify_image(struct fw_upload *fw_uploader)
u32 *response_msg;
int ret;
- response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg),
+ response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg),
GFP_KERNEL);
if (!response_msg)
return -ENOMEM;
@@ -384,7 +384,8 @@ static int mpfs_auto_update_available(struct mpfs_auto_update_priv *priv)
u32 *response_msg;
int ret;
- response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg),
+ response_msg = devm_kzalloc(priv->dev,
+ AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg),
GFP_KERNEL);
if (!response_msg)
return -ENOMEM;
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 82fcfd29bc4d..3c197db42c9d 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -128,4 +128,4 @@ unlock_mutex:
}
/* must execute after PCI subsystem for EFI quirks */
-subsys_initcall_sync(sysfb_init);
+device_initcall(sysfb_init);
diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c
index bbcdd9fed3fb..4221fed70ad4 100644
--- a/drivers/firmware/tegra/bpmp-debugfs.c
+++ b/drivers/firmware/tegra/bpmp-debugfs.c
@@ -77,7 +77,7 @@ static const char *get_filename(struct tegra_bpmp *bpmp,
root_path_buf = kzalloc(root_path_buf_len, GFP_KERNEL);
if (!root_path_buf)
- goto out;
+ return NULL;
root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf,
root_path_buf_len);
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index e00c33310517..753e7be039e4 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi)
if (IS_ERR(chip->gpiod_oe))
return PTR_ERR(chip->gpiod_oe);
- gpiod_set_value_cansleep(chip->gpiod_oe, 1);
-
spi_set_drvdata(spi, chip);
chip->gpio_chip.label = spi->modalias;
@@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi)
goto exit_destroy;
}
+ gpiod_set_value_cansleep(chip->gpiod_oe, 1);
+
ret = gpiochip_add_data(&chip->gpio_chip, chip);
if (!ret)
return 0;
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index be7f2fa5aa7b..806b88d8dfb7 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -330,20 +330,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
switch (flow_type) {
case IRQ_TYPE_LEVEL_HIGH:
sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1);
break;
case IRQ_TYPE_LEVEL_LOW:
sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1);
break;
case IRQ_TYPE_EDGE_RISING:
case IRQ_TYPE_EDGE_FALLING:
case IRQ_TYPE_EDGE_BOTH:
state = sprd_eic_get(chip, offset);
- if (state)
+ if (state) {
sprd_eic_update(chip, offset,
SPRD_EIC_DBNC_IEV, 0);
- else
+ sprd_eic_update(chip, offset,
+ SPRD_EIC_DBNC_IC, 1);
+ } else {
sprd_eic_update(chip, offset,
SPRD_EIC_DBNC_IEV, 1);
+ sprd_eic_update(chip, offset,
+ SPRD_EIC_DBNC_IC, 1);
+ }
break;
default:
return -ENOTSUPP;
@@ -355,20 +362,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
switch (flow_type) {
case IRQ_TYPE_LEVEL_HIGH:
sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1);
break;
case IRQ_TYPE_LEVEL_LOW:
sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1);
break;
case IRQ_TYPE_EDGE_RISING:
case IRQ_TYPE_EDGE_FALLING:
case IRQ_TYPE_EDGE_BOTH:
state = sprd_eic_get(chip, offset);
- if (state)
+ if (state) {
sprd_eic_update(chip, offset,
SPRD_EIC_LATCH_INTPOL, 0);
- else
+ sprd_eic_update(chip, offset,
+ SPRD_EIC_LATCH_INTCLR, 1);
+ } else {
sprd_eic_update(chip, offset,
SPRD_EIC_LATCH_INTPOL, 1);
+ sprd_eic_update(chip, offset,
+ SPRD_EIC_LATCH_INTCLR, 1);
+ }
break;
default:
return -ENOTSUPP;
@@ -382,29 +396,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_FALLING:
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_BOTH:
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_LEVEL_HIGH:
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_level_irq);
break;
case IRQ_TYPE_LEVEL_LOW:
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1);
sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_level_irq);
break;
default:
@@ -417,29 +436,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_FALLING:
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_BOTH:
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_LEVEL_HIGH:
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1);
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_level_irq);
break;
case IRQ_TYPE_LEVEL_LOW:
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0);
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1);
irq_set_handler_locked(data, handle_level_irq);
break;
default:
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 88066826d8e5..cd3e9657cc36 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1651,6 +1651,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = {
.ignore_interrupt = "INT33FC:00@3",
},
},
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 0.35
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/3073
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "GPD"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "PNP0C50:00@8",
+ },
+ },
{} /* Terminating entry */
};
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 44c8f5743a24..75be4a3ca7f8 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -968,11 +968,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
ret = gpiochip_irqchip_init_valid_mask(gc);
if (ret)
- goto err_remove_acpi_chip;
+ goto err_free_hogs;
ret = gpiochip_irqchip_init_hw(gc);
if (ret)
- goto err_remove_acpi_chip;
+ goto err_remove_irqchip_mask;
ret = gpiochip_add_irqchip(gc, lock_key, request_key);
if (ret)
@@ -997,23 +997,23 @@ err_remove_irqchip:
gpiochip_irqchip_remove(gc);
err_remove_irqchip_mask:
gpiochip_irqchip_free_valid_mask(gc);
-err_remove_acpi_chip:
+err_free_hogs:
+ gpiochip_free_hogs(gc);
acpi_gpiochip_remove(gc);
+ gpiochip_remove_pin_ranges(gc);
err_remove_of_chip:
- gpiochip_free_hogs(gc);
of_gpiochip_remove(gc);
err_free_gpiochip_mask:
- gpiochip_remove_pin_ranges(gc);
gpiochip_free_valid_mask(gc);
+err_remove_from_list:
+ spin_lock_irqsave(&gpio_lock, flags);
+ list_del(&gdev->list);
+ spin_unlock_irqrestore(&gpio_lock, flags);
if (gdev->dev.release) {
/* release() has been registered by gpiochip_setup_dev() */
gpio_device_put(gdev);
goto err_print_message;
}
-err_remove_from_list:
- spin_lock_irqsave(&gpio_lock, flags);
- list_del(&gdev->list);
- spin_unlock_irqrestore(&gpio_lock, flags);
err_free_label:
kfree_const(gdev->label);
err_free_descs:
@@ -2042,6 +2042,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free);
int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset,
unsigned long config)
{
+#ifdef CONFIG_PINCTRL
+ if (list_empty(&gc->gpiodev->pin_ranges))
+ return -ENOTSUPP;
+#endif
+
return pinctrl_gpio_set_config(gc, offset, config);
}
EXPORT_SYMBOL_GPL(gpiochip_generic_config);
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 2520db0b776e..c7edba18a6f0 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -199,7 +199,7 @@ config DRM_TTM
config DRM_TTM_KUNIT_TEST
tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS
default n
- depends on DRM && KUNIT && MMU
+ depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
select DRM_TTM
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
@@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST
help
Enables unit tests for TTM, a GPU memory manager subsystem used
to manage memory buffers. This option is mostly useful for kernel
- developers.
+ developers. It depends on (UML || COMPILE_TEST) since no other driver
+ which uses TTM can be loaded while running the tests.
If in doubt, say "N".
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3d8a48f46b01..79827a6dcd7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -200,6 +200,7 @@ extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dc_visual_confirm;
extern uint amdgpu_dm_abm_level;
extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask;
@@ -1078,6 +1079,8 @@ struct amdgpu_device {
bool in_s3;
bool in_s4;
bool in_s0ix;
+ /* indicate amdgpu suspension status */
+ bool suspend_complete;
enum pp_mp1_state mp1_state;
struct amdgpu_doorbell_index doorbell_index;
@@ -1547,9 +1550,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
#else
static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
#endif
#if defined(CONFIG_DRM_AMD_DC)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2deebece810e..7099ff9cf8c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1519,4 +1519,22 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
#endif /* CONFIG_AMD_PMC */
}
+/**
+ * amdgpu_choose_low_power_state
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * Choose the target low power state for the GPU
+ */
+void amdgpu_choose_low_power_state(struct amdgpu_device *adev)
+{
+ if (adev->in_runpm)
+ return;
+
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = true;
+ else if (amdgpu_acpi_is_s3_active(adev))
+ adev->in_s3 = true;
+}
+
#endif /* CONFIG_SUSPEND */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 77e263660288..41db030ddc4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
static const struct drm_client_funcs kfd_client_funcs = {
.unregister = drm_client_release,
};
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to init DRM client: %d\n",
+ ret);
+ return ret;
+ }
+
+ drm_client_register(&adev->kfd.client);
+
+ return 0;
+}
+
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int last_valid_bit;
- int ret;
amdgpu_amdkfd_gpuvm_init_mem_limits();
@@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.enable_mes = adev->enable_mes,
};
- ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
- if (ret) {
- dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
- return;
- }
-
/* this is going to have a few of the MSBs set that we need to
* clear
*/
@@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
&gpu_resources);
- if (adev->kfd.init_complete)
- drm_client_register(&adev->kfd.client);
- else
- drm_client_release(&adev->kfd.client);
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f262b9d89541..27c61c535e29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
#endif
@@ -301,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 899e31e3a5e8..3a3f3ce09f00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
- if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
/* stop secheduler and drain ring. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f183d7faeeec..231fd927dcfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2085,21 +2085,35 @@ out:
return ret;
}
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
{
struct kfd_mem_attachment *entry;
struct amdgpu_vm *vm;
+ int ret;
vm = drm_priv_to_vm(drm_priv);
mutex_lock(&mem->lock);
+ ret = amdgpu_bo_reserve(mem->bo, true);
+ if (ret)
+ goto out;
+
list_for_each_entry(entry, &mem->attachments, list) {
- if (entry->bo_va->base.vm == vm)
- kfd_mem_dmaunmap_attachment(mem, entry);
+ if (entry->bo_va->base.vm != vm)
+ continue;
+ if (entry->bo_va->base.bo->tbo.ttm &&
+ !entry->bo_va->base.bo->tbo.ttm->sg)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
}
+ amdgpu_bo_unreserve(mem->bo);
+out:
mutex_unlock(&mem->lock);
+
+ return ret;
}
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index e485dd3357c6..1afbb2e932c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_wqueue_stop(&ring->sched);
}
@@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_wqueue_start(&ring->sched);
}
@@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
ring = adev->rings[val];
- if (!ring || !ring->funcs->preempt_ib ||
- !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring) ||
+ !ring->funcs->preempt_ib)
return -EINVAL;
/* the last preemption failed */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b158d27d0a71..94bdb5fa6ebc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
} else {
- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
- case IP_VERSION(13, 0, 0):
- case IP_VERSION(13, 0, 7):
- case IP_VERSION(13, 0, 10):
- r = psp_gpu_reset(adev);
- break;
- default:
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- break;
- }
-
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
@@ -4524,13 +4514,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
struct amdgpu_device *adev = drm_to_adev(dev);
int i, r;
+ amdgpu_choose_low_power_state(adev);
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
/* Evict the majority of BOs before starting suspend sequence */
r = amdgpu_device_evict_resources(adev);
if (r)
- return r;
+ goto unprepare;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
@@ -4539,10 +4531,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
continue;
r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
if (r)
- return r;
+ goto unprepare;
}
return 0;
+
+unprepare:
+ adev->in_s0ix = adev->in_s3 = false;
+
+ return r;
}
/**
@@ -4579,7 +4576,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
cancel_delayed_work_sync(&adev->delayed_init_work);
- flush_delayed_work(&adev->gfx.gfx_off_delay_work);
amdgpu_ras_suspend(adev);
@@ -5031,7 +5027,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
spin_lock(&ring->sched.job_list_lock);
@@ -5170,7 +5166,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
/* Clear job fence from fence drv to avoid force_completion
@@ -5637,7 +5633,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5706,7 +5702,7 @@ skip_hw_reset:
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_start(&ring->sched, true);
@@ -6061,7 +6057,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, NULL);
@@ -6189,7 +6185,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_start(&ring->sched, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index cc69005f5b46..586f4d03039d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */
uint amdgpu_debug_mask;
int amdgpu_agp = -1; /* auto */
int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -860,6 +861,18 @@ MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (defau
module_param_named(backlight, amdgpu_backlight, bint, 0444);
/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
+ */
+MODULE_PARM_DESC(damageclips,
+ "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
+/**
* DOC: tmz (int)
* Trusted Memory Zone (TMZ) is a method to protect data being written
* to or read from memory.
@@ -2255,6 +2268,10 @@ retry_init:
if (ret)
goto err_pci;
+ ret = amdgpu_amdkfd_drm_client_create(adev);
+ if (ret)
+ goto err_pci;
+
/*
* 1. don't init fbdev on hw without DCE
* 2. don't init fbdev if there are no connectors
@@ -2472,6 +2489,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ adev->suspend_complete = false;
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = true;
else if (amdgpu_acpi_is_s3_active(adev))
@@ -2486,6 +2504,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ adev->suspend_complete = true;
if (amdgpu_acpi_should_gpu_reset(adev))
return amdgpu_asic_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 73b8cca35bab..c623e23049d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -121,6 +121,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
struct amdgpu_bo_param bp;
dma_addr_t dma_addr;
struct page *p;
+ unsigned long x;
int ret;
if (adev->gart.bo != NULL)
@@ -130,6 +131,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
if (!p)
return -ENOMEM;
+ /* assign pages to this device */
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = adev->mman.bdev.dev_mapping;
+
/* If the hardware does not support UTCL2 snooping of the CPU caches
* then set_memory_wc() could be used as a workaround to mark the pages
* as write combine memory.
@@ -223,6 +228,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
unsigned int order = get_order(adev->gart.table_size);
struct sg_table *sg = adev->gart.bo->tbo.sg;
struct page *p;
+ unsigned long x;
int ret;
ret = amdgpu_bo_reserve(adev->gart.bo, false);
@@ -234,6 +240,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
sg_free_table(sg);
kfree(sg);
p = virt_to_page(adev->gart.ptr);
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = NULL;
__free_pages(p, order);
adev->gart.ptr = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b9674c57c436..6ddc8e3360e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -723,8 +723,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ /* If going to s2idle, no need to wait */
+ if (adev->in_s0ix) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GFX, true))
+ adev->gfx.gfx_off_state = true;
+ } else {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
delay);
+ }
}
} else {
if (adev->gfx.gfx_off_req_count == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 468a67b302d4..ca5c86e5f7cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
}
}
- if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
+ if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
ret = -EFAULT;
err_free_shared_buf:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 45424ebf9681..5505d646f43a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->name);
ring->sched.ready = !r;
+
return r;
}
@@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
}
+
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+ if (!ring)
+ return false;
+
+ if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+ return false;
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index bbb53720a018..fe1a61eb6e4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..8db880244324 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -221,8 +221,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = {
NULL
};
+static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_mem_info_vram_vendor.attr &&
+ !adev->gmc.vram_vendor)
+ return 0;
+
+ return attr->mode;
+}
+
const struct attribute_group amdgpu_vram_mgr_attr_group = {
- .attrs = amdgpu_vram_mgr_attributes
+ .attrs = amdgpu_vram_mgr_attributes,
+ .is_visible = amdgpu_vram_attrs_is_visible
};
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 6f7c031dd197..f24e34dc33d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index b8c47e0cf37a..c19681492efa 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index d63cab294883..dcdecb18b230 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4027,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = 0;
adev->gfx.mec2_fw = NULL;
}
- amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
- amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
gfx_v10_0_check_fw_write_wait(adev);
out:
@@ -6589,7 +6587,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0ea0866c261f..4f3bfdc75b37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -107,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};
-static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
- SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f)
-};
-
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -304,11 +287,6 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_11_0_1,
(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
break;
- case IP_VERSION(11, 5, 0):
- soc15_program_register_sequence(adev,
- golden_settings_gc_11_5_0,
- (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0));
- break;
default:
break;
}
@@ -3846,7 +3824,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 69c500910746..3bc6943365a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3034,6 +3034,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true);
+ /* Now only limit the quirk on the APU gfx9 series and already
+ * confirmed that the APU gfx10/gfx11 needn't such update.
+ */
+ if (adev->flags & AMD_IS_APU &&
+ adev->in_s3 && !adev->suspend_complete) {
+ DRM_INFO(" Will skip the CSB packet resubmit\n");
+ return 0;
+ }
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 42e103d7077d..59d9215e5556 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
static int gmc_v6_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index efc16e580f1e..45a2f8e031a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
static int gmc_v7_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ff4ae73d27ec..4422b27a3cc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
static int gmc_v8_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f9039d64ff2d..e67a62db9e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1947,13 +1947,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
- static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
- u32 vram_info;
-
- if (!amdgpu_sriov_vf(adev)) {
- vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
- adev->gmc.vram_vendor = vram_info & 0xF;
- }
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
adev->gmc.vram_width = 128 * 64;
}
@@ -2340,8 +2333,8 @@ static int gmc_v9_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index aecad530b10a..2c02ae69883d 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index d9ed7332d805..ad4ad39f128f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 8fb05eae340a..b8da0fc29378 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index bc38b90f8cf8..88ea58d5c4ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle,
return ret;
}
-static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- return 0;
-}
-
static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int type,
@@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
- .set = jpeg_v4_0_set_interrupt_state,
.process = jpeg_v4_0_process_interrupt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 6ede85b28cc8..78b74daf4eeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -181,7 +181,6 @@ static int jpeg_v4_0_5_hw_fini(void *handle)
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
}
- amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
return 0;
}
@@ -516,14 +515,6 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle,
return ret;
}
-static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- return 0;
-}
-
static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -603,7 +594,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = {
- .set = jpeg_v4_0_5_set_interrupt_state,
.process = jpeg_v4_0_5_process_interrupt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index e64b33115848..de93614726c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index e90f33780803..b4723d68eab0 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -431,6 +431,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
u32 inst_mask;
int i;
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(
+ NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+ << 2;
WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
0xff & ~(adev->gfx.xcc_mask));
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 9a24f17a5750..cada9f300a7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(IH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 15033efec2ba..1c614451dead 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
return AMD_RESET_METHOD_MODE1;
}
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset limit on APU side, dGPU hasn't checked yet.
+ * 2) S3 suspend abort and TOS already launched.
+ */
+ if (adev->flags & AMD_IS_APU && adev->in_s3 &&
+ !adev->suspend_complete &&
+ sol_reg)
+ return true;
+
+ return false;
+}
+
static int soc15_asic_reset(struct amdgpu_device *adev)
{
/* original raven doesn't have full asic reset */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
- (adev->apu_flags & AMD_APU_IS_RAVEN2))
+ /* On the latest Raven, the GPU reset can be performed
+ * successfully. So now, temporarily enable it for the
+ * S3 suspend abort case.
+ */
+ if (((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2)) &&
+ !soc15_need_reset_on_resume(adev))
return 0;
switch (soc15_asic_reset_method(adev)) {
@@ -1302,6 +1325,10 @@ static int soc15_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (soc15_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
+ soc15_asic_reset(adev);
+ }
return soc15_common_hw_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 48c6efcdeac9..4d7188912edf 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs;
/* SOC21 */
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 917707bba7f3..450b6e831509 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 169ed400ee7b..8ab01ae919d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -2018,22 +2018,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
}
/**
- * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
- unsigned type, enum amdgpu_interrupt_state state)
-{
- return 0;
-}
-
-/**
* vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
*
* @adev: amdgpu_device pointer
@@ -2097,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
}
static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
- .set = vcn_v4_0_set_interrupt_state,
.process = vcn_v4_0_process_interrupt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 2eda30e78f61..49e4c3c09aca 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle)
vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
}
}
-
- amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);
}
return 0;
@@ -1669,22 +1667,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s
}
/**
- * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
- unsigned type, enum amdgpu_interrupt_state state)
-{
- return 0;
-}
-
-/**
* vcn_v4_0_5_process_interrupt - process VCN block interrupt
*
* @adev: amdgpu_device pointer
@@ -1726,7 +1708,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp
}
static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = {
- .set = vcn_v4_0_5_set_interrupt_state,
.process = vcn_v4_0_5_process_interrupt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index d364c6dd152c..bf68e18e3824 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index ddfc6941f9d5..db66e6cccaf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index df75863393fc..d1caaf0e6a7c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x86ea6a6a, 0x8f6e837a,
0xb96ee0c2, 0xbf800002,
0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xbe801f6c, 0xbf9b0000,
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
@@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb9eef807, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9faf802,
- 0xbe80226c, 0xbf810000,
+ 0xbe80226c, 0xbf9b0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
@@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x86ea6a6a, 0x8f6e837a,
0xb96ee0c2, 0xbf800002,
0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xbe801f6c, 0xbf9b0000,
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
@@ -2065,7 +2065,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x86ea6a6a, 0x8f6e837a,
0xb96ee0c2, 0xbf800002,
0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xbe801f6c, 0xbf9b0000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
@@ -2500,7 +2500,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x876dff6d, 0x0000ffff,
0x87fe7e7e, 0x87ea6a6a,
0xb9faf802, 0xbe80226c,
- 0xbf810000, 0xbf9f0000,
+ 0xbf9b0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
};
@@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xb8eef802, 0xbf0d866e,
0xbfa20002, 0xb97af802,
0xbe80486c, 0xb97af802,
- 0xbe804a6c, 0xbfb00000,
+ 0xbe804a6c, 0xbfb10000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
@@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x86ea6a6a, 0x8f6e837a,
0xb96ee0c2, 0xbf800002,
0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xbe801f6c, 0xbf9b0000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index e0140df0b0ec..71b3dc0c7363 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -1104,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV:
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
- s_endpgm
+ s_endpgm_saved
end
function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index e506411ad28a..bb26338204f4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -921,7 +921,7 @@ L_RESTORE:
/* the END */
/**************************************************************************/
L_END_PGM:
- s_endpgm
+ s_endpgm_saved
end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ce4c52ec34d8..80e90fdef291 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
- amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+ if (err)
+ goto sync_memory_failed;
}
mutex_unlock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index f856901055d3..bdc01ca9609a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -574,7 +574,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
- addr = prange->start << PAGE_SHIFT;
+ addr = migrate->start;
src = (uint64_t *)(scratch + npages);
dst = scratch;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 8b7fed913526..22cbfa1bdadd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -170,6 +170,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 15277f1d5cf0..826bc4f6c8a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
if (has_wa_flag) {
- uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ?
- 0xffff : 0xffffffff;
+ uint32_t wa_mask =
+ (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff;
m->compute_static_thread_mgmt_se0 = wa_mask;
m->compute_static_thread_mgmt_se1 = wa_mask;
@@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 42d881809dc7..697b6d530d12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
+ if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
+ if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
+ m->compute_resource_limits |=
+ COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+ else
+ m->compute_resource_limits &=
+ ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+ }
+
q->is_active = QUEUE_IS_ACTIVE(*q);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 17fbedbf3651..80320b8603fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -532,6 +532,7 @@ struct queue_properties {
enum mqd_update_flag {
UPDATE_FLAG_DBG_WA_ENABLE = 1,
UPDATE_FLAG_DBG_WA_DISABLE = 2,
+ UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
};
struct mqd_update_info {
@@ -1488,10 +1489,15 @@ void kfd_dec_compute_active(struct kfd_node *dev);
/* Cgroup Support */
/* Check with device cgroup if @kfd device is accessible */
-static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
+static inline int kfd_devcgroup_check_permission(struct kfd_node *node)
{
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
- struct drm_device *ddev = adev_to_drm(kfd->adev);
+ struct drm_device *ddev;
+
+ if (node->xcp)
+ ddev = node->xcp->ddev;
+ else
+ ddev = adev_to_drm(node->adev);
return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
ddev->render->index,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 43eff221eae5..4858112f9a53 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
{
+ struct mqd_update_info minfo = {0};
struct kfd_node *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
}
pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+ minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q, NULL);
+ pqn->q, &minfo);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index e5f7c92eebcb..6ed2ec381aaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1638,12 +1638,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
else
mode = UNKNOWN_MEMORY_PARTITION_MODE;
- if (pcache->cache_level == 2)
- pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc;
- else if (mode)
- pcache->cache_size = pcache_info[cache_type].cache_size / mode;
- else
- pcache->cache_size = pcache_info[cache_type].cache_size;
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+ /* Partition mode only affects L3 cache size */
+ if (mode && pcache->cache_level == 3)
+ pcache->cache_size /= mode;
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
pcache->cache_type |= HSA_CACHE_TYPE_DATA;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d4f525b66a09..1a9bbb04bd5e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -272,6 +272,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
{
u32 v_blank_start, v_blank_end, h_position, v_position;
struct amdgpu_crtc *acrtc = NULL;
+ struct dc *dc = adev->dm.dc;
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
@@ -284,6 +285,9 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
return 0;
}
+ if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dc, false);
+
/*
* TODO rework base driver to use values directly.
* for now parse it back into reg-format
@@ -1715,7 +1719,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
- init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+ if (amdgpu_dc_debug_mask & DC_DISABLE_IPS)
+ init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+
+ init_data.flags.disable_ips_in_vpb = 1;
/* Enable DWB for tested platforms only */
if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
@@ -1836,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
DRM_ERROR("amdgpu: fail to register dmub aux callback");
goto error;
}
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
- goto error;
- }
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
- goto error;
- }
- }
-
- /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
- * It is expected that DMUB will resend any pending notifications at this point, for
- * example HPD from DPIA.
- */
- if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+ * It is expected that DMUB will resend any pending notifications at this point. Note
+ * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to
+ * align legacy interface initialization sequence. Connection status will be proactivly
+ * detected once in the amdgpu_dm_initialize_drm_device.
+ */
dc_enable_dmub_outbox(adev->dm.dc);
/* DPIA trace goes to dmesg logs only if outbox is enabled */
@@ -1949,7 +1947,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
&adev->dm.dmub_bo_gpu_addr,
&adev->dm.dmub_bo_cpu_addr);
- if (adev->dm.hpd_rx_offload_wq) {
+ if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) {
for (i = 0; i < adev->dm.dc->caps.max_links; i++) {
if (adev->dm.hpd_rx_offload_wq[i].wq) {
destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq);
@@ -2280,6 +2278,7 @@ static int dm_sw_fini(void *handle)
if (adev->dm.dmub_srv) {
dmub_srv_destroy(adev->dm.dmub_srv);
+ kfree(adev->dm.dmub_srv);
adev->dm.dmub_srv = NULL;
}
@@ -3529,6 +3528,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true))
+ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true))
+ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+ }
+
list_for_each_entry(connector,
&dev->mode_config.connector_list, head) {
@@ -3557,10 +3564,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
handle_hpd_rx_irq,
(void *) aconnector);
}
-
- if (adev->dm.hpd_rx_offload_wq)
- adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
- aconnector;
}
}
@@ -4554,6 +4557,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
goto fail;
}
+ if (dm->hpd_rx_offload_wq)
+ dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
+ aconnector;
+
if (!dc_link_detect_connection_type(link, &new_connection_type))
DRM_ERROR("KMS: Failed to detect connector\n");
@@ -5212,6 +5219,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *new_plane_state,
struct drm_crtc_state *crtc_state,
struct dc_flip_addrs *flip_addrs,
+ bool is_psr_su,
bool *dirty_regions_changed)
{
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
@@ -5236,6 +5244,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
clips = drm_plane_get_damage_clips(new_plane_state);
+ if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&
+ is_psr_su)))
+ goto ffu;
+
if (!dm_crtc_state->mpo_requested) {
if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
goto ffu;
@@ -6187,7 +6199,9 @@ create_stream_for_sink(struct drm_connector *connector,
if (recalculate_timing) {
freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
drm_mode_copy(&saved_mode, &mode);
+ saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio;
drm_mode_copy(&mode, freesync_mode);
+ mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio;
} else {
decide_crtc_timing_for_drm_display_mode(
&mode, preferred_mode, scale);
@@ -6520,10 +6534,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector)
static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct dc_link *dc_link = aconnector->dc_link;
struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
struct edid *edid;
+ struct i2c_adapter *ddc;
+
+ if (dc_link && dc_link->aux_mode)
+ ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else
+ ddc = &aconnector->i2c->base;
/*
* Note: drm_get_edid gets edid in the following order:
@@ -6531,7 +6550,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
* 2) firmware EDID if set via edid_firmware module parameter
* 3) regular DDC read.
*/
- edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc);
+ edid = drm_get_edid(connector, ddc);
if (!edid) {
DRM_ERROR("No EDID found on connector: %s.\n", connector->name);
return;
@@ -6572,12 +6591,18 @@ static int get_modes(struct drm_connector *connector)
static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
{
struct drm_connector *connector = &aconnector->base;
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(&aconnector->base);
+ struct dc_link *dc_link = aconnector->dc_link;
struct dc_sink_init_data init_params = {
.link = aconnector->dc_link,
.sink_signal = SIGNAL_TYPE_VIRTUAL
};
struct edid *edid;
+ struct i2c_adapter *ddc;
+
+ if (dc_link->aux_mode)
+ ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else
+ ddc = &aconnector->i2c->base;
/*
* Note: drm_get_edid gets edid in the following order:
@@ -6585,7 +6610,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
* 2) firmware EDID if set via edid_firmware module parameter
* 3) regular DDC read.
*/
- edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc);
+ edid = drm_get_edid(connector, ddc);
if (!edid) {
DRM_ERROR("No EDID found on connector: %s.\n", connector->name);
return;
@@ -8291,6 +8316,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],
+ acrtc_state->stream->link->psr_settings.psr_version ==
+ DC_PSR_VERSION_SU_1,
&dirty_rects_changed);
/*
@@ -8976,16 +9003,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
trace_amdgpu_dm_atomic_commit_tail_begin(state);
- if (dm->dc->caps.ips_support) {
- for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
- if (new_con_state->crtc &&
- new_con_state->crtc->state->active &&
- drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) {
- dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
- break;
- }
- }
- }
+ if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dm->dc, false);
drm_atomic_helper_update_legacy_modeset_state(dev, state);
drm_dp_mst_atomic_wait_for_dependencies(state);
@@ -9188,6 +9207,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
* To fix this, DC should permit updating only stream properties.
*/
dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC);
+ if (!dummy_updates) {
+ DRM_ERROR("Failed to allocate memory for dummy_updates.\n");
+ continue;
+ }
for (j = 0; j < status->plane_count; j++)
dummy_updates[j].surface = status->plane_states[0];
@@ -10728,11 +10751,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
- ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
- if (ret) {
- DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
- ret = -EINVAL;
- goto fail;
+ if (dc_resource_is_dsc_encoding_supported(dc)) {
+ ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+ if (ret) {
+ DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
+ ret = -EINVAL;
+ goto fail;
+ }
}
ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
@@ -11144,14 +11169,23 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (range->flags != 1)
continue;
- amdgpu_dm_connector->min_vfreq = range->min_vfreq;
- amdgpu_dm_connector->max_vfreq = range->max_vfreq;
- amdgpu_dm_connector->pixel_clock_mhz =
- range->pixel_clock_mhz * 10;
-
connector->display_info.monitor_range.min_vfreq = range->min_vfreq;
connector->display_info.monitor_range.max_vfreq = range->max_vfreq;
+ if (edid->revision >= 4) {
+ if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ)
+ connector->display_info.monitor_range.min_vfreq += 255;
+ if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ)
+ connector->display_info.monitor_range.max_vfreq += 255;
+ }
+
+ amdgpu_dm_connector->min_vfreq =
+ connector->display_info.monitor_range.min_vfreq;
+ amdgpu_dm_connector->max_vfreq =
+ connector->display_info.monitor_range.max_vfreq;
+ amdgpu_dm_connector->pixel_clock_mhz =
+ range->pixel_clock_mhz * 10;
+
break;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 85b7f58a7f35..c27063305a13 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
/* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+ case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A):
+ case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1):
DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
edid_caps->panel_patch.remove_sink_ext_caps = true;
break;
@@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
edid_caps->edid_hdmi = connector->display_info.is_hdmi;
+ apply_edid_quirks(edid_buf, edid_caps);
+
sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
if (sad_count <= 0)
return result;
@@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
else
edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION;
- apply_edid_quirks(edid_buf, edid_caps);
-
kfree(sads);
kfree(sadb);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 58b880acb087..3390f0d8420a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -711,7 +711,7 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
{
bool st;
enum dc_irq_source irq_source;
-
+ struct dc *dc = adev->dm.dc;
struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id];
if (!acrtc) {
@@ -729,6 +729,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
st = (state == AMDGPU_IRQ_STATE_ENABLE);
+ if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dc, false);
+
dc_interrupt_set(adev->dm.dc, irq_source, st);
return 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
index f2dfa96f9ef5..39530b2ea495 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
@@ -94,7 +94,7 @@ static void calculate_bandwidth(
const uint32_t s_high = 7;
const uint32_t dmif_chunk_buff_margin = 1;
- uint32_t max_chunks_fbc_mode;
+ uint32_t max_chunks_fbc_mode = 0;
int32_t num_cursor_lines;
int32_t i, j, k;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 960c4b4f6ddf..05f392501c0a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1850,19 +1850,21 @@ static enum bp_result get_firmware_info_v3_2(
/* Vega12 */
smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
DATA_TABLES(smu_info));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
if (!smu_info_v3_2)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
} else if (revision.minor == 3) {
/* Vega20 */
smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
DATA_TABLES(smu_info));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
if (!smu_info_v3_3)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
}
@@ -2422,10 +2424,11 @@ static enum bp_result get_integrated_info_v11(
info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
DATA_TABLES(integratedsysteminfo));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
if (info_v11 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v11->gpucapinfo);
/*
@@ -2637,11 +2640,12 @@ static enum bp_result get_integrated_info_v2_1(
info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
DATA_TABLES(integratedsysteminfo));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
if (info_v2_1 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_1->gpucapinfo);
/*
@@ -2799,11 +2803,11 @@ static enum bp_result get_integrated_info_v2_2(
info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
DATA_TABLES(integratedsysteminfo));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
-
if (info_v2_2 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_2->gpucapinfo);
/*
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index a5489fe6875f..aa9fd1dc550a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta
int i;
for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) {
+ if (i >= VG_NUM_DCFCLK_DPM_LEVELS)
+ break;
if (clock_table->SocVoltage[i] == voltage)
return clock_table->DcfClocks[i];
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 9c660d1facc7..e64890259235 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -437,32 +437,32 @@ static struct wm_table ddr5_wm_table = {
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
}
@@ -474,32 +474,32 @@ static struct wm_table lpddr5_wm_table = {
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.valid = true,
},
}
@@ -655,10 +655,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
+ uint32_t num_memps, num_fclk, num_dcfclk;
int i;
/* Determine min/max p-state values. */
- for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
+ num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS :
+ clock_table->NumMemPstatesEnabled;
+ for (i = 0; i < num_memps; i++) {
uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
@@ -670,7 +673,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
min_dram_speed_mts = max_dram_speed_mts;
min_pstate = max_pstate;
- for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
+ for (i = 0; i < num_memps; i++) {
uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
@@ -699,9 +702,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
/* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
- max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled);
+ num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS :
+ clock_table->NumFclkLevelsEnabled;
+ max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
- for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+ num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+ clock_table->NumDcfClkLevelsEnabled;
+ for (i = 0; i < num_dcfclk; i++) {
int j;
/* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index aa7c02ba948e..2c424e435962 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3817,7 +3817,9 @@ static void commit_planes_for_stream(struct dc *dc,
* programming has completed (we turn on phantom OTG in order
* to complete the plane disable for phantom pipes).
*/
- dc->hwss.apply_ctx_to_hw(dc, context);
+
+ if (dc->hwss.disable_phantom_streams)
+ dc->hwss.disable_phantom_streams(dc, context);
}
if (update_type != UPDATE_TYPE_FAST)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 88c6436b28b6..180ac47868c2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -291,11 +291,14 @@ void dc_state_destruct(struct dc_state *state)
dc_stream_release(state->phantom_streams[i]);
state->phantom_streams[i] = NULL;
}
+ state->phantom_stream_count = 0;
for (i = 0; i < state->phantom_plane_count; i++) {
dc_plane_state_release(state->phantom_planes[i]);
state->phantom_planes[i] = NULL;
}
+ state->phantom_plane_count = 0;
+
state->stream_mask = 0;
memset(&state->res_ctx, 0, sizeof(state->res_ctx));
memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 5d7aa882416b..c9317ea0258e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -434,6 +434,7 @@ struct dc_config {
bool EnableMinDispClkODM;
bool enable_auto_dpm_test_logs;
unsigned int disable_ips;
+ unsigned int disable_ips_in_vpb;
};
enum visual_confirm {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 2b79a0e5638e..363d522603a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -125,7 +125,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
unsigned int count,
union dmub_rb_cmd *cmd_list)
{
- struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ struct dc_context *dc_ctx;
struct dmub_srv *dmub;
enum dmub_status status;
int i;
@@ -133,6 +133,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
if (!dc_dmub_srv || !dc_dmub_srv->dmub)
return false;
+ dc_ctx = dc_dmub_srv->ctx;
dmub = dc_dmub_srv->dmub;
for (i = 0 ; i < count; i++) {
@@ -1161,7 +1162,7 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con
bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
{
- struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ struct dc_context *dc_ctx;
enum dmub_status status;
if (!dc_dmub_srv || !dc_dmub_srv->dmub)
@@ -1170,6 +1171,8 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
return true;
+ dc_ctx = dc_dmub_srv->ctx;
+
if (wait) {
if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
do {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index b08ccb8c68bc..9900dda2eef5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1034,6 +1034,7 @@ enum replay_FW_Message_type {
Replay_Msg_Not_Support = -1,
Replay_Set_Timing_Sync_Supported,
Replay_Set_Residency_Frameupdate_Timer,
+ Replay_Set_Pseudo_VTotal,
};
union replay_error_status {
@@ -1089,6 +1090,10 @@ struct replay_settings {
uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
/* Maximum link off frame count */
enum replay_link_off_frame_count_level link_off_frame_count_level;
+ /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */
+ uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal;
+ /* Replay last pseudo vtotal set to DMUB */
+ uint16_t last_pseudo_vtotal;
};
/* To split out "global" and "per-panel" config settings.
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
index e8570060d007..5bca67407c5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
@@ -290,4 +290,5 @@ void dce_panel_cntl_construct(
dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs;
dce_panel_cntl->base.ctx = init_data->ctx;
dce_panel_cntl->base.inst = init_data->inst;
+ dce_panel_cntl->base.pwrseq_inst = 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
index e43f77c11c00..5f97a868ada3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
@@ -56,16 +56,13 @@ static void dpp3_enable_cm_block(
static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base)
{
- enum dc_lut_mode mode;
+ enum dc_lut_mode mode = LUT_BYPASS;
uint32_t state_mode;
uint32_t lut_mode;
struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode);
- if (state_mode == 0)
- mode = LUT_BYPASS;
-
if (state_mode == 2) {//Programmable RAM LUT
REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode);
if (lut_mode == 0)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
index ad0df1a72a90..9e96a3ace207 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
@@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct(
dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs;
dcn301_panel_cntl->base.ctx = init_data->ctx;
dcn301_panel_cntl->base.inst = init_data->inst;
+ dcn301_panel_cntl->base.pwrseq_inst = 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 03248422d6ff..281be20b1a10 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -154,8 +154,24 @@ void dcn31_panel_cntl_construct(
struct dcn31_panel_cntl *dcn31_panel_cntl,
const struct panel_cntl_init_data *init_data)
{
+ uint8_t pwrseq_inst = 0xF;
+
dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
dcn31_panel_cntl->base.ctx = init_data->ctx;
dcn31_panel_cntl->base.inst = init_data->inst;
- dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst;
+
+ switch (init_data->eng_id) {
+ case ENGINE_ID_DIGA:
+ pwrseq_inst = 0;
+ break;
+ case ENGINE_ID_DIGB:
+ pwrseq_inst = 1;
+ break;
+ default:
+ DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
+ ASSERT(false);
+ break;
+ }
+
+ dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
index 501388014855..d761b0df2878 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
@@ -203,12 +203,12 @@ void dcn32_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
enc10->base.features = *enc_features;
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
enc10->base.transmitter = init_data->transmitter;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
index da94e5309fba..81e349d5835b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -184,8 +184,6 @@ void dcn35_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
@@ -240,6 +238,8 @@ void dcn35_link_encoder_construct(
}
enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
if (bp_funcs->get_connector_speed_cap_info)
result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 6042a5a6a44f..59ade76ffb18 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -72,11 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 9f37f717a1f8..a0a65e099104 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1112,7 +1112,7 @@ struct pipe_slice_table {
struct pipe_ctx *pri_pipe;
struct dc_plane_state *plane;
int slice_count;
- } mpc_combines[MAX_SURFACES];
+ } mpc_combines[MAX_PLANES];
int mpc_combine_count;
};
@@ -1288,7 +1288,7 @@ static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *contex
return updated;
}
-static bool should_allow_odm_power_optimization(struct dc *dc,
+static bool should_apply_odm_power_optimization(struct dc *dc,
struct dc_state *context, struct vba_vars_st *v, int *split,
bool *merge)
{
@@ -1392,9 +1392,12 @@ static void try_odm_power_optimization_and_revalidate(
{
int i;
unsigned int new_vlevel;
+ unsigned int cur_policy[MAX_PIPES];
- for (i = 0; i < pipe_cnt; i++)
+ for (i = 0; i < pipe_cnt; i++) {
+ cur_policy[i] = pipes[i].pipe.dest.odm_combine_policy;
pipes[i].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+ }
new_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
@@ -1403,6 +1406,9 @@ static void try_odm_power_optimization_and_revalidate(
memset(merge, 0, MAX_PIPES * sizeof(bool));
*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, new_vlevel, split, merge);
context->bw_ctx.dml.vba.VoltageLevel = *vlevel;
+ } else {
+ for (i = 0; i < pipe_cnt; i++)
+ pipes[i].pipe.dest.odm_combine_policy = cur_policy[i];
}
}
@@ -1580,7 +1586,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
}
}
- if (should_allow_odm_power_optimization(dc, context, vba, split, merge))
+ if (should_apply_odm_power_optimization(dc, context, vba, split, merge))
try_odm_power_optimization_and_revalidate(
dc, context, pipes, split, merge, vlevel, *pipe_cnt);
@@ -2209,7 +2215,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
int i;
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
- dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
+ if (!dc->config.enable_windowed_mpo_odm)
+ dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
* we have to re-calculate the DET allocation and run through DML once more to
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 475c4ec43c01..7ea2bd5374d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -164,8 +164,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
},
},
.num_states = 5,
- .sr_exit_time_us = 14.0,
- .sr_enter_plus_exit_time_us = 16.0,
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
.sr_exit_z8_time_us = 210.0,
.sr_enter_plus_exit_z8_time_us = 320.0,
.fclk_change_latency_us = 24.0,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 64d01a9cd68c..1ba6933d2b36 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -341,9 +341,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
break;
}
- if (dml2->config.bbox_overrides.clks_table.num_states)
- p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
-
/* Override from passed values, if available */
for (i = 0; i < p->in_states->num_states; i++) {
if (dml2->config.bbox_overrides.sr_exit_latency_us) {
@@ -400,7 +397,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
}
/* Copy clocks tables entries, if available */
if (dml2->config.bbox_overrides.clks_table.num_states) {
-
+ p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) {
p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz;
}
@@ -439,6 +436,14 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
}
dml2_policy_build_synthetic_soc_states(s, p);
+ if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 ||
+ dml2->v20.dml_core_ctx.project == dml_project_dcn351) {
+ // Override last out_state with data from last in_state
+ // This will ensure that out_state contains max fclk
+ memcpy(&p->out_states->state_array[p->out_states->num_states - 1],
+ &p->in_states->state_array[p->in_states->num_states - 1],
+ sizeof(struct soc_state_bounding_box_st));
+ }
}
void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out)
@@ -793,35 +798,28 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p
}
}
-/*TODO no support for mpc combine, need rework - should calculate scaling params based on plane+stream*/
-static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, const struct dc_state *context)
+static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context)
{
int i;
- struct scaler_data data = { 0 };
+ struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe;
+
+ memset(temp_pipe, 0, sizeof(struct pipe_ctx));
for (i = 0; i < MAX_PIPES; i++) {
const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
if (pipe->plane_state == in && !pipe->prev_odm_pipe) {
- const struct pipe_ctx *next_pipe = pipe->next_odm_pipe;
-
- data = context->res_ctx.pipe_ctx[i].plane_res.scl_data;
- while (next_pipe) {
- data.h_active += next_pipe->plane_res.scl_data.h_active;
- data.recout.width += next_pipe->plane_res.scl_data.recout.width;
- if (in->rotation == ROTATION_ANGLE_0 || in->rotation == ROTATION_ANGLE_180) {
- data.viewport.width += next_pipe->plane_res.scl_data.viewport.width;
- } else {
- data.viewport.height += next_pipe->plane_res.scl_data.viewport.height;
- }
- next_pipe = next_pipe->next_odm_pipe;
- }
+ temp_pipe->stream = pipe->stream;
+ temp_pipe->plane_state = pipe->plane_state;
+ temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps;
+
+ resource_build_scaling_params(temp_pipe);
break;
}
}
ASSERT(i < MAX_PIPES);
- return data;
+ return temp_pipe->plane_res.scl_data;
}
static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in)
@@ -866,7 +864,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned
out->ScalerEnabled[location] = false;
}
-static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, const struct dc_state *context)
+static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context)
{
const struct scaler_data scaler_data = get_scaler_data_for_plane(in, context);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 26307e599614..2a58a7687bdb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2,
in_out_display_cfg->hw.DLGRefClkFreqMHz = 50;
}
for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) {
+ if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) {
+ dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n",
+ __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ break;
+ }
dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i];
dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true;
dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i];
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 5660f15da291..01493c49bd7a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1183,9 +1183,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
dto_params.timing = &pipe_ctx->stream->timing;
dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
if (dccg) {
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
}
} else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
@@ -1476,7 +1476,7 @@ static enum dc_status dce110_enable_stream_timing(
return DC_OK;
}
-static enum dc_status apply_single_controller_ctx_to_hw(
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
struct dc *dc)
@@ -2302,7 +2302,7 @@ enum dc_status dce110_apply_ctx_to_hw(
if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
continue;
- status = apply_single_controller_ctx_to_hw(
+ status = dce110_apply_single_controller_ctx_to_hw(
pipe_ctx,
context,
dc);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
index 08028a1779ae..ed3cc3648e8e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
@@ -39,6 +39,10 @@ enum dc_status dce110_apply_ctx_to_hw(
struct dc *dc,
struct dc_state *context);
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dc *dc);
void dce110_enable_stream(struct pipe_ctx *pipe_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index e931342fcf4c..931ac8ed7069 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -2561,7 +2561,7 @@ void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
tg->funcs->setup_vertical_interrupt2(tg, start_line);
}
-static void dcn20_reset_back_end_for_pipe(
+void dcn20_reset_back_end_for_pipe(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
struct dc_state *context)
@@ -2790,18 +2790,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
}
if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
- dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
-
- phyd32clk = get_phyd32clk_src(link);
- dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
-
dto_params.otg_inst = tg->inst;
dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
dto_params.timing = &pipe_ctx->stream->timing;
dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
+
+ phyd32clk = get_phyd32clk_src(link);
+ dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
} else {
if (dccg->funcs->enable_symclk_se)
dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
index b94c85340abf..d950b3e54ec2 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -84,6 +84,10 @@ enum dc_status dcn20_enable_stream_timing(
void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_reset_back_end_for_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
void dcn20_init_blank(
struct dc *dc,
struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 8e88dcaf88f5..7252f5f781f0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -206,28 +206,32 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
{
struct abm *abm = pipe_ctx->stream_res.abm;
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
+ uint32_t otg_inst;
+
+ if (!abm || !tg || !panel_cntl)
+ return;
+
+ otg_inst = tg->inst;
if (dmcu) {
dce110_set_pipe(pipe_ctx);
return;
}
- if (abm && panel_cntl) {
- if (abm->funcs && abm->funcs->set_pipe_ex) {
- abm->funcs->set_pipe_ex(abm,
+ if (abm->funcs && abm->funcs->set_pipe_ex) {
+ abm->funcs->set_pipe_ex(abm,
otg_inst,
SET_ABM_PIPE_NORMAL,
panel_cntl->inst,
panel_cntl->pwrseq_inst);
- } else {
- dmub_abm_set_pipe(abm, otg_inst,
- SET_ABM_PIPE_NORMAL,
- panel_cntl->inst,
- panel_cntl->pwrseq_inst);
- }
+ } else {
+ dmub_abm_set_pipe(abm, otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
}
}
@@ -237,34 +241,35 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
{
struct dc_context *dc = pipe_ctx->stream->ctx;
struct abm *abm = pipe_ctx->stream_res.abm;
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
+ uint32_t otg_inst;
+
+ if (!abm || !tg || !panel_cntl)
+ return false;
+
+ otg_inst = tg->inst;
if (dc->dc->res_pool->dmcu) {
dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp);
return true;
}
- if (abm != NULL) {
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
-
- if (abm && panel_cntl) {
- if (abm->funcs && abm->funcs->set_pipe_ex) {
- abm->funcs->set_pipe_ex(abm,
- otg_inst,
- SET_ABM_PIPE_NORMAL,
- panel_cntl->inst,
- panel_cntl->pwrseq_inst);
- } else {
- dmub_abm_set_pipe(abm,
- otg_inst,
- SET_ABM_PIPE_NORMAL,
- panel_cntl->inst,
- panel_cntl->pwrseq_inst);
- }
- }
+ if (abm->funcs && abm->funcs->set_pipe_ex) {
+ abm->funcs->set_pipe_ex(abm,
+ otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
+ } else {
+ dmub_abm_set_pipe(abm,
+ otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
}
- if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm)
+ if (abm->funcs && abm->funcs->set_backlight_level_pwm)
abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16,
frame_ramp, 0, panel_cntl->inst);
else
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 6c9299c7683d..aa36d7a56ca8 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -1474,9 +1474,44 @@ void dcn32_update_dsc_pg(struct dc *dc,
}
}
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+ int i;
+
+ for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
+ struct pipe_ctx *pipe_ctx_old =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx_old->stream)
+ continue;
+
+ if (dc_state_get_pipe_subvp_type(dc->current_state, pipe_ctx_old) != SUBVP_PHANTOM)
+ continue;
+
+ if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
+ continue;
+
+ if (!pipe_ctx->stream || pipe_need_reprogram(pipe_ctx_old, pipe_ctx) ||
+ (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)) {
+ struct clock_source *old_clk = pipe_ctx_old->clock_source;
+
+ if (hws->funcs.reset_back_end_for_pipe)
+ hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
+ if (hws->funcs.enable_stream_gating)
+ hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
+ if (old_clk)
+ old_clk->funcs->cs_power_down(old_clk);
+ }
+ }
+}
+
void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
{
unsigned int i;
+ enum dc_status status = DC_OK;
+ struct dce_hwseq *hws = dc->hwseq;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -1497,16 +1532,39 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
}
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
-
- if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) {
- // If old context or new context has phantom pipes, apply
- // the phantom timings now. We can't change the phantom
- // pipe configuration safely without driver acquiring
- // the DMCUB lock first.
- dc->hwss.apply_ctx_to_hw(dc, context);
- break;
+ struct pipe_ctx *pipe_ctx_old =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->stream == NULL)
+ continue;
+
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+ continue;
+
+ if (pipe_ctx->stream == pipe_ctx_old->stream &&
+ pipe_ctx->stream->link->link_state_valid) {
+ continue;
}
+
+ if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
+ continue;
+
+ if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
+ continue;
+
+ if (hws->funcs.apply_single_controller_ctx_to_hw)
+ status = hws->funcs.apply_single_controller_ctx_to_hw(
+ pipe_ctx,
+ context,
+ dc);
+
+ ASSERT(status == DC_OK);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ if (hws->funcs.resync_fifo_dccg_dio)
+ hws->funcs.resync_fifo_dccg_dio(hws, dc, context);
+#endif
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
index cecf7f0f5671..069e20bc87c0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
@@ -111,6 +111,8 @@ void dcn32_update_dsc_pg(struct dc *dc,
void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context);
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context);
+
void dcn32_init_blank(
struct dc *dc,
struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
index 427cfc8c24a4..e8ac94a005b8 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
@@ -109,6 +109,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.get_dcc_en_bits = dcn10_get_dcc_en_bits,
.commit_subvp_config = dcn32_commit_subvp_config,
.enable_phantom_streams = dcn32_enable_phantom_streams,
+ .disable_phantom_streams = dcn32_disable_phantom_streams,
.subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
.subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast,
@@ -159,6 +160,8 @@ static const struct hwseq_private_funcs dcn32_private_funcs = {
.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
.resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio,
.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+ .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw,
+ .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe,
};
void dcn32_hw_sequencer_init_functions(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 9c806385ecbd..8b6c49622f3b 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -680,7 +680,7 @@ void dcn35_power_down_on_boot(struct dc *dc)
bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
struct dc_link *edp_links[MAX_NUM_EDP];
- int edp_num;
+ int i, edp_num;
if (dc->debug.dmcub_emulation)
return true;
@@ -688,6 +688,13 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
dc_get_edp_links(dc, edp_links, &edp_num);
if (edp_num == 0 || edp_num > 1)
return false;
+
+ for (i = 0; i < dc->current_state->stream_count; ++i) {
+ struct dc_stream_state *stream = dc->current_state->streams[i];
+
+ if (!stream->dpms_off && !dc_is_embedded_signal(stream->signal))
+ return false;
+ }
}
// TODO: review other cases when idle optimization is allowed
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index a54399383318..64ca7c66509b 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -379,6 +379,7 @@ struct hw_sequencer_funcs {
struct dc_cursor_attributes *cursor_attr);
void (*commit_subvp_config)(struct dc *dc, struct dc_state *context);
void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context);
+ void (*disable_phantom_streams)(struct dc *dc, struct dc_state *context);
void (*subvp_pipe_control_lock)(struct dc *dc,
struct dc_state *context,
bool lock,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
index 6137cf09aa54..b3c62a82cb1c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
@@ -165,8 +165,15 @@ struct hwseq_private_funcs {
void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx);
void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc,
struct dc_state *context);
+ enum dc_status (*apply_single_controller_ctx_to_hw)(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dc *dc);
bool (*is_dp_dig_pixel_rate_div_policy)(struct pipe_ctx *pipe_ctx);
#endif
+ void (*reset_back_end_for_pipe)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
};
struct dce_hwseq {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index f74ae0d41d3c..3a6bf77a6873 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -469,6 +469,8 @@ struct resource_context {
unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS];
int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS];
bool is_mpc_3dlut_acquired[MAX_PIPES];
+ /* solely used for build scalar data in dml2 */
+ struct pipe_ctx temp_pipe;
};
struct dce_bw_output {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
index 5dcbaa2db964..e97d964a1791 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -57,7 +57,7 @@ struct panel_cntl_funcs {
struct panel_cntl_init_data {
struct dc_context *ctx;
uint32_t inst;
- uint32_t pwrseq_inst;
+ uint32_t eng_id;
};
struct panel_cntl {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index c958ef37b78a..77a60aa9f27b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -427,22 +427,18 @@ struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe);
int resource_get_mpc_slice_index(const struct pipe_ctx *dpp_pipe);
/*
- * Get number of MPC "cuts" of the plane associated with the pipe. MPC slice
- * count is equal to MPC splits + 1. For example if a plane is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for a plane with MPCC combine. otherwise
- * the number of MPC "cuts" for the plane.
+ * Get the number of MPC slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an MPC combine
+ * pipe topology.
*/
-int resource_get_mpc_slice_count(const struct pipe_ctx *opp_head);
+int resource_get_mpc_slice_count(const struct pipe_ctx *pipe);
/*
- * Get number of ODM "cuts" of the timing associated with the pipe. ODM slice
- * count is equal to ODM splits + 1. For example if a timing is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for ODM combine. otherwise
- * the number of ODM "cuts" for the timing.
+ * Get the number of ODM slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an ODM combine
+ * pipe topology.
*/
-int resource_get_odm_slice_count(const struct pipe_ctx *otg_master);
+int resource_get_odm_slice_count(const struct pipe_ctx *pipe);
/* Get the ODM slice index counting from 0 from left most slice */
int resource_get_odm_slice_index(const struct pipe_ctx *opp_head);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 37d3027c32dc..cf22b8f28ba6 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -370,30 +370,6 @@ static enum transmitter translate_encoder_to_transmitter(
}
}
-static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link)
-{
- uint8_t pwrseq_inst = 0xF;
- struct dc_context *dc_ctx = link->dc->ctx;
-
- DC_LOGGER_INIT(dc_ctx->logger);
-
- switch (link->eng_id) {
- case ENGINE_ID_DIGA:
- pwrseq_inst = 0;
- break;
- case ENGINE_ID_DIGB:
- pwrseq_inst = 1;
- break;
- default:
- DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id);
- ASSERT(false);
- break;
- }
-
- return pwrseq_inst;
-}
-
-
static void link_destruct(struct dc_link *link)
{
int i;
@@ -657,7 +633,7 @@ static bool construct_phy(struct dc_link *link,
link->link_id.id == CONNECTOR_ID_LVDS)) {
panel_cntl_init_data.ctx = dc_ctx;
panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count;
- panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link);
+ panel_cntl_init_data.eng_id = link->eng_id;
link->panel_cntl =
link->dc->res_pool->funcs->panel_cntl_create(
&panel_cntl_init_data);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index 8fe66c367850..5b0bc7f6a188 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un
struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
int num_dpias = 0;
- for (uint8_t i = 0; i < num_streams; ++i) {
+ for (unsigned int i = 0; i < num_streams; ++i) {
if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
/* new dpia sst stream, check whether it exceeds max dpia */
if (num_dpias >= MAX_DPIA_NUM)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index dd0d2b206462..5491b707cec8 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -196,7 +196,7 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in
struct dc_link *link_dpia_primary, *link_dpia_secondary;
int total_bw = 0;
- for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) {
+ for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) {
if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 5a0b04518956..16a62e018712 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status(
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
union lane_status lane_status;
+ union lane_align_status_updated dpcd_lane_status_updated;
uint8_t dpcd_buf[6] = {0};
uint32_t lane;
@@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status(
* check lanes status
*/
lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+ dpcd_lane_status_updated.raw = dpcd_buf[4];
if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
!lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
+ !lane_status.bits.SYMBOL_LOCKED_0 ||
+ !dp_is_interlane_aligned(dpcd_lane_status_updated)) {
/* if one of the channel equalization, clock
* recovery or symbol lock is dropped
* consider it as (link has been
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
index e8dda44b23cb..5d36bab0029c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -619,7 +619,7 @@ static enum link_training_result dpia_training_eq_non_transparent(
uint32_t retries_eq = 0;
enum dc_status status;
enum dc_dp_training_pattern tr_pattern;
- uint32_t wait_time_microsec;
+ uint32_t wait_time_microsec = 0;
enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
union lane_align_status_updated dpcd_lane_status_updated = {0};
union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
index 5c9a30211c10..fc50931c2aec 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
@@ -205,7 +205,7 @@ enum dc_status core_link_read_dpcd(
uint32_t extended_size;
/* size of the remaining partitioned address space */
uint32_t size_left_to_read;
- enum dc_status status;
+ enum dc_status status = DC_ERROR_UNEXPECTED;
/* size of the next partition to be read from */
uint32_t partition_size;
uint32_t data_index = 0;
@@ -234,7 +234,7 @@ enum dc_status core_link_write_dpcd(
{
uint32_t partition_size;
uint32_t data_index = 0;
- enum dc_status status;
+ enum dc_status status = DC_ERROR_UNEXPECTED;
while (size) {
partition_size = dpcd_get_next_partition_size(address, size);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index 511ff6b5b985..7538b548c572 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -999,7 +999,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id
vpg = dcn301_vpg_create(ctx, vpg_inst);
afmt = dcn301_afmt_create(ctx, afmt_inst);
- if (!enc1 || !vpg || !afmt) {
+ if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) {
kfree(enc1);
kfree(vpg);
kfree(afmt);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index c4d71e7f18af..6f10052caeef 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1829,7 +1829,21 @@ int dcn32_populate_dml_pipes_from_context(
dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
- pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ if (dc->config.enable_windowed_mpo_odm &&
+ dc->debug.enable_single_display_2to1_odm_policy) {
+ switch (resource_get_odm_slice_count(pipe)) {
+ case 2:
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+ break;
+ case 4:
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+ break;
+ default:
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ }
+ } else {
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ }
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 761ec9891875..5fdcda8f8602 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -780,8 +780,8 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = false,
.ignore_pg = true,
.psp_disabled_wa = true,
- .ips2_eval_delay_us = 200,
- .ips2_entry_delay_us = 400,
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
.static_screen_wait_frames = 2,
};
@@ -2130,6 +2130,7 @@ static bool dcn35_resource_construct(
dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
dc->dml2_options.use_native_pstate_optimization = true;
dc->dml2_options.use_native_soc_bb_construction = true;
+ dc->dml2_options.minimize_dispclk_using_odm = false;
if (dc->config.EnableMinDispClkODM)
dc->dml2_options.minimize_dispclk_using_odm = true;
dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm;
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index c64b6c848ef7..e699731ee68e 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -2832,6 +2832,7 @@ struct dmub_rb_cmd_psr_set_power_opt {
#define REPLAY_RESIDENCY_MODE_MASK (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
# define REPLAY_RESIDENCY_MODE_PHY (0x0 << REPLAY_RESIDENCY_MODE_SHIFT)
# define REPLAY_RESIDENCY_MODE_ALPM (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
+# define REPLAY_RESIDENCY_MODE_IPS 0x10
#define REPLAY_RESIDENCY_ENABLE_MASK (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT)
# define REPLAY_RESIDENCY_DISABLE (0x0 << REPLAY_RESIDENCY_ENABLE_SHIFT)
@@ -2894,6 +2895,10 @@ enum dmub_cmd_replay_type {
* Set Residency Frameupdate Timer.
*/
DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6,
+ /**
+ * Set pseudo vtotal
+ */
+ DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL = 7,
};
/**
@@ -3077,6 +3082,26 @@ struct dmub_cmd_replay_set_timing_sync_data {
};
/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+struct dmub_cmd_replay_set_pseudo_vtotal {
+ /**
+ * Panel Instance.
+ * Panel isntance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * Source Vtotal that Replay + IPS + ABM full screen video src vtotal
+ */
+ uint16_t vtotal;
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad;
+};
+
+/**
* Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
*/
struct dmub_rb_cmd_replay_set_power_opt {
@@ -3157,6 +3182,20 @@ struct dmub_rb_cmd_replay_set_timing_sync {
};
/**
+ * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+struct dmub_rb_cmd_replay_set_pseudo_vtotal {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+ struct dmub_cmd_replay_set_pseudo_vtotal data;
+};
+
+/**
* Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
*/
struct dmub_cmd_replay_frameupdate_timer_data {
@@ -3207,6 +3246,10 @@ union dmub_replay_cmd_set {
* Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data.
*/
struct dmub_cmd_replay_frameupdate_timer_data timer_data;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command data.
+ */
+ struct dmub_cmd_replay_set_pseudo_vtotal pseudo_vtotal_data;
};
/**
@@ -4358,6 +4401,10 @@ union dmub_rb_cmd {
* Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
*/
struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+ struct dmub_rb_cmd_replay_set_pseudo_vtotal replay_set_pseudo_vtotal;
};
/**
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index ad98e504c00d..e304e8435fb8 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -980,6 +980,11 @@ void set_replay_coasting_vtotal(struct dc_link *link,
link->replay_settings.coasting_vtotal_table[type] = vtotal;
}
+void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal)
+{
+ link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal;
+}
+
void calculate_replay_link_off_frame_count(struct dc_link *link,
uint16_t vtotal, uint16_t htotal)
{
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index c17bbc6fb38c..bef4815e1703 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -57,6 +57,7 @@ void init_replay_config(struct dc_link *link, struct replay_config *pr_config);
void set_replay_coasting_vtotal(struct dc_link *link,
enum replay_coasting_vtotal_type type,
uint16_t vtotal);
+void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal);
void calculate_replay_link_off_frame_count(struct dc_link *link,
uint16_t vtotal, uint16_t htotal);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 1dc5dd9b7bf7..df2c7ffe190f 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -258,6 +258,7 @@ enum DC_DEBUG_MASK {
DC_ENABLE_DML2 = 0x100,
DC_DISABLE_PSR_SU = 0x200,
DC_DISABLE_REPLAY = 0x400,
+ DC_DISABLE_IPS = 0x800,
};
enum amd_dpm_forced_level;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
index be519c8edf49..335980e2afbf 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
@@ -138,7 +138,7 @@ static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size,
}
#define amdgpu_asic_get_reg_state_supported(adev) \
- ((adev)->asic_funcs->get_reg_state ? 1 : 0)
+ (((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? 1 : 0)
#define amdgpu_asic_get_reg_state(adev, state, buf, size) \
((adev)->asic_funcs->get_reg_state ? \
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 087d57850304..39c5e1dfa275 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2558,6 +2558,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int err, ret;
+ u32 pwm_mode;
int value;
if (amdgpu_in_reset(adev))
@@ -2569,13 +2570,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
if (err)
return err;
+ if (value == 0)
+ pwm_mode = AMD_FAN_CTRL_NONE;
+ else if (value == 1)
+ pwm_mode = AMD_FAN_CTRL_MANUAL;
+ else if (value == 2)
+ pwm_mode = AMD_FAN_CTRL_AUTO;
+ else
+ return -EINVAL;
+
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (ret < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return ret;
}
- ret = amdgpu_dpm_set_fan_control_mode(adev, value);
+ ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index df4f20293c16..eb4da3666e05 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev)
return 0;
}
+static int si_set_temperature_range(struct amdgpu_device *adev)
+{
+ int ret;
+
+ ret = si_thermal_enable_alert(adev, false);
+ if (ret)
+ return ret;
+ ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+ if (ret)
+ return ret;
+ ret = si_thermal_enable_alert(adev, true);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
static void si_dpm_disable(struct amdgpu_device *adev)
{
struct rv7xx_power_info *pi = rv770_get_pi(adev);
@@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev,
static int si_dpm_late_init(void *handle)
{
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->pm.dpm_enabled)
+ return 0;
+
+ ret = si_set_temperature_range(adev);
+ if (ret)
+ return ret;
+#if 0 //TODO ?
+ si_dpm_powergate_uvd(adev, true);
+#endif
return 0;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index c16703868e5c..0ad947df777a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include <linux/pci.h>
+#include <linux/power_supply.h>
#include <linux/reboot.h>
#include "amdgpu.h"
@@ -733,7 +734,7 @@ static int smu_early_init(void *handle)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_NONE;
+ smu->smu_baco.state = SMU_BACO_STATE_EXIT;
smu->smu_baco.platform_support = false;
smu->user_dpm_profile.fan_mode = -1;
@@ -817,16 +818,8 @@ static int smu_late_init(void *handle)
* handle the switch automatically. Driver involvement
* is unnecessary.
*/
- if (!smu->dc_controlled_by_gpio) {
- ret = smu_set_power_source(smu,
- adev->pm.ac_power ? SMU_POWER_SOURCE_AC :
- SMU_POWER_SOURCE_DC);
- if (ret) {
- dev_err(adev->dev, "Failed to switch to %s mode!\n",
- adev->pm.ac_power ? "AC" : "DC");
- return ret;
- }
- }
+ adev->pm.ac_power = power_supply_is_system_supplied() > 0;
+ smu_set_ac_dc(smu);
if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 1)) ||
(amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 3)))
@@ -1961,31 +1954,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
return 0;
}
-static int smu_reset_mp1_state(struct smu_context *smu)
-{
- struct amdgpu_device *adev = smu->adev;
- int ret = 0;
-
- if ((!adev->in_runpm) && (!adev->in_suspend) &&
- (!amdgpu_in_reset(adev)))
- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
- case IP_VERSION(13, 0, 0):
- case IP_VERSION(13, 0, 7):
- case IP_VERSION(13, 0, 10):
- ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
- break;
- default:
- break;
- }
-
- return ret;
-}
-
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
- int ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
@@ -2003,15 +1975,7 @@ static int smu_hw_fini(void *handle)
adev->pm.dpm_enabled = false;
- ret = smu_smc_hw_cleanup(smu);
- if (ret)
- return ret;
-
- ret = smu_reset_mp1_state(smu);
- if (ret)
- return ret;
-
- return 0;
+ return smu_smc_hw_cleanup(smu);
}
static void smu_late_fini(void *handle)
@@ -2710,6 +2674,7 @@ int smu_get_power_limit(void *handle,
case SMU_PPT_LIMIT_CURRENT:
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
case IP_VERSION(11, 0, 7):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 0, 12):
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 2aa4fea87314..66e84defd0b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -424,7 +424,6 @@ enum smu_reset_mode {
enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
- SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 4cd43bbec910..bcad42534da4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu,
if (default_power_limit)
*default_power_limit = power_limit;
- if (smu->od_enabled) {
+ if (smu->od_enabled)
od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
- od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
- } else {
+ else
od_percent_upper = 0;
- od_percent_lower = 100;
- }
+
+ od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
od_percent_upper, od_percent_lower, power_limit);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 8d1d29ffb0f1..ed189a3878eb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu,
*default_power_limit = power_limit;
if (smu->od_enabled &&
- navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) {
+ navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT))
od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
- od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
- } else {
+ else
od_percent_upper = 0;
- od_percent_lower = 100;
- }
+
+ od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
od_percent_upper, od_percent_lower, power_limit);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 21fc033528fa..e2ad2b972ab0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu,
if (default_power_limit)
*default_power_limit = power_limit;
- if (smu->od_enabled) {
+ if (smu->od_enabled)
od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]);
- od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]);
- } else {
+ else
od_percent_upper = 0;
- od_percent_lower = 100;
- }
+
+ od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]);
dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
od_percent_upper, od_percent_lower, power_limit);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 5a314d0316c1..c7bfa68bf00f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -1442,10 +1442,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
case 0x3:
dev_dbg(adev->dev, "Switched to AC mode!\n");
schedule_work(&smu->interrupt_work);
+ adev->pm.ac_power = true;
break;
case 0x4:
dev_dbg(adev->dev, "Switched to DC mode!\n");
schedule_work(&smu->interrupt_work);
+ adev->pm.ac_power = false;
break;
case 0x7:
/*
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 2ff6deedef95..da1f43999d09 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -451,7 +451,7 @@ static int vangogh_init_smc_tables(struct smu_context *smu)
#ifdef CONFIG_X86
/* AMD x86 APU only */
- smu->cpu_core_num = boot_cpu_data.x86_max_cores;
+ smu->cpu_core_num = topology_num_cores_per_package();
#else
smu->cpu_core_num = 4;
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 771a3d457c33..c486182ff275 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1379,10 +1379,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
case 0x3:
dev_dbg(adev->dev, "Switched to AC mode!\n");
smu_v13_0_ack_ac_dc_interrupt(smu);
+ adev->pm.ac_power = true;
break;
case 0x4:
dev_dbg(adev->dev, "Switched to DC mode!\n");
smu_v13_0_ack_ac_dc_interrupt(smu);
+ adev->pm.ac_power = false;
break;
case 0x7:
/*
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index a9b25faa63e4..9b80f18ea6c3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2357,6 +2357,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu,
PPTable_t *pptable = table_context->driver_pptable;
SkuTable_t *skutable = &pptable->SkuTable;
uint32_t power_limit, od_percent_upper, od_percent_lower;
+ uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
if (smu_v13_0_get_current_power_limit(smu, &power_limit))
power_limit = smu->adev->pm.ac_power ?
@@ -2368,19 +2369,18 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu,
if (default_power_limit)
*default_power_limit = power_limit;
- if (smu->od_enabled) {
+ if (smu->od_enabled)
od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]);
- od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]);
- } else {
+ else
od_percent_upper = 0;
- od_percent_lower = 100;
- }
+
+ od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]);
dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
od_percent_upper, od_percent_lower, power_limit);
if (max_power_limit) {
- *max_power_limit = power_limit * (100 + od_percent_upper);
+ *max_power_limit = msg_limit * (100 + od_percent_upper);
*max_power_limit /= 100;
}
@@ -2747,13 +2747,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_PrepareMp1ForUnload,
- 0x55, NULL);
-
- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
- ret = smu_v13_0_disable_pmfw_state(smu);
-
+ ret = smu_cmn_set_mp1_state(smu, mp1_state);
break;
default:
/* Ignore others */
@@ -2949,7 +2943,7 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 0):
return smu->smc_fw_version >= 0x004e6300;
case IP_VERSION(13, 0, 10):
@@ -2959,6 +2953,55 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu)
}
}
+static int smu_v13_0_0_set_power_limit(struct smu_context *smu,
+ enum smu_ppt_limit_type limit_type,
+ uint32_t limit)
+{
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
+ SkuTable_t *skutable = &pptable->SkuTable;
+ uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
+ struct smu_table_context *table_context = &smu->smu_table;
+ OverDriveTableExternal_t *od_table =
+ (OverDriveTableExternal_t *)table_context->overdrive_table;
+ int ret = 0;
+
+ if (limit_type != SMU_DEFAULT_PPT_LIMIT)
+ return -EINVAL;
+
+ if (limit <= msg_limit) {
+ if (smu->current_power_limit > msg_limit) {
+ od_table->OverDriveTable.Ppt = 0;
+ od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+
+ ret = smu_v13_0_0_upload_overdrive_table(smu, od_table);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+ return ret;
+ }
+ }
+ return smu_v13_0_set_power_limit(smu, limit_type, limit);
+ } else if (smu->od_enabled) {
+ ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit);
+ if (ret)
+ return ret;
+
+ od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100;
+ od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+
+ ret = smu_v13_0_0_upload_overdrive_table(smu, od_table);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+ return ret;
+ }
+
+ smu->current_power_limit = limit;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -3013,7 +3056,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.set_fan_control_mode = smu_v13_0_set_fan_control_mode,
.enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost,
.get_power_limit = smu_v13_0_0_get_power_limit,
- .set_power_limit = smu_v13_0_set_power_limit,
+ .set_power_limit = smu_v13_0_0_set_power_limit,
.set_power_source = smu_v13_0_set_power_source,
.get_power_profile_mode = smu_v13_0_0_get_power_profile_mode,
.set_power_profile_mode = smu_v13_0_0_set_power_profile_mode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 3c98a8a0386a..7e1941cf1796 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -160,8 +160,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0),
MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1),
MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1),
- MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 0),
- MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 0),
+ MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1),
+ MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0),
MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0),
MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0),
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 59606a19e3d2..3dc7b60cb075 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2321,6 +2321,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
PPTable_t *pptable = table_context->driver_pptable;
SkuTable_t *skutable = &pptable->SkuTable;
uint32_t power_limit, od_percent_upper, od_percent_lower;
+ uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
if (smu_v13_0_get_current_power_limit(smu, &power_limit))
power_limit = smu->adev->pm.ac_power ?
@@ -2332,19 +2333,18 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
if (default_power_limit)
*default_power_limit = power_limit;
- if (smu->od_enabled) {
+ if (smu->od_enabled)
od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]);
- od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]);
- } else {
+ else
od_percent_upper = 0;
- od_percent_lower = 100;
- }
+
+ od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]);
dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
od_percent_upper, od_percent_lower, power_limit);
if (max_power_limit) {
- *max_power_limit = power_limit * (100 + od_percent_upper);
+ *max_power_limit = msg_limit * (100 + od_percent_upper);
*max_power_limit /= 100;
}
@@ -2504,13 +2504,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_PrepareMp1ForUnload,
- 0x55, NULL);
-
- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
- ret = smu_v13_0_disable_pmfw_state(smu);
-
+ ret = smu_cmn_set_mp1_state(smu, mp1_state);
break;
default:
/* Ignore others */
@@ -2545,6 +2539,55 @@ static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu)
return smu->smc_fw_version > 0x00524600;
}
+static int smu_v13_0_7_set_power_limit(struct smu_context *smu,
+ enum smu_ppt_limit_type limit_type,
+ uint32_t limit)
+{
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
+ SkuTable_t *skutable = &pptable->SkuTable;
+ uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
+ struct smu_table_context *table_context = &smu->smu_table;
+ OverDriveTableExternal_t *od_table =
+ (OverDriveTableExternal_t *)table_context->overdrive_table;
+ int ret = 0;
+
+ if (limit_type != SMU_DEFAULT_PPT_LIMIT)
+ return -EINVAL;
+
+ if (limit <= msg_limit) {
+ if (smu->current_power_limit > msg_limit) {
+ od_table->OverDriveTable.Ppt = 0;
+ od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+
+ ret = smu_v13_0_7_upload_overdrive_table(smu, od_table);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+ return ret;
+ }
+ }
+ return smu_v13_0_set_power_limit(smu, limit_type, limit);
+ } else if (smu->od_enabled) {
+ ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit);
+ if (ret)
+ return ret;
+
+ od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100;
+ od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT;
+
+ ret = smu_v13_0_7_upload_overdrive_table(smu, od_table);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to upload overdrive table!\n");
+ return ret;
+ }
+
+ smu->current_power_limit = limit;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2596,7 +2639,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.set_fan_control_mode = smu_v13_0_set_fan_control_mode,
.enable_mgpu_fan_boost = smu_v13_0_7_enable_mgpu_fan_boost,
.get_power_limit = smu_v13_0_7_get_power_limit,
- .set_power_limit = smu_v13_0_set_power_limit,
+ .set_power_limit = smu_v13_0_7_set_power_limit,
.set_power_source = smu_v13_0_set_power_source,
.get_power_profile_mode = smu_v13_0_7_get_power_profile_mode,
.set_power_profile_mode = smu_v13_0_7_set_power_profile_mode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index 4894f7ee737b..6dae5ad74ff0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -229,8 +229,6 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2;
break;
case IP_VERSION(14, 0, 0):
- if ((smu->smc_fw_version < 0x5d3a00))
- dev_warn(smu->adev->dev, "The PMFW version(%x) is behind in this BIOS!\n", smu->smc_fw_version);
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
break;
default:
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 47fdbae4adfc..9310c4758e38 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -261,7 +261,10 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
*value = metrics->MpipuclkFrequency;
break;
case METRICS_AVERAGE_GFXACTIVITY:
- *value = metrics->GfxActivity / 100;
+ if ((smu->smc_fw_version > 0x5d4600))
+ *value = metrics->GfxActivity;
+ else
+ *value = metrics->GfxActivity / 100;
break;
case METRICS_AVERAGE_VCNACTIVITY:
*value = metrics->VcnActivity / 100;
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
index ef31033439bc..29d91493b101 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -1762,6 +1762,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux,
u8 request = msg->request & ~DP_AUX_I2C_MOT;
int ret = 0;
+ mutex_lock(&ctx->aux_lock);
pm_runtime_get_sync(dev);
msg->reply = 0;
switch (request) {
@@ -1778,6 +1779,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux,
msg->size, msg->buffer);
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
+ mutex_unlock(&ctx->aux_lock);
return ret;
}
@@ -2474,7 +2476,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge,
ctx->connector = NULL;
anx7625_dp_stop(ctx);
- pm_runtime_put_sync(dev);
+ mutex_lock(&ctx->aux_lock);
+ pm_runtime_put_sync_suspend(dev);
+ mutex_unlock(&ctx->aux_lock);
}
static enum drm_connector_status
@@ -2668,6 +2672,7 @@ static int anx7625_i2c_probe(struct i2c_client *client)
mutex_init(&platform->lock);
mutex_init(&platform->hdcp_wq_lock);
+ mutex_init(&platform->aux_lock);
INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func);
platform->hdcp_workqueue = create_workqueue("hdcp workqueue");
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h
index 66ebee7f3d83..39ed35d33836 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.h
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.h
@@ -475,6 +475,8 @@ struct anx7625_data {
struct workqueue_struct *hdcp_workqueue;
/* Lock for hdcp work queue */
struct mutex hdcp_wq_lock;
+ /* Lock for aux transfer and disable */
+ struct mutex aux_lock;
char edid_block;
struct display_timing dt;
u8 display_timing_valid;
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
index bb55f697a181..6886db2d9e00 100644
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -25,20 +25,18 @@ static void drm_aux_hpd_bridge_release(struct device *dev)
ida_free(&drm_aux_hpd_bridge_ida, adev->id);
of_node_put(adev->dev.platform_data);
+ of_node_put(adev->dev.of_node);
kfree(adev);
}
-static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
+static void drm_aux_hpd_bridge_free_adev(void *_adev)
{
- struct auxiliary_device *adev = _adev;
-
- auxiliary_device_delete(adev);
- auxiliary_device_uninit(adev);
+ auxiliary_device_uninit(_adev);
}
/**
- * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge
+ * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge
* @parent: device instance providing this bridge
* @np: device node pointer corresponding to this bridge instance
*
@@ -46,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
* DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is
* able to send the HPD events.
*
- * Return: device instance that will handle created bridge or an error code
- * encoded into the pointer.
+ * Return: bridge auxiliary device pointer or an error pointer
*/
-struct device *drm_dp_hpd_bridge_register(struct device *parent,
- struct device_node *np)
+struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np)
{
struct auxiliary_device *adev;
int ret;
@@ -74,18 +70,62 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent,
ret = auxiliary_device_init(adev);
if (ret) {
+ of_node_put(adev->dev.platform_data);
+ of_node_put(adev->dev.of_node);
ida_free(&drm_aux_hpd_bridge_ida, adev->id);
kfree(adev);
return ERR_PTR(ret);
}
- ret = auxiliary_device_add(adev);
- if (ret) {
- auxiliary_device_uninit(adev);
+ ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev);
+ if (ret)
return ERR_PTR(ret);
- }
- ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev);
+ return adev;
+}
+EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc);
+
+static void drm_aux_hpd_bridge_del_adev(void *_adev)
+{
+ auxiliary_device_delete(_adev);
+}
+
+/**
+ * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge
+ * @dev: struct device to tie registration lifetime to
+ * @adev: bridge auxiliary device to be registered
+ *
+ * Returns: zero on success or a negative errno
+ */
+int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev)
+{
+ int ret;
+
+ ret = auxiliary_device_add(adev);
+ if (ret)
+ return ret;
+
+ return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev);
+}
+EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add);
+
+/**
+ * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge
+ * @parent: device instance providing this bridge
+ * @np: device node pointer corresponding to this bridge instance
+ *
+ * Return: device instance that will handle created bridge or an error pointer
+ */
+struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np)
+{
+ struct auxiliary_device *adev;
+ int ret;
+
+ adev = devm_drm_dp_hpd_bridge_alloc(parent, np);
+ if (IS_ERR(adev))
+ return ERR_CAST(adev);
+
+ ret = devm_drm_dp_hpd_bridge_add(parent, adev);
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c
index 541e4f5afc4c..14d4dcf239da 100644
--- a/drivers/gpu/drm/bridge/parade-ps8640.c
+++ b/drivers/gpu/drm/bridge/parade-ps8640.c
@@ -107,6 +107,7 @@ struct ps8640 {
struct device_link *link;
bool pre_enabled;
bool need_post_hpd_delay;
+ struct mutex aux_lock;
};
static const struct regmap_config ps8640_regmap_config[] = {
@@ -345,11 +346,20 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux,
struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev;
int ret;
+ mutex_lock(&ps_bridge->aux_lock);
pm_runtime_get_sync(dev);
+ ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000);
+ if (ret) {
+ pm_runtime_put_sync_suspend(dev);
+ goto exit;
+ }
ret = ps8640_aux_transfer_msg(aux, msg);
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
+exit:
+ mutex_unlock(&ps_bridge->aux_lock);
+
return ret;
}
@@ -470,7 +480,18 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge,
ps_bridge->pre_enabled = false;
ps8640_bridge_vdo_control(ps_bridge, DISABLE);
+
+ /*
+ * The bridge seems to expect everything to be power cycled at the
+ * disable process, so grab a lock here to make sure
+ * ps8640_aux_transfer() is not holding a runtime PM reference and
+ * preventing the bridge from suspend.
+ */
+ mutex_lock(&ps_bridge->aux_lock);
+
pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev);
+
+ mutex_unlock(&ps_bridge->aux_lock);
}
static int ps8640_bridge_attach(struct drm_bridge *bridge,
@@ -619,6 +640,8 @@ static int ps8640_probe(struct i2c_client *client)
if (!ps_bridge)
return -ENOMEM;
+ mutex_init(&ps_bridge->aux_lock);
+
ps_bridge->supplies[0].supply = "vdd12";
ps_bridge->supplies[1].supply = "vdd33";
ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies),
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index be5914caa17d..63a1a0c88be4 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -969,10 +969,6 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi)
reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
reg &= ~DSIM_STOP_STATE_CNT_MASK;
reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]);
-
- if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
- reg |= DSIM_FORCE_STOP_STATE;
-
samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff);
@@ -1431,18 +1427,6 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi)
disable_irq(dsi->irq);
}
-static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable)
-{
- u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-
- if (enable)
- reg |= DSIM_FORCE_STOP_STATE;
- else
- reg &= ~DSIM_FORCE_STOP_STATE;
-
- samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
-}
-
static int samsung_dsim_init(struct samsung_dsim *dsi)
{
const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
@@ -1492,9 +1476,6 @@ static void samsung_dsim_atomic_pre_enable(struct drm_bridge *bridge,
ret = samsung_dsim_init(dsi);
if (ret)
return;
-
- samsung_dsim_set_display_mode(dsi);
- samsung_dsim_set_display_enable(dsi, true);
}
}
@@ -1503,12 +1484,8 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge,
{
struct samsung_dsim *dsi = bridge_to_dsi(bridge);
- if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
- samsung_dsim_set_display_mode(dsi);
- samsung_dsim_set_display_enable(dsi, true);
- } else {
- samsung_dsim_set_stop_state(dsi, false);
- }
+ samsung_dsim_set_display_mode(dsi);
+ samsung_dsim_set_display_enable(dsi, true);
dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
}
@@ -1521,9 +1498,6 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge,
if (!(dsi->state & DSIM_STATE_ENABLED))
return;
- if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
- samsung_dsim_set_stop_state(dsi, true);
-
dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
}
@@ -1828,8 +1802,6 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host,
if (ret)
return ret;
- samsung_dsim_set_stop_state(dsi, false);
-
ret = mipi_dsi_create_packet(&xfer.packet, msg);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c
index 2bdc5b439beb..4560ae9cbce1 100644
--- a/drivers/gpu/drm/bridge/sii902x.c
+++ b/drivers/gpu/drm/bridge/sii902x.c
@@ -1080,6 +1080,26 @@ static int sii902x_init(struct sii902x *sii902x)
return ret;
}
+ ret = sii902x_audio_codec_init(sii902x, dev);
+ if (ret)
+ return ret;
+
+ i2c_set_clientdata(sii902x->i2c, sii902x);
+
+ sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev,
+ 1, 0, I2C_MUX_GATE,
+ sii902x_i2c_bypass_select,
+ sii902x_i2c_bypass_deselect);
+ if (!sii902x->i2cmux) {
+ ret = -ENOMEM;
+ goto err_unreg_audio;
+ }
+
+ sii902x->i2cmux->priv = sii902x;
+ ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0);
+ if (ret)
+ goto err_unreg_audio;
+
sii902x->bridge.funcs = &sii902x_bridge_funcs;
sii902x->bridge.of_node = dev->of_node;
sii902x->bridge.timings = &default_sii902x_timings;
@@ -1090,19 +1110,13 @@ static int sii902x_init(struct sii902x *sii902x)
drm_bridge_add(&sii902x->bridge);
- sii902x_audio_codec_init(sii902x, dev);
-
- i2c_set_clientdata(sii902x->i2c, sii902x);
+ return 0;
- sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev,
- 1, 0, I2C_MUX_GATE,
- sii902x_i2c_bypass_select,
- sii902x_i2c_bypass_deselect);
- if (!sii902x->i2cmux)
- return -ENOMEM;
+err_unreg_audio:
+ if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev))
+ platform_device_unregister(sii902x->audio.pdev);
- sii902x->i2cmux->priv = sii902x;
- return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0);
+ return ret;
}
static int sii902x_probe(struct i2c_client *client)
@@ -1170,12 +1184,14 @@ static int sii902x_probe(struct i2c_client *client)
}
static void sii902x_remove(struct i2c_client *client)
-
{
struct sii902x *sii902x = i2c_get_clientdata(client);
- i2c_mux_del_adapters(sii902x->i2cmux);
drm_bridge_remove(&sii902x->bridge);
+ i2c_mux_del_adapters(sii902x->i2cmux);
+
+ if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev))
+ platform_device_unregister(sii902x->audio.pdev);
}
static const struct of_device_id sii902x_dt_ids[] = {
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index bd6c24d4213c..f7c6b60629c2 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5491,6 +5491,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
* - 0 if the new state is valid
* - %-ENOSPC, if the new state is invalid, because of BW limitation
* @failing_port is set to:
+ *
* - The non-root port where a BW limit check failed
* with all the ports downstream of @failing_port passing
* the BW limit check.
@@ -5499,6 +5500,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
* - %NULL if the BW limit check failed at the root port
* with all the ports downstream of the root port passing
* the BW limit check.
+ *
* - %-EINVAL, if the new state is invalid, because the root port has
* too many payloads.
*/
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..5ebdd6f8f36e 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
u64 start, u64 end,
unsigned int order)
{
+ u64 req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
if (drm_buddy_block_is_allocated(block))
continue;
+ if (block_start < start || block_end > end) {
+ u64 adjusted_start = max(block_start, start);
+ u64 adjusted_end = min(block_end, end);
+
+ if (round_down(adjusted_end + 1, req_size) <=
+ round_up(adjusted_start, req_size))
+ continue;
+ }
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
/*
@@ -538,7 +548,13 @@ static int __alloc_range(struct drm_buddy *mm,
list_add(&block->left->tmp_link, dfs);
} while (1);
+ if (total_allocated < size) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+
list_splice_tail(&allocated, blocks);
+
return 0;
err_undo:
@@ -755,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
return -EINVAL;
/* Actual range allocation */
- if (start + size == end)
+ if (start + size == end) {
+ if (!IS_ALIGNED(start | end, min_block_size))
+ return -EINVAL;
+
return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+ }
original_size = size;
original_min_size = min_block_size;
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index cb90e70d85e8..65f9f66933bb 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -904,6 +904,7 @@ out:
connector_set = NULL;
fb = NULL;
mode = NULL;
+ num_connectors = 0;
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index b67eafa55715..b7d42210fccc 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -260,8 +260,7 @@ static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
}
/**
- * drm_gem_vram_offset() - \
- Returns a GEM VRAM object's offset in video memory
+ * drm_gem_vram_offset() - Returns a GEM VRAM object's offset in video memory
* @gbo: the GEM VRAM object
*
* This function returns the buffer object's offset in the device's video
@@ -470,14 +469,15 @@ void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo,
EXPORT_SYMBOL(drm_gem_vram_vunmap);
/**
- * drm_gem_vram_fill_create_dumb() - \
- Helper for implementing &struct drm_driver.dumb_create
+ * drm_gem_vram_fill_create_dumb() - Helper for implementing
+ * &struct drm_driver.dumb_create
+ *
* @file: the DRM file
* @dev: the DRM device
* @pg_align: the buffer's alignment in multiples of the page size
* @pitch_align: the scanline's alignment in powers of 2
- * @args: the arguments as provided to \
- &struct drm_driver.dumb_create
+ * @args: the arguments as provided to
+ * &struct drm_driver.dumb_create
*
* This helper function fills &struct drm_mode_create_dumb, which is used
* by &struct drm_driver.dumb_create. Implementations of this interface
@@ -575,8 +575,7 @@ static int drm_gem_vram_bo_driver_move(struct drm_gem_vram_object *gbo,
*/
/**
- * drm_gem_vram_object_free() - \
- Implements &struct drm_gem_object_funcs.free
+ * drm_gem_vram_object_free() - Implements &struct drm_gem_object_funcs.free
* @gem: GEM object. Refers to &struct drm_gem_vram_object.gem
*/
static void drm_gem_vram_object_free(struct drm_gem_object *gem)
@@ -591,12 +590,11 @@ static void drm_gem_vram_object_free(struct drm_gem_object *gem)
*/
/**
- * drm_gem_vram_driver_dumb_create() - \
- Implements &struct drm_driver.dumb_create
+ * drm_gem_vram_driver_dumb_create() - Implements &struct drm_driver.dumb_create
* @file: the DRM file
* @dev: the DRM device
- * @args: the arguments as provided to \
- &struct drm_driver.dumb_create
+ * @args: the arguments as provided to
+ * &struct drm_driver.dumb_create
*
* This function requires the driver to use @drm_device.vram_mm for its
* instance of VRAM MM.
@@ -639,8 +637,8 @@ static void __drm_gem_vram_plane_helper_cleanup_fb(struct drm_plane *plane,
}
/**
- * drm_gem_vram_plane_helper_prepare_fb() - \
- * Implements &struct drm_plane_helper_funcs.prepare_fb
+ * drm_gem_vram_plane_helper_prepare_fb() - Implements &struct
+ * drm_plane_helper_funcs.prepare_fb
* @plane: a DRM plane
* @new_state: the plane's new state
*
@@ -690,8 +688,8 @@ err_drm_gem_vram_unpin:
EXPORT_SYMBOL(drm_gem_vram_plane_helper_prepare_fb);
/**
- * drm_gem_vram_plane_helper_cleanup_fb() - \
- * Implements &struct drm_plane_helper_funcs.cleanup_fb
+ * drm_gem_vram_plane_helper_cleanup_fb() - Implements &struct
+ * drm_plane_helper_funcs.cleanup_fb
* @plane: a DRM plane
* @old_state: the plane's old state
*
@@ -717,8 +715,8 @@ EXPORT_SYMBOL(drm_gem_vram_plane_helper_cleanup_fb);
*/
/**
- * drm_gem_vram_simple_display_pipe_prepare_fb() - \
- * Implements &struct drm_simple_display_pipe_funcs.prepare_fb
+ * drm_gem_vram_simple_display_pipe_prepare_fb() - Implements &struct
+ * drm_simple_display_pipe_funcs.prepare_fb
* @pipe: a simple display pipe
* @new_state: the plane's new state
*
@@ -739,8 +737,8 @@ int drm_gem_vram_simple_display_pipe_prepare_fb(
EXPORT_SYMBOL(drm_gem_vram_simple_display_pipe_prepare_fb);
/**
- * drm_gem_vram_simple_display_pipe_cleanup_fb() - \
- * Implements &struct drm_simple_display_pipe_funcs.cleanup_fb
+ * drm_gem_vram_simple_display_pipe_cleanup_fb() - Implements &struct
+ * drm_simple_display_pipe_funcs.cleanup_fb
* @pipe: a simple display pipe
* @old_state: the plane's old state
*
@@ -761,8 +759,7 @@ EXPORT_SYMBOL(drm_gem_vram_simple_display_pipe_cleanup_fb);
*/
/**
- * drm_gem_vram_object_pin() - \
- Implements &struct drm_gem_object_funcs.pin
+ * drm_gem_vram_object_pin() - Implements &struct drm_gem_object_funcs.pin
* @gem: The GEM object to pin
*
* Returns:
@@ -785,8 +782,7 @@ static int drm_gem_vram_object_pin(struct drm_gem_object *gem)
}
/**
- * drm_gem_vram_object_unpin() - \
- Implements &struct drm_gem_object_funcs.unpin
+ * drm_gem_vram_object_unpin() - Implements &struct drm_gem_object_funcs.unpin
* @gem: The GEM object to unpin
*/
static void drm_gem_vram_object_unpin(struct drm_gem_object *gem)
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 834a5e28abbe..7352bde299d5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
if (max_segment == 0)
max_segment = UINT_MAX;
err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
- nr_pages << PAGE_SHIFT,
+ (unsigned long)nr_pages << PAGE_SHIFT,
max_segment, GFP_KERNEL);
if (err) {
kfree(sg);
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 3f479483d7d8..23b4e9a3361d 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -760,9 +760,11 @@ static void output_poll_execute(struct work_struct *work)
changed = dev->mode_config.delayed_event;
dev->mode_config.delayed_event = false;
- if (!drm_kms_helper_poll && dev->mode_config.poll_running) {
- drm_kms_helper_disable_hpd(dev);
- dev->mode_config.poll_running = false;
+ if (!drm_kms_helper_poll) {
+ if (dev->mode_config.poll_running) {
+ drm_kms_helper_disable_hpd(dev);
+ dev->mode_config.poll_running = false;
+ }
goto out;
}
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 84101baeecc6..a6c19de46292 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1040,7 +1040,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
uint64_t *points;
uint32_t signaled_count, i;
- if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+ if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
lockdep_assert_none_held_once();
points = kmalloc_array(count, sizeof(*points), GFP_KERNEL);
@@ -1109,7 +1110,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
* fallthough and try a 0 timeout wait!
*/
- if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+ if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) {
for (i = 0; i < count; ++i)
drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
}
@@ -1416,10 +1418,21 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
/* This happens inside the syncobj lock */
fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+ if (!fence)
+ return;
+
ret = dma_fence_chain_find_seqno(&fence, entry->point);
- if (ret != 0 || !fence) {
+ if (ret != 0) {
+ /* The given seqno has not been submitted yet. */
dma_fence_put(fence);
return;
+ } else if (!fence) {
+ /* If dma_fence_chain_find_seqno returns 0 but sets the fence
+ * to NULL, it implies that the given seqno is signaled and a
+ * later seqno has already been submitted. Assign a stub fence
+ * so that the eventfd still gets signaled below.
+ */
+ fence = dma_fence_get_stub();
}
list_del_init(&entry->node);
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 776f2f0b602d..0ef7bc8848b0 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win,
static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win,
struct drm_framebuffer *fb)
{
- struct exynos_drm_plane plane = ctx->planes[win];
+ struct exynos_drm_plane *plane = &ctx->planes[win];
struct exynos_drm_plane_state *state =
- to_exynos_plane_state(plane.base.state);
+ to_exynos_plane_state(plane->base.state);
unsigned int alpha = state->base.alpha;
unsigned int pixel_alpha;
unsigned long val;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index a9f1c5c05894..f2145227a1e0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -480,7 +480,7 @@ static void fimd_commit(struct exynos_drm_crtc *crtc)
struct fimd_context *ctx = crtc->ctx;
struct drm_display_mode *mode = &crtc->base.state->adjusted_mode;
const struct fimd_driver_data *driver_data = ctx->driver_data;
- void *timing_base = ctx->regs + driver_data->timing_base;
+ void __iomem *timing_base = ctx->regs + driver_data->timing_base;
u32 val;
if (ctx->suspended)
@@ -661,9 +661,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win,
static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
struct drm_framebuffer *fb, int width)
{
- struct exynos_drm_plane plane = ctx->planes[win];
+ struct exynos_drm_plane *plane = &ctx->planes[win];
struct exynos_drm_plane_state *state =
- to_exynos_plane_state(plane.base.state);
+ to_exynos_plane_state(plane->base.state);
uint32_t pixel_format = fb->format->format;
unsigned int alpha = state->base.alpha;
u32 val = WINCONx_ENWIN;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index e9a769590415..180507a47700 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1341,7 +1341,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev)
for (i = 0; i < ctx->num_clocks; i++) {
ret = clk_prepare_enable(ctx->clocks[i]);
if (ret) {
- while (--i > 0)
+ while (--i >= 0)
clk_disable_unprepare(ctx->clocks[i]);
return ret;
}
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index b5d6e3352071..3089029abba4 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -140,7 +140,7 @@ config DRM_I915_GVT_KVMGT
Note that this driver only supports newer device from Broadwell on.
For further information and setup guide, you can visit:
- http://01.org/igvt-g.
+ https://github.com/intel/gvt-linux/wiki.
If in doubt, say "N".
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e777686190ca..c13f14edb508 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
-subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index ac456a2275db..eda4a8b88590 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
-
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c
index 47cd6bb04366..06900ff307b2 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power_well.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c
@@ -246,7 +246,14 @@ static enum phy icl_aux_pw_to_phy(struct drm_i915_private *i915,
enum aux_ch aux_ch = icl_aux_pw_to_ch(power_well);
struct intel_digital_port *dig_port = aux_ch_to_digital_port(i915, aux_ch);
- return intel_port_to_phy(i915, dig_port->base.port);
+ /*
+ * FIXME should we care about the (VBT defined) dig_port->aux_ch
+ * relationship or should this be purely defined by the hardware layout?
+ * Currently if the port doesn't appear in the VBT, or if it's declared
+ * as HDMI-only and routed to a combo PHY, the encoder either won't be
+ * present at all or it will not have an aux_ch assigned.
+ */
+ return dig_port ? intel_port_to_phy(i915, dig_port->base.port) : PHY_NONE;
}
static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv,
@@ -414,7 +421,8 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
intel_de_rmw(dev_priv, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx));
- if (DISPLAY_VER(dev_priv) < 12)
+ /* FIXME this is a mess */
+ if (phy != PHY_NONE)
intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy),
0, ICL_LANE_ENABLE_AUX);
@@ -437,7 +445,10 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv));
- intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), ICL_LANE_ENABLE_AUX, 0);
+ /* FIXME this is a mess */
+ if (phy != PHY_NONE)
+ intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy),
+ ICL_LANE_ENABLE_AUX, 0);
intel_de_rmw(dev_priv, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 3fdd8a517983..ac7fe6281afe 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -609,6 +609,13 @@ struct intel_connector {
* and active (i.e. dpms ON state). */
bool (*get_hw_state)(struct intel_connector *);
+ /*
+ * Optional hook called during init/resume to sync any state
+ * stored in the connector (eg. DSC state) wrt. the HW state.
+ */
+ void (*sync_state)(struct intel_connector *connector,
+ const struct intel_crtc_state *crtc_state);
+
/* Panel info for eDP and LVDS */
struct intel_panel panel;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index f5ef95da5534..94d2a15d8444 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2355,6 +2355,9 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp,
limits->min_rate = intel_dp_common_rate(intel_dp, 0);
limits->max_rate = intel_dp_max_link_rate(intel_dp);
+ /* FIXME 128b/132b SST support missing */
+ limits->max_rate = min(limits->max_rate, 810000);
+
limits->min_lane_count = 1;
limits->max_lane_count = intel_dp_max_lane_count(intel_dp);
@@ -5696,6 +5699,9 @@ intel_dp_detect(struct drm_connector *connector,
goto out;
}
+ if (!intel_dp_is_edp(intel_dp))
+ intel_psr_init_dpcd(intel_dp);
+
intel_dp_detect_dsc_caps(intel_dp, intel_connector);
intel_dp_configure_mst(intel_dp);
@@ -5856,6 +5862,19 @@ intel_dp_connector_unregister(struct drm_connector *connector)
intel_connector_unregister(connector);
}
+void intel_dp_connector_sync_state(struct intel_connector *connector,
+ const struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
+
+ if (crtc_state && crtc_state->dsc.compression_enable) {
+ drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux);
+ connector->dp.dsc_decompression_enabled = true;
+ } else {
+ connector->dp.dsc_decompression_enabled = false;
+ }
+}
+
void intel_dp_encoder_flush_work(struct drm_encoder *encoder)
{
struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder));
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 05db46b111f2..375d0677cd8c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -45,6 +45,8 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state,
int intel_dp_min_bpp(enum intel_output_format output_format);
bool intel_dp_init_connector(struct intel_digital_port *dig_port,
struct intel_connector *intel_connector);
+void intel_dp_connector_sync_state(struct intel_connector *connector,
+ const struct intel_crtc_state *crtc_state);
void intel_dp_set_link_params(struct intel_dp *intel_dp,
int link_rate, int lane_count);
int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
index 3a595cd433d4..8538d1ce2fcb 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
@@ -330,23 +330,13 @@ static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = {
0, 0 },
};
-static struct drm_dp_aux *
-intel_dp_hdcp_get_aux(struct intel_connector *connector)
-{
- struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
-
- if (intel_encoder_is_mst(connector->encoder))
- return &connector->port->aux;
- else
- return &dig_port->dp.aux;
-}
-
static int
intel_dp_hdcp2_read_rx_status(struct intel_connector *connector,
u8 *rx_status)
{
struct drm_i915_private *i915 = to_i915(connector->base.dev);
- struct drm_dp_aux *aux = intel_dp_hdcp_get_aux(connector);
+ struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
+ struct drm_dp_aux *aux = &dig_port->dp.aux;
ssize_t ret;
ret = drm_dp_dpcd_read(aux,
@@ -399,7 +389,9 @@ intel_dp_hdcp2_wait_for_msg(struct intel_connector *connector,
const struct hdcp2_dp_msg_data *hdcp2_msg_data)
{
struct drm_i915_private *i915 = to_i915(connector->base.dev);
- struct intel_hdcp *hdcp = &connector->hdcp;
+ struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
+ struct intel_dp *dp = &dig_port->dp;
+ struct intel_hdcp *hdcp = &dp->attached_connector->hdcp;
u8 msg_id = hdcp2_msg_data->msg_id;
int ret, timeout;
bool msg_ready = false;
@@ -454,8 +446,9 @@ int intel_dp_hdcp2_write_msg(struct intel_connector *connector,
unsigned int offset;
u8 *byte = buf;
ssize_t ret, bytes_to_write, len;
+ struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
+ struct drm_dp_aux *aux = &dig_port->dp.aux;
const struct hdcp2_dp_msg_data *hdcp2_msg_data;
- struct drm_dp_aux *aux;
hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte);
if (!hdcp2_msg_data)
@@ -463,8 +456,6 @@ int intel_dp_hdcp2_write_msg(struct intel_connector *connector,
offset = hdcp2_msg_data->offset;
- aux = intel_dp_hdcp_get_aux(connector);
-
/* No msg_id in DP HDCP2.2 msgs */
bytes_to_write = size - 1;
byte++;
@@ -490,7 +481,8 @@ static
ssize_t get_receiver_id_list_rx_info(struct intel_connector *connector,
u32 *dev_cnt, u8 *byte)
{
- struct drm_dp_aux *aux = intel_dp_hdcp_get_aux(connector);
+ struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
+ struct drm_dp_aux *aux = &dig_port->dp.aux;
ssize_t ret;
u8 *rx_info = byte;
@@ -515,8 +507,9 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector,
{
struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
- struct intel_hdcp *hdcp = &connector->hdcp;
- struct drm_dp_aux *aux;
+ struct drm_dp_aux *aux = &dig_port->dp.aux;
+ struct intel_dp *dp = &dig_port->dp;
+ struct intel_hdcp *hdcp = &dp->attached_connector->hdcp;
unsigned int offset;
u8 *byte = buf;
ssize_t ret, bytes_to_recv, len;
@@ -530,8 +523,6 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector,
return -EINVAL;
offset = hdcp2_msg_data->offset;
- aux = intel_dp_hdcp_get_aux(connector);
-
ret = intel_dp_hdcp2_wait_for_msg(connector, hdcp2_msg_data);
if (ret < 0)
return ret;
@@ -561,13 +552,8 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector,
/* Entire msg read timeout since initiate of msg read */
if (bytes_to_recv == size - 1 && hdcp2_msg_data->msg_read_timeout > 0) {
- if (intel_encoder_is_mst(connector->encoder))
- msg_end = ktime_add_ms(ktime_get_raw(),
- hdcp2_msg_data->msg_read_timeout *
- connector->port->parent->num_ports);
- else
- msg_end = ktime_add_ms(ktime_get_raw(),
- hdcp2_msg_data->msg_read_timeout);
+ msg_end = ktime_add_ms(ktime_get_raw(),
+ hdcp2_msg_data->msg_read_timeout);
}
ret = drm_dp_dpcd_read(aux, offset,
@@ -651,12 +637,11 @@ static
int intel_dp_hdcp2_capable(struct intel_connector *connector,
bool *capable)
{
- struct drm_dp_aux *aux;
+ struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
+ struct drm_dp_aux *aux = &dig_port->dp.aux;
u8 rx_caps[3];
int ret;
- aux = intel_dp_hdcp_get_aux(connector);
-
*capable = false;
ret = drm_dp_dpcd_read(aux,
DP_HDCP_2_2_REG_RX_CAPS_OFFSET,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 8a9432335030..a01a59f57ae5 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1534,6 +1534,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
return NULL;
intel_connector->get_hw_state = intel_dp_mst_get_hw_state;
+ intel_connector->sync_state = intel_dp_connector_sync_state;
intel_connector->mst_port = intel_dp;
intel_connector->port = port;
drm_dp_mst_get_port_malloc(port);
diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
index 94eece7f63be..caeca3a8442c 100644
--- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c
+++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
@@ -318,12 +318,6 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private
const struct intel_crtc_state *crtc_state =
to_intel_crtc_state(crtc->base.state);
- if (crtc_state->dsc.compression_enable) {
- drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux);
- connector->dp.dsc_decompression_enabled = true;
- } else {
- connector->dp.dsc_decompression_enabled = false;
- }
conn_state->max_bpc = (crtc_state->pipe_bpp ?: 24) / 3;
}
}
@@ -775,8 +769,9 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915)
drm_connector_list_iter_begin(&i915->drm, &conn_iter);
for_each_intel_connector_iter(connector, &conn_iter) {
+ struct intel_crtc_state *crtc_state = NULL;
+
if (connector->get_hw_state(connector)) {
- struct intel_crtc_state *crtc_state;
struct intel_crtc *crtc;
connector->base.dpms = DRM_MODE_DPMS_ON;
@@ -802,6 +797,10 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915)
connector->base.dpms = DRM_MODE_DPMS_OFF;
connector->base.encoder = NULL;
}
+
+ if (connector->sync_state)
+ connector->sync_state(connector, crtc_state);
+
drm_dbg_kms(&i915->drm,
"[CONNECTOR:%d:%s] hw state readout: %s\n",
connector->base.base.id, connector->base.name,
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 8f702c3fc62d..4faaf4b3fc53 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1525,8 +1525,18 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp,
* can rely on frontbuffer tracking.
*/
mask = EDP_PSR_DEBUG_MASK_MEMUP |
- EDP_PSR_DEBUG_MASK_HPD |
- EDP_PSR_DEBUG_MASK_LPSP;
+ EDP_PSR_DEBUG_MASK_HPD;
+
+ /*
+ * For some unknown reason on HSW non-ULT (or at least on
+ * Dell Latitude E6540) external displays start to flicker
+ * when PSR is enabled on the eDP. SR/PC6 residency is much
+ * higher than should be possible with an external display.
+ * As a workaround leave LPSP unmasked to prevent PSR entry
+ * when external displays are active.
+ */
+ if (DISPLAY_VER(dev_priv) >= 8 || IS_HASWELL_ULT(dev_priv))
+ mask |= EDP_PSR_DEBUG_MASK_LPSP;
if (DISPLAY_VER(dev_priv) < 20)
mask |= EDP_PSR_DEBUG_MASK_MAX_SLEEP;
@@ -2766,9 +2776,6 @@ void intel_psr_init(struct intel_dp *intel_dp)
if (!(HAS_PSR(dev_priv) || HAS_DP20(dev_priv)))
return;
- if (!intel_dp_is_edp(intel_dp))
- intel_psr_init_dpcd(intel_dp);
-
/*
* HSW spec explicitly says PSR is tied to port A.
* BDW+ platforms have a instance of PSR registers per transcoder but
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index acc6b6804105..2915d7afe5cc 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1209,7 +1209,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo,
struct intel_sdvo_tv_format format;
u32 format_map;
- format_map = 1 << conn_state->tv.mode;
+ format_map = 1 << conn_state->tv.legacy_mode;
memset(&format, 0, sizeof(format));
memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map)));
@@ -2298,7 +2298,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector)
* Read the list of supported input resolutions for the selected TV
* format.
*/
- format_map = 1 << conn_state->tv.mode;
+ format_map = 1 << conn_state->tv.legacy_mode;
memcpy(&tv_res, &format_map,
min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request)));
@@ -2363,7 +2363,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector,
int i;
for (i = 0; i < intel_sdvo_connector->format_supported_num; i++)
- if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) {
+ if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) {
*val = i;
return 0;
@@ -2419,7 +2419,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector,
struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state);
if (property == intel_sdvo_connector->tv_format) {
- state->tv.mode = intel_sdvo_connector->tv_format_supported[val];
+ state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val];
if (state->crtc) {
struct drm_crtc_state *crtc_state =
@@ -3076,7 +3076,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo,
drm_property_add_enum(intel_sdvo_connector->tv_format, i,
tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
- intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0];
+ intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0];
drm_object_attach_property(&intel_sdvo_connector->base.base.base,
intel_sdvo_connector->tv_format, 0);
return true;
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index d4386cb3569e..992a725de751 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state,
static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state)
{
- int format = conn_state->tv.mode;
+ int format = conn_state->tv.legacy_mode;
return &tv_modes[format];
}
@@ -1704,7 +1704,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
break;
}
- connector->state->tv.mode = i;
+ connector->state->tv.legacy_mode = i;
}
static int
@@ -1859,7 +1859,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector,
old_state = drm_atomic_get_old_connector_state(state, connector);
new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
- if (old_state->tv.mode != new_state->tv.mode ||
+ if (old_state->tv.legacy_mode != new_state->tv.legacy_mode ||
old_state->tv.margins.left != new_state->tv.margins.left ||
old_state->tv.margins.right != new_state->tv.margins.right ||
old_state->tv.margins.top != new_state->tv.margins.top ||
@@ -1896,7 +1896,7 @@ static void intel_tv_add_properties(struct drm_connector *connector)
conn_state->tv.margins.right = 46;
conn_state->tv.margins.bottom = 37;
- conn_state->tv.mode = 0;
+ conn_state->tv.legacy_mode = 0;
/* Create TV properties then attach current values */
for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
@@ -1910,7 +1910,7 @@ static void intel_tv_add_properties(struct drm_connector *connector)
drm_object_attach_property(&connector->base,
i915->drm.mode_config.legacy_tv_mode_property,
- conn_state->tv.mode);
+ conn_state->tv.legacy_mode);
drm_object_attach_property(&connector->base,
i915->drm.mode_config.tv_left_margin_property,
conn_state->tv.margins.left);
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
index 64f440fdc22b..8b21dc8e26d5 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
@@ -51,8 +51,8 @@
#define DSCC_PICTURE_PARAMETER_SET_0 _MMIO(0x6BA00)
#define _DSCA_PPS_0 0x6B200
#define _DSCC_PPS_0 0x6BA00
-#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + (pps) * 4)
-#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + (pps) * 4)
+#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4)
+#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4)
#define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB 0x78270
#define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB 0x78370
#define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC 0x78470
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 1d3ebdf4069b..c08b67593565 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -379,6 +379,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
GEM_WARN_ON(obj->userptr.page_ref);
+ if (!obj->userptr.notifier.mm)
+ return;
+
mmu_interval_notifier_remove(&obj->userptr.notifier);
obj->userptr.notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 90f6c1ece57d..efcb00472be2 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2849,8 +2849,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset,
for (i = start; i < end; i += 4) {
p = intel_gvt_find_mmio_info(gvt, i);
if (p) {
- WARN(1, "dup mmio definition offset %x\n",
- info->offset);
+ WARN(1, "dup mmio definition offset %x\n", i);
/* We return -EEXIST here to make GVT-g load fail.
* So duplicated MMIO can be found as soon as
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index e98b6d69a91a..9b6d87c8b583 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -41,7 +41,7 @@
* To virtualize GPU resources GVT-g driver depends on hypervisor technology
* e.g KVM/VFIO/mdev, Xen, etc. to provide resource access trapping capability
* and be virtualized within GVT-g device module. More architectural design
- * doc is available on https://01.org/group/2230/documentation-list.
+ * doc is available on https://github.com/intel/gvt-linux/wiki.
*/
static LIST_HEAD(intel_gvt_devices);
diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
index 2990dd4d4a0d..e14ac0ab1314 100644
--- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
+++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
@@ -3,6 +3,8 @@
* Copyright © 2021 Intel Corporation
*/
+#include <linux/jiffies.h>
+
//#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "i915_drv.h"
@@ -12,7 +14,7 @@
#define REDUCED_TIMESLICE 5
#define REDUCED_PREEMPT 10
-#define WAIT_FOR_RESET_TIME 10000
+#define WAIT_FOR_RESET_TIME_MS 10000
struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt)
{
@@ -91,7 +93,7 @@ int intel_selftest_wait_for_rq(struct i915_request *rq)
{
long ret;
- ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME);
+ ret = i915_request_wait(rq, 0, msecs_to_jiffies(WAIT_FOR_RESET_TIME_MS));
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
index 3f73b211fa8e..3407450435e2 100644
--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv)
if (priv->encoders[MESON_ENC_CVBS]) {
meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS];
drm_bridge_remove(&meson_encoder_cvbs->bridge);
- drm_bridge_remove(meson_encoder_cvbs->next_bridge);
}
}
diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c
index 3f93c70488ca..311b91630fbe 100644
--- a/drivers/gpu/drm/meson/meson_encoder_dsi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c
@@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv)
if (priv->encoders[MESON_ENC_DSI]) {
meson_encoder_dsi = priv->encoders[MESON_ENC_DSI];
drm_bridge_remove(&meson_encoder_dsi->bridge);
- drm_bridge_remove(meson_encoder_dsi->next_bridge);
}
}
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
index 25ea76558690..c4686568c9ca 100644
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv)
if (priv->encoders[MESON_ENC_HDMI]) {
meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI];
drm_bridge_remove(&meson_encoder_hdmi->bridge);
- drm_bridge_remove(meson_encoder_hdmi->next_bridge);
}
}
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index c0bc924cd302..c9c55e2ea584 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1287,7 +1287,7 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
gpu->ubwc_config.highest_bank_bit = 15;
if (adreno_is_a610(gpu)) {
- gpu->ubwc_config.highest_bank_bit = 14;
+ gpu->ubwc_config.highest_bank_bit = 13;
gpu->ubwc_config.min_acc_len = 1;
gpu->ubwc_config.ubwc_mode = 1;
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 83380bc92a00..6a4b489d44e5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -144,10 +144,6 @@ enum dpu_enc_rc_states {
* to track crtc in the disable() hook which is called
* _after_ encoder_mask is cleared.
* @connector: If a mode is set, cached pointer to the active connector
- * @crtc_kickoff_cb: Callback into CRTC that will flush & start
- * all CTL paths
- * @crtc_kickoff_cb_data: Opaque user data given to crtc_kickoff_cb
- * @debugfs_root: Debug file system root file node
* @enc_lock: Lock around physical encoder
* create/destroy/enable/disable
* @frame_busy_mask: Bitmask tracking which phys_enc we are still
@@ -2072,7 +2068,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
}
/* reset the merge 3D HW block */
- if (phys_enc->hw_pp->merge_3d) {
+ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) {
phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d,
BLEND_3D_NONE);
if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d)
@@ -2103,7 +2099,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
if (phys_enc->hw_wb)
intf_cfg.wb = phys_enc->hw_wb->idx;
- if (phys_enc->hw_pp->merge_3d)
+ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d)
intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx;
if (ctl->ops.reset_intf_cfg)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index b58a9c2ae326..724537ab776d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -29,7 +29,6 @@ static inline bool reserved_by_other(uint32_t *res_map, int idx,
/**
* struct dpu_rm_requirements - Reservation requirements parameter bundle
* @topology: selected topology for the display
- * @hw_res: Hardware resources required as reported by the encoders
*/
struct dpu_rm_requirements {
struct msm_display_topology topology;
@@ -204,6 +203,8 @@ static bool _dpu_rm_needs_split_display(const struct msm_display_topology *top)
* _dpu_rm_get_lm_peer - get the id of a mixer which is a peer of the primary
* @rm: dpu resource manager handle
* @primary_idx: index of primary mixer in rm->mixer_blks[]
+ *
+ * Returns: lm peer mixed id on success or %-EINVAL on error
*/
static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx)
{
diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index 77a8d9366ed7..fb588fde298a 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -135,11 +135,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl)
tbd = dp_link_get_test_bits_depth(ctrl->link,
ctrl->panel->dp_mode.bpp);
- if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) {
- pr_debug("BIT_DEPTH not set. Configure default\n");
- tbd = DP_TEST_BIT_DEPTH_8;
- }
-
config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT;
/* Num of Lanes */
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index d37d599aec27..4c72124ffb5d 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = {
.unbind = dp_display_unbind,
};
+static void dp_display_send_hpd_event(struct msm_dp *dp_display)
+{
+ struct dp_display_private *dp;
+ struct drm_connector *connector;
+
+ dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+ connector = dp->dp_display.connector;
+ drm_helper_hpd_irq_event(connector->dev);
+}
+
static int dp_display_send_hpd_notification(struct dp_display_private *dp,
bool hpd)
{
- struct drm_bridge *bridge = dp->dp_display.bridge;
+ if ((hpd && dp->dp_display.link_ready) ||
+ (!hpd && !dp->dp_display.link_ready)) {
+ drm_dbg_dp(dp->drm_dev, "HPD already %s\n",
+ (hpd ? "on" : "off"));
+ return 0;
+ }
/* reset video pattern flag on disconnect */
if (!hpd) {
@@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n",
dp->dp_display.connector_type, hpd);
- drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready);
+ dp_display_send_hpd_event(&dp->dp_display);
return 0;
}
diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c
index 98427d45e9a7..49dfac1fd1ef 100644
--- a/drivers/gpu/drm/msm/dp/dp_link.c
+++ b/drivers/gpu/drm/msm/dp/dp_link.c
@@ -7,6 +7,7 @@
#include <drm/drm_print.h>
+#include "dp_reg.h"
#include "dp_link.h"
#include "dp_panel.h"
@@ -1082,7 +1083,7 @@ int dp_link_process_request(struct dp_link *dp_link)
int dp_link_get_colorimetry_config(struct dp_link *dp_link)
{
- u32 cc;
+ u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB;
struct dp_link_private *link;
if (!dp_link) {
@@ -1096,10 +1097,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link)
* Unless a video pattern CTS test is ongoing, use RGB_VESA
* Only RGB_VESA and RGB_CEA supported for now
*/
- if (dp_link_is_video_pattern_requested(link))
- cc = link->dp_link.test_video.test_dyn_range;
- else
- cc = DP_TEST_DYNAMIC_RANGE_VESA;
+ if (dp_link_is_video_pattern_requested(link)) {
+ if (link->dp_link.test_video.test_dyn_range &
+ DP_TEST_DYNAMIC_RANGE_CEA)
+ cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB;
+ }
return cc;
}
@@ -1179,6 +1181,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link)
u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp)
{
u32 tbd;
+ struct dp_link_private *link;
+
+ link = container_of(dp_link, struct dp_link_private, dp_link);
/*
* Few simplistic rules and assumptions made here:
@@ -1196,12 +1201,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp)
tbd = DP_TEST_BIT_DEPTH_10;
break;
default:
- tbd = DP_TEST_BIT_DEPTH_UNKNOWN;
+ drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n",
+ bpp);
+ tbd = DP_TEST_BIT_DEPTH_8;
break;
}
- if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN)
- tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT);
+ tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT);
return tbd;
}
diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h
index ea85a691e72b..78785ed4b40c 100644
--- a/drivers/gpu/drm/msm/dp/dp_reg.h
+++ b/drivers/gpu/drm/msm/dp/dp_reg.h
@@ -143,6 +143,9 @@
#define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001)
#define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005)
+#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0)
+#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04)
+
#define REG_DP_VALID_BOUNDARY (0x00000030)
#define REG_DP_VALID_BOUNDARY_2 (0x00000034)
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 5f68e31a3e4e..0915f3b68752 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -26,7 +26,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
{
void *vaddr;
- vaddr = msm_gem_get_vaddr(obj);
+ vaddr = msm_gem_get_vaddr_locked(obj);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
iosys_map_set_vaddr(map, vaddr);
@@ -36,7 +36,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
{
- msm_gem_put_vaddr(obj);
+ msm_gem_put_vaddr_locked(obj);
}
struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 095390774f22..655002b21b0d 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -751,12 +751,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned long flags;
- pm_runtime_get_sync(&gpu->pdev->dev);
+ WARN_ON(!mutex_is_locked(&gpu->lock));
- mutex_lock(&gpu->lock);
+ pm_runtime_get_sync(&gpu->pdev->dev);
msm_gpu_hw_init(gpu);
+ submit->seqno = submit->hw_fence->seqno;
+
update_sw_cntrs(gpu);
/*
@@ -781,11 +783,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
gpu->funcs->submit(gpu, submit);
gpu->cur_ctx_seqno = submit->queue->ctx->seqno;
- hangcheck_timer_reset(gpu);
-
- mutex_unlock(&gpu->lock);
-
pm_runtime_put(&gpu->pdev->dev);
+ hangcheck_timer_reset(gpu);
}
/*
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 5cc8d358cc97..d5512037c38b 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -21,6 +21,8 @@ struct msm_iommu_pagetable {
struct msm_mmu base;
struct msm_mmu *parent;
struct io_pgtable_ops *pgtbl_ops;
+ const struct iommu_flush_ops *tlb;
+ struct device *iommu_dev;
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
phys_addr_t ttbr;
u32 asid;
@@ -201,11 +203,33 @@ static const struct msm_mmu_funcs pagetable_funcs = {
static void msm_iommu_tlb_flush_all(void *cookie)
{
+ struct msm_iommu_pagetable *pagetable = cookie;
+ struct adreno_smmu_priv *adreno_smmu;
+
+ if (!pm_runtime_get_if_in_use(pagetable->iommu_dev))
+ return;
+
+ adreno_smmu = dev_get_drvdata(pagetable->parent->dev);
+
+ pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie);
+
+ pm_runtime_put_autosuspend(pagetable->iommu_dev);
}
static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
+ struct msm_iommu_pagetable *pagetable = cookie;
+ struct adreno_smmu_priv *adreno_smmu;
+
+ if (!pm_runtime_get_if_in_use(pagetable->iommu_dev))
+ return;
+
+ adreno_smmu = dev_get_drvdata(pagetable->parent->dev);
+
+ pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie);
+
+ pm_runtime_put_autosuspend(pagetable->iommu_dev);
}
static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
@@ -213,7 +237,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
{
}
-static const struct iommu_flush_ops null_tlb_ops = {
+static const struct iommu_flush_ops tlb_ops = {
.tlb_flush_all = msm_iommu_tlb_flush_all,
.tlb_flush_walk = msm_iommu_tlb_flush_walk,
.tlb_add_page = msm_iommu_tlb_add_page,
@@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
/* The incoming cfg will have the TTBR1 quirk enabled */
ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1;
- ttbr0_cfg.tlb = &null_tlb_ops;
+ ttbr0_cfg.tlb = &tlb_ops;
pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
- &ttbr0_cfg, iommu->domain);
+ &ttbr0_cfg, pagetable);
if (!pagetable->pgtbl_ops) {
kfree(pagetable);
@@ -279,6 +303,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
/* Needed later for TLB flush */
pagetable->parent = parent;
+ pagetable->tlb = ttbr1_cfg->tlb;
+ pagetable->iommu_dev = ttbr1_cfg->iommu_dev;
pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 455b2e3a0cdd..35423d10aafa 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -562,6 +562,7 @@ static const struct msm_mdss_data sdm670_data = {
.ubwc_enc_version = UBWC_2_0,
.ubwc_dec_version = UBWC_2_0,
.highest_bank_bit = 1,
+ .reg_bus_bw = 76800,
};
static const struct msm_mdss_data sdm845_data = {
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 4bc13f7d005a..9d6655f96f0c 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
msm_fence_init(submit->hw_fence, fctx);
- submit->seqno = submit->hw_fence->seqno;
-
mutex_lock(&priv->lru.lock);
for (i = 0; i < submit->nr_bos; i++) {
@@ -35,8 +33,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
mutex_unlock(&priv->lru.lock);
+ /* TODO move submit path over to using a per-ring lock.. */
+ mutex_lock(&gpu->lock);
+
msm_gpu_submit(gpu, submit);
+ mutex_unlock(&gpu->lock);
+
return dma_fence_get(submit->hw_fence);
}
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 1e6aaf95ff7c..ceef470c9fbf 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM
help
Say Y here if you want to enable experimental support for
Shared Virtual Memory (SVM).
+
+config DRM_NOUVEAU_GSP_DEFAULT
+ bool "Use GSP firmware for Turing/Ampere (needs firmware installed)"
+ depends on DRM_NOUVEAU
+ default n
+ help
+ Say Y here if you want to use the GSP codepaths by default on
+ Turing and Ampere GPUs.
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
index 0d9fc741a719..932c9fd0b2d8 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
@@ -11,6 +11,7 @@ struct nvkm_client {
u32 debug;
struct rb_root objroot;
+ spinlock_t obj_lock;
void *data;
int (*event)(u64 token, void *argv, u32 argc);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index d1437c08645f..6f5d376d8fcc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -9,7 +9,7 @@
#define GSP_PAGE_SIZE BIT(GSP_PAGE_SHIFT)
struct nvkm_gsp_mem {
- u32 size;
+ size_t size;
void *data;
dma_addr_t addr;
};
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index a04156ca8390..80f74ee0fc78 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
struct nouveau_abi16_ntfy *ntfy, *temp;
/* Cancel all jobs from the entity's queue. */
- drm_sched_entity_fini(&chan->sched.entity);
+ if (chan->sched)
+ drm_sched_entity_fini(&chan->sched->entity);
if (chan->chan)
nouveau_channel_idle(chan->chan);
- nouveau_sched_fini(&chan->sched);
+ if (chan->sched)
+ nouveau_sched_destroy(&chan->sched);
/* cleanup notifier state */
list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
@@ -197,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
struct nouveau_cli *cli = nouveau_cli(file_priv);
struct nouveau_drm *drm = nouveau_drm(dev);
struct nvif_device *device = &drm->client.device;
+ struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device);
struct nvkm_gr *gr = nvxx_gr(device);
struct drm_nouveau_getparam *getparam = data;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -261,6 +264,14 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
getparam->value = nouveau_exec_push_max_from_ib_max(ib_max);
break;
}
+ case NOUVEAU_GETPARAM_VRAM_BAR_SIZE:
+ getparam->value = nvkm_device->func->resource_size(nvkm_device, 1);
+ break;
+ case NOUVEAU_GETPARAM_VRAM_USED: {
+ struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM);
+ getparam->value = (u64)ttm_resource_manager_usage(vram_mgr);
+ break;
+ }
default:
NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param);
return -EINVAL;
@@ -337,10 +348,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;
- ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
- chan->chan->dma.ib_max);
- if (ret)
- goto done;
+ /* If we're not using the VM_BIND uAPI, we don't need a scheduler.
+ *
+ * The client lock is already acquired by nouveau_abi16_get().
+ */
+ if (nouveau_cli_uvmm(cli)) {
+ ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
+ chan->chan->dma.ib_max);
+ if (ret)
+ goto done;
+ }
init->channel = chan->chan->chid;
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 1f5e243c0c75..11c8c4a80079 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
struct nouveau_bo *ntfy;
struct nouveau_vma *ntfy_vma;
struct nvkm_mm heap;
- struct nouveau_sched sched;
+ struct nouveau_sched *sched;
};
struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 6f6c31a9937b..a947e1d5f309 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
WARN_ON(!list_empty(&cli->worker));
usif_client_fini(cli);
- nouveau_sched_fini(&cli->sched);
+ if (cli->sched)
+ nouveau_sched_destroy(&cli->sched);
if (uvmm)
nouveau_uvmm_fini(uvmm);
nouveau_vmm_fini(&cli->svm);
@@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
cli->mem = &mems[ret];
/* Don't pass in the (shared) sched_wq in order to let
- * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+ * nouveau_sched_create() create a dedicated one for VM_BIND jobs.
*
* This is required to ensure that for VM_BIND jobs free_job() work and
* run_job() work can always run concurrently and hence, free_job() work
@@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
* locks which indirectly or directly are held for allocations
* elsewhere.
*/
- ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
+ ret = nouveau_sched_create(&cli->sched, drm, NULL, 1);
if (ret)
goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8a6d94c8b163..e239c6bf4afa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -98,7 +98,7 @@ struct nouveau_cli {
bool disabled;
} uvmm;
- struct nouveau_sched sched;
+ struct nouveau_sched *sched;
const struct nvif_mclass *mem;
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index bc5d71b79ab2..e65c0ef23bc7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
if (ret)
goto out;
- args.sched = &chan16->sched;
+ args.sched = chan16->sched;
args.file_priv = file_priv;
args.chan = chan;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 5057d976fa57..93f08f9479d8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence)
if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
- if (atomic_dec_and_test(&fctx->notify_ref))
+ if (!--fctx->notify_ref)
drop = 1;
}
@@ -103,7 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
void
nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
{
- cancel_work_sync(&fctx->allow_block_work);
+ cancel_work_sync(&fctx->uevent_work);
nouveau_fence_context_kill(fctx, 0);
nvif_event_dtor(&fctx->event);
fctx->dead = 1;
@@ -146,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
return drop;
}
-static int
-nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+static void
+nouveau_fence_uevent_work(struct work_struct *work)
{
- struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+ struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
+ uevent_work);
unsigned long flags;
- int ret = NVIF_EVENT_KEEP;
+ int drop = 0;
spin_lock_irqsave(&fctx->lock, flags);
if (!list_empty(&fctx->pending)) {
@@ -161,23 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
fence = list_entry(fctx->pending.next, typeof(*fence), head);
chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
if (nouveau_fence_update(chan, fctx))
- ret = NVIF_EVENT_DROP;
+ drop = 1;
}
- spin_unlock_irqrestore(&fctx->lock, flags);
+ if (drop)
+ nvif_event_block(&fctx->event);
- return ret;
+ spin_unlock_irqrestore(&fctx->lock, flags);
}
-static void
-nouveau_fence_work_allow_block(struct work_struct *work)
+static int
+nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
{
- struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
- allow_block_work);
-
- if (atomic_read(&fctx->notify_ref) == 0)
- nvif_event_block(&fctx->event);
- else
- nvif_event_allow(&fctx->event);
+ struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+ schedule_work(&fctx->uevent_work);
+ return NVIF_EVENT_KEEP;
}
void
@@ -191,7 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
} args;
int ret;
- INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block);
+ INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work);
INIT_LIST_HEAD(&fctx->flip);
INIT_LIST_HEAD(&fctx->pending);
spin_lock_init(&fctx->lock);
@@ -535,19 +533,15 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
struct nouveau_fence *fence = from_fence(f);
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
bool ret;
- bool do_work;
- if (atomic_inc_return(&fctx->notify_ref) == 0)
- do_work = true;
+ if (!fctx->notify_ref++)
+ nvif_event_allow(&fctx->event);
ret = nouveau_fence_no_signaling(f);
if (ret)
set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
- else if (atomic_dec_and_test(&fctx->notify_ref))
- do_work = true;
-
- if (do_work)
- schedule_work(&fctx->allow_block_work);
+ else if (!--fctx->notify_ref)
+ nvif_event_block(&fctx->event);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 28f5cf013b89..8bc065acfe35 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -3,7 +3,6 @@
#define __NOUVEAU_FENCE_H__
#include <linux/dma-fence.h>
-#include <linux/workqueue.h>
#include <nvif/event.h>
struct nouveau_drm;
@@ -45,10 +44,9 @@ struct nouveau_fence_chan {
u32 context;
char name[32];
+ struct work_struct uevent_work;
struct nvif_event event;
- struct work_struct allow_block_work;
- atomic_t notify_ref;
- int dead, killed;
+ int notify_ref, dead, killed;
};
struct nouveau_fence_priv {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 49c2bcbef129..5a887d67dc0e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -764,7 +764,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
return -ENOMEM;
if (unlikely(nouveau_cli_uvmm(cli)))
- return -ENOSYS;
+ return nouveau_abi16_put(abi16, -ENOSYS);
list_for_each_entry(temp, &abi16->channels, head) {
if (temp->chan->chid == req->channel) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index dd98f6910f9c..32fa2e273965 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = {
.free_job = nouveau_sched_free_job,
};
-int
+static int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
struct workqueue_struct *wq, u32 credit_limit)
{
@@ -453,7 +453,30 @@ fail_wq:
return ret;
}
-void
+int
+nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit)
+{
+ struct nouveau_sched *sched;
+ int ret;
+
+ sched = kzalloc(sizeof(*sched), GFP_KERNEL);
+ if (!sched)
+ return -ENOMEM;
+
+ ret = nouveau_sched_init(sched, drm, wq, credit_limit);
+ if (ret) {
+ kfree(sched);
+ return ret;
+ }
+
+ *psched = sched;
+
+ return 0;
+}
+
+
+static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
struct drm_gpu_scheduler *drm_sched = &sched->base;
@@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched)
if (sched->wq)
destroy_workqueue(sched->wq);
}
+
+void
+nouveau_sched_destroy(struct nouveau_sched **psched)
+{
+ struct nouveau_sched *sched = *psched;
+
+ nouveau_sched_fini(sched);
+ kfree(sched);
+
+ *psched = NULL;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index a6528f5981e6..e1f01a23e6f6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -111,8 +111,8 @@ struct nouveau_sched {
} job;
};
-int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
- struct workqueue_struct *wq, u32 credit_limit);
-void nouveau_sched_fini(struct nouveau_sched *sched);
+int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_destroy(struct nouveau_sched **psched);
#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index cc03e0c22ff3..5e4565c5011a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
if (ret)
return ret;
- buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
+ buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL);
if (!buffer->fault)
return -ENOMEM;
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 4f223c972c6a..0a0a11dc9ec0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
if (ret)
return ret;
- args.sched = &cli->sched;
+ args.sched = cli->sched;
args.file_priv = file_priv;
ret = nouveau_uvmm_vm_bind(&args);
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c
index ebdeb8eb9e77..c55662937ab2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/client.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c
@@ -180,6 +180,7 @@ nvkm_client_new(const char *name, u64 device, const char *cfg, const char *dbg,
client->device = device;
client->debug = nvkm_dbgopt(dbg, "CLIENT");
client->objroot = RB_ROOT;
+ spin_lock_init(&client->obj_lock);
client->event = event;
INIT_LIST_HEAD(&client->umem);
spin_lock_init(&client->lock);
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 7c554c14e884..aea3ba72027a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -30,8 +30,10 @@ nvkm_object_search(struct nvkm_client *client, u64 handle,
const struct nvkm_object_func *func)
{
struct nvkm_object *object;
+ unsigned long flags;
if (handle) {
+ spin_lock_irqsave(&client->obj_lock, flags);
struct rb_node *node = client->objroot.rb_node;
while (node) {
object = rb_entry(node, typeof(*object), node);
@@ -40,9 +42,12 @@ nvkm_object_search(struct nvkm_client *client, u64 handle,
else
if (handle > object->object)
node = node->rb_right;
- else
+ else {
+ spin_unlock_irqrestore(&client->obj_lock, flags);
goto done;
+ }
}
+ spin_unlock_irqrestore(&client->obj_lock, flags);
return ERR_PTR(-ENOENT);
} else {
object = &client->object;
@@ -57,30 +62,39 @@ done:
void
nvkm_object_remove(struct nvkm_object *object)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&object->client->obj_lock, flags);
if (!RB_EMPTY_NODE(&object->node))
rb_erase(&object->node, &object->client->objroot);
+ spin_unlock_irqrestore(&object->client->obj_lock, flags);
}
bool
nvkm_object_insert(struct nvkm_object *object)
{
- struct rb_node **ptr = &object->client->objroot.rb_node;
+ struct rb_node **ptr;
struct rb_node *parent = NULL;
+ unsigned long flags;
+ spin_lock_irqsave(&object->client->obj_lock, flags);
+ ptr = &object->client->objroot.rb_node;
while (*ptr) {
struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node);
parent = *ptr;
- if (object->object < this->object)
+ if (object->object < this->object) {
ptr = &parent->rb_left;
- else
- if (object->object > this->object)
+ } else if (object->object > this->object) {
ptr = &parent->rb_right;
- else
+ } else {
+ spin_unlock_irqrestore(&object->client->obj_lock, flags);
return false;
+ }
}
rb_link_node(&object->node, parent, ptr);
rb_insert_color(&object->node, &object->client->objroot);
+ spin_unlock_irqrestore(&object->client->obj_lock, flags);
return true;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
index 4135690326f4..3a30bea30e36 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
@@ -168,12 +168,11 @@ r535_bar_new_(const struct nvkm_bar_func *hw, struct nvkm_device *device,
rm->flush = r535_bar_flush;
ret = gf100_bar_new_(rm, device, type, inst, &bar);
- *pbar = bar;
if (ret) {
- if (!bar)
- kfree(rm);
+ kfree(rm);
return ret;
}
+ *pbar = bar;
bar->flushBAR2PhysMode = ioremap(device->func->resource_addr(device, 3), PAGE_SIZE);
if (!bar->flushBAR2PhysMode)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
index 19188683c8fc..8c2bf1c16f2a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name)
return (void *)fw;
}
+static void
+shadow_fw_release(void *fw)
+{
+ release_firmware(fw);
+}
+
static const struct nvbios_source
shadow_fw = {
.name = "firmware",
.init = shadow_fw_init,
- .fini = (void(*)(void *))release_firmware,
+ .fini = shadow_fw_release,
.read = shadow_fw_read,
.rw = false,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 9ee58e2a0eb2..a73a5b589790 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -997,6 +997,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp)
return 0;
}
+static void
+nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
+{
+ if (mem->data) {
+ /*
+ * Poison the buffer to catch any unexpected access from
+ * GSP-RM if the buffer was prematurely freed.
+ */
+ memset(mem->data, 0xFF, mem->size);
+
+ dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
+ memset(mem, 0, sizeof(*mem));
+ }
+}
+
+static int
+nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem)
+{
+ mem->size = size;
+ mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
+ if (WARN_ON(!mem->data))
+ return -ENOMEM;
+
+ return 0;
+}
+
static int
r535_gsp_postinit(struct nvkm_gsp *gsp)
{
@@ -1024,6 +1050,11 @@ r535_gsp_postinit(struct nvkm_gsp *gsp)
nvkm_inth_allow(&gsp->subdev.inth);
nvkm_wr32(device, 0x110004, 0x00000040);
+
+ /* Release the DMA buffers that were needed only for boot and init */
+ nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw);
+ nvkm_gsp_mem_dtor(gsp, &gsp->libos);
+
return ret;
}
@@ -1078,7 +1109,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
if (IS_ERR(rpc))
return PTR_ERR(rpc);
- rpc->size = sizeof(*rpc);
rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
@@ -1094,6 +1124,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
strings += name_len;
str_offset += name_len;
}
+ rpc->size = str_offset;
return nvkm_gsp_rpc_wr(gsp, rpc, false);
}
@@ -1532,27 +1563,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
return 0;
}
-static void
-nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
-{
- if (mem->data) {
- dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
- mem->data = NULL;
- }
-}
-
-static int
-nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem)
-{
- mem->size = size;
- mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
- if (WARN_ON(!mem->data))
- return -ENOMEM;
-
- return 0;
-}
-
-
static int
r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1)
{
@@ -1938,20 +1948,20 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
* See kgspCreateRadix3_IMPL
*/
static int
-nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
+nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size,
struct nvkm_gsp_radix3 *rx3)
{
u64 addr;
for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) {
u64 *ptes;
- int idx;
+ size_t bufsize;
+ int ret, idx;
- rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
- rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size,
- &rx3->mem[i].addr, GFP_KERNEL);
- if (WARN_ON(!rx3->mem[i].data))
- return -ENOMEM;
+ bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
+ ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]);
+ if (ret)
+ return ret;
ptes = rx3->mem[i].data;
if (i == 2) {
@@ -1991,7 +2001,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
if (ret)
return ret;
- ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3);
+ ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3);
if (ret)
return ret;
@@ -2150,6 +2160,13 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
mutex_destroy(&gsp->cmdq.mutex);
r535_gsp_dtor_fws(gsp);
+
+ nvkm_gsp_mem_dtor(gsp, &gsp->rmargs);
+ nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta);
+ nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem);
+ nvkm_gsp_mem_dtor(gsp, &gsp->loginit);
+ nvkm_gsp_mem_dtor(gsp, &gsp->logintr);
+ nvkm_gsp_mem_dtor(gsp, &gsp->logrm);
}
int
@@ -2194,7 +2211,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
memcpy(gsp->sig.data, data, size);
/* Build radix3 page table for ELF image. */
- ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
+ ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
if (ret)
return ret;
@@ -2295,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif)
{
struct nvkm_subdev *subdev = &gsp->subdev;
int ret;
+ bool enable_gsp = fwif->enable;
- if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable))
+#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT)
+ enable_gsp = true;
+#endif
+ if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp))
return -EINVAL;
if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) ||
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index dad938cf6dec..8f3783742208 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -539,6 +539,8 @@ config DRM_PANEL_RAYDIUM_RM692E5
depends on OF
depends on DRM_MIPI_DSI
depends on BACKLIGHT_CLASS_DEVICE
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_HELPER
help
Say Y here if you want to enable support for Raydium RM692E5-based
display panels, such as the one found in the Fairphone 5 smartphone.
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
index c4c0f08e9202..4945a1e787eb 100644
--- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
+++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -1768,11 +1768,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = {
};
static const struct drm_display_mode starry_himax83102_j02_default_mode = {
- .clock = 162850,
+ .clock = 162680,
.hdisplay = 1200,
- .hsync_start = 1200 + 50,
- .hsync_end = 1200 + 50 + 20,
- .htotal = 1200 + 50 + 20 + 50,
+ .hsync_start = 1200 + 60,
+ .hsync_end = 1200 + 60 + 20,
+ .htotal = 1200 + 60 + 20 + 40,
.vdisplay = 1920,
.vsync_start = 1920 + 116,
.vsync_end = 1920 + 116 + 8,
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
index ea5a85779382..f23d8832a1ad 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
@@ -309,7 +309,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = {
.off_func = s6d7aa0_lsl080al02_off,
.drm_mode = &s6d7aa0_lsl080al02_mode,
.mode_flags = MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP,
- .bus_flags = DRM_BUS_FLAG_DE_HIGH,
+ .bus_flags = 0,
.has_backlight = false,
.use_passwd3 = false,
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 2214cb09678c..d493ee735c73 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -3948,6 +3948,7 @@ static const struct panel_desc tianma_tm070jdhg30 = {
},
.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
.connector_type = DRM_MODE_CONNECTOR_LVDS,
+ .bus_flags = DRM_BUS_FLAG_DE_HIGH,
};
static const struct panel_desc tianma_tm070jvhg33 = {
@@ -3960,6 +3961,7 @@ static const struct panel_desc tianma_tm070jvhg33 = {
},
.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
.connector_type = DRM_MODE_CONNECTOR_LVDS,
+ .bus_flags = DRM_BUS_FLAG_DE_HIGH,
};
static const struct display_timing tianma_tm070rvhg71_timing = {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 85b3b4871a1d..fdd768bbd487 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1985,8 +1985,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
}
- if (!clock)
+ if (!clock) {
+ vop2_unlock(vop2);
return;
+ }
if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
!(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 550492a7a031..d442b893275b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1184,14 +1184,16 @@ static void drm_sched_run_job_work(struct work_struct *w)
if (READ_ONCE(sched->pause_submit))
return;
+ /* Find entity with a ready job */
entity = drm_sched_select_entity(sched);
if (!entity)
- return;
+ return; /* No more work */
sched_job = drm_sched_entity_pop_job(entity);
if (!sched_job) {
complete_all(&entity->entity_idle);
- return; /* No more work */
+ drm_sched_run_job_queue(sched);
+ return;
}
s_fence = sched_job->s_fence;
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ff36171c8fb7..03d1c76aec2d 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -960,7 +960,8 @@ int host1x_client_iommu_attach(struct host1x_client *client)
* not the shared IOMMU domain, don't try to attach it to a different
* domain. This allows using the IOMMU-backed DMA API.
*/
- if (domain && domain != tegra->domain)
+ if (domain && domain->type != IOMMU_DOMAIN_IDENTITY &&
+ domain != tegra->domain)
return 0;
if (tegra->domain) {
@@ -1242,9 +1243,26 @@ static int host1x_drm_probe(struct host1x_device *dev)
drm_mode_config_reset(drm);
- err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
- if (err < 0)
- goto hub;
+ /*
+ * Only take over from a potential firmware framebuffer if any CRTCs
+ * have been registered. This must not be a fatal error because there
+ * are other accelerators that are exposed via this driver.
+ *
+ * Another case where this happens is on Tegra234 where the display
+ * hardware is no longer part of the host1x complex, so this driver
+ * will not expose any modesetting features.
+ */
+ if (drm->mode_config.num_crtc > 0) {
+ err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
+ if (err < 0)
+ goto hub;
+ } else {
+ /*
+ * Indicate to userspace that this doesn't expose any display
+ * capabilities.
+ */
+ drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+ }
err = drm_dev_register(drm, 0);
if (err < 0)
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..e48863a44556 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,16 +8,308 @@
#include <linux/prime_numbers.h>
#include <linux/sched/signal.h>
+#include <linux/sizes.h>
#include <drm/drm_buddy.h>
#include "../lib/drm_random.h"
+static unsigned int random_seed;
+
static inline u64 get_size(int order, u64 chunk_size)
{
return (1 << order) * chunk_size;
}
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+{
+ u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+ DRM_RND_STATE(prng, random_seed);
+ unsigned int i, count, *order;
+ struct drm_buddy mm;
+ LIST_HEAD(allocated);
+
+ bias_size = SZ_1M;
+ ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
+ ps = max(SZ_4K, ps);
+ mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+ kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+ KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+ "buddy_init failed\n");
+
+ count = mm_size / bias_size;
+ order = drm_random_order(count, &prng);
+ KUNIT_EXPECT_TRUE(test, order);
+
+ /*
+ * Idea is to split the address space into uniform bias ranges, and then
+ * in some random order allocate within each bias, using various
+ * patterns within. This should detect if allocations leak out from a
+ * given bias, for example.
+ */
+
+ for (i = 0; i < count; i++) {
+ LIST_HEAD(tmp);
+ u32 size;
+
+ bias_start = order[i] * bias_size;
+ bias_end = bias_start + bias_size;
+ bias_rem = bias_size;
+
+ /* internal round_up too big */
+ KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, bias_size + ps, bias_size,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size, bias_size);
+
+ /* size too big */
+ KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, bias_size + ps, ps,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size + ps, ps);
+
+ /* bias range too small for size */
+ KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start + ps,
+ bias_end, bias_size, ps,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start + ps, bias_end, bias_size, ps);
+
+ /* bias misaligned */
+ KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start + ps,
+ bias_end - ps,
+ bias_size >> 1, bias_size >> 1,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
+
+ /* single big page */
+ KUNIT_ASSERT_FALSE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, bias_size, bias_size,
+ &tmp,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size, bias_size);
+ drm_buddy_free_list(&mm, &tmp);
+
+ /* single page with internal round_up */
+ KUNIT_ASSERT_FALSE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, ps, bias_size,
+ &tmp,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, ps, bias_size);
+ drm_buddy_free_list(&mm, &tmp);
+
+ /* random size within */
+ size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+ if (size)
+ KUNIT_ASSERT_FALSE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, size, ps,
+ &tmp,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, size, ps);
+
+ bias_rem -= size;
+ /* too big for current avail */
+ KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, bias_rem + ps, ps,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, bias_rem + ps, ps);
+
+ if (bias_rem) {
+ /* random fill of the remainder */
+ size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+ size = max(size, ps);
+
+ KUNIT_ASSERT_FALSE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, size, ps,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, size, ps);
+ /*
+ * Intentionally allow some space to be left
+ * unallocated, and ideally not always on the bias
+ * boundaries.
+ */
+ drm_buddy_free_list(&mm, &tmp);
+ } else {
+ list_splice_tail(&tmp, &allocated);
+ }
+ }
+
+ kfree(order);
+ drm_buddy_free_list(&mm, &allocated);
+ drm_buddy_fini(&mm);
+
+ /*
+ * Something more free-form. Idea is to pick a random starting bias
+ * range within the address space and then start filling it up. Also
+ * randomly grow the bias range in both directions as we go along. This
+ * should give us bias start/end which is not always uniform like above,
+ * and in some cases will require the allocator to jump over already
+ * allocated nodes in the middle of the address space.
+ */
+
+ KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+ "buddy_init failed\n");
+
+ bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
+ bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
+ bias_end = max(bias_end, bias_start + ps);
+ bias_rem = bias_end - bias_start;
+
+ do {
+ u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+
+ KUNIT_ASSERT_FALSE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start,
+ bias_end, size, ps,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+ bias_start, bias_end, size, ps);
+ bias_rem -= size;
+
+ /*
+ * Try to randomly grow the bias range in both directions, or
+ * only one, or perhaps don't grow at all.
+ */
+ do {
+ u32 old_bias_start = bias_start;
+ u32 old_bias_end = bias_end;
+
+ if (bias_start)
+ bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
+ if (bias_end != mm_size)
+ bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);
+
+ bias_rem += old_bias_start - bias_start;
+ bias_rem += bias_end - old_bias_end;
+ } while (!bias_rem && (bias_start || bias_end != mm_size));
+ } while (bias_rem);
+
+ KUNIT_ASSERT_EQ(test, bias_start, 0);
+ KUNIT_ASSERT_EQ(test, bias_end, mm_size);
+ KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
+ ps, ps,
+ &allocated,
+ DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc passed with bias(%x-%x), size=%u\n",
+ bias_start, bias_end, ps);
+
+ drm_buddy_free_list(&mm, &allocated);
+ drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+ const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
+ unsigned long i, n_pages, total;
+ struct drm_buddy_block *block;
+ struct drm_buddy mm;
+ LIST_HEAD(left);
+ LIST_HEAD(middle);
+ LIST_HEAD(right);
+ LIST_HEAD(allocated);
+
+ KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+ /*
+ * Idea is to fragment the address space by alternating block
+ * allocations between three different lists; one for left, middle and
+ * right. We can then free a list to simulate fragmentation. In
+ * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+ * including the try_harder path.
+ */
+
+ i = 0;
+ n_pages = mm_size / ps;
+ do {
+ struct list_head *list;
+ int slot = i % 3;
+
+ if (slot == 0)
+ list = &left;
+ else if (slot == 1)
+ list = &middle;
+ else
+ list = &right;
+ KUNIT_ASSERT_FALSE_MSG(test,
+ drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ ps, ps, list, 0),
+ "buddy_alloc hit an error size=%lu\n",
+ ps);
+ } while (++i < n_pages);
+
+ KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ 3 * ps, ps, &allocated,
+ DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ "buddy_alloc didn't error size=%lu\n", 3 * ps);
+
+ drm_buddy_free_list(&mm, &middle);
+ KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ 3 * ps, ps, &allocated,
+ DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ "buddy_alloc didn't error size=%lu\n", 3 * ps);
+ KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ 2 * ps, ps, &allocated,
+ DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ "buddy_alloc didn't error size=%lu\n", 2 * ps);
+
+ drm_buddy_free_list(&mm, &right);
+ KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ 3 * ps, ps, &allocated,
+ DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ "buddy_alloc didn't error size=%lu\n", 3 * ps);
+ /*
+ * At this point we should have enough contiguous space for 2 blocks,
+ * however they are never buddies (since we freed middle and right) so
+ * will require the try_harder logic to find them.
+ */
+ KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ 2 * ps, ps, &allocated,
+ DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ "buddy_alloc hit an error size=%lu\n", 2 * ps);
+
+ drm_buddy_free_list(&mm, &left);
+ KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ 3 * ps, ps, &allocated,
+ DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ "buddy_alloc hit an error size=%lu\n", 3 * ps);
+
+ total = 0;
+ list_for_each_entry(block, &allocated, link)
+ total += drm_buddy_block_size(&mm, block);
+
+ KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+ drm_buddy_free_list(&mm, &allocated);
+ drm_buddy_fini(&mm);
+}
+
static void drm_test_buddy_alloc_pathological(struct kunit *test)
{
u64 mm_size, size, start = 0;
@@ -275,16 +567,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
drm_buddy_fini(&mm);
}
+static int drm_buddy_suite_init(struct kunit_suite *suite)
+{
+ while (!random_seed)
+ random_seed = get_random_u32();
+
+ kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
+ random_seed);
+
+ return 0;
+}
+
static struct kunit_case drm_buddy_tests[] = {
KUNIT_CASE(drm_test_buddy_alloc_limit),
KUNIT_CASE(drm_test_buddy_alloc_optimistic),
KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
KUNIT_CASE(drm_test_buddy_alloc_pathological),
+ KUNIT_CASE(drm_test_buddy_alloc_contiguous),
+ KUNIT_CASE(drm_test_buddy_alloc_range_bias),
{}
};
static struct kunit_suite drm_buddy_test_suite = {
.name = "drm_buddy",
+ .suite_init = drm_buddy_suite_init,
.test_cases = drm_buddy_tests,
};
diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c
index 4e9247cf9977..f37c0d765865 100644
--- a/drivers/gpu/drm/tests/drm_mm_test.c
+++ b/drivers/gpu/drm/tests/drm_mm_test.c
@@ -157,7 +157,7 @@ static void drm_test_mm_init(struct kunit *test)
/* After creation, it should all be one massive hole */
if (!assert_one_hole(test, &mm, 0, size)) {
- KUNIT_FAIL(test, "");
+ KUNIT_FAIL(test, "mm not one hole on creation");
goto out;
}
@@ -171,14 +171,14 @@ static void drm_test_mm_init(struct kunit *test)
/* After filling the range entirely, there should be no holes */
if (!assert_no_holes(test, &mm)) {
- KUNIT_FAIL(test, "");
+ KUNIT_FAIL(test, "mm has holes when filled");
goto out;
}
/* And then after emptying it again, the massive hole should be back */
drm_mm_remove_node(&tmp);
if (!assert_one_hole(test, &mm, 0, size)) {
- KUNIT_FAIL(test, "");
+ KUNIT_FAIL(test, "mm does not have single hole after emptying");
goto out;
}
@@ -188,13 +188,13 @@ out:
static void drm_test_mm_debug(struct kunit *test)
{
+ struct drm_printer p = drm_debug_printer(test->name);
struct drm_mm mm;
struct drm_mm_node nodes[2];
/* Create a small drm_mm with a couple of nodes and a few holes, and
* check that the debug iterator doesn't explode over a trivial drm_mm.
*/
-
drm_mm_init(&mm, 0, 4096);
memset(nodes, 0, sizeof(nodes));
@@ -209,6 +209,9 @@ static void drm_test_mm_debug(struct kunit *test)
KUNIT_ASSERT_FALSE_MSG(test, drm_mm_reserve_node(&mm, &nodes[1]),
"failed to reserve node[0] {start=%lld, size=%lld)\n",
nodes[0].start, nodes[0].size);
+
+ drm_mm_print(&mm, &p);
+ KUNIT_SUCCEED(test);
}
static bool expect_insert(struct kunit *test, struct drm_mm *mm,
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index f5187b384ae9..76027960054f 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -95,11 +95,17 @@ static int ttm_global_init(void)
ttm_pool_mgr_init(num_pages);
ttm_tt_mgr_init(num_pages, num_dma32);
- glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
+ glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32 |
+ __GFP_NOWARN);
+ /* Retry without GFP_DMA32 for platforms DMA32 is not available */
if (unlikely(glob->dummy_read_page == NULL)) {
- ret = -ENOMEM;
- goto out;
+ glob->dummy_read_page = alloc_page(__GFP_ZERO);
+ if (unlikely(glob->dummy_read_page == NULL)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ pr_warn("Using GFP_DMA32 fallback for dummy_read_page\n");
}
INIT_LIST_HEAD(&glob->device_list);
@@ -195,7 +201,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
bool use_dma_alloc, bool use_dma32)
{
struct ttm_global *glob = &ttm_glob;
- int ret;
+ int ret, nid;
if (WARN_ON(vma_manager == NULL))
return -EINVAL;
@@ -215,7 +221,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
ttm_sys_man_init(bdev);
- ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32);
+ if (dev)
+ nid = dev_to_node(dev);
+ else
+ nid = NUMA_NO_NODE;
+
+ ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32);
bdev->vma_manager = vma_manager;
spin_lock_init(&bdev->lru_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index b62f420a9f96..112438d965ff 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -387,7 +387,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
enum ttm_caching caching,
pgoff_t start_page, pgoff_t end_page)
{
- struct page **pages = tt->pages;
+ struct page **pages = &tt->pages[start_page];
unsigned int order;
pgoff_t i, nr;
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index fcff41dd2315..88f63d526b22 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -147,6 +147,13 @@ v3d_job_allocate(void **container, size_t size)
return 0;
}
+static void
+v3d_job_deallocate(void **container)
+{
+ kfree(*container);
+ *container = NULL;
+}
+
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
struct v3d_job *job, void (*free)(struct kref *ref),
@@ -273,8 +280,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
ret = v3d_job_init(v3d, file_priv, &(*job)->base,
v3d_job_free, args->in_sync, se, V3D_CSD);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)job);
return ret;
+ }
ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
if (ret)
@@ -282,8 +291,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
ret = v3d_job_init(v3d, file_priv, *clean_job,
v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)clean_job);
return ret;
+ }
(*job)->args = *args;
@@ -860,8 +871,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_job_init(v3d, file_priv, &render->base,
v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)&render);
goto fail;
+ }
render->start = args->rcl_start;
render->end = args->rcl_end;
@@ -874,8 +887,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_job_init(v3d, file_priv, &bin->base,
v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)&bin);
goto fail;
+ }
bin->start = args->bcl_start;
bin->end = args->bcl_end;
@@ -892,8 +907,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_job_init(v3d, file_priv, clean_job,
v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)&clean_job);
goto fail;
+ }
last_job = clean_job;
} else {
@@ -1015,8 +1032,10 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
ret = v3d_job_init(v3d, file_priv, &job->base,
v3d_job_free, args->in_sync, &se, V3D_TFU);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)&job);
goto fail;
+ }
job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
sizeof(*job->base.bo), GFP_KERNEL);
@@ -1233,8 +1252,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
v3d_job_free, 0, &se, V3D_CPU);
- if (ret)
+ if (ret) {
+ v3d_job_deallocate((void *)&cpu_job);
goto fail;
+ }
clean_job = cpu_job->indirect_csd.clean_job;
csd_job = cpu_job->indirect_csd.job;
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index f8e9abe647b9..9539aa28937f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev)
goto err_free;
}
+ dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX);
ret = virtio_gpu_init(vdev, dev);
if (ret)
goto err_free;
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 3062e0e0d467..79ba98a169f9 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -50,8 +50,8 @@
#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16)
-#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffffu << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffffu << 0)
#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn
#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
index 811add10c30d..c165e26c0976 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -242,8 +242,8 @@ struct slpc_shared_data {
(HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \
HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS)
#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8)
-#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xffu << 8)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0)
#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn
#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 3b83f907ece4..0b1146d0c997 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -82,11 +82,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
#define GUC_CTB_HDR_LEN 1u
#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN
#define GUC_CTB_MSG_MAX_LEN 256u
-#define GUC_CTB_MSG_0_FENCE (0xffff << 16)
-#define GUC_CTB_MSG_0_FORMAT (0xf << 12)
+#define GUC_CTB_MSG_0_FENCE (0xffffu << 16)
+#define GUC_CTB_MSG_0_FORMAT (0xfu << 12)
#define GUC_CTB_FORMAT_HXG 0u
-#define GUC_CTB_MSG_0_RESERVED (0xf << 8)
-#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0)
+#define GUC_CTB_MSG_0_RESERVED (0xfu << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS (0xffu << 0)
/**
* DOC: CTB HXG Message
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 47094b9b044c..0400bc0fccdc 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -31,9 +31,9 @@
*/
#define GUC_KLV_LEN_MIN 1u
-#define GUC_KLV_0_KEY (0xffff << 16)
-#define GUC_KLV_0_LEN (0xffff << 0)
-#define GUC_KLV_n_VALUE (0xffffffff << 0)
+#define GUC_KLV_0_KEY (0xffffu << 16)
+#define GUC_KLV_0_LEN (0xffffu << 0)
+#define GUC_KLV_n_VALUE (0xffffffffu << 0)
/**
* DOC: GuC Self Config KLVs
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
index 3d199016cf88..29e414c82d56 100644
--- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -40,18 +40,18 @@
*/
#define GUC_HXG_MSG_MIN_LEN 1u
-#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31)
+#define GUC_HXG_MSG_0_ORIGIN (0x1u << 31)
#define GUC_HXG_ORIGIN_HOST 0u
#define GUC_HXG_ORIGIN_GUC 1u
-#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
+#define GUC_HXG_MSG_0_TYPE (0x7u << 28)
#define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u
-#define GUC_HXG_MSG_0_AUX (0xfffffff << 0)
-#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0)
+#define GUC_HXG_MSG_0_AUX (0xfffffffu << 0)
+#define GUC_HXG_MSG_n_PAYLOAD (0xffffffffu << 0)
/**
* DOC: HXG Request
@@ -85,8 +85,8 @@
*/
#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
-#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16)
-#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfffu << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffffu << 0)
#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
@@ -117,8 +117,8 @@
*/
#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
-#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16)
-#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0)
+#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfffu << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION (0xffffu << 0)
#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
@@ -188,8 +188,8 @@
*/
#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN
-#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16)
-#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0)
+#define GUC_HXG_FAILURE_MSG_0_HINT (0xfffu << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffffu << 0)
/**
* DOC: HXG Response
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
index 5f19550cc845..777c20ceabab 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
@@ -10,7 +10,7 @@
#include "xe_bo.h"
-#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT)
+#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */
static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
{
@@ -35,12 +35,10 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
u32 ofs, u64 *ptr, u32 size)
{
struct ttm_bo_kmap_obj map;
- void *virtual;
+ void *src;
bool is_iomem;
int ret;
- XE_WARN_ON(size != 8);
-
ret = xe_bo_lock(bo, true);
if (ret)
return ret;
@@ -50,11 +48,12 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
goto out_unlock;
ofs &= ~PAGE_MASK;
- virtual = ttm_kmap_obj_virtual(&map, &is_iomem);
+ src = ttm_kmap_obj_virtual(&map, &is_iomem);
+ src += ofs;
if (is_iomem)
- *ptr = readq((void __iomem *)(virtual + ofs));
+ memcpy_fromio(ptr, (void __iomem *)src, size);
else
- *ptr = *(u64 *)(virtual + ofs);
+ memcpy(ptr, src, size);
ttm_bo_kunmap(&map);
out_unlock:
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index a6523df0f1d3..c347e2c29f81 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -114,21 +114,21 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
region |
XE_BO_NEEDS_CPU_ACCESS);
if (IS_ERR(remote)) {
- KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n",
- str, PTR_ERR(remote));
+ KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
+ str, remote);
return;
}
err = xe_bo_validate(remote, NULL, false);
if (err) {
- KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n",
+ KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n",
str, err);
goto out_unlock;
}
err = xe_bo_vmap(remote);
if (err) {
- KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n",
+ KUNIT_FAIL(test, "Failed to vmap system bo for %s: %i\n",
str, err);
goto out_unlock;
}
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
index ef56bd517b28..421b819fd4ba 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -21,4 +21,5 @@ kunit_test_suite(xe_mocs_test_suite);
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_mocs kunit test");
MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index a53c22a19582..b4715b78ef3b 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -74,9 +74,6 @@ static const struct platform_test_case cases[] = {
SUBPLATFORM_CASE(DG2, G11, B1),
SUBPLATFORM_CASE(DG2, G12, A0),
SUBPLATFORM_CASE(DG2, G12, A1),
- PLATFORM_CASE(PVC, B0),
- PLATFORM_CASE(PVC, B1),
- PLATFORM_CASE(PVC, C0),
GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 0b0e262e2166..4d3b80ec906d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -28,6 +28,14 @@
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
+const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
+ [XE_PL_SYSTEM] = "system",
+ [XE_PL_TT] = "gtt",
+ [XE_PL_VRAM0] = "vram0",
+ [XE_PL_VRAM1] = "vram1",
+ [XE_PL_STOLEN] = "stolen"
+};
+
static const struct ttm_place sys_placement_flags = {
.fpfn = 0,
.lpfn = 0,
@@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
migrate = xe->tiles[0].migrate;
xe_assert(xe, migrate);
-
- trace_xe_bo_move(bo);
+ trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
xe_device_mem_access_get(xe);
if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 9b1279aca127..8be42ac6cd07 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_restore_pinned(struct xe_bo *bo);
extern struct ttm_device_funcs xe_ttm_funcs;
+extern const char *const xe_mem_type_to_name[];
int xe_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b8d8da546670..5176c27e4b6a 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -83,9 +83,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
return 0;
}
-static void device_kill_persistent_exec_queues(struct xe_device *xe,
- struct xe_file *xef);
-
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -102,8 +99,6 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
mutex_unlock(&xef->exec_queue.lock);
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
- device_kill_persistent_exec_queues(xe, xef);
-
mutex_lock(&xef->vm.lock);
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
@@ -255,9 +250,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xa_erase(&xe->usm.asid_to_vm, asid);
}
- drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
- INIT_LIST_HEAD(&xe->persistent_engines.list);
-
spin_lock_init(&xe->pinned.lock);
INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
INIT_LIST_HEAD(&xe->pinned.external_vram);
@@ -570,37 +562,6 @@ void xe_device_shutdown(struct xe_device *xe)
{
}
-void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
-{
- mutex_lock(&xe->persistent_engines.lock);
- list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
- mutex_unlock(&xe->persistent_engines.lock);
-}
-
-void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
- struct xe_exec_queue *q)
-{
- mutex_lock(&xe->persistent_engines.lock);
- if (!list_empty(&q->persistent.link))
- list_del(&q->persistent.link);
- mutex_unlock(&xe->persistent_engines.lock);
-}
-
-static void device_kill_persistent_exec_queues(struct xe_device *xe,
- struct xe_file *xef)
-{
- struct xe_exec_queue *q, *next;
-
- mutex_lock(&xe->persistent_engines.lock);
- list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
- persistent.link)
- if (q->persistent.xef == xef) {
- xe_exec_queue_kill(q);
- list_del_init(&q->persistent.link);
- }
- mutex_unlock(&xe->persistent_engines.lock);
-}
-
void xe_device_wmb(struct xe_device *xe)
{
struct xe_gt *gt = xe_root_mmio_gt(xe);
@@ -613,7 +574,7 @@ void xe_device_wmb(struct xe_device *xe)
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
- DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+ DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}
bool xe_device_mem_access_ongoing(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 3da83b233206..08d8b72c7731 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -42,10 +42,6 @@ int xe_device_probe(struct xe_device *xe);
void xe_device_remove(struct xe_device *xe);
void xe_device_shutdown(struct xe_device *xe);
-void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q);
-void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
- struct xe_exec_queue *q);
-
void xe_device_wmb(struct xe_device *xe);
static inline struct xe_file *to_xe_file(const struct drm_file *file)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5dc9127a2029..e8491979a6f2 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -341,14 +341,6 @@ struct xe_device {
struct mutex lock;
} usm;
- /** @persistent_engines: engines that are closed but still running */
- struct {
- /** @lock: protects persistent engines */
- struct mutex lock;
- /** @list: list of persistent engines */
- struct list_head list;
- } persistent_engines;
-
/** @pinned: pinned BO state */
struct {
/** @lock: protected pinned BO list state */
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
index 74391d9b11ae..e4db069f0db3 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -134,8 +134,6 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
int xe_display_init_nommio(struct xe_device *xe)
{
- int err;
-
if (!xe->info.enable_display)
return 0;
@@ -145,10 +143,6 @@ int xe_display_init_nommio(struct xe_device *xe)
/* This must be called before any calls to HAS_PCH_* */
intel_detect_pch(xe);
- err = intel_power_domains_init(xe);
- if (err)
- return err;
-
return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
}
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 64ed303728fd..da2627ed6ae7 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -175,7 +175,7 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return 0;
}
-const struct dma_buf_ops xe_dmabuf_ops = {
+static const struct dma_buf_ops xe_dmabuf_ops = {
.attach = xe_dma_buf_attach,
.detach = xe_dma_buf_detach,
.pin = xe_dma_buf_pin,
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 82d1305e831f..6040e4d22b28 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo,
static void show_meminfo(struct drm_printer *p, struct drm_file *file)
{
- static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = {
- [XE_PL_SYSTEM] = "system",
- [XE_PL_TT] = "gtt",
- [XE_PL_VRAM0] = "vram0",
- [XE_PL_VRAM1] = "vram1",
- [4 ... 6] = NULL,
- [XE_PL_STOLEN] = "stolen"
- };
struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
struct xe_file *xef = file->driver_priv;
struct ttm_device *bdev = &xef->xe->ttm;
@@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
spin_unlock(&client->bos_lock);
for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
- if (!mem_type_to_name[mem_type])
+ if (!xe_mem_type_to_name[mem_type])
continue;
man = ttm_manager_type(bdev, mem_type);
@@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
DRM_GEM_OBJECT_RESIDENT |
(mem_type != XE_PL_SYSTEM ? 0 :
DRM_GEM_OBJECT_PURGEABLE),
- mem_type_to_name[mem_type]);
+ xe_mem_type_to_name[mem_type]);
}
}
}
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index b853feed9ccc..17f26952e665 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -111,7 +111,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
struct drm_exec *exec = &vm_exec.exec;
- u32 i, num_syncs = 0;
+ u32 i, num_syncs = 0, num_ufence = 0;
struct xe_sched_job *job;
struct dma_fence *rebind_fence;
struct xe_vm *vm;
@@ -157,6 +157,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
SYNC_PARSE_FLAG_LR_MODE : 0));
if (err)
goto err_syncs;
+
+ if (xe_sync_is_ufence(&syncs[i]))
+ num_ufence++;
+ }
+
+ if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+ err = -EINVAL;
+ goto err_syncs;
}
if (xe_exec_queue_is_parallel(q)) {
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index bcfc4127c7c5..49223026c89f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -60,7 +60,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
q->fence_irq = &gt->fence_irq[hwe->class];
q->ring_ops = gt->ring_ops[hwe->class];
q->ops = gt->exec_queue_ops;
- INIT_LIST_HEAD(&q->persistent.link);
INIT_LIST_HEAD(&q->compute.link);
INIT_LIST_HEAD(&q->multi_gt_link);
@@ -310,102 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
return q->ops->set_timeslice(q, value);
}
-static int exec_queue_set_preemption_timeout(struct xe_device *xe,
- struct xe_exec_queue *q, u64 value,
- bool create)
-{
- u32 min = 0, max = 0;
-
- xe_exec_queue_get_prop_minmax(q->hwe->eclass,
- XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);
-
- if (xe_exec_queue_enforce_schedule_limit() &&
- !xe_hw_engine_timeout_in_range(value, min, max))
- return -EINVAL;
-
- return q->ops->set_preempt_timeout(q, value);
-}
-
-static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
-{
- if (XE_IOCTL_DBG(xe, !create))
- return -EINVAL;
-
- if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm)))
- return -EINVAL;
-
- if (value)
- q->flags |= EXEC_QUEUE_FLAG_PERSISTENT;
- else
- q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
-
- return 0;
-}
-
-static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
-{
- u32 min = 0, max = 0;
-
- if (XE_IOCTL_DBG(xe, !create))
- return -EINVAL;
-
- xe_exec_queue_get_prop_minmax(q->hwe->eclass,
- XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);
-
- if (xe_exec_queue_enforce_schedule_limit() &&
- !xe_hw_engine_timeout_in_range(value, min, max))
- return -EINVAL;
-
- return q->ops->set_job_timeout(q, value);
-}
-
-static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
-{
- if (XE_IOCTL_DBG(xe, !create))
- return -EINVAL;
-
- if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
- return -EINVAL;
-
- q->usm.acc_trigger = value;
-
- return 0;
-}
-
-static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
-{
- if (XE_IOCTL_DBG(xe, !create))
- return -EINVAL;
-
- if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
- return -EINVAL;
-
- q->usm.acc_notify = value;
-
- return 0;
-}
-
-static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
- u64 value, bool create)
-{
- if (XE_IOCTL_DBG(xe, !create))
- return -EINVAL;
-
- if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
- return -EINVAL;
-
- if (value > DRM_XE_ACC_GRANULARITY_64M)
- return -EINVAL;
-
- q->usm.acc_granularity = value;
-
- return 0;
-}
-
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
u64 value, bool create);
@@ -413,12 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
- [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
- [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
- [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
- [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
- [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
- [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
};
static int exec_queue_user_ext_set_property(struct xe_device *xe,
@@ -437,10 +334,15 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
if (XE_IOCTL_DBG(xe, ext.property >=
ARRAY_SIZE(exec_queue_set_property_funcs)) ||
- XE_IOCTL_DBG(xe, ext.pad))
+ XE_IOCTL_DBG(xe, ext.pad) ||
+ XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+ if (!exec_queue_set_property_funcs[idx])
+ return -EINVAL;
+
return exec_queue_set_property_funcs[idx](xe, q, ext.value, create);
}
@@ -704,9 +606,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
}
q = xe_exec_queue_create(xe, vm, logical_mask,
- args->width, hwe,
- xe_vm_in_lr_mode(vm) ? 0 :
- EXEC_QUEUE_FLAG_PERSISTENT);
+ args->width, hwe, 0);
up_read(&vm->lock);
xe_vm_put(vm);
if (IS_ERR(q))
@@ -728,8 +628,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
goto kill_exec_queue;
}
- q->persistent.xef = xef;
-
mutex_lock(&xef->exec_queue.lock);
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
mutex_unlock(&xef->exec_queue.lock);
@@ -872,10 +770,7 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;
- if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT))
- xe_exec_queue_kill(q);
- else
- xe_device_add_persistent_exec_queues(xe, q);
+ xe_exec_queue_kill(q);
trace_xe_exec_queue_close(q);
xe_exec_queue_put(q);
@@ -926,20 +821,24 @@ void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
* @q: The exec queue
* @vm: The VM the engine does a bind or exec for
*
- * Get last fence, does not take a ref
+ * Get last fence, takes a ref
*
* Returns: last fence if not signaled, dma fence stub if signaled
*/
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
struct xe_vm *vm)
{
+ struct dma_fence *fence;
+
xe_exec_queue_last_fence_lockdep_assert(q, vm);
if (q->last_fence &&
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
xe_exec_queue_last_fence_put(q, vm);
- return q->last_fence ? q->last_fence : dma_fence_get_stub();
+ fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 8d4b7feb8c30..36f4901d8d7e 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -105,16 +105,6 @@ struct xe_exec_queue {
struct xe_guc_exec_queue *guc;
};
- /**
- * @persistent: persistent exec queue state
- */
- struct {
- /** @xef: file which this exec queue belongs to */
- struct xe_file *xef;
- /** @link: link in list of persistent exec queues */
- struct list_head link;
- } persistent;
-
union {
/**
* @parallel: parallel submission state
@@ -160,16 +150,6 @@ struct xe_exec_queue {
spinlock_t lock;
} compute;
- /** @usm: unified shared memory state */
- struct {
- /** @acc_trigger: access counter trigger */
- u32 acc_trigger;
- /** @acc_notify: access counter notify */
- u32 acc_notify;
- /** @acc_granularity: access counter granularity */
- u32 acc_granularity;
- } usm;
-
/** @ops: submission backend exec queue operations */
const struct xe_exec_queue_ops *ops;
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 96b5224eb478..acb4d9f38fd7 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
struct xe_execlist_port *port = exl->port;
- enum xe_exec_queue_priority priority = exl->active_priority;
+ enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
XE_WARN_ON(priority < 0);
@@ -378,8 +378,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
list_del(&exl->active_link);
spin_unlock_irqrestore(&exl->port->lock, flags);
- if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
- xe_device_remove_persistent_exec_queues(xe, q);
drm_sched_entity_fini(&exl->entity);
drm_sched_fini(&exl->sched);
kfree(exl);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3af2adec1295..35474ddbaf97 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -437,7 +437,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
* USM has its only SA pool to non-block behind user operations
*/
if (gt_to_xe(gt)->info.has_usm) {
- gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16);
+ struct xe_device *xe = gt_to_xe(gt);
+
+ gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
+ IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
if (IS_ERR(gt->usm.bb_pool)) {
err = PTR_ERR(gt->usm.bb_pool);
goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 9358f7336889..9fcae65b6469 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -145,10 +145,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
}
if (xe_gt_is_media_type(gt)) {
- sprintf(gtidle->name, "gt%d-mc\n", gt->info.id);
+ sprintf(gtidle->name, "gt%d-mc", gt->info.id);
gtidle->idle_residency = xe_guc_pc_mc6_residency;
} else {
- sprintf(gtidle->name, "gt%d-rc\n", gt->info.id);
+ sprintf(gtidle->name, "gt%d-rc", gt->info.id);
gtidle->idle_residency = xe_guc_pc_rc6_residency;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 77925b35cf8d..8546cd3cc50d 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -480,7 +480,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
* to synchronize with external clients (e.g., firmware), so a semaphore
* register will also need to be taken.
*/
-static void mcr_lock(struct xe_gt *gt)
+static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
{
struct xe_device *xe = gt_to_xe(gt);
int ret = 0;
@@ -500,7 +500,7 @@ static void mcr_lock(struct xe_gt *gt)
drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
}
-static void mcr_unlock(struct xe_gt *gt)
+static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
{
/* Release hardware semaphore - this is done by writing 1 to the register */
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 59a70d2e0a7a..73f08f1924df 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -165,7 +165,8 @@ retry_userptr:
goto unlock_vm;
}
- if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
+ if (!xe_vma_is_userptr(vma) ||
+ !xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
downgrade_write(&vm->lock);
write_locked = false;
}
@@ -181,11 +182,13 @@ retry_userptr:
/* TODO: Validate fault */
if (xe_vma_is_userptr(vma) && write_locked) {
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+
spin_lock(&vm->userptr.invalidated_lock);
- list_del_init(&vma->userptr.invalidate_link);
+ list_del_init(&uvma->userptr.invalidate_link);
spin_unlock(&vm->userptr.invalidated_lock);
- ret = xe_vma_userptr_pin_pages(vma);
+ ret = xe_vma_userptr_pin_pages(uvma);
if (ret)
goto unlock_vm;
@@ -220,7 +223,7 @@ retry_userptr:
dma_fence_put(fence);
if (xe_vma_is_userptr(vma))
- ret = xe_vma_userptr_check_repin(vma);
+ ret = xe_vma_userptr_check_repin(to_userptr_vma(vma));
vma->usm.tile_invalidated &= ~BIT(tile->id);
unlock_dma_resv:
@@ -332,7 +335,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
return -EPROTO;
asid = FIELD_GET(PFD_ASID, msg[1]);
- pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+ pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
spin_lock_irqsave(&pf_queue->lock, flags);
full = pf_queue_full(pf_queue);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 7eef23a00d77..f4c485289dbe 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
xe_gt_assert(gt, vma);
+ /* Execlists not supported */
+ if (gt_to_xe(gt)->info.force_execlist) {
+ if (fence)
+ __invalidation_fence_signal(fence);
+
+ return 0;
+ }
+
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
if (!xe->info.has_range_tlb_invalidation) {
@@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
struct drm_printer p = drm_err_printer(__func__);
int ret;
+ /* Execlists not supported */
+ if (gt_to_xe(gt)->info.force_execlist)
+ return 0;
+
/*
* XXX: See above, this algorithm only works if seqno are always in
* order
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index f71085228cb3..d91702592520 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -963,7 +963,9 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc)
struct xe_device *xe = pc_to_xe(pc);
if (xe->info.skip_guc_pc) {
+ xe_device_mem_access_get(xe);
xe_gt_idle_disable_c6(pc_to_gt(pc));
+ xe_device_mem_access_put(xe);
return;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 54ffcfcdd41f..f22ae717b0b2 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1028,8 +1028,6 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
- if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
- xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
release_guc_id(guc, q);
xe_sched_entity_fini(&ge->entity);
xe_sched_fini(&ge->sched);
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index a6094c81f2ad..a5de3e7b0bd6 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -217,13 +217,13 @@ struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
if (!fence)
return ERR_PTR(-ENOMEM);
- dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
- ctx->dma_fence_ctx, ctx->next_seqno++);
-
fence->ctx = ctx;
fence->seqno_map = seqno_map;
INIT_LIST_HEAD(&fence->irq_link);
+ dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+ ctx->dma_fence_ctx, ctx->next_seqno++);
+
trace_xe_hw_fence_create(fence);
return fence;
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 6ef2aa1eae8b..174ed2185481 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -419,7 +419,7 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval)
return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP,
POWER_SETUP_SUBCOMMAND_READ_I1, 0),
- uval, 0);
+ uval, NULL);
}
static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index b7fa3831b684..b38319d2801e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -21,10 +21,10 @@
#include "xe_map.h"
#include "xe_vm.h"
-#define CTX_VALID (1 << 0)
-#define CTX_PRIVILEGE (1 << 8)
-#define CTX_ADDRESSING_MODE_SHIFT 3
-#define LEGACY_64B_CONTEXT 3
+#define LRC_VALID (1 << 0)
+#define LRC_PRIVILEGE (1 << 8)
+#define LRC_ADDRESSING_MODE_SHIFT 3
+#define LRC_LEGACY_64B_CONTEXT 3
#define ENGINE_CLASS_SHIFT 61
#define ENGINE_INSTANCE_SHIFT 48
@@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
#define PVC_CTX_ASID (0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
-#define ACC_GRANULARITY_S 20
-#define ACC_NOTIFY_S 16
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
@@ -754,23 +752,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
if (xe->info.has_asid && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
- (q->usm.acc_granularity <<
- ACC_GRANULARITY_S) | vm->usm.asid);
- if (xe->info.has_usm && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
- (q->usm.acc_notify << ACC_NOTIFY_S) |
- q->usm.acc_trigger);
-
- lrc->desc = CTX_VALID;
- lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
+ xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
+
+ lrc->desc = LRC_VALID;
+ lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
/* TODO: Priority */
/* While this appears to have something about privileged batches or
* some such, it really just means PPGTT mode.
*/
if (vm)
- lrc->desc |= CTX_PRIVILEGE;
+ lrc->desc |= LRC_PRIVILEGE;
if (GRAPHICS_VERx100(xe) < 1250) {
lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index e05e9e7282b6..70480c305602 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -170,11 +170,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (!IS_DGFX(xe)) {
/* Write out batch too */
m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
- if (xe->info.has_usm) {
- batch = tile->primary_gt->usm.bb_pool->bo;
- m->usm_batch_base_ofs = m->batch_base_ofs;
- }
-
for (i = 0; i < batch->size;
i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
XE_PAGE_SIZE) {
@@ -185,6 +180,24 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
entry);
level++;
}
+ if (xe->info.has_usm) {
+ xe_tile_assert(tile, batch->size == SZ_1M);
+
+ batch = tile->primary_gt->usm.bb_pool->bo;
+ m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M;
+ xe_tile_assert(tile, batch->size == SZ_512K);
+
+ for (i = 0; i < batch->size;
+ i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
+ XE_PAGE_SIZE) {
+ entry = vm->pt_ops->pte_encode_bo(batch, i,
+ pat_index, 0);
+
+ xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+ entry);
+ level++;
+ }
+ }
} else {
u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
@@ -472,7 +485,7 @@ static void emit_pte(struct xe_migrate *m,
/* Indirect access needs compression enabled uncached PAT index */
if (GRAPHICS_VERx100(xe) >= 2000)
pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] :
- xe->pat.idx[XE_CACHE_NONE];
+ xe->pat.idx[XE_CACHE_WB];
else
pat_index = xe->pat.idx[XE_CACHE_WB];
@@ -760,14 +773,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
xe_res_next(&src_it, src_L0);
else
- emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
- src);
+ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+ &src_it, src_L0, src);
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
xe_res_next(&dst_it, src_L0);
else
- emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
- dst);
+ emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
+ &dst_it, src_L0, dst);
if (copy_system_ccs)
emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
@@ -1009,8 +1022,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
xe_res_next(&src_it, clear_L0);
else
- emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
- dst);
+ emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs,
+ &src_it, clear_L0, dst);
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
@@ -1204,8 +1217,11 @@ static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
}
if (q) {
fence = xe_exec_queue_last_fence_get(q, vm);
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+ dma_fence_put(fence);
return false;
+ }
+ dma_fence_put(fence);
}
return true;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index c8c5d74b6e90..02f7808f28ca 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe)
pci_bus_for_each_resource(root, root_res, i) {
if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
- root_res->start > 0x100000000ull)
+ (u64)root_res->start > 0x100000000ul)
break;
}
@@ -272,8 +272,8 @@ int xe_mmio_probe_vram(struct xe_device *xe)
drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id,
tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
- &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size,
- &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size);
+ &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size,
+ &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size);
/* calculate total size using tile size to get the correct HW sizing */
total_size += tile_size;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index de1030a47588..6653c045f3c9 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -20,8 +20,8 @@
struct xe_pt_dir {
struct xe_pt pt;
- /** @dir: Directory structure for the xe_pt_walk functionality */
- struct xe_ptw_dir dir;
+ /** @children: Array of page-table child nodes */
+ struct xe_ptw *children[XE_PDES];
};
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
@@ -44,7 +44,7 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
{
- return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+ return container_of(pt_dir->children[index], struct xe_pt, base);
}
static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -65,6 +65,14 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
XE_PTE_NULL;
}
+static void xe_pt_free(struct xe_pt *pt)
+{
+ if (pt->level)
+ kfree(as_xe_pt_dir(pt));
+ else
+ kfree(pt);
+}
+
/**
* xe_pt_create() - Create a page-table.
* @vm: The vm to create for.
@@ -85,15 +93,19 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
{
struct xe_pt *pt;
struct xe_bo *bo;
- size_t size;
int err;
- size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
- XE_PDES * sizeof(struct xe_ptw *);
- pt = kzalloc(size, GFP_KERNEL);
+ if (level) {
+ struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+
+ pt = (dir) ? &dir->pt : NULL;
+ } else {
+ pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+ }
if (!pt)
return ERR_PTR(-ENOMEM);
+ pt->level = level;
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
XE_BO_CREATE_VRAM_IF_DGFX(tile) |
@@ -106,8 +118,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
goto err_kfree;
}
pt->bo = bo;
- pt->level = level;
- pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+ pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL;
if (vm->xef)
xe_drm_client_add_bo(vm->xef->client, pt->bo);
@@ -116,7 +127,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
return pt;
err_kfree:
- kfree(pt);
+ xe_pt_free(pt);
return ERR_PTR(err);
}
@@ -193,7 +204,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
deferred);
}
}
- kfree(pt);
+ xe_pt_free(pt);
}
/**
@@ -358,7 +369,7 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
struct iosys_map *map = &parent->bo->vmap;
if (unlikely(xe_child))
- parent->base.dir->entries[offset] = &xe_child->base;
+ parent->base.children[offset] = &xe_child->base;
xe_pt_write(xe_walk->vm->xe, map, offset, pte);
parent->num_live++;
@@ -488,10 +499,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
* this device *requires* 64K PTE size for VRAM, fail.
*/
if (level == 0 && !xe_parent->is_compact) {
- if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+ if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
+ xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
pte |= XE_PTE_PS64;
- else if (XE_WARN_ON(xe_walk->needs_64K))
+ } else if (XE_WARN_ON(xe_walk->needs_64K)) {
return -EINVAL;
+ }
}
ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
@@ -534,13 +547,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
*child = &xe_child->base;
/*
- * Prefer the compact pagetable layout for L0 if possible.
+ * Prefer the compact pagetable layout for L0 if possible. Only
+ * possible if VMA covers entire 2MB region as compact 64k and
+ * 4k pages cannot be mixed within a 2MB region.
* TODO: Suballocate the pt bo to avoid wasting a lot of
* memory.
*/
if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
covers && xe_pt_scan_64K(addr, next, xe_walk)) {
walk->shifts = xe_compact_pt_shifts;
+ xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT;
flags |= XE_PDE_64K;
xe_child->is_compact = true;
}
@@ -618,8 +634,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
if (!xe_vma_is_null(vma)) {
if (xe_vma_is_userptr(vma))
- xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
- &curs);
+ xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
+ xe_vma_size(vma), &curs);
else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
xe_vma_size(vma), &curs);
@@ -853,7 +869,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
xe_pt_destroy(xe_pt_entry(pt_dir, j_),
xe_vma_vm(vma)->flags, deferred);
- pt_dir->dir.entries[j_] = &newpte->base;
+ pt_dir->children[j_] = &newpte->base;
}
kfree(entries[i].pt_entries);
}
@@ -906,17 +922,17 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe,
#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
-static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{
- u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
+ u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2;
static u32 count;
if (count++ % divisor == divisor - 1) {
- struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
- vma->userptr.divisor = divisor << 1;
+ uvma->userptr.divisor = divisor << 1;
spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&vma->userptr.invalidate_link,
+ list_move_tail(&uvma->userptr.invalidate_link,
&vm->userptr.invalidated);
spin_unlock(&vm->userptr.invalidated_lock);
return true;
@@ -927,7 +943,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
#else
-static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{
return false;
}
@@ -1000,9 +1016,9 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
{
struct xe_pt_migrate_pt_update *userptr_update =
container_of(pt_update, typeof(*userptr_update), base);
- struct xe_vma *vma = pt_update->vma;
- unsigned long notifier_seq = vma->userptr.notifier_seq;
- struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma);
+ unsigned long notifier_seq = uvma->userptr.notifier_seq;
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
int err = xe_pt_vm_dependencies(pt_update->job,
&vm->rftree[pt_update->tile_id],
pt_update->start,
@@ -1023,7 +1039,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
*/
do {
down_read(&vm->userptr.notifier_lock);
- if (!mmu_interval_read_retry(&vma->userptr.notifier,
+ if (!mmu_interval_read_retry(&uvma->userptr.notifier,
notifier_seq))
break;
@@ -1032,11 +1048,11 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
if (userptr_update->bind)
return -EAGAIN;
- notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+ notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier);
} while (true);
/* Inject errors to test_whether they are handled correctly */
- if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
+ if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) {
up_read(&vm->userptr.notifier_lock);
return -EAGAIN;
}
@@ -1297,7 +1313,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
vma->tile_present |= BIT(tile->id);
if (bind_pt_update.locked) {
- vma->userptr.initial_bind = true;
+ to_userptr_vma(vma)->userptr.initial_bind = true;
up_read(&vm->userptr.notifier_lock);
xe_bo_put_commit(&deferred);
}
@@ -1507,7 +1523,7 @@ xe_pt_commit_unbind(struct xe_vma *vma,
xe_pt_destroy(xe_pt_entry(pt_dir, i),
xe_vma_vm(vma)->flags, deferred);
- pt_dir->dir.entries[i] = NULL;
+ pt_dir->children[i] = NULL;
}
}
}
@@ -1642,7 +1658,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
if (!vma->tile_present) {
spin_lock(&vm->userptr.invalidated_lock);
- list_del_init(&vma->userptr.invalidate_link);
+ list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
spin_unlock(&vm->userptr.invalidated_lock);
}
up_read(&vm->userptr.notifier_lock);
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
index 8f6c8d063f39..b8b3d2aea492 100644
--- a/drivers/gpu/drm/xe/xe_pt_walk.c
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -74,7 +74,7 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
u64 addr, u64 end, struct xe_pt_walk *walk)
{
pgoff_t offset = xe_pt_offset(addr, level, walk);
- struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
+ struct xe_ptw **entries = parent->children ? parent->children : NULL;
const struct xe_pt_walk_ops *ops = walk->ops;
enum page_walk_action action;
struct xe_ptw *child;
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
index ec3d1e9efa6d..5ecc4d2f0f65 100644
--- a/drivers/gpu/drm/xe/xe_pt_walk.h
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -8,28 +8,15 @@
#include <linux/pagewalk.h>
#include <linux/types.h>
-struct xe_ptw_dir;
-
/**
* struct xe_ptw - base class for driver pagetable subclassing.
- * @dir: Pointer to an array of children if any.
+ * @children: Pointer to an array of children if any.
*
* Drivers could subclass this, and if it's a page-directory, typically
- * embed the xe_ptw_dir::entries array in the same allocation.
+ * embed an array of xe_ptw pointers.
*/
struct xe_ptw {
- struct xe_ptw_dir *dir;
-};
-
-/**
- * struct xe_ptw_dir - page directory structure
- * @entries: Array holding page directory children.
- *
- * It is the responsibility of the user to ensure @entries is
- * correctly sized.
- */
-struct xe_ptw_dir {
- struct xe_ptw *entries[0];
+ struct xe_ptw **children;
};
/**
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 9b35673b286c..7e924faeeea0 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -459,21 +459,21 @@ static size_t calc_topo_query_size(struct xe_device *xe)
sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
}
-static void __user *copy_mask(void __user *ptr,
- struct drm_xe_query_topology_mask *topo,
- void *mask, size_t mask_size)
+static int copy_mask(void __user **ptr,
+ struct drm_xe_query_topology_mask *topo,
+ void *mask, size_t mask_size)
{
topo->num_bytes = mask_size;
- if (copy_to_user(ptr, topo, sizeof(*topo)))
- return ERR_PTR(-EFAULT);
- ptr += sizeof(topo);
+ if (copy_to_user(*ptr, topo, sizeof(*topo)))
+ return -EFAULT;
+ *ptr += sizeof(topo);
- if (copy_to_user(ptr, mask, mask_size))
- return ERR_PTR(-EFAULT);
- ptr += mask_size;
+ if (copy_to_user(*ptr, mask, mask_size))
+ return -EFAULT;
+ *ptr += mask_size;
- return ptr;
+ return 0;
}
static int query_gt_topology(struct xe_device *xe,
@@ -493,28 +493,28 @@ static int query_gt_topology(struct xe_device *xe,
}
for_each_gt(gt, xe, id) {
+ int err;
+
topo.gt_id = id;
topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
- query_ptr = copy_mask(query_ptr, &topo,
- gt->fuse_topo.g_dss_mask,
- sizeof(gt->fuse_topo.g_dss_mask));
- if (IS_ERR(query_ptr))
- return PTR_ERR(query_ptr);
+ err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask,
+ sizeof(gt->fuse_topo.g_dss_mask));
+ if (err)
+ return err;
topo.type = DRM_XE_TOPO_DSS_COMPUTE;
- query_ptr = copy_mask(query_ptr, &topo,
- gt->fuse_topo.c_dss_mask,
- sizeof(gt->fuse_topo.c_dss_mask));
- if (IS_ERR(query_ptr))
- return PTR_ERR(query_ptr);
+ err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask,
+ sizeof(gt->fuse_topo.c_dss_mask));
+ if (err)
+ return err;
topo.type = DRM_XE_TOPO_EU_PER_DSS;
- query_ptr = copy_mask(query_ptr, &topo,
- gt->fuse_topo.eu_mask_per_dss,
- sizeof(gt->fuse_topo.eu_mask_per_dss));
- if (IS_ERR(query_ptr))
- return PTR_ERR(query_ptr);
+ err = copy_mask(&query_ptr, &topo,
+ gt->fuse_topo.eu_mask_per_dss,
+ sizeof(gt->fuse_topo.eu_mask_per_dss));
+ if (err)
+ return err;
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c
index d35d9ec58e86..372378e89e98 100644
--- a/drivers/gpu/drm/xe/xe_range_fence.c
+++ b/drivers/gpu/drm/xe/xe_range_fence.c
@@ -151,6 +151,11 @@ xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last)
return xe_range_fence_tree_iter_next(rfence, start, last);
}
+static void xe_range_fence_free(struct xe_range_fence *rfence)
+{
+ kfree(rfence);
+}
+
const struct xe_range_fence_ops xe_range_fence_kfree_ops = {
- .free = (void (*)(struct xe_range_fence *rfence)) kfree,
+ .free = xe_range_fence_free,
};
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 01106a1156ad..4e2ccad0e52f 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -274,7 +274,6 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
struct dma_fence *fence;
fence = xe_exec_queue_last_fence_get(job->q, vm);
- dma_fence_get(fence);
return drm_sched_job_add_dependency(&job->drm, fence);
}
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index e4c220cf9115..02c9577fe418 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -19,7 +19,7 @@
#include "xe_macros.h"
#include "xe_sched_job_types.h"
-struct user_fence {
+struct xe_user_fence {
struct xe_device *xe;
struct kref refcount;
struct dma_fence_cb cb;
@@ -27,31 +27,32 @@ struct user_fence {
struct mm_struct *mm;
u64 __user *addr;
u64 value;
+ int signalled;
};
static void user_fence_destroy(struct kref *kref)
{
- struct user_fence *ufence = container_of(kref, struct user_fence,
+ struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence,
refcount);
mmdrop(ufence->mm);
kfree(ufence);
}
-static void user_fence_get(struct user_fence *ufence)
+static void user_fence_get(struct xe_user_fence *ufence)
{
kref_get(&ufence->refcount);
}
-static void user_fence_put(struct user_fence *ufence)
+static void user_fence_put(struct xe_user_fence *ufence)
{
kref_put(&ufence->refcount, user_fence_destroy);
}
-static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
- u64 value)
+static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+ u64 value)
{
- struct user_fence *ufence;
+ struct xe_user_fence *ufence;
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
if (!ufence)
@@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
static void user_fence_worker(struct work_struct *w)
{
- struct user_fence *ufence = container_of(w, struct user_fence, worker);
+ struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);
if (mmget_not_zero(ufence->mm)) {
kthread_use_mm(ufence->mm);
@@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w)
}
wake_up_all(&ufence->xe->ufence_wq);
+ WRITE_ONCE(ufence->signalled, 1);
user_fence_put(ufence);
}
-static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence)
{
INIT_WORK(&ufence->worker, user_fence_worker);
queue_work(ufence->xe->ordered_wq, &ufence->worker);
@@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
- struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+ struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb);
kick_ufence(ufence, fence);
}
@@ -307,7 +309,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
/* Easy case... */
if (!num_in_fence) {
fence = xe_exec_queue_last_fence_get(q, vm);
- dma_fence_get(fence);
return fence;
}
@@ -322,7 +323,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
}
}
fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
- dma_fence_get(fences[current_fence - 1]);
cf = dma_fence_array_create(num_in_fence, fences,
vm->composite_fence_ctx,
vm->composite_fence_seqno++,
@@ -342,3 +342,39 @@ err_out:
return ERR_PTR(-ENOMEM);
}
+
+/**
+ * xe_sync_ufence_get() - Get user fence from sync
+ * @sync: input sync
+ *
+ * Get a user fence reference from sync.
+ *
+ * Return: xe_user_fence pointer with reference
+ */
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync)
+{
+ user_fence_get(sync->ufence);
+
+ return sync->ufence;
+}
+
+/**
+ * xe_sync_ufence_put() - Put user fence reference
+ * @ufence: user fence reference
+ *
+ */
+void xe_sync_ufence_put(struct xe_user_fence *ufence)
+{
+ user_fence_put(ufence);
+}
+
+/**
+ * xe_sync_ufence_get_status() - Get user fence status
+ * @ufence: user fence
+ *
+ * Return: 1 if signalled, 0 not signalled, <0 on error
+ */
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence)
+{
+ return READ_ONCE(ufence->signalled);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index d284afbe917c..0fd0d51208e6 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -33,4 +33,13 @@ struct dma_fence *
xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
struct xe_exec_queue *q, struct xe_vm *vm);
+static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync)
+{
+ return !!sync->ufence;
+}
+
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
+void xe_sync_ufence_put(struct xe_user_fence *ufence);
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
index 852db5e7884f..30ac3f51993b 100644
--- a/drivers/gpu/drm/xe/xe_sync_types.h
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -18,7 +18,7 @@ struct xe_sync_entry {
struct drm_syncobj *syncobj;
struct dma_fence *fence;
struct dma_fence_chain *chain_fence;
- struct user_fence *ufence;
+ struct xe_user_fence *ufence;
u64 addr;
u64 timeline_value;
u32 type;
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 044c20881de7..0650b2fa75ef 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -167,9 +167,10 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
goto err_mem_access;
tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
- if (IS_ERR(tile->mem.kernel_bb_pool))
+ if (IS_ERR(tile->mem.kernel_bb_pool)) {
err = PTR_ERR(tile->mem.kernel_bb_pool);
-
+ goto err_mem_access;
+ }
xe_wa_apply_tile_workarounds(tile);
xe_tile_sysfs_init(tile);
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 95163c303f3e..4ddc55527f9a 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -12,6 +12,7 @@
#include <linux/tracepoint.h>
#include <linux/types.h>
+#include "xe_bo.h"
#include "xe_bo_types.h"
#include "xe_exec_queue_types.h"
#include "xe_gpu_scheduler_types.h"
@@ -26,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
TP_ARGS(fence),
TP_STRUCT__entry(
- __field(u64, fence)
+ __field(struct xe_gt_tlb_invalidation_fence *, fence)
__field(int, seqno)
),
TP_fast_assign(
- __entry->fence = (u64)fence;
+ __entry->fence = fence;
__entry->seqno = fence->seqno;
),
- TP_printk("fence=0x%016llx, seqno=%d",
+ TP_printk("fence=%p, seqno=%d",
__entry->fence, __entry->seqno)
);
@@ -82,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo,
TP_STRUCT__entry(
__field(size_t, size)
__field(u32, flags)
- __field(u64, vm)
+ __field(struct xe_vm *, vm)
),
TP_fast_assign(
__entry->size = bo->size;
__entry->flags = bo->flags;
- __entry->vm = (unsigned long)bo->vm;
+ __entry->vm = bo->vm;
),
- TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx",
+ TP_printk("size=%zu, flags=0x%02x, vm=%p",
__entry->size, __entry->flags, __entry->vm)
);
@@ -100,9 +101,31 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
TP_ARGS(bo)
);
-DEFINE_EVENT(xe_bo, xe_bo_move,
- TP_PROTO(struct xe_bo *bo),
- TP_ARGS(bo)
+TRACE_EVENT(xe_bo_move,
+ TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
+ bool move_lacks_source),
+ TP_ARGS(bo, new_placement, old_placement, move_lacks_source),
+ TP_STRUCT__entry(
+ __field(struct xe_bo *, bo)
+ __field(size_t, size)
+ __field(u32, new_placement)
+ __field(u32, old_placement)
+ __array(char, device_id, 12)
+ __field(bool, move_lacks_source)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo;
+ __entry->size = bo->size;
+ __entry->new_placement = new_placement;
+ __entry->old_placement = old_placement;
+ strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12);
+ __entry->move_lacks_source = move_lacks_source;
+ ),
+ TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s",
+ __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size,
+ xe_mem_type_to_name[__entry->old_placement],
+ xe_mem_type_to_name[__entry->new_placement], __entry->device_id)
);
DECLARE_EVENT_CLASS(xe_exec_queue,
@@ -327,16 +350,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
TP_STRUCT__entry(
__field(u64, ctx)
__field(u32, seqno)
- __field(u64, fence)
+ __field(struct xe_hw_fence *, fence)
),
TP_fast_assign(
__entry->ctx = fence->dma.context;
__entry->seqno = fence->dma.seqno;
- __entry->fence = (unsigned long)fence;
+ __entry->fence = fence;
),
- TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+ TP_printk("ctx=0x%016llx, fence=%p, seqno=%u",
__entry->ctx, __entry->fence, __entry->seqno)
);
@@ -365,7 +388,7 @@ DECLARE_EVENT_CLASS(xe_vma,
TP_ARGS(vma),
TP_STRUCT__entry(
- __field(u64, vma)
+ __field(struct xe_vma *, vma)
__field(u32, asid)
__field(u64, start)
__field(u64, end)
@@ -373,14 +396,14 @@ DECLARE_EVENT_CLASS(xe_vma,
),
TP_fast_assign(
- __entry->vma = (unsigned long)vma;
+ __entry->vma = vma;
__entry->asid = xe_vma_vm(vma)->usm.asid;
__entry->start = xe_vma_start(vma);
__entry->end = xe_vma_end(vma) - 1;
__entry->ptr = xe_vma_userptr(vma);
),
- TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
+ TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,",
__entry->vma, __entry->asid, __entry->start,
__entry->end, __entry->ptr)
)
@@ -465,16 +488,16 @@ DECLARE_EVENT_CLASS(xe_vm,
TP_ARGS(vm),
TP_STRUCT__entry(
- __field(u64, vm)
+ __field(struct xe_vm *, vm)
__field(u32, asid)
),
TP_fast_assign(
- __entry->vm = (unsigned long)vm;
+ __entry->vm = vm;
__entry->asid = vm->usm.asid;
),
- TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm,
+ TP_printk("vm=%p, asid=0x%05x", __entry->vm,
__entry->asid)
);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 10b6995fbf29..3b21afe5b488 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -37,8 +37,6 @@
#include "generated/xe_wa_oob.h"
#include "xe_wa.h"
-#define TEST_VM_ASYNC_OPS_ERROR
-
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
return vm->gpuvm.r_obj;
@@ -46,7 +44,7 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
/**
* xe_vma_userptr_check_repin() - Advisory check for repin needed
- * @vma: The userptr vma
+ * @uvma: The userptr vma
*
* Check if the userptr vma has been invalidated since last successful
* repin. The check is advisory only and can the function can be called
@@ -56,15 +54,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
*
* Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
*/
-int xe_vma_userptr_check_repin(struct xe_vma *vma)
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
- return mmu_interval_check_retry(&vma->userptr.notifier,
- vma->userptr.notifier_seq) ?
+ return mmu_interval_check_retry(&uvma->userptr.notifier,
+ uvma->userptr.notifier_seq) ?
-EAGAIN : 0;
}
-int xe_vma_userptr_pin_pages(struct xe_vma *vma)
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_device *xe = vm->xe;
const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
@@ -80,30 +80,30 @@ retry:
if (vma->gpuva.flags & XE_VMA_DESTROYED)
return 0;
- notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
- if (notifier_seq == vma->userptr.notifier_seq)
+ notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+ if (notifier_seq == userptr->notifier_seq)
return 0;
pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
- if (vma->userptr.sg) {
+ if (userptr->sg) {
dma_unmap_sgtable(xe->drm.dev,
- vma->userptr.sg,
+ userptr->sg,
read_only ? DMA_TO_DEVICE :
DMA_BIDIRECTIONAL, 0);
- sg_free_table(vma->userptr.sg);
- vma->userptr.sg = NULL;
+ sg_free_table(userptr->sg);
+ userptr->sg = NULL;
}
pinned = ret = 0;
if (in_kthread) {
- if (!mmget_not_zero(vma->userptr.notifier.mm)) {
+ if (!mmget_not_zero(userptr->notifier.mm)) {
ret = -EFAULT;
goto mm_closed;
}
- kthread_use_mm(vma->userptr.notifier.mm);
+ kthread_use_mm(userptr->notifier.mm);
}
while (pinned < num_pages) {
@@ -112,43 +112,40 @@ retry:
num_pages - pinned,
read_only ? 0 : FOLL_WRITE,
&pages[pinned]);
- if (ret < 0) {
- if (in_kthread)
- ret = 0;
+ if (ret < 0)
break;
- }
pinned += ret;
ret = 0;
}
if (in_kthread) {
- kthread_unuse_mm(vma->userptr.notifier.mm);
- mmput(vma->userptr.notifier.mm);
+ kthread_unuse_mm(userptr->notifier.mm);
+ mmput(userptr->notifier.mm);
}
mm_closed:
if (ret)
goto out;
- ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
+ ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
pinned, 0,
(u64)pinned << PAGE_SHIFT,
xe_sg_segment_size(xe->drm.dev),
GFP_KERNEL);
if (ret) {
- vma->userptr.sg = NULL;
+ userptr->sg = NULL;
goto out;
}
- vma->userptr.sg = &vma->userptr.sgt;
+ userptr->sg = &userptr->sgt;
- ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
+ ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
read_only ? DMA_TO_DEVICE :
DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC |
DMA_ATTR_NO_KERNEL_MAPPING);
if (ret) {
- sg_free_table(vma->userptr.sg);
- vma->userptr.sg = NULL;
+ sg_free_table(userptr->sg);
+ userptr->sg = NULL;
goto out;
}
@@ -167,8 +164,8 @@ out:
kvfree(pages);
if (!(ret < 0)) {
- vma->userptr.notifier_seq = notifier_seq;
- if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
+ userptr->notifier_seq = notifier_seq;
+ if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
goto retry;
}
@@ -635,7 +632,9 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
- struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
+ struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
+ struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+ struct xe_vma *vma = &uvma->vma;
struct xe_vm *vm = xe_vma_vm(vma);
struct dma_resv_iter cursor;
struct dma_fence *fence;
@@ -651,7 +650,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(mni, cur_seq);
/* No need to stop gpu access if the userptr is not yet bound. */
- if (!vma->userptr.initial_bind) {
+ if (!userptr->initial_bind) {
up_write(&vm->userptr.notifier_lock);
return true;
}
@@ -663,7 +662,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
if (!xe_vm_in_fault_mode(vm) &&
!(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&vma->userptr.invalidate_link,
+ list_move_tail(&userptr->invalidate_link,
&vm->userptr.invalidated);
spin_unlock(&vm->userptr.invalidated_lock);
}
@@ -703,7 +702,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
int xe_vm_userptr_pin(struct xe_vm *vm)
{
- struct xe_vma *vma, *next;
+ struct xe_userptr_vma *uvma, *next;
int err = 0;
LIST_HEAD(tmp_evict);
@@ -711,22 +710,23 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
/* Collect invalidated userptrs */
spin_lock(&vm->userptr.invalidated_lock);
- list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
+ list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
userptr.invalidate_link) {
- list_del_init(&vma->userptr.invalidate_link);
- list_move_tail(&vma->combined_links.userptr,
+ list_del_init(&uvma->userptr.invalidate_link);
+ list_move_tail(&uvma->userptr.repin_link,
&vm->userptr.repin_list);
}
spin_unlock(&vm->userptr.invalidated_lock);
/* Pin and move to temporary list */
- list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
- combined_links.userptr) {
- err = xe_vma_userptr_pin_pages(vma);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ err = xe_vma_userptr_pin_pages(uvma);
if (err < 0)
return err;
- list_move_tail(&vma->combined_links.userptr, &vm->rebind_list);
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list);
}
return 0;
@@ -782,6 +782,14 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
return fence;
}
+static void xe_vma_free(struct xe_vma *vma)
+{
+ if (xe_vma_is_userptr(vma))
+ kfree(to_userptr_vma(vma));
+ else
+ kfree(vma);
+}
+
#define VMA_CREATE_FLAG_READ_ONLY BIT(0)
#define VMA_CREATE_FLAG_IS_NULL BIT(1)
@@ -800,14 +808,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
xe_assert(vm->xe, start < end);
xe_assert(vm->xe, end < vm->size);
- if (!bo && !is_null) /* userptr */
+ /*
+ * Allocate and ensure that the xe_vma_is_userptr() return
+ * matches what was allocated.
+ */
+ if (!bo && !is_null) {
+ struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
+
+ if (!uvma)
+ return ERR_PTR(-ENOMEM);
+
+ vma = &uvma->vma;
+ } else {
vma = kzalloc(sizeof(*vma), GFP_KERNEL);
- else
- vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
- GFP_KERNEL);
- if (!vma) {
- vma = ERR_PTR(-ENOMEM);
- return vma;
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ if (is_null)
+ vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+ if (bo)
+ vma->gpuva.gem.obj = &bo->ttm.base;
}
INIT_LIST_HEAD(&vma->combined_links.rebind);
@@ -818,8 +838,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vma->gpuva.va.range = end - start + 1;
if (read_only)
vma->gpuva.flags |= XE_VMA_READ_ONLY;
- if (is_null)
- vma->gpuva.flags |= DRM_GPUVA_SPARSE;
for_each_tile(tile, vm->xe, id)
vma->tile_mask |= 0x1 << id;
@@ -836,35 +854,35 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
if (IS_ERR(vm_bo)) {
- kfree(vma);
+ xe_vma_free(vma);
return ERR_CAST(vm_bo);
}
drm_gpuvm_bo_extobj_add(vm_bo);
drm_gem_object_get(&bo->ttm.base);
- vma->gpuva.gem.obj = &bo->ttm.base;
vma->gpuva.gem.offset = bo_offset_or_userptr;
drm_gpuva_link(&vma->gpuva, vm_bo);
drm_gpuvm_bo_put(vm_bo);
} else /* userptr or null */ {
if (!is_null) {
+ struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
u64 size = end - start + 1;
int err;
- INIT_LIST_HEAD(&vma->userptr.invalidate_link);
+ INIT_LIST_HEAD(&userptr->invalidate_link);
+ INIT_LIST_HEAD(&userptr->repin_link);
vma->gpuva.gem.offset = bo_offset_or_userptr;
- err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+ err = mmu_interval_notifier_insert(&userptr->notifier,
current->mm,
xe_vma_userptr(vma), size,
&vma_userptr_notifier_ops);
if (err) {
- kfree(vma);
- vma = ERR_PTR(err);
- return vma;
+ xe_vma_free(vma);
+ return ERR_PTR(err);
}
- vma->userptr.notifier_seq = LONG_MAX;
+ userptr->notifier_seq = LONG_MAX;
}
xe_vm_get(vm);
@@ -879,14 +897,21 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
struct xe_device *xe = vm->xe;
bool read_only = xe_vma_read_only(vma);
+ if (vma->ufence) {
+ xe_sync_ufence_put(vma->ufence);
+ vma->ufence = NULL;
+ }
+
if (xe_vma_is_userptr(vma)) {
- if (vma->userptr.sg) {
+ struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+
+ if (userptr->sg) {
dma_unmap_sgtable(xe->drm.dev,
- vma->userptr.sg,
+ userptr->sg,
read_only ? DMA_TO_DEVICE :
DMA_BIDIRECTIONAL, 0);
- sg_free_table(vma->userptr.sg);
- vma->userptr.sg = NULL;
+ sg_free_table(userptr->sg);
+ userptr->sg = NULL;
}
/*
@@ -894,7 +919,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
* the notifer until we're sure the GPU is not accessing
* them anymore
*/
- mmu_interval_notifier_remove(&vma->userptr.notifier);
+ mmu_interval_notifier_remove(&userptr->notifier);
xe_vm_put(vm);
} else if (xe_vma_is_null(vma)) {
xe_vm_put(vm);
@@ -902,7 +927,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
xe_bo_put(xe_vma_bo(vma));
}
- kfree(vma);
+ xe_vma_free(vma);
}
static void vma_destroy_work_func(struct work_struct *w)
@@ -933,7 +958,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
spin_lock(&vm->userptr.invalidated_lock);
- list_del(&vma->userptr.invalidate_link);
+ list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
spin_unlock(&vm->userptr.invalidated_lock);
} else if (!xe_vma_is_null(vma)) {
xe_bo_assert_held(xe_vma_bo(vma));
@@ -975,9 +1000,16 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
int err;
XE_WARN_ON(!vm);
- err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
- if (!err && bo && !bo->vm)
- err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+ if (num_shared)
+ err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
+ else
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (!err && bo && !bo->vm) {
+ if (num_shared)
+ err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+ else
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
+ }
return err;
}
@@ -1581,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
trace_xe_vma_unbind(vma);
+ if (vma->ufence) {
+ struct xe_user_fence * const f = vma->ufence;
+
+ if (!xe_sync_ufence_get_status(f))
+ return ERR_PTR(-EBUSY);
+
+ vma->ufence = NULL;
+ xe_sync_ufence_put(f);
+ }
+
if (number_tiles > 1) {
fences = kmalloc_array(number_tiles, sizeof(*fences),
GFP_KERNEL);
@@ -1714,6 +1756,21 @@ err_fences:
return ERR_PTR(err);
}
+static struct xe_user_fence *
+find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_syncs; i++) {
+ struct xe_sync_entry *e = &syncs[i];
+
+ if (xe_sync_is_ufence(e))
+ return xe_sync_ufence_get(e);
+ }
+
+ return NULL;
+}
+
static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
struct xe_exec_queue *q, struct xe_sync_entry *syncs,
u32 num_syncs, bool immediate, bool first_op,
@@ -1721,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
{
struct dma_fence *fence;
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+ struct xe_user_fence *ufence;
xe_vm_assert_held(vm);
+ ufence = find_ufence_get(syncs, num_syncs);
+ if (vma->ufence && ufence)
+ xe_sync_ufence_put(vma->ufence);
+
+ vma->ufence = ufence ?: vma->ufence;
+
if (immediate) {
fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
last_op);
@@ -1855,10 +1919,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
mutex_lock(&xef->vm.lock);
err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
mutex_unlock(&xef->vm.lock);
- if (err) {
- xe_vm_close_and_put(vm);
- return err;
- }
+ if (err)
+ goto err_close_and_put;
if (xe->info.has_asid) {
mutex_lock(&xe->usm.lock);
@@ -1866,11 +1928,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
XA_LIMIT(1, XE_MAX_ASID - 1),
&xe->usm.next_asid, GFP_KERNEL);
mutex_unlock(&xe->usm.lock);
- if (err < 0) {
- xe_vm_close_and_put(vm);
- return err;
- }
- err = 0;
+ if (err < 0)
+ goto err_free_id;
+
vm->usm.asid = asid;
}
@@ -1888,6 +1948,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
#endif
return 0;
+
+err_free_id:
+ mutex_lock(&xef->vm.lock);
+ xa_erase(&xef->vm.xa, id);
+ mutex_unlock(&xef->vm.lock);
+err_close_and_put:
+ xe_vm_close_and_put(vm);
+
+ return err;
}
int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
@@ -1954,6 +2023,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
xe_exec_queue_last_fence_get(wait_exec_queue, vm);
xe_sync_entry_signal(&syncs[i], NULL, fence);
+ dma_fence_put(fence);
}
}
@@ -2034,7 +2104,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
struct drm_gpuva_ops *ops;
struct drm_gpuva_op *__op;
- struct xe_vma_op *op;
struct drm_gpuvm_bo *vm_bo;
int err;
@@ -2081,23 +2150,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
if (IS_ERR(ops))
return ops;
-#ifdef TEST_VM_ASYNC_OPS_ERROR
- if (operation & FORCE_ASYNC_OP_ERROR) {
- op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
- base.entry);
- if (op)
- op->inject_error = true;
- }
-#endif
-
drm_gpuva_for_each_op(__op, ops) {
struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
if (__op->op == DRM_GPUVA_OP_MAP) {
- op->map.immediate =
- flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
- op->map.read_only =
- flags & DRM_XE_VM_BIND_FLAG_READONLY;
op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
op->map.pat_index = pat_index;
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
@@ -2145,7 +2201,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
drm_exec_fini(&exec);
if (xe_vma_is_userptr(vma)) {
- err = xe_vma_userptr_pin_pages(vma);
+ err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
if (err) {
prep_vma_destroy(vm, vma, false);
xe_vma_destroy_unlocked(vma);
@@ -2167,13 +2223,17 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma)
{
if (vma->gpuva.flags & XE_VMA_PTE_1G)
return SZ_1G;
- else if (vma->gpuva.flags & XE_VMA_PTE_2M)
+ else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
return SZ_2M;
+ else if (vma->gpuva.flags & XE_VMA_PTE_64K)
+ return SZ_64K;
+ else if (vma->gpuva.flags & XE_VMA_PTE_4K)
+ return SZ_4K;
- return SZ_4K;
+ return SZ_1G; /* Uninitialized, used max size */
}
-static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
{
switch (size) {
case SZ_1G:
@@ -2182,9 +2242,13 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
case SZ_2M:
vma->gpuva.flags |= XE_VMA_PTE_2M;
break;
+ case SZ_64K:
+ vma->gpuva.flags |= XE_VMA_PTE_64K;
+ break;
+ case SZ_4K:
+ vma->gpuva.flags |= XE_VMA_PTE_4K;
+ break;
}
-
- return SZ_4K;
}
static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
@@ -2282,8 +2346,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
{
- flags |= op->map.read_only ?
- VMA_CREATE_FLAG_READ_ONLY : 0;
flags |= op->map.is_null ?
VMA_CREATE_FLAG_IS_NULL : 0;
@@ -2414,7 +2476,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
case DRM_GPUVA_OP_MAP:
err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
op->syncs, op->num_syncs,
- op->map.immediate || !xe_vm_in_fault_mode(vm),
+ !xe_vm_in_fault_mode(vm),
op->flags & XE_VMA_OP_FIRST,
op->flags & XE_VMA_OP_LAST);
break;
@@ -2500,13 +2562,25 @@ retry_userptr:
}
drm_exec_fini(&exec);
- if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+ if (err == -EAGAIN) {
lockdep_assert_held_write(&vm->lock);
- err = xe_vma_userptr_pin_pages(vma);
- if (!err)
- goto retry_userptr;
- trace_xe_vma_fail(vma);
+ if (op->base.op == DRM_GPUVA_OP_REMAP) {
+ if (!op->remap.unmap_done)
+ vma = gpuva_to_vma(op->base.remap.unmap->va);
+ else if (op->remap.prev)
+ vma = op->remap.prev;
+ else
+ vma = op->remap.next;
+ }
+
+ if (xe_vma_is_userptr(vma)) {
+ err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+ if (!err)
+ goto retry_userptr;
+
+ trace_xe_vma_fail(vma);
+ }
}
return err;
@@ -2518,13 +2592,6 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
lockdep_assert_held_write(&vm->lock);
-#ifdef TEST_VM_ASYNC_OPS_ERROR
- if (op->inject_error) {
- op->inject_error = false;
- return -ENOMEM;
- }
-#endif
-
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
ret = __xe_vma_op_execute(vm, op->map.vma, op);
@@ -2639,7 +2706,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
{
int i;
- for (i = num_ops_list - 1; i; ++i) {
+ for (i = num_ops_list - 1; i >= 0; --i) {
struct drm_gpuva_ops *__ops = ops[i];
struct drm_gpuva_op *__op;
@@ -2684,21 +2751,11 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
return 0;
}
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-#define SUPPORTED_FLAGS \
- (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
- DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
-#else
-#define SUPPORTED_FLAGS \
- (DRM_XE_VM_BIND_FLAG_READONLY | \
- DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
- 0xffff)
-#endif
+#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \
+ DRM_XE_VM_BIND_FLAG_DUMPABLE)
#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
-#define MAX_BINDS 512 /* FIXME: Picking random upper limit */
-
static int vm_bind_ioctl_check_args(struct xe_device *xe,
struct drm_xe_vm_bind *args,
struct drm_xe_vm_bind_op **bind_ops)
@@ -2710,16 +2767,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, args->extensions) ||
- XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
+ if (XE_IOCTL_DBG(xe, args->extensions))
return -EINVAL;
if (args->num_binds > 1) {
u64 __user *bind_user =
u64_to_user_ptr(args->vector_of_binds);
- *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
- args->num_binds, GFP_KERNEL);
+ *bind_ops = kvmalloc_array(args->num_binds,
+ sizeof(struct drm_xe_vm_bind_op),
+ GFP_KERNEL | __GFP_ACCOUNT);
if (!*bind_ops)
return -ENOMEM;
@@ -2809,7 +2866,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
free_bind_ops:
if (args->num_binds > 1)
- kfree(*bind_ops);
+ kvfree(*bind_ops);
return err;
}
@@ -2846,7 +2903,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_gpuva_ops **ops = NULL;
struct xe_vm *vm;
struct xe_exec_queue *q = NULL;
- u32 num_syncs;
+ u32 num_syncs, num_ufence = 0;
struct xe_sync_entry *syncs = NULL;
struct drm_xe_vm_bind_op *bind_ops;
LIST_HEAD(ops_list);
@@ -2897,13 +2954,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
if (args->num_binds) {
- bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
+ bos = kvcalloc(args->num_binds, sizeof(*bos),
+ GFP_KERNEL | __GFP_ACCOUNT);
if (!bos) {
err = -ENOMEM;
goto release_vm_lock;
}
- ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
+ ops = kvcalloc(args->num_binds, sizeof(*ops),
+ GFP_KERNEL | __GFP_ACCOUNT);
if (!ops) {
err = -ENOMEM;
goto release_vm_lock;
@@ -2983,6 +3042,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
if (err)
goto free_syncs;
+
+ if (xe_sync_is_ufence(&syncs[num_syncs]))
+ num_ufence++;
+ }
+
+ if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+ err = -EINVAL;
+ goto free_syncs;
}
if (!args->num_binds) {
@@ -3036,10 +3103,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
for (i = 0; bos && i < args->num_binds; ++i)
xe_bo_put(bos[i]);
- kfree(bos);
- kfree(ops);
+ kvfree(bos);
+ kvfree(ops);
if (args->num_binds > 1)
- kfree(bind_ops);
+ kvfree(bind_ops);
return err;
@@ -3063,10 +3130,10 @@ put_exec_queue:
if (q)
xe_exec_queue_put(q);
free_objs:
- kfree(bos);
- kfree(ops);
+ kvfree(bos);
+ kvfree(ops);
if (args->num_binds > 1)
- kfree(bind_ops);
+ kvfree(bind_ops);
return err;
}
@@ -3125,8 +3192,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
if (xe_vma_is_userptr(vma)) {
WARN_ON_ONCE(!mmu_interval_check_retry
- (&vma->userptr.notifier,
- vma->userptr.notifier_seq));
+ (&to_userptr_vma(vma)->userptr.notifier,
+ to_userptr_vma(vma)->userptr.notifier_seq));
WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
DMA_RESV_USAGE_BOOKKEEP));
@@ -3187,11 +3254,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
if (is_null) {
addr = 0;
} else if (is_userptr) {
+ struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
struct xe_res_cursor cur;
- if (vma->userptr.sg) {
- xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
- &cur);
+ if (sg) {
+ xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
addr = xe_res_dma(&cur);
} else {
addr = 0;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index cf2f96e8c1ab..9654a0612fc2 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -160,6 +160,18 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
}
+/**
+ * to_userptr_vma() - Return a pointer to an embedding userptr vma
+ * @vma: Pointer to the embedded struct xe_vma
+ *
+ * Return: Pointer to the embedding userptr vma
+ */
+static inline struct xe_userptr_vma *to_userptr_vma(struct xe_vma *vma)
+{
+ xe_assert(xe_vma_vm(vma)->xe, xe_vma_is_userptr(vma));
+ return container_of(vma, struct xe_userptr_vma, vma);
+}
+
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
int xe_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -224,9 +236,9 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
}
}
-int xe_vma_userptr_pin_pages(struct xe_vma *vma);
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
-int xe_vma_userptr_check_repin(struct xe_vma *vma);
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 63e8a50b88e9..7300eea5394b 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -19,11 +19,9 @@
struct xe_bo;
struct xe_sync_entry;
+struct xe_user_fence;
struct xe_vm;
-#define TEST_VM_ASYNC_OPS_ERROR
-#define FORCE_ASYNC_OP_ERROR BIT(31)
-
#define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS
#define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1)
#define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2)
@@ -32,11 +30,15 @@ struct xe_vm;
#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5)
#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6)
#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7)
+#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8)
+#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9)
/** struct xe_userptr - User pointer */
struct xe_userptr {
/** @invalidate_link: Link for the vm::userptr.invalidated list */
struct list_head invalidate_link;
+ /** @userptr: link into VM repin list if userptr. */
+ struct list_head repin_link;
/**
* @notifier: MMU notifier for user pointer (invalidation call back)
*/
@@ -68,8 +70,6 @@ struct xe_vma {
* resv.
*/
union {
- /** @userptr: link into VM repin list if userptr. */
- struct list_head userptr;
/** @rebind: link into VM if this VMA needs rebinding. */
struct list_head rebind;
/** @destroy: link to contested list when VM is being closed. */
@@ -107,9 +107,19 @@ struct xe_vma {
u16 pat_index;
/**
- * @userptr: user pointer state, only allocated for VMAs that are
- * user pointers
+ * @ufence: The user fence that was provided with MAP.
+ * Needs to be signalled before UNMAP can be processed.
*/
+ struct xe_user_fence *ufence;
+};
+
+/**
+ * struct xe_userptr_vma - A userptr vma subclass
+ * @vma: The vma.
+ * @userptr: Additional userptr information.
+ */
+struct xe_userptr_vma {
+ struct xe_vma vma;
struct xe_userptr userptr;
};
@@ -285,10 +295,6 @@ struct xe_vm {
struct xe_vma_op_map {
/** @vma: VMA to map */
struct xe_vma *vma;
- /** @immediate: Immediate bind */
- bool immediate;
- /** @read_only: Read only */
- bool read_only;
/** @is_null: is NULL binding */
bool is_null;
/** @pat_index: The pat index to use for this operation. */
@@ -356,11 +362,6 @@ struct xe_vma_op {
/** @flags: operation flags */
enum xe_vma_op_flags flags;
-#ifdef TEST_VM_ASYNC_OPS_ERROR
- /** @inject_error: inject error to test async op error handling */
- bool inject_error;
-#endif
-
union {
/** @map: VMA map operation specific data */
struct xe_vma_op_map map;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 42fd504abbcd..89983d7d73ca 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = {
.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
.sid_table = tegra186_sid_table,
.reserve_vblank_syncpts = false,
+ .skip_reset_assert = true,
};
static const struct host1x_sid_entry tegra194_sid_table[] = {
@@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev)
host1x_intr_stop(host);
host1x_syncpt_save(host);
- err = reset_control_bulk_assert(host->nresets, host->resets);
- if (err) {
- dev_err(dev, "failed to assert reset: %d\n", err);
- goto resume_host1x;
- }
+ if (!host->info->skip_reset_assert) {
+ err = reset_control_bulk_assert(host->nresets, host->resets);
+ if (err) {
+ dev_err(dev, "failed to assert reset: %d\n", err);
+ goto resume_host1x;
+ }
- usleep_range(1000, 2000);
+ usleep_range(1000, 2000);
+ }
clk_disable_unprepare(host->clk);
reset_control_bulk_release(host->nresets, host->resets);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index c8e302de7625..925a118db23f 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -116,6 +116,12 @@ struct host1x_info {
* the display driver disables VBLANK increments.
*/
bool reserve_vblank_syncpts;
+ /*
+ * On Tegra186, secure world applications may require access to
+ * host1x during suspend/resume. To allow this, we need to leave
+ * host1x not in reset.
+ */
+ bool skip_reset_assert;
};
struct host1x {
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index d9ef45fcaeab..e630caf644e8 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -143,6 +143,9 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s
}
EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup);
+/* Disables missing prototype warnings */
+__bpf_kfunc_start_defs();
+
/**
* hid_bpf_get_data - Get the kernel memory pointer associated with the context @ctx
*
@@ -152,7 +155,7 @@ EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup);
*
* @returns %NULL on error, an %__u8 memory pointer on success
*/
-noinline __u8 *
+__bpf_kfunc __u8 *
hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr_buf_size)
{
struct hid_bpf_ctx_kern *ctx_kern;
@@ -167,14 +170,15 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr
return ctx_kern->data + offset;
}
+__bpf_kfunc_end_defs();
/*
* The following set contains all functions we agree BPF programs
* can use.
*/
-BTF_SET8_START(hid_bpf_kfunc_ids)
+BTF_KFUNCS_START(hid_bpf_kfunc_ids)
BTF_ID_FLAGS(func, hid_bpf_get_data, KF_RET_NULL)
-BTF_SET8_END(hid_bpf_kfunc_ids)
+BTF_KFUNCS_END(hid_bpf_kfunc_ids)
static const struct btf_kfunc_id_set hid_bpf_kfunc_set = {
.owner = THIS_MODULE,
@@ -241,6 +245,42 @@ int hid_bpf_reconnect(struct hid_device *hdev)
return 0;
}
+static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog,
+ __u32 flags)
+{
+ int fd, err, prog_type;
+
+ prog_type = hid_bpf_get_prog_attach_type(prog);
+ if (prog_type < 0)
+ return prog_type;
+
+ if (prog_type >= HID_BPF_PROG_TYPE_MAX)
+ return -EINVAL;
+
+ if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) {
+ err = hid_bpf_allocate_event_data(hdev);
+ if (err)
+ return err;
+ }
+
+ fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags);
+ if (fd < 0)
+ return fd;
+
+ if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) {
+ err = hid_bpf_reconnect(hdev);
+ if (err) {
+ close_fd(fd);
+ return err;
+ }
+ }
+
+ return fd;
+}
+
+/* Disables missing prototype warnings */
+__bpf_kfunc_start_defs();
+
/**
* hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device
*
@@ -253,22 +293,17 @@ int hid_bpf_reconnect(struct hid_device *hdev)
* is pinned to the BPF file system).
*/
/* called from syscall */
-noinline int
+__bpf_kfunc int
hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
{
struct hid_device *hdev;
+ struct bpf_prog *prog;
struct device *dev;
- int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd);
+ int err, fd;
if (!hid_bpf_ops)
return -EINVAL;
- if (prog_type < 0)
- return prog_type;
-
- if (prog_type >= HID_BPF_PROG_TYPE_MAX)
- return -EINVAL;
-
if ((flags & ~HID_BPF_FLAG_MASK))
return -EINVAL;
@@ -278,25 +313,29 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
hdev = to_hid_device(dev);
- if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) {
- err = hid_bpf_allocate_event_data(hdev);
- if (err)
- return err;
+ /*
+ * take a ref on the prog itself, it will be released
+ * on errors or when it'll be detached
+ */
+ prog = bpf_prog_get(prog_fd);
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto out_dev_put;
}
- fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags);
- if (fd < 0)
- return fd;
-
- if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) {
- err = hid_bpf_reconnect(hdev);
- if (err) {
- close_fd(fd);
- return err;
- }
+ fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags);
+ if (fd < 0) {
+ err = fd;
+ goto out_prog_put;
}
return fd;
+
+ out_prog_put:
+ bpf_prog_put(prog);
+ out_dev_put:
+ put_device(dev);
+ return err;
}
/**
@@ -306,7 +345,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
*
* @returns A pointer to &struct hid_bpf_ctx on success, %NULL on error.
*/
-noinline struct hid_bpf_ctx *
+__bpf_kfunc struct hid_bpf_ctx *
hid_bpf_allocate_context(unsigned int hid_id)
{
struct hid_device *hdev;
@@ -323,8 +362,10 @@ hid_bpf_allocate_context(unsigned int hid_id)
hdev = to_hid_device(dev);
ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL);
- if (!ctx_kern)
+ if (!ctx_kern) {
+ put_device(dev);
return NULL;
+ }
ctx_kern->ctx.hid = hdev;
@@ -337,14 +378,19 @@ hid_bpf_allocate_context(unsigned int hid_id)
* @ctx: the HID-BPF context to release
*
*/
-noinline void
+__bpf_kfunc void
hid_bpf_release_context(struct hid_bpf_ctx *ctx)
{
struct hid_bpf_ctx_kern *ctx_kern;
+ struct hid_device *hid;
ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx);
+ hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */
kfree(ctx_kern);
+
+ /* get_device() is called by bus_find_device() */
+ put_device(&hid->dev);
}
/**
@@ -358,7 +404,7 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx)
*
* @returns %0 on success, a negative error code otherwise.
*/
-noinline int
+__bpf_kfunc int
hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
enum hid_report_type rtype, enum hid_class_request reqtype)
{
@@ -426,6 +472,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
kfree(dma_data);
return ret;
}
+__bpf_kfunc_end_defs();
/* our HID-BPF entrypoints */
BTF_SET8_START(hid_bpf_fmodret_ids)
@@ -440,12 +487,12 @@ static const struct btf_kfunc_id_set hid_bpf_fmodret_set = {
};
/* for syscall HID-BPF */
-BTF_SET8_START(hid_bpf_syscall_kfunc_ids)
+BTF_KFUNCS_START(hid_bpf_syscall_kfunc_ids)
BTF_ID_FLAGS(func, hid_bpf_attach_prog)
BTF_ID_FLAGS(func, hid_bpf_allocate_context, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, hid_bpf_release_context, KF_RELEASE)
BTF_ID_FLAGS(func, hid_bpf_hw_request)
-BTF_SET8_END(hid_bpf_syscall_kfunc_ids)
+BTF_KFUNCS_END(hid_bpf_syscall_kfunc_ids)
static const struct btf_kfunc_id_set hid_bpf_syscall_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h
index 63dfc8605cd2..fbe0639d09f2 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.h
+++ b/drivers/hid/bpf/hid_bpf_dispatch.h
@@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern {
int hid_bpf_preload_skel(void);
void hid_bpf_free_links_and_skel(void);
-int hid_bpf_get_prog_attach_type(int prog_fd);
+int hid_bpf_get_prog_attach_type(struct bpf_prog *prog);
int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd,
- __u32 flags);
+ struct bpf_prog *prog, __u32 flags);
void __hid_bpf_destroy_device(struct hid_device *hdev);
int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type,
struct hid_bpf_ctx_kern *ctx_kern);
diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c
index eca34b7372f9..aa8e1c79cdf5 100644
--- a/drivers/hid/bpf/hid_bpf_jmp_table.c
+++ b/drivers/hid/bpf/hid_bpf_jmp_table.c
@@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx)
static void hid_bpf_release_progs(struct work_struct *work)
{
int i, j, n, map_fd = -1;
+ bool hdev_destroyed;
if (!jmp_table.map)
return;
@@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work)
if (entry->hdev) {
hdev = entry->hdev;
type = entry->type;
+ /*
+ * hdev is still valid, even if we are called after hid_destroy_device():
+ * when hid_bpf_attach() gets called, it takes a ref on the dev through
+ * bus_find_device()
+ */
+ hdev_destroyed = hdev->bpf.destroyed;
hid_bpf_populate_hdev(hdev, type);
@@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work)
if (test_bit(next->idx, jmp_table.enabled))
continue;
- if (next->hdev == hdev && next->type == type)
+ if (next->hdev == hdev && next->type == type) {
+ /*
+ * clear the hdev reference and decrement the device ref
+ * that was taken during bus_find_device() while calling
+ * hid_bpf_attach()
+ */
next->hdev = NULL;
+ put_device(&hdev->dev);
+ }
}
- /* if type was rdesc fixup, reconnect device */
- if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP)
+ /* if type was rdesc fixup and the device is not gone, reconnect device */
+ if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed)
hid_bpf_reconnect(hdev);
}
}
@@ -333,15 +347,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog)
return err;
}
-int hid_bpf_get_prog_attach_type(int prog_fd)
+int hid_bpf_get_prog_attach_type(struct bpf_prog *prog)
{
- struct bpf_prog *prog = NULL;
- int i;
int prog_type = HID_BPF_PROG_TYPE_UNDEF;
-
- prog = bpf_prog_get(prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ int i;
for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) {
if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) {
@@ -350,8 +359,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd)
}
}
- bpf_prog_put(prog);
-
return prog_type;
}
@@ -388,19 +395,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = {
/* called from syscall */
noinline int
__hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type,
- int prog_fd, __u32 flags)
+ int prog_fd, struct bpf_prog *prog, __u32 flags)
{
struct bpf_link_primer link_primer;
struct hid_bpf_link *link;
- struct bpf_prog *prog = NULL;
struct hid_bpf_prog_entry *prog_entry;
int cnt, err = -EINVAL, prog_table_idx = -1;
- /* take a ref on the prog itself */
- prog = bpf_prog_get(prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
-
mutex_lock(&hid_bpf_attach_lock);
link = kzalloc(sizeof(*link), GFP_USER);
@@ -467,7 +468,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type,
err_unlock:
mutex_unlock(&hid_bpf_attach_lock);
- bpf_prog_put(prog);
kfree(link);
return err;
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index fb30e228d35f..828a5c022c64 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -298,6 +298,9 @@
#define USB_VENDOR_ID_CIDC 0x1677
+#define I2C_VENDOR_ID_CIRQUE 0x0488
+#define I2C_PRODUCT_ID_CIRQUE_1063 0x1063
+
#define USB_VENDOR_ID_CJTOUCH 0x24b8
#define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020
#define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index fd6d8f1d9b8f..d2f3f234f29d 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -203,6 +203,8 @@ struct hidpp_device {
struct hidpp_scroll_counter vertical_wheel_counter;
u8 wireless_feature_index;
+
+ bool connected_once;
};
/* HID++ 1.0 error codes */
@@ -988,8 +990,13 @@ static int hidpp_root_get_protocol_version(struct hidpp_device *hidpp)
hidpp->protocol_minor = response.rap.params[1];
print_version:
- hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n",
- hidpp->protocol_major, hidpp->protocol_minor);
+ if (!hidpp->connected_once) {
+ hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n",
+ hidpp->protocol_major, hidpp->protocol_minor);
+ hidpp->connected_once = true;
+ } else
+ hid_dbg(hidpp->hid_dev, "HID++ %u.%u device connected.\n",
+ hidpp->protocol_major, hidpp->protocol_minor);
return 0;
}
@@ -4184,7 +4191,7 @@ static void hidpp_connect_event(struct work_struct *work)
/* Get device version to check if it is connected */
ret = hidpp_root_get_protocol_version(hidpp);
if (ret) {
- hid_info(hidpp->hid_dev, "Disconnected\n");
+ hid_dbg(hidpp->hid_dev, "Disconnected\n");
if (hidpp->battery.ps) {
hidpp->battery.online = false;
hidpp->battery.status = POWER_SUPPLY_STATUS_UNKNOWN;
@@ -4610,6 +4617,8 @@ static const struct hid_device_id hidpp_devices[] = {
HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) },
{ /* Logitech G Pro X Superlight Gaming Mouse over USB */
HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) },
+ { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */
+ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) },
{ /* G935 Gaming Headset */
HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87),
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index fd5b0637dad6..3e91e4d6ba6f 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -2153,6 +2153,10 @@ static const struct hid_device_id mt_devices[] = {
{ .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_SYNAPTICS, 0xcddc) },
+
+ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
+ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
USB_VENDOR_ID_SYNAPTICS, 0xce08) },
{ .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c
index 82d0a77359c4..58b15750dbb0 100644
--- a/drivers/hid/hid-nvidia-shield.c
+++ b/drivers/hid/hid-nvidia-shield.c
@@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts)
led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL,
"thunderstrike%d:blue:led", ts->id);
+ if (!led->name)
+ return -ENOMEM;
led->max_brightness = 1;
led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN;
led->brightness_get = &thunderstrike_led_get_brightness;
@@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev)
shield_dev->battery_dev.desc.name =
devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL,
"thunderstrike_%d", ts->id);
+ if (!shield_dev->battery_dev.desc.name)
+ return -ENOMEM;
shield_dev->battery_dev.psy = power_supply_register(
&hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg);
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
index b3c4e50e248a..b08a5ab58528 100644
--- a/drivers/hid/hid-steam.c
+++ b/drivers/hid/hid-steam.c
@@ -1109,10 +1109,9 @@ static int steam_probe(struct hid_device *hdev,
return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL);
- if (!steam) {
- ret = -ENOMEM;
- goto steam_alloc_fail;
- }
+ if (!steam)
+ return -ENOMEM;
+
steam->hdev = hdev;
hid_set_drvdata(hdev, steam);
spin_lock_init(&steam->lock);
@@ -1129,14 +1128,14 @@ static int steam_probe(struct hid_device *hdev,
*/
ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW);
if (ret)
- goto hid_hw_start_fail;
+ goto err_cancel_work;
ret = hid_hw_open(hdev);
if (ret) {
hid_err(hdev,
"%s:hid_hw_open\n",
__func__);
- goto hid_hw_open_fail;
+ goto err_hw_stop;
}
if (steam->quirks & STEAM_QUIRK_WIRELESS) {
@@ -1152,36 +1151,37 @@ static int steam_probe(struct hid_device *hdev,
hid_err(hdev,
"%s:steam_register failed with error %d\n",
__func__, ret);
- goto input_register_fail;
+ goto err_hw_close;
}
}
steam->client_hdev = steam_create_client_hid(hdev);
if (IS_ERR(steam->client_hdev)) {
ret = PTR_ERR(steam->client_hdev);
- goto client_hdev_fail;
+ goto err_stream_unregister;
}
steam->client_hdev->driver_data = steam;
ret = hid_add_device(steam->client_hdev);
if (ret)
- goto client_hdev_add_fail;
+ goto err_destroy;
return 0;
-client_hdev_add_fail:
- hid_hw_stop(hdev);
-client_hdev_fail:
+err_destroy:
hid_destroy_device(steam->client_hdev);
-input_register_fail:
-hid_hw_open_fail:
-hid_hw_start_fail:
+err_stream_unregister:
+ if (steam->connected)
+ steam_unregister(steam);
+err_hw_close:
+ hid_hw_close(hdev);
+err_hw_stop:
+ hid_hw_stop(hdev);
+err_cancel_work:
cancel_work_sync(&steam->work_connect);
cancel_delayed_work_sync(&steam->mode_switch);
cancel_work_sync(&steam->rumble_work);
-steam_alloc_fail:
- hid_err(hdev, "%s: failed with error %d\n",
- __func__, ret);
+
return ret;
}
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 13c8dd8cd350..2bc762d31ac7 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -357,8 +357,11 @@ static int hidraw_release(struct inode * inode, struct file * file)
down_write(&minors_rwsem);
spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags);
- for (int i = list->tail; i < list->head; i++)
- kfree(list->buffer[i].value);
+ while (list->tail != list->head) {
+ kfree(list->buffer[list->tail].value);
+ list->buffer[list->tail].value = NULL;
+ list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1);
+ }
list_del(&list->node);
spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags);
kfree(list);
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 90f316ae9819..2df1ab3c31cc 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -49,6 +49,7 @@
#define I2C_HID_QUIRK_RESET_ON_RESUME BIT(2)
#define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(3)
#define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET BIT(4)
+#define I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND BIT(5)
/* Command opcodes */
#define I2C_HID_OPCODE_RESET 0x01
@@ -131,6 +132,8 @@ static const struct i2c_hid_quirks {
I2C_HID_QUIRK_RESET_ON_RESUME },
{ USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720,
I2C_HID_QUIRK_BAD_INPUT_SIZE },
+ { I2C_VENDOR_ID_CIRQUE, I2C_PRODUCT_ID_CIRQUE_1063,
+ I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND },
/*
* Sending the wakeup after reset actually break ELAN touchscreen controller
*/
@@ -956,7 +959,8 @@ static int i2c_hid_core_suspend(struct i2c_hid *ihid, bool force_poweroff)
return ret;
/* Save some power */
- i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP);
+ if (!(ihid->quirks & I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND))
+ i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP);
disable_irq(client->irq);
diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c
index c4e1fa0273c8..8be4d576da77 100644
--- a/drivers/hid/i2c-hid/i2c-hid-of.c
+++ b/drivers/hid/i2c-hid/i2c-hid-of.c
@@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client)
if (!ihid_of)
return -ENOMEM;
+ ihid_of->client = client;
ihid_of->ops.power_up = i2c_hid_of_power_up;
ihid_of->ops.power_down = i2c_hid_of_power_down;
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index aa6cb033bb06..03d5601ce807 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -722,6 +722,8 @@ void ishtp_bus_remove_all_clients(struct ishtp_device *ishtp_dev,
spin_lock_irqsave(&ishtp_dev->cl_list_lock, flags);
list_for_each_entry(cl, &ishtp_dev->cl_list, link) {
cl->state = ISHTP_CL_DISCONNECTED;
+ if (warm_reset && cl->device->reference_count)
+ continue;
/*
* Wake any pending process. The waiter would check dev->state
diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c
index 82c907f01bd3..8a7f2f6a4f86 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client.c
@@ -49,7 +49,9 @@ static void ishtp_read_list_flush(struct ishtp_cl *cl)
list_for_each_entry_safe(rb, next, &cl->dev->read_list.list, list)
if (rb->cl && ishtp_cl_cmp_id(cl, rb->cl)) {
list_del(&rb->list);
- ishtp_io_rb_free(rb);
+ spin_lock(&cl->free_list_spinlock);
+ list_add_tail(&rb->list, &cl->free_rb_list.list);
+ spin_unlock(&cl->free_list_spinlock);
}
spin_unlock_irqrestore(&cl->dev->read_list_spinlock, flags);
}
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index b613f11ed949..2bc45b24075c 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2087,7 +2087,7 @@ static int wacom_allocate_inputs(struct wacom *wacom)
return 0;
}
-static int wacom_register_inputs(struct wacom *wacom)
+static int wacom_setup_inputs(struct wacom *wacom)
{
struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
@@ -2106,10 +2106,6 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(pen_input_dev);
wacom_wac->pen_input = NULL;
pen_input_dev = NULL;
- } else {
- error = input_register_device(pen_input_dev);
- if (error)
- goto fail;
}
error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac);
@@ -2118,10 +2114,6 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(touch_input_dev);
wacom_wac->touch_input = NULL;
touch_input_dev = NULL;
- } else {
- error = input_register_device(touch_input_dev);
- if (error)
- goto fail;
}
error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac);
@@ -2130,7 +2122,34 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(pad_input_dev);
wacom_wac->pad_input = NULL;
pad_input_dev = NULL;
- } else {
+ }
+
+ return 0;
+}
+
+static int wacom_register_inputs(struct wacom *wacom)
+{
+ struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
+ struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
+ int error = 0;
+
+ pen_input_dev = wacom_wac->pen_input;
+ touch_input_dev = wacom_wac->touch_input;
+ pad_input_dev = wacom_wac->pad_input;
+
+ if (pen_input_dev) {
+ error = input_register_device(pen_input_dev);
+ if (error)
+ goto fail;
+ }
+
+ if (touch_input_dev) {
+ error = input_register_device(touch_input_dev);
+ if (error)
+ goto fail;
+ }
+
+ if (pad_input_dev) {
error = input_register_device(pad_input_dev);
if (error)
goto fail;
@@ -2383,6 +2402,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
if (error)
goto fail;
+ error = wacom_setup_inputs(wacom);
+ if (error)
+ goto fail;
+
+ if (features->type == HID_GENERIC)
+ connect_mask |= HID_CONNECT_DRIVER;
+
+ /* Regular HID work starts now */
+ error = hid_hw_start(hdev, connect_mask);
+ if (error) {
+ hid_err(hdev, "hw start failed\n");
+ goto fail;
+ }
+
error = wacom_register_inputs(wacom);
if (error)
goto fail;
@@ -2397,16 +2430,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
goto fail;
}
- if (features->type == HID_GENERIC)
- connect_mask |= HID_CONNECT_DRIVER;
-
- /* Regular HID work starts now */
- error = hid_hw_start(hdev, connect_mask);
- if (error) {
- hid_err(hdev, "hw start failed\n");
- goto fail;
- }
-
if (!wireless) {
/* Note that if query fails it is not a hard failure */
wacom_query_tablet_data(wacom);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index da8a01fedd39..fbe10fbc5769 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2575,7 +2575,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev,
wacom_wac->hid_data.tipswitch);
input_report_key(input, wacom_wac->tool[0], sense);
if (wacom_wac->serial[0]) {
- input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]);
+ /*
+ * xf86-input-wacom does not accept a serial number
+ * of '0'. Report the low 32 bits if possible, but
+ * if they are zero, report the upper ones instead.
+ */
+ __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu;
+ __u32 serial_hi = wacom_wac->serial[0] >> 32;
+ input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi));
input_report_abs(input, ABS_MISC, sense ? id : 0);
}
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56f7e06c673e..adbf674355b2 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -322,125 +322,89 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT;
- /* do we need a gpadl body msg */
pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
sizeof(struct vmbus_channel_gpadl_header) -
sizeof(struct gpa_range);
+ pfncount = umin(pagecount, pfnsize / sizeof(u64));
+
+ msgsize = sizeof(struct vmbus_channel_msginfo) +
+ sizeof(struct vmbus_channel_gpadl_header) +
+ sizeof(struct gpa_range) + pfncount * sizeof(u64);
+ msgheader = kzalloc(msgsize, GFP_KERNEL);
+ if (!msgheader)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&msgheader->submsglist);
+ msgheader->msgsize = msgsize;
+
+ gpadl_header = (struct vmbus_channel_gpadl_header *)
+ msgheader->msg;
+ gpadl_header->rangecount = 1;
+ gpadl_header->range_buflen = sizeof(struct gpa_range) +
+ pagecount * sizeof(u64);
+ gpadl_header->range[0].byte_offset = 0;
+ gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
+ for (i = 0; i < pfncount; i++)
+ gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
+ type, kbuffer, size, send_offset, i);
+ *msginfo = msgheader;
+
+ pfnsum = pfncount;
+ pfnleft = pagecount - pfncount;
+
+ /* how many pfns can we fit in a body message */
+ pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
+ sizeof(struct vmbus_channel_gpadl_body);
pfncount = pfnsize / sizeof(u64);
- if (pagecount > pfncount) {
- /* we need a gpadl body */
- /* fill in the header */
+ /*
+ * If pfnleft is zero, everything fits in the header and no body
+ * messages are needed
+ */
+ while (pfnleft) {
+ pfncurr = umin(pfncount, pfnleft);
msgsize = sizeof(struct vmbus_channel_msginfo) +
- sizeof(struct vmbus_channel_gpadl_header) +
- sizeof(struct gpa_range) + pfncount * sizeof(u64);
- msgheader = kzalloc(msgsize, GFP_KERNEL);
- if (!msgheader)
- goto nomem;
-
- INIT_LIST_HEAD(&msgheader->submsglist);
- msgheader->msgsize = msgsize;
-
- gpadl_header = (struct vmbus_channel_gpadl_header *)
- msgheader->msg;
- gpadl_header->rangecount = 1;
- gpadl_header->range_buflen = sizeof(struct gpa_range) +
- pagecount * sizeof(u64);
- gpadl_header->range[0].byte_offset = 0;
- gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
- for (i = 0; i < pfncount; i++)
- gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
- type, kbuffer, size, send_offset, i);
- *msginfo = msgheader;
-
- pfnsum = pfncount;
- pfnleft = pagecount - pfncount;
-
- /* how many pfns can we fit */
- pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
- sizeof(struct vmbus_channel_gpadl_body);
- pfncount = pfnsize / sizeof(u64);
-
- /* fill in the body */
- while (pfnleft) {
- if (pfnleft > pfncount)
- pfncurr = pfncount;
- else
- pfncurr = pfnleft;
-
- msgsize = sizeof(struct vmbus_channel_msginfo) +
- sizeof(struct vmbus_channel_gpadl_body) +
- pfncurr * sizeof(u64);
- msgbody = kzalloc(msgsize, GFP_KERNEL);
-
- if (!msgbody) {
- struct vmbus_channel_msginfo *pos = NULL;
- struct vmbus_channel_msginfo *tmp = NULL;
- /*
- * Free up all the allocated messages.
- */
- list_for_each_entry_safe(pos, tmp,
- &msgheader->submsglist,
- msglistentry) {
-
- list_del(&pos->msglistentry);
- kfree(pos);
- }
-
- goto nomem;
- }
-
- msgbody->msgsize = msgsize;
- gpadl_body =
- (struct vmbus_channel_gpadl_body *)msgbody->msg;
+ sizeof(struct vmbus_channel_gpadl_body) +
+ pfncurr * sizeof(u64);
+ msgbody = kzalloc(msgsize, GFP_KERNEL);
+ if (!msgbody) {
+ struct vmbus_channel_msginfo *pos = NULL;
+ struct vmbus_channel_msginfo *tmp = NULL;
/*
- * Gpadl is u32 and we are using a pointer which could
- * be 64-bit
- * This is governed by the guest/host protocol and
- * so the hypervisor guarantees that this is ok.
+ * Free up all the allocated messages.
*/
- for (i = 0; i < pfncurr; i++)
- gpadl_body->pfn[i] = hv_gpadl_hvpfn(type,
- kbuffer, size, send_offset, pfnsum + i);
-
- /* add to msg header */
- list_add_tail(&msgbody->msglistentry,
- &msgheader->submsglist);
- pfnsum += pfncurr;
- pfnleft -= pfncurr;
+ list_for_each_entry_safe(pos, tmp,
+ &msgheader->submsglist,
+ msglistentry) {
+
+ list_del(&pos->msglistentry);
+ kfree(pos);
+ }
+ kfree(msgheader);
+ return -ENOMEM;
}
- } else {
- /* everything fits in a header */
- msgsize = sizeof(struct vmbus_channel_msginfo) +
- sizeof(struct vmbus_channel_gpadl_header) +
- sizeof(struct gpa_range) + pagecount * sizeof(u64);
- msgheader = kzalloc(msgsize, GFP_KERNEL);
- if (msgheader == NULL)
- goto nomem;
-
- INIT_LIST_HEAD(&msgheader->submsglist);
- msgheader->msgsize = msgsize;
-
- gpadl_header = (struct vmbus_channel_gpadl_header *)
- msgheader->msg;
- gpadl_header->rangecount = 1;
- gpadl_header->range_buflen = sizeof(struct gpa_range) +
- pagecount * sizeof(u64);
- gpadl_header->range[0].byte_offset = 0;
- gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
- for (i = 0; i < pagecount; i++)
- gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
- type, kbuffer, size, send_offset, i);
-
- *msginfo = msgheader;
+
+ msgbody->msgsize = msgsize;
+ gpadl_body = (struct vmbus_channel_gpadl_body *)msgbody->msg;
+
+ /*
+ * Gpadl is u32 and we are using a pointer which could
+ * be 64-bit
+ * This is governed by the guest/host protocol and
+ * so the hypervisor guarantees that this is ok.
+ */
+ for (i = 0; i < pfncurr; i++)
+ gpadl_body->pfn[i] = hv_gpadl_hvpfn(type,
+ kbuffer, size, send_offset, pfnsum + i);
+
+ /* add to msg header */
+ list_add_tail(&msgbody->msglistentry, &msgheader->submsglist);
+ pfnsum += pfncurr;
+ pfnleft -= pfncurr;
}
return 0;
-nomem:
- kfree(msgheader);
- kfree(msgbody);
- return -ENOMEM;
}
/*
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 42aec2c5606a..9c97c4065fe7 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -296,6 +296,11 @@ static struct {
spinlock_t lock;
} host_ts;
+static bool timesync_implicit;
+
+module_param(timesync_implicit, bool, 0644);
+MODULE_PARM_DESC(timesync_implicit, "If set treat SAMPLE as SYNC when clock is behind");
+
static inline u64 reftime_to_ns(u64 reftime)
{
return (reftime - WLTIMEDELTA) * 100;
@@ -345,6 +350,29 @@ static void hv_set_host_time(struct work_struct *work)
}
/*
+ * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume.
+ * Force a sync if the guest is behind.
+ */
+static inline bool hv_implicit_sync(u64 host_time)
+{
+ struct timespec64 new_ts;
+ struct timespec64 threshold_ts;
+
+ new_ts = ns_to_timespec64(reftime_to_ns(host_time));
+ ktime_get_real_ts64(&threshold_ts);
+
+ threshold_ts.tv_sec += 5;
+
+ /*
+ * If guest behind the host by 5 or more seconds.
+ */
+ if (timespec64_compare(&new_ts, &threshold_ts) >= 0)
+ return true;
+
+ return false;
+}
+
+/*
* Synchronize time with host after reboot, restore, etc.
*
* ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
@@ -384,7 +412,8 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
spin_unlock_irqrestore(&host_ts.lock, flags);
/* Schedule work to do do_settimeofday64() */
- if (adj_flags & ICTIMESYNCFLAG_SYNC)
+ if ((adj_flags & ICTIMESYNCFLAG_SYNC) ||
+ (timesync_implicit && hv_implicit_sync(host_ts.host_time)))
schedule_work(&adj_time_work);
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index b33d5abd9beb..7f7965f3d187 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -988,7 +988,7 @@ static const struct dev_pm_ops vmbus_pm = {
};
/* The one and only one */
-static struct bus_type hv_bus = {
+static const struct bus_type hv_bus = {
.name = "vmbus",
.match = vmbus_match,
.shutdown = vmbus_shutdown,
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c
index f6e1e55e8292..4acc1858d8ac 100644
--- a/drivers/hwmon/aspeed-pwm-tacho.c
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -195,6 +195,8 @@ struct aspeed_pwm_tacho_data {
u8 fan_tach_ch_source[MAX_ASPEED_FAN_TACH_CHANNELS];
struct aspeed_cooling_device *cdev[8];
const struct attribute_group *groups[3];
+ /* protects access to shared ASPEED_PTCR_RESULT */
+ struct mutex tach_lock;
};
enum type { TYPEM, TYPEN, TYPEO };
@@ -529,6 +531,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
u8 fan_tach_ch_source, type, mode, both;
int ret;
+ mutex_lock(&priv->tach_lock);
+
regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0);
regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch);
@@ -546,6 +550,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
ASPEED_RPM_STATUS_SLEEP_USEC,
usec);
+ mutex_unlock(&priv->tach_lock);
+
/* return -ETIMEDOUT if we didn't get an answer. */
if (ret)
return ret;
@@ -915,6 +921,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
+ mutex_init(&priv->tach_lock);
priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
&aspeed_pwm_tacho_regmap_config);
if (IS_ERR(priv->regmap))
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index ba82d1e79c13..b0991dde2e59 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -41,7 +41,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
+#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
@@ -419,7 +419,7 @@ static ssize_t show_temp(struct device *dev,
}
static int create_core_attrs(struct temp_data *tdata, struct device *dev,
- int attr_no)
+ int index)
{
int i;
static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
@@ -431,13 +431,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
};
for (i = 0; i < tdata->attr_size; i++) {
+ /*
+ * We map the attr number to core id of the CPU
+ * The attr number is always core id + 2
+ * The Pkgtemp will always show up as temp1_*, if available
+ */
+ int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2;
+
snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH,
"temp%d_%s", attr_no, suffixes[i]);
sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
- tdata->sd_attrs[i].index = attr_no;
+ tdata->sd_attrs[i].index = index;
tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr;
}
tdata->attr_group.attrs = tdata->attrs;
@@ -495,30 +502,25 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
struct platform_data *pdata = platform_get_drvdata(pdev);
struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 eax, edx;
- int err, index, attr_no;
+ int err, index;
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
return 0;
/*
- * Find attr number for sysfs:
- * We map the attr number to core id of the CPU
- * The attr number is always core id + 2
- * The Pkgtemp will always show up as temp1_*, if available
+ * Get the index of tdata in pdata->core_data[]
+ * tdata for package: pdata->core_data[1]
+ * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1]
*/
if (pkg_flag) {
- attr_no = PKG_SYSFS_ATTR_NO;
+ index = PKG_SYSFS_ATTR_NO;
} else {
- index = ida_alloc(&pdata->ida, GFP_KERNEL);
+ index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL);
if (index < 0)
return index;
- pdata->cpu_map[index] = topology_core_id(cpu);
- attr_no = index + BASE_SYSFS_ATTR_NO;
- }
- if (attr_no > MAX_CORE_DATA - 1) {
- err = -ERANGE;
- goto ida_free;
+ pdata->cpu_map[index] = topology_core_id(cpu);
+ index += BASE_SYSFS_ATTR_NO;
}
tdata = init_temp_data(cpu, pkg_flag);
@@ -544,20 +546,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
if (get_ttarget(tdata, &pdev->dev) >= 0)
tdata->attr_size++;
- pdata->core_data[attr_no] = tdata;
+ pdata->core_data[index] = tdata;
/* Create sysfs interfaces */
- err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
+ err = create_core_attrs(tdata, pdata->hwmon_dev, index);
if (err)
goto exit_free;
return 0;
exit_free:
- pdata->core_data[attr_no] = NULL;
+ pdata->core_data[index] = NULL;
kfree(tdata);
ida_free:
if (!pkg_flag)
- ida_free(&pdata->ida, index);
+ ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO);
return err;
}
@@ -780,7 +782,7 @@ static int __init coretemp_init(void)
if (!x86_match_cpu(coretemp_ids))
return -ENODEV;
- max_zones = topology_max_packages() * topology_max_die_per_package();
+ max_zones = topology_max_packages() * topology_max_dies_per_package();
zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
GFP_KERNEL);
if (!zone_devices)
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 6307112c2c0c..9ed2c4b6734e 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -209,7 +209,7 @@ static ssize_t power1_average_show(struct device *dev,
* With the new x86 topology modelling, x86_max_cores is the
* compute unit number.
*/
- cu_num = boot_cpu_data.x86_max_cores;
+ cu_num = topology_num_cores_per_package();
ret = read_registers(data);
if (ret)
diff --git a/drivers/hwmon/gigabyte_waterforce.c b/drivers/hwmon/gigabyte_waterforce.c
index 85e523775714..8129d7b3ceaf 100644
--- a/drivers/hwmon/gigabyte_waterforce.c
+++ b/drivers/hwmon/gigabyte_waterforce.c
@@ -146,7 +146,7 @@ static int waterforce_get_status(struct waterforce_data *priv)
/* Send command for getting status */
ret = waterforce_write_expanded(priv, get_status_cmd, GET_STATUS_CMD_LENGTH);
if (ret < 0)
- return ret;
+ goto unlock_and_return;
ret = wait_for_completion_interruptible_timeout(&priv->status_report_received,
msecs_to_jiffies(STATUS_VALIDITY));
diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c
index 8d2ef3145bca..9fbab8f02334 100644
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -3512,6 +3512,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit;
const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL;
int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp;
+ int num_reg_temp_config;
struct device *hwmon_dev;
struct sensor_template_group tsi_temp_tg;
@@ -3594,6 +3595,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6106_REG_TEMP_OVER;
reg_temp_hyst = NCT6106_REG_TEMP_HYST;
reg_temp_config = NCT6106_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6106_REG_TEMP_CRIT;
reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L;
@@ -3669,6 +3671,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6106_REG_TEMP_OVER;
reg_temp_hyst = NCT6106_REG_TEMP_HYST;
reg_temp_config = NCT6106_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6106_REG_TEMP_CRIT;
reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L;
@@ -3746,6 +3749,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6775_REG_TEMP_OVER;
reg_temp_hyst = NCT6775_REG_TEMP_HYST;
reg_temp_config = NCT6775_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6775_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6775_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6775_REG_TEMP_CRIT;
@@ -3821,6 +3825,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6775_REG_TEMP_OVER;
reg_temp_hyst = NCT6775_REG_TEMP_HYST;
reg_temp_config = NCT6776_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6776_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6776_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6776_REG_TEMP_CRIT;
@@ -3900,6 +3905,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6779_REG_TEMP_OVER;
reg_temp_hyst = NCT6779_REG_TEMP_HYST;
reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6779_REG_TEMP_CRIT;
@@ -4034,6 +4040,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6779_REG_TEMP_OVER;
reg_temp_hyst = NCT6779_REG_TEMP_HYST;
reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6779_REG_TEMP_CRIT;
@@ -4123,6 +4130,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
reg_temp_over = NCT6798_REG_TEMP_OVER;
reg_temp_hyst = NCT6798_REG_TEMP_HYST;
reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+ num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG);
reg_temp_alternate = NCT6798_REG_TEMP_ALTERNATE;
reg_temp_crit = NCT6798_REG_TEMP_CRIT;
@@ -4204,7 +4212,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
= reg_temp_crit[src - 1];
if (reg_temp_crit_l && reg_temp_crit_l[i])
data->reg_temp[4][src - 1] = reg_temp_crit_l[i];
- data->reg_temp_config[src - 1] = reg_temp_config[i];
+ if (i < num_reg_temp_config)
+ data->reg_temp_config[src - 1] = reg_temp_config[i];
data->temp_src[src - 1] = src;
continue;
}
@@ -4217,7 +4226,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
data->reg_temp[0][s] = reg_temp[i];
data->reg_temp[1][s] = reg_temp_over[i];
data->reg_temp[2][s] = reg_temp_hyst[i];
- data->reg_temp_config[s] = reg_temp_config[i];
+ if (i < num_reg_temp_config)
+ data->reg_temp_config[s] = reg_temp_config[i];
if (reg_temp_crit_h && reg_temp_crit_h[i])
data->reg_temp[3][s] = reg_temp_crit_h[i];
else if (reg_temp_crit[src - 1])
diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index b9bb469e2d8f..e5fa10b3b8bc 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -126,6 +126,21 @@ static const struct regulator_desc __maybe_unused mp2975_reg_desc[] = {
#define to_mp2975_data(x) container_of(x, struct mp2975_data, info)
+static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+ switch (reg) {
+ case PMBUS_VOUT_MODE:
+ /*
+ * Report direct format as configured by MFR_DC_LOOP_CTRL.
+ * Unlike on MP2971/MP2973 the reported VOUT_MODE isn't automatically
+ * internally updated, but always reads as PB_VOUT_MODE_VID.
+ */
+ return PB_VOUT_MODE_DIRECT;
+ default:
+ return -ENODATA;
+ }
+}
+
static int
mp2975_read_word_helper(struct i2c_client *client, int page, int phase, u8 reg,
u16 mask)
@@ -869,6 +884,7 @@ static struct pmbus_driver_info mp2975_info = {
PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT |
PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | PMBUS_PHASE_VIRTUAL,
+ .read_byte_data = mp2975_read_byte_data,
.read_word_data = mp2975_read_word_data,
#if IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR)
.num_regulators = 1,
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 3757b9391e60..aa0ee8ecd6f2 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -90,10 +90,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o
obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o
obj-$(CONFIG_I2C_OMAP) += i2c-omap.o
obj-$(CONFIG_I2C_OWL) += i2c-owl.o
-i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o
-obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o
-i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o
-obj-$(CONFIG_I2C_APPLE) += i2c-apple.o
+obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o
+obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o
obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
obj-$(CONFIG_I2C_PNX) += i2c-pnx.o
obj-$(CONFIG_I2C_PXA) += i2c-pxa.o
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 5511fd46a65e..ce8c4846b7fa 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -445,6 +445,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
irq_status);
irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
+ irq_handled = irq_status;
bus->cmd_err = ret;
bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
goto out_complete;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 3932e8d96a17..274e987e4cfa 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -498,11 +498,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
/* Set block buffer mode */
outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv));
- inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
-
if (read_write == I2C_SMBUS_WRITE) {
len = data->block[0];
outb_p(len, SMBHSTDAT0(priv));
+ inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
outb_p(data->block[i+1], SMBBLKDAT(priv));
}
@@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
}
data->block[0] = len;
+ inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
data->block[i + 1] = inb_p(SMBBLKDAT(priv));
}
@@ -1416,7 +1416,6 @@ static void i801_add_mux(struct i801_priv *priv)
lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip,
mux_config->gpios[i], "mux", 0);
gpiod_add_lookup_table(lookup);
- priv->lookup = lookup;
/*
* Register the mux device, we use PLATFORM_DEVID_NONE here
@@ -1430,7 +1429,10 @@ static void i801_add_mux(struct i801_priv *priv)
sizeof(struct i2c_mux_gpio_platform_data));
if (IS_ERR(priv->mux_pdev)) {
gpiod_remove_lookup_table(lookup);
+ devm_kfree(dev, lookup);
dev_err(dev, "Failed to register i2c-mux-gpio device\n");
+ } else {
+ priv->lookup = lookup;
}
}
@@ -1742,9 +1744,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
i801_enable_host_notify(&priv->adapter);
- i801_probe_optional_slaves(priv);
/* We ignore errors - multiplexing is optional */
i801_add_mux(priv);
+ i801_probe_optional_slaves(priv);
pci_set_drvdata(dev, priv);
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 88a053987403..60e813137f84 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -803,6 +803,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx,
ctl &= ~I2CR_MTX;
imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR);
imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
+
+ /* flag the last byte as processed */
+ i2c_imx_slave_event(i2c_imx,
+ I2C_SLAVE_READ_PROCESSED, &value);
+
i2c_imx_slave_finish_op(i2c_imx);
return IRQ_HANDLED;
}
diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c
index 7d54a9f34c74..bd8becbdeeb2 100644
--- a/drivers/i2c/busses/i2c-pasemi-core.c
+++ b/drivers/i2c/busses/i2c-pasemi-core.c
@@ -369,6 +369,7 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus)
return 0;
}
+EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe);
irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
{
@@ -378,3 +379,8 @@ irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
complete(&smbus->irq_completion);
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_GPL(pasemi_irq_handler);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Olof Johansson <olof@lixom.net>");
+MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver");
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 0d2e7171e3a6..da94df466e83 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -613,20 +613,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i
peripheral.addr = msgs[i].addr;
+ ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
+ &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
+ if (ret)
+ goto err;
+
if (msgs[i].flags & I2C_M_RD) {
ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
&rx_addr, &rx_buf, I2C_READ, gi2c->rx_c);
if (ret)
goto err;
- }
-
- ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
- &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
- if (ret)
- goto err;
- if (msgs[i].flags & I2C_M_RD)
dma_async_issue_pending(gi2c->rx_c);
+ }
+
dma_async_issue_pending(gi2c->tx_c);
timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
diff --git a/drivers/i2c/busses/i2c-wmt.c b/drivers/i2c/busses/i2c-wmt.c
index ec2a8da134e5..198afee5233c 100644
--- a/drivers/i2c/busses/i2c-wmt.c
+++ b/drivers/i2c/busses/i2c-wmt.c
@@ -378,11 +378,15 @@ static int wmt_i2c_probe(struct platform_device *pdev)
err = i2c_add_adapter(adap);
if (err)
- return err;
+ goto err_disable_clk;
platform_set_drvdata(pdev, i2c_dev);
return 0;
+
+err_disable_clk:
+ clk_disable_unprepare(i2c_dev->clk);
+ return err;
}
static void wmt_i2c_remove(struct platform_device *pdev)
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index 91adcac875a4..c9d7afe489e8 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -219,10 +219,12 @@ config BMA400
config BMA400_I2C
tristate
+ select REGMAP_I2C
depends on BMA400
config BMA400_SPI
tristate
+ select REGMAP_SPI
depends on BMA400
config BMC150_ACCEL
diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c
index 90b7ae6d42b7..484fe2e9fb17 100644
--- a/drivers/iio/accel/adxl367.c
+++ b/drivers/iio/accel/adxl367.c
@@ -1429,9 +1429,11 @@ static int adxl367_verify_devid(struct adxl367_state *st)
unsigned int val;
int ret;
- ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val,
- val == ADXL367_DEVID_AD, 1000, 10000);
+ ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val);
if (ret)
+ return dev_err_probe(st->dev, ret, "Failed to read dev id\n");
+
+ if (val != ADXL367_DEVID_AD)
return dev_err_probe(st->dev, -ENODEV,
"Invalid dev id 0x%02X, expected 0x%02X\n",
val, ADXL367_DEVID_AD);
@@ -1510,6 +1512,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops,
if (ret)
return ret;
+ fsleep(15000);
+
ret = adxl367_verify_devid(st);
if (ret)
return ret;
diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c
index b595fe94f3a3..62c74bdc0d77 100644
--- a/drivers/iio/accel/adxl367_i2c.c
+++ b/drivers/iio/accel/adxl367_i2c.c
@@ -11,7 +11,7 @@
#include "adxl367.h"
-#define ADXL367_I2C_FIFO_DATA 0x42
+#define ADXL367_I2C_FIFO_DATA 0x18
struct adxl367_i2c_state {
struct regmap *regmap;
diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c
index feb86fe6c422..62490424b6ae 100644
--- a/drivers/iio/adc/ad4130.c
+++ b/drivers/iio/adc/ad4130.c
@@ -1821,7 +1821,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st)
{
struct device *dev = &st->spi->dev;
struct device_node *of_node = dev_of_node(dev);
- struct clk_init_data init;
+ struct clk_init_data init = {};
const char *clk_name;
int ret;
@@ -1891,10 +1891,14 @@ static int ad4130_setup(struct iio_dev *indio_dev)
return ret;
/*
- * Configure all GPIOs for output. If configured, the interrupt function
- * of P2 takes priority over the GPIO out function.
+ * Configure unused GPIOs for output. If configured, the interrupt
+ * function of P2 takes priority over the GPIO out function.
*/
- val = AD4130_IO_CONTROL_GPIO_CTRL_MASK;
+ val = 0;
+ for (i = 0; i < AD4130_MAX_GPIOS; i++)
+ if (st->pins_fn[i + AD4130_AIN2_P1] == AD4130_PIN_FN_NONE)
+ val |= FIELD_PREP(AD4130_IO_CONTROL_GPIO_CTRL_MASK, BIT(i));
+
val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel);
ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val);
diff --git a/drivers/iio/adc/ad7091r8.c b/drivers/iio/adc/ad7091r8.c
index 57700f124803..700564305057 100644
--- a/drivers/iio/adc/ad7091r8.c
+++ b/drivers/iio/adc/ad7091r8.c
@@ -195,7 +195,7 @@ static int ad7091r8_gpio_setup(struct ad7091r_state *st)
st->reset_gpio = devm_gpiod_get_optional(st->dev, "reset",
GPIOD_OUT_HIGH);
if (IS_ERR(st->reset_gpio))
- return dev_err_probe(st->dev, PTR_ERR(st->convst_gpio),
+ return dev_err_probe(st->dev, PTR_ERR(st->reset_gpio),
"Error on requesting reset GPIO\n");
if (st->reset_gpio) {
diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig
index 2de5494e7c22..b15b7a3b66d5 100644
--- a/drivers/iio/humidity/Kconfig
+++ b/drivers/iio/humidity/Kconfig
@@ -48,6 +48,18 @@ config HDC2010
To compile this driver as a module, choose M here: the module
will be called hdc2010.
+config HDC3020
+ tristate "TI HDC3020 relative humidity and temperature sensor"
+ depends on I2C
+ select CRC8
+ help
+ Say yes here to build support for the Texas Instruments
+ HDC3020, HDC3021 and HDC3022 relative humidity and temperature
+ sensors.
+
+ To compile this driver as a module, choose M here: the module
+ will be called hdc3020.
+
config HID_SENSOR_HUMIDITY
tristate "HID Environmental humidity sensor"
depends on HID_SENSOR_HUB
diff --git a/drivers/iio/humidity/Makefile b/drivers/iio/humidity/Makefile
index f19ff3de97c5..5fbeef299f61 100644
--- a/drivers/iio/humidity/Makefile
+++ b/drivers/iio/humidity/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_AM2315) += am2315.o
obj-$(CONFIG_DHT11) += dht11.o
obj-$(CONFIG_HDC100X) += hdc100x.o
obj-$(CONFIG_HDC2010) += hdc2010.o
+obj-$(CONFIG_HDC3020) += hdc3020.o
obj-$(CONFIG_HID_SENSOR_HUMIDITY) += hid-sensor-humidity.o
hts221-y := hts221_core.o \
diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c
index 4e3311170725..ed70415512f6 100644
--- a/drivers/iio/humidity/hdc3020.c
+++ b/drivers/iio/humidity/hdc3020.c
@@ -322,7 +322,7 @@ static int hdc3020_read_raw(struct iio_dev *indio_dev,
if (chan->type != IIO_TEMP)
return -EINVAL;
- *val = 16852;
+ *val = -16852;
return IIO_VAL_INT;
default:
diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig
index 83e53acfbe88..c7f5866a177d 100644
--- a/drivers/iio/imu/bno055/Kconfig
+++ b/drivers/iio/imu/bno055/Kconfig
@@ -8,6 +8,7 @@ config BOSCH_BNO055
config BOSCH_BNO055_SERIAL
tristate "Bosch BNO055 attached via UART"
depends on SERIAL_DEV_BUS
+ select REGMAP
select BOSCH_BNO055
help
Enable this to support Bosch BNO055 IMUs attached via UART.
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
index 66d4ba088e70..d4f9b5d8d28d 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
@@ -109,6 +109,8 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
/* compute and process only all complete datum */
nb = fifo_count / bytes_per_datum;
fifo_count = nb * bytes_per_datum;
+ if (nb == 0)
+ goto end_session;
/* Each FIFO data contains all sensors, so same number for FIFO and sensor data */
fifo_period = NSEC_PER_SEC / INV_MPU6050_DIVIDER_TO_FIFO_RATE(st->chip_config.divider);
inv_sensors_timestamp_interrupt(&st->timestamp, fifo_period, nb, nb, pf->timestamp);
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
index 676704f9151f..e6e6e94452a3 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
@@ -111,6 +111,7 @@ int inv_mpu6050_prepare_fifo(struct inv_mpu6050_state *st, bool enable)
if (enable) {
/* reset timestamping */
inv_sensors_timestamp_reset(&st->timestamp);
+ inv_sensors_timestamp_apply_odr(&st->timestamp, 0, 0, 0);
/* reset FIFO */
d = st->chip_config.user_ctrl | INV_MPU6050_BIT_FIFO_RST;
ret = regmap_write(st->map, st->reg->user_ctrl, d);
@@ -184,6 +185,10 @@ static int inv_mpu6050_set_enable(struct iio_dev *indio_dev, bool enable)
if (result)
goto error_power_off;
} else {
+ st->chip_config.gyro_fifo_enable = 0;
+ st->chip_config.accl_fifo_enable = 0;
+ st->chip_config.temp_fifo_enable = 0;
+ st->chip_config.magn_fifo_enable = 0;
result = inv_mpu6050_prepare_fifo(st, false);
if (result)
goto error_power_off;
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 9a85752124dd..173dc00762a1 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1584,10 +1584,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev)
ret = iio_device_register_sysfs_group(indio_dev,
&iio_dev_opaque->chan_attr_group);
if (ret)
- goto error_clear_attrs;
+ goto error_free_chan_attrs;
return 0;
+error_free_chan_attrs:
+ kfree(iio_dev_opaque->chan_attr_group.attrs);
+ iio_dev_opaque->chan_attr_group.attrs = NULL;
error_clear_attrs:
iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list);
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 5cd27f04b45e..b6c4bef2a7bb 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -226,6 +226,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev,
case HID_USAGE_SENSOR_TIME_TIMESTAMP:
als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes,
*(s64 *)raw_data);
+ ret = 0;
break;
default:
break;
diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c
index 69938204456f..42b70cd42b39 100644
--- a/drivers/iio/magnetometer/rm3100-core.c
+++ b/drivers/iio/magnetometer/rm3100-core.c
@@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
struct rm3100_data *data;
unsigned int tmp;
int ret;
+ int samp_rate_index;
indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
if (!indio_dev)
@@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp);
if (ret < 0)
return ret;
+
+ samp_rate_index = tmp - RM3100_TMRC_OFFSET;
+ if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) {
+ dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n");
+ return -EINVAL;
+ }
/* Initializing max wait time, which is double conversion time. */
- data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2]
- * 2;
+ data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2;
/* Cycle count values may not be what we want. */
if ((tmp - RM3100_TMRC_OFFSET) == 0)
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index 433d6fac83c4..a444d4b2978b 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -4,6 +4,7 @@
*
* Inspired by the older BMP085 driver drivers/misc/bmp085-spi.c
*/
+#include <linux/bits.h>
#include <linux/module.h>
#include <linux/spi/spi.h>
#include <linux/err.h>
@@ -35,6 +36,34 @@ static int bmp280_regmap_spi_read(void *context, const void *reg,
return spi_write_then_read(spi, reg, reg_size, val, val_size);
}
+static int bmp380_regmap_spi_read(void *context, const void *reg,
+ size_t reg_size, void *val, size_t val_size)
+{
+ struct spi_device *spi = to_spi_device(context);
+ u8 rx_buf[4];
+ ssize_t status;
+
+ /*
+ * Maximum number of consecutive bytes read for a temperature or
+ * pressure measurement is 3.
+ */
+ if (val_size > 3)
+ return -EINVAL;
+
+ /*
+ * According to the BMP3xx datasheets, for a basic SPI read opertion,
+ * the first byte needs to be dropped and the rest are the requested
+ * data.
+ */
+ status = spi_write_then_read(spi, reg, 1, rx_buf, val_size + 1);
+ if (status)
+ return status;
+
+ memcpy(val, rx_buf + 1, val_size);
+
+ return 0;
+}
+
static struct regmap_bus bmp280_regmap_bus = {
.write = bmp280_regmap_spi_write,
.read = bmp280_regmap_spi_read,
@@ -42,10 +71,19 @@ static struct regmap_bus bmp280_regmap_bus = {
.val_format_endian_default = REGMAP_ENDIAN_BIG,
};
+static struct regmap_bus bmp380_regmap_bus = {
+ .write = bmp280_regmap_spi_write,
+ .read = bmp380_regmap_spi_read,
+ .read_flag_mask = BIT(7),
+ .reg_format_endian_default = REGMAP_ENDIAN_BIG,
+ .val_format_endian_default = REGMAP_ENDIAN_BIG,
+};
+
static int bmp280_spi_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
const struct bmp280_chip_info *chip_info;
+ struct regmap_bus *bmp_regmap_bus;
struct regmap *regmap;
int ret;
@@ -58,8 +96,18 @@ static int bmp280_spi_probe(struct spi_device *spi)
chip_info = spi_get_device_match_data(spi);
+ switch (chip_info->chip_id[0]) {
+ case BMP380_CHIP_ID:
+ case BMP390_CHIP_ID:
+ bmp_regmap_bus = &bmp380_regmap_bus;
+ break;
+ default:
+ bmp_regmap_bus = &bmp280_regmap_bus;
+ break;
+ }
+
regmap = devm_regmap_init(&spi->dev,
- &bmp280_regmap_bus,
+ bmp_regmap_bus,
&spi->dev,
chip_info->regmap_config);
if (IS_ERR(regmap)) {
@@ -87,6 +135,7 @@ static const struct of_device_id bmp280_of_spi_match[] = {
MODULE_DEVICE_TABLE(of, bmp280_of_spi_match);
static const struct spi_device_id bmp280_spi_id[] = {
+ { "bmp085", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp180", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp181", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp280", (kernel_ulong_t)&bmp280_chip_info },
diff --git a/drivers/iio/pressure/dlhl60d.c b/drivers/iio/pressure/dlhl60d.c
index 28c8269ba65d..0bba4c5a8d40 100644
--- a/drivers/iio/pressure/dlhl60d.c
+++ b/drivers/iio/pressure/dlhl60d.c
@@ -250,18 +250,17 @@ static irqreturn_t dlh_trigger_handler(int irq, void *private)
struct dlh_state *st = iio_priv(indio_dev);
int ret;
unsigned int chn, i = 0;
- __be32 tmp_buf[2];
+ __be32 tmp_buf[2] = { };
ret = dlh_start_capture_and_read(st);
if (ret)
goto out;
for_each_set_bit(chn, indio_dev->active_scan_mask,
- indio_dev->masklength) {
- memcpy(tmp_buf + i,
+ indio_dev->masklength) {
+ memcpy(&tmp_buf[i++],
&st->rx_buf[1] + chn * DLH_NUM_DATA_BYTES,
DLH_NUM_DATA_BYTES);
- i++;
}
iio_push_to_buffers(indio_dev, tmp_buf);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 824349659d69..ce9c5bae83bf 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -401,6 +401,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
struct bnxt_re_fence_data *fence = &pd->fence;
struct ib_mr *ib_mr = &fence->mr->ib_mr;
struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+ struct bnxt_re_dev *rdev = pd->rdev;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
memset(wqe, 0, sizeof(*wqe));
wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
@@ -455,6 +459,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
struct device *dev = &rdev->en_dev->pdev->dev;
struct bnxt_re_mr *mr = fence->mr;
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
if (fence->mw) {
bnxt_re_dealloc_mw(fence->mw);
fence->mw = NULL;
@@ -486,6 +493,9 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
struct ib_mw *mw;
int rc;
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
DMA_BIDIRECTIONAL);
rc = dma_mapping_error(dev, dma_addr);
@@ -1817,7 +1827,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
switch (srq_attr_mask) {
case IB_SRQ_MAX_WR:
/* SRQ resize is not supported */
- break;
+ return -EINVAL;
case IB_SRQ_LIMIT:
/* Change the SRQ threshold */
if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
@@ -1832,13 +1842,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
/* On success, update the shadow */
srq->srq_limit = srq_attr->srq_limit;
/* No need to Build and send response back to udata */
- break;
+ return 0;
default:
ibdev_err(&rdev->ibdev,
"Unsupported srq_attr_mask 0x%x", srq_attr_mask);
return -EINVAL;
}
- return 0;
}
int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
@@ -2556,11 +2565,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
- /* Need unconditional fence for local invalidate
- * opcode to work as expected.
- */
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
if (wr->send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
if (wr->send_flags & IB_SEND_SOLICITED)
@@ -2583,12 +2587,6 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
wqe->frmr.levels = qplib_frpl->hwq.level;
wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
- /* Need unconditional fence for reg_mr
- * opcode to function as expected.
- */
-
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
if (wr->wr.send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
@@ -2719,6 +2717,18 @@ bad:
return rc;
}
+static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe)
+{
+ /* Need unconditional fence for non-wire memory opcode
+ * to work as expected.
+ */
+ if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+}
+
int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
@@ -2798,8 +2808,11 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
rc = -EINVAL;
goto bad;
}
- if (!rc)
+ if (!rc) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_re_legacy_set_uc_fence(&wqe);
rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ }
bad:
if (rc) {
ibdev_err(&qp->rdev->ibdev,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index f022c922fae5..54b4d2f3a5d8 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -280,9 +280,6 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
{
-
- if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
- return;
rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
bnxt_re_set_resource_limits(rdev);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index c98e04fe2ddd..439d0c7c5d0c 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -744,7 +744,8 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
- srq->threshold = le16_to_cpu(sb->srq_limit);
+ if (!rc)
+ srq->threshold = le16_to_cpu(sb->srq_limit);
dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
sbuf.sb, sbuf.dma_addr);
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 68c621ff59d0..5a91cbda4aee 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -2086,7 +2086,7 @@ int init_credit_return(struct hfi1_devdata *dd)
"Unable to allocate credit return DMA range for NUMA %d\n",
i);
ret = -ENOMEM;
- goto done;
+ goto free_cr_base;
}
}
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2094,6 +2094,10 @@ int init_credit_return(struct hfi1_devdata *dd)
ret = 0;
done:
return ret;
+
+free_cr_base:
+ free_credit_return(dd);
+ goto done;
}
void free_credit_return(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 6e5ac2023328..b67d23b1f286 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int rval = 0;
- if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) {
+ if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
__sdma_txclean(dd, tx);
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
index 8fb752f2eda2..2cb4b96db721 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -346,6 +346,7 @@ enum irdma_cqp_op_type {
#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
#define IRDMA_AE_RESET_SENT 0x0601
#define IRDMA_AE_TERMINATE_SENT 0x0602
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index bd4b2b896444..ad50b77282f8 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -387,6 +387,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LLP_TOO_MANY_RETRIES:
case IRDMA_AE_LCE_QP_CATASTROPHIC:
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
default:
@@ -570,6 +571,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf,
dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx);
irq_update_affinity_hint(msix_vec->irq, NULL);
free_irq(msix_vec->irq, dev_id);
+ if (rf == dev_id) {
+ tasklet_kill(&rf->dpc_tasklet);
+ } else {
+ struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id;
+
+ tasklet_kill(&iwceq->dpc_tasklet);
+ }
}
/**
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index b5eb8d421988..0b046c061742 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -839,7 +839,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
- init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags)
+ init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
+ init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
+ init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta)
return -EINVAL;
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
@@ -2184,9 +2186,8 @@ static int irdma_create_cq(struct ib_cq *ibcq,
info.cq_base_pa = iwcq->kmem.pa;
}
- if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
- info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
- (u32)IRDMA_MAX_CQ_READ_THRESH);
+ info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
+ (u32)IRDMA_MAX_CQ_READ_THRESH);
if (irdma_sc_cq_init(cq, &info)) {
ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n");
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index f87531318feb..a78a067e3ce7 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID ||
+ i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP))
+ if (!MLX5_CAP_GEN(mdev, roce) ||
+ !MLX5_CAP_ROCE(mdev, roce_cc_general))
+ continue;
+
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
dbg_cc_params->params[i].port_num = port_num;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 869369cb5b5f..253fea374a72 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2949,7 +2949,7 @@ DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
UVERBS_IDR_ANY_OBJECT,
- UVERBS_ACCESS_WRITE,
+ UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index df1d1b0a3ef7..9947feb7fb8a 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
*/
copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
left);
- memcpy(eseg->inline_hdr.start, pdata, copysz);
+ memcpy(eseg->inline_hdr.data, pdata, copysz);
stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
sizeof(eseg->inline_hdr.start) + copysz, 16);
*size += stride / 16;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 7887a6786ed4..f118ce0a9a61 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
/* RQ - read access only (0) */
rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
ureq.rq_len, true, 0, alloc_and_init);
- if (rc)
+ if (rc) {
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ qedr_free_pbl(dev, &qp->usq.pbl_info,
+ qp->usq.pbl_tbl);
+ } else {
+ kfree(qp->usq.pbl_tbl);
+ }
return rc;
+ }
}
memset(&in_params, 0, sizeof(in_params));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7a5be705d718..6f2a688fccbf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1272,10 +1272,10 @@ static int ipoib_get_iflink(const struct net_device *dev)
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
- return dev->ifindex;
+ return READ_ONCE(dev->ifindex);
/* child/vlan interface */
- return priv->parent->ifindex;
+ return READ_ONCE(priv->parent->ifindex);
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 58f70cfec45a..040234c01be4 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444);
MODULE_PARM_DESC(srpt_srq_size,
"Shared receive queue (SRQ) size.");
+static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp)
+{
+ return kstrtou64(buffer, 16, (u64 *)kp->arg);
+}
static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg);
}
-module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid,
- 0444);
+module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x,
+ &srpt_service_guid, 0444);
MODULE_PARM_DESC(srpt_service_guid,
"Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA.");
@@ -210,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s)
/**
* srpt_qp_event - QP event callback function
* @event: Description of the event that occurred.
- * @ch: SRPT RDMA channel.
+ * @ptr: SRPT RDMA channel.
*/
-static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
+static void srpt_qp_event(struct ib_event *event, void *ptr)
{
+ struct srpt_rdma_ch *ch = ptr;
+
pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n",
event->event, ch, ch->sess_name, ch->qp->qp_num,
get_ch_state_name(ch->state));
@@ -1807,8 +1813,7 @@ retry:
ch->cq_size = ch->rq_size + sq_size;
qp_init->qp_context = (void *)ch;
- qp_init->event_handler
- = (void(*)(struct ib_event *, void*))srpt_qp_event;
+ qp_init->event_handler = srpt_qp_event;
qp_init->send_cq = ch->cq;
qp_init->recv_cq = ch->cq;
qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index b1244d7df6cc..14c828adebf7 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -130,7 +130,12 @@ static const struct xpad_device {
{ 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 },
{ 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 },
{ 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 },
+ { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */
+ { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */
{ 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE },
+ { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE },
+ { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */
+ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE },
{ 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX },
{ 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX },
{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
@@ -294,6 +299,7 @@ static const struct xpad_device {
{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 },
{ 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 },
{ 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+ { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 },
{ 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
@@ -462,6 +468,7 @@ static const struct usb_device_id xpad_table[] = {
{ USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */
XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */
XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */
+ XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */
XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */
XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */
XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */
@@ -491,6 +498,7 @@ static const struct usb_device_id xpad_table[] = {
XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */
XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */
XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */
+ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */
XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */
XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */
XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 13ef6284223d..7f67f9f2946b 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -811,7 +811,6 @@ static int atkbd_probe(struct atkbd *atkbd)
{
struct ps2dev *ps2dev = &atkbd->ps2dev;
unsigned char param[2];
- bool skip_getid;
/*
* Some systems, where the bit-twiddling when testing the io-lines of the
@@ -825,6 +824,11 @@ static int atkbd_probe(struct atkbd *atkbd)
"keyboard reset failed on %s\n",
ps2dev->serio->phys);
+ if (atkbd_skip_getid(atkbd)) {
+ atkbd->id = 0xab83;
+ goto deactivate_kbd;
+ }
+
/*
* Then we check the keyboard ID. We should get 0xab83 under normal conditions.
* Some keyboards report different values, but the first byte is always 0xab or
@@ -833,18 +837,17 @@ static int atkbd_probe(struct atkbd *atkbd)
*/
param[0] = param[1] = 0xa5; /* initialize with invalid values */
- skip_getid = atkbd_skip_getid(atkbd);
- if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) {
+ if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) {
/*
- * If the get ID command was skipped or failed, we check if we can at least set
+ * If the get ID command failed, we check if we can at least set
* the LEDs on the keyboard. This should work on every keyboard out there.
* It also turns the LEDs off, which we want anyway.
*/
param[0] = 0;
if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
return -1;
- atkbd->id = skip_getid ? 0xab83 : 0xabba;
+ atkbd->id = 0xabba;
return 0;
}
@@ -860,6 +863,7 @@ static int atkbd_probe(struct atkbd *atkbd)
return -1;
}
+deactivate_kbd:
/*
* Make sure nothing is coming from the keyboard and disturbs our
* internal state.
diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c
index ba00ecfbd343..b41fd1240f43 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c
@@ -315,12 +315,10 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
error = devm_gpio_request_one(dev, button->gpio,
flags, button->desc ? : DRV_NAME);
- if (error) {
- dev_err(dev,
- "unable to claim gpio %u, err=%d\n",
- button->gpio, error);
- return error;
- }
+ if (error)
+ return dev_err_probe(dev, error,
+ "unable to claim gpio %u\n",
+ button->gpio);
bdata->gpiod = gpio_to_desc(button->gpio);
if (!bdata->gpiod) {
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 258d5fe3d395..42eaebb3bf5c 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -978,12 +978,12 @@ static int rmi_driver_remove(struct device *dev)
rmi_disable_irq(rmi_dev, false);
- irq_domain_remove(data->irqdomain);
- data->irqdomain = NULL;
-
rmi_f34_remove_sysfs(rmi_dev);
rmi_free_function_list(rmi_dev);
+ irq_domain_remove(data->irqdomain);
+ data->irqdomain = NULL;
+
return 0;
}
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index b585b1dab870..dfc6c581873b 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -635,6 +635,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOAUX)
},
{
+ /* Fujitsu Lifebook U728 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+ },
+ {
/* Gigabyte M912 */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
@@ -1210,6 +1218,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"),
},
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index af32fbe57b63..b068ff8afbc9 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
}
}
- if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) {
+ /* Some devices with gpio_int_idx 0 list a third unused GPIO */
+ if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) {
ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO;
gpio_mapping = acpi_goodix_int_first_gpios;
} else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) {
diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c
index 20331e119beb..03d626776ba1 100644
--- a/drivers/interconnect/qcom/sc8180x.c
+++ b/drivers/interconnect/qcom/sc8180x.c
@@ -1372,6 +1372,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
static struct qcom_icc_bcm bcm_co0 = {
.name = "CO0",
+ .keepalive = true,
.num_nodes = 1,
.nodes = { &slv_qns_cdsp_mem_noc }
};
diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c
index 629faa4c9aae..fc22cecf650f 100644
--- a/drivers/interconnect/qcom/sm8550.c
+++ b/drivers/interconnect/qcom/sm8550.c
@@ -2223,6 +2223,7 @@ static struct platform_driver qnoc_driver = {
.driver = {
.name = "qnoc-sm8550",
.of_match_table = qnoc_of_match,
+ .sync_state = icc_sync_state,
},
};
diff --git a/drivers/interconnect/qcom/sm8650.c b/drivers/interconnect/qcom/sm8650.c
index b83de54577b6..b962e6c233ef 100644
--- a/drivers/interconnect/qcom/sm8650.c
+++ b/drivers/interconnect/qcom/sm8650.c
@@ -1160,7 +1160,7 @@ static struct qcom_icc_node qns_gemnoc_sf = {
static struct qcom_icc_bcm bcm_acv = {
.name = "ACV",
- .enable_mask = BIT(3),
+ .enable_mask = BIT(0),
.num_nodes = 1,
.nodes = { &ebi },
};
diff --git a/drivers/interconnect/qcom/x1e80100.c b/drivers/interconnect/qcom/x1e80100.c
index d19501d913b3..cbaf4f9c41be 100644
--- a/drivers/interconnect/qcom/x1e80100.c
+++ b/drivers/interconnect/qcom/x1e80100.c
@@ -1586,6 +1586,7 @@ static struct qcom_icc_node qns_pcie_south_gem_noc_pcie = {
static struct qcom_icc_bcm bcm_acv = {
.name = "ACV",
+ .enable_mask = BIT(3),
.num_nodes = 1,
.nodes = { &ebi },
};
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 9a29d742617e..0af39bbbe3a3 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -163,6 +163,9 @@ config IOMMU_SVA
select IOMMU_MM_DATA
bool
+config IOMMU_IOPF
+ bool
+
config FSL_PAMU
bool "Freescale IOMMU support"
depends on PCI
@@ -179,7 +182,7 @@ config FSL_PAMU
config MSM_IOMMU
bool "MSM IOMMU Support"
depends on ARM
- depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST
+ depends on ARCH_QCOM || COMPILE_TEST
select IOMMU_API
select IOMMU_IO_PGTABLE_ARMV7S
help
@@ -196,7 +199,7 @@ source "drivers/iommu/iommufd/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
- select DMAR_TABLE
+ select DMAR_TABLE if INTEL_IOMMU
help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
@@ -398,6 +401,7 @@ config ARM_SMMU_V3_SVA
bool "Shared Virtual Addressing support for the ARM SMMUv3"
depends on ARM_SMMU_V3
select IOMMU_SVA
+ select IOMMU_IOPF
select MMU_NOTIFIER
help
Support for sharing process address spaces with devices using the
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 95ad9dbfbda0..542760d963ec 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
+obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8b3601f285fd..f482aab420f7 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -39,20 +39,16 @@ extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
bool amd_iommu_v2_supported(void);
-struct amd_iommu *get_amd_iommu(unsigned int idx);
-u8 amd_iommu_pc_get_max_banks(unsigned int idx);
-bool amd_iommu_pc_supported(void);
-u8 amd_iommu_pc_get_max_counters(unsigned int idx);
-int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value);
-int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value);
/* Device capabilities */
int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev);
void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev);
-int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
+/* GCR3 setup */
+int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data,
+ ioasid_t pasid, unsigned long gcr3);
+int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid);
+
/*
* This function flushes all internal caches of
* the IOMMU used by this driver.
@@ -63,10 +59,10 @@ void amd_iommu_domain_update(struct protection_domain *domain);
void amd_iommu_domain_flush_complete(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
-int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
-int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
- unsigned long cr3);
-int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
+void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
+ ioasid_t pasid, u64 address, size_t size);
+void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
+ ioasid_t pasid);
#ifdef CONFIG_IRQ_REMAP
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
@@ -77,10 +73,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
}
#endif
-#define PPR_SUCCESS 0x0
-#define PPR_INVALID 0x1
-#define PPR_FAILURE 0xf
-
int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag);
@@ -150,6 +142,21 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
return page ? page_address(page) : NULL;
}
+/*
+ * This must be called after device probe completes. During probe
+ * use rlookup_amd_iommu() get the iommu.
+ */
+static inline struct amd_iommu *get_amd_iommu_from_dev(struct device *dev)
+{
+ return iommu_get_iommu_dev(dev, struct amd_iommu, iommu);
+}
+
+/* This must be called after device probe completes. */
+static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_data *dev_data)
+{
+ return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu);
+}
+
bool translation_pre_enabled(struct amd_iommu *iommu);
bool amd_iommu_is_attach_deferred(struct device *dev);
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
@@ -164,5 +171,4 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
-extern bool amd_iommu_snp_en;
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 809d74faa1a5..d1fed5fc219b 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -453,15 +453,6 @@
#define MAX_DOMAIN_ID 65536
-/* Protection domain flags */
-#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */
-#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops
- domain for an IOMMU */
-#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page
- translation */
-#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
-#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
-
/* Timeout stuff */
#define LOOP_TIMEOUT 100000
#define MMIO_STATUS_TIMEOUT 2000000
@@ -513,14 +504,6 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define for_each_iommu_safe(iommu, next) \
list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
-#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
-#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
-#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
-#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
-#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
-#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
-
-
struct amd_iommu;
struct iommu_domain;
struct irq_domain;
@@ -541,6 +524,13 @@ struct amd_irte_ops;
#define io_pgtable_cfg_to_data(x) \
container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+struct gcr3_tbl_info {
+ u64 *gcr3_tbl; /* Guest CR3 table */
+ int glx; /* Number of levels for GCR3 table */
+ u32 pasid_cnt; /* Track attached PASIDs */
+ u16 domid; /* Per device domain ID */
+};
+
struct amd_io_pgtable {
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable iop;
@@ -549,6 +539,11 @@ struct amd_io_pgtable {
u64 *pgd; /* v2 pgtable pgd pointer */
};
+enum protection_domain_mode {
+ PD_MODE_V1 = 1,
+ PD_MODE_V2,
+};
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -560,10 +555,8 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- int glx; /* Number of levels for GCR3 table */
int nid; /* Node ID */
- u64 *gcr3_tbl; /* Guest CR3 table */
- unsigned long flags; /* flags to find out type of domain */
+ enum protection_domain_mode pd_mode; /* Track page table type */
bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
@@ -816,6 +809,7 @@ struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
+ struct gcr3_tbl_info gcr3_info; /* Per-device GCR3 table */
struct device *dev;
u16 devid; /* PCI Device ID */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index c83bd0c2a1c9..e7a44929f0da 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -30,6 +30,7 @@
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
#include <asm/set_memory.h>
+#include <asm/sev.h>
#include <linux/crash_dump.h>
@@ -2068,6 +2069,9 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
/* Prevent binding other PCI device drivers to IOMMU devices */
iommu->dev->match_driver = false;
+ /* ACPI _PRT won't have an IRQ for IOMMU */
+ iommu->dev->irq_managed = 1;
+
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
&iommu->cap);
@@ -2769,6 +2773,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_set_exclusion_range(iommu);
+ iommu_enable_gt(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
@@ -2825,6 +2830,7 @@ static void early_enable_iommus(void)
iommu_disable_irtcachedis(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
+ iommu_enable_gt(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
@@ -2838,10 +2844,8 @@ static void enable_iommus_v2(void)
{
struct amd_iommu *iommu;
- for_each_iommu(iommu) {
+ for_each_iommu(iommu)
iommu_enable_ppr_log(iommu);
- iommu_enable_gt(iommu);
- }
}
static void enable_iommus_vapic(void)
@@ -3221,6 +3225,36 @@ out:
return true;
}
+static void iommu_snp_enable(void)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return;
+ /*
+ * The SNP support requires that IOMMU must be enabled, and is
+ * not configured in the passthrough mode.
+ */
+ if (no_iommu || iommu_default_passthrough()) {
+ pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+ return;
+ }
+
+ amd_iommu_snp_en = check_feature(FEATURE_SNP);
+ if (!amd_iommu_snp_en) {
+ pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+ return;
+ }
+
+ pr_info("IOMMU SNP support enabled.\n");
+
+ /* Enforce IOMMU v1 pagetable when SNP is enabled. */
+ if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
+ amd_iommu_pgtable = AMD_IOMMU_V1;
+ }
+#endif
+}
+
/****************************************************************************
*
* AMD IOMMU Initialization State Machine
@@ -3256,6 +3290,7 @@ static int __init state_next(void)
break;
case IOMMU_ENABLED:
register_syscore_ops(&amd_iommu_syscore_ops);
+ iommu_snp_enable();
ret = amd_iommu_init_pci();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
break;
@@ -3694,13 +3729,11 @@ u8 amd_iommu_pc_get_max_banks(unsigned int idx)
return 0;
}
-EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
bool amd_iommu_pc_supported(void)
{
return amd_iommu_pc_present;
}
-EXPORT_SYMBOL(amd_iommu_pc_supported);
u8 amd_iommu_pc_get_max_counters(unsigned int idx)
{
@@ -3711,7 +3744,6 @@ u8 amd_iommu_pc_get_max_counters(unsigned int idx)
return 0;
}
-EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value, bool is_write)
@@ -3767,40 +3799,85 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
}
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-int amd_iommu_snp_enable(void)
+#ifdef CONFIG_KVM_AMD_SEV
+static int iommu_page_make_shared(void *page)
{
- /*
- * The SNP support requires that IOMMU must be enabled, and is
- * not configured in the passthrough mode.
- */
- if (no_iommu || iommu_default_passthrough()) {
- pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
- return -EINVAL;
+ unsigned long paddr, pfn;
+
+ paddr = iommu_virt_to_phys(page);
+ /* Cbit maybe set in the paddr */
+ pfn = __sme_clr(paddr) >> PAGE_SHIFT;
+
+ if (!(pfn % PTRS_PER_PMD)) {
+ int ret, level;
+ bool assigned;
+
+ ret = snp_lookup_rmpentry(pfn, &assigned, &level);
+ if (ret) {
+ pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret);
+ return ret;
+ }
+
+ if (!assigned) {
+ pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn);
+ return -EINVAL;
+ }
+
+ if (level > PG_LEVEL_4K) {
+ ret = psmash(pfn);
+ if (!ret)
+ goto done;
+
+ pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n",
+ pfn, ret, level);
+ return ret;
+ }
}
- /*
- * Prevent enabling SNP after IOMMU_ENABLED state because this process
- * affect how IOMMU driver sets up data structures and configures
- * IOMMU hardware.
- */
- if (init_state > IOMMU_ENABLED) {
- pr_err("SNP: Too late to enable SNP for IOMMU.\n");
- return -EINVAL;
+done:
+ return rmp_make_shared(pfn, PG_LEVEL_4K);
+}
+
+static int iommu_make_shared(void *va, size_t size)
+{
+ void *page;
+ int ret;
+
+ if (!va)
+ return 0;
+
+ for (page = va; page < (va + size); page += PAGE_SIZE) {
+ ret = iommu_page_make_shared(page);
+ if (ret)
+ return ret;
}
- amd_iommu_snp_en = check_feature(FEATURE_SNP);
+ return 0;
+}
+
+int amd_iommu_snp_disable(void)
+{
+ struct amd_iommu *iommu;
+ int ret;
+
if (!amd_iommu_snp_en)
- return -EINVAL;
+ return 0;
- pr_info("SNP enabled\n");
+ for_each_iommu(iommu) {
+ ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE);
+ if (ret)
+ return ret;
- /* Enforce IOMMU v1 pagetable when SNP is enabled. */
- if (amd_iommu_pgtable != AMD_IOMMU_V1) {
- pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
+ ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE);
+ if (ret)
+ return ret;
+
+ ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE);
+ if (ret)
+ return ret;
}
return 0;
}
+EXPORT_SYMBOL_GPL(amd_iommu_snp_disable);
#endif
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 6d69ba60744f..93489d2db4e8 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -350,38 +350,26 @@ static const struct iommu_flush_ops v2_flush_ops = {
static void v2_free_pgtable(struct io_pgtable *iop)
{
- struct protection_domain *pdom;
struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
- pdom = container_of(pgtable, struct protection_domain, iop);
- if (!(pdom->flags & PD_IOMMUV2_MASK))
+ if (!pgtable || !pgtable->pgd)
return;
- /* Clear gcr3 entry */
- amd_iommu_domain_clear_gcr3(&pdom->domain, 0);
-
- /* Make changes visible to IOMMUs */
- amd_iommu_domain_update(pdom);
-
/* Free page table */
free_pgtable(pgtable->pgd, get_pgtable_level());
+ pgtable->pgd = NULL;
}
static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
struct protection_domain *pdom = (struct protection_domain *)cookie;
- int ret;
int ias = IOMMU_IN_ADDR_BIT_SIZE;
pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC);
if (!pgtable->pgd)
return NULL;
- ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd));
- if (ret)
- goto err_free_pgd;
-
if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
ias = 57;
@@ -395,11 +383,6 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
cfg->tlb = &v2_flush_ops;
return &pgtable->iop;
-
-err_free_pgd:
- free_pgtable_page(pgtable->pgd);
-
- return NULL;
}
struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4283dd8191f0..d35c1b8c8e65 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -45,10 +45,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN (1)
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-
/* Reserved IOVA ranges */
#define MSI_RANGE_START (0xfee00000)
#define MSI_RANGE_END (0xfeefffff)
@@ -79,6 +75,9 @@ struct kmem_cache *amd_iommu_irq_cache;
static void detach_device(struct device *dev);
+static void set_dte_entry(struct amd_iommu *iommu,
+ struct iommu_dev_data *dev_data);
+
/****************************************************************************
*
* Helper functions
@@ -87,7 +86,7 @@ static void detach_device(struct device *dev);
static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom)
{
- return (pdom && (pdom->flags & PD_IOMMUV2_MASK));
+ return (pdom && (pdom->pd_mode == PD_MODE_V2));
}
static inline int get_acpihid_device_id(struct device *dev,
@@ -1388,14 +1387,9 @@ void amd_iommu_flush_all_caches(struct amd_iommu *iommu)
static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 address,
size_t size, ioasid_t pasid, bool gn)
{
- struct amd_iommu *iommu;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
struct iommu_cmd cmd;
- int qdep;
-
- qdep = dev_data->ats_qdep;
- iommu = rlookup_amd_iommu(dev_data->dev);
- if (!iommu)
- return -EINVAL;
+ int qdep = dev_data->ats_qdep;
build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address,
size, pasid, gn);
@@ -1415,16 +1409,12 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data)
*/
static int device_flush_dte(struct iommu_dev_data *dev_data)
{
- struct amd_iommu *iommu;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
struct pci_dev *pdev = NULL;
struct amd_iommu_pci_seg *pci_seg;
u16 alias;
int ret;
- iommu = rlookup_amd_iommu(dev_data->dev);
- if (!iommu)
- return -EINVAL;
-
if (dev_is_pci(dev_data->dev))
pdev = to_pci_dev(dev_data->dev);
@@ -1453,27 +1443,37 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
return ret;
}
-/*
- * TLB invalidation function which is called from the mapping functions.
- * It invalidates a single PTE if the range to flush is within a single
- * page. Otherwise it flushes the whole TLB of the IOMMU.
- */
-static void __domain_flush_pages(struct protection_domain *domain,
+static int domain_flush_pages_v2(struct protection_domain *pdom,
u64 address, size_t size)
{
struct iommu_dev_data *dev_data;
struct iommu_cmd cmd;
- int ret = 0, i;
- ioasid_t pasid = IOMMU_NO_PASID;
- bool gn = false;
+ int ret = 0;
- if (pdom_is_v2_pgtbl_mode(domain))
- gn = true;
+ list_for_each_entry(dev_data, &pdom->dev_list, list) {
+ struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
+ u16 domid = dev_data->gcr3_info.domid;
- build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, gn);
+ build_inv_iommu_pages(&cmd, address, size,
+ domid, IOMMU_NO_PASID, true);
+
+ ret |= iommu_queue_command(iommu, &cmd);
+ }
+
+ return ret;
+}
+
+static int domain_flush_pages_v1(struct protection_domain *pdom,
+ u64 address, size_t size)
+{
+ struct iommu_cmd cmd;
+ int ret = 0, i;
+
+ build_inv_iommu_pages(&cmd, address, size,
+ pdom->id, IOMMU_NO_PASID, false);
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (!domain->dev_iommu[i])
+ if (!pdom->dev_iommu[i])
continue;
/*
@@ -1483,6 +1483,28 @@ static void __domain_flush_pages(struct protection_domain *domain,
ret |= iommu_queue_command(amd_iommus[i], &cmd);
}
+ return ret;
+}
+
+/*
+ * TLB invalidation function which is called from the mapping functions.
+ * It flushes range of PTEs of the domain.
+ */
+static void __domain_flush_pages(struct protection_domain *domain,
+ u64 address, size_t size)
+{
+ struct iommu_dev_data *dev_data;
+ int ret = 0;
+ ioasid_t pasid = IOMMU_NO_PASID;
+ bool gn = false;
+
+ if (pdom_is_v2_pgtbl_mode(domain)) {
+ gn = true;
+ ret = domain_flush_pages_v2(domain, address, size);
+ } else {
+ ret = domain_flush_pages_v1(domain, address, size);
+ }
+
list_for_each_entry(dev_data, &domain->dev_list, list) {
if (!dev_data->ats_enabled)
@@ -1551,6 +1573,29 @@ static void amd_iommu_domain_flush_all(struct protection_domain *domain)
CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
}
+void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
+ ioasid_t pasid, u64 address, size_t size)
+{
+ struct iommu_cmd cmd;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
+
+ build_inv_iommu_pages(&cmd, address, size,
+ dev_data->gcr3_info.domid, pasid, true);
+ iommu_queue_command(iommu, &cmd);
+
+ if (dev_data->ats_enabled)
+ device_flush_iotlb(dev_data, address, size, pasid, true);
+
+ iommu_completion_wait(iommu);
+}
+
+void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
+ ioasid_t pasid)
+{
+ amd_iommu_dev_flush_pasid_pages(dev_data, 0,
+ CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid);
+}
+
void amd_iommu_domain_flush_complete(struct protection_domain *domain)
{
int i;
@@ -1592,6 +1637,49 @@ static void domain_flush_devices(struct protection_domain *domain)
device_flush_dte(dev_data);
}
+static void update_device_table(struct protection_domain *domain)
+{
+ struct iommu_dev_data *dev_data;
+
+ list_for_each_entry(dev_data, &domain->dev_list, list) {
+ struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
+
+ set_dte_entry(iommu, dev_data);
+ clone_aliases(iommu, dev_data->dev);
+ }
+}
+
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
+{
+ update_device_table(domain);
+ domain_flush_devices(domain);
+}
+
+void amd_iommu_domain_update(struct protection_domain *domain)
+{
+ /* Update device table */
+ amd_iommu_update_and_flush_device_table(domain);
+
+ /* Flush domain TLB(s) and wait for completion */
+ amd_iommu_domain_flush_all(domain);
+}
+
+int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
+ int status, int tag)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ struct iommu_cmd cmd;
+
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ iommu = get_amd_iommu_from_dev(&pdev->dev);
+
+ build_complete_ppr(&cmd, dev_data->devid, pasid, status,
+ tag, dev_data->pri_tlp);
+
+ return iommu_queue_command(iommu, &cmd);
+}
+
/****************************************************************************
*
* The next functions belong to the domain allocation. A domain is
@@ -1656,16 +1744,22 @@ static void free_gcr3_tbl_level2(u64 *tbl)
}
}
-static void free_gcr3_table(struct protection_domain *domain)
+static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
{
- if (domain->glx == 2)
- free_gcr3_tbl_level2(domain->gcr3_tbl);
- else if (domain->glx == 1)
- free_gcr3_tbl_level1(domain->gcr3_tbl);
+ if (gcr3_info->glx == 2)
+ free_gcr3_tbl_level2(gcr3_info->gcr3_tbl);
+ else if (gcr3_info->glx == 1)
+ free_gcr3_tbl_level1(gcr3_info->gcr3_tbl);
else
- BUG_ON(domain->glx != 0);
+ WARN_ON_ONCE(gcr3_info->glx != 0);
- free_page((unsigned long)domain->gcr3_tbl);
+ gcr3_info->glx = 0;
+
+ /* Free per device domain ID */
+ domain_id_free(gcr3_info->domid);
+
+ free_page((unsigned long)gcr3_info->gcr3_tbl);
+ gcr3_info->gcr3_tbl = NULL;
}
/*
@@ -1684,33 +1778,133 @@ static int get_gcr3_levels(int pasids)
return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels;
}
-/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
-static int setup_gcr3_table(struct protection_domain *domain, int pasids)
+static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
+ struct amd_iommu *iommu, int pasids)
{
int levels = get_gcr3_levels(pasids);
+ int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
- domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
- if (domain->gcr3_tbl == NULL)
+ if (gcr3_info->gcr3_tbl)
+ return -EBUSY;
+
+ /* Allocate per device domain ID */
+ gcr3_info->domid = domain_id_alloc();
+
+ gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_ATOMIC);
+ if (gcr3_info->gcr3_tbl == NULL) {
+ domain_id_free(gcr3_info->domid);
return -ENOMEM;
+ }
- domain->glx = levels;
- domain->flags |= PD_IOMMUV2_MASK;
+ gcr3_info->glx = levels;
- amd_iommu_domain_update(domain);
+ return 0;
+}
+static u64 *__get_gcr3_pte(struct gcr3_tbl_info *gcr3_info,
+ ioasid_t pasid, bool alloc)
+{
+ int index;
+ u64 *pte;
+ u64 *root = gcr3_info->gcr3_tbl;
+ int level = gcr3_info->glx;
+
+ while (true) {
+
+ index = (pasid >> (9 * level)) & 0x1ff;
+ pte = &root[index];
+
+ if (level == 0)
+ break;
+
+ if (!(*pte & GCR3_VALID)) {
+ if (!alloc)
+ return NULL;
+
+ root = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (root == NULL)
+ return NULL;
+
+ *pte = iommu_virt_to_phys(root) | GCR3_VALID;
+ }
+
+ root = iommu_phys_to_virt(*pte & PAGE_MASK);
+
+ level -= 1;
+ }
+
+ return pte;
+}
+
+static int update_gcr3(struct iommu_dev_data *dev_data,
+ ioasid_t pasid, unsigned long gcr3, bool set)
+{
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ u64 *pte;
+
+ pte = __get_gcr3_pte(gcr3_info, pasid, true);
+ if (pte == NULL)
+ return -ENOMEM;
+
+ if (set)
+ *pte = (gcr3 & PAGE_MASK) | GCR3_VALID;
+ else
+ *pte = 0;
+
+ amd_iommu_dev_flush_pasid_all(dev_data, pasid);
return 0;
}
-static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
- struct protection_domain *domain, bool ats, bool ppr)
+int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid,
+ unsigned long gcr3)
+{
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ int ret;
+
+ iommu_group_mutex_assert(dev_data->dev);
+
+ ret = update_gcr3(dev_data, pasid, gcr3, true);
+ if (ret)
+ return ret;
+
+ gcr3_info->pasid_cnt++;
+ return ret;
+}
+
+int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
+{
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ int ret;
+
+ iommu_group_mutex_assert(dev_data->dev);
+
+ ret = update_gcr3(dev_data, pasid, 0, false);
+ if (ret)
+ return ret;
+
+ gcr3_info->pasid_cnt--;
+ return ret;
+}
+
+static void set_dte_entry(struct amd_iommu *iommu,
+ struct iommu_dev_data *dev_data)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
+ u16 devid = dev_data->devid;
+ u16 domid;
+ struct protection_domain *domain = dev_data->domain;
struct dev_table_entry *dev_table = get_dev_table(iommu);
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+
+ if (gcr3_info && gcr3_info->gcr3_tbl)
+ domid = dev_data->gcr3_info.domid;
+ else
+ domid = domain->id;
if (domain->iop.mode != PAGE_MODE_NONE)
pte_root = iommu_virt_to_phys(domain->iop.root);
@@ -1724,23 +1918,23 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
* When SNP is enabled, Only set TV bit when IOMMU
* page translation is in use.
*/
- if (!amd_iommu_snp_en || (domain->id != 0))
+ if (!amd_iommu_snp_en || (domid != 0))
pte_root |= DTE_FLAG_TV;
flags = dev_table[devid].data[1];
- if (ats)
+ if (dev_data->ats_enabled)
flags |= DTE_FLAG_IOTLB;
- if (ppr)
+ if (dev_data->ppr)
pte_root |= 1ULL << DEV_ENTRY_PPR;
if (domain->dirty_tracking)
pte_root |= DTE_FLAG_HAD;
- if (domain->flags & PD_IOMMUV2_MASK) {
- u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
- u64 glx = domain->glx;
+ if (gcr3_info && gcr3_info->gcr3_tbl) {
+ u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
+ u64 glx = gcr3_info->glx;
u64 tmp;
pte_root |= DTE_FLAG_GV;
@@ -1768,12 +1962,13 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
}
- if (domain->flags & PD_GIOV_MASK)
+ /* GIOV is supported with V2 page table mode only */
+ if (pdom_is_v2_pgtbl_mode(domain))
pte_root |= DTE_FLAG_GIOV;
}
flags &= ~DEV_DOMID_MASK;
- flags |= domain->id;
+ flags |= domid;
old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
dev_table[devid].data[1] = flags;
@@ -1804,16 +1999,11 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
amd_iommu_apply_erratum_63(iommu, devid);
}
-static void do_attach(struct iommu_dev_data *dev_data,
- struct protection_domain *domain)
+static int do_attach(struct iommu_dev_data *dev_data,
+ struct protection_domain *domain)
{
- struct amd_iommu *iommu;
- bool ats;
-
- iommu = rlookup_amd_iommu(dev_data->dev);
- if (!iommu)
- return;
- ats = dev_data->ats_enabled;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ int ret = 0;
/* Update data structures */
dev_data->domain = domain;
@@ -1827,22 +2017,40 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_iommu[iommu->index] += 1;
domain->dev_cnt += 1;
+ /* Init GCR3 table and update device table */
+ if (domain->pd_mode == PD_MODE_V2) {
+ /* By default, setup GCR3 table to support single PASID */
+ ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, 1);
+ if (ret)
+ return ret;
+
+ ret = update_gcr3(dev_data, 0,
+ iommu_virt_to_phys(domain->iop.pgd), true);
+ if (ret) {
+ free_gcr3_table(&dev_data->gcr3_info);
+ return ret;
+ }
+ }
+
/* Update device table */
- set_dte_entry(iommu, dev_data->devid, domain,
- ats, dev_data->ppr);
+ set_dte_entry(iommu, dev_data);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
+
+ return ret;
}
static void do_detach(struct iommu_dev_data *dev_data)
{
struct protection_domain *domain = dev_data->domain;
- struct amd_iommu *iommu;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
- iommu = rlookup_amd_iommu(dev_data->dev);
- if (!iommu)
- return;
+ /* Clear GCR3 table */
+ if (domain->pd_mode == PD_MODE_V2) {
+ update_gcr3(dev_data, 0, 0, false);
+ free_gcr3_table(&dev_data->gcr3_info);
+ }
/* Update data structures */
dev_data->domain = NULL;
@@ -1886,7 +2094,7 @@ static int attach_device(struct device *dev,
if (dev_is_pci(dev))
pdev_enable_caps(to_pci_dev(dev));
- do_attach(dev_data, domain);
+ ret = do_attach(dev_data, domain);
out:
spin_unlock(&dev_data->lock);
@@ -1954,8 +2162,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
ret = iommu_init_device(iommu, dev);
if (ret) {
- if (ret != -ENOTSUPP)
- dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
+ dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
iommu_dev = ERR_PTR(ret);
iommu_ignore_device(iommu, dev);
} else {
@@ -2000,42 +2207,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
/*****************************************************************************
*
- * The next functions belong to the dma_ops mapping/unmapping code.
- *
- *****************************************************************************/
-
-static void update_device_table(struct protection_domain *domain)
-{
- struct iommu_dev_data *dev_data;
-
- list_for_each_entry(dev_data, &domain->dev_list, list) {
- struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
-
- if (!iommu)
- continue;
- set_dte_entry(iommu, dev_data->devid, domain,
- dev_data->ats_enabled, dev_data->ppr);
- clone_aliases(iommu, dev_data->dev);
- }
-}
-
-void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
-{
- update_device_table(domain);
- domain_flush_devices(domain);
-}
-
-void amd_iommu_domain_update(struct protection_domain *domain)
-{
- /* Update device table */
- amd_iommu_update_and_flush_device_table(domain);
-
- /* Flush domain TLB(s) and wait for completion */
- amd_iommu_domain_flush_all(domain);
-}
-
-/*****************************************************************************
- *
* The following functions belong to the exported interface of AMD IOMMU
*
* This interface allows access to lower level functions of the IOMMU
@@ -2070,9 +2241,6 @@ static void protection_domain_free(struct protection_domain *domain)
if (domain->iop.pgtbl_cfg.tlb)
free_io_pgtable_ops(&domain->iop.iop.ops);
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
-
if (domain->iop.root)
free_page((unsigned long)domain->iop.root);
@@ -2094,19 +2262,16 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
return -ENOMEM;
}
+ domain->pd_mode = PD_MODE_V1;
amd_iommu_domain_set_pgtable(domain, pt_root, mode);
return 0;
}
-static int protection_domain_init_v2(struct protection_domain *domain)
+static int protection_domain_init_v2(struct protection_domain *pdom)
{
- domain->flags |= PD_GIOV_MASK;
-
- domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
-
- if (setup_gcr3_table(domain, 1))
- return -ENOMEM;
+ pdom->pd_mode = PD_MODE_V2;
+ pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
return 0;
}
@@ -2194,11 +2359,8 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
struct protection_domain *domain;
struct amd_iommu *iommu = NULL;
- if (dev) {
- iommu = rlookup_amd_iommu(dev);
- if (!iommu)
- return ERR_PTR(-ENODEV);
- }
+ if (dev)
+ iommu = get_amd_iommu_from_dev(dev);
/*
* Since DTE[Mode]=0 is prohibited on SNP-enabled system,
@@ -2279,7 +2441,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct protection_domain *domain = to_pdomain(dom);
- struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+ struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
int ret;
/*
@@ -2337,7 +2499,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
int prot = 0;
int ret = -EINVAL;
- if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ if ((domain->pd_mode == PD_MODE_V1) &&
(domain->iop.mode == PAGE_MODE_NONE))
return -EINVAL;
@@ -2383,7 +2545,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
size_t r;
- if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ if ((domain->pd_mode == PD_MODE_V1) &&
(domain->iop.mode == PAGE_MODE_NONE))
return 0;
@@ -2418,7 +2580,7 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
case IOMMU_CAP_DIRTY_TRACKING: {
- struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+ struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
return amd_iommu_hd_support(iommu);
}
@@ -2447,9 +2609,7 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
}
list_for_each_entry(dev_data, &pdomain->dev_list, list) {
- iommu = rlookup_amd_iommu(dev_data->dev);
- if (!iommu)
- continue;
+ iommu = get_amd_iommu_from_dev_data(dev_data);
dev_table = get_dev_table(iommu);
pte_root = dev_table[dev_data->devid].data[0];
@@ -2509,9 +2669,7 @@ static void amd_iommu_get_resv_regions(struct device *dev,
return;
devid = PCI_SBDF_TO_DEVID(sbdf);
- iommu = rlookup_amd_iommu(dev);
- if (!iommu)
- return;
+ iommu = get_amd_iommu_from_dev(dev);
pci_seg = iommu->pci_seg;
list_for_each_entry(entry, &pci_seg->unity_map, list) {
@@ -2645,216 +2803,6 @@ const struct iommu_ops amd_iommu_ops = {
}
};
-static int __flush_pasid(struct protection_domain *domain, u32 pasid,
- u64 address, size_t size)
-{
- struct iommu_dev_data *dev_data;
- struct iommu_cmd cmd;
- int i, ret;
-
- if (!(domain->flags & PD_IOMMUV2_MASK))
- return -EINVAL;
-
- build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, true);
-
- /*
- * IOMMU TLB needs to be flushed before Device TLB to
- * prevent device TLB refill from IOMMU TLB
- */
- for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (domain->dev_iommu[i] == 0)
- continue;
-
- ret = iommu_queue_command(amd_iommus[i], &cmd);
- if (ret != 0)
- goto out;
- }
-
- /* Wait until IOMMU TLB flushes are complete */
- amd_iommu_domain_flush_complete(domain);
-
- /* Now flush device TLBs */
- list_for_each_entry(dev_data, &domain->dev_list, list) {
- struct amd_iommu *iommu;
- int qdep;
-
- /*
- There might be non-IOMMUv2 capable devices in an IOMMUv2
- * domain.
- */
- if (!dev_data->ats_enabled)
- continue;
-
- qdep = dev_data->ats_qdep;
- iommu = rlookup_amd_iommu(dev_data->dev);
- if (!iommu)
- continue;
- build_inv_iotlb_pages(&cmd, dev_data->devid, qdep,
- address, size, pasid, true);
-
- ret = iommu_queue_command(iommu, &cmd);
- if (ret != 0)
- goto out;
- }
-
- /* Wait until all device TLBs are flushed */
- amd_iommu_domain_flush_complete(domain);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid,
- u64 address)
-{
- return __flush_pasid(domain, pasid, address, PAGE_SIZE);
-}
-
-int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
- u64 address)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&domain->lock, flags);
- ret = __amd_iommu_flush_page(domain, pasid, address);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
-{
- return __flush_pasid(domain, pasid, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
-}
-
-int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&domain->lock, flags);
- ret = __amd_iommu_flush_tlb(domain, pasid);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
-{
- int index;
- u64 *pte;
-
- while (true) {
-
- index = (pasid >> (9 * level)) & 0x1ff;
- pte = &root[index];
-
- if (level == 0)
- break;
-
- if (!(*pte & GCR3_VALID)) {
- if (!alloc)
- return NULL;
-
- root = (void *)get_zeroed_page(GFP_ATOMIC);
- if (root == NULL)
- return NULL;
-
- *pte = iommu_virt_to_phys(root) | GCR3_VALID;
- }
-
- root = iommu_phys_to_virt(*pte & PAGE_MASK);
-
- level -= 1;
- }
-
- return pte;
-}
-
-static int __set_gcr3(struct protection_domain *domain, u32 pasid,
- unsigned long cr3)
-{
- u64 *pte;
-
- if (domain->iop.mode != PAGE_MODE_NONE)
- return -EINVAL;
-
- pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
- if (pte == NULL)
- return -ENOMEM;
-
- *pte = (cr3 & PAGE_MASK) | GCR3_VALID;
-
- return __amd_iommu_flush_tlb(domain, pasid);
-}
-
-static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
-{
- u64 *pte;
-
- if (domain->iop.mode != PAGE_MODE_NONE)
- return -EINVAL;
-
- pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
- if (pte == NULL)
- return 0;
-
- *pte = 0;
-
- return __amd_iommu_flush_tlb(domain, pasid);
-}
-
-int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
- unsigned long cr3)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&domain->lock, flags);
- ret = __set_gcr3(domain, pasid, cr3);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&domain->lock, flags);
- ret = __clear_gcr3(domain, pasid);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
- int status, int tag)
-{
- struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu;
- struct iommu_cmd cmd;
-
- dev_data = dev_iommu_priv_get(&pdev->dev);
- iommu = rlookup_amd_iommu(&pdev->dev);
- if (!iommu)
- return -ENODEV;
-
- build_complete_ppr(&cmd, dev_data->devid, pasid, status,
- tag, dev_data->pri_tlp);
-
- return iommu_queue_command(iommu, &cmd);
-}
-
#ifdef CONFIG_IRQ_REMAP
/*****************************************************************************
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index ef3ee95706da..eb1e62cd499a 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -779,7 +779,8 @@ static void apple_dart_domain_free(struct iommu_domain *domain)
kfree(dart_domain);
}
-static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int apple_dart_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 05722121f00e..2cd433a9c8a0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -10,7 +10,6 @@
#include <linux/slab.h>
#include "arm-smmu-v3.h"
-#include "../../iommu-sva.h"
#include "../../io-pgtable-arm.h"
struct arm_smmu_mmu_notifier {
@@ -292,10 +291,8 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
struct mm_struct *mm)
{
int ret;
- unsigned long flags;
struct arm_smmu_ctx_desc *cd;
struct arm_smmu_mmu_notifier *smmu_mn;
- struct arm_smmu_master *master;
list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
if (smmu_mn->mn.mm == mm) {
@@ -325,28 +322,9 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
goto err_free_cd;
}
- spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_for_each_entry(master, &smmu_domain->devices, domain_head) {
- ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm),
- cd);
- if (ret) {
- list_for_each_entry_from_reverse(
- master, &smmu_domain->devices, domain_head)
- arm_smmu_write_ctx_desc(
- master, mm_get_enqcmd_pasid(mm), NULL);
- break;
- }
- }
- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- if (ret)
- goto err_put_notifier;
-
list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
return smmu_mn;
-err_put_notifier:
- /* Frees smmu_mn */
- mmu_notifier_put(&smmu_mn->mn);
err_free_cd:
arm_smmu_free_shared_cd(cd);
return ERR_PTR(ret);
@@ -363,9 +341,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
list_del(&smmu_mn->list);
- arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm),
- NULL);
-
/*
* If we went through clear(), we've already invalidated, and no
* new TLB entry can have been formed.
@@ -381,13 +356,20 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
arm_smmu_free_shared_cd(cd);
}
-static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid,
+ struct mm_struct *mm)
{
int ret;
struct arm_smmu_bond *bond;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_domain *smmu_domain;
+
+ if (!(domain->type & __IOMMU_DOMAIN_PAGING))
+ return -ENODEV;
+ smmu_domain = to_smmu_domain(domain);
+ if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
+ return -ENODEV;
if (!master || !master->sva_enabled)
return -ENODEV;
@@ -404,9 +386,15 @@ static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
goto err_free_bond;
}
+ ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd);
+ if (ret)
+ goto err_put_notifier;
+
list_add(&bond->list, &master->bonds);
return 0;
+err_put_notifier:
+ arm_smmu_mmu_notifier_put(bond->smmu_mn);
err_free_bond:
kfree(bond);
return ret;
@@ -487,7 +475,6 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
{
- int ret;
struct device *dev = master->dev;
/*
@@ -500,16 +487,7 @@ static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
if (!master->iopf_enabled)
return -EINVAL;
- ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
- if (ret)
- return ret;
-
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
- if (ret) {
- iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
- return ret;
- }
- return 0;
+ return iopf_queue_add_device(master->smmu->evtq.iopf, dev);
}
static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
@@ -519,7 +497,6 @@ static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
if (!master->iopf_enabled)
return;
- iommu_unregister_device_fault_handler(dev);
iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
}
@@ -568,6 +545,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
mutex_lock(&sva_lock);
+
+ arm_smmu_write_ctx_desc(master, id, NULL);
+
list_for_each_entry(t, &master->bonds, list) {
if (t->mm == mm) {
bond = t;
@@ -590,7 +570,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct mm_struct *mm = domain->mm;
mutex_lock(&sva_lock);
- ret = __arm_smmu_sva_bind(dev, mm);
+ ret = __arm_smmu_sva_bind(dev, id, mm);
mutex_unlock(&sva_lock);
return ret;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 0ffb1cf17e0b..5ed036225e69 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -29,7 +29,6 @@
#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"
-#include "../../iommu-sva.h"
static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
@@ -48,6 +47,9 @@ enum arm_smmu_msi_index {
ARM_SMMU_MAX_MSIS,
};
+static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu,
+ ioasid_t sid);
+
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
[EVTQ_MSI_INDEX] = {
ARM_SMMU_EVTQ_IRQ_CFG0,
@@ -86,6 +88,9 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
+static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu);
+
static void parse_driver_options(struct arm_smmu_device *smmu)
{
int i = 0;
@@ -921,31 +926,29 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
-static int arm_smmu_page_response(struct device *dev,
- struct iommu_fault_event *unused,
- struct iommu_page_response *resp)
+static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
+ struct iommu_page_response *resp)
{
struct arm_smmu_cmdq_ent cmd = {0};
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
int sid = master->streams[0].id;
- if (master->stall_enabled) {
- cmd.opcode = CMDQ_OP_RESUME;
- cmd.resume.sid = sid;
- cmd.resume.stag = resp->grpid;
- switch (resp->code) {
- case IOMMU_PAGE_RESP_INVALID:
- case IOMMU_PAGE_RESP_FAILURE:
- cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
- break;
- case IOMMU_PAGE_RESP_SUCCESS:
- cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
- break;
- default:
- return -EINVAL;
- }
- } else {
- return -ENODEV;
+ if (WARN_ON(!master->stall_enabled))
+ return;
+
+ cmd.opcode = CMDQ_OP_RESUME;
+ cmd.resume.sid = sid;
+ cmd.resume.stag = resp->grpid;
+ switch (resp->code) {
+ case IOMMU_PAGE_RESP_INVALID:
+ case IOMMU_PAGE_RESP_FAILURE:
+ cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+ break;
+ case IOMMU_PAGE_RESP_SUCCESS:
+ cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+ break;
+ default:
+ break;
}
arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
@@ -955,8 +958,6 @@ static int arm_smmu_page_response(struct device *dev,
* terminated... at some point in the future. PRI_RESP is fire and
* forget.
*/
-
- return 0;
}
/* Context descriptor manipulation functions */
@@ -971,6 +972,199 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
+/*
+ * Based on the value of ent report which bits of the STE the HW will access. It
+ * would be nice if this was complete according to the spec, but minimally it
+ * has to capture the bits this driver uses.
+ */
+static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent,
+ struct arm_smmu_ste *used_bits)
+{
+ unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0]));
+
+ used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V);
+ if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V)))
+ return;
+
+ used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
+
+ /* S1 translates */
+ if (cfg & BIT(0)) {
+ used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
+ STRTAB_STE_0_S1CTXPTR_MASK |
+ STRTAB_STE_0_S1CDMAX);
+ used_bits->data[1] |=
+ cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
+ STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
+ STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
+ STRTAB_STE_1_EATS);
+ used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
+ }
+
+ /* S2 translates */
+ if (cfg & BIT(1)) {
+ used_bits->data[1] |=
+ cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
+ used_bits->data[2] |=
+ cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
+ STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
+ STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
+ used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
+ }
+
+ if (cfg == STRTAB_STE_0_CFG_BYPASS)
+ used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
+}
+
+/*
+ * Figure out if we can do a hitless update of entry to become target. Returns a
+ * bit mask where 1 indicates that qword needs to be set disruptively.
+ * unused_update is an intermediate value of entry that has unused bits set to
+ * their new values.
+ */
+static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry,
+ const struct arm_smmu_ste *target,
+ struct arm_smmu_ste *unused_update)
+{
+ struct arm_smmu_ste target_used = {};
+ struct arm_smmu_ste cur_used = {};
+ u8 used_qword_diff = 0;
+ unsigned int i;
+
+ arm_smmu_get_ste_used(entry, &cur_used);
+ arm_smmu_get_ste_used(target, &target_used);
+
+ for (i = 0; i != ARRAY_SIZE(target_used.data); i++) {
+ /*
+ * Check that masks are up to date, the make functions are not
+ * allowed to set a bit to 1 if the used function doesn't say it
+ * is used.
+ */
+ WARN_ON_ONCE(target->data[i] & ~target_used.data[i]);
+
+ /* Bits can change because they are not currently being used */
+ unused_update->data[i] = (entry->data[i] & cur_used.data[i]) |
+ (target->data[i] & ~cur_used.data[i]);
+ /*
+ * Each bit indicates that a used bit in a qword needs to be
+ * changed after unused_update is applied.
+ */
+ if ((unused_update->data[i] & target_used.data[i]) !=
+ target->data[i])
+ used_qword_diff |= 1 << i;
+ }
+ return used_qword_diff;
+}
+
+static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid,
+ struct arm_smmu_ste *entry,
+ const struct arm_smmu_ste *target, unsigned int start,
+ unsigned int len)
+{
+ bool changed = false;
+ unsigned int i;
+
+ for (i = start; len != 0; len--, i++) {
+ if (entry->data[i] != target->data[i]) {
+ WRITE_ONCE(entry->data[i], target->data[i]);
+ changed = true;
+ }
+ }
+
+ if (changed)
+ arm_smmu_sync_ste_for_sid(smmu, sid);
+ return changed;
+}
+
+/*
+ * Update the STE/CD to the target configuration. The transition from the
+ * current entry to the target entry takes place over multiple steps that
+ * attempts to make the transition hitless if possible. This function takes care
+ * not to create a situation where the HW can perceive a corrupted entry. HW is
+ * only required to have a 64 bit atomicity with stores from the CPU, while
+ * entries are many 64 bit values big.
+ *
+ * The difference between the current value and the target value is analyzed to
+ * determine which of three updates are required - disruptive, hitless or no
+ * change.
+ *
+ * In the most general disruptive case we can make any update in three steps:
+ * - Disrupting the entry (V=0)
+ * - Fill now unused qwords, execpt qword 0 which contains V
+ * - Make qword 0 have the final value and valid (V=1) with a single 64
+ * bit store
+ *
+ * However this disrupts the HW while it is happening. There are several
+ * interesting cases where a STE/CD can be updated without disturbing the HW
+ * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
+ * because the used bits don't intersect. We can detect this by calculating how
+ * many 64 bit values need update after adjusting the unused bits and skip the
+ * V=0 process. This relies on the IGNORED behavior described in the
+ * specification.
+ */
+static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
+ struct arm_smmu_ste *entry,
+ const struct arm_smmu_ste *target)
+{
+ unsigned int num_entry_qwords = ARRAY_SIZE(target->data);
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ste unused_update;
+ u8 used_qword_diff;
+
+ used_qword_diff =
+ arm_smmu_entry_qword_diff(entry, target, &unused_update);
+ if (hweight8(used_qword_diff) == 1) {
+ /*
+ * Only one qword needs its used bits to be changed. This is a
+ * hitless update, update all bits the current STE is ignoring
+ * to their new values, then update a single "critical qword" to
+ * change the STE and finally 0 out any bits that are now unused
+ * in the target configuration.
+ */
+ unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
+
+ /*
+ * Skip writing unused bits in the critical qword since we'll be
+ * writing it in the next step anyways. This can save a sync
+ * when the only change is in that qword.
+ */
+ unused_update.data[critical_qword_index] =
+ entry->data[critical_qword_index];
+ entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords);
+ entry_set(smmu, sid, entry, target, critical_qword_index, 1);
+ entry_set(smmu, sid, entry, target, 0, num_entry_qwords);
+ } else if (used_qword_diff) {
+ /*
+ * At least two qwords need their inuse bits to be changed. This
+ * requires a breaking update, zero the V bit, write all qwords
+ * but 0, then set qword 0
+ */
+ unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V);
+ entry_set(smmu, sid, entry, &unused_update, 0, 1);
+ entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
+ entry_set(smmu, sid, entry, target, 0, 1);
+ } else {
+ /*
+ * No inuse bit changed. Sanity check that all unused bits are 0
+ * in the entry. The target was already sanity checked by
+ * compute_qword_diff().
+ */
+ WARN_ON_ONCE(
+ entry_set(smmu, sid, entry, target, 0, num_entry_qwords));
+ }
+
+ /* It's likely that we'll want to use the new STE soon */
+ if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
+ struct arm_smmu_cmdq_ent
+ prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
+ .prefetch = {
+ .sid = sid,
+ } };
+
+ arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ }
+}
+
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
@@ -1251,158 +1445,131 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
-static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
- struct arm_smmu_ste *dst)
+static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
- /*
- * This is hideously complicated, but we only really care about
- * three cases at the moment:
- *
- * 1. Invalid (all zero) -> bypass/fault (init)
- * 2. Bypass/fault -> translation/bypass (attach)
- * 3. Translation/bypass -> bypass/fault (detach)
- *
- * Given that we can't update the STE atomically and the SMMU
- * doesn't read the thing in a defined order, that leaves us
- * with the following maintenance requirements:
- *
- * 1. Update Config, return (init time STEs aren't live)
- * 2. Write everything apart from dword 0, sync, write dword 0, sync
- * 3. Update Config, sync
- */
- u64 val = le64_to_cpu(dst->data[0]);
- bool ste_live = false;
- struct arm_smmu_device *smmu = master->smmu;
- struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
- struct arm_smmu_s2_cfg *s2_cfg = NULL;
- struct arm_smmu_domain *smmu_domain = master->domain;
- struct arm_smmu_cmdq_ent prefetch_cmd = {
- .opcode = CMDQ_OP_PREFETCH_CFG,
- .prefetch = {
- .sid = sid,
- },
- };
-
- if (smmu_domain) {
- switch (smmu_domain->stage) {
- case ARM_SMMU_DOMAIN_S1:
- cd_table = &master->cd_table;
- break;
- case ARM_SMMU_DOMAIN_S2:
- s2_cfg = &smmu_domain->s2_cfg;
- break;
- default:
- break;
- }
- }
-
- if (val & STRTAB_STE_0_V) {
- switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
- case STRTAB_STE_0_CFG_BYPASS:
- break;
- case STRTAB_STE_0_CFG_S1_TRANS:
- case STRTAB_STE_0_CFG_S2_TRANS:
- ste_live = true;
- break;
- case STRTAB_STE_0_CFG_ABORT:
- BUG_ON(!disable_bypass);
- break;
- default:
- BUG(); /* STE corruption */
- }
- }
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
+}
- /* Nuke the existing STE_0 value, as we're going to rewrite it */
- val = STRTAB_STE_0_V;
+static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target)
+{
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
+ target->data[1] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
+}
- /* Bypass/fault */
- if (!smmu_domain || !(cd_table || s2_cfg)) {
- if (!smmu_domain && disable_bypass)
- val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
- else
- val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
+static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master)
+{
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+ struct arm_smmu_device *smmu = master->smmu;
- dst->data[0] = cpu_to_le64(val);
- dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
- STRTAB_STE_1_SHCFG_INCOMING));
- dst->data[2] = 0; /* Nuke the VMID */
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
+ (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
+
+ target->data[1] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
+ FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+ FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+ FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
+ ((smmu->features & ARM_SMMU_FEAT_STALLS &&
+ !master->stall_enabled) ?
+ STRTAB_STE_1_S1STALLD :
+ 0) |
+ FIELD_PREP(STRTAB_STE_1_EATS,
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if (smmu->features & ARM_SMMU_FEAT_E2H) {
/*
- * The SMMU can perform negative caching, so we must sync
- * the STE regardless of whether the old value was live.
+ * To support BTM the streamworld needs to match the
+ * configuration of the CPU so that the ASID broadcasts are
+ * properly matched. This means either S/NS-EL2-E2H (hypervisor)
+ * or NS-EL1 (guest). Since an SVA domain can be installed in a
+ * PASID this should always use a BTM compatible configuration
+ * if the HW supports it.
*/
- if (smmu)
- arm_smmu_sync_ste_for_sid(smmu, sid);
- return;
- }
-
- if (cd_table) {
- u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
- STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
-
- BUG_ON(ste_live);
- dst->data[1] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
- FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
- FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
- FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
- FIELD_PREP(STRTAB_STE_1_STRW, strw));
-
- if (smmu->features & ARM_SMMU_FEAT_STALLS &&
- !master->stall_enabled)
- dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
+ target->data[1] |= cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
+ } else {
+ target->data[1] |= cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
- val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
- FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
- FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
- FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
+ /*
+ * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
+ * arm_smmu_domain_alloc_id()
+ */
+ target->data[2] =
+ cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
}
+}
- if (s2_cfg) {
- BUG_ON(ste_live);
- dst->data[2] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
- FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
+static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
+ const struct io_pgtable_cfg *pgtbl_cfg =
+ &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
+ typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
+ &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+ u64 vtcr_val;
+
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
+
+ target->data[1] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_EATS,
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
+ FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
+
+ vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
+ target->data[2] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
+ FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
+ STRTAB_STE_2_S2AA64 |
#ifdef __BIG_ENDIAN
- STRTAB_STE_2_S2ENDI |
+ STRTAB_STE_2_S2ENDI |
#endif
- STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
- STRTAB_STE_2_S2R);
+ STRTAB_STE_2_S2PTW |
+ STRTAB_STE_2_S2R);
- dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
-
- val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
- }
-
- if (master->ats_enabled)
- dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
- STRTAB_STE_1_EATS_TRANS));
-
- arm_smmu_sync_ste_for_sid(smmu, sid);
- /* See comment in arm_smmu_write_ctx_desc() */
- WRITE_ONCE(dst->data[0], cpu_to_le64(val));
- arm_smmu_sync_ste_for_sid(smmu, sid);
-
- /* It's likely that we'll want to use the new STE soon */
- if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
- arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
+ STRTAB_STE_3_S2TTB_MASK);
}
-static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
- unsigned int nent, bool force)
+/*
+ * This can safely directly manipulate the STE memory without a sync sequence
+ * because the STE table has not been installed in the SMMU yet.
+ */
+static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
+ unsigned int nent)
{
unsigned int i;
- u64 val = STRTAB_STE_0_V;
-
- if (disable_bypass && !force)
- val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
- else
- val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
for (i = 0; i < nent; ++i) {
- strtab->data[0] = cpu_to_le64(val);
- strtab->data[1] = cpu_to_le64(FIELD_PREP(
- STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
- strtab->data[2] = 0;
+ if (disable_bypass)
+ arm_smmu_make_abort_ste(strtab);
+ else
+ arm_smmu_make_bypass_ste(strtab);
strtab++;
}
}
@@ -1430,7 +1597,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
return -ENOMEM;
}
- arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
+ arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
arm_smmu_write_strtab_l1_desc(strtab, desc);
return 0;
}
@@ -1460,27 +1627,19 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
/* IRQ and event handlers */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
- int ret;
- u32 reason;
+ int ret = 0;
u32 perm = 0;
struct arm_smmu_master *master;
bool ssid_valid = evt[0] & EVTQ_0_SSV;
u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
- struct iommu_fault_event fault_evt = { };
+ struct iopf_fault fault_evt = { };
struct iommu_fault *flt = &fault_evt.fault;
switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
case EVT_ID_TRANSLATION_FAULT:
- reason = IOMMU_FAULT_REASON_PTE_FETCH;
- break;
case EVT_ID_ADDR_SIZE_FAULT:
- reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
- break;
case EVT_ID_ACCESS_FAULT:
- reason = IOMMU_FAULT_REASON_ACCESS;
- break;
case EVT_ID_PERMISSION_FAULT:
- reason = IOMMU_FAULT_REASON_PERMISSION;
break;
default:
return -EOPNOTSUPP;
@@ -1490,6 +1649,9 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
if (evt[1] & EVTQ_1_S2)
return -EFAULT;
+ if (!(evt[1] & EVTQ_1_STALL))
+ return -EOPNOTSUPP;
+
if (evt[1] & EVTQ_1_RnW)
perm |= IOMMU_FAULT_PERM_READ;
else
@@ -1501,32 +1663,17 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
if (evt[1] & EVTQ_1_PnU)
perm |= IOMMU_FAULT_PERM_PRIV;
- if (evt[1] & EVTQ_1_STALL) {
- flt->type = IOMMU_FAULT_PAGE_REQ;
- flt->prm = (struct iommu_fault_page_request) {
- .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
- .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
- };
-
- if (ssid_valid) {
- flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
- }
- } else {
- flt->type = IOMMU_FAULT_DMA_UNRECOV;
- flt->event = (struct iommu_fault_unrecoverable) {
- .reason = reason,
- .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
- .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
- };
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request) {
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
+ .perm = perm,
+ .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+ };
- if (ssid_valid) {
- flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
- flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
- }
+ if (ssid_valid) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
}
mutex_lock(&smmu->streams_mutex);
@@ -1536,17 +1683,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
goto out_unlock;
}
- ret = iommu_report_device_fault(master->dev, &fault_evt);
- if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
- /* Nobody cared, abort the access */
- struct iommu_page_response resp = {
- .pasid = flt->prm.pasid,
- .grpid = flt->prm.grpid,
- .code = IOMMU_PAGE_RESP_FAILURE,
- };
- arm_smmu_page_response(master->dev, &fault_evt, &resp);
- }
-
+ iommu_report_device_fault(master->dev, &fault_evt);
out_unlock:
mutex_unlock(&smmu->streams_mutex);
return ret;
@@ -2025,15 +2162,15 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
- struct arm_smmu_domain *smmu_domain;
if (type == IOMMU_DOMAIN_SVA)
return arm_smmu_sva_domain_alloc();
+ return ERR_PTR(-EOPNOTSUPP);
+}
- if (type != IOMMU_DOMAIN_UNMANAGED &&
- type != IOMMU_DOMAIN_DMA &&
- type != IOMMU_DOMAIN_IDENTITY)
- return NULL;
+static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
+{
+ struct arm_smmu_domain *smmu_domain;
/*
* Allocate the domain and initialise some of its data structures.
@@ -2042,13 +2179,23 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
*/
smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
if (!smmu_domain)
- return NULL;
+ return ERR_PTR(-ENOMEM);
mutex_init(&smmu_domain->init_mutex);
INIT_LIST_HEAD(&smmu_domain->devices);
spin_lock_init(&smmu_domain->devices_lock);
INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
+ if (dev) {
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ int ret;
+
+ ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
+ if (ret) {
+ kfree(smmu_domain);
+ return ERR_PTR(ret);
+ }
+ }
return &smmu_domain->domain;
}
@@ -2074,12 +2221,12 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
kfree(smmu_domain);
}
-static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
+static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
u32 asid;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
@@ -2111,13 +2258,12 @@ out_unlock:
return ret;
}
-static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
+static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
int vmid;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
- typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
/* Reserve VMID 0 for stage-2 bypass STEs */
vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
@@ -2125,35 +2271,21 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
if (vmid < 0)
return vmid;
- vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
cfg->vmid = (u16)vmid;
- cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
- cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
- FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
- FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
- FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
- FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
- FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
- FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
return 0;
}
-static int arm_smmu_domain_finalise(struct iommu_domain *domain)
+static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu)
{
int ret;
unsigned long ias, oas;
enum io_pgtable_fmt fmt;
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
- int (*finalise_stage_fn)(struct arm_smmu_domain *,
- struct io_pgtable_cfg *);
- struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- struct arm_smmu_device *smmu = smmu_domain->smmu;
-
- if (domain->type == IOMMU_DOMAIN_IDENTITY) {
- smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
- return 0;
- }
+ int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg);
/* Restrict the stage to what we can actually support */
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
@@ -2192,17 +2324,18 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
if (!pgtbl_ops)
return -ENOMEM;
- domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
- domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
- domain->geometry.force_aperture = true;
+ smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+ smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
+ smmu_domain->domain.geometry.force_aperture = true;
- ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
+ ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
}
smmu_domain->pgtbl_ops = pgtbl_ops;
+ smmu_domain->smmu = smmu;
return 0;
}
@@ -2225,7 +2358,8 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
}
}
-static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
+static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
+ const struct arm_smmu_ste *target)
{
int i, j;
struct arm_smmu_device *smmu = master->smmu;
@@ -2242,7 +2376,7 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
if (j < i)
continue;
- arm_smmu_write_strtab_ent(master, sid, step);
+ arm_smmu_write_ste(master, sid, step, target);
}
}
@@ -2261,12 +2395,12 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
-static void arm_smmu_enable_ats(struct arm_smmu_master *master)
+static void arm_smmu_enable_ats(struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain)
{
size_t stu;
struct pci_dev *pdev;
struct arm_smmu_device *smmu = master->smmu;
- struct arm_smmu_domain *smmu_domain = master->domain;
/* Don't enable ATS at the endpoint if it's not enabled in the STE */
if (!master->ats_enabled)
@@ -2282,10 +2416,9 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master)
dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
-static void arm_smmu_disable_ats(struct arm_smmu_master *master)
+static void arm_smmu_disable_ats(struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain)
{
- struct arm_smmu_domain *smmu_domain = master->domain;
-
if (!master->ats_enabled)
return;
@@ -2348,35 +2481,28 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
static void arm_smmu_detach_dev(struct arm_smmu_master *master)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
+ struct arm_smmu_domain *smmu_domain;
unsigned long flags;
- struct arm_smmu_domain *smmu_domain = master->domain;
- if (!smmu_domain)
+ if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
return;
- arm_smmu_disable_ats(master);
+ smmu_domain = to_smmu_domain(domain);
+ arm_smmu_disable_ats(master, smmu_domain);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_del(&master->domain_head);
+ list_del_init(&master->domain_head);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- master->domain = NULL;
master->ats_enabled = false;
- arm_smmu_install_ste_for_dev(master);
- /*
- * Clearing the CD entry isn't strictly required to detach the domain
- * since the table is uninstalled anyway, but it helps avoid confusion
- * in the call to arm_smmu_write_ctx_desc on the next attach (which
- * expects the entry to be empty).
- */
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
- arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret = 0;
unsigned long flags;
+ struct arm_smmu_ste target;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -2398,15 +2524,10 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -EBUSY;
}
- arm_smmu_detach_dev(master);
-
mutex_lock(&smmu_domain->init_mutex);
if (!smmu_domain->smmu) {
- smmu_domain->smmu = smmu;
- ret = arm_smmu_domain_finalise(domain);
- if (ret)
- smmu_domain->smmu = NULL;
+ ret = arm_smmu_domain_finalise(smmu_domain, smmu);
} else if (smmu_domain->smmu != smmu)
ret = -EINVAL;
@@ -2414,57 +2535,140 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (ret)
return ret;
- master->domain = smmu_domain;
-
/*
- * The SMMU does not support enabling ATS with bypass. When the STE is
- * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
- * Translated transactions are denied as though ATS is disabled for the
- * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
- * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
+ * Prevent arm_smmu_share_asid() from trying to change the ASID
+ * of either the old or new domain while we are working on it.
+ * This allows the STE and the smmu_domain->devices list to
+ * be inconsistent during this routine.
*/
- if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
- master->ats_enabled = arm_smmu_ats_supported(master);
+ mutex_lock(&arm_smmu_asid_lock);
+
+ arm_smmu_detach_dev(master);
+
+ master->ats_enabled = arm_smmu_ats_supported(master);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_S1:
if (!master->cd_table.cdtab) {
ret = arm_smmu_alloc_cd_tables(master);
- if (ret) {
- master->domain = NULL;
+ if (ret)
+ goto out_list_del;
+ } else {
+ /*
+ * arm_smmu_write_ctx_desc() relies on the entry being
+ * invalid to work, clear any existing entry.
+ */
+ ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
+ NULL);
+ if (ret)
goto out_list_del;
- }
}
- /*
- * Prevent SVA from concurrently modifying the CD or writing to
- * the CD entry
- */
- mutex_lock(&arm_smmu_asid_lock);
ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
- mutex_unlock(&arm_smmu_asid_lock);
- if (ret) {
- master->domain = NULL;
+ if (ret)
goto out_list_del;
- }
- }
- arm_smmu_install_ste_for_dev(master);
+ arm_smmu_make_cdtable_ste(&target, master);
+ arm_smmu_install_ste_for_dev(master, &target);
+ break;
+ case ARM_SMMU_DOMAIN_S2:
+ arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
+ arm_smmu_install_ste_for_dev(master, &target);
+ if (master->cd_table.cdtab)
+ arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
+ NULL);
+ break;
+ }
- arm_smmu_enable_ats(master);
- return 0;
+ arm_smmu_enable_ats(master, smmu_domain);
+ goto out_unlock;
out_list_del:
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_del(&master->domain_head);
+ list_del_init(&master->domain_head);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+out_unlock:
+ mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
+static int arm_smmu_attach_dev_ste(struct device *dev,
+ struct arm_smmu_ste *ste)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ if (arm_smmu_master_sva_enabled(master))
+ return -EBUSY;
+
+ /*
+ * Do not allow any ASID to be changed while are working on the STE,
+ * otherwise we could miss invalidations.
+ */
+ mutex_lock(&arm_smmu_asid_lock);
+
+ /*
+ * The SMMU does not support enabling ATS with bypass/abort. When the
+ * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
+ * and Translated transactions are denied as though ATS is disabled for
+ * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
+ * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
+ */
+ arm_smmu_detach_dev(master);
+
+ arm_smmu_install_ste_for_dev(master, ste);
+ mutex_unlock(&arm_smmu_asid_lock);
+
+ /*
+ * This has to be done after removing the master from the
+ * arm_smmu_domain->devices to avoid races updating the same context
+ * descriptor from arm_smmu_share_asid().
+ */
+ if (master->cd_table.cdtab)
+ arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
+ return 0;
+}
+
+static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct arm_smmu_ste ste;
+
+ arm_smmu_make_bypass_ste(&ste);
+ return arm_smmu_attach_dev_ste(dev, &ste);
+}
+
+static const struct iommu_domain_ops arm_smmu_identity_ops = {
+ .attach_dev = arm_smmu_attach_dev_identity,
+};
+
+static struct iommu_domain arm_smmu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &arm_smmu_identity_ops,
+};
+
+static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct arm_smmu_ste ste;
+
+ arm_smmu_make_abort_ste(&ste);
+ return arm_smmu_attach_dev_ste(dev, &ste);
+}
+
+static const struct iommu_domain_ops arm_smmu_blocked_ops = {
+ .attach_dev = arm_smmu_attach_dev_blocked,
+};
+
+static struct iommu_domain arm_smmu_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &arm_smmu_blocked_ops,
+};
+
static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -2658,6 +2862,7 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
master->dev = dev;
master->smmu = smmu;
INIT_LIST_HEAD(&master->bonds);
+ INIT_LIST_HEAD(&master->domain_head);
dev_iommu_priv_set(dev, master);
ret = arm_smmu_insert_master(smmu, master);
@@ -2699,7 +2904,13 @@ static void arm_smmu_release_device(struct device *dev)
if (WARN_ON(arm_smmu_master_sva_enabled(master)))
iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
- arm_smmu_detach_dev(master);
+
+ /* Put the STE back to what arm_smmu_init_strtab() sets */
+ if (disable_bypass && !dev->iommu->require_direct)
+ arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
+ else
+ arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
+
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
if (master->cd_table.cdtab)
@@ -2739,7 +2950,8 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain)
return ret;
}
-static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int arm_smmu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
return iommu_fwspec_add_ids(dev, args->args, 1);
}
@@ -2844,8 +3056,11 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
}
static struct iommu_ops arm_smmu_ops = {
+ .identity_domain = &arm_smmu_identity_domain,
+ .blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
+ .domain_alloc_paging = arm_smmu_domain_alloc_paging,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
@@ -3049,7 +3264,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
cfg->strtab_base_cfg = reg;
- arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
+ arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
return 0;
}
@@ -3125,7 +3340,8 @@ static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
static void arm_smmu_free_msis(void *data)
{
struct device *dev = data;
- platform_msi_domain_free_irqs(dev);
+
+ platform_device_msi_free_irqs_all(dev);
}
static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
@@ -3166,7 +3382,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
}
/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
- ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+ ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
return;
@@ -3760,7 +3976,6 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
list_for_each_entry(e, &rmr_list, list) {
- struct arm_smmu_ste *step;
struct iommu_iort_rmr_data *rmr;
int ret, i;
@@ -3773,8 +3988,12 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
continue;
}
- step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
- arm_smmu_init_bypass_stes(step, 1, true);
+ /*
+ * STE table is not programmed to HW, see
+ * arm_smmu_initial_bypass_stes()
+ */
+ arm_smmu_make_bypass_ste(
+ arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
}
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 65fb388d5173..23baf117e7e4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -609,8 +609,6 @@ struct arm_smmu_ctx_desc_cfg {
struct arm_smmu_s2_cfg {
u16 vmid;
- u64 vttbr;
- u64 vtcr;
};
struct arm_smmu_strtab_cfg {
@@ -697,7 +695,6 @@ struct arm_smmu_stream {
struct arm_smmu_master {
struct arm_smmu_device *smmu;
struct device *dev;
- struct arm_smmu_domain *domain;
struct list_head domain_head;
struct arm_smmu_stream *streams;
/* Locked by the iommu core using the group mutex */
@@ -715,7 +712,6 @@ struct arm_smmu_master {
enum arm_smmu_domain_stage {
ARM_SMMU_DOMAIN_S1 = 0,
ARM_SMMU_DOMAIN_S2,
- ARM_SMMU_DOMAIN_BYPASS,
};
struct arm_smmu_domain {
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 8b04ece00420..5c7cfc51b57c 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -260,6 +260,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sm6375-mdss" },
{ .compatible = "qcom,sm8150-mdss" },
{ .compatible = "qcom,sm8250-mdss" },
+ { .compatible = "qcom,x1e80100-mdss" },
{ }
};
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 68b6bc5e7c71..c572d877b0e1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -859,10 +859,14 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
arm_smmu_rpm_put(smmu);
}
-static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
+static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
struct arm_smmu_domain *smmu_domain;
+ if (type != IOMMU_DOMAIN_UNMANAGED) {
+ if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
+ return NULL;
+ }
/*
* Allocate the domain and initialise some of its data structures.
* We can't really do anything meaningful until we've added a
@@ -875,15 +879,6 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
mutex_init(&smmu_domain->init_mutex);
spin_lock_init(&smmu_domain->cb_lock);
- if (dev) {
- struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
-
- if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) {
- kfree(smmu_domain);
- return NULL;
- }
- }
-
return &smmu_domain->domain;
}
@@ -1551,7 +1546,8 @@ static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
return ret;
}
-static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int arm_smmu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
u32 mask, fwid = 0;
@@ -1600,7 +1596,7 @@ static struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
- .domain_alloc_paging = arm_smmu_domain_alloc_paging,
+ .domain_alloc = arm_smmu_domain_alloc,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
.probe_finalize = arm_smmu_probe_finalize,
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 17a1c163fef6..e079bb7a993e 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -546,7 +546,8 @@ static struct iommu_device *qcom_iommu_probe_device(struct device *dev)
return &qcom_iommu->iommu;
}
-static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int qcom_iommu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
struct qcom_iommu_dev *qcom_iommu;
struct platform_device *iommu_pdev;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 50ccc4f1ef81..b58f5a3311c3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -859,6 +859,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
iommu_deferred_attach(dev, domain))
return DMA_MAPPING_ERROR;
+ /* If anyone ever wants this we'd need support in the IOVA allocator */
+ if (dev_WARN_ONCE(dev, dma_get_min_align_mask(dev) > iova_mask(iovad),
+ "Unsupported alignment constraint\n"))
+ return DMA_MAPPING_ERROR;
+
size = iova_align(iovad, size + iova_off);
iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 2c6e9094f1e9..d98c9161948a 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1431,7 +1431,7 @@ static void exynos_iommu_release_device(struct device *dev)
}
static int exynos_iommu_of_xlate(struct device *dev,
- struct of_phandle_args *spec)
+ const struct of_phandle_args *spec)
{
struct platform_device *sysmmu = of_find_device_by_node(spec->np);
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 012cd2541a68..6cf9f48e7d8c 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -51,6 +51,7 @@ config INTEL_IOMMU_SVM
depends on X86_64
select MMU_NOTIFIER
select IOMMU_SVA
+ select IOMMU_IOPF
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
@@ -64,17 +65,6 @@ config INTEL_IOMMU_DEFAULT_ON
one is found. If this option is not selected, DMAR support can
be enabled by passing intel_iommu=on to the kernel.
-config INTEL_IOMMU_BROKEN_GFX_WA
- bool "Workaround broken graphics drivers (going away soon)"
- depends on BROKEN && X86
- help
- Current Graphics drivers tend to use physical address
- for DMA and avoid using DMA APIs. Setting this config
- option permits the IOMMU driver to set a unity map for
- all the OS-visible memory. Hence the driver can continue
- to use physical addresses for DMA, at least until this
- option is removed in the 2.6.32 kernel.
-
config INTEL_IOMMU_FLOPPY_WA
def_bool y
depends on X86
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 5dabf081a779..5402b699a122 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -5,5 +5,7 @@ obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
+ifdef CONFIG_INTEL_IOMMU
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
+endif
obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 23cb80d62a9a..36d7427b1202 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1095,7 +1095,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->agaw = agaw;
iommu->msagaw = msagaw;
iommu->segment = drhd->segment;
-
+ iommu->device_rbtree = RB_ROOT;
+ spin_lock_init(&iommu->device_rbtree_lock);
+ mutex_init(&iommu->iopf_lock);
iommu->node = NUMA_NO_NODE;
ver = readl(iommu->reg + DMAR_VER_REG);
@@ -1271,6 +1273,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
int head, tail;
+ struct device *dev;
+ u64 iqe_err, ite_sid;
struct q_inval *qi = iommu->qi;
int shift = qi_shift(iommu);
@@ -1315,6 +1319,13 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
tail = readl(iommu->reg + DMAR_IQT_REG);
tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
+ /*
+ * SID field is valid only when the ITE field is Set in FSTS_REG
+ * see Intel VT-d spec r4.1, section 11.4.9.9
+ */
+ iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
+ ite_sid = DMAR_IQER_REG_ITESID(iqe_err);
+
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
pr_info("Invalidation Time-out Error (ITE) cleared\n");
@@ -1324,6 +1335,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
head = (head - 2 + QI_LENGTH) % QI_LENGTH;
} while (head != tail);
+ /*
+ * If device was released or isn't present, no need to retry
+ * the ATS invalidate request anymore.
+ *
+ * 0 value of ite_sid means old VT-d device, no ite_sid value.
+ * see Intel VT-d spec r4.1, section 11.4.9.9
+ */
+ if (ite_sid) {
+ dev = device_rbtree_find(iommu, ite_sid);
+ if (!dev || !dev_is_pci(dev) ||
+ !pci_device_is_present(to_pci_dev(dev)))
+ return -ETIMEDOUT;
+ }
if (qi->desc_status[wait_index] == QI_ABORT)
return -EAGAIN;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6fb5f6fceea1..50eb9aed47cc 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -27,7 +27,6 @@
#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
-#include "../iommu-sva.h"
#include "pasid.h"
#include "cap_audit.h"
#include "perfmon.h"
@@ -97,6 +96,81 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
return re->hi & VTD_PAGE_MASK;
}
+static int device_rid_cmp_key(const void *key, const struct rb_node *node)
+{
+ struct device_domain_info *info =
+ rb_entry(node, struct device_domain_info, node);
+ const u16 *rid_lhs = key;
+
+ if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
+ return -1;
+
+ if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
+ return 1;
+
+ return 0;
+}
+
+static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
+{
+ struct device_domain_info *info =
+ rb_entry(lhs, struct device_domain_info, node);
+ u16 key = PCI_DEVID(info->bus, info->devfn);
+
+ return device_rid_cmp_key(&key, rhs);
+}
+
+/*
+ * Looks up an IOMMU-probed device using its source ID.
+ *
+ * Returns the pointer to the device if there is a match. Otherwise,
+ * returns NULL.
+ *
+ * Note that this helper doesn't guarantee that the device won't be
+ * released by the iommu subsystem after being returned. The caller
+ * should use its own synchronization mechanism to avoid the device
+ * being released during its use if its possibly the case.
+ */
+struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
+{
+ struct device_domain_info *info = NULL;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
+ if (node)
+ info = rb_entry(node, struct device_domain_info, node);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+
+ return info ? info->dev : NULL;
+}
+
+static int device_rbtree_insert(struct intel_iommu *iommu,
+ struct device_domain_info *info)
+{
+ struct rb_node *curr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+ if (WARN_ON(curr))
+ return -EEXIST;
+
+ return 0;
+}
+
+static void device_rbtree_remove(struct device_domain_info *info)
+{
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ rb_erase(&info->node, &iommu->device_rbtree);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+}
+
/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -396,8 +470,6 @@ static int domain_update_device_node(struct dmar_domain *domain)
return nid;
}
-static void domain_update_iotlb(struct dmar_domain *domain);
-
/* Return the super pagesize bitmap if supported. */
static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
{
@@ -1218,7 +1290,7 @@ domain_lookup_dev_info(struct dmar_domain *domain,
return NULL;
}
-static void domain_update_iotlb(struct dmar_domain *domain)
+void domain_update_iotlb(struct dmar_domain *domain)
{
struct dev_pasid_info *dev_pasid;
struct device_domain_info *info;
@@ -1368,6 +1440,46 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
spin_unlock_irqrestore(&domain->lock, flags);
}
+static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
+ unsigned long pfn, unsigned int pages,
+ int ih)
+{
+ unsigned int aligned_pages = __roundup_pow_of_two(pages);
+ unsigned long bitmask = aligned_pages - 1;
+ unsigned int mask = ilog2(aligned_pages);
+ u64 addr = (u64)pfn << VTD_PAGE_SHIFT;
+
+ /*
+ * PSI masks the low order bits of the base address. If the
+ * address isn't aligned to the mask, then compute a mask value
+ * needed to ensure the target range is flushed.
+ */
+ if (unlikely(bitmask & pfn)) {
+ unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+ /*
+ * Since end_pfn <= pfn + bitmask, the only way bits
+ * higher than bitmask can differ in pfn and end_pfn is
+ * by carrying. This means after masking out bitmask,
+ * high bits starting with the first set bit in
+ * shared_bits are all equal in both pfn and end_pfn.
+ */
+ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+ }
+
+ /*
+ * Fallback to domain selective flush if no PSI support or
+ * the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, did, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
+ DMA_TLB_PSI_FLUSH);
+}
+
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
struct dmar_domain *domain,
unsigned long pfn, unsigned int pages,
@@ -1384,42 +1496,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
if (ih)
ih = 1 << 6;
- if (domain->use_first_level) {
+ if (domain->use_first_level)
domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
- } else {
- unsigned long bitmask = aligned_pages - 1;
-
- /*
- * PSI masks the low order bits of the base address. If the
- * address isn't aligned to the mask, then compute a mask value
- * needed to ensure the target range is flushed.
- */
- if (unlikely(bitmask & pfn)) {
- unsigned long end_pfn = pfn + pages - 1, shared_bits;
-
- /*
- * Since end_pfn <= pfn + bitmask, the only way bits
- * higher than bitmask can differ in pfn and end_pfn is
- * by carrying. This means after masking out bitmask,
- * high bits starting with the first set bit in
- * shared_bits are all equal in both pfn and end_pfn.
- */
- shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
- mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
- }
-
- /*
- * Fallback to domain selective flush if no PSI support or
- * the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) ||
- mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
- }
+ else
+ __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih);
/*
* In caching mode, changes of pages from non-present to present require
@@ -1443,6 +1523,46 @@ static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *
iommu_flush_write_buffer(iommu);
}
+/*
+ * Flush the relevant caches in nested translation if the domain
+ * also serves as a parent
+ */
+static void parent_domain_flush(struct dmar_domain *domain,
+ unsigned long pfn,
+ unsigned long pages, int ih)
+{
+ struct dmar_domain *s1_domain;
+
+ spin_lock(&domain->s1_lock);
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ struct device_domain_info *device_info;
+ struct iommu_domain_info *info;
+ unsigned long flags;
+ unsigned long i;
+
+ xa_for_each(&s1_domain->iommu_array, i, info)
+ __iommu_flush_iotlb_psi(info->iommu, info->did,
+ pfn, pages, ih);
+
+ if (!s1_domain->has_iotlb_device)
+ continue;
+
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ list_for_each_entry(device_info, &s1_domain->devices, link)
+ /*
+ * Address translation cache in device side caches the
+ * result of nested translation. There is no easy way
+ * to identify the exact set of nested translations
+ * affected by a change in S2. So just flush the entire
+ * device cache.
+ */
+ __iommu_flush_dev_iotlb(device_info, 0,
+ MAX_AGAW_PFN_WIDTH);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ }
+ spin_unlock(&domain->s1_lock);
+}
+
static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
@@ -1462,6 +1582,9 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain)
if (!cap_caching_mode(iommu->cap))
iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
}
+
+ if (dmar_domain->nested_parent)
+ parent_domain_flush(dmar_domain, 0, -1, 0);
}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1727,34 +1850,17 @@ static void domain_exit(struct dmar_domain *domain)
kfree(domain);
}
-/*
- * Get the PASID directory size for scalable mode context entry.
- * Value of X in the PDTS field of a scalable mode context entry
- * indicates PASID directory with 2^(X + 7) entries.
- */
-static unsigned long context_get_sm_pds(struct pasid_table *table)
-{
- unsigned long pds, max_pde;
-
- max_pde = table->max_pasid >> PASID_PDE_SHIFT;
- pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
- if (pds < 7)
- return 0;
-
- return pds - 7;
-}
-
static int domain_context_mapping_one(struct dmar_domain *domain,
struct intel_iommu *iommu,
- struct pasid_table *table,
u8 bus, u8 devfn)
{
struct device_domain_info *info =
domain_lookup_dev_info(domain, iommu, bus, devfn);
u16 did = domain_id_iommu(domain, iommu);
int translation = CONTEXT_TT_MULTI_LEVEL;
+ struct dma_pte *pgd = domain->pgd;
struct context_entry *context;
- int ret;
+ int agaw, ret;
if (hw_pass_through && domain_type_is_si(domain))
translation = CONTEXT_TT_PASS_THROUGH;
@@ -1797,65 +1903,37 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
}
context_clear_entry(context);
+ context_set_domain_id(context, did);
- if (sm_supported(iommu)) {
- unsigned long pds;
-
- /* Setup the PASID DIR pointer: */
- pds = context_get_sm_pds(table);
- context->lo = (u64)virt_to_phys(table->table) |
- context_pdts(pds);
-
- /* Setup the RID_PASID field: */
- context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
-
+ if (translation != CONTEXT_TT_PASS_THROUGH) {
/*
- * Setup the Device-TLB enable bit and Page request
- * Enable bit:
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
*/
- if (info && info->ats_supported)
- context_set_sm_dte(context);
- if (info && info->pri_supported)
- context_set_sm_pre(context);
- if (info && info->pasid_supported)
- context_set_pasid(context);
- } else {
- struct dma_pte *pgd = domain->pgd;
- int agaw;
-
- context_set_domain_id(context, did);
-
- if (translation != CONTEXT_TT_PASS_THROUGH) {
- /*
- * Skip top levels of page tables for iommu which has
- * less agaw than default. Unnecessary for PT mode.
- */
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- ret = -ENOMEM;
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- goto out_unlock;
- }
-
- if (info && info->ats_supported)
- translation = CONTEXT_TT_DEV_IOTLB;
- else
- translation = CONTEXT_TT_MULTI_LEVEL;
-
- context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, agaw);
- } else {
- /*
- * In pass through mode, AW must be programmed to
- * indicate the largest AGAW value supported by
- * hardware. And ASR is ignored by hardware.
- */
- context_set_address_width(context, iommu->msagaw);
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ ret = -ENOMEM;
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ goto out_unlock;
}
- context_set_translation_type(context, translation);
+ if (info && info->ats_supported)
+ translation = CONTEXT_TT_DEV_IOTLB;
+ else
+ translation = CONTEXT_TT_MULTI_LEVEL;
+
+ context_set_address_root(context, virt_to_phys(pgd));
+ context_set_address_width(context, agaw);
+ } else {
+ /*
+ * In pass through mode, AW must be programmed to
+ * indicate the largest AGAW value supported by
+ * hardware. And ASR is ignored by hardware.
+ */
+ context_set_address_width(context, iommu->msagaw);
}
+ context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
if (!ecap_coherent(iommu->ecap))
@@ -1885,43 +1963,29 @@ out_unlock:
return ret;
}
-struct domain_context_mapping_data {
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
- struct pasid_table *table;
-};
-
static int domain_context_mapping_cb(struct pci_dev *pdev,
u16 alias, void *opaque)
{
- struct domain_context_mapping_data *data = opaque;
+ struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *domain = opaque;
- return domain_context_mapping_one(data->domain, data->iommu,
- data->table, PCI_BUS_NUM(alias),
- alias & 0xff);
+ return domain_context_mapping_one(domain, iommu,
+ PCI_BUS_NUM(alias), alias & 0xff);
}
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct domain_context_mapping_data data;
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
- struct pasid_table *table;
-
- table = intel_pasid_get_table(dev);
if (!dev_is_pci(dev))
- return domain_context_mapping_one(domain, iommu, table,
- bus, devfn);
-
- data.domain = domain;
- data.iommu = iommu;
- data.table = table;
+ return domain_context_mapping_one(domain, iommu, bus, devfn);
return pci_for_each_dma_alias(to_pci_dev(dev),
- &domain_context_mapping_cb, &data);
+ domain_context_mapping_cb, domain);
}
/* Returns a number of VTD pages, but aligned to MM page size */
@@ -1985,6 +2049,9 @@ static void switch_to_super_page(struct dmar_domain *domain,
iommu_flush_iotlb_psi(info->iommu, domain,
start_pfn, lvl_pages,
0, 0);
+ if (domain->nested_parent)
+ parent_domain_flush(domain, start_pfn,
+ lvl_pages, 0);
}
pte++;
@@ -2108,9 +2175,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
struct context_entry *context;
u16 did_old;
- if (!iommu)
- return;
-
spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (!context) {
@@ -2118,14 +2182,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
return;
}
- if (sm_supported(iommu)) {
- if (hw_pass_through && domain_type_is_si(info->domain))
- did_old = FLPT_DEFAULT_DID;
- else
- did_old = domain_id_iommu(info->domain, iommu);
- } else {
- did_old = context_domain_id(context);
- }
+ did_old = context_domain_id(context);
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
@@ -2136,9 +2193,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
- if (sm_supported(iommu))
- qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
-
iommu->flush.flush_iotlb(iommu,
did_old,
0,
@@ -2278,28 +2332,19 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
list_add(&info->link, &domain->devices);
spin_unlock_irqrestore(&domain->lock, flags);
- /* PASID table is mandatory for a PCI device in scalable mode. */
- if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
- /* Setup the PASID entry for requests without PASID: */
- if (hw_pass_through && domain_type_is_si(domain))
- ret = intel_pasid_setup_pass_through(iommu,
- dev, IOMMU_NO_PASID);
- else if (domain->use_first_level)
- ret = domain_setup_first_level(iommu, domain, dev,
- IOMMU_NO_PASID);
- else
- ret = intel_pasid_setup_second_level(iommu, domain,
- dev, IOMMU_NO_PASID);
- if (ret) {
- dev_err(dev, "Setup RID2PASID failed\n");
- device_block_translation(dev);
- return ret;
- }
- }
+ if (dev_is_real_dma_subdevice(dev))
+ return 0;
+
+ if (!sm_supported(iommu))
+ ret = domain_context_mapping(domain, dev);
+ else if (hw_pass_through && domain_type_is_si(domain))
+ ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
+ else if (domain->use_first_level)
+ ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);
- ret = domain_context_mapping(domain, dev);
if (ret) {
- dev_err(dev, "Domain context map failed\n");
device_block_translation(dev);
return ret;
}
@@ -2660,10 +2705,6 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}
-#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
- dmar_map_gfx = 0;
-#endif
-
if (!dmar_map_gfx)
iommu_identity_mapping |= IDENTMAP_GFX;
@@ -3747,30 +3788,6 @@ static void domain_context_clear(struct device_domain_info *info)
&domain_context_clear_one_cb, info);
}
-static void dmar_remove_one_dev_info(struct device *dev)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *domain = info->domain;
- struct intel_iommu *iommu = info->iommu;
- unsigned long flags;
-
- if (!dev_is_real_dma_subdevice(info->dev)) {
- if (dev_is_pci(info->dev) && sm_supported(iommu))
- intel_pasid_tear_down_entry(iommu, info->dev,
- IOMMU_NO_PASID, false);
-
- iommu_disable_pci_caps(info);
- domain_context_clear(info);
- }
-
- spin_lock_irqsave(&domain->lock, flags);
- list_del(&info->link);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- domain_detach_iommu(domain, iommu);
- info->domain = NULL;
-}
-
/*
* Clear the page table pointer in context or pasid table entries so that
* all DMA requests without PASID from the device are blocked. If the page
@@ -3883,6 +3900,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
/* Must be NESTING domain */
@@ -3908,11 +3926,16 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
if (!domain)
return ERR_PTR(-ENOMEM);
- if (nested_parent)
- to_dmar_domain(domain)->nested_parent = true;
+ dmar_domain = to_dmar_domain(domain);
+
+ if (nested_parent) {
+ dmar_domain->nested_parent = true;
+ INIT_LIST_HEAD(&dmar_domain->s1_domains);
+ spin_lock_init(&dmar_domain->s1_lock);
+ }
if (dirty_tracking) {
- if (to_dmar_domain(domain)->use_first_level) {
+ if (dmar_domain->use_first_level) {
iommu_domain_free(domain);
return ERR_PTR(-EOPNOTSUPP);
}
@@ -3924,8 +3947,12 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ WARN_ON(dmar_domain->nested_parent &&
+ !list_empty(&dmar_domain->s1_domains));
if (domain != &si_domain->domain)
- domain_exit(to_dmar_domain(domain));
+ domain_exit(dmar_domain);
}
int prepare_domain_attach_device(struct iommu_domain *domain,
@@ -3965,6 +3992,10 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
dmar_domain->agaw--;
}
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
+ context_copied(iommu, info->bus, info->devfn))
+ return intel_pasid_setup_sm_context(dev);
+
return 0;
}
@@ -4107,6 +4138,9 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
start_pfn, nrpages,
list_empty(&gather->freelist), 0);
+ if (dmar_domain->nested_parent)
+ parent_domain_flush(dmar_domain, start_pfn, nrpages,
+ list_empty(&gather->freelist));
put_pages_list(&gather->freelist);
}
@@ -4265,26 +4299,50 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
dev_iommu_priv_set(dev, info);
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
- kfree(info);
- return ERR_PTR(ret);
+ goto clear_rbtree;
+ }
+
+ if (!context_copied(iommu, info->bus, info->devfn)) {
+ ret = intel_pasid_setup_sm_context(dev);
+ if (ret)
+ goto free_table;
}
}
intel_iommu_debugfs_create_dev(info);
return &iommu->iommu;
+free_table:
+ intel_pasid_free_table(dev);
+clear_rbtree:
+ device_rbtree_remove(info);
+free:
+ kfree(info);
+
+ return ERR_PTR(ret);
}
static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ mutex_lock(&iommu->iopf_lock);
+ device_rbtree_remove(info);
+ mutex_unlock(&iommu->iopf_lock);
+
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
+ !context_copied(iommu, info->bus, info->devfn))
+ intel_pasid_teardown_sm_context(dev);
- dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
@@ -4427,23 +4485,15 @@ static int intel_iommu_enable_iopf(struct device *dev)
if (ret)
return ret;
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
- if (ret)
- goto iopf_remove_device;
-
ret = pci_enable_pri(pdev, PRQ_DEPTH);
- if (ret)
- goto iopf_unregister_handler;
+ if (ret) {
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
+ return ret;
+ }
+
info->pri_enabled = 1;
return 0;
-
-iopf_unregister_handler:
- iommu_unregister_device_fault_handler(dev);
-iopf_remove_device:
- iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- return ret;
}
static int intel_iommu_disable_iopf(struct device *dev)
@@ -4464,14 +4514,7 @@ static int intel_iommu_disable_iopf(struct device *dev)
*/
pci_disable_pri(to_pci_dev(dev));
info->pri_enabled = 0;
-
- /*
- * With PRI disabled and outstanding PRQs drained, unregistering
- * fault handler and removing device from iopf queue should never
- * fail.
- */
- WARN_ON(iommu_unregister_device_fault_handler(dev));
- WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev));
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
return 0;
}
@@ -4664,21 +4707,70 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
return vtd;
}
+/*
+ * Set dirty tracking for the device list of a domain. The caller must
+ * hold the domain->lock when calling it.
+ */
+static int device_set_dirty_tracking(struct list_head *devices, bool enable)
+{
+ struct device_domain_info *info;
+ int ret = 0;
+
+ list_for_each_entry(info, devices, link) {
+ ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
+ IOMMU_NO_PASID, enable);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
+ bool enable)
+{
+ struct dmar_domain *s1_domain;
+ unsigned long flags;
+ int ret;
+
+ spin_lock(&domain->s1_lock);
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ ret = device_set_dirty_tracking(&s1_domain->devices, enable);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ if (ret)
+ goto err_unwind;
+ }
+ spin_unlock(&domain->s1_lock);
+ return 0;
+
+err_unwind:
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ device_set_dirty_tracking(&s1_domain->devices,
+ domain->dirty_tracking);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ }
+ spin_unlock(&domain->s1_lock);
+ return ret;
+}
+
static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
bool enable)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- struct device_domain_info *info;
int ret;
spin_lock(&dmar_domain->lock);
if (dmar_domain->dirty_tracking == enable)
goto out_unlock;
- list_for_each_entry(info, &dmar_domain->devices, link) {
- ret = intel_pasid_setup_dirty_tracking(info->iommu,
- info->domain, info->dev,
- IOMMU_NO_PASID, enable);
+ ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
+ if (ret)
+ goto err_unwind;
+
+ if (dmar_domain->nested_parent) {
+ ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
if (ret)
goto err_unwind;
}
@@ -4690,10 +4782,8 @@ out_unlock:
return 0;
err_unwind:
- list_for_each_entry(info, &dmar_domain->devices, link)
- intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain,
- info->dev, IOMMU_NO_PASID,
- dmar_domain->dirty_tracking);
+ device_set_dirty_tracking(&dmar_domain->devices,
+ dmar_domain->dirty_tracking);
spin_unlock(&dmar_domain->lock);
return ret;
}
@@ -4743,6 +4833,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = {
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
+ .release_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index d02f916d8e59..404d2476a877 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -627,6 +627,10 @@ struct dmar_domain {
int agaw;
/* maximum mapped address */
u64 max_addr;
+ /* Protect the s1_domains list */
+ spinlock_t s1_lock;
+ /* Track s1_domains nested on this domain */
+ struct list_head s1_domains;
};
/* Nested user domain */
@@ -637,6 +641,8 @@ struct dmar_domain {
unsigned long s1_pgtbl;
/* page table attributes */
struct iommu_hwpt_vtd_s1 s1_cfg;
+ /* link to parent domain siblings */
+ struct list_head s2_link;
};
};
@@ -713,9 +719,16 @@ struct intel_iommu {
#endif
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
+ /* Synchronization between fault report and iommu device release. */
+ struct mutex iopf_lock;
struct q_inval *qi; /* Queued invalidation info */
u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
+ /* rb tree for all probed devices */
+ struct rb_root device_rbtree;
+ /* protect the device_rbtree */
+ spinlock_t device_rbtree_lock;
+
#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
struct irq_domain *ir_domain;
@@ -749,6 +762,8 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+ /* device tracking node(lookup by PCI RID) */
+ struct rb_node node;
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
struct dentry *debugfs_dentry; /* pointer to device directory dentry */
#endif
@@ -1060,6 +1075,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
+void domain_update_iotlb(struct dmar_domain *domain);
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void device_block_translation(struct device *dev);
@@ -1074,13 +1090,14 @@ void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
const struct iommu_user_data *user_data);
+struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
int intel_svm_enable_prq(struct intel_iommu *iommu);
int intel_svm_finish_prq(struct intel_iommu *iommu);
-int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
- struct iommu_page_response *msg);
+void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
+ struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(void);
void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
void intel_drain_pasid_prq(struct device *dev, u32 pasid);
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index f26c7f1c46cc..a7d68f3d518a 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -65,12 +65,20 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
list_add(&info->link, &dmar_domain->devices);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ domain_update_iotlb(dmar_domain);
+
return 0;
}
static void intel_nested_domain_free(struct iommu_domain *domain)
{
- kfree(to_dmar_domain(domain));
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct dmar_domain *s2_domain = dmar_domain->s2_domain;
+
+ spin_lock(&s2_domain->s1_lock);
+ list_del(&dmar_domain->s2_link);
+ spin_unlock(&s2_domain->s1_lock);
+ kfree(dmar_domain);
}
static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
@@ -95,7 +103,7 @@ static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
}
static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
- unsigned long npages, bool ih)
+ u64 npages, bool ih)
{
struct iommu_domain_info *info;
unsigned int mask;
@@ -201,5 +209,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
spin_lock_init(&domain->lock);
xa_init(&domain->iommu_array);
+ spin_lock(&s2_domain->s1_lock);
+ list_add(&domain->s2_link, &s2_domain->s1_domains);
+ spin_unlock(&s2_domain->s1_lock);
+
return &domain->domain;
}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 3239cefa4c33..11f0b856d74c 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -214,6 +214,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
if (!info || !info->ats_enabled)
return;
+ if (pci_dev_is_disconnected(to_pci_dev(dev)))
+ return;
+
sid = info->bus << 8 | info->devfn;
qdep = info->ats_qdep;
pfsid = info->pfsid;
@@ -428,7 +431,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
* Set up dirty tracking on a second only or nested translation type.
*/
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
- struct dmar_domain *domain,
struct device *dev, u32 pasid,
bool enabled)
{
@@ -445,7 +447,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
return -ENODEV;
}
- did = domain_id_iommu(domain, iommu);
+ did = pasid_get_domain_id(pte);
pgtt = pasid_pte_get_pgtt(pte);
if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
pgtt != PASID_ENTRY_PGTT_NESTED) {
@@ -658,6 +660,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, s2_domain->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (s2_domain->dirty_tracking)
+ pasid_set_ssade(pte);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
pasid_set_present(pte);
spin_unlock(&iommu->lock);
@@ -666,3 +670,205 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return 0;
}
+
+/*
+ * Interfaces to setup or teardown a pasid table to the scalable-mode
+ * context table entry:
+ */
+
+static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, false);
+ if (!context) {
+ spin_unlock(&iommu->lock);
+ return;
+ }
+
+ context_clear_entry(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+ spin_unlock(&iommu->lock);
+
+ /*
+ * Cache invalidation for changes to a scalable-mode context table
+ * entry.
+ *
+ * Section 6.5.3.3 of the VT-d spec:
+ * - Device-selective context-cache invalidation;
+ * - Domain-selective PASID-cache invalidation to affected domains
+ * (can be skipped if all PASID entries were not-present);
+ * - Domain-selective IOTLB invalidation to affected domains;
+ * - Global Device-TLB invalidation to affected functions.
+ *
+ * The iommu has been parked in the blocking state. All domains have
+ * been detached from the device or PASID. The PASID and IOTLB caches
+ * have been invalidated during the domain detach path.
+ */
+ iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn),
+ DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
+ devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
+}
+
+static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ if (dev == &pdev->dev)
+ device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);
+
+ return 0;
+}
+
+void intel_pasid_teardown_sm_context(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ if (!dev_is_pci(dev)) {
+ device_pasid_table_teardown(dev, info->bus, info->devfn);
+ return;
+ }
+
+ pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
+}
+
+/*
+ * Get the PASID directory size for scalable mode context entry.
+ * Value of X in the PDTS field of a scalable mode context entry
+ * indicates PASID directory with 2^(X + 7) entries.
+ */
+static unsigned long context_get_sm_pds(struct pasid_table *table)
+{
+ unsigned long pds, max_pde;
+
+ max_pde = table->max_pasid >> PASID_PDE_SHIFT;
+ pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
+ if (pds < 7)
+ return 0;
+
+ return pds - 7;
+}
+
+static int context_entry_set_pasid_table(struct context_entry *context,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct pasid_table *table = info->pasid_table;
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long pds;
+
+ context_clear_entry(context);
+
+ pds = context_get_sm_pds(table);
+ context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
+ context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
+
+ if (info->ats_supported)
+ context_set_sm_dte(context);
+ if (info->pri_supported)
+ context_set_sm_pre(context);
+ if (info->pasid_supported)
+ context_set_pasid(context);
+
+ context_set_fault_enable(context);
+ context_set_present(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+
+ return 0;
+}
+
+static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, true);
+ if (!context) {
+ spin_unlock(&iommu->lock);
+ return -ENOMEM;
+ }
+
+ if (context_present(context) && !context_copied(iommu, bus, devfn)) {
+ spin_unlock(&iommu->lock);
+ return 0;
+ }
+
+ if (context_copied(iommu, bus, devfn)) {
+ context_clear_entry(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+
+ /*
+ * For kdump cases, old valid entries may be cached due to
+ * the in-flight DMA and copied pgtable, but there is no
+ * unmapping behaviour for them, thus we need explicit cache
+ * flushes for all affected domain IDs and PASIDs used in
+ * the copied PASID table. Given that we have no idea about
+ * which domain IDs and PASIDs were used in the copied tables,
+ * upgrade them to global PASID and IOTLB cache invalidation.
+ */
+ iommu->flush.flush_context(iommu, 0,
+ PCI_DEVID(bus, devfn),
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
+
+ /*
+ * At this point, the device is supposed to finish reset at
+ * its driver probe stage, so no in-flight DMA will exist,
+ * and we don't need to worry anymore hereafter.
+ */
+ clear_context_copied(iommu, bus, devfn);
+ }
+
+ context_entry_set_pasid_table(context, dev);
+ spin_unlock(&iommu->lock);
+
+ /*
+ * It's a non-present to present mapping. If hardware doesn't cache
+ * non-present entry we don't need to flush the caches. If it does
+ * cache non-present entries, then it does so in the special
+ * domain #0, which we have to flush:
+ */
+ if (cap_caching_mode(iommu->cap)) {
+ iommu->flush.flush_context(iommu, 0,
+ PCI_DEVID(bus, devfn),
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
+ }
+
+ return 0;
+}
+
+static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ if (dev != &pdev->dev)
+ return 0;
+
+ return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
+}
+
+/*
+ * Set the device's PASID table to its context table entry.
+ *
+ * The PASID table is set to the context entries of both device itself
+ * and its alias requester ID for DMA.
+ */
+int intel_pasid_setup_sm_context(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ if (!dev_is_pci(dev))
+ return device_pasid_table_setup(dev, info->bus, info->devfn);
+
+ return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
+}
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 8d40d4c66e31..da9978fef7ac 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -307,7 +307,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid);
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
- struct dmar_domain *domain,
struct device *dev, u32 pasid,
bool enabled);
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
@@ -319,4 +318,6 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
bool fault_ignore);
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
struct device *dev, u32 pasid);
+int intel_pasid_setup_sm_context(struct device *dev);
+void intel_pasid_teardown_sm_context(struct device *dev);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
index 94ee70ac38e3..adc4de6bbd88 100644
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@@ -33,7 +33,7 @@ int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
spin_lock_irqsave(&latency_lock, flags);
if (!iommu->perf_statistic) {
- iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM,
+ iommu->perf_statistic = kcalloc(DMAR_LATENCY_NUM, sizeof(*lstat),
GFP_ATOMIC);
if (!iommu->perf_statistic) {
ret = -ENOMEM;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 40edd282903f..c1bed89b1026 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -22,7 +22,6 @@
#include "iommu.h"
#include "pasid.h"
#include "perf.h"
-#include "../iommu-sva.h"
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
@@ -315,10 +314,11 @@ out:
return 0;
}
-static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
- struct iommu_domain *domain, ioasid_t pasid)
+static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
struct intel_svm_dev *sdev;
struct intel_svm *svm;
@@ -360,7 +360,6 @@ static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
sdev->iommu = iommu;
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
- init_rcu_head(&sdev->rcu);
if (info->ats_enabled) {
sdev->qdep = info->ats_qdep;
if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
@@ -408,13 +407,6 @@ void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
if (svm->notifier.ops)
mmu_notifier_unregister(&svm->notifier, mm);
pasid_private_remove(svm->pasid);
- /*
- * We mandate that no page faults may be outstanding
- * for the PASID when intel_svm_unbind_mm() is called.
- * If that is not obeyed, subtle errors will happen.
- * Let's make them less subtle...
- */
- memset(svm, 0x6b, sizeof(*svm));
kfree(svm);
}
}
@@ -562,16 +554,12 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
return prot;
}
-static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
- struct page_req_dsc *desc)
+static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
+ struct page_req_dsc *desc)
{
- struct iommu_fault_event event;
-
- if (!dev || !dev_is_pci(dev))
- return -ENODEV;
+ struct iopf_fault event = { };
/* Fill in event data for device specific processing */
- memset(&event, 0, sizeof(struct iommu_fault_event));
event.fault.type = IOMMU_FAULT_PAGE_REQ;
event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
event.fault.prm.pasid = desc->pasid;
@@ -603,7 +591,7 @@ static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
}
- return iommu_report_device_fault(dev, &event);
+ iommu_report_device_fault(dev, &event);
}
static void handle_bad_prq_event(struct intel_iommu *iommu,
@@ -650,7 +638,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
struct intel_iommu *iommu = d;
struct page_req_dsc *req;
int head, tail, handled;
- struct pci_dev *pdev;
+ struct device *dev;
u64 address;
/*
@@ -696,23 +684,22 @@ bad_req:
if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
goto prq_advance;
- pdev = pci_get_domain_bus_and_slot(iommu->segment,
- PCI_BUS_NUM(req->rid),
- req->rid & 0xff);
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (!pdev)
+ mutex_lock(&iommu->iopf_lock);
+ dev = device_rbtree_find(iommu, req->rid);
+ if (!dev) {
+ mutex_unlock(&iommu->iopf_lock);
goto bad_req;
+ }
- if (intel_svm_prq_report(iommu, &pdev->dev, req))
- handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
- else
- trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
- req->priv_data[0], req->priv_data[1],
- iommu->prq_seq_number++);
- pci_dev_put(pdev);
+ intel_svm_prq_report(iommu, dev, req);
+ trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ iommu->prq_seq_number++);
+ mutex_unlock(&iommu->iopf_lock);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
@@ -742,9 +729,8 @@ prq_advance:
return IRQ_RETVAL(handled);
}
-int intel_svm_page_response(struct device *dev,
- struct iommu_fault_event *evt,
- struct iommu_page_response *msg)
+void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
+ struct iommu_page_response *msg)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
@@ -753,7 +739,6 @@ int intel_svm_page_response(struct device *dev,
bool private_present;
bool pasid_present;
bool last_page;
- int ret = 0;
u16 sid;
prm = &evt->fault.prm;
@@ -762,16 +747,6 @@ int intel_svm_page_response(struct device *dev,
private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
- if (!pasid_present) {
- ret = -EINVAL;
- goto out;
- }
-
- if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
- ret = -EINVAL;
- goto out;
- }
-
/*
* Per VT-d spec. v3.0 ch7.7, system software must respond
* with page group response if private data is present (PDP)
@@ -800,17 +775,6 @@ int intel_svm_page_response(struct device *dev,
qi_submit_sync(iommu, &desc, 1, 0);
}
-out:
- return ret;
-}
-
-static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
-
- return intel_svm_bind_mm(iommu, dev, domain, pasid);
}
static void intel_svm_domain_free(struct iommu_domain *domain)
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index e5b8b9110c13..06d78fcc79fd 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -11,101 +11,140 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include "iommu-sva.h"
+#include "iommu-priv.h"
-/**
- * struct iopf_queue - IO Page Fault queue
- * @wq: the fault workqueue
- * @devices: devices attached to this queue
- * @lock: protects the device list
+/*
+ * Return the fault parameter of a device if it exists. Otherwise, return NULL.
+ * On a successful return, the caller takes a reference of this parameter and
+ * should put it after use by calling iopf_put_dev_fault_param().
*/
-struct iopf_queue {
- struct workqueue_struct *wq;
- struct list_head devices;
- struct mutex lock;
-};
+static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
+{
+ struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *fault_param;
-/**
- * struct iopf_device_param - IO Page Fault data attached to a device
- * @dev: the device that owns this param
- * @queue: IOPF queue
- * @queue_list: index into queue->devices
- * @partial: faults that are part of a Page Request Group for which the last
- * request hasn't been submitted yet.
- */
-struct iopf_device_param {
- struct device *dev;
- struct iopf_queue *queue;
- struct list_head queue_list;
- struct list_head partial;
-};
-
-struct iopf_fault {
- struct iommu_fault fault;
- struct list_head list;
-};
-
-struct iopf_group {
- struct iopf_fault last_fault;
- struct list_head faults;
- struct work_struct work;
- struct device *dev;
-};
-
-static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
- enum iommu_page_response_code status)
+ rcu_read_lock();
+ fault_param = rcu_dereference(param->fault_param);
+ if (fault_param && !refcount_inc_not_zero(&fault_param->users))
+ fault_param = NULL;
+ rcu_read_unlock();
+
+ return fault_param;
+}
+
+/* Caller must hold a reference of the fault parameter. */
+static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
- struct iommu_page_response resp = {
- .version = IOMMU_PAGE_RESP_VERSION_1,
- .pasid = iopf->fault.prm.pasid,
- .grpid = iopf->fault.prm.grpid,
- .code = status,
- };
+ if (refcount_dec_and_test(&fault_param->users))
+ kfree_rcu(fault_param, rcu);
+}
- if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
- (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
- resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
+static void __iopf_free_group(struct iopf_group *group)
+{
+ struct iopf_fault *iopf, *next;
- return iommu_page_response(dev, &resp);
+ list_for_each_entry_safe(iopf, next, &group->faults, list) {
+ if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
+ kfree(iopf);
+ }
+
+ /* Pair with iommu_report_device_fault(). */
+ iopf_put_dev_fault_param(group->fault_param);
}
-static void iopf_handler(struct work_struct *work)
+void iopf_free_group(struct iopf_group *group)
+{
+ __iopf_free_group(group);
+ kfree(group);
+}
+EXPORT_SYMBOL_GPL(iopf_free_group);
+
+static struct iommu_domain *get_domain_for_iopf(struct device *dev,
+ struct iommu_fault *fault)
{
- struct iopf_group *group;
struct iommu_domain *domain;
- struct iopf_fault *iopf, *next;
- enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
- group = container_of(work, struct iopf_group, work);
- domain = iommu_get_domain_for_dev_pasid(group->dev,
- group->last_fault.fault.prm.pasid, 0);
- if (!domain || !domain->iopf_handler)
- status = IOMMU_PAGE_RESP_INVALID;
+ if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
+ domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
+ if (IS_ERR(domain))
+ domain = NULL;
+ } else {
+ domain = iommu_get_domain_for_dev(dev);
+ }
- list_for_each_entry_safe(iopf, next, &group->faults, list) {
+ if (!domain || !domain->iopf_handler) {
+ dev_warn_ratelimited(dev,
+ "iopf (pasid %d) without domain attached or handler installed\n",
+ fault->prm.pasid);
+
+ return NULL;
+ }
+
+ return domain;
+}
+
+/* Non-last request of a group. Postpone until the last one. */
+static int report_partial_fault(struct iommu_fault_param *fault_param,
+ struct iommu_fault *fault)
+{
+ struct iopf_fault *iopf;
+
+ iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
+ if (!iopf)
+ return -ENOMEM;
+
+ iopf->fault = *fault;
+
+ mutex_lock(&fault_param->lock);
+ list_add(&iopf->list, &fault_param->partial);
+ mutex_unlock(&fault_param->lock);
+
+ return 0;
+}
+
+static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
+ struct iopf_fault *evt,
+ struct iopf_group *abort_group)
+{
+ struct iopf_fault *iopf, *next;
+ struct iopf_group *group;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group) {
/*
- * For the moment, errors are sticky: don't handle subsequent
- * faults in the group if there is an error.
+ * We always need to construct the group as we need it to abort
+ * the request at the driver if it can't be handled.
*/
- if (status == IOMMU_PAGE_RESP_SUCCESS)
- status = domain->iopf_handler(&iopf->fault,
- domain->fault_data);
+ group = abort_group;
+ }
- if (!(iopf->fault.prm.flags &
- IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
- kfree(iopf);
+ group->fault_param = iopf_param;
+ group->last_fault.fault = evt->fault;
+ INIT_LIST_HEAD(&group->faults);
+ INIT_LIST_HEAD(&group->pending_node);
+ list_add(&group->last_fault.list, &group->faults);
+
+ /* See if we have partial faults for this group */
+ mutex_lock(&iopf_param->lock);
+ list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
+ if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
+ /* Insert *before* the last fault */
+ list_move(&iopf->list, &group->faults);
}
+ list_add(&group->pending_node, &iopf_param->faults);
+ mutex_unlock(&iopf_param->lock);
- iopf_complete_group(group->dev, &group->last_fault, status);
- kfree(group);
+ return group;
}
/**
- * iommu_queue_iopf - IO Page Fault handler
- * @fault: fault event
- * @cookie: struct device, passed to iommu_register_device_fault_handler.
+ * iommu_report_device_fault() - Report fault event to device driver
+ * @dev: the device
+ * @evt: fault event data
*
- * Add a fault to the device workqueue, to be handled by mm.
+ * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
+ * handler. If this function fails then ops->page_response() was called to
+ * complete evt if required.
*
* This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
* them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
@@ -137,83 +176,57 @@ static void iopf_handler(struct work_struct *work)
* freed after the device has stopped generating page faults (or the iommu
* hardware has been set to block the page faults) and the pending page faults
* have been flushed.
- *
- * Return: 0 on success and <0 on error.
*/
-int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
+void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
- int ret;
+ struct iommu_fault *fault = &evt->fault;
+ struct iommu_fault_param *iopf_param;
+ struct iopf_group abort_group = {};
struct iopf_group *group;
- struct iopf_fault *iopf, *next;
- struct iopf_device_param *iopf_param;
- struct device *dev = cookie;
- struct dev_iommu *param = dev->iommu;
-
- lockdep_assert_held(&param->lock);
-
- if (fault->type != IOMMU_FAULT_PAGE_REQ)
- /* Not a recoverable page fault */
- return -EOPNOTSUPP;
-
- /*
- * As long as we're holding param->lock, the queue can't be unlinked
- * from the device and therefore cannot disappear.
- */
- iopf_param = param->iopf_param;
- if (!iopf_param)
- return -ENODEV;
+ iopf_param = iopf_get_dev_fault_param(dev);
+ if (WARN_ON(!iopf_param))
+ return;
if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
- iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
- if (!iopf)
- return -ENOMEM;
-
- iopf->fault = *fault;
-
- /* Non-last request of a group. Postpone until the last one */
- list_add(&iopf->list, &iopf_param->partial);
-
- return 0;
+ report_partial_fault(iopf_param, fault);
+ iopf_put_dev_fault_param(iopf_param);
+ /* A request that is not the last does not need to be ack'd */
}
- group = kzalloc(sizeof(*group), GFP_KERNEL);
- if (!group) {
- /*
- * The caller will send a response to the hardware. But we do
- * need to clean up before leaving, otherwise partial faults
- * will be stuck.
- */
- ret = -ENOMEM;
- goto cleanup_partial;
- }
+ /*
+ * This is the last page fault of a group. Allocate an iopf group and
+ * pass it to domain's page fault handler. The group holds a reference
+ * count of the fault parameter. It will be released after response or
+ * error path of this function. If an error is returned, the caller
+ * will send a response to the hardware. We need to clean up before
+ * leaving, otherwise partial faults will be stuck.
+ */
+ group = iopf_group_alloc(iopf_param, evt, &abort_group);
+ if (group == &abort_group)
+ goto err_abort;
- group->dev = dev;
- group->last_fault.fault = *fault;
- INIT_LIST_HEAD(&group->faults);
- list_add(&group->last_fault.list, &group->faults);
- INIT_WORK(&group->work, iopf_handler);
+ group->domain = get_domain_for_iopf(dev, fault);
+ if (!group->domain)
+ goto err_abort;
- /* See if we have partial faults for this group */
- list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
- if (iopf->fault.prm.grpid == fault->prm.grpid)
- /* Insert *before* the last fault */
- list_move(&iopf->list, &group->faults);
- }
+ /*
+ * On success iopf_handler must call iopf_group_response() and
+ * iopf_free_group()
+ */
+ if (group->domain->iopf_handler(group))
+ goto err_abort;
- queue_work(iopf_param->queue->wq, &group->work);
- return 0;
+ return;
-cleanup_partial:
- list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
- if (iopf->fault.prm.grpid == fault->prm.grpid) {
- list_del(&iopf->list);
- kfree(iopf);
- }
- }
- return ret;
+err_abort:
+ iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
+ if (group == &abort_group)
+ __iopf_free_group(group);
+ else
+ iopf_free_group(group);
}
-EXPORT_SYMBOL_GPL(iommu_queue_iopf);
+EXPORT_SYMBOL_GPL(iommu_report_device_fault);
/**
* iopf_queue_flush_dev - Ensure that all queued faults have been processed
@@ -229,26 +242,52 @@ EXPORT_SYMBOL_GPL(iommu_queue_iopf);
*/
int iopf_queue_flush_dev(struct device *dev)
{
- int ret = 0;
- struct iopf_device_param *iopf_param;
- struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *iopf_param;
- if (!param)
+ /*
+ * It's a driver bug to be here after iopf_queue_remove_device().
+ * Therefore, it's safe to dereference the fault parameter without
+ * holding the lock.
+ */
+ iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
+ if (WARN_ON(!iopf_param))
return -ENODEV;
- mutex_lock(&param->lock);
- iopf_param = param->iopf_param;
- if (iopf_param)
- flush_workqueue(iopf_param->queue->wq);
- else
- ret = -ENODEV;
- mutex_unlock(&param->lock);
+ flush_workqueue(iopf_param->queue->wq);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
/**
+ * iopf_group_response - Respond a group of page faults
+ * @group: the group of faults with the same group id
+ * @status: the response code
+ */
+void iopf_group_response(struct iopf_group *group,
+ enum iommu_page_response_code status)
+{
+ struct iommu_fault_param *fault_param = group->fault_param;
+ struct iopf_fault *iopf = &group->last_fault;
+ struct device *dev = group->fault_param->dev;
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_page_response resp = {
+ .pasid = iopf->fault.prm.pasid,
+ .grpid = iopf->fault.prm.grpid,
+ .code = status,
+ };
+
+ /* Only send response if there is a fault report pending */
+ mutex_lock(&fault_param->lock);
+ if (!list_empty(&group->pending_node)) {
+ ops->page_response(dev, &group->last_fault, &resp);
+ list_del_init(&group->pending_node);
+ }
+ mutex_unlock(&fault_param->lock);
+}
+EXPORT_SYMBOL_GPL(iopf_group_response);
+
+/**
* iopf_queue_discard_partial - Remove all pending partial fault
* @queue: the queue whose partial faults need to be discarded
*
@@ -261,18 +300,20 @@ EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
struct iopf_fault *iopf, *next;
- struct iopf_device_param *iopf_param;
+ struct iommu_fault_param *iopf_param;
if (!queue)
return -EINVAL;
mutex_lock(&queue->lock);
list_for_each_entry(iopf_param, &queue->devices, queue_list) {
+ mutex_lock(&iopf_param->lock);
list_for_each_entry_safe(iopf, next, &iopf_param->partial,
list) {
list_del(&iopf->list);
kfree(iopf);
}
+ mutex_unlock(&iopf_param->lock);
}
mutex_unlock(&queue->lock);
return 0;
@@ -288,34 +329,42 @@ EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
*/
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
- int ret = -EBUSY;
- struct iopf_device_param *iopf_param;
+ int ret = 0;
struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *fault_param;
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
- if (!param)
+ if (!ops->page_response)
return -ENODEV;
- iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
- if (!iopf_param)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&iopf_param->partial);
- iopf_param->queue = queue;
- iopf_param->dev = dev;
-
mutex_lock(&queue->lock);
mutex_lock(&param->lock);
- if (!param->iopf_param) {
- list_add(&iopf_param->queue_list, &queue->devices);
- param->iopf_param = iopf_param;
- ret = 0;
+ if (rcu_dereference_check(param->fault_param,
+ lockdep_is_held(&param->lock))) {
+ ret = -EBUSY;
+ goto done_unlock;
+ }
+
+ fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
+ if (!fault_param) {
+ ret = -ENOMEM;
+ goto done_unlock;
}
+
+ mutex_init(&fault_param->lock);
+ INIT_LIST_HEAD(&fault_param->faults);
+ INIT_LIST_HEAD(&fault_param->partial);
+ fault_param->dev = dev;
+ refcount_set(&fault_param->users, 1);
+ list_add(&fault_param->queue_list, &queue->devices);
+ fault_param->queue = queue;
+
+ rcu_assign_pointer(param->fault_param, fault_param);
+
+done_unlock:
mutex_unlock(&param->lock);
mutex_unlock(&queue->lock);
- if (ret)
- kfree(iopf_param);
-
return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);
@@ -325,40 +374,66 @@ EXPORT_SYMBOL_GPL(iopf_queue_add_device);
* @queue: IOPF queue
* @dev: device to remove
*
- * Caller makes sure that no more faults are reported for this device.
+ * Removing a device from an iopf_queue. It's recommended to follow these
+ * steps when removing a device:
*
- * Return: 0 on success and <0 on error.
+ * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
+ * and flush any hardware page request queues. This should be done before
+ * calling into this helper.
+ * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
+ * page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
+ * not retry. This helper function handles this.
+ * - Disable PRI on the device: After calling this helper, the caller could
+ * then disable PRI on the device.
+ *
+ * Calling iopf_queue_remove_device() essentially disassociates the device.
+ * The fault_param might still exist, but iommu_page_response() will do
+ * nothing. The device fault parameter reference count has been properly
+ * passed from iommu_report_device_fault() to the fault handling work, and
+ * will eventually be released after iommu_page_response().
*/
-int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
+void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
- int ret = -EINVAL;
- struct iopf_fault *iopf, *next;
- struct iopf_device_param *iopf_param;
+ struct iopf_fault *partial_iopf;
+ struct iopf_fault *next;
+ struct iopf_group *group, *temp;
struct dev_iommu *param = dev->iommu;
-
- if (!param || !queue)
- return -EINVAL;
+ struct iommu_fault_param *fault_param;
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
mutex_lock(&queue->lock);
mutex_lock(&param->lock);
- iopf_param = param->iopf_param;
- if (iopf_param && iopf_param->queue == queue) {
- list_del(&iopf_param->queue_list);
- param->iopf_param = NULL;
- ret = 0;
+ fault_param = rcu_dereference_check(param->fault_param,
+ lockdep_is_held(&param->lock));
+
+ if (WARN_ON(!fault_param || fault_param->queue != queue))
+ goto unlock;
+
+ mutex_lock(&fault_param->lock);
+ list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
+ kfree(partial_iopf);
+
+ list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
+ struct iopf_fault *iopf = &group->last_fault;
+ struct iommu_page_response resp = {
+ .pasid = iopf->fault.prm.pasid,
+ .grpid = iopf->fault.prm.grpid,
+ .code = IOMMU_PAGE_RESP_INVALID
+ };
+
+ ops->page_response(dev, iopf, &resp);
+ list_del_init(&group->pending_node);
}
- mutex_unlock(&param->lock);
- mutex_unlock(&queue->lock);
- if (ret)
- return ret;
+ mutex_unlock(&fault_param->lock);
- /* Just in case some faults are still stuck */
- list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
- kfree(iopf);
+ list_del(&fault_param->queue_list);
- kfree(iopf_param);
-
- return 0;
+ /* dec the ref owned by iopf_queue_add_device() */
+ rcu_assign_pointer(param->fault_param, NULL);
+ iopf_put_dev_fault_param(fault_param);
+unlock:
+ mutex_unlock(&param->lock);
+ mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
@@ -404,7 +479,7 @@ EXPORT_SYMBOL_GPL(iopf_queue_alloc);
*/
void iopf_queue_free(struct iopf_queue *queue)
{
- struct iopf_device_param *iopf_param, *next;
+ struct iommu_fault_param *iopf_param, *next;
if (!queue)
return;
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index 2024a2313348..5f731d994803 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -21,10 +21,11 @@ int iommu_group_replace_domain(struct iommu_group *group,
struct iommu_domain *new_domain);
int iommu_device_register_bus(struct iommu_device *iommu,
- const struct iommu_ops *ops, struct bus_type *bus,
+ const struct iommu_ops *ops,
+ const struct bus_type *bus,
struct notifier_block *nb);
void iommu_device_unregister_bus(struct iommu_device *iommu,
- struct bus_type *bus,
+ const struct bus_type *bus,
struct notifier_block *nb);
#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index c3fc9201d0be..640acc804e8c 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -7,7 +7,7 @@
#include <linux/sched/mm.h>
#include <linux/iommu.h>
-#include "iommu-sva.h"
+#include "iommu-priv.h"
static DEFINE_MUTEX(iommu_sva_lock);
@@ -41,6 +41,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
}
iommu_mm->pasid = pasid;
INIT_LIST_HEAD(&iommu_mm->sva_domains);
+ INIT_LIST_HEAD(&iommu_mm->sva_handles);
/*
* Make sure the write to mm->iommu_mm is not reordered in front of
* initialization to iommu_mm fields. If it does, readers may see a
@@ -82,6 +83,14 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_unlock;
}
+ list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) {
+ if (handle->dev == dev) {
+ refcount_inc(&handle->users);
+ mutex_unlock(&iommu_sva_lock);
+ return handle;
+ }
+ }
+
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
if (!handle) {
ret = -ENOMEM;
@@ -111,6 +120,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
list_add(&domain->next, &mm->iommu_mm->sva_domains);
out:
+ refcount_set(&handle->users, 1);
+ list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);
mutex_unlock(&iommu_sva_lock);
handle->dev = dev;
handle->domain = domain;
@@ -141,6 +152,12 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
struct device *dev = handle->dev;
mutex_lock(&iommu_sva_lock);
+ if (!refcount_dec_and_test(&handle->users)) {
+ mutex_unlock(&iommu_sva_lock);
+ return;
+ }
+ list_del(&handle->handle_item);
+
iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
if (--domain->users == 0) {
list_del(&domain->next);
@@ -159,15 +176,25 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle)
}
EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
+void mm_pasid_drop(struct mm_struct *mm)
+{
+ struct iommu_mm_data *iommu_mm = mm->iommu_mm;
+
+ if (!iommu_mm)
+ return;
+
+ iommu_free_global_pasid(iommu_mm->pasid);
+ kfree(iommu_mm);
+}
+
/*
* I/O page fault handler for SVA
*/
-enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+static enum iommu_page_response_code
+iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm)
{
vm_fault_t ret;
struct vm_area_struct *vma;
- struct mm_struct *mm = data;
unsigned int access_flags = 0;
unsigned int fault_flags = FAULT_FLAG_REMOTE;
struct iommu_fault_page_request *prm = &fault->prm;
@@ -217,13 +244,54 @@ out_put_mm:
return status;
}
-void mm_pasid_drop(struct mm_struct *mm)
+static void iommu_sva_handle_iopf(struct work_struct *work)
{
- struct iommu_mm_data *iommu_mm = mm->iommu_mm;
+ struct iopf_fault *iopf;
+ struct iopf_group *group;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
+
+ group = container_of(work, struct iopf_group, work);
+ list_for_each_entry(iopf, &group->faults, list) {
+ /*
+ * For the moment, errors are sticky: don't handle subsequent
+ * faults in the group if there is an error.
+ */
+ if (status != IOMMU_PAGE_RESP_SUCCESS)
+ break;
+
+ status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
+ }
- if (!iommu_mm)
- return;
+ iopf_group_response(group, status);
+ iopf_free_group(group);
+}
- iommu_free_global_pasid(iommu_mm->pasid);
- kfree(iommu_mm);
+static int iommu_sva_iopf_handler(struct iopf_group *group)
+{
+ struct iommu_fault_param *fault_param = group->fault_param;
+
+ INIT_WORK(&group->work, iommu_sva_handle_iopf);
+ if (!queue_work(fault_param->queue->wq, &group->work))
+ return -EBUSY;
+
+ return 0;
+}
+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+ struct mm_struct *mm)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_domain *domain;
+
+ domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
+ if (!domain)
+ return NULL;
+
+ domain->type = IOMMU_DOMAIN_SVA;
+ mmgrab(mm);
+ domain->mm = mm;
+ domain->owner = ops;
+ domain->iopf_handler = iommu_sva_iopf_handler;
+
+ return domain;
}
diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h
deleted file mode 100644
index 54946b5a7caf..000000000000
--- a/drivers/iommu/iommu-sva.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * SVA library for IOMMU drivers
- */
-#ifndef _IOMMU_SVA_H
-#define _IOMMU_SVA_H
-
-#include <linux/mm_types.h>
-
-/* I/O Page fault */
-struct device;
-struct iommu_fault;
-struct iopf_queue;
-
-#ifdef CONFIG_IOMMU_SVA
-int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
-
-int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
-int iopf_queue_remove_device(struct iopf_queue *queue,
- struct device *dev);
-int iopf_queue_flush_dev(struct device *dev);
-struct iopf_queue *iopf_queue_alloc(const char *name);
-void iopf_queue_free(struct iopf_queue *queue);
-int iopf_queue_discard_partial(struct iopf_queue *queue);
-enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
-
-#else /* CONFIG_IOMMU_SVA */
-static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
-{
- return -ENODEV;
-}
-
-static inline int iopf_queue_add_device(struct iopf_queue *queue,
- struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline int iopf_queue_remove_device(struct iopf_queue *queue,
- struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline int iopf_queue_flush_dev(struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline struct iopf_queue *iopf_queue_alloc(const char *name)
-{
- return NULL;
-}
-
-static inline void iopf_queue_free(struct iopf_queue *queue)
-{
-}
-
-static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
-{
- return -ENODEV;
-}
-
-static inline enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
-{
- return IOMMU_PAGE_RESP_INVALID;
-}
-#endif /* CONFIG_IOMMU_SVA */
-#endif /* _IOMMU_SVA_H */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 68e648b55767..098869007c69 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -36,8 +36,6 @@
#include "dma-iommu.h"
#include "iommu-priv.h"
-#include "iommu-sva.h"
-
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);
@@ -291,7 +289,7 @@ EXPORT_SYMBOL_GPL(iommu_device_unregister);
#if IS_ENABLED(CONFIG_IOMMUFD_TEST)
void iommu_device_unregister_bus(struct iommu_device *iommu,
- struct bus_type *bus,
+ const struct bus_type *bus,
struct notifier_block *nb)
{
bus_unregister_notifier(bus, nb);
@@ -305,7 +303,8 @@ EXPORT_SYMBOL_GPL(iommu_device_unregister_bus);
* some memory to hold a notifier_block.
*/
int iommu_device_register_bus(struct iommu_device *iommu,
- const struct iommu_ops *ops, struct bus_type *bus,
+ const struct iommu_ops *ops,
+ const struct bus_type *bus,
struct notifier_block *nb)
{
int err;
@@ -463,13 +462,24 @@ static void iommu_deinit_device(struct device *dev)
/*
* release_device() must stop using any attached domain on the device.
- * If there are still other devices in the group they are not effected
+ * If there are still other devices in the group, they are not affected
* by this callback.
*
- * The IOMMU driver must set the device to either an identity or
- * blocking translation and stop using any domain pointer, as it is
- * going to be freed.
+ * If the iommu driver provides release_domain, the core code ensures
+ * that domain is attached prior to calling release_device. Drivers can
+ * use this to enforce a translation on the idle iommu. Typically, the
+ * global static blocked_domain is a good choice.
+ *
+ * Otherwise, the iommu driver must set the device to either an identity
+ * or a blocking translation in release_device() and stop using any
+ * domain pointer, as it is going to be freed.
+ *
+ * Regardless, if a delayed attach never occurred, then the release
+ * should still avoid touching any hardware configuration either.
*/
+ if (!dev->iommu->attach_deferred && ops->release_domain)
+ ops->release_domain->ops->attach_dev(ops->release_domain, dev);
+
if (ops->release_device)
ops->release_device(dev);
@@ -1248,6 +1258,25 @@ void iommu_group_remove_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
+#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
+/**
+ * iommu_group_mutex_assert - Check device group mutex lock
+ * @dev: the device that has group param set
+ *
+ * This function is called by an iommu driver to check whether it holds
+ * group mutex lock for the given device or not.
+ *
+ * Note that this function must be called after device group param is set.
+ */
+void iommu_group_mutex_assert(struct device *dev)
+{
+ struct iommu_group *group = dev->iommu_group;
+
+ lockdep_assert_held(&group->mutex);
+}
+EXPORT_SYMBOL_GPL(iommu_group_mutex_assert);
+#endif
+
static struct device *iommu_group_first_dev(struct iommu_group *group)
{
lockdep_assert_held(&group->mutex);
@@ -1331,217 +1360,6 @@ void iommu_group_put(struct iommu_group *group)
EXPORT_SYMBOL_GPL(iommu_group_put);
/**
- * iommu_register_device_fault_handler() - Register a device fault handler
- * @dev: the device
- * @handler: the fault handler
- * @data: private data passed as argument to the handler
- *
- * When an IOMMU fault event is received, this handler gets called with the
- * fault event and data as argument. The handler should return 0 on success. If
- * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
- * complete the fault by calling iommu_page_response() with one of the following
- * response code:
- * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
- * - IOMMU_PAGE_RESP_INVALID: terminate the fault
- * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
- * page faults if possible.
- *
- * Return 0 if the fault handler was installed successfully, or an error.
- */
-int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data)
-{
- struct dev_iommu *param = dev->iommu;
- int ret = 0;
-
- if (!param)
- return -EINVAL;
-
- mutex_lock(&param->lock);
- /* Only allow one fault handler registered for each device */
- if (param->fault_param) {
- ret = -EBUSY;
- goto done_unlock;
- }
-
- get_device(dev);
- param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
- if (!param->fault_param) {
- put_device(dev);
- ret = -ENOMEM;
- goto done_unlock;
- }
- param->fault_param->handler = handler;
- param->fault_param->data = data;
- mutex_init(&param->fault_param->lock);
- INIT_LIST_HEAD(&param->fault_param->faults);
-
-done_unlock:
- mutex_unlock(&param->lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
-
-/**
- * iommu_unregister_device_fault_handler() - Unregister the device fault handler
- * @dev: the device
- *
- * Remove the device fault handler installed with
- * iommu_register_device_fault_handler().
- *
- * Return 0 on success, or an error.
- */
-int iommu_unregister_device_fault_handler(struct device *dev)
-{
- struct dev_iommu *param = dev->iommu;
- int ret = 0;
-
- if (!param)
- return -EINVAL;
-
- mutex_lock(&param->lock);
-
- if (!param->fault_param)
- goto unlock;
-
- /* we cannot unregister handler if there are pending faults */
- if (!list_empty(&param->fault_param->faults)) {
- ret = -EBUSY;
- goto unlock;
- }
-
- kfree(param->fault_param);
- param->fault_param = NULL;
- put_device(dev);
-unlock:
- mutex_unlock(&param->lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
-
-/**
- * iommu_report_device_fault() - Report fault event to device driver
- * @dev: the device
- * @evt: fault event data
- *
- * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
- * handler. When this function fails and the fault is recoverable, it is the
- * caller's responsibility to complete the fault.
- *
- * Return 0 on success, or an error.
- */
-int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
-{
- struct dev_iommu *param = dev->iommu;
- struct iommu_fault_event *evt_pending = NULL;
- struct iommu_fault_param *fparam;
- int ret = 0;
-
- if (!param || !evt)
- return -EINVAL;
-
- /* we only report device fault if there is a handler registered */
- mutex_lock(&param->lock);
- fparam = param->fault_param;
- if (!fparam || !fparam->handler) {
- ret = -EINVAL;
- goto done_unlock;
- }
-
- if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
- (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
- evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
- GFP_KERNEL);
- if (!evt_pending) {
- ret = -ENOMEM;
- goto done_unlock;
- }
- mutex_lock(&fparam->lock);
- list_add_tail(&evt_pending->list, &fparam->faults);
- mutex_unlock(&fparam->lock);
- }
-
- ret = fparam->handler(&evt->fault, fparam->data);
- if (ret && evt_pending) {
- mutex_lock(&fparam->lock);
- list_del(&evt_pending->list);
- mutex_unlock(&fparam->lock);
- kfree(evt_pending);
- }
-done_unlock:
- mutex_unlock(&param->lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_report_device_fault);
-
-int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg)
-{
- bool needs_pasid;
- int ret = -EINVAL;
- struct iommu_fault_event *evt;
- struct iommu_fault_page_request *prm;
- struct dev_iommu *param = dev->iommu;
- const struct iommu_ops *ops = dev_iommu_ops(dev);
- bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
-
- if (!ops->page_response)
- return -ENODEV;
-
- if (!param || !param->fault_param)
- return -EINVAL;
-
- if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
- msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
- return -EINVAL;
-
- /* Only send response if there is a fault report pending */
- mutex_lock(&param->fault_param->lock);
- if (list_empty(&param->fault_param->faults)) {
- dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
- goto done_unlock;
- }
- /*
- * Check if we have a matching page request pending to respond,
- * otherwise return -EINVAL
- */
- list_for_each_entry(evt, &param->fault_param->faults, list) {
- prm = &evt->fault.prm;
- if (prm->grpid != msg->grpid)
- continue;
-
- /*
- * If the PASID is required, the corresponding request is
- * matched using the group ID, the PASID valid bit and the PASID
- * value. Otherwise only the group ID matches request and
- * response.
- */
- needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
- if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
- continue;
-
- if (!needs_pasid && has_pasid) {
- /* No big deal, just clear it. */
- msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
- msg->pasid = 0;
- }
-
- ret = ops->page_response(dev, evt, msg);
- list_del(&evt->list);
- kfree(evt);
- break;
- }
-
-done_unlock:
- mutex_unlock(&param->fault_param->lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_page_response);
-
-/**
* iommu_group_id - Return ID for a group
* @group: the group to ID
*
@@ -1799,7 +1617,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
* domain. Do not use in new drivers.
*/
if (ops->default_domain) {
- if (req_type)
+ if (req_type != ops->default_domain->type)
return ERR_PTR(-EINVAL);
return ops->default_domain;
}
@@ -1871,10 +1689,18 @@ static int iommu_get_def_domain_type(struct iommu_group *group,
const struct iommu_ops *ops = dev_iommu_ops(dev);
int type;
- if (!ops->def_domain_type)
- return cur_type;
-
- type = ops->def_domain_type(dev);
+ if (ops->default_domain) {
+ /*
+ * Drivers that declare a global static default_domain will
+ * always choose that.
+ */
+ type = ops->default_domain->type;
+ } else {
+ if (ops->def_domain_type)
+ type = ops->def_domain_type(dev);
+ else
+ return cur_type;
+ }
if (!type || cur_type == type)
return cur_type;
if (!cur_type)
@@ -2978,7 +2804,7 @@ bool iommu_default_passthrough(void)
}
EXPORT_SYMBOL_GPL(iommu_default_passthrough);
-const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
{
const struct iommu_ops *ops = NULL;
struct iommu_device *iommu;
@@ -3029,7 +2855,7 @@ void iommu_fwspec_free(struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_fwspec_free);
-int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
+int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i, new_num;
@@ -3615,26 +3441,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
-struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
- struct mm_struct *mm)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
- struct iommu_domain *domain;
-
- domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
- if (!domain)
- return NULL;
-
- domain->type = IOMMU_DOMAIN_SVA;
- mmgrab(mm);
- domain->mm = mm;
- domain->owner = ops;
- domain->iopf_handler = iommu_sva_handle_iopf;
- domain->fault_data = mm;
-
- return domain;
-}
-
ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
int ret;
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 3f3f1fa1a0a9..33d142f8057d 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -263,7 +263,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
if (cmd->__reserved)
return -EOPNOTSUPP;
- if (cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len)
+ if ((cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) ||
+ (cmd->data_type != IOMMU_HWPT_DATA_NONE && !cmd->data_len))
return -EINVAL;
idev = iommufd_get_device(ucmd, cmd->dev_id);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 504ac1b01b2d..05fd9d3abf1b 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1330,20 +1330,23 @@ out_unlock:
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
+ u32 new_id;
int rc;
down_write(&iopt->domains_rwsem);
down_write(&iopt->iova_rwsem);
- rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access,
- xa_limit_16b, GFP_KERNEL_ACCOUNT);
+ rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
+ GFP_KERNEL_ACCOUNT);
+
if (rc)
goto out_unlock;
rc = iopt_calculate_iova_alignment(iopt);
if (rc) {
- xa_erase(&iopt->access_list, access->iopt_access_list_id);
+ xa_erase(&iopt->access_list, new_id);
goto out_unlock;
}
+ access->iopt_access_list_id = new_id;
out_unlock:
up_write(&iopt->iova_rwsem);
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index 482d4059f5db..e854d3f67205 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -45,6 +45,7 @@ enum {
enum {
MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
+ MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
};
enum {
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c
index 0a92c9eeaf7f..db8c46bee155 100644
--- a/drivers/iommu/iommufd/iova_bitmap.c
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -100,7 +100,7 @@ struct iova_bitmap {
struct iova_bitmap_map mapped;
/* userspace address of the bitmap */
- u64 __user *bitmap;
+ u8 __user *bitmap;
/* u64 index that @mapped points to */
unsigned long mapped_base_index;
@@ -113,6 +113,9 @@ struct iova_bitmap {
/* length of the IOVA range for the whole bitmap */
size_t length;
+
+ /* length of the IOVA range set ahead the pinned pages */
+ unsigned long set_ahead_length;
};
/*
@@ -162,7 +165,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap)
{
struct iova_bitmap_map *mapped = &bitmap->mapped;
unsigned long npages;
- u64 __user *addr;
+ u8 __user *addr;
long ret;
/*
@@ -176,17 +179,18 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap)
sizeof(*bitmap->bitmap), PAGE_SIZE);
/*
- * We always cap at max number of 'struct page' a base page can fit.
- * This is, for example, on x86 means 2M of bitmap data max.
- */
- npages = min(npages, PAGE_SIZE / sizeof(struct page *));
-
- /*
* Bitmap address to be pinned is calculated via pointer arithmetic
* with bitmap u64 word index.
*/
addr = bitmap->bitmap + bitmap->mapped_base_index;
+ /*
+ * We always cap at max number of 'struct page' a base page can fit.
+ * This is, for example, on x86 means 2M of bitmap data max.
+ */
+ npages = min(npages + !!offset_in_page(addr),
+ PAGE_SIZE / sizeof(struct page *));
+
ret = pin_user_pages_fast((unsigned long)addr, npages,
FOLL_WRITE, mapped->pages);
if (ret <= 0)
@@ -247,7 +251,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
mapped = &bitmap->mapped;
mapped->pgshift = __ffs(page_size);
- bitmap->bitmap = data;
+ bitmap->bitmap = (u8 __user *)data;
bitmap->mapped_total_index =
iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
bitmap->iova = iova;
@@ -304,7 +308,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
remaining = min_t(unsigned long, remaining,
- bytes / sizeof(*bitmap->bitmap));
+ DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap)));
return remaining;
}
@@ -341,6 +345,32 @@ static bool iova_bitmap_done(struct iova_bitmap *bitmap)
return bitmap->mapped_base_index >= bitmap->mapped_total_index;
}
+static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap,
+ size_t set_ahead_length)
+{
+ int ret = 0;
+
+ while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) {
+ unsigned long length = iova_bitmap_mapped_length(bitmap);
+ unsigned long iova = iova_bitmap_mapped_iova(bitmap);
+
+ ret = iova_bitmap_get(bitmap);
+ if (ret)
+ break;
+
+ length = min(length, set_ahead_length);
+ iova_bitmap_set(bitmap, iova, length);
+
+ set_ahead_length -= length;
+ bitmap->mapped_base_index +=
+ iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
+ iova_bitmap_put(bitmap);
+ }
+
+ bitmap->set_ahead_length = 0;
+ return ret;
+}
+
/*
* Advances to the next range, releases the current pinned
* pages and pins the next set of bitmap pages.
@@ -357,6 +387,15 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap)
if (iova_bitmap_done(bitmap))
return 0;
+ /* Iterate, set and skip any bits requested for next iteration */
+ if (bitmap->set_ahead_length) {
+ int ret;
+
+ ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length);
+ if (ret)
+ return ret;
+ }
+
/* When advancing the index we pin the next set of bitmap pages */
return iova_bitmap_get(bitmap);
}
@@ -409,6 +448,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
+ unsigned long last_page_idx = mapped->npages - 1;
do {
unsigned int page_idx = cur_bit / BITS_PER_PAGE;
@@ -417,10 +457,18 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
last_bit - cur_bit + 1);
void *kaddr;
+ if (unlikely(page_idx > last_page_idx))
+ break;
+
kaddr = kmap_local_page(mapped->pages[page_idx]);
bitmap_set(kaddr, offset, nbits);
kunmap_local(kaddr);
cur_bit += nbits;
} while (cur_bit <= last_bit);
+
+ if (unlikely(cur_bit <= last_bit)) {
+ bitmap->set_ahead_length =
+ ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift);
+ }
}
EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD);
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index d9e9920c7eba..7a2199470f31 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -36,11 +36,12 @@ static struct mock_bus_type iommufd_mock_bus_type = {
},
};
-static atomic_t mock_dev_num;
+static DEFINE_IDA(mock_dev_ida);
enum {
MOCK_DIRTY_TRACK = 1,
MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
+ MOCK_HUGE_PAGE_SIZE = 512 * MOCK_IO_PAGE_SIZE,
/*
* Like a real page table alignment requires the low bits of the address
@@ -53,6 +54,7 @@ enum {
MOCK_PFN_START_IOVA = _MOCK_PFN_START,
MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1,
+ MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2,
};
/*
@@ -61,8 +63,8 @@ enum {
* In syzkaller mode the 64 bit IOVA is converted into an nth area and offset
* value. This has a much smaller randomization space and syzkaller can hit it.
*/
-static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
- u64 *iova)
+static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
+ u64 *iova)
{
struct syz_layout {
__u32 nth_area;
@@ -86,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
return 0;
}
+static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access,
+ u64 *iova)
+{
+ unsigned long ret;
+
+ mutex_lock(&access->ioas_lock);
+ if (!access->ioas) {
+ mutex_unlock(&access->ioas_lock);
+ return 0;
+ }
+ ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova);
+ mutex_unlock(&access->ioas_lock);
+ return ret;
+}
+
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
unsigned int ioas_id, u64 *iova, u32 *flags)
{
@@ -98,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
ioas = iommufd_get_ioas(ucmd->ictx, ioas_id);
if (IS_ERR(ioas))
return;
- *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
+ *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova);
iommufd_put_object(ucmd->ictx, &ioas->obj);
}
@@ -121,6 +138,7 @@ enum selftest_obj_type {
struct mock_dev {
struct device dev;
unsigned long flags;
+ int id;
};
struct selftest_obj {
@@ -191,6 +209,34 @@ static int mock_domain_set_dirty_tracking(struct iommu_domain *domain,
return 0;
}
+static bool mock_test_and_clear_dirty(struct mock_iommu_domain *mock,
+ unsigned long iova, size_t page_size,
+ unsigned long flags)
+{
+ unsigned long cur, end = iova + page_size - 1;
+ bool dirty = false;
+ void *ent, *old;
+
+ for (cur = iova; cur < end; cur += MOCK_IO_PAGE_SIZE) {
+ ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
+ if (!ent || !(xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA))
+ continue;
+
+ dirty = true;
+ /* Clear dirty */
+ if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
+ unsigned long val;
+
+ val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
+ old = xa_store(&mock->pfns, cur / MOCK_IO_PAGE_SIZE,
+ xa_mk_value(val), GFP_KERNEL);
+ WARN_ON_ONCE(ent != old);
+ }
+ }
+
+ return dirty;
+}
+
static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
unsigned long iova, size_t size,
unsigned long flags,
@@ -198,31 +244,31 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
{
struct mock_iommu_domain *mock =
container_of(domain, struct mock_iommu_domain, domain);
- unsigned long i, max = size / MOCK_IO_PAGE_SIZE;
- void *ent, *old;
+ unsigned long end = iova + size;
+ void *ent;
if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap)
return -EINVAL;
- for (i = 0; i < max; i++) {
- unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE;
+ do {
+ unsigned long pgsize = MOCK_IO_PAGE_SIZE;
+ unsigned long head;
- ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
- if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) {
- /* Clear dirty */
- if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
- unsigned long val;
-
- val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
- old = xa_store(&mock->pfns,
- cur / MOCK_IO_PAGE_SIZE,
- xa_mk_value(val), GFP_KERNEL);
- WARN_ON_ONCE(ent != old);
- }
- iommu_dirty_bitmap_record(dirty, cur,
- MOCK_IO_PAGE_SIZE);
+ ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
+ if (!ent) {
+ iova += pgsize;
+ continue;
}
- }
+
+ if (xa_to_value(ent) & MOCK_PFN_HUGE_IOVA)
+ pgsize = MOCK_HUGE_PAGE_SIZE;
+ head = iova & ~(pgsize - 1);
+
+ /* Clear dirty */
+ if (mock_test_and_clear_dirty(mock, head, pgsize, flags))
+ iommu_dirty_bitmap_record(dirty, head, pgsize);
+ iova = head + pgsize;
+ } while (iova < end);
return 0;
}
@@ -234,6 +280,7 @@ const struct iommu_dirty_ops dirty_ops = {
static struct iommu_domain *mock_domain_alloc_paging(struct device *dev)
{
+ struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
struct mock_iommu_domain *mock;
mock = kzalloc(sizeof(*mock), GFP_KERNEL);
@@ -242,6 +289,8 @@ static struct iommu_domain *mock_domain_alloc_paging(struct device *dev)
mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
+ if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA)
+ mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE;
mock->domain.ops = mock_ops.default_domain_ops;
mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
xa_init(&mock->pfns);
@@ -287,7 +336,7 @@ mock_domain_alloc_user(struct device *dev, u32 flags,
return ERR_PTR(-EOPNOTSUPP);
if (user_data || (has_dirty_flag && no_dirty_ops))
return ERR_PTR(-EOPNOTSUPP);
- domain = mock_domain_alloc_paging(NULL);
+ domain = mock_domain_alloc_paging(dev);
if (!domain)
return ERR_PTR(-ENOMEM);
if (has_dirty_flag)
@@ -350,6 +399,9 @@ static int mock_domain_map_pages(struct iommu_domain *domain,
if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
flags = MOCK_PFN_LAST_IOVA;
+ if (pgsize != MOCK_IO_PAGE_SIZE) {
+ flags |= MOCK_PFN_HUGE_IOVA;
+ }
old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE,
xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) |
flags),
@@ -394,20 +446,27 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
/*
* iommufd generates unmaps that must be a strict
- * superset of the map's performend So every starting
- * IOVA should have been an iova passed to map, and the
+ * superset of the map's performend So every
+ * starting/ending IOVA should have been an iova passed
+ * to map.
*
- * First IOVA must be present and have been a first IOVA
- * passed to map_pages
+ * This simple logic doesn't work when the HUGE_PAGE is
+ * turned on since the core code will automatically
+ * switch between the two page sizes creating a break in
+ * the unmap calls. The break can land in the middle of
+ * contiguous IOVA.
*/
- if (first) {
- WARN_ON(ent && !(xa_to_value(ent) &
- MOCK_PFN_START_IOVA));
- first = false;
+ if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) {
+ if (first) {
+ WARN_ON(ent && !(xa_to_value(ent) &
+ MOCK_PFN_START_IOVA));
+ first = false;
+ }
+ if (pgcount == 1 &&
+ cur + MOCK_IO_PAGE_SIZE == pgsize)
+ WARN_ON(ent && !(xa_to_value(ent) &
+ MOCK_PFN_LAST_IOVA));
}
- if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
- WARN_ON(ent && !(xa_to_value(ent) &
- MOCK_PFN_LAST_IOVA));
iova += MOCK_IO_PAGE_SIZE;
ret += MOCK_IO_PAGE_SIZE;
@@ -595,7 +654,7 @@ static void mock_dev_release(struct device *dev)
{
struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
- atomic_dec(&mock_dev_num);
+ ida_free(&mock_dev_ida, mdev->id);
kfree(mdev);
}
@@ -604,7 +663,8 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
struct mock_dev *mdev;
int rc;
- if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY))
+ if (dev_flags &
+ ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA))
return ERR_PTR(-EINVAL);
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
@@ -616,8 +676,12 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
mdev->dev.release = mock_dev_release;
mdev->dev.bus = &iommufd_mock_bus_type.bus;
- rc = dev_set_name(&mdev->dev, "iommufd_mock%u",
- atomic_inc_return(&mock_dev_num));
+ rc = ida_alloc(&mock_dev_ida, GFP_KERNEL);
+ if (rc < 0)
+ goto err_put;
+ mdev->id = rc;
+
+ rc = dev_set_name(&mdev->dev, "iommufd_mock%u", mdev->id);
if (rc)
goto err_put;
@@ -1119,7 +1183,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
}
if (flags & MOCK_FLAGS_ACCESS_SYZ)
- iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
+ iova = iommufd_test_syz_conv_iova(staccess->access,
&cmd->access_pages.iova);
npages = (ALIGN(iova + length, PAGE_SIZE) -
@@ -1221,8 +1285,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
}
if (flags & MOCK_FLAGS_ACCESS_SYZ)
- iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
- &cmd->access_rw.iova);
+ iova = iommufd_test_syz_conv_iova(staccess->access,
+ &cmd->access_rw.iova);
rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
if (rc)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index d30e453d0fb4..d59d0ea2fd21 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -24,24 +24,8 @@ static bool iova_rcache_insert(struct iova_domain *iovad,
static unsigned long iova_rcache_get(struct iova_domain *iovad,
unsigned long size,
unsigned long limit_pfn);
-static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
-
-unsigned long iova_rcache_range(void)
-{
- return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
-}
-
-static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
-{
- struct iova_domain *iovad;
-
- iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
-
- free_cpu_cached_iovas(cpu, iovad);
- return 0;
-}
-
+static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_global_cached_iovas(struct iova_domain *iovad);
static struct iova *to_iova(struct rb_node *node)
@@ -252,54 +236,6 @@ static void free_iova_mem(struct iova *iova)
kmem_cache_free(iova_cache, iova);
}
-int iova_cache_get(void)
-{
- mutex_lock(&iova_cache_mutex);
- if (!iova_cache_users) {
- int ret;
-
- ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
- iova_cpuhp_dead);
- if (ret) {
- mutex_unlock(&iova_cache_mutex);
- pr_err("Couldn't register cpuhp handler\n");
- return ret;
- }
-
- iova_cache = kmem_cache_create(
- "iommu_iova", sizeof(struct iova), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!iova_cache) {
- cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
- mutex_unlock(&iova_cache_mutex);
- pr_err("Couldn't create iova cache\n");
- return -ENOMEM;
- }
- }
-
- iova_cache_users++;
- mutex_unlock(&iova_cache_mutex);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(iova_cache_get);
-
-void iova_cache_put(void)
-{
- mutex_lock(&iova_cache_mutex);
- if (WARN_ON(!iova_cache_users)) {
- mutex_unlock(&iova_cache_mutex);
- return;
- }
- iova_cache_users--;
- if (!iova_cache_users) {
- cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
- kmem_cache_destroy(iova_cache);
- }
- mutex_unlock(&iova_cache_mutex);
-}
-EXPORT_SYMBOL_GPL(iova_cache_put);
-
/**
* alloc_iova - allocates an iova
* @iovad: - iova domain in question
@@ -654,11 +590,18 @@ struct iova_rcache {
struct delayed_work work;
};
+static struct kmem_cache *iova_magazine_cache;
+
+unsigned long iova_rcache_range(void)
+{
+ return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
+}
+
static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
struct iova_magazine *mag;
- mag = kmalloc(sizeof(*mag), flags);
+ mag = kmem_cache_alloc(iova_magazine_cache, flags);
if (mag)
mag->size = 0;
@@ -667,7 +610,7 @@ static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
static void iova_magazine_free(struct iova_magazine *mag)
{
- kfree(mag);
+ kmem_cache_free(iova_magazine_cache, mag);
}
static void
@@ -990,5 +933,71 @@ static void free_global_cached_iovas(struct iova_domain *iovad)
spin_unlock_irqrestore(&rcache->lock, flags);
}
}
+
+static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct iova_domain *iovad;
+
+ iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
+
+ free_cpu_cached_iovas(cpu, iovad);
+ return 0;
+}
+
+int iova_cache_get(void)
+{
+ int err = -ENOMEM;
+
+ mutex_lock(&iova_cache_mutex);
+ if (!iova_cache_users) {
+ iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!iova_cache)
+ goto out_err;
+
+ iova_magazine_cache = kmem_cache_create("iommu_iova_magazine",
+ sizeof(struct iova_magazine),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!iova_magazine_cache)
+ goto out_err;
+
+ err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead",
+ NULL, iova_cpuhp_dead);
+ if (err) {
+ pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err));
+ goto out_err;
+ }
+ }
+
+ iova_cache_users++;
+ mutex_unlock(&iova_cache_mutex);
+
+ return 0;
+
+out_err:
+ kmem_cache_destroy(iova_cache);
+ kmem_cache_destroy(iova_magazine_cache);
+ mutex_unlock(&iova_cache_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(iova_cache_get);
+
+void iova_cache_put(void)
+{
+ mutex_lock(&iova_cache_mutex);
+ if (WARN_ON(!iova_cache_users)) {
+ mutex_unlock(&iova_cache_mutex);
+ return;
+ }
+ iova_cache_users--;
+ if (!iova_cache_users) {
+ cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
+ kmem_cache_destroy(iova_cache);
+ kmem_cache_destroy(iova_magazine_cache);
+ }
+ mutex_unlock(&iova_cache_mutex);
+}
+EXPORT_SYMBOL_GPL(iova_cache_put);
+
MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index ace1fc4bd34b..b657cc09605f 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -709,7 +709,7 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
}
static int ipmmu_init_platform_device(struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
struct platform_device *ipmmu_pdev;
@@ -773,7 +773,7 @@ static bool ipmmu_device_is_allowed(struct device *dev)
}
static int ipmmu_of_xlate(struct device *dev,
- struct of_phandle_args *spec)
+ const struct of_phandle_args *spec)
{
if (!ipmmu_device_is_allowed(dev))
return -ENODEV;
@@ -1005,7 +1005,6 @@ static const struct of_device_id ipmmu_of_ids[] = {
static int ipmmu_probe(struct platform_device *pdev)
{
struct ipmmu_vmsa_device *mmu;
- struct resource *res;
int irq;
int ret;
@@ -1025,8 +1024,7 @@ static int ipmmu_probe(struct platform_device *pdev)
return ret;
/* Map I/O memory and request IRQ. */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- mmu->base = devm_ioremap_resource(&pdev->dev, res);
+ mmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(mmu->base))
return PTR_ERR(mmu->base);
@@ -1123,7 +1121,6 @@ static void ipmmu_remove(struct platform_device *pdev)
ipmmu_device_reset(mmu);
}
-#ifdef CONFIG_PM_SLEEP
static int ipmmu_resume_noirq(struct device *dev)
{
struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev);
@@ -1153,18 +1150,14 @@ static int ipmmu_resume_noirq(struct device *dev)
}
static const struct dev_pm_ops ipmmu_pm = {
- SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, ipmmu_resume_noirq)
+ NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, ipmmu_resume_noirq)
};
-#define DEV_PM_OPS &ipmmu_pm
-#else
-#define DEV_PM_OPS NULL
-#endif /* CONFIG_PM_SLEEP */
static struct platform_driver ipmmu_driver = {
.driver = {
.name = "ipmmu-vmsa",
- .of_match_table = of_match_ptr(ipmmu_of_ids),
- .pm = DEV_PM_OPS,
+ .of_match_table = ipmmu_of_ids,
+ .pm = pm_sleep_ptr(&ipmmu_pm),
},
.probe = ipmmu_probe,
.remove_new = ipmmu_remove,
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 83314b9d8f38..ee59647c2050 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -99,7 +99,8 @@ int __init irq_remapping_prepare(void)
if (disable_irq_remap)
return -ENOSYS;
- if (intel_irq_remap_ops.prepare() == 0)
+ if (IS_ENABLED(CONFIG_INTEL_IOMMU) &&
+ intel_irq_remap_ops.prepare() == 0)
remap_ops = &intel_irq_remap_ops;
else if (IS_ENABLED(CONFIG_AMD_IOMMU) &&
amd_iommu_irq_ops.prepare() == 0)
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f86af9815d6f..989e0869d805 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -598,7 +598,7 @@ static void print_ctx_regs(void __iomem *base, int ctx)
static int insert_iommu_master(struct device *dev,
struct msm_iommu_dev **iommu,
- struct of_phandle_args *spec)
+ const struct of_phandle_args *spec)
{
struct msm_iommu_ctx_dev *master = dev_iommu_priv_get(dev);
int sid;
@@ -626,7 +626,7 @@ static int insert_iommu_master(struct device *dev,
}
static int qcom_iommu_of_xlate(struct device *dev,
- struct of_phandle_args *spec)
+ const struct of_phandle_args *spec)
{
struct msm_iommu_dev *iommu = NULL, *iter;
unsigned long flags;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 7abe9e85a570..b8c47f18bc26 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -957,7 +957,8 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev)
return group;
}
-static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int mtk_iommu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
struct platform_device *m4updev;
@@ -1264,7 +1265,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->plat_data = of_device_get_match_data(dev);
/* Protect memory. HW will access here while translation fault.*/
- protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL);
+ protect = devm_kcalloc(dev, 2, MTK_PROTECT_PA_ALIGN, GFP_KERNEL);
if (!protect)
return -ENOMEM;
data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 25b41222abae..a9fa2a54dc9b 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -398,7 +398,8 @@ static const struct iommu_ops mtk_iommu_v1_ops;
* MTK generation one iommu HW only support one iommu domain, and all the client
* sharing the same iova address space.
*/
-static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_args *args)
+static int mtk_iommu_v1_create_mapping(struct device *dev,
+ const struct of_phandle_args *args)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_v1_data *data;
@@ -621,8 +622,8 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev)
data->dev = dev;
/* Protect memory. HW will access here while translation fault.*/
- protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2,
- GFP_KERNEL | GFP_DMA);
+ protect = devm_kcalloc(dev, 2, MTK_PROTECT_PA_ALIGN,
+ GFP_KERNEL | GFP_DMA);
if (!protect)
return -ENOMEM;
data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 719652b60840..3afe0b48a48d 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -29,7 +29,7 @@ static int of_iommu_xlate(struct device *dev,
!of_device_is_available(iommu_spec->np))
return -ENODEV;
- ret = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
+ ret = iommu_fwspec_init(dev, fwnode, ops);
if (ret)
return ret;
/*
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 2685861c0a12..da79d9f4cf63 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1140,7 +1140,7 @@ static void rk_iommu_release_device(struct device *dev)
}
static int rk_iommu_of_xlate(struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
struct platform_device *iommu_dev;
struct rk_iommudata *data;
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 537359f10997..ba53571a8239 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -390,7 +390,8 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev)
return &sdev->iommu;
}
-static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int sprd_iommu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
struct platform_device *pdev;
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 41484a5a399b..decd52cba998 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -819,7 +819,7 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
}
static int sun50i_iommu_of_xlate(struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
unsigned id = args->args[0];
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 310871728ab4..14e525bd0d9b 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -830,7 +830,7 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
}
static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
const struct iommu_ops *ops = smmu->iommu.ops;
int err;
@@ -959,7 +959,7 @@ static struct iommu_group *tegra_smmu_device_group(struct device *dev)
}
static int tegra_smmu_of_xlate(struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
struct tegra_mc *mc = platform_get_drvdata(iommu_pdev);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 34db37fd9675..04048f64a2c0 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1051,7 +1051,8 @@ static struct iommu_group *viommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+static int viommu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
return iommu_fwspec_add_ids(dev, args->args, 1);
}
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index f7149d0f3d45..72c07a12f5e1 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -546,6 +546,17 @@ config SIFIVE_PLIC
select IRQ_DOMAIN_HIERARCHY
select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
+config STARFIVE_JH8100_INTC
+ bool "StarFive JH8100 External Interrupt Controller"
+ depends on ARCH_STARFIVE || COMPILE_TEST
+ default ARCH_STARFIVE
+ select IRQ_DOMAIN_HIERARCHY
+ help
+ This enables support for the INTC chip found in StarFive JH8100
+ SoC.
+
+ If you don't know what to do here, say Y.
+
config EXYNOS_IRQ_COMBINER
bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST
depends on (ARCH_EXYNOS && ARM) || COMPILE_TEST
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index ffd945fe71aa..ec4a18380998 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -96,6 +96,7 @@ obj-$(CONFIG_CSKY_MPINTC) += irq-csky-mpintc.o
obj-$(CONFIG_CSKY_APB_INTC) += irq-csky-apb-intc.o
obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o
obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o
+obj-$(CONFIG_STARFIVE_JH8100_INTC) += irq-starfive-jh8100-intc.o
obj-$(CONFIG_IMX_IRQSTEER) += irq-imx-irqsteer.o
obj-$(CONFIG_IMX_INTMUX) += irq-imx-intmux.o
obj-$(CONFIG_IMX_MU_MSI) += irq-imx-mu-msi.o
diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
index 9745a119d0e6..eb02d203c963 100644
--- a/drivers/irqchip/irq-bcm6345-l1.c
+++ b/drivers/irqchip/irq-bcm6345-l1.c
@@ -242,7 +242,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
else if (intc->n_words != n_words)
return -EINVAL;
- cpu = intc->cpus[idx] = kzalloc(sizeof(*cpu) + n_words * sizeof(u32),
+ cpu = intc->cpus[idx] = kzalloc(struct_size(cpu, enable_cache, n_words),
GFP_KERNEL);
if (!cpu)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 24ca1d656adc..36e71af054e9 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -249,7 +249,7 @@ static int __init bcm7038_l1_init_one(struct device_node *dn,
return -EINVAL;
}
- cpu = intc->cpus[idx] = kzalloc(sizeof(*cpu) + n_words * sizeof(u32),
+ cpu = intc->cpus[idx] = kzalloc(struct_size(cpu, mask_cache, n_words),
GFP_KERNEL);
if (!cpu)
return -ENOMEM;
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index 5559c943f03f..2b0b3175cea0 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -2,7 +2,7 @@
/*
* Generic Broadcom Set Top Box Level 2 Interrupt controller driver
*
- * Copyright (C) 2014-2017 Broadcom
+ * Copyright (C) 2014-2024 Broadcom
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
generic_handle_domain_irq(b->domain, irq);
} while (status);
out:
+ /* Don't ack parent before all device writes are done */
+ wmb();
+
chained_irq_exit(chip, desc);
}
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d097001c1e3e..fca888b36680 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -207,6 +207,11 @@ static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its)
return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]);
}
+static bool rdists_support_shareable(void)
+{
+ return !(gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE);
+}
+
static u16 get_its_list(struct its_vm *vm)
{
struct its_node *its;
@@ -2710,10 +2715,12 @@ static u64 inherit_vpe_l1_table_from_its(void)
break;
}
val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12);
- val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
- FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
- val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
- FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+ if (rdists_support_shareable()) {
+ val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
+ FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
+ val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
+ FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+ }
val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);
return val;
@@ -2936,8 +2943,10 @@ static int allocate_vpe_l1_table(void)
WARN_ON(!IS_ALIGNED(pa, psz));
val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12);
- val |= GICR_VPROPBASER_RaWb;
- val |= GICR_VPROPBASER_InnerShareable;
+ if (rdists_support_shareable()) {
+ val |= GICR_VPROPBASER_RaWb;
+ val |= GICR_VPROPBASER_InnerShareable;
+ }
val |= GICR_VPROPBASER_4_1_Z;
val |= GICR_VPROPBASER_4_1_VALID;
@@ -3126,7 +3135,7 @@ static void its_cpu_init_lpis(void)
gicr_write_propbaser(val, rbase + GICR_PROPBASER);
tmp = gicr_read_propbaser(rbase + GICR_PROPBASER);
- if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE)
+ if (!rdists_support_shareable())
tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK;
if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
@@ -3153,7 +3162,7 @@ static void its_cpu_init_lpis(void)
gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER);
- if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE)
+ if (!rdists_support_shareable())
tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK;
if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
@@ -3172,6 +3181,7 @@ static void its_cpu_init_lpis(void)
val |= GICR_CTLR_ENABLE_LPIS;
writel_relaxed(val, rbase + GICR_CTLR);
+out:
if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) {
void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
@@ -3207,7 +3217,6 @@ static void its_cpu_init_lpis(void)
/* Make sure the GIC has seen the above */
dsb(sy);
-out:
gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED;
pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
smp_processor_id(),
@@ -3817,8 +3826,9 @@ static int its_vpe_set_affinity(struct irq_data *d,
bool force)
{
struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
- int from, cpu = cpumask_first(mask_val);
+ struct cpumask common, *table_mask;
unsigned long flags;
+ int from, cpu;
/*
* Changing affinity is mega expensive, so let's be as lazy as
@@ -3834,19 +3844,22 @@ static int its_vpe_set_affinity(struct irq_data *d,
* taken on any vLPI handling path that evaluates vpe->col_idx.
*/
from = vpe_to_cpuid_lock(vpe, &flags);
- if (from == cpu)
- goto out;
-
- vpe->col_idx = cpu;
+ table_mask = gic_data_rdist_cpu(from)->vpe_table_mask;
/*
- * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD
- * is sharing its VPE table with the current one.
+ * If we are offered another CPU in the same GICv4.1 ITS
+ * affinity, pick this one. Otherwise, any CPU will do.
*/
- if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
- cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
+ if (table_mask && cpumask_and(&common, mask_val, table_mask))
+ cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common);
+ else
+ cpu = cpumask_first(mask_val);
+
+ if (from == cpu)
goto out;
+ vpe->col_idx = cpu;
+
its_send_vmovp(vpe);
its_vpe_db_proxy_move(vpe, from, cpu);
@@ -3880,14 +3893,18 @@ static void its_vpe_schedule(struct its_vpe *vpe)
val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) &
GENMASK_ULL(51, 12);
val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
- val |= GICR_VPROPBASER_RaWb;
- val |= GICR_VPROPBASER_InnerShareable;
+ if (rdists_support_shareable()) {
+ val |= GICR_VPROPBASER_RaWb;
+ val |= GICR_VPROPBASER_InnerShareable;
+ }
gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
val = virt_to_phys(page_address(vpe->vpt_page)) &
GENMASK_ULL(51, 16);
- val |= GICR_VPENDBASER_RaWaWb;
- val |= GICR_VPENDBASER_InnerShareable;
+ if (rdists_support_shareable()) {
+ val |= GICR_VPENDBASER_RaWaWb;
+ val |= GICR_VPENDBASER_InnerShareable;
+ }
/*
* There is no good way of finding out if the pending table is
* empty as we can race against the doorbell interrupt very
@@ -4419,12 +4436,12 @@ static const struct irq_domain_ops its_sgi_domain_ops = {
static int its_vpe_id_alloc(void)
{
- return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL);
+ return ida_alloc_max(&its_vpeid_ida, ITS_MAX_VPEID - 1, GFP_KERNEL);
}
static void its_vpe_id_free(u16 id)
{
- ida_simple_remove(&its_vpeid_ida, id);
+ ida_free(&its_vpeid_ida, id);
}
static int its_vpe_init(struct its_vpe *vpe)
@@ -5078,6 +5095,8 @@ static int __init its_probe_one(struct its_node *its)
u32 ctlr;
int err;
+ its_enable_quirks(its);
+
if (is_v4(its)) {
if (!(its->typer & GITS_TYPER_VMOVP)) {
err = its_compute_its_list_map(its);
@@ -5429,7 +5448,6 @@ static int __init its_of_probe(struct device_node *node)
if (!its)
return -ENOMEM;
- its_enable_quirks(its);
err = its_probe_one(its);
if (err) {
its_node_destroy(its);
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 98b0329b7154..6fb276504bcc 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -19,6 +19,7 @@
#include <linux/percpu.h>
#include <linux/refcount.h>
#include <linux/slab.h>
+#include <linux/iopoll.h>
#include <linux/irqchip.h>
#include <linux/irqchip/arm-gic-common.h>
@@ -180,11 +181,6 @@ static enum gic_intid_range get_intid_range(struct irq_data *d)
return __get_intid_range(d->hwirq);
}
-static inline unsigned int gic_irq(struct irq_data *d)
-{
- return d->hwirq;
-}
-
static inline bool gic_irq_in_rdist(struct irq_data *d)
{
switch (get_intid_range(d)) {
@@ -251,17 +247,13 @@ static inline void __iomem *gic_dist_base(struct irq_data *d)
static void gic_do_wait_for_rwp(void __iomem *base, u32 bit)
{
- u32 count = 1000000; /* 1s! */
+ u32 val;
+ int ret;
- while (readl_relaxed(base + GICD_CTLR) & bit) {
- count--;
- if (!count) {
- pr_err_ratelimited("RWP timeout, gone fishing\n");
- return;
- }
- cpu_relax();
- udelay(1);
- }
+ ret = readl_relaxed_poll_timeout_atomic(base + GICD_CTLR, val, !(val & bit),
+ 1, USEC_PER_SEC);
+ if (ret == -ETIMEDOUT)
+ pr_err_ratelimited("RWP timeout, gone fishing\n");
}
/* Wait for completion of a distributor change */
@@ -279,8 +271,8 @@ static void gic_redist_wait_for_rwp(void)
static void gic_enable_redist(bool enable)
{
void __iomem *rbase;
- u32 count = 1000000; /* 1s! */
u32 val;
+ int ret;
if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996)
return;
@@ -301,16 +293,13 @@ static void gic_enable_redist(bool enable)
return; /* No PM support in this redistributor */
}
- while (--count) {
- val = readl_relaxed(rbase + GICR_WAKER);
- if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep))
- break;
- cpu_relax();
- udelay(1);
- }
- if (!count)
+ ret = readl_relaxed_poll_timeout_atomic(rbase + GICR_WAKER, val,
+ enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep),
+ 1, USEC_PER_SEC);
+ if (ret == -ETIMEDOUT) {
pr_err_ratelimited("redistributor failed to %s...\n",
enable ? "wakeup" : "sleep");
+ }
}
/*
@@ -548,7 +537,7 @@ static int gic_irq_nmi_setup(struct irq_data *d)
* A secondary irq_chip should be in charge of LPI request,
* it should not be possible to get there
*/
- if (WARN_ON(gic_irq(d) >= 8192))
+ if (WARN_ON(irqd_to_hwirq(d) >= 8192))
return -EINVAL;
/* desc lock should already be held */
@@ -588,7 +577,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
* A secondary irq_chip should be in charge of LPI request,
* it should not be possible to get there
*/
- if (WARN_ON(gic_irq(d) >= 8192))
+ if (WARN_ON(irqd_to_hwirq(d) >= 8192))
return;
/* desc lock should already be held */
@@ -626,7 +615,7 @@ static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
static void gic_eoi_irq(struct irq_data *d)
{
- write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
+ write_gicreg(irqd_to_hwirq(d), ICC_EOIR1_EL1);
isb();
if (gic_arm64_erratum_2941627_needed(d)) {
@@ -646,19 +635,19 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
* No need to deactivate an LPI, or an interrupt that
* is is getting forwarded to a vcpu.
*/
- if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
+ if (irqd_to_hwirq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
return;
if (!gic_arm64_erratum_2941627_needed(d))
- gic_write_dir(gic_irq(d));
+ gic_write_dir(irqd_to_hwirq(d));
else
gic_poke_irq(d, GICD_ICACTIVER);
}
static int gic_set_type(struct irq_data *d, unsigned int type)
{
+ irq_hw_number_t irq = irqd_to_hwirq(d);
enum gic_intid_range range;
- unsigned int irq = gic_irq(d);
void __iomem *base;
u32 offset, index;
int ret;
@@ -684,7 +673,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
ret = gic_configure_irq(index, type, base + offset, NULL);
if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) {
/* Misconfigured PPIs are usually not fatal */
- pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq);
+ pr_warn("GIC: PPI INTID%ld is secure or misconfigured\n", irq);
ret = 0;
}
@@ -1702,9 +1691,13 @@ static int gic_irq_domain_select(struct irq_domain *d,
irq_hw_number_t hwirq;
/* Not for us */
- if (fwspec->fwnode != d->fwnode)
+ if (fwspec->fwnode != d->fwnode)
return 0;
+ /* Handle pure domain searches */
+ if (!fwspec->param_count)
+ return d->bus_token == bus_token;
+
/* If this is not DT, then we have a single domain */
if (!is_of_node(fwspec->fwnode))
return 1;
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 412196a7dad5..98aa383e39db 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -162,11 +162,6 @@ static inline void __iomem *gic_cpu_base(struct irq_data *d)
return gic_data_cpu_base(gic_data);
}
-static inline unsigned int gic_irq(struct irq_data *d)
-{
- return d->hwirq;
-}
-
static inline bool cascading_gic_irq(struct irq_data *d)
{
void *data = irq_data_get_irq_handler_data(d);
@@ -183,14 +178,16 @@ static inline bool cascading_gic_irq(struct irq_data *d)
*/
static void gic_poke_irq(struct irq_data *d, u32 offset)
{
- u32 mask = 1 << (gic_irq(d) % 32);
- writel_relaxed(mask, gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4);
+ u32 mask = 1 << (irqd_to_hwirq(d) % 32);
+
+ writel_relaxed(mask, gic_dist_base(d) + offset + (irqd_to_hwirq(d) / 32) * 4);
}
static int gic_peek_irq(struct irq_data *d, u32 offset)
{
- u32 mask = 1 << (gic_irq(d) % 32);
- return !!(readl_relaxed(gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4) & mask);
+ u32 mask = 1 << (irqd_to_hwirq(d) % 32);
+
+ return !!(readl_relaxed(gic_dist_base(d) + offset + (irqd_to_hwirq(d) / 32) * 4) & mask);
}
static void gic_mask_irq(struct irq_data *d)
@@ -220,7 +217,7 @@ static void gic_unmask_irq(struct irq_data *d)
static void gic_eoi_irq(struct irq_data *d)
{
- u32 hwirq = gic_irq(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
if (hwirq < 16)
hwirq = this_cpu_read(sgi_intid);
@@ -230,7 +227,7 @@ static void gic_eoi_irq(struct irq_data *d)
static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
- u32 hwirq = gic_irq(d);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
/* Do not deactivate an IRQ forwarded to a vcpu. */
if (irqd_is_forwarded_to_vcpu(d))
@@ -293,8 +290,8 @@ static int gic_irq_get_irqchip_state(struct irq_data *d,
static int gic_set_type(struct irq_data *d, unsigned int type)
{
+ irq_hw_number_t gicirq = irqd_to_hwirq(d);
void __iomem *base = gic_dist_base(d);
- unsigned int gicirq = gic_irq(d);
int ret;
/* Interrupt configuration for SGIs can't be changed */
@@ -309,7 +306,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
ret = gic_configure_irq(gicirq, type, base + GIC_DIST_CONFIG, NULL);
if (ret && gicirq < 32) {
/* Misconfigured PPIs are usually not fatal */
- pr_warn("GIC: PPI%d is secure or misconfigured\n", gicirq - 16);
+ pr_warn("GIC: PPI%ld is secure or misconfigured\n", gicirq - 16);
ret = 0;
}
@@ -319,7 +316,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
/* Only interrupts on the primary GIC can be forwarded to a vcpu. */
- if (cascading_gic_irq(d) || gic_irq(d) < 16)
+ if (cascading_gic_irq(d) || irqd_to_hwirq(d) < 16)
return -EINVAL;
if (vcpu)
@@ -796,7 +793,7 @@ static void rmw_writeb(u8 bval, void __iomem *addr)
static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
- void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d);
+ void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + irqd_to_hwirq(d);
struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
unsigned int cpu;
diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 5831be454673..b42ed68acfa6 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -461,12 +461,11 @@ err_generic:
return ret;
}
-static int pdc_intc_remove(struct platform_device *pdev)
+static void pdc_intc_remove(struct platform_device *pdev)
{
struct pdc_intc_priv *priv = platform_get_drvdata(pdev);
irq_domain_remove(priv->domain);
- return 0;
}
static const struct of_device_id pdc_intc_match[] = {
@@ -479,8 +478,8 @@ static struct platform_driver pdc_intc_driver = {
.name = "pdc-intc",
.of_match_table = pdc_intc_match,
},
- .probe = pdc_intc_probe,
- .remove = pdc_intc_remove,
+ .probe = pdc_intc_probe,
+ .remove_new = pdc_intc_remove,
};
static int __init pdc_intc_init(void)
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index aa041e4dfee0..511adfaeec82 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -166,6 +166,10 @@ static int imx_intmux_irq_select(struct irq_domain *d, struct irq_fwspec *fwspec
if (fwspec->fwnode != d->fwnode)
return false;
+ /* Handle pure domain searches */
+ if (!fwspec->param_count)
+ return d->bus_token == bus_token;
+
return irqchip_data->chanidx == fwspec->param[1];
}
@@ -282,7 +286,7 @@ out:
return ret;
}
-static int imx_intmux_remove(struct platform_device *pdev)
+static void imx_intmux_remove(struct platform_device *pdev)
{
struct intmux_data *data = platform_get_drvdata(pdev);
int i;
@@ -298,8 +302,6 @@ static int imx_intmux_remove(struct platform_device *pdev)
}
pm_runtime_disable(&pdev->dev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -354,11 +356,11 @@ static const struct of_device_id imx_intmux_id[] = {
static struct platform_driver imx_intmux_driver = {
.driver = {
- .name = "imx-intmux",
- .of_match_table = imx_intmux_id,
- .pm = &imx_intmux_pm_ops,
+ .name = "imx-intmux",
+ .of_match_table = imx_intmux_id,
+ .pm = &imx_intmux_pm_ops,
},
- .probe = imx_intmux_probe,
- .remove = imx_intmux_remove,
+ .probe = imx_intmux_probe,
+ .remove_new = imx_intmux_remove,
};
builtin_platform_driver(imx_intmux_driver);
diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
index bd9543314539..20cf7a9e9ece 100644
--- a/drivers/irqchip/irq-imx-irqsteer.c
+++ b/drivers/irqchip/irq-imx-irqsteer.c
@@ -231,7 +231,7 @@ out:
return ret;
}
-static int imx_irqsteer_remove(struct platform_device *pdev)
+static void imx_irqsteer_remove(struct platform_device *pdev)
{
struct irqsteer_data *irqsteer_data = platform_get_drvdata(pdev);
int i;
@@ -243,8 +243,6 @@ static int imx_irqsteer_remove(struct platform_device *pdev)
irq_domain_remove(irqsteer_data->domain);
clk_disable_unprepare(irqsteer_data->ipg_clk);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -307,11 +305,11 @@ static const struct of_device_id imx_irqsteer_dt_ids[] = {
static struct platform_driver imx_irqsteer_driver = {
.driver = {
- .name = "imx-irqsteer",
- .of_match_table = imx_irqsteer_dt_ids,
- .pm = &imx_irqsteer_pm_ops,
+ .name = "imx-irqsteer",
+ .of_match_table = imx_irqsteer_dt_ids,
+ .pm = &imx_irqsteer_pm_ops,
},
- .probe = imx_irqsteer_probe,
- .remove = imx_irqsteer_remove,
+ .probe = imx_irqsteer_probe,
+ .remove_new = imx_irqsteer_remove,
};
builtin_platform_driver(imx_irqsteer_driver);
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index a36396db4b08..30f1979fa124 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -190,7 +190,7 @@ static int keystone_irq_probe(struct platform_device *pdev)
return 0;
}
-static int keystone_irq_remove(struct platform_device *pdev)
+static void keystone_irq_remove(struct platform_device *pdev)
{
struct keystone_irq_device *kirq = platform_get_drvdata(pdev);
int hwirq;
@@ -201,7 +201,6 @@ static int keystone_irq_remove(struct platform_device *pdev)
irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq));
irq_domain_remove(kirq->irqd);
- return 0;
}
static const struct of_device_id keystone_irq_dt_ids[] = {
@@ -212,7 +211,7 @@ MODULE_DEVICE_TABLE(of, keystone_irq_dt_ids);
static struct platform_driver keystone_irq_device_driver = {
.probe = keystone_irq_probe,
- .remove = keystone_irq_remove,
+ .remove_new = keystone_irq_remove,
.driver = {
.name = "keystone_irq",
.of_match_table = of_match_ptr(keystone_irq_dt_ids),
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 1623cd779175..b64cbe3052e8 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -198,6 +198,12 @@ static void eiointc_irq_dispatch(struct irq_desc *desc)
for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) {
pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3));
+
+ /* Skip handling if pending bitmap is zero */
+ if (!pending)
+ continue;
+
+ /* Clear the IRQs */
iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3));
while (pending) {
int bit = __ffs(pending);
@@ -241,7 +247,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq,
int ret;
unsigned int i, type;
unsigned long hwirq = 0;
- struct eiointc *priv = domain->host_data;
+ struct eiointc_priv *priv = domain->host_data;
ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type);
if (ret)
@@ -304,23 +310,7 @@ static int eiointc_suspend(void)
static void eiointc_resume(void)
{
- int i, j;
- struct irq_desc *desc;
- struct irq_data *irq_data;
-
eiointc_router_init(0);
-
- for (i = 0; i < nr_pics; i++) {
- for (j = 0; j < eiointc_priv[0]->vec_count; j++) {
- desc = irq_resolve_mapping(eiointc_priv[i]->eiointc_domain, j);
- if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) {
- raw_spin_lock(&desc->lock);
- irq_data = irq_domain_get_irq_data(eiointc_priv[i]->eiointc_domain, irq_desc_get_irq(desc));
- eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0);
- raw_spin_unlock(&desc->lock);
- }
- }
- }
}
static struct syscore_ops eiointc_syscore_ops = {
diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index 15cf80b46322..1aef5c4d27c6 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -398,7 +398,7 @@ static int ls_scfg_msi_probe(struct platform_device *pdev)
return 0;
}
-static int ls_scfg_msi_remove(struct platform_device *pdev)
+static void ls_scfg_msi_remove(struct platform_device *pdev)
{
struct ls_scfg_msi *msi_data = platform_get_drvdata(pdev);
int i;
@@ -410,17 +410,15 @@ static int ls_scfg_msi_remove(struct platform_device *pdev)
irq_domain_remove(msi_data->parent);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static struct platform_driver ls_scfg_msi_driver = {
.driver = {
- .name = "ls-scfg-msi",
- .of_match_table = ls_scfg_msi_id,
+ .name = "ls-scfg-msi",
+ .of_match_table = ls_scfg_msi_id,
},
- .probe = ls_scfg_msi_probe,
- .remove = ls_scfg_msi_remove,
+ .probe = ls_scfg_msi_probe,
+ .remove_new = ls_scfg_msi_remove,
};
module_platform_driver(ls_scfg_msi_driver);
diff --git a/drivers/irqchip/irq-madera.c b/drivers/irqchip/irq-madera.c
index 3eb1f8cdf674..acceb6e7fa95 100644
--- a/drivers/irqchip/irq-madera.c
+++ b/drivers/irqchip/irq-madera.c
@@ -222,7 +222,7 @@ static int madera_irq_probe(struct platform_device *pdev)
return 0;
}
-static int madera_irq_remove(struct platform_device *pdev)
+static void madera_irq_remove(struct platform_device *pdev)
{
struct madera *madera = dev_get_drvdata(pdev->dev.parent);
@@ -232,13 +232,11 @@ static int madera_irq_remove(struct platform_device *pdev)
*/
madera->irq_dev = NULL;
regmap_del_irq_chip(madera->irq, madera->irq_data);
-
- return 0;
}
static struct platform_driver madera_irq_driver = {
- .probe = &madera_irq_probe,
- .remove = &madera_irq_remove,
+ .probe = madera_irq_probe,
+ .remove_new = madera_irq_remove,
.driver = {
.name = "madera-irq",
.pm = &madera_irq_pm_ops,
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 5101a3fb11df..58881d313979 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -235,22 +235,17 @@ static const struct irq_domain_ops mbigen_domain_ops = {
static int mbigen_of_create_domain(struct platform_device *pdev,
struct mbigen_device *mgn_chip)
{
- struct device *parent;
struct platform_device *child;
struct irq_domain *domain;
struct device_node *np;
u32 num_pins;
int ret = 0;
- parent = bus_get_dev_root(&platform_bus_type);
- if (!parent)
- return -ENODEV;
-
for_each_child_of_node(pdev->dev.of_node, np) {
if (!of_property_read_bool(np, "interrupt-controller"))
continue;
- child = of_platform_device_create(np, NULL, parent);
+ child = of_platform_device_create(np, NULL, NULL);
if (!child) {
ret = -ENOMEM;
break;
@@ -273,7 +268,6 @@ static int mbigen_of_create_domain(struct platform_device *pdev,
}
}
- put_device(parent);
if (ret)
of_node_put(np);
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index f88df39f4129..9a1791908598 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -154,6 +154,10 @@ static const struct meson_gpio_irq_params c3_params = {
INIT_MESON_S4_COMMON_DATA(55)
};
+static const struct meson_gpio_irq_params t7_params = {
+ INIT_MESON_S4_COMMON_DATA(157)
+};
+
static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
@@ -165,6 +169,7 @@ static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
{ .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params },
{ .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params },
{ .compatible = "amlogic,c3-gpio-intc", .data = &c3_params },
+ { .compatible = "amlogic,t7-gpio-intc", .data = &t7_params },
{ }
};
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index ef3d3646ccc2..d17d9c0e2880 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -167,14 +167,12 @@ static int mvebu_pic_probe(struct platform_device *pdev)
return 0;
}
-static int mvebu_pic_remove(struct platform_device *pdev)
+static void mvebu_pic_remove(struct platform_device *pdev)
{
struct mvebu_pic *pic = platform_get_drvdata(pdev);
on_each_cpu(mvebu_pic_disable_percpu_irq, pic, 1);
irq_domain_remove(pic->domain);
-
- return 0;
}
static const struct of_device_id mvebu_pic_of_match[] = {
@@ -184,11 +182,11 @@ static const struct of_device_id mvebu_pic_of_match[] = {
MODULE_DEVICE_TABLE(of, mvebu_pic_of_match);
static struct platform_driver mvebu_pic_driver = {
- .probe = mvebu_pic_probe,
- .remove = mvebu_pic_remove,
+ .probe = mvebu_pic_probe,
+ .remove_new = mvebu_pic_remove,
.driver = {
- .name = "mvebu-pic",
- .of_match_table = mvebu_pic_of_match,
+ .name = "mvebu-pic",
+ .of_match_table = mvebu_pic_of_match,
},
};
module_platform_driver(mvebu_pic_driver);
diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c
index 0f64ecb9b1f4..060eb000e9d3 100644
--- a/drivers/irqchip/irq-pruss-intc.c
+++ b/drivers/irqchip/irq-pruss-intc.c
@@ -599,7 +599,7 @@ fail_irq:
return ret;
}
-static int pruss_intc_remove(struct platform_device *pdev)
+static void pruss_intc_remove(struct platform_device *pdev)
{
struct pruss_intc *intc = platform_get_drvdata(pdev);
u8 max_system_events = intc->soc_config->num_system_events;
@@ -616,8 +616,6 @@ static int pruss_intc_remove(struct platform_device *pdev)
irq_dispose_mapping(irq_find_mapping(intc->domain, hwirq));
irq_domain_remove(intc->domain);
-
- return 0;
}
static const struct pruss_intc_match_data pruss_intc_data = {
@@ -645,12 +643,12 @@ MODULE_DEVICE_TABLE(of, pruss_intc_of_match);
static struct platform_driver pruss_intc_driver = {
.driver = {
- .name = "pruss-intc",
- .of_match_table = pruss_intc_of_match,
- .suppress_bind_attrs = true,
+ .name = "pruss-intc",
+ .of_match_table = pruss_intc_of_match,
+ .suppress_bind_attrs = true,
},
- .probe = pruss_intc_probe,
- .remove = pruss_intc_remove,
+ .probe = pruss_intc_probe,
+ .remove_new = pruss_intc_remove,
};
module_platform_driver(pruss_intc_driver);
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
index cda5838d2232..7942d8eb3d00 100644
--- a/drivers/irqchip/irq-qcom-mpm.c
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -389,8 +389,8 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
/* Don't use devm_ioremap_resource, as we're accessing a shared region. */
priv->base = devm_ioremap(dev, res.start, resource_size(&res));
of_node_put(msgram_np);
- if (IS_ERR(priv->base))
- return PTR_ERR(priv->base);
+ if (!priv->base)
+ return -ENOMEM;
} else {
/* Otherwise, fall back to simple MMIO. */
priv->base = devm_platform_ioremap_resource(pdev, 0);
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index fa19585f3dee..9ad37237ba95 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -561,14 +561,13 @@ err0:
return ret;
}
-static int intc_irqpin_remove(struct platform_device *pdev)
+static void intc_irqpin_remove(struct platform_device *pdev)
{
struct intc_irqpin_priv *p = platform_get_drvdata(pdev);
irq_domain_remove(p->irq_domain);
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- return 0;
}
static int __maybe_unused intc_irqpin_suspend(struct device *dev)
@@ -585,11 +584,11 @@ static SIMPLE_DEV_PM_OPS(intc_irqpin_pm_ops, intc_irqpin_suspend, NULL);
static struct platform_driver intc_irqpin_device_driver = {
.probe = intc_irqpin_probe,
- .remove = intc_irqpin_remove,
+ .remove_new = intc_irqpin_remove,
.driver = {
- .name = "renesas_intc_irqpin",
- .of_match_table = intc_irqpin_dt_ids,
- .pm = &intc_irqpin_pm_ops,
+ .name = "renesas_intc_irqpin",
+ .of_match_table = intc_irqpin_dt_ids,
+ .pm = &intc_irqpin_pm_ops,
}
};
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 49b446b396f9..76026e0b8e20 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -218,14 +218,13 @@ err_runtime_pm_disable:
return ret;
}
-static int irqc_remove(struct platform_device *pdev)
+static void irqc_remove(struct platform_device *pdev)
{
struct irqc_priv *p = platform_get_drvdata(pdev);
irq_domain_remove(p->irq_domain);
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- return 0;
}
static int __maybe_unused irqc_suspend(struct device *dev)
@@ -248,11 +247,11 @@ MODULE_DEVICE_TABLE(of, irqc_dt_ids);
static struct platform_driver irqc_device_driver = {
.probe = irqc_probe,
- .remove = irqc_remove,
+ .remove_new = irqc_remove,
.driver = {
- .name = "renesas_irqc",
+ .name = "renesas_irqc",
.of_match_table = irqc_dt_ids,
- .pm = &irqc_pm_ops,
+ .pm = &irqc_pm_ops,
}
};
diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c
index e4c99c2e0373..f05afe82db4d 100644
--- a/drivers/irqchip/irq-renesas-rza1.c
+++ b/drivers/irqchip/irq-renesas-rza1.c
@@ -244,12 +244,11 @@ out_put_node:
return ret;
}
-static int rza1_irqc_remove(struct platform_device *pdev)
+static void rza1_irqc_remove(struct platform_device *pdev)
{
struct rza1_irqc_priv *priv = platform_get_drvdata(pdev);
irq_domain_remove(priv->irq_domain);
- return 0;
}
static const struct of_device_id rza1_irqc_dt_ids[] = {
@@ -260,9 +259,9 @@ MODULE_DEVICE_TABLE(of, rza1_irqc_dt_ids);
static struct platform_driver rza1_irqc_device_driver = {
.probe = rza1_irqc_probe,
- .remove = rza1_irqc_remove,
+ .remove_new = rza1_irqc_remove,
.driver = {
- .name = "renesas_rza1_irqc",
+ .name = "renesas_rza1_irqc",
.of_match_table = rza1_irqc_dt_ids,
}
};
diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
index e8d01b14ccdd..f87aeab460eb 100644
--- a/drivers/irqchip/irq-riscv-intc.c
+++ b/drivers/irqchip/irq-riscv-intc.c
@@ -17,17 +17,29 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/smp.h>
+#include <linux/soc/andes/irq.h>
+
+#include <asm/hwcap.h>
static struct irq_domain *intc_domain;
+static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG;
+static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG;
+static unsigned int riscv_intc_custom_nr_irqs __ro_after_init;
static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
{
unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
- if (unlikely(cause >= BITS_PER_LONG))
- panic("unexpected interrupt cause");
+ if (generic_handle_domain_irq(intc_domain, cause))
+ pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause);
+}
+
+static asmlinkage void riscv_intc_aia_irq(struct pt_regs *regs)
+{
+ unsigned long topi;
- generic_handle_domain_irq(intc_domain, cause);
+ while ((topi = csr_read(CSR_TOPI)))
+ generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT);
}
/*
@@ -39,12 +51,43 @@ static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
static void riscv_intc_irq_mask(struct irq_data *d)
{
- csr_clear(CSR_IE, BIT(d->hwirq));
+ if (IS_ENABLED(CONFIG_32BIT) && d->hwirq >= BITS_PER_LONG)
+ csr_clear(CSR_IEH, BIT(d->hwirq - BITS_PER_LONG));
+ else
+ csr_clear(CSR_IE, BIT(d->hwirq));
}
static void riscv_intc_irq_unmask(struct irq_data *d)
{
- csr_set(CSR_IE, BIT(d->hwirq));
+ if (IS_ENABLED(CONFIG_32BIT) && d->hwirq >= BITS_PER_LONG)
+ csr_set(CSR_IEH, BIT(d->hwirq - BITS_PER_LONG));
+ else
+ csr_set(CSR_IE, BIT(d->hwirq));
+}
+
+static void andes_intc_irq_mask(struct irq_data *d)
+{
+ /*
+ * Andes specific S-mode local interrupt causes (hwirq)
+ * are defined as (256 + n) and controlled by n-th bit
+ * of SLIE.
+ */
+ unsigned int mask = BIT(d->hwirq % BITS_PER_LONG);
+
+ if (d->hwirq < ANDES_SLI_CAUSE_BASE)
+ csr_clear(CSR_IE, mask);
+ else
+ csr_clear(ANDES_CSR_SLIE, mask);
+}
+
+static void andes_intc_irq_unmask(struct irq_data *d)
+{
+ unsigned int mask = BIT(d->hwirq % BITS_PER_LONG);
+
+ if (d->hwirq < ANDES_SLI_CAUSE_BASE)
+ csr_set(CSR_IE, mask);
+ else
+ csr_set(ANDES_CSR_SLIE, mask);
}
static void riscv_intc_irq_eoi(struct irq_data *d)
@@ -70,12 +113,21 @@ static struct irq_chip riscv_intc_chip = {
.irq_eoi = riscv_intc_irq_eoi,
};
+static struct irq_chip andes_intc_chip = {
+ .name = "RISC-V INTC",
+ .irq_mask = andes_intc_irq_mask,
+ .irq_unmask = andes_intc_irq_unmask,
+ .irq_eoi = riscv_intc_irq_eoi,
+};
+
static int riscv_intc_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
{
+ struct irq_chip *chip = d->host_data;
+
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hwirq, &riscv_intc_chip, d->host_data,
- handle_percpu_devid_irq, NULL, NULL);
+ irq_domain_set_info(d, irq, hwirq, chip, NULL, handle_percpu_devid_irq,
+ NULL, NULL);
return 0;
}
@@ -93,6 +145,14 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain,
if (ret)
return ret;
+ /*
+ * Only allow hwirq for which we have corresponding standard or
+ * custom interrupt enable register.
+ */
+ if ((hwirq >= riscv_intc_nr_irqs && hwirq < riscv_intc_custom_base) ||
+ (hwirq >= riscv_intc_custom_base + riscv_intc_custom_nr_irqs))
+ return -EINVAL;
+
for (i = 0; i < nr_irqs; i++) {
ret = riscv_intc_domain_map(domain, virq + i, hwirq + i);
if (ret)
@@ -113,18 +173,20 @@ static struct fwnode_handle *riscv_intc_hwnode(void)
return intc_domain->fwnode;
}
-static int __init riscv_intc_init_common(struct fwnode_handle *fn)
+static int __init riscv_intc_init_common(struct fwnode_handle *fn, struct irq_chip *chip)
{
int rc;
- intc_domain = irq_domain_create_linear(fn, BITS_PER_LONG,
- &riscv_intc_domain_ops, NULL);
+ intc_domain = irq_domain_create_tree(fn, &riscv_intc_domain_ops, chip);
if (!intc_domain) {
pr_err("unable to add IRQ domain\n");
return -ENXIO;
}
- rc = set_handle_irq(&riscv_intc_irq);
+ if (riscv_isa_extension_available(NULL, SxAIA))
+ rc = set_handle_irq(&riscv_intc_aia_irq);
+ else
+ rc = set_handle_irq(&riscv_intc_irq);
if (rc) {
pr_err("failed to set irq handler\n");
return rc;
@@ -132,7 +194,11 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn)
riscv_set_intc_hwnode_fn(riscv_intc_hwnode);
- pr_info("%d local interrupts mapped\n", BITS_PER_LONG);
+ pr_info("%d local interrupts mapped%s\n",
+ riscv_isa_extension_available(NULL, SxAIA) ? 64 : riscv_intc_nr_irqs,
+ riscv_isa_extension_available(NULL, SxAIA) ? " using AIA" : "");
+ if (riscv_intc_custom_nr_irqs)
+ pr_info("%d custom local interrupts mapped\n", riscv_intc_custom_nr_irqs);
return 0;
}
@@ -140,8 +206,9 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn)
static int __init riscv_intc_init(struct device_node *node,
struct device_node *parent)
{
- int rc;
+ struct irq_chip *chip = &riscv_intc_chip;
unsigned long hartid;
+ int rc;
rc = riscv_of_parent_hartid(node, &hartid);
if (rc < 0) {
@@ -166,10 +233,17 @@ static int __init riscv_intc_init(struct device_node *node,
return 0;
}
- return riscv_intc_init_common(of_node_to_fwnode(node));
+ if (of_device_is_compatible(node, "andestech,cpu-intc")) {
+ riscv_intc_custom_base = ANDES_SLI_CAUSE_BASE;
+ riscv_intc_custom_nr_irqs = ANDES_RV_IRQ_LAST;
+ chip = &andes_intc_chip;
+ }
+
+ return riscv_intc_init_common(of_node_to_fwnode(node), chip);
}
IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init);
+IRQCHIP_DECLARE(andes, "andestech,cpu-intc", riscv_intc_init);
#ifdef CONFIG_ACPI
@@ -196,7 +270,7 @@ static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header,
return -ENOMEM;
}
- return riscv_intc_init_common(fn);
+ return riscv_intc_init_common(fn, &riscv_intc_chip);
}
IRQCHIP_ACPI_DECLARE(riscv_intc, ACPI_MADT_TYPE_RINTC, NULL,
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 5b7bc4fd9517..f3d4cb9e34f7 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -3,7 +3,6 @@
* Copyright (C) 2017 SiFive
* Copyright (C) 2018 Christoph Hellwig
*/
-#define pr_fmt(fmt) "plic: " fmt
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -64,6 +63,7 @@
#define PLIC_QUIRK_EDGE_INTERRUPT 0
struct plic_priv {
+ struct device *dev;
struct cpumask lmask;
struct irq_domain *irqdomain;
void __iomem *regs;
@@ -103,9 +103,11 @@ static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable)
static void plic_toggle(struct plic_handler *handler, int hwirq, int enable)
{
- raw_spin_lock(&handler->enable_lock);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&handler->enable_lock, flags);
__plic_toggle(handler->enable_base, hwirq, enable);
- raw_spin_unlock(&handler->enable_lock);
+ raw_spin_unlock_irqrestore(&handler->enable_lock, flags);
}
static inline void plic_irq_toggle(const struct cpumask *mask,
@@ -148,7 +150,13 @@ static void plic_irq_eoi(struct irq_data *d)
{
struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+ if (unlikely(irqd_irq_disabled(d))) {
+ plic_toggle(handler, d->hwirq, 1);
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+ plic_toggle(handler, d->hwirq, 0);
+ } else {
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+ }
}
#ifdef CONFIG_SMP
@@ -236,6 +244,7 @@ static int plic_irq_set_type(struct irq_data *d, unsigned int type)
static int plic_irq_suspend(void)
{
unsigned int i, cpu;
+ unsigned long flags;
u32 __iomem *reg;
struct plic_priv *priv;
@@ -253,12 +262,12 @@ static int plic_irq_suspend(void)
if (!handler->present)
continue;
- raw_spin_lock(&handler->enable_lock);
+ raw_spin_lock_irqsave(&handler->enable_lock, flags);
for (i = 0; i < DIV_ROUND_UP(priv->nr_irqs, 32); i++) {
reg = handler->enable_base + i * sizeof(u32);
handler->enable_save[i] = readl(reg);
}
- raw_spin_unlock(&handler->enable_lock);
+ raw_spin_unlock_irqrestore(&handler->enable_lock, flags);
}
return 0;
@@ -267,6 +276,7 @@ static int plic_irq_suspend(void)
static void plic_irq_resume(void)
{
unsigned int i, index, cpu;
+ unsigned long flags;
u32 __iomem *reg;
struct plic_priv *priv;
@@ -284,12 +294,12 @@ static void plic_irq_resume(void)
if (!handler->present)
continue;
- raw_spin_lock(&handler->enable_lock);
+ raw_spin_lock_irqsave(&handler->enable_lock, flags);
for (i = 0; i < DIV_ROUND_UP(priv->nr_irqs, 32); i++) {
reg = handler->enable_base + i * sizeof(u32);
writel(handler->enable_save[i], reg);
}
- raw_spin_unlock(&handler->enable_lock);
+ raw_spin_unlock_irqrestore(&handler->enable_lock, flags);
}
}
@@ -370,9 +380,10 @@ static void plic_handle_irq(struct irq_desc *desc)
while ((hwirq = readl(claim))) {
int err = generic_handle_domain_irq(handler->priv->irqdomain,
hwirq);
- if (unlikely(err))
- pr_warn_ratelimited("can't find mapping for hwirq %lu\n",
- hwirq);
+ if (unlikely(err)) {
+ dev_warn_ratelimited(handler->priv->dev,
+ "can't find mapping for hwirq %lu\n", hwirq);
+ }
}
chained_irq_exit(chip, desc);
@@ -400,63 +411,122 @@ static int plic_starting_cpu(unsigned int cpu)
enable_percpu_irq(plic_parent_irq,
irq_get_trigger_type(plic_parent_irq));
else
- pr_warn("cpu%d: parent irq not available\n", cpu);
+ dev_warn(handler->priv->dev, "cpu%d: parent irq not available\n", cpu);
plic_set_threshold(handler, PLIC_ENABLE_THRESHOLD);
return 0;
}
-static int __init __plic_init(struct device_node *node,
- struct device_node *parent,
- unsigned long plic_quirks)
+static const struct of_device_id plic_match[] = {
+ { .compatible = "sifive,plic-1.0.0" },
+ { .compatible = "riscv,plic0" },
+ { .compatible = "andestech,nceplic100",
+ .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
+ { .compatible = "thead,c900-plic",
+ .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
+ {}
+};
+
+static int plic_parse_nr_irqs_and_contexts(struct platform_device *pdev,
+ u32 *nr_irqs, u32 *nr_contexts)
{
- int error = 0, nr_contexts, nr_handlers = 0, i;
- u32 nr_irqs;
- struct plic_priv *priv;
- struct plic_handler *handler;
- unsigned int cpu;
+ struct device *dev = &pdev->dev;
+ int rc;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
+ /*
+ * Currently, only OF fwnode is supported so extend this
+ * function for ACPI support.
+ */
+ if (!is_of_node(dev->fwnode))
+ return -EINVAL;
- priv->plic_quirks = plic_quirks;
+ rc = of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", nr_irqs);
+ if (rc) {
+ dev_err(dev, "riscv,ndev property not available\n");
+ return rc;
+ }
- priv->regs = of_iomap(node, 0);
- if (WARN_ON(!priv->regs)) {
- error = -EIO;
- goto out_free_priv;
+ *nr_contexts = of_irq_count(to_of_node(dev->fwnode));
+ if (WARN_ON(!(*nr_contexts))) {
+ dev_err(dev, "no PLIC context available\n");
+ return -EINVAL;
}
- error = -EINVAL;
- of_property_read_u32(node, "riscv,ndev", &nr_irqs);
- if (WARN_ON(!nr_irqs))
- goto out_iounmap;
+ return 0;
+}
- priv->nr_irqs = nr_irqs;
+static int plic_parse_context_parent(struct platform_device *pdev, u32 context,
+ u32 *parent_hwirq, int *parent_cpu)
+{
+ struct device *dev = &pdev->dev;
+ struct of_phandle_args parent;
+ unsigned long hartid;
+ int rc;
- priv->prio_save = bitmap_alloc(nr_irqs, GFP_KERNEL);
- if (!priv->prio_save)
- goto out_free_priority_reg;
+ /*
+ * Currently, only OF fwnode is supported so extend this
+ * function for ACPI support.
+ */
+ if (!is_of_node(dev->fwnode))
+ return -EINVAL;
- nr_contexts = of_irq_count(node);
- if (WARN_ON(!nr_contexts))
- goto out_free_priority_reg;
+ rc = of_irq_parse_one(to_of_node(dev->fwnode), context, &parent);
+ if (rc)
+ return rc;
- error = -ENOMEM;
- priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1,
- &plic_irqdomain_ops, priv);
- if (WARN_ON(!priv->irqdomain))
- goto out_free_priority_reg;
+ rc = riscv_of_parent_hartid(parent.np, &hartid);
+ if (rc)
+ return rc;
- for (i = 0; i < nr_contexts; i++) {
- struct of_phandle_args parent;
- irq_hw_number_t hwirq;
- int cpu;
- unsigned long hartid;
+ *parent_hwirq = parent.args[0];
+ *parent_cpu = riscv_hartid_to_cpuid(hartid);
+ return 0;
+}
+
+static int plic_probe(struct platform_device *pdev)
+{
+ int error = 0, nr_contexts, nr_handlers = 0, cpu, i;
+ struct device *dev = &pdev->dev;
+ unsigned long plic_quirks = 0;
+ struct plic_handler *handler;
+ u32 nr_irqs, parent_hwirq;
+ struct irq_domain *domain;
+ struct plic_priv *priv;
+ irq_hw_number_t hwirq;
+ bool cpuhp_setup;
- if (of_irq_parse_one(node, i, &parent)) {
- pr_err("failed to parse parent for context %d.\n", i);
+ if (is_of_node(dev->fwnode)) {
+ const struct of_device_id *id;
+
+ id = of_match_node(plic_match, to_of_node(dev->fwnode));
+ if (id)
+ plic_quirks = (unsigned long)id->data;
+ }
+
+ error = plic_parse_nr_irqs_and_contexts(pdev, &nr_irqs, &nr_contexts);
+ if (error)
+ return error;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ priv->plic_quirks = plic_quirks;
+ priv->nr_irqs = nr_irqs;
+
+ priv->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (WARN_ON(!priv->regs))
+ return -EIO;
+
+ priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL);
+ if (!priv->prio_save)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_contexts; i++) {
+ error = plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu);
+ if (error) {
+ dev_warn(dev, "hwirq for context%d not found\n", i);
continue;
}
@@ -464,7 +534,7 @@ static int __init __plic_init(struct device_node *node,
* Skip contexts other than external interrupts for our
* privilege level.
*/
- if (parent.args[0] != RV_IRQ_EXT) {
+ if (parent_hwirq != RV_IRQ_EXT) {
/* Disable S-mode enable bits if running in M-mode. */
if (IS_ENABLED(CONFIG_RISCV_M_MODE)) {
void __iomem *enable_base = priv->regs +
@@ -477,24 +547,17 @@ static int __init __plic_init(struct device_node *node,
continue;
}
- error = riscv_of_parent_hartid(parent.np, &hartid);
- if (error < 0) {
- pr_warn("failed to parse hart ID for context %d.\n", i);
- continue;
- }
-
- cpu = riscv_hartid_to_cpuid(hartid);
if (cpu < 0) {
- pr_warn("Invalid cpuid for context %d\n", i);
+ dev_warn(dev, "Invalid cpuid for context %d\n", i);
continue;
}
/* Find parent domain and register chained handler */
- if (!plic_parent_irq && irq_find_host(parent.np)) {
- plic_parent_irq = irq_of_parse_and_map(node, i);
+ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY);
+ if (!plic_parent_irq && domain) {
+ plic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
if (plic_parent_irq)
- irq_set_chained_handler(plic_parent_irq,
- plic_handle_irq);
+ irq_set_chained_handler(plic_parent_irq, plic_handle_irq);
}
/*
@@ -504,7 +567,7 @@ static int __init __plic_init(struct device_node *node,
*/
handler = per_cpu_ptr(&plic_handlers, cpu);
if (handler->present) {
- pr_warn("handler already present for context %d.\n", i);
+ dev_warn(dev, "handler already present for context %d.\n", i);
plic_set_threshold(handler, PLIC_DISABLE_THRESHOLD);
goto done;
}
@@ -518,10 +581,10 @@ static int __init __plic_init(struct device_node *node,
i * CONTEXT_ENABLE_SIZE;
handler->priv = priv;
- handler->enable_save = kcalloc(DIV_ROUND_UP(nr_irqs, 32),
- sizeof(*handler->enable_save), GFP_KERNEL);
+ handler->enable_save = devm_kcalloc(dev, DIV_ROUND_UP(nr_irqs, 32),
+ sizeof(*handler->enable_save), GFP_KERNEL);
if (!handler->enable_save)
- goto out_free_enable_reg;
+ goto fail_cleanup_contexts;
done:
for (hwirq = 1; hwirq <= nr_irqs; hwirq++) {
plic_toggle(handler, hwirq, 0);
@@ -531,52 +594,60 @@ done:
nr_handlers++;
}
+ priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1,
+ &plic_irqdomain_ops, priv);
+ if (WARN_ON(!priv->irqdomain))
+ goto fail_cleanup_contexts;
+
/*
* We can have multiple PLIC instances so setup cpuhp state
- * and register syscore operations only when context handler
- * for current/boot CPU is present.
+ * and register syscore operations only once after context
+ * handlers of all online CPUs are initialized.
*/
- handler = this_cpu_ptr(&plic_handlers);
- if (handler->present && !plic_cpuhp_setup_done) {
- cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
- "irqchip/sifive/plic:starting",
- plic_starting_cpu, plic_dying_cpu);
- register_syscore_ops(&plic_irq_syscore_ops);
- plic_cpuhp_setup_done = true;
+ if (!plic_cpuhp_setup_done) {
+ cpuhp_setup = true;
+ for_each_online_cpu(cpu) {
+ handler = per_cpu_ptr(&plic_handlers, cpu);
+ if (!handler->present) {
+ cpuhp_setup = false;
+ break;
+ }
+ }
+ if (cpuhp_setup) {
+ cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+ "irqchip/sifive/plic:starting",
+ plic_starting_cpu, plic_dying_cpu);
+ register_syscore_ops(&plic_irq_syscore_ops);
+ plic_cpuhp_setup_done = true;
+ }
}
- pr_info("%pOFP: mapped %d interrupts with %d handlers for"
- " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
+ dev_info(dev, "mapped %d interrupts with %d handlers for %d contexts.\n",
+ nr_irqs, nr_handlers, nr_contexts);
return 0;
-out_free_enable_reg:
- for_each_cpu(cpu, cpu_present_mask) {
+fail_cleanup_contexts:
+ for (i = 0; i < nr_contexts; i++) {
+ if (plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu))
+ continue;
+ if (parent_hwirq != RV_IRQ_EXT || cpu < 0)
+ continue;
+
handler = per_cpu_ptr(&plic_handlers, cpu);
- kfree(handler->enable_save);
+ handler->present = false;
+ handler->hart_base = NULL;
+ handler->enable_base = NULL;
+ handler->enable_save = NULL;
+ handler->priv = NULL;
}
-out_free_priority_reg:
- kfree(priv->prio_save);
-out_iounmap:
- iounmap(priv->regs);
-out_free_priv:
- kfree(priv);
- return error;
+ return -ENOMEM;
}
-static int __init plic_init(struct device_node *node,
- struct device_node *parent)
-{
- return __plic_init(node, parent, 0);
-}
-
-IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
-IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
-
-static int __init plic_edge_init(struct device_node *node,
- struct device_node *parent)
-{
- return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT));
-}
-
-IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init);
-IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init);
+static struct platform_driver plic_driver = {
+ .driver = {
+ .name = "riscv-plic",
+ .of_match_table = plic_match,
+ },
+ .probe = plic_probe,
+};
+builtin_platform_driver(plic_driver);
diff --git a/drivers/irqchip/irq-starfive-jh8100-intc.c b/drivers/irqchip/irq-starfive-jh8100-intc.c
new file mode 100644
index 000000000000..0f5837176e53
--- /dev/null
+++ b/drivers/irqchip/irq-starfive-jh8100-intc.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * StarFive JH8100 External Interrupt Controller driver
+ *
+ * Copyright (C) 2023 StarFive Technology Co., Ltd.
+ *
+ * Author: Changhuang Liang <changhuang.liang@starfivetech.com>
+ */
+
+#define pr_fmt(fmt) "irq-starfive-jh8100: " fmt
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/reset.h>
+#include <linux/spinlock.h>
+
+#define STARFIVE_INTC_SRC0_CLEAR 0x10
+#define STARFIVE_INTC_SRC0_MASK 0x14
+#define STARFIVE_INTC_SRC0_INT 0x1c
+
+#define STARFIVE_INTC_SRC_IRQ_NUM 32
+
+struct starfive_irq_chip {
+ void __iomem *base;
+ struct irq_domain *domain;
+ raw_spinlock_t lock;
+};
+
+static void starfive_intc_bit_set(struct starfive_irq_chip *irqc,
+ u32 reg, u32 bit_mask)
+{
+ u32 value;
+
+ value = ioread32(irqc->base + reg);
+ value |= bit_mask;
+ iowrite32(value, irqc->base + reg);
+}
+
+static void starfive_intc_bit_clear(struct starfive_irq_chip *irqc,
+ u32 reg, u32 bit_mask)
+{
+ u32 value;
+
+ value = ioread32(irqc->base + reg);
+ value &= ~bit_mask;
+ iowrite32(value, irqc->base + reg);
+}
+
+static void starfive_intc_unmask(struct irq_data *d)
+{
+ struct starfive_irq_chip *irqc = irq_data_get_irq_chip_data(d);
+
+ raw_spin_lock(&irqc->lock);
+ starfive_intc_bit_clear(irqc, STARFIVE_INTC_SRC0_MASK, BIT(d->hwirq));
+ raw_spin_unlock(&irqc->lock);
+}
+
+static void starfive_intc_mask(struct irq_data *d)
+{
+ struct starfive_irq_chip *irqc = irq_data_get_irq_chip_data(d);
+
+ raw_spin_lock(&irqc->lock);
+ starfive_intc_bit_set(irqc, STARFIVE_INTC_SRC0_MASK, BIT(d->hwirq));
+ raw_spin_unlock(&irqc->lock);
+}
+
+static struct irq_chip intc_dev = {
+ .name = "StarFive JH8100 INTC",
+ .irq_unmask = starfive_intc_unmask,
+ .irq_mask = starfive_intc_mask,
+};
+
+static int starfive_intc_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ irq_domain_set_info(d, irq, hwirq, &intc_dev, d->host_data,
+ handle_level_irq, NULL, NULL);
+
+ return 0;
+}
+
+static const struct irq_domain_ops starfive_intc_domain_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ .map = starfive_intc_map,
+};
+
+static void starfive_intc_irq_handler(struct irq_desc *desc)
+{
+ struct starfive_irq_chip *irqc = irq_data_get_irq_handler_data(&desc->irq_data);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned long value;
+ int hwirq;
+
+ chained_irq_enter(chip, desc);
+
+ value = ioread32(irqc->base + STARFIVE_INTC_SRC0_INT);
+ while (value) {
+ hwirq = ffs(value) - 1;
+
+ generic_handle_domain_irq(irqc->domain, hwirq);
+
+ starfive_intc_bit_set(irqc, STARFIVE_INTC_SRC0_CLEAR, BIT(hwirq));
+ starfive_intc_bit_clear(irqc, STARFIVE_INTC_SRC0_CLEAR, BIT(hwirq));
+
+ __clear_bit(hwirq, &value);
+ }
+
+ chained_irq_exit(chip, desc);
+}
+
+static int __init starfive_intc_init(struct device_node *intc,
+ struct device_node *parent)
+{
+ struct starfive_irq_chip *irqc;
+ struct reset_control *rst;
+ struct clk *clk;
+ int parent_irq;
+ int ret;
+
+ irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
+ if (!irqc)
+ return -ENOMEM;
+
+ irqc->base = of_iomap(intc, 0);
+ if (!irqc->base) {
+ pr_err("Unable to map registers\n");
+ ret = -ENXIO;
+ goto err_free;
+ }
+
+ rst = of_reset_control_get_exclusive(intc, NULL);
+ if (IS_ERR(rst)) {
+ pr_err("Unable to get reset control %pe\n", rst);
+ ret = PTR_ERR(rst);
+ goto err_unmap;
+ }
+
+ clk = of_clk_get(intc, 0);
+ if (IS_ERR(clk)) {
+ pr_err("Unable to get clock %pe\n", clk);
+ ret = PTR_ERR(clk);
+ goto err_reset_put;
+ }
+
+ ret = reset_control_deassert(rst);
+ if (ret)
+ goto err_clk_put;
+
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ goto err_reset_assert;
+
+ raw_spin_lock_init(&irqc->lock);
+
+ irqc->domain = irq_domain_add_linear(intc, STARFIVE_INTC_SRC_IRQ_NUM,
+ &starfive_intc_domain_ops, irqc);
+ if (!irqc->domain) {
+ pr_err("Unable to create IRQ domain\n");
+ ret = -EINVAL;
+ goto err_clk_disable;
+ }
+
+ parent_irq = of_irq_get(intc, 0);
+ if (parent_irq < 0) {
+ pr_err("Failed to get main IRQ: %d\n", parent_irq);
+ ret = parent_irq;
+ goto err_remove_domain;
+ }
+
+ irq_set_chained_handler_and_data(parent_irq, starfive_intc_irq_handler,
+ irqc);
+
+ pr_info("Interrupt controller register, nr_irqs %d\n",
+ STARFIVE_INTC_SRC_IRQ_NUM);
+
+ return 0;
+
+err_remove_domain:
+ irq_domain_remove(irqc->domain);
+err_clk_disable:
+ clk_disable_unprepare(clk);
+err_reset_assert:
+ reset_control_assert(rst);
+err_clk_put:
+ clk_put(clk);
+err_reset_put:
+ reset_control_put(rst);
+err_unmap:
+ iounmap(irqc->base);
+err_free:
+ kfree(irqc);
+ return ret;
+}
+
+IRQCHIP_PLATFORM_DRIVER_BEGIN(starfive_intc)
+IRQCHIP_MATCH("starfive,jh8100-intc", starfive_intc_init)
+IRQCHIP_PLATFORM_DRIVER_END(starfive_intc)
+
+MODULE_DESCRIPTION("StarFive JH8100 External Interrupt Controller");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Changhuang Liang <changhuang.liang@starfivetech.com>");
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 971240e2e31b..26a5193d0ae4 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -898,10 +898,9 @@ static void stm32_exti_remove_irq(void *data)
irq_domain_remove(domain);
}
-static int stm32_exti_remove(struct platform_device *pdev)
+static void stm32_exti_remove(struct platform_device *pdev)
{
stm32_exti_h_syscore_deinit();
- return 0;
}
static int stm32_exti_probe(struct platform_device *pdev)
@@ -991,10 +990,10 @@ MODULE_DEVICE_TABLE(of, stm32_exti_ids);
static struct platform_driver stm32_exti_driver = {
.probe = stm32_exti_probe,
- .remove = stm32_exti_remove,
+ .remove_new = stm32_exti_remove,
.driver = {
- .name = "stm32_exti",
- .of_match_table = stm32_exti_ids,
+ .name = "stm32_exti",
+ .of_match_table = stm32_exti_ids,
},
};
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index b2d61d4f6fe6..57f610dab6b8 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -139,13 +139,11 @@ static int ts4800_ic_probe(struct platform_device *pdev)
return 0;
}
-static int ts4800_ic_remove(struct platform_device *pdev)
+static void ts4800_ic_remove(struct platform_device *pdev)
{
struct ts4800_irq_data *data = platform_get_drvdata(pdev);
irq_domain_remove(data->domain);
-
- return 0;
}
static const struct of_device_id ts4800_ic_of_match[] = {
@@ -155,11 +153,11 @@ static const struct of_device_id ts4800_ic_of_match[] = {
MODULE_DEVICE_TABLE(of, ts4800_ic_of_match);
static struct platform_driver ts4800_ic_driver = {
- .probe = ts4800_ic_probe,
- .remove = ts4800_ic_remove,
+ .probe = ts4800_ic_probe,
+ .remove_new = ts4800_ic_remove,
.driver = {
- .name = "ts4800-irqc",
- .of_match_table = ts4800_ic_of_match,
+ .name = "ts4800-irqc",
+ .of_match_table = ts4800_ic_of_match,
},
};
module_platform_driver(ts4800_ic_driver);
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index 9e3d5561e04e..ea93e7236c4a 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -47,9 +47,8 @@
/**
* struct vic_device - VIC PM device
- * @parent_irq: The parent IRQ number of the VIC if cascaded, or 0.
- * @irq: The IRQ number for the base of the VIC.
* @base: The register base for the VIC.
+ * @irq: The IRQ number for the base of the VIC.
* @valid_sources: A bitmask of valid interrupts
* @resume_sources: A bitmask of interrupts for resume.
* @resume_irqs: The IRQs enabled for resume.
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 6e80d7bd3c4d..3ed257334562 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -49,7 +49,9 @@ MODULE_LICENSE("GPL");
/* -------- driver information -------------------------------------- */
static DEFINE_MUTEX(capi_mutex);
-static struct class *capi_class;
+static const struct class capi_class = {
+ .name = "capi",
+};
static int capi_major = 68; /* allocated */
module_param_named(major, capi_major, uint, 0);
@@ -1393,18 +1395,19 @@ static int __init capi_init(void)
kcapi_exit();
return major_ret;
}
- capi_class = class_create("capi");
- if (IS_ERR(capi_class)) {
+
+ ret = class_register(&capi_class);
+ if (ret) {
unregister_chrdev(capi_major, "capi20");
kcapi_exit();
- return PTR_ERR(capi_class);
+ return ret;
}
- device_create(capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi20");
+ device_create(&capi_class, NULL, MKDEV(capi_major, 0), NULL, "capi20");
if (capinc_tty_init() < 0) {
- device_destroy(capi_class, MKDEV(capi_major, 0));
- class_destroy(capi_class);
+ device_destroy(&capi_class, MKDEV(capi_major, 0));
+ class_unregister(&capi_class);
unregister_chrdev(capi_major, "capi20");
kcapi_exit();
return -ENOMEM;
@@ -1427,8 +1430,8 @@ static void __exit capi_exit(void)
{
proc_exit();
- device_destroy(capi_class, MKDEV(capi_major, 0));
- class_destroy(capi_class);
+ device_destroy(&capi_class, MKDEV(capi_major, 0));
+ class_unregister(&capi_class);
unregister_chrdev(capi_major, "capi20");
capinc_tty_exit();
diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
index 09b72f14d4b7..b4ed0bb8ddfb 100644
--- a/drivers/isdn/mISDN/dsp_pipeline.c
+++ b/drivers/isdn/mISDN/dsp_pipeline.c
@@ -31,7 +31,9 @@ struct dsp_element_entry {
static LIST_HEAD(dsp_elements);
/* sysfs */
-static struct class *elements_class;
+static const struct class elements_class = {
+ .name = "dsp_pipeline",
+};
static ssize_t
attr_show_args(struct device *dev, struct device_attribute *attr, char *buf)
@@ -80,7 +82,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem)
INIT_LIST_HEAD(&entry->list);
entry->elem = elem;
- entry->dev.class = elements_class;
+ entry->dev.class = &elements_class;
entry->dev.release = mISDN_dsp_dev_release;
dev_set_drvdata(&entry->dev, elem);
dev_set_name(&entry->dev, "%s", elem->name);
@@ -131,9 +133,11 @@ EXPORT_SYMBOL(mISDN_dsp_element_unregister);
int dsp_pipeline_module_init(void)
{
- elements_class = class_create("dsp_pipeline");
- if (IS_ERR(elements_class))
- return PTR_ERR(elements_class);
+ int err;
+
+ err = class_register(&elements_class);
+ if (err)
+ return err;
dsp_hwec_init();
@@ -146,7 +150,7 @@ void dsp_pipeline_module_exit(void)
dsp_hwec_exit();
- class_destroy(elements_class);
+ class_unregister(&elements_class);
list_for_each_entry_safe(entry, n, &dsp_elements, list) {
list_del(&entry->list);
diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c
index e3e28a4f7d01..b1abc2a0c971 100644
--- a/drivers/mailbox/bcm-flexrm-mailbox.c
+++ b/drivers/mailbox/bcm-flexrm-mailbox.c
@@ -1587,8 +1587,8 @@ static int flexrm_mbox_probe(struct platform_device *pdev)
}
/* Allocate platform MSIs for each ring */
- ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings,
- flexrm_mbox_msi_write);
+ ret = platform_device_msi_init_and_alloc_irqs(dev, mbox->num_rings,
+ flexrm_mbox_msi_write);
if (ret)
goto fail_destroy_cmpl_pool;
@@ -1641,7 +1641,7 @@ skip_debugfs:
fail_free_debugfs_root:
debugfs_remove_recursive(mbox->root);
- platform_msi_domain_free_irqs(dev);
+ platform_device_msi_free_irqs_all(dev);
fail_destroy_cmpl_pool:
dma_pool_destroy(mbox->cmpl_pool);
fail_destroy_bd_pool:
@@ -1657,7 +1657,7 @@ static void flexrm_mbox_remove(struct platform_device *pdev)
debugfs_remove_recursive(mbox->root);
- platform_msi_domain_free_irqs(dev);
+ platform_device_msi_free_irqs_all(dev);
dma_pool_destroy(mbox->cmpl_pool);
dma_pool_destroy(mbox->bd_pool);
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index a743e2c572fc..68ce56fc61d0 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -634,4 +634,6 @@ config DM_AUDIT
Enables audit logging of several security relevant events in the
particular device-mapper targets, especially the integrity target.
+source "drivers/md/dm-vdo/Kconfig"
+
endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 027d7cfeca3f..476a214e4bdc 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_DM_ZERO) += dm-zero.o
obj-$(CONFIG_DM_RAID) += dm-raid.o
obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
+obj-$(CONFIG_DM_VDO) += dm-vdo/
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
obj-$(CONFIG_DM_EBS) += dm-ebs.o
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6ae2329052c9..4e6afa89921f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -300,7 +300,7 @@ struct cached_dev {
struct list_head list;
struct bcache_device disk;
struct block_device *bdev;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct cache_sb sb;
struct cache_sb_disk *sb_disk;
@@ -423,7 +423,7 @@ struct cache {
struct kobject kobj;
struct block_device *bdev;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct task_struct *alloc_thread;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index dc3f50f69714..330bcd9ea4a9 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -900,9 +900,23 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
SIZE_MAX / sizeof(atomic_t));
+ struct queue_limits lim = {
+ .max_hw_sectors = UINT_MAX,
+ .max_sectors = UINT_MAX,
+ .max_segment_size = UINT_MAX,
+ .max_segments = BIO_MAX_VECS,
+ .max_hw_discard_sectors = UINT_MAX,
+ .io_min = block_size,
+ .logical_block_size = block_size,
+ .physical_block_size = block_size,
+ };
uint64_t n;
int idx;
+ if (cached_bdev) {
+ d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT;
+ lim.io_opt = umax(block_size, bdev_io_opt(cached_bdev));
+ }
if (!d->stripe_size)
d->stripe_size = 1 << 31;
else if (d->stripe_size < BCH_MIN_STRIPE_SZ)
@@ -935,8 +949,21 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
goto out_ida_remove;
- d->disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!d->disk)
+ if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
+ /*
+ * This should only happen with BCACHE_SB_VERSION_BDEV.
+ * Block/page size is checked for BCACHE_SB_VERSION_CDEV.
+ */
+ pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
+ idx, lim.logical_block_size,
+ PAGE_SIZE, bdev_logical_block_size(cached_bdev));
+
+ /* This also adjusts physical block size/min io size if needed */
+ lim.logical_block_size = bdev_logical_block_size(cached_bdev);
+ }
+
+ d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(d->disk))
goto out_bioset_exit;
set_capacity(d->disk, sectors);
@@ -949,27 +976,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->private_data = d;
q = d->disk->queue;
- q->limits.max_hw_sectors = UINT_MAX;
- q->limits.max_sectors = UINT_MAX;
- q->limits.max_segment_size = UINT_MAX;
- q->limits.max_segments = BIO_MAX_VECS;
- blk_queue_max_discard_sectors(q, UINT_MAX);
- q->limits.io_min = block_size;
- q->limits.logical_block_size = block_size;
- q->limits.physical_block_size = block_size;
-
- if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
- /*
- * This should only happen with BCACHE_SB_VERSION_BDEV.
- * Block/page size is checked for BCACHE_SB_VERSION_CDEV.
- */
- pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
- d->disk->disk_name, q->limits.logical_block_size,
- PAGE_SIZE, bdev_logical_block_size(cached_bdev));
-
- /* This also adjusts physical block size/min io size if needed */
- blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
- }
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
@@ -1369,8 +1375,8 @@ static CLOSURE_CALLBACK(cached_dev_free)
if (dc->sb_disk)
put_page(virt_to_page(dc->sb_disk));
- if (dc->bdev_handle)
- bdev_release(dc->bdev_handle);
+ if (dc->bdev_file)
+ fput(dc->bdev_file);
wake_up(&unregister_wait);
@@ -1416,9 +1422,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
}
- dc->disk.stripe_size = q->limits.io_opt >> 9;
-
- if (dc->disk.stripe_size)
+ if (bdev_io_opt(dc->bdev))
dc->partial_stripes_expensive =
q->limits.raid_partial_stripes_expensive;
@@ -1428,9 +1432,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
if (ret)
return ret;
- blk_queue_io_opt(dc->disk.disk->queue,
- max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
-
atomic_set(&dc->io_errors, 0);
dc->io_disable = false;
dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
@@ -1445,7 +1446,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
/* Cached device - bcache superblock */
static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
- struct bdev_handle *bdev_handle,
+ struct file *bdev_file,
struct cached_dev *dc)
{
const char *err = "cannot allocate memory";
@@ -1453,8 +1454,8 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
int ret = -ENOMEM;
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
- dc->bdev_handle = bdev_handle;
- dc->bdev = bdev_handle->bdev;
+ dc->bdev_file = bdev_file;
+ dc->bdev = file_bdev(bdev_file);
dc->sb_disk = sb_disk;
if (cached_dev_init(dc, sb->block_size << 9))
@@ -2218,8 +2219,8 @@ void bch_cache_release(struct kobject *kobj)
if (ca->sb_disk)
put_page(virt_to_page(ca->sb_disk));
- if (ca->bdev_handle)
- bdev_release(ca->bdev_handle);
+ if (ca->bdev_file)
+ fput(ca->bdev_file);
kfree(ca);
module_put(THIS_MODULE);
@@ -2339,18 +2340,18 @@ err_free:
}
static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
- struct bdev_handle *bdev_handle,
+ struct file *bdev_file,
struct cache *ca)
{
const char *err = NULL; /* must be set for any error case */
int ret = 0;
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
- ca->bdev_handle = bdev_handle;
- ca->bdev = bdev_handle->bdev;
+ ca->bdev_file = bdev_file;
+ ca->bdev = file_bdev(bdev_file);
ca->sb_disk = sb_disk;
- if (bdev_max_discard_sectors((bdev_handle->bdev)))
+ if (bdev_max_discard_sectors(file_bdev(bdev_file)))
ca->discard = CACHE_DISCARD(&ca->sb);
ret = cache_alloc(ca);
@@ -2361,20 +2362,20 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
err = "cache_alloc(): cache device is too small";
else
err = "cache_alloc(): unknown error";
- pr_notice("error %pg: %s\n", bdev_handle->bdev, err);
+ pr_notice("error %pg: %s\n", file_bdev(bdev_file), err);
/*
* If we failed here, it means ca->kobj is not initialized yet,
* kobject_put() won't be called and there is no chance to
- * call bdev_release() to bdev in bch_cache_release(). So
- * we explicitly call bdev_release() here.
+ * call fput() to bdev in bch_cache_release(). So
+ * we explicitly call fput() on the block device here.
*/
- bdev_release(bdev_handle);
+ fput(bdev_file);
return ret;
}
- if (kobject_add(&ca->kobj, bdev_kobj(bdev_handle->bdev), "bcache")) {
+ if (kobject_add(&ca->kobj, bdev_kobj(file_bdev(bdev_file)), "bcache")) {
pr_notice("error %pg: error calling kobject_add\n",
- bdev_handle->bdev);
+ file_bdev(bdev_file));
ret = -ENOMEM;
goto out;
}
@@ -2388,7 +2389,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto out;
}
- pr_info("registered cache device %pg\n", ca->bdev_handle->bdev);
+ pr_info("registered cache device %pg\n", file_bdev(ca->bdev_file));
out:
kobject_put(&ca->kobj);
@@ -2446,7 +2447,7 @@ struct async_reg_args {
char *path;
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
void *holder;
};
@@ -2457,7 +2458,7 @@ static void register_bdev_worker(struct work_struct *work)
container_of(work, struct async_reg_args, reg_work.work);
mutex_lock(&bch_register_lock);
- if (register_bdev(args->sb, args->sb_disk, args->bdev_handle,
+ if (register_bdev(args->sb, args->sb_disk, args->bdev_file,
args->holder) < 0)
fail = true;
mutex_unlock(&bch_register_lock);
@@ -2478,7 +2479,7 @@ static void register_cache_worker(struct work_struct *work)
container_of(work, struct async_reg_args, reg_work.work);
/* blkdev_put() will be called in bch_cache_release() */
- if (register_cache(args->sb, args->sb_disk, args->bdev_handle,
+ if (register_cache(args->sb, args->sb_disk, args->bdev_file,
args->holder))
fail = true;
@@ -2516,7 +2517,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
char *path = NULL;
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
- struct bdev_handle *bdev_handle, *bdev_handle2;
+ struct file *bdev_file, *bdev_file2;
void *holder = NULL;
ssize_t ret;
bool async_registration = false;
@@ -2549,15 +2550,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
ret = -EINVAL;
err = "failed to open device";
- bdev_handle = bdev_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
- if (IS_ERR(bdev_handle))
+ bdev_file = bdev_file_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
+ if (IS_ERR(bdev_file))
goto out_free_sb;
err = "failed to set blocksize";
- if (set_blocksize(bdev_handle->bdev, 4096))
+ if (set_blocksize(file_bdev(bdev_file), 4096))
goto out_blkdev_put;
- err = read_super(sb, bdev_handle->bdev, &sb_disk);
+ err = read_super(sb, file_bdev(bdev_file), &sb_disk);
if (err)
goto out_blkdev_put;
@@ -2569,13 +2570,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
}
/* Now reopen in exclusive mode with proper holder */
- bdev_handle2 = bdev_open_by_dev(bdev_handle->bdev->bd_dev,
+ bdev_file2 = bdev_file_open_by_dev(file_bdev(bdev_file)->bd_dev,
BLK_OPEN_READ | BLK_OPEN_WRITE, holder, NULL);
- bdev_release(bdev_handle);
- bdev_handle = bdev_handle2;
- if (IS_ERR(bdev_handle)) {
- ret = PTR_ERR(bdev_handle);
- bdev_handle = NULL;
+ fput(bdev_file);
+ bdev_file = bdev_file2;
+ if (IS_ERR(bdev_file)) {
+ ret = PTR_ERR(bdev_file);
+ bdev_file = NULL;
if (ret == -EBUSY) {
dev_t dev;
@@ -2610,7 +2611,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
args->path = path;
args->sb = sb;
args->sb_disk = sb_disk;
- args->bdev_handle = bdev_handle;
+ args->bdev_file = bdev_file;
args->holder = holder;
register_device_async(args);
/* No wait and returns to user space */
@@ -2619,14 +2620,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (SB_IS_BDEV(sb)) {
mutex_lock(&bch_register_lock);
- ret = register_bdev(sb, sb_disk, bdev_handle, holder);
+ ret = register_bdev(sb, sb_disk, bdev_file, holder);
mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */
if (ret < 0)
goto out_free_sb;
} else {
/* blkdev_put() will be called in bch_cache_release() */
- ret = register_cache(sb, sb_disk, bdev_handle, holder);
+ ret = register_cache(sb, sb_disk, bdev_file, holder);
if (ret)
goto out_free_sb;
}
@@ -2642,8 +2643,8 @@ out_free_holder:
out_put_sb_page:
put_page(virt_to_page(sb_disk));
out_blkdev_put:
- if (bdev_handle)
- bdev_release(bdev_handle);
+ if (bdev_file)
+ fput(bdev_file);
out_free_sb:
kfree(sb);
out_free_path:
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index 9ab32abe5ed4..bca0f39e15b8 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -489,5 +489,5 @@ module_init(dm_bio_prison_init);
module_exit(dm_bio_prison_exit);
MODULE_DESCRIPTION(DM_NAME " bio prison");
-MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 13c65b7e1ed6..098bf526136c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1292,7 +1292,8 @@ static void dmio_complete(unsigned long error, void *context)
}
static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
- unsigned int n_sectors, unsigned int offset)
+ unsigned int n_sectors, unsigned int offset,
+ unsigned short ioprio)
{
int r;
struct dm_io_request io_req = {
@@ -1315,7 +1316,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
io_req.mem.ptr.vma = (char *)b->data + offset;
}
- r = dm_io(&io_req, 1, &region, NULL);
+ r = dm_io(&io_req, 1, &region, NULL, ioprio);
if (unlikely(r))
b->end_io(b, errno_to_blk_status(r));
}
@@ -1331,7 +1332,8 @@ static void bio_complete(struct bio *bio)
}
static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
- unsigned int n_sectors, unsigned int offset)
+ unsigned int n_sectors, unsigned int offset,
+ unsigned short ioprio)
{
struct bio *bio;
char *ptr;
@@ -1339,13 +1341,14 @@ static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
bio = bio_kmalloc(1, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
if (!bio) {
- use_dmio(b, op, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset, ioprio);
return;
}
bio_init(bio, b->c->bdev, bio->bi_inline_vecs, 1, op);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = bio_complete;
bio->bi_private = b;
+ bio->bi_ioprio = ioprio;
ptr = (char *)b->data + offset;
len = n_sectors << SECTOR_SHIFT;
@@ -1368,7 +1371,7 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block
return sector;
}
-static void submit_io(struct dm_buffer *b, enum req_op op,
+static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio,
void (*end_io)(struct dm_buffer *, blk_status_t))
{
unsigned int n_sectors;
@@ -1398,9 +1401,9 @@ static void submit_io(struct dm_buffer *b, enum req_op op,
}
if (b->data_mode != DATA_MODE_VMALLOC)
- use_bio(b, op, sector, n_sectors, offset);
+ use_bio(b, op, sector, n_sectors, offset, ioprio);
else
- use_dmio(b, op, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset, ioprio);
}
/*
@@ -1456,7 +1459,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
b->write_end = b->dirty_end;
if (!write_list)
- submit_io(b, REQ_OP_WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio);
else
list_add_tail(&b->write_list, write_list);
}
@@ -1470,7 +1473,7 @@ static void __flush_write_list(struct list_head *write_list)
struct dm_buffer *b =
list_entry(write_list->next, struct dm_buffer, write_list);
list_del(&b->write_list);
- submit_io(b, REQ_OP_WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio);
cond_resched();
}
blk_finish_plug(&plug);
@@ -1852,7 +1855,8 @@ static void read_endio(struct dm_buffer *b, blk_status_t status)
* and uses dm_bufio_mark_buffer_dirty to write new data back).
*/
static void *new_read(struct dm_bufio_client *c, sector_t block,
- enum new_flag nf, struct dm_buffer **bp)
+ enum new_flag nf, struct dm_buffer **bp,
+ unsigned short ioprio)
{
int need_submit = 0;
struct dm_buffer *b;
@@ -1905,7 +1909,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
return NULL;
if (need_submit)
- submit_io(b, REQ_OP_READ, read_endio);
+ submit_io(b, REQ_OP_READ, ioprio, read_endio);
if (nf != NF_GET) /* we already tested this condition above */
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
@@ -1926,32 +1930,46 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp)
{
- return new_read(c, block, NF_GET, bp);
+ return new_read(c, block, NF_GET, bp, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_get);
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
- struct dm_buffer **bp)
+static void *__dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp, unsigned short ioprio)
{
if (WARN_ON_ONCE(dm_bufio_in_request()))
return ERR_PTR(-EINVAL);
- return new_read(c, block, NF_READ, bp);
+ return new_read(c, block, NF_READ, bp, ioprio);
+}
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp)
+{
+ return __dm_bufio_read(c, block, bp, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_read);
+void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp, unsigned short ioprio)
+{
+ return __dm_bufio_read(c, block, bp, ioprio);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_read_with_ioprio);
+
void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp)
{
if (WARN_ON_ONCE(dm_bufio_in_request()))
return ERR_PTR(-EINVAL);
- return new_read(c, block, NF_FRESH, bp);
+ return new_read(c, block, NF_FRESH, bp, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_new);
-void dm_bufio_prefetch(struct dm_bufio_client *c,
- sector_t block, unsigned int n_blocks)
+static void __dm_bufio_prefetch(struct dm_bufio_client *c,
+ sector_t block, unsigned int n_blocks,
+ unsigned short ioprio)
{
struct blk_plug plug;
@@ -1987,7 +2005,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
dm_bufio_unlock(c);
if (need_submit)
- submit_io(b, REQ_OP_READ, read_endio);
+ submit_io(b, REQ_OP_READ, ioprio, read_endio);
dm_bufio_release(b);
cond_resched();
@@ -2002,8 +2020,20 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
flush_plug:
blk_finish_plug(&plug);
}
+
+void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks)
+{
+ return __dm_bufio_prefetch(c, block, n_blocks, IOPRIO_DEFAULT);
+}
EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
+void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, sector_t block,
+ unsigned int n_blocks, unsigned short ioprio)
+{
+ return __dm_bufio_prefetch(c, block, n_blocks, ioprio);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_prefetch_with_ioprio);
+
void dm_bufio_release(struct dm_buffer *b)
{
struct dm_bufio_client *c = b->c;
@@ -2167,7 +2197,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
if (WARN_ON_ONCE(dm_bufio_in_request()))
return -EINVAL;
- return dm_io(&io_req, 1, &io_reg, NULL);
+ return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
@@ -2191,7 +2221,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c
if (WARN_ON_ONCE(dm_bufio_in_request()))
return -EINVAL; /* discards are optional */
- return dm_io(&io_req, 1, &io_reg, NULL);
+ return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
@@ -2968,6 +2998,6 @@ MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, 0444);
MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
-MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
+MODULE_AUTHOR("Mikulas Patocka <dm-devel@lists.linux.dev>");
MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 8bd2ad743d9a..2ed894155cab 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1947,7 +1947,7 @@ static void __exit smq_exit(void)
module_init(smq_init);
module_exit(smq_exit);
-MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("smq cache policy");
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 095b9b49aa82..e6757a30dcca 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -22,6 +22,8 @@
#include "dm-ima.h"
#define DM_RESERVED_MAX_IOS 1024
+#define DM_MAX_TARGETS 1048576
+#define DM_MAX_TARGET_PARAMS 1024
struct dm_io;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 855b482cbff1..9a74c6316c5d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -53,15 +53,17 @@
struct convert_context {
struct completion restart;
struct bio *bio_in;
- struct bio *bio_out;
struct bvec_iter iter_in;
+ struct bio *bio_out;
struct bvec_iter iter_out;
- u64 cc_sector;
atomic_t cc_pending;
+ u64 cc_sector;
union {
struct skcipher_request *req;
struct aead_request *req_aead;
} r;
+ bool aead_recheck;
+ bool aead_failed;
};
@@ -73,10 +75,8 @@ struct dm_crypt_io {
struct bio *base_bio;
u8 *integrity_metadata;
bool integrity_metadata_from_pool:1;
- bool in_tasklet:1;
struct work_struct work;
- struct tasklet_struct tasklet;
struct convert_context ctx;
@@ -84,6 +84,8 @@ struct dm_crypt_io {
blk_status_t error;
sector_t sector;
+ struct bvec_iter saved_bi_iter;
+
struct rb_node rb_node;
} CRYPTO_MINALIGN_ATTR;
@@ -1372,10 +1374,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
if (r == -EBADMSG) {
sector_t s = le64_to_cpu(*sector);
- DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
- ctx->bio_in->bi_bdev, s);
- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
- ctx->bio_in, s, 0);
+ ctx->aead_failed = true;
+ if (ctx->aead_recheck) {
+ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+ ctx->bio_in->bi_bdev, s);
+ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
+ ctx->bio_in, s, 0);
+ }
}
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
@@ -1683,6 +1688,7 @@ retry:
GFP_NOIO, &cc->bs);
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
+ clone->bi_ioprio = io->base_bio->bi_ioprio;
remaining_size = size;
@@ -1759,10 +1765,11 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
io->base_bio = bio;
io->sector = sector;
io->error = 0;
+ io->ctx.aead_recheck = false;
+ io->ctx.aead_failed = false;
io->ctx.r.req = NULL;
io->integrity_metadata = NULL;
io->integrity_metadata_from_pool = false;
- io->in_tasklet = false;
atomic_set(&io->io_pending, 0);
}
@@ -1771,12 +1778,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
atomic_inc(&io->io_pending);
}
-static void kcryptd_io_bio_endio(struct work_struct *work)
-{
- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
-
- bio_endio(io->base_bio);
-}
+static void kcryptd_queue_read(struct dm_crypt_io *io);
/*
* One of the bios was finished. Check for completion of
@@ -1791,6 +1793,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
if (!atomic_dec_and_test(&io->io_pending))
return;
+ if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) &&
+ cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) {
+ io->ctx.aead_recheck = true;
+ io->ctx.aead_failed = false;
+ io->error = 0;
+ kcryptd_queue_read(io);
+ return;
+ }
+
if (io->ctx.r.req)
crypt_free_req(cc, io->ctx.r.req, base_bio);
@@ -1801,20 +1812,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
base_bio->bi_status = error;
- /*
- * If we are running this function from our tasklet,
- * we can't call bio_endio() here, because it will call
- * clone_endio() from dm.c, which in turn will
- * free the current struct dm_crypt_io structure with
- * our tasklet. In this case we need to delay bio_endio()
- * execution to after the tasklet is done and dequeued.
- */
- if (io->in_tasklet) {
- INIT_WORK(&io->work, kcryptd_io_bio_endio);
- queue_work(cc->io_queue, &io->work);
- return;
- }
-
bio_endio(base_bio);
}
@@ -1840,15 +1837,19 @@ static void crypt_endio(struct bio *clone)
struct dm_crypt_io *io = clone->bi_private;
struct crypt_config *cc = io->cc;
unsigned int rw = bio_data_dir(clone);
- blk_status_t error;
+ blk_status_t error = clone->bi_status;
+
+ if (io->ctx.aead_recheck && !error) {
+ kcryptd_queue_crypt(io);
+ return;
+ }
/*
* free the processed pages
*/
- if (rw == WRITE)
+ if (rw == WRITE || io->ctx.aead_recheck)
crypt_free_buffer_pages(cc, clone);
- error = clone->bi_status;
bio_put(clone);
if (rw == READ && !error) {
@@ -1869,6 +1870,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
struct crypt_config *cc = io->cc;
struct bio *clone;
+ if (io->ctx.aead_recheck) {
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ return 1;
+ crypt_inc_pending(io);
+ clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+ if (unlikely(!clone)) {
+ crypt_dec_pending(io);
+ return 1;
+ }
+ clone->bi_iter.bi_sector = cc->start + io->sector;
+ crypt_convert_init(cc, &io->ctx, clone, clone, io->sector);
+ io->saved_bi_iter = clone->bi_iter;
+ dm_submit_bio_remap(io->base_bio, clone);
+ return 0;
+ }
+
/*
* We need the original biovec array in order to decrypt the whole bio
* data *afterwards* -- thanks to immutable biovecs we don't need to
@@ -1948,7 +1965,6 @@ continue_locked:
schedule();
- set_current_state(TASK_RUNNING);
spin_lock_irq(&cc->write_thread_lock);
goto continue_locked;
@@ -2095,6 +2111,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
io->ctx.bio_out = clone;
io->ctx.iter_out = clone->bi_iter;
+ if (crypt_integrity_aead(cc)) {
+ bio_copy_data(clone, io->base_bio);
+ io->ctx.bio_in = clone;
+ io->ctx.iter_in = clone->bi_iter;
+ }
+
sector += bio_sectors(clone);
crypt_inc_pending(io);
@@ -2131,6 +2153,14 @@ dec:
static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
{
+ if (io->ctx.aead_recheck) {
+ if (!io->error) {
+ io->ctx.bio_in->bi_iter = io->saved_bi_iter;
+ bio_copy_data(io->base_bio, io->ctx.bio_in);
+ }
+ crypt_free_buffer_pages(io->cc, io->ctx.bio_in);
+ bio_put(io->ctx.bio_in);
+ }
crypt_dec_pending(io);
}
@@ -2160,11 +2190,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
- crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
- io->sector);
+ if (io->ctx.aead_recheck) {
+ io->ctx.cc_sector = io->sector + cc->iv_offset;
+ r = crypt_convert(cc, &io->ctx,
+ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+ } else {
+ crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
+ io->sector);
- r = crypt_convert(cc, &io->ctx,
- test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+ r = crypt_convert(cc, &io->ctx,
+ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+ }
/*
* Crypto API backlogged the request, because its queue was full
* and we're in softirq context, so continue from a workqueue
@@ -2206,10 +2242,13 @@ static void kcryptd_async_done(void *data, int error)
if (error == -EBADMSG) {
sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq));
- DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
- ctx->bio_in->bi_bdev, s);
- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
- ctx->bio_in, s, 0);
+ ctx->aead_failed = true;
+ if (ctx->aead_recheck) {
+ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+ ctx->bio_in->bi_bdev, s);
+ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
+ ctx->bio_in, s, 0);
+ }
io->error = BLK_STS_PROTECTION;
} else if (error < 0)
io->error = BLK_STS_IOERR;
@@ -2246,11 +2285,6 @@ static void kcryptd_crypt(struct work_struct *work)
kcryptd_crypt_write_convert(io);
}
-static void kcryptd_crypt_tasklet(unsigned long work)
-{
- kcryptd_crypt((struct work_struct *)work);
-}
-
static void kcryptd_queue_crypt(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
@@ -2263,14 +2297,13 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
* it is being executed with irqs disabled.
*/
if (in_hardirq() || irqs_disabled()) {
- io->in_tasklet = true;
- tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
- tasklet_schedule(&io->tasklet);
+ INIT_WORK(&io->work, kcryptd_crypt);
+ queue_work(system_bh_wq, &io->work);
+ return;
+ } else {
+ kcryptd_crypt(&io->work);
return;
}
-
- kcryptd_crypt(&io->work);
- return;
}
INIT_WORK(&io->work, kcryptd_crypt);
@@ -3144,7 +3177,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
sval = strchr(opt_string + strlen("integrity:"), ':') + 1;
if (!strcasecmp(sval, "aead")) {
set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags);
- } else if (strcasecmp(sval, "none")) {
+ } else if (strcasecmp(sval, "none")) {
ti->error = "Unknown integrity profile";
return -EINVAL;
}
@@ -3673,7 +3706,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 24, 0},
+ .version = {1, 25, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c
index 12a377e06d02..1a33820c9f46 100644
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -573,5 +573,5 @@ static struct target_type dust_target = {
module_dm(dust);
MODULE_DESCRIPTION(DM_NAME " dust test target");
-MODULE_AUTHOR("Bryan Gurney <dm-devel@redhat.com>");
+MODULE_AUTHOR("Bryan Gurney <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 435b45201f4d..b70d4016c2ac 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -454,6 +454,6 @@ static struct target_type ebs_target = {
};
module_dm(ebs);
-MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@lists.linux.dev>");
MODULE_DESCRIPTION(DM_NAME " emulated block size target");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 7916ed9f10e8..731467d4ed10 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -690,5 +690,5 @@ static struct target_type flakey_target = {
module_dm(flakey);
MODULE_DESCRIPTION(DM_NAME " flakey target");
-MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index c5f03aab4552..d822ab2f739b 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -278,6 +278,8 @@ struct dm_integrity_c {
atomic64_t number_of_mismatches;
+ mempool_t recheck_pool;
+
struct notifier_block reboot_notifier;
};
@@ -553,7 +555,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf)
}
}
- r = dm_io(&io_req, 1, &io_loc, NULL);
+ r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
if (unlikely(r))
return r;
@@ -1071,7 +1073,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf,
io_loc.sector = ic->start + SB_SECTORS + sector;
io_loc.count = n_sectors;
- r = dm_io(&io_req, 1, &io_loc, NULL);
+ r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
if (unlikely(r)) {
dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ?
"reading journal" : "writing journal", r);
@@ -1188,7 +1190,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, u
io_loc.sector = target;
io_loc.count = n_sectors;
- r = dm_io(&io_req, 1, &io_loc, NULL);
+ r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
if (unlikely(r)) {
WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
fn(-1UL, data);
@@ -1517,7 +1519,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat
fr.io_reg.count = 0,
fr.ic = ic;
init_completion(&fr.comp);
- r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
+ r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT);
BUG_ON(r);
}
@@ -1689,6 +1691,77 @@ failed:
get_random_bytes(result, ic->tag_size);
}
+static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum)
+{
+ struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+ struct dm_integrity_c *ic = dio->ic;
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ sector_t sector, logical_sector, area, offset;
+ struct page *page;
+ void *buffer;
+
+ get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
+ dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset,
+ &dio->metadata_offset);
+ sector = get_data_sector(ic, area, offset);
+ logical_sector = dio->range.logical_sector;
+
+ page = mempool_alloc(&ic->recheck_pool, GFP_NOIO);
+ buffer = page_to_virt(page);
+
+ __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
+ unsigned pos = 0;
+
+ do {
+ char *mem;
+ int r;
+ struct dm_io_request io_req;
+ struct dm_io_region io_loc;
+ io_req.bi_opf = REQ_OP_READ;
+ io_req.mem.type = DM_IO_KMEM;
+ io_req.mem.ptr.addr = buffer;
+ io_req.notify.fn = NULL;
+ io_req.client = ic->io;
+ io_loc.bdev = ic->dev->bdev;
+ io_loc.sector = sector;
+ io_loc.count = ic->sectors_per_block;
+
+ r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
+ if (unlikely(r)) {
+ dio->bi_status = errno_to_blk_status(r);
+ goto free_ret;
+ }
+
+ integrity_sector_checksum(ic, logical_sector, buffer, checksum);
+ r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block,
+ &dio->metadata_offset, ic->tag_size, TAG_CMP);
+ if (r) {
+ if (r > 0) {
+ DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
+ bio->bi_bdev, logical_sector);
+ atomic64_inc(&ic->number_of_mismatches);
+ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
+ bio, logical_sector, 0);
+ r = -EILSEQ;
+ }
+ dio->bi_status = errno_to_blk_status(r);
+ goto free_ret;
+ }
+
+ mem = bvec_kmap_local(&bv);
+ memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT);
+ kunmap_local(mem);
+
+ pos += ic->sectors_per_block << SECTOR_SHIFT;
+ sector += ic->sectors_per_block;
+ logical_sector += ic->sectors_per_block;
+ } while (pos < bv.bv_len);
+ }
+free_ret:
+ mempool_free(page, &ic->recheck_pool);
+}
+
static void integrity_metadata(struct work_struct *w)
{
struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
@@ -1776,15 +1849,8 @@ again:
checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
if (unlikely(r)) {
if (r > 0) {
- sector_t s;
-
- s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
- DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
- bio->bi_bdev, s);
- r = -EILSEQ;
- atomic64_inc(&ic->number_of_mismatches);
- dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
- bio, s, 0);
+ integrity_recheck(dio, checksums);
+ goto skip_io;
}
if (likely(checksums != checksums_onstack))
kfree(checksums);
@@ -2740,7 +2806,7 @@ next_chunk:
io_loc.sector = get_data_sector(ic, area, offset);
io_loc.count = n_sectors;
- r = dm_io(&io_req, 1, &io_loc, NULL);
+ r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
if (unlikely(r)) {
dm_integrity_io_error(ic, "reading data", r);
goto err;
@@ -3419,6 +3485,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
limits->dma_alignment = limits->logical_block_size - 1;
}
+ limits->max_integrity_segments = USHRT_MAX;
}
static void calculate_journal_section_size(struct dm_integrity_c *ic)
@@ -3586,7 +3653,6 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
blk_integrity_register(disk, &bi);
- blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
}
static void dm_integrity_free_page_list(struct page_list *pl)
@@ -4261,6 +4327,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
goto bad;
}
+ r = mempool_init_page_pool(&ic->recheck_pool, 1, 0);
+ if (r) {
+ ti->error = "Cannot allocate mempool";
+ goto bad;
+ }
+
ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
if (!ic->metadata_wq) {
@@ -4609,6 +4681,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
kvfree(ic->bbs);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
+ mempool_exit(&ic->recheck_pool);
mempool_exit(&ic->journal_io_mempool);
if (ic->io)
dm_io_client_destroy(ic->io);
@@ -4661,7 +4734,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 10, 0},
+ .version = {1, 11, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index f053ce245814..7409490259d1 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -305,7 +305,7 @@ static void km_dp_init(struct dpages *dp, void *data)
*/
static void do_region(const blk_opf_t opf, unsigned int region,
struct dm_io_region *where, struct dpages *dp,
- struct io *io)
+ struct io *io, unsigned short ioprio)
{
struct bio *bio;
struct page *page;
@@ -354,6 +354,7 @@ static void do_region(const blk_opf_t opf, unsigned int region,
&io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
bio->bi_end_io = endio;
+ bio->bi_ioprio = ioprio;
store_io_and_region_in_bio(bio, io, region);
if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
@@ -383,7 +384,7 @@ static void do_region(const blk_opf_t opf, unsigned int region,
static void dispatch_io(blk_opf_t opf, unsigned int num_regions,
struct dm_io_region *where, struct dpages *dp,
- struct io *io, int sync)
+ struct io *io, int sync, unsigned short ioprio)
{
int i;
struct dpages old_pages = *dp;
@@ -400,7 +401,7 @@ static void dispatch_io(blk_opf_t opf, unsigned int num_regions,
for (i = 0; i < num_regions; i++) {
*dp = old_pages;
if (where[i].count || (opf & REQ_PREFLUSH))
- do_region(opf, i, where + i, dp, io);
+ do_region(opf, i, where + i, dp, io, ioprio);
}
/*
@@ -425,7 +426,7 @@ static void sync_io_complete(unsigned long error, void *context)
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, blk_opf_t opf, struct dpages *dp,
- unsigned long *error_bits)
+ unsigned long *error_bits, unsigned short ioprio)
{
struct io *io;
struct sync_io sio;
@@ -447,7 +448,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size;
- dispatch_io(opf, num_regions, where, dp, io, 1);
+ dispatch_io(opf, num_regions, where, dp, io, 1, ioprio);
wait_for_completion_io(&sio.wait);
@@ -459,7 +460,8 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
static int async_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, blk_opf_t opf,
- struct dpages *dp, io_notify_fn fn, void *context)
+ struct dpages *dp, io_notify_fn fn, void *context,
+ unsigned short ioprio)
{
struct io *io;
@@ -479,7 +481,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size;
- dispatch_io(opf, num_regions, where, dp, io, 0);
+ dispatch_io(opf, num_regions, where, dp, io, 0, ioprio);
return 0;
}
@@ -521,7 +523,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
}
int dm_io(struct dm_io_request *io_req, unsigned int num_regions,
- struct dm_io_region *where, unsigned long *sync_error_bits)
+ struct dm_io_region *where, unsigned long *sync_error_bits,
+ unsigned short ioprio)
{
int r;
struct dpages dp;
@@ -532,11 +535,11 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions,
if (!io_req->notify.fn)
return sync_io(io_req->client, num_regions, where,
- io_req->bi_opf, &dp, sync_error_bits);
+ io_req->bi_opf, &dp, sync_error_bits, ioprio);
return async_io(io_req->client, num_regions, where,
io_req->bi_opf, &dp, io_req->notify.fn,
- io_req->notify.context);
+ io_req->notify.context, ioprio);
}
EXPORT_SYMBOL(dm_io);
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index e65058e0ed06..c2c07bfa6471 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -25,7 +25,7 @@
#include <linux/ima.h>
#define DM_MSG_PREFIX "ioctl"
-#define DM_DRIVER_EMAIL "dm-devel@redhat.com"
+#define DM_DRIVER_EMAIL "dm-devel@lists.linux.dev"
struct dm_file {
/*
@@ -1941,7 +1941,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
minimum_data_size - sizeof(param_kernel->version)))
return -EFAULT;
- if (param_kernel->data_size < minimum_data_size) {
+ if (unlikely(param_kernel->data_size < minimum_data_size) ||
+ unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) {
DMERR("Invalid data size in the ioctl structure: %u",
param_kernel->data_size);
return -EINVAL;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 36bcfdccae04..6ea75436a433 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -578,9 +578,9 @@ static int run_io_job(struct kcopyd_job *job)
io_job_start(job->kc->throttle);
if (job->op == REQ_OP_READ)
- r = dm_io(&io_req, 1, &job->source, NULL);
+ r = dm_io(&io_req, 1, &job->source, NULL, IOPRIO_DEFAULT);
else
- r = dm_io(&io_req, job->num_dests, job->dests, NULL);
+ r = dm_io(&io_req, job->num_dests, job->dests, NULL, IOPRIO_DEFAULT);
return r;
}
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 7e4f27e86150..9fbb4b48fb2b 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -926,5 +926,5 @@ module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);
MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
-MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
+MODULE_AUTHOR("Jonathan Brassow <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index f9f84236dfcd..9d85d045f9d9 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -300,7 +300,7 @@ static int rw_header(struct log_c *lc, enum req_op op)
{
lc->io_req.bi_opf = op;
- return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
+ return dm_io(&lc->io_req, 1, &lc->header_location, NULL, IOPRIO_DEFAULT);
}
static int flush_header(struct log_c *lc)
@@ -313,7 +313,7 @@ static int flush_header(struct log_c *lc)
lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
- return dm_io(&lc->io_req, 1, &null_location, NULL);
+ return dm_io(&lc->io_req, 1, &null_location, NULL, IOPRIO_DEFAULT);
}
static int read_header(struct log_c *log)
@@ -908,5 +908,5 @@ module_init(dm_dirty_log_init);
module_exit(dm_dirty_log_exit);
MODULE_DESCRIPTION(DM_NAME " dirty region log");
-MODULE_AUTHOR("Joe Thornber, Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber, Heinz Mauelshagen <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index bea3cda9938e..05d1328d1811 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -2266,5 +2266,5 @@ module_param_named(queue_if_no_path_timeout_secs, queue_if_no_path_timeout_secs,
MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds");
MODULE_DESCRIPTION(DM_NAME " multipath target");
-MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
+MODULE_AUTHOR("Sistina Software <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c
index 0f04b673597a..d1745b123dc1 100644
--- a/drivers/md/dm-ps-round-robin.c
+++ b/drivers/md/dm-ps-round-robin.c
@@ -240,5 +240,5 @@ module_init(dm_rr_init);
module_exit(dm_rr_exit);
MODULE_DESCRIPTION(DM_NAME " round-robin multipath path selector");
-MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
+MODULE_AUTHOR("Sistina Software <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index eb009d6bb03a..abe88d1e6735 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -213,6 +213,7 @@ struct raid_dev {
#define RT_FLAG_RS_IN_SYNC 6
#define RT_FLAG_RS_RESYNCING 7
#define RT_FLAG_RS_GROW 8
+#define RT_FLAG_RS_FROZEN 9
/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -3240,11 +3241,12 @@ size_check:
rs->md.ro = 1;
rs->md.in_sync = 1;
- /* Keep array frozen until resume. */
- set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
-
/* Has to be held on running the array */
mddev_suspend_and_lock_nointr(&rs->md);
+
+ /* Keep array frozen until resume. */
+ md_frozen_sync_thread(&rs->md);
+
r = md_run(&rs->md);
rs->md.in_sync = 0; /* Assume already marked dirty */
if (r) {
@@ -3329,17 +3331,18 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
struct mddev *mddev = &rs->md;
/*
- * If we're reshaping to add disk(s)), ti->len and
+ * If we're reshaping to add disk(s), ti->len and
* mddev->array_sectors will differ during the process
* (ti->len > mddev->array_sectors), so we have to requeue
* bios with addresses > mddev->array_sectors here or
* there will occur accesses past EOD of the component
* data images thus erroring the raid set.
*/
- if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
+ if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors))
return DM_MAPIO_REQUEUE;
- md_handle_request(mddev, bio);
+ if (unlikely(!md_handle_request(mddev, bio)))
+ return DM_MAPIO_REQUEUE;
return DM_MAPIO_SUBMITTED;
}
@@ -3718,21 +3721,33 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
+ int ret = 0;
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
- if (!strcasecmp(argv[0], "frozen"))
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- else
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) ||
+ test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags))
+ return -EBUSY;
- if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
- if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev);
- }
- } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
+ if (!strcasecmp(argv[0], "frozen")) {
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ md_frozen_sync_thread(mddev);
+ mddev_unlock(mddev);
+ } else if (!strcasecmp(argv[0], "idle")) {
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ md_idle_sync_thread(mddev);
+ mddev_unlock(mddev);
+ }
+
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
else if (!strcasecmp(argv[0], "resync"))
; /* MD_RECOVERY_NEEDED set below */
@@ -3791,15 +3806,46 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
}
+static void raid_presuspend(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct mddev *mddev = &rs->md;
+
+ /*
+ * From now on, disallow raid_message() to change sync_thread until
+ * resume, raid_postsuspend() is too late.
+ */
+ set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
+
+ if (!reshape_interrupted(mddev))
+ return;
+
+ /*
+ * For raid456, if reshape is interrupted, IO across reshape position
+ * will never make progress, while caller will wait for IO to be done.
+ * Inform raid456 to handle those IO to prevent deadlock.
+ */
+ if (mddev->pers && mddev->pers->prepare_suspend)
+ mddev->pers->prepare_suspend(mddev);
+}
+
+static void raid_presuspend_undo(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+
+ clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
+}
+
static void raid_postsuspend(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
- /* Writes have to be stopped before suspending to avoid deadlocks. */
- if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
- md_stop_writes(&rs->md);
-
+ /*
+ * sync_thread must be stopped during suspend, and writes have
+ * to be stopped before suspending to avoid deadlocks.
+ */
+ md_stop_writes(&rs->md);
mddev_suspend(&rs->md, false);
}
}
@@ -4012,8 +4058,6 @@ static int raid_preresume(struct dm_target *ti)
}
/* Check for any resize/reshape on @rs and adjust/initiate */
- /* Be prepared for mddev_resume() in raid_resume() */
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
mddev->resync_min = mddev->recovery_cp;
@@ -4047,7 +4091,9 @@ static void raid_resume(struct dm_target *ti)
* Take this opportunity to check whether any failed
* devices are reachable again.
*/
+ mddev_lock_nointr(mddev);
attempt_restore_of_faulty_devices(rs);
+ mddev_unlock(mddev);
}
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
@@ -4055,10 +4101,13 @@ static void raid_resume(struct dm_target *ti)
if (mddev->delta_disks < 0)
rs_set_capacity(rs);
+ WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
+ WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
mddev_lock_nointr(mddev);
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
mddev->ro = 0;
mddev->in_sync = 0;
+ md_unfrozen_sync_thread(mddev);
mddev_unlock_and_resume(mddev);
}
}
@@ -4074,6 +4123,8 @@ static struct target_type raid_target = {
.message = raid_message,
.iterate_devices = raid_iterate_devices,
.io_hints = raid_io_hints,
+ .presuspend = raid_presuspend,
+ .presuspend_undo = raid_presuspend_undo,
.postsuspend = raid_postsuspend,
.preresume = raid_preresume,
.resume = raid_resume,
@@ -4091,6 +4142,6 @@ MODULE_ALIAS("dm-raid10");
MODULE_ALIAS("dm-raid4");
MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6");
-MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>");
-MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_AUTHOR("Neil Brown <dm-devel@lists.linux.dev>");
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ddcb2bc4a617..9511dae5b556 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -278,7 +278,7 @@ static int mirror_flush(struct dm_target *ti)
}
error_bits = -1;
- dm_io(&io_req, ms->nr_mirrors, io, &error_bits);
+ dm_io(&io_req, ms->nr_mirrors, io, &error_bits, IOPRIO_DEFAULT);
if (unlikely(error_bits != 0)) {
for (i = 0; i < ms->nr_mirrors; i++)
if (test_bit(i, &error_bits))
@@ -554,7 +554,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio)
map_region(&io, m, bio);
bio_set_m(bio, m);
- BUG_ON(dm_io(&io_req, 1, &io, NULL));
+ BUG_ON(dm_io(&io_req, 1, &io, NULL, IOPRIO_DEFAULT));
}
static inline int region_in_sync(struct mirror_set *ms, region_t region,
@@ -681,7 +681,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
*/
bio_set_m(bio, get_default_mirror(ms));
- BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL));
+ BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL, IOPRIO_DEFAULT));
}
static void do_writes(struct mirror_set *ms, struct bio_list *writes)
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 852cfa37d48a..a4550975c27d 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -723,5 +723,5 @@ void dm_rh_start_recovery(struct dm_region_hash *rh)
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
MODULE_DESCRIPTION(DM_NAME " region hash");
-MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 15649921f2a9..568d10842b1f 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -223,7 +223,7 @@ static void do_metadata(struct work_struct *work)
{
struct mdata_req *req = container_of(work, struct mdata_req, work);
- req->result = dm_io(req->io_req, 1, req->where, NULL);
+ req->result = dm_io(req->io_req, 1, req->where, NULL, IOPRIO_DEFAULT);
}
/*
@@ -247,7 +247,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf,
struct mdata_req req;
if (!metadata)
- return dm_io(&io_req, 1, &where, NULL);
+ return dm_io(&io_req, 1, &where, NULL, IOPRIO_DEFAULT);
req.where = &where;
req.io_req = &io_req;
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index bdc14ec99814..1e5d988f44da 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -66,6 +66,9 @@ struct dm_stats_last_position {
unsigned int last_rw;
};
+#define DM_STAT_MAX_ENTRIES 8388608
+#define DM_STAT_MAX_HISTOGRAM_ENTRIES 134217728
+
/*
* A typo on the command line could possibly make the kernel run out of memory
* and crash. To prevent the crash we account all used memory. We fail if we
@@ -285,6 +288,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
return -EOVERFLOW;
+ if (n_entries > DM_STAT_MAX_ENTRIES)
+ return -EOVERFLOW;
+
shared_alloc_size = struct_size(s, stat_shared, n_entries);
if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
return -EOVERFLOW;
@@ -297,6 +303,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long))
return -EOVERFLOW;
+ if ((n_histogram_entries + 1) * (size_t)n_entries > DM_STAT_MAX_HISTOGRAM_ENTRIES)
+ return -EOVERFLOW;
+
if (!check_shared_memory(shared_alloc_size + histogram_alloc_size +
num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size)))
return -ENOMEM;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 260b5b8f2b0d..41f1d731ae5a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -129,7 +129,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
int dm_table_create(struct dm_table **result, blk_mode_t mode,
unsigned int num_targets, struct mapped_device *md)
{
- struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
+ struct dm_table *t;
+
+ if (num_targets > DM_MAX_TARGETS)
+ return -EOVERFLOW;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
if (!t)
return -ENOMEM;
@@ -144,7 +149,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode,
if (!num_targets) {
kfree(t);
- return -ENOMEM;
+ return -EOVERFLOW;
}
if (alloc_targets(t, num_targets)) {
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 07c7f9795b10..4793ad2aa1f7 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -453,12 +453,13 @@ static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bi
cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);
r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
- if (r)
+ if (r) {
/*
* We reused an old cell; we can get rid of
* the new one.
*/
dm_bio_prison_free_cell(pool->prison, cell_prealloc);
+ }
return r;
}
@@ -707,9 +708,10 @@ static void get_bio_block_range(struct thin_c *tc, struct bio *bio,
(void) sector_div(e, pool->sectors_per_block);
}
- if (e < b)
+ if (e < b) {
/* Can happen if the bio is within a single block. */
e = b;
+ }
*begin = b;
*end = e;
@@ -721,13 +723,14 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
sector_t bi_sector = bio->bi_iter.bi_sector;
bio_set_dev(bio, tc->pool_dev->bdev);
- if (block_size_is_power_of_two(pool))
+ if (block_size_is_power_of_two(pool)) {
bio->bi_iter.bi_sector =
(block << pool->sectors_per_block_shift) |
(bi_sector & (pool->sectors_per_block - 1));
- else
+ } else {
bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
sector_div(bi_sector, pool->sectors_per_block);
+ }
}
static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -1401,9 +1404,10 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
if (pool->pf.zero_new_blocks) {
if (io_overwrites_block(pool, bio))
remap_and_issue_overwrite(tc, bio, data_block, m);
- else
+ else {
ll_zero(tc, m, data_block * pool->sectors_per_block,
(data_block + 1) * pool->sectors_per_block);
+ }
} else
process_prepared_mapping(m);
}
@@ -1416,17 +1420,17 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
sector_t virt_block_begin = virt_block * pool->sectors_per_block;
sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
- if (virt_block_end <= tc->origin_size)
+ if (virt_block_end <= tc->origin_size) {
schedule_copy(tc, virt_block, tc->origin_dev,
virt_block, data_dest, cell, bio,
pool->sectors_per_block);
- else if (virt_block_begin < tc->origin_size)
+ } else if (virt_block_begin < tc->origin_size) {
schedule_copy(tc, virt_block, tc->origin_dev,
virt_block, data_dest, cell, bio,
tc->origin_size - virt_block_begin);
- else
+ } else
schedule_zero(tc, virt_block, data_dest, cell, bio);
}
@@ -4560,5 +4564,5 @@ module_param_named(no_space_timeout, no_space_timeout_secs, uint, 0644);
MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
-MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-vdo/Kconfig b/drivers/md/dm-vdo/Kconfig
new file mode 100644
index 000000000000..111ecd2c2a24
--- /dev/null
+++ b/drivers/md/dm-vdo/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DM_VDO
+ tristate "VDO: deduplication and compression target"
+ depends on 64BIT
+ depends on BLK_DEV_DM
+ select DM_BUFIO
+ select LZ4_COMPRESS
+ select LZ4_DECOMPRESS
+ help
+ This device mapper target presents a block device with
+ deduplication, compression and thin-provisioning.
+
+ To compile this code as a module, choose M here: the module will
+ be called dm-vdo.
+
+ If unsure, say N.
diff --git a/drivers/md/dm-vdo/Makefile b/drivers/md/dm-vdo/Makefile
new file mode 100644
index 000000000000..33e09abc6acd
--- /dev/null
+++ b/drivers/md/dm-vdo/Makefile
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+ccflags-y := -I$(srctree)/$(src) -I$(srctree)/$(src)/indexer
+
+obj-$(CONFIG_DM_VDO) += dm-vdo.o
+
+dm-vdo-objs := \
+ action-manager.o \
+ admin-state.o \
+ block-map.o \
+ completion.o \
+ data-vio.o \
+ dedupe.o \
+ dm-vdo-target.o \
+ dump.o \
+ encodings.o \
+ errors.o \
+ flush.o \
+ funnel-queue.o \
+ funnel-workqueue.o \
+ int-map.o \
+ io-submitter.o \
+ logger.o \
+ logical-zone.o \
+ memory-alloc.o \
+ message-stats.o \
+ murmurhash3.o \
+ packer.o \
+ permassert.o \
+ physical-zone.o \
+ priority-table.o \
+ recovery-journal.o \
+ repair.o \
+ slab-depot.o \
+ status-codes.o \
+ string-utils.o \
+ thread-device.o \
+ thread-registry.o \
+ thread-utils.o \
+ vdo.o \
+ vio.o \
+ wait-queue.o \
+ indexer/chapter-index.o \
+ indexer/config.o \
+ indexer/delta-index.o \
+ indexer/funnel-requestqueue.o \
+ indexer/geometry.o \
+ indexer/index.o \
+ indexer/index-layout.o \
+ indexer/index-page-map.o \
+ indexer/index-session.o \
+ indexer/io-factory.o \
+ indexer/open-chapter.o \
+ indexer/radix-sort.o \
+ indexer/sparse-cache.o \
+ indexer/volume.o \
+ indexer/volume-index.o
diff --git a/drivers/md/dm-vdo/action-manager.c b/drivers/md/dm-vdo/action-manager.c
new file mode 100644
index 000000000000..a0e5e7077d13
--- /dev/null
+++ b/drivers/md/dm-vdo/action-manager.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "action-manager.h"
+
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "admin-state.h"
+#include "completion.h"
+#include "status-codes.h"
+#include "types.h"
+#include "vdo.h"
+
+/**
+ * struct action - An action to be performed in each of a set of zones.
+ * @in_use: Whether this structure is in use.
+ * @operation: The admin operation associated with this action.
+ * @preamble: The method to run on the initiator thread before the action is applied to each zone.
+ * @zone_action: The action to be performed in each zone.
+ * @conclusion: The method to run on the initiator thread after the action is applied to each zone.
+ * @parent: The object to notify when the action is complete.
+ * @context: The action specific context.
+ * @next: The action to perform after this one.
+ */
+struct action {
+ bool in_use;
+ const struct admin_state_code *operation;
+ vdo_action_preamble_fn preamble;
+ vdo_zone_action_fn zone_action;
+ vdo_action_conclusion_fn conclusion;
+ struct vdo_completion *parent;
+ void *context;
+ struct action *next;
+};
+
+/**
+ * struct action_manager - Definition of an action manager.
+ * @completion: The completion for performing actions.
+ * @state: The state of this action manager.
+ * @actions: The two action slots.
+ * @current_action: The current action slot.
+ * @zones: The number of zones in which an action is to be applied.
+ * @Scheduler: A function to schedule a default next action.
+ * @get_zone_thread_id: A function to get the id of the thread on which to apply an action to a
+ * zone.
+ * @initiator_thread_id: The ID of the thread on which actions may be initiated.
+ * @context: Opaque data associated with this action manager.
+ * @acting_zone: The zone currently being acted upon.
+ */
+struct action_manager {
+ struct vdo_completion completion;
+ struct admin_state state;
+ struct action actions[2];
+ struct action *current_action;
+ zone_count_t zones;
+ vdo_action_scheduler_fn scheduler;
+ vdo_zone_thread_getter_fn get_zone_thread_id;
+ thread_id_t initiator_thread_id;
+ void *context;
+ zone_count_t acting_zone;
+};
+
+static inline struct action_manager *as_action_manager(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_ACTION_COMPLETION);
+ return container_of(completion, struct action_manager, completion);
+}
+
+/* Implements vdo_action_scheduler_fn. */
+static bool no_default_action(void *context __always_unused)
+{
+ return false;
+}
+
+/* Implements vdo_action_preamble_fn. */
+static void no_preamble(void *context __always_unused, struct vdo_completion *completion)
+{
+ vdo_finish_completion(completion);
+}
+
+/* Implements vdo_action_conclusion_fn. */
+static int no_conclusion(void *context __always_unused)
+{
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_make_action_manager() - Make an action manager.
+ * @zones: The number of zones to which actions will be applied.
+ * @get_zone_thread_id: A function to get the thread id associated with a zone.
+ * @initiator_thread_id: The thread on which actions may initiated.
+ * @context: The object which holds the per-zone context for the action.
+ * @scheduler: A function to schedule a next action after an action concludes if there is no
+ * pending action (may be NULL).
+ * @vdo: The vdo used to initialize completions.
+ * @manager_ptr: A pointer to hold the new action manager.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_make_action_manager(zone_count_t zones,
+ vdo_zone_thread_getter_fn get_zone_thread_id,
+ thread_id_t initiator_thread_id, void *context,
+ vdo_action_scheduler_fn scheduler, struct vdo *vdo,
+ struct action_manager **manager_ptr)
+{
+ struct action_manager *manager;
+ int result = vdo_allocate(1, struct action_manager, __func__, &manager);
+
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *manager = (struct action_manager) {
+ .zones = zones,
+ .scheduler =
+ ((scheduler == NULL) ? no_default_action : scheduler),
+ .get_zone_thread_id = get_zone_thread_id,
+ .initiator_thread_id = initiator_thread_id,
+ .context = context,
+ };
+
+ manager->actions[0].next = &manager->actions[1];
+ manager->current_action = manager->actions[1].next =
+ &manager->actions[0];
+ vdo_set_admin_state_code(&manager->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ vdo_initialize_completion(&manager->completion, vdo, VDO_ACTION_COMPLETION);
+ *manager_ptr = manager;
+ return VDO_SUCCESS;
+}
+
+const struct admin_state_code *vdo_get_current_manager_operation(struct action_manager *manager)
+{
+ return vdo_get_admin_state_code(&manager->state);
+}
+
+void *vdo_get_current_action_context(struct action_manager *manager)
+{
+ return manager->current_action->in_use ? manager->current_action->context : NULL;
+}
+
+static void finish_action_callback(struct vdo_completion *completion);
+static void apply_to_zone(struct vdo_completion *completion);
+
+static thread_id_t get_acting_zone_thread_id(struct action_manager *manager)
+{
+ return manager->get_zone_thread_id(manager->context, manager->acting_zone);
+}
+
+static void preserve_error(struct vdo_completion *completion)
+{
+ if (completion->parent != NULL)
+ vdo_set_completion_result(completion->parent, completion->result);
+
+ vdo_reset_completion(completion);
+ vdo_run_completion(completion);
+}
+
+static void prepare_for_next_zone(struct action_manager *manager)
+{
+ vdo_prepare_completion_for_requeue(&manager->completion, apply_to_zone,
+ preserve_error,
+ get_acting_zone_thread_id(manager),
+ manager->current_action->parent);
+}
+
+static void prepare_for_conclusion(struct action_manager *manager)
+{
+ vdo_prepare_completion_for_requeue(&manager->completion, finish_action_callback,
+ preserve_error, manager->initiator_thread_id,
+ manager->current_action->parent);
+}
+
+static void apply_to_zone(struct vdo_completion *completion)
+{
+ zone_count_t zone;
+ struct action_manager *manager = as_action_manager(completion);
+
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == get_acting_zone_thread_id(manager)),
+ "%s() called on acting zones's thread", __func__);
+
+ zone = manager->acting_zone++;
+ if (manager->acting_zone == manager->zones) {
+ /*
+ * We are about to apply to the last zone. Once that is finished, we're done, so go
+ * back to the initiator thread and finish up.
+ */
+ prepare_for_conclusion(manager);
+ } else {
+ /* Prepare to come back on the next zone */
+ prepare_for_next_zone(manager);
+ }
+
+ manager->current_action->zone_action(manager->context, zone, completion);
+}
+
+static void handle_preamble_error(struct vdo_completion *completion)
+{
+ /* Skip the zone actions since the preamble failed. */
+ completion->callback = finish_action_callback;
+ preserve_error(completion);
+}
+
+static void launch_current_action(struct action_manager *manager)
+{
+ struct action *action = manager->current_action;
+ int result = vdo_start_operation(&manager->state, action->operation);
+
+ if (result != VDO_SUCCESS) {
+ if (action->parent != NULL)
+ vdo_set_completion_result(action->parent, result);
+
+ /* We aren't going to run the preamble, so don't run the conclusion */
+ action->conclusion = no_conclusion;
+ finish_action_callback(&manager->completion);
+ return;
+ }
+
+ if (action->zone_action == NULL) {
+ prepare_for_conclusion(manager);
+ } else {
+ manager->acting_zone = 0;
+ vdo_prepare_completion_for_requeue(&manager->completion, apply_to_zone,
+ handle_preamble_error,
+ get_acting_zone_thread_id(manager),
+ manager->current_action->parent);
+ }
+
+ action->preamble(manager->context, &manager->completion);
+}
+
+/**
+ * vdo_schedule_default_action() - Attempt to schedule the default action.
+ * @manager: The action manager.
+ *
+ * If the manager is not operating normally, the action will not be scheduled.
+ *
+ * Return: true if an action was scheduled.
+ */
+bool vdo_schedule_default_action(struct action_manager *manager)
+{
+ /* Don't schedule a default action if we are operating or not in normal operation. */
+ const struct admin_state_code *code = vdo_get_current_manager_operation(manager);
+
+ return ((code == VDO_ADMIN_STATE_NORMAL_OPERATION) &&
+ manager->scheduler(manager->context));
+}
+
+static void finish_action_callback(struct vdo_completion *completion)
+{
+ bool has_next_action;
+ int result;
+ struct action_manager *manager = as_action_manager(completion);
+ struct action action = *(manager->current_action);
+
+ manager->current_action->in_use = false;
+ manager->current_action = manager->current_action->next;
+
+ /*
+ * We need to check this now to avoid use-after-free issues if running the conclusion or
+ * notifying the parent results in the manager being freed.
+ */
+ has_next_action =
+ (manager->current_action->in_use || vdo_schedule_default_action(manager));
+ result = action.conclusion(manager->context);
+ vdo_finish_operation(&manager->state, VDO_SUCCESS);
+ if (action.parent != NULL)
+ vdo_continue_completion(action.parent, result);
+
+ if (has_next_action)
+ launch_current_action(manager);
+}
+
+/**
+ * vdo_schedule_action() - Schedule an action to be applied to all zones.
+ * @manager: The action manager to schedule the action on.
+ * @preamble: A method to be invoked on the initiator thread once this action is started but before
+ * applying to each zone; may be NULL.
+ * @action: The action to apply to each zone; may be NULL.
+ * @conclusion: A method to be invoked back on the initiator thread once the action has been
+ * applied to all zones; may be NULL.
+ * @parent: The object to notify once the action is complete or if the action can not be scheduled;
+ * may be NULL.
+ *
+ * The action will be launched immediately if there is no current action, or as soon as the current
+ * action completes. If there is already a pending action, this action will not be scheduled, and,
+ * if it has a parent, that parent will be notified. At least one of the preamble, action, or
+ * conclusion must not be NULL.
+ *
+ * Return: true if the action was scheduled.
+ */
+bool vdo_schedule_action(struct action_manager *manager, vdo_action_preamble_fn preamble,
+ vdo_zone_action_fn action, vdo_action_conclusion_fn conclusion,
+ struct vdo_completion *parent)
+{
+ return vdo_schedule_operation(manager, VDO_ADMIN_STATE_OPERATING, preamble,
+ action, conclusion, parent);
+}
+
+/**
+ * vdo_schedule_operation() - Schedule an operation to be applied to all zones.
+ * @manager: The action manager to schedule the action on.
+ * @operation: The operation this action will perform
+ * @preamble: A method to be invoked on the initiator thread once this action is started but before
+ * applying to each zone; may be NULL.
+ * @action: The action to apply to each zone; may be NULL.
+ * @conclusion: A method to be invoked back on the initiator thread once the action has been
+ * applied to all zones; may be NULL.
+ * @parent: The object to notify once the action is complete or if the action can not be scheduled;
+ * may be NULL.
+ *
+ * The operation's action will be launched immediately if there is no current action, or as soon as
+ * the current action completes. If there is already a pending action, this operation will not be
+ * scheduled, and, if it has a parent, that parent will be notified. At least one of the preamble,
+ * action, or conclusion must not be NULL.
+ *
+ * Return: true if the action was scheduled.
+ */
+bool vdo_schedule_operation(struct action_manager *manager,
+ const struct admin_state_code *operation,
+ vdo_action_preamble_fn preamble, vdo_zone_action_fn action,
+ vdo_action_conclusion_fn conclusion,
+ struct vdo_completion *parent)
+{
+ return vdo_schedule_operation_with_context(manager, operation, preamble, action,
+ conclusion, NULL, parent);
+}
+
+/**
+ * vdo_schedule_operation_with_context() - Schedule an operation on all zones.
+ * @manager: The action manager to schedule the action on.
+ * @operation: The operation this action will perform.
+ * @preamble: A method to be invoked on the initiator thread once this action is started but before
+ * applying to each zone; may be NULL.
+ * @action: The action to apply to each zone; may be NULL.
+ * @conclusion: A method to be invoked back on the initiator thread once the action has been
+ * applied to all zones; may be NULL.
+ * @context: An action-specific context which may be retrieved via
+ * vdo_get_current_action_context(); may be NULL.
+ * @parent: The object to notify once the action is complete or if the action can not be scheduled;
+ * may be NULL.
+ *
+ * The operation's action will be launched immediately if there is no current action, or as soon as
+ * the current action completes. If there is already a pending action, this operation will not be
+ * scheduled, and, if it has a parent, that parent will be notified. At least one of the preamble,
+ * action, or conclusion must not be NULL.
+ *
+ * Return: true if the action was scheduled
+ */
+bool vdo_schedule_operation_with_context(struct action_manager *manager,
+ const struct admin_state_code *operation,
+ vdo_action_preamble_fn preamble,
+ vdo_zone_action_fn action,
+ vdo_action_conclusion_fn conclusion,
+ void *context, struct vdo_completion *parent)
+{
+ struct action *current_action;
+
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == manager->initiator_thread_id),
+ "action initiated from correct thread");
+ if (!manager->current_action->in_use) {
+ current_action = manager->current_action;
+ } else if (!manager->current_action->next->in_use) {
+ current_action = manager->current_action->next;
+ } else {
+ if (parent != NULL)
+ vdo_continue_completion(parent, VDO_COMPONENT_BUSY);
+
+ return false;
+ }
+
+ *current_action = (struct action) {
+ .in_use = true,
+ .operation = operation,
+ .preamble = (preamble == NULL) ? no_preamble : preamble,
+ .zone_action = action,
+ .conclusion = (conclusion == NULL) ? no_conclusion : conclusion,
+ .context = context,
+ .parent = parent,
+ .next = current_action->next,
+ };
+
+ if (current_action == manager->current_action)
+ launch_current_action(manager);
+
+ return true;
+}
diff --git a/drivers/md/dm-vdo/action-manager.h b/drivers/md/dm-vdo/action-manager.h
new file mode 100644
index 000000000000..b0a8d3ddf3db
--- /dev/null
+++ b/drivers/md/dm-vdo/action-manager.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_ACTION_MANAGER_H
+#define VDO_ACTION_MANAGER_H
+
+#include "admin-state.h"
+#include "types.h"
+
+/*
+ * An action_manager provides a generic mechanism for applying actions to multi-zone entities (such
+ * as the block map or slab depot). Each action manager is tied to a specific context for which it
+ * manages actions. The manager ensures that only one action is active on that context at a time,
+ * and supports at most one pending action. Calls to schedule an action when there is already a
+ * pending action will result in VDO_COMPONENT_BUSY errors. Actions may only be submitted to the
+ * action manager from a single thread (which thread is determined when the action manager is
+ * constructed).
+ *
+ * A scheduled action consists of four components:
+ *
+ * preamble
+ * an optional method to be run on the initiator thread before applying the action to all zones
+ * zone_action
+ * an optional method to be applied to each of the zones
+ * conclusion
+ * an optional method to be run on the initiator thread once the per-zone method has been
+ * applied to all zones
+ * parent
+ * an optional completion to be finished once the conclusion is done
+ *
+ * At least one of the three methods must be provided.
+ */
+
+/*
+ * A function which is to be applied asynchronously to a set of zones.
+ * @context: The object which holds the per-zone context for the action
+ * @zone_number: The number of zone to which the action is being applied
+ * @parent: The object to notify when the action is complete
+ */
+typedef void (*vdo_zone_action_fn)(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent);
+
+/*
+ * A function which is to be applied asynchronously on an action manager's initiator thread as the
+ * preamble of an action.
+ * @context: The object which holds the per-zone context for the action
+ * @parent: The object to notify when the action is complete
+ */
+typedef void (*vdo_action_preamble_fn)(void *context, struct vdo_completion *parent);
+
+/*
+ * A function which will run on the action manager's initiator thread as the conclusion of an
+ * action.
+ * @context: The object which holds the per-zone context for the action
+ *
+ * Return: VDO_SUCCESS or an error
+ */
+typedef int (*vdo_action_conclusion_fn)(void *context);
+
+/*
+ * A function to schedule an action.
+ * @context: The object which holds the per-zone context for the action
+ *
+ * Return: true if an action was scheduled
+ */
+typedef bool (*vdo_action_scheduler_fn)(void *context);
+
+/*
+ * A function to get the id of the thread associated with a given zone.
+ * @context: The action context
+ * @zone_number: The number of the zone for which the thread ID is desired
+ */
+typedef thread_id_t (*vdo_zone_thread_getter_fn)(void *context, zone_count_t zone_number);
+
+struct action_manager;
+
+int __must_check vdo_make_action_manager(zone_count_t zones,
+ vdo_zone_thread_getter_fn get_zone_thread_id,
+ thread_id_t initiator_thread_id, void *context,
+ vdo_action_scheduler_fn scheduler,
+ struct vdo *vdo,
+ struct action_manager **manager_ptr);
+
+const struct admin_state_code *__must_check
+vdo_get_current_manager_operation(struct action_manager *manager);
+
+void * __must_check vdo_get_current_action_context(struct action_manager *manager);
+
+bool vdo_schedule_default_action(struct action_manager *manager);
+
+bool vdo_schedule_action(struct action_manager *manager, vdo_action_preamble_fn preamble,
+ vdo_zone_action_fn action, vdo_action_conclusion_fn conclusion,
+ struct vdo_completion *parent);
+
+bool vdo_schedule_operation(struct action_manager *manager,
+ const struct admin_state_code *operation,
+ vdo_action_preamble_fn preamble, vdo_zone_action_fn action,
+ vdo_action_conclusion_fn conclusion,
+ struct vdo_completion *parent);
+
+bool vdo_schedule_operation_with_context(struct action_manager *manager,
+ const struct admin_state_code *operation,
+ vdo_action_preamble_fn preamble,
+ vdo_zone_action_fn action,
+ vdo_action_conclusion_fn conclusion,
+ void *context, struct vdo_completion *parent);
+
+#endif /* VDO_ACTION_MANAGER_H */
diff --git a/drivers/md/dm-vdo/admin-state.c b/drivers/md/dm-vdo/admin-state.c
new file mode 100644
index 000000000000..3f9dba525154
--- /dev/null
+++ b/drivers/md/dm-vdo/admin-state.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "admin-state.h"
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "completion.h"
+#include "types.h"
+
+static const struct admin_state_code VDO_CODE_NORMAL_OPERATION = {
+ .name = "VDO_ADMIN_STATE_NORMAL_OPERATION",
+ .normal = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_NORMAL_OPERATION = &VDO_CODE_NORMAL_OPERATION;
+static const struct admin_state_code VDO_CODE_OPERATING = {
+ .name = "VDO_ADMIN_STATE_OPERATING",
+ .normal = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_OPERATING = &VDO_CODE_OPERATING;
+static const struct admin_state_code VDO_CODE_FORMATTING = {
+ .name = "VDO_ADMIN_STATE_FORMATTING",
+ .operating = true,
+ .loading = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_FORMATTING = &VDO_CODE_FORMATTING;
+static const struct admin_state_code VDO_CODE_PRE_LOADING = {
+ .name = "VDO_ADMIN_STATE_PRE_LOADING",
+ .operating = true,
+ .loading = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_PRE_LOADING = &VDO_CODE_PRE_LOADING;
+static const struct admin_state_code VDO_CODE_PRE_LOADED = {
+ .name = "VDO_ADMIN_STATE_PRE_LOADED",
+};
+const struct admin_state_code *VDO_ADMIN_STATE_PRE_LOADED = &VDO_CODE_PRE_LOADED;
+static const struct admin_state_code VDO_CODE_LOADING = {
+ .name = "VDO_ADMIN_STATE_LOADING",
+ .normal = true,
+ .operating = true,
+ .loading = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_LOADING = &VDO_CODE_LOADING;
+static const struct admin_state_code VDO_CODE_LOADING_FOR_RECOVERY = {
+ .name = "VDO_ADMIN_STATE_LOADING_FOR_RECOVERY",
+ .operating = true,
+ .loading = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_LOADING_FOR_RECOVERY =
+ &VDO_CODE_LOADING_FOR_RECOVERY;
+static const struct admin_state_code VDO_CODE_LOADING_FOR_REBUILD = {
+ .name = "VDO_ADMIN_STATE_LOADING_FOR_REBUILD",
+ .operating = true,
+ .loading = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_LOADING_FOR_REBUILD = &VDO_CODE_LOADING_FOR_REBUILD;
+static const struct admin_state_code VDO_CODE_WAITING_FOR_RECOVERY = {
+ .name = "VDO_ADMIN_STATE_WAITING_FOR_RECOVERY",
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_WAITING_FOR_RECOVERY =
+ &VDO_CODE_WAITING_FOR_RECOVERY;
+static const struct admin_state_code VDO_CODE_NEW = {
+ .name = "VDO_ADMIN_STATE_NEW",
+ .quiescent = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_NEW = &VDO_CODE_NEW;
+static const struct admin_state_code VDO_CODE_INITIALIZED = {
+ .name = "VDO_ADMIN_STATE_INITIALIZED",
+};
+const struct admin_state_code *VDO_ADMIN_STATE_INITIALIZED = &VDO_CODE_INITIALIZED;
+static const struct admin_state_code VDO_CODE_RECOVERING = {
+ .name = "VDO_ADMIN_STATE_RECOVERING",
+ .draining = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_RECOVERING = &VDO_CODE_RECOVERING;
+static const struct admin_state_code VDO_CODE_REBUILDING = {
+ .name = "VDO_ADMIN_STATE_REBUILDING",
+ .draining = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_REBUILDING = &VDO_CODE_REBUILDING;
+static const struct admin_state_code VDO_CODE_SAVING = {
+ .name = "VDO_ADMIN_STATE_SAVING",
+ .draining = true,
+ .quiescing = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SAVING = &VDO_CODE_SAVING;
+static const struct admin_state_code VDO_CODE_SAVED = {
+ .name = "VDO_ADMIN_STATE_SAVED",
+ .quiescent = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SAVED = &VDO_CODE_SAVED;
+static const struct admin_state_code VDO_CODE_SCRUBBING = {
+ .name = "VDO_ADMIN_STATE_SCRUBBING",
+ .draining = true,
+ .loading = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SCRUBBING = &VDO_CODE_SCRUBBING;
+static const struct admin_state_code VDO_CODE_SAVE_FOR_SCRUBBING = {
+ .name = "VDO_ADMIN_STATE_SAVE_FOR_SCRUBBING",
+ .draining = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SAVE_FOR_SCRUBBING = &VDO_CODE_SAVE_FOR_SCRUBBING;
+static const struct admin_state_code VDO_CODE_STOPPING = {
+ .name = "VDO_ADMIN_STATE_STOPPING",
+ .draining = true,
+ .quiescing = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_STOPPING = &VDO_CODE_STOPPING;
+static const struct admin_state_code VDO_CODE_STOPPED = {
+ .name = "VDO_ADMIN_STATE_STOPPED",
+ .quiescent = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_STOPPED = &VDO_CODE_STOPPED;
+static const struct admin_state_code VDO_CODE_SUSPENDING = {
+ .name = "VDO_ADMIN_STATE_SUSPENDING",
+ .draining = true,
+ .quiescing = true,
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SUSPENDING = &VDO_CODE_SUSPENDING;
+static const struct admin_state_code VDO_CODE_SUSPENDED = {
+ .name = "VDO_ADMIN_STATE_SUSPENDED",
+ .quiescent = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SUSPENDED = &VDO_CODE_SUSPENDED;
+static const struct admin_state_code VDO_CODE_SUSPENDED_OPERATION = {
+ .name = "VDO_ADMIN_STATE_SUSPENDED_OPERATION",
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_SUSPENDED_OPERATION = &VDO_CODE_SUSPENDED_OPERATION;
+static const struct admin_state_code VDO_CODE_RESUMING = {
+ .name = "VDO_ADMIN_STATE_RESUMING",
+ .operating = true,
+};
+const struct admin_state_code *VDO_ADMIN_STATE_RESUMING = &VDO_CODE_RESUMING;
+
+/**
+ * get_next_state() - Determine the state which should be set after a given operation completes
+ * based on the operation and the current state.
+ * @operation The operation to be started.
+ *
+ * Return: The state to set when the operation completes or NULL if the operation can not be
+ * started in the current state.
+ */
+static const struct admin_state_code *get_next_state(const struct admin_state *state,
+ const struct admin_state_code *operation)
+{
+ const struct admin_state_code *code = vdo_get_admin_state_code(state);
+
+ if (code->operating)
+ return NULL;
+
+ if (operation == VDO_ADMIN_STATE_SAVING)
+ return (code == VDO_ADMIN_STATE_NORMAL_OPERATION ? VDO_ADMIN_STATE_SAVED : NULL);
+
+ if (operation == VDO_ADMIN_STATE_SUSPENDING) {
+ return (code == VDO_ADMIN_STATE_NORMAL_OPERATION
+ ? VDO_ADMIN_STATE_SUSPENDED
+ : NULL);
+ }
+
+ if (operation == VDO_ADMIN_STATE_STOPPING)
+ return (code == VDO_ADMIN_STATE_NORMAL_OPERATION ? VDO_ADMIN_STATE_STOPPED : NULL);
+
+ if (operation == VDO_ADMIN_STATE_PRE_LOADING)
+ return (code == VDO_ADMIN_STATE_INITIALIZED ? VDO_ADMIN_STATE_PRE_LOADED : NULL);
+
+ if (operation == VDO_ADMIN_STATE_SUSPENDED_OPERATION) {
+ return (((code == VDO_ADMIN_STATE_SUSPENDED) ||
+ (code == VDO_ADMIN_STATE_SAVED)) ? code : NULL);
+ }
+
+ return VDO_ADMIN_STATE_NORMAL_OPERATION;
+}
+
+/**
+ * vdo_finish_operation() - Finish the current operation.
+ *
+ * Will notify the operation waiter if there is one. This method should be used for operations
+ * started with vdo_start_operation(). For operations which were started with vdo_start_draining(),
+ * use vdo_finish_draining() instead.
+ *
+ * Return: true if there was an operation to finish.
+ */
+bool vdo_finish_operation(struct admin_state *state, int result)
+{
+ if (!vdo_get_admin_state_code(state)->operating)
+ return false;
+
+ state->complete = state->starting;
+ if (state->waiter != NULL)
+ vdo_set_completion_result(state->waiter, result);
+
+ if (!state->starting) {
+ vdo_set_admin_state_code(state, state->next_state);
+ if (state->waiter != NULL)
+ vdo_launch_completion(vdo_forget(state->waiter));
+ }
+
+ return true;
+}
+
+/**
+ * begin_operation() - Begin an operation if it may be started given the current state.
+ * @waiter A completion to notify when the operation is complete; may be NULL.
+ * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check begin_operation(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter,
+ vdo_admin_initiator_fn initiator)
+{
+ int result;
+ const struct admin_state_code *next_state = get_next_state(state, operation);
+
+ if (next_state == NULL) {
+ result = vdo_log_error_strerror(VDO_INVALID_ADMIN_STATE,
+ "Can't start %s from %s",
+ operation->name,
+ vdo_get_admin_state_code(state)->name);
+ } else if (state->waiter != NULL) {
+ result = vdo_log_error_strerror(VDO_COMPONENT_BUSY,
+ "Can't start %s with extant waiter",
+ operation->name);
+ } else {
+ state->waiter = waiter;
+ state->next_state = next_state;
+ vdo_set_admin_state_code(state, operation);
+ if (initiator != NULL) {
+ state->starting = true;
+ initiator(state);
+ state->starting = false;
+ if (state->complete)
+ vdo_finish_operation(state, VDO_SUCCESS);
+ }
+
+ return VDO_SUCCESS;
+ }
+
+ if (waiter != NULL)
+ vdo_continue_completion(waiter, result);
+
+ return result;
+}
+
+/**
+ * start_operation() - Start an operation if it may be started given the current state.
+ * @waiter A completion to notify when the operation is complete.
+ * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ *
+ * Return: true if the operation was started.
+ */
+static inline bool __must_check start_operation(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter,
+ vdo_admin_initiator_fn initiator)
+{
+ return (begin_operation(state, operation, waiter, initiator) == VDO_SUCCESS);
+}
+
+/**
+ * check_code() - Check the result of a state validation.
+ * @valid true if the code is of an appropriate type.
+ * @code The code which failed to be of the correct type.
+ * @what What the code failed to be, for logging.
+ * @waiter The completion to notify of the error; may be NULL.
+ *
+ * If the result failed, log an invalid state error and, if there is a waiter, notify it.
+ *
+ * Return: The result of the check.
+ */
+static bool check_code(bool valid, const struct admin_state_code *code, const char *what,
+ struct vdo_completion *waiter)
+{
+ int result;
+
+ if (valid)
+ return true;
+
+ result = vdo_log_error_strerror(VDO_INVALID_ADMIN_STATE,
+ "%s is not a %s", code->name, what);
+ if (waiter != NULL)
+ vdo_continue_completion(waiter, result);
+
+ return false;
+}
+
+/**
+ * assert_vdo_drain_operation() - Check that an operation is a drain.
+ * @waiter The completion to finish with an error if the operation is not a drain.
+ *
+ * Return: true if the specified operation is a drain.
+ */
+static bool __must_check assert_vdo_drain_operation(const struct admin_state_code *operation,
+ struct vdo_completion *waiter)
+{
+ return check_code(operation->draining, operation, "drain operation", waiter);
+}
+
+/**
+ * vdo_start_draining() - Initiate a drain operation if the current state permits it.
+ * @operation The type of drain to initiate.
+ * @waiter The completion to notify when the drain is complete.
+ * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ *
+ * Return: true if the drain was initiated, if not the waiter will be notified.
+ */
+bool vdo_start_draining(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter, vdo_admin_initiator_fn initiator)
+{
+ const struct admin_state_code *code = vdo_get_admin_state_code(state);
+
+ if (!assert_vdo_drain_operation(operation, waiter))
+ return false;
+
+ if (code->quiescent) {
+ vdo_launch_completion(waiter);
+ return false;
+ }
+
+ if (!code->normal) {
+ vdo_log_error_strerror(VDO_INVALID_ADMIN_STATE, "can't start %s from %s",
+ operation->name, code->name);
+ vdo_continue_completion(waiter, VDO_INVALID_ADMIN_STATE);
+ return false;
+ }
+
+ return start_operation(state, operation, waiter, initiator);
+}
+
+/**
+ * vdo_finish_draining() - Finish a drain operation if one was in progress.
+ *
+ * Return: true if the state was draining; will notify the waiter if so.
+ */
+bool vdo_finish_draining(struct admin_state *state)
+{
+ return vdo_finish_draining_with_result(state, VDO_SUCCESS);
+}
+
+/**
+ * vdo_finish_draining_with_result() - Finish a drain operation with a status code.
+ *
+ * Return: true if the state was draining; will notify the waiter if so.
+ */
+bool vdo_finish_draining_with_result(struct admin_state *state, int result)
+{
+ return (vdo_is_state_draining(state) && vdo_finish_operation(state, result));
+}
+
+/**
+ * vdo_assert_load_operation() - Check that an operation is a load.
+ * @waiter The completion to finish with an error if the operation is not a load.
+ *
+ * Return: true if the specified operation is a load.
+ */
+bool vdo_assert_load_operation(const struct admin_state_code *operation,
+ struct vdo_completion *waiter)
+{
+ return check_code(operation->loading, operation, "load operation", waiter);
+}
+
+/**
+ * vdo_start_loading() - Initiate a load operation if the current state permits it.
+ * @operation The type of load to initiate.
+ * @waiter The completion to notify when the load is complete (may be NULL).
+ * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ *
+ * Return: true if the load was initiated, if not the waiter will be notified.
+ */
+bool vdo_start_loading(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter, vdo_admin_initiator_fn initiator)
+{
+ return (vdo_assert_load_operation(operation, waiter) &&
+ start_operation(state, operation, waiter, initiator));
+}
+
+/**
+ * vdo_finish_loading() - Finish a load operation if one was in progress.
+ *
+ * Return: true if the state was loading; will notify the waiter if so.
+ */
+bool vdo_finish_loading(struct admin_state *state)
+{
+ return vdo_finish_loading_with_result(state, VDO_SUCCESS);
+}
+
+/**
+ * vdo_finish_loading_with_result() - Finish a load operation with a status code.
+ * @result The result of the load operation.
+ *
+ * Return: true if the state was loading; will notify the waiter if so.
+ */
+bool vdo_finish_loading_with_result(struct admin_state *state, int result)
+{
+ return (vdo_is_state_loading(state) && vdo_finish_operation(state, result));
+}
+
+/**
+ * assert_vdo_resume_operation() - Check whether an admin_state_code is a resume operation.
+ * @waiter The completion to notify if the operation is not a resume operation; may be NULL.
+ *
+ * Return: true if the code is a resume operation.
+ */
+static bool __must_check assert_vdo_resume_operation(const struct admin_state_code *operation,
+ struct vdo_completion *waiter)
+{
+ return check_code(operation == VDO_ADMIN_STATE_RESUMING, operation,
+ "resume operation", waiter);
+}
+
+/**
+ * vdo_start_resuming() - Initiate a resume operation if the current state permits it.
+ * @operation The type of resume to start.
+ * @waiter The completion to notify when the resume is complete (may be NULL).
+ * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ *
+ * Return: true if the resume was initiated, if not the waiter will be notified.
+ */
+bool vdo_start_resuming(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter, vdo_admin_initiator_fn initiator)
+{
+ return (assert_vdo_resume_operation(operation, waiter) &&
+ start_operation(state, operation, waiter, initiator));
+}
+
+/**
+ * vdo_finish_resuming() - Finish a resume operation if one was in progress.
+ *
+ * Return: true if the state was resuming; will notify the waiter if so.
+ */
+bool vdo_finish_resuming(struct admin_state *state)
+{
+ return vdo_finish_resuming_with_result(state, VDO_SUCCESS);
+}
+
+/**
+ * vdo_finish_resuming_with_result() - Finish a resume operation with a status code.
+ * @result The result of the resume operation.
+ *
+ * Return: true if the state was resuming; will notify the waiter if so.
+ */
+bool vdo_finish_resuming_with_result(struct admin_state *state, int result)
+{
+ return (vdo_is_state_resuming(state) && vdo_finish_operation(state, result));
+}
+
+/**
+ * vdo_resume_if_quiescent() - Change the state to normal operation if the current state is
+ * quiescent.
+ *
+ * Return: VDO_SUCCESS if the state resumed, VDO_INVALID_ADMIN_STATE otherwise.
+ */
+int vdo_resume_if_quiescent(struct admin_state *state)
+{
+ if (!vdo_is_state_quiescent(state))
+ return VDO_INVALID_ADMIN_STATE;
+
+ vdo_set_admin_state_code(state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_start_operation() - Attempt to start an operation.
+ *
+ * Return: VDO_SUCCESS if the operation was started, VDO_INVALID_ADMIN_STATE if not
+ */
+int vdo_start_operation(struct admin_state *state,
+ const struct admin_state_code *operation)
+{
+ return vdo_start_operation_with_waiter(state, operation, NULL, NULL);
+}
+
+/**
+ * vdo_start_operation_with_waiter() - Attempt to start an operation.
+ * @waiter the completion to notify when the operation completes or fails to start; may be NULL.
+ * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ *
+ * Return: VDO_SUCCESS if the operation was started, VDO_INVALID_ADMIN_STATE if not
+ */
+int vdo_start_operation_with_waiter(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter,
+ vdo_admin_initiator_fn initiator)
+{
+ return (check_code(operation->operating, operation, "operation", waiter) ?
+ begin_operation(state, operation, waiter, initiator) :
+ VDO_INVALID_ADMIN_STATE);
+}
diff --git a/drivers/md/dm-vdo/admin-state.h b/drivers/md/dm-vdo/admin-state.h
new file mode 100644
index 000000000000..a7d6ac2c30a6
--- /dev/null
+++ b/drivers/md/dm-vdo/admin-state.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_ADMIN_STATE_H
+#define VDO_ADMIN_STATE_H
+
+#include "completion.h"
+#include "types.h"
+
+struct admin_state_code {
+ const char *name;
+ /* Normal operation, data_vios may be active */
+ bool normal;
+ /* I/O is draining, new requests should not start */
+ bool draining;
+ /* This is a startup time operation */
+ bool loading;
+ /* The next state will be quiescent */
+ bool quiescing;
+ /* The VDO is quiescent, there should be no I/O */
+ bool quiescent;
+ /* Whether an operation is in progress and so no other operation may be started */
+ bool operating;
+};
+
+extern const struct admin_state_code *VDO_ADMIN_STATE_NORMAL_OPERATION;
+extern const struct admin_state_code *VDO_ADMIN_STATE_OPERATING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_FORMATTING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_PRE_LOADING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_PRE_LOADED;
+extern const struct admin_state_code *VDO_ADMIN_STATE_LOADING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_LOADING_FOR_RECOVERY;
+extern const struct admin_state_code *VDO_ADMIN_STATE_LOADING_FOR_REBUILD;
+extern const struct admin_state_code *VDO_ADMIN_STATE_WAITING_FOR_RECOVERY;
+extern const struct admin_state_code *VDO_ADMIN_STATE_NEW;
+extern const struct admin_state_code *VDO_ADMIN_STATE_INITIALIZED;
+extern const struct admin_state_code *VDO_ADMIN_STATE_RECOVERING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_REBUILDING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SAVING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SAVED;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SCRUBBING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SAVE_FOR_SCRUBBING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_STOPPING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_STOPPED;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SUSPENDING;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SUSPENDED;
+extern const struct admin_state_code *VDO_ADMIN_STATE_SUSPENDED_OPERATION;
+extern const struct admin_state_code *VDO_ADMIN_STATE_RESUMING;
+
+struct admin_state {
+ const struct admin_state_code *current_state;
+ /* The next administrative state (when the current operation finishes) */
+ const struct admin_state_code *next_state;
+ /* A completion waiting on a state change */
+ struct vdo_completion *waiter;
+ /* Whether an operation is being initiated */
+ bool starting;
+ /* Whether an operation has completed in the initiator */
+ bool complete;
+};
+
+/**
+ * typedef vdo_admin_initiator_fn - A method to be called once an admin operation may be initiated.
+ */
+typedef void (*vdo_admin_initiator_fn)(struct admin_state *state);
+
+static inline const struct admin_state_code * __must_check
+vdo_get_admin_state_code(const struct admin_state *state)
+{
+ return READ_ONCE(state->current_state);
+}
+
+/**
+ * vdo_set_admin_state_code() - Set the current admin state code.
+ *
+ * This function should be used primarily for initialization and by adminState internals. Most uses
+ * should go through the operation interfaces.
+ */
+static inline void vdo_set_admin_state_code(struct admin_state *state,
+ const struct admin_state_code *code)
+{
+ WRITE_ONCE(state->current_state, code);
+}
+
+static inline bool __must_check vdo_is_state_normal(const struct admin_state *state)
+{
+ return vdo_get_admin_state_code(state)->normal;
+}
+
+static inline bool __must_check vdo_is_state_suspending(const struct admin_state *state)
+{
+ return (vdo_get_admin_state_code(state) == VDO_ADMIN_STATE_SUSPENDING);
+}
+
+static inline bool __must_check vdo_is_state_saving(const struct admin_state *state)
+{
+ return (vdo_get_admin_state_code(state) == VDO_ADMIN_STATE_SAVING);
+}
+
+static inline bool __must_check vdo_is_state_saved(const struct admin_state *state)
+{
+ return (vdo_get_admin_state_code(state) == VDO_ADMIN_STATE_SAVED);
+}
+
+static inline bool __must_check vdo_is_state_draining(const struct admin_state *state)
+{
+ return vdo_get_admin_state_code(state)->draining;
+}
+
+static inline bool __must_check vdo_is_state_loading(const struct admin_state *state)
+{
+ return vdo_get_admin_state_code(state)->loading;
+}
+
+static inline bool __must_check vdo_is_state_resuming(const struct admin_state *state)
+{
+ return (vdo_get_admin_state_code(state) == VDO_ADMIN_STATE_RESUMING);
+}
+
+static inline bool __must_check vdo_is_state_clean_load(const struct admin_state *state)
+{
+ const struct admin_state_code *code = vdo_get_admin_state_code(state);
+
+ return ((code == VDO_ADMIN_STATE_FORMATTING) || (code == VDO_ADMIN_STATE_LOADING));
+}
+
+static inline bool __must_check vdo_is_state_quiescing(const struct admin_state *state)
+{
+ return vdo_get_admin_state_code(state)->quiescing;
+}
+
+static inline bool __must_check vdo_is_state_quiescent(const struct admin_state *state)
+{
+ return vdo_get_admin_state_code(state)->quiescent;
+}
+
+bool __must_check vdo_assert_load_operation(const struct admin_state_code *operation,
+ struct vdo_completion *waiter);
+
+bool vdo_start_loading(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter, vdo_admin_initiator_fn initiator);
+
+bool vdo_finish_loading(struct admin_state *state);
+
+bool vdo_finish_loading_with_result(struct admin_state *state, int result);
+
+bool vdo_start_resuming(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter, vdo_admin_initiator_fn initiator);
+
+bool vdo_finish_resuming(struct admin_state *state);
+
+bool vdo_finish_resuming_with_result(struct admin_state *state, int result);
+
+int vdo_resume_if_quiescent(struct admin_state *state);
+
+bool vdo_start_draining(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter, vdo_admin_initiator_fn initiator);
+
+bool vdo_finish_draining(struct admin_state *state);
+
+bool vdo_finish_draining_with_result(struct admin_state *state, int result);
+
+int vdo_start_operation(struct admin_state *state,
+ const struct admin_state_code *operation);
+
+int vdo_start_operation_with_waiter(struct admin_state *state,
+ const struct admin_state_code *operation,
+ struct vdo_completion *waiter,
+ vdo_admin_initiator_fn initiator);
+
+bool vdo_finish_operation(struct admin_state *state, int result);
+
+#endif /* VDO_ADMIN_STATE_H */
diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c
new file mode 100644
index 000000000000..a0a7c1bd634e
--- /dev/null
+++ b/drivers/md/dm-vdo/block-map.c
@@ -0,0 +1,3318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "block-map.h"
+
+#include <linux/bio.h>
+#include <linux/ratelimit.h>
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "action-manager.h"
+#include "admin-state.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "encodings.h"
+#include "io-submitter.h"
+#include "physical-zone.h"
+#include "recovery-journal.h"
+#include "slab-depot.h"
+#include "status-codes.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+/**
+ * DOC: Block map eras
+ *
+ * The block map era, or maximum age, is used as follows:
+ *
+ * Each block map page, when dirty, records the earliest recovery journal block sequence number of
+ * the changes reflected in that dirty block. Sequence numbers are classified into eras: every
+ * @maximum_age sequence numbers, we switch to a new era. Block map pages are assigned to eras
+ * according to the sequence number they record.
+ *
+ * In the current (newest) era, block map pages are not written unless there is cache pressure. In
+ * the next oldest era, each time a new journal block is written 1/@maximum_age of the pages in
+ * this era are issued for write. In all older eras, pages are issued for write immediately.
+ */
+
+struct page_descriptor {
+ root_count_t root_index;
+ height_t height;
+ page_number_t page_index;
+ slot_number_t slot;
+} __packed;
+
+union page_key {
+ struct page_descriptor descriptor;
+ u64 key;
+};
+
+struct write_if_not_dirtied_context {
+ struct block_map_zone *zone;
+ u8 generation;
+};
+
+struct block_map_tree_segment {
+ struct tree_page *levels[VDO_BLOCK_MAP_TREE_HEIGHT];
+};
+
+struct block_map_tree {
+ struct block_map_tree_segment *segments;
+};
+
+struct forest {
+ struct block_map *map;
+ size_t segments;
+ struct boundary *boundaries;
+ struct tree_page **pages;
+ struct block_map_tree trees[];
+};
+
+struct cursor_level {
+ page_number_t page_index;
+ slot_number_t slot;
+};
+
+struct cursors;
+
+struct cursor {
+ struct vdo_waiter waiter;
+ struct block_map_tree *tree;
+ height_t height;
+ struct cursors *parent;
+ struct boundary boundary;
+ struct cursor_level levels[VDO_BLOCK_MAP_TREE_HEIGHT];
+ struct pooled_vio *vio;
+};
+
+struct cursors {
+ struct block_map_zone *zone;
+ struct vio_pool *pool;
+ vdo_entry_callback_fn entry_callback;
+ struct vdo_completion *completion;
+ root_count_t active_roots;
+ struct cursor cursors[];
+};
+
+static const physical_block_number_t NO_PAGE = 0xFFFFFFFFFFFFFFFF;
+
+/* Used to indicate that the page holding the location of a tree root has been "loaded". */
+static const physical_block_number_t VDO_INVALID_PBN = 0xFFFFFFFFFFFFFFFF;
+
+const struct block_map_entry UNMAPPED_BLOCK_MAP_ENTRY = {
+ .mapping_state = VDO_MAPPING_STATE_UNMAPPED & 0x0F,
+ .pbn_high_nibble = 0,
+ .pbn_low_word = __cpu_to_le32(VDO_ZERO_BLOCK & UINT_MAX),
+};
+
+#define LOG_INTERVAL 4000
+#define DISPLAY_INTERVAL 100000
+
+/*
+ * For adjusting VDO page cache statistic fields which are only mutated on the logical zone thread.
+ * Prevents any compiler shenanigans from affecting other threads reading those stats.
+ */
+#define ADD_ONCE(value, delta) WRITE_ONCE(value, (value) + (delta))
+
+static inline bool is_dirty(const struct page_info *info)
+{
+ return info->state == PS_DIRTY;
+}
+
+static inline bool is_present(const struct page_info *info)
+{
+ return (info->state == PS_RESIDENT) || (info->state == PS_DIRTY);
+}
+
+static inline bool is_in_flight(const struct page_info *info)
+{
+ return (info->state == PS_INCOMING) || (info->state == PS_OUTGOING);
+}
+
+static inline bool is_incoming(const struct page_info *info)
+{
+ return info->state == PS_INCOMING;
+}
+
+static inline bool is_outgoing(const struct page_info *info)
+{
+ return info->state == PS_OUTGOING;
+}
+
+static inline bool is_valid(const struct page_info *info)
+{
+ return is_present(info) || is_outgoing(info);
+}
+
+static char *get_page_buffer(struct page_info *info)
+{
+ struct vdo_page_cache *cache = info->cache;
+
+ return &cache->pages[(info - cache->infos) * VDO_BLOCK_SIZE];
+}
+
+static inline struct vdo_page_completion *page_completion_from_waiter(struct vdo_waiter *waiter)
+{
+ struct vdo_page_completion *completion;
+
+ if (waiter == NULL)
+ return NULL;
+
+ completion = container_of(waiter, struct vdo_page_completion, waiter);
+ vdo_assert_completion_type(&completion->completion, VDO_PAGE_COMPLETION);
+ return completion;
+}
+
+/**
+ * initialize_info() - Initialize all page info structures and put them on the free list.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int initialize_info(struct vdo_page_cache *cache)
+{
+ struct page_info *info;
+
+ INIT_LIST_HEAD(&cache->free_list);
+ for (info = cache->infos; info < cache->infos + cache->page_count; info++) {
+ int result;
+
+ info->cache = cache;
+ info->state = PS_FREE;
+ info->pbn = NO_PAGE;
+
+ result = create_metadata_vio(cache->vdo, VIO_TYPE_BLOCK_MAP,
+ VIO_PRIORITY_METADATA, info,
+ get_page_buffer(info), &info->vio);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* The thread ID should never change. */
+ info->vio->completion.callback_thread_id = cache->zone->thread_id;
+
+ INIT_LIST_HEAD(&info->state_entry);
+ list_add_tail(&info->state_entry, &cache->free_list);
+ INIT_LIST_HEAD(&info->lru_entry);
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * allocate_cache_components() - Allocate components of the cache which require their own
+ * allocation.
+ * @maximum_age: The number of journal blocks before a dirtied page is considered old and must be
+ * written out.
+ *
+ * The caller is responsible for all clean up on errors.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check allocate_cache_components(struct vdo_page_cache *cache)
+{
+ u64 size = cache->page_count * (u64) VDO_BLOCK_SIZE;
+ int result;
+
+ result = vdo_allocate(cache->page_count, struct page_info, "page infos",
+ &cache->infos);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate_memory(size, VDO_BLOCK_SIZE, "cache pages", &cache->pages);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_int_map_create(cache->page_count, &cache->page_map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return initialize_info(cache);
+}
+
+/**
+ * assert_on_cache_thread() - Assert that a function has been called on the VDO page cache's
+ * thread.
+ */
+static inline void assert_on_cache_thread(struct vdo_page_cache *cache,
+ const char *function_name)
+{
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((thread_id == cache->zone->thread_id),
+ "%s() must only be called on cache thread %d, not thread %d",
+ function_name, cache->zone->thread_id, thread_id);
+}
+
+/** assert_io_allowed() - Assert that a page cache may issue I/O. */
+static inline void assert_io_allowed(struct vdo_page_cache *cache)
+{
+ VDO_ASSERT_LOG_ONLY(!vdo_is_state_quiescent(&cache->zone->state),
+ "VDO page cache may issue I/O");
+}
+
+/** report_cache_pressure() - Log and, if enabled, report cache pressure. */
+static void report_cache_pressure(struct vdo_page_cache *cache)
+{
+ ADD_ONCE(cache->stats.cache_pressure, 1);
+ if (cache->waiter_count > cache->page_count) {
+ if ((cache->pressure_report % LOG_INTERVAL) == 0)
+ vdo_log_info("page cache pressure %u", cache->stats.cache_pressure);
+
+ if (++cache->pressure_report >= DISPLAY_INTERVAL)
+ cache->pressure_report = 0;
+ }
+}
+
+/**
+ * get_page_state_name() - Return the name of a page state.
+ *
+ * If the page state is invalid a static string is returned and the invalid state is logged.
+ *
+ * Return: A pointer to a static page state name.
+ */
+static const char * __must_check get_page_state_name(enum vdo_page_buffer_state state)
+{
+ int result;
+ static const char * const state_names[] = {
+ "FREE", "INCOMING", "FAILED", "RESIDENT", "DIRTY", "OUTGOING"
+ };
+
+ BUILD_BUG_ON(ARRAY_SIZE(state_names) != PAGE_STATE_COUNT);
+
+ result = VDO_ASSERT(state < ARRAY_SIZE(state_names),
+ "Unknown page_state value %d", state);
+ if (result != VDO_SUCCESS)
+ return "[UNKNOWN PAGE STATE]";
+
+ return state_names[state];
+}
+
+/**
+ * update_counter() - Update the counter associated with a given state.
+ * @info: The page info to count.
+ * @delta: The delta to apply to the counter.
+ */
+static void update_counter(struct page_info *info, s32 delta)
+{
+ struct block_map_statistics *stats = &info->cache->stats;
+
+ switch (info->state) {
+ case PS_FREE:
+ ADD_ONCE(stats->free_pages, delta);
+ return;
+
+ case PS_INCOMING:
+ ADD_ONCE(stats->incoming_pages, delta);
+ return;
+
+ case PS_OUTGOING:
+ ADD_ONCE(stats->outgoing_pages, delta);
+ return;
+
+ case PS_FAILED:
+ ADD_ONCE(stats->failed_pages, delta);
+ return;
+
+ case PS_RESIDENT:
+ ADD_ONCE(stats->clean_pages, delta);
+ return;
+
+ case PS_DIRTY:
+ ADD_ONCE(stats->dirty_pages, delta);
+ return;
+
+ default:
+ return;
+ }
+}
+
+/** update_lru() - Update the lru information for an active page. */
+static void update_lru(struct page_info *info)
+{
+ if (info->cache->lru_list.prev != &info->lru_entry)
+ list_move_tail(&info->lru_entry, &info->cache->lru_list);
+}
+
+/**
+ * set_info_state() - Set the state of a page_info and put it on the right list, adjusting
+ * counters.
+ */
+static void set_info_state(struct page_info *info, enum vdo_page_buffer_state new_state)
+{
+ if (new_state == info->state)
+ return;
+
+ update_counter(info, -1);
+ info->state = new_state;
+ update_counter(info, 1);
+
+ switch (info->state) {
+ case PS_FREE:
+ case PS_FAILED:
+ list_move_tail(&info->state_entry, &info->cache->free_list);
+ return;
+
+ case PS_OUTGOING:
+ list_move_tail(&info->state_entry, &info->cache->outgoing_list);
+ return;
+
+ case PS_DIRTY:
+ return;
+
+ default:
+ list_del_init(&info->state_entry);
+ }
+}
+
+/** set_info_pbn() - Set the pbn for an info, updating the map as needed. */
+static int __must_check set_info_pbn(struct page_info *info, physical_block_number_t pbn)
+{
+ struct vdo_page_cache *cache = info->cache;
+
+ /* Either the new or the old page number must be NO_PAGE. */
+ int result = VDO_ASSERT((pbn == NO_PAGE) || (info->pbn == NO_PAGE),
+ "Must free a page before reusing it.");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (info->pbn != NO_PAGE)
+ vdo_int_map_remove(cache->page_map, info->pbn);
+
+ info->pbn = pbn;
+
+ if (pbn != NO_PAGE) {
+ result = vdo_int_map_put(cache->page_map, pbn, info, true, NULL);
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+ return VDO_SUCCESS;
+}
+
+/** reset_page_info() - Reset page info to represent an unallocated page. */
+static int reset_page_info(struct page_info *info)
+{
+ int result;
+
+ result = VDO_ASSERT(info->busy == 0, "VDO Page must not be busy");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(!vdo_waitq_has_waiters(&info->waiting),
+ "VDO Page must not have waiters");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = set_info_pbn(info, NO_PAGE);
+ set_info_state(info, PS_FREE);
+ list_del_init(&info->lru_entry);
+ return result;
+}
+
+/**
+ * find_free_page() - Find a free page.
+ *
+ * Return: A pointer to the page info structure (if found), NULL otherwise.
+ */
+static struct page_info * __must_check find_free_page(struct vdo_page_cache *cache)
+{
+ struct page_info *info;
+
+ info = list_first_entry_or_null(&cache->free_list, struct page_info,
+ state_entry);
+ if (info != NULL)
+ list_del_init(&info->state_entry);
+
+ return info;
+}
+
+/**
+ * find_page() - Find the page info (if any) associated with a given pbn.
+ * @pbn: The absolute physical block number of the page.
+ *
+ * Return: The page info for the page if available, or NULL if not.
+ */
+static struct page_info * __must_check find_page(struct vdo_page_cache *cache,
+ physical_block_number_t pbn)
+{
+ if ((cache->last_found != NULL) && (cache->last_found->pbn == pbn))
+ return cache->last_found;
+
+ cache->last_found = vdo_int_map_get(cache->page_map, pbn);
+ return cache->last_found;
+}
+
+/**
+ * select_lru_page() - Determine which page is least recently used.
+ *
+ * Picks the least recently used from among the non-busy entries at the front of each of the lru
+ * ring. Since whenever we mark a page busy we also put it to the end of the ring it is unlikely
+ * that the entries at the front are busy unless the queue is very short, but not impossible.
+ *
+ * Return: A pointer to the info structure for a relevant page, or NULL if no such page can be
+ * found. The page can be dirty or resident.
+ */
+static struct page_info * __must_check select_lru_page(struct vdo_page_cache *cache)
+{
+ struct page_info *info;
+
+ list_for_each_entry(info, &cache->lru_list, lru_entry)
+ if ((info->busy == 0) && !is_in_flight(info))
+ return info;
+
+ return NULL;
+}
+
+/* ASYNCHRONOUS INTERFACE BEYOND THIS POINT */
+
+/**
+ * complete_with_page() - Helper to complete the VDO Page Completion request successfully.
+ * @info: The page info representing the result page.
+ * @vdo_page_comp: The VDO page completion to complete.
+ */
+static void complete_with_page(struct page_info *info,
+ struct vdo_page_completion *vdo_page_comp)
+{
+ bool available = vdo_page_comp->writable ? is_present(info) : is_valid(info);
+
+ if (!available) {
+ vdo_log_error_strerror(VDO_BAD_PAGE,
+ "Requested cache page %llu in state %s is not %s",
+ (unsigned long long) info->pbn,
+ get_page_state_name(info->state),
+ vdo_page_comp->writable ? "present" : "valid");
+ vdo_fail_completion(&vdo_page_comp->completion, VDO_BAD_PAGE);
+ return;
+ }
+
+ vdo_page_comp->info = info;
+ vdo_page_comp->ready = true;
+ vdo_finish_completion(&vdo_page_comp->completion);
+}
+
+/**
+ * complete_waiter_with_error() - Complete a page completion with an error code.
+ * @waiter: The page completion, as a waiter.
+ * @result_ptr: A pointer to the error code.
+ *
+ * Implements waiter_callback_fn.
+ */
+static void complete_waiter_with_error(struct vdo_waiter *waiter, void *result_ptr)
+{
+ int *result = result_ptr;
+
+ vdo_fail_completion(&page_completion_from_waiter(waiter)->completion, *result);
+}
+
+/**
+ * complete_waiter_with_page() - Complete a page completion with a page.
+ * @waiter: The page completion, as a waiter.
+ * @page_info: The page info to complete with.
+ *
+ * Implements waiter_callback_fn.
+ */
+static void complete_waiter_with_page(struct vdo_waiter *waiter, void *page_info)
+{
+ complete_with_page(page_info, page_completion_from_waiter(waiter));
+}
+
+/**
+ * distribute_page_over_waitq() - Complete a waitq of VDO page completions with a page result.
+ *
+ * Upon completion the waitq will be empty.
+ *
+ * Return: The number of pages distributed.
+ */
+static unsigned int distribute_page_over_waitq(struct page_info *info,
+ struct vdo_wait_queue *waitq)
+{
+ size_t num_pages;
+
+ update_lru(info);
+ num_pages = vdo_waitq_num_waiters(waitq);
+
+ /*
+ * Increment the busy count once for each pending completion so that this page does not
+ * stop being busy until all completions have been processed.
+ */
+ info->busy += num_pages;
+
+ vdo_waitq_notify_all_waiters(waitq, complete_waiter_with_page, info);
+ return num_pages;
+}
+
+/**
+ * set_persistent_error() - Set a persistent error which all requests will receive in the future.
+ * @context: A string describing what triggered the error.
+ *
+ * Once triggered, all enqueued completions will get this error. Any future requests will result in
+ * this error as well.
+ */
+static void set_persistent_error(struct vdo_page_cache *cache, const char *context,
+ int result)
+{
+ struct page_info *info;
+ /* If we're already read-only, there's no need to log. */
+ struct vdo *vdo = cache->vdo;
+
+ if ((result != VDO_READ_ONLY) && !vdo_is_read_only(vdo)) {
+ vdo_log_error_strerror(result, "VDO Page Cache persistent error: %s",
+ context);
+ vdo_enter_read_only_mode(vdo, result);
+ }
+
+ assert_on_cache_thread(cache, __func__);
+
+ vdo_waitq_notify_all_waiters(&cache->free_waiters,
+ complete_waiter_with_error, &result);
+ cache->waiter_count = 0;
+
+ for (info = cache->infos; info < cache->infos + cache->page_count; info++) {
+ vdo_waitq_notify_all_waiters(&info->waiting,
+ complete_waiter_with_error, &result);
+ }
+}
+
+/**
+ * validate_completed_page() - Check that a page completion which is being freed to the cache
+ * referred to a valid page and is in a valid state.
+ * @writable: Whether a writable page is required.
+ *
+ * Return: VDO_SUCCESS if the page was valid, otherwise as error
+ */
+static int __must_check validate_completed_page(struct vdo_page_completion *completion,
+ bool writable)
+{
+ int result;
+
+ result = VDO_ASSERT(completion->ready, "VDO Page completion not ready");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(completion->info != NULL,
+ "VDO Page Completion must be complete");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(completion->info->pbn == completion->pbn,
+ "VDO Page Completion pbn must be consistent");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(is_valid(completion->info),
+ "VDO Page Completion page must be valid");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (writable) {
+ result = VDO_ASSERT(completion->writable,
+ "VDO Page Completion must be writable");
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ return VDO_SUCCESS;
+}
+
+static void check_for_drain_complete(struct block_map_zone *zone)
+{
+ if (vdo_is_state_draining(&zone->state) &&
+ (zone->active_lookups == 0) &&
+ !vdo_waitq_has_waiters(&zone->flush_waiters) &&
+ !is_vio_pool_busy(zone->vio_pool) &&
+ (zone->page_cache.outstanding_reads == 0) &&
+ (zone->page_cache.outstanding_writes == 0)) {
+ vdo_finish_draining_with_result(&zone->state,
+ (vdo_is_read_only(zone->block_map->vdo) ?
+ VDO_READ_ONLY : VDO_SUCCESS));
+ }
+}
+
+static void enter_zone_read_only_mode(struct block_map_zone *zone, int result)
+{
+ vdo_enter_read_only_mode(zone->block_map->vdo, result);
+
+ /*
+ * We are in read-only mode, so we won't ever write any page out.
+ * Just take all waiters off the waitq so the zone can drain.
+ */
+ vdo_waitq_init(&zone->flush_waiters);
+ check_for_drain_complete(zone);
+}
+
+static bool __must_check
+validate_completed_page_or_enter_read_only_mode(struct vdo_page_completion *completion,
+ bool writable)
+{
+ int result = validate_completed_page(completion, writable);
+
+ if (result == VDO_SUCCESS)
+ return true;
+
+ enter_zone_read_only_mode(completion->info->cache->zone, result);
+ return false;
+}
+
+/**
+ * handle_load_error() - Handle page load errors.
+ * @completion: The page read vio.
+ */
+static void handle_load_error(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct page_info *info = completion->parent;
+ struct vdo_page_cache *cache = info->cache;
+
+ assert_on_cache_thread(cache, __func__);
+ vio_record_metadata_io_error(as_vio(completion));
+ vdo_enter_read_only_mode(cache->zone->block_map->vdo, result);
+ ADD_ONCE(cache->stats.failed_reads, 1);
+ set_info_state(info, PS_FAILED);
+ vdo_waitq_notify_all_waiters(&info->waiting, complete_waiter_with_error, &result);
+ reset_page_info(info);
+
+ /*
+ * Don't decrement until right before calling check_for_drain_complete() to
+ * ensure that the above work can't cause the page cache to be freed out from under us.
+ */
+ cache->outstanding_reads--;
+ check_for_drain_complete(cache->zone);
+}
+
+/**
+ * page_is_loaded() - Callback used when a page has been loaded.
+ * @completion: The vio which has loaded the page. Its parent is the page_info.
+ */
+static void page_is_loaded(struct vdo_completion *completion)
+{
+ struct page_info *info = completion->parent;
+ struct vdo_page_cache *cache = info->cache;
+ nonce_t nonce = info->cache->zone->block_map->nonce;
+ struct block_map_page *page;
+ enum block_map_page_validity validity;
+
+ assert_on_cache_thread(cache, __func__);
+
+ page = (struct block_map_page *) get_page_buffer(info);
+ validity = vdo_validate_block_map_page(page, nonce, info->pbn);
+ if (validity == VDO_BLOCK_MAP_PAGE_BAD) {
+ physical_block_number_t pbn = vdo_get_block_map_page_pbn(page);
+ int result = vdo_log_error_strerror(VDO_BAD_PAGE,
+ "Expected page %llu but got page %llu instead",
+ (unsigned long long) info->pbn,
+ (unsigned long long) pbn);
+
+ vdo_continue_completion(completion, result);
+ return;
+ }
+
+ if (validity == VDO_BLOCK_MAP_PAGE_INVALID)
+ vdo_format_block_map_page(page, nonce, info->pbn, false);
+
+ info->recovery_lock = 0;
+ set_info_state(info, PS_RESIDENT);
+ distribute_page_over_waitq(info, &info->waiting);
+
+ /*
+ * Don't decrement until right before calling check_for_drain_complete() to
+ * ensure that the above work can't cause the page cache to be freed out from under us.
+ */
+ cache->outstanding_reads--;
+ check_for_drain_complete(cache->zone);
+}
+
+/**
+ * handle_rebuild_read_error() - Handle a read error during a read-only rebuild.
+ * @completion: The page load completion.
+ */
+static void handle_rebuild_read_error(struct vdo_completion *completion)
+{
+ struct page_info *info = completion->parent;
+ struct vdo_page_cache *cache = info->cache;
+
+ assert_on_cache_thread(cache, __func__);
+
+ /*
+ * We are doing a read-only rebuild, so treat this as a successful read
+ * of an uninitialized page.
+ */
+ vio_record_metadata_io_error(as_vio(completion));
+ ADD_ONCE(cache->stats.failed_reads, 1);
+ memset(get_page_buffer(info), 0, VDO_BLOCK_SIZE);
+ vdo_reset_completion(completion);
+ page_is_loaded(completion);
+}
+
+static void load_cache_page_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct page_info *info = vio->completion.parent;
+
+ continue_vio_after_io(vio, page_is_loaded, info->cache->zone->thread_id);
+}
+
+/**
+ * launch_page_load() - Begin the process of loading a page.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check launch_page_load(struct page_info *info,
+ physical_block_number_t pbn)
+{
+ int result;
+ vdo_action_fn callback;
+ struct vdo_page_cache *cache = info->cache;
+
+ assert_io_allowed(cache);
+
+ result = set_info_pbn(info, pbn);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT((info->busy == 0), "Page is not busy before loading.");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ set_info_state(info, PS_INCOMING);
+ cache->outstanding_reads++;
+ ADD_ONCE(cache->stats.pages_loaded, 1);
+ callback = (cache->rebuilding ? handle_rebuild_read_error : handle_load_error);
+ vdo_submit_metadata_vio(info->vio, pbn, load_cache_page_endio,
+ callback, REQ_OP_READ | REQ_PRIO);
+ return VDO_SUCCESS;
+}
+
+static void write_pages(struct vdo_completion *completion);
+
+/** handle_flush_error() - Handle errors flushing the layer. */
+static void handle_flush_error(struct vdo_completion *completion)
+{
+ struct page_info *info = completion->parent;
+
+ vio_record_metadata_io_error(as_vio(completion));
+ set_persistent_error(info->cache, "flush failed", completion->result);
+ write_pages(completion);
+}
+
+static void flush_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct page_info *info = vio->completion.parent;
+
+ continue_vio_after_io(vio, write_pages, info->cache->zone->thread_id);
+}
+
+/** save_pages() - Attempt to save the outgoing pages by first flushing the layer. */
+static void save_pages(struct vdo_page_cache *cache)
+{
+ struct page_info *info;
+ struct vio *vio;
+
+ if ((cache->pages_in_flush > 0) || (cache->pages_to_flush == 0))
+ return;
+
+ assert_io_allowed(cache);
+
+ info = list_first_entry(&cache->outgoing_list, struct page_info, state_entry);
+
+ cache->pages_in_flush = cache->pages_to_flush;
+ cache->pages_to_flush = 0;
+ ADD_ONCE(cache->stats.flush_count, 1);
+
+ vio = info->vio;
+
+ /*
+ * We must make sure that the recovery journal entries that changed these pages were
+ * successfully persisted, and thus must issue a flush before each batch of pages is
+ * written to ensure this.
+ */
+ vdo_submit_flush_vio(vio, flush_endio, handle_flush_error);
+}
+
+/**
+ * schedule_page_save() - Add a page to the outgoing list of pages waiting to be saved.
+ *
+ * Once in the list, a page may not be used until it has been written out.
+ */
+static void schedule_page_save(struct page_info *info)
+{
+ if (info->busy > 0) {
+ info->write_status = WRITE_STATUS_DEFERRED;
+ return;
+ }
+
+ info->cache->pages_to_flush++;
+ info->cache->outstanding_writes++;
+ set_info_state(info, PS_OUTGOING);
+}
+
+/**
+ * launch_page_save() - Add a page to outgoing pages waiting to be saved, and then start saving
+ * pages if another save is not in progress.
+ */
+static void launch_page_save(struct page_info *info)
+{
+ schedule_page_save(info);
+ save_pages(info->cache);
+}
+
+/**
+ * completion_needs_page() - Determine whether a given vdo_page_completion (as a waiter) is
+ * requesting a given page number.
+ * @context: A pointer to the pbn of the desired page.
+ *
+ * Implements waiter_match_fn.
+ *
+ * Return: true if the page completion is for the desired page number.
+ */
+static bool completion_needs_page(struct vdo_waiter *waiter, void *context)
+{
+ physical_block_number_t *pbn = context;
+
+ return (page_completion_from_waiter(waiter)->pbn == *pbn);
+}
+
+/**
+ * allocate_free_page() - Allocate a free page to the first completion in the waiting queue, and
+ * any other completions that match it in page number.
+ */
+static void allocate_free_page(struct page_info *info)
+{
+ int result;
+ struct vdo_waiter *oldest_waiter;
+ physical_block_number_t pbn;
+ struct vdo_page_cache *cache = info->cache;
+
+ assert_on_cache_thread(cache, __func__);
+
+ if (!vdo_waitq_has_waiters(&cache->free_waiters)) {
+ if (cache->stats.cache_pressure > 0) {
+ vdo_log_info("page cache pressure relieved");
+ WRITE_ONCE(cache->stats.cache_pressure, 0);
+ }
+
+ return;
+ }
+
+ result = reset_page_info(info);
+ if (result != VDO_SUCCESS) {
+ set_persistent_error(cache, "cannot reset page info", result);
+ return;
+ }
+
+ oldest_waiter = vdo_waitq_get_first_waiter(&cache->free_waiters);
+ pbn = page_completion_from_waiter(oldest_waiter)->pbn;
+
+ /*
+ * Remove all entries which match the page number in question and push them onto the page
+ * info's waitq.
+ */
+ vdo_waitq_dequeue_matching_waiters(&cache->free_waiters, completion_needs_page,
+ &pbn, &info->waiting);
+ cache->waiter_count -= vdo_waitq_num_waiters(&info->waiting);
+
+ result = launch_page_load(info, pbn);
+ if (result != VDO_SUCCESS) {
+ vdo_waitq_notify_all_waiters(&info->waiting,
+ complete_waiter_with_error, &result);
+ }
+}
+
+/**
+ * discard_a_page() - Begin the process of discarding a page.
+ *
+ * If no page is discardable, increments a count of deferred frees so that the next release of a
+ * page which is no longer busy will kick off another discard cycle. This is an indication that the
+ * cache is not big enough.
+ *
+ * If the selected page is not dirty, immediately allocates the page to the oldest completion
+ * waiting for a free page.
+ */
+static void discard_a_page(struct vdo_page_cache *cache)
+{
+ struct page_info *info = select_lru_page(cache);
+
+ if (info == NULL) {
+ report_cache_pressure(cache);
+ return;
+ }
+
+ if (!is_dirty(info)) {
+ allocate_free_page(info);
+ return;
+ }
+
+ VDO_ASSERT_LOG_ONLY(!is_in_flight(info),
+ "page selected for discard is not in flight");
+
+ cache->discard_count++;
+ info->write_status = WRITE_STATUS_DISCARD;
+ launch_page_save(info);
+}
+
+/**
+ * discard_page_for_completion() - Helper used to trigger a discard so that the completion can get
+ * a different page.
+ */
+static void discard_page_for_completion(struct vdo_page_completion *vdo_page_comp)
+{
+ struct vdo_page_cache *cache = vdo_page_comp->cache;
+
+ cache->waiter_count++;
+ vdo_waitq_enqueue_waiter(&cache->free_waiters, &vdo_page_comp->waiter);
+ discard_a_page(cache);
+}
+
+/**
+ * discard_page_if_needed() - Helper used to trigger a discard if the cache needs another free
+ * page.
+ * @cache: The page cache.
+ */
+static void discard_page_if_needed(struct vdo_page_cache *cache)
+{
+ if (cache->waiter_count > cache->discard_count)
+ discard_a_page(cache);
+}
+
+/**
+ * write_has_finished() - Inform the cache that a write has finished (possibly with an error).
+ * @info: The info structure for the page whose write just completed.
+ *
+ * Return: true if the page write was a discard.
+ */
+static bool write_has_finished(struct page_info *info)
+{
+ bool was_discard = (info->write_status == WRITE_STATUS_DISCARD);
+
+ assert_on_cache_thread(info->cache, __func__);
+ info->cache->outstanding_writes--;
+
+ info->write_status = WRITE_STATUS_NORMAL;
+ return was_discard;
+}
+
+/**
+ * handle_page_write_error() - Handler for page write errors.
+ * @completion: The page write vio.
+ */
+static void handle_page_write_error(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct page_info *info = completion->parent;
+ struct vdo_page_cache *cache = info->cache;
+
+ vio_record_metadata_io_error(as_vio(completion));
+
+ /* If we're already read-only, write failures are to be expected. */
+ if (result != VDO_READ_ONLY) {
+ vdo_log_ratelimit(vdo_log_error,
+ "failed to write block map page %llu",
+ (unsigned long long) info->pbn);
+ }
+
+ set_info_state(info, PS_DIRTY);
+ ADD_ONCE(cache->stats.failed_writes, 1);
+ set_persistent_error(cache, "cannot write page", result);
+
+ if (!write_has_finished(info))
+ discard_page_if_needed(cache);
+
+ check_for_drain_complete(cache->zone);
+}
+
+static void page_is_written_out(struct vdo_completion *completion);
+
+static void write_cache_page_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct page_info *info = vio->completion.parent;
+
+ continue_vio_after_io(vio, page_is_written_out, info->cache->zone->thread_id);
+}
+
+/**
+ * page_is_written_out() - Callback used when a page has been written out.
+ * @completion: The vio which wrote the page. Its parent is a page_info.
+ */
+static void page_is_written_out(struct vdo_completion *completion)
+{
+ bool was_discard, reclaimed;
+ u32 reclamations;
+ struct page_info *info = completion->parent;
+ struct vdo_page_cache *cache = info->cache;
+ struct block_map_page *page = (struct block_map_page *) get_page_buffer(info);
+
+ if (!page->header.initialized) {
+ page->header.initialized = true;
+ vdo_submit_metadata_vio(info->vio, info->pbn,
+ write_cache_page_endio,
+ handle_page_write_error,
+ REQ_OP_WRITE | REQ_PRIO | REQ_PREFLUSH);
+ return;
+ }
+
+ /* Handle journal updates and torn write protection. */
+ vdo_release_recovery_journal_block_reference(cache->zone->block_map->journal,
+ info->recovery_lock,
+ VDO_ZONE_TYPE_LOGICAL,
+ cache->zone->zone_number);
+ info->recovery_lock = 0;
+ was_discard = write_has_finished(info);
+ reclaimed = (!was_discard || (info->busy > 0) || vdo_waitq_has_waiters(&info->waiting));
+
+ set_info_state(info, PS_RESIDENT);
+
+ reclamations = distribute_page_over_waitq(info, &info->waiting);
+ ADD_ONCE(cache->stats.reclaimed, reclamations);
+
+ if (was_discard)
+ cache->discard_count--;
+
+ if (reclaimed)
+ discard_page_if_needed(cache);
+ else
+ allocate_free_page(info);
+
+ check_for_drain_complete(cache->zone);
+}
+
+/**
+ * write_pages() - Write the batch of pages which were covered by the layer flush which just
+ * completed.
+ * @flush_completion: The flush vio.
+ *
+ * This callback is registered in save_pages().
+ */
+static void write_pages(struct vdo_completion *flush_completion)
+{
+ struct vdo_page_cache *cache = ((struct page_info *) flush_completion->parent)->cache;
+
+ /*
+ * We need to cache these two values on the stack since it is possible for the last
+ * page info to cause the page cache to get freed. Hence once we launch the last page,
+ * it may be unsafe to dereference the cache.
+ */
+ bool has_unflushed_pages = (cache->pages_to_flush > 0);
+ page_count_t pages_in_flush = cache->pages_in_flush;
+
+ cache->pages_in_flush = 0;
+ while (pages_in_flush-- > 0) {
+ struct page_info *info =
+ list_first_entry(&cache->outgoing_list, struct page_info,
+ state_entry);
+
+ list_del_init(&info->state_entry);
+ if (vdo_is_read_only(info->cache->vdo)) {
+ struct vdo_completion *completion = &info->vio->completion;
+
+ vdo_reset_completion(completion);
+ completion->callback = page_is_written_out;
+ completion->error_handler = handle_page_write_error;
+ vdo_fail_completion(completion, VDO_READ_ONLY);
+ continue;
+ }
+ ADD_ONCE(info->cache->stats.pages_saved, 1);
+ vdo_submit_metadata_vio(info->vio, info->pbn, write_cache_page_endio,
+ handle_page_write_error, REQ_OP_WRITE | REQ_PRIO);
+ }
+
+ if (has_unflushed_pages) {
+ /*
+ * If there are unflushed pages, the cache can't have been freed, so this call is
+ * safe.
+ */
+ save_pages(cache);
+ }
+}
+
+/**
+ * vdo_release_page_completion() - Release a VDO Page Completion.
+ *
+ * The page referenced by this completion (if any) will no longer be held busy by this completion.
+ * If a page becomes discardable and there are completions awaiting free pages then a new round of
+ * page discarding is started.
+ */
+void vdo_release_page_completion(struct vdo_completion *completion)
+{
+ struct page_info *discard_info = NULL;
+ struct vdo_page_completion *page_completion = as_vdo_page_completion(completion);
+ struct vdo_page_cache *cache;
+
+ if (completion->result == VDO_SUCCESS) {
+ if (!validate_completed_page_or_enter_read_only_mode(page_completion, false))
+ return;
+
+ if (--page_completion->info->busy == 0)
+ discard_info = page_completion->info;
+ }
+
+ VDO_ASSERT_LOG_ONLY((page_completion->waiter.next_waiter == NULL),
+ "Page being released after leaving all queues");
+
+ page_completion->info = NULL;
+ cache = page_completion->cache;
+ assert_on_cache_thread(cache, __func__);
+
+ if (discard_info != NULL) {
+ if (discard_info->write_status == WRITE_STATUS_DEFERRED) {
+ discard_info->write_status = WRITE_STATUS_NORMAL;
+ launch_page_save(discard_info);
+ }
+
+ /*
+ * if there are excess requests for pages (that have not already started discards)
+ * we need to discard some page (which may be this one)
+ */
+ discard_page_if_needed(cache);
+ }
+}
+
+/**
+ * load_page_for_completion() - Helper function to load a page as described by a VDO Page
+ * Completion.
+ */
+static void load_page_for_completion(struct page_info *info,
+ struct vdo_page_completion *vdo_page_comp)
+{
+ int result;
+
+ vdo_waitq_enqueue_waiter(&info->waiting, &vdo_page_comp->waiter);
+ result = launch_page_load(info, vdo_page_comp->pbn);
+ if (result != VDO_SUCCESS) {
+ vdo_waitq_notify_all_waiters(&info->waiting,
+ complete_waiter_with_error, &result);
+ }
+}
+
+/**
+ * vdo_get_page() - Initialize a page completion and get a block map page.
+ * @page_completion: The vdo_page_completion to initialize.
+ * @zone: The block map zone of the desired page.
+ * @pbn: The absolute physical block of the desired page.
+ * @writable: Whether the page can be modified.
+ * @parent: The object to notify when the fetch is complete.
+ * @callback: The notification callback.
+ * @error_handler: The handler for fetch errors.
+ * @requeue: Whether we must requeue when notifying the parent.
+ *
+ * May cause another page to be discarded (potentially writing a dirty page) and the one nominated
+ * by the completion to be loaded from disk. When the callback is invoked, the page will be
+ * resident in the cache and marked busy. All callers must call vdo_release_page_completion()
+ * when they are done with the page to clear the busy mark.
+ */
+void vdo_get_page(struct vdo_page_completion *page_completion,
+ struct block_map_zone *zone, physical_block_number_t pbn,
+ bool writable, void *parent, vdo_action_fn callback,
+ vdo_action_fn error_handler, bool requeue)
+{
+ struct vdo_page_cache *cache = &zone->page_cache;
+ struct vdo_completion *completion = &page_completion->completion;
+ struct page_info *info;
+
+ assert_on_cache_thread(cache, __func__);
+ VDO_ASSERT_LOG_ONLY((page_completion->waiter.next_waiter == NULL),
+ "New page completion was not already on a wait queue");
+
+ *page_completion = (struct vdo_page_completion) {
+ .pbn = pbn,
+ .writable = writable,
+ .cache = cache,
+ };
+
+ vdo_initialize_completion(completion, cache->vdo, VDO_PAGE_COMPLETION);
+ vdo_prepare_completion(completion, callback, error_handler,
+ cache->zone->thread_id, parent);
+ completion->requeue = requeue;
+
+ if (page_completion->writable && vdo_is_read_only(cache->vdo)) {
+ vdo_fail_completion(completion, VDO_READ_ONLY);
+ return;
+ }
+
+ if (page_completion->writable)
+ ADD_ONCE(cache->stats.write_count, 1);
+ else
+ ADD_ONCE(cache->stats.read_count, 1);
+
+ info = find_page(cache, page_completion->pbn);
+ if (info != NULL) {
+ /* The page is in the cache already. */
+ if ((info->write_status == WRITE_STATUS_DEFERRED) ||
+ is_incoming(info) ||
+ (is_outgoing(info) && page_completion->writable)) {
+ /* The page is unusable until it has finished I/O. */
+ ADD_ONCE(cache->stats.wait_for_page, 1);
+ vdo_waitq_enqueue_waiter(&info->waiting, &page_completion->waiter);
+ return;
+ }
+
+ if (is_valid(info)) {
+ /* The page is usable. */
+ ADD_ONCE(cache->stats.found_in_cache, 1);
+ if (!is_present(info))
+ ADD_ONCE(cache->stats.read_outgoing, 1);
+ update_lru(info);
+ info->busy++;
+ complete_with_page(info, page_completion);
+ return;
+ }
+
+ /* Something horrible has gone wrong. */
+ VDO_ASSERT_LOG_ONLY(false, "Info found in a usable state.");
+ }
+
+ /* The page must be fetched. */
+ info = find_free_page(cache);
+ if (info != NULL) {
+ ADD_ONCE(cache->stats.fetch_required, 1);
+ load_page_for_completion(info, page_completion);
+ return;
+ }
+
+ /* The page must wait for a page to be discarded. */
+ ADD_ONCE(cache->stats.discard_required, 1);
+ discard_page_for_completion(page_completion);
+}
+
+/**
+ * vdo_request_page_write() - Request that a VDO page be written out as soon as it is not busy.
+ * @completion: The vdo_page_completion containing the page.
+ */
+void vdo_request_page_write(struct vdo_completion *completion)
+{
+ struct page_info *info;
+ struct vdo_page_completion *vdo_page_comp = as_vdo_page_completion(completion);
+
+ if (!validate_completed_page_or_enter_read_only_mode(vdo_page_comp, true))
+ return;
+
+ info = vdo_page_comp->info;
+ set_info_state(info, PS_DIRTY);
+ launch_page_save(info);
+}
+
+/**
+ * vdo_get_cached_page() - Get the block map page from a page completion.
+ * @completion: A vdo page completion whose callback has been called.
+ * @page_ptr: A pointer to hold the page
+ *
+ * Return: VDO_SUCCESS or an error
+ */
+int vdo_get_cached_page(struct vdo_completion *completion,
+ struct block_map_page **page_ptr)
+{
+ int result;
+ struct vdo_page_completion *vpc;
+
+ vpc = as_vdo_page_completion(completion);
+ result = validate_completed_page(vpc, true);
+ if (result == VDO_SUCCESS)
+ *page_ptr = (struct block_map_page *) get_page_buffer(vpc->info);
+
+ return result;
+}
+
+/**
+ * vdo_invalidate_page_cache() - Invalidate all entries in the VDO page cache.
+ *
+ * There must not be any dirty pages in the cache.
+ *
+ * Return: A success or error code.
+ */
+int vdo_invalidate_page_cache(struct vdo_page_cache *cache)
+{
+ struct page_info *info;
+
+ assert_on_cache_thread(cache, __func__);
+
+ /* Make sure we don't throw away any dirty pages. */
+ for (info = cache->infos; info < cache->infos + cache->page_count; info++) {
+ int result = VDO_ASSERT(!is_dirty(info), "cache must have no dirty pages");
+
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ /* Reset the page map by re-allocating it. */
+ vdo_int_map_free(vdo_forget(cache->page_map));
+ return vdo_int_map_create(cache->page_count, &cache->page_map);
+}
+
+/**
+ * get_tree_page_by_index() - Get the tree page for a given height and page index.
+ *
+ * Return: The requested page.
+ */
+static struct tree_page * __must_check get_tree_page_by_index(struct forest *forest,
+ root_count_t root_index,
+ height_t height,
+ page_number_t page_index)
+{
+ page_number_t offset = 0;
+ size_t segment;
+
+ for (segment = 0; segment < forest->segments; segment++) {
+ page_number_t border = forest->boundaries[segment].levels[height - 1];
+
+ if (page_index < border) {
+ struct block_map_tree *tree = &forest->trees[root_index];
+
+ return &(tree->segments[segment].levels[height - 1][page_index - offset]);
+ }
+
+ offset = border;
+ }
+
+ return NULL;
+}
+
+/* Get the page referred to by the lock's tree slot at its current height. */
+static inline struct tree_page *get_tree_page(const struct block_map_zone *zone,
+ const struct tree_lock *lock)
+{
+ return get_tree_page_by_index(zone->block_map->forest, lock->root_index,
+ lock->height,
+ lock->tree_slots[lock->height].page_index);
+}
+
+/** vdo_copy_valid_page() - Validate and copy a buffer to a page. */
+bool vdo_copy_valid_page(char *buffer, nonce_t nonce,
+ physical_block_number_t pbn,
+ struct block_map_page *page)
+{
+ struct block_map_page *loaded = (struct block_map_page *) buffer;
+ enum block_map_page_validity validity =
+ vdo_validate_block_map_page(loaded, nonce, pbn);
+
+ if (validity == VDO_BLOCK_MAP_PAGE_VALID) {
+ memcpy(page, loaded, VDO_BLOCK_SIZE);
+ return true;
+ }
+
+ if (validity == VDO_BLOCK_MAP_PAGE_BAD) {
+ vdo_log_error_strerror(VDO_BAD_PAGE,
+ "Expected page %llu but got page %llu instead",
+ (unsigned long long) pbn,
+ (unsigned long long) vdo_get_block_map_page_pbn(loaded));
+ }
+
+ return false;
+}
+
+/**
+ * in_cyclic_range() - Check whether the given value is between the lower and upper bounds, within
+ * a cyclic range of values from 0 to (modulus - 1).
+ * @lower: The lowest value to accept.
+ * @value: The value to check.
+ * @upper: The highest value to accept.
+ * @modulus: The size of the cyclic space, no more than 2^15.
+ *
+ * The value and both bounds must be smaller than the modulus.
+ *
+ * Return: true if the value is in range.
+ */
+static bool in_cyclic_range(u16 lower, u16 value, u16 upper, u16 modulus)
+{
+ if (value < lower)
+ value += modulus;
+ if (upper < lower)
+ upper += modulus;
+ return (value <= upper);
+}
+
+/**
+ * is_not_older() - Check whether a generation is strictly older than some other generation in the
+ * context of a zone's current generation range.
+ * @zone: The zone in which to do the comparison.
+ * @a: The generation in question.
+ * @b: The generation to compare to.
+ *
+ * Return: true if generation @a is not strictly older than generation @b in the context of @zone
+ */
+static bool __must_check is_not_older(struct block_map_zone *zone, u8 a, u8 b)
+{
+ int result;
+
+ result = VDO_ASSERT((in_cyclic_range(zone->oldest_generation, a, zone->generation, 1 << 8) &&
+ in_cyclic_range(zone->oldest_generation, b, zone->generation, 1 << 8)),
+ "generation(s) %u, %u are out of range [%u, %u]",
+ a, b, zone->oldest_generation, zone->generation);
+ if (result != VDO_SUCCESS) {
+ enter_zone_read_only_mode(zone, result);
+ return true;
+ }
+
+ return in_cyclic_range(b, a, zone->generation, 1 << 8);
+}
+
+static void release_generation(struct block_map_zone *zone, u8 generation)
+{
+ int result;
+
+ result = VDO_ASSERT((zone->dirty_page_counts[generation] > 0),
+ "dirty page count underflow for generation %u", generation);
+ if (result != VDO_SUCCESS) {
+ enter_zone_read_only_mode(zone, result);
+ return;
+ }
+
+ zone->dirty_page_counts[generation]--;
+ while ((zone->dirty_page_counts[zone->oldest_generation] == 0) &&
+ (zone->oldest_generation != zone->generation))
+ zone->oldest_generation++;
+}
+
+static void set_generation(struct block_map_zone *zone, struct tree_page *page,
+ u8 new_generation)
+{
+ u32 new_count;
+ int result;
+ bool decrement_old = vdo_waiter_is_waiting(&page->waiter);
+ u8 old_generation = page->generation;
+
+ if (decrement_old && (old_generation == new_generation))
+ return;
+
+ page->generation = new_generation;
+ new_count = ++zone->dirty_page_counts[new_generation];
+ result = VDO_ASSERT((new_count != 0), "dirty page count overflow for generation %u",
+ new_generation);
+ if (result != VDO_SUCCESS) {
+ enter_zone_read_only_mode(zone, result);
+ return;
+ }
+
+ if (decrement_old)
+ release_generation(zone, old_generation);
+}
+
+static void write_page(struct tree_page *tree_page, struct pooled_vio *vio);
+
+/* Implements waiter_callback_fn */
+static void write_page_callback(struct vdo_waiter *waiter, void *context)
+{
+ write_page(container_of(waiter, struct tree_page, waiter), context);
+}
+
+static void acquire_vio(struct vdo_waiter *waiter, struct block_map_zone *zone)
+{
+ waiter->callback = write_page_callback;
+ acquire_vio_from_pool(zone->vio_pool, waiter);
+}
+
+/* Return: true if all possible generations were not already active */
+static bool attempt_increment(struct block_map_zone *zone)
+{
+ u8 generation = zone->generation + 1;
+
+ if (zone->oldest_generation == generation)
+ return false;
+
+ zone->generation = generation;
+ return true;
+}
+
+/* Launches a flush if one is not already in progress. */
+static void enqueue_page(struct tree_page *page, struct block_map_zone *zone)
+{
+ if ((zone->flusher == NULL) && attempt_increment(zone)) {
+ zone->flusher = page;
+ acquire_vio(&page->waiter, zone);
+ return;
+ }
+
+ vdo_waitq_enqueue_waiter(&zone->flush_waiters, &page->waiter);
+}
+
+static void write_page_if_not_dirtied(struct vdo_waiter *waiter, void *context)
+{
+ struct tree_page *page = container_of(waiter, struct tree_page, waiter);
+ struct write_if_not_dirtied_context *write_context = context;
+
+ if (page->generation == write_context->generation) {
+ acquire_vio(waiter, write_context->zone);
+ return;
+ }
+
+ enqueue_page(page, write_context->zone);
+}
+
+static void return_to_pool(struct block_map_zone *zone, struct pooled_vio *vio)
+{
+ return_vio_to_pool(zone->vio_pool, vio);
+ check_for_drain_complete(zone);
+}
+
+/* This callback is registered in write_initialized_page(). */
+static void finish_page_write(struct vdo_completion *completion)
+{
+ bool dirty;
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = container_of(vio, struct pooled_vio, vio);
+ struct tree_page *page = completion->parent;
+ struct block_map_zone *zone = pooled->context;
+
+ vdo_release_recovery_journal_block_reference(zone->block_map->journal,
+ page->writing_recovery_lock,
+ VDO_ZONE_TYPE_LOGICAL,
+ zone->zone_number);
+
+ dirty = (page->writing_generation != page->generation);
+ release_generation(zone, page->writing_generation);
+ page->writing = false;
+
+ if (zone->flusher == page) {
+ struct write_if_not_dirtied_context context = {
+ .zone = zone,
+ .generation = page->writing_generation,
+ };
+
+ vdo_waitq_notify_all_waiters(&zone->flush_waiters,
+ write_page_if_not_dirtied, &context);
+ if (dirty && attempt_increment(zone)) {
+ write_page(page, pooled);
+ return;
+ }
+
+ zone->flusher = NULL;
+ }
+
+ if (dirty) {
+ enqueue_page(page, zone);
+ } else if ((zone->flusher == NULL) && vdo_waitq_has_waiters(&zone->flush_waiters) &&
+ attempt_increment(zone)) {
+ zone->flusher = container_of(vdo_waitq_dequeue_waiter(&zone->flush_waiters),
+ struct tree_page, waiter);
+ write_page(zone->flusher, pooled);
+ return;
+ }
+
+ return_to_pool(zone, pooled);
+}
+
+static void handle_write_error(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = container_of(vio, struct pooled_vio, vio);
+ struct block_map_zone *zone = pooled->context;
+
+ vio_record_metadata_io_error(vio);
+ enter_zone_read_only_mode(zone, result);
+ return_to_pool(zone, pooled);
+}
+
+static void write_page_endio(struct bio *bio);
+
+static void write_initialized_page(struct vdo_completion *completion)
+{
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = container_of(vio, struct pooled_vio, vio);
+ struct block_map_zone *zone = pooled->context;
+ struct tree_page *tree_page = completion->parent;
+ struct block_map_page *page = (struct block_map_page *) vio->data;
+ blk_opf_t operation = REQ_OP_WRITE | REQ_PRIO;
+
+ /*
+ * Now that we know the page has been written at least once, mark the copy we are writing
+ * as initialized.
+ */
+ page->header.initialized = true;
+
+ if (zone->flusher == tree_page)
+ operation |= REQ_PREFLUSH;
+
+ vdo_submit_metadata_vio(vio, vdo_get_block_map_page_pbn(page),
+ write_page_endio, handle_write_error,
+ operation);
+}
+
+static void write_page_endio(struct bio *bio)
+{
+ struct pooled_vio *vio = bio->bi_private;
+ struct block_map_zone *zone = vio->context;
+ struct block_map_page *page = (struct block_map_page *) vio->vio.data;
+
+ continue_vio_after_io(&vio->vio,
+ (page->header.initialized ?
+ finish_page_write : write_initialized_page),
+ zone->thread_id);
+}
+
+static void write_page(struct tree_page *tree_page, struct pooled_vio *vio)
+{
+ struct vdo_completion *completion = &vio->vio.completion;
+ struct block_map_zone *zone = vio->context;
+ struct block_map_page *page = vdo_as_block_map_page(tree_page);
+
+ if ((zone->flusher != tree_page) &&
+ is_not_older(zone, tree_page->generation, zone->generation)) {
+ /*
+ * This page was re-dirtied after the last flush was issued, hence we need to do
+ * another flush.
+ */
+ enqueue_page(tree_page, zone);
+ return_to_pool(zone, vio);
+ return;
+ }
+
+ completion->parent = tree_page;
+ memcpy(vio->vio.data, tree_page->page_buffer, VDO_BLOCK_SIZE);
+ completion->callback_thread_id = zone->thread_id;
+
+ tree_page->writing = true;
+ tree_page->writing_generation = tree_page->generation;
+ tree_page->writing_recovery_lock = tree_page->recovery_lock;
+
+ /* Clear this now so that we know this page is not on any dirty list. */
+ tree_page->recovery_lock = 0;
+
+ /*
+ * We've already copied the page into the vio which will write it, so if it was not yet
+ * initialized, the first write will indicate that (for torn write protection). It is now
+ * safe to mark it as initialized in memory since if the write fails, the in memory state
+ * will become irrelevant.
+ */
+ if (page->header.initialized) {
+ write_initialized_page(completion);
+ return;
+ }
+
+ page->header.initialized = true;
+ vdo_submit_metadata_vio(&vio->vio, vdo_get_block_map_page_pbn(page),
+ write_page_endio, handle_write_error,
+ REQ_OP_WRITE | REQ_PRIO);
+}
+
+/* Release a lock on a page which was being loaded or allocated. */
+static void release_page_lock(struct data_vio *data_vio, char *what)
+{
+ struct block_map_zone *zone;
+ struct tree_lock *lock_holder;
+ struct tree_lock *lock = &data_vio->tree_lock;
+
+ VDO_ASSERT_LOG_ONLY(lock->locked,
+ "release of unlocked block map page %s for key %llu in tree %u",
+ what, (unsigned long long) lock->key, lock->root_index);
+
+ zone = data_vio->logical.zone->block_map_zone;
+ lock_holder = vdo_int_map_remove(zone->loading_pages, lock->key);
+ VDO_ASSERT_LOG_ONLY((lock_holder == lock),
+ "block map page %s mismatch for key %llu in tree %u",
+ what, (unsigned long long) lock->key, lock->root_index);
+ lock->locked = false;
+}
+
+static void finish_lookup(struct data_vio *data_vio, int result)
+{
+ data_vio->tree_lock.height = 0;
+
+ --data_vio->logical.zone->block_map_zone->active_lookups;
+
+ set_data_vio_logical_callback(data_vio, continue_data_vio_with_block_map_slot);
+ data_vio->vio.completion.error_handler = handle_data_vio_error;
+ continue_data_vio_with_error(data_vio, result);
+}
+
+static void abort_lookup_for_waiter(struct vdo_waiter *waiter, void *context)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+ int result = *((int *) context);
+
+ if (!data_vio->write) {
+ if (result == VDO_NO_SPACE)
+ result = VDO_SUCCESS;
+ } else if (result != VDO_NO_SPACE) {
+ result = VDO_READ_ONLY;
+ }
+
+ finish_lookup(data_vio, result);
+}
+
+static void abort_lookup(struct data_vio *data_vio, int result, char *what)
+{
+ if (result != VDO_NO_SPACE)
+ enter_zone_read_only_mode(data_vio->logical.zone->block_map_zone, result);
+
+ if (data_vio->tree_lock.locked) {
+ release_page_lock(data_vio, what);
+ vdo_waitq_notify_all_waiters(&data_vio->tree_lock.waiters,
+ abort_lookup_for_waiter,
+ &result);
+ }
+
+ finish_lookup(data_vio, result);
+}
+
+static void abort_load(struct data_vio *data_vio, int result)
+{
+ abort_lookup(data_vio, result, "load");
+}
+
+static bool __must_check is_invalid_tree_entry(const struct vdo *vdo,
+ const struct data_location *mapping,
+ height_t height)
+{
+ if (!vdo_is_valid_location(mapping) ||
+ vdo_is_state_compressed(mapping->state) ||
+ (vdo_is_mapped_location(mapping) && (mapping->pbn == VDO_ZERO_BLOCK)))
+ return true;
+
+ /* Roots aren't physical data blocks, so we can't check their PBNs. */
+ if (height == VDO_BLOCK_MAP_TREE_HEIGHT)
+ return false;
+
+ return !vdo_is_physical_data_block(vdo->depot, mapping->pbn);
+}
+
+static void load_block_map_page(struct block_map_zone *zone, struct data_vio *data_vio);
+static void allocate_block_map_page(struct block_map_zone *zone,
+ struct data_vio *data_vio);
+
+static void continue_with_loaded_page(struct data_vio *data_vio,
+ struct block_map_page *page)
+{
+ struct tree_lock *lock = &data_vio->tree_lock;
+ struct block_map_tree_slot slot = lock->tree_slots[lock->height];
+ struct data_location mapping =
+ vdo_unpack_block_map_entry(&page->entries[slot.block_map_slot.slot]);
+
+ if (is_invalid_tree_entry(vdo_from_data_vio(data_vio), &mapping, lock->height)) {
+ vdo_log_error_strerror(VDO_BAD_MAPPING,
+ "Invalid block map tree PBN: %llu with state %u for page index %u at height %u",
+ (unsigned long long) mapping.pbn, mapping.state,
+ lock->tree_slots[lock->height - 1].page_index,
+ lock->height - 1);
+ abort_load(data_vio, VDO_BAD_MAPPING);
+ return;
+ }
+
+ if (!vdo_is_mapped_location(&mapping)) {
+ /* The page we need is unallocated */
+ allocate_block_map_page(data_vio->logical.zone->block_map_zone,
+ data_vio);
+ return;
+ }
+
+ lock->tree_slots[lock->height - 1].block_map_slot.pbn = mapping.pbn;
+ if (lock->height == 1) {
+ finish_lookup(data_vio, VDO_SUCCESS);
+ return;
+ }
+
+ /* We know what page we need to load next */
+ load_block_map_page(data_vio->logical.zone->block_map_zone, data_vio);
+}
+
+static void continue_load_for_waiter(struct vdo_waiter *waiter, void *context)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+
+ data_vio->tree_lock.height--;
+ continue_with_loaded_page(data_vio, context);
+}
+
+static void finish_block_map_page_load(struct vdo_completion *completion)
+{
+ physical_block_number_t pbn;
+ struct tree_page *tree_page;
+ struct block_map_page *page;
+ nonce_t nonce;
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = vio_as_pooled_vio(vio);
+ struct data_vio *data_vio = completion->parent;
+ struct block_map_zone *zone = pooled->context;
+ struct tree_lock *tree_lock = &data_vio->tree_lock;
+
+ tree_lock->height--;
+ pbn = tree_lock->tree_slots[tree_lock->height].block_map_slot.pbn;
+ tree_page = get_tree_page(zone, tree_lock);
+ page = (struct block_map_page *) tree_page->page_buffer;
+ nonce = zone->block_map->nonce;
+
+ if (!vdo_copy_valid_page(vio->data, nonce, pbn, page))
+ vdo_format_block_map_page(page, nonce, pbn, false);
+ return_vio_to_pool(zone->vio_pool, pooled);
+
+ /* Release our claim to the load and wake any waiters */
+ release_page_lock(data_vio, "load");
+ vdo_waitq_notify_all_waiters(&tree_lock->waiters, continue_load_for_waiter, page);
+ continue_with_loaded_page(data_vio, page);
+}
+
+static void handle_io_error(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = container_of(vio, struct pooled_vio, vio);
+ struct data_vio *data_vio = completion->parent;
+ struct block_map_zone *zone = pooled->context;
+
+ vio_record_metadata_io_error(vio);
+ return_vio_to_pool(zone->vio_pool, pooled);
+ abort_load(data_vio, result);
+}
+
+static void load_page_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct data_vio *data_vio = vio->completion.parent;
+
+ continue_vio_after_io(vio, finish_block_map_page_load,
+ data_vio->logical.zone->thread_id);
+}
+
+static void load_page(struct vdo_waiter *waiter, void *context)
+{
+ struct pooled_vio *pooled = context;
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+ struct tree_lock *lock = &data_vio->tree_lock;
+ physical_block_number_t pbn = lock->tree_slots[lock->height - 1].block_map_slot.pbn;
+
+ pooled->vio.completion.parent = data_vio;
+ vdo_submit_metadata_vio(&pooled->vio, pbn, load_page_endio,
+ handle_io_error, REQ_OP_READ | REQ_PRIO);
+}
+
+/*
+ * If the page is already locked, queue up to wait for the lock to be released. If the lock is
+ * acquired, @data_vio->tree_lock.locked will be true.
+ */
+static int attempt_page_lock(struct block_map_zone *zone, struct data_vio *data_vio)
+{
+ int result;
+ struct tree_lock *lock_holder;
+ struct tree_lock *lock = &data_vio->tree_lock;
+ height_t height = lock->height;
+ struct block_map_tree_slot tree_slot = lock->tree_slots[height];
+ union page_key key;
+
+ key.descriptor = (struct page_descriptor) {
+ .root_index = lock->root_index,
+ .height = height,
+ .page_index = tree_slot.page_index,
+ .slot = tree_slot.block_map_slot.slot,
+ };
+ lock->key = key.key;
+
+ result = vdo_int_map_put(zone->loading_pages, lock->key,
+ lock, false, (void **) &lock_holder);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (lock_holder == NULL) {
+ /* We got the lock */
+ data_vio->tree_lock.locked = true;
+ return VDO_SUCCESS;
+ }
+
+ /* Someone else is loading or allocating the page we need */
+ vdo_waitq_enqueue_waiter(&lock_holder->waiters, &data_vio->waiter);
+ return VDO_SUCCESS;
+}
+
+/* Load a block map tree page from disk, for the next level in the data vio tree lock. */
+static void load_block_map_page(struct block_map_zone *zone, struct data_vio *data_vio)
+{
+ int result;
+
+ result = attempt_page_lock(zone, data_vio);
+ if (result != VDO_SUCCESS) {
+ abort_load(data_vio, result);
+ return;
+ }
+
+ if (data_vio->tree_lock.locked) {
+ data_vio->waiter.callback = load_page;
+ acquire_vio_from_pool(zone->vio_pool, &data_vio->waiter);
+ }
+}
+
+static void allocation_failure(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ if (vdo_requeue_completion_if_needed(completion,
+ data_vio->logical.zone->thread_id))
+ return;
+
+ abort_lookup(data_vio, completion->result, "allocation");
+}
+
+static void continue_allocation_for_waiter(struct vdo_waiter *waiter, void *context)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+ struct tree_lock *tree_lock = &data_vio->tree_lock;
+ physical_block_number_t pbn = *((physical_block_number_t *) context);
+
+ tree_lock->height--;
+ data_vio->tree_lock.tree_slots[tree_lock->height].block_map_slot.pbn = pbn;
+
+ if (tree_lock->height == 0) {
+ finish_lookup(data_vio, VDO_SUCCESS);
+ return;
+ }
+
+ allocate_block_map_page(data_vio->logical.zone->block_map_zone, data_vio);
+}
+
+/** expire_oldest_list() - Expire the oldest list. */
+static void expire_oldest_list(struct dirty_lists *dirty_lists)
+{
+ block_count_t i = dirty_lists->offset++;
+
+ dirty_lists->oldest_period++;
+ if (!list_empty(&dirty_lists->eras[i][VDO_TREE_PAGE])) {
+ list_splice_tail_init(&dirty_lists->eras[i][VDO_TREE_PAGE],
+ &dirty_lists->expired[VDO_TREE_PAGE]);
+ }
+
+ if (!list_empty(&dirty_lists->eras[i][VDO_CACHE_PAGE])) {
+ list_splice_tail_init(&dirty_lists->eras[i][VDO_CACHE_PAGE],
+ &dirty_lists->expired[VDO_CACHE_PAGE]);
+ }
+
+ if (dirty_lists->offset == dirty_lists->maximum_age)
+ dirty_lists->offset = 0;
+}
+
+
+/** update_period() - Update the dirty_lists period if necessary. */
+static void update_period(struct dirty_lists *dirty, sequence_number_t period)
+{
+ while (dirty->next_period <= period) {
+ if ((dirty->next_period - dirty->oldest_period) == dirty->maximum_age)
+ expire_oldest_list(dirty);
+ dirty->next_period++;
+ }
+}
+
+/** write_expired_elements() - Write out the expired list. */
+static void write_expired_elements(struct block_map_zone *zone)
+{
+ struct tree_page *page, *ttmp;
+ struct page_info *info, *ptmp;
+ struct list_head *expired;
+ u8 generation = zone->generation;
+
+ expired = &zone->dirty_lists->expired[VDO_TREE_PAGE];
+ list_for_each_entry_safe(page, ttmp, expired, entry) {
+ int result;
+
+ list_del_init(&page->entry);
+
+ result = VDO_ASSERT(!vdo_waiter_is_waiting(&page->waiter),
+ "Newly expired page not already waiting to write");
+ if (result != VDO_SUCCESS) {
+ enter_zone_read_only_mode(zone, result);
+ continue;
+ }
+
+ set_generation(zone, page, generation);
+ if (!page->writing)
+ enqueue_page(page, zone);
+ }
+
+ expired = &zone->dirty_lists->expired[VDO_CACHE_PAGE];
+ list_for_each_entry_safe(info, ptmp, expired, state_entry) {
+ list_del_init(&info->state_entry);
+ schedule_page_save(info);
+ }
+
+ save_pages(&zone->page_cache);
+}
+
+/**
+ * add_to_dirty_lists() - Add an element to the dirty lists.
+ * @zone: The zone in which we are operating.
+ * @entry: The list entry of the element to add.
+ * @type: The type of page.
+ * @old_period: The period in which the element was previously dirtied, or 0 if it was not dirty.
+ * @new_period: The period in which the element has now been dirtied, or 0 if it does not hold a
+ * lock.
+ */
+static void add_to_dirty_lists(struct block_map_zone *zone,
+ struct list_head *entry,
+ enum block_map_page_type type,
+ sequence_number_t old_period,
+ sequence_number_t new_period)
+{
+ struct dirty_lists *dirty_lists = zone->dirty_lists;
+
+ if ((old_period == new_period) || ((old_period != 0) && (old_period < new_period)))
+ return;
+
+ if (new_period < dirty_lists->oldest_period) {
+ list_move_tail(entry, &dirty_lists->expired[type]);
+ } else {
+ update_period(dirty_lists, new_period);
+ list_move_tail(entry,
+ &dirty_lists->eras[new_period % dirty_lists->maximum_age][type]);
+ }
+
+ write_expired_elements(zone);
+}
+
+/*
+ * Record the allocation in the tree and wake any waiters now that the write lock has been
+ * released.
+ */
+static void finish_block_map_allocation(struct vdo_completion *completion)
+{
+ physical_block_number_t pbn;
+ struct tree_page *tree_page;
+ struct block_map_page *page;
+ sequence_number_t old_lock;
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct block_map_zone *zone = data_vio->logical.zone->block_map_zone;
+ struct tree_lock *tree_lock = &data_vio->tree_lock;
+ height_t height = tree_lock->height;
+
+ assert_data_vio_in_logical_zone(data_vio);
+
+ tree_page = get_tree_page(zone, tree_lock);
+ pbn = tree_lock->tree_slots[height - 1].block_map_slot.pbn;
+
+ /* Record the allocation. */
+ page = (struct block_map_page *) tree_page->page_buffer;
+ old_lock = tree_page->recovery_lock;
+ vdo_update_block_map_page(page, data_vio, pbn,
+ VDO_MAPPING_STATE_UNCOMPRESSED,
+ &tree_page->recovery_lock);
+
+ if (vdo_waiter_is_waiting(&tree_page->waiter)) {
+ /* This page is waiting to be written out. */
+ if (zone->flusher != tree_page) {
+ /*
+ * The outstanding flush won't cover the update we just made,
+ * so mark the page as needing another flush.
+ */
+ set_generation(zone, tree_page, zone->generation);
+ }
+ } else {
+ /* Put the page on a dirty list */
+ if (old_lock == 0)
+ INIT_LIST_HEAD(&tree_page->entry);
+ add_to_dirty_lists(zone, &tree_page->entry, VDO_TREE_PAGE,
+ old_lock, tree_page->recovery_lock);
+ }
+
+ tree_lock->height--;
+ if (height > 1) {
+ /* Format the interior node we just allocated (in memory). */
+ tree_page = get_tree_page(zone, tree_lock);
+ vdo_format_block_map_page(tree_page->page_buffer,
+ zone->block_map->nonce,
+ pbn, false);
+ }
+
+ /* Release our claim to the allocation and wake any waiters */
+ release_page_lock(data_vio, "allocation");
+ vdo_waitq_notify_all_waiters(&tree_lock->waiters,
+ continue_allocation_for_waiter, &pbn);
+ if (tree_lock->height == 0) {
+ finish_lookup(data_vio, VDO_SUCCESS);
+ return;
+ }
+
+ allocate_block_map_page(zone, data_vio);
+}
+
+static void release_block_map_write_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_allocated_zone(data_vio);
+
+ release_data_vio_allocation_lock(data_vio, true);
+ launch_data_vio_logical_callback(data_vio, finish_block_map_allocation);
+}
+
+/*
+ * Newly allocated block map pages are set to have to MAXIMUM_REFERENCES after they are journaled,
+ * to prevent deduplication against the block after we release the write lock on it, but before we
+ * write out the page.
+ */
+static void set_block_map_page_reference_count(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_allocated_zone(data_vio);
+
+ completion->callback = release_block_map_write_lock;
+ vdo_modify_reference_count(completion, &data_vio->increment_updater);
+}
+
+static void journal_block_map_allocation(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_journal_zone(data_vio);
+
+ set_data_vio_allocated_zone_callback(data_vio,
+ set_block_map_page_reference_count);
+ vdo_add_recovery_journal_entry(completion->vdo->recovery_journal, data_vio);
+}
+
+static void allocate_block(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct tree_lock *lock = &data_vio->tree_lock;
+ physical_block_number_t pbn;
+
+ assert_data_vio_in_allocated_zone(data_vio);
+
+ if (!vdo_allocate_block_in_zone(data_vio))
+ return;
+
+ pbn = data_vio->allocation.pbn;
+ lock->tree_slots[lock->height - 1].block_map_slot.pbn = pbn;
+ data_vio->increment_updater = (struct reference_updater) {
+ .operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING,
+ .increment = true,
+ .zpbn = {
+ .pbn = pbn,
+ .state = VDO_MAPPING_STATE_UNCOMPRESSED,
+ },
+ .lock = data_vio->allocation.lock,
+ };
+
+ launch_data_vio_journal_callback(data_vio, journal_block_map_allocation);
+}
+
+static void allocate_block_map_page(struct block_map_zone *zone,
+ struct data_vio *data_vio)
+{
+ int result;
+
+ if (!data_vio->write || data_vio->is_discard) {
+ /* This is a pure read or a discard, so there's nothing left to do here. */
+ finish_lookup(data_vio, VDO_SUCCESS);
+ return;
+ }
+
+ result = attempt_page_lock(zone, data_vio);
+ if (result != VDO_SUCCESS) {
+ abort_lookup(data_vio, result, "allocation");
+ return;
+ }
+
+ if (!data_vio->tree_lock.locked)
+ return;
+
+ data_vio_allocate_data_block(data_vio, VIO_BLOCK_MAP_WRITE_LOCK,
+ allocate_block, allocation_failure);
+}
+
+/**
+ * vdo_find_block_map_slot() - Find the block map slot in which the block map entry for a data_vio
+ * resides and cache that result in the data_vio.
+ *
+ * All ancestors in the tree will be allocated or loaded, as needed.
+ */
+void vdo_find_block_map_slot(struct data_vio *data_vio)
+{
+ page_number_t page_index;
+ struct block_map_tree_slot tree_slot;
+ struct data_location mapping;
+ struct block_map_page *page = NULL;
+ struct tree_lock *lock = &data_vio->tree_lock;
+ struct block_map_zone *zone = data_vio->logical.zone->block_map_zone;
+
+ zone->active_lookups++;
+ if (vdo_is_state_draining(&zone->state)) {
+ finish_lookup(data_vio, VDO_SHUTTING_DOWN);
+ return;
+ }
+
+ lock->tree_slots[0].block_map_slot.slot =
+ data_vio->logical.lbn % VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+ page_index = (lock->tree_slots[0].page_index / zone->block_map->root_count);
+ tree_slot = (struct block_map_tree_slot) {
+ .page_index = page_index / VDO_BLOCK_MAP_ENTRIES_PER_PAGE,
+ .block_map_slot = {
+ .pbn = 0,
+ .slot = page_index % VDO_BLOCK_MAP_ENTRIES_PER_PAGE,
+ },
+ };
+
+ for (lock->height = 1; lock->height <= VDO_BLOCK_MAP_TREE_HEIGHT; lock->height++) {
+ physical_block_number_t pbn;
+
+ lock->tree_slots[lock->height] = tree_slot;
+ page = (struct block_map_page *) (get_tree_page(zone, lock)->page_buffer);
+ pbn = vdo_get_block_map_page_pbn(page);
+ if (pbn != VDO_ZERO_BLOCK) {
+ lock->tree_slots[lock->height].block_map_slot.pbn = pbn;
+ break;
+ }
+
+ /* Calculate the index and slot for the next level. */
+ tree_slot.block_map_slot.slot =
+ tree_slot.page_index % VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+ tree_slot.page_index = tree_slot.page_index / VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+ }
+
+ /* The page at this height has been allocated and loaded. */
+ mapping = vdo_unpack_block_map_entry(&page->entries[tree_slot.block_map_slot.slot]);
+ if (is_invalid_tree_entry(vdo_from_data_vio(data_vio), &mapping, lock->height)) {
+ vdo_log_error_strerror(VDO_BAD_MAPPING,
+ "Invalid block map tree PBN: %llu with state %u for page index %u at height %u",
+ (unsigned long long) mapping.pbn, mapping.state,
+ lock->tree_slots[lock->height - 1].page_index,
+ lock->height - 1);
+ abort_load(data_vio, VDO_BAD_MAPPING);
+ return;
+ }
+
+ if (!vdo_is_mapped_location(&mapping)) {
+ /* The page we want one level down has not been allocated, so allocate it. */
+ allocate_block_map_page(zone, data_vio);
+ return;
+ }
+
+ lock->tree_slots[lock->height - 1].block_map_slot.pbn = mapping.pbn;
+ if (lock->height == 1) {
+ /* This is the ultimate block map page, so we're done */
+ finish_lookup(data_vio, VDO_SUCCESS);
+ return;
+ }
+
+ /* We know what page we need to load. */
+ load_block_map_page(zone, data_vio);
+}
+
+/*
+ * Find the PBN of a leaf block map page. This method may only be used after all allocated tree
+ * pages have been loaded, otherwise, it may give the wrong answer (0).
+ */
+physical_block_number_t vdo_find_block_map_page_pbn(struct block_map *map,
+ page_number_t page_number)
+{
+ struct data_location mapping;
+ struct tree_page *tree_page;
+ struct block_map_page *page;
+ root_count_t root_index = page_number % map->root_count;
+ page_number_t page_index = page_number / map->root_count;
+ slot_number_t slot = page_index % VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+
+ page_index /= VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+
+ tree_page = get_tree_page_by_index(map->forest, root_index, 1, page_index);
+ page = (struct block_map_page *) tree_page->page_buffer;
+ if (!page->header.initialized)
+ return VDO_ZERO_BLOCK;
+
+ mapping = vdo_unpack_block_map_entry(&page->entries[slot]);
+ if (!vdo_is_valid_location(&mapping) || vdo_is_state_compressed(mapping.state))
+ return VDO_ZERO_BLOCK;
+ return mapping.pbn;
+}
+
+/*
+ * Write a tree page or indicate that it has been re-dirtied if it is already being written. This
+ * method is used when correcting errors in the tree during read-only rebuild.
+ */
+void vdo_write_tree_page(struct tree_page *page, struct block_map_zone *zone)
+{
+ bool waiting = vdo_waiter_is_waiting(&page->waiter);
+
+ if (waiting && (zone->flusher == page))
+ return;
+
+ set_generation(zone, page, zone->generation);
+ if (waiting || page->writing)
+ return;
+
+ enqueue_page(page, zone);
+}
+
+static int make_segment(struct forest *old_forest, block_count_t new_pages,
+ struct boundary *new_boundary, struct forest *forest)
+{
+ size_t index = (old_forest == NULL) ? 0 : old_forest->segments;
+ struct tree_page *page_ptr;
+ page_count_t segment_sizes[VDO_BLOCK_MAP_TREE_HEIGHT];
+ height_t height;
+ root_count_t root;
+ int result;
+
+ forest->segments = index + 1;
+
+ result = vdo_allocate(forest->segments, struct boundary,
+ "forest boundary array", &forest->boundaries);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(forest->segments, struct tree_page *,
+ "forest page pointers", &forest->pages);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(new_pages, struct tree_page,
+ "new forest pages", &forest->pages[index]);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (index > 0) {
+ memcpy(forest->boundaries, old_forest->boundaries,
+ index * sizeof(struct boundary));
+ memcpy(forest->pages, old_forest->pages,
+ index * sizeof(struct tree_page *));
+ }
+
+ memcpy(&(forest->boundaries[index]), new_boundary, sizeof(struct boundary));
+
+ for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
+ segment_sizes[height] = new_boundary->levels[height];
+ if (index > 0)
+ segment_sizes[height] -= old_forest->boundaries[index - 1].levels[height];
+ }
+
+ page_ptr = forest->pages[index];
+ for (root = 0; root < forest->map->root_count; root++) {
+ struct block_map_tree_segment *segment;
+ struct block_map_tree *tree = &(forest->trees[root]);
+ height_t height;
+
+ int result = vdo_allocate(forest->segments,
+ struct block_map_tree_segment,
+ "tree root segments", &tree->segments);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (index > 0) {
+ memcpy(tree->segments, old_forest->trees[root].segments,
+ index * sizeof(struct block_map_tree_segment));
+ }
+
+ segment = &(tree->segments[index]);
+ for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
+ if (segment_sizes[height] == 0)
+ continue;
+
+ segment->levels[height] = page_ptr;
+ if (height == (VDO_BLOCK_MAP_TREE_HEIGHT - 1)) {
+ /* Record the root. */
+ struct block_map_page *page =
+ vdo_format_block_map_page(page_ptr->page_buffer,
+ forest->map->nonce,
+ VDO_INVALID_PBN, true);
+ page->entries[0] =
+ vdo_pack_block_map_entry(forest->map->root_origin + root,
+ VDO_MAPPING_STATE_UNCOMPRESSED);
+ }
+ page_ptr += segment_sizes[height];
+ }
+ }
+
+ return VDO_SUCCESS;
+}
+
+static void deforest(struct forest *forest, size_t first_page_segment)
+{
+ root_count_t root;
+
+ if (forest->pages != NULL) {
+ size_t segment;
+
+ for (segment = first_page_segment; segment < forest->segments; segment++)
+ vdo_free(forest->pages[segment]);
+ vdo_free(forest->pages);
+ }
+
+ for (root = 0; root < forest->map->root_count; root++)
+ vdo_free(forest->trees[root].segments);
+
+ vdo_free(forest->boundaries);
+ vdo_free(forest);
+}
+
+/**
+ * make_forest() - Make a collection of trees for a block_map, expanding the existing forest if
+ * there is one.
+ * @entries: The number of entries the block map will hold.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int make_forest(struct block_map *map, block_count_t entries)
+{
+ struct forest *forest, *old_forest = map->forest;
+ struct boundary new_boundary, *old_boundary = NULL;
+ block_count_t new_pages;
+ int result;
+
+ if (old_forest != NULL)
+ old_boundary = &(old_forest->boundaries[old_forest->segments - 1]);
+
+ new_pages = vdo_compute_new_forest_pages(map->root_count, old_boundary,
+ entries, &new_boundary);
+ if (new_pages == 0) {
+ map->next_entry_count = entries;
+ return VDO_SUCCESS;
+ }
+
+ result = vdo_allocate_extended(struct forest, map->root_count,
+ struct block_map_tree, __func__,
+ &forest);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ forest->map = map;
+ result = make_segment(old_forest, new_pages, &new_boundary, forest);
+ if (result != VDO_SUCCESS) {
+ deforest(forest, forest->segments - 1);
+ return result;
+ }
+
+ map->next_forest = forest;
+ map->next_entry_count = entries;
+ return VDO_SUCCESS;
+}
+
+/**
+ * replace_forest() - Replace a block_map's forest with the already-prepared larger forest.
+ */
+static void replace_forest(struct block_map *map)
+{
+ if (map->next_forest != NULL) {
+ if (map->forest != NULL)
+ deforest(map->forest, map->forest->segments);
+ map->forest = vdo_forget(map->next_forest);
+ }
+
+ map->entry_count = map->next_entry_count;
+ map->next_entry_count = 0;
+}
+
+/**
+ * finish_cursor() - Finish the traversal of a single tree. If it was the last cursor, finish the
+ * traversal.
+ */
+static void finish_cursor(struct cursor *cursor)
+{
+ struct cursors *cursors = cursor->parent;
+ struct vdo_completion *completion = cursors->completion;
+
+ return_vio_to_pool(cursors->pool, vdo_forget(cursor->vio));
+ if (--cursors->active_roots > 0)
+ return;
+
+ vdo_free(cursors);
+
+ vdo_finish_completion(completion);
+}
+
+static void traverse(struct cursor *cursor);
+
+/**
+ * continue_traversal() - Continue traversing a block map tree.
+ * @completion: The VIO doing a read or write.
+ */
+static void continue_traversal(struct vdo_completion *completion)
+{
+ vio_record_metadata_io_error(as_vio(completion));
+ traverse(completion->parent);
+}
+
+/**
+ * finish_traversal_load() - Continue traversing a block map tree now that a page has been loaded.
+ * @completion: The VIO doing the read.
+ */
+static void finish_traversal_load(struct vdo_completion *completion)
+{
+ struct cursor *cursor = completion->parent;
+ height_t height = cursor->height;
+ struct cursor_level *level = &cursor->levels[height];
+ struct tree_page *tree_page =
+ &(cursor->tree->segments[0].levels[height][level->page_index]);
+ struct block_map_page *page = (struct block_map_page *) tree_page->page_buffer;
+
+ vdo_copy_valid_page(cursor->vio->vio.data,
+ cursor->parent->zone->block_map->nonce,
+ pbn_from_vio_bio(cursor->vio->vio.bio), page);
+ traverse(cursor);
+}
+
+static void traversal_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct cursor *cursor = vio->completion.parent;
+
+ continue_vio_after_io(vio, finish_traversal_load,
+ cursor->parent->zone->thread_id);
+}
+
+/**
+ * traverse() - Traverse a single block map tree.
+ *
+ * This is the recursive heart of the traversal process.
+ */
+static void traverse(struct cursor *cursor)
+{
+ for (; cursor->height < VDO_BLOCK_MAP_TREE_HEIGHT; cursor->height++) {
+ height_t height = cursor->height;
+ struct cursor_level *level = &cursor->levels[height];
+ struct tree_page *tree_page =
+ &(cursor->tree->segments[0].levels[height][level->page_index]);
+ struct block_map_page *page = (struct block_map_page *) tree_page->page_buffer;
+
+ if (!page->header.initialized)
+ continue;
+
+ for (; level->slot < VDO_BLOCK_MAP_ENTRIES_PER_PAGE; level->slot++) {
+ struct cursor_level *next_level;
+ page_number_t entry_index =
+ (VDO_BLOCK_MAP_ENTRIES_PER_PAGE * level->page_index) + level->slot;
+ struct data_location location =
+ vdo_unpack_block_map_entry(&page->entries[level->slot]);
+
+ if (!vdo_is_valid_location(&location)) {
+ /* This entry is invalid, so remove it from the page. */
+ page->entries[level->slot] = UNMAPPED_BLOCK_MAP_ENTRY;
+ vdo_write_tree_page(tree_page, cursor->parent->zone);
+ continue;
+ }
+
+ if (!vdo_is_mapped_location(&location))
+ continue;
+
+ /* Erase mapped entries past the end of the logical space. */
+ if (entry_index >= cursor->boundary.levels[height]) {
+ page->entries[level->slot] = UNMAPPED_BLOCK_MAP_ENTRY;
+ vdo_write_tree_page(tree_page, cursor->parent->zone);
+ continue;
+ }
+
+ if (cursor->height < VDO_BLOCK_MAP_TREE_HEIGHT - 1) {
+ int result = cursor->parent->entry_callback(location.pbn,
+ cursor->parent->completion);
+ if (result != VDO_SUCCESS) {
+ page->entries[level->slot] = UNMAPPED_BLOCK_MAP_ENTRY;
+ vdo_write_tree_page(tree_page, cursor->parent->zone);
+ continue;
+ }
+ }
+
+ if (cursor->height == 0)
+ continue;
+
+ cursor->height--;
+ next_level = &cursor->levels[cursor->height];
+ next_level->page_index = entry_index;
+ next_level->slot = 0;
+ level->slot++;
+ vdo_submit_metadata_vio(&cursor->vio->vio, location.pbn,
+ traversal_endio, continue_traversal,
+ REQ_OP_READ | REQ_PRIO);
+ return;
+ }
+ }
+
+ finish_cursor(cursor);
+}
+
+/**
+ * launch_cursor() - Start traversing a single block map tree now that the cursor has a VIO with
+ * which to load pages.
+ * @context: The pooled_vio just acquired.
+ *
+ * Implements waiter_callback_fn.
+ */
+static void launch_cursor(struct vdo_waiter *waiter, void *context)
+{
+ struct cursor *cursor = container_of(waiter, struct cursor, waiter);
+ struct pooled_vio *pooled = context;
+
+ cursor->vio = pooled;
+ pooled->vio.completion.parent = cursor;
+ pooled->vio.completion.callback_thread_id = cursor->parent->zone->thread_id;
+ traverse(cursor);
+}
+
+/**
+ * compute_boundary() - Compute the number of pages used at each level of the given root's tree.
+ *
+ * Return: The list of page counts as a boundary structure.
+ */
+static struct boundary compute_boundary(struct block_map *map, root_count_t root_index)
+{
+ struct boundary boundary;
+ height_t height;
+ page_count_t leaf_pages = vdo_compute_block_map_page_count(map->entry_count);
+ /*
+ * Compute the leaf pages for this root. If the number of leaf pages does not distribute
+ * evenly, we must determine if this root gets an extra page. Extra pages are assigned to
+ * roots starting from tree 0.
+ */
+ page_count_t last_tree_root = (leaf_pages - 1) % map->root_count;
+ page_count_t level_pages = leaf_pages / map->root_count;
+
+ if (root_index <= last_tree_root)
+ level_pages++;
+
+ for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT - 1; height++) {
+ boundary.levels[height] = level_pages;
+ level_pages = DIV_ROUND_UP(level_pages, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
+ }
+
+ /* The root node always exists, even if the root is otherwise unused. */
+ boundary.levels[VDO_BLOCK_MAP_TREE_HEIGHT - 1] = 1;
+
+ return boundary;
+}
+
+/**
+ * vdo_traverse_forest() - Walk the entire forest of a block map.
+ * @callback: A function to call with the pbn of each allocated node in the forest.
+ * @completion: The completion to notify on each traversed PBN, and when traversal completes.
+ */
+void vdo_traverse_forest(struct block_map *map, vdo_entry_callback_fn callback,
+ struct vdo_completion *completion)
+{
+ root_count_t root;
+ struct cursors *cursors;
+ int result;
+
+ result = vdo_allocate_extended(struct cursors, map->root_count,
+ struct cursor, __func__, &cursors);
+ if (result != VDO_SUCCESS) {
+ vdo_fail_completion(completion, result);
+ return;
+ }
+
+ cursors->zone = &map->zones[0];
+ cursors->pool = cursors->zone->vio_pool;
+ cursors->entry_callback = callback;
+ cursors->completion = completion;
+ cursors->active_roots = map->root_count;
+ for (root = 0; root < map->root_count; root++) {
+ struct cursor *cursor = &cursors->cursors[root];
+
+ *cursor = (struct cursor) {
+ .tree = &map->forest->trees[root],
+ .height = VDO_BLOCK_MAP_TREE_HEIGHT - 1,
+ .parent = cursors,
+ .boundary = compute_boundary(map, root),
+ };
+
+ cursor->waiter.callback = launch_cursor;
+ acquire_vio_from_pool(cursors->pool, &cursor->waiter);
+ }
+}
+
+/**
+ * initialize_block_map_zone() - Initialize the per-zone portions of the block map.
+ * @maximum_age: The number of journal blocks before a dirtied page is considered old and must be
+ * written out.
+ */
+static int __must_check initialize_block_map_zone(struct block_map *map,
+ zone_count_t zone_number,
+ page_count_t cache_size,
+ block_count_t maximum_age)
+{
+ int result;
+ block_count_t i;
+ struct vdo *vdo = map->vdo;
+ struct block_map_zone *zone = &map->zones[zone_number];
+
+ BUILD_BUG_ON(sizeof(struct page_descriptor) != sizeof(u64));
+
+ zone->zone_number = zone_number;
+ zone->thread_id = vdo->thread_config.logical_threads[zone_number];
+ zone->block_map = map;
+
+ result = vdo_allocate_extended(struct dirty_lists, maximum_age,
+ dirty_era_t, __func__,
+ &zone->dirty_lists);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ zone->dirty_lists->maximum_age = maximum_age;
+ INIT_LIST_HEAD(&zone->dirty_lists->expired[VDO_TREE_PAGE]);
+ INIT_LIST_HEAD(&zone->dirty_lists->expired[VDO_CACHE_PAGE]);
+
+ for (i = 0; i < maximum_age; i++) {
+ INIT_LIST_HEAD(&zone->dirty_lists->eras[i][VDO_TREE_PAGE]);
+ INIT_LIST_HEAD(&zone->dirty_lists->eras[i][VDO_CACHE_PAGE]);
+ }
+
+ result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->loading_pages);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = make_vio_pool(vdo, BLOCK_MAP_VIO_POOL_SIZE,
+ zone->thread_id, VIO_TYPE_BLOCK_MAP_INTERIOR,
+ VIO_PRIORITY_METADATA, zone, &zone->vio_pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_set_admin_state_code(&zone->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+
+ zone->page_cache.zone = zone;
+ zone->page_cache.vdo = vdo;
+ zone->page_cache.page_count = cache_size / map->zone_count;
+ zone->page_cache.stats.free_pages = zone->page_cache.page_count;
+
+ result = allocate_cache_components(&zone->page_cache);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* initialize empty circular queues */
+ INIT_LIST_HEAD(&zone->page_cache.lru_list);
+ INIT_LIST_HEAD(&zone->page_cache.outgoing_list);
+
+ return VDO_SUCCESS;
+}
+
+/* Implements vdo_zone_thread_getter_fn */
+static thread_id_t get_block_map_zone_thread_id(void *context, zone_count_t zone_number)
+{
+ struct block_map *map = context;
+
+ return map->zones[zone_number].thread_id;
+}
+
+/* Implements vdo_action_preamble_fn */
+static void prepare_for_era_advance(void *context, struct vdo_completion *parent)
+{
+ struct block_map *map = context;
+
+ map->current_era_point = map->pending_era_point;
+ vdo_finish_completion(parent);
+}
+
+/* Implements vdo_zone_action_fn */
+static void advance_block_map_zone_era(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct block_map *map = context;
+ struct block_map_zone *zone = &map->zones[zone_number];
+
+ update_period(zone->dirty_lists, map->current_era_point);
+ write_expired_elements(zone);
+ vdo_finish_completion(parent);
+}
+
+/*
+ * Schedule an era advance if necessary. This method should not be called directly. Rather, call
+ * vdo_schedule_default_action() on the block map's action manager.
+ *
+ * Implements vdo_action_scheduler_fn.
+ */
+static bool schedule_era_advance(void *context)
+{
+ struct block_map *map = context;
+
+ if (map->current_era_point == map->pending_era_point)
+ return false;
+
+ return vdo_schedule_action(map->action_manager, prepare_for_era_advance,
+ advance_block_map_zone_era, NULL, NULL);
+}
+
+static void uninitialize_block_map_zone(struct block_map_zone *zone)
+{
+ struct vdo_page_cache *cache = &zone->page_cache;
+
+ vdo_free(vdo_forget(zone->dirty_lists));
+ free_vio_pool(vdo_forget(zone->vio_pool));
+ vdo_int_map_free(vdo_forget(zone->loading_pages));
+ if (cache->infos != NULL) {
+ struct page_info *info;
+
+ for (info = cache->infos; info < cache->infos + cache->page_count; info++)
+ free_vio(vdo_forget(info->vio));
+ }
+
+ vdo_int_map_free(vdo_forget(cache->page_map));
+ vdo_free(vdo_forget(cache->infos));
+ vdo_free(vdo_forget(cache->pages));
+}
+
+void vdo_free_block_map(struct block_map *map)
+{
+ zone_count_t zone;
+
+ if (map == NULL)
+ return;
+
+ for (zone = 0; zone < map->zone_count; zone++)
+ uninitialize_block_map_zone(&map->zones[zone]);
+
+ vdo_abandon_block_map_growth(map);
+ if (map->forest != NULL)
+ deforest(vdo_forget(map->forest), 0);
+ vdo_free(vdo_forget(map->action_manager));
+ vdo_free(map);
+}
+
+/* @journal may be NULL. */
+int vdo_decode_block_map(struct block_map_state_2_0 state, block_count_t logical_blocks,
+ struct vdo *vdo, struct recovery_journal *journal,
+ nonce_t nonce, page_count_t cache_size, block_count_t maximum_age,
+ struct block_map **map_ptr)
+{
+ struct block_map *map;
+ int result;
+ zone_count_t zone = 0;
+
+ BUILD_BUG_ON(VDO_BLOCK_MAP_ENTRIES_PER_PAGE !=
+ ((VDO_BLOCK_SIZE - sizeof(struct block_map_page)) /
+ sizeof(struct block_map_entry)));
+ result = VDO_ASSERT(cache_size > 0, "block map cache size is specified");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate_extended(struct block_map,
+ vdo->thread_config.logical_zone_count,
+ struct block_map_zone, __func__, &map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ map->vdo = vdo;
+ map->root_origin = state.root_origin;
+ map->root_count = state.root_count;
+ map->entry_count = logical_blocks;
+ map->journal = journal;
+ map->nonce = nonce;
+
+ result = make_forest(map, map->entry_count);
+ if (result != VDO_SUCCESS) {
+ vdo_free_block_map(map);
+ return result;
+ }
+
+ replace_forest(map);
+
+ map->zone_count = vdo->thread_config.logical_zone_count;
+ for (zone = 0; zone < map->zone_count; zone++) {
+ result = initialize_block_map_zone(map, zone, cache_size, maximum_age);
+ if (result != VDO_SUCCESS) {
+ vdo_free_block_map(map);
+ return result;
+ }
+ }
+
+ result = vdo_make_action_manager(map->zone_count, get_block_map_zone_thread_id,
+ vdo_get_recovery_journal_thread_id(journal),
+ map, schedule_era_advance, vdo,
+ &map->action_manager);
+ if (result != VDO_SUCCESS) {
+ vdo_free_block_map(map);
+ return result;
+ }
+
+ *map_ptr = map;
+ return VDO_SUCCESS;
+}
+
+struct block_map_state_2_0 vdo_record_block_map(const struct block_map *map)
+{
+ return (struct block_map_state_2_0) {
+ .flat_page_origin = VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
+ /* This is the flat page count, which has turned out to always be 0. */
+ .flat_page_count = 0,
+ .root_origin = map->root_origin,
+ .root_count = map->root_count,
+ };
+}
+
+/* The block map needs to know the journals' sequence number to initialize the eras. */
+void vdo_initialize_block_map_from_journal(struct block_map *map,
+ struct recovery_journal *journal)
+{
+ zone_count_t z = 0;
+
+ map->current_era_point = vdo_get_recovery_journal_current_sequence_number(journal);
+ map->pending_era_point = map->current_era_point;
+
+ for (z = 0; z < map->zone_count; z++) {
+ struct dirty_lists *dirty_lists = map->zones[z].dirty_lists;
+
+ VDO_ASSERT_LOG_ONLY(dirty_lists->next_period == 0, "current period not set");
+ dirty_lists->oldest_period = map->current_era_point;
+ dirty_lists->next_period = map->current_era_point + 1;
+ dirty_lists->offset = map->current_era_point % dirty_lists->maximum_age;
+ }
+}
+
+/* Compute the logical zone for the LBN of a data vio. */
+zone_count_t vdo_compute_logical_zone(struct data_vio *data_vio)
+{
+ struct block_map *map = vdo_from_data_vio(data_vio)->block_map;
+ struct tree_lock *tree_lock = &data_vio->tree_lock;
+ page_number_t page_number = data_vio->logical.lbn / VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+
+ tree_lock->tree_slots[0].page_index = page_number;
+ tree_lock->root_index = page_number % map->root_count;
+ return (tree_lock->root_index % map->zone_count);
+}
+
+void vdo_advance_block_map_era(struct block_map *map,
+ sequence_number_t recovery_block_number)
+{
+ if (map == NULL)
+ return;
+
+ map->pending_era_point = recovery_block_number;
+ vdo_schedule_default_action(map->action_manager);
+}
+
+/* Implements vdo_admin_initiator_fn */
+static void initiate_drain(struct admin_state *state)
+{
+ struct block_map_zone *zone = container_of(state, struct block_map_zone, state);
+
+ VDO_ASSERT_LOG_ONLY((zone->active_lookups == 0),
+ "%s() called with no active lookups", __func__);
+
+ if (!vdo_is_state_suspending(state)) {
+ while (zone->dirty_lists->oldest_period < zone->dirty_lists->next_period)
+ expire_oldest_list(zone->dirty_lists);
+ write_expired_elements(zone);
+ }
+
+ check_for_drain_complete(zone);
+}
+
+/* Implements vdo_zone_action_fn. */
+static void drain_zone(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct block_map *map = context;
+ struct block_map_zone *zone = &map->zones[zone_number];
+
+ vdo_start_draining(&zone->state,
+ vdo_get_current_manager_operation(map->action_manager),
+ parent, initiate_drain);
+}
+
+void vdo_drain_block_map(struct block_map *map, const struct admin_state_code *operation,
+ struct vdo_completion *parent)
+{
+ vdo_schedule_operation(map->action_manager, operation, NULL, drain_zone, NULL,
+ parent);
+}
+
+/* Implements vdo_zone_action_fn. */
+static void resume_block_map_zone(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct block_map *map = context;
+ struct block_map_zone *zone = &map->zones[zone_number];
+
+ vdo_fail_completion(parent, vdo_resume_if_quiescent(&zone->state));
+}
+
+void vdo_resume_block_map(struct block_map *map, struct vdo_completion *parent)
+{
+ vdo_schedule_operation(map->action_manager, VDO_ADMIN_STATE_RESUMING,
+ NULL, resume_block_map_zone, NULL, parent);
+}
+
+/* Allocate an expanded collection of trees, for a future growth. */
+int vdo_prepare_to_grow_block_map(struct block_map *map,
+ block_count_t new_logical_blocks)
+{
+ if (map->next_entry_count == new_logical_blocks)
+ return VDO_SUCCESS;
+
+ if (map->next_entry_count > 0)
+ vdo_abandon_block_map_growth(map);
+
+ if (new_logical_blocks < map->entry_count) {
+ map->next_entry_count = map->entry_count;
+ return VDO_SUCCESS;
+ }
+
+ return make_forest(map, new_logical_blocks);
+}
+
+/* Implements vdo_action_preamble_fn */
+static void grow_forest(void *context, struct vdo_completion *completion)
+{
+ replace_forest(context);
+ vdo_finish_completion(completion);
+}
+
+/* Requires vdo_prepare_to_grow_block_map() to have been previously called. */
+void vdo_grow_block_map(struct block_map *map, struct vdo_completion *parent)
+{
+ vdo_schedule_operation(map->action_manager,
+ VDO_ADMIN_STATE_SUSPENDED_OPERATION,
+ grow_forest, NULL, NULL, parent);
+}
+
+void vdo_abandon_block_map_growth(struct block_map *map)
+{
+ struct forest *forest = vdo_forget(map->next_forest);
+
+ if (forest != NULL)
+ deforest(forest, forest->segments - 1);
+
+ map->next_entry_count = 0;
+}
+
+/* Release the page completion and then continue the requester. */
+static inline void finish_processing_page(struct vdo_completion *completion, int result)
+{
+ struct vdo_completion *parent = completion->parent;
+
+ vdo_release_page_completion(completion);
+ vdo_continue_completion(parent, result);
+}
+
+static void handle_page_error(struct vdo_completion *completion)
+{
+ finish_processing_page(completion, completion->result);
+}
+
+/* Fetch the mapping page for a block map update, and call the provided handler when fetched. */
+static void fetch_mapping_page(struct data_vio *data_vio, bool modifiable,
+ vdo_action_fn action)
+{
+ struct block_map_zone *zone = data_vio->logical.zone->block_map_zone;
+
+ if (vdo_is_state_draining(&zone->state)) {
+ continue_data_vio_with_error(data_vio, VDO_SHUTTING_DOWN);
+ return;
+ }
+
+ vdo_get_page(&data_vio->page_completion, zone,
+ data_vio->tree_lock.tree_slots[0].block_map_slot.pbn,
+ modifiable, &data_vio->vio.completion,
+ action, handle_page_error, false);
+}
+
+/**
+ * clear_mapped_location() - Clear a data_vio's mapped block location, setting it to be unmapped.
+ *
+ * This indicates the block map entry for the logical block is either unmapped or corrupted.
+ */
+static void clear_mapped_location(struct data_vio *data_vio)
+{
+ data_vio->mapped = (struct zoned_pbn) {
+ .state = VDO_MAPPING_STATE_UNMAPPED,
+ };
+}
+
+/**
+ * set_mapped_location() - Decode and validate a block map entry, and set the mapped location of a
+ * data_vio.
+ *
+ * Return: VDO_SUCCESS or VDO_BAD_MAPPING if the map entry is invalid or an error code for any
+ * other failure
+ */
+static int __must_check set_mapped_location(struct data_vio *data_vio,
+ const struct block_map_entry *entry)
+{
+ /* Unpack the PBN for logging purposes even if the entry is invalid. */
+ struct data_location mapped = vdo_unpack_block_map_entry(entry);
+
+ if (vdo_is_valid_location(&mapped)) {
+ int result;
+
+ result = vdo_get_physical_zone(vdo_from_data_vio(data_vio),
+ mapped.pbn, &data_vio->mapped.zone);
+ if (result == VDO_SUCCESS) {
+ data_vio->mapped.pbn = mapped.pbn;
+ data_vio->mapped.state = mapped.state;
+ return VDO_SUCCESS;
+ }
+
+ /*
+ * Return all errors not specifically known to be errors from validating the
+ * location.
+ */
+ if ((result != VDO_OUT_OF_RANGE) && (result != VDO_BAD_MAPPING))
+ return result;
+ }
+
+ /*
+ * Log the corruption even if we wind up ignoring it for write VIOs, converting all cases
+ * to VDO_BAD_MAPPING.
+ */
+ vdo_log_error_strerror(VDO_BAD_MAPPING,
+ "PBN %llu with state %u read from the block map was invalid",
+ (unsigned long long) mapped.pbn, mapped.state);
+
+ /*
+ * A read VIO has no option but to report the bad mapping--reading zeros would be hiding
+ * known data loss.
+ */
+ if (!data_vio->write)
+ return VDO_BAD_MAPPING;
+
+ /*
+ * A write VIO only reads this mapping to decref the old block. Treat this as an unmapped
+ * entry rather than fail the write.
+ */
+ clear_mapped_location(data_vio);
+ return VDO_SUCCESS;
+}
+
+/* This callback is registered in vdo_get_mapped_block(). */
+static void get_mapping_from_fetched_page(struct vdo_completion *completion)
+{
+ int result;
+ struct vdo_page_completion *vpc = as_vdo_page_completion(completion);
+ const struct block_map_page *page;
+ const struct block_map_entry *entry;
+ struct data_vio *data_vio = as_data_vio(completion->parent);
+ struct block_map_tree_slot *tree_slot;
+
+ if (completion->result != VDO_SUCCESS) {
+ finish_processing_page(completion, completion->result);
+ return;
+ }
+
+ result = validate_completed_page(vpc, false);
+ if (result != VDO_SUCCESS) {
+ finish_processing_page(completion, result);
+ return;
+ }
+
+ page = (const struct block_map_page *) get_page_buffer(vpc->info);
+ tree_slot = &data_vio->tree_lock.tree_slots[0];
+ entry = &page->entries[tree_slot->block_map_slot.slot];
+
+ result = set_mapped_location(data_vio, entry);
+ finish_processing_page(completion, result);
+}
+
+void vdo_update_block_map_page(struct block_map_page *page, struct data_vio *data_vio,
+ physical_block_number_t pbn,
+ enum block_mapping_state mapping_state,
+ sequence_number_t *recovery_lock)
+{
+ struct block_map_zone *zone = data_vio->logical.zone->block_map_zone;
+ struct block_map *block_map = zone->block_map;
+ struct recovery_journal *journal = block_map->journal;
+ sequence_number_t old_locked, new_locked;
+ struct tree_lock *tree_lock = &data_vio->tree_lock;
+
+ /* Encode the new mapping. */
+ page->entries[tree_lock->tree_slots[tree_lock->height].block_map_slot.slot] =
+ vdo_pack_block_map_entry(pbn, mapping_state);
+
+ /* Adjust references on the recovery journal blocks. */
+ old_locked = *recovery_lock;
+ new_locked = data_vio->recovery_sequence_number;
+
+ if ((old_locked == 0) || (old_locked > new_locked)) {
+ vdo_acquire_recovery_journal_block_reference(journal, new_locked,
+ VDO_ZONE_TYPE_LOGICAL,
+ zone->zone_number);
+
+ if (old_locked > 0) {
+ vdo_release_recovery_journal_block_reference(journal, old_locked,
+ VDO_ZONE_TYPE_LOGICAL,
+ zone->zone_number);
+ }
+
+ *recovery_lock = new_locked;
+ }
+
+ /*
+ * FIXME: explain this more
+ * Release the transferred lock from the data_vio.
+ */
+ vdo_release_journal_entry_lock(journal, new_locked);
+ data_vio->recovery_sequence_number = 0;
+}
+
+static void put_mapping_in_fetched_page(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion->parent);
+ sequence_number_t old_lock;
+ struct vdo_page_completion *vpc;
+ struct page_info *info;
+ int result;
+
+ if (completion->result != VDO_SUCCESS) {
+ finish_processing_page(completion, completion->result);
+ return;
+ }
+
+ vpc = as_vdo_page_completion(completion);
+ result = validate_completed_page(vpc, true);
+ if (result != VDO_SUCCESS) {
+ finish_processing_page(completion, result);
+ return;
+ }
+
+ info = vpc->info;
+ old_lock = info->recovery_lock;
+ vdo_update_block_map_page((struct block_map_page *) get_page_buffer(info),
+ data_vio, data_vio->new_mapped.pbn,
+ data_vio->new_mapped.state, &info->recovery_lock);
+ set_info_state(info, PS_DIRTY);
+ add_to_dirty_lists(info->cache->zone, &info->state_entry,
+ VDO_CACHE_PAGE, old_lock, info->recovery_lock);
+ finish_processing_page(completion, VDO_SUCCESS);
+}
+
+/* Read a stored block mapping into a data_vio. */
+void vdo_get_mapped_block(struct data_vio *data_vio)
+{
+ if (data_vio->tree_lock.tree_slots[0].block_map_slot.pbn == VDO_ZERO_BLOCK) {
+ /*
+ * We know that the block map page for this LBN has not been allocated, so the
+ * block must be unmapped.
+ */
+ clear_mapped_location(data_vio);
+ continue_data_vio(data_vio);
+ return;
+ }
+
+ fetch_mapping_page(data_vio, false, get_mapping_from_fetched_page);
+}
+
+/* Update a stored block mapping to reflect a data_vio's new mapping. */
+void vdo_put_mapped_block(struct data_vio *data_vio)
+{
+ fetch_mapping_page(data_vio, true, put_mapping_in_fetched_page);
+}
+
+struct block_map_statistics vdo_get_block_map_statistics(struct block_map *map)
+{
+ zone_count_t zone = 0;
+ struct block_map_statistics totals;
+
+ memset(&totals, 0, sizeof(struct block_map_statistics));
+ for (zone = 0; zone < map->zone_count; zone++) {
+ const struct block_map_statistics *stats =
+ &(map->zones[zone].page_cache.stats);
+
+ totals.dirty_pages += READ_ONCE(stats->dirty_pages);
+ totals.clean_pages += READ_ONCE(stats->clean_pages);
+ totals.free_pages += READ_ONCE(stats->free_pages);
+ totals.failed_pages += READ_ONCE(stats->failed_pages);
+ totals.incoming_pages += READ_ONCE(stats->incoming_pages);
+ totals.outgoing_pages += READ_ONCE(stats->outgoing_pages);
+ totals.cache_pressure += READ_ONCE(stats->cache_pressure);
+ totals.read_count += READ_ONCE(stats->read_count);
+ totals.write_count += READ_ONCE(stats->write_count);
+ totals.failed_reads += READ_ONCE(stats->failed_reads);
+ totals.failed_writes += READ_ONCE(stats->failed_writes);
+ totals.reclaimed += READ_ONCE(stats->reclaimed);
+ totals.read_outgoing += READ_ONCE(stats->read_outgoing);
+ totals.found_in_cache += READ_ONCE(stats->found_in_cache);
+ totals.discard_required += READ_ONCE(stats->discard_required);
+ totals.wait_for_page += READ_ONCE(stats->wait_for_page);
+ totals.fetch_required += READ_ONCE(stats->fetch_required);
+ totals.pages_loaded += READ_ONCE(stats->pages_loaded);
+ totals.pages_saved += READ_ONCE(stats->pages_saved);
+ totals.flush_count += READ_ONCE(stats->flush_count);
+ }
+
+ return totals;
+}
diff --git a/drivers/md/dm-vdo/block-map.h b/drivers/md/dm-vdo/block-map.h
new file mode 100644
index 000000000000..39a13039e4a3
--- /dev/null
+++ b/drivers/md/dm-vdo/block-map.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_BLOCK_MAP_H
+#define VDO_BLOCK_MAP_H
+
+#include <linux/list.h>
+
+#include "numeric.h"
+
+#include "admin-state.h"
+#include "completion.h"
+#include "encodings.h"
+#include "int-map.h"
+#include "statistics.h"
+#include "types.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+/*
+ * The block map is responsible for tracking all the logical to physical mappings of a VDO. It
+ * consists of a collection of 60 radix trees gradually allocated as logical addresses are used.
+ * Each tree is assigned to a logical zone such that it is easy to compute which zone must handle
+ * each logical address. Each logical zone also has a dedicated portion of the leaf page cache.
+ *
+ * Each logical zone has a single dedicated queue and thread for performing all updates to the
+ * radix trees assigned to that zone. The concurrency guarantees of this single-threaded model
+ * allow the code to omit more fine-grained locking for the block map structures.
+ *
+ * Load operations must be performed on the admin thread. Normal operations, such as reading and
+ * updating mappings, must be performed on the appropriate logical zone thread. Save operations
+ * must be launched from the same admin thread as the original load operation.
+ */
+
+enum {
+ BLOCK_MAP_VIO_POOL_SIZE = 64,
+};
+
+/*
+ * Generation counter for page references.
+ */
+typedef u32 vdo_page_generation;
+
+extern const struct block_map_entry UNMAPPED_BLOCK_MAP_ENTRY;
+
+/* The VDO Page Cache abstraction. */
+struct vdo_page_cache {
+ /* the VDO which owns this cache */
+ struct vdo *vdo;
+ /* number of pages in cache */
+ page_count_t page_count;
+ /* number of pages to write in the current batch */
+ page_count_t pages_in_batch;
+ /* Whether the VDO is doing a read-only rebuild */
+ bool rebuilding;
+
+ /* array of page information entries */
+ struct page_info *infos;
+ /* raw memory for pages */
+ char *pages;
+ /* cache last found page info */
+ struct page_info *last_found;
+ /* map of page number to info */
+ struct int_map *page_map;
+ /* main LRU list (all infos) */
+ struct list_head lru_list;
+ /* free page list (oldest first) */
+ struct list_head free_list;
+ /* outgoing page list */
+ struct list_head outgoing_list;
+ /* number of read I/O operations pending */
+ page_count_t outstanding_reads;
+ /* number of write I/O operations pending */
+ page_count_t outstanding_writes;
+ /* number of pages covered by the current flush */
+ page_count_t pages_in_flush;
+ /* number of pages waiting to be included in the next flush */
+ page_count_t pages_to_flush;
+ /* number of discards in progress */
+ unsigned int discard_count;
+ /* how many VPCs waiting for free page */
+ unsigned int waiter_count;
+ /* queue of waiters who want a free page */
+ struct vdo_wait_queue free_waiters;
+ /*
+ * Statistics are only updated on the logical zone thread, but are accessed from other
+ * threads.
+ */
+ struct block_map_statistics stats;
+ /* counter for pressure reports */
+ u32 pressure_report;
+ /* the block map zone to which this cache belongs */
+ struct block_map_zone *zone;
+};
+
+/*
+ * The state of a page buffer. If the page buffer is free no particular page is bound to it,
+ * otherwise the page buffer is bound to particular page whose absolute pbn is in the pbn field. If
+ * the page is resident or dirty the page data is stable and may be accessed. Otherwise the page is
+ * in flight (incoming or outgoing) and its data should not be accessed.
+ *
+ * @note Update the static data in get_page_state_name() if you change this enumeration.
+ */
+enum vdo_page_buffer_state {
+ /* this page buffer is not being used */
+ PS_FREE,
+ /* this page is being read from store */
+ PS_INCOMING,
+ /* attempt to load this page failed */
+ PS_FAILED,
+ /* this page is valid and un-modified */
+ PS_RESIDENT,
+ /* this page is valid and modified */
+ PS_DIRTY,
+ /* this page is being written and should not be used */
+ PS_OUTGOING,
+ /* not a state */
+ PAGE_STATE_COUNT,
+} __packed;
+
+/*
+ * The write status of page
+ */
+enum vdo_page_write_status {
+ WRITE_STATUS_NORMAL,
+ WRITE_STATUS_DISCARD,
+ WRITE_STATUS_DEFERRED,
+} __packed;
+
+/* Per-page-slot information. */
+struct page_info {
+ /* Preallocated page struct vio */
+ struct vio *vio;
+ /* back-link for references */
+ struct vdo_page_cache *cache;
+ /* the pbn of the page */
+ physical_block_number_t pbn;
+ /* page is busy (temporarily locked) */
+ u16 busy;
+ /* the write status the page */
+ enum vdo_page_write_status write_status;
+ /* page state */
+ enum vdo_page_buffer_state state;
+ /* queue of completions awaiting this item */
+ struct vdo_wait_queue waiting;
+ /* state linked list entry */
+ struct list_head state_entry;
+ /* LRU entry */
+ struct list_head lru_entry;
+ /*
+ * The earliest recovery journal block containing uncommitted updates to the block map page
+ * associated with this page_info. A reference (lock) is held on that block to prevent it
+ * from being reaped. When this value changes, the reference on the old value must be
+ * released and a reference on the new value must be acquired.
+ */
+ sequence_number_t recovery_lock;
+};
+
+/*
+ * A completion awaiting a specific page. Also a live reference into the page once completed, until
+ * freed.
+ */
+struct vdo_page_completion {
+ /* The generic completion */
+ struct vdo_completion completion;
+ /* The cache involved */
+ struct vdo_page_cache *cache;
+ /* The waiter for the pending list */
+ struct vdo_waiter waiter;
+ /* The absolute physical block number of the page on disk */
+ physical_block_number_t pbn;
+ /* Whether the page may be modified */
+ bool writable;
+ /* Whether the page is available */
+ bool ready;
+ /* The info structure for the page, only valid when ready */
+ struct page_info *info;
+};
+
+struct forest;
+
+struct tree_page {
+ struct vdo_waiter waiter;
+
+ /* Dirty list entry */
+ struct list_head entry;
+
+ /* If dirty, the tree zone flush generation in which it was last dirtied. */
+ u8 generation;
+
+ /* Whether this page is an interior tree page being written out. */
+ bool writing;
+
+ /* If writing, the tree zone flush generation of the copy being written. */
+ u8 writing_generation;
+
+ /*
+ * Sequence number of the earliest recovery journal block containing uncommitted updates to
+ * this page
+ */
+ sequence_number_t recovery_lock;
+
+ /* The value of recovery_lock when the this page last started writing */
+ sequence_number_t writing_recovery_lock;
+
+ char page_buffer[VDO_BLOCK_SIZE];
+};
+
+enum block_map_page_type {
+ VDO_TREE_PAGE,
+ VDO_CACHE_PAGE,
+};
+
+typedef struct list_head dirty_era_t[2];
+
+struct dirty_lists {
+ /* The number of periods after which an element will be expired */
+ block_count_t maximum_age;
+ /* The oldest period which has unexpired elements */
+ sequence_number_t oldest_period;
+ /* One more than the current period */
+ sequence_number_t next_period;
+ /* The offset in the array of lists of the oldest period */
+ block_count_t offset;
+ /* Expired pages */
+ dirty_era_t expired;
+ /* The lists of dirty pages */
+ dirty_era_t eras[];
+};
+
+struct block_map_zone {
+ zone_count_t zone_number;
+ thread_id_t thread_id;
+ struct admin_state state;
+ struct block_map *block_map;
+ /* Dirty pages, by era*/
+ struct dirty_lists *dirty_lists;
+ struct vdo_page_cache page_cache;
+ data_vio_count_t active_lookups;
+ struct int_map *loading_pages;
+ struct vio_pool *vio_pool;
+ /* The tree page which has issued or will be issuing a flush */
+ struct tree_page *flusher;
+ struct vdo_wait_queue flush_waiters;
+ /* The generation after the most recent flush */
+ u8 generation;
+ u8 oldest_generation;
+ /* The counts of dirty pages in each generation */
+ u32 dirty_page_counts[256];
+};
+
+struct block_map {
+ struct vdo *vdo;
+ struct action_manager *action_manager;
+ /* The absolute PBN of the first root of the tree part of the block map */
+ physical_block_number_t root_origin;
+ block_count_t root_count;
+
+ /* The era point we are currently distributing to the zones */
+ sequence_number_t current_era_point;
+ /* The next era point */
+ sequence_number_t pending_era_point;
+
+ /* The number of entries in block map */
+ block_count_t entry_count;
+ nonce_t nonce;
+ struct recovery_journal *journal;
+
+ /* The trees for finding block map pages */
+ struct forest *forest;
+ /* The expanded trees awaiting growth */
+ struct forest *next_forest;
+ /* The number of entries after growth */
+ block_count_t next_entry_count;
+
+ zone_count_t zone_count;
+ struct block_map_zone zones[];
+};
+
+/**
+ * typedef vdo_entry_callback_fn - A function to be called for each allocated PBN when traversing
+ * the forest.
+ * @pbn: A PBN of a tree node.
+ * @completion: The parent completion of the traversal.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+typedef int (*vdo_entry_callback_fn)(physical_block_number_t pbn,
+ struct vdo_completion *completion);
+
+static inline struct vdo_page_completion *as_vdo_page_completion(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_PAGE_COMPLETION);
+ return container_of(completion, struct vdo_page_completion, completion);
+}
+
+void vdo_release_page_completion(struct vdo_completion *completion);
+
+void vdo_get_page(struct vdo_page_completion *page_completion,
+ struct block_map_zone *zone, physical_block_number_t pbn,
+ bool writable, void *parent, vdo_action_fn callback,
+ vdo_action_fn error_handler, bool requeue);
+
+void vdo_request_page_write(struct vdo_completion *completion);
+
+int __must_check vdo_get_cached_page(struct vdo_completion *completion,
+ struct block_map_page **page_ptr);
+
+int __must_check vdo_invalidate_page_cache(struct vdo_page_cache *cache);
+
+static inline struct block_map_page * __must_check
+vdo_as_block_map_page(struct tree_page *tree_page)
+{
+ return (struct block_map_page *) tree_page->page_buffer;
+}
+
+bool vdo_copy_valid_page(char *buffer, nonce_t nonce,
+ physical_block_number_t pbn,
+ struct block_map_page *page);
+
+void vdo_find_block_map_slot(struct data_vio *data_vio);
+
+physical_block_number_t vdo_find_block_map_page_pbn(struct block_map *map,
+ page_number_t page_number);
+
+void vdo_write_tree_page(struct tree_page *page, struct block_map_zone *zone);
+
+void vdo_traverse_forest(struct block_map *map, vdo_entry_callback_fn callback,
+ struct vdo_completion *completion);
+
+int __must_check vdo_decode_block_map(struct block_map_state_2_0 state,
+ block_count_t logical_blocks, struct vdo *vdo,
+ struct recovery_journal *journal, nonce_t nonce,
+ page_count_t cache_size, block_count_t maximum_age,
+ struct block_map **map_ptr);
+
+void vdo_drain_block_map(struct block_map *map, const struct admin_state_code *operation,
+ struct vdo_completion *parent);
+
+void vdo_resume_block_map(struct block_map *map, struct vdo_completion *parent);
+
+int __must_check vdo_prepare_to_grow_block_map(struct block_map *map,
+ block_count_t new_logical_blocks);
+
+void vdo_grow_block_map(struct block_map *map, struct vdo_completion *parent);
+
+void vdo_abandon_block_map_growth(struct block_map *map);
+
+void vdo_free_block_map(struct block_map *map);
+
+struct block_map_state_2_0 __must_check vdo_record_block_map(const struct block_map *map);
+
+void vdo_initialize_block_map_from_journal(struct block_map *map,
+ struct recovery_journal *journal);
+
+zone_count_t vdo_compute_logical_zone(struct data_vio *data_vio);
+
+void vdo_advance_block_map_era(struct block_map *map,
+ sequence_number_t recovery_block_number);
+
+void vdo_update_block_map_page(struct block_map_page *page, struct data_vio *data_vio,
+ physical_block_number_t pbn,
+ enum block_mapping_state mapping_state,
+ sequence_number_t *recovery_lock);
+
+void vdo_get_mapped_block(struct data_vio *data_vio);
+
+void vdo_put_mapped_block(struct data_vio *data_vio);
+
+struct block_map_statistics __must_check vdo_get_block_map_statistics(struct block_map *map);
+
+/**
+ * vdo_convert_maximum_age() - Convert the maximum age to reflect the new recovery journal format
+ * @age: The configured maximum age
+ *
+ * Return: The converted age
+ *
+ * In the old recovery journal format, each journal block held 311 entries, and every write bio
+ * made two entries. The old maximum age was half the usable journal length. In the new format,
+ * each block holds only 217 entries, but each bio only makes one entry. We convert the configured
+ * age so that the number of writes in a block map era is the same in the old and new formats. This
+ * keeps the bound on the amount of work required to recover the block map from the recovery
+ * journal the same across the format change. It also keeps the amortization of block map page
+ * writes to write bios the same.
+ */
+static inline block_count_t vdo_convert_maximum_age(block_count_t age)
+{
+ return DIV_ROUND_UP(age * RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK,
+ 2 * RECOVERY_JOURNAL_ENTRIES_PER_BLOCK);
+}
+
+#endif /* VDO_BLOCK_MAP_H */
diff --git a/drivers/md/dm-vdo/completion.c b/drivers/md/dm-vdo/completion.c
new file mode 100644
index 000000000000..5ad85334632d
--- /dev/null
+++ b/drivers/md/dm-vdo/completion.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "completion.h"
+
+#include <linux/kernel.h>
+
+#include "logger.h"
+#include "permassert.h"
+
+#include "status-codes.h"
+#include "types.h"
+#include "vio.h"
+#include "vdo.h"
+
+/**
+ * DOC: vdo completions.
+ *
+ * Most of vdo's data structures are lock free, each either belonging to a single "zone," or
+ * divided into a number of zones whose accesses to the structure do not overlap. During normal
+ * operation, at most one thread will be operating in any given zone. Each zone has a
+ * vdo_work_queue which holds vdo_completions that are to be run in that zone. A completion may
+ * only be enqueued on one queue or operating in a single zone at a time.
+ *
+ * At each step of a multi-threaded operation, the completion performing the operation is given a
+ * callback, error handler, and thread id for the next step. A completion is "run" when it is
+ * operating on the correct thread (as specified by its callback_thread_id). If the value of its
+ * "result" field is an error (i.e. not VDO_SUCCESS), the function in its "error_handler" will be
+ * invoked. If the error_handler is NULL, or there is no error, the function set as its "callback"
+ * will be invoked. Generally, a completion will not be run directly, but rather will be
+ * "launched." In this case, it will check whether it is operating on the correct thread. If it is,
+ * it will run immediately. Otherwise, it will be enqueue on the vdo_work_queue associated with the
+ * completion's "callback_thread_id". When it is dequeued, it will be on the correct thread, and
+ * will get run. In some cases, the completion should get queued instead of running immediately,
+ * even if it is being launched from the correct thread. This is usually in cases where there is a
+ * long chain of callbacks, all on the same thread, which could overflow the stack. In such cases,
+ * the completion's "requeue" field should be set to true. Doing so will skip the current thread
+ * check and simply enqueue the completion.
+ *
+ * A completion may be "finished," in which case its "complete" field will be set to true before it
+ * is next run. It is a bug to attempt to set the result or re-finish a finished completion.
+ * Because a completion's fields are not safe to examine from any thread other than the one on
+ * which the completion is currently operating, this field is used only to aid in detecting
+ * programming errors. It can not be used for cross-thread checking on the status of an operation.
+ * A completion must be "reset" before it can be reused after it has been finished. Resetting will
+ * also clear any error from the result field.
+ **/
+
+void vdo_initialize_completion(struct vdo_completion *completion,
+ struct vdo *vdo,
+ enum vdo_completion_type type)
+{
+ memset(completion, 0, sizeof(*completion));
+ completion->vdo = vdo;
+ completion->type = type;
+ vdo_reset_completion(completion);
+}
+
+static inline void assert_incomplete(struct vdo_completion *completion)
+{
+ VDO_ASSERT_LOG_ONLY(!completion->complete, "completion is not complete");
+}
+
+/**
+ * vdo_set_completion_result() - Set the result of a completion.
+ *
+ * Older errors will not be masked.
+ */
+void vdo_set_completion_result(struct vdo_completion *completion, int result)
+{
+ assert_incomplete(completion);
+ if (completion->result == VDO_SUCCESS)
+ completion->result = result;
+}
+
+/**
+ * vdo_launch_completion_with_priority() - Run or enqueue a completion.
+ * @priority: The priority at which to enqueue the completion.
+ *
+ * If called on the correct thread (i.e. the one specified in the completion's callback_thread_id
+ * field) and not marked for requeue, the completion will be run immediately. Otherwise, the
+ * completion will be enqueued on the specified thread.
+ */
+void vdo_launch_completion_with_priority(struct vdo_completion *completion,
+ enum vdo_completion_priority priority)
+{
+ thread_id_t callback_thread = completion->callback_thread_id;
+
+ if (completion->requeue || (callback_thread != vdo_get_callback_thread_id())) {
+ vdo_enqueue_completion(completion, priority);
+ return;
+ }
+
+ vdo_run_completion(completion);
+}
+
+/** vdo_finish_completion() - Mark a completion as complete and then launch it. */
+void vdo_finish_completion(struct vdo_completion *completion)
+{
+ assert_incomplete(completion);
+ completion->complete = true;
+ if (completion->callback != NULL)
+ vdo_launch_completion(completion);
+}
+
+void vdo_enqueue_completion(struct vdo_completion *completion,
+ enum vdo_completion_priority priority)
+{
+ struct vdo *vdo = completion->vdo;
+ thread_id_t thread_id = completion->callback_thread_id;
+
+ if (VDO_ASSERT(thread_id < vdo->thread_config.thread_count,
+ "thread_id %u (completion type %d) is less than thread count %u",
+ thread_id, completion->type,
+ vdo->thread_config.thread_count) != VDO_SUCCESS)
+ BUG();
+
+ completion->requeue = false;
+ completion->priority = priority;
+ completion->my_queue = NULL;
+ vdo_enqueue_work_queue(vdo->threads[thread_id].queue, completion);
+}
+
+/**
+ * vdo_requeue_completion_if_needed() - Requeue a completion if not called on the specified thread.
+ *
+ * Return: True if the completion was requeued; callers may not access the completion in this case.
+ */
+bool vdo_requeue_completion_if_needed(struct vdo_completion *completion,
+ thread_id_t callback_thread_id)
+{
+ if (vdo_get_callback_thread_id() == callback_thread_id)
+ return false;
+
+ completion->callback_thread_id = callback_thread_id;
+ vdo_enqueue_completion(completion, VDO_WORK_Q_DEFAULT_PRIORITY);
+ return true;
+}
diff --git a/drivers/md/dm-vdo/completion.h b/drivers/md/dm-vdo/completion.h
new file mode 100644
index 000000000000..3407f34ce58c
--- /dev/null
+++ b/drivers/md/dm-vdo/completion.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_COMPLETION_H
+#define VDO_COMPLETION_H
+
+#include "permassert.h"
+
+#include "status-codes.h"
+#include "types.h"
+
+/**
+ * vdo_run_completion() - Run a completion's callback or error handler on the current thread.
+ *
+ * Context: This function must be called from the correct callback thread.
+ */
+static inline void vdo_run_completion(struct vdo_completion *completion)
+{
+ if ((completion->result != VDO_SUCCESS) && (completion->error_handler != NULL)) {
+ completion->error_handler(completion);
+ return;
+ }
+
+ completion->callback(completion);
+}
+
+void vdo_set_completion_result(struct vdo_completion *completion, int result);
+
+void vdo_initialize_completion(struct vdo_completion *completion, struct vdo *vdo,
+ enum vdo_completion_type type);
+
+/**
+ * vdo_reset_completion() - Reset a completion to a clean state, while keeping the type, vdo and
+ * parent information.
+ */
+static inline void vdo_reset_completion(struct vdo_completion *completion)
+{
+ completion->result = VDO_SUCCESS;
+ completion->complete = false;
+}
+
+void vdo_launch_completion_with_priority(struct vdo_completion *completion,
+ enum vdo_completion_priority priority);
+
+/**
+ * vdo_launch_completion() - Launch a completion with default priority.
+ */
+static inline void vdo_launch_completion(struct vdo_completion *completion)
+{
+ vdo_launch_completion_with_priority(completion, VDO_WORK_Q_DEFAULT_PRIORITY);
+}
+
+/**
+ * vdo_continue_completion() - Continue processing a completion.
+ * @result: The current result (will not mask older errors).
+ *
+ * Continue processing a completion by setting the current result and calling
+ * vdo_launch_completion().
+ */
+static inline void vdo_continue_completion(struct vdo_completion *completion, int result)
+{
+ vdo_set_completion_result(completion, result);
+ vdo_launch_completion(completion);
+}
+
+void vdo_finish_completion(struct vdo_completion *completion);
+
+/**
+ * vdo_fail_completion() - Set the result of a completion if it does not already have an error,
+ * then finish it.
+ */
+static inline void vdo_fail_completion(struct vdo_completion *completion, int result)
+{
+ vdo_set_completion_result(completion, result);
+ vdo_finish_completion(completion);
+}
+
+/**
+ * vdo_assert_completion_type() - Assert that a completion is of the correct type.
+ *
+ * Return: VDO_SUCCESS or an error
+ */
+static inline int vdo_assert_completion_type(struct vdo_completion *completion,
+ enum vdo_completion_type expected)
+{
+ return VDO_ASSERT(expected == completion->type,
+ "completion type should be %u, not %u", expected,
+ completion->type);
+}
+
+static inline void vdo_set_completion_callback(struct vdo_completion *completion,
+ vdo_action_fn callback,
+ thread_id_t callback_thread_id)
+{
+ completion->callback = callback;
+ completion->callback_thread_id = callback_thread_id;
+}
+
+/**
+ * vdo_launch_completion_callback() - Set the callback for a completion and launch it immediately.
+ */
+static inline void vdo_launch_completion_callback(struct vdo_completion *completion,
+ vdo_action_fn callback,
+ thread_id_t callback_thread_id)
+{
+ vdo_set_completion_callback(completion, callback, callback_thread_id);
+ vdo_launch_completion(completion);
+}
+
+/**
+ * vdo_prepare_completion() - Prepare a completion for launch.
+ *
+ * Resets the completion, and then sets its callback, error handler, callback thread, and parent.
+ */
+static inline void vdo_prepare_completion(struct vdo_completion *completion,
+ vdo_action_fn callback,
+ vdo_action_fn error_handler,
+ thread_id_t callback_thread_id, void *parent)
+{
+ vdo_reset_completion(completion);
+ vdo_set_completion_callback(completion, callback, callback_thread_id);
+ completion->error_handler = error_handler;
+ completion->parent = parent;
+}
+
+/**
+ * vdo_prepare_completion_for_requeue() - Prepare a completion for launch ensuring that it will
+ * always be requeued.
+ *
+ * Resets the completion, and then sets its callback, error handler, callback thread, and parent.
+ */
+static inline void vdo_prepare_completion_for_requeue(struct vdo_completion *completion,
+ vdo_action_fn callback,
+ vdo_action_fn error_handler,
+ thread_id_t callback_thread_id,
+ void *parent)
+{
+ vdo_prepare_completion(completion, callback, error_handler,
+ callback_thread_id, parent);
+ completion->requeue = true;
+}
+
+void vdo_enqueue_completion(struct vdo_completion *completion,
+ enum vdo_completion_priority priority);
+
+
+bool vdo_requeue_completion_if_needed(struct vdo_completion *completion,
+ thread_id_t callback_thread_id);
+
+#endif /* VDO_COMPLETION_H */
diff --git a/drivers/md/dm-vdo/constants.h b/drivers/md/dm-vdo/constants.h
new file mode 100644
index 000000000000..a8c4d6e24b38
--- /dev/null
+++ b/drivers/md/dm-vdo/constants.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_CONSTANTS_H
+#define VDO_CONSTANTS_H
+
+#include <linux/blkdev.h>
+
+#include "types.h"
+
+enum {
+ /*
+ * The maximum number of contiguous PBNs which will go to a single bio submission queue,
+ * assuming there is more than one queue.
+ */
+ VDO_BIO_ROTATION_INTERVAL_LIMIT = 1024,
+
+ /* The number of entries on a block map page */
+ VDO_BLOCK_MAP_ENTRIES_PER_PAGE = 812,
+
+ /* The origin of the flat portion of the block map */
+ VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN = 1,
+
+ /*
+ * The height of a block map tree. Assuming a root count of 60 and 812 entries per page,
+ * this is big enough to represent almost 95 PB of logical space.
+ */
+ VDO_BLOCK_MAP_TREE_HEIGHT = 5,
+
+ /* The default number of bio submission queues. */
+ DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT = 4,
+
+ /* The number of contiguous PBNs to be submitted to a single bio queue. */
+ DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL = 64,
+
+ /* The number of trees in the arboreal block map */
+ DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT = 60,
+
+ /* The default size of the recovery journal, in blocks */
+ DEFAULT_VDO_RECOVERY_JOURNAL_SIZE = 32 * 1024,
+
+ /* The default size of each slab journal, in blocks */
+ DEFAULT_VDO_SLAB_JOURNAL_SIZE = 224,
+
+ /* Unit test minimum */
+ MINIMUM_VDO_SLAB_JOURNAL_BLOCKS = 2,
+
+ /*
+ * The initial size of lbn_operations and pbn_operations, which is based upon the expected
+ * maximum number of outstanding VIOs. This value was chosen to make it highly unlikely
+ * that the maps would need to be resized.
+ */
+ VDO_LOCK_MAP_CAPACITY = 10000,
+
+ /* The maximum number of logical zones */
+ MAX_VDO_LOGICAL_ZONES = 60,
+
+ /* The maximum number of physical zones */
+ MAX_VDO_PHYSICAL_ZONES = 16,
+
+ /* The base-2 logarithm of the maximum blocks in one slab */
+ MAX_VDO_SLAB_BITS = 23,
+
+ /* The maximum number of slabs the slab depot supports */
+ MAX_VDO_SLABS = 8192,
+
+ /*
+ * The maximum number of block map pages to load simultaneously during recovery or rebuild.
+ */
+ MAXIMUM_SIMULTANEOUS_VDO_BLOCK_MAP_RESTORATION_READS = 1024,
+
+ /* The maximum number of entries in the slab summary */
+ MAXIMUM_VDO_SLAB_SUMMARY_ENTRIES = MAX_VDO_SLABS * MAX_VDO_PHYSICAL_ZONES,
+
+ /* The maximum number of total threads in a VDO thread configuration. */
+ MAXIMUM_VDO_THREADS = 100,
+
+ /* The maximum number of VIOs in the system at once */
+ MAXIMUM_VDO_USER_VIOS = 2048,
+
+ /* The only physical block size supported by VDO */
+ VDO_BLOCK_SIZE = 4096,
+
+ /* The number of sectors per block */
+ VDO_SECTORS_PER_BLOCK = (VDO_BLOCK_SIZE >> SECTOR_SHIFT),
+
+ /* The size of a sector that will not be torn */
+ VDO_SECTOR_SIZE = 512,
+
+ /* The physical block number reserved for storing the zero block */
+ VDO_ZERO_BLOCK = 0,
+};
+
+#endif /* VDO_CONSTANTS_H */
diff --git a/drivers/md/dm-vdo/cpu.h b/drivers/md/dm-vdo/cpu.h
new file mode 100644
index 000000000000..d6a2615ba657
--- /dev/null
+++ b/drivers/md/dm-vdo/cpu.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_CPU_H
+#define UDS_CPU_H
+
+#include <linux/cache.h>
+
+/**
+ * uds_prefetch_address() - Minimize cache-miss latency by attempting to move data into a CPU cache
+ * before it is accessed.
+ *
+ * @address: the address to fetch (may be invalid)
+ * @for_write: must be constant at compile time--false if for reading, true if for writing
+ */
+static inline void uds_prefetch_address(const void *address, bool for_write)
+{
+ /*
+ * for_write won't be a constant if we are compiled with optimization turned off, in which
+ * case prefetching really doesn't matter. clang can't figure out that if for_write is a
+ * constant, it can be passed as the second, mandatorily constant argument to prefetch(),
+ * at least currently on llvm 12.
+ */
+ if (__builtin_constant_p(for_write)) {
+ if (for_write)
+ __builtin_prefetch(address, true);
+ else
+ __builtin_prefetch(address, false);
+ }
+}
+
+/**
+ * uds_prefetch_range() - Minimize cache-miss latency by attempting to move a range of addresses
+ * into a CPU cache before they are accessed.
+ *
+ * @start: the starting address to fetch (may be invalid)
+ * @size: the number of bytes in the address range
+ * @for_write: must be constant at compile time--false if for reading, true if for writing
+ */
+static inline void uds_prefetch_range(const void *start, unsigned int size,
+ bool for_write)
+{
+ /*
+ * Count the number of cache lines to fetch, allowing for the address range to span an
+ * extra cache line boundary due to address alignment.
+ */
+ const char *address = (const char *) start;
+ unsigned int offset = ((uintptr_t) address % L1_CACHE_BYTES);
+ unsigned int cache_lines = (1 + ((size + offset) / L1_CACHE_BYTES));
+
+ while (cache_lines-- > 0) {
+ uds_prefetch_address(address, for_write);
+ address += L1_CACHE_BYTES;
+ }
+}
+
+#endif /* UDS_CPU_H */
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
new file mode 100644
index 000000000000..94f6f1ccfb7d
--- /dev/null
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -0,0 +1,2063 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "data-vio.h"
+
+#include <linux/atomic.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/device-mapper.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lz4.h>
+#include <linux/minmax.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "murmurhash3.h"
+#include "permassert.h"
+
+#include "block-map.h"
+#include "dump.h"
+#include "encodings.h"
+#include "int-map.h"
+#include "io-submitter.h"
+#include "logical-zone.h"
+#include "packer.h"
+#include "recovery-journal.h"
+#include "slab-depot.h"
+#include "status-codes.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+/**
+ * DOC: Bio flags.
+ *
+ * For certain flags set on user bios, if the user bio has not yet been acknowledged, setting those
+ * flags on our own bio(s) for that request may help underlying layers better fulfill the user
+ * bio's needs. This constant contains the aggregate of those flags; VDO strips all the other
+ * flags, as they convey incorrect information.
+ *
+ * These flags are always irrelevant if we have already finished the user bio as they are only
+ * hints on IO importance. If VDO has finished the user bio, any remaining IO done doesn't care how
+ * important finishing the finished bio was.
+ *
+ * Note that bio.c contains the complete list of flags we believe may be set; the following list
+ * explains the action taken with each of those flags VDO could receive:
+ *
+ * * REQ_SYNC: Passed down if the user bio is not yet completed, since it indicates the user bio
+ * completion is required for further work to be done by the issuer.
+ * * REQ_META: Passed down if the user bio is not yet completed, since it may mean the lower layer
+ * treats it as more urgent, similar to REQ_SYNC.
+ * * REQ_PRIO: Passed down if the user bio is not yet completed, since it indicates the user bio is
+ * important.
+ * * REQ_NOMERGE: Set only if the incoming bio was split; irrelevant to VDO IO.
+ * * REQ_IDLE: Set if the incoming bio had more IO quickly following; VDO's IO pattern doesn't
+ * match incoming IO, so this flag is incorrect for it.
+ * * REQ_FUA: Handled separately, and irrelevant to VDO IO otherwise.
+ * * REQ_RAHEAD: Passed down, as, for reads, it indicates trivial importance.
+ * * REQ_BACKGROUND: Not passed down, as VIOs are a limited resource and VDO needs them recycled
+ * ASAP to service heavy load, which is the only place where REQ_BACKGROUND might aid in load
+ * prioritization.
+ */
+static blk_opf_t PASSTHROUGH_FLAGS = (REQ_PRIO | REQ_META | REQ_SYNC | REQ_RAHEAD);
+
+/**
+ * DOC:
+ *
+ * The data_vio_pool maintains the pool of data_vios which a vdo uses to service incoming bios. For
+ * correctness, and in order to avoid potentially expensive or blocking memory allocations during
+ * normal operation, the number of concurrently active data_vios is capped. Furthermore, in order
+ * to avoid starvation of reads and writes, at most 75% of the data_vios may be used for
+ * discards. The data_vio_pool is responsible for enforcing these limits. Threads submitting bios
+ * for which a data_vio or discard permit are not available will block until the necessary
+ * resources are available. The pool is also responsible for distributing resources to blocked
+ * threads and waking them. Finally, the pool attempts to batch the work of recycling data_vios by
+ * performing the work of actually assigning resources to blocked threads or placing data_vios back
+ * into the pool on a single cpu at a time.
+ *
+ * The pool contains two "limiters", one for tracking data_vios and one for tracking discard
+ * permits. The limiters also provide safe cross-thread access to pool statistics without the need
+ * to take the pool's lock. When a thread submits a bio to a vdo device, it will first attempt to
+ * get a discard permit if it is a discard, and then to get a data_vio. If the necessary resources
+ * are available, the incoming bio will be assigned to the acquired data_vio, and it will be
+ * launched. However, if either of these are unavailable, the arrival time of the bio is recorded
+ * in the bio's bi_private field, the bio and its submitter are both queued on the appropriate
+ * limiter and the submitting thread will then put itself to sleep. (note that this mechanism will
+ * break if jiffies are only 32 bits.)
+ *
+ * Whenever a data_vio has completed processing for the bio it was servicing, release_data_vio()
+ * will be called on it. This function will add the data_vio to a funnel queue, and then check the
+ * state of the pool. If the pool is not currently processing released data_vios, the pool's
+ * completion will be enqueued on a cpu queue. This obviates the need for the releasing threads to
+ * hold the pool's lock, and also batches release work while avoiding starvation of the cpu
+ * threads.
+ *
+ * Whenever the pool's completion is run on a cpu thread, it calls process_release_callback() which
+ * processes a batch of returned data_vios (currently at most 32) from the pool's funnel queue. For
+ * each data_vio, it first checks whether that data_vio was processing a discard. If so, and there
+ * is a blocked bio waiting for a discard permit, that permit is notionally transferred to the
+ * eldest discard waiter, and that waiter is moved to the end of the list of discard bios waiting
+ * for a data_vio. If there are no discard waiters, the discard permit is returned to the pool.
+ * Next, the data_vio is assigned to the oldest blocked bio which either has a discard permit, or
+ * doesn't need one and relaunched. If neither of these exist, the data_vio is returned to the
+ * pool. Finally, if any waiting bios were launched, the threads which blocked trying to submit
+ * them are awakened.
+ */
+
+#define DATA_VIO_RELEASE_BATCH_SIZE 128
+
+static const unsigned int VDO_SECTORS_PER_BLOCK_MASK = VDO_SECTORS_PER_BLOCK - 1;
+static const u32 COMPRESSION_STATUS_MASK = 0xff;
+static const u32 MAY_NOT_COMPRESS_MASK = 0x80000000;
+
+struct limiter;
+typedef void (*assigner_fn)(struct limiter *limiter);
+
+/* Bookkeeping structure for a single type of resource. */
+struct limiter {
+ /* The data_vio_pool to which this limiter belongs */
+ struct data_vio_pool *pool;
+ /* The maximum number of data_vios available */
+ data_vio_count_t limit;
+ /* The number of resources in use */
+ data_vio_count_t busy;
+ /* The maximum number of resources ever simultaneously in use */
+ data_vio_count_t max_busy;
+ /* The number of resources to release */
+ data_vio_count_t release_count;
+ /* The number of waiters to wake */
+ data_vio_count_t wake_count;
+ /* The list of waiting bios which are known to process_release_callback() */
+ struct bio_list waiters;
+ /* The list of waiting bios which are not yet known to process_release_callback() */
+ struct bio_list new_waiters;
+ /* The list of waiters which have their permits */
+ struct bio_list *permitted_waiters;
+ /* The function for assigning a resource to a waiter */
+ assigner_fn assigner;
+ /* The queue of blocked threads */
+ wait_queue_head_t blocked_threads;
+ /* The arrival time of the eldest waiter */
+ u64 arrival;
+};
+
+/*
+ * A data_vio_pool is a collection of preallocated data_vios which may be acquired from any thread,
+ * and are released in batches.
+ */
+struct data_vio_pool {
+ /* Completion for scheduling releases */
+ struct vdo_completion completion;
+ /* The administrative state of the pool */
+ struct admin_state state;
+ /* Lock protecting the pool */
+ spinlock_t lock;
+ /* The main limiter controlling the total data_vios in the pool. */
+ struct limiter limiter;
+ /* The limiter controlling data_vios for discard */
+ struct limiter discard_limiter;
+ /* The list of bios which have discard permits but still need a data_vio */
+ struct bio_list permitted_discards;
+ /* The list of available data_vios */
+ struct list_head available;
+ /* The queue of data_vios waiting to be returned to the pool */
+ struct funnel_queue *queue;
+ /* Whether the pool is processing, or scheduled to process releases */
+ atomic_t processing;
+ /* The data vios in the pool */
+ struct data_vio data_vios[];
+};
+
+static const char * const ASYNC_OPERATION_NAMES[] = {
+ "launch",
+ "acknowledge_write",
+ "acquire_hash_lock",
+ "attempt_logical_block_lock",
+ "lock_duplicate_pbn",
+ "check_for_duplication",
+ "cleanup",
+ "compress_data_vio",
+ "find_block_map_slot",
+ "get_mapped_block_for_read",
+ "get_mapped_block_for_write",
+ "hash_data_vio",
+ "journal_remapping",
+ "vdo_attempt_packing",
+ "put_mapped_block",
+ "read_data_vio",
+ "update_dedupe_index",
+ "update_reference_counts",
+ "verify_duplication",
+ "write_data_vio",
+};
+
+/* The steps taken cleaning up a VIO, in the order they are performed. */
+enum data_vio_cleanup_stage {
+ VIO_CLEANUP_START,
+ VIO_RELEASE_HASH_LOCK = VIO_CLEANUP_START,
+ VIO_RELEASE_ALLOCATED,
+ VIO_RELEASE_RECOVERY_LOCKS,
+ VIO_RELEASE_LOGICAL,
+ VIO_CLEANUP_DONE
+};
+
+static inline struct data_vio_pool * __must_check
+as_data_vio_pool(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_DATA_VIO_POOL_COMPLETION);
+ return container_of(completion, struct data_vio_pool, completion);
+}
+
+static inline u64 get_arrival_time(struct bio *bio)
+{
+ return (u64) bio->bi_private;
+}
+
+/**
+ * check_for_drain_complete_locked() - Check whether a data_vio_pool has no outstanding data_vios
+ * or waiters while holding the pool's lock.
+ */
+static bool check_for_drain_complete_locked(struct data_vio_pool *pool)
+{
+ if (pool->limiter.busy > 0)
+ return false;
+
+ VDO_ASSERT_LOG_ONLY((pool->discard_limiter.busy == 0),
+ "no outstanding discard permits");
+
+ return (bio_list_empty(&pool->limiter.new_waiters) &&
+ bio_list_empty(&pool->discard_limiter.new_waiters));
+}
+
+static void initialize_lbn_lock(struct data_vio *data_vio, logical_block_number_t lbn)
+{
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+ zone_count_t zone_number;
+ struct lbn_lock *lock = &data_vio->logical;
+
+ lock->lbn = lbn;
+ lock->locked = false;
+ vdo_waitq_init(&lock->waiters);
+ zone_number = vdo_compute_logical_zone(data_vio);
+ lock->zone = &vdo->logical_zones->zones[zone_number];
+}
+
+static void launch_locked_request(struct data_vio *data_vio)
+{
+ data_vio->logical.locked = true;
+ if (data_vio->write) {
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+
+ if (vdo_is_read_only(vdo)) {
+ continue_data_vio_with_error(data_vio, VDO_READ_ONLY);
+ return;
+ }
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_FIND_BLOCK_MAP_SLOT;
+ vdo_find_block_map_slot(data_vio);
+}
+
+static void acknowledge_data_vio(struct data_vio *data_vio)
+{
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+ struct bio *bio = data_vio->user_bio;
+ int error = vdo_status_to_errno(data_vio->vio.completion.result);
+
+ if (bio == NULL)
+ return;
+
+ VDO_ASSERT_LOG_ONLY((data_vio->remaining_discard <=
+ (u32) (VDO_BLOCK_SIZE - data_vio->offset)),
+ "data_vio to acknowledge is not an incomplete discard");
+
+ data_vio->user_bio = NULL;
+ vdo_count_bios(&vdo->stats.bios_acknowledged, bio);
+ if (data_vio->is_partial)
+ vdo_count_bios(&vdo->stats.bios_acknowledged_partial, bio);
+
+ bio->bi_status = errno_to_blk_status(error);
+ bio_endio(bio);
+}
+
+static void copy_to_bio(struct bio *bio, char *data_ptr)
+{
+ struct bio_vec biovec;
+ struct bvec_iter iter;
+
+ bio_for_each_segment(biovec, bio, iter) {
+ memcpy_to_bvec(&biovec, data_ptr);
+ data_ptr += biovec.bv_len;
+ }
+}
+
+struct data_vio_compression_status get_data_vio_compression_status(struct data_vio *data_vio)
+{
+ u32 packed = atomic_read(&data_vio->compression.status);
+
+ /* pairs with cmpxchg in set_data_vio_compression_status */
+ smp_rmb();
+ return (struct data_vio_compression_status) {
+ .stage = packed & COMPRESSION_STATUS_MASK,
+ .may_not_compress = ((packed & MAY_NOT_COMPRESS_MASK) != 0),
+ };
+}
+
+/**
+ * pack_status() - Convert a data_vio_compression_status into a u32 which may be stored
+ * atomically.
+ * @status: The state to convert.
+ *
+ * Return: The compression state packed into a u32.
+ */
+static u32 __must_check pack_status(struct data_vio_compression_status status)
+{
+ return status.stage | (status.may_not_compress ? MAY_NOT_COMPRESS_MASK : 0);
+}
+
+/**
+ * set_data_vio_compression_status() - Set the compression status of a data_vio.
+ * @state: The expected current status of the data_vio.
+ * @new_state: The status to set.
+ *
+ * Return: true if the new status was set, false if the data_vio's compression status did not
+ * match the expected state, and so was left unchanged.
+ */
+static bool __must_check
+set_data_vio_compression_status(struct data_vio *data_vio,
+ struct data_vio_compression_status status,
+ struct data_vio_compression_status new_status)
+{
+ u32 actual;
+ u32 expected = pack_status(status);
+ u32 replacement = pack_status(new_status);
+
+ /*
+ * Extra barriers because this was original developed using a CAS operation that implicitly
+ * had them.
+ */
+ smp_mb__before_atomic();
+ actual = atomic_cmpxchg(&data_vio->compression.status, expected, replacement);
+ /* same as before_atomic */
+ smp_mb__after_atomic();
+ return (expected == actual);
+}
+
+struct data_vio_compression_status advance_data_vio_compression_stage(struct data_vio *data_vio)
+{
+ for (;;) {
+ struct data_vio_compression_status status =
+ get_data_vio_compression_status(data_vio);
+ struct data_vio_compression_status new_status = status;
+
+ if (status.stage == DATA_VIO_POST_PACKER) {
+ /* We're already in the last stage. */
+ return status;
+ }
+
+ if (status.may_not_compress) {
+ /*
+ * Compression has been dis-allowed for this VIO, so skip the rest of the
+ * path and go to the end.
+ */
+ new_status.stage = DATA_VIO_POST_PACKER;
+ } else {
+ /* Go to the next state. */
+ new_status.stage++;
+ }
+
+ if (set_data_vio_compression_status(data_vio, status, new_status))
+ return new_status;
+
+ /* Another thread changed the status out from under us so try again. */
+ }
+}
+
+/**
+ * cancel_data_vio_compression() - Prevent this data_vio from being compressed or packed.
+ *
+ * Return: true if the data_vio is in the packer and the caller was the first caller to cancel it.
+ */
+bool cancel_data_vio_compression(struct data_vio *data_vio)
+{
+ struct data_vio_compression_status status, new_status;
+
+ for (;;) {
+ status = get_data_vio_compression_status(data_vio);
+ if (status.may_not_compress || (status.stage == DATA_VIO_POST_PACKER)) {
+ /* This data_vio is already set up to not block in the packer. */
+ break;
+ }
+
+ new_status.stage = status.stage;
+ new_status.may_not_compress = true;
+
+ if (set_data_vio_compression_status(data_vio, status, new_status))
+ break;
+ }
+
+ return ((status.stage == DATA_VIO_PACKING) && !status.may_not_compress);
+}
+
+/**
+ * attempt_logical_block_lock() - Attempt to acquire the lock on a logical block.
+ * @completion: The data_vio for an external data request as a completion.
+ *
+ * This is the start of the path for all external requests. It is registered in launch_data_vio().
+ */
+static void attempt_logical_block_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct lbn_lock *lock = &data_vio->logical;
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+ struct data_vio *lock_holder;
+ int result;
+
+ assert_data_vio_in_logical_zone(data_vio);
+
+ if (data_vio->logical.lbn >= vdo->states.vdo.config.logical_blocks) {
+ continue_data_vio_with_error(data_vio, VDO_OUT_OF_RANGE);
+ return;
+ }
+
+ result = vdo_int_map_put(lock->zone->lbn_operations, lock->lbn,
+ data_vio, false, (void **) &lock_holder);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ if (lock_holder == NULL) {
+ /* We got the lock */
+ launch_locked_request(data_vio);
+ return;
+ }
+
+ result = VDO_ASSERT(lock_holder->logical.locked, "logical block lock held");
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ /*
+ * If the new request is a pure read request (not read-modify-write) and the lock_holder is
+ * writing and has received an allocation, service the read request immediately by copying
+ * data from the lock_holder to avoid having to flush the write out of the packer just to
+ * prevent the read from waiting indefinitely. If the lock_holder does not yet have an
+ * allocation, prevent it from blocking in the packer and wait on it. This is necessary in
+ * order to prevent returning data that may not have actually been written.
+ */
+ if (!data_vio->write && READ_ONCE(lock_holder->allocation_succeeded)) {
+ copy_to_bio(data_vio->user_bio, lock_holder->vio.data + data_vio->offset);
+ acknowledge_data_vio(data_vio);
+ complete_data_vio(completion);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_ATTEMPT_LOGICAL_BLOCK_LOCK;
+ vdo_waitq_enqueue_waiter(&lock_holder->logical.waiters, &data_vio->waiter);
+
+ /*
+ * Prevent writes and read-modify-writes from blocking indefinitely on lock holders in the
+ * packer.
+ */
+ if (lock_holder->write && cancel_data_vio_compression(lock_holder)) {
+ data_vio->compression.lock_holder = lock_holder;
+ launch_data_vio_packer_callback(data_vio,
+ vdo_remove_lock_holder_from_packer);
+ }
+}
+
+/**
+ * launch_data_vio() - (Re)initialize a data_vio to have a new logical block number, keeping the
+ * same parent and other state and send it on its way.
+ */
+static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lbn)
+{
+ struct vdo_completion *completion = &data_vio->vio.completion;
+
+ /*
+ * Clearing the tree lock must happen before initializing the LBN lock, which also adds
+ * information to the tree lock.
+ */
+ memset(&data_vio->tree_lock, 0, sizeof(data_vio->tree_lock));
+ initialize_lbn_lock(data_vio, lbn);
+ INIT_LIST_HEAD(&data_vio->hash_lock_entry);
+ INIT_LIST_HEAD(&data_vio->write_entry);
+
+ memset(&data_vio->allocation, 0, sizeof(data_vio->allocation));
+
+ data_vio->is_duplicate = false;
+
+ memset(&data_vio->record_name, 0, sizeof(data_vio->record_name));
+ memset(&data_vio->duplicate, 0, sizeof(data_vio->duplicate));
+ vdo_reset_completion(completion);
+ completion->error_handler = handle_data_vio_error;
+ set_data_vio_logical_callback(data_vio, attempt_logical_block_lock);
+ vdo_enqueue_completion(completion, VDO_DEFAULT_Q_MAP_BIO_PRIORITY);
+}
+
+static bool is_zero_block(char *block)
+{
+ int i;
+
+ for (i = 0; i < VDO_BLOCK_SIZE; i += sizeof(u64)) {
+ if (*((u64 *) &block[i]))
+ return false;
+ }
+
+ return true;
+}
+
+static void copy_from_bio(struct bio *bio, char *data_ptr)
+{
+ struct bio_vec biovec;
+ struct bvec_iter iter;
+
+ bio_for_each_segment(biovec, bio, iter) {
+ memcpy_from_bvec(data_ptr, &biovec);
+ data_ptr += biovec.bv_len;
+ }
+}
+
+static void launch_bio(struct vdo *vdo, struct data_vio *data_vio, struct bio *bio)
+{
+ logical_block_number_t lbn;
+ /*
+ * Zero out the fields which don't need to be preserved (i.e. which are not pointers to
+ * separately allocated objects).
+ */
+ memset(data_vio, 0, offsetof(struct data_vio, vio));
+ memset(&data_vio->compression, 0, offsetof(struct compression_state, block));
+
+ data_vio->user_bio = bio;
+ data_vio->offset = to_bytes(bio->bi_iter.bi_sector & VDO_SECTORS_PER_BLOCK_MASK);
+ data_vio->is_partial = (bio->bi_iter.bi_size < VDO_BLOCK_SIZE) || (data_vio->offset != 0);
+
+ /*
+ * Discards behave very differently than other requests when coming in from device-mapper.
+ * We have to be able to handle any size discards and various sector offsets within a
+ * block.
+ */
+ if (bio_op(bio) == REQ_OP_DISCARD) {
+ data_vio->remaining_discard = bio->bi_iter.bi_size;
+ data_vio->write = true;
+ data_vio->is_discard = true;
+ if (data_vio->is_partial) {
+ vdo_count_bios(&vdo->stats.bios_in_partial, bio);
+ data_vio->read = true;
+ }
+ } else if (data_vio->is_partial) {
+ vdo_count_bios(&vdo->stats.bios_in_partial, bio);
+ data_vio->read = true;
+ if (bio_data_dir(bio) == WRITE)
+ data_vio->write = true;
+ } else if (bio_data_dir(bio) == READ) {
+ data_vio->read = true;
+ } else {
+ /*
+ * Copy the bio data to a char array so that we can continue to use the data after
+ * we acknowledge the bio.
+ */
+ copy_from_bio(bio, data_vio->vio.data);
+ data_vio->is_zero = is_zero_block(data_vio->vio.data);
+ data_vio->write = true;
+ }
+
+ if (data_vio->user_bio->bi_opf & REQ_FUA)
+ data_vio->fua = true;
+
+ lbn = (bio->bi_iter.bi_sector - vdo->starting_sector_offset) / VDO_SECTORS_PER_BLOCK;
+ launch_data_vio(data_vio, lbn);
+}
+
+static void assign_data_vio(struct limiter *limiter, struct data_vio *data_vio)
+{
+ struct bio *bio = bio_list_pop(limiter->permitted_waiters);
+
+ launch_bio(limiter->pool->completion.vdo, data_vio, bio);
+ limiter->wake_count++;
+
+ bio = bio_list_peek(limiter->permitted_waiters);
+ limiter->arrival = ((bio == NULL) ? U64_MAX : get_arrival_time(bio));
+}
+
+static void assign_discard_permit(struct limiter *limiter)
+{
+ struct bio *bio = bio_list_pop(&limiter->waiters);
+
+ if (limiter->arrival == U64_MAX)
+ limiter->arrival = get_arrival_time(bio);
+
+ bio_list_add(limiter->permitted_waiters, bio);
+}
+
+static void get_waiters(struct limiter *limiter)
+{
+ bio_list_merge(&limiter->waiters, &limiter->new_waiters);
+ bio_list_init(&limiter->new_waiters);
+}
+
+static inline struct data_vio *get_available_data_vio(struct data_vio_pool *pool)
+{
+ struct data_vio *data_vio =
+ list_first_entry(&pool->available, struct data_vio, pool_entry);
+
+ list_del_init(&data_vio->pool_entry);
+ return data_vio;
+}
+
+static void assign_data_vio_to_waiter(struct limiter *limiter)
+{
+ assign_data_vio(limiter, get_available_data_vio(limiter->pool));
+}
+
+static void update_limiter(struct limiter *limiter)
+{
+ struct bio_list *waiters = &limiter->waiters;
+ data_vio_count_t available = limiter->limit - limiter->busy;
+
+ VDO_ASSERT_LOG_ONLY((limiter->release_count <= limiter->busy),
+ "Release count %u is not more than busy count %u",
+ limiter->release_count, limiter->busy);
+
+ get_waiters(limiter);
+ for (; (limiter->release_count > 0) && !bio_list_empty(waiters); limiter->release_count--)
+ limiter->assigner(limiter);
+
+ if (limiter->release_count > 0) {
+ WRITE_ONCE(limiter->busy, limiter->busy - limiter->release_count);
+ limiter->release_count = 0;
+ return;
+ }
+
+ for (; (available > 0) && !bio_list_empty(waiters); available--)
+ limiter->assigner(limiter);
+
+ WRITE_ONCE(limiter->busy, limiter->limit - available);
+ if (limiter->max_busy < limiter->busy)
+ WRITE_ONCE(limiter->max_busy, limiter->busy);
+}
+
+/**
+ * schedule_releases() - Ensure that release processing is scheduled.
+ *
+ * If this call switches the state to processing, enqueue. Otherwise, some other thread has already
+ * done so.
+ */
+static void schedule_releases(struct data_vio_pool *pool)
+{
+ /* Pairs with the barrier in process_release_callback(). */
+ smp_mb__before_atomic();
+ if (atomic_cmpxchg(&pool->processing, false, true))
+ return;
+
+ pool->completion.requeue = true;
+ vdo_launch_completion_with_priority(&pool->completion,
+ CPU_Q_COMPLETE_VIO_PRIORITY);
+}
+
+static void reuse_or_release_resources(struct data_vio_pool *pool,
+ struct data_vio *data_vio,
+ struct list_head *returned)
+{
+ if (data_vio->remaining_discard > 0) {
+ if (bio_list_empty(&pool->discard_limiter.waiters)) {
+ /* Return the data_vio's discard permit. */
+ pool->discard_limiter.release_count++;
+ } else {
+ assign_discard_permit(&pool->discard_limiter);
+ }
+ }
+
+ if (pool->limiter.arrival < pool->discard_limiter.arrival) {
+ assign_data_vio(&pool->limiter, data_vio);
+ } else if (pool->discard_limiter.arrival < U64_MAX) {
+ assign_data_vio(&pool->discard_limiter, data_vio);
+ } else {
+ list_add(&data_vio->pool_entry, returned);
+ pool->limiter.release_count++;
+ }
+}
+
+/**
+ * process_release_callback() - Process a batch of data_vio releases.
+ * @completion: The pool with data_vios to release.
+ */
+static void process_release_callback(struct vdo_completion *completion)
+{
+ struct data_vio_pool *pool = as_data_vio_pool(completion);
+ bool reschedule;
+ bool drained;
+ data_vio_count_t processed;
+ data_vio_count_t to_wake;
+ data_vio_count_t discards_to_wake;
+ LIST_HEAD(returned);
+
+ spin_lock(&pool->lock);
+ get_waiters(&pool->discard_limiter);
+ get_waiters(&pool->limiter);
+ spin_unlock(&pool->lock);
+
+ if (pool->limiter.arrival == U64_MAX) {
+ struct bio *bio = bio_list_peek(&pool->limiter.waiters);
+
+ if (bio != NULL)
+ pool->limiter.arrival = get_arrival_time(bio);
+ }
+
+ for (processed = 0; processed < DATA_VIO_RELEASE_BATCH_SIZE; processed++) {
+ struct data_vio *data_vio;
+ struct funnel_queue_entry *entry = vdo_funnel_queue_poll(pool->queue);
+
+ if (entry == NULL)
+ break;
+
+ data_vio = as_data_vio(container_of(entry, struct vdo_completion,
+ work_queue_entry_link));
+ acknowledge_data_vio(data_vio);
+ reuse_or_release_resources(pool, data_vio, &returned);
+ }
+
+ spin_lock(&pool->lock);
+ /*
+ * There is a race where waiters could be added while we are in the unlocked section above.
+ * Those waiters could not see the resources we are now about to release, so we assign
+ * those resources now as we have no guarantee of being rescheduled. This is handled in
+ * update_limiter().
+ */
+ update_limiter(&pool->discard_limiter);
+ list_splice(&returned, &pool->available);
+ update_limiter(&pool->limiter);
+ to_wake = pool->limiter.wake_count;
+ pool->limiter.wake_count = 0;
+ discards_to_wake = pool->discard_limiter.wake_count;
+ pool->discard_limiter.wake_count = 0;
+
+ atomic_set(&pool->processing, false);
+ /* Pairs with the barrier in schedule_releases(). */
+ smp_mb();
+
+ reschedule = !vdo_is_funnel_queue_empty(pool->queue);
+ drained = (!reschedule &&
+ vdo_is_state_draining(&pool->state) &&
+ check_for_drain_complete_locked(pool));
+ spin_unlock(&pool->lock);
+
+ if (to_wake > 0)
+ wake_up_nr(&pool->limiter.blocked_threads, to_wake);
+
+ if (discards_to_wake > 0)
+ wake_up_nr(&pool->discard_limiter.blocked_threads, discards_to_wake);
+
+ if (reschedule)
+ schedule_releases(pool);
+ else if (drained)
+ vdo_finish_draining(&pool->state);
+}
+
+static void initialize_limiter(struct limiter *limiter, struct data_vio_pool *pool,
+ assigner_fn assigner, data_vio_count_t limit)
+{
+ limiter->pool = pool;
+ limiter->assigner = assigner;
+ limiter->limit = limit;
+ limiter->arrival = U64_MAX;
+ init_waitqueue_head(&limiter->blocked_threads);
+}
+
+/**
+ * initialize_data_vio() - Allocate the components of a data_vio.
+ *
+ * The caller is responsible for cleaning up the data_vio on error.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int initialize_data_vio(struct data_vio *data_vio, struct vdo *vdo)
+{
+ struct bio *bio;
+ int result;
+
+ BUILD_BUG_ON(VDO_BLOCK_SIZE > PAGE_SIZE);
+ result = vdo_allocate_memory(VDO_BLOCK_SIZE, 0, "data_vio data",
+ &data_vio->vio.data);
+ if (result != VDO_SUCCESS)
+ return vdo_log_error_strerror(result,
+ "data_vio data allocation failure");
+
+ result = vdo_allocate_memory(VDO_BLOCK_SIZE, 0, "compressed block",
+ &data_vio->compression.block);
+ if (result != VDO_SUCCESS) {
+ return vdo_log_error_strerror(result,
+ "data_vio compressed block allocation failure");
+ }
+
+ result = vdo_allocate_memory(VDO_BLOCK_SIZE, 0, "vio scratch",
+ &data_vio->scratch_block);
+ if (result != VDO_SUCCESS)
+ return vdo_log_error_strerror(result,
+ "data_vio scratch allocation failure");
+
+ result = vdo_create_bio(&bio);
+ if (result != VDO_SUCCESS)
+ return vdo_log_error_strerror(result,
+ "data_vio data bio allocation failure");
+
+ vdo_initialize_completion(&data_vio->decrement_completion, vdo,
+ VDO_DECREMENT_COMPLETION);
+ initialize_vio(&data_vio->vio, bio, 1, VIO_TYPE_DATA, VIO_PRIORITY_DATA, vdo);
+
+ return VDO_SUCCESS;
+}
+
+static void destroy_data_vio(struct data_vio *data_vio)
+{
+ if (data_vio == NULL)
+ return;
+
+ vdo_free_bio(vdo_forget(data_vio->vio.bio));
+ vdo_free(vdo_forget(data_vio->vio.data));
+ vdo_free(vdo_forget(data_vio->compression.block));
+ vdo_free(vdo_forget(data_vio->scratch_block));
+}
+
+/**
+ * make_data_vio_pool() - Initialize a data_vio pool.
+ * @vdo: The vdo to which the pool will belong.
+ * @pool_size: The number of data_vios in the pool.
+ * @discard_limit: The maximum number of data_vios which may be used for discards.
+ * @pool: A pointer to hold the newly allocated pool.
+ */
+int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size,
+ data_vio_count_t discard_limit, struct data_vio_pool **pool_ptr)
+{
+ int result;
+ struct data_vio_pool *pool;
+ data_vio_count_t i;
+
+ result = vdo_allocate_extended(struct data_vio_pool, pool_size, struct data_vio,
+ __func__, &pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ VDO_ASSERT_LOG_ONLY((discard_limit <= pool_size),
+ "discard limit does not exceed pool size");
+ initialize_limiter(&pool->discard_limiter, pool, assign_discard_permit,
+ discard_limit);
+ pool->discard_limiter.permitted_waiters = &pool->permitted_discards;
+ initialize_limiter(&pool->limiter, pool, assign_data_vio_to_waiter, pool_size);
+ pool->limiter.permitted_waiters = &pool->limiter.waiters;
+ INIT_LIST_HEAD(&pool->available);
+ spin_lock_init(&pool->lock);
+ vdo_set_admin_state_code(&pool->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ vdo_initialize_completion(&pool->completion, vdo, VDO_DATA_VIO_POOL_COMPLETION);
+ vdo_prepare_completion(&pool->completion, process_release_callback,
+ process_release_callback, vdo->thread_config.cpu_thread,
+ NULL);
+
+ result = vdo_make_funnel_queue(&pool->queue);
+ if (result != VDO_SUCCESS) {
+ free_data_vio_pool(vdo_forget(pool));
+ return result;
+ }
+
+ for (i = 0; i < pool_size; i++) {
+ struct data_vio *data_vio = &pool->data_vios[i];
+
+ result = initialize_data_vio(data_vio, vdo);
+ if (result != VDO_SUCCESS) {
+ destroy_data_vio(data_vio);
+ free_data_vio_pool(pool);
+ return result;
+ }
+
+ list_add(&data_vio->pool_entry, &pool->available);
+ }
+
+ *pool_ptr = pool;
+ return VDO_SUCCESS;
+}
+
+/**
+ * free_data_vio_pool() - Free a data_vio_pool and the data_vios in it.
+ *
+ * All data_vios must be returned to the pool before calling this function.
+ */
+void free_data_vio_pool(struct data_vio_pool *pool)
+{
+ struct data_vio *data_vio, *tmp;
+
+ if (pool == NULL)
+ return;
+
+ /*
+ * Pairs with the barrier in process_release_callback(). Possibly not needed since it
+ * caters to an enqueue vs. free race.
+ */
+ smp_mb();
+ BUG_ON(atomic_read(&pool->processing));
+
+ spin_lock(&pool->lock);
+ VDO_ASSERT_LOG_ONLY((pool->limiter.busy == 0),
+ "data_vio pool must not have %u busy entries when being freed",
+ pool->limiter.busy);
+ VDO_ASSERT_LOG_ONLY((bio_list_empty(&pool->limiter.waiters) &&
+ bio_list_empty(&pool->limiter.new_waiters)),
+ "data_vio pool must not have threads waiting to read or write when being freed");
+ VDO_ASSERT_LOG_ONLY((bio_list_empty(&pool->discard_limiter.waiters) &&
+ bio_list_empty(&pool->discard_limiter.new_waiters)),
+ "data_vio pool must not have threads waiting to discard when being freed");
+ spin_unlock(&pool->lock);
+
+ list_for_each_entry_safe(data_vio, tmp, &pool->available, pool_entry) {
+ list_del_init(&data_vio->pool_entry);
+ destroy_data_vio(data_vio);
+ }
+
+ vdo_free_funnel_queue(vdo_forget(pool->queue));
+ vdo_free(pool);
+}
+
+static bool acquire_permit(struct limiter *limiter)
+{
+ if (limiter->busy >= limiter->limit)
+ return false;
+
+ WRITE_ONCE(limiter->busy, limiter->busy + 1);
+ if (limiter->max_busy < limiter->busy)
+ WRITE_ONCE(limiter->max_busy, limiter->busy);
+ return true;
+}
+
+static void wait_permit(struct limiter *limiter, struct bio *bio)
+ __releases(&limiter->pool->lock)
+{
+ DEFINE_WAIT(wait);
+
+ bio_list_add(&limiter->new_waiters, bio);
+ prepare_to_wait_exclusive(&limiter->blocked_threads, &wait,
+ TASK_UNINTERRUPTIBLE);
+ spin_unlock(&limiter->pool->lock);
+ io_schedule();
+ finish_wait(&limiter->blocked_threads, &wait);
+}
+
+/**
+ * vdo_launch_bio() - Acquire a data_vio from the pool, assign the bio to it, and launch it.
+ *
+ * This will block if data_vios or discard permits are not available.
+ */
+void vdo_launch_bio(struct data_vio_pool *pool, struct bio *bio)
+{
+ struct data_vio *data_vio;
+
+ VDO_ASSERT_LOG_ONLY(!vdo_is_state_quiescent(&pool->state),
+ "data_vio_pool not quiescent on acquire");
+
+ bio->bi_private = (void *) jiffies;
+ spin_lock(&pool->lock);
+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
+ !acquire_permit(&pool->discard_limiter)) {
+ wait_permit(&pool->discard_limiter, bio);
+ return;
+ }
+
+ if (!acquire_permit(&pool->limiter)) {
+ wait_permit(&pool->limiter, bio);
+ return;
+ }
+
+ data_vio = get_available_data_vio(pool);
+ spin_unlock(&pool->lock);
+ launch_bio(pool->completion.vdo, data_vio, bio);
+}
+
+/* Implements vdo_admin_initiator_fn. */
+static void initiate_drain(struct admin_state *state)
+{
+ bool drained;
+ struct data_vio_pool *pool = container_of(state, struct data_vio_pool, state);
+
+ spin_lock(&pool->lock);
+ drained = check_for_drain_complete_locked(pool);
+ spin_unlock(&pool->lock);
+
+ if (drained)
+ vdo_finish_draining(state);
+}
+
+static void assert_on_vdo_cpu_thread(const struct vdo *vdo, const char *name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == vdo->thread_config.cpu_thread),
+ "%s called on cpu thread", name);
+}
+
+/**
+ * drain_data_vio_pool() - Wait asynchronously for all data_vios to be returned to the pool.
+ * @completion: The completion to notify when the pool has drained.
+ */
+void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
+{
+ assert_on_vdo_cpu_thread(completion->vdo, __func__);
+ vdo_start_draining(&pool->state, VDO_ADMIN_STATE_SUSPENDING, completion,
+ initiate_drain);
+}
+
+/**
+ * resume_data_vio_pool() - Resume a data_vio pool.
+ * @completion: The completion to notify when the pool has resumed.
+ */
+void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
+{
+ assert_on_vdo_cpu_thread(completion->vdo, __func__);
+ vdo_continue_completion(completion, vdo_resume_if_quiescent(&pool->state));
+}
+
+static void dump_limiter(const char *name, struct limiter *limiter)
+{
+ vdo_log_info("%s: %u of %u busy (max %u), %s", name, limiter->busy,
+ limiter->limit, limiter->max_busy,
+ ((bio_list_empty(&limiter->waiters) &&
+ bio_list_empty(&limiter->new_waiters)) ?
+ "no waiters" : "has waiters"));
+}
+
+/**
+ * dump_data_vio_pool() - Dump a data_vio pool to the log.
+ * @dump_vios: Whether to dump the details of each busy data_vio as well.
+ */
+void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios)
+{
+ /*
+ * In order that syslog can empty its buffer, sleep after 35 elements for 4ms (till the
+ * second clock tick). These numbers were picked based on experiments with lab machines.
+ */
+ static const int ELEMENTS_PER_BATCH = 35;
+ static const int SLEEP_FOR_SYSLOG = 4000;
+
+ if (pool == NULL)
+ return;
+
+ spin_lock(&pool->lock);
+ dump_limiter("data_vios", &pool->limiter);
+ dump_limiter("discard permits", &pool->discard_limiter);
+ if (dump_vios) {
+ int i;
+ int dumped = 0;
+
+ for (i = 0; i < pool->limiter.limit; i++) {
+ struct data_vio *data_vio = &pool->data_vios[i];
+
+ if (!list_empty(&data_vio->pool_entry))
+ continue;
+
+ dump_data_vio(data_vio);
+ if (++dumped >= ELEMENTS_PER_BATCH) {
+ spin_unlock(&pool->lock);
+ dumped = 0;
+ fsleep(SLEEP_FOR_SYSLOG);
+ spin_lock(&pool->lock);
+ }
+ }
+ }
+
+ spin_unlock(&pool->lock);
+}
+
+data_vio_count_t get_data_vio_pool_active_discards(struct data_vio_pool *pool)
+{
+ return READ_ONCE(pool->discard_limiter.busy);
+}
+
+data_vio_count_t get_data_vio_pool_discard_limit(struct data_vio_pool *pool)
+{
+ return READ_ONCE(pool->discard_limiter.limit);
+}
+
+data_vio_count_t get_data_vio_pool_maximum_discards(struct data_vio_pool *pool)
+{
+ return READ_ONCE(pool->discard_limiter.max_busy);
+}
+
+int set_data_vio_pool_discard_limit(struct data_vio_pool *pool, data_vio_count_t limit)
+{
+ if (get_data_vio_pool_request_limit(pool) < limit) {
+ // The discard limit may not be higher than the data_vio limit.
+ return -EINVAL;
+ }
+
+ spin_lock(&pool->lock);
+ pool->discard_limiter.limit = limit;
+ spin_unlock(&pool->lock);
+
+ return VDO_SUCCESS;
+}
+
+data_vio_count_t get_data_vio_pool_active_requests(struct data_vio_pool *pool)
+{
+ return READ_ONCE(pool->limiter.busy);
+}
+
+data_vio_count_t get_data_vio_pool_request_limit(struct data_vio_pool *pool)
+{
+ return READ_ONCE(pool->limiter.limit);
+}
+
+data_vio_count_t get_data_vio_pool_maximum_requests(struct data_vio_pool *pool)
+{
+ return READ_ONCE(pool->limiter.max_busy);
+}
+
+static void update_data_vio_error_stats(struct data_vio *data_vio)
+{
+ u8 index = 0;
+ static const char * const operations[] = {
+ [0] = "empty",
+ [1] = "read",
+ [2] = "write",
+ [3] = "read-modify-write",
+ [5] = "read+fua",
+ [6] = "write+fua",
+ [7] = "read-modify-write+fua",
+ };
+
+ if (data_vio->read)
+ index = 1;
+
+ if (data_vio->write)
+ index += 2;
+
+ if (data_vio->fua)
+ index += 4;
+
+ update_vio_error_stats(&data_vio->vio,
+ "Completing %s vio for LBN %llu with error after %s",
+ operations[index],
+ (unsigned long long) data_vio->logical.lbn,
+ get_data_vio_operation_name(data_vio));
+}
+
+static void perform_cleanup_stage(struct data_vio *data_vio,
+ enum data_vio_cleanup_stage stage);
+
+/**
+ * release_allocated_lock() - Release the PBN lock and/or the reference on the allocated block at
+ * the end of processing a data_vio.
+ */
+static void release_allocated_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_allocated_zone(data_vio);
+ release_data_vio_allocation_lock(data_vio, false);
+ perform_cleanup_stage(data_vio, VIO_RELEASE_RECOVERY_LOCKS);
+}
+
+/** release_lock() - Release an uncontended LBN lock. */
+static void release_lock(struct data_vio *data_vio, struct lbn_lock *lock)
+{
+ struct int_map *lock_map = lock->zone->lbn_operations;
+ struct data_vio *lock_holder;
+
+ if (!lock->locked) {
+ /* The lock is not locked, so it had better not be registered in the lock map. */
+ struct data_vio *lock_holder = vdo_int_map_get(lock_map, lock->lbn);
+
+ VDO_ASSERT_LOG_ONLY((data_vio != lock_holder),
+ "no logical block lock held for block %llu",
+ (unsigned long long) lock->lbn);
+ return;
+ }
+
+ /* Release the lock by removing the lock from the map. */
+ lock_holder = vdo_int_map_remove(lock_map, lock->lbn);
+ VDO_ASSERT_LOG_ONLY((data_vio == lock_holder),
+ "logical block lock mismatch for block %llu",
+ (unsigned long long) lock->lbn);
+ lock->locked = false;
+}
+
+/** transfer_lock() - Transfer a contended LBN lock to the eldest waiter. */
+static void transfer_lock(struct data_vio *data_vio, struct lbn_lock *lock)
+{
+ struct data_vio *lock_holder, *next_lock_holder;
+ int result;
+
+ VDO_ASSERT_LOG_ONLY(lock->locked, "lbn_lock with waiters is not locked");
+
+ /* Another data_vio is waiting for the lock, transfer it in a single lock map operation. */
+ next_lock_holder =
+ vdo_waiter_as_data_vio(vdo_waitq_dequeue_waiter(&lock->waiters));
+
+ /* Transfer the remaining lock waiters to the next lock holder. */
+ vdo_waitq_transfer_all_waiters(&lock->waiters,
+ &next_lock_holder->logical.waiters);
+
+ result = vdo_int_map_put(lock->zone->lbn_operations, lock->lbn,
+ next_lock_holder, true, (void **) &lock_holder);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(next_lock_holder, result);
+ return;
+ }
+
+ VDO_ASSERT_LOG_ONLY((lock_holder == data_vio),
+ "logical block lock mismatch for block %llu",
+ (unsigned long long) lock->lbn);
+ lock->locked = false;
+
+ /*
+ * If there are still waiters, other data_vios must be trying to get the lock we just
+ * transferred. We must ensure that the new lock holder doesn't block in the packer.
+ */
+ if (vdo_waitq_has_waiters(&next_lock_holder->logical.waiters))
+ cancel_data_vio_compression(next_lock_holder);
+
+ /*
+ * Avoid stack overflow on lock transfer.
+ * FIXME: this is only an issue in the 1 thread config.
+ */
+ next_lock_holder->vio.completion.requeue = true;
+ launch_locked_request(next_lock_holder);
+}
+
+/**
+ * release_logical_lock() - Release the logical block lock and flush generation lock at the end of
+ * processing a data_vio.
+ */
+static void release_logical_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct lbn_lock *lock = &data_vio->logical;
+
+ assert_data_vio_in_logical_zone(data_vio);
+
+ if (vdo_waitq_has_waiters(&lock->waiters))
+ transfer_lock(data_vio, lock);
+ else
+ release_lock(data_vio, lock);
+
+ vdo_release_flush_generation_lock(data_vio);
+ perform_cleanup_stage(data_vio, VIO_CLEANUP_DONE);
+}
+
+/** clean_hash_lock() - Release the hash lock at the end of processing a data_vio. */
+static void clean_hash_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_hash_zone(data_vio);
+ if (completion->result != VDO_SUCCESS) {
+ vdo_clean_failed_hash_lock(data_vio);
+ return;
+ }
+
+ vdo_release_hash_lock(data_vio);
+ perform_cleanup_stage(data_vio, VIO_RELEASE_LOGICAL);
+}
+
+/**
+ * finish_cleanup() - Make some assertions about a data_vio which has finished cleaning up.
+ *
+ * If it is part of a multi-block discard, starts on the next block, otherwise, returns it to the
+ * pool.
+ */
+static void finish_cleanup(struct data_vio *data_vio)
+{
+ struct vdo_completion *completion = &data_vio->vio.completion;
+
+ VDO_ASSERT_LOG_ONLY(data_vio->allocation.lock == NULL,
+ "complete data_vio has no allocation lock");
+ VDO_ASSERT_LOG_ONLY(data_vio->hash_lock == NULL,
+ "complete data_vio has no hash lock");
+ if ((data_vio->remaining_discard <= VDO_BLOCK_SIZE) ||
+ (completion->result != VDO_SUCCESS)) {
+ struct data_vio_pool *pool = completion->vdo->data_vio_pool;
+
+ vdo_funnel_queue_put(pool->queue, &completion->work_queue_entry_link);
+ schedule_releases(pool);
+ return;
+ }
+
+ data_vio->remaining_discard -= min_t(u32, data_vio->remaining_discard,
+ VDO_BLOCK_SIZE - data_vio->offset);
+ data_vio->is_partial = (data_vio->remaining_discard < VDO_BLOCK_SIZE);
+ data_vio->read = data_vio->is_partial;
+ data_vio->offset = 0;
+ completion->requeue = true;
+ launch_data_vio(data_vio, data_vio->logical.lbn + 1);
+}
+
+/** perform_cleanup_stage() - Perform the next step in the process of cleaning up a data_vio. */
+static void perform_cleanup_stage(struct data_vio *data_vio,
+ enum data_vio_cleanup_stage stage)
+{
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+
+ switch (stage) {
+ case VIO_RELEASE_HASH_LOCK:
+ if (data_vio->hash_lock != NULL) {
+ launch_data_vio_hash_zone_callback(data_vio, clean_hash_lock);
+ return;
+ }
+ fallthrough;
+
+ case VIO_RELEASE_ALLOCATED:
+ if (data_vio_has_allocation(data_vio)) {
+ launch_data_vio_allocated_zone_callback(data_vio,
+ release_allocated_lock);
+ return;
+ }
+ fallthrough;
+
+ case VIO_RELEASE_RECOVERY_LOCKS:
+ if ((data_vio->recovery_sequence_number > 0) &&
+ (READ_ONCE(vdo->read_only_notifier.read_only_error) == VDO_SUCCESS) &&
+ (data_vio->vio.completion.result != VDO_READ_ONLY))
+ vdo_log_warning("VDO not read-only when cleaning data_vio with RJ lock");
+ fallthrough;
+
+ case VIO_RELEASE_LOGICAL:
+ launch_data_vio_logical_callback(data_vio, release_logical_lock);
+ return;
+
+ default:
+ finish_cleanup(data_vio);
+ }
+}
+
+void complete_data_vio(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ completion->error_handler = NULL;
+ data_vio->last_async_operation = VIO_ASYNC_OP_CLEANUP;
+ perform_cleanup_stage(data_vio,
+ (data_vio->write ? VIO_CLEANUP_START : VIO_RELEASE_LOGICAL));
+}
+
+static void enter_read_only_mode(struct vdo_completion *completion)
+{
+ if (vdo_is_read_only(completion->vdo))
+ return;
+
+ if (completion->result != VDO_READ_ONLY) {
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ vdo_log_error_strerror(completion->result,
+ "Preparing to enter read-only mode: data_vio for LBN %llu (becoming mapped to %llu, previously mapped to %llu, allocated %llu) is completing with a fatal error after operation %s",
+ (unsigned long long) data_vio->logical.lbn,
+ (unsigned long long) data_vio->new_mapped.pbn,
+ (unsigned long long) data_vio->mapped.pbn,
+ (unsigned long long) data_vio->allocation.pbn,
+ get_data_vio_operation_name(data_vio));
+ }
+
+ vdo_enter_read_only_mode(completion->vdo, completion->result);
+}
+
+void handle_data_vio_error(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ if ((completion->result == VDO_READ_ONLY) || (data_vio->user_bio == NULL))
+ enter_read_only_mode(completion);
+
+ update_data_vio_error_stats(data_vio);
+ complete_data_vio(completion);
+}
+
+/**
+ * get_data_vio_operation_name() - Get the name of the last asynchronous operation performed on a
+ * data_vio.
+ */
+const char *get_data_vio_operation_name(struct data_vio *data_vio)
+{
+ BUILD_BUG_ON((MAX_VIO_ASYNC_OPERATION_NUMBER - MIN_VIO_ASYNC_OPERATION_NUMBER) !=
+ ARRAY_SIZE(ASYNC_OPERATION_NAMES));
+
+ return ((data_vio->last_async_operation < MAX_VIO_ASYNC_OPERATION_NUMBER) ?
+ ASYNC_OPERATION_NAMES[data_vio->last_async_operation] :
+ "unknown async operation");
+}
+
+/**
+ * data_vio_allocate_data_block() - Allocate a data block.
+ *
+ * @write_lock_type: The type of write lock to obtain on the block.
+ * @callback: The callback which will attempt an allocation in the current zone and continue if it
+ * succeeds.
+ * @error_handler: The handler for errors while allocating.
+ */
+void data_vio_allocate_data_block(struct data_vio *data_vio,
+ enum pbn_lock_type write_lock_type,
+ vdo_action_fn callback, vdo_action_fn error_handler)
+{
+ struct allocation *allocation = &data_vio->allocation;
+
+ VDO_ASSERT_LOG_ONLY((allocation->pbn == VDO_ZERO_BLOCK),
+ "data_vio does not have an allocation");
+ allocation->write_lock_type = write_lock_type;
+ allocation->zone = vdo_get_next_allocation_zone(data_vio->logical.zone);
+ allocation->first_allocation_zone = allocation->zone->zone_number;
+
+ data_vio->vio.completion.error_handler = error_handler;
+ launch_data_vio_allocated_zone_callback(data_vio, callback);
+}
+
+/**
+ * release_data_vio_allocation_lock() - Release the PBN lock on a data_vio's allocated block.
+ * @reset: If true, the allocation will be reset (i.e. any allocated pbn will be forgotten).
+ *
+ * If the reference to the locked block is still provisional, it will be released as well.
+ */
+void release_data_vio_allocation_lock(struct data_vio *data_vio, bool reset)
+{
+ struct allocation *allocation = &data_vio->allocation;
+ physical_block_number_t locked_pbn = allocation->pbn;
+
+ assert_data_vio_in_allocated_zone(data_vio);
+
+ if (reset || vdo_pbn_lock_has_provisional_reference(allocation->lock))
+ allocation->pbn = VDO_ZERO_BLOCK;
+
+ vdo_release_physical_zone_pbn_lock(allocation->zone, locked_pbn,
+ vdo_forget(allocation->lock));
+}
+
+/**
+ * uncompress_data_vio() - Uncompress the data a data_vio has just read.
+ * @mapping_state: The mapping state indicating which fragment to decompress.
+ * @buffer: The buffer to receive the uncompressed data.
+ */
+int uncompress_data_vio(struct data_vio *data_vio,
+ enum block_mapping_state mapping_state, char *buffer)
+{
+ int size;
+ u16 fragment_offset, fragment_size;
+ struct compressed_block *block = data_vio->compression.block;
+ int result = vdo_get_compressed_block_fragment(mapping_state, block,
+ &fragment_offset, &fragment_size);
+
+ if (result != VDO_SUCCESS) {
+ vdo_log_debug("%s: compressed fragment error %d", __func__, result);
+ return result;
+ }
+
+ size = LZ4_decompress_safe((block->data + fragment_offset), buffer,
+ fragment_size, VDO_BLOCK_SIZE);
+ if (size != VDO_BLOCK_SIZE) {
+ vdo_log_debug("%s: lz4 error", __func__);
+ return VDO_INVALID_FRAGMENT;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * modify_for_partial_write() - Do the modify-write part of a read-modify-write cycle.
+ * @completion: The data_vio which has just finished its read.
+ *
+ * This callback is registered in read_block().
+ */
+static void modify_for_partial_write(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ char *data = data_vio->vio.data;
+ struct bio *bio = data_vio->user_bio;
+
+ assert_data_vio_on_cpu_thread(data_vio);
+
+ if (bio_op(bio) == REQ_OP_DISCARD) {
+ memset(data + data_vio->offset, '\0', min_t(u32,
+ data_vio->remaining_discard,
+ VDO_BLOCK_SIZE - data_vio->offset));
+ } else {
+ copy_from_bio(bio, data + data_vio->offset);
+ }
+
+ data_vio->is_zero = is_zero_block(data);
+ data_vio->read = false;
+ launch_data_vio_logical_callback(data_vio,
+ continue_data_vio_with_block_map_slot);
+}
+
+static void complete_read(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ char *data = data_vio->vio.data;
+ bool compressed = vdo_is_state_compressed(data_vio->mapped.state);
+
+ assert_data_vio_on_cpu_thread(data_vio);
+
+ if (compressed) {
+ int result = uncompress_data_vio(data_vio, data_vio->mapped.state, data);
+
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+ }
+
+ if (data_vio->write) {
+ modify_for_partial_write(completion);
+ return;
+ }
+
+ if (compressed || data_vio->is_partial)
+ copy_to_bio(data_vio->user_bio, data + data_vio->offset);
+
+ acknowledge_data_vio(data_vio);
+ complete_data_vio(completion);
+}
+
+static void read_endio(struct bio *bio)
+{
+ struct data_vio *data_vio = vio_as_data_vio(bio->bi_private);
+ int result = blk_status_to_errno(bio->bi_status);
+
+ vdo_count_completed_bios(bio);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ launch_data_vio_cpu_callback(data_vio, complete_read,
+ CPU_Q_COMPLETE_READ_PRIORITY);
+}
+
+static void complete_zero_read(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_on_cpu_thread(data_vio);
+
+ if (data_vio->is_partial) {
+ memset(data_vio->vio.data, 0, VDO_BLOCK_SIZE);
+ if (data_vio->write) {
+ modify_for_partial_write(completion);
+ return;
+ }
+ } else {
+ zero_fill_bio(data_vio->user_bio);
+ }
+
+ complete_read(completion);
+}
+
+/**
+ * read_block() - Read a block asynchronously.
+ *
+ * This is the callback registered in read_block_mapping().
+ */
+static void read_block(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct vio *vio = as_vio(completion);
+ int result = VDO_SUCCESS;
+
+ if (data_vio->mapped.pbn == VDO_ZERO_BLOCK) {
+ launch_data_vio_cpu_callback(data_vio, complete_zero_read,
+ CPU_Q_COMPLETE_VIO_PRIORITY);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_READ_DATA_VIO;
+ if (vdo_is_state_compressed(data_vio->mapped.state)) {
+ result = vio_reset_bio(vio, (char *) data_vio->compression.block,
+ read_endio, REQ_OP_READ, data_vio->mapped.pbn);
+ } else {
+ blk_opf_t opf = ((data_vio->user_bio->bi_opf & PASSTHROUGH_FLAGS) | REQ_OP_READ);
+
+ if (data_vio->is_partial) {
+ result = vio_reset_bio(vio, vio->data, read_endio, opf,
+ data_vio->mapped.pbn);
+ } else {
+ /* A full 4k read. Use the incoming bio to avoid having to copy the data */
+ bio_reset(vio->bio, vio->bio->bi_bdev, opf);
+ bio_init_clone(data_vio->user_bio->bi_bdev, vio->bio,
+ data_vio->user_bio, GFP_KERNEL);
+
+ /* Copy over the original bio iovec and opflags. */
+ vdo_set_bio_properties(vio->bio, vio, read_endio, opf,
+ data_vio->mapped.pbn);
+ }
+ }
+
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ vdo_submit_data_vio(data_vio);
+}
+
+static inline struct data_vio *
+reference_count_update_completion_as_data_vio(struct vdo_completion *completion)
+{
+ if (completion->type == VIO_COMPLETION)
+ return as_data_vio(completion);
+
+ return container_of(completion, struct data_vio, decrement_completion);
+}
+
+/**
+ * update_block_map() - Rendezvous of the data_vio and decrement completions after each has
+ * made its reference updates. Handle any error from either, or proceed
+ * to updating the block map.
+ * @completion: The completion of the write in progress.
+ */
+static void update_block_map(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = reference_count_update_completion_as_data_vio(completion);
+
+ assert_data_vio_in_logical_zone(data_vio);
+
+ if (!data_vio->first_reference_operation_complete) {
+ /* Rendezvous, we're first */
+ data_vio->first_reference_operation_complete = true;
+ return;
+ }
+
+ completion = &data_vio->vio.completion;
+ vdo_set_completion_result(completion, data_vio->decrement_completion.result);
+ if (completion->result != VDO_SUCCESS) {
+ handle_data_vio_error(completion);
+ return;
+ }
+
+ completion->error_handler = handle_data_vio_error;
+ if (data_vio->hash_lock != NULL)
+ set_data_vio_hash_zone_callback(data_vio, vdo_continue_hash_lock);
+ else
+ completion->callback = complete_data_vio;
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_PUT_MAPPED_BLOCK;
+ vdo_put_mapped_block(data_vio);
+}
+
+static void decrement_reference_count(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = container_of(completion, struct data_vio,
+ decrement_completion);
+
+ assert_data_vio_in_mapped_zone(data_vio);
+
+ vdo_set_completion_callback(completion, update_block_map,
+ data_vio->logical.zone->thread_id);
+ completion->error_handler = update_block_map;
+ vdo_modify_reference_count(completion, &data_vio->decrement_updater);
+}
+
+static void increment_reference_count(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_new_mapped_zone(data_vio);
+
+ if (data_vio->downgrade_allocation_lock) {
+ /*
+ * Now that the data has been written, it's safe to deduplicate against the
+ * block. Downgrade the allocation lock to a read lock so it can be used later by
+ * the hash lock. This is done here since it needs to happen sometime before we
+ * return to the hash zone, and we are currently on the correct thread. For
+ * compressed blocks, the downgrade will have already been done.
+ */
+ vdo_downgrade_pbn_write_lock(data_vio->allocation.lock, false);
+ }
+
+ set_data_vio_logical_callback(data_vio, update_block_map);
+ completion->error_handler = update_block_map;
+ vdo_modify_reference_count(completion, &data_vio->increment_updater);
+}
+
+/** journal_remapping() - Add a recovery journal entry for a data remapping. */
+static void journal_remapping(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_journal_zone(data_vio);
+
+ data_vio->decrement_updater.operation = VDO_JOURNAL_DATA_REMAPPING;
+ data_vio->decrement_updater.zpbn = data_vio->mapped;
+ if (data_vio->new_mapped.pbn == VDO_ZERO_BLOCK) {
+ data_vio->first_reference_operation_complete = true;
+ if (data_vio->mapped.pbn == VDO_ZERO_BLOCK)
+ set_data_vio_logical_callback(data_vio, update_block_map);
+ } else {
+ set_data_vio_new_mapped_zone_callback(data_vio,
+ increment_reference_count);
+ }
+
+ if (data_vio->mapped.pbn == VDO_ZERO_BLOCK) {
+ data_vio->first_reference_operation_complete = true;
+ } else {
+ vdo_set_completion_callback(&data_vio->decrement_completion,
+ decrement_reference_count,
+ data_vio->mapped.zone->thread_id);
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_JOURNAL_REMAPPING;
+ vdo_add_recovery_journal_entry(completion->vdo->recovery_journal, data_vio);
+}
+
+/**
+ * read_old_block_mapping() - Get the previous PBN/LBN mapping of an in-progress write.
+ *
+ * Gets the previous PBN mapped to this LBN from the block map, so as to make an appropriate
+ * journal entry referencing the removal of this LBN->PBN mapping.
+ */
+static void read_old_block_mapping(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_logical_zone(data_vio);
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_GET_MAPPED_BLOCK_FOR_WRITE;
+ set_data_vio_journal_callback(data_vio, journal_remapping);
+ vdo_get_mapped_block(data_vio);
+}
+
+void update_metadata_for_data_vio_write(struct data_vio *data_vio, struct pbn_lock *lock)
+{
+ data_vio->increment_updater = (struct reference_updater) {
+ .operation = VDO_JOURNAL_DATA_REMAPPING,
+ .increment = true,
+ .zpbn = data_vio->new_mapped,
+ .lock = lock,
+ };
+
+ launch_data_vio_logical_callback(data_vio, read_old_block_mapping);
+}
+
+/**
+ * pack_compressed_data() - Attempt to pack the compressed data_vio into a block.
+ *
+ * This is the callback registered in launch_compress_data_vio().
+ */
+static void pack_compressed_data(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_packer_zone(data_vio);
+
+ if (!vdo_get_compressing(vdo_from_data_vio(data_vio)) ||
+ get_data_vio_compression_status(data_vio).may_not_compress) {
+ write_data_vio(data_vio);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_ATTEMPT_PACKING;
+ vdo_attempt_packing(data_vio);
+}
+
+/**
+ * compress_data_vio() - Do the actual work of compressing the data on a CPU queue.
+ *
+ * This callback is registered in launch_compress_data_vio().
+ */
+static void compress_data_vio(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ int size;
+
+ assert_data_vio_on_cpu_thread(data_vio);
+
+ /*
+ * By putting the compressed data at the start of the compressed block data field, we won't
+ * need to copy it if this data_vio becomes a compressed write agent.
+ */
+ size = LZ4_compress_default(data_vio->vio.data,
+ data_vio->compression.block->data, VDO_BLOCK_SIZE,
+ VDO_MAX_COMPRESSED_FRAGMENT_SIZE,
+ (char *) vdo_get_work_queue_private_data());
+ if ((size > 0) && (size < VDO_COMPRESSED_BLOCK_DATA_SIZE)) {
+ data_vio->compression.size = size;
+ launch_data_vio_packer_callback(data_vio, pack_compressed_data);
+ return;
+ }
+
+ write_data_vio(data_vio);
+}
+
+/**
+ * launch_compress_data_vio() - Continue a write by attempting to compress the data.
+ *
+ * This is a re-entry point to vio_write used by hash locks.
+ */
+void launch_compress_data_vio(struct data_vio *data_vio)
+{
+ VDO_ASSERT_LOG_ONLY(!data_vio->is_duplicate, "compressing a non-duplicate block");
+ VDO_ASSERT_LOG_ONLY(data_vio->hash_lock != NULL,
+ "data_vio to compress has a hash_lock");
+ VDO_ASSERT_LOG_ONLY(data_vio_has_allocation(data_vio),
+ "data_vio to compress has an allocation");
+
+ /*
+ * There are 4 reasons why a data_vio which has reached this point will not be eligible for
+ * compression:
+ *
+ * 1) Since data_vios can block indefinitely in the packer, it would be bad to do so if the
+ * write request also requests FUA.
+ *
+ * 2) A data_vio should not be compressed when compression is disabled for the vdo.
+ *
+ * 3) A data_vio could be doing a partial write on behalf of a larger discard which has not
+ * yet been acknowledged and hence blocking in the packer would be bad.
+ *
+ * 4) Some other data_vio may be waiting on this data_vio in which case blocking in the
+ * packer would also be bad.
+ */
+ if (data_vio->fua ||
+ !vdo_get_compressing(vdo_from_data_vio(data_vio)) ||
+ ((data_vio->user_bio != NULL) && (bio_op(data_vio->user_bio) == REQ_OP_DISCARD)) ||
+ (advance_data_vio_compression_stage(data_vio).stage != DATA_VIO_COMPRESSING)) {
+ write_data_vio(data_vio);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_COMPRESS_DATA_VIO;
+ launch_data_vio_cpu_callback(data_vio, compress_data_vio,
+ CPU_Q_COMPRESS_BLOCK_PRIORITY);
+}
+
+/**
+ * hash_data_vio() - Hash the data in a data_vio and set the hash zone (which also flags the record
+ * name as set).
+
+ * This callback is registered in prepare_for_dedupe().
+ */
+static void hash_data_vio(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_on_cpu_thread(data_vio);
+ VDO_ASSERT_LOG_ONLY(!data_vio->is_zero, "zero blocks should not be hashed");
+
+ murmurhash3_128(data_vio->vio.data, VDO_BLOCK_SIZE, 0x62ea60be,
+ &data_vio->record_name);
+
+ data_vio->hash_zone = vdo_select_hash_zone(vdo_from_data_vio(data_vio)->hash_zones,
+ &data_vio->record_name);
+ data_vio->last_async_operation = VIO_ASYNC_OP_ACQUIRE_VDO_HASH_LOCK;
+ launch_data_vio_hash_zone_callback(data_vio, vdo_acquire_hash_lock);
+}
+
+/** prepare_for_dedupe() - Prepare for the dedupe path after attempting to get an allocation. */
+static void prepare_for_dedupe(struct data_vio *data_vio)
+{
+ /* We don't care what thread we are on. */
+ VDO_ASSERT_LOG_ONLY(!data_vio->is_zero, "must not prepare to dedupe zero blocks");
+
+ /*
+ * Before we can dedupe, we need to know the record name, so the first
+ * step is to hash the block data.
+ */
+ data_vio->last_async_operation = VIO_ASYNC_OP_HASH_DATA_VIO;
+ launch_data_vio_cpu_callback(data_vio, hash_data_vio, CPU_Q_HASH_BLOCK_PRIORITY);
+}
+
+/**
+ * write_bio_finished() - This is the bio_end_io function registered in write_block() to be called
+ * when a data_vio's write to the underlying storage has completed.
+ */
+static void write_bio_finished(struct bio *bio)
+{
+ struct data_vio *data_vio = vio_as_data_vio((struct vio *) bio->bi_private);
+
+ vdo_count_completed_bios(bio);
+ vdo_set_completion_result(&data_vio->vio.completion,
+ blk_status_to_errno(bio->bi_status));
+ data_vio->downgrade_allocation_lock = true;
+ update_metadata_for_data_vio_write(data_vio, data_vio->allocation.lock);
+}
+
+/** write_data_vio() - Write a data block to storage without compression. */
+void write_data_vio(struct data_vio *data_vio)
+{
+ struct data_vio_compression_status status, new_status;
+ int result;
+
+ if (!data_vio_has_allocation(data_vio)) {
+ /*
+ * There was no space to write this block and we failed to deduplicate or compress
+ * it.
+ */
+ continue_data_vio_with_error(data_vio, VDO_NO_SPACE);
+ return;
+ }
+
+ new_status = (struct data_vio_compression_status) {
+ .stage = DATA_VIO_POST_PACKER,
+ .may_not_compress = true,
+ };
+
+ do {
+ status = get_data_vio_compression_status(data_vio);
+ } while ((status.stage != DATA_VIO_POST_PACKER) &&
+ !set_data_vio_compression_status(data_vio, status, new_status));
+
+ /* Write the data from the data block buffer. */
+ result = vio_reset_bio(&data_vio->vio, data_vio->vio.data,
+ write_bio_finished, REQ_OP_WRITE,
+ data_vio->allocation.pbn);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_WRITE_DATA_VIO;
+ vdo_submit_data_vio(data_vio);
+}
+
+/**
+ * acknowledge_write_callback() - Acknowledge a write to the requestor.
+ *
+ * This callback is registered in allocate_block() and continue_write_with_block_map_slot().
+ */
+static void acknowledge_write_callback(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct vdo *vdo = completion->vdo;
+
+ VDO_ASSERT_LOG_ONLY((!vdo_uses_bio_ack_queue(vdo) ||
+ (vdo_get_callback_thread_id() == vdo->thread_config.bio_ack_thread)),
+ "%s() called on bio ack queue", __func__);
+ VDO_ASSERT_LOG_ONLY(data_vio_has_flush_generation_lock(data_vio),
+ "write VIO to be acknowledged has a flush generation lock");
+ acknowledge_data_vio(data_vio);
+ if (data_vio->new_mapped.pbn == VDO_ZERO_BLOCK) {
+ /* This is a zero write or discard */
+ update_metadata_for_data_vio_write(data_vio, NULL);
+ return;
+ }
+
+ prepare_for_dedupe(data_vio);
+}
+
+/**
+ * allocate_block() - Attempt to allocate a block in the current allocation zone.
+ *
+ * This callback is registered in continue_write_with_block_map_slot().
+ */
+static void allocate_block(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_allocated_zone(data_vio);
+
+ if (!vdo_allocate_block_in_zone(data_vio))
+ return;
+
+ completion->error_handler = handle_data_vio_error;
+ WRITE_ONCE(data_vio->allocation_succeeded, true);
+ data_vio->new_mapped = (struct zoned_pbn) {
+ .zone = data_vio->allocation.zone,
+ .pbn = data_vio->allocation.pbn,
+ .state = VDO_MAPPING_STATE_UNCOMPRESSED,
+ };
+
+ if (data_vio->fua) {
+ prepare_for_dedupe(data_vio);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_ACKNOWLEDGE_WRITE;
+ launch_data_vio_on_bio_ack_queue(data_vio, acknowledge_write_callback);
+}
+
+/**
+ * handle_allocation_error() - Handle an error attempting to allocate a block.
+ *
+ * This error handler is registered in continue_write_with_block_map_slot().
+ */
+static void handle_allocation_error(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ if (completion->result == VDO_NO_SPACE) {
+ /* We failed to get an allocation, but we can try to dedupe. */
+ vdo_reset_completion(completion);
+ completion->error_handler = handle_data_vio_error;
+ prepare_for_dedupe(data_vio);
+ return;
+ }
+
+ /* We got a "real" error, not just a failure to allocate, so fail the request. */
+ handle_data_vio_error(completion);
+}
+
+static int assert_is_discard(struct data_vio *data_vio)
+{
+ int result = VDO_ASSERT(data_vio->is_discard,
+ "data_vio with no block map page is a discard");
+
+ return ((result == VDO_SUCCESS) ? result : VDO_READ_ONLY);
+}
+
+/**
+ * continue_data_vio_with_block_map_slot() - Read the data_vio's mapping from the block map.
+ *
+ * This callback is registered in launch_read_data_vio().
+ */
+void continue_data_vio_with_block_map_slot(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+
+ assert_data_vio_in_logical_zone(data_vio);
+ if (data_vio->read) {
+ set_data_vio_logical_callback(data_vio, read_block);
+ data_vio->last_async_operation = VIO_ASYNC_OP_GET_MAPPED_BLOCK_FOR_READ;
+ vdo_get_mapped_block(data_vio);
+ return;
+ }
+
+ vdo_acquire_flush_generation_lock(data_vio);
+
+ if (data_vio->tree_lock.tree_slots[0].block_map_slot.pbn == VDO_ZERO_BLOCK) {
+ /*
+ * This is a discard for a block on a block map page which has not been allocated, so
+ * there's nothing more we need to do.
+ */
+ completion->callback = complete_data_vio;
+ continue_data_vio_with_error(data_vio, assert_is_discard(data_vio));
+ return;
+ }
+
+ /*
+ * We need an allocation if this is neither a full-block discard nor a
+ * full-block zero write.
+ */
+ if (!data_vio->is_zero && (!data_vio->is_discard || data_vio->is_partial)) {
+ data_vio_allocate_data_block(data_vio, VIO_WRITE_LOCK, allocate_block,
+ handle_allocation_error);
+ return;
+ }
+
+
+ /*
+ * We don't need to write any data, so skip allocation and just update the block map and
+ * reference counts (via the journal).
+ */
+ data_vio->new_mapped.pbn = VDO_ZERO_BLOCK;
+ if (data_vio->is_zero)
+ data_vio->new_mapped.state = VDO_MAPPING_STATE_UNCOMPRESSED;
+
+ if (data_vio->remaining_discard > VDO_BLOCK_SIZE) {
+ /* This is not the final block of a discard so we can't acknowledge it yet. */
+ update_metadata_for_data_vio_write(data_vio, NULL);
+ return;
+ }
+
+ data_vio->last_async_operation = VIO_ASYNC_OP_ACKNOWLEDGE_WRITE;
+ launch_data_vio_on_bio_ack_queue(data_vio, acknowledge_write_callback);
+}
diff --git a/drivers/md/dm-vdo/data-vio.h b/drivers/md/dm-vdo/data-vio.h
new file mode 100644
index 000000000000..25926b6cd98b
--- /dev/null
+++ b/drivers/md/dm-vdo/data-vio.h
@@ -0,0 +1,670 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef DATA_VIO_H
+#define DATA_VIO_H
+
+#include <linux/atomic.h>
+#include <linux/bio.h>
+#include <linux/list.h>
+
+#include "permassert.h"
+
+#include "indexer.h"
+
+#include "block-map.h"
+#include "completion.h"
+#include "constants.h"
+#include "dedupe.h"
+#include "encodings.h"
+#include "logical-zone.h"
+#include "physical-zone.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+/* Codes for describing the last asynchronous operation performed on a vio. */
+enum async_operation_number {
+ MIN_VIO_ASYNC_OPERATION_NUMBER,
+ VIO_ASYNC_OP_LAUNCH = MIN_VIO_ASYNC_OPERATION_NUMBER,
+ VIO_ASYNC_OP_ACKNOWLEDGE_WRITE,
+ VIO_ASYNC_OP_ACQUIRE_VDO_HASH_LOCK,
+ VIO_ASYNC_OP_ATTEMPT_LOGICAL_BLOCK_LOCK,
+ VIO_ASYNC_OP_LOCK_DUPLICATE_PBN,
+ VIO_ASYNC_OP_CHECK_FOR_DUPLICATION,
+ VIO_ASYNC_OP_CLEANUP,
+ VIO_ASYNC_OP_COMPRESS_DATA_VIO,
+ VIO_ASYNC_OP_FIND_BLOCK_MAP_SLOT,
+ VIO_ASYNC_OP_GET_MAPPED_BLOCK_FOR_READ,
+ VIO_ASYNC_OP_GET_MAPPED_BLOCK_FOR_WRITE,
+ VIO_ASYNC_OP_HASH_DATA_VIO,
+ VIO_ASYNC_OP_JOURNAL_REMAPPING,
+ VIO_ASYNC_OP_ATTEMPT_PACKING,
+ VIO_ASYNC_OP_PUT_MAPPED_BLOCK,
+ VIO_ASYNC_OP_READ_DATA_VIO,
+ VIO_ASYNC_OP_UPDATE_DEDUPE_INDEX,
+ VIO_ASYNC_OP_UPDATE_REFERENCE_COUNTS,
+ VIO_ASYNC_OP_VERIFY_DUPLICATION,
+ VIO_ASYNC_OP_WRITE_DATA_VIO,
+ MAX_VIO_ASYNC_OPERATION_NUMBER,
+} __packed;
+
+struct lbn_lock {
+ logical_block_number_t lbn;
+ bool locked;
+ struct vdo_wait_queue waiters;
+ struct logical_zone *zone;
+};
+
+/* A position in the arboreal block map at a specific level. */
+struct block_map_tree_slot {
+ page_number_t page_index;
+ struct block_map_slot block_map_slot;
+};
+
+/* Fields for using the arboreal block map. */
+struct tree_lock {
+ /* The current height at which this data_vio is operating */
+ height_t height;
+ /* The block map tree for this LBN */
+ root_count_t root_index;
+ /* Whether we hold a page lock */
+ bool locked;
+ /* The key for the lock map */
+ u64 key;
+ /* The queue of waiters for the page this vio is allocating or loading */
+ struct vdo_wait_queue waiters;
+ /* The block map tree slots for this LBN */
+ struct block_map_tree_slot tree_slots[VDO_BLOCK_MAP_TREE_HEIGHT + 1];
+};
+
+struct zoned_pbn {
+ physical_block_number_t pbn;
+ enum block_mapping_state state;
+ struct physical_zone *zone;
+};
+
+/*
+ * Where a data_vio is on the compression path; advance_compression_stage() depends on the order of
+ * this enum.
+ */
+enum data_vio_compression_stage {
+ /* A data_vio which has not yet entered the compression path */
+ DATA_VIO_PRE_COMPRESSOR,
+ /* A data_vio which is in the compressor */
+ DATA_VIO_COMPRESSING,
+ /* A data_vio which is blocked in the packer */
+ DATA_VIO_PACKING,
+ /* A data_vio which is no longer on the compression path (and never will be) */
+ DATA_VIO_POST_PACKER,
+};
+
+struct data_vio_compression_status {
+ enum data_vio_compression_stage stage;
+ bool may_not_compress;
+};
+
+struct compression_state {
+ /*
+ * The current compression status of this data_vio. This field contains a value which
+ * consists of a data_vio_compression_stage and a flag indicating whether a request has
+ * been made to cancel (or prevent) compression for this data_vio.
+ *
+ * This field should be accessed through the get_data_vio_compression_status() and
+ * set_data_vio_compression_status() methods. It should not be accessed directly.
+ */
+ atomic_t status;
+
+ /* The compressed size of this block */
+ u16 size;
+
+ /* The packer input or output bin slot which holds the enclosing data_vio */
+ slot_number_t slot;
+
+ /* The packer bin to which the enclosing data_vio has been assigned */
+ struct packer_bin *bin;
+
+ /* A link in the chain of data_vios which have been packed together */
+ struct data_vio *next_in_batch;
+
+ /* A vio which is blocked in the packer while holding a lock this vio needs. */
+ struct data_vio *lock_holder;
+
+ /*
+ * The compressed block used to hold the compressed form of this block and that of any
+ * other blocks for which this data_vio is the compressed write agent.
+ */
+ struct compressed_block *block;
+};
+
+/* Fields supporting allocation of data blocks. */
+struct allocation {
+ /* The physical zone in which to allocate a physical block */
+ struct physical_zone *zone;
+
+ /* The block allocated to this vio */
+ physical_block_number_t pbn;
+
+ /*
+ * If non-NULL, the pooled PBN lock held on the allocated block. Must be a write lock until
+ * the block has been written, after which it will become a read lock.
+ */
+ struct pbn_lock *lock;
+
+ /* The type of write lock to obtain on the allocated block */
+ enum pbn_lock_type write_lock_type;
+
+ /* The zone which was the start of the current allocation cycle */
+ zone_count_t first_allocation_zone;
+
+ /* Whether this vio should wait for a clean slab */
+ bool wait_for_clean_slab;
+};
+
+struct reference_updater {
+ enum journal_operation operation;
+ bool increment;
+ struct zoned_pbn zpbn;
+ struct pbn_lock *lock;
+ struct vdo_waiter waiter;
+};
+
+/* A vio for processing user data requests. */
+struct data_vio {
+ /* The vdo_wait_queue entry structure */
+ struct vdo_waiter waiter;
+
+ /* The logical block of this request */
+ struct lbn_lock logical;
+
+ /* The state for traversing the block map tree */
+ struct tree_lock tree_lock;
+
+ /* The current partition address of this block */
+ struct zoned_pbn mapped;
+
+ /* The hash of this vio (if not zero) */
+ struct uds_record_name record_name;
+
+ /* Used for logging and debugging */
+ enum async_operation_number last_async_operation;
+
+ /* The operations to record in the recovery and slab journals */
+ struct reference_updater increment_updater;
+ struct reference_updater decrement_updater;
+
+ u16 read : 1;
+ u16 write : 1;
+ u16 fua : 1;
+ u16 is_zero : 1;
+ u16 is_discard : 1;
+ u16 is_partial : 1;
+ u16 is_duplicate : 1;
+ u16 first_reference_operation_complete : 1;
+ u16 downgrade_allocation_lock : 1;
+
+ struct allocation allocation;
+
+ /*
+ * Whether this vio has received an allocation. This field is examined from threads not in
+ * the allocation zone.
+ */
+ bool allocation_succeeded;
+
+ /* The new partition address of this block after the vio write completes */
+ struct zoned_pbn new_mapped;
+
+ /* The hash zone responsible for the name (NULL if is_zero_block) */
+ struct hash_zone *hash_zone;
+
+ /* The lock this vio holds or shares with other vios with the same data */
+ struct hash_lock *hash_lock;
+
+ /* All data_vios sharing a hash lock are kept in a list linking these list entries */
+ struct list_head hash_lock_entry;
+
+ /* The block number in the partition of the UDS deduplication advice */
+ struct zoned_pbn duplicate;
+
+ /*
+ * The sequence number of the recovery journal block containing the increment entry for
+ * this vio.
+ */
+ sequence_number_t recovery_sequence_number;
+
+ /* The point in the recovery journal where this write last made an entry */
+ struct journal_point recovery_journal_point;
+
+ /* The list of vios in user initiated write requests */
+ struct list_head write_entry;
+
+ /* The generation number of the VDO that this vio belongs to */
+ sequence_number_t flush_generation;
+
+ /* The completion to use for fetching block map pages for this vio */
+ struct vdo_page_completion page_completion;
+
+ /* The user bio that initiated this VIO */
+ struct bio *user_bio;
+
+ /* partial block support */
+ block_size_t offset;
+
+ /*
+ * The number of bytes to be discarded. For discards, this field will always be positive,
+ * whereas for non-discards it will always be 0. Hence it can be used to determine whether
+ * a data_vio is processing a discard, even after the user_bio has been acknowledged.
+ */
+ u32 remaining_discard;
+
+ struct dedupe_context *dedupe_context;
+
+ /* Fields beyond this point will not be reset when a pooled data_vio is reused. */
+
+ struct vio vio;
+
+ /* The completion for making reference count decrements */
+ struct vdo_completion decrement_completion;
+
+ /* All of the fields necessary for the compression path */
+ struct compression_state compression;
+
+ /* A block used as output during compression or uncompression */
+ char *scratch_block;
+
+ struct list_head pool_entry;
+};
+
+static inline struct data_vio *vio_as_data_vio(struct vio *vio)
+{
+ VDO_ASSERT_LOG_ONLY((vio->type == VIO_TYPE_DATA), "vio is a data_vio");
+ return container_of(vio, struct data_vio, vio);
+}
+
+static inline struct data_vio *as_data_vio(struct vdo_completion *completion)
+{
+ return vio_as_data_vio(as_vio(completion));
+}
+
+static inline struct data_vio *vdo_waiter_as_data_vio(struct vdo_waiter *waiter)
+{
+ if (waiter == NULL)
+ return NULL;
+
+ return container_of(waiter, struct data_vio, waiter);
+}
+
+static inline struct data_vio *data_vio_from_reference_updater(struct reference_updater *updater)
+{
+ if (updater->increment)
+ return container_of(updater, struct data_vio, increment_updater);
+
+ return container_of(updater, struct data_vio, decrement_updater);
+}
+
+static inline bool data_vio_has_flush_generation_lock(struct data_vio *data_vio)
+{
+ return !list_empty(&data_vio->write_entry);
+}
+
+static inline struct vdo *vdo_from_data_vio(struct data_vio *data_vio)
+{
+ return data_vio->vio.completion.vdo;
+}
+
+static inline bool data_vio_has_allocation(struct data_vio *data_vio)
+{
+ return (data_vio->allocation.pbn != VDO_ZERO_BLOCK);
+}
+
+struct data_vio_compression_status __must_check
+advance_data_vio_compression_stage(struct data_vio *data_vio);
+struct data_vio_compression_status __must_check
+get_data_vio_compression_status(struct data_vio *data_vio);
+bool cancel_data_vio_compression(struct data_vio *data_vio);
+
+struct data_vio_pool;
+
+int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size,
+ data_vio_count_t discard_limit, struct data_vio_pool **pool_ptr);
+void free_data_vio_pool(struct data_vio_pool *pool);
+void vdo_launch_bio(struct data_vio_pool *pool, struct bio *bio);
+void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion);
+void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion);
+
+void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios);
+data_vio_count_t get_data_vio_pool_active_discards(struct data_vio_pool *pool);
+data_vio_count_t get_data_vio_pool_discard_limit(struct data_vio_pool *pool);
+data_vio_count_t get_data_vio_pool_maximum_discards(struct data_vio_pool *pool);
+int __must_check set_data_vio_pool_discard_limit(struct data_vio_pool *pool,
+ data_vio_count_t limit);
+data_vio_count_t get_data_vio_pool_active_requests(struct data_vio_pool *pool);
+data_vio_count_t get_data_vio_pool_request_limit(struct data_vio_pool *pool);
+data_vio_count_t get_data_vio_pool_maximum_requests(struct data_vio_pool *pool);
+
+void complete_data_vio(struct vdo_completion *completion);
+void handle_data_vio_error(struct vdo_completion *completion);
+
+static inline void continue_data_vio(struct data_vio *data_vio)
+{
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+/**
+ * continue_data_vio_with_error() - Set an error code and then continue processing a data_vio.
+ *
+ * This will not mask older errors. This function can be called with a success code, but it is more
+ * efficient to call continue_data_vio() if the caller knows the result was a success.
+ */
+static inline void continue_data_vio_with_error(struct data_vio *data_vio, int result)
+{
+ vdo_continue_completion(&data_vio->vio.completion, result);
+}
+
+const char * __must_check get_data_vio_operation_name(struct data_vio *data_vio);
+
+static inline void assert_data_vio_in_hash_zone(struct data_vio *data_vio)
+{
+ thread_id_t expected = data_vio->hash_zone->thread_id;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+ /*
+ * It's odd to use the LBN, but converting the record name to hex is a bit clunky for an
+ * inline, and the LBN better than nothing as an identifier.
+ */
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "data_vio for logical block %llu on thread %u, should be on hash zone thread %u",
+ (unsigned long long) data_vio->logical.lbn, thread_id, expected);
+}
+
+static inline void set_data_vio_hash_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ data_vio->hash_zone->thread_id);
+}
+
+/**
+ * launch_data_vio_hash_zone_callback() - Set a callback as a hash zone operation and invoke it
+ * immediately.
+ */
+static inline void launch_data_vio_hash_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_hash_zone_callback(data_vio, callback);
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+static inline void assert_data_vio_in_logical_zone(struct data_vio *data_vio)
+{
+ thread_id_t expected = data_vio->logical.zone->thread_id;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "data_vio for logical block %llu on thread %u, should be on thread %u",
+ (unsigned long long) data_vio->logical.lbn, thread_id, expected);
+}
+
+static inline void set_data_vio_logical_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ data_vio->logical.zone->thread_id);
+}
+
+/**
+ * launch_data_vio_logical_callback() - Set a callback as a logical block operation and invoke it
+ * immediately.
+ */
+static inline void launch_data_vio_logical_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_logical_callback(data_vio, callback);
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+static inline void assert_data_vio_in_allocated_zone(struct data_vio *data_vio)
+{
+ thread_id_t expected = data_vio->allocation.zone->thread_id;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "struct data_vio for allocated physical block %llu on thread %u, should be on thread %u",
+ (unsigned long long) data_vio->allocation.pbn, thread_id,
+ expected);
+}
+
+static inline void set_data_vio_allocated_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ data_vio->allocation.zone->thread_id);
+}
+
+/**
+ * launch_data_vio_allocated_zone_callback() - Set a callback as a physical block operation in a
+ * data_vio's allocated zone and queue the data_vio and
+ * invoke it immediately.
+ */
+static inline void launch_data_vio_allocated_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_allocated_zone_callback(data_vio, callback);
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+static inline void assert_data_vio_in_duplicate_zone(struct data_vio *data_vio)
+{
+ thread_id_t expected = data_vio->duplicate.zone->thread_id;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "data_vio for duplicate physical block %llu on thread %u, should be on thread %u",
+ (unsigned long long) data_vio->duplicate.pbn, thread_id,
+ expected);
+}
+
+static inline void set_data_vio_duplicate_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ data_vio->duplicate.zone->thread_id);
+}
+
+/**
+ * launch_data_vio_duplicate_zone_callback() - Set a callback as a physical block operation in a
+ * data_vio's duplicate zone and queue the data_vio and
+ * invoke it immediately.
+ */
+static inline void launch_data_vio_duplicate_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_duplicate_zone_callback(data_vio, callback);
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+static inline void assert_data_vio_in_mapped_zone(struct data_vio *data_vio)
+{
+ thread_id_t expected = data_vio->mapped.zone->thread_id;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "data_vio for mapped physical block %llu on thread %u, should be on thread %u",
+ (unsigned long long) data_vio->mapped.pbn, thread_id, expected);
+}
+
+static inline void set_data_vio_mapped_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ data_vio->mapped.zone->thread_id);
+}
+
+static inline void assert_data_vio_in_new_mapped_zone(struct data_vio *data_vio)
+{
+ thread_id_t expected = data_vio->new_mapped.zone->thread_id;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "data_vio for new_mapped physical block %llu on thread %u, should be on thread %u",
+ (unsigned long long) data_vio->new_mapped.pbn, thread_id,
+ expected);
+}
+
+static inline void set_data_vio_new_mapped_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ data_vio->new_mapped.zone->thread_id);
+}
+
+static inline void assert_data_vio_in_journal_zone(struct data_vio *data_vio)
+{
+ thread_id_t journal_thread = vdo_from_data_vio(data_vio)->thread_config.journal_thread;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((journal_thread == thread_id),
+ "data_vio for logical block %llu on thread %u, should be on journal thread %u",
+ (unsigned long long) data_vio->logical.lbn, thread_id,
+ journal_thread);
+}
+
+static inline void set_data_vio_journal_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ thread_id_t journal_thread = vdo_from_data_vio(data_vio)->thread_config.journal_thread;
+
+ vdo_set_completion_callback(&data_vio->vio.completion, callback, journal_thread);
+}
+
+/**
+ * launch_data_vio_journal_callback() - Set a callback as a journal operation and invoke it
+ * immediately.
+ */
+static inline void launch_data_vio_journal_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_journal_callback(data_vio, callback);
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+static inline void assert_data_vio_in_packer_zone(struct data_vio *data_vio)
+{
+ thread_id_t packer_thread = vdo_from_data_vio(data_vio)->thread_config.packer_thread;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((packer_thread == thread_id),
+ "data_vio for logical block %llu on thread %u, should be on packer thread %u",
+ (unsigned long long) data_vio->logical.lbn, thread_id,
+ packer_thread);
+}
+
+static inline void set_data_vio_packer_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ thread_id_t packer_thread = vdo_from_data_vio(data_vio)->thread_config.packer_thread;
+
+ vdo_set_completion_callback(&data_vio->vio.completion, callback, packer_thread);
+}
+
+/**
+ * launch_data_vio_packer_callback() - Set a callback as a packer operation and invoke it
+ * immediately.
+ */
+static inline void launch_data_vio_packer_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_packer_callback(data_vio, callback);
+ vdo_launch_completion(&data_vio->vio.completion);
+}
+
+static inline void assert_data_vio_on_cpu_thread(struct data_vio *data_vio)
+{
+ thread_id_t cpu_thread = vdo_from_data_vio(data_vio)->thread_config.cpu_thread;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((cpu_thread == thread_id),
+ "data_vio for logical block %llu on thread %u, should be on cpu thread %u",
+ (unsigned long long) data_vio->logical.lbn, thread_id,
+ cpu_thread);
+}
+
+static inline void set_data_vio_cpu_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ thread_id_t cpu_thread = vdo_from_data_vio(data_vio)->thread_config.cpu_thread;
+
+ vdo_set_completion_callback(&data_vio->vio.completion, callback, cpu_thread);
+}
+
+/**
+ * launch_data_vio_cpu_callback() - Set a callback to run on the CPU queues and invoke it
+ * immediately.
+ */
+static inline void launch_data_vio_cpu_callback(struct data_vio *data_vio,
+ vdo_action_fn callback,
+ enum vdo_completion_priority priority)
+{
+ set_data_vio_cpu_callback(data_vio, callback);
+ vdo_launch_completion_with_priority(&data_vio->vio.completion, priority);
+}
+
+static inline void set_data_vio_bio_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ vdo_set_completion_callback(&data_vio->vio.completion, callback,
+ get_vio_bio_zone_thread_id(&data_vio->vio));
+}
+
+/**
+ * launch_data_vio_bio_zone_callback() - Set a callback as a bio zone operation and invoke it
+ * immediately.
+ */
+static inline void launch_data_vio_bio_zone_callback(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ set_data_vio_bio_zone_callback(data_vio, callback);
+ vdo_launch_completion_with_priority(&data_vio->vio.completion,
+ BIO_Q_DATA_PRIORITY);
+}
+
+/**
+ * launch_data_vio_on_bio_ack_queue() - If the vdo uses a bio_ack queue, set a callback to run on
+ * it and invoke it immediately, otherwise, just run the
+ * callback on the current thread.
+ */
+static inline void launch_data_vio_on_bio_ack_queue(struct data_vio *data_vio,
+ vdo_action_fn callback)
+{
+ struct vdo_completion *completion = &data_vio->vio.completion;
+ struct vdo *vdo = completion->vdo;
+
+ if (!vdo_uses_bio_ack_queue(vdo)) {
+ callback(completion);
+ return;
+ }
+
+ vdo_set_completion_callback(completion, callback,
+ vdo->thread_config.bio_ack_thread);
+ vdo_launch_completion_with_priority(completion, BIO_ACK_Q_ACK_PRIORITY);
+}
+
+void data_vio_allocate_data_block(struct data_vio *data_vio,
+ enum pbn_lock_type write_lock_type,
+ vdo_action_fn callback, vdo_action_fn error_handler);
+
+void release_data_vio_allocation_lock(struct data_vio *data_vio, bool reset);
+
+int __must_check uncompress_data_vio(struct data_vio *data_vio,
+ enum block_mapping_state mapping_state,
+ char *buffer);
+
+void update_metadata_for_data_vio_write(struct data_vio *data_vio,
+ struct pbn_lock *lock);
+void write_data_vio(struct data_vio *data_vio);
+void launch_compress_data_vio(struct data_vio *data_vio);
+void continue_data_vio_with_block_map_slot(struct vdo_completion *completion);
+
+#endif /* DATA_VIO_H */
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c
new file mode 100644
index 000000000000..117266e1b3ae
--- /dev/null
+++ b/drivers/md/dm-vdo/dedupe.c
@@ -0,0 +1,3003 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+/**
+ * DOC:
+ *
+ * Hash Locks:
+ *
+ * A hash_lock controls and coordinates writing, index access, and dedupe among groups of data_vios
+ * concurrently writing identical blocks, allowing them to deduplicate not only against advice but
+ * also against each other. This saves on index queries and allows those data_vios to concurrently
+ * deduplicate against a single block instead of being serialized through a PBN read lock. Only one
+ * index query is needed for each hash_lock, instead of one for every data_vio.
+ *
+ * Hash_locks are assigned to hash_zones by computing a modulus on the hash itself. Each hash_zone
+ * has a single dedicated queue and thread for performing all operations on the hash_locks assigned
+ * to that zone. The concurrency guarantees of this single-threaded model allow the code to omit
+ * more fine-grained locking for the hash_lock structures.
+ *
+ * A hash_lock acts like a state machine perhaps more than as a lock. Other than the starting and
+ * ending states INITIALIZING and BYPASSING, every state represents and is held for the duration of
+ * an asynchronous operation. All state transitions are performed on the thread of the hash_zone
+ * containing the lock. An asynchronous operation is almost always performed upon entering a state,
+ * and the callback from that operation triggers exiting the state and entering a new state.
+ *
+ * In all states except DEDUPING, there is a single data_vio, called the lock agent, performing the
+ * asynchronous operations on behalf of the lock. The agent will change during the lifetime of the
+ * lock if the lock is shared by more than one data_vio. data_vios waiting to deduplicate are kept
+ * on a wait queue. Viewed a different way, the agent holds the lock exclusively until the lock
+ * enters the DEDUPING state, at which point it becomes a shared lock that all the waiters (and any
+ * new data_vios that arrive) use to share a PBN lock. In state DEDUPING, there is no agent. When
+ * the last data_vio in the lock calls back in DEDUPING, it becomes the agent and the lock becomes
+ * exclusive again. New data_vios that arrive in the lock will also go on the wait queue.
+ *
+ * The existence of lock waiters is a key factor controlling which state the lock transitions to
+ * next. When the lock is new or has waiters, it will always try to reach DEDUPING, and when it
+ * doesn't, it will try to clean up and exit.
+ *
+ * Deduping requires holding a PBN lock on a block that is known to contain data identical to the
+ * data_vios in the lock, so the lock will send the agent to the duplicate zone to acquire the PBN
+ * lock (LOCKING), to the kernel I/O threads to read and verify the data (VERIFYING), or to write a
+ * new copy of the data to a full data block or a slot in a compressed block (WRITING).
+ *
+ * Cleaning up consists of updating the index when the data location is different from the initial
+ * index query (UPDATING, triggered by stale advice, compression, and rollover), releasing the PBN
+ * lock on the duplicate block (UNLOCKING), and if the agent is the last data_vio referencing the
+ * lock, releasing the hash_lock itself back to the hash zone (BYPASSING).
+ *
+ * The shortest sequence of states is for non-concurrent writes of new data:
+ * INITIALIZING -> QUERYING -> WRITING -> BYPASSING
+ * This sequence is short because no PBN read lock or index update is needed.
+ *
+ * Non-concurrent, finding valid advice looks like this (endpoints elided):
+ * -> QUERYING -> LOCKING -> VERIFYING -> DEDUPING -> UNLOCKING ->
+ * Or with stale advice (endpoints elided):
+ * -> QUERYING -> LOCKING -> VERIFYING -> UNLOCKING -> WRITING -> UPDATING ->
+ *
+ * When there are not enough available reference count increments available on a PBN for a data_vio
+ * to deduplicate, a new lock is forked and the excess waiters roll over to the new lock (which
+ * goes directly to WRITING). The new lock takes the place of the old lock in the lock map so new
+ * data_vios will be directed to it. The two locks will proceed independently, but only the new
+ * lock will have the right to update the index (unless it also forks).
+ *
+ * Since rollover happens in a lock instance, once a valid data location has been selected, it will
+ * not change. QUERYING and WRITING are only performed once per lock lifetime. All other
+ * non-endpoint states can be re-entered.
+ *
+ * The function names in this module follow a convention referencing the states and transitions in
+ * the state machine. For example, for the LOCKING state, there are start_locking() and
+ * finish_locking() functions. start_locking() is invoked by the finish function of the state (or
+ * states) that transition to LOCKING. It performs the actual lock state change and must be invoked
+ * on the hash zone thread. finish_locking() is called by (or continued via callback from) the
+ * code actually obtaining the lock. It does any bookkeeping or decision-making required and
+ * invokes the appropriate start function of the state being transitioned to after LOCKING.
+ *
+ * ----------------------------------------------------------------------
+ *
+ * Index Queries:
+ *
+ * A query to the UDS index is handled asynchronously by the index's threads. When the query is
+ * complete, a callback supplied with the query will be called from one of the those threads. Under
+ * heavy system load, the index may be slower to respond than is desirable for reasonable I/O
+ * throughput. Since deduplication of writes is not necessary for correct operation of a VDO
+ * device, it is acceptable to timeout out slow index queries and proceed to fulfill a write
+ * request without deduplicating. However, because the uds_request struct itself is supplied by the
+ * caller, we can not simply reuse a uds_request object which we have chosen to timeout. Hence,
+ * each hash_zone maintains a pool of dedupe_contexts which each contain a uds_request along with a
+ * reference to the data_vio on behalf of which they are performing a query.
+ *
+ * When a hash_lock needs to query the index, it attempts to acquire an unused dedupe_context from
+ * its hash_zone's pool. If one is available, that context is prepared, associated with the
+ * hash_lock's agent, added to the list of pending contexts, and then sent to the index. The
+ * context's state will be transitioned from DEDUPE_CONTEXT_IDLE to DEDUPE_CONTEXT_PENDING. If all
+ * goes well, the dedupe callback will be called by the index which will change the context's state
+ * to DEDUPE_CONTEXT_COMPLETE, and the associated data_vio will be enqueued to run back in the hash
+ * zone where the query results will be processed and the context will be put back in the idle
+ * state and returned to the hash_zone's available list.
+ *
+ * The first time an index query is launched from a given hash_zone, a timer is started. When the
+ * timer fires, the hash_zone's completion is enqueued to run in the hash_zone where the zone's
+ * pending list will be searched for any contexts in the pending state which have been running for
+ * too long. Those contexts are transitioned to the DEDUPE_CONTEXT_TIMED_OUT state and moved to the
+ * zone's timed_out list where they won't be examined again if there is a subsequent time out). The
+ * data_vios associated with timed out contexts are sent to continue processing their write
+ * operation without deduplicating. The timer is also restarted.
+ *
+ * When the dedupe callback is run for a context which is in the timed out state, that context is
+ * moved to the DEDUPE_CONTEXT_TIMED_OUT_COMPLETE state. No other action need be taken as the
+ * associated data_vios have already been dispatched.
+ *
+ * If a hash_lock needs a dedupe context, and the available list is empty, the timed_out list will
+ * be searched for any contexts which are timed out and complete. One of these will be used
+ * immediately, and the rest will be returned to the available list and marked idle.
+ */
+
+#include "dedupe.h"
+
+#include <linux/atomic.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/ratelimit.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+#include "indexer.h"
+
+#include "action-manager.h"
+#include "admin-state.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "int-map.h"
+#include "io-submitter.h"
+#include "packer.h"
+#include "physical-zone.h"
+#include "slab-depot.h"
+#include "statistics.h"
+#include "types.h"
+#include "vdo.h"
+#include "wait-queue.h"
+
+struct uds_attribute {
+ struct attribute attr;
+ const char *(*show_string)(struct hash_zones *hash_zones);
+};
+
+#define DEDUPE_QUERY_TIMER_IDLE 0
+#define DEDUPE_QUERY_TIMER_RUNNING 1
+#define DEDUPE_QUERY_TIMER_FIRED 2
+
+enum dedupe_context_state {
+ DEDUPE_CONTEXT_IDLE,
+ DEDUPE_CONTEXT_PENDING,
+ DEDUPE_CONTEXT_TIMED_OUT,
+ DEDUPE_CONTEXT_COMPLETE,
+ DEDUPE_CONTEXT_TIMED_OUT_COMPLETE,
+};
+
+/* Possible index states: closed, opened, or transitioning between those two. */
+enum index_state {
+ IS_CLOSED,
+ IS_CHANGING,
+ IS_OPENED,
+};
+
+static const char *CLOSED = "closed";
+static const char *CLOSING = "closing";
+static const char *ERROR = "error";
+static const char *OFFLINE = "offline";
+static const char *ONLINE = "online";
+static const char *OPENING = "opening";
+static const char *SUSPENDED = "suspended";
+static const char *UNKNOWN = "unknown";
+
+/* Version 2 uses the kernel space UDS index and is limited to 16 bytes */
+#define UDS_ADVICE_VERSION 2
+/* version byte + state byte + 64-bit little-endian PBN */
+#define UDS_ADVICE_SIZE (1 + 1 + sizeof(u64))
+
+enum hash_lock_state {
+ /* State for locks that are not in use or are being initialized. */
+ VDO_HASH_LOCK_INITIALIZING,
+
+ /* This is the sequence of states typically used on the non-dedupe path. */
+ VDO_HASH_LOCK_QUERYING,
+ VDO_HASH_LOCK_WRITING,
+ VDO_HASH_LOCK_UPDATING,
+
+ /* The remaining states are typically used on the dedupe path in this order. */
+ VDO_HASH_LOCK_LOCKING,
+ VDO_HASH_LOCK_VERIFYING,
+ VDO_HASH_LOCK_DEDUPING,
+ VDO_HASH_LOCK_UNLOCKING,
+
+ /*
+ * Terminal state for locks returning to the pool. Must be last both because it's the final
+ * state, and also because it's used to count the states.
+ */
+ VDO_HASH_LOCK_BYPASSING,
+};
+
+static const char * const LOCK_STATE_NAMES[] = {
+ [VDO_HASH_LOCK_BYPASSING] = "BYPASSING",
+ [VDO_HASH_LOCK_DEDUPING] = "DEDUPING",
+ [VDO_HASH_LOCK_INITIALIZING] = "INITIALIZING",
+ [VDO_HASH_LOCK_LOCKING] = "LOCKING",
+ [VDO_HASH_LOCK_QUERYING] = "QUERYING",
+ [VDO_HASH_LOCK_UNLOCKING] = "UNLOCKING",
+ [VDO_HASH_LOCK_UPDATING] = "UPDATING",
+ [VDO_HASH_LOCK_VERIFYING] = "VERIFYING",
+ [VDO_HASH_LOCK_WRITING] = "WRITING",
+};
+
+struct hash_lock {
+ /* The block hash covered by this lock */
+ struct uds_record_name hash;
+
+ /* When the lock is unused, this list entry allows the lock to be pooled */
+ struct list_head pool_node;
+
+ /*
+ * A list containing the data VIOs sharing this lock, all having the same record name and
+ * data block contents, linked by their hash_lock_node fields.
+ */
+ struct list_head duplicate_ring;
+
+ /* The number of data_vios sharing this lock instance */
+ data_vio_count_t reference_count;
+
+ /* The maximum value of reference_count in the lifetime of this lock */
+ data_vio_count_t max_references;
+
+ /* The current state of this lock */
+ enum hash_lock_state state;
+
+ /* True if the UDS index should be updated with new advice */
+ bool update_advice;
+
+ /* True if the advice has been verified to be a true duplicate */
+ bool verified;
+
+ /* True if the lock has already accounted for an initial verification */
+ bool verify_counted;
+
+ /* True if this lock is registered in the lock map (cleared on rollover) */
+ bool registered;
+
+ /*
+ * If verified is false, this is the location of a possible duplicate. If verified is true,
+ * it is the verified location of a true duplicate.
+ */
+ struct zoned_pbn duplicate;
+
+ /* The PBN lock on the block containing the duplicate data */
+ struct pbn_lock *duplicate_lock;
+
+ /* The data_vio designated to act on behalf of the lock */
+ struct data_vio *agent;
+
+ /*
+ * Other data_vios with data identical to the agent who are currently waiting for the agent
+ * to get the information they all need to deduplicate--either against each other, or
+ * against an existing duplicate on disk.
+ */
+ struct vdo_wait_queue waiters;
+};
+
+#define LOCK_POOL_CAPACITY MAXIMUM_VDO_USER_VIOS
+
+struct hash_zones {
+ struct action_manager *manager;
+ struct uds_parameters parameters;
+ struct uds_index_session *index_session;
+ struct ratelimit_state ratelimiter;
+ atomic64_t timeouts;
+ atomic64_t dedupe_context_busy;
+
+ /* This spinlock protects the state fields and the starting of dedupe requests. */
+ spinlock_t lock;
+
+ /* The fields in the next block are all protected by the lock */
+ struct vdo_completion completion;
+ enum index_state index_state;
+ enum index_state index_target;
+ struct admin_state state;
+ bool changing;
+ bool create_flag;
+ bool dedupe_flag;
+ bool error_flag;
+ u64 reported_timeouts;
+
+ /* The number of zones */
+ zone_count_t zone_count;
+ /* The hash zones themselves */
+ struct hash_zone zones[];
+};
+
+/* These are in milliseconds. */
+unsigned int vdo_dedupe_index_timeout_interval = 5000;
+unsigned int vdo_dedupe_index_min_timer_interval = 100;
+/* Same two variables, in jiffies for easier consumption. */
+static u64 vdo_dedupe_index_timeout_jiffies;
+static u64 vdo_dedupe_index_min_timer_jiffies;
+
+static inline struct hash_zone *as_hash_zone(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_HASH_ZONE_COMPLETION);
+ return container_of(completion, struct hash_zone, completion);
+}
+
+static inline struct hash_zones *as_hash_zones(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_HASH_ZONES_COMPLETION);
+ return container_of(completion, struct hash_zones, completion);
+}
+
+static inline void assert_in_hash_zone(struct hash_zone *zone, const char *name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == zone->thread_id),
+ "%s called on hash zone thread", name);
+}
+
+static inline bool change_context_state(struct dedupe_context *context, int old, int new)
+{
+ return (atomic_cmpxchg(&context->state, old, new) == old);
+}
+
+static inline bool change_timer_state(struct hash_zone *zone, int old, int new)
+{
+ return (atomic_cmpxchg(&zone->timer_state, old, new) == old);
+}
+
+/**
+ * return_hash_lock_to_pool() - (Re)initialize a hash lock and return it to its pool.
+ * @zone: The zone from which the lock was borrowed.
+ * @lock: The lock that is no longer in use.
+ */
+static void return_hash_lock_to_pool(struct hash_zone *zone, struct hash_lock *lock)
+{
+ memset(lock, 0, sizeof(*lock));
+ INIT_LIST_HEAD(&lock->pool_node);
+ INIT_LIST_HEAD(&lock->duplicate_ring);
+ vdo_waitq_init(&lock->waiters);
+ list_add_tail(&lock->pool_node, &zone->lock_pool);
+}
+
+/**
+ * vdo_get_duplicate_lock() - Get the PBN lock on the duplicate data location for a data_vio from
+ * the hash_lock the data_vio holds (if there is one).
+ * @data_vio: The data_vio to query.
+ *
+ * Return: The PBN lock on the data_vio's duplicate location.
+ */
+struct pbn_lock *vdo_get_duplicate_lock(struct data_vio *data_vio)
+{
+ if (data_vio->hash_lock == NULL)
+ return NULL;
+
+ return data_vio->hash_lock->duplicate_lock;
+}
+
+/**
+ * hash_lock_key() - Return hash_lock's record name as a hash code.
+ * @lock: The hash lock.
+ *
+ * Return: The key to use for the int map.
+ */
+static inline u64 hash_lock_key(struct hash_lock *lock)
+{
+ return get_unaligned_le64(&lock->hash.name);
+}
+
+/**
+ * get_hash_lock_state_name() - Get the string representation of a hash lock state.
+ * @state: The hash lock state.
+ *
+ * Return: The short string representing the state
+ */
+static const char *get_hash_lock_state_name(enum hash_lock_state state)
+{
+ /* Catch if a state has been added without updating the name array. */
+ BUILD_BUG_ON((VDO_HASH_LOCK_BYPASSING + 1) != ARRAY_SIZE(LOCK_STATE_NAMES));
+ return (state < ARRAY_SIZE(LOCK_STATE_NAMES)) ? LOCK_STATE_NAMES[state] : "INVALID";
+}
+
+/**
+ * assert_hash_lock_agent() - Assert that a data_vio is the agent of its hash lock, and that this
+ * is being called in the hash zone.
+ * @data_vio: The data_vio expected to be the lock agent.
+ * @where: A string describing the function making the assertion.
+ */
+static void assert_hash_lock_agent(struct data_vio *data_vio, const char *where)
+{
+ /* Not safe to access the agent field except from the hash zone. */
+ assert_data_vio_in_hash_zone(data_vio);
+ VDO_ASSERT_LOG_ONLY(data_vio == data_vio->hash_lock->agent,
+ "%s must be for the hash lock agent", where);
+}
+
+/**
+ * set_duplicate_lock() - Set the duplicate lock held by a hash lock. May only be called in the
+ * physical zone of the PBN lock.
+ * @hash_lock: The hash lock to update.
+ * @pbn_lock: The PBN read lock to use as the duplicate lock.
+ */
+static void set_duplicate_lock(struct hash_lock *hash_lock, struct pbn_lock *pbn_lock)
+{
+ VDO_ASSERT_LOG_ONLY((hash_lock->duplicate_lock == NULL),
+ "hash lock must not already hold a duplicate lock");
+ pbn_lock->holder_count += 1;
+ hash_lock->duplicate_lock = pbn_lock;
+}
+
+/**
+ * dequeue_lock_waiter() - Remove the first data_vio from the lock's waitq and return it.
+ * @lock: The lock containing the wait queue.
+ *
+ * Return: The first (oldest) waiter in the queue, or NULL if the queue is empty.
+ */
+static inline struct data_vio *dequeue_lock_waiter(struct hash_lock *lock)
+{
+ return vdo_waiter_as_data_vio(vdo_waitq_dequeue_waiter(&lock->waiters));
+}
+
+/**
+ * set_hash_lock() - Set, change, or clear the hash lock a data_vio is using.
+ * @data_vio: The data_vio to update.
+ * @new_lock: The hash lock the data_vio is joining.
+ *
+ * Updates the hash lock (or locks) to reflect the change in membership.
+ */
+static void set_hash_lock(struct data_vio *data_vio, struct hash_lock *new_lock)
+{
+ struct hash_lock *old_lock = data_vio->hash_lock;
+
+ if (old_lock != NULL) {
+ VDO_ASSERT_LOG_ONLY(data_vio->hash_zone != NULL,
+ "must have a hash zone when holding a hash lock");
+ VDO_ASSERT_LOG_ONLY(!list_empty(&data_vio->hash_lock_entry),
+ "must be on a hash lock ring when holding a hash lock");
+ VDO_ASSERT_LOG_ONLY(old_lock->reference_count > 0,
+ "hash lock reference must be counted");
+
+ if ((old_lock->state != VDO_HASH_LOCK_BYPASSING) &&
+ (old_lock->state != VDO_HASH_LOCK_UNLOCKING)) {
+ /*
+ * If the reference count goes to zero in a non-terminal state, we're most
+ * likely leaking this lock.
+ */
+ VDO_ASSERT_LOG_ONLY(old_lock->reference_count > 1,
+ "hash locks should only become unreferenced in a terminal state, not state %s",
+ get_hash_lock_state_name(old_lock->state));
+ }
+
+ list_del_init(&data_vio->hash_lock_entry);
+ old_lock->reference_count -= 1;
+
+ data_vio->hash_lock = NULL;
+ }
+
+ if (new_lock != NULL) {
+ /*
+ * Keep all data_vios sharing the lock on a ring since they can complete in any
+ * order and we'll always need a pointer to one to compare data.
+ */
+ list_move_tail(&data_vio->hash_lock_entry, &new_lock->duplicate_ring);
+ new_lock->reference_count += 1;
+ if (new_lock->max_references < new_lock->reference_count)
+ new_lock->max_references = new_lock->reference_count;
+
+ data_vio->hash_lock = new_lock;
+ }
+}
+
+/* There are loops in the state diagram, so some forward decl's are needed. */
+static void start_deduping(struct hash_lock *lock, struct data_vio *agent,
+ bool agent_is_done);
+static void start_locking(struct hash_lock *lock, struct data_vio *agent);
+static void start_writing(struct hash_lock *lock, struct data_vio *agent);
+static void unlock_duplicate_pbn(struct vdo_completion *completion);
+static void transfer_allocation_lock(struct data_vio *data_vio);
+
+/**
+ * exit_hash_lock() - Bottleneck for data_vios that have written or deduplicated and that are no
+ * longer needed to be an agent for the hash lock.
+ * @data_vio: The data_vio to complete and send to be cleaned up.
+ */
+static void exit_hash_lock(struct data_vio *data_vio)
+{
+ /* Release the hash lock now, saving a thread transition in cleanup. */
+ vdo_release_hash_lock(data_vio);
+
+ /* Complete the data_vio and start the clean-up path to release any locks it still holds. */
+ data_vio->vio.completion.callback = complete_data_vio;
+
+ continue_data_vio(data_vio);
+}
+
+/**
+ * set_duplicate_location() - Set the location of the duplicate block for data_vio, updating the
+ * is_duplicate and duplicate fields from a zoned_pbn.
+ * @data_vio: The data_vio to modify.
+ * @source: The location of the duplicate.
+ */
+static void set_duplicate_location(struct data_vio *data_vio,
+ const struct zoned_pbn source)
+{
+ data_vio->is_duplicate = (source.pbn != VDO_ZERO_BLOCK);
+ data_vio->duplicate = source;
+}
+
+/**
+ * retire_lock_agent() - Retire the active lock agent, replacing it with the first lock waiter, and
+ * make the retired agent exit the hash lock.
+ * @lock: The hash lock to update.
+ *
+ * Return: The new lock agent (which will be NULL if there was no waiter)
+ */
+static struct data_vio *retire_lock_agent(struct hash_lock *lock)
+{
+ struct data_vio *old_agent = lock->agent;
+ struct data_vio *new_agent = dequeue_lock_waiter(lock);
+
+ lock->agent = new_agent;
+ exit_hash_lock(old_agent);
+ if (new_agent != NULL)
+ set_duplicate_location(new_agent, lock->duplicate);
+ return new_agent;
+}
+
+/**
+ * wait_on_hash_lock() - Add a data_vio to the lock's queue of waiters.
+ * @lock: The hash lock on which to wait.
+ * @data_vio: The data_vio to add to the queue.
+ */
+static void wait_on_hash_lock(struct hash_lock *lock, struct data_vio *data_vio)
+{
+ vdo_waitq_enqueue_waiter(&lock->waiters, &data_vio->waiter);
+
+ /*
+ * Make sure the agent doesn't block indefinitely in the packer since it now has at least
+ * one other data_vio waiting on it.
+ */
+ if ((lock->state != VDO_HASH_LOCK_WRITING) || !cancel_data_vio_compression(lock->agent))
+ return;
+
+ /*
+ * Even though we're waiting, we also have to send ourselves as a one-way message to the
+ * packer to ensure the agent continues executing. This is safe because
+ * cancel_vio_compression() guarantees the agent won't continue executing until this
+ * message arrives in the packer, and because the wait queue link isn't used for sending
+ * the message.
+ */
+ data_vio->compression.lock_holder = lock->agent;
+ launch_data_vio_packer_callback(data_vio, vdo_remove_lock_holder_from_packer);
+}
+
+/**
+ * abort_waiter() - waiter_callback_fn function that shunts waiters to write their blocks without
+ * optimization.
+ * @waiter: The data_vio's waiter link.
+ * @context: Not used.
+ */
+static void abort_waiter(struct vdo_waiter *waiter, void *context __always_unused)
+{
+ write_data_vio(vdo_waiter_as_data_vio(waiter));
+}
+
+/**
+ * start_bypassing() - Stop using the hash lock.
+ * @lock: The hash lock.
+ * @agent: The data_vio acting as the agent for the lock.
+ *
+ * Stops using the hash lock. This is the final transition for hash locks which did not get an
+ * error.
+ */
+static void start_bypassing(struct hash_lock *lock, struct data_vio *agent)
+{
+ lock->state = VDO_HASH_LOCK_BYPASSING;
+ exit_hash_lock(agent);
+}
+
+void vdo_clean_failed_hash_lock(struct data_vio *data_vio)
+{
+ struct hash_lock *lock = data_vio->hash_lock;
+
+ if (lock->state == VDO_HASH_LOCK_BYPASSING) {
+ exit_hash_lock(data_vio);
+ return;
+ }
+
+ if (lock->agent == NULL) {
+ lock->agent = data_vio;
+ } else if (data_vio != lock->agent) {
+ exit_hash_lock(data_vio);
+ return;
+ }
+
+ lock->state = VDO_HASH_LOCK_BYPASSING;
+
+ /* Ensure we don't attempt to update advice when cleaning up. */
+ lock->update_advice = false;
+
+ vdo_waitq_notify_all_waiters(&lock->waiters, abort_waiter, NULL);
+
+ if (lock->duplicate_lock != NULL) {
+ /* The agent must reference the duplicate zone to launch it. */
+ data_vio->duplicate = lock->duplicate;
+ launch_data_vio_duplicate_zone_callback(data_vio, unlock_duplicate_pbn);
+ return;
+ }
+
+ lock->agent = NULL;
+ data_vio->is_duplicate = false;
+ exit_hash_lock(data_vio);
+}
+
+/**
+ * finish_unlocking() - Handle the result of the agent for the lock releasing a read lock on
+ * duplicate candidate.
+ * @completion: The completion of the data_vio acting as the lock's agent.
+ *
+ * This continuation is registered in unlock_duplicate_pbn().
+ */
+static void finish_unlocking(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct hash_lock *lock = agent->hash_lock;
+
+ assert_hash_lock_agent(agent, __func__);
+
+ VDO_ASSERT_LOG_ONLY(lock->duplicate_lock == NULL,
+ "must have released the duplicate lock for the hash lock");
+
+ if (!lock->verified) {
+ /*
+ * UNLOCKING -> WRITING transition: The lock we released was on an unverified
+ * block, so it must have been a lock on advice we were verifying, not on a
+ * location that was used for deduplication. Go write (or compress) the block to
+ * get a location to dedupe against.
+ */
+ start_writing(lock, agent);
+ return;
+ }
+
+ /*
+ * With the lock released, the verified duplicate block may already have changed and will
+ * need to be re-verified if a waiter arrived.
+ */
+ lock->verified = false;
+
+ if (vdo_waitq_has_waiters(&lock->waiters)) {
+ /*
+ * UNLOCKING -> LOCKING transition: A new data_vio entered the hash lock while the
+ * agent was releasing the PBN lock. The current agent exits and the waiter has to
+ * re-lock and re-verify the duplicate location.
+ *
+ * TODO: If we used the current agent to re-acquire the PBN lock we wouldn't need
+ * to re-verify.
+ */
+ agent = retire_lock_agent(lock);
+ start_locking(lock, agent);
+ return;
+ }
+
+ /*
+ * UNLOCKING -> BYPASSING transition: The agent is done with the lock and no other
+ * data_vios reference it, so remove it from the lock map and return it to the pool.
+ */
+ start_bypassing(lock, agent);
+}
+
+/**
+ * unlock_duplicate_pbn() - Release a read lock on the PBN of the block that may or may not have
+ * contained duplicate data.
+ * @completion: The completion of the data_vio acting as the lock's agent.
+ *
+ * This continuation is launched by start_unlocking(), and calls back to finish_unlocking() on the
+ * hash zone thread.
+ */
+static void unlock_duplicate_pbn(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct hash_lock *lock = agent->hash_lock;
+
+ assert_data_vio_in_duplicate_zone(agent);
+ VDO_ASSERT_LOG_ONLY(lock->duplicate_lock != NULL,
+ "must have a duplicate lock to release");
+
+ vdo_release_physical_zone_pbn_lock(agent->duplicate.zone, agent->duplicate.pbn,
+ vdo_forget(lock->duplicate_lock));
+ if (lock->state == VDO_HASH_LOCK_BYPASSING) {
+ complete_data_vio(completion);
+ return;
+ }
+
+ launch_data_vio_hash_zone_callback(agent, finish_unlocking);
+}
+
+/**
+ * start_unlocking() - Release a read lock on the PBN of the block that may or may not have
+ * contained duplicate data.
+ * @lock: The hash lock.
+ * @agent: The data_vio currently acting as the agent for the lock.
+ */
+static void start_unlocking(struct hash_lock *lock, struct data_vio *agent)
+{
+ lock->state = VDO_HASH_LOCK_UNLOCKING;
+ launch_data_vio_duplicate_zone_callback(agent, unlock_duplicate_pbn);
+}
+
+static void release_context(struct dedupe_context *context)
+{
+ struct hash_zone *zone = context->zone;
+
+ WRITE_ONCE(zone->active, zone->active - 1);
+ list_move(&context->list_entry, &zone->available);
+}
+
+static void process_update_result(struct data_vio *agent)
+{
+ struct dedupe_context *context = agent->dedupe_context;
+
+ if ((context == NULL) ||
+ !change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE))
+ return;
+
+ release_context(context);
+}
+
+/**
+ * finish_updating() - Process the result of a UDS update performed by the agent for the lock.
+ * @completion: The completion of the data_vio that performed the update
+ *
+ * This continuation is registered in start_querying().
+ */
+static void finish_updating(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct hash_lock *lock = agent->hash_lock;
+
+ assert_hash_lock_agent(agent, __func__);
+
+ process_update_result(agent);
+
+ /*
+ * UDS was updated successfully, so don't update again unless the duplicate location
+ * changes due to rollover.
+ */
+ lock->update_advice = false;
+
+ if (vdo_waitq_has_waiters(&lock->waiters)) {
+ /*
+ * UPDATING -> DEDUPING transition: A new data_vio arrived during the UDS update.
+ * Send it on the verified dedupe path. The agent is done with the lock, but the
+ * lock may still need to use it to clean up after rollover.
+ */
+ start_deduping(lock, agent, true);
+ return;
+ }
+
+ if (lock->duplicate_lock != NULL) {
+ /*
+ * UPDATING -> UNLOCKING transition: No one is waiting to dedupe, but we hold a
+ * duplicate PBN lock, so go release it.
+ */
+ start_unlocking(lock, agent);
+ return;
+ }
+
+ /*
+ * UPDATING -> BYPASSING transition: No one is waiting to dedupe and there's no lock to
+ * release.
+ */
+ start_bypassing(lock, agent);
+}
+
+static void query_index(struct data_vio *data_vio, enum uds_request_type operation);
+
+/**
+ * start_updating() - Continue deduplication with the last step, updating UDS with the location of
+ * the duplicate that should be returned as advice in the future.
+ * @lock: The hash lock.
+ * @agent: The data_vio currently acting as the agent for the lock.
+ */
+static void start_updating(struct hash_lock *lock, struct data_vio *agent)
+{
+ lock->state = VDO_HASH_LOCK_UPDATING;
+
+ VDO_ASSERT_LOG_ONLY(lock->verified, "new advice should have been verified");
+ VDO_ASSERT_LOG_ONLY(lock->update_advice, "should only update advice if needed");
+
+ agent->last_async_operation = VIO_ASYNC_OP_UPDATE_DEDUPE_INDEX;
+ set_data_vio_hash_zone_callback(agent, finish_updating);
+ query_index(agent, UDS_UPDATE);
+}
+
+/**
+ * finish_deduping() - Handle a data_vio that has finished deduplicating against the block locked
+ * by the hash lock.
+ * @lock: The hash lock.
+ * @data_vio: The lock holder that has finished deduplicating.
+ *
+ * If there are other data_vios still sharing the lock, this will just release the data_vio's share
+ * of the lock and finish processing the data_vio. If this is the last data_vio holding the lock,
+ * this makes the data_vio the lock agent and uses it to advance the state of the lock so it can
+ * eventually be released.
+ */
+static void finish_deduping(struct hash_lock *lock, struct data_vio *data_vio)
+{
+ struct data_vio *agent = data_vio;
+
+ VDO_ASSERT_LOG_ONLY(lock->agent == NULL, "shouldn't have an agent in DEDUPING");
+ VDO_ASSERT_LOG_ONLY(!vdo_waitq_has_waiters(&lock->waiters),
+ "shouldn't have any lock waiters in DEDUPING");
+
+ /* Just release the lock reference if other data_vios are still deduping. */
+ if (lock->reference_count > 1) {
+ exit_hash_lock(data_vio);
+ return;
+ }
+
+ /* The hash lock must have an agent for all other lock states. */
+ lock->agent = agent;
+ if (lock->update_advice) {
+ /*
+ * DEDUPING -> UPDATING transition: The location of the duplicate block changed
+ * since the initial UDS query because of compression, rollover, or because the
+ * query agent didn't have an allocation. The UDS update was delayed in case there
+ * was another change in location, but with only this data_vio using the hash lock,
+ * it's time to update the advice.
+ */
+ start_updating(lock, agent);
+ } else {
+ /*
+ * DEDUPING -> UNLOCKING transition: Release the PBN read lock on the duplicate
+ * location so the hash lock itself can be released (contingent on no new data_vios
+ * arriving in the lock before the agent returns).
+ */
+ start_unlocking(lock, agent);
+ }
+}
+
+/**
+ * acquire_lock() - Get the lock for a record name.
+ * @zone: The zone responsible for the hash.
+ * @hash: The hash to lock.
+ * @replace_lock: If non-NULL, the lock already registered for the hash which should be replaced by
+ * the new lock.
+ * @lock_ptr: A pointer to receive the hash lock.
+ *
+ * Gets the lock for the hash (record name) of the data in a data_vio, or if one does not exist (or
+ * if we are explicitly rolling over), initialize a new lock for the hash and register it in the
+ * zone. This must only be called in the correct thread for the zone.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check acquire_lock(struct hash_zone *zone,
+ const struct uds_record_name *hash,
+ struct hash_lock *replace_lock,
+ struct hash_lock **lock_ptr)
+{
+ struct hash_lock *lock, *new_lock;
+ int result;
+
+ /*
+ * Borrow and prepare a lock from the pool so we don't have to do two int_map accesses
+ * in the common case of no lock contention.
+ */
+ result = VDO_ASSERT(!list_empty(&zone->lock_pool),
+ "never need to wait for a free hash lock");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ new_lock = list_entry(zone->lock_pool.prev, struct hash_lock, pool_node);
+ list_del_init(&new_lock->pool_node);
+
+ /*
+ * Fill in the hash of the new lock so we can map it, since we have to use the hash as the
+ * map key.
+ */
+ new_lock->hash = *hash;
+
+ result = vdo_int_map_put(zone->hash_lock_map, hash_lock_key(new_lock),
+ new_lock, (replace_lock != NULL), (void **) &lock);
+ if (result != VDO_SUCCESS) {
+ return_hash_lock_to_pool(zone, vdo_forget(new_lock));
+ return result;
+ }
+
+ if (replace_lock != NULL) {
+ /* On mismatch put the old lock back and return a severe error */
+ VDO_ASSERT_LOG_ONLY(lock == replace_lock,
+ "old lock must have been in the lock map");
+ /* TODO: Check earlier and bail out? */
+ VDO_ASSERT_LOG_ONLY(replace_lock->registered,
+ "old lock must have been marked registered");
+ replace_lock->registered = false;
+ }
+
+ if (lock == replace_lock) {
+ lock = new_lock;
+ lock->registered = true;
+ } else {
+ /* There's already a lock for the hash, so we don't need the borrowed lock. */
+ return_hash_lock_to_pool(zone, vdo_forget(new_lock));
+ }
+
+ *lock_ptr = lock;
+ return VDO_SUCCESS;
+}
+
+/**
+ * enter_forked_lock() - Bind the data_vio to a new hash lock.
+ *
+ * Implements waiter_callback_fn. Binds the data_vio that was waiting to a new hash lock and waits
+ * on that lock.
+ */
+static void enter_forked_lock(struct vdo_waiter *waiter, void *context)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+ struct hash_lock *new_lock = context;
+
+ set_hash_lock(data_vio, new_lock);
+ wait_on_hash_lock(new_lock, data_vio);
+}
+
+/**
+ * fork_hash_lock() - Fork a hash lock because it has run out of increments on the duplicate PBN.
+ * @old_lock: The hash lock to fork.
+ * @new_agent: The data_vio that will be the agent for the new lock.
+ *
+ * Transfers the new agent and any lock waiters to a new hash lock instance which takes the place
+ * of the old lock in the lock map. The old lock remains active, but will not update advice.
+ */
+static void fork_hash_lock(struct hash_lock *old_lock, struct data_vio *new_agent)
+{
+ struct hash_lock *new_lock;
+ int result;
+
+ result = acquire_lock(new_agent->hash_zone, &new_agent->record_name, old_lock,
+ &new_lock);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(new_agent, result);
+ return;
+ }
+
+ /*
+ * Only one of the two locks should update UDS. The old lock is out of references, so it
+ * would be poor dedupe advice in the short term.
+ */
+ old_lock->update_advice = false;
+ new_lock->update_advice = true;
+
+ set_hash_lock(new_agent, new_lock);
+ new_lock->agent = new_agent;
+
+ vdo_waitq_notify_all_waiters(&old_lock->waiters, enter_forked_lock, new_lock);
+
+ new_agent->is_duplicate = false;
+ start_writing(new_lock, new_agent);
+}
+
+/**
+ * launch_dedupe() - Reserve a reference count increment for a data_vio and launch it on the dedupe
+ * path.
+ * @lock: The hash lock.
+ * @data_vio: The data_vio to deduplicate using the hash lock.
+ * @has_claim: true if the data_vio already has claimed an increment from the duplicate lock.
+ *
+ * If no increments are available, this will roll over to a new hash lock and launch the data_vio
+ * as the writing agent for that lock.
+ */
+static void launch_dedupe(struct hash_lock *lock, struct data_vio *data_vio,
+ bool has_claim)
+{
+ if (!has_claim && !vdo_claim_pbn_lock_increment(lock->duplicate_lock)) {
+ /* Out of increments, so must roll over to a new lock. */
+ fork_hash_lock(lock, data_vio);
+ return;
+ }
+
+ /* Deduplicate against the lock's verified location. */
+ set_duplicate_location(data_vio, lock->duplicate);
+ data_vio->new_mapped = data_vio->duplicate;
+ update_metadata_for_data_vio_write(data_vio, lock->duplicate_lock);
+}
+
+/**
+ * start_deduping() - Enter the hash lock state where data_vios deduplicate in parallel against a
+ * true copy of their data on disk.
+ * @lock: The hash lock.
+ * @agent: The data_vio acting as the agent for the lock.
+ * @agent_is_done: true only if the agent has already written or deduplicated against its data.
+ *
+ * If the agent itself needs to deduplicate, an increment for it must already have been claimed
+ * from the duplicate lock, ensuring the hash lock will still have a data_vio holding it.
+ */
+static void start_deduping(struct hash_lock *lock, struct data_vio *agent,
+ bool agent_is_done)
+{
+ lock->state = VDO_HASH_LOCK_DEDUPING;
+
+ /*
+ * We don't take the downgraded allocation lock from the agent unless we actually need to
+ * deduplicate against it.
+ */
+ if (lock->duplicate_lock == NULL) {
+ VDO_ASSERT_LOG_ONLY(!vdo_is_state_compressed(agent->new_mapped.state),
+ "compression must have shared a lock");
+ VDO_ASSERT_LOG_ONLY(agent_is_done,
+ "agent must have written the new duplicate");
+ transfer_allocation_lock(agent);
+ }
+
+ VDO_ASSERT_LOG_ONLY(vdo_is_pbn_read_lock(lock->duplicate_lock),
+ "duplicate_lock must be a PBN read lock");
+
+ /*
+ * This state is not like any of the other states. There is no designated agent--the agent
+ * transitioning to this state and all the waiters will be launched to deduplicate in
+ * parallel.
+ */
+ lock->agent = NULL;
+
+ /*
+ * Launch the agent (if not already deduplicated) and as many lock waiters as we have
+ * available increments for on the dedupe path. If we run out of increments, rollover will
+ * be triggered and the remaining waiters will be transferred to the new lock.
+ */
+ if (!agent_is_done) {
+ launch_dedupe(lock, agent, true);
+ agent = NULL;
+ }
+ while (vdo_waitq_has_waiters(&lock->waiters))
+ launch_dedupe(lock, dequeue_lock_waiter(lock), false);
+
+ if (agent_is_done) {
+ /*
+ * In the degenerate case where all the waiters rolled over to a new lock, this
+ * will continue to use the old agent to clean up this lock, and otherwise it just
+ * lets the agent exit the lock.
+ */
+ finish_deduping(lock, agent);
+ }
+}
+
+/**
+ * increment_stat() - Increment a statistic counter in a non-atomic yet thread-safe manner.
+ * @stat: The statistic field to increment.
+ */
+static inline void increment_stat(u64 *stat)
+{
+ /*
+ * Must only be mutated on the hash zone thread. Prevents any compiler shenanigans from
+ * affecting other threads reading stats.
+ */
+ WRITE_ONCE(*stat, *stat + 1);
+}
+
+/**
+ * finish_verifying() - Handle the result of the agent for the lock comparing its data to the
+ * duplicate candidate.
+ * @completion: The completion of the data_vio used to verify dedupe
+ *
+ * This continuation is registered in start_verifying().
+ */
+static void finish_verifying(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct hash_lock *lock = agent->hash_lock;
+
+ assert_hash_lock_agent(agent, __func__);
+
+ lock->verified = agent->is_duplicate;
+
+ /*
+ * Only count the result of the initial verification of the advice as valid or stale, and
+ * not any re-verifications due to PBN lock releases.
+ */
+ if (!lock->verify_counted) {
+ lock->verify_counted = true;
+ if (lock->verified)
+ increment_stat(&agent->hash_zone->statistics.dedupe_advice_valid);
+ else
+ increment_stat(&agent->hash_zone->statistics.dedupe_advice_stale);
+ }
+
+ /*
+ * Even if the block is a verified duplicate, we can't start to deduplicate unless we can
+ * claim a reference count increment for the agent.
+ */
+ if (lock->verified && !vdo_claim_pbn_lock_increment(lock->duplicate_lock)) {
+ agent->is_duplicate = false;
+ lock->verified = false;
+ }
+
+ if (lock->verified) {
+ /*
+ * VERIFYING -> DEDUPING transition: The advice is for a true duplicate, so start
+ * deduplicating against it, if references are available.
+ */
+ start_deduping(lock, agent, false);
+ } else {
+ /*
+ * VERIFYING -> UNLOCKING transition: Either the verify failed or we'd try to
+ * dedupe and roll over immediately, which would fail because it would leave the
+ * lock without an agent to release the PBN lock. In both cases, the data will have
+ * to be written or compressed, but first the advice PBN must be unlocked by the
+ * VERIFYING agent.
+ */
+ lock->update_advice = true;
+ start_unlocking(lock, agent);
+ }
+}
+
+static bool blocks_equal(char *block1, char *block2)
+{
+ int i;
+
+ for (i = 0; i < VDO_BLOCK_SIZE; i += sizeof(u64)) {
+ if (*((u64 *) &block1[i]) != *((u64 *) &block2[i]))
+ return false;
+ }
+
+ return true;
+}
+
+static void verify_callback(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+
+ agent->is_duplicate = blocks_equal(agent->vio.data, agent->scratch_block);
+ launch_data_vio_hash_zone_callback(agent, finish_verifying);
+}
+
+static void uncompress_and_verify(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ int result;
+
+ result = uncompress_data_vio(agent, agent->duplicate.state,
+ agent->scratch_block);
+ if (result == VDO_SUCCESS) {
+ verify_callback(completion);
+ return;
+ }
+
+ agent->is_duplicate = false;
+ launch_data_vio_hash_zone_callback(agent, finish_verifying);
+}
+
+static void verify_endio(struct bio *bio)
+{
+ struct data_vio *agent = vio_as_data_vio(bio->bi_private);
+ int result = blk_status_to_errno(bio->bi_status);
+
+ vdo_count_completed_bios(bio);
+ if (result != VDO_SUCCESS) {
+ agent->is_duplicate = false;
+ launch_data_vio_hash_zone_callback(agent, finish_verifying);
+ return;
+ }
+
+ if (vdo_is_state_compressed(agent->duplicate.state)) {
+ launch_data_vio_cpu_callback(agent, uncompress_and_verify,
+ CPU_Q_COMPRESS_BLOCK_PRIORITY);
+ return;
+ }
+
+ launch_data_vio_cpu_callback(agent, verify_callback,
+ CPU_Q_COMPLETE_READ_PRIORITY);
+}
+
+/**
+ * start_verifying() - Begin the data verification phase.
+ * @lock: The hash lock (must be LOCKING).
+ * @agent: The data_vio to use to read and compare candidate data.
+ *
+ * Continue the deduplication path for a hash lock by using the agent to read (and possibly
+ * decompress) the data at the candidate duplicate location, comparing it to the data in the agent
+ * to verify that the candidate is identical to all the data_vios sharing the hash. If so, it can
+ * be deduplicated against, otherwise a data_vio allocation will have to be written to and used for
+ * dedupe.
+ */
+static void start_verifying(struct hash_lock *lock, struct data_vio *agent)
+{
+ int result;
+ struct vio *vio = &agent->vio;
+ char *buffer = (vdo_is_state_compressed(agent->duplicate.state) ?
+ (char *) agent->compression.block :
+ agent->scratch_block);
+
+ lock->state = VDO_HASH_LOCK_VERIFYING;
+ VDO_ASSERT_LOG_ONLY(!lock->verified, "hash lock only verifies advice once");
+
+ agent->last_async_operation = VIO_ASYNC_OP_VERIFY_DUPLICATION;
+ result = vio_reset_bio(vio, buffer, verify_endio, REQ_OP_READ,
+ agent->duplicate.pbn);
+ if (result != VDO_SUCCESS) {
+ set_data_vio_hash_zone_callback(agent, finish_verifying);
+ continue_data_vio_with_error(agent, result);
+ return;
+ }
+
+ set_data_vio_bio_zone_callback(agent, vdo_submit_vio);
+ vdo_launch_completion_with_priority(&vio->completion, BIO_Q_VERIFY_PRIORITY);
+}
+
+/**
+ * finish_locking() - Handle the result of the agent for the lock attempting to obtain a PBN read
+ * lock on the candidate duplicate block.
+ * @completion: The completion of the data_vio that attempted to get the read lock.
+ *
+ * This continuation is registered in lock_duplicate_pbn().
+ */
+static void finish_locking(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct hash_lock *lock = agent->hash_lock;
+
+ assert_hash_lock_agent(agent, __func__);
+
+ if (!agent->is_duplicate) {
+ VDO_ASSERT_LOG_ONLY(lock->duplicate_lock == NULL,
+ "must not hold duplicate_lock if not flagged as a duplicate");
+ /*
+ * LOCKING -> WRITING transition: The advice block is being modified or has no
+ * available references, so try to write or compress the data, remembering to
+ * update UDS later with the new advice.
+ */
+ increment_stat(&agent->hash_zone->statistics.dedupe_advice_stale);
+ lock->update_advice = true;
+ start_writing(lock, agent);
+ return;
+ }
+
+ VDO_ASSERT_LOG_ONLY(lock->duplicate_lock != NULL,
+ "must hold duplicate_lock if flagged as a duplicate");
+
+ if (!lock->verified) {
+ /*
+ * LOCKING -> VERIFYING transition: Continue on the unverified dedupe path, reading
+ * the candidate duplicate and comparing it to the agent's data to decide whether
+ * it is a true duplicate or stale advice.
+ */
+ start_verifying(lock, agent);
+ return;
+ }
+
+ if (!vdo_claim_pbn_lock_increment(lock->duplicate_lock)) {
+ /*
+ * LOCKING -> UNLOCKING transition: The verified block was re-locked, but has no
+ * available increments left. Must first release the useless PBN read lock before
+ * rolling over to a new copy of the block.
+ */
+ agent->is_duplicate = false;
+ lock->verified = false;
+ lock->update_advice = true;
+ start_unlocking(lock, agent);
+ return;
+ }
+
+ /*
+ * LOCKING -> DEDUPING transition: Continue on the verified dedupe path, deduplicating
+ * against a location that was previously verified or written to.
+ */
+ start_deduping(lock, agent, false);
+}
+
+static bool acquire_provisional_reference(struct data_vio *agent, struct pbn_lock *lock,
+ struct slab_depot *depot)
+{
+ /* Ensure that the newly-locked block is referenced. */
+ struct vdo_slab *slab = vdo_get_slab(depot, agent->duplicate.pbn);
+ int result = vdo_acquire_provisional_reference(slab, agent->duplicate.pbn, lock);
+
+ if (result == VDO_SUCCESS)
+ return true;
+
+ vdo_log_warning_strerror(result,
+ "Error acquiring provisional reference for dedupe candidate; aborting dedupe");
+ agent->is_duplicate = false;
+ vdo_release_physical_zone_pbn_lock(agent->duplicate.zone,
+ agent->duplicate.pbn, lock);
+ continue_data_vio_with_error(agent, result);
+ return false;
+}
+
+/**
+ * lock_duplicate_pbn() - Acquire a read lock on the PBN of the block containing candidate
+ * duplicate data (compressed or uncompressed).
+ * @completion: The completion of the data_vio attempting to acquire the physical block lock on
+ * behalf of its hash lock.
+ *
+ * If the PBN is already locked for writing, the lock attempt is abandoned and is_duplicate will be
+ * cleared before calling back. This continuation is launched from start_locking(), and calls back
+ * to finish_locking() on the hash zone thread.
+ */
+static void lock_duplicate_pbn(struct vdo_completion *completion)
+{
+ unsigned int increment_limit;
+ struct pbn_lock *lock;
+ int result;
+
+ struct data_vio *agent = as_data_vio(completion);
+ struct slab_depot *depot = vdo_from_data_vio(agent)->depot;
+ struct physical_zone *zone = agent->duplicate.zone;
+
+ assert_data_vio_in_duplicate_zone(agent);
+
+ set_data_vio_hash_zone_callback(agent, finish_locking);
+
+ /*
+ * While in the zone that owns it, find out how many additional references can be made to
+ * the block if it turns out to truly be a duplicate.
+ */
+ increment_limit = vdo_get_increment_limit(depot, agent->duplicate.pbn);
+ if (increment_limit == 0) {
+ /*
+ * We could deduplicate against it later if a reference happened to be released
+ * during verification, but it's probably better to bail out now.
+ */
+ agent->is_duplicate = false;
+ continue_data_vio(agent);
+ return;
+ }
+
+ result = vdo_attempt_physical_zone_pbn_lock(zone, agent->duplicate.pbn,
+ VIO_READ_LOCK, &lock);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(agent, result);
+ return;
+ }
+
+ if (!vdo_is_pbn_read_lock(lock)) {
+ /*
+ * There are three cases of write locks: uncompressed data block writes, compressed
+ * (packed) block writes, and block map page writes. In all three cases, we give up
+ * on trying to verify the advice and don't bother to try deduplicate against the
+ * data in the write lock holder.
+ *
+ * 1) We don't ever want to try to deduplicate against a block map page.
+ *
+ * 2a) It's very unlikely we'd deduplicate against an entire packed block, both
+ * because of the chance of matching it, and because we don't record advice for it,
+ * but for the uncompressed representation of all the fragments it contains. The
+ * only way we'd be getting lock contention is if we've written the same
+ * representation coincidentally before, had it become unreferenced, and it just
+ * happened to be packed together from compressed writes when we go to verify the
+ * lucky advice. Giving up is a minuscule loss of potential dedupe.
+ *
+ * 2b) If the advice is for a slot of a compressed block, it's about to get
+ * smashed, and the write smashing it cannot contain our data--it would have to be
+ * writing on behalf of our hash lock, but that's impossible since we're the lock
+ * agent.
+ *
+ * 3a) If the lock is held by a data_vio with different data, the advice is already
+ * stale or is about to become stale.
+ *
+ * 3b) If the lock is held by a data_vio that matches us, we may as well either
+ * write it ourselves (or reference the copy we already wrote) instead of
+ * potentially having many duplicates wait for the lock holder to write, journal,
+ * hash, and finally arrive in the hash lock. We lose a chance to avoid a UDS
+ * update in the very rare case of advice for a free block that just happened to be
+ * allocated to a data_vio with the same hash. There's also a chance to save on a
+ * block write, at the cost of a block verify. Saving on a full block compare in
+ * all stale advice cases almost certainly outweighs saving a UDS update and
+ * trading a write for a read in a lucky case where advice would have been saved
+ * from becoming stale.
+ */
+ agent->is_duplicate = false;
+ continue_data_vio(agent);
+ return;
+ }
+
+ if (lock->holder_count == 0) {
+ if (!acquire_provisional_reference(agent, lock, depot))
+ return;
+
+ /*
+ * The increment limit we grabbed earlier is still valid. The lock now holds the
+ * rights to acquire all those references. Those rights will be claimed by hash
+ * locks sharing this read lock.
+ */
+ lock->increment_limit = increment_limit;
+ }
+
+ /*
+ * We've successfully acquired a read lock on behalf of the hash lock, so mark it as such.
+ */
+ set_duplicate_lock(agent->hash_lock, lock);
+
+ /*
+ * TODO: Optimization: We could directly launch the block verify, then switch to a hash
+ * thread.
+ */
+ continue_data_vio(agent);
+}
+
+/**
+ * start_locking() - Continue deduplication for a hash lock that has obtained valid advice of a
+ * potential duplicate through its agent.
+ * @lock: The hash lock (currently must be QUERYING).
+ * @agent: The data_vio bearing the dedupe advice.
+ */
+static void start_locking(struct hash_lock *lock, struct data_vio *agent)
+{
+ VDO_ASSERT_LOG_ONLY(lock->duplicate_lock == NULL,
+ "must not acquire a duplicate lock when already holding it");
+
+ lock->state = VDO_HASH_LOCK_LOCKING;
+
+ /*
+ * TODO: Optimization: If we arrange to continue on the duplicate zone thread when
+ * accepting the advice, and don't explicitly change lock states (or use an agent-local
+ * state, or an atomic), we can avoid a thread transition here.
+ */
+ agent->last_async_operation = VIO_ASYNC_OP_LOCK_DUPLICATE_PBN;
+ launch_data_vio_duplicate_zone_callback(agent, lock_duplicate_pbn);
+}
+
+/**
+ * finish_writing() - Re-entry point for the lock agent after it has finished writing or
+ * compressing its copy of the data block.
+ * @lock: The hash lock, which must be in state WRITING.
+ * @agent: The data_vio that wrote its data for the lock.
+ *
+ * The agent will never need to dedupe against anything, so it's done with the lock, but the lock
+ * may not be finished with it, as a UDS update might still be needed.
+ *
+ * If there are other lock holders, the agent will hand the job to one of them and exit, leaving
+ * the lock to deduplicate against the just-written block. If there are no other lock holders, the
+ * agent either exits (and later tears down the hash lock), or it remains the agent and updates
+ * UDS.
+ */
+static void finish_writing(struct hash_lock *lock, struct data_vio *agent)
+{
+ /*
+ * Dedupe against the data block or compressed block slot the agent wrote. Since we know
+ * the write succeeded, there's no need to verify it.
+ */
+ lock->duplicate = agent->new_mapped;
+ lock->verified = true;
+
+ if (vdo_is_state_compressed(lock->duplicate.state) && lock->registered) {
+ /*
+ * Compression means the location we gave in the UDS query is not the location
+ * we're using to deduplicate.
+ */
+ lock->update_advice = true;
+ }
+
+ /* If there are any waiters, we need to start deduping them. */
+ if (vdo_waitq_has_waiters(&lock->waiters)) {
+ /*
+ * WRITING -> DEDUPING transition: an asynchronously-written block failed to
+ * compress, so the PBN lock on the written copy was already transferred. The agent
+ * is done with the lock, but the lock may still need to use it to clean up after
+ * rollover.
+ */
+ start_deduping(lock, agent, true);
+ return;
+ }
+
+ /*
+ * There are no waiters and the agent has successfully written, so take a step towards
+ * being able to release the hash lock (or just release it).
+ */
+ if (lock->update_advice) {
+ /*
+ * WRITING -> UPDATING transition: There's no waiter and a UDS update is needed, so
+ * retain the WRITING agent and use it to launch the update. The happens on
+ * compression, rollover, or the QUERYING agent not having an allocation.
+ */
+ start_updating(lock, agent);
+ } else if (lock->duplicate_lock != NULL) {
+ /*
+ * WRITING -> UNLOCKING transition: There's no waiter and no update needed, but the
+ * compressed write gave us a shared duplicate lock that we must release.
+ */
+ set_duplicate_location(agent, lock->duplicate);
+ start_unlocking(lock, agent);
+ } else {
+ /*
+ * WRITING -> BYPASSING transition: There's no waiter, no update needed, and no
+ * duplicate lock held, so both the agent and lock have no more work to do. The
+ * agent will release its allocation lock in cleanup.
+ */
+ start_bypassing(lock, agent);
+ }
+}
+
+/**
+ * select_writing_agent() - Search through the lock waiters for a data_vio that has an allocation.
+ * @lock: The hash lock to modify.
+ *
+ * If an allocation is found, swap agents, put the old agent at the head of the wait queue, then
+ * return the new agent. Otherwise, just return the current agent.
+ */
+static struct data_vio *select_writing_agent(struct hash_lock *lock)
+{
+ struct vdo_wait_queue temp_queue;
+ struct data_vio *data_vio;
+
+ vdo_waitq_init(&temp_queue);
+
+ /*
+ * Move waiters to the temp queue one-by-one until we find an allocation. Not ideal to
+ * search, but it only happens when nearly out of space.
+ */
+ while (((data_vio = dequeue_lock_waiter(lock)) != NULL) &&
+ !data_vio_has_allocation(data_vio)) {
+ /* Use the lower-level enqueue since we're just moving waiters around. */
+ vdo_waitq_enqueue_waiter(&temp_queue, &data_vio->waiter);
+ }
+
+ if (data_vio != NULL) {
+ /*
+ * Move the rest of the waiters over to the temp queue, preserving the order they
+ * arrived at the lock.
+ */
+ vdo_waitq_transfer_all_waiters(&lock->waiters, &temp_queue);
+
+ /*
+ * The current agent is being replaced and will have to wait to dedupe; make it the
+ * first waiter since it was the first to reach the lock.
+ */
+ vdo_waitq_enqueue_waiter(&lock->waiters, &lock->agent->waiter);
+ lock->agent = data_vio;
+ } else {
+ /* No one has an allocation, so keep the current agent. */
+ data_vio = lock->agent;
+ }
+
+ /* Swap all the waiters back onto the lock's queue. */
+ vdo_waitq_transfer_all_waiters(&temp_queue, &lock->waiters);
+ return data_vio;
+}
+
+/**
+ * start_writing() - Begin the non-duplicate write path.
+ * @lock: The hash lock (currently must be QUERYING).
+ * @agent: The data_vio currently acting as the agent for the lock.
+ *
+ * Begins the non-duplicate write path for a hash lock that had no advice, selecting a data_vio
+ * with an allocation as a new agent, if necessary, then resuming the agent on the data_vio write
+ * path.
+ */
+static void start_writing(struct hash_lock *lock, struct data_vio *agent)
+{
+ lock->state = VDO_HASH_LOCK_WRITING;
+
+ /*
+ * The agent might not have received an allocation and so can't be used for writing, but
+ * it's entirely possible that one of the waiters did.
+ */
+ if (!data_vio_has_allocation(agent)) {
+ agent = select_writing_agent(lock);
+ /* If none of the waiters had an allocation, the writes all have to fail. */
+ if (!data_vio_has_allocation(agent)) {
+ /*
+ * TODO: Should we keep a variant of BYPASSING that causes new arrivals to
+ * fail immediately if they don't have an allocation? It might be possible
+ * that on some path there would be non-waiters still referencing the lock,
+ * so it would remain in the map as everything is currently spelled, even
+ * if the agent and all waiters release.
+ */
+ continue_data_vio_with_error(agent, VDO_NO_SPACE);
+ return;
+ }
+ }
+
+ /*
+ * If the agent compresses, it might wait indefinitely in the packer, which would be bad if
+ * there are any other data_vios waiting.
+ */
+ if (vdo_waitq_has_waiters(&lock->waiters))
+ cancel_data_vio_compression(agent);
+
+ /*
+ * Send the agent to the compress/pack/write path in vioWrite. If it succeeds, it will
+ * return to the hash lock via vdo_continue_hash_lock() and call finish_writing().
+ */
+ launch_compress_data_vio(agent);
+}
+
+/*
+ * Decode VDO duplicate advice from the old_metadata field of a UDS request.
+ * Returns true if valid advice was found and decoded
+ */
+static bool decode_uds_advice(struct dedupe_context *context)
+{
+ const struct uds_request *request = &context->request;
+ struct data_vio *data_vio = context->requestor;
+ size_t offset = 0;
+ const struct uds_record_data *encoding = &request->old_metadata;
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+ struct zoned_pbn *advice = &data_vio->duplicate;
+ u8 version;
+ int result;
+
+ if ((request->status != UDS_SUCCESS) || !request->found)
+ return false;
+
+ version = encoding->data[offset++];
+ if (version != UDS_ADVICE_VERSION) {
+ vdo_log_error("invalid UDS advice version code %u", version);
+ return false;
+ }
+
+ advice->state = encoding->data[offset++];
+ advice->pbn = get_unaligned_le64(&encoding->data[offset]);
+ offset += sizeof(u64);
+ BUG_ON(offset != UDS_ADVICE_SIZE);
+
+ /* Don't use advice that's clearly meaningless. */
+ if ((advice->state == VDO_MAPPING_STATE_UNMAPPED) || (advice->pbn == VDO_ZERO_BLOCK)) {
+ vdo_log_debug("Invalid advice from deduplication server: pbn %llu, state %u. Giving up on deduplication of logical block %llu",
+ (unsigned long long) advice->pbn, advice->state,
+ (unsigned long long) data_vio->logical.lbn);
+ atomic64_inc(&vdo->stats.invalid_advice_pbn_count);
+ return false;
+ }
+
+ result = vdo_get_physical_zone(vdo, advice->pbn, &advice->zone);
+ if ((result != VDO_SUCCESS) || (advice->zone == NULL)) {
+ vdo_log_debug("Invalid physical block number from deduplication server: %llu, giving up on deduplication of logical block %llu",
+ (unsigned long long) advice->pbn,
+ (unsigned long long) data_vio->logical.lbn);
+ atomic64_inc(&vdo->stats.invalid_advice_pbn_count);
+ return false;
+ }
+
+ return true;
+}
+
+static void process_query_result(struct data_vio *agent)
+{
+ struct dedupe_context *context = agent->dedupe_context;
+
+ if (context == NULL)
+ return;
+
+ if (change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) {
+ agent->is_duplicate = decode_uds_advice(context);
+ release_context(context);
+ }
+}
+
+/**
+ * finish_querying() - Process the result of a UDS query performed by the agent for the lock.
+ * @completion: The completion of the data_vio that performed the query.
+ *
+ * This continuation is registered in start_querying().
+ */
+static void finish_querying(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct hash_lock *lock = agent->hash_lock;
+
+ assert_hash_lock_agent(agent, __func__);
+
+ process_query_result(agent);
+
+ if (agent->is_duplicate) {
+ lock->duplicate = agent->duplicate;
+ /*
+ * QUERYING -> LOCKING transition: Valid advice was obtained from UDS. Use the
+ * QUERYING agent to start the hash lock on the unverified dedupe path, verifying
+ * that the advice can be used.
+ */
+ start_locking(lock, agent);
+ } else {
+ /*
+ * The agent will be used as the duplicate if has an allocation; if it does, that
+ * location was posted to UDS, so no update will be needed.
+ */
+ lock->update_advice = !data_vio_has_allocation(agent);
+ /*
+ * QUERYING -> WRITING transition: There was no advice or the advice wasn't valid,
+ * so try to write or compress the data.
+ */
+ start_writing(lock, agent);
+ }
+}
+
+/**
+ * start_querying() - Start deduplication for a hash lock.
+ * @lock: The initialized hash lock.
+ * @data_vio: The data_vio that has just obtained the new lock.
+ *
+ * Starts deduplication for a hash lock that has finished initializing by making the data_vio that
+ * requested it the agent, entering the QUERYING state, and using the agent to perform the UDS
+ * query on behalf of the lock.
+ */
+static void start_querying(struct hash_lock *lock, struct data_vio *data_vio)
+{
+ lock->agent = data_vio;
+ lock->state = VDO_HASH_LOCK_QUERYING;
+ data_vio->last_async_operation = VIO_ASYNC_OP_CHECK_FOR_DUPLICATION;
+ set_data_vio_hash_zone_callback(data_vio, finish_querying);
+ query_index(data_vio,
+ (data_vio_has_allocation(data_vio) ? UDS_POST : UDS_QUERY));
+}
+
+/**
+ * report_bogus_lock_state() - Complain that a data_vio has entered a hash_lock that is in an
+ * unimplemented or unusable state and continue the data_vio with an
+ * error.
+ * @lock: The hash lock.
+ * @data_vio: The data_vio attempting to enter the lock.
+ */
+static void report_bogus_lock_state(struct hash_lock *lock, struct data_vio *data_vio)
+{
+ VDO_ASSERT_LOG_ONLY(false, "hash lock must not be in unimplemented state %s",
+ get_hash_lock_state_name(lock->state));
+ continue_data_vio_with_error(data_vio, VDO_LOCK_ERROR);
+}
+
+/**
+ * vdo_continue_hash_lock() - Continue the processing state after writing, compressing, or
+ * deduplicating.
+ * @data_vio: The data_vio to continue processing in its hash lock.
+ *
+ * Asynchronously continue processing a data_vio in its hash lock after it has finished writing,
+ * compressing, or deduplicating, so it can share the result with any data_vios waiting in the hash
+ * lock, or update the UDS index, or simply release its share of the lock.
+ *
+ * Context: This must only be called in the correct thread for the hash zone.
+ */
+void vdo_continue_hash_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct hash_lock *lock = data_vio->hash_lock;
+
+ switch (lock->state) {
+ case VDO_HASH_LOCK_WRITING:
+ VDO_ASSERT_LOG_ONLY(data_vio == lock->agent,
+ "only the lock agent may continue the lock");
+ finish_writing(lock, data_vio);
+ break;
+
+ case VDO_HASH_LOCK_DEDUPING:
+ finish_deduping(lock, data_vio);
+ break;
+
+ case VDO_HASH_LOCK_BYPASSING:
+ /* This data_vio has finished the write path and the lock doesn't need it. */
+ exit_hash_lock(data_vio);
+ break;
+
+ case VDO_HASH_LOCK_INITIALIZING:
+ case VDO_HASH_LOCK_QUERYING:
+ case VDO_HASH_LOCK_UPDATING:
+ case VDO_HASH_LOCK_LOCKING:
+ case VDO_HASH_LOCK_VERIFYING:
+ case VDO_HASH_LOCK_UNLOCKING:
+ /* A lock in this state should never be re-entered. */
+ report_bogus_lock_state(lock, data_vio);
+ break;
+
+ default:
+ report_bogus_lock_state(lock, data_vio);
+ }
+}
+
+/**
+ * is_hash_collision() - Check to see if a hash collision has occurred.
+ * @lock: The lock to check.
+ * @candidate: The data_vio seeking to share the lock.
+ *
+ * Check whether the data in data_vios sharing a lock is different than in a data_vio seeking to
+ * share the lock, which should only be possible in the extremely unlikely case of a hash
+ * collision.
+ *
+ * Return: true if the given data_vio must not share the lock because it doesn't have the same data
+ * as the lock holders.
+ */
+static bool is_hash_collision(struct hash_lock *lock, struct data_vio *candidate)
+{
+ struct data_vio *lock_holder;
+ struct hash_zone *zone;
+ bool collides;
+
+ if (list_empty(&lock->duplicate_ring))
+ return false;
+
+ lock_holder = list_first_entry(&lock->duplicate_ring, struct data_vio,
+ hash_lock_entry);
+ zone = candidate->hash_zone;
+ collides = !blocks_equal(lock_holder->vio.data, candidate->vio.data);
+ if (collides)
+ increment_stat(&zone->statistics.concurrent_hash_collisions);
+ else
+ increment_stat(&zone->statistics.concurrent_data_matches);
+
+ return collides;
+}
+
+static inline int assert_hash_lock_preconditions(const struct data_vio *data_vio)
+{
+ int result;
+
+ /* FIXME: BUG_ON() and/or enter read-only mode? */
+ result = VDO_ASSERT(data_vio->hash_lock == NULL,
+ "must not already hold a hash lock");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(list_empty(&data_vio->hash_lock_entry),
+ "must not already be a member of a hash lock ring");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return VDO_ASSERT(data_vio->recovery_sequence_number == 0,
+ "must not hold a recovery lock when getting a hash lock");
+}
+
+/**
+ * vdo_acquire_hash_lock() - Acquire or share a lock on a record name.
+ * @data_vio: The data_vio acquiring a lock on its record name.
+ *
+ * Acquire or share a lock on the hash (record name) of the data in a data_vio, updating the
+ * data_vio to reference the lock. This must only be called in the correct thread for the zone. In
+ * the unlikely case of a hash collision, this function will succeed, but the data_vio will not get
+ * a lock reference.
+ */
+void vdo_acquire_hash_lock(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct hash_lock *lock;
+ int result;
+
+ assert_data_vio_in_hash_zone(data_vio);
+
+ result = assert_hash_lock_preconditions(data_vio);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ result = acquire_lock(data_vio->hash_zone, &data_vio->record_name, NULL, &lock);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ if (is_hash_collision(lock, data_vio)) {
+ /*
+ * Hash collisions are extremely unlikely, but the bogus dedupe would be a data
+ * corruption. Bypass optimization entirely. We can't compress a data_vio without
+ * a hash_lock as the compressed write depends on the hash_lock to manage the
+ * references for the compressed block.
+ */
+ write_data_vio(data_vio);
+ return;
+ }
+
+ set_hash_lock(data_vio, lock);
+ switch (lock->state) {
+ case VDO_HASH_LOCK_INITIALIZING:
+ start_querying(lock, data_vio);
+ return;
+
+ case VDO_HASH_LOCK_QUERYING:
+ case VDO_HASH_LOCK_WRITING:
+ case VDO_HASH_LOCK_UPDATING:
+ case VDO_HASH_LOCK_LOCKING:
+ case VDO_HASH_LOCK_VERIFYING:
+ case VDO_HASH_LOCK_UNLOCKING:
+ /* The lock is busy, and can't be shared yet. */
+ wait_on_hash_lock(lock, data_vio);
+ return;
+
+ case VDO_HASH_LOCK_BYPASSING:
+ /* We can't use this lock, so bypass optimization entirely. */
+ vdo_release_hash_lock(data_vio);
+ write_data_vio(data_vio);
+ return;
+
+ case VDO_HASH_LOCK_DEDUPING:
+ launch_dedupe(lock, data_vio, false);
+ return;
+
+ default:
+ /* A lock in this state should not be acquired by new VIOs. */
+ report_bogus_lock_state(lock, data_vio);
+ }
+}
+
+/**
+ * vdo_release_hash_lock() - Release a data_vio's share of a hash lock, if held, and null out the
+ * data_vio's reference to it.
+ * @data_vio: The data_vio releasing its hash lock.
+ *
+ * If the data_vio is the only one holding the lock, this also releases any resources or locks used
+ * by the hash lock (such as a PBN read lock on a block containing data with the same hash) and
+ * returns the lock to the hash zone's lock pool.
+ *
+ * Context: This must only be called in the correct thread for the hash zone.
+ */
+void vdo_release_hash_lock(struct data_vio *data_vio)
+{
+ u64 lock_key;
+ struct hash_lock *lock = data_vio->hash_lock;
+ struct hash_zone *zone = data_vio->hash_zone;
+
+ if (lock == NULL)
+ return;
+
+ set_hash_lock(data_vio, NULL);
+
+ if (lock->reference_count > 0) {
+ /* The lock is still in use by other data_vios. */
+ return;
+ }
+
+ lock_key = hash_lock_key(lock);
+ if (lock->registered) {
+ struct hash_lock *removed;
+
+ removed = vdo_int_map_remove(zone->hash_lock_map, lock_key);
+ VDO_ASSERT_LOG_ONLY(lock == removed,
+ "hash lock being released must have been mapped");
+ } else {
+ VDO_ASSERT_LOG_ONLY(lock != vdo_int_map_get(zone->hash_lock_map, lock_key),
+ "unregistered hash lock must not be in the lock map");
+ }
+
+ VDO_ASSERT_LOG_ONLY(!vdo_waitq_has_waiters(&lock->waiters),
+ "hash lock returned to zone must have no waiters");
+ VDO_ASSERT_LOG_ONLY((lock->duplicate_lock == NULL),
+ "hash lock returned to zone must not reference a PBN lock");
+ VDO_ASSERT_LOG_ONLY((lock->state == VDO_HASH_LOCK_BYPASSING),
+ "returned hash lock must not be in use with state %s",
+ get_hash_lock_state_name(lock->state));
+ VDO_ASSERT_LOG_ONLY(list_empty(&lock->pool_node),
+ "hash lock returned to zone must not be in a pool ring");
+ VDO_ASSERT_LOG_ONLY(list_empty(&lock->duplicate_ring),
+ "hash lock returned to zone must not reference DataVIOs");
+
+ return_hash_lock_to_pool(zone, lock);
+}
+
+/**
+ * transfer_allocation_lock() - Transfer a data_vio's downgraded allocation PBN lock to the
+ * data_vio's hash lock, converting it to a duplicate PBN lock.
+ * @data_vio: The data_vio holding the allocation lock to transfer.
+ */
+static void transfer_allocation_lock(struct data_vio *data_vio)
+{
+ struct allocation *allocation = &data_vio->allocation;
+ struct hash_lock *hash_lock = data_vio->hash_lock;
+
+ VDO_ASSERT_LOG_ONLY(data_vio->new_mapped.pbn == allocation->pbn,
+ "transferred lock must be for the block written");
+
+ allocation->pbn = VDO_ZERO_BLOCK;
+
+ VDO_ASSERT_LOG_ONLY(vdo_is_pbn_read_lock(allocation->lock),
+ "must have downgraded the allocation lock before transfer");
+
+ hash_lock->duplicate = data_vio->new_mapped;
+ data_vio->duplicate = data_vio->new_mapped;
+
+ /*
+ * Since the lock is being transferred, the holder count doesn't change (and isn't even
+ * safe to examine on this thread).
+ */
+ hash_lock->duplicate_lock = vdo_forget(allocation->lock);
+}
+
+/**
+ * vdo_share_compressed_write_lock() - Make a data_vio's hash lock a shared holder of the PBN lock
+ * on the compressed block to which its data was just written.
+ * @data_vio: The data_vio which was just compressed.
+ * @pbn_lock: The PBN lock on the compressed block.
+ *
+ * If the lock is still a write lock (as it will be for the first share), it will be converted to a
+ * read lock. This also reserves a reference count increment for the data_vio.
+ */
+void vdo_share_compressed_write_lock(struct data_vio *data_vio,
+ struct pbn_lock *pbn_lock)
+{
+ bool claimed;
+
+ VDO_ASSERT_LOG_ONLY(vdo_get_duplicate_lock(data_vio) == NULL,
+ "a duplicate PBN lock should not exist when writing");
+ VDO_ASSERT_LOG_ONLY(vdo_is_state_compressed(data_vio->new_mapped.state),
+ "lock transfer must be for a compressed write");
+ assert_data_vio_in_new_mapped_zone(data_vio);
+
+ /* First sharer downgrades the lock. */
+ if (!vdo_is_pbn_read_lock(pbn_lock))
+ vdo_downgrade_pbn_write_lock(pbn_lock, true);
+
+ /*
+ * Get a share of the PBN lock, ensuring it cannot be released until after this data_vio
+ * has had a chance to journal a reference.
+ */
+ data_vio->duplicate = data_vio->new_mapped;
+ data_vio->hash_lock->duplicate = data_vio->new_mapped;
+ set_duplicate_lock(data_vio->hash_lock, pbn_lock);
+
+ /*
+ * Claim a reference for this data_vio. Necessary since another hash_lock might start
+ * deduplicating against it before our incRef.
+ */
+ claimed = vdo_claim_pbn_lock_increment(pbn_lock);
+ VDO_ASSERT_LOG_ONLY(claimed, "impossible to fail to claim an initial increment");
+}
+
+static void start_uds_queue(void *ptr)
+{
+ /*
+ * Allow the UDS dedupe worker thread to do memory allocations. It will only do allocations
+ * during the UDS calls that open or close an index, but those allocations can safely sleep
+ * while reserving a large amount of memory. We could use an allocations_allowed boolean
+ * (like the base threads do), but it would be an unnecessary embellishment.
+ */
+ struct vdo_thread *thread = vdo_get_work_queue_owner(vdo_get_current_work_queue());
+
+ vdo_register_allocating_thread(&thread->allocating_thread, NULL);
+}
+
+static void finish_uds_queue(void *ptr __always_unused)
+{
+ vdo_unregister_allocating_thread();
+}
+
+static void close_index(struct hash_zones *zones)
+ __must_hold(&zones->lock)
+{
+ int result;
+
+ /*
+ * Change the index state so that get_index_statistics() will not try to use the index
+ * session we are closing.
+ */
+ zones->index_state = IS_CHANGING;
+ /* Close the index session, while not holding the lock. */
+ spin_unlock(&zones->lock);
+ result = uds_close_index(zones->index_session);
+
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "Error closing index");
+ spin_lock(&zones->lock);
+ zones->index_state = IS_CLOSED;
+ zones->error_flag |= result != UDS_SUCCESS;
+ /* ASSERTION: We leave in IS_CLOSED state. */
+}
+
+static void open_index(struct hash_zones *zones)
+ __must_hold(&zones->lock)
+{
+ /* ASSERTION: We enter in IS_CLOSED state. */
+ int result;
+ bool create_flag = zones->create_flag;
+
+ zones->create_flag = false;
+ /*
+ * Change the index state so that the it will be reported to the outside world as
+ * "opening".
+ */
+ zones->index_state = IS_CHANGING;
+ zones->error_flag = false;
+
+ /* Open the index session, while not holding the lock */
+ spin_unlock(&zones->lock);
+ result = uds_open_index(create_flag ? UDS_CREATE : UDS_LOAD,
+ &zones->parameters, zones->index_session);
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "Error opening index");
+
+ spin_lock(&zones->lock);
+ if (!create_flag) {
+ switch (result) {
+ case -ENOENT:
+ /*
+ * Either there is no index, or there is no way we can recover the index.
+ * We will be called again and try to create a new index.
+ */
+ zones->index_state = IS_CLOSED;
+ zones->create_flag = true;
+ return;
+ default:
+ break;
+ }
+ }
+ if (result == UDS_SUCCESS) {
+ zones->index_state = IS_OPENED;
+ } else {
+ zones->index_state = IS_CLOSED;
+ zones->index_target = IS_CLOSED;
+ zones->error_flag = true;
+ spin_unlock(&zones->lock);
+ vdo_log_info("Setting UDS index target state to error");
+ spin_lock(&zones->lock);
+ }
+ /*
+ * ASSERTION: On success, we leave in IS_OPENED state.
+ * ASSERTION: On failure, we leave in IS_CLOSED state.
+ */
+}
+
+static void change_dedupe_state(struct vdo_completion *completion)
+{
+ struct hash_zones *zones = as_hash_zones(completion);
+
+ spin_lock(&zones->lock);
+
+ /* Loop until the index is in the target state and the create flag is clear. */
+ while (vdo_is_state_normal(&zones->state) &&
+ ((zones->index_state != zones->index_target) || zones->create_flag)) {
+ if (zones->index_state == IS_OPENED)
+ close_index(zones);
+ else
+ open_index(zones);
+ }
+
+ zones->changing = false;
+ spin_unlock(&zones->lock);
+}
+
+static void start_expiration_timer(struct dedupe_context *context)
+{
+ u64 start_time = context->submission_jiffies;
+ u64 end_time;
+
+ if (!change_timer_state(context->zone, DEDUPE_QUERY_TIMER_IDLE,
+ DEDUPE_QUERY_TIMER_RUNNING))
+ return;
+
+ end_time = max(start_time + vdo_dedupe_index_timeout_jiffies,
+ jiffies + vdo_dedupe_index_min_timer_jiffies);
+ mod_timer(&context->zone->timer, end_time);
+}
+
+/**
+ * report_dedupe_timeouts() - Record and eventually report that some dedupe requests reached their
+ * expiration time without getting answers, so we timed them out.
+ * @zones: the hash zones.
+ * @timeouts: the number of newly timed out requests.
+ */
+static void report_dedupe_timeouts(struct hash_zones *zones, unsigned int timeouts)
+{
+ atomic64_add(timeouts, &zones->timeouts);
+ spin_lock(&zones->lock);
+ if (__ratelimit(&zones->ratelimiter)) {
+ u64 unreported = atomic64_read(&zones->timeouts);
+
+ unreported -= zones->reported_timeouts;
+ vdo_log_debug("UDS index timeout on %llu requests",
+ (unsigned long long) unreported);
+ zones->reported_timeouts += unreported;
+ }
+ spin_unlock(&zones->lock);
+}
+
+static int initialize_index(struct vdo *vdo, struct hash_zones *zones)
+{
+ int result;
+ off_t uds_offset;
+ struct volume_geometry geometry = vdo->geometry;
+ static const struct vdo_work_queue_type uds_queue_type = {
+ .start = start_uds_queue,
+ .finish = finish_uds_queue,
+ .max_priority = UDS_Q_MAX_PRIORITY,
+ .default_priority = UDS_Q_PRIORITY,
+ };
+
+ vdo_set_dedupe_index_timeout_interval(vdo_dedupe_index_timeout_interval);
+ vdo_set_dedupe_index_min_timer_interval(vdo_dedupe_index_min_timer_interval);
+
+ /*
+ * Since we will save up the timeouts that would have been reported but were ratelimited,
+ * we don't need to report ratelimiting.
+ */
+ ratelimit_default_init(&zones->ratelimiter);
+ ratelimit_set_flags(&zones->ratelimiter, RATELIMIT_MSG_ON_RELEASE);
+ uds_offset = ((vdo_get_index_region_start(geometry) -
+ geometry.bio_offset) * VDO_BLOCK_SIZE);
+ zones->parameters = (struct uds_parameters) {
+ .bdev = vdo->device_config->owned_device->bdev,
+ .offset = uds_offset,
+ .size = (vdo_get_index_region_size(geometry) * VDO_BLOCK_SIZE),
+ .memory_size = geometry.index_config.mem,
+ .sparse = geometry.index_config.sparse,
+ .nonce = (u64) geometry.nonce,
+ };
+
+ result = uds_create_index_session(&zones->index_session);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = vdo_make_thread(vdo, vdo->thread_config.dedupe_thread, &uds_queue_type,
+ 1, NULL);
+ if (result != VDO_SUCCESS) {
+ uds_destroy_index_session(vdo_forget(zones->index_session));
+ vdo_log_error("UDS index queue initialization failed (%d)", result);
+ return result;
+ }
+
+ vdo_initialize_completion(&zones->completion, vdo, VDO_HASH_ZONES_COMPLETION);
+ vdo_set_completion_callback(&zones->completion, change_dedupe_state,
+ vdo->thread_config.dedupe_thread);
+ return VDO_SUCCESS;
+}
+
+/**
+ * finish_index_operation() - This is the UDS callback for index queries.
+ * @request: The uds request which has just completed.
+ */
+static void finish_index_operation(struct uds_request *request)
+{
+ struct dedupe_context *context = container_of(request, struct dedupe_context,
+ request);
+
+ if (change_context_state(context, DEDUPE_CONTEXT_PENDING,
+ DEDUPE_CONTEXT_COMPLETE)) {
+ /*
+ * This query has not timed out, so send its data_vio back to its hash zone to
+ * process the results.
+ */
+ continue_data_vio(context->requestor);
+ return;
+ }
+
+ /*
+ * This query has timed out, so try to mark it complete and hence eligible for reuse. Its
+ * data_vio has already moved on.
+ */
+ if (!change_context_state(context, DEDUPE_CONTEXT_TIMED_OUT,
+ DEDUPE_CONTEXT_TIMED_OUT_COMPLETE)) {
+ VDO_ASSERT_LOG_ONLY(false, "uds request was timed out (state %d)",
+ atomic_read(&context->state));
+ }
+
+ vdo_funnel_queue_put(context->zone->timed_out_complete, &context->queue_entry);
+}
+
+/**
+ * check_for_drain_complete() - Check whether this zone has drained.
+ * @zone: The zone to check.
+ */
+static void check_for_drain_complete(struct hash_zone *zone)
+{
+ data_vio_count_t recycled = 0;
+
+ if (!vdo_is_state_draining(&zone->state))
+ return;
+
+ if ((atomic_read(&zone->timer_state) == DEDUPE_QUERY_TIMER_IDLE) ||
+ change_timer_state(zone, DEDUPE_QUERY_TIMER_RUNNING,
+ DEDUPE_QUERY_TIMER_IDLE)) {
+ del_timer_sync(&zone->timer);
+ } else {
+ /*
+ * There is an in flight time-out, which must get processed before we can continue.
+ */
+ return;
+ }
+
+ for (;;) {
+ struct dedupe_context *context;
+ struct funnel_queue_entry *entry;
+
+ entry = vdo_funnel_queue_poll(zone->timed_out_complete);
+ if (entry == NULL)
+ break;
+
+ context = container_of(entry, struct dedupe_context, queue_entry);
+ atomic_set(&context->state, DEDUPE_CONTEXT_IDLE);
+ list_add(&context->list_entry, &zone->available);
+ recycled++;
+ }
+
+ if (recycled > 0)
+ WRITE_ONCE(zone->active, zone->active - recycled);
+ VDO_ASSERT_LOG_ONLY(READ_ONCE(zone->active) == 0, "all contexts inactive");
+ vdo_finish_draining(&zone->state);
+}
+
+static void timeout_index_operations_callback(struct vdo_completion *completion)
+{
+ struct dedupe_context *context, *tmp;
+ struct hash_zone *zone = as_hash_zone(completion);
+ u64 timeout_jiffies = msecs_to_jiffies(vdo_dedupe_index_timeout_interval);
+ unsigned long cutoff = jiffies - timeout_jiffies;
+ unsigned int timed_out = 0;
+
+ atomic_set(&zone->timer_state, DEDUPE_QUERY_TIMER_IDLE);
+ list_for_each_entry_safe(context, tmp, &zone->pending, list_entry) {
+ if (cutoff <= context->submission_jiffies) {
+ /*
+ * We have reached the oldest query which has not timed out yet, so restart
+ * the timer.
+ */
+ start_expiration_timer(context);
+ break;
+ }
+
+ if (!change_context_state(context, DEDUPE_CONTEXT_PENDING,
+ DEDUPE_CONTEXT_TIMED_OUT)) {
+ /*
+ * This context completed between the time the timeout fired, and now. We
+ * can treat it as a successful query, its requestor is already enqueued
+ * to process it.
+ */
+ continue;
+ }
+
+ /*
+ * Remove this context from the pending list so we won't look at it again on a
+ * subsequent timeout. Once the index completes it, it will be reused. Meanwhile,
+ * send its requestor on its way.
+ */
+ list_del_init(&context->list_entry);
+ continue_data_vio(context->requestor);
+ timed_out++;
+ }
+
+ if (timed_out > 0)
+ report_dedupe_timeouts(completion->vdo->hash_zones, timed_out);
+
+ check_for_drain_complete(zone);
+}
+
+static void timeout_index_operations(struct timer_list *t)
+{
+ struct hash_zone *zone = from_timer(zone, t, timer);
+
+ if (change_timer_state(zone, DEDUPE_QUERY_TIMER_RUNNING,
+ DEDUPE_QUERY_TIMER_FIRED))
+ vdo_launch_completion(&zone->completion);
+}
+
+static int __must_check initialize_zone(struct vdo *vdo, struct hash_zones *zones,
+ zone_count_t zone_number)
+{
+ int result;
+ data_vio_count_t i;
+ struct hash_zone *zone = &zones->zones[zone_number];
+
+ result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->hash_lock_map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_set_admin_state_code(&zone->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ zone->zone_number = zone_number;
+ zone->thread_id = vdo->thread_config.hash_zone_threads[zone_number];
+ vdo_initialize_completion(&zone->completion, vdo, VDO_HASH_ZONE_COMPLETION);
+ vdo_set_completion_callback(&zone->completion, timeout_index_operations_callback,
+ zone->thread_id);
+ INIT_LIST_HEAD(&zone->lock_pool);
+ result = vdo_allocate(LOCK_POOL_CAPACITY, struct hash_lock, "hash_lock array",
+ &zone->lock_array);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (i = 0; i < LOCK_POOL_CAPACITY; i++)
+ return_hash_lock_to_pool(zone, &zone->lock_array[i]);
+
+ INIT_LIST_HEAD(&zone->available);
+ INIT_LIST_HEAD(&zone->pending);
+ result = vdo_make_funnel_queue(&zone->timed_out_complete);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ timer_setup(&zone->timer, timeout_index_operations, 0);
+
+ for (i = 0; i < MAXIMUM_VDO_USER_VIOS; i++) {
+ struct dedupe_context *context = &zone->contexts[i];
+
+ context->zone = zone;
+ context->request.callback = finish_index_operation;
+ context->request.session = zones->index_session;
+ list_add(&context->list_entry, &zone->available);
+ }
+
+ return vdo_make_default_thread(vdo, zone->thread_id);
+}
+
+/** get_thread_id_for_zone() - Implements vdo_zone_thread_getter_fn. */
+static thread_id_t get_thread_id_for_zone(void *context, zone_count_t zone_number)
+{
+ struct hash_zones *zones = context;
+
+ return zones->zones[zone_number].thread_id;
+}
+
+/**
+ * vdo_make_hash_zones() - Create the hash zones.
+ *
+ * @vdo: The vdo to which the zone will belong.
+ * @zones_ptr: A pointer to hold the zones.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_make_hash_zones(struct vdo *vdo, struct hash_zones **zones_ptr)
+{
+ int result;
+ struct hash_zones *zones;
+ zone_count_t z;
+ zone_count_t zone_count = vdo->thread_config.hash_zone_count;
+
+ if (zone_count == 0)
+ return VDO_SUCCESS;
+
+ result = vdo_allocate_extended(struct hash_zones, zone_count, struct hash_zone,
+ __func__, &zones);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = initialize_index(vdo, zones);
+ if (result != VDO_SUCCESS) {
+ vdo_free(zones);
+ return result;
+ }
+
+ vdo_set_admin_state_code(&zones->state, VDO_ADMIN_STATE_NEW);
+
+ zones->zone_count = zone_count;
+ for (z = 0; z < zone_count; z++) {
+ result = initialize_zone(vdo, zones, z);
+ if (result != VDO_SUCCESS) {
+ vdo_free_hash_zones(zones);
+ return result;
+ }
+ }
+
+ result = vdo_make_action_manager(zones->zone_count, get_thread_id_for_zone,
+ vdo->thread_config.admin_thread, zones, NULL,
+ vdo, &zones->manager);
+ if (result != VDO_SUCCESS) {
+ vdo_free_hash_zones(zones);
+ return result;
+ }
+
+ *zones_ptr = zones;
+ return VDO_SUCCESS;
+}
+
+void vdo_finish_dedupe_index(struct hash_zones *zones)
+{
+ if (zones == NULL)
+ return;
+
+ uds_destroy_index_session(vdo_forget(zones->index_session));
+}
+
+/**
+ * vdo_free_hash_zones() - Free the hash zones.
+ * @zones: The zone to free.
+ */
+void vdo_free_hash_zones(struct hash_zones *zones)
+{
+ zone_count_t i;
+
+ if (zones == NULL)
+ return;
+
+ vdo_free(vdo_forget(zones->manager));
+
+ for (i = 0; i < zones->zone_count; i++) {
+ struct hash_zone *zone = &zones->zones[i];
+
+ vdo_free_funnel_queue(vdo_forget(zone->timed_out_complete));
+ vdo_int_map_free(vdo_forget(zone->hash_lock_map));
+ vdo_free(vdo_forget(zone->lock_array));
+ }
+
+ if (zones->index_session != NULL)
+ vdo_finish_dedupe_index(zones);
+
+ ratelimit_state_exit(&zones->ratelimiter);
+ vdo_free(zones);
+}
+
+static void initiate_suspend_index(struct admin_state *state)
+{
+ struct hash_zones *zones = container_of(state, struct hash_zones, state);
+ enum index_state index_state;
+
+ spin_lock(&zones->lock);
+ index_state = zones->index_state;
+ spin_unlock(&zones->lock);
+
+ if (index_state != IS_CLOSED) {
+ bool save = vdo_is_state_saving(&zones->state);
+ int result;
+
+ result = uds_suspend_index_session(zones->index_session, save);
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "Error suspending dedupe index");
+ }
+
+ vdo_finish_draining(state);
+}
+
+/**
+ * suspend_index() - Suspend the UDS index prior to draining hash zones.
+ *
+ * Implements vdo_action_preamble_fn
+ */
+static void suspend_index(void *context, struct vdo_completion *completion)
+{
+ struct hash_zones *zones = context;
+
+ vdo_start_draining(&zones->state,
+ vdo_get_current_manager_operation(zones->manager), completion,
+ initiate_suspend_index);
+}
+
+/**
+ * initiate_drain() - Initiate a drain.
+ *
+ * Implements vdo_admin_initiator_fn.
+ */
+static void initiate_drain(struct admin_state *state)
+{
+ check_for_drain_complete(container_of(state, struct hash_zone, state));
+}
+
+/**
+ * drain_hash_zone() - Drain a hash zone.
+ *
+ * Implements vdo_zone_action_fn.
+ */
+static void drain_hash_zone(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct hash_zones *zones = context;
+
+ vdo_start_draining(&zones->zones[zone_number].state,
+ vdo_get_current_manager_operation(zones->manager), parent,
+ initiate_drain);
+}
+
+/** vdo_drain_hash_zones() - Drain all hash zones. */
+void vdo_drain_hash_zones(struct hash_zones *zones, struct vdo_completion *parent)
+{
+ vdo_schedule_operation(zones->manager, parent->vdo->suspend_type, suspend_index,
+ drain_hash_zone, NULL, parent);
+}
+
+static void launch_dedupe_state_change(struct hash_zones *zones)
+ __must_hold(&zones->lock)
+{
+ /* ASSERTION: We enter with the lock held. */
+ if (zones->changing || !vdo_is_state_normal(&zones->state))
+ /* Either a change is already in progress, or changes are not allowed. */
+ return;
+
+ if (zones->create_flag || (zones->index_state != zones->index_target)) {
+ zones->changing = true;
+ vdo_launch_completion(&zones->completion);
+ return;
+ }
+
+ /* ASSERTION: We exit with the lock held. */
+}
+
+/**
+ * resume_index() - Resume the UDS index prior to resuming hash zones.
+ *
+ * Implements vdo_action_preamble_fn
+ */
+static void resume_index(void *context, struct vdo_completion *parent)
+{
+ struct hash_zones *zones = context;
+ struct device_config *config = parent->vdo->device_config;
+ int result;
+
+ zones->parameters.bdev = config->owned_device->bdev;
+ result = uds_resume_index_session(zones->index_session, zones->parameters.bdev);
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "Error resuming dedupe index");
+
+ spin_lock(&zones->lock);
+ vdo_resume_if_quiescent(&zones->state);
+
+ if (config->deduplication) {
+ zones->index_target = IS_OPENED;
+ WRITE_ONCE(zones->dedupe_flag, true);
+ } else {
+ zones->index_target = IS_CLOSED;
+ }
+
+ launch_dedupe_state_change(zones);
+ spin_unlock(&zones->lock);
+
+ vdo_finish_completion(parent);
+}
+
+/**
+ * resume_hash_zone() - Resume a hash zone.
+ *
+ * Implements vdo_zone_action_fn.
+ */
+static void resume_hash_zone(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct hash_zone *zone = &(((struct hash_zones *) context)->zones[zone_number]);
+
+ vdo_fail_completion(parent, vdo_resume_if_quiescent(&zone->state));
+}
+
+/**
+ * vdo_resume_hash_zones() - Resume a set of hash zones.
+ * @zones: The hash zones to resume.
+ * @parent: The object to notify when the zones have resumed.
+ */
+void vdo_resume_hash_zones(struct hash_zones *zones, struct vdo_completion *parent)
+{
+ if (vdo_is_read_only(parent->vdo)) {
+ vdo_launch_completion(parent);
+ return;
+ }
+
+ vdo_schedule_operation(zones->manager, VDO_ADMIN_STATE_RESUMING, resume_index,
+ resume_hash_zone, NULL, parent);
+}
+
+/**
+ * get_hash_zone_statistics() - Add the statistics for this hash zone to the tally for all zones.
+ * @zone: The hash zone to query.
+ * @tally: The tally
+ */
+static void get_hash_zone_statistics(const struct hash_zone *zone,
+ struct hash_lock_statistics *tally)
+{
+ const struct hash_lock_statistics *stats = &zone->statistics;
+
+ tally->dedupe_advice_valid += READ_ONCE(stats->dedupe_advice_valid);
+ tally->dedupe_advice_stale += READ_ONCE(stats->dedupe_advice_stale);
+ tally->concurrent_data_matches += READ_ONCE(stats->concurrent_data_matches);
+ tally->concurrent_hash_collisions += READ_ONCE(stats->concurrent_hash_collisions);
+ tally->curr_dedupe_queries += READ_ONCE(zone->active);
+}
+
+static void get_index_statistics(struct hash_zones *zones,
+ struct index_statistics *stats)
+{
+ enum index_state state;
+ struct uds_index_stats index_stats;
+ int result;
+
+ spin_lock(&zones->lock);
+ state = zones->index_state;
+ spin_unlock(&zones->lock);
+
+ if (state != IS_OPENED)
+ return;
+
+ result = uds_get_index_session_stats(zones->index_session, &index_stats);
+ if (result != UDS_SUCCESS) {
+ vdo_log_error_strerror(result, "Error reading index stats");
+ return;
+ }
+
+ stats->entries_indexed = index_stats.entries_indexed;
+ stats->posts_found = index_stats.posts_found;
+ stats->posts_not_found = index_stats.posts_not_found;
+ stats->queries_found = index_stats.queries_found;
+ stats->queries_not_found = index_stats.queries_not_found;
+ stats->updates_found = index_stats.updates_found;
+ stats->updates_not_found = index_stats.updates_not_found;
+ stats->entries_discarded = index_stats.entries_discarded;
+}
+
+/**
+ * vdo_get_dedupe_statistics() - Tally the statistics from all the hash zones and the UDS index.
+ * @hash_zones: The hash zones to query
+ *
+ * Return: The sum of the hash lock statistics from all hash zones plus the statistics from the UDS
+ * index
+ */
+void vdo_get_dedupe_statistics(struct hash_zones *zones, struct vdo_statistics *stats)
+
+{
+ zone_count_t zone;
+
+ for (zone = 0; zone < zones->zone_count; zone++)
+ get_hash_zone_statistics(&zones->zones[zone], &stats->hash_lock);
+
+ get_index_statistics(zones, &stats->index);
+
+ /*
+ * zones->timeouts gives the number of timeouts, and dedupe_context_busy gives the number
+ * of queries not made because of earlier timeouts.
+ */
+ stats->dedupe_advice_timeouts =
+ (atomic64_read(&zones->timeouts) + atomic64_read(&zones->dedupe_context_busy));
+}
+
+/**
+ * vdo_select_hash_zone() - Select the hash zone responsible for locking a given record name.
+ * @zones: The hash_zones from which to select.
+ * @name: The record name.
+ *
+ * Return: The hash zone responsible for the record name.
+ */
+struct hash_zone *vdo_select_hash_zone(struct hash_zones *zones,
+ const struct uds_record_name *name)
+{
+ /*
+ * Use a fragment of the record name as a hash code. Eight bits of hash should suffice
+ * since the number of hash zones is small.
+ * TODO: Verify that the first byte is independent enough.
+ */
+ u32 hash = name->name[0];
+
+ /*
+ * Scale the 8-bit hash fragment to a zone index by treating it as a binary fraction and
+ * multiplying that by the zone count. If the hash is uniformly distributed over [0 ..
+ * 2^8-1], then (hash * count / 2^8) should be uniformly distributed over [0 .. count-1].
+ * The multiply and shift is much faster than a divide (modulus) on X86 CPUs.
+ */
+ hash = (hash * zones->zone_count) >> 8;
+ return &zones->zones[hash];
+}
+
+/**
+ * dump_hash_lock() - Dump a compact description of hash_lock to the log if the lock is not on the
+ * free list.
+ * @lock: The hash lock to dump.
+ */
+static void dump_hash_lock(const struct hash_lock *lock)
+{
+ const char *state;
+
+ if (!list_empty(&lock->pool_node)) {
+ /* This lock is on the free list. */
+ return;
+ }
+
+ /*
+ * Necessarily cryptic since we can log a lot of these. First three chars of state is
+ * unambiguous. 'U' indicates a lock not registered in the map.
+ */
+ state = get_hash_lock_state_name(lock->state);
+ vdo_log_info(" hl %px: %3.3s %c%llu/%u rc=%u wc=%zu agt=%px",
+ lock, state, (lock->registered ? 'D' : 'U'),
+ (unsigned long long) lock->duplicate.pbn,
+ lock->duplicate.state, lock->reference_count,
+ vdo_waitq_num_waiters(&lock->waiters), lock->agent);
+}
+
+static const char *index_state_to_string(struct hash_zones *zones,
+ enum index_state state)
+{
+ if (!vdo_is_state_normal(&zones->state))
+ return SUSPENDED;
+
+ switch (state) {
+ case IS_CLOSED:
+ return zones->error_flag ? ERROR : CLOSED;
+ case IS_CHANGING:
+ return zones->index_target == IS_OPENED ? OPENING : CLOSING;
+ case IS_OPENED:
+ return READ_ONCE(zones->dedupe_flag) ? ONLINE : OFFLINE;
+ default:
+ return UNKNOWN;
+ }
+}
+
+/**
+ * dump_hash_zone() - Dump information about a hash zone to the log for debugging.
+ * @zone: The zone to dump.
+ */
+static void dump_hash_zone(const struct hash_zone *zone)
+{
+ data_vio_count_t i;
+
+ if (zone->hash_lock_map == NULL) {
+ vdo_log_info("struct hash_zone %u: NULL map", zone->zone_number);
+ return;
+ }
+
+ vdo_log_info("struct hash_zone %u: mapSize=%zu",
+ zone->zone_number, vdo_int_map_size(zone->hash_lock_map));
+ for (i = 0; i < LOCK_POOL_CAPACITY; i++)
+ dump_hash_lock(&zone->lock_array[i]);
+}
+
+/**
+ * vdo_dump_hash_zones() - Dump information about the hash zones to the log for debugging.
+ * @zones: The zones to dump.
+ */
+void vdo_dump_hash_zones(struct hash_zones *zones)
+{
+ const char *state, *target;
+ zone_count_t zone;
+
+ spin_lock(&zones->lock);
+ state = index_state_to_string(zones, zones->index_state);
+ target = (zones->changing ? index_state_to_string(zones, zones->index_target) : NULL);
+ spin_unlock(&zones->lock);
+
+ vdo_log_info("UDS index: state: %s", state);
+ if (target != NULL)
+ vdo_log_info("UDS index: changing to state: %s", target);
+
+ for (zone = 0; zone < zones->zone_count; zone++)
+ dump_hash_zone(&zones->zones[zone]);
+}
+
+void vdo_set_dedupe_index_timeout_interval(unsigned int value)
+{
+ u64 alb_jiffies;
+
+ /* Arbitrary maximum value is two minutes */
+ if (value > 120000)
+ value = 120000;
+ /* Arbitrary minimum value is 2 jiffies */
+ alb_jiffies = msecs_to_jiffies(value);
+
+ if (alb_jiffies < 2) {
+ alb_jiffies = 2;
+ value = jiffies_to_msecs(alb_jiffies);
+ }
+ vdo_dedupe_index_timeout_interval = value;
+ vdo_dedupe_index_timeout_jiffies = alb_jiffies;
+}
+
+void vdo_set_dedupe_index_min_timer_interval(unsigned int value)
+{
+ u64 min_jiffies;
+
+ /* Arbitrary maximum value is one second */
+ if (value > 1000)
+ value = 1000;
+
+ /* Arbitrary minimum value is 2 jiffies */
+ min_jiffies = msecs_to_jiffies(value);
+
+ if (min_jiffies < 2) {
+ min_jiffies = 2;
+ value = jiffies_to_msecs(min_jiffies);
+ }
+
+ vdo_dedupe_index_min_timer_interval = value;
+ vdo_dedupe_index_min_timer_jiffies = min_jiffies;
+}
+
+/**
+ * acquire_context() - Acquire a dedupe context from a hash_zone if any are available.
+ * @zone: the hash zone
+ *
+ * Return: A dedupe_context or NULL if none are available
+ */
+static struct dedupe_context * __must_check acquire_context(struct hash_zone *zone)
+{
+ struct dedupe_context *context;
+ struct funnel_queue_entry *entry;
+
+ assert_in_hash_zone(zone, __func__);
+
+ if (!list_empty(&zone->available)) {
+ WRITE_ONCE(zone->active, zone->active + 1);
+ context = list_first_entry(&zone->available, struct dedupe_context,
+ list_entry);
+ list_del_init(&context->list_entry);
+ return context;
+ }
+
+ entry = vdo_funnel_queue_poll(zone->timed_out_complete);
+ return ((entry == NULL) ?
+ NULL : container_of(entry, struct dedupe_context, queue_entry));
+}
+
+static void prepare_uds_request(struct uds_request *request, struct data_vio *data_vio,
+ enum uds_request_type operation)
+{
+ request->record_name = data_vio->record_name;
+ request->type = operation;
+ if ((operation == UDS_POST) || (operation == UDS_UPDATE)) {
+ size_t offset = 0;
+ struct uds_record_data *encoding = &request->new_metadata;
+
+ encoding->data[offset++] = UDS_ADVICE_VERSION;
+ encoding->data[offset++] = data_vio->new_mapped.state;
+ put_unaligned_le64(data_vio->new_mapped.pbn, &encoding->data[offset]);
+ offset += sizeof(u64);
+ BUG_ON(offset != UDS_ADVICE_SIZE);
+ }
+}
+
+/*
+ * The index operation will inquire about data_vio.record_name, providing (if the operation is
+ * appropriate) advice from the data_vio's new_mapped fields. The advice found in the index (or
+ * NULL if none) will be returned via receive_data_vio_dedupe_advice(). dedupe_context.status is
+ * set to the return status code of any asynchronous index processing.
+ */
+static void query_index(struct data_vio *data_vio, enum uds_request_type operation)
+{
+ int result;
+ struct dedupe_context *context;
+ struct vdo *vdo = vdo_from_data_vio(data_vio);
+ struct hash_zone *zone = data_vio->hash_zone;
+
+ assert_data_vio_in_hash_zone(data_vio);
+
+ if (!READ_ONCE(vdo->hash_zones->dedupe_flag)) {
+ continue_data_vio(data_vio);
+ return;
+ }
+
+ context = acquire_context(zone);
+ if (context == NULL) {
+ atomic64_inc(&vdo->hash_zones->dedupe_context_busy);
+ continue_data_vio(data_vio);
+ return;
+ }
+
+ data_vio->dedupe_context = context;
+ context->requestor = data_vio;
+ context->submission_jiffies = jiffies;
+ prepare_uds_request(&context->request, data_vio, operation);
+ atomic_set(&context->state, DEDUPE_CONTEXT_PENDING);
+ list_add_tail(&context->list_entry, &zone->pending);
+ start_expiration_timer(context);
+ result = uds_launch_request(&context->request);
+ if (result != UDS_SUCCESS) {
+ context->request.status = result;
+ finish_index_operation(&context->request);
+ }
+}
+
+static void set_target_state(struct hash_zones *zones, enum index_state target,
+ bool change_dedupe, bool dedupe, bool set_create)
+{
+ const char *old_state, *new_state;
+
+ spin_lock(&zones->lock);
+ old_state = index_state_to_string(zones, zones->index_target);
+ if (change_dedupe)
+ WRITE_ONCE(zones->dedupe_flag, dedupe);
+
+ if (set_create)
+ zones->create_flag = true;
+
+ zones->index_target = target;
+ launch_dedupe_state_change(zones);
+ new_state = index_state_to_string(zones, zones->index_target);
+ spin_unlock(&zones->lock);
+
+ if (old_state != new_state)
+ vdo_log_info("Setting UDS index target state to %s", new_state);
+}
+
+const char *vdo_get_dedupe_index_state_name(struct hash_zones *zones)
+{
+ const char *state;
+
+ spin_lock(&zones->lock);
+ state = index_state_to_string(zones, zones->index_state);
+ spin_unlock(&zones->lock);
+
+ return state;
+}
+
+/* Handle a dmsetup message relevant to the index. */
+int vdo_message_dedupe_index(struct hash_zones *zones, const char *name)
+{
+ if (strcasecmp(name, "index-close") == 0) {
+ set_target_state(zones, IS_CLOSED, false, false, false);
+ return 0;
+ } else if (strcasecmp(name, "index-create") == 0) {
+ set_target_state(zones, IS_OPENED, false, false, true);
+ return 0;
+ } else if (strcasecmp(name, "index-disable") == 0) {
+ set_target_state(zones, IS_OPENED, true, false, false);
+ return 0;
+ } else if (strcasecmp(name, "index-enable") == 0) {
+ set_target_state(zones, IS_OPENED, true, true, false);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+void vdo_set_dedupe_state_normal(struct hash_zones *zones)
+{
+ vdo_set_admin_state_code(&zones->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+}
+
+/* If create_flag, create a new index without first attempting to load an existing index. */
+void vdo_start_dedupe_index(struct hash_zones *zones, bool create_flag)
+{
+ set_target_state(zones, IS_OPENED, true, true, create_flag);
+}
diff --git a/drivers/md/dm-vdo/dedupe.h b/drivers/md/dm-vdo/dedupe.h
new file mode 100644
index 000000000000..9000d6f3eece
--- /dev/null
+++ b/drivers/md/dm-vdo/dedupe.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_DEDUPE_H
+#define VDO_DEDUPE_H
+
+#include <linux/list.h>
+#include <linux/timer.h>
+
+#include "indexer.h"
+
+#include "admin-state.h"
+#include "constants.h"
+#include "statistics.h"
+#include "types.h"
+#include "wait-queue.h"
+
+struct dedupe_context {
+ struct hash_zone *zone;
+ struct uds_request request;
+ struct list_head list_entry;
+ struct funnel_queue_entry queue_entry;
+ u64 submission_jiffies;
+ struct data_vio *requestor;
+ atomic_t state;
+};
+
+struct hash_lock;
+
+struct hash_zone {
+ /* Which hash zone this is */
+ zone_count_t zone_number;
+
+ /* The administrative state of the zone */
+ struct admin_state state;
+
+ /* The thread ID for this zone */
+ thread_id_t thread_id;
+
+ /* Mapping from record name fields to hash_locks */
+ struct int_map *hash_lock_map;
+
+ /* List containing all unused hash_locks */
+ struct list_head lock_pool;
+
+ /*
+ * Statistics shared by all hash locks in this zone. Only modified on the hash zone thread,
+ * but queried by other threads.
+ */
+ struct hash_lock_statistics statistics;
+
+ /* Array of all hash_locks */
+ struct hash_lock *lock_array;
+
+ /* These fields are used to manage the dedupe contexts */
+ struct list_head available;
+ struct list_head pending;
+ struct funnel_queue *timed_out_complete;
+ struct timer_list timer;
+ struct vdo_completion completion;
+ unsigned int active;
+ atomic_t timer_state;
+
+ /* The dedupe contexts for querying the index from this zone */
+ struct dedupe_context contexts[MAXIMUM_VDO_USER_VIOS];
+};
+
+struct hash_zones;
+
+struct pbn_lock * __must_check vdo_get_duplicate_lock(struct data_vio *data_vio);
+
+void vdo_acquire_hash_lock(struct vdo_completion *completion);
+void vdo_continue_hash_lock(struct vdo_completion *completion);
+void vdo_release_hash_lock(struct data_vio *data_vio);
+void vdo_clean_failed_hash_lock(struct data_vio *data_vio);
+void vdo_share_compressed_write_lock(struct data_vio *data_vio,
+ struct pbn_lock *pbn_lock);
+
+int __must_check vdo_make_hash_zones(struct vdo *vdo, struct hash_zones **zones_ptr);
+
+void vdo_free_hash_zones(struct hash_zones *zones);
+
+void vdo_drain_hash_zones(struct hash_zones *zones, struct vdo_completion *parent);
+
+void vdo_get_dedupe_statistics(struct hash_zones *zones, struct vdo_statistics *stats);
+
+struct hash_zone * __must_check vdo_select_hash_zone(struct hash_zones *zones,
+ const struct uds_record_name *name);
+
+void vdo_dump_hash_zones(struct hash_zones *zones);
+
+const char *vdo_get_dedupe_index_state_name(struct hash_zones *zones);
+
+u64 vdo_get_dedupe_index_timeout_count(struct hash_zones *zones);
+
+int vdo_message_dedupe_index(struct hash_zones *zones, const char *name);
+
+void vdo_set_dedupe_state_normal(struct hash_zones *zones);
+
+void vdo_start_dedupe_index(struct hash_zones *zones, bool create_flag);
+
+void vdo_resume_hash_zones(struct hash_zones *zones, struct vdo_completion *parent);
+
+void vdo_finish_dedupe_index(struct hash_zones *zones);
+
+/* Interval (in milliseconds) from submission until switching to fast path and skipping UDS. */
+extern unsigned int vdo_dedupe_index_timeout_interval;
+
+/*
+ * Minimum time interval (in milliseconds) between timer invocations to check for requests waiting
+ * for UDS that should now time out.
+ */
+extern unsigned int vdo_dedupe_index_min_timer_interval;
+
+void vdo_set_dedupe_index_timeout_interval(unsigned int value);
+void vdo_set_dedupe_index_min_timer_interval(unsigned int value);
+
+#endif /* VDO_DEDUPE_H */
diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c
new file mode 100644
index 000000000000..5a4b0a927f56
--- /dev/null
+++ b/drivers/md/dm-vdo/dm-vdo-target.c
@@ -0,0 +1,2910 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/device-mapper.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+#include "admin-state.h"
+#include "block-map.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "dedupe.h"
+#include "dump.h"
+#include "encodings.h"
+#include "errors.h"
+#include "flush.h"
+#include "io-submitter.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "message-stats.h"
+#include "recovery-journal.h"
+#include "repair.h"
+#include "slab-depot.h"
+#include "status-codes.h"
+#include "string-utils.h"
+#include "thread-device.h"
+#include "thread-registry.h"
+#include "thread-utils.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+
+enum admin_phases {
+ GROW_LOGICAL_PHASE_START,
+ GROW_LOGICAL_PHASE_GROW_BLOCK_MAP,
+ GROW_LOGICAL_PHASE_END,
+ GROW_LOGICAL_PHASE_ERROR,
+ GROW_PHYSICAL_PHASE_START,
+ GROW_PHYSICAL_PHASE_COPY_SUMMARY,
+ GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS,
+ GROW_PHYSICAL_PHASE_USE_NEW_SLABS,
+ GROW_PHYSICAL_PHASE_END,
+ GROW_PHYSICAL_PHASE_ERROR,
+ LOAD_PHASE_START,
+ LOAD_PHASE_LOAD_DEPOT,
+ LOAD_PHASE_MAKE_DIRTY,
+ LOAD_PHASE_PREPARE_TO_ALLOCATE,
+ LOAD_PHASE_SCRUB_SLABS,
+ LOAD_PHASE_DATA_REDUCTION,
+ LOAD_PHASE_FINISHED,
+ LOAD_PHASE_DRAIN_JOURNAL,
+ LOAD_PHASE_WAIT_FOR_READ_ONLY,
+ PRE_LOAD_PHASE_START,
+ PRE_LOAD_PHASE_LOAD_COMPONENTS,
+ PRE_LOAD_PHASE_END,
+ PREPARE_GROW_PHYSICAL_PHASE_START,
+ RESUME_PHASE_START,
+ RESUME_PHASE_ALLOW_READ_ONLY_MODE,
+ RESUME_PHASE_DEDUPE,
+ RESUME_PHASE_DEPOT,
+ RESUME_PHASE_JOURNAL,
+ RESUME_PHASE_BLOCK_MAP,
+ RESUME_PHASE_LOGICAL_ZONES,
+ RESUME_PHASE_PACKER,
+ RESUME_PHASE_FLUSHER,
+ RESUME_PHASE_DATA_VIOS,
+ RESUME_PHASE_END,
+ SUSPEND_PHASE_START,
+ SUSPEND_PHASE_PACKER,
+ SUSPEND_PHASE_DATA_VIOS,
+ SUSPEND_PHASE_DEDUPE,
+ SUSPEND_PHASE_FLUSHES,
+ SUSPEND_PHASE_LOGICAL_ZONES,
+ SUSPEND_PHASE_BLOCK_MAP,
+ SUSPEND_PHASE_JOURNAL,
+ SUSPEND_PHASE_DEPOT,
+ SUSPEND_PHASE_READ_ONLY_WAIT,
+ SUSPEND_PHASE_WRITE_SUPER_BLOCK,
+ SUSPEND_PHASE_END,
+};
+
+static const char * const ADMIN_PHASE_NAMES[] = {
+ "GROW_LOGICAL_PHASE_START",
+ "GROW_LOGICAL_PHASE_GROW_BLOCK_MAP",
+ "GROW_LOGICAL_PHASE_END",
+ "GROW_LOGICAL_PHASE_ERROR",
+ "GROW_PHYSICAL_PHASE_START",
+ "GROW_PHYSICAL_PHASE_COPY_SUMMARY",
+ "GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS",
+ "GROW_PHYSICAL_PHASE_USE_NEW_SLABS",
+ "GROW_PHYSICAL_PHASE_END",
+ "GROW_PHYSICAL_PHASE_ERROR",
+ "LOAD_PHASE_START",
+ "LOAD_PHASE_LOAD_DEPOT",
+ "LOAD_PHASE_MAKE_DIRTY",
+ "LOAD_PHASE_PREPARE_TO_ALLOCATE",
+ "LOAD_PHASE_SCRUB_SLABS",
+ "LOAD_PHASE_DATA_REDUCTION",
+ "LOAD_PHASE_FINISHED",
+ "LOAD_PHASE_DRAIN_JOURNAL",
+ "LOAD_PHASE_WAIT_FOR_READ_ONLY",
+ "PRE_LOAD_PHASE_START",
+ "PRE_LOAD_PHASE_LOAD_COMPONENTS",
+ "PRE_LOAD_PHASE_END",
+ "PREPARE_GROW_PHYSICAL_PHASE_START",
+ "RESUME_PHASE_START",
+ "RESUME_PHASE_ALLOW_READ_ONLY_MODE",
+ "RESUME_PHASE_DEDUPE",
+ "RESUME_PHASE_DEPOT",
+ "RESUME_PHASE_JOURNAL",
+ "RESUME_PHASE_BLOCK_MAP",
+ "RESUME_PHASE_LOGICAL_ZONES",
+ "RESUME_PHASE_PACKER",
+ "RESUME_PHASE_FLUSHER",
+ "RESUME_PHASE_DATA_VIOS",
+ "RESUME_PHASE_END",
+ "SUSPEND_PHASE_START",
+ "SUSPEND_PHASE_PACKER",
+ "SUSPEND_PHASE_DATA_VIOS",
+ "SUSPEND_PHASE_DEDUPE",
+ "SUSPEND_PHASE_FLUSHES",
+ "SUSPEND_PHASE_LOGICAL_ZONES",
+ "SUSPEND_PHASE_BLOCK_MAP",
+ "SUSPEND_PHASE_JOURNAL",
+ "SUSPEND_PHASE_DEPOT",
+ "SUSPEND_PHASE_READ_ONLY_WAIT",
+ "SUSPEND_PHASE_WRITE_SUPER_BLOCK",
+ "SUSPEND_PHASE_END",
+};
+
+/* If we bump this, update the arrays below */
+#define TABLE_VERSION 4
+
+/* arrays for handling different table versions */
+static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 };
+/* pool name no longer used. only here for verification of older versions */
+static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 };
+
+/*
+ * Track in-use instance numbers using a flat bit array.
+ *
+ * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only
+ * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage.
+ */
+
+/*
+ * This minimum size for the bit array creates a numbering space of 0-999, which allows
+ * successive starts of the same volume to have different instance numbers in any
+ * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that
+ * the ephemeral stats have reset to zero.
+ */
+#define BIT_COUNT_MINIMUM 1000
+/* Grow the bit array by this many bits when needed */
+#define BIT_COUNT_INCREMENT 100
+
+struct instance_tracker {
+ unsigned int bit_count;
+ unsigned long *words;
+ unsigned int count;
+ unsigned int next;
+};
+
+static DEFINE_MUTEX(instances_lock);
+static struct instance_tracker instances;
+
+/**
+ * free_device_config() - Free a device config created by parse_device_config().
+ * @config: The config to free.
+ */
+static void free_device_config(struct device_config *config)
+{
+ if (config == NULL)
+ return;
+
+ if (config->owned_device != NULL)
+ dm_put_device(config->owning_target, config->owned_device);
+
+ vdo_free(config->parent_device_name);
+ vdo_free(config->original_string);
+
+ /* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */
+ memset(config, 0, sizeof(*config));
+ vdo_free(config);
+}
+
+/**
+ * get_version_number() - Decide the version number from argv.
+ *
+ * @argc: The number of table values.
+ * @argv: The array of table values.
+ * @error_ptr: A pointer to return a error string in.
+ * @version_ptr: A pointer to return the version.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int get_version_number(int argc, char **argv, char **error_ptr,
+ unsigned int *version_ptr)
+{
+ /* version, if it exists, is in a form of V<n> */
+ if (sscanf(argv[0], "V%u", version_ptr) == 1) {
+ if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) {
+ *error_ptr = "Unknown version number detected";
+ return VDO_BAD_CONFIGURATION;
+ }
+ } else {
+ /* V0 actually has no version number in the table string */
+ *version_ptr = 0;
+ }
+
+ /*
+ * V0 and V1 have no optional parameters. There will always be a parameter for thread
+ * config, even if it's a "." to show it's an empty list.
+ */
+ if (*version_ptr <= 1) {
+ if (argc != REQUIRED_ARGC[*version_ptr]) {
+ *error_ptr = "Incorrect number of arguments for version";
+ return VDO_BAD_CONFIGURATION;
+ }
+ } else if (argc < REQUIRED_ARGC[*version_ptr]) {
+ *error_ptr = "Incorrect number of arguments for version";
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ if (*version_ptr != TABLE_VERSION) {
+ vdo_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d",
+ TABLE_VERSION, *version_ptr);
+ vdo_log_warning("Please consider upgrading management tools to match kernel.");
+ }
+ return VDO_SUCCESS;
+}
+
+/* Free a list of non-NULL string pointers, and then the list itself. */
+static void free_string_array(char **string_array)
+{
+ unsigned int offset;
+
+ for (offset = 0; string_array[offset] != NULL; offset++)
+ vdo_free(string_array[offset]);
+ vdo_free(string_array);
+}
+
+/*
+ * Split the input string into substrings, separated at occurrences of the indicated character,
+ * returning a null-terminated list of string pointers.
+ *
+ * The string pointers and the pointer array itself should both be freed with vdo_free() when no
+ * longer needed. This can be done with vdo_free_string_array (below) if the pointers in the array
+ * are not changed. Since the array and copied strings are allocated by this function, it may only
+ * be used in contexts where allocation is permitted.
+ *
+ * Empty substrings are not ignored; that is, returned substrings may be empty strings if the
+ * separator occurs twice in a row.
+ */
+static int split_string(const char *string, char separator, char ***substring_array_ptr)
+{
+ unsigned int current_substring = 0, substring_count = 1;
+ const char *s;
+ char **substrings;
+ int result;
+ ptrdiff_t length;
+
+ for (s = string; *s != 0; s++) {
+ if (*s == separator)
+ substring_count++;
+ }
+
+ result = vdo_allocate(substring_count + 1, char *, "string-splitting array",
+ &substrings);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (s = string; *s != 0; s++) {
+ if (*s == separator) {
+ ptrdiff_t length = s - string;
+
+ result = vdo_allocate(length + 1, char, "split string",
+ &substrings[current_substring]);
+ if (result != VDO_SUCCESS) {
+ free_string_array(substrings);
+ return result;
+ }
+ /*
+ * Trailing NUL is already in place after allocation; deal with the zero or
+ * more non-NUL bytes in the string.
+ */
+ if (length > 0)
+ memcpy(substrings[current_substring], string, length);
+ string = s + 1;
+ current_substring++;
+ BUG_ON(current_substring >= substring_count);
+ }
+ }
+ /* Process final string, with no trailing separator. */
+ BUG_ON(current_substring != (substring_count - 1));
+ length = strlen(string);
+
+ result = vdo_allocate(length + 1, char, "split string",
+ &substrings[current_substring]);
+ if (result != VDO_SUCCESS) {
+ free_string_array(substrings);
+ return result;
+ }
+ memcpy(substrings[current_substring], string, length);
+ current_substring++;
+ /* substrings[current_substring] is NULL already */
+ *substring_array_ptr = substrings;
+ return VDO_SUCCESS;
+}
+
+/*
+ * Join the input substrings into one string, joined with the indicated character, returning a
+ * string. array_length is a bound on the number of valid elements in substring_array, in case it
+ * is not NULL-terminated.
+ */
+static int join_strings(char **substring_array, size_t array_length, char separator,
+ char **string_ptr)
+{
+ size_t string_length = 0;
+ size_t i;
+ int result;
+ char *output, *current_position;
+
+ for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++)
+ string_length += strlen(substring_array[i]) + 1;
+
+ result = vdo_allocate(string_length, char, __func__, &output);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ current_position = &output[0];
+
+ for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) {
+ current_position = vdo_append_to_buffer(current_position,
+ output + string_length, "%s",
+ substring_array[i]);
+ *current_position = separator;
+ current_position++;
+ }
+
+ /* We output one too many separators; replace the last with a zero byte. */
+ if (current_position != output)
+ *(current_position - 1) = '\0';
+
+ *string_ptr = output;
+ return VDO_SUCCESS;
+}
+
+/**
+ * parse_bool() - Parse a two-valued option into a bool.
+ * @bool_str: The string value to convert to a bool.
+ * @true_str: The string value which should be converted to true.
+ * @false_str: The string value which should be converted to false.
+ * @bool_ptr: A pointer to return the bool value in.
+ *
+ * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str.
+ */
+static inline int __must_check parse_bool(const char *bool_str, const char *true_str,
+ const char *false_str, bool *bool_ptr)
+{
+ bool value = false;
+
+ if (strcmp(bool_str, true_str) == 0)
+ value = true;
+ else if (strcmp(bool_str, false_str) == 0)
+ value = false;
+ else
+ return VDO_BAD_CONFIGURATION;
+
+ *bool_ptr = value;
+ return VDO_SUCCESS;
+}
+
+/**
+ * process_one_thread_config_spec() - Process one component of a thread parameter configuration
+ * string and update the configuration data structure.
+ * @thread_param_type: The type of thread specified.
+ * @count: The thread count requested.
+ * @config: The configuration data structure to update.
+ *
+ * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the
+ * thread name is unknown, a message is logged but no error is returned.
+ *
+ * Return: VDO_SUCCESS or -EINVAL
+ */
+static int process_one_thread_config_spec(const char *thread_param_type,
+ unsigned int count,
+ struct thread_count_config *config)
+{
+ /* Handle limited thread parameters */
+ if (strcmp(thread_param_type, "bioRotationInterval") == 0) {
+ if (count == 0) {
+ vdo_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required");
+ return -EINVAL;
+ } else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) {
+ vdo_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d",
+ VDO_BIO_ROTATION_INTERVAL_LIMIT);
+ return -EINVAL;
+ }
+ config->bio_rotation_interval = count;
+ return VDO_SUCCESS;
+ }
+ if (strcmp(thread_param_type, "logical") == 0) {
+ if (count > MAX_VDO_LOGICAL_ZONES) {
+ vdo_log_error("thread config string error: at most %d 'logical' threads are allowed",
+ MAX_VDO_LOGICAL_ZONES);
+ return -EINVAL;
+ }
+ config->logical_zones = count;
+ return VDO_SUCCESS;
+ }
+ if (strcmp(thread_param_type, "physical") == 0) {
+ if (count > MAX_VDO_PHYSICAL_ZONES) {
+ vdo_log_error("thread config string error: at most %d 'physical' threads are allowed",
+ MAX_VDO_PHYSICAL_ZONES);
+ return -EINVAL;
+ }
+ config->physical_zones = count;
+ return VDO_SUCCESS;
+ }
+ /* Handle other thread count parameters */
+ if (count > MAXIMUM_VDO_THREADS) {
+ vdo_log_error("thread config string error: at most %d '%s' threads are allowed",
+ MAXIMUM_VDO_THREADS, thread_param_type);
+ return -EINVAL;
+ }
+ if (strcmp(thread_param_type, "hash") == 0) {
+ config->hash_zones = count;
+ return VDO_SUCCESS;
+ }
+ if (strcmp(thread_param_type, "cpu") == 0) {
+ if (count == 0) {
+ vdo_log_error("thread config string error: at least one 'cpu' thread required");
+ return -EINVAL;
+ }
+ config->cpu_threads = count;
+ return VDO_SUCCESS;
+ }
+ if (strcmp(thread_param_type, "ack") == 0) {
+ config->bio_ack_threads = count;
+ return VDO_SUCCESS;
+ }
+ if (strcmp(thread_param_type, "bio") == 0) {
+ if (count == 0) {
+ vdo_log_error("thread config string error: at least one 'bio' thread required");
+ return -EINVAL;
+ }
+ config->bio_threads = count;
+ return VDO_SUCCESS;
+ }
+
+ /*
+ * Don't fail, just log. This will handle version mismatches between user mode tools and
+ * kernel.
+ */
+ vdo_log_info("unknown thread parameter type \"%s\"", thread_param_type);
+ return VDO_SUCCESS;
+}
+
+/**
+ * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string
+ * and update the configuration data structure.
+ * @spec: The thread parameter specification string.
+ * @config: The configuration data to be updated.
+ */
+static int parse_one_thread_config_spec(const char *spec,
+ struct thread_count_config *config)
+{
+ unsigned int count;
+ char **fields;
+ int result;
+
+ result = split_string(spec, '=', &fields);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) {
+ vdo_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"",
+ spec);
+ free_string_array(fields);
+ return -EINVAL;
+ }
+
+ result = kstrtouint(fields[1], 10, &count);
+ if (result) {
+ vdo_log_error("thread config string error: integer value needed, found \"%s\"",
+ fields[1]);
+ free_string_array(fields);
+ return result;
+ }
+
+ result = process_one_thread_config_spec(fields[0], count, config);
+ free_string_array(fields);
+ return result;
+}
+
+/**
+ * parse_thread_config_string() - Parse the configuration string passed and update the specified
+ * counts and other parameters of various types of threads to be
+ * created.
+ * @string: Thread parameter configuration string.
+ * @config: The thread configuration data to update.
+ *
+ * The configuration string should contain one or more comma-separated specs of the form
+ * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
+ * "logical", "physical", and "hash".
+ *
+ * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
+ * further parsing.
+ *
+ * This function can't set the "reason" value the caller wants to pass back, because we'd want to
+ * format it to say which field was invalid, and we can't allocate the "reason" strings
+ * dynamically. So if an error occurs, we'll log the details and pass back an error.
+ *
+ * Return: VDO_SUCCESS or -EINVAL or -ENOMEM
+ */
+static int parse_thread_config_string(const char *string,
+ struct thread_count_config *config)
+{
+ int result = VDO_SUCCESS;
+ char **specs;
+
+ if (strcmp(".", string) != 0) {
+ unsigned int i;
+
+ result = split_string(string, ',', &specs);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (i = 0; specs[i] != NULL; i++) {
+ result = parse_one_thread_config_spec(specs[i], config);
+ if (result != VDO_SUCCESS)
+ break;
+ }
+ free_string_array(specs);
+ }
+ return result;
+}
+
+/**
+ * process_one_key_value_pair() - Process one component of an optional parameter string and update
+ * the configuration data structure.
+ * @key: The optional parameter key name.
+ * @value: The optional parameter value.
+ * @config: The configuration data structure to update.
+ *
+ * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is
+ * unknown, a message is logged but no error is returned.
+ *
+ * Return: VDO_SUCCESS or -EINVAL
+ */
+static int process_one_key_value_pair(const char *key, unsigned int value,
+ struct device_config *config)
+{
+ /* Non thread optional parameters */
+ if (strcmp(key, "maxDiscard") == 0) {
+ if (value == 0) {
+ vdo_log_error("optional parameter error: at least one max discard block required");
+ return -EINVAL;
+ }
+ /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */
+ if (value > (UINT_MAX / VDO_BLOCK_SIZE)) {
+ vdo_log_error("optional parameter error: at most %d max discard blocks are allowed",
+ UINT_MAX / VDO_BLOCK_SIZE);
+ return -EINVAL;
+ }
+ config->max_discard_blocks = value;
+ return VDO_SUCCESS;
+ }
+ /* Handles unknown key names */
+ return process_one_thread_config_spec(key, value, &config->thread_counts);
+}
+
+/**
+ * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data
+ * structure.
+ * @key: The optional key name.
+ * @value: The optional value.
+ * @config: The configuration data to be updated.
+ *
+ * Return: VDO_SUCCESS or error.
+ */
+static int parse_one_key_value_pair(const char *key, const char *value,
+ struct device_config *config)
+{
+ unsigned int count;
+ int result;
+
+ if (strcmp(key, "deduplication") == 0)
+ return parse_bool(value, "on", "off", &config->deduplication);
+
+ if (strcmp(key, "compression") == 0)
+ return parse_bool(value, "on", "off", &config->compression);
+
+ /* The remaining arguments must have integral values. */
+ result = kstrtouint(value, 10, &count);
+ if (result) {
+ vdo_log_error("optional config string error: integer value needed, found \"%s\"",
+ value);
+ return result;
+ }
+ return process_one_key_value_pair(key, count, config);
+}
+
+/**
+ * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments.
+ * @argc: The total number of arguments in list.
+ * @argv: The list of key/value pairs.
+ * @config: The device configuration data to update.
+ *
+ * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
+ * further parsing.
+ *
+ * This function can't set the "reason" value the caller wants to pass back, because we'd want to
+ * format it to say which field was invalid, and we can't allocate the "reason" strings
+ * dynamically. So if an error occurs, we'll log the details and return the error.
+ *
+ * Return: VDO_SUCCESS or error
+ */
+static int parse_key_value_pairs(int argc, char **argv, struct device_config *config)
+{
+ int result = VDO_SUCCESS;
+
+ while (argc) {
+ result = parse_one_key_value_pair(argv[0], argv[1], config);
+ if (result != VDO_SUCCESS)
+ break;
+
+ argc -= 2;
+ argv += 2;
+ }
+
+ return result;
+}
+
+/**
+ * parse_optional_arguments() - Parse the configuration string passed in for optional arguments.
+ * @arg_set: The structure holding the arguments to parse.
+ * @error_ptr: Pointer to a buffer to hold the error string.
+ * @config: Pointer to device configuration data to update.
+ *
+ * For V0/V1 configurations, there will only be one optional parameter; the thread configuration.
+ * The configuration string should contain one or more comma-separated specs of the form
+ * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
+ * "logical", "physical", and "hash".
+ *
+ * For V2 configurations and beyond, there could be any number of arguments. They should contain
+ * one or more key/value pairs separated by a space.
+ *
+ * Return: VDO_SUCCESS or error
+ */
+static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr,
+ struct device_config *config)
+{
+ int result = VDO_SUCCESS;
+
+ if (config->version == 0 || config->version == 1) {
+ result = parse_thread_config_string(arg_set->argv[0],
+ &config->thread_counts);
+ if (result != VDO_SUCCESS) {
+ *error_ptr = "Invalid thread-count configuration";
+ return VDO_BAD_CONFIGURATION;
+ }
+ } else {
+ if ((arg_set->argc % 2) != 0) {
+ *error_ptr = "Odd number of optional arguments given but they should be <key> <value> pairs";
+ return VDO_BAD_CONFIGURATION;
+ }
+ result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config);
+ if (result != VDO_SUCCESS) {
+ *error_ptr = "Invalid optional argument configuration";
+ return VDO_BAD_CONFIGURATION;
+ }
+ }
+ return result;
+}
+
+/**
+ * handle_parse_error() - Handle a parsing error.
+ * @config: The config to free.
+ * @error_ptr: A place to store a constant string about the error.
+ * @error_str: A constant string to store in error_ptr.
+ */
+static void handle_parse_error(struct device_config *config, char **error_ptr,
+ char *error_str)
+{
+ free_device_config(config);
+ *error_ptr = error_str;
+}
+
+/**
+ * parse_device_config() - Convert the dmsetup table into a struct device_config.
+ * @argc: The number of table values.
+ * @argv: The array of table values.
+ * @ti: The target structure for this table.
+ * @config_ptr: A pointer to return the allocated config.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int parse_device_config(int argc, char **argv, struct dm_target *ti,
+ struct device_config **config_ptr)
+{
+ bool enable_512e;
+ size_t logical_bytes = to_bytes(ti->len);
+ struct dm_arg_set arg_set;
+ char **error_ptr = &ti->error;
+ struct device_config *config = NULL;
+ int result;
+
+ if ((logical_bytes % VDO_BLOCK_SIZE) != 0) {
+ handle_parse_error(config, error_ptr,
+ "Logical size must be a multiple of 4096");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ if (argc == 0) {
+ handle_parse_error(config, error_ptr, "Incorrect number of arguments");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ result = vdo_allocate(1, struct device_config, "device_config", &config);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr,
+ "Could not allocate config structure");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ config->owning_target = ti;
+ config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE;
+ INIT_LIST_HEAD(&config->config_list);
+
+ /* Save the original string. */
+ result = join_strings(argv, argc, ' ', &config->original_string);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr, "Could not populate string");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ vdo_log_info("table line: %s", config->original_string);
+
+ config->thread_counts = (struct thread_count_config) {
+ .bio_ack_threads = 1,
+ .bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT,
+ .bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL,
+ .cpu_threads = 1,
+ .logical_zones = 0,
+ .physical_zones = 0,
+ .hash_zones = 0,
+ };
+ config->max_discard_blocks = 1;
+ config->deduplication = true;
+ config->compression = false;
+
+ arg_set.argc = argc;
+ arg_set.argv = argv;
+
+ result = get_version_number(argc, argv, error_ptr, &config->version);
+ if (result != VDO_SUCCESS) {
+ /* get_version_number sets error_ptr itself. */
+ handle_parse_error(config, error_ptr, *error_ptr);
+ return result;
+ }
+ /* Move the arg pointer forward only if the argument was there. */
+ if (config->version >= 1)
+ dm_shift_arg(&arg_set);
+
+ result = vdo_duplicate_string(dm_shift_arg(&arg_set), "parent device name",
+ &config->parent_device_name);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr,
+ "Could not copy parent device name");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ /* Get the physical blocks, if known. */
+ if (config->version >= 1) {
+ result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr,
+ "Invalid physical block count");
+ return VDO_BAD_CONFIGURATION;
+ }
+ }
+
+ /* Get the logical block size and validate */
+ result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr, "Invalid logical block size");
+ return VDO_BAD_CONFIGURATION;
+ }
+ config->logical_block_size = (enable_512e ? 512 : 4096);
+
+ /* Skip past the two no longer used read cache options. */
+ if (config->version <= 1)
+ dm_consume_args(&arg_set, 2);
+
+ /* Get the page cache size. */
+ result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr,
+ "Invalid block map page cache size");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ /* Get the block map era length. */
+ result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age);
+ if (result != VDO_SUCCESS) {
+ handle_parse_error(config, error_ptr, "Invalid block map maximum age");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ /* Skip past the no longer used MD RAID5 optimization mode */
+ if (config->version <= 2)
+ dm_consume_args(&arg_set, 1);
+
+ /* Skip past the no longer used write policy setting */
+ if (config->version <= 3)
+ dm_consume_args(&arg_set, 1);
+
+ /* Skip past the no longer used pool name for older table lines */
+ if (config->version <= 2) {
+ /*
+ * Make sure the enum to get the pool name from argv directly is still in sync with
+ * the parsing of the table line.
+ */
+ if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) {
+ handle_parse_error(config, error_ptr,
+ "Pool name not in expected location");
+ return VDO_BAD_CONFIGURATION;
+ }
+ dm_shift_arg(&arg_set);
+ }
+
+ /* Get the optional arguments and validate. */
+ result = parse_optional_arguments(&arg_set, error_ptr, config);
+ if (result != VDO_SUCCESS) {
+ /* parse_optional_arguments sets error_ptr itself. */
+ handle_parse_error(config, error_ptr, *error_ptr);
+ return result;
+ }
+
+ /*
+ * Logical, physical, and hash zone counts can all be zero; then we get one thread doing
+ * everything, our older configuration. If any zone count is non-zero, the others must be
+ * as well.
+ */
+ if (((config->thread_counts.logical_zones == 0) !=
+ (config->thread_counts.physical_zones == 0)) ||
+ ((config->thread_counts.physical_zones == 0) !=
+ (config->thread_counts.hash_zones == 0))) {
+ handle_parse_error(config, error_ptr,
+ "Logical, physical, and hash zones counts must all be zero or all non-zero");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ if (config->cache_size <
+ (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) {
+ handle_parse_error(config, error_ptr,
+ "Insufficient block map cache for logical zones");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ result = dm_get_device(ti, config->parent_device_name,
+ dm_table_get_mode(ti->table), &config->owned_device);
+ if (result != 0) {
+ vdo_log_error("couldn't open device \"%s\": error %d",
+ config->parent_device_name, result);
+ handle_parse_error(config, error_ptr, "Unable to open storage device");
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ if (config->version == 0) {
+ u64 device_size = i_size_read(config->owned_device->bdev->bd_inode);
+
+ config->physical_blocks = device_size / VDO_BLOCK_SIZE;
+ }
+
+ *config_ptr = config;
+ return result;
+}
+
+static struct vdo *get_vdo_for_target(struct dm_target *ti)
+{
+ return ((struct device_config *) ti->private)->vdo;
+}
+
+
+static int vdo_map_bio(struct dm_target *ti, struct bio *bio)
+{
+ struct vdo *vdo = get_vdo_for_target(ti);
+ struct vdo_work_queue *current_work_queue;
+ const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state);
+
+ VDO_ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s",
+ code->name);
+
+ /* Count all incoming bios. */
+ vdo_count_bios(&vdo->stats.bios_in, bio);
+
+
+ /* Handle empty bios. Empty flush bios are not associated with a vio. */
+ if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) {
+ vdo_launch_flush(vdo, bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
+ /* This could deadlock, */
+ current_work_queue = vdo_get_current_work_queue();
+ BUG_ON((current_work_queue != NULL) &&
+ (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo));
+ vdo_launch_bio(vdo->data_vio_pool, bio);
+ return DM_MAPIO_SUBMITTED;
+}
+
+static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+ struct vdo *vdo = get_vdo_for_target(ti);
+
+ limits->logical_block_size = vdo->device_config->logical_block_size;
+ limits->physical_block_size = VDO_BLOCK_SIZE;
+
+ /* The minimum io size for random io */
+ blk_limits_io_min(limits, VDO_BLOCK_SIZE);
+ /* The optimal io size for streamed/sequential io */
+ blk_limits_io_opt(limits, VDO_BLOCK_SIZE);
+
+ /*
+ * Sets the maximum discard size that will be passed into VDO. This value comes from a
+ * table line value passed in during dmsetup create.
+ *
+ * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a
+ * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes
+ * 15 to 16 seconds on a busy HD system. However, using large values results in 120 second
+ * blocked task warnings in kernel logs. In order to avoid these warnings, we choose to
+ * use the smallest reasonable value.
+ *
+ * The value is used by dm-thin to determine whether to pass down discards. The block layer
+ * splits large discards on this boundary when this is set.
+ */
+ limits->max_discard_sectors =
+ (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);
+
+ /*
+ * Force discards to not begin or end with a partial block by stating the granularity is
+ * 4k.
+ */
+ limits->discard_granularity = VDO_BLOCK_SIZE;
+}
+
+static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
+ void *data)
+{
+ struct device_config *config = get_vdo_for_target(ti)->device_config;
+
+ return fn(ti, config->owned_device, 0,
+ config->physical_blocks * VDO_SECTORS_PER_BLOCK, data);
+}
+
+/*
+ * Status line is:
+ * <device> <operating mode> <in recovery> <index state> <compression state>
+ * <used physical blocks> <total physical blocks>
+ */
+
+static void vdo_status(struct dm_target *ti, status_type_t status_type,
+ unsigned int status_flags, char *result, unsigned int maxlen)
+{
+ struct vdo *vdo = get_vdo_for_target(ti);
+ struct vdo_statistics *stats;
+ struct device_config *device_config;
+ /* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */
+ int sz = 0;
+
+ switch (status_type) {
+ case STATUSTYPE_INFO:
+ /* Report info for dmsetup status */
+ mutex_lock(&vdo->stats_mutex);
+ vdo_fetch_statistics(vdo, &vdo->stats_buffer);
+ stats = &vdo->stats_buffer;
+
+ DMEMIT("/dev/%pg %s %s %s %s %llu %llu",
+ vdo_get_backing_device(vdo), stats->mode,
+ stats->in_recovery_mode ? "recovering" : "-",
+ vdo_get_dedupe_index_state_name(vdo->hash_zones),
+ vdo_get_compressing(vdo) ? "online" : "offline",
+ stats->data_blocks_used + stats->overhead_blocks_used,
+ stats->physical_blocks);
+ mutex_unlock(&vdo->stats_mutex);
+ break;
+
+ case STATUSTYPE_TABLE:
+ /* Report the string actually specified in the beginning. */
+ device_config = (struct device_config *) ti->private;
+ DMEMIT("%s", device_config->original_string);
+ break;
+
+ case STATUSTYPE_IMA:
+ /* FIXME: We ought to be more detailed here, but this is what thin does. */
+ *result = '\0';
+ break;
+ }
+}
+
+static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo)
+{
+ return i_size_read(vdo_get_backing_device(vdo)->bd_inode) / VDO_BLOCK_SIZE;
+}
+
+static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc,
+ char **argv)
+{
+ if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) {
+ if (strcasecmp(argv[1], "on") == 0) {
+ vdo_set_compressing(vdo, true);
+ return 0;
+ }
+
+ if (strcasecmp(argv[1], "off") == 0) {
+ vdo_set_compressing(vdo, false);
+ return 0;
+ }
+
+ vdo_log_warning("invalid argument '%s' to dmsetup compression message",
+ argv[1]);
+ return -EINVAL;
+ }
+
+ vdo_log_warning("unrecognized dmsetup message '%s' received", argv[0]);
+ return -EINVAL;
+}
+
+/*
+ * If the message is a dump, just do it. Otherwise, check that no other message is being processed,
+ * and only proceed if so.
+ * Returns -EBUSY if another message is being processed
+ */
+static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc,
+ char **argv)
+{
+ int result;
+
+ /*
+ * All messages which may be processed in parallel with other messages should be handled
+ * here before the atomic check below. Messages which should be exclusive should be
+ * processed in process_vdo_message_locked().
+ */
+
+ /* Dump messages should always be processed */
+ if (strcasecmp(argv[0], "dump") == 0)
+ return vdo_dump(vdo, argc, argv, "dmsetup message");
+
+ if (argc == 1) {
+ if (strcasecmp(argv[0], "dump-on-shutdown") == 0) {
+ vdo->dump_on_shutdown = true;
+ return 0;
+ }
+
+ /* Index messages should always be processed */
+ if ((strcasecmp(argv[0], "index-close") == 0) ||
+ (strcasecmp(argv[0], "index-create") == 0) ||
+ (strcasecmp(argv[0], "index-disable") == 0) ||
+ (strcasecmp(argv[0], "index-enable") == 0))
+ return vdo_message_dedupe_index(vdo->hash_zones, argv[0]);
+ }
+
+ if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0)
+ return -EBUSY;
+
+ result = process_vdo_message_locked(vdo, argc, argv);
+
+ /* Pairs with the implicit barrier in cmpxchg just above */
+ smp_wmb();
+ atomic_set(&vdo->processing_message, 0);
+ return result;
+}
+
+static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv,
+ char *result_buffer, unsigned int maxlen)
+{
+ struct registered_thread allocating_thread, instance_thread;
+ struct vdo *vdo;
+ int result;
+
+ if (argc == 0) {
+ vdo_log_warning("unspecified dmsetup message");
+ return -EINVAL;
+ }
+
+ vdo = get_vdo_for_target(ti);
+ vdo_register_allocating_thread(&allocating_thread, NULL);
+ vdo_register_thread_device_id(&instance_thread, &vdo->instance);
+
+ /*
+ * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a
+ * return code to look at the buffer and see if it is full or not.
+ */
+ if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) {
+ vdo_write_stats(vdo, result_buffer, maxlen);
+ result = 1;
+ } else {
+ result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv));
+ }
+
+ vdo_unregister_thread_device_id();
+ vdo_unregister_allocating_thread();
+ return result;
+}
+
+static void configure_target_capabilities(struct dm_target *ti)
+{
+ ti->discards_supported = 1;
+ ti->flush_supported = true;
+ ti->num_discard_bios = 1;
+ ti->num_flush_bios = 1;
+
+ /*
+ * If this value changes, please make sure to update the value for max_discard_sectors
+ * accordingly.
+ */
+ BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0);
+}
+
+/*
+ * Implements vdo_filter_fn.
+ */
+static bool vdo_uses_device(struct vdo *vdo, const void *context)
+{
+ const struct device_config *config = context;
+
+ return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev;
+}
+
+/**
+ * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in
+ * progress.
+ */
+static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo)
+{
+ switch (vdo->admin.phase) {
+ case RESUME_PHASE_PACKER:
+ case RESUME_PHASE_FLUSHER:
+ case SUSPEND_PHASE_PACKER:
+ case SUSPEND_PHASE_FLUSHES:
+ return vdo->thread_config.packer_thread;
+
+ case RESUME_PHASE_DATA_VIOS:
+ case SUSPEND_PHASE_DATA_VIOS:
+ return vdo->thread_config.cpu_thread;
+
+ case LOAD_PHASE_DRAIN_JOURNAL:
+ case RESUME_PHASE_JOURNAL:
+ case SUSPEND_PHASE_JOURNAL:
+ return vdo->thread_config.journal_thread;
+
+ default:
+ return vdo->thread_config.admin_thread;
+ }
+}
+
+static struct vdo_completion *prepare_admin_completion(struct vdo *vdo,
+ vdo_action_fn callback,
+ vdo_action_fn error_handler)
+{
+ struct vdo_completion *completion = &vdo->admin.completion;
+
+ /*
+ * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset
+ * any error in the completion.
+ */
+ completion->callback = callback;
+ completion->error_handler = error_handler;
+ completion->callback_thread_id = get_thread_id_for_phase(vdo);
+ completion->requeue = true;
+ return completion;
+}
+
+/**
+ * advance_phase() - Increment the phase of the current admin operation and prepare the admin
+ * completion to run on the thread for the next phase.
+ * @vdo: The on which an admin operation is being performed
+ *
+ * Return: The current phase
+ */
+static u32 advance_phase(struct vdo *vdo)
+{
+ u32 phase = vdo->admin.phase++;
+
+ vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo);
+ vdo->admin.completion.requeue = true;
+ return phase;
+}
+
+/*
+ * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method
+ * should not be called from vdo threads.
+ */
+static int perform_admin_operation(struct vdo *vdo, u32 starting_phase,
+ vdo_action_fn callback, vdo_action_fn error_handler,
+ const char *type)
+{
+ int result;
+ struct vdo_administrator *admin = &vdo->admin;
+
+ if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) {
+ return vdo_log_error_strerror(VDO_COMPONENT_BUSY,
+ "Can't start %s operation, another operation is already in progress",
+ type);
+ }
+
+ admin->phase = starting_phase;
+ reinit_completion(&admin->callback_sync);
+ vdo_reset_completion(&admin->completion);
+ vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler));
+
+ /*
+ * Using the "interruptible" interface means that Linux will not log a message when we wait
+ * for more than 120 seconds.
+ */
+ while (wait_for_completion_interruptible(&admin->callback_sync)) {
+ /* However, if we get a signal in a user-mode process, we could spin... */
+ fsleep(1000);
+ }
+
+ result = admin->completion.result;
+ /* pairs with implicit barrier in cmpxchg above */
+ smp_wmb();
+ atomic_set(&admin->busy, 0);
+ return result;
+}
+
+/* Assert that we are operating on the correct thread for the current phase. */
+static void assert_admin_phase_thread(struct vdo *vdo, const char *what)
+{
+ VDO_ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo),
+ "%s on correct thread for %s", what,
+ ADMIN_PHASE_NAMES[vdo->admin.phase]);
+}
+
+/**
+ * finish_operation_callback() - Callback to finish an admin operation.
+ * @completion: The admin_completion.
+ */
+static void finish_operation_callback(struct vdo_completion *completion)
+{
+ struct vdo_administrator *admin = &completion->vdo->admin;
+
+ vdo_finish_operation(&admin->state, completion->result);
+ complete(&admin->callback_sync);
+}
+
+/**
+ * decode_from_super_block() - Decode the VDO state from the super block and validate that it is
+ * correct.
+ * @vdo: The vdo being loaded.
+ *
+ * On error from this method, the component states must be destroyed explicitly. If this method
+ * returns successfully, the component states must not be destroyed.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check decode_from_super_block(struct vdo *vdo)
+{
+ const struct device_config *config = vdo->device_config;
+ int result;
+
+ result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry,
+ &vdo->states);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_set_state(vdo, vdo->states.vdo.state);
+ vdo->load_state = vdo->states.vdo.state;
+
+ /*
+ * If the device config specifies a larger logical size than was recorded in the super
+ * block, just accept it.
+ */
+ if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) {
+ vdo_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
+ (unsigned long long) config->logical_blocks,
+ (unsigned long long) vdo->states.vdo.config.logical_blocks);
+ vdo->states.vdo.config.logical_blocks = config->logical_blocks;
+ }
+
+ result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce,
+ config->physical_blocks,
+ config->logical_blocks);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo->layout = vdo->states.layout;
+ return VDO_SUCCESS;
+}
+
+/**
+ * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding
+ * portions of the vdo being loaded.
+ * @vdo: The vdo being loaded.
+ *
+ * This will also allocate the recovery journal and slab depot. If this method is called with an
+ * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block
+ * map and packer will be constructed as well.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check decode_vdo(struct vdo *vdo)
+{
+ block_count_t maximum_age, journal_length;
+ struct partition *partition;
+ int result;
+
+ result = decode_from_super_block(vdo);
+ if (result != VDO_SUCCESS) {
+ vdo_destroy_component_states(&vdo->states);
+ return result;
+ }
+
+ maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age);
+ journal_length =
+ vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size);
+ if (maximum_age > (journal_length / 2)) {
+ return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
+ "maximum age: %llu exceeds limit %llu",
+ (unsigned long long) maximum_age,
+ (unsigned long long) (journal_length / 2));
+ }
+
+ if (maximum_age == 0) {
+ return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
+ "maximum age must be greater than 0");
+ }
+
+ result = vdo_enable_read_only_entry(vdo);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ partition = vdo_get_known_partition(&vdo->layout,
+ VDO_RECOVERY_JOURNAL_PARTITION);
+ result = vdo_decode_recovery_journal(vdo->states.recovery_journal,
+ vdo->states.vdo.nonce, vdo, partition,
+ vdo->states.vdo.complete_recoveries,
+ vdo->states.vdo.config.recovery_journal_size,
+ &vdo->recovery_journal);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION);
+ result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition,
+ &vdo->depot);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_decode_block_map(vdo->states.block_map,
+ vdo->states.vdo.config.logical_blocks, vdo,
+ vdo->recovery_journal, vdo->states.vdo.nonce,
+ vdo->device_config->cache_size, maximum_age,
+ &vdo->block_map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_make_physical_zones(vdo, &vdo->physical_zones);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* The logical zones depend on the physical zones already existing. */
+ result = vdo_make_logical_zones(vdo, &vdo->logical_zones);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return vdo_make_hash_zones(vdo, &vdo->hash_zones);
+}
+
+/**
+ * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize().
+ * @completion: The admin completion.
+ */
+static void pre_load_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ int result;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ switch (advance_phase(vdo)) {
+ case PRE_LOAD_PHASE_START:
+ result = vdo_start_operation(&vdo->admin.state,
+ VDO_ADMIN_STATE_PRE_LOADING);
+ if (result != VDO_SUCCESS) {
+ vdo_continue_completion(completion, result);
+ return;
+ }
+
+ vdo_load_super_block(vdo, completion);
+ return;
+
+ case PRE_LOAD_PHASE_LOAD_COMPONENTS:
+ vdo_continue_completion(completion, decode_vdo(vdo));
+ return;
+
+ case PRE_LOAD_PHASE_END:
+ break;
+
+ default:
+ vdo_set_completion_result(completion, UDS_BAD_STATE);
+ }
+
+ finish_operation_callback(completion);
+}
+
+static void release_instance(unsigned int instance)
+{
+ mutex_lock(&instances_lock);
+ if (instance >= instances.bit_count) {
+ VDO_ASSERT_LOG_ONLY(false,
+ "instance number %u must be less than bit count %u",
+ instance, instances.bit_count);
+ } else if (test_bit(instance, instances.words) == 0) {
+ VDO_ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance);
+ } else {
+ __clear_bit(instance, instances.words);
+ instances.count -= 1;
+ }
+ mutex_unlock(&instances_lock);
+}
+
+static void set_device_config(struct dm_target *ti, struct vdo *vdo,
+ struct device_config *config)
+{
+ list_del_init(&config->config_list);
+ list_add_tail(&config->config_list, &vdo->device_config_list);
+ config->vdo = vdo;
+ ti->private = config;
+ configure_target_capabilities(ti);
+}
+
+static int vdo_initialize(struct dm_target *ti, unsigned int instance,
+ struct device_config *config)
+{
+ struct vdo *vdo;
+ int result;
+ u64 block_size = VDO_BLOCK_SIZE;
+ u64 logical_size = to_bytes(ti->len);
+ block_count_t logical_blocks = logical_size / block_size;
+
+ vdo_log_info("loading device '%s'", vdo_get_device_name(ti));
+ vdo_log_debug("Logical block size = %llu", (u64) config->logical_block_size);
+ vdo_log_debug("Logical blocks = %llu", logical_blocks);
+ vdo_log_debug("Physical block size = %llu", (u64) block_size);
+ vdo_log_debug("Physical blocks = %llu", config->physical_blocks);
+ vdo_log_debug("Block map cache blocks = %u", config->cache_size);
+ vdo_log_debug("Block map maximum age = %u", config->block_map_maximum_age);
+ vdo_log_debug("Deduplication = %s", (config->deduplication ? "on" : "off"));
+ vdo_log_debug("Compression = %s", (config->compression ? "on" : "off"));
+
+ vdo = vdo_find_matching(vdo_uses_device, config);
+ if (vdo != NULL) {
+ vdo_log_error("Existing vdo already uses device %s",
+ vdo->device_config->parent_device_name);
+ ti->error = "Cannot share storage device with already-running VDO";
+ return VDO_BAD_CONFIGURATION;
+ }
+
+ result = vdo_make(instance, config, &ti->error, &vdo);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error("Could not create VDO device. (VDO error %d, message %s)",
+ result, ti->error);
+ vdo_destroy(vdo);
+ return result;
+ }
+
+ result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback,
+ finish_operation_callback, "pre-load");
+ if (result != VDO_SUCCESS) {
+ ti->error = ((result == VDO_INVALID_ADMIN_STATE) ?
+ "Pre-load is only valid immediately after initialization" :
+ "Cannot load metadata from device");
+ vdo_log_error("Could not start VDO device. (VDO error %d, message %s)",
+ result, ti->error);
+ vdo_destroy(vdo);
+ return result;
+ }
+
+ set_device_config(ti, vdo, config);
+ vdo->device_config = config;
+ return VDO_SUCCESS;
+}
+
+/* Implements vdo_filter_fn. */
+static bool __must_check vdo_is_named(struct vdo *vdo, const void *context)
+{
+ struct dm_target *ti = vdo->device_config->owning_target;
+ const char *device_name = vdo_get_device_name(ti);
+
+ return strcmp(device_name, context) == 0;
+}
+
+/**
+ * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified
+ * capacity in an array of unsigned longs.
+ * @bit_count: The number of bits the array must hold.
+ *
+ * Return: the number of bytes needed for the array representation.
+ */
+static size_t get_bit_array_size(unsigned int bit_count)
+{
+ /* Round up to a multiple of the word size and convert to a byte count. */
+ return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long));
+}
+
+/**
+ * grow_bit_array() - Re-allocate the bitmap word array so there will more instance numbers that
+ * can be allocated.
+ *
+ * Since the array is initially NULL, this also initializes the array the first time we allocate an
+ * instance number.
+ *
+ * Return: VDO_SUCCESS or an error code from the allocation
+ */
+static int grow_bit_array(void)
+{
+ unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT,
+ (unsigned int) BIT_COUNT_MINIMUM);
+ unsigned long *new_words;
+ int result;
+
+ result = vdo_reallocate_memory(instances.words,
+ get_bit_array_size(instances.bit_count),
+ get_bit_array_size(new_count),
+ "instance number bit array", &new_words);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ instances.bit_count = new_count;
+ instances.words = new_words;
+ return VDO_SUCCESS;
+}
+
+/**
+ * allocate_instance() - Allocate an instance number.
+ * @instance_ptr: A point to hold the instance number
+ *
+ * Return: VDO_SUCCESS or an error code
+ *
+ * This function must be called while holding the instances lock.
+ */
+static int allocate_instance(unsigned int *instance_ptr)
+{
+ unsigned int instance;
+ int result;
+
+ /* If there are no unallocated instances, grow the bit array. */
+ if (instances.count >= instances.bit_count) {
+ result = grow_bit_array();
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ /*
+ * There must be a zero bit somewhere now. Find it, starting just after the last instance
+ * allocated.
+ */
+ instance = find_next_zero_bit(instances.words, instances.bit_count,
+ instances.next);
+ if (instance >= instances.bit_count) {
+ /* Nothing free after next, so wrap around to instance zero. */
+ instance = find_first_zero_bit(instances.words, instances.bit_count);
+ result = VDO_ASSERT(instance < instances.bit_count,
+ "impossibly, no zero bit found");
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ __set_bit(instance, instances.words);
+ instances.count++;
+ instances.next = instance + 1;
+ *instance_ptr = instance;
+ return VDO_SUCCESS;
+}
+
+static int construct_new_vdo_registered(struct dm_target *ti, unsigned int argc,
+ char **argv, unsigned int instance)
+{
+ int result;
+ struct device_config *config;
+
+ result = parse_device_config(argc, argv, ti, &config);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error_strerror(result, "parsing failed: %s", ti->error);
+ release_instance(instance);
+ return -EINVAL;
+ }
+
+ /* Beyond this point, the instance number will be cleaned up for us if needed */
+ result = vdo_initialize(ti, instance, config);
+ if (result != VDO_SUCCESS) {
+ release_instance(instance);
+ free_device_config(config);
+ return vdo_status_to_errno(result);
+ }
+
+ return VDO_SUCCESS;
+}
+
+static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ int result;
+ unsigned int instance;
+ struct registered_thread instance_thread;
+
+ mutex_lock(&instances_lock);
+ result = allocate_instance(&instance);
+ mutex_unlock(&instances_lock);
+ if (result != VDO_SUCCESS)
+ return -ENOMEM;
+
+ vdo_register_thread_device_id(&instance_thread, &instance);
+ result = construct_new_vdo_registered(ti, argc, argv, instance);
+ vdo_unregister_thread_device_id();
+ return result;
+}
+
+/**
+ * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in
+ * vdo_prepare_to_grow_physical().
+ * @completion: The admin completion.
+ */
+static void check_may_grow_physical(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ /* These checks can only be done from a vdo thread. */
+ if (vdo_is_read_only(vdo))
+ vdo_set_completion_result(completion, VDO_READ_ONLY);
+
+ if (vdo_in_recovery_mode(vdo))
+ vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD);
+
+ finish_operation_callback(completion);
+}
+
+static block_count_t get_partition_size(struct layout *layout, enum partition_id id)
+{
+ return vdo_get_known_partition(layout, id)->count;
+}
+
+/**
+ * grow_layout() - Make the layout for growing a vdo.
+ * @vdo: The vdo preparing to grow.
+ * @old_size: The current size of the vdo.
+ * @new_size: The size to which the vdo will be grown.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size)
+{
+ int result;
+ block_count_t min_new_size;
+
+ if (vdo->next_layout.size == new_size) {
+ /* We are already prepared to grow to the new size, so we're done. */
+ return VDO_SUCCESS;
+ }
+
+ /* Make a copy completion if there isn't one */
+ if (vdo->partition_copier == NULL) {
+ vdo->partition_copier = dm_kcopyd_client_create(NULL);
+ if (IS_ERR(vdo->partition_copier)) {
+ result = PTR_ERR(vdo->partition_copier);
+ vdo->partition_copier = NULL;
+ return result;
+ }
+ }
+
+ /* Free any unused preparation. */
+ vdo_uninitialize_layout(&vdo->next_layout);
+
+ /*
+ * Make a new layout with the existing partition sizes for everything but the slab depot
+ * partition.
+ */
+ result = vdo_initialize_layout(new_size, vdo->layout.start,
+ get_partition_size(&vdo->layout,
+ VDO_BLOCK_MAP_PARTITION),
+ get_partition_size(&vdo->layout,
+ VDO_RECOVERY_JOURNAL_PARTITION),
+ get_partition_size(&vdo->layout,
+ VDO_SLAB_SUMMARY_PARTITION),
+ &vdo->next_layout);
+ if (result != VDO_SUCCESS) {
+ dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
+ return result;
+ }
+
+ /* Ensure the new journal and summary are entirely within the added blocks. */
+ min_new_size = (old_size +
+ get_partition_size(&vdo->next_layout,
+ VDO_SLAB_SUMMARY_PARTITION) +
+ get_partition_size(&vdo->next_layout,
+ VDO_RECOVERY_JOURNAL_PARTITION));
+ if (min_new_size > new_size) {
+ /* Copying the journal and summary would destroy some old metadata. */
+ vdo_uninitialize_layout(&vdo->next_layout);
+ dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
+ return VDO_INCREMENT_TOO_SMALL;
+ }
+
+ return VDO_SUCCESS;
+}
+
+static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
+{
+ int result;
+ block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks;
+
+ vdo_log_info("Preparing to resize physical to %llu",
+ (unsigned long long) new_physical_blocks);
+ VDO_ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks),
+ "New physical size is larger than current physical size");
+ result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START,
+ check_may_grow_physical,
+ finish_operation_callback,
+ "prepare grow-physical");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = grow_layout(vdo, current_physical_blocks, new_physical_blocks);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_prepare_to_grow_slab_depot(vdo->depot,
+ vdo_get_known_partition(&vdo->next_layout,
+ VDO_SLAB_DEPOT_PARTITION));
+ if (result != VDO_SUCCESS) {
+ vdo_uninitialize_layout(&vdo->next_layout);
+ return result;
+ }
+
+ vdo_log_info("Done preparing to resize physical");
+ return VDO_SUCCESS;
+}
+
+/**
+ * validate_new_device_config() - Check whether a new device config represents a valid modification
+ * to an existing config.
+ * @to_validate: The new config to validate.
+ * @config: The existing config.
+ * @may_grow: Set to true if growing the logical and physical size of the vdo is currently
+ * permitted.
+ * @error_ptr: A pointer to hold the reason for any error.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int validate_new_device_config(struct device_config *to_validate,
+ struct device_config *config, bool may_grow,
+ char **error_ptr)
+{
+ if (to_validate->owning_target->begin != config->owning_target->begin) {
+ *error_ptr = "Starting sector cannot change";
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (to_validate->logical_block_size != config->logical_block_size) {
+ *error_ptr = "Logical block size cannot change";
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (to_validate->logical_blocks < config->logical_blocks) {
+ *error_ptr = "Can't shrink VDO logical size";
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (to_validate->cache_size != config->cache_size) {
+ *error_ptr = "Block map cache size cannot change";
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (to_validate->block_map_maximum_age != config->block_map_maximum_age) {
+ *error_ptr = "Block map maximum age cannot change";
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (memcmp(&to_validate->thread_counts, &config->thread_counts,
+ sizeof(struct thread_count_config)) != 0) {
+ *error_ptr = "Thread configuration cannot change";
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (to_validate->physical_blocks < config->physical_blocks) {
+ *error_ptr = "Removing physical storage from a VDO is not supported";
+ return VDO_NOT_IMPLEMENTED;
+ }
+
+ if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) {
+ *error_ptr = "VDO physical size may not grow in current state";
+ return VDO_NOT_IMPLEMENTED;
+ }
+
+ return VDO_SUCCESS;
+}
+
+static int prepare_to_modify(struct dm_target *ti, struct device_config *config,
+ struct vdo *vdo)
+{
+ int result;
+ bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED);
+
+ result = validate_new_device_config(config, vdo->device_config, may_grow,
+ &ti->error);
+ if (result != VDO_SUCCESS)
+ return -EINVAL;
+
+ if (config->logical_blocks > vdo->device_config->logical_blocks) {
+ block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks;
+
+ vdo_log_info("Preparing to resize logical to %llu",
+ (unsigned long long) config->logical_blocks);
+ VDO_ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks),
+ "New logical size is larger than current size");
+
+ result = vdo_prepare_to_grow_block_map(vdo->block_map,
+ config->logical_blocks);
+ if (result != VDO_SUCCESS) {
+ ti->error = "Device vdo_prepare_to_grow_logical failed";
+ return result;
+ }
+
+ vdo_log_info("Done preparing to resize logical");
+ }
+
+ if (config->physical_blocks > vdo->device_config->physical_blocks) {
+ result = prepare_to_grow_physical(vdo, config->physical_blocks);
+ if (result != VDO_SUCCESS) {
+ if (result == VDO_PARAMETER_MISMATCH) {
+ /*
+ * If we don't trap this case, vdo_status_to_errno() will remap
+ * it to -EIO, which is misleading and ahistorical.
+ */
+ result = -EINVAL;
+ }
+
+ if (result == VDO_TOO_MANY_SLABS)
+ ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)";
+ else
+ ti->error = "Device vdo_prepare_to_grow_physical failed";
+
+ return result;
+ }
+ }
+
+ if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) {
+ const char *device_name = vdo_get_device_name(config->owning_target);
+
+ vdo_log_info("Updating backing device of %s from %s to %s", device_name,
+ vdo->device_config->parent_device_name,
+ config->parent_device_name);
+ }
+
+ return VDO_SUCCESS;
+}
+
+static int update_existing_vdo(const char *device_name, struct dm_target *ti,
+ unsigned int argc, char **argv, struct vdo *vdo)
+{
+ int result;
+ struct device_config *config;
+
+ result = parse_device_config(argc, argv, ti, &config);
+ if (result != VDO_SUCCESS)
+ return -EINVAL;
+
+ vdo_log_info("preparing to modify device '%s'", device_name);
+ result = prepare_to_modify(ti, config, vdo);
+ if (result != VDO_SUCCESS) {
+ free_device_config(config);
+ return vdo_status_to_errno(result);
+ }
+
+ set_device_config(ti, vdo, config);
+ return VDO_SUCCESS;
+}
+
+static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ int result;
+ struct registered_thread allocating_thread, instance_thread;
+ const char *device_name;
+ struct vdo *vdo;
+
+ vdo_register_allocating_thread(&allocating_thread, NULL);
+ device_name = vdo_get_device_name(ti);
+ vdo = vdo_find_matching(vdo_is_named, device_name);
+ if (vdo == NULL) {
+ result = construct_new_vdo(ti, argc, argv);
+ } else {
+ vdo_register_thread_device_id(&instance_thread, &vdo->instance);
+ result = update_existing_vdo(device_name, ti, argc, argv, vdo);
+ vdo_unregister_thread_device_id();
+ }
+
+ vdo_unregister_allocating_thread();
+ return result;
+}
+
+static void vdo_dtr(struct dm_target *ti)
+{
+ struct device_config *config = ti->private;
+ struct vdo *vdo = vdo_forget(config->vdo);
+
+ list_del_init(&config->config_list);
+ if (list_empty(&vdo->device_config_list)) {
+ const char *device_name;
+
+ /* This was the last config referencing the VDO. Free it. */
+ unsigned int instance = vdo->instance;
+ struct registered_thread allocating_thread, instance_thread;
+
+ vdo_register_thread_device_id(&instance_thread, &instance);
+ vdo_register_allocating_thread(&allocating_thread, NULL);
+
+ device_name = vdo_get_device_name(ti);
+ vdo_log_info("stopping device '%s'", device_name);
+ if (vdo->dump_on_shutdown)
+ vdo_dump_all(vdo, "device shutdown");
+
+ vdo_destroy(vdo_forget(vdo));
+ vdo_log_info("device '%s' stopped", device_name);
+ vdo_unregister_thread_device_id();
+ vdo_unregister_allocating_thread();
+ release_instance(instance);
+ } else if (config == vdo->device_config) {
+ /*
+ * The VDO still references this config. Give it a reference to a config that isn't
+ * being destroyed.
+ */
+ vdo->device_config = list_first_entry(&vdo->device_config_list,
+ struct device_config, config_list);
+ }
+
+ free_device_config(config);
+ ti->private = NULL;
+}
+
+static void vdo_presuspend(struct dm_target *ti)
+{
+ get_vdo_for_target(ti)->suspend_type =
+ (dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING);
+}
+
+/**
+ * write_super_block_for_suspend() - Update the VDO state and save the super block.
+ * @completion: The admin completion
+ */
+static void write_super_block_for_suspend(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+
+ switch (vdo_get_state(vdo)) {
+ case VDO_DIRTY:
+ case VDO_NEW:
+ vdo_set_state(vdo, VDO_CLEAN);
+ break;
+
+ case VDO_CLEAN:
+ case VDO_READ_ONLY_MODE:
+ case VDO_FORCE_REBUILD:
+ case VDO_RECOVERING:
+ case VDO_REBUILD_FOR_UPGRADE:
+ break;
+
+ case VDO_REPLAYING:
+ default:
+ vdo_continue_completion(completion, UDS_BAD_STATE);
+ return;
+ }
+
+ vdo_save_components(vdo, completion);
+}
+
+/**
+ * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend().
+ * @completion: The sub-task completion.
+ */
+static void suspend_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ struct admin_state *state = &vdo->admin.state;
+ int result;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ switch (advance_phase(vdo)) {
+ case SUSPEND_PHASE_START:
+ if (vdo_get_admin_state_code(state)->quiescent) {
+ /* Already suspended */
+ break;
+ }
+
+ vdo_continue_completion(completion,
+ vdo_start_operation(state, vdo->suspend_type));
+ return;
+
+ case SUSPEND_PHASE_PACKER:
+ /*
+ * If the VDO was already resumed from a prior suspend while read-only, some of the
+ * components may not have been resumed. By setting a read-only error here, we
+ * guarantee that the result of this suspend will be VDO_READ_ONLY and not
+ * VDO_INVALID_ADMIN_STATE in that case.
+ */
+ if (vdo_in_read_only_mode(vdo))
+ vdo_set_completion_result(completion, VDO_READ_ONLY);
+
+ vdo_drain_packer(vdo->packer, completion);
+ return;
+
+ case SUSPEND_PHASE_DATA_VIOS:
+ drain_data_vio_pool(vdo->data_vio_pool, completion);
+ return;
+
+ case SUSPEND_PHASE_DEDUPE:
+ vdo_drain_hash_zones(vdo->hash_zones, completion);
+ return;
+
+ case SUSPEND_PHASE_FLUSHES:
+ vdo_drain_flusher(vdo->flusher, completion);
+ return;
+
+ case SUSPEND_PHASE_LOGICAL_ZONES:
+ /*
+ * Attempt to flush all I/O before completing post suspend work. We believe a
+ * suspended device is expected to have persisted all data written before the
+ * suspend, even if it hasn't been flushed yet.
+ */
+ result = vdo_synchronous_flush(vdo);
+ if (result != VDO_SUCCESS)
+ vdo_enter_read_only_mode(vdo, result);
+
+ vdo_drain_logical_zones(vdo->logical_zones,
+ vdo_get_admin_state_code(state), completion);
+ return;
+
+ case SUSPEND_PHASE_BLOCK_MAP:
+ vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state),
+ completion);
+ return;
+
+ case SUSPEND_PHASE_JOURNAL:
+ vdo_drain_recovery_journal(vdo->recovery_journal,
+ vdo_get_admin_state_code(state), completion);
+ return;
+
+ case SUSPEND_PHASE_DEPOT:
+ vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state),
+ completion);
+ return;
+
+ case SUSPEND_PHASE_READ_ONLY_WAIT:
+ vdo_wait_until_not_entering_read_only_mode(completion);
+ return;
+
+ case SUSPEND_PHASE_WRITE_SUPER_BLOCK:
+ if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) {
+ /* If we didn't save the VDO or there was an error, we're done. */
+ break;
+ }
+
+ write_super_block_for_suspend(completion);
+ return;
+
+ case SUSPEND_PHASE_END:
+ break;
+
+ default:
+ vdo_set_completion_result(completion, UDS_BAD_STATE);
+ }
+
+ finish_operation_callback(completion);
+}
+
+static void vdo_postsuspend(struct dm_target *ti)
+{
+ struct vdo *vdo = get_vdo_for_target(ti);
+ struct registered_thread instance_thread;
+ const char *device_name;
+ int result;
+
+ vdo_register_thread_device_id(&instance_thread, &vdo->instance);
+ device_name = vdo_get_device_name(vdo->device_config->owning_target);
+ vdo_log_info("suspending device '%s'", device_name);
+
+ /*
+ * It's important to note any error here does not actually stop device-mapper from
+ * suspending the device. All this work is done post suspend.
+ */
+ result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback,
+ suspend_callback, "suspend");
+
+ if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) {
+ /*
+ * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the
+ * VDO suspended.
+ */
+ vdo_log_info("device '%s' suspended", device_name);
+ } else if (result == VDO_INVALID_ADMIN_STATE) {
+ vdo_log_error("Suspend invoked while in unexpected state: %s",
+ vdo_get_admin_state(vdo)->name);
+ } else {
+ vdo_log_error_strerror(result, "Suspend of device '%s' failed",
+ device_name);
+ }
+
+ vdo_unregister_thread_device_id();
+}
+
+/**
+ * was_new() - Check whether the vdo was new when it was loaded.
+ * @vdo: The vdo to query.
+ *
+ * Return: true if the vdo was new.
+ */
+static bool was_new(const struct vdo *vdo)
+{
+ return (vdo->load_state == VDO_NEW);
+}
+
+/**
+ * requires_repair() - Check whether a vdo requires recovery or rebuild.
+ * @vdo: The vdo to query.
+ *
+ * Return: true if the vdo must be repaired.
+ */
+static bool __must_check requires_repair(const struct vdo *vdo)
+{
+ switch (vdo_get_state(vdo)) {
+ case VDO_DIRTY:
+ case VDO_FORCE_REBUILD:
+ case VDO_REPLAYING:
+ case VDO_REBUILD_FOR_UPGRADE:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/**
+ * get_load_type() - Determine how the slab depot was loaded.
+ * @vdo: The vdo.
+ *
+ * Return: How the depot was loaded.
+ */
+static enum slab_depot_load_type get_load_type(struct vdo *vdo)
+{
+ if (vdo_state_requires_read_only_rebuild(vdo->load_state))
+ return VDO_SLAB_DEPOT_REBUILD_LOAD;
+
+ if (vdo_state_requires_recovery(vdo->load_state))
+ return VDO_SLAB_DEPOT_RECOVERY_LOAD;
+
+ return VDO_SLAB_DEPOT_NORMAL_LOAD;
+}
+
+/**
+ * load_callback() - Callback to do the destructive parts of loading a VDO.
+ * @completion: The sub-task completion.
+ */
+static void load_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ int result;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ switch (advance_phase(vdo)) {
+ case LOAD_PHASE_START:
+ result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING);
+ if (result != VDO_SUCCESS) {
+ vdo_continue_completion(completion, result);
+ return;
+ }
+
+ /* Prepare the recovery journal for new entries. */
+ vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot,
+ vdo->block_map);
+ vdo_allow_read_only_mode_entry(completion);
+ return;
+
+ case LOAD_PHASE_LOAD_DEPOT:
+ vdo_set_dedupe_state_normal(vdo->hash_zones);
+ if (vdo_is_read_only(vdo)) {
+ /*
+ * In read-only mode we don't use the allocator and it may not even be
+ * readable, so don't bother trying to load it.
+ */
+ vdo_set_completion_result(completion, VDO_READ_ONLY);
+ break;
+ }
+
+ if (requires_repair(vdo)) {
+ vdo_repair(completion);
+ return;
+ }
+
+ vdo_load_slab_depot(vdo->depot,
+ (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING :
+ VDO_ADMIN_STATE_LOADING),
+ completion, NULL);
+ return;
+
+ case LOAD_PHASE_MAKE_DIRTY:
+ vdo_set_state(vdo, VDO_DIRTY);
+ vdo_save_components(vdo, completion);
+ return;
+
+ case LOAD_PHASE_PREPARE_TO_ALLOCATE:
+ vdo_initialize_block_map_from_journal(vdo->block_map,
+ vdo->recovery_journal);
+ vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo),
+ completion);
+ return;
+
+ case LOAD_PHASE_SCRUB_SLABS:
+ if (vdo_state_requires_recovery(vdo->load_state))
+ vdo_enter_recovery_mode(vdo);
+
+ vdo_scrub_all_unrecovered_slabs(vdo->depot, completion);
+ return;
+
+ case LOAD_PHASE_DATA_REDUCTION:
+ WRITE_ONCE(vdo->compressing, vdo->device_config->compression);
+ if (vdo->device_config->deduplication) {
+ /*
+ * Don't try to load or rebuild the index first (and log scary error
+ * messages) if this is known to be a newly-formatted volume.
+ */
+ vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo));
+ }
+
+ vdo->allocations_allowed = false;
+ fallthrough;
+
+ case LOAD_PHASE_FINISHED:
+ break;
+
+ case LOAD_PHASE_DRAIN_JOURNAL:
+ vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING,
+ completion);
+ return;
+
+ case LOAD_PHASE_WAIT_FOR_READ_ONLY:
+ /* Avoid an infinite loop */
+ completion->error_handler = NULL;
+ vdo->admin.phase = LOAD_PHASE_FINISHED;
+ vdo_wait_until_not_entering_read_only_mode(completion);
+ return;
+
+ default:
+ vdo_set_completion_result(completion, UDS_BAD_STATE);
+ }
+
+ finish_operation_callback(completion);
+}
+
+/**
+ * handle_load_error() - Handle an error during the load operation.
+ * @completion: The admin completion.
+ *
+ * If at all possible, brings the vdo online in read-only mode. This handler is registered in
+ * vdo_preresume_registered().
+ */
+static void handle_load_error(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+
+ if (vdo_requeue_completion_if_needed(completion,
+ vdo->thread_config.admin_thread))
+ return;
+
+ if (vdo_state_requires_read_only_rebuild(vdo->load_state) &&
+ (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
+ vdo_log_error_strerror(completion->result, "aborting load");
+ vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL;
+ load_callback(vdo_forget(completion));
+ return;
+ }
+
+ vdo_log_error_strerror(completion->result,
+ "Entering read-only mode due to load error");
+ vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
+ vdo_enter_read_only_mode(vdo, completion->result);
+ completion->result = VDO_READ_ONLY;
+ load_callback(completion);
+}
+
+/**
+ * write_super_block_for_resume() - Update the VDO state and save the super block.
+ * @completion: The admin completion
+ */
+static void write_super_block_for_resume(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+
+ switch (vdo_get_state(vdo)) {
+ case VDO_CLEAN:
+ case VDO_NEW:
+ vdo_set_state(vdo, VDO_DIRTY);
+ vdo_save_components(vdo, completion);
+ return;
+
+ case VDO_DIRTY:
+ case VDO_READ_ONLY_MODE:
+ case VDO_FORCE_REBUILD:
+ case VDO_RECOVERING:
+ case VDO_REBUILD_FOR_UPGRADE:
+ /* No need to write the super block in these cases */
+ vdo_launch_completion(completion);
+ return;
+
+ case VDO_REPLAYING:
+ default:
+ vdo_continue_completion(completion, UDS_BAD_STATE);
+ }
+}
+
+/**
+ * resume_callback() - Callback to resume a VDO.
+ * @completion: The admin completion.
+ */
+static void resume_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ int result;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ switch (advance_phase(vdo)) {
+ case RESUME_PHASE_START:
+ result = vdo_start_operation(&vdo->admin.state,
+ VDO_ADMIN_STATE_RESUMING);
+ if (result != VDO_SUCCESS) {
+ vdo_continue_completion(completion, result);
+ return;
+ }
+
+ write_super_block_for_resume(completion);
+ return;
+
+ case RESUME_PHASE_ALLOW_READ_ONLY_MODE:
+ vdo_allow_read_only_mode_entry(completion);
+ return;
+
+ case RESUME_PHASE_DEDUPE:
+ vdo_resume_hash_zones(vdo->hash_zones, completion);
+ return;
+
+ case RESUME_PHASE_DEPOT:
+ vdo_resume_slab_depot(vdo->depot, completion);
+ return;
+
+ case RESUME_PHASE_JOURNAL:
+ vdo_resume_recovery_journal(vdo->recovery_journal, completion);
+ return;
+
+ case RESUME_PHASE_BLOCK_MAP:
+ vdo_resume_block_map(vdo->block_map, completion);
+ return;
+
+ case RESUME_PHASE_LOGICAL_ZONES:
+ vdo_resume_logical_zones(vdo->logical_zones, completion);
+ return;
+
+ case RESUME_PHASE_PACKER:
+ {
+ bool was_enabled = vdo_get_compressing(vdo);
+ bool enable = vdo->device_config->compression;
+
+ if (enable != was_enabled)
+ WRITE_ONCE(vdo->compressing, enable);
+ vdo_log_info("compression is %s", (enable ? "enabled" : "disabled"));
+
+ vdo_resume_packer(vdo->packer, completion);
+ return;
+ }
+
+ case RESUME_PHASE_FLUSHER:
+ vdo_resume_flusher(vdo->flusher, completion);
+ return;
+
+ case RESUME_PHASE_DATA_VIOS:
+ resume_data_vio_pool(vdo->data_vio_pool, completion);
+ return;
+
+ case RESUME_PHASE_END:
+ break;
+
+ default:
+ vdo_set_completion_result(completion, UDS_BAD_STATE);
+ }
+
+ finish_operation_callback(completion);
+}
+
+/**
+ * grow_logical_callback() - Callback to initiate a grow logical.
+ * @completion: The admin completion.
+ *
+ * Registered in perform_grow_logical().
+ */
+static void grow_logical_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ int result;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ switch (advance_phase(vdo)) {
+ case GROW_LOGICAL_PHASE_START:
+ if (vdo_is_read_only(vdo)) {
+ vdo_log_error_strerror(VDO_READ_ONLY,
+ "Can't grow logical size of a read-only VDO");
+ vdo_set_completion_result(completion, VDO_READ_ONLY);
+ break;
+ }
+
+ result = vdo_start_operation(&vdo->admin.state,
+ VDO_ADMIN_STATE_SUSPENDED_OPERATION);
+ if (result != VDO_SUCCESS) {
+ vdo_continue_completion(completion, result);
+ return;
+ }
+
+ vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count;
+ vdo_save_components(vdo, completion);
+ return;
+
+ case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP:
+ vdo_grow_block_map(vdo->block_map, completion);
+ return;
+
+ case GROW_LOGICAL_PHASE_END:
+ break;
+
+ case GROW_LOGICAL_PHASE_ERROR:
+ vdo_enter_read_only_mode(vdo, completion->result);
+ break;
+
+ default:
+ vdo_set_completion_result(completion, UDS_BAD_STATE);
+ }
+
+ finish_operation_callback(completion);
+}
+
+/**
+ * handle_logical_growth_error() - Handle an error during the grow physical process.
+ * @completion: The admin completion.
+ */
+static void handle_logical_growth_error(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+
+ if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) {
+ /*
+ * We've failed to write the new size in the super block, so set our in memory
+ * config back to the old size.
+ */
+ vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count;
+ vdo_abandon_block_map_growth(vdo->block_map);
+ }
+
+ vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR;
+ grow_logical_callback(completion);
+}
+
+/**
+ * perform_grow_logical() - Grow the logical size of the vdo.
+ * @vdo: The vdo to grow.
+ * @new_logical_blocks: The size to which the vdo should be grown.
+ *
+ * Context: This method may only be called when the vdo has been suspended and must not be called
+ * from a base thread.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks)
+{
+ int result;
+
+ if (vdo->device_config->logical_blocks == new_logical_blocks) {
+ /*
+ * A table was loaded for which we prepared to grow, but a table without that
+ * growth was what we are resuming with.
+ */
+ vdo_abandon_block_map_growth(vdo->block_map);
+ return VDO_SUCCESS;
+ }
+
+ vdo_log_info("Resizing logical to %llu",
+ (unsigned long long) new_logical_blocks);
+ if (vdo->block_map->next_entry_count != new_logical_blocks)
+ return VDO_PARAMETER_MISMATCH;
+
+ result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START,
+ grow_logical_callback,
+ handle_logical_growth_error, "grow logical");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks);
+ return VDO_SUCCESS;
+}
+
+static void copy_callback(int read_err, unsigned long write_err, void *context)
+{
+ struct vdo_completion *completion = context;
+ int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO);
+
+ vdo_continue_completion(completion, result);
+}
+
+static void partition_to_region(struct partition *partition, struct vdo *vdo,
+ struct dm_io_region *region)
+{
+ physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset;
+
+ *region = (struct dm_io_region) {
+ .bdev = vdo_get_backing_device(vdo),
+ .sector = pbn * VDO_SECTORS_PER_BLOCK,
+ .count = partition->count * VDO_SECTORS_PER_BLOCK,
+ };
+}
+
+/**
+ * copy_partition() - Copy a partition from the location specified in the current layout to that in
+ * the next layout.
+ * @vdo: The vdo preparing to grow.
+ * @id: The ID of the partition to copy.
+ * @parent: The completion to notify when the copy is complete.
+ */
+static void copy_partition(struct vdo *vdo, enum partition_id id,
+ struct vdo_completion *parent)
+{
+ struct dm_io_region read_region, write_regions[1];
+ struct partition *from = vdo_get_known_partition(&vdo->layout, id);
+ struct partition *to = vdo_get_known_partition(&vdo->next_layout, id);
+
+ partition_to_region(from, vdo, &read_region);
+ partition_to_region(to, vdo, &write_regions[0]);
+ dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0,
+ copy_callback, parent);
+}
+
+/**
+ * grow_physical_callback() - Callback to initiate a grow physical.
+ * @completion: The admin completion.
+ *
+ * Registered in perform_grow_physical().
+ */
+static void grow_physical_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ int result;
+
+ assert_admin_phase_thread(vdo, __func__);
+
+ switch (advance_phase(vdo)) {
+ case GROW_PHYSICAL_PHASE_START:
+ if (vdo_is_read_only(vdo)) {
+ vdo_log_error_strerror(VDO_READ_ONLY,
+ "Can't grow physical size of a read-only VDO");
+ vdo_set_completion_result(completion, VDO_READ_ONLY);
+ break;
+ }
+
+ result = vdo_start_operation(&vdo->admin.state,
+ VDO_ADMIN_STATE_SUSPENDED_OPERATION);
+ if (result != VDO_SUCCESS) {
+ vdo_continue_completion(completion, result);
+ return;
+ }
+
+ /* Copy the journal into the new layout. */
+ copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion);
+ return;
+
+ case GROW_PHYSICAL_PHASE_COPY_SUMMARY:
+ copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion);
+ return;
+
+ case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS:
+ vdo_uninitialize_layout(&vdo->layout);
+ vdo->layout = vdo->next_layout;
+ vdo_forget(vdo->next_layout.head);
+ vdo->states.vdo.config.physical_blocks = vdo->layout.size;
+ vdo_update_slab_depot_size(vdo->depot);
+ vdo_save_components(vdo, completion);
+ return;
+
+ case GROW_PHYSICAL_PHASE_USE_NEW_SLABS:
+ vdo_use_new_slabs(vdo->depot, completion);
+ return;
+
+ case GROW_PHYSICAL_PHASE_END:
+ vdo->depot->summary_origin =
+ vdo_get_known_partition(&vdo->layout,
+ VDO_SLAB_SUMMARY_PARTITION)->offset;
+ vdo->recovery_journal->origin =
+ vdo_get_known_partition(&vdo->layout,
+ VDO_RECOVERY_JOURNAL_PARTITION)->offset;
+ break;
+
+ case GROW_PHYSICAL_PHASE_ERROR:
+ vdo_enter_read_only_mode(vdo, completion->result);
+ break;
+
+ default:
+ vdo_set_completion_result(completion, UDS_BAD_STATE);
+ }
+
+ vdo_uninitialize_layout(&vdo->next_layout);
+ finish_operation_callback(completion);
+}
+
+/**
+ * handle_physical_growth_error() - Handle an error during the grow physical process.
+ * @completion: The sub-task completion.
+ */
+static void handle_physical_growth_error(struct vdo_completion *completion)
+{
+ completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR;
+ grow_physical_callback(completion);
+}
+
+/**
+ * perform_grow_physical() - Grow the physical size of the vdo.
+ * @vdo: The vdo to resize.
+ * @new_physical_blocks: The new physical size in blocks.
+ *
+ * Context: This method may only be called when the vdo has been suspended and must not be called
+ * from a base thread.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
+{
+ int result;
+ block_count_t new_depot_size, prepared_depot_size;
+ block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks;
+
+ /* Skip any noop grows. */
+ if (old_physical_blocks == new_physical_blocks)
+ return VDO_SUCCESS;
+
+ if (new_physical_blocks != vdo->next_layout.size) {
+ /*
+ * Either the VDO isn't prepared to grow, or it was prepared to grow to a different
+ * size. Doing this check here relies on the fact that the call to this method is
+ * done under the dmsetup message lock.
+ */
+ vdo_uninitialize_layout(&vdo->next_layout);
+ vdo_abandon_new_slabs(vdo->depot);
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ /* Validate that we are prepared to grow appropriately. */
+ new_depot_size =
+ vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count;
+ prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size;
+ if (prepared_depot_size != new_depot_size)
+ return VDO_PARAMETER_MISMATCH;
+
+ result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START,
+ grow_physical_callback,
+ handle_physical_growth_error, "grow physical");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_log_info("Physical block count was %llu, now %llu",
+ (unsigned long long) old_physical_blocks,
+ (unsigned long long) new_physical_blocks);
+ return VDO_SUCCESS;
+}
+
+/**
+ * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being
+ * resumed.
+ * @vdo: The vdo being resumed.
+ * @config: The new device configuration derived from the table with which the vdo is being
+ * resumed.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check apply_new_vdo_configuration(struct vdo *vdo,
+ struct device_config *config)
+{
+ int result;
+
+ result = perform_grow_logical(vdo, config->logical_blocks);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error("grow logical operation failed, result = %d", result);
+ return result;
+ }
+
+ result = perform_grow_physical(vdo, config->physical_blocks);
+ if (result != VDO_SUCCESS)
+ vdo_log_error("resize operation failed, result = %d", result);
+
+ return result;
+}
+
+static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
+{
+ struct device_config *config = ti->private;
+ const char *device_name = vdo_get_device_name(ti);
+ block_count_t backing_blocks;
+ int result;
+
+ backing_blocks = get_underlying_device_block_count(vdo);
+ if (backing_blocks < config->physical_blocks) {
+ /* FIXME: can this still happen? */
+ vdo_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks",
+ device_name, (unsigned long long) backing_blocks,
+ (unsigned long long) config->physical_blocks);
+ return -EINVAL;
+ }
+
+ if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) {
+ vdo_log_info("starting device '%s'", device_name);
+ result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
+ handle_load_error, "load");
+ if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
+ /*
+ * Something has gone very wrong. Make sure everything has drained and
+ * leave the device in an unresumable state.
+ */
+ vdo_log_error_strerror(result,
+ "Start failed, could not load VDO metadata");
+ vdo->suspend_type = VDO_ADMIN_STATE_STOPPING;
+ perform_admin_operation(vdo, SUSPEND_PHASE_START,
+ suspend_callback, suspend_callback,
+ "suspend");
+ return result;
+ }
+
+ /* Even if the VDO is read-only, it is now able to handle read requests. */
+ vdo_log_info("device '%s' started", device_name);
+ }
+
+ vdo_log_info("resuming device '%s'", device_name);
+
+ /* If this fails, the VDO was not in a state to be resumed. This should never happen. */
+ result = apply_new_vdo_configuration(vdo, config);
+ BUG_ON(result == VDO_INVALID_ADMIN_STATE);
+
+ /*
+ * Now that we've tried to modify the vdo, the new config *is* the config, whether the
+ * modifications worked or not.
+ */
+ vdo->device_config = config;
+
+ /*
+ * Any error here is highly unexpected and the state of the vdo is questionable, so we mark
+ * it read-only in memory. Because we are suspended, the read-only state will not be
+ * written to disk.
+ */
+ if (result != VDO_SUCCESS) {
+ vdo_log_error_strerror(result,
+ "Commit of modifications to device '%s' failed",
+ device_name);
+ vdo_enter_read_only_mode(vdo, result);
+ return result;
+ }
+
+ if (vdo_get_admin_state(vdo)->normal) {
+ /* The VDO was just started, so we don't need to resume it. */
+ return VDO_SUCCESS;
+ }
+
+ result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback,
+ resume_callback, "resume");
+ BUG_ON(result == VDO_INVALID_ADMIN_STATE);
+ if (result == VDO_READ_ONLY) {
+ /* Even if the vdo is read-only, it has still resumed. */
+ result = VDO_SUCCESS;
+ }
+
+ if (result != VDO_SUCCESS)
+ vdo_log_error("resume of device '%s' failed with error: %d", device_name,
+ result);
+
+ return result;
+}
+
+static int vdo_preresume(struct dm_target *ti)
+{
+ struct registered_thread instance_thread;
+ struct vdo *vdo = get_vdo_for_target(ti);
+ int result;
+
+ vdo_register_thread_device_id(&instance_thread, &vdo->instance);
+ result = vdo_preresume_registered(ti, vdo);
+ if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE))
+ result = -EINVAL;
+ vdo_unregister_thread_device_id();
+ return vdo_status_to_errno(result);
+}
+
+static void vdo_resume(struct dm_target *ti)
+{
+ struct registered_thread instance_thread;
+
+ vdo_register_thread_device_id(&instance_thread,
+ &get_vdo_for_target(ti)->instance);
+ vdo_log_info("device '%s' resumed", vdo_get_device_name(ti));
+ vdo_unregister_thread_device_id();
+}
+
+/*
+ * If anything changes that affects how user tools will interact with vdo, update the version
+ * number and make sure documentation about the change is complete so tools can properly update
+ * their management code.
+ */
+static struct target_type vdo_target_bio = {
+ .features = DM_TARGET_SINGLETON,
+ .name = "vdo",
+ .version = { 9, 0, 0 },
+ .module = THIS_MODULE,
+ .ctr = vdo_ctr,
+ .dtr = vdo_dtr,
+ .io_hints = vdo_io_hints,
+ .iterate_devices = vdo_iterate_devices,
+ .map = vdo_map_bio,
+ .message = vdo_message,
+ .status = vdo_status,
+ .presuspend = vdo_presuspend,
+ .postsuspend = vdo_postsuspend,
+ .preresume = vdo_preresume,
+ .resume = vdo_resume,
+};
+
+static bool dm_registered;
+
+static void vdo_module_destroy(void)
+{
+ vdo_log_debug("unloading");
+
+ if (dm_registered)
+ dm_unregister_target(&vdo_target_bio);
+
+ VDO_ASSERT_LOG_ONLY(instances.count == 0,
+ "should have no instance numbers still in use, but have %u",
+ instances.count);
+ vdo_free(instances.words);
+ memset(&instances, 0, sizeof(struct instance_tracker));
+}
+
+static int __init vdo_init(void)
+{
+ int result = 0;
+
+ /* Memory tracking must be initialized first for accurate accounting. */
+ vdo_memory_init();
+ vdo_initialize_threads_mutex();
+ vdo_initialize_thread_device_registry();
+ vdo_initialize_device_registry_once();
+
+ /* Add VDO errors to the set of errors registered by the indexer. */
+ result = vdo_register_status_codes();
+ if (result != VDO_SUCCESS) {
+ vdo_log_error("vdo_register_status_codes failed %d", result);
+ vdo_module_destroy();
+ return result;
+ }
+
+ result = dm_register_target(&vdo_target_bio);
+ if (result < 0) {
+ vdo_log_error("dm_register_target failed %d", result);
+ vdo_module_destroy();
+ return result;
+ }
+ dm_registered = true;
+
+ return result;
+}
+
+static void __exit vdo_exit(void)
+{
+ vdo_module_destroy();
+ /* Memory tracking cleanup must be done last. */
+ vdo_memory_exit();
+}
+
+module_init(vdo_init);
+module_exit(vdo_exit);
+
+module_param_named(log_level, vdo_log_level, uint, 0644);
+MODULE_PARM_DESC(log_level, "Log level for log messages");
+
+MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-vdo/dump.c b/drivers/md/dm-vdo/dump.c
new file mode 100644
index 000000000000..00e575d7d773
--- /dev/null
+++ b/drivers/md/dm-vdo/dump.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "dump.h"
+
+#include <linux/module.h>
+
+#include "memory-alloc.h"
+#include "string-utils.h"
+
+#include "constants.h"
+#include "data-vio.h"
+#include "dedupe.h"
+#include "funnel-workqueue.h"
+#include "io-submitter.h"
+#include "logger.h"
+#include "types.h"
+#include "vdo.h"
+
+enum dump_options {
+ /* Work queues */
+ SHOW_QUEUES,
+ /* Memory pools */
+ SHOW_VIO_POOL,
+ /* Others */
+ SHOW_VDO_STATUS,
+ /* This one means an option overrides the "default" choices, instead of altering them. */
+ SKIP_DEFAULT
+};
+
+enum dump_option_flags {
+ /* Work queues */
+ FLAG_SHOW_QUEUES = (1 << SHOW_QUEUES),
+ /* Memory pools */
+ FLAG_SHOW_VIO_POOL = (1 << SHOW_VIO_POOL),
+ /* Others */
+ FLAG_SHOW_VDO_STATUS = (1 << SHOW_VDO_STATUS),
+ /* Special */
+ FLAG_SKIP_DEFAULT = (1 << SKIP_DEFAULT)
+};
+
+#define FLAGS_ALL_POOLS (FLAG_SHOW_VIO_POOL)
+#define DEFAULT_DUMP_FLAGS (FLAG_SHOW_QUEUES | FLAG_SHOW_VDO_STATUS)
+/* Another static buffer... log10(256) = 2.408+, round up: */
+#define DIGITS_PER_U64 (1 + sizeof(u64) * 2409 / 1000)
+
+static inline bool is_arg_string(const char *arg, const char *this_option)
+{
+ /* convention seems to be case-independent options */
+ return strncasecmp(arg, this_option, strlen(this_option)) == 0;
+}
+
+static void do_dump(struct vdo *vdo, unsigned int dump_options_requested,
+ const char *why)
+{
+ u32 active, maximum;
+ s64 outstanding;
+
+ vdo_log_info("%s dump triggered via %s", VDO_LOGGING_MODULE_NAME, why);
+ active = get_data_vio_pool_active_requests(vdo->data_vio_pool);
+ maximum = get_data_vio_pool_maximum_requests(vdo->data_vio_pool);
+ outstanding = (atomic64_read(&vdo->stats.bios_submitted) -
+ atomic64_read(&vdo->stats.bios_completed));
+ vdo_log_info("%u device requests outstanding (max %u), %lld bio requests outstanding, device '%s'",
+ active, maximum, outstanding,
+ vdo_get_device_name(vdo->device_config->owning_target));
+ if (((dump_options_requested & FLAG_SHOW_QUEUES) != 0) && (vdo->threads != NULL)) {
+ thread_id_t id;
+
+ for (id = 0; id < vdo->thread_config.thread_count; id++)
+ vdo_dump_work_queue(vdo->threads[id].queue);
+ }
+
+ vdo_dump_hash_zones(vdo->hash_zones);
+ dump_data_vio_pool(vdo->data_vio_pool,
+ (dump_options_requested & FLAG_SHOW_VIO_POOL) != 0);
+ if ((dump_options_requested & FLAG_SHOW_VDO_STATUS) != 0)
+ vdo_dump_status(vdo);
+
+ vdo_report_memory_usage();
+ vdo_log_info("end of %s dump", VDO_LOGGING_MODULE_NAME);
+}
+
+static int parse_dump_options(unsigned int argc, char *const *argv,
+ unsigned int *dump_options_requested_ptr)
+{
+ unsigned int dump_options_requested = 0;
+
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } option_names[] = {
+ { "viopool", FLAG_SKIP_DEFAULT | FLAG_SHOW_VIO_POOL },
+ { "vdo", FLAG_SKIP_DEFAULT | FLAG_SHOW_VDO_STATUS },
+ { "pools", FLAG_SKIP_DEFAULT | FLAGS_ALL_POOLS },
+ { "queues", FLAG_SKIP_DEFAULT | FLAG_SHOW_QUEUES },
+ { "threads", FLAG_SKIP_DEFAULT | FLAG_SHOW_QUEUES },
+ { "default", FLAG_SKIP_DEFAULT | DEFAULT_DUMP_FLAGS },
+ { "all", ~0 },
+ };
+
+ bool options_okay = true;
+ unsigned int i;
+
+ for (i = 1; i < argc; i++) {
+ unsigned int j;
+
+ for (j = 0; j < ARRAY_SIZE(option_names); j++) {
+ if (is_arg_string(argv[i], option_names[j].name)) {
+ dump_options_requested |= option_names[j].flags;
+ break;
+ }
+ }
+ if (j == ARRAY_SIZE(option_names)) {
+ vdo_log_warning("dump option name '%s' unknown", argv[i]);
+ options_okay = false;
+ }
+ }
+ if (!options_okay)
+ return -EINVAL;
+ if ((dump_options_requested & FLAG_SKIP_DEFAULT) == 0)
+ dump_options_requested |= DEFAULT_DUMP_FLAGS;
+ *dump_options_requested_ptr = dump_options_requested;
+ return 0;
+}
+
+/* Dump as specified by zero or more string arguments. */
+int vdo_dump(struct vdo *vdo, unsigned int argc, char *const *argv, const char *why)
+{
+ unsigned int dump_options_requested = 0;
+ int result = parse_dump_options(argc, argv, &dump_options_requested);
+
+ if (result != 0)
+ return result;
+
+ do_dump(vdo, dump_options_requested, why);
+ return 0;
+}
+
+/* Dump everything we know how to dump */
+void vdo_dump_all(struct vdo *vdo, const char *why)
+{
+ do_dump(vdo, ~0, why);
+}
+
+/*
+ * Dump out the data_vio waiters on a waitq.
+ * wait_on should be the label to print for queue (e.g. logical or physical)
+ */
+static void dump_vio_waiters(struct vdo_wait_queue *waitq, char *wait_on)
+{
+ struct vdo_waiter *waiter, *first = vdo_waitq_get_first_waiter(waitq);
+ struct data_vio *data_vio;
+
+ if (first == NULL)
+ return;
+
+ data_vio = vdo_waiter_as_data_vio(first);
+
+ vdo_log_info(" %s is locked. Waited on by: vio %px pbn %llu lbn %llu d-pbn %llu lastOp %s",
+ wait_on, data_vio, data_vio->allocation.pbn, data_vio->logical.lbn,
+ data_vio->duplicate.pbn, get_data_vio_operation_name(data_vio));
+
+ for (waiter = first->next_waiter; waiter != first; waiter = waiter->next_waiter) {
+ data_vio = vdo_waiter_as_data_vio(waiter);
+ vdo_log_info(" ... and : vio %px pbn %llu lbn %llu d-pbn %llu lastOp %s",
+ data_vio, data_vio->allocation.pbn, data_vio->logical.lbn,
+ data_vio->duplicate.pbn,
+ get_data_vio_operation_name(data_vio));
+ }
+}
+
+/*
+ * Encode various attributes of a data_vio as a string of one-character flags. This encoding is for
+ * logging brevity:
+ *
+ * R => vio completion result not VDO_SUCCESS
+ * W => vio is on a waitq
+ * D => vio is a duplicate
+ * p => vio is a partial block operation
+ * z => vio is a zero block
+ * d => vio is a discard
+ *
+ * The common case of no flags set will result in an empty, null-terminated buffer. If any flags
+ * are encoded, the first character in the string will be a space character.
+ */
+static void encode_vio_dump_flags(struct data_vio *data_vio, char buffer[8])
+{
+ char *p_flag = buffer;
+ *p_flag++ = ' ';
+ if (data_vio->vio.completion.result != VDO_SUCCESS)
+ *p_flag++ = 'R';
+ if (data_vio->waiter.next_waiter != NULL)
+ *p_flag++ = 'W';
+ if (data_vio->is_duplicate)
+ *p_flag++ = 'D';
+ if (data_vio->is_partial)
+ *p_flag++ = 'p';
+ if (data_vio->is_zero)
+ *p_flag++ = 'z';
+ if (data_vio->remaining_discard > 0)
+ *p_flag++ = 'd';
+ if (p_flag == &buffer[1]) {
+ /* No flags, so remove the blank space. */
+ p_flag = buffer;
+ }
+ *p_flag = '\0';
+}
+
+/* Implements buffer_dump_function. */
+void dump_data_vio(void *data)
+{
+ struct data_vio *data_vio = data;
+
+ /*
+ * This just needs to be big enough to hold a queue (thread) name and a function name (plus
+ * a separator character and NUL). The latter is limited only by taste.
+ *
+ * In making this static, we're assuming only one "dump" will run at a time. If more than
+ * one does run, the log output will be garbled anyway.
+ */
+ static char vio_completion_dump_buffer[100 + MAX_VDO_WORK_QUEUE_NAME_LEN];
+ static char vio_block_number_dump_buffer[sizeof("P L D") + 3 * DIGITS_PER_U64];
+ static char vio_flush_generation_buffer[sizeof(" FG") + DIGITS_PER_U64];
+ static char flags_dump_buffer[8];
+
+ /*
+ * We're likely to be logging a couple thousand of these lines, and in some circumstances
+ * syslogd may have trouble keeping up, so keep it BRIEF rather than user-friendly.
+ */
+ vdo_dump_completion_to_buffer(&data_vio->vio.completion,
+ vio_completion_dump_buffer,
+ sizeof(vio_completion_dump_buffer));
+ if (data_vio->is_duplicate) {
+ snprintf(vio_block_number_dump_buffer,
+ sizeof(vio_block_number_dump_buffer), "P%llu L%llu D%llu",
+ data_vio->allocation.pbn, data_vio->logical.lbn,
+ data_vio->duplicate.pbn);
+ } else if (data_vio_has_allocation(data_vio)) {
+ snprintf(vio_block_number_dump_buffer,
+ sizeof(vio_block_number_dump_buffer), "P%llu L%llu",
+ data_vio->allocation.pbn, data_vio->logical.lbn);
+ } else {
+ snprintf(vio_block_number_dump_buffer,
+ sizeof(vio_block_number_dump_buffer), "L%llu",
+ data_vio->logical.lbn);
+ }
+
+ if (data_vio->flush_generation != 0) {
+ snprintf(vio_flush_generation_buffer,
+ sizeof(vio_flush_generation_buffer), " FG%llu",
+ data_vio->flush_generation);
+ } else {
+ vio_flush_generation_buffer[0] = 0;
+ }
+
+ encode_vio_dump_flags(data_vio, flags_dump_buffer);
+
+ vdo_log_info(" vio %px %s%s %s %s%s", data_vio,
+ vio_block_number_dump_buffer,
+ vio_flush_generation_buffer,
+ get_data_vio_operation_name(data_vio),
+ vio_completion_dump_buffer,
+ flags_dump_buffer);
+ /*
+ * might want info on: wantUDSAnswer / operation / status
+ * might want info on: bio / bios_merged
+ */
+
+ dump_vio_waiters(&data_vio->logical.waiters, "lbn");
+
+ /* might want to dump more info from vio here */
+}
diff --git a/drivers/md/dm-vdo/dump.h b/drivers/md/dm-vdo/dump.h
new file mode 100644
index 000000000000..ad47c70cca78
--- /dev/null
+++ b/drivers/md/dm-vdo/dump.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_DUMP_H
+#define VDO_DUMP_H
+
+#include "types.h"
+
+int vdo_dump(struct vdo *vdo, unsigned int argc, char *const *argv, const char *why);
+
+void vdo_dump_all(struct vdo *vdo, const char *why);
+
+void dump_data_vio(void *data);
+
+#endif /* VDO_DUMP_H */
diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c
new file mode 100644
index 000000000000..a34ea0229d53
--- /dev/null
+++ b/drivers/md/dm-vdo/encodings.c
@@ -0,0 +1,1483 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "encodings.h"
+
+#include <linux/log2.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "constants.h"
+#include "status-codes.h"
+#include "types.h"
+
+/** The maximum logical space is 4 petabytes, which is 1 terablock. */
+static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;
+
+/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
+static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;
+
+struct geometry_block {
+ char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
+ struct packed_header header;
+ u32 checksum;
+} __packed;
+
+static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
+ .id = VDO_GEOMETRY_BLOCK,
+ .version = {
+ .major_version = 5,
+ .minor_version = 0,
+ },
+ /*
+ * Note: this size isn't just the payload size following the header, like it is everywhere
+ * else in VDO.
+ */
+ .size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
+};
+
+static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
+ .id = VDO_GEOMETRY_BLOCK,
+ .version = {
+ .major_version = 4,
+ .minor_version = 0,
+ },
+ /*
+ * Note: this size isn't just the payload size following the header, like it is everywhere
+ * else in VDO.
+ */
+ .size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
+};
+
+const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";
+
+#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)
+
+static const struct version_number BLOCK_MAP_4_1 = {
+ .major_version = 4,
+ .minor_version = 1,
+};
+
+const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
+ .id = VDO_BLOCK_MAP,
+ .version = {
+ .major_version = 2,
+ .minor_version = 0,
+ },
+ .size = sizeof(struct block_map_state_2_0),
+};
+
+const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
+ .id = VDO_RECOVERY_JOURNAL,
+ .version = {
+ .major_version = 7,
+ .minor_version = 0,
+ },
+ .size = sizeof(struct recovery_journal_state_7_0),
+};
+
+const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
+ .id = VDO_SLAB_DEPOT,
+ .version = {
+ .major_version = 2,
+ .minor_version = 0,
+ },
+ .size = sizeof(struct slab_depot_state_2_0),
+};
+
+static const struct header VDO_LAYOUT_HEADER_3_0 = {
+ .id = VDO_LAYOUT,
+ .version = {
+ .major_version = 3,
+ .minor_version = 0,
+ },
+ .size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
+};
+
+static const enum partition_id REQUIRED_PARTITIONS[] = {
+ VDO_BLOCK_MAP_PARTITION,
+ VDO_SLAB_DEPOT_PARTITION,
+ VDO_RECOVERY_JOURNAL_PARTITION,
+ VDO_SLAB_SUMMARY_PARTITION,
+};
+
+/*
+ * The current version for the data encoded in the super block. This must be changed any time there
+ * is a change to encoding of the component data of any VDO component.
+ */
+static const struct version_number VDO_COMPONENT_DATA_41_0 = {
+ .major_version = 41,
+ .minor_version = 0,
+};
+
+const struct version_number VDO_VOLUME_VERSION_67_0 = {
+ .major_version = 67,
+ .minor_version = 0,
+};
+
+static const struct header SUPER_BLOCK_HEADER_12_0 = {
+ .id = VDO_SUPER_BLOCK,
+ .version = {
+ .major_version = 12,
+ .minor_version = 0,
+ },
+
+ /* This is the minimum size, if the super block contains no components. */
+ .size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
+};
+
+/**
+ * validate_version() - Check whether a version matches an expected version.
+ * @expected_version: The expected version.
+ * @actual_version: The version being validated.
+ * @component_name: The name of the component or the calling function (for error logging).
+ *
+ * Logs an error describing a mismatch.
+ *
+ * Return: VDO_SUCCESS if the versions are the same,
+ * VDO_UNSUPPORTED_VERSION if the versions don't match.
+ */
+static int __must_check validate_version(struct version_number expected_version,
+ struct version_number actual_version,
+ const char *component_name)
+{
+ if (!vdo_are_same_version(expected_version, actual_version)) {
+ return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
+ "%s version mismatch, expected %d.%d, got %d.%d",
+ component_name,
+ expected_version.major_version,
+ expected_version.minor_version,
+ actual_version.major_version,
+ actual_version.minor_version);
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_validate_header() - Check whether a header matches expectations.
+ * @expected_header: The expected header.
+ * @actual_header: The header being validated.
+ * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
+ * required that actual_header.size >= expected_header.size.
+ * @name: The name of the component or the calling function (for error logging).
+ *
+ * Logs an error describing the first mismatch found.
+ *
+ * Return: VDO_SUCCESS if the header meets expectations,
+ * VDO_INCORRECT_COMPONENT if the component ids don't match,
+ * VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
+ */
+int vdo_validate_header(const struct header *expected_header,
+ const struct header *actual_header, bool exact_size,
+ const char *name)
+{
+ int result;
+
+ if (expected_header->id != actual_header->id) {
+ return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
+ "%s ID mismatch, expected %d, got %d",
+ name, expected_header->id,
+ actual_header->id);
+ }
+
+ result = validate_version(expected_header->version, actual_header->version,
+ name);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if ((expected_header->size > actual_header->size) ||
+ (exact_size && (expected_header->size < actual_header->size))) {
+ return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
+ "%s size mismatch, expected %zu, got %zu",
+ name, expected_header->size,
+ actual_header->size);
+ }
+
+ return VDO_SUCCESS;
+}
+
+static void encode_version_number(u8 *buffer, size_t *offset,
+ struct version_number version)
+{
+ struct packed_version_number packed = vdo_pack_version_number(version);
+
+ memcpy(buffer + *offset, &packed, sizeof(packed));
+ *offset += sizeof(packed);
+}
+
+void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
+{
+ struct packed_header packed = vdo_pack_header(header);
+
+ memcpy(buffer + *offset, &packed, sizeof(packed));
+ *offset += sizeof(packed);
+}
+
+static void decode_version_number(u8 *buffer, size_t *offset,
+ struct version_number *version)
+{
+ struct packed_version_number packed;
+
+ memcpy(&packed, buffer + *offset, sizeof(packed));
+ *offset += sizeof(packed);
+ *version = vdo_unpack_version_number(packed);
+}
+
+void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
+{
+ struct packed_header packed;
+
+ memcpy(&packed, buffer + *offset, sizeof(packed));
+ *offset += sizeof(packed);
+
+ *header = vdo_unpack_header(&packed);
+}
+
+/**
+ * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
+ * @buffer: A buffer to decode from.
+ * @offset: The offset in the buffer at which to decode.
+ * @geometry: The structure to receive the decoded fields.
+ * @version: The geometry block version to decode.
+ */
+static void decode_volume_geometry(u8 *buffer, size_t *offset,
+ struct volume_geometry *geometry, u32 version)
+{
+ u32 unused, mem;
+ enum volume_region_id id;
+ nonce_t nonce;
+ block_count_t bio_offset = 0;
+ bool sparse;
+
+ /* This is for backwards compatibility. */
+ decode_u32_le(buffer, offset, &unused);
+ geometry->unused = unused;
+
+ decode_u64_le(buffer, offset, &nonce);
+ geometry->nonce = nonce;
+
+ memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
+ *offset += sizeof(uuid_t);
+
+ if (version > 4)
+ decode_u64_le(buffer, offset, &bio_offset);
+ geometry->bio_offset = bio_offset;
+
+ for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
+ physical_block_number_t start_block;
+ enum volume_region_id saved_id;
+
+ decode_u32_le(buffer, offset, &saved_id);
+ decode_u64_le(buffer, offset, &start_block);
+
+ geometry->regions[id] = (struct volume_region) {
+ .id = saved_id,
+ .start_block = start_block,
+ };
+ }
+
+ decode_u32_le(buffer, offset, &mem);
+ *offset += sizeof(u32);
+ sparse = buffer[(*offset)++];
+
+ geometry->index_config = (struct index_config) {
+ .mem = mem,
+ .sparse = sparse,
+ };
+}
+
+/**
+ * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
+ * @block: The encoded geometry block.
+ * @geometry: The structure to receive the decoded fields.
+ */
+int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
+{
+ u32 checksum, saved_checksum;
+ struct header header;
+ size_t offset = 0;
+ int result;
+
+ if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
+ return VDO_BAD_MAGIC;
+ offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;
+
+ vdo_decode_header(block, &offset, &header);
+ if (header.version.major_version <= 4) {
+ result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
+ true, __func__);
+ } else {
+ result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
+ true, __func__);
+ }
+ if (result != VDO_SUCCESS)
+ return result;
+
+ decode_volume_geometry(block, &offset, geometry, header.version.major_version);
+
+ result = VDO_ASSERT(header.size == offset + sizeof(u32),
+ "should have decoded up to the geometry checksum");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* Decode and verify the checksum. */
+ checksum = vdo_crc32(block, offset);
+ decode_u32_le(block, &offset, &saved_checksum);
+
+ return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
+}
+
+struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
+ physical_block_number_t pbn,
+ bool initialized)
+{
+ struct block_map_page *page = buffer;
+
+ memset(buffer, 0, VDO_BLOCK_SIZE);
+ page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
+ page->header.nonce = __cpu_to_le64(nonce);
+ page->header.pbn = __cpu_to_le64(pbn);
+ page->header.initialized = initialized;
+ return page;
+}
+
+enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
+ nonce_t nonce,
+ physical_block_number_t pbn)
+{
+ BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);
+
+ if (!vdo_are_same_version(BLOCK_MAP_4_1,
+ vdo_unpack_version_number(page->version)) ||
+ !page->header.initialized || (nonce != __le64_to_cpu(page->header.nonce)))
+ return VDO_BLOCK_MAP_PAGE_INVALID;
+
+ if (pbn != vdo_get_block_map_page_pbn(page))
+ return VDO_BLOCK_MAP_PAGE_BAD;
+
+ return VDO_BLOCK_MAP_PAGE_VALID;
+}
+
+static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
+ struct block_map_state_2_0 *state)
+{
+ size_t initial_offset;
+ block_count_t flat_page_count, root_count;
+ physical_block_number_t flat_page_origin, root_origin;
+ struct header header;
+ int result;
+
+ vdo_decode_header(buffer, offset, &header);
+ result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initial_offset = *offset;
+
+ decode_u64_le(buffer, offset, &flat_page_origin);
+ result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
+ "Flat page origin must be %u (recorded as %llu)",
+ VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
+ (unsigned long long) state->flat_page_origin);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ decode_u64_le(buffer, offset, &flat_page_count);
+ result = VDO_ASSERT(flat_page_count == 0,
+ "Flat page count must be 0 (recorded as %llu)",
+ (unsigned long long) state->flat_page_count);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ decode_u64_le(buffer, offset, &root_origin);
+ decode_u64_le(buffer, offset, &root_count);
+
+ result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
+ "decoded block map component size must match header size");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *state = (struct block_map_state_2_0) {
+ .flat_page_origin = flat_page_origin,
+ .flat_page_count = flat_page_count,
+ .root_origin = root_origin,
+ .root_count = root_count,
+ };
+
+ return VDO_SUCCESS;
+}
+
+static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
+ struct block_map_state_2_0 state)
+{
+ size_t initial_offset;
+
+ vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
+
+ initial_offset = *offset;
+ encode_u64_le(buffer, offset, state.flat_page_origin);
+ encode_u64_le(buffer, offset, state.flat_page_count);
+ encode_u64_le(buffer, offset, state.root_origin);
+ encode_u64_le(buffer, offset, state.root_count);
+
+ VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
+ "encoded block map component size must match header size");
+}
+
+/**
+ * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
+ * level in order to grow the forest to a new number of entries.
+ * @entries: The new number of entries the block map must address.
+ *
+ * Return: The total number of non-leaf pages required.
+ */
+block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
+ struct boundary *old_sizes,
+ block_count_t entries,
+ struct boundary *new_sizes)
+{
+ page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
+ page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
+ block_count_t total_pages = 0;
+ height_t height;
+
+ for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
+ block_count_t new_pages;
+
+ level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
+ new_sizes->levels[height] = level_size;
+ new_pages = level_size;
+ if (old_sizes != NULL)
+ new_pages -= old_sizes->levels[height];
+ total_pages += (new_pages * root_count);
+ }
+
+ return total_pages;
+}
+
+/**
+ * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
+ struct recovery_journal_state_7_0 state)
+{
+ size_t initial_offset;
+
+ vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);
+
+ initial_offset = *offset;
+ encode_u64_le(buffer, offset, state.journal_start);
+ encode_u64_le(buffer, offset, state.logical_blocks_used);
+ encode_u64_le(buffer, offset, state.block_map_data_blocks);
+
+ VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
+ "encoded recovery journal component size must match header size");
+}
+
+/**
+ * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
+ * @buffer: The buffer containing the saved state.
+ * @state: A pointer to a recovery journal state to hold the result of a successful decode.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
+ struct recovery_journal_state_7_0 *state)
+{
+ struct header header;
+ int result;
+ size_t initial_offset;
+ sequence_number_t journal_start;
+ block_count_t logical_blocks_used, block_map_data_blocks;
+
+ vdo_decode_header(buffer, offset, &header);
+ result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
+ __func__);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initial_offset = *offset;
+ decode_u64_le(buffer, offset, &journal_start);
+ decode_u64_le(buffer, offset, &logical_blocks_used);
+ decode_u64_le(buffer, offset, &block_map_data_blocks);
+
+ result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
+ "decoded recovery journal component size must match header size");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *state = (struct recovery_journal_state_7_0) {
+ .journal_start = journal_start,
+ .logical_blocks_used = logical_blocks_used,
+ .block_map_data_blocks = block_map_data_blocks,
+ };
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_get_journal_operation_name() - Get the name of a journal operation.
+ * @operation: The operation to name.
+ *
+ * Return: The name of the operation.
+ */
+const char *vdo_get_journal_operation_name(enum journal_operation operation)
+{
+ switch (operation) {
+ case VDO_JOURNAL_DATA_REMAPPING:
+ return "data remapping";
+
+ case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
+ return "block map remapping";
+
+ default:
+ return "unknown journal operation";
+ }
+}
+
+/**
+ * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
+ */
+static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
+ struct slab_depot_state_2_0 state)
+{
+ size_t initial_offset;
+
+ vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
+
+ initial_offset = *offset;
+ encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
+ encode_u64_le(buffer, offset, state.slab_config.data_blocks);
+ encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
+ encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
+ encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
+ encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
+ encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
+ encode_u64_le(buffer, offset, state.first_block);
+ encode_u64_le(buffer, offset, state.last_block);
+ buffer[(*offset)++] = state.zone_count;
+
+ VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
+ "encoded block map component size must match header size");
+}
+
+/**
+ * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
+ struct slab_depot_state_2_0 *state)
+{
+ struct header header;
+ int result;
+ size_t initial_offset;
+ struct slab_config slab_config;
+ block_count_t count;
+ physical_block_number_t first_block, last_block;
+ zone_count_t zone_count;
+
+ vdo_decode_header(buffer, offset, &header);
+ result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
+ __func__);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initial_offset = *offset;
+ decode_u64_le(buffer, offset, &count);
+ slab_config.slab_blocks = count;
+
+ decode_u64_le(buffer, offset, &count);
+ slab_config.data_blocks = count;
+
+ decode_u64_le(buffer, offset, &count);
+ slab_config.reference_count_blocks = count;
+
+ decode_u64_le(buffer, offset, &count);
+ slab_config.slab_journal_blocks = count;
+
+ decode_u64_le(buffer, offset, &count);
+ slab_config.slab_journal_flushing_threshold = count;
+
+ decode_u64_le(buffer, offset, &count);
+ slab_config.slab_journal_blocking_threshold = count;
+
+ decode_u64_le(buffer, offset, &count);
+ slab_config.slab_journal_scrubbing_threshold = count;
+
+ decode_u64_le(buffer, offset, &first_block);
+ decode_u64_le(buffer, offset, &last_block);
+ zone_count = buffer[(*offset)++];
+
+ result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
+ "decoded slab depot component size must match header size");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *state = (struct slab_depot_state_2_0) {
+ .slab_config = slab_config,
+ .first_block = first_block,
+ .last_block = last_block,
+ .zone_count = zone_count,
+ };
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_configure_slab_depot() - Configure the slab depot.
+ * @partition: The slab depot partition
+ * @slab_config: The configuration of a single slab.
+ * @zone_count: The number of zones the depot will use.
+ * @state: The state structure to be configured.
+ *
+ * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
+ * that will fit and still leave room for the depot metadata, then return the saved state for that
+ * configuration.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_configure_slab_depot(const struct partition *partition,
+ struct slab_config slab_config, zone_count_t zone_count,
+ struct slab_depot_state_2_0 *state)
+{
+ block_count_t total_slab_blocks, total_data_blocks;
+ size_t slab_count;
+ physical_block_number_t last_block;
+ block_count_t slab_size = slab_config.slab_blocks;
+
+ vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
+ __func__, (unsigned long long) partition->count,
+ (unsigned long long) partition->offset,
+ (unsigned long long) slab_size, zone_count);
+
+ /* We do not allow runt slabs, so we waste up to a slab's worth. */
+ slab_count = (partition->count / slab_size);
+ if (slab_count == 0)
+ return VDO_NO_SPACE;
+
+ if (slab_count > MAX_VDO_SLABS)
+ return VDO_TOO_MANY_SLABS;
+
+ total_slab_blocks = slab_count * slab_config.slab_blocks;
+ total_data_blocks = slab_count * slab_config.data_blocks;
+ last_block = partition->offset + total_slab_blocks;
+
+ *state = (struct slab_depot_state_2_0) {
+ .slab_config = slab_config,
+ .first_block = partition->offset,
+ .last_block = last_block,
+ .zone_count = zone_count,
+ };
+
+ vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
+ (unsigned long long) last_block,
+ (unsigned long long) total_data_blocks, slab_count,
+ (unsigned long long) (partition->count - (last_block - partition->offset)));
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
+ * @slab_size: The number of blocks per slab.
+ * @slab_journal_blocks: The number of blocks for the slab journal.
+ * @slab_config: The slab configuration to initialize.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
+ struct slab_config *slab_config)
+{
+ block_count_t ref_blocks, meta_blocks, data_blocks;
+ block_count_t flushing_threshold, remaining, blocking_threshold;
+ block_count_t minimal_extra_space, scrubbing_threshold;
+
+ if (slab_journal_blocks >= slab_size)
+ return VDO_BAD_CONFIGURATION;
+
+ /*
+ * This calculation should technically be a recurrence, but the total number of metadata
+ * blocks is currently less than a single block of ref_counts, so we'd gain at most one
+ * data block in each slab with more iteration.
+ */
+ ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
+ meta_blocks = (ref_blocks + slab_journal_blocks);
+
+ /* Make sure test code hasn't configured slabs to be too small. */
+ if (meta_blocks >= slab_size)
+ return VDO_BAD_CONFIGURATION;
+
+ /*
+ * If the slab size is very small, assume this must be a unit test and override the number
+ * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
+ * data_blocks fields to be the exact capacity of the configured volume, and that used to
+ * fall out since they use a power of two for the number of data blocks, the slab size was
+ * a power of two, and every block in a slab was a data block.
+ *
+ * TODO: Try to figure out some way of structuring testParameters and unit tests so this
+ * hack isn't needed without having to edit several unit tests every time the metadata size
+ * changes by one block.
+ */
+ data_blocks = slab_size - meta_blocks;
+ if ((slab_size < 1024) && !is_power_of_2(data_blocks))
+ data_blocks = ((block_count_t) 1 << ilog2(data_blocks));
+
+ /*
+ * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
+ * production, or 3/4ths, so we use this ratio for all sizes.
+ */
+ flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
+ /*
+ * The blocking threshold should be far enough from the flushing threshold to not produce
+ * delays, but far enough from the end of the journal to allow multiple successive recovery
+ * failures.
+ */
+ remaining = slab_journal_blocks - flushing_threshold;
+ blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
+ /* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
+ minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
+ scrubbing_threshold = blocking_threshold;
+ if (slab_journal_blocks > minimal_extra_space)
+ scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
+ if (blocking_threshold > scrubbing_threshold)
+ blocking_threshold = scrubbing_threshold;
+
+ *slab_config = (struct slab_config) {
+ .slab_blocks = slab_size,
+ .data_blocks = data_blocks,
+ .reference_count_blocks = ref_blocks,
+ .slab_journal_blocks = slab_journal_blocks,
+ .slab_journal_flushing_threshold = flushing_threshold,
+ .slab_journal_blocking_threshold = blocking_threshold,
+ .slab_journal_scrubbing_threshold = scrubbing_threshold};
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
+ * @block: The journal block holding the entry.
+ * @entry_count: The number of the entry.
+ *
+ * Return: The decoded entry.
+ */
+struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
+ journal_entry_count_t entry_count)
+{
+ struct slab_journal_entry entry =
+ vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
+
+ if (block->header.has_block_map_increments &&
+ ((block->payload.full_entries.entry_types[entry_count / 8] &
+ ((u8) 1 << (entry_count % 8))) != 0))
+ entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
+
+ return entry;
+}
+
+/**
+ * allocate_partition() - Allocate a partition and add it to a layout.
+ * @layout: The layout containing the partition.
+ * @id: The id of the partition.
+ * @offset: The offset into the layout at which the partition begins.
+ * @size: The size of the partition in blocks.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int allocate_partition(struct layout *layout, u8 id,
+ physical_block_number_t offset, block_count_t size)
+{
+ struct partition *partition;
+ int result;
+
+ result = vdo_allocate(1, struct partition, __func__, &partition);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ partition->id = id;
+ partition->offset = offset;
+ partition->count = size;
+ partition->next = layout->head;
+ layout->head = partition;
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * make_partition() - Create a new partition from the beginning or end of the unused space in a
+ * layout.
+ * @layout: The layout.
+ * @id: The id of the partition to make.
+ * @size: The number of blocks to carve out; if 0, all remaining space will be used.
+ * @beginning: True if the partition should start at the beginning of the unused space.
+ *
+ * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
+ * remaining.
+ */
+static int __must_check make_partition(struct layout *layout, enum partition_id id,
+ block_count_t size, bool beginning)
+{
+ int result;
+ physical_block_number_t offset;
+ block_count_t free_blocks = layout->last_free - layout->first_free;
+
+ if (size == 0) {
+ if (free_blocks == 0)
+ return VDO_NO_SPACE;
+ size = free_blocks;
+ } else if (size > free_blocks) {
+ return VDO_NO_SPACE;
+ }
+
+ result = vdo_get_partition(layout, id, NULL);
+ if (result != VDO_UNKNOWN_PARTITION)
+ return VDO_PARTITION_EXISTS;
+
+ offset = beginning ? layout->first_free : (layout->last_free - size);
+
+ result = allocate_partition(layout, id, offset, size);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ layout->num_partitions++;
+ if (beginning)
+ layout->first_free += size;
+ else
+ layout->last_free = layout->last_free - size;
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_initialize_layout() - Lay out the partitions of a vdo.
+ * @size: The entire size of the vdo.
+ * @origin: The start of the layout on the underlying storage in blocks.
+ * @block_map_blocks: The size of the block map partition.
+ * @journal_blocks: The size of the journal partition.
+ * @summary_blocks: The size of the slab summary partition.
+ * @layout: The layout to initialize.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
+ block_count_t block_map_blocks, block_count_t journal_blocks,
+ block_count_t summary_blocks, struct layout *layout)
+{
+ int result;
+ block_count_t necessary_size =
+ (offset + block_map_blocks + journal_blocks + summary_blocks);
+
+ if (necessary_size > size)
+ return vdo_log_error_strerror(VDO_NO_SPACE,
+ "Not enough space to make a VDO");
+
+ *layout = (struct layout) {
+ .start = offset,
+ .size = size,
+ .first_free = offset,
+ .last_free = size,
+ .num_partitions = 0,
+ .head = NULL,
+ };
+
+ result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
+ if (result != VDO_SUCCESS) {
+ vdo_uninitialize_layout(layout);
+ return result;
+ }
+
+ result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
+ false);
+ if (result != VDO_SUCCESS) {
+ vdo_uninitialize_layout(layout);
+ return result;
+ }
+
+ result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
+ false);
+ if (result != VDO_SUCCESS) {
+ vdo_uninitialize_layout(layout);
+ return result;
+ }
+
+ result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
+ if (result != VDO_SUCCESS)
+ vdo_uninitialize_layout(layout);
+
+ return result;
+}
+
+/**
+ * vdo_uninitialize_layout() - Clean up a layout.
+ * @layout: The layout to clean up.
+ *
+ * All partitions created by this layout become invalid pointers.
+ */
+void vdo_uninitialize_layout(struct layout *layout)
+{
+ while (layout->head != NULL) {
+ struct partition *part = layout->head;
+
+ layout->head = part->next;
+ vdo_free(part);
+ }
+
+ memset(layout, 0, sizeof(struct layout));
+}
+
+/**
+ * vdo_get_partition() - Get a partition by id.
+ * @layout: The layout from which to get a partition.
+ * @id: The id of the partition.
+ * @partition_ptr: A pointer to hold the partition.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_get_partition(struct layout *layout, enum partition_id id,
+ struct partition **partition_ptr)
+{
+ struct partition *partition;
+
+ for (partition = layout->head; partition != NULL; partition = partition->next) {
+ if (partition->id == id) {
+ if (partition_ptr != NULL)
+ *partition_ptr = partition;
+ return VDO_SUCCESS;
+ }
+ }
+
+ return VDO_UNKNOWN_PARTITION;
+}
+
+/**
+ * vdo_get_known_partition() - Get a partition by id from a validated layout.
+ * @layout: The layout from which to get a partition.
+ * @id: The id of the partition.
+ *
+ * Return: the partition
+ */
+struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
+{
+ struct partition *partition;
+ int result = vdo_get_partition(layout, id, &partition);
+
+ VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
+
+ return partition;
+}
+
+static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
+{
+ const struct partition *partition;
+ size_t initial_offset;
+ struct header header = VDO_LAYOUT_HEADER_3_0;
+
+ BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
+ VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
+ "layout partition count must fit in a byte");
+
+ vdo_encode_header(buffer, offset, &header);
+
+ initial_offset = *offset;
+ encode_u64_le(buffer, offset, layout->first_free);
+ encode_u64_le(buffer, offset, layout->last_free);
+ buffer[(*offset)++] = layout->num_partitions;
+
+ VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
+ "encoded size of a layout header must match structure");
+
+ for (partition = layout->head; partition != NULL; partition = partition->next) {
+ buffer[(*offset)++] = partition->id;
+ encode_u64_le(buffer, offset, partition->offset);
+ /* This field only exists for backwards compatibility */
+ encode_u64_le(buffer, offset, 0);
+ encode_u64_le(buffer, offset, partition->count);
+ }
+
+ VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
+ "encoded size of a layout must match header size");
+}
+
+static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
+ block_count_t size, struct layout *layout)
+{
+ struct header header;
+ struct layout_3_0 layout_header;
+ struct partition *partition;
+ size_t initial_offset;
+ physical_block_number_t first_free, last_free;
+ u8 partition_count;
+ u8 i;
+ int result;
+
+ vdo_decode_header(buffer, offset, &header);
+ /* Layout is variable size, so only do a minimum size check here. */
+ result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initial_offset = *offset;
+ decode_u64_le(buffer, offset, &first_free);
+ decode_u64_le(buffer, offset, &last_free);
+ partition_count = buffer[(*offset)++];
+ layout_header = (struct layout_3_0) {
+ .first_free = first_free,
+ .last_free = last_free,
+ .partition_count = partition_count,
+ };
+
+ result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
+ "decoded size of a layout header must match structure");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ layout->start = start;
+ layout->size = size;
+ layout->first_free = layout_header.first_free;
+ layout->last_free = layout_header.last_free;
+ layout->num_partitions = layout_header.partition_count;
+
+ if (layout->num_partitions > VDO_PARTITION_COUNT) {
+ return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
+ "layout has extra partitions");
+ }
+
+ for (i = 0; i < layout->num_partitions; i++) {
+ u8 id;
+ u64 partition_offset, count;
+
+ id = buffer[(*offset)++];
+ decode_u64_le(buffer, offset, &partition_offset);
+ *offset += sizeof(u64);
+ decode_u64_le(buffer, offset, &count);
+
+ result = allocate_partition(layout, id, partition_offset, count);
+ if (result != VDO_SUCCESS) {
+ vdo_uninitialize_layout(layout);
+ return result;
+ }
+ }
+
+ /* Validate that the layout has all (and only) the required partitions */
+ for (i = 0; i < VDO_PARTITION_COUNT; i++) {
+ result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
+ if (result != VDO_SUCCESS) {
+ vdo_uninitialize_layout(layout);
+ return vdo_log_error_strerror(result,
+ "layout is missing required partition %u",
+ REQUIRED_PARTITIONS[i]);
+ }
+
+ start += partition->count;
+ }
+
+ if (start != size) {
+ vdo_uninitialize_layout(layout);
+ return vdo_log_error_strerror(UDS_BAD_STATE,
+ "partitions do not cover the layout");
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
+ * @config: The vdo config to convert.
+ *
+ * Return: The platform-independent representation of the config.
+ */
+static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
+{
+ return (struct packed_vdo_config) {
+ .logical_blocks = __cpu_to_le64(config.logical_blocks),
+ .physical_blocks = __cpu_to_le64(config.physical_blocks),
+ .slab_size = __cpu_to_le64(config.slab_size),
+ .recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
+ .slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
+ };
+}
+
+/**
+ * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
+ * @component: The VDO component data to convert.
+ *
+ * Return: The platform-independent representation of the component.
+ */
+static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
+{
+ return (struct packed_vdo_component_41_0) {
+ .state = __cpu_to_le32(component.state),
+ .complete_recoveries = __cpu_to_le64(component.complete_recoveries),
+ .read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
+ .config = pack_vdo_config(component.config),
+ .nonce = __cpu_to_le64(component.nonce),
+ };
+}
+
+static void encode_vdo_component(u8 *buffer, size_t *offset,
+ struct vdo_component component)
+{
+ struct packed_vdo_component_41_0 packed;
+
+ encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
+ packed = pack_vdo_component(component);
+ memcpy(buffer + *offset, &packed, sizeof(packed));
+ *offset += sizeof(packed);
+}
+
+/**
+ * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
+ * @config: The packed vdo config to convert.
+ *
+ * Return: The native in-memory representation of the vdo config.
+ */
+static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
+{
+ return (struct vdo_config) {
+ .logical_blocks = __le64_to_cpu(config.logical_blocks),
+ .physical_blocks = __le64_to_cpu(config.physical_blocks),
+ .slab_size = __le64_to_cpu(config.slab_size),
+ .recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
+ .slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
+ };
+}
+
+/**
+ * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
+ * representation.
+ * @component: The packed vdo component data to convert.
+ *
+ * Return: The native in-memory representation of the component.
+ */
+static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
+{
+ return (struct vdo_component) {
+ .state = __le32_to_cpu(component.state),
+ .complete_recoveries = __le64_to_cpu(component.complete_recoveries),
+ .read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
+ .config = unpack_vdo_config(component.config),
+ .nonce = __le64_to_cpu(component.nonce),
+ };
+}
+
+/**
+ * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
+{
+ struct version_number version;
+ struct packed_vdo_component_41_0 packed;
+ int result;
+
+ decode_version_number(buffer, offset, &version);
+ result = validate_version(version, VDO_COMPONENT_DATA_41_0,
+ "VDO component data");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ memcpy(&packed, buffer + *offset, sizeof(packed));
+ *offset += sizeof(packed);
+ *component = unpack_vdo_component_41_0(packed);
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_validate_config() - Validate constraints on a VDO config.
+ * @config: The VDO config.
+ * @physical_block_count: The minimum block count of the underlying storage.
+ * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
+ * unspecified.
+ *
+ * Return: A success or error code.
+ */
+int vdo_validate_config(const struct vdo_config *config,
+ block_count_t physical_block_count,
+ block_count_t logical_block_count)
+{
+ struct slab_config slab_config;
+ int result;
+
+ result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(is_power_of_2(config->slab_size),
+ "slab size must be a power of two");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
+ "slab size must be less than or equal to 2^%d",
+ MAX_VDO_SLAB_BITS);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
+ "slab journal size meets minimum size");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
+ "slab journal size is within expected bound");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
+ &slab_config);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT((slab_config.data_blocks >= 1),
+ "slab must be able to hold at least one block");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
+ "physical block count %llu exceeds maximum %llu",
+ (unsigned long long) config->physical_blocks,
+ (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
+ if (result != VDO_SUCCESS)
+ return VDO_OUT_OF_RANGE;
+
+ if (physical_block_count != config->physical_blocks) {
+ vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
+ (unsigned long long) physical_block_count,
+ (unsigned long long) config->physical_blocks);
+ return VDO_PARAMETER_MISMATCH;
+ }
+
+ if (logical_block_count > 0) {
+ result = VDO_ASSERT((config->logical_blocks > 0),
+ "logical blocks unspecified");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (logical_block_count != config->logical_blocks) {
+ vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
+ (unsigned long long) logical_block_count,
+ (unsigned long long) config->logical_blocks);
+ return VDO_PARAMETER_MISMATCH;
+ }
+ }
+
+ result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
+ "logical blocks too large");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(config->recovery_journal_size > 0,
+ "recovery journal size unspecified");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
+ "recovery journal size must be a power of two");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return result;
+}
+
+/**
+ * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
+ * @states: The component states to destroy.
+ */
+void vdo_destroy_component_states(struct vdo_component_states *states)
+{
+ if (states == NULL)
+ return;
+
+ vdo_uninitialize_layout(&states->layout);
+}
+
+/**
+ * decode_components() - Decode the components now that we know the component data is a version we
+ * understand.
+ * @buffer: The buffer being decoded.
+ * @offset: The offset to start decoding from.
+ * @geometry: The vdo geometry
+ * @states: An object to hold the successfully decoded state.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check decode_components(u8 *buffer, size_t *offset,
+ struct volume_geometry *geometry,
+ struct vdo_component_states *states)
+{
+ int result;
+
+ decode_vdo_component(buffer, offset, &states->vdo);
+
+ result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
+ states->vdo.config.physical_blocks, &states->layout);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = decode_recovery_journal_state_7_0(buffer, offset,
+ &states->recovery_journal);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
+ "All decoded component data was used");
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_decode_component_states() - Decode the payload of a super block.
+ * @buffer: The buffer containing the encoded super block contents.
+ * @geometry: The vdo geometry
+ * @states: A pointer to hold the decoded states.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
+ struct vdo_component_states *states)
+{
+ int result;
+ size_t offset = VDO_COMPONENT_DATA_OFFSET;
+
+ /* This is for backwards compatibility. */
+ decode_u32_le(buffer, &offset, &states->unused);
+
+ /* Check the VDO volume version */
+ decode_version_number(buffer, &offset, &states->volume_version);
+ result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
+ "volume");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = decode_components(buffer, &offset, geometry, states);
+ if (result != VDO_SUCCESS)
+ vdo_uninitialize_layout(&states->layout);
+
+ return result;
+}
+
+/**
+ * vdo_validate_component_states() - Validate the decoded super block configuration.
+ * @states: The state decoded from the super block.
+ * @geometry_nonce: The nonce from the geometry block.
+ * @physical_size: The minimum block count of the underlying storage.
+ * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
+ * unspecified.
+ *
+ * Return: VDO_SUCCESS or an error if the configuration is invalid.
+ */
+int vdo_validate_component_states(struct vdo_component_states *states,
+ nonce_t geometry_nonce, block_count_t physical_size,
+ block_count_t logical_size)
+{
+ if (geometry_nonce != states->vdo.nonce) {
+ return vdo_log_error_strerror(VDO_BAD_NONCE,
+ "Geometry nonce %llu does not match superblock nonce %llu",
+ (unsigned long long) geometry_nonce,
+ (unsigned long long) states->vdo.nonce);
+ }
+
+ return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
+}
+
+/**
+ * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
+ */
+static void vdo_encode_component_states(u8 *buffer, size_t *offset,
+ const struct vdo_component_states *states)
+{
+ /* This is for backwards compatibility. */
+ encode_u32_le(buffer, offset, states->unused);
+ encode_version_number(buffer, offset, states->volume_version);
+ encode_vdo_component(buffer, offset, states->vdo);
+ encode_layout(buffer, offset, &states->layout);
+ encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
+ encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
+ encode_block_map_state_2_0(buffer, offset, states->block_map);
+
+ VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
+ "All super block component data was encoded");
+}
+
+/**
+ * vdo_encode_super_block() - Encode a super block into its on-disk representation.
+ */
+void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
+{
+ u32 checksum;
+ struct header header = SUPER_BLOCK_HEADER_12_0;
+ size_t offset = 0;
+
+ header.size += VDO_COMPONENT_DATA_SIZE;
+ vdo_encode_header(buffer, &offset, &header);
+ vdo_encode_component_states(buffer, &offset, states);
+
+ checksum = vdo_crc32(buffer, offset);
+ encode_u32_le(buffer, &offset, checksum);
+
+ /*
+ * Even though the buffer is a full block, to avoid the potential corruption from a torn
+ * write, the entire encoding must fit in the first sector.
+ */
+ VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
+ "entire superblock must fit in one sector");
+}
+
+/**
+ * vdo_decode_super_block() - Decode a super block from its on-disk representation.
+ */
+int vdo_decode_super_block(u8 *buffer)
+{
+ struct header header;
+ int result;
+ u32 checksum, saved_checksum;
+ size_t offset = 0;
+
+ /* Decode and validate the header. */
+ vdo_decode_header(buffer, &offset, &header);
+ result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
+ /*
+ * We can't check release version or checksum until we know the content size, so we
+ * have to assume a version mismatch on unexpected values.
+ */
+ return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
+ "super block contents too large: %zu",
+ header.size);
+ }
+
+ /* Skip past the component data for now, to verify the checksum. */
+ offset += VDO_COMPONENT_DATA_SIZE;
+
+ checksum = vdo_crc32(buffer, offset);
+ decode_u32_le(buffer, &offset, &saved_checksum);
+
+ result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
+ "must have decoded entire superblock payload");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
+}
diff --git a/drivers/md/dm-vdo/encodings.h b/drivers/md/dm-vdo/encodings.h
new file mode 100644
index 000000000000..e5ff2b0aaa79
--- /dev/null
+++ b/drivers/md/dm-vdo/encodings.h
@@ -0,0 +1,1298 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_ENCODINGS_H
+#define VDO_ENCODINGS_H
+
+#include <linux/blk_types.h>
+#include <linux/crc32.h>
+#include <linux/limits.h>
+#include <linux/uuid.h>
+
+#include "numeric.h"
+
+#include "constants.h"
+#include "types.h"
+
+/*
+ * An in-memory representation of a version number for versioned structures on disk.
+ *
+ * A version number consists of two portions, a major version and a minor version. Any format
+ * change which does not require an explicit upgrade step from the previous version should
+ * increment the minor version. Any format change which either requires an explicit upgrade step,
+ * or is wholly incompatible (i.e. can not be upgraded to), should increment the major version, and
+ * set the minor version to 0.
+ */
+struct version_number {
+ u32 major_version;
+ u32 minor_version;
+};
+
+/*
+ * A packed, machine-independent, on-disk representation of a version_number. Both fields are
+ * stored in little-endian byte order.
+ */
+struct packed_version_number {
+ __le32 major_version;
+ __le32 minor_version;
+} __packed;
+
+/* The registry of component ids for use in headers */
+#define VDO_SUPER_BLOCK 0
+#define VDO_LAYOUT 1
+#define VDO_RECOVERY_JOURNAL 2
+#define VDO_SLAB_DEPOT 3
+#define VDO_BLOCK_MAP 4
+#define VDO_GEOMETRY_BLOCK 5
+
+/* The header for versioned data stored on disk. */
+struct header {
+ u32 id; /* The component this is a header for */
+ struct version_number version; /* The version of the data format */
+ size_t size; /* The size of the data following this header */
+};
+
+/* A packed, machine-independent, on-disk representation of a component header. */
+struct packed_header {
+ __le32 id;
+ struct packed_version_number version;
+ __le64 size;
+} __packed;
+
+enum {
+ VDO_GEOMETRY_BLOCK_LOCATION = 0,
+ VDO_GEOMETRY_MAGIC_NUMBER_SIZE = 8,
+ VDO_DEFAULT_GEOMETRY_BLOCK_VERSION = 5,
+};
+
+struct index_config {
+ u32 mem;
+ u32 unused;
+ bool sparse;
+} __packed;
+
+enum volume_region_id {
+ VDO_INDEX_REGION = 0,
+ VDO_DATA_REGION = 1,
+ VDO_VOLUME_REGION_COUNT,
+};
+
+struct volume_region {
+ /* The ID of the region */
+ enum volume_region_id id;
+ /*
+ * The absolute starting offset on the device. The region continues until the next region
+ * begins.
+ */
+ physical_block_number_t start_block;
+} __packed;
+
+struct volume_geometry {
+ /* For backwards compatibility */
+ u32 unused;
+ /* The nonce of this volume */
+ nonce_t nonce;
+ /* The uuid of this volume */
+ uuid_t uuid;
+ /* The block offset to be applied to bios */
+ block_count_t bio_offset;
+ /* The regions in ID order */
+ struct volume_region regions[VDO_VOLUME_REGION_COUNT];
+ /* The index config */
+ struct index_config index_config;
+} __packed;
+
+/* This volume geometry struct is used for sizing only */
+struct volume_geometry_4_0 {
+ /* For backwards compatibility */
+ u32 unused;
+ /* The nonce of this volume */
+ nonce_t nonce;
+ /* The uuid of this volume */
+ uuid_t uuid;
+ /* The regions in ID order */
+ struct volume_region regions[VDO_VOLUME_REGION_COUNT];
+ /* The index config */
+ struct index_config index_config;
+} __packed;
+
+extern const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1];
+
+/**
+ * DOC: Block map entries
+ *
+ * The entry for each logical block in the block map is encoded into five bytes, which saves space
+ * in both the on-disk and in-memory layouts. It consists of the 36 low-order bits of a
+ * physical_block_number_t (addressing 256 terabytes with a 4KB block size) and a 4-bit encoding of
+ * a block_mapping_state.
+ *
+ * Of the 8 high bits of the 5-byte structure:
+ *
+ * Bits 7..4: The four highest bits of the 36-bit physical block number
+ * Bits 3..0: The 4-bit block_mapping_state
+ *
+ * The following 4 bytes are the low order bytes of the physical block number, in little-endian
+ * order.
+ *
+ * Conversion functions to and from a data location are provided.
+ */
+struct block_map_entry {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ unsigned mapping_state : 4;
+ unsigned pbn_high_nibble : 4;
+#else
+ unsigned pbn_high_nibble : 4;
+ unsigned mapping_state : 4;
+#endif
+
+ __le32 pbn_low_word;
+} __packed;
+
+struct block_map_page_header {
+ __le64 nonce;
+ __le64 pbn;
+
+ /* May be non-zero on disk */
+ u8 unused_long_word[8];
+
+ /* Whether this page has been written twice to disk */
+ bool initialized;
+
+ /* Always zero on disk */
+ u8 unused_byte1;
+
+ /* May be non-zero on disk */
+ u8 unused_byte2;
+ u8 unused_byte3;
+} __packed;
+
+struct block_map_page {
+ struct packed_version_number version;
+ struct block_map_page_header header;
+ struct block_map_entry entries[];
+} __packed;
+
+enum block_map_page_validity {
+ VDO_BLOCK_MAP_PAGE_VALID,
+ VDO_BLOCK_MAP_PAGE_INVALID,
+ /* Valid page found in the wrong location on disk */
+ VDO_BLOCK_MAP_PAGE_BAD,
+};
+
+struct block_map_state_2_0 {
+ physical_block_number_t flat_page_origin;
+ block_count_t flat_page_count;
+ physical_block_number_t root_origin;
+ block_count_t root_count;
+} __packed;
+
+struct boundary {
+ page_number_t levels[VDO_BLOCK_MAP_TREE_HEIGHT];
+};
+
+extern const struct header VDO_BLOCK_MAP_HEADER_2_0;
+
+/* The state of the recovery journal as encoded in the VDO super block. */
+struct recovery_journal_state_7_0 {
+ /* Sequence number to start the journal */
+ sequence_number_t journal_start;
+ /* Number of logical blocks used by VDO */
+ block_count_t logical_blocks_used;
+ /* Number of block map pages allocated */
+ block_count_t block_map_data_blocks;
+} __packed;
+
+extern const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0;
+
+typedef u16 journal_entry_count_t;
+
+/*
+ * A recovery journal entry stores three physical locations: a data location that is the value of a
+ * single mapping in the block map tree, and the two locations of the block map pages and slots
+ * that are acquiring and releasing a reference to the location. The journal entry also stores an
+ * operation code that says whether the mapping is for a logical block or for the block map tree
+ * itself.
+ */
+struct recovery_journal_entry {
+ struct block_map_slot slot;
+ struct data_location mapping;
+ struct data_location unmapping;
+ enum journal_operation operation;
+};
+
+/* The packed, on-disk representation of a recovery journal entry. */
+struct packed_recovery_journal_entry {
+ /*
+ * In little-endian bit order:
+ * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map
+ * tree page
+ * Bits 11..2: The 10-bit block map page slot number
+ * Bit 1..0: The journal_operation of the entry (this actually only requires 1 bit, but
+ * it is convenient to keep the extra bit as part of this field.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ unsigned operation : 2;
+ unsigned slot_low : 6;
+ unsigned slot_high : 4;
+ unsigned pbn_high_nibble : 4;
+#else
+ unsigned slot_low : 6;
+ unsigned operation : 2;
+ unsigned pbn_high_nibble : 4;
+ unsigned slot_high : 4;
+#endif
+
+ /*
+ * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte
+ * order
+ */
+ __le32 pbn_low_word;
+
+ /*
+ * Bits 87..48: The five-byte block map entry encoding the location that will be stored in
+ * the block map page slot
+ */
+ struct block_map_entry mapping;
+
+ /*
+ * Bits 127..88: The five-byte block map entry encoding the location that was stored in the
+ * block map page slot
+ */
+ struct block_map_entry unmapping;
+} __packed;
+
+/* The packed, on-disk representation of an old format recovery journal entry. */
+struct packed_recovery_journal_entry_1 {
+ /*
+ * In little-endian bit order:
+ * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map
+ * tree page
+ * Bits 11..2: The 10-bit block map page slot number
+ * Bits 1..0: The 2-bit journal_operation of the entry
+ *
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ unsigned operation : 2;
+ unsigned slot_low : 6;
+ unsigned slot_high : 4;
+ unsigned pbn_high_nibble : 4;
+#else
+ unsigned slot_low : 6;
+ unsigned operation : 2;
+ unsigned pbn_high_nibble : 4;
+ unsigned slot_high : 4;
+#endif
+
+ /*
+ * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte
+ * order
+ */
+ __le32 pbn_low_word;
+
+ /*
+ * Bits 87..48: The five-byte block map entry encoding the location that was or will be
+ * stored in the block map page slot
+ */
+ struct block_map_entry block_map_entry;
+} __packed;
+
+enum journal_operation_1 {
+ VDO_JOURNAL_DATA_DECREMENT = 0,
+ VDO_JOURNAL_DATA_INCREMENT = 1,
+ VDO_JOURNAL_BLOCK_MAP_DECREMENT = 2,
+ VDO_JOURNAL_BLOCK_MAP_INCREMENT = 3,
+} __packed;
+
+struct recovery_block_header {
+ sequence_number_t block_map_head; /* Block map head sequence number */
+ sequence_number_t slab_journal_head; /* Slab journal head seq. number */
+ sequence_number_t sequence_number; /* Sequence number for this block */
+ nonce_t nonce; /* A given VDO instance's nonce */
+ block_count_t logical_blocks_used; /* Logical blocks in use */
+ block_count_t block_map_data_blocks; /* Allocated block map pages */
+ journal_entry_count_t entry_count; /* Number of entries written */
+ u8 check_byte; /* The protection check byte */
+ u8 recovery_count; /* Number of recoveries completed */
+ enum vdo_metadata_type metadata_type; /* Metadata type */
+};
+
+/*
+ * The packed, on-disk representation of a recovery journal block header. All fields are kept in
+ * little-endian byte order.
+ */
+struct packed_journal_header {
+ /* Block map head 64-bit sequence number */
+ __le64 block_map_head;
+
+ /* Slab journal head 64-bit sequence number */
+ __le64 slab_journal_head;
+
+ /* The 64-bit sequence number for this block */
+ __le64 sequence_number;
+
+ /* A given VDO instance's 64-bit nonce */
+ __le64 nonce;
+
+ /* 8-bit metadata type (should always be one for the recovery journal) */
+ u8 metadata_type;
+
+ /* 16-bit count of the entries encoded in the block */
+ __le16 entry_count;
+
+ /* 64-bit count of the logical blocks used when this block was opened */
+ __le64 logical_blocks_used;
+
+ /* 64-bit count of the block map blocks used when this block was opened */
+ __le64 block_map_data_blocks;
+
+ /* The protection check byte */
+ u8 check_byte;
+
+ /* The number of recoveries completed */
+ u8 recovery_count;
+} __packed;
+
+struct packed_journal_sector {
+ /* The protection check byte */
+ u8 check_byte;
+
+ /* The number of recoveries completed */
+ u8 recovery_count;
+
+ /* The number of entries in this sector */
+ u8 entry_count;
+
+ /* Journal entries for this sector */
+ struct packed_recovery_journal_entry entries[];
+} __packed;
+
+enum {
+ /* The number of entries in each sector (except the last) when filled */
+ RECOVERY_JOURNAL_ENTRIES_PER_SECTOR =
+ ((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
+ sizeof(struct packed_recovery_journal_entry)),
+ RECOVERY_JOURNAL_ENTRIES_PER_BLOCK = RECOVERY_JOURNAL_ENTRIES_PER_SECTOR * 7,
+ /* The number of entries in a v1 recovery journal block. */
+ RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK = 311,
+ /* The number of entries in each v1 sector (except the last) when filled */
+ RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR =
+ ((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
+ sizeof(struct packed_recovery_journal_entry_1)),
+ /* The number of entries in the last sector when a block is full */
+ RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR =
+ (RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK % RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR),
+};
+
+/* A type representing a reference count of a block. */
+typedef u8 vdo_refcount_t;
+
+/* The absolute position of an entry in a recovery journal or slab journal. */
+struct journal_point {
+ sequence_number_t sequence_number;
+ journal_entry_count_t entry_count;
+};
+
+/* A packed, platform-independent encoding of a struct journal_point. */
+struct packed_journal_point {
+ /*
+ * The packed representation is the little-endian 64-bit representation of the low-order 48
+ * bits of the sequence number, shifted up 16 bits, or'ed with the 16-bit entry count.
+ *
+ * Very long-term, the top 16 bits of the sequence number may not always be zero, as this
+ * encoding assumes--see BZ 1523240.
+ */
+ __le64 encoded_point;
+} __packed;
+
+/* Special vdo_refcount_t values. */
+#define EMPTY_REFERENCE_COUNT 0
+enum {
+ MAXIMUM_REFERENCE_COUNT = 254,
+ PROVISIONAL_REFERENCE_COUNT = 255,
+};
+
+enum {
+ COUNTS_PER_SECTOR =
+ ((VDO_SECTOR_SIZE - sizeof(struct packed_journal_point)) / sizeof(vdo_refcount_t)),
+ COUNTS_PER_BLOCK = COUNTS_PER_SECTOR * VDO_SECTORS_PER_BLOCK,
+};
+
+/* The format of each sector of a reference_block on disk. */
+struct packed_reference_sector {
+ struct packed_journal_point commit_point;
+ vdo_refcount_t counts[COUNTS_PER_SECTOR];
+} __packed;
+
+struct packed_reference_block {
+ struct packed_reference_sector sectors[VDO_SECTORS_PER_BLOCK];
+};
+
+struct slab_depot_state_2_0 {
+ struct slab_config slab_config;
+ physical_block_number_t first_block;
+ physical_block_number_t last_block;
+ zone_count_t zone_count;
+} __packed;
+
+extern const struct header VDO_SLAB_DEPOT_HEADER_2_0;
+
+/*
+ * vdo_slab journal blocks may have one of two formats, depending upon whether or not any of the
+ * entries in the block are block map increments. Since the steady state for a VDO is that all of
+ * the necessary block map pages will be allocated, most slab journal blocks will have only data
+ * entries. Such blocks can hold more entries, hence the two formats.
+ */
+
+/* A single slab journal entry */
+struct slab_journal_entry {
+ slab_block_number sbn;
+ enum journal_operation operation;
+ bool increment;
+};
+
+/* A single slab journal entry in its on-disk form */
+typedef struct {
+ u8 offset_low8;
+ u8 offset_mid8;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ unsigned offset_high7 : 7;
+ unsigned increment : 1;
+#else
+ unsigned increment : 1;
+ unsigned offset_high7 : 7;
+#endif
+} __packed packed_slab_journal_entry;
+
+/* The unpacked representation of the header of a slab journal block */
+struct slab_journal_block_header {
+ /* Sequence number for head of journal */
+ sequence_number_t head;
+ /* Sequence number for this block */
+ sequence_number_t sequence_number;
+ /* The nonce for a given VDO instance */
+ nonce_t nonce;
+ /* Recovery journal point for last entry */
+ struct journal_point recovery_point;
+ /* Metadata type */
+ enum vdo_metadata_type metadata_type;
+ /* Whether this block contains block map increments */
+ bool has_block_map_increments;
+ /* The number of entries in the block */
+ journal_entry_count_t entry_count;
+};
+
+/*
+ * The packed, on-disk representation of a slab journal block header. All fields are kept in
+ * little-endian byte order.
+ */
+struct packed_slab_journal_block_header {
+ /* 64-bit sequence number for head of journal */
+ __le64 head;
+ /* 64-bit sequence number for this block */
+ __le64 sequence_number;
+ /* Recovery journal point for the last entry, packed into 64 bits */
+ struct packed_journal_point recovery_point;
+ /* The 64-bit nonce for a given VDO instance */
+ __le64 nonce;
+ /* 8-bit metadata type (should always be two, for the slab journal) */
+ u8 metadata_type;
+ /* Whether this block contains block map increments */
+ bool has_block_map_increments;
+ /* 16-bit count of the entries encoded in the block */
+ __le16 entry_count;
+} __packed;
+
+enum {
+ VDO_SLAB_JOURNAL_PAYLOAD_SIZE =
+ VDO_BLOCK_SIZE - sizeof(struct packed_slab_journal_block_header),
+ VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK = (VDO_SLAB_JOURNAL_PAYLOAD_SIZE * 8) / 25,
+ VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE =
+ ((VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK - 1) / 8) + 1,
+ VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK =
+ (VDO_SLAB_JOURNAL_PAYLOAD_SIZE / sizeof(packed_slab_journal_entry)),
+};
+
+/* The payload of a slab journal block which has block map increments */
+struct full_slab_journal_entries {
+ /* The entries themselves */
+ packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK];
+ /* The bit map indicating which entries are block map increments */
+ u8 entry_types[VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE];
+} __packed;
+
+typedef union {
+ /* Entries which include block map increments */
+ struct full_slab_journal_entries full_entries;
+ /* Entries which are only data updates */
+ packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK];
+ /* Ensure the payload fills to the end of the block */
+ u8 space[VDO_SLAB_JOURNAL_PAYLOAD_SIZE];
+} __packed slab_journal_payload;
+
+struct packed_slab_journal_block {
+ struct packed_slab_journal_block_header header;
+ slab_journal_payload payload;
+} __packed;
+
+/* The offset of a slab journal tail block. */
+typedef u8 tail_block_offset_t;
+
+struct slab_summary_entry {
+ /* Bits 7..0: The offset of the tail block within the slab journal */
+ tail_block_offset_t tail_block_offset;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ /* Bits 13..8: A hint about the fullness of the slab */
+ unsigned int fullness_hint : 6;
+ /* Bit 14: Whether the ref_counts must be loaded from the layer */
+ unsigned int load_ref_counts : 1;
+ /* Bit 15: The believed cleanliness of this slab */
+ unsigned int is_dirty : 1;
+#else
+ /* Bit 15: The believed cleanliness of this slab */
+ unsigned int is_dirty : 1;
+ /* Bit 14: Whether the ref_counts must be loaded from the layer */
+ unsigned int load_ref_counts : 1;
+ /* Bits 13..8: A hint about the fullness of the slab */
+ unsigned int fullness_hint : 6;
+#endif
+} __packed;
+
+enum {
+ VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS = 6,
+ VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK = VDO_BLOCK_SIZE / sizeof(struct slab_summary_entry),
+ VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE = MAX_VDO_SLABS / VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK,
+ VDO_SLAB_SUMMARY_BLOCKS = VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE * MAX_VDO_PHYSICAL_ZONES,
+};
+
+struct layout {
+ physical_block_number_t start;
+ block_count_t size;
+ physical_block_number_t first_free;
+ physical_block_number_t last_free;
+ size_t num_partitions;
+ struct partition *head;
+};
+
+struct partition {
+ enum partition_id id; /* The id of this partition */
+ physical_block_number_t offset; /* The offset into the layout of this partition */
+ block_count_t count; /* The number of blocks in the partition */
+ struct partition *next; /* A pointer to the next partition in the layout */
+};
+
+struct layout_3_0 {
+ physical_block_number_t first_free;
+ physical_block_number_t last_free;
+ u8 partition_count;
+} __packed;
+
+struct partition_3_0 {
+ enum partition_id id;
+ physical_block_number_t offset;
+ physical_block_number_t base; /* unused but retained for backwards compatibility */
+ block_count_t count;
+} __packed;
+
+/*
+ * The configuration of the VDO service.
+ */
+struct vdo_config {
+ block_count_t logical_blocks; /* number of logical blocks */
+ block_count_t physical_blocks; /* number of physical blocks */
+ block_count_t slab_size; /* number of blocks in a slab */
+ block_count_t recovery_journal_size; /* number of recovery journal blocks */
+ block_count_t slab_journal_blocks; /* number of slab journal blocks */
+};
+
+/* This is the structure that captures the vdo fields saved as a super block component. */
+struct vdo_component {
+ enum vdo_state state;
+ u64 complete_recoveries;
+ u64 read_only_recoveries;
+ struct vdo_config config;
+ nonce_t nonce;
+};
+
+/*
+ * A packed, machine-independent, on-disk representation of the vdo_config in the VDO component
+ * data in the super block.
+ */
+struct packed_vdo_config {
+ __le64 logical_blocks;
+ __le64 physical_blocks;
+ __le64 slab_size;
+ __le64 recovery_journal_size;
+ __le64 slab_journal_blocks;
+} __packed;
+
+/*
+ * A packed, machine-independent, on-disk representation of version 41.0 of the VDO component data
+ * in the super block.
+ */
+struct packed_vdo_component_41_0 {
+ __le32 state;
+ __le64 complete_recoveries;
+ __le64 read_only_recoveries;
+ struct packed_vdo_config config;
+ __le64 nonce;
+} __packed;
+
+/*
+ * The version of the on-disk format of a VDO volume. This should be incremented any time the
+ * on-disk representation of any VDO structure changes. Changes which require only online upgrade
+ * steps should increment the minor version. Changes which require an offline upgrade or which can
+ * not be upgraded to at all should increment the major version and set the minor version to 0.
+ */
+extern const struct version_number VDO_VOLUME_VERSION_67_0;
+
+enum {
+ VDO_ENCODED_HEADER_SIZE = sizeof(struct packed_header),
+ BLOCK_MAP_COMPONENT_ENCODED_SIZE =
+ VDO_ENCODED_HEADER_SIZE + sizeof(struct block_map_state_2_0),
+ RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE =
+ VDO_ENCODED_HEADER_SIZE + sizeof(struct recovery_journal_state_7_0),
+ SLAB_DEPOT_COMPONENT_ENCODED_SIZE =
+ VDO_ENCODED_HEADER_SIZE + sizeof(struct slab_depot_state_2_0),
+ VDO_PARTITION_COUNT = 4,
+ VDO_LAYOUT_ENCODED_SIZE = (VDO_ENCODED_HEADER_SIZE +
+ sizeof(struct layout_3_0) +
+ (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT)),
+ VDO_SUPER_BLOCK_FIXED_SIZE = VDO_ENCODED_HEADER_SIZE + sizeof(u32),
+ VDO_MAX_COMPONENT_DATA_SIZE = VDO_SECTOR_SIZE - VDO_SUPER_BLOCK_FIXED_SIZE,
+ VDO_COMPONENT_ENCODED_SIZE =
+ (sizeof(struct packed_version_number) + sizeof(struct packed_vdo_component_41_0)),
+ VDO_COMPONENT_DATA_OFFSET = VDO_ENCODED_HEADER_SIZE,
+ VDO_COMPONENT_DATA_SIZE = (sizeof(u32) +
+ sizeof(struct packed_version_number) +
+ VDO_COMPONENT_ENCODED_SIZE +
+ VDO_LAYOUT_ENCODED_SIZE +
+ RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE +
+ SLAB_DEPOT_COMPONENT_ENCODED_SIZE +
+ BLOCK_MAP_COMPONENT_ENCODED_SIZE),
+};
+
+/* The entirety of the component data encoded in the VDO super block. */
+struct vdo_component_states {
+ /* For backwards compatibility */
+ u32 unused;
+
+ /* The VDO volume version */
+ struct version_number volume_version;
+
+ /* Components */
+ struct vdo_component vdo;
+ struct block_map_state_2_0 block_map;
+ struct recovery_journal_state_7_0 recovery_journal;
+ struct slab_depot_state_2_0 slab_depot;
+
+ /* Our partitioning of the underlying storage */
+ struct layout layout;
+};
+
+/**
+ * vdo_are_same_version() - Check whether two version numbers are the same.
+ * @version_a: The first version.
+ * @version_b: The second version.
+ *
+ * Return: true if the two versions are the same.
+ */
+static inline bool vdo_are_same_version(struct version_number version_a,
+ struct version_number version_b)
+{
+ return ((version_a.major_version == version_b.major_version) &&
+ (version_a.minor_version == version_b.minor_version));
+}
+
+/**
+ * vdo_is_upgradable_version() - Check whether an actual version is upgradable to an expected
+ * version.
+ * @expected_version: The expected version.
+ * @actual_version: The version being validated.
+ *
+ * An actual version is upgradable if its major number is expected but its minor number differs,
+ * and the expected version's minor number is greater than the actual version's minor number.
+ *
+ * Return: true if the actual version is upgradable.
+ */
+static inline bool vdo_is_upgradable_version(struct version_number expected_version,
+ struct version_number actual_version)
+{
+ return ((expected_version.major_version == actual_version.major_version) &&
+ (expected_version.minor_version > actual_version.minor_version));
+}
+
+int __must_check vdo_validate_header(const struct header *expected_header,
+ const struct header *actual_header, bool exact_size,
+ const char *component_name);
+
+void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header);
+void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header);
+
+/**
+ * vdo_pack_version_number() - Convert a version_number to its packed on-disk representation.
+ * @version: The version number to convert.
+ *
+ * Return: the platform-independent representation of the version
+ */
+static inline struct packed_version_number vdo_pack_version_number(struct version_number version)
+{
+ return (struct packed_version_number) {
+ .major_version = __cpu_to_le32(version.major_version),
+ .minor_version = __cpu_to_le32(version.minor_version),
+ };
+}
+
+/**
+ * vdo_unpack_version_number() - Convert a packed_version_number to its native in-memory
+ * representation.
+ * @version: The version number to convert.
+ *
+ * Return: The platform-independent representation of the version.
+ */
+static inline struct version_number vdo_unpack_version_number(struct packed_version_number version)
+{
+ return (struct version_number) {
+ .major_version = __le32_to_cpu(version.major_version),
+ .minor_version = __le32_to_cpu(version.minor_version),
+ };
+}
+
+/**
+ * vdo_pack_header() - Convert a component header to its packed on-disk representation.
+ * @header: The header to convert.
+ *
+ * Return: the platform-independent representation of the header
+ */
+static inline struct packed_header vdo_pack_header(const struct header *header)
+{
+ return (struct packed_header) {
+ .id = __cpu_to_le32(header->id),
+ .version = vdo_pack_version_number(header->version),
+ .size = __cpu_to_le64(header->size),
+ };
+}
+
+/**
+ * vdo_unpack_header() - Convert a packed_header to its native in-memory representation.
+ * @header: The header to convert.
+ *
+ * Return: The platform-independent representation of the version.
+ */
+static inline struct header vdo_unpack_header(const struct packed_header *header)
+{
+ return (struct header) {
+ .id = __le32_to_cpu(header->id),
+ .version = vdo_unpack_version_number(header->version),
+ .size = __le64_to_cpu(header->size),
+ };
+}
+
+/**
+ * vdo_get_index_region_start() - Get the start of the index region from a geometry.
+ * @geometry: The geometry.
+ *
+ * Return: The start of the index region.
+ */
+static inline physical_block_number_t __must_check
+vdo_get_index_region_start(struct volume_geometry geometry)
+{
+ return geometry.regions[VDO_INDEX_REGION].start_block;
+}
+
+/**
+ * vdo_get_data_region_start() - Get the start of the data region from a geometry.
+ * @geometry: The geometry.
+ *
+ * Return: The start of the data region.
+ */
+static inline physical_block_number_t __must_check
+vdo_get_data_region_start(struct volume_geometry geometry)
+{
+ return geometry.regions[VDO_DATA_REGION].start_block;
+}
+
+/**
+ * vdo_get_index_region_size() - Get the size of the index region from a geometry.
+ * @geometry: The geometry.
+ *
+ * Return: The size of the index region.
+ */
+static inline physical_block_number_t __must_check
+vdo_get_index_region_size(struct volume_geometry geometry)
+{
+ return vdo_get_data_region_start(geometry) -
+ vdo_get_index_region_start(geometry);
+}
+
+int __must_check vdo_parse_geometry_block(unsigned char *block,
+ struct volume_geometry *geometry);
+
+static inline bool vdo_is_state_compressed(const enum block_mapping_state mapping_state)
+{
+ return (mapping_state > VDO_MAPPING_STATE_UNCOMPRESSED);
+}
+
+static inline struct block_map_entry
+vdo_pack_block_map_entry(physical_block_number_t pbn, enum block_mapping_state mapping_state)
+{
+ return (struct block_map_entry) {
+ .mapping_state = (mapping_state & 0x0F),
+ .pbn_high_nibble = ((pbn >> 32) & 0x0F),
+ .pbn_low_word = __cpu_to_le32(pbn & UINT_MAX),
+ };
+}
+
+static inline struct data_location vdo_unpack_block_map_entry(const struct block_map_entry *entry)
+{
+ physical_block_number_t low32 = __le32_to_cpu(entry->pbn_low_word);
+ physical_block_number_t high4 = entry->pbn_high_nibble;
+
+ return (struct data_location) {
+ .pbn = ((high4 << 32) | low32),
+ .state = entry->mapping_state,
+ };
+}
+
+static inline bool vdo_is_mapped_location(const struct data_location *location)
+{
+ return (location->state != VDO_MAPPING_STATE_UNMAPPED);
+}
+
+static inline bool vdo_is_valid_location(const struct data_location *location)
+{
+ if (location->pbn == VDO_ZERO_BLOCK)
+ return !vdo_is_state_compressed(location->state);
+ else
+ return vdo_is_mapped_location(location);
+}
+
+static inline physical_block_number_t __must_check
+vdo_get_block_map_page_pbn(const struct block_map_page *page)
+{
+ return __le64_to_cpu(page->header.pbn);
+}
+
+struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
+ physical_block_number_t pbn,
+ bool initialized);
+
+enum block_map_page_validity __must_check vdo_validate_block_map_page(struct block_map_page *page,
+ nonce_t nonce,
+ physical_block_number_t pbn);
+
+static inline page_count_t vdo_compute_block_map_page_count(block_count_t entries)
+{
+ return DIV_ROUND_UP(entries, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
+}
+
+block_count_t __must_check vdo_compute_new_forest_pages(root_count_t root_count,
+ struct boundary *old_sizes,
+ block_count_t entries,
+ struct boundary *new_sizes);
+
+/**
+ * vdo_pack_recovery_journal_entry() - Return the packed, on-disk representation of a recovery
+ * journal entry.
+ * @entry: The journal entry to pack.
+ *
+ * Return: The packed representation of the journal entry.
+ */
+static inline struct packed_recovery_journal_entry
+vdo_pack_recovery_journal_entry(const struct recovery_journal_entry *entry)
+{
+ return (struct packed_recovery_journal_entry) {
+ .operation = entry->operation,
+ .slot_low = entry->slot.slot & 0x3F,
+ .slot_high = (entry->slot.slot >> 6) & 0x0F,
+ .pbn_high_nibble = (entry->slot.pbn >> 32) & 0x0F,
+ .pbn_low_word = __cpu_to_le32(entry->slot.pbn & UINT_MAX),
+ .mapping = vdo_pack_block_map_entry(entry->mapping.pbn,
+ entry->mapping.state),
+ .unmapping = vdo_pack_block_map_entry(entry->unmapping.pbn,
+ entry->unmapping.state),
+ };
+}
+
+/**
+ * vdo_unpack_recovery_journal_entry() - Unpack the on-disk representation of a recovery journal
+ * entry.
+ * @entry: The recovery journal entry to unpack.
+ *
+ * Return: The unpacked entry.
+ */
+static inline struct recovery_journal_entry
+vdo_unpack_recovery_journal_entry(const struct packed_recovery_journal_entry *entry)
+{
+ physical_block_number_t low32 = __le32_to_cpu(entry->pbn_low_word);
+ physical_block_number_t high4 = entry->pbn_high_nibble;
+
+ return (struct recovery_journal_entry) {
+ .operation = entry->operation,
+ .slot = {
+ .pbn = ((high4 << 32) | low32),
+ .slot = (entry->slot_low | (entry->slot_high << 6)),
+ },
+ .mapping = vdo_unpack_block_map_entry(&entry->mapping),
+ .unmapping = vdo_unpack_block_map_entry(&entry->unmapping),
+ };
+}
+
+const char * __must_check vdo_get_journal_operation_name(enum journal_operation operation);
+
+/**
+ * vdo_is_valid_recovery_journal_sector() - Determine whether the header of the given sector could
+ * describe a valid sector for the given journal block
+ * header.
+ * @header: The unpacked block header to compare against.
+ * @sector: The packed sector to check.
+ * @sector_number: The number of the sector being checked.
+ *
+ * Return: true if the sector matches the block header.
+ */
+static inline bool __must_check
+vdo_is_valid_recovery_journal_sector(const struct recovery_block_header *header,
+ const struct packed_journal_sector *sector,
+ u8 sector_number)
+{
+ if ((header->check_byte != sector->check_byte) ||
+ (header->recovery_count != sector->recovery_count))
+ return false;
+
+ if (header->metadata_type == VDO_METADATA_RECOVERY_JOURNAL_2)
+ return sector->entry_count <= RECOVERY_JOURNAL_ENTRIES_PER_SECTOR;
+
+ if (sector_number == 7)
+ return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR;
+
+ return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR;
+}
+
+/**
+ * vdo_compute_recovery_journal_block_number() - Compute the physical block number of the recovery
+ * journal block which would have a given sequence
+ * number.
+ * @journal_size: The size of the journal.
+ * @sequence_number: The sequence number.
+ *
+ * Return: The pbn of the journal block which would the specified sequence number.
+ */
+static inline physical_block_number_t __must_check
+vdo_compute_recovery_journal_block_number(block_count_t journal_size,
+ sequence_number_t sequence_number)
+{
+ /*
+ * Since journal size is a power of two, the block number modulus can just be extracted
+ * from the low-order bits of the sequence.
+ */
+ return (sequence_number & (journal_size - 1));
+}
+
+/**
+ * vdo_get_journal_block_sector() - Find the recovery journal sector from the block header and
+ * sector number.
+ * @header: The header of the recovery journal block.
+ * @sector_number: The index of the sector (1-based).
+ *
+ * Return: A packed recovery journal sector.
+ */
+static inline struct packed_journal_sector * __must_check
+vdo_get_journal_block_sector(struct packed_journal_header *header, int sector_number)
+{
+ char *sector_data = ((char *) header) + (VDO_SECTOR_SIZE * sector_number);
+
+ return (struct packed_journal_sector *) sector_data;
+}
+
+/**
+ * vdo_pack_recovery_block_header() - Generate the packed representation of a recovery block
+ * header.
+ * @header: The header containing the values to encode.
+ * @packed: The header into which to pack the values.
+ */
+static inline void vdo_pack_recovery_block_header(const struct recovery_block_header *header,
+ struct packed_journal_header *packed)
+{
+ *packed = (struct packed_journal_header) {
+ .block_map_head = __cpu_to_le64(header->block_map_head),
+ .slab_journal_head = __cpu_to_le64(header->slab_journal_head),
+ .sequence_number = __cpu_to_le64(header->sequence_number),
+ .nonce = __cpu_to_le64(header->nonce),
+ .logical_blocks_used = __cpu_to_le64(header->logical_blocks_used),
+ .block_map_data_blocks = __cpu_to_le64(header->block_map_data_blocks),
+ .entry_count = __cpu_to_le16(header->entry_count),
+ .check_byte = header->check_byte,
+ .recovery_count = header->recovery_count,
+ .metadata_type = header->metadata_type,
+ };
+}
+
+/**
+ * vdo_unpack_recovery_block_header() - Decode the packed representation of a recovery block
+ * header.
+ * @packed: The packed header to decode.
+ *
+ * Return: The unpacked header.
+ */
+static inline struct recovery_block_header
+vdo_unpack_recovery_block_header(const struct packed_journal_header *packed)
+{
+ return (struct recovery_block_header) {
+ .block_map_head = __le64_to_cpu(packed->block_map_head),
+ .slab_journal_head = __le64_to_cpu(packed->slab_journal_head),
+ .sequence_number = __le64_to_cpu(packed->sequence_number),
+ .nonce = __le64_to_cpu(packed->nonce),
+ .logical_blocks_used = __le64_to_cpu(packed->logical_blocks_used),
+ .block_map_data_blocks = __le64_to_cpu(packed->block_map_data_blocks),
+ .entry_count = __le16_to_cpu(packed->entry_count),
+ .check_byte = packed->check_byte,
+ .recovery_count = packed->recovery_count,
+ .metadata_type = packed->metadata_type,
+ };
+}
+
+/**
+ * vdo_compute_slab_count() - Compute the number of slabs a depot with given parameters would have.
+ * @first_block: PBN of the first data block.
+ * @last_block: PBN of the last data block.
+ * @slab_size_shift: Exponent for the number of blocks per slab.
+ *
+ * Return: The number of slabs.
+ */
+static inline slab_count_t vdo_compute_slab_count(physical_block_number_t first_block,
+ physical_block_number_t last_block,
+ unsigned int slab_size_shift)
+{
+ return (slab_count_t) ((last_block - first_block) >> slab_size_shift);
+}
+
+int __must_check vdo_configure_slab_depot(const struct partition *partition,
+ struct slab_config slab_config,
+ zone_count_t zone_count,
+ struct slab_depot_state_2_0 *state);
+
+int __must_check vdo_configure_slab(block_count_t slab_size,
+ block_count_t slab_journal_blocks,
+ struct slab_config *slab_config);
+
+/**
+ * vdo_get_saved_reference_count_size() - Get the number of blocks required to save a reference
+ * counts state covering the specified number of data
+ * blocks.
+ * @block_count: The number of physical data blocks that can be referenced.
+ *
+ * Return: The number of blocks required to save reference counts with the given block count.
+ */
+static inline block_count_t vdo_get_saved_reference_count_size(block_count_t block_count)
+{
+ return DIV_ROUND_UP(block_count, COUNTS_PER_BLOCK);
+}
+
+/**
+ * vdo_get_slab_journal_start_block() - Get the physical block number of the start of the slab
+ * journal relative to the start block allocator partition.
+ * @slab_config: The slab configuration of the VDO.
+ * @origin: The first block of the slab.
+ */
+static inline physical_block_number_t __must_check
+vdo_get_slab_journal_start_block(const struct slab_config *slab_config,
+ physical_block_number_t origin)
+{
+ return origin + slab_config->data_blocks + slab_config->reference_count_blocks;
+}
+
+/**
+ * vdo_advance_journal_point() - Move the given journal point forward by one entry.
+ * @point: The journal point to adjust.
+ * @entries_per_block: The number of entries in one full block.
+ */
+static inline void vdo_advance_journal_point(struct journal_point *point,
+ journal_entry_count_t entries_per_block)
+{
+ point->entry_count++;
+ if (point->entry_count == entries_per_block) {
+ point->sequence_number++;
+ point->entry_count = 0;
+ }
+}
+
+/**
+ * vdo_before_journal_point() - Check whether the first point precedes the second point.
+ * @first: The first journal point.
+ * @second: The second journal point.
+ *
+ * Return: true if the first point precedes the second point.
+ */
+static inline bool vdo_before_journal_point(const struct journal_point *first,
+ const struct journal_point *second)
+{
+ return ((first->sequence_number < second->sequence_number) ||
+ ((first->sequence_number == second->sequence_number) &&
+ (first->entry_count < second->entry_count)));
+}
+
+/**
+ * vdo_pack_journal_point() - Encode the journal location represented by a
+ * journal_point into a packed_journal_point.
+ * @unpacked: The unpacked input point.
+ * @packed: The packed output point.
+ */
+static inline void vdo_pack_journal_point(const struct journal_point *unpacked,
+ struct packed_journal_point *packed)
+{
+ packed->encoded_point =
+ __cpu_to_le64((unpacked->sequence_number << 16) | unpacked->entry_count);
+}
+
+/**
+ * vdo_unpack_journal_point() - Decode the journal location represented by a packed_journal_point
+ * into a journal_point.
+ * @packed: The packed input point.
+ * @unpacked: The unpacked output point.
+ */
+static inline void vdo_unpack_journal_point(const struct packed_journal_point *packed,
+ struct journal_point *unpacked)
+{
+ u64 native = __le64_to_cpu(packed->encoded_point);
+
+ unpacked->sequence_number = (native >> 16);
+ unpacked->entry_count = (native & 0xffff);
+}
+
+/**
+ * vdo_pack_slab_journal_block_header() - Generate the packed representation of a slab block
+ * header.
+ * @header: The header containing the values to encode.
+ * @packed: The header into which to pack the values.
+ */
+static inline void
+vdo_pack_slab_journal_block_header(const struct slab_journal_block_header *header,
+ struct packed_slab_journal_block_header *packed)
+{
+ packed->head = __cpu_to_le64(header->head);
+ packed->sequence_number = __cpu_to_le64(header->sequence_number);
+ packed->nonce = __cpu_to_le64(header->nonce);
+ packed->entry_count = __cpu_to_le16(header->entry_count);
+ packed->metadata_type = header->metadata_type;
+ packed->has_block_map_increments = header->has_block_map_increments;
+
+ vdo_pack_journal_point(&header->recovery_point, &packed->recovery_point);
+}
+
+/**
+ * vdo_unpack_slab_journal_block_header() - Decode the packed representation of a slab block
+ * header.
+ * @packed: The packed header to decode.
+ * @header: The header into which to unpack the values.
+ */
+static inline void
+vdo_unpack_slab_journal_block_header(const struct packed_slab_journal_block_header *packed,
+ struct slab_journal_block_header *header)
+{
+ *header = (struct slab_journal_block_header) {
+ .head = __le64_to_cpu(packed->head),
+ .sequence_number = __le64_to_cpu(packed->sequence_number),
+ .nonce = __le64_to_cpu(packed->nonce),
+ .entry_count = __le16_to_cpu(packed->entry_count),
+ .metadata_type = packed->metadata_type,
+ .has_block_map_increments = packed->has_block_map_increments,
+ };
+ vdo_unpack_journal_point(&packed->recovery_point, &header->recovery_point);
+}
+
+/**
+ * vdo_pack_slab_journal_entry() - Generate the packed encoding of a slab journal entry.
+ * @packed: The entry into which to pack the values.
+ * @sbn: The slab block number of the entry to encode.
+ * @is_increment: The increment flag.
+ */
+static inline void vdo_pack_slab_journal_entry(packed_slab_journal_entry *packed,
+ slab_block_number sbn, bool is_increment)
+{
+ packed->offset_low8 = (sbn & 0x0000FF);
+ packed->offset_mid8 = (sbn & 0x00FF00) >> 8;
+ packed->offset_high7 = (sbn & 0x7F0000) >> 16;
+ packed->increment = is_increment ? 1 : 0;
+}
+
+/**
+ * vdo_unpack_slab_journal_entry() - Decode the packed representation of a slab journal entry.
+ * @packed: The packed entry to decode.
+ *
+ * Return: The decoded slab journal entry.
+ */
+static inline struct slab_journal_entry __must_check
+vdo_unpack_slab_journal_entry(const packed_slab_journal_entry *packed)
+{
+ struct slab_journal_entry entry;
+
+ entry.sbn = packed->offset_high7;
+ entry.sbn <<= 8;
+ entry.sbn |= packed->offset_mid8;
+ entry.sbn <<= 8;
+ entry.sbn |= packed->offset_low8;
+ entry.operation = VDO_JOURNAL_DATA_REMAPPING;
+ entry.increment = packed->increment;
+ return entry;
+}
+
+struct slab_journal_entry __must_check
+vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
+ journal_entry_count_t entry_count);
+
+/**
+ * vdo_get_slab_summary_hint_shift() - Compute the shift for slab summary hints.
+ * @slab_size_shift: Exponent for the number of blocks per slab.
+ *
+ * Return: The hint shift.
+ */
+static inline u8 __must_check vdo_get_slab_summary_hint_shift(unsigned int slab_size_shift)
+{
+ return ((slab_size_shift > VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) ?
+ (slab_size_shift - VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) :
+ 0);
+}
+
+int __must_check vdo_initialize_layout(block_count_t size,
+ physical_block_number_t offset,
+ block_count_t block_map_blocks,
+ block_count_t journal_blocks,
+ block_count_t summary_blocks,
+ struct layout *layout);
+
+void vdo_uninitialize_layout(struct layout *layout);
+
+int __must_check vdo_get_partition(struct layout *layout, enum partition_id id,
+ struct partition **partition_ptr);
+
+struct partition * __must_check vdo_get_known_partition(struct layout *layout,
+ enum partition_id id);
+
+int vdo_validate_config(const struct vdo_config *config,
+ block_count_t physical_block_count,
+ block_count_t logical_block_count);
+
+void vdo_destroy_component_states(struct vdo_component_states *states);
+
+int __must_check vdo_decode_component_states(u8 *buffer,
+ struct volume_geometry *geometry,
+ struct vdo_component_states *states);
+
+int __must_check vdo_validate_component_states(struct vdo_component_states *states,
+ nonce_t geometry_nonce,
+ block_count_t physical_size,
+ block_count_t logical_size);
+
+void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states);
+int __must_check vdo_decode_super_block(u8 *buffer);
+
+/* We start with 0L and postcondition with ~0L to match our historical usage in userspace. */
+static inline u32 vdo_crc32(const void *buf, unsigned long len)
+{
+ return (crc32(0L, buf, len) ^ ~0L);
+}
+
+#endif /* VDO_ENCODINGS_H */
diff --git a/drivers/md/dm-vdo/errors.c b/drivers/md/dm-vdo/errors.c
new file mode 100644
index 000000000000..6f89eb1c63a3
--- /dev/null
+++ b/drivers/md/dm-vdo/errors.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "errors.h"
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+
+#include "logger.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+static const struct error_info successful = { "UDS_SUCCESS", "Success" };
+
+static const char *const message_table[] = {
+ [EPERM] = "Operation not permitted",
+ [ENOENT] = "No such file or directory",
+ [ESRCH] = "No such process",
+ [EINTR] = "Interrupted system call",
+ [EIO] = "Input/output error",
+ [ENXIO] = "No such device or address",
+ [E2BIG] = "Argument list too long",
+ [ENOEXEC] = "Exec format error",
+ [EBADF] = "Bad file descriptor",
+ [ECHILD] = "No child processes",
+ [EAGAIN] = "Resource temporarily unavailable",
+ [ENOMEM] = "Cannot allocate memory",
+ [EACCES] = "Permission denied",
+ [EFAULT] = "Bad address",
+ [ENOTBLK] = "Block device required",
+ [EBUSY] = "Device or resource busy",
+ [EEXIST] = "File exists",
+ [EXDEV] = "Invalid cross-device link",
+ [ENODEV] = "No such device",
+ [ENOTDIR] = "Not a directory",
+ [EISDIR] = "Is a directory",
+ [EINVAL] = "Invalid argument",
+ [ENFILE] = "Too many open files in system",
+ [EMFILE] = "Too many open files",
+ [ENOTTY] = "Inappropriate ioctl for device",
+ [ETXTBSY] = "Text file busy",
+ [EFBIG] = "File too large",
+ [ENOSPC] = "No space left on device",
+ [ESPIPE] = "Illegal seek",
+ [EROFS] = "Read-only file system",
+ [EMLINK] = "Too many links",
+ [EPIPE] = "Broken pipe",
+ [EDOM] = "Numerical argument out of domain",
+ [ERANGE] = "Numerical result out of range"
+};
+
+static const struct error_info error_list[] = {
+ { "UDS_OVERFLOW", "Index overflow" },
+ { "UDS_INVALID_ARGUMENT", "Invalid argument passed to internal routine" },
+ { "UDS_BAD_STATE", "UDS data structures are in an invalid state" },
+ { "UDS_DUPLICATE_NAME", "Attempt to enter the same name into a delta index twice" },
+ { "UDS_ASSERTION_FAILED", "Assertion failed" },
+ { "UDS_QUEUED", "Request queued" },
+ { "UDS_ALREADY_REGISTERED", "Error range already registered" },
+ { "UDS_OUT_OF_RANGE", "Cannot access data outside specified limits" },
+ { "UDS_DISABLED", "UDS library context is disabled" },
+ { "UDS_UNSUPPORTED_VERSION", "Unsupported version" },
+ { "UDS_CORRUPT_DATA", "Some index structure is corrupt" },
+ { "UDS_NO_INDEX", "No index found" },
+ { "UDS_INDEX_NOT_SAVED_CLEANLY", "Index not saved cleanly" },
+};
+
+struct error_block {
+ const char *name;
+ int base;
+ int last;
+ int max;
+ const struct error_info *infos;
+};
+
+#define MAX_ERROR_BLOCKS 6
+
+static struct {
+ int allocated;
+ int count;
+ struct error_block blocks[MAX_ERROR_BLOCKS];
+} registered_errors = {
+ .allocated = MAX_ERROR_BLOCKS,
+ .count = 1,
+ .blocks = { {
+ .name = "UDS Error",
+ .base = UDS_ERROR_CODE_BASE,
+ .last = UDS_ERROR_CODE_LAST,
+ .max = UDS_ERROR_CODE_BLOCK_END,
+ .infos = error_list,
+ } },
+};
+
+/* Get the error info for an error number. Also returns the name of the error block, if known. */
+static const char *get_error_info(int errnum, const struct error_info **info_ptr)
+{
+ struct error_block *block;
+
+ if (errnum == UDS_SUCCESS) {
+ *info_ptr = &successful;
+ return NULL;
+ }
+
+ for (block = registered_errors.blocks;
+ block < registered_errors.blocks + registered_errors.count;
+ block++) {
+ if ((errnum >= block->base) && (errnum < block->last)) {
+ *info_ptr = block->infos + (errnum - block->base);
+ return block->name;
+ } else if ((errnum >= block->last) && (errnum < block->max)) {
+ *info_ptr = NULL;
+ return block->name;
+ }
+ }
+
+ return NULL;
+}
+
+/* Return a string describing a system error message. */
+static const char *system_string_error(int errnum, char *buf, size_t buflen)
+{
+ size_t len;
+ const char *error_string = NULL;
+
+ if ((errnum > 0) && (errnum < ARRAY_SIZE(message_table)))
+ error_string = message_table[errnum];
+
+ len = ((error_string == NULL) ?
+ snprintf(buf, buflen, "Unknown error %d", errnum) :
+ snprintf(buf, buflen, "%s", error_string));
+ if (len < buflen)
+ return buf;
+
+ buf[0] = '\0';
+ return "System error";
+}
+
+/* Convert an error code to a descriptive string. */
+const char *uds_string_error(int errnum, char *buf, size_t buflen)
+{
+ char *buffer = buf;
+ char *buf_end = buf + buflen;
+ const struct error_info *info = NULL;
+ const char *block_name;
+
+ if (buf == NULL)
+ return NULL;
+
+ if (errnum < 0)
+ errnum = -errnum;
+
+ block_name = get_error_info(errnum, &info);
+ if (block_name != NULL) {
+ if (info != NULL) {
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s: %s",
+ block_name, info->message);
+ } else {
+ buffer = vdo_append_to_buffer(buffer, buf_end, "Unknown %s %d",
+ block_name, errnum);
+ }
+ } else if (info != NULL) {
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s", info->message);
+ } else {
+ const char *tmp = system_string_error(errnum, buffer, buf_end - buffer);
+
+ if (tmp != buffer)
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s", tmp);
+ else
+ buffer += strlen(tmp);
+ }
+
+ return buf;
+}
+
+/* Convert an error code to its name. */
+const char *uds_string_error_name(int errnum, char *buf, size_t buflen)
+{
+ char *buffer = buf;
+ char *buf_end = buf + buflen;
+ const struct error_info *info = NULL;
+ const char *block_name;
+
+ if (errnum < 0)
+ errnum = -errnum;
+
+ block_name = get_error_info(errnum, &info);
+ if (block_name != NULL) {
+ if (info != NULL) {
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s", info->name);
+ } else {
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s %d",
+ block_name, errnum);
+ }
+ } else if (info != NULL) {
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s", info->name);
+ } else {
+ const char *tmp;
+
+ tmp = system_string_error(errnum, buffer, buf_end - buffer);
+ if (tmp != buffer)
+ buffer = vdo_append_to_buffer(buffer, buf_end, "%s", tmp);
+ else
+ buffer += strlen(tmp);
+ }
+
+ return buf;
+}
+
+/*
+ * Translate an error code into a value acceptable to the kernel. The input error code may be a
+ * system-generated value (such as -EIO), or an internal UDS status code. The result will be a
+ * negative errno value.
+ */
+int uds_status_to_errno(int error)
+{
+ char error_name[VDO_MAX_ERROR_NAME_SIZE];
+ char error_message[VDO_MAX_ERROR_MESSAGE_SIZE];
+
+ /* 0 is success, and negative values are already system error codes. */
+ if (likely(error <= 0))
+ return error;
+
+ if (error < 1024) {
+ /* This is probably an errno from userspace. */
+ return -error;
+ }
+
+ /* Internal UDS errors */
+ switch (error) {
+ case UDS_NO_INDEX:
+ case UDS_CORRUPT_DATA:
+ /* The index doesn't exist or can't be recovered. */
+ return -ENOENT;
+
+ case UDS_INDEX_NOT_SAVED_CLEANLY:
+ case UDS_UNSUPPORTED_VERSION:
+ /*
+ * The index exists, but can't be loaded. Tell the client it exists so they don't
+ * destroy it inadvertently.
+ */
+ return -EEXIST;
+
+ case UDS_DISABLED:
+ /* The session is unusable; only returned by requests. */
+ return -EIO;
+
+ default:
+ /* Translate an unexpected error into something generic. */
+ vdo_log_info("%s: mapping status code %d (%s: %s) to -EIO",
+ __func__, error,
+ uds_string_error_name(error, error_name,
+ sizeof(error_name)),
+ uds_string_error(error, error_message,
+ sizeof(error_message)));
+ return -EIO;
+ }
+}
+
+/*
+ * Register a block of error codes.
+ *
+ * @block_name: the name of the block of error codes
+ * @first_error: the first error code in the block
+ * @next_free_error: one past the highest possible error in the block
+ * @infos: a pointer to the error info array for the block
+ * @info_size: the size of the error info array
+ */
+int uds_register_error_block(const char *block_name, int first_error,
+ int next_free_error, const struct error_info *infos,
+ size_t info_size)
+{
+ int result;
+ struct error_block *block;
+ struct error_block new_block = {
+ .name = block_name,
+ .base = first_error,
+ .last = first_error + (info_size / sizeof(struct error_info)),
+ .max = next_free_error,
+ .infos = infos,
+ };
+
+ result = VDO_ASSERT(first_error < next_free_error,
+ "well-defined error block range");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (registered_errors.count == registered_errors.allocated) {
+ /* This should never happen. */
+ return UDS_OVERFLOW;
+ }
+
+ for (block = registered_errors.blocks;
+ block < registered_errors.blocks + registered_errors.count;
+ block++) {
+ if (strcmp(block_name, block->name) == 0)
+ return UDS_DUPLICATE_NAME;
+
+ /* Ensure error ranges do not overlap. */
+ if ((first_error < block->max) && (next_free_error > block->base))
+ return UDS_ALREADY_REGISTERED;
+ }
+
+ registered_errors.blocks[registered_errors.count++] = new_block;
+ return UDS_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/errors.h b/drivers/md/dm-vdo/errors.h
new file mode 100644
index 000000000000..24e0e745fd5f
--- /dev/null
+++ b/drivers/md/dm-vdo/errors.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_ERRORS_H
+#define UDS_ERRORS_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/* Custom error codes and error-related utilities */
+#define VDO_SUCCESS 0
+
+/* Valid status codes for internal UDS functions. */
+enum uds_status_codes {
+ /* Successful return */
+ UDS_SUCCESS = VDO_SUCCESS,
+ /* Used as a base value for reporting internal errors */
+ UDS_ERROR_CODE_BASE = 1024,
+ /* Index overflow */
+ UDS_OVERFLOW = UDS_ERROR_CODE_BASE,
+ /* Invalid argument passed to internal routine */
+ UDS_INVALID_ARGUMENT,
+ /* UDS data structures are in an invalid state */
+ UDS_BAD_STATE,
+ /* Attempt to enter the same name into an internal structure twice */
+ UDS_DUPLICATE_NAME,
+ /* An assertion failed */
+ UDS_ASSERTION_FAILED,
+ /* A request has been queued for later processing (not an error) */
+ UDS_QUEUED,
+ /* This error range has already been registered */
+ UDS_ALREADY_REGISTERED,
+ /* Attempt to read or write data outside the valid range */
+ UDS_OUT_OF_RANGE,
+ /* The index session is disabled */
+ UDS_DISABLED,
+ /* The index configuration or volume format is no longer supported */
+ UDS_UNSUPPORTED_VERSION,
+ /* Some index structure is corrupt */
+ UDS_CORRUPT_DATA,
+ /* No index state found */
+ UDS_NO_INDEX,
+ /* Attempt to access incomplete index save data */
+ UDS_INDEX_NOT_SAVED_CLEANLY,
+ /* One more than the last UDS_INTERNAL error code */
+ UDS_ERROR_CODE_LAST,
+ /* One more than the last error this block will ever use */
+ UDS_ERROR_CODE_BLOCK_END = UDS_ERROR_CODE_BASE + 440,
+};
+
+enum {
+ VDO_MAX_ERROR_NAME_SIZE = 80,
+ VDO_MAX_ERROR_MESSAGE_SIZE = 128,
+};
+
+struct error_info {
+ const char *name;
+ const char *message;
+};
+
+const char * __must_check uds_string_error(int errnum, char *buf, size_t buflen);
+
+const char *uds_string_error_name(int errnum, char *buf, size_t buflen);
+
+int uds_status_to_errno(int error);
+
+int uds_register_error_block(const char *block_name, int first_error,
+ int last_reserved_error, const struct error_info *infos,
+ size_t info_size);
+
+#endif /* UDS_ERRORS_H */
diff --git a/drivers/md/dm-vdo/flush.c b/drivers/md/dm-vdo/flush.c
new file mode 100644
index 000000000000..57e87f0d7069
--- /dev/null
+++ b/drivers/md/dm-vdo/flush.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "flush.h"
+
+#include <linux/mempool.h>
+#include <linux/spinlock.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "admin-state.h"
+#include "completion.h"
+#include "io-submitter.h"
+#include "logical-zone.h"
+#include "slab-depot.h"
+#include "types.h"
+#include "vdo.h"
+
+struct flusher {
+ struct vdo_completion completion;
+ /* The vdo to which this flusher belongs */
+ struct vdo *vdo;
+ /* The administrative state of the flusher */
+ struct admin_state state;
+ /* The current flush generation of the vdo */
+ sequence_number_t flush_generation;
+ /* The first unacknowledged flush generation */
+ sequence_number_t first_unacknowledged_generation;
+ /* The queue of flush requests waiting to notify other threads */
+ struct vdo_wait_queue notifiers;
+ /* The queue of flush requests waiting for VIOs to complete */
+ struct vdo_wait_queue pending_flushes;
+ /* The flush generation for which notifications are being sent */
+ sequence_number_t notify_generation;
+ /* The logical zone to notify next */
+ struct logical_zone *logical_zone_to_notify;
+ /* The ID of the thread on which flush requests should be made */
+ thread_id_t thread_id;
+ /* The pool of flush requests */
+ mempool_t *flush_pool;
+ /* Bios waiting for a flush request to become available */
+ struct bio_list waiting_flush_bios;
+ /* The lock to protect the previous fields */
+ spinlock_t lock;
+ /* The rotor for selecting the bio queue for submitting flush bios */
+ zone_count_t bio_queue_rotor;
+ /* The number of flushes submitted to the current bio queue */
+ int flush_count;
+};
+
+/**
+ * assert_on_flusher_thread() - Check that we are on the flusher thread.
+ * @flusher: The flusher.
+ * @caller: The function which is asserting.
+ */
+static inline void assert_on_flusher_thread(struct flusher *flusher, const char *caller)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == flusher->thread_id),
+ "%s() called from flusher thread", caller);
+}
+
+/**
+ * as_flusher() - Convert a generic vdo_completion to a flusher.
+ * @completion: The completion to convert.
+ *
+ * Return: The completion as a flusher.
+ */
+static struct flusher *as_flusher(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_FLUSH_NOTIFICATION_COMPLETION);
+ return container_of(completion, struct flusher, completion);
+}
+
+/**
+ * completion_as_vdo_flush() - Convert a generic vdo_completion to a vdo_flush.
+ * @completion: The completion to convert.
+ *
+ * Return: The completion as a vdo_flush.
+ */
+static inline struct vdo_flush *completion_as_vdo_flush(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_FLUSH_COMPLETION);
+ return container_of(completion, struct vdo_flush, completion);
+}
+
+/**
+ * vdo_waiter_as_flush() - Convert a vdo_flush's generic wait queue entry back to the vdo_flush.
+ * @waiter: The wait queue entry to convert.
+ *
+ * Return: The wait queue entry as a vdo_flush.
+ */
+static struct vdo_flush *vdo_waiter_as_flush(struct vdo_waiter *waiter)
+{
+ return container_of(waiter, struct vdo_flush, waiter);
+}
+
+static void *allocate_flush(gfp_t gfp_mask, void *pool_data)
+{
+ struct vdo_flush *flush = NULL;
+
+ if ((gfp_mask & GFP_NOWAIT) == GFP_NOWAIT) {
+ flush = vdo_allocate_memory_nowait(sizeof(struct vdo_flush), __func__);
+ } else {
+ int result = vdo_allocate(1, struct vdo_flush, __func__, &flush);
+
+ if (result != VDO_SUCCESS)
+ vdo_log_error_strerror(result, "failed to allocate spare flush");
+ }
+
+ if (flush != NULL) {
+ struct flusher *flusher = pool_data;
+
+ vdo_initialize_completion(&flush->completion, flusher->vdo,
+ VDO_FLUSH_COMPLETION);
+ }
+
+ return flush;
+}
+
+static void free_flush(void *element, void *pool_data __always_unused)
+{
+ vdo_free(element);
+}
+
+/**
+ * vdo_make_flusher() - Make a flusher for a vdo.
+ * @vdo: The vdo which owns the flusher.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_make_flusher(struct vdo *vdo)
+{
+ int result = vdo_allocate(1, struct flusher, __func__, &vdo->flusher);
+
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo->flusher->vdo = vdo;
+ vdo->flusher->thread_id = vdo->thread_config.packer_thread;
+ vdo_set_admin_state_code(&vdo->flusher->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ vdo_initialize_completion(&vdo->flusher->completion, vdo,
+ VDO_FLUSH_NOTIFICATION_COMPLETION);
+
+ spin_lock_init(&vdo->flusher->lock);
+ bio_list_init(&vdo->flusher->waiting_flush_bios);
+ vdo->flusher->flush_pool = mempool_create(1, allocate_flush, free_flush,
+ vdo->flusher);
+ return ((vdo->flusher->flush_pool == NULL) ? -ENOMEM : VDO_SUCCESS);
+}
+
+/**
+ * vdo_free_flusher() - Free a flusher.
+ * @flusher: The flusher to free.
+ */
+void vdo_free_flusher(struct flusher *flusher)
+{
+ if (flusher == NULL)
+ return;
+
+ if (flusher->flush_pool != NULL)
+ mempool_destroy(vdo_forget(flusher->flush_pool));
+ vdo_free(flusher);
+}
+
+/**
+ * vdo_get_flusher_thread_id() - Get the ID of the thread on which flusher functions should be
+ * called.
+ * @flusher: The flusher to query.
+ *
+ * Return: The ID of the thread which handles the flusher.
+ */
+thread_id_t vdo_get_flusher_thread_id(struct flusher *flusher)
+{
+ return flusher->thread_id;
+}
+
+static void notify_flush(struct flusher *flusher);
+static void vdo_complete_flush(struct vdo_flush *flush);
+
+/**
+ * finish_notification() - Finish the notification process.
+ * @completion: The flusher completion.
+ *
+ * Finishes the notification process by checking if any flushes have completed and then starting
+ * the notification of the next flush request if one came in while the current notification was in
+ * progress. This callback is registered in flush_packer_callback().
+ */
+static void finish_notification(struct vdo_completion *completion)
+{
+ struct flusher *flusher = as_flusher(completion);
+
+ assert_on_flusher_thread(flusher, __func__);
+
+ vdo_waitq_enqueue_waiter(&flusher->pending_flushes,
+ vdo_waitq_dequeue_waiter(&flusher->notifiers));
+ vdo_complete_flushes(flusher);
+ if (vdo_waitq_has_waiters(&flusher->notifiers))
+ notify_flush(flusher);
+}
+
+/**
+ * flush_packer_callback() - Flush the packer.
+ * @completion: The flusher completion.
+ *
+ * Flushes the packer now that all of the logical and physical zones have been notified of the new
+ * flush request. This callback is registered in increment_generation().
+ */
+static void flush_packer_callback(struct vdo_completion *completion)
+{
+ struct flusher *flusher = as_flusher(completion);
+
+ vdo_increment_packer_flush_generation(flusher->vdo->packer);
+ vdo_launch_completion_callback(completion, finish_notification,
+ flusher->thread_id);
+}
+
+/**
+ * increment_generation() - Increment the flush generation in a logical zone.
+ * @completion: The flusher as a completion.
+ *
+ * If there are more logical zones, go on to the next one, otherwise, prepare the physical zones.
+ * This callback is registered both in notify_flush() and in itself.
+ */
+static void increment_generation(struct vdo_completion *completion)
+{
+ struct flusher *flusher = as_flusher(completion);
+ struct logical_zone *zone = flusher->logical_zone_to_notify;
+
+ vdo_increment_logical_zone_flush_generation(zone, flusher->notify_generation);
+ if (zone->next == NULL) {
+ vdo_launch_completion_callback(completion, flush_packer_callback,
+ flusher->thread_id);
+ return;
+ }
+
+ flusher->logical_zone_to_notify = zone->next;
+ vdo_launch_completion_callback(completion, increment_generation,
+ flusher->logical_zone_to_notify->thread_id);
+}
+
+/**
+ * notify_flush() - Launch a flush notification.
+ * @flusher: The flusher doing the notification.
+ */
+static void notify_flush(struct flusher *flusher)
+{
+ struct vdo_flush *flush =
+ vdo_waiter_as_flush(vdo_waitq_get_first_waiter(&flusher->notifiers));
+
+ flusher->notify_generation = flush->flush_generation;
+ flusher->logical_zone_to_notify = &flusher->vdo->logical_zones->zones[0];
+ flusher->completion.requeue = true;
+ vdo_launch_completion_callback(&flusher->completion, increment_generation,
+ flusher->logical_zone_to_notify->thread_id);
+}
+
+/**
+ * flush_vdo() - Start processing a flush request.
+ * @completion: A flush request (as a vdo_completion)
+ *
+ * This callback is registered in launch_flush().
+ */
+static void flush_vdo(struct vdo_completion *completion)
+{
+ struct vdo_flush *flush = completion_as_vdo_flush(completion);
+ struct flusher *flusher = completion->vdo->flusher;
+ bool may_notify;
+ int result;
+
+ assert_on_flusher_thread(flusher, __func__);
+ result = VDO_ASSERT(vdo_is_state_normal(&flusher->state),
+ "flusher is in normal operation");
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(flusher->vdo, result);
+ vdo_complete_flush(flush);
+ return;
+ }
+
+ flush->flush_generation = flusher->flush_generation++;
+ may_notify = !vdo_waitq_has_waiters(&flusher->notifiers);
+ vdo_waitq_enqueue_waiter(&flusher->notifiers, &flush->waiter);
+ if (may_notify)
+ notify_flush(flusher);
+}
+
+/**
+ * check_for_drain_complete() - Check whether the flusher has drained.
+ * @flusher: The flusher.
+ */
+static void check_for_drain_complete(struct flusher *flusher)
+{
+ bool drained;
+
+ if (!vdo_is_state_draining(&flusher->state) ||
+ vdo_waitq_has_waiters(&flusher->pending_flushes))
+ return;
+
+ spin_lock(&flusher->lock);
+ drained = bio_list_empty(&flusher->waiting_flush_bios);
+ spin_unlock(&flusher->lock);
+
+ if (drained)
+ vdo_finish_draining(&flusher->state);
+}
+
+/**
+ * vdo_complete_flushes() - Attempt to complete any flushes which might have finished.
+ * @flusher: The flusher.
+ */
+void vdo_complete_flushes(struct flusher *flusher)
+{
+ sequence_number_t oldest_active_generation = U64_MAX;
+ struct logical_zone *zone;
+
+ assert_on_flusher_thread(flusher, __func__);
+
+ for (zone = &flusher->vdo->logical_zones->zones[0]; zone != NULL; zone = zone->next)
+ oldest_active_generation =
+ min(oldest_active_generation,
+ READ_ONCE(zone->oldest_active_generation));
+
+ while (vdo_waitq_has_waiters(&flusher->pending_flushes)) {
+ struct vdo_flush *flush =
+ vdo_waiter_as_flush(vdo_waitq_get_first_waiter(&flusher->pending_flushes));
+
+ if (flush->flush_generation >= oldest_active_generation)
+ return;
+
+ VDO_ASSERT_LOG_ONLY((flush->flush_generation ==
+ flusher->first_unacknowledged_generation),
+ "acknowledged next expected flush, %llu, was: %llu",
+ (unsigned long long) flusher->first_unacknowledged_generation,
+ (unsigned long long) flush->flush_generation);
+ vdo_waitq_dequeue_waiter(&flusher->pending_flushes);
+ vdo_complete_flush(flush);
+ flusher->first_unacknowledged_generation++;
+ }
+
+ check_for_drain_complete(flusher);
+}
+
+/**
+ * vdo_dump_flusher() - Dump the flusher, in a thread-unsafe fashion.
+ * @flusher: The flusher.
+ */
+void vdo_dump_flusher(const struct flusher *flusher)
+{
+ vdo_log_info("struct flusher");
+ vdo_log_info(" flush_generation=%llu first_unacknowledged_generation=%llu",
+ (unsigned long long) flusher->flush_generation,
+ (unsigned long long) flusher->first_unacknowledged_generation);
+ vdo_log_info(" notifiers queue is %s; pending_flushes queue is %s",
+ (vdo_waitq_has_waiters(&flusher->notifiers) ? "not empty" : "empty"),
+ (vdo_waitq_has_waiters(&flusher->pending_flushes) ? "not empty" : "empty"));
+}
+
+/**
+ * initialize_flush() - Initialize a vdo_flush structure.
+ * @flush: The flush to initialize.
+ * @vdo: The vdo being flushed.
+ *
+ * Initializes a vdo_flush structure, transferring all the bios in the flusher's waiting_flush_bios
+ * list to it. The caller MUST already hold the lock.
+ */
+static void initialize_flush(struct vdo_flush *flush, struct vdo *vdo)
+{
+ bio_list_init(&flush->bios);
+ bio_list_merge(&flush->bios, &vdo->flusher->waiting_flush_bios);
+ bio_list_init(&vdo->flusher->waiting_flush_bios);
+}
+
+static void launch_flush(struct vdo_flush *flush)
+{
+ struct vdo_completion *completion = &flush->completion;
+
+ vdo_prepare_completion(completion, flush_vdo, flush_vdo,
+ completion->vdo->thread_config.packer_thread, NULL);
+ vdo_enqueue_completion(completion, VDO_DEFAULT_Q_FLUSH_PRIORITY);
+}
+
+/**
+ * vdo_launch_flush() - Function called to start processing a flush request.
+ * @vdo: The vdo.
+ * @bio: The bio containing an empty flush request.
+ *
+ * This is called when we receive an empty flush bio from the block layer, and before acknowledging
+ * a non-empty bio with the FUA flag set.
+ */
+void vdo_launch_flush(struct vdo *vdo, struct bio *bio)
+{
+ /*
+ * Try to allocate a vdo_flush to represent the flush request. If the allocation fails,
+ * we'll deal with it later.
+ */
+ struct vdo_flush *flush = mempool_alloc(vdo->flusher->flush_pool, GFP_NOWAIT);
+ struct flusher *flusher = vdo->flusher;
+ const struct admin_state_code *code = vdo_get_admin_state_code(&flusher->state);
+
+ VDO_ASSERT_LOG_ONLY(!code->quiescent, "Flushing not allowed in state %s",
+ code->name);
+
+ spin_lock(&flusher->lock);
+
+ /* We have a new bio to start. Add it to the list. */
+ bio_list_add(&flusher->waiting_flush_bios, bio);
+
+ if (flush == NULL) {
+ spin_unlock(&flusher->lock);
+ return;
+ }
+
+ /* We have flushes to start. Capture them in the vdo_flush structure. */
+ initialize_flush(flush, vdo);
+ spin_unlock(&flusher->lock);
+
+ /* Finish launching the flushes. */
+ launch_flush(flush);
+}
+
+/**
+ * release_flush() - Release a vdo_flush structure that has completed its work.
+ * @flush: The completed flush structure to re-use or free.
+ *
+ * If there are any pending flush requests whose vdo_flush allocation failed, they will be launched
+ * by immediately re-using the released vdo_flush. If there is no spare vdo_flush, the released
+ * structure will become the spare. Otherwise, the vdo_flush will be freed.
+ */
+static void release_flush(struct vdo_flush *flush)
+{
+ bool relaunch_flush;
+ struct flusher *flusher = flush->completion.vdo->flusher;
+
+ spin_lock(&flusher->lock);
+ if (bio_list_empty(&flusher->waiting_flush_bios)) {
+ relaunch_flush = false;
+ } else {
+ /* We have flushes to start. Capture them in a flush request. */
+ initialize_flush(flush, flusher->vdo);
+ relaunch_flush = true;
+ }
+ spin_unlock(&flusher->lock);
+
+ if (relaunch_flush) {
+ /* Finish launching the flushes. */
+ launch_flush(flush);
+ return;
+ }
+
+ mempool_free(flush, flusher->flush_pool);
+}
+
+/**
+ * vdo_complete_flush_callback() - Function called to complete and free a flush request, registered
+ * in vdo_complete_flush().
+ * @completion: The flush request.
+ */
+static void vdo_complete_flush_callback(struct vdo_completion *completion)
+{
+ struct vdo_flush *flush = completion_as_vdo_flush(completion);
+ struct vdo *vdo = completion->vdo;
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&flush->bios)) != NULL) {
+ /*
+ * We're not acknowledging this bio now, but we'll never touch it again, so this is
+ * the last chance to account for it.
+ */
+ vdo_count_bios(&vdo->stats.bios_acknowledged, bio);
+
+ /* Update the device, and send it on down... */
+ bio_set_dev(bio, vdo_get_backing_device(vdo));
+ atomic64_inc(&vdo->stats.flush_out);
+ submit_bio_noacct(bio);
+ }
+
+
+ /*
+ * Release the flush structure, freeing it, re-using it as the spare, or using it to launch
+ * any flushes that had to wait when allocations failed.
+ */
+ release_flush(flush);
+}
+
+/**
+ * select_bio_queue() - Select the bio queue on which to finish a flush request.
+ * @flusher: The flusher finishing the request.
+ */
+static thread_id_t select_bio_queue(struct flusher *flusher)
+{
+ struct vdo *vdo = flusher->vdo;
+ zone_count_t bio_threads = flusher->vdo->thread_config.bio_thread_count;
+ int interval;
+
+ if (bio_threads == 1)
+ return vdo->thread_config.bio_threads[0];
+
+ interval = vdo->device_config->thread_counts.bio_rotation_interval;
+ if (flusher->flush_count == interval) {
+ flusher->flush_count = 1;
+ flusher->bio_queue_rotor = ((flusher->bio_queue_rotor + 1) % bio_threads);
+ } else {
+ flusher->flush_count++;
+ }
+
+ return vdo->thread_config.bio_threads[flusher->bio_queue_rotor];
+}
+
+/**
+ * vdo_complete_flush() - Complete and free a vdo flush request.
+ * @flush: The flush request.
+ */
+static void vdo_complete_flush(struct vdo_flush *flush)
+{
+ struct vdo_completion *completion = &flush->completion;
+
+ vdo_prepare_completion(completion, vdo_complete_flush_callback,
+ vdo_complete_flush_callback,
+ select_bio_queue(completion->vdo->flusher), NULL);
+ vdo_enqueue_completion(completion, BIO_Q_FLUSH_PRIORITY);
+}
+
+/**
+ * initiate_drain() - Initiate a drain.
+ *
+ * Implements vdo_admin_initiator_fn.
+ */
+static void initiate_drain(struct admin_state *state)
+{
+ check_for_drain_complete(container_of(state, struct flusher, state));
+}
+
+/**
+ * vdo_drain_flusher() - Drain the flusher.
+ * @flusher: The flusher to drain.
+ * @completion: The completion to finish when the flusher has drained.
+ *
+ * Drains the flusher by preventing any more VIOs from entering the flusher and then flushing. The
+ * flusher will be left in the suspended state.
+ */
+void vdo_drain_flusher(struct flusher *flusher, struct vdo_completion *completion)
+{
+ assert_on_flusher_thread(flusher, __func__);
+ vdo_start_draining(&flusher->state, VDO_ADMIN_STATE_SUSPENDING, completion,
+ initiate_drain);
+}
+
+/**
+ * vdo_resume_flusher() - Resume a flusher which has been suspended.
+ * @flusher: The flusher to resume.
+ * @parent: The completion to finish when the flusher has resumed.
+ */
+void vdo_resume_flusher(struct flusher *flusher, struct vdo_completion *parent)
+{
+ assert_on_flusher_thread(flusher, __func__);
+ vdo_continue_completion(parent, vdo_resume_if_quiescent(&flusher->state));
+}
diff --git a/drivers/md/dm-vdo/flush.h b/drivers/md/dm-vdo/flush.h
new file mode 100644
index 000000000000..97252d6656e0
--- /dev/null
+++ b/drivers/md/dm-vdo/flush.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_FLUSH_H
+#define VDO_FLUSH_H
+
+#include "funnel-workqueue.h"
+#include "types.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+/* A marker for tracking which journal entries are affected by a flush request. */
+struct vdo_flush {
+ /* The completion for enqueueing this flush request. */
+ struct vdo_completion completion;
+ /* The flush bios covered by this request */
+ struct bio_list bios;
+ /* The wait queue entry for this flush */
+ struct vdo_waiter waiter;
+ /* Which flush this struct represents */
+ sequence_number_t flush_generation;
+};
+
+struct flusher;
+
+int __must_check vdo_make_flusher(struct vdo *vdo);
+
+void vdo_free_flusher(struct flusher *flusher);
+
+thread_id_t __must_check vdo_get_flusher_thread_id(struct flusher *flusher);
+
+void vdo_complete_flushes(struct flusher *flusher);
+
+void vdo_dump_flusher(const struct flusher *flusher);
+
+void vdo_launch_flush(struct vdo *vdo, struct bio *bio);
+
+void vdo_drain_flusher(struct flusher *flusher, struct vdo_completion *completion);
+
+void vdo_resume_flusher(struct flusher *flusher, struct vdo_completion *parent);
+
+#endif /* VDO_FLUSH_H */
diff --git a/drivers/md/dm-vdo/funnel-queue.c b/drivers/md/dm-vdo/funnel-queue.c
new file mode 100644
index 000000000000..a63b2f2bfd7d
--- /dev/null
+++ b/drivers/md/dm-vdo/funnel-queue.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "funnel-queue.h"
+
+#include "cpu.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+int vdo_make_funnel_queue(struct funnel_queue **queue_ptr)
+{
+ int result;
+ struct funnel_queue *queue;
+
+ result = vdo_allocate(1, struct funnel_queue, "funnel queue", &queue);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /*
+ * Initialize the stub entry and put it in the queue, establishing the invariant that
+ * queue->newest and queue->oldest are never null.
+ */
+ queue->stub.next = NULL;
+ queue->newest = &queue->stub;
+ queue->oldest = &queue->stub;
+
+ *queue_ptr = queue;
+ return VDO_SUCCESS;
+}
+
+void vdo_free_funnel_queue(struct funnel_queue *queue)
+{
+ vdo_free(queue);
+}
+
+static struct funnel_queue_entry *get_oldest(struct funnel_queue *queue)
+{
+ /*
+ * Barrier requirements: We need a read barrier between reading a "next" field pointer
+ * value and reading anything it points to. There's an accompanying barrier in
+ * vdo_funnel_queue_put() between its caller setting up the entry and making it visible.
+ */
+ struct funnel_queue_entry *oldest = queue->oldest;
+ struct funnel_queue_entry *next = READ_ONCE(oldest->next);
+
+ if (oldest == &queue->stub) {
+ /*
+ * When the oldest entry is the stub and it has no successor, the queue is
+ * logically empty.
+ */
+ if (next == NULL)
+ return NULL;
+ /*
+ * The stub entry has a successor, so the stub can be dequeued and ignored without
+ * breaking the queue invariants.
+ */
+ oldest = next;
+ queue->oldest = oldest;
+ next = READ_ONCE(oldest->next);
+ }
+
+ /*
+ * We have a non-stub candidate to dequeue. If it lacks a successor, we'll need to put the
+ * stub entry back on the queue first.
+ */
+ if (next == NULL) {
+ struct funnel_queue_entry *newest = READ_ONCE(queue->newest);
+
+ if (oldest != newest) {
+ /*
+ * Another thread has already swung queue->newest atomically, but not yet
+ * assigned previous->next. The queue is really still empty.
+ */
+ return NULL;
+ }
+
+ /*
+ * Put the stub entry back on the queue, ensuring a successor will eventually be
+ * seen.
+ */
+ vdo_funnel_queue_put(queue, &queue->stub);
+
+ /* Check again for a successor. */
+ next = READ_ONCE(oldest->next);
+ if (next == NULL) {
+ /*
+ * We lost a race with a producer who swapped queue->newest before we did,
+ * but who hasn't yet updated previous->next. Try again later.
+ */
+ return NULL;
+ }
+ }
+
+ return oldest;
+}
+
+/*
+ * Poll a queue, removing the oldest entry if the queue is not empty. This function must only be
+ * called from a single consumer thread.
+ */
+struct funnel_queue_entry *vdo_funnel_queue_poll(struct funnel_queue *queue)
+{
+ struct funnel_queue_entry *oldest = get_oldest(queue);
+
+ if (oldest == NULL)
+ return oldest;
+
+ /*
+ * Dequeue the oldest entry and return it. Only one consumer thread may call this function,
+ * so no locking, atomic operations, or fences are needed; queue->oldest is owned by the
+ * consumer and oldest->next is never used by a producer thread after it is swung from NULL
+ * to non-NULL.
+ */
+ queue->oldest = READ_ONCE(oldest->next);
+ /*
+ * Make sure the caller sees the proper stored data for this entry. Since we've already
+ * fetched the entry pointer we stored in "queue->oldest", this also ensures that on entry
+ * to the next call we'll properly see the dependent data.
+ */
+ smp_rmb();
+ /*
+ * If "oldest" is a very light-weight work item, we'll be looking for the next one very
+ * soon, so prefetch it now.
+ */
+ uds_prefetch_address(queue->oldest, true);
+ WRITE_ONCE(oldest->next, NULL);
+ return oldest;
+}
+
+/*
+ * Check whether the funnel queue is empty or not. If the queue is in a transition state with one
+ * or more entries being added such that the list view is incomplete, this function will report the
+ * queue as empty.
+ */
+bool vdo_is_funnel_queue_empty(struct funnel_queue *queue)
+{
+ return get_oldest(queue) == NULL;
+}
+
+/*
+ * Check whether the funnel queue is idle or not. If the queue has entries available to be
+ * retrieved, it is not idle. If the queue is in a transition state with one or more entries being
+ * added such that the list view is incomplete, it may not be possible to retrieve an entry with
+ * the vdo_funnel_queue_poll() function, but the queue will not be considered idle.
+ */
+bool vdo_is_funnel_queue_idle(struct funnel_queue *queue)
+{
+ /*
+ * Oldest is not the stub, so there's another entry, though if next is NULL we can't
+ * retrieve it yet.
+ */
+ if (queue->oldest != &queue->stub)
+ return false;
+
+ /*
+ * Oldest is the stub, but newest has been updated by _put(); either there's another,
+ * retrievable entry in the list, or the list is officially empty but in the intermediate
+ * state of having an entry added.
+ *
+ * Whether anything is retrievable depends on whether stub.next has been updated and become
+ * visible to us, but for idleness we don't care. And due to memory ordering in _put(), the
+ * update to newest would be visible to us at the same time or sooner.
+ */
+ if (READ_ONCE(queue->newest) != &queue->stub)
+ return false;
+
+ return true;
+}
diff --git a/drivers/md/dm-vdo/funnel-queue.h b/drivers/md/dm-vdo/funnel-queue.h
new file mode 100644
index 000000000000..bde0f1deff98
--- /dev/null
+++ b/drivers/md/dm-vdo/funnel-queue.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_FUNNEL_QUEUE_H
+#define VDO_FUNNEL_QUEUE_H
+
+#include <linux/atomic.h>
+#include <linux/cache.h>
+
+/*
+ * A funnel queue is a simple (almost) lock-free queue that accepts entries from multiple threads
+ * (multi-producer) and delivers them to a single thread (single-consumer). "Funnel" is an attempt
+ * to evoke the image of requests from more than one producer being "funneled down" to a single
+ * consumer.
+ *
+ * This is an unsynchronized but thread-safe data structure when used as intended. There is no
+ * mechanism to ensure that only one thread is consuming from the queue. If more than one thread
+ * attempts to consume from the queue, the resulting behavior is undefined. Clients must not
+ * directly access or manipulate the internals of the queue, which are only exposed for the purpose
+ * of allowing the very simple enqueue operation to be inlined.
+ *
+ * The implementation requires that a funnel_queue_entry structure (a link pointer) is embedded in
+ * the queue entries, and pointers to those structures are used exclusively by the queue. No macros
+ * are defined to template the queue, so the offset of the funnel_queue_entry in the records placed
+ * in the queue must all be the same so the client can derive their structure pointer from the
+ * entry pointer returned by vdo_funnel_queue_poll().
+ *
+ * Callers are wholly responsible for allocating and freeing the entries. Entries may be freed as
+ * soon as they are returned since this queue is not susceptible to the "ABA problem" present in
+ * many lock-free data structures. The queue is dynamically allocated to ensure cache-line
+ * alignment, but no other dynamic allocation is used.
+ *
+ * The algorithm is not actually 100% lock-free. There is a single point in vdo_funnel_queue_put()
+ * at which a preempted producer will prevent the consumers from seeing items added to the queue by
+ * later producers, and only if the queue is short enough or the consumer fast enough for it to
+ * reach what was the end of the queue at the time of the preemption.
+ *
+ * The consumer function, vdo_funnel_queue_poll(), will return NULL when the queue is empty. To
+ * wait for data to consume, spin (if safe) or combine the queue with a struct event_count to
+ * signal the presence of new entries.
+ */
+
+/* This queue link structure must be embedded in client entries. */
+struct funnel_queue_entry {
+ /* The next (newer) entry in the queue. */
+ struct funnel_queue_entry *next;
+};
+
+/*
+ * The dynamically allocated queue structure, which is allocated on a cache line boundary so the
+ * producer and consumer fields in the structure will land on separate cache lines. This should be
+ * consider opaque but it is exposed here so vdo_funnel_queue_put() can be inlined.
+ */
+struct __aligned(L1_CACHE_BYTES) funnel_queue {
+ /*
+ * The producers' end of the queue, an atomically exchanged pointer that will never be
+ * NULL.
+ */
+ struct funnel_queue_entry *newest;
+
+ /* The consumer's end of the queue, which is owned by the consumer and never NULL. */
+ struct funnel_queue_entry *oldest __aligned(L1_CACHE_BYTES);
+
+ /* A dummy entry used to provide the non-NULL invariants above. */
+ struct funnel_queue_entry stub;
+};
+
+int __must_check vdo_make_funnel_queue(struct funnel_queue **queue_ptr);
+
+void vdo_free_funnel_queue(struct funnel_queue *queue);
+
+/*
+ * Put an entry on the end of the queue.
+ *
+ * The entry pointer must be to the struct funnel_queue_entry embedded in the caller's data
+ * structure. The caller must be able to derive the address of the start of their data structure
+ * from the pointer that passed in here, so every entry in the queue must have the struct
+ * funnel_queue_entry at the same offset within the client's structure.
+ */
+static inline void vdo_funnel_queue_put(struct funnel_queue *queue,
+ struct funnel_queue_entry *entry)
+{
+ struct funnel_queue_entry *previous;
+
+ /*
+ * Barrier requirements: All stores relating to the entry ("next" pointer, containing data
+ * structure fields) must happen before the previous->next store making it visible to the
+ * consumer. Also, the entry's "next" field initialization to NULL must happen before any
+ * other producer threads can see the entry (the xchg) and try to update the "next" field.
+ *
+ * xchg implements a full barrier.
+ */
+ WRITE_ONCE(entry->next, NULL);
+ previous = xchg(&queue->newest, entry);
+ /*
+ * Preemptions between these two statements hide the rest of the queue from the consumer,
+ * preventing consumption until the following assignment runs.
+ */
+ WRITE_ONCE(previous->next, entry);
+}
+
+struct funnel_queue_entry *__must_check vdo_funnel_queue_poll(struct funnel_queue *queue);
+
+bool __must_check vdo_is_funnel_queue_empty(struct funnel_queue *queue);
+
+bool __must_check vdo_is_funnel_queue_idle(struct funnel_queue *queue);
+
+#endif /* VDO_FUNNEL_QUEUE_H */
diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c
new file mode 100644
index 000000000000..ae11941c90a9
--- /dev/null
+++ b/drivers/md/dm-vdo/funnel-workqueue.c
@@ -0,0 +1,638 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "funnel-workqueue.h"
+
+#include <linux/atomic.h>
+#include <linux/cache.h>
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/percpu.h>
+
+#include "funnel-queue.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+#include "completion.h"
+#include "status-codes.h"
+
+static DEFINE_PER_CPU(unsigned int, service_queue_rotor);
+
+/**
+ * DOC: Work queue definition.
+ *
+ * There are two types of work queues: simple, with one worker thread, and round-robin, which uses
+ * a group of the former to do the work, and assigns work to them in round-robin fashion (roughly).
+ * Externally, both are represented via the same common sub-structure, though there's actually not
+ * a great deal of overlap between the two types internally.
+ */
+struct vdo_work_queue {
+ /* Name of just the work queue (e.g., "cpuQ12") */
+ char *name;
+ bool round_robin_mode;
+ struct vdo_thread *owner;
+ /* Life cycle functions, etc */
+ const struct vdo_work_queue_type *type;
+};
+
+struct simple_work_queue {
+ struct vdo_work_queue common;
+ struct funnel_queue *priority_lists[VDO_WORK_Q_MAX_PRIORITY + 1];
+ void *private;
+
+ /*
+ * The fields above are unchanged after setup but often read, and are good candidates for
+ * caching -- and if the max priority is 2, just fit in one x86-64 cache line if aligned.
+ * The fields below are often modified as we sleep and wake, so we want a separate cache
+ * line for performance.
+ */
+
+ /* Any (0 or 1) worker threads waiting for new work to do */
+ wait_queue_head_t waiting_worker_threads ____cacheline_aligned;
+ /* Hack to reduce wakeup calls if the worker thread is running */
+ atomic_t idle;
+
+ /* These are infrequently used so in terms of performance we don't care where they land. */
+ struct task_struct *thread;
+ /* Notify creator once worker has initialized */
+ struct completion *started;
+};
+
+struct round_robin_work_queue {
+ struct vdo_work_queue common;
+ struct simple_work_queue **service_queues;
+ unsigned int num_service_queues;
+};
+
+static inline struct simple_work_queue *as_simple_work_queue(struct vdo_work_queue *queue)
+{
+ return ((queue == NULL) ?
+ NULL : container_of(queue, struct simple_work_queue, common));
+}
+
+static inline struct round_robin_work_queue *as_round_robin_work_queue(struct vdo_work_queue *queue)
+{
+ return ((queue == NULL) ?
+ NULL :
+ container_of(queue, struct round_robin_work_queue, common));
+}
+
+/* Processing normal completions. */
+
+/*
+ * Dequeue and return the next waiting completion, if any.
+ *
+ * We scan the funnel queues from highest priority to lowest, once; there is therefore a race
+ * condition where a high-priority completion can be enqueued followed by a lower-priority one, and
+ * we'll grab the latter (but we'll catch the high-priority item on the next call). If strict
+ * enforcement of priorities becomes necessary, this function will need fixing.
+ */
+static struct vdo_completion *poll_for_completion(struct simple_work_queue *queue)
+{
+ int i;
+
+ for (i = queue->common.type->max_priority; i >= 0; i--) {
+ struct funnel_queue_entry *link = vdo_funnel_queue_poll(queue->priority_lists[i]);
+
+ if (link != NULL)
+ return container_of(link, struct vdo_completion, work_queue_entry_link);
+ }
+
+ return NULL;
+}
+
+static void enqueue_work_queue_completion(struct simple_work_queue *queue,
+ struct vdo_completion *completion)
+{
+ VDO_ASSERT_LOG_ONLY(completion->my_queue == NULL,
+ "completion %px (fn %px) to enqueue (%px) is not already queued (%px)",
+ completion, completion->callback, queue, completion->my_queue);
+ if (completion->priority == VDO_WORK_Q_DEFAULT_PRIORITY)
+ completion->priority = queue->common.type->default_priority;
+
+ if (VDO_ASSERT(completion->priority <= queue->common.type->max_priority,
+ "priority is in range for queue") != VDO_SUCCESS)
+ completion->priority = 0;
+
+ completion->my_queue = &queue->common;
+
+ /* Funnel queue handles the synchronization for the put. */
+ vdo_funnel_queue_put(queue->priority_lists[completion->priority],
+ &completion->work_queue_entry_link);
+
+ /*
+ * Due to how funnel queue synchronization is handled (just atomic operations), the
+ * simplest safe implementation here would be to wake-up any waiting threads after
+ * enqueueing each item. Even if the funnel queue is not empty at the time of adding an
+ * item to the queue, the consumer thread may not see this since it is not guaranteed to
+ * have the same view of the queue as a producer thread.
+ *
+ * However, the above is wasteful so instead we attempt to minimize the number of thread
+ * wakeups. Using an idle flag, and careful ordering using memory barriers, we should be
+ * able to determine when the worker thread might be asleep or going to sleep. We use
+ * cmpxchg to try to take ownership (vs other producer threads) of the responsibility for
+ * waking the worker thread, so multiple wakeups aren't tried at once.
+ *
+ * This was tuned for some x86 boxes that were handy; it's untested whether doing the read
+ * first is any better or worse for other platforms, even other x86 configurations.
+ */
+ smp_mb();
+ if ((atomic_read(&queue->idle) != 1) || (atomic_cmpxchg(&queue->idle, 1, 0) != 1))
+ return;
+
+ /* There's a maximum of one thread in this list. */
+ wake_up(&queue->waiting_worker_threads);
+}
+
+static void run_start_hook(struct simple_work_queue *queue)
+{
+ if (queue->common.type->start != NULL)
+ queue->common.type->start(queue->private);
+}
+
+static void run_finish_hook(struct simple_work_queue *queue)
+{
+ if (queue->common.type->finish != NULL)
+ queue->common.type->finish(queue->private);
+}
+
+/*
+ * Wait for the next completion to process, or until kthread_should_stop indicates that it's time
+ * for us to shut down.
+ *
+ * If kthread_should_stop says it's time to stop but we have pending completions return a
+ * completion.
+ *
+ * Also update statistics relating to scheduler interactions.
+ */
+static struct vdo_completion *wait_for_next_completion(struct simple_work_queue *queue)
+{
+ struct vdo_completion *completion;
+ DEFINE_WAIT(wait);
+
+ while (true) {
+ prepare_to_wait(&queue->waiting_worker_threads, &wait,
+ TASK_INTERRUPTIBLE);
+ /*
+ * Don't set the idle flag until a wakeup will not be lost.
+ *
+ * Force synchronization between setting the idle flag and checking the funnel
+ * queue; the producer side will do them in the reverse order. (There's still a
+ * race condition we've chosen to allow, because we've got a timeout below that
+ * unwedges us if we hit it, but this may narrow the window a little.)
+ */
+ atomic_set(&queue->idle, 1);
+ smp_mb(); /* store-load barrier between "idle" and funnel queue */
+
+ completion = poll_for_completion(queue);
+ if (completion != NULL)
+ break;
+
+ /*
+ * We need to check for thread-stop after setting TASK_INTERRUPTIBLE state up
+ * above. Otherwise, schedule() will put the thread to sleep and might miss a
+ * wakeup from kthread_stop() call in vdo_finish_work_queue().
+ */
+ if (kthread_should_stop())
+ break;
+
+ schedule();
+
+ /*
+ * Most of the time when we wake, it should be because there's work to do. If it
+ * was a spurious wakeup, continue looping.
+ */
+ completion = poll_for_completion(queue);
+ if (completion != NULL)
+ break;
+ }
+
+ finish_wait(&queue->waiting_worker_threads, &wait);
+ atomic_set(&queue->idle, 0);
+
+ return completion;
+}
+
+static void process_completion(struct simple_work_queue *queue,
+ struct vdo_completion *completion)
+{
+ if (VDO_ASSERT(completion->my_queue == &queue->common,
+ "completion %px from queue %px marked as being in this queue (%px)",
+ completion, queue, completion->my_queue) == VDO_SUCCESS)
+ completion->my_queue = NULL;
+
+ vdo_run_completion(completion);
+}
+
+static void service_work_queue(struct simple_work_queue *queue)
+{
+ run_start_hook(queue);
+
+ while (true) {
+ struct vdo_completion *completion = poll_for_completion(queue);
+
+ if (completion == NULL)
+ completion = wait_for_next_completion(queue);
+
+ if (completion == NULL) {
+ /* No completions but kthread_should_stop() was triggered. */
+ break;
+ }
+
+ process_completion(queue, completion);
+
+ /*
+ * Be friendly to a CPU that has other work to do, if the kernel has told us to.
+ * This speeds up some performance tests; that "other work" might include other VDO
+ * threads.
+ */
+ if (need_resched())
+ cond_resched();
+ }
+
+ run_finish_hook(queue);
+}
+
+static int work_queue_runner(void *ptr)
+{
+ struct simple_work_queue *queue = ptr;
+
+ complete(queue->started);
+ service_work_queue(queue);
+ return 0;
+}
+
+/* Creation & teardown */
+
+static void free_simple_work_queue(struct simple_work_queue *queue)
+{
+ unsigned int i;
+
+ for (i = 0; i <= VDO_WORK_Q_MAX_PRIORITY; i++)
+ vdo_free_funnel_queue(queue->priority_lists[i]);
+ vdo_free(queue->common.name);
+ vdo_free(queue);
+}
+
+static void free_round_robin_work_queue(struct round_robin_work_queue *queue)
+{
+ struct simple_work_queue **queue_table = queue->service_queues;
+ unsigned int count = queue->num_service_queues;
+ unsigned int i;
+
+ queue->service_queues = NULL;
+
+ for (i = 0; i < count; i++)
+ free_simple_work_queue(queue_table[i]);
+ vdo_free(queue_table);
+ vdo_free(queue->common.name);
+ vdo_free(queue);
+}
+
+void vdo_free_work_queue(struct vdo_work_queue *queue)
+{
+ if (queue == NULL)
+ return;
+
+ vdo_finish_work_queue(queue);
+
+ if (queue->round_robin_mode)
+ free_round_robin_work_queue(as_round_robin_work_queue(queue));
+ else
+ free_simple_work_queue(as_simple_work_queue(queue));
+}
+
+static int make_simple_work_queue(const char *thread_name_prefix, const char *name,
+ struct vdo_thread *owner, void *private,
+ const struct vdo_work_queue_type *type,
+ struct simple_work_queue **queue_ptr)
+{
+ DECLARE_COMPLETION_ONSTACK(started);
+ struct simple_work_queue *queue;
+ int i;
+ struct task_struct *thread = NULL;
+ int result;
+
+ VDO_ASSERT_LOG_ONLY((type->max_priority <= VDO_WORK_Q_MAX_PRIORITY),
+ "queue priority count %u within limit %u", type->max_priority,
+ VDO_WORK_Q_MAX_PRIORITY);
+
+ result = vdo_allocate(1, struct simple_work_queue, "simple work queue", &queue);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ queue->private = private;
+ queue->started = &started;
+ queue->common.type = type;
+ queue->common.owner = owner;
+ init_waitqueue_head(&queue->waiting_worker_threads);
+
+ result = vdo_duplicate_string(name, "queue name", &queue->common.name);
+ if (result != VDO_SUCCESS) {
+ vdo_free(queue);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i <= type->max_priority; i++) {
+ result = vdo_make_funnel_queue(&queue->priority_lists[i]);
+ if (result != VDO_SUCCESS) {
+ free_simple_work_queue(queue);
+ return result;
+ }
+ }
+
+ thread = kthread_run(work_queue_runner, queue, "%s:%s", thread_name_prefix,
+ queue->common.name);
+ if (IS_ERR(thread)) {
+ free_simple_work_queue(queue);
+ return (int) PTR_ERR(thread);
+ }
+
+ queue->thread = thread;
+
+ /*
+ * If we don't wait to ensure the thread is running VDO code, a quick kthread_stop (due to
+ * errors elsewhere) could cause it to never get as far as running VDO, skipping the
+ * cleanup code.
+ *
+ * Eventually we should just make that path safe too, and then we won't need this
+ * synchronization.
+ */
+ wait_for_completion(&started);
+
+ *queue_ptr = queue;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_make_work_queue() - Create a work queue; if multiple threads are requested, completions will
+ * be distributed to them in round-robin fashion.
+ *
+ * Each queue is associated with a struct vdo_thread which has a single vdo thread id. Regardless
+ * of the actual number of queues and threads allocated here, code outside of the queue
+ * implementation will treat this as a single zone.
+ */
+int vdo_make_work_queue(const char *thread_name_prefix, const char *name,
+ struct vdo_thread *owner, const struct vdo_work_queue_type *type,
+ unsigned int thread_count, void *thread_privates[],
+ struct vdo_work_queue **queue_ptr)
+{
+ struct round_robin_work_queue *queue;
+ int result;
+ char thread_name[TASK_COMM_LEN];
+ unsigned int i;
+
+ if (thread_count == 1) {
+ struct simple_work_queue *simple_queue;
+ void *context = ((thread_privates != NULL) ? thread_privates[0] : NULL);
+
+ result = make_simple_work_queue(thread_name_prefix, name, owner, context,
+ type, &simple_queue);
+ if (result == VDO_SUCCESS)
+ *queue_ptr = &simple_queue->common;
+ return result;
+ }
+
+ result = vdo_allocate(1, struct round_robin_work_queue, "round-robin work queue",
+ &queue);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(thread_count, struct simple_work_queue *,
+ "subordinate work queues", &queue->service_queues);
+ if (result != VDO_SUCCESS) {
+ vdo_free(queue);
+ return result;
+ }
+
+ queue->num_service_queues = thread_count;
+ queue->common.round_robin_mode = true;
+ queue->common.owner = owner;
+
+ result = vdo_duplicate_string(name, "queue name", &queue->common.name);
+ if (result != VDO_SUCCESS) {
+ vdo_free(queue->service_queues);
+ vdo_free(queue);
+ return -ENOMEM;
+ }
+
+ *queue_ptr = &queue->common;
+
+ for (i = 0; i < thread_count; i++) {
+ void *context = ((thread_privates != NULL) ? thread_privates[i] : NULL);
+
+ snprintf(thread_name, sizeof(thread_name), "%s%u", name, i);
+ result = make_simple_work_queue(thread_name_prefix, thread_name, owner,
+ context, type, &queue->service_queues[i]);
+ if (result != VDO_SUCCESS) {
+ queue->num_service_queues = i;
+ /* Destroy previously created subordinates. */
+ vdo_free_work_queue(vdo_forget(*queue_ptr));
+ return result;
+ }
+ }
+
+ return VDO_SUCCESS;
+}
+
+static void finish_simple_work_queue(struct simple_work_queue *queue)
+{
+ if (queue->thread == NULL)
+ return;
+
+ /* Tells the worker thread to shut down and waits for it to exit. */
+ kthread_stop(queue->thread);
+ queue->thread = NULL;
+}
+
+static void finish_round_robin_work_queue(struct round_robin_work_queue *queue)
+{
+ struct simple_work_queue **queue_table = queue->service_queues;
+ unsigned int count = queue->num_service_queues;
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ finish_simple_work_queue(queue_table[i]);
+}
+
+/* No enqueueing of completions should be done once this function is called. */
+void vdo_finish_work_queue(struct vdo_work_queue *queue)
+{
+ if (queue == NULL)
+ return;
+
+ if (queue->round_robin_mode)
+ finish_round_robin_work_queue(as_round_robin_work_queue(queue));
+ else
+ finish_simple_work_queue(as_simple_work_queue(queue));
+}
+
+/* Debugging dumps */
+
+static void dump_simple_work_queue(struct simple_work_queue *queue)
+{
+ const char *thread_status = "no threads";
+ char task_state_report = '-';
+
+ if (queue->thread != NULL) {
+ task_state_report = task_state_to_char(queue->thread);
+ thread_status = atomic_read(&queue->idle) ? "idle" : "running";
+ }
+
+ vdo_log_info("workQ %px (%s) %s (%c)", &queue->common, queue->common.name,
+ thread_status, task_state_report);
+
+ /* ->waiting_worker_threads wait queue status? anyone waiting? */
+}
+
+/*
+ * Write to the buffer some info about the completion, for logging. Since the common use case is
+ * dumping info about a lot of completions to syslog all at once, the format favors brevity over
+ * readability.
+ */
+void vdo_dump_work_queue(struct vdo_work_queue *queue)
+{
+ if (queue->round_robin_mode) {
+ struct round_robin_work_queue *round_robin = as_round_robin_work_queue(queue);
+ unsigned int i;
+
+ for (i = 0; i < round_robin->num_service_queues; i++)
+ dump_simple_work_queue(round_robin->service_queues[i]);
+ } else {
+ dump_simple_work_queue(as_simple_work_queue(queue));
+ }
+}
+
+static void get_function_name(void *pointer, char *buffer, size_t buffer_length)
+{
+ if (pointer == NULL) {
+ /*
+ * Format "%ps" logs a null pointer as "(null)" with a bunch of leading spaces. We
+ * sometimes use this when logging lots of data; don't be so verbose.
+ */
+ strscpy(buffer, "-", buffer_length);
+ } else {
+ /*
+ * Use a pragma to defeat gcc's format checking, which doesn't understand that
+ * "%ps" actually does support a precision spec in Linux kernel code.
+ */
+ char *space;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat"
+ snprintf(buffer, buffer_length, "%.*ps", buffer_length - 1, pointer);
+#pragma GCC diagnostic pop
+
+ space = strchr(buffer, ' ');
+ if (space != NULL)
+ *space = '\0';
+ }
+}
+
+void vdo_dump_completion_to_buffer(struct vdo_completion *completion, char *buffer,
+ size_t length)
+{
+ size_t current_length =
+ scnprintf(buffer, length, "%.*s/", TASK_COMM_LEN,
+ (completion->my_queue == NULL ? "-" : completion->my_queue->name));
+
+ if (current_length < length - 1) {
+ get_function_name((void *) completion->callback, buffer + current_length,
+ length - current_length);
+ }
+}
+
+/* Completion submission */
+/*
+ * If the completion has a timeout that has already passed, the timeout handler function may be
+ * invoked by this function.
+ */
+void vdo_enqueue_work_queue(struct vdo_work_queue *queue,
+ struct vdo_completion *completion)
+{
+ /*
+ * Convert the provided generic vdo_work_queue to the simple_work_queue to actually queue
+ * on.
+ */
+ struct simple_work_queue *simple_queue = NULL;
+
+ if (!queue->round_robin_mode) {
+ simple_queue = as_simple_work_queue(queue);
+ } else {
+ struct round_robin_work_queue *round_robin = as_round_robin_work_queue(queue);
+
+ /*
+ * It shouldn't be a big deal if the same rotor gets used for multiple work queues.
+ * Any patterns that might develop are likely to be disrupted by random ordering of
+ * multiple completions and migration between cores, unless the load is so light as
+ * to be regular in ordering of tasks and the threads are confined to individual
+ * cores; with a load that light we won't care.
+ */
+ unsigned int rotor = this_cpu_inc_return(service_queue_rotor);
+ unsigned int index = rotor % round_robin->num_service_queues;
+
+ simple_queue = round_robin->service_queues[index];
+ }
+
+ enqueue_work_queue_completion(simple_queue, completion);
+}
+
+/* Misc */
+
+/*
+ * Return the work queue pointer recorded at initialization time in the work-queue stack handle
+ * initialized on the stack of the current thread, if any.
+ */
+static struct simple_work_queue *get_current_thread_work_queue(void)
+{
+ /*
+ * In interrupt context, if a vdo thread is what got interrupted, the calls below will find
+ * the queue for the thread which was interrupted. However, the interrupted thread may have
+ * been processing a completion, in which case starting to process another would violate
+ * our concurrency assumptions.
+ */
+ if (in_interrupt())
+ return NULL;
+
+ if (kthread_func(current) != work_queue_runner)
+ /* Not a VDO work queue thread. */
+ return NULL;
+
+ return kthread_data(current);
+}
+
+struct vdo_work_queue *vdo_get_current_work_queue(void)
+{
+ struct simple_work_queue *queue = get_current_thread_work_queue();
+
+ return (queue == NULL) ? NULL : &queue->common;
+}
+
+struct vdo_thread *vdo_get_work_queue_owner(struct vdo_work_queue *queue)
+{
+ return queue->owner;
+}
+
+/**
+ * vdo_get_work_queue_private_data() - Returns the private data for the current thread's work
+ * queue, or NULL if none or if the current thread is not a
+ * work queue thread.
+ */
+void *vdo_get_work_queue_private_data(void)
+{
+ struct simple_work_queue *queue = get_current_thread_work_queue();
+
+ return (queue != NULL) ? queue->private : NULL;
+}
+
+bool vdo_work_queue_type_is(struct vdo_work_queue *queue,
+ const struct vdo_work_queue_type *type)
+{
+ return (queue->type == type);
+}
diff --git a/drivers/md/dm-vdo/funnel-workqueue.h b/drivers/md/dm-vdo/funnel-workqueue.h
new file mode 100644
index 000000000000..b5be6e9e83bc
--- /dev/null
+++ b/drivers/md/dm-vdo/funnel-workqueue.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_WORK_QUEUE_H
+#define VDO_WORK_QUEUE_H
+
+#include <linux/sched.h> /* for TASK_COMM_LEN */
+
+#include "types.h"
+
+enum {
+ MAX_VDO_WORK_QUEUE_NAME_LEN = TASK_COMM_LEN,
+};
+
+struct vdo_work_queue_type {
+ void (*start)(void *context);
+ void (*finish)(void *context);
+ enum vdo_completion_priority max_priority;
+ enum vdo_completion_priority default_priority;
+};
+
+struct vdo_completion;
+struct vdo_thread;
+struct vdo_work_queue;
+
+int vdo_make_work_queue(const char *thread_name_prefix, const char *name,
+ struct vdo_thread *owner, const struct vdo_work_queue_type *type,
+ unsigned int thread_count, void *thread_privates[],
+ struct vdo_work_queue **queue_ptr);
+
+void vdo_enqueue_work_queue(struct vdo_work_queue *queue, struct vdo_completion *completion);
+
+void vdo_finish_work_queue(struct vdo_work_queue *queue);
+
+void vdo_free_work_queue(struct vdo_work_queue *queue);
+
+void vdo_dump_work_queue(struct vdo_work_queue *queue);
+
+void vdo_dump_completion_to_buffer(struct vdo_completion *completion, char *buffer,
+ size_t length);
+
+void *vdo_get_work_queue_private_data(void);
+struct vdo_work_queue *vdo_get_current_work_queue(void);
+struct vdo_thread *vdo_get_work_queue_owner(struct vdo_work_queue *queue);
+
+bool __must_check vdo_work_queue_type_is(struct vdo_work_queue *queue,
+ const struct vdo_work_queue_type *type);
+
+#endif /* VDO_WORK_QUEUE_H */
diff --git a/drivers/md/dm-vdo/indexer/chapter-index.c b/drivers/md/dm-vdo/indexer/chapter-index.c
new file mode 100644
index 000000000000..7e32a25d3f2f
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/chapter-index.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "chapter-index.h"
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "hash-utils.h"
+#include "indexer.h"
+
+int uds_make_open_chapter_index(struct open_chapter_index **chapter_index,
+ const struct index_geometry *geometry, u64 volume_nonce)
+{
+ int result;
+ size_t memory_size;
+ struct open_chapter_index *index;
+
+ result = vdo_allocate(1, struct open_chapter_index, "open chapter index", &index);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /*
+ * The delta index will rebalance delta lists when memory gets tight,
+ * so give the chapter index one extra page.
+ */
+ memory_size = ((geometry->index_pages_per_chapter + 1) * geometry->bytes_per_page);
+ index->geometry = geometry;
+ index->volume_nonce = volume_nonce;
+ result = uds_initialize_delta_index(&index->delta_index, 1,
+ geometry->delta_lists_per_chapter,
+ geometry->chapter_mean_delta,
+ geometry->chapter_payload_bits,
+ memory_size, 'm');
+ if (result != UDS_SUCCESS) {
+ vdo_free(index);
+ return result;
+ }
+
+ index->memory_size = index->delta_index.memory_size + sizeof(struct open_chapter_index);
+ *chapter_index = index;
+ return UDS_SUCCESS;
+}
+
+void uds_free_open_chapter_index(struct open_chapter_index *chapter_index)
+{
+ if (chapter_index == NULL)
+ return;
+
+ uds_uninitialize_delta_index(&chapter_index->delta_index);
+ vdo_free(chapter_index);
+}
+
+/* Re-initialize an open chapter index for a new chapter. */
+void uds_empty_open_chapter_index(struct open_chapter_index *chapter_index,
+ u64 virtual_chapter_number)
+{
+ uds_reset_delta_index(&chapter_index->delta_index);
+ chapter_index->virtual_chapter_number = virtual_chapter_number;
+}
+
+static inline bool was_entry_found(const struct delta_index_entry *entry, u32 address)
+{
+ return (!entry->at_end) && (entry->key == address);
+}
+
+/* Associate a record name with the record page containing its metadata. */
+int uds_put_open_chapter_index_record(struct open_chapter_index *chapter_index,
+ const struct uds_record_name *name,
+ u32 page_number)
+{
+ int result;
+ struct delta_index_entry entry;
+ u32 address;
+ u32 list_number;
+ const u8 *found_name;
+ bool found;
+ const struct index_geometry *geometry = chapter_index->geometry;
+ u64 chapter_number = chapter_index->virtual_chapter_number;
+ u32 record_pages = geometry->record_pages_per_chapter;
+
+ result = VDO_ASSERT(page_number < record_pages,
+ "Page number within chapter (%u) exceeds the maximum value %u",
+ page_number, record_pages);
+ if (result != VDO_SUCCESS)
+ return UDS_INVALID_ARGUMENT;
+
+ address = uds_hash_to_chapter_delta_address(name, geometry);
+ list_number = uds_hash_to_chapter_delta_list(name, geometry);
+ result = uds_get_delta_index_entry(&chapter_index->delta_index, list_number,
+ address, name->name, &entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ found = was_entry_found(&entry, address);
+ result = VDO_ASSERT(!(found && entry.is_collision),
+ "Chunk appears more than once in chapter %llu",
+ (unsigned long long) chapter_number);
+ if (result != VDO_SUCCESS)
+ return UDS_BAD_STATE;
+
+ found_name = (found ? name->name : NULL);
+ return uds_put_delta_index_entry(&entry, address, page_number, found_name);
+}
+
+/*
+ * Pack a section of an open chapter index into a chapter index page. A range of delta lists
+ * (starting with a specified list index) is copied from the open chapter index into a memory page.
+ * The number of lists copied onto the page is returned to the caller on success.
+ *
+ * @chapter_index: The open chapter index
+ * @memory: The memory page to use
+ * @first_list: The first delta list number to be copied
+ * @last_page: If true, this is the last page of the chapter index and all the remaining lists must
+ * be packed onto this page
+ * @lists_packed: The number of delta lists that were packed onto this page
+ */
+int uds_pack_open_chapter_index_page(struct open_chapter_index *chapter_index,
+ u8 *memory, u32 first_list, bool last_page,
+ u32 *lists_packed)
+{
+ int result;
+ struct delta_index *delta_index = &chapter_index->delta_index;
+ struct delta_index_stats stats;
+ u64 nonce = chapter_index->volume_nonce;
+ u64 chapter_number = chapter_index->virtual_chapter_number;
+ const struct index_geometry *geometry = chapter_index->geometry;
+ u32 list_count = geometry->delta_lists_per_chapter;
+ unsigned int removals = 0;
+ struct delta_index_entry entry;
+ u32 next_list;
+ s32 list_number;
+
+ for (;;) {
+ result = uds_pack_delta_index_page(delta_index, nonce, memory,
+ geometry->bytes_per_page,
+ chapter_number, first_list,
+ lists_packed);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if ((first_list + *lists_packed) == list_count) {
+ /* All lists are packed. */
+ break;
+ } else if (*lists_packed == 0) {
+ /*
+ * The next delta list does not fit on a page. This delta list will be
+ * removed.
+ */
+ } else if (last_page) {
+ /*
+ * This is the last page and there are lists left unpacked, but all of the
+ * remaining lists must fit on the page. Find a list that contains entries
+ * and remove the entire list. Try the first list that does not fit. If it
+ * is empty, we will select the last list that already fits and has any
+ * entries.
+ */
+ } else {
+ /* This page is done. */
+ break;
+ }
+
+ if (removals == 0) {
+ uds_get_delta_index_stats(delta_index, &stats);
+ vdo_log_warning("The chapter index for chapter %llu contains %llu entries with %llu collisions",
+ (unsigned long long) chapter_number,
+ (unsigned long long) stats.record_count,
+ (unsigned long long) stats.collision_count);
+ }
+
+ list_number = *lists_packed;
+ do {
+ if (list_number < 0)
+ return UDS_OVERFLOW;
+
+ next_list = first_list + list_number--,
+ result = uds_start_delta_index_search(delta_index, next_list, 0,
+ &entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_next_delta_index_entry(&entry);
+ if (result != UDS_SUCCESS)
+ return result;
+ } while (entry.at_end);
+
+ do {
+ result = uds_remove_delta_index_entry(&entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ removals++;
+ } while (!entry.at_end);
+ }
+
+ if (removals > 0) {
+ vdo_log_warning("To avoid chapter index page overflow in chapter %llu, %u entries were removed from the chapter index",
+ (unsigned long long) chapter_number, removals);
+ }
+
+ return UDS_SUCCESS;
+}
+
+/* Make a new chapter index page, initializing it with the data from a given index_page buffer. */
+int uds_initialize_chapter_index_page(struct delta_index_page *index_page,
+ const struct index_geometry *geometry,
+ u8 *page_buffer, u64 volume_nonce)
+{
+ return uds_initialize_delta_index_page(index_page, volume_nonce,
+ geometry->chapter_mean_delta,
+ geometry->chapter_payload_bits,
+ page_buffer, geometry->bytes_per_page);
+}
+
+/* Validate a chapter index page read during rebuild. */
+int uds_validate_chapter_index_page(const struct delta_index_page *index_page,
+ const struct index_geometry *geometry)
+{
+ int result;
+ const struct delta_index *delta_index = &index_page->delta_index;
+ u32 first = index_page->lowest_list_number;
+ u32 last = index_page->highest_list_number;
+ u32 list_number;
+
+ /* We walk every delta list from start to finish. */
+ for (list_number = first; list_number <= last; list_number++) {
+ struct delta_index_entry entry;
+
+ result = uds_start_delta_index_search(delta_index, list_number - first,
+ 0, &entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ for (;;) {
+ result = uds_next_delta_index_entry(&entry);
+ if (result != UDS_SUCCESS) {
+ /*
+ * A random bit stream is highly likely to arrive here when we go
+ * past the end of the delta list.
+ */
+ return result;
+ }
+
+ if (entry.at_end)
+ break;
+
+ /* Also make sure that the record page field contains a plausible value. */
+ if (uds_get_delta_entry_value(&entry) >=
+ geometry->record_pages_per_chapter) {
+ /*
+ * Do not log this as an error. It happens in normal operation when
+ * we are doing a rebuild but haven't written the entire volume
+ * once.
+ */
+ return UDS_CORRUPT_DATA;
+ }
+ }
+ }
+ return UDS_SUCCESS;
+}
+
+/*
+ * Search a chapter index page for a record name, returning the record page number that may contain
+ * the name.
+ */
+int uds_search_chapter_index_page(struct delta_index_page *index_page,
+ const struct index_geometry *geometry,
+ const struct uds_record_name *name,
+ u16 *record_page_ptr)
+{
+ int result;
+ struct delta_index *delta_index = &index_page->delta_index;
+ u32 address = uds_hash_to_chapter_delta_address(name, geometry);
+ u32 delta_list_number = uds_hash_to_chapter_delta_list(name, geometry);
+ u32 sub_list_number = delta_list_number - index_page->lowest_list_number;
+ struct delta_index_entry entry;
+
+ result = uds_get_delta_index_entry(delta_index, sub_list_number, address,
+ name->name, &entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (was_entry_found(&entry, address))
+ *record_page_ptr = uds_get_delta_entry_value(&entry);
+ else
+ *record_page_ptr = NO_CHAPTER_INDEX_ENTRY;
+
+ return UDS_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/indexer/chapter-index.h b/drivers/md/dm-vdo/indexer/chapter-index.h
new file mode 100644
index 000000000000..be8bf2b675b1
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/chapter-index.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_CHAPTER_INDEX_H
+#define UDS_CHAPTER_INDEX_H
+
+#include <linux/limits.h>
+
+#include "delta-index.h"
+#include "geometry.h"
+
+/*
+ * A chapter index for an open chapter is a mutable structure that tracks all the records that have
+ * been added to the chapter. A chapter index for a closed chapter is similar except that it is
+ * immutable because the contents of a closed chapter can never change, and the immutable structure
+ * is more efficient. Both types of chapter index are implemented with a delta index.
+ */
+
+/* The value returned when no entry is found in the chapter index. */
+#define NO_CHAPTER_INDEX_ENTRY U16_MAX
+
+struct open_chapter_index {
+ const struct index_geometry *geometry;
+ struct delta_index delta_index;
+ u64 virtual_chapter_number;
+ u64 volume_nonce;
+ size_t memory_size;
+};
+
+int __must_check uds_make_open_chapter_index(struct open_chapter_index **chapter_index,
+ const struct index_geometry *geometry,
+ u64 volume_nonce);
+
+void uds_free_open_chapter_index(struct open_chapter_index *chapter_index);
+
+void uds_empty_open_chapter_index(struct open_chapter_index *chapter_index,
+ u64 virtual_chapter_number);
+
+int __must_check uds_put_open_chapter_index_record(struct open_chapter_index *chapter_index,
+ const struct uds_record_name *name,
+ u32 page_number);
+
+int __must_check uds_pack_open_chapter_index_page(struct open_chapter_index *chapter_index,
+ u8 *memory, u32 first_list,
+ bool last_page, u32 *lists_packed);
+
+int __must_check uds_initialize_chapter_index_page(struct delta_index_page *index_page,
+ const struct index_geometry *geometry,
+ u8 *page_buffer, u64 volume_nonce);
+
+int __must_check uds_validate_chapter_index_page(const struct delta_index_page *index_page,
+ const struct index_geometry *geometry);
+
+int __must_check uds_search_chapter_index_page(struct delta_index_page *index_page,
+ const struct index_geometry *geometry,
+ const struct uds_record_name *name,
+ u16 *record_page_ptr);
+
+#endif /* UDS_CHAPTER_INDEX_H */
diff --git a/drivers/md/dm-vdo/indexer/config.c b/drivers/md/dm-vdo/indexer/config.c
new file mode 100644
index 000000000000..5532371b952f
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/config.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "config.h"
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "string-utils.h"
+#include "thread-utils.h"
+
+static const u8 INDEX_CONFIG_MAGIC[] = "ALBIC";
+static const u8 INDEX_CONFIG_VERSION_6_02[] = "06.02";
+static const u8 INDEX_CONFIG_VERSION_8_02[] = "08.02";
+
+#define DEFAULT_VOLUME_READ_THREADS 2
+#define MAX_VOLUME_READ_THREADS 16
+#define INDEX_CONFIG_MAGIC_LENGTH (sizeof(INDEX_CONFIG_MAGIC) - 1)
+#define INDEX_CONFIG_VERSION_LENGTH ((int)(sizeof(INDEX_CONFIG_VERSION_6_02) - 1))
+
+static bool is_version(const u8 *version, u8 *buffer)
+{
+ return memcmp(version, buffer, INDEX_CONFIG_VERSION_LENGTH) == 0;
+}
+
+static bool are_matching_configurations(struct uds_configuration *saved_config,
+ struct index_geometry *saved_geometry,
+ struct uds_configuration *user)
+{
+ struct index_geometry *geometry = user->geometry;
+ bool result = true;
+
+ if (saved_geometry->record_pages_per_chapter != geometry->record_pages_per_chapter) {
+ vdo_log_error("Record pages per chapter (%u) does not match (%u)",
+ saved_geometry->record_pages_per_chapter,
+ geometry->record_pages_per_chapter);
+ result = false;
+ }
+
+ if (saved_geometry->chapters_per_volume != geometry->chapters_per_volume) {
+ vdo_log_error("Chapter count (%u) does not match (%u)",
+ saved_geometry->chapters_per_volume,
+ geometry->chapters_per_volume);
+ result = false;
+ }
+
+ if (saved_geometry->sparse_chapters_per_volume != geometry->sparse_chapters_per_volume) {
+ vdo_log_error("Sparse chapter count (%u) does not match (%u)",
+ saved_geometry->sparse_chapters_per_volume,
+ geometry->sparse_chapters_per_volume);
+ result = false;
+ }
+
+ if (saved_config->cache_chapters != user->cache_chapters) {
+ vdo_log_error("Cache size (%u) does not match (%u)",
+ saved_config->cache_chapters, user->cache_chapters);
+ result = false;
+ }
+
+ if (saved_config->volume_index_mean_delta != user->volume_index_mean_delta) {
+ vdo_log_error("Volume index mean delta (%u) does not match (%u)",
+ saved_config->volume_index_mean_delta,
+ user->volume_index_mean_delta);
+ result = false;
+ }
+
+ if (saved_geometry->bytes_per_page != geometry->bytes_per_page) {
+ vdo_log_error("Bytes per page value (%zu) does not match (%zu)",
+ saved_geometry->bytes_per_page, geometry->bytes_per_page);
+ result = false;
+ }
+
+ if (saved_config->sparse_sample_rate != user->sparse_sample_rate) {
+ vdo_log_error("Sparse sample rate (%u) does not match (%u)",
+ saved_config->sparse_sample_rate,
+ user->sparse_sample_rate);
+ result = false;
+ }
+
+ if (saved_config->nonce != user->nonce) {
+ vdo_log_error("Nonce (%llu) does not match (%llu)",
+ (unsigned long long) saved_config->nonce,
+ (unsigned long long) user->nonce);
+ result = false;
+ }
+
+ return result;
+}
+
+/* Read the configuration and validate it against the provided one. */
+int uds_validate_config_contents(struct buffered_reader *reader,
+ struct uds_configuration *user_config)
+{
+ int result;
+ struct uds_configuration config;
+ struct index_geometry geometry;
+ u8 version_buffer[INDEX_CONFIG_VERSION_LENGTH];
+ u32 bytes_per_page;
+ u8 buffer[sizeof(struct uds_configuration_6_02)];
+ size_t offset = 0;
+
+ result = uds_verify_buffered_data(reader, INDEX_CONFIG_MAGIC,
+ INDEX_CONFIG_MAGIC_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_read_from_buffered_reader(reader, version_buffer,
+ INDEX_CONFIG_VERSION_LENGTH);
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "cannot read index config version");
+
+ if (!is_version(INDEX_CONFIG_VERSION_6_02, version_buffer) &&
+ !is_version(INDEX_CONFIG_VERSION_8_02, version_buffer)) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "unsupported configuration version: '%.*s'",
+ INDEX_CONFIG_VERSION_LENGTH,
+ version_buffer);
+ }
+
+ result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "cannot read config data");
+
+ decode_u32_le(buffer, &offset, &geometry.record_pages_per_chapter);
+ decode_u32_le(buffer, &offset, &geometry.chapters_per_volume);
+ decode_u32_le(buffer, &offset, &geometry.sparse_chapters_per_volume);
+ decode_u32_le(buffer, &offset, &config.cache_chapters);
+ offset += sizeof(u32);
+ decode_u32_le(buffer, &offset, &config.volume_index_mean_delta);
+ decode_u32_le(buffer, &offset, &bytes_per_page);
+ geometry.bytes_per_page = bytes_per_page;
+ decode_u32_le(buffer, &offset, &config.sparse_sample_rate);
+ decode_u64_le(buffer, &offset, &config.nonce);
+
+ result = VDO_ASSERT(offset == sizeof(struct uds_configuration_6_02),
+ "%zu bytes read but not decoded",
+ sizeof(struct uds_configuration_6_02) - offset);
+ if (result != VDO_SUCCESS)
+ return UDS_CORRUPT_DATA;
+
+ if (is_version(INDEX_CONFIG_VERSION_6_02, version_buffer)) {
+ user_config->geometry->remapped_virtual = 0;
+ user_config->geometry->remapped_physical = 0;
+ } else {
+ u8 remapping[sizeof(u64) + sizeof(u64)];
+
+ result = uds_read_from_buffered_reader(reader, remapping,
+ sizeof(remapping));
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "cannot read converted config");
+
+ offset = 0;
+ decode_u64_le(remapping, &offset,
+ &user_config->geometry->remapped_virtual);
+ decode_u64_le(remapping, &offset,
+ &user_config->geometry->remapped_physical);
+ }
+
+ if (!are_matching_configurations(&config, &geometry, user_config)) {
+ vdo_log_warning("Supplied configuration does not match save");
+ return UDS_NO_INDEX;
+ }
+
+ return UDS_SUCCESS;
+}
+
+/*
+ * Write the configuration to stable storage. If the superblock version is < 4, write the 6.02
+ * version; otherwise write the 8.02 version, indicating the configuration is for an index that has
+ * been reduced by one chapter.
+ */
+int uds_write_config_contents(struct buffered_writer *writer,
+ struct uds_configuration *config, u32 version)
+{
+ int result;
+ struct index_geometry *geometry = config->geometry;
+ u8 buffer[sizeof(struct uds_configuration_8_02)];
+ size_t offset = 0;
+
+ result = uds_write_to_buffered_writer(writer, INDEX_CONFIG_MAGIC,
+ INDEX_CONFIG_MAGIC_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ /*
+ * If version is < 4, the index has not been reduced by a chapter so it must be written out
+ * as version 6.02 so that it is still compatible with older versions of UDS.
+ */
+ if (version >= 4) {
+ result = uds_write_to_buffered_writer(writer, INDEX_CONFIG_VERSION_8_02,
+ INDEX_CONFIG_VERSION_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+ } else {
+ result = uds_write_to_buffered_writer(writer, INDEX_CONFIG_VERSION_6_02,
+ INDEX_CONFIG_VERSION_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ encode_u32_le(buffer, &offset, geometry->record_pages_per_chapter);
+ encode_u32_le(buffer, &offset, geometry->chapters_per_volume);
+ encode_u32_le(buffer, &offset, geometry->sparse_chapters_per_volume);
+ encode_u32_le(buffer, &offset, config->cache_chapters);
+ encode_u32_le(buffer, &offset, 0);
+ encode_u32_le(buffer, &offset, config->volume_index_mean_delta);
+ encode_u32_le(buffer, &offset, geometry->bytes_per_page);
+ encode_u32_le(buffer, &offset, config->sparse_sample_rate);
+ encode_u64_le(buffer, &offset, config->nonce);
+
+ result = VDO_ASSERT(offset == sizeof(struct uds_configuration_6_02),
+ "%zu bytes encoded, of %zu expected", offset,
+ sizeof(struct uds_configuration_6_02));
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (version >= 4) {
+ encode_u64_le(buffer, &offset, geometry->remapped_virtual);
+ encode_u64_le(buffer, &offset, geometry->remapped_physical);
+ }
+
+ return uds_write_to_buffered_writer(writer, buffer, offset);
+}
+
+/* Compute configuration parameters that depend on memory size. */
+static int compute_memory_sizes(uds_memory_config_size_t mem_gb, bool sparse,
+ u32 *chapters_per_volume, u32 *record_pages_per_chapter,
+ u32 *sparse_chapters_per_volume)
+{
+ u32 reduced_chapters = 0;
+ u32 base_chapters;
+
+ if (mem_gb == UDS_MEMORY_CONFIG_256MB) {
+ base_chapters = DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = SMALL_RECORD_PAGES_PER_CHAPTER;
+ } else if (mem_gb == UDS_MEMORY_CONFIG_512MB) {
+ base_chapters = DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = 2 * SMALL_RECORD_PAGES_PER_CHAPTER;
+ } else if (mem_gb == UDS_MEMORY_CONFIG_768MB) {
+ base_chapters = DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = 3 * SMALL_RECORD_PAGES_PER_CHAPTER;
+ } else if ((mem_gb >= 1) && (mem_gb <= UDS_MEMORY_CONFIG_MAX)) {
+ base_chapters = mem_gb * DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = DEFAULT_RECORD_PAGES_PER_CHAPTER;
+ } else if (mem_gb == UDS_MEMORY_CONFIG_REDUCED_256MB) {
+ reduced_chapters = 1;
+ base_chapters = DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = SMALL_RECORD_PAGES_PER_CHAPTER;
+ } else if (mem_gb == UDS_MEMORY_CONFIG_REDUCED_512MB) {
+ reduced_chapters = 1;
+ base_chapters = DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = 2 * SMALL_RECORD_PAGES_PER_CHAPTER;
+ } else if (mem_gb == UDS_MEMORY_CONFIG_REDUCED_768MB) {
+ reduced_chapters = 1;
+ base_chapters = DEFAULT_CHAPTERS_PER_VOLUME;
+ *record_pages_per_chapter = 3 * SMALL_RECORD_PAGES_PER_CHAPTER;
+ } else if ((mem_gb >= 1 + UDS_MEMORY_CONFIG_REDUCED) &&
+ (mem_gb <= UDS_MEMORY_CONFIG_REDUCED_MAX)) {
+ reduced_chapters = 1;
+ base_chapters = ((mem_gb - UDS_MEMORY_CONFIG_REDUCED) *
+ DEFAULT_CHAPTERS_PER_VOLUME);
+ *record_pages_per_chapter = DEFAULT_RECORD_PAGES_PER_CHAPTER;
+ } else {
+ vdo_log_error("received invalid memory size");
+ return -EINVAL;
+ }
+
+ if (sparse) {
+ /* Make 95% of chapters sparse, allowing 10x more records. */
+ *sparse_chapters_per_volume = (19 * base_chapters) / 2;
+ base_chapters *= 10;
+ } else {
+ *sparse_chapters_per_volume = 0;
+ }
+
+ *chapters_per_volume = base_chapters - reduced_chapters;
+ return UDS_SUCCESS;
+}
+
+static unsigned int __must_check normalize_zone_count(unsigned int requested)
+{
+ unsigned int zone_count = requested;
+
+ if (zone_count == 0)
+ zone_count = num_online_cpus() / 2;
+
+ if (zone_count < 1)
+ zone_count = 1;
+
+ if (zone_count > MAX_ZONES)
+ zone_count = MAX_ZONES;
+
+ vdo_log_info("Using %u indexing zone%s for concurrency.",
+ zone_count, zone_count == 1 ? "" : "s");
+ return zone_count;
+}
+
+static unsigned int __must_check normalize_read_threads(unsigned int requested)
+{
+ unsigned int read_threads = requested;
+
+ if (read_threads < 1)
+ read_threads = DEFAULT_VOLUME_READ_THREADS;
+
+ if (read_threads > MAX_VOLUME_READ_THREADS)
+ read_threads = MAX_VOLUME_READ_THREADS;
+
+ return read_threads;
+}
+
+int uds_make_configuration(const struct uds_parameters *params,
+ struct uds_configuration **config_ptr)
+{
+ struct uds_configuration *config;
+ u32 chapters_per_volume = 0;
+ u32 record_pages_per_chapter = 0;
+ u32 sparse_chapters_per_volume = 0;
+ int result;
+
+ result = compute_memory_sizes(params->memory_size, params->sparse,
+ &chapters_per_volume, &record_pages_per_chapter,
+ &sparse_chapters_per_volume);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = vdo_allocate(1, struct uds_configuration, __func__, &config);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_make_index_geometry(DEFAULT_BYTES_PER_PAGE, record_pages_per_chapter,
+ chapters_per_volume, sparse_chapters_per_volume,
+ 0, 0, &config->geometry);
+ if (result != UDS_SUCCESS) {
+ uds_free_configuration(config);
+ return result;
+ }
+
+ config->zone_count = normalize_zone_count(params->zone_count);
+ config->read_threads = normalize_read_threads(params->read_threads);
+
+ config->cache_chapters = DEFAULT_CACHE_CHAPTERS;
+ config->volume_index_mean_delta = DEFAULT_VOLUME_INDEX_MEAN_DELTA;
+ config->sparse_sample_rate = (params->sparse ? DEFAULT_SPARSE_SAMPLE_RATE : 0);
+ config->nonce = params->nonce;
+ config->bdev = params->bdev;
+ config->offset = params->offset;
+ config->size = params->size;
+
+ *config_ptr = config;
+ return UDS_SUCCESS;
+}
+
+void uds_free_configuration(struct uds_configuration *config)
+{
+ if (config != NULL) {
+ uds_free_index_geometry(config->geometry);
+ vdo_free(config);
+ }
+}
+
+void uds_log_configuration(struct uds_configuration *config)
+{
+ struct index_geometry *geometry = config->geometry;
+
+ vdo_log_debug("Configuration:");
+ vdo_log_debug(" Record pages per chapter: %10u", geometry->record_pages_per_chapter);
+ vdo_log_debug(" Chapters per volume: %10u", geometry->chapters_per_volume);
+ vdo_log_debug(" Sparse chapters per volume: %10u", geometry->sparse_chapters_per_volume);
+ vdo_log_debug(" Cache size (chapters): %10u", config->cache_chapters);
+ vdo_log_debug(" Volume index mean delta: %10u", config->volume_index_mean_delta);
+ vdo_log_debug(" Bytes per page: %10zu", geometry->bytes_per_page);
+ vdo_log_debug(" Sparse sample rate: %10u", config->sparse_sample_rate);
+ vdo_log_debug(" Nonce: %llu", (unsigned long long) config->nonce);
+}
diff --git a/drivers/md/dm-vdo/indexer/config.h b/drivers/md/dm-vdo/indexer/config.h
new file mode 100644
index 000000000000..08507dc2f7a1
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/config.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_CONFIG_H
+#define UDS_CONFIG_H
+
+#include "geometry.h"
+#include "indexer.h"
+#include "io-factory.h"
+
+/*
+ * The uds_configuration records a variety of parameters used to configure a new UDS index. Some
+ * parameters are provided by the client, while others are fixed or derived from user-supplied
+ * values. It is created when an index is created, and it is recorded in the index metadata.
+ */
+
+enum {
+ DEFAULT_VOLUME_INDEX_MEAN_DELTA = 4096,
+ DEFAULT_CACHE_CHAPTERS = 7,
+ DEFAULT_SPARSE_SAMPLE_RATE = 32,
+ MAX_ZONES = 16,
+};
+
+/* A set of configuration parameters for the indexer. */
+struct uds_configuration {
+ /* Storage device for the index */
+ struct block_device *bdev;
+
+ /* The maximum allowable size of the index */
+ size_t size;
+
+ /* The offset where the index should start */
+ off_t offset;
+
+ /* Parameters for the volume */
+
+ /* The volume layout */
+ struct index_geometry *geometry;
+
+ /* Index owner's nonce */
+ u64 nonce;
+
+ /* The number of threads used to process index requests */
+ unsigned int zone_count;
+
+ /* The number of threads used to read volume pages */
+ unsigned int read_threads;
+
+ /* Size of the page cache and sparse chapter index cache in chapters */
+ u32 cache_chapters;
+
+ /* Parameters for the volume index */
+
+ /* The mean delta for the volume index */
+ u32 volume_index_mean_delta;
+
+ /* Sampling rate for sparse indexing */
+ u32 sparse_sample_rate;
+};
+
+/* On-disk structure of data for a version 8.02 index. */
+struct uds_configuration_8_02 {
+ /* Smaller (16), Small (64) or large (256) indices */
+ u32 record_pages_per_chapter;
+ /* Total number of chapters per volume */
+ u32 chapters_per_volume;
+ /* Number of sparse chapters per volume */
+ u32 sparse_chapters_per_volume;
+ /* Size of the page cache, in chapters */
+ u32 cache_chapters;
+ /* Unused field */
+ u32 unused;
+ /* The volume index mean delta to use */
+ u32 volume_index_mean_delta;
+ /* Size of a page, used for both record pages and index pages */
+ u32 bytes_per_page;
+ /* Sampling rate for sparse indexing */
+ u32 sparse_sample_rate;
+ /* Index owner's nonce */
+ u64 nonce;
+ /* Virtual chapter remapped from physical chapter 0 */
+ u64 remapped_virtual;
+ /* New physical chapter which remapped chapter was moved to */
+ u64 remapped_physical;
+} __packed;
+
+/* On-disk structure of data for a version 6.02 index. */
+struct uds_configuration_6_02 {
+ /* Smaller (16), Small (64) or large (256) indices */
+ u32 record_pages_per_chapter;
+ /* Total number of chapters per volume */
+ u32 chapters_per_volume;
+ /* Number of sparse chapters per volume */
+ u32 sparse_chapters_per_volume;
+ /* Size of the page cache, in chapters */
+ u32 cache_chapters;
+ /* Unused field */
+ u32 unused;
+ /* The volume index mean delta to use */
+ u32 volume_index_mean_delta;
+ /* Size of a page, used for both record pages and index pages */
+ u32 bytes_per_page;
+ /* Sampling rate for sparse indexing */
+ u32 sparse_sample_rate;
+ /* Index owner's nonce */
+ u64 nonce;
+} __packed;
+
+int __must_check uds_make_configuration(const struct uds_parameters *params,
+ struct uds_configuration **config_ptr);
+
+void uds_free_configuration(struct uds_configuration *config);
+
+int __must_check uds_validate_config_contents(struct buffered_reader *reader,
+ struct uds_configuration *config);
+
+int __must_check uds_write_config_contents(struct buffered_writer *writer,
+ struct uds_configuration *config, u32 version);
+
+void uds_log_configuration(struct uds_configuration *config);
+
+#endif /* UDS_CONFIG_H */
diff --git a/drivers/md/dm-vdo/indexer/delta-index.c b/drivers/md/dm-vdo/indexer/delta-index.c
new file mode 100644
index 000000000000..0ac2443f0df3
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/delta-index.c
@@ -0,0 +1,1970 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+#include "delta-index.h"
+
+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/compiler.h>
+#include <linux/limits.h>
+#include <linux/log2.h>
+
+#include "cpu.h"
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+#include "string-utils.h"
+#include "time-utils.h"
+
+#include "config.h"
+#include "indexer.h"
+
+/*
+ * The entries in a delta index could be stored in a single delta list, but to reduce search times
+ * and update costs it uses multiple delta lists. These lists are stored in a single chunk of
+ * memory managed by the delta_zone structure. The delta_zone can move the data around within its
+ * memory, so the location of each delta list is recorded as a bit offset into the memory. Because
+ * the volume index can contain over a million delta lists, we want to be efficient with the size
+ * of the delta list header information. This information is encoded into 16 bytes per list. The
+ * volume index delta list memory can easily exceed 4 gigabits, so a 64 bit value is needed to
+ * address the memory. The volume index delta lists average around 6 kilobits, so 16 bits are
+ * sufficient to store the size of a delta list.
+ *
+ * Each delta list is stored as a bit stream. Within the delta list encoding, bits and bytes are
+ * numbered in little endian order. Within a byte, bit 0 is the least significant bit (0x1), and
+ * bit 7 is the most significant bit (0x80). Within a bit stream, bit 7 is the most significant bit
+ * of byte 0, and bit 8 is the least significant bit of byte 1. Within a byte array, a byte's
+ * number corresponds to its index in the array.
+ *
+ * A standard delta list entry is stored as a fixed length payload (the value) followed by a
+ * variable length key (the delta). A collision entry is used when two block names have the same
+ * delta list address. A collision entry always follows a standard entry for the hash with which it
+ * collides, and is encoded with DELTA == 0 with an additional 256 bits field at the end,
+ * containing the full block name. An entry with a delta of 0 at the beginning of a delta list
+ * indicates a normal entry.
+ *
+ * The delta in each entry is encoded with a variable-length Huffman code to minimize the memory
+ * used by small deltas. The Huffman code is specified by three parameters, which can be computed
+ * from the desired mean delta when the index is full. (See compute_coding_constants() for
+ * details.)
+ *
+ * The bit field utilities used to read and write delta entries assume that it is possible to read
+ * some bytes beyond the end of the bit field, so a delta_zone memory allocation is guarded by two
+ * invalid delta lists to prevent reading outside the delta_zone memory. The valid delta lists are
+ * numbered 1 to N, and the guard lists are numbered 0 and N+1. The function to decode the bit
+ * stream include a step that skips over bits set to 0 until the first 1 bit is found. A corrupted
+ * delta list could cause this step to run off the end of the delta_zone memory, so as extra
+ * protection against this happening, the tail guard list is set to all ones.
+ *
+ * The delta_index supports two different forms. The mutable form is created by
+ * uds_initialize_delta_index(), and is used for the volume index and for open chapter indexes. The
+ * immutable form is created by uds_initialize_delta_index_page(), and is used for closed (and
+ * cached) chapter index pages. The immutable form does not allocate delta list headers or
+ * temporary offsets, and thus is somewhat more memory efficient.
+ */
+
+/*
+ * This is the largest field size supported by get_field() and set_field(). Any field that is
+ * larger is not guaranteed to fit in a single byte-aligned u32.
+ */
+#define MAX_FIELD_BITS ((sizeof(u32) - 1) * BITS_PER_BYTE + 1)
+
+/*
+ * This is the largest field size supported by get_big_field() and set_big_field(). Any field that
+ * is larger is not guaranteed to fit in a single byte-aligned u64.
+ */
+#define MAX_BIG_FIELD_BITS ((sizeof(u64) - 1) * BITS_PER_BYTE + 1)
+
+/*
+ * This is the number of guard bytes needed at the end of the memory byte array when using the bit
+ * utilities. These utilities call get_big_field() and set_big_field(), which can access up to 7
+ * bytes beyond the end of the desired field. The definition is written to make it clear how this
+ * value is derived.
+ */
+#define POST_FIELD_GUARD_BYTES (sizeof(u64) - 1)
+
+/* The number of guard bits that are needed in the tail guard list */
+#define GUARD_BITS (POST_FIELD_GUARD_BYTES * BITS_PER_BYTE)
+
+/*
+ * The maximum size of a single delta list in bytes. We count guard bytes in this value because a
+ * buffer of this size can be used with move_bits().
+ */
+#define DELTA_LIST_MAX_BYTE_COUNT \
+ ((U16_MAX + BITS_PER_BYTE) / BITS_PER_BYTE + POST_FIELD_GUARD_BYTES)
+
+/* The number of extra bytes and bits needed to store a collision entry */
+#define COLLISION_BYTES UDS_RECORD_NAME_SIZE
+#define COLLISION_BITS (COLLISION_BYTES * BITS_PER_BYTE)
+
+/*
+ * Immutable delta lists are packed into pages containing a header that encodes the delta list
+ * information into 19 bits per list (64KB bit offset).
+ */
+#define IMMUTABLE_HEADER_SIZE 19
+
+/*
+ * Constants and structures for the saved delta index. "DI" is for delta_index, and -##### is a
+ * number to increment when the format of the data changes.
+ */
+#define MAGIC_SIZE 8
+
+static const char DELTA_INDEX_MAGIC[] = "DI-00002";
+
+struct delta_index_header {
+ char magic[MAGIC_SIZE];
+ u32 zone_number;
+ u32 zone_count;
+ u32 first_list;
+ u32 list_count;
+ u64 record_count;
+ u64 collision_count;
+};
+
+/*
+ * Header data used for immutable delta index pages. This data is followed by the delta list offset
+ * table.
+ */
+struct delta_page_header {
+ /* Externally-defined nonce */
+ u64 nonce;
+ /* The virtual chapter number */
+ u64 virtual_chapter_number;
+ /* Index of the first delta list on the page */
+ u16 first_list;
+ /* Number of delta lists on the page */
+ u16 list_count;
+} __packed;
+
+static inline u64 get_delta_list_byte_start(const struct delta_list *delta_list)
+{
+ return delta_list->start / BITS_PER_BYTE;
+}
+
+static inline u16 get_delta_list_byte_size(const struct delta_list *delta_list)
+{
+ unsigned int bit_offset = delta_list->start % BITS_PER_BYTE;
+
+ return BITS_TO_BYTES(bit_offset + delta_list->size);
+}
+
+static void rebalance_delta_zone(const struct delta_zone *delta_zone, u32 first,
+ u32 last)
+{
+ struct delta_list *delta_list;
+ u64 new_start;
+
+ if (first == last) {
+ /* Only one list is moving, and we know there is space. */
+ delta_list = &delta_zone->delta_lists[first];
+ new_start = delta_zone->new_offsets[first];
+ if (delta_list->start != new_start) {
+ u64 source;
+ u64 destination;
+
+ source = get_delta_list_byte_start(delta_list);
+ delta_list->start = new_start;
+ destination = get_delta_list_byte_start(delta_list);
+ memmove(delta_zone->memory + destination,
+ delta_zone->memory + source,
+ get_delta_list_byte_size(delta_list));
+ }
+ } else {
+ /*
+ * There is more than one list. Divide the problem in half, and use recursive calls
+ * to process each half. Note that after this computation, first <= middle, and
+ * middle < last.
+ */
+ u32 middle = (first + last) / 2;
+
+ delta_list = &delta_zone->delta_lists[middle];
+ new_start = delta_zone->new_offsets[middle];
+
+ /*
+ * The direction that our middle list is moving determines which half of the
+ * problem must be processed first.
+ */
+ if (new_start > delta_list->start) {
+ rebalance_delta_zone(delta_zone, middle + 1, last);
+ rebalance_delta_zone(delta_zone, first, middle);
+ } else {
+ rebalance_delta_zone(delta_zone, first, middle);
+ rebalance_delta_zone(delta_zone, middle + 1, last);
+ }
+ }
+}
+
+static inline size_t get_zone_memory_size(unsigned int zone_count, size_t memory_size)
+{
+ /* Round up so that each zone is a multiple of 64K in size. */
+ size_t ALLOC_BOUNDARY = 64 * 1024;
+
+ return (memory_size / zone_count + ALLOC_BOUNDARY - 1) & -ALLOC_BOUNDARY;
+}
+
+void uds_reset_delta_index(const struct delta_index *delta_index)
+{
+ unsigned int z;
+
+ /*
+ * Initialize all delta lists to be empty. We keep 2 extra delta list descriptors, one
+ * before the first real entry and one after so that we don't need to bounds check the
+ * array access when calculating preceding and following gap sizes.
+ */
+ for (z = 0; z < delta_index->zone_count; z++) {
+ u64 list_bits;
+ u64 spacing;
+ u64 offset;
+ unsigned int i;
+ struct delta_zone *zone = &delta_index->delta_zones[z];
+ struct delta_list *delta_lists = zone->delta_lists;
+
+ /* Zeroing the delta list headers initializes the head guard list correctly. */
+ memset(delta_lists, 0,
+ (zone->list_count + 2) * sizeof(struct delta_list));
+
+ /* Set all the bits in the end guard list. */
+ list_bits = (u64) zone->size * BITS_PER_BYTE - GUARD_BITS;
+ delta_lists[zone->list_count + 1].start = list_bits;
+ delta_lists[zone->list_count + 1].size = GUARD_BITS;
+ memset(zone->memory + (list_bits / BITS_PER_BYTE), ~0,
+ POST_FIELD_GUARD_BYTES);
+
+ /* Evenly space out the real delta lists by setting regular offsets. */
+ spacing = list_bits / zone->list_count;
+ offset = spacing / 2;
+ for (i = 1; i <= zone->list_count; i++) {
+ delta_lists[i].start = offset;
+ offset += spacing;
+ }
+
+ /* Update the statistics. */
+ zone->discard_count += zone->record_count;
+ zone->record_count = 0;
+ zone->collision_count = 0;
+ }
+}
+
+/* Compute the Huffman coding parameters for the given mean delta. The Huffman code is specified by
+ * three parameters:
+ *
+ * MINBITS The number of bits in the smallest code
+ * BASE The number of values coded using a code of length MINBITS
+ * INCR The number of values coded by using one additional bit
+ *
+ * These parameters are related by this equation:
+ *
+ * BASE + INCR == 1 << MINBITS
+ *
+ * The math for the Huffman code of an exponential distribution says that
+ *
+ * INCR = log(2) * MEAN_DELTA
+ *
+ * Then use the smallest MINBITS value so that
+ *
+ * (1 << MINBITS) > INCR
+ *
+ * And then
+ *
+ * BASE = (1 << MINBITS) - INCR
+ *
+ * Now the index can generate a code such that
+ * - The first BASE values code using MINBITS bits.
+ * - The next INCR values code using MINBITS+1 bits.
+ * - The next INCR values code using MINBITS+2 bits.
+ * - (and so on).
+ */
+static void compute_coding_constants(u32 mean_delta, u16 *min_bits, u32 *min_keys, u32 *incr_keys)
+{
+ /*
+ * We want to compute the rounded value of log(2) * mean_delta. Since we cannot always use
+ * floating point, use a really good integer approximation.
+ */
+ *incr_keys = (836158UL * mean_delta + 603160UL) / 1206321UL;
+ *min_bits = bits_per(*incr_keys + 1);
+ *min_keys = (1 << *min_bits) - *incr_keys;
+}
+
+void uds_uninitialize_delta_index(struct delta_index *delta_index)
+{
+ unsigned int z;
+
+ if (delta_index->delta_zones == NULL)
+ return;
+
+ for (z = 0; z < delta_index->zone_count; z++) {
+ vdo_free(vdo_forget(delta_index->delta_zones[z].new_offsets));
+ vdo_free(vdo_forget(delta_index->delta_zones[z].delta_lists));
+ vdo_free(vdo_forget(delta_index->delta_zones[z].memory));
+ }
+
+ vdo_free(delta_index->delta_zones);
+ memset(delta_index, 0, sizeof(struct delta_index));
+}
+
+static int initialize_delta_zone(struct delta_zone *delta_zone, size_t size,
+ u32 first_list, u32 list_count, u32 mean_delta,
+ u32 payload_bits, u8 tag)
+{
+ int result;
+
+ result = vdo_allocate(size, u8, "delta list", &delta_zone->memory);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(list_count + 2, u64, "delta list temp",
+ &delta_zone->new_offsets);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* Allocate the delta lists. */
+ result = vdo_allocate(list_count + 2, struct delta_list, "delta lists",
+ &delta_zone->delta_lists);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ compute_coding_constants(mean_delta, &delta_zone->min_bits,
+ &delta_zone->min_keys, &delta_zone->incr_keys);
+ delta_zone->value_bits = payload_bits;
+ delta_zone->buffered_writer = NULL;
+ delta_zone->size = size;
+ delta_zone->rebalance_time = 0;
+ delta_zone->rebalance_count = 0;
+ delta_zone->record_count = 0;
+ delta_zone->collision_count = 0;
+ delta_zone->discard_count = 0;
+ delta_zone->overflow_count = 0;
+ delta_zone->first_list = first_list;
+ delta_zone->list_count = list_count;
+ delta_zone->tag = tag;
+
+ return UDS_SUCCESS;
+}
+
+int uds_initialize_delta_index(struct delta_index *delta_index, unsigned int zone_count,
+ u32 list_count, u32 mean_delta, u32 payload_bits,
+ size_t memory_size, u8 tag)
+{
+ int result;
+ unsigned int z;
+ size_t zone_memory;
+
+ result = vdo_allocate(zone_count, struct delta_zone, "Delta Index Zones",
+ &delta_index->delta_zones);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ delta_index->zone_count = zone_count;
+ delta_index->list_count = list_count;
+ delta_index->lists_per_zone = DIV_ROUND_UP(list_count, zone_count);
+ delta_index->memory_size = 0;
+ delta_index->mutable = true;
+ delta_index->tag = tag;
+
+ for (z = 0; z < zone_count; z++) {
+ u32 lists_in_zone = delta_index->lists_per_zone;
+ u32 first_list_in_zone = z * lists_in_zone;
+
+ if (z == zone_count - 1) {
+ /*
+ * The last zone gets fewer lists if zone_count doesn't evenly divide
+ * list_count. We'll have an underflow if the assertion below doesn't hold.
+ */
+ if (delta_index->list_count <= first_list_in_zone) {
+ uds_uninitialize_delta_index(delta_index);
+ return vdo_log_error_strerror(UDS_INVALID_ARGUMENT,
+ "%u delta lists not enough for %u zones",
+ list_count, zone_count);
+ }
+ lists_in_zone = delta_index->list_count - first_list_in_zone;
+ }
+
+ zone_memory = get_zone_memory_size(zone_count, memory_size);
+ result = initialize_delta_zone(&delta_index->delta_zones[z], zone_memory,
+ first_list_in_zone, lists_in_zone,
+ mean_delta, payload_bits, tag);
+ if (result != UDS_SUCCESS) {
+ uds_uninitialize_delta_index(delta_index);
+ return result;
+ }
+
+ delta_index->memory_size +=
+ (sizeof(struct delta_zone) + zone_memory +
+ (lists_in_zone + 2) * (sizeof(struct delta_list) + sizeof(u64)));
+ }
+
+ uds_reset_delta_index(delta_index);
+ return UDS_SUCCESS;
+}
+
+/* Read a bit field from an arbitrary bit boundary. */
+static inline u32 get_field(const u8 *memory, u64 offset, u8 size)
+{
+ const void *addr = memory + offset / BITS_PER_BYTE;
+
+ return (get_unaligned_le32(addr) >> (offset % BITS_PER_BYTE)) & ((1 << size) - 1);
+}
+
+/* Write a bit field to an arbitrary bit boundary. */
+static inline void set_field(u32 value, u8 *memory, u64 offset, u8 size)
+{
+ void *addr = memory + offset / BITS_PER_BYTE;
+ int shift = offset % BITS_PER_BYTE;
+ u32 data = get_unaligned_le32(addr);
+
+ data &= ~(((1 << size) - 1) << shift);
+ data |= value << shift;
+ put_unaligned_le32(data, addr);
+}
+
+/* Get the bit offset to the immutable delta list header. */
+static inline u32 get_immutable_header_offset(u32 list_number)
+{
+ return sizeof(struct delta_page_header) * BITS_PER_BYTE +
+ list_number * IMMUTABLE_HEADER_SIZE;
+}
+
+/* Get the bit offset to the start of the immutable delta list bit stream. */
+static inline u32 get_immutable_start(const u8 *memory, u32 list_number)
+{
+ return get_field(memory, get_immutable_header_offset(list_number),
+ IMMUTABLE_HEADER_SIZE);
+}
+
+/* Set the bit offset to the start of the immutable delta list bit stream. */
+static inline void set_immutable_start(u8 *memory, u32 list_number, u32 start)
+{
+ set_field(start, memory, get_immutable_header_offset(list_number),
+ IMMUTABLE_HEADER_SIZE);
+}
+
+static bool verify_delta_index_page(u64 nonce, u16 list_count, u64 expected_nonce,
+ u8 *memory, size_t memory_size)
+{
+ unsigned int i;
+
+ /*
+ * Verify the nonce. A mismatch can happen here during rebuild if we haven't written the
+ * entire volume at least once.
+ */
+ if (nonce != expected_nonce)
+ return false;
+
+ /* Verify that the number of delta lists can fit in the page. */
+ if (list_count > ((memory_size - sizeof(struct delta_page_header)) *
+ BITS_PER_BYTE / IMMUTABLE_HEADER_SIZE))
+ return false;
+
+ /*
+ * Verify that the first delta list is immediately after the last delta
+ * list header.
+ */
+ if (get_immutable_start(memory, 0) != get_immutable_header_offset(list_count + 1))
+ return false;
+
+ /* Verify that the lists are in the correct order. */
+ for (i = 0; i < list_count; i++) {
+ if (get_immutable_start(memory, i) > get_immutable_start(memory, i + 1))
+ return false;
+ }
+
+ /*
+ * Verify that the last list ends on the page, and that there is room
+ * for the post-field guard bits.
+ */
+ if (get_immutable_start(memory, list_count) >
+ (memory_size - POST_FIELD_GUARD_BYTES) * BITS_PER_BYTE)
+ return false;
+
+ /* Verify that the guard bytes are correctly set to all ones. */
+ for (i = 0; i < POST_FIELD_GUARD_BYTES; i++) {
+ if (memory[memory_size - POST_FIELD_GUARD_BYTES + i] != (u8) ~0)
+ return false;
+ }
+
+ /* All verifications passed. */
+ return true;
+}
+
+/* Initialize a delta index page to refer to a supplied page. */
+int uds_initialize_delta_index_page(struct delta_index_page *delta_index_page,
+ u64 expected_nonce, u32 mean_delta, u32 payload_bits,
+ u8 *memory, size_t memory_size)
+{
+ u64 nonce;
+ u64 vcn;
+ u64 first_list;
+ u64 list_count;
+ struct delta_page_header *header = (struct delta_page_header *) memory;
+ struct delta_zone *delta_zone = &delta_index_page->delta_zone;
+ const u8 *nonce_addr = (const u8 *) &header->nonce;
+ const u8 *vcn_addr = (const u8 *) &header->virtual_chapter_number;
+ const u8 *first_list_addr = (const u8 *) &header->first_list;
+ const u8 *list_count_addr = (const u8 *) &header->list_count;
+
+ /* First assume that the header is little endian. */
+ nonce = get_unaligned_le64(nonce_addr);
+ vcn = get_unaligned_le64(vcn_addr);
+ first_list = get_unaligned_le16(first_list_addr);
+ list_count = get_unaligned_le16(list_count_addr);
+ if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory,
+ memory_size)) {
+ /* If that fails, try big endian. */
+ nonce = get_unaligned_be64(nonce_addr);
+ vcn = get_unaligned_be64(vcn_addr);
+ first_list = get_unaligned_be16(first_list_addr);
+ list_count = get_unaligned_be16(list_count_addr);
+ if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory,
+ memory_size)) {
+ /*
+ * Both attempts failed. Do not log this as an error, because it can happen
+ * during a rebuild if we haven't written the entire volume at least once.
+ */
+ return UDS_CORRUPT_DATA;
+ }
+ }
+
+ delta_index_page->delta_index.delta_zones = delta_zone;
+ delta_index_page->delta_index.zone_count = 1;
+ delta_index_page->delta_index.list_count = list_count;
+ delta_index_page->delta_index.lists_per_zone = list_count;
+ delta_index_page->delta_index.mutable = false;
+ delta_index_page->delta_index.tag = 'p';
+ delta_index_page->virtual_chapter_number = vcn;
+ delta_index_page->lowest_list_number = first_list;
+ delta_index_page->highest_list_number = first_list + list_count - 1;
+
+ compute_coding_constants(mean_delta, &delta_zone->min_bits,
+ &delta_zone->min_keys, &delta_zone->incr_keys);
+ delta_zone->value_bits = payload_bits;
+ delta_zone->memory = memory;
+ delta_zone->delta_lists = NULL;
+ delta_zone->new_offsets = NULL;
+ delta_zone->buffered_writer = NULL;
+ delta_zone->size = memory_size;
+ delta_zone->rebalance_time = 0;
+ delta_zone->rebalance_count = 0;
+ delta_zone->record_count = 0;
+ delta_zone->collision_count = 0;
+ delta_zone->discard_count = 0;
+ delta_zone->overflow_count = 0;
+ delta_zone->first_list = 0;
+ delta_zone->list_count = list_count;
+ delta_zone->tag = 'p';
+
+ return UDS_SUCCESS;
+}
+
+/* Read a large bit field from an arbitrary bit boundary. */
+static inline u64 get_big_field(const u8 *memory, u64 offset, u8 size)
+{
+ const void *addr = memory + offset / BITS_PER_BYTE;
+
+ return (get_unaligned_le64(addr) >> (offset % BITS_PER_BYTE)) & ((1UL << size) - 1);
+}
+
+/* Write a large bit field to an arbitrary bit boundary. */
+static inline void set_big_field(u64 value, u8 *memory, u64 offset, u8 size)
+{
+ void *addr = memory + offset / BITS_PER_BYTE;
+ u8 shift = offset % BITS_PER_BYTE;
+ u64 data = get_unaligned_le64(addr);
+
+ data &= ~(((1UL << size) - 1) << shift);
+ data |= value << shift;
+ put_unaligned_le64(data, addr);
+}
+
+/* Set a sequence of bits to all zeros. */
+static inline void set_zero(u8 *memory, u64 offset, u32 size)
+{
+ if (size > 0) {
+ u8 *addr = memory + offset / BITS_PER_BYTE;
+ u8 shift = offset % BITS_PER_BYTE;
+ u32 count = size + shift > BITS_PER_BYTE ? (u32) BITS_PER_BYTE - shift : size;
+
+ *addr++ &= ~(((1 << count) - 1) << shift);
+ for (size -= count; size > BITS_PER_BYTE; size -= BITS_PER_BYTE)
+ *addr++ = 0;
+
+ if (size > 0)
+ *addr &= 0xFF << size;
+ }
+}
+
+/*
+ * Move several bits from a higher to a lower address, moving the lower addressed bits first. The
+ * size and memory offsets are measured in bits.
+ */
+static void move_bits_down(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
+{
+ const u8 *source;
+ u8 *destination;
+ u8 offset;
+ u8 count;
+ u64 field;
+
+ /* Start by moving one field that ends on a to int boundary. */
+ count = (MAX_BIG_FIELD_BITS - ((to_offset + MAX_BIG_FIELD_BITS) % BITS_PER_TYPE(u32)));
+ field = get_big_field(from, from_offset, count);
+ set_big_field(field, to, to_offset, count);
+ from_offset += count;
+ to_offset += count;
+ size -= count;
+
+ /* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */
+ offset = from_offset % BITS_PER_TYPE(u32);
+ source = from + (from_offset - offset) / BITS_PER_BYTE;
+ destination = to + to_offset / BITS_PER_BYTE;
+ while (size > MAX_BIG_FIELD_BITS) {
+ put_unaligned_le32(get_unaligned_le64(source) >> offset, destination);
+ source += sizeof(u32);
+ destination += sizeof(u32);
+ from_offset += BITS_PER_TYPE(u32);
+ to_offset += BITS_PER_TYPE(u32);
+ size -= BITS_PER_TYPE(u32);
+ }
+
+ /* Finish up by moving any remaining bits. */
+ if (size > 0) {
+ field = get_big_field(from, from_offset, size);
+ set_big_field(field, to, to_offset, size);
+ }
+}
+
+/*
+ * Move several bits from a lower to a higher address, moving the higher addressed bits first. The
+ * size and memory offsets are measured in bits.
+ */
+static void move_bits_up(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
+{
+ const u8 *source;
+ u8 *destination;
+ u8 offset;
+ u8 count;
+ u64 field;
+
+ /* Start by moving one field that begins on a destination int boundary. */
+ count = (to_offset + size) % BITS_PER_TYPE(u32);
+ if (count > 0) {
+ size -= count;
+ field = get_big_field(from, from_offset + size, count);
+ set_big_field(field, to, to_offset + size, count);
+ }
+
+ /* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */
+ offset = (from_offset + size) % BITS_PER_TYPE(u32);
+ source = from + (from_offset + size - offset) / BITS_PER_BYTE;
+ destination = to + (to_offset + size) / BITS_PER_BYTE;
+ while (size > MAX_BIG_FIELD_BITS) {
+ source -= sizeof(u32);
+ destination -= sizeof(u32);
+ size -= BITS_PER_TYPE(u32);
+ put_unaligned_le32(get_unaligned_le64(source) >> offset, destination);
+ }
+
+ /* Finish up by moving any remaining bits. */
+ if (size > 0) {
+ field = get_big_field(from, from_offset, size);
+ set_big_field(field, to, to_offset, size);
+ }
+}
+
+/*
+ * Move bits from one field to another. When the fields overlap, behave as if we first move all the
+ * bits from the source to a temporary value, and then move all the bits from the temporary value
+ * to the destination. The size and memory offsets are measured in bits.
+ */
+static void move_bits(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
+{
+ u64 field;
+
+ /* A small move doesn't require special handling. */
+ if (size <= MAX_BIG_FIELD_BITS) {
+ if (size > 0) {
+ field = get_big_field(from, from_offset, size);
+ set_big_field(field, to, to_offset, size);
+ }
+
+ return;
+ }
+
+ if (from_offset > to_offset)
+ move_bits_down(from, from_offset, to, to_offset, size);
+ else
+ move_bits_up(from, from_offset, to, to_offset, size);
+}
+
+/*
+ * Pack delta lists from a mutable delta index into an immutable delta index page. A range of delta
+ * lists (starting with a specified list index) is copied from the mutable delta index into a
+ * memory page used in the immutable index. The number of lists copied onto the page is returned in
+ * list_count.
+ */
+int uds_pack_delta_index_page(const struct delta_index *delta_index, u64 header_nonce,
+ u8 *memory, size_t memory_size, u64 virtual_chapter_number,
+ u32 first_list, u32 *list_count)
+{
+ const struct delta_zone *delta_zone;
+ struct delta_list *delta_lists;
+ u32 max_lists;
+ u32 n_lists = 0;
+ u32 offset;
+ u32 i;
+ int free_bits;
+ int bits;
+ struct delta_page_header *header;
+
+ delta_zone = &delta_index->delta_zones[0];
+ delta_lists = &delta_zone->delta_lists[first_list + 1];
+ max_lists = delta_index->list_count - first_list;
+
+ /*
+ * Compute how many lists will fit on the page. Subtract the size of the fixed header, one
+ * delta list offset, and the guard bytes from the page size to determine how much space is
+ * available for delta lists.
+ */
+ free_bits = memory_size * BITS_PER_BYTE;
+ free_bits -= get_immutable_header_offset(1);
+ free_bits -= GUARD_BITS;
+ if (free_bits < IMMUTABLE_HEADER_SIZE) {
+ /* This page is too small to store any delta lists. */
+ return vdo_log_error_strerror(UDS_OVERFLOW,
+ "Chapter Index Page of %zu bytes is too small",
+ memory_size);
+ }
+
+ while (n_lists < max_lists) {
+ /* Each list requires a delta list offset and the list data. */
+ bits = IMMUTABLE_HEADER_SIZE + delta_lists[n_lists].size;
+ if (bits > free_bits)
+ break;
+
+ n_lists++;
+ free_bits -= bits;
+ }
+
+ *list_count = n_lists;
+
+ header = (struct delta_page_header *) memory;
+ put_unaligned_le64(header_nonce, (u8 *) &header->nonce);
+ put_unaligned_le64(virtual_chapter_number,
+ (u8 *) &header->virtual_chapter_number);
+ put_unaligned_le16(first_list, (u8 *) &header->first_list);
+ put_unaligned_le16(n_lists, (u8 *) &header->list_count);
+
+ /* Construct the delta list offset table. */
+ offset = get_immutable_header_offset(n_lists + 1);
+ set_immutable_start(memory, 0, offset);
+ for (i = 0; i < n_lists; i++) {
+ offset += delta_lists[i].size;
+ set_immutable_start(memory, i + 1, offset);
+ }
+
+ /* Copy the delta list data onto the memory page. */
+ for (i = 0; i < n_lists; i++) {
+ move_bits(delta_zone->memory, delta_lists[i].start, memory,
+ get_immutable_start(memory, i), delta_lists[i].size);
+ }
+
+ /* Set all the bits in the guard bytes. */
+ memset(memory + memory_size - POST_FIELD_GUARD_BYTES, ~0,
+ POST_FIELD_GUARD_BYTES);
+ return UDS_SUCCESS;
+}
+
+/* Compute the new offsets of the delta lists. */
+static void compute_new_list_offsets(struct delta_zone *delta_zone, u32 growing_index,
+ size_t growing_size, size_t used_space)
+{
+ size_t spacing;
+ u32 i;
+ struct delta_list *delta_lists = delta_zone->delta_lists;
+ u32 tail_guard_index = delta_zone->list_count + 1;
+
+ spacing = (delta_zone->size - used_space) / delta_zone->list_count;
+ delta_zone->new_offsets[0] = 0;
+ for (i = 0; i <= delta_zone->list_count; i++) {
+ delta_zone->new_offsets[i + 1] =
+ (delta_zone->new_offsets[i] +
+ get_delta_list_byte_size(&delta_lists[i]) + spacing);
+ delta_zone->new_offsets[i] *= BITS_PER_BYTE;
+ delta_zone->new_offsets[i] += delta_lists[i].start % BITS_PER_BYTE;
+ if (i == 0)
+ delta_zone->new_offsets[i + 1] -= spacing / 2;
+ if (i + 1 == growing_index)
+ delta_zone->new_offsets[i + 1] += growing_size;
+ }
+
+ delta_zone->new_offsets[tail_guard_index] =
+ (delta_zone->size * BITS_PER_BYTE - delta_lists[tail_guard_index].size);
+}
+
+static void rebalance_lists(struct delta_zone *delta_zone)
+{
+ struct delta_list *delta_lists;
+ u32 i;
+ size_t used_space = 0;
+
+ /* Extend and balance memory to receive the delta lists */
+ delta_lists = delta_zone->delta_lists;
+ for (i = 0; i <= delta_zone->list_count + 1; i++)
+ used_space += get_delta_list_byte_size(&delta_lists[i]);
+
+ compute_new_list_offsets(delta_zone, 0, 0, used_space);
+ for (i = 1; i <= delta_zone->list_count + 1; i++)
+ delta_lists[i].start = delta_zone->new_offsets[i];
+}
+
+/* Start restoring a delta index from multiple input streams. */
+int uds_start_restoring_delta_index(struct delta_index *delta_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count)
+{
+ int result;
+ unsigned int zone_count = reader_count;
+ u64 record_count = 0;
+ u64 collision_count = 0;
+ u32 first_list[MAX_ZONES];
+ u32 list_count[MAX_ZONES];
+ unsigned int z;
+ u32 list_next = 0;
+ const struct delta_zone *delta_zone;
+
+ /* Read and validate each header. */
+ for (z = 0; z < zone_count; z++) {
+ struct delta_index_header header;
+ u8 buffer[sizeof(struct delta_index_header)];
+ size_t offset = 0;
+
+ result = uds_read_from_buffered_reader(buffered_readers[z], buffer,
+ sizeof(buffer));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read delta index header");
+ }
+
+ memcpy(&header.magic, buffer, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ decode_u32_le(buffer, &offset, &header.zone_number);
+ decode_u32_le(buffer, &offset, &header.zone_count);
+ decode_u32_le(buffer, &offset, &header.first_list);
+ decode_u32_le(buffer, &offset, &header.list_count);
+ decode_u64_le(buffer, &offset, &header.record_count);
+ decode_u64_le(buffer, &offset, &header.collision_count);
+
+ result = VDO_ASSERT(offset == sizeof(struct delta_index_header),
+ "%zu bytes decoded of %zu expected", offset,
+ sizeof(struct delta_index_header));
+ if (result != VDO_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read delta index header");
+ }
+
+ if (memcmp(header.magic, DELTA_INDEX_MAGIC, MAGIC_SIZE) != 0) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "delta index file has bad magic number");
+ }
+
+ if (zone_count != header.zone_count) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "delta index files contain mismatched zone counts (%u,%u)",
+ zone_count, header.zone_count);
+ }
+
+ if (header.zone_number != z) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "delta index zone %u found in slot %u",
+ header.zone_number, z);
+ }
+
+ first_list[z] = header.first_list;
+ list_count[z] = header.list_count;
+ record_count += header.record_count;
+ collision_count += header.collision_count;
+
+ if (first_list[z] != list_next) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "delta index file for zone %u starts with list %u instead of list %u",
+ z, first_list[z], list_next);
+ }
+
+ list_next += list_count[z];
+ }
+
+ if (list_next != delta_index->list_count) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "delta index files contain %u delta lists instead of %u delta lists",
+ list_next, delta_index->list_count);
+ }
+
+ if (collision_count > record_count) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "delta index files contain %llu collisions and %llu records",
+ (unsigned long long) collision_count,
+ (unsigned long long) record_count);
+ }
+
+ uds_reset_delta_index(delta_index);
+ delta_index->delta_zones[0].record_count = record_count;
+ delta_index->delta_zones[0].collision_count = collision_count;
+
+ /* Read the delta lists and distribute them to the proper zones. */
+ for (z = 0; z < zone_count; z++) {
+ u32 i;
+
+ delta_index->load_lists[z] = 0;
+ for (i = 0; i < list_count[z]; i++) {
+ u16 delta_list_size;
+ u32 list_number;
+ unsigned int zone_number;
+ u8 size_data[sizeof(u16)];
+
+ result = uds_read_from_buffered_reader(buffered_readers[z],
+ size_data,
+ sizeof(size_data));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read delta index size");
+ }
+
+ delta_list_size = get_unaligned_le16(size_data);
+ if (delta_list_size > 0)
+ delta_index->load_lists[z] += 1;
+
+ list_number = first_list[z] + i;
+ zone_number = list_number / delta_index->lists_per_zone;
+ delta_zone = &delta_index->delta_zones[zone_number];
+ list_number -= delta_zone->first_list;
+ delta_zone->delta_lists[list_number + 1].size = delta_list_size;
+ }
+ }
+
+ /* Prepare each zone to start receiving the delta list data. */
+ for (z = 0; z < delta_index->zone_count; z++)
+ rebalance_lists(&delta_index->delta_zones[z]);
+
+ return UDS_SUCCESS;
+}
+
+static int restore_delta_list_to_zone(struct delta_zone *delta_zone,
+ const struct delta_list_save_info *save_info,
+ const u8 *data)
+{
+ struct delta_list *delta_list;
+ u16 bit_count;
+ u16 byte_count;
+ u32 list_number = save_info->index - delta_zone->first_list;
+
+ if (list_number >= delta_zone->list_count) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "invalid delta list number %u not in range [%u,%u)",
+ save_info->index, delta_zone->first_list,
+ delta_zone->first_list + delta_zone->list_count);
+ }
+
+ delta_list = &delta_zone->delta_lists[list_number + 1];
+ if (delta_list->size == 0) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "unexpected delta list number %u",
+ save_info->index);
+ }
+
+ bit_count = delta_list->size + save_info->bit_offset;
+ byte_count = BITS_TO_BYTES(bit_count);
+ if (save_info->byte_count != byte_count) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "unexpected delta list size %u != %u",
+ save_info->byte_count, byte_count);
+ }
+
+ move_bits(data, save_info->bit_offset, delta_zone->memory, delta_list->start,
+ delta_list->size);
+ return UDS_SUCCESS;
+}
+
+static int restore_delta_list_data(struct delta_index *delta_index, unsigned int load_zone,
+ struct buffered_reader *buffered_reader, u8 *data)
+{
+ int result;
+ struct delta_list_save_info save_info;
+ u8 buffer[sizeof(struct delta_list_save_info)];
+ unsigned int new_zone;
+
+ result = uds_read_from_buffered_reader(buffered_reader, buffer, sizeof(buffer));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read delta list data");
+ }
+
+ save_info = (struct delta_list_save_info) {
+ .tag = buffer[0],
+ .bit_offset = buffer[1],
+ .byte_count = get_unaligned_le16(&buffer[2]),
+ .index = get_unaligned_le32(&buffer[4]),
+ };
+
+ if ((save_info.bit_offset >= BITS_PER_BYTE) ||
+ (save_info.byte_count > DELTA_LIST_MAX_BYTE_COUNT)) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "corrupt delta list data");
+ }
+
+ /* Make sure the data is intended for this delta index. */
+ if (save_info.tag != delta_index->tag)
+ return UDS_CORRUPT_DATA;
+
+ if (save_info.index >= delta_index->list_count) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "invalid delta list number %u of %u",
+ save_info.index,
+ delta_index->list_count);
+ }
+
+ result = uds_read_from_buffered_reader(buffered_reader, data,
+ save_info.byte_count);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read delta list data");
+ }
+
+ delta_index->load_lists[load_zone] -= 1;
+ new_zone = save_info.index / delta_index->lists_per_zone;
+ return restore_delta_list_to_zone(&delta_index->delta_zones[new_zone],
+ &save_info, data);
+}
+
+/* Restore delta lists from saved data. */
+int uds_finish_restoring_delta_index(struct delta_index *delta_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count)
+{
+ int result;
+ int saved_result = UDS_SUCCESS;
+ unsigned int z;
+ u8 *data;
+
+ result = vdo_allocate(DELTA_LIST_MAX_BYTE_COUNT, u8, __func__, &data);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (z = 0; z < reader_count; z++) {
+ while (delta_index->load_lists[z] > 0) {
+ result = restore_delta_list_data(delta_index, z,
+ buffered_readers[z], data);
+ if (result != UDS_SUCCESS) {
+ saved_result = result;
+ break;
+ }
+ }
+ }
+
+ vdo_free(data);
+ return saved_result;
+}
+
+int uds_check_guard_delta_lists(struct buffered_reader **buffered_readers,
+ unsigned int reader_count)
+{
+ int result;
+ unsigned int z;
+ u8 buffer[sizeof(struct delta_list_save_info)];
+
+ for (z = 0; z < reader_count; z++) {
+ result = uds_read_from_buffered_reader(buffered_readers[z], buffer,
+ sizeof(buffer));
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (buffer[0] != 'z')
+ return UDS_CORRUPT_DATA;
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int flush_delta_list(struct delta_zone *zone, u32 flush_index)
+{
+ struct delta_list *delta_list;
+ u8 buffer[sizeof(struct delta_list_save_info)];
+ int result;
+
+ delta_list = &zone->delta_lists[flush_index + 1];
+
+ buffer[0] = zone->tag;
+ buffer[1] = delta_list->start % BITS_PER_BYTE;
+ put_unaligned_le16(get_delta_list_byte_size(delta_list), &buffer[2]);
+ put_unaligned_le32(zone->first_list + flush_index, &buffer[4]);
+
+ result = uds_write_to_buffered_writer(zone->buffered_writer, buffer,
+ sizeof(buffer));
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning_strerror(result, "failed to write delta list memory");
+ return result;
+ }
+
+ result = uds_write_to_buffered_writer(zone->buffered_writer,
+ zone->memory + get_delta_list_byte_start(delta_list),
+ get_delta_list_byte_size(delta_list));
+ if (result != UDS_SUCCESS)
+ vdo_log_warning_strerror(result, "failed to write delta list memory");
+
+ return result;
+}
+
+/* Start saving a delta index zone to a buffered output stream. */
+int uds_start_saving_delta_index(const struct delta_index *delta_index,
+ unsigned int zone_number,
+ struct buffered_writer *buffered_writer)
+{
+ int result;
+ u32 i;
+ struct delta_zone *delta_zone;
+ u8 buffer[sizeof(struct delta_index_header)];
+ size_t offset = 0;
+
+ delta_zone = &delta_index->delta_zones[zone_number];
+ memcpy(buffer, DELTA_INDEX_MAGIC, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ encode_u32_le(buffer, &offset, zone_number);
+ encode_u32_le(buffer, &offset, delta_index->zone_count);
+ encode_u32_le(buffer, &offset, delta_zone->first_list);
+ encode_u32_le(buffer, &offset, delta_zone->list_count);
+ encode_u64_le(buffer, &offset, delta_zone->record_count);
+ encode_u64_le(buffer, &offset, delta_zone->collision_count);
+
+ result = VDO_ASSERT(offset == sizeof(struct delta_index_header),
+ "%zu bytes encoded of %zu expected", offset,
+ sizeof(struct delta_index_header));
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_write_to_buffered_writer(buffered_writer, buffer, offset);
+ if (result != UDS_SUCCESS)
+ return vdo_log_warning_strerror(result,
+ "failed to write delta index header");
+
+ for (i = 0; i < delta_zone->list_count; i++) {
+ u8 data[sizeof(u16)];
+ struct delta_list *delta_list;
+
+ delta_list = &delta_zone->delta_lists[i + 1];
+ put_unaligned_le16(delta_list->size, data);
+ result = uds_write_to_buffered_writer(buffered_writer, data,
+ sizeof(data));
+ if (result != UDS_SUCCESS)
+ return vdo_log_warning_strerror(result,
+ "failed to write delta list size");
+ }
+
+ delta_zone->buffered_writer = buffered_writer;
+ return UDS_SUCCESS;
+}
+
+int uds_finish_saving_delta_index(const struct delta_index *delta_index,
+ unsigned int zone_number)
+{
+ int result;
+ int first_error = UDS_SUCCESS;
+ u32 i;
+ struct delta_zone *delta_zone;
+ struct delta_list *delta_list;
+
+ delta_zone = &delta_index->delta_zones[zone_number];
+ for (i = 0; i < delta_zone->list_count; i++) {
+ delta_list = &delta_zone->delta_lists[i + 1];
+ if (delta_list->size > 0) {
+ result = flush_delta_list(delta_zone, i);
+ if ((result != UDS_SUCCESS) && (first_error == UDS_SUCCESS))
+ first_error = result;
+ }
+ }
+
+ delta_zone->buffered_writer = NULL;
+ return first_error;
+}
+
+int uds_write_guard_delta_list(struct buffered_writer *buffered_writer)
+{
+ int result;
+ u8 buffer[sizeof(struct delta_list_save_info)];
+
+ memset(buffer, 0, sizeof(struct delta_list_save_info));
+ buffer[0] = 'z';
+
+ result = uds_write_to_buffered_writer(buffered_writer, buffer, sizeof(buffer));
+ if (result != UDS_SUCCESS)
+ vdo_log_warning_strerror(result, "failed to write guard delta list");
+
+ return UDS_SUCCESS;
+}
+
+size_t uds_compute_delta_index_save_bytes(u32 list_count, size_t memory_size)
+{
+ /* One zone will use at least as much memory as other zone counts. */
+ return (sizeof(struct delta_index_header) +
+ list_count * (sizeof(struct delta_list_save_info) + 1) +
+ get_zone_memory_size(1, memory_size));
+}
+
+static int assert_not_at_end(const struct delta_index_entry *delta_entry)
+{
+ int result = VDO_ASSERT(!delta_entry->at_end,
+ "operation is invalid because the list entry is at the end of the delta list");
+ if (result != VDO_SUCCESS)
+ result = UDS_BAD_STATE;
+
+ return result;
+}
+
+/*
+ * Prepare to search for an entry in the specified delta list.
+ *
+ * This is always the first function to be called when dealing with delta index entries. It is
+ * always followed by calls to uds_next_delta_index_entry() to iterate through a delta list. The
+ * fields of the delta_index_entry argument will be set up for iteration, but will not contain an
+ * entry from the list.
+ */
+int uds_start_delta_index_search(const struct delta_index *delta_index, u32 list_number,
+ u32 key, struct delta_index_entry *delta_entry)
+{
+ int result;
+ unsigned int zone_number;
+ struct delta_zone *delta_zone;
+ struct delta_list *delta_list;
+
+ result = VDO_ASSERT((list_number < delta_index->list_count),
+ "Delta list number (%u) is out of range (%u)", list_number,
+ delta_index->list_count);
+ if (result != VDO_SUCCESS)
+ return UDS_CORRUPT_DATA;
+
+ zone_number = list_number / delta_index->lists_per_zone;
+ delta_zone = &delta_index->delta_zones[zone_number];
+ list_number -= delta_zone->first_list;
+ result = VDO_ASSERT((list_number < delta_zone->list_count),
+ "Delta list number (%u) is out of range (%u) for zone (%u)",
+ list_number, delta_zone->list_count, zone_number);
+ if (result != VDO_SUCCESS)
+ return UDS_CORRUPT_DATA;
+
+ if (delta_index->mutable) {
+ delta_list = &delta_zone->delta_lists[list_number + 1];
+ } else {
+ u32 end_offset;
+
+ /*
+ * Translate the immutable delta list header into a temporary
+ * full delta list header.
+ */
+ delta_list = &delta_entry->temp_delta_list;
+ delta_list->start = get_immutable_start(delta_zone->memory, list_number);
+ end_offset = get_immutable_start(delta_zone->memory, list_number + 1);
+ delta_list->size = end_offset - delta_list->start;
+ delta_list->save_key = 0;
+ delta_list->save_offset = 0;
+ }
+
+ if (key > delta_list->save_key) {
+ delta_entry->key = delta_list->save_key;
+ delta_entry->offset = delta_list->save_offset;
+ } else {
+ delta_entry->key = 0;
+ delta_entry->offset = 0;
+ if (key == 0) {
+ /*
+ * This usually means we're about to walk the entire delta list, so get all
+ * of it into the CPU cache.
+ */
+ uds_prefetch_range(&delta_zone->memory[delta_list->start / BITS_PER_BYTE],
+ delta_list->size / BITS_PER_BYTE, false);
+ }
+ }
+
+ delta_entry->at_end = false;
+ delta_entry->delta_zone = delta_zone;
+ delta_entry->delta_list = delta_list;
+ delta_entry->entry_bits = 0;
+ delta_entry->is_collision = false;
+ delta_entry->list_number = list_number;
+ delta_entry->list_overflow = false;
+ delta_entry->value_bits = delta_zone->value_bits;
+ return UDS_SUCCESS;
+}
+
+static inline u64 get_delta_entry_offset(const struct delta_index_entry *delta_entry)
+{
+ return delta_entry->delta_list->start + delta_entry->offset;
+}
+
+/*
+ * Decode a delta index entry delta value. The delta_index_entry basically describes the previous
+ * list entry, and has had its offset field changed to point to the subsequent entry. We decode the
+ * bit stream and update the delta_list_entry to describe the entry.
+ */
+static inline void decode_delta(struct delta_index_entry *delta_entry)
+{
+ int key_bits;
+ u32 delta;
+ const struct delta_zone *delta_zone = delta_entry->delta_zone;
+ const u8 *memory = delta_zone->memory;
+ u64 delta_offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits;
+ const u8 *addr = memory + delta_offset / BITS_PER_BYTE;
+ int offset = delta_offset % BITS_PER_BYTE;
+ u32 data = get_unaligned_le32(addr) >> offset;
+
+ addr += sizeof(u32);
+ key_bits = delta_zone->min_bits;
+ delta = data & ((1 << key_bits) - 1);
+ if (delta >= delta_zone->min_keys) {
+ data >>= key_bits;
+ if (data == 0) {
+ key_bits = sizeof(u32) * BITS_PER_BYTE - offset;
+ while ((data = get_unaligned_le32(addr)) == 0) {
+ addr += sizeof(u32);
+ key_bits += sizeof(u32) * BITS_PER_BYTE;
+ }
+ }
+ key_bits += ffs(data);
+ delta += ((key_bits - delta_zone->min_bits - 1) * delta_zone->incr_keys);
+ }
+ delta_entry->delta = delta;
+ delta_entry->key += delta;
+
+ /* Check for a collision, a delta of zero after the start. */
+ if (unlikely((delta == 0) && (delta_entry->offset > 0))) {
+ delta_entry->is_collision = true;
+ delta_entry->entry_bits = delta_entry->value_bits + key_bits + COLLISION_BITS;
+ } else {
+ delta_entry->is_collision = false;
+ delta_entry->entry_bits = delta_entry->value_bits + key_bits;
+ }
+}
+
+noinline int uds_next_delta_index_entry(struct delta_index_entry *delta_entry)
+{
+ int result;
+ const struct delta_list *delta_list;
+ u32 next_offset;
+ u16 size;
+
+ result = assert_not_at_end(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ delta_list = delta_entry->delta_list;
+ delta_entry->offset += delta_entry->entry_bits;
+ size = delta_list->size;
+ if (unlikely(delta_entry->offset >= size)) {
+ delta_entry->at_end = true;
+ delta_entry->delta = 0;
+ delta_entry->is_collision = false;
+ result = VDO_ASSERT((delta_entry->offset == size),
+ "next offset past end of delta list");
+ if (result != VDO_SUCCESS)
+ result = UDS_CORRUPT_DATA;
+
+ return result;
+ }
+
+ decode_delta(delta_entry);
+
+ next_offset = delta_entry->offset + delta_entry->entry_bits;
+ if (next_offset > size) {
+ /*
+ * This is not an assertion because uds_validate_chapter_index_page() wants to
+ * handle this error.
+ */
+ vdo_log_warning("Decoded past the end of the delta list");
+ return UDS_CORRUPT_DATA;
+ }
+
+ return UDS_SUCCESS;
+}
+
+int uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry)
+{
+ int result;
+ struct delta_list *delta_list = delta_entry->delta_list;
+
+ result = VDO_ASSERT(!delta_entry->is_collision, "entry is not a collision");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ delta_list->save_key = delta_entry->key - delta_entry->delta;
+ delta_list->save_offset = delta_entry->offset;
+ return UDS_SUCCESS;
+}
+
+static void set_delta(struct delta_index_entry *delta_entry, u32 delta)
+{
+ const struct delta_zone *delta_zone = delta_entry->delta_zone;
+ u32 key_bits = (delta_zone->min_bits +
+ ((delta_zone->incr_keys - delta_zone->min_keys + delta) /
+ delta_zone->incr_keys));
+
+ delta_entry->delta = delta;
+ delta_entry->entry_bits = delta_entry->value_bits + key_bits;
+}
+
+static void get_collision_name(const struct delta_index_entry *entry, u8 *name)
+{
+ u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS;
+ const u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE;
+ int size = COLLISION_BYTES;
+ int shift = offset % BITS_PER_BYTE;
+
+ while (--size >= 0)
+ *name++ = get_unaligned_le16(addr++) >> shift;
+}
+
+static void set_collision_name(const struct delta_index_entry *entry, const u8 *name)
+{
+ u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS;
+ u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE;
+ int size = COLLISION_BYTES;
+ int shift = offset % BITS_PER_BYTE;
+ u16 mask = ~((u16) 0xFF << shift);
+ u16 data;
+
+ while (--size >= 0) {
+ data = (get_unaligned_le16(addr) & mask) | (*name++ << shift);
+ put_unaligned_le16(data, addr++);
+ }
+}
+
+int uds_get_delta_index_entry(const struct delta_index *delta_index, u32 list_number,
+ u32 key, const u8 *name,
+ struct delta_index_entry *delta_entry)
+{
+ int result;
+
+ result = uds_start_delta_index_search(delta_index, list_number, key,
+ delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ do {
+ result = uds_next_delta_index_entry(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+ } while (!delta_entry->at_end && (key > delta_entry->key));
+
+ result = uds_remember_delta_index_offset(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (!delta_entry->at_end && (key == delta_entry->key)) {
+ struct delta_index_entry collision_entry = *delta_entry;
+
+ for (;;) {
+ u8 full_name[COLLISION_BYTES];
+
+ result = uds_next_delta_index_entry(&collision_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (collision_entry.at_end || !collision_entry.is_collision)
+ break;
+
+ get_collision_name(&collision_entry, full_name);
+ if (memcmp(full_name, name, COLLISION_BYTES) == 0) {
+ *delta_entry = collision_entry;
+ break;
+ }
+ }
+ }
+
+ return UDS_SUCCESS;
+}
+
+int uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry, u8 *name)
+{
+ int result;
+
+ result = assert_not_at_end(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(delta_entry->is_collision,
+ "Cannot get full block name from a non-collision delta index entry");
+ if (result != VDO_SUCCESS)
+ return UDS_BAD_STATE;
+
+ get_collision_name(delta_entry, name);
+ return UDS_SUCCESS;
+}
+
+u32 uds_get_delta_entry_value(const struct delta_index_entry *delta_entry)
+{
+ return get_field(delta_entry->delta_zone->memory,
+ get_delta_entry_offset(delta_entry), delta_entry->value_bits);
+}
+
+static int assert_mutable_entry(const struct delta_index_entry *delta_entry)
+{
+ int result = VDO_ASSERT((delta_entry->delta_list != &delta_entry->temp_delta_list),
+ "delta index is mutable");
+ if (result != VDO_SUCCESS)
+ result = UDS_BAD_STATE;
+
+ return result;
+}
+
+int uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value)
+{
+ int result;
+ u32 value_mask = (1 << delta_entry->value_bits) - 1;
+
+ result = assert_mutable_entry(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = assert_not_at_end(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT((value & value_mask) == value,
+ "Value (%u) being set in a delta index is too large (must fit in %u bits)",
+ value, delta_entry->value_bits);
+ if (result != VDO_SUCCESS)
+ return UDS_INVALID_ARGUMENT;
+
+ set_field(value, delta_entry->delta_zone->memory,
+ get_delta_entry_offset(delta_entry), delta_entry->value_bits);
+ return UDS_SUCCESS;
+}
+
+/*
+ * Extend the memory used by the delta lists by adding growing_size bytes before the list indicated
+ * by growing_index, then rebalancing the lists in the new chunk.
+ */
+static int extend_delta_zone(struct delta_zone *delta_zone, u32 growing_index,
+ size_t growing_size)
+{
+ ktime_t start_time;
+ ktime_t end_time;
+ struct delta_list *delta_lists;
+ u32 i;
+ size_t used_space;
+
+
+ /* Calculate the amount of space that is or will be in use. */
+ start_time = current_time_ns(CLOCK_MONOTONIC);
+ delta_lists = delta_zone->delta_lists;
+ used_space = growing_size;
+ for (i = 0; i <= delta_zone->list_count + 1; i++)
+ used_space += get_delta_list_byte_size(&delta_lists[i]);
+
+ if (delta_zone->size < used_space)
+ return UDS_OVERFLOW;
+
+ /* Compute the new offsets of the delta lists. */
+ compute_new_list_offsets(delta_zone, growing_index, growing_size, used_space);
+
+ /*
+ * When we rebalance the delta list, we will include the end guard list in the rebalancing.
+ * It contains the end guard data, which must be copied.
+ */
+ rebalance_delta_zone(delta_zone, 1, delta_zone->list_count + 1);
+ end_time = current_time_ns(CLOCK_MONOTONIC);
+ delta_zone->rebalance_count++;
+ delta_zone->rebalance_time += ktime_sub(end_time, start_time);
+ return UDS_SUCCESS;
+}
+
+static int insert_bits(struct delta_index_entry *delta_entry, u16 size)
+{
+ u64 free_before;
+ u64 free_after;
+ u64 source;
+ u64 destination;
+ u32 count;
+ bool before_flag;
+ u8 *memory;
+ struct delta_zone *delta_zone = delta_entry->delta_zone;
+ struct delta_list *delta_list = delta_entry->delta_list;
+ /* Compute bits in use before and after the inserted bits. */
+ u32 total_size = delta_list->size;
+ u32 before_size = delta_entry->offset;
+ u32 after_size = total_size - delta_entry->offset;
+
+ if (total_size + size > U16_MAX) {
+ delta_entry->list_overflow = true;
+ delta_zone->overflow_count++;
+ return UDS_OVERFLOW;
+ }
+
+ /* Compute bits available before and after the delta list. */
+ free_before = (delta_list[0].start - (delta_list[-1].start + delta_list[-1].size));
+ free_after = (delta_list[1].start - (delta_list[0].start + delta_list[0].size));
+
+ if ((size <= free_before) && (size <= free_after)) {
+ /*
+ * We have enough space to use either before or after the list. Select the smaller
+ * amount of data. If it is exactly the same, try to take from the larger amount of
+ * free space.
+ */
+ if (before_size < after_size)
+ before_flag = true;
+ else if (after_size < before_size)
+ before_flag = false;
+ else
+ before_flag = free_before > free_after;
+ } else if (size <= free_before) {
+ /* There is space before but not after. */
+ before_flag = true;
+ } else if (size <= free_after) {
+ /* There is space after but not before. */
+ before_flag = false;
+ } else {
+ /*
+ * Neither of the surrounding spaces is large enough for this request. Extend
+ * and/or rebalance the delta list memory choosing to move the least amount of
+ * data.
+ */
+ int result;
+ u32 growing_index = delta_entry->list_number + 1;
+
+ before_flag = before_size < after_size;
+ if (!before_flag)
+ growing_index++;
+ result = extend_delta_zone(delta_zone, growing_index,
+ BITS_TO_BYTES(size));
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ delta_list->size += size;
+ if (before_flag) {
+ source = delta_list->start;
+ destination = source - size;
+ delta_list->start -= size;
+ count = before_size;
+ } else {
+ source = delta_list->start + delta_entry->offset;
+ destination = source + size;
+ count = after_size;
+ }
+
+ memory = delta_zone->memory;
+ move_bits(memory, source, memory, destination, count);
+ return UDS_SUCCESS;
+}
+
+static void encode_delta(const struct delta_index_entry *delta_entry)
+{
+ u32 temp;
+ u32 t1;
+ u32 t2;
+ u64 offset;
+ const struct delta_zone *delta_zone = delta_entry->delta_zone;
+ u8 *memory = delta_zone->memory;
+
+ offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits;
+ if (delta_entry->delta < delta_zone->min_keys) {
+ set_field(delta_entry->delta, memory, offset, delta_zone->min_bits);
+ return;
+ }
+
+ temp = delta_entry->delta - delta_zone->min_keys;
+ t1 = (temp % delta_zone->incr_keys) + delta_zone->min_keys;
+ t2 = temp / delta_zone->incr_keys;
+ set_field(t1, memory, offset, delta_zone->min_bits);
+ set_zero(memory, offset + delta_zone->min_bits, t2);
+ set_field(1, memory, offset + delta_zone->min_bits + t2, 1);
+}
+
+static void encode_entry(const struct delta_index_entry *delta_entry, u32 value,
+ const u8 *name)
+{
+ u8 *memory = delta_entry->delta_zone->memory;
+ u64 offset = get_delta_entry_offset(delta_entry);
+
+ set_field(value, memory, offset, delta_entry->value_bits);
+ encode_delta(delta_entry);
+ if (name != NULL)
+ set_collision_name(delta_entry, name);
+}
+
+/*
+ * Create a new entry in the delta index. If the entry is a collision, the full 256 bit name must
+ * be provided.
+ */
+int uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key, u32 value,
+ const u8 *name)
+{
+ int result;
+ struct delta_zone *delta_zone;
+
+ result = assert_mutable_entry(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (delta_entry->is_collision) {
+ /*
+ * The caller wants us to insert a collision entry onto a collision entry. This
+ * happens when we find a collision and attempt to add the name again to the index.
+ * This is normally a fatal error unless we are replaying a closed chapter while we
+ * are rebuilding a volume index.
+ */
+ return UDS_DUPLICATE_NAME;
+ }
+
+ if (delta_entry->offset < delta_entry->delta_list->save_offset) {
+ /*
+ * The saved entry offset is after the new entry and will no longer be valid, so
+ * replace it with the insertion point.
+ */
+ result = uds_remember_delta_index_offset(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ if (name != NULL) {
+ /* Insert a collision entry which is placed after this entry. */
+ result = assert_not_at_end(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT((key == delta_entry->key),
+ "incorrect key for collision entry");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ delta_entry->offset += delta_entry->entry_bits;
+ set_delta(delta_entry, 0);
+ delta_entry->is_collision = true;
+ delta_entry->entry_bits += COLLISION_BITS;
+ result = insert_bits(delta_entry, delta_entry->entry_bits);
+ } else if (delta_entry->at_end) {
+ /* Insert a new entry at the end of the delta list. */
+ result = VDO_ASSERT((key >= delta_entry->key), "key past end of list");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ set_delta(delta_entry, key - delta_entry->key);
+ delta_entry->key = key;
+ delta_entry->at_end = false;
+ result = insert_bits(delta_entry, delta_entry->entry_bits);
+ } else {
+ u16 old_entry_size;
+ u16 additional_size;
+ struct delta_index_entry next_entry;
+ u32 next_value;
+
+ /*
+ * Insert a new entry which requires the delta in the following entry to be
+ * updated.
+ */
+ result = VDO_ASSERT((key < delta_entry->key),
+ "key precedes following entry");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT((key >= delta_entry->key - delta_entry->delta),
+ "key effects following entry's delta");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ old_entry_size = delta_entry->entry_bits;
+ next_entry = *delta_entry;
+ next_value = uds_get_delta_entry_value(&next_entry);
+ set_delta(delta_entry, key - (delta_entry->key - delta_entry->delta));
+ delta_entry->key = key;
+ set_delta(&next_entry, next_entry.key - key);
+ next_entry.offset += delta_entry->entry_bits;
+ /* The two new entries are always bigger than the single entry being replaced. */
+ additional_size = (delta_entry->entry_bits +
+ next_entry.entry_bits - old_entry_size);
+ result = insert_bits(delta_entry, additional_size);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ encode_entry(&next_entry, next_value, NULL);
+ }
+
+ if (result != UDS_SUCCESS)
+ return result;
+
+ encode_entry(delta_entry, value, name);
+ delta_zone = delta_entry->delta_zone;
+ delta_zone->record_count++;
+ delta_zone->collision_count += delta_entry->is_collision ? 1 : 0;
+ return UDS_SUCCESS;
+}
+
+static void delete_bits(const struct delta_index_entry *delta_entry, int size)
+{
+ u64 source;
+ u64 destination;
+ u32 count;
+ bool before_flag;
+ struct delta_list *delta_list = delta_entry->delta_list;
+ u8 *memory = delta_entry->delta_zone->memory;
+ /* Compute bits retained before and after the deleted bits. */
+ u32 total_size = delta_list->size;
+ u32 before_size = delta_entry->offset;
+ u32 after_size = total_size - delta_entry->offset - size;
+
+ /*
+ * Determine whether to add to the available space either before or after the delta list.
+ * We prefer to move the least amount of data. If it is exactly the same, try to add to the
+ * smaller amount of free space.
+ */
+ if (before_size < after_size) {
+ before_flag = true;
+ } else if (after_size < before_size) {
+ before_flag = false;
+ } else {
+ u64 free_before =
+ (delta_list[0].start - (delta_list[-1].start + delta_list[-1].size));
+ u64 free_after =
+ (delta_list[1].start - (delta_list[0].start + delta_list[0].size));
+
+ before_flag = (free_before < free_after);
+ }
+
+ delta_list->size -= size;
+ if (before_flag) {
+ source = delta_list->start;
+ destination = source + size;
+ delta_list->start += size;
+ count = before_size;
+ } else {
+ destination = delta_list->start + delta_entry->offset;
+ source = destination + size;
+ count = after_size;
+ }
+
+ move_bits(memory, source, memory, destination, count);
+}
+
+int uds_remove_delta_index_entry(struct delta_index_entry *delta_entry)
+{
+ int result;
+ struct delta_index_entry next_entry;
+ struct delta_zone *delta_zone;
+ struct delta_list *delta_list;
+
+ result = assert_mutable_entry(delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_entry = *delta_entry;
+ result = uds_next_delta_index_entry(&next_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ delta_zone = delta_entry->delta_zone;
+
+ if (delta_entry->is_collision) {
+ /* This is a collision entry, so just remove it. */
+ delete_bits(delta_entry, delta_entry->entry_bits);
+ next_entry.offset = delta_entry->offset;
+ delta_zone->collision_count -= 1;
+ } else if (next_entry.at_end) {
+ /* This entry is at the end of the list, so just remove it. */
+ delete_bits(delta_entry, delta_entry->entry_bits);
+ next_entry.key -= delta_entry->delta;
+ next_entry.offset = delta_entry->offset;
+ } else {
+ /* The delta in the next entry needs to be updated. */
+ u32 next_value = uds_get_delta_entry_value(&next_entry);
+ u16 old_size = delta_entry->entry_bits + next_entry.entry_bits;
+
+ if (next_entry.is_collision) {
+ next_entry.is_collision = false;
+ delta_zone->collision_count -= 1;
+ }
+
+ set_delta(&next_entry, delta_entry->delta + next_entry.delta);
+ next_entry.offset = delta_entry->offset;
+ /* The one new entry is always smaller than the two entries being replaced. */
+ delete_bits(delta_entry, old_size - next_entry.entry_bits);
+ encode_entry(&next_entry, next_value, NULL);
+ }
+
+ delta_zone->record_count--;
+ delta_zone->discard_count++;
+ *delta_entry = next_entry;
+
+ delta_list = delta_entry->delta_list;
+ if (delta_entry->offset < delta_list->save_offset) {
+ /* The saved entry offset is no longer valid. */
+ delta_list->save_key = 0;
+ delta_list->save_offset = 0;
+ }
+
+ return UDS_SUCCESS;
+}
+
+void uds_get_delta_index_stats(const struct delta_index *delta_index,
+ struct delta_index_stats *stats)
+{
+ unsigned int z;
+ const struct delta_zone *delta_zone;
+
+ memset(stats, 0, sizeof(struct delta_index_stats));
+ for (z = 0; z < delta_index->zone_count; z++) {
+ delta_zone = &delta_index->delta_zones[z];
+ stats->rebalance_time += delta_zone->rebalance_time;
+ stats->rebalance_count += delta_zone->rebalance_count;
+ stats->record_count += delta_zone->record_count;
+ stats->collision_count += delta_zone->collision_count;
+ stats->discard_count += delta_zone->discard_count;
+ stats->overflow_count += delta_zone->overflow_count;
+ stats->list_count += delta_zone->list_count;
+ }
+}
+
+size_t uds_compute_delta_index_size(u32 entry_count, u32 mean_delta, u32 payload_bits)
+{
+ u16 min_bits;
+ u32 incr_keys;
+ u32 min_keys;
+
+ compute_coding_constants(mean_delta, &min_bits, &min_keys, &incr_keys);
+ /* On average, each delta is encoded into about min_bits + 1.5 bits. */
+ return entry_count * (payload_bits + min_bits + 1) + entry_count / 2;
+}
+
+u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta,
+ u32 payload_bits, size_t bytes_per_page)
+{
+ unsigned int bits_per_delta_list;
+ unsigned int bits_per_page;
+ size_t bits_per_index;
+
+ /* Compute the expected number of bits needed for all the entries. */
+ bits_per_index = uds_compute_delta_index_size(entry_count, mean_delta,
+ payload_bits);
+ bits_per_delta_list = bits_per_index / list_count;
+
+ /* Add in the immutable delta list headers. */
+ bits_per_index += list_count * IMMUTABLE_HEADER_SIZE;
+ /* Compute the number of usable bits on an immutable index page. */
+ bits_per_page = ((bytes_per_page - sizeof(struct delta_page_header)) * BITS_PER_BYTE);
+ /*
+ * Reduce the bits per page by one immutable delta list header and one delta list to
+ * account for internal fragmentation.
+ */
+ bits_per_page -= IMMUTABLE_HEADER_SIZE + bits_per_delta_list;
+ /* Now compute the number of pages needed. */
+ return DIV_ROUND_UP(bits_per_index, bits_per_page);
+}
+
+void uds_log_delta_index_entry(struct delta_index_entry *delta_entry)
+{
+ vdo_log_ratelimit(vdo_log_info,
+ "List 0x%X Key 0x%X Offset 0x%X%s%s List_size 0x%X%s",
+ delta_entry->list_number, delta_entry->key,
+ delta_entry->offset, delta_entry->at_end ? " end" : "",
+ delta_entry->is_collision ? " collision" : "",
+ delta_entry->delta_list->size,
+ delta_entry->list_overflow ? " overflow" : "");
+ delta_entry->list_overflow = false;
+}
diff --git a/drivers/md/dm-vdo/indexer/delta-index.h b/drivers/md/dm-vdo/indexer/delta-index.h
new file mode 100644
index 000000000000..53f6c6ac0bc7
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/delta-index.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_DELTA_INDEX_H
+#define UDS_DELTA_INDEX_H
+
+#include <linux/cache.h>
+
+#include "numeric.h"
+#include "time-utils.h"
+
+#include "config.h"
+#include "io-factory.h"
+
+/*
+ * A delta index is a key-value store, where each entry maps an address (the key) to a payload (the
+ * value). The entries are sorted by address, and only the delta between successive addresses is
+ * stored in the entry. The addresses are assumed to be uniformly distributed, and the deltas are
+ * therefore exponentially distributed.
+ *
+ * A delta_index can either be mutable or immutable depending on its expected use. The immutable
+ * form of a delta index is used for the indexes of closed chapters committed to the volume. The
+ * mutable form of a delta index is used by the volume index, and also by the chapter index in an
+ * open chapter. Like the index as a whole, each mutable delta index is divided into a number of
+ * independent zones.
+ */
+
+struct delta_list {
+ /* The offset of the delta list start, in bits */
+ u64 start;
+ /* The number of bits in the delta list */
+ u16 size;
+ /* Where the last search "found" the key, in bits */
+ u16 save_offset;
+ /* The key for the record just before save_offset */
+ u32 save_key;
+};
+
+struct delta_zone {
+ /* The delta list memory */
+ u8 *memory;
+ /* The delta list headers */
+ struct delta_list *delta_lists;
+ /* Temporary starts of delta lists */
+ u64 *new_offsets;
+ /* Buffered writer for saving an index */
+ struct buffered_writer *buffered_writer;
+ /* The size of delta list memory */
+ size_t size;
+ /* Nanoseconds spent rebalancing */
+ ktime_t rebalance_time;
+ /* Number of memory rebalances */
+ u32 rebalance_count;
+ /* The number of bits in a stored value */
+ u8 value_bits;
+ /* The number of bits in the minimal key code */
+ u16 min_bits;
+ /* The number of keys used in a minimal code */
+ u32 min_keys;
+ /* The number of keys used for another code bit */
+ u32 incr_keys;
+ /* The number of records in the index */
+ u64 record_count;
+ /* The number of collision records */
+ u64 collision_count;
+ /* The number of records removed */
+ u64 discard_count;
+ /* The number of UDS_OVERFLOW errors detected */
+ u64 overflow_count;
+ /* The index of the first delta list */
+ u32 first_list;
+ /* The number of delta lists */
+ u32 list_count;
+ /* Tag belonging to this delta index */
+ u8 tag;
+} __aligned(L1_CACHE_BYTES);
+
+struct delta_list_save_info {
+ /* Tag identifying which delta index this list is in */
+ u8 tag;
+ /* Bit offset of the start of the list data */
+ u8 bit_offset;
+ /* Number of bytes of list data */
+ u16 byte_count;
+ /* The delta list number within the delta index */
+ u32 index;
+} __packed;
+
+struct delta_index {
+ /* The zones */
+ struct delta_zone *delta_zones;
+ /* The number of zones */
+ unsigned int zone_count;
+ /* The number of delta lists */
+ u32 list_count;
+ /* Maximum lists per zone */
+ u32 lists_per_zone;
+ /* Total memory allocated to this index */
+ size_t memory_size;
+ /* The number of non-empty lists at load time per zone */
+ u32 load_lists[MAX_ZONES];
+ /* True if this index is mutable */
+ bool mutable;
+ /* Tag belonging to this delta index */
+ u8 tag;
+};
+
+/*
+ * A delta_index_page describes a single page of a chapter index. The delta_index field allows the
+ * page to be treated as an immutable delta_index. We use the delta_zone field to treat the chapter
+ * index page as a single zone index, and without the need to do an additional memory allocation.
+ */
+struct delta_index_page {
+ struct delta_index delta_index;
+ /* These values are loaded from the delta_page_header */
+ u32 lowest_list_number;
+ u32 highest_list_number;
+ u64 virtual_chapter_number;
+ /* This structure describes the single zone of a delta index page. */
+ struct delta_zone delta_zone;
+};
+
+/*
+ * Notes on the delta_index_entries:
+ *
+ * The fields documented as "public" can be read by any code that uses a delta_index. The fields
+ * documented as "private" carry information between delta_index method calls and should not be
+ * used outside the delta_index module.
+ *
+ * (1) The delta_index_entry is used like an iterator when searching a delta list.
+ *
+ * (2) It is also the result of a successful search and can be used to refer to the element found
+ * by the search.
+ *
+ * (3) It is also the result of an unsuccessful search and can be used to refer to the insertion
+ * point for a new record.
+ *
+ * (4) If at_end is true, the delta_list entry can only be used as the insertion point for a new
+ * record at the end of the list.
+ *
+ * (5) If at_end is false and is_collision is true, the delta_list entry fields refer to a
+ * collision entry in the list, and the delta_list entry can be used as a reference to this
+ * entry.
+ *
+ * (6) If at_end is false and is_collision is false, the delta_list entry fields refer to a
+ * non-collision entry in the list. Such delta_list entries can be used as a reference to a
+ * found entry, or an insertion point for a non-collision entry before this entry, or an
+ * insertion point for a collision entry that collides with this entry.
+ */
+struct delta_index_entry {
+ /* Public fields */
+ /* The key for this entry */
+ u32 key;
+ /* We are after the last list entry */
+ bool at_end;
+ /* This record is a collision */
+ bool is_collision;
+
+ /* Private fields */
+ /* This delta list overflowed */
+ bool list_overflow;
+ /* The number of bits used for the value */
+ u8 value_bits;
+ /* The number of bits used for the entire entry */
+ u16 entry_bits;
+ /* The delta index zone */
+ struct delta_zone *delta_zone;
+ /* The delta list containing the entry */
+ struct delta_list *delta_list;
+ /* The delta list number */
+ u32 list_number;
+ /* Bit offset of this entry within the list */
+ u16 offset;
+ /* The delta between this and previous entry */
+ u32 delta;
+ /* Temporary delta list for immutable indices */
+ struct delta_list temp_delta_list;
+};
+
+struct delta_index_stats {
+ /* Number of bytes allocated */
+ size_t memory_allocated;
+ /* Nanoseconds spent rebalancing */
+ ktime_t rebalance_time;
+ /* Number of memory rebalances */
+ u32 rebalance_count;
+ /* The number of records in the index */
+ u64 record_count;
+ /* The number of collision records */
+ u64 collision_count;
+ /* The number of records removed */
+ u64 discard_count;
+ /* The number of UDS_OVERFLOW errors detected */
+ u64 overflow_count;
+ /* The number of delta lists */
+ u32 list_count;
+};
+
+int __must_check uds_initialize_delta_index(struct delta_index *delta_index,
+ unsigned int zone_count, u32 list_count,
+ u32 mean_delta, u32 payload_bits,
+ size_t memory_size, u8 tag);
+
+int __must_check uds_initialize_delta_index_page(struct delta_index_page *delta_index_page,
+ u64 expected_nonce, u32 mean_delta,
+ u32 payload_bits, u8 *memory,
+ size_t memory_size);
+
+void uds_uninitialize_delta_index(struct delta_index *delta_index);
+
+void uds_reset_delta_index(const struct delta_index *delta_index);
+
+int __must_check uds_pack_delta_index_page(const struct delta_index *delta_index,
+ u64 header_nonce, u8 *memory,
+ size_t memory_size,
+ u64 virtual_chapter_number, u32 first_list,
+ u32 *list_count);
+
+int __must_check uds_start_restoring_delta_index(struct delta_index *delta_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count);
+
+int __must_check uds_finish_restoring_delta_index(struct delta_index *delta_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count);
+
+int __must_check uds_check_guard_delta_lists(struct buffered_reader **buffered_readers,
+ unsigned int reader_count);
+
+int __must_check uds_start_saving_delta_index(const struct delta_index *delta_index,
+ unsigned int zone_number,
+ struct buffered_writer *buffered_writer);
+
+int __must_check uds_finish_saving_delta_index(const struct delta_index *delta_index,
+ unsigned int zone_number);
+
+int __must_check uds_write_guard_delta_list(struct buffered_writer *buffered_writer);
+
+size_t __must_check uds_compute_delta_index_save_bytes(u32 list_count,
+ size_t memory_size);
+
+int __must_check uds_start_delta_index_search(const struct delta_index *delta_index,
+ u32 list_number, u32 key,
+ struct delta_index_entry *iterator);
+
+int __must_check uds_next_delta_index_entry(struct delta_index_entry *delta_entry);
+
+int __must_check uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry);
+
+int __must_check uds_get_delta_index_entry(const struct delta_index *delta_index,
+ u32 list_number, u32 key, const u8 *name,
+ struct delta_index_entry *delta_entry);
+
+int __must_check uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry,
+ u8 *name);
+
+u32 __must_check uds_get_delta_entry_value(const struct delta_index_entry *delta_entry);
+
+int __must_check uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value);
+
+int __must_check uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key,
+ u32 value, const u8 *name);
+
+int __must_check uds_remove_delta_index_entry(struct delta_index_entry *delta_entry);
+
+void uds_get_delta_index_stats(const struct delta_index *delta_index,
+ struct delta_index_stats *stats);
+
+size_t __must_check uds_compute_delta_index_size(u32 entry_count, u32 mean_delta,
+ u32 payload_bits);
+
+u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta,
+ u32 payload_bits, size_t bytes_per_page);
+
+void uds_log_delta_index_entry(struct delta_index_entry *delta_entry);
+
+#endif /* UDS_DELTA_INDEX_H */
diff --git a/drivers/md/dm-vdo/indexer/funnel-requestqueue.c b/drivers/md/dm-vdo/indexer/funnel-requestqueue.c
new file mode 100644
index 000000000000..1a5735375ddc
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/funnel-requestqueue.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "funnel-requestqueue.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/wait.h>
+
+#include "funnel-queue.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "thread-utils.h"
+
+/*
+ * This queue will attempt to handle requests in reasonably sized batches instead of reacting
+ * immediately to each new request. The wait time between batches is dynamically adjusted up or
+ * down to try to balance responsiveness against wasted thread run time.
+ *
+ * If the wait time becomes long enough, the queue will become dormant and must be explicitly
+ * awoken when a new request is enqueued. The enqueue operation updates "newest" in the funnel
+ * queue via xchg (which is a memory barrier), and later checks "dormant" to decide whether to do a
+ * wakeup of the worker thread.
+ *
+ * When deciding to go to sleep, the worker thread sets "dormant" and then examines "newest" to
+ * decide if the funnel queue is idle. In dormant mode, the last examination of "newest" before
+ * going to sleep is done inside the wait_event_interruptible() macro, after a point where one or
+ * more memory barriers have been issued. (Preparing to sleep uses spin locks.) Even if the funnel
+ * queue's "next" field update isn't visible yet to make the entry accessible, its existence will
+ * kick the worker thread out of dormant mode and back into timer-based mode.
+ *
+ * Unbatched requests are used to communicate between different zone threads and will also cause
+ * the queue to awaken immediately.
+ */
+
+enum {
+ NANOSECOND = 1,
+ MICROSECOND = 1000 * NANOSECOND,
+ MILLISECOND = 1000 * MICROSECOND,
+ DEFAULT_WAIT_TIME = 20 * MICROSECOND,
+ MINIMUM_WAIT_TIME = DEFAULT_WAIT_TIME / 2,
+ MAXIMUM_WAIT_TIME = MILLISECOND,
+ MINIMUM_BATCH = 32,
+ MAXIMUM_BATCH = 64,
+};
+
+struct uds_request_queue {
+ /* Wait queue for synchronizing producers and consumer */
+ struct wait_queue_head wait_head;
+ /* Function to process a request */
+ uds_request_queue_processor_fn processor;
+ /* Queue of new incoming requests */
+ struct funnel_queue *main_queue;
+ /* Queue of old requests to retry */
+ struct funnel_queue *retry_queue;
+ /* The thread id of the worker thread */
+ struct thread *thread;
+ /* True if the worker was started */
+ bool started;
+ /* When true, requests can be enqueued */
+ bool running;
+ /* A flag set when the worker is waiting without a timeout */
+ atomic_t dormant;
+};
+
+static inline struct uds_request *poll_queues(struct uds_request_queue *queue)
+{
+ struct funnel_queue_entry *entry;
+
+ entry = vdo_funnel_queue_poll(queue->retry_queue);
+ if (entry != NULL)
+ return container_of(entry, struct uds_request, queue_link);
+
+ entry = vdo_funnel_queue_poll(queue->main_queue);
+ if (entry != NULL)
+ return container_of(entry, struct uds_request, queue_link);
+
+ return NULL;
+}
+
+static inline bool are_queues_idle(struct uds_request_queue *queue)
+{
+ return vdo_is_funnel_queue_idle(queue->retry_queue) &&
+ vdo_is_funnel_queue_idle(queue->main_queue);
+}
+
+/*
+ * Determine if there is a next request to process, and return it if there is. Also return flags
+ * indicating whether the worker thread can sleep (for the use of wait_event() macros) and whether
+ * the thread did sleep before returning a new request.
+ */
+static inline bool dequeue_request(struct uds_request_queue *queue,
+ struct uds_request **request_ptr, bool *waited_ptr)
+{
+ struct uds_request *request = poll_queues(queue);
+
+ if (request != NULL) {
+ *request_ptr = request;
+ return true;
+ }
+
+ if (!READ_ONCE(queue->running)) {
+ /* Wake the worker thread so it can exit. */
+ *request_ptr = NULL;
+ return true;
+ }
+
+ *request_ptr = NULL;
+ *waited_ptr = true;
+ return false;
+}
+
+static void wait_for_request(struct uds_request_queue *queue, bool dormant,
+ unsigned long timeout, struct uds_request **request,
+ bool *waited)
+{
+ if (dormant) {
+ wait_event_interruptible(queue->wait_head,
+ (dequeue_request(queue, request, waited) ||
+ !are_queues_idle(queue)));
+ return;
+ }
+
+ wait_event_interruptible_hrtimeout(queue->wait_head,
+ dequeue_request(queue, request, waited),
+ ns_to_ktime(timeout));
+}
+
+static void request_queue_worker(void *arg)
+{
+ struct uds_request_queue *queue = arg;
+ struct uds_request *request = NULL;
+ unsigned long time_batch = DEFAULT_WAIT_TIME;
+ bool dormant = atomic_read(&queue->dormant);
+ bool waited = false;
+ long current_batch = 0;
+
+ for (;;) {
+ wait_for_request(queue, dormant, time_batch, &request, &waited);
+ if (likely(request != NULL)) {
+ current_batch++;
+ queue->processor(request);
+ } else if (!READ_ONCE(queue->running)) {
+ break;
+ }
+
+ if (dormant) {
+ /*
+ * The queue has been roused from dormancy. Clear the flag so enqueuers can
+ * stop broadcasting. No fence is needed for this transition.
+ */
+ atomic_set(&queue->dormant, false);
+ dormant = false;
+ time_batch = DEFAULT_WAIT_TIME;
+ } else if (waited) {
+ /*
+ * We waited for this request to show up. Adjust the wait time to smooth
+ * out the batch size.
+ */
+ if (current_batch < MINIMUM_BATCH) {
+ /*
+ * If the last batch of requests was too small, increase the wait
+ * time.
+ */
+ time_batch += time_batch / 4;
+ if (time_batch >= MAXIMUM_WAIT_TIME) {
+ atomic_set(&queue->dormant, true);
+ dormant = true;
+ }
+ } else if (current_batch > MAXIMUM_BATCH) {
+ /*
+ * If the last batch of requests was too large, decrease the wait
+ * time.
+ */
+ time_batch -= time_batch / 4;
+ if (time_batch < MINIMUM_WAIT_TIME)
+ time_batch = MINIMUM_WAIT_TIME;
+ }
+ current_batch = 0;
+ }
+ }
+
+ /*
+ * Ensure that we process any remaining requests that were enqueued before trying to shut
+ * down. The corresponding write barrier is in uds_request_queue_finish().
+ */
+ smp_rmb();
+ while ((request = poll_queues(queue)) != NULL)
+ queue->processor(request);
+}
+
+int uds_make_request_queue(const char *queue_name,
+ uds_request_queue_processor_fn processor,
+ struct uds_request_queue **queue_ptr)
+{
+ int result;
+ struct uds_request_queue *queue;
+
+ result = vdo_allocate(1, struct uds_request_queue, __func__, &queue);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ queue->processor = processor;
+ queue->running = true;
+ atomic_set(&queue->dormant, false);
+ init_waitqueue_head(&queue->wait_head);
+
+ result = vdo_make_funnel_queue(&queue->main_queue);
+ if (result != VDO_SUCCESS) {
+ uds_request_queue_finish(queue);
+ return result;
+ }
+
+ result = vdo_make_funnel_queue(&queue->retry_queue);
+ if (result != VDO_SUCCESS) {
+ uds_request_queue_finish(queue);
+ return result;
+ }
+
+ result = vdo_create_thread(request_queue_worker, queue, queue_name,
+ &queue->thread);
+ if (result != VDO_SUCCESS) {
+ uds_request_queue_finish(queue);
+ return result;
+ }
+
+ queue->started = true;
+ *queue_ptr = queue;
+ return UDS_SUCCESS;
+}
+
+static inline void wake_up_worker(struct uds_request_queue *queue)
+{
+ if (wq_has_sleeper(&queue->wait_head))
+ wake_up(&queue->wait_head);
+}
+
+void uds_request_queue_enqueue(struct uds_request_queue *queue,
+ struct uds_request *request)
+{
+ struct funnel_queue *sub_queue;
+ bool unbatched = request->unbatched;
+
+ sub_queue = request->requeued ? queue->retry_queue : queue->main_queue;
+ vdo_funnel_queue_put(sub_queue, &request->queue_link);
+
+ /*
+ * We must wake the worker thread when it is dormant. A read fence isn't needed here since
+ * we know the queue operation acts as one.
+ */
+ if (atomic_read(&queue->dormant) || unbatched)
+ wake_up_worker(queue);
+}
+
+void uds_request_queue_finish(struct uds_request_queue *queue)
+{
+ if (queue == NULL)
+ return;
+
+ /*
+ * This memory barrier ensures that any requests we queued will be seen. The point is that
+ * when dequeue_request() sees the following update to the running flag, it will also be
+ * able to see any change we made to a next field in the funnel queue entry. The
+ * corresponding read barrier is in request_queue_worker().
+ */
+ smp_wmb();
+ WRITE_ONCE(queue->running, false);
+
+ if (queue->started) {
+ wake_up_worker(queue);
+ vdo_join_threads(queue->thread);
+ }
+
+ vdo_free_funnel_queue(queue->main_queue);
+ vdo_free_funnel_queue(queue->retry_queue);
+ vdo_free(queue);
+}
diff --git a/drivers/md/dm-vdo/indexer/funnel-requestqueue.h b/drivers/md/dm-vdo/indexer/funnel-requestqueue.h
new file mode 100644
index 000000000000..9b0f53939b4d
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/funnel-requestqueue.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_REQUEST_QUEUE_H
+#define UDS_REQUEST_QUEUE_H
+
+#include "indexer.h"
+
+/*
+ * A simple request queue which will handle new requests in the order in which they are received,
+ * and will attempt to handle requeued requests before new ones. However, the nature of the
+ * implementation means that it cannot guarantee this ordering; the prioritization is merely a
+ * hint.
+ */
+
+struct uds_request_queue;
+
+typedef void (*uds_request_queue_processor_fn)(struct uds_request *);
+
+int __must_check uds_make_request_queue(const char *queue_name,
+ uds_request_queue_processor_fn processor,
+ struct uds_request_queue **queue_ptr);
+
+void uds_request_queue_enqueue(struct uds_request_queue *queue,
+ struct uds_request *request);
+
+void uds_request_queue_finish(struct uds_request_queue *queue);
+
+#endif /* UDS_REQUEST_QUEUE_H */
diff --git a/drivers/md/dm-vdo/indexer/geometry.c b/drivers/md/dm-vdo/indexer/geometry.c
new file mode 100644
index 000000000000..c0575612e820
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/geometry.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "geometry.h"
+
+#include <linux/compiler.h>
+#include <linux/log2.h>
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "delta-index.h"
+#include "indexer.h"
+
+/*
+ * An index volume is divided into a fixed number of fixed-size chapters, each consisting of a
+ * fixed number of fixed-size pages. The volume layout is defined by two constants and four
+ * parameters. The constants are that index records are 32 bytes long (16-byte block name plus
+ * 16-byte metadata) and that open chapter index hash slots are one byte long. The four parameters
+ * are the number of bytes in a page, the number of record pages in a chapter, the number of
+ * chapters in a volume, and the number of chapters that are sparse. From these parameters, we can
+ * derive the rest of the layout and other index properties.
+ *
+ * The index volume is sized by its maximum memory footprint. For a dense index, the persistent
+ * storage is about 10 times the size of the memory footprint. For a sparse index, the persistent
+ * storage is about 100 times the size of the memory footprint.
+ *
+ * For a small index with a memory footprint less than 1GB, there are three possible memory
+ * configurations: 0.25GB, 0.5GB and 0.75GB. The default geometry for each is 1024 index records
+ * per 32 KB page, 1024 chapters per volume, and either 64, 128, or 192 record pages per chapter
+ * (resulting in 6, 13, or 20 index pages per chapter) depending on the memory configuration. For
+ * the VDO default of a 0.25 GB index, this yields a deduplication window of 256 GB using about 2.5
+ * GB for the persistent storage and 256 MB of RAM.
+ *
+ * For a larger index with a memory footprint that is a multiple of 1 GB, the geometry is 1024
+ * index records per 32 KB page, 256 record pages per chapter, 26 index pages per chapter, and 1024
+ * chapters for every GB of memory footprint. For a 1 GB volume, this yields a deduplication window
+ * of 1 TB using about 9GB of persistent storage and 1 GB of RAM.
+ *
+ * The above numbers hold for volumes which have no sparse chapters. A sparse volume has 10 times
+ * as many chapters as the corresponding non-sparse volume, which provides 10 times the
+ * deduplication window while using 10 times as much persistent storage as the equivalent
+ * non-sparse volume with the same memory footprint.
+ *
+ * If the volume has been converted from a non-lvm format to an lvm volume, the number of chapters
+ * per volume will have been reduced by one by eliminating physical chapter 0, and the virtual
+ * chapter that formerly mapped to physical chapter 0 may be remapped to another physical chapter.
+ * This remapping is expressed by storing which virtual chapter was remapped, and which physical
+ * chapter it was moved to.
+ */
+
+int uds_make_index_geometry(size_t bytes_per_page, u32 record_pages_per_chapter,
+ u32 chapters_per_volume, u32 sparse_chapters_per_volume,
+ u64 remapped_virtual, u64 remapped_physical,
+ struct index_geometry **geometry_ptr)
+{
+ int result;
+ struct index_geometry *geometry;
+
+ result = vdo_allocate(1, struct index_geometry, "geometry", &geometry);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ geometry->bytes_per_page = bytes_per_page;
+ geometry->record_pages_per_chapter = record_pages_per_chapter;
+ geometry->chapters_per_volume = chapters_per_volume;
+ geometry->sparse_chapters_per_volume = sparse_chapters_per_volume;
+ geometry->dense_chapters_per_volume = chapters_per_volume - sparse_chapters_per_volume;
+ geometry->remapped_virtual = remapped_virtual;
+ geometry->remapped_physical = remapped_physical;
+
+ geometry->records_per_page = bytes_per_page / BYTES_PER_RECORD;
+ geometry->records_per_chapter = geometry->records_per_page * record_pages_per_chapter;
+ geometry->records_per_volume = (u64) geometry->records_per_chapter * chapters_per_volume;
+
+ geometry->chapter_mean_delta = 1 << DEFAULT_CHAPTER_MEAN_DELTA_BITS;
+ geometry->chapter_payload_bits = bits_per(record_pages_per_chapter - 1);
+ /*
+ * We want 1 delta list for every 64 records in the chapter.
+ * The "| 077" ensures that the chapter_delta_list_bits computation
+ * does not underflow.
+ */
+ geometry->chapter_delta_list_bits =
+ bits_per((geometry->records_per_chapter - 1) | 077) - 6;
+ geometry->delta_lists_per_chapter = 1 << geometry->chapter_delta_list_bits;
+ /* We need enough address bits to achieve the desired mean delta. */
+ geometry->chapter_address_bits =
+ (DEFAULT_CHAPTER_MEAN_DELTA_BITS -
+ geometry->chapter_delta_list_bits +
+ bits_per(geometry->records_per_chapter - 1));
+ geometry->index_pages_per_chapter =
+ uds_get_delta_index_page_count(geometry->records_per_chapter,
+ geometry->delta_lists_per_chapter,
+ geometry->chapter_mean_delta,
+ geometry->chapter_payload_bits,
+ bytes_per_page);
+
+ geometry->pages_per_chapter = geometry->index_pages_per_chapter + record_pages_per_chapter;
+ geometry->pages_per_volume = geometry->pages_per_chapter * chapters_per_volume;
+ geometry->bytes_per_volume =
+ bytes_per_page * (geometry->pages_per_volume + HEADER_PAGES_PER_VOLUME);
+
+ *geometry_ptr = geometry;
+ return UDS_SUCCESS;
+}
+
+int uds_copy_index_geometry(struct index_geometry *source,
+ struct index_geometry **geometry_ptr)
+{
+ return uds_make_index_geometry(source->bytes_per_page,
+ source->record_pages_per_chapter,
+ source->chapters_per_volume,
+ source->sparse_chapters_per_volume,
+ source->remapped_virtual, source->remapped_physical,
+ geometry_ptr);
+}
+
+void uds_free_index_geometry(struct index_geometry *geometry)
+{
+ vdo_free(geometry);
+}
+
+u32 __must_check uds_map_to_physical_chapter(const struct index_geometry *geometry,
+ u64 virtual_chapter)
+{
+ u64 delta;
+
+ if (!uds_is_reduced_index_geometry(geometry))
+ return virtual_chapter % geometry->chapters_per_volume;
+
+ if (likely(virtual_chapter > geometry->remapped_virtual)) {
+ delta = virtual_chapter - geometry->remapped_virtual;
+ if (likely(delta > geometry->remapped_physical))
+ return delta % geometry->chapters_per_volume;
+ else
+ return delta - 1;
+ }
+
+ if (virtual_chapter == geometry->remapped_virtual)
+ return geometry->remapped_physical;
+
+ delta = geometry->remapped_virtual - virtual_chapter;
+ if (delta < geometry->chapters_per_volume)
+ return geometry->chapters_per_volume - delta;
+
+ /* This chapter is so old the answer doesn't matter. */
+ return 0;
+}
+
+/* Check whether any sparse chapters are in use. */
+bool uds_has_sparse_chapters(const struct index_geometry *geometry,
+ u64 oldest_virtual_chapter, u64 newest_virtual_chapter)
+{
+ return uds_is_sparse_index_geometry(geometry) &&
+ ((newest_virtual_chapter - oldest_virtual_chapter + 1) >
+ geometry->dense_chapters_per_volume);
+}
+
+bool uds_is_chapter_sparse(const struct index_geometry *geometry,
+ u64 oldest_virtual_chapter, u64 newest_virtual_chapter,
+ u64 virtual_chapter_number)
+{
+ return uds_has_sparse_chapters(geometry, oldest_virtual_chapter,
+ newest_virtual_chapter) &&
+ ((virtual_chapter_number + geometry->dense_chapters_per_volume) <=
+ newest_virtual_chapter);
+}
+
+/* Calculate how many chapters to expire after opening the newest chapter. */
+u32 uds_chapters_to_expire(const struct index_geometry *geometry, u64 newest_chapter)
+{
+ /* If the index isn't full yet, don't expire anything. */
+ if (newest_chapter < geometry->chapters_per_volume)
+ return 0;
+
+ /* If a chapter is out of order... */
+ if (geometry->remapped_physical > 0) {
+ u64 oldest_chapter = newest_chapter - geometry->chapters_per_volume;
+
+ /*
+ * ... expire an extra chapter when expiring the moved chapter to free physical
+ * space for the new chapter ...
+ */
+ if (oldest_chapter == geometry->remapped_virtual)
+ return 2;
+
+ /*
+ * ... but don't expire anything when the new chapter will use the physical chapter
+ * freed by expiring the moved chapter.
+ */
+ if (oldest_chapter == (geometry->remapped_virtual + geometry->remapped_physical))
+ return 0;
+ }
+
+ /* Normally, just expire one. */
+ return 1;
+}
diff --git a/drivers/md/dm-vdo/indexer/geometry.h b/drivers/md/dm-vdo/indexer/geometry.h
new file mode 100644
index 000000000000..a2ecdb238cf2
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/geometry.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_INDEX_GEOMETRY_H
+#define UDS_INDEX_GEOMETRY_H
+
+#include "indexer.h"
+
+/*
+ * The index_geometry records parameters that define the layout of a UDS index volume, and the size and
+ * shape of various index structures. It is created when the index is created, and is referenced by
+ * many index sub-components.
+ */
+
+struct index_geometry {
+ /* Size of a chapter page, in bytes */
+ size_t bytes_per_page;
+ /* Number of record pages in a chapter */
+ u32 record_pages_per_chapter;
+ /* Total number of chapters in a volume */
+ u32 chapters_per_volume;
+ /* Number of sparsely-indexed chapters in a volume */
+ u32 sparse_chapters_per_volume;
+ /* Number of bits used to determine delta list numbers */
+ u8 chapter_delta_list_bits;
+ /* Virtual chapter remapped from physical chapter 0 */
+ u64 remapped_virtual;
+ /* New physical chapter where the remapped chapter can be found */
+ u64 remapped_physical;
+
+ /*
+ * The following properties are derived from the ones above, but they are computed and
+ * recorded as fields for convenience.
+ */
+ /* Total number of pages in a volume, excluding the header */
+ u32 pages_per_volume;
+ /* Total number of bytes in a volume, including the header */
+ size_t bytes_per_volume;
+ /* Number of pages in a chapter */
+ u32 pages_per_chapter;
+ /* Number of index pages in a chapter index */
+ u32 index_pages_per_chapter;
+ /* Number of records that fit on a page */
+ u32 records_per_page;
+ /* Number of records that fit in a chapter */
+ u32 records_per_chapter;
+ /* Number of records that fit in a volume */
+ u64 records_per_volume;
+ /* Number of delta lists per chapter index */
+ u32 delta_lists_per_chapter;
+ /* Mean delta for chapter indexes */
+ u32 chapter_mean_delta;
+ /* Number of bits needed for record page numbers */
+ u8 chapter_payload_bits;
+ /* Number of bits used to compute addresses for chapter delta lists */
+ u8 chapter_address_bits;
+ /* Number of densely-indexed chapters in a volume */
+ u32 dense_chapters_per_volume;
+};
+
+enum {
+ /* The number of bytes in a record (name + metadata) */
+ BYTES_PER_RECORD = (UDS_RECORD_NAME_SIZE + UDS_RECORD_DATA_SIZE),
+
+ /* The default length of a page in a chapter, in bytes */
+ DEFAULT_BYTES_PER_PAGE = 1024 * BYTES_PER_RECORD,
+
+ /* The default maximum number of records per page */
+ DEFAULT_RECORDS_PER_PAGE = DEFAULT_BYTES_PER_PAGE / BYTES_PER_RECORD,
+
+ /* The default number of record pages in a chapter */
+ DEFAULT_RECORD_PAGES_PER_CHAPTER = 256,
+
+ /* The default number of record pages in a chapter for a small index */
+ SMALL_RECORD_PAGES_PER_CHAPTER = 64,
+
+ /* The default number of chapters in a volume */
+ DEFAULT_CHAPTERS_PER_VOLUME = 1024,
+
+ /* The default number of sparsely-indexed chapters in a volume */
+ DEFAULT_SPARSE_CHAPTERS_PER_VOLUME = 0,
+
+ /* The log2 of the default mean delta */
+ DEFAULT_CHAPTER_MEAN_DELTA_BITS = 16,
+
+ /* The log2 of the number of delta lists in a large chapter */
+ DEFAULT_CHAPTER_DELTA_LIST_BITS = 12,
+
+ /* The log2 of the number of delta lists in a small chapter */
+ SMALL_CHAPTER_DELTA_LIST_BITS = 10,
+
+ /* The number of header pages per volume */
+ HEADER_PAGES_PER_VOLUME = 1,
+};
+
+int __must_check uds_make_index_geometry(size_t bytes_per_page, u32 record_pages_per_chapter,
+ u32 chapters_per_volume,
+ u32 sparse_chapters_per_volume, u64 remapped_virtual,
+ u64 remapped_physical,
+ struct index_geometry **geometry_ptr);
+
+int __must_check uds_copy_index_geometry(struct index_geometry *source,
+ struct index_geometry **geometry_ptr);
+
+void uds_free_index_geometry(struct index_geometry *geometry);
+
+u32 __must_check uds_map_to_physical_chapter(const struct index_geometry *geometry,
+ u64 virtual_chapter);
+
+/*
+ * Check whether this geometry is reduced by a chapter. This will only be true if the volume was
+ * converted from a non-lvm volume to an lvm volume.
+ */
+static inline bool __must_check
+uds_is_reduced_index_geometry(const struct index_geometry *geometry)
+{
+ return !!(geometry->chapters_per_volume & 1);
+}
+
+static inline bool __must_check
+uds_is_sparse_index_geometry(const struct index_geometry *geometry)
+{
+ return geometry->sparse_chapters_per_volume > 0;
+}
+
+bool __must_check uds_has_sparse_chapters(const struct index_geometry *geometry,
+ u64 oldest_virtual_chapter,
+ u64 newest_virtual_chapter);
+
+bool __must_check uds_is_chapter_sparse(const struct index_geometry *geometry,
+ u64 oldest_virtual_chapter,
+ u64 newest_virtual_chapter,
+ u64 virtual_chapter_number);
+
+u32 __must_check uds_chapters_to_expire(const struct index_geometry *geometry,
+ u64 newest_chapter);
+
+#endif /* UDS_INDEX_GEOMETRY_H */
diff --git a/drivers/md/dm-vdo/indexer/hash-utils.h b/drivers/md/dm-vdo/indexer/hash-utils.h
new file mode 100644
index 000000000000..6a8dd8ffea6c
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/hash-utils.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_HASH_UTILS_H
+#define UDS_HASH_UTILS_H
+
+#include "numeric.h"
+
+#include "geometry.h"
+#include "indexer.h"
+
+/* Utilities for extracting portions of a request name for various uses. */
+
+/* How various portions of a record name are apportioned. */
+enum {
+ VOLUME_INDEX_BYTES_OFFSET = 0,
+ VOLUME_INDEX_BYTES_COUNT = 8,
+ CHAPTER_INDEX_BYTES_OFFSET = 8,
+ CHAPTER_INDEX_BYTES_COUNT = 6,
+ SAMPLE_BYTES_OFFSET = 14,
+ SAMPLE_BYTES_COUNT = 2,
+};
+
+static inline u64 uds_extract_chapter_index_bytes(const struct uds_record_name *name)
+{
+ const u8 *chapter_bits = &name->name[CHAPTER_INDEX_BYTES_OFFSET];
+ u64 bytes = (u64) get_unaligned_be16(chapter_bits) << 32;
+
+ bytes |= get_unaligned_be32(chapter_bits + 2);
+ return bytes;
+}
+
+static inline u64 uds_extract_volume_index_bytes(const struct uds_record_name *name)
+{
+ return get_unaligned_be64(&name->name[VOLUME_INDEX_BYTES_OFFSET]);
+}
+
+static inline u32 uds_extract_sampling_bytes(const struct uds_record_name *name)
+{
+ return get_unaligned_be16(&name->name[SAMPLE_BYTES_OFFSET]);
+}
+
+/* Compute the chapter delta list for a given name. */
+static inline u32 uds_hash_to_chapter_delta_list(const struct uds_record_name *name,
+ const struct index_geometry *geometry)
+{
+ return ((uds_extract_chapter_index_bytes(name) >> geometry->chapter_address_bits) &
+ ((1 << geometry->chapter_delta_list_bits) - 1));
+}
+
+/* Compute the chapter delta address for a given name. */
+static inline u32 uds_hash_to_chapter_delta_address(const struct uds_record_name *name,
+ const struct index_geometry *geometry)
+{
+ return uds_extract_chapter_index_bytes(name) & ((1 << geometry->chapter_address_bits) - 1);
+}
+
+static inline unsigned int uds_name_to_hash_slot(const struct uds_record_name *name,
+ unsigned int slot_count)
+{
+ return (unsigned int) (uds_extract_chapter_index_bytes(name) % slot_count);
+}
+
+#endif /* UDS_HASH_UTILS_H */
diff --git a/drivers/md/dm-vdo/indexer/index-layout.c b/drivers/md/dm-vdo/indexer/index-layout.c
new file mode 100644
index 000000000000..627adc24af3b
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index-layout.c
@@ -0,0 +1,1765 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "index-layout.h"
+
+#include <linux/random.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "murmurhash3.h"
+#include "numeric.h"
+#include "time-utils.h"
+
+#include "config.h"
+#include "open-chapter.h"
+#include "volume-index.h"
+
+/*
+ * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
+ * which are computed when the index is created. Every header and region begins on 4K block
+ * boundary. Save regions are further sub-divided into regions of their own.
+ *
+ * Each region has a kind and an instance number. Some kinds only have one instance and therefore
+ * use RL_SOLE_INSTANCE (-1) as the instance number. The RL_KIND_INDEX used to use instances to
+ * represent sub-indices; now, however there is only ever one sub-index and therefore one instance.
+ * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
+ *
+ * Every region header has a type and version.
+ *
+ * +-+-+---------+--------+--------+-+
+ * | | | I N D E X 0 101, 0 | |
+ * |H|C+---------+--------+--------+S|
+ * |D|f| Volume | Save | Save |e|
+ * |R|g| Region | Region | Region |a|
+ * | | | 201, -1 | 202, 0 | 202, 1 |l|
+ * +-+-+--------+---------+--------+-+
+ *
+ * The header contains the encoded region layout table as well as some index configuration data.
+ * The sub-index region and its subdivisions are maintained in the same table.
+ *
+ * There are two save regions to preserve the old state in case saving the new state is incomplete.
+ * They are used in alternation. Each save region is further divided into sub-regions.
+ *
+ * +-+-----+------+------+-----+-----+
+ * |H| IPM | MI | MI | | OC |
+ * |D| | zone | zone | ... | |
+ * |R| 301 | 302 | 302 | | 303 |
+ * | | -1 | 0 | 1 | | -1 |
+ * +-+-----+------+------+-----+-----+
+ *
+ * The header contains the encoded region layout table as well as index state data for that save.
+ * Each save also has a unique nonce.
+ */
+
+#define MAGIC_SIZE 32
+#define NONCE_INFO_SIZE 32
+#define MAX_SAVES 2
+
+enum region_kind {
+ RL_KIND_EMPTY = 0,
+ RL_KIND_HEADER = 1,
+ RL_KIND_CONFIG = 100,
+ RL_KIND_INDEX = 101,
+ RL_KIND_SEAL = 102,
+ RL_KIND_VOLUME = 201,
+ RL_KIND_SAVE = 202,
+ RL_KIND_INDEX_PAGE_MAP = 301,
+ RL_KIND_VOLUME_INDEX = 302,
+ RL_KIND_OPEN_CHAPTER = 303,
+};
+
+/* Some region types are historical and are no longer used. */
+enum region_type {
+ RH_TYPE_FREE = 0, /* unused */
+ RH_TYPE_SUPER = 1,
+ RH_TYPE_SAVE = 2,
+ RH_TYPE_CHECKPOINT = 3, /* unused */
+ RH_TYPE_UNSAVED = 4,
+};
+
+#define RL_SOLE_INSTANCE 65535
+
+/*
+ * Super block version 2 is the first released version.
+ *
+ * Super block version 3 is the normal version used from RHEL 8.2 onwards.
+ *
+ * Super block versions 4 through 6 were incremental development versions and
+ * are not supported.
+ *
+ * Super block version 7 is used for volumes which have been reduced in size by one chapter in
+ * order to make room to prepend LVM metadata to a volume originally created without lvm. This
+ * allows the index to retain most its deduplication records.
+ */
+#define SUPER_VERSION_MINIMUM 3
+#define SUPER_VERSION_CURRENT 3
+#define SUPER_VERSION_MAXIMUM 7
+
+static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
+static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
+
+struct region_header {
+ u64 magic;
+ u64 region_blocks;
+ u16 type;
+ /* Currently always version 1 */
+ u16 version;
+ u16 region_count;
+ u16 payload;
+};
+
+struct layout_region {
+ u64 start_block;
+ u64 block_count;
+ u32 __unused;
+ u16 kind;
+ u16 instance;
+};
+
+struct region_table {
+ size_t encoded_size;
+ struct region_header header;
+ struct layout_region regions[];
+};
+
+struct index_save_data {
+ u64 timestamp;
+ u64 nonce;
+ /* Currently always version 1 */
+ u32 version;
+ u32 unused__;
+};
+
+struct index_state_version {
+ s32 signature;
+ s32 version_id;
+};
+
+static const struct index_state_version INDEX_STATE_VERSION_301 = {
+ .signature = -1,
+ .version_id = 301,
+};
+
+struct index_state_data301 {
+ struct index_state_version version;
+ u64 newest_chapter;
+ u64 oldest_chapter;
+ u64 last_save;
+ u32 unused;
+ u32 padding;
+};
+
+struct index_save_layout {
+ unsigned int zone_count;
+ struct layout_region index_save;
+ struct layout_region header;
+ struct layout_region index_page_map;
+ struct layout_region free_space;
+ struct layout_region volume_index_zones[MAX_ZONES];
+ struct layout_region open_chapter;
+ struct index_save_data save_data;
+ struct index_state_data301 state_data;
+};
+
+struct sub_index_layout {
+ u64 nonce;
+ struct layout_region sub_index;
+ struct layout_region volume;
+ struct index_save_layout *saves;
+};
+
+struct super_block_data {
+ u8 magic_label[MAGIC_SIZE];
+ u8 nonce_info[NONCE_INFO_SIZE];
+ u64 nonce;
+ u32 version;
+ u32 block_size;
+ u16 index_count;
+ u16 max_saves;
+ /* Padding reflects a blank field on permanent storage */
+ u8 padding[4];
+ u64 open_chapter_blocks;
+ u64 page_map_blocks;
+ u64 volume_offset;
+ u64 start_offset;
+};
+
+struct index_layout {
+ struct io_factory *factory;
+ size_t factory_size;
+ off_t offset;
+ struct super_block_data super;
+ struct layout_region header;
+ struct layout_region config;
+ struct sub_index_layout index;
+ struct layout_region seal;
+ u64 total_blocks;
+};
+
+struct save_layout_sizes {
+ unsigned int save_count;
+ size_t block_size;
+ u64 volume_blocks;
+ u64 volume_index_blocks;
+ u64 page_map_blocks;
+ u64 open_chapter_blocks;
+ u64 save_blocks;
+ u64 sub_index_blocks;
+ u64 total_blocks;
+ size_t total_size;
+};
+
+static inline bool is_converted_super_block(struct super_block_data *super)
+{
+ return super->version == 7;
+}
+
+static int __must_check compute_sizes(const struct uds_configuration *config,
+ struct save_layout_sizes *sls)
+{
+ int result;
+ struct index_geometry *geometry = config->geometry;
+
+ memset(sls, 0, sizeof(*sls));
+ sls->save_count = MAX_SAVES;
+ sls->block_size = UDS_BLOCK_SIZE;
+ sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
+
+ result = uds_compute_volume_index_save_blocks(config, sls->block_size,
+ &sls->volume_index_blocks);
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "cannot compute index save size");
+
+ sls->page_map_blocks =
+ DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
+ sls->block_size);
+ sls->open_chapter_blocks =
+ DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
+ sls->block_size);
+ sls->save_blocks =
+ 1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
+ sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
+ sls->total_blocks = 3 + sls->sub_index_blocks;
+ sls->total_size = sls->total_blocks * sls->block_size;
+
+ return UDS_SUCCESS;
+}
+
+int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
+{
+ int result;
+ struct uds_configuration *index_config;
+ struct save_layout_sizes sizes;
+
+ if (index_size == NULL) {
+ vdo_log_error("Missing output size pointer");
+ return -EINVAL;
+ }
+
+ result = uds_make_configuration(parameters, &index_config);
+ if (result != UDS_SUCCESS) {
+ vdo_log_error_strerror(result, "cannot compute index size");
+ return uds_status_to_errno(result);
+ }
+
+ result = compute_sizes(index_config, &sizes);
+ uds_free_configuration(index_config);
+ if (result != UDS_SUCCESS)
+ return uds_status_to_errno(result);
+
+ *index_size = sizes.total_size;
+ return UDS_SUCCESS;
+}
+
+/* Create unique data using the current time and a pseudorandom number. */
+static void create_unique_nonce_data(u8 *buffer)
+{
+ ktime_t now = current_time_ns(CLOCK_REALTIME);
+ u32 rand;
+ size_t offset = 0;
+
+ get_random_bytes(&rand, sizeof(u32));
+ memcpy(buffer + offset, &now, sizeof(now));
+ offset += sizeof(now);
+ memcpy(buffer + offset, &rand, sizeof(rand));
+ offset += sizeof(rand);
+ while (offset < NONCE_INFO_SIZE) {
+ size_t len = min(NONCE_INFO_SIZE - offset, offset);
+
+ memcpy(buffer + offset, buffer, len);
+ offset += len;
+ }
+}
+
+static u64 hash_stuff(u64 start, const void *data, size_t len)
+{
+ u32 seed = start ^ (start >> 27);
+ u8 hash_buffer[16];
+
+ murmurhash3_128(data, len, seed, hash_buffer);
+ return get_unaligned_le64(hash_buffer + 4);
+}
+
+/* Generate a primary nonce from the provided data. */
+static u64 generate_primary_nonce(const void *data, size_t len)
+{
+ return hash_stuff(0xa1b1e0fc, data, len);
+}
+
+/*
+ * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
+ * hashing the original nonce and the data to produce a new nonce.
+ */
+static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
+{
+ return hash_stuff(nonce + 1, data, len);
+}
+
+static int __must_check open_layout_reader(struct index_layout *layout,
+ struct layout_region *lr, off_t offset,
+ struct buffered_reader **reader_ptr)
+{
+ return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
+ lr->block_count, reader_ptr);
+}
+
+static int open_region_reader(struct index_layout *layout, struct layout_region *region,
+ struct buffered_reader **reader_ptr)
+{
+ return open_layout_reader(layout, region, -layout->super.start_offset,
+ reader_ptr);
+}
+
+static int __must_check open_layout_writer(struct index_layout *layout,
+ struct layout_region *lr, off_t offset,
+ struct buffered_writer **writer_ptr)
+{
+ return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
+ lr->block_count, writer_ptr);
+}
+
+static int open_region_writer(struct index_layout *layout, struct layout_region *region,
+ struct buffered_writer **writer_ptr)
+{
+ return open_layout_writer(layout, region, -layout->super.start_offset,
+ writer_ptr);
+}
+
+static void generate_super_block_data(struct save_layout_sizes *sls,
+ struct super_block_data *super)
+{
+ memset(super, 0, sizeof(*super));
+ memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
+ create_unique_nonce_data(super->nonce_info);
+
+ super->nonce = generate_primary_nonce(super->nonce_info,
+ sizeof(super->nonce_info));
+ super->version = SUPER_VERSION_CURRENT;
+ super->block_size = sls->block_size;
+ super->index_count = 1;
+ super->max_saves = sls->save_count;
+ super->open_chapter_blocks = sls->open_chapter_blocks;
+ super->page_map_blocks = sls->page_map_blocks;
+ super->volume_offset = 0;
+ super->start_offset = 0;
+}
+
+static void define_sub_index_nonce(struct index_layout *layout)
+{
+ struct sub_index_nonce_data {
+ u64 offset;
+ u16 index_id;
+ };
+ struct sub_index_layout *sil = &layout->index;
+ u64 primary_nonce = layout->super.nonce;
+ u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
+ size_t offset = 0;
+
+ encode_u64_le(buffer, &offset, sil->sub_index.start_block);
+ encode_u16_le(buffer, &offset, 0);
+ sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
+ if (sil->nonce == 0) {
+ sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
+ sizeof(buffer));
+ }
+}
+
+static void setup_sub_index(struct index_layout *layout, u64 start_block,
+ struct save_layout_sizes *sls)
+{
+ struct sub_index_layout *sil = &layout->index;
+ u64 next_block = start_block;
+ unsigned int i;
+
+ sil->sub_index = (struct layout_region) {
+ .start_block = start_block,
+ .block_count = sls->sub_index_blocks,
+ .kind = RL_KIND_INDEX,
+ .instance = 0,
+ };
+
+ sil->volume = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = sls->volume_blocks,
+ .kind = RL_KIND_VOLUME,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ next_block += sls->volume_blocks;
+
+ for (i = 0; i < sls->save_count; i++) {
+ sil->saves[i].index_save = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = sls->save_blocks,
+ .kind = RL_KIND_SAVE,
+ .instance = i,
+ };
+
+ next_block += sls->save_blocks;
+ }
+
+ define_sub_index_nonce(layout);
+}
+
+static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
+{
+ u64 next_block = layout->offset / sls->block_size;
+
+ layout->total_blocks = sls->total_blocks;
+ generate_super_block_data(sls, &layout->super);
+ layout->header = (struct layout_region) {
+ .start_block = next_block++,
+ .block_count = 1,
+ .kind = RL_KIND_HEADER,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ layout->config = (struct layout_region) {
+ .start_block = next_block++,
+ .block_count = 1,
+ .kind = RL_KIND_CONFIG,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ setup_sub_index(layout, next_block, sls);
+ next_block += sls->sub_index_blocks;
+
+ layout->seal = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = 1,
+ .kind = RL_KIND_SEAL,
+ .instance = RL_SOLE_INSTANCE,
+ };
+}
+
+static int __must_check make_index_save_region_table(struct index_save_layout *isl,
+ struct region_table **table_ptr)
+{
+ int result;
+ unsigned int z;
+ struct region_table *table;
+ struct layout_region *lr;
+ u16 region_count;
+ size_t payload;
+ size_t type;
+
+ if (isl->zone_count > 0) {
+ /*
+ * Normal save regions: header, page map, volume index zones,
+ * open chapter, and possibly free space.
+ */
+ region_count = 3 + isl->zone_count;
+ if (isl->free_space.block_count > 0)
+ region_count++;
+
+ payload = sizeof(isl->save_data) + sizeof(isl->state_data);
+ type = RH_TYPE_SAVE;
+ } else {
+ /* Empty save regions: header, page map, free space. */
+ region_count = 3;
+ payload = sizeof(isl->save_data);
+ type = RH_TYPE_UNSAVED;
+ }
+
+ result = vdo_allocate_extended(struct region_table, region_count,
+ struct layout_region,
+ "layout region table for ISL", &table);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ lr = &table->regions[0];
+ *lr++ = isl->header;
+ *lr++ = isl->index_page_map;
+ for (z = 0; z < isl->zone_count; z++)
+ *lr++ = isl->volume_index_zones[z];
+
+ if (isl->zone_count > 0)
+ *lr++ = isl->open_chapter;
+
+ if (isl->free_space.block_count > 0)
+ *lr++ = isl->free_space;
+
+ table->header = (struct region_header) {
+ .magic = REGION_MAGIC,
+ .region_blocks = isl->index_save.block_count,
+ .type = type,
+ .version = 1,
+ .region_count = region_count,
+ .payload = payload,
+ };
+
+ table->encoded_size = (sizeof(struct region_header) + payload +
+ region_count * sizeof(struct layout_region));
+ *table_ptr = table;
+ return UDS_SUCCESS;
+}
+
+static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
+{
+ unsigned int i;
+
+ encode_u64_le(buffer, offset, REGION_MAGIC);
+ encode_u64_le(buffer, offset, table->header.region_blocks);
+ encode_u16_le(buffer, offset, table->header.type);
+ encode_u16_le(buffer, offset, table->header.version);
+ encode_u16_le(buffer, offset, table->header.region_count);
+ encode_u16_le(buffer, offset, table->header.payload);
+
+ for (i = 0; i < table->header.region_count; i++) {
+ encode_u64_le(buffer, offset, table->regions[i].start_block);
+ encode_u64_le(buffer, offset, table->regions[i].block_count);
+ encode_u32_le(buffer, offset, 0);
+ encode_u16_le(buffer, offset, table->regions[i].kind);
+ encode_u16_le(buffer, offset, table->regions[i].instance);
+ }
+}
+
+static int __must_check write_index_save_header(struct index_save_layout *isl,
+ struct region_table *table,
+ struct buffered_writer *writer)
+{
+ int result;
+ u8 *buffer;
+ size_t offset = 0;
+
+ result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ encode_region_table(buffer, &offset, table);
+ encode_u64_le(buffer, &offset, isl->save_data.timestamp);
+ encode_u64_le(buffer, &offset, isl->save_data.nonce);
+ encode_u32_le(buffer, &offset, isl->save_data.version);
+ encode_u32_le(buffer, &offset, 0);
+ if (isl->zone_count > 0) {
+ encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
+ encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
+ encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
+ encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
+ encode_u64_le(buffer, &offset, isl->state_data.last_save);
+ encode_u64_le(buffer, &offset, 0);
+ }
+
+ result = uds_write_to_buffered_writer(writer, buffer, offset);
+ vdo_free(buffer);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return uds_flush_buffered_writer(writer);
+}
+
+static int write_index_save_layout(struct index_layout *layout,
+ struct index_save_layout *isl)
+{
+ int result;
+ struct region_table *table;
+ struct buffered_writer *writer;
+
+ result = make_index_save_region_table(isl, &table);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = open_region_writer(layout, &isl->header, &writer);
+ if (result != UDS_SUCCESS) {
+ vdo_free(table);
+ return result;
+ }
+
+ result = write_index_save_header(isl, table, writer);
+ vdo_free(table);
+ uds_free_buffered_writer(writer);
+
+ return result;
+}
+
+static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
+{
+ u64 free_blocks;
+ u64 next_block = isl->index_save.start_block;
+
+ isl->zone_count = 0;
+ memset(&isl->save_data, 0, sizeof(isl->save_data));
+
+ isl->header = (struct layout_region) {
+ .start_block = next_block++,
+ .block_count = 1,
+ .kind = RL_KIND_HEADER,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ isl->index_page_map = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = page_map_blocks,
+ .kind = RL_KIND_INDEX_PAGE_MAP,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ next_block += page_map_blocks;
+
+ free_blocks = isl->index_save.block_count - page_map_blocks - 1;
+ isl->free_space = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = free_blocks,
+ .kind = RL_KIND_EMPTY,
+ .instance = RL_SOLE_INSTANCE,
+ };
+}
+
+static int __must_check invalidate_old_save(struct index_layout *layout,
+ struct index_save_layout *isl)
+{
+ reset_index_save_layout(isl, layout->super.page_map_blocks);
+ return write_index_save_layout(layout, isl);
+}
+
+static int discard_index_state_data(struct index_layout *layout)
+{
+ int result;
+ int saved_result = UDS_SUCCESS;
+ unsigned int i;
+
+ for (i = 0; i < layout->super.max_saves; i++) {
+ result = invalidate_old_save(layout, &layout->index.saves[i]);
+ if (result != UDS_SUCCESS)
+ saved_result = result;
+ }
+
+ if (saved_result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result,
+ "%s: cannot destroy all index saves",
+ __func__);
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check make_layout_region_table(struct index_layout *layout,
+ struct region_table **table_ptr)
+{
+ int result;
+ unsigned int i;
+ /* Regions: header, config, index, volume, saves, seal */
+ u16 region_count = 5 + layout->super.max_saves;
+ u16 payload;
+ struct region_table *table;
+ struct layout_region *lr;
+
+ result = vdo_allocate_extended(struct region_table, region_count,
+ struct layout_region, "layout region table",
+ &table);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ lr = &table->regions[0];
+ *lr++ = layout->header;
+ *lr++ = layout->config;
+ *lr++ = layout->index.sub_index;
+ *lr++ = layout->index.volume;
+
+ for (i = 0; i < layout->super.max_saves; i++)
+ *lr++ = layout->index.saves[i].index_save;
+
+ *lr++ = layout->seal;
+
+ if (is_converted_super_block(&layout->super)) {
+ payload = sizeof(struct super_block_data);
+ } else {
+ payload = (sizeof(struct super_block_data) -
+ sizeof(layout->super.volume_offset) -
+ sizeof(layout->super.start_offset));
+ }
+
+ table->header = (struct region_header) {
+ .magic = REGION_MAGIC,
+ .region_blocks = layout->total_blocks,
+ .type = RH_TYPE_SUPER,
+ .version = 1,
+ .region_count = region_count,
+ .payload = payload,
+ };
+
+ table->encoded_size = (sizeof(struct region_header) + payload +
+ region_count * sizeof(struct layout_region));
+ *table_ptr = table;
+ return UDS_SUCCESS;
+}
+
+static int __must_check write_layout_header(struct index_layout *layout,
+ struct region_table *table,
+ struct buffered_writer *writer)
+{
+ int result;
+ u8 *buffer;
+ size_t offset = 0;
+
+ result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ encode_region_table(buffer, &offset, table);
+ memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
+ offset += NONCE_INFO_SIZE;
+ encode_u64_le(buffer, &offset, layout->super.nonce);
+ encode_u32_le(buffer, &offset, layout->super.version);
+ encode_u32_le(buffer, &offset, layout->super.block_size);
+ encode_u16_le(buffer, &offset, layout->super.index_count);
+ encode_u16_le(buffer, &offset, layout->super.max_saves);
+ encode_u32_le(buffer, &offset, 0);
+ encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
+ encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
+
+ if (is_converted_super_block(&layout->super)) {
+ encode_u64_le(buffer, &offset, layout->super.volume_offset);
+ encode_u64_le(buffer, &offset, layout->super.start_offset);
+ }
+
+ result = uds_write_to_buffered_writer(writer, buffer, offset);
+ vdo_free(buffer);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return uds_flush_buffered_writer(writer);
+}
+
+static int __must_check write_uds_index_config(struct index_layout *layout,
+ struct uds_configuration *config,
+ off_t offset)
+{
+ int result;
+ struct buffered_writer *writer = NULL;
+
+ result = open_layout_writer(layout, &layout->config, offset, &writer);
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "failed to open config region");
+
+ result = uds_write_config_contents(writer, config, layout->super.version);
+ if (result != UDS_SUCCESS) {
+ uds_free_buffered_writer(writer);
+ return vdo_log_error_strerror(result, "failed to write config region");
+ }
+
+ result = uds_flush_buffered_writer(writer);
+ if (result != UDS_SUCCESS) {
+ uds_free_buffered_writer(writer);
+ return vdo_log_error_strerror(result, "cannot flush config writer");
+ }
+
+ uds_free_buffered_writer(writer);
+ return UDS_SUCCESS;
+}
+
+static int __must_check save_layout(struct index_layout *layout, off_t offset)
+{
+ int result;
+ struct buffered_writer *writer = NULL;
+ struct region_table *table;
+
+ result = make_layout_region_table(layout, &table);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = open_layout_writer(layout, &layout->header, offset, &writer);
+ if (result != UDS_SUCCESS) {
+ vdo_free(table);
+ return result;
+ }
+
+ result = write_layout_header(layout, table, writer);
+ vdo_free(table);
+ uds_free_buffered_writer(writer);
+
+ return result;
+}
+
+static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
+{
+ int result;
+ struct save_layout_sizes sizes;
+
+ result = compute_sizes(config, &sizes);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
+ &layout->index.saves);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initialize_layout(layout, &sizes);
+
+ result = discard_index_state_data(layout);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = write_uds_index_config(layout, config, 0);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return save_layout(layout, 0);
+}
+
+static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
+{
+ struct save_nonce_data {
+ struct index_save_data data;
+ u64 offset;
+ } nonce_data;
+ u8 buffer[sizeof(nonce_data)];
+ size_t offset = 0;
+
+ encode_u64_le(buffer, &offset, isl->save_data.timestamp);
+ encode_u64_le(buffer, &offset, 0);
+ encode_u32_le(buffer, &offset, isl->save_data.version);
+ encode_u32_le(buffer, &offset, 0U);
+ encode_u64_le(buffer, &offset, isl->index_save.start_block);
+ VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
+ "%zu bytes encoded of %zu expected",
+ offset, sizeof(nonce_data));
+ return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
+}
+
+static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
+{
+ if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
+ return 0;
+
+ if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
+ return 0;
+
+ return isl->save_data.timestamp;
+}
+
+static int find_latest_uds_index_save_slot(struct index_layout *layout,
+ struct index_save_layout **isl_ptr)
+{
+ struct index_save_layout *latest = NULL;
+ struct index_save_layout *isl;
+ unsigned int i;
+ u64 save_time = 0;
+ u64 latest_time = 0;
+
+ for (i = 0; i < layout->super.max_saves; i++) {
+ isl = &layout->index.saves[i];
+ save_time = validate_index_save_layout(isl, layout->index.nonce);
+ if (save_time > latest_time) {
+ latest = isl;
+ latest_time = save_time;
+ }
+ }
+
+ if (latest == NULL) {
+ vdo_log_error("No valid index save found");
+ return UDS_INDEX_NOT_SAVED_CLEANLY;
+ }
+
+ *isl_ptr = latest;
+ return UDS_SUCCESS;
+}
+
+int uds_discard_open_chapter(struct index_layout *layout)
+{
+ int result;
+ struct index_save_layout *isl;
+ struct buffered_writer *writer;
+
+ result = find_latest_uds_index_save_slot(layout, &isl);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = open_region_writer(layout, &isl->open_chapter, &writer);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
+ if (result != UDS_SUCCESS) {
+ uds_free_buffered_writer(writer);
+ return result;
+ }
+
+ result = uds_flush_buffered_writer(writer);
+ uds_free_buffered_writer(writer);
+ return result;
+}
+
+int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
+{
+ int result;
+ unsigned int zone;
+ struct index_save_layout *isl;
+ struct buffered_reader *readers[MAX_ZONES];
+
+ result = find_latest_uds_index_save_slot(layout, &isl);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ index->newest_virtual_chapter = isl->state_data.newest_chapter;
+ index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
+ index->last_save = isl->state_data.last_save;
+
+ result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_load_open_chapter(index, readers[0]);
+ uds_free_buffered_reader(readers[0]);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ for (zone = 0; zone < isl->zone_count; zone++) {
+ result = open_region_reader(layout, &isl->volume_index_zones[zone],
+ &readers[zone]);
+ if (result != UDS_SUCCESS) {
+ for (; zone > 0; zone--)
+ uds_free_buffered_reader(readers[zone - 1]);
+
+ return result;
+ }
+ }
+
+ result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
+ for (zone = 0; zone < isl->zone_count; zone++)
+ uds_free_buffered_reader(readers[zone]);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
+ uds_free_buffered_reader(readers[0]);
+
+ return result;
+}
+
+static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
+{
+ struct index_save_layout *oldest = NULL;
+ struct index_save_layout *isl;
+ unsigned int i;
+ u64 save_time = 0;
+ u64 oldest_time = 0;
+
+ for (i = 0; i < layout->super.max_saves; i++) {
+ isl = &layout->index.saves[i];
+ save_time = validate_index_save_layout(isl, layout->index.nonce);
+ if (oldest == NULL || save_time < oldest_time) {
+ oldest = isl;
+ oldest_time = save_time;
+ }
+ }
+
+ return oldest;
+}
+
+static void instantiate_index_save_layout(struct index_save_layout *isl,
+ struct super_block_data *super,
+ u64 volume_nonce, unsigned int zone_count)
+{
+ unsigned int z;
+ u64 next_block;
+ u64 free_blocks;
+ u64 volume_index_blocks;
+
+ isl->zone_count = zone_count;
+ memset(&isl->save_data, 0, sizeof(isl->save_data));
+ isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
+ isl->save_data.version = 1;
+ isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
+
+ next_block = isl->index_save.start_block;
+ isl->header = (struct layout_region) {
+ .start_block = next_block++,
+ .block_count = 1,
+ .kind = RL_KIND_HEADER,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ isl->index_page_map = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = super->page_map_blocks,
+ .kind = RL_KIND_INDEX_PAGE_MAP,
+ .instance = RL_SOLE_INSTANCE,
+ };
+ next_block += super->page_map_blocks;
+
+ free_blocks = (isl->index_save.block_count - 1 -
+ super->page_map_blocks -
+ super->open_chapter_blocks);
+ volume_index_blocks = free_blocks / isl->zone_count;
+ for (z = 0; z < isl->zone_count; z++) {
+ isl->volume_index_zones[z] = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = volume_index_blocks,
+ .kind = RL_KIND_VOLUME_INDEX,
+ .instance = z,
+ };
+
+ next_block += volume_index_blocks;
+ free_blocks -= volume_index_blocks;
+ }
+
+ isl->open_chapter = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = super->open_chapter_blocks,
+ .kind = RL_KIND_OPEN_CHAPTER,
+ .instance = RL_SOLE_INSTANCE,
+ };
+
+ next_block += super->open_chapter_blocks;
+
+ isl->free_space = (struct layout_region) {
+ .start_block = next_block,
+ .block_count = free_blocks,
+ .kind = RL_KIND_EMPTY,
+ .instance = RL_SOLE_INSTANCE,
+ };
+}
+
+static int setup_uds_index_save_slot(struct index_layout *layout,
+ unsigned int zone_count,
+ struct index_save_layout **isl_ptr)
+{
+ int result;
+ struct index_save_layout *isl;
+
+ isl = select_oldest_index_save_layout(layout);
+ result = invalidate_old_save(layout, isl);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
+ zone_count);
+
+ *isl_ptr = isl;
+ return UDS_SUCCESS;
+}
+
+static void cancel_uds_index_save(struct index_save_layout *isl)
+{
+ memset(&isl->save_data, 0, sizeof(isl->save_data));
+ memset(&isl->state_data, 0, sizeof(isl->state_data));
+ isl->zone_count = 0;
+}
+
+int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
+{
+ int result;
+ unsigned int zone;
+ struct index_save_layout *isl;
+ struct buffered_writer *writers[MAX_ZONES];
+
+ result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ isl->state_data = (struct index_state_data301) {
+ .newest_chapter = index->newest_virtual_chapter,
+ .oldest_chapter = index->oldest_virtual_chapter,
+ .last_save = index->last_save,
+ };
+
+ result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
+ if (result != UDS_SUCCESS) {
+ cancel_uds_index_save(isl);
+ return result;
+ }
+
+ result = uds_save_open_chapter(index, writers[0]);
+ uds_free_buffered_writer(writers[0]);
+ if (result != UDS_SUCCESS) {
+ cancel_uds_index_save(isl);
+ return result;
+ }
+
+ for (zone = 0; zone < index->zone_count; zone++) {
+ result = open_region_writer(layout, &isl->volume_index_zones[zone],
+ &writers[zone]);
+ if (result != UDS_SUCCESS) {
+ for (; zone > 0; zone--)
+ uds_free_buffered_writer(writers[zone - 1]);
+
+ cancel_uds_index_save(isl);
+ return result;
+ }
+ }
+
+ result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
+ for (zone = 0; zone < index->zone_count; zone++)
+ uds_free_buffered_writer(writers[zone]);
+ if (result != UDS_SUCCESS) {
+ cancel_uds_index_save(isl);
+ return result;
+ }
+
+ result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
+ if (result != UDS_SUCCESS) {
+ cancel_uds_index_save(isl);
+ return result;
+ }
+
+ result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
+ uds_free_buffered_writer(writers[0]);
+ if (result != UDS_SUCCESS) {
+ cancel_uds_index_save(isl);
+ return result;
+ }
+
+ return write_index_save_layout(layout, isl);
+}
+
+static int __must_check load_region_table(struct buffered_reader *reader,
+ struct region_table **table_ptr)
+{
+ int result;
+ unsigned int i;
+ struct region_header header;
+ struct region_table *table;
+ u8 buffer[sizeof(struct region_header)];
+ size_t offset = 0;
+
+ result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "cannot read region table header");
+
+ decode_u64_le(buffer, &offset, &header.magic);
+ decode_u64_le(buffer, &offset, &header.region_blocks);
+ decode_u16_le(buffer, &offset, &header.type);
+ decode_u16_le(buffer, &offset, &header.version);
+ decode_u16_le(buffer, &offset, &header.region_count);
+ decode_u16_le(buffer, &offset, &header.payload);
+
+ if (header.magic != REGION_MAGIC)
+ return UDS_NO_INDEX;
+
+ if (header.version != 1) {
+ return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
+ "unknown region table version %hu",
+ header.version);
+ }
+
+ result = vdo_allocate_extended(struct region_table, header.region_count,
+ struct layout_region,
+ "single file layout region table", &table);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ table->header = header;
+ for (i = 0; i < header.region_count; i++) {
+ u8 region_buffer[sizeof(struct layout_region)];
+
+ offset = 0;
+ result = uds_read_from_buffered_reader(reader, region_buffer,
+ sizeof(region_buffer));
+ if (result != UDS_SUCCESS) {
+ vdo_free(table);
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "cannot read region table layouts");
+ }
+
+ decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
+ decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
+ offset += sizeof(u32);
+ decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
+ decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
+ }
+
+ *table_ptr = table;
+ return UDS_SUCCESS;
+}
+
+static int __must_check read_super_block_data(struct buffered_reader *reader,
+ struct index_layout *layout,
+ size_t saved_size)
+{
+ int result;
+ struct super_block_data *super = &layout->super;
+ u8 *buffer;
+ size_t offset = 0;
+
+ result = vdo_allocate(saved_size, u8, "super block data", &buffer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_read_from_buffered_reader(reader, buffer, saved_size);
+ if (result != UDS_SUCCESS) {
+ vdo_free(buffer);
+ return vdo_log_error_strerror(result, "cannot read region table header");
+ }
+
+ memcpy(&super->magic_label, buffer, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
+ offset += NONCE_INFO_SIZE;
+ decode_u64_le(buffer, &offset, &super->nonce);
+ decode_u32_le(buffer, &offset, &super->version);
+ decode_u32_le(buffer, &offset, &super->block_size);
+ decode_u16_le(buffer, &offset, &super->index_count);
+ decode_u16_le(buffer, &offset, &super->max_saves);
+ offset += sizeof(u32);
+ decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
+ decode_u64_le(buffer, &offset, &super->page_map_blocks);
+
+ if (is_converted_super_block(super)) {
+ decode_u64_le(buffer, &offset, &super->volume_offset);
+ decode_u64_le(buffer, &offset, &super->start_offset);
+ } else {
+ super->volume_offset = 0;
+ super->start_offset = 0;
+ }
+
+ vdo_free(buffer);
+
+ if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "unknown superblock magic label");
+
+ if ((super->version < SUPER_VERSION_MINIMUM) ||
+ (super->version == 4) || (super->version == 5) || (super->version == 6) ||
+ (super->version > SUPER_VERSION_MAXIMUM)) {
+ return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
+ "unknown superblock version number %u",
+ super->version);
+ }
+
+ if (super->volume_offset < super->start_offset) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "inconsistent offsets (start %llu, volume %llu)",
+ (unsigned long long) super->start_offset,
+ (unsigned long long) super->volume_offset);
+ }
+
+ /* Sub-indexes are no longer used but the layout retains this field. */
+ if (super->index_count != 1) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "invalid subindex count %u",
+ super->index_count);
+ }
+
+ if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "inconsistent superblock nonce");
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check verify_region(struct layout_region *lr, u64 start_block,
+ enum region_kind kind, unsigned int instance)
+{
+ if (lr->start_block != start_block)
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "incorrect layout region offset");
+
+ if (lr->kind != kind)
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "incorrect layout region kind");
+
+ if (lr->instance != instance) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "incorrect layout region instance");
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
+ struct region_table *table)
+{
+ int result;
+ unsigned int i;
+ struct sub_index_layout *sil = &layout->index;
+ u64 next_block = start_block;
+
+ sil->sub_index = table->regions[2];
+ result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ define_sub_index_nonce(layout);
+
+ sil->volume = table->regions[3];
+ result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += sil->volume.block_count + layout->super.volume_offset;
+
+ for (i = 0; i < layout->super.max_saves; i++) {
+ sil->saves[i].index_save = table->regions[i + 4];
+ result = verify_region(&sil->saves[i].index_save, next_block,
+ RL_KIND_SAVE, i);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += sil->saves[i].index_save.block_count;
+ }
+
+ next_block -= layout->super.volume_offset;
+ if (next_block != start_block + sil->sub_index.block_count) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "sub index region does not span all saves");
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check reconstitute_layout(struct index_layout *layout,
+ struct region_table *table, u64 first_block)
+{
+ int result;
+ u64 next_block = first_block;
+
+ result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
+ __func__, &layout->index.saves);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ layout->total_blocks = table->header.region_blocks;
+
+ layout->header = table->regions[0];
+ result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ layout->config = table->regions[1];
+ result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = verify_sub_index(layout, next_block, table);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += layout->index.sub_index.block_count;
+
+ layout->seal = table->regions[table->header.region_count - 1];
+ result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
+ RL_KIND_SEAL, RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (++next_block != (first_block + layout->total_blocks)) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "layout table does not span total blocks");
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
+ u64 first_block, struct buffered_reader *reader)
+{
+ int result;
+ struct region_table *table = NULL;
+ struct super_block_data *super = &layout->super;
+
+ result = load_region_table(reader, &table);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (table->header.type != RH_TYPE_SUPER) {
+ vdo_free(table);
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "not a superblock region table");
+ }
+
+ result = read_super_block_data(reader, layout, table->header.payload);
+ if (result != UDS_SUCCESS) {
+ vdo_free(table);
+ return vdo_log_error_strerror(result, "unknown superblock format");
+ }
+
+ if (super->block_size != block_size) {
+ vdo_free(table);
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "superblock saved block_size %u differs from supplied block_size %zu",
+ super->block_size, block_size);
+ }
+
+ first_block -= (super->volume_offset - super->start_offset);
+ result = reconstitute_layout(layout, table, first_block);
+ vdo_free(table);
+ return result;
+}
+
+static int __must_check read_index_save_data(struct buffered_reader *reader,
+ struct index_save_layout *isl,
+ size_t saved_size)
+{
+ int result;
+ struct index_state_version file_version;
+ u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
+ size_t offset = 0;
+
+ if (saved_size != sizeof(buffer)) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "unexpected index save data size %zu",
+ saved_size);
+ }
+
+ result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "cannot read index save data");
+
+ decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
+ decode_u64_le(buffer, &offset, &isl->save_data.nonce);
+ decode_u32_le(buffer, &offset, &isl->save_data.version);
+ offset += sizeof(u32);
+
+ if (isl->save_data.version > 1) {
+ return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
+ "unknown index save version number %u",
+ isl->save_data.version);
+ }
+
+ decode_s32_le(buffer, &offset, &file_version.signature);
+ decode_s32_le(buffer, &offset, &file_version.version_id);
+
+ if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
+ (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
+ return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
+ "index state version %d,%d is unsupported",
+ file_version.signature,
+ file_version.version_id);
+ }
+
+ decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
+ decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
+ decode_u64_le(buffer, &offset, &isl->state_data.last_save);
+ /* Skip past some historical fields that are now unused */
+ offset += sizeof(u32) + sizeof(u32);
+ return UDS_SUCCESS;
+}
+
+static int __must_check reconstruct_index_save(struct index_save_layout *isl,
+ struct region_table *table)
+{
+ int result;
+ unsigned int z;
+ struct layout_region *last_region;
+ u64 next_block = isl->index_save.start_block;
+ u64 last_block = next_block + isl->index_save.block_count;
+
+ isl->zone_count = table->header.region_count - 3;
+
+ last_region = &table->regions[table->header.region_count - 1];
+ if (last_region->kind == RL_KIND_EMPTY) {
+ isl->free_space = *last_region;
+ isl->zone_count--;
+ } else {
+ isl->free_space = (struct layout_region) {
+ .start_block = last_block,
+ .block_count = 0,
+ .kind = RL_KIND_EMPTY,
+ .instance = RL_SOLE_INSTANCE,
+ };
+ }
+
+ isl->header = table->regions[0];
+ result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ isl->index_page_map = table->regions[1];
+ result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += isl->index_page_map.block_count;
+
+ for (z = 0; z < isl->zone_count; z++) {
+ isl->volume_index_zones[z] = table->regions[z + 2];
+ result = verify_region(&isl->volume_index_zones[z], next_block,
+ RL_KIND_VOLUME_INDEX, z);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += isl->volume_index_zones[z].block_count;
+ }
+
+ isl->open_chapter = table->regions[isl->zone_count + 2];
+ result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += isl->open_chapter.block_count;
+
+ result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
+ RL_SOLE_INSTANCE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ next_block += isl->free_space.block_count;
+ if (next_block != last_block) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "index save layout table incomplete");
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check load_index_save(struct index_save_layout *isl,
+ struct buffered_reader *reader,
+ unsigned int instance)
+{
+ int result;
+ struct region_table *table = NULL;
+
+ result = load_region_table(reader, &table);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result, "cannot read index save %u header",
+ instance);
+ }
+
+ if (table->header.region_blocks != isl->index_save.block_count) {
+ u64 region_blocks = table->header.region_blocks;
+
+ vdo_free(table);
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "unexpected index save %u region block count %llu",
+ instance,
+ (unsigned long long) region_blocks);
+ }
+
+ if (table->header.type == RH_TYPE_UNSAVED) {
+ vdo_free(table);
+ reset_index_save_layout(isl, 0);
+ return UDS_SUCCESS;
+ }
+
+
+ if (table->header.type != RH_TYPE_SAVE) {
+ vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "unexpected index save %u header type %u",
+ instance, table->header.type);
+ vdo_free(table);
+ return UDS_CORRUPT_DATA;
+ }
+
+ result = read_index_save_data(reader, isl, table->header.payload);
+ if (result != UDS_SUCCESS) {
+ vdo_free(table);
+ return vdo_log_error_strerror(result,
+ "unknown index save %u data format",
+ instance);
+ }
+
+ result = reconstruct_index_save(isl, table);
+ vdo_free(table);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
+ instance);
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check load_sub_index_regions(struct index_layout *layout)
+{
+ int result;
+ unsigned int j;
+ struct index_save_layout *isl;
+ struct buffered_reader *reader;
+
+ for (j = 0; j < layout->super.max_saves; j++) {
+ isl = &layout->index.saves[j];
+ result = open_region_reader(layout, &isl->index_save, &reader);
+
+ if (result != UDS_SUCCESS) {
+ vdo_log_error_strerror(result,
+ "cannot get reader for index 0 save %u",
+ j);
+ return result;
+ }
+
+ result = load_index_save(isl, reader, j);
+ uds_free_buffered_reader(reader);
+ if (result != UDS_SUCCESS) {
+ /* Another save slot might be valid. */
+ reset_index_save_layout(isl, 0);
+ continue;
+ }
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int __must_check verify_uds_index_config(struct index_layout *layout,
+ struct uds_configuration *config)
+{
+ int result;
+ struct buffered_reader *reader = NULL;
+ u64 offset;
+
+ offset = layout->super.volume_offset - layout->super.start_offset;
+ result = open_layout_reader(layout, &layout->config, offset, &reader);
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "failed to open config reader");
+
+ result = uds_validate_config_contents(reader, config);
+ if (result != UDS_SUCCESS) {
+ uds_free_buffered_reader(reader);
+ return vdo_log_error_strerror(result, "failed to read config region");
+ }
+
+ uds_free_buffered_reader(reader);
+ return UDS_SUCCESS;
+}
+
+static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
+{
+ int result;
+ struct buffered_reader *reader;
+
+ result = uds_make_buffered_reader(layout->factory,
+ layout->offset / UDS_BLOCK_SIZE, 1, &reader);
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result, "unable to read superblock");
+
+ result = load_super_block(layout, UDS_BLOCK_SIZE,
+ layout->offset / UDS_BLOCK_SIZE, reader);
+ uds_free_buffered_reader(reader);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = verify_uds_index_config(layout, config);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return load_sub_index_regions(layout);
+}
+
+static int create_layout_factory(struct index_layout *layout,
+ const struct uds_configuration *config)
+{
+ int result;
+ size_t writable_size;
+ struct io_factory *factory = NULL;
+
+ result = uds_make_io_factory(config->bdev, &factory);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
+ if (writable_size < config->size + config->offset) {
+ uds_put_io_factory(factory);
+ vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
+ writable_size, config->size + config->offset);
+ return -ENOSPC;
+ }
+
+ layout->factory = factory;
+ layout->factory_size = (config->size > 0) ? config->size : writable_size;
+ layout->offset = config->offset;
+ return UDS_SUCCESS;
+}
+
+int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
+ struct index_layout **layout_ptr)
+{
+ int result;
+ struct index_layout *layout = NULL;
+ struct save_layout_sizes sizes;
+
+ result = compute_sizes(config, &sizes);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = vdo_allocate(1, struct index_layout, __func__, &layout);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = create_layout_factory(layout, config);
+ if (result != UDS_SUCCESS) {
+ uds_free_index_layout(layout);
+ return result;
+ }
+
+ if (layout->factory_size < sizes.total_size) {
+ vdo_log_error("index storage (%zu) is smaller than the required size %llu",
+ layout->factory_size,
+ (unsigned long long) sizes.total_size);
+ uds_free_index_layout(layout);
+ return -ENOSPC;
+ }
+
+ if (new_layout)
+ result = create_index_layout(layout, config);
+ else
+ result = load_index_layout(layout, config);
+ if (result != UDS_SUCCESS) {
+ uds_free_index_layout(layout);
+ return result;
+ }
+
+ *layout_ptr = layout;
+ return UDS_SUCCESS;
+}
+
+void uds_free_index_layout(struct index_layout *layout)
+{
+ if (layout == NULL)
+ return;
+
+ vdo_free(layout->index.saves);
+ if (layout->factory != NULL)
+ uds_put_io_factory(layout->factory);
+
+ vdo_free(layout);
+}
+
+int uds_replace_index_layout_storage(struct index_layout *layout,
+ struct block_device *bdev)
+{
+ return uds_replace_storage(layout->factory, bdev);
+}
+
+/* Obtain a dm_bufio_client for the volume region. */
+int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
+ unsigned int reserved_buffers,
+ struct dm_bufio_client **client_ptr)
+{
+ off_t offset = (layout->index.volume.start_block +
+ layout->super.volume_offset -
+ layout->super.start_offset);
+
+ return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
+ client_ptr);
+}
+
+u64 uds_get_volume_nonce(struct index_layout *layout)
+{
+ return layout->index.nonce;
+}
diff --git a/drivers/md/dm-vdo/indexer/index-layout.h b/drivers/md/dm-vdo/indexer/index-layout.h
new file mode 100644
index 000000000000..e9ac6f4302d6
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index-layout.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_INDEX_LAYOUT_H
+#define UDS_INDEX_LAYOUT_H
+
+#include "config.h"
+#include "indexer.h"
+#include "io-factory.h"
+
+/*
+ * The index layout describes the format of the index on the underlying storage, and is responsible
+ * for creating those structures when the index is first created. It also validates the index data
+ * when loading a saved index, and updates it when saving the index.
+ */
+
+struct index_layout;
+
+int __must_check uds_make_index_layout(struct uds_configuration *config, bool new_layout,
+ struct index_layout **layout_ptr);
+
+void uds_free_index_layout(struct index_layout *layout);
+
+int __must_check uds_replace_index_layout_storage(struct index_layout *layout,
+ struct block_device *bdev);
+
+int __must_check uds_load_index_state(struct index_layout *layout,
+ struct uds_index *index);
+
+int __must_check uds_save_index_state(struct index_layout *layout,
+ struct uds_index *index);
+
+int __must_check uds_discard_open_chapter(struct index_layout *layout);
+
+u64 __must_check uds_get_volume_nonce(struct index_layout *layout);
+
+int __must_check uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
+ unsigned int reserved_buffers,
+ struct dm_bufio_client **client_ptr);
+
+#endif /* UDS_INDEX_LAYOUT_H */
diff --git a/drivers/md/dm-vdo/indexer/index-page-map.c b/drivers/md/dm-vdo/indexer/index-page-map.c
new file mode 100644
index 000000000000..00b44e07d0c1
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index-page-map.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "index-page-map.h"
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+#include "string-utils.h"
+#include "thread-utils.h"
+
+#include "hash-utils.h"
+#include "indexer.h"
+
+/*
+ * The index page map is conceptually a two-dimensional array indexed by chapter number and index
+ * page number within the chapter. Each entry contains the number of the last delta list on that
+ * index page. In order to save memory, the information for the last page in each chapter is not
+ * recorded, as it is known from the geometry.
+ */
+
+static const u8 PAGE_MAP_MAGIC[] = "ALBIPM02";
+
+#define PAGE_MAP_MAGIC_LENGTH (sizeof(PAGE_MAP_MAGIC) - 1)
+
+static inline u32 get_entry_count(const struct index_geometry *geometry)
+{
+ return geometry->chapters_per_volume * (geometry->index_pages_per_chapter - 1);
+}
+
+int uds_make_index_page_map(const struct index_geometry *geometry,
+ struct index_page_map **map_ptr)
+{
+ int result;
+ struct index_page_map *map;
+
+ result = vdo_allocate(1, struct index_page_map, "page map", &map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ map->geometry = geometry;
+ map->entries_per_chapter = geometry->index_pages_per_chapter - 1;
+ result = vdo_allocate(get_entry_count(geometry), u16, "Index Page Map Entries",
+ &map->entries);
+ if (result != VDO_SUCCESS) {
+ uds_free_index_page_map(map);
+ return result;
+ }
+
+ *map_ptr = map;
+ return UDS_SUCCESS;
+}
+
+void uds_free_index_page_map(struct index_page_map *map)
+{
+ if (map != NULL) {
+ vdo_free(map->entries);
+ vdo_free(map);
+ }
+}
+
+void uds_update_index_page_map(struct index_page_map *map, u64 virtual_chapter_number,
+ u32 chapter_number, u32 index_page_number,
+ u32 delta_list_number)
+{
+ size_t slot;
+
+ map->last_update = virtual_chapter_number;
+ if (index_page_number == map->entries_per_chapter)
+ return;
+
+ slot = (chapter_number * map->entries_per_chapter) + index_page_number;
+ map->entries[slot] = delta_list_number;
+}
+
+u32 uds_find_index_page_number(const struct index_page_map *map,
+ const struct uds_record_name *name, u32 chapter_number)
+{
+ u32 delta_list_number = uds_hash_to_chapter_delta_list(name, map->geometry);
+ u32 slot = chapter_number * map->entries_per_chapter;
+ u32 page;
+
+ for (page = 0; page < map->entries_per_chapter; page++) {
+ if (delta_list_number <= map->entries[slot + page])
+ break;
+ }
+
+ return page;
+}
+
+void uds_get_list_number_bounds(const struct index_page_map *map, u32 chapter_number,
+ u32 index_page_number, u32 *lowest_list,
+ u32 *highest_list)
+{
+ u32 slot = chapter_number * map->entries_per_chapter;
+
+ *lowest_list = ((index_page_number == 0) ?
+ 0 : map->entries[slot + index_page_number - 1] + 1);
+ *highest_list = ((index_page_number < map->entries_per_chapter) ?
+ map->entries[slot + index_page_number] :
+ map->geometry->delta_lists_per_chapter - 1);
+}
+
+u64 uds_compute_index_page_map_save_size(const struct index_geometry *geometry)
+{
+ return PAGE_MAP_MAGIC_LENGTH + sizeof(u64) + sizeof(u16) * get_entry_count(geometry);
+}
+
+int uds_write_index_page_map(struct index_page_map *map, struct buffered_writer *writer)
+{
+ int result;
+ u8 *buffer;
+ size_t offset = 0;
+ u64 saved_size = uds_compute_index_page_map_save_size(map->geometry);
+ u32 i;
+
+ result = vdo_allocate(saved_size, u8, "page map data", &buffer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ memcpy(buffer, PAGE_MAP_MAGIC, PAGE_MAP_MAGIC_LENGTH);
+ offset += PAGE_MAP_MAGIC_LENGTH;
+ encode_u64_le(buffer, &offset, map->last_update);
+ for (i = 0; i < get_entry_count(map->geometry); i++)
+ encode_u16_le(buffer, &offset, map->entries[i]);
+
+ result = uds_write_to_buffered_writer(writer, buffer, offset);
+ vdo_free(buffer);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return uds_flush_buffered_writer(writer);
+}
+
+int uds_read_index_page_map(struct index_page_map *map, struct buffered_reader *reader)
+{
+ int result;
+ u8 magic[PAGE_MAP_MAGIC_LENGTH];
+ u8 *buffer;
+ size_t offset = 0;
+ u64 saved_size = uds_compute_index_page_map_save_size(map->geometry);
+ u32 i;
+
+ result = vdo_allocate(saved_size, u8, "page map data", &buffer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_read_from_buffered_reader(reader, buffer, saved_size);
+ if (result != UDS_SUCCESS) {
+ vdo_free(buffer);
+ return result;
+ }
+
+ memcpy(&magic, buffer, PAGE_MAP_MAGIC_LENGTH);
+ offset += PAGE_MAP_MAGIC_LENGTH;
+ if (memcmp(magic, PAGE_MAP_MAGIC, PAGE_MAP_MAGIC_LENGTH) != 0) {
+ vdo_free(buffer);
+ return UDS_CORRUPT_DATA;
+ }
+
+ decode_u64_le(buffer, &offset, &map->last_update);
+ for (i = 0; i < get_entry_count(map->geometry); i++)
+ decode_u16_le(buffer, &offset, &map->entries[i]);
+
+ vdo_free(buffer);
+ vdo_log_debug("read index page map, last update %llu",
+ (unsigned long long) map->last_update);
+ return UDS_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/indexer/index-page-map.h b/drivers/md/dm-vdo/indexer/index-page-map.h
new file mode 100644
index 000000000000..b327c0bb9656
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index-page-map.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_INDEX_PAGE_MAP_H
+#define UDS_INDEX_PAGE_MAP_H
+
+#include "geometry.h"
+#include "io-factory.h"
+
+/*
+ * The index maintains a page map which records how the chapter delta lists are distributed among
+ * the index pages for each chapter, allowing the volume to be efficient about reading only pages
+ * that it knows it will need.
+ */
+
+struct index_page_map {
+ const struct index_geometry *geometry;
+ u64 last_update;
+ u32 entries_per_chapter;
+ u16 *entries;
+};
+
+int __must_check uds_make_index_page_map(const struct index_geometry *geometry,
+ struct index_page_map **map_ptr);
+
+void uds_free_index_page_map(struct index_page_map *map);
+
+int __must_check uds_read_index_page_map(struct index_page_map *map,
+ struct buffered_reader *reader);
+
+int __must_check uds_write_index_page_map(struct index_page_map *map,
+ struct buffered_writer *writer);
+
+void uds_update_index_page_map(struct index_page_map *map, u64 virtual_chapter_number,
+ u32 chapter_number, u32 index_page_number,
+ u32 delta_list_number);
+
+u32 __must_check uds_find_index_page_number(const struct index_page_map *map,
+ const struct uds_record_name *name,
+ u32 chapter_number);
+
+void uds_get_list_number_bounds(const struct index_page_map *map, u32 chapter_number,
+ u32 index_page_number, u32 *lowest_list,
+ u32 *highest_list);
+
+u64 uds_compute_index_page_map_save_size(const struct index_geometry *geometry);
+
+#endif /* UDS_INDEX_PAGE_MAP_H */
diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c
new file mode 100644
index 000000000000..aee0914d604a
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index-session.c
@@ -0,0 +1,739 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "index-session.h"
+
+#include <linux/atomic.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "time-utils.h"
+
+#include "funnel-requestqueue.h"
+#include "index.h"
+#include "index-layout.h"
+
+/*
+ * The index session contains a lock (the request_mutex) which ensures that only one thread can
+ * change the state of its index at a time. The state field indicates the current state of the
+ * index through a set of descriptive flags. The request_mutex must be notified whenever a
+ * non-transient state flag is cleared. The request_mutex is also used to count the number of
+ * requests currently in progress so that they can be drained when suspending or closing the index.
+ *
+ * If the index session is suspended shortly after opening an index, it may have to suspend during
+ * a rebuild. Depending on the size of the index, a rebuild may take a significant amount of time,
+ * so UDS allows the rebuild to be paused in order to suspend the session in a timely manner. When
+ * the index session is resumed, the rebuild can continue from where it left off. If the index
+ * session is shut down with a suspended rebuild, the rebuild progress is abandoned and the rebuild
+ * will start from the beginning the next time the index is loaded. The mutex and status fields in
+ * the index_load_context are used to record the state of any interrupted rebuild.
+ */
+
+enum index_session_flag_bit {
+ IS_FLAG_BIT_START = 8,
+ /* The session has started loading an index but not completed it. */
+ IS_FLAG_BIT_LOADING = IS_FLAG_BIT_START,
+ /* The session has loaded an index, which can handle requests. */
+ IS_FLAG_BIT_LOADED,
+ /* The session's index has been permanently disabled. */
+ IS_FLAG_BIT_DISABLED,
+ /* The session's index is suspended. */
+ IS_FLAG_BIT_SUSPENDED,
+ /* The session is handling some index state change. */
+ IS_FLAG_BIT_WAITING,
+ /* The session's index is closing and draining requests. */
+ IS_FLAG_BIT_CLOSING,
+ /* The session is being destroyed and is draining requests. */
+ IS_FLAG_BIT_DESTROYING,
+};
+
+enum index_session_flag {
+ IS_FLAG_LOADED = (1 << IS_FLAG_BIT_LOADED),
+ IS_FLAG_LOADING = (1 << IS_FLAG_BIT_LOADING),
+ IS_FLAG_DISABLED = (1 << IS_FLAG_BIT_DISABLED),
+ IS_FLAG_SUSPENDED = (1 << IS_FLAG_BIT_SUSPENDED),
+ IS_FLAG_WAITING = (1 << IS_FLAG_BIT_WAITING),
+ IS_FLAG_CLOSING = (1 << IS_FLAG_BIT_CLOSING),
+ IS_FLAG_DESTROYING = (1 << IS_FLAG_BIT_DESTROYING),
+};
+
+/* Release a reference to an index session. */
+static void release_index_session(struct uds_index_session *index_session)
+{
+ mutex_lock(&index_session->request_mutex);
+ if (--index_session->request_count == 0)
+ uds_broadcast_cond(&index_session->request_cond);
+ mutex_unlock(&index_session->request_mutex);
+}
+
+/*
+ * Acquire a reference to the index session for an asynchronous index request. The reference must
+ * eventually be released with a corresponding call to release_index_session().
+ */
+static int get_index_session(struct uds_index_session *index_session)
+{
+ unsigned int state;
+ int result = UDS_SUCCESS;
+
+ mutex_lock(&index_session->request_mutex);
+ index_session->request_count++;
+ state = index_session->state;
+ mutex_unlock(&index_session->request_mutex);
+
+ if (state == IS_FLAG_LOADED) {
+ return UDS_SUCCESS;
+ } else if (state & IS_FLAG_DISABLED) {
+ result = UDS_DISABLED;
+ } else if ((state & IS_FLAG_LOADING) ||
+ (state & IS_FLAG_SUSPENDED) ||
+ (state & IS_FLAG_WAITING)) {
+ result = -EBUSY;
+ } else {
+ result = UDS_NO_INDEX;
+ }
+
+ release_index_session(index_session);
+ return result;
+}
+
+int uds_launch_request(struct uds_request *request)
+{
+ size_t internal_size;
+ int result;
+
+ if (request->callback == NULL) {
+ vdo_log_error("missing required callback");
+ return -EINVAL;
+ }
+
+ switch (request->type) {
+ case UDS_DELETE:
+ case UDS_POST:
+ case UDS_QUERY:
+ case UDS_QUERY_NO_UPDATE:
+ case UDS_UPDATE:
+ break;
+ default:
+ vdo_log_error("received invalid callback type");
+ return -EINVAL;
+ }
+
+ /* Reset all internal fields before processing. */
+ internal_size =
+ sizeof(struct uds_request) - offsetof(struct uds_request, zone_number);
+ // FIXME should be using struct_group for this instead
+ memset((char *) request + sizeof(*request) - internal_size, 0, internal_size);
+
+ result = get_index_session(request->session);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ request->found = false;
+ request->unbatched = false;
+ request->index = request->session->index;
+
+ uds_enqueue_request(request, STAGE_TRIAGE);
+ return UDS_SUCCESS;
+}
+
+static void enter_callback_stage(struct uds_request *request)
+{
+ if (request->status != UDS_SUCCESS) {
+ /* All request errors are considered unrecoverable */
+ mutex_lock(&request->session->request_mutex);
+ request->session->state |= IS_FLAG_DISABLED;
+ mutex_unlock(&request->session->request_mutex);
+ }
+
+ uds_request_queue_enqueue(request->session->callback_queue, request);
+}
+
+static inline void count_once(u64 *count_ptr)
+{
+ WRITE_ONCE(*count_ptr, READ_ONCE(*count_ptr) + 1);
+}
+
+static void update_session_stats(struct uds_request *request)
+{
+ struct session_stats *session_stats = &request->session->stats;
+
+ count_once(&session_stats->requests);
+
+ switch (request->type) {
+ case UDS_POST:
+ if (request->found)
+ count_once(&session_stats->posts_found);
+ else
+ count_once(&session_stats->posts_not_found);
+
+ if (request->location == UDS_LOCATION_IN_OPEN_CHAPTER)
+ count_once(&session_stats->posts_found_open_chapter);
+ else if (request->location == UDS_LOCATION_IN_DENSE)
+ count_once(&session_stats->posts_found_dense);
+ else if (request->location == UDS_LOCATION_IN_SPARSE)
+ count_once(&session_stats->posts_found_sparse);
+ break;
+
+ case UDS_UPDATE:
+ if (request->found)
+ count_once(&session_stats->updates_found);
+ else
+ count_once(&session_stats->updates_not_found);
+ break;
+
+ case UDS_DELETE:
+ if (request->found)
+ count_once(&session_stats->deletions_found);
+ else
+ count_once(&session_stats->deletions_not_found);
+ break;
+
+ case UDS_QUERY:
+ case UDS_QUERY_NO_UPDATE:
+ if (request->found)
+ count_once(&session_stats->queries_found);
+ else
+ count_once(&session_stats->queries_not_found);
+ break;
+
+ default:
+ request->status = VDO_ASSERT(false, "unknown request type: %d",
+ request->type);
+ }
+}
+
+static void handle_callbacks(struct uds_request *request)
+{
+ struct uds_index_session *index_session = request->session;
+
+ if (request->status == UDS_SUCCESS)
+ update_session_stats(request);
+
+ request->status = uds_status_to_errno(request->status);
+ request->callback(request);
+ release_index_session(index_session);
+}
+
+static int __must_check make_empty_index_session(struct uds_index_session **index_session_ptr)
+{
+ int result;
+ struct uds_index_session *session;
+
+ result = vdo_allocate(1, struct uds_index_session, __func__, &session);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ mutex_init(&session->request_mutex);
+ uds_init_cond(&session->request_cond);
+ mutex_init(&session->load_context.mutex);
+ uds_init_cond(&session->load_context.cond);
+
+ result = uds_make_request_queue("callbackW", &handle_callbacks,
+ &session->callback_queue);
+ if (result != UDS_SUCCESS) {
+ vdo_free(session);
+ return result;
+ }
+
+ *index_session_ptr = session;
+ return UDS_SUCCESS;
+}
+
+int uds_create_index_session(struct uds_index_session **session)
+{
+ if (session == NULL) {
+ vdo_log_error("missing session pointer");
+ return -EINVAL;
+ }
+
+ return uds_status_to_errno(make_empty_index_session(session));
+}
+
+static int __must_check start_loading_index_session(struct uds_index_session *index_session)
+{
+ int result;
+
+ mutex_lock(&index_session->request_mutex);
+ if (index_session->state & IS_FLAG_SUSPENDED) {
+ vdo_log_info("Index session is suspended");
+ result = -EBUSY;
+ } else if (index_session->state != 0) {
+ vdo_log_info("Index is already loaded");
+ result = -EBUSY;
+ } else {
+ index_session->state |= IS_FLAG_LOADING;
+ result = UDS_SUCCESS;
+ }
+ mutex_unlock(&index_session->request_mutex);
+ return result;
+}
+
+static void finish_loading_index_session(struct uds_index_session *index_session,
+ int result)
+{
+ mutex_lock(&index_session->request_mutex);
+ index_session->state &= ~IS_FLAG_LOADING;
+ if (result == UDS_SUCCESS)
+ index_session->state |= IS_FLAG_LOADED;
+
+ uds_broadcast_cond(&index_session->request_cond);
+ mutex_unlock(&index_session->request_mutex);
+}
+
+static int initialize_index_session(struct uds_index_session *index_session,
+ enum uds_open_index_type open_type)
+{
+ int result;
+ struct uds_configuration *config;
+
+ result = uds_make_configuration(&index_session->parameters, &config);
+ if (result != UDS_SUCCESS) {
+ vdo_log_error_strerror(result, "Failed to allocate config");
+ return result;
+ }
+
+ memset(&index_session->stats, 0, sizeof(index_session->stats));
+ result = uds_make_index(config, open_type, &index_session->load_context,
+ enter_callback_stage, &index_session->index);
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "Failed to make index");
+ else
+ uds_log_configuration(config);
+
+ uds_free_configuration(config);
+ return result;
+}
+
+static const char *get_open_type_string(enum uds_open_index_type open_type)
+{
+ switch (open_type) {
+ case UDS_CREATE:
+ return "creating index";
+ case UDS_LOAD:
+ return "loading or rebuilding index";
+ case UDS_NO_REBUILD:
+ return "loading index";
+ default:
+ return "unknown open method";
+ }
+}
+
+/*
+ * Open an index under the given session. This operation will fail if the
+ * index session is suspended, or if there is already an open index.
+ */
+int uds_open_index(enum uds_open_index_type open_type,
+ const struct uds_parameters *parameters,
+ struct uds_index_session *session)
+{
+ int result;
+ char name[BDEVNAME_SIZE];
+
+ if (parameters == NULL) {
+ vdo_log_error("missing required parameters");
+ return -EINVAL;
+ }
+ if (parameters->bdev == NULL) {
+ vdo_log_error("missing required block device");
+ return -EINVAL;
+ }
+ if (session == NULL) {
+ vdo_log_error("missing required session pointer");
+ return -EINVAL;
+ }
+
+ result = start_loading_index_session(session);
+ if (result != UDS_SUCCESS)
+ return uds_status_to_errno(result);
+
+ session->parameters = *parameters;
+ format_dev_t(name, parameters->bdev->bd_dev);
+ vdo_log_info("%s: %s", get_open_type_string(open_type), name);
+
+ result = initialize_index_session(session, open_type);
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "Failed %s",
+ get_open_type_string(open_type));
+
+ finish_loading_index_session(session, result);
+ return uds_status_to_errno(result);
+}
+
+static void wait_for_no_requests_in_progress(struct uds_index_session *index_session)
+{
+ mutex_lock(&index_session->request_mutex);
+ while (index_session->request_count > 0) {
+ uds_wait_cond(&index_session->request_cond,
+ &index_session->request_mutex);
+ }
+ mutex_unlock(&index_session->request_mutex);
+}
+
+static int __must_check save_index(struct uds_index_session *index_session)
+{
+ wait_for_no_requests_in_progress(index_session);
+ return uds_save_index(index_session->index);
+}
+
+static void suspend_rebuild(struct uds_index_session *session)
+{
+ mutex_lock(&session->load_context.mutex);
+ switch (session->load_context.status) {
+ case INDEX_OPENING:
+ session->load_context.status = INDEX_SUSPENDING;
+
+ /* Wait until the index indicates that it is not replaying. */
+ while ((session->load_context.status != INDEX_SUSPENDED) &&
+ (session->load_context.status != INDEX_READY)) {
+ uds_wait_cond(&session->load_context.cond,
+ &session->load_context.mutex);
+ }
+
+ break;
+
+ case INDEX_READY:
+ /* Index load does not need to be suspended. */
+ break;
+
+ case INDEX_SUSPENDED:
+ case INDEX_SUSPENDING:
+ case INDEX_FREEING:
+ default:
+ /* These cases should not happen. */
+ VDO_ASSERT_LOG_ONLY(false, "Bad load context state %u",
+ session->load_context.status);
+ break;
+ }
+ mutex_unlock(&session->load_context.mutex);
+}
+
+/*
+ * Suspend index operation, draining all current index requests and preventing new index requests
+ * from starting. Optionally saves all index data before returning.
+ */
+int uds_suspend_index_session(struct uds_index_session *session, bool save)
+{
+ int result = UDS_SUCCESS;
+ bool no_work = false;
+ bool rebuilding = false;
+
+ /* Wait for any current index state change to complete. */
+ mutex_lock(&session->request_mutex);
+ while (session->state & IS_FLAG_CLOSING)
+ uds_wait_cond(&session->request_cond, &session->request_mutex);
+
+ if ((session->state & IS_FLAG_WAITING) || (session->state & IS_FLAG_DESTROYING)) {
+ no_work = true;
+ vdo_log_info("Index session is already changing state");
+ result = -EBUSY;
+ } else if (session->state & IS_FLAG_SUSPENDED) {
+ no_work = true;
+ } else if (session->state & IS_FLAG_LOADING) {
+ session->state |= IS_FLAG_WAITING;
+ rebuilding = true;
+ } else if (session->state & IS_FLAG_LOADED) {
+ session->state |= IS_FLAG_WAITING;
+ } else {
+ no_work = true;
+ session->state |= IS_FLAG_SUSPENDED;
+ uds_broadcast_cond(&session->request_cond);
+ }
+ mutex_unlock(&session->request_mutex);
+
+ if (no_work)
+ return uds_status_to_errno(result);
+
+ if (rebuilding)
+ suspend_rebuild(session);
+ else if (save)
+ result = save_index(session);
+ else
+ result = uds_flush_index_session(session);
+
+ mutex_lock(&session->request_mutex);
+ session->state &= ~IS_FLAG_WAITING;
+ session->state |= IS_FLAG_SUSPENDED;
+ uds_broadcast_cond(&session->request_cond);
+ mutex_unlock(&session->request_mutex);
+ return uds_status_to_errno(result);
+}
+
+static int replace_device(struct uds_index_session *session, struct block_device *bdev)
+{
+ int result;
+
+ result = uds_replace_index_storage(session->index, bdev);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ session->parameters.bdev = bdev;
+ return UDS_SUCCESS;
+}
+
+/*
+ * Resume index operation after being suspended. If the index is suspended and the supplied block
+ * device differs from the current backing store, the index will start using the new backing store.
+ */
+int uds_resume_index_session(struct uds_index_session *session,
+ struct block_device *bdev)
+{
+ int result = UDS_SUCCESS;
+ bool no_work = false;
+ bool resume_replay = false;
+
+ mutex_lock(&session->request_mutex);
+ if (session->state & IS_FLAG_WAITING) {
+ vdo_log_info("Index session is already changing state");
+ no_work = true;
+ result = -EBUSY;
+ } else if (!(session->state & IS_FLAG_SUSPENDED)) {
+ /* If not suspended, just succeed. */
+ no_work = true;
+ result = UDS_SUCCESS;
+ } else {
+ session->state |= IS_FLAG_WAITING;
+ if (session->state & IS_FLAG_LOADING)
+ resume_replay = true;
+ }
+ mutex_unlock(&session->request_mutex);
+
+ if (no_work)
+ return result;
+
+ if ((session->index != NULL) && (bdev != session->parameters.bdev)) {
+ result = replace_device(session, bdev);
+ if (result != UDS_SUCCESS) {
+ mutex_lock(&session->request_mutex);
+ session->state &= ~IS_FLAG_WAITING;
+ uds_broadcast_cond(&session->request_cond);
+ mutex_unlock(&session->request_mutex);
+ return uds_status_to_errno(result);
+ }
+ }
+
+ if (resume_replay) {
+ mutex_lock(&session->load_context.mutex);
+ switch (session->load_context.status) {
+ case INDEX_SUSPENDED:
+ session->load_context.status = INDEX_OPENING;
+ /* Notify the index to start replaying again. */
+ uds_broadcast_cond(&session->load_context.cond);
+ break;
+
+ case INDEX_READY:
+ /* There is no index rebuild to resume. */
+ break;
+
+ case INDEX_OPENING:
+ case INDEX_SUSPENDING:
+ case INDEX_FREEING:
+ default:
+ /* These cases should not happen; do nothing. */
+ VDO_ASSERT_LOG_ONLY(false, "Bad load context state %u",
+ session->load_context.status);
+ break;
+ }
+ mutex_unlock(&session->load_context.mutex);
+ }
+
+ mutex_lock(&session->request_mutex);
+ session->state &= ~IS_FLAG_WAITING;
+ session->state &= ~IS_FLAG_SUSPENDED;
+ uds_broadcast_cond(&session->request_cond);
+ mutex_unlock(&session->request_mutex);
+ return UDS_SUCCESS;
+}
+
+static int save_and_free_index(struct uds_index_session *index_session)
+{
+ int result = UDS_SUCCESS;
+ bool suspended;
+ struct uds_index *index = index_session->index;
+
+ if (index == NULL)
+ return UDS_SUCCESS;
+
+ mutex_lock(&index_session->request_mutex);
+ suspended = (index_session->state & IS_FLAG_SUSPENDED);
+ mutex_unlock(&index_session->request_mutex);
+
+ if (!suspended) {
+ result = uds_save_index(index);
+ if (result != UDS_SUCCESS)
+ vdo_log_warning_strerror(result,
+ "ignoring error from save_index");
+ }
+ uds_free_index(index);
+ index_session->index = NULL;
+
+ /*
+ * Reset all index state that happens to be in the index
+ * session, so it doesn't affect any future index.
+ */
+ mutex_lock(&index_session->load_context.mutex);
+ index_session->load_context.status = INDEX_OPENING;
+ mutex_unlock(&index_session->load_context.mutex);
+
+ mutex_lock(&index_session->request_mutex);
+ /* Only the suspend bit will remain relevant. */
+ index_session->state &= IS_FLAG_SUSPENDED;
+ mutex_unlock(&index_session->request_mutex);
+
+ return result;
+}
+
+/* Save and close the current index. */
+int uds_close_index(struct uds_index_session *index_session)
+{
+ int result = UDS_SUCCESS;
+
+ /* Wait for any current index state change to complete. */
+ mutex_lock(&index_session->request_mutex);
+ while ((index_session->state & IS_FLAG_WAITING) ||
+ (index_session->state & IS_FLAG_CLOSING)) {
+ uds_wait_cond(&index_session->request_cond,
+ &index_session->request_mutex);
+ }
+
+ if (index_session->state & IS_FLAG_SUSPENDED) {
+ vdo_log_info("Index session is suspended");
+ result = -EBUSY;
+ } else if ((index_session->state & IS_FLAG_DESTROYING) ||
+ !(index_session->state & IS_FLAG_LOADED)) {
+ /* The index doesn't exist, hasn't finished loading, or is being destroyed. */
+ result = UDS_NO_INDEX;
+ } else {
+ index_session->state |= IS_FLAG_CLOSING;
+ }
+ mutex_unlock(&index_session->request_mutex);
+ if (result != UDS_SUCCESS)
+ return uds_status_to_errno(result);
+
+ vdo_log_debug("Closing index");
+ wait_for_no_requests_in_progress(index_session);
+ result = save_and_free_index(index_session);
+ vdo_log_debug("Closed index");
+
+ mutex_lock(&index_session->request_mutex);
+ index_session->state &= ~IS_FLAG_CLOSING;
+ uds_broadcast_cond(&index_session->request_cond);
+ mutex_unlock(&index_session->request_mutex);
+ return uds_status_to_errno(result);
+}
+
+/* This will save and close an open index before destroying the session. */
+int uds_destroy_index_session(struct uds_index_session *index_session)
+{
+ int result;
+ bool load_pending = false;
+
+ vdo_log_debug("Destroying index session");
+
+ /* Wait for any current index state change to complete. */
+ mutex_lock(&index_session->request_mutex);
+ while ((index_session->state & IS_FLAG_WAITING) ||
+ (index_session->state & IS_FLAG_CLOSING)) {
+ uds_wait_cond(&index_session->request_cond,
+ &index_session->request_mutex);
+ }
+
+ if (index_session->state & IS_FLAG_DESTROYING) {
+ mutex_unlock(&index_session->request_mutex);
+ vdo_log_info("Index session is already closing");
+ return -EBUSY;
+ }
+
+ index_session->state |= IS_FLAG_DESTROYING;
+ load_pending = ((index_session->state & IS_FLAG_LOADING) &&
+ (index_session->state & IS_FLAG_SUSPENDED));
+ mutex_unlock(&index_session->request_mutex);
+
+ if (load_pending) {
+ /* Tell the index to terminate the rebuild. */
+ mutex_lock(&index_session->load_context.mutex);
+ if (index_session->load_context.status == INDEX_SUSPENDED) {
+ index_session->load_context.status = INDEX_FREEING;
+ uds_broadcast_cond(&index_session->load_context.cond);
+ }
+ mutex_unlock(&index_session->load_context.mutex);
+
+ /* Wait until the load exits before proceeding. */
+ mutex_lock(&index_session->request_mutex);
+ while (index_session->state & IS_FLAG_LOADING) {
+ uds_wait_cond(&index_session->request_cond,
+ &index_session->request_mutex);
+ }
+ mutex_unlock(&index_session->request_mutex);
+ }
+
+ wait_for_no_requests_in_progress(index_session);
+ result = save_and_free_index(index_session);
+ uds_request_queue_finish(index_session->callback_queue);
+ index_session->callback_queue = NULL;
+ vdo_log_debug("Destroyed index session");
+ vdo_free(index_session);
+ return uds_status_to_errno(result);
+}
+
+/* Wait until all callbacks for index operations are complete. */
+int uds_flush_index_session(struct uds_index_session *index_session)
+{
+ wait_for_no_requests_in_progress(index_session);
+ uds_wait_for_idle_index(index_session->index);
+ return UDS_SUCCESS;
+}
+
+/* Statistics collection is intended to be thread-safe. */
+static void collect_stats(const struct uds_index_session *index_session,
+ struct uds_index_stats *stats)
+{
+ const struct session_stats *session_stats = &index_session->stats;
+
+ stats->current_time = ktime_to_seconds(current_time_ns(CLOCK_REALTIME));
+ stats->posts_found = READ_ONCE(session_stats->posts_found);
+ stats->in_memory_posts_found = READ_ONCE(session_stats->posts_found_open_chapter);
+ stats->dense_posts_found = READ_ONCE(session_stats->posts_found_dense);
+ stats->sparse_posts_found = READ_ONCE(session_stats->posts_found_sparse);
+ stats->posts_not_found = READ_ONCE(session_stats->posts_not_found);
+ stats->updates_found = READ_ONCE(session_stats->updates_found);
+ stats->updates_not_found = READ_ONCE(session_stats->updates_not_found);
+ stats->deletions_found = READ_ONCE(session_stats->deletions_found);
+ stats->deletions_not_found = READ_ONCE(session_stats->deletions_not_found);
+ stats->queries_found = READ_ONCE(session_stats->queries_found);
+ stats->queries_not_found = READ_ONCE(session_stats->queries_not_found);
+ stats->requests = READ_ONCE(session_stats->requests);
+}
+
+int uds_get_index_session_stats(struct uds_index_session *index_session,
+ struct uds_index_stats *stats)
+{
+ if (stats == NULL) {
+ vdo_log_error("received a NULL index stats pointer");
+ return -EINVAL;
+ }
+
+ collect_stats(index_session, stats);
+ if (index_session->index != NULL) {
+ uds_get_index_stats(index_session->index, stats);
+ } else {
+ stats->entries_indexed = 0;
+ stats->memory_used = 0;
+ stats->collisions = 0;
+ stats->entries_discarded = 0;
+ }
+
+ return UDS_SUCCESS;
+}
+
+void uds_wait_cond(struct cond_var *cv, struct mutex *mutex)
+{
+ DEFINE_WAIT(__wait);
+
+ prepare_to_wait(&cv->wait_queue, &__wait, TASK_IDLE);
+ mutex_unlock(mutex);
+ schedule();
+ finish_wait(&cv->wait_queue, &__wait);
+ mutex_lock(mutex);
+}
diff --git a/drivers/md/dm-vdo/indexer/index-session.h b/drivers/md/dm-vdo/indexer/index-session.h
new file mode 100644
index 000000000000..066648f6e062
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index-session.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_INDEX_SESSION_H
+#define UDS_INDEX_SESSION_H
+
+#include <linux/atomic.h>
+#include <linux/cache.h>
+
+#include "thread-utils.h"
+
+#include "config.h"
+#include "indexer.h"
+
+/*
+ * The index session mediates all interactions with a UDS index. Once the index session is created,
+ * it can be used to open, close, suspend, or recreate an index. It implements the majority of the
+ * functions in the top-level UDS API.
+ *
+ * If any deduplication request fails due to an internal error, the index is marked disabled. It
+ * will not accept any further requests and can only be closed. Closing the index will clear the
+ * disabled flag, and the index can then be reopened and recovered using the same index session.
+ */
+
+struct __aligned(L1_CACHE_BYTES) session_stats {
+ /* Post requests that found an entry */
+ u64 posts_found;
+ /* Post requests found in the open chapter */
+ u64 posts_found_open_chapter;
+ /* Post requests found in the dense index */
+ u64 posts_found_dense;
+ /* Post requests found in the sparse index */
+ u64 posts_found_sparse;
+ /* Post requests that did not find an entry */
+ u64 posts_not_found;
+ /* Update requests that found an entry */
+ u64 updates_found;
+ /* Update requests that did not find an entry */
+ u64 updates_not_found;
+ /* Delete requests that found an entry */
+ u64 deletions_found;
+ /* Delete requests that did not find an entry */
+ u64 deletions_not_found;
+ /* Query requests that found an entry */
+ u64 queries_found;
+ /* Query requests that did not find an entry */
+ u64 queries_not_found;
+ /* Total number of requests */
+ u64 requests;
+};
+
+enum index_suspend_status {
+ /* An index load has started but the index is not ready for use. */
+ INDEX_OPENING = 0,
+ /* The index is able to handle requests. */
+ INDEX_READY,
+ /* The index is attempting to suspend a rebuild. */
+ INDEX_SUSPENDING,
+ /* An index rebuild has been suspended. */
+ INDEX_SUSPENDED,
+ /* An index rebuild is being stopped in order to shut down. */
+ INDEX_FREEING,
+};
+
+struct index_load_context {
+ struct mutex mutex;
+ struct cond_var cond;
+ enum index_suspend_status status;
+};
+
+struct uds_index_session {
+ unsigned int state;
+ struct uds_index *index;
+ struct uds_request_queue *callback_queue;
+ struct uds_parameters parameters;
+ struct index_load_context load_context;
+ struct mutex request_mutex;
+ struct cond_var request_cond;
+ int request_count;
+ struct session_stats stats;
+};
+
+#endif /* UDS_INDEX_SESSION_H */
diff --git a/drivers/md/dm-vdo/indexer/index.c b/drivers/md/dm-vdo/indexer/index.c
new file mode 100644
index 000000000000..1ba767144426
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index.c
@@ -0,0 +1,1388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+
+#include "index.h"
+
+#include "logger.h"
+#include "memory-alloc.h"
+
+#include "funnel-requestqueue.h"
+#include "hash-utils.h"
+#include "sparse-cache.h"
+
+static const u64 NO_LAST_SAVE = U64_MAX;
+
+/*
+ * When searching for deduplication records, the index first searches the volume index, and then
+ * searches the chapter index for the relevant chapter. If the chapter has been fully committed to
+ * storage, the chapter pages are loaded into the page cache. If the chapter has not yet been
+ * committed (either the open chapter or a recently closed one), the index searches the in-memory
+ * representation of the chapter. Finally, if the volume index does not find a record and the index
+ * is sparse, the index will search the sparse cache.
+ *
+ * The index send two kinds of messages to coordinate between zones: chapter close messages for the
+ * chapter writer, and sparse cache barrier messages for the sparse cache.
+ *
+ * The chapter writer is responsible for committing chapters of records to storage. Since zones can
+ * get different numbers of records, some zones may fall behind others. Each time a zone fills up
+ * its available space in a chapter, it informs the chapter writer that the chapter is complete,
+ * and also informs all other zones that it has closed the chapter. Each other zone will then close
+ * the chapter immediately, regardless of how full it is, in order to minimize skew between zones.
+ * Once every zone has closed the chapter, the chapter writer will commit that chapter to storage.
+ *
+ * The last zone to close the chapter also removes the oldest chapter from the volume index.
+ * Although that chapter is invalid for zones that have moved on, the existence of the open chapter
+ * means that those zones will never ask the volume index about it. No zone is allowed to get more
+ * than one chapter ahead of any other. If a zone is so far ahead that it tries to close another
+ * chapter before the previous one has been closed by all zones, it is forced to wait.
+ *
+ * The sparse cache relies on having the same set of chapter indexes available to all zones. When a
+ * request wants to add a chapter to the sparse cache, it sends a barrier message to each zone
+ * during the triage stage that acts as a rendezvous. Once every zone has reached the barrier and
+ * paused its operations, the cache membership is changed and each zone is then informed that it
+ * can proceed. More details can be found in the sparse cache documentation.
+ *
+ * If a sparse cache has only one zone, it will not create a triage queue, but it still needs the
+ * barrier message to change the sparse cache membership, so the index simulates the message by
+ * invoking the handler directly.
+ */
+
+struct chapter_writer {
+ /* The index to which we belong */
+ struct uds_index *index;
+ /* The thread to do the writing */
+ struct thread *thread;
+ /* The lock protecting the following fields */
+ struct mutex mutex;
+ /* The condition signalled on state changes */
+ struct cond_var cond;
+ /* Set to true to stop the thread */
+ bool stop;
+ /* The result from the most recent write */
+ int result;
+ /* The number of bytes allocated by the chapter writer */
+ size_t memory_size;
+ /* The number of zones which have submitted a chapter for writing */
+ unsigned int zones_to_write;
+ /* Open chapter index used by uds_close_open_chapter() */
+ struct open_chapter_index *open_chapter_index;
+ /* Collated records used by uds_close_open_chapter() */
+ struct uds_volume_record *collated_records;
+ /* The chapters to write (one per zone) */
+ struct open_chapter_zone *chapters[];
+};
+
+static bool is_zone_chapter_sparse(const struct index_zone *zone, u64 virtual_chapter)
+{
+ return uds_is_chapter_sparse(zone->index->volume->geometry,
+ zone->oldest_virtual_chapter,
+ zone->newest_virtual_chapter, virtual_chapter);
+}
+
+static int launch_zone_message(struct uds_zone_message message, unsigned int zone,
+ struct uds_index *index)
+{
+ int result;
+ struct uds_request *request;
+
+ result = vdo_allocate(1, struct uds_request, __func__, &request);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ request->index = index;
+ request->unbatched = true;
+ request->zone_number = zone;
+ request->zone_message = message;
+
+ uds_enqueue_request(request, STAGE_MESSAGE);
+ return UDS_SUCCESS;
+}
+
+static void enqueue_barrier_messages(struct uds_index *index, u64 virtual_chapter)
+{
+ struct uds_zone_message message = {
+ .type = UDS_MESSAGE_SPARSE_CACHE_BARRIER,
+ .virtual_chapter = virtual_chapter,
+ };
+ unsigned int zone;
+
+ for (zone = 0; zone < index->zone_count; zone++) {
+ int result = launch_zone_message(message, zone, index);
+
+ VDO_ASSERT_LOG_ONLY((result == UDS_SUCCESS), "barrier message allocation");
+ }
+}
+
+/*
+ * Determine whether this request should trigger a sparse cache barrier message to change the
+ * membership of the sparse cache. If a change in membership is desired, the function returns the
+ * chapter number to add.
+ */
+static u64 triage_index_request(struct uds_index *index, struct uds_request *request)
+{
+ u64 virtual_chapter;
+ struct index_zone *zone;
+
+ virtual_chapter = uds_lookup_volume_index_name(index->volume_index,
+ &request->record_name);
+ if (virtual_chapter == NO_CHAPTER)
+ return NO_CHAPTER;
+
+ zone = index->zones[request->zone_number];
+ if (!is_zone_chapter_sparse(zone, virtual_chapter))
+ return NO_CHAPTER;
+
+ /*
+ * FIXME: Optimize for a common case by remembering the chapter from the most recent
+ * barrier message and skipping this chapter if is it the same.
+ */
+
+ return virtual_chapter;
+}
+
+/*
+ * Simulate a message to change the sparse cache membership for a single-zone sparse index. This
+ * allows us to forgo the complicated locking required by a multi-zone sparse index. Any other kind
+ * of index does nothing here.
+ */
+static int simulate_index_zone_barrier_message(struct index_zone *zone,
+ struct uds_request *request)
+{
+ u64 sparse_virtual_chapter;
+
+ if ((zone->index->zone_count > 1) ||
+ !uds_is_sparse_index_geometry(zone->index->volume->geometry))
+ return UDS_SUCCESS;
+
+ sparse_virtual_chapter = triage_index_request(zone->index, request);
+ if (sparse_virtual_chapter == NO_CHAPTER)
+ return UDS_SUCCESS;
+
+ return uds_update_sparse_cache(zone, sparse_virtual_chapter);
+}
+
+/* This is the request processing function for the triage queue. */
+static void triage_request(struct uds_request *request)
+{
+ struct uds_index *index = request->index;
+ u64 sparse_virtual_chapter = triage_index_request(index, request);
+
+ if (sparse_virtual_chapter != NO_CHAPTER)
+ enqueue_barrier_messages(index, sparse_virtual_chapter);
+
+ uds_enqueue_request(request, STAGE_INDEX);
+}
+
+static int finish_previous_chapter(struct uds_index *index, u64 current_chapter_number)
+{
+ int result;
+ struct chapter_writer *writer = index->chapter_writer;
+
+ mutex_lock(&writer->mutex);
+ while (index->newest_virtual_chapter < current_chapter_number)
+ uds_wait_cond(&writer->cond, &writer->mutex);
+ result = writer->result;
+ mutex_unlock(&writer->mutex);
+
+ if (result != UDS_SUCCESS)
+ return vdo_log_error_strerror(result,
+ "Writing of previous open chapter failed");
+
+ return UDS_SUCCESS;
+}
+
+static int swap_open_chapter(struct index_zone *zone)
+{
+ int result;
+ struct open_chapter_zone *temporary_chapter;
+
+ result = finish_previous_chapter(zone->index, zone->newest_virtual_chapter);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ temporary_chapter = zone->open_chapter;
+ zone->open_chapter = zone->writing_chapter;
+ zone->writing_chapter = temporary_chapter;
+ return UDS_SUCCESS;
+}
+
+/*
+ * Inform the chapter writer that this zone is done with this chapter. The chapter won't start
+ * writing until all zones have closed it.
+ */
+static unsigned int start_closing_chapter(struct uds_index *index,
+ unsigned int zone_number,
+ struct open_chapter_zone *chapter)
+{
+ unsigned int finished_zones;
+ struct chapter_writer *writer = index->chapter_writer;
+
+ mutex_lock(&writer->mutex);
+ finished_zones = ++writer->zones_to_write;
+ writer->chapters[zone_number] = chapter;
+ uds_broadcast_cond(&writer->cond);
+ mutex_unlock(&writer->mutex);
+
+ return finished_zones;
+}
+
+static int announce_chapter_closed(struct index_zone *zone, u64 closed_chapter)
+{
+ int result;
+ unsigned int i;
+ struct uds_zone_message zone_message = {
+ .type = UDS_MESSAGE_ANNOUNCE_CHAPTER_CLOSED,
+ .virtual_chapter = closed_chapter,
+ };
+
+ for (i = 0; i < zone->index->zone_count; i++) {
+ if (zone->id == i)
+ continue;
+
+ result = launch_zone_message(zone_message, i, zone->index);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int open_next_chapter(struct index_zone *zone)
+{
+ int result;
+ u64 closed_chapter;
+ u64 expiring;
+ unsigned int finished_zones;
+ u32 expire_chapters;
+
+ vdo_log_debug("closing chapter %llu of zone %u after %u entries (%u short)",
+ (unsigned long long) zone->newest_virtual_chapter, zone->id,
+ zone->open_chapter->size,
+ zone->open_chapter->capacity - zone->open_chapter->size);
+
+ result = swap_open_chapter(zone);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ closed_chapter = zone->newest_virtual_chapter++;
+ uds_set_volume_index_zone_open_chapter(zone->index->volume_index, zone->id,
+ zone->newest_virtual_chapter);
+ uds_reset_open_chapter(zone->open_chapter);
+
+ finished_zones = start_closing_chapter(zone->index, zone->id,
+ zone->writing_chapter);
+ if ((finished_zones == 1) && (zone->index->zone_count > 1)) {
+ result = announce_chapter_closed(zone, closed_chapter);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ expiring = zone->oldest_virtual_chapter;
+ expire_chapters = uds_chapters_to_expire(zone->index->volume->geometry,
+ zone->newest_virtual_chapter);
+ zone->oldest_virtual_chapter += expire_chapters;
+
+ if (finished_zones < zone->index->zone_count)
+ return UDS_SUCCESS;
+
+ while (expire_chapters-- > 0)
+ uds_forget_chapter(zone->index->volume, expiring++);
+
+ return UDS_SUCCESS;
+}
+
+static int handle_chapter_closed(struct index_zone *zone, u64 virtual_chapter)
+{
+ if (zone->newest_virtual_chapter == virtual_chapter)
+ return open_next_chapter(zone);
+
+ return UDS_SUCCESS;
+}
+
+static int dispatch_index_zone_control_request(struct uds_request *request)
+{
+ struct uds_zone_message *message = &request->zone_message;
+ struct index_zone *zone = request->index->zones[request->zone_number];
+
+ switch (message->type) {
+ case UDS_MESSAGE_SPARSE_CACHE_BARRIER:
+ return uds_update_sparse_cache(zone, message->virtual_chapter);
+
+ case UDS_MESSAGE_ANNOUNCE_CHAPTER_CLOSED:
+ return handle_chapter_closed(zone, message->virtual_chapter);
+
+ default:
+ vdo_log_error("invalid message type: %d", message->type);
+ return UDS_INVALID_ARGUMENT;
+ }
+}
+
+static void set_request_location(struct uds_request *request,
+ enum uds_index_region new_location)
+{
+ request->location = new_location;
+ request->found = ((new_location == UDS_LOCATION_IN_OPEN_CHAPTER) ||
+ (new_location == UDS_LOCATION_IN_DENSE) ||
+ (new_location == UDS_LOCATION_IN_SPARSE));
+}
+
+static void set_chapter_location(struct uds_request *request,
+ const struct index_zone *zone, u64 virtual_chapter)
+{
+ request->found = true;
+ if (virtual_chapter == zone->newest_virtual_chapter)
+ request->location = UDS_LOCATION_IN_OPEN_CHAPTER;
+ else if (is_zone_chapter_sparse(zone, virtual_chapter))
+ request->location = UDS_LOCATION_IN_SPARSE;
+ else
+ request->location = UDS_LOCATION_IN_DENSE;
+}
+
+static int search_sparse_cache_in_zone(struct index_zone *zone, struct uds_request *request,
+ u64 virtual_chapter, bool *found)
+{
+ int result;
+ struct volume *volume;
+ u16 record_page_number;
+ u32 chapter;
+
+ result = uds_search_sparse_cache(zone, &request->record_name, &virtual_chapter,
+ &record_page_number);
+ if ((result != UDS_SUCCESS) || (virtual_chapter == NO_CHAPTER))
+ return result;
+
+ request->virtual_chapter = virtual_chapter;
+ volume = zone->index->volume;
+ chapter = uds_map_to_physical_chapter(volume->geometry, virtual_chapter);
+ return uds_search_cached_record_page(volume, request, chapter,
+ record_page_number, found);
+}
+
+static int get_record_from_zone(struct index_zone *zone, struct uds_request *request,
+ bool *found)
+{
+ struct volume *volume;
+
+ if (request->location == UDS_LOCATION_RECORD_PAGE_LOOKUP) {
+ *found = true;
+ return UDS_SUCCESS;
+ } else if (request->location == UDS_LOCATION_UNAVAILABLE) {
+ *found = false;
+ return UDS_SUCCESS;
+ }
+
+ if (request->virtual_chapter == zone->newest_virtual_chapter) {
+ uds_search_open_chapter(zone->open_chapter, &request->record_name,
+ &request->old_metadata, found);
+ return UDS_SUCCESS;
+ }
+
+ if ((zone->newest_virtual_chapter > 0) &&
+ (request->virtual_chapter == (zone->newest_virtual_chapter - 1)) &&
+ (zone->writing_chapter->size > 0)) {
+ uds_search_open_chapter(zone->writing_chapter, &request->record_name,
+ &request->old_metadata, found);
+ return UDS_SUCCESS;
+ }
+
+ volume = zone->index->volume;
+ if (is_zone_chapter_sparse(zone, request->virtual_chapter) &&
+ uds_sparse_cache_contains(volume->sparse_cache, request->virtual_chapter,
+ request->zone_number))
+ return search_sparse_cache_in_zone(zone, request,
+ request->virtual_chapter, found);
+
+ return uds_search_volume_page_cache(volume, request, found);
+}
+
+static int put_record_in_zone(struct index_zone *zone, struct uds_request *request,
+ const struct uds_record_data *metadata)
+{
+ unsigned int remaining;
+
+ remaining = uds_put_open_chapter(zone->open_chapter, &request->record_name,
+ metadata);
+ if (remaining == 0)
+ return open_next_chapter(zone);
+
+ return UDS_SUCCESS;
+}
+
+static int search_index_zone(struct index_zone *zone, struct uds_request *request)
+{
+ int result;
+ struct volume_index_record record;
+ bool overflow_record, found = false;
+ struct uds_record_data *metadata;
+ u64 chapter;
+
+ result = uds_get_volume_index_record(zone->index->volume_index,
+ &request->record_name, &record);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (record.is_found) {
+ if (request->requeued && request->virtual_chapter != record.virtual_chapter)
+ set_request_location(request, UDS_LOCATION_UNKNOWN);
+
+ request->virtual_chapter = record.virtual_chapter;
+ result = get_record_from_zone(zone, request, &found);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ if (found)
+ set_chapter_location(request, zone, record.virtual_chapter);
+
+ /*
+ * If a record has overflowed a chapter index in more than one chapter (or overflowed in
+ * one chapter and collided with an existing record), it will exist as a collision record
+ * in the volume index, but we won't find it in the volume. This case needs special
+ * handling.
+ */
+ overflow_record = (record.is_found && record.is_collision && !found);
+ chapter = zone->newest_virtual_chapter;
+ if (found || overflow_record) {
+ if ((request->type == UDS_QUERY_NO_UPDATE) ||
+ ((request->type == UDS_QUERY) && overflow_record)) {
+ /* There is nothing left to do. */
+ return UDS_SUCCESS;
+ }
+
+ if (record.virtual_chapter != chapter) {
+ /*
+ * Update the volume index to reference the new chapter for the block. If
+ * the record had been deleted or dropped from the chapter index, it will
+ * be back.
+ */
+ result = uds_set_volume_index_record_chapter(&record, chapter);
+ } else if (request->type != UDS_UPDATE) {
+ /* The record is already in the open chapter. */
+ return UDS_SUCCESS;
+ }
+ } else {
+ /*
+ * The record wasn't in the volume index, so check whether the
+ * name is in a cached sparse chapter. If we found the name on
+ * a previous search, use that result instead.
+ */
+ if (request->location == UDS_LOCATION_RECORD_PAGE_LOOKUP) {
+ found = true;
+ } else if (request->location == UDS_LOCATION_UNAVAILABLE) {
+ found = false;
+ } else if (uds_is_sparse_index_geometry(zone->index->volume->geometry) &&
+ !uds_is_volume_index_sample(zone->index->volume_index,
+ &request->record_name)) {
+ result = search_sparse_cache_in_zone(zone, request, NO_CHAPTER,
+ &found);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ if (found)
+ set_request_location(request, UDS_LOCATION_IN_SPARSE);
+
+ if ((request->type == UDS_QUERY_NO_UPDATE) ||
+ ((request->type == UDS_QUERY) && !found)) {
+ /* There is nothing left to do. */
+ return UDS_SUCCESS;
+ }
+
+ /*
+ * Add a new entry to the volume index referencing the open chapter. This needs to
+ * be done both for new records, and for records from cached sparse chapters.
+ */
+ result = uds_put_volume_index_record(&record, chapter);
+ }
+
+ if (result == UDS_OVERFLOW) {
+ /*
+ * The volume index encountered a delta list overflow. The condition was already
+ * logged. We will go on without adding the record to the open chapter.
+ */
+ return UDS_SUCCESS;
+ }
+
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (!found || (request->type == UDS_UPDATE)) {
+ /* This is a new record or we're updating an existing record. */
+ metadata = &request->new_metadata;
+ } else {
+ /* Move the existing record to the open chapter. */
+ metadata = &request->old_metadata;
+ }
+
+ return put_record_in_zone(zone, request, metadata);
+}
+
+static int remove_from_index_zone(struct index_zone *zone, struct uds_request *request)
+{
+ int result;
+ struct volume_index_record record;
+
+ result = uds_get_volume_index_record(zone->index->volume_index,
+ &request->record_name, &record);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (!record.is_found)
+ return UDS_SUCCESS;
+
+ /* If the request was requeued, check whether the saved state is still valid. */
+
+ if (record.is_collision) {
+ set_chapter_location(request, zone, record.virtual_chapter);
+ } else {
+ /* Non-collision records are hints, so resolve the name in the chapter. */
+ bool found;
+
+ if (request->requeued && request->virtual_chapter != record.virtual_chapter)
+ set_request_location(request, UDS_LOCATION_UNKNOWN);
+
+ request->virtual_chapter = record.virtual_chapter;
+ result = get_record_from_zone(zone, request, &found);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (!found) {
+ /* There is no record to remove. */
+ return UDS_SUCCESS;
+ }
+ }
+
+ set_chapter_location(request, zone, record.virtual_chapter);
+
+ /*
+ * Delete the volume index entry for the named record only. Note that a later search might
+ * later return stale advice if there is a colliding name in the same chapter, but it's a
+ * very rare case (1 in 2^21).
+ */
+ result = uds_remove_volume_index_record(&record);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ /*
+ * If the record is in the open chapter, we must remove it or mark it deleted to avoid
+ * trouble if the record is added again later.
+ */
+ if (request->location == UDS_LOCATION_IN_OPEN_CHAPTER)
+ uds_remove_from_open_chapter(zone->open_chapter, &request->record_name);
+
+ return UDS_SUCCESS;
+}
+
+static int dispatch_index_request(struct uds_index *index, struct uds_request *request)
+{
+ int result;
+ struct index_zone *zone = index->zones[request->zone_number];
+
+ if (!request->requeued) {
+ result = simulate_index_zone_barrier_message(zone, request);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ switch (request->type) {
+ case UDS_POST:
+ case UDS_UPDATE:
+ case UDS_QUERY:
+ case UDS_QUERY_NO_UPDATE:
+ result = search_index_zone(zone, request);
+ break;
+
+ case UDS_DELETE:
+ result = remove_from_index_zone(zone, request);
+ break;
+
+ default:
+ result = vdo_log_warning_strerror(UDS_INVALID_ARGUMENT,
+ "invalid request type: %d",
+ request->type);
+ break;
+ }
+
+ return result;
+}
+
+/* This is the request processing function invoked by each zone's thread. */
+static void execute_zone_request(struct uds_request *request)
+{
+ int result;
+ struct uds_index *index = request->index;
+
+ if (request->zone_message.type != UDS_MESSAGE_NONE) {
+ result = dispatch_index_zone_control_request(request);
+ if (result != UDS_SUCCESS) {
+ vdo_log_error_strerror(result, "error executing message: %d",
+ request->zone_message.type);
+ }
+
+ /* Once the message is processed it can be freed. */
+ vdo_free(vdo_forget(request));
+ return;
+ }
+
+ index->need_to_save = true;
+ if (request->requeued && (request->status != UDS_SUCCESS)) {
+ set_request_location(request, UDS_LOCATION_UNAVAILABLE);
+ index->callback(request);
+ return;
+ }
+
+ result = dispatch_index_request(index, request);
+ if (result == UDS_QUEUED) {
+ /* The request has been requeued so don't let it complete. */
+ return;
+ }
+
+ if (!request->found)
+ set_request_location(request, UDS_LOCATION_UNAVAILABLE);
+
+ request->status = result;
+ index->callback(request);
+}
+
+static int initialize_index_queues(struct uds_index *index,
+ const struct index_geometry *geometry)
+{
+ int result;
+ unsigned int i;
+
+ for (i = 0; i < index->zone_count; i++) {
+ result = uds_make_request_queue("indexW", &execute_zone_request,
+ &index->zone_queues[i]);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ /* The triage queue is only needed for sparse multi-zone indexes. */
+ if ((index->zone_count > 1) && uds_is_sparse_index_geometry(geometry)) {
+ result = uds_make_request_queue("triageW", &triage_request,
+ &index->triage_queue);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ return UDS_SUCCESS;
+}
+
+/* This is the driver function for the chapter writer thread. */
+static void close_chapters(void *arg)
+{
+ int result;
+ struct chapter_writer *writer = arg;
+ struct uds_index *index = writer->index;
+
+ vdo_log_debug("chapter writer starting");
+ mutex_lock(&writer->mutex);
+ for (;;) {
+ while (writer->zones_to_write < index->zone_count) {
+ if (writer->stop && (writer->zones_to_write == 0)) {
+ /*
+ * We've been told to stop, and all of the zones are in the same
+ * open chapter, so we can exit now.
+ */
+ mutex_unlock(&writer->mutex);
+ vdo_log_debug("chapter writer stopping");
+ return;
+ }
+ uds_wait_cond(&writer->cond, &writer->mutex);
+ }
+
+ /*
+ * Release the lock while closing a chapter. We probably don't need to do this, but
+ * it seems safer in principle. It's OK to access the chapter and chapter_number
+ * fields without the lock since those aren't allowed to change until we're done.
+ */
+ mutex_unlock(&writer->mutex);
+
+ if (index->has_saved_open_chapter) {
+ /*
+ * Remove the saved open chapter the first time we close an open chapter
+ * after loading from a clean shutdown, or after doing a clean save. The
+ * lack of the saved open chapter will indicate that a recovery is
+ * necessary.
+ */
+ index->has_saved_open_chapter = false;
+ result = uds_discard_open_chapter(index->layout);
+ if (result == UDS_SUCCESS)
+ vdo_log_debug("Discarding saved open chapter");
+ }
+
+ result = uds_close_open_chapter(writer->chapters, index->zone_count,
+ index->volume,
+ writer->open_chapter_index,
+ writer->collated_records,
+ index->newest_virtual_chapter);
+
+ mutex_lock(&writer->mutex);
+ index->newest_virtual_chapter++;
+ index->oldest_virtual_chapter +=
+ uds_chapters_to_expire(index->volume->geometry,
+ index->newest_virtual_chapter);
+ writer->result = result;
+ writer->zones_to_write = 0;
+ uds_broadcast_cond(&writer->cond);
+ }
+}
+
+static void stop_chapter_writer(struct chapter_writer *writer)
+{
+ struct thread *writer_thread = NULL;
+
+ mutex_lock(&writer->mutex);
+ if (writer->thread != NULL) {
+ writer_thread = writer->thread;
+ writer->thread = NULL;
+ writer->stop = true;
+ uds_broadcast_cond(&writer->cond);
+ }
+ mutex_unlock(&writer->mutex);
+
+ if (writer_thread != NULL)
+ vdo_join_threads(writer_thread);
+}
+
+static void free_chapter_writer(struct chapter_writer *writer)
+{
+ if (writer == NULL)
+ return;
+
+ stop_chapter_writer(writer);
+ uds_free_open_chapter_index(writer->open_chapter_index);
+ vdo_free(writer->collated_records);
+ vdo_free(writer);
+}
+
+static int make_chapter_writer(struct uds_index *index,
+ struct chapter_writer **writer_ptr)
+{
+ int result;
+ struct chapter_writer *writer;
+ size_t collated_records_size =
+ (sizeof(struct uds_volume_record) * index->volume->geometry->records_per_chapter);
+
+ result = vdo_allocate_extended(struct chapter_writer, index->zone_count,
+ struct open_chapter_zone *, "Chapter Writer",
+ &writer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ writer->index = index;
+ mutex_init(&writer->mutex);
+ uds_init_cond(&writer->cond);
+
+ result = vdo_allocate_cache_aligned(collated_records_size, "collated records",
+ &writer->collated_records);
+ if (result != VDO_SUCCESS) {
+ free_chapter_writer(writer);
+ return result;
+ }
+
+ result = uds_make_open_chapter_index(&writer->open_chapter_index,
+ index->volume->geometry,
+ index->volume->nonce);
+ if (result != UDS_SUCCESS) {
+ free_chapter_writer(writer);
+ return result;
+ }
+
+ writer->memory_size = (sizeof(struct chapter_writer) +
+ index->zone_count * sizeof(struct open_chapter_zone *) +
+ collated_records_size +
+ writer->open_chapter_index->memory_size);
+
+ result = vdo_create_thread(close_chapters, writer, "writer", &writer->thread);
+ if (result != VDO_SUCCESS) {
+ free_chapter_writer(writer);
+ return result;
+ }
+
+ *writer_ptr = writer;
+ return UDS_SUCCESS;
+}
+
+static int load_index(struct uds_index *index)
+{
+ int result;
+ u64 last_save_chapter;
+
+ result = uds_load_index_state(index->layout, index);
+ if (result != UDS_SUCCESS)
+ return UDS_INDEX_NOT_SAVED_CLEANLY;
+
+ last_save_chapter = ((index->last_save != NO_LAST_SAVE) ? index->last_save : 0);
+
+ vdo_log_info("loaded index from chapter %llu through chapter %llu",
+ (unsigned long long) index->oldest_virtual_chapter,
+ (unsigned long long) last_save_chapter);
+
+ return UDS_SUCCESS;
+}
+
+static int rebuild_index_page_map(struct uds_index *index, u64 vcn)
+{
+ int result;
+ struct delta_index_page *chapter_index_page;
+ struct index_geometry *geometry = index->volume->geometry;
+ u32 chapter = uds_map_to_physical_chapter(geometry, vcn);
+ u32 expected_list_number = 0;
+ u32 index_page_number;
+ u32 lowest_delta_list;
+ u32 highest_delta_list;
+
+ for (index_page_number = 0;
+ index_page_number < geometry->index_pages_per_chapter;
+ index_page_number++) {
+ result = uds_get_volume_index_page(index->volume, chapter,
+ index_page_number,
+ &chapter_index_page);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result,
+ "failed to read index page %u in chapter %u",
+ index_page_number, chapter);
+ }
+
+ lowest_delta_list = chapter_index_page->lowest_list_number;
+ highest_delta_list = chapter_index_page->highest_list_number;
+ if (lowest_delta_list != expected_list_number) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "chapter %u index page %u is corrupt",
+ chapter, index_page_number);
+ }
+
+ uds_update_index_page_map(index->volume->index_page_map, vcn, chapter,
+ index_page_number, highest_delta_list);
+ expected_list_number = highest_delta_list + 1;
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int replay_record(struct uds_index *index, const struct uds_record_name *name,
+ u64 virtual_chapter, bool will_be_sparse_chapter)
+{
+ int result;
+ struct volume_index_record record;
+ bool update_record;
+
+ if (will_be_sparse_chapter &&
+ !uds_is_volume_index_sample(index->volume_index, name)) {
+ /*
+ * This entry will be in a sparse chapter after the rebuild completes, and it is
+ * not a sample, so just skip over it.
+ */
+ return UDS_SUCCESS;
+ }
+
+ result = uds_get_volume_index_record(index->volume_index, name, &record);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (record.is_found) {
+ if (record.is_collision) {
+ if (record.virtual_chapter == virtual_chapter) {
+ /* The record is already correct. */
+ return UDS_SUCCESS;
+ }
+
+ update_record = true;
+ } else if (record.virtual_chapter == virtual_chapter) {
+ /*
+ * There is a volume index entry pointing to the current chapter, but we
+ * don't know if it is for the same name as the one we are currently
+ * working on or not. For now, we're just going to assume that it isn't.
+ * This will create one extra collision record if there was a deleted
+ * record in the current chapter.
+ */
+ update_record = false;
+ } else {
+ /*
+ * If we're rebuilding, we don't normally want to go to disk to see if the
+ * record exists, since we will likely have just read the record from disk
+ * (i.e. we know it's there). The exception to this is when we find an
+ * entry in the volume index that has a different chapter. In this case, we
+ * need to search that chapter to determine if the volume index entry was
+ * for the same record or a different one.
+ */
+ result = uds_search_volume_page_cache_for_rebuild(index->volume,
+ name,
+ record.virtual_chapter,
+ &update_record);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+ } else {
+ update_record = false;
+ }
+
+ if (update_record) {
+ /*
+ * Update the volume index to reference the new chapter for the block. If the
+ * record had been deleted or dropped from the chapter index, it will be back.
+ */
+ result = uds_set_volume_index_record_chapter(&record, virtual_chapter);
+ } else {
+ /*
+ * Add a new entry to the volume index referencing the open chapter. This should be
+ * done regardless of whether we are a brand new record or a sparse record, i.e.
+ * one that doesn't exist in the index but does on disk, since for a sparse record,
+ * we would want to un-sparsify if it did exist.
+ */
+ result = uds_put_volume_index_record(&record, virtual_chapter);
+ }
+
+ if ((result == UDS_DUPLICATE_NAME) || (result == UDS_OVERFLOW)) {
+ /* The rebuilt index will lose these records. */
+ return UDS_SUCCESS;
+ }
+
+ return result;
+}
+
+static bool check_for_suspend(struct uds_index *index)
+{
+ bool closing;
+
+ if (index->load_context == NULL)
+ return false;
+
+ mutex_lock(&index->load_context->mutex);
+ if (index->load_context->status != INDEX_SUSPENDING) {
+ mutex_unlock(&index->load_context->mutex);
+ return false;
+ }
+
+ /* Notify that we are suspended and wait for the resume. */
+ index->load_context->status = INDEX_SUSPENDED;
+ uds_broadcast_cond(&index->load_context->cond);
+
+ while ((index->load_context->status != INDEX_OPENING) &&
+ (index->load_context->status != INDEX_FREEING))
+ uds_wait_cond(&index->load_context->cond, &index->load_context->mutex);
+
+ closing = (index->load_context->status == INDEX_FREEING);
+ mutex_unlock(&index->load_context->mutex);
+ return closing;
+}
+
+static int replay_chapter(struct uds_index *index, u64 virtual, bool sparse)
+{
+ int result;
+ u32 i;
+ u32 j;
+ const struct index_geometry *geometry;
+ u32 physical_chapter;
+
+ if (check_for_suspend(index)) {
+ vdo_log_info("Replay interrupted by index shutdown at chapter %llu",
+ (unsigned long long) virtual);
+ return -EBUSY;
+ }
+
+ geometry = index->volume->geometry;
+ physical_chapter = uds_map_to_physical_chapter(geometry, virtual);
+ uds_prefetch_volume_chapter(index->volume, physical_chapter);
+ uds_set_volume_index_open_chapter(index->volume_index, virtual);
+
+ result = rebuild_index_page_map(index, virtual);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result,
+ "could not rebuild index page map for chapter %u",
+ physical_chapter);
+ }
+
+ for (i = 0; i < geometry->record_pages_per_chapter; i++) {
+ u8 *record_page;
+ u32 record_page_number;
+
+ record_page_number = geometry->index_pages_per_chapter + i;
+ result = uds_get_volume_record_page(index->volume, physical_chapter,
+ record_page_number, &record_page);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result, "could not get page %d",
+ record_page_number);
+ }
+
+ for (j = 0; j < geometry->records_per_page; j++) {
+ const u8 *name_bytes;
+ struct uds_record_name name;
+
+ name_bytes = record_page + (j * BYTES_PER_RECORD);
+ memcpy(&name.name, name_bytes, UDS_RECORD_NAME_SIZE);
+ result = replay_record(index, &name, virtual, sparse);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int replay_volume(struct uds_index *index)
+{
+ int result;
+ u64 old_map_update;
+ u64 new_map_update;
+ u64 virtual;
+ u64 from_virtual = index->oldest_virtual_chapter;
+ u64 upto_virtual = index->newest_virtual_chapter;
+ bool will_be_sparse;
+
+ vdo_log_info("Replaying volume from chapter %llu through chapter %llu",
+ (unsigned long long) from_virtual,
+ (unsigned long long) upto_virtual);
+
+ /*
+ * The index failed to load, so the volume index is empty. Add records to the volume index
+ * in order, skipping non-hooks in chapters which will be sparse to save time.
+ *
+ * Go through each record page of each chapter and add the records back to the volume
+ * index. This should not cause anything to be written to either the open chapter or the
+ * on-disk volume. Also skip the on-disk chapter corresponding to upto_virtual, as this
+ * would have already been purged from the volume index when the chapter was opened.
+ *
+ * Also, go through each index page for each chapter and rebuild the index page map.
+ */
+ old_map_update = index->volume->index_page_map->last_update;
+ for (virtual = from_virtual; virtual < upto_virtual; virtual++) {
+ will_be_sparse = uds_is_chapter_sparse(index->volume->geometry,
+ from_virtual, upto_virtual,
+ virtual);
+ result = replay_chapter(index, virtual, will_be_sparse);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ /* Also reap the chapter being replaced by the open chapter. */
+ uds_set_volume_index_open_chapter(index->volume_index, upto_virtual);
+
+ new_map_update = index->volume->index_page_map->last_update;
+ if (new_map_update != old_map_update) {
+ vdo_log_info("replay changed index page map update from %llu to %llu",
+ (unsigned long long) old_map_update,
+ (unsigned long long) new_map_update);
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int rebuild_index(struct uds_index *index)
+{
+ int result;
+ u64 lowest;
+ u64 highest;
+ bool is_empty = false;
+ u32 chapters_per_volume = index->volume->geometry->chapters_per_volume;
+
+ index->volume->lookup_mode = LOOKUP_FOR_REBUILD;
+ result = uds_find_volume_chapter_boundaries(index->volume, &lowest, &highest,
+ &is_empty);
+ if (result != UDS_SUCCESS) {
+ return vdo_log_fatal_strerror(result,
+ "cannot rebuild index: unknown volume chapter boundaries");
+ }
+
+ if (is_empty) {
+ index->newest_virtual_chapter = 0;
+ index->oldest_virtual_chapter = 0;
+ index->volume->lookup_mode = LOOKUP_NORMAL;
+ return UDS_SUCCESS;
+ }
+
+ index->newest_virtual_chapter = highest + 1;
+ index->oldest_virtual_chapter = lowest;
+ if (index->newest_virtual_chapter ==
+ (index->oldest_virtual_chapter + chapters_per_volume)) {
+ /* Skip the chapter shadowed by the open chapter. */
+ index->oldest_virtual_chapter++;
+ }
+
+ result = replay_volume(index);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ index->volume->lookup_mode = LOOKUP_NORMAL;
+ return UDS_SUCCESS;
+}
+
+static void free_index_zone(struct index_zone *zone)
+{
+ if (zone == NULL)
+ return;
+
+ uds_free_open_chapter(zone->open_chapter);
+ uds_free_open_chapter(zone->writing_chapter);
+ vdo_free(zone);
+}
+
+static int make_index_zone(struct uds_index *index, unsigned int zone_number)
+{
+ int result;
+ struct index_zone *zone;
+
+ result = vdo_allocate(1, struct index_zone, "index zone", &zone);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_make_open_chapter(index->volume->geometry, index->zone_count,
+ &zone->open_chapter);
+ if (result != UDS_SUCCESS) {
+ free_index_zone(zone);
+ return result;
+ }
+
+ result = uds_make_open_chapter(index->volume->geometry, index->zone_count,
+ &zone->writing_chapter);
+ if (result != UDS_SUCCESS) {
+ free_index_zone(zone);
+ return result;
+ }
+
+ zone->index = index;
+ zone->id = zone_number;
+ index->zones[zone_number] = zone;
+
+ return UDS_SUCCESS;
+}
+
+int uds_make_index(struct uds_configuration *config, enum uds_open_index_type open_type,
+ struct index_load_context *load_context, index_callback_fn callback,
+ struct uds_index **new_index)
+{
+ int result;
+ bool loaded = false;
+ bool new = (open_type == UDS_CREATE);
+ struct uds_index *index = NULL;
+ struct index_zone *zone;
+ u64 nonce;
+ unsigned int z;
+
+ result = vdo_allocate_extended(struct uds_index, config->zone_count,
+ struct uds_request_queue *, "index", &index);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ index->zone_count = config->zone_count;
+
+ result = uds_make_index_layout(config, new, &index->layout);
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return result;
+ }
+
+ result = vdo_allocate(index->zone_count, struct index_zone *, "zones",
+ &index->zones);
+ if (result != VDO_SUCCESS) {
+ uds_free_index(index);
+ return result;
+ }
+
+ result = uds_make_volume(config, index->layout, &index->volume);
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return result;
+ }
+
+ index->volume->lookup_mode = LOOKUP_NORMAL;
+ for (z = 0; z < index->zone_count; z++) {
+ result = make_index_zone(index, z);
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return vdo_log_error_strerror(result,
+ "Could not create index zone");
+ }
+ }
+
+ nonce = uds_get_volume_nonce(index->layout);
+ result = uds_make_volume_index(config, nonce, &index->volume_index);
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return vdo_log_error_strerror(result, "could not make volume index");
+ }
+
+ index->load_context = load_context;
+ index->callback = callback;
+
+ result = initialize_index_queues(index, config->geometry);
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return result;
+ }
+
+ result = make_chapter_writer(index, &index->chapter_writer);
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return result;
+ }
+
+ if (!new) {
+ result = load_index(index);
+ switch (result) {
+ case UDS_SUCCESS:
+ loaded = true;
+ break;
+ case -ENOMEM:
+ /* We should not try a rebuild for this error. */
+ vdo_log_error_strerror(result, "index could not be loaded");
+ break;
+ default:
+ vdo_log_error_strerror(result, "index could not be loaded");
+ if (open_type == UDS_LOAD) {
+ result = rebuild_index(index);
+ if (result != UDS_SUCCESS) {
+ vdo_log_error_strerror(result,
+ "index could not be rebuilt");
+ }
+ }
+ break;
+ }
+ }
+
+ if (result != UDS_SUCCESS) {
+ uds_free_index(index);
+ return vdo_log_error_strerror(result, "fatal error in %s()", __func__);
+ }
+
+ for (z = 0; z < index->zone_count; z++) {
+ zone = index->zones[z];
+ zone->oldest_virtual_chapter = index->oldest_virtual_chapter;
+ zone->newest_virtual_chapter = index->newest_virtual_chapter;
+ }
+
+ if (index->load_context != NULL) {
+ mutex_lock(&index->load_context->mutex);
+ index->load_context->status = INDEX_READY;
+ /*
+ * If we get here, suspend is meaningless, but notify any thread trying to suspend
+ * us so it doesn't hang.
+ */
+ uds_broadcast_cond(&index->load_context->cond);
+ mutex_unlock(&index->load_context->mutex);
+ }
+
+ index->has_saved_open_chapter = loaded;
+ index->need_to_save = !loaded;
+ *new_index = index;
+ return UDS_SUCCESS;
+}
+
+void uds_free_index(struct uds_index *index)
+{
+ unsigned int i;
+
+ if (index == NULL)
+ return;
+
+ uds_request_queue_finish(index->triage_queue);
+ for (i = 0; i < index->zone_count; i++)
+ uds_request_queue_finish(index->zone_queues[i]);
+
+ free_chapter_writer(index->chapter_writer);
+
+ uds_free_volume_index(index->volume_index);
+ if (index->zones != NULL) {
+ for (i = 0; i < index->zone_count; i++)
+ free_index_zone(index->zones[i]);
+ vdo_free(index->zones);
+ }
+
+ uds_free_volume(index->volume);
+ uds_free_index_layout(vdo_forget(index->layout));
+ vdo_free(index);
+}
+
+/* Wait for the chapter writer to complete any outstanding writes. */
+void uds_wait_for_idle_index(struct uds_index *index)
+{
+ struct chapter_writer *writer = index->chapter_writer;
+
+ mutex_lock(&writer->mutex);
+ while (writer->zones_to_write > 0)
+ uds_wait_cond(&writer->cond, &writer->mutex);
+ mutex_unlock(&writer->mutex);
+}
+
+/* This function assumes that all requests have been drained. */
+int uds_save_index(struct uds_index *index)
+{
+ int result;
+
+ if (!index->need_to_save)
+ return UDS_SUCCESS;
+
+ uds_wait_for_idle_index(index);
+ index->prev_save = index->last_save;
+ index->last_save = ((index->newest_virtual_chapter == 0) ?
+ NO_LAST_SAVE : index->newest_virtual_chapter - 1);
+ vdo_log_info("beginning save (vcn %llu)", (unsigned long long) index->last_save);
+
+ result = uds_save_index_state(index->layout, index);
+ if (result != UDS_SUCCESS) {
+ vdo_log_info("save index failed");
+ index->last_save = index->prev_save;
+ } else {
+ index->has_saved_open_chapter = true;
+ index->need_to_save = false;
+ vdo_log_info("finished save (vcn %llu)",
+ (unsigned long long) index->last_save);
+ }
+
+ return result;
+}
+
+int uds_replace_index_storage(struct uds_index *index, struct block_device *bdev)
+{
+ return uds_replace_volume_storage(index->volume, index->layout, bdev);
+}
+
+/* Accessing statistics should be safe from any thread. */
+void uds_get_index_stats(struct uds_index *index, struct uds_index_stats *counters)
+{
+ struct volume_index_stats stats;
+
+ uds_get_volume_index_stats(index->volume_index, &stats);
+ counters->entries_indexed = stats.record_count;
+ counters->collisions = stats.collision_count;
+ counters->entries_discarded = stats.discard_count;
+
+ counters->memory_used = (index->volume_index->memory_size +
+ index->volume->cache_size +
+ index->chapter_writer->memory_size);
+}
+
+void uds_enqueue_request(struct uds_request *request, enum request_stage stage)
+{
+ struct uds_index *index = request->index;
+ struct uds_request_queue *queue;
+
+ switch (stage) {
+ case STAGE_TRIAGE:
+ if (index->triage_queue != NULL) {
+ queue = index->triage_queue;
+ break;
+ }
+
+ fallthrough;
+
+ case STAGE_INDEX:
+ request->zone_number =
+ uds_get_volume_index_zone(index->volume_index, &request->record_name);
+ fallthrough;
+
+ case STAGE_MESSAGE:
+ queue = index->zone_queues[request->zone_number];
+ break;
+
+ default:
+ VDO_ASSERT_LOG_ONLY(false, "invalid index stage: %d", stage);
+ return;
+ }
+
+ uds_request_queue_enqueue(queue, request);
+}
diff --git a/drivers/md/dm-vdo/indexer/index.h b/drivers/md/dm-vdo/indexer/index.h
new file mode 100644
index 000000000000..edabb239548e
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/index.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_INDEX_H
+#define UDS_INDEX_H
+
+#include "index-layout.h"
+#include "index-session.h"
+#include "open-chapter.h"
+#include "volume.h"
+#include "volume-index.h"
+
+/*
+ * The index is a high-level structure which represents the totality of the UDS index. It manages
+ * the queues for incoming requests and dispatches them to the appropriate sub-components like the
+ * volume or the volume index. It also manages administrative tasks such as saving and loading the
+ * index.
+ *
+ * The index is divided into a number of independent zones and assigns each request to a zone based
+ * on its name. Most sub-components are similarly divided into zones as well so that requests in
+ * each zone usually operate without interference or coordination between zones.
+ */
+
+typedef void (*index_callback_fn)(struct uds_request *request);
+
+struct index_zone {
+ struct uds_index *index;
+ struct open_chapter_zone *open_chapter;
+ struct open_chapter_zone *writing_chapter;
+ u64 oldest_virtual_chapter;
+ u64 newest_virtual_chapter;
+ unsigned int id;
+};
+
+struct uds_index {
+ bool has_saved_open_chapter;
+ bool need_to_save;
+ struct index_load_context *load_context;
+ struct index_layout *layout;
+ struct volume_index *volume_index;
+ struct volume *volume;
+ unsigned int zone_count;
+ struct index_zone **zones;
+
+ u64 oldest_virtual_chapter;
+ u64 newest_virtual_chapter;
+
+ u64 last_save;
+ u64 prev_save;
+ struct chapter_writer *chapter_writer;
+
+ index_callback_fn callback;
+ struct uds_request_queue *triage_queue;
+ struct uds_request_queue *zone_queues[];
+};
+
+enum request_stage {
+ STAGE_TRIAGE,
+ STAGE_INDEX,
+ STAGE_MESSAGE,
+};
+
+int __must_check uds_make_index(struct uds_configuration *config,
+ enum uds_open_index_type open_type,
+ struct index_load_context *load_context,
+ index_callback_fn callback, struct uds_index **new_index);
+
+int __must_check uds_save_index(struct uds_index *index);
+
+void uds_free_index(struct uds_index *index);
+
+int __must_check uds_replace_index_storage(struct uds_index *index,
+ struct block_device *bdev);
+
+void uds_get_index_stats(struct uds_index *index, struct uds_index_stats *counters);
+
+void uds_enqueue_request(struct uds_request *request, enum request_stage stage);
+
+void uds_wait_for_idle_index(struct uds_index *index);
+
+#endif /* UDS_INDEX_H */
diff --git a/drivers/md/dm-vdo/indexer/indexer.h b/drivers/md/dm-vdo/indexer/indexer.h
new file mode 100644
index 000000000000..3744aaf625b0
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/indexer.h
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef INDEXER_H
+#define INDEXER_H
+
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "funnel-queue.h"
+
+/*
+ * UDS public API
+ *
+ * The Universal Deduplication System (UDS) is an efficient name-value store. When used for
+ * deduplicating storage, the names are generally hashes of data blocks and the associated data is
+ * where that block is located on the underlying storage medium. The stored names are expected to
+ * be randomly distributed among the space of possible names. If this assumption is violated, the
+ * UDS index will store fewer names than normal but will otherwise continue to work. The data
+ * associated with each name can be any 16-byte value.
+ *
+ * A client must first create an index session to interact with an index. Once created, the session
+ * can be shared among multiple threads or users. When a session is destroyed, it will also close
+ * and save any associated index.
+ *
+ * To make a request, a client must allocate a uds_request structure and set the required fields
+ * before launching it. UDS will invoke the provided callback to complete the request. After the
+ * callback has been called, the uds_request structure can be freed or reused for a new request.
+ * There are five types of requests:
+ *
+ * A UDS_UPDATE request will associate the provided name with the provided data. Any previous data
+ * associated with that name will be discarded.
+ *
+ * A UDS_QUERY request will return the data associated with the provided name, if any. The entry
+ * for the name will also be marked as most recent, as if the data had been updated.
+ *
+ * A UDS_POST request is a combination of UDS_QUERY and UDS_UPDATE. If there is already data
+ * associated with the provided name, that data is returned. If there is no existing association,
+ * the name is associated with the newly provided data. This request is equivalent to a UDS_QUERY
+ * request followed by a UDS_UPDATE request if no data is found, but it is much more efficient.
+ *
+ * A UDS_QUERY_NO_UPDATE request will return the data associated with the provided name, but will
+ * not change the recency of the entry for the name. This request is primarily useful for testing,
+ * to determine whether an entry exists without changing the internal state of the index.
+ *
+ * A UDS_DELETE request removes any data associated with the provided name. This operation is
+ * generally not necessary, because the index will automatically discard its oldest entries once it
+ * becomes full.
+ */
+
+/* General UDS constants and structures */
+
+enum uds_request_type {
+ /* Create or update the mapping for a name, and make the name most recent. */
+ UDS_UPDATE,
+
+ /* Return any mapped data for a name, and make the name most recent. */
+ UDS_QUERY,
+
+ /*
+ * Return any mapped data for a name, or map the provided data to the name if there is no
+ * current data, and make the name most recent.
+ */
+ UDS_POST,
+
+ /* Return any mapped data for a name without updating its recency. */
+ UDS_QUERY_NO_UPDATE,
+
+ /* Remove any mapping for a name. */
+ UDS_DELETE,
+
+};
+
+enum uds_open_index_type {
+ /* Create a new index. */
+ UDS_CREATE,
+
+ /* Load an existing index and try to recover if necessary. */
+ UDS_LOAD,
+
+ /* Load an existing index, but only if it was saved cleanly. */
+ UDS_NO_REBUILD,
+};
+
+enum {
+ /* The record name size in bytes */
+ UDS_RECORD_NAME_SIZE = 16,
+ /* The maximum record data size in bytes */
+ UDS_RECORD_DATA_SIZE = 16,
+};
+
+/*
+ * A type representing a UDS memory configuration which is either a positive integer number of
+ * gigabytes or one of the six special constants for configurations smaller than one gigabyte.
+ */
+typedef int uds_memory_config_size_t;
+
+enum {
+ /* The maximum configurable amount of memory */
+ UDS_MEMORY_CONFIG_MAX = 1024,
+ /* Flag indicating that the index has one less chapter than usual */
+ UDS_MEMORY_CONFIG_REDUCED = 0x1000,
+ UDS_MEMORY_CONFIG_REDUCED_MAX = 1024 + UDS_MEMORY_CONFIG_REDUCED,
+ /* Special values indicating sizes less than 1 GB */
+ UDS_MEMORY_CONFIG_256MB = -256,
+ UDS_MEMORY_CONFIG_512MB = -512,
+ UDS_MEMORY_CONFIG_768MB = -768,
+ UDS_MEMORY_CONFIG_REDUCED_256MB = -1280,
+ UDS_MEMORY_CONFIG_REDUCED_512MB = -1536,
+ UDS_MEMORY_CONFIG_REDUCED_768MB = -1792,
+};
+
+struct uds_record_name {
+ unsigned char name[UDS_RECORD_NAME_SIZE];
+};
+
+struct uds_record_data {
+ unsigned char data[UDS_RECORD_DATA_SIZE];
+};
+
+struct uds_volume_record {
+ struct uds_record_name name;
+ struct uds_record_data data;
+};
+
+struct uds_parameters {
+ /* The block_device used for storage */
+ struct block_device *bdev;
+ /* The maximum allowable size of the index on storage */
+ size_t size;
+ /* The offset where the index should start */
+ off_t offset;
+ /* The maximum memory allocation, in GB */
+ uds_memory_config_size_t memory_size;
+ /* Whether the index should include sparse chapters */
+ bool sparse;
+ /* A 64-bit nonce to validate the index */
+ u64 nonce;
+ /* The number of threads used to process index requests */
+ unsigned int zone_count;
+ /* The number of threads used to read volume pages */
+ unsigned int read_threads;
+};
+
+/*
+ * These statistics capture characteristics of the current index, including resource usage and
+ * requests processed since the index was opened.
+ */
+struct uds_index_stats {
+ /* The total number of records stored in the index */
+ u64 entries_indexed;
+ /* An estimate of the index's memory usage, in bytes */
+ u64 memory_used;
+ /* The number of collisions recorded in the volume index */
+ u64 collisions;
+ /* The number of entries discarded from the index since startup */
+ u64 entries_discarded;
+ /* The time at which these statistics were fetched */
+ s64 current_time;
+ /* The number of post calls that found an existing entry */
+ u64 posts_found;
+ /* The number of post calls that added an entry */
+ u64 posts_not_found;
+ /*
+ * The number of post calls that found an existing entry that is current enough to only
+ * exist in memory and not have been committed to disk yet
+ */
+ u64 in_memory_posts_found;
+ /*
+ * The number of post calls that found an existing entry in the dense portion of the index
+ */
+ u64 dense_posts_found;
+ /*
+ * The number of post calls that found an existing entry in the sparse portion of the index
+ */
+ u64 sparse_posts_found;
+ /* The number of update calls that updated an existing entry */
+ u64 updates_found;
+ /* The number of update calls that added a new entry */
+ u64 updates_not_found;
+ /* The number of delete requests that deleted an existing entry */
+ u64 deletions_found;
+ /* The number of delete requests that did nothing */
+ u64 deletions_not_found;
+ /* The number of query calls that found existing entry */
+ u64 queries_found;
+ /* The number of query calls that did not find an entry */
+ u64 queries_not_found;
+ /* The total number of requests processed */
+ u64 requests;
+};
+
+enum uds_index_region {
+ /* No location information has been determined */
+ UDS_LOCATION_UNKNOWN = 0,
+ /* The index page entry has been found */
+ UDS_LOCATION_INDEX_PAGE_LOOKUP,
+ /* The record page entry has been found */
+ UDS_LOCATION_RECORD_PAGE_LOOKUP,
+ /* The record is not in the index */
+ UDS_LOCATION_UNAVAILABLE,
+ /* The record was found in the open chapter */
+ UDS_LOCATION_IN_OPEN_CHAPTER,
+ /* The record was found in the dense part of the index */
+ UDS_LOCATION_IN_DENSE,
+ /* The record was found in the sparse part of the index */
+ UDS_LOCATION_IN_SPARSE,
+} __packed;
+
+/* Zone message requests are used to communicate between index zones. */
+enum uds_zone_message_type {
+ /* A standard request with no message */
+ UDS_MESSAGE_NONE = 0,
+ /* Add a chapter to the sparse chapter index cache */
+ UDS_MESSAGE_SPARSE_CACHE_BARRIER,
+ /* Close a chapter to keep the zone from falling behind */
+ UDS_MESSAGE_ANNOUNCE_CHAPTER_CLOSED,
+} __packed;
+
+struct uds_zone_message {
+ /* The type of message, determining how it will be processed */
+ enum uds_zone_message_type type;
+ /* The virtual chapter number to which the message applies */
+ u64 virtual_chapter;
+};
+
+struct uds_index_session;
+struct uds_index;
+struct uds_request;
+
+/* Once this callback has been invoked, the uds_request structure can be reused or freed. */
+typedef void (*uds_request_callback_fn)(struct uds_request *request);
+
+struct uds_request {
+ /* These input fields must be set before launching a request. */
+
+ /* The name of the record to look up or create */
+ struct uds_record_name record_name;
+ /* New data to associate with the record name, if applicable */
+ struct uds_record_data new_metadata;
+ /* A callback to invoke when the request is complete */
+ uds_request_callback_fn callback;
+ /* The index session that will manage this request */
+ struct uds_index_session *session;
+ /* The type of operation to perform, as describe above */
+ enum uds_request_type type;
+
+ /* These output fields are set when a request is complete. */
+
+ /* The existing data associated with the request name, if any */
+ struct uds_record_data old_metadata;
+ /* Either UDS_SUCCESS or an error code for the request */
+ int status;
+ /* True if the record name had an existing entry in the index */
+ bool found;
+
+ /*
+ * The remaining fields are used internally and should not be altered by clients. The index
+ * relies on zone_number being the first field in this section.
+ */
+
+ /* The number of the zone which will process this request*/
+ unsigned int zone_number;
+ /* A link for adding a request to a lock-free queue */
+ struct funnel_queue_entry queue_link;
+ /* A link for adding a request to a standard linked list */
+ struct uds_request *next_request;
+ /* A pointer to the index processing this request */
+ struct uds_index *index;
+ /* Control message for coordinating between zones */
+ struct uds_zone_message zone_message;
+ /* If true, process request immediately by waking the worker thread */
+ bool unbatched;
+ /* If true, continue this request before processing newer requests */
+ bool requeued;
+ /* The virtual chapter containing the record name, if known */
+ u64 virtual_chapter;
+ /* The region of the index containing the record name */
+ enum uds_index_region location;
+};
+
+/* Compute the number of bytes needed to store an index. */
+int __must_check uds_compute_index_size(const struct uds_parameters *parameters,
+ u64 *index_size);
+
+/* A session is required for most index operations. */
+int __must_check uds_create_index_session(struct uds_index_session **session);
+
+/* Destroying an index session also closes and saves the associated index. */
+int uds_destroy_index_session(struct uds_index_session *session);
+
+/*
+ * Create or open an index with an existing session. This operation fails if the index session is
+ * suspended, or if there is already an open index.
+ */
+int __must_check uds_open_index(enum uds_open_index_type open_type,
+ const struct uds_parameters *parameters,
+ struct uds_index_session *session);
+
+/*
+ * Wait until all callbacks for index operations are complete, and prevent new index operations
+ * from starting. New index operations will fail with EBUSY until the session is resumed. Also
+ * optionally saves the index.
+ */
+int __must_check uds_suspend_index_session(struct uds_index_session *session, bool save);
+
+/*
+ * Allow new index operations for an index, whether it was suspended or not. If the index is
+ * suspended and the supplied block device differs from the current backing store, the index will
+ * start using the new backing store instead.
+ */
+int __must_check uds_resume_index_session(struct uds_index_session *session,
+ struct block_device *bdev);
+
+/* Wait until all outstanding index operations are complete. */
+int __must_check uds_flush_index_session(struct uds_index_session *session);
+
+/* Close an index. This operation fails if the index session is suspended. */
+int __must_check uds_close_index(struct uds_index_session *session);
+
+/* Get index statistics since the last time the index was opened. */
+int __must_check uds_get_index_session_stats(struct uds_index_session *session,
+ struct uds_index_stats *stats);
+
+/* This function will fail if any required field of the request is not set. */
+int __must_check uds_launch_request(struct uds_request *request);
+
+struct cond_var {
+ wait_queue_head_t wait_queue;
+};
+
+static inline void uds_init_cond(struct cond_var *cv)
+{
+ init_waitqueue_head(&cv->wait_queue);
+}
+
+static inline void uds_signal_cond(struct cond_var *cv)
+{
+ wake_up(&cv->wait_queue);
+}
+
+static inline void uds_broadcast_cond(struct cond_var *cv)
+{
+ wake_up_all(&cv->wait_queue);
+}
+
+void uds_wait_cond(struct cond_var *cv, struct mutex *mutex);
+
+#endif /* INDEXER_H */
diff --git a/drivers/md/dm-vdo/indexer/io-factory.c b/drivers/md/dm-vdo/indexer/io-factory.c
new file mode 100644
index 000000000000..515765d35794
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/io-factory.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "io-factory.h"
+
+#include <linux/atomic.h>
+#include <linux/blkdev.h>
+#include <linux/err.h>
+#include <linux/mount.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+
+/*
+ * The I/O factory object manages access to index storage, which is a contiguous range of blocks on
+ * a block device.
+ *
+ * The factory holds the open device and is responsible for closing it. The factory has methods to
+ * make helper structures that can be used to access sections of the index.
+ */
+struct io_factory {
+ struct block_device *bdev;
+ atomic_t ref_count;
+};
+
+/* The buffered reader allows efficient I/O by reading page-sized segments into a buffer. */
+struct buffered_reader {
+ struct io_factory *factory;
+ struct dm_bufio_client *client;
+ struct dm_buffer *buffer;
+ sector_t limit;
+ sector_t block_number;
+ u8 *start;
+ u8 *end;
+};
+
+#define MAX_READ_AHEAD_BLOCKS 4
+
+/*
+ * The buffered writer allows efficient I/O by buffering writes and committing page-sized segments
+ * to storage.
+ */
+struct buffered_writer {
+ struct io_factory *factory;
+ struct dm_bufio_client *client;
+ struct dm_buffer *buffer;
+ sector_t limit;
+ sector_t block_number;
+ u8 *start;
+ u8 *end;
+ int error;
+};
+
+static void uds_get_io_factory(struct io_factory *factory)
+{
+ atomic_inc(&factory->ref_count);
+}
+
+int uds_make_io_factory(struct block_device *bdev, struct io_factory **factory_ptr)
+{
+ int result;
+ struct io_factory *factory;
+
+ result = vdo_allocate(1, struct io_factory, __func__, &factory);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ factory->bdev = bdev;
+ atomic_set_release(&factory->ref_count, 1);
+
+ *factory_ptr = factory;
+ return UDS_SUCCESS;
+}
+
+int uds_replace_storage(struct io_factory *factory, struct block_device *bdev)
+{
+ factory->bdev = bdev;
+ return UDS_SUCCESS;
+}
+
+/* Free an I/O factory once all references have been released. */
+void uds_put_io_factory(struct io_factory *factory)
+{
+ if (atomic_add_return(-1, &factory->ref_count) <= 0)
+ vdo_free(factory);
+}
+
+size_t uds_get_writable_size(struct io_factory *factory)
+{
+ return i_size_read(factory->bdev->bd_inode);
+}
+
+/* Create a struct dm_bufio_client for an index region starting at offset. */
+int uds_make_bufio(struct io_factory *factory, off_t block_offset, size_t block_size,
+ unsigned int reserved_buffers, struct dm_bufio_client **client_ptr)
+{
+ struct dm_bufio_client *client;
+
+ client = dm_bufio_client_create(factory->bdev, block_size, reserved_buffers, 0,
+ NULL, NULL, 0);
+ if (IS_ERR(client))
+ return -PTR_ERR(client);
+
+ dm_bufio_set_sector_offset(client, block_offset * SECTORS_PER_BLOCK);
+ *client_ptr = client;
+ return UDS_SUCCESS;
+}
+
+static void read_ahead(struct buffered_reader *reader, sector_t block_number)
+{
+ if (block_number < reader->limit) {
+ sector_t read_ahead = min((sector_t) MAX_READ_AHEAD_BLOCKS,
+ reader->limit - block_number);
+
+ dm_bufio_prefetch(reader->client, block_number, read_ahead);
+ }
+}
+
+void uds_free_buffered_reader(struct buffered_reader *reader)
+{
+ if (reader == NULL)
+ return;
+
+ if (reader->buffer != NULL)
+ dm_bufio_release(reader->buffer);
+
+ dm_bufio_client_destroy(reader->client);
+ uds_put_io_factory(reader->factory);
+ vdo_free(reader);
+}
+
+/* Create a buffered reader for an index region starting at offset. */
+int uds_make_buffered_reader(struct io_factory *factory, off_t offset, u64 block_count,
+ struct buffered_reader **reader_ptr)
+{
+ int result;
+ struct dm_bufio_client *client = NULL;
+ struct buffered_reader *reader = NULL;
+
+ result = uds_make_bufio(factory, offset, UDS_BLOCK_SIZE, 1, &client);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = vdo_allocate(1, struct buffered_reader, "buffered reader", &reader);
+ if (result != VDO_SUCCESS) {
+ dm_bufio_client_destroy(client);
+ return result;
+ }
+
+ *reader = (struct buffered_reader) {
+ .factory = factory,
+ .client = client,
+ .buffer = NULL,
+ .limit = block_count,
+ .block_number = 0,
+ .start = NULL,
+ .end = NULL,
+ };
+
+ read_ahead(reader, 0);
+ uds_get_io_factory(factory);
+ *reader_ptr = reader;
+ return UDS_SUCCESS;
+}
+
+static int position_reader(struct buffered_reader *reader, sector_t block_number,
+ off_t offset)
+{
+ struct dm_buffer *buffer = NULL;
+ void *data;
+
+ if ((reader->end == NULL) || (block_number != reader->block_number)) {
+ if (block_number >= reader->limit)
+ return UDS_OUT_OF_RANGE;
+
+ if (reader->buffer != NULL)
+ dm_bufio_release(vdo_forget(reader->buffer));
+
+ data = dm_bufio_read(reader->client, block_number, &buffer);
+ if (IS_ERR(data))
+ return -PTR_ERR(data);
+
+ reader->buffer = buffer;
+ reader->start = data;
+ if (block_number == reader->block_number + 1)
+ read_ahead(reader, block_number + 1);
+ }
+
+ reader->block_number = block_number;
+ reader->end = reader->start + offset;
+ return UDS_SUCCESS;
+}
+
+static size_t bytes_remaining_in_read_buffer(struct buffered_reader *reader)
+{
+ return (reader->end == NULL) ? 0 : reader->start + UDS_BLOCK_SIZE - reader->end;
+}
+
+static int reset_reader(struct buffered_reader *reader)
+{
+ sector_t block_number;
+
+ if (bytes_remaining_in_read_buffer(reader) > 0)
+ return UDS_SUCCESS;
+
+ block_number = reader->block_number;
+ if (reader->end != NULL)
+ block_number++;
+
+ return position_reader(reader, block_number, 0);
+}
+
+int uds_read_from_buffered_reader(struct buffered_reader *reader, u8 *data,
+ size_t length)
+{
+ int result = UDS_SUCCESS;
+ size_t chunk_size;
+
+ while (length > 0) {
+ result = reset_reader(reader);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ chunk_size = min(length, bytes_remaining_in_read_buffer(reader));
+ memcpy(data, reader->end, chunk_size);
+ length -= chunk_size;
+ data += chunk_size;
+ reader->end += chunk_size;
+ }
+
+ return UDS_SUCCESS;
+}
+
+/*
+ * Verify that the next data on the reader matches the required value. If the value matches, the
+ * matching contents are consumed. If the value does not match, the reader state is unchanged.
+ */
+int uds_verify_buffered_data(struct buffered_reader *reader, const u8 *value,
+ size_t length)
+{
+ int result = UDS_SUCCESS;
+ size_t chunk_size;
+ sector_t start_block_number = reader->block_number;
+ int start_offset = reader->end - reader->start;
+
+ while (length > 0) {
+ result = reset_reader(reader);
+ if (result != UDS_SUCCESS) {
+ result = UDS_CORRUPT_DATA;
+ break;
+ }
+
+ chunk_size = min(length, bytes_remaining_in_read_buffer(reader));
+ if (memcmp(value, reader->end, chunk_size) != 0) {
+ result = UDS_CORRUPT_DATA;
+ break;
+ }
+
+ length -= chunk_size;
+ value += chunk_size;
+ reader->end += chunk_size;
+ }
+
+ if (result != UDS_SUCCESS)
+ position_reader(reader, start_block_number, start_offset);
+
+ return result;
+}
+
+/* Create a buffered writer for an index region starting at offset. */
+int uds_make_buffered_writer(struct io_factory *factory, off_t offset, u64 block_count,
+ struct buffered_writer **writer_ptr)
+{
+ int result;
+ struct dm_bufio_client *client = NULL;
+ struct buffered_writer *writer;
+
+ result = uds_make_bufio(factory, offset, UDS_BLOCK_SIZE, 1, &client);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = vdo_allocate(1, struct buffered_writer, "buffered writer", &writer);
+ if (result != VDO_SUCCESS) {
+ dm_bufio_client_destroy(client);
+ return result;
+ }
+
+ *writer = (struct buffered_writer) {
+ .factory = factory,
+ .client = client,
+ .buffer = NULL,
+ .limit = block_count,
+ .start = NULL,
+ .end = NULL,
+ .block_number = 0,
+ .error = UDS_SUCCESS,
+ };
+
+ uds_get_io_factory(factory);
+ *writer_ptr = writer;
+ return UDS_SUCCESS;
+}
+
+static size_t get_remaining_write_space(struct buffered_writer *writer)
+{
+ return writer->start + UDS_BLOCK_SIZE - writer->end;
+}
+
+static int __must_check prepare_next_buffer(struct buffered_writer *writer)
+{
+ struct dm_buffer *buffer = NULL;
+ void *data;
+
+ if (writer->block_number >= writer->limit) {
+ writer->error = UDS_OUT_OF_RANGE;
+ return UDS_OUT_OF_RANGE;
+ }
+
+ data = dm_bufio_new(writer->client, writer->block_number, &buffer);
+ if (IS_ERR(data)) {
+ writer->error = -PTR_ERR(data);
+ return writer->error;
+ }
+
+ writer->buffer = buffer;
+ writer->start = data;
+ writer->end = data;
+ return UDS_SUCCESS;
+}
+
+static int flush_previous_buffer(struct buffered_writer *writer)
+{
+ size_t available;
+
+ if (writer->buffer == NULL)
+ return writer->error;
+
+ if (writer->error == UDS_SUCCESS) {
+ available = get_remaining_write_space(writer);
+
+ if (available > 0)
+ memset(writer->end, 0, available);
+
+ dm_bufio_mark_buffer_dirty(writer->buffer);
+ }
+
+ dm_bufio_release(writer->buffer);
+ writer->buffer = NULL;
+ writer->start = NULL;
+ writer->end = NULL;
+ writer->block_number++;
+ return writer->error;
+}
+
+void uds_free_buffered_writer(struct buffered_writer *writer)
+{
+ int result;
+
+ if (writer == NULL)
+ return;
+
+ flush_previous_buffer(writer);
+ result = -dm_bufio_write_dirty_buffers(writer->client);
+ if (result != UDS_SUCCESS)
+ vdo_log_warning_strerror(result, "%s: failed to sync storage", __func__);
+
+ dm_bufio_client_destroy(writer->client);
+ uds_put_io_factory(writer->factory);
+ vdo_free(writer);
+}
+
+/*
+ * Append data to the buffer, writing as needed. If no data is provided, zeros are written instead.
+ * If a write error occurs, it is recorded and returned on every subsequent write attempt.
+ */
+int uds_write_to_buffered_writer(struct buffered_writer *writer, const u8 *data,
+ size_t length)
+{
+ int result = writer->error;
+ size_t chunk_size;
+
+ while ((length > 0) && (result == UDS_SUCCESS)) {
+ if (writer->buffer == NULL) {
+ result = prepare_next_buffer(writer);
+ continue;
+ }
+
+ chunk_size = min(length, get_remaining_write_space(writer));
+ if (data == NULL) {
+ memset(writer->end, 0, chunk_size);
+ } else {
+ memcpy(writer->end, data, chunk_size);
+ data += chunk_size;
+ }
+
+ length -= chunk_size;
+ writer->end += chunk_size;
+
+ if (get_remaining_write_space(writer) == 0)
+ result = uds_flush_buffered_writer(writer);
+ }
+
+ return result;
+}
+
+int uds_flush_buffered_writer(struct buffered_writer *writer)
+{
+ if (writer->error != UDS_SUCCESS)
+ return writer->error;
+
+ return flush_previous_buffer(writer);
+}
diff --git a/drivers/md/dm-vdo/indexer/io-factory.h b/drivers/md/dm-vdo/indexer/io-factory.h
new file mode 100644
index 000000000000..7fb5a0616a79
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/io-factory.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_IO_FACTORY_H
+#define UDS_IO_FACTORY_H
+
+#include <linux/dm-bufio.h>
+
+/*
+ * The I/O factory manages all low-level I/O operations to the underlying storage device. Its main
+ * clients are the index layout and the volume. The buffered reader and buffered writer interfaces
+ * are helpers for accessing data in a contiguous range of storage blocks.
+ */
+
+struct buffered_reader;
+struct buffered_writer;
+
+struct io_factory;
+
+enum {
+ UDS_BLOCK_SIZE = 4096,
+ SECTORS_PER_BLOCK = UDS_BLOCK_SIZE >> SECTOR_SHIFT,
+};
+
+int __must_check uds_make_io_factory(struct block_device *bdev,
+ struct io_factory **factory_ptr);
+
+int __must_check uds_replace_storage(struct io_factory *factory,
+ struct block_device *bdev);
+
+void uds_put_io_factory(struct io_factory *factory);
+
+size_t __must_check uds_get_writable_size(struct io_factory *factory);
+
+int __must_check uds_make_bufio(struct io_factory *factory, off_t block_offset,
+ size_t block_size, unsigned int reserved_buffers,
+ struct dm_bufio_client **client_ptr);
+
+int __must_check uds_make_buffered_reader(struct io_factory *factory, off_t offset,
+ u64 block_count,
+ struct buffered_reader **reader_ptr);
+
+void uds_free_buffered_reader(struct buffered_reader *reader);
+
+int __must_check uds_read_from_buffered_reader(struct buffered_reader *reader, u8 *data,
+ size_t length);
+
+int __must_check uds_verify_buffered_data(struct buffered_reader *reader, const u8 *value,
+ size_t length);
+
+int __must_check uds_make_buffered_writer(struct io_factory *factory, off_t offset,
+ u64 block_count,
+ struct buffered_writer **writer_ptr);
+
+void uds_free_buffered_writer(struct buffered_writer *buffer);
+
+int __must_check uds_write_to_buffered_writer(struct buffered_writer *writer,
+ const u8 *data, size_t length);
+
+int __must_check uds_flush_buffered_writer(struct buffered_writer *writer);
+
+#endif /* UDS_IO_FACTORY_H */
diff --git a/drivers/md/dm-vdo/indexer/open-chapter.c b/drivers/md/dm-vdo/indexer/open-chapter.c
new file mode 100644
index 000000000000..4a67bcadaae0
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/open-chapter.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "open-chapter.h"
+
+#include <linux/log2.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+
+#include "config.h"
+#include "hash-utils.h"
+
+/*
+ * Each index zone has a dedicated open chapter zone structure which gets an equal share of the
+ * open chapter space. Records are assigned to zones based on their record name. Within each zone,
+ * records are stored in an array in the order they arrive. Additionally, a reference to each
+ * record is stored in a hash table to help determine if a new record duplicates an existing one.
+ * If new metadata for an existing name arrives, the record is altered in place. The array of
+ * records is 1-based so that record number 0 can be used to indicate an unused hash slot.
+ *
+ * Deleted records are marked with a flag rather than actually removed to simplify hash table
+ * management. The array of deleted flags overlays the array of hash slots, but the flags are
+ * indexed by record number instead of by record name. The number of hash slots will always be a
+ * power of two that is greater than the number of records to be indexed, guaranteeing that hash
+ * insertion cannot fail, and that there are sufficient flags for all records.
+ *
+ * Once any open chapter zone fills its available space, the chapter is closed. The records from
+ * each zone are interleaved to attempt to preserve temporal locality and assigned to record pages.
+ * Empty or deleted records are replaced by copies of a valid record so that the record pages only
+ * contain valid records. The chapter then constructs a delta index which maps each record name to
+ * the record page on which that record can be found, which is split into index pages. These
+ * structures are then passed to the volume to be recorded on storage.
+ *
+ * When the index is saved, the open chapter records are saved in a single array, once again
+ * interleaved to attempt to preserve temporal locality. When the index is reloaded, there may be a
+ * different number of zones than previously, so the records must be parcelled out to their new
+ * zones. In addition, depending on the distribution of record names, a new zone may have more
+ * records than it has space. In this case, the latest records for that zone will be discarded.
+ */
+
+static const u8 OPEN_CHAPTER_MAGIC[] = "ALBOC";
+static const u8 OPEN_CHAPTER_VERSION[] = "02.00";
+
+#define OPEN_CHAPTER_MAGIC_LENGTH (sizeof(OPEN_CHAPTER_MAGIC) - 1)
+#define OPEN_CHAPTER_VERSION_LENGTH (sizeof(OPEN_CHAPTER_VERSION) - 1)
+#define LOAD_RATIO 2
+
+static inline size_t records_size(const struct open_chapter_zone *open_chapter)
+{
+ return sizeof(struct uds_volume_record) * (1 + open_chapter->capacity);
+}
+
+static inline size_t slots_size(size_t slot_count)
+{
+ return sizeof(struct open_chapter_zone_slot) * slot_count;
+}
+
+int uds_make_open_chapter(const struct index_geometry *geometry, unsigned int zone_count,
+ struct open_chapter_zone **open_chapter_ptr)
+{
+ int result;
+ struct open_chapter_zone *open_chapter;
+ size_t capacity = geometry->records_per_chapter / zone_count;
+ size_t slot_count = (1 << bits_per(capacity * LOAD_RATIO));
+
+ result = vdo_allocate_extended(struct open_chapter_zone, slot_count,
+ struct open_chapter_zone_slot, "open chapter",
+ &open_chapter);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ open_chapter->slot_count = slot_count;
+ open_chapter->capacity = capacity;
+ result = vdo_allocate_cache_aligned(records_size(open_chapter), "record pages",
+ &open_chapter->records);
+ if (result != VDO_SUCCESS) {
+ uds_free_open_chapter(open_chapter);
+ return result;
+ }
+
+ *open_chapter_ptr = open_chapter;
+ return UDS_SUCCESS;
+}
+
+void uds_reset_open_chapter(struct open_chapter_zone *open_chapter)
+{
+ open_chapter->size = 0;
+ open_chapter->deletions = 0;
+
+ memset(open_chapter->records, 0, records_size(open_chapter));
+ memset(open_chapter->slots, 0, slots_size(open_chapter->slot_count));
+}
+
+static unsigned int probe_chapter_slots(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name)
+{
+ struct uds_volume_record *record;
+ unsigned int slot_count = open_chapter->slot_count;
+ unsigned int slot = uds_name_to_hash_slot(name, slot_count);
+ unsigned int record_number;
+ unsigned int attempts = 1;
+
+ while (true) {
+ record_number = open_chapter->slots[slot].record_number;
+
+ /*
+ * If the hash slot is empty, we've reached the end of a chain without finding the
+ * record and should terminate the search.
+ */
+ if (record_number == 0)
+ return slot;
+
+ /*
+ * If the name of the record referenced by the slot matches and has not been
+ * deleted, then we've found the requested name.
+ */
+ record = &open_chapter->records[record_number];
+ if ((memcmp(&record->name, name, UDS_RECORD_NAME_SIZE) == 0) &&
+ !open_chapter->slots[record_number].deleted)
+ return slot;
+
+ /*
+ * Quadratic probing: advance the probe by 1, 2, 3, etc. and try again. This
+ * performs better than linear probing and works best for 2^N slots.
+ */
+ slot = (slot + attempts++) % slot_count;
+ }
+}
+
+void uds_search_open_chapter(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name,
+ struct uds_record_data *metadata, bool *found)
+{
+ unsigned int slot;
+ unsigned int record_number;
+
+ slot = probe_chapter_slots(open_chapter, name);
+ record_number = open_chapter->slots[slot].record_number;
+ if (record_number == 0) {
+ *found = false;
+ } else {
+ *found = true;
+ *metadata = open_chapter->records[record_number].data;
+ }
+}
+
+/* Add a record to the open chapter zone and return the remaining space. */
+int uds_put_open_chapter(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name,
+ const struct uds_record_data *metadata)
+{
+ unsigned int slot;
+ unsigned int record_number;
+ struct uds_volume_record *record;
+
+ if (open_chapter->size >= open_chapter->capacity)
+ return 0;
+
+ slot = probe_chapter_slots(open_chapter, name);
+ record_number = open_chapter->slots[slot].record_number;
+
+ if (record_number == 0) {
+ record_number = ++open_chapter->size;
+ open_chapter->slots[slot].record_number = record_number;
+ }
+
+ record = &open_chapter->records[record_number];
+ record->name = *name;
+ record->data = *metadata;
+
+ return open_chapter->capacity - open_chapter->size;
+}
+
+void uds_remove_from_open_chapter(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name)
+{
+ unsigned int slot;
+ unsigned int record_number;
+
+ slot = probe_chapter_slots(open_chapter, name);
+ record_number = open_chapter->slots[slot].record_number;
+
+ if (record_number > 0) {
+ open_chapter->slots[record_number].deleted = true;
+ open_chapter->deletions += 1;
+ }
+}
+
+void uds_free_open_chapter(struct open_chapter_zone *open_chapter)
+{
+ if (open_chapter != NULL) {
+ vdo_free(open_chapter->records);
+ vdo_free(open_chapter);
+ }
+}
+
+/* Map each record name to its record page number in the delta chapter index. */
+static int fill_delta_chapter_index(struct open_chapter_zone **chapter_zones,
+ unsigned int zone_count,
+ struct open_chapter_index *index,
+ struct uds_volume_record *collated_records)
+{
+ int result;
+ unsigned int records_per_chapter;
+ unsigned int records_per_page;
+ unsigned int record_index;
+ unsigned int records = 0;
+ u32 page_number;
+ unsigned int z;
+ int overflow_count = 0;
+ struct uds_volume_record *fill_record = NULL;
+
+ /*
+ * The record pages should not have any empty space, so find a record with which to fill
+ * the chapter zone if it was closed early, and also to replace any deleted records. The
+ * last record in any filled zone is guaranteed to not have been deleted, so use one of
+ * those.
+ */
+ for (z = 0; z < zone_count; z++) {
+ struct open_chapter_zone *zone = chapter_zones[z];
+
+ if (zone->size == zone->capacity) {
+ fill_record = &zone->records[zone->size];
+ break;
+ }
+ }
+
+ records_per_chapter = index->geometry->records_per_chapter;
+ records_per_page = index->geometry->records_per_page;
+
+ for (records = 0; records < records_per_chapter; records++) {
+ struct uds_volume_record *record = &collated_records[records];
+ struct open_chapter_zone *open_chapter;
+
+ /* The record arrays in the zones are 1-based. */
+ record_index = 1 + (records / zone_count);
+ page_number = records / records_per_page;
+ open_chapter = chapter_zones[records % zone_count];
+
+ /* Use the fill record in place of an unused record. */
+ if (record_index > open_chapter->size ||
+ open_chapter->slots[record_index].deleted) {
+ *record = *fill_record;
+ continue;
+ }
+
+ *record = open_chapter->records[record_index];
+ result = uds_put_open_chapter_index_record(index, &record->name,
+ page_number);
+ switch (result) {
+ case UDS_SUCCESS:
+ break;
+ case UDS_OVERFLOW:
+ overflow_count++;
+ break;
+ default:
+ vdo_log_error_strerror(result,
+ "failed to build open chapter index");
+ return result;
+ }
+ }
+
+ if (overflow_count > 0)
+ vdo_log_warning("Failed to add %d entries to chapter index",
+ overflow_count);
+
+ return UDS_SUCCESS;
+}
+
+int uds_close_open_chapter(struct open_chapter_zone **chapter_zones,
+ unsigned int zone_count, struct volume *volume,
+ struct open_chapter_index *chapter_index,
+ struct uds_volume_record *collated_records,
+ u64 virtual_chapter_number)
+{
+ int result;
+
+ uds_empty_open_chapter_index(chapter_index, virtual_chapter_number);
+ result = fill_delta_chapter_index(chapter_zones, zone_count, chapter_index,
+ collated_records);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return uds_write_chapter(volume, chapter_index, collated_records);
+}
+
+int uds_save_open_chapter(struct uds_index *index, struct buffered_writer *writer)
+{
+ int result;
+ struct open_chapter_zone *open_chapter;
+ struct uds_volume_record *record;
+ u8 record_count_data[sizeof(u32)];
+ u32 record_count = 0;
+ unsigned int record_index;
+ unsigned int z;
+
+ result = uds_write_to_buffered_writer(writer, OPEN_CHAPTER_MAGIC,
+ OPEN_CHAPTER_MAGIC_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_write_to_buffered_writer(writer, OPEN_CHAPTER_VERSION,
+ OPEN_CHAPTER_VERSION_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ for (z = 0; z < index->zone_count; z++) {
+ open_chapter = index->zones[z]->open_chapter;
+ record_count += open_chapter->size - open_chapter->deletions;
+ }
+
+ put_unaligned_le32(record_count, record_count_data);
+ result = uds_write_to_buffered_writer(writer, record_count_data,
+ sizeof(record_count_data));
+ if (result != UDS_SUCCESS)
+ return result;
+
+ record_index = 1;
+ while (record_count > 0) {
+ for (z = 0; z < index->zone_count; z++) {
+ open_chapter = index->zones[z]->open_chapter;
+ if (record_index > open_chapter->size)
+ continue;
+
+ if (open_chapter->slots[record_index].deleted)
+ continue;
+
+ record = &open_chapter->records[record_index];
+ result = uds_write_to_buffered_writer(writer, (u8 *) record,
+ sizeof(*record));
+ if (result != UDS_SUCCESS)
+ return result;
+
+ record_count--;
+ }
+
+ record_index++;
+ }
+
+ return uds_flush_buffered_writer(writer);
+}
+
+u64 uds_compute_saved_open_chapter_size(struct index_geometry *geometry)
+{
+ unsigned int records_per_chapter = geometry->records_per_chapter;
+
+ return OPEN_CHAPTER_MAGIC_LENGTH + OPEN_CHAPTER_VERSION_LENGTH + sizeof(u32) +
+ records_per_chapter * sizeof(struct uds_volume_record);
+}
+
+static int load_version20(struct uds_index *index, struct buffered_reader *reader)
+{
+ int result;
+ u32 record_count;
+ u8 record_count_data[sizeof(u32)];
+ struct uds_volume_record record;
+
+ /*
+ * Track which zones cannot accept any more records. If the open chapter had a different
+ * number of zones previously, some new zones may have more records than they have space
+ * for. These overflow records will be discarded.
+ */
+ bool full_flags[MAX_ZONES] = {
+ false,
+ };
+
+ result = uds_read_from_buffered_reader(reader, (u8 *) &record_count_data,
+ sizeof(record_count_data));
+ if (result != UDS_SUCCESS)
+ return result;
+
+ record_count = get_unaligned_le32(record_count_data);
+ while (record_count-- > 0) {
+ unsigned int zone = 0;
+
+ result = uds_read_from_buffered_reader(reader, (u8 *) &record,
+ sizeof(record));
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (index->zone_count > 1)
+ zone = uds_get_volume_index_zone(index->volume_index,
+ &record.name);
+
+ if (!full_flags[zone]) {
+ struct open_chapter_zone *open_chapter;
+ unsigned int remaining;
+
+ open_chapter = index->zones[zone]->open_chapter;
+ remaining = uds_put_open_chapter(open_chapter, &record.name,
+ &record.data);
+ /* Do not allow any zone to fill completely. */
+ full_flags[zone] = (remaining <= 1);
+ }
+ }
+
+ return UDS_SUCCESS;
+}
+
+int uds_load_open_chapter(struct uds_index *index, struct buffered_reader *reader)
+{
+ u8 version[OPEN_CHAPTER_VERSION_LENGTH];
+ int result;
+
+ result = uds_verify_buffered_data(reader, OPEN_CHAPTER_MAGIC,
+ OPEN_CHAPTER_MAGIC_LENGTH);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_read_from_buffered_reader(reader, version, sizeof(version));
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (memcmp(OPEN_CHAPTER_VERSION, version, sizeof(version)) != 0) {
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "Invalid open chapter version: %.*s",
+ (int) sizeof(version), version);
+ }
+
+ return load_version20(index, reader);
+}
diff --git a/drivers/md/dm-vdo/indexer/open-chapter.h b/drivers/md/dm-vdo/indexer/open-chapter.h
new file mode 100644
index 000000000000..a4250bb19525
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/open-chapter.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_OPEN_CHAPTER_H
+#define UDS_OPEN_CHAPTER_H
+
+#include "chapter-index.h"
+#include "geometry.h"
+#include "index.h"
+#include "volume.h"
+
+/*
+ * The open chapter tracks the newest records in memory. Like the index as a whole, each open
+ * chapter is divided into a number of independent zones which are interleaved when the chapter is
+ * committed to the volume.
+ */
+
+enum {
+ OPEN_CHAPTER_RECORD_NUMBER_BITS = 23,
+};
+
+struct open_chapter_zone_slot {
+ /* If non-zero, the record number addressed by this hash slot */
+ unsigned int record_number : OPEN_CHAPTER_RECORD_NUMBER_BITS;
+ /* If true, the record at the index of this hash slot was deleted */
+ bool deleted : 1;
+} __packed;
+
+struct open_chapter_zone {
+ /* The maximum number of records that can be stored */
+ unsigned int capacity;
+ /* The number of records stored */
+ unsigned int size;
+ /* The number of deleted records */
+ unsigned int deletions;
+ /* Array of chunk records, 1-based */
+ struct uds_volume_record *records;
+ /* The number of slots in the hash table */
+ unsigned int slot_count;
+ /* The hash table slots, referencing virtual record numbers */
+ struct open_chapter_zone_slot slots[];
+};
+
+int __must_check uds_make_open_chapter(const struct index_geometry *geometry,
+ unsigned int zone_count,
+ struct open_chapter_zone **open_chapter_ptr);
+
+void uds_reset_open_chapter(struct open_chapter_zone *open_chapter);
+
+void uds_search_open_chapter(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name,
+ struct uds_record_data *metadata, bool *found);
+
+int __must_check uds_put_open_chapter(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name,
+ const struct uds_record_data *metadata);
+
+void uds_remove_from_open_chapter(struct open_chapter_zone *open_chapter,
+ const struct uds_record_name *name);
+
+void uds_free_open_chapter(struct open_chapter_zone *open_chapter);
+
+int __must_check uds_close_open_chapter(struct open_chapter_zone **chapter_zones,
+ unsigned int zone_count, struct volume *volume,
+ struct open_chapter_index *chapter_index,
+ struct uds_volume_record *collated_records,
+ u64 virtual_chapter_number);
+
+int __must_check uds_save_open_chapter(struct uds_index *index,
+ struct buffered_writer *writer);
+
+int __must_check uds_load_open_chapter(struct uds_index *index,
+ struct buffered_reader *reader);
+
+u64 uds_compute_saved_open_chapter_size(struct index_geometry *geometry);
+
+#endif /* UDS_OPEN_CHAPTER_H */
diff --git a/drivers/md/dm-vdo/indexer/radix-sort.c b/drivers/md/dm-vdo/indexer/radix-sort.c
new file mode 100644
index 000000000000..66b8c706a1ef
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/radix-sort.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "radix-sort.h"
+
+#include <linux/limits.h>
+#include <linux/types.h>
+
+#include "memory-alloc.h"
+#include "string-utils.h"
+
+/*
+ * This implementation allocates one large object to do the sorting, which can be reused as many
+ * times as desired. The amount of memory required is logarithmically proportional to the number of
+ * keys to be sorted.
+ */
+
+/* Piles smaller than this are handled with a simple insertion sort. */
+#define INSERTION_SORT_THRESHOLD 12
+
+/* Sort keys are pointers to immutable fixed-length arrays of bytes. */
+typedef const u8 *sort_key_t;
+
+/*
+ * The keys are separated into piles based on the byte in each keys at the current offset, so the
+ * number of keys with each byte must be counted.
+ */
+struct histogram {
+ /* The number of non-empty bins */
+ u16 used;
+ /* The index (key byte) of the first non-empty bin */
+ u16 first;
+ /* The index (key byte) of the last non-empty bin */
+ u16 last;
+ /* The number of occurrences of each specific byte */
+ u32 size[256];
+};
+
+/*
+ * Sub-tasks are manually managed on a stack, both for performance and to put a logarithmic bound
+ * on the stack space needed.
+ */
+struct task {
+ /* Pointer to the first key to sort. */
+ sort_key_t *first_key;
+ /* Pointer to the last key to sort. */
+ sort_key_t *last_key;
+ /* The offset into the key at which to continue sorting. */
+ u16 offset;
+ /* The number of bytes remaining in the sort keys. */
+ u16 length;
+};
+
+struct radix_sorter {
+ unsigned int count;
+ struct histogram bins;
+ sort_key_t *pile[256];
+ struct task *end_of_stack;
+ struct task insertion_list[256];
+ struct task stack[];
+};
+
+/* Compare a segment of two fixed-length keys starting at an offset. */
+static inline int compare(sort_key_t key1, sort_key_t key2, u16 offset, u16 length)
+{
+ return memcmp(&key1[offset], &key2[offset], length);
+}
+
+/* Insert the next unsorted key into an array of sorted keys. */
+static inline void insert_key(const struct task task, sort_key_t *next)
+{
+ /* Pull the unsorted key out, freeing up the array slot. */
+ sort_key_t unsorted = *next;
+
+ /* Compare the key to the preceding sorted entries, shifting down ones that are larger. */
+ while ((--next >= task.first_key) &&
+ (compare(unsorted, next[0], task.offset, task.length) < 0))
+ next[1] = next[0];
+
+ /* Insert the key into the last slot that was cleared, sorting it. */
+ next[1] = unsorted;
+}
+
+/*
+ * Sort a range of key segments using an insertion sort. This simple sort is faster than the
+ * 256-way radix sort when the number of keys to sort is small.
+ */
+static inline void insertion_sort(const struct task task)
+{
+ sort_key_t *next;
+
+ for (next = task.first_key + 1; next <= task.last_key; next++)
+ insert_key(task, next);
+}
+
+/* Push a sorting task onto a task stack. */
+static inline void push_task(struct task **stack_pointer, sort_key_t *first_key,
+ u32 count, u16 offset, u16 length)
+{
+ struct task *task = (*stack_pointer)++;
+
+ task->first_key = first_key;
+ task->last_key = &first_key[count - 1];
+ task->offset = offset;
+ task->length = length;
+}
+
+static inline void swap_keys(sort_key_t *a, sort_key_t *b)
+{
+ sort_key_t c = *a;
+ *a = *b;
+ *b = c;
+}
+
+/*
+ * Count the number of times each byte value appears in the arrays of keys to sort at the current
+ * offset, keeping track of the number of non-empty bins, and the index of the first and last
+ * non-empty bin.
+ */
+static inline void measure_bins(const struct task task, struct histogram *bins)
+{
+ sort_key_t *key_ptr;
+
+ /*
+ * Subtle invariant: bins->used and bins->size[] are zero because the sorting code clears
+ * it all out as it goes. Even though this structure is re-used, we don't need to pay to
+ * zero it before starting a new tally.
+ */
+ bins->first = U8_MAX;
+ bins->last = 0;
+
+ for (key_ptr = task.first_key; key_ptr <= task.last_key; key_ptr++) {
+ /* Increment the count for the byte in the key at the current offset. */
+ u8 bin = (*key_ptr)[task.offset];
+ u32 size = ++bins->size[bin];
+
+ /* Track non-empty bins. */
+ if (size == 1) {
+ bins->used += 1;
+ if (bin < bins->first)
+ bins->first = bin;
+
+ if (bin > bins->last)
+ bins->last = bin;
+ }
+ }
+}
+
+/*
+ * Convert the bin sizes to pointers to where each pile goes.
+ *
+ * pile[0] = first_key + bin->size[0],
+ * pile[1] = pile[0] + bin->size[1], etc.
+ *
+ * After the keys are moved to the appropriate pile, we'll need to sort each of the piles by the
+ * next radix position. A new task is put on the stack for each pile containing lots of keys, or a
+ * new task is put on the list for each pile containing few keys.
+ *
+ * @stack: pointer the top of the stack
+ * @end_of_stack: the end of the stack
+ * @list: pointer the head of the list
+ * @pile: array for pointers to the end of each pile
+ * @bins: the histogram of the sizes of each pile
+ * @first_key: the first key of the stack
+ * @offset: the next radix position to sort by
+ * @length: the number of bytes remaining in the sort keys
+ *
+ * Return: UDS_SUCCESS or an error code
+ */
+static inline int push_bins(struct task **stack, struct task *end_of_stack,
+ struct task **list, sort_key_t *pile[],
+ struct histogram *bins, sort_key_t *first_key,
+ u16 offset, u16 length)
+{
+ sort_key_t *pile_start = first_key;
+ int bin;
+
+ for (bin = bins->first; ; bin++) {
+ u32 size = bins->size[bin];
+
+ /* Skip empty piles. */
+ if (size == 0)
+ continue;
+
+ /* There's no need to sort empty keys. */
+ if (length > 0) {
+ if (size > INSERTION_SORT_THRESHOLD) {
+ if (*stack >= end_of_stack)
+ return UDS_BAD_STATE;
+
+ push_task(stack, pile_start, size, offset, length);
+ } else if (size > 1) {
+ push_task(list, pile_start, size, offset, length);
+ }
+ }
+
+ pile_start += size;
+ pile[bin] = pile_start;
+ if (--bins->used == 0)
+ break;
+ }
+
+ return UDS_SUCCESS;
+}
+
+int uds_make_radix_sorter(unsigned int count, struct radix_sorter **sorter)
+{
+ int result;
+ unsigned int stack_size = count / INSERTION_SORT_THRESHOLD;
+ struct radix_sorter *radix_sorter;
+
+ result = vdo_allocate_extended(struct radix_sorter, stack_size, struct task,
+ __func__, &radix_sorter);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ radix_sorter->count = count;
+ radix_sorter->end_of_stack = radix_sorter->stack + stack_size;
+ *sorter = radix_sorter;
+ return UDS_SUCCESS;
+}
+
+void uds_free_radix_sorter(struct radix_sorter *sorter)
+{
+ vdo_free(sorter);
+}
+
+/*
+ * Sort pointers to fixed-length keys (arrays of bytes) using a radix sort. The sort implementation
+ * is unstable, so the relative ordering of equal keys is not preserved.
+ */
+int uds_radix_sort(struct radix_sorter *sorter, const unsigned char *keys[],
+ unsigned int count, unsigned short length)
+{
+ struct task start;
+ struct histogram *bins = &sorter->bins;
+ sort_key_t **pile = sorter->pile;
+ struct task *task_stack = sorter->stack;
+
+ /* All zero-length keys are identical and therefore already sorted. */
+ if ((count == 0) || (length == 0))
+ return UDS_SUCCESS;
+
+ /* The initial task is to sort the entire length of all the keys. */
+ start = (struct task) {
+ .first_key = keys,
+ .last_key = &keys[count - 1],
+ .offset = 0,
+ .length = length,
+ };
+
+ if (count <= INSERTION_SORT_THRESHOLD) {
+ insertion_sort(start);
+ return UDS_SUCCESS;
+ }
+
+ if (count > sorter->count)
+ return UDS_INVALID_ARGUMENT;
+
+ /*
+ * Repeatedly consume a sorting task from the stack and process it, pushing new sub-tasks
+ * onto the stack for each radix-sorted pile. When all tasks and sub-tasks have been
+ * processed, the stack will be empty and all the keys in the starting task will be fully
+ * sorted.
+ */
+ for (*task_stack = start; task_stack >= sorter->stack; task_stack--) {
+ const struct task task = *task_stack;
+ struct task *insertion_task_list;
+ int result;
+ sort_key_t *fence;
+ sort_key_t *end;
+
+ measure_bins(task, bins);
+
+ /*
+ * Now that we know how large each bin is, generate pointers for each of the piles
+ * and push a new task to sort each pile by the next radix byte.
+ */
+ insertion_task_list = sorter->insertion_list;
+ result = push_bins(&task_stack, sorter->end_of_stack,
+ &insertion_task_list, pile, bins, task.first_key,
+ task.offset + 1, task.length - 1);
+ if (result != UDS_SUCCESS) {
+ memset(bins, 0, sizeof(*bins));
+ return result;
+ }
+
+ /* Now bins->used is zero again. */
+
+ /*
+ * Don't bother processing the last pile: when piles 0..N-1 are all in place, then
+ * pile N must also be in place.
+ */
+ end = task.last_key - bins->size[bins->last];
+ bins->size[bins->last] = 0;
+
+ for (fence = task.first_key; fence <= end; ) {
+ u8 bin;
+ sort_key_t key = *fence;
+
+ /*
+ * The radix byte of the key tells us which pile it belongs in. Swap it for
+ * an unprocessed item just below that pile, and repeat.
+ */
+ while (--pile[bin = key[task.offset]] > fence)
+ swap_keys(pile[bin], &key);
+
+ /*
+ * The pile reached the fence. Put the key at the bottom of that pile,
+ * completing it, and advance the fence to the next pile.
+ */
+ *fence = key;
+ fence += bins->size[bin];
+ bins->size[bin] = 0;
+ }
+
+ /* Now bins->size[] is all zero again. */
+
+ /*
+ * When the number of keys in a task gets small enough, it is faster to use an
+ * insertion sort than to keep subdividing into tiny piles.
+ */
+ while (--insertion_task_list >= sorter->insertion_list)
+ insertion_sort(*insertion_task_list);
+ }
+
+ return UDS_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/indexer/radix-sort.h b/drivers/md/dm-vdo/indexer/radix-sort.h
new file mode 100644
index 000000000000..812949bc2cee
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/radix-sort.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_RADIX_SORT_H
+#define UDS_RADIX_SORT_H
+
+/*
+ * Radix sort is implemented using an American Flag sort, an unstable, in-place 8-bit radix
+ * exchange sort. This is adapted from the algorithm in the paper by Peter M. McIlroy, Keith
+ * Bostic, and M. Douglas McIlroy, "Engineering Radix Sort".
+ *
+ * http://www.usenix.org/publications/compsystems/1993/win_mcilroy.pdf
+ */
+
+struct radix_sorter;
+
+int __must_check uds_make_radix_sorter(unsigned int count, struct radix_sorter **sorter);
+
+void uds_free_radix_sorter(struct radix_sorter *sorter);
+
+int __must_check uds_radix_sort(struct radix_sorter *sorter, const unsigned char *keys[],
+ unsigned int count, unsigned short length);
+
+#endif /* UDS_RADIX_SORT_H */
diff --git a/drivers/md/dm-vdo/indexer/sparse-cache.c b/drivers/md/dm-vdo/indexer/sparse-cache.c
new file mode 100644
index 000000000000..28920167827c
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/sparse-cache.c
@@ -0,0 +1,624 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "sparse-cache.h"
+
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/dm-bufio.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "chapter-index.h"
+#include "config.h"
+#include "index.h"
+
+/*
+ * Since the cache is small, it is implemented as a simple array of cache entries. Searching for a
+ * specific virtual chapter is implemented as a linear search. The cache replacement policy is
+ * least-recently-used (LRU). Again, the small size of the cache allows the LRU order to be
+ * maintained by shifting entries in an array list.
+ *
+ * Changing the contents of the cache requires the coordinated participation of all zone threads
+ * via the careful use of barrier messages sent to all the index zones by the triage queue worker
+ * thread. The critical invariant for coordination is that the cache membership must not change
+ * between updates, so that all calls to uds_sparse_cache_contains() from the zone threads must all
+ * receive the same results for every virtual chapter number. To ensure that critical invariant,
+ * state changes such as "that virtual chapter is no longer in the volume" and "skip searching that
+ * chapter because it has had too many cache misses" are represented separately from the cache
+ * membership information (the virtual chapter number).
+ *
+ * As a result of this invariant, we have the guarantee that every zone thread will call
+ * uds_update_sparse_cache() once and exactly once to request a chapter that is not in the cache,
+ * and the serialization of the barrier requests from the triage queue ensures they will all
+ * request the same chapter number. This means the only synchronization we need can be provided by
+ * a pair of thread barriers used only in the uds_update_sparse_cache() call, providing a critical
+ * section where a single zone thread can drive the cache update while all the other zone threads
+ * are known to be blocked, waiting in the second barrier. Outside that critical section, all the
+ * zone threads implicitly hold a shared lock. Inside it, the thread for zone zero holds an
+ * exclusive lock. No other threads may access or modify the cache entries.
+ *
+ * Chapter statistics must only be modified by a single thread, which is also the zone zero thread.
+ * All fields that might be frequently updated by that thread are kept in separate cache-aligned
+ * structures so they will not cause cache contention via "false sharing" with the fields that are
+ * frequently accessed by all of the zone threads.
+ *
+ * The LRU order is managed independently by each zone thread, and each zone uses its own list for
+ * searching and cache membership queries. The zone zero list is used to decide which chapter to
+ * evict when the cache is updated, and its search list is copied to the other threads at that
+ * time.
+ *
+ * The virtual chapter number field of the cache entry is the single field indicating whether a
+ * chapter is a member of the cache or not. The value NO_CHAPTER is used to represent a null or
+ * undefined chapter number. When present in the virtual chapter number field of a
+ * cached_chapter_index, it indicates that the cache entry is dead, and all the other fields of
+ * that entry (other than immutable pointers to cache memory) are undefined and irrelevant. Any
+ * cache entry that is not marked as dead is fully defined and a member of the cache, and
+ * uds_sparse_cache_contains() will always return true for any virtual chapter number that appears
+ * in any of the cache entries.
+ *
+ * A chapter index that is a member of the cache may be excluded from searches between calls to
+ * uds_update_sparse_cache() in two different ways. First, when a chapter falls off the end of the
+ * volume, its virtual chapter number will be less that the oldest virtual chapter number. Since
+ * that chapter is no longer part of the volume, there's no point in continuing to search that
+ * chapter index. Once invalidated, that virtual chapter will still be considered a member of the
+ * cache, but it will no longer be searched for matching names.
+ *
+ * The second mechanism is a heuristic based on keeping track of the number of consecutive search
+ * misses in a given chapter index. Once that count exceeds a threshold, the skip_search flag will
+ * be set to true, causing the chapter to be skipped when searching the entire cache, but still
+ * allowing it to be found when searching for a hook in that specific chapter. Finding a hook will
+ * clear the skip_search flag, once again allowing the non-hook searches to use that cache entry.
+ * Again, regardless of the state of the skip_search flag, the virtual chapter must still
+ * considered to be a member of the cache for uds_sparse_cache_contains().
+ */
+
+#define SKIP_SEARCH_THRESHOLD 20000
+#define ZONE_ZERO 0
+
+/*
+ * These counters are essentially fields of the struct cached_chapter_index, but are segregated
+ * into this structure because they are frequently modified. They are grouped and aligned to keep
+ * them on different cache lines from the chapter fields that are accessed far more often than they
+ * are updated.
+ */
+struct __aligned(L1_CACHE_BYTES) cached_index_counters {
+ u64 consecutive_misses;
+};
+
+struct __aligned(L1_CACHE_BYTES) cached_chapter_index {
+ /*
+ * The virtual chapter number of the cached chapter index. NO_CHAPTER means this cache
+ * entry is unused. This field must only be modified in the critical section in
+ * uds_update_sparse_cache().
+ */
+ u64 virtual_chapter;
+
+ u32 index_pages_count;
+
+ /*
+ * These pointers are immutable during the life of the cache. The contents of the arrays
+ * change when the cache entry is replaced.
+ */
+ struct delta_index_page *index_pages;
+ struct dm_buffer **page_buffers;
+
+ /*
+ * If set, skip the chapter when searching the entire cache. This flag is just a
+ * performance optimization. This flag is mutable between cache updates, but it rarely
+ * changes and is frequently accessed, so it groups with the immutable fields.
+ */
+ bool skip_search;
+
+ /*
+ * The cache-aligned counters change often and are placed at the end of the structure to
+ * prevent false sharing with the more stable fields above.
+ */
+ struct cached_index_counters counters;
+};
+
+/*
+ * A search_list represents an ordering of the sparse chapter index cache entry array, from most
+ * recently accessed to least recently accessed, which is the order in which the indexes should be
+ * searched and the reverse order in which they should be evicted from the cache.
+ *
+ * Cache entries that are dead or empty are kept at the end of the list, avoiding the need to even
+ * iterate over them to search, and ensuring that dead entries are replaced before any live entries
+ * are evicted.
+ *
+ * The search list is instantiated for each zone thread, avoiding any need for synchronization. The
+ * structure is allocated on a cache boundary to avoid false sharing of memory cache lines between
+ * zone threads.
+ */
+struct search_list {
+ u8 capacity;
+ u8 first_dead_entry;
+ struct cached_chapter_index *entries[];
+};
+
+struct threads_barrier {
+ /* Lock for this barrier object */
+ struct semaphore lock;
+ /* Semaphore for threads waiting at this barrier */
+ struct semaphore wait;
+ /* Number of threads which have arrived */
+ int arrived;
+ /* Total number of threads using this barrier */
+ int thread_count;
+};
+
+struct sparse_cache {
+ const struct index_geometry *geometry;
+ unsigned int capacity;
+ unsigned int zone_count;
+
+ unsigned int skip_threshold;
+ struct search_list *search_lists[MAX_ZONES];
+ struct cached_chapter_index **scratch_entries;
+
+ struct threads_barrier begin_update_barrier;
+ struct threads_barrier end_update_barrier;
+
+ struct cached_chapter_index chapters[];
+};
+
+static void initialize_threads_barrier(struct threads_barrier *barrier,
+ unsigned int thread_count)
+{
+ sema_init(&barrier->lock, 1);
+ barrier->arrived = 0;
+ barrier->thread_count = thread_count;
+ sema_init(&barrier->wait, 0);
+}
+
+static inline void __down(struct semaphore *semaphore)
+{
+ /*
+ * Do not use down(semaphore). Instead use down_interruptible so that
+ * we do not get 120 second stall messages in kern.log.
+ */
+ while (down_interruptible(semaphore) != 0) {
+ /*
+ * If we're called from a user-mode process (e.g., "dmsetup
+ * remove") while waiting for an operation that may take a
+ * while (e.g., UDS index save), and a signal is sent (SIGINT,
+ * SIGUSR2), then down_interruptible will not block. If that
+ * happens, sleep briefly to avoid keeping the CPU locked up in
+ * this loop. We could just call cond_resched, but then we'd
+ * still keep consuming CPU time slices and swamp other threads
+ * trying to do computational work.
+ */
+ fsleep(1000);
+ }
+}
+
+static void enter_threads_barrier(struct threads_barrier *barrier)
+{
+ __down(&barrier->lock);
+ if (++barrier->arrived == barrier->thread_count) {
+ /* last thread */
+ int i;
+
+ for (i = 1; i < barrier->thread_count; i++)
+ up(&barrier->wait);
+
+ barrier->arrived = 0;
+ up(&barrier->lock);
+ } else {
+ up(&barrier->lock);
+ __down(&barrier->wait);
+ }
+}
+
+static int __must_check initialize_cached_chapter_index(struct cached_chapter_index *chapter,
+ const struct index_geometry *geometry)
+{
+ int result;
+
+ chapter->virtual_chapter = NO_CHAPTER;
+ chapter->index_pages_count = geometry->index_pages_per_chapter;
+
+ result = vdo_allocate(chapter->index_pages_count, struct delta_index_page,
+ __func__, &chapter->index_pages);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return vdo_allocate(chapter->index_pages_count, struct dm_buffer *,
+ "sparse index volume pages", &chapter->page_buffers);
+}
+
+static int __must_check make_search_list(struct sparse_cache *cache,
+ struct search_list **list_ptr)
+{
+ struct search_list *list;
+ unsigned int bytes;
+ u8 i;
+ int result;
+
+ bytes = (sizeof(struct search_list) +
+ (cache->capacity * sizeof(struct cached_chapter_index *)));
+ result = vdo_allocate_cache_aligned(bytes, "search list", &list);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ list->capacity = cache->capacity;
+ list->first_dead_entry = 0;
+
+ for (i = 0; i < list->capacity; i++)
+ list->entries[i] = &cache->chapters[i];
+
+ *list_ptr = list;
+ return UDS_SUCCESS;
+}
+
+int uds_make_sparse_cache(const struct index_geometry *geometry, unsigned int capacity,
+ unsigned int zone_count, struct sparse_cache **cache_ptr)
+{
+ int result;
+ unsigned int i;
+ struct sparse_cache *cache;
+ unsigned int bytes;
+
+ bytes = (sizeof(struct sparse_cache) + (capacity * sizeof(struct cached_chapter_index)));
+ result = vdo_allocate_cache_aligned(bytes, "sparse cache", &cache);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ cache->geometry = geometry;
+ cache->capacity = capacity;
+ cache->zone_count = zone_count;
+
+ /*
+ * Scale down the skip threshold since the cache only counts cache misses in zone zero, but
+ * requests are being handled in all zones.
+ */
+ cache->skip_threshold = (SKIP_SEARCH_THRESHOLD / zone_count);
+
+ initialize_threads_barrier(&cache->begin_update_barrier, zone_count);
+ initialize_threads_barrier(&cache->end_update_barrier, zone_count);
+
+ for (i = 0; i < capacity; i++) {
+ result = initialize_cached_chapter_index(&cache->chapters[i], geometry);
+ if (result != UDS_SUCCESS)
+ goto out;
+ }
+
+ for (i = 0; i < zone_count; i++) {
+ result = make_search_list(cache, &cache->search_lists[i]);
+ if (result != UDS_SUCCESS)
+ goto out;
+ }
+
+ /* purge_search_list() needs some temporary lists for sorting. */
+ result = vdo_allocate(capacity * 2, struct cached_chapter_index *,
+ "scratch entries", &cache->scratch_entries);
+ if (result != VDO_SUCCESS)
+ goto out;
+
+ *cache_ptr = cache;
+ return UDS_SUCCESS;
+out:
+ uds_free_sparse_cache(cache);
+ return result;
+}
+
+static inline void set_skip_search(struct cached_chapter_index *chapter,
+ bool skip_search)
+{
+ /* Check before setting to reduce cache line contention. */
+ if (READ_ONCE(chapter->skip_search) != skip_search)
+ WRITE_ONCE(chapter->skip_search, skip_search);
+}
+
+static void score_search_hit(struct cached_chapter_index *chapter)
+{
+ chapter->counters.consecutive_misses = 0;
+ set_skip_search(chapter, false);
+}
+
+static void score_search_miss(struct sparse_cache *cache,
+ struct cached_chapter_index *chapter)
+{
+ chapter->counters.consecutive_misses++;
+ if (chapter->counters.consecutive_misses > cache->skip_threshold)
+ set_skip_search(chapter, true);
+}
+
+static void release_cached_chapter_index(struct cached_chapter_index *chapter)
+{
+ unsigned int i;
+
+ chapter->virtual_chapter = NO_CHAPTER;
+ if (chapter->page_buffers == NULL)
+ return;
+
+ for (i = 0; i < chapter->index_pages_count; i++) {
+ if (chapter->page_buffers[i] != NULL)
+ dm_bufio_release(vdo_forget(chapter->page_buffers[i]));
+ }
+}
+
+void uds_free_sparse_cache(struct sparse_cache *cache)
+{
+ unsigned int i;
+
+ if (cache == NULL)
+ return;
+
+ vdo_free(cache->scratch_entries);
+
+ for (i = 0; i < cache->zone_count; i++)
+ vdo_free(cache->search_lists[i]);
+
+ for (i = 0; i < cache->capacity; i++) {
+ release_cached_chapter_index(&cache->chapters[i]);
+ vdo_free(cache->chapters[i].index_pages);
+ vdo_free(cache->chapters[i].page_buffers);
+ }
+
+ vdo_free(cache);
+}
+
+/*
+ * Take the indicated element of the search list and move it to the start, pushing the pointers
+ * previously before it back down the list.
+ */
+static inline void set_newest_entry(struct search_list *search_list, u8 index)
+{
+ struct cached_chapter_index *newest;
+
+ if (index > 0) {
+ newest = search_list->entries[index];
+ memmove(&search_list->entries[1], &search_list->entries[0],
+ index * sizeof(struct cached_chapter_index *));
+ search_list->entries[0] = newest;
+ }
+
+ /*
+ * This function may have moved a dead chapter to the front of the list for reuse, in which
+ * case the set of dead chapters becomes smaller.
+ */
+ if (search_list->first_dead_entry <= index)
+ search_list->first_dead_entry++;
+}
+
+bool uds_sparse_cache_contains(struct sparse_cache *cache, u64 virtual_chapter,
+ unsigned int zone_number)
+{
+ struct search_list *search_list;
+ struct cached_chapter_index *chapter;
+ u8 i;
+
+ /*
+ * The correctness of the barriers depends on the invariant that between calls to
+ * uds_update_sparse_cache(), the answers this function returns must never vary: the result
+ * for a given chapter must be identical across zones. That invariant must be maintained
+ * even if the chapter falls off the end of the volume, or if searching it is disabled
+ * because of too many search misses.
+ */
+ search_list = cache->search_lists[zone_number];
+ for (i = 0; i < search_list->first_dead_entry; i++) {
+ chapter = search_list->entries[i];
+
+ if (virtual_chapter == chapter->virtual_chapter) {
+ if (zone_number == ZONE_ZERO)
+ score_search_hit(chapter);
+
+ set_newest_entry(search_list, i);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Re-sort cache entries into three sets (active, skippable, and dead) while maintaining the LRU
+ * ordering that already existed. This operation must only be called during the critical section in
+ * uds_update_sparse_cache().
+ */
+static void purge_search_list(struct search_list *search_list,
+ struct sparse_cache *cache, u64 oldest_virtual_chapter)
+{
+ struct cached_chapter_index **entries;
+ struct cached_chapter_index **skipped;
+ struct cached_chapter_index **dead;
+ struct cached_chapter_index *chapter;
+ unsigned int next_alive = 0;
+ unsigned int next_skipped = 0;
+ unsigned int next_dead = 0;
+ unsigned int i;
+
+ entries = &search_list->entries[0];
+ skipped = &cache->scratch_entries[0];
+ dead = &cache->scratch_entries[search_list->capacity];
+
+ for (i = 0; i < search_list->first_dead_entry; i++) {
+ chapter = search_list->entries[i];
+ if ((chapter->virtual_chapter < oldest_virtual_chapter) ||
+ (chapter->virtual_chapter == NO_CHAPTER))
+ dead[next_dead++] = chapter;
+ else if (chapter->skip_search)
+ skipped[next_skipped++] = chapter;
+ else
+ entries[next_alive++] = chapter;
+ }
+
+ memcpy(&entries[next_alive], skipped,
+ next_skipped * sizeof(struct cached_chapter_index *));
+ memcpy(&entries[next_alive + next_skipped], dead,
+ next_dead * sizeof(struct cached_chapter_index *));
+ search_list->first_dead_entry = next_alive + next_skipped;
+}
+
+static int __must_check cache_chapter_index(struct cached_chapter_index *chapter,
+ u64 virtual_chapter,
+ const struct volume *volume)
+{
+ int result;
+
+ release_cached_chapter_index(chapter);
+
+ result = uds_read_chapter_index_from_volume(volume, virtual_chapter,
+ chapter->page_buffers,
+ chapter->index_pages);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ chapter->counters.consecutive_misses = 0;
+ chapter->virtual_chapter = virtual_chapter;
+ chapter->skip_search = false;
+
+ return UDS_SUCCESS;
+}
+
+static inline void copy_search_list(const struct search_list *source,
+ struct search_list *target)
+{
+ *target = *source;
+ memcpy(target->entries, source->entries,
+ source->capacity * sizeof(struct cached_chapter_index *));
+}
+
+/*
+ * Update the sparse cache to contain a chapter index. This function must be called by all the zone
+ * threads with the same chapter number to correctly enter the thread barriers used to synchronize
+ * the cache updates.
+ */
+int uds_update_sparse_cache(struct index_zone *zone, u64 virtual_chapter)
+{
+ int result = UDS_SUCCESS;
+ const struct uds_index *index = zone->index;
+ struct sparse_cache *cache = index->volume->sparse_cache;
+
+ if (uds_sparse_cache_contains(cache, virtual_chapter, zone->id))
+ return UDS_SUCCESS;
+
+ /*
+ * Wait for every zone thread to reach its corresponding barrier request and invoke this
+ * function before starting to modify the cache.
+ */
+ enter_threads_barrier(&cache->begin_update_barrier);
+
+ /*
+ * This is the start of the critical section: the zone zero thread is captain, effectively
+ * holding an exclusive lock on the sparse cache. All the other zone threads must do
+ * nothing between the two barriers. They will wait at the end_update_barrier again for the
+ * captain to finish the update.
+ */
+
+ if (zone->id == ZONE_ZERO) {
+ unsigned int z;
+ struct search_list *list = cache->search_lists[ZONE_ZERO];
+
+ purge_search_list(list, cache, zone->oldest_virtual_chapter);
+
+ if (virtual_chapter >= index->oldest_virtual_chapter) {
+ set_newest_entry(list, list->capacity - 1);
+ result = cache_chapter_index(list->entries[0], virtual_chapter,
+ index->volume);
+ }
+
+ for (z = 1; z < cache->zone_count; z++)
+ copy_search_list(list, cache->search_lists[z]);
+ }
+
+ /*
+ * This is the end of the critical section. All cache invariants must have been restored.
+ */
+ enter_threads_barrier(&cache->end_update_barrier);
+ return result;
+}
+
+void uds_invalidate_sparse_cache(struct sparse_cache *cache)
+{
+ unsigned int i;
+
+ for (i = 0; i < cache->capacity; i++)
+ release_cached_chapter_index(&cache->chapters[i]);
+}
+
+static inline bool should_skip_chapter(struct cached_chapter_index *chapter,
+ u64 oldest_chapter, u64 requested_chapter)
+{
+ if ((chapter->virtual_chapter == NO_CHAPTER) ||
+ (chapter->virtual_chapter < oldest_chapter))
+ return true;
+
+ if (requested_chapter != NO_CHAPTER)
+ return requested_chapter != chapter->virtual_chapter;
+ else
+ return READ_ONCE(chapter->skip_search);
+}
+
+static int __must_check search_cached_chapter_index(struct cached_chapter_index *chapter,
+ const struct index_geometry *geometry,
+ const struct index_page_map *index_page_map,
+ const struct uds_record_name *name,
+ u16 *record_page_ptr)
+{
+ u32 physical_chapter =
+ uds_map_to_physical_chapter(geometry, chapter->virtual_chapter);
+ u32 index_page_number =
+ uds_find_index_page_number(index_page_map, name, physical_chapter);
+ struct delta_index_page *index_page =
+ &chapter->index_pages[index_page_number];
+
+ return uds_search_chapter_index_page(index_page, geometry, name,
+ record_page_ptr);
+}
+
+int uds_search_sparse_cache(struct index_zone *zone, const struct uds_record_name *name,
+ u64 *virtual_chapter_ptr, u16 *record_page_ptr)
+{
+ int result;
+ struct volume *volume = zone->index->volume;
+ struct sparse_cache *cache = volume->sparse_cache;
+ struct cached_chapter_index *chapter;
+ struct search_list *search_list;
+ u8 i;
+ /* Search the entire cache unless a specific chapter was requested. */
+ bool search_one = (*virtual_chapter_ptr != NO_CHAPTER);
+
+ *record_page_ptr = NO_CHAPTER_INDEX_ENTRY;
+ search_list = cache->search_lists[zone->id];
+ for (i = 0; i < search_list->first_dead_entry; i++) {
+ chapter = search_list->entries[i];
+
+ if (should_skip_chapter(chapter, zone->oldest_virtual_chapter,
+ *virtual_chapter_ptr))
+ continue;
+
+ result = search_cached_chapter_index(chapter, cache->geometry,
+ volume->index_page_map, name,
+ record_page_ptr);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (*record_page_ptr != NO_CHAPTER_INDEX_ENTRY) {
+ /*
+ * In theory, this might be a false match while a true match exists in
+ * another chapter, but that's a very rare case and not worth the extra
+ * search complexity.
+ */
+ set_newest_entry(search_list, i);
+ if (zone->id == ZONE_ZERO)
+ score_search_hit(chapter);
+
+ *virtual_chapter_ptr = chapter->virtual_chapter;
+ return UDS_SUCCESS;
+ }
+
+ if (zone->id == ZONE_ZERO)
+ score_search_miss(cache, chapter);
+
+ if (search_one)
+ break;
+ }
+
+ return UDS_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/indexer/sparse-cache.h b/drivers/md/dm-vdo/indexer/sparse-cache.h
new file mode 100644
index 000000000000..45e2dcf165b5
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/sparse-cache.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_SPARSE_CACHE_H
+#define UDS_SPARSE_CACHE_H
+
+#include "geometry.h"
+#include "indexer.h"
+
+/*
+ * The sparse cache is a cache of entire chapter indexes from sparse chapters used for searching
+ * for names after all other search paths have failed. It contains only complete chapter indexes;
+ * record pages from sparse chapters and single index pages used for resolving hooks are kept in
+ * the regular page cache in the volume.
+ *
+ * The most important property of this cache is the absence of synchronization for read operations.
+ * Safe concurrent access to the cache by the zone threads is controlled by the triage queue and
+ * the barrier requests it issues to the zone queues. The set of cached chapters does not and must
+ * not change between the carefully coordinated calls to uds_update_sparse_cache() from the zone
+ * threads. Outside of updates, every zone will get the same result when calling
+ * uds_sparse_cache_contains() as every other zone.
+ */
+
+struct index_zone;
+struct sparse_cache;
+
+int __must_check uds_make_sparse_cache(const struct index_geometry *geometry,
+ unsigned int capacity, unsigned int zone_count,
+ struct sparse_cache **cache_ptr);
+
+void uds_free_sparse_cache(struct sparse_cache *cache);
+
+bool uds_sparse_cache_contains(struct sparse_cache *cache, u64 virtual_chapter,
+ unsigned int zone_number);
+
+int __must_check uds_update_sparse_cache(struct index_zone *zone, u64 virtual_chapter);
+
+void uds_invalidate_sparse_cache(struct sparse_cache *cache);
+
+int __must_check uds_search_sparse_cache(struct index_zone *zone,
+ const struct uds_record_name *name,
+ u64 *virtual_chapter_ptr, u16 *record_page_ptr);
+
+#endif /* UDS_SPARSE_CACHE_H */
diff --git a/drivers/md/dm-vdo/indexer/volume-index.c b/drivers/md/dm-vdo/indexer/volume-index.c
new file mode 100644
index 000000000000..12f954a0c532
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/volume-index.c
@@ -0,0 +1,1283 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+#include "volume-index.h"
+
+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/log2.h>
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+#include "thread-utils.h"
+
+#include "config.h"
+#include "geometry.h"
+#include "hash-utils.h"
+#include "indexer.h"
+
+/*
+ * The volume index is a combination of two separate subindexes, one containing sparse hook entries
+ * (retained for all chapters), and one containing the remaining entries (retained only for the
+ * dense chapters). If there are no sparse chapters, only the non-hook sub index is used, and it
+ * will contain all records for all chapters.
+ *
+ * The volume index is also divided into zones, with one thread operating on each zone. Each
+ * incoming request is dispatched to the appropriate thread, and then to the appropriate subindex.
+ * Each delta list is handled by a single zone. To ensure that the distribution of delta lists to
+ * zones doesn't underflow (leaving some zone with no delta lists), the minimum number of delta
+ * lists must be the square of the maximum zone count for both subindexes.
+ *
+ * Each subindex zone is a delta index where the payload is a chapter number. The volume index can
+ * compute the delta list number, address, and zone number from the record name in order to
+ * dispatch record handling to the correct structures.
+ *
+ * Most operations that use all the zones take place either before request processing is allowed,
+ * or after all requests have been flushed in order to shut down. The only multi-threaded operation
+ * supported during normal operation is the uds_lookup_volume_index_name() method, used to determine
+ * whether a new chapter should be loaded into the sparse index cache. This operation only uses the
+ * sparse hook subindex, and the zone mutexes are used to make this operation safe.
+ *
+ * There are three ways of expressing chapter numbers in the volume index: virtual, index, and
+ * rolling. The interface to the volume index uses virtual chapter numbers, which are 64 bits long.
+ * Internally the subindex stores only the minimal number of bits necessary by masking away the
+ * high-order bits. When the index needs to deal with ordering of index chapter numbers, as when
+ * flushing entries from older chapters, it rolls the index chapter number around so that the
+ * smallest one in use is mapped to 0. See convert_index_to_virtual() or flush_invalid_entries()
+ * for an example of this technique.
+ *
+ * For efficiency, when older chapter numbers become invalid, the index does not immediately remove
+ * the invalidated entries. Instead it lazily removes them from a given delta list the next time it
+ * walks that list during normal operation. Because of this, the index size must be increased
+ * somewhat to accommodate all the invalid entries that have not yet been removed. For the standard
+ * index sizes, this requires about 4 chapters of old entries per 1024 chapters of valid entries in
+ * the index.
+ */
+
+struct sub_index_parameters {
+ /* The number of bits in address mask */
+ u8 address_bits;
+ /* The number of bits in chapter number */
+ u8 chapter_bits;
+ /* The mean delta */
+ u32 mean_delta;
+ /* The number of delta lists */
+ u64 list_count;
+ /* The number of chapters used */
+ u32 chapter_count;
+ /* The number of bits per chapter */
+ size_t chapter_size_in_bits;
+ /* The number of bytes of delta list memory */
+ size_t memory_size;
+ /* The number of bytes the index should keep free at all times */
+ size_t target_free_bytes;
+};
+
+struct split_config {
+ /* The hook subindex configuration */
+ struct uds_configuration hook_config;
+ struct index_geometry hook_geometry;
+
+ /* The non-hook subindex configuration */
+ struct uds_configuration non_hook_config;
+ struct index_geometry non_hook_geometry;
+};
+
+struct chapter_range {
+ u32 chapter_start;
+ u32 chapter_count;
+};
+
+#define MAGIC_SIZE 8
+
+static const char MAGIC_START_5[] = "MI5-0005";
+
+struct sub_index_data {
+ char magic[MAGIC_SIZE]; /* MAGIC_START_5 */
+ u64 volume_nonce;
+ u64 virtual_chapter_low;
+ u64 virtual_chapter_high;
+ u32 first_list;
+ u32 list_count;
+};
+
+static const char MAGIC_START_6[] = "MI6-0001";
+
+struct volume_index_data {
+ char magic[MAGIC_SIZE]; /* MAGIC_START_6 */
+ u32 sparse_sample_rate;
+};
+
+static inline u32 extract_address(const struct volume_sub_index *sub_index,
+ const struct uds_record_name *name)
+{
+ return uds_extract_volume_index_bytes(name) & sub_index->address_mask;
+}
+
+static inline u32 extract_dlist_num(const struct volume_sub_index *sub_index,
+ const struct uds_record_name *name)
+{
+ u64 bits = uds_extract_volume_index_bytes(name);
+
+ return (bits >> sub_index->address_bits) % sub_index->list_count;
+}
+
+static inline const struct volume_sub_index_zone *
+get_zone_for_record(const struct volume_index_record *record)
+{
+ return &record->sub_index->zones[record->zone_number];
+}
+
+static inline u64 convert_index_to_virtual(const struct volume_index_record *record,
+ u32 index_chapter)
+{
+ const struct volume_sub_index_zone *volume_index_zone = get_zone_for_record(record);
+ u32 rolling_chapter = ((index_chapter - volume_index_zone->virtual_chapter_low) &
+ record->sub_index->chapter_mask);
+
+ return volume_index_zone->virtual_chapter_low + rolling_chapter;
+}
+
+static inline u32 convert_virtual_to_index(const struct volume_sub_index *sub_index,
+ u64 virtual_chapter)
+{
+ return virtual_chapter & sub_index->chapter_mask;
+}
+
+static inline bool is_virtual_chapter_indexed(const struct volume_index_record *record,
+ u64 virtual_chapter)
+{
+ const struct volume_sub_index_zone *volume_index_zone = get_zone_for_record(record);
+
+ return ((virtual_chapter >= volume_index_zone->virtual_chapter_low) &&
+ (virtual_chapter <= volume_index_zone->virtual_chapter_high));
+}
+
+static inline bool has_sparse(const struct volume_index *volume_index)
+{
+ return volume_index->sparse_sample_rate > 0;
+}
+
+bool uds_is_volume_index_sample(const struct volume_index *volume_index,
+ const struct uds_record_name *name)
+{
+ if (!has_sparse(volume_index))
+ return false;
+
+ return (uds_extract_sampling_bytes(name) % volume_index->sparse_sample_rate) == 0;
+}
+
+static inline const struct volume_sub_index *
+get_volume_sub_index(const struct volume_index *volume_index,
+ const struct uds_record_name *name)
+{
+ return (uds_is_volume_index_sample(volume_index, name) ?
+ &volume_index->vi_hook :
+ &volume_index->vi_non_hook);
+}
+
+static unsigned int get_volume_sub_index_zone(const struct volume_sub_index *sub_index,
+ const struct uds_record_name *name)
+{
+ return extract_dlist_num(sub_index, name) / sub_index->delta_index.lists_per_zone;
+}
+
+unsigned int uds_get_volume_index_zone(const struct volume_index *volume_index,
+ const struct uds_record_name *name)
+{
+ return get_volume_sub_index_zone(get_volume_sub_index(volume_index, name), name);
+}
+
+#define DELTA_LIST_SIZE 256
+
+static int compute_volume_sub_index_parameters(const struct uds_configuration *config,
+ struct sub_index_parameters *params)
+{
+ u64 entries_in_volume_index, address_span;
+ u32 chapters_in_volume_index, invalid_chapters;
+ u32 rounded_chapters;
+ u64 delta_list_records;
+ u32 address_count;
+ u64 index_size_in_bits;
+ size_t expected_index_size;
+ u64 min_delta_lists = MAX_ZONES * MAX_ZONES;
+ struct index_geometry *geometry = config->geometry;
+ u64 records_per_chapter = geometry->records_per_chapter;
+
+ params->chapter_count = geometry->chapters_per_volume;
+ /*
+ * Make sure that the number of delta list records in the volume index does not change when
+ * the volume is reduced by one chapter. This preserves the mapping from name to volume
+ * index delta list.
+ */
+ rounded_chapters = params->chapter_count;
+ if (uds_is_reduced_index_geometry(geometry))
+ rounded_chapters += 1;
+ delta_list_records = records_per_chapter * rounded_chapters;
+ address_count = config->volume_index_mean_delta * DELTA_LIST_SIZE;
+ params->list_count = max(delta_list_records / DELTA_LIST_SIZE, min_delta_lists);
+ params->address_bits = bits_per(address_count - 1);
+ params->chapter_bits = bits_per(rounded_chapters - 1);
+ if ((u32) params->list_count != params->list_count) {
+ return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT,
+ "cannot initialize volume index with %llu delta lists",
+ (unsigned long long) params->list_count);
+ }
+
+ if (params->address_bits > 31) {
+ return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT,
+ "cannot initialize volume index with %u address bits",
+ params->address_bits);
+ }
+
+ /*
+ * The probability that a given delta list is not touched during the writing of an entire
+ * chapter is:
+ *
+ * double p_not_touched = pow((double) (params->list_count - 1) / params->list_count,
+ * records_per_chapter);
+ *
+ * For the standard index sizes, about 78% of the delta lists are not touched, and
+ * therefore contain old index entries that have not been eliminated by the lazy LRU
+ * processing. Then the number of old index entries that accumulate over the entire index,
+ * in terms of full chapters worth of entries, is:
+ *
+ * double invalid_chapters = p_not_touched / (1.0 - p_not_touched);
+ *
+ * For the standard index sizes, the index needs about 3.5 chapters of space for the old
+ * entries in a 1024 chapter index, so round this up to use 4 chapters per 1024 chapters in
+ * the index.
+ */
+ invalid_chapters = max(rounded_chapters / 256, 2U);
+ chapters_in_volume_index = rounded_chapters + invalid_chapters;
+ entries_in_volume_index = records_per_chapter * chapters_in_volume_index;
+
+ address_span = params->list_count << params->address_bits;
+ params->mean_delta = address_span / entries_in_volume_index;
+
+ /*
+ * Compute the expected size of a full index, then set the total memory to be 6% larger
+ * than that expected size. This number should be large enough that there are not many
+ * rebalances when the index is full.
+ */
+ params->chapter_size_in_bits = uds_compute_delta_index_size(records_per_chapter,
+ params->mean_delta,
+ params->chapter_bits);
+ index_size_in_bits = params->chapter_size_in_bits * chapters_in_volume_index;
+ expected_index_size = index_size_in_bits / BITS_PER_BYTE;
+ params->memory_size = expected_index_size * 106 / 100;
+
+ params->target_free_bytes = expected_index_size / 20;
+ return UDS_SUCCESS;
+}
+
+static void uninitialize_volume_sub_index(struct volume_sub_index *sub_index)
+{
+ vdo_free(vdo_forget(sub_index->flush_chapters));
+ vdo_free(vdo_forget(sub_index->zones));
+ uds_uninitialize_delta_index(&sub_index->delta_index);
+}
+
+void uds_free_volume_index(struct volume_index *volume_index)
+{
+ if (volume_index == NULL)
+ return;
+
+ if (volume_index->zones != NULL)
+ vdo_free(vdo_forget(volume_index->zones));
+
+ uninitialize_volume_sub_index(&volume_index->vi_non_hook);
+ uninitialize_volume_sub_index(&volume_index->vi_hook);
+ vdo_free(volume_index);
+}
+
+
+static int compute_volume_sub_index_save_bytes(const struct uds_configuration *config,
+ size_t *bytes)
+{
+ struct sub_index_parameters params = { .address_bits = 0 };
+ int result;
+
+ result = compute_volume_sub_index_parameters(config, &params);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ *bytes = (sizeof(struct sub_index_data) + params.list_count * sizeof(u64) +
+ uds_compute_delta_index_save_bytes(params.list_count,
+ params.memory_size));
+ return UDS_SUCCESS;
+}
+
+/* This function is only useful if the configuration includes sparse chapters. */
+static void split_configuration(const struct uds_configuration *config,
+ struct split_config *split)
+{
+ u64 sample_rate, sample_records;
+ u64 dense_chapters, sparse_chapters;
+
+ /* Start with copies of the base configuration. */
+ split->hook_config = *config;
+ split->hook_geometry = *config->geometry;
+ split->hook_config.geometry = &split->hook_geometry;
+ split->non_hook_config = *config;
+ split->non_hook_geometry = *config->geometry;
+ split->non_hook_config.geometry = &split->non_hook_geometry;
+
+ sample_rate = config->sparse_sample_rate;
+ sparse_chapters = config->geometry->sparse_chapters_per_volume;
+ dense_chapters = config->geometry->chapters_per_volume - sparse_chapters;
+ sample_records = config->geometry->records_per_chapter / sample_rate;
+
+ /* Adjust the number of records indexed for each chapter. */
+ split->hook_geometry.records_per_chapter = sample_records;
+ split->non_hook_geometry.records_per_chapter -= sample_records;
+
+ /* Adjust the number of chapters indexed. */
+ split->hook_geometry.sparse_chapters_per_volume = 0;
+ split->non_hook_geometry.sparse_chapters_per_volume = 0;
+ split->non_hook_geometry.chapters_per_volume = dense_chapters;
+}
+
+static int compute_volume_index_save_bytes(const struct uds_configuration *config,
+ size_t *bytes)
+{
+ size_t hook_bytes, non_hook_bytes;
+ struct split_config split;
+ int result;
+
+ if (!uds_is_sparse_index_geometry(config->geometry))
+ return compute_volume_sub_index_save_bytes(config, bytes);
+
+ split_configuration(config, &split);
+ result = compute_volume_sub_index_save_bytes(&split.hook_config, &hook_bytes);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = compute_volume_sub_index_save_bytes(&split.non_hook_config,
+ &non_hook_bytes);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ *bytes = sizeof(struct volume_index_data) + hook_bytes + non_hook_bytes;
+ return UDS_SUCCESS;
+}
+
+int uds_compute_volume_index_save_blocks(const struct uds_configuration *config,
+ size_t block_size, u64 *block_count)
+{
+ size_t bytes;
+ int result;
+
+ result = compute_volume_index_save_bytes(config, &bytes);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ bytes += sizeof(struct delta_list_save_info);
+ *block_count = DIV_ROUND_UP(bytes, block_size) + MAX_ZONES;
+ return UDS_SUCCESS;
+}
+
+/* Flush invalid entries while walking the delta list. */
+static inline int flush_invalid_entries(struct volume_index_record *record,
+ struct chapter_range *flush_range,
+ u32 *next_chapter_to_invalidate)
+{
+ int result;
+
+ result = uds_next_delta_index_entry(&record->delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ while (!record->delta_entry.at_end) {
+ u32 index_chapter = uds_get_delta_entry_value(&record->delta_entry);
+ u32 relative_chapter = ((index_chapter - flush_range->chapter_start) &
+ record->sub_index->chapter_mask);
+
+ if (likely(relative_chapter >= flush_range->chapter_count)) {
+ if (relative_chapter < *next_chapter_to_invalidate)
+ *next_chapter_to_invalidate = relative_chapter;
+ break;
+ }
+
+ result = uds_remove_delta_index_entry(&record->delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ return UDS_SUCCESS;
+}
+
+/* Find the matching record, or the list offset where the record would go. */
+static int get_volume_index_entry(struct volume_index_record *record, u32 list_number,
+ u32 key, struct chapter_range *flush_range)
+{
+ struct volume_index_record other_record;
+ const struct volume_sub_index *sub_index = record->sub_index;
+ u32 next_chapter_to_invalidate = sub_index->chapter_mask;
+ int result;
+
+ result = uds_start_delta_index_search(&sub_index->delta_index, list_number, 0,
+ &record->delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ do {
+ result = flush_invalid_entries(record, flush_range,
+ &next_chapter_to_invalidate);
+ if (result != UDS_SUCCESS)
+ return result;
+ } while (!record->delta_entry.at_end && (key > record->delta_entry.key));
+
+ result = uds_remember_delta_index_offset(&record->delta_entry);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ /* Check any collision records for a more precise match. */
+ other_record = *record;
+ if (!other_record.delta_entry.at_end && (key == other_record.delta_entry.key)) {
+ for (;;) {
+ u8 collision_name[UDS_RECORD_NAME_SIZE];
+
+ result = flush_invalid_entries(&other_record, flush_range,
+ &next_chapter_to_invalidate);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (other_record.delta_entry.at_end ||
+ !other_record.delta_entry.is_collision)
+ break;
+
+ result = uds_get_delta_entry_collision(&other_record.delta_entry,
+ collision_name);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (memcmp(collision_name, record->name, UDS_RECORD_NAME_SIZE) == 0) {
+ *record = other_record;
+ break;
+ }
+ }
+ }
+ while (!other_record.delta_entry.at_end) {
+ result = flush_invalid_entries(&other_record, flush_range,
+ &next_chapter_to_invalidate);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+ next_chapter_to_invalidate += flush_range->chapter_start;
+ next_chapter_to_invalidate &= sub_index->chapter_mask;
+ flush_range->chapter_start = next_chapter_to_invalidate;
+ flush_range->chapter_count = 0;
+ return UDS_SUCCESS;
+}
+
+static int get_volume_sub_index_record(struct volume_sub_index *sub_index,
+ const struct uds_record_name *name,
+ struct volume_index_record *record)
+{
+ int result;
+ const struct volume_sub_index_zone *volume_index_zone;
+ u32 address = extract_address(sub_index, name);
+ u32 delta_list_number = extract_dlist_num(sub_index, name);
+ u64 flush_chapter = sub_index->flush_chapters[delta_list_number];
+
+ record->sub_index = sub_index;
+ record->mutex = NULL;
+ record->name = name;
+ record->zone_number = delta_list_number / sub_index->delta_index.lists_per_zone;
+ volume_index_zone = get_zone_for_record(record);
+
+ if (flush_chapter < volume_index_zone->virtual_chapter_low) {
+ struct chapter_range range;
+ u64 flush_count = volume_index_zone->virtual_chapter_low - flush_chapter;
+
+ range.chapter_start = convert_virtual_to_index(sub_index, flush_chapter);
+ range.chapter_count = (flush_count > sub_index->chapter_mask ?
+ sub_index->chapter_mask + 1 :
+ flush_count);
+ result = get_volume_index_entry(record, delta_list_number, address,
+ &range);
+ flush_chapter = convert_index_to_virtual(record, range.chapter_start);
+ if (flush_chapter > volume_index_zone->virtual_chapter_high)
+ flush_chapter = volume_index_zone->virtual_chapter_high;
+ sub_index->flush_chapters[delta_list_number] = flush_chapter;
+ } else {
+ result = uds_get_delta_index_entry(&sub_index->delta_index,
+ delta_list_number, address,
+ name->name, &record->delta_entry);
+ }
+
+ if (result != UDS_SUCCESS)
+ return result;
+
+ record->is_found =
+ (!record->delta_entry.at_end && (record->delta_entry.key == address));
+ if (record->is_found) {
+ u32 index_chapter = uds_get_delta_entry_value(&record->delta_entry);
+
+ record->virtual_chapter = convert_index_to_virtual(record, index_chapter);
+ }
+
+ record->is_collision = record->delta_entry.is_collision;
+ return UDS_SUCCESS;
+}
+
+int uds_get_volume_index_record(struct volume_index *volume_index,
+ const struct uds_record_name *name,
+ struct volume_index_record *record)
+{
+ int result;
+
+ if (uds_is_volume_index_sample(volume_index, name)) {
+ /*
+ * Other threads cannot be allowed to call uds_lookup_volume_index_name() while
+ * this thread is finding the volume index record. Due to the lazy LRU flushing of
+ * the volume index, uds_get_volume_index_record() is not a read-only operation.
+ */
+ unsigned int zone =
+ get_volume_sub_index_zone(&volume_index->vi_hook, name);
+ struct mutex *mutex = &volume_index->zones[zone].hook_mutex;
+
+ mutex_lock(mutex);
+ result = get_volume_sub_index_record(&volume_index->vi_hook, name,
+ record);
+ mutex_unlock(mutex);
+ /* Remember the mutex so that other operations on the index record can use it. */
+ record->mutex = mutex;
+ } else {
+ result = get_volume_sub_index_record(&volume_index->vi_non_hook, name,
+ record);
+ }
+
+ return result;
+}
+
+int uds_put_volume_index_record(struct volume_index_record *record, u64 virtual_chapter)
+{
+ int result;
+ u32 address;
+ const struct volume_sub_index *sub_index = record->sub_index;
+
+ if (!is_virtual_chapter_indexed(record, virtual_chapter)) {
+ u64 low = get_zone_for_record(record)->virtual_chapter_low;
+ u64 high = get_zone_for_record(record)->virtual_chapter_high;
+
+ return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT,
+ "cannot put record into chapter number %llu that is out of the valid range %llu to %llu",
+ (unsigned long long) virtual_chapter,
+ (unsigned long long) low,
+ (unsigned long long) high);
+ }
+ address = extract_address(sub_index, record->name);
+ if (unlikely(record->mutex != NULL))
+ mutex_lock(record->mutex);
+ result = uds_put_delta_index_entry(&record->delta_entry, address,
+ convert_virtual_to_index(sub_index,
+ virtual_chapter),
+ record->is_found ? record->name->name : NULL);
+ if (unlikely(record->mutex != NULL))
+ mutex_unlock(record->mutex);
+ switch (result) {
+ case UDS_SUCCESS:
+ record->virtual_chapter = virtual_chapter;
+ record->is_collision = record->delta_entry.is_collision;
+ record->is_found = true;
+ break;
+ case UDS_OVERFLOW:
+ vdo_log_ratelimit(vdo_log_warning_strerror, UDS_OVERFLOW,
+ "Volume index entry dropped due to overflow condition");
+ uds_log_delta_index_entry(&record->delta_entry);
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
+
+int uds_remove_volume_index_record(struct volume_index_record *record)
+{
+ int result;
+
+ if (!record->is_found)
+ return vdo_log_warning_strerror(UDS_BAD_STATE,
+ "illegal operation on new record");
+
+ /* Mark the record so that it cannot be used again */
+ record->is_found = false;
+ if (unlikely(record->mutex != NULL))
+ mutex_lock(record->mutex);
+ result = uds_remove_delta_index_entry(&record->delta_entry);
+ if (unlikely(record->mutex != NULL))
+ mutex_unlock(record->mutex);
+ return result;
+}
+
+static void set_volume_sub_index_zone_open_chapter(struct volume_sub_index *sub_index,
+ unsigned int zone_number,
+ u64 virtual_chapter)
+{
+ u64 used_bits = 0;
+ struct volume_sub_index_zone *zone = &sub_index->zones[zone_number];
+ struct delta_zone *delta_zone;
+ u32 i;
+
+ zone->virtual_chapter_low = (virtual_chapter >= sub_index->chapter_count ?
+ virtual_chapter - sub_index->chapter_count + 1 :
+ 0);
+ zone->virtual_chapter_high = virtual_chapter;
+
+ /* Check to see if the new zone data is too large. */
+ delta_zone = &sub_index->delta_index.delta_zones[zone_number];
+ for (i = 1; i <= delta_zone->list_count; i++)
+ used_bits += delta_zone->delta_lists[i].size;
+
+ if (used_bits > sub_index->max_zone_bits) {
+ /* Expire enough chapters to free the desired space. */
+ u64 expire_count =
+ 1 + (used_bits - sub_index->max_zone_bits) / sub_index->chapter_zone_bits;
+
+ if (expire_count == 1) {
+ vdo_log_ratelimit(vdo_log_info,
+ "zone %u: At chapter %llu, expiring chapter %llu early",
+ zone_number,
+ (unsigned long long) virtual_chapter,
+ (unsigned long long) zone->virtual_chapter_low);
+ zone->early_flushes++;
+ zone->virtual_chapter_low++;
+ } else {
+ u64 first_expired = zone->virtual_chapter_low;
+
+ if (first_expired + expire_count < zone->virtual_chapter_high) {
+ zone->early_flushes += expire_count;
+ zone->virtual_chapter_low += expire_count;
+ } else {
+ zone->early_flushes +=
+ zone->virtual_chapter_high - zone->virtual_chapter_low;
+ zone->virtual_chapter_low = zone->virtual_chapter_high;
+ }
+ vdo_log_ratelimit(vdo_log_info,
+ "zone %u: At chapter %llu, expiring chapters %llu to %llu early",
+ zone_number,
+ (unsigned long long) virtual_chapter,
+ (unsigned long long) first_expired,
+ (unsigned long long) zone->virtual_chapter_low - 1);
+ }
+ }
+}
+
+void uds_set_volume_index_zone_open_chapter(struct volume_index *volume_index,
+ unsigned int zone_number,
+ u64 virtual_chapter)
+{
+ struct mutex *mutex = &volume_index->zones[zone_number].hook_mutex;
+
+ set_volume_sub_index_zone_open_chapter(&volume_index->vi_non_hook, zone_number,
+ virtual_chapter);
+
+ /*
+ * Other threads cannot be allowed to call uds_lookup_volume_index_name() while the open
+ * chapter number is changing.
+ */
+ if (has_sparse(volume_index)) {
+ mutex_lock(mutex);
+ set_volume_sub_index_zone_open_chapter(&volume_index->vi_hook,
+ zone_number, virtual_chapter);
+ mutex_unlock(mutex);
+ }
+}
+
+/*
+ * Set the newest open chapter number for the index, while also advancing the oldest valid chapter
+ * number.
+ */
+void uds_set_volume_index_open_chapter(struct volume_index *volume_index,
+ u64 virtual_chapter)
+{
+ unsigned int zone;
+
+ for (zone = 0; zone < volume_index->zone_count; zone++)
+ uds_set_volume_index_zone_open_chapter(volume_index, zone, virtual_chapter);
+}
+
+int uds_set_volume_index_record_chapter(struct volume_index_record *record,
+ u64 virtual_chapter)
+{
+ const struct volume_sub_index *sub_index = record->sub_index;
+ int result;
+
+ if (!record->is_found)
+ return vdo_log_warning_strerror(UDS_BAD_STATE,
+ "illegal operation on new record");
+
+ if (!is_virtual_chapter_indexed(record, virtual_chapter)) {
+ u64 low = get_zone_for_record(record)->virtual_chapter_low;
+ u64 high = get_zone_for_record(record)->virtual_chapter_high;
+
+ return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT,
+ "cannot set chapter number %llu that is out of the valid range %llu to %llu",
+ (unsigned long long) virtual_chapter,
+ (unsigned long long) low,
+ (unsigned long long) high);
+ }
+
+ if (unlikely(record->mutex != NULL))
+ mutex_lock(record->mutex);
+ result = uds_set_delta_entry_value(&record->delta_entry,
+ convert_virtual_to_index(sub_index,
+ virtual_chapter));
+ if (unlikely(record->mutex != NULL))
+ mutex_unlock(record->mutex);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ record->virtual_chapter = virtual_chapter;
+ return UDS_SUCCESS;
+}
+
+static u64 lookup_volume_sub_index_name(const struct volume_sub_index *sub_index,
+ const struct uds_record_name *name)
+{
+ int result;
+ u32 address = extract_address(sub_index, name);
+ u32 delta_list_number = extract_dlist_num(sub_index, name);
+ unsigned int zone_number = get_volume_sub_index_zone(sub_index, name);
+ const struct volume_sub_index_zone *zone = &sub_index->zones[zone_number];
+ u64 virtual_chapter;
+ u32 index_chapter;
+ u32 rolling_chapter;
+ struct delta_index_entry delta_entry;
+
+ result = uds_get_delta_index_entry(&sub_index->delta_index, delta_list_number,
+ address, name->name, &delta_entry);
+ if (result != UDS_SUCCESS)
+ return NO_CHAPTER;
+
+ if (delta_entry.at_end || (delta_entry.key != address))
+ return NO_CHAPTER;
+
+ index_chapter = uds_get_delta_entry_value(&delta_entry);
+ rolling_chapter = (index_chapter - zone->virtual_chapter_low) & sub_index->chapter_mask;
+
+ virtual_chapter = zone->virtual_chapter_low + rolling_chapter;
+ if (virtual_chapter > zone->virtual_chapter_high)
+ return NO_CHAPTER;
+
+ return virtual_chapter;
+}
+
+/* Do a read-only lookup of the record name for sparse cache management. */
+u64 uds_lookup_volume_index_name(const struct volume_index *volume_index,
+ const struct uds_record_name *name)
+{
+ unsigned int zone_number = uds_get_volume_index_zone(volume_index, name);
+ struct mutex *mutex = &volume_index->zones[zone_number].hook_mutex;
+ u64 virtual_chapter;
+
+ if (!uds_is_volume_index_sample(volume_index, name))
+ return NO_CHAPTER;
+
+ mutex_lock(mutex);
+ virtual_chapter = lookup_volume_sub_index_name(&volume_index->vi_hook, name);
+ mutex_unlock(mutex);
+
+ return virtual_chapter;
+}
+
+static void abort_restoring_volume_sub_index(struct volume_sub_index *sub_index)
+{
+ uds_reset_delta_index(&sub_index->delta_index);
+}
+
+static void abort_restoring_volume_index(struct volume_index *volume_index)
+{
+ abort_restoring_volume_sub_index(&volume_index->vi_non_hook);
+ if (has_sparse(volume_index))
+ abort_restoring_volume_sub_index(&volume_index->vi_hook);
+}
+
+static int start_restoring_volume_sub_index(struct volume_sub_index *sub_index,
+ struct buffered_reader **readers,
+ unsigned int reader_count)
+{
+ unsigned int z;
+ int result;
+ u64 virtual_chapter_low = 0, virtual_chapter_high = 0;
+ unsigned int i;
+
+ for (i = 0; i < reader_count; i++) {
+ struct sub_index_data header;
+ u8 buffer[sizeof(struct sub_index_data)];
+ size_t offset = 0;
+ u32 j;
+
+ result = uds_read_from_buffered_reader(readers[i], buffer,
+ sizeof(buffer));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read volume index header");
+ }
+
+ memcpy(&header.magic, buffer, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ decode_u64_le(buffer, &offset, &header.volume_nonce);
+ decode_u64_le(buffer, &offset, &header.virtual_chapter_low);
+ decode_u64_le(buffer, &offset, &header.virtual_chapter_high);
+ decode_u32_le(buffer, &offset, &header.first_list);
+ decode_u32_le(buffer, &offset, &header.list_count);
+
+ result = VDO_ASSERT(offset == sizeof(buffer),
+ "%zu bytes decoded of %zu expected", offset,
+ sizeof(buffer));
+ if (result != VDO_SUCCESS)
+ result = UDS_CORRUPT_DATA;
+
+ if (memcmp(header.magic, MAGIC_START_5, MAGIC_SIZE) != 0) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "volume index file had bad magic number");
+ }
+
+ if (sub_index->volume_nonce == 0) {
+ sub_index->volume_nonce = header.volume_nonce;
+ } else if (header.volume_nonce != sub_index->volume_nonce) {
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "volume index volume nonce incorrect");
+ }
+
+ if (i == 0) {
+ virtual_chapter_low = header.virtual_chapter_low;
+ virtual_chapter_high = header.virtual_chapter_high;
+ } else if (virtual_chapter_high != header.virtual_chapter_high) {
+ u64 low = header.virtual_chapter_low;
+ u64 high = header.virtual_chapter_high;
+
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "Inconsistent volume index zone files: Chapter range is [%llu,%llu], chapter range %d is [%llu,%llu]",
+ (unsigned long long) virtual_chapter_low,
+ (unsigned long long) virtual_chapter_high,
+ i, (unsigned long long) low,
+ (unsigned long long) high);
+ } else if (virtual_chapter_low < header.virtual_chapter_low) {
+ virtual_chapter_low = header.virtual_chapter_low;
+ }
+
+ for (j = 0; j < header.list_count; j++) {
+ u8 decoded[sizeof(u64)];
+
+ result = uds_read_from_buffered_reader(readers[i], decoded,
+ sizeof(u64));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read volume index flush ranges");
+ }
+
+ sub_index->flush_chapters[header.first_list + j] =
+ get_unaligned_le64(decoded);
+ }
+ }
+
+ for (z = 0; z < sub_index->zone_count; z++) {
+ memset(&sub_index->zones[z], 0, sizeof(struct volume_sub_index_zone));
+ sub_index->zones[z].virtual_chapter_low = virtual_chapter_low;
+ sub_index->zones[z].virtual_chapter_high = virtual_chapter_high;
+ }
+
+ result = uds_start_restoring_delta_index(&sub_index->delta_index, readers,
+ reader_count);
+ if (result != UDS_SUCCESS)
+ return vdo_log_warning_strerror(result, "restoring delta index failed");
+
+ return UDS_SUCCESS;
+}
+
+static int start_restoring_volume_index(struct volume_index *volume_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count)
+{
+ unsigned int i;
+ int result;
+
+ if (!has_sparse(volume_index)) {
+ return start_restoring_volume_sub_index(&volume_index->vi_non_hook,
+ buffered_readers, reader_count);
+ }
+
+ for (i = 0; i < reader_count; i++) {
+ struct volume_index_data header;
+ u8 buffer[sizeof(struct volume_index_data)];
+ size_t offset = 0;
+
+ result = uds_read_from_buffered_reader(buffered_readers[i], buffer,
+ sizeof(buffer));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to read volume index header");
+ }
+
+ memcpy(&header.magic, buffer, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ decode_u32_le(buffer, &offset, &header.sparse_sample_rate);
+
+ result = VDO_ASSERT(offset == sizeof(buffer),
+ "%zu bytes decoded of %zu expected", offset,
+ sizeof(buffer));
+ if (result != VDO_SUCCESS)
+ result = UDS_CORRUPT_DATA;
+
+ if (memcmp(header.magic, MAGIC_START_6, MAGIC_SIZE) != 0)
+ return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "volume index file had bad magic number");
+
+ if (i == 0) {
+ volume_index->sparse_sample_rate = header.sparse_sample_rate;
+ } else if (volume_index->sparse_sample_rate != header.sparse_sample_rate) {
+ vdo_log_warning_strerror(UDS_CORRUPT_DATA,
+ "Inconsistent sparse sample rate in delta index zone files: %u vs. %u",
+ volume_index->sparse_sample_rate,
+ header.sparse_sample_rate);
+ return UDS_CORRUPT_DATA;
+ }
+ }
+
+ result = start_restoring_volume_sub_index(&volume_index->vi_non_hook,
+ buffered_readers, reader_count);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return start_restoring_volume_sub_index(&volume_index->vi_hook, buffered_readers,
+ reader_count);
+}
+
+static int finish_restoring_volume_sub_index(struct volume_sub_index *sub_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count)
+{
+ return uds_finish_restoring_delta_index(&sub_index->delta_index,
+ buffered_readers, reader_count);
+}
+
+static int finish_restoring_volume_index(struct volume_index *volume_index,
+ struct buffered_reader **buffered_readers,
+ unsigned int reader_count)
+{
+ int result;
+
+ result = finish_restoring_volume_sub_index(&volume_index->vi_non_hook,
+ buffered_readers, reader_count);
+ if ((result == UDS_SUCCESS) && has_sparse(volume_index)) {
+ result = finish_restoring_volume_sub_index(&volume_index->vi_hook,
+ buffered_readers,
+ reader_count);
+ }
+
+ return result;
+}
+
+int uds_load_volume_index(struct volume_index *volume_index,
+ struct buffered_reader **readers, unsigned int reader_count)
+{
+ int result;
+
+ /* Start by reading the header section of the stream. */
+ result = start_restoring_volume_index(volume_index, readers, reader_count);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = finish_restoring_volume_index(volume_index, readers, reader_count);
+ if (result != UDS_SUCCESS) {
+ abort_restoring_volume_index(volume_index);
+ return result;
+ }
+
+ /* Check the final guard lists to make sure there is no extra data. */
+ result = uds_check_guard_delta_lists(readers, reader_count);
+ if (result != UDS_SUCCESS)
+ abort_restoring_volume_index(volume_index);
+
+ return result;
+}
+
+static int start_saving_volume_sub_index(const struct volume_sub_index *sub_index,
+ unsigned int zone_number,
+ struct buffered_writer *buffered_writer)
+{
+ int result;
+ struct volume_sub_index_zone *volume_index_zone = &sub_index->zones[zone_number];
+ u32 first_list = sub_index->delta_index.delta_zones[zone_number].first_list;
+ u32 list_count = sub_index->delta_index.delta_zones[zone_number].list_count;
+ u8 buffer[sizeof(struct sub_index_data)];
+ size_t offset = 0;
+ u32 i;
+
+ memcpy(buffer, MAGIC_START_5, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ encode_u64_le(buffer, &offset, sub_index->volume_nonce);
+ encode_u64_le(buffer, &offset, volume_index_zone->virtual_chapter_low);
+ encode_u64_le(buffer, &offset, volume_index_zone->virtual_chapter_high);
+ encode_u32_le(buffer, &offset, first_list);
+ encode_u32_le(buffer, &offset, list_count);
+
+ result = VDO_ASSERT(offset == sizeof(struct sub_index_data),
+ "%zu bytes of config written, of %zu expected", offset,
+ sizeof(struct sub_index_data));
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_write_to_buffered_writer(buffered_writer, buffer, offset);
+ if (result != UDS_SUCCESS)
+ return vdo_log_warning_strerror(result,
+ "failed to write volume index header");
+
+ for (i = 0; i < list_count; i++) {
+ u8 encoded[sizeof(u64)];
+
+ put_unaligned_le64(sub_index->flush_chapters[first_list + i], &encoded);
+ result = uds_write_to_buffered_writer(buffered_writer, encoded,
+ sizeof(u64));
+ if (result != UDS_SUCCESS) {
+ return vdo_log_warning_strerror(result,
+ "failed to write volume index flush ranges");
+ }
+ }
+
+ return uds_start_saving_delta_index(&sub_index->delta_index, zone_number,
+ buffered_writer);
+}
+
+static int start_saving_volume_index(const struct volume_index *volume_index,
+ unsigned int zone_number,
+ struct buffered_writer *writer)
+{
+ u8 buffer[sizeof(struct volume_index_data)];
+ size_t offset = 0;
+ int result;
+
+ if (!has_sparse(volume_index)) {
+ return start_saving_volume_sub_index(&volume_index->vi_non_hook,
+ zone_number, writer);
+ }
+
+ memcpy(buffer, MAGIC_START_6, MAGIC_SIZE);
+ offset += MAGIC_SIZE;
+ encode_u32_le(buffer, &offset, volume_index->sparse_sample_rate);
+ result = VDO_ASSERT(offset == sizeof(struct volume_index_data),
+ "%zu bytes of header written, of %zu expected", offset,
+ sizeof(struct volume_index_data));
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = uds_write_to_buffered_writer(writer, buffer, offset);
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning_strerror(result, "failed to write volume index header");
+ return result;
+ }
+
+ result = start_saving_volume_sub_index(&volume_index->vi_non_hook, zone_number,
+ writer);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ return start_saving_volume_sub_index(&volume_index->vi_hook, zone_number,
+ writer);
+}
+
+static int finish_saving_volume_sub_index(const struct volume_sub_index *sub_index,
+ unsigned int zone_number)
+{
+ return uds_finish_saving_delta_index(&sub_index->delta_index, zone_number);
+}
+
+static int finish_saving_volume_index(const struct volume_index *volume_index,
+ unsigned int zone_number)
+{
+ int result;
+
+ result = finish_saving_volume_sub_index(&volume_index->vi_non_hook, zone_number);
+ if ((result == UDS_SUCCESS) && has_sparse(volume_index))
+ result = finish_saving_volume_sub_index(&volume_index->vi_hook, zone_number);
+ return result;
+}
+
+int uds_save_volume_index(struct volume_index *volume_index,
+ struct buffered_writer **writers, unsigned int writer_count)
+{
+ int result = UDS_SUCCESS;
+ unsigned int zone;
+
+ for (zone = 0; zone < writer_count; zone++) {
+ result = start_saving_volume_index(volume_index, zone, writers[zone]);
+ if (result != UDS_SUCCESS)
+ break;
+
+ result = finish_saving_volume_index(volume_index, zone);
+ if (result != UDS_SUCCESS)
+ break;
+
+ result = uds_write_guard_delta_list(writers[zone]);
+ if (result != UDS_SUCCESS)
+ break;
+
+ result = uds_flush_buffered_writer(writers[zone]);
+ if (result != UDS_SUCCESS)
+ break;
+ }
+
+ return result;
+}
+
+static void get_volume_sub_index_stats(const struct volume_sub_index *sub_index,
+ struct volume_index_stats *stats)
+{
+ struct delta_index_stats dis;
+ unsigned int z;
+
+ uds_get_delta_index_stats(&sub_index->delta_index, &dis);
+ stats->rebalance_time = dis.rebalance_time;
+ stats->rebalance_count = dis.rebalance_count;
+ stats->record_count = dis.record_count;
+ stats->collision_count = dis.collision_count;
+ stats->discard_count = dis.discard_count;
+ stats->overflow_count = dis.overflow_count;
+ stats->delta_lists = dis.list_count;
+ stats->early_flushes = 0;
+ for (z = 0; z < sub_index->zone_count; z++)
+ stats->early_flushes += sub_index->zones[z].early_flushes;
+}
+
+void uds_get_volume_index_stats(const struct volume_index *volume_index,
+ struct volume_index_stats *stats)
+{
+ struct volume_index_stats sparse_stats;
+
+ get_volume_sub_index_stats(&volume_index->vi_non_hook, stats);
+ if (!has_sparse(volume_index))
+ return;
+
+ get_volume_sub_index_stats(&volume_index->vi_hook, &sparse_stats);
+ stats->rebalance_time += sparse_stats.rebalance_time;
+ stats->rebalance_count += sparse_stats.rebalance_count;
+ stats->record_count += sparse_stats.record_count;
+ stats->collision_count += sparse_stats.collision_count;
+ stats->discard_count += sparse_stats.discard_count;
+ stats->overflow_count += sparse_stats.overflow_count;
+ stats->delta_lists += sparse_stats.delta_lists;
+ stats->early_flushes += sparse_stats.early_flushes;
+}
+
+static int initialize_volume_sub_index(const struct uds_configuration *config,
+ u64 volume_nonce, u8 tag,
+ struct volume_sub_index *sub_index)
+{
+ struct sub_index_parameters params = { .address_bits = 0 };
+ unsigned int zone_count = config->zone_count;
+ u64 available_bytes = 0;
+ unsigned int z;
+ int result;
+
+ result = compute_volume_sub_index_parameters(config, &params);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ sub_index->address_bits = params.address_bits;
+ sub_index->address_mask = (1u << params.address_bits) - 1;
+ sub_index->chapter_bits = params.chapter_bits;
+ sub_index->chapter_mask = (1u << params.chapter_bits) - 1;
+ sub_index->chapter_count = params.chapter_count;
+ sub_index->list_count = params.list_count;
+ sub_index->zone_count = zone_count;
+ sub_index->chapter_zone_bits = params.chapter_size_in_bits / zone_count;
+ sub_index->volume_nonce = volume_nonce;
+
+ result = uds_initialize_delta_index(&sub_index->delta_index, zone_count,
+ params.list_count, params.mean_delta,
+ params.chapter_bits, params.memory_size,
+ tag);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ for (z = 0; z < sub_index->delta_index.zone_count; z++)
+ available_bytes += sub_index->delta_index.delta_zones[z].size;
+ available_bytes -= params.target_free_bytes;
+ sub_index->max_zone_bits = (available_bytes * BITS_PER_BYTE) / zone_count;
+ sub_index->memory_size = (sub_index->delta_index.memory_size +
+ sizeof(struct volume_sub_index) +
+ (params.list_count * sizeof(u64)) +
+ (zone_count * sizeof(struct volume_sub_index_zone)));
+
+ /* The following arrays are initialized to all zeros. */
+ result = vdo_allocate(params.list_count, u64, "first chapter to flush",
+ &sub_index->flush_chapters);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return vdo_allocate(zone_count, struct volume_sub_index_zone,
+ "volume index zones", &sub_index->zones);
+}
+
+int uds_make_volume_index(const struct uds_configuration *config, u64 volume_nonce,
+ struct volume_index **volume_index_ptr)
+{
+ struct split_config split;
+ unsigned int zone;
+ struct volume_index *volume_index;
+ int result;
+
+ result = vdo_allocate(1, struct volume_index, "volume index", &volume_index);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ volume_index->zone_count = config->zone_count;
+
+ if (!uds_is_sparse_index_geometry(config->geometry)) {
+ result = initialize_volume_sub_index(config, volume_nonce, 'm',
+ &volume_index->vi_non_hook);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume_index(volume_index);
+ return result;
+ }
+
+ volume_index->memory_size = volume_index->vi_non_hook.memory_size;
+ *volume_index_ptr = volume_index;
+ return UDS_SUCCESS;
+ }
+
+ volume_index->sparse_sample_rate = config->sparse_sample_rate;
+
+ result = vdo_allocate(config->zone_count, struct volume_index_zone,
+ "volume index zones", &volume_index->zones);
+ if (result != VDO_SUCCESS) {
+ uds_free_volume_index(volume_index);
+ return result;
+ }
+
+ for (zone = 0; zone < config->zone_count; zone++)
+ mutex_init(&volume_index->zones[zone].hook_mutex);
+
+ split_configuration(config, &split);
+ result = initialize_volume_sub_index(&split.non_hook_config, volume_nonce, 'd',
+ &volume_index->vi_non_hook);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume_index(volume_index);
+ return vdo_log_error_strerror(result,
+ "Error creating non hook volume index");
+ }
+
+ result = initialize_volume_sub_index(&split.hook_config, volume_nonce, 's',
+ &volume_index->vi_hook);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume_index(volume_index);
+ return vdo_log_error_strerror(result,
+ "Error creating hook volume index");
+ }
+
+ volume_index->memory_size =
+ volume_index->vi_non_hook.memory_size + volume_index->vi_hook.memory_size;
+ *volume_index_ptr = volume_index;
+ return UDS_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/indexer/volume-index.h b/drivers/md/dm-vdo/indexer/volume-index.h
new file mode 100644
index 000000000000..583998c547b7
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/volume-index.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_VOLUME_INDEX_H
+#define UDS_VOLUME_INDEX_H
+
+#include <linux/limits.h>
+
+#include "thread-utils.h"
+
+#include "config.h"
+#include "delta-index.h"
+#include "indexer.h"
+
+/*
+ * The volume index is the primary top-level index for UDS. It contains records which map a record
+ * name to the chapter where a record with that name is stored. This mapping can definitively say
+ * when no record exists. However, because we only use a subset of the name for this index, it
+ * cannot definitively say that a record for the entry does exist. It can only say that if a record
+ * exists, it will be in a particular chapter. The request can then be dispatched to that chapter
+ * for further processing.
+ *
+ * If the volume_index_record does not actually match the record name, the index can store a more
+ * specific collision record to disambiguate the new entry from the existing one. Index entries are
+ * managed with volume_index_record structures.
+ */
+
+#define NO_CHAPTER U64_MAX
+
+struct volume_index_stats {
+ /* Nanoseconds spent rebalancing */
+ ktime_t rebalance_time;
+ /* Number of memory rebalances */
+ u32 rebalance_count;
+ /* The number of records in the index */
+ u64 record_count;
+ /* The number of collision records */
+ u64 collision_count;
+ /* The number of records removed */
+ u64 discard_count;
+ /* The number of UDS_OVERFLOWs detected */
+ u64 overflow_count;
+ /* The number of delta lists */
+ u32 delta_lists;
+ /* Number of early flushes */
+ u64 early_flushes;
+};
+
+struct volume_sub_index_zone {
+ u64 virtual_chapter_low;
+ u64 virtual_chapter_high;
+ u64 early_flushes;
+} __aligned(L1_CACHE_BYTES);
+
+struct volume_sub_index {
+ /* The delta index */
+ struct delta_index delta_index;
+ /* The first chapter to be flushed in each zone */
+ u64 *flush_chapters;
+ /* The zones */
+ struct volume_sub_index_zone *zones;
+ /* The volume nonce */
+ u64 volume_nonce;
+ /* Expected size of a chapter (per zone) */
+ u64 chapter_zone_bits;
+ /* Maximum size of the index (per zone) */
+ u64 max_zone_bits;
+ /* The number of bits in address mask */
+ u8 address_bits;
+ /* Mask to get address within delta list */
+ u32 address_mask;
+ /* The number of bits in chapter number */
+ u8 chapter_bits;
+ /* The largest storable chapter number */
+ u32 chapter_mask;
+ /* The number of chapters used */
+ u32 chapter_count;
+ /* The number of delta lists */
+ u32 list_count;
+ /* The number of zones */
+ unsigned int zone_count;
+ /* The amount of memory allocated */
+ u64 memory_size;
+};
+
+struct volume_index_zone {
+ /* Protects the sampled index in this zone */
+ struct mutex hook_mutex;
+} __aligned(L1_CACHE_BYTES);
+
+struct volume_index {
+ u32 sparse_sample_rate;
+ unsigned int zone_count;
+ u64 memory_size;
+ struct volume_sub_index vi_non_hook;
+ struct volume_sub_index vi_hook;
+ struct volume_index_zone *zones;
+};
+
+/*
+ * The volume_index_record structure is used to facilitate processing of a record name. A client
+ * first calls uds_get_volume_index_record() to find the volume index record for a record name. The
+ * fields of the record can then be examined to determine the state of the record.
+ *
+ * If is_found is false, then the index did not find an entry for the record name. Calling
+ * uds_put_volume_index_record() will insert a new entry for that name at the proper place.
+ *
+ * If is_found is true, then we did find an entry for the record name, and the virtual_chapter and
+ * is_collision fields reflect the entry found. Subsequently, a call to
+ * uds_remove_volume_index_record() will remove the entry, a call to
+ * uds_set_volume_index_record_chapter() will update the existing entry, and a call to
+ * uds_put_volume_index_record() will insert a new collision record after the existing entry.
+ */
+struct volume_index_record {
+ /* Public fields */
+
+ /* Chapter where the record info is found */
+ u64 virtual_chapter;
+ /* This record is a collision */
+ bool is_collision;
+ /* This record is the requested record */
+ bool is_found;
+
+ /* Private fields */
+
+ /* Zone that contains this name */
+ unsigned int zone_number;
+ /* The volume index */
+ struct volume_sub_index *sub_index;
+ /* Mutex for accessing this delta index entry in the hook index */
+ struct mutex *mutex;
+ /* The record name to which this record refers */
+ const struct uds_record_name *name;
+ /* The delta index entry for this record */
+ struct delta_index_entry delta_entry;
+};
+
+int __must_check uds_make_volume_index(const struct uds_configuration *config,
+ u64 volume_nonce,
+ struct volume_index **volume_index);
+
+void uds_free_volume_index(struct volume_index *volume_index);
+
+int __must_check uds_compute_volume_index_save_blocks(const struct uds_configuration *config,
+ size_t block_size,
+ u64 *block_count);
+
+unsigned int __must_check uds_get_volume_index_zone(const struct volume_index *volume_index,
+ const struct uds_record_name *name);
+
+bool __must_check uds_is_volume_index_sample(const struct volume_index *volume_index,
+ const struct uds_record_name *name);
+
+/*
+ * This function is only used to manage sparse cache membership. Most requests should use
+ * uds_get_volume_index_record() to look up index records instead.
+ */
+u64 __must_check uds_lookup_volume_index_name(const struct volume_index *volume_index,
+ const struct uds_record_name *name);
+
+int __must_check uds_get_volume_index_record(struct volume_index *volume_index,
+ const struct uds_record_name *name,
+ struct volume_index_record *record);
+
+int __must_check uds_put_volume_index_record(struct volume_index_record *record,
+ u64 virtual_chapter);
+
+int __must_check uds_remove_volume_index_record(struct volume_index_record *record);
+
+int __must_check uds_set_volume_index_record_chapter(struct volume_index_record *record,
+ u64 virtual_chapter);
+
+void uds_set_volume_index_open_chapter(struct volume_index *volume_index,
+ u64 virtual_chapter);
+
+void uds_set_volume_index_zone_open_chapter(struct volume_index *volume_index,
+ unsigned int zone_number,
+ u64 virtual_chapter);
+
+int __must_check uds_load_volume_index(struct volume_index *volume_index,
+ struct buffered_reader **readers,
+ unsigned int reader_count);
+
+int __must_check uds_save_volume_index(struct volume_index *volume_index,
+ struct buffered_writer **writers,
+ unsigned int writer_count);
+
+void uds_get_volume_index_stats(const struct volume_index *volume_index,
+ struct volume_index_stats *stats);
+
+#endif /* UDS_VOLUME_INDEX_H */
diff --git a/drivers/md/dm-vdo/indexer/volume.c b/drivers/md/dm-vdo/indexer/volume.c
new file mode 100644
index 000000000000..655453bb276b
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/volume.c
@@ -0,0 +1,1693 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "volume.h"
+
+#include <linux/atomic.h>
+#include <linux/dm-bufio.h>
+#include <linux/err.h>
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+#include "string-utils.h"
+#include "thread-utils.h"
+
+#include "chapter-index.h"
+#include "config.h"
+#include "geometry.h"
+#include "hash-utils.h"
+#include "index.h"
+#include "sparse-cache.h"
+
+/*
+ * The first block of the volume layout is reserved for the volume header, which is no longer used.
+ * The remainder of the volume is divided into chapters consisting of several pages of records, and
+ * several pages of static index to use to find those records. The index pages are recorded first,
+ * followed by the record pages. The chapters are written in order as they are filled, so the
+ * volume storage acts as a circular log of the most recent chapters, with each new chapter
+ * overwriting the oldest saved one.
+ *
+ * When a new chapter is filled and closed, the records from that chapter are sorted and
+ * interleaved in approximate temporal order, and assigned to record pages. Then a static delta
+ * index is generated to store which record page contains each record. The in-memory index page map
+ * is also updated to indicate which delta lists fall on each chapter index page. This means that
+ * when a record is read, the volume only has to load a single index page and a single record page,
+ * rather than search the entire chapter. These index and record pages are written to storage, and
+ * the index pages are transferred to the page cache under the theory that the most recently
+ * written chapter is likely to be accessed again soon.
+ *
+ * When reading a record, the volume index will indicate which chapter should contain it. The
+ * volume uses the index page map to determine which chapter index page needs to be loaded, and
+ * then reads the relevant record page number from the chapter index. Both index and record pages
+ * are stored in a page cache when read for the common case that subsequent records need the same
+ * pages. The page cache evicts the least recently accessed entries when caching new pages. In
+ * addition, the volume uses dm-bufio to manage access to the storage, which may allow for
+ * additional caching depending on available system resources.
+ *
+ * Record requests are handled from cached pages when possible. If a page needs to be read, it is
+ * placed on a queue along with the request that wants to read it. Any requests for the same page
+ * that arrive while the read is pending are added to the queue entry. A separate reader thread
+ * handles the queued reads, adding the page to the cache and updating any requests queued with it
+ * so they can continue processing. This allows the index zone threads to continue processing new
+ * requests rather than wait for the storage reads.
+ *
+ * When an index rebuild is necessary, the volume reads each stored chapter to determine which
+ * range of chapters contain valid records, so that those records can be used to reconstruct the
+ * in-memory volume index.
+ */
+
+/* The maximum allowable number of contiguous bad chapters */
+#define MAX_BAD_CHAPTERS 100
+#define VOLUME_CACHE_MAX_ENTRIES (U16_MAX >> 1)
+#define VOLUME_CACHE_QUEUED_FLAG (1 << 15)
+#define VOLUME_CACHE_MAX_QUEUED_READS 4096
+
+static const u64 BAD_CHAPTER = U64_MAX;
+
+/*
+ * The invalidate counter is two 32 bits fields stored together atomically. The low order 32 bits
+ * are the physical page number of the cached page being read. The high order 32 bits are a
+ * sequence number. This value is written when the zone that owns it begins or completes a cache
+ * search. Any other thread will only read the counter in wait_for_pending_searches() while waiting
+ * to update the cache contents.
+ */
+union invalidate_counter {
+ u64 value;
+ struct {
+ u32 page;
+ u32 counter;
+ };
+};
+
+static inline u32 map_to_page_number(struct index_geometry *geometry, u32 physical_page)
+{
+ return (physical_page - HEADER_PAGES_PER_VOLUME) % geometry->pages_per_chapter;
+}
+
+static inline u32 map_to_chapter_number(struct index_geometry *geometry, u32 physical_page)
+{
+ return (physical_page - HEADER_PAGES_PER_VOLUME) / geometry->pages_per_chapter;
+}
+
+static inline bool is_record_page(struct index_geometry *geometry, u32 physical_page)
+{
+ return map_to_page_number(geometry, physical_page) >= geometry->index_pages_per_chapter;
+}
+
+static u32 map_to_physical_page(const struct index_geometry *geometry, u32 chapter, u32 page)
+{
+ /* Page zero is the header page, so the first chapter index page is page one. */
+ return HEADER_PAGES_PER_VOLUME + (geometry->pages_per_chapter * chapter) + page;
+}
+
+static inline union invalidate_counter get_invalidate_counter(struct page_cache *cache,
+ unsigned int zone_number)
+{
+ return (union invalidate_counter) {
+ .value = READ_ONCE(cache->search_pending_counters[zone_number].atomic_value),
+ };
+}
+
+static inline void set_invalidate_counter(struct page_cache *cache,
+ unsigned int zone_number,
+ union invalidate_counter invalidate_counter)
+{
+ WRITE_ONCE(cache->search_pending_counters[zone_number].atomic_value,
+ invalidate_counter.value);
+}
+
+static inline bool search_pending(union invalidate_counter invalidate_counter)
+{
+ return (invalidate_counter.counter & 1) != 0;
+}
+
+/* Lock the cache for a zone in order to search for a page. */
+static void begin_pending_search(struct page_cache *cache, u32 physical_page,
+ unsigned int zone_number)
+{
+ union invalidate_counter invalidate_counter =
+ get_invalidate_counter(cache, zone_number);
+
+ invalidate_counter.page = physical_page;
+ invalidate_counter.counter++;
+ set_invalidate_counter(cache, zone_number, invalidate_counter);
+ VDO_ASSERT_LOG_ONLY(search_pending(invalidate_counter),
+ "Search is pending for zone %u", zone_number);
+ /*
+ * This memory barrier ensures that the write to the invalidate counter is seen by other
+ * threads before this thread accesses the cached page. The corresponding read memory
+ * barrier is in wait_for_pending_searches().
+ */
+ smp_mb();
+}
+
+/* Unlock the cache for a zone by clearing its invalidate counter. */
+static void end_pending_search(struct page_cache *cache, unsigned int zone_number)
+{
+ union invalidate_counter invalidate_counter;
+
+ /*
+ * This memory barrier ensures that this thread completes reads of the
+ * cached page before other threads see the write to the invalidate
+ * counter.
+ */
+ smp_mb();
+
+ invalidate_counter = get_invalidate_counter(cache, zone_number);
+ VDO_ASSERT_LOG_ONLY(search_pending(invalidate_counter),
+ "Search is pending for zone %u", zone_number);
+ invalidate_counter.counter++;
+ set_invalidate_counter(cache, zone_number, invalidate_counter);
+}
+
+static void wait_for_pending_searches(struct page_cache *cache, u32 physical_page)
+{
+ union invalidate_counter initial_counters[MAX_ZONES];
+ unsigned int i;
+
+ /*
+ * We hold the read_threads_mutex. We are waiting for threads that do not hold the
+ * read_threads_mutex. Those threads have "locked" their targeted page by setting the
+ * search_pending_counter. The corresponding write memory barrier is in
+ * begin_pending_search().
+ */
+ smp_mb();
+
+ for (i = 0; i < cache->zone_count; i++)
+ initial_counters[i] = get_invalidate_counter(cache, i);
+ for (i = 0; i < cache->zone_count; i++) {
+ if (search_pending(initial_counters[i]) &&
+ (initial_counters[i].page == physical_page)) {
+ /*
+ * There is an active search using the physical page. We need to wait for
+ * the search to finish.
+ *
+ * FIXME: Investigate using wait_event() to wait for the search to finish.
+ */
+ while (initial_counters[i].value ==
+ get_invalidate_counter(cache, i).value)
+ cond_resched();
+ }
+ }
+}
+
+static void release_page_buffer(struct cached_page *page)
+{
+ if (page->buffer != NULL)
+ dm_bufio_release(vdo_forget(page->buffer));
+}
+
+static void clear_cache_page(struct page_cache *cache, struct cached_page *page)
+{
+ /* Do not clear read_pending because the read queue relies on it. */
+ release_page_buffer(page);
+ page->physical_page = cache->indexable_pages;
+ WRITE_ONCE(page->last_used, 0);
+}
+
+static void make_page_most_recent(struct page_cache *cache, struct cached_page *page)
+{
+ /*
+ * ASSERTION: We are either a zone thread holding a search_pending_counter, or we are any
+ * thread holding the read_threads_mutex.
+ */
+ if (atomic64_read(&cache->clock) != READ_ONCE(page->last_used))
+ WRITE_ONCE(page->last_used, atomic64_inc_return(&cache->clock));
+}
+
+/* Select a page to remove from the cache to make space for a new entry. */
+static struct cached_page *select_victim_in_cache(struct page_cache *cache)
+{
+ struct cached_page *page;
+ int oldest_index = 0;
+ s64 oldest_time = S64_MAX;
+ s64 last_used;
+ u16 i;
+
+ /* Find the oldest unclaimed page. We hold the read_threads_mutex. */
+ for (i = 0; i < cache->cache_slots; i++) {
+ /* A page with a pending read must not be replaced. */
+ if (cache->cache[i].read_pending)
+ continue;
+
+ last_used = READ_ONCE(cache->cache[i].last_used);
+ if (last_used <= oldest_time) {
+ oldest_time = last_used;
+ oldest_index = i;
+ }
+ }
+
+ page = &cache->cache[oldest_index];
+ if (page->physical_page != cache->indexable_pages) {
+ WRITE_ONCE(cache->index[page->physical_page], cache->cache_slots);
+ wait_for_pending_searches(cache, page->physical_page);
+ }
+
+ page->read_pending = true;
+ clear_cache_page(cache, page);
+ return page;
+}
+
+/* Make a newly filled cache entry available to other threads. */
+static int put_page_in_cache(struct page_cache *cache, u32 physical_page,
+ struct cached_page *page)
+{
+ int result;
+
+ /* We hold the read_threads_mutex. */
+ result = VDO_ASSERT((page->read_pending), "page to install has a pending read");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ page->physical_page = physical_page;
+ make_page_most_recent(cache, page);
+ page->read_pending = false;
+
+ /*
+ * We hold the read_threads_mutex, but we must have a write memory barrier before making
+ * the cached_page available to the readers that do not hold the mutex. The corresponding
+ * read memory barrier is in get_page_and_index().
+ */
+ smp_wmb();
+
+ /* This assignment also clears the queued flag. */
+ WRITE_ONCE(cache->index[physical_page], page - cache->cache);
+ return UDS_SUCCESS;
+}
+
+static void cancel_page_in_cache(struct page_cache *cache, u32 physical_page,
+ struct cached_page *page)
+{
+ int result;
+
+ /* We hold the read_threads_mutex. */
+ result = VDO_ASSERT((page->read_pending), "page to install has a pending read");
+ if (result != VDO_SUCCESS)
+ return;
+
+ clear_cache_page(cache, page);
+ page->read_pending = false;
+
+ /* Clear the mapping and the queued flag for the new page. */
+ WRITE_ONCE(cache->index[physical_page], cache->cache_slots);
+}
+
+static inline u16 next_queue_position(u16 position)
+{
+ return (position + 1) % VOLUME_CACHE_MAX_QUEUED_READS;
+}
+
+static inline void advance_queue_position(u16 *position)
+{
+ *position = next_queue_position(*position);
+}
+
+static inline bool read_queue_is_full(struct page_cache *cache)
+{
+ return cache->read_queue_first == next_queue_position(cache->read_queue_last);
+}
+
+static bool enqueue_read(struct page_cache *cache, struct uds_request *request,
+ u32 physical_page)
+{
+ struct queued_read *queue_entry;
+ u16 last = cache->read_queue_last;
+ u16 read_queue_index;
+
+ /* We hold the read_threads_mutex. */
+ if ((cache->index[physical_page] & VOLUME_CACHE_QUEUED_FLAG) == 0) {
+ /* This page has no existing entry in the queue. */
+ if (read_queue_is_full(cache))
+ return false;
+
+ /* Fill in the read queue entry. */
+ cache->read_queue[last].physical_page = physical_page;
+ cache->read_queue[last].invalid = false;
+ cache->read_queue[last].first_request = NULL;
+ cache->read_queue[last].last_request = NULL;
+
+ /* Point the cache index to the read queue entry. */
+ read_queue_index = last;
+ WRITE_ONCE(cache->index[physical_page],
+ read_queue_index | VOLUME_CACHE_QUEUED_FLAG);
+
+ advance_queue_position(&cache->read_queue_last);
+ } else {
+ /* It's already queued, so add this request to the existing entry. */
+ read_queue_index = cache->index[physical_page] & ~VOLUME_CACHE_QUEUED_FLAG;
+ }
+
+ request->next_request = NULL;
+ queue_entry = &cache->read_queue[read_queue_index];
+ if (queue_entry->first_request == NULL)
+ queue_entry->first_request = request;
+ else
+ queue_entry->last_request->next_request = request;
+ queue_entry->last_request = request;
+
+ return true;
+}
+
+static void enqueue_page_read(struct volume *volume, struct uds_request *request,
+ u32 physical_page)
+{
+ /* Mark the page as queued, so that chapter invalidation knows to cancel a read. */
+ while (!enqueue_read(&volume->page_cache, request, physical_page)) {
+ vdo_log_debug("Read queue full, waiting for reads to finish");
+ uds_wait_cond(&volume->read_threads_read_done_cond,
+ &volume->read_threads_mutex);
+ }
+
+ uds_signal_cond(&volume->read_threads_cond);
+}
+
+/*
+ * Reserve the next read queue entry for processing, but do not actually remove it from the queue.
+ * Must be followed by release_queued_requests().
+ */
+static struct queued_read *reserve_read_queue_entry(struct page_cache *cache)
+{
+ /* We hold the read_threads_mutex. */
+ struct queued_read *entry;
+ u16 index_value;
+ bool queued;
+
+ /* No items to dequeue */
+ if (cache->read_queue_next_read == cache->read_queue_last)
+ return NULL;
+
+ entry = &cache->read_queue[cache->read_queue_next_read];
+ index_value = cache->index[entry->physical_page];
+ queued = (index_value & VOLUME_CACHE_QUEUED_FLAG) != 0;
+ /* Check to see if it's still queued before resetting. */
+ if (entry->invalid && queued)
+ WRITE_ONCE(cache->index[entry->physical_page], cache->cache_slots);
+
+ /*
+ * If a synchronous read has taken this page, set invalid to true so it doesn't get
+ * overwritten. Requests will just be requeued.
+ */
+ if (!queued)
+ entry->invalid = true;
+
+ entry->reserved = true;
+ advance_queue_position(&cache->read_queue_next_read);
+ return entry;
+}
+
+static inline struct queued_read *wait_to_reserve_read_queue_entry(struct volume *volume)
+{
+ struct queued_read *queue_entry = NULL;
+
+ while (!volume->read_threads_exiting) {
+ queue_entry = reserve_read_queue_entry(&volume->page_cache);
+ if (queue_entry != NULL)
+ break;
+
+ uds_wait_cond(&volume->read_threads_cond, &volume->read_threads_mutex);
+ }
+
+ return queue_entry;
+}
+
+static int init_chapter_index_page(const struct volume *volume, u8 *index_page,
+ u32 chapter, u32 index_page_number,
+ struct delta_index_page *chapter_index_page)
+{
+ u64 ci_virtual;
+ u32 ci_chapter;
+ u32 lowest_list;
+ u32 highest_list;
+ struct index_geometry *geometry = volume->geometry;
+ int result;
+
+ result = uds_initialize_chapter_index_page(chapter_index_page, geometry,
+ index_page, volume->nonce);
+ if (volume->lookup_mode == LOOKUP_FOR_REBUILD)
+ return result;
+
+ if (result != UDS_SUCCESS) {
+ return vdo_log_error_strerror(result,
+ "Reading chapter index page for chapter %u page %u",
+ chapter, index_page_number);
+ }
+
+ uds_get_list_number_bounds(volume->index_page_map, chapter, index_page_number,
+ &lowest_list, &highest_list);
+ ci_virtual = chapter_index_page->virtual_chapter_number;
+ ci_chapter = uds_map_to_physical_chapter(geometry, ci_virtual);
+ if ((chapter == ci_chapter) &&
+ (lowest_list == chapter_index_page->lowest_list_number) &&
+ (highest_list == chapter_index_page->highest_list_number))
+ return UDS_SUCCESS;
+
+ vdo_log_warning("Index page map updated to %llu",
+ (unsigned long long) volume->index_page_map->last_update);
+ vdo_log_warning("Page map expects that chapter %u page %u has range %u to %u, but chapter index page has chapter %llu with range %u to %u",
+ chapter, index_page_number, lowest_list, highest_list,
+ (unsigned long long) ci_virtual,
+ chapter_index_page->lowest_list_number,
+ chapter_index_page->highest_list_number);
+ return vdo_log_error_strerror(UDS_CORRUPT_DATA,
+ "index page map mismatch with chapter index");
+}
+
+static int initialize_index_page(const struct volume *volume, u32 physical_page,
+ struct cached_page *page)
+{
+ u32 chapter = map_to_chapter_number(volume->geometry, physical_page);
+ u32 index_page_number = map_to_page_number(volume->geometry, physical_page);
+
+ return init_chapter_index_page(volume, dm_bufio_get_block_data(page->buffer),
+ chapter, index_page_number, &page->index_page);
+}
+
+static bool search_record_page(const u8 record_page[],
+ const struct uds_record_name *name,
+ const struct index_geometry *geometry,
+ struct uds_record_data *metadata)
+{
+ /*
+ * The array of records is sorted by name and stored as a binary tree in heap order, so the
+ * root of the tree is the first array element.
+ */
+ u32 node = 0;
+ const struct uds_volume_record *records = (const struct uds_volume_record *) record_page;
+
+ while (node < geometry->records_per_page) {
+ int result;
+ const struct uds_volume_record *record = &records[node];
+
+ result = memcmp(name, &record->name, UDS_RECORD_NAME_SIZE);
+ if (result == 0) {
+ if (metadata != NULL)
+ *metadata = record->data;
+ return true;
+ }
+
+ /* The children of node N are at indexes 2N+1 and 2N+2. */
+ node = ((2 * node) + ((result < 0) ? 1 : 2));
+ }
+
+ return false;
+}
+
+/*
+ * If we've read in a record page, we're going to do an immediate search, to speed up processing by
+ * avoiding get_record_from_zone(), and to ensure that requests make progress even when queued. If
+ * we've read in an index page, we save the record page number so we don't have to resolve the
+ * index page again. We use the location, virtual_chapter, and old_metadata fields in the request
+ * to allow the index code to know where to begin processing the request again.
+ */
+static int search_page(struct cached_page *page, const struct volume *volume,
+ struct uds_request *request, u32 physical_page)
+{
+ int result;
+ enum uds_index_region location;
+ u16 record_page_number;
+
+ if (is_record_page(volume->geometry, physical_page)) {
+ if (search_record_page(dm_bufio_get_block_data(page->buffer),
+ &request->record_name, volume->geometry,
+ &request->old_metadata))
+ location = UDS_LOCATION_RECORD_PAGE_LOOKUP;
+ else
+ location = UDS_LOCATION_UNAVAILABLE;
+ } else {
+ result = uds_search_chapter_index_page(&page->index_page,
+ volume->geometry,
+ &request->record_name,
+ &record_page_number);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (record_page_number == NO_CHAPTER_INDEX_ENTRY) {
+ location = UDS_LOCATION_UNAVAILABLE;
+ } else {
+ location = UDS_LOCATION_INDEX_PAGE_LOOKUP;
+ *((u16 *) &request->old_metadata) = record_page_number;
+ }
+ }
+
+ request->location = location;
+ request->found = false;
+ return UDS_SUCCESS;
+}
+
+static int process_entry(struct volume *volume, struct queued_read *entry)
+{
+ u32 page_number = entry->physical_page;
+ struct uds_request *request;
+ struct cached_page *page = NULL;
+ u8 *page_data;
+ int result;
+
+ if (entry->invalid) {
+ vdo_log_debug("Requeuing requests for invalid page");
+ return UDS_SUCCESS;
+ }
+
+ page = select_victim_in_cache(&volume->page_cache);
+
+ mutex_unlock(&volume->read_threads_mutex);
+ page_data = dm_bufio_read(volume->client, page_number, &page->buffer);
+ mutex_lock(&volume->read_threads_mutex);
+ if (IS_ERR(page_data)) {
+ result = -PTR_ERR(page_data);
+ vdo_log_warning_strerror(result,
+ "error reading physical page %u from volume",
+ page_number);
+ cancel_page_in_cache(&volume->page_cache, page_number, page);
+ return result;
+ }
+
+ if (entry->invalid) {
+ vdo_log_warning("Page %u invalidated after read", page_number);
+ cancel_page_in_cache(&volume->page_cache, page_number, page);
+ return UDS_SUCCESS;
+ }
+
+ if (!is_record_page(volume->geometry, page_number)) {
+ result = initialize_index_page(volume, page_number, page);
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning("Error initializing chapter index page");
+ cancel_page_in_cache(&volume->page_cache, page_number, page);
+ return result;
+ }
+ }
+
+ result = put_page_in_cache(&volume->page_cache, page_number, page);
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning("Error putting page %u in cache", page_number);
+ cancel_page_in_cache(&volume->page_cache, page_number, page);
+ return result;
+ }
+
+ request = entry->first_request;
+ while ((request != NULL) && (result == UDS_SUCCESS)) {
+ result = search_page(page, volume, request, page_number);
+ request = request->next_request;
+ }
+
+ return result;
+}
+
+static void release_queued_requests(struct volume *volume, struct queued_read *entry,
+ int result)
+{
+ struct page_cache *cache = &volume->page_cache;
+ u16 next_read = cache->read_queue_next_read;
+ struct uds_request *request;
+ struct uds_request *next;
+
+ for (request = entry->first_request; request != NULL; request = next) {
+ next = request->next_request;
+ request->status = result;
+ request->requeued = true;
+ uds_enqueue_request(request, STAGE_INDEX);
+ }
+
+ entry->reserved = false;
+
+ /* Move the read_queue_first pointer as far as we can. */
+ while ((cache->read_queue_first != next_read) &&
+ (!cache->read_queue[cache->read_queue_first].reserved))
+ advance_queue_position(&cache->read_queue_first);
+ uds_broadcast_cond(&volume->read_threads_read_done_cond);
+}
+
+static void read_thread_function(void *arg)
+{
+ struct volume *volume = arg;
+
+ vdo_log_debug("reader starting");
+ mutex_lock(&volume->read_threads_mutex);
+ while (true) {
+ struct queued_read *queue_entry;
+ int result;
+
+ queue_entry = wait_to_reserve_read_queue_entry(volume);
+ if (volume->read_threads_exiting)
+ break;
+
+ result = process_entry(volume, queue_entry);
+ release_queued_requests(volume, queue_entry, result);
+ }
+ mutex_unlock(&volume->read_threads_mutex);
+ vdo_log_debug("reader done");
+}
+
+static void get_page_and_index(struct page_cache *cache, u32 physical_page,
+ int *queue_index, struct cached_page **page_ptr)
+{
+ u16 index_value;
+ u16 index;
+ bool queued;
+
+ /*
+ * ASSERTION: We are either a zone thread holding a search_pending_counter, or we are any
+ * thread holding the read_threads_mutex.
+ *
+ * Holding only a search_pending_counter is the most frequent case.
+ */
+ /*
+ * It would be unlikely for the compiler to turn the usage of index_value into two reads of
+ * cache->index, but it would be possible and very bad if those reads did not return the
+ * same bits.
+ */
+ index_value = READ_ONCE(cache->index[physical_page]);
+ queued = (index_value & VOLUME_CACHE_QUEUED_FLAG) != 0;
+ index = index_value & ~VOLUME_CACHE_QUEUED_FLAG;
+
+ if (!queued && (index < cache->cache_slots)) {
+ *page_ptr = &cache->cache[index];
+ /*
+ * We have acquired access to the cached page, but unless we hold the
+ * read_threads_mutex, we need a read memory barrier now. The corresponding write
+ * memory barrier is in put_page_in_cache().
+ */
+ smp_rmb();
+ } else {
+ *page_ptr = NULL;
+ }
+
+ *queue_index = queued ? index : -1;
+}
+
+static void get_page_from_cache(struct page_cache *cache, u32 physical_page,
+ struct cached_page **page)
+{
+ /*
+ * ASSERTION: We are in a zone thread.
+ * ASSERTION: We holding a search_pending_counter or the read_threads_mutex.
+ */
+ int queue_index = -1;
+
+ get_page_and_index(cache, physical_page, &queue_index, page);
+}
+
+static int read_page_locked(struct volume *volume, u32 physical_page,
+ struct cached_page **page_ptr)
+{
+ int result = UDS_SUCCESS;
+ struct cached_page *page = NULL;
+ u8 *page_data;
+
+ page = select_victim_in_cache(&volume->page_cache);
+ page_data = dm_bufio_read(volume->client, physical_page, &page->buffer);
+ if (IS_ERR(page_data)) {
+ result = -PTR_ERR(page_data);
+ vdo_log_warning_strerror(result,
+ "error reading physical page %u from volume",
+ physical_page);
+ cancel_page_in_cache(&volume->page_cache, physical_page, page);
+ return result;
+ }
+
+ if (!is_record_page(volume->geometry, physical_page)) {
+ result = initialize_index_page(volume, physical_page, page);
+ if (result != UDS_SUCCESS) {
+ if (volume->lookup_mode != LOOKUP_FOR_REBUILD)
+ vdo_log_warning("Corrupt index page %u", physical_page);
+ cancel_page_in_cache(&volume->page_cache, physical_page, page);
+ return result;
+ }
+ }
+
+ result = put_page_in_cache(&volume->page_cache, physical_page, page);
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning("Error putting page %u in cache", physical_page);
+ cancel_page_in_cache(&volume->page_cache, physical_page, page);
+ return result;
+ }
+
+ *page_ptr = page;
+ return UDS_SUCCESS;
+}
+
+/* Retrieve a page from the cache while holding the read threads mutex. */
+static int get_volume_page_locked(struct volume *volume, u32 physical_page,
+ struct cached_page **page_ptr)
+{
+ int result;
+ struct cached_page *page = NULL;
+
+ get_page_from_cache(&volume->page_cache, physical_page, &page);
+ if (page == NULL) {
+ result = read_page_locked(volume, physical_page, &page);
+ if (result != UDS_SUCCESS)
+ return result;
+ } else {
+ make_page_most_recent(&volume->page_cache, page);
+ }
+
+ *page_ptr = page;
+ return UDS_SUCCESS;
+}
+
+/* Retrieve a page from the cache while holding a search_pending lock. */
+static int get_volume_page_protected(struct volume *volume, struct uds_request *request,
+ u32 physical_page, struct cached_page **page_ptr)
+{
+ struct cached_page *page;
+
+ get_page_from_cache(&volume->page_cache, physical_page, &page);
+ if (page != NULL) {
+ if (request->zone_number == 0) {
+ /* Only one zone is allowed to update the LRU. */
+ make_page_most_recent(&volume->page_cache, page);
+ }
+
+ *page_ptr = page;
+ return UDS_SUCCESS;
+ }
+
+ /* Prepare to enqueue a read for the page. */
+ end_pending_search(&volume->page_cache, request->zone_number);
+ mutex_lock(&volume->read_threads_mutex);
+
+ /*
+ * Do the lookup again while holding the read mutex (no longer the fast case so this should
+ * be fine to repeat). We need to do this because a page may have been added to the cache
+ * by a reader thread between the time we searched above and the time we went to actually
+ * try to enqueue it below. This could result in us enqueuing another read for a page which
+ * is already in the cache, which would mean we end up with two entries in the cache for
+ * the same page.
+ */
+ get_page_from_cache(&volume->page_cache, physical_page, &page);
+ if (page == NULL) {
+ enqueue_page_read(volume, request, physical_page);
+ /*
+ * The performance gain from unlocking first, while "search pending" mode is off,
+ * turns out to be significant in some cases. The page is not available yet so
+ * the order does not matter for correctness as it does below.
+ */
+ mutex_unlock(&volume->read_threads_mutex);
+ begin_pending_search(&volume->page_cache, physical_page,
+ request->zone_number);
+ return UDS_QUEUED;
+ }
+
+ /*
+ * Now that the page is loaded, the volume needs to switch to "reader thread unlocked" and
+ * "search pending" state in careful order so no other thread can mess with the data before
+ * the caller gets to look at it.
+ */
+ begin_pending_search(&volume->page_cache, physical_page, request->zone_number);
+ mutex_unlock(&volume->read_threads_mutex);
+ *page_ptr = page;
+ return UDS_SUCCESS;
+}
+
+static int get_volume_page(struct volume *volume, u32 chapter, u32 page_number,
+ struct cached_page **page_ptr)
+{
+ int result;
+ u32 physical_page = map_to_physical_page(volume->geometry, chapter, page_number);
+
+ mutex_lock(&volume->read_threads_mutex);
+ result = get_volume_page_locked(volume, physical_page, page_ptr);
+ mutex_unlock(&volume->read_threads_mutex);
+ return result;
+}
+
+int uds_get_volume_record_page(struct volume *volume, u32 chapter, u32 page_number,
+ u8 **data_ptr)
+{
+ int result;
+ struct cached_page *page = NULL;
+
+ result = get_volume_page(volume, chapter, page_number, &page);
+ if (result == UDS_SUCCESS)
+ *data_ptr = dm_bufio_get_block_data(page->buffer);
+ return result;
+}
+
+int uds_get_volume_index_page(struct volume *volume, u32 chapter, u32 page_number,
+ struct delta_index_page **index_page_ptr)
+{
+ int result;
+ struct cached_page *page = NULL;
+
+ result = get_volume_page(volume, chapter, page_number, &page);
+ if (result == UDS_SUCCESS)
+ *index_page_ptr = &page->index_page;
+ return result;
+}
+
+/*
+ * Find the record page associated with a name in a given index page. This will return UDS_QUEUED
+ * if the page in question must be read from storage.
+ */
+static int search_cached_index_page(struct volume *volume, struct uds_request *request,
+ u32 chapter, u32 index_page_number,
+ u16 *record_page_number)
+{
+ int result;
+ struct cached_page *page = NULL;
+ u32 physical_page = map_to_physical_page(volume->geometry, chapter,
+ index_page_number);
+
+ /*
+ * Make sure the invalidate counter is updated before we try and read the mapping. This
+ * prevents this thread from reading a page in the cache which has already been marked for
+ * invalidation by the reader thread, before the reader thread has noticed that the
+ * invalidate_counter has been incremented.
+ */
+ begin_pending_search(&volume->page_cache, physical_page, request->zone_number);
+
+ result = get_volume_page_protected(volume, request, physical_page, &page);
+ if (result != UDS_SUCCESS) {
+ end_pending_search(&volume->page_cache, request->zone_number);
+ return result;
+ }
+
+ result = uds_search_chapter_index_page(&page->index_page, volume->geometry,
+ &request->record_name,
+ record_page_number);
+ end_pending_search(&volume->page_cache, request->zone_number);
+ return result;
+}
+
+/*
+ * Find the metadata associated with a name in a given record page. This will return UDS_QUEUED if
+ * the page in question must be read from storage.
+ */
+int uds_search_cached_record_page(struct volume *volume, struct uds_request *request,
+ u32 chapter, u16 record_page_number, bool *found)
+{
+ struct cached_page *record_page;
+ struct index_geometry *geometry = volume->geometry;
+ int result;
+ u32 physical_page, page_number;
+
+ *found = false;
+ if (record_page_number == NO_CHAPTER_INDEX_ENTRY)
+ return UDS_SUCCESS;
+
+ result = VDO_ASSERT(record_page_number < geometry->record_pages_per_chapter,
+ "0 <= %d < %u", record_page_number,
+ geometry->record_pages_per_chapter);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ page_number = geometry->index_pages_per_chapter + record_page_number;
+
+ physical_page = map_to_physical_page(volume->geometry, chapter, page_number);
+
+ /*
+ * Make sure the invalidate counter is updated before we try and read the mapping. This
+ * prevents this thread from reading a page in the cache which has already been marked for
+ * invalidation by the reader thread, before the reader thread has noticed that the
+ * invalidate_counter has been incremented.
+ */
+ begin_pending_search(&volume->page_cache, physical_page, request->zone_number);
+
+ result = get_volume_page_protected(volume, request, physical_page, &record_page);
+ if (result != UDS_SUCCESS) {
+ end_pending_search(&volume->page_cache, request->zone_number);
+ return result;
+ }
+
+ if (search_record_page(dm_bufio_get_block_data(record_page->buffer),
+ &request->record_name, geometry, &request->old_metadata))
+ *found = true;
+
+ end_pending_search(&volume->page_cache, request->zone_number);
+ return UDS_SUCCESS;
+}
+
+void uds_prefetch_volume_chapter(const struct volume *volume, u32 chapter)
+{
+ const struct index_geometry *geometry = volume->geometry;
+ u32 physical_page = map_to_physical_page(geometry, chapter, 0);
+
+ dm_bufio_prefetch(volume->client, physical_page, geometry->pages_per_chapter);
+}
+
+int uds_read_chapter_index_from_volume(const struct volume *volume, u64 virtual_chapter,
+ struct dm_buffer *volume_buffers[],
+ struct delta_index_page index_pages[])
+{
+ int result;
+ u32 i;
+ const struct index_geometry *geometry = volume->geometry;
+ u32 physical_chapter = uds_map_to_physical_chapter(geometry, virtual_chapter);
+ u32 physical_page = map_to_physical_page(geometry, physical_chapter, 0);
+
+ dm_bufio_prefetch(volume->client, physical_page, geometry->index_pages_per_chapter);
+ for (i = 0; i < geometry->index_pages_per_chapter; i++) {
+ u8 *index_page;
+
+ index_page = dm_bufio_read(volume->client, physical_page + i,
+ &volume_buffers[i]);
+ if (IS_ERR(index_page)) {
+ result = -PTR_ERR(index_page);
+ vdo_log_warning_strerror(result,
+ "error reading physical page %u",
+ physical_page);
+ return result;
+ }
+
+ result = init_chapter_index_page(volume, index_page, physical_chapter, i,
+ &index_pages[i]);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ return UDS_SUCCESS;
+}
+
+int uds_search_volume_page_cache(struct volume *volume, struct uds_request *request,
+ bool *found)
+{
+ int result;
+ u32 physical_chapter =
+ uds_map_to_physical_chapter(volume->geometry, request->virtual_chapter);
+ u32 index_page_number;
+ u16 record_page_number;
+
+ index_page_number = uds_find_index_page_number(volume->index_page_map,
+ &request->record_name,
+ physical_chapter);
+
+ if (request->location == UDS_LOCATION_INDEX_PAGE_LOOKUP) {
+ record_page_number = *((u16 *) &request->old_metadata);
+ } else {
+ result = search_cached_index_page(volume, request, physical_chapter,
+ index_page_number,
+ &record_page_number);
+ if (result != UDS_SUCCESS)
+ return result;
+ }
+
+ return uds_search_cached_record_page(volume, request, physical_chapter,
+ record_page_number, found);
+}
+
+int uds_search_volume_page_cache_for_rebuild(struct volume *volume,
+ const struct uds_record_name *name,
+ u64 virtual_chapter, bool *found)
+{
+ int result;
+ struct index_geometry *geometry = volume->geometry;
+ struct cached_page *page;
+ u32 physical_chapter = uds_map_to_physical_chapter(geometry, virtual_chapter);
+ u32 index_page_number;
+ u16 record_page_number;
+ u32 page_number;
+
+ *found = false;
+ index_page_number =
+ uds_find_index_page_number(volume->index_page_map, name,
+ physical_chapter);
+ result = get_volume_page(volume, physical_chapter, index_page_number, &page);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = uds_search_chapter_index_page(&page->index_page, geometry, name,
+ &record_page_number);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ if (record_page_number == NO_CHAPTER_INDEX_ENTRY)
+ return UDS_SUCCESS;
+
+ page_number = geometry->index_pages_per_chapter + record_page_number;
+ result = get_volume_page(volume, physical_chapter, page_number, &page);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ *found = search_record_page(dm_bufio_get_block_data(page->buffer), name,
+ geometry, NULL);
+ return UDS_SUCCESS;
+}
+
+static void invalidate_page(struct page_cache *cache, u32 physical_page)
+{
+ struct cached_page *page;
+ int queue_index = -1;
+
+ /* We hold the read_threads_mutex. */
+ get_page_and_index(cache, physical_page, &queue_index, &page);
+ if (page != NULL) {
+ WRITE_ONCE(cache->index[page->physical_page], cache->cache_slots);
+ wait_for_pending_searches(cache, page->physical_page);
+ clear_cache_page(cache, page);
+ } else if (queue_index > -1) {
+ vdo_log_debug("setting pending read to invalid");
+ cache->read_queue[queue_index].invalid = true;
+ }
+}
+
+void uds_forget_chapter(struct volume *volume, u64 virtual_chapter)
+{
+ u32 physical_chapter =
+ uds_map_to_physical_chapter(volume->geometry, virtual_chapter);
+ u32 first_page = map_to_physical_page(volume->geometry, physical_chapter, 0);
+ u32 i;
+
+ vdo_log_debug("forgetting chapter %llu", (unsigned long long) virtual_chapter);
+ mutex_lock(&volume->read_threads_mutex);
+ for (i = 0; i < volume->geometry->pages_per_chapter; i++)
+ invalidate_page(&volume->page_cache, first_page + i);
+ mutex_unlock(&volume->read_threads_mutex);
+}
+
+/*
+ * Donate an index pages from a newly written chapter to the page cache since it is likely to be
+ * used again soon. The caller must already hold the reader thread mutex.
+ */
+static int donate_index_page_locked(struct volume *volume, u32 physical_chapter,
+ u32 index_page_number, struct dm_buffer *page_buffer)
+{
+ int result;
+ struct cached_page *page = NULL;
+ u32 physical_page =
+ map_to_physical_page(volume->geometry, physical_chapter,
+ index_page_number);
+
+ page = select_victim_in_cache(&volume->page_cache);
+ page->buffer = page_buffer;
+ result = init_chapter_index_page(volume, dm_bufio_get_block_data(page_buffer),
+ physical_chapter, index_page_number,
+ &page->index_page);
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning("Error initialize chapter index page");
+ cancel_page_in_cache(&volume->page_cache, physical_page, page);
+ return result;
+ }
+
+ result = put_page_in_cache(&volume->page_cache, physical_page, page);
+ if (result != UDS_SUCCESS) {
+ vdo_log_warning("Error putting page %u in cache", physical_page);
+ cancel_page_in_cache(&volume->page_cache, physical_page, page);
+ return result;
+ }
+
+ return UDS_SUCCESS;
+}
+
+static int write_index_pages(struct volume *volume, u32 physical_chapter_number,
+ struct open_chapter_index *chapter_index)
+{
+ struct index_geometry *geometry = volume->geometry;
+ struct dm_buffer *page_buffer;
+ u32 first_index_page = map_to_physical_page(geometry, physical_chapter_number, 0);
+ u32 delta_list_number = 0;
+ u32 index_page_number;
+
+ for (index_page_number = 0;
+ index_page_number < geometry->index_pages_per_chapter;
+ index_page_number++) {
+ u8 *page_data;
+ u32 physical_page = first_index_page + index_page_number;
+ u32 lists_packed;
+ bool last_page;
+ int result;
+
+ page_data = dm_bufio_new(volume->client, physical_page, &page_buffer);
+ if (IS_ERR(page_data)) {
+ return vdo_log_warning_strerror(-PTR_ERR(page_data),
+ "failed to prepare index page");
+ }
+
+ last_page = ((index_page_number + 1) == geometry->index_pages_per_chapter);
+ result = uds_pack_open_chapter_index_page(chapter_index, page_data,
+ delta_list_number, last_page,
+ &lists_packed);
+ if (result != UDS_SUCCESS) {
+ dm_bufio_release(page_buffer);
+ return vdo_log_warning_strerror(result,
+ "failed to pack index page");
+ }
+
+ dm_bufio_mark_buffer_dirty(page_buffer);
+
+ if (lists_packed == 0) {
+ vdo_log_debug("no delta lists packed on chapter %u page %u",
+ physical_chapter_number, index_page_number);
+ } else {
+ delta_list_number += lists_packed;
+ }
+
+ uds_update_index_page_map(volume->index_page_map,
+ chapter_index->virtual_chapter_number,
+ physical_chapter_number, index_page_number,
+ delta_list_number - 1);
+
+ mutex_lock(&volume->read_threads_mutex);
+ result = donate_index_page_locked(volume, physical_chapter_number,
+ index_page_number, page_buffer);
+ mutex_unlock(&volume->read_threads_mutex);
+ if (result != UDS_SUCCESS) {
+ dm_bufio_release(page_buffer);
+ return result;
+ }
+ }
+
+ return UDS_SUCCESS;
+}
+
+static u32 encode_tree(u8 record_page[],
+ const struct uds_volume_record *sorted_pointers[],
+ u32 next_record, u32 node, u32 node_count)
+{
+ if (node < node_count) {
+ u32 child = (2 * node) + 1;
+
+ next_record = encode_tree(record_page, sorted_pointers, next_record,
+ child, node_count);
+
+ /*
+ * In-order traversal: copy the contents of the next record into the page at the
+ * node offset.
+ */
+ memcpy(&record_page[node * BYTES_PER_RECORD],
+ sorted_pointers[next_record++], BYTES_PER_RECORD);
+
+ next_record = encode_tree(record_page, sorted_pointers, next_record,
+ child + 1, node_count);
+ }
+
+ return next_record;
+}
+
+static int encode_record_page(const struct volume *volume,
+ const struct uds_volume_record records[], u8 record_page[])
+{
+ int result;
+ u32 i;
+ u32 records_per_page = volume->geometry->records_per_page;
+ const struct uds_volume_record **record_pointers = volume->record_pointers;
+
+ for (i = 0; i < records_per_page; i++)
+ record_pointers[i] = &records[i];
+
+ /*
+ * Sort the record pointers by using just the names in the records, which is less work than
+ * sorting the entire record values.
+ */
+ BUILD_BUG_ON(offsetof(struct uds_volume_record, name) != 0);
+ result = uds_radix_sort(volume->radix_sorter, (const u8 **) record_pointers,
+ records_per_page, UDS_RECORD_NAME_SIZE);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ encode_tree(record_page, record_pointers, 0, 0, records_per_page);
+ return UDS_SUCCESS;
+}
+
+static int write_record_pages(struct volume *volume, u32 physical_chapter_number,
+ const struct uds_volume_record *records)
+{
+ u32 record_page_number;
+ struct index_geometry *geometry = volume->geometry;
+ struct dm_buffer *page_buffer;
+ const struct uds_volume_record *next_record = records;
+ u32 first_record_page = map_to_physical_page(geometry, physical_chapter_number,
+ geometry->index_pages_per_chapter);
+
+ for (record_page_number = 0;
+ record_page_number < geometry->record_pages_per_chapter;
+ record_page_number++) {
+ u8 *page_data;
+ u32 physical_page = first_record_page + record_page_number;
+ int result;
+
+ page_data = dm_bufio_new(volume->client, physical_page, &page_buffer);
+ if (IS_ERR(page_data)) {
+ return vdo_log_warning_strerror(-PTR_ERR(page_data),
+ "failed to prepare record page");
+ }
+
+ result = encode_record_page(volume, next_record, page_data);
+ if (result != UDS_SUCCESS) {
+ dm_bufio_release(page_buffer);
+ return vdo_log_warning_strerror(result,
+ "failed to encode record page %u",
+ record_page_number);
+ }
+
+ next_record += geometry->records_per_page;
+ dm_bufio_mark_buffer_dirty(page_buffer);
+ dm_bufio_release(page_buffer);
+ }
+
+ return UDS_SUCCESS;
+}
+
+int uds_write_chapter(struct volume *volume, struct open_chapter_index *chapter_index,
+ const struct uds_volume_record *records)
+{
+ int result;
+ u32 physical_chapter_number =
+ uds_map_to_physical_chapter(volume->geometry,
+ chapter_index->virtual_chapter_number);
+
+ result = write_index_pages(volume, physical_chapter_number, chapter_index);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = write_record_pages(volume, physical_chapter_number, records);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ result = -dm_bufio_write_dirty_buffers(volume->client);
+ if (result != UDS_SUCCESS)
+ vdo_log_error_strerror(result, "cannot sync chapter to volume");
+
+ return result;
+}
+
+static void probe_chapter(struct volume *volume, u32 chapter_number,
+ u64 *virtual_chapter_number)
+{
+ const struct index_geometry *geometry = volume->geometry;
+ u32 expected_list_number = 0;
+ u32 i;
+ u64 vcn = BAD_CHAPTER;
+
+ *virtual_chapter_number = BAD_CHAPTER;
+ dm_bufio_prefetch(volume->client,
+ map_to_physical_page(geometry, chapter_number, 0),
+ geometry->index_pages_per_chapter);
+
+ for (i = 0; i < geometry->index_pages_per_chapter; i++) {
+ struct delta_index_page *page;
+ int result;
+
+ result = uds_get_volume_index_page(volume, chapter_number, i, &page);
+ if (result != UDS_SUCCESS)
+ return;
+
+ if (page->virtual_chapter_number == BAD_CHAPTER) {
+ vdo_log_error("corrupt index page in chapter %u",
+ chapter_number);
+ return;
+ }
+
+ if (vcn == BAD_CHAPTER) {
+ vcn = page->virtual_chapter_number;
+ } else if (page->virtual_chapter_number != vcn) {
+ vdo_log_error("inconsistent chapter %u index page %u: expected vcn %llu, got vcn %llu",
+ chapter_number, i, (unsigned long long) vcn,
+ (unsigned long long) page->virtual_chapter_number);
+ return;
+ }
+
+ if (expected_list_number != page->lowest_list_number) {
+ vdo_log_error("inconsistent chapter %u index page %u: expected list number %u, got list number %u",
+ chapter_number, i, expected_list_number,
+ page->lowest_list_number);
+ return;
+ }
+ expected_list_number = page->highest_list_number + 1;
+
+ result = uds_validate_chapter_index_page(page, geometry);
+ if (result != UDS_SUCCESS)
+ return;
+ }
+
+ if (chapter_number != uds_map_to_physical_chapter(geometry, vcn)) {
+ vdo_log_error("chapter %u vcn %llu is out of phase (%u)", chapter_number,
+ (unsigned long long) vcn, geometry->chapters_per_volume);
+ return;
+ }
+
+ *virtual_chapter_number = vcn;
+}
+
+/* Find the last valid physical chapter in the volume. */
+static void find_real_end_of_volume(struct volume *volume, u32 limit, u32 *limit_ptr)
+{
+ u32 span = 1;
+ u32 tries = 0;
+
+ while (limit > 0) {
+ u32 chapter = (span > limit) ? 0 : limit - span;
+ u64 vcn = 0;
+
+ probe_chapter(volume, chapter, &vcn);
+ if (vcn == BAD_CHAPTER) {
+ limit = chapter;
+ if (++tries > 1)
+ span *= 2;
+ } else {
+ if (span == 1)
+ break;
+ span /= 2;
+ tries = 0;
+ }
+ }
+
+ *limit_ptr = limit;
+}
+
+static int find_chapter_limits(struct volume *volume, u32 chapter_limit, u64 *lowest_vcn,
+ u64 *highest_vcn)
+{
+ struct index_geometry *geometry = volume->geometry;
+ u64 zero_vcn;
+ u64 lowest = BAD_CHAPTER;
+ u64 highest = BAD_CHAPTER;
+ u64 moved_chapter = BAD_CHAPTER;
+ u32 left_chapter = 0;
+ u32 right_chapter = 0;
+ u32 bad_chapters = 0;
+
+ /*
+ * This method assumes there is at most one run of contiguous bad chapters caused by
+ * unflushed writes. Either the bad spot is at the beginning and end, or somewhere in the
+ * middle. Wherever it is, the highest and lowest VCNs are adjacent to it. Otherwise the
+ * volume is cleanly saved and somewhere in the middle of it the highest VCN immediately
+ * precedes the lowest one.
+ */
+
+ /* It doesn't matter if this results in a bad spot (BAD_CHAPTER). */
+ probe_chapter(volume, 0, &zero_vcn);
+
+ /*
+ * Binary search for end of the discontinuity in the monotonically increasing virtual
+ * chapter numbers; bad spots are treated as a span of BAD_CHAPTER values. In effect we're
+ * searching for the index of the smallest value less than zero_vcn. In the case we go off
+ * the end it means that chapter 0 has the lowest vcn.
+ *
+ * If a virtual chapter is out-of-order, it will be the one moved by conversion. Always
+ * skip over the moved chapter when searching, adding it to the range at the end if
+ * necessary.
+ */
+ if (geometry->remapped_physical > 0) {
+ u64 remapped_vcn;
+
+ probe_chapter(volume, geometry->remapped_physical, &remapped_vcn);
+ if (remapped_vcn == geometry->remapped_virtual)
+ moved_chapter = geometry->remapped_physical;
+ }
+
+ left_chapter = 0;
+ right_chapter = chapter_limit;
+
+ while (left_chapter < right_chapter) {
+ u64 probe_vcn;
+ u32 chapter = (left_chapter + right_chapter) / 2;
+
+ if (chapter == moved_chapter)
+ chapter--;
+
+ probe_chapter(volume, chapter, &probe_vcn);
+ if (zero_vcn <= probe_vcn) {
+ left_chapter = chapter + 1;
+ if (left_chapter == moved_chapter)
+ left_chapter++;
+ } else {
+ right_chapter = chapter;
+ }
+ }
+
+ /* If left_chapter goes off the end, chapter 0 has the lowest virtual chapter number.*/
+ if (left_chapter >= chapter_limit)
+ left_chapter = 0;
+
+ /* At this point, left_chapter is the chapter with the lowest virtual chapter number. */
+ probe_chapter(volume, left_chapter, &lowest);
+
+ /* The moved chapter might be the lowest in the range. */
+ if ((moved_chapter != BAD_CHAPTER) && (lowest == geometry->remapped_virtual + 1))
+ lowest = geometry->remapped_virtual;
+
+ /*
+ * Circularly scan backwards, moving over any bad chapters until encountering a good one,
+ * which is the chapter with the highest vcn.
+ */
+ while (highest == BAD_CHAPTER) {
+ right_chapter = (right_chapter + chapter_limit - 1) % chapter_limit;
+ if (right_chapter == moved_chapter)
+ continue;
+
+ probe_chapter(volume, right_chapter, &highest);
+ if (bad_chapters++ >= MAX_BAD_CHAPTERS) {
+ vdo_log_error("too many bad chapters in volume: %u",
+ bad_chapters);
+ return UDS_CORRUPT_DATA;
+ }
+ }
+
+ *lowest_vcn = lowest;
+ *highest_vcn = highest;
+ return UDS_SUCCESS;
+}
+
+/*
+ * Find the highest and lowest contiguous chapters present in the volume and determine their
+ * virtual chapter numbers. This is used by rebuild.
+ */
+int uds_find_volume_chapter_boundaries(struct volume *volume, u64 *lowest_vcn,
+ u64 *highest_vcn, bool *is_empty)
+{
+ u32 chapter_limit = volume->geometry->chapters_per_volume;
+
+ find_real_end_of_volume(volume, chapter_limit, &chapter_limit);
+ if (chapter_limit == 0) {
+ *lowest_vcn = 0;
+ *highest_vcn = 0;
+ *is_empty = true;
+ return UDS_SUCCESS;
+ }
+
+ *is_empty = false;
+ return find_chapter_limits(volume, chapter_limit, lowest_vcn, highest_vcn);
+}
+
+int __must_check uds_replace_volume_storage(struct volume *volume,
+ struct index_layout *layout,
+ struct block_device *bdev)
+{
+ int result;
+ u32 i;
+
+ result = uds_replace_index_layout_storage(layout, bdev);
+ if (result != UDS_SUCCESS)
+ return result;
+
+ /* Release all outstanding dm_bufio objects */
+ for (i = 0; i < volume->page_cache.indexable_pages; i++)
+ volume->page_cache.index[i] = volume->page_cache.cache_slots;
+ for (i = 0; i < volume->page_cache.cache_slots; i++)
+ clear_cache_page(&volume->page_cache, &volume->page_cache.cache[i]);
+ if (volume->sparse_cache != NULL)
+ uds_invalidate_sparse_cache(volume->sparse_cache);
+ if (volume->client != NULL)
+ dm_bufio_client_destroy(vdo_forget(volume->client));
+
+ return uds_open_volume_bufio(layout, volume->geometry->bytes_per_page,
+ volume->reserved_buffers, &volume->client);
+}
+
+static int __must_check initialize_page_cache(struct page_cache *cache,
+ const struct index_geometry *geometry,
+ u32 chapters_in_cache,
+ unsigned int zone_count)
+{
+ int result;
+ u32 i;
+
+ cache->indexable_pages = geometry->pages_per_volume + 1;
+ cache->cache_slots = chapters_in_cache * geometry->record_pages_per_chapter;
+ cache->zone_count = zone_count;
+ atomic64_set(&cache->clock, 1);
+
+ result = VDO_ASSERT((cache->cache_slots <= VOLUME_CACHE_MAX_ENTRIES),
+ "requested cache size, %u, within limit %u",
+ cache->cache_slots, VOLUME_CACHE_MAX_ENTRIES);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(VOLUME_CACHE_MAX_QUEUED_READS, struct queued_read,
+ "volume read queue", &cache->read_queue);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(cache->zone_count, struct search_pending_counter,
+ "Volume Cache Zones", &cache->search_pending_counters);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(cache->indexable_pages, u16, "page cache index",
+ &cache->index);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(cache->cache_slots, struct cached_page, "page cache cache",
+ &cache->cache);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* Initialize index values to invalid values. */
+ for (i = 0; i < cache->indexable_pages; i++)
+ cache->index[i] = cache->cache_slots;
+
+ for (i = 0; i < cache->cache_slots; i++)
+ clear_cache_page(cache, &cache->cache[i]);
+
+ return UDS_SUCCESS;
+}
+
+int uds_make_volume(const struct uds_configuration *config, struct index_layout *layout,
+ struct volume **new_volume)
+{
+ unsigned int i;
+ struct volume *volume = NULL;
+ struct index_geometry *geometry;
+ unsigned int reserved_buffers;
+ int result;
+
+ result = vdo_allocate(1, struct volume, "volume", &volume);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ volume->nonce = uds_get_volume_nonce(layout);
+
+ result = uds_copy_index_geometry(config->geometry, &volume->geometry);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume(volume);
+ return vdo_log_warning_strerror(result,
+ "failed to allocate geometry: error");
+ }
+ geometry = volume->geometry;
+
+ /*
+ * Reserve a buffer for each entry in the page cache, one for the chapter writer, and one
+ * for each entry in the sparse cache.
+ */
+ reserved_buffers = config->cache_chapters * geometry->record_pages_per_chapter;
+ reserved_buffers += 1;
+ if (uds_is_sparse_index_geometry(geometry))
+ reserved_buffers += (config->cache_chapters * geometry->index_pages_per_chapter);
+ volume->reserved_buffers = reserved_buffers;
+ result = uds_open_volume_bufio(layout, geometry->bytes_per_page,
+ volume->reserved_buffers, &volume->client);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ result = uds_make_radix_sorter(geometry->records_per_page,
+ &volume->radix_sorter);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ result = vdo_allocate(geometry->records_per_page,
+ const struct uds_volume_record *, "record pointers",
+ &volume->record_pointers);
+ if (result != VDO_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ if (uds_is_sparse_index_geometry(geometry)) {
+ size_t page_size = sizeof(struct delta_index_page) + geometry->bytes_per_page;
+
+ result = uds_make_sparse_cache(geometry, config->cache_chapters,
+ config->zone_count,
+ &volume->sparse_cache);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ volume->cache_size =
+ page_size * geometry->index_pages_per_chapter * config->cache_chapters;
+ }
+
+ result = initialize_page_cache(&volume->page_cache, geometry,
+ config->cache_chapters, config->zone_count);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ volume->cache_size += volume->page_cache.cache_slots * sizeof(struct delta_index_page);
+ result = uds_make_index_page_map(geometry, &volume->index_page_map);
+ if (result != UDS_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ mutex_init(&volume->read_threads_mutex);
+ uds_init_cond(&volume->read_threads_read_done_cond);
+ uds_init_cond(&volume->read_threads_cond);
+
+ result = vdo_allocate(config->read_threads, struct thread *, "reader threads",
+ &volume->reader_threads);
+ if (result != VDO_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ for (i = 0; i < config->read_threads; i++) {
+ result = vdo_create_thread(read_thread_function, (void *) volume,
+ "reader", &volume->reader_threads[i]);
+ if (result != VDO_SUCCESS) {
+ uds_free_volume(volume);
+ return result;
+ }
+
+ volume->read_thread_count = i + 1;
+ }
+
+ *new_volume = volume;
+ return UDS_SUCCESS;
+}
+
+static void uninitialize_page_cache(struct page_cache *cache)
+{
+ u16 i;
+
+ if (cache->cache != NULL) {
+ for (i = 0; i < cache->cache_slots; i++)
+ release_page_buffer(&cache->cache[i]);
+ }
+ vdo_free(cache->index);
+ vdo_free(cache->cache);
+ vdo_free(cache->search_pending_counters);
+ vdo_free(cache->read_queue);
+}
+
+void uds_free_volume(struct volume *volume)
+{
+ if (volume == NULL)
+ return;
+
+ if (volume->reader_threads != NULL) {
+ unsigned int i;
+
+ /* This works even if some threads weren't started. */
+ mutex_lock(&volume->read_threads_mutex);
+ volume->read_threads_exiting = true;
+ uds_broadcast_cond(&volume->read_threads_cond);
+ mutex_unlock(&volume->read_threads_mutex);
+ for (i = 0; i < volume->read_thread_count; i++)
+ vdo_join_threads(volume->reader_threads[i]);
+ vdo_free(volume->reader_threads);
+ volume->reader_threads = NULL;
+ }
+
+ /* Must destroy the client AFTER freeing the cached pages. */
+ uninitialize_page_cache(&volume->page_cache);
+ uds_free_sparse_cache(volume->sparse_cache);
+ if (volume->client != NULL)
+ dm_bufio_client_destroy(vdo_forget(volume->client));
+
+ uds_free_index_page_map(volume->index_page_map);
+ uds_free_radix_sorter(volume->radix_sorter);
+ vdo_free(volume->geometry);
+ vdo_free(volume->record_pointers);
+ vdo_free(volume);
+}
diff --git a/drivers/md/dm-vdo/indexer/volume.h b/drivers/md/dm-vdo/indexer/volume.h
new file mode 100644
index 000000000000..8679a5e55347
--- /dev/null
+++ b/drivers/md/dm-vdo/indexer/volume.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_VOLUME_H
+#define UDS_VOLUME_H
+
+#include <linux/atomic.h>
+#include <linux/cache.h>
+#include <linux/dm-bufio.h>
+#include <linux/limits.h>
+
+#include "permassert.h"
+#include "thread-utils.h"
+
+#include "chapter-index.h"
+#include "config.h"
+#include "geometry.h"
+#include "indexer.h"
+#include "index-layout.h"
+#include "index-page-map.h"
+#include "radix-sort.h"
+#include "sparse-cache.h"
+
+/*
+ * The volume manages deduplication records on permanent storage. The term "volume" can also refer
+ * to the region of permanent storage where the records (and the chapters containing them) are
+ * stored. The volume handles all I/O to this region by reading, caching, and writing chapter pages
+ * as necessary.
+ */
+
+enum index_lookup_mode {
+ /* Always do lookups in all chapters normally */
+ LOOKUP_NORMAL,
+ /* Only do a subset of lookups needed when rebuilding an index */
+ LOOKUP_FOR_REBUILD,
+};
+
+struct queued_read {
+ bool invalid;
+ bool reserved;
+ u32 physical_page;
+ struct uds_request *first_request;
+ struct uds_request *last_request;
+};
+
+struct __aligned(L1_CACHE_BYTES) search_pending_counter {
+ u64 atomic_value;
+};
+
+struct cached_page {
+ /* Whether this page is currently being read asynchronously */
+ bool read_pending;
+ /* The physical page stored in this cache entry */
+ u32 physical_page;
+ /* The value of the volume clock when this page was last used */
+ s64 last_used;
+ /* The cached page buffer */
+ struct dm_buffer *buffer;
+ /* The chapter index page, meaningless for record pages */
+ struct delta_index_page index_page;
+};
+
+struct page_cache {
+ /* The number of zones */
+ unsigned int zone_count;
+ /* The number of volume pages that can be cached */
+ u32 indexable_pages;
+ /* The maximum number of simultaneously cached pages */
+ u16 cache_slots;
+ /* An index for each physical page noting where it is in the cache */
+ u16 *index;
+ /* The array of cached pages */
+ struct cached_page *cache;
+ /* A counter for each zone tracking if a search is occurring there */
+ struct search_pending_counter *search_pending_counters;
+ /* The read queue entries as a circular array */
+ struct queued_read *read_queue;
+
+ /* All entries above this point are constant after initialization. */
+
+ /*
+ * These values are all indexes into the array of read queue entries. New entries in the
+ * read queue are enqueued at read_queue_last. To dequeue entries, a reader thread gets the
+ * lock and then claims the entry pointed to by read_queue_next_read and increments that
+ * value. After the read is completed, the reader thread calls release_read_queue_entry(),
+ * which increments read_queue_first until it points to a pending read, or is equal to
+ * read_queue_next_read. This means that if multiple reads are outstanding,
+ * read_queue_first might not advance until the last of the reads finishes.
+ */
+ u16 read_queue_first;
+ u16 read_queue_next_read;
+ u16 read_queue_last;
+
+ atomic64_t clock;
+};
+
+struct volume {
+ struct index_geometry *geometry;
+ struct dm_bufio_client *client;
+ u64 nonce;
+ size_t cache_size;
+
+ /* A single page worth of records, for sorting */
+ const struct uds_volume_record **record_pointers;
+ /* Sorter for sorting records within each page */
+ struct radix_sorter *radix_sorter;
+
+ struct sparse_cache *sparse_cache;
+ struct page_cache page_cache;
+ struct index_page_map *index_page_map;
+
+ struct mutex read_threads_mutex;
+ struct cond_var read_threads_cond;
+ struct cond_var read_threads_read_done_cond;
+ struct thread **reader_threads;
+ unsigned int read_thread_count;
+ bool read_threads_exiting;
+
+ enum index_lookup_mode lookup_mode;
+ unsigned int reserved_buffers;
+};
+
+int __must_check uds_make_volume(const struct uds_configuration *config,
+ struct index_layout *layout,
+ struct volume **new_volume);
+
+void uds_free_volume(struct volume *volume);
+
+int __must_check uds_replace_volume_storage(struct volume *volume,
+ struct index_layout *layout,
+ struct block_device *bdev);
+
+int __must_check uds_find_volume_chapter_boundaries(struct volume *volume,
+ u64 *lowest_vcn, u64 *highest_vcn,
+ bool *is_empty);
+
+int __must_check uds_search_volume_page_cache(struct volume *volume,
+ struct uds_request *request,
+ bool *found);
+
+int __must_check uds_search_volume_page_cache_for_rebuild(struct volume *volume,
+ const struct uds_record_name *name,
+ u64 virtual_chapter,
+ bool *found);
+
+int __must_check uds_search_cached_record_page(struct volume *volume,
+ struct uds_request *request, u32 chapter,
+ u16 record_page_number, bool *found);
+
+void uds_forget_chapter(struct volume *volume, u64 chapter);
+
+int __must_check uds_write_chapter(struct volume *volume,
+ struct open_chapter_index *chapter_index,
+ const struct uds_volume_record records[]);
+
+void uds_prefetch_volume_chapter(const struct volume *volume, u32 chapter);
+
+int __must_check uds_read_chapter_index_from_volume(const struct volume *volume,
+ u64 virtual_chapter,
+ struct dm_buffer *volume_buffers[],
+ struct delta_index_page index_pages[]);
+
+int __must_check uds_get_volume_record_page(struct volume *volume, u32 chapter,
+ u32 page_number, u8 **data_ptr);
+
+int __must_check uds_get_volume_index_page(struct volume *volume, u32 chapter,
+ u32 page_number,
+ struct delta_index_page **page_ptr);
+
+#endif /* UDS_VOLUME_H */
diff --git a/drivers/md/dm-vdo/int-map.c b/drivers/md/dm-vdo/int-map.c
new file mode 100644
index 000000000000..3aa438f84ea1
--- /dev/null
+++ b/drivers/md/dm-vdo/int-map.c
@@ -0,0 +1,707 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+/**
+ * DOC:
+ *
+ * Hash table implementation of a map from integers to pointers, implemented using the Hopscotch
+ * Hashing algorithm by Herlihy, Shavit, and Tzafrir (see
+ * http://en.wikipedia.org/wiki/Hopscotch_hashing). This implementation does not contain any of the
+ * locking/concurrency features of the algorithm, just the collision resolution scheme.
+ *
+ * Hopscotch Hashing is based on hashing with open addressing and linear probing. All the entries
+ * are stored in a fixed array of buckets, with no dynamic allocation for collisions. Unlike linear
+ * probing, all the entries that hash to a given bucket are stored within a fixed neighborhood
+ * starting at that bucket. Chaining is effectively represented as a bit vector relative to each
+ * bucket instead of as pointers or explicit offsets.
+ *
+ * When an empty bucket cannot be found within a given neighborhood, subsequent neighborhoods are
+ * searched, and one or more entries will "hop" into those neighborhoods. When this process works,
+ * an empty bucket will move into the desired neighborhood, allowing the entry to be added. When
+ * that process fails (typically when the buckets are around 90% full), the table must be resized
+ * and the all entries rehashed and added to the expanded table.
+ *
+ * Unlike linear probing, the number of buckets that must be searched in the worst case has a fixed
+ * upper bound (the size of the neighborhood). Those entries occupy a small number of memory cache
+ * lines, leading to improved use of the cache (fewer misses on both successful and unsuccessful
+ * searches). Hopscotch hashing outperforms linear probing at much higher load factors, so even
+ * with the increased memory burden for maintaining the hop vectors, less memory is needed to
+ * achieve that performance. Hopscotch is also immune to "contamination" from deleting entries
+ * since entries are genuinely removed instead of being replaced by a placeholder.
+ *
+ * The published description of the algorithm used a bit vector, but the paper alludes to an offset
+ * scheme which is used by this implementation. Since the entries in the neighborhood are within N
+ * entries of the hash bucket at the start of the neighborhood, a pair of small offset fields each
+ * log2(N) bits wide is all that's needed to maintain the hops as a linked list. In order to encode
+ * "no next hop" (i.e. NULL) as the natural initial value of zero, the offsets are biased by one
+ * (i.e. 0 => NULL, 1 => offset=0, 2 => offset=1, etc.) We can represent neighborhoods of up to 255
+ * entries with just 8+8=16 bits per entry. The hop list is sorted by hop offset so the first entry
+ * in the list is always the bucket closest to the start of the neighborhood.
+ *
+ * While individual accesses tend to be very fast, the table resize operations are very, very
+ * expensive. If an upper bound on the latency of adding an entry to the table is needed, we either
+ * need to ensure the table is pre-sized to be large enough so no resize is ever needed, or we'll
+ * need to develop an approach to incrementally resize the table.
+ */
+
+#include "int-map.h"
+
+#include <linux/minmax.h>
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+
+#define DEFAULT_CAPACITY 16 /* the number of neighborhoods in a new table */
+#define NEIGHBORHOOD 255 /* the number of buckets in each neighborhood */
+#define MAX_PROBES 1024 /* limit on the number of probes for a free bucket */
+#define NULL_HOP_OFFSET 0 /* the hop offset value terminating the hop list */
+#define DEFAULT_LOAD 75 /* a compromise between memory use and performance */
+
+/**
+ * struct bucket - hash bucket
+ *
+ * Buckets are packed together to reduce memory usage and improve cache efficiency. It would be
+ * tempting to encode the hop offsets separately and maintain alignment of key/value pairs, but
+ * it's crucial to keep the hop fields near the buckets that they use them so they'll tend to share
+ * cache lines.
+ */
+struct __packed bucket {
+ /**
+ * @first_hop: The biased offset of the first entry in the hop list of the neighborhood
+ * that hashes to this bucket.
+ */
+ u8 first_hop;
+ /** @next_hop: The biased offset of the next bucket in the hop list. */
+ u8 next_hop;
+ /** @key: The key stored in this bucket. */
+ u64 key;
+ /** @value: The value stored in this bucket (NULL if empty). */
+ void *value;
+};
+
+/**
+ * struct int_map - The concrete definition of the opaque int_map type.
+ *
+ * To avoid having to wrap the neighborhoods of the last entries back around to the start of the
+ * bucket array, we allocate a few more buckets at the end of the array instead, which is why
+ * capacity and bucket_count are different.
+ */
+struct int_map {
+ /** @size: The number of entries stored in the map. */
+ size_t size;
+ /** @capacity: The number of neighborhoods in the map. */
+ size_t capacity;
+ /* @bucket_count: The number of buckets in the bucket array. */
+ size_t bucket_count;
+ /** @buckets: The array of hash buckets. */
+ struct bucket *buckets;
+};
+
+/**
+ * mix() - The Google CityHash 16-byte hash mixing function.
+ * @input1: The first input value.
+ * @input2: The second input value.
+ *
+ * Return: A hash of the two inputs.
+ */
+static u64 mix(u64 input1, u64 input2)
+{
+ static const u64 CITY_MULTIPLIER = 0x9ddfea08eb382d69ULL;
+ u64 hash = (input1 ^ input2);
+
+ hash *= CITY_MULTIPLIER;
+ hash ^= (hash >> 47);
+ hash ^= input2;
+ hash *= CITY_MULTIPLIER;
+ hash ^= (hash >> 47);
+ hash *= CITY_MULTIPLIER;
+ return hash;
+}
+
+/**
+ * hash_key() - Calculate a 64-bit non-cryptographic hash value for the provided 64-bit integer
+ * key.
+ * @key: The mapping key.
+ *
+ * The implementation is based on Google's CityHash, only handling the specific case of an 8-byte
+ * input.
+ *
+ * Return: The hash of the mapping key.
+ */
+static u64 hash_key(u64 key)
+{
+ /*
+ * Aliasing restrictions forbid us from casting pointer types, so use a union to convert a
+ * single u64 to two u32 values.
+ */
+ union {
+ u64 u64;
+ u32 u32[2];
+ } pun = {.u64 = key};
+
+ return mix(sizeof(key) + (((u64) pun.u32[0]) << 3), pun.u32[1]);
+}
+
+/**
+ * allocate_buckets() - Initialize an int_map.
+ * @map: The map to initialize.
+ * @capacity: The initial capacity of the map.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int allocate_buckets(struct int_map *map, size_t capacity)
+{
+ map->size = 0;
+ map->capacity = capacity;
+
+ /*
+ * Allocate NEIGHBORHOOD - 1 extra buckets so the last bucket can have a full neighborhood
+ * without have to wrap back around to element zero.
+ */
+ map->bucket_count = capacity + (NEIGHBORHOOD - 1);
+ return vdo_allocate(map->bucket_count, struct bucket,
+ "struct int_map buckets", &map->buckets);
+}
+
+/**
+ * vdo_int_map_create() - Allocate and initialize an int_map.
+ * @initial_capacity: The number of entries the map should initially be capable of holding (zero
+ * tells the map to use its own small default).
+ * @map_ptr: Output, a pointer to hold the new int_map.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_int_map_create(size_t initial_capacity, struct int_map **map_ptr)
+{
+ struct int_map *map;
+ int result;
+ size_t capacity;
+
+ result = vdo_allocate(1, struct int_map, "struct int_map", &map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* Use the default capacity if the caller did not specify one. */
+ capacity = (initial_capacity > 0) ? initial_capacity : DEFAULT_CAPACITY;
+
+ /*
+ * Scale up the capacity by the specified initial load factor. (i.e to hold 1000 entries at
+ * 80% load we need a capacity of 1250)
+ */
+ capacity = capacity * 100 / DEFAULT_LOAD;
+
+ result = allocate_buckets(map, capacity);
+ if (result != VDO_SUCCESS) {
+ vdo_int_map_free(vdo_forget(map));
+ return result;
+ }
+
+ *map_ptr = map;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_int_map_free() - Free an int_map.
+ * @map: The int_map to free.
+ *
+ * NOTE: The map does not own the pointer values stored in the map and they are not freed by this
+ * call.
+ */
+void vdo_int_map_free(struct int_map *map)
+{
+ if (map == NULL)
+ return;
+
+ vdo_free(vdo_forget(map->buckets));
+ vdo_free(vdo_forget(map));
+}
+
+/**
+ * vdo_int_map_size() - Get the number of entries stored in an int_map.
+ * @map: The int_map to query.
+ *
+ * Return: The number of entries in the map.
+ */
+size_t vdo_int_map_size(const struct int_map *map)
+{
+ return map->size;
+}
+
+/**
+ * dereference_hop() - Convert a biased hop offset within a neighborhood to a pointer to the bucket
+ * it references.
+ * @neighborhood: The first bucket in the neighborhood.
+ * @hop_offset: The biased hop offset to the desired bucket.
+ *
+ * Return: NULL if hop_offset is zero, otherwise a pointer to the bucket in the neighborhood at
+ * hop_offset - 1.
+ */
+static struct bucket *dereference_hop(struct bucket *neighborhood, unsigned int hop_offset)
+{
+ BUILD_BUG_ON(NULL_HOP_OFFSET != 0);
+ if (hop_offset == NULL_HOP_OFFSET)
+ return NULL;
+
+ return &neighborhood[hop_offset - 1];
+}
+
+/**
+ * insert_in_hop_list() - Add a bucket into the hop list for the neighborhood.
+ * @neighborhood: The first bucket in the neighborhood.
+ * @new_bucket: The bucket to add to the hop list.
+ *
+ * The bucket is inserted it into the list so the hop list remains sorted by hop offset.
+ */
+static void insert_in_hop_list(struct bucket *neighborhood, struct bucket *new_bucket)
+{
+ /* Zero indicates a NULL hop offset, so bias the hop offset by one. */
+ int hop_offset = 1 + (new_bucket - neighborhood);
+
+ /* Handle the special case of adding a bucket at the start of the list. */
+ int next_hop = neighborhood->first_hop;
+
+ if ((next_hop == NULL_HOP_OFFSET) || (next_hop > hop_offset)) {
+ new_bucket->next_hop = next_hop;
+ neighborhood->first_hop = hop_offset;
+ return;
+ }
+
+ /* Search the hop list for the insertion point that maintains the sort order. */
+ for (;;) {
+ struct bucket *bucket = dereference_hop(neighborhood, next_hop);
+
+ next_hop = bucket->next_hop;
+
+ if ((next_hop == NULL_HOP_OFFSET) || (next_hop > hop_offset)) {
+ new_bucket->next_hop = next_hop;
+ bucket->next_hop = hop_offset;
+ return;
+ }
+ }
+}
+
+/**
+ * select_bucket() - Select and return the hash bucket for a given search key.
+ * @map: The map to search.
+ * @key: The mapping key.
+ */
+static struct bucket *select_bucket(const struct int_map *map, u64 key)
+{
+ /*
+ * Calculate a good hash value for the provided key. We want exactly 32 bits, so mask the
+ * result.
+ */
+ u64 hash = hash_key(key) & 0xFFFFFFFF;
+
+ /*
+ * Scale the 32-bit hash to a bucket index by treating it as a binary fraction and
+ * multiplying that by the capacity. If the hash is uniformly distributed over [0 ..
+ * 2^32-1], then (hash * capacity / 2^32) should be uniformly distributed over [0 ..
+ * capacity-1]. The multiply and shift is much faster than a divide (modulus) on X86 CPUs.
+ */
+ return &map->buckets[(hash * map->capacity) >> 32];
+}
+
+/**
+ * search_hop_list() - Search the hop list associated with given hash bucket for a given search
+ * key.
+ * @map: The map being searched.
+ * @bucket: The map bucket to search for the key.
+ * @key: The mapping key.
+ * @previous_ptr: Output. if not NULL, a pointer in which to store the bucket in the list preceding
+ * the one that had the matching key
+ *
+ * If the key is found, returns a pointer to the entry (bucket or collision), otherwise returns
+ * NULL.
+ *
+ * Return: An entry that matches the key, or NULL if not found.
+ */
+static struct bucket *search_hop_list(struct int_map *map __always_unused,
+ struct bucket *bucket,
+ u64 key,
+ struct bucket **previous_ptr)
+{
+ struct bucket *previous = NULL;
+ unsigned int next_hop = bucket->first_hop;
+
+ while (next_hop != NULL_HOP_OFFSET) {
+ /*
+ * Check the neighboring bucket indexed by the offset for the
+ * desired key.
+ */
+ struct bucket *entry = dereference_hop(bucket, next_hop);
+
+ if ((key == entry->key) && (entry->value != NULL)) {
+ if (previous_ptr != NULL)
+ *previous_ptr = previous;
+ return entry;
+ }
+ next_hop = entry->next_hop;
+ previous = entry;
+ }
+
+ return NULL;
+}
+
+/**
+ * vdo_int_map_get() - Retrieve the value associated with a given key from the int_map.
+ * @map: The int_map to query.
+ * @key: The key to look up.
+ *
+ * Return: The value associated with the given key, or NULL if the key is not mapped to any value.
+ */
+void *vdo_int_map_get(struct int_map *map, u64 key)
+{
+ struct bucket *match = search_hop_list(map, select_bucket(map, key), key, NULL);
+
+ return ((match != NULL) ? match->value : NULL);
+}
+
+/**
+ * resize_buckets() - Increase the number of hash buckets.
+ * @map: The map to resize.
+ *
+ * Resizes and rehashes all the existing entries, storing them in the new buckets.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int resize_buckets(struct int_map *map)
+{
+ int result;
+ size_t i;
+
+ /* Copy the top-level map data to the stack. */
+ struct int_map old_map = *map;
+
+ /* Re-initialize the map to be empty and 50% larger. */
+ size_t new_capacity = map->capacity / 2 * 3;
+
+ vdo_log_info("%s: attempting resize from %zu to %zu, current size=%zu",
+ __func__, map->capacity, new_capacity, map->size);
+ result = allocate_buckets(map, new_capacity);
+ if (result != VDO_SUCCESS) {
+ *map = old_map;
+ return result;
+ }
+
+ /* Populate the new hash table from the entries in the old bucket array. */
+ for (i = 0; i < old_map.bucket_count; i++) {
+ struct bucket *entry = &old_map.buckets[i];
+
+ if (entry->value == NULL)
+ continue;
+
+ result = vdo_int_map_put(map, entry->key, entry->value, true, NULL);
+ if (result != VDO_SUCCESS) {
+ /* Destroy the new partial map and restore the map from the stack. */
+ vdo_free(vdo_forget(map->buckets));
+ *map = old_map;
+ return result;
+ }
+ }
+
+ /* Destroy the old bucket array. */
+ vdo_free(vdo_forget(old_map.buckets));
+ return VDO_SUCCESS;
+}
+
+/**
+ * find_empty_bucket() - Probe the bucket array starting at the given bucket for the next empty
+ * bucket, returning a pointer to it.
+ * @map: The map containing the buckets to search.
+ * @bucket: The bucket at which to start probing.
+ * @max_probes: The maximum number of buckets to search.
+ *
+ * NULL will be returned if the search reaches the end of the bucket array or if the number of
+ * linear probes exceeds a specified limit.
+ *
+ * Return: The next empty bucket, or NULL if the search failed.
+ */
+static struct bucket *
+find_empty_bucket(struct int_map *map, struct bucket *bucket, unsigned int max_probes)
+{
+ /*
+ * Limit the search to either the nearer of the end of the bucket array or a fixed distance
+ * beyond the initial bucket.
+ */
+ ptrdiff_t remaining = &map->buckets[map->bucket_count] - bucket;
+ struct bucket *sentinel = &bucket[min_t(ptrdiff_t, remaining, max_probes)];
+ struct bucket *entry;
+
+ for (entry = bucket; entry < sentinel; entry++) {
+ if (entry->value == NULL)
+ return entry;
+ }
+
+ return NULL;
+}
+
+/**
+ * move_empty_bucket() - Move an empty bucket closer to the start of the bucket array.
+ * @map: The map containing the bucket.
+ * @hole: The empty bucket to fill with an entry that precedes it in one of its enclosing
+ * neighborhoods.
+ *
+ * This searches the neighborhoods that contain the empty bucket for a non-empty bucket closer to
+ * the start of the array. If such a bucket is found, this swaps the two buckets by moving the
+ * entry to the empty bucket.
+ *
+ * Return: The bucket that was vacated by moving its entry to the provided hole, or NULL if no
+ * entry could be moved.
+ */
+static struct bucket *move_empty_bucket(struct int_map *map __always_unused,
+ struct bucket *hole)
+{
+ /*
+ * Examine every neighborhood that the empty bucket is part of, starting with the one in
+ * which it is the last bucket. No boundary check is needed for the negative array
+ * arithmetic since this function is only called when hole is at least NEIGHBORHOOD cells
+ * deeper into the array than a valid bucket.
+ */
+ struct bucket *bucket;
+
+ for (bucket = &hole[1 - NEIGHBORHOOD]; bucket < hole; bucket++) {
+ /*
+ * Find the entry that is nearest to the bucket, which means it will be nearest to
+ * the hash bucket whose neighborhood is full.
+ */
+ struct bucket *new_hole = dereference_hop(bucket, bucket->first_hop);
+
+ if (new_hole == NULL) {
+ /*
+ * There are no buckets in this neighborhood that are in use by this one
+ * (they must all be owned by overlapping neighborhoods).
+ */
+ continue;
+ }
+
+ /*
+ * Skip this bucket if its first entry is actually further away than the hole that
+ * we're already trying to fill.
+ */
+ if (hole < new_hole)
+ continue;
+
+ /*
+ * We've found an entry in this neighborhood that we can "hop" further away, moving
+ * the hole closer to the hash bucket, if not all the way into its neighborhood.
+ */
+
+ /*
+ * The entry that will be the new hole is the first bucket in the list, so setting
+ * first_hop is all that's needed remove it from the list.
+ */
+ bucket->first_hop = new_hole->next_hop;
+ new_hole->next_hop = NULL_HOP_OFFSET;
+
+ /* Move the entry into the original hole. */
+ hole->key = new_hole->key;
+ hole->value = new_hole->value;
+ new_hole->value = NULL;
+
+ /* Insert the filled hole into the hop list for the neighborhood. */
+ insert_in_hop_list(bucket, hole);
+ return new_hole;
+ }
+
+ /* We couldn't find an entry to relocate to the hole. */
+ return NULL;
+}
+
+/**
+ * update_mapping() - Find and update any existing mapping for a given key, returning the value
+ * associated with the key in the provided pointer.
+ * @map: The int_map to attempt to modify.
+ * @neighborhood: The first bucket in the neighborhood that would contain the search key
+ * @key: The key with which to associate the new value.
+ * @new_value: The value to be associated with the key.
+ * @update: Whether to overwrite an existing value.
+ * @old_value_ptr: a pointer in which to store the old value (unmodified if no mapping was found)
+ *
+ * Return: true if the map contains a mapping for the key, false if it does not.
+ */
+static bool update_mapping(struct int_map *map, struct bucket *neighborhood,
+ u64 key, void *new_value, bool update, void **old_value_ptr)
+{
+ struct bucket *bucket = search_hop_list(map, neighborhood, key, NULL);
+
+ if (bucket == NULL) {
+ /* There is no bucket containing the key in the neighborhood. */
+ return false;
+ }
+
+ /*
+ * Return the value of the current mapping (if desired) and update the mapping with the new
+ * value (if desired).
+ */
+ if (old_value_ptr != NULL)
+ *old_value_ptr = bucket->value;
+ if (update)
+ bucket->value = new_value;
+ return true;
+}
+
+/**
+ * find_or_make_vacancy() - Find an empty bucket.
+ * @map: The int_map to search or modify.
+ * @neighborhood: The first bucket in the neighborhood in which an empty bucket is needed for a new
+ * mapping.
+ *
+ * Find an empty bucket in a specified neighborhood for a new mapping or attempt to re-arrange
+ * mappings so there is such a bucket. This operation may fail (returning NULL) if an empty bucket
+ * is not available or could not be relocated to the neighborhood.
+ *
+ * Return: a pointer to an empty bucket in the desired neighborhood, or NULL if a vacancy could not
+ * be found or arranged.
+ */
+static struct bucket *find_or_make_vacancy(struct int_map *map,
+ struct bucket *neighborhood)
+{
+ /* Probe within and beyond the neighborhood for the first empty bucket. */
+ struct bucket *hole = find_empty_bucket(map, neighborhood, MAX_PROBES);
+
+ /*
+ * Keep trying until the empty bucket is in the bucket's neighborhood or we are unable to
+ * move it any closer by swapping it with a filled bucket.
+ */
+ while (hole != NULL) {
+ int distance = hole - neighborhood;
+
+ if (distance < NEIGHBORHOOD) {
+ /*
+ * We've found or relocated an empty bucket close enough to the initial
+ * hash bucket to be referenced by its hop vector.
+ */
+ return hole;
+ }
+
+ /*
+ * The nearest empty bucket isn't within the neighborhood that must contain the new
+ * entry, so try to swap it with bucket that is closer.
+ */
+ hole = move_empty_bucket(map, hole);
+ }
+
+ return NULL;
+}
+
+/**
+ * vdo_int_map_put() - Try to associate a value with an integer.
+ * @map: The int_map to attempt to modify.
+ * @key: The key with which to associate the new value.
+ * @new_value: The value to be associated with the key.
+ * @update: Whether to overwrite an existing value.
+ * @old_value_ptr: A pointer in which to store either the old value (if the key was already mapped)
+ * or NULL if the map did not contain the key; NULL may be provided if the caller
+ * does not need to know the old value
+ *
+ * Try to associate a value (a pointer) with an integer in an int_map. If the map already contains
+ * a mapping for the provided key, the old value is only replaced with the specified value if
+ * update is true. In either case the old value is returned. If the map does not already contain a
+ * value for the specified key, the new value is added regardless of the value of update.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_int_map_put(struct int_map *map, u64 key, void *new_value, bool update,
+ void **old_value_ptr)
+{
+ struct bucket *neighborhood, *bucket;
+
+ if (unlikely(new_value == NULL))
+ return -EINVAL;
+
+ /*
+ * Select the bucket at the start of the neighborhood that must contain any entry for the
+ * provided key.
+ */
+ neighborhood = select_bucket(map, key);
+
+ /*
+ * Check whether the neighborhood already contains an entry for the key, in which case we
+ * optionally update it, returning the old value.
+ */
+ if (update_mapping(map, neighborhood, key, new_value, update, old_value_ptr))
+ return VDO_SUCCESS;
+
+ /*
+ * Find an empty bucket in the desired neighborhood for the new entry or re-arrange entries
+ * in the map so there is such a bucket. This operation will usually succeed; the loop body
+ * will only be executed on the rare occasions that we have to resize the map.
+ */
+ while ((bucket = find_or_make_vacancy(map, neighborhood)) == NULL) {
+ int result;
+
+ /*
+ * There is no empty bucket in which to put the new entry in the current map, so
+ * we're forced to allocate a new bucket array with a larger capacity, re-hash all
+ * the entries into those buckets, and try again (a very expensive operation for
+ * large maps).
+ */
+ result = resize_buckets(map);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /*
+ * Resizing the map invalidates all pointers to buckets, so recalculate the
+ * neighborhood pointer.
+ */
+ neighborhood = select_bucket(map, key);
+ }
+
+ /* Put the new entry in the empty bucket, adding it to the neighborhood. */
+ bucket->key = key;
+ bucket->value = new_value;
+ insert_in_hop_list(neighborhood, bucket);
+ map->size += 1;
+
+ /* There was no existing entry, so there was no old value to be returned. */
+ if (old_value_ptr != NULL)
+ *old_value_ptr = NULL;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_int_map_remove() - Remove the mapping for a given key from the int_map.
+ * @map: The int_map from which to remove the mapping.
+ * @key: The key whose mapping is to be removed.
+ *
+ * Return: the value that was associated with the key, or NULL if it was not mapped.
+ */
+void *vdo_int_map_remove(struct int_map *map, u64 key)
+{
+ void *value;
+
+ /* Select the bucket to search and search it for an existing entry. */
+ struct bucket *bucket = select_bucket(map, key);
+ struct bucket *previous;
+ struct bucket *victim = search_hop_list(map, bucket, key, &previous);
+
+ if (victim == NULL) {
+ /* There is no matching entry to remove. */
+ return NULL;
+ }
+
+ /*
+ * We found an entry to remove. Save the mapped value to return later and empty the bucket.
+ */
+ map->size -= 1;
+ value = victim->value;
+ victim->value = NULL;
+ victim->key = 0;
+
+ /* The victim bucket is now empty, but it still needs to be spliced out of the hop list. */
+ if (previous == NULL) {
+ /* The victim is the head of the list, so swing first_hop. */
+ bucket->first_hop = victim->next_hop;
+ } else {
+ previous->next_hop = victim->next_hop;
+ }
+
+ victim->next_hop = NULL_HOP_OFFSET;
+ return value;
+}
diff --git a/drivers/md/dm-vdo/int-map.h b/drivers/md/dm-vdo/int-map.h
new file mode 100644
index 000000000000..1858ad799887
--- /dev/null
+++ b/drivers/md/dm-vdo/int-map.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_INT_MAP_H
+#define VDO_INT_MAP_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/**
+ * DOC: int_map
+ *
+ * An int_map associates pointers (void *) with integer keys (u64). NULL pointer values are
+ * not supported.
+ *
+ * The map is implemented as hash table, which should provide constant-time insert, query, and
+ * remove operations, although the insert may occasionally grow the table, which is linear in the
+ * number of entries in the map. The table will grow as needed to hold new entries, but will not
+ * shrink as entries are removed.
+ */
+
+struct int_map;
+
+int __must_check vdo_int_map_create(size_t initial_capacity, struct int_map **map_ptr);
+
+void vdo_int_map_free(struct int_map *map);
+
+size_t vdo_int_map_size(const struct int_map *map);
+
+void *vdo_int_map_get(struct int_map *map, u64 key);
+
+int __must_check vdo_int_map_put(struct int_map *map, u64 key, void *new_value,
+ bool update, void **old_value_ptr);
+
+void *vdo_int_map_remove(struct int_map *map, u64 key);
+
+#endif /* VDO_INT_MAP_H */
diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c
new file mode 100644
index 000000000000..9a3716bb3c05
--- /dev/null
+++ b/drivers/md/dm-vdo/io-submitter.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "io-submitter.h"
+
+#include <linux/bio.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "data-vio.h"
+#include "logger.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+
+/*
+ * Submission of bio operations to the underlying storage device will go through a separate work
+ * queue thread (or more than one) to prevent blocking in other threads if the storage device has a
+ * full queue. The plug structure allows that thread to do better batching of requests to make the
+ * I/O more efficient.
+ *
+ * When multiple worker threads are used, a thread is chosen for a I/O operation submission based
+ * on the PBN, so a given PBN will consistently wind up on the same thread. Flush operations are
+ * assigned round-robin.
+ *
+ * The map (protected by the mutex) collects pending I/O operations so that the worker thread can
+ * reorder them to try to encourage I/O request merging in the request queue underneath.
+ */
+struct bio_queue_data {
+ struct vdo_work_queue *queue;
+ struct blk_plug plug;
+ struct int_map *map;
+ struct mutex lock;
+ unsigned int queue_number;
+};
+
+struct io_submitter {
+ unsigned int num_bio_queues_used;
+ unsigned int bio_queue_rotation_interval;
+ struct bio_queue_data bio_queue_data[];
+};
+
+static void start_bio_queue(void *ptr)
+{
+ struct bio_queue_data *bio_queue_data = ptr;
+
+ blk_start_plug(&bio_queue_data->plug);
+}
+
+static void finish_bio_queue(void *ptr)
+{
+ struct bio_queue_data *bio_queue_data = ptr;
+
+ blk_finish_plug(&bio_queue_data->plug);
+}
+
+static const struct vdo_work_queue_type bio_queue_type = {
+ .start = start_bio_queue,
+ .finish = finish_bio_queue,
+ .max_priority = BIO_Q_MAX_PRIORITY,
+ .default_priority = BIO_Q_DATA_PRIORITY,
+};
+
+/**
+ * count_all_bios() - Determine which bio counter to use.
+ * @vio: The vio associated with the bio.
+ * @bio: The bio to count.
+ */
+static void count_all_bios(struct vio *vio, struct bio *bio)
+{
+ struct atomic_statistics *stats = &vio->completion.vdo->stats;
+
+ if (is_data_vio(vio)) {
+ vdo_count_bios(&stats->bios_out, bio);
+ return;
+ }
+
+ vdo_count_bios(&stats->bios_meta, bio);
+ if (vio->type == VIO_TYPE_RECOVERY_JOURNAL)
+ vdo_count_bios(&stats->bios_journal, bio);
+ else if (vio->type == VIO_TYPE_BLOCK_MAP)
+ vdo_count_bios(&stats->bios_page_cache, bio);
+}
+
+/**
+ * assert_in_bio_zone() - Assert that a vio is in the correct bio zone and not in interrupt
+ * context.
+ * @vio: The vio to check.
+ */
+static void assert_in_bio_zone(struct vio *vio)
+{
+ VDO_ASSERT_LOG_ONLY(!in_interrupt(), "not in interrupt context");
+ assert_vio_in_bio_zone(vio);
+}
+
+/**
+ * send_bio_to_device() - Update stats and tracing info, then submit the supplied bio to the OS for
+ * processing.
+ * @vio: The vio associated with the bio.
+ * @bio: The bio to submit to the OS.
+ */
+static void send_bio_to_device(struct vio *vio, struct bio *bio)
+{
+ struct vdo *vdo = vio->completion.vdo;
+
+ assert_in_bio_zone(vio);
+ atomic64_inc(&vdo->stats.bios_submitted);
+ count_all_bios(vio, bio);
+ bio_set_dev(bio, vdo_get_backing_device(vdo));
+ submit_bio_noacct(bio);
+}
+
+/**
+ * vdo_submit_vio() - Submits a vio's bio to the underlying block device. May block if the device
+ * is busy. This callback should be used by vios which did not attempt to merge.
+ */
+void vdo_submit_vio(struct vdo_completion *completion)
+{
+ struct vio *vio = as_vio(completion);
+
+ send_bio_to_device(vio, vio->bio);
+}
+
+/**
+ * get_bio_list() - Extract the list of bios to submit from a vio.
+ * @vio: The vio submitting I/O.
+ *
+ * The list will always contain at least one entry (the bio for the vio on which it is called), but
+ * other bios may have been merged with it as well.
+ *
+ * Return: bio The head of the bio list to submit.
+ */
+static struct bio *get_bio_list(struct vio *vio)
+{
+ struct bio *bio;
+ struct io_submitter *submitter = vio->completion.vdo->io_submitter;
+ struct bio_queue_data *bio_queue_data = &(submitter->bio_queue_data[vio->bio_zone]);
+
+ assert_in_bio_zone(vio);
+
+ mutex_lock(&bio_queue_data->lock);
+ vdo_int_map_remove(bio_queue_data->map,
+ vio->bios_merged.head->bi_iter.bi_sector);
+ vdo_int_map_remove(bio_queue_data->map,
+ vio->bios_merged.tail->bi_iter.bi_sector);
+ bio = vio->bios_merged.head;
+ bio_list_init(&vio->bios_merged);
+ mutex_unlock(&bio_queue_data->lock);
+
+ return bio;
+}
+
+/**
+ * submit_data_vio() - Submit a data_vio's bio to the storage below along with
+ * any bios that have been merged with it.
+ *
+ * Context: This call may block and so should only be called from a bio thread.
+ */
+static void submit_data_vio(struct vdo_completion *completion)
+{
+ struct bio *bio, *next;
+ struct vio *vio = as_vio(completion);
+
+ assert_in_bio_zone(vio);
+ for (bio = get_bio_list(vio); bio != NULL; bio = next) {
+ next = bio->bi_next;
+ bio->bi_next = NULL;
+ send_bio_to_device((struct vio *) bio->bi_private, bio);
+ }
+}
+
+/**
+ * get_mergeable_locked() - Attempt to find an already queued bio that the current bio can be
+ * merged with.
+ * @map: The bio map to use for merging.
+ * @vio: The vio we want to merge.
+ * @back_merge: Set to true for a back merge, false for a front merge.
+ *
+ * There are two types of merging possible, forward and backward, which are distinguished by a flag
+ * that uses kernel elevator terminology.
+ *
+ * Return: the vio to merge to, NULL if no merging is possible.
+ */
+static struct vio *get_mergeable_locked(struct int_map *map, struct vio *vio,
+ bool back_merge)
+{
+ struct bio *bio = vio->bio;
+ sector_t merge_sector = bio->bi_iter.bi_sector;
+ struct vio *vio_merge;
+
+ if (back_merge)
+ merge_sector -= VDO_SECTORS_PER_BLOCK;
+ else
+ merge_sector += VDO_SECTORS_PER_BLOCK;
+
+ vio_merge = vdo_int_map_get(map, merge_sector);
+
+ if (vio_merge == NULL)
+ return NULL;
+
+ if (vio->completion.priority != vio_merge->completion.priority)
+ return NULL;
+
+ if (bio_data_dir(bio) != bio_data_dir(vio_merge->bio))
+ return NULL;
+
+ if (bio_list_empty(&vio_merge->bios_merged))
+ return NULL;
+
+ if (back_merge) {
+ return (vio_merge->bios_merged.tail->bi_iter.bi_sector == merge_sector ?
+ vio_merge : NULL);
+ }
+
+ return (vio_merge->bios_merged.head->bi_iter.bi_sector == merge_sector ?
+ vio_merge : NULL);
+}
+
+static int map_merged_vio(struct int_map *bio_map, struct vio *vio)
+{
+ int result;
+ sector_t bio_sector;
+
+ bio_sector = vio->bios_merged.head->bi_iter.bi_sector;
+ result = vdo_int_map_put(bio_map, bio_sector, vio, true, NULL);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ bio_sector = vio->bios_merged.tail->bi_iter.bi_sector;
+ return vdo_int_map_put(bio_map, bio_sector, vio, true, NULL);
+}
+
+static int merge_to_prev_tail(struct int_map *bio_map, struct vio *vio,
+ struct vio *prev_vio)
+{
+ vdo_int_map_remove(bio_map, prev_vio->bios_merged.tail->bi_iter.bi_sector);
+ bio_list_merge(&prev_vio->bios_merged, &vio->bios_merged);
+ return map_merged_vio(bio_map, prev_vio);
+}
+
+static int merge_to_next_head(struct int_map *bio_map, struct vio *vio,
+ struct vio *next_vio)
+{
+ /*
+ * Handle "next merge" and "gap fill" cases the same way so as to reorder bios in a way
+ * that's compatible with using funnel queues in work queues. This avoids removing an
+ * existing completion.
+ */
+ vdo_int_map_remove(bio_map, next_vio->bios_merged.head->bi_iter.bi_sector);
+ bio_list_merge_head(&next_vio->bios_merged, &vio->bios_merged);
+ return map_merged_vio(bio_map, next_vio);
+}
+
+/**
+ * try_bio_map_merge() - Attempt to merge a vio's bio with other pending I/Os.
+ * @vio: The vio to merge.
+ *
+ * Currently this is only used for data_vios, but is broken out for future use with metadata vios.
+ *
+ * Return: whether or not the vio was merged.
+ */
+static bool try_bio_map_merge(struct vio *vio)
+{
+ int result;
+ bool merged = true;
+ struct bio *bio = vio->bio;
+ struct vio *prev_vio, *next_vio;
+ struct vdo *vdo = vio->completion.vdo;
+ struct bio_queue_data *bio_queue_data =
+ &vdo->io_submitter->bio_queue_data[vio->bio_zone];
+
+ bio->bi_next = NULL;
+ bio_list_init(&vio->bios_merged);
+ bio_list_add(&vio->bios_merged, bio);
+
+ mutex_lock(&bio_queue_data->lock);
+ prev_vio = get_mergeable_locked(bio_queue_data->map, vio, true);
+ next_vio = get_mergeable_locked(bio_queue_data->map, vio, false);
+ if (prev_vio == next_vio)
+ next_vio = NULL;
+
+ if ((prev_vio == NULL) && (next_vio == NULL)) {
+ /* no merge. just add to bio_queue */
+ merged = false;
+ result = vdo_int_map_put(bio_queue_data->map,
+ bio->bi_iter.bi_sector,
+ vio, true, NULL);
+ } else if (next_vio == NULL) {
+ /* Only prev. merge to prev's tail */
+ result = merge_to_prev_tail(bio_queue_data->map, vio, prev_vio);
+ } else {
+ /* Only next. merge to next's head */
+ result = merge_to_next_head(bio_queue_data->map, vio, next_vio);
+ }
+ mutex_unlock(&bio_queue_data->lock);
+
+ /* We don't care about failure of int_map_put in this case. */
+ VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "bio map insertion succeeds");
+ return merged;
+}
+
+/**
+ * vdo_submit_data_vio() - Submit I/O for a data_vio.
+ * @data_vio: the data_vio for which to issue I/O.
+ *
+ * If possible, this I/O will be merged other pending I/Os. Otherwise, the data_vio will be sent to
+ * the appropriate bio zone directly.
+ */
+void vdo_submit_data_vio(struct data_vio *data_vio)
+{
+ if (try_bio_map_merge(&data_vio->vio))
+ return;
+
+ launch_data_vio_bio_zone_callback(data_vio, submit_data_vio);
+}
+
+/**
+ * __submit_metadata_vio() - Submit I/O for a metadata vio.
+ * @vio: the vio for which to issue I/O
+ * @physical: the physical block number to read or write
+ * @callback: the bio endio function which will be called after the I/O completes
+ * @error_handler: the handler for submission or I/O errors (may be NULL)
+ * @operation: the type of I/O to perform
+ * @data: the buffer to read or write (may be NULL)
+ *
+ * The vio is enqueued on a vdo bio queue so that bio submission (which may block) does not block
+ * other vdo threads.
+ *
+ * That the error handler will run on the correct thread is only true so long as the thread calling
+ * this function, and the thread set in the endio callback are the same, as well as the fact that
+ * no error can occur on the bio queue. Currently this is true for all callers, but additional care
+ * will be needed if this ever changes.
+ */
+void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
+ bio_end_io_t callback, vdo_action_fn error_handler,
+ blk_opf_t operation, char *data)
+{
+ int result;
+ struct vdo_completion *completion = &vio->completion;
+ const struct admin_state_code *code = vdo_get_admin_state(completion->vdo);
+
+
+ VDO_ASSERT_LOG_ONLY(!code->quiescent, "I/O not allowed in state %s", code->name);
+ VDO_ASSERT_LOG_ONLY(vio->bio->bi_next == NULL, "metadata bio has no next bio");
+
+ vdo_reset_completion(completion);
+ completion->error_handler = error_handler;
+ result = vio_reset_bio(vio, data, callback, operation | REQ_META, physical);
+ if (result != VDO_SUCCESS) {
+ continue_vio(vio, result);
+ return;
+ }
+
+ vdo_set_completion_callback(completion, vdo_submit_vio,
+ get_vio_bio_zone_thread_id(vio));
+ vdo_launch_completion_with_priority(completion, get_metadata_priority(vio));
+}
+
+/**
+ * vdo_make_io_submitter() - Create an io_submitter structure.
+ * @thread_count: Number of bio-submission threads to set up.
+ * @rotation_interval: Interval to use when rotating between bio-submission threads when enqueuing
+ * completions.
+ * @max_requests_active: Number of bios for merge tracking.
+ * @vdo: The vdo which will use this submitter.
+ * @io_submitter: pointer to the new data structure.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_make_io_submitter(unsigned int thread_count, unsigned int rotation_interval,
+ unsigned int max_requests_active, struct vdo *vdo,
+ struct io_submitter **io_submitter_ptr)
+{
+ unsigned int i;
+ struct io_submitter *io_submitter;
+ int result;
+
+ result = vdo_allocate_extended(struct io_submitter, thread_count,
+ struct bio_queue_data, "bio submission data",
+ &io_submitter);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ io_submitter->bio_queue_rotation_interval = rotation_interval;
+
+ /* Setup for each bio-submission work queue */
+ for (i = 0; i < thread_count; i++) {
+ struct bio_queue_data *bio_queue_data = &io_submitter->bio_queue_data[i];
+
+ mutex_init(&bio_queue_data->lock);
+ /*
+ * One I/O operation per request, but both first & last sector numbers.
+ *
+ * If requests are assigned to threads round-robin, they should be distributed
+ * quite evenly. But if they're assigned based on PBN, things can sometimes be very
+ * uneven. So for now, we'll assume that all requests *may* wind up on one thread,
+ * and thus all in the same map.
+ */
+ result = vdo_int_map_create(max_requests_active * 2,
+ &bio_queue_data->map);
+ if (result != VDO_SUCCESS) {
+ /*
+ * Clean up the partially initialized bio-queue entirely and indicate that
+ * initialization failed.
+ */
+ vdo_log_error("bio map initialization failed %d", result);
+ vdo_cleanup_io_submitter(io_submitter);
+ vdo_free_io_submitter(io_submitter);
+ return result;
+ }
+
+ bio_queue_data->queue_number = i;
+ result = vdo_make_thread(vdo, vdo->thread_config.bio_threads[i],
+ &bio_queue_type, 1, (void **) &bio_queue_data);
+ if (result != VDO_SUCCESS) {
+ /*
+ * Clean up the partially initialized bio-queue entirely and indicate that
+ * initialization failed.
+ */
+ vdo_int_map_free(vdo_forget(bio_queue_data->map));
+ vdo_log_error("bio queue initialization failed %d", result);
+ vdo_cleanup_io_submitter(io_submitter);
+ vdo_free_io_submitter(io_submitter);
+ return result;
+ }
+
+ bio_queue_data->queue = vdo->threads[vdo->thread_config.bio_threads[i]].queue;
+ io_submitter->num_bio_queues_used++;
+ }
+
+ *io_submitter_ptr = io_submitter;
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_cleanup_io_submitter() - Tear down the io_submitter fields as needed for a physical layer.
+ * @io_submitter: The I/O submitter data to tear down (may be NULL).
+ */
+void vdo_cleanup_io_submitter(struct io_submitter *io_submitter)
+{
+ int i;
+
+ if (io_submitter == NULL)
+ return;
+
+ for (i = io_submitter->num_bio_queues_used - 1; i >= 0; i--)
+ vdo_finish_work_queue(io_submitter->bio_queue_data[i].queue);
+}
+
+/**
+ * vdo_free_io_submitter() - Free the io_submitter fields and structure as needed.
+ * @io_submitter: The I/O submitter data to destroy.
+ *
+ * This must be called after vdo_cleanup_io_submitter(). It is used to release resources late in
+ * the shutdown process to avoid or reduce the chance of race conditions.
+ */
+void vdo_free_io_submitter(struct io_submitter *io_submitter)
+{
+ int i;
+
+ if (io_submitter == NULL)
+ return;
+
+ for (i = io_submitter->num_bio_queues_used - 1; i >= 0; i--) {
+ io_submitter->num_bio_queues_used--;
+ /* vdo_destroy() will free the work queue, so just give up our reference to it. */
+ vdo_forget(io_submitter->bio_queue_data[i].queue);
+ vdo_int_map_free(vdo_forget(io_submitter->bio_queue_data[i].map));
+ }
+ vdo_free(io_submitter);
+}
diff --git a/drivers/md/dm-vdo/io-submitter.h b/drivers/md/dm-vdo/io-submitter.h
new file mode 100644
index 000000000000..80748699496f
--- /dev/null
+++ b/drivers/md/dm-vdo/io-submitter.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_IO_SUBMITTER_H
+#define VDO_IO_SUBMITTER_H
+
+#include <linux/bio.h>
+
+#include "types.h"
+
+struct io_submitter;
+
+int vdo_make_io_submitter(unsigned int thread_count, unsigned int rotation_interval,
+ unsigned int max_requests_active, struct vdo *vdo,
+ struct io_submitter **io_submitter);
+
+void vdo_cleanup_io_submitter(struct io_submitter *io_submitter);
+
+void vdo_free_io_submitter(struct io_submitter *io_submitter);
+
+void vdo_submit_vio(struct vdo_completion *completion);
+
+void vdo_submit_data_vio(struct data_vio *data_vio);
+
+void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
+ bio_end_io_t callback, vdo_action_fn error_handler,
+ blk_opf_t operation, char *data);
+
+static inline void vdo_submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
+ bio_end_io_t callback, vdo_action_fn error_handler,
+ blk_opf_t operation)
+{
+ __submit_metadata_vio(vio, physical, callback, error_handler,
+ operation, vio->data);
+}
+
+static inline void vdo_submit_flush_vio(struct vio *vio, bio_end_io_t callback,
+ vdo_action_fn error_handler)
+{
+ /* FIXME: Can we just use REQ_OP_FLUSH? */
+ __submit_metadata_vio(vio, 0, callback, error_handler,
+ REQ_OP_WRITE | REQ_PREFLUSH, NULL);
+}
+
+#endif /* VDO_IO_SUBMITTER_H */
diff --git a/drivers/md/dm-vdo/logger.c b/drivers/md/dm-vdo/logger.c
new file mode 100644
index 000000000000..3f7dc2cb6b98
--- /dev/null
+++ b/drivers/md/dm-vdo/logger.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "logger.h"
+
+#include <asm/current.h>
+#include <linux/delay.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+
+#include "errors.h"
+#include "thread-device.h"
+#include "thread-utils.h"
+
+int vdo_log_level = VDO_LOG_DEFAULT;
+
+int vdo_get_log_level(void)
+{
+ int log_level_latch = READ_ONCE(vdo_log_level);
+
+ if (unlikely(log_level_latch > VDO_LOG_MAX)) {
+ log_level_latch = VDO_LOG_DEFAULT;
+ WRITE_ONCE(vdo_log_level, log_level_latch);
+ }
+ return log_level_latch;
+}
+
+static const char *get_current_interrupt_type(void)
+{
+ if (in_nmi())
+ return "NMI";
+
+ if (in_irq())
+ return "HI";
+
+ if (in_softirq())
+ return "SI";
+
+ return "INTR";
+}
+
+/**
+ * emit_log_message_to_kernel() - Emit a log message to the kernel at the specified priority.
+ *
+ * @priority: The priority at which to log the message
+ * @fmt: The format string of the message
+ */
+static void emit_log_message_to_kernel(int priority, const char *fmt, ...)
+{
+ va_list args;
+ struct va_format vaf;
+
+ if (priority > vdo_get_log_level())
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ switch (priority) {
+ case VDO_LOG_EMERG:
+ case VDO_LOG_ALERT:
+ case VDO_LOG_CRIT:
+ pr_crit("%pV", &vaf);
+ break;
+ case VDO_LOG_ERR:
+ pr_err("%pV", &vaf);
+ break;
+ case VDO_LOG_WARNING:
+ pr_warn("%pV", &vaf);
+ break;
+ case VDO_LOG_NOTICE:
+ case VDO_LOG_INFO:
+ pr_info("%pV", &vaf);
+ break;
+ case VDO_LOG_DEBUG:
+ pr_debug("%pV", &vaf);
+ break;
+ default:
+ printk(KERN_DEFAULT "%pV", &vaf);
+ break;
+ }
+
+ va_end(args);
+}
+
+/**
+ * emit_log_message() - Emit a log message to the kernel log in a format suited to the current
+ * thread context.
+ *
+ * Context info formats:
+ *
+ * interrupt: uds[NMI]: blah
+ * kvdo thread: kvdo12:foobarQ: blah
+ * thread w/device id: kvdo12:myprog: blah
+ * other thread: uds: myprog: blah
+ *
+ * Fields: module name, interrupt level, process name, device ID.
+ *
+ * @priority: the priority at which to log the message
+ * @module: The name of the module doing the logging
+ * @prefix: The prefix of the log message
+ * @vaf1: The first message format descriptor
+ * @vaf2: The second message format descriptor
+ */
+static void emit_log_message(int priority, const char *module, const char *prefix,
+ const struct va_format *vaf1, const struct va_format *vaf2)
+{
+ int device_instance;
+
+ /*
+ * In interrupt context, identify the interrupt type and module. Ignore the process/thread
+ * since it could be anything.
+ */
+ if (in_interrupt()) {
+ const char *type = get_current_interrupt_type();
+
+ emit_log_message_to_kernel(priority, "%s[%s]: %s%pV%pV\n", module, type,
+ prefix, vaf1, vaf2);
+ return;
+ }
+
+ /* Not at interrupt level; we have a process we can look at, and might have a device ID. */
+ device_instance = vdo_get_thread_device_id();
+ if (device_instance >= 0) {
+ emit_log_message_to_kernel(priority, "%s%u:%s: %s%pV%pV\n", module,
+ device_instance, current->comm, prefix, vaf1,
+ vaf2);
+ return;
+ }
+
+ /*
+ * If it's a kernel thread and the module name is a prefix of its name, assume it is ours
+ * and only identify the thread.
+ */
+ if (((current->flags & PF_KTHREAD) != 0) &&
+ (strncmp(module, current->comm, strlen(module)) == 0)) {
+ emit_log_message_to_kernel(priority, "%s: %s%pV%pV\n", current->comm,
+ prefix, vaf1, vaf2);
+ return;
+ }
+
+ /* Identify the module and the process. */
+ emit_log_message_to_kernel(priority, "%s: %s: %s%pV%pV\n", module, current->comm,
+ prefix, vaf1, vaf2);
+}
+
+/*
+ * vdo_log_embedded_message() - Log a message embedded within another message.
+ * @priority: the priority at which to log the message
+ * @module: the name of the module doing the logging
+ * @prefix: optional string prefix to message, may be NULL
+ * @fmt1: format of message first part (required)
+ * @args1: arguments for message first part (required)
+ * @fmt2: format of message second part
+ */
+void vdo_log_embedded_message(int priority, const char *module, const char *prefix,
+ const char *fmt1, va_list args1, const char *fmt2, ...)
+{
+ va_list args1_copy;
+ va_list args2;
+ struct va_format vaf1, vaf2;
+
+ va_start(args2, fmt2);
+
+ if (module == NULL)
+ module = VDO_LOGGING_MODULE_NAME;
+
+ if (prefix == NULL)
+ prefix = "";
+
+ /*
+ * It is implementation dependent whether va_list is defined as an array type that decays
+ * to a pointer when passed as an argument. Copy args1 and args2 with va_copy so that vaf1
+ * and vaf2 get proper va_list pointers irrespective of how va_list is defined.
+ */
+ va_copy(args1_copy, args1);
+ vaf1.fmt = fmt1;
+ vaf1.va = &args1_copy;
+
+ vaf2.fmt = fmt2;
+ vaf2.va = &args2;
+
+ emit_log_message(priority, module, prefix, &vaf1, &vaf2);
+
+ va_end(args1_copy);
+ va_end(args2);
+}
+
+int vdo_vlog_strerror(int priority, int errnum, const char *module, const char *format,
+ va_list args)
+{
+ char errbuf[VDO_MAX_ERROR_MESSAGE_SIZE];
+ const char *message = uds_string_error(errnum, errbuf, sizeof(errbuf));
+
+ vdo_log_embedded_message(priority, module, NULL, format, args, ": %s (%d)",
+ message, errnum);
+ return errnum;
+}
+
+int __vdo_log_strerror(int priority, int errnum, const char *module, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ vdo_vlog_strerror(priority, errnum, module, format, args);
+ va_end(args);
+ return errnum;
+}
+
+void vdo_log_backtrace(int priority)
+{
+ if (priority > vdo_get_log_level())
+ return;
+
+ dump_stack();
+}
+
+void __vdo_log_message(int priority, const char *module, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ vdo_log_embedded_message(priority, module, NULL, format, args, "%s", "");
+ va_end(args);
+}
+
+/*
+ * Sleep or delay a few milliseconds in an attempt to allow the log buffers to be flushed lest they
+ * be overrun.
+ */
+void vdo_pause_for_logger(void)
+{
+ fsleep(4000);
+}
diff --git a/drivers/md/dm-vdo/logger.h b/drivers/md/dm-vdo/logger.h
new file mode 100644
index 000000000000..ae6ad691c027
--- /dev/null
+++ b/drivers/md/dm-vdo/logger.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_LOGGER_H
+#define VDO_LOGGER_H
+
+#include <linux/kern_levels.h>
+#include <linux/module.h>
+#include <linux/ratelimit.h>
+#include <linux/device-mapper.h>
+
+/* Custom logging utilities for UDS */
+
+enum {
+ VDO_LOG_EMERG = LOGLEVEL_EMERG,
+ VDO_LOG_ALERT = LOGLEVEL_ALERT,
+ VDO_LOG_CRIT = LOGLEVEL_CRIT,
+ VDO_LOG_ERR = LOGLEVEL_ERR,
+ VDO_LOG_WARNING = LOGLEVEL_WARNING,
+ VDO_LOG_NOTICE = LOGLEVEL_NOTICE,
+ VDO_LOG_INFO = LOGLEVEL_INFO,
+ VDO_LOG_DEBUG = LOGLEVEL_DEBUG,
+
+ VDO_LOG_MAX = VDO_LOG_DEBUG,
+ VDO_LOG_DEFAULT = VDO_LOG_INFO,
+};
+
+extern int vdo_log_level;
+
+#define DM_MSG_PREFIX "vdo"
+#define VDO_LOGGING_MODULE_NAME DM_NAME ": " DM_MSG_PREFIX
+
+/* Apply a rate limiter to a log method call. */
+#define vdo_log_ratelimit(log_fn, ...) \
+ do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) { \
+ log_fn(__VA_ARGS__); \
+ } \
+ } while (0)
+
+int vdo_get_log_level(void);
+
+void vdo_log_embedded_message(int priority, const char *module, const char *prefix,
+ const char *fmt1, va_list args1, const char *fmt2, ...)
+ __printf(4, 0) __printf(6, 7);
+
+void vdo_log_backtrace(int priority);
+
+/* All log functions will preserve the caller's value of errno. */
+
+#define vdo_log_strerror(priority, errnum, ...) \
+ __vdo_log_strerror(priority, errnum, VDO_LOGGING_MODULE_NAME, __VA_ARGS__)
+
+int __vdo_log_strerror(int priority, int errnum, const char *module,
+ const char *format, ...)
+ __printf(4, 5);
+
+int vdo_vlog_strerror(int priority, int errnum, const char *module, const char *format,
+ va_list args)
+ __printf(4, 0);
+
+/* Log an error prefixed with the string associated with the errnum. */
+#define vdo_log_error_strerror(errnum, ...) \
+ vdo_log_strerror(VDO_LOG_ERR, errnum, __VA_ARGS__)
+
+#define vdo_log_debug_strerror(errnum, ...) \
+ vdo_log_strerror(VDO_LOG_DEBUG, errnum, __VA_ARGS__)
+
+#define vdo_log_info_strerror(errnum, ...) \
+ vdo_log_strerror(VDO_LOG_INFO, errnum, __VA_ARGS__)
+
+#define vdo_log_warning_strerror(errnum, ...) \
+ vdo_log_strerror(VDO_LOG_WARNING, errnum, __VA_ARGS__)
+
+#define vdo_log_fatal_strerror(errnum, ...) \
+ vdo_log_strerror(VDO_LOG_CRIT, errnum, __VA_ARGS__)
+
+#define vdo_log_message(priority, ...) \
+ __vdo_log_message(priority, VDO_LOGGING_MODULE_NAME, __VA_ARGS__)
+
+void __vdo_log_message(int priority, const char *module, const char *format, ...)
+ __printf(3, 4);
+
+#define vdo_log_debug(...) vdo_log_message(VDO_LOG_DEBUG, __VA_ARGS__)
+
+#define vdo_log_info(...) vdo_log_message(VDO_LOG_INFO, __VA_ARGS__)
+
+#define vdo_log_warning(...) vdo_log_message(VDO_LOG_WARNING, __VA_ARGS__)
+
+#define vdo_log_error(...) vdo_log_message(VDO_LOG_ERR, __VA_ARGS__)
+
+#define vdo_log_fatal(...) vdo_log_message(VDO_LOG_CRIT, __VA_ARGS__)
+
+void vdo_pause_for_logger(void);
+#endif /* VDO_LOGGER_H */
diff --git a/drivers/md/dm-vdo/logical-zone.c b/drivers/md/dm-vdo/logical-zone.c
new file mode 100644
index 000000000000..026f031ffc9e
--- /dev/null
+++ b/drivers/md/dm-vdo/logical-zone.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "logical-zone.h"
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+#include "action-manager.h"
+#include "admin-state.h"
+#include "block-map.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "flush.h"
+#include "int-map.h"
+#include "physical-zone.h"
+#include "vdo.h"
+
+#define ALLOCATIONS_PER_ZONE 128
+
+/**
+ * as_logical_zone() - Convert a generic vdo_completion to a logical_zone.
+ * @completion: The completion to convert.
+ *
+ * Return: The completion as a logical_zone.
+ */
+static struct logical_zone *as_logical_zone(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_GENERATION_FLUSHED_COMPLETION);
+ return container_of(completion, struct logical_zone, completion);
+}
+
+/* get_thread_id_for_zone() - Implements vdo_zone_thread_getter_fn. */
+static thread_id_t get_thread_id_for_zone(void *context, zone_count_t zone_number)
+{
+ struct logical_zones *zones = context;
+
+ return zones->zones[zone_number].thread_id;
+}
+
+/**
+ * initialize_zone() - Initialize a logical zone.
+ * @zones: The logical_zones to which this zone belongs.
+ * @zone_number: The logical_zone's index.
+ */
+static int initialize_zone(struct logical_zones *zones, zone_count_t zone_number)
+{
+ int result;
+ struct vdo *vdo = zones->vdo;
+ struct logical_zone *zone = &zones->zones[zone_number];
+ zone_count_t allocation_zone_number;
+
+ result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->lbn_operations);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (zone_number < vdo->thread_config.logical_zone_count - 1)
+ zone->next = &zones->zones[zone_number + 1];
+
+ vdo_initialize_completion(&zone->completion, vdo,
+ VDO_GENERATION_FLUSHED_COMPLETION);
+ zone->zones = zones;
+ zone->zone_number = zone_number;
+ zone->thread_id = vdo->thread_config.logical_threads[zone_number];
+ zone->block_map_zone = &vdo->block_map->zones[zone_number];
+ INIT_LIST_HEAD(&zone->write_vios);
+ vdo_set_admin_state_code(&zone->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+
+ allocation_zone_number = zone->thread_id % vdo->thread_config.physical_zone_count;
+ zone->allocation_zone = &vdo->physical_zones->zones[allocation_zone_number];
+
+ return vdo_make_default_thread(vdo, zone->thread_id);
+}
+
+/**
+ * vdo_make_logical_zones() - Create a set of logical zones.
+ * @vdo: The vdo to which the zones will belong.
+ * @zones_ptr: A pointer to hold the new zones.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_make_logical_zones(struct vdo *vdo, struct logical_zones **zones_ptr)
+{
+ struct logical_zones *zones;
+ int result;
+ zone_count_t zone;
+ zone_count_t zone_count = vdo->thread_config.logical_zone_count;
+
+ if (zone_count == 0)
+ return VDO_SUCCESS;
+
+ result = vdo_allocate_extended(struct logical_zones, zone_count,
+ struct logical_zone, __func__, &zones);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ zones->vdo = vdo;
+ zones->zone_count = zone_count;
+ for (zone = 0; zone < zone_count; zone++) {
+ result = initialize_zone(zones, zone);
+ if (result != VDO_SUCCESS) {
+ vdo_free_logical_zones(zones);
+ return result;
+ }
+ }
+
+ result = vdo_make_action_manager(zones->zone_count, get_thread_id_for_zone,
+ vdo->thread_config.admin_thread, zones, NULL,
+ vdo, &zones->manager);
+ if (result != VDO_SUCCESS) {
+ vdo_free_logical_zones(zones);
+ return result;
+ }
+
+ *zones_ptr = zones;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_free_logical_zones() - Free a set of logical zones.
+ * @zones: The set of zones to free.
+ */
+void vdo_free_logical_zones(struct logical_zones *zones)
+{
+ zone_count_t index;
+
+ if (zones == NULL)
+ return;
+
+ vdo_free(vdo_forget(zones->manager));
+
+ for (index = 0; index < zones->zone_count; index++)
+ vdo_int_map_free(vdo_forget(zones->zones[index].lbn_operations));
+
+ vdo_free(zones);
+}
+
+static inline void assert_on_zone_thread(struct logical_zone *zone, const char *what)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == zone->thread_id),
+ "%s() called on correct thread", what);
+}
+
+/**
+ * check_for_drain_complete() - Check whether this zone has drained.
+ * @zone: The zone to check.
+ */
+static void check_for_drain_complete(struct logical_zone *zone)
+{
+ if (!vdo_is_state_draining(&zone->state) || zone->notifying ||
+ !list_empty(&zone->write_vios))
+ return;
+
+ vdo_finish_draining(&zone->state);
+}
+
+/**
+ * initiate_drain() - Initiate a drain.
+ *
+ * Implements vdo_admin_initiator_fn.
+ */
+static void initiate_drain(struct admin_state *state)
+{
+ check_for_drain_complete(container_of(state, struct logical_zone, state));
+}
+
+/**
+ * drain_logical_zone() - Drain a logical zone.
+ *
+ * Implements vdo_zone_action_fn.
+ */
+static void drain_logical_zone(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct logical_zones *zones = context;
+
+ vdo_start_draining(&zones->zones[zone_number].state,
+ vdo_get_current_manager_operation(zones->manager), parent,
+ initiate_drain);
+}
+
+void vdo_drain_logical_zones(struct logical_zones *zones,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent)
+{
+ vdo_schedule_operation(zones->manager, operation, NULL, drain_logical_zone, NULL,
+ parent);
+}
+
+/**
+ * resume_logical_zone() - Resume a logical zone.
+ *
+ * Implements vdo_zone_action_fn.
+ */
+static void resume_logical_zone(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct logical_zone *zone = &(((struct logical_zones *) context)->zones[zone_number]);
+
+ vdo_fail_completion(parent, vdo_resume_if_quiescent(&zone->state));
+}
+
+/**
+ * vdo_resume_logical_zones() - Resume a set of logical zones.
+ * @zones: The logical zones to resume.
+ * @parent: The object to notify when the zones have resumed.
+ */
+void vdo_resume_logical_zones(struct logical_zones *zones, struct vdo_completion *parent)
+{
+ vdo_schedule_operation(zones->manager, VDO_ADMIN_STATE_RESUMING, NULL,
+ resume_logical_zone, NULL, parent);
+}
+
+/**
+ * update_oldest_active_generation() - Update the oldest active generation.
+ * @zone: The zone.
+ *
+ * Return: true if the oldest active generation has changed.
+ */
+static bool update_oldest_active_generation(struct logical_zone *zone)
+{
+ struct data_vio *data_vio =
+ list_first_entry_or_null(&zone->write_vios, struct data_vio,
+ write_entry);
+ sequence_number_t oldest =
+ (data_vio == NULL) ? zone->flush_generation : data_vio->flush_generation;
+
+ if (oldest == zone->oldest_active_generation)
+ return false;
+
+ WRITE_ONCE(zone->oldest_active_generation, oldest);
+ return true;
+}
+
+/**
+ * vdo_increment_logical_zone_flush_generation() - Increment the flush generation in a logical
+ * zone.
+ * @zone: The logical zone.
+ * @expected_generation: The expected value of the flush generation before the increment.
+ */
+void vdo_increment_logical_zone_flush_generation(struct logical_zone *zone,
+ sequence_number_t expected_generation)
+{
+ assert_on_zone_thread(zone, __func__);
+ VDO_ASSERT_LOG_ONLY((zone->flush_generation == expected_generation),
+ "logical zone %u flush generation %llu should be %llu before increment",
+ zone->zone_number, (unsigned long long) zone->flush_generation,
+ (unsigned long long) expected_generation);
+
+ zone->flush_generation++;
+ zone->ios_in_flush_generation = 0;
+ update_oldest_active_generation(zone);
+}
+
+/**
+ * vdo_acquire_flush_generation_lock() - Acquire the shared lock on a flush generation by a write
+ * data_vio.
+ * @data_vio: The data_vio.
+ */
+void vdo_acquire_flush_generation_lock(struct data_vio *data_vio)
+{
+ struct logical_zone *zone = data_vio->logical.zone;
+
+ assert_on_zone_thread(zone, __func__);
+ VDO_ASSERT_LOG_ONLY(vdo_is_state_normal(&zone->state), "vdo state is normal");
+
+ data_vio->flush_generation = zone->flush_generation;
+ list_add_tail(&data_vio->write_entry, &zone->write_vios);
+ zone->ios_in_flush_generation++;
+}
+
+static void attempt_generation_complete_notification(struct vdo_completion *completion);
+
+/**
+ * notify_flusher() - Notify the flush that at least one generation no longer has active VIOs.
+ * @completion: The zone completion.
+ *
+ * This callback is registered in attempt_generation_complete_notification().
+ */
+static void notify_flusher(struct vdo_completion *completion)
+{
+ struct logical_zone *zone = as_logical_zone(completion);
+
+ vdo_complete_flushes(zone->zones->vdo->flusher);
+ vdo_launch_completion_callback(completion,
+ attempt_generation_complete_notification,
+ zone->thread_id);
+}
+
+/**
+ * attempt_generation_complete_notification() - Notify the flusher if some generation no
+ * longer has active VIOs.
+ * @completion: The zone completion.
+ */
+static void attempt_generation_complete_notification(struct vdo_completion *completion)
+{
+ struct logical_zone *zone = as_logical_zone(completion);
+
+ assert_on_zone_thread(zone, __func__);
+ if (zone->oldest_active_generation <= zone->notification_generation) {
+ zone->notifying = false;
+ check_for_drain_complete(zone);
+ return;
+ }
+
+ zone->notifying = true;
+ zone->notification_generation = zone->oldest_active_generation;
+ vdo_launch_completion_callback(&zone->completion, notify_flusher,
+ vdo_get_flusher_thread_id(zone->zones->vdo->flusher));
+}
+
+/**
+ * vdo_release_flush_generation_lock() - Release the shared lock on a flush generation held by a
+ * write data_vio.
+ * @data_vio: The data_vio whose lock is to be released.
+ *
+ * If there are pending flushes, and this data_vio completes the oldest generation active in this
+ * zone, an attempt will be made to finish any flushes which may now be complete.
+ */
+void vdo_release_flush_generation_lock(struct data_vio *data_vio)
+{
+ struct logical_zone *zone = data_vio->logical.zone;
+
+ assert_on_zone_thread(zone, __func__);
+
+ if (!data_vio_has_flush_generation_lock(data_vio))
+ return;
+
+ list_del_init(&data_vio->write_entry);
+ VDO_ASSERT_LOG_ONLY((zone->oldest_active_generation <= data_vio->flush_generation),
+ "data_vio releasing lock on generation %llu is not older than oldest active generation %llu",
+ (unsigned long long) data_vio->flush_generation,
+ (unsigned long long) zone->oldest_active_generation);
+
+ if (!update_oldest_active_generation(zone) || zone->notifying)
+ return;
+
+ attempt_generation_complete_notification(&zone->completion);
+}
+
+struct physical_zone *vdo_get_next_allocation_zone(struct logical_zone *zone)
+{
+ if (zone->allocation_count == ALLOCATIONS_PER_ZONE) {
+ zone->allocation_count = 0;
+ zone->allocation_zone = zone->allocation_zone->next;
+ }
+
+ zone->allocation_count++;
+ return zone->allocation_zone;
+}
+
+/**
+ * vdo_dump_logical_zone() - Dump information about a logical zone to the log for debugging.
+ * @zone: The zone to dump
+ *
+ * Context: the information is dumped in a thread-unsafe fashion.
+ *
+ */
+void vdo_dump_logical_zone(const struct logical_zone *zone)
+{
+ vdo_log_info("logical_zone %u", zone->zone_number);
+ vdo_log_info(" flush_generation=%llu oldest_active_generation=%llu notification_generation=%llu notifying=%s ios_in_flush_generation=%llu",
+ (unsigned long long) READ_ONCE(zone->flush_generation),
+ (unsigned long long) READ_ONCE(zone->oldest_active_generation),
+ (unsigned long long) READ_ONCE(zone->notification_generation),
+ vdo_bool_to_string(READ_ONCE(zone->notifying)),
+ (unsigned long long) READ_ONCE(zone->ios_in_flush_generation));
+}
diff --git a/drivers/md/dm-vdo/logical-zone.h b/drivers/md/dm-vdo/logical-zone.h
new file mode 100644
index 000000000000..1b666c84a193
--- /dev/null
+++ b/drivers/md/dm-vdo/logical-zone.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_LOGICAL_ZONE_H
+#define VDO_LOGICAL_ZONE_H
+
+#include <linux/list.h>
+
+#include "admin-state.h"
+#include "int-map.h"
+#include "types.h"
+
+struct physical_zone;
+
+struct logical_zone {
+ /* The completion for flush notifications */
+ struct vdo_completion completion;
+ /* The owner of this zone */
+ struct logical_zones *zones;
+ /* Which logical zone this is */
+ zone_count_t zone_number;
+ /* The thread id for this zone */
+ thread_id_t thread_id;
+ /* In progress operations keyed by LBN */
+ struct int_map *lbn_operations;
+ /* The logical to physical map */
+ struct block_map_zone *block_map_zone;
+ /* The current flush generation */
+ sequence_number_t flush_generation;
+ /*
+ * The oldest active generation in this zone. This is mutated only on the logical zone
+ * thread but is queried from the flusher thread.
+ */
+ sequence_number_t oldest_active_generation;
+ /* The number of IOs in the current flush generation */
+ block_count_t ios_in_flush_generation;
+ /* The youngest generation of the current notification */
+ sequence_number_t notification_generation;
+ /* Whether a notification is in progress */
+ bool notifying;
+ /* The queue of active data write VIOs */
+ struct list_head write_vios;
+ /* The administrative state of the zone */
+ struct admin_state state;
+ /* The physical zone from which to allocate */
+ struct physical_zone *allocation_zone;
+ /* The number of allocations done from the current allocation_zone */
+ block_count_t allocation_count;
+ /* The next zone */
+ struct logical_zone *next;
+};
+
+struct logical_zones {
+ /* The vdo whose zones these are */
+ struct vdo *vdo;
+ /* The manager for administrative actions */
+ struct action_manager *manager;
+ /* The number of zones */
+ zone_count_t zone_count;
+ /* The logical zones themselves */
+ struct logical_zone zones[];
+};
+
+int __must_check vdo_make_logical_zones(struct vdo *vdo,
+ struct logical_zones **zones_ptr);
+
+void vdo_free_logical_zones(struct logical_zones *zones);
+
+void vdo_drain_logical_zones(struct logical_zones *zones,
+ const struct admin_state_code *operation,
+ struct vdo_completion *completion);
+
+void vdo_resume_logical_zones(struct logical_zones *zones,
+ struct vdo_completion *parent);
+
+void vdo_increment_logical_zone_flush_generation(struct logical_zone *zone,
+ sequence_number_t expected_generation);
+
+void vdo_acquire_flush_generation_lock(struct data_vio *data_vio);
+
+void vdo_release_flush_generation_lock(struct data_vio *data_vio);
+
+struct physical_zone * __must_check vdo_get_next_allocation_zone(struct logical_zone *zone);
+
+void vdo_dump_logical_zone(const struct logical_zone *zone);
+
+#endif /* VDO_LOGICAL_ZONE_H */
diff --git a/drivers/md/dm-vdo/memory-alloc.c b/drivers/md/dm-vdo/memory-alloc.c
new file mode 100644
index 000000000000..185f259c7245
--- /dev/null
+++ b/drivers/md/dm-vdo/memory-alloc.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/sched/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+/*
+ * UDS and VDO keep track of which threads are allowed to allocate memory freely, and which threads
+ * must be careful to not do a memory allocation that does an I/O request. The 'allocating_threads'
+ * thread_registry and its associated methods implement this tracking.
+ */
+static struct thread_registry allocating_threads;
+
+static inline bool allocations_allowed(void)
+{
+ return vdo_lookup_thread(&allocating_threads) != NULL;
+}
+
+/*
+ * Register the current thread as an allocating thread.
+ *
+ * An optional flag location can be supplied indicating whether, at any given point in time, the
+ * threads associated with that flag should be allocating storage. If the flag is false, a message
+ * will be logged.
+ *
+ * If no flag is supplied, the thread is always allowed to allocate storage without complaint.
+ *
+ * @new_thread: registered_thread structure to use for the current thread
+ * @flag_ptr: Location of the allocation-allowed flag
+ */
+void vdo_register_allocating_thread(struct registered_thread *new_thread,
+ const bool *flag_ptr)
+{
+ if (flag_ptr == NULL) {
+ static const bool allocation_always_allowed = true;
+
+ flag_ptr = &allocation_always_allowed;
+ }
+
+ vdo_register_thread(&allocating_threads, new_thread, flag_ptr);
+}
+
+/* Unregister the current thread as an allocating thread. */
+void vdo_unregister_allocating_thread(void)
+{
+ vdo_unregister_thread(&allocating_threads);
+}
+
+/*
+ * We track how much memory has been allocated and freed. When we unload the module, we log an
+ * error if we have not freed all the memory that we allocated. Nearly all memory allocation and
+ * freeing is done using this module.
+ *
+ * We do not use kernel functions like the kvasprintf() method, which allocate memory indirectly
+ * using kmalloc.
+ *
+ * These data structures and methods are used to track the amount of memory used.
+ */
+
+/*
+ * We allocate very few large objects, and allocation/deallocation isn't done in a
+ * performance-critical stage for us, so a linked list should be fine.
+ */
+struct vmalloc_block_info {
+ void *ptr;
+ size_t size;
+ struct vmalloc_block_info *next;
+};
+
+static struct {
+ spinlock_t lock;
+ size_t kmalloc_blocks;
+ size_t kmalloc_bytes;
+ size_t vmalloc_blocks;
+ size_t vmalloc_bytes;
+ size_t peak_bytes;
+ struct vmalloc_block_info *vmalloc_list;
+} memory_stats __cacheline_aligned;
+
+static void update_peak_usage(void)
+{
+ size_t total_bytes = memory_stats.kmalloc_bytes + memory_stats.vmalloc_bytes;
+
+ if (total_bytes > memory_stats.peak_bytes)
+ memory_stats.peak_bytes = total_bytes;
+}
+
+static void add_kmalloc_block(size_t size)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&memory_stats.lock, flags);
+ memory_stats.kmalloc_blocks++;
+ memory_stats.kmalloc_bytes += size;
+ update_peak_usage();
+ spin_unlock_irqrestore(&memory_stats.lock, flags);
+}
+
+static void remove_kmalloc_block(size_t size)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&memory_stats.lock, flags);
+ memory_stats.kmalloc_blocks--;
+ memory_stats.kmalloc_bytes -= size;
+ spin_unlock_irqrestore(&memory_stats.lock, flags);
+}
+
+static void add_vmalloc_block(struct vmalloc_block_info *block)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&memory_stats.lock, flags);
+ block->next = memory_stats.vmalloc_list;
+ memory_stats.vmalloc_list = block;
+ memory_stats.vmalloc_blocks++;
+ memory_stats.vmalloc_bytes += block->size;
+ update_peak_usage();
+ spin_unlock_irqrestore(&memory_stats.lock, flags);
+}
+
+static void remove_vmalloc_block(void *ptr)
+{
+ struct vmalloc_block_info *block;
+ struct vmalloc_block_info **block_ptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&memory_stats.lock, flags);
+ for (block_ptr = &memory_stats.vmalloc_list;
+ (block = *block_ptr) != NULL;
+ block_ptr = &block->next) {
+ if (block->ptr == ptr) {
+ *block_ptr = block->next;
+ memory_stats.vmalloc_blocks--;
+ memory_stats.vmalloc_bytes -= block->size;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&memory_stats.lock, flags);
+ if (block != NULL)
+ vdo_free(block);
+ else
+ vdo_log_info("attempting to remove ptr %px not found in vmalloc list", ptr);
+}
+
+/*
+ * Determine whether allocating a memory block should use kmalloc or __vmalloc.
+ *
+ * vmalloc can allocate any integral number of pages.
+ *
+ * kmalloc can allocate any number of bytes up to a configured limit, which defaults to 8 megabytes
+ * on some systems. kmalloc is especially good when memory is being both allocated and freed, and
+ * it does this efficiently in a multi CPU environment.
+ *
+ * kmalloc usually rounds the size of the block up to the next power of two, so when the requested
+ * block is bigger than PAGE_SIZE / 2 bytes, kmalloc will never give you less space than the
+ * corresponding vmalloc allocation. Sometimes vmalloc will use less overhead than kmalloc.
+ *
+ * The advantages of kmalloc do not help out UDS or VDO, because we allocate all our memory up
+ * front and do not free and reallocate it. Sometimes we have problems using kmalloc, because the
+ * Linux memory page map can become so fragmented that kmalloc will not give us a 32KB chunk. We
+ * have used vmalloc as a backup to kmalloc in the past, and a follow-up vmalloc of 32KB will work.
+ * But there is no strong case to be made for using kmalloc over vmalloc for these size chunks.
+ *
+ * The kmalloc/vmalloc boundary is set at 4KB, and kmalloc gets the 4KB requests. There is no
+ * strong reason for favoring either kmalloc or vmalloc for 4KB requests, except that tracking
+ * vmalloc statistics uses a linked list implementation. Using a simple test, this choice of
+ * boundary results in 132 vmalloc calls. Using vmalloc for requests of exactly 4KB results in an
+ * additional 6374 vmalloc calls, which is much less efficient for tracking.
+ *
+ * @size: How many bytes to allocate
+ */
+static inline bool use_kmalloc(size_t size)
+{
+ return size <= PAGE_SIZE;
+}
+
+/*
+ * Allocate storage based on memory size and alignment, logging an error if the allocation fails.
+ * The memory will be zeroed.
+ *
+ * @size: The size of an object
+ * @align: The required alignment
+ * @what: What is being allocated (for error logging)
+ * @ptr: A pointer to hold the allocated memory
+ *
+ * Return: VDO_SUCCESS or an error code
+ */
+int vdo_allocate_memory(size_t size, size_t align, const char *what, void *ptr)
+{
+ /*
+ * The __GFP_RETRY_MAYFAIL flag means the VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication that progress has
+ * been made elsewhere. It can wait for other tasks to attempt high level approaches to
+ * freeing memory such as compaction (which removes fragmentation) and page-out. There is
+ * still a definite limit to the number of retries, but it is a larger limit than with
+ * __GFP_NORETRY. Allocations with this flag may fail, but only when there is genuinely
+ * little unused memory. While these allocations do not directly trigger the OOM killer,
+ * their failure indicates that the system is likely to need to use the OOM killer soon.
+ * The caller must handle failure, but can reasonably do so by failing a higher-level
+ * request, or completing it only in a much less efficient manner.
+ */
+ const gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL;
+ unsigned int noio_flags;
+ bool allocations_restricted = !allocations_allowed();
+ unsigned long start_time;
+ void *p = NULL;
+
+ if (unlikely(ptr == NULL))
+ return -EINVAL;
+
+ if (size == 0) {
+ *((void **) ptr) = NULL;
+ return VDO_SUCCESS;
+ }
+
+ if (allocations_restricted)
+ noio_flags = memalloc_noio_save();
+
+ start_time = jiffies;
+ if (use_kmalloc(size) && (align < PAGE_SIZE)) {
+ p = kmalloc(size, gfp_flags | __GFP_NOWARN);
+ if (p == NULL) {
+ /*
+ * It is possible for kmalloc to fail to allocate memory because there is
+ * no page available. A short sleep may allow the page reclaimer to
+ * free a page.
+ */
+ fsleep(1000);
+ p = kmalloc(size, gfp_flags);
+ }
+
+ if (p != NULL)
+ add_kmalloc_block(ksize(p));
+ } else {
+ struct vmalloc_block_info *block;
+
+ if (vdo_allocate(1, struct vmalloc_block_info, __func__, &block) == VDO_SUCCESS) {
+ /*
+ * It is possible for __vmalloc to fail to allocate memory because there
+ * are no pages available. A short sleep may allow the page reclaimer
+ * to free enough pages for a small allocation.
+ *
+ * For larger allocations, the page_alloc code is racing against the page
+ * reclaimer. If the page reclaimer can stay ahead of page_alloc, the
+ * __vmalloc will succeed. But if page_alloc overtakes the page reclaimer,
+ * the allocation fails. It is possible that more retries will succeed.
+ */
+ for (;;) {
+ p = __vmalloc(size, gfp_flags | __GFP_NOWARN);
+ if (p != NULL)
+ break;
+
+ if (jiffies_to_msecs(jiffies - start_time) > 1000) {
+ /* Try one more time, logging a failure for this call. */
+ p = __vmalloc(size, gfp_flags);
+ break;
+ }
+
+ fsleep(1000);
+ }
+
+ if (p == NULL) {
+ vdo_free(block);
+ } else {
+ block->ptr = p;
+ block->size = PAGE_ALIGN(size);
+ add_vmalloc_block(block);
+ }
+ }
+ }
+
+ if (allocations_restricted)
+ memalloc_noio_restore(noio_flags);
+
+ if (unlikely(p == NULL)) {
+ vdo_log_error("Could not allocate %zu bytes for %s in %u msecs",
+ size, what, jiffies_to_msecs(jiffies - start_time));
+ return -ENOMEM;
+ }
+
+ *((void **) ptr) = p;
+ return VDO_SUCCESS;
+}
+
+/*
+ * Allocate storage based on memory size, failing immediately if the required memory is not
+ * available. The memory will be zeroed.
+ *
+ * @size: The size of an object.
+ * @what: What is being allocated (for error logging)
+ *
+ * Return: pointer to the allocated memory, or NULL if the required space is not available.
+ */
+void *vdo_allocate_memory_nowait(size_t size, const char *what __maybe_unused)
+{
+ void *p = kmalloc(size, GFP_NOWAIT | __GFP_ZERO);
+
+ if (p != NULL)
+ add_kmalloc_block(ksize(p));
+
+ return p;
+}
+
+void vdo_free(void *ptr)
+{
+ if (ptr != NULL) {
+ if (is_vmalloc_addr(ptr)) {
+ remove_vmalloc_block(ptr);
+ vfree(ptr);
+ } else {
+ remove_kmalloc_block(ksize(ptr));
+ kfree(ptr);
+ }
+ }
+}
+
+/*
+ * Reallocate dynamically allocated memory. There are no alignment guarantees for the reallocated
+ * memory. If the new memory is larger than the old memory, the new space will be zeroed.
+ *
+ * @ptr: The memory to reallocate.
+ * @old_size: The old size of the memory
+ * @size: The new size to allocate
+ * @what: What is being allocated (for error logging)
+ * @new_ptr: A pointer to hold the reallocated pointer
+ *
+ * Return: VDO_SUCCESS or an error code
+ */
+int vdo_reallocate_memory(void *ptr, size_t old_size, size_t size, const char *what,
+ void *new_ptr)
+{
+ int result;
+
+ if (size == 0) {
+ vdo_free(ptr);
+ *(void **) new_ptr = NULL;
+ return VDO_SUCCESS;
+ }
+
+ result = vdo_allocate(size, char, what, new_ptr);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (ptr != NULL) {
+ if (old_size < size)
+ size = old_size;
+
+ memcpy(*((void **) new_ptr), ptr, size);
+ vdo_free(ptr);
+ }
+
+ return VDO_SUCCESS;
+}
+
+int vdo_duplicate_string(const char *string, const char *what, char **new_string)
+{
+ int result;
+ u8 *dup;
+
+ result = vdo_allocate(strlen(string) + 1, u8, what, &dup);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ memcpy(dup, string, strlen(string) + 1);
+ *new_string = dup;
+ return VDO_SUCCESS;
+}
+
+void vdo_memory_init(void)
+{
+ spin_lock_init(&memory_stats.lock);
+ vdo_initialize_thread_registry(&allocating_threads);
+}
+
+void vdo_memory_exit(void)
+{
+ VDO_ASSERT_LOG_ONLY(memory_stats.kmalloc_bytes == 0,
+ "kmalloc memory used (%zd bytes in %zd blocks) is returned to the kernel",
+ memory_stats.kmalloc_bytes, memory_stats.kmalloc_blocks);
+ VDO_ASSERT_LOG_ONLY(memory_stats.vmalloc_bytes == 0,
+ "vmalloc memory used (%zd bytes in %zd blocks) is returned to the kernel",
+ memory_stats.vmalloc_bytes, memory_stats.vmalloc_blocks);
+ vdo_log_debug("peak usage %zd bytes", memory_stats.peak_bytes);
+}
+
+void vdo_get_memory_stats(u64 *bytes_used, u64 *peak_bytes_used)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&memory_stats.lock, flags);
+ *bytes_used = memory_stats.kmalloc_bytes + memory_stats.vmalloc_bytes;
+ *peak_bytes_used = memory_stats.peak_bytes;
+ spin_unlock_irqrestore(&memory_stats.lock, flags);
+}
+
+/*
+ * Report stats on any allocated memory that we're tracking. Not all allocation types are
+ * guaranteed to be tracked in bytes (e.g., bios).
+ */
+void vdo_report_memory_usage(void)
+{
+ unsigned long flags;
+ u64 kmalloc_blocks;
+ u64 kmalloc_bytes;
+ u64 vmalloc_blocks;
+ u64 vmalloc_bytes;
+ u64 peak_usage;
+ u64 total_bytes;
+
+ spin_lock_irqsave(&memory_stats.lock, flags);
+ kmalloc_blocks = memory_stats.kmalloc_blocks;
+ kmalloc_bytes = memory_stats.kmalloc_bytes;
+ vmalloc_blocks = memory_stats.vmalloc_blocks;
+ vmalloc_bytes = memory_stats.vmalloc_bytes;
+ peak_usage = memory_stats.peak_bytes;
+ spin_unlock_irqrestore(&memory_stats.lock, flags);
+ total_bytes = kmalloc_bytes + vmalloc_bytes;
+ vdo_log_info("current module memory tracking (actual allocation sizes, not requested):");
+ vdo_log_info(" %llu bytes in %llu kmalloc blocks",
+ (unsigned long long) kmalloc_bytes,
+ (unsigned long long) kmalloc_blocks);
+ vdo_log_info(" %llu bytes in %llu vmalloc blocks",
+ (unsigned long long) vmalloc_bytes,
+ (unsigned long long) vmalloc_blocks);
+ vdo_log_info(" total %llu bytes, peak usage %llu bytes",
+ (unsigned long long) total_bytes, (unsigned long long) peak_usage);
+}
diff --git a/drivers/md/dm-vdo/memory-alloc.h b/drivers/md/dm-vdo/memory-alloc.h
new file mode 100644
index 000000000000..0093d9f940d9
--- /dev/null
+++ b/drivers/md/dm-vdo/memory-alloc.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_MEMORY_ALLOC_H
+#define VDO_MEMORY_ALLOC_H
+
+#include <linux/cache.h>
+#include <linux/io.h> /* for PAGE_SIZE */
+
+#include "permassert.h"
+#include "thread-registry.h"
+
+/* Custom memory allocation function that tracks memory usage */
+int __must_check vdo_allocate_memory(size_t size, size_t align, const char *what, void *ptr);
+
+/*
+ * Allocate storage based on element counts, sizes, and alignment.
+ *
+ * This is a generalized form of our allocation use case: It allocates an array of objects,
+ * optionally preceded by one object of another type (i.e., a struct with trailing variable-length
+ * array), with the alignment indicated.
+ *
+ * Why is this inline? The sizes and alignment will always be constant, when invoked through the
+ * macros below, and often the count will be a compile-time constant 1 or the number of extra bytes
+ * will be a compile-time constant 0. So at least some of the arithmetic can usually be optimized
+ * away, and the run-time selection between allocation functions always can. In many cases, it'll
+ * boil down to just a function call with a constant size.
+ *
+ * @count: The number of objects to allocate
+ * @size: The size of an object
+ * @extra: The number of additional bytes to allocate
+ * @align: The required alignment
+ * @what: What is being allocated (for error logging)
+ * @ptr: A pointer to hold the allocated memory
+ *
+ * Return: VDO_SUCCESS or an error code
+ */
+static inline int __vdo_do_allocation(size_t count, size_t size, size_t extra,
+ size_t align, const char *what, void *ptr)
+{
+ size_t total_size = count * size + extra;
+
+ /* Overflow check: */
+ if ((size > 0) && (count > ((SIZE_MAX - extra) / size))) {
+ /*
+ * This is kind of a hack: We rely on the fact that SIZE_MAX would cover the entire
+ * address space (minus one byte) and thus the system can never allocate that much
+ * and the call will always fail. So we can report an overflow as "out of memory"
+ * by asking for "merely" SIZE_MAX bytes.
+ */
+ total_size = SIZE_MAX;
+ }
+
+ return vdo_allocate_memory(total_size, align, what, ptr);
+}
+
+/*
+ * Allocate one or more elements of the indicated type, logging an error if the allocation fails.
+ * The memory will be zeroed.
+ *
+ * @COUNT: The number of objects to allocate
+ * @TYPE: The type of objects to allocate. This type determines the alignment of the allocation.
+ * @WHAT: What is being allocated (for error logging)
+ * @PTR: A pointer to hold the allocated memory
+ *
+ * Return: VDO_SUCCESS or an error code
+ */
+#define vdo_allocate(COUNT, TYPE, WHAT, PTR) \
+ __vdo_do_allocation(COUNT, sizeof(TYPE), 0, __alignof__(TYPE), WHAT, PTR)
+
+/*
+ * Allocate one object of an indicated type, followed by one or more elements of a second type,
+ * logging an error if the allocation fails. The memory will be zeroed.
+ *
+ * @TYPE1: The type of the primary object to allocate. This type determines the alignment of the
+ * allocated memory.
+ * @COUNT: The number of objects to allocate
+ * @TYPE2: The type of array objects to allocate
+ * @WHAT: What is being allocated (for error logging)
+ * @PTR: A pointer to hold the allocated memory
+ *
+ * Return: VDO_SUCCESS or an error code
+ */
+#define vdo_allocate_extended(TYPE1, COUNT, TYPE2, WHAT, PTR) \
+ __extension__({ \
+ int _result; \
+ TYPE1 **_ptr = (PTR); \
+ BUILD_BUG_ON(__alignof__(TYPE1) < __alignof__(TYPE2)); \
+ _result = __vdo_do_allocation(COUNT, \
+ sizeof(TYPE2), \
+ sizeof(TYPE1), \
+ __alignof__(TYPE1), \
+ WHAT, \
+ _ptr); \
+ _result; \
+ })
+
+/*
+ * Allocate memory starting on a cache line boundary, logging an error if the allocation fails. The
+ * memory will be zeroed.
+ *
+ * @size: The number of bytes to allocate
+ * @what: What is being allocated (for error logging)
+ * @ptr: A pointer to hold the allocated memory
+ *
+ * Return: VDO_SUCCESS or an error code
+ */
+static inline int __must_check vdo_allocate_cache_aligned(size_t size, const char *what, void *ptr)
+{
+ return vdo_allocate_memory(size, L1_CACHE_BYTES, what, ptr);
+}
+
+/*
+ * Allocate one element of the indicated type immediately, failing if the required memory is not
+ * immediately available.
+ *
+ * @size: The number of bytes to allocate
+ * @what: What is being allocated (for error logging)
+ *
+ * Return: pointer to the memory, or NULL if the memory is not available.
+ */
+void *__must_check vdo_allocate_memory_nowait(size_t size, const char *what);
+
+int __must_check vdo_reallocate_memory(void *ptr, size_t old_size, size_t size,
+ const char *what, void *new_ptr);
+
+int __must_check vdo_duplicate_string(const char *string, const char *what,
+ char **new_string);
+
+/* Free memory allocated with vdo_allocate(). */
+void vdo_free(void *ptr);
+
+static inline void *__vdo_forget(void **ptr_ptr)
+{
+ void *ptr = *ptr_ptr;
+
+ *ptr_ptr = NULL;
+ return ptr;
+}
+
+/*
+ * Null out a pointer and return a copy to it. This macro should be used when passing a pointer to
+ * a function for which it is not safe to access the pointer once the function returns.
+ */
+#define vdo_forget(ptr) __vdo_forget((void **) &(ptr))
+
+void vdo_memory_init(void);
+
+void vdo_memory_exit(void);
+
+void vdo_register_allocating_thread(struct registered_thread *new_thread,
+ const bool *flag_ptr);
+
+void vdo_unregister_allocating_thread(void);
+
+void vdo_get_memory_stats(u64 *bytes_used, u64 *peak_bytes_used);
+
+void vdo_report_memory_usage(void);
+
+#endif /* VDO_MEMORY_ALLOC_H */
diff --git a/drivers/md/dm-vdo/message-stats.c b/drivers/md/dm-vdo/message-stats.c
new file mode 100644
index 000000000000..2802cf92922b
--- /dev/null
+++ b/drivers/md/dm-vdo/message-stats.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "dedupe.h"
+#include "logger.h"
+#include "memory-alloc.h"
+#include "message-stats.h"
+#include "statistics.h"
+#include "thread-device.h"
+#include "vdo.h"
+
+static void write_u64(char *prefix, u64 value, char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ int count;
+
+ count = scnprintf(*buf, *maxlen, "%s%llu%s", prefix == NULL ? "" : prefix,
+ value, suffix == NULL ? "" : suffix);
+ *buf += count;
+ *maxlen -= count;
+}
+
+static void write_u32(char *prefix, u32 value, char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ int count;
+
+ count = scnprintf(*buf, *maxlen, "%s%u%s", prefix == NULL ? "" : prefix,
+ value, suffix == NULL ? "" : suffix);
+ *buf += count;
+ *maxlen -= count;
+}
+
+static void write_block_count_t(char *prefix, block_count_t value, char *suffix,
+ char **buf, unsigned int *maxlen)
+{
+ int count;
+
+ count = scnprintf(*buf, *maxlen, "%s%llu%s", prefix == NULL ? "" : prefix,
+ value, suffix == NULL ? "" : suffix);
+ *buf += count;
+ *maxlen -= count;
+}
+
+static void write_string(char *prefix, char *value, char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ int count;
+
+ count = scnprintf(*buf, *maxlen, "%s%s%s", prefix == NULL ? "" : prefix,
+ value, suffix == NULL ? "" : suffix);
+ *buf += count;
+ *maxlen -= count;
+}
+
+static void write_bool(char *prefix, bool value, char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ int count;
+
+ count = scnprintf(*buf, *maxlen, "%s%d%s", prefix == NULL ? "" : prefix,
+ value, suffix == NULL ? "" : suffix);
+ *buf += count;
+ *maxlen -= count;
+}
+
+static void write_u8(char *prefix, u8 value, char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ int count;
+
+ count = scnprintf(*buf, *maxlen, "%s%u%s", prefix == NULL ? "" : prefix,
+ value, suffix == NULL ? "" : suffix);
+ *buf += count;
+ *maxlen -= count;
+}
+
+static void write_block_allocator_statistics(char *prefix,
+ struct block_allocator_statistics *stats,
+ char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* The total number of slabs from which blocks may be allocated */
+ write_u64("slabCount : ", stats->slab_count, ", ", buf, maxlen);
+ /* The total number of slabs from which blocks have ever been allocated */
+ write_u64("slabsOpened : ", stats->slabs_opened, ", ", buf, maxlen);
+ /* The number of times since loading that a slab has been re-opened */
+ write_u64("slabsReopened : ", stats->slabs_reopened, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_commit_statistics(char *prefix, struct commit_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* The total number of items on which processing has started */
+ write_u64("started : ", stats->started, ", ", buf, maxlen);
+ /* The total number of items for which a write operation has been issued */
+ write_u64("written : ", stats->written, ", ", buf, maxlen);
+ /* The total number of items for which a write operation has completed */
+ write_u64("committed : ", stats->committed, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_recovery_journal_statistics(char *prefix,
+ struct recovery_journal_statistics *stats,
+ char *suffix, char **buf,
+ unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of times the on-disk journal was full */
+ write_u64("diskFull : ", stats->disk_full, ", ", buf, maxlen);
+ /* Number of times the recovery journal requested slab journal commits. */
+ write_u64("slabJournalCommitsRequested : ",
+ stats->slab_journal_commits_requested, ", ", buf, maxlen);
+ /* Write/Commit totals for individual journal entries */
+ write_commit_statistics("entries : ", &stats->entries, ", ", buf, maxlen);
+ /* Write/Commit totals for journal blocks */
+ write_commit_statistics("blocks : ", &stats->blocks, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_packer_statistics(char *prefix, struct packer_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of compressed data items written since startup */
+ write_u64("compressedFragmentsWritten : ",
+ stats->compressed_fragments_written, ", ", buf, maxlen);
+ /* Number of blocks containing compressed items written since startup */
+ write_u64("compressedBlocksWritten : ",
+ stats->compressed_blocks_written, ", ", buf, maxlen);
+ /* Number of VIOs that are pending in the packer */
+ write_u64("compressedFragmentsInPacker : ",
+ stats->compressed_fragments_in_packer, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_slab_journal_statistics(char *prefix,
+ struct slab_journal_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of times the on-disk journal was full */
+ write_u64("diskFullCount : ", stats->disk_full_count, ", ", buf, maxlen);
+ /* Number of times an entry was added over the flush threshold */
+ write_u64("flushCount : ", stats->flush_count, ", ", buf, maxlen);
+ /* Number of times an entry was added over the block threshold */
+ write_u64("blockedCount : ", stats->blocked_count, ", ", buf, maxlen);
+ /* Number of times a tail block was written */
+ write_u64("blocksWritten : ", stats->blocks_written, ", ", buf, maxlen);
+ /* Number of times we had to wait for the tail to write */
+ write_u64("tailBusyCount : ", stats->tail_busy_count, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_slab_summary_statistics(char *prefix,
+ struct slab_summary_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of blocks written */
+ write_u64("blocksWritten : ", stats->blocks_written, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_ref_counts_statistics(char *prefix, struct ref_counts_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of reference blocks written */
+ write_u64("blocksWritten : ", stats->blocks_written, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_block_map_statistics(char *prefix, struct block_map_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* number of dirty (resident) pages */
+ write_u32("dirtyPages : ", stats->dirty_pages, ", ", buf, maxlen);
+ /* number of clean (resident) pages */
+ write_u32("cleanPages : ", stats->clean_pages, ", ", buf, maxlen);
+ /* number of free pages */
+ write_u32("freePages : ", stats->free_pages, ", ", buf, maxlen);
+ /* number of pages in failed state */
+ write_u32("failedPages : ", stats->failed_pages, ", ", buf, maxlen);
+ /* number of pages incoming */
+ write_u32("incomingPages : ", stats->incoming_pages, ", ", buf, maxlen);
+ /* number of pages outgoing */
+ write_u32("outgoingPages : ", stats->outgoing_pages, ", ", buf, maxlen);
+ /* how many times free page not avail */
+ write_u32("cachePressure : ", stats->cache_pressure, ", ", buf, maxlen);
+ /* number of get_vdo_page() calls for read */
+ write_u64("readCount : ", stats->read_count, ", ", buf, maxlen);
+ /* number of get_vdo_page() calls for write */
+ write_u64("writeCount : ", stats->write_count, ", ", buf, maxlen);
+ /* number of times pages failed to read */
+ write_u64("failedReads : ", stats->failed_reads, ", ", buf, maxlen);
+ /* number of times pages failed to write */
+ write_u64("failedWrites : ", stats->failed_writes, ", ", buf, maxlen);
+ /* number of gets that are reclaimed */
+ write_u64("reclaimed : ", stats->reclaimed, ", ", buf, maxlen);
+ /* number of gets for outgoing pages */
+ write_u64("readOutgoing : ", stats->read_outgoing, ", ", buf, maxlen);
+ /* number of gets that were already there */
+ write_u64("foundInCache : ", stats->found_in_cache, ", ", buf, maxlen);
+ /* number of gets requiring discard */
+ write_u64("discardRequired : ", stats->discard_required, ", ", buf, maxlen);
+ /* number of gets enqueued for their page */
+ write_u64("waitForPage : ", stats->wait_for_page, ", ", buf, maxlen);
+ /* number of gets that have to fetch */
+ write_u64("fetchRequired : ", stats->fetch_required, ", ", buf, maxlen);
+ /* number of page fetches */
+ write_u64("pagesLoaded : ", stats->pages_loaded, ", ", buf, maxlen);
+ /* number of page saves */
+ write_u64("pagesSaved : ", stats->pages_saved, ", ", buf, maxlen);
+ /* the number of flushes issued */
+ write_u64("flushCount : ", stats->flush_count, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_hash_lock_statistics(char *prefix, struct hash_lock_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of times the UDS advice proved correct */
+ write_u64("dedupeAdviceValid : ", stats->dedupe_advice_valid, ", ", buf, maxlen);
+ /* Number of times the UDS advice proved incorrect */
+ write_u64("dedupeAdviceStale : ", stats->dedupe_advice_stale, ", ", buf, maxlen);
+ /* Number of writes with the same data as another in-flight write */
+ write_u64("concurrentDataMatches : ", stats->concurrent_data_matches,
+ ", ", buf, maxlen);
+ /* Number of writes whose hash collided with an in-flight write */
+ write_u64("concurrentHashCollisions : ",
+ stats->concurrent_hash_collisions, ", ", buf, maxlen);
+ /* Current number of dedupe queries that are in flight */
+ write_u32("currDedupeQueries : ", stats->curr_dedupe_queries, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_error_statistics(char *prefix, struct error_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* number of times VDO got an invalid dedupe advice PBN from UDS */
+ write_u64("invalidAdvicePBNCount : ", stats->invalid_advice_pbn_count,
+ ", ", buf, maxlen);
+ /* number of times a VIO completed with a VDO_NO_SPACE error */
+ write_u64("noSpaceErrorCount : ", stats->no_space_error_count, ", ",
+ buf, maxlen);
+ /* number of times a VIO completed with a VDO_READ_ONLY error */
+ write_u64("readOnlyErrorCount : ", stats->read_only_error_count, ", ",
+ buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_bio_stats(char *prefix, struct bio_stats *stats, char *suffix,
+ char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of REQ_OP_READ bios */
+ write_u64("read : ", stats->read, ", ", buf, maxlen);
+ /* Number of REQ_OP_WRITE bios with data */
+ write_u64("write : ", stats->write, ", ", buf, maxlen);
+ /* Number of bios tagged with REQ_PREFLUSH and containing no data */
+ write_u64("emptyFlush : ", stats->empty_flush, ", ", buf, maxlen);
+ /* Number of REQ_OP_DISCARD bios */
+ write_u64("discard : ", stats->discard, ", ", buf, maxlen);
+ /* Number of bios tagged with REQ_PREFLUSH */
+ write_u64("flush : ", stats->flush, ", ", buf, maxlen);
+ /* Number of bios tagged with REQ_FUA */
+ write_u64("fua : ", stats->fua, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_memory_usage(char *prefix, struct memory_usage *stats, char *suffix,
+ char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Tracked bytes currently allocated. */
+ write_u64("bytesUsed : ", stats->bytes_used, ", ", buf, maxlen);
+ /* Maximum tracked bytes allocated. */
+ write_u64("peakBytesUsed : ", stats->peak_bytes_used, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_index_statistics(char *prefix, struct index_statistics *stats,
+ char *suffix, char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ /* Number of records stored in the index */
+ write_u64("entriesIndexed : ", stats->entries_indexed, ", ", buf, maxlen);
+ /* Number of post calls that found an existing entry */
+ write_u64("postsFound : ", stats->posts_found, ", ", buf, maxlen);
+ /* Number of post calls that added a new entry */
+ write_u64("postsNotFound : ", stats->posts_not_found, ", ", buf, maxlen);
+ /* Number of query calls that found an existing entry */
+ write_u64("queriesFound : ", stats->queries_found, ", ", buf, maxlen);
+ /* Number of query calls that added a new entry */
+ write_u64("queriesNotFound : ", stats->queries_not_found, ", ", buf, maxlen);
+ /* Number of update calls that found an existing entry */
+ write_u64("updatesFound : ", stats->updates_found, ", ", buf, maxlen);
+ /* Number of update calls that added a new entry */
+ write_u64("updatesNotFound : ", stats->updates_not_found, ", ", buf, maxlen);
+ /* Number of entries discarded */
+ write_u64("entriesDiscarded : ", stats->entries_discarded, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+static void write_vdo_statistics(char *prefix, struct vdo_statistics *stats, char *suffix,
+ char **buf, unsigned int *maxlen)
+{
+ write_string(prefix, "{ ", NULL, buf, maxlen);
+ write_u32("version : ", stats->version, ", ", buf, maxlen);
+ /* Number of blocks used for data */
+ write_u64("dataBlocksUsed : ", stats->data_blocks_used, ", ", buf, maxlen);
+ /* Number of blocks used for VDO metadata */
+ write_u64("overheadBlocksUsed : ", stats->overhead_blocks_used, ", ",
+ buf, maxlen);
+ /* Number of logical blocks that are currently mapped to physical blocks */
+ write_u64("logicalBlocksUsed : ", stats->logical_blocks_used, ", ", buf, maxlen);
+ /* number of physical blocks */
+ write_block_count_t("physicalBlocks : ", stats->physical_blocks, ", ",
+ buf, maxlen);
+ /* number of logical blocks */
+ write_block_count_t("logicalBlocks : ", stats->logical_blocks, ", ",
+ buf, maxlen);
+ /* Size of the block map page cache, in bytes */
+ write_u64("blockMapCacheSize : ", stats->block_map_cache_size, ", ",
+ buf, maxlen);
+ /* The physical block size */
+ write_u64("blockSize : ", stats->block_size, ", ", buf, maxlen);
+ /* Number of times the VDO has successfully recovered */
+ write_u64("completeRecoveries : ", stats->complete_recoveries, ", ",
+ buf, maxlen);
+ /* Number of times the VDO has recovered from read-only mode */
+ write_u64("readOnlyRecoveries : ", stats->read_only_recoveries, ", ",
+ buf, maxlen);
+ /* String describing the operating mode of the VDO */
+ write_string("mode : ", stats->mode, ", ", buf, maxlen);
+ /* Whether the VDO is in recovery mode */
+ write_bool("inRecoveryMode : ", stats->in_recovery_mode, ", ", buf, maxlen);
+ /* What percentage of recovery mode work has been completed */
+ write_u8("recoveryPercentage : ", stats->recovery_percentage, ", ", buf, maxlen);
+ /* The statistics for the compressed block packer */
+ write_packer_statistics("packer : ", &stats->packer, ", ", buf, maxlen);
+ /* Counters for events in the block allocator */
+ write_block_allocator_statistics("allocator : ", &stats->allocator,
+ ", ", buf, maxlen);
+ /* Counters for events in the recovery journal */
+ write_recovery_journal_statistics("journal : ", &stats->journal, ", ",
+ buf, maxlen);
+ /* The statistics for the slab journals */
+ write_slab_journal_statistics("slabJournal : ", &stats->slab_journal,
+ ", ", buf, maxlen);
+ /* The statistics for the slab summary */
+ write_slab_summary_statistics("slabSummary : ", &stats->slab_summary,
+ ", ", buf, maxlen);
+ /* The statistics for the reference counts */
+ write_ref_counts_statistics("refCounts : ", &stats->ref_counts, ", ",
+ buf, maxlen);
+ /* The statistics for the block map */
+ write_block_map_statistics("blockMap : ", &stats->block_map, ", ", buf, maxlen);
+ /* The dedupe statistics from hash locks */
+ write_hash_lock_statistics("hashLock : ", &stats->hash_lock, ", ", buf, maxlen);
+ /* Counts of error conditions */
+ write_error_statistics("errors : ", &stats->errors, ", ", buf, maxlen);
+ /* The VDO instance */
+ write_u32("instance : ", stats->instance, ", ", buf, maxlen);
+ /* Current number of active VIOs */
+ write_u32("currentVIOsInProgress : ", stats->current_vios_in_progress,
+ ", ", buf, maxlen);
+ /* Maximum number of active VIOs */
+ write_u32("maxVIOs : ", stats->max_vios, ", ", buf, maxlen);
+ /* Number of times the UDS index was too slow in responding */
+ write_u64("dedupeAdviceTimeouts : ", stats->dedupe_advice_timeouts,
+ ", ", buf, maxlen);
+ /* Number of flush requests submitted to the storage device */
+ write_u64("flushOut : ", stats->flush_out, ", ", buf, maxlen);
+ /* Logical block size */
+ write_u64("logicalBlockSize : ", stats->logical_block_size, ", ", buf, maxlen);
+ /* Bios submitted into VDO from above */
+ write_bio_stats("biosIn : ", &stats->bios_in, ", ", buf, maxlen);
+ write_bio_stats("biosInPartial : ", &stats->bios_in_partial, ", ", buf, maxlen);
+ /* Bios submitted onward for user data */
+ write_bio_stats("biosOut : ", &stats->bios_out, ", ", buf, maxlen);
+ /* Bios submitted onward for metadata */
+ write_bio_stats("biosMeta : ", &stats->bios_meta, ", ", buf, maxlen);
+ write_bio_stats("biosJournal : ", &stats->bios_journal, ", ", buf, maxlen);
+ write_bio_stats("biosPageCache : ", &stats->bios_page_cache, ", ", buf, maxlen);
+ write_bio_stats("biosOutCompleted : ", &stats->bios_out_completed, ", ",
+ buf, maxlen);
+ write_bio_stats("biosMetaCompleted : ", &stats->bios_meta_completed,
+ ", ", buf, maxlen);
+ write_bio_stats("biosJournalCompleted : ",
+ &stats->bios_journal_completed, ", ", buf, maxlen);
+ write_bio_stats("biosPageCacheCompleted : ",
+ &stats->bios_page_cache_completed, ", ", buf, maxlen);
+ write_bio_stats("biosAcknowledged : ", &stats->bios_acknowledged, ", ",
+ buf, maxlen);
+ write_bio_stats("biosAcknowledgedPartial : ",
+ &stats->bios_acknowledged_partial, ", ", buf, maxlen);
+ /* Current number of bios in progress */
+ write_bio_stats("biosInProgress : ", &stats->bios_in_progress, ", ",
+ buf, maxlen);
+ /* Memory usage stats. */
+ write_memory_usage("memoryUsage : ", &stats->memory_usage, ", ", buf, maxlen);
+ /* The statistics for the UDS index */
+ write_index_statistics("index : ", &stats->index, ", ", buf, maxlen);
+ write_string(NULL, "}", suffix, buf, maxlen);
+}
+
+int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen)
+{
+ struct vdo_statistics *stats;
+ int result;
+
+ result = vdo_allocate(1, struct vdo_statistics, __func__, &stats);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error("Cannot allocate memory to write VDO statistics");
+ return result;
+ }
+
+ vdo_fetch_statistics(vdo, stats);
+ write_vdo_statistics(NULL, stats, NULL, &buf, &maxlen);
+ vdo_free(stats);
+ return VDO_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/message-stats.h b/drivers/md/dm-vdo/message-stats.h
new file mode 100644
index 000000000000..f7fceca9acab
--- /dev/null
+++ b/drivers/md/dm-vdo/message-stats.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_MESSAGE_STATS_H
+#define VDO_MESSAGE_STATS_H
+
+#include "types.h"
+
+int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen);
+
+#endif /* VDO_MESSAGE_STATS_H */
diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c
new file mode 100644
index 000000000000..00c9b9c05001
--- /dev/null
+++ b/drivers/md/dm-vdo/murmurhash3.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: LGPL-2.1+
+/*
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ * Adapted by John Wiele (jwiele@redhat.com).
+ */
+
+#include "murmurhash3.h"
+
+static inline u64 rotl64(u64 x, s8 r)
+{
+ return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL64(x, y) rotl64(x, y)
+static __always_inline u64 getblock64(const u64 *p, int i)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return p[i];
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return __builtin_bswap64(p[i]);
+#else
+#error "can't figure out byte order"
+#endif
+}
+
+static __always_inline void putblock64(u64 *p, int i, u64 value)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ p[i] = value;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ p[i] = __builtin_bswap64(value);
+#else
+#error "can't figure out byte order"
+#endif
+}
+
+/* Finalization mix - force all bits of a hash block to avalanche */
+
+static __always_inline u64 fmix64(u64 k)
+{
+ k ^= k >> 33;
+ k *= 0xff51afd7ed558ccdLLU;
+ k ^= k >> 33;
+ k *= 0xc4ceb9fe1a85ec53LLU;
+ k ^= k >> 33;
+
+ return k;
+}
+
+void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
+{
+ const u8 *data = key;
+ const int nblocks = len / 16;
+
+ u64 h1 = seed;
+ u64 h2 = seed;
+
+ const u64 c1 = 0x87c37b91114253d5LLU;
+ const u64 c2 = 0x4cf5ad432745937fLLU;
+
+ /* body */
+
+ const u64 *blocks = (const u64 *)(data);
+
+ int i;
+
+ for (i = 0; i < nblocks; i++) {
+ u64 k1 = getblock64(blocks, i * 2 + 0);
+ u64 k2 = getblock64(blocks, i * 2 + 1);
+
+ k1 *= c1;
+ k1 = ROTL64(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = ROTL64(h1, 27);
+ h1 += h2;
+ h1 = h1 * 5 + 0x52dce729;
+
+ k2 *= c2;
+ k2 = ROTL64(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ h2 = ROTL64(h2, 31);
+ h2 += h1;
+ h2 = h2 * 5 + 0x38495ab5;
+ }
+
+ /* tail */
+
+ {
+ const u8 *tail = (const u8 *)(data + nblocks * 16);
+
+ u64 k1 = 0;
+ u64 k2 = 0;
+
+ switch (len & 15) {
+ case 15:
+ k2 ^= ((u64)tail[14]) << 48;
+ fallthrough;
+ case 14:
+ k2 ^= ((u64)tail[13]) << 40;
+ fallthrough;
+ case 13:
+ k2 ^= ((u64)tail[12]) << 32;
+ fallthrough;
+ case 12:
+ k2 ^= ((u64)tail[11]) << 24;
+ fallthrough;
+ case 11:
+ k2 ^= ((u64)tail[10]) << 16;
+ fallthrough;
+ case 10:
+ k2 ^= ((u64)tail[9]) << 8;
+ fallthrough;
+ case 9:
+ k2 ^= ((u64)tail[8]) << 0;
+ k2 *= c2;
+ k2 = ROTL64(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+ fallthrough;
+
+ case 8:
+ k1 ^= ((u64)tail[7]) << 56;
+ fallthrough;
+ case 7:
+ k1 ^= ((u64)tail[6]) << 48;
+ fallthrough;
+ case 6:
+ k1 ^= ((u64)tail[5]) << 40;
+ fallthrough;
+ case 5:
+ k1 ^= ((u64)tail[4]) << 32;
+ fallthrough;
+ case 4:
+ k1 ^= ((u64)tail[3]) << 24;
+ fallthrough;
+ case 3:
+ k1 ^= ((u64)tail[2]) << 16;
+ fallthrough;
+ case 2:
+ k1 ^= ((u64)tail[1]) << 8;
+ fallthrough;
+ case 1:
+ k1 ^= ((u64)tail[0]) << 0;
+ k1 *= c1;
+ k1 = ROTL64(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ break;
+ default:
+ break;
+ };
+ }
+ /* finalization */
+
+ h1 ^= len;
+ h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ putblock64((u64 *)out, 0, h1);
+ putblock64((u64 *)out, 1, h2);
+}
diff --git a/drivers/md/dm-vdo/murmurhash3.h b/drivers/md/dm-vdo/murmurhash3.h
new file mode 100644
index 000000000000..d84711ddb659
--- /dev/null
+++ b/drivers/md/dm-vdo/murmurhash3.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ */
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+void murmurhash3_128(const void *key, int len, u32 seed, void *out);
+
+#endif /* _MURMURHASH3_H_ */
diff --git a/drivers/md/dm-vdo/numeric.h b/drivers/md/dm-vdo/numeric.h
new file mode 100644
index 000000000000..dc8c400b21d2
--- /dev/null
+++ b/drivers/md/dm-vdo/numeric.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_NUMERIC_H
+#define UDS_NUMERIC_H
+
+#include <asm/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+/*
+ * These utilities encode or decode a number from an offset in a larger data buffer and then
+ * advance the offset pointer to the next field in the buffer.
+ */
+
+static inline void decode_s64_le(const u8 *buffer, size_t *offset, s64 *decoded)
+{
+ *decoded = get_unaligned_le64(buffer + *offset);
+ *offset += sizeof(s64);
+}
+
+static inline void encode_s64_le(u8 *data, size_t *offset, s64 to_encode)
+{
+ put_unaligned_le64(to_encode, data + *offset);
+ *offset += sizeof(s64);
+}
+
+static inline void decode_u64_le(const u8 *buffer, size_t *offset, u64 *decoded)
+{
+ *decoded = get_unaligned_le64(buffer + *offset);
+ *offset += sizeof(u64);
+}
+
+static inline void encode_u64_le(u8 *data, size_t *offset, u64 to_encode)
+{
+ put_unaligned_le64(to_encode, data + *offset);
+ *offset += sizeof(u64);
+}
+
+static inline void decode_s32_le(const u8 *buffer, size_t *offset, s32 *decoded)
+{
+ *decoded = get_unaligned_le32(buffer + *offset);
+ *offset += sizeof(s32);
+}
+
+static inline void encode_s32_le(u8 *data, size_t *offset, s32 to_encode)
+{
+ put_unaligned_le32(to_encode, data + *offset);
+ *offset += sizeof(s32);
+}
+
+static inline void decode_u32_le(const u8 *buffer, size_t *offset, u32 *decoded)
+{
+ *decoded = get_unaligned_le32(buffer + *offset);
+ *offset += sizeof(u32);
+}
+
+static inline void encode_u32_le(u8 *data, size_t *offset, u32 to_encode)
+{
+ put_unaligned_le32(to_encode, data + *offset);
+ *offset += sizeof(u32);
+}
+
+static inline void decode_u16_le(const u8 *buffer, size_t *offset, u16 *decoded)
+{
+ *decoded = get_unaligned_le16(buffer + *offset);
+ *offset += sizeof(u16);
+}
+
+static inline void encode_u16_le(u8 *data, size_t *offset, u16 to_encode)
+{
+ put_unaligned_le16(to_encode, data + *offset);
+ *offset += sizeof(u16);
+}
+
+#endif /* UDS_NUMERIC_H */
diff --git a/drivers/md/dm-vdo/packer.c b/drivers/md/dm-vdo/packer.c
new file mode 100644
index 000000000000..16cf29b4c90a
--- /dev/null
+++ b/drivers/md/dm-vdo/packer.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "packer.h"
+
+#include <linux/atomic.h>
+#include <linux/blkdev.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+#include "admin-state.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "dedupe.h"
+#include "encodings.h"
+#include "io-submitter.h"
+#include "physical-zone.h"
+#include "status-codes.h"
+#include "vdo.h"
+#include "vio.h"
+
+static const struct version_number COMPRESSED_BLOCK_1_0 = {
+ .major_version = 1,
+ .minor_version = 0,
+};
+
+#define COMPRESSED_BLOCK_1_0_SIZE (4 + 4 + (2 * VDO_MAX_COMPRESSION_SLOTS))
+
+/**
+ * vdo_get_compressed_block_fragment() - Get a reference to a compressed fragment from a compressed
+ * block.
+ * @mapping_state [in] The mapping state for the look up.
+ * @compressed_block [in] The compressed block that was read from disk.
+ * @fragment_offset [out] The offset of the fragment within a compressed block.
+ * @fragment_size [out] The size of the fragment.
+ *
+ * Return: If a valid compressed fragment is found, VDO_SUCCESS; otherwise, VDO_INVALID_FRAGMENT if
+ * the fragment is invalid.
+ */
+int vdo_get_compressed_block_fragment(enum block_mapping_state mapping_state,
+ struct compressed_block *block,
+ u16 *fragment_offset, u16 *fragment_size)
+{
+ u16 compressed_size;
+ u16 offset = 0;
+ unsigned int i;
+ u8 slot;
+ struct version_number version;
+
+ if (!vdo_is_state_compressed(mapping_state))
+ return VDO_INVALID_FRAGMENT;
+
+ version = vdo_unpack_version_number(block->header.version);
+ if (!vdo_are_same_version(version, COMPRESSED_BLOCK_1_0))
+ return VDO_INVALID_FRAGMENT;
+
+ slot = mapping_state - VDO_MAPPING_STATE_COMPRESSED_BASE;
+ if (slot >= VDO_MAX_COMPRESSION_SLOTS)
+ return VDO_INVALID_FRAGMENT;
+
+ compressed_size = __le16_to_cpu(block->header.sizes[slot]);
+ for (i = 0; i < slot; i++) {
+ offset += __le16_to_cpu(block->header.sizes[i]);
+ if (offset >= VDO_COMPRESSED_BLOCK_DATA_SIZE)
+ return VDO_INVALID_FRAGMENT;
+ }
+
+ if ((offset + compressed_size) > VDO_COMPRESSED_BLOCK_DATA_SIZE)
+ return VDO_INVALID_FRAGMENT;
+
+ *fragment_offset = offset;
+ *fragment_size = compressed_size;
+ return VDO_SUCCESS;
+}
+
+/**
+ * assert_on_packer_thread() - Check that we are on the packer thread.
+ * @packer: The packer.
+ * @caller: The function which is asserting.
+ */
+static inline void assert_on_packer_thread(struct packer *packer, const char *caller)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == packer->thread_id),
+ "%s() called from packer thread", caller);
+}
+
+/**
+ * insert_in_sorted_list() - Insert a bin to the list.
+ * @packer: The packer.
+ * @bin: The bin to move to its sorted position.
+ *
+ * The list is in ascending order of free space. Since all bins are already in the list, this
+ * actually moves the bin to the correct position in the list.
+ */
+static void insert_in_sorted_list(struct packer *packer, struct packer_bin *bin)
+{
+ struct packer_bin *active_bin;
+
+ list_for_each_entry(active_bin, &packer->bins, list)
+ if (active_bin->free_space > bin->free_space) {
+ list_move_tail(&bin->list, &active_bin->list);
+ return;
+ }
+
+ list_move_tail(&bin->list, &packer->bins);
+}
+
+/**
+ * make_bin() - Allocate a bin and put it into the packer's list.
+ * @packer: The packer.
+ */
+static int __must_check make_bin(struct packer *packer)
+{
+ struct packer_bin *bin;
+ int result;
+
+ result = vdo_allocate_extended(struct packer_bin, VDO_MAX_COMPRESSION_SLOTS,
+ struct vio *, __func__, &bin);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ bin->free_space = VDO_COMPRESSED_BLOCK_DATA_SIZE;
+ INIT_LIST_HEAD(&bin->list);
+ list_add_tail(&bin->list, &packer->bins);
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_make_packer() - Make a new block packer.
+ *
+ * @vdo: The vdo to which this packer belongs.
+ * @bin_count: The number of partial bins to keep in memory.
+ * @packer_ptr: A pointer to hold the new packer.
+ *
+ * Return: VDO_SUCCESS or an error
+ */
+int vdo_make_packer(struct vdo *vdo, block_count_t bin_count, struct packer **packer_ptr)
+{
+ struct packer *packer;
+ block_count_t i;
+ int result;
+
+ result = vdo_allocate(1, struct packer, __func__, &packer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ packer->thread_id = vdo->thread_config.packer_thread;
+ packer->size = bin_count;
+ INIT_LIST_HEAD(&packer->bins);
+ vdo_set_admin_state_code(&packer->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+
+ for (i = 0; i < bin_count; i++) {
+ result = make_bin(packer);
+ if (result != VDO_SUCCESS) {
+ vdo_free_packer(packer);
+ return result;
+ }
+ }
+
+ /*
+ * The canceled bin can hold up to half the number of user vios. Every canceled vio in the
+ * bin must have a canceler for which it is waiting, and any canceler will only have
+ * canceled one lock holder at a time.
+ */
+ result = vdo_allocate_extended(struct packer_bin, MAXIMUM_VDO_USER_VIOS / 2,
+ struct vio *, __func__, &packer->canceled_bin);
+ if (result != VDO_SUCCESS) {
+ vdo_free_packer(packer);
+ return result;
+ }
+
+ result = vdo_make_default_thread(vdo, packer->thread_id);
+ if (result != VDO_SUCCESS) {
+ vdo_free_packer(packer);
+ return result;
+ }
+
+ *packer_ptr = packer;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_free_packer() - Free a block packer.
+ * @packer: The packer to free.
+ */
+void vdo_free_packer(struct packer *packer)
+{
+ struct packer_bin *bin, *tmp;
+
+ if (packer == NULL)
+ return;
+
+ list_for_each_entry_safe(bin, tmp, &packer->bins, list) {
+ list_del_init(&bin->list);
+ vdo_free(bin);
+ }
+
+ vdo_free(vdo_forget(packer->canceled_bin));
+ vdo_free(packer);
+}
+
+/**
+ * get_packer_from_data_vio() - Get the packer from a data_vio.
+ * @data_vio: The data_vio.
+ *
+ * Return: The packer from the VDO to which the data_vio belongs.
+ */
+static inline struct packer *get_packer_from_data_vio(struct data_vio *data_vio)
+{
+ return vdo_from_data_vio(data_vio)->packer;
+}
+
+/**
+ * vdo_get_packer_statistics() - Get the current statistics from the packer.
+ * @packer: The packer to query.
+ *
+ * Return: a copy of the current statistics for the packer.
+ */
+struct packer_statistics vdo_get_packer_statistics(const struct packer *packer)
+{
+ const struct packer_statistics *stats = &packer->statistics;
+
+ return (struct packer_statistics) {
+ .compressed_fragments_written = READ_ONCE(stats->compressed_fragments_written),
+ .compressed_blocks_written = READ_ONCE(stats->compressed_blocks_written),
+ .compressed_fragments_in_packer = READ_ONCE(stats->compressed_fragments_in_packer),
+ };
+}
+
+/**
+ * abort_packing() - Abort packing a data_vio.
+ * @data_vio: The data_vio to abort.
+ */
+static void abort_packing(struct data_vio *data_vio)
+{
+ struct packer *packer = get_packer_from_data_vio(data_vio);
+
+ WRITE_ONCE(packer->statistics.compressed_fragments_in_packer,
+ packer->statistics.compressed_fragments_in_packer - 1);
+
+ write_data_vio(data_vio);
+}
+
+/**
+ * release_compressed_write_waiter() - Update a data_vio for which a successful compressed write
+ * has completed and send it on its way.
+
+ * @data_vio: The data_vio to release.
+ * @allocation: The allocation to which the compressed block was written.
+ */
+static void release_compressed_write_waiter(struct data_vio *data_vio,
+ struct allocation *allocation)
+{
+ data_vio->new_mapped = (struct zoned_pbn) {
+ .pbn = allocation->pbn,
+ .zone = allocation->zone,
+ .state = data_vio->compression.slot + VDO_MAPPING_STATE_COMPRESSED_BASE,
+ };
+
+ vdo_share_compressed_write_lock(data_vio, allocation->lock);
+ update_metadata_for_data_vio_write(data_vio, allocation->lock);
+}
+
+/**
+ * finish_compressed_write() - Finish a compressed block write.
+ * @completion: The compressed write completion.
+ *
+ * This callback is registered in continue_after_allocation().
+ */
+static void finish_compressed_write(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct data_vio *client, *next;
+
+ assert_data_vio_in_allocated_zone(agent);
+
+ /*
+ * Process all the non-agent waiters first to ensure that the pbn lock can not be released
+ * until all of them have had a chance to journal their increfs.
+ */
+ for (client = agent->compression.next_in_batch; client != NULL; client = next) {
+ next = client->compression.next_in_batch;
+ release_compressed_write_waiter(client, &agent->allocation);
+ }
+
+ completion->error_handler = handle_data_vio_error;
+ release_compressed_write_waiter(agent, &agent->allocation);
+}
+
+static void handle_compressed_write_error(struct vdo_completion *completion)
+{
+ struct data_vio *agent = as_data_vio(completion);
+ struct allocation *allocation = &agent->allocation;
+ struct data_vio *client, *next;
+
+ if (vdo_requeue_completion_if_needed(completion, allocation->zone->thread_id))
+ return;
+
+ update_vio_error_stats(as_vio(completion),
+ "Completing compressed write vio for physical block %llu with error",
+ (unsigned long long) allocation->pbn);
+
+ for (client = agent->compression.next_in_batch; client != NULL; client = next) {
+ next = client->compression.next_in_batch;
+ write_data_vio(client);
+ }
+
+ /* Now that we've released the batch from the packer, forget the error and continue on. */
+ vdo_reset_completion(completion);
+ completion->error_handler = handle_data_vio_error;
+ write_data_vio(agent);
+}
+
+/**
+ * add_to_bin() - Put a data_vio in a specific packer_bin in which it will definitely fit.
+ * @bin: The bin in which to put the data_vio.
+ * @data_vio: The data_vio to add.
+ */
+static void add_to_bin(struct packer_bin *bin, struct data_vio *data_vio)
+{
+ data_vio->compression.bin = bin;
+ data_vio->compression.slot = bin->slots_used;
+ bin->incoming[bin->slots_used++] = data_vio;
+}
+
+/**
+ * remove_from_bin() - Get the next data_vio whose compression has not been canceled from a bin.
+ * @packer: The packer.
+ * @bin: The bin from which to get a data_vio.
+ *
+ * Any canceled data_vios will be moved to the canceled bin.
+ * Return: An uncanceled data_vio from the bin or NULL if there are none.
+ */
+static struct data_vio *remove_from_bin(struct packer *packer, struct packer_bin *bin)
+{
+ while (bin->slots_used > 0) {
+ struct data_vio *data_vio = bin->incoming[--bin->slots_used];
+
+ if (!advance_data_vio_compression_stage(data_vio).may_not_compress) {
+ data_vio->compression.bin = NULL;
+ return data_vio;
+ }
+
+ add_to_bin(packer->canceled_bin, data_vio);
+ }
+
+ /* The bin is now empty. */
+ bin->free_space = VDO_COMPRESSED_BLOCK_DATA_SIZE;
+ return NULL;
+}
+
+/**
+ * initialize_compressed_block() - Initialize a compressed block.
+ * @block: The compressed block to initialize.
+ * @size: The size of the agent's fragment.
+ *
+ * This method initializes the compressed block in the compressed write agent. Because the
+ * compressor already put the agent's compressed fragment at the start of the compressed block's
+ * data field, it needn't be copied. So all we need do is initialize the header and set the size of
+ * the agent's fragment.
+ */
+static void initialize_compressed_block(struct compressed_block *block, u16 size)
+{
+ /*
+ * Make sure the block layout isn't accidentally changed by changing the length of the
+ * block header.
+ */
+ BUILD_BUG_ON(sizeof(struct compressed_block_header) != COMPRESSED_BLOCK_1_0_SIZE);
+
+ block->header.version = vdo_pack_version_number(COMPRESSED_BLOCK_1_0);
+ block->header.sizes[0] = __cpu_to_le16(size);
+}
+
+/**
+ * pack_fragment() - Pack a data_vio's fragment into the compressed block in which it is already
+ * known to fit.
+ * @compression: The agent's compression_state to pack in to.
+ * @data_vio: The data_vio to pack.
+ * @offset: The offset into the compressed block at which to pack the fragment.
+ * @compressed_block: The compressed block which will be written out when batch is fully packed.
+ *
+ * Return: The new amount of space used.
+ */
+static block_size_t __must_check pack_fragment(struct compression_state *compression,
+ struct data_vio *data_vio,
+ block_size_t offset, slot_number_t slot,
+ struct compressed_block *block)
+{
+ struct compression_state *to_pack = &data_vio->compression;
+ char *fragment = to_pack->block->data;
+
+ to_pack->next_in_batch = compression->next_in_batch;
+ compression->next_in_batch = data_vio;
+ to_pack->slot = slot;
+ block->header.sizes[slot] = __cpu_to_le16(to_pack->size);
+ memcpy(&block->data[offset], fragment, to_pack->size);
+ return (offset + to_pack->size);
+}
+
+/**
+ * compressed_write_end_io() - The bio_end_io for a compressed block write.
+ * @bio: The bio for the compressed write.
+ */
+static void compressed_write_end_io(struct bio *bio)
+{
+ struct data_vio *data_vio = vio_as_data_vio(bio->bi_private);
+
+ vdo_count_completed_bios(bio);
+ set_data_vio_allocated_zone_callback(data_vio, finish_compressed_write);
+ continue_data_vio_with_error(data_vio, blk_status_to_errno(bio->bi_status));
+}
+
+/**
+ * write_bin() - Write out a bin.
+ * @packer: The packer.
+ * @bin: The bin to write.
+ */
+static void write_bin(struct packer *packer, struct packer_bin *bin)
+{
+ int result;
+ block_size_t offset;
+ slot_number_t slot = 1;
+ struct compression_state *compression;
+ struct compressed_block *block;
+ struct data_vio *agent = remove_from_bin(packer, bin);
+ struct data_vio *client;
+ struct packer_statistics *stats;
+
+ if (agent == NULL)
+ return;
+
+ compression = &agent->compression;
+ compression->slot = 0;
+ block = compression->block;
+ initialize_compressed_block(block, compression->size);
+ offset = compression->size;
+
+ while ((client = remove_from_bin(packer, bin)) != NULL)
+ offset = pack_fragment(compression, client, offset, slot++, block);
+
+ /*
+ * If the batch contains only a single vio, then we save nothing by saving the compressed
+ * form. Continue processing the single vio in the batch.
+ */
+ if (slot == 1) {
+ abort_packing(agent);
+ return;
+ }
+
+ if (slot < VDO_MAX_COMPRESSION_SLOTS) {
+ /* Clear out the sizes of the unused slots */
+ memset(&block->header.sizes[slot], 0,
+ (VDO_MAX_COMPRESSION_SLOTS - slot) * sizeof(__le16));
+ }
+
+ agent->vio.completion.error_handler = handle_compressed_write_error;
+ if (vdo_is_read_only(vdo_from_data_vio(agent))) {
+ continue_data_vio_with_error(agent, VDO_READ_ONLY);
+ return;
+ }
+
+ result = vio_reset_bio(&agent->vio, (char *) block, compressed_write_end_io,
+ REQ_OP_WRITE, agent->allocation.pbn);
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(agent, result);
+ return;
+ }
+
+ /*
+ * Once the compressed write is submitted, the fragments are no longer in the packer, so
+ * update stats now.
+ */
+ stats = &packer->statistics;
+ WRITE_ONCE(stats->compressed_fragments_in_packer,
+ (stats->compressed_fragments_in_packer - slot));
+ WRITE_ONCE(stats->compressed_fragments_written,
+ (stats->compressed_fragments_written + slot));
+ WRITE_ONCE(stats->compressed_blocks_written,
+ stats->compressed_blocks_written + 1);
+
+ vdo_submit_data_vio(agent);
+}
+
+/**
+ * add_data_vio_to_packer_bin() - Add a data_vio to a bin's incoming queue
+ * @packer: The packer.
+ * @bin: The bin to which to add the data_vio.
+ * @data_vio: The data_vio to add to the bin's queue.
+ *
+ * Adds a data_vio to a bin's incoming queue, handles logical space change, and calls physical
+ * space processor.
+ */
+static void add_data_vio_to_packer_bin(struct packer *packer, struct packer_bin *bin,
+ struct data_vio *data_vio)
+{
+ /* If the selected bin doesn't have room, start a new batch to make room. */
+ if (bin->free_space < data_vio->compression.size)
+ write_bin(packer, bin);
+
+ add_to_bin(bin, data_vio);
+ bin->free_space -= data_vio->compression.size;
+
+ /* If we happen to exactly fill the bin, start a new batch. */
+ if ((bin->slots_used == VDO_MAX_COMPRESSION_SLOTS) ||
+ (bin->free_space == 0))
+ write_bin(packer, bin);
+
+ /* Now that we've finished changing the free space, restore the sort order. */
+ insert_in_sorted_list(packer, bin);
+}
+
+/**
+ * select_bin() - Select the bin that should be used to pack the compressed data in a data_vio with
+ * other data_vios.
+ * @packer: The packer.
+ * @data_vio: The data_vio.
+ */
+static struct packer_bin * __must_check select_bin(struct packer *packer,
+ struct data_vio *data_vio)
+{
+ /*
+ * First best fit: select the bin with the least free space that has enough room for the
+ * compressed data in the data_vio.
+ */
+ struct packer_bin *bin, *fullest_bin;
+
+ list_for_each_entry(bin, &packer->bins, list) {
+ if (bin->free_space >= data_vio->compression.size)
+ return bin;
+ }
+
+ /*
+ * None of the bins have enough space for the data_vio. We're not allowed to create new
+ * bins, so we have to overflow one of the existing bins. It's pretty intuitive to select
+ * the fullest bin, since that "wastes" the least amount of free space in the compressed
+ * block. But if the space currently used in the fullest bin is smaller than the compressed
+ * size of the incoming block, it seems wrong to force that bin to write when giving up on
+ * compressing the incoming data_vio would likewise "waste" the least amount of free space.
+ */
+ fullest_bin = list_first_entry(&packer->bins, struct packer_bin, list);
+ if (data_vio->compression.size >=
+ (VDO_COMPRESSED_BLOCK_DATA_SIZE - fullest_bin->free_space))
+ return NULL;
+
+ /*
+ * The fullest bin doesn't have room, but writing it out and starting a new batch with the
+ * incoming data_vio will increase the packer's free space.
+ */
+ return fullest_bin;
+}
+
+/**
+ * vdo_attempt_packing() - Attempt to rewrite the data in this data_vio as part of a compressed
+ * block.
+ * @data_vio: The data_vio to pack.
+ */
+void vdo_attempt_packing(struct data_vio *data_vio)
+{
+ int result;
+ struct packer_bin *bin;
+ struct data_vio_compression_status status = get_data_vio_compression_status(data_vio);
+ struct packer *packer = get_packer_from_data_vio(data_vio);
+
+ assert_on_packer_thread(packer, __func__);
+
+ result = VDO_ASSERT((status.stage == DATA_VIO_COMPRESSING),
+ "attempt to pack data_vio not ready for packing, stage: %u",
+ status.stage);
+ if (result != VDO_SUCCESS)
+ return;
+
+ /*
+ * Increment whether or not this data_vio will be packed or not since abort_packing()
+ * always decrements the counter.
+ */
+ WRITE_ONCE(packer->statistics.compressed_fragments_in_packer,
+ packer->statistics.compressed_fragments_in_packer + 1);
+
+ /*
+ * If packing of this data_vio is disallowed for administrative reasons, give up before
+ * making any state changes.
+ */
+ if (!vdo_is_state_normal(&packer->state) ||
+ (data_vio->flush_generation < packer->flush_generation)) {
+ abort_packing(data_vio);
+ return;
+ }
+
+ /*
+ * The advance_data_vio_compression_stage() check here verifies that the data_vio is
+ * allowed to be compressed (if it has already been canceled, we'll fall out here). Once
+ * the data_vio is in the DATA_VIO_PACKING state, it must be guaranteed to be put in a bin
+ * before any more requests can be processed by the packer thread. Otherwise, a canceling
+ * data_vio could attempt to remove the canceled data_vio from the packer and fail to
+ * rendezvous with it. Thus, we must call select_bin() first to ensure that we will
+ * actually add the data_vio to a bin before advancing to the DATA_VIO_PACKING stage.
+ */
+ bin = select_bin(packer, data_vio);
+ if ((bin == NULL) ||
+ (advance_data_vio_compression_stage(data_vio).stage != DATA_VIO_PACKING)) {
+ abort_packing(data_vio);
+ return;
+ }
+
+ add_data_vio_to_packer_bin(packer, bin, data_vio);
+}
+
+/**
+ * check_for_drain_complete() - Check whether the packer has drained.
+ * @packer: The packer.
+ */
+static void check_for_drain_complete(struct packer *packer)
+{
+ if (vdo_is_state_draining(&packer->state) && (packer->canceled_bin->slots_used == 0))
+ vdo_finish_draining(&packer->state);
+}
+
+/**
+ * write_all_non_empty_bins() - Write out all non-empty bins on behalf of a flush or suspend.
+ * @packer: The packer being flushed.
+ */
+static void write_all_non_empty_bins(struct packer *packer)
+{
+ struct packer_bin *bin;
+
+ list_for_each_entry(bin, &packer->bins, list)
+ write_bin(packer, bin);
+ /*
+ * We don't need to re-sort the bin here since this loop will make every bin have
+ * the same amount of free space, so every ordering is sorted.
+ */
+
+ check_for_drain_complete(packer);
+}
+
+/**
+ * vdo_flush_packer() - Request that the packer flush asynchronously.
+ * @packer: The packer to flush.
+ *
+ * All bins with at least two compressed data blocks will be written out, and any solitary pending
+ * VIOs will be released from the packer. While flushing is in progress, any VIOs submitted to
+ * vdo_attempt_packing() will be continued immediately without attempting to pack them.
+ */
+void vdo_flush_packer(struct packer *packer)
+{
+ assert_on_packer_thread(packer, __func__);
+ if (vdo_is_state_normal(&packer->state))
+ write_all_non_empty_bins(packer);
+}
+
+/**
+ * vdo_remove_lock_holder_from_packer() - Remove a lock holder from the packer.
+ * @completion: The data_vio which needs a lock held by a data_vio in the packer. The data_vio's
+ * compression.lock_holder field will point to the data_vio to remove.
+ */
+void vdo_remove_lock_holder_from_packer(struct vdo_completion *completion)
+{
+ struct data_vio *data_vio = as_data_vio(completion);
+ struct packer *packer = get_packer_from_data_vio(data_vio);
+ struct data_vio *lock_holder;
+ struct packer_bin *bin;
+ slot_number_t slot;
+
+ assert_data_vio_in_packer_zone(data_vio);
+
+ lock_holder = vdo_forget(data_vio->compression.lock_holder);
+ bin = lock_holder->compression.bin;
+ VDO_ASSERT_LOG_ONLY((bin != NULL), "data_vio in packer has a bin");
+
+ slot = lock_holder->compression.slot;
+ bin->slots_used--;
+ if (slot < bin->slots_used) {
+ bin->incoming[slot] = bin->incoming[bin->slots_used];
+ bin->incoming[slot]->compression.slot = slot;
+ }
+
+ lock_holder->compression.bin = NULL;
+ lock_holder->compression.slot = 0;
+
+ if (bin != packer->canceled_bin) {
+ bin->free_space += lock_holder->compression.size;
+ insert_in_sorted_list(packer, bin);
+ }
+
+ abort_packing(lock_holder);
+ check_for_drain_complete(packer);
+}
+
+/**
+ * vdo_increment_packer_flush_generation() - Increment the flush generation in the packer.
+ * @packer: The packer.
+ *
+ * This will also cause the packer to flush so that any VIOs from previous generations will exit
+ * the packer.
+ */
+void vdo_increment_packer_flush_generation(struct packer *packer)
+{
+ assert_on_packer_thread(packer, __func__);
+ packer->flush_generation++;
+ vdo_flush_packer(packer);
+}
+
+/**
+ * initiate_drain() - Initiate a drain.
+ *
+ * Implements vdo_admin_initiator_fn.
+ */
+static void initiate_drain(struct admin_state *state)
+{
+ struct packer *packer = container_of(state, struct packer, state);
+
+ write_all_non_empty_bins(packer);
+}
+
+/**
+ * vdo_drain_packer() - Drain the packer by preventing any more VIOs from entering the packer and
+ * then flushing.
+ * @packer: The packer to drain.
+ * @completion: The completion to finish when the packer has drained.
+ */
+void vdo_drain_packer(struct packer *packer, struct vdo_completion *completion)
+{
+ assert_on_packer_thread(packer, __func__);
+ vdo_start_draining(&packer->state, VDO_ADMIN_STATE_SUSPENDING, completion,
+ initiate_drain);
+}
+
+/**
+ * vdo_resume_packer() - Resume a packer which has been suspended.
+ * @packer: The packer to resume.
+ * @parent: The completion to finish when the packer has resumed.
+ */
+void vdo_resume_packer(struct packer *packer, struct vdo_completion *parent)
+{
+ assert_on_packer_thread(packer, __func__);
+ vdo_continue_completion(parent, vdo_resume_if_quiescent(&packer->state));
+}
+
+static void dump_packer_bin(const struct packer_bin *bin, bool canceled)
+{
+ if (bin->slots_used == 0)
+ /* Don't dump empty bins. */
+ return;
+
+ vdo_log_info(" %sBin slots_used=%u free_space=%zu",
+ (canceled ? "Canceled" : ""), bin->slots_used, bin->free_space);
+
+ /*
+ * FIXME: dump vios in bin->incoming? The vios should have been dumped from the vio pool.
+ * Maybe just dump their addresses so it's clear they're here?
+ */
+}
+
+/**
+ * vdo_dump_packer() - Dump the packer.
+ * @packer: The packer.
+ *
+ * Context: dumps in a thread-unsafe fashion.
+ */
+void vdo_dump_packer(const struct packer *packer)
+{
+ struct packer_bin *bin;
+
+ vdo_log_info("packer");
+ vdo_log_info(" flushGeneration=%llu state %s packer_bin_count=%llu",
+ (unsigned long long) packer->flush_generation,
+ vdo_get_admin_state_code(&packer->state)->name,
+ (unsigned long long) packer->size);
+
+ list_for_each_entry(bin, &packer->bins, list)
+ dump_packer_bin(bin, false);
+
+ dump_packer_bin(packer->canceled_bin, true);
+}
diff --git a/drivers/md/dm-vdo/packer.h b/drivers/md/dm-vdo/packer.h
new file mode 100644
index 000000000000..0f3be44710b5
--- /dev/null
+++ b/drivers/md/dm-vdo/packer.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_PACKER_H
+#define VDO_PACKER_H
+
+#include <linux/list.h>
+
+#include "admin-state.h"
+#include "constants.h"
+#include "encodings.h"
+#include "statistics.h"
+#include "types.h"
+#include "wait-queue.h"
+
+enum {
+ DEFAULT_PACKER_BINS = 16,
+};
+
+/* The header of a compressed block. */
+struct compressed_block_header {
+ /* Unsigned 32-bit major and minor versions, little-endian */
+ struct packed_version_number version;
+
+ /* List of unsigned 16-bit compressed block sizes, little-endian */
+ __le16 sizes[VDO_MAX_COMPRESSION_SLOTS];
+} __packed;
+
+enum {
+ VDO_COMPRESSED_BLOCK_DATA_SIZE = VDO_BLOCK_SIZE - sizeof(struct compressed_block_header),
+
+ /*
+ * A compressed block is only written if we can pack at least two fragments into it, so a
+ * fragment which fills the entire data portion of a compressed block is too big.
+ */
+ VDO_MAX_COMPRESSED_FRAGMENT_SIZE = VDO_COMPRESSED_BLOCK_DATA_SIZE - 1,
+};
+
+/* * The compressed block overlay. */
+struct compressed_block {
+ struct compressed_block_header header;
+ char data[VDO_COMPRESSED_BLOCK_DATA_SIZE];
+} __packed;
+
+/*
+ * Each packer_bin holds an incomplete batch of data_vios that only partially fill a compressed
+ * block. The bins are kept in a ring sorted by the amount of unused space so the first bin with
+ * enough space to hold a newly-compressed data_vio can easily be found. When the bin fills up or
+ * is flushed, the first uncanceled data_vio in the bin is selected to be the agent for that bin.
+ * Upon entering the packer, each data_vio already has its compressed data in the first slot of the
+ * data_vio's compressed_block (overlaid on the data_vio's scratch_block). So the agent's fragment
+ * is already in place. The fragments for the other uncanceled data_vios in the bin are packed into
+ * the agent's compressed block. The agent then writes out the compressed block. If the write is
+ * successful, the agent shares its pbn lock which each of the other data_vios in its compressed
+ * block and sends each on its way. Finally the agent itself continues on the write path as before.
+ *
+ * There is one special bin which is used to hold data_vios which have been canceled and removed
+ * from their bin by the packer. These data_vios need to wait for the canceller to rendezvous with
+ * them and so they sit in this special bin.
+ */
+struct packer_bin {
+ /* List links for packer.packer_bins */
+ struct list_head list;
+ /* The number of items in the bin */
+ slot_number_t slots_used;
+ /* The number of compressed block bytes remaining in the current batch */
+ size_t free_space;
+ /* The current partial batch of data_vios, waiting for more */
+ struct data_vio *incoming[];
+};
+
+struct packer {
+ /* The ID of the packer's callback thread */
+ thread_id_t thread_id;
+ /* The number of bins */
+ block_count_t size;
+ /* A list of all packer_bins, kept sorted by free_space */
+ struct list_head bins;
+ /*
+ * A bin to hold data_vios which were canceled out of the packer and are waiting to
+ * rendezvous with the canceling data_vio.
+ */
+ struct packer_bin *canceled_bin;
+
+ /* The current flush generation */
+ sequence_number_t flush_generation;
+
+ /* The administrative state of the packer */
+ struct admin_state state;
+
+ /* Statistics are only updated on the packer thread, but are accessed from other threads */
+ struct packer_statistics statistics;
+};
+
+int vdo_get_compressed_block_fragment(enum block_mapping_state mapping_state,
+ struct compressed_block *block,
+ u16 *fragment_offset, u16 *fragment_size);
+
+int __must_check vdo_make_packer(struct vdo *vdo, block_count_t bin_count,
+ struct packer **packer_ptr);
+
+void vdo_free_packer(struct packer *packer);
+
+struct packer_statistics __must_check vdo_get_packer_statistics(const struct packer *packer);
+
+void vdo_attempt_packing(struct data_vio *data_vio);
+
+void vdo_flush_packer(struct packer *packer);
+
+void vdo_remove_lock_holder_from_packer(struct vdo_completion *completion);
+
+void vdo_increment_packer_flush_generation(struct packer *packer);
+
+void vdo_drain_packer(struct packer *packer, struct vdo_completion *completion);
+
+void vdo_resume_packer(struct packer *packer, struct vdo_completion *parent);
+
+void vdo_dump_packer(const struct packer *packer);
+
+#endif /* VDO_PACKER_H */
diff --git a/drivers/md/dm-vdo/permassert.c b/drivers/md/dm-vdo/permassert.c
new file mode 100644
index 000000000000..bf9eccea1cb3
--- /dev/null
+++ b/drivers/md/dm-vdo/permassert.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "permassert.h"
+
+#include "errors.h"
+#include "logger.h"
+
+int vdo_assertion_failed(const char *expression_string, const char *file_name,
+ int line_number, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+
+ vdo_log_embedded_message(VDO_LOG_ERR, VDO_LOGGING_MODULE_NAME, "assertion \"",
+ format, args, "\" (%s) failed at %s:%d",
+ expression_string, file_name, line_number);
+ vdo_log_backtrace(VDO_LOG_ERR);
+
+ va_end(args);
+
+ return UDS_ASSERTION_FAILED;
+}
diff --git a/drivers/md/dm-vdo/permassert.h b/drivers/md/dm-vdo/permassert.h
new file mode 100644
index 000000000000..c34f2ba650e1
--- /dev/null
+++ b/drivers/md/dm-vdo/permassert.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef PERMASSERT_H
+#define PERMASSERT_H
+
+#include <linux/compiler.h>
+
+#include "errors.h"
+
+/* Utilities for asserting that certain conditions are met */
+
+#define STRINGIFY(X) #X
+
+/*
+ * A hack to apply the "warn if unused" attribute to an integral expression.
+ *
+ * Since GCC doesn't propagate the warn_unused_result attribute to conditional expressions
+ * incorporating calls to functions with that attribute, this function can be used to wrap such an
+ * expression. With optimization enabled, this function contributes no additional instructions, but
+ * the warn_unused_result attribute still applies to the code calling it.
+ */
+static inline int __must_check vdo_must_use(int value)
+{
+ return value;
+}
+
+/* Assert that an expression is true and return an error if it is not. */
+#define VDO_ASSERT(expr, ...) vdo_must_use(__VDO_ASSERT(expr, __VA_ARGS__))
+
+/* Log a message if the expression is not true. */
+#define VDO_ASSERT_LOG_ONLY(expr, ...) __VDO_ASSERT(expr, __VA_ARGS__)
+
+#define __VDO_ASSERT(expr, ...) \
+ (likely(expr) ? VDO_SUCCESS \
+ : vdo_assertion_failed(STRINGIFY(expr), __FILE__, __LINE__, __VA_ARGS__))
+
+/* Log an assertion failure message. */
+int vdo_assertion_failed(const char *expression_string, const char *file_name,
+ int line_number, const char *format, ...)
+ __printf(4, 5);
+
+#endif /* PERMASSERT_H */
diff --git a/drivers/md/dm-vdo/physical-zone.c b/drivers/md/dm-vdo/physical-zone.c
new file mode 100644
index 000000000000..2fee3a7c1191
--- /dev/null
+++ b/drivers/md/dm-vdo/physical-zone.c
@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "physical-zone.h"
+
+#include <linux/list.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "block-map.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "dedupe.h"
+#include "encodings.h"
+#include "flush.h"
+#include "int-map.h"
+#include "slab-depot.h"
+#include "status-codes.h"
+#include "vdo.h"
+
+/* Each user data_vio needs a PBN read lock and write lock. */
+#define LOCK_POOL_CAPACITY (2 * MAXIMUM_VDO_USER_VIOS)
+
+struct pbn_lock_implementation {
+ enum pbn_lock_type type;
+ const char *name;
+ const char *release_reason;
+};
+
+/* This array must have an entry for every pbn_lock_type value. */
+static const struct pbn_lock_implementation LOCK_IMPLEMENTATIONS[] = {
+ [VIO_READ_LOCK] = {
+ .type = VIO_READ_LOCK,
+ .name = "read",
+ .release_reason = "candidate duplicate",
+ },
+ [VIO_WRITE_LOCK] = {
+ .type = VIO_WRITE_LOCK,
+ .name = "write",
+ .release_reason = "newly allocated",
+ },
+ [VIO_BLOCK_MAP_WRITE_LOCK] = {
+ .type = VIO_BLOCK_MAP_WRITE_LOCK,
+ .name = "block map write",
+ .release_reason = "block map write",
+ },
+};
+
+static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type type)
+{
+ return (lock->implementation == &LOCK_IMPLEMENTATIONS[type]);
+}
+
+/**
+ * vdo_is_pbn_read_lock() - Check whether a pbn_lock is a read lock.
+ * @lock: The lock to check.
+ *
+ * Return: true if the lock is a read lock.
+ */
+bool vdo_is_pbn_read_lock(const struct pbn_lock *lock)
+{
+ return has_lock_type(lock, VIO_READ_LOCK);
+}
+
+static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type type)
+{
+ lock->implementation = &LOCK_IMPLEMENTATIONS[type];
+}
+
+/**
+ * vdo_downgrade_pbn_write_lock() - Downgrade a PBN write lock to a PBN read lock.
+ * @lock: The PBN write lock to downgrade.
+ *
+ * The lock holder count is cleared and the caller is responsible for setting the new count.
+ */
+void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write)
+{
+ VDO_ASSERT_LOG_ONLY(!vdo_is_pbn_read_lock(lock),
+ "PBN lock must not already have been downgraded");
+ VDO_ASSERT_LOG_ONLY(!has_lock_type(lock, VIO_BLOCK_MAP_WRITE_LOCK),
+ "must not downgrade block map write locks");
+ VDO_ASSERT_LOG_ONLY(lock->holder_count == 1,
+ "PBN write lock should have one holder but has %u",
+ lock->holder_count);
+ /*
+ * data_vio write locks are downgraded in place--the writer retains the hold on the lock.
+ * If this was a compressed write, the holder has not yet journaled its own inc ref,
+ * otherwise, it has.
+ */
+ lock->increment_limit =
+ (compressed_write ? MAXIMUM_REFERENCE_COUNT : MAXIMUM_REFERENCE_COUNT - 1);
+ set_pbn_lock_type(lock, VIO_READ_LOCK);
+}
+
+/**
+ * vdo_claim_pbn_lock_increment() - Try to claim one of the available reference count increments on
+ * a read lock.
+ * @lock: The PBN read lock from which to claim an increment.
+ *
+ * Claims may be attempted from any thread. A claim is only valid until the PBN lock is released.
+ *
+ * Return: true if the claim succeeded, guaranteeing one increment can be made without overflowing
+ * the PBN's reference count.
+ */
+bool vdo_claim_pbn_lock_increment(struct pbn_lock *lock)
+{
+ /*
+ * Claim the next free reference atomically since hash locks from multiple hash zone
+ * threads might be concurrently deduplicating against a single PBN lock on compressed
+ * block. As long as hitting the increment limit will lead to the PBN lock being released
+ * in a sane time-frame, we won't overflow a 32-bit claim counter, allowing a simple add
+ * instead of a compare-and-swap.
+ */
+ u32 claim_number = (u32) atomic_add_return(1, &lock->increments_claimed);
+
+ return (claim_number <= lock->increment_limit);
+}
+
+/**
+ * vdo_assign_pbn_lock_provisional_reference() - Inform a PBN lock that it is responsible for a
+ * provisional reference.
+ * @lock: The PBN lock.
+ */
+void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock)
+{
+ VDO_ASSERT_LOG_ONLY(!lock->has_provisional_reference,
+ "lock does not have a provisional reference");
+ lock->has_provisional_reference = true;
+}
+
+/**
+ * vdo_unassign_pbn_lock_provisional_reference() - Inform a PBN lock that it is no longer
+ * responsible for a provisional reference.
+ * @lock: The PBN lock.
+ */
+void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock)
+{
+ lock->has_provisional_reference = false;
+}
+
+/**
+ * release_pbn_lock_provisional_reference() - If the lock is responsible for a provisional
+ * reference, release that reference.
+ * @lock: The lock.
+ * @locked_pbn: The PBN covered by the lock.
+ * @allocator: The block allocator from which to release the reference.
+ *
+ * This method is called when the lock is released.
+ */
+static void release_pbn_lock_provisional_reference(struct pbn_lock *lock,
+ physical_block_number_t locked_pbn,
+ struct block_allocator *allocator)
+{
+ int result;
+
+ if (!vdo_pbn_lock_has_provisional_reference(lock))
+ return;
+
+ result = vdo_release_block_reference(allocator, locked_pbn);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error_strerror(result,
+ "Failed to release reference to %s physical block %llu",
+ lock->implementation->release_reason,
+ (unsigned long long) locked_pbn);
+ }
+
+ vdo_unassign_pbn_lock_provisional_reference(lock);
+}
+
+/**
+ * union idle_pbn_lock - PBN lock list entries.
+ *
+ * Unused (idle) PBN locks are kept in a list. Just like in a malloc implementation, the lock
+ * structure is unused memory, so we can save a bit of space (and not pollute the lock structure
+ * proper) by using a union to overlay the lock structure with the free list.
+ */
+typedef union {
+ /** @entry: Only used while locks are in the pool. */
+ struct list_head entry;
+ /** @lock: Only used while locks are not in the pool. */
+ struct pbn_lock lock;
+} idle_pbn_lock;
+
+/**
+ * struct pbn_lock_pool - list of PBN locks.
+ *
+ * The lock pool is little more than the memory allocated for the locks.
+ */
+struct pbn_lock_pool {
+ /** @capacity: The number of locks allocated for the pool. */
+ size_t capacity;
+ /** @borrowed: The number of locks currently borrowed from the pool. */
+ size_t borrowed;
+ /** @idle_list: A list containing all idle PBN lock instances. */
+ struct list_head idle_list;
+ /** @locks: The memory for all the locks allocated by this pool. */
+ idle_pbn_lock locks[];
+};
+
+/**
+ * return_pbn_lock_to_pool() - Return a pbn lock to its pool.
+ * @pool: The pool from which the lock was borrowed.
+ * @lock: The last reference to the lock being returned.
+ *
+ * It must be the last live reference, as if the memory were being freed (the lock memory will
+ * re-initialized or zeroed).
+ */
+static void return_pbn_lock_to_pool(struct pbn_lock_pool *pool, struct pbn_lock *lock)
+{
+ idle_pbn_lock *idle;
+
+ /* A bit expensive, but will promptly catch some use-after-free errors. */
+ memset(lock, 0, sizeof(*lock));
+
+ idle = container_of(lock, idle_pbn_lock, lock);
+ INIT_LIST_HEAD(&idle->entry);
+ list_add_tail(&idle->entry, &pool->idle_list);
+
+ VDO_ASSERT_LOG_ONLY(pool->borrowed > 0, "shouldn't return more than borrowed");
+ pool->borrowed -= 1;
+}
+
+/**
+ * make_pbn_lock_pool() - Create a new PBN lock pool and all the lock instances it can loan out.
+ *
+ * @capacity: The number of PBN locks to allocate for the pool.
+ * @pool_ptr: A pointer to receive the new pool.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int make_pbn_lock_pool(size_t capacity, struct pbn_lock_pool **pool_ptr)
+{
+ size_t i;
+ struct pbn_lock_pool *pool;
+ int result;
+
+ result = vdo_allocate_extended(struct pbn_lock_pool, capacity, idle_pbn_lock,
+ __func__, &pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ pool->capacity = capacity;
+ pool->borrowed = capacity;
+ INIT_LIST_HEAD(&pool->idle_list);
+
+ for (i = 0; i < capacity; i++)
+ return_pbn_lock_to_pool(pool, &pool->locks[i].lock);
+
+ *pool_ptr = pool;
+ return VDO_SUCCESS;
+}
+
+/**
+ * free_pbn_lock_pool() - Free a PBN lock pool.
+ * @pool: The lock pool to free.
+ *
+ * This also frees all the PBN locks it allocated, so the caller must ensure that all locks have
+ * been returned to the pool.
+ */
+static void free_pbn_lock_pool(struct pbn_lock_pool *pool)
+{
+ if (pool == NULL)
+ return;
+
+ VDO_ASSERT_LOG_ONLY(pool->borrowed == 0,
+ "All PBN locks must be returned to the pool before it is freed, but %zu locks are still on loan",
+ pool->borrowed);
+ vdo_free(pool);
+}
+
+/**
+ * borrow_pbn_lock_from_pool() - Borrow a PBN lock from the pool and initialize it with the
+ * provided type.
+ * @pool: The pool from which to borrow.
+ * @type: The type with which to initialize the lock.
+ * @lock_ptr: A pointer to receive the borrowed lock.
+ *
+ * Pools do not grow on demand or allocate memory, so this will fail if the pool is empty. Borrowed
+ * locks are still associated with this pool and must be returned to only this pool.
+ *
+ * Return: VDO_SUCCESS, or VDO_LOCK_ERROR if the pool is empty.
+ */
+static int __must_check borrow_pbn_lock_from_pool(struct pbn_lock_pool *pool,
+ enum pbn_lock_type type,
+ struct pbn_lock **lock_ptr)
+{
+ int result;
+ struct list_head *idle_entry;
+ idle_pbn_lock *idle;
+
+ if (pool->borrowed >= pool->capacity)
+ return vdo_log_error_strerror(VDO_LOCK_ERROR,
+ "no free PBN locks left to borrow");
+ pool->borrowed += 1;
+
+ result = VDO_ASSERT(!list_empty(&pool->idle_list),
+ "idle list should not be empty if pool not at capacity");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ idle_entry = pool->idle_list.prev;
+ list_del(idle_entry);
+ memset(idle_entry, 0, sizeof(*idle_entry));
+
+ idle = list_entry(idle_entry, idle_pbn_lock, entry);
+ idle->lock.holder_count = 0;
+ set_pbn_lock_type(&idle->lock, type);
+
+ *lock_ptr = &idle->lock;
+ return VDO_SUCCESS;
+}
+
+/**
+ * initialize_zone() - Initialize a physical zone.
+ * @vdo: The vdo to which the zone will belong.
+ * @zones: The physical_zones to which the zone being initialized belongs
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int initialize_zone(struct vdo *vdo, struct physical_zones *zones)
+{
+ int result;
+ zone_count_t zone_number = zones->zone_count;
+ struct physical_zone *zone = &zones->zones[zone_number];
+
+ result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->pbn_operations);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = make_pbn_lock_pool(LOCK_POOL_CAPACITY, &zone->lock_pool);
+ if (result != VDO_SUCCESS) {
+ vdo_int_map_free(zone->pbn_operations);
+ return result;
+ }
+
+ zone->zone_number = zone_number;
+ zone->thread_id = vdo->thread_config.physical_threads[zone_number];
+ zone->allocator = &vdo->depot->allocators[zone_number];
+ zone->next = &zones->zones[(zone_number + 1) % vdo->thread_config.physical_zone_count];
+ result = vdo_make_default_thread(vdo, zone->thread_id);
+ if (result != VDO_SUCCESS) {
+ free_pbn_lock_pool(vdo_forget(zone->lock_pool));
+ vdo_int_map_free(zone->pbn_operations);
+ return result;
+ }
+ return result;
+}
+
+/**
+ * vdo_make_physical_zones() - Make the physical zones for a vdo.
+ * @vdo: The vdo being constructed
+ * @zones_ptr: A pointer to hold the zones
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_make_physical_zones(struct vdo *vdo, struct physical_zones **zones_ptr)
+{
+ struct physical_zones *zones;
+ int result;
+ zone_count_t zone_count = vdo->thread_config.physical_zone_count;
+
+ if (zone_count == 0)
+ return VDO_SUCCESS;
+
+ result = vdo_allocate_extended(struct physical_zones, zone_count,
+ struct physical_zone, __func__, &zones);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (zones->zone_count = 0; zones->zone_count < zone_count; zones->zone_count++) {
+ result = initialize_zone(vdo, zones);
+ if (result != VDO_SUCCESS) {
+ vdo_free_physical_zones(zones);
+ return result;
+ }
+ }
+
+ *zones_ptr = zones;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_free_physical_zones() - Destroy the physical zones.
+ * @zones: The zones to free.
+ */
+void vdo_free_physical_zones(struct physical_zones *zones)
+{
+ zone_count_t index;
+
+ if (zones == NULL)
+ return;
+
+ for (index = 0; index < zones->zone_count; index++) {
+ struct physical_zone *zone = &zones->zones[index];
+
+ free_pbn_lock_pool(vdo_forget(zone->lock_pool));
+ vdo_int_map_free(vdo_forget(zone->pbn_operations));
+ }
+
+ vdo_free(zones);
+}
+
+/**
+ * vdo_get_physical_zone_pbn_lock() - Get the lock on a PBN if one exists.
+ * @zone: The physical zone responsible for the PBN.
+ * @pbn: The physical block number whose lock is desired.
+ *
+ * Return: The lock or NULL if the PBN is not locked.
+ */
+struct pbn_lock *vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
+ physical_block_number_t pbn)
+{
+ return ((zone == NULL) ? NULL : vdo_int_map_get(zone->pbn_operations, pbn));
+}
+
+/**
+ * vdo_attempt_physical_zone_pbn_lock() - Attempt to lock a physical block in the zone responsible
+ * for it.
+ * @zone: The physical zone responsible for the PBN.
+ * @pbn: The physical block number to lock.
+ * @type: The type with which to initialize a new lock.
+ * @lock_ptr: A pointer to receive the lock, existing or new.
+ *
+ * If the PBN is already locked, the existing lock will be returned. Otherwise, a new lock instance
+ * will be borrowed from the pool, initialized, and returned. The lock owner will be NULL for a new
+ * lock acquired by the caller, who is responsible for setting that field promptly. The lock owner
+ * will be non-NULL when there is already an existing lock on the PBN.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
+ physical_block_number_t pbn,
+ enum pbn_lock_type type,
+ struct pbn_lock **lock_ptr)
+{
+ /*
+ * Borrow and prepare a lock from the pool so we don't have to do two int_map accesses in
+ * the common case of no lock contention.
+ */
+ struct pbn_lock *lock, *new_lock = NULL;
+ int result;
+
+ result = borrow_pbn_lock_from_pool(zone->lock_pool, type, &new_lock);
+ if (result != VDO_SUCCESS) {
+ VDO_ASSERT_LOG_ONLY(false, "must always be able to borrow a PBN lock");
+ return result;
+ }
+
+ result = vdo_int_map_put(zone->pbn_operations, pbn, new_lock, false,
+ (void **) &lock);
+ if (result != VDO_SUCCESS) {
+ return_pbn_lock_to_pool(zone->lock_pool, new_lock);
+ return result;
+ }
+
+ if (lock != NULL) {
+ /* The lock is already held, so we don't need the borrowed one. */
+ return_pbn_lock_to_pool(zone->lock_pool, vdo_forget(new_lock));
+ result = VDO_ASSERT(lock->holder_count > 0, "physical block %llu lock held",
+ (unsigned long long) pbn);
+ if (result != VDO_SUCCESS)
+ return result;
+ *lock_ptr = lock;
+ } else {
+ *lock_ptr = new_lock;
+ }
+ return VDO_SUCCESS;
+}
+
+/**
+ * allocate_and_lock_block() - Attempt to allocate a block from this zone.
+ * @allocation: The struct allocation of the data_vio attempting to allocate.
+ *
+ * If a block is allocated, the recipient will also hold a lock on it.
+ *
+ * Return: VDO_SUCCESS if a block was allocated, or an error code.
+ */
+static int allocate_and_lock_block(struct allocation *allocation)
+{
+ int result;
+ struct pbn_lock *lock;
+
+ VDO_ASSERT_LOG_ONLY(allocation->lock == NULL,
+ "must not allocate a block while already holding a lock on one");
+
+ result = vdo_allocate_block(allocation->zone->allocator, &allocation->pbn);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_attempt_physical_zone_pbn_lock(allocation->zone, allocation->pbn,
+ allocation->write_lock_type, &lock);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (lock->holder_count > 0) {
+ /* This block is already locked, which should be impossible. */
+ return vdo_log_error_strerror(VDO_LOCK_ERROR,
+ "Newly allocated block %llu was spuriously locked (holder_count=%u)",
+ (unsigned long long) allocation->pbn,
+ lock->holder_count);
+ }
+
+ /* We've successfully acquired a new lock, so mark it as ours. */
+ lock->holder_count += 1;
+ allocation->lock = lock;
+ vdo_assign_pbn_lock_provisional_reference(lock);
+ return VDO_SUCCESS;
+}
+
+/**
+ * retry_allocation() - Retry allocating a block now that we're done waiting for scrubbing.
+ * @waiter: The allocating_vio that was waiting to allocate.
+ * @context: The context (unused).
+ */
+static void retry_allocation(struct vdo_waiter *waiter, void *context __always_unused)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+
+ /* Now that some slab has scrubbed, restart the allocation process. */
+ data_vio->allocation.wait_for_clean_slab = false;
+ data_vio->allocation.first_allocation_zone = data_vio->allocation.zone->zone_number;
+ continue_data_vio(data_vio);
+}
+
+/**
+ * continue_allocating() - Continue searching for an allocation by enqueuing to wait for scrubbing
+ * or switching to the next zone.
+ * @data_vio: The data_vio attempting to get an allocation.
+ *
+ * This method should only be called from the error handler set in data_vio_allocate_data_block.
+ *
+ * Return: true if the allocation process has continued in another zone.
+ */
+static bool continue_allocating(struct data_vio *data_vio)
+{
+ struct allocation *allocation = &data_vio->allocation;
+ struct physical_zone *zone = allocation->zone;
+ struct vdo_completion *completion = &data_vio->vio.completion;
+ int result = VDO_SUCCESS;
+ bool was_waiting = allocation->wait_for_clean_slab;
+ bool tried_all = (allocation->first_allocation_zone == zone->next->zone_number);
+
+ vdo_reset_completion(completion);
+
+ if (tried_all && !was_waiting) {
+ /*
+ * We've already looked in all the zones, and found nothing. So go through the
+ * zones again, and wait for each to scrub before trying to allocate.
+ */
+ allocation->wait_for_clean_slab = true;
+ allocation->first_allocation_zone = zone->zone_number;
+ }
+
+ if (allocation->wait_for_clean_slab) {
+ data_vio->waiter.callback = retry_allocation;
+ result = vdo_enqueue_clean_slab_waiter(zone->allocator,
+ &data_vio->waiter);
+ if (result == VDO_SUCCESS) {
+ /* We've enqueued to wait for a slab to be scrubbed. */
+ return true;
+ }
+
+ if ((result != VDO_NO_SPACE) || (was_waiting && tried_all)) {
+ vdo_set_completion_result(completion, result);
+ return false;
+ }
+ }
+
+ allocation->zone = zone->next;
+ completion->callback_thread_id = allocation->zone->thread_id;
+ vdo_launch_completion(completion);
+ return true;
+}
+
+/**
+ * vdo_allocate_block_in_zone() - Attempt to allocate a block in the current physical zone, and if
+ * that fails try the next if possible.
+ * @data_vio: The data_vio needing an allocation.
+ *
+ * Return: true if a block was allocated, if not the data_vio will have been dispatched so the
+ * caller must not touch it.
+ */
+bool vdo_allocate_block_in_zone(struct data_vio *data_vio)
+{
+ int result = allocate_and_lock_block(&data_vio->allocation);
+
+ if (result == VDO_SUCCESS)
+ return true;
+
+ if ((result != VDO_NO_SPACE) || !continue_allocating(data_vio))
+ continue_data_vio_with_error(data_vio, result);
+
+ return false;
+}
+
+/**
+ * vdo_release_physical_zone_pbn_lock() - Release a physical block lock if it is held and return it
+ * to the lock pool.
+ * @zone: The physical zone in which the lock was obtained.
+ * @locked_pbn: The physical block number to unlock.
+ * @lock: The lock being released.
+ *
+ * It must be the last live reference, as if the memory were being freed (the
+ * lock memory will re-initialized or zeroed).
+ */
+void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
+ physical_block_number_t locked_pbn,
+ struct pbn_lock *lock)
+{
+ struct pbn_lock *holder;
+
+ if (lock == NULL)
+ return;
+
+ VDO_ASSERT_LOG_ONLY(lock->holder_count > 0,
+ "should not be releasing a lock that is not held");
+
+ lock->holder_count -= 1;
+ if (lock->holder_count > 0) {
+ /* The lock was shared and is still referenced, so don't release it yet. */
+ return;
+ }
+
+ holder = vdo_int_map_remove(zone->pbn_operations, locked_pbn);
+ VDO_ASSERT_LOG_ONLY((lock == holder), "physical block lock mismatch for block %llu",
+ (unsigned long long) locked_pbn);
+
+ release_pbn_lock_provisional_reference(lock, locked_pbn, zone->allocator);
+ return_pbn_lock_to_pool(zone->lock_pool, lock);
+}
+
+/**
+ * vdo_dump_physical_zone() - Dump information about a physical zone to the log for debugging.
+ * @zone: The zone to dump.
+ */
+void vdo_dump_physical_zone(const struct physical_zone *zone)
+{
+ vdo_dump_block_allocator(zone->allocator);
+}
diff --git a/drivers/md/dm-vdo/physical-zone.h b/drivers/md/dm-vdo/physical-zone.h
new file mode 100644
index 000000000000..47d874fd5a0b
--- /dev/null
+++ b/drivers/md/dm-vdo/physical-zone.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_PHYSICAL_ZONE_H
+#define VDO_PHYSICAL_ZONE_H
+
+#include <linux/atomic.h>
+
+#include "types.h"
+
+/*
+ * The type of a PBN lock.
+ */
+enum pbn_lock_type {
+ VIO_READ_LOCK,
+ VIO_WRITE_LOCK,
+ VIO_BLOCK_MAP_WRITE_LOCK,
+};
+
+struct pbn_lock_implementation;
+
+/*
+ * A PBN lock.
+ */
+struct pbn_lock {
+ /* The implementation of the lock */
+ const struct pbn_lock_implementation *implementation;
+
+ /* The number of VIOs holding or sharing this lock */
+ data_vio_count_t holder_count;
+ /*
+ * The number of compressed block writers holding a share of this lock while they are
+ * acquiring a reference to the PBN.
+ */
+ u8 fragment_locks;
+
+ /* Whether the locked PBN has been provisionally referenced on behalf of the lock holder. */
+ bool has_provisional_reference;
+
+ /*
+ * For read locks, the number of references that were known to be available on the locked
+ * block at the time the lock was acquired.
+ */
+ u8 increment_limit;
+
+ /*
+ * For read locks, the number of data_vios that have tried to claim one of the available
+ * increments during the lifetime of the lock. Each claim will first increment this
+ * counter, so it can exceed the increment limit.
+ */
+ atomic_t increments_claimed;
+};
+
+struct physical_zone {
+ /* Which physical zone this is */
+ zone_count_t zone_number;
+ /* The thread ID for this zone */
+ thread_id_t thread_id;
+ /* In progress operations keyed by PBN */
+ struct int_map *pbn_operations;
+ /* Pool of unused pbn_lock instances */
+ struct pbn_lock_pool *lock_pool;
+ /* The block allocator for this zone */
+ struct block_allocator *allocator;
+ /* The next zone from which to attempt an allocation */
+ struct physical_zone *next;
+};
+
+struct physical_zones {
+ /* The number of zones */
+ zone_count_t zone_count;
+ /* The physical zones themselves */
+ struct physical_zone zones[];
+};
+
+bool __must_check vdo_is_pbn_read_lock(const struct pbn_lock *lock);
+void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write);
+bool __must_check vdo_claim_pbn_lock_increment(struct pbn_lock *lock);
+
+/**
+ * vdo_pbn_lock_has_provisional_reference() - Check whether a PBN lock has a provisional reference.
+ * @lock: The PBN lock.
+ */
+static inline bool vdo_pbn_lock_has_provisional_reference(struct pbn_lock *lock)
+{
+ return ((lock != NULL) && lock->has_provisional_reference);
+}
+
+void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock);
+void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock);
+
+int __must_check vdo_make_physical_zones(struct vdo *vdo,
+ struct physical_zones **zones_ptr);
+
+void vdo_free_physical_zones(struct physical_zones *zones);
+
+struct pbn_lock * __must_check vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
+ physical_block_number_t pbn);
+
+int __must_check vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
+ physical_block_number_t pbn,
+ enum pbn_lock_type type,
+ struct pbn_lock **lock_ptr);
+
+bool __must_check vdo_allocate_block_in_zone(struct data_vio *data_vio);
+
+void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
+ physical_block_number_t locked_pbn,
+ struct pbn_lock *lock);
+
+void vdo_dump_physical_zone(const struct physical_zone *zone);
+
+#endif /* VDO_PHYSICAL_ZONE_H */
diff --git a/drivers/md/dm-vdo/priority-table.c b/drivers/md/dm-vdo/priority-table.c
new file mode 100644
index 000000000000..42d3d8d0e4b5
--- /dev/null
+++ b/drivers/md/dm-vdo/priority-table.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "priority-table.h"
+
+#include <linux/log2.h>
+
+#include "errors.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "status-codes.h"
+
+/* We use a single 64-bit search vector, so the maximum priority is 63 */
+#define MAX_PRIORITY 63
+
+/*
+ * All the entries with the same priority are queued in a circular list in a bucket for that
+ * priority. The table is essentially an array of buckets.
+ */
+struct bucket {
+ /*
+ * The head of a queue of table entries, all having the same priority
+ */
+ struct list_head queue;
+ /* The priority of all the entries in this bucket */
+ unsigned int priority;
+};
+
+/*
+ * A priority table is an array of buckets, indexed by priority. New entries are added to the end
+ * of the queue in the appropriate bucket. The dequeue operation finds the highest-priority
+ * non-empty bucket by searching a bit vector represented as a single 8-byte word, which is very
+ * fast with compiler and CPU support.
+ */
+struct priority_table {
+ /* The maximum priority of entries that may be stored in this table */
+ unsigned int max_priority;
+ /* A bit vector flagging all buckets that are currently non-empty */
+ u64 search_vector;
+ /* The array of all buckets, indexed by priority */
+ struct bucket buckets[];
+};
+
+/**
+ * vdo_make_priority_table() - Allocate and initialize a new priority_table.
+ * @max_priority: The maximum priority value for table entries.
+ * @table_ptr: A pointer to hold the new table.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+int vdo_make_priority_table(unsigned int max_priority, struct priority_table **table_ptr)
+{
+ struct priority_table *table;
+ int result;
+ unsigned int priority;
+
+ if (max_priority > MAX_PRIORITY)
+ return UDS_INVALID_ARGUMENT;
+
+ result = vdo_allocate_extended(struct priority_table, max_priority + 1,
+ struct bucket, __func__, &table);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (priority = 0; priority <= max_priority; priority++) {
+ struct bucket *bucket = &table->buckets[priority];
+
+ bucket->priority = priority;
+ INIT_LIST_HEAD(&bucket->queue);
+ }
+
+ table->max_priority = max_priority;
+ table->search_vector = 0;
+
+ *table_ptr = table;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_free_priority_table() - Free a priority_table.
+ * @table: The table to free.
+ *
+ * The table does not own the entries stored in it and they are not freed by this call.
+ */
+void vdo_free_priority_table(struct priority_table *table)
+{
+ if (table == NULL)
+ return;
+
+ /*
+ * Unlink the buckets from any entries still in the table so the entries won't be left with
+ * dangling pointers to freed memory.
+ */
+ vdo_reset_priority_table(table);
+
+ vdo_free(table);
+}
+
+/**
+ * vdo_reset_priority_table() - Reset a priority table, leaving it in the same empty state as when
+ * newly constructed.
+ * @table: The table to reset.
+ *
+ * The table does not own the entries stored in it and they are not freed (or even unlinked from
+ * each other) by this call.
+ */
+void vdo_reset_priority_table(struct priority_table *table)
+{
+ unsigned int priority;
+
+ table->search_vector = 0;
+ for (priority = 0; priority <= table->max_priority; priority++)
+ list_del_init(&table->buckets[priority].queue);
+}
+
+/**
+ * vdo_priority_table_enqueue() - Add a new entry to the priority table, appending it to the queue
+ * for entries with the specified priority.
+ * @table: The table in which to store the entry.
+ * @priority: The priority of the entry.
+ * @entry: The list_head embedded in the entry to store in the table (the caller must have
+ * initialized it).
+ */
+void vdo_priority_table_enqueue(struct priority_table *table, unsigned int priority,
+ struct list_head *entry)
+{
+ VDO_ASSERT_LOG_ONLY((priority <= table->max_priority),
+ "entry priority must be valid for the table");
+
+ /* Append the entry to the queue in the specified bucket. */
+ list_move_tail(entry, &table->buckets[priority].queue);
+
+ /* Flag the bucket in the search vector since it must be non-empty. */
+ table->search_vector |= (1ULL << priority);
+}
+
+static inline void mark_bucket_empty(struct priority_table *table, struct bucket *bucket)
+{
+ table->search_vector &= ~(1ULL << bucket->priority);
+}
+
+/**
+ * vdo_priority_table_dequeue() - Find the highest-priority entry in the table, remove it from the
+ * table, and return it.
+ * @table: The priority table from which to remove an entry.
+ *
+ * If there are multiple entries with the same priority, the one that has been in the table with
+ * that priority the longest will be returned.
+ *
+ * Return: The dequeued entry, or NULL if the table is currently empty.
+ */
+struct list_head *vdo_priority_table_dequeue(struct priority_table *table)
+{
+ struct bucket *bucket;
+ struct list_head *entry;
+ int top_priority;
+
+ if (table->search_vector == 0) {
+ /* All buckets are empty. */
+ return NULL;
+ }
+
+ /*
+ * Find the highest priority non-empty bucket by finding the highest-order non-zero bit in
+ * the search vector.
+ */
+ top_priority = ilog2(table->search_vector);
+
+ /* Dequeue the first entry in the bucket. */
+ bucket = &table->buckets[top_priority];
+ entry = bucket->queue.next;
+ list_del_init(entry);
+
+ /* Clear the bit in the search vector if the bucket has been emptied. */
+ if (list_empty(&bucket->queue))
+ mark_bucket_empty(table, bucket);
+
+ return entry;
+}
+
+/**
+ * vdo_priority_table_remove() - Remove a specified entry from its priority table.
+ * @table: The table from which to remove the entry.
+ * @entry: The entry to remove from the table.
+ */
+void vdo_priority_table_remove(struct priority_table *table, struct list_head *entry)
+{
+ struct list_head *next_entry;
+
+ /*
+ * We can't guard against calls where the entry is on a list for a different table, but
+ * it's easy to deal with an entry not in any table or list.
+ */
+ if (list_empty(entry))
+ return;
+
+ /*
+ * Remove the entry from the bucket list, remembering a pointer to another entry in the
+ * ring.
+ */
+ next_entry = entry->next;
+ list_del_init(entry);
+
+ /*
+ * If the rest of the list is now empty, the next node must be the list head in the bucket
+ * and we can use it to update the search vector.
+ */
+ if (list_empty(next_entry))
+ mark_bucket_empty(table, list_entry(next_entry, struct bucket, queue));
+}
+
+/**
+ * vdo_is_priority_table_empty() - Return whether the priority table is empty.
+ * @table: The table to check.
+ *
+ * Return: true if the table is empty.
+ */
+bool vdo_is_priority_table_empty(struct priority_table *table)
+{
+ return (table->search_vector == 0);
+}
diff --git a/drivers/md/dm-vdo/priority-table.h b/drivers/md/dm-vdo/priority-table.h
new file mode 100644
index 000000000000..8b060462e3e4
--- /dev/null
+++ b/drivers/md/dm-vdo/priority-table.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_PRIORITY_TABLE_H
+#define VDO_PRIORITY_TABLE_H
+
+#include <linux/list.h>
+
+/*
+ * A priority_table is a simple implementation of a priority queue for entries with priorities that
+ * are small non-negative integer values. It implements the obvious priority queue operations of
+ * enqueuing an entry and dequeuing an entry with the maximum priority. It also supports removing
+ * an arbitrary entry. The priority of an entry already in the table can be changed by removing it
+ * and re-enqueuing it with a different priority. All operations have O(1) complexity.
+ *
+ * The links for the table entries must be embedded in the entries themselves. Lists are used to
+ * link entries in the table and no wrapper type is declared, so an existing list entry in an
+ * object can also be used to queue it in a priority_table, assuming the field is not used for
+ * anything else while so queued.
+ *
+ * The table is implemented as an array of queues (circular lists) indexed by priority, along with
+ * a hint for which queues are non-empty. Steven Skiena calls a very similar structure a "bounded
+ * height priority queue", but given the resemblance to a hash table, "priority table" seems both
+ * shorter and more apt, if somewhat novel.
+ */
+
+struct priority_table;
+
+int __must_check vdo_make_priority_table(unsigned int max_priority,
+ struct priority_table **table_ptr);
+
+void vdo_free_priority_table(struct priority_table *table);
+
+void vdo_priority_table_enqueue(struct priority_table *table, unsigned int priority,
+ struct list_head *entry);
+
+void vdo_reset_priority_table(struct priority_table *table);
+
+struct list_head * __must_check vdo_priority_table_dequeue(struct priority_table *table);
+
+void vdo_priority_table_remove(struct priority_table *table, struct list_head *entry);
+
+bool __must_check vdo_is_priority_table_empty(struct priority_table *table);
+
+#endif /* VDO_PRIORITY_TABLE_H */
diff --git a/drivers/md/dm-vdo/recovery-journal.c b/drivers/md/dm-vdo/recovery-journal.c
new file mode 100644
index 000000000000..ee6321a3e523
--- /dev/null
+++ b/drivers/md/dm-vdo/recovery-journal.c
@@ -0,0 +1,1762 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "recovery-journal.h"
+
+#include <linux/atomic.h>
+#include <linux/bio.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "block-map.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "encodings.h"
+#include "io-submitter.h"
+#include "slab-depot.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+static const u64 RECOVERY_COUNT_MASK = 0xff;
+
+/*
+ * The number of reserved blocks must be large enough to prevent a new recovery journal
+ * block write from overwriting a block which appears to still be a valid head block of the
+ * journal. Currently, that means reserving enough space for all 2048 data_vios.
+ */
+#define RECOVERY_JOURNAL_RESERVED_BLOCKS \
+ ((MAXIMUM_VDO_USER_VIOS / RECOVERY_JOURNAL_ENTRIES_PER_BLOCK) + 2)
+
+/**
+ * DOC: Lock Counters.
+ *
+ * A lock_counter is intended to keep all of the locks for the blocks in the recovery journal. The
+ * per-zone counters are all kept in a single array which is arranged by zone (i.e. zone 0's lock 0
+ * is at index 0, zone 0's lock 1 is at index 1, and zone 1's lock 0 is at index 'locks'. This
+ * arrangement is intended to minimize cache-line contention for counters from different zones.
+ *
+ * The locks are implemented as a single object instead of as a lock counter per lock both to
+ * afford this opportunity to reduce cache line contention and also to eliminate the need to have a
+ * completion per lock.
+ *
+ * Lock sets are laid out with the set for recovery journal first, followed by the logical zones,
+ * and then the physical zones.
+ */
+
+enum lock_counter_state {
+ LOCK_COUNTER_STATE_NOT_NOTIFYING,
+ LOCK_COUNTER_STATE_NOTIFYING,
+ LOCK_COUNTER_STATE_SUSPENDED,
+};
+
+/**
+ * get_zone_count_ptr() - Get a pointer to the zone count for a given lock on a given zone.
+ * @journal: The recovery journal.
+ * @lock_number: The lock to get.
+ * @zone_type: The zone type whose count is desired.
+ *
+ * Return: A pointer to the zone count for the given lock and zone.
+ */
+static inline atomic_t *get_zone_count_ptr(struct recovery_journal *journal,
+ block_count_t lock_number,
+ enum vdo_zone_type zone_type)
+{
+ return ((zone_type == VDO_ZONE_TYPE_LOGICAL)
+ ? &journal->lock_counter.logical_zone_counts[lock_number]
+ : &journal->lock_counter.physical_zone_counts[lock_number]);
+}
+
+/**
+ * get_counter() - Get the zone counter for a given lock on a given zone.
+ * @journal: The recovery journal.
+ * @lock_number: The lock to get.
+ * @zone_type: The zone type whose count is desired.
+ * @zone_id: The zone index whose count is desired.
+ *
+ * Return: The counter for the given lock and zone.
+ */
+static inline u16 *get_counter(struct recovery_journal *journal,
+ block_count_t lock_number, enum vdo_zone_type zone_type,
+ zone_count_t zone_id)
+{
+ struct lock_counter *counter = &journal->lock_counter;
+ block_count_t zone_counter = (counter->locks * zone_id) + lock_number;
+
+ if (zone_type == VDO_ZONE_TYPE_JOURNAL)
+ return &counter->journal_counters[zone_counter];
+
+ if (zone_type == VDO_ZONE_TYPE_LOGICAL)
+ return &counter->logical_counters[zone_counter];
+
+ return &counter->physical_counters[zone_counter];
+}
+
+static atomic_t *get_decrement_counter(struct recovery_journal *journal,
+ block_count_t lock_number)
+{
+ return &journal->lock_counter.journal_decrement_counts[lock_number];
+}
+
+/**
+ * is_journal_zone_locked() - Check whether the journal zone is locked for a given lock.
+ * @journal: The recovery journal.
+ * @lock_number: The lock to check.
+ *
+ * Return: true if the journal zone is locked.
+ */
+static bool is_journal_zone_locked(struct recovery_journal *journal,
+ block_count_t lock_number)
+{
+ u16 journal_value = *get_counter(journal, lock_number, VDO_ZONE_TYPE_JOURNAL, 0);
+ u32 decrements = atomic_read(get_decrement_counter(journal, lock_number));
+
+ /* Pairs with barrier in vdo_release_journal_entry_lock() */
+ smp_rmb();
+ VDO_ASSERT_LOG_ONLY((decrements <= journal_value),
+ "journal zone lock counter must not underflow");
+ return (journal_value != decrements);
+}
+
+/**
+ * vdo_release_recovery_journal_block_reference() - Release a reference to a recovery journal
+ * block.
+ * @journal: The recovery journal.
+ * @sequence_number: The journal sequence number of the referenced block.
+ * @zone_type: The type of the zone making the adjustment.
+ * @zone_id: The ID of the zone making the adjustment.
+ *
+ * If this is the last reference for a given zone type, an attempt will be made to reap the
+ * journal.
+ */
+void vdo_release_recovery_journal_block_reference(struct recovery_journal *journal,
+ sequence_number_t sequence_number,
+ enum vdo_zone_type zone_type,
+ zone_count_t zone_id)
+{
+ u16 *current_value;
+ block_count_t lock_number;
+ int prior_state;
+
+ if (sequence_number == 0)
+ return;
+
+ lock_number = vdo_get_recovery_journal_block_number(journal, sequence_number);
+ current_value = get_counter(journal, lock_number, zone_type, zone_id);
+
+ VDO_ASSERT_LOG_ONLY((*current_value >= 1),
+ "decrement of lock counter must not underflow");
+ *current_value -= 1;
+
+ if (zone_type == VDO_ZONE_TYPE_JOURNAL) {
+ if (is_journal_zone_locked(journal, lock_number))
+ return;
+ } else {
+ atomic_t *zone_count;
+
+ if (*current_value != 0)
+ return;
+
+ zone_count = get_zone_count_ptr(journal, lock_number, zone_type);
+
+ if (atomic_add_return(-1, zone_count) > 0)
+ return;
+ }
+
+ /*
+ * Extra barriers because this was original developed using a CAS operation that implicitly
+ * had them.
+ */
+ smp_mb__before_atomic();
+ prior_state = atomic_cmpxchg(&journal->lock_counter.state,
+ LOCK_COUNTER_STATE_NOT_NOTIFYING,
+ LOCK_COUNTER_STATE_NOTIFYING);
+ /* same as before_atomic */
+ smp_mb__after_atomic();
+
+ if (prior_state != LOCK_COUNTER_STATE_NOT_NOTIFYING)
+ return;
+
+ vdo_launch_completion(&journal->lock_counter.completion);
+}
+
+static inline struct recovery_journal_block * __must_check get_journal_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct recovery_journal_block, list_node);
+}
+
+/**
+ * pop_free_list() - Get a block from the end of the free list.
+ * @journal: The journal.
+ *
+ * Return: The block or NULL if the list is empty.
+ */
+static struct recovery_journal_block * __must_check pop_free_list(struct recovery_journal *journal)
+{
+ struct recovery_journal_block *block;
+
+ if (list_empty(&journal->free_tail_blocks))
+ return NULL;
+
+ block = list_last_entry(&journal->free_tail_blocks,
+ struct recovery_journal_block, list_node);
+ list_del_init(&block->list_node);
+ return block;
+}
+
+/**
+ * is_block_dirty() - Check whether a recovery block is dirty.
+ * @block: The block to check.
+ *
+ * Indicates it has any uncommitted entries, which includes both entries not written and entries
+ * written but not yet acknowledged.
+ *
+ * Return: true if the block has any uncommitted entries.
+ */
+static inline bool __must_check is_block_dirty(const struct recovery_journal_block *block)
+{
+ return (block->uncommitted_entry_count > 0);
+}
+
+/**
+ * is_block_empty() - Check whether a journal block is empty.
+ * @block: The block to check.
+ *
+ * Return: true if the block has no entries.
+ */
+static inline bool __must_check is_block_empty(const struct recovery_journal_block *block)
+{
+ return (block->entry_count == 0);
+}
+
+/**
+ * is_block_full() - Check whether a journal block is full.
+ * @block: The block to check.
+ *
+ * Return: true if the block is full.
+ */
+static inline bool __must_check is_block_full(const struct recovery_journal_block *block)
+{
+ return ((block == NULL) || (block->journal->entries_per_block == block->entry_count));
+}
+
+/**
+ * assert_on_journal_thread() - Assert that we are running on the journal thread.
+ * @journal: The journal.
+ * @function_name: The function doing the check (for logging).
+ */
+static void assert_on_journal_thread(struct recovery_journal *journal,
+ const char *function_name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == journal->thread_id),
+ "%s() called on journal thread", function_name);
+}
+
+/**
+ * continue_waiter() - Release a data_vio from the journal.
+ *
+ * Invoked whenever a data_vio is to be released from the journal, either because its entry was
+ * committed to disk, or because there was an error. Implements waiter_callback_fn.
+ */
+static void continue_waiter(struct vdo_waiter *waiter, void *context)
+{
+ continue_data_vio_with_error(vdo_waiter_as_data_vio(waiter), *((int *) context));
+}
+
+/**
+ * has_block_waiters() - Check whether the journal has any waiters on any blocks.
+ * @journal: The journal in question.
+ *
+ * Return: true if any block has a waiter.
+ */
+static inline bool has_block_waiters(struct recovery_journal *journal)
+{
+ struct recovery_journal_block *block = get_journal_block(&journal->active_tail_blocks);
+
+ /*
+ * Either the first active tail block (if it exists) has waiters, or no active tail block
+ * has waiters.
+ */
+ return ((block != NULL) &&
+ (vdo_waitq_has_waiters(&block->entry_waiters) ||
+ vdo_waitq_has_waiters(&block->commit_waiters)));
+}
+
+static void recycle_journal_blocks(struct recovery_journal *journal);
+static void recycle_journal_block(struct recovery_journal_block *block);
+static void notify_commit_waiters(struct recovery_journal *journal);
+
+/**
+ * suspend_lock_counter() - Prevent the lock counter from notifying.
+ * @counter: The counter.
+ *
+ * Return: true if the lock counter was not notifying and hence the suspend was efficacious.
+ */
+static bool suspend_lock_counter(struct lock_counter *counter)
+{
+ int prior_state;
+
+ /*
+ * Extra barriers because this was originally developed using a CAS operation that
+ * implicitly had them.
+ */
+ smp_mb__before_atomic();
+ prior_state = atomic_cmpxchg(&counter->state, LOCK_COUNTER_STATE_NOT_NOTIFYING,
+ LOCK_COUNTER_STATE_SUSPENDED);
+ /* same as before_atomic */
+ smp_mb__after_atomic();
+
+ return ((prior_state == LOCK_COUNTER_STATE_SUSPENDED) ||
+ (prior_state == LOCK_COUNTER_STATE_NOT_NOTIFYING));
+}
+
+static inline bool is_read_only(struct recovery_journal *journal)
+{
+ return vdo_is_read_only(journal->flush_vio->completion.vdo);
+}
+
+/**
+ * check_for_drain_complete() - Check whether the journal has drained.
+ * @journal: The journal which may have just drained.
+ */
+static void check_for_drain_complete(struct recovery_journal *journal)
+{
+ int result = VDO_SUCCESS;
+
+ if (is_read_only(journal)) {
+ result = VDO_READ_ONLY;
+ /*
+ * Clean up any full active blocks which were not written due to read-only mode.
+ *
+ * FIXME: This would probably be better as a short-circuit in write_block().
+ */
+ notify_commit_waiters(journal);
+ recycle_journal_blocks(journal);
+
+ /* Release any data_vios waiting to be assigned entries. */
+ vdo_waitq_notify_all_waiters(&journal->entry_waiters,
+ continue_waiter, &result);
+ }
+
+ if (!vdo_is_state_draining(&journal->state) ||
+ journal->reaping ||
+ has_block_waiters(journal) ||
+ vdo_waitq_has_waiters(&journal->entry_waiters) ||
+ !suspend_lock_counter(&journal->lock_counter))
+ return;
+
+ if (vdo_is_state_saving(&journal->state)) {
+ if (journal->active_block != NULL) {
+ VDO_ASSERT_LOG_ONLY(((result == VDO_READ_ONLY) ||
+ !is_block_dirty(journal->active_block)),
+ "journal being saved has clean active block");
+ recycle_journal_block(journal->active_block);
+ }
+
+ VDO_ASSERT_LOG_ONLY(list_empty(&journal->active_tail_blocks),
+ "all blocks in a journal being saved must be inactive");
+ }
+
+ vdo_finish_draining_with_result(&journal->state, result);
+}
+
+/**
+ * notify_recovery_journal_of_read_only_mode() - Notify a recovery journal that the VDO has gone
+ * read-only.
+ * @listener: The journal.
+ * @parent: The completion to notify in order to acknowledge the notification.
+ *
+ * Implements vdo_read_only_notification_fn.
+ */
+static void notify_recovery_journal_of_read_only_mode(void *listener,
+ struct vdo_completion *parent)
+{
+ check_for_drain_complete(listener);
+ vdo_finish_completion(parent);
+}
+
+/**
+ * enter_journal_read_only_mode() - Put the journal in read-only mode.
+ * @journal: The journal which has failed.
+ * @error_code: The error result triggering this call.
+ *
+ * All attempts to add entries after this function is called will fail. All VIOs waiting for
+ * commits will be awakened with an error.
+ */
+static void enter_journal_read_only_mode(struct recovery_journal *journal,
+ int error_code)
+{
+ vdo_enter_read_only_mode(journal->flush_vio->completion.vdo, error_code);
+ check_for_drain_complete(journal);
+}
+
+/**
+ * vdo_get_recovery_journal_current_sequence_number() - Obtain the recovery journal's current
+ * sequence number.
+ * @journal: The journal in question.
+ *
+ * Exposed only so the block map can be initialized therefrom.
+ *
+ * Return: The sequence number of the tail block.
+ */
+sequence_number_t vdo_get_recovery_journal_current_sequence_number(struct recovery_journal *journal)
+{
+ return journal->tail;
+}
+
+/**
+ * get_recovery_journal_head() - Get the head of the recovery journal.
+ * @journal: The journal.
+ *
+ * The head is the lowest sequence number of the block map head and the slab journal head.
+ *
+ * Return: the head of the journal.
+ */
+static inline sequence_number_t get_recovery_journal_head(const struct recovery_journal *journal)
+{
+ return min(journal->block_map_head, journal->slab_journal_head);
+}
+
+/**
+ * compute_recovery_count_byte() - Compute the recovery count byte for a given recovery count.
+ * @recovery_count: The recovery count.
+ *
+ * Return: The byte corresponding to the recovery count.
+ */
+static inline u8 __must_check compute_recovery_count_byte(u64 recovery_count)
+{
+ return (u8)(recovery_count & RECOVERY_COUNT_MASK);
+}
+
+/**
+ * check_slab_journal_commit_threshold() - Check whether the journal is over the threshold, and if
+ * so, force the oldest slab journal tail block to commit.
+ * @journal: The journal.
+ */
+static void check_slab_journal_commit_threshold(struct recovery_journal *journal)
+{
+ block_count_t current_length = journal->tail - journal->slab_journal_head;
+
+ if (current_length > journal->slab_journal_commit_threshold) {
+ journal->events.slab_journal_commits_requested++;
+ vdo_commit_oldest_slab_journal_tail_blocks(journal->depot,
+ journal->slab_journal_head);
+ }
+}
+
+static void reap_recovery_journal(struct recovery_journal *journal);
+static void assign_entries(struct recovery_journal *journal);
+
+/**
+ * finish_reaping() - Finish reaping the journal.
+ * @journal: The journal being reaped.
+ */
+static void finish_reaping(struct recovery_journal *journal)
+{
+ block_count_t blocks_reaped;
+ sequence_number_t old_head = get_recovery_journal_head(journal);
+
+ journal->block_map_head = journal->block_map_reap_head;
+ journal->slab_journal_head = journal->slab_journal_reap_head;
+ blocks_reaped = get_recovery_journal_head(journal) - old_head;
+ journal->available_space += blocks_reaped * journal->entries_per_block;
+ journal->reaping = false;
+ check_slab_journal_commit_threshold(journal);
+ assign_entries(journal);
+ check_for_drain_complete(journal);
+}
+
+/**
+ * complete_reaping() - Finish reaping the journal after flushing the lower layer.
+ * @completion: The journal's flush VIO.
+ *
+ * This is the callback registered in reap_recovery_journal().
+ */
+static void complete_reaping(struct vdo_completion *completion)
+{
+ struct recovery_journal *journal = completion->parent;
+
+ finish_reaping(journal);
+
+ /* Try reaping again in case more locks were released while flush was out. */
+ reap_recovery_journal(journal);
+}
+
+/**
+ * handle_flush_error() - Handle an error when flushing the lower layer due to reaping.
+ * @completion: The journal's flush VIO.
+ */
+static void handle_flush_error(struct vdo_completion *completion)
+{
+ struct recovery_journal *journal = completion->parent;
+
+ vio_record_metadata_io_error(as_vio(completion));
+ journal->reaping = false;
+ enter_journal_read_only_mode(journal, completion->result);
+}
+
+static void flush_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct recovery_journal *journal = vio->completion.parent;
+
+ continue_vio_after_io(vio, complete_reaping, journal->thread_id);
+}
+
+/**
+ * initialize_journal_state() - Set all journal fields appropriately to start journaling from the
+ * current active block.
+ * @journal: The journal to be reset based on its active block.
+ */
+static void initialize_journal_state(struct recovery_journal *journal)
+{
+ journal->append_point.sequence_number = journal->tail;
+ journal->last_write_acknowledged = journal->tail;
+ journal->block_map_head = journal->tail;
+ journal->slab_journal_head = journal->tail;
+ journal->block_map_reap_head = journal->tail;
+ journal->slab_journal_reap_head = journal->tail;
+ journal->block_map_head_block_number =
+ vdo_get_recovery_journal_block_number(journal, journal->block_map_head);
+ journal->slab_journal_head_block_number =
+ vdo_get_recovery_journal_block_number(journal,
+ journal->slab_journal_head);
+ journal->available_space =
+ (journal->entries_per_block * vdo_get_recovery_journal_length(journal->size));
+}
+
+/**
+ * vdo_get_recovery_journal_length() - Get the number of usable recovery journal blocks.
+ * @journal_size: The size of the recovery journal in blocks.
+ *
+ * Return: the number of recovery journal blocks usable for entries.
+ */
+block_count_t vdo_get_recovery_journal_length(block_count_t journal_size)
+{
+ block_count_t reserved_blocks = journal_size / 4;
+
+ if (reserved_blocks > RECOVERY_JOURNAL_RESERVED_BLOCKS)
+ reserved_blocks = RECOVERY_JOURNAL_RESERVED_BLOCKS;
+ return (journal_size - reserved_blocks);
+}
+
+/**
+ * reap_recovery_journal_callback() - Attempt to reap the journal.
+ * @completion: The lock counter completion.
+ *
+ * Attempts to reap the journal now that all the locks on some journal block have been released.
+ * This is the callback registered with the lock counter.
+ */
+static void reap_recovery_journal_callback(struct vdo_completion *completion)
+{
+ struct recovery_journal *journal = (struct recovery_journal *) completion->parent;
+ /*
+ * The acknowledgment must be done before reaping so that there is no race between
+ * acknowledging the notification and unlocks wishing to notify.
+ */
+ smp_wmb();
+ atomic_set(&journal->lock_counter.state, LOCK_COUNTER_STATE_NOT_NOTIFYING);
+
+ if (vdo_is_state_quiescing(&journal->state)) {
+ /*
+ * Don't start reaping when the journal is trying to quiesce. Do check if this
+ * notification is the last thing the is waiting on.
+ */
+ check_for_drain_complete(journal);
+ return;
+ }
+
+ reap_recovery_journal(journal);
+ check_slab_journal_commit_threshold(journal);
+}
+
+/**
+ * initialize_lock_counter() - Initialize a lock counter.
+ *
+ * @journal: The recovery journal.
+ * @vdo: The vdo.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check initialize_lock_counter(struct recovery_journal *journal,
+ struct vdo *vdo)
+{
+ int result;
+ struct thread_config *config = &vdo->thread_config;
+ struct lock_counter *counter = &journal->lock_counter;
+
+ result = vdo_allocate(journal->size, u16, __func__, &counter->journal_counters);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(journal->size, atomic_t, __func__,
+ &counter->journal_decrement_counts);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(journal->size * config->logical_zone_count, u16, __func__,
+ &counter->logical_counters);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(journal->size, atomic_t, __func__,
+ &counter->logical_zone_counts);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(journal->size * config->physical_zone_count, u16, __func__,
+ &counter->physical_counters);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(journal->size, atomic_t, __func__,
+ &counter->physical_zone_counts);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_initialize_completion(&counter->completion, vdo,
+ VDO_LOCK_COUNTER_COMPLETION);
+ vdo_prepare_completion(&counter->completion, reap_recovery_journal_callback,
+ reap_recovery_journal_callback, config->journal_thread,
+ journal);
+ counter->logical_zones = config->logical_zone_count;
+ counter->physical_zones = config->physical_zone_count;
+ counter->locks = journal->size;
+ return VDO_SUCCESS;
+}
+
+/**
+ * set_journal_tail() - Set the journal's tail sequence number.
+ * @journal: The journal whose tail is to be set.
+ * @tail: The new tail value.
+ */
+static void set_journal_tail(struct recovery_journal *journal, sequence_number_t tail)
+{
+ /* VDO does not support sequence numbers above 1 << 48 in the slab journal. */
+ if (tail >= (1ULL << 48))
+ enter_journal_read_only_mode(journal, VDO_JOURNAL_OVERFLOW);
+
+ journal->tail = tail;
+}
+
+/**
+ * initialize_recovery_block() - Initialize a journal block.
+ * @vdo: The vdo from which to construct vios.
+ * @journal: The journal to which the block will belong.
+ * @block: The block to initialize.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int initialize_recovery_block(struct vdo *vdo, struct recovery_journal *journal,
+ struct recovery_journal_block *block)
+{
+ char *data;
+ int result;
+
+ /*
+ * Ensure that a block is large enough to store RECOVERY_JOURNAL_ENTRIES_PER_BLOCK entries.
+ */
+ BUILD_BUG_ON(RECOVERY_JOURNAL_ENTRIES_PER_BLOCK >
+ ((VDO_BLOCK_SIZE - sizeof(struct packed_journal_header)) /
+ sizeof(struct packed_recovery_journal_entry)));
+
+ /*
+ * Allocate a full block for the journal block even though not all of the space is used
+ * since the VIO needs to write a full disk block.
+ */
+ result = vdo_allocate(VDO_BLOCK_SIZE, char, __func__, &data);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = allocate_vio_components(vdo, VIO_TYPE_RECOVERY_JOURNAL,
+ VIO_PRIORITY_HIGH, block, 1, data, &block->vio);
+ if (result != VDO_SUCCESS) {
+ vdo_free(data);
+ return result;
+ }
+
+ list_add_tail(&block->list_node, &journal->free_tail_blocks);
+ block->journal = journal;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_decode_recovery_journal() - Make a recovery journal and initialize it with the state that
+ * was decoded from the super block.
+ *
+ * @state: The decoded state of the journal.
+ * @nonce: The nonce of the VDO.
+ * @vdo: The VDO.
+ * @partition: The partition for the journal.
+ * @recovery_count: The VDO's number of completed recoveries.
+ * @journal_size: The number of blocks in the journal on disk.
+ * @journal_ptr: The pointer to hold the new recovery journal.
+ *
+ * Return: A success or error code.
+ */
+int vdo_decode_recovery_journal(struct recovery_journal_state_7_0 state, nonce_t nonce,
+ struct vdo *vdo, struct partition *partition,
+ u64 recovery_count, block_count_t journal_size,
+ struct recovery_journal **journal_ptr)
+{
+ block_count_t i;
+ struct recovery_journal *journal;
+ int result;
+
+ result = vdo_allocate_extended(struct recovery_journal,
+ RECOVERY_JOURNAL_RESERVED_BLOCKS,
+ struct recovery_journal_block, __func__,
+ &journal);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ INIT_LIST_HEAD(&journal->free_tail_blocks);
+ INIT_LIST_HEAD(&journal->active_tail_blocks);
+ vdo_waitq_init(&journal->pending_writes);
+
+ journal->thread_id = vdo->thread_config.journal_thread;
+ journal->origin = partition->offset;
+ journal->nonce = nonce;
+ journal->recovery_count = compute_recovery_count_byte(recovery_count);
+ journal->size = journal_size;
+ journal->slab_journal_commit_threshold = (journal_size * 2) / 3;
+ journal->logical_blocks_used = state.logical_blocks_used;
+ journal->block_map_data_blocks = state.block_map_data_blocks;
+ journal->entries_per_block = RECOVERY_JOURNAL_ENTRIES_PER_BLOCK;
+ set_journal_tail(journal, state.journal_start);
+ initialize_journal_state(journal);
+ /* TODO: this will have to change if we make initial resume of a VDO a real resume */
+ vdo_set_admin_state_code(&journal->state, VDO_ADMIN_STATE_SUSPENDED);
+
+ for (i = 0; i < RECOVERY_JOURNAL_RESERVED_BLOCKS; i++) {
+ struct recovery_journal_block *block = &journal->blocks[i];
+
+ result = initialize_recovery_block(vdo, journal, block);
+ if (result != VDO_SUCCESS) {
+ vdo_free_recovery_journal(journal);
+ return result;
+ }
+ }
+
+ result = initialize_lock_counter(journal, vdo);
+ if (result != VDO_SUCCESS) {
+ vdo_free_recovery_journal(journal);
+ return result;
+ }
+
+ result = create_metadata_vio(vdo, VIO_TYPE_RECOVERY_JOURNAL, VIO_PRIORITY_HIGH,
+ journal, NULL, &journal->flush_vio);
+ if (result != VDO_SUCCESS) {
+ vdo_free_recovery_journal(journal);
+ return result;
+ }
+
+ result = vdo_register_read_only_listener(vdo, journal,
+ notify_recovery_journal_of_read_only_mode,
+ journal->thread_id);
+ if (result != VDO_SUCCESS) {
+ vdo_free_recovery_journal(journal);
+ return result;
+ }
+
+ result = vdo_make_default_thread(vdo, journal->thread_id);
+ if (result != VDO_SUCCESS) {
+ vdo_free_recovery_journal(journal);
+ return result;
+ }
+
+ journal->flush_vio->completion.callback_thread_id = journal->thread_id;
+ *journal_ptr = journal;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_free_recovery_journal() - Free a recovery journal.
+ * @journal: The recovery journal to free.
+ */
+void vdo_free_recovery_journal(struct recovery_journal *journal)
+{
+ block_count_t i;
+
+ if (journal == NULL)
+ return;
+
+ vdo_free(vdo_forget(journal->lock_counter.logical_zone_counts));
+ vdo_free(vdo_forget(journal->lock_counter.physical_zone_counts));
+ vdo_free(vdo_forget(journal->lock_counter.journal_counters));
+ vdo_free(vdo_forget(journal->lock_counter.journal_decrement_counts));
+ vdo_free(vdo_forget(journal->lock_counter.logical_counters));
+ vdo_free(vdo_forget(journal->lock_counter.physical_counters));
+ free_vio(vdo_forget(journal->flush_vio));
+
+ /*
+ * FIXME: eventually, the journal should be constructed in a quiescent state which
+ * requires opening before use.
+ */
+ if (!vdo_is_state_quiescent(&journal->state)) {
+ VDO_ASSERT_LOG_ONLY(list_empty(&journal->active_tail_blocks),
+ "journal being freed has no active tail blocks");
+ } else if (!vdo_is_state_saved(&journal->state) &&
+ !list_empty(&journal->active_tail_blocks)) {
+ vdo_log_warning("journal being freed has uncommitted entries");
+ }
+
+ for (i = 0; i < RECOVERY_JOURNAL_RESERVED_BLOCKS; i++) {
+ struct recovery_journal_block *block = &journal->blocks[i];
+
+ vdo_free(vdo_forget(block->vio.data));
+ free_vio_components(&block->vio);
+ }
+
+ vdo_free(journal);
+}
+
+/**
+ * vdo_initialize_recovery_journal_post_repair() - Initialize the journal after a repair.
+ * @journal: The journal in question.
+ * @recovery_count: The number of completed recoveries.
+ * @tail: The new tail block sequence number.
+ * @logical_blocks_used: The new number of logical blocks used.
+ * @block_map_data_blocks: The new number of block map data blocks.
+ */
+void vdo_initialize_recovery_journal_post_repair(struct recovery_journal *journal,
+ u64 recovery_count,
+ sequence_number_t tail,
+ block_count_t logical_blocks_used,
+ block_count_t block_map_data_blocks)
+{
+ set_journal_tail(journal, tail + 1);
+ journal->recovery_count = compute_recovery_count_byte(recovery_count);
+ initialize_journal_state(journal);
+ journal->logical_blocks_used = logical_blocks_used;
+ journal->block_map_data_blocks = block_map_data_blocks;
+}
+
+/**
+ * vdo_get_journal_block_map_data_blocks_used() - Get the number of block map pages, allocated from
+ * data blocks, currently in use.
+ * @journal: The journal in question.
+ *
+ * Return: The number of block map pages allocated from slabs.
+ */
+block_count_t vdo_get_journal_block_map_data_blocks_used(struct recovery_journal *journal)
+{
+ return journal->block_map_data_blocks;
+}
+
+/**
+ * vdo_get_recovery_journal_thread_id() - Get the ID of a recovery journal's thread.
+ * @journal: The journal to query.
+ *
+ * Return: The ID of the journal's thread.
+ */
+thread_id_t vdo_get_recovery_journal_thread_id(struct recovery_journal *journal)
+{
+ return journal->thread_id;
+}
+
+/**
+ * vdo_open_recovery_journal() - Prepare the journal for new entries.
+ * @journal: The journal in question.
+ * @depot: The slab depot for this VDO.
+ * @block_map: The block map for this VDO.
+ */
+void vdo_open_recovery_journal(struct recovery_journal *journal,
+ struct slab_depot *depot, struct block_map *block_map)
+{
+ journal->depot = depot;
+ journal->block_map = block_map;
+ WRITE_ONCE(journal->state.current_state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+}
+
+/**
+ * vdo_record_recovery_journal() - Record the state of a recovery journal for encoding in the super
+ * block.
+ * @journal: the recovery journal.
+ *
+ * Return: the state of the journal.
+ */
+struct recovery_journal_state_7_0
+vdo_record_recovery_journal(const struct recovery_journal *journal)
+{
+ struct recovery_journal_state_7_0 state = {
+ .logical_blocks_used = journal->logical_blocks_used,
+ .block_map_data_blocks = journal->block_map_data_blocks,
+ };
+
+ if (vdo_is_state_saved(&journal->state)) {
+ /*
+ * If the journal is saved, we should start one past the active block (since the
+ * active block is not guaranteed to be empty).
+ */
+ state.journal_start = journal->tail;
+ } else {
+ /*
+ * When we're merely suspended or have gone read-only, we must record the first
+ * block that might have entries that need to be applied.
+ */
+ state.journal_start = get_recovery_journal_head(journal);
+ }
+
+ return state;
+}
+
+/**
+ * get_block_header() - Get a pointer to the packed journal block header in the block buffer.
+ * @block: The recovery block.
+ *
+ * Return: The block's header.
+ */
+static inline struct packed_journal_header *
+get_block_header(const struct recovery_journal_block *block)
+{
+ return (struct packed_journal_header *) block->vio.data;
+}
+
+/**
+ * set_active_sector() - Set the current sector of the current block and initialize it.
+ * @block: The block to update.
+ * @sector: A pointer to the first byte of the new sector.
+ */
+static void set_active_sector(struct recovery_journal_block *block, void *sector)
+{
+ block->sector = sector;
+ block->sector->check_byte = get_block_header(block)->check_byte;
+ block->sector->recovery_count = block->journal->recovery_count;
+ block->sector->entry_count = 0;
+}
+
+/**
+ * advance_tail() - Advance the tail of the journal.
+ * @journal: The journal whose tail should be advanced.
+ *
+ * Return: true if the tail was advanced.
+ */
+static bool advance_tail(struct recovery_journal *journal)
+{
+ struct recovery_block_header unpacked;
+ struct packed_journal_header *header;
+ struct recovery_journal_block *block;
+
+ block = journal->active_block = pop_free_list(journal);
+ if (block == NULL)
+ return false;
+
+ list_move_tail(&block->list_node, &journal->active_tail_blocks);
+
+ unpacked = (struct recovery_block_header) {
+ .metadata_type = VDO_METADATA_RECOVERY_JOURNAL_2,
+ .block_map_data_blocks = journal->block_map_data_blocks,
+ .logical_blocks_used = journal->logical_blocks_used,
+ .nonce = journal->nonce,
+ .recovery_count = journal->recovery_count,
+ .sequence_number = journal->tail,
+ .check_byte = vdo_compute_recovery_journal_check_byte(journal,
+ journal->tail),
+ };
+
+ header = get_block_header(block);
+ memset(block->vio.data, 0x0, VDO_BLOCK_SIZE);
+ block->sequence_number = journal->tail;
+ block->entry_count = 0;
+ block->uncommitted_entry_count = 0;
+ block->block_number = vdo_get_recovery_journal_block_number(journal,
+ journal->tail);
+
+ vdo_pack_recovery_block_header(&unpacked, header);
+ set_active_sector(block, vdo_get_journal_block_sector(header, 1));
+ set_journal_tail(journal, journal->tail + 1);
+ vdo_advance_block_map_era(journal->block_map, journal->tail);
+ return true;
+}
+
+/**
+ * initialize_lock_count() - Initialize the value of the journal zone's counter for a given lock.
+ * @journal: The recovery journal.
+ *
+ * Context: This must be called from the journal zone.
+ */
+static void initialize_lock_count(struct recovery_journal *journal)
+{
+ u16 *journal_value;
+ block_count_t lock_number = journal->active_block->block_number;
+ atomic_t *decrement_counter = get_decrement_counter(journal, lock_number);
+
+ journal_value = get_counter(journal, lock_number, VDO_ZONE_TYPE_JOURNAL, 0);
+ VDO_ASSERT_LOG_ONLY((*journal_value == atomic_read(decrement_counter)),
+ "count to be initialized not in use");
+ *journal_value = journal->entries_per_block + 1;
+ atomic_set(decrement_counter, 0);
+}
+
+/**
+ * prepare_to_assign_entry() - Prepare the currently active block to receive an entry and check
+ * whether an entry of the given type may be assigned at this time.
+ * @journal: The journal receiving an entry.
+ *
+ * Return: true if there is space in the journal to store an entry of the specified type.
+ */
+static bool prepare_to_assign_entry(struct recovery_journal *journal)
+{
+ if (journal->available_space == 0)
+ return false;
+
+ if (is_block_full(journal->active_block) && !advance_tail(journal))
+ return false;
+
+ if (!is_block_empty(journal->active_block))
+ return true;
+
+ if ((journal->tail - get_recovery_journal_head(journal)) > journal->size) {
+ /* Cannot use this block since the journal is full. */
+ journal->events.disk_full++;
+ return false;
+ }
+
+ /*
+ * Don't allow the new block to be reaped until all of its entries have been committed to
+ * the block map and until the journal block has been fully committed as well. Because the
+ * block map update is done only after any slab journal entries have been made, the
+ * per-entry lock for the block map entry serves to protect those as well.
+ */
+ initialize_lock_count(journal);
+ return true;
+}
+
+static void write_blocks(struct recovery_journal *journal);
+
+/**
+ * schedule_block_write() - Queue a block for writing.
+ * @journal: The journal in question.
+ * @block: The block which is now ready to write.
+ *
+ * The block is expected to be full. If the block is currently writing, this is a noop as the block
+ * will be queued for writing when the write finishes. The block must not currently be queued for
+ * writing.
+ */
+static void schedule_block_write(struct recovery_journal *journal,
+ struct recovery_journal_block *block)
+{
+ if (!block->committing)
+ vdo_waitq_enqueue_waiter(&journal->pending_writes, &block->write_waiter);
+ /*
+ * At the end of adding entries, or discovering this partial block is now full and ready to
+ * rewrite, we will call write_blocks() and write a whole batch.
+ */
+}
+
+/**
+ * release_journal_block_reference() - Release a reference to a journal block.
+ * @block: The journal block from which to release a reference.
+ */
+static void release_journal_block_reference(struct recovery_journal_block *block)
+{
+ vdo_release_recovery_journal_block_reference(block->journal,
+ block->sequence_number,
+ VDO_ZONE_TYPE_JOURNAL, 0);
+}
+
+static void update_usages(struct recovery_journal *journal, struct data_vio *data_vio)
+{
+ if (data_vio->increment_updater.operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING) {
+ journal->block_map_data_blocks++;
+ return;
+ }
+
+ if (data_vio->new_mapped.state != VDO_MAPPING_STATE_UNMAPPED)
+ journal->logical_blocks_used++;
+
+ if (data_vio->mapped.state != VDO_MAPPING_STATE_UNMAPPED)
+ journal->logical_blocks_used--;
+}
+
+/**
+ * assign_entry() - Assign an entry waiter to the active block.
+ *
+ * Implements waiter_callback_fn.
+ */
+static void assign_entry(struct vdo_waiter *waiter, void *context)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+ struct recovery_journal_block *block = context;
+ struct recovery_journal *journal = block->journal;
+
+ /* Record the point at which we will make the journal entry. */
+ data_vio->recovery_journal_point = (struct journal_point) {
+ .sequence_number = block->sequence_number,
+ .entry_count = block->entry_count,
+ };
+
+ update_usages(journal, data_vio);
+ journal->available_space--;
+
+ if (!vdo_waitq_has_waiters(&block->entry_waiters))
+ journal->events.blocks.started++;
+
+ vdo_waitq_enqueue_waiter(&block->entry_waiters, &data_vio->waiter);
+ block->entry_count++;
+ block->uncommitted_entry_count++;
+ journal->events.entries.started++;
+
+ if (is_block_full(block)) {
+ /*
+ * The block is full, so we can write it anytime henceforth. If it is already
+ * committing, we'll queue it for writing when it comes back.
+ */
+ schedule_block_write(journal, block);
+ }
+
+ /* Force out slab journal tail blocks when threshold is reached. */
+ check_slab_journal_commit_threshold(journal);
+}
+
+static void assign_entries(struct recovery_journal *journal)
+{
+ if (journal->adding_entries) {
+ /* Protect against re-entrancy. */
+ return;
+ }
+
+ journal->adding_entries = true;
+ while (vdo_waitq_has_waiters(&journal->entry_waiters) &&
+ prepare_to_assign_entry(journal)) {
+ vdo_waitq_notify_next_waiter(&journal->entry_waiters,
+ assign_entry, journal->active_block);
+ }
+
+ /* Now that we've finished with entries, see if we have a batch of blocks to write. */
+ write_blocks(journal);
+ journal->adding_entries = false;
+}
+
+/**
+ * recycle_journal_block() - Prepare an in-memory journal block to be reused now that it has been
+ * fully committed.
+ * @block: The block to be recycled.
+ */
+static void recycle_journal_block(struct recovery_journal_block *block)
+{
+ struct recovery_journal *journal = block->journal;
+ block_count_t i;
+
+ list_move_tail(&block->list_node, &journal->free_tail_blocks);
+
+ /* Release any unused entry locks. */
+ for (i = block->entry_count; i < journal->entries_per_block; i++)
+ release_journal_block_reference(block);
+
+ /*
+ * Release our own lock against reaping now that the block is completely committed, or
+ * we're giving up because we're in read-only mode.
+ */
+ if (block->entry_count > 0)
+ release_journal_block_reference(block);
+
+ if (block == journal->active_block)
+ journal->active_block = NULL;
+}
+
+/**
+ * continue_committed_waiter() - invoked whenever a VIO is to be released from the journal because
+ * its entry was committed to disk.
+ *
+ * Implements waiter_callback_fn.
+ */
+static void continue_committed_waiter(struct vdo_waiter *waiter, void *context)
+{
+ struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
+ struct recovery_journal *journal = context;
+ int result = (is_read_only(journal) ? VDO_READ_ONLY : VDO_SUCCESS);
+ bool has_decrement;
+
+ VDO_ASSERT_LOG_ONLY(vdo_before_journal_point(&journal->commit_point,
+ &data_vio->recovery_journal_point),
+ "DataVIOs released from recovery journal in order. Recovery journal point is (%llu, %u), but commit waiter point is (%llu, %u)",
+ (unsigned long long) journal->commit_point.sequence_number,
+ journal->commit_point.entry_count,
+ (unsigned long long) data_vio->recovery_journal_point.sequence_number,
+ data_vio->recovery_journal_point.entry_count);
+
+ journal->commit_point = data_vio->recovery_journal_point;
+ data_vio->last_async_operation = VIO_ASYNC_OP_UPDATE_REFERENCE_COUNTS;
+ if (result != VDO_SUCCESS) {
+ continue_data_vio_with_error(data_vio, result);
+ return;
+ }
+
+ /*
+ * The increment must be launched first since it must come before the
+ * decrement if they are in the same slab.
+ */
+ has_decrement = (data_vio->decrement_updater.zpbn.pbn != VDO_ZERO_BLOCK);
+ if ((data_vio->increment_updater.zpbn.pbn != VDO_ZERO_BLOCK) || !has_decrement)
+ continue_data_vio(data_vio);
+
+ if (has_decrement)
+ vdo_launch_completion(&data_vio->decrement_completion);
+}
+
+/**
+ * notify_commit_waiters() - Notify any VIOs whose entries have now committed.
+ * @journal: The recovery journal to update.
+ */
+static void notify_commit_waiters(struct recovery_journal *journal)
+{
+ struct recovery_journal_block *block;
+
+ list_for_each_entry(block, &journal->active_tail_blocks, list_node) {
+ if (block->committing)
+ return;
+
+ vdo_waitq_notify_all_waiters(&block->commit_waiters,
+ continue_committed_waiter, journal);
+ if (is_read_only(journal)) {
+ vdo_waitq_notify_all_waiters(&block->entry_waiters,
+ continue_committed_waiter,
+ journal);
+ } else if (is_block_dirty(block) || !is_block_full(block)) {
+ /* Stop at partially-committed or partially-filled blocks. */
+ return;
+ }
+ }
+}
+
+/**
+ * recycle_journal_blocks() - Recycle any journal blocks which have been fully committed.
+ * @journal: The recovery journal to update.
+ */
+static void recycle_journal_blocks(struct recovery_journal *journal)
+{
+ struct recovery_journal_block *block, *tmp;
+
+ list_for_each_entry_safe(block, tmp, &journal->active_tail_blocks, list_node) {
+ if (block->committing) {
+ /* Don't recycle committing blocks. */
+ return;
+ }
+
+ if (!is_read_only(journal) &&
+ (is_block_dirty(block) || !is_block_full(block))) {
+ /*
+ * Don't recycle partially written or partially full blocks, except in
+ * read-only mode.
+ */
+ return;
+ }
+
+ recycle_journal_block(block);
+ }
+}
+
+/**
+ * complete_write() - Handle post-commit processing.
+ * @completion: The completion of the VIO writing this block.
+ *
+ * This is the callback registered by write_block(). If more entries accumulated in the block being
+ * committed while the commit was in progress, another commit will be initiated.
+ */
+static void complete_write(struct vdo_completion *completion)
+{
+ struct recovery_journal_block *block = completion->parent;
+ struct recovery_journal *journal = block->journal;
+ struct recovery_journal_block *last_active_block;
+
+ assert_on_journal_thread(journal, __func__);
+
+ journal->pending_write_count -= 1;
+ journal->events.blocks.committed += 1;
+ journal->events.entries.committed += block->entries_in_commit;
+ block->uncommitted_entry_count -= block->entries_in_commit;
+ block->entries_in_commit = 0;
+ block->committing = false;
+
+ /* If this block is the latest block to be acknowledged, record that fact. */
+ if (block->sequence_number > journal->last_write_acknowledged)
+ journal->last_write_acknowledged = block->sequence_number;
+
+ last_active_block = get_journal_block(&journal->active_tail_blocks);
+ VDO_ASSERT_LOG_ONLY((block->sequence_number >= last_active_block->sequence_number),
+ "completed journal write is still active");
+
+ notify_commit_waiters(journal);
+
+ /*
+ * Is this block now full? Reaping, and adding entries, might have already sent it off for
+ * rewriting; else, queue it for rewrite.
+ */
+ if (is_block_dirty(block) && is_block_full(block))
+ schedule_block_write(journal, block);
+
+ recycle_journal_blocks(journal);
+ write_blocks(journal);
+
+ check_for_drain_complete(journal);
+}
+
+static void handle_write_error(struct vdo_completion *completion)
+{
+ struct recovery_journal_block *block = completion->parent;
+ struct recovery_journal *journal = block->journal;
+
+ vio_record_metadata_io_error(as_vio(completion));
+ vdo_log_error_strerror(completion->result,
+ "cannot write recovery journal block %llu",
+ (unsigned long long) block->sequence_number);
+ enter_journal_read_only_mode(journal, completion->result);
+ complete_write(completion);
+}
+
+static void complete_write_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct recovery_journal_block *block = vio->completion.parent;
+ struct recovery_journal *journal = block->journal;
+
+ continue_vio_after_io(vio, complete_write, journal->thread_id);
+}
+
+/**
+ * add_queued_recovery_entries() - Actually add entries from the queue to the given block.
+ * @block: The journal block.
+ */
+static void add_queued_recovery_entries(struct recovery_journal_block *block)
+{
+ while (vdo_waitq_has_waiters(&block->entry_waiters)) {
+ struct data_vio *data_vio =
+ vdo_waiter_as_data_vio(vdo_waitq_dequeue_waiter(&block->entry_waiters));
+ struct tree_lock *lock = &data_vio->tree_lock;
+ struct packed_recovery_journal_entry *packed_entry;
+ struct recovery_journal_entry new_entry;
+
+ if (block->sector->entry_count == RECOVERY_JOURNAL_ENTRIES_PER_SECTOR)
+ set_active_sector(block,
+ (char *) block->sector + VDO_SECTOR_SIZE);
+
+ /* Compose and encode the entry. */
+ packed_entry = &block->sector->entries[block->sector->entry_count++];
+ new_entry = (struct recovery_journal_entry) {
+ .mapping = {
+ .pbn = data_vio->increment_updater.zpbn.pbn,
+ .state = data_vio->increment_updater.zpbn.state,
+ },
+ .unmapping = {
+ .pbn = data_vio->decrement_updater.zpbn.pbn,
+ .state = data_vio->decrement_updater.zpbn.state,
+ },
+ .operation = data_vio->increment_updater.operation,
+ .slot = lock->tree_slots[lock->height].block_map_slot,
+ };
+ *packed_entry = vdo_pack_recovery_journal_entry(&new_entry);
+ data_vio->recovery_sequence_number = block->sequence_number;
+
+ /* Enqueue the data_vio to wait for its entry to commit. */
+ vdo_waitq_enqueue_waiter(&block->commit_waiters, &data_vio->waiter);
+ }
+}
+
+/**
+ * write_block() - Issue a block for writing.
+ *
+ * Implements waiter_callback_fn.
+ */
+static void write_block(struct vdo_waiter *waiter, void *context __always_unused)
+{
+ struct recovery_journal_block *block =
+ container_of(waiter, struct recovery_journal_block, write_waiter);
+ struct recovery_journal *journal = block->journal;
+ struct packed_journal_header *header = get_block_header(block);
+
+ if (block->committing || !vdo_waitq_has_waiters(&block->entry_waiters) ||
+ is_read_only(journal))
+ return;
+
+ block->entries_in_commit = vdo_waitq_num_waiters(&block->entry_waiters);
+ add_queued_recovery_entries(block);
+
+ journal->pending_write_count += 1;
+ journal->events.blocks.written += 1;
+ journal->events.entries.written += block->entries_in_commit;
+
+ header->block_map_head = __cpu_to_le64(journal->block_map_head);
+ header->slab_journal_head = __cpu_to_le64(journal->slab_journal_head);
+ header->entry_count = __cpu_to_le16(block->entry_count);
+
+ block->committing = true;
+
+ /*
+ * We must issue a flush and a FUA for every commit. The flush is necessary to ensure that
+ * the data being referenced is stable. The FUA is necessary to ensure that the journal
+ * block itself is stable before allowing overwrites of the lbn's previous data.
+ */
+ vdo_submit_metadata_vio(&block->vio, journal->origin + block->block_number,
+ complete_write_endio, handle_write_error,
+ REQ_OP_WRITE | REQ_PRIO | REQ_PREFLUSH | REQ_SYNC | REQ_FUA);
+}
+
+
+/**
+ * write_blocks() - Attempt to commit blocks, according to write policy.
+ * @journal: The recovery journal.
+ */
+static void write_blocks(struct recovery_journal *journal)
+{
+ assert_on_journal_thread(journal, __func__);
+ /*
+ * We call this function after adding entries to the journal and after finishing a block
+ * write. Thus, when this function terminates we must either have no VIOs waiting in the
+ * journal or have some outstanding IO to provide a future wakeup.
+ *
+ * We want to only issue full blocks if there are no pending writes. However, if there are
+ * no outstanding writes and some unwritten entries, we must issue a block, even if it's
+ * the active block and it isn't full.
+ */
+ if (journal->pending_write_count > 0)
+ return;
+
+ /* Write all the full blocks. */
+ vdo_waitq_notify_all_waiters(&journal->pending_writes, write_block, NULL);
+
+ /*
+ * Do we need to write the active block? Only if we have no outstanding writes, even after
+ * issuing all of the full writes.
+ */
+ if ((journal->pending_write_count == 0) && (journal->active_block != NULL))
+ write_block(&journal->active_block->write_waiter, NULL);
+}
+
+/**
+ * vdo_add_recovery_journal_entry() - Add an entry to a recovery journal.
+ * @journal: The journal in which to make an entry.
+ * @data_vio: The data_vio for which to add the entry. The entry will be taken
+ * from the logical and new_mapped fields of the data_vio. The
+ * data_vio's recovery_sequence_number field will be set to the
+ * sequence number of the journal block in which the entry was
+ * made.
+ *
+ * This method is asynchronous. The data_vio will not be called back until the entry is committed
+ * to the on-disk journal.
+ */
+void vdo_add_recovery_journal_entry(struct recovery_journal *journal,
+ struct data_vio *data_vio)
+{
+ assert_on_journal_thread(journal, __func__);
+ if (!vdo_is_state_normal(&journal->state)) {
+ continue_data_vio_with_error(data_vio, VDO_INVALID_ADMIN_STATE);
+ return;
+ }
+
+ if (is_read_only(journal)) {
+ continue_data_vio_with_error(data_vio, VDO_READ_ONLY);
+ return;
+ }
+
+ VDO_ASSERT_LOG_ONLY(data_vio->recovery_sequence_number == 0,
+ "journal lock not held for new entry");
+
+ vdo_advance_journal_point(&journal->append_point, journal->entries_per_block);
+ vdo_waitq_enqueue_waiter(&journal->entry_waiters, &data_vio->waiter);
+ assign_entries(journal);
+}
+
+/**
+ * is_lock_locked() - Check whether a lock is locked for a zone type.
+ * @journal: The recovery journal.
+ * @lock_number: The lock to check.
+ * @zone_type: The type of the zone.
+ *
+ * If the recovery journal has a lock on the lock number, both logical and physical zones are
+ * considered locked.
+ *
+ * Return: true if the specified lock has references (is locked).
+ */
+static bool is_lock_locked(struct recovery_journal *journal, block_count_t lock_number,
+ enum vdo_zone_type zone_type)
+{
+ atomic_t *zone_count;
+ bool locked;
+
+ if (is_journal_zone_locked(journal, lock_number))
+ return true;
+
+ zone_count = get_zone_count_ptr(journal, lock_number, zone_type);
+ locked = (atomic_read(zone_count) != 0);
+ /* Pairs with implicit barrier in vdo_release_recovery_journal_block_reference() */
+ smp_rmb();
+ return locked;
+}
+
+/**
+ * reap_recovery_journal() - Conduct a sweep on a recovery journal to reclaim unreferenced blocks.
+ * @journal: The recovery journal.
+ */
+static void reap_recovery_journal(struct recovery_journal *journal)
+{
+ if (journal->reaping) {
+ /*
+ * We already have an outstanding reap in progress. We need to wait for it to
+ * finish.
+ */
+ return;
+ }
+
+ if (vdo_is_state_quiescent(&journal->state)) {
+ /* We are supposed to not do IO. Don't botch it by reaping. */
+ return;
+ }
+
+ /*
+ * Start reclaiming blocks only when the journal head has no references. Then stop when a
+ * block is referenced.
+ */
+ while ((journal->block_map_reap_head < journal->last_write_acknowledged) &&
+ !is_lock_locked(journal, journal->block_map_head_block_number,
+ VDO_ZONE_TYPE_LOGICAL)) {
+ journal->block_map_reap_head++;
+ if (++journal->block_map_head_block_number == journal->size)
+ journal->block_map_head_block_number = 0;
+ }
+
+ while ((journal->slab_journal_reap_head < journal->last_write_acknowledged) &&
+ !is_lock_locked(journal, journal->slab_journal_head_block_number,
+ VDO_ZONE_TYPE_PHYSICAL)) {
+ journal->slab_journal_reap_head++;
+ if (++journal->slab_journal_head_block_number == journal->size)
+ journal->slab_journal_head_block_number = 0;
+ }
+
+ if ((journal->block_map_reap_head == journal->block_map_head) &&
+ (journal->slab_journal_reap_head == journal->slab_journal_head)) {
+ /* Nothing happened. */
+ return;
+ }
+
+ /*
+ * If the block map head will advance, we must flush any block map page modified by the
+ * entries we are reaping. If the slab journal head will advance, we must flush the slab
+ * summary update covering the slab journal that just released some lock.
+ */
+ journal->reaping = true;
+ vdo_submit_flush_vio(journal->flush_vio, flush_endio, handle_flush_error);
+}
+
+/**
+ * vdo_acquire_recovery_journal_block_reference() - Acquire a reference to a recovery journal block
+ * from somewhere other than the journal itself.
+ * @journal: The recovery journal.
+ * @sequence_number: The journal sequence number of the referenced block.
+ * @zone_type: The type of the zone making the adjustment.
+ * @zone_id: The ID of the zone making the adjustment.
+ */
+void vdo_acquire_recovery_journal_block_reference(struct recovery_journal *journal,
+ sequence_number_t sequence_number,
+ enum vdo_zone_type zone_type,
+ zone_count_t zone_id)
+{
+ block_count_t lock_number;
+ u16 *current_value;
+
+ if (sequence_number == 0)
+ return;
+
+ VDO_ASSERT_LOG_ONLY((zone_type != VDO_ZONE_TYPE_JOURNAL),
+ "invalid lock count increment from journal zone");
+
+ lock_number = vdo_get_recovery_journal_block_number(journal, sequence_number);
+ current_value = get_counter(journal, lock_number, zone_type, zone_id);
+ VDO_ASSERT_LOG_ONLY(*current_value < U16_MAX,
+ "increment of lock counter must not overflow");
+
+ if (*current_value == 0) {
+ /*
+ * This zone is acquiring this lock for the first time. Extra barriers because this
+ * was original developed using an atomic add operation that implicitly had them.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(get_zone_count_ptr(journal, lock_number, zone_type));
+ /* same as before_atomic */
+ smp_mb__after_atomic();
+ }
+
+ *current_value += 1;
+}
+
+/**
+ * vdo_release_journal_entry_lock() - Release a single per-entry reference count for a recovery
+ * journal block.
+ * @journal: The recovery journal.
+ * @sequence_number: The journal sequence number of the referenced block.
+ */
+void vdo_release_journal_entry_lock(struct recovery_journal *journal,
+ sequence_number_t sequence_number)
+{
+ block_count_t lock_number;
+
+ if (sequence_number == 0)
+ return;
+
+ lock_number = vdo_get_recovery_journal_block_number(journal, sequence_number);
+ /*
+ * Extra barriers because this was originally developed using an atomic add operation that
+ * implicitly had them.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(get_decrement_counter(journal, lock_number));
+ /* same as before_atomic */
+ smp_mb__after_atomic();
+}
+
+/**
+ * initiate_drain() - Initiate a drain.
+ *
+ * Implements vdo_admin_initiator_fn.
+ */
+static void initiate_drain(struct admin_state *state)
+{
+ check_for_drain_complete(container_of(state, struct recovery_journal, state));
+}
+
+/**
+ * vdo_drain_recovery_journal() - Drain recovery journal I/O.
+ * @journal: The journal to drain.
+ * @operation: The drain operation (suspend or save).
+ * @parent: The completion to notify once the journal is drained.
+ *
+ * All uncommitted entries will be written out.
+ */
+void vdo_drain_recovery_journal(struct recovery_journal *journal,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent)
+{
+ assert_on_journal_thread(journal, __func__);
+ vdo_start_draining(&journal->state, operation, parent, initiate_drain);
+}
+
+/**
+ * resume_lock_counter() - Re-allow notifications from a suspended lock counter.
+ * @counter: The counter.
+ *
+ * Return: true if the lock counter was suspended.
+ */
+static bool resume_lock_counter(struct lock_counter *counter)
+{
+ int prior_state;
+
+ /*
+ * Extra barriers because this was original developed using a CAS operation that implicitly
+ * had them.
+ */
+ smp_mb__before_atomic();
+ prior_state = atomic_cmpxchg(&counter->state, LOCK_COUNTER_STATE_SUSPENDED,
+ LOCK_COUNTER_STATE_NOT_NOTIFYING);
+ /* same as before_atomic */
+ smp_mb__after_atomic();
+
+ return (prior_state == LOCK_COUNTER_STATE_SUSPENDED);
+}
+
+/**
+ * vdo_resume_recovery_journal() - Resume a recovery journal which has been drained.
+ * @journal: The journal to resume.
+ * @parent: The completion to finish once the journal is resumed.
+ */
+void vdo_resume_recovery_journal(struct recovery_journal *journal,
+ struct vdo_completion *parent)
+{
+ bool saved;
+
+ assert_on_journal_thread(journal, __func__);
+ saved = vdo_is_state_saved(&journal->state);
+ vdo_set_completion_result(parent, vdo_resume_if_quiescent(&journal->state));
+ if (is_read_only(journal)) {
+ vdo_continue_completion(parent, VDO_READ_ONLY);
+ return;
+ }
+
+ if (saved)
+ initialize_journal_state(journal);
+
+ if (resume_lock_counter(&journal->lock_counter)) {
+ /* We might have missed a notification. */
+ reap_recovery_journal(journal);
+ }
+
+ vdo_launch_completion(parent);
+}
+
+/**
+ * vdo_get_recovery_journal_logical_blocks_used() - Get the number of logical blocks in use by the
+ * VDO.
+ * @journal: The journal.
+ *
+ * Return: The number of logical blocks in use by the VDO.
+ */
+block_count_t vdo_get_recovery_journal_logical_blocks_used(const struct recovery_journal *journal)
+{
+ return journal->logical_blocks_used;
+}
+
+/**
+ * vdo_get_recovery_journal_statistics() - Get the current statistics from the recovery journal.
+ * @journal: The recovery journal to query.
+ *
+ * Return: A copy of the current statistics for the journal.
+ */
+struct recovery_journal_statistics
+vdo_get_recovery_journal_statistics(const struct recovery_journal *journal)
+{
+ return journal->events;
+}
+
+/**
+ * dump_recovery_block() - Dump the contents of the recovery block to the log.
+ * @block: The block to dump.
+ */
+static void dump_recovery_block(const struct recovery_journal_block *block)
+{
+ vdo_log_info(" sequence number %llu; entries %u; %s; %zu entry waiters; %zu commit waiters",
+ (unsigned long long) block->sequence_number, block->entry_count,
+ (block->committing ? "committing" : "waiting"),
+ vdo_waitq_num_waiters(&block->entry_waiters),
+ vdo_waitq_num_waiters(&block->commit_waiters));
+}
+
+/**
+ * vdo_dump_recovery_journal_statistics() - Dump some current statistics and other debug info from
+ * the recovery journal.
+ * @journal: The recovery journal to dump.
+ */
+void vdo_dump_recovery_journal_statistics(const struct recovery_journal *journal)
+{
+ const struct recovery_journal_block *block;
+ struct recovery_journal_statistics stats = vdo_get_recovery_journal_statistics(journal);
+
+ vdo_log_info("Recovery Journal");
+ vdo_log_info(" block_map_head=%llu slab_journal_head=%llu last_write_acknowledged=%llu tail=%llu block_map_reap_head=%llu slab_journal_reap_head=%llu disk_full=%llu slab_journal_commits_requested=%llu entry_waiters=%zu",
+ (unsigned long long) journal->block_map_head,
+ (unsigned long long) journal->slab_journal_head,
+ (unsigned long long) journal->last_write_acknowledged,
+ (unsigned long long) journal->tail,
+ (unsigned long long) journal->block_map_reap_head,
+ (unsigned long long) journal->slab_journal_reap_head,
+ (unsigned long long) stats.disk_full,
+ (unsigned long long) stats.slab_journal_commits_requested,
+ vdo_waitq_num_waiters(&journal->entry_waiters));
+ vdo_log_info(" entries: started=%llu written=%llu committed=%llu",
+ (unsigned long long) stats.entries.started,
+ (unsigned long long) stats.entries.written,
+ (unsigned long long) stats.entries.committed);
+ vdo_log_info(" blocks: started=%llu written=%llu committed=%llu",
+ (unsigned long long) stats.blocks.started,
+ (unsigned long long) stats.blocks.written,
+ (unsigned long long) stats.blocks.committed);
+
+ vdo_log_info(" active blocks:");
+ list_for_each_entry(block, &journal->active_tail_blocks, list_node)
+ dump_recovery_block(block);
+}
diff --git a/drivers/md/dm-vdo/recovery-journal.h b/drivers/md/dm-vdo/recovery-journal.h
new file mode 100644
index 000000000000..899071173015
--- /dev/null
+++ b/drivers/md/dm-vdo/recovery-journal.h
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_RECOVERY_JOURNAL_H
+#define VDO_RECOVERY_JOURNAL_H
+
+#include <linux/list.h>
+
+#include "numeric.h"
+
+#include "admin-state.h"
+#include "constants.h"
+#include "encodings.h"
+#include "flush.h"
+#include "statistics.h"
+#include "types.h"
+#include "wait-queue.h"
+
+/**
+ * DOC: recovery journal.
+ *
+ * The recovery_journal provides a log of all block mapping and reference count changes which have
+ * not yet been stably written to the block map or slab journals. This log helps to reduce the
+ * write amplification of writes by providing amortization of slab journal and block map page
+ * updates.
+ *
+ * The recovery journal has a single dedicated queue and thread for performing all journal updates.
+ * The concurrency guarantees of this single-threaded model allow the code to omit more
+ * fine-grained locking for recovery journal structures.
+ *
+ * The journal consists of a set of on-disk blocks arranged as a circular log with monotonically
+ * increasing sequence numbers. Three sequence numbers serve to define the active extent of the
+ * journal. The 'head' is the oldest active block in the journal. The 'tail' is the end of the
+ * half-open interval containing the active blocks. 'active' is the number of the block actively
+ * receiving entries. In an empty journal, head == active == tail. Once any entries are added, tail
+ * = active + 1, and head may be any value in the interval [tail - size, active].
+ *
+ * The journal also contains a set of in-memory blocks which are used to buffer up entries until
+ * they can be committed. In general the number of in-memory blocks ('tail_buffer_count') will be
+ * less than the on-disk size. Each in-memory block is also a vdo_completion. Each in-memory block
+ * has a vio which is used to commit that block to disk. The vio's data is the on-disk
+ * representation of the journal block. In addition each in-memory block has a buffer which is used
+ * to accumulate entries while a partial commit of the block is in progress. In-memory blocks are
+ * kept on two rings. Free blocks live on the 'free_tail_blocks' ring. When a block becomes active
+ * (see below) it is moved to the 'active_tail_blocks' ring. When a block is fully committed, it is
+ * moved back to the 'free_tail_blocks' ring.
+ *
+ * When entries are added to the journal, they are added to the active in-memory block, as
+ * indicated by the 'active_block' field. If the caller wishes to wait for the entry to be
+ * committed, the requesting VIO will be attached to the in-memory block to which the caller's
+ * entry was added. If the caller does wish to wait, or if the entry filled the active block, an
+ * attempt will be made to commit that block to disk. If there is already another commit in
+ * progress, the attempt will be ignored and then automatically retried when the in-progress commit
+ * completes. If there is no commit in progress, any data_vios waiting on the block are transferred
+ * to the block's vio which is then written, automatically waking all of the waiters when it
+ * completes. When the write completes, any entries which accumulated in the block are copied to
+ * the vio's data buffer.
+ *
+ * Finally, the journal maintains a set of counters, one for each on disk journal block. These
+ * counters are used as locks to prevent premature reaping of journal blocks. Each time a new
+ * sequence number is used, the counter for the corresponding block is incremented. The counter is
+ * subsequently decremented when that block is filled and then committed for the last time. This
+ * prevents blocks from being reaped while they are still being updated. The counter is also
+ * incremented once for each entry added to a block, and decremented once each time the block map
+ * is updated in memory for that request. This prevents blocks from being reaped while their VIOs
+ * are still active. Finally, each in-memory block map page tracks the oldest journal block that
+ * contains entries corresponding to uncommitted updates to that block map page. Each time an
+ * in-memory block map page is updated, it checks if the journal block for the VIO is earlier than
+ * the one it references, in which case it increments the count on the earlier journal block and
+ * decrements the count on the later journal block, maintaining a lock on the oldest journal block
+ * containing entries for that page. When a block map page has been flushed from the cache, the
+ * counter for the journal block it references is decremented. Whenever the counter for the head
+ * block goes to 0, the head is advanced until it comes to a block whose counter is not 0 or until
+ * it reaches the active block. This is the mechanism for reclaiming journal space on disk.
+ *
+ * If there is no in-memory space when a VIO attempts to add an entry, the VIO will be attached to
+ * the 'commit_completion' and will be woken the next time a full block has committed. If there is
+ * no on-disk space when a VIO attempts to add an entry, the VIO will be attached to the
+ * 'reap_completion', and will be woken the next time a journal block is reaped.
+ */
+
+enum vdo_zone_type {
+ VDO_ZONE_TYPE_ADMIN,
+ VDO_ZONE_TYPE_JOURNAL,
+ VDO_ZONE_TYPE_LOGICAL,
+ VDO_ZONE_TYPE_PHYSICAL,
+};
+
+struct lock_counter {
+ /* The completion for notifying the owner of a lock release */
+ struct vdo_completion completion;
+ /* The number of logical zones which may hold locks */
+ zone_count_t logical_zones;
+ /* The number of physical zones which may hold locks */
+ zone_count_t physical_zones;
+ /* The number of locks */
+ block_count_t locks;
+ /* Whether the lock release notification is in flight */
+ atomic_t state;
+ /* The number of logical zones which hold each lock */
+ atomic_t *logical_zone_counts;
+ /* The number of physical zones which hold each lock */
+ atomic_t *physical_zone_counts;
+ /* The per-lock counts for the journal zone */
+ u16 *journal_counters;
+ /* The per-lock decrement counts for the journal zone */
+ atomic_t *journal_decrement_counts;
+ /* The per-zone, per-lock reference counts for logical zones */
+ u16 *logical_counters;
+ /* The per-zone, per-lock reference counts for physical zones */
+ u16 *physical_counters;
+};
+
+struct recovery_journal_block {
+ /* The doubly linked pointers for the free or active lists */
+ struct list_head list_node;
+ /* The waiter for the pending full block list */
+ struct vdo_waiter write_waiter;
+ /* The journal to which this block belongs */
+ struct recovery_journal *journal;
+ /* A pointer to the current sector in the packed block buffer */
+ struct packed_journal_sector *sector;
+ /* The vio for writing this block */
+ struct vio vio;
+ /* The sequence number for this block */
+ sequence_number_t sequence_number;
+ /* The location of this block in the on-disk journal */
+ physical_block_number_t block_number;
+ /* Whether this block is being committed */
+ bool committing;
+ /* The total number of entries in this block */
+ journal_entry_count_t entry_count;
+ /* The total number of uncommitted entries (queued or committing) */
+ journal_entry_count_t uncommitted_entry_count;
+ /* The number of new entries in the current commit */
+ journal_entry_count_t entries_in_commit;
+ /* The queue of vios which will make entries for the next commit */
+ struct vdo_wait_queue entry_waiters;
+ /* The queue of vios waiting for the current commit */
+ struct vdo_wait_queue commit_waiters;
+};
+
+struct recovery_journal {
+ /* The thread ID of the journal zone */
+ thread_id_t thread_id;
+ /* The slab depot which can hold locks on this journal */
+ struct slab_depot *depot;
+ /* The block map which can hold locks on this journal */
+ struct block_map *block_map;
+ /* The queue of vios waiting to make entries */
+ struct vdo_wait_queue entry_waiters;
+ /* The number of free entries in the journal */
+ u64 available_space;
+ /* The number of decrement entries which need to be made */
+ data_vio_count_t pending_decrement_count;
+ /* Whether the journal is adding entries from the increment or decrement waiters queues */
+ bool adding_entries;
+ /* The administrative state of the journal */
+ struct admin_state state;
+ /* Whether a reap is in progress */
+ bool reaping;
+ /* The location of the first journal block */
+ physical_block_number_t origin;
+ /* The oldest active block in the journal on disk for block map rebuild */
+ sequence_number_t block_map_head;
+ /* The oldest active block in the journal on disk for slab journal replay */
+ sequence_number_t slab_journal_head;
+ /* The newest block in the journal on disk to which a write has finished */
+ sequence_number_t last_write_acknowledged;
+ /* The end of the half-open interval of the active journal */
+ sequence_number_t tail;
+ /* The point at which the last entry will have been added */
+ struct journal_point append_point;
+ /* The journal point of the vio most recently released from the journal */
+ struct journal_point commit_point;
+ /* The nonce of the VDO */
+ nonce_t nonce;
+ /* The number of recoveries completed by the VDO */
+ u8 recovery_count;
+ /* The number of entries which fit in a single block */
+ journal_entry_count_t entries_per_block;
+ /* Unused in-memory journal blocks */
+ struct list_head free_tail_blocks;
+ /* In-memory journal blocks with records */
+ struct list_head active_tail_blocks;
+ /* A pointer to the active block (the one we are adding entries to now) */
+ struct recovery_journal_block *active_block;
+ /* Journal blocks that need writing */
+ struct vdo_wait_queue pending_writes;
+ /* The new block map reap head after reaping */
+ sequence_number_t block_map_reap_head;
+ /* The head block number for the block map rebuild range */
+ block_count_t block_map_head_block_number;
+ /* The new slab journal reap head after reaping */
+ sequence_number_t slab_journal_reap_head;
+ /* The head block number for the slab journal replay range */
+ block_count_t slab_journal_head_block_number;
+ /* The data-less vio, usable only for flushing */
+ struct vio *flush_vio;
+ /* The number of blocks in the on-disk journal */
+ block_count_t size;
+ /* The number of logical blocks that are in-use */
+ block_count_t logical_blocks_used;
+ /* The number of block map pages that are allocated */
+ block_count_t block_map_data_blocks;
+ /* The number of journal blocks written but not yet acknowledged */
+ block_count_t pending_write_count;
+ /* The threshold at which slab journal tail blocks will be written out */
+ block_count_t slab_journal_commit_threshold;
+ /* Counters for events in the journal that are reported as statistics */
+ struct recovery_journal_statistics events;
+ /* The locks for each on-disk block */
+ struct lock_counter lock_counter;
+ /* The tail blocks */
+ struct recovery_journal_block blocks[];
+};
+
+/**
+ * vdo_get_recovery_journal_block_number() - Get the physical block number for a given sequence
+ * number.
+ * @journal: The journal.
+ * @sequence: The sequence number of the desired block.
+ *
+ * Return: The block number corresponding to the sequence number.
+ */
+static inline physical_block_number_t __must_check
+vdo_get_recovery_journal_block_number(const struct recovery_journal *journal,
+ sequence_number_t sequence)
+{
+ /*
+ * Since journal size is a power of two, the block number modulus can just be extracted
+ * from the low-order bits of the sequence.
+ */
+ return vdo_compute_recovery_journal_block_number(journal->size, sequence);
+}
+
+/**
+ * vdo_compute_recovery_journal_check_byte() - Compute the check byte for a given sequence number.
+ * @journal: The journal.
+ * @sequence: The sequence number.
+ *
+ * Return: The check byte corresponding to the sequence number.
+ */
+static inline u8 __must_check
+vdo_compute_recovery_journal_check_byte(const struct recovery_journal *journal,
+ sequence_number_t sequence)
+{
+ /* The check byte must change with each trip around the journal. */
+ return (((sequence / journal->size) & 0x7F) | 0x80);
+}
+
+int __must_check vdo_decode_recovery_journal(struct recovery_journal_state_7_0 state,
+ nonce_t nonce, struct vdo *vdo,
+ struct partition *partition,
+ u64 recovery_count,
+ block_count_t journal_size,
+ struct recovery_journal **journal_ptr);
+
+void vdo_free_recovery_journal(struct recovery_journal *journal);
+
+void vdo_initialize_recovery_journal_post_repair(struct recovery_journal *journal,
+ u64 recovery_count,
+ sequence_number_t tail,
+ block_count_t logical_blocks_used,
+ block_count_t block_map_data_blocks);
+
+block_count_t __must_check
+vdo_get_journal_block_map_data_blocks_used(struct recovery_journal *journal);
+
+thread_id_t __must_check vdo_get_recovery_journal_thread_id(struct recovery_journal *journal);
+
+void vdo_open_recovery_journal(struct recovery_journal *journal,
+ struct slab_depot *depot, struct block_map *block_map);
+
+sequence_number_t
+vdo_get_recovery_journal_current_sequence_number(struct recovery_journal *journal);
+
+block_count_t __must_check vdo_get_recovery_journal_length(block_count_t journal_size);
+
+struct recovery_journal_state_7_0 __must_check
+vdo_record_recovery_journal(const struct recovery_journal *journal);
+
+void vdo_add_recovery_journal_entry(struct recovery_journal *journal,
+ struct data_vio *data_vio);
+
+void vdo_acquire_recovery_journal_block_reference(struct recovery_journal *journal,
+ sequence_number_t sequence_number,
+ enum vdo_zone_type zone_type,
+ zone_count_t zone_id);
+
+void vdo_release_recovery_journal_block_reference(struct recovery_journal *journal,
+ sequence_number_t sequence_number,
+ enum vdo_zone_type zone_type,
+ zone_count_t zone_id);
+
+void vdo_release_journal_entry_lock(struct recovery_journal *journal,
+ sequence_number_t sequence_number);
+
+void vdo_drain_recovery_journal(struct recovery_journal *journal,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent);
+
+void vdo_resume_recovery_journal(struct recovery_journal *journal,
+ struct vdo_completion *parent);
+
+block_count_t __must_check
+vdo_get_recovery_journal_logical_blocks_used(const struct recovery_journal *journal);
+
+struct recovery_journal_statistics __must_check
+vdo_get_recovery_journal_statistics(const struct recovery_journal *journal);
+
+void vdo_dump_recovery_journal_statistics(const struct recovery_journal *journal);
+
+#endif /* VDO_RECOVERY_JOURNAL_H */
diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c
new file mode 100644
index 000000000000..defc9359f10e
--- /dev/null
+++ b/drivers/md/dm-vdo/repair.c
@@ -0,0 +1,1756 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "repair.h"
+
+#include <linux/min_heap.h>
+#include <linux/minmax.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "block-map.h"
+#include "completion.h"
+#include "constants.h"
+#include "encodings.h"
+#include "int-map.h"
+#include "io-submitter.h"
+#include "recovery-journal.h"
+#include "slab-depot.h"
+#include "types.h"
+#include "vdo.h"
+#include "wait-queue.h"
+
+/*
+ * An explicitly numbered block mapping. Numbering the mappings allows them to be sorted by logical
+ * block number during repair while still preserving the relative order of journal entries with
+ * the same logical block number.
+ */
+struct numbered_block_mapping {
+ struct block_map_slot block_map_slot;
+ struct block_map_entry block_map_entry;
+ /* A serial number to use during replay */
+ u32 number;
+} __packed;
+
+/*
+ * The absolute position of an entry in the recovery journal, including the sector number and the
+ * entry number within the sector.
+ */
+struct recovery_point {
+ /* Block sequence number */
+ sequence_number_t sequence_number;
+ /* Sector number */
+ u8 sector_count;
+ /* Entry number */
+ journal_entry_count_t entry_count;
+ /* Whether or not the increment portion of the current entry has been applied */
+ bool increment_applied;
+};
+
+struct repair_completion {
+ /* The completion header */
+ struct vdo_completion completion;
+
+ /* A buffer to hold the data read off disk */
+ char *journal_data;
+
+ /* For loading the journal */
+ data_vio_count_t vio_count;
+ data_vio_count_t vios_complete;
+ struct vio *vios;
+
+ /* The number of entries to be applied to the block map */
+ size_t block_map_entry_count;
+ /* The sequence number of the first valid block for block map recovery */
+ sequence_number_t block_map_head;
+ /* The sequence number of the first valid block for slab journal replay */
+ sequence_number_t slab_journal_head;
+ /* The sequence number of the last valid block of the journal (if known) */
+ sequence_number_t tail;
+ /*
+ * The highest sequence number of the journal. During recovery (vs read-only rebuild), not
+ * the same as the tail, since the tail ignores blocks after the first hole.
+ */
+ sequence_number_t highest_tail;
+
+ /* The number of logical blocks currently known to be in use */
+ block_count_t logical_blocks_used;
+ /* The number of block map data blocks known to be allocated */
+ block_count_t block_map_data_blocks;
+
+ /* These fields are for playing the journal into the block map */
+ /* The entry data for the block map recovery */
+ struct numbered_block_mapping *entries;
+ /* The number of entries in the entry array */
+ size_t entry_count;
+ /* number of pending (non-ready) requests*/
+ page_count_t outstanding;
+ /* number of page completions */
+ page_count_t page_count;
+ bool launching;
+ /*
+ * a heap wrapping journal_entries. It re-orders and sorts journal entries in ascending LBN
+ * order, then original journal order. This permits efficient iteration over the journal
+ * entries in order.
+ */
+ struct min_heap replay_heap;
+ /* Fields tracking progress through the journal entries. */
+ struct numbered_block_mapping *current_entry;
+ struct numbered_block_mapping *current_unfetched_entry;
+ /* Current requested page's PBN */
+ physical_block_number_t pbn;
+
+ /* These fields are only used during recovery. */
+ /* A location just beyond the last valid entry of the journal */
+ struct recovery_point tail_recovery_point;
+ /* The location of the next recovery journal entry to apply */
+ struct recovery_point next_recovery_point;
+ /* The journal point to give to the next synthesized decref */
+ struct journal_point next_journal_point;
+ /* The number of entries played into slab journals */
+ size_t entries_added_to_slab_journals;
+
+ /* These fields are only used during read-only rebuild */
+ page_count_t page_to_fetch;
+ /* the number of leaf pages in the block map */
+ page_count_t leaf_pages;
+ /* the last slot of the block map */
+ struct block_map_slot last_slot;
+
+ /*
+ * The page completions used for playing the journal into the block map, and, during
+ * read-only rebuild, for rebuilding the reference counts from the block map.
+ */
+ struct vdo_page_completion page_completions[];
+};
+
+/*
+ * This is a min_heap callback function that orders numbered_block_mappings using the
+ * 'block_map_slot' field as the primary key and the mapping 'number' field as the secondary key.
+ * Using the mapping number preserves the journal order of entries for the same slot, allowing us
+ * to sort by slot while still ensuring we replay all entries with the same slot in the exact order
+ * as they appeared in the journal.
+ */
+static bool mapping_is_less_than(const void *item1, const void *item2)
+{
+ const struct numbered_block_mapping *mapping1 =
+ (const struct numbered_block_mapping *) item1;
+ const struct numbered_block_mapping *mapping2 =
+ (const struct numbered_block_mapping *) item2;
+
+ if (mapping1->block_map_slot.pbn != mapping2->block_map_slot.pbn)
+ return mapping1->block_map_slot.pbn < mapping2->block_map_slot.pbn;
+
+ if (mapping1->block_map_slot.slot != mapping2->block_map_slot.slot)
+ return mapping1->block_map_slot.slot < mapping2->block_map_slot.slot;
+
+ if (mapping1->number != mapping2->number)
+ return mapping1->number < mapping2->number;
+
+ return 0;
+}
+
+static void swap_mappings(void *item1, void *item2)
+{
+ struct numbered_block_mapping *mapping1 = item1;
+ struct numbered_block_mapping *mapping2 = item2;
+
+ swap(*mapping1, *mapping2);
+}
+
+static const struct min_heap_callbacks repair_min_heap = {
+ .elem_size = sizeof(struct numbered_block_mapping),
+ .less = mapping_is_less_than,
+ .swp = swap_mappings,
+};
+
+static struct numbered_block_mapping *sort_next_heap_element(struct repair_completion *repair)
+{
+ struct min_heap *heap = &repair->replay_heap;
+ struct numbered_block_mapping *last;
+
+ if (heap->nr == 0)
+ return NULL;
+
+ /*
+ * Swap the next heap element with the last one on the heap, popping it off the heap,
+ * restore the heap invariant, and return a pointer to the popped element.
+ */
+ last = &repair->entries[--heap->nr];
+ swap_mappings(heap->data, last);
+ min_heapify(heap, 0, &repair_min_heap);
+ return last;
+}
+
+/**
+ * as_repair_completion() - Convert a generic completion to a repair_completion.
+ * @completion: The completion to convert.
+ *
+ * Return: The repair_completion.
+ */
+static inline struct repair_completion * __must_check
+as_repair_completion(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_REPAIR_COMPLETION);
+ return container_of(completion, struct repair_completion, completion);
+}
+
+static void prepare_repair_completion(struct repair_completion *repair,
+ vdo_action_fn callback, enum vdo_zone_type zone_type)
+{
+ struct vdo_completion *completion = &repair->completion;
+ const struct thread_config *thread_config = &completion->vdo->thread_config;
+ thread_id_t thread_id;
+
+ /* All blockmap access is done on single thread, so use logical zone 0. */
+ thread_id = ((zone_type == VDO_ZONE_TYPE_LOGICAL) ?
+ thread_config->logical_threads[0] :
+ thread_config->admin_thread);
+ vdo_reset_completion(completion);
+ vdo_set_completion_callback(completion, callback, thread_id);
+}
+
+static void launch_repair_completion(struct repair_completion *repair,
+ vdo_action_fn callback, enum vdo_zone_type zone_type)
+{
+ prepare_repair_completion(repair, callback, zone_type);
+ vdo_launch_completion(&repair->completion);
+}
+
+static void uninitialize_vios(struct repair_completion *repair)
+{
+ while (repair->vio_count > 0)
+ free_vio_components(&repair->vios[--repair->vio_count]);
+
+ vdo_free(vdo_forget(repair->vios));
+}
+
+static void free_repair_completion(struct repair_completion *repair)
+{
+ if (repair == NULL)
+ return;
+
+ /*
+ * We do this here because this function is the only common bottleneck for all clean up
+ * paths.
+ */
+ repair->completion.vdo->block_map->zones[0].page_cache.rebuilding = false;
+
+ uninitialize_vios(repair);
+ vdo_free(vdo_forget(repair->journal_data));
+ vdo_free(vdo_forget(repair->entries));
+ vdo_free(repair);
+}
+
+static void finish_repair(struct vdo_completion *completion)
+{
+ struct vdo_completion *parent = completion->parent;
+ struct vdo *vdo = completion->vdo;
+ struct repair_completion *repair = as_repair_completion(completion);
+
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ if (vdo->load_state != VDO_REBUILD_FOR_UPGRADE)
+ vdo->states.vdo.complete_recoveries++;
+
+ vdo_initialize_recovery_journal_post_repair(vdo->recovery_journal,
+ vdo->states.vdo.complete_recoveries,
+ repair->highest_tail,
+ repair->logical_blocks_used,
+ repair->block_map_data_blocks);
+ free_repair_completion(vdo_forget(repair));
+
+ if (vdo_state_requires_read_only_rebuild(vdo->load_state)) {
+ vdo_log_info("Read-only rebuild complete");
+ vdo_launch_completion(parent);
+ return;
+ }
+
+ /* FIXME: shouldn't this say either "recovery" or "repair"? */
+ vdo_log_info("Rebuild complete");
+
+ /*
+ * Now that we've freed the repair completion and its vast array of journal entries, we
+ * can allocate refcounts.
+ */
+ vdo_continue_completion(parent, vdo_allocate_reference_counters(vdo->depot));
+}
+
+/**
+ * abort_repair() - Handle a repair error.
+ * @completion: The repair completion.
+ */
+static void abort_repair(struct vdo_completion *completion)
+{
+ struct vdo_completion *parent = completion->parent;
+ int result = completion->result;
+ struct repair_completion *repair = as_repair_completion(completion);
+
+ if (vdo_state_requires_read_only_rebuild(completion->vdo->load_state))
+ vdo_log_info("Read-only rebuild aborted");
+ else
+ vdo_log_warning("Recovery aborted");
+
+ free_repair_completion(vdo_forget(repair));
+ vdo_continue_completion(parent, result);
+}
+
+/**
+ * abort_on_error() - Abort a repair if there is an error.
+ * @result: The result to check.
+ * @repair: The repair completion.
+ *
+ * Return: true if the result was an error.
+ */
+static bool __must_check abort_on_error(int result, struct repair_completion *repair)
+{
+ if (result == VDO_SUCCESS)
+ return false;
+
+ vdo_fail_completion(&repair->completion, result);
+ return true;
+}
+
+/**
+ * drain_slab_depot() - Flush out all dirty refcounts blocks now that they have been rebuilt or
+ * recovered.
+ */
+static void drain_slab_depot(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ struct repair_completion *repair = as_repair_completion(completion);
+ const struct admin_state_code *operation;
+
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ prepare_repair_completion(repair, finish_repair, VDO_ZONE_TYPE_ADMIN);
+ if (vdo_state_requires_read_only_rebuild(vdo->load_state)) {
+ vdo_log_info("Saving rebuilt state");
+ operation = VDO_ADMIN_STATE_REBUILDING;
+ } else {
+ vdo_log_info("Replayed %zu journal entries into slab journals",
+ repair->entries_added_to_slab_journals);
+ operation = VDO_ADMIN_STATE_RECOVERING;
+ }
+
+ vdo_drain_slab_depot(vdo->depot, operation, completion);
+}
+
+/**
+ * flush_block_map_updates() - Flush the block map now that all the reference counts are rebuilt.
+ * @completion: The repair completion.
+ *
+ * This callback is registered in finish_if_done().
+ */
+static void flush_block_map_updates(struct vdo_completion *completion)
+{
+ vdo_assert_on_admin_thread(completion->vdo, __func__);
+
+ vdo_log_info("Flushing block map changes");
+ prepare_repair_completion(as_repair_completion(completion), drain_slab_depot,
+ VDO_ZONE_TYPE_ADMIN);
+ vdo_drain_block_map(completion->vdo->block_map, VDO_ADMIN_STATE_RECOVERING,
+ completion);
+}
+
+static bool fetch_page(struct repair_completion *repair,
+ struct vdo_completion *completion);
+
+/**
+ * handle_page_load_error() - Handle an error loading a page.
+ * @completion: The vdo_page_completion.
+ */
+static void handle_page_load_error(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = completion->parent;
+
+ repair->outstanding--;
+ vdo_set_completion_result(&repair->completion, completion->result);
+ vdo_release_page_completion(completion);
+ fetch_page(repair, completion);
+}
+
+/**
+ * unmap_entry() - Unmap an invalid entry and indicate that its page must be written out.
+ * @page: The page containing the entries
+ * @completion: The page_completion for writing the page
+ * @slot: The slot to unmap
+ */
+static void unmap_entry(struct block_map_page *page, struct vdo_completion *completion,
+ slot_number_t slot)
+{
+ page->entries[slot] = UNMAPPED_BLOCK_MAP_ENTRY;
+ vdo_request_page_write(completion);
+}
+
+/**
+ * remove_out_of_bounds_entries() - Unmap entries which outside the logical space.
+ * @page: The page containing the entries
+ * @completion: The page_completion for writing the page
+ * @start: The first slot to check
+ */
+static void remove_out_of_bounds_entries(struct block_map_page *page,
+ struct vdo_completion *completion,
+ slot_number_t start)
+{
+ slot_number_t slot;
+
+ for (slot = start; slot < VDO_BLOCK_MAP_ENTRIES_PER_PAGE; slot++) {
+ struct data_location mapping = vdo_unpack_block_map_entry(&page->entries[slot]);
+
+ if (vdo_is_mapped_location(&mapping))
+ unmap_entry(page, completion, slot);
+ }
+}
+
+/**
+ * process_slot() - Update the reference counts for a single entry.
+ * @page: The page containing the entries
+ * @completion: The page_completion for writing the page
+ * @slot: The slot to check
+ *
+ * Return: true if the entry was a valid mapping
+ */
+static bool process_slot(struct block_map_page *page, struct vdo_completion *completion,
+ slot_number_t slot)
+{
+ struct slab_depot *depot = completion->vdo->depot;
+ int result;
+ struct data_location mapping = vdo_unpack_block_map_entry(&page->entries[slot]);
+
+ if (!vdo_is_valid_location(&mapping)) {
+ /* This entry is invalid, so remove it from the page. */
+ unmap_entry(page, completion, slot);
+ return false;
+ }
+
+ if (!vdo_is_mapped_location(&mapping))
+ return false;
+
+
+ if (mapping.pbn == VDO_ZERO_BLOCK)
+ return true;
+
+ if (!vdo_is_physical_data_block(depot, mapping.pbn)) {
+ /*
+ * This is a nonsense mapping. Remove it from the map so we're at least consistent
+ * and mark the page dirty.
+ */
+ unmap_entry(page, completion, slot);
+ return false;
+ }
+
+ result = vdo_adjust_reference_count_for_rebuild(depot, mapping.pbn,
+ VDO_JOURNAL_DATA_REMAPPING);
+ if (result == VDO_SUCCESS)
+ return true;
+
+ vdo_log_error_strerror(result,
+ "Could not adjust reference count for PBN %llu, slot %u mapped to PBN %llu",
+ (unsigned long long) vdo_get_block_map_page_pbn(page),
+ slot, (unsigned long long) mapping.pbn);
+ unmap_entry(page, completion, slot);
+ return false;
+}
+
+/**
+ * rebuild_reference_counts_from_page() - Rebuild reference counts from a block map page.
+ * @repair: The repair completion.
+ * @completion: The page completion holding the page.
+ */
+static void rebuild_reference_counts_from_page(struct repair_completion *repair,
+ struct vdo_completion *completion)
+{
+ slot_number_t slot, last_slot;
+ struct block_map_page *page;
+ int result;
+
+ result = vdo_get_cached_page(completion, &page);
+ if (result != VDO_SUCCESS) {
+ vdo_set_completion_result(&repair->completion, result);
+ return;
+ }
+
+ if (!page->header.initialized)
+ return;
+
+ /* Remove any bogus entries which exist beyond the end of the logical space. */
+ if (vdo_get_block_map_page_pbn(page) == repair->last_slot.pbn) {
+ last_slot = repair->last_slot.slot;
+ remove_out_of_bounds_entries(page, completion, last_slot);
+ } else {
+ last_slot = VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+ }
+
+ /* Inform the slab depot of all entries on this page. */
+ for (slot = 0; slot < last_slot; slot++) {
+ if (process_slot(page, completion, slot))
+ repair->logical_blocks_used++;
+ }
+}
+
+/**
+ * page_loaded() - Process a page which has just been loaded.
+ * @completion: The vdo_page_completion for the fetched page.
+ *
+ * This callback is registered by fetch_page().
+ */
+static void page_loaded(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = completion->parent;
+
+ repair->outstanding--;
+ rebuild_reference_counts_from_page(repair, completion);
+ vdo_release_page_completion(completion);
+
+ /* Advance progress to the next page, and fetch the next page we haven't yet requested. */
+ fetch_page(repair, completion);
+}
+
+static physical_block_number_t get_pbn_to_fetch(struct repair_completion *repair,
+ struct block_map *block_map)
+{
+ physical_block_number_t pbn = VDO_ZERO_BLOCK;
+
+ if (repair->completion.result != VDO_SUCCESS)
+ return VDO_ZERO_BLOCK;
+
+ while ((pbn == VDO_ZERO_BLOCK) && (repair->page_to_fetch < repair->leaf_pages))
+ pbn = vdo_find_block_map_page_pbn(block_map, repair->page_to_fetch++);
+
+ if (vdo_is_physical_data_block(repair->completion.vdo->depot, pbn))
+ return pbn;
+
+ vdo_set_completion_result(&repair->completion, VDO_BAD_MAPPING);
+ return VDO_ZERO_BLOCK;
+}
+
+/**
+ * fetch_page() - Fetch a page from the block map.
+ * @repair: The repair_completion.
+ * @completion: The page completion to use.
+ *
+ * Return true if the rebuild is complete
+ */
+static bool fetch_page(struct repair_completion *repair,
+ struct vdo_completion *completion)
+{
+ struct vdo_page_completion *page_completion = (struct vdo_page_completion *) completion;
+ struct block_map *block_map = repair->completion.vdo->block_map;
+ physical_block_number_t pbn = get_pbn_to_fetch(repair, block_map);
+
+ if (pbn != VDO_ZERO_BLOCK) {
+ repair->outstanding++;
+ /*
+ * We must set the requeue flag here to ensure that we don't blow the stack if all
+ * the requested pages are already in the cache or get load errors.
+ */
+ vdo_get_page(page_completion, &block_map->zones[0], pbn, true, repair,
+ page_loaded, handle_page_load_error, true);
+ }
+
+ if (repair->outstanding > 0)
+ return false;
+
+ launch_repair_completion(repair, flush_block_map_updates, VDO_ZONE_TYPE_ADMIN);
+ return true;
+}
+
+/**
+ * rebuild_from_leaves() - Rebuild reference counts from the leaf block map pages.
+ * @completion: The repair completion.
+ *
+ * Rebuilds reference counts from the leaf block map pages now that reference counts have been
+ * rebuilt from the interior tree pages (which have been loaded in the process). This callback is
+ * registered in rebuild_reference_counts().
+ */
+static void rebuild_from_leaves(struct vdo_completion *completion)
+{
+ page_count_t i;
+ struct repair_completion *repair = as_repair_completion(completion);
+ struct block_map *map = completion->vdo->block_map;
+
+ repair->logical_blocks_used = 0;
+
+ /*
+ * The PBN calculation doesn't work until the tree pages have been loaded, so we can't set
+ * this value at the start of repair.
+ */
+ repair->leaf_pages = vdo_compute_block_map_page_count(map->entry_count);
+ repair->last_slot = (struct block_map_slot) {
+ .slot = map->entry_count % VDO_BLOCK_MAP_ENTRIES_PER_PAGE,
+ .pbn = vdo_find_block_map_page_pbn(map, repair->leaf_pages - 1),
+ };
+ if (repair->last_slot.slot == 0)
+ repair->last_slot.slot = VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
+
+ for (i = 0; i < repair->page_count; i++) {
+ if (fetch_page(repair, &repair->page_completions[i].completion)) {
+ /*
+ * The rebuild has already moved on, so it isn't safe nor is there a need
+ * to launch any more fetches.
+ */
+ return;
+ }
+ }
+}
+
+/**
+ * process_entry() - Process a single entry from the block map tree.
+ * @pbn: A pbn which holds a block map tree page.
+ * @completion: The parent completion of the traversal.
+ *
+ * Implements vdo_entry_callback_fn.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int process_entry(physical_block_number_t pbn, struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion);
+ struct slab_depot *depot = completion->vdo->depot;
+ int result;
+
+ if ((pbn == VDO_ZERO_BLOCK) || !vdo_is_physical_data_block(depot, pbn)) {
+ return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
+ "PBN %llu out of range",
+ (unsigned long long) pbn);
+ }
+
+ result = vdo_adjust_reference_count_for_rebuild(depot, pbn,
+ VDO_JOURNAL_BLOCK_MAP_REMAPPING);
+ if (result != VDO_SUCCESS) {
+ return vdo_log_error_strerror(result,
+ "Could not adjust reference count for block map tree PBN %llu",
+ (unsigned long long) pbn);
+ }
+
+ repair->block_map_data_blocks++;
+ return VDO_SUCCESS;
+}
+
+static void rebuild_reference_counts(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion);
+ struct vdo *vdo = completion->vdo;
+ struct vdo_page_cache *cache = &vdo->block_map->zones[0].page_cache;
+
+ /* We must allocate ref_counts before we can rebuild them. */
+ if (abort_on_error(vdo_allocate_reference_counters(vdo->depot), repair))
+ return;
+
+ /*
+ * Completion chaining from page cache hits can lead to stack overflow during the rebuild,
+ * so clear out the cache before this rebuild phase.
+ */
+ if (abort_on_error(vdo_invalidate_page_cache(cache), repair))
+ return;
+
+ prepare_repair_completion(repair, rebuild_from_leaves, VDO_ZONE_TYPE_LOGICAL);
+ vdo_traverse_forest(vdo->block_map, process_entry, completion);
+}
+
+/**
+ * increment_recovery_point() - Move the given recovery point forward by one entry.
+ */
+static void increment_recovery_point(struct recovery_point *point)
+{
+ if (++point->entry_count < RECOVERY_JOURNAL_ENTRIES_PER_SECTOR)
+ return;
+
+ point->entry_count = 0;
+ if (point->sector_count < (VDO_SECTORS_PER_BLOCK - 1)) {
+ point->sector_count++;
+ return;
+ }
+
+ point->sequence_number++;
+ point->sector_count = 1;
+}
+
+/**
+ * advance_points() - Advance the current recovery and journal points.
+ * @repair: The repair_completion whose points are to be advanced.
+ * @entries_per_block: The number of entries in a recovery journal block.
+ */
+static void advance_points(struct repair_completion *repair,
+ journal_entry_count_t entries_per_block)
+{
+ if (!repair->next_recovery_point.increment_applied) {
+ repair->next_recovery_point.increment_applied = true;
+ return;
+ }
+
+ increment_recovery_point(&repair->next_recovery_point);
+ vdo_advance_journal_point(&repair->next_journal_point, entries_per_block);
+ repair->next_recovery_point.increment_applied = false;
+}
+
+/**
+ * before_recovery_point() - Check whether the first point precedes the second point.
+ * @first: The first recovery point.
+ * @second: The second recovery point.
+ *
+ * Return: true if the first point precedes the second point.
+ */
+static bool __must_check before_recovery_point(const struct recovery_point *first,
+ const struct recovery_point *second)
+{
+ if (first->sequence_number < second->sequence_number)
+ return true;
+
+ if (first->sequence_number > second->sequence_number)
+ return false;
+
+ if (first->sector_count < second->sector_count)
+ return true;
+
+ return ((first->sector_count == second->sector_count) &&
+ (first->entry_count < second->entry_count));
+}
+
+static struct packed_journal_sector * __must_check get_sector(struct recovery_journal *journal,
+ char *journal_data,
+ sequence_number_t sequence,
+ u8 sector_number)
+{
+ off_t offset;
+
+ offset = ((vdo_get_recovery_journal_block_number(journal, sequence) * VDO_BLOCK_SIZE) +
+ (VDO_SECTOR_SIZE * sector_number));
+ return (struct packed_journal_sector *) (journal_data + offset);
+}
+
+/**
+ * get_entry() - Unpack the recovery journal entry associated with the given recovery point.
+ * @repair: The repair completion.
+ * @point: The recovery point.
+ *
+ * Return: The unpacked contents of the matching recovery journal entry.
+ */
+static struct recovery_journal_entry get_entry(const struct repair_completion *repair,
+ const struct recovery_point *point)
+{
+ struct packed_journal_sector *sector;
+
+ sector = get_sector(repair->completion.vdo->recovery_journal,
+ repair->journal_data, point->sequence_number,
+ point->sector_count);
+ return vdo_unpack_recovery_journal_entry(&sector->entries[point->entry_count]);
+}
+
+/**
+ * validate_recovery_journal_entry() - Validate a recovery journal entry.
+ * @vdo: The vdo.
+ * @entry: The entry to validate.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int validate_recovery_journal_entry(const struct vdo *vdo,
+ const struct recovery_journal_entry *entry)
+{
+ if ((entry->slot.pbn >= vdo->states.vdo.config.physical_blocks) ||
+ (entry->slot.slot >= VDO_BLOCK_MAP_ENTRIES_PER_PAGE) ||
+ !vdo_is_valid_location(&entry->mapping) ||
+ !vdo_is_valid_location(&entry->unmapping) ||
+ !vdo_is_physical_data_block(vdo->depot, entry->mapping.pbn) ||
+ !vdo_is_physical_data_block(vdo->depot, entry->unmapping.pbn)) {
+ return vdo_log_error_strerror(VDO_CORRUPT_JOURNAL,
+ "Invalid entry: %s (%llu, %u) from %llu to %llu is not within bounds",
+ vdo_get_journal_operation_name(entry->operation),
+ (unsigned long long) entry->slot.pbn,
+ entry->slot.slot,
+ (unsigned long long) entry->unmapping.pbn,
+ (unsigned long long) entry->mapping.pbn);
+ }
+
+ if ((entry->operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING) &&
+ (vdo_is_state_compressed(entry->mapping.state) ||
+ (entry->mapping.pbn == VDO_ZERO_BLOCK) ||
+ (entry->unmapping.state != VDO_MAPPING_STATE_UNMAPPED) ||
+ (entry->unmapping.pbn != VDO_ZERO_BLOCK))) {
+ return vdo_log_error_strerror(VDO_CORRUPT_JOURNAL,
+ "Invalid entry: %s (%llu, %u) from %llu to %llu is not a valid tree mapping",
+ vdo_get_journal_operation_name(entry->operation),
+ (unsigned long long) entry->slot.pbn,
+ entry->slot.slot,
+ (unsigned long long) entry->unmapping.pbn,
+ (unsigned long long) entry->mapping.pbn);
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * add_slab_journal_entries() - Replay recovery journal entries into the slab journals of the
+ * allocator currently being recovered.
+ * @completion: The allocator completion.
+ *
+ * Waits for slab journal tailblock space when necessary. This method is its own callback.
+ */
+static void add_slab_journal_entries(struct vdo_completion *completion)
+{
+ struct recovery_point *recovery_point;
+ struct repair_completion *repair = completion->parent;
+ struct vdo *vdo = completion->vdo;
+ struct recovery_journal *journal = vdo->recovery_journal;
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+
+ /* Get ready in case we need to enqueue again. */
+ vdo_prepare_completion(completion, add_slab_journal_entries,
+ vdo_notify_slab_journals_are_recovered,
+ completion->callback_thread_id, repair);
+ for (recovery_point = &repair->next_recovery_point;
+ before_recovery_point(recovery_point, &repair->tail_recovery_point);
+ advance_points(repair, journal->entries_per_block)) {
+ int result;
+ physical_block_number_t pbn;
+ struct vdo_slab *slab;
+ struct recovery_journal_entry entry = get_entry(repair, recovery_point);
+ bool increment = !repair->next_recovery_point.increment_applied;
+
+ if (increment) {
+ result = validate_recovery_journal_entry(vdo, &entry);
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(vdo, result);
+ vdo_fail_completion(completion, result);
+ return;
+ }
+
+ pbn = entry.mapping.pbn;
+ } else {
+ pbn = entry.unmapping.pbn;
+ }
+
+ if (pbn == VDO_ZERO_BLOCK)
+ continue;
+
+ slab = vdo_get_slab(vdo->depot, pbn);
+ if (slab->allocator != allocator)
+ continue;
+
+ if (!vdo_attempt_replay_into_slab(slab, pbn, entry.operation, increment,
+ &repair->next_journal_point,
+ completion))
+ return;
+
+ repair->entries_added_to_slab_journals++;
+ }
+
+ vdo_notify_slab_journals_are_recovered(completion);
+}
+
+/**
+ * vdo_replay_into_slab_journals() - Replay recovery journal entries in the slab journals of slabs
+ * owned by a given block_allocator.
+ * @allocator: The allocator whose slab journals are to be recovered.
+ * @context: The slab depot load context supplied by a recovery when it loads the depot.
+ */
+void vdo_replay_into_slab_journals(struct block_allocator *allocator, void *context)
+{
+ struct vdo_completion *completion = &allocator->completion;
+ struct repair_completion *repair = context;
+ struct vdo *vdo = completion->vdo;
+
+ vdo_assert_on_physical_zone_thread(vdo, allocator->zone_number, __func__);
+ if (repair->entry_count == 0) {
+ /* there's nothing to replay */
+ repair->logical_blocks_used = vdo->recovery_journal->logical_blocks_used;
+ repair->block_map_data_blocks = vdo->recovery_journal->block_map_data_blocks;
+ vdo_notify_slab_journals_are_recovered(completion);
+ return;
+ }
+
+ repair->next_recovery_point = (struct recovery_point) {
+ .sequence_number = repair->slab_journal_head,
+ .sector_count = 1,
+ .entry_count = 0,
+ };
+
+ repair->next_journal_point = (struct journal_point) {
+ .sequence_number = repair->slab_journal_head,
+ .entry_count = 0,
+ };
+
+ vdo_log_info("Replaying entries into slab journals for zone %u",
+ allocator->zone_number);
+ completion->parent = repair;
+ add_slab_journal_entries(completion);
+}
+
+static void load_slab_depot(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion);
+ const struct admin_state_code *operation;
+
+ vdo_assert_on_admin_thread(completion->vdo, __func__);
+
+ if (vdo_state_requires_read_only_rebuild(completion->vdo->load_state)) {
+ prepare_repair_completion(repair, rebuild_reference_counts,
+ VDO_ZONE_TYPE_LOGICAL);
+ operation = VDO_ADMIN_STATE_LOADING_FOR_REBUILD;
+ } else {
+ prepare_repair_completion(repair, drain_slab_depot, VDO_ZONE_TYPE_ADMIN);
+ operation = VDO_ADMIN_STATE_LOADING_FOR_RECOVERY;
+ }
+
+ vdo_load_slab_depot(completion->vdo->depot, operation, completion, repair);
+}
+
+static void flush_block_map(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion);
+ const struct admin_state_code *operation;
+
+ vdo_assert_on_admin_thread(completion->vdo, __func__);
+
+ vdo_log_info("Flushing block map changes");
+ prepare_repair_completion(repair, load_slab_depot, VDO_ZONE_TYPE_ADMIN);
+ operation = (vdo_state_requires_read_only_rebuild(completion->vdo->load_state) ?
+ VDO_ADMIN_STATE_REBUILDING :
+ VDO_ADMIN_STATE_RECOVERING);
+ vdo_drain_block_map(completion->vdo->block_map, operation, completion);
+}
+
+static bool finish_if_done(struct repair_completion *repair)
+{
+ /* Pages are still being launched or there is still work to do */
+ if (repair->launching || (repair->outstanding > 0))
+ return false;
+
+ if (repair->completion.result != VDO_SUCCESS) {
+ page_count_t i;
+
+ for (i = 0; i < repair->page_count; i++) {
+ struct vdo_page_completion *page_completion =
+ &repair->page_completions[i];
+
+ if (page_completion->ready)
+ vdo_release_page_completion(&page_completion->completion);
+ }
+
+ vdo_launch_completion(&repair->completion);
+ return true;
+ }
+
+ if (repair->current_entry >= repair->entries)
+ return false;
+
+ launch_repair_completion(repair, flush_block_map, VDO_ZONE_TYPE_ADMIN);
+ return true;
+}
+
+static void abort_block_map_recovery(struct repair_completion *repair, int result)
+{
+ vdo_set_completion_result(&repair->completion, result);
+ finish_if_done(repair);
+}
+
+/**
+ * find_entry_starting_next_page() - Find the first journal entry after a given entry which is not
+ * on the same block map page.
+ * @current_entry: The entry to search from.
+ * @needs_sort: Whether sorting is needed to proceed.
+ *
+ * Return: Pointer to the first later journal entry on a different block map page, or a pointer to
+ * just before the journal entries if no subsequent entry is on a different block map page.
+ */
+static struct numbered_block_mapping *
+find_entry_starting_next_page(struct repair_completion *repair,
+ struct numbered_block_mapping *current_entry, bool needs_sort)
+{
+ size_t current_page;
+
+ /* If current_entry is invalid, return immediately. */
+ if (current_entry < repair->entries)
+ return current_entry;
+
+ current_page = current_entry->block_map_slot.pbn;
+
+ /* Decrement current_entry until it's out of bounds or on a different page. */
+ while ((current_entry >= repair->entries) &&
+ (current_entry->block_map_slot.pbn == current_page)) {
+ if (needs_sort) {
+ struct numbered_block_mapping *just_sorted_entry =
+ sort_next_heap_element(repair);
+ VDO_ASSERT_LOG_ONLY(just_sorted_entry < current_entry,
+ "heap is returning elements in an unexpected order");
+ }
+
+ current_entry--;
+ }
+
+ return current_entry;
+}
+
+/*
+ * Apply a range of journal entries [starting_entry, ending_entry) journal
+ * entries to a block map page.
+ */
+static void apply_journal_entries_to_page(struct block_map_page *page,
+ struct numbered_block_mapping *starting_entry,
+ struct numbered_block_mapping *ending_entry)
+{
+ struct numbered_block_mapping *current_entry = starting_entry;
+
+ while (current_entry != ending_entry) {
+ page->entries[current_entry->block_map_slot.slot] = current_entry->block_map_entry;
+ current_entry--;
+ }
+}
+
+static void recover_ready_pages(struct repair_completion *repair,
+ struct vdo_completion *completion);
+
+static void block_map_page_loaded(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion->parent);
+
+ repair->outstanding--;
+ if (!repair->launching)
+ recover_ready_pages(repair, completion);
+}
+
+static void handle_block_map_page_load_error(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion->parent);
+
+ repair->outstanding--;
+ abort_block_map_recovery(repair, completion->result);
+}
+
+static void fetch_block_map_page(struct repair_completion *repair,
+ struct vdo_completion *completion)
+{
+ physical_block_number_t pbn;
+
+ if (repair->current_unfetched_entry < repair->entries)
+ /* Nothing left to fetch. */
+ return;
+
+ /* Fetch the next page we haven't yet requested. */
+ pbn = repair->current_unfetched_entry->block_map_slot.pbn;
+ repair->current_unfetched_entry =
+ find_entry_starting_next_page(repair, repair->current_unfetched_entry,
+ true);
+ repair->outstanding++;
+ vdo_get_page(((struct vdo_page_completion *) completion),
+ &repair->completion.vdo->block_map->zones[0], pbn, true,
+ &repair->completion, block_map_page_loaded,
+ handle_block_map_page_load_error, false);
+}
+
+static struct vdo_page_completion *get_next_page_completion(struct repair_completion *repair,
+ struct vdo_page_completion *completion)
+{
+ completion++;
+ if (completion == (&repair->page_completions[repair->page_count]))
+ completion = &repair->page_completions[0];
+ return completion;
+}
+
+static void recover_ready_pages(struct repair_completion *repair,
+ struct vdo_completion *completion)
+{
+ struct vdo_page_completion *page_completion = (struct vdo_page_completion *) completion;
+
+ if (finish_if_done(repair))
+ return;
+
+ if (repair->pbn != page_completion->pbn)
+ return;
+
+ while (page_completion->ready) {
+ struct numbered_block_mapping *start_of_next_page;
+ struct block_map_page *page;
+ int result;
+
+ result = vdo_get_cached_page(completion, &page);
+ if (result != VDO_SUCCESS) {
+ abort_block_map_recovery(repair, result);
+ return;
+ }
+
+ start_of_next_page =
+ find_entry_starting_next_page(repair, repair->current_entry,
+ false);
+ apply_journal_entries_to_page(page, repair->current_entry,
+ start_of_next_page);
+ repair->current_entry = start_of_next_page;
+ vdo_request_page_write(completion);
+ vdo_release_page_completion(completion);
+
+ if (finish_if_done(repair))
+ return;
+
+ repair->pbn = repair->current_entry->block_map_slot.pbn;
+ fetch_block_map_page(repair, completion);
+ page_completion = get_next_page_completion(repair, page_completion);
+ completion = &page_completion->completion;
+ }
+}
+
+static void recover_block_map(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = as_repair_completion(completion);
+ struct vdo *vdo = completion->vdo;
+ struct numbered_block_mapping *first_sorted_entry;
+ page_count_t i;
+
+ vdo_assert_on_logical_zone_thread(vdo, 0, __func__);
+
+ /* Suppress block map errors. */
+ vdo->block_map->zones[0].page_cache.rebuilding =
+ vdo_state_requires_read_only_rebuild(vdo->load_state);
+
+ if (repair->block_map_entry_count == 0) {
+ vdo_log_info("Replaying 0 recovery entries into block map");
+ vdo_free(vdo_forget(repair->journal_data));
+ launch_repair_completion(repair, load_slab_depot, VDO_ZONE_TYPE_ADMIN);
+ return;
+ }
+
+ /*
+ * Organize the journal entries into a binary heap so we can iterate over them in sorted
+ * order incrementally, avoiding an expensive sort call.
+ */
+ repair->replay_heap = (struct min_heap) {
+ .data = repair->entries,
+ .nr = repair->block_map_entry_count,
+ .size = repair->block_map_entry_count,
+ };
+ min_heapify_all(&repair->replay_heap, &repair_min_heap);
+
+ vdo_log_info("Replaying %zu recovery entries into block map",
+ repair->block_map_entry_count);
+
+ repair->current_entry = &repair->entries[repair->block_map_entry_count - 1];
+ first_sorted_entry = sort_next_heap_element(repair);
+ VDO_ASSERT_LOG_ONLY(first_sorted_entry == repair->current_entry,
+ "heap is returning elements in an unexpected order");
+
+ /* Prevent any page from being processed until all pages have been launched. */
+ repair->launching = true;
+ repair->pbn = repair->current_entry->block_map_slot.pbn;
+ repair->current_unfetched_entry = repair->current_entry;
+ for (i = 0; i < repair->page_count; i++) {
+ if (repair->current_unfetched_entry < repair->entries)
+ break;
+
+ fetch_block_map_page(repair, &repair->page_completions[i].completion);
+ }
+ repair->launching = false;
+
+ /* Process any ready pages. */
+ recover_ready_pages(repair, &repair->page_completions[0].completion);
+}
+
+/**
+ * get_recovery_journal_block_header() - Get the block header for a block at a position in the
+ * journal data and unpack it.
+ * @journal: The recovery journal.
+ * @data: The recovery journal data.
+ * @sequence: The sequence number.
+ *
+ * Return: The unpacked header.
+ */
+static struct recovery_block_header __must_check
+get_recovery_journal_block_header(struct recovery_journal *journal, char *data,
+ sequence_number_t sequence)
+{
+ physical_block_number_t pbn =
+ vdo_get_recovery_journal_block_number(journal, sequence);
+ char *header = &data[pbn * VDO_BLOCK_SIZE];
+
+ return vdo_unpack_recovery_block_header((struct packed_journal_header *) header);
+}
+
+/**
+ * is_valid_recovery_journal_block() - Determine whether the given header describes a valid block
+ * for the given journal.
+ * @journal: The journal to use.
+ * @header: The unpacked block header to check.
+ * @old_ok: Whether an old format header is valid.
+ *
+ * A block is not valid if it is unformatted, or if it is older than the last successful recovery
+ * or reformat.
+ *
+ * Return: True if the header is valid.
+ */
+static bool __must_check is_valid_recovery_journal_block(const struct recovery_journal *journal,
+ const struct recovery_block_header *header,
+ bool old_ok)
+{
+ if ((header->nonce != journal->nonce) ||
+ (header->recovery_count != journal->recovery_count))
+ return false;
+
+ if (header->metadata_type == VDO_METADATA_RECOVERY_JOURNAL_2)
+ return (header->entry_count <= journal->entries_per_block);
+
+ return (old_ok &&
+ (header->metadata_type == VDO_METADATA_RECOVERY_JOURNAL) &&
+ (header->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK));
+}
+
+/**
+ * is_exact_recovery_journal_block() - Determine whether the given header describes the exact block
+ * indicated.
+ * @journal: The journal to use.
+ * @header: The unpacked block header to check.
+ * @sequence: The expected sequence number.
+ * @type: The expected metadata type.
+ *
+ * Return: True if the block matches.
+ */
+static bool __must_check is_exact_recovery_journal_block(const struct recovery_journal *journal,
+ const struct recovery_block_header *header,
+ sequence_number_t sequence,
+ enum vdo_metadata_type type)
+{
+ return ((header->metadata_type == type) &&
+ (header->sequence_number == sequence) &&
+ (is_valid_recovery_journal_block(journal, header, true)));
+}
+
+/**
+ * find_recovery_journal_head_and_tail() - Find the tail and head of the journal.
+ *
+ * Return: True if there were valid journal blocks.
+ */
+static bool find_recovery_journal_head_and_tail(struct repair_completion *repair)
+{
+ struct recovery_journal *journal = repair->completion.vdo->recovery_journal;
+ bool found_entries = false;
+ physical_block_number_t i;
+
+ /*
+ * Ensure that we don't replay old entries since we know the tail recorded in the super
+ * block must be a lower bound. Not doing so can result in extra data loss by setting the
+ * tail too early.
+ */
+ repair->highest_tail = journal->tail;
+ for (i = 0; i < journal->size; i++) {
+ struct recovery_block_header header =
+ get_recovery_journal_block_header(journal, repair->journal_data, i);
+
+ if (!is_valid_recovery_journal_block(journal, &header, true)) {
+ /* This block is old or incorrectly formatted */
+ continue;
+ }
+
+ if (vdo_get_recovery_journal_block_number(journal, header.sequence_number) != i) {
+ /* This block is in the wrong location */
+ continue;
+ }
+
+ if (header.sequence_number >= repair->highest_tail) {
+ found_entries = true;
+ repair->highest_tail = header.sequence_number;
+ }
+
+ if (!found_entries)
+ continue;
+
+ if (header.block_map_head > repair->block_map_head)
+ repair->block_map_head = header.block_map_head;
+
+ if (header.slab_journal_head > repair->slab_journal_head)
+ repair->slab_journal_head = header.slab_journal_head;
+ }
+
+ return found_entries;
+}
+
+/**
+ * unpack_entry() - Unpack a recovery journal entry in either format.
+ * @vdo: The vdo.
+ * @packed: The entry to unpack.
+ * @format: The expected format of the entry.
+ * @entry: The unpacked entry.
+ *
+ * Return: true if the entry should be applied.3
+ */
+static bool unpack_entry(struct vdo *vdo, char *packed, enum vdo_metadata_type format,
+ struct recovery_journal_entry *entry)
+{
+ if (format == VDO_METADATA_RECOVERY_JOURNAL_2) {
+ struct packed_recovery_journal_entry *packed_entry =
+ (struct packed_recovery_journal_entry *) packed;
+
+ *entry = vdo_unpack_recovery_journal_entry(packed_entry);
+ } else {
+ physical_block_number_t low32, high4;
+
+ struct packed_recovery_journal_entry_1 *packed_entry =
+ (struct packed_recovery_journal_entry_1 *) packed;
+
+ if (packed_entry->operation == VDO_JOURNAL_DATA_INCREMENT)
+ entry->operation = VDO_JOURNAL_DATA_REMAPPING;
+ else if (packed_entry->operation == VDO_JOURNAL_BLOCK_MAP_INCREMENT)
+ entry->operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
+ else
+ return false;
+
+ low32 = __le32_to_cpu(packed_entry->pbn_low_word);
+ high4 = packed_entry->pbn_high_nibble;
+ entry->slot = (struct block_map_slot) {
+ .pbn = ((high4 << 32) | low32),
+ .slot = (packed_entry->slot_low | (packed_entry->slot_high << 6)),
+ };
+ entry->mapping = vdo_unpack_block_map_entry(&packed_entry->block_map_entry);
+ entry->unmapping = (struct data_location) {
+ .pbn = VDO_ZERO_BLOCK,
+ .state = VDO_MAPPING_STATE_UNMAPPED,
+ };
+ }
+
+ return (validate_recovery_journal_entry(vdo, entry) == VDO_SUCCESS);
+}
+
+/**
+ * append_sector_entries() - Append an array of recovery journal entries from a journal block
+ * sector to the array of numbered mappings in the repair completion,
+ * numbering each entry in the order they are appended.
+ * @repair: The repair completion.
+ * @entries: The entries in the sector.
+ * @format: The format of the sector.
+ * @entry_count: The number of entries to append.
+ */
+static void append_sector_entries(struct repair_completion *repair, char *entries,
+ enum vdo_metadata_type format,
+ journal_entry_count_t entry_count)
+{
+ journal_entry_count_t i;
+ struct vdo *vdo = repair->completion.vdo;
+ off_t increment = ((format == VDO_METADATA_RECOVERY_JOURNAL_2)
+ ? sizeof(struct packed_recovery_journal_entry)
+ : sizeof(struct packed_recovery_journal_entry_1));
+
+ for (i = 0; i < entry_count; i++, entries += increment) {
+ struct recovery_journal_entry entry;
+
+ if (!unpack_entry(vdo, entries, format, &entry))
+ /* When recovering from read-only mode, ignore damaged entries. */
+ continue;
+
+ repair->entries[repair->block_map_entry_count] =
+ (struct numbered_block_mapping) {
+ .block_map_slot = entry.slot,
+ .block_map_entry = vdo_pack_block_map_entry(entry.mapping.pbn,
+ entry.mapping.state),
+ .number = repair->block_map_entry_count,
+ };
+ repair->block_map_entry_count++;
+ }
+}
+
+static journal_entry_count_t entries_per_sector(enum vdo_metadata_type format,
+ u8 sector_number)
+{
+ if (format == VDO_METADATA_RECOVERY_JOURNAL_2)
+ return RECOVERY_JOURNAL_ENTRIES_PER_SECTOR;
+
+ return ((sector_number == (VDO_SECTORS_PER_BLOCK - 1))
+ ? RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR
+ : RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR);
+}
+
+static void extract_entries_from_block(struct repair_completion *repair,
+ struct recovery_journal *journal,
+ sequence_number_t sequence,
+ enum vdo_metadata_type format,
+ journal_entry_count_t entries)
+{
+ sector_count_t i;
+ struct recovery_block_header header =
+ get_recovery_journal_block_header(journal, repair->journal_data,
+ sequence);
+
+ if (!is_exact_recovery_journal_block(journal, &header, sequence, format)) {
+ /* This block is invalid, so skip it. */
+ return;
+ }
+
+ entries = min(entries, header.entry_count);
+ for (i = 1; i < VDO_SECTORS_PER_BLOCK; i++) {
+ struct packed_journal_sector *sector =
+ get_sector(journal, repair->journal_data, sequence, i);
+ journal_entry_count_t sector_entries =
+ min(entries, entries_per_sector(format, i));
+
+ if (vdo_is_valid_recovery_journal_sector(&header, sector, i)) {
+ /* Only extract as many as the block header calls for. */
+ append_sector_entries(repair, (char *) sector->entries, format,
+ min_t(journal_entry_count_t,
+ sector->entry_count,
+ sector_entries));
+ }
+
+ /*
+ * Even if the sector wasn't full, count it as full when counting up to the
+ * entry count the block header claims.
+ */
+ entries -= sector_entries;
+ }
+}
+
+static int parse_journal_for_rebuild(struct repair_completion *repair)
+{
+ int result;
+ sequence_number_t i;
+ block_count_t count;
+ enum vdo_metadata_type format;
+ struct vdo *vdo = repair->completion.vdo;
+ struct recovery_journal *journal = vdo->recovery_journal;
+ journal_entry_count_t entries_per_block = journal->entries_per_block;
+
+ format = get_recovery_journal_block_header(journal, repair->journal_data,
+ repair->highest_tail).metadata_type;
+ if (format == VDO_METADATA_RECOVERY_JOURNAL)
+ entries_per_block = RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK;
+
+ /*
+ * Allocate an array of numbered_block_mapping structures large enough to transcribe every
+ * packed_recovery_journal_entry from every valid journal block.
+ */
+ count = ((repair->highest_tail - repair->block_map_head + 1) * entries_per_block);
+ result = vdo_allocate(count, struct numbered_block_mapping, __func__,
+ &repair->entries);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (i = repair->block_map_head; i <= repair->highest_tail; i++)
+ extract_entries_from_block(repair, journal, i, format, entries_per_block);
+
+ return VDO_SUCCESS;
+}
+
+static int validate_heads(struct repair_completion *repair)
+{
+ /* Both reap heads must be behind the tail. */
+ if ((repair->block_map_head <= repair->tail) &&
+ (repair->slab_journal_head <= repair->tail))
+ return VDO_SUCCESS;
+
+
+ return vdo_log_error_strerror(VDO_CORRUPT_JOURNAL,
+ "Journal tail too early. block map head: %llu, slab journal head: %llu, tail: %llu",
+ (unsigned long long) repair->block_map_head,
+ (unsigned long long) repair->slab_journal_head,
+ (unsigned long long) repair->tail);
+}
+
+/**
+ * extract_new_mappings() - Find all valid new mappings to be applied to the block map.
+ *
+ * The mappings are extracted from the journal and stored in a sortable array so that all of the
+ * mappings to be applied to a given block map page can be done in a single page fetch.
+ */
+static int extract_new_mappings(struct repair_completion *repair)
+{
+ int result;
+ struct vdo *vdo = repair->completion.vdo;
+ struct recovery_point recovery_point = {
+ .sequence_number = repair->block_map_head,
+ .sector_count = 1,
+ .entry_count = 0,
+ };
+
+ /*
+ * Allocate an array of numbered_block_mapping structs just large enough to transcribe
+ * every packed_recovery_journal_entry from every valid journal block.
+ */
+ result = vdo_allocate(repair->entry_count, struct numbered_block_mapping,
+ __func__, &repair->entries);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ for (; before_recovery_point(&recovery_point, &repair->tail_recovery_point);
+ increment_recovery_point(&recovery_point)) {
+ struct recovery_journal_entry entry = get_entry(repair, &recovery_point);
+
+ result = validate_recovery_journal_entry(vdo, &entry);
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(vdo, result);
+ return result;
+ }
+
+ repair->entries[repair->block_map_entry_count] =
+ (struct numbered_block_mapping) {
+ .block_map_slot = entry.slot,
+ .block_map_entry = vdo_pack_block_map_entry(entry.mapping.pbn,
+ entry.mapping.state),
+ .number = repair->block_map_entry_count,
+ };
+ repair->block_map_entry_count++;
+ }
+
+ result = VDO_ASSERT((repair->block_map_entry_count <= repair->entry_count),
+ "approximate entry count is an upper bound");
+ if (result != VDO_SUCCESS)
+ vdo_enter_read_only_mode(vdo, result);
+
+ return result;
+}
+
+/**
+ * compute_usages() - Compute the lbns in use and block map data blocks counts from the tail of
+ * the journal.
+ */
+static noinline int compute_usages(struct repair_completion *repair)
+{
+ /*
+ * This function is declared noinline to avoid a spurious valgrind error regarding the
+ * following structure being uninitialized.
+ */
+ struct recovery_point recovery_point = {
+ .sequence_number = repair->tail,
+ .sector_count = 1,
+ .entry_count = 0,
+ };
+
+ struct vdo *vdo = repair->completion.vdo;
+ struct recovery_journal *journal = vdo->recovery_journal;
+ struct recovery_block_header header =
+ get_recovery_journal_block_header(journal, repair->journal_data,
+ repair->tail);
+
+ repair->logical_blocks_used = header.logical_blocks_used;
+ repair->block_map_data_blocks = header.block_map_data_blocks;
+
+ for (; before_recovery_point(&recovery_point, &repair->tail_recovery_point);
+ increment_recovery_point(&recovery_point)) {
+ struct recovery_journal_entry entry = get_entry(repair, &recovery_point);
+ int result;
+
+ result = validate_recovery_journal_entry(vdo, &entry);
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(vdo, result);
+ return result;
+ }
+
+ if (entry.operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING) {
+ repair->block_map_data_blocks++;
+ continue;
+ }
+
+ if (vdo_is_mapped_location(&entry.mapping))
+ repair->logical_blocks_used++;
+
+ if (vdo_is_mapped_location(&entry.unmapping))
+ repair->logical_blocks_used--;
+ }
+
+ return VDO_SUCCESS;
+}
+
+static int parse_journal_for_recovery(struct repair_completion *repair)
+{
+ int result;
+ sequence_number_t i, head;
+ bool found_entries = false;
+ struct recovery_journal *journal = repair->completion.vdo->recovery_journal;
+
+ head = min(repair->block_map_head, repair->slab_journal_head);
+ for (i = head; i <= repair->highest_tail; i++) {
+ struct recovery_block_header header;
+ journal_entry_count_t block_entries;
+ u8 j;
+
+ repair->tail = i;
+ repair->tail_recovery_point = (struct recovery_point) {
+ .sequence_number = i,
+ .sector_count = 0,
+ .entry_count = 0,
+ };
+
+ header = get_recovery_journal_block_header(journal, repair->journal_data, i);
+ if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) {
+ /* This is an old format block, so we need to upgrade */
+ vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
+ "Recovery journal is in the old format, a read-only rebuild is required.");
+ vdo_enter_read_only_mode(repair->completion.vdo,
+ VDO_UNSUPPORTED_VERSION);
+ return VDO_UNSUPPORTED_VERSION;
+ }
+
+ if (!is_exact_recovery_journal_block(journal, &header, i,
+ VDO_METADATA_RECOVERY_JOURNAL_2)) {
+ /* A bad block header was found so this must be the end of the journal. */
+ break;
+ }
+
+ block_entries = header.entry_count;
+
+ /* Examine each sector in turn to determine the last valid sector. */
+ for (j = 1; j < VDO_SECTORS_PER_BLOCK; j++) {
+ struct packed_journal_sector *sector =
+ get_sector(journal, repair->journal_data, i, j);
+ journal_entry_count_t sector_entries =
+ min_t(journal_entry_count_t, sector->entry_count,
+ block_entries);
+
+ /* A bad sector means that this block was torn. */
+ if (!vdo_is_valid_recovery_journal_sector(&header, sector, j))
+ break;
+
+ if (sector_entries > 0) {
+ found_entries = true;
+ repair->tail_recovery_point.sector_count++;
+ repair->tail_recovery_point.entry_count = sector_entries;
+ block_entries -= sector_entries;
+ repair->entry_count += sector_entries;
+ }
+
+ /* If this sector is short, the later sectors can't matter. */
+ if ((sector_entries < RECOVERY_JOURNAL_ENTRIES_PER_SECTOR) ||
+ (block_entries == 0))
+ break;
+ }
+
+ /* If this block was not filled, or if it tore, no later block can matter. */
+ if ((header.entry_count != journal->entries_per_block) || (block_entries > 0))
+ break;
+ }
+
+ if (!found_entries)
+ return validate_heads(repair);
+
+ /* Set the tail to the last valid tail block, if there is one. */
+ if (repair->tail_recovery_point.sector_count == 0)
+ repair->tail--;
+
+ result = validate_heads(repair);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_log_info("Highest-numbered recovery journal block has sequence number %llu, and the highest-numbered usable block is %llu",
+ (unsigned long long) repair->highest_tail,
+ (unsigned long long) repair->tail);
+
+ result = extract_new_mappings(repair);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return compute_usages(repair);
+}
+
+static int parse_journal(struct repair_completion *repair)
+{
+ if (!find_recovery_journal_head_and_tail(repair))
+ return VDO_SUCCESS;
+
+ return (vdo_state_requires_read_only_rebuild(repair->completion.vdo->load_state) ?
+ parse_journal_for_rebuild(repair) :
+ parse_journal_for_recovery(repair));
+}
+
+static void finish_journal_load(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = completion->parent;
+
+ if (++repair->vios_complete != repair->vio_count)
+ return;
+
+ vdo_log_info("Finished reading recovery journal");
+ uninitialize_vios(repair);
+ prepare_repair_completion(repair, recover_block_map, VDO_ZONE_TYPE_LOGICAL);
+ vdo_continue_completion(&repair->completion, parse_journal(repair));
+}
+
+static void handle_journal_load_error(struct vdo_completion *completion)
+{
+ struct repair_completion *repair = completion->parent;
+
+ /* Preserve the error */
+ vdo_set_completion_result(&repair->completion, completion->result);
+ vio_record_metadata_io_error(as_vio(completion));
+ completion->callback(completion);
+}
+
+static void read_journal_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo *vdo = vio->completion.vdo;
+
+ continue_vio_after_io(vio, finish_journal_load, vdo->thread_config.admin_thread);
+}
+
+/**
+ * vdo_repair() - Load the recovery journal and then recover or rebuild a vdo.
+ * @parent: The completion to notify when the operation is complete
+ */
+void vdo_repair(struct vdo_completion *parent)
+{
+ int result;
+ char *ptr;
+ struct repair_completion *repair;
+ struct vdo *vdo = parent->vdo;
+ struct recovery_journal *journal = vdo->recovery_journal;
+ physical_block_number_t pbn = journal->origin;
+ block_count_t remaining = journal->size;
+ block_count_t vio_count = DIV_ROUND_UP(remaining, MAX_BLOCKS_PER_VIO);
+ page_count_t page_count = min_t(page_count_t,
+ vdo->device_config->cache_size >> 1,
+ MAXIMUM_SIMULTANEOUS_VDO_BLOCK_MAP_RESTORATION_READS);
+
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ if (vdo->load_state == VDO_FORCE_REBUILD) {
+ vdo_log_warning("Rebuilding reference counts to clear read-only mode");
+ vdo->states.vdo.read_only_recoveries++;
+ } else if (vdo->load_state == VDO_REBUILD_FOR_UPGRADE) {
+ vdo_log_warning("Rebuilding reference counts for upgrade");
+ } else {
+ vdo_log_warning("Device was dirty, rebuilding reference counts");
+ }
+
+ result = vdo_allocate_extended(struct repair_completion, page_count,
+ struct vdo_page_completion, __func__,
+ &repair);
+ if (result != VDO_SUCCESS) {
+ vdo_fail_completion(parent, result);
+ return;
+ }
+
+ vdo_initialize_completion(&repair->completion, vdo, VDO_REPAIR_COMPLETION);
+ repair->completion.error_handler = abort_repair;
+ repair->completion.parent = parent;
+ prepare_repair_completion(repair, finish_repair, VDO_ZONE_TYPE_ADMIN);
+ repair->page_count = page_count;
+
+ result = vdo_allocate(remaining * VDO_BLOCK_SIZE, char, __func__,
+ &repair->journal_data);
+ if (abort_on_error(result, repair))
+ return;
+
+ result = vdo_allocate(vio_count, struct vio, __func__, &repair->vios);
+ if (abort_on_error(result, repair))
+ return;
+
+ ptr = repair->journal_data;
+ for (repair->vio_count = 0; repair->vio_count < vio_count; repair->vio_count++) {
+ block_count_t blocks = min_t(block_count_t, remaining,
+ MAX_BLOCKS_PER_VIO);
+
+ result = allocate_vio_components(vdo, VIO_TYPE_RECOVERY_JOURNAL,
+ VIO_PRIORITY_METADATA,
+ repair, blocks, ptr,
+ &repair->vios[repair->vio_count]);
+ if (abort_on_error(result, repair))
+ return;
+
+ ptr += (blocks * VDO_BLOCK_SIZE);
+ remaining -= blocks;
+ }
+
+ for (vio_count = 0; vio_count < repair->vio_count;
+ vio_count++, pbn += MAX_BLOCKS_PER_VIO) {
+ vdo_submit_metadata_vio(&repair->vios[vio_count], pbn, read_journal_endio,
+ handle_journal_load_error, REQ_OP_READ);
+ }
+}
diff --git a/drivers/md/dm-vdo/repair.h b/drivers/md/dm-vdo/repair.h
new file mode 100644
index 000000000000..ff255cf41486
--- /dev/null
+++ b/drivers/md/dm-vdo/repair.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_REPAIR_H
+#define VDO_REPAIR_H
+
+#include "types.h"
+
+void vdo_replay_into_slab_journals(struct block_allocator *allocator, void *context);
+void vdo_repair(struct vdo_completion *parent);
+
+#endif /* VDO_REPAIR_H */
diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c
new file mode 100644
index 000000000000..46e4721e5b4f
--- /dev/null
+++ b/drivers/md/dm-vdo/slab-depot.c
@@ -0,0 +1,5101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "slab-depot.h"
+
+#include <linux/atomic.h>
+#include <linux/bio.h>
+#include <linux/err.h>
+#include <linux/log2.h>
+#include <linux/min_heap.h>
+#include <linux/minmax.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "numeric.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+#include "action-manager.h"
+#include "admin-state.h"
+#include "completion.h"
+#include "constants.h"
+#include "data-vio.h"
+#include "encodings.h"
+#include "io-submitter.h"
+#include "physical-zone.h"
+#include "priority-table.h"
+#include "recovery-journal.h"
+#include "repair.h"
+#include "status-codes.h"
+#include "types.h"
+#include "vdo.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+static const u64 BYTES_PER_WORD = sizeof(u64);
+static const bool NORMAL_OPERATION = true;
+
+/**
+ * get_lock() - Get the lock object for a slab journal block by sequence number.
+ * @journal: vdo_slab journal to retrieve from.
+ * @sequence_number: Sequence number of the block.
+ *
+ * Return: The lock object for the given sequence number.
+ */
+static inline struct journal_lock * __must_check get_lock(struct slab_journal *journal,
+ sequence_number_t sequence_number)
+{
+ return &journal->locks[sequence_number % journal->size];
+}
+
+static bool is_slab_open(struct vdo_slab *slab)
+{
+ return (!vdo_is_state_quiescing(&slab->state) &&
+ !vdo_is_state_quiescent(&slab->state));
+}
+
+/**
+ * must_make_entries_to_flush() - Check whether there are entry waiters which should delay a flush.
+ * @journal: The journal to check.
+ *
+ * Return: true if there are no entry waiters, or if the slab is unrecovered.
+ */
+static inline bool __must_check must_make_entries_to_flush(struct slab_journal *journal)
+{
+ return ((journal->slab->status != VDO_SLAB_REBUILDING) &&
+ vdo_waitq_has_waiters(&journal->entry_waiters));
+}
+
+/**
+ * is_reaping() - Check whether a reap is currently in progress.
+ * @journal: The journal which may be reaping.
+ *
+ * Return: true if the journal is reaping.
+ */
+static inline bool __must_check is_reaping(struct slab_journal *journal)
+{
+ return (journal->head != journal->unreapable);
+}
+
+/**
+ * initialize_tail_block() - Initialize tail block as a new block.
+ * @journal: The journal whose tail block is being initialized.
+ */
+static void initialize_tail_block(struct slab_journal *journal)
+{
+ struct slab_journal_block_header *header = &journal->tail_header;
+
+ header->sequence_number = journal->tail;
+ header->entry_count = 0;
+ header->has_block_map_increments = false;
+}
+
+/**
+ * initialize_journal_state() - Set all journal fields appropriately to start journaling.
+ * @journal: The journal to be reset, based on its tail sequence number.
+ */
+static void initialize_journal_state(struct slab_journal *journal)
+{
+ journal->unreapable = journal->head;
+ journal->reap_lock = get_lock(journal, journal->unreapable);
+ journal->next_commit = journal->tail;
+ journal->summarized = journal->last_summarized = journal->tail;
+ initialize_tail_block(journal);
+}
+
+/**
+ * block_is_full() - Check whether a journal block is full.
+ * @journal: The slab journal for the block.
+ *
+ * Return: true if the tail block is full.
+ */
+static bool __must_check block_is_full(struct slab_journal *journal)
+{
+ journal_entry_count_t count = journal->tail_header.entry_count;
+
+ return (journal->tail_header.has_block_map_increments ?
+ (journal->full_entries_per_block == count) :
+ (journal->entries_per_block == count));
+}
+
+static void add_entries(struct slab_journal *journal);
+static void update_tail_block_location(struct slab_journal *journal);
+static void release_journal_locks(struct vdo_waiter *waiter, void *context);
+
+/**
+ * is_slab_journal_blank() - Check whether a slab's journal is blank.
+ *
+ * A slab journal is blank if it has never had any entries recorded in it.
+ *
+ * Return: true if the slab's journal has never been modified.
+ */
+static bool is_slab_journal_blank(const struct vdo_slab *slab)
+{
+ return ((slab->journal.tail == 1) &&
+ (slab->journal.tail_header.entry_count == 0));
+}
+
+/**
+ * mark_slab_journal_dirty() - Put a slab journal on the dirty ring of its allocator in the correct
+ * order.
+ * @journal: The journal to be marked dirty.
+ * @lock: The recovery journal lock held by the slab journal.
+ */
+static void mark_slab_journal_dirty(struct slab_journal *journal, sequence_number_t lock)
+{
+ struct slab_journal *dirty_journal;
+ struct list_head *dirty_list = &journal->slab->allocator->dirty_slab_journals;
+
+ VDO_ASSERT_LOG_ONLY(journal->recovery_lock == 0, "slab journal was clean");
+
+ journal->recovery_lock = lock;
+ list_for_each_entry_reverse(dirty_journal, dirty_list, dirty_entry) {
+ if (dirty_journal->recovery_lock <= journal->recovery_lock)
+ break;
+ }
+
+ list_move_tail(&journal->dirty_entry, dirty_journal->dirty_entry.next);
+}
+
+static void mark_slab_journal_clean(struct slab_journal *journal)
+{
+ journal->recovery_lock = 0;
+ list_del_init(&journal->dirty_entry);
+}
+
+static void check_if_slab_drained(struct vdo_slab *slab)
+{
+ bool read_only;
+ struct slab_journal *journal = &slab->journal;
+ const struct admin_state_code *code;
+
+ if (!vdo_is_state_draining(&slab->state) ||
+ must_make_entries_to_flush(journal) ||
+ is_reaping(journal) ||
+ journal->waiting_to_commit ||
+ !list_empty(&journal->uncommitted_blocks) ||
+ journal->updating_slab_summary ||
+ (slab->active_count > 0))
+ return;
+
+ /* When not suspending or recovering, the slab must be clean. */
+ code = vdo_get_admin_state_code(&slab->state);
+ read_only = vdo_is_read_only(slab->allocator->depot->vdo);
+ if (!read_only &&
+ vdo_waitq_has_waiters(&slab->dirty_blocks) &&
+ (code != VDO_ADMIN_STATE_SUSPENDING) &&
+ (code != VDO_ADMIN_STATE_RECOVERING))
+ return;
+
+ vdo_finish_draining_with_result(&slab->state,
+ (read_only ? VDO_READ_ONLY : VDO_SUCCESS));
+}
+
+/* FULLNESS HINT COMPUTATION */
+
+/**
+ * compute_fullness_hint() - Translate a slab's free block count into a 'fullness hint' that can be
+ * stored in a slab_summary_entry's 7 bits that are dedicated to its free
+ * count.
+ * @depot: The depot whose summary being updated.
+ * @free_blocks: The number of free blocks.
+ *
+ * Note: the number of free blocks must be strictly less than 2^23 blocks, even though
+ * theoretically slabs could contain precisely 2^23 blocks; there is an assumption that at least
+ * one block is used by metadata. This assumption is necessary; otherwise, the fullness hint might
+ * overflow. The fullness hint formula is roughly (fullness >> 16) & 0x7f, but (2^23 >> 16) & 0x7f
+ * is 0, which would make it impossible to distinguish completely full from completely empty.
+ *
+ * Return: A fullness hint, which can be stored in 7 bits.
+ */
+static u8 __must_check compute_fullness_hint(struct slab_depot *depot,
+ block_count_t free_blocks)
+{
+ block_count_t hint;
+
+ VDO_ASSERT_LOG_ONLY((free_blocks < (1 << 23)), "free blocks must be less than 2^23");
+
+ if (free_blocks == 0)
+ return 0;
+
+ hint = free_blocks >> depot->hint_shift;
+ return ((hint == 0) ? 1 : hint);
+}
+
+/**
+ * check_summary_drain_complete() - Check whether an allocators summary has finished draining.
+ */
+static void check_summary_drain_complete(struct block_allocator *allocator)
+{
+ if (!vdo_is_state_draining(&allocator->summary_state) ||
+ (allocator->summary_write_count > 0))
+ return;
+
+ vdo_finish_operation(&allocator->summary_state,
+ (vdo_is_read_only(allocator->depot->vdo) ?
+ VDO_READ_ONLY : VDO_SUCCESS));
+}
+
+/**
+ * notify_summary_waiters() - Wake all the waiters in a given queue.
+ * @allocator: The block allocator summary which owns the queue.
+ * @queue: The queue to notify.
+ */
+static void notify_summary_waiters(struct block_allocator *allocator,
+ struct vdo_wait_queue *queue)
+{
+ int result = (vdo_is_read_only(allocator->depot->vdo) ?
+ VDO_READ_ONLY : VDO_SUCCESS);
+
+ vdo_waitq_notify_all_waiters(queue, NULL, &result);
+}
+
+static void launch_write(struct slab_summary_block *summary_block);
+
+/**
+ * finish_updating_slab_summary_block() - Finish processing a block which attempted to write,
+ * whether or not the attempt succeeded.
+ * @block: The block.
+ */
+static void finish_updating_slab_summary_block(struct slab_summary_block *block)
+{
+ notify_summary_waiters(block->allocator, &block->current_update_waiters);
+ block->writing = false;
+ block->allocator->summary_write_count--;
+ if (vdo_waitq_has_waiters(&block->next_update_waiters))
+ launch_write(block);
+ else
+ check_summary_drain_complete(block->allocator);
+}
+
+/**
+ * finish_update() - This is the callback for a successful summary block write.
+ * @completion: The write vio.
+ */
+static void finish_update(struct vdo_completion *completion)
+{
+ struct slab_summary_block *block =
+ container_of(as_vio(completion), struct slab_summary_block, vio);
+
+ atomic64_inc(&block->allocator->depot->summary_statistics.blocks_written);
+ finish_updating_slab_summary_block(block);
+}
+
+/**
+ * handle_write_error() - Handle an error writing a slab summary block.
+ * @completion: The write VIO.
+ */
+static void handle_write_error(struct vdo_completion *completion)
+{
+ struct slab_summary_block *block =
+ container_of(as_vio(completion), struct slab_summary_block, vio);
+
+ vio_record_metadata_io_error(as_vio(completion));
+ vdo_enter_read_only_mode(completion->vdo, completion->result);
+ finish_updating_slab_summary_block(block);
+}
+
+static void write_slab_summary_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct slab_summary_block *block =
+ container_of(vio, struct slab_summary_block, vio);
+
+ continue_vio_after_io(vio, finish_update, block->allocator->thread_id);
+}
+
+/**
+ * launch_write() - Write a slab summary block unless it is currently out for writing.
+ * @block: The block that needs to be committed.
+ */
+static void launch_write(struct slab_summary_block *block)
+{
+ struct block_allocator *allocator = block->allocator;
+ struct slab_depot *depot = allocator->depot;
+ physical_block_number_t pbn;
+
+ if (block->writing)
+ return;
+
+ allocator->summary_write_count++;
+ vdo_waitq_transfer_all_waiters(&block->next_update_waiters,
+ &block->current_update_waiters);
+ block->writing = true;
+
+ if (vdo_is_read_only(depot->vdo)) {
+ finish_updating_slab_summary_block(block);
+ return;
+ }
+
+ memcpy(block->outgoing_entries, block->entries, VDO_BLOCK_SIZE);
+
+ /*
+ * Flush before writing to ensure that the slab journal tail blocks and reference updates
+ * covered by this summary update are stable. Otherwise, a subsequent recovery could
+ * encounter a slab summary update that refers to a slab journal tail block that has not
+ * actually been written. In such cases, the slab journal referenced will be treated as
+ * empty, causing any data within the slab which predates the existing recovery journal
+ * entries to be lost.
+ */
+ pbn = (depot->summary_origin +
+ (VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE * allocator->zone_number) +
+ block->index);
+ vdo_submit_metadata_vio(&block->vio, pbn, write_slab_summary_endio,
+ handle_write_error, REQ_OP_WRITE | REQ_PREFLUSH);
+}
+
+/**
+ * update_slab_summary_entry() - Update the entry for a slab.
+ * @slab: The slab whose entry is to be updated
+ * @waiter: The waiter that is updating the summary.
+ * @tail_block_offset: The offset of the slab journal's tail block.
+ * @load_ref_counts: Whether the reference counts must be loaded from disk on the vdo load.
+ * @is_clean: Whether the slab is clean.
+ * @free_blocks: The number of free blocks.
+ */
+static void update_slab_summary_entry(struct vdo_slab *slab, struct vdo_waiter *waiter,
+ tail_block_offset_t tail_block_offset,
+ bool load_ref_counts, bool is_clean,
+ block_count_t free_blocks)
+{
+ u8 index = slab->slab_number / VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK;
+ struct block_allocator *allocator = slab->allocator;
+ struct slab_summary_block *block = &allocator->summary_blocks[index];
+ int result;
+ struct slab_summary_entry *entry;
+
+ if (vdo_is_read_only(block->vio.completion.vdo)) {
+ result = VDO_READ_ONLY;
+ waiter->callback(waiter, &result);
+ return;
+ }
+
+ if (vdo_is_state_draining(&allocator->summary_state) ||
+ vdo_is_state_quiescent(&allocator->summary_state)) {
+ result = VDO_INVALID_ADMIN_STATE;
+ waiter->callback(waiter, &result);
+ return;
+ }
+
+ entry = &allocator->summary_entries[slab->slab_number];
+ *entry = (struct slab_summary_entry) {
+ .tail_block_offset = tail_block_offset,
+ .load_ref_counts = (entry->load_ref_counts || load_ref_counts),
+ .is_dirty = !is_clean,
+ .fullness_hint = compute_fullness_hint(allocator->depot, free_blocks),
+ };
+ vdo_waitq_enqueue_waiter(&block->next_update_waiters, waiter);
+ launch_write(block);
+}
+
+/**
+ * finish_reaping() - Actually advance the head of the journal now that any necessary flushes are
+ * complete.
+ * @journal: The journal to be reaped.
+ */
+static void finish_reaping(struct slab_journal *journal)
+{
+ journal->head = journal->unreapable;
+ add_entries(journal);
+ check_if_slab_drained(journal->slab);
+}
+
+static void reap_slab_journal(struct slab_journal *journal);
+
+/**
+ * complete_reaping() - Finish reaping now that we have flushed the lower layer and then try
+ * reaping again in case we deferred reaping due to an outstanding vio.
+ * @completion: The flush vio.
+ */
+static void complete_reaping(struct vdo_completion *completion)
+{
+ struct slab_journal *journal = completion->parent;
+
+ return_vio_to_pool(journal->slab->allocator->vio_pool,
+ vio_as_pooled_vio(as_vio(vdo_forget(completion))));
+ finish_reaping(journal);
+ reap_slab_journal(journal);
+}
+
+/**
+ * handle_flush_error() - Handle an error flushing the lower layer.
+ * @completion: The flush vio.
+ */
+static void handle_flush_error(struct vdo_completion *completion)
+{
+ vio_record_metadata_io_error(as_vio(completion));
+ vdo_enter_read_only_mode(completion->vdo, completion->result);
+ complete_reaping(completion);
+}
+
+static void flush_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct slab_journal *journal = vio->completion.parent;
+
+ continue_vio_after_io(vio, complete_reaping,
+ journal->slab->allocator->thread_id);
+}
+
+/**
+ * flush_for_reaping() - A waiter callback for getting a vio with which to flush the lower layer
+ * prior to reaping.
+ * @waiter: The journal as a flush waiter.
+ * @context: The newly acquired flush vio.
+ */
+static void flush_for_reaping(struct vdo_waiter *waiter, void *context)
+{
+ struct slab_journal *journal =
+ container_of(waiter, struct slab_journal, flush_waiter);
+ struct pooled_vio *pooled = context;
+ struct vio *vio = &pooled->vio;
+
+ vio->completion.parent = journal;
+ vdo_submit_flush_vio(vio, flush_endio, handle_flush_error);
+}
+
+/**
+ * reap_slab_journal() - Conduct a reap on a slab journal to reclaim unreferenced blocks.
+ * @journal: The slab journal.
+ */
+static void reap_slab_journal(struct slab_journal *journal)
+{
+ bool reaped = false;
+
+ if (is_reaping(journal)) {
+ /* We already have a reap in progress so wait for it to finish. */
+ return;
+ }
+
+ if ((journal->slab->status != VDO_SLAB_REBUILT) ||
+ !vdo_is_state_normal(&journal->slab->state) ||
+ vdo_is_read_only(journal->slab->allocator->depot->vdo)) {
+ /*
+ * We must not reap in the first two cases, and there's no point in read-only mode.
+ */
+ return;
+ }
+
+ /*
+ * Start reclaiming blocks only when the journal head has no references. Then stop when a
+ * block is referenced or reap reaches the most recently written block, referenced by the
+ * slab summary, which has the sequence number just before the tail.
+ */
+ while ((journal->unreapable < journal->tail) && (journal->reap_lock->count == 0)) {
+ reaped = true;
+ journal->unreapable++;
+ journal->reap_lock++;
+ if (journal->reap_lock == &journal->locks[journal->size])
+ journal->reap_lock = &journal->locks[0];
+ }
+
+ if (!reaped)
+ return;
+
+ /*
+ * It is never safe to reap a slab journal block without first issuing a flush, regardless
+ * of whether a user flush has been received or not. In the absence of the flush, the
+ * reference block write which released the locks allowing the slab journal to reap may not
+ * be persisted. Although slab summary writes will eventually issue flushes, multiple slab
+ * journal block writes can be issued while previous slab summary updates have not yet been
+ * made. Even though those slab journal block writes will be ignored if the slab summary
+ * update is not persisted, they may still overwrite the to-be-reaped slab journal block
+ * resulting in a loss of reference count updates.
+ */
+ journal->flush_waiter.callback = flush_for_reaping;
+ acquire_vio_from_pool(journal->slab->allocator->vio_pool,
+ &journal->flush_waiter);
+}
+
+/**
+ * adjust_slab_journal_block_reference() - Adjust the reference count for a slab journal block.
+ * @journal: The slab journal.
+ * @sequence_number: The journal sequence number of the referenced block.
+ * @adjustment: Amount to adjust the reference counter.
+ *
+ * Note that when the adjustment is negative, the slab journal will be reaped.
+ */
+static void adjust_slab_journal_block_reference(struct slab_journal *journal,
+ sequence_number_t sequence_number,
+ int adjustment)
+{
+ struct journal_lock *lock;
+
+ if (sequence_number == 0)
+ return;
+
+ if (journal->slab->status == VDO_SLAB_REPLAYING) {
+ /* Locks should not be used during offline replay. */
+ return;
+ }
+
+ VDO_ASSERT_LOG_ONLY((adjustment != 0), "adjustment must be non-zero");
+ lock = get_lock(journal, sequence_number);
+ if (adjustment < 0) {
+ VDO_ASSERT_LOG_ONLY((-adjustment <= lock->count),
+ "adjustment %d of lock count %u for slab journal block %llu must not underflow",
+ adjustment, lock->count,
+ (unsigned long long) sequence_number);
+ }
+
+ lock->count += adjustment;
+ if (lock->count == 0)
+ reap_slab_journal(journal);
+}
+
+/**
+ * release_journal_locks() - Callback invoked after a slab summary update completes.
+ * @waiter: The slab summary waiter that has just been notified.
+ * @context: The result code of the update.
+ *
+ * Registered in the constructor on behalf of update_tail_block_location().
+ *
+ * Implements waiter_callback_fn.
+ */
+static void release_journal_locks(struct vdo_waiter *waiter, void *context)
+{
+ sequence_number_t first, i;
+ struct slab_journal *journal =
+ container_of(waiter, struct slab_journal, slab_summary_waiter);
+ int result = *((int *) context);
+
+ if (result != VDO_SUCCESS) {
+ if (result != VDO_READ_ONLY) {
+ /*
+ * Don't bother logging what might be lots of errors if we are already in
+ * read-only mode.
+ */
+ vdo_log_error_strerror(result, "failed slab summary update %llu",
+ (unsigned long long) journal->summarized);
+ }
+
+ journal->updating_slab_summary = false;
+ vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo, result);
+ check_if_slab_drained(journal->slab);
+ return;
+ }
+
+ if (journal->partial_write_in_progress && (journal->summarized == journal->tail)) {
+ journal->partial_write_in_progress = false;
+ add_entries(journal);
+ }
+
+ first = journal->last_summarized;
+ journal->last_summarized = journal->summarized;
+ for (i = journal->summarized - 1; i >= first; i--) {
+ /*
+ * Release the lock the summarized block held on the recovery journal. (During
+ * replay, recovery_start will always be 0.)
+ */
+ if (journal->recovery_journal != NULL) {
+ zone_count_t zone_number = journal->slab->allocator->zone_number;
+ struct journal_lock *lock = get_lock(journal, i);
+
+ vdo_release_recovery_journal_block_reference(journal->recovery_journal,
+ lock->recovery_start,
+ VDO_ZONE_TYPE_PHYSICAL,
+ zone_number);
+ }
+
+ /*
+ * Release our own lock against reaping for blocks that are committed. (This
+ * function will not change locks during replay.)
+ */
+ adjust_slab_journal_block_reference(journal, i, -1);
+ }
+
+ journal->updating_slab_summary = false;
+
+ reap_slab_journal(journal);
+
+ /* Check if the slab summary needs to be updated again. */
+ update_tail_block_location(journal);
+}
+
+/**
+ * update_tail_block_location() - Update the tail block location in the slab summary, if necessary.
+ * @journal: The slab journal that is updating its tail block location.
+ */
+static void update_tail_block_location(struct slab_journal *journal)
+{
+ block_count_t free_block_count;
+ struct vdo_slab *slab = journal->slab;
+
+ if (journal->updating_slab_summary ||
+ vdo_is_read_only(journal->slab->allocator->depot->vdo) ||
+ (journal->last_summarized >= journal->next_commit)) {
+ check_if_slab_drained(slab);
+ return;
+ }
+
+ if (slab->status != VDO_SLAB_REBUILT) {
+ u8 hint = slab->allocator->summary_entries[slab->slab_number].fullness_hint;
+
+ free_block_count = ((block_count_t) hint) << slab->allocator->depot->hint_shift;
+ } else {
+ free_block_count = slab->free_blocks;
+ }
+
+ journal->summarized = journal->next_commit;
+ journal->updating_slab_summary = true;
+
+ /*
+ * Update slab summary as dirty.
+ * vdo_slab journal can only reap past sequence number 1 when all the ref counts for this
+ * slab have been written to the layer. Therefore, indicate that the ref counts must be
+ * loaded when the journal head has reaped past sequence number 1.
+ */
+ update_slab_summary_entry(slab, &journal->slab_summary_waiter,
+ journal->summarized % journal->size,
+ (journal->head > 1), false, free_block_count);
+}
+
+/**
+ * reopen_slab_journal() - Reopen a slab's journal by emptying it and then adding pending entries.
+ */
+static void reopen_slab_journal(struct vdo_slab *slab)
+{
+ struct slab_journal *journal = &slab->journal;
+ sequence_number_t block;
+
+ VDO_ASSERT_LOG_ONLY(journal->tail_header.entry_count == 0,
+ "vdo_slab journal's active block empty before reopening");
+ journal->head = journal->tail;
+ initialize_journal_state(journal);
+
+ /* Ensure no locks are spuriously held on an empty journal. */
+ for (block = 1; block <= journal->size; block++) {
+ VDO_ASSERT_LOG_ONLY((get_lock(journal, block)->count == 0),
+ "Scrubbed journal's block %llu is not locked",
+ (unsigned long long) block);
+ }
+
+ add_entries(journal);
+}
+
+static sequence_number_t get_committing_sequence_number(const struct pooled_vio *vio)
+{
+ const struct packed_slab_journal_block *block =
+ (const struct packed_slab_journal_block *) vio->vio.data;
+
+ return __le64_to_cpu(block->header.sequence_number);
+}
+
+/**
+ * complete_write() - Handle post-commit processing.
+ * @completion: The write vio as a completion.
+ *
+ * This is the callback registered by write_slab_journal_block().
+ */
+static void complete_write(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct pooled_vio *pooled = vio_as_pooled_vio(as_vio(completion));
+ struct slab_journal *journal = completion->parent;
+ sequence_number_t committed = get_committing_sequence_number(pooled);
+
+ list_del_init(&pooled->list_entry);
+ return_vio_to_pool(journal->slab->allocator->vio_pool, vdo_forget(pooled));
+
+ if (result != VDO_SUCCESS) {
+ vio_record_metadata_io_error(as_vio(completion));
+ vdo_log_error_strerror(result, "cannot write slab journal block %llu",
+ (unsigned long long) committed);
+ vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo, result);
+ check_if_slab_drained(journal->slab);
+ return;
+ }
+
+ WRITE_ONCE(journal->events->blocks_written, journal->events->blocks_written + 1);
+
+ if (list_empty(&journal->uncommitted_blocks)) {
+ /* If no blocks are outstanding, then the commit point is at the tail. */
+ journal->next_commit = journal->tail;
+ } else {
+ /* The commit point is always the beginning of the oldest incomplete block. */
+ pooled = container_of(journal->uncommitted_blocks.next,
+ struct pooled_vio, list_entry);
+ journal->next_commit = get_committing_sequence_number(pooled);
+ }
+
+ update_tail_block_location(journal);
+}
+
+static void write_slab_journal_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct slab_journal *journal = vio->completion.parent;
+
+ continue_vio_after_io(vio, complete_write, journal->slab->allocator->thread_id);
+}
+
+/**
+ * write_slab_journal_block() - Write a slab journal block.
+ * @waiter: The vio pool waiter which was just notified.
+ * @context: The vio pool entry for the write.
+ *
+ * Callback from acquire_vio_from_pool() registered in commit_tail().
+ */
+static void write_slab_journal_block(struct vdo_waiter *waiter, void *context)
+{
+ struct pooled_vio *pooled = context;
+ struct vio *vio = &pooled->vio;
+ struct slab_journal *journal =
+ container_of(waiter, struct slab_journal, resource_waiter);
+ struct slab_journal_block_header *header = &journal->tail_header;
+ int unused_entries = journal->entries_per_block - header->entry_count;
+ physical_block_number_t block_number;
+ const struct admin_state_code *operation;
+
+ header->head = journal->head;
+ list_add_tail(&pooled->list_entry, &journal->uncommitted_blocks);
+ vdo_pack_slab_journal_block_header(header, &journal->block->header);
+
+ /* Copy the tail block into the vio. */
+ memcpy(pooled->vio.data, journal->block, VDO_BLOCK_SIZE);
+
+ VDO_ASSERT_LOG_ONLY(unused_entries >= 0, "vdo_slab journal block is not overfull");
+ if (unused_entries > 0) {
+ /*
+ * Release the per-entry locks for any unused entries in the block we are about to
+ * write.
+ */
+ adjust_slab_journal_block_reference(journal, header->sequence_number,
+ -unused_entries);
+ journal->partial_write_in_progress = !block_is_full(journal);
+ }
+
+ block_number = journal->slab->journal_origin +
+ (header->sequence_number % journal->size);
+ vio->completion.parent = journal;
+
+ /*
+ * This block won't be read in recovery until the slab summary is updated to refer to it.
+ * The slab summary update does a flush which is sufficient to protect us from corruption
+ * due to out of order slab journal, reference block, or block map writes.
+ */
+ vdo_submit_metadata_vio(vdo_forget(vio), block_number, write_slab_journal_endio,
+ complete_write, REQ_OP_WRITE);
+
+ /* Since the write is submitted, the tail block structure can be reused. */
+ journal->tail++;
+ initialize_tail_block(journal);
+ journal->waiting_to_commit = false;
+
+ operation = vdo_get_admin_state_code(&journal->slab->state);
+ if (operation == VDO_ADMIN_STATE_WAITING_FOR_RECOVERY) {
+ vdo_finish_operation(&journal->slab->state,
+ (vdo_is_read_only(journal->slab->allocator->depot->vdo) ?
+ VDO_READ_ONLY : VDO_SUCCESS));
+ return;
+ }
+
+ add_entries(journal);
+}
+
+/**
+ * commit_tail() - Commit the tail block of the slab journal.
+ * @journal: The journal whose tail block should be committed.
+ */
+static void commit_tail(struct slab_journal *journal)
+{
+ if ((journal->tail_header.entry_count == 0) && must_make_entries_to_flush(journal)) {
+ /*
+ * There are no entries at the moment, but there are some waiters, so defer
+ * initiating the flush until those entries are ready to write.
+ */
+ return;
+ }
+
+ if (vdo_is_read_only(journal->slab->allocator->depot->vdo) ||
+ journal->waiting_to_commit ||
+ (journal->tail_header.entry_count == 0)) {
+ /*
+ * There is nothing to do since the tail block is empty, or writing, or the journal
+ * is in read-only mode.
+ */
+ return;
+ }
+
+ /*
+ * Since we are about to commit the tail block, this journal no longer needs to be on the
+ * ring of journals which the recovery journal might ask to commit.
+ */
+ mark_slab_journal_clean(journal);
+
+ journal->waiting_to_commit = true;
+
+ journal->resource_waiter.callback = write_slab_journal_block;
+ acquire_vio_from_pool(journal->slab->allocator->vio_pool,
+ &journal->resource_waiter);
+}
+
+/**
+ * encode_slab_journal_entry() - Encode a slab journal entry.
+ * @tail_header: The unpacked header for the block.
+ * @payload: The journal block payload to hold the entry.
+ * @sbn: The slab block number of the entry to encode.
+ * @operation: The type of the entry.
+ * @increment: True if this is an increment.
+ *
+ * Exposed for unit tests.
+ */
+static void encode_slab_journal_entry(struct slab_journal_block_header *tail_header,
+ slab_journal_payload *payload,
+ slab_block_number sbn,
+ enum journal_operation operation,
+ bool increment)
+{
+ journal_entry_count_t entry_number = tail_header->entry_count++;
+
+ if (operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING) {
+ if (!tail_header->has_block_map_increments) {
+ memset(payload->full_entries.entry_types, 0,
+ VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE);
+ tail_header->has_block_map_increments = true;
+ }
+
+ payload->full_entries.entry_types[entry_number / 8] |=
+ ((u8)1 << (entry_number % 8));
+ }
+
+ vdo_pack_slab_journal_entry(&payload->entries[entry_number], sbn, increment);
+}
+
+/**
+ * expand_journal_point() - Convert a recovery journal journal_point which refers to both an
+ * increment and a decrement to a single point which refers to one or the
+ * other.
+ * @recovery_point: The journal point to convert.
+ * @increment: Whether the current entry is an increment.
+ *
+ * Return: The expanded journal point
+ *
+ * Because each data_vio has but a single recovery journal point, but may need to make both
+ * increment and decrement entries in the same slab journal. In order to distinguish the two
+ * entries, the entry count of the expanded journal point is twice the actual recovery journal
+ * entry count for increments, and one more than that for decrements.
+ */
+static struct journal_point expand_journal_point(struct journal_point recovery_point,
+ bool increment)
+{
+ recovery_point.entry_count *= 2;
+ if (!increment)
+ recovery_point.entry_count++;
+
+ return recovery_point;
+}
+
+/**
+ * add_entry() - Actually add an entry to the slab journal, potentially firing off a write if a
+ * block becomes full.
+ * @journal: The slab journal to append to.
+ * @pbn: The pbn being adjusted.
+ * @operation: The type of entry to make.
+ * @increment: True if this is an increment.
+ * @recovery_point: The expanded recovery point.
+ *
+ * This function is synchronous.
+ */
+static void add_entry(struct slab_journal *journal, physical_block_number_t pbn,
+ enum journal_operation operation, bool increment,
+ struct journal_point recovery_point)
+{
+ struct packed_slab_journal_block *block = journal->block;
+ int result;
+
+ result = VDO_ASSERT(vdo_before_journal_point(&journal->tail_header.recovery_point,
+ &recovery_point),
+ "recovery journal point is monotonically increasing, recovery point: %llu.%u, block recovery point: %llu.%u",
+ (unsigned long long) recovery_point.sequence_number,
+ recovery_point.entry_count,
+ (unsigned long long) journal->tail_header.recovery_point.sequence_number,
+ journal->tail_header.recovery_point.entry_count);
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo, result);
+ return;
+ }
+
+ if (operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING) {
+ result = VDO_ASSERT((journal->tail_header.entry_count <
+ journal->full_entries_per_block),
+ "block has room for full entries");
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo,
+ result);
+ return;
+ }
+ }
+
+ encode_slab_journal_entry(&journal->tail_header, &block->payload,
+ pbn - journal->slab->start, operation, increment);
+ journal->tail_header.recovery_point = recovery_point;
+ if (block_is_full(journal))
+ commit_tail(journal);
+}
+
+static inline block_count_t journal_length(const struct slab_journal *journal)
+{
+ return journal->tail - journal->head;
+}
+
+/**
+ * vdo_attempt_replay_into_slab() - Replay a recovery journal entry into a slab's journal.
+ * @slab: The slab to play into.
+ * @pbn: The PBN for the entry.
+ * @operation: The type of entry to add.
+ * @increment: True if this entry is an increment.
+ * @recovery_point: The recovery journal point corresponding to this entry.
+ * @parent: The completion to notify when there is space to add the entry if the entry could not be
+ * added immediately.
+ *
+ * Return: true if the entry was added immediately.
+ */
+bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t pbn,
+ enum journal_operation operation, bool increment,
+ struct journal_point *recovery_point,
+ struct vdo_completion *parent)
+{
+ struct slab_journal *journal = &slab->journal;
+ struct slab_journal_block_header *header = &journal->tail_header;
+ struct journal_point expanded = expand_journal_point(*recovery_point, increment);
+
+ /* Only accept entries after the current recovery point. */
+ if (!vdo_before_journal_point(&journal->tail_header.recovery_point, &expanded))
+ return true;
+
+ if ((header->entry_count >= journal->full_entries_per_block) &&
+ (header->has_block_map_increments || (operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING))) {
+ /*
+ * The tail block does not have room for the entry we are attempting to add so
+ * commit the tail block now.
+ */
+ commit_tail(journal);
+ }
+
+ if (journal->waiting_to_commit) {
+ vdo_start_operation_with_waiter(&journal->slab->state,
+ VDO_ADMIN_STATE_WAITING_FOR_RECOVERY,
+ parent, NULL);
+ return false;
+ }
+
+ if (journal_length(journal) >= journal->size) {
+ /*
+ * We must have reaped the current head before the crash, since the blocked
+ * threshold keeps us from having more entries than fit in a slab journal; hence we
+ * can just advance the head (and unreapable block), as needed.
+ */
+ journal->head++;
+ journal->unreapable++;
+ }
+
+ if (journal->slab->status == VDO_SLAB_REBUILT)
+ journal->slab->status = VDO_SLAB_REPLAYING;
+
+ add_entry(journal, pbn, operation, increment, expanded);
+ return true;
+}
+
+/**
+ * requires_reaping() - Check whether the journal must be reaped before adding new entries.
+ * @journal: The journal to check.
+ *
+ * Return: true if the journal must be reaped.
+ */
+static bool requires_reaping(const struct slab_journal *journal)
+{
+ return (journal_length(journal) >= journal->blocking_threshold);
+}
+
+/** finish_summary_update() - A waiter callback that resets the writing state of a slab. */
+static void finish_summary_update(struct vdo_waiter *waiter, void *context)
+{
+ struct vdo_slab *slab = container_of(waiter, struct vdo_slab, summary_waiter);
+ int result = *((int *) context);
+
+ slab->active_count--;
+
+ if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
+ vdo_log_error_strerror(result, "failed to update slab summary");
+ vdo_enter_read_only_mode(slab->allocator->depot->vdo, result);
+ }
+
+ check_if_slab_drained(slab);
+}
+
+static void write_reference_block(struct vdo_waiter *waiter, void *context);
+
+/**
+ * launch_reference_block_write() - Launch the write of a dirty reference block by first acquiring
+ * a VIO for it from the pool.
+ * @waiter: The waiter of the block which is starting to write.
+ * @context: The parent slab of the block.
+ *
+ * This can be asynchronous since the writer will have to wait if all VIOs in the pool are
+ * currently in use.
+ */
+static void launch_reference_block_write(struct vdo_waiter *waiter, void *context)
+{
+ struct vdo_slab *slab = context;
+
+ if (vdo_is_read_only(slab->allocator->depot->vdo))
+ return;
+
+ slab->active_count++;
+ container_of(waiter, struct reference_block, waiter)->is_writing = true;
+ waiter->callback = write_reference_block;
+ acquire_vio_from_pool(slab->allocator->vio_pool, waiter);
+}
+
+static void save_dirty_reference_blocks(struct vdo_slab *slab)
+{
+ vdo_waitq_notify_all_waiters(&slab->dirty_blocks,
+ launch_reference_block_write, slab);
+ check_if_slab_drained(slab);
+}
+
+/**
+ * finish_reference_block_write() - After a reference block has written, clean it, release its
+ * locks, and return its VIO to the pool.
+ * @completion: The VIO that just finished writing.
+ */
+static void finish_reference_block_write(struct vdo_completion *completion)
+{
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = vio_as_pooled_vio(vio);
+ struct reference_block *block = completion->parent;
+ struct vdo_slab *slab = block->slab;
+ tail_block_offset_t offset;
+
+ slab->active_count--;
+
+ /* Release the slab journal lock. */
+ adjust_slab_journal_block_reference(&slab->journal,
+ block->slab_journal_lock_to_release, -1);
+ return_vio_to_pool(slab->allocator->vio_pool, pooled);
+
+ /*
+ * We can't clear the is_writing flag earlier as releasing the slab journal lock may cause
+ * us to be dirtied again, but we don't want to double enqueue.
+ */
+ block->is_writing = false;
+
+ if (vdo_is_read_only(completion->vdo)) {
+ check_if_slab_drained(slab);
+ return;
+ }
+
+ /* Re-queue the block if it was re-dirtied while it was writing. */
+ if (block->is_dirty) {
+ vdo_waitq_enqueue_waiter(&block->slab->dirty_blocks, &block->waiter);
+ if (vdo_is_state_draining(&slab->state)) {
+ /* We must be saving, and this block will otherwise not be relaunched. */
+ save_dirty_reference_blocks(slab);
+ }
+
+ return;
+ }
+
+ /*
+ * Mark the slab as clean in the slab summary if there are no dirty or writing blocks
+ * and no summary update in progress.
+ */
+ if ((slab->active_count > 0) || vdo_waitq_has_waiters(&slab->dirty_blocks)) {
+ check_if_slab_drained(slab);
+ return;
+ }
+
+ offset = slab->allocator->summary_entries[slab->slab_number].tail_block_offset;
+ slab->active_count++;
+ slab->summary_waiter.callback = finish_summary_update;
+ update_slab_summary_entry(slab, &slab->summary_waiter, offset,
+ true, true, slab->free_blocks);
+}
+
+/**
+ * get_reference_counters_for_block() - Find the reference counters for a given block.
+ * @block: The reference_block in question.
+ *
+ * Return: A pointer to the reference counters for this block.
+ */
+static vdo_refcount_t * __must_check get_reference_counters_for_block(struct reference_block *block)
+{
+ size_t block_index = block - block->slab->reference_blocks;
+
+ return &block->slab->counters[block_index * COUNTS_PER_BLOCK];
+}
+
+/**
+ * pack_reference_block() - Copy data from a reference block to a buffer ready to be written out.
+ * @block: The block to copy.
+ * @buffer: The char buffer to fill with the packed block.
+ */
+static void pack_reference_block(struct reference_block *block, void *buffer)
+{
+ struct packed_reference_block *packed = buffer;
+ vdo_refcount_t *counters = get_reference_counters_for_block(block);
+ sector_count_t i;
+ struct packed_journal_point commit_point;
+
+ vdo_pack_journal_point(&block->slab->slab_journal_point, &commit_point);
+
+ for (i = 0; i < VDO_SECTORS_PER_BLOCK; i++) {
+ packed->sectors[i].commit_point = commit_point;
+ memcpy(packed->sectors[i].counts, counters + (i * COUNTS_PER_SECTOR),
+ (sizeof(vdo_refcount_t) * COUNTS_PER_SECTOR));
+ }
+}
+
+static void write_reference_block_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct reference_block *block = vio->completion.parent;
+ thread_id_t thread_id = block->slab->allocator->thread_id;
+
+ continue_vio_after_io(vio, finish_reference_block_write, thread_id);
+}
+
+/**
+ * handle_io_error() - Handle an I/O error reading or writing a reference count block.
+ * @completion: The VIO doing the I/O as a completion.
+ */
+static void handle_io_error(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct vio *vio = as_vio(completion);
+ struct vdo_slab *slab = ((struct reference_block *) completion->parent)->slab;
+
+ vio_record_metadata_io_error(vio);
+ return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio));
+ slab->active_count--;
+ vdo_enter_read_only_mode(slab->allocator->depot->vdo, result);
+ check_if_slab_drained(slab);
+}
+
+/**
+ * write_reference_block() - After a dirty block waiter has gotten a VIO from the VIO pool, copy
+ * its counters and associated data into the VIO, and launch the write.
+ * @waiter: The waiter of the dirty block.
+ * @context: The VIO returned by the pool.
+ */
+static void write_reference_block(struct vdo_waiter *waiter, void *context)
+{
+ size_t block_offset;
+ physical_block_number_t pbn;
+ struct pooled_vio *pooled = context;
+ struct vdo_completion *completion = &pooled->vio.completion;
+ struct reference_block *block = container_of(waiter, struct reference_block,
+ waiter);
+
+ pack_reference_block(block, pooled->vio.data);
+ block_offset = (block - block->slab->reference_blocks);
+ pbn = (block->slab->ref_counts_origin + block_offset);
+ block->slab_journal_lock_to_release = block->slab_journal_lock;
+ completion->parent = block;
+
+ /*
+ * Mark the block as clean, since we won't be committing any updates that happen after this
+ * moment. As long as VIO order is preserved, two VIOs updating this block at once will not
+ * cause complications.
+ */
+ block->is_dirty = false;
+
+ /*
+ * Flush before writing to ensure that the recovery journal and slab journal entries which
+ * cover this reference update are stable. This prevents data corruption that can be caused
+ * by out of order writes.
+ */
+ WRITE_ONCE(block->slab->allocator->ref_counts_statistics.blocks_written,
+ block->slab->allocator->ref_counts_statistics.blocks_written + 1);
+
+ completion->callback_thread_id = ((struct block_allocator *) pooled->context)->thread_id;
+ vdo_submit_metadata_vio(&pooled->vio, pbn, write_reference_block_endio,
+ handle_io_error, REQ_OP_WRITE | REQ_PREFLUSH);
+}
+
+static void reclaim_journal_space(struct slab_journal *journal)
+{
+ block_count_t length = journal_length(journal);
+ struct vdo_slab *slab = journal->slab;
+ block_count_t write_count = vdo_waitq_num_waiters(&slab->dirty_blocks);
+ block_count_t written;
+
+ if ((length < journal->flushing_threshold) || (write_count == 0))
+ return;
+
+ /* The slab journal is over the first threshold, schedule some reference block writes. */
+ WRITE_ONCE(journal->events->flush_count, journal->events->flush_count + 1);
+ if (length < journal->flushing_deadline) {
+ /* Schedule more writes the closer to the deadline we get. */
+ write_count /= journal->flushing_deadline - length + 1;
+ write_count = max_t(block_count_t, write_count, 1);
+ }
+
+ for (written = 0; written < write_count; written++) {
+ vdo_waitq_notify_next_waiter(&slab->dirty_blocks,
+ launch_reference_block_write, slab);
+ }
+}
+
+/**
+ * reference_count_to_status() - Convert a reference count to a reference status.
+ * @count: The count to convert.
+ *
+ * Return: The appropriate reference status.
+ */
+static enum reference_status __must_check reference_count_to_status(vdo_refcount_t count)
+{
+ if (count == EMPTY_REFERENCE_COUNT)
+ return RS_FREE;
+ else if (count == 1)
+ return RS_SINGLE;
+ else if (count == PROVISIONAL_REFERENCE_COUNT)
+ return RS_PROVISIONAL;
+ else
+ return RS_SHARED;
+}
+
+/**
+ * dirty_block() - Mark a reference count block as dirty, potentially adding it to the dirty queue
+ * if it wasn't already dirty.
+ * @block: The reference block to mark as dirty.
+ */
+static void dirty_block(struct reference_block *block)
+{
+ if (block->is_dirty)
+ return;
+
+ block->is_dirty = true;
+ if (!block->is_writing)
+ vdo_waitq_enqueue_waiter(&block->slab->dirty_blocks, &block->waiter);
+}
+
+/**
+ * get_reference_block() - Get the reference block that covers the given block index.
+ */
+static struct reference_block * __must_check get_reference_block(struct vdo_slab *slab,
+ slab_block_number index)
+{
+ return &slab->reference_blocks[index / COUNTS_PER_BLOCK];
+}
+
+/**
+ * slab_block_number_from_pbn() - Determine the index within the slab of a particular physical
+ * block number.
+ * @slab: The slab.
+ * @physical_block_number: The physical block number.
+ * @slab_block_number_ptr: A pointer to the slab block number.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check slab_block_number_from_pbn(struct vdo_slab *slab,
+ physical_block_number_t pbn,
+ slab_block_number *slab_block_number_ptr)
+{
+ u64 slab_block_number;
+
+ if (pbn < slab->start)
+ return VDO_OUT_OF_RANGE;
+
+ slab_block_number = pbn - slab->start;
+ if (slab_block_number >= slab->allocator->depot->slab_config.data_blocks)
+ return VDO_OUT_OF_RANGE;
+
+ *slab_block_number_ptr = slab_block_number;
+ return VDO_SUCCESS;
+}
+
+/**
+ * get_reference_counter() - Get the reference counter that covers the given physical block number.
+ * @slab: The slab to query.
+ * @pbn: The physical block number.
+ * @counter_ptr: A pointer to the reference counter.
+ */
+static int __must_check get_reference_counter(struct vdo_slab *slab,
+ physical_block_number_t pbn,
+ vdo_refcount_t **counter_ptr)
+{
+ slab_block_number index;
+ int result = slab_block_number_from_pbn(slab, pbn, &index);
+
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *counter_ptr = &slab->counters[index];
+
+ return VDO_SUCCESS;
+}
+
+static unsigned int calculate_slab_priority(struct vdo_slab *slab)
+{
+ block_count_t free_blocks = slab->free_blocks;
+ unsigned int unopened_slab_priority = slab->allocator->unopened_slab_priority;
+ unsigned int priority;
+
+ /*
+ * Wholly full slabs must be the only ones with lowest priority, 0.
+ *
+ * Slabs that have never been opened (empty, newly initialized, and never been written to)
+ * have lower priority than previously opened slabs that have a significant number of free
+ * blocks. This ranking causes VDO to avoid writing physical blocks for the first time
+ * unless there are very few free blocks that have been previously written to.
+ *
+ * Since VDO doesn't discard blocks currently, reusing previously written blocks makes VDO
+ * a better client of any underlying storage that is thinly-provisioned (though discarding
+ * would be better).
+ *
+ * For all other slabs, the priority is derived from the logarithm of the number of free
+ * blocks. Slabs with the same order of magnitude of free blocks have the same priority.
+ * With 2^23 blocks, the priority will range from 1 to 25. The reserved
+ * unopened_slab_priority divides the range and is skipped by the logarithmic mapping.
+ */
+
+ if (free_blocks == 0)
+ return 0;
+
+ if (is_slab_journal_blank(slab))
+ return unopened_slab_priority;
+
+ priority = (1 + ilog2(free_blocks));
+ return ((priority < unopened_slab_priority) ? priority : priority + 1);
+}
+
+/*
+ * Slabs are essentially prioritized by an approximation of the number of free blocks in the slab
+ * so slabs with lots of free blocks will be opened for allocation before slabs that have few free
+ * blocks.
+ */
+static void prioritize_slab(struct vdo_slab *slab)
+{
+ VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry),
+ "a slab must not already be on a ring when prioritizing");
+ slab->priority = calculate_slab_priority(slab);
+ vdo_priority_table_enqueue(slab->allocator->prioritized_slabs,
+ slab->priority, &slab->allocq_entry);
+}
+
+/**
+ * adjust_free_block_count() - Adjust the free block count and (if needed) reprioritize the slab.
+ * @incremented: true if the free block count went up.
+ */
+static void adjust_free_block_count(struct vdo_slab *slab, bool incremented)
+{
+ struct block_allocator *allocator = slab->allocator;
+
+ WRITE_ONCE(allocator->allocated_blocks,
+ allocator->allocated_blocks + (incremented ? -1 : 1));
+
+ /* The open slab doesn't need to be reprioritized until it is closed. */
+ if (slab == allocator->open_slab)
+ return;
+
+ /* Don't bother adjusting the priority table if unneeded. */
+ if (slab->priority == calculate_slab_priority(slab))
+ return;
+
+ /*
+ * Reprioritize the slab to reflect the new free block count by removing it from the table
+ * and re-enqueuing it with the new priority.
+ */
+ vdo_priority_table_remove(allocator->prioritized_slabs, &slab->allocq_entry);
+ prioritize_slab(slab);
+}
+
+/**
+ * increment_for_data() - Increment the reference count for a data block.
+ * @slab: The slab which owns the block.
+ * @block: The reference block which contains the block being updated.
+ * @block_number: The block to update.
+ * @old_status: The reference status of the data block before this increment.
+ * @lock: The pbn_lock associated with this increment (may be NULL).
+ * @counter_ptr: A pointer to the count for the data block (in, out).
+ * @adjust_block_count: Whether to update the allocator's free block count.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int increment_for_data(struct vdo_slab *slab, struct reference_block *block,
+ slab_block_number block_number,
+ enum reference_status old_status,
+ struct pbn_lock *lock, vdo_refcount_t *counter_ptr,
+ bool adjust_block_count)
+{
+ switch (old_status) {
+ case RS_FREE:
+ *counter_ptr = 1;
+ block->allocated_count++;
+ slab->free_blocks--;
+ if (adjust_block_count)
+ adjust_free_block_count(slab, false);
+
+ break;
+
+ case RS_PROVISIONAL:
+ *counter_ptr = 1;
+ break;
+
+ default:
+ /* Single or shared */
+ if (*counter_ptr >= MAXIMUM_REFERENCE_COUNT) {
+ return vdo_log_error_strerror(VDO_REF_COUNT_INVALID,
+ "Incrementing a block already having 254 references (slab %u, offset %u)",
+ slab->slab_number, block_number);
+ }
+ (*counter_ptr)++;
+ }
+
+ if (lock != NULL)
+ vdo_unassign_pbn_lock_provisional_reference(lock);
+ return VDO_SUCCESS;
+}
+
+/**
+ * decrement_for_data() - Decrement the reference count for a data block.
+ * @slab: The slab which owns the block.
+ * @block: The reference block which contains the block being updated.
+ * @block_number: The block to update.
+ * @old_status: The reference status of the data block before this decrement.
+ * @updater: The reference updater doing this operation in case we need to look up the pbn lock.
+ * @lock: The pbn_lock associated with the block being decremented (may be NULL).
+ * @counter_ptr: A pointer to the count for the data block (in, out).
+ * @adjust_block_count: Whether to update the allocator's free block count.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int decrement_for_data(struct vdo_slab *slab, struct reference_block *block,
+ slab_block_number block_number,
+ enum reference_status old_status,
+ struct reference_updater *updater,
+ vdo_refcount_t *counter_ptr, bool adjust_block_count)
+{
+ switch (old_status) {
+ case RS_FREE:
+ return vdo_log_error_strerror(VDO_REF_COUNT_INVALID,
+ "Decrementing free block at offset %u in slab %u",
+ block_number, slab->slab_number);
+
+ case RS_PROVISIONAL:
+ case RS_SINGLE:
+ if (updater->zpbn.zone != NULL) {
+ struct pbn_lock *lock = vdo_get_physical_zone_pbn_lock(updater->zpbn.zone,
+ updater->zpbn.pbn);
+
+ if (lock != NULL) {
+ /*
+ * There is a read lock on this block, so the block must not become
+ * unreferenced.
+ */
+ *counter_ptr = PROVISIONAL_REFERENCE_COUNT;
+ vdo_assign_pbn_lock_provisional_reference(lock);
+ break;
+ }
+ }
+
+ *counter_ptr = EMPTY_REFERENCE_COUNT;
+ block->allocated_count--;
+ slab->free_blocks++;
+ if (adjust_block_count)
+ adjust_free_block_count(slab, true);
+
+ break;
+
+ default:
+ /* Shared */
+ (*counter_ptr)--;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * increment_for_block_map() - Increment the reference count for a block map page.
+ * @slab: The slab which owns the block.
+ * @block: The reference block which contains the block being updated.
+ * @block_number: The block to update.
+ * @old_status: The reference status of the block before this increment.
+ * @lock: The pbn_lock associated with this increment (may be NULL).
+ * @normal_operation: Whether we are in normal operation vs. recovery or rebuild.
+ * @counter_ptr: A pointer to the count for the block (in, out).
+ * @adjust_block_count: Whether to update the allocator's free block count.
+ *
+ * All block map increments should be from provisional to MAXIMUM_REFERENCE_COUNT. Since block map
+ * blocks never dedupe they should never be adjusted from any other state. The adjustment always
+ * results in MAXIMUM_REFERENCE_COUNT as this value is used to prevent dedupe against block map
+ * blocks.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int increment_for_block_map(struct vdo_slab *slab, struct reference_block *block,
+ slab_block_number block_number,
+ enum reference_status old_status,
+ struct pbn_lock *lock, bool normal_operation,
+ vdo_refcount_t *counter_ptr, bool adjust_block_count)
+{
+ switch (old_status) {
+ case RS_FREE:
+ if (normal_operation) {
+ return vdo_log_error_strerror(VDO_REF_COUNT_INVALID,
+ "Incrementing unallocated block map block (slab %u, offset %u)",
+ slab->slab_number, block_number);
+ }
+
+ *counter_ptr = MAXIMUM_REFERENCE_COUNT;
+ block->allocated_count++;
+ slab->free_blocks--;
+ if (adjust_block_count)
+ adjust_free_block_count(slab, false);
+
+ return VDO_SUCCESS;
+
+ case RS_PROVISIONAL:
+ if (!normal_operation)
+ return vdo_log_error_strerror(VDO_REF_COUNT_INVALID,
+ "Block map block had provisional reference during replay (slab %u, offset %u)",
+ slab->slab_number, block_number);
+
+ *counter_ptr = MAXIMUM_REFERENCE_COUNT;
+ if (lock != NULL)
+ vdo_unassign_pbn_lock_provisional_reference(lock);
+ return VDO_SUCCESS;
+
+ default:
+ return vdo_log_error_strerror(VDO_REF_COUNT_INVALID,
+ "Incrementing a block map block which is already referenced %u times (slab %u, offset %u)",
+ *counter_ptr, slab->slab_number,
+ block_number);
+ }
+}
+
+static bool __must_check is_valid_journal_point(const struct journal_point *point)
+{
+ return ((point != NULL) && (point->sequence_number > 0));
+}
+
+/**
+ * update_reference_count() - Update the reference count of a block.
+ * @slab: The slab which owns the block.
+ * @block: The reference block which contains the block being updated.
+ * @block_number: The block to update.
+ * @slab_journal_point: The slab journal point at which this update is journaled.
+ * @updater: The reference updater.
+ * @normal_operation: Whether we are in normal operation vs. recovery or rebuild.
+ * @adjust_block_count: Whether to update the slab's free block count.
+ * @provisional_decrement_ptr: A pointer which will be set to true if this update was a decrement
+ * of a provisional reference.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int update_reference_count(struct vdo_slab *slab, struct reference_block *block,
+ slab_block_number block_number,
+ const struct journal_point *slab_journal_point,
+ struct reference_updater *updater,
+ bool normal_operation, bool adjust_block_count,
+ bool *provisional_decrement_ptr)
+{
+ vdo_refcount_t *counter_ptr = &slab->counters[block_number];
+ enum reference_status old_status = reference_count_to_status(*counter_ptr);
+ int result;
+
+ if (!updater->increment) {
+ result = decrement_for_data(slab, block, block_number, old_status,
+ updater, counter_ptr, adjust_block_count);
+ if ((result == VDO_SUCCESS) && (old_status == RS_PROVISIONAL)) {
+ if (provisional_decrement_ptr != NULL)
+ *provisional_decrement_ptr = true;
+ return VDO_SUCCESS;
+ }
+ } else if (updater->operation == VDO_JOURNAL_DATA_REMAPPING) {
+ result = increment_for_data(slab, block, block_number, old_status,
+ updater->lock, counter_ptr, adjust_block_count);
+ } else {
+ result = increment_for_block_map(slab, block, block_number, old_status,
+ updater->lock, normal_operation,
+ counter_ptr, adjust_block_count);
+ }
+
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (is_valid_journal_point(slab_journal_point))
+ slab->slab_journal_point = *slab_journal_point;
+
+ return VDO_SUCCESS;
+}
+
+static int __must_check adjust_reference_count(struct vdo_slab *slab,
+ struct reference_updater *updater,
+ const struct journal_point *slab_journal_point)
+{
+ slab_block_number block_number;
+ int result;
+ struct reference_block *block;
+ bool provisional_decrement = false;
+
+ if (!is_slab_open(slab))
+ return VDO_INVALID_ADMIN_STATE;
+
+ result = slab_block_number_from_pbn(slab, updater->zpbn.pbn, &block_number);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ block = get_reference_block(slab, block_number);
+ result = update_reference_count(slab, block, block_number, slab_journal_point,
+ updater, NORMAL_OPERATION, true,
+ &provisional_decrement);
+ if ((result != VDO_SUCCESS) || provisional_decrement)
+ return result;
+
+ if (block->is_dirty && (block->slab_journal_lock > 0)) {
+ sequence_number_t entry_lock = slab_journal_point->sequence_number;
+ /*
+ * This block is already dirty and a slab journal entry has been made for it since
+ * the last time it was clean. We must release the per-entry slab journal lock for
+ * the entry associated with the update we are now doing.
+ */
+ result = VDO_ASSERT(is_valid_journal_point(slab_journal_point),
+ "Reference count adjustments need slab journal points.");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ adjust_slab_journal_block_reference(&slab->journal, entry_lock, -1);
+ return VDO_SUCCESS;
+ }
+
+ /*
+ * This may be the first time we are applying an update for which there is a slab journal
+ * entry to this block since the block was cleaned. Therefore, we convert the per-entry
+ * slab journal lock to an uncommitted reference block lock, if there is a per-entry lock.
+ */
+ if (is_valid_journal_point(slab_journal_point))
+ block->slab_journal_lock = slab_journal_point->sequence_number;
+ else
+ block->slab_journal_lock = 0;
+
+ dirty_block(block);
+ return VDO_SUCCESS;
+}
+
+/**
+ * add_entry_from_waiter() - Add an entry to the slab journal.
+ * @waiter: The vio which should make an entry now.
+ * @context: The slab journal to make an entry in.
+ *
+ * This callback is invoked by add_entries() once it has determined that we are ready to make
+ * another entry in the slab journal. Implements waiter_callback_fn.
+ */
+static void add_entry_from_waiter(struct vdo_waiter *waiter, void *context)
+{
+ int result;
+ struct reference_updater *updater =
+ container_of(waiter, struct reference_updater, waiter);
+ struct data_vio *data_vio = data_vio_from_reference_updater(updater);
+ struct slab_journal *journal = context;
+ struct slab_journal_block_header *header = &journal->tail_header;
+ struct journal_point slab_journal_point = {
+ .sequence_number = header->sequence_number,
+ .entry_count = header->entry_count,
+ };
+ sequence_number_t recovery_block = data_vio->recovery_journal_point.sequence_number;
+
+ if (header->entry_count == 0) {
+ /*
+ * This is the first entry in the current tail block, so get a lock on the recovery
+ * journal which we will hold until this tail block is committed.
+ */
+ get_lock(journal, header->sequence_number)->recovery_start = recovery_block;
+ if (journal->recovery_journal != NULL) {
+ zone_count_t zone_number = journal->slab->allocator->zone_number;
+
+ vdo_acquire_recovery_journal_block_reference(journal->recovery_journal,
+ recovery_block,
+ VDO_ZONE_TYPE_PHYSICAL,
+ zone_number);
+ }
+
+ mark_slab_journal_dirty(journal, recovery_block);
+ reclaim_journal_space(journal);
+ }
+
+ add_entry(journal, updater->zpbn.pbn, updater->operation, updater->increment,
+ expand_journal_point(data_vio->recovery_journal_point,
+ updater->increment));
+
+ if (journal->slab->status != VDO_SLAB_REBUILT) {
+ /*
+ * If the slab is unrecovered, scrubbing will take care of the count since the
+ * update is now recorded in the journal.
+ */
+ adjust_slab_journal_block_reference(journal,
+ slab_journal_point.sequence_number, -1);
+ result = VDO_SUCCESS;
+ } else {
+ /* Now that an entry has been made in the slab journal, update the counter. */
+ result = adjust_reference_count(journal->slab, updater,
+ &slab_journal_point);
+ }
+
+ if (updater->increment)
+ continue_data_vio_with_error(data_vio, result);
+ else
+ vdo_continue_completion(&data_vio->decrement_completion, result);
+}
+
+/**
+ * is_next_entry_a_block_map_increment() - Check whether the next entry to be made is a block map
+ * increment.
+ * @journal: The journal.
+ *
+ * Return: true if the first entry waiter's operation is a block map increment.
+ */
+static inline bool is_next_entry_a_block_map_increment(struct slab_journal *journal)
+{
+ struct vdo_waiter *waiter = vdo_waitq_get_first_waiter(&journal->entry_waiters);
+ struct reference_updater *updater =
+ container_of(waiter, struct reference_updater, waiter);
+
+ return (updater->operation == VDO_JOURNAL_BLOCK_MAP_REMAPPING);
+}
+
+/**
+ * add_entries() - Add as many entries as possible from the queue of vios waiting to make entries.
+ * @journal: The journal to which entries may be added.
+ *
+ * By processing the queue in order, we ensure that slab journal entries are made in the same order
+ * as recovery journal entries for the same increment or decrement.
+ */
+static void add_entries(struct slab_journal *journal)
+{
+ if (journal->adding_entries) {
+ /* Protect against re-entrancy. */
+ return;
+ }
+
+ journal->adding_entries = true;
+ while (vdo_waitq_has_waiters(&journal->entry_waiters)) {
+ struct slab_journal_block_header *header = &journal->tail_header;
+
+ if (journal->partial_write_in_progress ||
+ (journal->slab->status == VDO_SLAB_REBUILDING)) {
+ /*
+ * Don't add entries while rebuilding or while a partial write is
+ * outstanding, as it could result in reference count corruption.
+ */
+ break;
+ }
+
+ if (journal->waiting_to_commit) {
+ /*
+ * If we are waiting for resources to write the tail block, and the tail
+ * block is full, we can't make another entry.
+ */
+ WRITE_ONCE(journal->events->tail_busy_count,
+ journal->events->tail_busy_count + 1);
+ break;
+ } else if (is_next_entry_a_block_map_increment(journal) &&
+ (header->entry_count >= journal->full_entries_per_block)) {
+ /*
+ * The tail block does not have room for a block map increment, so commit
+ * it now.
+ */
+ commit_tail(journal);
+ if (journal->waiting_to_commit) {
+ WRITE_ONCE(journal->events->tail_busy_count,
+ journal->events->tail_busy_count + 1);
+ break;
+ }
+ }
+
+ /* If the slab is over the blocking threshold, make the vio wait. */
+ if (requires_reaping(journal)) {
+ WRITE_ONCE(journal->events->blocked_count,
+ journal->events->blocked_count + 1);
+ save_dirty_reference_blocks(journal->slab);
+ break;
+ }
+
+ if (header->entry_count == 0) {
+ struct journal_lock *lock =
+ get_lock(journal, header->sequence_number);
+
+ /*
+ * Check if the on disk slab journal is full. Because of the blocking and
+ * scrubbing thresholds, this should never happen.
+ */
+ if (lock->count > 0) {
+ VDO_ASSERT_LOG_ONLY((journal->head + journal->size) == journal->tail,
+ "New block has locks, but journal is not full");
+
+ /*
+ * The blocking threshold must let the journal fill up if the new
+ * block has locks; if the blocking threshold is smaller than the
+ * journal size, the new block cannot possibly have locks already.
+ */
+ VDO_ASSERT_LOG_ONLY((journal->blocking_threshold >= journal->size),
+ "New block can have locks already iff blocking threshold is at the end of the journal");
+
+ WRITE_ONCE(journal->events->disk_full_count,
+ journal->events->disk_full_count + 1);
+ save_dirty_reference_blocks(journal->slab);
+ break;
+ }
+
+ /*
+ * Don't allow the new block to be reaped until all of the reference count
+ * blocks are written and the journal block has been fully committed as
+ * well.
+ */
+ lock->count = journal->entries_per_block + 1;
+
+ if (header->sequence_number == 1) {
+ struct vdo_slab *slab = journal->slab;
+ block_count_t i;
+
+ /*
+ * This is the first entry in this slab journal, ever. Dirty all of
+ * the reference count blocks. Each will acquire a lock on the tail
+ * block so that the journal won't be reaped until the reference
+ * counts are initialized. The lock acquisition must be done by the
+ * ref_counts since here we don't know how many reference blocks
+ * the ref_counts has.
+ */
+ for (i = 0; i < slab->reference_block_count; i++) {
+ slab->reference_blocks[i].slab_journal_lock = 1;
+ dirty_block(&slab->reference_blocks[i]);
+ }
+
+ adjust_slab_journal_block_reference(journal, 1,
+ slab->reference_block_count);
+ }
+ }
+
+ vdo_waitq_notify_next_waiter(&journal->entry_waiters,
+ add_entry_from_waiter, journal);
+ }
+
+ journal->adding_entries = false;
+
+ /* If there are no waiters, and we are flushing or saving, commit the tail block. */
+ if (vdo_is_state_draining(&journal->slab->state) &&
+ !vdo_is_state_suspending(&journal->slab->state) &&
+ !vdo_waitq_has_waiters(&journal->entry_waiters))
+ commit_tail(journal);
+}
+
+/**
+ * reset_search_cursor() - Reset the free block search back to the first reference counter in the
+ * first reference block of a slab.
+ */
+static void reset_search_cursor(struct vdo_slab *slab)
+{
+ struct search_cursor *cursor = &slab->search_cursor;
+
+ cursor->block = cursor->first_block;
+ cursor->index = 0;
+ /* Unit tests have slabs with only one reference block (and it's a runt). */
+ cursor->end_index = min_t(u32, COUNTS_PER_BLOCK, slab->block_count);
+}
+
+/**
+ * advance_search_cursor() - Advance the search cursor to the start of the next reference block in
+ * a slab,
+ *
+ * Wraps around to the first reference block if the current block is the last reference block.
+ *
+ * Return: true unless the cursor was at the last reference block.
+ */
+static bool advance_search_cursor(struct vdo_slab *slab)
+{
+ struct search_cursor *cursor = &slab->search_cursor;
+
+ /*
+ * If we just finished searching the last reference block, then wrap back around to the
+ * start of the array.
+ */
+ if (cursor->block == cursor->last_block) {
+ reset_search_cursor(slab);
+ return false;
+ }
+
+ /* We're not already at the end, so advance to cursor to the next block. */
+ cursor->block++;
+ cursor->index = cursor->end_index;
+
+ if (cursor->block == cursor->last_block) {
+ /* The last reference block will usually be a runt. */
+ cursor->end_index = slab->block_count;
+ } else {
+ cursor->end_index += COUNTS_PER_BLOCK;
+ }
+
+ return true;
+}
+
+/**
+ * vdo_adjust_reference_count_for_rebuild() - Adjust the reference count of a block during rebuild.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_adjust_reference_count_for_rebuild(struct slab_depot *depot,
+ physical_block_number_t pbn,
+ enum journal_operation operation)
+{
+ int result;
+ slab_block_number block_number;
+ struct reference_block *block;
+ struct vdo_slab *slab = vdo_get_slab(depot, pbn);
+ struct reference_updater updater = {
+ .operation = operation,
+ .increment = true,
+ };
+
+ result = slab_block_number_from_pbn(slab, pbn, &block_number);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ block = get_reference_block(slab, block_number);
+ result = update_reference_count(slab, block, block_number, NULL,
+ &updater, !NORMAL_OPERATION, false, NULL);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ dirty_block(block);
+ return VDO_SUCCESS;
+}
+
+/**
+ * replay_reference_count_change() - Replay the reference count adjustment from a slab journal
+ * entry into the reference count for a block.
+ * @slab: The slab.
+ * @entry_point: The slab journal point for the entry.
+ * @entry: The slab journal entry being replayed.
+ *
+ * The adjustment will be ignored if it was already recorded in the reference count.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int replay_reference_count_change(struct vdo_slab *slab,
+ const struct journal_point *entry_point,
+ struct slab_journal_entry entry)
+{
+ int result;
+ struct reference_block *block = get_reference_block(slab, entry.sbn);
+ sector_count_t sector = (entry.sbn % COUNTS_PER_BLOCK) / COUNTS_PER_SECTOR;
+ struct reference_updater updater = {
+ .operation = entry.operation,
+ .increment = entry.increment,
+ };
+
+ if (!vdo_before_journal_point(&block->commit_points[sector], entry_point)) {
+ /* This entry is already reflected in the existing counts, so do nothing. */
+ return VDO_SUCCESS;
+ }
+
+ /* This entry is not yet counted in the reference counts. */
+ result = update_reference_count(slab, block, entry.sbn, entry_point,
+ &updater, !NORMAL_OPERATION, false, NULL);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ dirty_block(block);
+ return VDO_SUCCESS;
+}
+
+/**
+ * find_zero_byte_in_word() - Find the array index of the first zero byte in word-sized range of
+ * reference counters.
+ * @word_ptr: A pointer to the eight counter bytes to check.
+ * @start_index: The array index corresponding to word_ptr[0].
+ * @fail_index: The array index to return if no zero byte is found.
+ *
+ * The search does no bounds checking; the function relies on the array being sufficiently padded.
+ *
+ * Return: The array index of the first zero byte in the word, or the value passed as fail_index if
+ * no zero byte was found.
+ */
+static inline slab_block_number find_zero_byte_in_word(const u8 *word_ptr,
+ slab_block_number start_index,
+ slab_block_number fail_index)
+{
+ u64 word = get_unaligned_le64(word_ptr);
+
+ /* This looks like a loop, but GCC will unroll the eight iterations for us. */
+ unsigned int offset;
+
+ for (offset = 0; offset < BYTES_PER_WORD; offset++) {
+ /* Assumes little-endian byte order, which we have on X86. */
+ if ((word & 0xFF) == 0)
+ return (start_index + offset);
+ word >>= 8;
+ }
+
+ return fail_index;
+}
+
+/**
+ * find_free_block() - Find the first block with a reference count of zero in the specified
+ * range of reference counter indexes.
+ * @slab: The slab counters to scan.
+ * @index_ptr: A pointer to hold the array index of the free block.
+ *
+ * Exposed for unit testing.
+ *
+ * Return: true if a free block was found in the specified range.
+ */
+static bool find_free_block(const struct vdo_slab *slab, slab_block_number *index_ptr)
+{
+ slab_block_number zero_index;
+ slab_block_number next_index = slab->search_cursor.index;
+ slab_block_number end_index = slab->search_cursor.end_index;
+ u8 *next_counter = &slab->counters[next_index];
+ u8 *end_counter = &slab->counters[end_index];
+
+ /*
+ * Search every byte of the first unaligned word. (Array is padded so reading past end is
+ * safe.)
+ */
+ zero_index = find_zero_byte_in_word(next_counter, next_index, end_index);
+ if (zero_index < end_index) {
+ *index_ptr = zero_index;
+ return true;
+ }
+
+ /*
+ * On architectures where unaligned word access is expensive, this would be a good place to
+ * advance to an alignment boundary.
+ */
+ next_index += BYTES_PER_WORD;
+ next_counter += BYTES_PER_WORD;
+
+ /*
+ * Now we're word-aligned; check an word at a time until we find a word containing a zero.
+ * (Array is padded so reading past end is safe.)
+ */
+ while (next_counter < end_counter) {
+ /*
+ * The following code is currently an exact copy of the code preceding the loop,
+ * but if you try to merge them by using a do loop, it runs slower because a jump
+ * instruction gets added at the start of the iteration.
+ */
+ zero_index = find_zero_byte_in_word(next_counter, next_index, end_index);
+ if (zero_index < end_index) {
+ *index_ptr = zero_index;
+ return true;
+ }
+
+ next_index += BYTES_PER_WORD;
+ next_counter += BYTES_PER_WORD;
+ }
+
+ return false;
+}
+
+/**
+ * search_current_reference_block() - Search the reference block currently saved in the search
+ * cursor for a reference count of zero, starting at the saved
+ * counter index.
+ * @slab: The slab to search.
+ * @free_index_ptr: A pointer to receive the array index of the zero reference count.
+ *
+ * Return: true if an unreferenced counter was found.
+ */
+static bool search_current_reference_block(const struct vdo_slab *slab,
+ slab_block_number *free_index_ptr)
+{
+ /* Don't bother searching if the current block is known to be full. */
+ return ((slab->search_cursor.block->allocated_count < COUNTS_PER_BLOCK) &&
+ find_free_block(slab, free_index_ptr));
+}
+
+/**
+ * search_reference_blocks() - Search each reference block for a reference count of zero.
+ * @slab: The slab to search.
+ * @free_index_ptr: A pointer to receive the array index of the zero reference count.
+ *
+ * Searches each reference block for a reference count of zero, starting at the reference block and
+ * counter index saved in the search cursor and searching up to the end of the last reference
+ * block. The search does not wrap.
+ *
+ * Return: true if an unreferenced counter was found.
+ */
+static bool search_reference_blocks(struct vdo_slab *slab,
+ slab_block_number *free_index_ptr)
+{
+ /* Start searching at the saved search position in the current block. */
+ if (search_current_reference_block(slab, free_index_ptr))
+ return true;
+
+ /* Search each reference block up to the end of the slab. */
+ while (advance_search_cursor(slab)) {
+ if (search_current_reference_block(slab, free_index_ptr))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * make_provisional_reference() - Do the bookkeeping for making a provisional reference.
+ */
+static void make_provisional_reference(struct vdo_slab *slab,
+ slab_block_number block_number)
+{
+ struct reference_block *block = get_reference_block(slab, block_number);
+
+ /*
+ * Make the initial transition from an unreferenced block to a
+ * provisionally allocated block.
+ */
+ slab->counters[block_number] = PROVISIONAL_REFERENCE_COUNT;
+
+ /* Account for the allocation. */
+ block->allocated_count++;
+ slab->free_blocks--;
+}
+
+/**
+ * dirty_all_reference_blocks() - Mark all reference count blocks in a slab as dirty.
+ */
+static void dirty_all_reference_blocks(struct vdo_slab *slab)
+{
+ block_count_t i;
+
+ for (i = 0; i < slab->reference_block_count; i++)
+ dirty_block(&slab->reference_blocks[i]);
+}
+
+/**
+ * clear_provisional_references() - Clear the provisional reference counts from a reference block.
+ * @block: The block to clear.
+ */
+static void clear_provisional_references(struct reference_block *block)
+{
+ vdo_refcount_t *counters = get_reference_counters_for_block(block);
+ block_count_t j;
+
+ for (j = 0; j < COUNTS_PER_BLOCK; j++) {
+ if (counters[j] == PROVISIONAL_REFERENCE_COUNT) {
+ counters[j] = EMPTY_REFERENCE_COUNT;
+ block->allocated_count--;
+ }
+ }
+}
+
+static inline bool journal_points_equal(struct journal_point first,
+ struct journal_point second)
+{
+ return ((first.sequence_number == second.sequence_number) &&
+ (first.entry_count == second.entry_count));
+}
+
+/**
+ * unpack_reference_block() - Unpack reference counts blocks into the internal memory structure.
+ * @packed: The written reference block to be unpacked.
+ * @block: The internal reference block to be loaded.
+ */
+static void unpack_reference_block(struct packed_reference_block *packed,
+ struct reference_block *block)
+{
+ block_count_t index;
+ sector_count_t i;
+ struct vdo_slab *slab = block->slab;
+ vdo_refcount_t *counters = get_reference_counters_for_block(block);
+
+ for (i = 0; i < VDO_SECTORS_PER_BLOCK; i++) {
+ struct packed_reference_sector *sector = &packed->sectors[i];
+
+ vdo_unpack_journal_point(&sector->commit_point, &block->commit_points[i]);
+ memcpy(counters + (i * COUNTS_PER_SECTOR), sector->counts,
+ (sizeof(vdo_refcount_t) * COUNTS_PER_SECTOR));
+ /* The slab_journal_point must be the latest point found in any sector. */
+ if (vdo_before_journal_point(&slab->slab_journal_point,
+ &block->commit_points[i]))
+ slab->slab_journal_point = block->commit_points[i];
+
+ if ((i > 0) &&
+ !journal_points_equal(block->commit_points[0],
+ block->commit_points[i])) {
+ size_t block_index = block - block->slab->reference_blocks;
+
+ vdo_log_warning("Torn write detected in sector %u of reference block %zu of slab %u",
+ i, block_index, block->slab->slab_number);
+ }
+ }
+
+ block->allocated_count = 0;
+ for (index = 0; index < COUNTS_PER_BLOCK; index++) {
+ if (counters[index] != EMPTY_REFERENCE_COUNT)
+ block->allocated_count++;
+ }
+}
+
+/**
+ * finish_reference_block_load() - After a reference block has been read, unpack it.
+ * @completion: The VIO that just finished reading.
+ */
+static void finish_reference_block_load(struct vdo_completion *completion)
+{
+ struct vio *vio = as_vio(completion);
+ struct pooled_vio *pooled = vio_as_pooled_vio(vio);
+ struct reference_block *block = completion->parent;
+ struct vdo_slab *slab = block->slab;
+
+ unpack_reference_block((struct packed_reference_block *) vio->data, block);
+ return_vio_to_pool(slab->allocator->vio_pool, pooled);
+ slab->active_count--;
+ clear_provisional_references(block);
+
+ slab->free_blocks -= block->allocated_count;
+ check_if_slab_drained(slab);
+}
+
+static void load_reference_block_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct reference_block *block = vio->completion.parent;
+
+ continue_vio_after_io(vio, finish_reference_block_load,
+ block->slab->allocator->thread_id);
+}
+
+/**
+ * load_reference_block() - After a block waiter has gotten a VIO from the VIO pool, load the
+ * block.
+ * @waiter: The waiter of the block to load.
+ * @context: The VIO returned by the pool.
+ */
+static void load_reference_block(struct vdo_waiter *waiter, void *context)
+{
+ struct pooled_vio *pooled = context;
+ struct vio *vio = &pooled->vio;
+ struct reference_block *block =
+ container_of(waiter, struct reference_block, waiter);
+ size_t block_offset = (block - block->slab->reference_blocks);
+
+ vio->completion.parent = block;
+ vdo_submit_metadata_vio(vio, block->slab->ref_counts_origin + block_offset,
+ load_reference_block_endio, handle_io_error,
+ REQ_OP_READ);
+}
+
+/**
+ * load_reference_blocks() - Load a slab's reference blocks from the underlying storage into a
+ * pre-allocated reference counter.
+ */
+static void load_reference_blocks(struct vdo_slab *slab)
+{
+ block_count_t i;
+
+ slab->free_blocks = slab->block_count;
+ slab->active_count = slab->reference_block_count;
+ for (i = 0; i < slab->reference_block_count; i++) {
+ struct vdo_waiter *waiter = &slab->reference_blocks[i].waiter;
+
+ waiter->callback = load_reference_block;
+ acquire_vio_from_pool(slab->allocator->vio_pool, waiter);
+ }
+}
+
+/**
+ * drain_slab() - Drain all reference count I/O.
+ *
+ * Depending upon the type of drain being performed (as recorded in the ref_count's vdo_slab), the
+ * reference blocks may be loaded from disk or dirty reference blocks may be written out.
+ */
+static void drain_slab(struct vdo_slab *slab)
+{
+ bool save;
+ bool load;
+ const struct admin_state_code *state = vdo_get_admin_state_code(&slab->state);
+
+ if (state == VDO_ADMIN_STATE_SUSPENDING)
+ return;
+
+ if ((state != VDO_ADMIN_STATE_REBUILDING) &&
+ (state != VDO_ADMIN_STATE_SAVE_FOR_SCRUBBING))
+ commit_tail(&slab->journal);
+
+ if ((state == VDO_ADMIN_STATE_RECOVERING) || (slab->counters == NULL))
+ return;
+
+ save = false;
+ load = slab->allocator->summary_entries[slab->slab_number].load_ref_counts;
+ if (state == VDO_ADMIN_STATE_SCRUBBING) {
+ if (load) {
+ load_reference_blocks(slab);
+ return;
+ }
+ } else if (state == VDO_ADMIN_STATE_SAVE_FOR_SCRUBBING) {
+ if (!load) {
+ /* These reference counts were never written, so mark them all dirty. */
+ dirty_all_reference_blocks(slab);
+ }
+ save = true;
+ } else if (state == VDO_ADMIN_STATE_REBUILDING) {
+ /*
+ * Write out the counters if the slab has written them before, or it has any
+ * non-zero reference counts, or there are any slab journal blocks.
+ */
+ block_count_t data_blocks = slab->allocator->depot->slab_config.data_blocks;
+
+ if (load || (slab->free_blocks != data_blocks) ||
+ !is_slab_journal_blank(slab)) {
+ dirty_all_reference_blocks(slab);
+ save = true;
+ }
+ } else if (state == VDO_ADMIN_STATE_SAVING) {
+ save = (slab->status == VDO_SLAB_REBUILT);
+ } else {
+ vdo_finish_draining_with_result(&slab->state, VDO_SUCCESS);
+ return;
+ }
+
+ if (save)
+ save_dirty_reference_blocks(slab);
+}
+
+static int allocate_slab_counters(struct vdo_slab *slab)
+{
+ int result;
+ size_t index, bytes;
+
+ result = VDO_ASSERT(slab->reference_blocks == NULL,
+ "vdo_slab %u doesn't allocate refcounts twice",
+ slab->slab_number);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(slab->reference_block_count, struct reference_block,
+ __func__, &slab->reference_blocks);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /*
+ * Allocate such that the runt slab has a full-length memory array, plus a little padding
+ * so we can word-search even at the very end.
+ */
+ bytes = (slab->reference_block_count * COUNTS_PER_BLOCK) + (2 * BYTES_PER_WORD);
+ result = vdo_allocate(bytes, vdo_refcount_t, "ref counts array",
+ &slab->counters);
+ if (result != VDO_SUCCESS) {
+ vdo_free(vdo_forget(slab->reference_blocks));
+ return result;
+ }
+
+ slab->search_cursor.first_block = slab->reference_blocks;
+ slab->search_cursor.last_block = &slab->reference_blocks[slab->reference_block_count - 1];
+ reset_search_cursor(slab);
+
+ for (index = 0; index < slab->reference_block_count; index++) {
+ slab->reference_blocks[index] = (struct reference_block) {
+ .slab = slab,
+ };
+ }
+
+ return VDO_SUCCESS;
+}
+
+static int allocate_counters_if_clean(struct vdo_slab *slab)
+{
+ if (vdo_is_state_clean_load(&slab->state))
+ return allocate_slab_counters(slab);
+
+ return VDO_SUCCESS;
+}
+
+static void finish_loading_journal(struct vdo_completion *completion)
+{
+ struct vio *vio = as_vio(completion);
+ struct slab_journal *journal = completion->parent;
+ struct vdo_slab *slab = journal->slab;
+ struct packed_slab_journal_block *block = (struct packed_slab_journal_block *) vio->data;
+ struct slab_journal_block_header header;
+
+ vdo_unpack_slab_journal_block_header(&block->header, &header);
+
+ /* FIXME: should it be an error if the following conditional fails? */
+ if ((header.metadata_type == VDO_METADATA_SLAB_JOURNAL) &&
+ (header.nonce == slab->allocator->nonce)) {
+ journal->tail = header.sequence_number + 1;
+
+ /*
+ * If the slab is clean, this implies the slab journal is empty, so advance the
+ * head appropriately.
+ */
+ journal->head = (slab->allocator->summary_entries[slab->slab_number].is_dirty ?
+ header.head : journal->tail);
+ journal->tail_header = header;
+ initialize_journal_state(journal);
+ }
+
+ return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio));
+ vdo_finish_loading_with_result(&slab->state, allocate_counters_if_clean(slab));
+}
+
+static void read_slab_journal_tail_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct slab_journal *journal = vio->completion.parent;
+
+ continue_vio_after_io(vio, finish_loading_journal,
+ journal->slab->allocator->thread_id);
+}
+
+static void handle_load_error(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct slab_journal *journal = completion->parent;
+ struct vio *vio = as_vio(completion);
+
+ vio_record_metadata_io_error(vio);
+ return_vio_to_pool(journal->slab->allocator->vio_pool, vio_as_pooled_vio(vio));
+ vdo_finish_loading_with_result(&journal->slab->state, result);
+}
+
+/**
+ * read_slab_journal_tail() - Read the slab journal tail block by using a vio acquired from the vio
+ * pool.
+ * @waiter: The vio pool waiter which has just been notified.
+ * @context: The vio pool entry given to the waiter.
+ *
+ * This is the success callback from acquire_vio_from_pool() when loading a slab journal.
+ */
+static void read_slab_journal_tail(struct vdo_waiter *waiter, void *context)
+{
+ struct slab_journal *journal =
+ container_of(waiter, struct slab_journal, resource_waiter);
+ struct vdo_slab *slab = journal->slab;
+ struct pooled_vio *pooled = context;
+ struct vio *vio = &pooled->vio;
+ tail_block_offset_t last_commit_point =
+ slab->allocator->summary_entries[slab->slab_number].tail_block_offset;
+
+ /*
+ * Slab summary keeps the commit point offset, so the tail block is the block before that.
+ * Calculation supports small journals in unit tests.
+ */
+ tail_block_offset_t tail_block = ((last_commit_point == 0) ?
+ (tail_block_offset_t)(journal->size - 1) :
+ (last_commit_point - 1));
+
+ vio->completion.parent = journal;
+ vio->completion.callback_thread_id = slab->allocator->thread_id;
+ vdo_submit_metadata_vio(vio, slab->journal_origin + tail_block,
+ read_slab_journal_tail_endio, handle_load_error,
+ REQ_OP_READ);
+}
+
+/**
+ * load_slab_journal() - Load a slab's journal by reading the journal's tail.
+ */
+static void load_slab_journal(struct vdo_slab *slab)
+{
+ struct slab_journal *journal = &slab->journal;
+ tail_block_offset_t last_commit_point;
+
+ last_commit_point = slab->allocator->summary_entries[slab->slab_number].tail_block_offset;
+ if ((last_commit_point == 0) &&
+ !slab->allocator->summary_entries[slab->slab_number].load_ref_counts) {
+ /*
+ * This slab claims that it has a tail block at (journal->size - 1), but a head of
+ * 1. This is impossible, due to the scrubbing threshold, on a real system, so
+ * don't bother reading the (bogus) data off disk.
+ */
+ VDO_ASSERT_LOG_ONLY(((journal->size < 16) ||
+ (journal->scrubbing_threshold < (journal->size - 1))),
+ "Scrubbing threshold protects against reads of unwritten slab journal blocks");
+ vdo_finish_loading_with_result(&slab->state,
+ allocate_counters_if_clean(slab));
+ return;
+ }
+
+ journal->resource_waiter.callback = read_slab_journal_tail;
+ acquire_vio_from_pool(slab->allocator->vio_pool, &journal->resource_waiter);
+}
+
+static void register_slab_for_scrubbing(struct vdo_slab *slab, bool high_priority)
+{
+ struct slab_scrubber *scrubber = &slab->allocator->scrubber;
+
+ VDO_ASSERT_LOG_ONLY((slab->status != VDO_SLAB_REBUILT),
+ "slab to be scrubbed is unrecovered");
+
+ if (slab->status != VDO_SLAB_REQUIRES_SCRUBBING)
+ return;
+
+ list_del_init(&slab->allocq_entry);
+ if (!slab->was_queued_for_scrubbing) {
+ WRITE_ONCE(scrubber->slab_count, scrubber->slab_count + 1);
+ slab->was_queued_for_scrubbing = true;
+ }
+
+ if (high_priority) {
+ slab->status = VDO_SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING;
+ list_add_tail(&slab->allocq_entry, &scrubber->high_priority_slabs);
+ return;
+ }
+
+ list_add_tail(&slab->allocq_entry, &scrubber->slabs);
+}
+
+/* Queue a slab for allocation or scrubbing. */
+static void queue_slab(struct vdo_slab *slab)
+{
+ struct block_allocator *allocator = slab->allocator;
+ block_count_t free_blocks;
+ int result;
+
+ VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry),
+ "a requeued slab must not already be on a ring");
+
+ if (vdo_is_read_only(allocator->depot->vdo))
+ return;
+
+ free_blocks = slab->free_blocks;
+ result = VDO_ASSERT((free_blocks <= allocator->depot->slab_config.data_blocks),
+ "rebuilt slab %u must have a valid free block count (has %llu, expected maximum %llu)",
+ slab->slab_number, (unsigned long long) free_blocks,
+ (unsigned long long) allocator->depot->slab_config.data_blocks);
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(allocator->depot->vdo, result);
+ return;
+ }
+
+ if (slab->status != VDO_SLAB_REBUILT) {
+ register_slab_for_scrubbing(slab, false);
+ return;
+ }
+
+ if (!vdo_is_state_resuming(&slab->state)) {
+ /*
+ * If the slab is resuming, we've already accounted for it here, so don't do it
+ * again.
+ * FIXME: under what situation would the slab be resuming here?
+ */
+ WRITE_ONCE(allocator->allocated_blocks,
+ allocator->allocated_blocks - free_blocks);
+ if (!is_slab_journal_blank(slab)) {
+ WRITE_ONCE(allocator->statistics.slabs_opened,
+ allocator->statistics.slabs_opened + 1);
+ }
+ }
+
+ if (allocator->depot->vdo->suspend_type == VDO_ADMIN_STATE_SAVING)
+ reopen_slab_journal(slab);
+
+ prioritize_slab(slab);
+}
+
+/**
+ * initiate_slab_action() - Initiate a slab action.
+ *
+ * Implements vdo_admin_initiator_fn.
+ */
+static void initiate_slab_action(struct admin_state *state)
+{
+ struct vdo_slab *slab = container_of(state, struct vdo_slab, state);
+
+ if (vdo_is_state_draining(state)) {
+ const struct admin_state_code *operation = vdo_get_admin_state_code(state);
+
+ if (operation == VDO_ADMIN_STATE_SCRUBBING)
+ slab->status = VDO_SLAB_REBUILDING;
+
+ drain_slab(slab);
+ check_if_slab_drained(slab);
+ return;
+ }
+
+ if (vdo_is_state_loading(state)) {
+ load_slab_journal(slab);
+ return;
+ }
+
+ if (vdo_is_state_resuming(state)) {
+ queue_slab(slab);
+ vdo_finish_resuming(state);
+ return;
+ }
+
+ vdo_finish_operation(state, VDO_INVALID_ADMIN_STATE);
+}
+
+/**
+ * get_next_slab() - Get the next slab to scrub.
+ * @scrubber: The slab scrubber.
+ *
+ * Return: The next slab to scrub or NULL if there are none.
+ */
+static struct vdo_slab *get_next_slab(struct slab_scrubber *scrubber)
+{
+ struct vdo_slab *slab;
+
+ slab = list_first_entry_or_null(&scrubber->high_priority_slabs,
+ struct vdo_slab, allocq_entry);
+ if (slab != NULL)
+ return slab;
+
+ return list_first_entry_or_null(&scrubber->slabs, struct vdo_slab,
+ allocq_entry);
+}
+
+/**
+ * has_slabs_to_scrub() - Check whether a scrubber has slabs to scrub.
+ * @scrubber: The scrubber to check.
+ *
+ * Return: true if the scrubber has slabs to scrub.
+ */
+static inline bool __must_check has_slabs_to_scrub(struct slab_scrubber *scrubber)
+{
+ return (get_next_slab(scrubber) != NULL);
+}
+
+/**
+ * uninitialize_scrubber_vio() - Clean up the slab_scrubber's vio.
+ * @scrubber: The scrubber.
+ */
+static void uninitialize_scrubber_vio(struct slab_scrubber *scrubber)
+{
+ vdo_free(vdo_forget(scrubber->vio.data));
+ free_vio_components(&scrubber->vio);
+}
+
+/**
+ * finish_scrubbing() - Stop scrubbing, either because there are no more slabs to scrub or because
+ * there's been an error.
+ * @scrubber: The scrubber.
+ */
+static void finish_scrubbing(struct slab_scrubber *scrubber, int result)
+{
+ bool notify = vdo_waitq_has_waiters(&scrubber->waiters);
+ bool done = !has_slabs_to_scrub(scrubber);
+ struct block_allocator *allocator =
+ container_of(scrubber, struct block_allocator, scrubber);
+
+ if (done)
+ uninitialize_scrubber_vio(scrubber);
+
+ if (scrubber->high_priority_only) {
+ scrubber->high_priority_only = false;
+ vdo_fail_completion(vdo_forget(scrubber->vio.completion.parent), result);
+ } else if (done && (atomic_add_return(-1, &allocator->depot->zones_to_scrub) == 0)) {
+ /* All of our slabs were scrubbed, and we're the last allocator to finish. */
+ enum vdo_state prior_state =
+ atomic_cmpxchg(&allocator->depot->vdo->state, VDO_RECOVERING,
+ VDO_DIRTY);
+
+ /*
+ * To be safe, even if the CAS failed, ensure anything that follows is ordered with
+ * respect to whatever state change did happen.
+ */
+ smp_mb__after_atomic();
+
+ /*
+ * We must check the VDO state here and not the depot's read_only_notifier since
+ * the compare-swap-above could have failed due to a read-only entry which our own
+ * thread does not yet know about.
+ */
+ if (prior_state == VDO_DIRTY)
+ vdo_log_info("VDO commencing normal operation");
+ else if (prior_state == VDO_RECOVERING)
+ vdo_log_info("Exiting recovery mode");
+ }
+
+ /*
+ * Note that the scrubber has stopped, and inform anyone who might be waiting for that to
+ * happen.
+ */
+ if (!vdo_finish_draining(&scrubber->admin_state))
+ WRITE_ONCE(scrubber->admin_state.current_state,
+ VDO_ADMIN_STATE_SUSPENDED);
+
+ /*
+ * We can't notify waiters until after we've finished draining or they'll just requeue.
+ * Fortunately if there were waiters, we can't have been freed yet.
+ */
+ if (notify)
+ vdo_waitq_notify_all_waiters(&scrubber->waiters, NULL, NULL);
+}
+
+static void scrub_next_slab(struct slab_scrubber *scrubber);
+
+/**
+ * slab_scrubbed() - Notify the scrubber that a slab has been scrubbed.
+ * @completion: The slab rebuild completion.
+ *
+ * This callback is registered in apply_journal_entries().
+ */
+static void slab_scrubbed(struct vdo_completion *completion)
+{
+ struct slab_scrubber *scrubber =
+ container_of(as_vio(completion), struct slab_scrubber, vio);
+ struct vdo_slab *slab = scrubber->slab;
+
+ slab->status = VDO_SLAB_REBUILT;
+ queue_slab(slab);
+ reopen_slab_journal(slab);
+ WRITE_ONCE(scrubber->slab_count, scrubber->slab_count - 1);
+ scrub_next_slab(scrubber);
+}
+
+/**
+ * abort_scrubbing() - Abort scrubbing due to an error.
+ * @scrubber: The slab scrubber.
+ * @result: The error.
+ */
+static void abort_scrubbing(struct slab_scrubber *scrubber, int result)
+{
+ vdo_enter_read_only_mode(scrubber->vio.completion.vdo, result);
+ finish_scrubbing(scrubber, result);
+}
+
+/**
+ * handle_scrubber_error() - Handle errors while rebuilding a slab.
+ * @completion: The slab rebuild completion.
+ */
+static void handle_scrubber_error(struct vdo_completion *completion)
+{
+ struct vio *vio = as_vio(completion);
+
+ vio_record_metadata_io_error(vio);
+ abort_scrubbing(container_of(vio, struct slab_scrubber, vio),
+ completion->result);
+}
+
+/**
+ * apply_block_entries() - Apply all the entries in a block to the reference counts.
+ * @block: A block with entries to apply.
+ * @entry_count: The number of entries to apply.
+ * @block_number: The sequence number of the block.
+ * @slab: The slab to apply the entries to.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int apply_block_entries(struct packed_slab_journal_block *block,
+ journal_entry_count_t entry_count,
+ sequence_number_t block_number, struct vdo_slab *slab)
+{
+ struct journal_point entry_point = {
+ .sequence_number = block_number,
+ .entry_count = 0,
+ };
+ int result;
+ slab_block_number max_sbn = slab->end - slab->start;
+
+ while (entry_point.entry_count < entry_count) {
+ struct slab_journal_entry entry =
+ vdo_decode_slab_journal_entry(block, entry_point.entry_count);
+
+ if (entry.sbn > max_sbn) {
+ /* This entry is out of bounds. */
+ return vdo_log_error_strerror(VDO_CORRUPT_JOURNAL,
+ "vdo_slab journal entry (%llu, %u) had invalid offset %u in slab (size %u blocks)",
+ (unsigned long long) block_number,
+ entry_point.entry_count,
+ entry.sbn, max_sbn);
+ }
+
+ result = replay_reference_count_change(slab, &entry_point, entry);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error_strerror(result,
+ "vdo_slab journal entry (%llu, %u) (%s of offset %u) could not be applied in slab %u",
+ (unsigned long long) block_number,
+ entry_point.entry_count,
+ vdo_get_journal_operation_name(entry.operation),
+ entry.sbn, slab->slab_number);
+ return result;
+ }
+ entry_point.entry_count++;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * apply_journal_entries() - Find the relevant vio of the slab journal and apply all valid entries.
+ * @completion: The metadata read vio completion.
+ *
+ * This is a callback registered in start_scrubbing().
+ */
+static void apply_journal_entries(struct vdo_completion *completion)
+{
+ int result;
+ struct slab_scrubber *scrubber =
+ container_of(as_vio(completion), struct slab_scrubber, vio);
+ struct vdo_slab *slab = scrubber->slab;
+ struct slab_journal *journal = &slab->journal;
+
+ /* Find the boundaries of the useful part of the journal. */
+ sequence_number_t tail = journal->tail;
+ tail_block_offset_t end_index = (tail - 1) % journal->size;
+ char *end_data = scrubber->vio.data + (end_index * VDO_BLOCK_SIZE);
+ struct packed_slab_journal_block *end_block =
+ (struct packed_slab_journal_block *) end_data;
+
+ sequence_number_t head = __le64_to_cpu(end_block->header.head);
+ tail_block_offset_t head_index = head % journal->size;
+ block_count_t index = head_index;
+
+ struct journal_point ref_counts_point = slab->slab_journal_point;
+ struct journal_point last_entry_applied = ref_counts_point;
+ sequence_number_t sequence;
+
+ for (sequence = head; sequence < tail; sequence++) {
+ char *block_data = scrubber->vio.data + (index * VDO_BLOCK_SIZE);
+ struct packed_slab_journal_block *block =
+ (struct packed_slab_journal_block *) block_data;
+ struct slab_journal_block_header header;
+
+ vdo_unpack_slab_journal_block_header(&block->header, &header);
+
+ if ((header.nonce != slab->allocator->nonce) ||
+ (header.metadata_type != VDO_METADATA_SLAB_JOURNAL) ||
+ (header.sequence_number != sequence) ||
+ (header.entry_count > journal->entries_per_block) ||
+ (header.has_block_map_increments &&
+ (header.entry_count > journal->full_entries_per_block))) {
+ /* The block is not what we expect it to be. */
+ vdo_log_error("vdo_slab journal block for slab %u was invalid",
+ slab->slab_number);
+ abort_scrubbing(scrubber, VDO_CORRUPT_JOURNAL);
+ return;
+ }
+
+ result = apply_block_entries(block, header.entry_count, sequence, slab);
+ if (result != VDO_SUCCESS) {
+ abort_scrubbing(scrubber, result);
+ return;
+ }
+
+ last_entry_applied.sequence_number = sequence;
+ last_entry_applied.entry_count = header.entry_count - 1;
+ index++;
+ if (index == journal->size)
+ index = 0;
+ }
+
+ /*
+ * At the end of rebuild, the reference counters should be accurate to the end of the
+ * journal we just applied.
+ */
+ result = VDO_ASSERT(!vdo_before_journal_point(&last_entry_applied,
+ &ref_counts_point),
+ "Refcounts are not more accurate than the slab journal");
+ if (result != VDO_SUCCESS) {
+ abort_scrubbing(scrubber, result);
+ return;
+ }
+
+ /* Save out the rebuilt reference blocks. */
+ vdo_prepare_completion(completion, slab_scrubbed, handle_scrubber_error,
+ slab->allocator->thread_id, completion->parent);
+ vdo_start_operation_with_waiter(&slab->state,
+ VDO_ADMIN_STATE_SAVE_FOR_SCRUBBING,
+ completion, initiate_slab_action);
+}
+
+static void read_slab_journal_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct slab_scrubber *scrubber = container_of(vio, struct slab_scrubber, vio);
+
+ continue_vio_after_io(bio->bi_private, apply_journal_entries,
+ scrubber->slab->allocator->thread_id);
+}
+
+/**
+ * start_scrubbing() - Read the current slab's journal from disk now that it has been flushed.
+ * @completion: The scrubber's vio completion.
+ *
+ * This callback is registered in scrub_next_slab().
+ */
+static void start_scrubbing(struct vdo_completion *completion)
+{
+ struct slab_scrubber *scrubber =
+ container_of(as_vio(completion), struct slab_scrubber, vio);
+ struct vdo_slab *slab = scrubber->slab;
+
+ if (!slab->allocator->summary_entries[slab->slab_number].is_dirty) {
+ slab_scrubbed(completion);
+ return;
+ }
+
+ vdo_submit_metadata_vio(&scrubber->vio, slab->journal_origin,
+ read_slab_journal_endio, handle_scrubber_error,
+ REQ_OP_READ);
+}
+
+/**
+ * scrub_next_slab() - Scrub the next slab if there is one.
+ * @scrubber: The scrubber.
+ */
+static void scrub_next_slab(struct slab_scrubber *scrubber)
+{
+ struct vdo_completion *completion = &scrubber->vio.completion;
+ struct vdo_slab *slab;
+
+ /*
+ * Note: this notify call is always safe only because scrubbing can only be started when
+ * the VDO is quiescent.
+ */
+ vdo_waitq_notify_all_waiters(&scrubber->waiters, NULL, NULL);
+
+ if (vdo_is_read_only(completion->vdo)) {
+ finish_scrubbing(scrubber, VDO_READ_ONLY);
+ return;
+ }
+
+ slab = get_next_slab(scrubber);
+ if ((slab == NULL) ||
+ (scrubber->high_priority_only && list_empty(&scrubber->high_priority_slabs))) {
+ finish_scrubbing(scrubber, VDO_SUCCESS);
+ return;
+ }
+
+ if (vdo_finish_draining(&scrubber->admin_state))
+ return;
+
+ list_del_init(&slab->allocq_entry);
+ scrubber->slab = slab;
+ vdo_prepare_completion(completion, start_scrubbing, handle_scrubber_error,
+ slab->allocator->thread_id, completion->parent);
+ vdo_start_operation_with_waiter(&slab->state, VDO_ADMIN_STATE_SCRUBBING,
+ completion, initiate_slab_action);
+}
+
+/**
+ * scrub_slabs() - Scrub all of an allocator's slabs that are eligible for scrubbing.
+ * @allocator: The block_allocator to scrub.
+ * @parent: The completion to notify when scrubbing is done, implies high_priority, may be NULL.
+ */
+static void scrub_slabs(struct block_allocator *allocator, struct vdo_completion *parent)
+{
+ struct slab_scrubber *scrubber = &allocator->scrubber;
+
+ scrubber->vio.completion.parent = parent;
+ scrubber->high_priority_only = (parent != NULL);
+ if (!has_slabs_to_scrub(scrubber)) {
+ finish_scrubbing(scrubber, VDO_SUCCESS);
+ return;
+ }
+
+ if (scrubber->high_priority_only &&
+ vdo_is_priority_table_empty(allocator->prioritized_slabs) &&
+ list_empty(&scrubber->high_priority_slabs))
+ register_slab_for_scrubbing(get_next_slab(scrubber), true);
+
+ vdo_resume_if_quiescent(&scrubber->admin_state);
+ scrub_next_slab(scrubber);
+}
+
+static inline void assert_on_allocator_thread(thread_id_t thread_id,
+ const char *function_name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == thread_id),
+ "%s called on correct thread", function_name);
+}
+
+static void register_slab_with_allocator(struct block_allocator *allocator,
+ struct vdo_slab *slab)
+{
+ allocator->slab_count++;
+ allocator->last_slab = slab->slab_number;
+}
+
+/**
+ * get_depot_slab_iterator() - Return a slab_iterator over the slabs in a slab_depot.
+ * @depot: The depot over which to iterate.
+ * @start: The number of the slab to start iterating from.
+ * @end: The number of the last slab which may be returned.
+ * @stride: The difference in slab number between successive slabs.
+ *
+ * Iteration always occurs from higher to lower numbered slabs.
+ *
+ * Return: An initialized iterator structure.
+ */
+static struct slab_iterator get_depot_slab_iterator(struct slab_depot *depot,
+ slab_count_t start, slab_count_t end,
+ slab_count_t stride)
+{
+ struct vdo_slab **slabs = depot->slabs;
+
+ return (struct slab_iterator) {
+ .slabs = slabs,
+ .next = (((slabs == NULL) || (start < end)) ? NULL : slabs[start]),
+ .end = end,
+ .stride = stride,
+ };
+}
+
+static struct slab_iterator get_slab_iterator(const struct block_allocator *allocator)
+{
+ return get_depot_slab_iterator(allocator->depot, allocator->last_slab,
+ allocator->zone_number,
+ allocator->depot->zone_count);
+}
+
+/**
+ * next_slab() - Get the next slab from a slab_iterator and advance the iterator
+ * @iterator: The slab_iterator.
+ *
+ * Return: The next slab or NULL if the iterator is exhausted.
+ */
+static struct vdo_slab *next_slab(struct slab_iterator *iterator)
+{
+ struct vdo_slab *slab = iterator->next;
+
+ if ((slab == NULL) || (slab->slab_number < iterator->end + iterator->stride))
+ iterator->next = NULL;
+ else
+ iterator->next = iterator->slabs[slab->slab_number - iterator->stride];
+
+ return slab;
+}
+
+/**
+ * abort_waiter() - Abort vios waiting to make journal entries when read-only.
+ *
+ * This callback is invoked on all vios waiting to make slab journal entries after the VDO has gone
+ * into read-only mode. Implements waiter_callback_fn.
+ */
+static void abort_waiter(struct vdo_waiter *waiter, void *context __always_unused)
+{
+ struct reference_updater *updater =
+ container_of(waiter, struct reference_updater, waiter);
+ struct data_vio *data_vio = data_vio_from_reference_updater(updater);
+
+ if (updater->increment) {
+ continue_data_vio_with_error(data_vio, VDO_READ_ONLY);
+ return;
+ }
+
+ vdo_continue_completion(&data_vio->decrement_completion, VDO_READ_ONLY);
+}
+
+/* Implements vdo_read_only_notification_fn. */
+static void notify_block_allocator_of_read_only_mode(void *listener,
+ struct vdo_completion *parent)
+{
+ struct block_allocator *allocator = listener;
+ struct slab_iterator iterator;
+
+ assert_on_allocator_thread(allocator->thread_id, __func__);
+ iterator = get_slab_iterator(allocator);
+ while (iterator.next != NULL) {
+ struct vdo_slab *slab = next_slab(&iterator);
+
+ vdo_waitq_notify_all_waiters(&slab->journal.entry_waiters,
+ abort_waiter, &slab->journal);
+ check_if_slab_drained(slab);
+ }
+
+ vdo_finish_completion(parent);
+}
+
+/**
+ * vdo_acquire_provisional_reference() - Acquire a provisional reference on behalf of a PBN lock if
+ * the block it locks is unreferenced.
+ * @slab: The slab which contains the block.
+ * @pbn: The physical block to reference.
+ * @lock: The lock.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_acquire_provisional_reference(struct vdo_slab *slab, physical_block_number_t pbn,
+ struct pbn_lock *lock)
+{
+ slab_block_number block_number;
+ int result;
+
+ if (vdo_pbn_lock_has_provisional_reference(lock))
+ return VDO_SUCCESS;
+
+ if (!is_slab_open(slab))
+ return VDO_INVALID_ADMIN_STATE;
+
+ result = slab_block_number_from_pbn(slab, pbn, &block_number);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (slab->counters[block_number] == EMPTY_REFERENCE_COUNT) {
+ make_provisional_reference(slab, block_number);
+ if (lock != NULL)
+ vdo_assign_pbn_lock_provisional_reference(lock);
+ }
+
+ if (vdo_pbn_lock_has_provisional_reference(lock))
+ adjust_free_block_count(slab, false);
+
+ return VDO_SUCCESS;
+}
+
+static int __must_check allocate_slab_block(struct vdo_slab *slab,
+ physical_block_number_t *block_number_ptr)
+{
+ slab_block_number free_index;
+
+ if (!is_slab_open(slab))
+ return VDO_INVALID_ADMIN_STATE;
+
+ if (!search_reference_blocks(slab, &free_index))
+ return VDO_NO_SPACE;
+
+ VDO_ASSERT_LOG_ONLY((slab->counters[free_index] == EMPTY_REFERENCE_COUNT),
+ "free block must have ref count of zero");
+ make_provisional_reference(slab, free_index);
+ adjust_free_block_count(slab, false);
+
+ /*
+ * Update the search hint so the next search will start at the array index just past the
+ * free block we just found.
+ */
+ slab->search_cursor.index = (free_index + 1);
+
+ *block_number_ptr = slab->start + free_index;
+ return VDO_SUCCESS;
+}
+
+/**
+ * open_slab() - Prepare a slab to be allocated from.
+ * @slab: The slab.
+ */
+static void open_slab(struct vdo_slab *slab)
+{
+ reset_search_cursor(slab);
+ if (is_slab_journal_blank(slab)) {
+ WRITE_ONCE(slab->allocator->statistics.slabs_opened,
+ slab->allocator->statistics.slabs_opened + 1);
+ dirty_all_reference_blocks(slab);
+ } else {
+ WRITE_ONCE(slab->allocator->statistics.slabs_reopened,
+ slab->allocator->statistics.slabs_reopened + 1);
+ }
+
+ slab->allocator->open_slab = slab;
+}
+
+
+/*
+ * The block allocated will have a provisional reference and the reference must be either confirmed
+ * with a subsequent increment or vacated with a subsequent decrement via
+ * vdo_release_block_reference().
+ */
+int vdo_allocate_block(struct block_allocator *allocator,
+ physical_block_number_t *block_number_ptr)
+{
+ int result;
+
+ if (allocator->open_slab != NULL) {
+ /* Try to allocate the next block in the currently open slab. */
+ result = allocate_slab_block(allocator->open_slab, block_number_ptr);
+ if ((result == VDO_SUCCESS) || (result != VDO_NO_SPACE))
+ return result;
+
+ /* Put the exhausted open slab back into the priority table. */
+ prioritize_slab(allocator->open_slab);
+ }
+
+ /* Remove the highest priority slab from the priority table and make it the open slab. */
+ open_slab(list_entry(vdo_priority_table_dequeue(allocator->prioritized_slabs),
+ struct vdo_slab, allocq_entry));
+
+ /*
+ * Try allocating again. If we're out of space immediately after opening a slab, then every
+ * slab must be fully allocated.
+ */
+ return allocate_slab_block(allocator->open_slab, block_number_ptr);
+}
+
+/**
+ * vdo_enqueue_clean_slab_waiter() - Wait for a clean slab.
+ * @allocator: The block_allocator on which to wait.
+ * @waiter: The waiter.
+ *
+ * Return: VDO_SUCCESS if the waiter was queued, VDO_NO_SPACE if there are no slabs to scrub, and
+ * some other error otherwise.
+ */
+int vdo_enqueue_clean_slab_waiter(struct block_allocator *allocator,
+ struct vdo_waiter *waiter)
+{
+ if (vdo_is_read_only(allocator->depot->vdo))
+ return VDO_READ_ONLY;
+
+ if (vdo_is_state_quiescent(&allocator->scrubber.admin_state))
+ return VDO_NO_SPACE;
+
+ vdo_waitq_enqueue_waiter(&allocator->scrubber.waiters, waiter);
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_modify_reference_count() - Modify the reference count of a block by first making a slab
+ * journal entry and then updating the reference counter.
+ *
+ * @data_vio: The data_vio for which to add the entry.
+ * @updater: Which of the data_vio's reference updaters is being submitted.
+ */
+void vdo_modify_reference_count(struct vdo_completion *completion,
+ struct reference_updater *updater)
+{
+ struct vdo_slab *slab = vdo_get_slab(completion->vdo->depot, updater->zpbn.pbn);
+
+ if (!is_slab_open(slab)) {
+ vdo_continue_completion(completion, VDO_INVALID_ADMIN_STATE);
+ return;
+ }
+
+ if (vdo_is_read_only(completion->vdo)) {
+ vdo_continue_completion(completion, VDO_READ_ONLY);
+ return;
+ }
+
+ vdo_waitq_enqueue_waiter(&slab->journal.entry_waiters, &updater->waiter);
+ if ((slab->status != VDO_SLAB_REBUILT) && requires_reaping(&slab->journal))
+ register_slab_for_scrubbing(slab, true);
+
+ add_entries(&slab->journal);
+}
+
+/* Release an unused provisional reference. */
+int vdo_release_block_reference(struct block_allocator *allocator,
+ physical_block_number_t pbn)
+{
+ struct reference_updater updater;
+
+ if (pbn == VDO_ZERO_BLOCK)
+ return VDO_SUCCESS;
+
+ updater = (struct reference_updater) {
+ .operation = VDO_JOURNAL_DATA_REMAPPING,
+ .increment = false,
+ .zpbn = {
+ .pbn = pbn,
+ },
+ };
+
+ return adjust_reference_count(vdo_get_slab(allocator->depot, pbn),
+ &updater, NULL);
+}
+
+/*
+ * This is a min_heap callback function orders slab_status structures using the 'is_clean' field as
+ * the primary key and the 'emptiness' field as the secondary key.
+ *
+ * Slabs need to be pushed onto the rings in the same order they are to be popped off. Popping
+ * should always get the most empty first, so pushing should be from most empty to least empty.
+ * Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements
+ * before larger ones.
+ */
+static bool slab_status_is_less_than(const void *item1, const void *item2)
+{
+ const struct slab_status *info1 = item1;
+ const struct slab_status *info2 = item2;
+
+ if (info1->is_clean != info2->is_clean)
+ return info1->is_clean;
+ if (info1->emptiness != info2->emptiness)
+ return info1->emptiness > info2->emptiness;
+ return info1->slab_number < info2->slab_number;
+}
+
+static void swap_slab_statuses(void *item1, void *item2)
+{
+ struct slab_status *info1 = item1;
+ struct slab_status *info2 = item2;
+
+ swap(*info1, *info2);
+}
+
+static const struct min_heap_callbacks slab_status_min_heap = {
+ .elem_size = sizeof(struct slab_status),
+ .less = slab_status_is_less_than,
+ .swp = swap_slab_statuses,
+};
+
+/* Inform the slab actor that a action has finished on some slab; used by apply_to_slabs(). */
+static void slab_action_callback(struct vdo_completion *completion)
+{
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+ struct slab_actor *actor = &allocator->slab_actor;
+
+ if (--actor->slab_action_count == 0) {
+ actor->callback(completion);
+ return;
+ }
+
+ vdo_reset_completion(completion);
+}
+
+/* Preserve the error from part of an action and continue. */
+static void handle_operation_error(struct vdo_completion *completion)
+{
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+
+ if (allocator->state.waiter != NULL)
+ vdo_set_completion_result(allocator->state.waiter, completion->result);
+ completion->callback(completion);
+}
+
+/* Perform an action on each of an allocator's slabs in parallel. */
+static void apply_to_slabs(struct block_allocator *allocator, vdo_action_fn callback)
+{
+ struct slab_iterator iterator;
+
+ vdo_prepare_completion(&allocator->completion, slab_action_callback,
+ handle_operation_error, allocator->thread_id, NULL);
+ allocator->completion.requeue = false;
+
+ /*
+ * Since we are going to dequeue all of the slabs, the open slab will become invalid, so
+ * clear it.
+ */
+ allocator->open_slab = NULL;
+
+ /* Ensure that we don't finish before we're done starting. */
+ allocator->slab_actor = (struct slab_actor) {
+ .slab_action_count = 1,
+ .callback = callback,
+ };
+
+ iterator = get_slab_iterator(allocator);
+ while (iterator.next != NULL) {
+ const struct admin_state_code *operation =
+ vdo_get_admin_state_code(&allocator->state);
+ struct vdo_slab *slab = next_slab(&iterator);
+
+ list_del_init(&slab->allocq_entry);
+ allocator->slab_actor.slab_action_count++;
+ vdo_start_operation_with_waiter(&slab->state, operation,
+ &allocator->completion,
+ initiate_slab_action);
+ }
+
+ slab_action_callback(&allocator->completion);
+}
+
+static void finish_loading_allocator(struct vdo_completion *completion)
+{
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+ const struct admin_state_code *operation =
+ vdo_get_admin_state_code(&allocator->state);
+
+ if (allocator->eraser != NULL)
+ dm_kcopyd_client_destroy(vdo_forget(allocator->eraser));
+
+ if (operation == VDO_ADMIN_STATE_LOADING_FOR_RECOVERY) {
+ void *context =
+ vdo_get_current_action_context(allocator->depot->action_manager);
+
+ vdo_replay_into_slab_journals(allocator, context);
+ return;
+ }
+
+ vdo_finish_loading(&allocator->state);
+}
+
+static void erase_next_slab_journal(struct block_allocator *allocator);
+
+static void copy_callback(int read_err, unsigned long write_err, void *context)
+{
+ struct block_allocator *allocator = context;
+ int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO);
+
+ if (result != VDO_SUCCESS) {
+ vdo_fail_completion(&allocator->completion, result);
+ return;
+ }
+
+ erase_next_slab_journal(allocator);
+}
+
+/* erase_next_slab_journal() - Erase the next slab journal. */
+static void erase_next_slab_journal(struct block_allocator *allocator)
+{
+ struct vdo_slab *slab;
+ physical_block_number_t pbn;
+ struct dm_io_region regions[1];
+ struct slab_depot *depot = allocator->depot;
+ block_count_t blocks = depot->slab_config.slab_journal_blocks;
+
+ if (allocator->slabs_to_erase.next == NULL) {
+ vdo_finish_completion(&allocator->completion);
+ return;
+ }
+
+ slab = next_slab(&allocator->slabs_to_erase);
+ pbn = slab->journal_origin - depot->vdo->geometry.bio_offset;
+ regions[0] = (struct dm_io_region) {
+ .bdev = vdo_get_backing_device(depot->vdo),
+ .sector = pbn * VDO_SECTORS_PER_BLOCK,
+ .count = blocks * VDO_SECTORS_PER_BLOCK,
+ };
+ dm_kcopyd_zero(allocator->eraser, 1, regions, 0, copy_callback, allocator);
+}
+
+/* Implements vdo_admin_initiator_fn. */
+static void initiate_load(struct admin_state *state)
+{
+ struct block_allocator *allocator =
+ container_of(state, struct block_allocator, state);
+ const struct admin_state_code *operation = vdo_get_admin_state_code(state);
+
+ if (operation == VDO_ADMIN_STATE_LOADING_FOR_REBUILD) {
+ /*
+ * Must requeue because the kcopyd client cannot be freed in the same stack frame
+ * as the kcopyd callback, lest it deadlock.
+ */
+ vdo_prepare_completion_for_requeue(&allocator->completion,
+ finish_loading_allocator,
+ handle_operation_error,
+ allocator->thread_id, NULL);
+ allocator->eraser = dm_kcopyd_client_create(NULL);
+ if (IS_ERR(allocator->eraser)) {
+ vdo_fail_completion(&allocator->completion,
+ PTR_ERR(allocator->eraser));
+ allocator->eraser = NULL;
+ return;
+ }
+ allocator->slabs_to_erase = get_slab_iterator(allocator);
+
+ erase_next_slab_journal(allocator);
+ return;
+ }
+
+ apply_to_slabs(allocator, finish_loading_allocator);
+}
+
+/**
+ * vdo_notify_slab_journals_are_recovered() - Inform a block allocator that its slab journals have
+ * been recovered from the recovery journal.
+ * @completion The allocator completion
+ */
+void vdo_notify_slab_journals_are_recovered(struct vdo_completion *completion)
+{
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+
+ vdo_finish_loading_with_result(&allocator->state, completion->result);
+}
+
+static int get_slab_statuses(struct block_allocator *allocator,
+ struct slab_status **statuses_ptr)
+{
+ int result;
+ struct slab_status *statuses;
+ struct slab_iterator iterator = get_slab_iterator(allocator);
+
+ result = vdo_allocate(allocator->slab_count, struct slab_status, __func__,
+ &statuses);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *statuses_ptr = statuses;
+
+ while (iterator.next != NULL) {
+ slab_count_t slab_number = next_slab(&iterator)->slab_number;
+
+ *statuses++ = (struct slab_status) {
+ .slab_number = slab_number,
+ .is_clean = !allocator->summary_entries[slab_number].is_dirty,
+ .emptiness = allocator->summary_entries[slab_number].fullness_hint,
+ };
+ }
+
+ return VDO_SUCCESS;
+}
+
+/* Prepare slabs for allocation or scrubbing. */
+static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator *allocator)
+{
+ struct slab_status current_slab_status;
+ struct min_heap heap;
+ int result;
+ struct slab_status *slab_statuses;
+ struct slab_depot *depot = allocator->depot;
+
+ WRITE_ONCE(allocator->allocated_blocks,
+ allocator->slab_count * depot->slab_config.data_blocks);
+ result = get_slab_statuses(allocator, &slab_statuses);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* Sort the slabs by cleanliness, then by emptiness hint. */
+ heap = (struct min_heap) {
+ .data = slab_statuses,
+ .nr = allocator->slab_count,
+ .size = allocator->slab_count,
+ };
+ min_heapify_all(&heap, &slab_status_min_heap);
+
+ while (heap.nr > 0) {
+ bool high_priority;
+ struct vdo_slab *slab;
+ struct slab_journal *journal;
+
+ current_slab_status = slab_statuses[0];
+ min_heap_pop(&heap, &slab_status_min_heap);
+ slab = depot->slabs[current_slab_status.slab_number];
+
+ if ((depot->load_type == VDO_SLAB_DEPOT_REBUILD_LOAD) ||
+ (!allocator->summary_entries[slab->slab_number].load_ref_counts &&
+ current_slab_status.is_clean)) {
+ queue_slab(slab);
+ continue;
+ }
+
+ slab->status = VDO_SLAB_REQUIRES_SCRUBBING;
+ journal = &slab->journal;
+ high_priority = ((current_slab_status.is_clean &&
+ (depot->load_type == VDO_SLAB_DEPOT_NORMAL_LOAD)) ||
+ (journal_length(journal) >= journal->scrubbing_threshold));
+ register_slab_for_scrubbing(slab, high_priority);
+ }
+
+ vdo_free(slab_statuses);
+ return VDO_SUCCESS;
+}
+
+static const char *status_to_string(enum slab_rebuild_status status)
+{
+ switch (status) {
+ case VDO_SLAB_REBUILT:
+ return "REBUILT";
+ case VDO_SLAB_REQUIRES_SCRUBBING:
+ return "SCRUBBING";
+ case VDO_SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING:
+ return "PRIORITY_SCRUBBING";
+ case VDO_SLAB_REBUILDING:
+ return "REBUILDING";
+ case VDO_SLAB_REPLAYING:
+ return "REPLAYING";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+void vdo_dump_block_allocator(const struct block_allocator *allocator)
+{
+ unsigned int pause_counter = 0;
+ struct slab_iterator iterator = get_slab_iterator(allocator);
+ const struct slab_scrubber *scrubber = &allocator->scrubber;
+
+ vdo_log_info("block_allocator zone %u", allocator->zone_number);
+ while (iterator.next != NULL) {
+ struct vdo_slab *slab = next_slab(&iterator);
+ struct slab_journal *journal = &slab->journal;
+
+ if (slab->reference_blocks != NULL) {
+ /* Terse because there are a lot of slabs to dump and syslog is lossy. */
+ vdo_log_info("slab %u: P%u, %llu free", slab->slab_number,
+ slab->priority,
+ (unsigned long long) slab->free_blocks);
+ } else {
+ vdo_log_info("slab %u: status %s", slab->slab_number,
+ status_to_string(slab->status));
+ }
+
+ vdo_log_info(" slab journal: entry_waiters=%zu waiting_to_commit=%s updating_slab_summary=%s head=%llu unreapable=%llu tail=%llu next_commit=%llu summarized=%llu last_summarized=%llu recovery_lock=%llu dirty=%s",
+ vdo_waitq_num_waiters(&journal->entry_waiters),
+ vdo_bool_to_string(journal->waiting_to_commit),
+ vdo_bool_to_string(journal->updating_slab_summary),
+ (unsigned long long) journal->head,
+ (unsigned long long) journal->unreapable,
+ (unsigned long long) journal->tail,
+ (unsigned long long) journal->next_commit,
+ (unsigned long long) journal->summarized,
+ (unsigned long long) journal->last_summarized,
+ (unsigned long long) journal->recovery_lock,
+ vdo_bool_to_string(journal->recovery_lock != 0));
+ /*
+ * Given the frequency with which the locks are just a tiny bit off, it might be
+ * worth dumping all the locks, but that might be too much logging.
+ */
+
+ if (slab->counters != NULL) {
+ /* Terse because there are a lot of slabs to dump and syslog is lossy. */
+ vdo_log_info(" slab: free=%u/%u blocks=%u dirty=%zu active=%zu journal@(%llu,%u)",
+ slab->free_blocks, slab->block_count,
+ slab->reference_block_count,
+ vdo_waitq_num_waiters(&slab->dirty_blocks),
+ slab->active_count,
+ (unsigned long long) slab->slab_journal_point.sequence_number,
+ slab->slab_journal_point.entry_count);
+ } else {
+ vdo_log_info(" no counters");
+ }
+
+ /*
+ * Wait for a while after each batch of 32 slabs dumped, an arbitrary number,
+ * allowing the kernel log a chance to be flushed instead of being overrun.
+ */
+ if (pause_counter++ == 31) {
+ pause_counter = 0;
+ vdo_pause_for_logger();
+ }
+ }
+
+ vdo_log_info("slab_scrubber slab_count %u waiters %zu %s%s",
+ READ_ONCE(scrubber->slab_count),
+ vdo_waitq_num_waiters(&scrubber->waiters),
+ vdo_get_admin_state_code(&scrubber->admin_state)->name,
+ scrubber->high_priority_only ? ", high_priority_only " : "");
+}
+
+static void free_slab(struct vdo_slab *slab)
+{
+ if (slab == NULL)
+ return;
+
+ list_del(&slab->allocq_entry);
+ vdo_free(vdo_forget(slab->journal.block));
+ vdo_free(vdo_forget(slab->journal.locks));
+ vdo_free(vdo_forget(slab->counters));
+ vdo_free(vdo_forget(slab->reference_blocks));
+ vdo_free(slab);
+}
+
+static int initialize_slab_journal(struct vdo_slab *slab)
+{
+ struct slab_journal *journal = &slab->journal;
+ const struct slab_config *slab_config = &slab->allocator->depot->slab_config;
+ int result;
+
+ result = vdo_allocate(slab_config->slab_journal_blocks, struct journal_lock,
+ __func__, &journal->locks);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(VDO_BLOCK_SIZE, char, "struct packed_slab_journal_block",
+ (char **) &journal->block);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ journal->slab = slab;
+ journal->size = slab_config->slab_journal_blocks;
+ journal->flushing_threshold = slab_config->slab_journal_flushing_threshold;
+ journal->blocking_threshold = slab_config->slab_journal_blocking_threshold;
+ journal->scrubbing_threshold = slab_config->slab_journal_scrubbing_threshold;
+ journal->entries_per_block = VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK;
+ journal->full_entries_per_block = VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK;
+ journal->events = &slab->allocator->slab_journal_statistics;
+ journal->recovery_journal = slab->allocator->depot->vdo->recovery_journal;
+ journal->tail = 1;
+ journal->head = 1;
+
+ journal->flushing_deadline = journal->flushing_threshold;
+ /*
+ * Set there to be some time between the deadline and the blocking threshold, so that
+ * hopefully all are done before blocking.
+ */
+ if ((journal->blocking_threshold - journal->flushing_threshold) > 5)
+ journal->flushing_deadline = journal->blocking_threshold - 5;
+
+ journal->slab_summary_waiter.callback = release_journal_locks;
+
+ INIT_LIST_HEAD(&journal->dirty_entry);
+ INIT_LIST_HEAD(&journal->uncommitted_blocks);
+
+ journal->tail_header.nonce = slab->allocator->nonce;
+ journal->tail_header.metadata_type = VDO_METADATA_SLAB_JOURNAL;
+ initialize_journal_state(journal);
+ return VDO_SUCCESS;
+}
+
+/**
+ * make_slab() - Construct a new, empty slab.
+ * @slab_origin: The physical block number within the block allocator partition of the first block
+ * in the slab.
+ * @allocator: The block allocator to which the slab belongs.
+ * @slab_number: The slab number of the slab.
+ * @is_new: true if this slab is being allocated as part of a resize.
+ * @slab_ptr: A pointer to receive the new slab.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check make_slab(physical_block_number_t slab_origin,
+ struct block_allocator *allocator,
+ slab_count_t slab_number, bool is_new,
+ struct vdo_slab **slab_ptr)
+{
+ const struct slab_config *slab_config = &allocator->depot->slab_config;
+ struct vdo_slab *slab;
+ int result;
+
+ result = vdo_allocate(1, struct vdo_slab, __func__, &slab);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *slab = (struct vdo_slab) {
+ .allocator = allocator,
+ .start = slab_origin,
+ .end = slab_origin + slab_config->slab_blocks,
+ .slab_number = slab_number,
+ .ref_counts_origin = slab_origin + slab_config->data_blocks,
+ .journal_origin =
+ vdo_get_slab_journal_start_block(slab_config, slab_origin),
+ .block_count = slab_config->data_blocks,
+ .free_blocks = slab_config->data_blocks,
+ .reference_block_count =
+ vdo_get_saved_reference_count_size(slab_config->data_blocks),
+ };
+ INIT_LIST_HEAD(&slab->allocq_entry);
+
+ result = initialize_slab_journal(slab);
+ if (result != VDO_SUCCESS) {
+ free_slab(slab);
+ return result;
+ }
+
+ if (is_new) {
+ vdo_set_admin_state_code(&slab->state, VDO_ADMIN_STATE_NEW);
+ result = allocate_slab_counters(slab);
+ if (result != VDO_SUCCESS) {
+ free_slab(slab);
+ return result;
+ }
+ } else {
+ vdo_set_admin_state_code(&slab->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ }
+
+ *slab_ptr = slab;
+ return VDO_SUCCESS;
+}
+
+/**
+ * allocate_slabs() - Allocate a new slab pointer array.
+ * @depot: The depot.
+ * @slab_count: The number of slabs the depot should have in the new array.
+ *
+ * Any existing slab pointers will be copied into the new array, and slabs will be allocated as
+ * needed. The newly allocated slabs will not be distributed for use by the block allocators.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int allocate_slabs(struct slab_depot *depot, slab_count_t slab_count)
+{
+ block_count_t slab_size;
+ bool resizing = false;
+ physical_block_number_t slab_origin;
+ int result;
+
+ result = vdo_allocate(slab_count, struct vdo_slab *,
+ "slab pointer array", &depot->new_slabs);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ if (depot->slabs != NULL) {
+ memcpy(depot->new_slabs, depot->slabs,
+ depot->slab_count * sizeof(struct vdo_slab *));
+ resizing = true;
+ }
+
+ slab_size = depot->slab_config.slab_blocks;
+ slab_origin = depot->first_block + (depot->slab_count * slab_size);
+
+ for (depot->new_slab_count = depot->slab_count;
+ depot->new_slab_count < slab_count;
+ depot->new_slab_count++, slab_origin += slab_size) {
+ struct block_allocator *allocator =
+ &depot->allocators[depot->new_slab_count % depot->zone_count];
+ struct vdo_slab **slab_ptr = &depot->new_slabs[depot->new_slab_count];
+
+ result = make_slab(slab_origin, allocator, depot->new_slab_count,
+ resizing, slab_ptr);
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_abandon_new_slabs() - Abandon any new slabs in this depot, freeing them as needed.
+ * @depot: The depot.
+ */
+void vdo_abandon_new_slabs(struct slab_depot *depot)
+{
+ slab_count_t i;
+
+ if (depot->new_slabs == NULL)
+ return;
+
+ for (i = depot->slab_count; i < depot->new_slab_count; i++)
+ free_slab(vdo_forget(depot->new_slabs[i]));
+ depot->new_slab_count = 0;
+ depot->new_size = 0;
+ vdo_free(vdo_forget(depot->new_slabs));
+}
+
+/**
+ * get_allocator_thread_id() - Get the ID of the thread on which a given allocator operates.
+ *
+ * Implements vdo_zone_thread_getter_fn.
+ */
+static thread_id_t get_allocator_thread_id(void *context, zone_count_t zone_number)
+{
+ return ((struct slab_depot *) context)->allocators[zone_number].thread_id;
+}
+
+/**
+ * release_recovery_journal_lock() - Request the slab journal to release the recovery journal lock
+ * it may hold on a specified recovery journal block.
+ * @journal: The slab journal.
+ * @recovery_lock: The sequence number of the recovery journal block whose locks should be
+ * released.
+ *
+ * Return: true if the journal does hold a lock on the specified block (which it will release).
+ */
+static bool __must_check release_recovery_journal_lock(struct slab_journal *journal,
+ sequence_number_t recovery_lock)
+{
+ if (recovery_lock > journal->recovery_lock) {
+ VDO_ASSERT_LOG_ONLY((recovery_lock < journal->recovery_lock),
+ "slab journal recovery lock is not older than the recovery journal head");
+ return false;
+ }
+
+ if ((recovery_lock < journal->recovery_lock) ||
+ vdo_is_read_only(journal->slab->allocator->depot->vdo))
+ return false;
+
+ /* All locks are held by the block which is in progress; write it. */
+ commit_tail(journal);
+ return true;
+}
+
+/*
+ * Request a commit of all dirty tail blocks which are locking the recovery journal block the depot
+ * is seeking to release.
+ *
+ * Implements vdo_zone_action_fn.
+ */
+static void release_tail_block_locks(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_journal *journal, *tmp;
+ struct slab_depot *depot = context;
+ struct list_head *list = &depot->allocators[zone_number].dirty_slab_journals;
+
+ list_for_each_entry_safe(journal, tmp, list, dirty_entry) {
+ if (!release_recovery_journal_lock(journal,
+ depot->active_release_request))
+ break;
+ }
+
+ vdo_finish_completion(parent);
+}
+
+/**
+ * prepare_for_tail_block_commit() - Prepare to commit oldest tail blocks.
+ *
+ * Implements vdo_action_preamble_fn.
+ */
+static void prepare_for_tail_block_commit(void *context, struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+
+ depot->active_release_request = depot->new_release_request;
+ vdo_finish_completion(parent);
+}
+
+/**
+ * schedule_tail_block_commit() - Schedule a tail block commit if necessary.
+ *
+ * This method should not be called directly. Rather, call vdo_schedule_default_action() on the
+ * depot's action manager.
+ *
+ * Implements vdo_action_scheduler_fn.
+ */
+static bool schedule_tail_block_commit(void *context)
+{
+ struct slab_depot *depot = context;
+
+ if (depot->new_release_request == depot->active_release_request)
+ return false;
+
+ return vdo_schedule_action(depot->action_manager,
+ prepare_for_tail_block_commit,
+ release_tail_block_locks,
+ NULL, NULL);
+}
+
+/**
+ * initialize_slab_scrubber() - Initialize an allocator's slab scrubber.
+ * @allocator: The allocator being initialized
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int initialize_slab_scrubber(struct block_allocator *allocator)
+{
+ struct slab_scrubber *scrubber = &allocator->scrubber;
+ block_count_t slab_journal_size =
+ allocator->depot->slab_config.slab_journal_blocks;
+ char *journal_data;
+ int result;
+
+ result = vdo_allocate(VDO_BLOCK_SIZE * slab_journal_size,
+ char, __func__, &journal_data);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = allocate_vio_components(allocator->completion.vdo,
+ VIO_TYPE_SLAB_JOURNAL,
+ VIO_PRIORITY_METADATA,
+ allocator, slab_journal_size,
+ journal_data, &scrubber->vio);
+ if (result != VDO_SUCCESS) {
+ vdo_free(journal_data);
+ return result;
+ }
+
+ INIT_LIST_HEAD(&scrubber->high_priority_slabs);
+ INIT_LIST_HEAD(&scrubber->slabs);
+ vdo_set_admin_state_code(&scrubber->admin_state, VDO_ADMIN_STATE_SUSPENDED);
+ return VDO_SUCCESS;
+}
+
+/**
+ * initialize_slab_summary_block() - Initialize a slab_summary_block.
+ * @allocator: The allocator which owns the block.
+ * @index: The index of this block in its zone's summary.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check initialize_slab_summary_block(struct block_allocator *allocator,
+ block_count_t index)
+{
+ struct slab_summary_block *block = &allocator->summary_blocks[index];
+ int result;
+
+ result = vdo_allocate(VDO_BLOCK_SIZE, char, __func__, &block->outgoing_entries);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = allocate_vio_components(allocator->depot->vdo, VIO_TYPE_SLAB_SUMMARY,
+ VIO_PRIORITY_METADATA, NULL, 1,
+ block->outgoing_entries, &block->vio);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ block->allocator = allocator;
+ block->entries = &allocator->summary_entries[VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK * index];
+ block->index = index;
+ return VDO_SUCCESS;
+}
+
+static int __must_check initialize_block_allocator(struct slab_depot *depot,
+ zone_count_t zone)
+{
+ int result;
+ block_count_t i;
+ struct block_allocator *allocator = &depot->allocators[zone];
+ struct vdo *vdo = depot->vdo;
+ block_count_t max_free_blocks = depot->slab_config.data_blocks;
+ unsigned int max_priority = (2 + ilog2(max_free_blocks));
+
+ *allocator = (struct block_allocator) {
+ .depot = depot,
+ .zone_number = zone,
+ .thread_id = vdo->thread_config.physical_threads[zone],
+ .nonce = vdo->states.vdo.nonce,
+ };
+
+ INIT_LIST_HEAD(&allocator->dirty_slab_journals);
+ vdo_set_admin_state_code(&allocator->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
+ result = vdo_register_read_only_listener(vdo, allocator,
+ notify_block_allocator_of_read_only_mode,
+ allocator->thread_id);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_initialize_completion(&allocator->completion, vdo, VDO_BLOCK_ALLOCATOR_COMPLETION);
+ result = make_vio_pool(vdo, BLOCK_ALLOCATOR_VIO_POOL_SIZE, allocator->thread_id,
+ VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA,
+ allocator, &allocator->vio_pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = initialize_slab_scrubber(allocator);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_make_priority_table(max_priority, &allocator->prioritized_slabs);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE,
+ struct slab_summary_block, __func__,
+ &allocator->summary_blocks);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ vdo_set_admin_state_code(&allocator->summary_state,
+ VDO_ADMIN_STATE_NORMAL_OPERATION);
+ allocator->summary_entries = depot->summary_entries + (MAX_VDO_SLABS * zone);
+
+ /* Initialize each summary block. */
+ for (i = 0; i < VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE; i++) {
+ result = initialize_slab_summary_block(allocator, i);
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ /*
+ * Performing well atop thin provisioned storage requires either that VDO discards freed
+ * blocks, or that the block allocator try to use slabs that already have allocated blocks
+ * in preference to slabs that have never been opened. For reasons we have not been able to
+ * fully understand, some SSD machines have been have been very sensitive (50% reduction in
+ * test throughput) to very slight differences in the timing and locality of block
+ * allocation. Assigning a low priority to unopened slabs (max_priority/2, say) would be
+ * ideal for the story, but anything less than a very high threshold (max_priority - 1)
+ * hurts on these machines.
+ *
+ * This sets the free block threshold for preferring to open an unopened slab to the binary
+ * floor of 3/4ths the total number of data blocks in a slab, which will generally evaluate
+ * to about half the slab size.
+ */
+ allocator->unopened_slab_priority = (1 + ilog2((max_free_blocks * 3) / 4));
+
+ return VDO_SUCCESS;
+}
+
+static int allocate_components(struct slab_depot *depot,
+ struct partition *summary_partition)
+{
+ int result;
+ zone_count_t zone;
+ slab_count_t slab_count;
+ u8 hint;
+ u32 i;
+ const struct thread_config *thread_config = &depot->vdo->thread_config;
+
+ result = vdo_make_action_manager(depot->zone_count, get_allocator_thread_id,
+ thread_config->journal_thread, depot,
+ schedule_tail_block_commit,
+ depot->vdo, &depot->action_manager);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ depot->origin = depot->first_block;
+
+ /* block size must be a multiple of entry size */
+ BUILD_BUG_ON((VDO_BLOCK_SIZE % sizeof(struct slab_summary_entry)) != 0);
+
+ depot->summary_origin = summary_partition->offset;
+ depot->hint_shift = vdo_get_slab_summary_hint_shift(depot->slab_size_shift);
+ result = vdo_allocate(MAXIMUM_VDO_SLAB_SUMMARY_ENTRIES,
+ struct slab_summary_entry, __func__,
+ &depot->summary_entries);
+ if (result != VDO_SUCCESS)
+ return result;
+
+
+ /* Initialize all the entries. */
+ hint = compute_fullness_hint(depot, depot->slab_config.data_blocks);
+ for (i = 0; i < MAXIMUM_VDO_SLAB_SUMMARY_ENTRIES; i++) {
+ /*
+ * This default tail block offset must be reflected in
+ * slabJournal.c::read_slab_journal_tail().
+ */
+ depot->summary_entries[i] = (struct slab_summary_entry) {
+ .tail_block_offset = 0,
+ .fullness_hint = hint,
+ .load_ref_counts = false,
+ .is_dirty = false,
+ };
+ }
+
+ slab_count = vdo_compute_slab_count(depot->first_block, depot->last_block,
+ depot->slab_size_shift);
+ if (thread_config->physical_zone_count > slab_count) {
+ return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
+ "%u physical zones exceeds slab count %u",
+ thread_config->physical_zone_count,
+ slab_count);
+ }
+
+ /* Initialize the block allocators. */
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ result = initialize_block_allocator(depot, zone);
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ /* Allocate slabs. */
+ result = allocate_slabs(depot, slab_count);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ /* Use the new slabs. */
+ for (i = depot->slab_count; i < depot->new_slab_count; i++) {
+ struct vdo_slab *slab = depot->new_slabs[i];
+
+ register_slab_with_allocator(slab->allocator, slab);
+ WRITE_ONCE(depot->slab_count, depot->slab_count + 1);
+ }
+
+ depot->slabs = depot->new_slabs;
+ depot->new_slabs = NULL;
+ depot->new_slab_count = 0;
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_decode_slab_depot() - Make a slab depot and configure it with the state read from the super
+ * block.
+ * @state: The slab depot state from the super block.
+ * @vdo: The VDO which will own the depot.
+ * @summary_partition: The partition which holds the slab summary.
+ * @depot_ptr: A pointer to hold the depot.
+ *
+ * Return: A success or error code.
+ */
+int vdo_decode_slab_depot(struct slab_depot_state_2_0 state, struct vdo *vdo,
+ struct partition *summary_partition,
+ struct slab_depot **depot_ptr)
+{
+ unsigned int slab_size_shift;
+ struct slab_depot *depot;
+ int result;
+
+ /*
+ * Calculate the bit shift for efficiently mapping block numbers to slabs. Using a shift
+ * requires that the slab size be a power of two.
+ */
+ block_count_t slab_size = state.slab_config.slab_blocks;
+
+ if (!is_power_of_2(slab_size)) {
+ return vdo_log_error_strerror(UDS_INVALID_ARGUMENT,
+ "slab size must be a power of two");
+ }
+ slab_size_shift = ilog2(slab_size);
+
+ result = vdo_allocate_extended(struct slab_depot,
+ vdo->thread_config.physical_zone_count,
+ struct block_allocator, __func__, &depot);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ depot->vdo = vdo;
+ depot->old_zone_count = state.zone_count;
+ depot->zone_count = vdo->thread_config.physical_zone_count;
+ depot->slab_config = state.slab_config;
+ depot->first_block = state.first_block;
+ depot->last_block = state.last_block;
+ depot->slab_size_shift = slab_size_shift;
+
+ result = allocate_components(depot, summary_partition);
+ if (result != VDO_SUCCESS) {
+ vdo_free_slab_depot(depot);
+ return result;
+ }
+
+ *depot_ptr = depot;
+ return VDO_SUCCESS;
+}
+
+static void uninitialize_allocator_summary(struct block_allocator *allocator)
+{
+ block_count_t i;
+
+ if (allocator->summary_blocks == NULL)
+ return;
+
+ for (i = 0; i < VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE; i++) {
+ free_vio_components(&allocator->summary_blocks[i].vio);
+ vdo_free(vdo_forget(allocator->summary_blocks[i].outgoing_entries));
+ }
+
+ vdo_free(vdo_forget(allocator->summary_blocks));
+}
+
+/**
+ * vdo_free_slab_depot() - Destroy a slab depot.
+ * @depot: The depot to destroy.
+ */
+void vdo_free_slab_depot(struct slab_depot *depot)
+{
+ zone_count_t zone = 0;
+
+ if (depot == NULL)
+ return;
+
+ vdo_abandon_new_slabs(depot);
+
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ struct block_allocator *allocator = &depot->allocators[zone];
+
+ if (allocator->eraser != NULL)
+ dm_kcopyd_client_destroy(vdo_forget(allocator->eraser));
+
+ uninitialize_allocator_summary(allocator);
+ uninitialize_scrubber_vio(&allocator->scrubber);
+ free_vio_pool(vdo_forget(allocator->vio_pool));
+ vdo_free_priority_table(vdo_forget(allocator->prioritized_slabs));
+ }
+
+ if (depot->slabs != NULL) {
+ slab_count_t i;
+
+ for (i = 0; i < depot->slab_count; i++)
+ free_slab(vdo_forget(depot->slabs[i]));
+ }
+
+ vdo_free(vdo_forget(depot->slabs));
+ vdo_free(vdo_forget(depot->action_manager));
+ vdo_free(vdo_forget(depot->summary_entries));
+ vdo_free(depot);
+}
+
+/**
+ * vdo_record_slab_depot() - Record the state of a slab depot for encoding into the super block.
+ * @depot: The depot to encode.
+ *
+ * Return: The depot state.
+ */
+struct slab_depot_state_2_0 vdo_record_slab_depot(const struct slab_depot *depot)
+{
+ /*
+ * If this depot is currently using 0 zones, it must have been synchronously loaded by a
+ * tool and is now being saved. We did not load and combine the slab summary, so we still
+ * need to do that next time we load with the old zone count rather than 0.
+ */
+ struct slab_depot_state_2_0 state;
+ zone_count_t zones_to_record = depot->zone_count;
+
+ if (depot->zone_count == 0)
+ zones_to_record = depot->old_zone_count;
+
+ state = (struct slab_depot_state_2_0) {
+ .slab_config = depot->slab_config,
+ .first_block = depot->first_block,
+ .last_block = depot->last_block,
+ .zone_count = zones_to_record,
+ };
+
+ return state;
+}
+
+/**
+ * vdo_allocate_reference_counters() - Allocate the reference counters for all slabs in the depot.
+ *
+ * Context: This method may be called only before entering normal operation from the load thread.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_allocate_reference_counters(struct slab_depot *depot)
+{
+ struct slab_iterator iterator =
+ get_depot_slab_iterator(depot, depot->slab_count - 1, 0, 1);
+
+ while (iterator.next != NULL) {
+ int result = allocate_slab_counters(next_slab(&iterator));
+
+ if (result != VDO_SUCCESS)
+ return result;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * get_slab_number() - Get the number of the slab that contains a specified block.
+ * @depot: The slab depot.
+ * @pbn: The physical block number.
+ * @slab_number_ptr: A pointer to hold the slab number.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check get_slab_number(const struct slab_depot *depot,
+ physical_block_number_t pbn,
+ slab_count_t *slab_number_ptr)
+{
+ slab_count_t slab_number;
+
+ if (pbn < depot->first_block)
+ return VDO_OUT_OF_RANGE;
+
+ slab_number = (pbn - depot->first_block) >> depot->slab_size_shift;
+ if (slab_number >= depot->slab_count)
+ return VDO_OUT_OF_RANGE;
+
+ *slab_number_ptr = slab_number;
+ return VDO_SUCCESS;
+}
+
+/**
+ * vdo_get_slab() - Get the slab object for the slab that contains a specified block.
+ * @depot: The slab depot.
+ * @pbn: The physical block number.
+ *
+ * Will put the VDO in read-only mode if the PBN is not a valid data block nor the zero block.
+ *
+ * Return: The slab containing the block, or NULL if the block number is the zero block or
+ * otherwise out of range.
+ */
+struct vdo_slab *vdo_get_slab(const struct slab_depot *depot,
+ physical_block_number_t pbn)
+{
+ slab_count_t slab_number;
+ int result;
+
+ if (pbn == VDO_ZERO_BLOCK)
+ return NULL;
+
+ result = get_slab_number(depot, pbn, &slab_number);
+ if (result != VDO_SUCCESS) {
+ vdo_enter_read_only_mode(depot->vdo, result);
+ return NULL;
+ }
+
+ return depot->slabs[slab_number];
+}
+
+/**
+ * vdo_get_increment_limit() - Determine how many new references a block can acquire.
+ * @depot: The slab depot.
+ * @pbn: The physical block number that is being queried.
+ *
+ * Context: This method must be called from the physical zone thread of the PBN.
+ *
+ * Return: The number of available references.
+ */
+u8 vdo_get_increment_limit(struct slab_depot *depot, physical_block_number_t pbn)
+{
+ struct vdo_slab *slab = vdo_get_slab(depot, pbn);
+ vdo_refcount_t *counter_ptr = NULL;
+ int result;
+
+ if ((slab == NULL) || (slab->status != VDO_SLAB_REBUILT))
+ return 0;
+
+ result = get_reference_counter(slab, pbn, &counter_ptr);
+ if (result != VDO_SUCCESS)
+ return 0;
+
+ if (*counter_ptr == PROVISIONAL_REFERENCE_COUNT)
+ return (MAXIMUM_REFERENCE_COUNT - 1);
+
+ return (MAXIMUM_REFERENCE_COUNT - *counter_ptr);
+}
+
+/**
+ * vdo_is_physical_data_block() - Determine whether the given PBN refers to a data block.
+ * @depot: The depot.
+ * @pbn: The physical block number to ask about.
+ *
+ * Return: True if the PBN corresponds to a data block.
+ */
+bool vdo_is_physical_data_block(const struct slab_depot *depot,
+ physical_block_number_t pbn)
+{
+ slab_count_t slab_number;
+ slab_block_number sbn;
+
+ return ((pbn == VDO_ZERO_BLOCK) ||
+ ((get_slab_number(depot, pbn, &slab_number) == VDO_SUCCESS) &&
+ (slab_block_number_from_pbn(depot->slabs[slab_number], pbn, &sbn) ==
+ VDO_SUCCESS)));
+}
+
+/**
+ * vdo_get_slab_depot_allocated_blocks() - Get the total number of data blocks allocated across all
+ * the slabs in the depot.
+ * @depot: The slab depot.
+ *
+ * This is the total number of blocks with a non-zero reference count.
+ *
+ * Context: This may be called from any thread.
+ *
+ * Return: The total number of blocks with a non-zero reference count.
+ */
+block_count_t vdo_get_slab_depot_allocated_blocks(const struct slab_depot *depot)
+{
+ block_count_t total = 0;
+ zone_count_t zone;
+
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ /* The allocators are responsible for thread safety. */
+ total += READ_ONCE(depot->allocators[zone].allocated_blocks);
+ }
+
+ return total;
+}
+
+/**
+ * vdo_get_slab_depot_data_blocks() - Get the total number of data blocks in all the slabs in the
+ * depot.
+ * @depot: The slab depot.
+ *
+ * Context: This may be called from any thread.
+ *
+ * Return: The total number of data blocks in all slabs.
+ */
+block_count_t vdo_get_slab_depot_data_blocks(const struct slab_depot *depot)
+{
+ return (READ_ONCE(depot->slab_count) * depot->slab_config.data_blocks);
+}
+
+/**
+ * finish_combining_zones() - Clean up after saving out the combined slab summary.
+ * @completion: The vio which was used to write the summary data.
+ */
+static void finish_combining_zones(struct vdo_completion *completion)
+{
+ int result = completion->result;
+ struct vdo_completion *parent = completion->parent;
+
+ free_vio(as_vio(vdo_forget(completion)));
+ vdo_fail_completion(parent, result);
+}
+
+static void handle_combining_error(struct vdo_completion *completion)
+{
+ vio_record_metadata_io_error(as_vio(completion));
+ finish_combining_zones(completion);
+}
+
+static void write_summary_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo *vdo = vio->completion.vdo;
+
+ continue_vio_after_io(vio, finish_combining_zones,
+ vdo->thread_config.admin_thread);
+}
+
+/**
+ * combine_summaries() - Treating the current entries buffer as the on-disk value of all zones,
+ * update every zone to the correct values for every slab.
+ * @depot: The depot whose summary entries should be combined.
+ */
+static void combine_summaries(struct slab_depot *depot)
+{
+ /*
+ * Combine all the old summary data into the portion of the buffer corresponding to the
+ * first zone.
+ */
+ zone_count_t zone = 0;
+ struct slab_summary_entry *entries = depot->summary_entries;
+
+ if (depot->old_zone_count > 1) {
+ slab_count_t entry_number;
+
+ for (entry_number = 0; entry_number < MAX_VDO_SLABS; entry_number++) {
+ if (zone != 0) {
+ memcpy(entries + entry_number,
+ entries + (zone * MAX_VDO_SLABS) + entry_number,
+ sizeof(struct slab_summary_entry));
+ }
+
+ zone++;
+ if (zone == depot->old_zone_count)
+ zone = 0;
+ }
+ }
+
+ /* Copy the combined data to each zones's region of the buffer. */
+ for (zone = 1; zone < MAX_VDO_PHYSICAL_ZONES; zone++) {
+ memcpy(entries + (zone * MAX_VDO_SLABS), entries,
+ MAX_VDO_SLABS * sizeof(struct slab_summary_entry));
+ }
+}
+
+/**
+ * finish_loading_summary() - Finish loading slab summary data.
+ * @completion: The vio which was used to read the summary data.
+ *
+ * Combines the slab summary data from all the previously written zones and copies the combined
+ * summary to each partition's data region. Then writes the combined summary back out to disk. This
+ * callback is registered in load_summary_endio().
+ */
+static void finish_loading_summary(struct vdo_completion *completion)
+{
+ struct slab_depot *depot = completion->vdo->depot;
+
+ /* Combine the summary from each zone so each zone is correct for all slabs. */
+ combine_summaries(depot);
+
+ /* Write the combined summary back out. */
+ vdo_submit_metadata_vio(as_vio(completion), depot->summary_origin,
+ write_summary_endio, handle_combining_error,
+ REQ_OP_WRITE);
+}
+
+static void load_summary_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo *vdo = vio->completion.vdo;
+
+ continue_vio_after_io(vio, finish_loading_summary,
+ vdo->thread_config.admin_thread);
+}
+
+/**
+ * load_slab_summary() - The preamble of a load operation.
+ *
+ * Implements vdo_action_preamble_fn.
+ */
+static void load_slab_summary(void *context, struct vdo_completion *parent)
+{
+ int result;
+ struct vio *vio;
+ struct slab_depot *depot = context;
+ const struct admin_state_code *operation =
+ vdo_get_current_manager_operation(depot->action_manager);
+
+ result = create_multi_block_metadata_vio(depot->vdo, VIO_TYPE_SLAB_SUMMARY,
+ VIO_PRIORITY_METADATA, parent,
+ VDO_SLAB_SUMMARY_BLOCKS,
+ (char *) depot->summary_entries, &vio);
+ if (result != VDO_SUCCESS) {
+ vdo_fail_completion(parent, result);
+ return;
+ }
+
+ if ((operation == VDO_ADMIN_STATE_FORMATTING) ||
+ (operation == VDO_ADMIN_STATE_LOADING_FOR_REBUILD)) {
+ finish_loading_summary(&vio->completion);
+ return;
+ }
+
+ vdo_submit_metadata_vio(vio, depot->summary_origin, load_summary_endio,
+ handle_combining_error, REQ_OP_READ);
+}
+
+/* Implements vdo_zone_action_fn. */
+static void load_allocator(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+
+ vdo_start_loading(&depot->allocators[zone_number].state,
+ vdo_get_current_manager_operation(depot->action_manager),
+ parent, initiate_load);
+}
+
+/**
+ * vdo_load_slab_depot() - Asynchronously load any slab depot state that isn't included in the
+ * super_block component.
+ * @depot: The depot to load.
+ * @operation: The type of load to perform.
+ * @parent: The completion to notify when the load is complete.
+ * @context: Additional context for the load operation; may be NULL.
+ *
+ * This method may be called only before entering normal operation from the load thread.
+ */
+void vdo_load_slab_depot(struct slab_depot *depot,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent, void *context)
+{
+ if (!vdo_assert_load_operation(operation, parent))
+ return;
+
+ vdo_schedule_operation_with_context(depot->action_manager, operation,
+ load_slab_summary, load_allocator,
+ NULL, context, parent);
+}
+
+/* Implements vdo_zone_action_fn. */
+static void prepare_to_allocate(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+ struct block_allocator *allocator = &depot->allocators[zone_number];
+ int result;
+
+ result = vdo_prepare_slabs_for_allocation(allocator);
+ if (result != VDO_SUCCESS) {
+ vdo_fail_completion(parent, result);
+ return;
+ }
+
+ scrub_slabs(allocator, parent);
+}
+
+/**
+ * vdo_prepare_slab_depot_to_allocate() - Prepare the slab depot to come online and start
+ * allocating blocks.
+ * @depot: The depot to prepare.
+ * @load_type: The load type.
+ * @parent: The completion to notify when the operation is complete.
+ *
+ * This method may be called only before entering normal operation from the load thread. It must be
+ * called before allocation may proceed.
+ */
+void vdo_prepare_slab_depot_to_allocate(struct slab_depot *depot,
+ enum slab_depot_load_type load_type,
+ struct vdo_completion *parent)
+{
+ depot->load_type = load_type;
+ atomic_set(&depot->zones_to_scrub, depot->zone_count);
+ vdo_schedule_action(depot->action_manager, NULL,
+ prepare_to_allocate, NULL, parent);
+}
+
+/**
+ * vdo_update_slab_depot_size() - Update the slab depot to reflect its new size in memory.
+ * @depot: The depot to update.
+ *
+ * This size is saved to disk as part of the super block.
+ */
+void vdo_update_slab_depot_size(struct slab_depot *depot)
+{
+ depot->last_block = depot->new_last_block;
+}
+
+/**
+ * vdo_prepare_to_grow_slab_depot() - Allocate new memory needed for a resize of a slab depot to
+ * the given size.
+ * @depot: The depot to prepare to resize.
+ * @partition: The new depot partition
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_prepare_to_grow_slab_depot(struct slab_depot *depot,
+ const struct partition *partition)
+{
+ struct slab_depot_state_2_0 new_state;
+ int result;
+ slab_count_t new_slab_count;
+
+ if ((partition->count >> depot->slab_size_shift) <= depot->slab_count)
+ return VDO_INCREMENT_TOO_SMALL;
+
+ /* Generate the depot configuration for the new block count. */
+ VDO_ASSERT_LOG_ONLY(depot->first_block == partition->offset,
+ "New slab depot partition doesn't change origin");
+ result = vdo_configure_slab_depot(partition, depot->slab_config,
+ depot->zone_count, &new_state);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ new_slab_count = vdo_compute_slab_count(depot->first_block,
+ new_state.last_block,
+ depot->slab_size_shift);
+ if (new_slab_count <= depot->slab_count)
+ return vdo_log_error_strerror(VDO_INCREMENT_TOO_SMALL,
+ "Depot can only grow");
+ if (new_slab_count == depot->new_slab_count) {
+ /* Check it out, we've already got all the new slabs allocated! */
+ return VDO_SUCCESS;
+ }
+
+ vdo_abandon_new_slabs(depot);
+ result = allocate_slabs(depot, new_slab_count);
+ if (result != VDO_SUCCESS) {
+ vdo_abandon_new_slabs(depot);
+ return result;
+ }
+
+ depot->new_size = partition->count;
+ depot->old_last_block = depot->last_block;
+ depot->new_last_block = new_state.last_block;
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * finish_registration() - Finish registering new slabs now that all of the allocators have
+ * received their new slabs.
+ *
+ * Implements vdo_action_conclusion_fn.
+ */
+static int finish_registration(void *context)
+{
+ struct slab_depot *depot = context;
+
+ WRITE_ONCE(depot->slab_count, depot->new_slab_count);
+ vdo_free(depot->slabs);
+ depot->slabs = depot->new_slabs;
+ depot->new_slabs = NULL;
+ depot->new_slab_count = 0;
+ return VDO_SUCCESS;
+}
+
+/* Implements vdo_zone_action_fn. */
+static void register_new_slabs(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+ struct block_allocator *allocator = &depot->allocators[zone_number];
+ slab_count_t i;
+
+ for (i = depot->slab_count; i < depot->new_slab_count; i++) {
+ struct vdo_slab *slab = depot->new_slabs[i];
+
+ if (slab->allocator == allocator)
+ register_slab_with_allocator(allocator, slab);
+ }
+
+ vdo_finish_completion(parent);
+}
+
+/**
+ * vdo_use_new_slabs() - Use the new slabs allocated for resize.
+ * @depot: The depot.
+ * @parent: The object to notify when complete.
+ */
+void vdo_use_new_slabs(struct slab_depot *depot, struct vdo_completion *parent)
+{
+ VDO_ASSERT_LOG_ONLY(depot->new_slabs != NULL, "Must have new slabs to use");
+ vdo_schedule_operation(depot->action_manager,
+ VDO_ADMIN_STATE_SUSPENDED_OPERATION,
+ NULL, register_new_slabs,
+ finish_registration, parent);
+}
+
+/**
+ * stop_scrubbing() - Tell the scrubber to stop scrubbing after it finishes the slab it is
+ * currently working on.
+ * @scrubber: The scrubber to stop.
+ * @parent: The completion to notify when scrubbing has stopped.
+ */
+static void stop_scrubbing(struct block_allocator *allocator)
+{
+ struct slab_scrubber *scrubber = &allocator->scrubber;
+
+ if (vdo_is_state_quiescent(&scrubber->admin_state)) {
+ vdo_finish_completion(&allocator->completion);
+ } else {
+ vdo_start_draining(&scrubber->admin_state,
+ VDO_ADMIN_STATE_SUSPENDING,
+ &allocator->completion, NULL);
+ }
+}
+
+/* Implements vdo_admin_initiator_fn. */
+static void initiate_summary_drain(struct admin_state *state)
+{
+ check_summary_drain_complete(container_of(state, struct block_allocator,
+ summary_state));
+}
+
+static void do_drain_step(struct vdo_completion *completion)
+{
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+
+ vdo_prepare_completion_for_requeue(&allocator->completion, do_drain_step,
+ handle_operation_error, allocator->thread_id,
+ NULL);
+ switch (++allocator->drain_step) {
+ case VDO_DRAIN_ALLOCATOR_STEP_SCRUBBER:
+ stop_scrubbing(allocator);
+ return;
+
+ case VDO_DRAIN_ALLOCATOR_STEP_SLABS:
+ apply_to_slabs(allocator, do_drain_step);
+ return;
+
+ case VDO_DRAIN_ALLOCATOR_STEP_SUMMARY:
+ vdo_start_draining(&allocator->summary_state,
+ vdo_get_admin_state_code(&allocator->state),
+ completion, initiate_summary_drain);
+ return;
+
+ case VDO_DRAIN_ALLOCATOR_STEP_FINISHED:
+ VDO_ASSERT_LOG_ONLY(!is_vio_pool_busy(allocator->vio_pool),
+ "vio pool not busy");
+ vdo_finish_draining_with_result(&allocator->state, completion->result);
+ return;
+
+ default:
+ vdo_finish_draining_with_result(&allocator->state, UDS_BAD_STATE);
+ }
+}
+
+/* Implements vdo_admin_initiator_fn. */
+static void initiate_drain(struct admin_state *state)
+{
+ struct block_allocator *allocator =
+ container_of(state, struct block_allocator, state);
+
+ allocator->drain_step = VDO_DRAIN_ALLOCATOR_START;
+ do_drain_step(&allocator->completion);
+}
+
+/*
+ * Drain all allocator I/O. Depending upon the type of drain, some or all dirty metadata may be
+ * written to disk. The type of drain will be determined from the state of the allocator's depot.
+ *
+ * Implements vdo_zone_action_fn.
+ */
+static void drain_allocator(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+
+ vdo_start_draining(&depot->allocators[zone_number].state,
+ vdo_get_current_manager_operation(depot->action_manager),
+ parent, initiate_drain);
+}
+
+/**
+ * vdo_drain_slab_depot() - Drain all slab depot I/O.
+ * @depot: The depot to drain.
+ * @operation: The drain operation (flush, rebuild, suspend, or save).
+ * @parent: The completion to finish when the drain is complete.
+ *
+ * If saving, or flushing, all dirty depot metadata will be written out. If saving or suspending,
+ * the depot will be left in a suspended state.
+ */
+void vdo_drain_slab_depot(struct slab_depot *depot,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent)
+{
+ vdo_schedule_operation(depot->action_manager, operation,
+ NULL, drain_allocator, NULL, parent);
+}
+
+/**
+ * resume_scrubbing() - Tell the scrubber to resume scrubbing if it has been stopped.
+ * @allocator: The allocator being resumed.
+ */
+static void resume_scrubbing(struct block_allocator *allocator)
+{
+ int result;
+ struct slab_scrubber *scrubber = &allocator->scrubber;
+
+ if (!has_slabs_to_scrub(scrubber)) {
+ vdo_finish_completion(&allocator->completion);
+ return;
+ }
+
+ result = vdo_resume_if_quiescent(&scrubber->admin_state);
+ if (result != VDO_SUCCESS) {
+ vdo_fail_completion(&allocator->completion, result);
+ return;
+ }
+
+ scrub_next_slab(scrubber);
+ vdo_finish_completion(&allocator->completion);
+}
+
+static void do_resume_step(struct vdo_completion *completion)
+{
+ struct block_allocator *allocator = vdo_as_block_allocator(completion);
+
+ vdo_prepare_completion_for_requeue(&allocator->completion, do_resume_step,
+ handle_operation_error,
+ allocator->thread_id, NULL);
+ switch (--allocator->drain_step) {
+ case VDO_DRAIN_ALLOCATOR_STEP_SUMMARY:
+ vdo_fail_completion(completion,
+ vdo_resume_if_quiescent(&allocator->summary_state));
+ return;
+
+ case VDO_DRAIN_ALLOCATOR_STEP_SLABS:
+ apply_to_slabs(allocator, do_resume_step);
+ return;
+
+ case VDO_DRAIN_ALLOCATOR_STEP_SCRUBBER:
+ resume_scrubbing(allocator);
+ return;
+
+ case VDO_DRAIN_ALLOCATOR_START:
+ vdo_finish_resuming_with_result(&allocator->state, completion->result);
+ return;
+
+ default:
+ vdo_finish_resuming_with_result(&allocator->state, UDS_BAD_STATE);
+ }
+}
+
+/* Implements vdo_admin_initiator_fn. */
+static void initiate_resume(struct admin_state *state)
+{
+ struct block_allocator *allocator =
+ container_of(state, struct block_allocator, state);
+
+ allocator->drain_step = VDO_DRAIN_ALLOCATOR_STEP_FINISHED;
+ do_resume_step(&allocator->completion);
+}
+
+/* Implements vdo_zone_action_fn. */
+static void resume_allocator(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+
+ vdo_start_resuming(&depot->allocators[zone_number].state,
+ vdo_get_current_manager_operation(depot->action_manager),
+ parent, initiate_resume);
+}
+
+/**
+ * vdo_resume_slab_depot() - Resume a suspended slab depot.
+ * @depot: The depot to resume.
+ * @parent: The completion to finish when the depot has resumed.
+ */
+void vdo_resume_slab_depot(struct slab_depot *depot, struct vdo_completion *parent)
+{
+ if (vdo_is_read_only(depot->vdo)) {
+ vdo_continue_completion(parent, VDO_READ_ONLY);
+ return;
+ }
+
+ vdo_schedule_operation(depot->action_manager, VDO_ADMIN_STATE_RESUMING,
+ NULL, resume_allocator, NULL, parent);
+}
+
+/**
+ * vdo_commit_oldest_slab_journal_tail_blocks() - Commit all dirty tail blocks which are locking a
+ * given recovery journal block.
+ * @depot: The depot.
+ * @recovery_block_number: The sequence number of the recovery journal block whose locks should be
+ * released.
+ *
+ * Context: This method must be called from the journal zone thread.
+ */
+void vdo_commit_oldest_slab_journal_tail_blocks(struct slab_depot *depot,
+ sequence_number_t recovery_block_number)
+{
+ if (depot == NULL)
+ return;
+
+ depot->new_release_request = recovery_block_number;
+ vdo_schedule_default_action(depot->action_manager);
+}
+
+/* Implements vdo_zone_action_fn. */
+static void scrub_all_unrecovered_slabs(void *context, zone_count_t zone_number,
+ struct vdo_completion *parent)
+{
+ struct slab_depot *depot = context;
+
+ scrub_slabs(&depot->allocators[zone_number], NULL);
+ vdo_launch_completion(parent);
+}
+
+/**
+ * vdo_scrub_all_unrecovered_slabs() - Scrub all unrecovered slabs.
+ * @depot: The depot to scrub.
+ * @parent: The object to notify when scrubbing has been launched for all zones.
+ */
+void vdo_scrub_all_unrecovered_slabs(struct slab_depot *depot,
+ struct vdo_completion *parent)
+{
+ vdo_schedule_action(depot->action_manager, NULL,
+ scrub_all_unrecovered_slabs,
+ NULL, parent);
+}
+
+/**
+ * get_block_allocator_statistics() - Get the total of the statistics from all the block allocators
+ * in the depot.
+ * @depot: The slab depot.
+ *
+ * Return: The statistics from all block allocators in the depot.
+ */
+static struct block_allocator_statistics __must_check
+get_block_allocator_statistics(const struct slab_depot *depot)
+{
+ struct block_allocator_statistics totals;
+ zone_count_t zone;
+
+ memset(&totals, 0, sizeof(totals));
+
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ const struct block_allocator *allocator = &depot->allocators[zone];
+ const struct block_allocator_statistics *stats = &allocator->statistics;
+
+ totals.slab_count += allocator->slab_count;
+ totals.slabs_opened += READ_ONCE(stats->slabs_opened);
+ totals.slabs_reopened += READ_ONCE(stats->slabs_reopened);
+ }
+
+ return totals;
+}
+
+/**
+ * get_ref_counts_statistics() - Get the cumulative ref_counts statistics for the depot.
+ * @depot: The slab depot.
+ *
+ * Return: The cumulative statistics for all ref_counts in the depot.
+ */
+static struct ref_counts_statistics __must_check
+get_ref_counts_statistics(const struct slab_depot *depot)
+{
+ struct ref_counts_statistics totals;
+ zone_count_t zone;
+
+ memset(&totals, 0, sizeof(totals));
+
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ totals.blocks_written +=
+ READ_ONCE(depot->allocators[zone].ref_counts_statistics.blocks_written);
+ }
+
+ return totals;
+}
+
+/**
+ * get_slab_journal_statistics() - Get the aggregated slab journal statistics for the depot.
+ * @depot: The slab depot.
+ *
+ * Return: The aggregated statistics for all slab journals in the depot.
+ */
+static struct slab_journal_statistics __must_check
+get_slab_journal_statistics(const struct slab_depot *depot)
+{
+ struct slab_journal_statistics totals;
+ zone_count_t zone;
+
+ memset(&totals, 0, sizeof(totals));
+
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ const struct slab_journal_statistics *stats =
+ &depot->allocators[zone].slab_journal_statistics;
+
+ totals.disk_full_count += READ_ONCE(stats->disk_full_count);
+ totals.flush_count += READ_ONCE(stats->flush_count);
+ totals.blocked_count += READ_ONCE(stats->blocked_count);
+ totals.blocks_written += READ_ONCE(stats->blocks_written);
+ totals.tail_busy_count += READ_ONCE(stats->tail_busy_count);
+ }
+
+ return totals;
+}
+
+/**
+ * vdo_get_slab_depot_statistics() - Get all the vdo_statistics fields that are properties of the
+ * slab depot.
+ * @depot: The slab depot.
+ * @stats: The vdo statistics structure to partially fill.
+ */
+void vdo_get_slab_depot_statistics(const struct slab_depot *depot,
+ struct vdo_statistics *stats)
+{
+ slab_count_t slab_count = READ_ONCE(depot->slab_count);
+ slab_count_t unrecovered = 0;
+ zone_count_t zone;
+
+ for (zone = 0; zone < depot->zone_count; zone++) {
+ /* The allocators are responsible for thread safety. */
+ unrecovered += READ_ONCE(depot->allocators[zone].scrubber.slab_count);
+ }
+
+ stats->recovery_percentage = (slab_count - unrecovered) * 100 / slab_count;
+ stats->allocator = get_block_allocator_statistics(depot);
+ stats->ref_counts = get_ref_counts_statistics(depot);
+ stats->slab_journal = get_slab_journal_statistics(depot);
+ stats->slab_summary = (struct slab_summary_statistics) {
+ .blocks_written = atomic64_read(&depot->summary_statistics.blocks_written),
+ };
+}
+
+/**
+ * vdo_dump_slab_depot() - Dump the slab depot, in a thread-unsafe fashion.
+ * @depot: The slab depot.
+ */
+void vdo_dump_slab_depot(const struct slab_depot *depot)
+{
+ vdo_log_info("vdo slab depot");
+ vdo_log_info(" zone_count=%u old_zone_count=%u slabCount=%u active_release_request=%llu new_release_request=%llu",
+ (unsigned int) depot->zone_count,
+ (unsigned int) depot->old_zone_count, READ_ONCE(depot->slab_count),
+ (unsigned long long) depot->active_release_request,
+ (unsigned long long) depot->new_release_request);
+}
diff --git a/drivers/md/dm-vdo/slab-depot.h b/drivers/md/dm-vdo/slab-depot.h
new file mode 100644
index 000000000000..f234853501ca
--- /dev/null
+++ b/drivers/md/dm-vdo/slab-depot.h
@@ -0,0 +1,601 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_SLAB_DEPOT_H
+#define VDO_SLAB_DEPOT_H
+
+#include <linux/atomic.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/list.h>
+
+#include "numeric.h"
+
+#include "admin-state.h"
+#include "completion.h"
+#include "data-vio.h"
+#include "encodings.h"
+#include "physical-zone.h"
+#include "priority-table.h"
+#include "recovery-journal.h"
+#include "statistics.h"
+#include "types.h"
+#include "vio.h"
+#include "wait-queue.h"
+
+/*
+ * A slab_depot is responsible for managing all of the slabs and block allocators of a VDO. It has
+ * a single array of slabs in order to eliminate the need for additional math in order to compute
+ * which physical zone a PBN is in. It also has a block_allocator per zone.
+ *
+ * Each physical zone has a single dedicated queue and thread for performing all updates to the
+ * slabs assigned to that zone. The concurrency guarantees of this single-threaded model allow the
+ * code to omit more fine-grained locking for the various slab structures. Each physical zone
+ * maintains a separate copy of the slab summary to remove the need for explicit locking on that
+ * structure as well.
+ *
+ * Load operations must be performed on the admin thread. Normal operations, such as allocations
+ * and reference count updates, must be performed on the appropriate physical zone thread. Requests
+ * from the recovery journal to commit slab journal tail blocks must be scheduled from the recovery
+ * journal thread to run on the appropriate physical zone thread. Save operations must be launched
+ * from the same admin thread as the original load operation.
+ */
+
+enum {
+ /* The number of vios in the vio pool is proportional to the throughput of the VDO. */
+ BLOCK_ALLOCATOR_VIO_POOL_SIZE = 128,
+};
+
+/*
+ * Represents the possible status of a block.
+ */
+enum reference_status {
+ RS_FREE, /* this block is free */
+ RS_SINGLE, /* this block is singly-referenced */
+ RS_SHARED, /* this block is shared */
+ RS_PROVISIONAL /* this block is provisionally allocated */
+};
+
+struct vdo_slab;
+
+struct journal_lock {
+ u16 count;
+ sequence_number_t recovery_start;
+};
+
+struct slab_journal {
+ /* A waiter object for getting a VIO pool entry */
+ struct vdo_waiter resource_waiter;
+ /* A waiter object for updating the slab summary */
+ struct vdo_waiter slab_summary_waiter;
+ /* A waiter object for getting a vio with which to flush */
+ struct vdo_waiter flush_waiter;
+ /* The queue of VIOs waiting to make an entry */
+ struct vdo_wait_queue entry_waiters;
+ /* The parent slab reference of this journal */
+ struct vdo_slab *slab;
+
+ /* Whether a tail block commit is pending */
+ bool waiting_to_commit;
+ /* Whether the journal is updating the slab summary */
+ bool updating_slab_summary;
+ /* Whether the journal is adding entries from the entry_waiters queue */
+ bool adding_entries;
+ /* Whether a partial write is in progress */
+ bool partial_write_in_progress;
+
+ /* The oldest block in the journal on disk */
+ sequence_number_t head;
+ /* The oldest block in the journal which may not be reaped */
+ sequence_number_t unreapable;
+ /* The end of the half-open interval of the active journal */
+ sequence_number_t tail;
+ /* The next journal block to be committed */
+ sequence_number_t next_commit;
+ /* The tail sequence number that is written in the slab summary */
+ sequence_number_t summarized;
+ /* The tail sequence number that was last summarized in slab summary */
+ sequence_number_t last_summarized;
+
+ /* The sequence number of the recovery journal lock */
+ sequence_number_t recovery_lock;
+
+ /*
+ * The number of entries which fit in a single block. Can't use the constant because unit
+ * tests change this number.
+ */
+ journal_entry_count_t entries_per_block;
+ /*
+ * The number of full entries which fit in a single block. Can't use the constant because
+ * unit tests change this number.
+ */
+ journal_entry_count_t full_entries_per_block;
+
+ /* The recovery journal of the VDO (slab journal holds locks on it) */
+ struct recovery_journal *recovery_journal;
+
+ /* The statistics shared by all slab journals in our physical zone */
+ struct slab_journal_statistics *events;
+ /* A list of the VIO pool entries for outstanding journal block writes */
+ struct list_head uncommitted_blocks;
+
+ /*
+ * The current tail block header state. This will be packed into the block just before it
+ * is written.
+ */
+ struct slab_journal_block_header tail_header;
+ /* A pointer to a block-sized buffer holding the packed block data */
+ struct packed_slab_journal_block *block;
+
+ /* The number of blocks in the on-disk journal */
+ block_count_t size;
+ /* The number of blocks at which to start pushing reference blocks */
+ block_count_t flushing_threshold;
+ /* The number of blocks at which all reference blocks should be writing */
+ block_count_t flushing_deadline;
+ /* The number of blocks at which to wait for reference blocks to write */
+ block_count_t blocking_threshold;
+ /* The number of blocks at which to scrub the slab before coming online */
+ block_count_t scrubbing_threshold;
+
+ /* This list entry is for block_allocator to keep a queue of dirty journals */
+ struct list_head dirty_entry;
+
+ /* The lock for the oldest unreaped block of the journal */
+ struct journal_lock *reap_lock;
+ /* The locks for each on disk block */
+ struct journal_lock *locks;
+};
+
+/*
+ * Reference_block structure
+ *
+ * Blocks are used as a proxy, permitting saves of partial refcounts.
+ */
+struct reference_block {
+ /* This block waits on the ref_counts to tell it to write */
+ struct vdo_waiter waiter;
+ /* The slab to which this reference_block belongs */
+ struct vdo_slab *slab;
+ /* The number of references in this block that represent allocations */
+ block_size_t allocated_count;
+ /* The slab journal block on which this block must hold a lock */
+ sequence_number_t slab_journal_lock;
+ /* The slab journal block which should be released when this block is committed */
+ sequence_number_t slab_journal_lock_to_release;
+ /* The point up to which each sector is accurate on disk */
+ struct journal_point commit_points[VDO_SECTORS_PER_BLOCK];
+ /* Whether this block has been modified since it was written to disk */
+ bool is_dirty;
+ /* Whether this block is currently writing */
+ bool is_writing;
+};
+
+/* The search_cursor represents the saved position of a free block search. */
+struct search_cursor {
+ /* The reference block containing the current search index */
+ struct reference_block *block;
+ /* The position at which to start searching for the next free counter */
+ slab_block_number index;
+ /* The position just past the last valid counter in the current block */
+ slab_block_number end_index;
+
+ /* A pointer to the first reference block in the slab */
+ struct reference_block *first_block;
+ /* A pointer to the last reference block in the slab */
+ struct reference_block *last_block;
+};
+
+enum slab_rebuild_status {
+ VDO_SLAB_REBUILT,
+ VDO_SLAB_REPLAYING,
+ VDO_SLAB_REQUIRES_SCRUBBING,
+ VDO_SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING,
+ VDO_SLAB_REBUILDING,
+};
+
+/*
+ * This is the type declaration for the vdo_slab type. A vdo_slab currently consists of a run of
+ * 2^23 data blocks, but that will soon change to dedicate a small number of those blocks for
+ * metadata storage for the reference counts and slab journal for the slab.
+ *
+ * A reference count is maintained for each physical block number. The vast majority of blocks have
+ * a very small reference count (usually 0 or 1). For references less than or equal to MAXIMUM_REFS
+ * (254) the reference count is stored in counters[pbn].
+ */
+struct vdo_slab {
+ /* A list entry to queue this slab in a block_allocator list */
+ struct list_head allocq_entry;
+
+ /* The struct block_allocator that owns this slab */
+ struct block_allocator *allocator;
+
+ /* The journal for this slab */
+ struct slab_journal journal;
+
+ /* The slab number of this slab */
+ slab_count_t slab_number;
+ /* The offset in the allocator partition of the first block in this slab */
+ physical_block_number_t start;
+ /* The offset of the first block past the end of this slab */
+ physical_block_number_t end;
+ /* The starting translated PBN of the slab journal */
+ physical_block_number_t journal_origin;
+ /* The starting translated PBN of the reference counts */
+ physical_block_number_t ref_counts_origin;
+
+ /* The administrative state of the slab */
+ struct admin_state state;
+ /* The status of the slab */
+ enum slab_rebuild_status status;
+ /* Whether the slab was ever queued for scrubbing */
+ bool was_queued_for_scrubbing;
+
+ /* The priority at which this slab has been queued for allocation */
+ u8 priority;
+
+ /* Fields beyond this point are the reference counts for the data blocks in this slab. */
+ /* The size of the counters array */
+ u32 block_count;
+ /* The number of free blocks */
+ u32 free_blocks;
+ /* The array of reference counts */
+ vdo_refcount_t *counters; /* use vdo_allocate() to align data ptr */
+
+ /* The saved block pointer and array indexes for the free block search */
+ struct search_cursor search_cursor;
+
+ /* A list of the dirty blocks waiting to be written out */
+ struct vdo_wait_queue dirty_blocks;
+ /* The number of blocks which are currently writing */
+ size_t active_count;
+
+ /* A waiter object for updating the slab summary */
+ struct vdo_waiter summary_waiter;
+
+ /* The latest slab journal for which there has been a reference count update */
+ struct journal_point slab_journal_point;
+
+ /* The number of reference count blocks */
+ u32 reference_block_count;
+ /* reference count block array */
+ struct reference_block *reference_blocks;
+};
+
+enum block_allocator_drain_step {
+ VDO_DRAIN_ALLOCATOR_START,
+ VDO_DRAIN_ALLOCATOR_STEP_SCRUBBER,
+ VDO_DRAIN_ALLOCATOR_STEP_SLABS,
+ VDO_DRAIN_ALLOCATOR_STEP_SUMMARY,
+ VDO_DRAIN_ALLOCATOR_STEP_FINISHED,
+};
+
+struct slab_scrubber {
+ /* The queue of slabs to scrub first */
+ struct list_head high_priority_slabs;
+ /* The queue of slabs to scrub once there are no high_priority_slabs */
+ struct list_head slabs;
+ /* The queue of VIOs waiting for a slab to be scrubbed */
+ struct vdo_wait_queue waiters;
+
+ /*
+ * The number of slabs that are unrecovered or being scrubbed. This field is modified by
+ * the physical zone thread, but is queried by other threads.
+ */
+ slab_count_t slab_count;
+
+ /* The administrative state of the scrubber */
+ struct admin_state admin_state;
+ /* Whether to only scrub high-priority slabs */
+ bool high_priority_only;
+ /* The slab currently being scrubbed */
+ struct vdo_slab *slab;
+ /* The vio for loading slab journal blocks */
+ struct vio vio;
+};
+
+/* A sub-structure for applying actions in parallel to all an allocator's slabs. */
+struct slab_actor {
+ /* The number of slabs performing a slab action */
+ slab_count_t slab_action_count;
+ /* The method to call when a slab action has been completed by all slabs */
+ vdo_action_fn callback;
+};
+
+/* A slab_iterator is a structure for iterating over a set of slabs. */
+struct slab_iterator {
+ struct vdo_slab **slabs;
+ struct vdo_slab *next;
+ slab_count_t end;
+ slab_count_t stride;
+};
+
+/*
+ * The slab_summary provides hints during load and recovery about the state of the slabs in order
+ * to avoid the need to read the slab journals in their entirety before a VDO can come online.
+ *
+ * The information in the summary for each slab includes the rough number of free blocks (which is
+ * used to prioritize scrubbing), the cleanliness of a slab (so that clean slabs containing free
+ * space will be used on restart), and the location of the tail block of the slab's journal.
+ *
+ * The slab_summary has its own partition at the end of the volume which is sized to allow for a
+ * complete copy of the summary for each of up to 16 physical zones.
+ *
+ * During resize, the slab_summary moves its backing partition and is saved once moved; the
+ * slab_summary is not permitted to overwrite the previous recovery journal space.
+ *
+ * The slab_summary does not have its own version information, but relies on the VDO volume version
+ * number.
+ */
+
+/*
+ * A slab status is a very small structure for use in determining the ordering of slabs in the
+ * scrubbing process.
+ */
+struct slab_status {
+ slab_count_t slab_number;
+ bool is_clean;
+ u8 emptiness;
+};
+
+struct slab_summary_block {
+ /* The block_allocator to which this block belongs */
+ struct block_allocator *allocator;
+ /* The index of this block in its zone's summary */
+ block_count_t index;
+ /* Whether this block has a write outstanding */
+ bool writing;
+ /* Ring of updates waiting on the outstanding write */
+ struct vdo_wait_queue current_update_waiters;
+ /* Ring of updates waiting on the next write */
+ struct vdo_wait_queue next_update_waiters;
+ /* The active slab_summary_entry array for this block */
+ struct slab_summary_entry *entries;
+ /* The vio used to write this block */
+ struct vio vio;
+ /* The packed entries, one block long, backing the vio */
+ char *outgoing_entries;
+};
+
+/*
+ * The statistics for all the slab summary zones owned by this slab summary. These fields are all
+ * mutated only by their physical zone threads, but are read by other threads when gathering
+ * statistics for the entire depot.
+ */
+struct atomic_slab_summary_statistics {
+ /* Number of blocks written */
+ atomic64_t blocks_written;
+};
+
+struct block_allocator {
+ struct vdo_completion completion;
+ /* The slab depot for this allocator */
+ struct slab_depot *depot;
+ /* The nonce of the VDO */
+ nonce_t nonce;
+ /* The physical zone number of this allocator */
+ zone_count_t zone_number;
+ /* The thread ID for this allocator's physical zone */
+ thread_id_t thread_id;
+ /* The number of slabs in this allocator */
+ slab_count_t slab_count;
+ /* The number of the last slab owned by this allocator */
+ slab_count_t last_slab;
+ /* The reduced priority level used to preserve unopened slabs */
+ unsigned int unopened_slab_priority;
+ /* The state of this allocator */
+ struct admin_state state;
+ /* The actor for applying an action to all slabs */
+ struct slab_actor slab_actor;
+
+ /* The slab from which blocks are currently being allocated */
+ struct vdo_slab *open_slab;
+ /* A priority queue containing all slabs available for allocation */
+ struct priority_table *prioritized_slabs;
+ /* The slab scrubber */
+ struct slab_scrubber scrubber;
+ /* What phase of the close operation the allocator is to perform */
+ enum block_allocator_drain_step drain_step;
+
+ /*
+ * These statistics are all mutated only by the physical zone thread, but are read by other
+ * threads when gathering statistics for the entire depot.
+ */
+ /*
+ * The count of allocated blocks in this zone. Not in block_allocator_statistics for
+ * historical reasons.
+ */
+ u64 allocated_blocks;
+ /* Statistics for this block allocator */
+ struct block_allocator_statistics statistics;
+ /* Cumulative statistics for the slab journals in this zone */
+ struct slab_journal_statistics slab_journal_statistics;
+ /* Cumulative statistics for the reference counters in this zone */
+ struct ref_counts_statistics ref_counts_statistics;
+
+ /*
+ * This is the head of a queue of slab journals which have entries in their tail blocks
+ * which have not yet started to commit. When the recovery journal is under space pressure,
+ * slab journals which have uncommitted entries holding a lock on the recovery journal head
+ * are forced to commit their blocks early. This list is kept in order, with the tail
+ * containing the slab journal holding the most recent recovery journal lock.
+ */
+ struct list_head dirty_slab_journals;
+
+ /* The vio pool for reading and writing block allocator metadata */
+ struct vio_pool *vio_pool;
+ /* The dm_kcopyd client for erasing slab journals */
+ struct dm_kcopyd_client *eraser;
+ /* Iterator over the slabs to be erased */
+ struct slab_iterator slabs_to_erase;
+
+ /* The portion of the slab summary managed by this allocator */
+ /* The state of the slab summary */
+ struct admin_state summary_state;
+ /* The number of outstanding summary writes */
+ block_count_t summary_write_count;
+ /* The array (owned by the blocks) of all entries */
+ struct slab_summary_entry *summary_entries;
+ /* The array of slab_summary_blocks */
+ struct slab_summary_block *summary_blocks;
+};
+
+enum slab_depot_load_type {
+ VDO_SLAB_DEPOT_NORMAL_LOAD,
+ VDO_SLAB_DEPOT_RECOVERY_LOAD,
+ VDO_SLAB_DEPOT_REBUILD_LOAD
+};
+
+struct slab_depot {
+ zone_count_t zone_count;
+ zone_count_t old_zone_count;
+ struct vdo *vdo;
+ struct slab_config slab_config;
+ struct action_manager *action_manager;
+
+ physical_block_number_t first_block;
+ physical_block_number_t last_block;
+ physical_block_number_t origin;
+
+ /* slab_size == (1 << slab_size_shift) */
+ unsigned int slab_size_shift;
+
+ /* Determines how slabs should be queued during load */
+ enum slab_depot_load_type load_type;
+
+ /* The state for notifying slab journals to release recovery journal */
+ sequence_number_t active_release_request;
+ sequence_number_t new_release_request;
+
+ /* State variables for scrubbing complete handling */
+ atomic_t zones_to_scrub;
+
+ /* Array of pointers to individually allocated slabs */
+ struct vdo_slab **slabs;
+ /* The number of slabs currently allocated and stored in 'slabs' */
+ slab_count_t slab_count;
+
+ /* Array of pointers to a larger set of slabs (used during resize) */
+ struct vdo_slab **new_slabs;
+ /* The number of slabs currently allocated and stored in 'new_slabs' */
+ slab_count_t new_slab_count;
+ /* The size that 'new_slabs' was allocated for */
+ block_count_t new_size;
+
+ /* The last block before resize, for rollback */
+ physical_block_number_t old_last_block;
+ /* The last block after resize, for resize */
+ physical_block_number_t new_last_block;
+
+ /* The statistics for the slab summary */
+ struct atomic_slab_summary_statistics summary_statistics;
+ /* The start of the slab summary partition */
+ physical_block_number_t summary_origin;
+ /* The number of bits to shift to get a 7-bit fullness hint */
+ unsigned int hint_shift;
+ /* The slab summary entries for all of the zones the partition can hold */
+ struct slab_summary_entry *summary_entries;
+
+ /* The block allocators for this depot */
+ struct block_allocator allocators[];
+};
+
+struct reference_updater;
+
+bool __must_check vdo_attempt_replay_into_slab(struct vdo_slab *slab,
+ physical_block_number_t pbn,
+ enum journal_operation operation,
+ bool increment,
+ struct journal_point *recovery_point,
+ struct vdo_completion *parent);
+
+int __must_check vdo_adjust_reference_count_for_rebuild(struct slab_depot *depot,
+ physical_block_number_t pbn,
+ enum journal_operation operation);
+
+static inline struct block_allocator *vdo_as_block_allocator(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_BLOCK_ALLOCATOR_COMPLETION);
+ return container_of(completion, struct block_allocator, completion);
+}
+
+int __must_check vdo_acquire_provisional_reference(struct vdo_slab *slab,
+ physical_block_number_t pbn,
+ struct pbn_lock *lock);
+
+int __must_check vdo_allocate_block(struct block_allocator *allocator,
+ physical_block_number_t *block_number_ptr);
+
+int vdo_enqueue_clean_slab_waiter(struct block_allocator *allocator,
+ struct vdo_waiter *waiter);
+
+void vdo_modify_reference_count(struct vdo_completion *completion,
+ struct reference_updater *updater);
+
+int __must_check vdo_release_block_reference(struct block_allocator *allocator,
+ physical_block_number_t pbn);
+
+void vdo_notify_slab_journals_are_recovered(struct vdo_completion *completion);
+
+void vdo_dump_block_allocator(const struct block_allocator *allocator);
+
+int __must_check vdo_decode_slab_depot(struct slab_depot_state_2_0 state,
+ struct vdo *vdo,
+ struct partition *summary_partition,
+ struct slab_depot **depot_ptr);
+
+void vdo_free_slab_depot(struct slab_depot *depot);
+
+struct slab_depot_state_2_0 __must_check vdo_record_slab_depot(const struct slab_depot *depot);
+
+int __must_check vdo_allocate_reference_counters(struct slab_depot *depot);
+
+struct vdo_slab * __must_check vdo_get_slab(const struct slab_depot *depot,
+ physical_block_number_t pbn);
+
+u8 __must_check vdo_get_increment_limit(struct slab_depot *depot,
+ physical_block_number_t pbn);
+
+bool __must_check vdo_is_physical_data_block(const struct slab_depot *depot,
+ physical_block_number_t pbn);
+
+block_count_t __must_check vdo_get_slab_depot_allocated_blocks(const struct slab_depot *depot);
+
+block_count_t __must_check vdo_get_slab_depot_data_blocks(const struct slab_depot *depot);
+
+void vdo_get_slab_depot_statistics(const struct slab_depot *depot,
+ struct vdo_statistics *stats);
+
+void vdo_load_slab_depot(struct slab_depot *depot,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent, void *context);
+
+void vdo_prepare_slab_depot_to_allocate(struct slab_depot *depot,
+ enum slab_depot_load_type load_type,
+ struct vdo_completion *parent);
+
+void vdo_update_slab_depot_size(struct slab_depot *depot);
+
+int __must_check vdo_prepare_to_grow_slab_depot(struct slab_depot *depot,
+ const struct partition *partition);
+
+void vdo_use_new_slabs(struct slab_depot *depot, struct vdo_completion *parent);
+
+void vdo_abandon_new_slabs(struct slab_depot *depot);
+
+void vdo_drain_slab_depot(struct slab_depot *depot,
+ const struct admin_state_code *operation,
+ struct vdo_completion *parent);
+
+void vdo_resume_slab_depot(struct slab_depot *depot, struct vdo_completion *parent);
+
+void vdo_commit_oldest_slab_journal_tail_blocks(struct slab_depot *depot,
+ sequence_number_t recovery_block_number);
+
+void vdo_scrub_all_unrecovered_slabs(struct slab_depot *depot,
+ struct vdo_completion *parent);
+
+void vdo_dump_slab_depot(const struct slab_depot *depot);
+
+#endif /* VDO_SLAB_DEPOT_H */
diff --git a/drivers/md/dm-vdo/statistics.h b/drivers/md/dm-vdo/statistics.h
new file mode 100644
index 000000000000..c88a75dffba3
--- /dev/null
+++ b/drivers/md/dm-vdo/statistics.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef STATISTICS_H
+#define STATISTICS_H
+
+#include "types.h"
+
+enum {
+ STATISTICS_VERSION = 36,
+};
+
+struct block_allocator_statistics {
+ /* The total number of slabs from which blocks may be allocated */
+ u64 slab_count;
+ /* The total number of slabs from which blocks have ever been allocated */
+ u64 slabs_opened;
+ /* The number of times since loading that a slab has been re-opened */
+ u64 slabs_reopened;
+};
+
+/**
+ * Counters for tracking the number of items written (blocks, requests, etc.)
+ * that keep track of totals at steps in the write pipeline. Three counters
+ * allow the number of buffered, in-memory items and the number of in-flight,
+ * unacknowledged writes to be derived, while still tracking totals for
+ * reporting purposes
+ */
+struct commit_statistics {
+ /* The total number of items on which processing has started */
+ u64 started;
+ /* The total number of items for which a write operation has been issued */
+ u64 written;
+ /* The total number of items for which a write operation has completed */
+ u64 committed;
+};
+
+/** Counters for events in the recovery journal */
+struct recovery_journal_statistics {
+ /* Number of times the on-disk journal was full */
+ u64 disk_full;
+ /* Number of times the recovery journal requested slab journal commits. */
+ u64 slab_journal_commits_requested;
+ /* Write/Commit totals for individual journal entries */
+ struct commit_statistics entries;
+ /* Write/Commit totals for journal blocks */
+ struct commit_statistics blocks;
+};
+
+/** The statistics for the compressed block packer. */
+struct packer_statistics {
+ /* Number of compressed data items written since startup */
+ u64 compressed_fragments_written;
+ /* Number of blocks containing compressed items written since startup */
+ u64 compressed_blocks_written;
+ /* Number of VIOs that are pending in the packer */
+ u64 compressed_fragments_in_packer;
+};
+
+/** The statistics for the slab journals. */
+struct slab_journal_statistics {
+ /* Number of times the on-disk journal was full */
+ u64 disk_full_count;
+ /* Number of times an entry was added over the flush threshold */
+ u64 flush_count;
+ /* Number of times an entry was added over the block threshold */
+ u64 blocked_count;
+ /* Number of times a tail block was written */
+ u64 blocks_written;
+ /* Number of times we had to wait for the tail to write */
+ u64 tail_busy_count;
+};
+
+/** The statistics for the slab summary. */
+struct slab_summary_statistics {
+ /* Number of blocks written */
+ u64 blocks_written;
+};
+
+/** The statistics for the reference counts. */
+struct ref_counts_statistics {
+ /* Number of reference blocks written */
+ u64 blocks_written;
+};
+
+/** The statistics for the block map. */
+struct block_map_statistics {
+ /* number of dirty (resident) pages */
+ u32 dirty_pages;
+ /* number of clean (resident) pages */
+ u32 clean_pages;
+ /* number of free pages */
+ u32 free_pages;
+ /* number of pages in failed state */
+ u32 failed_pages;
+ /* number of pages incoming */
+ u32 incoming_pages;
+ /* number of pages outgoing */
+ u32 outgoing_pages;
+ /* how many times free page not avail */
+ u32 cache_pressure;
+ /* number of get_vdo_page() calls for read */
+ u64 read_count;
+ /* number of get_vdo_page() calls for write */
+ u64 write_count;
+ /* number of times pages failed to read */
+ u64 failed_reads;
+ /* number of times pages failed to write */
+ u64 failed_writes;
+ /* number of gets that are reclaimed */
+ u64 reclaimed;
+ /* number of gets for outgoing pages */
+ u64 read_outgoing;
+ /* number of gets that were already there */
+ u64 found_in_cache;
+ /* number of gets requiring discard */
+ u64 discard_required;
+ /* number of gets enqueued for their page */
+ u64 wait_for_page;
+ /* number of gets that have to fetch */
+ u64 fetch_required;
+ /* number of page fetches */
+ u64 pages_loaded;
+ /* number of page saves */
+ u64 pages_saved;
+ /* the number of flushes issued */
+ u64 flush_count;
+};
+
+/** The dedupe statistics from hash locks */
+struct hash_lock_statistics {
+ /* Number of times the UDS advice proved correct */
+ u64 dedupe_advice_valid;
+ /* Number of times the UDS advice proved incorrect */
+ u64 dedupe_advice_stale;
+ /* Number of writes with the same data as another in-flight write */
+ u64 concurrent_data_matches;
+ /* Number of writes whose hash collided with an in-flight write */
+ u64 concurrent_hash_collisions;
+ /* Current number of dedupe queries that are in flight */
+ u32 curr_dedupe_queries;
+};
+
+/** Counts of error conditions in VDO. */
+struct error_statistics {
+ /* number of times VDO got an invalid dedupe advice PBN from UDS */
+ u64 invalid_advice_pbn_count;
+ /* number of times a VIO completed with a VDO_NO_SPACE error */
+ u64 no_space_error_count;
+ /* number of times a VIO completed with a VDO_READ_ONLY error */
+ u64 read_only_error_count;
+};
+
+struct bio_stats {
+ /* Number of REQ_OP_READ bios */
+ u64 read;
+ /* Number of REQ_OP_WRITE bios with data */
+ u64 write;
+ /* Number of bios tagged with REQ_PREFLUSH and containing no data */
+ u64 empty_flush;
+ /* Number of REQ_OP_DISCARD bios */
+ u64 discard;
+ /* Number of bios tagged with REQ_PREFLUSH */
+ u64 flush;
+ /* Number of bios tagged with REQ_FUA */
+ u64 fua;
+};
+
+struct memory_usage {
+ /* Tracked bytes currently allocated. */
+ u64 bytes_used;
+ /* Maximum tracked bytes allocated. */
+ u64 peak_bytes_used;
+};
+
+/** UDS index statistics */
+struct index_statistics {
+ /* Number of records stored in the index */
+ u64 entries_indexed;
+ /* Number of post calls that found an existing entry */
+ u64 posts_found;
+ /* Number of post calls that added a new entry */
+ u64 posts_not_found;
+ /* Number of query calls that found an existing entry */
+ u64 queries_found;
+ /* Number of query calls that added a new entry */
+ u64 queries_not_found;
+ /* Number of update calls that found an existing entry */
+ u64 updates_found;
+ /* Number of update calls that added a new entry */
+ u64 updates_not_found;
+ /* Number of entries discarded */
+ u64 entries_discarded;
+};
+
+/** The statistics of the vdo service. */
+struct vdo_statistics {
+ u32 version;
+ /* Number of blocks used for data */
+ u64 data_blocks_used;
+ /* Number of blocks used for VDO metadata */
+ u64 overhead_blocks_used;
+ /* Number of logical blocks that are currently mapped to physical blocks */
+ u64 logical_blocks_used;
+ /* number of physical blocks */
+ block_count_t physical_blocks;
+ /* number of logical blocks */
+ block_count_t logical_blocks;
+ /* Size of the block map page cache, in bytes */
+ u64 block_map_cache_size;
+ /* The physical block size */
+ u64 block_size;
+ /* Number of times the VDO has successfully recovered */
+ u64 complete_recoveries;
+ /* Number of times the VDO has recovered from read-only mode */
+ u64 read_only_recoveries;
+ /* String describing the operating mode of the VDO */
+ char mode[15];
+ /* Whether the VDO is in recovery mode */
+ bool in_recovery_mode;
+ /* What percentage of recovery mode work has been completed */
+ u8 recovery_percentage;
+ /* The statistics for the compressed block packer */
+ struct packer_statistics packer;
+ /* Counters for events in the block allocator */
+ struct block_allocator_statistics allocator;
+ /* Counters for events in the recovery journal */
+ struct recovery_journal_statistics journal;
+ /* The statistics for the slab journals */
+ struct slab_journal_statistics slab_journal;
+ /* The statistics for the slab summary */
+ struct slab_summary_statistics slab_summary;
+ /* The statistics for the reference counts */
+ struct ref_counts_statistics ref_counts;
+ /* The statistics for the block map */
+ struct block_map_statistics block_map;
+ /* The dedupe statistics from hash locks */
+ struct hash_lock_statistics hash_lock;
+ /* Counts of error conditions */
+ struct error_statistics errors;
+ /* The VDO instance */
+ u32 instance;
+ /* Current number of active VIOs */
+ u32 current_vios_in_progress;
+ /* Maximum number of active VIOs */
+ u32 max_vios;
+ /* Number of times the UDS index was too slow in responding */
+ u64 dedupe_advice_timeouts;
+ /* Number of flush requests submitted to the storage device */
+ u64 flush_out;
+ /* Logical block size */
+ u64 logical_block_size;
+ /* Bios submitted into VDO from above */
+ struct bio_stats bios_in;
+ struct bio_stats bios_in_partial;
+ /* Bios submitted onward for user data */
+ struct bio_stats bios_out;
+ /* Bios submitted onward for metadata */
+ struct bio_stats bios_meta;
+ struct bio_stats bios_journal;
+ struct bio_stats bios_page_cache;
+ struct bio_stats bios_out_completed;
+ struct bio_stats bios_meta_completed;
+ struct bio_stats bios_journal_completed;
+ struct bio_stats bios_page_cache_completed;
+ struct bio_stats bios_acknowledged;
+ struct bio_stats bios_acknowledged_partial;
+ /* Current number of bios in progress */
+ struct bio_stats bios_in_progress;
+ /* Memory usage stats. */
+ struct memory_usage memory_usage;
+ /* The statistics for the UDS index */
+ struct index_statistics index;
+};
+
+#endif /* not STATISTICS_H */
diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c
new file mode 100644
index 000000000000..d3493450b169
--- /dev/null
+++ b/drivers/md/dm-vdo/status-codes.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "status-codes.h"
+
+#include "errors.h"
+#include "logger.h"
+#include "permassert.h"
+#include "thread-utils.h"
+
+const struct error_info vdo_status_list[] = {
+ { "VDO_NOT_IMPLEMENTED", "Not implemented" },
+ { "VDO_OUT_OF_RANGE", "Out of range" },
+ { "VDO_REF_COUNT_INVALID", "Reference count would become invalid" },
+ { "VDO_NO_SPACE", "Out of space" },
+ { "VDO_BAD_CONFIGURATION", "Bad configuration option" },
+ { "VDO_COMPONENT_BUSY", "Prior operation still in progress" },
+ { "VDO_BAD_PAGE", "Corrupt or incorrect page" },
+ { "VDO_UNSUPPORTED_VERSION", "Unsupported component version" },
+ { "VDO_INCORRECT_COMPONENT", "Component id mismatch in decoder" },
+ { "VDO_PARAMETER_MISMATCH", "Parameters have conflicting values" },
+ { "VDO_UNKNOWN_PARTITION", "No partition exists with a given id" },
+ { "VDO_PARTITION_EXISTS", "A partition already exists with a given id" },
+ { "VDO_INCREMENT_TOO_SMALL", "Physical block growth of too few blocks" },
+ { "VDO_CHECKSUM_MISMATCH", "Incorrect checksum" },
+ { "VDO_LOCK_ERROR", "A lock is held incorrectly" },
+ { "VDO_READ_ONLY", "The device is in read-only mode" },
+ { "VDO_SHUTTING_DOWN", "The device is shutting down" },
+ { "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" },
+ { "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" },
+ { "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" },
+ { "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" },
+ { "VDO_BAD_MAPPING", "Invalid page mapping" },
+ { "VDO_BIO_CREATION_FAILED", "Bio creation failed" },
+ { "VDO_BAD_MAGIC", "Bad magic number" },
+ { "VDO_BAD_NONCE", "Bad nonce" },
+ { "VDO_JOURNAL_OVERFLOW", "Journal sequence number overflow" },
+ { "VDO_INVALID_ADMIN_STATE", "Invalid operation for current state" },
+};
+
+/**
+ * vdo_register_status_codes() - Register the VDO status codes.
+ * Return: A success or error code.
+ */
+int vdo_register_status_codes(void)
+{
+ int result;
+
+ BUILD_BUG_ON((VDO_STATUS_CODE_LAST - VDO_STATUS_CODE_BASE) !=
+ ARRAY_SIZE(vdo_status_list));
+
+ result = uds_register_error_block("VDO Status", VDO_STATUS_CODE_BASE,
+ VDO_STATUS_CODE_BLOCK_END, vdo_status_list,
+ sizeof(vdo_status_list));
+ return (result == UDS_SUCCESS) ? VDO_SUCCESS : result;
+}
+
+/**
+ * vdo_status_to_errno() - Given an error code, return a value we can return to the OS.
+ * @error: The error code to convert.
+ *
+ * The input error code may be a system-generated value (such as -EIO), an errno macro used in our
+ * code (such as EIO), or a UDS or VDO status code; the result must be something the rest of the OS
+ * can consume (negative errno values such as -EIO, in the case of the kernel).
+ *
+ * Return: A system error code value.
+ */
+int vdo_status_to_errno(int error)
+{
+ char error_name[VDO_MAX_ERROR_NAME_SIZE];
+ char error_message[VDO_MAX_ERROR_MESSAGE_SIZE];
+
+ /* 0 is success, negative a system error code */
+ if (likely(error <= 0))
+ return error;
+ if (error < 1024)
+ return -error;
+
+ /* VDO or UDS error */
+ switch (error) {
+ case VDO_NO_SPACE:
+ return -ENOSPC;
+ case VDO_READ_ONLY:
+ return -EIO;
+ default:
+ vdo_log_info("%s: mapping internal status code %d (%s: %s) to EIO",
+ __func__, error,
+ uds_string_error_name(error, error_name, sizeof(error_name)),
+ uds_string_error(error, error_message, sizeof(error_message)));
+ return -EIO;
+ }
+}
diff --git a/drivers/md/dm-vdo/status-codes.h b/drivers/md/dm-vdo/status-codes.h
new file mode 100644
index 000000000000..72da04159f88
--- /dev/null
+++ b/drivers/md/dm-vdo/status-codes.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_STATUS_CODES_H
+#define VDO_STATUS_CODES_H
+
+#include "errors.h"
+
+enum {
+ UDS_ERRORS_BLOCK_SIZE = UDS_ERROR_CODE_BLOCK_END - UDS_ERROR_CODE_BASE,
+ VDO_ERRORS_BLOCK_START = UDS_ERROR_CODE_BLOCK_END,
+ VDO_ERRORS_BLOCK_END = VDO_ERRORS_BLOCK_START + UDS_ERRORS_BLOCK_SIZE,
+};
+
+/* VDO-specific status codes. */
+enum vdo_status_codes {
+ /* base of all VDO errors */
+ VDO_STATUS_CODE_BASE = VDO_ERRORS_BLOCK_START,
+ /* we haven't written this yet */
+ VDO_NOT_IMPLEMENTED = VDO_STATUS_CODE_BASE,
+ /* input out of range */
+ VDO_OUT_OF_RANGE,
+ /* an invalid reference count would result */
+ VDO_REF_COUNT_INVALID,
+ /* a free block could not be allocated */
+ VDO_NO_SPACE,
+ /* improper or missing configuration option */
+ VDO_BAD_CONFIGURATION,
+ /* prior operation still in progress */
+ VDO_COMPONENT_BUSY,
+ /* page contents incorrect or corrupt data */
+ VDO_BAD_PAGE,
+ /* unsupported version of some component */
+ VDO_UNSUPPORTED_VERSION,
+ /* component id mismatch in decoder */
+ VDO_INCORRECT_COMPONENT,
+ /* parameters have conflicting values */
+ VDO_PARAMETER_MISMATCH,
+ /* no partition exists with a given id */
+ VDO_UNKNOWN_PARTITION,
+ /* a partition already exists with a given id */
+ VDO_PARTITION_EXISTS,
+ /* physical block growth of too few blocks */
+ VDO_INCREMENT_TOO_SMALL,
+ /* incorrect checksum */
+ VDO_CHECKSUM_MISMATCH,
+ /* a lock is held incorrectly */
+ VDO_LOCK_ERROR,
+ /* the VDO is in read-only mode */
+ VDO_READ_ONLY,
+ /* the VDO is shutting down */
+ VDO_SHUTTING_DOWN,
+ /* the recovery journal has corrupt entries */
+ VDO_CORRUPT_JOURNAL,
+ /* exceeds maximum number of slabs supported */
+ VDO_TOO_MANY_SLABS,
+ /* a compressed block fragment is invalid */
+ VDO_INVALID_FRAGMENT,
+ /* action is unsupported while rebuilding */
+ VDO_RETRY_AFTER_REBUILD,
+ /* a block map entry is invalid */
+ VDO_BAD_MAPPING,
+ /* bio_add_page failed */
+ VDO_BIO_CREATION_FAILED,
+ /* bad magic number */
+ VDO_BAD_MAGIC,
+ /* bad nonce */
+ VDO_BAD_NONCE,
+ /* sequence number overflow */
+ VDO_JOURNAL_OVERFLOW,
+ /* the VDO is not in a state to perform an admin operation */
+ VDO_INVALID_ADMIN_STATE,
+ /* one more than last error code */
+ VDO_STATUS_CODE_LAST,
+ VDO_STATUS_CODE_BLOCK_END = VDO_ERRORS_BLOCK_END
+};
+
+extern const struct error_info vdo_status_list[];
+
+int vdo_register_status_codes(void);
+
+int vdo_status_to_errno(int error);
+
+#endif /* VDO_STATUS_CODES_H */
diff --git a/drivers/md/dm-vdo/string-utils.c b/drivers/md/dm-vdo/string-utils.c
new file mode 100644
index 000000000000..71e44b4683ea
--- /dev/null
+++ b/drivers/md/dm-vdo/string-utils.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "string-utils.h"
+
+char *vdo_append_to_buffer(char *buffer, char *buf_end, const char *fmt, ...)
+{
+ va_list args;
+ size_t n;
+
+ va_start(args, fmt);
+ n = vsnprintf(buffer, buf_end - buffer, fmt, args);
+ if (n >= (size_t) (buf_end - buffer))
+ buffer = buf_end;
+ else
+ buffer += n;
+ va_end(args);
+
+ return buffer;
+}
diff --git a/drivers/md/dm-vdo/string-utils.h b/drivers/md/dm-vdo/string-utils.h
new file mode 100644
index 000000000000..96eecd38b1c2
--- /dev/null
+++ b/drivers/md/dm-vdo/string-utils.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_STRING_UTILS_H
+#define VDO_STRING_UTILS_H
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+/* Utilities related to string manipulation */
+
+static inline const char *vdo_bool_to_string(bool value)
+{
+ return value ? "true" : "false";
+}
+
+/* Append a formatted string to the end of a buffer. */
+char *vdo_append_to_buffer(char *buffer, char *buf_end, const char *fmt, ...)
+ __printf(3, 4);
+
+#endif /* VDO_STRING_UTILS_H */
diff --git a/drivers/md/dm-vdo/thread-device.c b/drivers/md/dm-vdo/thread-device.c
new file mode 100644
index 000000000000..df13ca914db8
--- /dev/null
+++ b/drivers/md/dm-vdo/thread-device.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "thread-device.h"
+
+/* A registry of threads associated with device id numbers. */
+static struct thread_registry device_id_thread_registry;
+
+/* Any registered thread must be unregistered. */
+void vdo_register_thread_device_id(struct registered_thread *new_thread,
+ unsigned int *id_ptr)
+{
+ vdo_register_thread(&device_id_thread_registry, new_thread, id_ptr);
+}
+
+void vdo_unregister_thread_device_id(void)
+{
+ vdo_unregister_thread(&device_id_thread_registry);
+}
+
+int vdo_get_thread_device_id(void)
+{
+ const unsigned int *pointer;
+
+ pointer = vdo_lookup_thread(&device_id_thread_registry);
+ return (pointer != NULL) ? *pointer : -1;
+}
+
+void vdo_initialize_thread_device_registry(void)
+{
+ vdo_initialize_thread_registry(&device_id_thread_registry);
+}
diff --git a/drivers/md/dm-vdo/thread-device.h b/drivers/md/dm-vdo/thread-device.h
new file mode 100644
index 000000000000..494d9c9ef3f6
--- /dev/null
+++ b/drivers/md/dm-vdo/thread-device.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_THREAD_DEVICE_H
+#define VDO_THREAD_DEVICE_H
+
+#include "thread-registry.h"
+
+void vdo_register_thread_device_id(struct registered_thread *new_thread,
+ unsigned int *id_ptr);
+
+void vdo_unregister_thread_device_id(void);
+
+int vdo_get_thread_device_id(void);
+
+void vdo_initialize_thread_device_registry(void);
+
+#endif /* VDO_THREAD_DEVICE_H */
diff --git a/drivers/md/dm-vdo/thread-registry.c b/drivers/md/dm-vdo/thread-registry.c
new file mode 100644
index 000000000000..d4a077d58c60
--- /dev/null
+++ b/drivers/md/dm-vdo/thread-registry.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "thread-registry.h"
+
+#include <asm/current.h>
+#include <linux/rculist.h>
+
+#include "permassert.h"
+
+/*
+ * We need to be careful when using other facilities that may use thread registry functions in
+ * their normal operation. For example, we do not want to invoke the logger while holding a lock.
+ */
+
+void vdo_initialize_thread_registry(struct thread_registry *registry)
+{
+ INIT_LIST_HEAD(&registry->links);
+ spin_lock_init(&registry->lock);
+}
+
+/* Register the current thread and associate it with a data pointer. */
+void vdo_register_thread(struct thread_registry *registry,
+ struct registered_thread *new_thread, const void *pointer)
+{
+ struct registered_thread *thread;
+ bool found_it = false;
+
+ INIT_LIST_HEAD(&new_thread->links);
+ new_thread->pointer = pointer;
+ new_thread->task = current;
+
+ spin_lock(&registry->lock);
+ list_for_each_entry(thread, &registry->links, links) {
+ if (thread->task == current) {
+ /* There should be no existing entry. */
+ list_del_rcu(&thread->links);
+ found_it = true;
+ break;
+ }
+ }
+ list_add_tail_rcu(&new_thread->links, &registry->links);
+ spin_unlock(&registry->lock);
+
+ VDO_ASSERT_LOG_ONLY(!found_it, "new thread not already in registry");
+ if (found_it) {
+ /* Ensure no RCU iterators see it before re-initializing. */
+ synchronize_rcu();
+ INIT_LIST_HEAD(&thread->links);
+ }
+}
+
+void vdo_unregister_thread(struct thread_registry *registry)
+{
+ struct registered_thread *thread;
+ bool found_it = false;
+
+ spin_lock(&registry->lock);
+ list_for_each_entry(thread, &registry->links, links) {
+ if (thread->task == current) {
+ list_del_rcu(&thread->links);
+ found_it = true;
+ break;
+ }
+ }
+ spin_unlock(&registry->lock);
+
+ VDO_ASSERT_LOG_ONLY(found_it, "thread found in registry");
+ if (found_it) {
+ /* Ensure no RCU iterators see it before re-initializing. */
+ synchronize_rcu();
+ INIT_LIST_HEAD(&thread->links);
+ }
+}
+
+const void *vdo_lookup_thread(struct thread_registry *registry)
+{
+ struct registered_thread *thread;
+ const void *result = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(thread, &registry->links, links) {
+ if (thread->task == current) {
+ result = thread->pointer;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return result;
+}
diff --git a/drivers/md/dm-vdo/thread-registry.h b/drivers/md/dm-vdo/thread-registry.h
new file mode 100644
index 000000000000..cc6d78312b9e
--- /dev/null
+++ b/drivers/md/dm-vdo/thread-registry.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_THREAD_REGISTRY_H
+#define VDO_THREAD_REGISTRY_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct thread_registry {
+ struct list_head links;
+ spinlock_t lock;
+};
+
+struct registered_thread {
+ struct list_head links;
+ const void *pointer;
+ struct task_struct *task;
+};
+
+void vdo_initialize_thread_registry(struct thread_registry *registry);
+
+void vdo_register_thread(struct thread_registry *registry,
+ struct registered_thread *new_thread, const void *pointer);
+
+void vdo_unregister_thread(struct thread_registry *registry);
+
+const void *vdo_lookup_thread(struct thread_registry *registry);
+
+#endif /* VDO_THREAD_REGISTRY_H */
diff --git a/drivers/md/dm-vdo/thread-utils.c b/drivers/md/dm-vdo/thread-utils.c
new file mode 100644
index 000000000000..ec08478dd013
--- /dev/null
+++ b/drivers/md/dm-vdo/thread-utils.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "thread-utils.h"
+
+#include <asm/current.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#include "errors.h"
+#include "logger.h"
+#include "memory-alloc.h"
+
+static struct hlist_head thread_list;
+static struct mutex thread_mutex;
+
+struct thread {
+ void (*thread_function)(void *thread_data);
+ void *thread_data;
+ struct hlist_node thread_links;
+ struct task_struct *thread_task;
+ struct completion thread_done;
+};
+
+void vdo_initialize_threads_mutex(void)
+{
+ mutex_init(&thread_mutex);
+}
+
+static int thread_starter(void *arg)
+{
+ struct registered_thread allocating_thread;
+ struct thread *thread = arg;
+
+ thread->thread_task = current;
+ mutex_lock(&thread_mutex);
+ hlist_add_head(&thread->thread_links, &thread_list);
+ mutex_unlock(&thread_mutex);
+ vdo_register_allocating_thread(&allocating_thread, NULL);
+ thread->thread_function(thread->thread_data);
+ vdo_unregister_allocating_thread();
+ complete(&thread->thread_done);
+ return 0;
+}
+
+int vdo_create_thread(void (*thread_function)(void *), void *thread_data,
+ const char *name, struct thread **new_thread)
+{
+ char *name_colon = strchr(name, ':');
+ char *my_name_colon = strchr(current->comm, ':');
+ struct task_struct *task;
+ struct thread *thread;
+ int result;
+
+ result = vdo_allocate(1, struct thread, __func__, &thread);
+ if (result != VDO_SUCCESS) {
+ vdo_log_warning("Error allocating memory for %s", name);
+ return result;
+ }
+
+ thread->thread_function = thread_function;
+ thread->thread_data = thread_data;
+ init_completion(&thread->thread_done);
+ /*
+ * Start the thread, with an appropriate thread name.
+ *
+ * If the name supplied contains a colon character, use that name. This causes uds module
+ * threads to have names like "uds:callbackW" and the main test runner thread to be named
+ * "zub:runtest".
+ *
+ * Otherwise if the current thread has a name containing a colon character, prefix the name
+ * supplied with the name of the current thread up to (and including) the colon character.
+ * Thus when the "kvdo0:dedupeQ" thread opens an index session, all the threads associated
+ * with that index will have names like "kvdo0:foo".
+ *
+ * Otherwise just use the name supplied. This should be a rare occurrence.
+ */
+ if ((name_colon == NULL) && (my_name_colon != NULL)) {
+ task = kthread_run(thread_starter, thread, "%.*s:%s",
+ (int) (my_name_colon - current->comm), current->comm,
+ name);
+ } else {
+ task = kthread_run(thread_starter, thread, "%s", name);
+ }
+
+ if (IS_ERR(task)) {
+ vdo_free(thread);
+ return PTR_ERR(task);
+ }
+
+ *new_thread = thread;
+ return VDO_SUCCESS;
+}
+
+void vdo_join_threads(struct thread *thread)
+{
+ while (wait_for_completion_interruptible(&thread->thread_done))
+ fsleep(1000);
+
+ mutex_lock(&thread_mutex);
+ hlist_del(&thread->thread_links);
+ mutex_unlock(&thread_mutex);
+ vdo_free(thread);
+}
diff --git a/drivers/md/dm-vdo/thread-utils.h b/drivers/md/dm-vdo/thread-utils.h
new file mode 100644
index 000000000000..687ab43e2cee
--- /dev/null
+++ b/drivers/md/dm-vdo/thread-utils.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef THREAD_UTILS_H
+#define THREAD_UTILS_H
+
+#include <linux/atomic.h>
+
+/* Thread and synchronization utilities */
+
+struct thread;
+
+void vdo_initialize_threads_mutex(void);
+int __must_check vdo_create_thread(void (*thread_function)(void *), void *thread_data,
+ const char *name, struct thread **new_thread);
+void vdo_join_threads(struct thread *thread);
+
+#endif /* UDS_THREADS_H */
diff --git a/drivers/md/dm-vdo/time-utils.h b/drivers/md/dm-vdo/time-utils.h
new file mode 100644
index 000000000000..5f1e850fd826
--- /dev/null
+++ b/drivers/md/dm-vdo/time-utils.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef UDS_TIME_UTILS_H
+#define UDS_TIME_UTILS_H
+
+#include <linux/ktime.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+static inline s64 ktime_to_seconds(ktime_t reltime)
+{
+ return reltime / NSEC_PER_SEC;
+}
+
+static inline ktime_t current_time_ns(clockid_t clock)
+{
+ return clock == CLOCK_MONOTONIC ? ktime_get_ns() : ktime_get_real_ns();
+}
+
+static inline ktime_t current_time_us(void)
+{
+ return current_time_ns(CLOCK_REALTIME) / NSEC_PER_USEC;
+}
+
+#endif /* UDS_TIME_UTILS_H */
diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h
new file mode 100644
index 000000000000..dbe892b10f26
--- /dev/null
+++ b/drivers/md/dm-vdo/types.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_TYPES_H
+#define VDO_TYPES_H
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+#include "funnel-queue.h"
+
+/* A size type in blocks. */
+typedef u64 block_count_t;
+
+/* The size of a block. */
+typedef u16 block_size_t;
+
+/* A counter for data_vios */
+typedef u16 data_vio_count_t;
+
+/* A height within a tree. */
+typedef u8 height_t;
+
+/* The logical block number as used by the consumer. */
+typedef u64 logical_block_number_t;
+
+/* The type of the nonce used to identify instances of VDO. */
+typedef u64 nonce_t;
+
+/* A size in pages. */
+typedef u32 page_count_t;
+
+/* A page number. */
+typedef u32 page_number_t;
+
+/*
+ * The physical (well, less logical) block number at which the block is found on the underlying
+ * device.
+ */
+typedef u64 physical_block_number_t;
+
+/* A count of tree roots. */
+typedef u8 root_count_t;
+
+/* A number of sectors. */
+typedef u8 sector_count_t;
+
+/* A sequence number. */
+typedef u64 sequence_number_t;
+
+/* The offset of a block within a slab. */
+typedef u32 slab_block_number;
+
+/* A size type in slabs. */
+typedef u16 slab_count_t;
+
+/* A slot in a bin or block map page. */
+typedef u16 slot_number_t;
+
+/* typedef thread_count_t - A thread counter. */
+typedef u8 thread_count_t;
+
+/* typedef thread_id_t - A thread ID, vdo threads are numbered sequentially from 0. */
+typedef u8 thread_id_t;
+
+/* A zone counter */
+typedef u8 zone_count_t;
+
+/* The following enums are persisted on storage, so the values must be preserved. */
+
+/* The current operating mode of the VDO. */
+enum vdo_state {
+ VDO_DIRTY = 0,
+ VDO_NEW = 1,
+ VDO_CLEAN = 2,
+ VDO_READ_ONLY_MODE = 3,
+ VDO_FORCE_REBUILD = 4,
+ VDO_RECOVERING = 5,
+ VDO_REPLAYING = 6, /* VDO_REPLAYING is never set anymore, but retained for upgrade */
+ VDO_REBUILD_FOR_UPGRADE = 7,
+
+ /* Keep VDO_STATE_COUNT at the bottom. */
+ VDO_STATE_COUNT
+};
+
+/**
+ * vdo_state_requires_read_only_rebuild() - Check whether a vdo_state indicates
+ * that a read-only rebuild is required.
+ * @state: The vdo_state to check.
+ *
+ * Return: true if the state indicates a rebuild is required
+ */
+static inline bool __must_check vdo_state_requires_read_only_rebuild(enum vdo_state state)
+{
+ return ((state == VDO_FORCE_REBUILD) || (state == VDO_REBUILD_FOR_UPGRADE));
+}
+
+/**
+ * vdo_state_requires_recovery() - Check whether a vdo state indicates that recovery is needed.
+ * @state: The state to check.
+ *
+ * Return: true if the state indicates a recovery is required
+ */
+static inline bool __must_check vdo_state_requires_recovery(enum vdo_state state)
+{
+ return ((state == VDO_DIRTY) || (state == VDO_REPLAYING) || (state == VDO_RECOVERING));
+}
+
+/*
+ * The current operation on a physical block (from the point of view of the recovery journal, slab
+ * journals, and reference counts.
+ */
+enum journal_operation {
+ VDO_JOURNAL_DATA_REMAPPING = 0,
+ VDO_JOURNAL_BLOCK_MAP_REMAPPING = 1,
+} __packed;
+
+/* Partition IDs encoded in the volume layout in the super block. */
+enum partition_id {
+ VDO_BLOCK_MAP_PARTITION = 0,
+ VDO_SLAB_DEPOT_PARTITION = 1,
+ VDO_RECOVERY_JOURNAL_PARTITION = 2,
+ VDO_SLAB_SUMMARY_PARTITION = 3,
+} __packed;
+
+/* Metadata types for the vdo. */
+enum vdo_metadata_type {
+ VDO_METADATA_RECOVERY_JOURNAL = 1,
+ VDO_METADATA_SLAB_JOURNAL = 2,
+ VDO_METADATA_RECOVERY_JOURNAL_2 = 3,
+} __packed;
+
+/* A position in the block map where a block map entry is stored. */
+struct block_map_slot {
+ physical_block_number_t pbn;
+ slot_number_t slot;
+};
+
+/*
+ * Four bits of each five-byte block map entry contain a mapping state value used to distinguish
+ * unmapped or discarded logical blocks (which are treated as mapped to the zero block) from entries
+ * that have been mapped to a physical block, including the zero block.
+ *
+ * FIXME: these should maybe be defines.
+ */
+enum block_mapping_state {
+ VDO_MAPPING_STATE_UNMAPPED = 0, /* Must be zero to be the default value */
+ VDO_MAPPING_STATE_UNCOMPRESSED = 1, /* A normal (uncompressed) block */
+ VDO_MAPPING_STATE_COMPRESSED_BASE = 2, /* Compressed in slot 0 */
+ VDO_MAPPING_STATE_COMPRESSED_MAX = 15, /* Compressed in slot 13 */
+};
+
+enum {
+ VDO_MAX_COMPRESSION_SLOTS =
+ (VDO_MAPPING_STATE_COMPRESSED_MAX - VDO_MAPPING_STATE_COMPRESSED_BASE + 1),
+};
+
+
+struct data_location {
+ physical_block_number_t pbn;
+ enum block_mapping_state state;
+};
+
+/* The configuration of a single slab derived from the configured block size and slab size. */
+struct slab_config {
+ /* total number of blocks in the slab */
+ block_count_t slab_blocks;
+ /* number of blocks available for data */
+ block_count_t data_blocks;
+ /* number of blocks for reference counts */
+ block_count_t reference_count_blocks;
+ /* number of blocks for the slab journal */
+ block_count_t slab_journal_blocks;
+ /*
+ * Number of blocks after which the slab journal starts pushing out a reference_block for
+ * each new entry it receives.
+ */
+ block_count_t slab_journal_flushing_threshold;
+ /*
+ * Number of blocks after which the slab journal pushes out all reference_blocks and makes
+ * all vios wait.
+ */
+ block_count_t slab_journal_blocking_threshold;
+ /* Number of blocks after which the slab must be scrubbed before coming online. */
+ block_count_t slab_journal_scrubbing_threshold;
+} __packed;
+
+/*
+ * This structure is memcmp'd for equality. Keep it packed and don't add any fields that are not
+ * properly set in both extant and parsed configs.
+ */
+struct thread_count_config {
+ unsigned int bio_ack_threads;
+ unsigned int bio_threads;
+ unsigned int bio_rotation_interval;
+ unsigned int cpu_threads;
+ unsigned int logical_zones;
+ unsigned int physical_zones;
+ unsigned int hash_zones;
+} __packed;
+
+struct device_config {
+ struct dm_target *owning_target;
+ struct dm_dev *owned_device;
+ struct vdo *vdo;
+ /* All configs referencing a layer are kept on a list in the layer */
+ struct list_head config_list;
+ char *original_string;
+ unsigned int version;
+ char *parent_device_name;
+ block_count_t physical_blocks;
+ /*
+ * This is the number of logical blocks from VDO's internal point of view. It is the number
+ * of 4K blocks regardless of the value of the logical_block_size parameter below.
+ */
+ block_count_t logical_blocks;
+ unsigned int logical_block_size;
+ unsigned int cache_size;
+ unsigned int block_map_maximum_age;
+ bool deduplication;
+ bool compression;
+ struct thread_count_config thread_counts;
+ block_count_t max_discard_blocks;
+};
+
+enum vdo_completion_type {
+ /* Keep VDO_UNSET_COMPLETION_TYPE at the top. */
+ VDO_UNSET_COMPLETION_TYPE,
+ VDO_ACTION_COMPLETION,
+ VDO_ADMIN_COMPLETION,
+ VDO_BLOCK_ALLOCATOR_COMPLETION,
+ VDO_DATA_VIO_POOL_COMPLETION,
+ VDO_DECREMENT_COMPLETION,
+ VDO_FLUSH_COMPLETION,
+ VDO_FLUSH_NOTIFICATION_COMPLETION,
+ VDO_GENERATION_FLUSHED_COMPLETION,
+ VDO_HASH_ZONE_COMPLETION,
+ VDO_HASH_ZONES_COMPLETION,
+ VDO_LOCK_COUNTER_COMPLETION,
+ VDO_PAGE_COMPLETION,
+ VDO_READ_ONLY_MODE_COMPLETION,
+ VDO_REPAIR_COMPLETION,
+ VDO_SYNC_COMPLETION,
+ VIO_COMPLETION,
+} __packed;
+
+struct vdo_completion;
+
+/**
+ * typedef vdo_action_fn - An asynchronous VDO operation.
+ * @completion: The completion of the operation.
+ */
+typedef void (*vdo_action_fn)(struct vdo_completion *completion);
+
+enum vdo_completion_priority {
+ BIO_ACK_Q_ACK_PRIORITY = 0,
+ BIO_ACK_Q_MAX_PRIORITY = 0,
+ BIO_Q_COMPRESSED_DATA_PRIORITY = 0,
+ BIO_Q_DATA_PRIORITY = 0,
+ BIO_Q_FLUSH_PRIORITY = 2,
+ BIO_Q_HIGH_PRIORITY = 2,
+ BIO_Q_METADATA_PRIORITY = 1,
+ BIO_Q_VERIFY_PRIORITY = 1,
+ BIO_Q_MAX_PRIORITY = 2,
+ CPU_Q_COMPLETE_VIO_PRIORITY = 0,
+ CPU_Q_COMPLETE_READ_PRIORITY = 0,
+ CPU_Q_COMPRESS_BLOCK_PRIORITY = 0,
+ CPU_Q_EVENT_REPORTER_PRIORITY = 0,
+ CPU_Q_HASH_BLOCK_PRIORITY = 0,
+ CPU_Q_MAX_PRIORITY = 0,
+ UDS_Q_PRIORITY = 0,
+ UDS_Q_MAX_PRIORITY = 0,
+ VDO_DEFAULT_Q_COMPLETION_PRIORITY = 1,
+ VDO_DEFAULT_Q_FLUSH_PRIORITY = 2,
+ VDO_DEFAULT_Q_MAP_BIO_PRIORITY = 0,
+ VDO_DEFAULT_Q_SYNC_PRIORITY = 2,
+ VDO_DEFAULT_Q_VIO_CALLBACK_PRIORITY = 1,
+ VDO_DEFAULT_Q_MAX_PRIORITY = 2,
+ /* The maximum allowable priority */
+ VDO_WORK_Q_MAX_PRIORITY = 2,
+ /* A value which must be out of range for a valid priority */
+ VDO_WORK_Q_DEFAULT_PRIORITY = VDO_WORK_Q_MAX_PRIORITY + 1,
+};
+
+struct vdo_completion {
+ /* The type of completion this is */
+ enum vdo_completion_type type;
+
+ /*
+ * <code>true</code> once the processing of the operation is complete. This flag should not
+ * be used by waiters external to the VDO base as it is used to gate calling the callback.
+ */
+ bool complete;
+
+ /*
+ * If true, queue this completion on the next callback invocation, even if it is already
+ * running on the correct thread.
+ */
+ bool requeue;
+
+ /* The ID of the thread which should run the next callback */
+ thread_id_t callback_thread_id;
+
+ /* The result of the operation */
+ int result;
+
+ /* The VDO on which this completion operates */
+ struct vdo *vdo;
+
+ /* The callback which will be called once the operation is complete */
+ vdo_action_fn callback;
+
+ /* Callback which, if set, will be called if an error result is set */
+ vdo_action_fn error_handler;
+
+ /* The parent object, if any, that spawned this completion */
+ void *parent;
+
+ /* Entry link for lock-free work queue */
+ struct funnel_queue_entry work_queue_entry_link;
+ enum vdo_completion_priority priority;
+ struct vdo_work_queue *my_queue;
+};
+
+struct block_allocator;
+struct data_vio;
+struct vdo;
+struct vdo_config;
+
+/* vio types for statistics and instrumentation. */
+enum vio_type {
+ VIO_TYPE_UNINITIALIZED = 0,
+ VIO_TYPE_DATA,
+ VIO_TYPE_BLOCK_ALLOCATOR,
+ VIO_TYPE_BLOCK_MAP,
+ VIO_TYPE_BLOCK_MAP_INTERIOR,
+ VIO_TYPE_GEOMETRY,
+ VIO_TYPE_PARTITION_COPY,
+ VIO_TYPE_RECOVERY_JOURNAL,
+ VIO_TYPE_SLAB_JOURNAL,
+ VIO_TYPE_SLAB_SUMMARY,
+ VIO_TYPE_SUPER_BLOCK,
+} __packed;
+
+/* Priority levels for asynchronous I/O operations performed on a vio. */
+enum vio_priority {
+ VIO_PRIORITY_LOW = 0,
+ VIO_PRIORITY_DATA = VIO_PRIORITY_LOW,
+ VIO_PRIORITY_COMPRESSED_DATA = VIO_PRIORITY_DATA,
+ VIO_PRIORITY_METADATA,
+ VIO_PRIORITY_HIGH,
+} __packed;
+
+/*
+ * A wrapper for a bio. All I/O to the storage below a vdo is conducted via vios.
+ */
+struct vio {
+ /* The completion for this vio */
+ struct vdo_completion completion;
+
+ /* The bio zone in which I/O should be processed */
+ zone_count_t bio_zone;
+
+ /* The queueing priority of the vio operation */
+ enum vio_priority priority;
+
+ /* The vio type is used for statistics and instrumentation. */
+ enum vio_type type;
+
+ /* The size of this vio in blocks */
+ unsigned int block_count;
+
+ /* The data being read or written. */
+ char *data;
+
+ /* The VDO-owned bio to use for all IO for this vio */
+ struct bio *bio;
+
+ /*
+ * A list of enqueued bios with consecutive block numbers, stored by vdo_submit_bio() under
+ * the first-enqueued vio. The other vios are found via their bio entries in this list, and
+ * are not added to the work queue as separate completions.
+ */
+ struct bio_list bios_merged;
+};
+
+#endif /* VDO_TYPES_H */
diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c
new file mode 100644
index 000000000000..fff847767755
--- /dev/null
+++ b/drivers/md/dm-vdo/vdo.c
@@ -0,0 +1,1730 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+/*
+ * This file contains the main entry points for normal operations on a vdo as well as functions for
+ * constructing and destroying vdo instances (in memory).
+ */
+
+/**
+ * DOC:
+ *
+ * A read_only_notifier has a single completion which is used to perform read-only notifications,
+ * however, vdo_enter_read_only_mode() may be called from any thread. A pair of fields, protected
+ * by a spinlock, are used to control the read-only mode entry process. The first field holds the
+ * read-only error. The second is the state field, which may hold any of the four special values
+ * enumerated here.
+ *
+ * When vdo_enter_read_only_mode() is called from some vdo thread, if the read_only_error field
+ * already contains an error (i.e. its value is not VDO_SUCCESS), then some other error has already
+ * initiated the read-only process, and nothing more is done. Otherwise, the new error is stored in
+ * the read_only_error field, and the state field is consulted. If the state is MAY_NOTIFY, it is
+ * set to NOTIFYING, and the notification process begins. If the state is MAY_NOT_NOTIFY, then
+ * notifications are currently disallowed, generally due to the vdo being suspended. In this case,
+ * the nothing more will be done until the vdo is resumed, at which point the notification will be
+ * performed. In any other case, the vdo is already read-only, and there is nothing more to do.
+ */
+
+#include "vdo.h"
+
+#include <linux/completion.h>
+#include <linux/device-mapper.h>
+#include <linux/kernel.h>
+#include <linux/lz4.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+#include "string-utils.h"
+
+#include "block-map.h"
+#include "completion.h"
+#include "data-vio.h"
+#include "dedupe.h"
+#include "encodings.h"
+#include "funnel-workqueue.h"
+#include "io-submitter.h"
+#include "logical-zone.h"
+#include "packer.h"
+#include "physical-zone.h"
+#include "recovery-journal.h"
+#include "slab-depot.h"
+#include "statistics.h"
+#include "status-codes.h"
+#include "vio.h"
+
+#define PARANOID_THREAD_CONSISTENCY_CHECKS 0
+
+struct sync_completion {
+ struct vdo_completion vdo_completion;
+ struct completion completion;
+};
+
+/* A linked list is adequate for the small number of entries we expect. */
+struct device_registry {
+ struct list_head links;
+ /* TODO: Convert to rcu per kernel recommendation. */
+ rwlock_t lock;
+};
+
+static struct device_registry registry;
+
+/**
+ * vdo_initialize_device_registry_once() - Initialize the necessary structures for the device
+ * registry.
+ */
+void vdo_initialize_device_registry_once(void)
+{
+ INIT_LIST_HEAD(&registry.links);
+ rwlock_init(&registry.lock);
+}
+
+/** vdo_is_equal() - Implements vdo_filter_fn. */
+static bool vdo_is_equal(struct vdo *vdo, const void *context)
+{
+ return (vdo == context);
+}
+
+/**
+ * filter_vdos_locked() - Find a vdo in the registry if it exists there.
+ * @filter: The filter function to apply to devices.
+ * @context: A bit of context to provide the filter.
+ *
+ * Context: Must be called holding the lock.
+ *
+ * Return: the vdo object found, if any.
+ */
+static struct vdo * __must_check filter_vdos_locked(vdo_filter_fn filter,
+ const void *context)
+{
+ struct vdo *vdo;
+
+ list_for_each_entry(vdo, &registry.links, registration) {
+ if (filter(vdo, context))
+ return vdo;
+ }
+
+ return NULL;
+}
+
+/**
+ * vdo_find_matching() - Find and return the first (if any) vdo matching a given filter function.
+ * @filter: The filter function to apply to vdos.
+ * @context: A bit of context to provide the filter.
+ */
+struct vdo *vdo_find_matching(vdo_filter_fn filter, const void *context)
+{
+ struct vdo *vdo;
+
+ read_lock(&registry.lock);
+ vdo = filter_vdos_locked(filter, context);
+ read_unlock(&registry.lock);
+
+ return vdo;
+}
+
+static void start_vdo_request_queue(void *ptr)
+{
+ struct vdo_thread *thread = vdo_get_work_queue_owner(vdo_get_current_work_queue());
+
+ vdo_register_allocating_thread(&thread->allocating_thread,
+ &thread->vdo->allocations_allowed);
+}
+
+static void finish_vdo_request_queue(void *ptr)
+{
+ vdo_unregister_allocating_thread();
+}
+
+#ifdef MODULE
+#define MODULE_NAME THIS_MODULE->name
+#else
+#define MODULE_NAME "dm-vdo"
+#endif /* MODULE */
+
+static const struct vdo_work_queue_type default_queue_type = {
+ .start = start_vdo_request_queue,
+ .finish = finish_vdo_request_queue,
+ .max_priority = VDO_DEFAULT_Q_MAX_PRIORITY,
+ .default_priority = VDO_DEFAULT_Q_COMPLETION_PRIORITY,
+};
+
+static const struct vdo_work_queue_type bio_ack_q_type = {
+ .start = NULL,
+ .finish = NULL,
+ .max_priority = BIO_ACK_Q_MAX_PRIORITY,
+ .default_priority = BIO_ACK_Q_ACK_PRIORITY,
+};
+
+static const struct vdo_work_queue_type cpu_q_type = {
+ .start = NULL,
+ .finish = NULL,
+ .max_priority = CPU_Q_MAX_PRIORITY,
+ .default_priority = CPU_Q_MAX_PRIORITY,
+};
+
+static void uninitialize_thread_config(struct thread_config *config)
+{
+ vdo_free(vdo_forget(config->logical_threads));
+ vdo_free(vdo_forget(config->physical_threads));
+ vdo_free(vdo_forget(config->hash_zone_threads));
+ vdo_free(vdo_forget(config->bio_threads));
+ memset(config, 0, sizeof(struct thread_config));
+}
+
+static void assign_thread_ids(struct thread_config *config,
+ thread_id_t thread_ids[], zone_count_t count)
+{
+ zone_count_t zone;
+
+ for (zone = 0; zone < count; zone++)
+ thread_ids[zone] = config->thread_count++;
+}
+
+/**
+ * initialize_thread_config() - Initialize the thread mapping
+ *
+ * If the logical, physical, and hash zone counts are all 0, a single thread will be shared by all
+ * three plus the packer and recovery journal. Otherwise, there must be at least one of each type,
+ * and each will have its own thread, as will the packer and recovery journal.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int __must_check initialize_thread_config(struct thread_count_config counts,
+ struct thread_config *config)
+{
+ int result;
+ bool single = ((counts.logical_zones + counts.physical_zones + counts.hash_zones) == 0);
+
+ config->bio_thread_count = counts.bio_threads;
+ if (single) {
+ config->logical_zone_count = 1;
+ config->physical_zone_count = 1;
+ config->hash_zone_count = 1;
+ } else {
+ config->logical_zone_count = counts.logical_zones;
+ config->physical_zone_count = counts.physical_zones;
+ config->hash_zone_count = counts.hash_zones;
+ }
+
+ result = vdo_allocate(config->logical_zone_count, thread_id_t,
+ "logical thread array", &config->logical_threads);
+ if (result != VDO_SUCCESS) {
+ uninitialize_thread_config(config);
+ return result;
+ }
+
+ result = vdo_allocate(config->physical_zone_count, thread_id_t,
+ "physical thread array", &config->physical_threads);
+ if (result != VDO_SUCCESS) {
+ uninitialize_thread_config(config);
+ return result;
+ }
+
+ result = vdo_allocate(config->hash_zone_count, thread_id_t,
+ "hash thread array", &config->hash_zone_threads);
+ if (result != VDO_SUCCESS) {
+ uninitialize_thread_config(config);
+ return result;
+ }
+
+ result = vdo_allocate(config->bio_thread_count, thread_id_t,
+ "bio thread array", &config->bio_threads);
+ if (result != VDO_SUCCESS) {
+ uninitialize_thread_config(config);
+ return result;
+ }
+
+ if (single) {
+ config->logical_threads[0] = config->thread_count;
+ config->physical_threads[0] = config->thread_count;
+ config->hash_zone_threads[0] = config->thread_count++;
+ } else {
+ config->admin_thread = config->thread_count;
+ config->journal_thread = config->thread_count++;
+ config->packer_thread = config->thread_count++;
+ assign_thread_ids(config, config->logical_threads, counts.logical_zones);
+ assign_thread_ids(config, config->physical_threads, counts.physical_zones);
+ assign_thread_ids(config, config->hash_zone_threads, counts.hash_zones);
+ }
+
+ config->dedupe_thread = config->thread_count++;
+ config->bio_ack_thread =
+ ((counts.bio_ack_threads > 0) ? config->thread_count++ : VDO_INVALID_THREAD_ID);
+ config->cpu_thread = config->thread_count++;
+ assign_thread_ids(config, config->bio_threads, counts.bio_threads);
+ return VDO_SUCCESS;
+}
+
+/**
+ * read_geometry_block() - Synchronously read the geometry block from a vdo's underlying block
+ * device.
+ * @vdo: The vdo whose geometry is to be read.
+ *
+ * Return: VDO_SUCCESS or an error code.
+ */
+static int __must_check read_geometry_block(struct vdo *vdo)
+{
+ struct vio *vio;
+ char *block;
+ int result;
+
+ result = vdo_allocate(VDO_BLOCK_SIZE, u8, __func__, &block);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = create_metadata_vio(vdo, VIO_TYPE_GEOMETRY, VIO_PRIORITY_HIGH, NULL,
+ block, &vio);
+ if (result != VDO_SUCCESS) {
+ vdo_free(block);
+ return result;
+ }
+
+ /*
+ * This is only safe because, having not already loaded the geometry, the vdo's geometry's
+ * bio_offset field is 0, so the fact that vio_reset_bio() will subtract that offset from
+ * the supplied pbn is not a problem.
+ */
+ result = vio_reset_bio(vio, block, NULL, REQ_OP_READ,
+ VDO_GEOMETRY_BLOCK_LOCATION);
+ if (result != VDO_SUCCESS) {
+ free_vio(vdo_forget(vio));
+ vdo_free(block);
+ return result;
+ }
+
+ bio_set_dev(vio->bio, vdo_get_backing_device(vdo));
+ submit_bio_wait(vio->bio);
+ result = blk_status_to_errno(vio->bio->bi_status);
+ free_vio(vdo_forget(vio));
+ if (result != 0) {
+ vdo_log_error_strerror(result, "synchronous read failed");
+ vdo_free(block);
+ return -EIO;
+ }
+
+ result = vdo_parse_geometry_block((u8 *) block, &vdo->geometry);
+ vdo_free(block);
+ return result;
+}
+
+static bool get_zone_thread_name(const thread_id_t thread_ids[], zone_count_t count,
+ thread_id_t id, const char *prefix,
+ char *buffer, size_t buffer_length)
+{
+ if (id >= thread_ids[0]) {
+ thread_id_t index = id - thread_ids[0];
+
+ if (index < count) {
+ snprintf(buffer, buffer_length, "%s%d", prefix, index);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * get_thread_name() - Format the name of the worker thread desired to support a given work queue.
+ * @thread_config: The thread configuration.
+ * @thread_id: The thread id.
+ * @buffer: Where to put the formatted name.
+ * @buffer_length: Size of the output buffer.
+ *
+ * The physical layer may add a prefix identifying the product; the output from this function
+ * should just identify the thread.
+ */
+static void get_thread_name(const struct thread_config *thread_config,
+ thread_id_t thread_id, char *buffer, size_t buffer_length)
+{
+ if (thread_id == thread_config->journal_thread) {
+ if (thread_config->packer_thread == thread_id) {
+ /*
+ * This is the "single thread" config where one thread is used for the
+ * journal, packer, logical, physical, and hash zones. In that case, it is
+ * known as the "request queue."
+ */
+ snprintf(buffer, buffer_length, "reqQ");
+ return;
+ }
+
+ snprintf(buffer, buffer_length, "journalQ");
+ return;
+ } else if (thread_id == thread_config->admin_thread) {
+ /* Theoretically this could be different from the journal thread. */
+ snprintf(buffer, buffer_length, "adminQ");
+ return;
+ } else if (thread_id == thread_config->packer_thread) {
+ snprintf(buffer, buffer_length, "packerQ");
+ return;
+ } else if (thread_id == thread_config->dedupe_thread) {
+ snprintf(buffer, buffer_length, "dedupeQ");
+ return;
+ } else if (thread_id == thread_config->bio_ack_thread) {
+ snprintf(buffer, buffer_length, "ackQ");
+ return;
+ } else if (thread_id == thread_config->cpu_thread) {
+ snprintf(buffer, buffer_length, "cpuQ");
+ return;
+ }
+
+ if (get_zone_thread_name(thread_config->logical_threads,
+ thread_config->logical_zone_count,
+ thread_id, "logQ", buffer, buffer_length))
+ return;
+
+ if (get_zone_thread_name(thread_config->physical_threads,
+ thread_config->physical_zone_count,
+ thread_id, "physQ", buffer, buffer_length))
+ return;
+
+ if (get_zone_thread_name(thread_config->hash_zone_threads,
+ thread_config->hash_zone_count,
+ thread_id, "hashQ", buffer, buffer_length))
+ return;
+
+ if (get_zone_thread_name(thread_config->bio_threads,
+ thread_config->bio_thread_count,
+ thread_id, "bioQ", buffer, buffer_length))
+ return;
+
+ /* Some sort of misconfiguration? */
+ snprintf(buffer, buffer_length, "reqQ%d", thread_id);
+}
+
+/**
+ * vdo_make_thread() - Construct a single vdo work_queue and its associated thread (or threads for
+ * round-robin queues).
+ * @vdo: The vdo which owns the thread.
+ * @thread_id: The id of the thread to create (as determined by the thread_config).
+ * @type: The description of the work queue for this thread.
+ * @queue_count: The number of actual threads/queues contained in the "thread".
+ * @contexts: An array of queue_count contexts, one for each individual queue; may be NULL.
+ *
+ * Each "thread" constructed by this method is represented by a unique thread id in the thread
+ * config, and completions can be enqueued to the queue and run on the threads comprising this
+ * entity.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_make_thread(struct vdo *vdo, thread_id_t thread_id,
+ const struct vdo_work_queue_type *type,
+ unsigned int queue_count, void *contexts[])
+{
+ struct vdo_thread *thread = &vdo->threads[thread_id];
+ char queue_name[MAX_VDO_WORK_QUEUE_NAME_LEN];
+
+ if (type == NULL)
+ type = &default_queue_type;
+
+ if (thread->queue != NULL) {
+ return VDO_ASSERT(vdo_work_queue_type_is(thread->queue, type),
+ "already constructed vdo thread %u is of the correct type",
+ thread_id);
+ }
+
+ thread->vdo = vdo;
+ thread->thread_id = thread_id;
+ get_thread_name(&vdo->thread_config, thread_id, queue_name, sizeof(queue_name));
+ return vdo_make_work_queue(vdo->thread_name_prefix, queue_name, thread,
+ type, queue_count, contexts, &thread->queue);
+}
+
+/**
+ * register_vdo() - Register a VDO; it must not already be registered.
+ * @vdo: The vdo to register.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+static int register_vdo(struct vdo *vdo)
+{
+ int result;
+
+ write_lock(&registry.lock);
+ result = VDO_ASSERT(filter_vdos_locked(vdo_is_equal, vdo) == NULL,
+ "VDO not already registered");
+ if (result == VDO_SUCCESS) {
+ INIT_LIST_HEAD(&vdo->registration);
+ list_add_tail(&vdo->registration, &registry.links);
+ }
+ write_unlock(&registry.lock);
+
+ return result;
+}
+
+/**
+ * initialize_vdo() - Do the portion of initializing a vdo which will clean up after itself on
+ * error.
+ * @vdo: The vdo being initialized
+ * @config: The configuration of the vdo
+ * @instance: The instance number of the vdo
+ * @reason: The buffer to hold the failure reason on error
+ */
+static int initialize_vdo(struct vdo *vdo, struct device_config *config,
+ unsigned int instance, char **reason)
+{
+ int result;
+ zone_count_t i;
+
+ vdo->device_config = config;
+ vdo->starting_sector_offset = config->owning_target->begin;
+ vdo->instance = instance;
+ vdo->allocations_allowed = true;
+ vdo_set_admin_state_code(&vdo->admin.state, VDO_ADMIN_STATE_NEW);
+ INIT_LIST_HEAD(&vdo->device_config_list);
+ vdo_initialize_completion(&vdo->admin.completion, vdo, VDO_ADMIN_COMPLETION);
+ init_completion(&vdo->admin.callback_sync);
+ mutex_init(&vdo->stats_mutex);
+ result = read_geometry_block(vdo);
+ if (result != VDO_SUCCESS) {
+ *reason = "Could not load geometry block";
+ return result;
+ }
+
+ result = initialize_thread_config(config->thread_counts, &vdo->thread_config);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot create thread configuration";
+ return result;
+ }
+
+ vdo_log_info("zones: %d logical, %d physical, %d hash; total threads: %d",
+ config->thread_counts.logical_zones,
+ config->thread_counts.physical_zones,
+ config->thread_counts.hash_zones, vdo->thread_config.thread_count);
+
+ /* Compression context storage */
+ result = vdo_allocate(config->thread_counts.cpu_threads, char *, "LZ4 context",
+ &vdo->compression_context);
+ if (result != VDO_SUCCESS) {
+ *reason = "cannot allocate LZ4 context";
+ return result;
+ }
+
+ for (i = 0; i < config->thread_counts.cpu_threads; i++) {
+ result = vdo_allocate(LZ4_MEM_COMPRESS, char, "LZ4 context",
+ &vdo->compression_context[i]);
+ if (result != VDO_SUCCESS) {
+ *reason = "cannot allocate LZ4 context";
+ return result;
+ }
+ }
+
+ result = register_vdo(vdo);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot add VDO to device registry";
+ return result;
+ }
+
+ vdo_set_admin_state_code(&vdo->admin.state, VDO_ADMIN_STATE_INITIALIZED);
+ return result;
+}
+
+/**
+ * vdo_make() - Allocate and initialize a vdo.
+ * @instance: Device instantiation counter.
+ * @config: The device configuration.
+ * @reason: The reason for any failure during this call.
+ * @vdo_ptr: A pointer to hold the created vdo.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_make(unsigned int instance, struct device_config *config, char **reason,
+ struct vdo **vdo_ptr)
+{
+ int result;
+ struct vdo *vdo;
+
+ /* Initialize with a generic failure reason to prevent returning garbage. */
+ *reason = "Unspecified error";
+
+ result = vdo_allocate(1, struct vdo, __func__, &vdo);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot allocate VDO";
+ return result;
+ }
+
+ result = initialize_vdo(vdo, config, instance, reason);
+ if (result != VDO_SUCCESS) {
+ vdo_destroy(vdo);
+ return result;
+ }
+
+ /* From here on, the caller will clean up if there is an error. */
+ *vdo_ptr = vdo;
+
+ snprintf(vdo->thread_name_prefix, sizeof(vdo->thread_name_prefix),
+ "%s%u", MODULE_NAME, instance);
+ BUG_ON(vdo->thread_name_prefix[0] == '\0');
+ result = vdo_allocate(vdo->thread_config.thread_count,
+ struct vdo_thread, __func__, &vdo->threads);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot allocate thread structures";
+ return result;
+ }
+
+ result = vdo_make_thread(vdo, vdo->thread_config.admin_thread,
+ &default_queue_type, 1, NULL);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot make admin thread";
+ return result;
+ }
+
+ result = vdo_make_flusher(vdo);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot make flusher zones";
+ return result;
+ }
+
+ result = vdo_make_packer(vdo, DEFAULT_PACKER_BINS, &vdo->packer);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot make packer zones";
+ return result;
+ }
+
+ BUG_ON(vdo->device_config->logical_block_size <= 0);
+ BUG_ON(vdo->device_config->owned_device == NULL);
+ result = make_data_vio_pool(vdo, MAXIMUM_VDO_USER_VIOS,
+ MAXIMUM_VDO_USER_VIOS * 3 / 4,
+ &vdo->data_vio_pool);
+ if (result != VDO_SUCCESS) {
+ *reason = "Cannot allocate data_vio pool";
+ return result;
+ }
+
+ result = vdo_make_io_submitter(config->thread_counts.bio_threads,
+ config->thread_counts.bio_rotation_interval,
+ get_data_vio_pool_request_limit(vdo->data_vio_pool),
+ vdo, &vdo->io_submitter);
+ if (result != VDO_SUCCESS) {
+ *reason = "bio submission initialization failed";
+ return result;
+ }
+
+ if (vdo_uses_bio_ack_queue(vdo)) {
+ result = vdo_make_thread(vdo, vdo->thread_config.bio_ack_thread,
+ &bio_ack_q_type,
+ config->thread_counts.bio_ack_threads, NULL);
+ if (result != VDO_SUCCESS) {
+ *reason = "bio ack queue initialization failed";
+ return result;
+ }
+ }
+
+ result = vdo_make_thread(vdo, vdo->thread_config.cpu_thread, &cpu_q_type,
+ config->thread_counts.cpu_threads,
+ (void **) vdo->compression_context);
+ if (result != VDO_SUCCESS) {
+ *reason = "CPU queue initialization failed";
+ return result;
+ }
+
+ return VDO_SUCCESS;
+}
+
+static void finish_vdo(struct vdo *vdo)
+{
+ int i;
+
+ if (vdo->threads == NULL)
+ return;
+
+ vdo_cleanup_io_submitter(vdo->io_submitter);
+ vdo_finish_dedupe_index(vdo->hash_zones);
+
+ for (i = 0; i < vdo->thread_config.thread_count; i++)
+ vdo_finish_work_queue(vdo->threads[i].queue);
+}
+
+/**
+ * free_listeners() - Free the list of read-only listeners associated with a thread.
+ * @thread_data: The thread holding the list to free.
+ */
+static void free_listeners(struct vdo_thread *thread)
+{
+ struct read_only_listener *listener, *next;
+
+ for (listener = vdo_forget(thread->listeners); listener != NULL; listener = next) {
+ next = vdo_forget(listener->next);
+ vdo_free(listener);
+ }
+}
+
+static void uninitialize_super_block(struct vdo_super_block *super_block)
+{
+ free_vio_components(&super_block->vio);
+ vdo_free(super_block->buffer);
+}
+
+/**
+ * unregister_vdo() - Remove a vdo from the device registry.
+ * @vdo: The vdo to remove.
+ */
+static void unregister_vdo(struct vdo *vdo)
+{
+ write_lock(&registry.lock);
+ if (filter_vdos_locked(vdo_is_equal, vdo) == vdo)
+ list_del_init(&vdo->registration);
+
+ write_unlock(&registry.lock);
+}
+
+/**
+ * vdo_destroy() - Destroy a vdo instance.
+ * @vdo: The vdo to destroy (may be NULL).
+ */
+void vdo_destroy(struct vdo *vdo)
+{
+ unsigned int i;
+
+ if (vdo == NULL)
+ return;
+
+ /* A running VDO should never be destroyed without suspending first. */
+ BUG_ON(vdo_get_admin_state(vdo)->normal);
+
+ vdo->allocations_allowed = true;
+
+ finish_vdo(vdo);
+ unregister_vdo(vdo);
+ free_data_vio_pool(vdo->data_vio_pool);
+ vdo_free_io_submitter(vdo_forget(vdo->io_submitter));
+ vdo_free_flusher(vdo_forget(vdo->flusher));
+ vdo_free_packer(vdo_forget(vdo->packer));
+ vdo_free_recovery_journal(vdo_forget(vdo->recovery_journal));
+ vdo_free_slab_depot(vdo_forget(vdo->depot));
+ vdo_uninitialize_layout(&vdo->layout);
+ vdo_uninitialize_layout(&vdo->next_layout);
+ if (vdo->partition_copier)
+ dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
+ uninitialize_super_block(&vdo->super_block);
+ vdo_free_block_map(vdo_forget(vdo->block_map));
+ vdo_free_hash_zones(vdo_forget(vdo->hash_zones));
+ vdo_free_physical_zones(vdo_forget(vdo->physical_zones));
+ vdo_free_logical_zones(vdo_forget(vdo->logical_zones));
+
+ if (vdo->threads != NULL) {
+ for (i = 0; i < vdo->thread_config.thread_count; i++) {
+ free_listeners(&vdo->threads[i]);
+ vdo_free_work_queue(vdo_forget(vdo->threads[i].queue));
+ }
+ vdo_free(vdo_forget(vdo->threads));
+ }
+
+ uninitialize_thread_config(&vdo->thread_config);
+
+ if (vdo->compression_context != NULL) {
+ for (i = 0; i < vdo->device_config->thread_counts.cpu_threads; i++)
+ vdo_free(vdo_forget(vdo->compression_context[i]));
+
+ vdo_free(vdo_forget(vdo->compression_context));
+ }
+ vdo_free(vdo);
+}
+
+static int initialize_super_block(struct vdo *vdo, struct vdo_super_block *super_block)
+{
+ int result;
+
+ result = vdo_allocate(VDO_BLOCK_SIZE, char, "encoded super block",
+ (char **) &vdo->super_block.buffer);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ return allocate_vio_components(vdo, VIO_TYPE_SUPER_BLOCK,
+ VIO_PRIORITY_METADATA, NULL, 1,
+ (char *) super_block->buffer,
+ &vdo->super_block.vio);
+}
+
+/**
+ * finish_reading_super_block() - Continue after loading the super block.
+ * @completion: The super block vio.
+ *
+ * This callback is registered in vdo_load_super_block().
+ */
+static void finish_reading_super_block(struct vdo_completion *completion)
+{
+ struct vdo_super_block *super_block =
+ container_of(as_vio(completion), struct vdo_super_block, vio);
+
+ vdo_continue_completion(vdo_forget(completion->parent),
+ vdo_decode_super_block(super_block->buffer));
+}
+
+/**
+ * handle_super_block_read_error() - Handle an error reading the super block.
+ * @completion: The super block vio.
+ *
+ * This error handler is registered in vdo_load_super_block().
+ */
+static void handle_super_block_read_error(struct vdo_completion *completion)
+{
+ vio_record_metadata_io_error(as_vio(completion));
+ finish_reading_super_block(completion);
+}
+
+static void read_super_block_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo_completion *parent = vio->completion.parent;
+
+ continue_vio_after_io(vio, finish_reading_super_block,
+ parent->callback_thread_id);
+}
+
+/**
+ * vdo_load_super_block() - Allocate a super block and read its contents from storage.
+ * @vdo: The vdo containing the super block on disk.
+ * @parent: The completion to notify after loading the super block.
+ */
+void vdo_load_super_block(struct vdo *vdo, struct vdo_completion *parent)
+{
+ int result;
+
+ result = initialize_super_block(vdo, &vdo->super_block);
+ if (result != VDO_SUCCESS) {
+ vdo_continue_completion(parent, result);
+ return;
+ }
+
+ vdo->super_block.vio.completion.parent = parent;
+ vdo_submit_metadata_vio(&vdo->super_block.vio,
+ vdo_get_data_region_start(vdo->geometry),
+ read_super_block_endio,
+ handle_super_block_read_error,
+ REQ_OP_READ);
+}
+
+/**
+ * vdo_get_backing_device() - Get the block device object underlying a vdo.
+ * @vdo: The vdo.
+ *
+ * Return: The vdo's current block device.
+ */
+struct block_device *vdo_get_backing_device(const struct vdo *vdo)
+{
+ return vdo->device_config->owned_device->bdev;
+}
+
+/**
+ * vdo_get_device_name() - Get the device name associated with the vdo target.
+ * @target: The target device interface.
+ *
+ * Return: The block device name.
+ */
+const char *vdo_get_device_name(const struct dm_target *target)
+{
+ return dm_device_name(dm_table_get_md(target->table));
+}
+
+/**
+ * vdo_synchronous_flush() - Issue a flush request and wait for it to complete.
+ * @vdo: The vdo.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_synchronous_flush(struct vdo *vdo)
+{
+ int result;
+ struct bio bio;
+
+ bio_init(&bio, vdo_get_backing_device(vdo), NULL, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH);
+ submit_bio_wait(&bio);
+ result = blk_status_to_errno(bio.bi_status);
+
+ atomic64_inc(&vdo->stats.flush_out);
+ if (result != 0) {
+ vdo_log_error_strerror(result, "synchronous flush failed");
+ result = -EIO;
+ }
+
+ bio_uninit(&bio);
+ return result;
+}
+
+/**
+ * vdo_get_state() - Get the current state of the vdo.
+ * @vdo: The vdo.
+
+ * Context: This method may be called from any thread.
+ *
+ * Return: The current state of the vdo.
+ */
+enum vdo_state vdo_get_state(const struct vdo *vdo)
+{
+ enum vdo_state state = atomic_read(&vdo->state);
+
+ /* pairs with barriers where state field is changed */
+ smp_rmb();
+ return state;
+}
+
+/**
+ * vdo_set_state() - Set the current state of the vdo.
+ * @vdo: The vdo whose state is to be set.
+ * @state: The new state of the vdo.
+ *
+ * Context: This method may be called from any thread.
+ */
+void vdo_set_state(struct vdo *vdo, enum vdo_state state)
+{
+ /* pairs with barrier in vdo_get_state */
+ smp_wmb();
+ atomic_set(&vdo->state, state);
+}
+
+/**
+ * vdo_get_admin_state() - Get the admin state of the vdo.
+ * @vdo: The vdo.
+ *
+ * Return: The code for the vdo's current admin state.
+ */
+const struct admin_state_code *vdo_get_admin_state(const struct vdo *vdo)
+{
+ return vdo_get_admin_state_code(&vdo->admin.state);
+}
+
+/**
+ * record_vdo() - Record the state of the VDO for encoding in the super block.
+ */
+static void record_vdo(struct vdo *vdo)
+{
+ /* This is for backwards compatibility. */
+ vdo->states.unused = vdo->geometry.unused;
+ vdo->states.vdo.state = vdo_get_state(vdo);
+ vdo->states.block_map = vdo_record_block_map(vdo->block_map);
+ vdo->states.recovery_journal = vdo_record_recovery_journal(vdo->recovery_journal);
+ vdo->states.slab_depot = vdo_record_slab_depot(vdo->depot);
+ vdo->states.layout = vdo->layout;
+}
+
+/**
+ * continue_super_block_parent() - Continue the parent of a super block save operation.
+ * @completion: The super block vio.
+ *
+ * This callback is registered in vdo_save_components().
+ */
+static void continue_super_block_parent(struct vdo_completion *completion)
+{
+ vdo_continue_completion(vdo_forget(completion->parent), completion->result);
+}
+
+/**
+ * handle_save_error() - Log a super block save error.
+ * @completion: The super block vio.
+ *
+ * This error handler is registered in vdo_save_components().
+ */
+static void handle_save_error(struct vdo_completion *completion)
+{
+ struct vdo_super_block *super_block =
+ container_of(as_vio(completion), struct vdo_super_block, vio);
+
+ vio_record_metadata_io_error(&super_block->vio);
+ vdo_log_error_strerror(completion->result, "super block save failed");
+ /*
+ * Mark the super block as unwritable so that we won't attempt to write it again. This
+ * avoids the case where a growth attempt fails writing the super block with the new size,
+ * but the subsequent attempt to write out the read-only state succeeds. In this case,
+ * writes which happened just before the suspend would not be visible if the VDO is
+ * restarted without rebuilding, but, after a read-only rebuild, the effects of those
+ * writes would reappear.
+ */
+ super_block->unwritable = true;
+ completion->callback(completion);
+}
+
+static void super_block_write_endio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo_completion *parent = vio->completion.parent;
+
+ continue_vio_after_io(vio, continue_super_block_parent,
+ parent->callback_thread_id);
+}
+
+/**
+ * vdo_save_components() - Encode the vdo and save the super block asynchronously.
+ * @vdo: The vdo whose state is being saved.
+ * @parent: The completion to notify when the save is complete.
+ */
+void vdo_save_components(struct vdo *vdo, struct vdo_completion *parent)
+{
+ struct vdo_super_block *super_block = &vdo->super_block;
+
+ if (super_block->unwritable) {
+ vdo_continue_completion(parent, VDO_READ_ONLY);
+ return;
+ }
+
+ if (super_block->vio.completion.parent != NULL) {
+ vdo_continue_completion(parent, VDO_COMPONENT_BUSY);
+ return;
+ }
+
+ record_vdo(vdo);
+
+ vdo_encode_super_block(super_block->buffer, &vdo->states);
+ super_block->vio.completion.parent = parent;
+ super_block->vio.completion.callback_thread_id = parent->callback_thread_id;
+ vdo_submit_metadata_vio(&super_block->vio,
+ vdo_get_data_region_start(vdo->geometry),
+ super_block_write_endio, handle_save_error,
+ REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA);
+}
+
+/**
+ * vdo_register_read_only_listener() - Register a listener to be notified when the VDO goes
+ * read-only.
+ * @vdo: The vdo to register with.
+ * @listener: The object to notify.
+ * @notification: The function to call to send the notification.
+ * @thread_id: The id of the thread on which to send the notification.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_register_read_only_listener(struct vdo *vdo, void *listener,
+ vdo_read_only_notification_fn notification,
+ thread_id_t thread_id)
+{
+ struct vdo_thread *thread = &vdo->threads[thread_id];
+ struct read_only_listener *read_only_listener;
+ int result;
+
+ result = VDO_ASSERT(thread_id != vdo->thread_config.dedupe_thread,
+ "read only listener not registered on dedupe thread");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = vdo_allocate(1, struct read_only_listener, __func__,
+ &read_only_listener);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *read_only_listener = (struct read_only_listener) {
+ .listener = listener,
+ .notify = notification,
+ .next = thread->listeners,
+ };
+
+ thread->listeners = read_only_listener;
+ return VDO_SUCCESS;
+}
+
+/**
+ * notify_vdo_of_read_only_mode() - Notify a vdo that it is going read-only.
+ * @listener: The vdo.
+ * @parent: The completion to notify in order to acknowledge the notification.
+ *
+ * This will save the read-only state to the super block.
+ *
+ * Implements vdo_read_only_notification_fn.
+ */
+static void notify_vdo_of_read_only_mode(void *listener, struct vdo_completion *parent)
+{
+ struct vdo *vdo = listener;
+
+ if (vdo_in_read_only_mode(vdo))
+ vdo_finish_completion(parent);
+
+ vdo_set_state(vdo, VDO_READ_ONLY_MODE);
+ vdo_save_components(vdo, parent);
+}
+
+/**
+ * vdo_enable_read_only_entry() - Enable a vdo to enter read-only mode on errors.
+ * @vdo: The vdo to enable.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int vdo_enable_read_only_entry(struct vdo *vdo)
+{
+ thread_id_t id;
+ bool is_read_only = vdo_in_read_only_mode(vdo);
+ struct read_only_notifier *notifier = &vdo->read_only_notifier;
+
+ if (is_read_only) {
+ notifier->read_only_error = VDO_READ_ONLY;
+ notifier->state = NOTIFIED;
+ } else {
+ notifier->state = MAY_NOT_NOTIFY;
+ }
+
+ spin_lock_init(&notifier->lock);
+ vdo_initialize_completion(&notifier->completion, vdo,
+ VDO_READ_ONLY_MODE_COMPLETION);
+
+ for (id = 0; id < vdo->thread_config.thread_count; id++)
+ vdo->threads[id].is_read_only = is_read_only;
+
+ return vdo_register_read_only_listener(vdo, vdo, notify_vdo_of_read_only_mode,
+ vdo->thread_config.admin_thread);
+}
+
+/**
+ * vdo_wait_until_not_entering_read_only_mode() - Wait until no read-only notifications are in
+ * progress and prevent any subsequent
+ * notifications.
+ * @parent: The completion to notify when no threads are entering read-only mode.
+ *
+ * Notifications may be re-enabled by calling vdo_allow_read_only_mode_entry().
+ */
+void vdo_wait_until_not_entering_read_only_mode(struct vdo_completion *parent)
+{
+ struct vdo *vdo = parent->vdo;
+ struct read_only_notifier *notifier = &vdo->read_only_notifier;
+
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ if (notifier->waiter != NULL) {
+ vdo_continue_completion(parent, VDO_COMPONENT_BUSY);
+ return;
+ }
+
+ spin_lock(&notifier->lock);
+ if (notifier->state == NOTIFYING)
+ notifier->waiter = parent;
+ else if (notifier->state == MAY_NOTIFY)
+ notifier->state = MAY_NOT_NOTIFY;
+ spin_unlock(&notifier->lock);
+
+ if (notifier->waiter == NULL) {
+ /*
+ * A notification was not in progress, and now they are
+ * disallowed.
+ */
+ vdo_launch_completion(parent);
+ return;
+ }
+}
+
+/**
+ * as_notifier() - Convert a generic vdo_completion to a read_only_notifier.
+ * @completion: The completion to convert.
+ *
+ * Return: The completion as a read_only_notifier.
+ */
+static inline struct read_only_notifier *as_notifier(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_READ_ONLY_MODE_COMPLETION);
+ return container_of(completion, struct read_only_notifier, completion);
+}
+
+/**
+ * finish_entering_read_only_mode() - Complete the process of entering read only mode.
+ * @completion: The read-only mode completion.
+ */
+static void finish_entering_read_only_mode(struct vdo_completion *completion)
+{
+ struct read_only_notifier *notifier = as_notifier(completion);
+
+ vdo_assert_on_admin_thread(completion->vdo, __func__);
+
+ spin_lock(&notifier->lock);
+ notifier->state = NOTIFIED;
+ spin_unlock(&notifier->lock);
+
+ if (notifier->waiter != NULL)
+ vdo_continue_completion(vdo_forget(notifier->waiter),
+ completion->result);
+}
+
+/**
+ * make_thread_read_only() - Inform each thread that the VDO is in read-only mode.
+ * @completion: The read-only mode completion.
+ */
+static void make_thread_read_only(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ thread_id_t thread_id = completion->callback_thread_id;
+ struct read_only_notifier *notifier = as_notifier(completion);
+ struct read_only_listener *listener = completion->parent;
+
+ if (listener == NULL) {
+ /* This is the first call on this thread */
+ struct vdo_thread *thread = &vdo->threads[thread_id];
+
+ thread->is_read_only = true;
+ listener = thread->listeners;
+ if (thread_id == 0)
+ vdo_log_error_strerror(READ_ONCE(notifier->read_only_error),
+ "Unrecoverable error, entering read-only mode");
+ } else {
+ /* We've just finished notifying a listener */
+ listener = listener->next;
+ }
+
+ if (listener != NULL) {
+ /* We have a listener to notify */
+ vdo_prepare_completion(completion, make_thread_read_only,
+ make_thread_read_only, thread_id,
+ listener);
+ listener->notify(listener->listener, completion);
+ return;
+ }
+
+ /* We're done with this thread */
+ if (++thread_id == vdo->thread_config.dedupe_thread) {
+ /*
+ * We don't want to notify the dedupe thread since it may be
+ * blocked rebuilding the index.
+ */
+ thread_id++;
+ }
+
+ if (thread_id >= vdo->thread_config.thread_count) {
+ /* There are no more threads */
+ vdo_prepare_completion(completion, finish_entering_read_only_mode,
+ finish_entering_read_only_mode,
+ vdo->thread_config.admin_thread, NULL);
+ } else {
+ vdo_prepare_completion(completion, make_thread_read_only,
+ make_thread_read_only, thread_id, NULL);
+ }
+
+ vdo_launch_completion(completion);
+}
+
+/**
+ * vdo_allow_read_only_mode_entry() - Allow the notifier to put the VDO into read-only mode,
+ * reversing the effects of
+ * vdo_wait_until_not_entering_read_only_mode().
+ * @parent: The object to notify once the operation is complete.
+ *
+ * If some thread tried to put the vdo into read-only mode while notifications were disallowed, it
+ * will be done when this method is called. If that happens, the parent will not be notified until
+ * the vdo has actually entered read-only mode and attempted to save the super block.
+ *
+ * Context: This method may only be called from the admin thread.
+ */
+void vdo_allow_read_only_mode_entry(struct vdo_completion *parent)
+{
+ struct vdo *vdo = parent->vdo;
+ struct read_only_notifier *notifier = &vdo->read_only_notifier;
+
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ if (notifier->waiter != NULL) {
+ vdo_continue_completion(parent, VDO_COMPONENT_BUSY);
+ return;
+ }
+
+ spin_lock(&notifier->lock);
+ if (notifier->state == MAY_NOT_NOTIFY) {
+ if (notifier->read_only_error == VDO_SUCCESS) {
+ notifier->state = MAY_NOTIFY;
+ } else {
+ notifier->state = NOTIFYING;
+ notifier->waiter = parent;
+ }
+ }
+ spin_unlock(&notifier->lock);
+
+ if (notifier->waiter == NULL) {
+ /* We're done */
+ vdo_launch_completion(parent);
+ return;
+ }
+
+ /* Do the pending notification. */
+ make_thread_read_only(&notifier->completion);
+}
+
+/**
+ * vdo_enter_read_only_mode() - Put a VDO into read-only mode and save the read-only state in the
+ * super block.
+ * @vdo: The vdo.
+ * @error_code: The error which caused the VDO to enter read-only mode.
+ *
+ * This method is a no-op if the VDO is already read-only.
+ */
+void vdo_enter_read_only_mode(struct vdo *vdo, int error_code)
+{
+ bool notify = false;
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+ struct read_only_notifier *notifier = &vdo->read_only_notifier;
+ struct vdo_thread *thread;
+
+ if (thread_id != VDO_INVALID_THREAD_ID) {
+ thread = &vdo->threads[thread_id];
+ if (thread->is_read_only) {
+ /* This thread has already gone read-only. */
+ return;
+ }
+
+ /* Record for this thread that the VDO is read-only. */
+ thread->is_read_only = true;
+ }
+
+ spin_lock(&notifier->lock);
+ if (notifier->read_only_error == VDO_SUCCESS) {
+ WRITE_ONCE(notifier->read_only_error, error_code);
+ if (notifier->state == MAY_NOTIFY) {
+ notifier->state = NOTIFYING;
+ notify = true;
+ }
+ }
+ spin_unlock(&notifier->lock);
+
+ if (!notify) {
+ /* The notifier is already aware of a read-only error */
+ return;
+ }
+
+ /* Initiate a notification starting on the lowest numbered thread. */
+ vdo_launch_completion_callback(&notifier->completion, make_thread_read_only, 0);
+}
+
+/**
+ * vdo_is_read_only() - Check whether the VDO is read-only.
+ * @vdo: The vdo.
+ *
+ * Return: true if the vdo is read-only.
+ *
+ * This method may be called from any thread, as opposed to examining the VDO's state field which
+ * is only safe to check from the admin thread.
+ */
+bool vdo_is_read_only(struct vdo *vdo)
+{
+ return vdo->threads[vdo_get_callback_thread_id()].is_read_only;
+}
+
+/**
+ * vdo_in_read_only_mode() - Check whether a vdo is in read-only mode.
+ * @vdo: The vdo to query.
+ *
+ * Return: true if the vdo is in read-only mode.
+ */
+bool vdo_in_read_only_mode(const struct vdo *vdo)
+{
+ return (vdo_get_state(vdo) == VDO_READ_ONLY_MODE);
+}
+
+/**
+ * vdo_in_recovery_mode() - Check whether the vdo is in recovery mode.
+ * @vdo: The vdo to query.
+ *
+ * Return: true if the vdo is in recovery mode.
+ */
+bool vdo_in_recovery_mode(const struct vdo *vdo)
+{
+ return (vdo_get_state(vdo) == VDO_RECOVERING);
+}
+
+/**
+ * vdo_enter_recovery_mode() - Put the vdo into recovery mode.
+ * @vdo: The vdo.
+ */
+void vdo_enter_recovery_mode(struct vdo *vdo)
+{
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ if (vdo_in_read_only_mode(vdo))
+ return;
+
+ vdo_log_info("Entering recovery mode");
+ vdo_set_state(vdo, VDO_RECOVERING);
+}
+
+/**
+ * complete_synchronous_action() - Signal the waiting thread that a synchronous action is complete.
+ * @completion: The sync completion.
+ */
+static void complete_synchronous_action(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VDO_SYNC_COMPLETION);
+ complete(&(container_of(completion, struct sync_completion,
+ vdo_completion)->completion));
+}
+
+/**
+ * perform_synchronous_action() - Launch an action on a VDO thread and wait for it to complete.
+ * @vdo: The vdo.
+ * @action: The callback to launch.
+ * @thread_id: The thread on which to run the action.
+ * @parent: The parent of the sync completion (may be NULL).
+ */
+static int perform_synchronous_action(struct vdo *vdo, vdo_action_fn action,
+ thread_id_t thread_id, void *parent)
+{
+ struct sync_completion sync;
+
+ vdo_initialize_completion(&sync.vdo_completion, vdo, VDO_SYNC_COMPLETION);
+ init_completion(&sync.completion);
+ sync.vdo_completion.parent = parent;
+ vdo_launch_completion_callback(&sync.vdo_completion, action, thread_id);
+ wait_for_completion(&sync.completion);
+ return sync.vdo_completion.result;
+}
+
+/**
+ * set_compression_callback() - Callback to turn compression on or off.
+ * @completion: The completion.
+ */
+static void set_compression_callback(struct vdo_completion *completion)
+{
+ struct vdo *vdo = completion->vdo;
+ bool *enable = completion->parent;
+ bool was_enabled = vdo_get_compressing(vdo);
+
+ if (*enable != was_enabled) {
+ WRITE_ONCE(vdo->compressing, *enable);
+ if (was_enabled) {
+ /* Signal the packer to flush since compression has been disabled. */
+ vdo_flush_packer(vdo->packer);
+ }
+ }
+
+ vdo_log_info("compression is %s", (*enable ? "enabled" : "disabled"));
+ *enable = was_enabled;
+ complete_synchronous_action(completion);
+}
+
+/**
+ * vdo_set_compressing() - Turn compression on or off.
+ * @vdo: The vdo.
+ * @enable: Whether to enable or disable compression.
+ *
+ * Return: Whether compression was previously on or off.
+ */
+bool vdo_set_compressing(struct vdo *vdo, bool enable)
+{
+ perform_synchronous_action(vdo, set_compression_callback,
+ vdo->thread_config.packer_thread,
+ &enable);
+ return enable;
+}
+
+/**
+ * vdo_get_compressing() - Get whether compression is enabled in a vdo.
+ * @vdo: The vdo.
+ *
+ * Return: State of compression.
+ */
+bool vdo_get_compressing(struct vdo *vdo)
+{
+ return READ_ONCE(vdo->compressing);
+}
+
+static size_t get_block_map_cache_size(const struct vdo *vdo)
+{
+ return ((size_t) vdo->device_config->cache_size) * VDO_BLOCK_SIZE;
+}
+
+static struct error_statistics __must_check get_vdo_error_statistics(const struct vdo *vdo)
+{
+ /*
+ * The error counts can be incremented from arbitrary threads and so must be incremented
+ * atomically, but they are just statistics with no semantics that could rely on memory
+ * order, so unfenced reads are sufficient.
+ */
+ const struct atomic_statistics *atoms = &vdo->stats;
+
+ return (struct error_statistics) {
+ .invalid_advice_pbn_count = atomic64_read(&atoms->invalid_advice_pbn_count),
+ .no_space_error_count = atomic64_read(&atoms->no_space_error_count),
+ .read_only_error_count = atomic64_read(&atoms->read_only_error_count),
+ };
+}
+
+static void copy_bio_stat(struct bio_stats *b, const struct atomic_bio_stats *a)
+{
+ b->read = atomic64_read(&a->read);
+ b->write = atomic64_read(&a->write);
+ b->discard = atomic64_read(&a->discard);
+ b->flush = atomic64_read(&a->flush);
+ b->empty_flush = atomic64_read(&a->empty_flush);
+ b->fua = atomic64_read(&a->fua);
+}
+
+static struct bio_stats subtract_bio_stats(struct bio_stats minuend,
+ struct bio_stats subtrahend)
+{
+ return (struct bio_stats) {
+ .read = minuend.read - subtrahend.read,
+ .write = minuend.write - subtrahend.write,
+ .discard = minuend.discard - subtrahend.discard,
+ .flush = minuend.flush - subtrahend.flush,
+ .empty_flush = minuend.empty_flush - subtrahend.empty_flush,
+ .fua = minuend.fua - subtrahend.fua,
+ };
+}
+
+/**
+ * vdo_get_physical_blocks_allocated() - Get the number of physical blocks in use by user data.
+ * @vdo: The vdo.
+ *
+ * Return: The number of blocks allocated for user data.
+ */
+static block_count_t __must_check vdo_get_physical_blocks_allocated(const struct vdo *vdo)
+{
+ return (vdo_get_slab_depot_allocated_blocks(vdo->depot) -
+ vdo_get_journal_block_map_data_blocks_used(vdo->recovery_journal));
+}
+
+/**
+ * vdo_get_physical_blocks_overhead() - Get the number of physical blocks used by vdo metadata.
+ * @vdo: The vdo.
+ *
+ * Return: The number of overhead blocks.
+ */
+static block_count_t __must_check vdo_get_physical_blocks_overhead(const struct vdo *vdo)
+{
+ /*
+ * config.physical_blocks is mutated during resize and is in a packed structure,
+ * but resize runs on admin thread.
+ * TODO: Verify that this is always safe.
+ */
+ return (vdo->states.vdo.config.physical_blocks -
+ vdo_get_slab_depot_data_blocks(vdo->depot) +
+ vdo_get_journal_block_map_data_blocks_used(vdo->recovery_journal));
+}
+
+static const char *vdo_describe_state(enum vdo_state state)
+{
+ /* These strings should all fit in the 15 chars of VDOStatistics.mode. */
+ switch (state) {
+ case VDO_RECOVERING:
+ return "recovering";
+
+ case VDO_READ_ONLY_MODE:
+ return "read-only";
+
+ default:
+ return "normal";
+ }
+}
+
+/**
+ * get_vdo_statistics() - Populate a vdo_statistics structure on the admin thread.
+ * @vdo: The vdo.
+ * @stats: The statistics structure to populate.
+ */
+static void get_vdo_statistics(const struct vdo *vdo, struct vdo_statistics *stats)
+{
+ struct recovery_journal *journal = vdo->recovery_journal;
+ enum vdo_state state = vdo_get_state(vdo);
+
+ vdo_assert_on_admin_thread(vdo, __func__);
+
+ /* start with a clean slate */
+ memset(stats, 0, sizeof(struct vdo_statistics));
+
+ /*
+ * These are immutable properties of the vdo object, so it is safe to query them from any
+ * thread.
+ */
+ stats->version = STATISTICS_VERSION;
+ stats->logical_blocks = vdo->states.vdo.config.logical_blocks;
+ /*
+ * config.physical_blocks is mutated during resize and is in a packed structure, but resize
+ * runs on the admin thread.
+ * TODO: verify that this is always safe
+ */
+ stats->physical_blocks = vdo->states.vdo.config.physical_blocks;
+ stats->block_size = VDO_BLOCK_SIZE;
+ stats->complete_recoveries = vdo->states.vdo.complete_recoveries;
+ stats->read_only_recoveries = vdo->states.vdo.read_only_recoveries;
+ stats->block_map_cache_size = get_block_map_cache_size(vdo);
+
+ /* The callees are responsible for thread-safety. */
+ stats->data_blocks_used = vdo_get_physical_blocks_allocated(vdo);
+ stats->overhead_blocks_used = vdo_get_physical_blocks_overhead(vdo);
+ stats->logical_blocks_used = vdo_get_recovery_journal_logical_blocks_used(journal);
+ vdo_get_slab_depot_statistics(vdo->depot, stats);
+ stats->journal = vdo_get_recovery_journal_statistics(journal);
+ stats->packer = vdo_get_packer_statistics(vdo->packer);
+ stats->block_map = vdo_get_block_map_statistics(vdo->block_map);
+ vdo_get_dedupe_statistics(vdo->hash_zones, stats);
+ stats->errors = get_vdo_error_statistics(vdo);
+ stats->in_recovery_mode = (state == VDO_RECOVERING);
+ snprintf(stats->mode, sizeof(stats->mode), "%s", vdo_describe_state(state));
+
+ stats->instance = vdo->instance;
+ stats->current_vios_in_progress = get_data_vio_pool_active_requests(vdo->data_vio_pool);
+ stats->max_vios = get_data_vio_pool_maximum_requests(vdo->data_vio_pool);
+
+ stats->flush_out = atomic64_read(&vdo->stats.flush_out);
+ stats->logical_block_size = vdo->device_config->logical_block_size;
+ copy_bio_stat(&stats->bios_in, &vdo->stats.bios_in);
+ copy_bio_stat(&stats->bios_in_partial, &vdo->stats.bios_in_partial);
+ copy_bio_stat(&stats->bios_out, &vdo->stats.bios_out);
+ copy_bio_stat(&stats->bios_meta, &vdo->stats.bios_meta);
+ copy_bio_stat(&stats->bios_journal, &vdo->stats.bios_journal);
+ copy_bio_stat(&stats->bios_page_cache, &vdo->stats.bios_page_cache);
+ copy_bio_stat(&stats->bios_out_completed, &vdo->stats.bios_out_completed);
+ copy_bio_stat(&stats->bios_meta_completed, &vdo->stats.bios_meta_completed);
+ copy_bio_stat(&stats->bios_journal_completed,
+ &vdo->stats.bios_journal_completed);
+ copy_bio_stat(&stats->bios_page_cache_completed,
+ &vdo->stats.bios_page_cache_completed);
+ copy_bio_stat(&stats->bios_acknowledged, &vdo->stats.bios_acknowledged);
+ copy_bio_stat(&stats->bios_acknowledged_partial, &vdo->stats.bios_acknowledged_partial);
+ stats->bios_in_progress =
+ subtract_bio_stats(stats->bios_in, stats->bios_acknowledged);
+ vdo_get_memory_stats(&stats->memory_usage.bytes_used,
+ &stats->memory_usage.peak_bytes_used);
+}
+
+/**
+ * vdo_fetch_statistics_callback() - Action to populate a vdo_statistics
+ * structure on the admin thread.
+ * @completion: The completion.
+ *
+ * This callback is registered in vdo_fetch_statistics().
+ */
+static void vdo_fetch_statistics_callback(struct vdo_completion *completion)
+{
+ get_vdo_statistics(completion->vdo, completion->parent);
+ complete_synchronous_action(completion);
+}
+
+/**
+ * vdo_fetch_statistics() - Fetch statistics on the correct thread.
+ * @vdo: The vdo.
+ * @stats: The vdo statistics are returned here.
+ */
+void vdo_fetch_statistics(struct vdo *vdo, struct vdo_statistics *stats)
+{
+ perform_synchronous_action(vdo, vdo_fetch_statistics_callback,
+ vdo->thread_config.admin_thread, stats);
+}
+
+/**
+ * vdo_get_callback_thread_id() - Get the id of the callback thread on which a completion is
+ * currently running.
+ *
+ * Return: The current thread ID, or -1 if no such thread.
+ */
+thread_id_t vdo_get_callback_thread_id(void)
+{
+ struct vdo_work_queue *queue = vdo_get_current_work_queue();
+ struct vdo_thread *thread;
+ thread_id_t thread_id;
+
+ if (queue == NULL)
+ return VDO_INVALID_THREAD_ID;
+
+ thread = vdo_get_work_queue_owner(queue);
+ thread_id = thread->thread_id;
+
+ if (PARANOID_THREAD_CONSISTENCY_CHECKS) {
+ BUG_ON(thread_id >= thread->vdo->thread_config.thread_count);
+ BUG_ON(thread != &thread->vdo->threads[thread_id]);
+ }
+
+ return thread_id;
+}
+
+/**
+ * vdo_dump_status() - Dump status information about a vdo to the log for debugging.
+ * @vdo: The vdo to dump.
+ */
+void vdo_dump_status(const struct vdo *vdo)
+{
+ zone_count_t zone;
+
+ vdo_dump_flusher(vdo->flusher);
+ vdo_dump_recovery_journal_statistics(vdo->recovery_journal);
+ vdo_dump_packer(vdo->packer);
+ vdo_dump_slab_depot(vdo->depot);
+
+ for (zone = 0; zone < vdo->thread_config.logical_zone_count; zone++)
+ vdo_dump_logical_zone(&vdo->logical_zones->zones[zone]);
+
+ for (zone = 0; zone < vdo->thread_config.physical_zone_count; zone++)
+ vdo_dump_physical_zone(&vdo->physical_zones->zones[zone]);
+
+ vdo_dump_hash_zones(vdo->hash_zones);
+}
+
+/**
+ * vdo_assert_on_admin_thread() - Assert that we are running on the admin thread.
+ * @vdo: The vdo.
+ * @name: The name of the function which should be running on the admin thread (for logging).
+ */
+void vdo_assert_on_admin_thread(const struct vdo *vdo, const char *name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() == vdo->thread_config.admin_thread),
+ "%s called on admin thread", name);
+}
+
+/**
+ * vdo_assert_on_logical_zone_thread() - Assert that this function was called on the specified
+ * logical zone thread.
+ * @vdo: The vdo.
+ * @logical_zone: The number of the logical zone.
+ * @name: The name of the calling function.
+ */
+void vdo_assert_on_logical_zone_thread(const struct vdo *vdo, zone_count_t logical_zone,
+ const char *name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() ==
+ vdo->thread_config.logical_threads[logical_zone]),
+ "%s called on logical thread", name);
+}
+
+/**
+ * vdo_assert_on_physical_zone_thread() - Assert that this function was called on the specified
+ * physical zone thread.
+ * @vdo: The vdo.
+ * @physical_zone: The number of the physical zone.
+ * @name: The name of the calling function.
+ */
+void vdo_assert_on_physical_zone_thread(const struct vdo *vdo,
+ zone_count_t physical_zone, const char *name)
+{
+ VDO_ASSERT_LOG_ONLY((vdo_get_callback_thread_id() ==
+ vdo->thread_config.physical_threads[physical_zone]),
+ "%s called on physical thread", name);
+}
+
+/**
+ * vdo_get_physical_zone() - Get the physical zone responsible for a given physical block number.
+ * @vdo: The vdo containing the physical zones.
+ * @pbn: The PBN of the data block.
+ * @zone_ptr: A pointer to return the physical zone.
+ *
+ * Gets the physical zone responsible for a given physical block number of a data block in this vdo
+ * instance, or of the zero block (for which a NULL zone is returned). For any other block number
+ * that is not in the range of valid data block numbers in any slab, an error will be returned.
+ * This function is safe to call on invalid block numbers; it will not put the vdo into read-only
+ * mode.
+ *
+ * Return: VDO_SUCCESS or VDO_OUT_OF_RANGE if the block number is invalid or an error code for any
+ * other failure.
+ */
+int vdo_get_physical_zone(const struct vdo *vdo, physical_block_number_t pbn,
+ struct physical_zone **zone_ptr)
+{
+ struct vdo_slab *slab;
+ int result;
+
+ if (pbn == VDO_ZERO_BLOCK) {
+ *zone_ptr = NULL;
+ return VDO_SUCCESS;
+ }
+
+ /*
+ * Used because it does a more restrictive bounds check than vdo_get_slab(), and done first
+ * because it won't trigger read-only mode on an invalid PBN.
+ */
+ if (!vdo_is_physical_data_block(vdo->depot, pbn))
+ return VDO_OUT_OF_RANGE;
+
+ /* With the PBN already checked, we should always succeed in finding a slab. */
+ slab = vdo_get_slab(vdo->depot, pbn);
+ result = VDO_ASSERT(slab != NULL, "vdo_get_slab must succeed on all valid PBNs");
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *zone_ptr = &vdo->physical_zones->zones[slab->allocator->zone_number];
+ return VDO_SUCCESS;
+}
diff --git a/drivers/md/dm-vdo/vdo.h b/drivers/md/dm-vdo/vdo.h
new file mode 100644
index 000000000000..483ae873e002
--- /dev/null
+++ b/drivers/md/dm-vdo/vdo.h
@@ -0,0 +1,362 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_H
+#define VDO_H
+
+#include <linux/atomic.h>
+#include <linux/blk_types.h>
+#include <linux/completion.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include "admin-state.h"
+#include "encodings.h"
+#include "funnel-workqueue.h"
+#include "packer.h"
+#include "physical-zone.h"
+#include "statistics.h"
+#include "thread-registry.h"
+#include "types.h"
+
+enum notifier_state {
+ /* Notifications are allowed but not in progress */
+ MAY_NOTIFY,
+ /* A notification is in progress */
+ NOTIFYING,
+ /* Notifications are not allowed */
+ MAY_NOT_NOTIFY,
+ /* A notification has completed */
+ NOTIFIED,
+};
+
+/**
+ * typedef vdo_read_only_notification_fn - A function to notify a listener that the VDO has gone
+ * read-only.
+ * @listener: The object to notify.
+ * @parent: The completion to notify in order to acknowledge the notification.
+ */
+typedef void (*vdo_read_only_notification_fn)(void *listener, struct vdo_completion *parent);
+
+/*
+ * An object to be notified when the VDO enters read-only mode
+ */
+struct read_only_listener {
+ /* The listener */
+ void *listener;
+ /* The method to call to notify the listener */
+ vdo_read_only_notification_fn notify;
+ /* A pointer to the next listener */
+ struct read_only_listener *next;
+};
+
+struct vdo_thread {
+ struct vdo *vdo;
+ thread_id_t thread_id;
+ struct vdo_work_queue *queue;
+ /*
+ * Each thread maintains its own notion of whether the VDO is read-only so that the
+ * read-only state can be checked from any base thread without worrying about
+ * synchronization or thread safety. This does mean that knowledge of the VDO going
+ * read-only does not occur simultaneously across the VDO's threads, but that does not seem
+ * to cause any problems.
+ */
+ bool is_read_only;
+ /*
+ * A list of objects waiting to be notified on this thread that the VDO has entered
+ * read-only mode.
+ */
+ struct read_only_listener *listeners;
+ struct registered_thread allocating_thread;
+};
+
+/* Keep struct bio statistics atomically */
+struct atomic_bio_stats {
+ atomic64_t read; /* Number of not REQ_WRITE bios */
+ atomic64_t write; /* Number of REQ_WRITE bios */
+ atomic64_t discard; /* Number of REQ_DISCARD bios */
+ atomic64_t flush; /* Number of REQ_FLUSH bios */
+ atomic64_t empty_flush; /* Number of REQ_PREFLUSH bios without data */
+ atomic64_t fua; /* Number of REQ_FUA bios */
+};
+
+/* Counters are atomic since updates can arrive concurrently from arbitrary threads. */
+struct atomic_statistics {
+ atomic64_t bios_submitted;
+ atomic64_t bios_completed;
+ atomic64_t flush_out;
+ atomic64_t invalid_advice_pbn_count;
+ atomic64_t no_space_error_count;
+ atomic64_t read_only_error_count;
+ struct atomic_bio_stats bios_in;
+ struct atomic_bio_stats bios_in_partial;
+ struct atomic_bio_stats bios_out;
+ struct atomic_bio_stats bios_out_completed;
+ struct atomic_bio_stats bios_acknowledged;
+ struct atomic_bio_stats bios_acknowledged_partial;
+ struct atomic_bio_stats bios_meta;
+ struct atomic_bio_stats bios_meta_completed;
+ struct atomic_bio_stats bios_journal;
+ struct atomic_bio_stats bios_journal_completed;
+ struct atomic_bio_stats bios_page_cache;
+ struct atomic_bio_stats bios_page_cache_completed;
+};
+
+struct read_only_notifier {
+ /* The completion for entering read-only mode */
+ struct vdo_completion completion;
+ /* A completion waiting for notifications to be drained or enabled */
+ struct vdo_completion *waiter;
+ /* Lock to protect the next two fields */
+ spinlock_t lock;
+ /* The code of the error which put the VDO into read-only mode */
+ int read_only_error;
+ /* The current state of the notifier (values described above) */
+ enum notifier_state state;
+};
+
+/*
+ * The thread ID returned when the current thread is not a vdo thread, or can not be determined
+ * (usually due to being at interrupt context).
+ */
+#define VDO_INVALID_THREAD_ID ((thread_id_t) -1)
+
+struct thread_config {
+ zone_count_t logical_zone_count;
+ zone_count_t physical_zone_count;
+ zone_count_t hash_zone_count;
+ thread_count_t bio_thread_count;
+ thread_count_t thread_count;
+ thread_id_t admin_thread;
+ thread_id_t journal_thread;
+ thread_id_t packer_thread;
+ thread_id_t dedupe_thread;
+ thread_id_t bio_ack_thread;
+ thread_id_t cpu_thread;
+ thread_id_t *logical_threads;
+ thread_id_t *physical_threads;
+ thread_id_t *hash_zone_threads;
+ thread_id_t *bio_threads;
+};
+
+struct thread_count_config;
+
+struct vdo_super_block {
+ /* The vio for reading and writing the super block to disk */
+ struct vio vio;
+ /* A buffer to hold the super block */
+ u8 *buffer;
+ /* Whether this super block may not be written */
+ bool unwritable;
+};
+
+struct data_vio_pool;
+
+struct vdo_administrator {
+ struct vdo_completion completion;
+ struct admin_state state;
+ atomic_t busy;
+ u32 phase;
+ struct completion callback_sync;
+};
+
+struct vdo {
+ char thread_name_prefix[MAX_VDO_WORK_QUEUE_NAME_LEN];
+ struct vdo_thread *threads;
+ vdo_action_fn action;
+ struct vdo_completion *completion;
+ struct vio_tracer *vio_tracer;
+
+ /* The atomic version of the state of this vdo */
+ atomic_t state;
+ /* The full state of all components */
+ struct vdo_component_states states;
+ /*
+ * A counter value to attach to thread names and log messages to identify the individual
+ * device.
+ */
+ unsigned int instance;
+ /* The read-only notifier */
+ struct read_only_notifier read_only_notifier;
+ /* The load-time configuration of this vdo */
+ struct device_config *device_config;
+ /* The thread mapping */
+ struct thread_config thread_config;
+
+ /* The super block */
+ struct vdo_super_block super_block;
+
+ /* The partitioning of the underlying storage */
+ struct layout layout;
+ struct layout next_layout;
+ struct dm_kcopyd_client *partition_copier;
+
+ /* The block map */
+ struct block_map *block_map;
+
+ /* The journal for block map recovery */
+ struct recovery_journal *recovery_journal;
+
+ /* The slab depot */
+ struct slab_depot *depot;
+
+ /* The compressed-block packer */
+ struct packer *packer;
+ /* Whether incoming data should be compressed */
+ bool compressing;
+
+ /* The handler for flush requests */
+ struct flusher *flusher;
+
+ /* The state the vdo was in when loaded (primarily for unit tests) */
+ enum vdo_state load_state;
+
+ /* The logical zones of this vdo */
+ struct logical_zones *logical_zones;
+
+ /* The physical zones of this vdo */
+ struct physical_zones *physical_zones;
+
+ /* The hash lock zones of this vdo */
+ struct hash_zones *hash_zones;
+
+ /* Bio submission manager used for sending bios to the storage device. */
+ struct io_submitter *io_submitter;
+
+ /* The pool of data_vios for servicing incoming bios */
+ struct data_vio_pool *data_vio_pool;
+
+ /* The manager for administrative operations */
+ struct vdo_administrator admin;
+
+ /* Flags controlling administrative operations */
+ const struct admin_state_code *suspend_type;
+ bool allocations_allowed;
+ bool dump_on_shutdown;
+ atomic_t processing_message;
+
+ /*
+ * Statistics
+ * Atomic stats counters
+ */
+ struct atomic_statistics stats;
+ /* Used to gather statistics without allocating memory */
+ struct vdo_statistics stats_buffer;
+ /* Protects the stats_buffer */
+ struct mutex stats_mutex;
+
+ /* A list of all device_configs referencing this vdo */
+ struct list_head device_config_list;
+
+ /* This VDO's list entry for the device registry */
+ struct list_head registration;
+
+ /* Underlying block device info. */
+ u64 starting_sector_offset;
+ struct volume_geometry geometry;
+
+ /* N blobs of context data for LZ4 code, one per CPU thread. */
+ char **compression_context;
+};
+
+/**
+ * vdo_uses_bio_ack_queue() - Indicate whether the vdo is configured to use a separate work queue
+ * for acknowledging received and processed bios.
+ * @vdo: The vdo.
+ *
+ * Note that this directly controls the handling of write operations, but the compile-time flag
+ * VDO_USE_BIO_ACK_QUEUE_FOR_READ is also checked for read operations.
+ *
+ * Return: Whether a bio-acknowledgement work queue is in use.
+ */
+static inline bool vdo_uses_bio_ack_queue(struct vdo *vdo)
+{
+ return vdo->device_config->thread_counts.bio_ack_threads > 0;
+}
+
+/**
+ * typedef vdo_filter_fn - Method type for vdo matching methods.
+ *
+ * A filter function returns false if the vdo doesn't match.
+ */
+typedef bool (*vdo_filter_fn)(struct vdo *vdo, const void *context);
+
+void vdo_initialize_device_registry_once(void);
+struct vdo * __must_check vdo_find_matching(vdo_filter_fn filter, const void *context);
+
+int __must_check vdo_make_thread(struct vdo *vdo, thread_id_t thread_id,
+ const struct vdo_work_queue_type *type,
+ unsigned int queue_count, void *contexts[]);
+
+static inline int __must_check vdo_make_default_thread(struct vdo *vdo,
+ thread_id_t thread_id)
+{
+ return vdo_make_thread(vdo, thread_id, NULL, 1, NULL);
+}
+
+int __must_check vdo_make(unsigned int instance, struct device_config *config,
+ char **reason, struct vdo **vdo_ptr);
+
+void vdo_destroy(struct vdo *vdo);
+
+void vdo_load_super_block(struct vdo *vdo, struct vdo_completion *parent);
+
+struct block_device * __must_check vdo_get_backing_device(const struct vdo *vdo);
+
+const char * __must_check vdo_get_device_name(const struct dm_target *target);
+
+int __must_check vdo_synchronous_flush(struct vdo *vdo);
+
+const struct admin_state_code * __must_check vdo_get_admin_state(const struct vdo *vdo);
+
+bool vdo_set_compressing(struct vdo *vdo, bool enable);
+
+bool vdo_get_compressing(struct vdo *vdo);
+
+void vdo_fetch_statistics(struct vdo *vdo, struct vdo_statistics *stats);
+
+thread_id_t vdo_get_callback_thread_id(void);
+
+enum vdo_state __must_check vdo_get_state(const struct vdo *vdo);
+
+void vdo_set_state(struct vdo *vdo, enum vdo_state state);
+
+void vdo_save_components(struct vdo *vdo, struct vdo_completion *parent);
+
+int vdo_register_read_only_listener(struct vdo *vdo, void *listener,
+ vdo_read_only_notification_fn notification,
+ thread_id_t thread_id);
+
+int vdo_enable_read_only_entry(struct vdo *vdo);
+
+void vdo_wait_until_not_entering_read_only_mode(struct vdo_completion *parent);
+
+void vdo_allow_read_only_mode_entry(struct vdo_completion *parent);
+
+void vdo_enter_read_only_mode(struct vdo *vdo, int error_code);
+
+bool __must_check vdo_is_read_only(struct vdo *vdo);
+
+bool __must_check vdo_in_read_only_mode(const struct vdo *vdo);
+
+bool __must_check vdo_in_recovery_mode(const struct vdo *vdo);
+
+void vdo_enter_recovery_mode(struct vdo *vdo);
+
+void vdo_assert_on_admin_thread(const struct vdo *vdo, const char *name);
+
+void vdo_assert_on_logical_zone_thread(const struct vdo *vdo, zone_count_t logical_zone,
+ const char *name);
+
+void vdo_assert_on_physical_zone_thread(const struct vdo *vdo, zone_count_t physical_zone,
+ const char *name);
+
+int __must_check vdo_get_physical_zone(const struct vdo *vdo, physical_block_number_t pbn,
+ struct physical_zone **zone_ptr);
+
+void vdo_dump_status(const struct vdo *vdo);
+
+#endif /* VDO_H */
diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c
new file mode 100644
index 000000000000..b291578f726f
--- /dev/null
+++ b/drivers/md/dm-vdo/vio.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "vio.h"
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/kernel.h>
+#include <linux/ratelimit.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "constants.h"
+#include "io-submitter.h"
+#include "vdo.h"
+
+/* A vio_pool is a collection of preallocated vios. */
+struct vio_pool {
+ /* The number of objects managed by the pool */
+ size_t size;
+ /* The list of objects which are available */
+ struct list_head available;
+ /* The queue of requestors waiting for objects from the pool */
+ struct vdo_wait_queue waiting;
+ /* The number of objects currently in use */
+ size_t busy_count;
+ /* The list of objects which are in use */
+ struct list_head busy;
+ /* The ID of the thread on which this pool may be used */
+ thread_id_t thread_id;
+ /* The buffer backing the pool's vios */
+ char *buffer;
+ /* The pool entries */
+ struct pooled_vio vios[];
+};
+
+physical_block_number_t pbn_from_vio_bio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo *vdo = vio->completion.vdo;
+ physical_block_number_t pbn = bio->bi_iter.bi_sector / VDO_SECTORS_PER_BLOCK;
+
+ return ((pbn == VDO_GEOMETRY_BLOCK_LOCATION) ? pbn : pbn + vdo->geometry.bio_offset);
+}
+
+static int create_multi_block_bio(block_count_t size, struct bio **bio_ptr)
+{
+ struct bio *bio = NULL;
+ int result;
+
+ result = vdo_allocate_extended(struct bio, size + 1, struct bio_vec,
+ "bio", &bio);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *bio_ptr = bio;
+ return VDO_SUCCESS;
+}
+
+int vdo_create_bio(struct bio **bio_ptr)
+{
+ return create_multi_block_bio(1, bio_ptr);
+}
+
+void vdo_free_bio(struct bio *bio)
+{
+ if (bio == NULL)
+ return;
+
+ bio_uninit(bio);
+ vdo_free(vdo_forget(bio));
+}
+
+int allocate_vio_components(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority, void *parent,
+ unsigned int block_count, char *data, struct vio *vio)
+{
+ struct bio *bio;
+ int result;
+
+ result = VDO_ASSERT(block_count <= MAX_BLOCKS_PER_VIO,
+ "block count %u does not exceed maximum %u", block_count,
+ MAX_BLOCKS_PER_VIO);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(((vio_type != VIO_TYPE_UNINITIALIZED) && (vio_type != VIO_TYPE_DATA)),
+ "%d is a metadata type", vio_type);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = create_multi_block_bio(block_count, &bio);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initialize_vio(vio, bio, block_count, vio_type, priority, vdo);
+ vio->completion.parent = parent;
+ vio->data = data;
+ return VDO_SUCCESS;
+}
+
+/**
+ * create_multi_block_metadata_vio() - Create a vio.
+ * @vdo: The vdo on which the vio will operate.
+ * @vio_type: The type of vio to create.
+ * @priority: The relative priority to assign to the vio.
+ * @parent: The parent of the vio.
+ * @block_count: The size of the vio in blocks.
+ * @data: The buffer.
+ * @vio_ptr: A pointer to hold the new vio.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int create_multi_block_metadata_vio(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority, void *parent,
+ unsigned int block_count, char *data,
+ struct vio **vio_ptr)
+{
+ struct vio *vio;
+ int result;
+
+ BUILD_BUG_ON(sizeof(struct vio) > 256);
+
+ /*
+ * Metadata vios should use direct allocation and not use the buffer pool, which is
+ * reserved for submissions from the linux block layer.
+ */
+ result = vdo_allocate(1, struct vio, __func__, &vio);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error("metadata vio allocation failure %d", result);
+ return result;
+ }
+
+ result = allocate_vio_components(vdo, vio_type, priority, parent, block_count,
+ data, vio);
+ if (result != VDO_SUCCESS) {
+ vdo_free(vio);
+ return result;
+ }
+
+ *vio_ptr = vio;
+ return VDO_SUCCESS;
+}
+
+/**
+ * free_vio_components() - Free the components of a vio embedded in a larger structure.
+ * @vio: The vio to destroy
+ */
+void free_vio_components(struct vio *vio)
+{
+ if (vio == NULL)
+ return;
+
+ BUG_ON(is_data_vio(vio));
+ vdo_free_bio(vdo_forget(vio->bio));
+}
+
+/**
+ * free_vio() - Destroy a vio.
+ * @vio: The vio to destroy.
+ */
+void free_vio(struct vio *vio)
+{
+ free_vio_components(vio);
+ vdo_free(vio);
+}
+
+/* Set bio properties for a VDO read or write. */
+void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn)
+{
+ struct vdo *vdo = vio->completion.vdo;
+ struct device_config *config = vdo->device_config;
+
+ pbn -= vdo->geometry.bio_offset;
+ vio->bio_zone = ((pbn / config->thread_counts.bio_rotation_interval) %
+ config->thread_counts.bio_threads);
+
+ bio->bi_private = vio;
+ bio->bi_end_io = callback;
+ bio->bi_opf = bi_opf;
+ bio->bi_iter.bi_sector = pbn * VDO_SECTORS_PER_BLOCK;
+}
+
+/*
+ * Prepares the bio to perform IO with the specified buffer. May only be used on a VDO-allocated
+ * bio, as it assumes the bio wraps a 4k buffer that is 4k aligned, but there does not have to be a
+ * vio associated with the bio.
+ */
+int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn)
+{
+ int bvec_count, offset, len, i;
+ struct bio *bio = vio->bio;
+
+ bio_reset(bio, bio->bi_bdev, bi_opf);
+ vdo_set_bio_properties(bio, vio, callback, bi_opf, pbn);
+ if (data == NULL)
+ return VDO_SUCCESS;
+
+ bio->bi_io_vec = bio->bi_inline_vecs;
+ bio->bi_max_vecs = vio->block_count + 1;
+ len = VDO_BLOCK_SIZE * vio->block_count;
+ offset = offset_in_page(data);
+ bvec_count = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+
+ /*
+ * If we knew that data was always on one page, or contiguous pages, we wouldn't need the
+ * loop. But if we're using vmalloc, it's not impossible that the data is in different
+ * pages that can't be merged in bio_add_page...
+ */
+ for (i = 0; (i < bvec_count) && (len > 0); i++) {
+ struct page *page;
+ int bytes_added;
+ int bytes = PAGE_SIZE - offset;
+
+ if (bytes > len)
+ bytes = len;
+
+ page = is_vmalloc_addr(data) ? vmalloc_to_page(data) : virt_to_page(data);
+ bytes_added = bio_add_page(bio, page, bytes, offset);
+
+ if (bytes_added != bytes) {
+ return vdo_log_error_strerror(VDO_BIO_CREATION_FAILED,
+ "Could only add %i bytes to bio",
+ bytes_added);
+ }
+
+ data += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * update_vio_error_stats() - Update per-vio error stats and log the error.
+ * @vio: The vio which got an error.
+ * @format: The format of the message to log (a printf style format).
+ */
+void update_vio_error_stats(struct vio *vio, const char *format, ...)
+{
+ static DEFINE_RATELIMIT_STATE(error_limiter, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ va_list args;
+ int priority;
+ struct vdo *vdo = vio->completion.vdo;
+
+ switch (vio->completion.result) {
+ case VDO_READ_ONLY:
+ atomic64_inc(&vdo->stats.read_only_error_count);
+ return;
+
+ case VDO_NO_SPACE:
+ atomic64_inc(&vdo->stats.no_space_error_count);
+ priority = VDO_LOG_DEBUG;
+ break;
+
+ default:
+ priority = VDO_LOG_ERR;
+ }
+
+ if (!__ratelimit(&error_limiter))
+ return;
+
+ va_start(args, format);
+ vdo_vlog_strerror(priority, vio->completion.result, VDO_LOGGING_MODULE_NAME,
+ format, args);
+ va_end(args);
+}
+
+void vio_record_metadata_io_error(struct vio *vio)
+{
+ const char *description;
+ physical_block_number_t pbn = pbn_from_vio_bio(vio->bio);
+
+ if (bio_op(vio->bio) == REQ_OP_READ) {
+ description = "read";
+ } else if ((vio->bio->bi_opf & REQ_PREFLUSH) == REQ_PREFLUSH) {
+ description = (((vio->bio->bi_opf & REQ_FUA) == REQ_FUA) ?
+ "write+preflush+fua" :
+ "write+preflush");
+ } else if ((vio->bio->bi_opf & REQ_FUA) == REQ_FUA) {
+ description = "write+fua";
+ } else {
+ description = "write";
+ }
+
+ update_vio_error_stats(vio,
+ "Completing %s vio of type %u for physical block %llu with error",
+ description, vio->type, (unsigned long long) pbn);
+}
+
+/**
+ * make_vio_pool() - Create a new vio pool.
+ * @vdo: The vdo.
+ * @pool_size: The number of vios in the pool.
+ * @thread_id: The ID of the thread using this pool.
+ * @vio_type: The type of vios in the pool.
+ * @priority: The priority with which vios from the pool should be enqueued.
+ * @context: The context that each entry will have.
+ * @pool_ptr: The resulting pool.
+ *
+ * Return: A success or error code.
+ */
+int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
+ enum vio_type vio_type, enum vio_priority priority, void *context,
+ struct vio_pool **pool_ptr)
+{
+ struct vio_pool *pool;
+ char *ptr;
+ int result;
+
+ result = vdo_allocate_extended(struct vio_pool, pool_size, struct pooled_vio,
+ __func__, &pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ pool->thread_id = thread_id;
+ INIT_LIST_HEAD(&pool->available);
+ INIT_LIST_HEAD(&pool->busy);
+
+ result = vdo_allocate(pool_size * VDO_BLOCK_SIZE, char,
+ "VIO pool buffer", &pool->buffer);
+ if (result != VDO_SUCCESS) {
+ free_vio_pool(pool);
+ return result;
+ }
+
+ ptr = pool->buffer;
+ for (pool->size = 0; pool->size < pool_size; pool->size++, ptr += VDO_BLOCK_SIZE) {
+ struct pooled_vio *pooled = &pool->vios[pool->size];
+
+ result = allocate_vio_components(vdo, vio_type, priority, NULL, 1, ptr,
+ &pooled->vio);
+ if (result != VDO_SUCCESS) {
+ free_vio_pool(pool);
+ return result;
+ }
+
+ pooled->context = context;
+ list_add_tail(&pooled->pool_entry, &pool->available);
+ }
+
+ *pool_ptr = pool;
+ return VDO_SUCCESS;
+}
+
+/**
+ * free_vio_pool() - Destroy a vio pool.
+ * @pool: The pool to free.
+ */
+void free_vio_pool(struct vio_pool *pool)
+{
+ struct pooled_vio *pooled, *tmp;
+
+ if (pool == NULL)
+ return;
+
+ /* Remove all available vios from the object pool. */
+ VDO_ASSERT_LOG_ONLY(!vdo_waitq_has_waiters(&pool->waiting),
+ "VIO pool must not have any waiters when being freed");
+ VDO_ASSERT_LOG_ONLY((pool->busy_count == 0),
+ "VIO pool must not have %zu busy entries when being freed",
+ pool->busy_count);
+ VDO_ASSERT_LOG_ONLY(list_empty(&pool->busy),
+ "VIO pool must not have busy entries when being freed");
+
+ list_for_each_entry_safe(pooled, tmp, &pool->available, pool_entry) {
+ list_del(&pooled->pool_entry);
+ free_vio_components(&pooled->vio);
+ pool->size--;
+ }
+
+ VDO_ASSERT_LOG_ONLY(pool->size == 0,
+ "VIO pool must not have missing entries when being freed");
+
+ vdo_free(vdo_forget(pool->buffer));
+ vdo_free(pool);
+}
+
+/**
+ * is_vio_pool_busy() - Check whether an vio pool has outstanding entries.
+ *
+ * Return: true if the pool is busy.
+ */
+bool is_vio_pool_busy(struct vio_pool *pool)
+{
+ return (pool->busy_count != 0);
+}
+
+/**
+ * acquire_vio_from_pool() - Acquire a vio and buffer from the pool (asynchronous).
+ * @pool: The vio pool.
+ * @waiter: Object that is requesting a vio.
+ */
+void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter)
+{
+ struct pooled_vio *pooled;
+
+ VDO_ASSERT_LOG_ONLY((pool->thread_id == vdo_get_callback_thread_id()),
+ "acquire from active vio_pool called from correct thread");
+
+ if (list_empty(&pool->available)) {
+ vdo_waitq_enqueue_waiter(&pool->waiting, waiter);
+ return;
+ }
+
+ pooled = list_first_entry(&pool->available, struct pooled_vio, pool_entry);
+ pool->busy_count++;
+ list_move_tail(&pooled->pool_entry, &pool->busy);
+ (*waiter->callback)(waiter, pooled);
+}
+
+/**
+ * return_vio_to_pool() - Return a vio to the pool
+ * @pool: The vio pool.
+ * @vio: The pooled vio to return.
+ */
+void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio)
+{
+ VDO_ASSERT_LOG_ONLY((pool->thread_id == vdo_get_callback_thread_id()),
+ "vio pool entry returned on same thread as it was acquired");
+
+ vio->vio.completion.error_handler = NULL;
+ vio->vio.completion.parent = NULL;
+ if (vdo_waitq_has_waiters(&pool->waiting)) {
+ vdo_waitq_notify_next_waiter(&pool->waiting, NULL, vio);
+ return;
+ }
+
+ list_move_tail(&vio->pool_entry, &pool->available);
+ --pool->busy_count;
+}
+
+/*
+ * Various counting functions for statistics.
+ * These are used for bios coming into VDO, as well as bios generated by VDO.
+ */
+void vdo_count_bios(struct atomic_bio_stats *bio_stats, struct bio *bio)
+{
+ if (((bio->bi_opf & REQ_PREFLUSH) != 0) && (bio->bi_iter.bi_size == 0)) {
+ atomic64_inc(&bio_stats->empty_flush);
+ atomic64_inc(&bio_stats->flush);
+ return;
+ }
+
+ switch (bio_op(bio)) {
+ case REQ_OP_WRITE:
+ atomic64_inc(&bio_stats->write);
+ break;
+ case REQ_OP_READ:
+ atomic64_inc(&bio_stats->read);
+ break;
+ case REQ_OP_DISCARD:
+ atomic64_inc(&bio_stats->discard);
+ break;
+ /*
+ * All other operations are filtered out in dmvdo.c, or not created by VDO, so
+ * shouldn't exist.
+ */
+ default:
+ VDO_ASSERT_LOG_ONLY(0, "Bio operation %d not a write, read, discard, or empty flush",
+ bio_op(bio));
+ }
+
+ if ((bio->bi_opf & REQ_PREFLUSH) != 0)
+ atomic64_inc(&bio_stats->flush);
+ if (bio->bi_opf & REQ_FUA)
+ atomic64_inc(&bio_stats->fua);
+}
+
+static void count_all_bios_completed(struct vio *vio, struct bio *bio)
+{
+ struct atomic_statistics *stats = &vio->completion.vdo->stats;
+
+ if (is_data_vio(vio)) {
+ vdo_count_bios(&stats->bios_out_completed, bio);
+ return;
+ }
+
+ vdo_count_bios(&stats->bios_meta_completed, bio);
+ if (vio->type == VIO_TYPE_RECOVERY_JOURNAL)
+ vdo_count_bios(&stats->bios_journal_completed, bio);
+ else if (vio->type == VIO_TYPE_BLOCK_MAP)
+ vdo_count_bios(&stats->bios_page_cache_completed, bio);
+}
+
+void vdo_count_completed_bios(struct bio *bio)
+{
+ struct vio *vio = (struct vio *) bio->bi_private;
+
+ atomic64_inc(&vio->completion.vdo->stats.bios_completed);
+ count_all_bios_completed(vio, bio);
+}
diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h
new file mode 100644
index 000000000000..3490e9f59b04
--- /dev/null
+++ b/drivers/md/dm-vdo/vio.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VIO_H
+#define VIO_H
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+
+#include "completion.h"
+#include "constants.h"
+#include "types.h"
+#include "vdo.h"
+
+enum {
+ MAX_BLOCKS_PER_VIO = (BIO_MAX_VECS << PAGE_SHIFT) / VDO_BLOCK_SIZE,
+};
+
+struct pooled_vio {
+ /* The underlying vio */
+ struct vio vio;
+ /* The list entry for chaining pooled vios together */
+ struct list_head list_entry;
+ /* The context set by the pool */
+ void *context;
+ /* The list entry used by the pool */
+ struct list_head pool_entry;
+};
+
+/**
+ * as_vio() - Convert a generic vdo_completion to a vio.
+ * @completion: The completion to convert.
+ *
+ * Return: The completion as a vio.
+ */
+static inline struct vio *as_vio(struct vdo_completion *completion)
+{
+ vdo_assert_completion_type(completion, VIO_COMPLETION);
+ return container_of(completion, struct vio, completion);
+}
+
+/**
+ * get_vio_bio_zone_thread_id() - Get the thread id of the bio zone in which a vio should submit
+ * its I/O.
+ * @vio: The vio.
+ *
+ * Return: The id of the bio zone thread the vio should use.
+ */
+static inline thread_id_t __must_check get_vio_bio_zone_thread_id(struct vio *vio)
+{
+ return vio->completion.vdo->thread_config.bio_threads[vio->bio_zone];
+}
+
+physical_block_number_t __must_check pbn_from_vio_bio(struct bio *bio);
+
+/**
+ * assert_vio_in_bio_zone() - Check that a vio is running on the correct thread for its bio zone.
+ * @vio: The vio to check.
+ */
+static inline void assert_vio_in_bio_zone(struct vio *vio)
+{
+ thread_id_t expected = get_vio_bio_zone_thread_id(vio);
+ thread_id_t thread_id = vdo_get_callback_thread_id();
+
+ VDO_ASSERT_LOG_ONLY((expected == thread_id),
+ "vio I/O for physical block %llu on thread %u, should be on bio zone thread %u",
+ (unsigned long long) pbn_from_vio_bio(vio->bio), thread_id,
+ expected);
+}
+
+int vdo_create_bio(struct bio **bio_ptr);
+void vdo_free_bio(struct bio *bio);
+int allocate_vio_components(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority, void *parent,
+ unsigned int block_count, char *data, struct vio *vio);
+int __must_check create_multi_block_metadata_vio(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority,
+ void *parent, unsigned int block_count,
+ char *data, struct vio **vio_ptr);
+
+static inline int __must_check create_metadata_vio(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority,
+ void *parent, char *data,
+ struct vio **vio_ptr)
+{
+ return create_multi_block_metadata_vio(vdo, vio_type, priority, parent, 1, data,
+ vio_ptr);
+}
+
+void free_vio_components(struct vio *vio);
+void free_vio(struct vio *vio);
+
+/**
+ * initialize_vio() - Initialize a vio.
+ * @vio: The vio to initialize.
+ * @bio: The bio this vio should use for its I/O.
+ * @block_count: The size of this vio in vdo blocks.
+ * @vio_type: The vio type.
+ * @priority: The relative priority of the vio.
+ * @vdo: The vdo for this vio.
+ */
+static inline void initialize_vio(struct vio *vio, struct bio *bio,
+ unsigned int block_count, enum vio_type vio_type,
+ enum vio_priority priority, struct vdo *vdo)
+{
+ /* data_vio's may not span multiple blocks */
+ BUG_ON((vio_type == VIO_TYPE_DATA) && (block_count != 1));
+
+ vio->bio = bio;
+ vio->block_count = block_count;
+ vio->type = vio_type;
+ vio->priority = priority;
+ vdo_initialize_completion(&vio->completion, vdo, VIO_COMPLETION);
+}
+
+void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn);
+
+int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn);
+
+void update_vio_error_stats(struct vio *vio, const char *format, ...)
+ __printf(2, 3);
+
+/**
+ * is_data_vio() - Check whether a vio is servicing an external data request.
+ * @vio: The vio to check.
+ */
+static inline bool is_data_vio(struct vio *vio)
+{
+ return (vio->type == VIO_TYPE_DATA);
+}
+
+/**
+ * get_metadata_priority() - Convert a vio's priority to a work item priority.
+ * @vio: The vio.
+ *
+ * Return: The priority with which to submit the vio's bio.
+ */
+static inline enum vdo_completion_priority get_metadata_priority(struct vio *vio)
+{
+ return ((vio->priority == VIO_PRIORITY_HIGH) ?
+ BIO_Q_HIGH_PRIORITY :
+ BIO_Q_METADATA_PRIORITY);
+}
+
+/**
+ * continue_vio() - Enqueue a vio to run its next callback.
+ * @vio: The vio to continue.
+ *
+ * Return: The result of the current operation.
+ */
+static inline void continue_vio(struct vio *vio, int result)
+{
+ if (unlikely(result != VDO_SUCCESS))
+ vdo_set_completion_result(&vio->completion, result);
+
+ vdo_enqueue_completion(&vio->completion, VDO_WORK_Q_DEFAULT_PRIORITY);
+}
+
+void vdo_count_bios(struct atomic_bio_stats *bio_stats, struct bio *bio);
+void vdo_count_completed_bios(struct bio *bio);
+
+/**
+ * continue_vio_after_io() - Continue a vio now that its I/O has returned.
+ */
+static inline void continue_vio_after_io(struct vio *vio, vdo_action_fn callback,
+ thread_id_t thread)
+{
+ vdo_count_completed_bios(vio->bio);
+ vdo_set_completion_callback(&vio->completion, callback, thread);
+ continue_vio(vio, blk_status_to_errno(vio->bio->bi_status));
+}
+
+void vio_record_metadata_io_error(struct vio *vio);
+
+/* A vio_pool is a collection of preallocated vios used to write arbitrary metadata blocks. */
+
+static inline struct pooled_vio *vio_as_pooled_vio(struct vio *vio)
+{
+ return container_of(vio, struct pooled_vio, vio);
+}
+
+struct vio_pool;
+
+int __must_check make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
+ enum vio_type vio_type, enum vio_priority priority,
+ void *context, struct vio_pool **pool_ptr);
+void free_vio_pool(struct vio_pool *pool);
+bool __must_check is_vio_pool_busy(struct vio_pool *pool);
+void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter);
+void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio);
+
+#endif /* VIO_H */
diff --git a/drivers/md/dm-vdo/wait-queue.c b/drivers/md/dm-vdo/wait-queue.c
new file mode 100644
index 000000000000..6e1e739277ef
--- /dev/null
+++ b/drivers/md/dm-vdo/wait-queue.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "wait-queue.h"
+
+#include <linux/device-mapper.h>
+
+#include "permassert.h"
+
+#include "status-codes.h"
+
+/**
+ * vdo_waitq_enqueue_waiter() - Add a waiter to the tail end of a waitq.
+ * @waitq: The vdo_wait_queue to which to add the waiter.
+ * @waiter: The waiter to add to the waitq.
+ *
+ * The waiter must not already be waiting in a waitq.
+ */
+void vdo_waitq_enqueue_waiter(struct vdo_wait_queue *waitq, struct vdo_waiter *waiter)
+{
+ BUG_ON(waiter->next_waiter != NULL);
+
+ if (waitq->last_waiter == NULL) {
+ /*
+ * The waitq is empty, so form the initial circular list by self-linking the
+ * initial waiter.
+ */
+ waiter->next_waiter = waiter;
+ } else {
+ /* Splice the new waiter in at the end of the waitq. */
+ waiter->next_waiter = waitq->last_waiter->next_waiter;
+ waitq->last_waiter->next_waiter = waiter;
+ }
+
+ /* In both cases, the waiter we added to the ring becomes the last waiter. */
+ waitq->last_waiter = waiter;
+ waitq->length += 1;
+}
+
+/**
+ * vdo_waitq_transfer_all_waiters() - Transfer all waiters from one waitq to
+ * a second waitq, emptying the first waitq.
+ * @from_waitq: The waitq containing the waiters to move.
+ * @to_waitq: The waitq that will receive the waiters from the first waitq.
+ */
+void vdo_waitq_transfer_all_waiters(struct vdo_wait_queue *from_waitq,
+ struct vdo_wait_queue *to_waitq)
+{
+ /* If the source waitq is empty, there's nothing to do. */
+ if (!vdo_waitq_has_waiters(from_waitq))
+ return;
+
+ if (vdo_waitq_has_waiters(to_waitq)) {
+ /*
+ * Both are non-empty. Splice the two circular lists together
+ * by swapping the next (head) pointers in the list tails.
+ */
+ struct vdo_waiter *from_head = from_waitq->last_waiter->next_waiter;
+ struct vdo_waiter *to_head = to_waitq->last_waiter->next_waiter;
+
+ to_waitq->last_waiter->next_waiter = from_head;
+ from_waitq->last_waiter->next_waiter = to_head;
+ }
+
+ to_waitq->last_waiter = from_waitq->last_waiter;
+ to_waitq->length += from_waitq->length;
+ vdo_waitq_init(from_waitq);
+}
+
+/**
+ * vdo_waitq_notify_all_waiters() - Notify all the entries waiting in a waitq.
+ * @waitq: The vdo_wait_queue containing the waiters to notify.
+ * @callback: The function to call to notify each waiter, or NULL to invoke the callback field
+ * registered in each waiter.
+ * @context: The context to pass to the callback function.
+ *
+ * Notifies all the entries waiting in a waitq to continue execution by invoking a callback
+ * function on each of them in turn. The waitq is copied and emptied before invoking any callbacks,
+ * and only the waiters that were in the waitq at the start of the call will be notified.
+ */
+void vdo_waitq_notify_all_waiters(struct vdo_wait_queue *waitq,
+ vdo_waiter_callback_fn callback, void *context)
+{
+ /*
+ * Copy and empty the waitq first, avoiding the possibility of an infinite
+ * loop if entries are returned to the waitq by the callback function.
+ */
+ struct vdo_wait_queue waiters;
+
+ vdo_waitq_init(&waiters);
+ vdo_waitq_transfer_all_waiters(waitq, &waiters);
+
+ /* Drain the copied waitq, invoking the callback on every entry. */
+ while (vdo_waitq_has_waiters(&waiters))
+ vdo_waitq_notify_next_waiter(&waiters, callback, context);
+}
+
+/**
+ * vdo_waitq_get_first_waiter() - Return the waiter that is at the head end of a waitq.
+ * @waitq: The vdo_wait_queue from which to get the first waiter.
+ *
+ * Return: The first (oldest) waiter in the waitq, or NULL if the waitq is empty.
+ */
+struct vdo_waiter *vdo_waitq_get_first_waiter(const struct vdo_wait_queue *waitq)
+{
+ struct vdo_waiter *last_waiter = waitq->last_waiter;
+
+ if (last_waiter == NULL) {
+ /* There are no waiters, so we're done. */
+ return NULL;
+ }
+
+ /* The waitq is circular, so the last entry links to the head of the waitq. */
+ return last_waiter->next_waiter;
+}
+
+/**
+ * vdo_waitq_dequeue_matching_waiters() - Remove all waiters that match based on the specified
+ * matching method and append them to a vdo_wait_queue.
+ * @waitq: The vdo_wait_queue to process.
+ * @waiter_match: The method to determine matching.
+ * @match_context: Contextual info for the match method.
+ * @matched_waitq: A wait_waitq to store matches.
+ */
+void vdo_waitq_dequeue_matching_waiters(struct vdo_wait_queue *waitq,
+ vdo_waiter_match_fn waiter_match,
+ void *match_context,
+ struct vdo_wait_queue *matched_waitq)
+{
+ struct vdo_wait_queue iteration_waitq;
+
+ vdo_waitq_init(&iteration_waitq);
+ vdo_waitq_transfer_all_waiters(waitq, &iteration_waitq);
+
+ while (vdo_waitq_has_waiters(&iteration_waitq)) {
+ struct vdo_waiter *waiter = vdo_waitq_dequeue_waiter(&iteration_waitq);
+
+ vdo_waitq_enqueue_waiter((waiter_match(waiter, match_context) ?
+ matched_waitq : waitq), waiter);
+ }
+}
+
+/**
+ * vdo_waitq_dequeue_waiter() - Remove the first (oldest) waiter from a waitq.
+ * @waitq: The vdo_wait_queue from which to remove the first entry.
+ *
+ * The caller will be responsible for waking the waiter by continuing its
+ * execution appropriately.
+ *
+ * Return: The first (oldest) waiter in the waitq, or NULL if the waitq is empty.
+ */
+struct vdo_waiter *vdo_waitq_dequeue_waiter(struct vdo_wait_queue *waitq)
+{
+ struct vdo_waiter *first_waiter = vdo_waitq_get_first_waiter(waitq);
+ struct vdo_waiter *last_waiter = waitq->last_waiter;
+
+ if (first_waiter == NULL)
+ return NULL;
+
+ if (first_waiter == last_waiter) {
+ /* The waitq has a single entry, so empty it by nulling the tail. */
+ waitq->last_waiter = NULL;
+ } else {
+ /*
+ * The waitq has multiple waiters, so splice the first waiter out
+ * of the circular waitq.
+ */
+ last_waiter->next_waiter = first_waiter->next_waiter;
+ }
+
+ /* The waiter is no longer in a waitq. */
+ first_waiter->next_waiter = NULL;
+ waitq->length -= 1;
+
+ return first_waiter;
+}
+
+/**
+ * vdo_waitq_notify_next_waiter() - Notify the next entry waiting in a waitq.
+ * @waitq: The vdo_wait_queue containing the waiter to notify.
+ * @callback: The function to call to notify the waiter, or NULL to invoke the callback field
+ * registered in the waiter.
+ * @context: The context to pass to the callback function.
+ *
+ * Notifies the next entry waiting in a waitq to continue execution by invoking a callback function
+ * on it after removing it from the waitq.
+ *
+ * Return: true if there was a waiter in the waitq.
+ */
+bool vdo_waitq_notify_next_waiter(struct vdo_wait_queue *waitq,
+ vdo_waiter_callback_fn callback, void *context)
+{
+ struct vdo_waiter *waiter = vdo_waitq_dequeue_waiter(waitq);
+
+ if (waiter == NULL)
+ return false;
+
+ if (callback == NULL)
+ callback = waiter->callback;
+ callback(waiter, context);
+
+ return true;
+}
diff --git a/drivers/md/dm-vdo/wait-queue.h b/drivers/md/dm-vdo/wait-queue.h
new file mode 100644
index 000000000000..7e8ee6afe7c7
--- /dev/null
+++ b/drivers/md/dm-vdo/wait-queue.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_WAIT_QUEUE_H
+#define VDO_WAIT_QUEUE_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/**
+ * A vdo_wait_queue is a circular singly linked list of entries waiting to be notified
+ * of a change in a condition. Keeping a circular list allows the vdo_wait_queue
+ * structure to simply be a pointer to the tail (newest) entry, supporting
+ * constant-time enqueue and dequeue operations. A null pointer is an empty waitq.
+ *
+ * An empty waitq:
+ * waitq0.last_waiter -> NULL
+ *
+ * A singleton waitq:
+ * waitq1.last_waiter -> entry1 -> entry1 -> [...]
+ *
+ * A three-element waitq:
+ * waitq2.last_waiter -> entry3 -> entry1 -> entry2 -> entry3 -> [...]
+ *
+ * linux/wait.h's wait_queue_head is _not_ used because vdo_wait_queue's
+ * interface is much less complex (doesn't need locking, priorities or timers).
+ * Made possible by vdo's thread-based resource allocation and locking; and
+ * the polling nature of vdo_wait_queue consumers.
+ *
+ * FIXME: could be made to use a linux/list.h's list_head but its extra barriers
+ * really aren't needed. Nor is a doubly linked list, but vdo_wait_queue could
+ * make use of __list_del_clearprev() -- but that would compromise the ability
+ * to make full use of linux's list interface.
+ */
+
+struct vdo_waiter;
+
+struct vdo_wait_queue {
+ /* The tail of the queue, the last (most recently added) entry */
+ struct vdo_waiter *last_waiter;
+ /* The number of waiters currently in the queue */
+ size_t length;
+};
+
+/**
+ * vdo_waiter_callback_fn - Callback type that will be called to resume processing
+ * of a waiter after it has been removed from its wait queue.
+ */
+typedef void (*vdo_waiter_callback_fn)(struct vdo_waiter *waiter, void *context);
+
+/**
+ * vdo_waiter_match_fn - Method type for waiter matching methods.
+ *
+ * Returns false if the waiter does not match.
+ */
+typedef bool (*vdo_waiter_match_fn)(struct vdo_waiter *waiter, void *context);
+
+/* The structure for entries in a vdo_wait_queue. */
+struct vdo_waiter {
+ /*
+ * The next waiter in the waitq. If this entry is the last waiter, then this
+ * is actually a pointer back to the head of the waitq.
+ */
+ struct vdo_waiter *next_waiter;
+
+ /* Optional waiter-specific callback to invoke when dequeuing this waiter. */
+ vdo_waiter_callback_fn callback;
+};
+
+/**
+ * vdo_waiter_is_waiting() - Check whether a waiter is waiting.
+ * @waiter: The waiter to check.
+ *
+ * Return: true if the waiter is on some vdo_wait_queue.
+ */
+static inline bool vdo_waiter_is_waiting(struct vdo_waiter *waiter)
+{
+ return (waiter->next_waiter != NULL);
+}
+
+/**
+ * vdo_waitq_init() - Initialize a vdo_wait_queue.
+ * @waitq: The vdo_wait_queue to initialize.
+ */
+static inline void vdo_waitq_init(struct vdo_wait_queue *waitq)
+{
+ *waitq = (struct vdo_wait_queue) {
+ .last_waiter = NULL,
+ .length = 0,
+ };
+}
+
+/**
+ * vdo_waitq_has_waiters() - Check whether a vdo_wait_queue has any entries waiting.
+ * @waitq: The vdo_wait_queue to query.
+ *
+ * Return: true if there are any waiters in the waitq.
+ */
+static inline bool __must_check vdo_waitq_has_waiters(const struct vdo_wait_queue *waitq)
+{
+ return (waitq->last_waiter != NULL);
+}
+
+void vdo_waitq_enqueue_waiter(struct vdo_wait_queue *waitq,
+ struct vdo_waiter *waiter);
+
+struct vdo_waiter *vdo_waitq_dequeue_waiter(struct vdo_wait_queue *waitq);
+
+void vdo_waitq_notify_all_waiters(struct vdo_wait_queue *waitq,
+ vdo_waiter_callback_fn callback, void *context);
+
+bool vdo_waitq_notify_next_waiter(struct vdo_wait_queue *waitq,
+ vdo_waiter_callback_fn callback, void *context);
+
+void vdo_waitq_transfer_all_waiters(struct vdo_wait_queue *from_waitq,
+ struct vdo_wait_queue *to_waitq);
+
+struct vdo_waiter *vdo_waitq_get_first_waiter(const struct vdo_wait_queue *waitq);
+
+void vdo_waitq_dequeue_matching_waiters(struct vdo_wait_queue *waitq,
+ vdo_waiter_match_fn waiter_match,
+ void *match_context,
+ struct vdo_wait_queue *matched_waitq);
+
+/**
+ * vdo_waitq_num_waiters() - Return the number of waiters in a vdo_wait_queue.
+ * @waitq: The vdo_wait_queue to query.
+ *
+ * Return: The number of waiters in the waitq.
+ */
+static inline size_t __must_check vdo_waitq_num_waiters(const struct vdo_wait_queue *waitq)
+{
+ return waitq->length;
+}
+
+#endif /* VDO_WAIT_QUEUE_H */
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index b475200d8586..e46aee6f932e 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -60,7 +60,8 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
* to the data block. Caller is responsible for releasing buf.
*/
static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
- unsigned int *offset, struct dm_buffer **buf)
+ unsigned int *offset, struct dm_buffer **buf,
+ unsigned short ioprio)
{
u64 position, block, rem;
u8 *res;
@@ -69,7 +70,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
block = div64_u64_rem(position, v->fec->io_size, &rem);
*offset = (unsigned int)rem;
- res = dm_bufio_read(v->fec->bufio, block, buf);
+ res = dm_bufio_read_with_ioprio(v->fec->bufio, block, buf, ioprio);
if (IS_ERR(res)) {
DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
v->data_dev->name, (unsigned long long)rsb,
@@ -121,16 +122,17 @@ static inline unsigned int fec_buffer_rs_index(unsigned int i, unsigned int j)
* Decode all RS blocks from buffers and copy corrected bytes into fio->output
* starting from block_offset.
*/
-static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
- u64 rsb, int byte_index, unsigned int block_offset,
- int neras)
+static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
+ struct dm_verity_fec_io *fio, u64 rsb, int byte_index,
+ unsigned int block_offset, int neras)
{
int r, corrected = 0, res;
struct dm_buffer *buf;
unsigned int n, i, offset;
u8 *par, *block;
+ struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
- par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+ par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
if (IS_ERR(par))
return PTR_ERR(par);
@@ -158,7 +160,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
if (offset >= v->fec->io_size) {
dm_bufio_release(buf);
- par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+ par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
if (IS_ERR(par))
return PTR_ERR(par);
}
@@ -210,6 +212,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
u8 *bbuf, *rs_block;
u8 want_digest[HASH_MAX_DIGESTSIZE];
unsigned int n, k;
+ struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
if (neras)
*neras = 0;
@@ -248,7 +251,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
bufio = v->bufio;
}
- bbuf = dm_bufio_read(bufio, block, &buf);
+ bbuf = dm_bufio_read_with_ioprio(bufio, block, &buf, bio_prio(bio));
if (IS_ERR(bbuf)) {
DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
v->data_dev->name,
@@ -377,7 +380,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
if (unlikely(r < 0))
return r;
- r = fec_decode_bufs(v, fio, rsb, r, pos, neras);
+ r = fec_decode_bufs(v, io, fio, rsb, r, pos, neras);
if (r < 0)
return r;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 14e58ae70521..bb5da66da4c1 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -46,11 +46,12 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
-static DEFINE_STATIC_KEY_FALSE(use_tasklet_enabled);
+static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
struct dm_verity_prefetch_work {
struct work_struct work;
struct dm_verity *v;
+ unsigned short ioprio;
sector_t block;
unsigned int n_blocks;
};
@@ -294,10 +295,11 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
int r;
sector_t hash_block;
unsigned int offset;
+ struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
verity_hash_at_level(v, block, level, &hash_block, &offset);
- if (static_branch_unlikely(&use_tasklet_enabled) && io->in_tasklet) {
+ if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
data = dm_bufio_get(v->bufio, hash_block, &buf);
if (data == NULL) {
/*
@@ -307,8 +309,10 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
*/
return -EAGAIN;
}
- } else
- data = dm_bufio_read(v->bufio, hash_block, &buf);
+ } else {
+ data = dm_bufio_read_with_ioprio(v->bufio, hash_block,
+ &buf, bio_prio(bio));
+ }
if (IS_ERR(data))
return PTR_ERR(data);
@@ -323,15 +327,14 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
r = verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io), !io->in_tasklet);
+ verity_io_real_digest(v, io), !io->in_bh);
if (unlikely(r < 0))
goto release_ret_r;
if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
v->digest_size) == 0))
aux->hash_verified = 1;
- else if (static_branch_unlikely(&use_tasklet_enabled) &&
- io->in_tasklet) {
+ else if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
/*
* Error handling code (FEC included) cannot be run in a
* tasklet since it may sleep, so fallback to work-queue.
@@ -482,6 +485,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
return 0;
}
+static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ memcpy(data, io->recheck_buffer, len);
+ io->recheck_buffer += len;
+
+ return 0;
+}
+
+static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter start, sector_t cur_block)
+{
+ struct page *page;
+ void *buffer;
+ int r;
+ struct dm_io_request io_req;
+ struct dm_io_region io_loc;
+
+ page = mempool_alloc(&v->recheck_pool, GFP_NOIO);
+ buffer = page_to_virt(page);
+
+ io_req.bi_opf = REQ_OP_READ;
+ io_req.mem.type = DM_IO_KMEM;
+ io_req.mem.ptr.addr = buffer;
+ io_req.notify.fn = NULL;
+ io_req.client = v->io;
+ io_loc.bdev = v->data_dev->bdev;
+ io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT);
+ io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT);
+ r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
+ if (unlikely(r))
+ goto free_ret;
+
+ r = verity_hash(v, verity_io_hash_req(v, io), buffer,
+ 1 << v->data_dev_block_bits,
+ verity_io_real_digest(v, io), true);
+ if (unlikely(r))
+ goto free_ret;
+
+ if (memcmp(verity_io_real_digest(v, io),
+ verity_io_want_digest(v, io), v->digest_size)) {
+ r = -EIO;
+ goto free_ret;
+ }
+
+ io->recheck_buffer = buffer;
+ r = verity_for_bv_block(v, io, &start, verity_recheck_copy);
+ if (unlikely(r))
+ goto free_ret;
+
+ r = 0;
+free_ret:
+ mempool_free(page, &v->recheck_pool);
+
+ return r;
+}
+
static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
u8 *data, size_t len)
{
@@ -508,16 +568,14 @@ static int verity_verify_io(struct dm_verity_io *io)
{
bool is_zero;
struct dm_verity *v = io->v;
-#if defined(CONFIG_DM_VERITY_FEC)
struct bvec_iter start;
-#endif
struct bvec_iter iter_copy;
struct bvec_iter *iter;
struct crypto_wait wait;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
unsigned int b;
- if (static_branch_unlikely(&use_tasklet_enabled) && io->in_tasklet) {
+ if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
/*
* Copy the iterator in case we need to restart
* verification in a work-queue.
@@ -557,14 +615,11 @@ static int verity_verify_io(struct dm_verity_io *io)
continue;
}
- r = verity_hash_init(v, req, &wait, !io->in_tasklet);
+ r = verity_hash_init(v, req, &wait, !io->in_bh);
if (unlikely(r < 0))
return r;
-#if defined(CONFIG_DM_VERITY_FEC)
- if (verity_fec_is_enabled(v))
- start = *iter;
-#endif
+ start = *iter;
r = verity_for_io_block(v, io, iter, &wait);
if (unlikely(r < 0))
return r;
@@ -579,13 +634,16 @@ static int verity_verify_io(struct dm_verity_io *io)
if (v->validated_blocks)
set_bit(cur_block, v->validated_blocks);
continue;
- } else if (static_branch_unlikely(&use_tasklet_enabled) &&
- io->in_tasklet) {
+ } else if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
/*
* Error handling code (FEC included) cannot be run in a
* tasklet since it may sleep, so fallback to work-queue.
*/
return -EAGAIN;
+ } else if (verity_recheck(v, io, start, cur_block) == 0) {
+ if (v->validated_blocks)
+ set_bit(cur_block, v->validated_blocks);
+ continue;
#if defined(CONFIG_DM_VERITY_FEC)
} else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
cur_block, NULL, &start) == 0) {
@@ -630,7 +688,7 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
bio->bi_end_io = io->orig_bi_end_io;
bio->bi_status = status;
- if (!static_branch_unlikely(&use_tasklet_enabled) || !io->in_tasklet)
+ if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
bio_endio(bio);
@@ -640,17 +698,17 @@ static void verity_work(struct work_struct *w)
{
struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
- io->in_tasklet = false;
+ io->in_bh = false;
verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}
-static void verity_tasklet(unsigned long data)
+static void verity_bh_work(struct work_struct *w)
{
- struct dm_verity_io *io = (struct dm_verity_io *)data;
+ struct dm_verity_io *io = container_of(w, struct dm_verity_io, bh_work);
int err;
- io->in_tasklet = true;
+ io->in_bh = true;
err = verity_verify_io(io);
if (err == -EAGAIN || err == -ENOMEM) {
/* fallback to retrying with work-queue */
@@ -674,9 +732,9 @@ static void verity_end_io(struct bio *bio)
return;
}
- if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) {
- tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io);
- tasklet_schedule(&io->tasklet);
+ if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) {
+ INIT_WORK(&io->bh_work, verity_bh_work);
+ queue_work(system_bh_wq, &io->bh_work);
} else {
INIT_WORK(&io->work, verity_work);
queue_work(io->v->verify_wq, &io->work);
@@ -718,14 +776,16 @@ static void verity_prefetch_io(struct work_struct *work)
hash_block_end = v->hash_blocks - 1;
}
no_prefetch_cluster:
- dm_bufio_prefetch(v->bufio, hash_block_start,
- hash_block_end - hash_block_start + 1);
+ dm_bufio_prefetch_with_ioprio(v->bufio, hash_block_start,
+ hash_block_end - hash_block_start + 1,
+ pw->ioprio);
}
kfree(pw);
}
-static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
+static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io,
+ unsigned short ioprio)
{
sector_t block = io->block;
unsigned int n_blocks = io->n_blocks;
@@ -753,6 +813,7 @@ static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
pw->v = v;
pw->block = block;
pw->n_blocks = n_blocks;
+ pw->ioprio = ioprio;
queue_work(v->verify_wq, &pw->work);
}
@@ -795,7 +856,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
verity_fec_init_io(io);
- verity_submit_prefetch(v, io);
+ verity_submit_prefetch(v, io, bio_prio(bio));
submit_bio_noacct(bio);
@@ -844,7 +905,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
args++;
if (v->validated_blocks)
args++;
- if (v->use_tasklet)
+ if (v->use_bh_wq)
args++;
if (v->signature_key_desc)
args += DM_VERITY_ROOT_HASH_VERIFICATION_OPTS;
@@ -871,7 +932,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
if (v->validated_blocks)
DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE);
- if (v->use_tasklet)
+ if (v->use_bh_wq)
DMEMIT(" " DM_VERITY_OPT_TASKLET_VERIFY);
sz = verity_fec_status_table(v, sz, result, maxlen);
if (v->signature_key_desc)
@@ -963,6 +1024,10 @@ static void verity_dtr(struct dm_target *ti)
if (v->verify_wq)
destroy_workqueue(v->verify_wq);
+ mempool_exit(&v->recheck_pool);
+ if (v->io)
+ dm_io_client_destroy(v->io);
+
if (v->bufio)
dm_bufio_client_destroy(v->bufio);
@@ -986,8 +1051,8 @@ static void verity_dtr(struct dm_target *ti)
kfree(v->signature_key_desc);
- if (v->use_tasklet)
- static_branch_dec(&use_tasklet_enabled);
+ if (v->use_bh_wq)
+ static_branch_dec(&use_bh_wq_enabled);
kfree(v);
@@ -1121,8 +1186,8 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
continue;
} else if (!strcasecmp(arg_name, DM_VERITY_OPT_TASKLET_VERIFY)) {
- v->use_tasklet = true;
- static_branch_inc(&use_tasklet_enabled);
+ v->use_bh_wq = true;
+ static_branch_inc(&use_bh_wq_enabled);
continue;
} else if (verity_is_fec_opt_arg(arg_name)) {
@@ -1293,7 +1358,7 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
v->tfm = crypto_alloc_ahash(v->alg_name, 0,
- v->use_tasklet ? CRYPTO_ALG_ASYNC : 0);
+ v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0);
if (IS_ERR(v->tfm)) {
ti->error = "Cannot initialize hash function";
r = PTR_ERR(v->tfm);
@@ -1401,10 +1466,24 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
v->hash_blocks = hash_position;
+ r = mempool_init_page_pool(&v->recheck_pool, 1, 0);
+ if (unlikely(r)) {
+ ti->error = "Cannot allocate mempool";
+ goto bad;
+ }
+
+ v->io = dm_io_client_create();
+ if (IS_ERR(v->io)) {
+ r = PTR_ERR(v->io);
+ v->io = NULL;
+ ti->error = "Cannot allocate dm io";
+ goto bad;
+ }
+
v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
dm_bufio_alloc_callback, NULL,
- v->use_tasklet ? DM_BUFIO_CLIENT_NO_SLEEP : 0);
+ v->use_bh_wq ? DM_BUFIO_CLIENT_NO_SLEEP : 0);
if (IS_ERR(v->bufio)) {
ti->error = "Cannot initialize dm-bufio";
r = PTR_ERR(v->bufio);
@@ -1423,7 +1502,7 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
* reducing wait times when reading from a dm-verity device.
*
* Also as required for the "try_verify_in_tasklet" feature: WQ_HIGHPRI
- * allows verify_wq to preempt softirq since verification in tasklet
+ * allows verify_wq to preempt softirq since verification in BH workqueue
* will fall-back to using it for error handling (or if the bufio cache
* doesn't have required hashes).
*/
@@ -1507,8 +1586,8 @@ int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned i
static struct target_type verity_target = {
.name = "verity",
- .features = DM_TARGET_IMMUTABLE,
- .version = {1, 9, 0},
+ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
+ .version = {1, 10, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index f9d522c870e6..20b1bcf03474 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -11,6 +11,7 @@
#ifndef DM_VERITY_H
#define DM_VERITY_H
+#include <linux/dm-io.h>
#include <linux/dm-bufio.h>
#include <linux/device-mapper.h>
#include <linux/interrupt.h>
@@ -53,7 +54,7 @@ struct dm_verity {
unsigned char levels; /* the number of tree levels */
unsigned char version;
bool hash_failed:1; /* set if hash of any block failed */
- bool use_tasklet:1; /* try to verify in tasklet before work-queue */
+ bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */
unsigned int digest_size; /* digest size for the current hash algorithm */
unsigned int ahash_reqsize;/* the size of temporary space for crypto */
enum verity_mode mode; /* mode for handling verification errors */
@@ -68,6 +69,9 @@ struct dm_verity {
unsigned long *validated_blocks; /* bitset blocks validated */
char *signature_key_desc; /* signature keyring reference */
+
+ struct dm_io_client *io;
+ mempool_t recheck_pool;
};
struct dm_verity_io {
@@ -76,14 +80,16 @@ struct dm_verity_io {
/* original value of bio->bi_end_io */
bio_end_io_t *orig_bi_end_io;
+ struct bvec_iter iter;
+
sector_t block;
unsigned int n_blocks;
- bool in_tasklet;
-
- struct bvec_iter iter;
+ bool in_bh;
struct work_struct work;
- struct tasklet_struct tasklet;
+ struct work_struct bh_work;
+
+ char *recheck_buffer;
/*
* Three variably-size fields follow this struct:
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 074cb785eafc..7ce8847b3404 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -299,7 +299,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
long i;
wc->memory_map = NULL;
- pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL);
+ pages = vmalloc_array(p, sizeof(struct page *));
if (!pages) {
r = -ENOMEM;
goto err2;
@@ -330,7 +330,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
r = -ENOMEM;
goto err3;
}
- kvfree(pages);
+ vfree(pages);
wc->memory_vmapped = true;
}
@@ -341,7 +341,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
return 0;
err3:
- kvfree(pages);
+ vfree(pages);
err2:
dax_read_unlock(id);
err1:
@@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
req.notify.context = &endio;
/* writing via async dm-io (implied by notify.fn above) won't return an error */
- (void) dm_io(&req, 1, &region, NULL);
+ (void) dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
i = j;
}
@@ -568,7 +568,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
req.notify.fn = NULL;
req.notify.context = NULL;
- r = dm_io(&req, 1, &region, NULL);
+ r = dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
if (unlikely(r))
writecache_error(wc, r, "error writing superblock");
}
@@ -596,7 +596,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
req.client = wc->dm_io;
req.notify.fn = NULL;
- r = dm_io(&req, 1, &region, NULL);
+ r = dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
if (unlikely(r))
writecache_error(wc, r, "error flushing metadata: %d", r);
}
@@ -962,7 +962,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
if (wc->entries)
return 0;
- wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks));
+ wc->entries = vmalloc_array(wc->n_blocks, sizeof(struct wc_entry));
if (!wc->entries)
return -ENOMEM;
for (b = 0; b < wc->n_blocks; b++) {
@@ -990,7 +990,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors
req.client = wc->dm_io;
req.notify.fn = NULL;
- return dm_io(&req, 1, &region, NULL);
+ return dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
}
static void writecache_resume(struct dm_target *ti)
@@ -2776,5 +2776,5 @@ static struct target_type writecache_target = {
module_dm(writecache);
MODULE_DESCRIPTION(DM_NAME " writecache target");
-MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
+MODULE_AUTHOR("Mikulas Patocka <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index fdfe30f7b697..8156881a31de 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1655,10 +1655,13 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zone->dev;
+ unsigned int noio_flag;
+ noio_flag = memalloc_noio_save();
ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
dmz_start_sect(zmd, zone),
- zmd->zone_nr_sectors, GFP_NOIO);
+ zmd->zone_nr_sectors);
+ memalloc_noio_restore(noio_flag);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
zone->id, ret);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8dcabf84d866..06263b0a7b58 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -726,7 +726,8 @@ static struct table_device *open_table_device(struct mapped_device *md,
dev_t dev, blk_mode_t mode)
{
struct table_device *td;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
+ struct block_device *bdev;
u64 part_off;
int r;
@@ -735,34 +736,36 @@ static struct table_device *open_table_device(struct mapped_device *md,
return ERR_PTR(-ENOMEM);
refcount_set(&td->count, 1);
- bdev_handle = bdev_open_by_dev(dev, mode, _dm_claim_ptr, NULL);
- if (IS_ERR(bdev_handle)) {
- r = PTR_ERR(bdev_handle);
+ bdev_file = bdev_file_open_by_dev(dev, mode, _dm_claim_ptr, NULL);
+ if (IS_ERR(bdev_file)) {
+ r = PTR_ERR(bdev_file);
goto out_free_td;
}
+ bdev = file_bdev(bdev_file);
+
/*
* We can be called before the dm disk is added. In that case we can't
* register the holder relation here. It will be done once add_disk was
* called.
*/
if (md->disk->slave_dir) {
- r = bd_link_disk_holder(bdev_handle->bdev, md->disk);
+ r = bd_link_disk_holder(bdev, md->disk);
if (r)
goto out_blkdev_put;
}
td->dm_dev.mode = mode;
- td->dm_dev.bdev = bdev_handle->bdev;
- td->dm_dev.bdev_handle = bdev_handle;
- td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev_handle->bdev, &part_off,
+ td->dm_dev.bdev = bdev;
+ td->dm_dev.bdev_file = bdev_file;
+ td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off,
NULL, NULL);
format_dev_t(td->dm_dev.name, dev);
list_add(&td->list, &md->table_devices);
return td;
out_blkdev_put:
- bdev_release(bdev_handle);
+ fput(bdev_file);
out_free_td:
kfree(td);
return ERR_PTR(r);
@@ -775,7 +778,7 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
{
if (md->disk->slave_dir)
bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
- bdev_release(td->dm_dev.bdev_handle);
+ fput(td->dm_dev.bdev_file);
put_dax(td->dm_dev.dax_dev);
list_del(&td->list);
kfree(td);
@@ -2098,8 +2101,8 @@ static struct mapped_device *alloc_dev(int minor)
* established. If request-based table is loaded: blk-mq will
* override accordingly.
*/
- md->disk = blk_alloc_disk(md->numa_node_id);
- if (!md->disk)
+ md->disk = blk_alloc_disk(NULL, md->numa_node_id);
+ if (IS_ERR(md->disk))
goto bad;
md->queue = md->disk->queue;
@@ -2945,6 +2948,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend
static void __dm_internal_resume(struct mapped_device *md)
{
+ int r;
+ struct dm_table *map;
+
BUG_ON(!md->internal_suspend_count);
if (--md->internal_suspend_count)
@@ -2953,12 +2959,23 @@ static void __dm_internal_resume(struct mapped_device *md)
if (dm_suspended_md(md))
goto done; /* resume from nested suspend */
- /*
- * NOTE: existing callers don't need to call dm_table_resume_targets
- * (which may fail -- so best to avoid it for now by passing NULL map)
- */
- (void) __dm_resume(md, NULL);
-
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ r = __dm_resume(md, map);
+ if (r) {
+ /*
+ * If a preresume method of some target failed, we are in a
+ * tricky situation. We can't return an error to the caller. We
+ * can't fake success because then the "resume" and
+ * "postsuspend" methods would not be paired correctly, and it
+ * would break various targets, for example it would cause list
+ * corruption in the "origin" target.
+ *
+ * So, we fake normal suspend here, to make sure that the
+ * "resume" and "postsuspend" methods will be paired correctly.
+ */
+ DMERR("Preresume method failed: %d", r);
+ set_bit(DMF_SUSPENDED, &md->flags);
+ }
done:
clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
smp_mb__after_atomic();
@@ -3512,5 +3529,5 @@ module_param(swap_bios, int, 0644);
MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");
MODULE_DESCRIPTION(DM_NAME " driver");
-MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 9672f75c3050..059afc24c08b 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -234,7 +234,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
sector_t doff;
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
- if (pg_index == store->file_pages - 1) {
+ /* we compare length (page numbers), not page offset. */
+ if ((pg_index - store->sb_index) == store->file_pages - 1) {
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
if (last_page_size == 0)
@@ -438,8 +439,8 @@ static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
struct page *page = store->filemap[pg_index];
if (mddev_is_clustered(bitmap->mddev)) {
- pg_index += bitmap->cluster_slot *
- DIV_ROUND_UP(store->bytes, PAGE_SIZE);
+ /* go to node bitmap area starting point */
+ pg_index += store->sb_index;
}
if (store->file)
@@ -952,6 +953,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
unsigned long index = file_page_index(store, chunk);
unsigned long node_offset = 0;
+ index += store->sb_index;
if (mddev_is_clustered(bitmap->mddev))
node_offset = bitmap->cluster_slot * store->file_pages;
@@ -982,6 +984,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
unsigned long index = file_page_index(store, chunk);
unsigned long node_offset = 0;
+ index += store->sb_index;
if (mddev_is_clustered(bitmap->mddev))
node_offset = bitmap->cluster_slot * store->file_pages;
@@ -1043,9 +1046,8 @@ void md_bitmap_unplug(struct bitmap *bitmap)
if (dirty || need_write) {
if (!writing) {
md_bitmap_wait_writes(bitmap);
- if (bitmap->mddev->queue)
- blk_add_trace_msg(bitmap->mddev->queue,
- "md bitmap_unplug");
+ mddev_add_trace_msg(bitmap->mddev,
+ "md bitmap_unplug");
}
clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
filemap_write_page(bitmap, i, false);
@@ -1316,9 +1318,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
}
bitmap->allclean = 1;
- if (bitmap->mddev->queue)
- blk_add_trace_msg(bitmap->mddev->queue,
- "md bitmap_daemon_work");
+ mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");
/* Any file-page which is PENDING now needs to be written.
* So set NEEDWRITE now, then after we make any last-minute changes
diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h
deleted file mode 100644
index 5587eeedb882..000000000000
--- a/drivers/md/md-linear.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINEAR_H
-#define _LINEAR_H
-
-struct dev_info {
- struct md_rdev *rdev;
- sector_t end_sector;
-};
-
-struct linear_conf
-{
- struct rcu_head rcu;
- sector_t array_sectors;
- int raid_disks; /* a copy of mddev->raid_disks */
- struct dev_info disks[] __counted_by(raid_disks);
-};
-#endif
diff --git a/drivers/md/md-multipath.h b/drivers/md/md-multipath.h
deleted file mode 100644
index b3099e5fc4d7..000000000000
--- a/drivers/md/md-multipath.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MULTIPATH_H
-#define _MULTIPATH_H
-
-struct multipath_info {
- struct md_rdev *rdev;
-};
-
-struct mpconf {
- struct mddev *mddev;
- struct multipath_info *multipaths;
- int raid_disks;
- spinlock_t device_lock;
- struct list_head retry_list;
-
- mempool_t pool;
-};
-
-/*
- * this is our 'private' 'collective' MULTIPATH buffer head.
- * it contains information about what kind of IO operations were started
- * for this MULTIPATH operation, and about their status:
- */
-
-struct multipath_bh {
- struct mddev *mddev;
- struct bio *master_bio;
- struct bio bio;
- int path;
- struct list_head retry_list;
-};
-#endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2266358d8074..e575e74aabf5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -65,7 +65,6 @@
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>
-#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"
@@ -99,18 +98,6 @@ static void mddev_detach(struct mddev *mddev);
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
-enum md_ro_state {
- MD_RDWR,
- MD_RDONLY,
- MD_AUTO_READ,
- MD_MAX_STATE
-};
-
-static bool md_is_rdwr(struct mddev *mddev)
-{
- return (mddev->ro == MD_RDWR);
-}
-
/*
* Default number of read corrections we'll attempt on an rdev
* before ejecting it from the array. We divide the read error
@@ -378,7 +365,7 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
return true;
}
-void md_handle_request(struct mddev *mddev, struct bio *bio)
+bool md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
if (is_suspended(mddev, bio)) {
@@ -386,7 +373,7 @@ check_suspended:
/* Bail out if REQ_NOWAIT is set for the bio */
if (bio->bi_opf & REQ_NOWAIT) {
bio_wouldblock_error(bio);
- return;
+ return true;
}
for (;;) {
prepare_to_wait(&mddev->sb_wait, &__wait,
@@ -402,10 +389,13 @@ check_suspended:
if (!mddev->pers->make_request(mddev, bio)) {
percpu_ref_put(&mddev->active_io);
+ if (!mddev->gendisk && mddev->pers->prepare_suspend)
+ return false;
goto check_suspended;
}
percpu_ref_put(&mddev->active_io);
+ return true;
}
EXPORT_SYMBOL(md_handle_request);
@@ -529,6 +519,24 @@ void mddev_resume(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(mddev_resume);
+/* sync bdev before setting device to readonly or stopping raid*/
+static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_num)
+{
+ mutex_lock(&mddev->open_mutex);
+ if (mddev->pers && atomic_read(&mddev->openers) > opener_num) {
+ mutex_unlock(&mddev->open_mutex);
+ return -EBUSY;
+ }
+ if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
+ mutex_unlock(&mddev->open_mutex);
+ return -EBUSY;
+ }
+ mutex_unlock(&mddev->open_mutex);
+
+ sync_blockdev(mddev->gendisk->part0);
+ return 0;
+}
+
/*
* Generic flush handling for md
*/
@@ -579,8 +587,12 @@ static void submit_flushes(struct work_struct *ws)
rcu_read_lock();
}
rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending))
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ /* The pair is percpu_ref_get() from md_flush_request() */
+ percpu_ref_put(&mddev->active_io);
+
queue_work(md_wq, &mddev->flush_work);
+ }
}
static void md_submit_flush_data(struct work_struct *ws)
@@ -2402,7 +2414,7 @@ int md_integrity_register(struct mddev *mddev)
if (list_empty(&mddev->disks))
return 0; /* nothing to do */
- if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
+ if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
return 0; /* shouldn't register, or already is */
rdev_for_each(rdev, mddev) {
/* skip spares and non-functional disks */
@@ -2455,7 +2467,7 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
struct blk_integrity *bi_mddev;
- if (!mddev->gendisk)
+ if (mddev_is_dm(mddev))
return 0;
bi_mddev = blk_get_integrity(mddev->gendisk);
@@ -2562,6 +2574,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
fail:
pr_warn("md: failed to register dev-%s for %s\n",
b, mdname(mddev));
+ mddev_destroy_serial_pool(mddev, rdev);
return err;
}
@@ -2578,7 +2591,7 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev)
if (test_bit(AutoDetected, &rdev->flags))
md_autodetect_dev(rdev->bdev->bd_dev);
#endif
- bdev_release(rdev->bdev_handle);
+ fput(rdev->bdev_file);
rdev->bdev = NULL;
kobject_put(&rdev->kobj);
}
@@ -2591,7 +2604,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
list_del_rcu(&rdev->same_set);
pr_debug("md: unbind<%pg>\n", rdev->bdev);
mddev_destroy_serial_pool(rdev->mddev, rdev);
- rdev->mddev = NULL;
+ WRITE_ONCE(rdev->mddev, NULL);
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
sysfs_put(rdev->sysfs_unack_badblocks);
@@ -2847,8 +2860,7 @@ repeat:
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
mdname(mddev), mddev->in_sync);
- if (mddev->queue)
- blk_add_trace_msg(mddev->queue, "md md_update_sb");
+ mddev_add_trace_msg(mddev, "md md_update_sb");
rewrite:
md_bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
@@ -2929,7 +2941,6 @@ static int add_bound_rdev(struct md_rdev *rdev)
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_new_event();
- md_wakeup_thread(mddev->thread);
return 0;
}
@@ -3044,10 +3055,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (err == 0) {
md_kick_rdev_from_array(rdev);
- if (mddev->pers) {
+ if (mddev->pers)
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
- }
md_new_event();
}
}
@@ -3077,7 +3086,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
err = 0;
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
@@ -3115,7 +3123,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
!test_bit(Replacement, &rdev->flags))
set_bit(WantReplacement, &rdev->flags);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
err = 0;
} else if (cmd_match(buf, "-want_replacement")) {
/* Clearing 'want_replacement' is always allowed.
@@ -3245,7 +3252,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
if (rdev->raid_disk >= 0)
return -EBUSY;
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
} else if (rdev->mddev->pers) {
/* Activating a spare .. or possibly reactivating
* if we ever get bitmaps working here.
@@ -3339,8 +3345,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
if (kstrtoull(buf, 10, &new_offset) < 0)
return -EINVAL;
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
if (new_offset == rdev->data_offset)
/* reset is always permitted */
@@ -3671,7 +3676,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
struct kernfs_node *kn = NULL;
bool suspend = false;
ssize_t rv;
- struct mddev *mddev = rdev->mddev;
+ struct mddev *mddev = READ_ONCE(rdev->mddev);
if (!entry->store)
return -EIO;
@@ -3773,16 +3778,16 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
if (err)
goto out_clear_rdev;
- rdev->bdev_handle = bdev_open_by_dev(newdev,
+ rdev->bdev_file = bdev_file_open_by_dev(newdev,
BLK_OPEN_READ | BLK_OPEN_WRITE,
super_format == -2 ? &claim_rdev : rdev, NULL);
- if (IS_ERR(rdev->bdev_handle)) {
+ if (IS_ERR(rdev->bdev_file)) {
pr_warn("md: could not open device unknown-block(%u,%u).\n",
MAJOR(newdev), MINOR(newdev));
- err = PTR_ERR(rdev->bdev_handle);
+ err = PTR_ERR(rdev->bdev_file);
goto out_clear_rdev;
}
- rdev->bdev = rdev->bdev_handle->bdev;
+ rdev->bdev = file_bdev(rdev->bdev_file);
kobject_init(&rdev->kobj, &rdev_ktype);
@@ -3813,7 +3818,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
return rdev;
out_blkdev_put:
- bdev_release(rdev->bdev_handle);
+ fput(rdev->bdev_file);
out_clear_rdev:
md_rdev_clear(rdev);
out_free_rdev:
@@ -4013,8 +4018,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
*/
rv = -EBUSY;
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->reshape_position != MaxSector ||
mddev->sysfs_active)
goto out_unlock;
@@ -4164,7 +4168,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
del_timer_sync(&mddev->safemode_timer);
}
- blk_set_stacking_limits(&mddev->queue->limits);
pers->run(mddev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (!mddev->thread)
@@ -4471,8 +4474,8 @@ array_state_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", array_states[st]);
}
-static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
-static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
+static int do_md_stop(struct mddev *mddev, int ro);
+static int md_set_readonly(struct mddev *mddev);
static int restart_array(struct mddev *mddev);
static ssize_t
@@ -4489,6 +4492,17 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
case broken: /* cannot be set */
case bad_word:
return -EINVAL;
+ case clear:
+ case readonly:
+ case inactive:
+ case read_auto:
+ if (!mddev->pers || !md_is_rdwr(mddev))
+ break;
+ /* write sysfs will not open mddev and opener should be 0 */
+ err = mddev_set_closing_and_sync_blockdev(mddev, 0);
+ if (err)
+ return err;
+ break;
default:
break;
}
@@ -4522,14 +4536,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
case inactive:
/* stop an active array, return 0 otherwise */
if (mddev->pers)
- err = do_md_stop(mddev, 2, NULL);
+ err = do_md_stop(mddev, 2);
break;
case clear:
- err = do_md_stop(mddev, 0, NULL);
+ err = do_md_stop(mddev, 0);
break;
case readonly:
if (mddev->pers)
- err = md_set_readonly(mddev, NULL);
+ err = md_set_readonly(mddev);
else {
mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
@@ -4539,7 +4553,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
case read_auto:
if (mddev->pers) {
if (md_is_rdwr(mddev))
- err = md_set_readonly(mddev, NULL);
+ err = md_set_readonly(mddev);
else if (mddev->ro == MD_RDONLY)
err = restart_array(mddev);
if (err == 0) {
@@ -4588,6 +4602,11 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
sysfs_notify_dirent_safe(mddev->sysfs_state);
}
mddev_unlock(mddev);
+
+ if (st == readonly || st == read_auto || st == inactive ||
+ (err && st == clear))
+ clear_bit(MD_CLOSING, &mddev->flags);
+
return err ?: len;
}
static struct md_sysfs_entry md_array_state =
@@ -4915,6 +4934,35 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
mddev_lock_nointr(mddev);
}
+void md_idle_sync_thread(struct mddev *mddev)
+{
+ lockdep_assert_held(&mddev->reconfig_mutex);
+
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ stop_sync_thread(mddev, true, true);
+}
+EXPORT_SYMBOL_GPL(md_idle_sync_thread);
+
+void md_frozen_sync_thread(struct mddev *mddev)
+{
+ lockdep_assert_held(&mddev->reconfig_mutex);
+
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ stop_sync_thread(mddev, true, false);
+}
+EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
+
+void md_unfrozen_sync_thread(struct mddev *mddev)
+{
+ lockdep_assert_held(&mddev->reconfig_mutex);
+
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ sysfs_notify_dirent_safe(mddev->sysfs_action);
+}
+EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
+
static void idle_sync_thread(struct mddev *mddev)
{
mutex_lock(&mddev->sync_mutex);
@@ -5706,6 +5754,51 @@ static const struct kobj_type md_ktype = {
int mdp_major = 0;
+/* stack the limit for all rdevs into lim */
+void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
+{
+ struct md_rdev *rdev;
+
+ rdev_for_each(rdev, mddev) {
+ queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
+ mddev->gendisk->disk_name);
+ }
+}
+EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
+
+/* apply the extra stacking limits from a new rdev into mddev */
+int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
+{
+ struct queue_limits lim;
+
+ if (mddev_is_dm(mddev))
+ return 0;
+
+ lim = queue_limits_start_update(mddev->gendisk->queue);
+ queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
+ mddev->gendisk->disk_name);
+ return queue_limits_commit_update(mddev->gendisk->queue, &lim);
+}
+EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);
+
+/* update the optimal I/O size after a reshape */
+void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes)
+{
+ struct queue_limits lim;
+
+ if (mddev_is_dm(mddev))
+ return;
+
+ /* don't bother updating io_opt if we can't suspend the array */
+ if (mddev_suspend(mddev, false) < 0)
+ return;
+ lim = queue_limits_start_update(mddev->gendisk->queue);
+ lim.io_opt = lim.io_min * nr_stripes;
+ queue_limits_commit_update(mddev->gendisk->queue, &lim);
+ mddev_resume(mddev);
+}
+EXPORT_SYMBOL_GPL(mddev_update_io_opt);
+
static void mddev_delayed_delete(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);
@@ -5770,10 +5863,11 @@ struct mddev *md_alloc(dev_t dev, char *name)
*/
mddev->hold_active = UNTIL_STOP;
- error = -ENOMEM;
- disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = blk_alloc_disk(NULL, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ error = PTR_ERR(disk);
goto out_free_mddev;
+ }
disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift;
@@ -5787,9 +5881,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
disk->fops = &md_fops;
disk->private_data = mddev;
- mddev->queue = disk->queue;
- blk_set_stacking_limits(&mddev->queue->limits);
- blk_queue_write_cache(mddev->queue, true, true);
+ blk_queue_write_cache(disk->queue, true, true);
disk->events |= DISK_EVENT_MEDIA_CHANGE;
mddev->gendisk = disk;
error = add_disk(disk);
@@ -5931,7 +6023,7 @@ int md_run(struct mddev *mddev)
invalidate_bdev(rdev->bdev);
if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
mddev->ro = MD_RDONLY;
- if (mddev->gendisk)
+ if (!mddev_is_dm(mddev))
set_disk_ro(mddev->gendisk, 1);
}
@@ -6034,7 +6126,10 @@ int md_run(struct mddev *mddev)
pr_warn("True protection against single-disk failure might be compromised.\n");
}
- mddev->recovery = 0;
+ /* dm-raid expect sync_thread to be frozen until resume */
+ if (mddev->gendisk)
+ mddev->recovery = 0;
+
/* may be over-ridden by personality */
mddev->resync_max_sectors = mddev->dev_sectors;
@@ -6090,7 +6185,8 @@ int md_run(struct mddev *mddev)
}
}
- if (mddev->queue) {
+ if (!mddev_is_dm(mddev)) {
+ struct request_queue *q = mddev->gendisk->queue;
bool nonrot = true;
rdev_for_each(rdev, mddev) {
@@ -6102,14 +6198,14 @@ int md_run(struct mddev *mddev)
if (mddev->degraded)
nonrot = false;
if (nonrot)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
else
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
/* Set the NOWAIT flags if all underlying devices support it */
if (nowait)
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
@@ -6188,7 +6284,6 @@ int do_md_run(struct mddev *mddev)
/* run start up tasks that require md_thread */
md_start(mddev);
- md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
@@ -6209,7 +6304,6 @@ int md_start(struct mddev *mddev)
if (mddev->pers->start) {
set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
ret = mddev->pers->start(mddev);
clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
md_wakeup_thread(mddev->sync_thread);
@@ -6254,7 +6348,6 @@ static int restart_array(struct mddev *mddev)
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
sysfs_notify_dirent_safe(mddev->sysfs_state);
return 0;
@@ -6274,7 +6367,15 @@ static void md_clean(struct mddev *mddev)
mddev->persistent = 0;
mddev->level = LEVEL_NONE;
mddev->clevel[0] = 0;
- mddev->flags = 0;
+ /*
+ * Don't clear MD_CLOSING, or mddev can be opened again.
+ * 'hold_active != 0' means mddev is still in the creation
+ * process and will be used later.
+ */
+ if (mddev->hold_active)
+ mddev->flags = 0;
+ else
+ mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
mddev->sb_flags = 0;
mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
@@ -6311,7 +6412,6 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
- stop_sync_thread(mddev, true, false);
del_timer_sync(&mddev->safemode_timer);
if (mddev->pers && mddev->pers->quiesce) {
@@ -6336,6 +6436,8 @@ static void __md_stop_writes(struct mddev *mddev)
void md_stop_writes(struct mddev *mddev)
{
mddev_lock_nointr(mddev);
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ stop_sync_thread(mddev, true, false);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
@@ -6349,8 +6451,10 @@ static void mddev_detach(struct mddev *mddev)
mddev->pers->quiesce(mddev, 0);
}
md_unregister_thread(mddev, &mddev->thread);
- if (mddev->queue)
- blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+
+ /* the unplug fn references 'conf' */
+ if (!mddev_is_dm(mddev))
+ blk_sync_queue(mddev->gendisk->queue);
}
static void __md_stop(struct mddev *mddev)
@@ -6387,7 +6491,8 @@ void md_stop(struct mddev *mddev)
EXPORT_SYMBOL_GPL(md_stop);
-static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
+/* ensure 'mddev->pers' exist before calling md_set_readonly() */
+static int md_set_readonly(struct mddev *mddev)
{
int err = 0;
int did_freeze = 0;
@@ -6398,7 +6503,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
stop_sync_thread(mddev, false, false);
@@ -6406,36 +6510,29 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
- mutex_lock(&mddev->open_mutex);
- if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
- mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
err = -EBUSY;
goto out;
}
- if (mddev->pers) {
- __md_stop_writes(mddev);
-
- if (mddev->ro == MD_RDONLY) {
- err = -ENXIO;
- goto out;
- }
+ __md_stop_writes(mddev);
- mddev->ro = MD_RDONLY;
- set_disk_ro(mddev->gendisk, 1);
+ if (mddev->ro == MD_RDONLY) {
+ err = -ENXIO;
+ goto out;
}
+ mddev->ro = MD_RDONLY;
+ set_disk_ro(mddev->gendisk, 1);
+
out:
- if ((mddev->pers && !err) || did_freeze) {
+ if (!err || did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_state);
}
- mutex_unlock(&mddev->open_mutex);
return err;
}
@@ -6443,8 +6540,7 @@ out:
* 0 - completely stop and dis-assemble array
* 2 - stop but do not disassemble array
*/
-static int do_md_stop(struct mddev *mddev, int mode,
- struct block_device *bdev)
+static int do_md_stop(struct mddev *mddev, int mode)
{
struct gendisk *disk = mddev->gendisk;
struct md_rdev *rdev;
@@ -6453,22 +6549,16 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
stop_sync_thread(mddev, true, false);
- mutex_lock(&mddev->open_mutex);
- if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
- mddev->sysfs_active ||
- mddev->sync_thread ||
+ if (mddev->sysfs_active ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
- mutex_unlock(&mddev->open_mutex);
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
return -EBUSY;
}
@@ -6487,13 +6577,11 @@ static int do_md_stop(struct mddev *mddev, int mode,
sysfs_unlink_rdev(mddev, rdev);
set_capacity_and_notify(disk, 0);
- mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
if (!md_is_rdwr(mddev))
mddev->ro = MD_RDWR;
- } else
- mutex_unlock(&mddev->open_mutex);
+ }
/*
* Free resources if final stop
*/
@@ -6539,7 +6627,7 @@ static void autorun_array(struct mddev *mddev)
err = do_md_run(mddev);
if (err) {
pr_warn("md: do_md_run() returned %d\n", err);
- do_md_stop(mddev, 0, NULL);
+ do_md_stop(mddev, 0);
}
}
@@ -7009,9 +7097,7 @@ kick_rdev:
md_kick_rdev_from_array(rdev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- if (mddev->thread)
- md_wakeup_thread(mddev->thread);
- else
+ if (!mddev->thread)
md_update_sb(mddev, 1);
md_new_event();
@@ -7086,14 +7172,13 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
if (!bdev_nowait(rdev->bdev)) {
pr_info("%s: Disabling nowait because %pg does not support nowait\n",
mdname(mddev), rdev->bdev);
- blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
}
/*
* Kick recovery, maybe this spare has to be added to the
* array immediately.
*/
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
md_new_event();
return 0;
@@ -7307,8 +7392,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
* of each device. If num_sectors is zero, we find the largest size
* that fits.
*/
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- mddev->sync_thread)
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
if (!md_is_rdwr(mddev))
return -EROFS;
@@ -7325,10 +7409,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (!rv) {
if (mddev_is_clustered(mddev))
md_cluster_ops->update_size(mddev, old_dev_sectors);
- else if (mddev->queue) {
+ else if (!mddev_is_dm(mddev))
set_capacity_and_notify(mddev->gendisk,
mddev->array_sectors);
- }
}
return rv;
}
@@ -7345,8 +7428,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
return -EINVAL;
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
mddev->reshape_position != MaxSector)
return -EBUSY;
@@ -7542,16 +7624,17 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static inline bool md_ioctl_valid(unsigned int cmd)
+static inline int md_ioctl_valid(unsigned int cmd)
{
switch (cmd) {
- case ADD_NEW_DISK:
case GET_ARRAY_INFO:
- case GET_BITMAP_FILE:
case GET_DISK_INFO:
+ case RAID_VERSION:
+ return 0;
+ case ADD_NEW_DISK:
+ case GET_BITMAP_FILE:
case HOT_ADD_DISK:
case HOT_REMOVE_DISK:
- case RAID_VERSION:
case RESTART_ARRAY_RW:
case RUN_ARRAY:
case SET_ARRAY_INFO:
@@ -7560,9 +7643,11 @@ static inline bool md_ioctl_valid(unsigned int cmd)
case STOP_ARRAY:
case STOP_ARRAY_RO:
case CLUSTERED_DISK_NACK:
- return true;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return 0;
default:
- return false;
+ return -ENOTTY;
}
}
@@ -7620,31 +7705,17 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
int err = 0;
void __user *argp = (void __user *)arg;
struct mddev *mddev = NULL;
- bool did_set_md_closing = false;
-
- if (!md_ioctl_valid(cmd))
- return -ENOTTY;
- switch (cmd) {
- case RAID_VERSION:
- case GET_ARRAY_INFO:
- case GET_DISK_INFO:
- break;
- default:
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- }
+ err = md_ioctl_valid(cmd);
+ if (err)
+ return err;
/*
* Commands dealing with the RAID driver but not any
* particular array:
*/
- switch (cmd) {
- case RAID_VERSION:
- err = get_version(argp);
- goto out;
- default:;
- }
+ if (cmd == RAID_VERSION)
+ return get_version(argp);
/*
* Commands creating/starting a new array:
@@ -7652,35 +7723,23 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
mddev = bdev->bd_disk->private_data;
- if (!mddev) {
- BUG();
- goto out;
- }
-
/* Some actions do not requires the mutex */
switch (cmd) {
case GET_ARRAY_INFO:
if (!mddev->raid_disks && !mddev->external)
- err = -ENODEV;
- else
- err = get_array_info(mddev, argp);
- goto out;
+ return -ENODEV;
+ return get_array_info(mddev, argp);
case GET_DISK_INFO:
if (!mddev->raid_disks && !mddev->external)
- err = -ENODEV;
- else
- err = get_disk_info(mddev, argp);
- goto out;
+ return -ENODEV;
+ return get_disk_info(mddev, argp);
case SET_DISK_FAULTY:
- err = set_disk_faulty(mddev, new_decode_dev(arg));
- goto out;
+ return set_disk_faulty(mddev, new_decode_dev(arg));
case GET_BITMAP_FILE:
- err = get_bitmap_file(mddev, argp);
- goto out;
-
+ return get_bitmap_file(mddev, argp);
}
if (cmd == HOT_REMOVE_DISK)
@@ -7693,20 +7752,9 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
/* Need to flush page cache, and ensure no-one else opens
* and writes
*/
- mutex_lock(&mddev->open_mutex);
- if (mddev->pers && atomic_read(&mddev->openers) > 1) {
- mutex_unlock(&mddev->open_mutex);
- err = -EBUSY;
- goto out;
- }
- if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
- mutex_unlock(&mddev->open_mutex);
- err = -EBUSY;
- goto out;
- }
- did_set_md_closing = true;
- mutex_unlock(&mddev->open_mutex);
- sync_blockdev(bdev);
+ err = mddev_set_closing_and_sync_blockdev(mddev, 1);
+ if (err)
+ return err;
}
if (!md_is_rdwr(mddev))
@@ -7747,11 +7795,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
goto unlock;
case STOP_ARRAY:
- err = do_md_stop(mddev, 0, bdev);
+ err = do_md_stop(mddev, 0);
goto unlock;
case STOP_ARRAY_RO:
- err = md_set_readonly(mddev, bdev);
+ if (mddev->pers)
+ err = md_set_readonly(mddev);
goto unlock;
case HOT_REMOVE_DISK:
@@ -7846,7 +7895,7 @@ unlock:
mddev_unlock(mddev);
out:
- if(did_set_md_closing)
+ if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY))
clear_bit(MD_CLOSING, &mddev->flags);
return err;
}
@@ -8683,10 +8732,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
bio_chain(discard_bio, bio);
bio_clone_blkg_association(discard_bio, bio);
- if (mddev->gendisk)
- trace_block_bio_remap(discard_bio,
- disk_devt(mddev->gendisk),
- bio->bi_iter.bi_sector);
+ mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector);
submit_bio_noacct(discard_bio);
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
@@ -8733,6 +8779,23 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
}
EXPORT_SYMBOL_GPL(md_account_bio);
+void md_free_cloned_bio(struct bio *bio)
+{
+ struct md_io_clone *md_io_clone = bio->bi_private;
+ struct bio *orig_bio = md_io_clone->orig_bio;
+ struct mddev *mddev = md_io_clone->mddev;
+
+ if (bio->bi_status && !orig_bio->bi_status)
+ orig_bio->bi_status = bio->bi_status;
+
+ if (md_io_clone->start_time)
+ bio_end_io_acct(orig_bio, md_io_clone->start_time);
+
+ bio_put(bio);
+ percpu_ref_put(&mddev->active_io);
+}
+EXPORT_SYMBOL_GPL(md_free_cloned_bio);
+
/* md_allow_write(mddev)
* Calling this ensures that the array is marked 'active' so that writes
* may proceed without blocking. It is important to call this before
@@ -8788,12 +8851,16 @@ void md_do_sync(struct md_thread *thread)
int ret;
/* just incase thread restarts... */
- if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
- test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
return;
- if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
+
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ goto skip;
+
+ if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) ||
+ !md_is_rdwr(mddev)) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- return;
+ goto skip;
}
if (mddev_is_clustered(mddev)) {
@@ -9162,7 +9229,7 @@ void md_do_sync(struct md_thread *thread)
mddev->delta_disks > 0 &&
mddev->pers->finish_reshape &&
mddev->pers->size &&
- mddev->queue) {
+ !mddev_is_dm(mddev)) {
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
@@ -9262,9 +9329,14 @@ static bool md_spares_need_change(struct mddev *mddev)
{
struct md_rdev *rdev;
- rdev_for_each(rdev, mddev)
- if (rdev_removeable(rdev) || rdev_addable(rdev))
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev) {
+ if (rdev_removeable(rdev) || rdev_addable(rdev)) {
+ rcu_read_unlock();
return true;
+ }
+ }
+ rcu_read_unlock();
return false;
}
@@ -9368,13 +9440,19 @@ static void md_start_sync(struct work_struct *ws)
struct mddev *mddev = container_of(ws, struct mddev, sync_work);
int spares = 0;
bool suspend = false;
+ char *name;
- if (md_spares_need_change(mddev))
+ /*
+ * If reshape is still in progress, spares won't be added or removed
+ * from conf until reshape is done.
+ */
+ if (mddev->reshape_position == MaxSector &&
+ md_spares_need_change(mddev)) {
suspend = true;
+ mddev_suspend(mddev, false);
+ }
- suspend ? mddev_suspend_and_lock_nointr(mddev) :
- mddev_lock_nointr(mddev);
-
+ mddev_lock_nointr(mddev);
if (!md_is_rdwr(mddev)) {
/*
* On a read-only array we can:
@@ -9400,8 +9478,10 @@ static void md_start_sync(struct work_struct *ws)
if (spares)
md_bitmap_write_all(mddev->bitmap);
+ name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
+ "reshape" : "resync";
rcu_assign_pointer(mddev->sync_thread,
- md_register_thread(md_do_sync, mddev, "resync"));
+ md_register_thread(md_do_sync, mddev, name));
if (!mddev->sync_thread) {
pr_warn("%s: could not start resync thread...\n",
mdname(mddev));
@@ -9445,6 +9525,20 @@ not_running:
sysfs_notify_dirent_safe(mddev->sysfs_action);
}
+static void unregister_sync_thread(struct mddev *mddev)
+{
+ if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
+ /* resync/recovery still happening */
+ clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ return;
+ }
+
+ if (WARN_ON_ONCE(!mddev->sync_thread))
+ return;
+
+ md_reap_sync_thread(mddev);
+}
+
/*
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
@@ -9469,9 +9563,6 @@ not_running:
*/
void md_check_recovery(struct mddev *mddev)
{
- if (READ_ONCE(mddev->suspended))
- return;
-
if (mddev->bitmap)
md_bitmap_daemon_work(mddev);
@@ -9485,7 +9576,8 @@ void md_check_recovery(struct mddev *mddev)
}
if (!md_is_rdwr(mddev) &&
- !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_DONE, &mddev->recovery))
return;
if ( ! (
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@@ -9507,8 +9599,7 @@ void md_check_recovery(struct mddev *mddev)
struct md_rdev *rdev;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- /* sync_work already queued. */
- clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ unregister_sync_thread(mddev);
goto unlock;
}
@@ -9571,16 +9662,7 @@ void md_check_recovery(struct mddev *mddev)
* still set.
*/
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
- /* resync/recovery still happening */
- clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- goto unlock;
- }
-
- if (WARN_ON_ONCE(!mddev->sync_thread))
- goto unlock;
-
- md_reap_sync_thread(mddev);
+ unregister_sync_thread(mddev);
goto unlock;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 8d881cc59799..097d9dbd69b8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -18,6 +18,7 @@
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <trace/events/block.h>
#include "md-cluster.h"
#define MaxSector (~(sector_t)0)
@@ -59,7 +60,7 @@ struct md_rdev {
*/
struct block_device *meta_bdev;
struct block_device *bdev; /* block device handle */
- struct bdev_handle *bdev_handle; /* Handle from open for bdev */
+ struct file *bdev_file; /* Handle from open for bdev */
struct page *sb_page, *bb_page;
int sb_loaded;
@@ -207,6 +208,7 @@ enum flag_bits {
* check if there is collision between raid1
* serial bios.
*/
+ Nonrot, /* non-rotational device (SSD) */
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -222,6 +224,16 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
}
return 0;
}
+
+static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s,
+ int sectors)
+{
+ sector_t first_bad;
+ int bad_sectors;
+
+ return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors);
+}
+
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
@@ -468,7 +480,6 @@ struct mddev {
struct timer_list safemode_timer;
struct percpu_ref writes_pending;
int sync_checkers; /* # of threads checking writes_pending */
- struct request_queue *queue; /* for plugging ... */
struct bitmap *bitmap; /* the bitmap for the device */
struct {
@@ -558,6 +569,37 @@ enum recovery_flags {
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
};
+enum md_ro_state {
+ MD_RDWR,
+ MD_RDONLY,
+ MD_AUTO_READ,
+ MD_MAX_STATE
+};
+
+static inline bool md_is_rdwr(struct mddev *mddev)
+{
+ return (mddev->ro == MD_RDWR);
+}
+
+static inline bool reshape_interrupted(struct mddev *mddev)
+{
+ /* reshape never start */
+ if (mddev->reshape_position == MaxSector)
+ return false;
+
+ /* interrupted */
+ if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ return true;
+
+ /* running reshape will be interrupted soon. */
+ if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+ return true;
+
+ return false;
+}
+
static inline int __must_check mddev_lock(struct mddev *mddev)
{
return mutex_lock_interruptible(&mddev->reconfig_mutex);
@@ -617,6 +659,7 @@ struct md_personality
int (*start_reshape) (struct mddev *mddev);
void (*finish_reshape) (struct mddev *mddev);
void (*update_reshape_pos) (struct mddev *mddev);
+ void (*prepare_suspend) (struct mddev *mddev);
/* quiesce suspends or resumes internal processing.
* 1 - stop new actions and wait for action io to complete
* 0 - return to normal behaviour
@@ -750,6 +793,7 @@ extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio, sector_t start, sector_t size);
void md_account_bio(struct mddev *mddev, struct bio **bio);
+void md_free_cloned_bio(struct bio *bio);
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
@@ -778,9 +822,12 @@ extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);
-extern void md_handle_request(struct mddev *mddev, struct bio *bio);
+extern bool md_handle_request(struct mddev *mddev, struct bio *bio);
extern int mddev_suspend(struct mddev *mddev, bool interruptible);
extern void mddev_resume(struct mddev *mddev);
+extern void md_idle_sync_thread(struct mddev *mddev);
+extern void md_frozen_sync_thread(struct mddev *mddev);
+extern void md_unfrozen_sync_thread(struct mddev *mddev);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
@@ -821,7 +868,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
{
if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
!bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
- mddev->queue->limits.max_write_zeroes_sectors = 0;
+ mddev->gendisk->queue->limits.max_write_zeroes_sectors = 0;
}
static inline int mddev_suspend_and_lock(struct mddev *mddev)
@@ -860,7 +907,31 @@ void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);
+void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
+int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
+void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);
extern const struct block_device_operations md_fops;
+/*
+ * MD devices can be used undeneath by DM, in which case ->gendisk is NULL.
+ */
+static inline bool mddev_is_dm(struct mddev *mddev)
+{
+ return !mddev->gendisk;
+}
+
+static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
+ sector_t sector)
+{
+ if (!mddev_is_dm(mddev))
+ trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
+}
+
+#define mddev_add_trace_msg(mddev, fmt, args...) \
+do { \
+ if (!mddev_is_dm(mddev)) \
+ blk_add_trace_msg((mddev)->gendisk->queue, fmt, ##args); \
+} while (0)
+
#endif /* _MD_MD_H */
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 0e010e1204aa..b17b54df673b 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -656,7 +656,7 @@ EXPORT_SYMBOL_GPL(dm_bm_checksum);
/*----------------------------------------------------------------*/
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_DESCRIPTION("Immutable metadata library for dm");
/*----------------------------------------------------------------*/
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c50a7abda744..c5d4aeb68404 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -379,6 +379,19 @@ static void raid0_free(struct mddev *mddev, void *priv)
free_conf(mddev, conf);
}
+static int raid0_set_limits(struct mddev *mddev)
+{
+ struct queue_limits lim;
+
+ blk_set_stacking_limits(&lim);
+ lim.max_hw_sectors = mddev->chunk_sectors;
+ lim.max_write_zeroes_sectors = mddev->chunk_sectors;
+ lim.io_min = mddev->chunk_sectors << 9;
+ lim.io_opt = lim.io_min * mddev->raid_disks;
+ mddev_stack_rdev_limits(mddev, &lim);
+ return queue_limits_set(mddev->gendisk->queue, &lim);
+}
+
static int raid0_run(struct mddev *mddev)
{
struct r0conf *conf;
@@ -399,20 +412,10 @@ static int raid0_run(struct mddev *mddev)
mddev->private = conf;
}
conf = mddev->private;
- if (mddev->queue) {
- struct md_rdev *rdev;
-
- blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
- blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
-
- blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
- blk_queue_io_opt(mddev->queue,
- (mddev->chunk_sectors << 9) * mddev->raid_disks);
-
- rdev_for_each(rdev, mddev) {
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
- }
+ if (!mddev_is_dm(mddev)) {
+ ret = raid0_set_limits(mddev);
+ if (ret)
+ goto out_free_conf;
}
/* calculate array device size */
@@ -426,8 +429,10 @@ static int raid0_run(struct mddev *mddev)
ret = md_integrity_register(mddev);
if (ret)
- free_conf(mddev, conf);
-
+ goto out_free_conf;
+ return 0;
+out_free_conf:
+ free_conf(mddev, conf);
return ret;
}
@@ -578,10 +583,7 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
bio_set_dev(bio, tmp_dev->bdev);
bio->bi_iter.bi_sector = sector + zone->dev_start +
tmp_dev->data_offset;
-
- if (mddev->gendisk)
- trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
- bio_sector);
+ mddev_trace_remap(mddev, bio, bio_sector);
mddev_check_write_zeroes(mddev, bio);
submit_bio_noacct(bio);
}
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 512746551f36..2ea1710a3b70 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -227,3 +227,72 @@ static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
return false;
}
+
+/**
+ * raid1_check_read_range() - check a given read range for bad blocks,
+ * available read length is returned;
+ * @rdev: the rdev to read;
+ * @this_sector: read position;
+ * @len: read length;
+ *
+ * helper function for read_balance()
+ *
+ * 1) If there are no bad blocks in the range, @len is returned;
+ * 2) If the range are all bad blocks, 0 is returned;
+ * 3) If there are partial bad blocks:
+ * - If the bad block range starts after @this_sector, the length of first
+ * good region is returned;
+ * - If the bad block range starts before @this_sector, 0 is returned and
+ * the @len is updated to the offset into the region before we get to the
+ * good blocks;
+ */
+static inline int raid1_check_read_range(struct md_rdev *rdev,
+ sector_t this_sector, int *len)
+{
+ sector_t first_bad;
+ int bad_sectors;
+
+ /* no bad block overlap */
+ if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors))
+ return *len;
+
+ /*
+ * bad block range starts offset into our range so we can return the
+ * number of sectors before the bad blocks start.
+ */
+ if (first_bad > this_sector)
+ return first_bad - this_sector;
+
+ /* read range is fully consumed by bad blocks. */
+ if (this_sector + *len <= first_bad + bad_sectors)
+ return 0;
+
+ /*
+ * final case, bad block range starts before or at the start of our
+ * range but does not cover our entire range so we still return 0 but
+ * update the length with the number of sectors before we get to the
+ * good ones.
+ */
+ *len = first_bad + bad_sectors - this_sector;
+ return 0;
+}
+
+/*
+ * Check if read should choose the first rdev.
+ *
+ * Balance on the whole device if no resync is going on (recovery is ok) or
+ * below the resync window. Otherwise, take the first readable disk.
+ */
+static inline bool raid1_should_read_first(struct mddev *mddev,
+ sector_t this_sector, int len)
+{
+ if ((mddev->recovery_cp < this_sector + len))
+ return true;
+
+ if (mddev_is_clustered(mddev) &&
+ md_cluster_ops->area_resyncing(mddev, READ, this_sector,
+ this_sector + len))
+ return true;
+
+ return false;
+}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 24f0d799fd98..be8ac24f50b6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -46,9 +46,6 @@
static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
-#define raid1_log(md, fmt, args...) \
- do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
-
#define RAID_1_10_NAME "raid1"
#include "raid1-10.c"
@@ -498,9 +495,6 @@ static void raid1_end_write_request(struct bio *bio)
* to user-side. So if something waits for IO, then it
* will wait for the 'master' bio.
*/
- sector_t first_bad;
- int bad_sectors;
-
r1_bio->bios[mirror] = NULL;
to_put = bio;
/*
@@ -516,8 +510,8 @@ static void raid1_end_write_request(struct bio *bio)
set_bit(R1BIO_Uptodate, &r1_bio->state);
/* Maybe we can clear some bad blocks. */
- if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
- &first_bad, &bad_sectors) && !discard_error) {
+ if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
+ !discard_error) {
r1_bio->bios[mirror] = IO_MADE_GOOD;
set_bit(R1BIO_MadeGood, &r1_bio->state);
}
@@ -582,211 +576,312 @@ static sector_t align_to_barrier_unit_end(sector_t start_sector,
return len;
}
-/*
- * This routine returns the disk from which the requested read should
- * be done. There is a per-array 'next expected sequential IO' sector
- * number - if this matches on the next IO then we use the last disk.
- * There is also a per-disk 'last know head position' sector that is
- * maintained from IRQ contexts, both the normal and the resync IO
- * completion handlers update this position correctly. If there is no
- * perfect sequential match then we pick the disk whose head is closest.
- *
- * If there are 2 mirrors in the same 2 devices, performance degrades
- * because position is mirror, not device based.
- *
- * The rdev for the device selected will have nr_pending incremented.
- */
-static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
+static void update_read_sectors(struct r1conf *conf, int disk,
+ sector_t this_sector, int len)
{
- const sector_t this_sector = r1_bio->sector;
- int sectors;
- int best_good_sectors;
- int best_disk, best_dist_disk, best_pending_disk;
- int has_nonrot_disk;
+ struct raid1_info *info = &conf->mirrors[disk];
+
+ atomic_inc(&info->rdev->nr_pending);
+ if (info->next_seq_sect != this_sector)
+ info->seq_start = this_sector;
+ info->next_seq_sect = this_sector + len;
+}
+
+static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ int *max_sectors)
+{
+ sector_t this_sector = r1_bio->sector;
+ int len = r1_bio->sectors;
int disk;
- sector_t best_dist;
- unsigned int min_pending;
- struct md_rdev *rdev;
- int choose_first;
- int choose_next_idle;
- /*
- * Check if we can balance. We can balance on the whole
- * device if no resync is going on, or below the resync window.
- * We take the first readable disk when above the resync window.
- */
- retry:
- sectors = r1_bio->sectors;
- best_disk = -1;
- best_dist_disk = -1;
- best_dist = MaxSector;
- best_pending_disk = -1;
- min_pending = UINT_MAX;
- best_good_sectors = 0;
- has_nonrot_disk = 0;
- choose_next_idle = 0;
- clear_bit(R1BIO_FailFast, &r1_bio->state);
+ for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
+ struct md_rdev *rdev;
+ int read_len;
- if ((conf->mddev->recovery_cp < this_sector + sectors) ||
- (mddev_is_clustered(conf->mddev) &&
- md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
- this_sector + sectors)))
- choose_first = 1;
- else
- choose_first = 0;
+ if (r1_bio->bios[disk] == IO_BLOCKED)
+ continue;
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags))
+ continue;
+
+ /* choose the first disk even if it has some bad blocks. */
+ read_len = raid1_check_read_range(rdev, this_sector, &len);
+ if (read_len > 0) {
+ update_read_sectors(conf, disk, this_sector, read_len);
+ *max_sectors = read_len;
+ return disk;
+ }
+ }
+
+ return -1;
+}
+
+static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ int *max_sectors)
+{
+ sector_t this_sector = r1_bio->sector;
+ int best_disk = -1;
+ int best_len = 0;
+ int disk;
for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
- sector_t dist;
- sector_t first_bad;
- int bad_sectors;
- unsigned int pending;
- bool nonrot;
+ struct md_rdev *rdev;
+ int len;
+ int read_len;
+
+ if (r1_bio->bios[disk] == IO_BLOCKED)
+ continue;
rdev = conf->mirrors[disk].rdev;
- if (r1_bio->bios[disk] == IO_BLOCKED
- || rdev == NULL
- || test_bit(Faulty, &rdev->flags))
+ if (!rdev || test_bit(Faulty, &rdev->flags) ||
+ test_bit(WriteMostly, &rdev->flags))
continue;
- if (!test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < this_sector + sectors)
+
+ /* keep track of the disk with the most readable sectors. */
+ len = r1_bio->sectors;
+ read_len = raid1_check_read_range(rdev, this_sector, &len);
+ if (read_len > best_len) {
+ best_disk = disk;
+ best_len = read_len;
+ }
+ }
+
+ if (best_disk != -1) {
+ *max_sectors = best_len;
+ update_read_sectors(conf, best_disk, this_sector, best_len);
+ }
+
+ return best_disk;
+}
+
+static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+ int *max_sectors)
+{
+ sector_t this_sector = r1_bio->sector;
+ int bb_disk = -1;
+ int bb_read_len = 0;
+ int disk;
+
+ for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
+ struct md_rdev *rdev;
+ int len;
+ int read_len;
+
+ if (r1_bio->bios[disk] == IO_BLOCKED)
continue;
- if (test_bit(WriteMostly, &rdev->flags)) {
- /* Don't balance among write-mostly, just
- * use the first as a last resort */
- if (best_dist_disk < 0) {
- if (is_badblock(rdev, this_sector, sectors,
- &first_bad, &bad_sectors)) {
- if (first_bad <= this_sector)
- /* Cannot use this */
- continue;
- best_good_sectors = first_bad - this_sector;
- } else
- best_good_sectors = sectors;
- best_dist_disk = disk;
- best_pending_disk = disk;
- }
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags) ||
+ !test_bit(WriteMostly, &rdev->flags))
continue;
+
+ /* there are no bad blocks, we can use this disk */
+ len = r1_bio->sectors;
+ read_len = raid1_check_read_range(rdev, this_sector, &len);
+ if (read_len == r1_bio->sectors) {
+ update_read_sectors(conf, disk, this_sector, read_len);
+ return disk;
}
- /* This is a reasonable device to use. It might
- * even be best.
+
+ /*
+ * there are partial bad blocks, choose the rdev with largest
+ * read length.
*/
- if (is_badblock(rdev, this_sector, sectors,
- &first_bad, &bad_sectors)) {
- if (best_dist < MaxSector)
- /* already have a better device */
- continue;
- if (first_bad <= this_sector) {
- /* cannot read here. If this is the 'primary'
- * device, then we must not read beyond
- * bad_sectors from another device..
- */
- bad_sectors -= (this_sector - first_bad);
- if (choose_first && sectors > bad_sectors)
- sectors = bad_sectors;
- if (best_good_sectors > sectors)
- best_good_sectors = sectors;
-
- } else {
- sector_t good_sectors = first_bad - this_sector;
- if (good_sectors > best_good_sectors) {
- best_good_sectors = good_sectors;
- best_disk = disk;
- }
- if (choose_first)
- break;
- }
- continue;
- } else {
- if ((sectors > best_good_sectors) && (best_disk >= 0))
- best_disk = -1;
- best_good_sectors = sectors;
+ if (read_len > bb_read_len) {
+ bb_disk = disk;
+ bb_read_len = read_len;
}
+ }
+
+ if (bb_disk != -1) {
+ *max_sectors = bb_read_len;
+ update_read_sectors(conf, bb_disk, this_sector, bb_read_len);
+ }
+
+ return bb_disk;
+}
+
+static bool is_sequential(struct r1conf *conf, int disk, struct r1bio *r1_bio)
+{
+ /* TODO: address issues with this check and concurrency. */
+ return conf->mirrors[disk].next_seq_sect == r1_bio->sector ||
+ conf->mirrors[disk].head_position == r1_bio->sector;
+}
+
+/*
+ * If buffered sequential IO size exceeds optimal iosize, check if there is idle
+ * disk. If yes, choose the idle disk.
+ */
+static bool should_choose_next(struct r1conf *conf, int disk)
+{
+ struct raid1_info *mirror = &conf->mirrors[disk];
+ int opt_iosize;
+
+ if (!test_bit(Nonrot, &mirror->rdev->flags))
+ return false;
+
+ opt_iosize = bdev_io_opt(mirror->rdev->bdev) >> 9;
+ return opt_iosize > 0 && mirror->seq_start != MaxSector &&
+ mirror->next_seq_sect > opt_iosize &&
+ mirror->next_seq_sect - opt_iosize >= mirror->seq_start;
+}
+
+static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio)
+{
+ if (!rdev || test_bit(Faulty, &rdev->flags))
+ return false;
+
+ /* still in recovery */
+ if (!test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < r1_bio->sector + r1_bio->sectors)
+ return false;
+
+ /* don't read from slow disk unless have to */
+ if (test_bit(WriteMostly, &rdev->flags))
+ return false;
+
+ /* don't split IO for bad blocks unless have to */
+ if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors))
+ return false;
+
+ return true;
+}
+
+struct read_balance_ctl {
+ sector_t closest_dist;
+ int closest_dist_disk;
+ int min_pending;
+ int min_pending_disk;
+ int sequential_disk;
+ int readable_disks;
+};
+
+static int choose_best_rdev(struct r1conf *conf, struct r1bio *r1_bio)
+{
+ int disk;
+ struct read_balance_ctl ctl = {
+ .closest_dist_disk = -1,
+ .closest_dist = MaxSector,
+ .min_pending_disk = -1,
+ .min_pending = UINT_MAX,
+ .sequential_disk = -1,
+ };
+
+ for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
+ struct md_rdev *rdev;
+ sector_t dist;
+ unsigned int pending;
- if (best_disk >= 0)
- /* At least two disks to choose from so failfast is OK */
+ if (r1_bio->bios[disk] == IO_BLOCKED)
+ continue;
+
+ rdev = conf->mirrors[disk].rdev;
+ if (!rdev_readable(rdev, r1_bio))
+ continue;
+
+ /* At least two disks to choose from so failfast is OK */
+ if (ctl.readable_disks++ == 1)
set_bit(R1BIO_FailFast, &r1_bio->state);
- nonrot = bdev_nonrot(rdev->bdev);
- has_nonrot_disk |= nonrot;
pending = atomic_read(&rdev->nr_pending);
- dist = abs(this_sector - conf->mirrors[disk].head_position);
- if (choose_first) {
- best_disk = disk;
- break;
- }
+ dist = abs(r1_bio->sector - conf->mirrors[disk].head_position);
+
/* Don't change to another disk for sequential reads */
- if (conf->mirrors[disk].next_seq_sect == this_sector
- || dist == 0) {
- int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
- struct raid1_info *mirror = &conf->mirrors[disk];
+ if (is_sequential(conf, disk, r1_bio)) {
+ if (!should_choose_next(conf, disk))
+ return disk;
- best_disk = disk;
/*
- * If buffered sequential IO size exceeds optimal
- * iosize, check if there is idle disk. If yes, choose
- * the idle disk. read_balance could already choose an
- * idle disk before noticing it's a sequential IO in
- * this disk. This doesn't matter because this disk
- * will idle, next time it will be utilized after the
- * first disk has IO size exceeds optimal iosize. In
- * this way, iosize of the first disk will be optimal
- * iosize at least. iosize of the second disk might be
- * small, but not a big deal since when the second disk
- * starts IO, the first disk is likely still busy.
+ * Add 'pending' to avoid choosing this disk if
+ * there is other idle disk.
*/
- if (nonrot && opt_iosize > 0 &&
- mirror->seq_start != MaxSector &&
- mirror->next_seq_sect > opt_iosize &&
- mirror->next_seq_sect - opt_iosize >=
- mirror->seq_start) {
- choose_next_idle = 1;
- continue;
- }
- break;
+ pending++;
+ /*
+ * If there is no other idle disk, this disk
+ * will be chosen.
+ */
+ ctl.sequential_disk = disk;
}
- if (choose_next_idle)
- continue;
-
- if (min_pending > pending) {
- min_pending = pending;
- best_pending_disk = disk;
+ if (ctl.min_pending > pending) {
+ ctl.min_pending = pending;
+ ctl.min_pending_disk = disk;
}
- if (dist < best_dist) {
- best_dist = dist;
- best_dist_disk = disk;
+ if (ctl.closest_dist > dist) {
+ ctl.closest_dist = dist;
+ ctl.closest_dist_disk = disk;
}
}
/*
+ * sequential IO size exceeds optimal iosize, however, there is no other
+ * idle disk, so choose the sequential disk.
+ */
+ if (ctl.sequential_disk != -1 && ctl.min_pending != 0)
+ return ctl.sequential_disk;
+
+ /*
* If all disks are rotational, choose the closest disk. If any disk is
* non-rotational, choose the disk with less pending request even the
* disk is rotational, which might/might not be optimal for raids with
* mixed ratation/non-rotational disks depending on workload.
*/
- if (best_disk == -1) {
- if (has_nonrot_disk || min_pending == 0)
- best_disk = best_pending_disk;
- else
- best_disk = best_dist_disk;
- }
+ if (ctl.min_pending_disk != -1 &&
+ (READ_ONCE(conf->nonrot_disks) || ctl.min_pending == 0))
+ return ctl.min_pending_disk;
+ else
+ return ctl.closest_dist_disk;
+}
- if (best_disk >= 0) {
- rdev = conf->mirrors[best_disk].rdev;
- if (!rdev)
- goto retry;
- atomic_inc(&rdev->nr_pending);
- sectors = best_good_sectors;
+/*
+ * This routine returns the disk from which the requested read should be done.
+ *
+ * 1) If resync is in progress, find the first usable disk and use it even if it
+ * has some bad blocks.
+ *
+ * 2) Now that there is no resync, loop through all disks and skipping slow
+ * disks and disks with bad blocks for now. Only pay attention to key disk
+ * choice.
+ *
+ * 3) If we've made it this far, now look for disks with bad blocks and choose
+ * the one with most number of sectors.
+ *
+ * 4) If we are all the way at the end, we have no choice but to use a disk even
+ * if it is write mostly.
+ *
+ * The rdev for the device selected will have nr_pending incremented.
+ */
+static int read_balance(struct r1conf *conf, struct r1bio *r1_bio,
+ int *max_sectors)
+{
+ int disk;
- if (conf->mirrors[best_disk].next_seq_sect != this_sector)
- conf->mirrors[best_disk].seq_start = this_sector;
+ clear_bit(R1BIO_FailFast, &r1_bio->state);
+
+ if (raid1_should_read_first(conf->mddev, r1_bio->sector,
+ r1_bio->sectors))
+ return choose_first_rdev(conf, r1_bio, max_sectors);
- conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
+ disk = choose_best_rdev(conf, r1_bio);
+ if (disk >= 0) {
+ *max_sectors = r1_bio->sectors;
+ update_read_sectors(conf, disk, r1_bio->sector,
+ r1_bio->sectors);
+ return disk;
}
- *max_sectors = sectors;
- return best_disk;
+ /*
+ * If we are here it means we didn't find a perfectly good disk so
+ * now spend a bit more time trying to find one with the most good
+ * sectors.
+ */
+ disk = choose_bb_rdev(conf, r1_bio, max_sectors);
+ if (disk >= 0)
+ return disk;
+
+ return choose_slow_rdev(conf, r1_bio, max_sectors);
}
static void wake_up_barrier(struct r1conf *conf)
@@ -1098,7 +1193,7 @@ static void freeze_array(struct r1conf *conf, int extra)
*/
spin_lock_irq(&conf->resync_lock);
conf->array_frozen = 1;
- raid1_log(conf->mddev, "wait freeze");
+ mddev_add_trace_msg(conf->mddev, "raid1 wait freeze");
wait_event_lock_irq_cmd(
conf->wait_barrier,
get_unqueued_pending(conf) == extra,
@@ -1287,7 +1382,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
* Reading from a write-mostly device must take care not to
* over-take any writes that are 'behind'
*/
- raid1_log(mddev, "wait behind writes");
+ mddev_add_trace_msg(mddev, "raid1 wait behind writes");
wait_event(bitmap->behind_wait,
atomic_read(&bitmap->behind_writes) == 0);
}
@@ -1320,11 +1415,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
read_bio->bi_private = r1_bio;
-
- if (mddev->gendisk)
- trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
- r1_bio->sector);
-
+ mddev_trace_remap(mddev, read_bio, r1_bio->sector);
submit_bio_noacct(read_bio);
}
@@ -1474,7 +1565,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
bio_wouldblock_error(bio);
return;
}
- raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
+ mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
+ blocked_rdev->raid_disk);
md_wait_for_blocked_rdev(blocked_rdev, mddev);
wait_barrier(conf, bio->bi_iter.bi_sector, false);
goto retry_write;
@@ -1557,10 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_private = r1_bio;
atomic_inc(&r1_bio->remaining);
-
- if (mddev->gendisk)
- trace_block_bio_remap(mbio, disk_devt(mddev->gendisk),
- r1_bio->sector);
+ mddev_trace_remap(mddev, mbio, r1_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_bdev = (void *)rdev;
if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) {
@@ -1760,6 +1849,52 @@ static int raid1_spare_active(struct mddev *mddev)
return count;
}
+static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk,
+ bool replacement)
+{
+ struct raid1_info *info = conf->mirrors + disk;
+
+ if (replacement)
+ info += conf->raid_disks;
+
+ if (info->rdev)
+ return false;
+
+ if (bdev_nonrot(rdev->bdev)) {
+ set_bit(Nonrot, &rdev->flags);
+ WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks + 1);
+ }
+
+ rdev->raid_disk = disk;
+ info->head_position = 0;
+ info->seq_start = MaxSector;
+ WRITE_ONCE(info->rdev, rdev);
+
+ return true;
+}
+
+static bool raid1_remove_conf(struct r1conf *conf, int disk)
+{
+ struct raid1_info *info = conf->mirrors + disk;
+ struct md_rdev *rdev = info->rdev;
+
+ if (!rdev || test_bit(In_sync, &rdev->flags) ||
+ atomic_read(&rdev->nr_pending))
+ return false;
+
+ /* Only remove non-faulty devices if recovery is not possible. */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ rdev->mddev->recovery_disabled != conf->recovery_disabled &&
+ rdev->mddev->degraded < conf->raid_disks)
+ return false;
+
+ if (test_and_clear_bit(Nonrot, &rdev->flags))
+ WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks - 1);
+
+ WRITE_ONCE(info->rdev, NULL);
+ return true;
+}
+
static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{
struct r1conf *conf = mddev->private;
@@ -1791,19 +1926,16 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
for (mirror = first; mirror <= last; mirror++) {
p = conf->mirrors + mirror;
if (!p->rdev) {
- if (mddev->gendisk)
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ err = mddev_stack_new_rdev(mddev, rdev);
+ if (err)
+ return err;
- p->head_position = 0;
- rdev->raid_disk = mirror;
- err = 0;
+ raid1_add_conf(conf, rdev, mirror, false);
/* As all devices are equivalent, we don't need a full recovery
* if this was recently any drive of the array
*/
if (rdev->saved_raid_disk < 0)
conf->fullsync = 1;
- WRITE_ONCE(p->rdev, rdev);
break;
}
if (test_bit(WantReplacement, &p->rdev->flags) &&
@@ -1813,13 +1945,11 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (err && repl_slot >= 0) {
/* Add this device as a replacement */
- p = conf->mirrors + repl_slot;
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
- rdev->raid_disk = repl_slot;
+ raid1_add_conf(conf, rdev, repl_slot, true);
err = 0;
conf->fullsync = 1;
- WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
}
print_conf(conf);
@@ -1836,27 +1966,20 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (unlikely(number >= conf->raid_disks))
goto abort;
- if (rdev != p->rdev)
- p = conf->mirrors + conf->raid_disks + number;
+ if (rdev != p->rdev) {
+ number += conf->raid_disks;
+ p = conf->mirrors + number;
+ }
print_conf(conf);
if (rdev == p->rdev) {
- if (test_bit(In_sync, &rdev->flags) ||
- atomic_read(&rdev->nr_pending)) {
+ if (!raid1_remove_conf(conf, number)) {
err = -EBUSY;
goto abort;
}
- /* Only remove non-faulty devices if recovery
- * is not possible.
- */
- if (!test_bit(Faulty, &rdev->flags) &&
- mddev->recovery_disabled != conf->recovery_disabled &&
- mddev->degraded < conf->raid_disks) {
- err = -EBUSY;
- goto abort;
- }
- WRITE_ONCE(p->rdev, NULL);
- if (conf->mirrors[conf->raid_disks + number].rdev) {
+
+ if (number < conf->raid_disks &&
+ conf->mirrors[conf->raid_disks + number].rdev) {
/* We just removed a device that is being replaced.
* Move down the replacement. We drain all IO before
* doing this to avoid confusion.
@@ -1944,8 +2067,6 @@ static void end_sync_write(struct bio *bio)
struct r1bio *r1_bio = get_resync_r1bio(bio);
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
- sector_t first_bad;
- int bad_sectors;
struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;
if (!uptodate) {
@@ -1955,14 +2076,11 @@ static void end_sync_write(struct bio *bio)
set_bit(MD_RECOVERY_NEEDED, &
mddev->recovery);
set_bit(R1BIO_WriteError, &r1_bio->state);
- } else if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
- &first_bad, &bad_sectors) &&
- !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
- r1_bio->sector,
- r1_bio->sectors,
- &first_bad, &bad_sectors)
- )
+ } else if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
+ !rdev_has_badblock(conf->mirrors[r1_bio->read_disk].rdev,
+ r1_bio->sector, r1_bio->sectors)) {
set_bit(R1BIO_MadeGood, &r1_bio->state);
+ }
put_sync_write_buf(r1_bio, uptodate);
}
@@ -2262,7 +2380,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
int sectors = r1_bio->sectors;
int read_disk = r1_bio->read_disk;
struct mddev *mddev = conf->mddev;
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[read_disk].rdev);
+ struct md_rdev *rdev = conf->mirrors[read_disk].rdev;
if (exceed_read_errors(mddev, rdev)) {
r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
@@ -2279,16 +2397,12 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
s = PAGE_SIZE >> 9;
do {
- sector_t first_bad;
- int bad_sectors;
-
rdev = conf->mirrors[d].rdev;
if (rdev &&
(test_bit(In_sync, &rdev->flags) ||
(!test_bit(Faulty, &rdev->flags) &&
rdev->recovery_offset >= sect + s)) &&
- is_badblock(rdev, sect, s,
- &first_bad, &bad_sectors) == 0) {
+ rdev_has_badblock(rdev, sect, s) == 0) {
atomic_inc(&rdev->nr_pending);
if (sync_page_io(rdev, sect, s<<9,
conf->tmppage, REQ_OP_READ, false))
@@ -3006,23 +3120,17 @@ static struct r1conf *setup_conf(struct mddev *mddev)
err = -EINVAL;
spin_lock_init(&conf->device_lock);
+ conf->raid_disks = mddev->raid_disks;
rdev_for_each(rdev, mddev) {
int disk_idx = rdev->raid_disk;
- if (disk_idx >= mddev->raid_disks
- || disk_idx < 0)
+
+ if (disk_idx >= conf->raid_disks || disk_idx < 0)
continue;
- if (test_bit(Replacement, &rdev->flags))
- disk = conf->mirrors + mddev->raid_disks + disk_idx;
- else
- disk = conf->mirrors + disk_idx;
- if (disk->rdev)
+ if (!raid1_add_conf(conf, rdev, disk_idx,
+ test_bit(Replacement, &rdev->flags)))
goto abort;
- disk->rdev = rdev;
- disk->head_position = 0;
- disk->seq_start = MaxSector;
}
- conf->raid_disks = mddev->raid_disks;
conf->mddev = mddev;
INIT_LIST_HEAD(&conf->retry_list);
INIT_LIST_HEAD(&conf->bio_end_io_list);
@@ -3086,12 +3194,21 @@ static struct r1conf *setup_conf(struct mddev *mddev)
return ERR_PTR(err);
}
+static int raid1_set_limits(struct mddev *mddev)
+{
+ struct queue_limits lim;
+
+ blk_set_stacking_limits(&lim);
+ lim.max_write_zeroes_sectors = 0;
+ mddev_stack_rdev_limits(mddev, &lim);
+ return queue_limits_set(mddev->gendisk->queue, &lim);
+}
+
static void raid1_free(struct mddev *mddev, void *priv);
static int raid1_run(struct mddev *mddev)
{
struct r1conf *conf;
int i;
- struct md_rdev *rdev;
int ret;
if (mddev->level != 1) {
@@ -3118,14 +3235,10 @@ static int raid1_run(struct mddev *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- if (mddev->queue)
- blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
-
- rdev_for_each(rdev, mddev) {
- if (!mddev->gendisk)
- continue;
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (!mddev_is_dm(mddev)) {
+ ret = raid1_set_limits(mddev);
+ if (ret)
+ goto abort;
}
mddev->degraded = 0;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 14d4211a123a..5300cbaa58a4 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -71,6 +71,7 @@ struct r1conf {
* allow for replacements.
*/
int raid_disks;
+ int nonrot_disks;
spinlock_t device_lock;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7412066ea22c..a4556d2e46bf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -76,9 +76,6 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
static void end_reshape_write(struct bio *bio);
static void end_reshape(struct r10conf *conf);
-#define raid10_log(md, fmt, args...) \
- do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
-
#include "raid1-10.c"
#define NULL_CMD
@@ -518,11 +515,7 @@ static void raid10_end_write_request(struct bio *bio)
* The 'master' represents the composite IO operation to
* user-side. So if something waits for IO, then it will
* wait for the 'master' bio.
- */
- sector_t first_bad;
- int bad_sectors;
-
- /*
+ *
* Do not set R10BIO_Uptodate if the current device is
* rebuilding or Faulty. This is because we cannot use
* such device for properly reading the data back (we could
@@ -535,10 +528,9 @@ static void raid10_end_write_request(struct bio *bio)
set_bit(R10BIO_Uptodate, &r10_bio->state);
/* Maybe we can clear some bad blocks. */
- if (is_badblock(rdev,
- r10_bio->devs[slot].addr,
- r10_bio->sectors,
- &first_bad, &bad_sectors) && !discard_error) {
+ if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
+ r10_bio->sectors) &&
+ !discard_error) {
bio_put(bio);
if (repl)
r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
@@ -753,17 +745,8 @@ static struct md_rdev *read_balance(struct r10conf *conf,
best_good_sectors = 0;
do_balance = 1;
clear_bit(R10BIO_FailFast, &r10_bio->state);
- /*
- * Check if we can balance. We can balance on the whole
- * device if no resync is going on (recovery is ok), or below
- * the resync window. We take the first readable disk when
- * above the resync window.
- */
- if ((conf->mddev->recovery_cp < MaxSector
- && (this_sector + sectors >= conf->next_resync)) ||
- (mddev_is_clustered(conf->mddev) &&
- md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
- this_sector + sectors)))
+
+ if (raid1_should_read_first(conf->mddev, this_sector, sectors))
do_balance = 0;
for (slot = 0; slot < conf->copies ; slot++) {
@@ -1033,7 +1016,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
ret = false;
} else {
conf->nr_waiting++;
- raid10_log(conf->mddev, "wait barrier");
+ mddev_add_trace_msg(conf->mddev, "raid10 wait barrier");
wait_event_barrier(conf, stop_waiting_barrier(conf));
conf->nr_waiting--;
}
@@ -1152,7 +1135,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
bio_wouldblock_error(bio);
return false;
}
- raid10_log(conf->mddev, "wait reshape");
+ mddev_add_trace_msg(conf->mddev, "raid10 wait reshape");
wait_event(conf->wait_barrier,
conf->reshape_progress <= bio->bi_iter.bi_sector ||
conf->reshape_progress >= bio->bi_iter.bi_sector +
@@ -1249,10 +1232,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
test_bit(R10BIO_FailFast, &r10_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
read_bio->bi_private = r10_bio;
-
- if (mddev->gendisk)
- trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
- r10_bio->sector);
+ mddev_trace_remap(mddev, read_bio, r10_bio->sector);
submit_bio_noacct(read_bio);
return;
}
@@ -1288,10 +1268,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
&& enough(conf, devnum))
mbio->bi_opf |= MD_FAILFAST;
mbio->bi_private = r10_bio;
-
- if (conf->mddev->gendisk)
- trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
- r10_bio->sector);
+ mddev_trace_remap(mddev, mbio, r10_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_bdev = (void *)rdev;
@@ -1330,10 +1307,7 @@ retry_wait:
}
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
- sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
- int bad_sectors;
- int is_bad;
/*
* Discard request doesn't care the write result
@@ -1342,9 +1316,8 @@ retry_wait:
if (!r10_bio->sectors)
continue;
- is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
- &first_bad, &bad_sectors);
- if (is_bad < 0) {
+ if (rdev_has_badblock(rdev, dev_sector,
+ r10_bio->sectors) < 0) {
/*
* Mustn't write here until the bad block
* is acknowledged
@@ -1360,8 +1333,9 @@ retry_wait:
if (unlikely(blocked_rdev)) {
/* Have to wait for this device to get unblocked, then retry */
allow_barrier(conf);
- raid10_log(conf->mddev, "%s wait rdev %d blocked",
- __func__, blocked_rdev->raid_disk);
+ mddev_add_trace_msg(conf->mddev,
+ "raid10 %s wait rdev %d blocked",
+ __func__, blocked_rdev->raid_disk);
md_wait_for_blocked_rdev(blocked_rdev, mddev);
wait_barrier(conf, false);
goto retry_wait;
@@ -1416,7 +1390,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
bio_wouldblock_error(bio);
return;
}
- raid10_log(conf->mddev, "wait reshape metadata");
+ mddev_add_trace_msg(conf->mddev,
+ "raid10 wait reshape metadata");
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
@@ -2131,10 +2106,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
continue;
}
- if (mddev->gendisk)
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
-
+ err = mddev_stack_new_rdev(mddev, rdev);
+ if (err)
+ return err;
p->head_position = 0;
p->recovery_disabled = mddev->recovery_disabled - 1;
rdev->raid_disk = mirror;
@@ -2150,10 +2124,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
rdev->raid_disk = repl_slot;
- err = 0;
- if (mddev->gendisk)
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ err = mddev_stack_new_rdev(mddev, rdev);
+ if (err)
+ return err;
conf->fullsync = 1;
WRITE_ONCE(p->replacement, rdev);
}
@@ -2290,8 +2263,6 @@ static void end_sync_write(struct bio *bio)
struct mddev *mddev = r10_bio->mddev;
struct r10conf *conf = mddev->private;
int d;
- sector_t first_bad;
- int bad_sectors;
int slot;
int repl;
struct md_rdev *rdev = NULL;
@@ -2312,11 +2283,10 @@ static void end_sync_write(struct bio *bio)
&rdev->mddev->recovery);
set_bit(R10BIO_WriteError, &r10_bio->state);
}
- } else if (is_badblock(rdev,
- r10_bio->devs[slot].addr,
- r10_bio->sectors,
- &first_bad, &bad_sectors))
+ } else if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
+ r10_bio->sectors)) {
set_bit(R10BIO_MadeGood, &r10_bio->state);
+ }
rdev_dec_pending(rdev, mddev);
@@ -2597,11 +2567,8 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
int sectors, struct page *page, enum req_op op)
{
- sector_t first_bad;
- int bad_sectors;
-
- if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
- && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
+ if (rdev_has_badblock(rdev, sector, sectors) &&
+ (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
return -1;
if (sync_page_io(rdev, sector, sectors << 9, page, op, false))
/* success */
@@ -2658,16 +2625,14 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
s = PAGE_SIZE >> 9;
do {
- sector_t first_bad;
- int bad_sectors;
-
d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
if (rdev &&
test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags) &&
- is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
- &first_bad, &bad_sectors) == 0) {
+ rdev_has_badblock(rdev,
+ r10_bio->devs[sl].addr + sect,
+ s) == 0) {
atomic_inc(&rdev->nr_pending);
success = sync_page_io(rdev,
r10_bio->devs[sl].addr +
@@ -4002,14 +3967,26 @@ static struct r10conf *setup_conf(struct mddev *mddev)
return ERR_PTR(err);
}
-static void raid10_set_io_opt(struct r10conf *conf)
+static unsigned int raid10_nr_stripes(struct r10conf *conf)
{
- int raid_disks = conf->geo.raid_disks;
+ unsigned int raid_disks = conf->geo.raid_disks;
+
+ if (conf->geo.raid_disks % conf->geo.near_copies)
+ return raid_disks;
+ return raid_disks / conf->geo.near_copies;
+}
- if (!(conf->geo.raid_disks % conf->geo.near_copies))
- raid_disks /= conf->geo.near_copies;
- blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) *
- raid_disks);
+static int raid10_set_queue_limits(struct mddev *mddev)
+{
+ struct r10conf *conf = mddev->private;
+ struct queue_limits lim;
+
+ blk_set_stacking_limits(&lim);
+ lim.max_write_zeroes_sectors = 0;
+ lim.io_min = mddev->chunk_sectors << 9;
+ lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
+ mddev_stack_rdev_limits(mddev, &lim);
+ return queue_limits_set(mddev->gendisk->queue, &lim);
}
static int raid10_run(struct mddev *mddev)
@@ -4021,6 +3998,7 @@ static int raid10_run(struct mddev *mddev)
sector_t size;
sector_t min_offset_diff = 0;
int first = 1;
+ int ret = -EIO;
if (mddev->private == NULL) {
conf = setup_conf(mddev);
@@ -4047,12 +4025,6 @@ static int raid10_run(struct mddev *mddev)
}
}
- if (mddev->queue) {
- blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
- blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
- raid10_set_io_opt(conf);
- }
-
rdev_for_each(rdev, mddev) {
long long diff;
@@ -4081,14 +4053,16 @@ static int raid10_run(struct mddev *mddev)
if (first || diff < min_offset_diff)
min_offset_diff = diff;
- if (mddev->gendisk)
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
-
disk->head_position = 0;
first = 0;
}
+ if (!mddev_is_dm(conf->mddev)) {
+ ret = raid10_set_queue_limits(mddev);
+ if (ret)
+ goto out_free_conf;
+ }
+
/* need to check that every block has at least one working mirror */
if (!enough(conf, -1)) {
pr_err("md/raid10:%s: not enough operational mirrors.\n",
@@ -4175,11 +4149,7 @@ static int raid10_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- rcu_assign_pointer(mddev->sync_thread,
- md_register_thread(md_do_sync, mddev, "reshape"));
- if (!mddev->sync_thread)
- goto out_free_conf;
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
return 0;
@@ -4189,7 +4159,7 @@ out_free_conf:
raid10_free_conf(conf);
mddev->private = NULL;
out:
- return -EIO;
+ return ret;
}
static void raid10_free(struct mddev *mddev, void *priv)
@@ -4573,16 +4543,8 @@ out:
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-
- rcu_assign_pointer(mddev->sync_thread,
- md_register_thread(md_do_sync, mddev, "reshape"));
- if (!mddev->sync_thread) {
- ret = -EAGAIN;
- goto abort;
- }
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
conf->reshape_checkpoint = jiffies;
- md_wakeup_thread(mddev->sync_thread);
md_new_event();
return 0;
@@ -4966,8 +4928,7 @@ static void end_reshape(struct r10conf *conf)
conf->reshape_safe = MaxSector;
spin_unlock_irq(&conf->device_lock);
- if (conf->mddev->queue)
- raid10_set_io_opt(conf);
+ mddev_update_io_opt(conf->mddev, raid10_nr_stripes(conf));
conf->fullsync = 0;
}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index da4ba736c4f0..a70cbec12ed0 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1393,7 +1393,8 @@ int ppl_init_log(struct r5conf *conf)
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
ppl_conf->block_size = 512;
} else {
- ppl_conf->block_size = queue_logical_block_size(mddev->queue);
+ ppl_conf->block_size =
+ queue_logical_block_size(mddev->gendisk->queue);
}
for (i = 0; i < ppl_conf->count; i++) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8497880135ee..d874abfc1836 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,6 +36,7 @@
*/
#include <linux/blkdev.h>
+#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
@@ -760,6 +761,7 @@ enum stripe_result {
STRIPE_RETRY,
STRIPE_SCHEDULE_AND_RETRY,
STRIPE_FAIL,
+ STRIPE_WAIT_RESHAPE,
};
struct stripe_request_ctx {
@@ -1210,10 +1212,8 @@ again:
*/
while (op_is_write(op) && rdev &&
test_bit(WriteErrorSeen, &rdev->flags)) {
- sector_t first_bad;
- int bad_sectors;
- int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
- &first_bad, &bad_sectors);
+ int bad = rdev_has_badblock(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf));
if (!bad)
break;
@@ -1295,10 +1295,7 @@ again:
if (rrdev)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
- if (conf->mddev->gendisk)
- trace_block_bio_remap(bi,
- disk_devt(conf->mddev->gendisk),
- sh->dev[i].sector);
+ mddev_trace_remap(conf->mddev, bi, sh->dev[i].sector);
if (should_defer && op_is_write(op))
bio_list_add(&pending_bios, bi);
else
@@ -1342,10 +1339,7 @@ again:
*/
if (op == REQ_OP_DISCARD)
rbi->bi_vcnt = 0;
- if (conf->mddev->gendisk)
- trace_block_bio_remap(rbi,
- disk_devt(conf->mddev->gendisk),
- sh->dev[i].sector);
+ mddev_trace_remap(conf->mddev, rbi, sh->dev[i].sector);
if (should_defer && op_is_write(op))
bio_list_add(&pending_bios, rbi);
else
@@ -2412,7 +2406,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
atomic_inc(&conf->active_stripes);
raid5_release_stripe(sh);
- conf->max_nr_stripes++;
+ WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1);
return 1;
}
@@ -2422,12 +2416,12 @@ static int grow_stripes(struct r5conf *conf, int num)
size_t namelen = sizeof(conf->cache_name[0]);
int devs = max(conf->raid_disks, conf->previous_raid_disks);
- if (conf->mddev->gendisk)
+ if (mddev_is_dm(conf->mddev))
snprintf(conf->cache_name[0], namelen,
- "raid%d-%s", conf->level, mdname(conf->mddev));
+ "raid%d-%p", conf->level, conf->mddev);
else
snprintf(conf->cache_name[0], namelen,
- "raid%d-%p", conf->level, conf->mddev);
+ "raid%d-%s", conf->level, mdname(conf->mddev));
snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
conf->active_name = 0;
@@ -2707,7 +2701,7 @@ static int drop_one_stripe(struct r5conf *conf)
shrink_buffers(sh);
free_stripe(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
- conf->max_nr_stripes--;
+ WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1);
return 1;
}
@@ -2855,8 +2849,6 @@ static void raid5_end_write_request(struct bio *bi)
struct r5conf *conf = sh->raid_conf;
int disks = sh->disks, i;
struct md_rdev *rdev;
- sector_t first_bad;
- int bad_sectors;
int replacement = 0;
for (i = 0 ; i < disks; i++) {
@@ -2888,9 +2880,8 @@ static void raid5_end_write_request(struct bio *bi)
if (replacement) {
if (bi->bi_status)
md_error(conf->mddev, rdev);
- else if (is_badblock(rdev, sh->sector,
- RAID5_STRIPE_SECTORS(conf),
- &first_bad, &bad_sectors))
+ else if (rdev_has_badblock(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf)))
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
if (bi->bi_status) {
@@ -2900,9 +2891,8 @@ static void raid5_end_write_request(struct bio *bi)
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED,
&rdev->mddev->recovery);
- } else if (is_badblock(rdev, sh->sector,
- RAID5_STRIPE_SECTORS(conf),
- &first_bad, &bad_sectors)) {
+ } else if (rdev_has_badblock(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf))) {
set_bit(R5_MadeGood, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags))
/* That was a successful write so make
@@ -4205,10 +4195,9 @@ static int handle_stripe_dirtying(struct r5conf *conf,
set_bit(STRIPE_HANDLE, &sh->state);
if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
/* prefer read-modify-write, but need to get some data */
- if (conf->mddev->queue)
- blk_add_trace_msg(conf->mddev->queue,
- "raid5 rmw %llu %d",
- (unsigned long long)sh->sector, rmw);
+ mddev_add_trace_msg(conf->mddev, "raid5 rmw %llu %d",
+ sh->sector, rmw);
+
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if (test_bit(R5_InJournal, &dev->flags) &&
@@ -4285,10 +4274,11 @@ static int handle_stripe_dirtying(struct r5conf *conf,
set_bit(STRIPE_DELAYED, &sh->state);
}
}
- if (rcw && conf->mddev->queue)
- blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
- (unsigned long long)sh->sector,
- rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
+ if (rcw && !mddev_is_dm(conf->mddev))
+ blk_add_trace_msg(conf->mddev->gendisk->queue,
+ "raid5 rcw %llu %d %d %d",
+ (unsigned long long)sh->sector, rcw, qread,
+ test_bit(STRIPE_DELAYED, &sh->state));
}
if (rcw > disks && rmw > disks &&
@@ -4674,8 +4664,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Now to look around and see what can be done */
for (i=disks; i--; ) {
struct md_rdev *rdev;
- sector_t first_bad;
- int bad_sectors;
int is_bad = 0;
dev = &sh->dev[i];
@@ -4719,8 +4707,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
rdev = conf->disks[i].replacement;
if (rdev && !test_bit(Faulty, &rdev->flags) &&
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
- !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
- &first_bad, &bad_sectors))
+ !rdev_has_badblock(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf)))
set_bit(R5_ReadRepl, &dev->flags);
else {
if (rdev && !test_bit(Faulty, &rdev->flags))
@@ -4733,8 +4721,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
if (rdev && test_bit(Faulty, &rdev->flags))
rdev = NULL;
if (rdev) {
- is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
- &first_bad, &bad_sectors);
+ is_bad = rdev_has_badblock(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf));
if (s->blocked_rdev == NULL
&& (test_bit(Blocked, &rdev->flags)
|| is_bad < 0)) {
@@ -5463,8 +5451,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct r5conf *conf = mddev->private;
struct bio *align_bio;
struct md_rdev *rdev;
- sector_t sector, end_sector, first_bad;
- int bad_sectors, dd_idx;
+ sector_t sector, end_sector;
+ int dd_idx;
bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) {
@@ -5493,8 +5481,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
atomic_inc(&rdev->nr_pending);
- if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
- &bad_sectors)) {
+ if (rdev_has_badblock(rdev, sector, bio_sectors(raid_bio))) {
rdev_dec_pending(rdev, mddev);
return 0;
}
@@ -5530,9 +5517,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
spin_unlock_irq(&conf->device_lock);
}
- if (mddev->gendisk)
- trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
- raid_bio->bi_iter.bi_sector);
+ mddev_trace_remap(mddev, align_bio, raid_bio->bi_iter.bi_sector);
submit_bio_noacct(align_bio);
return 1;
}
@@ -5701,8 +5686,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
}
release_inactive_stripe_list(conf, cb->temp_inactive_list,
NR_STRIPE_HASH_LOCKS);
- if (mddev->queue)
- trace_block_unplug(mddev->queue, cnt, !from_schedule);
+ if (!mddev_is_dm(mddev))
+ trace_block_unplug(mddev->gendisk->queue, cnt, !from_schedule);
kfree(cb);
}
@@ -5946,7 +5931,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
if (ahead_of_reshape(mddev, logical_sector,
conf->reshape_safe)) {
spin_unlock_irq(&conf->device_lock);
- return STRIPE_SCHEDULE_AND_RETRY;
+ ret = STRIPE_SCHEDULE_AND_RETRY;
+ goto out;
}
}
spin_unlock_irq(&conf->device_lock);
@@ -6025,6 +6011,12 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
out_release:
raid5_release_stripe(sh);
+out:
+ if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) {
+ bi->bi_status = BLK_STS_RESOURCE;
+ ret = STRIPE_WAIT_RESHAPE;
+ pr_err_ratelimited("dm-raid456: io across reshape position while reshape can't make progress");
+ }
return ret;
}
@@ -6146,7 +6138,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
while (1) {
res = make_stripe_request(mddev, conf, &ctx, logical_sector,
bi);
- if (res == STRIPE_FAIL)
+ if (res == STRIPE_FAIL || res == STRIPE_WAIT_RESHAPE)
break;
if (res == STRIPE_RETRY)
@@ -6184,6 +6176,11 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
if (rw == WRITE)
md_write_end(mddev);
+ if (res == STRIPE_WAIT_RESHAPE) {
+ md_free_cloned_bio(bi);
+ return false;
+ }
+
bio_endio(bi);
return true;
}
@@ -6773,7 +6770,18 @@ static void raid5d(struct md_thread *thread)
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
+
+ /*
+ * Waiting on MD_SB_CHANGE_PENDING below may deadlock
+ * seeing md_check_recovery() is needed to clear
+ * the flag when using mdmon.
+ */
+ continue;
}
+
+ wait_event_lock_irq(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
+ conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
@@ -6820,7 +6828,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
if (size <= 16 || size > 32768)
return -EINVAL;
- conf->min_nr_stripes = size;
+ WRITE_ONCE(conf->min_nr_stripes, size);
mutex_lock(&conf->cache_size_mutex);
while (size < conf->max_nr_stripes &&
drop_one_stripe(conf))
@@ -6832,7 +6840,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL)) {
- conf->min_nr_stripes = conf->max_nr_stripes;
+ WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes);
result = -ENOMEM;
break;
}
@@ -6967,10 +6975,8 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len)
pr_debug("md/raid: change stripe_size from %lu to %lu\n",
conf->stripe_size, new);
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- mddev->reshape_position != MaxSector ||
- mddev->sysfs_active) {
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ mddev->reshape_position != MaxSector || mddev->sysfs_active) {
err = -EBUSY;
goto out_unlock;
}
@@ -7084,7 +7090,7 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
if (!conf)
err = -ENODEV;
else if (new != conf->skip_copy) {
- struct request_queue *q = mddev->queue;
+ struct request_queue *q = mddev->gendisk->queue;
conf->skip_copy = new;
if (new)
@@ -7390,11 +7396,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct r5conf *conf = shrink->private_data;
+ int max_stripes = READ_ONCE(conf->max_nr_stripes);
+ int min_stripes = READ_ONCE(conf->min_nr_stripes);
- if (conf->max_nr_stripes < conf->min_nr_stripes)
+ if (max_stripes < min_stripes)
/* unlikely, but not impossible */
return 0;
- return conf->max_nr_stripes - conf->min_nr_stripes;
+ return max_stripes - min_stripes;
}
static struct r5conf *setup_conf(struct mddev *mddev)
@@ -7684,10 +7692,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
return 0;
}
-static void raid5_set_io_opt(struct r5conf *conf)
+static int raid5_set_limits(struct mddev *mddev)
{
- blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
- (conf->raid_disks - conf->max_degraded));
+ struct r5conf *conf = mddev->private;
+ struct queue_limits lim;
+ int data_disks, stripe;
+ struct md_rdev *rdev;
+
+ /*
+ * The read-ahead size must cover two whole stripes, which is
+ * 2 * (datadisks) * chunksize where 'n' is the number of raid devices.
+ */
+ data_disks = conf->previous_raid_disks - conf->max_degraded;
+
+ /*
+ * We can only discard a whole stripe. It doesn't make sense to
+ * discard data disk but write parity disk
+ */
+ stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
+
+ blk_set_stacking_limits(&lim);
+ lim.io_min = mddev->chunk_sectors << 9;
+ lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
+ lim.raid_partial_stripes_expensive = 1;
+ lim.discard_granularity = stripe;
+ lim.max_write_zeroes_sectors = 0;
+ mddev_stack_rdev_limits(mddev, &lim);
+ rdev_for_each(rdev, mddev)
+ queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
+ mddev->gendisk->disk_name);
+
+ /*
+ * Zeroing is required for discard, otherwise data could be lost.
+ *
+ * Consider a scenario: discard a stripe (the stripe could be
+ * inconsistent if discard_zeroes_data is 0); write one disk of the
+ * stripe (the stripe could be inconsistent again depending on which
+ * disks are used to calculate parity); the disk is broken; The stripe
+ * data of this disk is lost.
+ *
+ * We only allow DISCARD if the sysadmin has confirmed that only safe
+ * devices are in use by setting a module parameter. A better idea
+ * might be to turn DISCARD into WRITE_ZEROES requests, as that is
+ * required to be safe.
+ */
+ if (!devices_handle_discard_safely ||
+ lim.max_discard_sectors < (stripe >> 9) ||
+ lim.discard_granularity < stripe)
+ lim.max_hw_discard_sectors = 0;
+
+ /*
+ * Requests require having a bitmap for each stripe.
+ * Limit the max sectors based on this.
+ */
+ lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf);
+
+ /* No restrictions on the number of segments in the request */
+ lim.max_segments = USHRT_MAX;
+
+ return queue_limits_set(mddev->gendisk->queue, &lim);
}
static int raid5_run(struct mddev *mddev)
@@ -7700,6 +7763,7 @@ static int raid5_run(struct mddev *mddev)
int i;
long long min_offset_diff = 0;
int first = 1;
+ int ret = -EIO;
if (mddev->recovery_cp != MaxSector)
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
@@ -7936,11 +8000,7 @@ static int raid5_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- rcu_assign_pointer(mddev->sync_thread,
- md_register_thread(md_do_sync, mddev, "reshape"));
- if (!mddev->sync_thread)
- goto abort;
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
/* Ok, everything is just fine now */
@@ -7952,66 +8012,10 @@ static int raid5_run(struct mddev *mddev)
mdname(mddev));
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
- if (mddev->queue) {
- int chunk_size;
- /* read-ahead size must cover two whole stripes, which
- * is 2 * (datadisks) * chunksize where 'n' is the
- * number of raid devices
- */
- int data_disks = conf->previous_raid_disks - conf->max_degraded;
- int stripe = data_disks *
- ((mddev->chunk_sectors << 9) / PAGE_SIZE);
-
- chunk_size = mddev->chunk_sectors << 9;
- blk_queue_io_min(mddev->queue, chunk_size);
- raid5_set_io_opt(conf);
- mddev->queue->limits.raid_partial_stripes_expensive = 1;
- /*
- * We can only discard a whole stripe. It doesn't make sense to
- * discard data disk but write parity disk
- */
- stripe = stripe * PAGE_SIZE;
- stripe = roundup_pow_of_two(stripe);
- mddev->queue->limits.discard_granularity = stripe;
-
- blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
-
- rdev_for_each(rdev, mddev) {
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->new_data_offset << 9);
- }
-
- /*
- * zeroing is required, otherwise data
- * could be lost. Consider a scenario: discard a stripe
- * (the stripe could be inconsistent if
- * discard_zeroes_data is 0); write one disk of the
- * stripe (the stripe could be inconsistent again
- * depending on which disks are used to calculate
- * parity); the disk is broken; The stripe data of this
- * disk is lost.
- *
- * We only allow DISCARD if the sysadmin has confirmed that
- * only safe devices are in use by setting a module parameter.
- * A better idea might be to turn DISCARD into WRITE_ZEROES
- * requests, as that is required to be safe.
- */
- if (!devices_handle_discard_safely ||
- mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
- mddev->queue->limits.discard_granularity < stripe)
- blk_queue_max_discard_sectors(mddev->queue, 0);
-
- /*
- * Requests require having a bitmap for each stripe.
- * Limit the max sectors based on this.
- */
- blk_queue_max_hw_sectors(mddev->queue,
- RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));
-
- /* No restrictions on the number of segments in the request */
- blk_queue_max_segments(mddev->queue, USHRT_MAX);
+ if (!mddev_is_dm(mddev)) {
+ ret = raid5_set_limits(mddev);
+ if (ret)
+ goto abort;
}
if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
@@ -8024,7 +8028,7 @@ abort:
free_conf(conf);
mddev->private = NULL;
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
- return -EIO;
+ return ret;
}
static void raid5_free(struct mddev *mddev, void *priv)
@@ -8506,29 +8510,8 @@ static int raid5_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- rcu_assign_pointer(mddev->sync_thread,
- md_register_thread(md_do_sync, mddev, "reshape"));
- if (!mddev->sync_thread) {
- mddev->recovery = 0;
- spin_lock_irq(&conf->device_lock);
- write_seqcount_begin(&conf->gen_lock);
- mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
- mddev->new_chunk_sectors =
- conf->chunk_sectors = conf->prev_chunk_sectors;
- mddev->new_layout = conf->algorithm = conf->prev_algo;
- rdev_for_each(rdev, mddev)
- rdev->new_data_offset = rdev->data_offset;
- smp_wmb();
- conf->generation --;
- conf->reshape_progress = MaxSector;
- mddev->reshape_position = MaxSector;
- write_seqcount_end(&conf->gen_lock);
- spin_unlock_irq(&conf->device_lock);
- return -EAGAIN;
- }
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
conf->reshape_checkpoint = jiffies;
- md_wakeup_thread(mddev->sync_thread);
md_new_event();
return 0;
}
@@ -8556,8 +8539,8 @@ static void end_reshape(struct r5conf *conf)
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
- if (conf->mddev->queue)
- raid5_set_io_opt(conf);
+ mddev_update_io_opt(conf->mddev,
+ conf->raid_disks - conf->max_degraded);
}
}
@@ -8934,6 +8917,18 @@ static int raid5_start(struct mddev *mddev)
return r5l_start(conf->log);
}
+/*
+ * This is only used for dm-raid456, caller already frozen sync_thread, hence
+ * if rehsape is still in progress, io that is waiting for reshape can never be
+ * done now, hence wake up and handle those IO.
+ */
+static void raid5_prepare_suspend(struct mddev *mddev)
+{
+ struct r5conf *conf = mddev->private;
+
+ wake_up(&conf->wait_for_overlap);
+}
+
static struct md_personality raid6_personality =
{
.name = "raid6",
@@ -8957,6 +8952,7 @@ static struct md_personality raid6_personality =
.quiesce = raid5_quiesce,
.takeover = raid6_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
+ .prepare_suspend = raid5_prepare_suspend,
};
static struct md_personality raid5_personality =
{
@@ -8981,6 +8977,7 @@ static struct md_personality raid5_personality =
.quiesce = raid5_quiesce,
.takeover = raid5_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
+ .prepare_suspend = raid5_prepare_suspend,
};
static struct md_personality raid4_personality =
@@ -9006,6 +9003,7 @@ static struct md_personality raid4_personality =
.quiesce = raid5_quiesce,
.takeover = raid4_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
+ .prepare_suspend = raid5_prepare_suspend,
};
static int __init raid5_init(void)
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index 41a832dd1426..b6bf8f232f48 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -989,7 +989,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
bool no_previous_buffers = !q_num_bufs;
int ret = 0;
- if (q->num_buffers == q->max_num_buffers) {
+ if (q_num_bufs == q->max_num_buffers) {
dprintk(q, 1, "maximum number of buffers already allocated\n");
return -ENOBUFS;
}
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index 54d572c3b515..c575198e8354 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -671,8 +671,20 @@ int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b)
}
EXPORT_SYMBOL(vb2_querybuf);
-static void fill_buf_caps(struct vb2_queue *q, u32 *caps)
+static void vb2_set_flags_and_caps(struct vb2_queue *q, u32 memory,
+ u32 *flags, u32 *caps, u32 *max_num_bufs)
{
+ if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) {
+ /*
+ * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only,
+ * but in order to avoid bugs we zero out all bits.
+ */
+ *flags = 0;
+ } else {
+ /* Clear all unknown flags. */
+ *flags &= V4L2_MEMORY_FLAG_NON_COHERENT;
+ }
+
*caps = V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS;
if (q->io_modes & VB2_MMAP)
*caps |= V4L2_BUF_CAP_SUPPORTS_MMAP;
@@ -686,21 +698,9 @@ static void fill_buf_caps(struct vb2_queue *q, u32 *caps)
*caps |= V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS;
if (q->supports_requests)
*caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS;
-}
-
-static void validate_memory_flags(struct vb2_queue *q,
- int memory,
- u32 *flags)
-{
- if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) {
- /*
- * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only,
- * but in order to avoid bugs we zero out all bits.
- */
- *flags = 0;
- } else {
- /* Clear all unknown flags. */
- *flags &= V4L2_MEMORY_FLAG_NON_COHERENT;
+ if (max_num_bufs) {
+ *max_num_bufs = q->max_num_buffers;
+ *caps |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS;
}
}
@@ -709,8 +709,8 @@ int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
int ret = vb2_verify_memory_type(q, req->memory, req->type);
u32 flags = req->flags;
- fill_buf_caps(q, &req->capabilities);
- validate_memory_flags(q, req->memory, &flags);
+ vb2_set_flags_and_caps(q, req->memory, &flags,
+ &req->capabilities, NULL);
req->flags = flags;
return ret ? ret : vb2_core_reqbufs(q, req->memory,
req->flags, &req->count);
@@ -751,11 +751,9 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create)
int ret = vb2_verify_memory_type(q, create->memory, f->type);
unsigned i;
- fill_buf_caps(q, &create->capabilities);
- validate_memory_flags(q, create->memory, &create->flags);
create->index = vb2_get_num_buffers(q);
- create->max_num_buffers = q->max_num_buffers;
- create->capabilities |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS;
+ vb2_set_flags_and_caps(q, create->memory, &create->flags,
+ &create->capabilities, &create->max_num_buffers);
if (create->count == 0)
return ret != -EBUSY ? ret : 0;
@@ -1006,8 +1004,8 @@ int vb2_ioctl_reqbufs(struct file *file, void *priv,
int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type);
u32 flags = p->flags;
- fill_buf_caps(vdev->queue, &p->capabilities);
- validate_memory_flags(vdev->queue, p->memory, &flags);
+ vb2_set_flags_and_caps(vdev->queue, p->memory, &flags,
+ &p->capabilities, NULL);
p->flags = flags;
if (res)
return res;
@@ -1026,12 +1024,11 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv,
struct v4l2_create_buffers *p)
{
struct video_device *vdev = video_devdata(file);
- int res = vb2_verify_memory_type(vdev->queue, p->memory,
- p->format.type);
+ int res = vb2_verify_memory_type(vdev->queue, p->memory, p->format.type);
- p->index = vdev->queue->num_buffers;
- fill_buf_caps(vdev->queue, &p->capabilities);
- validate_memory_flags(vdev->queue, p->memory, &p->flags);
+ p->index = vb2_get_num_buffers(vdev->queue);
+ vb2_set_flags_and_caps(vdev->queue, p->memory, &p->flags,
+ &p->capabilities, &p->max_num_buffers);
/*
* If count == 0, then just check if memory and type are valid.
* Any -EBUSY result from vb2_verify_memory_type can be mapped to 0.
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
index bfe4caa79cc9..0d90b5820bef 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
@@ -272,7 +272,7 @@ static const struct wave5_match_data ti_wave521c_data = {
};
static const struct of_device_id wave5_dt_ids[] = {
- { .compatible = "ti,k3-j721s2-wave521c", .data = &ti_wave521c_data },
+ { .compatible = "ti,j721s2-wave521c", .data = &ti_wave521c_data },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, wave5_dt_ids);
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
index aebd3c12020b..c381c22135a2 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
@@ -725,6 +725,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx)
unsigned int i;
u32 status;
+ if (!rkisp1->irqs_enabled)
+ return IRQ_NONE;
+
status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS);
if (!status)
return IRQ_NONE;
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
index 4b6b28c05b89..b757f75edecf 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
@@ -450,6 +450,7 @@ struct rkisp1_debug {
* @debug: debug params to be exposed on debugfs
* @info: version-specific ISP information
* @irqs: IRQ line numbers
+ * @irqs_enabled: the hardware is enabled and can cause interrupts
*/
struct rkisp1_device {
void __iomem *base_addr;
@@ -471,6 +472,7 @@ struct rkisp1_device {
struct rkisp1_debug debug;
const struct rkisp1_info *info;
int irqs[RKISP1_NUM_IRQS];
+ bool irqs_enabled;
};
/*
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c
index b6e47e2f1b94..4202642e0523 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c
@@ -196,6 +196,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx)
struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
u32 val, status;
+ if (!rkisp1->irqs_enabled)
+ return IRQ_NONE;
+
status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS);
if (!status)
return IRQ_NONE;
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
index f96f821a7b50..73cf08a74011 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
@@ -305,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev)
{
struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
+ rkisp1->irqs_enabled = false;
+ /* Make sure the IRQ handler will see the above */
+ mb();
+
+ /*
+ * Wait until any running IRQ handler has returned. The IRQ handler
+ * may get called even after this (as it's a shared interrupt line)
+ * but the 'irqs_enabled' flag will make the handler return immediately.
+ */
+ for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) {
+ if (rkisp1->irqs[il] == -1)
+ continue;
+
+ /* Skip if the irq line is the same as previous */
+ if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il])
+ synchronize_irq(rkisp1->irqs[il]);
+ }
+
clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks);
return pinctrl_pm_select_sleep_state(dev);
}
@@ -321,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev)
if (ret)
return ret;
+ rkisp1->irqs_enabled = true;
+ /* Make sure the IRQ handler will see the above */
+ mb();
+
return 0;
}
@@ -559,7 +581,7 @@ static int rkisp1_probe(struct platform_device *pdev)
rkisp1->irqs[il] = irq;
}
- ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0,
+ ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED,
dev_driver_string(dev), dev);
if (ret) {
dev_err(dev, "request irq failed: %d\n", ret);
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
index f00873d31c42..78a1f7a1499b 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
@@ -976,6 +976,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx)
struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
u32 status, isp_err;
+ if (!rkisp1->irqs_enabled)
+ return IRQ_NONE;
+
status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS);
if (!status)
return IRQ_NONE;
diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
index 2afe67ffa285..74d69ce22a33 100644
--- a/drivers/media/rc/Kconfig
+++ b/drivers/media/rc/Kconfig
@@ -319,6 +319,7 @@ config IR_PWM_TX
tristate "PWM IR transmitter"
depends on LIRC
depends on PWM
+ depends on HIGH_RES_TIMERS
depends on OF
help
Say Y if you want to use a PWM based IR transmitter. This is
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index fe17c7f98e81..2f7564f26445 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_trace_printk:
- if (perfmon_capable())
+ if (bpf_token_capable(prog->aux->token, CAP_PERFMON))
return bpf_get_trace_printk_proto();
fallthrough;
default:
@@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (attr->attach_flags)
return -EINVAL;
- rcdev = rc_dev_get_from_fd(attr->target_fd);
+ rcdev = rc_dev_get_from_fd(attr->target_fd, true);
if (IS_ERR(rcdev))
return PTR_ERR(rcdev);
@@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);
- rcdev = rc_dev_get_from_fd(attr->target_fd);
+ rcdev = rc_dev_get_from_fd(attr->target_fd, true);
if (IS_ERR(rcdev)) {
bpf_prog_put(prog);
return PTR_ERR(rcdev);
@@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
if (attr->query.query_flags)
return -EINVAL;
- rcdev = rc_dev_get_from_fd(attr->query.target_fd);
+ rcdev = rc_dev_get_from_fd(attr->query.target_fd, false);
if (IS_ERR(rcdev))
return PTR_ERR(rcdev);
diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c
index 196806709259..69e630d85262 100644
--- a/drivers/media/rc/ir_toy.c
+++ b/drivers/media/rc/ir_toy.c
@@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP);
if (err) {
dev_err(irtoy->dev, "exit sample mode: %d\n", err);
+ kfree(buf);
return err;
}
@@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND);
if (err) {
dev_err(irtoy->dev, "enter sample mode: %d\n", err);
+ kfree(buf);
return err;
}
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index a537734832c5..caad59f76793 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void)
unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX);
}
-struct rc_dev *rc_dev_get_from_fd(int fd)
+struct rc_dev *rc_dev_get_from_fd(int fd, bool write)
{
struct fd f = fdget(fd);
struct lirc_fh *fh;
@@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd)
return ERR_PTR(-EINVAL);
}
+ if (write && !(f.file->f_mode & FMODE_WRITE))
+ return ERR_PTR(-EPERM);
+
fh = f.file->private_data;
dev = fh->rc;
diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h
index ef1e95e1af7f..7df949fc65e2 100644
--- a/drivers/media/rc/rc-core-priv.h
+++ b/drivers/media/rc/rc-core-priv.h
@@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev);
void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc);
int lirc_register(struct rc_dev *dev);
void lirc_unregister(struct rc_dev *dev);
-struct rc_dev *rc_dev_get_from_fd(int fd);
+struct rc_dev *rc_dev_get_from_fd(int fd, bool write);
#else
static inline int lirc_dev_init(void) { return 0; }
static inline void lirc_dev_exit(void) {}
diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c
index 434982545be6..8c5ad5c025fa 100644
--- a/drivers/memory/emif.c
+++ b/drivers/memory/emif.c
@@ -72,7 +72,6 @@ static DEFINE_SPINLOCK(emif_lock);
static unsigned long irq_state;
static LIST_HEAD(device_list);
-#ifdef CONFIG_DEBUG_FS
static void do_emif_regdump_show(struct seq_file *s, struct emif_data *emif,
struct emif_regs *regs)
{
@@ -140,31 +139,24 @@ static int emif_mr4_show(struct seq_file *s, void *unused)
DEFINE_SHOW_ATTRIBUTE(emif_mr4);
-static int __init_or_module emif_debugfs_init(struct emif_data *emif)
+static void emif_debugfs_init(struct emif_data *emif)
{
- emif->debugfs_root = debugfs_create_dir(dev_name(emif->dev), NULL);
- debugfs_create_file("regcache_dump", S_IRUGO, emif->debugfs_root, emif,
- &emif_regdump_fops);
- debugfs_create_file("mr4", S_IRUGO, emif->debugfs_root, emif,
- &emif_mr4_fops);
- return 0;
-}
-
-static void __exit emif_debugfs_exit(struct emif_data *emif)
-{
- debugfs_remove_recursive(emif->debugfs_root);
- emif->debugfs_root = NULL;
-}
-#else
-static inline int __init_or_module emif_debugfs_init(struct emif_data *emif)
-{
- return 0;
+ if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+ emif->debugfs_root = debugfs_create_dir(dev_name(emif->dev), NULL);
+ debugfs_create_file("regcache_dump", S_IRUGO, emif->debugfs_root, emif,
+ &emif_regdump_fops);
+ debugfs_create_file("mr4", S_IRUGO, emif->debugfs_root, emif,
+ &emif_mr4_fops);
+ }
}
-static inline void __exit emif_debugfs_exit(struct emif_data *emif)
+static void emif_debugfs_exit(struct emif_data *emif)
{
+ if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+ debugfs_remove_recursive(emif->debugfs_root);
+ emif->debugfs_root = NULL;
+ }
}
-#endif
/*
* Get bus width used by EMIF. Note that this may be different from the
@@ -679,7 +671,7 @@ static void disable_and_clear_all_interrupts(struct emif_data *emif)
clear_all_interrupts(emif);
}
-static int __init_or_module setup_interrupts(struct emif_data *emif, u32 irq)
+static int setup_interrupts(struct emif_data *emif, u32 irq)
{
u32 interrupts, type;
void __iomem *base = emif->base;
@@ -710,7 +702,7 @@ static int __init_or_module setup_interrupts(struct emif_data *emif, u32 irq)
}
-static void __init_or_module emif_onetime_settings(struct emif_data *emif)
+static void emif_onetime_settings(struct emif_data *emif)
{
u32 pwr_mgmt_ctrl, zq, temp_alert_cfg;
void __iomem *base = emif->base;
@@ -834,8 +826,7 @@ static int is_custom_config_valid(struct emif_custom_configs *cust_cfgs,
return valid;
}
-#if defined(CONFIG_OF)
-static void __init_or_module of_get_custom_configs(struct device_node *np_emif,
+static void of_get_custom_configs(struct device_node *np_emif,
struct emif_data *emif)
{
struct emif_custom_configs *cust_cfgs = NULL;
@@ -884,7 +875,7 @@ static void __init_or_module of_get_custom_configs(struct device_node *np_emif,
emif->plat_data->custom_configs = cust_cfgs;
}
-static void __init_or_module of_get_ddr_info(struct device_node *np_emif,
+static void of_get_ddr_info(struct device_node *np_emif,
struct device_node *np_ddr,
struct ddr_device_info *dev_info)
{
@@ -918,7 +909,7 @@ static void __init_or_module of_get_ddr_info(struct device_node *np_emif,
dev_info->io_width = __fls(io_width) - 1;
}
-static struct emif_data * __init_or_module of_get_memory_device_details(
+static struct emif_data *of_get_memory_device_details(
struct device_node *np_emif, struct device *dev)
{
struct emif_data *emif = NULL;
@@ -991,16 +982,7 @@ out:
return emif;
}
-#else
-
-static struct emif_data * __init_or_module of_get_memory_device_details(
- struct device_node *np_emif, struct device *dev)
-{
- return NULL;
-}
-#endif
-
-static struct emif_data *__init_or_module get_device_details(
+static struct emif_data *get_device_details(
struct platform_device *pdev)
{
u32 size;
@@ -1104,7 +1086,7 @@ error:
return NULL;
}
-static int __init_or_module emif_probe(struct platform_device *pdev)
+static int emif_probe(struct platform_device *pdev)
{
struct emif_data *emif;
int irq, ret;
@@ -1159,7 +1141,7 @@ error:
return -ENODEV;
}
-static void __exit emif_remove(struct platform_device *pdev)
+static void emif_remove(struct platform_device *pdev)
{
struct emif_data *emif = platform_get_drvdata(pdev);
@@ -1183,7 +1165,8 @@ MODULE_DEVICE_TABLE(of, emif_of_match);
#endif
static struct platform_driver emif_driver = {
- .remove_new = __exit_p(emif_remove),
+ .probe = emif_probe,
+ .remove_new = emif_remove,
.shutdown = emif_shutdown,
.driver = {
.name = "emif",
@@ -1191,7 +1174,7 @@ static struct platform_driver emif_driver = {
},
};
-module_platform_driver_probe(emif_driver, emif_probe);
+module_platform_driver(emif_driver);
MODULE_DESCRIPTION("TI EMIF SDRAM Controller Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/memory/stm32-fmc2-ebi.c b/drivers/memory/stm32-fmc2-ebi.c
index 47d0ea5f1616..1c63eeacd071 100644
--- a/drivers/memory/stm32-fmc2-ebi.c
+++ b/drivers/memory/stm32-fmc2-ebi.c
@@ -11,6 +11,7 @@
#include <linux/of_platform.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/reset.h>
@@ -20,8 +21,15 @@
#define FMC2_BCR(x) ((x) * 0x8 + FMC2_BCR1)
#define FMC2_BTR(x) ((x) * 0x8 + FMC2_BTR1)
#define FMC2_PCSCNTR 0x20
+#define FMC2_CFGR 0x20
+#define FMC2_SR 0x84
#define FMC2_BWTR1 0x104
#define FMC2_BWTR(x) ((x) * 0x8 + FMC2_BWTR1)
+#define FMC2_SECCFGR 0x300
+#define FMC2_CIDCFGR0 0x30c
+#define FMC2_CIDCFGR(x) ((x) * 0x8 + FMC2_CIDCFGR0)
+#define FMC2_SEMCR0 0x310
+#define FMC2_SEMCR(x) ((x) * 0x8 + FMC2_SEMCR0)
/* Register: FMC2_BCR1 */
#define FMC2_BCR1_CCLKEN BIT(20)
@@ -42,6 +50,7 @@
#define FMC2_BCR_ASYNCWAIT BIT(15)
#define FMC2_BCR_CPSIZE GENMASK(18, 16)
#define FMC2_BCR_CBURSTRW BIT(19)
+#define FMC2_BCR_CSCOUNT GENMASK(21, 20)
#define FMC2_BCR_NBLSET GENMASK(23, 22)
/* Register: FMC2_BTRx/FMC2_BWTRx */
@@ -58,8 +67,28 @@
#define FMC2_PCSCNTR_CSCOUNT GENMASK(15, 0)
#define FMC2_PCSCNTR_CNTBEN(x) BIT((x) + 16)
+/* Register: FMC2_CFGR */
+#define FMC2_CFGR_CLKDIV GENMASK(19, 16)
+#define FMC2_CFGR_CCLKEN BIT(20)
+#define FMC2_CFGR_FMC2EN BIT(31)
+
+/* Register: FMC2_SR */
+#define FMC2_SR_ISOST GENMASK(1, 0)
+
+/* Register: FMC2_CIDCFGR */
+#define FMC2_CIDCFGR_CFEN BIT(0)
+#define FMC2_CIDCFGR_SEMEN BIT(1)
+#define FMC2_CIDCFGR_SCID GENMASK(6, 4)
+#define FMC2_CIDCFGR_SEMWLC1 BIT(17)
+
+/* Register: FMC2_SEMCR */
+#define FMC2_SEMCR_SEM_MUTEX BIT(0)
+#define FMC2_SEMCR_SEMCID GENMASK(6, 4)
+
#define FMC2_MAX_EBI_CE 4
#define FMC2_MAX_BANKS 5
+#define FMC2_MAX_RESOURCES 6
+#define FMC2_CID1 1
#define FMC2_BCR_CPSIZE_0 0x0
#define FMC2_BCR_CPSIZE_128 0x1
@@ -74,6 +103,11 @@
#define FMC2_BCR_MTYP_PSRAM 0x1
#define FMC2_BCR_MTYP_NOR 0x2
+#define FMC2_BCR_CSCOUNT_0 0x0
+#define FMC2_BCR_CSCOUNT_1 0x1
+#define FMC2_BCR_CSCOUNT_64 0x2
+#define FMC2_BCR_CSCOUNT_256 0x3
+
#define FMC2_BXTR_EXTMOD_A 0x0
#define FMC2_BXTR_EXTMOD_B 0x1
#define FMC2_BXTR_EXTMOD_C 0x2
@@ -88,6 +122,7 @@
#define FMC2_BTR_CLKDIV_MAX 0xf
#define FMC2_BTR_DATLAT_MAX 0xf
#define FMC2_PCSCNTR_CSCOUNT_MAX 0xff
+#define FMC2_CFGR_CLKDIV_MAX 0xf
enum stm32_fmc2_ebi_bank {
FMC2_EBI1 = 0,
@@ -101,7 +136,8 @@ enum stm32_fmc2_ebi_register_type {
FMC2_REG_BCR = 1,
FMC2_REG_BTR,
FMC2_REG_BWTR,
- FMC2_REG_PCSCNTR
+ FMC2_REG_PCSCNTR,
+ FMC2_REG_CFGR
};
enum stm32_fmc2_ebi_transaction_type {
@@ -132,16 +168,42 @@ enum stm32_fmc2_ebi_cpsize {
FMC2_CPSIZE_1024 = 1024
};
+enum stm32_fmc2_ebi_cscount {
+ FMC2_CSCOUNT_0 = 0,
+ FMC2_CSCOUNT_1 = 1,
+ FMC2_CSCOUNT_64 = 64,
+ FMC2_CSCOUNT_256 = 256
+};
+
+struct stm32_fmc2_ebi;
+
+struct stm32_fmc2_ebi_data {
+ const struct stm32_fmc2_prop *child_props;
+ unsigned int nb_child_props;
+ u32 fmc2_enable_reg;
+ u32 fmc2_enable_bit;
+ int (*nwait_used_by_ctrls)(struct stm32_fmc2_ebi *ebi);
+ void (*set_setup)(struct stm32_fmc2_ebi *ebi);
+ int (*save_setup)(struct stm32_fmc2_ebi *ebi);
+ int (*check_rif)(struct stm32_fmc2_ebi *ebi, u32 resource);
+ void (*put_sems)(struct stm32_fmc2_ebi *ebi);
+ void (*get_sems)(struct stm32_fmc2_ebi *ebi);
+};
+
struct stm32_fmc2_ebi {
struct device *dev;
struct clk *clk;
struct regmap *regmap;
+ const struct stm32_fmc2_ebi_data *data;
u8 bank_assigned;
+ u8 sem_taken;
+ bool access_granted;
u32 bcr[FMC2_MAX_EBI_CE];
u32 btr[FMC2_MAX_EBI_CE];
u32 bwtr[FMC2_MAX_EBI_CE];
u32 pcscntr;
+ u32 cfgr;
};
/*
@@ -181,8 +243,11 @@ static int stm32_fmc2_ebi_check_mux(struct stm32_fmc2_ebi *ebi,
int cs)
{
u32 bcr;
+ int ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
if (bcr & FMC2_BCR_MTYP)
return 0;
@@ -195,8 +260,11 @@ static int stm32_fmc2_ebi_check_waitcfg(struct stm32_fmc2_ebi *ebi,
int cs)
{
u32 bcr, val = FIELD_PREP(FMC2_BCR_MTYP, FMC2_BCR_MTYP_NOR);
+ int ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
if ((bcr & FMC2_BCR_MTYP) == val && bcr & FMC2_BCR_BURSTEN)
return 0;
@@ -209,8 +277,11 @@ static int stm32_fmc2_ebi_check_sync_trans(struct stm32_fmc2_ebi *ebi,
int cs)
{
u32 bcr;
+ int ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
if (bcr & FMC2_BCR_BURSTEN)
return 0;
@@ -218,13 +289,43 @@ static int stm32_fmc2_ebi_check_sync_trans(struct stm32_fmc2_ebi *ebi,
return -EINVAL;
}
+static int stm32_fmc2_ebi_mp25_check_cclk(struct stm32_fmc2_ebi *ebi,
+ const struct stm32_fmc2_prop *prop,
+ int cs)
+{
+ if (!ebi->access_granted)
+ return -EACCES;
+
+ return stm32_fmc2_ebi_check_sync_trans(ebi, prop, cs);
+}
+
+static int stm32_fmc2_ebi_mp25_check_clk_period(struct stm32_fmc2_ebi *ebi,
+ const struct stm32_fmc2_prop *prop,
+ int cs)
+{
+ u32 cfgr;
+ int ret;
+
+ ret = regmap_read(ebi->regmap, FMC2_CFGR, &cfgr);
+ if (ret)
+ return ret;
+
+ if (cfgr & FMC2_CFGR_CCLKEN && !ebi->access_granted)
+ return -EACCES;
+
+ return stm32_fmc2_ebi_check_sync_trans(ebi, prop, cs);
+}
+
static int stm32_fmc2_ebi_check_async_trans(struct stm32_fmc2_ebi *ebi,
const struct stm32_fmc2_prop *prop,
int cs)
{
u32 bcr;
+ int ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
if (!(bcr & FMC2_BCR_BURSTEN) || !(bcr & FMC2_BCR_CBURSTRW))
return 0;
@@ -237,8 +338,11 @@ static int stm32_fmc2_ebi_check_cpsize(struct stm32_fmc2_ebi *ebi,
int cs)
{
u32 bcr, val = FIELD_PREP(FMC2_BCR_MTYP, FMC2_BCR_MTYP_PSRAM);
+ int ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
if ((bcr & FMC2_BCR_MTYP) == val && bcr & FMC2_BCR_BURSTEN)
return 0;
@@ -251,12 +355,18 @@ static int stm32_fmc2_ebi_check_address_hold(struct stm32_fmc2_ebi *ebi,
int cs)
{
u32 bcr, bxtr, val = FIELD_PREP(FMC2_BXTR_ACCMOD, FMC2_BXTR_EXTMOD_D);
+ int ret;
+
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
if (prop->reg_type == FMC2_REG_BWTR)
- regmap_read(ebi->regmap, FMC2_BWTR(cs), &bxtr);
+ ret = regmap_read(ebi->regmap, FMC2_BWTR(cs), &bxtr);
else
- regmap_read(ebi->regmap, FMC2_BTR(cs), &bxtr);
+ ret = regmap_read(ebi->regmap, FMC2_BTR(cs), &bxtr);
+ if (ret)
+ return ret;
if ((!(bcr & FMC2_BCR_BURSTEN) || !(bcr & FMC2_BCR_CBURSTRW)) &&
((bxtr & FMC2_BXTR_ACCMOD) == val || bcr & FMC2_BCR_MUXEN))
@@ -270,12 +380,19 @@ static int stm32_fmc2_ebi_check_clk_period(struct stm32_fmc2_ebi *ebi,
int cs)
{
u32 bcr, bcr1;
+ int ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
- if (cs)
- regmap_read(ebi->regmap, FMC2_BCR1, &bcr1);
- else
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
+
+ if (cs) {
+ ret = regmap_read(ebi->regmap, FMC2_BCR1, &bcr1);
+ if (ret)
+ return ret;
+ } else {
bcr1 = bcr;
+ }
if (bcr & FMC2_BCR_BURSTEN && (!cs || !(bcr1 & FMC2_BCR1_CCLKEN)))
return 0;
@@ -307,18 +424,48 @@ static u32 stm32_fmc2_ebi_ns_to_clk_period(struct stm32_fmc2_ebi *ebi,
{
u32 nb_clk_cycles = stm32_fmc2_ebi_ns_to_clock_cycles(ebi, cs, setup);
u32 bcr, btr, clk_period;
+ int ret;
+
+ ret = regmap_read(ebi->regmap, FMC2_BCR1, &bcr);
+ if (ret)
+ return ret;
- regmap_read(ebi->regmap, FMC2_BCR1, &bcr);
if (bcr & FMC2_BCR1_CCLKEN || !cs)
- regmap_read(ebi->regmap, FMC2_BTR1, &btr);
+ ret = regmap_read(ebi->regmap, FMC2_BTR1, &btr);
else
- regmap_read(ebi->regmap, FMC2_BTR(cs), &btr);
+ ret = regmap_read(ebi->regmap, FMC2_BTR(cs), &btr);
+ if (ret)
+ return ret;
clk_period = FIELD_GET(FMC2_BTR_CLKDIV, btr) + 1;
return DIV_ROUND_UP(nb_clk_cycles, clk_period);
}
+static u32 stm32_fmc2_ebi_mp25_ns_to_clk_period(struct stm32_fmc2_ebi *ebi,
+ int cs, u32 setup)
+{
+ u32 nb_clk_cycles = stm32_fmc2_ebi_ns_to_clock_cycles(ebi, cs, setup);
+ u32 cfgr, btr, clk_period;
+ int ret;
+
+ ret = regmap_read(ebi->regmap, FMC2_CFGR, &cfgr);
+ if (ret)
+ return ret;
+
+ if (cfgr & FMC2_CFGR_CCLKEN) {
+ clk_period = FIELD_GET(FMC2_CFGR_CLKDIV, cfgr) + 1;
+ } else {
+ ret = regmap_read(ebi->regmap, FMC2_BTR(cs), &btr);
+ if (ret)
+ return ret;
+
+ clk_period = FIELD_GET(FMC2_BTR_CLKDIV, btr) + 1;
+ }
+
+ return DIV_ROUND_UP(nb_clk_cycles, clk_period);
+}
+
static int stm32_fmc2_ebi_get_reg(int reg_type, int cs, u32 *reg)
{
switch (reg_type) {
@@ -334,6 +481,9 @@ static int stm32_fmc2_ebi_get_reg(int reg_type, int cs, u32 *reg)
case FMC2_REG_PCSCNTR:
*reg = FMC2_PCSCNTR;
break;
+ case FMC2_REG_CFGR:
+ *reg = FMC2_CFGR;
+ break;
default:
return -EINVAL;
}
@@ -571,11 +721,16 @@ static int stm32_fmc2_ebi_set_address_setup(struct stm32_fmc2_ebi *ebi,
if (ret)
return ret;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
+
if (prop->reg_type == FMC2_REG_BWTR)
- regmap_read(ebi->regmap, FMC2_BWTR(cs), &bxtr);
+ ret = regmap_read(ebi->regmap, FMC2_BWTR(cs), &bxtr);
else
- regmap_read(ebi->regmap, FMC2_BTR(cs), &bxtr);
+ ret = regmap_read(ebi->regmap, FMC2_BTR(cs), &bxtr);
+ if (ret)
+ return ret;
if ((bxtr & FMC2_BXTR_ACCMOD) == val || bcr & FMC2_BCR_MUXEN)
val = clamp_val(setup, 1, FMC2_BXTR_ADDSET_MAX);
@@ -675,6 +830,30 @@ static int stm32_fmc2_ebi_set_clk_period(struct stm32_fmc2_ebi *ebi,
return 0;
}
+static int stm32_fmc2_ebi_mp25_set_clk_period(struct stm32_fmc2_ebi *ebi,
+ const struct stm32_fmc2_prop *prop,
+ int cs, u32 setup)
+{
+ u32 val, cfgr;
+ int ret;
+
+ ret = regmap_read(ebi->regmap, FMC2_CFGR, &cfgr);
+ if (ret)
+ return ret;
+
+ if (cfgr & FMC2_CFGR_CCLKEN) {
+ val = setup ? clamp_val(setup - 1, 1, FMC2_CFGR_CLKDIV_MAX) : 1;
+ val = FIELD_PREP(FMC2_CFGR_CLKDIV, val);
+ regmap_update_bits(ebi->regmap, FMC2_CFGR, FMC2_CFGR_CLKDIV, val);
+ } else {
+ val = setup ? clamp_val(setup - 1, 1, FMC2_BTR_CLKDIV_MAX) : 1;
+ val = FIELD_PREP(FMC2_BTR_CLKDIV, val);
+ regmap_update_bits(ebi->regmap, FMC2_BTR(cs), FMC2_BTR_CLKDIV, val);
+ }
+
+ return 0;
+}
+
static int stm32_fmc2_ebi_set_data_latency(struct stm32_fmc2_ebi *ebi,
const struct stm32_fmc2_prop *prop,
int cs, u32 setup)
@@ -693,11 +872,14 @@ static int stm32_fmc2_ebi_set_max_low_pulse(struct stm32_fmc2_ebi *ebi,
int cs, u32 setup)
{
u32 old_val, new_val, pcscntr;
+ int ret;
if (setup < 1)
return 0;
- regmap_read(ebi->regmap, FMC2_PCSCNTR, &pcscntr);
+ ret = regmap_read(ebi->regmap, FMC2_PCSCNTR, &pcscntr);
+ if (ret)
+ return ret;
/* Enable counter for the bank */
regmap_update_bits(ebi->regmap, FMC2_PCSCNTR,
@@ -717,6 +899,27 @@ static int stm32_fmc2_ebi_set_max_low_pulse(struct stm32_fmc2_ebi *ebi,
return 0;
}
+static int stm32_fmc2_ebi_mp25_set_max_low_pulse(struct stm32_fmc2_ebi *ebi,
+ const struct stm32_fmc2_prop *prop,
+ int cs, u32 setup)
+{
+ u32 val;
+
+ if (setup == FMC2_CSCOUNT_0)
+ val = FIELD_PREP(FMC2_BCR_CSCOUNT, FMC2_BCR_CSCOUNT_0);
+ else if (setup == FMC2_CSCOUNT_1)
+ val = FIELD_PREP(FMC2_BCR_CSCOUNT, FMC2_BCR_CSCOUNT_1);
+ else if (setup <= FMC2_CSCOUNT_64)
+ val = FIELD_PREP(FMC2_BCR_CSCOUNT, FMC2_BCR_CSCOUNT_64);
+ else
+ val = FIELD_PREP(FMC2_BCR_CSCOUNT, FMC2_BCR_CSCOUNT_256);
+
+ regmap_update_bits(ebi->regmap, FMC2_BCR(cs),
+ FMC2_BCR_CSCOUNT, val);
+
+ return 0;
+}
+
static const struct stm32_fmc2_prop stm32_fmc2_child_props[] = {
/* st,fmc2-ebi-cs-trans-type must be the first property */
{
@@ -882,6 +1085,275 @@ static const struct stm32_fmc2_prop stm32_fmc2_child_props[] = {
},
};
+static const struct stm32_fmc2_prop stm32_fmc2_mp25_child_props[] = {
+ /* st,fmc2-ebi-cs-trans-type must be the first property */
+ {
+ .name = "st,fmc2-ebi-cs-transaction-type",
+ .mprop = true,
+ .set = stm32_fmc2_ebi_set_trans_type,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-cclk-enable",
+ .bprop = true,
+ .reg_type = FMC2_REG_CFGR,
+ .reg_mask = FMC2_CFGR_CCLKEN,
+ .check = stm32_fmc2_ebi_mp25_check_cclk,
+ .set = stm32_fmc2_ebi_set_bit_field,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-mux-enable",
+ .bprop = true,
+ .reg_type = FMC2_REG_BCR,
+ .reg_mask = FMC2_BCR_MUXEN,
+ .check = stm32_fmc2_ebi_check_mux,
+ .set = stm32_fmc2_ebi_set_bit_field,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-buswidth",
+ .reset_val = FMC2_BUSWIDTH_16,
+ .set = stm32_fmc2_ebi_set_buswidth,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-waitpol-high",
+ .bprop = true,
+ .reg_type = FMC2_REG_BCR,
+ .reg_mask = FMC2_BCR_WAITPOL,
+ .set = stm32_fmc2_ebi_set_bit_field,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-waitcfg-enable",
+ .bprop = true,
+ .reg_type = FMC2_REG_BCR,
+ .reg_mask = FMC2_BCR_WAITCFG,
+ .check = stm32_fmc2_ebi_check_waitcfg,
+ .set = stm32_fmc2_ebi_set_bit_field,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-wait-enable",
+ .bprop = true,
+ .reg_type = FMC2_REG_BCR,
+ .reg_mask = FMC2_BCR_WAITEN,
+ .check = stm32_fmc2_ebi_check_sync_trans,
+ .set = stm32_fmc2_ebi_set_bit_field,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-asyncwait-enable",
+ .bprop = true,
+ .reg_type = FMC2_REG_BCR,
+ .reg_mask = FMC2_BCR_ASYNCWAIT,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .set = stm32_fmc2_ebi_set_bit_field,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-cpsize",
+ .check = stm32_fmc2_ebi_check_cpsize,
+ .set = stm32_fmc2_ebi_set_cpsize,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-byte-lane-setup-ns",
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_bl_setup,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-address-setup-ns",
+ .reg_type = FMC2_REG_BTR,
+ .reset_val = FMC2_BXTR_ADDSET_MAX,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_address_setup,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-address-hold-ns",
+ .reg_type = FMC2_REG_BTR,
+ .reset_val = FMC2_BXTR_ADDHLD_MAX,
+ .check = stm32_fmc2_ebi_check_address_hold,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_address_hold,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-data-setup-ns",
+ .reg_type = FMC2_REG_BTR,
+ .reset_val = FMC2_BXTR_DATAST_MAX,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_data_setup,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-bus-turnaround-ns",
+ .reg_type = FMC2_REG_BTR,
+ .reset_val = FMC2_BXTR_BUSTURN_MAX + 1,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_bus_turnaround,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-data-hold-ns",
+ .reg_type = FMC2_REG_BTR,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_data_hold,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-clk-period-ns",
+ .reset_val = FMC2_CFGR_CLKDIV_MAX + 1,
+ .check = stm32_fmc2_ebi_mp25_check_clk_period,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_mp25_set_clk_period,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-data-latency-ns",
+ .check = stm32_fmc2_ebi_check_sync_trans,
+ .calculate = stm32_fmc2_ebi_mp25_ns_to_clk_period,
+ .set = stm32_fmc2_ebi_set_data_latency,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-write-address-setup-ns",
+ .reg_type = FMC2_REG_BWTR,
+ .reset_val = FMC2_BXTR_ADDSET_MAX,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_address_setup,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-write-address-hold-ns",
+ .reg_type = FMC2_REG_BWTR,
+ .reset_val = FMC2_BXTR_ADDHLD_MAX,
+ .check = stm32_fmc2_ebi_check_address_hold,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_address_hold,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-write-data-setup-ns",
+ .reg_type = FMC2_REG_BWTR,
+ .reset_val = FMC2_BXTR_DATAST_MAX,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_data_setup,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-write-bus-turnaround-ns",
+ .reg_type = FMC2_REG_BWTR,
+ .reset_val = FMC2_BXTR_BUSTURN_MAX + 1,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_bus_turnaround,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-write-data-hold-ns",
+ .reg_type = FMC2_REG_BWTR,
+ .check = stm32_fmc2_ebi_check_async_trans,
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_set_data_hold,
+ },
+ {
+ .name = "st,fmc2-ebi-cs-max-low-pulse-ns",
+ .calculate = stm32_fmc2_ebi_ns_to_clock_cycles,
+ .set = stm32_fmc2_ebi_mp25_set_max_low_pulse,
+ },
+};
+
+static int stm32_fmc2_ebi_mp25_check_rif(struct stm32_fmc2_ebi *ebi, u32 resource)
+{
+ u32 seccfgr, cidcfgr, semcr;
+ int cid, ret;
+
+ if (resource >= FMC2_MAX_RESOURCES)
+ return -EINVAL;
+
+ ret = regmap_read(ebi->regmap, FMC2_SECCFGR, &seccfgr);
+ if (ret)
+ return ret;
+
+ if (seccfgr & BIT(resource)) {
+ if (resource)
+ dev_err(ebi->dev, "resource %d is configured as secure\n",
+ resource);
+
+ return -EACCES;
+ }
+
+ ret = regmap_read(ebi->regmap, FMC2_CIDCFGR(resource), &cidcfgr);
+ if (ret)
+ return ret;
+
+ if (!(cidcfgr & FMC2_CIDCFGR_CFEN))
+ /* CID filtering is turned off: access granted */
+ return 0;
+
+ if (!(cidcfgr & FMC2_CIDCFGR_SEMEN)) {
+ /* Static CID mode */
+ cid = FIELD_GET(FMC2_CIDCFGR_SCID, cidcfgr);
+ if (cid != FMC2_CID1) {
+ if (resource)
+ dev_err(ebi->dev, "static CID%d set for resource %d\n",
+ cid, resource);
+
+ return -EACCES;
+ }
+
+ return 0;
+ }
+
+ /* Pass-list with semaphore mode */
+ if (!(cidcfgr & FMC2_CIDCFGR_SEMWLC1)) {
+ if (resource)
+ dev_err(ebi->dev, "CID1 is block-listed for resource %d\n",
+ resource);
+
+ return -EACCES;
+ }
+
+ ret = regmap_read(ebi->regmap, FMC2_SEMCR(resource), &semcr);
+ if (ret)
+ return ret;
+
+ if (!(semcr & FMC2_SEMCR_SEM_MUTEX)) {
+ regmap_update_bits(ebi->regmap, FMC2_SEMCR(resource),
+ FMC2_SEMCR_SEM_MUTEX, FMC2_SEMCR_SEM_MUTEX);
+
+ ret = regmap_read(ebi->regmap, FMC2_SEMCR(resource), &semcr);
+ if (ret)
+ return ret;
+ }
+
+ cid = FIELD_GET(FMC2_SEMCR_SEMCID, semcr);
+ if (cid != FMC2_CID1) {
+ if (resource)
+ dev_err(ebi->dev, "resource %d is already used by CID%d\n",
+ resource, cid);
+
+ return -EACCES;
+ }
+
+ ebi->sem_taken |= BIT(resource);
+
+ return 0;
+}
+
+static void stm32_fmc2_ebi_mp25_put_sems(struct stm32_fmc2_ebi *ebi)
+{
+ unsigned int resource;
+
+ for (resource = 0; resource < FMC2_MAX_RESOURCES; resource++) {
+ if (!(ebi->sem_taken & BIT(resource)))
+ continue;
+
+ regmap_update_bits(ebi->regmap, FMC2_SEMCR(resource),
+ FMC2_SEMCR_SEM_MUTEX, 0);
+ }
+}
+
+static void stm32_fmc2_ebi_mp25_get_sems(struct stm32_fmc2_ebi *ebi)
+{
+ unsigned int resource;
+
+ for (resource = 0; resource < FMC2_MAX_RESOURCES; resource++) {
+ if (!(ebi->sem_taken & BIT(resource)))
+ continue;
+
+ regmap_update_bits(ebi->regmap, FMC2_SEMCR(resource),
+ FMC2_SEMCR_SEM_MUTEX, FMC2_SEMCR_SEM_MUTEX);
+ }
+}
+
static int stm32_fmc2_ebi_parse_prop(struct stm32_fmc2_ebi *ebi,
struct device_node *dev_node,
const struct stm32_fmc2_prop *prop,
@@ -944,17 +1416,48 @@ static void stm32_fmc2_ebi_disable_bank(struct stm32_fmc2_ebi *ebi, int cs)
regmap_update_bits(ebi->regmap, FMC2_BCR(cs), FMC2_BCR_MBKEN, 0);
}
-static void stm32_fmc2_ebi_save_setup(struct stm32_fmc2_ebi *ebi)
+static int stm32_fmc2_ebi_save_setup(struct stm32_fmc2_ebi *ebi)
{
unsigned int cs;
+ int ret;
for (cs = 0; cs < FMC2_MAX_EBI_CE; cs++) {
- regmap_read(ebi->regmap, FMC2_BCR(cs), &ebi->bcr[cs]);
- regmap_read(ebi->regmap, FMC2_BTR(cs), &ebi->btr[cs]);
- regmap_read(ebi->regmap, FMC2_BWTR(cs), &ebi->bwtr[cs]);
+ if (!(ebi->bank_assigned & BIT(cs)))
+ continue;
+
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &ebi->bcr[cs]);
+ ret |= regmap_read(ebi->regmap, FMC2_BTR(cs), &ebi->btr[cs]);
+ ret |= regmap_read(ebi->regmap, FMC2_BWTR(cs), &ebi->bwtr[cs]);
+ if (ret)
+ return ret;
}
- regmap_read(ebi->regmap, FMC2_PCSCNTR, &ebi->pcscntr);
+ return 0;
+}
+
+static int stm32_fmc2_ebi_mp1_save_setup(struct stm32_fmc2_ebi *ebi)
+{
+ int ret;
+
+ ret = stm32_fmc2_ebi_save_setup(ebi);
+ if (ret)
+ return ret;
+
+ return regmap_read(ebi->regmap, FMC2_PCSCNTR, &ebi->pcscntr);
+}
+
+static int stm32_fmc2_ebi_mp25_save_setup(struct stm32_fmc2_ebi *ebi)
+{
+ int ret;
+
+ ret = stm32_fmc2_ebi_save_setup(ebi);
+ if (ret)
+ return ret;
+
+ if (ebi->access_granted)
+ ret = regmap_read(ebi->regmap, FMC2_CFGR, &ebi->cfgr);
+
+ return ret;
}
static void stm32_fmc2_ebi_set_setup(struct stm32_fmc2_ebi *ebi)
@@ -962,14 +1465,29 @@ static void stm32_fmc2_ebi_set_setup(struct stm32_fmc2_ebi *ebi)
unsigned int cs;
for (cs = 0; cs < FMC2_MAX_EBI_CE; cs++) {
+ if (!(ebi->bank_assigned & BIT(cs)))
+ continue;
+
regmap_write(ebi->regmap, FMC2_BCR(cs), ebi->bcr[cs]);
regmap_write(ebi->regmap, FMC2_BTR(cs), ebi->btr[cs]);
regmap_write(ebi->regmap, FMC2_BWTR(cs), ebi->bwtr[cs]);
}
+}
+static void stm32_fmc2_ebi_mp1_set_setup(struct stm32_fmc2_ebi *ebi)
+{
+ stm32_fmc2_ebi_set_setup(ebi);
regmap_write(ebi->regmap, FMC2_PCSCNTR, ebi->pcscntr);
}
+static void stm32_fmc2_ebi_mp25_set_setup(struct stm32_fmc2_ebi *ebi)
+{
+ stm32_fmc2_ebi_set_setup(ebi);
+
+ if (ebi->access_granted)
+ regmap_write(ebi->regmap, FMC2_CFGR, ebi->cfgr);
+}
+
static void stm32_fmc2_ebi_disable_banks(struct stm32_fmc2_ebi *ebi)
{
unsigned int cs;
@@ -983,33 +1501,48 @@ static void stm32_fmc2_ebi_disable_banks(struct stm32_fmc2_ebi *ebi)
}
/* NWAIT signal can not be connected to EBI controller and NAND controller */
-static bool stm32_fmc2_ebi_nwait_used_by_ctrls(struct stm32_fmc2_ebi *ebi)
+static int stm32_fmc2_ebi_nwait_used_by_ctrls(struct stm32_fmc2_ebi *ebi)
{
+ struct device *dev = ebi->dev;
unsigned int cs;
u32 bcr;
+ int ret;
for (cs = 0; cs < FMC2_MAX_EBI_CE; cs++) {
if (!(ebi->bank_assigned & BIT(cs)))
continue;
- regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ ret = regmap_read(ebi->regmap, FMC2_BCR(cs), &bcr);
+ if (ret)
+ return ret;
+
if ((bcr & FMC2_BCR_WAITEN || bcr & FMC2_BCR_ASYNCWAIT) &&
- ebi->bank_assigned & BIT(FMC2_NAND))
- return true;
+ ebi->bank_assigned & BIT(FMC2_NAND)) {
+ dev_err(dev, "NWAIT signal connected to EBI and NAND controllers\n");
+ return -EINVAL;
+ }
}
- return false;
+ return 0;
}
static void stm32_fmc2_ebi_enable(struct stm32_fmc2_ebi *ebi)
{
- regmap_update_bits(ebi->regmap, FMC2_BCR1,
- FMC2_BCR1_FMC2EN, FMC2_BCR1_FMC2EN);
+ if (!ebi->access_granted)
+ return;
+
+ regmap_update_bits(ebi->regmap, ebi->data->fmc2_enable_reg,
+ ebi->data->fmc2_enable_bit,
+ ebi->data->fmc2_enable_bit);
}
static void stm32_fmc2_ebi_disable(struct stm32_fmc2_ebi *ebi)
{
- regmap_update_bits(ebi->regmap, FMC2_BCR1, FMC2_BCR1_FMC2EN, 0);
+ if (!ebi->access_granted)
+ return;
+
+ regmap_update_bits(ebi->regmap, ebi->data->fmc2_enable_reg,
+ ebi->data->fmc2_enable_bit, 0);
}
static int stm32_fmc2_ebi_setup_cs(struct stm32_fmc2_ebi *ebi,
@@ -1021,8 +1554,8 @@ static int stm32_fmc2_ebi_setup_cs(struct stm32_fmc2_ebi *ebi,
stm32_fmc2_ebi_disable_bank(ebi, cs);
- for (i = 0; i < ARRAY_SIZE(stm32_fmc2_child_props); i++) {
- const struct stm32_fmc2_prop *p = &stm32_fmc2_child_props[i];
+ for (i = 0; i < ebi->data->nb_child_props; i++) {
+ const struct stm32_fmc2_prop *p = &ebi->data->child_props[i];
ret = stm32_fmc2_ebi_parse_prop(ebi, dev_node, p, cs);
if (ret) {
@@ -1066,6 +1599,15 @@ static int stm32_fmc2_ebi_parse_dt(struct stm32_fmc2_ebi *ebi)
return -EINVAL;
}
+ if (ebi->data->check_rif) {
+ ret = ebi->data->check_rif(ebi, bank + 1);
+ if (ret) {
+ dev_err(dev, "bank access failed: %d\n", bank);
+ of_node_put(child);
+ return ret;
+ }
+ }
+
if (bank < FMC2_MAX_EBI_CE) {
ret = stm32_fmc2_ebi_setup_cs(ebi, child, bank);
if (ret) {
@@ -1085,9 +1627,10 @@ static int stm32_fmc2_ebi_parse_dt(struct stm32_fmc2_ebi *ebi)
return -ENODEV;
}
- if (stm32_fmc2_ebi_nwait_used_by_ctrls(ebi)) {
- dev_err(dev, "NWAIT signal connected to EBI and NAND controllers\n");
- return -EINVAL;
+ if (ebi->data->nwait_used_by_ctrls) {
+ ret = ebi->data->nwait_used_by_ctrls(ebi);
+ if (ret)
+ return ret;
}
stm32_fmc2_ebi_enable(ebi);
@@ -1107,6 +1650,11 @@ static int stm32_fmc2_ebi_probe(struct platform_device *pdev)
return -ENOMEM;
ebi->dev = dev;
+ platform_set_drvdata(pdev, ebi);
+
+ ebi->data = of_device_get_match_data(dev);
+ if (!ebi->data)
+ return -EINVAL;
ebi->regmap = device_node_to_regmap(dev->of_node);
if (IS_ERR(ebi->regmap))
@@ -1120,28 +1668,57 @@ static int stm32_fmc2_ebi_probe(struct platform_device *pdev)
if (PTR_ERR(rstc) == -EPROBE_DEFER)
return -EPROBE_DEFER;
- ret = clk_prepare_enable(ebi->clk);
+ ret = devm_pm_runtime_enable(dev);
if (ret)
return ret;
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
if (!IS_ERR(rstc)) {
reset_control_assert(rstc);
reset_control_deassert(rstc);
}
+ /* Check if CFGR register can be modified */
+ ebi->access_granted = true;
+ if (ebi->data->check_rif) {
+ ret = ebi->data->check_rif(ebi, 0);
+ if (ret) {
+ u32 sr;
+
+ ebi->access_granted = false;
+
+ ret = regmap_read(ebi->regmap, FMC2_SR, &sr);
+ if (ret)
+ goto err_release;
+
+ /* In case of CFGR is secure, just check that the FMC2 is enabled */
+ if (sr & FMC2_SR_ISOST) {
+ dev_err(dev, "FMC2 is not ready to be used.\n");
+ ret = -EACCES;
+ goto err_release;
+ }
+ }
+ }
+
ret = stm32_fmc2_ebi_parse_dt(ebi);
if (ret)
goto err_release;
- stm32_fmc2_ebi_save_setup(ebi);
- platform_set_drvdata(pdev, ebi);
+ ret = ebi->data->save_setup(ebi);
+ if (ret)
+ goto err_release;
return 0;
err_release:
stm32_fmc2_ebi_disable_banks(ebi);
stm32_fmc2_ebi_disable(ebi);
- clk_disable_unprepare(ebi->clk);
+ if (ebi->data->put_sems)
+ ebi->data->put_sems(ebi);
+ pm_runtime_put_sync_suspend(dev);
return ret;
}
@@ -1153,7 +1730,25 @@ static void stm32_fmc2_ebi_remove(struct platform_device *pdev)
of_platform_depopulate(&pdev->dev);
stm32_fmc2_ebi_disable_banks(ebi);
stm32_fmc2_ebi_disable(ebi);
+ if (ebi->data->put_sems)
+ ebi->data->put_sems(ebi);
+ pm_runtime_put_sync_suspend(&pdev->dev);
+}
+
+static int __maybe_unused stm32_fmc2_ebi_runtime_suspend(struct device *dev)
+{
+ struct stm32_fmc2_ebi *ebi = dev_get_drvdata(dev);
+
clk_disable_unprepare(ebi->clk);
+
+ return 0;
+}
+
+static int __maybe_unused stm32_fmc2_ebi_runtime_resume(struct device *dev)
+{
+ struct stm32_fmc2_ebi *ebi = dev_get_drvdata(dev);
+
+ return clk_prepare_enable(ebi->clk);
}
static int __maybe_unused stm32_fmc2_ebi_suspend(struct device *dev)
@@ -1161,7 +1756,9 @@ static int __maybe_unused stm32_fmc2_ebi_suspend(struct device *dev)
struct stm32_fmc2_ebi *ebi = dev_get_drvdata(dev);
stm32_fmc2_ebi_disable(ebi);
- clk_disable_unprepare(ebi->clk);
+ if (ebi->data->put_sems)
+ ebi->data->put_sems(ebi);
+ pm_runtime_put_sync_suspend(dev);
pinctrl_pm_select_sleep_state(dev);
return 0;
@@ -1174,21 +1771,55 @@ static int __maybe_unused stm32_fmc2_ebi_resume(struct device *dev)
pinctrl_pm_select_default_state(dev);
- ret = clk_prepare_enable(ebi->clk);
- if (ret)
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
return ret;
- stm32_fmc2_ebi_set_setup(ebi);
+ if (ebi->data->get_sems)
+ ebi->data->get_sems(ebi);
+ ebi->data->set_setup(ebi);
stm32_fmc2_ebi_enable(ebi);
return 0;
}
-static SIMPLE_DEV_PM_OPS(stm32_fmc2_ebi_pm_ops, stm32_fmc2_ebi_suspend,
- stm32_fmc2_ebi_resume);
+static const struct dev_pm_ops stm32_fmc2_ebi_pm_ops = {
+ SET_RUNTIME_PM_OPS(stm32_fmc2_ebi_runtime_suspend,
+ stm32_fmc2_ebi_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(stm32_fmc2_ebi_suspend, stm32_fmc2_ebi_resume)
+};
+
+static const struct stm32_fmc2_ebi_data stm32_fmc2_ebi_mp1_data = {
+ .child_props = stm32_fmc2_child_props,
+ .nb_child_props = ARRAY_SIZE(stm32_fmc2_child_props),
+ .fmc2_enable_reg = FMC2_BCR1,
+ .fmc2_enable_bit = FMC2_BCR1_FMC2EN,
+ .nwait_used_by_ctrls = stm32_fmc2_ebi_nwait_used_by_ctrls,
+ .set_setup = stm32_fmc2_ebi_mp1_set_setup,
+ .save_setup = stm32_fmc2_ebi_mp1_save_setup,
+};
+
+static const struct stm32_fmc2_ebi_data stm32_fmc2_ebi_mp25_data = {
+ .child_props = stm32_fmc2_mp25_child_props,
+ .nb_child_props = ARRAY_SIZE(stm32_fmc2_mp25_child_props),
+ .fmc2_enable_reg = FMC2_CFGR,
+ .fmc2_enable_bit = FMC2_CFGR_FMC2EN,
+ .set_setup = stm32_fmc2_ebi_mp25_set_setup,
+ .save_setup = stm32_fmc2_ebi_mp25_save_setup,
+ .check_rif = stm32_fmc2_ebi_mp25_check_rif,
+ .put_sems = stm32_fmc2_ebi_mp25_put_sems,
+ .get_sems = stm32_fmc2_ebi_mp25_get_sems,
+};
static const struct of_device_id stm32_fmc2_ebi_match[] = {
- {.compatible = "st,stm32mp1-fmc2-ebi"},
+ {
+ .compatible = "st,stm32mp1-fmc2-ebi",
+ .data = &stm32_fmc2_ebi_mp1_data,
+ },
+ {
+ .compatible = "st,stm32mp25-fmc2-ebi",
+ .data = &stm32_fmc2_ebi_mp25_data,
+ },
{}
};
MODULE_DEVICE_TABLE(of, stm32_fmc2_ebi_match);
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index abff87f917cb..5f57cea48b62 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -92,6 +92,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0RDB,
.name = "dla0rdb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -102,6 +104,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0RDB1,
.name = "dla0rdb1",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -112,6 +116,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0WRB,
.name = "dla0wrb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -121,7 +127,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1RDB,
- .name = "dla0rdb",
+ .name = "dla1rdb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -407,7 +415,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1RDB1,
- .name = "dla0rdb1",
+ .name = "dla1rdb1",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -417,7 +427,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1WRB,
- .name = "dla0wrb",
+ .name = "dla1wrb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -539,7 +551,7 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
.bpmp_id = TEGRA_ICC_BPMP_NVJPG_0,
.type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVJPG,
- .regs = {
+ .regs = {
.sid = {
.override = 0x3f8,
.security = 0x3fc,
@@ -660,6 +672,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0RDA,
.name = "dla0rda",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -670,6 +684,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0FALRDB,
.name = "dla0falrdb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -680,6 +696,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0WRA,
.name = "dla0wra",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -690,6 +708,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0FALWRB,
.name = "dla0falwrb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -699,7 +719,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1RDA,
- .name = "dla0rda",
+ .name = "dla1rda",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -709,7 +731,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1FALRDB,
- .name = "dla0falrdb",
+ .name = "dla1falrdb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -719,7 +743,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1WRA,
- .name = "dla0wra",
+ .name = "dla1wra",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -729,7 +755,9 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1FALWRB,
- .name = "dla0falwrb",
+ .name = "dla1falwrb",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_1,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
@@ -908,6 +936,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA0RDA1,
.name = "dla0rda1",
+ .bpmp_id = TEGRA_ICC_BPMP_DLA_0,
+ .type = TEGRA_ICC_NISO,
.sid = TEGRA234_SID_NVDLA0,
.regs = {
.sid = {
@@ -917,7 +947,7 @@ static const struct tegra_mc_client tegra234_mc_clients[] = {
},
}, {
.id = TEGRA234_MEMORY_CLIENT_DLA1RDA1,
- .name = "dla0rda1",
+ .name = "dla1rda1",
.sid = TEGRA234_SID_NVDLA1,
.regs = {
.sid = {
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 04115cd92433..47a314a4eb6f 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2078,6 +2078,12 @@ static const struct blk_mq_ops msb_mq_ops = {
static int msb_init_disk(struct memstick_dev *card)
{
struct msb_data *msb = memstick_get_drvdata(card);
+ struct queue_limits lim = {
+ .logical_block_size = msb->page_size,
+ .max_hw_sectors = MS_BLOCK_MAX_PAGES,
+ .max_segments = MS_BLOCK_MAX_SEGS,
+ .max_segment_size = MS_BLOCK_MAX_PAGES * msb->page_size,
+ };
int rc;
unsigned long capacity;
@@ -2093,19 +2099,13 @@ static int msb_init_disk(struct memstick_dev *card)
if (rc)
goto out_release_id;
- msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
+ msb->disk = blk_mq_alloc_disk(&msb->tag_set, &lim, card);
if (IS_ERR(msb->disk)) {
rc = PTR_ERR(msb->disk);
goto out_free_tag_set;
}
msb->queue = msb->disk->queue;
- blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
- blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
- blk_queue_max_segment_size(msb->queue,
- MS_BLOCK_MAX_PAGES * msb->page_size);
- blk_queue_logical_block_size(msb->queue, msb->page_size);
-
sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id);
msb->disk->fops = &msb_bdops;
msb->disk->private_data = msb;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 5a69ed33999b..49accfdc89d6 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1103,6 +1103,12 @@ static const struct blk_mq_ops mspro_mq_ops = {
static int mspro_block_init_disk(struct memstick_dev *card)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
+ struct queue_limits lim = {
+ .logical_block_size = msb->page_size,
+ .max_hw_sectors = MSPRO_BLOCK_MAX_PAGES,
+ .max_segments = MSPRO_BLOCK_MAX_SEGS,
+ .max_segment_size = MSPRO_BLOCK_MAX_PAGES * msb->page_size,
+ };
struct mspro_devinfo *dev_info = NULL;
struct mspro_sys_info *sys_info = NULL;
struct mspro_sys_attr *s_attr = NULL;
@@ -1138,18 +1144,13 @@ static int mspro_block_init_disk(struct memstick_dev *card)
if (rc)
goto out_release_id;
- msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
+ msb->disk = blk_mq_alloc_disk(&msb->tag_set, &lim, card);
if (IS_ERR(msb->disk)) {
rc = PTR_ERR(msb->disk);
goto out_free_tag_set;
}
msb->queue = msb->disk->queue;
- blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
- blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
- blk_queue_max_segment_size(msb->queue,
- MSPRO_BLOCK_MAX_PAGES * msb->page_size);
-
msb->disk->major = major;
msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
@@ -1158,8 +1159,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
- blk_queue_logical_block_size(msb->queue, msb->page_size);
-
capacity = be16_to_cpu(sys_info->user_block_count);
capacity *= be16_to_cpu(sys_info->block_size);
capacity *= msb->page_size >> 9;
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 1c6c62a7f7f5..dbd26c3b245b 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -263,7 +263,6 @@ struct fastrpc_channel_ctx {
int domain_id;
int sesscount;
int vmcount;
- u64 perms;
struct qcom_scm_vmperm vmperms[FASTRPC_MAX_VMIDS];
struct rpmsg_device *rpdev;
struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS];
@@ -1279,9 +1278,11 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
/* Map if we have any heap VMIDs associated with this ADSP Static Process. */
if (fl->cctx->vmcount) {
+ u64 src_perms = BIT(QCOM_SCM_VMID_HLOS);
+
err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys,
(u64)fl->cctx->remote_heap->size,
- &fl->cctx->perms,
+ &src_perms,
fl->cctx->vmperms, fl->cctx->vmcount);
if (err) {
dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
@@ -1915,8 +1916,10 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
/* Add memory to static PD pool, protection thru hypervisor */
if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) {
+ u64 src_perms = BIT(QCOM_SCM_VMID_HLOS);
+
err = qcom_scm_assign_mem(buf->phys, (u64)buf->size,
- &fl->cctx->perms, fl->cctx->vmperms, fl->cctx->vmcount);
+ &src_perms, fl->cctx->vmperms, fl->cctx->vmcount);
if (err) {
dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
buf->phys, buf->size, err);
@@ -2191,7 +2194,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev)
int i;
spin_lock_irqsave(&cctx->lock, flags);
- for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) {
+ for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) {
if (cctx->session[i].sid == sess->sid) {
cctx->session[i].valid = false;
cctx->sesscount--;
@@ -2290,7 +2293,6 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
if (vmcount) {
data->vmcount = vmcount;
- data->perms = BIT(QCOM_SCM_VMID_HLOS);
for (i = 0; i < data->vmcount; i++) {
data->vmperms[i].vmid = vmids[i];
data->vmperms[i].perm = QCOM_SCM_PERM_RWX;
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
index c6eb27d46cb0..15119584473c 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -198,8 +198,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct lis3lv02d *lis3 = i2c_get_clientdata(client);
- if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+ /* Turn on for wakeup if turned off by runtime suspend */
+ if (lis3->pdata && lis3->pdata->wakeup_flags) {
+ if (pm_runtime_suspended(dev))
+ lis3lv02d_poweron(lis3);
+ /* For non wakeup turn off if not already turned off by runtime suspend */
+ } else if (!pm_runtime_suspended(dev))
lis3lv02d_poweroff(lis3);
+
return 0;
}
@@ -208,13 +214,12 @@ static int lis3lv02d_i2c_resume(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct lis3lv02d *lis3 = i2c_get_clientdata(client);
- /*
- * pm_runtime documentation says that devices should always
- * be powered on at resume. Pm_runtime turns them off after system
- * wide resume is complete.
- */
- if (!lis3->pdata || !lis3->pdata->wakeup_flags ||
- pm_runtime_suspended(dev))
+ /* Turn back off if turned on for wakeup and runtime suspended*/
+ if (lis3->pdata && lis3->pdata->wakeup_flags) {
+ if (pm_runtime_suspended(dev))
+ lis3lv02d_poweroff(lis3);
+ /* For non wakeup turn back on if not runtime suspended */
+ } else if (!pm_runtime_suspended(dev))
lis3lv02d_poweron(lis3);
return 0;
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index b080eb2335eb..b92767d6bdd2 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -294,10 +294,11 @@ static void lkdtm_SPINLOCKUP(void)
__release(&lock_me_up);
}
-static void lkdtm_HUNG_TASK(void)
+static void __noreturn lkdtm_HUNG_TASK(void)
{
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
+ BUG();
}
static volatile unsigned int huge = INT_MAX - 2;
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 0772e4a4757e..5732fd59a227 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -153,12 +153,17 @@ static const struct crashtype *find_crashtype(const char *name)
/*
* This is forced noinline just so it distinctly shows up in the stackdump
* which makes validation of expected lkdtm crashes easier.
+ *
+ * NOTE: having a valid return value helps prevent the compiler from doing
+ * tail call optimizations and taking this out of the stack trace.
*/
-static noinline void lkdtm_do_action(const struct crashtype *crashtype)
+static noinline int lkdtm_do_action(const struct crashtype *crashtype)
{
if (WARN_ON(!crashtype || !crashtype->func))
- return;
+ return -EINVAL;
crashtype->func();
+
+ return 0;
}
static int lkdtm_register_cpoint(struct crashpoint *crashpoint,
@@ -167,10 +172,8 @@ static int lkdtm_register_cpoint(struct crashpoint *crashpoint,
int ret;
/* If this doesn't have a symbol, just call immediately. */
- if (!crashpoint->kprobe.symbol_name) {
- lkdtm_do_action(crashtype);
- return 0;
- }
+ if (!crashpoint->kprobe.symbol_name)
+ return lkdtm_do_action(crashtype);
if (lkdtm_kprobe != NULL)
unregister_kprobe(lkdtm_kprobe);
@@ -216,7 +219,7 @@ static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
spin_unlock_irqrestore(&crash_count_lock, flags);
if (do_it)
- lkdtm_do_action(lkdtm_crashtype);
+ return lkdtm_do_action(lkdtm_crashtype);
return 0;
}
@@ -303,6 +306,7 @@ static ssize_t direct_entry(struct file *f, const char __user *user_buf,
{
const struct crashtype *crashtype;
char *buf;
+ int err;
if (count >= PAGE_SIZE)
return -EINVAL;
@@ -326,9 +330,11 @@ static ssize_t direct_entry(struct file *f, const char __user *user_buf,
return -EINVAL;
pr_info("Performing direct entry %s\n", crashtype->name);
- lkdtm_do_action(crashtype);
+ err = lkdtm_do_action(crashtype);
*off += count;
+ if (err)
+ return err;
return count;
}
diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c
index 4f467d3972a6..b1b316f99703 100644
--- a/drivers/misc/lkdtm/heap.c
+++ b/drivers/misc/lkdtm/heap.c
@@ -48,7 +48,7 @@ static void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
* correctly.
*
* This should get caught by either memory tagging, KASan, or by using
- * CONFIG_SLUB_DEBUG=y and slub_debug=ZF (or CONFIG_SLUB_DEBUG_ON=y).
+ * CONFIG_SLUB_DEBUG=y and slab_debug=ZF (or CONFIG_SLUB_DEBUG_ON=y).
*/
static void lkdtm_SLAB_LINEAR_OVERFLOW(void)
{
diff --git a/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c
index be52b113aea9..89364bdbb129 100644
--- a/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c
+++ b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c
@@ -96,7 +96,8 @@ static const struct component_master_ops mei_component_master_ops = {
*
* The function checks if the device is pci device and
* Intel VGA adapter, the subcomponent is SW Proxy
- * and the parent of MEI PCI and the parent of VGA are the same PCH device.
+ * and the VGA is on the bus 0 reserved for built-in devices
+ * to reject discrete GFX.
*
* @dev: master device
* @subcomponent: subcomponent to match (I915_COMPONENT_SWPROXY)
@@ -123,7 +124,8 @@ static int mei_gsc_proxy_component_match(struct device *dev, int subcomponent,
if (subcomponent != I915_COMPONENT_GSC_PROXY)
return 0;
- return component_compare_dev(dev->parent, ((struct device *)data)->parent);
+ /* Only built-in GFX */
+ return (pdev->bus->number == 0);
}
static int mei_gsc_proxy_probe(struct mei_cl_device *cldev,
@@ -146,7 +148,7 @@ static int mei_gsc_proxy_probe(struct mei_cl_device *cldev,
}
component_match_add_typed(&cldev->dev, &master_match,
- mei_gsc_proxy_component_match, cldev->dev.parent);
+ mei_gsc_proxy_component_match, NULL);
if (IS_ERR_OR_NULL(master_match)) {
ret = -ENOMEM;
goto err_exit;
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 961e5d53a27a..aac36750d2c5 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -112,6 +112,8 @@
#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */
#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */
+#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */
+#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */
/*
* MEI HW Section
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 676d566f38dd..8cf636c54032 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -119,6 +119,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)},
/* required last entry */
{0, }
diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c
index 6f4a4be6ccb5..55f7db490d3b 100644
--- a/drivers/misc/mei/vsc-tp.c
+++ b/drivers/misc/mei/vsc-tp.c
@@ -535,6 +535,7 @@ static const struct acpi_device_id vsc_tp_acpi_ids[] = {
{ "INTC1009" }, /* Raptor Lake */
{ "INTC1058" }, /* Tiger Lake */
{ "INTC1094" }, /* Alder Lake */
+ { "INTC10D0" }, /* Meteor Lake */
{}
};
MODULE_DEVICE_TABLE(acpi, vsc_tp_acpi_ids);
diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c
index 8aea2d070a40..d279a4f195e2 100644
--- a/drivers/misc/open-dice.c
+++ b/drivers/misc/open-dice.c
@@ -140,7 +140,6 @@ static int __init open_dice_probe(struct platform_device *pdev)
return -ENOMEM;
*drvdata = (struct open_dice_drvdata){
- .lock = __MUTEX_INITIALIZER(drvdata->lock),
.rmem = rmem,
.misc = (struct miscdevice){
.parent = dev,
@@ -150,6 +149,7 @@ static int __init open_dice_probe(struct platform_device *pdev)
.mode = 0600,
},
};
+ mutex_init(&drvdata->lock);
/* Index overflow check not needed, misc_register() will fail. */
snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++);
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c
index f50d22882476..3964d9e5a39b 100644
--- a/drivers/misc/vmw_vmci/vmci_datagram.c
+++ b/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -224,8 +224,8 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
return VMCI_ERROR_NO_MEM;
}
- dg_info = kmalloc(sizeof(*dg_info) +
- (size_t) dg->payload_size, GFP_ATOMIC);
+ dg_info = kmalloc(struct_size(dg_info, msg_payload, dg->payload_size),
+ GFP_ATOMIC);
if (!dg_info) {
atomic_dec(&delayed_dg_host_queue_size);
vmci_resource_put(resource);
@@ -234,7 +234,8 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
dg_info->in_dg_host_queue = true;
dg_info->entry = dst_entry;
- memcpy(&dg_info->msg, dg, dg_size);
+ dg_info->msg = *dg;
+ memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size);
INIT_WORK(&dg_info->work, dg_delayed_dispatch);
schedule_work(&dg_info->work);
@@ -377,7 +378,8 @@ int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
dg_info->in_dg_host_queue = false;
dg_info->entry = dst_entry;
- memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
+ dg_info->msg = *dg;
+ memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size);
INIT_WORK(&dg_info->work, dg_delayed_dispatch);
schedule_work(&dg_info->work);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index f410bee50132..58ed7193a3ca 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1015,10 +1015,12 @@ static int mmc_select_bus_width(struct mmc_card *card)
static unsigned ext_csd_bits[] = {
EXT_CSD_BUS_WIDTH_8,
EXT_CSD_BUS_WIDTH_4,
+ EXT_CSD_BUS_WIDTH_1,
};
static unsigned bus_widths[] = {
MMC_BUS_WIDTH_8,
MMC_BUS_WIDTH_4,
+ MMC_BUS_WIDTH_1,
};
struct mmc_host *host = card->host;
unsigned idx, bus_width = 0;
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index a0a2412f62a7..2ae60d208cdf 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -174,8 +174,8 @@ static struct scatterlist *mmc_alloc_sg(unsigned short sg_len, gfp_t gfp)
return sg;
}
-static void mmc_queue_setup_discard(struct request_queue *q,
- struct mmc_card *card)
+static void mmc_queue_setup_discard(struct mmc_card *card,
+ struct queue_limits *lim)
{
unsigned max_discard;
@@ -183,15 +183,17 @@ static void mmc_queue_setup_discard(struct request_queue *q,
if (!max_discard)
return;
- blk_queue_max_discard_sectors(q, max_discard);
- q->limits.discard_granularity = card->pref_erase << 9;
- /* granularity must not be greater than max. discard */
- if (card->pref_erase > max_discard)
- q->limits.discard_granularity = SECTOR_SIZE;
+ lim->max_hw_discard_sectors = max_discard;
if (mmc_can_secure_erase_trim(card))
- blk_queue_max_secure_erase_sectors(q, max_discard);
+ lim->max_secure_erase_sectors = max_discard;
if (mmc_can_trim(card) && card->erased_byte == 0)
- blk_queue_max_write_zeroes_sectors(q, max_discard);
+ lim->max_write_zeroes_sectors = max_discard;
+
+ /* granularity must not be greater than max. discard */
+ if (card->pref_erase > max_discard)
+ lim->discard_granularity = SECTOR_SIZE;
+ else
+ lim->discard_granularity = card->pref_erase << 9;
}
static unsigned short mmc_get_max_segments(struct mmc_host *host)
@@ -341,40 +343,53 @@ static const struct blk_mq_ops mmc_mq_ops = {
.timeout = mmc_mq_timed_out,
};
-static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
+static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
+ struct mmc_card *card)
{
struct mmc_host *host = card->host;
- unsigned block_size = 512;
+ struct queue_limits lim = { };
+ struct gendisk *disk;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
if (mmc_can_erase(card))
- mmc_queue_setup_discard(mq->queue, card);
+ mmc_queue_setup_discard(card, &lim);
if (!mmc_dev(host)->dma_mask || !*mmc_dev(host)->dma_mask)
- blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
- blk_queue_max_hw_sectors(mq->queue,
- min(host->max_blk_count, host->max_req_size / 512));
- if (host->can_dma_map_merge)
- WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
- mmc_dev(host)),
- "merging was advertised but not possible");
- blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
-
- if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) {
- block_size = card->ext_csd.data_sector_size;
- WARN_ON(block_size != 512 && block_size != 4096);
- }
+ lim.bounce = BLK_BOUNCE_HIGH;
+
+ lim.max_hw_sectors = min(host->max_blk_count, host->max_req_size / 512);
+
+ if (mmc_card_mmc(card) && card->ext_csd.data_sector_size)
+ lim.logical_block_size = card->ext_csd.data_sector_size;
+ else
+ lim.logical_block_size = 512;
+
+ WARN_ON_ONCE(lim.logical_block_size != 512 &&
+ lim.logical_block_size != 4096);
- blk_queue_logical_block_size(mq->queue, block_size);
/*
- * After blk_queue_can_use_dma_map_merging() was called with succeed,
- * since it calls blk_queue_virt_boundary(), the mmc should not call
- * both blk_queue_max_segment_size().
+ * Setting a virt_boundary implicity sets a max_segment_size, so try
+ * to set the hardware one here.
*/
- if (!host->can_dma_map_merge)
- blk_queue_max_segment_size(mq->queue,
- round_down(host->max_seg_size, block_size));
+ if (host->can_dma_map_merge) {
+ lim.virt_boundary_mask = dma_get_merge_boundary(mmc_dev(host));
+ lim.max_segments = MMC_DMA_MAP_MERGE_SEGMENTS;
+ } else {
+ lim.max_segment_size =
+ round_down(host->max_seg_size, lim.logical_block_size);
+ lim.max_segments = host->max_segs;
+ }
+
+ disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
+ if (IS_ERR(disk))
+ return disk;
+ mq->queue = disk->queue;
+
+ if (mmc_host_is_spi(host) && host->use_spi_crc)
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
+ blk_queue_rq_timeout(mq->queue, 60 * HZ);
+
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
@@ -386,6 +401,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
init_waitqueue_head(&mq->wait);
mmc_crypto_setup_queue(mq->queue, host);
+ return disk;
}
static inline bool mmc_merge_capable(struct mmc_host *host)
@@ -447,18 +463,9 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
return ERR_PTR(ret);
- disk = blk_mq_alloc_disk(&mq->tag_set, mq);
- if (IS_ERR(disk)) {
+ disk = mmc_alloc_disk(mq, card);
+ if (IS_ERR(disk))
blk_mq_free_tag_set(&mq->tag_set);
- return disk;
- }
- mq->queue = disk->queue;
-
- if (mmc_host_is_spi(host) && host->use_spi_crc)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
- blk_queue_rq_timeout(mq->queue, 60 * HZ);
-
- mmc_setup_queue(mq, card);
return disk;
}
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 2a2d949a9344..39f45c2b6de8 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -75,11 +75,15 @@ EXPORT_SYMBOL(mmc_gpio_set_cd_irq);
int mmc_gpio_get_ro(struct mmc_host *host)
{
struct mmc_gpio *ctx = host->slot.handler_priv;
+ int cansleep;
if (!ctx || !ctx->ro_gpio)
return -ENOSYS;
- return gpiod_get_value_cansleep(ctx->ro_gpio);
+ cansleep = gpiod_cansleep(ctx->ro_gpio);
+ return cansleep ?
+ gpiod_get_value_cansleep(ctx->ro_gpio) :
+ gpiod_get_value(ctx->ro_gpio);
}
EXPORT_SYMBOL(mmc_gpio_get_ro);
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index 35067e1e6cd8..f5da7f9baa52 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -225,6 +225,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
struct scatterlist *sg;
int i;
+ host->dma_in_progress = true;
+
if (!host->variant->dma_lli || data->sg_len == 1 ||
idma->use_bounce_buffer) {
u32 dma_addr;
@@ -263,9 +265,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
return 0;
}
+static void sdmmc_idma_error(struct mmci_host *host)
+{
+ struct mmc_data *data = host->data;
+ struct sdmmc_idma *idma = host->dma_priv;
+
+ if (!dma_inprogress(host))
+ return;
+
+ writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+ host->dma_in_progress = false;
+ data->host_cookie = 0;
+
+ if (!idma->use_bounce_buffer)
+ dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+ mmc_get_dma_dir(data));
+}
+
static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data)
{
+ if (!dma_inprogress(host))
+ return;
+
writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+ host->dma_in_progress = false;
if (!data->host_cookie)
sdmmc_idma_unprep_data(host, data, 0);
@@ -676,6 +699,7 @@ static struct mmci_host_ops sdmmc_variant_ops = {
.dma_setup = sdmmc_idma_setup,
.dma_start = sdmmc_idma_start,
.dma_finalize = sdmmc_idma_finalize,
+ .dma_error = sdmmc_idma_error,
.set_clkreg = mmci_sdmmc_set_clkreg,
.set_pwrreg = mmci_sdmmc_set_pwrreg,
.busy_complete = sdmmc_busy_complete,
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index 7bfee28116af..d4a02184784a 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -693,6 +693,35 @@ static int sdhci_pci_o2_init_sd_express(struct mmc_host *mmc, struct mmc_ios *io
return 0;
}
+static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd)
+{
+ struct sdhci_pci_chip *chip;
+ struct sdhci_pci_slot *slot = sdhci_priv(host);
+ u32 scratch_32 = 0;
+ u8 scratch_8 = 0;
+
+ chip = slot->chip;
+
+ if (mode == MMC_POWER_OFF) {
+ /* UnLock WP */
+ pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+ scratch_8 &= 0x7f;
+ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+
+ /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */
+ pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32);
+ scratch_32 &= ~(O2_SD_SEL_DLL);
+ pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32);
+
+ /* Lock WP */
+ pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+ scratch_8 |= 0x80;
+ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+ }
+
+ sdhci_set_power(host, mode, vdd);
+}
+
static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
{
struct sdhci_pci_chip *chip;
@@ -1051,6 +1080,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = {
.set_bus_width = sdhci_set_bus_width,
.reset = sdhci_reset,
.set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_power = sdhci_pci_o2_set_power,
};
const struct sdhci_pci_fixes sdhci_o2 = {
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 8cf3a375de65..cc9d28b75eb9 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/iopoll.h>
#include <linux/of_address.h>
#include "sdhci-pltfm.h"
@@ -109,6 +110,8 @@
#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
#define XENON_LOGIC_TIMING_VALUE 0x00AA8977
+#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
+
/*
* List offset of PHY registers and some special register values
* in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
return 0;
}
+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+ u32 reg;
+ int err;
+
+ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+ if (err)
+ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+ return err;
+}
+
/*
* eMMC 5.0/5.1 PHY init/re-init.
* eMMC PHY init should be executed after:
@@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+ int ret = xenon_check_stability_internal_clk(host);
+
+ if (ret)
+ return ret;
+
reg = sdhci_readl(host, phy_regs->timing_adj);
reg |= XENON_PHY_INITIALIZAION;
sdhci_writel(host, reg, phy_regs->timing_adj);
@@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
/* get the wait time */
wait /= clock;
wait++;
- /* wait for host eMMC PHY init completes */
- udelay(wait);
- reg = sdhci_readl(host, phy_regs->timing_adj);
- reg &= XENON_PHY_INITIALIZAION;
- if (reg) {
+ /*
+ * AC5X spec says bit must be polled until zero.
+ * We see cases in which timeout can take longer
+ * than the standard calculation on AC5X, which is
+ * expected following the spec comment above.
+ * According to the spec, we must wait as long as
+ * it takes for that bit to toggle on AC5X.
+ * Cap that with 100 delay loops so we won't get
+ * stuck here forever:
+ */
+
+ ret = read_poll_timeout(sdhci_readl, reg,
+ !(reg & XENON_PHY_INITIALIZAION),
+ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+ false, host, phy_regs->timing_adj);
+ if (ret)
dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
- wait);
- return -ETIMEDOUT;
- }
+ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
- return 0;
+ return ret;
}
#define ARMADA_3700_SOC_PAD_1_8V 0x1
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index aa44a23ec045..97a00ec9a4d4 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -37,7 +37,7 @@
/* Info for the block device */
struct block2mtd_dev {
struct list_head list;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct mtd_info mtd;
struct mutex write_mutex;
};
@@ -55,8 +55,7 @@ static struct page *page_read(struct address_space *mapping, pgoff_t index)
/* erase a specified part of the device */
static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len)
{
- struct address_space *mapping =
- dev->bdev_handle->bdev->bd_inode->i_mapping;
+ struct address_space *mapping = dev->bdev_file->f_mapping;
struct page *page;
pgoff_t index = to >> PAGE_SHIFT; // page index
int pages = len >> PAGE_SHIFT;
@@ -106,8 +105,7 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf)
{
struct block2mtd_dev *dev = mtd->priv;
- struct address_space *mapping =
- dev->bdev_handle->bdev->bd_inode->i_mapping;
+ struct address_space *mapping = dev->bdev_file->f_mapping;
struct page *page;
pgoff_t index = from >> PAGE_SHIFT;
int offset = from & (PAGE_SIZE-1);
@@ -142,8 +140,7 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf,
loff_t to, size_t len, size_t *retlen)
{
struct page *page;
- struct address_space *mapping =
- dev->bdev_handle->bdev->bd_inode->i_mapping;
+ struct address_space *mapping = dev->bdev_file->f_mapping;
pgoff_t index = to >> PAGE_SHIFT; // page index
int offset = to & ~PAGE_MASK; // page offset
int cpylen;
@@ -198,7 +195,7 @@ static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
static void block2mtd_sync(struct mtd_info *mtd)
{
struct block2mtd_dev *dev = mtd->priv;
- sync_blockdev(dev->bdev_handle->bdev);
+ sync_blockdev(file_bdev(dev->bdev_file));
return;
}
@@ -210,10 +207,9 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev->mtd.name);
- if (dev->bdev_handle) {
- invalidate_mapping_pages(
- dev->bdev_handle->bdev->bd_inode->i_mapping, 0, -1);
- bdev_release(dev->bdev_handle);
+ if (dev->bdev_file) {
+ invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1);
+ fput(dev->bdev_file);
}
kfree(dev);
@@ -223,10 +219,10 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
* This function is marked __ref because it calls the __init marked
* early_lookup_bdev when called from the early boot code.
*/
-static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname,
+static struct file __ref *mdtblock_early_get_bdev(const char *devname,
blk_mode_t mode, int timeout, struct block2mtd_dev *dev)
{
- struct bdev_handle *bdev_handle = ERR_PTR(-ENODEV);
+ struct file *bdev_file = ERR_PTR(-ENODEV);
#ifndef MODULE
int i;
@@ -234,7 +230,7 @@ static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname,
* We can't use early_lookup_bdev from a running system.
*/
if (system_state >= SYSTEM_RUNNING)
- return bdev_handle;
+ return bdev_file;
/*
* We might not have the root device mounted at this point.
@@ -253,20 +249,20 @@ static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname,
wait_for_device_probe();
if (!early_lookup_bdev(devname, &devt)) {
- bdev_handle = bdev_open_by_dev(devt, mode, dev, NULL);
- if (!IS_ERR(bdev_handle))
+ bdev_file = bdev_file_open_by_dev(devt, mode, dev, NULL);
+ if (!IS_ERR(bdev_file))
break;
}
}
#endif
- return bdev_handle;
+ return bdev_file;
}
static struct block2mtd_dev *add_device(char *devname, int erase_size,
char *label, int timeout)
{
const blk_mode_t mode = BLK_OPEN_READ | BLK_OPEN_WRITE;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bdev;
struct block2mtd_dev *dev;
char *name;
@@ -279,16 +275,16 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size,
return NULL;
/* Get a handle on the device */
- bdev_handle = bdev_open_by_path(devname, mode, dev, NULL);
- if (IS_ERR(bdev_handle))
- bdev_handle = mdtblock_early_get_bdev(devname, mode, timeout,
+ bdev_file = bdev_file_open_by_path(devname, mode, dev, NULL);
+ if (IS_ERR(bdev_file))
+ bdev_file = mdtblock_early_get_bdev(devname, mode, timeout,
dev);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
pr_err("error: cannot open device %s\n", devname);
goto err_free_block2mtd;
}
- dev->bdev_handle = bdev_handle;
- bdev = bdev_handle->bdev;
+ dev->bdev_file = bdev_file;
+ bdev = file_bdev(bdev_file);
if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
pr_err("attempting to use an MTD device as a block device\n");
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index f0526dcc2162..3caa0717d46c 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -277,6 +277,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
struct mtd_blktrans_ops *tr = new->tr;
struct mtd_blktrans_dev *d;
+ struct queue_limits lim = { };
int last_devnum = -1;
struct gendisk *gd;
int ret;
@@ -331,9 +332,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
if (ret)
goto out_kfree_tag_set;
+
+ lim.logical_block_size = tr->blksize;
+ if (tr->discard)
+ lim.max_hw_discard_sectors = UINT_MAX;
/* Create gendisk */
- gd = blk_mq_alloc_disk(new->tag_set, new);
+ gd = blk_mq_alloc_disk(new->tag_set, &lim, new);
if (IS_ERR(gd)) {
ret = PTR_ERR(gd);
goto out_free_tag_set;
@@ -371,14 +376,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
if (tr->flush)
blk_queue_write_cache(new->rq, true, false);
- blk_queue_logical_block_size(new->rq, tr->blksize);
-
blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
- if (tr->discard)
- blk_queue_max_discard_sectors(new->rq, UINT_MAX);
-
gd->queue = new->rq;
if (new->readonly)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index e451b28840d5..5887feb347a4 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -621,6 +621,7 @@ static void mtd_check_of_node(struct mtd_info *mtd)
if (plen == mtd_name_len &&
!strncmp(mtd->name, pname + offset, plen)) {
mtd_set_of_node(mtd, mtd_dn);
+ of_node_put(mtd_dn);
break;
}
}
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index a46698744850..5b0f5a9cef81 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -290,16 +290,13 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = {
MARVELL_LAYOUT( 2048, 512, 4, 1, 1, 2048, 32, 30, 0, 0, 0),
MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,32, 30),
MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,64, 30),
- MARVELL_LAYOUT( 2048, 512, 12, 3, 2, 704, 0, 30,640, 0, 30),
- MARVELL_LAYOUT( 2048, 512, 16, 5, 4, 512, 0, 30, 0, 32, 30),
+ MARVELL_LAYOUT( 2048, 512, 16, 4, 4, 512, 0, 30, 0, 32, 30),
MARVELL_LAYOUT( 4096, 512, 4, 2, 2, 2048, 32, 30, 0, 0, 0),
- MARVELL_LAYOUT( 4096, 512, 8, 5, 4, 1024, 0, 30, 0, 64, 30),
- MARVELL_LAYOUT( 4096, 512, 12, 6, 5, 704, 0, 30,576, 32, 30),
- MARVELL_LAYOUT( 4096, 512, 16, 9, 8, 512, 0, 30, 0, 32, 30),
+ MARVELL_LAYOUT( 4096, 512, 8, 4, 4, 1024, 0, 30, 0, 64, 30),
+ MARVELL_LAYOUT( 4096, 512, 16, 8, 8, 512, 0, 30, 0, 32, 30),
MARVELL_LAYOUT( 8192, 512, 4, 4, 4, 2048, 0, 30, 0, 0, 0),
- MARVELL_LAYOUT( 8192, 512, 8, 9, 8, 1024, 0, 30, 0, 160, 30),
- MARVELL_LAYOUT( 8192, 512, 12, 12, 11, 704, 0, 30,448, 64, 30),
- MARVELL_LAYOUT( 8192, 512, 16, 17, 16, 512, 0, 30, 0, 32, 30),
+ MARVELL_LAYOUT( 8192, 512, 8, 8, 8, 1024, 0, 30, 0, 160, 30),
+ MARVELL_LAYOUT( 8192, 512, 16, 16, 16, 512, 0, 30, 0, 32, 30),
};
/**
diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
index 987710e09441..6023cba748bb 100644
--- a/drivers/mtd/nand/spi/gigadevice.c
+++ b/drivers/mtd/nand/spi/gigadevice.c
@@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
{
u8 status2;
struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
- &status2);
+ spinand->scratchbuf);
int ret;
switch (status & STATUS_ECC_MASK) {
@@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
* report the maximum of 4 in this case
*/
/* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */
+ status2 = *(spinand->scratchbuf);
return ((status & STATUS_ECC_MASK) >> 2) |
((status2 & STATUS_ECC_MASK) >> 4);
@@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
{
u8 status2;
struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
- &status2);
+ spinand->scratchbuf);
int ret;
switch (status & STATUS_ECC_MASK) {
@@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
* 1 ... 4 bits are flipped (and corrected)
*/
/* bits sorted this way (1...0): ECCSE1, ECCSE0 */
+ status2 = *(spinand->scratchbuf);
return ((status2 & STATUS_ECC_MASK) >> 4) + 1;
case STATUS_ECC_UNCOR_ERROR:
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 654bd7372cd8..5c8fdcc088a0 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -348,6 +348,9 @@ static int calc_disk_capacity(struct ubi_volume_info *vi, u64 *disk_capacity)
int ubiblock_create(struct ubi_volume_info *vi)
{
+ struct queue_limits lim = {
+ .max_segments = UBI_MAX_SG_COUNT,
+ };
struct ubiblock *dev;
struct gendisk *gd;
u64 disk_capacity;
@@ -393,7 +396,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
/* Initialize the gendisk of this ubiblock device */
- gd = blk_mq_alloc_disk(&dev->tag_set, dev);
+ gd = blk_mq_alloc_disk(&dev->tag_set, &lim, dev);
if (IS_ERR(gd)) {
ret = PTR_ERR(gd);
goto out_free_tags;
@@ -416,7 +419,6 @@ int ubiblock_create(struct ubi_volume_info *vi)
dev->gd = gd;
dev->rq = gd->queue;
- blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
list_add_tail(&dev->list, &ubiblock_devices);
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index 68e79b1272f6..6d15ab3bfbbc 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -3063,15 +3063,10 @@ static int amt_dev_init(struct net_device *dev)
int err;
amt->dev = dev;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
err = gro_cells_init(&amt->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
return 0;
}
@@ -3081,7 +3076,6 @@ static void amt_dev_uninit(struct net_device *dev)
struct amt_dev *amt = netdev_priv(dev);
gro_cells_destroy(&amt->gro_cells);
- free_percpu(dev->tstats);
}
static const struct net_device_ops amt_netdev_ops = {
@@ -3090,7 +3084,6 @@ static const struct net_device_ops amt_netdev_ops = {
.ndo_open = amt_dev_open,
.ndo_stop = amt_dev_stop,
.ndo_start_xmit = amt_dev_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
};
static void amt_link_setup(struct net_device *dev)
@@ -3111,6 +3104,7 @@ static void amt_link_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM;
dev->hw_features |= NETIF_F_FRAGLIST | NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
eth_hw_addr_random(dev);
eth_zero_addr(dev->broadcast);
ether_setup(dev);
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 8c651fdee039..57f1729066f2 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -186,4 +186,5 @@ static void __exit arcnet_raw_exit(void)
module_init(arcnet_raw_init);
module_exit(arcnet_raw_exit);
+MODULE_DESCRIPTION("ARCnet raw mode packet interface module");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c
index 8c3ccc7c83cd..53d10a04d1bd 100644
--- a/drivers/net/arcnet/arc-rimi.c
+++ b/drivers/net/arcnet/arc-rimi.c
@@ -312,6 +312,7 @@ module_param(node, int, 0);
module_param(io, int, 0);
module_param(irq, int, 0);
module_param_string(device, device, sizeof(device), 0);
+MODULE_DESCRIPTION("ARCnet COM90xx RIM I chipset driver");
MODULE_LICENSE("GPL");
static struct net_device *my_dev;
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index d9e052c49ba1..166bfc3c8e6c 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -108,6 +108,7 @@ static int go_tx(struct net_device *dev);
static int debug = ARCNET_DEBUG;
module_param(debug, int, 0);
+MODULE_DESCRIPTION("ARCnet core driver");
MODULE_LICENSE("GPL");
static int __init arcnet_init(void)
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index c09b567845e1..7a0a79973769 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -265,4 +265,5 @@ static void __exit capmode_module_exit(void)
module_init(capmode_module_init);
module_exit(capmode_module_exit);
+MODULE_DESCRIPTION("ARCnet CAP mode packet interface module");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 7b5c8bb02f11..c5e571ec94c9 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -61,6 +61,7 @@ module_param(timeout, int, 0);
module_param(backplane, int, 0);
module_param(clockp, int, 0);
module_param(clockm, int, 0);
+MODULE_DESCRIPTION("ARCnet COM20020 chipset PCI driver");
MODULE_LICENSE("GPL");
static void led_tx_set(struct led_classdev *led_cdev,
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c
index 06e1651b594b..a0053e3992a3 100644
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -399,6 +399,7 @@ EXPORT_SYMBOL(com20020_found);
EXPORT_SYMBOL(com20020_netdev_ops);
#endif
+MODULE_DESCRIPTION("ARCnet COM20020 chipset core driver");
MODULE_LICENSE("GPL");
#ifdef MODULE
diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c
index dc3253b318da..75f08aa7528b 100644
--- a/drivers/net/arcnet/com20020_cs.c
+++ b/drivers/net/arcnet/com20020_cs.c
@@ -97,6 +97,7 @@ module_param(backplane, int, 0);
module_param(clockp, int, 0);
module_param(clockm, int, 0);
+MODULE_DESCRIPTION("ARCnet COM20020 chipset PCMCIA driver");
MODULE_LICENSE("GPL");
/*====================================================================*/
diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c
index 37b47749fc8b..3b463fbc6402 100644
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -350,6 +350,7 @@ static char device[9]; /* use eg. device=arc1 to change name */
module_param_hw(io, int, ioport, 0);
module_param_hw(irq, int, irq, 0);
module_param_string(device, device, sizeof(device), 0);
+MODULE_DESCRIPTION("ARCnet COM90xx IO mapped chipset driver");
MODULE_LICENSE("GPL");
#ifndef MODULE
diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c
index f49dae194284..b3b287c16561 100644
--- a/drivers/net/arcnet/com90xx.c
+++ b/drivers/net/arcnet/com90xx.c
@@ -645,6 +645,7 @@ static void com90xx_copy_from_card(struct net_device *dev, int bufnum,
TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count));
}
+MODULE_DESCRIPTION("ARCnet COM90xx normal chipset driver");
MODULE_LICENSE("GPL");
static int __init com90xx_init(void)
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c
index a7752a5b647f..46519ca63a0a 100644
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -78,6 +78,7 @@ static void __exit arcnet_rfc1051_exit(void)
module_init(arcnet_rfc1051_init);
module_exit(arcnet_rfc1051_exit);
+MODULE_DESCRIPTION("ARCNet packet format (RFC 1051) module");
MODULE_LICENSE("GPL");
/* Determine a packet's protocol ID.
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c
index a4c856282674..0edf35d971c5 100644
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -35,6 +35,7 @@
#include "arcdevice.h"
+MODULE_DESCRIPTION("ARCNet packet format (RFC 1201) module");
MODULE_LICENSE("GPL");
static __be16 type_trans(struct sk_buff *skb, struct net_device *dev);
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 31377bb1cc97..339db6e4a1d5 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -194,15 +194,10 @@ static int bareudp_init(struct net_device *dev)
struct bareudp_dev *bareudp = netdev_priv(dev);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&bareudp->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
+
return 0;
}
@@ -211,7 +206,6 @@ static void bareudp_uninit(struct net_device *dev)
struct bareudp_dev *bareudp = netdev_priv(dev);
gro_cells_destroy(&bareudp->gro_cells);
- free_percpu(dev->tstats);
}
static struct socket *bareudp_create_sock(struct net *net, __be16 port)
@@ -529,7 +523,6 @@ static const struct net_device_ops bareudp_netdev_ops = {
.ndo_open = bareudp_open,
.ndo_stop = bareudp_stop,
.ndo_start_xmit = bareudp_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_fill_metadata_dst = bareudp_fill_metadata_dst,
};
@@ -567,6 +560,7 @@ static void bareudp_setup(struct net_device *dev)
netif_keep_dst(dev);
dev->priv_flags |= IFF_NO_QUEUE;
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
}
static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -760,23 +754,18 @@ static void bareudp_destroy_tunnels(struct net *net, struct list_head *head)
unregister_netdevice_queue(bareudp->dev, head);
}
-static void __net_exit bareudp_exit_batch_net(struct list_head *net_list)
+static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_kill_list)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
- bareudp_destroy_tunnels(net, &list);
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ bareudp_destroy_tunnels(net, dev_kill_list);
}
static struct pernet_operations bareudp_net_ops = {
.init = bareudp_init_net,
- .exit_batch = bareudp_exit_batch_net,
+ .exit_batch_rtnl = bareudp_exit_batch_rtnl,
.id = &bareudp_net_id,
.size = sizeof(struct bareudp_net),
};
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c99ffe6c683a..c6807e473ab7 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -82,10 +82,6 @@ enum ad_link_speed_type {
#define MAC_ADDRESS_EQUAL(A, B) \
ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)
-static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
- 0, 0, 0, 0, 0, 0
-};
-
static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL;
static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
@@ -106,6 +102,9 @@ static void ad_agg_selection_logic(struct aggregator *aggregator,
static void ad_clear_agg(struct aggregator *aggregator);
static void ad_initialize_agg(struct aggregator *aggregator);
static void ad_initialize_port(struct port *port, int lacp_fast);
+static void ad_enable_collecting(struct port *port);
+static void ad_disable_distributing(struct port *port,
+ bool *update_slave_arr);
static void ad_enable_collecting_distributing(struct port *port,
bool *update_slave_arr);
static void ad_disable_collecting_distributing(struct port *port,
@@ -172,8 +171,37 @@ static inline int __agg_has_partner(struct aggregator *agg)
}
/**
+ * __disable_distributing_port - disable the port's slave for distributing.
+ * Port will still be able to collect.
+ * @port: the port we're looking at
+ *
+ * This will disable only distributing on the port's slave.
+ */
+static void __disable_distributing_port(struct port *port)
+{
+ bond_set_slave_tx_disabled_flags(port->slave, BOND_SLAVE_NOTIFY_LATER);
+}
+
+/**
+ * __enable_collecting_port - enable the port's slave for collecting,
+ * if it's up
+ * @port: the port we're looking at
+ *
+ * This will enable only collecting on the port's slave.
+ */
+static void __enable_collecting_port(struct port *port)
+{
+ struct slave *slave = port->slave;
+
+ if (slave->link == BOND_LINK_UP && bond_slave_is_up(slave))
+ bond_set_slave_rx_enabled_flags(slave, BOND_SLAVE_NOTIFY_LATER);
+}
+
+/**
* __disable_port - disable the port's slave
* @port: the port we're looking at
+ *
+ * This will disable both collecting and distributing on the port's slave.
*/
static inline void __disable_port(struct port *port)
{
@@ -183,6 +211,8 @@ static inline void __disable_port(struct port *port)
/**
* __enable_port - enable the port's slave, if it's up
* @port: the port we're looking at
+ *
+ * This will enable both collecting and distributing on the port's slave.
*/
static inline void __enable_port(struct port *port)
{
@@ -193,10 +223,27 @@ static inline void __enable_port(struct port *port)
}
/**
- * __port_is_enabled - check if the port's slave is in active state
+ * __port_move_to_attached_state - check if port should transition back to attached
+ * state.
+ * @port: the port we're looking at
+ */
+static bool __port_move_to_attached_state(struct port *port)
+{
+ if (!(port->sm_vars & AD_PORT_SELECTED) ||
+ (port->sm_vars & AD_PORT_STANDBY) ||
+ !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) ||
+ !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION))
+ port->sm_mux_state = AD_MUX_ATTACHED;
+
+ return port->sm_mux_state == AD_MUX_ATTACHED;
+}
+
+/**
+ * __port_is_collecting_distributing - check if the port's slave is in the
+ * combined collecting/distributing state
* @port: the port we're looking at
*/
-static inline int __port_is_enabled(struct port *port)
+static int __port_is_collecting_distributing(struct port *port)
{
return bond_is_active_slave(port->slave);
}
@@ -942,6 +989,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
*/
static void ad_mux_machine(struct port *port, bool *update_slave_arr)
{
+ struct bonding *bond = __get_bond_by_port(port);
mux_states_t last_state;
/* keep current State Machine state to compare later if it was
@@ -999,9 +1047,13 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
if ((port->sm_vars & AD_PORT_SELECTED) &&
(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) &&
!__check_agg_selection_timer(port)) {
- if (port->aggregator->is_active)
- port->sm_mux_state =
- AD_MUX_COLLECTING_DISTRIBUTING;
+ if (port->aggregator->is_active) {
+ int state = AD_MUX_COLLECTING_DISTRIBUTING;
+
+ if (!bond->params.coupled_control)
+ state = AD_MUX_COLLECTING;
+ port->sm_mux_state = state;
+ }
} else if (!(port->sm_vars & AD_PORT_SELECTED) ||
(port->sm_vars & AD_PORT_STANDBY)) {
/* if UNSELECTED or STANDBY */
@@ -1019,11 +1071,45 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
}
break;
case AD_MUX_COLLECTING_DISTRIBUTING:
+ if (!__port_move_to_attached_state(port)) {
+ /* if port state hasn't changed make
+ * sure that a collecting distributing
+ * port in an active aggregator is enabled
+ */
+ if (port->aggregator->is_active &&
+ !__port_is_collecting_distributing(port)) {
+ __enable_port(port);
+ *update_slave_arr = true;
+ }
+ }
+ break;
+ case AD_MUX_COLLECTING:
+ if (!__port_move_to_attached_state(port)) {
+ if ((port->sm_vars & AD_PORT_SELECTED) &&
+ (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) &&
+ (port->partner_oper.port_state & LACP_STATE_COLLECTING)) {
+ port->sm_mux_state = AD_MUX_DISTRIBUTING;
+ } else {
+ /* If port state hasn't changed, make sure that a collecting
+ * port is enabled for an active aggregator.
+ */
+ struct slave *slave = port->slave;
+
+ if (port->aggregator->is_active &&
+ bond_is_slave_rx_disabled(slave)) {
+ ad_enable_collecting(port);
+ *update_slave_arr = true;
+ }
+ }
+ }
+ break;
+ case AD_MUX_DISTRIBUTING:
if (!(port->sm_vars & AD_PORT_SELECTED) ||
(port->sm_vars & AD_PORT_STANDBY) ||
+ !(port->partner_oper.port_state & LACP_STATE_COLLECTING) ||
!(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) ||
!(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) {
- port->sm_mux_state = AD_MUX_ATTACHED;
+ port->sm_mux_state = AD_MUX_COLLECTING;
} else {
/* if port state hasn't changed make
* sure that a collecting distributing
@@ -1031,7 +1117,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
*/
if (port->aggregator &&
port->aggregator->is_active &&
- !__port_is_enabled(port)) {
+ !__port_is_collecting_distributing(port)) {
__enable_port(port);
*update_slave_arr = true;
}
@@ -1082,6 +1168,20 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
update_slave_arr);
port->ntt = true;
break;
+ case AD_MUX_COLLECTING:
+ port->actor_oper_port_state |= LACP_STATE_COLLECTING;
+ port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
+ port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION;
+ ad_enable_collecting(port);
+ ad_disable_distributing(port, update_slave_arr);
+ port->ntt = true;
+ break;
+ case AD_MUX_DISTRIBUTING:
+ port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING;
+ port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION;
+ ad_enable_collecting_distributing(port,
+ update_slave_arr);
+ break;
default:
break;
}
@@ -1484,7 +1584,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
(aggregator->partner_system_priority == port->partner_oper.system_priority) &&
(aggregator->partner_oper_aggregator_key == port->partner_oper.key)
) &&
- ((!MAC_ADDRESS_EQUAL(&(port->partner_oper.system), &(null_mac_addr)) && /* partner answers */
+ ((__agg_has_partner(aggregator) && /* partner answers */
!aggregator->is_individual) /* but is not individual OR */
)
) {
@@ -1907,6 +2007,43 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
}
/**
+ * ad_enable_collecting - enable a port's receive
+ * @port: the port we're looking at
+ *
+ * Enable @port if it's in an active aggregator
+ */
+static void ad_enable_collecting(struct port *port)
+{
+ if (port->aggregator->is_active) {
+ struct slave *slave = port->slave;
+
+ slave_dbg(slave->bond->dev, slave->dev,
+ "Enabling collecting on port %d (LAG %d)\n",
+ port->actor_port_number,
+ port->aggregator->aggregator_identifier);
+ __enable_collecting_port(port);
+ }
+}
+
+/**
+ * ad_disable_distributing - disable a port's transmit
+ * @port: the port we're looking at
+ * @update_slave_arr: Does slave array need update?
+ */
+static void ad_disable_distributing(struct port *port, bool *update_slave_arr)
+{
+ if (port->aggregator && __agg_has_partner(port->aggregator)) {
+ slave_dbg(port->slave->bond->dev, port->slave->dev,
+ "Disabling distributing on port %d (LAG %d)\n",
+ port->actor_port_number,
+ port->aggregator->aggregator_identifier);
+ __disable_distributing_port(port);
+ /* Slave array needs an update */
+ *update_slave_arr = true;
+ }
+}
+
+/**
* ad_enable_collecting_distributing - enable a port's transmit/receive
* @port: the port we're looking at
* @update_slave_arr: Does slave array need update?
@@ -1935,9 +2072,7 @@ static void ad_enable_collecting_distributing(struct port *port,
static void ad_disable_collecting_distributing(struct port *port,
bool *update_slave_arr)
{
- if (port->aggregator &&
- !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
- &(null_mac_addr))) {
+ if (port->aggregator && __agg_has_partner(port->aggregator)) {
slave_dbg(port->slave->bond->dev, port->slave->dev,
"Disabling port %d (LAG %d)\n",
port->actor_port_number,
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4e0600c7b050..2c5ed0a7cb18 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1811,7 +1811,7 @@ void bond_xdp_set_features(struct net_device *bond_dev)
ASSERT_RTNL();
- if (!bond_xdp_check(bond)) {
+ if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) {
xdp_clear_features_flag(bond_dev);
return;
}
@@ -1819,6 +1819,8 @@ void bond_xdp_set_features(struct net_device *bond_dev)
bond_for_each_slave(bond, slave, iter)
val &= slave->dev->xdp_features;
+ val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
xdp_set_features_flag(bond_dev, val);
}
@@ -2609,7 +2611,7 @@ static int bond_miimon_inspect(struct bonding *bond)
bond_propose_link_state(slave, BOND_LINK_FAIL);
commit++;
slave->delay = bond->params.downdelay;
- if (slave->delay) {
+ if (slave->delay && net_ratelimit()) {
slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
(BOND_MODE(bond) ==
BOND_MODE_ACTIVEBACKUP) ?
@@ -2623,9 +2625,10 @@ static int bond_miimon_inspect(struct bonding *bond)
/* recovered before downdelay expired */
bond_propose_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
- slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
- (bond->params.downdelay - slave->delay) *
- bond->params.miimon);
+ if (net_ratelimit())
+ slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
+ (bond->params.downdelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -2647,7 +2650,7 @@ static int bond_miimon_inspect(struct bonding *bond)
commit++;
slave->delay = bond->params.updelay;
- if (slave->delay) {
+ if (slave->delay && net_ratelimit()) {
slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
ignore_updelay ? 0 :
bond->params.updelay *
@@ -2657,9 +2660,10 @@ static int bond_miimon_inspect(struct bonding *bond)
case BOND_LINK_BACK:
if (!link_state) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
- slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
- (bond->params.updelay - slave->delay) *
- bond->params.miimon);
+ if (net_ratelimit())
+ slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
+ (bond->params.updelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -5909,9 +5913,6 @@ void bond_setup(struct net_device *bond_dev)
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
-
- if (bond_xdp_check(bond))
- bond_dev->xdp_features = NETDEV_XDP_ACT_MASK;
}
/* Destroy a bonding device.
@@ -6306,6 +6307,7 @@ static int __init bond_check_params(struct bond_params *params)
params->ad_actor_sys_prio = ad_actor_sys_prio;
eth_zero_addr(params->ad_actor_system);
params->ad_user_port_key = ad_user_port_key;
+ params->coupled_control = 1;
if (packets_per_slave > 0) {
params->reciprocal_packets_per_slave =
reciprocal_value(packets_per_slave);
@@ -6415,28 +6417,41 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}
-static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
+ * race condition in bond unloading") we need to remove sysfs files
+ * before we remove our devices (done later in bond_net_exit_batch_rtnl())
+ */
+static void __net_exit bond_net_pre_exit(struct net *net)
+{
+ struct bond_net *bn = net_generic(net, bond_net_id);
+
+ bond_destroy_sysfs(bn);
+}
+
+static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_kill_list)
{
struct bond_net *bn;
struct net *net;
- LIST_HEAD(list);
-
- list_for_each_entry(net, net_list, exit_list) {
- bn = net_generic(net, bond_net_id);
- bond_destroy_sysfs(bn);
- }
/* Kill off any bonds created after unregistering bond rtnl ops */
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
struct bonding *bond, *tmp_bond;
bn = net_generic(net, bond_net_id);
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, &list);
+ unregister_netdevice_queue(bond->dev, dev_kill_list);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
+}
+
+/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
+ * only after all bonds are gone") bond_destroy_proc_dir() is called
+ * after bond_net_exit_batch_rtnl() has completed.
+ */
+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+{
+ struct bond_net *bn;
+ struct net *net;
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
@@ -6446,6 +6461,8 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
+ .pre_exit = bond_net_pre_exit,
+ .exit_batch_rtnl = bond_net_exit_batch_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index cfa74cf8bb1a..29b4c3d1b9b6 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -122,6 +122,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range),
[IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 },
[IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED },
+ [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 },
};
static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
@@ -549,6 +550,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
return err;
}
+ if (data[IFLA_BOND_COUPLED_CONTROL]) {
+ int coupled_control = nla_get_u8(data[IFLA_BOND_COUPLED_CONTROL]);
+
+ bond_opt_initval(&newval, coupled_control);
+ err = __bond_opt_set(bond, BOND_OPT_COUPLED_CONTROL, &newval,
+ data[IFLA_BOND_COUPLED_CONTROL], extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -615,6 +626,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
/* IFLA_BOND_NS_IP6_TARGET */
nla_total_size(sizeof(struct nlattr)) +
nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS +
+ nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */
0;
}
@@ -774,6 +786,10 @@ static int bond_fill_info(struct sk_buff *skb,
bond->params.missed_max))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_BOND_COUPLED_CONTROL,
+ bond->params.coupled_control))
+ goto nla_put_failure;
+
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info info;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index f3f27f0bd2a6..4cdbc7e084f4 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -84,7 +84,8 @@ static int bond_option_ad_user_port_key_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_missed_max_set(struct bonding *bond,
const struct bond_opt_value *newval);
-
+static int bond_option_coupled_control_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
static const struct bond_opt_value bond_mode_tbl[] = {
{ "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT},
@@ -232,6 +233,12 @@ static const struct bond_opt_value bond_missed_max_tbl[] = {
{ NULL, -1, 0},
};
+static const struct bond_opt_value bond_coupled_control_tbl[] = {
+ { "on", 1, BOND_VALFLAG_DEFAULT},
+ { "off", 0, 0},
+ { NULL, -1, 0},
+};
+
static const struct bond_option bond_opts[BOND_OPT_LAST] = {
[BOND_OPT_MODE] = {
.id = BOND_OPT_MODE,
@@ -496,6 +503,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.desc = "Delay between each peer notification on failover event, in milliseconds",
.values = bond_peer_notif_delay_tbl,
.set = bond_option_peer_notif_delay_set
+ },
+ [BOND_OPT_COUPLED_CONTROL] = {
+ .id = BOND_OPT_COUPLED_CONTROL,
+ .name = "coupled_control",
+ .desc = "Opt into using coupled control MUX for LACP states",
+ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+ .flags = BOND_OPTFLAG_IFDOWN,
+ .values = bond_coupled_control_tbl,
+ .set = bond_option_coupled_control_set,
}
};
@@ -1692,3 +1708,13 @@ static int bond_option_ad_user_port_key_set(struct bonding *bond,
bond->params.ad_user_port_key = newval->value;
return 0;
}
+
+static int bond_option_coupled_control_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ netdev_info(bond->dev, "Setting coupled_control to %s (%llu)\n",
+ newval->string, newval->value);
+
+ bond->params.coupled_control = newval->value;
+ return 0;
+}
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index eb410714afc2..2e31db55d927 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -168,6 +168,8 @@ config CAN_KVASER_PCIEFD
Kvaser Mini PCI Express 2xHS v2
Kvaser Mini PCI Express 1xCAN v3
Kvaser Mini PCI Express 2xCAN v3
+ Kvaser M.2 PCIe 4xCAN
+ Kvaser PCIe 8xCAN
config CAN_SLCAN
tristate "Serial / USB serial CAN Adaptors (slcan)"
@@ -218,6 +220,7 @@ config CAN_XILINXCAN
source "drivers/net/can/c_can/Kconfig"
source "drivers/net/can/cc770/Kconfig"
source "drivers/net/can/ctucanfd/Kconfig"
+source "drivers/net/can/esd/Kconfig"
source "drivers/net/can/ifi_canfd/Kconfig"
source "drivers/net/can/m_can/Kconfig"
source "drivers/net/can/mscan/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index ff8f76295d13..4669cd51e7bf 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CAN_VXCAN) += vxcan.o
obj-$(CONFIG_CAN_SLCAN) += slcan/
obj-y += dev/
+obj-y += esd/
obj-y += rcar/
obj-y += spi/
obj-y += usb/
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 036d85ef07f5..dfdc039d92a6 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -346,7 +346,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
/* Neither of TDC parameters nor TDC flags are
* provided: do calculation
*/
- can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming,
+ can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt,
&priv->ctrlmode, priv->ctrlmode_supported);
} /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly
* turned off. TDC is disabled: do nothing
diff --git a/drivers/net/can/esd/Kconfig b/drivers/net/can/esd/Kconfig
new file mode 100644
index 000000000000..54bfc366634c
--- /dev/null
+++ b/drivers/net/can/esd/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config CAN_ESD_402_PCI
+ tristate "esd electronics gmbh CAN-PCI(e)/402 family"
+ depends on PCI && HAS_DMA
+ help
+ Support for C402 card family from esd electronics gmbh.
+ This card family is based on the ESDACC CAN controller and
+ available in several form factors: PCI, PCIe, PCIe Mini,
+ M.2 PCIe, CPCIserial, PMC, XMC (see https://esd.eu/en)
+
+ This driver can also be built as a module. In this case the
+ module will be called esd_402_pci.
diff --git a/drivers/net/can/esd/Makefile b/drivers/net/can/esd/Makefile
new file mode 100644
index 000000000000..5dd2d470c286
--- /dev/null
+++ b/drivers/net/can/esd/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for esd gmbh ESDACC controller driver
+#
+esd_402_pci-objs := esdacc.o esd_402_pci-core.o
+
+obj-$(CONFIG_CAN_ESD_402_PCI) += esd_402_pci.o
diff --git a/drivers/net/can/esd/esd_402_pci-core.c b/drivers/net/can/esd/esd_402_pci-core.c
new file mode 100644
index 000000000000..b7cdcffd0e45
--- /dev/null
+++ b/drivers/net/can/esd/esd_402_pci-core.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2015 - 2016 Thomas Körper, esd electronic system design gmbh
+ * Copyright (C) 2017 - 2023 Stefan Mätje, esd electronics gmbh
+ */
+
+#include <linux/can/dev.h>
+#include <linux/can.h>
+#include <linux/can/netlink.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/ethtool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "esdacc.h"
+
+#define ESD_PCI_DEVICE_ID_PCIE402 0x0402
+
+#define PCI402_FPGA_VER_MIN 0x003d
+#define PCI402_MAX_CORES 6
+#define PCI402_BAR 0
+#define PCI402_IO_OV_OFFS 0
+#define PCI402_IO_PCIEP_OFFS 0x10000
+#define PCI402_IO_LEN_TOTAL 0x20000
+#define PCI402_IO_LEN_CORE 0x2000
+#define PCI402_PCICFG_MSICAP 0x50
+
+#define PCI402_DMA_MASK DMA_BIT_MASK(32)
+#define PCI402_DMA_SIZE ALIGN(0x10000, PAGE_SIZE)
+
+#define PCI402_PCIEP_OF_INT_ENABLE 0x0050
+#define PCI402_PCIEP_OF_BM_ADDR_LO 0x1000
+#define PCI402_PCIEP_OF_BM_ADDR_HI 0x1004
+#define PCI402_PCIEP_OF_MSI_ADDR_LO 0x1008
+#define PCI402_PCIEP_OF_MSI_ADDR_HI 0x100c
+
+struct pci402_card {
+ /* Actually mapped io space, all other iomem derived from this */
+ void __iomem *addr;
+ void __iomem *addr_pciep;
+
+ void *dma_buf;
+ dma_addr_t dma_hnd;
+
+ struct acc_ov ov;
+ struct acc_core *cores;
+
+ bool msi_enabled;
+};
+
+/* The BTR register capabilities described by the can_bittiming_const structures
+ * below are valid since esdACC version 0x0032.
+ */
+
+/* Used if the esdACC FPGA is built as CAN-Classic version. */
+static const struct can_bittiming_const pci402_bittiming_const = {
+ .name = "esd_402",
+ .tseg1_min = 1,
+ .tseg1_max = 16,
+ .tseg2_min = 1,
+ .tseg2_max = 8,
+ .sjw_max = 4,
+ .brp_min = 1,
+ .brp_max = 512,
+ .brp_inc = 1,
+};
+
+/* Used if the esdACC FPGA is built as CAN-FD version. */
+static const struct can_bittiming_const pci402_bittiming_const_canfd = {
+ .name = "esd_402fd",
+ .tseg1_min = 1,
+ .tseg1_max = 256,
+ .tseg2_min = 1,
+ .tseg2_max = 128,
+ .sjw_max = 128,
+ .brp_min = 1,
+ .brp_max = 256,
+ .brp_inc = 1,
+};
+
+static const struct net_device_ops pci402_acc_netdev_ops = {
+ .ndo_open = acc_open,
+ .ndo_stop = acc_close,
+ .ndo_start_xmit = acc_start_xmit,
+ .ndo_change_mtu = can_change_mtu,
+ .ndo_eth_ioctl = can_eth_ioctl_hwts,
+};
+
+static const struct ethtool_ops pci402_acc_ethtool_ops = {
+ .get_ts_info = can_ethtool_op_get_ts_info_hwts,
+};
+
+static irqreturn_t pci402_interrupt(int irq, void *dev_id)
+{
+ struct pci_dev *pdev = dev_id;
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ irqreturn_t irq_status;
+
+ irq_status = acc_card_interrupt(&card->ov, card->cores);
+
+ return irq_status;
+}
+
+static int pci402_set_msiconfig(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ u32 addr_lo_offs = 0;
+ u32 addr_lo = 0;
+ u32 addr_hi = 0;
+ u32 data = 0;
+ u16 csr = 0;
+ int err;
+
+ /* The FPGA hard IP PCIe core implements a 64-bit MSI Capability
+ * Register Format
+ */
+ err = pci_read_config_word(pdev, PCI402_PCICFG_MSICAP + PCI_MSI_FLAGS, &csr);
+ if (err)
+ goto failed;
+
+ err = pci_read_config_dword(pdev, PCI402_PCICFG_MSICAP + PCI_MSI_ADDRESS_LO,
+ &addr_lo);
+ if (err)
+ goto failed;
+ err = pci_read_config_dword(pdev, PCI402_PCICFG_MSICAP + PCI_MSI_ADDRESS_HI,
+ &addr_hi);
+ if (err)
+ goto failed;
+
+ err = pci_read_config_dword(pdev, PCI402_PCICFG_MSICAP + PCI_MSI_DATA_64,
+ &data);
+ if (err)
+ goto failed;
+
+ addr_lo_offs = addr_lo & 0x0000ffff;
+ addr_lo &= 0xffff0000;
+
+ if (addr_hi)
+ addr_lo |= 1; /* To enable 64-Bit addressing in PCIe endpoint */
+
+ if (!(csr & PCI_MSI_FLAGS_ENABLE)) {
+ err = -EINVAL;
+ goto failed;
+ }
+
+ iowrite32(addr_lo, card->addr_pciep + PCI402_PCIEP_OF_MSI_ADDR_LO);
+ iowrite32(addr_hi, card->addr_pciep + PCI402_PCIEP_OF_MSI_ADDR_HI);
+ acc_ov_write32(&card->ov, ACC_OV_OF_MSI_ADDRESSOFFSET, addr_lo_offs);
+ acc_ov_write32(&card->ov, ACC_OV_OF_MSI_DATA, data);
+
+ return 0;
+
+failed:
+ pci_warn(pdev, "Error while setting MSI configuration:\n"
+ "CSR: 0x%.4x, addr: 0x%.8x%.8x, offs: 0x%.4x, data: 0x%.8x\n",
+ csr, addr_hi, addr_lo, addr_lo_offs, data);
+
+ return err;
+}
+
+static int pci402_init_card(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+
+ card->ov.addr = card->addr + PCI402_IO_OV_OFFS;
+ card->addr_pciep = card->addr + PCI402_IO_PCIEP_OFFS;
+
+ acc_reset_fpga(&card->ov);
+ acc_init_ov(&card->ov, &pdev->dev);
+
+ if (card->ov.version < PCI402_FPGA_VER_MIN) {
+ pci_err(pdev,
+ "esdACC version (0x%.4x) outdated, please update\n",
+ card->ov.version);
+ return -EINVAL;
+ }
+
+ if (card->ov.timestamp_frequency != ACC_TS_FREQ_80MHZ) {
+ pci_err(pdev,
+ "esdACC timestamp frequency of %uHz not supported by driver. Aborted.\n",
+ card->ov.timestamp_frequency);
+ return -EINVAL;
+ }
+
+ if (card->ov.active_cores > PCI402_MAX_CORES) {
+ pci_err(pdev,
+ "Card with %u active cores not supported by driver. Aborted.\n",
+ card->ov.active_cores);
+ return -EINVAL;
+ }
+ card->cores = devm_kcalloc(&pdev->dev, card->ov.active_cores,
+ sizeof(struct acc_core), GFP_KERNEL);
+ if (!card->cores)
+ return -ENOMEM;
+
+ if (card->ov.features & ACC_OV_REG_FEAT_MASK_CANFD) {
+ pci_warn(pdev,
+ "esdACC with CAN-FD feature detected. This driver doesn't support CAN-FD yet.\n");
+ }
+
+#ifdef __LITTLE_ENDIAN
+ /* So card converts all busmastered data to LE for us: */
+ acc_ov_set_bits(&card->ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_ENDIAN_LITTLE);
+#endif
+
+ return 0;
+}
+
+static int pci402_init_interrupt(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ int err;
+
+ err = pci_enable_msi(pdev);
+ if (!err) {
+ err = pci402_set_msiconfig(pdev);
+ if (!err) {
+ card->msi_enabled = true;
+ acc_ov_set_bits(&card->ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_MSI_ENABLE);
+ pci_dbg(pdev, "MSI preparation done\n");
+ }
+ }
+
+ err = devm_request_irq(&pdev->dev, pdev->irq, pci402_interrupt,
+ IRQF_SHARED, dev_name(&pdev->dev), pdev);
+ if (err)
+ goto failure_msidis;
+
+ iowrite32(1, card->addr_pciep + PCI402_PCIEP_OF_INT_ENABLE);
+
+ return 0;
+
+failure_msidis:
+ if (card->msi_enabled) {
+ acc_ov_clear_bits(&card->ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_MSI_ENABLE);
+ pci_disable_msi(pdev);
+ card->msi_enabled = false;
+ }
+
+ return err;
+}
+
+static void pci402_finish_interrupt(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+
+ iowrite32(0, card->addr_pciep + PCI402_PCIEP_OF_INT_ENABLE);
+ devm_free_irq(&pdev->dev, pdev->irq, pdev);
+
+ if (card->msi_enabled) {
+ acc_ov_clear_bits(&card->ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_MSI_ENABLE);
+ pci_disable_msi(pdev);
+ card->msi_enabled = false;
+ }
+}
+
+static int pci402_init_dma(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ int err;
+
+ err = dma_set_coherent_mask(&pdev->dev, PCI402_DMA_MASK);
+ if (err) {
+ pci_err(pdev, "DMA set mask failed!\n");
+ return err;
+ }
+
+ /* The esdACC DMA engine needs the DMA buffer aligned to a 64k
+ * boundary. The DMA API guarantees to align the returned buffer to the
+ * smallest PAGE_SIZE order which is greater than or equal to the
+ * requested size. With PCI402_DMA_SIZE == 64kB this suffices here.
+ */
+ card->dma_buf = dma_alloc_coherent(&pdev->dev, PCI402_DMA_SIZE,
+ &card->dma_hnd, GFP_KERNEL);
+ if (!card->dma_buf)
+ return -ENOMEM;
+
+ acc_init_bm_ptr(&card->ov, card->cores, card->dma_buf);
+
+ iowrite32(card->dma_hnd,
+ card->addr_pciep + PCI402_PCIEP_OF_BM_ADDR_LO);
+ iowrite32(0, card->addr_pciep + PCI402_PCIEP_OF_BM_ADDR_HI);
+
+ pci_set_master(pdev);
+
+ acc_ov_set_bits(&card->ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_BM_ENABLE);
+
+ return 0;
+}
+
+static void pci402_finish_dma(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ int i;
+
+ acc_ov_clear_bits(&card->ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_BM_ENABLE);
+
+ pci_clear_master(pdev);
+
+ iowrite32(0, card->addr_pciep + PCI402_PCIEP_OF_BM_ADDR_LO);
+ iowrite32(0, card->addr_pciep + PCI402_PCIEP_OF_BM_ADDR_HI);
+
+ card->ov.bmfifo.messages = NULL;
+ card->ov.bmfifo.irq_cnt = NULL;
+ for (i = 0; i < card->ov.active_cores; i++) {
+ struct acc_core *core = &card->cores[i];
+
+ core->bmfifo.messages = NULL;
+ core->bmfifo.irq_cnt = NULL;
+ }
+
+ dma_free_coherent(&pdev->dev, PCI402_DMA_SIZE, card->dma_buf,
+ card->dma_hnd);
+ card->dma_buf = NULL;
+}
+
+static void pci402_unregister_core(struct acc_core *core)
+{
+ netdev_info(core->netdev, "unregister\n");
+ unregister_candev(core->netdev);
+
+ free_candev(core->netdev);
+ core->netdev = NULL;
+}
+
+static int pci402_init_cores(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ int err;
+ int i;
+
+ for (i = 0; i < card->ov.active_cores; i++) {
+ struct acc_core *core = &card->cores[i];
+ struct acc_net_priv *priv;
+ struct net_device *netdev;
+ u32 fifo_config;
+
+ core->addr = card->ov.addr + (i + 1) * PCI402_IO_LEN_CORE;
+
+ fifo_config = acc_read32(core, ACC_CORE_OF_TXFIFO_CONFIG);
+ core->tx_fifo_size = (fifo_config >> 24);
+ if (core->tx_fifo_size <= 1) {
+ pci_err(pdev, "Invalid tx_fifo_size!\n");
+ err = -EINVAL;
+ goto failure;
+ }
+
+ netdev = alloc_candev(sizeof(*priv), core->tx_fifo_size);
+ if (!netdev) {
+ err = -ENOMEM;
+ goto failure;
+ }
+ core->netdev = netdev;
+
+ netdev->flags |= IFF_ECHO;
+ netdev->dev_port = i;
+ netdev->netdev_ops = &pci402_acc_netdev_ops;
+ netdev->ethtool_ops = &pci402_acc_ethtool_ops;
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ priv = netdev_priv(netdev);
+ priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
+ CAN_CTRLMODE_LISTENONLY |
+ CAN_CTRLMODE_BERR_REPORTING |
+ CAN_CTRLMODE_CC_LEN8_DLC;
+
+ priv->can.clock.freq = card->ov.core_frequency;
+ if (card->ov.features & ACC_OV_REG_FEAT_MASK_CANFD)
+ priv->can.bittiming_const = &pci402_bittiming_const_canfd;
+ else
+ priv->can.bittiming_const = &pci402_bittiming_const;
+ priv->can.do_set_bittiming = acc_set_bittiming;
+ priv->can.do_set_mode = acc_set_mode;
+ priv->can.do_get_berr_counter = acc_get_berr_counter;
+
+ priv->core = core;
+ priv->ov = &card->ov;
+
+ err = register_candev(netdev);
+ if (err) {
+ free_candev(core->netdev);
+ core->netdev = NULL;
+ goto failure;
+ }
+
+ netdev_info(netdev, "registered\n");
+ }
+
+ return 0;
+
+failure:
+ for (i--; i >= 0; i--)
+ pci402_unregister_core(&card->cores[i]);
+
+ return err;
+}
+
+static void pci402_finish_cores(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+ int i;
+
+ for (i = 0; i < card->ov.active_cores; i++)
+ pci402_unregister_core(&card->cores[i]);
+}
+
+static int pci402_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct pci402_card *card = NULL;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+
+ card = devm_kzalloc(&pdev->dev, sizeof(*card), GFP_KERNEL);
+ if (!card) {
+ err = -ENOMEM;
+ goto failure_disable_pci;
+ }
+
+ pci_set_drvdata(pdev, card);
+
+ err = pci_request_regions(pdev, pci_name(pdev));
+ if (err)
+ goto failure_disable_pci;
+
+ card->addr = pci_iomap(pdev, PCI402_BAR, PCI402_IO_LEN_TOTAL);
+ if (!card->addr) {
+ err = -ENOMEM;
+ goto failure_release_regions;
+ }
+
+ err = pci402_init_card(pdev);
+ if (err)
+ goto failure_unmap;
+
+ err = pci402_init_dma(pdev);
+ if (err)
+ goto failure_unmap;
+
+ err = pci402_init_interrupt(pdev);
+ if (err)
+ goto failure_finish_dma;
+
+ err = pci402_init_cores(pdev);
+ if (err)
+ goto failure_finish_interrupt;
+
+ return 0;
+
+failure_finish_interrupt:
+ pci402_finish_interrupt(pdev);
+
+failure_finish_dma:
+ pci402_finish_dma(pdev);
+
+failure_unmap:
+ pci_iounmap(pdev, card->addr);
+
+failure_release_regions:
+ pci_release_regions(pdev);
+
+failure_disable_pci:
+ pci_disable_device(pdev);
+
+ return err;
+}
+
+static void pci402_remove(struct pci_dev *pdev)
+{
+ struct pci402_card *card = pci_get_drvdata(pdev);
+
+ pci402_finish_interrupt(pdev);
+ pci402_finish_cores(pdev);
+ pci402_finish_dma(pdev);
+ pci_iounmap(pdev, card->addr);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static const struct pci_device_id pci402_tbl[] = {
+ {
+ .vendor = PCI_VENDOR_ID_ESDGMBH,
+ .device = ESD_PCI_DEVICE_ID_PCIE402,
+ .subvendor = PCI_VENDOR_ID_ESDGMBH,
+ .subdevice = PCI_ANY_ID,
+ },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, pci402_tbl);
+
+static struct pci_driver pci402_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = pci402_tbl,
+ .probe = pci402_probe,
+ .remove = pci402_remove,
+};
+module_pci_driver(pci402_driver);
+
+MODULE_DESCRIPTION("Socket-CAN driver for esd CAN 402 card family with esdACC core on PCIe");
+MODULE_AUTHOR("Thomas Körper <socketcan@esd.eu>");
+MODULE_AUTHOR("Stefan Mätje <stefan.maetje@esd.eu>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/can/esd/esdacc.c b/drivers/net/can/esd/esdacc.c
new file mode 100644
index 000000000000..121cbbf81458
--- /dev/null
+++ b/drivers/net/can/esd/esdacc.c
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2015 - 2016 Thomas Körper, esd electronic system design gmbh
+ * Copyright (C) 2017 - 2023 Stefan Mätje, esd electronics gmbh
+ */
+
+#include "esdacc.h"
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/ktime.h>
+
+/* esdACC ID register layout */
+#define ACC_ID_ID_MASK GENMASK(28, 0)
+#define ACC_ID_EFF_FLAG BIT(29)
+
+/* esdACC DLC register layout */
+#define ACC_DLC_DLC_MASK GENMASK(3, 0)
+#define ACC_DLC_RTR_FLAG BIT(4)
+#define ACC_DLC_TXD_FLAG BIT(5)
+
+/* ecc value of esdACC equals SJA1000's ECC register */
+#define ACC_ECC_SEG 0x1f
+#define ACC_ECC_DIR 0x20
+#define ACC_ECC_BIT 0x00
+#define ACC_ECC_FORM 0x40
+#define ACC_ECC_STUFF 0x80
+#define ACC_ECC_MASK 0xc0
+
+/* esdACC Status Register bits. Unused bits not documented. */
+#define ACC_REG_STATUS_MASK_STATUS_ES BIT(17)
+#define ACC_REG_STATUS_MASK_STATUS_EP BIT(18)
+#define ACC_REG_STATUS_MASK_STATUS_BS BIT(19)
+
+/* esdACC Overview Module BM_IRQ_Mask register related defines */
+/* Two bit wide command masks to mask or unmask a single core IRQ */
+#define ACC_BM_IRQ_UNMASK BIT(0)
+#define ACC_BM_IRQ_MASK (ACC_BM_IRQ_UNMASK << 1)
+/* Command to unmask all IRQ sources. Created by shifting
+ * and oring the two bit wide ACC_BM_IRQ_UNMASK 16 times.
+ */
+#define ACC_BM_IRQ_UNMASK_ALL 0x55555555U
+
+static void acc_resetmode_enter(struct acc_core *core)
+{
+ acc_set_bits(core, ACC_CORE_OF_CTRL_MODE,
+ ACC_REG_CONTROL_MASK_MODE_RESETMODE);
+
+ /* Read back reset mode bit to flush PCI write posting */
+ acc_resetmode_entered(core);
+}
+
+static void acc_resetmode_leave(struct acc_core *core)
+{
+ acc_clear_bits(core, ACC_CORE_OF_CTRL_MODE,
+ ACC_REG_CONTROL_MASK_MODE_RESETMODE);
+
+ /* Read back reset mode bit to flush PCI write posting */
+ acc_resetmode_entered(core);
+}
+
+static void acc_txq_put(struct acc_core *core, u32 acc_id, u8 acc_dlc,
+ const void *data)
+{
+ acc_write32_noswap(core, ACC_CORE_OF_TXFIFO_DATA_1,
+ *((const u32 *)(data + 4)));
+ acc_write32_noswap(core, ACC_CORE_OF_TXFIFO_DATA_0,
+ *((const u32 *)data));
+ acc_write32(core, ACC_CORE_OF_TXFIFO_DLC, acc_dlc);
+ /* CAN id must be written at last. This write starts TX. */
+ acc_write32(core, ACC_CORE_OF_TXFIFO_ID, acc_id);
+}
+
+static u8 acc_tx_fifo_next(struct acc_core *core, u8 tx_fifo_idx)
+{
+ ++tx_fifo_idx;
+ if (tx_fifo_idx >= core->tx_fifo_size)
+ tx_fifo_idx = 0U;
+ return tx_fifo_idx;
+}
+
+/* Convert timestamp from esdACC time stamp ticks to ns
+ *
+ * The conversion factor ts2ns from time stamp counts to ns is basically
+ * ts2ns = NSEC_PER_SEC / timestamp_frequency
+ *
+ * We handle here only a fixed timestamp frequency of 80MHz. The
+ * resulting ts2ns factor would be 12.5.
+ *
+ * At the end we multiply by 12 and add the half of the HW timestamp
+ * to get a multiplication by 12.5. This way any overflow is
+ * avoided until ktime_t itself overflows.
+ */
+#define ACC_TS_FACTOR (NSEC_PER_SEC / ACC_TS_FREQ_80MHZ)
+#define ACC_TS_80MHZ_SHIFT 1
+
+static ktime_t acc_ts2ktime(struct acc_ov *ov, u64 ts)
+{
+ u64 ns;
+
+ ns = (ts * ACC_TS_FACTOR) + (ts >> ACC_TS_80MHZ_SHIFT);
+
+ return ns_to_ktime(ns);
+}
+
+#undef ACC_TS_FACTOR
+#undef ACC_TS_80MHZ_SHIFT
+
+void acc_init_ov(struct acc_ov *ov, struct device *dev)
+{
+ u32 temp;
+
+ temp = acc_ov_read32(ov, ACC_OV_OF_VERSION);
+ ov->version = temp;
+ ov->features = (temp >> 16);
+
+ temp = acc_ov_read32(ov, ACC_OV_OF_INFO);
+ ov->total_cores = temp;
+ ov->active_cores = (temp >> 8);
+
+ ov->core_frequency = acc_ov_read32(ov, ACC_OV_OF_CANCORE_FREQ);
+ ov->timestamp_frequency = acc_ov_read32(ov, ACC_OV_OF_TS_FREQ_LO);
+
+ /* Depending on esdACC feature NEW_PSC enable the new prescaler
+ * or adjust core_frequency according to the implicit division by 2.
+ */
+ if (ov->features & ACC_OV_REG_FEAT_MASK_NEW_PSC) {
+ acc_ov_set_bits(ov, ACC_OV_OF_MODE,
+ ACC_OV_REG_MODE_MASK_NEW_PSC_ENABLE);
+ } else {
+ ov->core_frequency /= 2;
+ }
+
+ dev_dbg(dev,
+ "esdACC v%u, freq: %u/%u, feat/strap: 0x%x/0x%x, cores: %u/%u\n",
+ ov->version, ov->core_frequency, ov->timestamp_frequency,
+ ov->features, acc_ov_read32(ov, ACC_OV_OF_INFO) >> 16,
+ ov->active_cores, ov->total_cores);
+}
+
+void acc_init_bm_ptr(struct acc_ov *ov, struct acc_core *cores, const void *mem)
+{
+ unsigned int u;
+
+ /* DMA buffer layout as follows where N is the number of CAN cores
+ * implemented in the FPGA, i.e. N = ov->total_cores
+ *
+ * Section Layout Section size
+ * ----------------------------------------------
+ * FIFO Card/Overview ACC_CORE_DMABUF_SIZE
+ * FIFO Core0 ACC_CORE_DMABUF_SIZE
+ * ... ...
+ * FIFO CoreN ACC_CORE_DMABUF_SIZE
+ * irq_cnt Card/Overview sizeof(u32)
+ * irq_cnt Core0 sizeof(u32)
+ * ... ...
+ * irq_cnt CoreN sizeof(u32)
+ */
+ ov->bmfifo.messages = mem;
+ ov->bmfifo.irq_cnt = mem + (ov->total_cores + 1U) * ACC_CORE_DMABUF_SIZE;
+
+ for (u = 0U; u < ov->active_cores; u++) {
+ struct acc_core *core = &cores[u];
+
+ core->bmfifo.messages = mem + (u + 1U) * ACC_CORE_DMABUF_SIZE;
+ core->bmfifo.irq_cnt = ov->bmfifo.irq_cnt + (u + 1U);
+ }
+}
+
+int acc_open(struct net_device *netdev)
+{
+ struct acc_net_priv *priv = netdev_priv(netdev);
+ struct acc_core *core = priv->core;
+ u32 tx_fifo_status;
+ u32 ctrl_mode;
+ int err;
+
+ /* Retry to enter RESET mode if out of sync. */
+ if (priv->can.state != CAN_STATE_STOPPED) {
+ netdev_warn(netdev, "Entered %s() with bad can.state: %s\n",
+ __func__, can_get_state_str(priv->can.state));
+ acc_resetmode_enter(core);
+ priv->can.state = CAN_STATE_STOPPED;
+ }
+
+ err = open_candev(netdev);
+ if (err)
+ return err;
+
+ ctrl_mode = ACC_REG_CONTROL_MASK_IE_RXTX |
+ ACC_REG_CONTROL_MASK_IE_TXERROR |
+ ACC_REG_CONTROL_MASK_IE_ERRWARN |
+ ACC_REG_CONTROL_MASK_IE_OVERRUN |
+ ACC_REG_CONTROL_MASK_IE_ERRPASS;
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+ ctrl_mode |= ACC_REG_CONTROL_MASK_IE_BUSERR;
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+ ctrl_mode |= ACC_REG_CONTROL_MASK_MODE_LOM;
+
+ acc_set_bits(core, ACC_CORE_OF_CTRL_MODE, ctrl_mode);
+
+ acc_resetmode_leave(core);
+ priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+ /* Resync TX FIFO indices to HW state after (re-)start. */
+ tx_fifo_status = acc_read32(core, ACC_CORE_OF_TXFIFO_STATUS);
+ core->tx_fifo_head = tx_fifo_status & 0xff;
+ core->tx_fifo_tail = (tx_fifo_status >> 8) & 0xff;
+
+ netif_start_queue(netdev);
+ return 0;
+}
+
+int acc_close(struct net_device *netdev)
+{
+ struct acc_net_priv *priv = netdev_priv(netdev);
+ struct acc_core *core = priv->core;
+
+ acc_clear_bits(core, ACC_CORE_OF_CTRL_MODE,
+ ACC_REG_CONTROL_MASK_IE_RXTX |
+ ACC_REG_CONTROL_MASK_IE_TXERROR |
+ ACC_REG_CONTROL_MASK_IE_ERRWARN |
+ ACC_REG_CONTROL_MASK_IE_OVERRUN |
+ ACC_REG_CONTROL_MASK_IE_ERRPASS |
+ ACC_REG_CONTROL_MASK_IE_BUSERR);
+
+ netif_stop_queue(netdev);
+ acc_resetmode_enter(core);
+ priv->can.state = CAN_STATE_STOPPED;
+
+ /* Mark pending TX requests to be aborted after controller restart. */
+ acc_write32(core, ACC_CORE_OF_TX_ABORT_MASK, 0xffff);
+
+ /* ACC_REG_CONTROL_MASK_MODE_LOM is only accessible in RESET mode */
+ acc_clear_bits(core, ACC_CORE_OF_CTRL_MODE,
+ ACC_REG_CONTROL_MASK_MODE_LOM);
+
+ close_candev(netdev);
+ return 0;
+}
+
+netdev_tx_t acc_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct acc_net_priv *priv = netdev_priv(netdev);
+ struct acc_core *core = priv->core;
+ struct can_frame *cf = (struct can_frame *)skb->data;
+ u8 tx_fifo_head = core->tx_fifo_head;
+ int fifo_usage;
+ u32 acc_id;
+ u8 acc_dlc;
+
+ if (can_dropped_invalid_skb(netdev, skb))
+ return NETDEV_TX_OK;
+
+ /* Access core->tx_fifo_tail only once because it may be changed
+ * from the interrupt level.
+ */
+ fifo_usage = tx_fifo_head - core->tx_fifo_tail;
+ if (fifo_usage < 0)
+ fifo_usage += core->tx_fifo_size;
+
+ if (fifo_usage >= core->tx_fifo_size - 1) {
+ netdev_err(core->netdev,
+ "BUG: TX ring full when queue awake!\n");
+ netif_stop_queue(netdev);
+ return NETDEV_TX_BUSY;
+ }
+
+ if (fifo_usage == core->tx_fifo_size - 2)
+ netif_stop_queue(netdev);
+
+ acc_dlc = can_get_cc_dlc(cf, priv->can.ctrlmode);
+ if (cf->can_id & CAN_RTR_FLAG)
+ acc_dlc |= ACC_DLC_RTR_FLAG;
+
+ if (cf->can_id & CAN_EFF_FLAG) {
+ acc_id = cf->can_id & CAN_EFF_MASK;
+ acc_id |= ACC_ID_EFF_FLAG;
+ } else {
+ acc_id = cf->can_id & CAN_SFF_MASK;
+ }
+
+ can_put_echo_skb(skb, netdev, core->tx_fifo_head, 0);
+
+ core->tx_fifo_head = acc_tx_fifo_next(core, tx_fifo_head);
+
+ acc_txq_put(core, acc_id, acc_dlc, cf->data);
+
+ return NETDEV_TX_OK;
+}
+
+int acc_get_berr_counter(const struct net_device *netdev,
+ struct can_berr_counter *bec)
+{
+ struct acc_net_priv *priv = netdev_priv(netdev);
+ u32 core_status = acc_read32(priv->core, ACC_CORE_OF_STATUS);
+
+ bec->txerr = (core_status >> 8) & 0xff;
+ bec->rxerr = core_status & 0xff;
+
+ return 0;
+}
+
+int acc_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+ struct acc_net_priv *priv = netdev_priv(netdev);
+
+ switch (mode) {
+ case CAN_MODE_START:
+ /* Paranoid FIFO index check. */
+ {
+ const u32 tx_fifo_status =
+ acc_read32(priv->core, ACC_CORE_OF_TXFIFO_STATUS);
+ const u8 hw_fifo_head = tx_fifo_status;
+
+ if (hw_fifo_head != priv->core->tx_fifo_head ||
+ hw_fifo_head != priv->core->tx_fifo_tail) {
+ netdev_warn(netdev,
+ "TX FIFO mismatch: T %2u H %2u; TFHW %#08x\n",
+ priv->core->tx_fifo_tail,
+ priv->core->tx_fifo_head,
+ tx_fifo_status);
+ }
+ }
+ acc_resetmode_leave(priv->core);
+ /* To leave the bus-off state the esdACC controller begins
+ * here a grace period where it counts 128 "idle conditions" (each
+ * of 11 consecutive recessive bits) on the bus as required
+ * by the CAN spec.
+ *
+ * During this time the TX FIFO may still contain already
+ * aborted "zombie" frames that are only drained from the FIFO
+ * at the end of the grace period.
+ *
+ * To not to interfere with this drain process we don't
+ * call netif_wake_queue() here. When the controller reaches
+ * the error-active state again, it informs us about that
+ * with an acc_bmmsg_errstatechange message. Then
+ * netif_wake_queue() is called from
+ * handle_core_msg_errstatechange() instead.
+ */
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int acc_set_bittiming(struct net_device *netdev)
+{
+ struct acc_net_priv *priv = netdev_priv(netdev);
+ const struct can_bittiming *bt = &priv->can.bittiming;
+ u32 brp;
+ u32 btr;
+
+ if (priv->ov->features & ACC_OV_REG_FEAT_MASK_CANFD) {
+ u32 fbtr = 0;
+
+ netdev_dbg(netdev, "bit timing: brp %u, prop %u, ph1 %u ph2 %u, sjw %u\n",
+ bt->brp, bt->prop_seg,
+ bt->phase_seg1, bt->phase_seg2, bt->sjw);
+
+ brp = FIELD_PREP(ACC_REG_BRP_FD_MASK_BRP, bt->brp - 1);
+
+ btr = FIELD_PREP(ACC_REG_BTR_FD_MASK_TSEG1, bt->phase_seg1 + bt->prop_seg - 1);
+ btr |= FIELD_PREP(ACC_REG_BTR_FD_MASK_TSEG2, bt->phase_seg2 - 1);
+ btr |= FIELD_PREP(ACC_REG_BTR_FD_MASK_SJW, bt->sjw - 1);
+
+ /* Keep order of accesses to ACC_CORE_OF_BRP and ACC_CORE_OF_BTR. */
+ acc_write32(priv->core, ACC_CORE_OF_BRP, brp);
+ acc_write32(priv->core, ACC_CORE_OF_BTR, btr);
+
+ netdev_dbg(netdev, "esdACC: BRP %u, NBTR 0x%08x, DBTR 0x%08x",
+ brp, btr, fbtr);
+ } else {
+ netdev_dbg(netdev, "bit timing: brp %u, prop %u, ph1 %u ph2 %u, sjw %u\n",
+ bt->brp, bt->prop_seg,
+ bt->phase_seg1, bt->phase_seg2, bt->sjw);
+
+ brp = FIELD_PREP(ACC_REG_BRP_CL_MASK_BRP, bt->brp - 1);
+
+ btr = FIELD_PREP(ACC_REG_BTR_CL_MASK_TSEG1, bt->phase_seg1 + bt->prop_seg - 1);
+ btr |= FIELD_PREP(ACC_REG_BTR_CL_MASK_TSEG2, bt->phase_seg2 - 1);
+ btr |= FIELD_PREP(ACC_REG_BTR_CL_MASK_SJW, bt->sjw - 1);
+
+ /* Keep order of accesses to ACC_CORE_OF_BRP and ACC_CORE_OF_BTR. */
+ acc_write32(priv->core, ACC_CORE_OF_BRP, brp);
+ acc_write32(priv->core, ACC_CORE_OF_BTR, btr);
+
+ netdev_dbg(netdev, "esdACC: BRP %u, BTR 0x%08x", brp, btr);
+ }
+
+ return 0;
+}
+
+static void handle_core_msg_rxtxdone(struct acc_core *core,
+ const struct acc_bmmsg_rxtxdone *msg)
+{
+ struct acc_net_priv *priv = netdev_priv(core->netdev);
+ struct net_device_stats *stats = &core->netdev->stats;
+ struct sk_buff *skb;
+
+ if (msg->acc_dlc.len & ACC_DLC_TXD_FLAG) {
+ u8 tx_fifo_tail = core->tx_fifo_tail;
+
+ if (core->tx_fifo_head == tx_fifo_tail) {
+ netdev_warn(core->netdev,
+ "TX interrupt, but queue is empty!?\n");
+ return;
+ }
+
+ /* Direct access echo skb to attach HW time stamp. */
+ skb = priv->can.echo_skb[tx_fifo_tail];
+ if (skb) {
+ skb_hwtstamps(skb)->hwtstamp =
+ acc_ts2ktime(priv->ov, msg->ts);
+ }
+
+ stats->tx_packets++;
+ stats->tx_bytes += can_get_echo_skb(core->netdev, tx_fifo_tail,
+ NULL);
+
+ core->tx_fifo_tail = acc_tx_fifo_next(core, tx_fifo_tail);
+
+ netif_wake_queue(core->netdev);
+
+ } else {
+ struct can_frame *cf;
+
+ skb = alloc_can_skb(core->netdev, &cf);
+ if (!skb) {
+ stats->rx_dropped++;
+ return;
+ }
+
+ cf->can_id = msg->id & ACC_ID_ID_MASK;
+ if (msg->id & ACC_ID_EFF_FLAG)
+ cf->can_id |= CAN_EFF_FLAG;
+
+ can_frame_set_cc_len(cf, msg->acc_dlc.len & ACC_DLC_DLC_MASK,
+ priv->can.ctrlmode);
+
+ if (msg->acc_dlc.len & ACC_DLC_RTR_FLAG) {
+ cf->can_id |= CAN_RTR_FLAG;
+ } else {
+ memcpy(cf->data, msg->data, cf->len);
+ stats->rx_bytes += cf->len;
+ }
+ stats->rx_packets++;
+
+ skb_hwtstamps(skb)->hwtstamp = acc_ts2ktime(priv->ov, msg->ts);
+
+ netif_rx(skb);
+ }
+}
+
+static void handle_core_msg_txabort(struct acc_core *core,
+ const struct acc_bmmsg_txabort *msg)
+{
+ struct net_device_stats *stats = &core->netdev->stats;
+ u8 tx_fifo_tail = core->tx_fifo_tail;
+ u32 abort_mask = msg->abort_mask; /* u32 extend to avoid warnings later */
+
+ /* The abort_mask shows which frames were aborted in esdACC's FIFO. */
+ while (tx_fifo_tail != core->tx_fifo_head && (abort_mask)) {
+ const u32 tail_mask = (1U << tx_fifo_tail);
+
+ if (!(abort_mask & tail_mask))
+ break;
+ abort_mask &= ~tail_mask;
+
+ can_free_echo_skb(core->netdev, tx_fifo_tail, NULL);
+ stats->tx_dropped++;
+ stats->tx_aborted_errors++;
+
+ tx_fifo_tail = acc_tx_fifo_next(core, tx_fifo_tail);
+ }
+ core->tx_fifo_tail = tx_fifo_tail;
+ if (abort_mask)
+ netdev_warn(core->netdev, "Unhandled aborted messages\n");
+
+ if (!acc_resetmode_entered(core))
+ netif_wake_queue(core->netdev);
+}
+
+static void handle_core_msg_overrun(struct acc_core *core,
+ const struct acc_bmmsg_overrun *msg)
+{
+ struct acc_net_priv *priv = netdev_priv(core->netdev);
+ struct net_device_stats *stats = &core->netdev->stats;
+ struct can_frame *cf;
+ struct sk_buff *skb;
+
+ /* lost_cnt may be 0 if not supported by esdACC version */
+ if (msg->lost_cnt) {
+ stats->rx_errors += msg->lost_cnt;
+ stats->rx_over_errors += msg->lost_cnt;
+ } else {
+ stats->rx_errors++;
+ stats->rx_over_errors++;
+ }
+
+ skb = alloc_can_err_skb(core->netdev, &cf);
+ if (!skb)
+ return;
+
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+
+ skb_hwtstamps(skb)->hwtstamp = acc_ts2ktime(priv->ov, msg->ts);
+
+ netif_rx(skb);
+}
+
+static void handle_core_msg_buserr(struct acc_core *core,
+ const struct acc_bmmsg_buserr *msg)
+{
+ struct acc_net_priv *priv = netdev_priv(core->netdev);
+ struct net_device_stats *stats = &core->netdev->stats;
+ struct can_frame *cf;
+ struct sk_buff *skb;
+ const u32 reg_status = msg->reg_status;
+ const u8 rxerr = reg_status;
+ const u8 txerr = (reg_status >> 8);
+ u8 can_err_prot_type = 0U;
+
+ priv->can.can_stats.bus_error++;
+
+ /* Error occurred during transmission? */
+ if (msg->ecc & ACC_ECC_DIR) {
+ stats->rx_errors++;
+ } else {
+ can_err_prot_type |= CAN_ERR_PROT_TX;
+ stats->tx_errors++;
+ }
+ /* Determine error type */
+ switch (msg->ecc & ACC_ECC_MASK) {
+ case ACC_ECC_BIT:
+ can_err_prot_type |= CAN_ERR_PROT_BIT;
+ break;
+ case ACC_ECC_FORM:
+ can_err_prot_type |= CAN_ERR_PROT_FORM;
+ break;
+ case ACC_ECC_STUFF:
+ can_err_prot_type |= CAN_ERR_PROT_STUFF;
+ break;
+ default:
+ can_err_prot_type |= CAN_ERR_PROT_UNSPEC;
+ break;
+ }
+
+ skb = alloc_can_err_skb(core->netdev, &cf);
+ if (!skb)
+ return;
+
+ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR | CAN_ERR_CNT;
+
+ /* Set protocol error type */
+ cf->data[2] = can_err_prot_type;
+ /* Set error location */
+ cf->data[3] = msg->ecc & ACC_ECC_SEG;
+
+ /* Insert CAN TX and RX error counters. */
+ cf->data[6] = txerr;
+ cf->data[7] = rxerr;
+
+ skb_hwtstamps(skb)->hwtstamp = acc_ts2ktime(priv->ov, msg->ts);
+
+ netif_rx(skb);
+}
+
+static void
+handle_core_msg_errstatechange(struct acc_core *core,
+ const struct acc_bmmsg_errstatechange *msg)
+{
+ struct acc_net_priv *priv = netdev_priv(core->netdev);
+ struct can_frame *cf = NULL;
+ struct sk_buff *skb;
+ const u32 reg_status = msg->reg_status;
+ const u8 rxerr = reg_status;
+ const u8 txerr = (reg_status >> 8);
+ enum can_state new_state;
+
+ if (reg_status & ACC_REG_STATUS_MASK_STATUS_BS) {
+ new_state = CAN_STATE_BUS_OFF;
+ } else if (reg_status & ACC_REG_STATUS_MASK_STATUS_EP) {
+ new_state = CAN_STATE_ERROR_PASSIVE;
+ } else if (reg_status & ACC_REG_STATUS_MASK_STATUS_ES) {
+ new_state = CAN_STATE_ERROR_WARNING;
+ } else {
+ new_state = CAN_STATE_ERROR_ACTIVE;
+ if (priv->can.state == CAN_STATE_BUS_OFF) {
+ /* See comment in acc_set_mode() for CAN_MODE_START */
+ netif_wake_queue(core->netdev);
+ }
+ }
+
+ skb = alloc_can_err_skb(core->netdev, &cf);
+
+ if (new_state != priv->can.state) {
+ enum can_state tx_state, rx_state;
+
+ tx_state = (txerr >= rxerr) ?
+ new_state : CAN_STATE_ERROR_ACTIVE;
+ rx_state = (rxerr >= txerr) ?
+ new_state : CAN_STATE_ERROR_ACTIVE;
+
+ /* Always call can_change_state() to update the state
+ * even if alloc_can_err_skb() may have failed.
+ * can_change_state() can cope with a NULL cf pointer.
+ */
+ can_change_state(core->netdev, cf, tx_state, rx_state);
+ }
+
+ if (skb) {
+ cf->can_id |= CAN_ERR_CNT;
+ cf->data[6] = txerr;
+ cf->data[7] = rxerr;
+
+ skb_hwtstamps(skb)->hwtstamp = acc_ts2ktime(priv->ov, msg->ts);
+
+ netif_rx(skb);
+ }
+
+ if (new_state == CAN_STATE_BUS_OFF) {
+ acc_write32(core, ACC_CORE_OF_TX_ABORT_MASK, 0xffff);
+ can_bus_off(core->netdev);
+ }
+}
+
+static void handle_core_interrupt(struct acc_core *core)
+{
+ u32 msg_fifo_head = core->bmfifo.local_irq_cnt & 0xff;
+
+ while (core->bmfifo.msg_fifo_tail != msg_fifo_head) {
+ const union acc_bmmsg *msg =
+ &core->bmfifo.messages[core->bmfifo.msg_fifo_tail];
+
+ switch (msg->msg_id) {
+ case BM_MSG_ID_RXTXDONE:
+ handle_core_msg_rxtxdone(core, &msg->rxtxdone);
+ break;
+
+ case BM_MSG_ID_TXABORT:
+ handle_core_msg_txabort(core, &msg->txabort);
+ break;
+
+ case BM_MSG_ID_OVERRUN:
+ handle_core_msg_overrun(core, &msg->overrun);
+ break;
+
+ case BM_MSG_ID_BUSERR:
+ handle_core_msg_buserr(core, &msg->buserr);
+ break;
+
+ case BM_MSG_ID_ERRPASSIVE:
+ case BM_MSG_ID_ERRWARN:
+ handle_core_msg_errstatechange(core,
+ &msg->errstatechange);
+ break;
+
+ default:
+ /* Ignore all other BM messages (like the CAN-FD messages) */
+ break;
+ }
+
+ core->bmfifo.msg_fifo_tail =
+ (core->bmfifo.msg_fifo_tail + 1) & 0xff;
+ }
+}
+
+/**
+ * acc_card_interrupt() - handle the interrupts of an esdACC FPGA
+ *
+ * @ov: overview module structure
+ * @cores: array of core structures
+ *
+ * This function handles all interrupts pending for the overview module and the
+ * CAN cores of the esdACC FPGA.
+ *
+ * It examines for all cores (the overview module core and the CAN cores)
+ * the bmfifo.irq_cnt and compares it with the previously saved
+ * bmfifo.local_irq_cnt. An IRQ is pending if they differ. The esdACC FPGA
+ * updates the bmfifo.irq_cnt values by DMA.
+ *
+ * The pending interrupts are masked by writing to the IRQ mask register at
+ * ACC_OV_OF_BM_IRQ_MASK. This register has for each core a two bit command
+ * field evaluated as follows:
+ *
+ * Define, bit pattern: meaning
+ * 00: no action
+ * ACC_BM_IRQ_UNMASK, 01: unmask interrupt
+ * ACC_BM_IRQ_MASK, 10: mask interrupt
+ * 11: no action
+ *
+ * For each CAN core with a pending IRQ handle_core_interrupt() handles all
+ * busmaster messages from the message FIFO. The last handled message (FIFO
+ * index) is written to the CAN core to acknowledge its handling.
+ *
+ * Last step is to unmask all interrupts in the FPGA using
+ * ACC_BM_IRQ_UNMASK_ALL.
+ *
+ * Return:
+ * IRQ_HANDLED, if card generated an interrupt that was handled
+ * IRQ_NONE, if the interrupt is not ours
+ */
+irqreturn_t acc_card_interrupt(struct acc_ov *ov, struct acc_core *cores)
+{
+ u32 irqmask;
+ int i;
+
+ /* First we look for whom interrupts are pending, card/overview
+ * or any of the cores. Two bits in irqmask are used for each;
+ * Each two bit field is set to ACC_BM_IRQ_MASK if an IRQ is
+ * pending.
+ */
+ irqmask = 0U;
+ if (READ_ONCE(*ov->bmfifo.irq_cnt) != ov->bmfifo.local_irq_cnt) {
+ irqmask |= ACC_BM_IRQ_MASK;
+ ov->bmfifo.local_irq_cnt = READ_ONCE(*ov->bmfifo.irq_cnt);
+ }
+
+ for (i = 0; i < ov->active_cores; i++) {
+ struct acc_core *core = &cores[i];
+
+ if (READ_ONCE(*core->bmfifo.irq_cnt) != core->bmfifo.local_irq_cnt) {
+ irqmask |= (ACC_BM_IRQ_MASK << (2 * (i + 1)));
+ core->bmfifo.local_irq_cnt = READ_ONCE(*core->bmfifo.irq_cnt);
+ }
+ }
+
+ if (!irqmask)
+ return IRQ_NONE;
+
+ /* At second we tell the card we're working on them by writing irqmask,
+ * call handle_{ov|core}_interrupt and then acknowledge the
+ * interrupts by writing irq_cnt:
+ */
+ acc_ov_write32(ov, ACC_OV_OF_BM_IRQ_MASK, irqmask);
+
+ if (irqmask & ACC_BM_IRQ_MASK) {
+ /* handle_ov_interrupt(); - no use yet. */
+ acc_ov_write32(ov, ACC_OV_OF_BM_IRQ_COUNTER,
+ ov->bmfifo.local_irq_cnt);
+ }
+
+ for (i = 0; i < ov->active_cores; i++) {
+ struct acc_core *core = &cores[i];
+
+ if (irqmask & (ACC_BM_IRQ_MASK << (2 * (i + 1)))) {
+ handle_core_interrupt(core);
+ acc_write32(core, ACC_OV_OF_BM_IRQ_COUNTER,
+ core->bmfifo.local_irq_cnt);
+ }
+ }
+
+ acc_ov_write32(ov, ACC_OV_OF_BM_IRQ_MASK, ACC_BM_IRQ_UNMASK_ALL);
+
+ return IRQ_HANDLED;
+}
diff --git a/drivers/net/can/esd/esdacc.h b/drivers/net/can/esd/esdacc.h
new file mode 100644
index 000000000000..a70488b25d39
--- /dev/null
+++ b/drivers/net/can/esd/esdacc.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2015 - 2016 Thomas Körper, esd electronic system design gmbh
+ * Copyright (C) 2017 - 2023 Stefan Mätje, esd electronics gmbh
+ */
+
+#include <linux/bits.h>
+#include <linux/can/dev.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/units.h>
+
+#define ACC_TS_FREQ_80MHZ (80 * HZ_PER_MHZ)
+#define ACC_I2C_ADDON_DETECT_DELAY_MS 10
+
+/* esdACC Overview Module */
+#define ACC_OV_OF_PROBE 0x0000
+#define ACC_OV_OF_VERSION 0x0004
+#define ACC_OV_OF_INFO 0x0008
+#define ACC_OV_OF_CANCORE_FREQ 0x000c
+#define ACC_OV_OF_TS_FREQ_LO 0x0010
+#define ACC_OV_OF_TS_FREQ_HI 0x0014
+#define ACC_OV_OF_IRQ_STATUS_CORES 0x0018
+#define ACC_OV_OF_TS_CURR_LO 0x001c
+#define ACC_OV_OF_TS_CURR_HI 0x0020
+#define ACC_OV_OF_IRQ_STATUS 0x0028
+#define ACC_OV_OF_MODE 0x002c
+#define ACC_OV_OF_BM_IRQ_COUNTER 0x0070
+#define ACC_OV_OF_BM_IRQ_MASK 0x0074
+#define ACC_OV_OF_MSI_DATA 0x0080
+#define ACC_OV_OF_MSI_ADDRESSOFFSET 0x0084
+
+/* Feature flags are contained in the upper 16 bit of the version
+ * register at ACC_OV_OF_VERSION but only used with these masks after
+ * extraction into an extra variable => (xx - 16).
+ */
+#define ACC_OV_REG_FEAT_MASK_CANFD BIT(27 - 16)
+#define ACC_OV_REG_FEAT_MASK_NEW_PSC BIT(28 - 16)
+
+#define ACC_OV_REG_MODE_MASK_ENDIAN_LITTLE BIT(0)
+#define ACC_OV_REG_MODE_MASK_BM_ENABLE BIT(1)
+#define ACC_OV_REG_MODE_MASK_MODE_LED BIT(2)
+#define ACC_OV_REG_MODE_MASK_TIMER_ENABLE BIT(4)
+#define ACC_OV_REG_MODE_MASK_TIMER_ONE_SHOT BIT(5)
+#define ACC_OV_REG_MODE_MASK_TIMER_ABSOLUTE BIT(6)
+#define ACC_OV_REG_MODE_MASK_TIMER GENMASK(6, 4)
+#define ACC_OV_REG_MODE_MASK_TS_SRC GENMASK(8, 7)
+#define ACC_OV_REG_MODE_MASK_I2C_ENABLE BIT(11)
+#define ACC_OV_REG_MODE_MASK_MSI_ENABLE BIT(14)
+#define ACC_OV_REG_MODE_MASK_NEW_PSC_ENABLE BIT(15)
+#define ACC_OV_REG_MODE_MASK_FPGA_RESET BIT(31)
+
+/* esdACC CAN Core Module */
+#define ACC_CORE_OF_CTRL_MODE 0x0000
+#define ACC_CORE_OF_STATUS_IRQ 0x0008
+#define ACC_CORE_OF_BRP 0x000c
+#define ACC_CORE_OF_BTR 0x0010
+#define ACC_CORE_OF_FBTR 0x0014
+#define ACC_CORE_OF_STATUS 0x0030
+#define ACC_CORE_OF_TXFIFO_CONFIG 0x0048
+#define ACC_CORE_OF_TXFIFO_STATUS 0x004c
+#define ACC_CORE_OF_TX_STATUS_IRQ 0x0050
+#define ACC_CORE_OF_TX_ABORT_MASK 0x0054
+#define ACC_CORE_OF_BM_IRQ_COUNTER 0x0070
+#define ACC_CORE_OF_TXFIFO_ID 0x00c0
+#define ACC_CORE_OF_TXFIFO_DLC 0x00c4
+#define ACC_CORE_OF_TXFIFO_DATA_0 0x00c8
+#define ACC_CORE_OF_TXFIFO_DATA_1 0x00cc
+
+#define ACC_REG_CONTROL_MASK_MODE_RESETMODE BIT(0)
+#define ACC_REG_CONTROL_MASK_MODE_LOM BIT(1)
+#define ACC_REG_CONTROL_MASK_MODE_STM BIT(2)
+#define ACC_REG_CONTROL_MASK_MODE_TRANSEN BIT(5)
+#define ACC_REG_CONTROL_MASK_MODE_TS BIT(6)
+#define ACC_REG_CONTROL_MASK_MODE_SCHEDULE BIT(7)
+
+#define ACC_REG_CONTROL_MASK_IE_RXTX BIT(8)
+#define ACC_REG_CONTROL_MASK_IE_TXERROR BIT(9)
+#define ACC_REG_CONTROL_MASK_IE_ERRWARN BIT(10)
+#define ACC_REG_CONTROL_MASK_IE_OVERRUN BIT(11)
+#define ACC_REG_CONTROL_MASK_IE_TSI BIT(12)
+#define ACC_REG_CONTROL_MASK_IE_ERRPASS BIT(13)
+#define ACC_REG_CONTROL_MASK_IE_ALI BIT(14)
+#define ACC_REG_CONTROL_MASK_IE_BUSERR BIT(15)
+
+/* BRP and BTR register layout for CAN-Classic version */
+#define ACC_REG_BRP_CL_MASK_BRP GENMASK(8, 0)
+#define ACC_REG_BTR_CL_MASK_TSEG1 GENMASK(3, 0)
+#define ACC_REG_BTR_CL_MASK_TSEG2 GENMASK(18, 16)
+#define ACC_REG_BTR_CL_MASK_SJW GENMASK(25, 24)
+
+/* BRP and BTR register layout for CAN-FD version */
+#define ACC_REG_BRP_FD_MASK_BRP GENMASK(7, 0)
+#define ACC_REG_BTR_FD_MASK_TSEG1 GENMASK(7, 0)
+#define ACC_REG_BTR_FD_MASK_TSEG2 GENMASK(22, 16)
+#define ACC_REG_BTR_FD_MASK_SJW GENMASK(30, 24)
+
+/* 256 BM_MSGs of 32 byte size */
+#define ACC_CORE_DMAMSG_SIZE 32U
+#define ACC_CORE_DMABUF_SIZE (256U * ACC_CORE_DMAMSG_SIZE)
+
+enum acc_bmmsg_id {
+ BM_MSG_ID_RXTXDONE = 0x01,
+ BM_MSG_ID_TXABORT = 0x02,
+ BM_MSG_ID_OVERRUN = 0x03,
+ BM_MSG_ID_BUSERR = 0x04,
+ BM_MSG_ID_ERRPASSIVE = 0x05,
+ BM_MSG_ID_ERRWARN = 0x06,
+ BM_MSG_ID_TIMESLICE = 0x07,
+ BM_MSG_ID_HWTIMER = 0x08,
+ BM_MSG_ID_HOTPLUG = 0x09,
+};
+
+/* The struct acc_bmmsg_* structure declarations that follow here provide
+ * access to the ring buffer of bus master messages maintained by the FPGA
+ * bus master engine. All bus master messages have the same size of
+ * ACC_CORE_DMAMSG_SIZE and a minimum alignment of ACC_CORE_DMAMSG_SIZE in
+ * memory.
+ *
+ * All structure members are natural aligned. Therefore we should not need
+ * a __packed attribute. All struct acc_bmmsg_* declarations have at least
+ * reserved* members to fill the structure to the full ACC_CORE_DMAMSG_SIZE.
+ *
+ * A failure of this property due padding will be detected at compile time
+ * by static_assert(sizeof(union acc_bmmsg) == ACC_CORE_DMAMSG_SIZE).
+ */
+
+struct acc_bmmsg_rxtxdone {
+ u8 msg_id;
+ u8 txfifo_level;
+ u8 reserved1[2];
+ u8 txtsfifo_level;
+ u8 reserved2[3];
+ u32 id;
+ struct {
+ u8 len;
+ u8 txdfifo_idx;
+ u8 zeroes8;
+ u8 reserved;
+ } acc_dlc;
+ u8 data[CAN_MAX_DLEN];
+ /* Time stamps in struct acc_ov::timestamp_frequency ticks. */
+ u64 ts;
+};
+
+struct acc_bmmsg_txabort {
+ u8 msg_id;
+ u8 txfifo_level;
+ u16 abort_mask;
+ u8 txtsfifo_level;
+ u8 reserved2[1];
+ u16 abort_mask_txts;
+ u64 ts;
+ u32 reserved3[4];
+};
+
+struct acc_bmmsg_overrun {
+ u8 msg_id;
+ u8 txfifo_level;
+ u8 lost_cnt;
+ u8 reserved1;
+ u8 txtsfifo_level;
+ u8 reserved2[3];
+ u64 ts;
+ u32 reserved3[4];
+};
+
+struct acc_bmmsg_buserr {
+ u8 msg_id;
+ u8 txfifo_level;
+ u8 ecc;
+ u8 reserved1;
+ u8 txtsfifo_level;
+ u8 reserved2[3];
+ u64 ts;
+ u32 reg_status;
+ u32 reg_btr;
+ u32 reserved3[2];
+};
+
+struct acc_bmmsg_errstatechange {
+ u8 msg_id;
+ u8 txfifo_level;
+ u8 reserved1[2];
+ u8 txtsfifo_level;
+ u8 reserved2[3];
+ u64 ts;
+ u32 reg_status;
+ u32 reserved3[3];
+};
+
+struct acc_bmmsg_timeslice {
+ u8 msg_id;
+ u8 txfifo_level;
+ u8 reserved1[2];
+ u8 txtsfifo_level;
+ u8 reserved2[3];
+ u64 ts;
+ u32 reserved3[4];
+};
+
+struct acc_bmmsg_hwtimer {
+ u8 msg_id;
+ u8 reserved1[3];
+ u32 reserved2[1];
+ u64 timer;
+ u32 reserved3[4];
+};
+
+struct acc_bmmsg_hotplug {
+ u8 msg_id;
+ u8 reserved1[3];
+ u32 reserved2[7];
+};
+
+union acc_bmmsg {
+ u8 msg_id;
+ struct acc_bmmsg_rxtxdone rxtxdone;
+ struct acc_bmmsg_txabort txabort;
+ struct acc_bmmsg_overrun overrun;
+ struct acc_bmmsg_buserr buserr;
+ struct acc_bmmsg_errstatechange errstatechange;
+ struct acc_bmmsg_timeslice timeslice;
+ struct acc_bmmsg_hwtimer hwtimer;
+};
+
+/* Check size of union acc_bmmsg to be of expected size. */
+static_assert(sizeof(union acc_bmmsg) == ACC_CORE_DMAMSG_SIZE);
+
+struct acc_bmfifo {
+ const union acc_bmmsg *messages;
+ /* irq_cnt points to an u32 value where the esdACC FPGA deposits
+ * the bm_fifo head index in coherent DMA memory. Only bits 7..0
+ * are valid. Use READ_ONCE() to access this memory location.
+ */
+ const u32 *irq_cnt;
+ u32 local_irq_cnt;
+ u32 msg_fifo_tail;
+};
+
+struct acc_core {
+ void __iomem *addr;
+ struct net_device *netdev;
+ struct acc_bmfifo bmfifo;
+ u8 tx_fifo_size;
+ u8 tx_fifo_head;
+ u8 tx_fifo_tail;
+};
+
+struct acc_ov {
+ void __iomem *addr;
+ struct acc_bmfifo bmfifo;
+ u32 timestamp_frequency;
+ u32 core_frequency;
+ u16 version;
+ u16 features;
+ u8 total_cores;
+ u8 active_cores;
+};
+
+struct acc_net_priv {
+ struct can_priv can; /* must be the first member! */
+ struct acc_core *core;
+ struct acc_ov *ov;
+};
+
+static inline u32 acc_read32(struct acc_core *core, unsigned short offs)
+{
+ return ioread32be(core->addr + offs);
+}
+
+static inline void acc_write32(struct acc_core *core,
+ unsigned short offs, u32 v)
+{
+ iowrite32be(v, core->addr + offs);
+}
+
+static inline void acc_write32_noswap(struct acc_core *core,
+ unsigned short offs, u32 v)
+{
+ iowrite32(v, core->addr + offs);
+}
+
+static inline void acc_set_bits(struct acc_core *core,
+ unsigned short offs, u32 mask)
+{
+ u32 v = acc_read32(core, offs);
+
+ v |= mask;
+ acc_write32(core, offs, v);
+}
+
+static inline void acc_clear_bits(struct acc_core *core,
+ unsigned short offs, u32 mask)
+{
+ u32 v = acc_read32(core, offs);
+
+ v &= ~mask;
+ acc_write32(core, offs, v);
+}
+
+static inline int acc_resetmode_entered(struct acc_core *core)
+{
+ u32 ctrl = acc_read32(core, ACC_CORE_OF_CTRL_MODE);
+
+ return (ctrl & ACC_REG_CONTROL_MASK_MODE_RESETMODE) != 0;
+}
+
+static inline u32 acc_ov_read32(struct acc_ov *ov, unsigned short offs)
+{
+ return ioread32be(ov->addr + offs);
+}
+
+static inline void acc_ov_write32(struct acc_ov *ov,
+ unsigned short offs, u32 v)
+{
+ iowrite32be(v, ov->addr + offs);
+}
+
+static inline void acc_ov_set_bits(struct acc_ov *ov,
+ unsigned short offs, u32 b)
+{
+ u32 v = acc_ov_read32(ov, offs);
+
+ v |= b;
+ acc_ov_write32(ov, offs, v);
+}
+
+static inline void acc_ov_clear_bits(struct acc_ov *ov,
+ unsigned short offs, u32 b)
+{
+ u32 v = acc_ov_read32(ov, offs);
+
+ v &= ~b;
+ acc_ov_write32(ov, offs, v);
+}
+
+static inline void acc_reset_fpga(struct acc_ov *ov)
+{
+ acc_ov_write32(ov, ACC_OV_OF_MODE, ACC_OV_REG_MODE_MASK_FPGA_RESET);
+
+ /* (Re-)start and wait for completion of addon detection on the I^2C bus */
+ acc_ov_set_bits(ov, ACC_OV_OF_MODE, ACC_OV_REG_MODE_MASK_I2C_ENABLE);
+ mdelay(ACC_I2C_ADDON_DETECT_DELAY_MS);
+}
+
+void acc_init_ov(struct acc_ov *ov, struct device *dev);
+void acc_init_bm_ptr(struct acc_ov *ov, struct acc_core *cores,
+ const void *mem);
+int acc_open(struct net_device *netdev);
+int acc_close(struct net_device *netdev);
+netdev_tx_t acc_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+int acc_get_berr_counter(const struct net_device *netdev,
+ struct can_berr_counter *bec);
+int acc_set_mode(struct net_device *netdev, enum can_mode mode);
+int acc_set_bittiming(struct net_device *netdev);
+irqreturn_t acc_card_interrupt(struct acc_ov *ov, struct acc_core *cores);
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index a57005faa04f..f81b598147b3 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
#define KVASER_PCIEFD_BEC_POLL_FREQ (jiffies + msecs_to_jiffies(200))
#define KVASER_PCIEFD_MAX_ERR_REP 256U
#define KVASER_PCIEFD_CAN_TX_MAX_COUNT 17U
-#define KVASER_PCIEFD_MAX_CAN_CHANNELS 4UL
+#define KVASER_PCIEFD_MAX_CAN_CHANNELS 8UL
#define KVASER_PCIEFD_DMA_COUNT 2U
#define KVASER_PCIEFD_DMA_SIZE (4U * 1024U)
@@ -47,12 +47,19 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
#define KVASER_PCIEFD_MINIPCIE_2CAN_V3_DEVICE_ID 0x0015
#define KVASER_PCIEFD_MINIPCIE_1CAN_V3_DEVICE_ID 0x0016
+/* Xilinx based devices */
+#define KVASER_PCIEFD_M2_4CAN_DEVICE_ID 0x0017
+#define KVASER_PCIEFD_8CAN_DEVICE_ID 0x0019
+
/* Altera SerDes Enable 64-bit DMA address translation */
#define KVASER_PCIEFD_ALTERA_DMA_64BIT BIT(0)
/* SmartFusion2 SerDes LSB address translation mask */
#define KVASER_PCIEFD_SF2_DMA_LSB_MASK GENMASK(31, 12)
+/* Xilinx SerDes LSB address translation mask */
+#define KVASER_PCIEFD_XILINX_DMA_LSB_MASK GENMASK(31, 12)
+
/* Kvaser KCAN CAN controller registers */
#define KVASER_PCIEFD_KCAN_FIFO_REG 0x100
#define KVASER_PCIEFD_KCAN_FIFO_LAST_REG 0x180
@@ -281,6 +288,8 @@ static void kvaser_pciefd_write_dma_map_altera(struct kvaser_pciefd *pcie,
dma_addr_t addr, int index);
static void kvaser_pciefd_write_dma_map_sf2(struct kvaser_pciefd *pcie,
dma_addr_t addr, int index);
+static void kvaser_pciefd_write_dma_map_xilinx(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index);
struct kvaser_pciefd_address_offset {
u32 serdes;
@@ -335,6 +344,18 @@ static const struct kvaser_pciefd_address_offset kvaser_pciefd_sf2_address_offse
.kcan_ch1 = 0x142000,
};
+static const struct kvaser_pciefd_address_offset kvaser_pciefd_xilinx_address_offset = {
+ .serdes = 0x00208,
+ .pci_ien = 0x102004,
+ .pci_irq = 0x102008,
+ .sysid = 0x100000,
+ .loopback = 0x103000,
+ .kcan_srb_fifo = 0x120000,
+ .kcan_srb = 0x121000,
+ .kcan_ch0 = 0x140000,
+ .kcan_ch1 = 0x142000,
+};
+
static const struct kvaser_pciefd_irq_mask kvaser_pciefd_altera_irq_mask = {
.kcan_rx0 = BIT(4),
.kcan_tx = { BIT(0), BIT(1), BIT(2), BIT(3) },
@@ -347,6 +368,12 @@ static const struct kvaser_pciefd_irq_mask kvaser_pciefd_sf2_irq_mask = {
.all = GENMASK(19, 16) | BIT(4),
};
+static const struct kvaser_pciefd_irq_mask kvaser_pciefd_xilinx_irq_mask = {
+ .kcan_rx0 = BIT(4),
+ .kcan_tx = { BIT(16), BIT(17), BIT(18), BIT(19) },
+ .all = GENMASK(19, 16) | BIT(4),
+};
+
static const struct kvaser_pciefd_dev_ops kvaser_pciefd_altera_dev_ops = {
.kvaser_pciefd_write_dma_map = kvaser_pciefd_write_dma_map_altera,
};
@@ -355,6 +382,10 @@ static const struct kvaser_pciefd_dev_ops kvaser_pciefd_sf2_dev_ops = {
.kvaser_pciefd_write_dma_map = kvaser_pciefd_write_dma_map_sf2,
};
+static const struct kvaser_pciefd_dev_ops kvaser_pciefd_xilinx_dev_ops = {
+ .kvaser_pciefd_write_dma_map = kvaser_pciefd_write_dma_map_xilinx,
+};
+
static const struct kvaser_pciefd_driver_data kvaser_pciefd_altera_driver_data = {
.address_offset = &kvaser_pciefd_altera_address_offset,
.irq_mask = &kvaser_pciefd_altera_irq_mask,
@@ -367,6 +398,12 @@ static const struct kvaser_pciefd_driver_data kvaser_pciefd_sf2_driver_data = {
.ops = &kvaser_pciefd_sf2_dev_ops,
};
+static const struct kvaser_pciefd_driver_data kvaser_pciefd_xilinx_driver_data = {
+ .address_offset = &kvaser_pciefd_xilinx_address_offset,
+ .irq_mask = &kvaser_pciefd_xilinx_irq_mask,
+ .ops = &kvaser_pciefd_xilinx_dev_ops,
+};
+
struct kvaser_pciefd_can {
struct can_priv can;
struct kvaser_pciefd *kv_pcie;
@@ -457,6 +494,14 @@ static struct pci_device_id kvaser_pciefd_id_table[] = {
.driver_data = (kernel_ulong_t)&kvaser_pciefd_sf2_driver_data,
},
{
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_M2_4CAN_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_xilinx_driver_data,
+ },
+ {
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_8CAN_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_xilinx_driver_data,
+ },
+ {
0,
},
};
@@ -1035,6 +1080,21 @@ static void kvaser_pciefd_write_dma_map_sf2(struct kvaser_pciefd *pcie,
iowrite32(msb, serdes_base + 0x4);
}
+static void kvaser_pciefd_write_dma_map_xilinx(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index)
+{
+ void __iomem *serdes_base;
+ u32 lsb = addr & KVASER_PCIEFD_XILINX_DMA_LSB_MASK;
+ u32 msb = 0x0;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ msb = addr >> 32;
+#endif
+ serdes_base = KVASER_PCIEFD_SERDES_ADDR(pcie) + 0x8 * index;
+ iowrite32(msb, serdes_base);
+ iowrite32(lsb, serdes_base + 0x4);
+}
+
static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
{
int i;
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 16ecc11c7f62..14b231c4d7ec 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -255,6 +255,7 @@ enum m_can_reg {
#define TXESC_TBDS_64B 0x7
/* Tx Event FIFO Configuration (TXEFC) */
+#define TXEFC_EFWM_MASK GENMASK(29, 24)
#define TXEFC_EFS_MASK GENMASK(21, 16)
/* Tx Event FIFO Status (TXEFS) */
@@ -320,6 +321,12 @@ struct id_and_dlc {
u32 dlc;
};
+struct m_can_fifo_element {
+ u32 id;
+ u32 dlc;
+ u8 data[CANFD_MAX_DLEN];
+};
+
static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg)
{
return cdev->ops->read_reg(cdev, reg);
@@ -372,16 +379,6 @@ m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset, u32 *val)
return cdev->ops->read_fifo(cdev, addr_offset, val, 1);
}
-static inline bool _m_can_tx_fifo_full(u32 txfqs)
-{
- return !!(txfqs & TXFQS_TFQF);
-}
-
-static inline bool m_can_tx_fifo_full(struct m_can_classdev *cdev)
-{
- return _m_can_tx_fifo_full(m_can_read(cdev, M_CAN_TXFQS));
-}
-
static void m_can_config_endisable(struct m_can_classdev *cdev, bool enable)
{
u32 cccr = m_can_read(cdev, M_CAN_CCCR);
@@ -416,15 +413,48 @@ static void m_can_config_endisable(struct m_can_classdev *cdev, bool enable)
}
}
+static void m_can_interrupt_enable(struct m_can_classdev *cdev, u32 interrupts)
+{
+ if (cdev->active_interrupts == interrupts)
+ return;
+ cdev->ops->write_reg(cdev, M_CAN_IE, interrupts);
+ cdev->active_interrupts = interrupts;
+}
+
+static void m_can_coalescing_disable(struct m_can_classdev *cdev)
+{
+ u32 new_interrupts = cdev->active_interrupts | IR_RF0N | IR_TEFN;
+
+ if (!cdev->net->irq)
+ return;
+
+ hrtimer_cancel(&cdev->hrtimer);
+ m_can_interrupt_enable(cdev, new_interrupts);
+}
+
static inline void m_can_enable_all_interrupts(struct m_can_classdev *cdev)
{
+ if (!cdev->net->irq) {
+ dev_dbg(cdev->dev, "Start hrtimer\n");
+ hrtimer_start(&cdev->hrtimer,
+ ms_to_ktime(HRTIMER_POLL_INTERVAL_MS),
+ HRTIMER_MODE_REL_PINNED);
+ }
+
/* Only interrupt line 0 is used in this driver */
m_can_write(cdev, M_CAN_ILE, ILE_EINT0);
}
static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev)
{
+ m_can_coalescing_disable(cdev);
m_can_write(cdev, M_CAN_ILE, 0x0);
+ cdev->active_interrupts = 0x0;
+
+ if (!cdev->net->irq) {
+ dev_dbg(cdev->dev, "Stop hrtimer\n");
+ hrtimer_cancel(&cdev->hrtimer);
+ }
}
/* Retrieve internal timestamp counter from TSCV.TSC, and shift it to 32-bit
@@ -444,18 +474,26 @@ static u32 m_can_get_timestamp(struct m_can_classdev *cdev)
static void m_can_clean(struct net_device *net)
{
struct m_can_classdev *cdev = netdev_priv(net);
+ unsigned long irqflags;
- if (cdev->tx_skb) {
- int putidx = 0;
+ if (cdev->tx_ops) {
+ for (int i = 0; i != cdev->tx_fifo_size; ++i) {
+ if (!cdev->tx_ops[i].skb)
+ continue;
- net->stats.tx_errors++;
- if (cdev->version > 30)
- putidx = FIELD_GET(TXFQS_TFQPI_MASK,
- m_can_read(cdev, M_CAN_TXFQS));
-
- can_free_echo_skb(cdev->net, putidx, NULL);
- cdev->tx_skb = NULL;
+ net->stats.tx_errors++;
+ cdev->tx_ops[i].skb = NULL;
+ }
}
+
+ for (int i = 0; i != cdev->can.echo_skb_max; ++i)
+ can_free_echo_skb(cdev->net, i, NULL);
+
+ netdev_reset_queue(cdev->net);
+
+ spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags);
+ cdev->tx_fifo_in_flight = 0;
+ spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags);
}
/* For peripherals, pass skb to rx-offload, which will push skb from
@@ -1007,23 +1045,60 @@ static int m_can_poll(struct napi_struct *napi, int quota)
* echo. timestamp is used for peripherals to ensure correct ordering
* by rx-offload, and is ignored for non-peripherals.
*/
-static void m_can_tx_update_stats(struct m_can_classdev *cdev,
- unsigned int msg_mark,
- u32 timestamp)
+static unsigned int m_can_tx_update_stats(struct m_can_classdev *cdev,
+ unsigned int msg_mark, u32 timestamp)
{
struct net_device *dev = cdev->net;
struct net_device_stats *stats = &dev->stats;
+ unsigned int frame_len;
if (cdev->is_peripheral)
stats->tx_bytes +=
can_rx_offload_get_echo_skb_queue_timestamp(&cdev->offload,
msg_mark,
timestamp,
- NULL);
+ &frame_len);
else
- stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL);
+ stats->tx_bytes += can_get_echo_skb(dev, msg_mark, &frame_len);
stats->tx_packets++;
+
+ return frame_len;
+}
+
+static void m_can_finish_tx(struct m_can_classdev *cdev, int transmitted,
+ unsigned int transmitted_frame_len)
+{
+ unsigned long irqflags;
+
+ netdev_completed_queue(cdev->net, transmitted, transmitted_frame_len);
+
+ spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags);
+ if (cdev->tx_fifo_in_flight >= cdev->tx_fifo_size && transmitted > 0)
+ netif_wake_queue(cdev->net);
+ cdev->tx_fifo_in_flight -= transmitted;
+ spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags);
+}
+
+static netdev_tx_t m_can_start_tx(struct m_can_classdev *cdev)
+{
+ unsigned long irqflags;
+ int tx_fifo_in_flight;
+
+ spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags);
+ tx_fifo_in_flight = cdev->tx_fifo_in_flight + 1;
+ if (tx_fifo_in_flight >= cdev->tx_fifo_size) {
+ netif_stop_queue(cdev->net);
+ if (tx_fifo_in_flight > cdev->tx_fifo_size) {
+ netdev_err_once(cdev->net, "hard_xmit called while TX FIFO full\n");
+ spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags);
+ return NETDEV_TX_BUSY;
+ }
+ }
+ cdev->tx_fifo_in_flight = tx_fifo_in_flight;
+ spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags);
+
+ return NETDEV_TX_OK;
}
static int m_can_echo_tx_event(struct net_device *dev)
@@ -1035,6 +1110,8 @@ static int m_can_echo_tx_event(struct net_device *dev)
int i = 0;
int err = 0;
unsigned int msg_mark;
+ int processed = 0;
+ unsigned int processed_frame_len = 0;
struct m_can_classdev *cdev = netdev_priv(dev);
@@ -1063,25 +1140,62 @@ static int m_can_echo_tx_event(struct net_device *dev)
fgi = (++fgi >= cdev->mcfg[MRAM_TXE].num ? 0 : fgi);
/* update stats */
- m_can_tx_update_stats(cdev, msg_mark, timestamp);
+ processed_frame_len += m_can_tx_update_stats(cdev, msg_mark,
+ timestamp);
+
+ ++processed;
}
if (ack_fgi != -1)
m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK,
ack_fgi));
+ m_can_finish_tx(cdev, processed, processed_frame_len);
+
return err;
}
+static void m_can_coalescing_update(struct m_can_classdev *cdev, u32 ir)
+{
+ u32 new_interrupts = cdev->active_interrupts;
+ bool enable_rx_timer = false;
+ bool enable_tx_timer = false;
+
+ if (!cdev->net->irq)
+ return;
+
+ if (cdev->rx_coalesce_usecs_irq > 0 && (ir & (IR_RF0N | IR_RF0W))) {
+ enable_rx_timer = true;
+ new_interrupts &= ~IR_RF0N;
+ }
+ if (cdev->tx_coalesce_usecs_irq > 0 && (ir & (IR_TEFN | IR_TEFW))) {
+ enable_tx_timer = true;
+ new_interrupts &= ~IR_TEFN;
+ }
+ if (!enable_rx_timer && !hrtimer_active(&cdev->hrtimer))
+ new_interrupts |= IR_RF0N;
+ if (!enable_tx_timer && !hrtimer_active(&cdev->hrtimer))
+ new_interrupts |= IR_TEFN;
+
+ m_can_interrupt_enable(cdev, new_interrupts);
+ if (enable_rx_timer | enable_tx_timer)
+ hrtimer_start(&cdev->hrtimer, cdev->irq_timer_wait,
+ HRTIMER_MODE_REL);
+}
+
static irqreturn_t m_can_isr(int irq, void *dev_id)
{
struct net_device *dev = (struct net_device *)dev_id;
struct m_can_classdev *cdev = netdev_priv(dev);
u32 ir;
- if (pm_runtime_suspended(cdev->dev))
+ if (pm_runtime_suspended(cdev->dev)) {
+ m_can_coalescing_disable(cdev);
return IRQ_NONE;
+ }
+
ir = m_can_read(cdev, M_CAN_IR);
+ m_can_coalescing_update(cdev, ir);
if (!ir)
return IRQ_NONE;
@@ -1096,13 +1210,17 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
* - state change IRQ
* - bus error IRQ and bus error reporting
*/
- if ((ir & IR_RF0N) || (ir & IR_ERR_ALL_30X)) {
+ if (ir & (IR_RF0N | IR_RF0W | IR_ERR_ALL_30X)) {
cdev->irqstatus = ir;
if (!cdev->is_peripheral) {
m_can_disable_all_interrupts(cdev);
napi_schedule(&cdev->napi);
- } else if (m_can_rx_peripheral(dev, ir) < 0) {
- goto out_fail;
+ } else {
+ int pkts;
+
+ pkts = m_can_rx_peripheral(dev, ir);
+ if (pkts < 0)
+ goto out_fail;
}
}
@@ -1110,21 +1228,18 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
if (ir & IR_TC) {
/* Transmission Complete Interrupt*/
u32 timestamp = 0;
+ unsigned int frame_len;
if (cdev->is_peripheral)
timestamp = m_can_get_timestamp(cdev);
- m_can_tx_update_stats(cdev, 0, timestamp);
- netif_wake_queue(dev);
+ frame_len = m_can_tx_update_stats(cdev, 0, timestamp);
+ m_can_finish_tx(cdev, 1, frame_len);
}
} else {
- if (ir & IR_TEFN) {
+ if (ir & (IR_TEFN | IR_TEFW)) {
/* New TX FIFO Element arrived */
if (m_can_echo_tx_event(dev) != 0)
goto out_fail;
-
- if (netif_queue_stopped(dev) &&
- !m_can_tx_fifo_full(cdev))
- netif_wake_queue(dev);
}
}
@@ -1138,6 +1253,15 @@ out_fail:
return IRQ_HANDLED;
}
+static enum hrtimer_restart m_can_coalescing_timer(struct hrtimer *timer)
+{
+ struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer);
+
+ irq_wake_thread(cdev->net->irq, cdev->net);
+
+ return HRTIMER_NORESTART;
+}
+
static const struct can_bittiming_const m_can_bittiming_const_30X = {
.name = KBUILD_MODNAME,
.tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */
@@ -1276,9 +1400,8 @@ static int m_can_chip_config(struct net_device *dev)
}
/* Disable unused interrupts */
- interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TEFW | IR_TFE |
- IR_TCF | IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N |
- IR_RF0F | IR_RF0W);
+ interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TFE | IR_TCF |
+ IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N | IR_RF0F);
m_can_config_endisable(cdev, true);
@@ -1315,6 +1438,8 @@ static int m_can_chip_config(struct net_device *dev)
} else {
/* Full TX Event FIFO is used */
m_can_write(cdev, M_CAN_TXEFC,
+ FIELD_PREP(TXEFC_EFWM_MASK,
+ cdev->tx_max_coalesced_frames_irq) |
FIELD_PREP(TXEFC_EFS_MASK,
cdev->mcfg[MRAM_TXE].num) |
cdev->mcfg[MRAM_TXE].off);
@@ -1322,6 +1447,7 @@ static int m_can_chip_config(struct net_device *dev)
/* rx fifo configuration, blocking mode, fifo size 1 */
m_can_write(cdev, M_CAN_RXF0C,
+ FIELD_PREP(RXFC_FWM_MASK, cdev->rx_max_coalesced_frames_irq) |
FIELD_PREP(RXFC_FS_MASK, cdev->mcfg[MRAM_RXF0].num) |
cdev->mcfg[MRAM_RXF0].off);
@@ -1380,7 +1506,7 @@ static int m_can_chip_config(struct net_device *dev)
else
interrupts &= ~(IR_ERR_LEC_31X);
}
- m_can_write(cdev, M_CAN_IE, interrupts);
+ m_can_interrupt_enable(cdev, interrupts);
/* route all interrupts to INT0 */
m_can_write(cdev, M_CAN_ILS, ILS_ALL_INT0);
@@ -1413,15 +1539,16 @@ static int m_can_start(struct net_device *dev)
if (ret)
return ret;
+ netdev_queue_set_dql_min_limit(netdev_get_tx_queue(cdev->net, 0),
+ cdev->tx_max_coalesced_frames);
+
cdev->can.state = CAN_STATE_ERROR_ACTIVE;
m_can_enable_all_interrupts(cdev);
- if (!dev->irq) {
- dev_dbg(cdev->dev, "Start hrtimer\n");
- hrtimer_start(&cdev->hrtimer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS),
- HRTIMER_MODE_REL_PINNED);
- }
+ if (cdev->version > 30)
+ cdev->tx_fifo_putidx = FIELD_GET(TXFQS_TFQPI_MASK,
+ m_can_read(cdev, M_CAN_TXFQS));
return 0;
}
@@ -1577,11 +1704,6 @@ static void m_can_stop(struct net_device *dev)
{
struct m_can_classdev *cdev = netdev_priv(dev);
- if (!dev->irq) {
- dev_dbg(cdev->dev, "Stop hrtimer\n");
- hrtimer_cancel(&cdev->hrtimer);
- }
-
/* disable all interrupts */
m_can_disable_all_interrupts(cdev);
@@ -1605,8 +1727,9 @@ static int m_can_close(struct net_device *dev)
m_can_clk_stop(cdev);
free_irq(dev->irq, dev);
+ m_can_clean(dev);
+
if (cdev->is_peripheral) {
- cdev->tx_skb = NULL;
destroy_workqueue(cdev->tx_wq);
cdev->tx_wq = NULL;
can_rx_offload_disable(&cdev->offload);
@@ -1619,57 +1742,42 @@ static int m_can_close(struct net_device *dev)
return 0;
}
-static int m_can_next_echo_skb_occupied(struct net_device *dev, int putidx)
-{
- struct m_can_classdev *cdev = netdev_priv(dev);
- /*get wrap around for loopback skb index */
- unsigned int wrap = cdev->can.echo_skb_max;
- int next_idx;
-
- /* calculate next index */
- next_idx = (++putidx >= wrap ? 0 : putidx);
-
- /* check if occupied */
- return !!cdev->can.echo_skb[next_idx];
-}
-
-static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
+static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev,
+ struct sk_buff *skb)
{
- struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data;
+ struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+ u8 len_padded = DIV_ROUND_UP(cf->len, 4);
+ struct m_can_fifo_element fifo_element;
struct net_device *dev = cdev->net;
- struct sk_buff *skb = cdev->tx_skb;
- struct id_and_dlc fifo_header;
u32 cccr, fdflags;
- u32 txfqs;
int err;
- int putidx;
-
- cdev->tx_skb = NULL;
+ u32 putidx;
+ unsigned int frame_len = can_skb_get_frame_len(skb);
/* Generate ID field for TX buffer Element */
/* Common to all supported M_CAN versions */
if (cf->can_id & CAN_EFF_FLAG) {
- fifo_header.id = cf->can_id & CAN_EFF_MASK;
- fifo_header.id |= TX_BUF_XTD;
+ fifo_element.id = cf->can_id & CAN_EFF_MASK;
+ fifo_element.id |= TX_BUF_XTD;
} else {
- fifo_header.id = ((cf->can_id & CAN_SFF_MASK) << 18);
+ fifo_element.id = ((cf->can_id & CAN_SFF_MASK) << 18);
}
if (cf->can_id & CAN_RTR_FLAG)
- fifo_header.id |= TX_BUF_RTR;
+ fifo_element.id |= TX_BUF_RTR;
if (cdev->version == 30) {
netif_stop_queue(dev);
- fifo_header.dlc = can_fd_len2dlc(cf->len) << 16;
+ fifo_element.dlc = can_fd_len2dlc(cf->len) << 16;
/* Write the frame ID, DLC, and payload to the FIFO element. */
- err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_header, 2);
+ err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_element, 2);
if (err)
goto out_fail;
err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_DATA,
- cf->data, DIV_ROUND_UP(cf->len, 4));
+ cf->data, len_padded);
if (err)
goto out_fail;
@@ -1690,33 +1798,15 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
}
m_can_write(cdev, M_CAN_TXBTIE, 0x1);
- can_put_echo_skb(skb, dev, 0, 0);
+ can_put_echo_skb(skb, dev, 0, frame_len);
m_can_write(cdev, M_CAN_TXBAR, 0x1);
/* End of xmit function for version 3.0.x */
} else {
/* Transmit routine for version >= v3.1.x */
- txfqs = m_can_read(cdev, M_CAN_TXFQS);
-
- /* Check if FIFO full */
- if (_m_can_tx_fifo_full(txfqs)) {
- /* This shouldn't happen */
- netif_stop_queue(dev);
- netdev_warn(dev,
- "TX queue active although FIFO is full.");
-
- if (cdev->is_peripheral) {
- kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
- } else {
- return NETDEV_TX_BUSY;
- }
- }
-
/* get put index for frame */
- putidx = FIELD_GET(TXFQS_TFQPI_MASK, txfqs);
+ putidx = cdev->tx_fifo_putidx;
/* Construct DLC Field, with CAN-FD configuration.
* Use the put index of the fifo as the message marker,
@@ -1731,30 +1821,32 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
fdflags |= TX_BUF_BRS;
}
- fifo_header.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) |
+ fifo_element.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) |
FIELD_PREP(TX_BUF_DLC_MASK, can_fd_len2dlc(cf->len)) |
fdflags | TX_BUF_EFC;
- err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, &fifo_header, 2);
- if (err)
- goto out_fail;
- err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA,
- cf->data, DIV_ROUND_UP(cf->len, 4));
+ memcpy_and_pad(fifo_element.data, CANFD_MAX_DLEN, &cf->data,
+ cf->len, 0);
+
+ err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID,
+ &fifo_element, 2 + len_padded);
if (err)
goto out_fail;
/* Push loopback echo.
* Will be looped back on TX interrupt based on message marker
*/
- can_put_echo_skb(skb, dev, putidx, 0);
+ can_put_echo_skb(skb, dev, putidx, frame_len);
- /* Enable TX FIFO element to start transfer */
- m_can_write(cdev, M_CAN_TXBAR, (1 << putidx));
-
- /* stop network queue if fifo full */
- if (m_can_tx_fifo_full(cdev) ||
- m_can_next_echo_skb_occupied(dev, putidx))
- netif_stop_queue(dev);
+ if (cdev->is_peripheral) {
+ /* Delay enabling TX FIFO element */
+ cdev->tx_peripheral_submit |= BIT(putidx);
+ } else {
+ /* Enable TX FIFO element to start transfer */
+ m_can_write(cdev, M_CAN_TXBAR, BIT(putidx));
+ }
+ cdev->tx_fifo_putidx = (++cdev->tx_fifo_putidx >= cdev->can.echo_skb_max ?
+ 0 : cdev->tx_fifo_putidx);
}
return NETDEV_TX_OK;
@@ -1765,46 +1857,91 @@ out_fail:
return NETDEV_TX_BUSY;
}
+static void m_can_tx_submit(struct m_can_classdev *cdev)
+{
+ if (cdev->version == 30)
+ return;
+ if (!cdev->is_peripheral)
+ return;
+
+ m_can_write(cdev, M_CAN_TXBAR, cdev->tx_peripheral_submit);
+ cdev->tx_peripheral_submit = 0;
+}
+
static void m_can_tx_work_queue(struct work_struct *ws)
{
- struct m_can_classdev *cdev = container_of(ws, struct m_can_classdev,
- tx_work);
+ struct m_can_tx_op *op = container_of(ws, struct m_can_tx_op, work);
+ struct m_can_classdev *cdev = op->cdev;
+ struct sk_buff *skb = op->skb;
+
+ op->skb = NULL;
+ m_can_tx_handler(cdev, skb);
+ if (op->submit)
+ m_can_tx_submit(cdev);
+}
+
+static void m_can_tx_queue_skb(struct m_can_classdev *cdev, struct sk_buff *skb,
+ bool submit)
+{
+ cdev->tx_ops[cdev->next_tx_op].skb = skb;
+ cdev->tx_ops[cdev->next_tx_op].submit = submit;
+ queue_work(cdev->tx_wq, &cdev->tx_ops[cdev->next_tx_op].work);
+
+ ++cdev->next_tx_op;
+ if (cdev->next_tx_op >= cdev->tx_fifo_size)
+ cdev->next_tx_op = 0;
+}
+
+static netdev_tx_t m_can_start_peripheral_xmit(struct m_can_classdev *cdev,
+ struct sk_buff *skb)
+{
+ bool submit;
+
+ ++cdev->nr_txs_without_submit;
+ if (cdev->nr_txs_without_submit >= cdev->tx_max_coalesced_frames ||
+ !netdev_xmit_more()) {
+ cdev->nr_txs_without_submit = 0;
+ submit = true;
+ } else {
+ submit = false;
+ }
+ m_can_tx_queue_skb(cdev, skb, submit);
- m_can_tx_handler(cdev);
+ return NETDEV_TX_OK;
}
static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct m_can_classdev *cdev = netdev_priv(dev);
+ unsigned int frame_len;
+ netdev_tx_t ret;
if (can_dev_dropped_skb(dev, skb))
return NETDEV_TX_OK;
- if (cdev->is_peripheral) {
- if (cdev->tx_skb) {
- netdev_err(dev, "hard_xmit called while tx busy\n");
- return NETDEV_TX_BUSY;
- }
+ frame_len = can_skb_get_frame_len(skb);
- if (cdev->can.state == CAN_STATE_BUS_OFF) {
- m_can_clean(dev);
- } else {
- /* Need to stop the queue to avoid numerous requests
- * from being sent. Suggested improvement is to create
- * a queueing mechanism that will queue the skbs and
- * process them in order.
- */
- cdev->tx_skb = skb;
- netif_stop_queue(cdev->net);
- queue_work(cdev->tx_wq, &cdev->tx_work);
- }
- } else {
- cdev->tx_skb = skb;
- return m_can_tx_handler(cdev);
+ if (cdev->can.state == CAN_STATE_BUS_OFF) {
+ m_can_clean(cdev->net);
+ return NETDEV_TX_OK;
}
- return NETDEV_TX_OK;
+ ret = m_can_start_tx(cdev);
+ if (ret != NETDEV_TX_OK)
+ return ret;
+
+ netdev_sent_queue(dev, frame_len);
+
+ if (cdev->is_peripheral)
+ ret = m_can_start_peripheral_xmit(cdev, skb);
+ else
+ ret = m_can_tx_handler(cdev, skb);
+
+ if (ret != NETDEV_TX_OK)
+ netdev_completed_queue(dev, 1, frame_len);
+
+ return ret;
}
static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer)
@@ -1844,15 +1981,17 @@ static int m_can_open(struct net_device *dev)
/* register interrupt handler */
if (cdev->is_peripheral) {
- cdev->tx_skb = NULL;
- cdev->tx_wq = alloc_workqueue("mcan_wq",
- WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
+ cdev->tx_wq = alloc_ordered_workqueue("mcan_wq",
+ WQ_FREEZABLE | WQ_MEM_RECLAIM);
if (!cdev->tx_wq) {
err = -ENOMEM;
goto out_wq_fail;
}
- INIT_WORK(&cdev->tx_work, m_can_tx_work_queue);
+ for (int i = 0; i != cdev->tx_fifo_size; ++i) {
+ cdev->tx_ops[i].cdev = cdev;
+ INIT_WORK(&cdev->tx_ops[i].work, m_can_tx_work_queue);
+ }
err = request_threaded_irq(dev->irq, NULL, m_can_isr,
IRQF_ONESHOT,
@@ -1900,7 +2039,108 @@ static const struct net_device_ops m_can_netdev_ops = {
.ndo_change_mtu = can_change_mtu,
};
+static int m_can_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kec,
+ struct netlink_ext_ack *ext_ack)
+{
+ struct m_can_classdev *cdev = netdev_priv(dev);
+
+ ec->rx_max_coalesced_frames_irq = cdev->rx_max_coalesced_frames_irq;
+ ec->rx_coalesce_usecs_irq = cdev->rx_coalesce_usecs_irq;
+ ec->tx_max_coalesced_frames = cdev->tx_max_coalesced_frames;
+ ec->tx_max_coalesced_frames_irq = cdev->tx_max_coalesced_frames_irq;
+ ec->tx_coalesce_usecs_irq = cdev->tx_coalesce_usecs_irq;
+
+ return 0;
+}
+
+static int m_can_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kec,
+ struct netlink_ext_ack *ext_ack)
+{
+ struct m_can_classdev *cdev = netdev_priv(dev);
+
+ if (cdev->can.state != CAN_STATE_STOPPED) {
+ netdev_err(dev, "Device is in use, please shut it down first\n");
+ return -EBUSY;
+ }
+
+ if (ec->rx_max_coalesced_frames_irq > cdev->mcfg[MRAM_RXF0].num) {
+ netdev_err(dev, "rx-frames-irq %u greater than the RX FIFO %u\n",
+ ec->rx_max_coalesced_frames_irq,
+ cdev->mcfg[MRAM_RXF0].num);
+ return -EINVAL;
+ }
+ if ((ec->rx_max_coalesced_frames_irq == 0) != (ec->rx_coalesce_usecs_irq == 0)) {
+ netdev_err(dev, "rx-frames-irq and rx-usecs-irq can only be set together\n");
+ return -EINVAL;
+ }
+ if (ec->tx_max_coalesced_frames_irq > cdev->mcfg[MRAM_TXE].num) {
+ netdev_err(dev, "tx-frames-irq %u greater than the TX event FIFO %u\n",
+ ec->tx_max_coalesced_frames_irq,
+ cdev->mcfg[MRAM_TXE].num);
+ return -EINVAL;
+ }
+ if (ec->tx_max_coalesced_frames_irq > cdev->mcfg[MRAM_TXB].num) {
+ netdev_err(dev, "tx-frames-irq %u greater than the TX FIFO %u\n",
+ ec->tx_max_coalesced_frames_irq,
+ cdev->mcfg[MRAM_TXB].num);
+ return -EINVAL;
+ }
+ if ((ec->tx_max_coalesced_frames_irq == 0) != (ec->tx_coalesce_usecs_irq == 0)) {
+ netdev_err(dev, "tx-frames-irq and tx-usecs-irq can only be set together\n");
+ return -EINVAL;
+ }
+ if (ec->tx_max_coalesced_frames > cdev->mcfg[MRAM_TXE].num) {
+ netdev_err(dev, "tx-frames %u greater than the TX event FIFO %u\n",
+ ec->tx_max_coalesced_frames,
+ cdev->mcfg[MRAM_TXE].num);
+ return -EINVAL;
+ }
+ if (ec->tx_max_coalesced_frames > cdev->mcfg[MRAM_TXB].num) {
+ netdev_err(dev, "tx-frames %u greater than the TX FIFO %u\n",
+ ec->tx_max_coalesced_frames,
+ cdev->mcfg[MRAM_TXB].num);
+ return -EINVAL;
+ }
+ if (ec->rx_coalesce_usecs_irq != 0 && ec->tx_coalesce_usecs_irq != 0 &&
+ ec->rx_coalesce_usecs_irq != ec->tx_coalesce_usecs_irq) {
+ netdev_err(dev, "rx-usecs-irq %u needs to be equal to tx-usecs-irq %u if both are enabled\n",
+ ec->rx_coalesce_usecs_irq,
+ ec->tx_coalesce_usecs_irq);
+ return -EINVAL;
+ }
+
+ cdev->rx_max_coalesced_frames_irq = ec->rx_max_coalesced_frames_irq;
+ cdev->rx_coalesce_usecs_irq = ec->rx_coalesce_usecs_irq;
+ cdev->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
+ cdev->tx_max_coalesced_frames_irq = ec->tx_max_coalesced_frames_irq;
+ cdev->tx_coalesce_usecs_irq = ec->tx_coalesce_usecs_irq;
+
+ if (cdev->rx_coalesce_usecs_irq)
+ cdev->irq_timer_wait =
+ ns_to_ktime(cdev->rx_coalesce_usecs_irq * NSEC_PER_USEC);
+ else
+ cdev->irq_timer_wait =
+ ns_to_ktime(cdev->tx_coalesce_usecs_irq * NSEC_PER_USEC);
+
+ return 0;
+}
+
static const struct ethtool_ops m_can_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS_IRQ |
+ ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ |
+ ETHTOOL_COALESCE_TX_USECS_IRQ |
+ ETHTOOL_COALESCE_TX_MAX_FRAMES |
+ ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ,
+ .get_ts_info = ethtool_op_get_ts_info,
+ .get_coalesce = m_can_get_coalesce,
+ .set_coalesce = m_can_set_coalesce,
+};
+
+static const struct ethtool_ops m_can_ethtool_ops_polling = {
.get_ts_info = ethtool_op_get_ts_info,
};
@@ -1908,7 +2148,10 @@ static int register_m_can_dev(struct net_device *dev)
{
dev->flags |= IFF_ECHO; /* we support local echo */
dev->netdev_ops = &m_can_netdev_ops;
- dev->ethtool_ops = &m_can_ethtool_ops;
+ if (dev->irq)
+ dev->ethtool_ops = &m_can_ethtool_ops;
+ else
+ dev->ethtool_ops = &m_can_ethtool_ops_polling;
return register_candev(dev);
}
@@ -2056,12 +2299,23 @@ int m_can_class_register(struct m_can_classdev *cdev)
{
int ret;
- if (cdev->pm_clock_support) {
- ret = m_can_clk_start(cdev);
- if (ret)
- return ret;
+ cdev->tx_fifo_size = max(1, min(cdev->mcfg[MRAM_TXB].num,
+ cdev->mcfg[MRAM_TXE].num));
+ if (cdev->is_peripheral) {
+ cdev->tx_ops =
+ devm_kzalloc(cdev->dev,
+ cdev->tx_fifo_size * sizeof(*cdev->tx_ops),
+ GFP_KERNEL);
+ if (!cdev->tx_ops) {
+ dev_err(cdev->dev, "Failed to allocate tx_ops for workqueue\n");
+ return -ENOMEM;
+ }
}
+ ret = m_can_clk_start(cdev);
+ if (ret)
+ return ret;
+
if (cdev->is_peripheral) {
ret = can_rx_offload_add_manual(cdev->net, &cdev->offload,
NAPI_POLL_WEIGHT);
@@ -2069,8 +2323,15 @@ int m_can_class_register(struct m_can_classdev *cdev)
goto clk_disable;
}
- if (!cdev->net->irq)
+ if (!cdev->net->irq) {
+ dev_dbg(cdev->dev, "Polling enabled, initialize hrtimer");
+ hrtimer_init(&cdev->hrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
cdev->hrtimer.function = &hrtimer_callback;
+ } else {
+ hrtimer_init(&cdev->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ cdev->hrtimer.function = m_can_coalescing_timer;
+ }
ret = m_can_dev_setup(cdev);
if (ret)
@@ -2121,7 +2382,15 @@ int m_can_class_suspend(struct device *dev)
if (netif_running(ndev)) {
netif_stop_queue(ndev);
netif_device_detach(ndev);
- m_can_stop(ndev);
+
+ /* leave the chip running with rx interrupt enabled if it is
+ * used as a wake-up source.
+ */
+ if (cdev->pm_wake_source)
+ m_can_write(cdev, M_CAN_IE, IR_RF0N);
+ else
+ m_can_stop(ndev);
+
m_can_clk_stop(cdev);
}
@@ -2148,11 +2417,15 @@ int m_can_class_resume(struct device *dev)
ret = m_can_clk_start(cdev);
if (ret)
return ret;
- ret = m_can_start(ndev);
- if (ret) {
- m_can_clk_stop(cdev);
- return ret;
+ if (cdev->pm_wake_source) {
+ m_can_write(cdev, M_CAN_IE, cdev->active_interrupts);
+ } else {
+ ret = m_can_start(ndev);
+ if (ret) {
+ m_can_clk_stop(cdev);
+ return ret;
+ }
}
netif_device_attach(ndev);
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index 520e14277dff..3a9edc292593 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -70,6 +70,13 @@ struct m_can_ops {
int (*init)(struct m_can_classdev *cdev);
};
+struct m_can_tx_op {
+ struct m_can_classdev *cdev;
+ struct work_struct work;
+ struct sk_buff *skb;
+ bool submit;
+};
+
struct m_can_classdev {
struct can_priv can;
struct can_rx_offload offload;
@@ -80,18 +87,42 @@ struct m_can_classdev {
struct clk *cclk;
struct workqueue_struct *tx_wq;
- struct work_struct tx_work;
- struct sk_buff *tx_skb;
struct phy *transceiver;
+ ktime_t irq_timer_wait;
+
struct m_can_ops *ops;
int version;
u32 irqstatus;
int pm_clock_support;
+ int pm_wake_source;
int is_peripheral;
+ // Cached M_CAN_IE register content
+ u32 active_interrupts;
+ u32 rx_max_coalesced_frames_irq;
+ u32 rx_coalesce_usecs_irq;
+ u32 tx_max_coalesced_frames;
+ u32 tx_max_coalesced_frames_irq;
+ u32 tx_coalesce_usecs_irq;
+
+ // Store this internally to avoid fetch delays on peripheral chips
+ u32 tx_fifo_putidx;
+
+ /* Protects shared state between start_xmit and m_can_isr */
+ spinlock_t tx_handling_spinlock;
+ int tx_fifo_in_flight;
+
+ struct m_can_tx_op *tx_ops;
+ int tx_fifo_size;
+ int next_tx_op;
+
+ int nr_txs_without_submit;
+ /* bitfield of fifo elements that will be submitted together */
+ u32 tx_peripheral_submit;
+
struct mram_cfg mcfg[MRAM_CFG_NUM];
struct hrtimer hrtimer;
diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
index f2219aa2824b..45400de4163d 100644
--- a/drivers/net/can/m_can/m_can_pci.c
+++ b/drivers/net/can/m_can/m_can_pci.c
@@ -125,6 +125,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
mcan_class->dev = &pci->dev;
mcan_class->net->irq = pci_irq_vector(pci, 0);
mcan_class->pm_clock_support = 1;
+ mcan_class->pm_wake_source = 0;
mcan_class->can.clock.freq = id->driver_data;
mcan_class->ops = &m_can_pci_ops;
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index cdb28d6a092c..df0367124b4c 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -109,10 +109,6 @@ static int m_can_plat_probe(struct platform_device *pdev)
ret = irq;
goto probe_fail;
}
- } else {
- dev_dbg(mcan_class->dev, "Polling enabled, initialize hrtimer");
- hrtimer_init(&mcan_class->hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL_PINNED);
}
/* message ram could be shared */
@@ -143,6 +139,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
mcan_class->net->irq = irq;
mcan_class->pm_clock_support = 1;
+ mcan_class->pm_wake_source = 0;
mcan_class->can.clock.freq = clk_get_rate(mcan_class->cclk);
mcan_class->dev = &pdev->dev;
mcan_class->transceiver = transceiver;
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index ae8c42f5debd..a42600dac70d 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -411,6 +411,7 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
priv->spi = spi;
mcan_class->pm_clock_support = 0;
+ mcan_class->pm_wake_source = device_property_read_bool(&spi->dev, "wakeup-source");
mcan_class->can.clock.freq = freq;
mcan_class->dev = &spi->dev;
mcan_class->ops = &tcan4x5x_ops;
@@ -459,6 +460,9 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
goto out_power;
}
+ if (mcan_class->pm_wake_source)
+ device_init_wakeup(&spi->dev, true);
+
ret = m_can_class_register(mcan_class);
if (ret) {
dev_err(&spi->dev, "Failed registering m_can device %pe\n",
@@ -487,6 +491,29 @@ static void tcan4x5x_can_remove(struct spi_device *spi)
m_can_class_free_dev(priv->cdev.net);
}
+static int __maybe_unused tcan4x5x_suspend(struct device *dev)
+{
+ struct m_can_classdev *cdev = dev_get_drvdata(dev);
+ struct spi_device *spi = to_spi_device(dev);
+
+ if (cdev->pm_wake_source)
+ enable_irq_wake(spi->irq);
+
+ return m_can_class_suspend(dev);
+}
+
+static int __maybe_unused tcan4x5x_resume(struct device *dev)
+{
+ struct m_can_classdev *cdev = dev_get_drvdata(dev);
+ struct spi_device *spi = to_spi_device(dev);
+ int ret = m_can_class_resume(dev);
+
+ if (cdev->pm_wake_source)
+ disable_irq_wake(spi->irq);
+
+ return ret;
+}
+
static const struct of_device_id tcan4x5x_of_match[] = {
{
.compatible = "ti,tcan4x5x",
@@ -505,11 +532,15 @@ static const struct spi_device_id tcan4x5x_id_table[] = {
};
MODULE_DEVICE_TABLE(spi, tcan4x5x_id_table);
+static const struct dev_pm_ops tcan4x5x_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(tcan4x5x_suspend, tcan4x5x_resume)
+};
+
static struct spi_driver tcan4x5x_can_driver = {
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = tcan4x5x_of_match,
- .pm = NULL,
+ .pm = &tcan4x5x_pm_ops,
},
.id_table = tcan4x5x_id_table,
.probe = tcan4x5x_can_probe,
diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c
index 32286f861a19..721df91cdbfb 100644
--- a/drivers/net/can/softing/softing_fw.c
+++ b/drivers/net/can/softing/softing_fw.c
@@ -436,7 +436,7 @@ int softing_startstop(struct net_device *dev, int up)
return ret;
bus_bitmask_start = 0;
- if (dev && up)
+ if (up)
/* prepare to start this bus as well */
bus_bitmask_start |= (1 << priv->index);
/* bring netdevs down */
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index eebf967f4711..1d9057dc44f2 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -837,7 +837,7 @@ static int __mcp251xfd_get_berr_counter(const struct net_device *ndev,
return err;
if (trec & MCP251XFD_REG_TREC_TXBO)
- bec->txerr = 256;
+ bec->txerr = CAN_BUS_OFF_THRESHOLD;
else
bec->txerr = FIELD_GET(MCP251XFD_REG_TREC_TEC_MASK, trec);
bec->rxerr = FIELD_GET(MCP251XFD_REG_TREC_REC_MASK, trec);
diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index d1450722cb3c..bd58c636d465 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -100,6 +100,7 @@ config CAN_KVASER_USB
- Scania VCI2 (if you have the Kvaser logo on top)
- Kvaser BlackBird v2
- Kvaser Leaf Pro HS v2
+ - Kvaser Leaf v3
- Kvaser Hybrid CAN/LIN
- Kvaser Hybrid 2xCAN/LIN
- Kvaser Hybrid Pro CAN/LIN
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 95b0fdb602c8..65c962f76898 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -385,7 +385,7 @@ static struct gs_tx_context *gs_get_tx_context(struct gs_can *dev,
static int gs_cmd_reset(struct gs_can *dev)
{
struct gs_device_mode dm = {
- .mode = GS_CAN_MODE_RESET,
+ .mode = cpu_to_le32(GS_CAN_MODE_RESET),
};
return usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_MODE,
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index 71ef4db5c09f..8faf8a462c05 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -88,6 +88,7 @@
#define USB_USBCAN_PRO_4HS_PRODUCT_ID 0x0114
#define USB_HYBRID_CANLIN_PRODUCT_ID 0x0115
#define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 0x0116
+#define USB_LEAF_V3_PRODUCT_ID 0x0117
static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = {
.quirks = KVASER_USB_QUIRK_HAS_HARDWARE_TIMESTAMP,
@@ -235,6 +236,8 @@ static const struct usb_device_id kvaser_usb_table[] = {
.driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID),
.driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_V3_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
{ }
};
MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 98c669ad5141..f7fabba707ea 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -119,7 +119,7 @@ static int vxcan_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
- iflink = peer ? peer->ifindex : 0;
+ iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 3722eaa84234..fae0120473f8 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -31,6 +31,7 @@
#include <linux/phy/phy.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
+#include <linux/u64_stats_sync.h>
#define DRIVER_NAME "xilinx_can"
@@ -58,6 +59,13 @@ enum xcan_reg {
*/
XCAN_F_BTR_OFFSET = 0x08C, /* Data Phase Bit Timing */
XCAN_TRR_OFFSET = 0x0090, /* TX Buffer Ready Request */
+
+ /* only on AXI CAN cores */
+ XCAN_ECC_CFG_OFFSET = 0xC8, /* ECC Configuration */
+ XCAN_TXTLFIFO_ECC_OFFSET = 0xCC, /* TXTL FIFO ECC error counter */
+ XCAN_TXOLFIFO_ECC_OFFSET = 0xD0, /* TXOL FIFO ECC error counter */
+ XCAN_RXFIFO_ECC_OFFSET = 0xD4, /* RX FIFO ECC error counter */
+
XCAN_AFR_EXT_OFFSET = 0x00E0, /* Acceptance Filter */
XCAN_FSR_OFFSET = 0x00E8, /* RX FIFO Status */
XCAN_TXMSG_BASE_OFFSET = 0x0100, /* TX Message Space */
@@ -124,6 +132,18 @@ enum xcan_reg {
#define XCAN_IXR_TXFLL_MASK 0x00000004 /* Tx FIFO Full intr */
#define XCAN_IXR_TXOK_MASK 0x00000002 /* TX successful intr */
#define XCAN_IXR_ARBLST_MASK 0x00000001 /* Arbitration lost intr */
+#define XCAN_IXR_E2BERX_MASK BIT(23) /* RX FIFO two bit ECC error */
+#define XCAN_IXR_E1BERX_MASK BIT(22) /* RX FIFO one bit ECC error */
+#define XCAN_IXR_E2BETXOL_MASK BIT(21) /* TXOL FIFO two bit ECC error */
+#define XCAN_IXR_E1BETXOL_MASK BIT(20) /* TXOL FIFO One bit ECC error */
+#define XCAN_IXR_E2BETXTL_MASK BIT(19) /* TXTL FIFO Two bit ECC error */
+#define XCAN_IXR_E1BETXTL_MASK BIT(18) /* TXTL FIFO One bit ECC error */
+#define XCAN_IXR_ECC_MASK (XCAN_IXR_E2BERX_MASK | \
+ XCAN_IXR_E1BERX_MASK | \
+ XCAN_IXR_E2BETXOL_MASK | \
+ XCAN_IXR_E1BETXOL_MASK | \
+ XCAN_IXR_E2BETXTL_MASK | \
+ XCAN_IXR_E1BETXTL_MASK)
#define XCAN_IDR_ID1_MASK 0xFFE00000 /* Standard msg identifier */
#define XCAN_IDR_SRR_MASK 0x00100000 /* Substitute remote TXreq */
#define XCAN_IDR_IDE_MASK 0x00080000 /* Identifier extension */
@@ -137,6 +157,11 @@ enum xcan_reg {
#define XCAN_2_FSR_RI_MASK 0x0000003F /* RX Read Index */
#define XCAN_DLCR_EDL_MASK 0x08000000 /* EDL Mask in DLC */
#define XCAN_DLCR_BRS_MASK 0x04000000 /* BRS Mask in DLC */
+#define XCAN_ECC_CFG_REECRX_MASK BIT(2) /* Reset RX FIFO ECC error counters */
+#define XCAN_ECC_CFG_REECTXOL_MASK BIT(1) /* Reset TXOL FIFO ECC error counters */
+#define XCAN_ECC_CFG_REECTXTL_MASK BIT(0) /* Reset TXTL FIFO ECC error counters */
+#define XCAN_ECC_1BIT_CNT_MASK GENMASK(15, 0) /* FIFO ECC 1bit count mask */
+#define XCAN_ECC_2BIT_CNT_MASK GENMASK(31, 16) /* FIFO ECC 2bit count mask */
/* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
#define XCAN_BRPR_TDC_ENABLE BIT(16) /* Transmitter Delay Compensation (TDC) Enable */
@@ -202,6 +227,14 @@ struct xcan_devtype_data {
* @devtype: Device type specific constants
* @transceiver: Optional pointer to associated CAN transceiver
* @rstc: Pointer to reset control
+ * @ecc_enable: ECC enable flag
+ * @syncp: synchronization for ECC error stats
+ * @ecc_rx_2_bit_errors: RXFIFO 2bit ECC count
+ * @ecc_rx_1_bit_errors: RXFIFO 1bit ECC count
+ * @ecc_txol_2_bit_errors: TXOLFIFO 2bit ECC count
+ * @ecc_txol_1_bit_errors: TXOLFIFO 1bit ECC count
+ * @ecc_txtl_2_bit_errors: TXTLFIFO 2bit ECC count
+ * @ecc_txtl_1_bit_errors: TXTLFIFO 1bit ECC count
*/
struct xcan_priv {
struct can_priv can;
@@ -221,6 +254,14 @@ struct xcan_priv {
struct xcan_devtype_data devtype;
struct phy *transceiver;
struct reset_control *rstc;
+ bool ecc_enable;
+ struct u64_stats_sync syncp;
+ u64_stats_t ecc_rx_2_bit_errors;
+ u64_stats_t ecc_rx_1_bit_errors;
+ u64_stats_t ecc_txol_2_bit_errors;
+ u64_stats_t ecc_txol_1_bit_errors;
+ u64_stats_t ecc_txtl_2_bit_errors;
+ u64_stats_t ecc_txtl_1_bit_errors;
};
/* CAN Bittiming constants as per Xilinx CAN specs */
@@ -308,6 +349,24 @@ static const struct can_tdc_const xcan_tdc_const_canfd2 = {
.tdcf_max = 0,
};
+enum xcan_stats_type {
+ XCAN_ECC_RX_2_BIT_ERRORS,
+ XCAN_ECC_RX_1_BIT_ERRORS,
+ XCAN_ECC_TXOL_2_BIT_ERRORS,
+ XCAN_ECC_TXOL_1_BIT_ERRORS,
+ XCAN_ECC_TXTL_2_BIT_ERRORS,
+ XCAN_ECC_TXTL_1_BIT_ERRORS,
+};
+
+static const char xcan_priv_flags_strings[][ETH_GSTRING_LEN] = {
+ [XCAN_ECC_RX_2_BIT_ERRORS] = "ecc_rx_2_bit_errors",
+ [XCAN_ECC_RX_1_BIT_ERRORS] = "ecc_rx_1_bit_errors",
+ [XCAN_ECC_TXOL_2_BIT_ERRORS] = "ecc_txol_2_bit_errors",
+ [XCAN_ECC_TXOL_1_BIT_ERRORS] = "ecc_txol_1_bit_errors",
+ [XCAN_ECC_TXTL_2_BIT_ERRORS] = "ecc_txtl_2_bit_errors",
+ [XCAN_ECC_TXTL_1_BIT_ERRORS] = "ecc_txtl_1_bit_errors",
+};
+
/**
* xcan_write_reg_le - Write a value to the device register little endian
* @priv: Driver private data structure
@@ -523,6 +582,9 @@ static int xcan_chip_start(struct net_device *ndev)
XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
XCAN_IXR_ARBLST_MASK | xcan_rx_int_mask(priv);
+ if (priv->ecc_enable)
+ ier |= XCAN_IXR_ECC_MASK;
+
if (priv->devtype.flags & XCAN_FLAG_RXMNF)
ier |= XCAN_IXR_RXMNF_MASK;
@@ -1127,6 +1189,54 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
priv->can.can_stats.bus_error++;
}
+ if (priv->ecc_enable && isr & XCAN_IXR_ECC_MASK) {
+ u32 reg_rx_ecc, reg_txol_ecc, reg_txtl_ecc;
+
+ reg_rx_ecc = priv->read_reg(priv, XCAN_RXFIFO_ECC_OFFSET);
+ reg_txol_ecc = priv->read_reg(priv, XCAN_TXOLFIFO_ECC_OFFSET);
+ reg_txtl_ecc = priv->read_reg(priv, XCAN_TXTLFIFO_ECC_OFFSET);
+
+ /* The counter reaches its maximum at 0xffff and does not overflow.
+ * Accept the small race window between reading and resetting ECC counters.
+ */
+ priv->write_reg(priv, XCAN_ECC_CFG_OFFSET, XCAN_ECC_CFG_REECRX_MASK |
+ XCAN_ECC_CFG_REECTXOL_MASK | XCAN_ECC_CFG_REECTXTL_MASK);
+
+ u64_stats_update_begin(&priv->syncp);
+
+ if (isr & XCAN_IXR_E2BERX_MASK) {
+ u64_stats_add(&priv->ecc_rx_2_bit_errors,
+ FIELD_GET(XCAN_ECC_2BIT_CNT_MASK, reg_rx_ecc));
+ }
+
+ if (isr & XCAN_IXR_E1BERX_MASK) {
+ u64_stats_add(&priv->ecc_rx_1_bit_errors,
+ FIELD_GET(XCAN_ECC_1BIT_CNT_MASK, reg_rx_ecc));
+ }
+
+ if (isr & XCAN_IXR_E2BETXOL_MASK) {
+ u64_stats_add(&priv->ecc_txol_2_bit_errors,
+ FIELD_GET(XCAN_ECC_2BIT_CNT_MASK, reg_txol_ecc));
+ }
+
+ if (isr & XCAN_IXR_E1BETXOL_MASK) {
+ u64_stats_add(&priv->ecc_txol_1_bit_errors,
+ FIELD_GET(XCAN_ECC_1BIT_CNT_MASK, reg_txol_ecc));
+ }
+
+ if (isr & XCAN_IXR_E2BETXTL_MASK) {
+ u64_stats_add(&priv->ecc_txtl_2_bit_errors,
+ FIELD_GET(XCAN_ECC_2BIT_CNT_MASK, reg_txtl_ecc));
+ }
+
+ if (isr & XCAN_IXR_E1BETXTL_MASK) {
+ u64_stats_add(&priv->ecc_txtl_1_bit_errors,
+ FIELD_GET(XCAN_ECC_1BIT_CNT_MASK, reg_txtl_ecc));
+ }
+
+ u64_stats_update_end(&priv->syncp);
+ }
+
if (cf.can_id) {
struct can_frame *skb_cf;
struct sk_buff *skb = alloc_can_err_skb(ndev, &skb_cf);
@@ -1354,8 +1464,8 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
{
struct net_device *ndev = (struct net_device *)dev_id;
struct xcan_priv *priv = netdev_priv(ndev);
+ u32 isr_errors, mask;
u32 isr, ier;
- u32 isr_errors;
u32 rx_int_mask = xcan_rx_int_mask(priv);
/* Get the interrupt status from Xilinx CAN */
@@ -1374,10 +1484,15 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
if (isr & XCAN_IXR_TXOK_MASK)
xcan_tx_interrupt(ndev, isr);
+ mask = XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+ XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK |
+ XCAN_IXR_RXMNF_MASK;
+
+ if (priv->ecc_enable)
+ mask |= XCAN_IXR_ECC_MASK;
+
/* Check for the type of error interrupt and Processing it */
- isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
- XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK |
- XCAN_IXR_RXMNF_MASK);
+ isr_errors = isr & mask;
if (isr_errors) {
priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors);
xcan_err_interrupt(ndev, isr);
@@ -1546,6 +1661,43 @@ static int xcan_get_auto_tdcv(const struct net_device *ndev, u32 *tdcv)
return 0;
}
+static void xcan_get_strings(struct net_device *ndev, u32 stringset, u8 *buf)
+{
+ switch (stringset) {
+ case ETH_SS_STATS:
+ memcpy(buf, &xcan_priv_flags_strings,
+ sizeof(xcan_priv_flags_strings));
+ }
+}
+
+static int xcan_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(xcan_priv_flags_strings);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void xcan_get_ethtool_stats(struct net_device *ndev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct xcan_priv *priv = netdev_priv(ndev);
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&priv->syncp);
+
+ data[XCAN_ECC_RX_2_BIT_ERRORS] = u64_stats_read(&priv->ecc_rx_2_bit_errors);
+ data[XCAN_ECC_RX_1_BIT_ERRORS] = u64_stats_read(&priv->ecc_rx_1_bit_errors);
+ data[XCAN_ECC_TXOL_2_BIT_ERRORS] = u64_stats_read(&priv->ecc_txol_2_bit_errors);
+ data[XCAN_ECC_TXOL_1_BIT_ERRORS] = u64_stats_read(&priv->ecc_txol_1_bit_errors);
+ data[XCAN_ECC_TXTL_2_BIT_ERRORS] = u64_stats_read(&priv->ecc_txtl_2_bit_errors);
+ data[XCAN_ECC_TXTL_1_BIT_ERRORS] = u64_stats_read(&priv->ecc_txtl_1_bit_errors);
+ } while (u64_stats_fetch_retry(&priv->syncp, start));
+}
+
static const struct net_device_ops xcan_netdev_ops = {
.ndo_open = xcan_open,
.ndo_stop = xcan_close,
@@ -1555,6 +1707,9 @@ static const struct net_device_ops xcan_netdev_ops = {
static const struct ethtool_ops xcan_ethtool_ops = {
.get_ts_info = ethtool_op_get_ts_info,
+ .get_strings = xcan_get_strings,
+ .get_sset_count = xcan_get_sset_count,
+ .get_ethtool_stats = xcan_get_ethtool_stats,
};
/**
@@ -1793,6 +1948,7 @@ static int xcan_probe(struct platform_device *pdev)
return -ENOMEM;
priv = netdev_priv(ndev);
+ priv->ecc_enable = of_property_read_bool(pdev->dev.of_node, "xlnx,has-ecc");
priv->dev = &pdev->dev;
priv->can.bittiming_const = devtype->bittiming_const;
priv->can.do_set_mode = xcan_do_set_mode;
@@ -1909,6 +2065,11 @@ static int xcan_probe(struct platform_device *pdev)
priv->reg_base, ndev->irq, priv->can.clock.freq,
hw_tx_max, priv->tx_max);
+ if (priv->ecc_enable) {
+ /* Reset FIFO ECC counters */
+ priv->write_reg(priv, XCAN_ECC_CFG_OFFSET, XCAN_ECC_CFG_REECRX_MASK |
+ XCAN_ECC_CFG_REECTXOL_MASK | XCAN_ECC_CFG_REECTXTL_MASK);
+ }
return 0;
err_disableclks:
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index f8c1d73b251d..3092b391031a 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -48,7 +48,7 @@ config NET_DSA_MT7530
config NET_DSA_MT7530_MDIO
tristate "MediaTek MT7530 MDIO interface driver"
depends on NET_DSA_MT7530
- imply MEDIATEK_GE_PHY
+ select MEDIATEK_GE_PHY
select PCS_MTK_LYNXI
help
This enables support for the MediaTek MT7530 and MT7531 switch
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 0d628b35fd5c..b2eeff04f4c8 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -559,6 +559,19 @@ static void b53_port_set_learning(struct b53_device *dev, int port,
b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg);
}
+static void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+{
+ struct b53_device *dev = ds->priv;
+ u16 reg;
+
+ b53_read16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, &reg);
+ if (enable)
+ reg |= BIT(port);
+ else
+ reg &= ~BIT(port);
+ b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg);
+}
+
int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct b53_device *dev = ds->priv;
@@ -1257,7 +1270,7 @@ static void b53_adjust_link(struct dsa_switch *ds, int port,
struct phy_device *phydev)
{
struct b53_device *dev = ds->priv;
- struct ethtool_eee *p = &dev->ports[port].eee;
+ struct ethtool_keee *p = &dev->ports[port].eee;
u8 rgmii_ctrl = 0, reg = 0, off;
bool tx_pause = false;
bool rx_pause = false;
@@ -2193,21 +2206,6 @@ void b53_mirror_del(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL(b53_mirror_del);
-void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
-{
- struct b53_device *dev = ds->priv;
- u16 reg;
-
- b53_read16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, &reg);
- if (enable)
- reg |= BIT(port);
- else
- reg &= ~BIT(port);
- b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg);
-}
-EXPORT_SYMBOL(b53_eee_enable_set);
-
-
/* Returns 0 if EEE was not enabled, or 1 otherwise
*/
int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
@@ -2224,27 +2222,21 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
}
EXPORT_SYMBOL(b53_eee_init);
-int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
+int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
{
struct b53_device *dev = ds->priv;
- struct ethtool_eee *p = &dev->ports[port].eee;
- u16 reg;
if (is5325(dev) || is5365(dev))
return -EOPNOTSUPP;
- b53_read16(dev, B53_EEE_PAGE, B53_EEE_LPI_INDICATE, &reg);
- e->eee_enabled = p->eee_enabled;
- e->eee_active = !!(reg & BIT(port));
-
return 0;
}
EXPORT_SYMBOL(b53_get_mac_eee);
-int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
+int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
{
struct b53_device *dev = ds->priv;
- struct ethtool_eee *p = &dev->ports[port].eee;
+ struct ethtool_keee *p = &dev->ports[port].eee;
if (is5325(dev) || is5365(dev))
return -EOPNOTSUPP;
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index fdcfd5081c28..c13a907947f1 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -95,7 +95,7 @@ struct b53_pcs {
struct b53_port {
u16 vlan_ctl_mask;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
};
struct b53_vlan {
@@ -395,9 +395,8 @@ void b53_mirror_del(struct dsa_switch *ds, int port,
int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
void b53_disable_port(struct dsa_switch *ds, int port);
void b53_brcm_hdr_setup(struct dsa_switch *ds, int port);
-void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable);
int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy);
-int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
-int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
+int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
+int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
#endif
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 4a52ccbe393f..bc77ee9e6d0a 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -835,7 +835,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
bool tx_pause, bool rx_pause)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- struct ethtool_eee *p = &priv->dev->ports[port].eee;
+ struct ethtool_keee *p = &priv->dev->ports[port].eee;
u32 reg_rgmii_ctrl = 0;
u32 reg, offset;
diff --git a/drivers/net/dsa/dsa_loop_bdinfo.c b/drivers/net/dsa/dsa_loop_bdinfo.c
index 237066d30704..14ca42491512 100644
--- a/drivers/net/dsa/dsa_loop_bdinfo.c
+++ b/drivers/net/dsa/dsa_loop_bdinfo.c
@@ -32,4 +32,5 @@ static int __init dsa_loop_bdinfo_init(void)
}
arch_initcall(dsa_loop_bdinfo_init)
+MODULE_DESCRIPTION("DSA mock-up switch driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 61b71bcfe396..14923535ca7e 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data)
mutex_lock(&dev->alu_mutex);
ctrl_addr = IND_ACC_TABLE(table) | addr;
- ret = ksz_write8(dev, regs[REG_IND_BYTE], data);
+ ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
if (!ret)
- ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
+ ret = ksz_write8(dev, regs[REG_IND_BYTE], data);
mutex_unlock(&dev->alu_mutex);
@@ -633,6 +633,57 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
}
/**
+ * ksz879x_get_loopback - KSZ879x specific function to get loopback
+ * configuration status for a specific port
+ * @dev: Pointer to the device structure
+ * @port: Port number to query
+ * @val: Pointer to store the result
+ *
+ * This function reads the SMI registers to determine whether loopback mode
+ * is enabled for a specific port.
+ *
+ * Return: 0 on success, error code on failure.
+ */
+static int ksz879x_get_loopback(struct ksz_device *dev, u16 port,
+ u16 *val)
+{
+ u8 stat3;
+ int ret;
+
+ ret = ksz_pread8(dev, port, REG_PORT_STATUS_3, &stat3);
+ if (ret)
+ return ret;
+
+ if (stat3 & PORT_PHY_LOOPBACK)
+ *val |= BMCR_LOOPBACK;
+
+ return 0;
+}
+
+/**
+ * ksz879x_set_loopback - KSZ879x specific function to set loopback mode for
+ * a specific port
+ * @dev: Pointer to the device structure.
+ * @port: Port number to modify.
+ * @val: Value indicating whether to enable or disable loopback mode.
+ *
+ * This function translates loopback bit of the BMCR register into the
+ * corresponding hardware register bit value and writes it to the SMI interface.
+ *
+ * Return: 0 on success, error code on failure.
+ */
+static int ksz879x_set_loopback(struct ksz_device *dev, u16 port, u16 val)
+{
+ u8 stat3 = 0;
+
+ if (val & BMCR_LOOPBACK)
+ stat3 |= PORT_PHY_LOOPBACK;
+
+ return ksz_prmw8(dev, port, REG_PORT_STATUS_3, PORT_PHY_LOOPBACK,
+ stat3);
+}
+
+/**
* ksz8_r_phy_ctrl - Translates and reads from the SMI interface to a MIIM PHY
* Control register (Reg. 31).
* @dev: The KSZ device instance.
@@ -676,59 +727,122 @@ static int ksz8_r_phy_ctrl(struct ksz_device *dev, int port, u16 *val)
return 0;
}
+/**
+ * ksz8_r_phy_bmcr - Translates and reads from the SMI interface to a MIIM PHY
+ * Basic mode control register (Reg. 0).
+ * @dev: The KSZ device instance.
+ * @port: The port number to be read.
+ * @val: The value read from the SMI interface.
+ *
+ * This function reads the SMI interface and translates the hardware register
+ * bit values into their corresponding control settings for a MIIM PHY Basic
+ * mode control register.
+ *
+ * MIIM Bit Mapping Comparison between KSZ8794 and KSZ8873
+ * -------------------------------------------------------------------
+ * MIIM Bit | KSZ8794 Reg/Bit | KSZ8873 Reg/Bit
+ * ----------------------------+-----------------------------+----------------
+ * Bit 15 - Soft Reset | 0xF/4 | Not supported
+ * Bit 14 - Loopback | 0xD/0 (MAC), 0xF/7 (PHY) ~ 0xD/0 (PHY)
+ * Bit 13 - Force 100 | 0xC/6 = 0xC/6
+ * Bit 12 - AN Enable | 0xC/7 (reverse logic) ~ 0xC/7
+ * Bit 11 - Power Down | 0xD/3 = 0xD/3
+ * Bit 10 - PHY Isolate | 0xF/5 | Not supported
+ * Bit 9 - Restart AN | 0xD/5 = 0xD/5
+ * Bit 8 - Force Full-Duplex | 0xC/5 = 0xC/5
+ * Bit 7 - Collision Test/Res. | Not supported | Not supported
+ * Bit 6 - Reserved | Not supported | Not supported
+ * Bit 5 - Hp_mdix | 0x9/7 ~ 0xF/7
+ * Bit 4 - Force MDI | 0xD/1 = 0xD/1
+ * Bit 3 - Disable MDIX | 0xD/2 = 0xD/2
+ * Bit 2 - Disable Far-End F. | ???? | 0xD/4
+ * Bit 1 - Disable Transmit | 0xD/6 = 0xD/6
+ * Bit 0 - Disable LED | 0xD/7 = 0xD/7
+ * -------------------------------------------------------------------
+ *
+ * Return: 0 on success, error code on failure.
+ */
+static int ksz8_r_phy_bmcr(struct ksz_device *dev, u16 port, u16 *val)
+{
+ const u16 *regs = dev->info->regs;
+ u8 restart, speed, ctrl;
+ int ret;
+
+ *val = 0;
+
+ ret = ksz_pread8(dev, port, regs[P_NEG_RESTART_CTRL], &restart);
+ if (ret)
+ return ret;
+
+ ret = ksz_pread8(dev, port, regs[P_SPEED_STATUS], &speed);
+ if (ret)
+ return ret;
+
+ ret = ksz_pread8(dev, port, regs[P_FORCE_CTRL], &ctrl);
+ if (ret)
+ return ret;
+
+ if (ctrl & PORT_FORCE_100_MBIT)
+ *val |= BMCR_SPEED100;
+
+ if (ksz_is_ksz88x3(dev)) {
+ if (restart & KSZ8873_PORT_PHY_LOOPBACK)
+ *val |= BMCR_LOOPBACK;
+
+ if ((ctrl & PORT_AUTO_NEG_ENABLE))
+ *val |= BMCR_ANENABLE;
+ } else {
+ ret = ksz879x_get_loopback(dev, port, val);
+ if (ret)
+ return ret;
+
+ if (!(ctrl & PORT_AUTO_NEG_DISABLE))
+ *val |= BMCR_ANENABLE;
+ }
+
+ if (restart & PORT_POWER_DOWN)
+ *val |= BMCR_PDOWN;
+
+ if (restart & PORT_AUTO_NEG_RESTART)
+ *val |= BMCR_ANRESTART;
+
+ if (ctrl & PORT_FORCE_FULL_DUPLEX)
+ *val |= BMCR_FULLDPLX;
+
+ if (speed & PORT_HP_MDIX)
+ *val |= KSZ886X_BMCR_HP_MDIX;
+
+ if (restart & PORT_FORCE_MDIX)
+ *val |= KSZ886X_BMCR_FORCE_MDI;
+
+ if (restart & PORT_AUTO_MDIX_DISABLE)
+ *val |= KSZ886X_BMCR_DISABLE_AUTO_MDIX;
+
+ if (restart & PORT_TX_DISABLE)
+ *val |= KSZ886X_BMCR_DISABLE_TRANSMIT;
+
+ if (restart & PORT_LED_OFF)
+ *val |= KSZ886X_BMCR_DISABLE_LED;
+
+ return 0;
+}
+
int ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
{
- u8 restart, speed, ctrl, link;
+ u8 ctrl, link, val1, val2;
int processed = true;
const u16 *regs;
- u8 val1, val2;
u16 data = 0;
- u8 p = phy;
+ u16 p = phy;
int ret;
regs = dev->info->regs;
switch (reg) {
case MII_BMCR:
- ret = ksz_pread8(dev, p, regs[P_NEG_RESTART_CTRL], &restart);
- if (ret)
- return ret;
-
- ret = ksz_pread8(dev, p, regs[P_SPEED_STATUS], &speed);
- if (ret)
- return ret;
-
- ret = ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
+ ret = ksz8_r_phy_bmcr(dev, p, &data);
if (ret)
return ret;
-
- if (restart & PORT_PHY_LOOPBACK)
- data |= BMCR_LOOPBACK;
- if (ctrl & PORT_FORCE_100_MBIT)
- data |= BMCR_SPEED100;
- if (ksz_is_ksz88x3(dev)) {
- if ((ctrl & PORT_AUTO_NEG_ENABLE))
- data |= BMCR_ANENABLE;
- } else {
- if (!(ctrl & PORT_AUTO_NEG_DISABLE))
- data |= BMCR_ANENABLE;
- }
- if (restart & PORT_POWER_DOWN)
- data |= BMCR_PDOWN;
- if (restart & PORT_AUTO_NEG_RESTART)
- data |= BMCR_ANRESTART;
- if (ctrl & PORT_FORCE_FULL_DUPLEX)
- data |= BMCR_FULLDPLX;
- if (speed & PORT_HP_MDIX)
- data |= KSZ886X_BMCR_HP_MDIX;
- if (restart & PORT_FORCE_MDIX)
- data |= KSZ886X_BMCR_FORCE_MDI;
- if (restart & PORT_AUTO_MDIX_DISABLE)
- data |= KSZ886X_BMCR_DISABLE_AUTO_MDIX;
- if (restart & PORT_TX_DISABLE)
- data |= KSZ886X_BMCR_DISABLE_TRANSMIT;
- if (restart & PORT_LED_OFF)
- data |= KSZ886X_BMCR_DISABLE_LED;
break;
case MII_BMSR:
ret = ksz_pread8(dev, p, regs[P_LINK_STATUS], &link);
@@ -860,113 +974,137 @@ static int ksz8_w_phy_ctrl(struct ksz_device *dev, int port, u16 val)
return ret;
}
-int ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
+/**
+ * ksz8_w_phy_bmcr - Translates and writes to the SMI interface from a MIIM PHY
+ * Basic mode control register (Reg. 0).
+ * @dev: The KSZ device instance.
+ * @port: The port number to be configured.
+ * @val: The register value to be written.
+ *
+ * This function translates control settings from a MIIM PHY Basic mode control
+ * register into their corresponding hardware register bit values for the SMI
+ * interface.
+ *
+ * MIIM Bit Mapping Comparison between KSZ8794 and KSZ8873
+ * -------------------------------------------------------------------
+ * MIIM Bit | KSZ8794 Reg/Bit | KSZ8873 Reg/Bit
+ * ----------------------------+-----------------------------+----------------
+ * Bit 15 - Soft Reset | 0xF/4 | Not supported
+ * Bit 14 - Loopback | 0xD/0 (MAC), 0xF/7 (PHY) ~ 0xD/0 (PHY)
+ * Bit 13 - Force 100 | 0xC/6 = 0xC/6
+ * Bit 12 - AN Enable | 0xC/7 (reverse logic) ~ 0xC/7
+ * Bit 11 - Power Down | 0xD/3 = 0xD/3
+ * Bit 10 - PHY Isolate | 0xF/5 | Not supported
+ * Bit 9 - Restart AN | 0xD/5 = 0xD/5
+ * Bit 8 - Force Full-Duplex | 0xC/5 = 0xC/5
+ * Bit 7 - Collision Test/Res. | Not supported | Not supported
+ * Bit 6 - Reserved | Not supported | Not supported
+ * Bit 5 - Hp_mdix | 0x9/7 ~ 0xF/7
+ * Bit 4 - Force MDI | 0xD/1 = 0xD/1
+ * Bit 3 - Disable MDIX | 0xD/2 = 0xD/2
+ * Bit 2 - Disable Far-End F. | ???? | 0xD/4
+ * Bit 1 - Disable Transmit | 0xD/6 = 0xD/6
+ * Bit 0 - Disable LED | 0xD/7 = 0xD/7
+ * -------------------------------------------------------------------
+ *
+ * Return: 0 on success, error code on failure.
+ */
+static int ksz8_w_phy_bmcr(struct ksz_device *dev, u16 port, u16 val)
{
- u8 restart, speed, ctrl, data;
- const u16 *regs;
- u8 p = phy;
+ u8 restart, speed, ctrl, restart_mask;
+ const u16 *regs = dev->info->regs;
int ret;
- regs = dev->info->regs;
+ /* Do not support PHY reset function. */
+ if (val & BMCR_RESET)
+ return 0;
- switch (reg) {
- case MII_BMCR:
+ speed = 0;
+ if (val & KSZ886X_BMCR_HP_MDIX)
+ speed |= PORT_HP_MDIX;
- /* Do not support PHY reset function. */
- if (val & BMCR_RESET)
- break;
- ret = ksz_pread8(dev, p, regs[P_SPEED_STATUS], &speed);
- if (ret)
- return ret;
+ ret = ksz_prmw8(dev, port, regs[P_SPEED_STATUS], PORT_HP_MDIX, speed);
+ if (ret)
+ return ret;
- data = speed;
- if (val & KSZ886X_BMCR_HP_MDIX)
- data |= PORT_HP_MDIX;
- else
- data &= ~PORT_HP_MDIX;
+ ctrl = 0;
+ if (ksz_is_ksz88x3(dev)) {
+ if ((val & BMCR_ANENABLE))
+ ctrl |= PORT_AUTO_NEG_ENABLE;
+ } else {
+ if (!(val & BMCR_ANENABLE))
+ ctrl |= PORT_AUTO_NEG_DISABLE;
- if (data != speed) {
- ret = ksz_pwrite8(dev, p, regs[P_SPEED_STATUS], data);
- if (ret)
- return ret;
- }
+ /* Fiber port does not support auto-negotiation. */
+ if (dev->ports[port].fiber)
+ ctrl |= PORT_AUTO_NEG_DISABLE;
+ }
- ret = ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
- if (ret)
- return ret;
+ if (val & BMCR_SPEED100)
+ ctrl |= PORT_FORCE_100_MBIT;
- data = ctrl;
- if (ksz_is_ksz88x3(dev)) {
- if ((val & BMCR_ANENABLE))
- data |= PORT_AUTO_NEG_ENABLE;
- else
- data &= ~PORT_AUTO_NEG_ENABLE;
- } else {
- if (!(val & BMCR_ANENABLE))
- data |= PORT_AUTO_NEG_DISABLE;
- else
- data &= ~PORT_AUTO_NEG_DISABLE;
-
- /* Fiber port does not support auto-negotiation. */
- if (dev->ports[p].fiber)
- data |= PORT_AUTO_NEG_DISABLE;
- }
+ if (val & BMCR_FULLDPLX)
+ ctrl |= PORT_FORCE_FULL_DUPLEX;
- if (val & BMCR_SPEED100)
- data |= PORT_FORCE_100_MBIT;
- else
- data &= ~PORT_FORCE_100_MBIT;
- if (val & BMCR_FULLDPLX)
- data |= PORT_FORCE_FULL_DUPLEX;
- else
- data &= ~PORT_FORCE_FULL_DUPLEX;
+ ret = ksz_prmw8(dev, port, regs[P_FORCE_CTRL], PORT_FORCE_100_MBIT |
+ /* PORT_AUTO_NEG_ENABLE and PORT_AUTO_NEG_DISABLE are the same
+ * bits
+ */
+ PORT_FORCE_FULL_DUPLEX | PORT_AUTO_NEG_ENABLE, ctrl);
+ if (ret)
+ return ret;
- if (data != ctrl) {
- ret = ksz_pwrite8(dev, p, regs[P_FORCE_CTRL], data);
- if (ret)
- return ret;
- }
+ restart = 0;
+ restart_mask = PORT_LED_OFF | PORT_TX_DISABLE | PORT_AUTO_NEG_RESTART |
+ PORT_POWER_DOWN | PORT_AUTO_MDIX_DISABLE | PORT_FORCE_MDIX;
+
+ if (val & KSZ886X_BMCR_DISABLE_LED)
+ restart |= PORT_LED_OFF;
+
+ if (val & KSZ886X_BMCR_DISABLE_TRANSMIT)
+ restart |= PORT_TX_DISABLE;
+
+ if (val & BMCR_ANRESTART)
+ restart |= PORT_AUTO_NEG_RESTART;
+
+ if (val & BMCR_PDOWN)
+ restart |= PORT_POWER_DOWN;
+
+ if (val & KSZ886X_BMCR_DISABLE_AUTO_MDIX)
+ restart |= PORT_AUTO_MDIX_DISABLE;
+
+ if (val & KSZ886X_BMCR_FORCE_MDI)
+ restart |= PORT_FORCE_MDIX;
- ret = ksz_pread8(dev, p, regs[P_NEG_RESTART_CTRL], &restart);
+ if (ksz_is_ksz88x3(dev)) {
+ restart_mask |= KSZ8873_PORT_PHY_LOOPBACK;
+
+ if (val & BMCR_LOOPBACK)
+ restart |= KSZ8873_PORT_PHY_LOOPBACK;
+ } else {
+ ret = ksz879x_set_loopback(dev, port, val);
if (ret)
return ret;
+ }
- data = restart;
- if (val & KSZ886X_BMCR_DISABLE_LED)
- data |= PORT_LED_OFF;
- else
- data &= ~PORT_LED_OFF;
- if (val & KSZ886X_BMCR_DISABLE_TRANSMIT)
- data |= PORT_TX_DISABLE;
- else
- data &= ~PORT_TX_DISABLE;
- if (val & BMCR_ANRESTART)
- data |= PORT_AUTO_NEG_RESTART;
- else
- data &= ~(PORT_AUTO_NEG_RESTART);
- if (val & BMCR_PDOWN)
- data |= PORT_POWER_DOWN;
- else
- data &= ~PORT_POWER_DOWN;
- if (val & KSZ886X_BMCR_DISABLE_AUTO_MDIX)
- data |= PORT_AUTO_MDIX_DISABLE;
- else
- data &= ~PORT_AUTO_MDIX_DISABLE;
- if (val & KSZ886X_BMCR_FORCE_MDI)
- data |= PORT_FORCE_MDIX;
- else
- data &= ~PORT_FORCE_MDIX;
- if (val & BMCR_LOOPBACK)
- data |= PORT_PHY_LOOPBACK;
- else
- data &= ~PORT_PHY_LOOPBACK;
+ return ksz_prmw8(dev, port, regs[P_NEG_RESTART_CTRL], restart_mask,
+ restart);
+}
- if (data != restart) {
- ret = ksz_pwrite8(dev, p, regs[P_NEG_RESTART_CTRL],
- data);
- if (ret)
- return ret;
- }
+int ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
+{
+ const u16 *regs;
+ u8 ctrl, data;
+ u16 p = phy;
+ int ret;
+
+ regs = dev->info->regs;
+
+ switch (reg) {
+ case MII_BMCR:
+ ret = ksz8_w_phy_bmcr(dev, p, val);
+ if (ret)
+ return ret;
break;
case MII_ADVERTISE:
ret = ksz_pread8(dev, p, regs[P_LOCAL_CTRL], &ctrl);
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index beca974e0171..7c9341ef73b0 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -265,6 +265,7 @@
#define PORT_AUTO_MDIX_DISABLE BIT(2)
#define PORT_FORCE_MDIX BIT(1)
#define PORT_MAC_LOOPBACK BIT(0)
+#define KSZ8873_PORT_PHY_LOOPBACK BIT(0)
#define REG_PORT_1_STATUS_2 0x1E
#define REG_PORT_2_STATUS_2 0x2E
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c
index cac4a607e54a..82bebee4615c 100644
--- a/drivers/net/dsa/microchip/ksz9477_i2c.c
+++ b/drivers/net/dsa/microchip/ksz9477_i2c.c
@@ -104,6 +104,10 @@ static const struct of_device_id ksz9477_dt_ids[] = {
.data = &ksz_switch_chips[KSZ8563]
},
{
+ .compatible = "microchip,ksz8567",
+ .data = &ksz_switch_chips[KSZ8567]
+ },
+ {
.compatible = "microchip,ksz9567",
.data = &ksz_switch_chips[KSZ9567]
},
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 245dfb7a7a31..2b510f150dd8 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1476,6 +1476,39 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.gbit_capable = {true, true, true},
},
+ [KSZ8567] = {
+ .chip_id = KSZ8567_CHIP_ID,
+ .dev_name = "KSZ8567",
+ .num_vlans = 4096,
+ .num_alus = 4096,
+ .num_statics = 16,
+ .cpu_ports = 0x7F, /* can be configured as cpu port */
+ .port_cnt = 7, /* total port count */
+ .port_nirqs = 3,
+ .num_tx_queues = 4,
+ .tc_cbs_supported = true,
+ .tc_ets_supported = true,
+ .ops = &ksz9477_dev_ops,
+ .mib_names = ksz9477_mib_names,
+ .mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
+ .reg_mib_cnt = MIB_COUNTER_NUM,
+ .regs = ksz9477_regs,
+ .masks = ksz9477_masks,
+ .shifts = ksz9477_shifts,
+ .xmii_ctrl0 = ksz9477_xmii_ctrl0,
+ .xmii_ctrl1 = ksz9477_xmii_ctrl1,
+ .supports_mii = {false, false, false, false,
+ false, true, true},
+ .supports_rmii = {false, false, false, false,
+ false, true, true},
+ .supports_rgmii = {false, false, false, false,
+ false, true, true},
+ .internal_phy = {true, true, true, true,
+ true, false, false},
+ .gbit_capable = {false, false, false, false, false,
+ true, true},
+ },
+
[KSZ9567] = {
.chip_id = KSZ9567_CHIP_ID,
.dev_name = "KSZ9567",
@@ -1864,6 +1897,29 @@ static void ksz_get_strings(struct dsa_switch *ds, int port,
}
}
+/**
+ * ksz_update_port_member - Adjust port forwarding rules based on STP state and
+ * isolation settings.
+ * @dev: A pointer to the struct ksz_device representing the device.
+ * @port: The port number to adjust.
+ *
+ * This function dynamically adjusts the port membership configuration for a
+ * specified port and other device ports, based on Spanning Tree Protocol (STP)
+ * states and port isolation settings. Each port, including the CPU port, has a
+ * membership register, represented as a bitfield, where each bit corresponds
+ * to a port number. A set bit indicates permission to forward frames to that
+ * port. This function iterates over all ports, updating the membership register
+ * to reflect current forwarding permissions:
+ *
+ * 1. Forwards frames only to ports that are part of the same bridge group and
+ * in the BR_STATE_FORWARDING state.
+ * 2. Takes into account the isolation status of ports; ports in the
+ * BR_STATE_FORWARDING state with BR_ISOLATED configuration will not forward
+ * frames to each other, even if they are in the same bridge group.
+ * 3. Ensures that the CPU port is included in the membership based on its
+ * upstream port configuration, allowing for management and control traffic
+ * to flow as required.
+ */
static void ksz_update_port_member(struct ksz_device *dev, int port)
{
struct ksz_port *p = &dev->ports[port];
@@ -1892,7 +1948,14 @@ static void ksz_update_port_member(struct ksz_device *dev, int port)
if (other_p->stp_state != BR_STATE_FORWARDING)
continue;
- if (p->stp_state == BR_STATE_FORWARDING) {
+ /* At this point we know that "port" and "other" port [i] are in
+ * the same bridge group and that "other" port [i] is in
+ * forwarding stp state. If "port" is also in forwarding stp
+ * state, we can allow forwarding from port [port] to port [i].
+ * Except if both ports are isolated.
+ */
+ if (p->stp_state == BR_STATE_FORWARDING &&
+ !(p->isolated && other_p->isolated)) {
val |= BIT(port);
port_member |= BIT(i);
}
@@ -1911,8 +1974,19 @@ static void ksz_update_port_member(struct ksz_device *dev, int port)
third_p = &dev->ports[j];
if (third_p->stp_state != BR_STATE_FORWARDING)
continue;
+
third_dp = dsa_to_port(ds, j);
- if (dsa_port_bridge_same(other_dp, third_dp))
+
+ /* Now we updating relation of the "other" port [i] to
+ * the "third" port [j]. We already know that "other"
+ * port [i] is in forwarding stp state and that "third"
+ * port [j] is in forwarding stp state too.
+ * We need to check if "other" port [i] and "third" port
+ * [j] are in the same bridge group and not isolated
+ * before allowing forwarding from port [i] to port [j].
+ */
+ if (dsa_port_bridge_same(other_dp, third_dp) &&
+ !(other_p->isolated && third_p->isolated))
val |= BIT(j);
}
@@ -2185,6 +2259,8 @@ static int ksz_pirq_setup(struct ksz_device *dev, u8 p)
return ksz_irq_common_setup(dev, pirq);
}
+static int ksz_parse_drive_strength(struct ksz_device *dev);
+
static int ksz_setup(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
@@ -2206,6 +2282,10 @@ static int ksz_setup(struct dsa_switch *ds)
return ret;
}
+ ret = ksz_parse_drive_strength(dev);
+ if (ret)
+ return ret;
+
/* set broadcast storm protection 10% rate */
regmap_update_bits(ksz_regmap_16(dev), regs[S_BROADCAST_CTRL],
BROADCAST_STORM_RATE,
@@ -2649,6 +2729,7 @@ static void ksz_port_teardown(struct dsa_switch *ds, int port)
switch (dev->chip_id) {
case KSZ8563_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case KSZ9477_CHIP_ID:
case KSZ9563_CHIP_ID:
case KSZ9567_CHIP_ID:
@@ -2664,7 +2745,7 @@ static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
- if (flags.mask & ~BR_LEARNING)
+ if (flags.mask & ~(BR_LEARNING | BR_ISOLATED))
return -EINVAL;
return 0;
@@ -2677,8 +2758,12 @@ static int ksz_port_bridge_flags(struct dsa_switch *ds, int port,
struct ksz_device *dev = ds->priv;
struct ksz_port *p = &dev->ports[port];
- if (flags.mask & BR_LEARNING) {
- p->learning = !!(flags.val & BR_LEARNING);
+ if (flags.mask & (BR_LEARNING | BR_ISOLATED)) {
+ if (flags.mask & BR_LEARNING)
+ p->learning = !!(flags.val & BR_LEARNING);
+
+ if (flags.mask & BR_ISOLATED)
+ p->isolated = !!(flags.val & BR_ISOLATED);
/* Make the change take effect immediately */
ksz_port_stp_state_set(ds, port, p->stp_state);
@@ -2705,7 +2790,8 @@ static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
dev->chip_id == KSZ9563_CHIP_ID)
proto = DSA_TAG_PROTO_KSZ9893;
- if (dev->chip_id == KSZ9477_CHIP_ID ||
+ if (dev->chip_id == KSZ8567_CHIP_ID ||
+ dev->chip_id == KSZ9477_CHIP_ID ||
dev->chip_id == KSZ9896_CHIP_ID ||
dev->chip_id == KSZ9897_CHIP_ID ||
dev->chip_id == KSZ9567_CHIP_ID)
@@ -2813,6 +2899,7 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port)
case KSZ8830_CHIP_ID:
return KSZ8863_HUGE_PACKET_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
case KSZ8563_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case KSZ9477_CHIP_ID:
case KSZ9563_CHIP_ID:
case KSZ9567_CHIP_ID:
@@ -2839,6 +2926,7 @@ static int ksz_validate_eee(struct dsa_switch *ds, int port)
switch (dev->chip_id) {
case KSZ8563_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case KSZ9477_CHIP_ID:
case KSZ9563_CHIP_ID:
case KSZ9567_CHIP_ID:
@@ -2852,7 +2940,7 @@ static int ksz_validate_eee(struct dsa_switch *ds, int port)
}
static int ksz_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
int ret;
@@ -2872,7 +2960,7 @@ static int ksz_get_mac_eee(struct dsa_switch *ds, int port,
}
static int ksz_set_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
struct ksz_device *dev = ds->priv;
int ret;
@@ -3183,6 +3271,7 @@ static int ksz_switch_detect(struct ksz_device *dev)
case KSZ9896_CHIP_ID:
case KSZ9897_CHIP_ID:
case KSZ9567_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case LAN9370_CHIP_ID:
case LAN9371_CHIP_ID:
case LAN9372_CHIP_ID:
@@ -3220,6 +3309,7 @@ static int ksz_cls_flower_add(struct dsa_switch *ds, int port,
switch (dev->chip_id) {
case KSZ8563_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case KSZ9477_CHIP_ID:
case KSZ9563_CHIP_ID:
case KSZ9567_CHIP_ID:
@@ -3239,6 +3329,7 @@ static int ksz_cls_flower_del(struct dsa_switch *ds, int port,
switch (dev->chip_id) {
case KSZ8563_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case KSZ9477_CHIP_ID:
case KSZ9563_CHIP_ID:
case KSZ9567_CHIP_ID:
@@ -4142,6 +4233,7 @@ static int ksz_parse_drive_strength(struct ksz_device *dev)
case KSZ8794_CHIP_ID:
case KSZ8765_CHIP_ID:
case KSZ8563_CHIP_ID:
+ case KSZ8567_CHIP_ID:
case KSZ9477_CHIP_ID:
case KSZ9563_CHIP_ID:
case KSZ9567_CHIP_ID:
@@ -4242,10 +4334,6 @@ int ksz_switch_register(struct ksz_device *dev)
for (port_num = 0; port_num < dev->info->port_cnt; ++port_num)
dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA;
if (dev->dev->of_node) {
- ret = ksz_parse_drive_strength(dev);
- if (ret)
- return ret;
-
ret = of_get_phy_mode(dev->dev->of_node, &interface);
if (ret == 0)
dev->compat_interface = interface;
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 15612101a155..40c11b0d6b62 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -110,6 +110,7 @@ struct ksz_switch_macaddr {
struct ksz_port {
bool remove_tag; /* Remove Tag flag set, for ksz8795 only */
bool learning;
+ bool isolated;
int stp_state;
struct phy_device phydev;
@@ -187,6 +188,7 @@ struct ksz_device {
/* List of supported models */
enum ksz_model {
KSZ8563,
+ KSZ8567,
KSZ8795,
KSZ8794,
KSZ8765,
diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz_spi.c
index 6f6d878e742c..c8166fb440ab 100644
--- a/drivers/net/dsa/microchip/ksz_spi.c
+++ b/drivers/net/dsa/microchip/ksz_spi.c
@@ -165,6 +165,10 @@ static const struct of_device_id ksz_dt_ids[] = {
.data = &ksz_switch_chips[KSZ8563]
},
{
+ .compatible = "microchip,ksz8567",
+ .data = &ksz_switch_chips[KSZ8567]
+ },
+ {
.compatible = "microchip,ksz9567",
.data = &ksz_switch_chips[KSZ9567]
},
@@ -204,6 +208,7 @@ static const struct spi_device_id ksz_spi_ids[] = {
{ "ksz9893" },
{ "ksz9563" },
{ "ksz8563" },
+ { "ksz8567" },
{ "ksz9567" },
{ "lan9370" },
{ "lan9371" },
diff --git a/drivers/net/dsa/mt7530-mdio.c b/drivers/net/dsa/mt7530-mdio.c
index 088533663b83..fa3ee85a99c1 100644
--- a/drivers/net/dsa/mt7530-mdio.c
+++ b/drivers/net/dsa/mt7530-mdio.c
@@ -81,17 +81,14 @@ static const struct regmap_bus mt7530_regmap_bus = {
};
static int
-mt7531_create_sgmii(struct mt7530_priv *priv, bool dual_sgmii)
+mt7531_create_sgmii(struct mt7530_priv *priv)
{
struct regmap_config *mt7531_pcs_config[2] = {};
struct phylink_pcs *pcs;
struct regmap *regmap;
int i, ret = 0;
- /* MT7531AE has two SGMII units for port 5 and port 6
- * MT7531BE has only one SGMII unit for port 6
- */
- for (i = dual_sgmii ? 0 : 1; i < 2; i++) {
+ for (i = priv->p5_sgmii ? 0 : 1; i < 2; i++) {
mt7531_pcs_config[i] = devm_kzalloc(priv->dev,
sizeof(struct regmap_config),
GFP_KERNEL);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 391c4dbdff42..678b51f9cea6 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -414,92 +414,57 @@ mt753x_preferred_default_local_cpu_port(struct dsa_switch *ds)
}
/* Setup port 6 interface mode and TRGMII TX circuit */
-static int
-mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
+static void
+mt7530_setup_port6(struct dsa_switch *ds, phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
- u32 ncpo1, ssc_delta, trgint, xtal;
-
- xtal = mt7530_read(priv, MT7530_MHWTRAP) & HWTRAP_XTAL_MASK;
+ u32 ncpo1, ssc_delta, xtal;
- if (xtal == HWTRAP_XTAL_20MHZ) {
- dev_err(priv->dev,
- "%s: MT7530 with a 20MHz XTAL is not supported!\n",
- __func__);
- return -EINVAL;
- }
+ /* Disable the MT7530 TRGMII clocks */
+ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN);
- switch (interface) {
- case PHY_INTERFACE_MODE_RGMII:
- trgint = 0;
- break;
- case PHY_INTERFACE_MODE_TRGMII:
- trgint = 1;
- if (xtal == HWTRAP_XTAL_25MHZ)
- ssc_delta = 0x57;
- else
- ssc_delta = 0x87;
- if (priv->id == ID_MT7621) {
- /* PLL frequency: 125MHz: 1.0GBit */
- if (xtal == HWTRAP_XTAL_40MHZ)
- ncpo1 = 0x0640;
- if (xtal == HWTRAP_XTAL_25MHZ)
- ncpo1 = 0x0a00;
- } else { /* PLL frequency: 250MHz: 2.0Gbit */
- if (xtal == HWTRAP_XTAL_40MHZ)
- ncpo1 = 0x0c80;
- if (xtal == HWTRAP_XTAL_25MHZ)
- ncpo1 = 0x1400;
- }
- break;
- default:
- dev_err(priv->dev, "xMII interface %d not supported\n",
- interface);
- return -EINVAL;
+ if (interface == PHY_INTERFACE_MODE_RGMII) {
+ mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK,
+ P6_INTF_MODE(0));
+ return;
}
- mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK,
- P6_INTF_MODE(trgint));
+ mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK, P6_INTF_MODE(1));
- if (trgint) {
- /* Disable the MT7530 TRGMII clocks */
- core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN);
+ xtal = mt7530_read(priv, MT7530_MHWTRAP) & HWTRAP_XTAL_MASK;
- /* Setup the MT7530 TRGMII Tx Clock */
- core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
- core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
- core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
- core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
- core_write(priv, CORE_PLL_GROUP4,
- RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN |
- RG_SYSPLL_BIAS_LPF_EN);
- core_write(priv, CORE_PLL_GROUP2,
- RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN |
- RG_SYSPLL_POSDIV(1));
- core_write(priv, CORE_PLL_GROUP7,
- RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
- RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
+ if (xtal == HWTRAP_XTAL_25MHZ)
+ ssc_delta = 0x57;
+ else
+ ssc_delta = 0x87;
- /* Enable the MT7530 TRGMII clocks */
- core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN);
+ if (priv->id == ID_MT7621) {
+ /* PLL frequency: 125MHz: 1.0GBit */
+ if (xtal == HWTRAP_XTAL_40MHZ)
+ ncpo1 = 0x0640;
+ if (xtal == HWTRAP_XTAL_25MHZ)
+ ncpo1 = 0x0a00;
+ } else { /* PLL frequency: 250MHz: 2.0Gbit */
+ if (xtal == HWTRAP_XTAL_40MHZ)
+ ncpo1 = 0x0c80;
+ if (xtal == HWTRAP_XTAL_25MHZ)
+ ncpo1 = 0x1400;
}
- return 0;
-}
+ /* Setup the MT7530 TRGMII Tx Clock */
+ core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
+ core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
+ core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
+ core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
+ core_write(priv, CORE_PLL_GROUP4, RG_SYSPLL_DDSFBK_EN |
+ RG_SYSPLL_BIAS_EN | RG_SYSPLL_BIAS_LPF_EN);
+ core_write(priv, CORE_PLL_GROUP2, RG_SYSPLL_EN_NORMAL |
+ RG_SYSPLL_VODEN | RG_SYSPLL_POSDIV(1));
+ core_write(priv, CORE_PLL_GROUP7, RG_LCDDS_PCW_NCPO_CHG |
+ RG_LCCDS_C(3) | RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
-static bool mt7531_dual_sgmii_supported(struct mt7530_priv *priv)
-{
- u32 val;
-
- val = mt7530_read(priv, MT7531_TOP_SIG_SR);
-
- return (val & PAD_DUAL_SGMII_EN) != 0;
-}
-
-static int
-mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface)
-{
- return 0;
+ /* Enable the MT7530 TRGMII clocks */
+ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN);
}
static void
@@ -510,9 +475,6 @@ mt7531_pll_setup(struct mt7530_priv *priv)
u32 xtal;
u32 val;
- if (mt7531_dual_sgmii_supported(priv))
- return;
-
val = mt7530_read(priv, MT7531_CREV);
top_sig = mt7530_read(priv, MT7531_TOP_SIG_SR);
hwstrap = mt7530_read(priv, MT7531_HWTRAP);
@@ -920,8 +882,6 @@ static const char *p5_intf_modes(unsigned int p5_interface)
return "PHY P4";
case P5_INTF_SEL_GMAC5:
return "GMAC5";
- case P5_INTF_SEL_GMAC5_SGMII:
- return "GMAC5_SGMII";
default:
return "unknown";
}
@@ -956,13 +916,8 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
/* MT7530_P5_MODE_GMAC: P5 -> External phy or 2nd GMAC */
val &= ~MHWTRAP_P5_DIS;
break;
- case P5_DISABLED:
- interface = PHY_INTERFACE_MODE_NA;
- break;
default:
- dev_err(ds->dev, "Unsupported p5_intf_sel %d\n",
- priv->p5_intf_sel);
- goto unlock_exit;
+ break;
}
/* Setup RGMII settings */
@@ -992,9 +947,6 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
dev_dbg(ds->dev, "Setup P5, HWTRAP=0x%x, intf_sel=%s, phy-mode=%s\n",
val, p5_intf_modes(priv->p5_intf_sel), phy_modes(interface));
- priv->p5_interface = interface;
-
-unlock_exit:
mutex_unlock(&priv->reg_mutex);
}
@@ -1014,18 +966,10 @@ mt753x_trap_frames(struct mt7530_priv *priv)
MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY));
}
-static int
+static void
mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
- int ret;
-
- /* Setup max capability of CPU port at first */
- if (priv->info->cpu_port_config) {
- ret = priv->info->cpu_port_config(ds, port);
- if (ret)
- return ret;
- }
/* Enable Mediatek header mode on the cpu port */
mt7530_write(priv, MT7530_PVC_P(port),
@@ -1035,10 +979,6 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
mt7530_set(priv, MT7530_MFC, BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) |
UNU_FFP(BIT(port)));
- /* Set CPU port number */
- if (priv->id == ID_MT7530 || priv->id == ID_MT7621)
- mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port));
-
/* Add the CPU port to the CPU port bitmap for MT7531 and the switch on
* the MT7988 SoC. Trapped frames will be forwarded to the CPU port that
* is affine to the inbound user port.
@@ -1055,8 +995,6 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
/* Set to fallback mode for independent VLAN learning */
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_FALLBACK_MODE);
-
- return 0;
}
static int
@@ -1080,7 +1018,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port,
priv->ports[port].enable = true;
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
priv->ports[port].pm);
- mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK);
mutex_unlock(&priv->reg_mutex);
@@ -1100,7 +1037,6 @@ mt7530_port_disable(struct dsa_switch *ds, int port)
priv->ports[port].enable = false;
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
- mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK);
mutex_unlock(&priv->reg_mutex);
}
@@ -2107,7 +2043,7 @@ mt7530_setup_irq(struct mt7530_priv *priv)
}
/* This register must be set for MT7530 to properly fire interrupts */
- if (priv->id != ID_MT7531)
+ if (priv->id == ID_MT7530 || priv->id == ID_MT7621)
mt7530_set(priv, MT7530_TOP_SIG_CTRL, TOP_SIG_CTRL_NORMAL);
ret = request_threaded_irq(priv->irq, NULL, mt7530_irq_thread_fn,
@@ -2146,24 +2082,40 @@ mt7530_free_irq_common(struct mt7530_priv *priv)
static void
mt7530_free_irq(struct mt7530_priv *priv)
{
- mt7530_free_mdio_irq(priv);
+ struct device_node *mnp, *np = priv->dev->of_node;
+
+ mnp = of_get_child_by_name(np, "mdio");
+ if (!mnp)
+ mt7530_free_mdio_irq(priv);
+ of_node_put(mnp);
+
mt7530_free_irq_common(priv);
}
static int
mt7530_setup_mdio(struct mt7530_priv *priv)
{
+ struct device_node *mnp, *np = priv->dev->of_node;
struct dsa_switch *ds = priv->ds;
struct device *dev = priv->dev;
struct mii_bus *bus;
static int idx;
- int ret;
+ int ret = 0;
+
+ mnp = of_get_child_by_name(np, "mdio");
+
+ if (mnp && !of_device_is_available(mnp))
+ goto out;
bus = devm_mdiobus_alloc(dev);
- if (!bus)
- return -ENOMEM;
+ if (!bus) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (!mnp)
+ ds->user_mii_bus = bus;
- ds->user_mii_bus = bus;
bus->priv = priv;
bus->name = KBUILD_MODNAME "-mii";
snprintf(bus->id, MII_BUS_ID_SIZE, KBUILD_MODNAME "-%d", idx++);
@@ -2174,16 +2126,18 @@ mt7530_setup_mdio(struct mt7530_priv *priv)
bus->parent = dev;
bus->phy_mask = ~ds->phys_mii_mask;
- if (priv->irq)
+ if (priv->irq && !mnp)
mt7530_setup_mdio_irq(priv);
- ret = devm_mdiobus_register(dev, bus);
+ ret = devm_of_mdiobus_register(dev, bus, mnp);
if (ret) {
dev_err(dev, "failed to register MDIO bus: %d\n", ret);
- if (priv->irq)
+ if (priv->irq && !mnp)
mt7530_free_mdio_irq(priv);
}
+out:
+ of_node_put(mnp);
return ret;
}
@@ -2238,6 +2192,12 @@ mt7530_setup(struct dsa_switch *ds)
}
}
+ /* Disable LEDs before reset to prevent the MT7530 sampling a
+ * potentially incorrect HT_XTAL_FSEL value.
+ */
+ mt7530_write(priv, MT7530_LED_EN, 0);
+ usleep_range(1000, 1100);
+
/* Reset whole chip through gpio pin or memory-mapped registers for
* different type of hardware
*/
@@ -2267,6 +2227,12 @@ mt7530_setup(struct dsa_switch *ds)
return -ENODEV;
}
+ if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_20MHZ) {
+ dev_err(priv->dev,
+ "MT7530 with a 20MHz XTAL is not supported!\n");
+ return -EINVAL;
+ }
+
/* Reset the switch through internal reset */
mt7530_write(priv, MT7530_SYS_CTRL,
SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
@@ -2289,14 +2255,18 @@ mt7530_setup(struct dsa_switch *ds)
val |= MHWTRAP_MANUAL;
mt7530_write(priv, MT7530_MHWTRAP, val);
- priv->p6_interface = PHY_INTERFACE_MODE_NA;
-
mt753x_trap_frames(priv);
/* Enable and reset MIB counters */
mt7530_mib_reset(ds);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
+ /* Clear link settings and enable force mode to force link down
+ * on all ports until they're enabled later.
+ */
+ mt7530_rmw(priv, MT7530_PMCR_P(i), PMCR_LINK_SETTINGS_MASK |
+ PMCR_FORCE_MODE, PMCR_FORCE_MODE);
+
/* Disable forwarding by default on all ports */
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
@@ -2305,9 +2275,7 @@ mt7530_setup(struct dsa_switch *ds)
mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
if (dsa_is_cpu_port(ds, i)) {
- ret = mt753x_cpu_port_enable(ds, i);
- if (ret)
- return ret;
+ mt753x_cpu_port_enable(ds, i);
} else {
mt7530_port_disable(ds, i);
@@ -2326,16 +2294,13 @@ mt7530_setup(struct dsa_switch *ds)
return ret;
/* Setup port 5 */
- priv->p5_intf_sel = P5_DISABLED;
- interface = PHY_INTERFACE_MODE_NA;
-
if (!dsa_is_unused_port(ds, 5)) {
priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
- ret = of_get_phy_mode(dsa_to_port(ds, 5)->dn, &interface);
- if (ret && ret != -ENODEV)
- return ret;
} else {
- /* Scan the ethernet nodes. look for GMAC1, lookup used phy */
+ /* Scan the ethernet nodes. Look for GMAC1, lookup the used PHY.
+ * Set priv->p5_intf_sel to the appropriate value if PHY muxing
+ * is detected.
+ */
for_each_child_of_node(dn, mac_np) {
if (!of_device_is_compatible(mac_np,
"mediatek,eth-mac"))
@@ -2366,6 +2331,10 @@ mt7530_setup(struct dsa_switch *ds)
of_node_put(phy_node);
break;
}
+
+ if (priv->p5_intf_sel == P5_INTF_SEL_PHY_P0 ||
+ priv->p5_intf_sel == P5_INTF_SEL_PHY_P4)
+ mt7530_setup_port5(ds, interface);
}
#ifdef CONFIG_GPIOLIB
@@ -2376,8 +2345,6 @@ mt7530_setup(struct dsa_switch *ds)
}
#endif /* CONFIG_GPIOLIB */
- mt7530_setup_port5(ds, interface);
-
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
@@ -2402,6 +2369,12 @@ mt7531_setup_common(struct dsa_switch *ds)
UNU_FFP_MASK);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
+ /* Clear link settings and enable force mode to force link down
+ * on all ports until they're enabled later.
+ */
+ mt7530_rmw(priv, MT7530_PMCR_P(i), PMCR_LINK_SETTINGS_MASK |
+ MT7531_FORCE_MODE, MT7531_FORCE_MODE);
+
/* Disable forwarding by default on all ports */
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
@@ -2412,9 +2385,7 @@ mt7531_setup_common(struct dsa_switch *ds)
mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
if (dsa_is_cpu_port(ds, i)) {
- ret = mt753x_cpu_port_enable(ds, i);
- if (ret)
- return ret;
+ mt753x_cpu_port_enable(ds, i);
} else {
mt7530_port_disable(ds, i);
@@ -2474,38 +2445,35 @@ mt7531_setup(struct dsa_switch *ds)
return -ENODEV;
}
- /* all MACs must be forced link-down before sw reset */
+ /* MT7531AE has got two SGMII units. One for port 5, one for port 6.
+ * MT7531BE has got only one SGMII unit which is for port 6.
+ */
+ val = mt7530_read(priv, MT7531_TOP_SIG_SR);
+ priv->p5_sgmii = !!(val & PAD_DUAL_SGMII_EN);
+
+ /* Force link down on all ports before internal reset */
for (i = 0; i < MT7530_NUM_PORTS; i++)
mt7530_write(priv, MT7530_PMCR_P(i), MT7531_FORCE_LNK);
/* Reset the switch through internal reset */
- mt7530_write(priv, MT7530_SYS_CTRL,
- SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
- SYS_CTRL_REG_RST);
-
- mt7531_pll_setup(priv);
-
- if (mt7531_dual_sgmii_supported(priv)) {
- priv->p5_intf_sel = P5_INTF_SEL_GMAC5_SGMII;
+ mt7530_write(priv, MT7530_SYS_CTRL, SYS_CTRL_SW_RST | SYS_CTRL_REG_RST);
+ if (!priv->p5_sgmii) {
+ mt7531_pll_setup(priv);
+ } else {
/* Let ds->user_mii_bus be able to access external phy. */
mt7530_rmw(priv, MT7531_GPIO_MODE1, MT7531_GPIO11_RG_RXD2_MASK,
MT7531_EXT_P_MDC_11);
mt7530_rmw(priv, MT7531_GPIO_MODE1, MT7531_GPIO12_RG_RXD3_MASK,
MT7531_EXT_P_MDIO_12);
- } else {
- priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
}
- dev_dbg(ds->dev, "P5 support %s interface\n",
- p5_intf_modes(priv->p5_intf_sel));
+
+ if (!dsa_is_unused_port(ds, 5))
+ priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
MT7531_GPIO0_INTERRUPT);
- /* Let phylink decide the interface later. */
- priv->p5_interface = PHY_INTERFACE_MODE_NA;
- priv->p6_interface = PHY_INTERFACE_MODE_NA;
-
/* Enable PHY core PLL, since phy_device has not yet been created
* provided for phy_[read,write]_mmd_indirect is called, we provide
* our own mt7531_ind_mmd_phy_[read,write] to complete this
@@ -2535,12 +2503,14 @@ static void mt7530_mac_port_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
switch (port) {
- case 0 ... 4: /* Internal phy */
+ /* Ports which are connected to switch PHYs. There is no MII pinout. */
+ case 0 ... 4:
__set_bit(PHY_INTERFACE_MODE_GMII,
config->supported_interfaces);
break;
- case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
+ /* Port 5 supports rgmii with delays, mii, and gmii. */
+ case 5:
phy_interface_set_rgmii(config->supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_MII,
config->supported_interfaces);
@@ -2548,7 +2518,8 @@ static void mt7530_mac_port_get_caps(struct dsa_switch *ds, int port,
config->supported_interfaces);
break;
- case 6: /* 1st cpu port */
+ /* Port 6 supports rgmii and trgmii. */
+ case 6:
__set_bit(PHY_INTERFACE_MODE_RGMII,
config->supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_TRGMII,
@@ -2557,30 +2528,30 @@ static void mt7530_mac_port_get_caps(struct dsa_switch *ds, int port,
}
}
-static bool mt7531_is_rgmii_port(struct mt7530_priv *priv, u32 port)
-{
- return (port == 5) && (priv->p5_intf_sel != P5_INTF_SEL_GMAC5_SGMII);
-}
-
static void mt7531_mac_port_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
struct mt7530_priv *priv = ds->priv;
switch (port) {
- case 0 ... 4: /* Internal phy */
+ /* Ports which are connected to switch PHYs. There is no MII pinout. */
+ case 0 ... 4:
__set_bit(PHY_INTERFACE_MODE_GMII,
config->supported_interfaces);
break;
- case 5: /* 2nd cpu port supports either rgmii or sgmii/8023z */
- if (mt7531_is_rgmii_port(priv, port)) {
+ /* Port 5 supports rgmii with delays on MT7531BE, sgmii/802.3z on
+ * MT7531AE.
+ */
+ case 5:
+ if (!priv->p5_sgmii) {
phy_interface_set_rgmii(config->supported_interfaces);
break;
}
fallthrough;
- case 6: /* 1st cpu port supports sgmii/8023z only */
+ /* Port 6 supports sgmii/802.3z. */
+ case 6:
__set_bit(PHY_INTERFACE_MODE_SGMII,
config->supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_1000BASEX,
@@ -2596,14 +2567,14 @@ static void mt7531_mac_port_get_caps(struct dsa_switch *ds, int port,
static void mt7988_mac_port_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
- phy_interface_zero(config->supported_interfaces);
-
switch (port) {
- case 0 ... 4: /* Internal phy */
+ /* Ports which are connected to switch PHYs. There is no MII pinout. */
+ case 0 ... 3:
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
break;
+ /* Port 6 is connected to SoC's XGMII MAC. There is no MII pinout. */
case 6:
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
@@ -2612,41 +2583,24 @@ static void mt7988_mac_port_get_caps(struct dsa_switch *ds, int port,
}
}
-static int
-mt753x_pad_setup(struct dsa_switch *ds, const struct phylink_link_state *state)
-{
- struct mt7530_priv *priv = ds->priv;
-
- return priv->info->pad_setup(ds, state->interface);
-}
-
-static int
+static void
mt7530_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
- /* Only need to setup port5. */
- if (port != 5)
- return 0;
-
- mt7530_setup_port5(priv->ds, interface);
-
- return 0;
+ if (port == 5)
+ mt7530_setup_port5(priv->ds, interface);
+ else if (port == 6)
+ mt7530_setup_port6(priv->ds, interface);
}
-static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port,
- phy_interface_t interface,
- struct phy_device *phydev)
+static void mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port,
+ phy_interface_t interface,
+ struct phy_device *phydev)
{
u32 val;
- if (!mt7531_is_rgmii_port(priv, port)) {
- dev_err(priv->dev, "RGMII mode is not available for port %d\n",
- port);
- return -EINVAL;
- }
-
val = mt7530_read(priv, MT7531_CLKGEN_CTRL);
val |= GP_CLK_EN;
val &= ~GP_MODE_MASK;
@@ -2674,31 +2628,14 @@ static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port,
case PHY_INTERFACE_MODE_RGMII_ID:
break;
default:
- return -EINVAL;
+ break;
}
}
- mt7530_write(priv, MT7531_CLKGEN_CTRL, val);
-
- return 0;
-}
-
-static bool mt753x_is_mac_port(u32 port)
-{
- return (port == 5 || port == 6);
-}
-
-static int
-mt7988_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
- phy_interface_t interface)
-{
- if (dsa_is_cpu_port(ds, port) &&
- interface == PHY_INTERFACE_MODE_INTERNAL)
- return 0;
- return -EINVAL;
+ mt7530_write(priv, MT7531_CLKGEN_CTRL, val);
}
-static int
+static void
mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface)
{
@@ -2706,39 +2643,11 @@ mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
struct phy_device *phydev;
struct dsa_port *dp;
- if (!mt753x_is_mac_port(port)) {
- dev_err(priv->dev, "port %d is not a MAC port\n", port);
- return -EINVAL;
- }
-
- switch (interface) {
- case PHY_INTERFACE_MODE_RGMII:
- case PHY_INTERFACE_MODE_RGMII_ID:
- case PHY_INTERFACE_MODE_RGMII_RXID:
- case PHY_INTERFACE_MODE_RGMII_TXID:
+ if (phy_interface_mode_is_rgmii(interface)) {
dp = dsa_to_port(ds, port);
phydev = dp->user->phydev;
- return mt7531_rgmii_setup(priv, port, interface, phydev);
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_NA:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- /* handled in SGMII PCS driver */
- return 0;
- default:
- return -EINVAL;
+ mt7531_rgmii_setup(priv, port, interface, phydev);
}
-
- return -EINVAL;
-}
-
-static int
-mt753x_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
- const struct phylink_link_state *state)
-{
- struct mt7530_priv *priv = ds->priv;
-
- return priv->info->mac_port_config(ds, port, mode, state->interface);
}
static struct phylink_pcs *
@@ -2764,54 +2673,13 @@ mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
- u32 mcr_cur, mcr_new;
-
- switch (port) {
- case 0 ... 4: /* Internal phy */
- if (state->interface != PHY_INTERFACE_MODE_GMII &&
- state->interface != PHY_INTERFACE_MODE_INTERNAL)
- goto unsupported;
- break;
- case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
- if (priv->p5_interface == state->interface)
- break;
-
- if (mt753x_mac_config(ds, port, mode, state) < 0)
- goto unsupported;
-
- if (priv->p5_intf_sel != P5_DISABLED)
- priv->p5_interface = state->interface;
- break;
- case 6: /* 1st cpu port */
- if (priv->p6_interface == state->interface)
- break;
-
- mt753x_pad_setup(ds, state);
- if (mt753x_mac_config(ds, port, mode, state) < 0)
- goto unsupported;
-
- priv->p6_interface = state->interface;
- break;
- default:
-unsupported:
- dev_err(ds->dev, "%s: unsupported %s port: %i\n",
- __func__, phy_modes(state->interface), port);
- return;
- }
-
- mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port));
- mcr_new = mcr_cur;
- mcr_new &= ~PMCR_LINK_SETTINGS_MASK;
- mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN |
- PMCR_BACKPR_EN | PMCR_FORCE_MODE_ID(priv->id);
+ if ((port == 5 || port == 6) && priv->info->mac_port_config)
+ priv->info->mac_port_config(ds, port, mode, state->interface);
/* Are we connected to external phy */
if (port == 5 && dsa_is_user_port(ds, 5))
- mcr_new |= PMCR_EXT_PHY;
-
- if (mcr_new != mcr_cur)
- mt7530_write(priv, MT7530_PMCR_P(port), mcr_new);
+ mt7530_set(priv, MT7530_PMCR_P(port), PMCR_EXT_PHY);
}
static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port,
@@ -2835,18 +2703,10 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
mcr = PMCR_RX_EN | PMCR_TX_EN | PMCR_FORCE_LNK;
- /* MT753x MAC works in 1G full duplex mode for all up-clocked
- * variants.
- */
- if (interface == PHY_INTERFACE_MODE_INTERNAL ||
- interface == PHY_INTERFACE_MODE_TRGMII ||
- (phy_interface_mode_is_8023z(interface))) {
- speed = SPEED_1000;
- duplex = DUPLEX_FULL;
- }
-
switch (speed) {
case SPEED_1000:
+ case SPEED_2500:
+ case SPEED_10000:
mcr |= PMCR_FORCE_SPEED_1000;
break;
case SPEED_100:
@@ -2864,6 +2724,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
switch (speed) {
case SPEED_1000:
+ case SPEED_2500:
mcr |= PMCR_FORCE_EEE1G;
break;
case SPEED_100:
@@ -2875,63 +2736,6 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
mt7530_set(priv, MT7530_PMCR_P(port), mcr);
}
-static int
-mt7531_cpu_port_config(struct dsa_switch *ds, int port)
-{
- struct mt7530_priv *priv = ds->priv;
- phy_interface_t interface;
- int speed;
- int ret;
-
- switch (port) {
- case 5:
- if (mt7531_is_rgmii_port(priv, port))
- interface = PHY_INTERFACE_MODE_RGMII;
- else
- interface = PHY_INTERFACE_MODE_2500BASEX;
-
- priv->p5_interface = interface;
- break;
- case 6:
- interface = PHY_INTERFACE_MODE_2500BASEX;
-
- priv->p6_interface = interface;
- break;
- default:
- return -EINVAL;
- }
-
- if (interface == PHY_INTERFACE_MODE_2500BASEX)
- speed = SPEED_2500;
- else
- speed = SPEED_1000;
-
- ret = mt7531_mac_config(ds, port, MLO_AN_FIXED, interface);
- if (ret)
- return ret;
- mt7530_write(priv, MT7530_PMCR_P(port),
- PMCR_CPU_PORT_SETTING(priv->id));
- mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, interface, NULL,
- speed, DUPLEX_FULL, true, true);
-
- return 0;
-}
-
-static int
-mt7988_cpu_port_config(struct dsa_switch *ds, int port)
-{
- struct mt7530_priv *priv = ds->priv;
-
- mt7530_write(priv, MT7530_PMCR_P(port),
- PMCR_CPU_PORT_SETTING(priv->id));
-
- mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED,
- PHY_INTERFACE_MODE_INTERNAL, NULL,
- SPEED_10000, DUPLEX_FULL, true, true);
-
- return 0;
-}
-
static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
@@ -3014,17 +2818,9 @@ static int
mt753x_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
- int i, ret;
-
- /* Initialise the PCS devices */
- for (i = 0; i < priv->ds->num_ports; i++) {
- priv->pcs[i].pcs.ops = priv->info->pcs_ops;
- priv->pcs[i].pcs.neg_mode = true;
- priv->pcs[i].priv = priv;
- priv->pcs[i].port = i;
- }
+ int ret = priv->info->sw_setup(ds);
+ int i;
- ret = priv->info->sw_setup(ds);
if (ret)
return ret;
@@ -3036,8 +2832,16 @@ mt753x_setup(struct dsa_switch *ds)
if (ret && priv->irq)
mt7530_free_irq_common(priv);
+ /* Initialise the PCS devices */
+ for (i = 0; i < priv->ds->num_ports; i++) {
+ priv->pcs[i].pcs.ops = priv->info->pcs_ops;
+ priv->pcs[i].pcs.neg_mode = true;
+ priv->pcs[i].priv = priv;
+ priv->pcs[i].port = i;
+ }
+
if (priv->create_sgmii) {
- ret = priv->create_sgmii(priv, mt7531_dual_sgmii_supported(priv));
+ ret = priv->create_sgmii(priv);
if (ret && priv->irq)
mt7530_free_irq(priv);
}
@@ -3046,7 +2850,7 @@ mt753x_setup(struct dsa_switch *ds)
}
static int mt753x_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
struct mt7530_priv *priv = ds->priv;
u32 eeecr = mt7530_read(priv, MT7530_PMEEECR_P(port));
@@ -3058,7 +2862,7 @@ static int mt753x_get_mac_eee(struct dsa_switch *ds, int port,
}
static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
struct mt7530_priv *priv = ds->priv;
u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN;
@@ -3075,9 +2879,34 @@ static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
return 0;
}
-static int mt7988_pad_setup(struct dsa_switch *ds, phy_interface_t interface)
+static void
+mt753x_conduit_state_change(struct dsa_switch *ds,
+ const struct net_device *conduit,
+ bool operational)
{
- return 0;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
+ struct mt7530_priv *priv = ds->priv;
+ int val = 0;
+ u8 mask;
+
+ /* Set the CPU port to trap frames to for MT7530. Trapped frames will be
+ * forwarded to the numerically smallest CPU port whose conduit
+ * interface is up.
+ */
+ if (priv->id != ID_MT7530 && priv->id != ID_MT7621)
+ return;
+
+ mask = BIT(cpu_dp->index);
+
+ if (operational)
+ priv->active_cpu_ports |= mask;
+ else
+ priv->active_cpu_ports &= ~mask;
+
+ if (priv->active_cpu_ports)
+ val = CPU_EN | CPU_PORT(__ffs(priv->active_cpu_ports));
+
+ mt7530_rmw(priv, MT7530_MFC, CPU_EN | CPU_PORT_MASK, val);
}
static int mt7988_setup(struct dsa_switch *ds)
@@ -3130,6 +2959,7 @@ const struct dsa_switch_ops mt7530_switch_ops = {
.phylink_mac_link_up = mt753x_phylink_mac_link_up,
.get_mac_eee = mt753x_get_mac_eee,
.set_mac_eee = mt753x_set_mac_eee,
+ .conduit_state_change = mt753x_conduit_state_change,
};
EXPORT_SYMBOL_GPL(mt7530_switch_ops);
@@ -3142,7 +2972,6 @@ const struct mt753x_info mt753x_table[] = {
.phy_write_c22 = mt7530_phy_write_c22,
.phy_read_c45 = mt7530_phy_read_c45,
.phy_write_c45 = mt7530_phy_write_c45,
- .pad_setup = mt7530_pad_clk_setup,
.mac_port_get_caps = mt7530_mac_port_get_caps,
.mac_port_config = mt7530_mac_config,
},
@@ -3154,7 +2983,6 @@ const struct mt753x_info mt753x_table[] = {
.phy_write_c22 = mt7530_phy_write_c22,
.phy_read_c45 = mt7530_phy_read_c45,
.phy_write_c45 = mt7530_phy_write_c45,
- .pad_setup = mt7530_pad_clk_setup,
.mac_port_get_caps = mt7530_mac_port_get_caps,
.mac_port_config = mt7530_mac_config,
},
@@ -3166,8 +2994,6 @@ const struct mt753x_info mt753x_table[] = {
.phy_write_c22 = mt7531_ind_c22_phy_write,
.phy_read_c45 = mt7531_ind_c45_phy_read,
.phy_write_c45 = mt7531_ind_c45_phy_write,
- .pad_setup = mt7531_pad_setup,
- .cpu_port_config = mt7531_cpu_port_config,
.mac_port_get_caps = mt7531_mac_port_get_caps,
.mac_port_config = mt7531_mac_config,
},
@@ -3179,10 +3005,7 @@ const struct mt753x_info mt753x_table[] = {
.phy_write_c22 = mt7531_ind_c22_phy_write,
.phy_read_c45 = mt7531_ind_c45_phy_read,
.phy_write_c45 = mt7531_ind_c45_phy_write,
- .pad_setup = mt7988_pad_setup,
- .cpu_port_config = mt7988_cpu_port_config,
.mac_port_get_caps = mt7988_mac_port_get_caps,
- .mac_port_config = mt7988_mac_config,
},
};
EXPORT_SYMBOL_GPL(mt753x_table);
@@ -3209,10 +3032,8 @@ mt7530_probe_common(struct mt7530_priv *priv)
/* Sanity check if these required device operations are filled
* properly.
*/
- if (!priv->info->sw_setup || !priv->info->pad_setup ||
- !priv->info->phy_read_c22 || !priv->info->phy_write_c22 ||
- !priv->info->mac_port_get_caps ||
- !priv->info->mac_port_config)
+ if (!priv->info->sw_setup || !priv->info->phy_read_c22 ||
+ !priv->info->phy_write_c22 || !priv->info->mac_port_get_caps)
return -EINVAL;
priv->id = priv->info->id;
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 17e42d30fff4..a71166e0a7fc 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -41,8 +41,8 @@ enum mt753x_id {
#define UNU_FFP(x) (((x) & 0xff) << 8)
#define UNU_FFP_MASK UNU_FFP(~0)
#define CPU_EN BIT(7)
-#define CPU_PORT(x) ((x) << 4)
-#define CPU_MASK (0xf << 4)
+#define CPU_PORT_MASK GENMASK(6, 4)
+#define CPU_PORT(x) FIELD_PREP(CPU_PORT_MASK, x)
#define MIRROR_EN BIT(3)
#define MIRROR_PORT(x) ((x) & 0x7)
#define MIRROR_MASK 0x7
@@ -304,20 +304,11 @@ enum mt7530_vlan_port_acc_frm {
MT7531_FORCE_DPX | \
MT7531_FORCE_RX_FC | \
MT7531_FORCE_TX_FC)
-#define PMCR_FORCE_MODE_ID(id) ((((id) == ID_MT7531) || ((id) == ID_MT7988)) ? \
- MT7531_FORCE_MODE : PMCR_FORCE_MODE)
#define PMCR_LINK_SETTINGS_MASK (PMCR_TX_EN | PMCR_FORCE_SPEED_1000 | \
PMCR_RX_EN | PMCR_FORCE_SPEED_100 | \
PMCR_TX_FC_EN | PMCR_RX_FC_EN | \
PMCR_FORCE_FDX | PMCR_FORCE_LNK | \
PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100)
-#define PMCR_CPU_PORT_SETTING(id) (PMCR_FORCE_MODE_ID((id)) | \
- PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \
- PMCR_BACKOFF_EN | PMCR_BACKPR_EN | \
- PMCR_TX_EN | PMCR_RX_EN | \
- PMCR_TX_FC_EN | PMCR_RX_FC_EN | \
- PMCR_FORCE_SPEED_1000 | \
- PMCR_FORCE_FDX | PMCR_FORCE_LNK)
#define MT7530_PMEEECR_P(x) (0x3004 + (x) * 0x100)
#define WAKEUP_TIME_1000(x) (((x) & 0xFF) << 24)
@@ -683,11 +674,10 @@ struct mt7530_port {
/* Port 5 interface select definitions */
enum p5_interface_select {
- P5_DISABLED = 0,
+ P5_DISABLED,
P5_INTF_SEL_PHY_P0,
P5_INTF_SEL_PHY_P4,
P5_INTF_SEL_GMAC5,
- P5_INTF_SEL_GMAC5_SGMII,
};
struct mt7530_priv;
@@ -705,8 +695,6 @@ struct mt753x_pcs {
* @phy_write_c22: Holding the way writing PHY port using C22
* @phy_read_c45: Holding the way reading PHY port using C45
* @phy_write_c45: Holding the way writing PHY port using C45
- * @pad_setup: Holding the way setting up the bus pad for a certain
- * MAC port
* @phy_mode_supported: Check if the PHY type is being supported on a certain
* port
* @mac_port_validate: Holding the way to set addition validate type for a
@@ -727,16 +715,14 @@ struct mt753x_info {
int regnum);
int (*phy_write_c45)(struct mt7530_priv *priv, int port, int devad,
int regnum, u16 val);
- int (*pad_setup)(struct dsa_switch *ds, phy_interface_t interface);
- int (*cpu_port_config)(struct dsa_switch *ds, int port);
void (*mac_port_get_caps)(struct dsa_switch *ds, int port,
struct phylink_config *config);
void (*mac_port_validate)(struct dsa_switch *ds, int port,
phy_interface_t interface,
unsigned long *supported);
- int (*mac_port_config)(struct dsa_switch *ds, int port,
- unsigned int mode,
- phy_interface_t interface);
+ void (*mac_port_config)(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface);
};
/* struct mt7530_priv - This is the main data structure for holding the state
@@ -754,12 +740,14 @@ struct mt753x_info {
* @ports: Holding the state among ports
* @reg_mutex: The lock for protecting among process accessing
* registers
- * @p6_interface Holding the current port 6 interface
* @p5_intf_sel: Holding the current port 5 interface select
+ * @p5_sgmii: Flag for distinguishing if port 5 of the MT7531 switch
+ * has got SGMII
* @irq: IRQ number of the switch
* @irq_domain: IRQ domain of the switch irq_chip
* @irq_enable: IRQ enable bits, synced to SYS_INT_EN
* @create_sgmii: Pointer to function creating SGMII PCS instance(s)
+ * @active_cpu_ports: Holding the active CPU ports
*/
struct mt7530_priv {
struct device *dev;
@@ -773,9 +761,8 @@ struct mt7530_priv {
const struct mt753x_info *info;
unsigned int id;
bool mcm;
- phy_interface_t p6_interface;
- phy_interface_t p5_interface;
- unsigned int p5_intf_sel;
+ enum p5_interface_select p5_intf_sel;
+ bool p5_sgmii;
u8 mirror_rx;
u8 mirror_tx;
struct mt7530_port ports[MT7530_NUM_PORTS];
@@ -785,7 +772,8 @@ struct mt7530_priv {
int irq;
struct irq_domain *irq_domain;
u32 irq_enable;
- int (*create_sgmii)(struct mt7530_priv *priv, bool dual_sgmii);
+ int (*create_sgmii)(struct mt7530_priv *priv);
+ u8 active_cpu_ports;
};
struct mt7530_hw_vlan_entry {
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 383b3c4d6f59..9ed1821184ec 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1451,14 +1451,14 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
}
static int mv88e6xxx_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
/* Nothing to do on the port's MAC */
return 0;
}
static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
/* Nothing to do on the port's MAC */
return 0;
@@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad,
int err;
if (!chip->info->ops->phy_read_c45)
- return -EOPNOTSUPP;
+ return -ENODEV;
mv88e6xxx_reg_lock(chip);
err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val);
@@ -3712,7 +3712,10 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
if (external) {
mv88e6xxx_reg_lock(chip);
- err = mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
+ if (chip->info->family == MV88E6XXX_FAMILY_6393)
+ err = mv88e6393x_g2_scratch_gpio_set_smi(chip, true);
+ else
+ err = mv88e6390_g2_scratch_gpio_set_smi(chip, true);
mv88e6xxx_reg_unlock(chip);
if (err)
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index d9434f7cae53..82f9b410de0b 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -378,8 +378,10 @@ extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
-int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+int mv88e6390_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
bool external);
+int mv88e6393x_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external);
int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port);
int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin);
int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats);
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
index a9d6e40321a2..61ab6cc4fbfc 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -240,7 +240,7 @@ const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
};
/**
- * mv88e6xxx_g2_scratch_gpio_set_smi - set gpio muxing for external smi
+ * mv88e6390_g2_scratch_gpio_set_smi - set gpio muxing for external smi
* @chip: chip private data
* @external: set mux for external smi, or free for gpio usage
*
@@ -248,7 +248,7 @@ const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
* an external SMI interface, or they may be made free for other
* GPIO uses.
*/
-int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+int mv88e6390_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
bool external)
{
int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
@@ -291,6 +291,37 @@ int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
}
/**
+ * mv88e6393x_g2_scratch_gpio_set_smi - set gpio muxing for external smi
+ * @chip: chip private data
+ * @external: set mux for external smi, or free for gpio usage
+ *
+ * MV88E6191X/6193X/6393X GPIO pins 9 and 10 can be configured as an
+ * external SMI interface or as regular GPIO-s.
+ *
+ * They however have a different register layout then the existing
+ * function.
+ */
+
+int mv88e6393x_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, misc_cfg, &val);
+ if (err)
+ return err;
+
+ if (external)
+ val &= ~MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+ else
+ val |= MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+
+ return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
+}
+
+/**
* mv88e6352_g2_scratch_port_has_serdes - indicate if a port can have a serdes
* @chip: chip private data
* @port: port number to check for serdes
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6185.c b/drivers/net/dsa/mv88e6xxx/pcs-6185.c
index 4d677f836807..5a27d047a38e 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-6185.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-6185.c
@@ -95,7 +95,7 @@ static void mv88e6185_pcs_get_state(struct phylink_pcs *pcs,
}
}
-static int mv88e6185_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+static int mv88e6185_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface,
const unsigned long *advertising,
bool permit_pause_to_mac)
@@ -137,6 +137,7 @@ static int mv88e6185_pcs_init(struct mv88e6xxx_chip *chip, int port)
mpcs->chip = chip;
mpcs->port = port;
mpcs->phylink_pcs.ops = &mv88e6185_phylink_pcs_ops;
+ mpcs->phylink_pcs.neg_mode = true;
irq = mv88e6xxx_serdes_irq_mapping(chip, port);
if (irq) {
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index c51f40960961..dab66c0c6f64 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -950,15 +950,15 @@ qca8k_mdio_register(struct qca8k_priv *priv)
struct device *dev = ds->dev;
struct device_node *mdio;
struct mii_bus *bus;
- int err = 0;
+ int ret = 0;
mdio = of_get_child_by_name(dev->of_node, "mdio");
if (mdio && !of_device_is_available(mdio))
- goto out;
+ goto out_put_node;
bus = devm_mdiobus_alloc(dev);
if (!bus) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto out_put_node;
}
@@ -984,12 +984,11 @@ qca8k_mdio_register(struct qca8k_priv *priv)
bus->write = qca8k_legacy_mdio_write;
}
- err = devm_of_mdiobus_register(dev, bus, mdio);
+ ret = devm_of_mdiobus_register(dev, bus, mdio);
out_put_node:
of_node_put(mdio);
-out:
- return err;
+ return ret;
}
static int
@@ -998,7 +997,7 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
u32 internal_mdio_mask = 0, external_mdio_mask = 0, reg;
struct device_node *ports, *port;
phy_interface_t mode;
- int err;
+ int ret;
ports = of_get_child_by_name(priv->dev->of_node, "ports");
if (!ports)
@@ -1008,11 +1007,11 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
return -EINVAL;
for_each_available_child_of_node(ports, port) {
- err = of_property_read_u32(port, "reg", &reg);
- if (err) {
+ ret = of_property_read_u32(port, "reg", &reg);
+ if (ret) {
of_node_put(port);
of_node_put(ports);
- return err;
+ return ret;
}
if (!dsa_is_user_port(priv->ds, reg))
@@ -2051,12 +2050,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
priv->info = of_device_get_match_data(priv->dev);
priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset",
- GPIOD_ASIS);
+ GPIOD_OUT_HIGH);
if (IS_ERR(priv->reset_gpio))
return PTR_ERR(priv->reset_gpio);
if (priv->reset_gpio) {
- gpiod_set_value_cansleep(priv->reset_gpio, 1);
/* The active low duration must be greater than 10 ms
* and checkpatch.pl wants 20 ms.
*/
diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c
index 2358cd399c7e..7f80035c5441 100644
--- a/drivers/net/dsa/qca/qca8k-common.c
+++ b/drivers/net/dsa/qca/qca8k-common.c
@@ -534,7 +534,7 @@ int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset)
}
int qca8k_set_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *eee)
+ struct ethtool_keee *eee)
{
u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
struct qca8k_priv *priv = ds->priv;
@@ -558,7 +558,7 @@ exit:
}
int qca8k_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+ struct ethtool_keee *e)
{
/* Nothing to do on the port's MAC */
return 0;
diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h
index c8785c36c54e..2184d8d2d5a9 100644
--- a/drivers/net/dsa/qca/qca8k.h
+++ b/drivers/net/dsa/qca/qca8k.h
@@ -518,8 +518,8 @@ void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset);
/* Common eee function */
-int qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee);
-int qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
+int qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *eee);
+int qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
/* Common bridge function */
void qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state);
diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig
index 060165a85fb7..6989972eebc3 100644
--- a/drivers/net/dsa/realtek/Kconfig
+++ b/drivers/net/dsa/realtek/Kconfig
@@ -16,37 +16,29 @@ menuconfig NET_DSA_REALTEK
if NET_DSA_REALTEK
config NET_DSA_REALTEK_MDIO
- tristate "Realtek MDIO interface driver"
+ bool "Realtek MDIO interface support"
depends on OF
- depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB
- depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB
- depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB
help
Select to enable support for registering switches configured
through MDIO.
config NET_DSA_REALTEK_SMI
- tristate "Realtek SMI interface driver"
+ bool "Realtek SMI interface support"
depends on OF
- depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB
- depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB
- depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB
help
Select to enable support for registering switches connected
through SMI.
config NET_DSA_REALTEK_RTL8365MB
- tristate "Realtek RTL8365MB switch subdriver"
- imply NET_DSA_REALTEK_SMI
- imply NET_DSA_REALTEK_MDIO
+ tristate "Realtek RTL8365MB switch driver"
+ depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
select NET_DSA_TAG_RTL8_4
help
Select to enable support for Realtek RTL8365MB-VC and RTL8367S.
config NET_DSA_REALTEK_RTL8366RB
- tristate "Realtek RTL8366RB switch subdriver"
- imply NET_DSA_REALTEK_SMI
- imply NET_DSA_REALTEK_MDIO
+ tristate "Realtek RTL8366RB switch driver"
+ depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
select NET_DSA_TAG_RTL4_A
help
Select to enable support for Realtek RTL8366RB.
diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile
index 0aab57252a7c..35491dc20d6d 100644
--- a/drivers/net/dsa/realtek/Makefile
+++ b/drivers/net/dsa/realtek/Makefile
@@ -1,6 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_NET_DSA_REALTEK_MDIO) += realtek-mdio.o
-obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o
+obj-$(CONFIG_NET_DSA_REALTEK) += realtek_dsa.o
+realtek_dsa-objs := rtl83xx.o
+
+ifdef CONFIG_NET_DSA_REALTEK_MDIO
+realtek_dsa-objs += realtek-mdio.o
+endif
+
+ifdef CONFIG_NET_DSA_REALTEK_SMI
+realtek_dsa-objs += realtek-smi.o
+endif
+
obj-$(CONFIG_NET_DSA_REALTEK_RTL8366RB) += rtl8366.o
rtl8366-objs := rtl8366-core.o rtl8366rb.o
obj-$(CONFIG_NET_DSA_REALTEK_RTL8365MB) += rtl8365mb.o
diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c
index 292e6d087e8b..04b758e5a680 100644
--- a/drivers/net/dsa/realtek/realtek-mdio.c
+++ b/drivers/net/dsa/realtek/realtek-mdio.c
@@ -25,6 +25,8 @@
#include <linux/regmap.h>
#include "realtek.h"
+#include "realtek-mdio.h"
+#include "rtl83xx.h"
/* Read/write via mdiobus */
#define REALTEK_MDIO_CTRL0_REG 31
@@ -99,192 +101,87 @@ out_unlock:
return ret;
}
-static void realtek_mdio_lock(void *ctx)
-{
- struct realtek_priv *priv = ctx;
-
- mutex_lock(&priv->map_lock);
-}
-
-static void realtek_mdio_unlock(void *ctx)
-{
- struct realtek_priv *priv = ctx;
-
- mutex_unlock(&priv->map_lock);
-}
-
-static const struct regmap_config realtek_mdio_regmap_config = {
- .reg_bits = 10, /* A4..A0 R4..R0 */
- .val_bits = 16,
- .reg_stride = 1,
- /* PHY regs are at 0x8000 */
- .max_register = 0xffff,
- .reg_format_endian = REGMAP_ENDIAN_BIG,
+static const struct realtek_interface_info realtek_mdio_info = {
.reg_read = realtek_mdio_read,
.reg_write = realtek_mdio_write,
- .cache_type = REGCACHE_NONE,
- .lock = realtek_mdio_lock,
- .unlock = realtek_mdio_unlock,
};
-static const struct regmap_config realtek_mdio_nolock_regmap_config = {
- .reg_bits = 10, /* A4..A0 R4..R0 */
- .val_bits = 16,
- .reg_stride = 1,
- /* PHY regs are at 0x8000 */
- .max_register = 0xffff,
- .reg_format_endian = REGMAP_ENDIAN_BIG,
- .reg_read = realtek_mdio_read,
- .reg_write = realtek_mdio_write,
- .cache_type = REGCACHE_NONE,
- .disable_locking = true,
-};
-
-static int realtek_mdio_probe(struct mdio_device *mdiodev)
+/**
+ * realtek_mdio_probe() - Probe a platform device for an MDIO-connected switch
+ * @mdiodev: mdio_device to probe on.
+ *
+ * This function should be used as the .probe in an mdio_driver. After
+ * calling the common probe function for both interfaces, it initializes the
+ * values specific for MDIO-connected devices. Finally, it calls a common
+ * function to register the DSA switch.
+ *
+ * Context: Can sleep. Takes and releases priv->map_lock.
+ * Return: Returns 0 on success, a negative error on failure.
+ */
+int realtek_mdio_probe(struct mdio_device *mdiodev)
{
- struct realtek_priv *priv;
struct device *dev = &mdiodev->dev;
- const struct realtek_variant *var;
- struct regmap_config rc;
- struct device_node *np;
+ struct realtek_priv *priv;
int ret;
- var = of_device_get_match_data(dev);
- if (!var)
- return -EINVAL;
-
- priv = devm_kzalloc(&mdiodev->dev,
- size_add(sizeof(*priv), var->chip_data_sz),
- GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- mutex_init(&priv->map_lock);
+ priv = rtl83xx_probe(dev, &realtek_mdio_info);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
- rc = realtek_mdio_regmap_config;
- rc.lock_arg = priv;
- priv->map = devm_regmap_init(dev, NULL, priv, &rc);
- if (IS_ERR(priv->map)) {
- ret = PTR_ERR(priv->map);
- dev_err(dev, "regmap init failed: %d\n", ret);
- return ret;
- }
-
- rc = realtek_mdio_nolock_regmap_config;
- priv->map_nolock = devm_regmap_init(dev, NULL, priv, &rc);
- if (IS_ERR(priv->map_nolock)) {
- ret = PTR_ERR(priv->map_nolock);
- dev_err(dev, "regmap init failed: %d\n", ret);
- return ret;
- }
-
- priv->mdio_addr = mdiodev->addr;
priv->bus = mdiodev->bus;
- priv->dev = &mdiodev->dev;
- priv->chip_data = (void *)priv + sizeof(*priv);
-
- priv->clk_delay = var->clk_delay;
- priv->cmd_read = var->cmd_read;
- priv->cmd_write = var->cmd_write;
- priv->ops = var->ops;
-
+ priv->mdio_addr = mdiodev->addr;
priv->write_reg_noack = realtek_mdio_write;
- np = dev->of_node;
-
- dev_set_drvdata(dev, priv);
-
- /* TODO: if power is software controlled, set up any regulators here */
- priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
-
- priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
- if (IS_ERR(priv->reset)) {
- dev_err(dev, "failed to get RESET GPIO\n");
- return PTR_ERR(priv->reset);
- }
-
- if (priv->reset) {
- gpiod_set_value(priv->reset, 1);
- dev_dbg(dev, "asserted RESET\n");
- msleep(REALTEK_HW_STOP_DELAY);
- gpiod_set_value(priv->reset, 0);
- msleep(REALTEK_HW_START_DELAY);
- dev_dbg(dev, "deasserted RESET\n");
- }
-
- ret = priv->ops->detect(priv);
- if (ret) {
- dev_err(dev, "unable to detect switch\n");
- return ret;
- }
-
- priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
- if (!priv->ds)
- return -ENOMEM;
-
- priv->ds->dev = dev;
- priv->ds->num_ports = priv->num_ports;
- priv->ds->priv = priv;
- priv->ds->ops = var->ds_ops_mdio;
-
- ret = dsa_register_switch(priv->ds);
+ ret = rtl83xx_register_switch(priv);
if (ret) {
- dev_err(priv->dev, "unable to register switch ret = %d\n", ret);
+ rtl83xx_remove(priv);
return ret;
}
return 0;
}
+EXPORT_SYMBOL_NS_GPL(realtek_mdio_probe, REALTEK_DSA);
-static void realtek_mdio_remove(struct mdio_device *mdiodev)
+/**
+ * realtek_mdio_remove() - Remove the driver of an MDIO-connected switch
+ * @mdiodev: mdio_device to be removed.
+ *
+ * This function should be used as the .remove_new in an mdio_driver. First
+ * it unregisters the DSA switch and then it calls the common remove function.
+ *
+ * Context: Can sleep.
+ * Return: Nothing.
+ */
+void realtek_mdio_remove(struct mdio_device *mdiodev)
{
struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev);
if (!priv)
return;
- dsa_unregister_switch(priv->ds);
+ rtl83xx_unregister_switch(priv);
- /* leave the device reset asserted */
- if (priv->reset)
- gpiod_set_value(priv->reset, 1);
+ rtl83xx_remove(priv);
}
+EXPORT_SYMBOL_NS_GPL(realtek_mdio_remove, REALTEK_DSA);
-static void realtek_mdio_shutdown(struct mdio_device *mdiodev)
+/**
+ * realtek_mdio_shutdown() - Shutdown the driver of a MDIO-connected switch
+ * @mdiodev: mdio_device shutting down.
+ *
+ * This function should be used as the .shutdown in a platform_driver. It calls
+ * the common shutdown function.
+ *
+ * Context: Can sleep.
+ * Return: Nothing.
+ */
+void realtek_mdio_shutdown(struct mdio_device *mdiodev)
{
struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev);
if (!priv)
return;
- dsa_switch_shutdown(priv->ds);
-
- dev_set_drvdata(&mdiodev->dev, NULL);
+ rtl83xx_shutdown(priv);
}
-
-static const struct of_device_id realtek_mdio_of_match[] = {
-#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB)
- { .compatible = "realtek,rtl8366rb", .data = &rtl8366rb_variant, },
-#endif
-#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
- { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, },
-#endif
- { /* sentinel */ },
-};
-MODULE_DEVICE_TABLE(of, realtek_mdio_of_match);
-
-static struct mdio_driver realtek_mdio_driver = {
- .mdiodrv.driver = {
- .name = "realtek-mdio",
- .of_match_table = realtek_mdio_of_match,
- },
- .probe = realtek_mdio_probe,
- .remove = realtek_mdio_remove,
- .shutdown = realtek_mdio_shutdown,
-};
-
-mdio_module_driver(realtek_mdio_driver);
-
-MODULE_AUTHOR("Luiz Angelo Daros de Luca <luizluca@gmail.com>");
-MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via MDIO interface");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_NS_GPL(realtek_mdio_shutdown, REALTEK_DSA);
diff --git a/drivers/net/dsa/realtek/realtek-mdio.h b/drivers/net/dsa/realtek/realtek-mdio.h
new file mode 100644
index 000000000000..ee70f6a5b8ff
--- /dev/null
+++ b/drivers/net/dsa/realtek/realtek-mdio.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _REALTEK_MDIO_H
+#define _REALTEK_MDIO_H
+
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_MDIO)
+
+static inline int realtek_mdio_driver_register(struct mdio_driver *drv)
+{
+ return mdio_driver_register(drv);
+}
+
+static inline void realtek_mdio_driver_unregister(struct mdio_driver *drv)
+{
+ mdio_driver_unregister(drv);
+}
+
+int realtek_mdio_probe(struct mdio_device *mdiodev);
+void realtek_mdio_remove(struct mdio_device *mdiodev);
+void realtek_mdio_shutdown(struct mdio_device *mdiodev);
+
+#else /* IS_ENABLED(CONFIG_NET_DSA_REALTEK_MDIO) */
+
+static inline int realtek_mdio_driver_register(struct mdio_driver *drv)
+{
+ return 0;
+}
+
+static inline void realtek_mdio_driver_unregister(struct mdio_driver *drv)
+{
+}
+
+static inline int realtek_mdio_probe(struct mdio_device *mdiodev)
+{
+ return -ENOENT;
+}
+
+static inline void realtek_mdio_remove(struct mdio_device *mdiodev)
+{
+}
+
+static inline void realtek_mdio_shutdown(struct mdio_device *mdiodev)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_NET_DSA_REALTEK_MDIO) */
+
+#endif /* _REALTEK_MDIO_H */
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
index 755546ed8db6..88590ae95a75 100644
--- a/drivers/net/dsa/realtek/realtek-smi.c
+++ b/drivers/net/dsa/realtek/realtek-smi.c
@@ -31,7 +31,6 @@
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/of.h>
-#include <linux/of_mdio.h>
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>
@@ -40,12 +39,14 @@
#include <linux/if_bridge.h>
#include "realtek.h"
+#include "realtek-smi.h"
+#include "rtl83xx.h"
#define REALTEK_SMI_ACK_RETRY_COUNT 5
static inline void realtek_smi_clk_delay(struct realtek_priv *priv)
{
- ndelay(priv->clk_delay);
+ ndelay(priv->variant->clk_delay);
}
static void realtek_smi_start(struct realtek_priv *priv)
@@ -208,7 +209,7 @@ static int realtek_smi_read_reg(struct realtek_priv *priv, u32 addr, u32 *data)
realtek_smi_start(priv);
/* Send READ command */
- ret = realtek_smi_write_byte(priv, priv->cmd_read);
+ ret = realtek_smi_write_byte(priv, priv->variant->cmd_read);
if (ret)
goto out;
@@ -249,7 +250,7 @@ static int realtek_smi_write_reg(struct realtek_priv *priv,
realtek_smi_start(priv);
/* Send WRITE command */
- ret = realtek_smi_write_byte(priv, priv->cmd_write);
+ ret = realtek_smi_write_byte(priv, priv->variant->cmd_write);
if (ret)
goto out;
@@ -310,258 +311,98 @@ static int realtek_smi_read(void *ctx, u32 reg, u32 *val)
return realtek_smi_read_reg(priv, reg, val);
}
-static void realtek_smi_lock(void *ctx)
-{
- struct realtek_priv *priv = ctx;
-
- mutex_lock(&priv->map_lock);
-}
-
-static void realtek_smi_unlock(void *ctx)
-{
- struct realtek_priv *priv = ctx;
-
- mutex_unlock(&priv->map_lock);
-}
-
-static const struct regmap_config realtek_smi_regmap_config = {
- .reg_bits = 10, /* A4..A0 R4..R0 */
- .val_bits = 16,
- .reg_stride = 1,
- /* PHY regs are at 0x8000 */
- .max_register = 0xffff,
- .reg_format_endian = REGMAP_ENDIAN_BIG,
+static const struct realtek_interface_info realtek_smi_info = {
.reg_read = realtek_smi_read,
.reg_write = realtek_smi_write,
- .cache_type = REGCACHE_NONE,
- .lock = realtek_smi_lock,
- .unlock = realtek_smi_unlock,
};
-static const struct regmap_config realtek_smi_nolock_regmap_config = {
- .reg_bits = 10, /* A4..A0 R4..R0 */
- .val_bits = 16,
- .reg_stride = 1,
- /* PHY regs are at 0x8000 */
- .max_register = 0xffff,
- .reg_format_endian = REGMAP_ENDIAN_BIG,
- .reg_read = realtek_smi_read,
- .reg_write = realtek_smi_write,
- .cache_type = REGCACHE_NONE,
- .disable_locking = true,
-};
-
-static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum)
-{
- struct realtek_priv *priv = bus->priv;
-
- return priv->ops->phy_read(priv, addr, regnum);
-}
-
-static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum,
- u16 val)
-{
- struct realtek_priv *priv = bus->priv;
-
- return priv->ops->phy_write(priv, addr, regnum, val);
-}
-
-static int realtek_smi_setup_mdio(struct dsa_switch *ds)
-{
- struct realtek_priv *priv = ds->priv;
- struct device_node *mdio_np;
- int ret;
-
- mdio_np = of_get_compatible_child(priv->dev->of_node, "realtek,smi-mdio");
- if (!mdio_np) {
- dev_err(priv->dev, "no MDIO bus node\n");
- return -ENODEV;
- }
-
- priv->user_mii_bus = devm_mdiobus_alloc(priv->dev);
- if (!priv->user_mii_bus) {
- ret = -ENOMEM;
- goto err_put_node;
- }
- priv->user_mii_bus->priv = priv;
- priv->user_mii_bus->name = "SMI user MII";
- priv->user_mii_bus->read = realtek_smi_mdio_read;
- priv->user_mii_bus->write = realtek_smi_mdio_write;
- snprintf(priv->user_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d",
- ds->index);
- priv->user_mii_bus->dev.of_node = mdio_np;
- priv->user_mii_bus->parent = priv->dev;
- ds->user_mii_bus = priv->user_mii_bus;
-
- ret = devm_of_mdiobus_register(priv->dev, priv->user_mii_bus, mdio_np);
- if (ret) {
- dev_err(priv->dev, "unable to register MDIO bus %s\n",
- priv->user_mii_bus->id);
- goto err_put_node;
- }
-
- return 0;
-
-err_put_node:
- of_node_put(mdio_np);
-
- return ret;
-}
-
-static int realtek_smi_probe(struct platform_device *pdev)
+/**
+ * realtek_smi_probe() - Probe a platform device for an SMI-connected switch
+ * @pdev: platform_device to probe on.
+ *
+ * This function should be used as the .probe in a platform_driver. After
+ * calling the common probe function for both interfaces, it initializes the
+ * values specific for SMI-connected devices. Finally, it calls a common
+ * function to register the DSA switch.
+ *
+ * Context: Can sleep. Takes and releases priv->map_lock.
+ * Return: Returns 0 on success, a negative error on failure.
+ */
+int realtek_smi_probe(struct platform_device *pdev)
{
- const struct realtek_variant *var;
struct device *dev = &pdev->dev;
struct realtek_priv *priv;
- struct regmap_config rc;
- struct device_node *np;
int ret;
- var = of_device_get_match_data(dev);
- np = dev->of_node;
-
- priv = devm_kzalloc(dev, sizeof(*priv) + var->chip_data_sz, GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
- priv->chip_data = (void *)priv + sizeof(*priv);
-
- mutex_init(&priv->map_lock);
-
- rc = realtek_smi_regmap_config;
- rc.lock_arg = priv;
- priv->map = devm_regmap_init(dev, NULL, priv, &rc);
- if (IS_ERR(priv->map)) {
- ret = PTR_ERR(priv->map);
- dev_err(dev, "regmap init failed: %d\n", ret);
- return ret;
- }
-
- rc = realtek_smi_nolock_regmap_config;
- priv->map_nolock = devm_regmap_init(dev, NULL, priv, &rc);
- if (IS_ERR(priv->map_nolock)) {
- ret = PTR_ERR(priv->map_nolock);
- dev_err(dev, "regmap init failed: %d\n", ret);
- return ret;
- }
-
- /* Link forward and backward */
- priv->dev = dev;
- priv->clk_delay = var->clk_delay;
- priv->cmd_read = var->cmd_read;
- priv->cmd_write = var->cmd_write;
- priv->ops = var->ops;
-
- priv->setup_interface = realtek_smi_setup_mdio;
- priv->write_reg_noack = realtek_smi_write_reg_noack;
-
- dev_set_drvdata(dev, priv);
- spin_lock_init(&priv->lock);
-
- /* TODO: if power is software controlled, set up any regulators here */
-
- priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
- if (IS_ERR(priv->reset)) {
- dev_err(dev, "failed to get RESET GPIO\n");
- return PTR_ERR(priv->reset);
- }
- if (priv->reset) {
- gpiod_set_value(priv->reset, 1);
- dev_dbg(dev, "asserted RESET\n");
- msleep(REALTEK_HW_STOP_DELAY);
- gpiod_set_value(priv->reset, 0);
- msleep(REALTEK_HW_START_DELAY);
- dev_dbg(dev, "deasserted RESET\n");
- }
+ priv = rtl83xx_probe(dev, &realtek_smi_info);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
/* Fetch MDIO pins */
priv->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW);
- if (IS_ERR(priv->mdc))
+ if (IS_ERR(priv->mdc)) {
+ rtl83xx_remove(priv);
return PTR_ERR(priv->mdc);
+ }
+
priv->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW);
- if (IS_ERR(priv->mdio))
+ if (IS_ERR(priv->mdio)) {
+ rtl83xx_remove(priv);
return PTR_ERR(priv->mdio);
-
- priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
-
- ret = priv->ops->detect(priv);
- if (ret) {
- dev_err(dev, "unable to detect switch\n");
- return ret;
}
- priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
- if (!priv->ds)
- return -ENOMEM;
-
- priv->ds->dev = dev;
- priv->ds->num_ports = priv->num_ports;
- priv->ds->priv = priv;
+ priv->write_reg_noack = realtek_smi_write_reg_noack;
- priv->ds->ops = var->ds_ops_smi;
- ret = dsa_register_switch(priv->ds);
+ ret = rtl83xx_register_switch(priv);
if (ret) {
- dev_err_probe(dev, ret, "unable to register switch\n");
+ rtl83xx_remove(priv);
return ret;
}
+
return 0;
}
+EXPORT_SYMBOL_NS_GPL(realtek_smi_probe, REALTEK_DSA);
-static void realtek_smi_remove(struct platform_device *pdev)
+/**
+ * realtek_smi_remove() - Remove the driver of a SMI-connected switch
+ * @pdev: platform_device to be removed.
+ *
+ * This function should be used as the .remove_new in a platform_driver. First
+ * it unregisters the DSA switch and then it calls the common remove function.
+ *
+ * Context: Can sleep.
+ * Return: Nothing.
+ */
+void realtek_smi_remove(struct platform_device *pdev)
{
struct realtek_priv *priv = platform_get_drvdata(pdev);
if (!priv)
return;
- dsa_unregister_switch(priv->ds);
- if (priv->user_mii_bus)
- of_node_put(priv->user_mii_bus->dev.of_node);
+ rtl83xx_unregister_switch(priv);
- /* leave the device reset asserted */
- if (priv->reset)
- gpiod_set_value(priv->reset, 1);
+ rtl83xx_remove(priv);
}
+EXPORT_SYMBOL_NS_GPL(realtek_smi_remove, REALTEK_DSA);
-static void realtek_smi_shutdown(struct platform_device *pdev)
+/**
+ * realtek_smi_shutdown() - Shutdown the driver of a SMI-connected switch
+ * @pdev: platform_device shutting down.
+ *
+ * This function should be used as the .shutdown in a platform_driver. It calls
+ * the common shutdown function.
+ *
+ * Context: Can sleep.
+ * Return: Nothing.
+ */
+void realtek_smi_shutdown(struct platform_device *pdev)
{
struct realtek_priv *priv = platform_get_drvdata(pdev);
if (!priv)
return;
- dsa_switch_shutdown(priv->ds);
-
- platform_set_drvdata(pdev, NULL);
+ rtl83xx_shutdown(priv);
}
-
-static const struct of_device_id realtek_smi_of_match[] = {
-#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB)
- {
- .compatible = "realtek,rtl8366rb",
- .data = &rtl8366rb_variant,
- },
-#endif
-#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
- {
- .compatible = "realtek,rtl8365mb",
- .data = &rtl8365mb_variant,
- },
-#endif
- { /* sentinel */ },
-};
-MODULE_DEVICE_TABLE(of, realtek_smi_of_match);
-
-static struct platform_driver realtek_smi_driver = {
- .driver = {
- .name = "realtek-smi",
- .of_match_table = realtek_smi_of_match,
- },
- .probe = realtek_smi_probe,
- .remove_new = realtek_smi_remove,
- .shutdown = realtek_smi_shutdown,
-};
-module_platform_driver(realtek_smi_driver);
-
-MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
-MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via SMI interface");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_NS_GPL(realtek_smi_shutdown, REALTEK_DSA);
diff --git a/drivers/net/dsa/realtek/realtek-smi.h b/drivers/net/dsa/realtek/realtek-smi.h
new file mode 100644
index 000000000000..ea49a2edd3c8
--- /dev/null
+++ b/drivers/net/dsa/realtek/realtek-smi.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _REALTEK_SMI_H
+#define _REALTEK_SMI_H
+
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_SMI)
+
+static inline int realtek_smi_driver_register(struct platform_driver *drv)
+{
+ return platform_driver_register(drv);
+}
+
+static inline void realtek_smi_driver_unregister(struct platform_driver *drv)
+{
+ platform_driver_unregister(drv);
+}
+
+int realtek_smi_probe(struct platform_device *pdev);
+void realtek_smi_remove(struct platform_device *pdev);
+void realtek_smi_shutdown(struct platform_device *pdev);
+
+#else /* IS_ENABLED(CONFIG_NET_DSA_REALTEK_SMI) */
+
+static inline int realtek_smi_driver_register(struct platform_driver *drv)
+{
+ return 0;
+}
+
+static inline void realtek_smi_driver_unregister(struct platform_driver *drv)
+{
+}
+
+static inline int realtek_smi_probe(struct platform_device *pdev)
+{
+ return -ENOENT;
+}
+
+static inline void realtek_smi_remove(struct platform_device *pdev)
+{
+}
+
+static inline void realtek_smi_shutdown(struct platform_device *pdev)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_NET_DSA_REALTEK_SMI) */
+
+#endif /* _REALTEK_SMI_H */
diff --git a/drivers/net/dsa/realtek/realtek.h b/drivers/net/dsa/realtek/realtek.h
index 790488e9c667..e0b1aa01337b 100644
--- a/drivers/net/dsa/realtek/realtek.h
+++ b/drivers/net/dsa/realtek/realtek.h
@@ -12,6 +12,7 @@
#include <linux/platform_device.h>
#include <linux/gpio/consumer.h>
#include <net/dsa.h>
+#include <linux/reset.h>
#define REALTEK_HW_STOP_DELAY 25 /* msecs */
#define REALTEK_HW_START_DELAY 100 /* msecs */
@@ -48,6 +49,7 @@ struct rtl8366_vlan_4k {
struct realtek_priv {
struct device *dev;
+ struct reset_control *reset_ctl;
struct gpio_desc *reset;
struct gpio_desc *mdc;
struct gpio_desc *mdio;
@@ -58,11 +60,10 @@ struct realtek_priv {
struct mii_bus *bus;
int mdio_addr;
- unsigned int clk_delay;
- u8 cmd_read;
- u8 cmd_write;
+ const struct realtek_variant *variant;
+
spinlock_t lock; /* Locks around command writes */
- struct dsa_switch *ds;
+ struct dsa_switch ds;
struct irq_domain *irqdomain;
bool leds_disabled;
@@ -73,7 +74,6 @@ struct realtek_priv {
struct rtl8366_mib_counter *mib_counters;
const struct realtek_ops *ops;
- int (*setup_interface)(struct dsa_switch *ds);
int (*write_reg_noack)(void *ctx, u32 addr, u32 data);
int vlan_enabled;
@@ -91,7 +91,6 @@ struct realtek_ops {
int (*detect)(struct realtek_priv *priv);
int (*reset_chip)(struct realtek_priv *priv);
int (*setup)(struct realtek_priv *priv);
- void (*cleanup)(struct realtek_priv *priv);
int (*get_mib_counter)(struct realtek_priv *priv,
int port,
struct rtl8366_mib_counter *mib,
@@ -116,8 +115,7 @@ struct realtek_ops {
};
struct realtek_variant {
- const struct dsa_switch_ops *ds_ops_smi;
- const struct dsa_switch_ops *ds_ops_mdio;
+ const struct dsa_switch_ops *ds_ops;
const struct realtek_ops *ops;
unsigned int clk_delay;
u8 cmd_read;
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index b072045eb154..12665a8a3412 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -101,6 +101,9 @@
#include <linux/if_vlan.h>
#include "realtek.h"
+#include "realtek-smi.h"
+#include "realtek-mdio.h"
+#include "rtl83xx.h"
/* Family-specific data and limits */
#define RTL8365MB_PHYADDRMAX 7
@@ -206,10 +209,10 @@
#define RTL8365MB_EXT_PORT_MODE_100FX 13
/* External interface mode configuration registers 0~1 */
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 /* EXT1 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 /* EXT0,EXT1 */
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 /* EXT2 */
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extint) \
- ((_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 : \
+ ((_extint) <= 1 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 : \
(_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 : \
0x0)
#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extint) \
@@ -689,7 +692,7 @@ static int rtl8365mb_phy_ocp_read(struct realtek_priv *priv, int phy,
u32 val;
int ret;
- mutex_lock(&priv->map_lock);
+ rtl83xx_lock(priv);
ret = rtl8365mb_phy_poll_busy(priv);
if (ret)
@@ -722,7 +725,7 @@ static int rtl8365mb_phy_ocp_read(struct realtek_priv *priv, int phy,
*data = val & 0xFFFF;
out:
- mutex_unlock(&priv->map_lock);
+ rtl83xx_unlock(priv);
return ret;
}
@@ -733,7 +736,7 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy,
u32 val;
int ret;
- mutex_lock(&priv->map_lock);
+ rtl83xx_lock(priv);
ret = rtl8365mb_phy_poll_busy(priv);
if (ret)
@@ -764,7 +767,7 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy,
goto out;
out:
- mutex_unlock(&priv->map_lock);
+ rtl83xx_unlock(priv);
return 0;
}
@@ -825,17 +828,6 @@ static int rtl8365mb_phy_write(struct realtek_priv *priv, int phy, int regnum,
return 0;
}
-static int rtl8365mb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
-{
- return rtl8365mb_phy_read(ds->priv, phy, regnum);
-}
-
-static int rtl8365mb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
- u16 val)
-{
- return rtl8365mb_phy_write(ds->priv, phy, regnum, val);
-}
-
static const struct rtl8365mb_extint *
rtl8365mb_get_port_extint(struct realtek_priv *priv, int port)
{
@@ -878,6 +870,7 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
{
const struct rtl8365mb_extint *extint =
rtl8365mb_get_port_extint(priv, port);
+ struct dsa_switch *ds = &priv->ds;
struct device_node *dn;
struct dsa_port *dp;
int tx_delay = 0;
@@ -888,7 +881,7 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
if (!extint)
return -ENODEV;
- dp = dsa_to_port(priv->ds, port);
+ dp = dsa_to_port(ds, port);
dn = dp->dn;
/* Set the RGMII TX/RX delay
@@ -1541,6 +1534,7 @@ static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port,
static void rtl8365mb_stats_setup(struct realtek_priv *priv)
{
struct rtl8365mb *mb = priv->chip_data;
+ struct dsa_switch *ds = &priv->ds;
int i;
/* Per-chip global mutex to protect MIB counter access, since doing
@@ -1551,7 +1545,7 @@ static void rtl8365mb_stats_setup(struct realtek_priv *priv)
for (i = 0; i < priv->num_ports; i++) {
struct rtl8365mb_port *p = &mb->ports[i];
- if (dsa_is_unused_port(priv->ds, i))
+ if (dsa_is_unused_port(ds, i))
continue;
/* Per-port spinlock to protect the stats64 data */
@@ -1567,12 +1561,13 @@ static void rtl8365mb_stats_setup(struct realtek_priv *priv)
static void rtl8365mb_stats_teardown(struct realtek_priv *priv)
{
struct rtl8365mb *mb = priv->chip_data;
+ struct dsa_switch *ds = &priv->ds;
int i;
for (i = 0; i < priv->num_ports; i++) {
struct rtl8365mb_port *p = &mb->ports[i];
- if (dsa_is_unused_port(priv->ds, i))
+ if (dsa_is_unused_port(ds, i))
continue;
cancel_delayed_work_sync(&p->mib_work);
@@ -1971,7 +1966,7 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
dev_info(priv->dev, "no interrupt support\n");
/* Configure CPU tagging */
- dsa_switch_for_each_cpu_port(cpu_dp, priv->ds) {
+ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
cpu->mask |= BIT(cpu_dp->index);
if (cpu->trap_port == RTL8365MB_MAX_NUM_PORTS)
@@ -1986,7 +1981,7 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
for (i = 0; i < priv->num_ports; i++) {
struct rtl8365mb_port *p = &mb->ports[i];
- if (dsa_is_unused_port(priv->ds, i))
+ if (dsa_is_unused_port(ds, i))
continue;
/* Forward only to the CPU */
@@ -2003,7 +1998,7 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
* ports will still forward frames to the CPU despite being
* administratively down by default.
*/
- rtl8365mb_port_stp_state_set(priv->ds, i, BR_STATE_DISABLED);
+ rtl8365mb_port_stp_state_set(ds, i, BR_STATE_DISABLED);
/* Set up per-port private data */
p->priv = priv;
@@ -2014,12 +2009,10 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
if (ret)
goto out_teardown_irq;
- if (priv->setup_interface) {
- ret = priv->setup_interface(ds);
- if (ret) {
- dev_err(priv->dev, "could not set up MDIO bus\n");
- goto out_teardown_irq;
- }
+ ret = rtl83xx_setup_user_mdio(ds);
+ if (ret) {
+ dev_err(priv->dev, "could not set up MDIO bus\n");
+ goto out_teardown_irq;
}
/* Start statistics counter polling */
@@ -2113,7 +2106,7 @@ static int rtl8365mb_detect(struct realtek_priv *priv)
return 0;
}
-static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = {
+static const struct dsa_switch_ops rtl8365mb_switch_ops = {
.get_tag_protocol = rtl8365mb_get_tag_protocol,
.change_tag_protocol = rtl8365mb_change_tag_protocol,
.setup = rtl8365mb_setup,
@@ -2134,29 +2127,6 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = {
.port_max_mtu = rtl8365mb_port_max_mtu,
};
-static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = {
- .get_tag_protocol = rtl8365mb_get_tag_protocol,
- .change_tag_protocol = rtl8365mb_change_tag_protocol,
- .setup = rtl8365mb_setup,
- .teardown = rtl8365mb_teardown,
- .phylink_get_caps = rtl8365mb_phylink_get_caps,
- .phylink_mac_config = rtl8365mb_phylink_mac_config,
- .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down,
- .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up,
- .phy_read = rtl8365mb_dsa_phy_read,
- .phy_write = rtl8365mb_dsa_phy_write,
- .port_stp_state_set = rtl8365mb_port_stp_state_set,
- .get_strings = rtl8365mb_get_strings,
- .get_ethtool_stats = rtl8365mb_get_ethtool_stats,
- .get_sset_count = rtl8365mb_get_sset_count,
- .get_eth_phy_stats = rtl8365mb_get_phy_stats,
- .get_eth_mac_stats = rtl8365mb_get_mac_stats,
- .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats,
- .get_stats64 = rtl8365mb_get_stats64,
- .port_change_mtu = rtl8365mb_port_change_mtu,
- .port_max_mtu = rtl8365mb_port_max_mtu,
-};
-
static const struct realtek_ops rtl8365mb_ops = {
.detect = rtl8365mb_detect,
.phy_read = rtl8365mb_phy_read,
@@ -2164,16 +2134,66 @@ static const struct realtek_ops rtl8365mb_ops = {
};
const struct realtek_variant rtl8365mb_variant = {
- .ds_ops_smi = &rtl8365mb_switch_ops_smi,
- .ds_ops_mdio = &rtl8365mb_switch_ops_mdio,
+ .ds_ops = &rtl8365mb_switch_ops,
.ops = &rtl8365mb_ops,
.clk_delay = 10,
.cmd_read = 0xb9,
.cmd_write = 0xb8,
.chip_data_sz = sizeof(struct rtl8365mb),
};
-EXPORT_SYMBOL_GPL(rtl8365mb_variant);
+
+static const struct of_device_id rtl8365mb_of_match[] = {
+ { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, rtl8365mb_of_match);
+
+static struct platform_driver rtl8365mb_smi_driver = {
+ .driver = {
+ .name = "rtl8365mb-smi",
+ .of_match_table = rtl8365mb_of_match,
+ },
+ .probe = realtek_smi_probe,
+ .remove_new = realtek_smi_remove,
+ .shutdown = realtek_smi_shutdown,
+};
+
+static struct mdio_driver rtl8365mb_mdio_driver = {
+ .mdiodrv.driver = {
+ .name = "rtl8365mb-mdio",
+ .of_match_table = rtl8365mb_of_match,
+ },
+ .probe = realtek_mdio_probe,
+ .remove = realtek_mdio_remove,
+ .shutdown = realtek_mdio_shutdown,
+};
+
+static int rtl8365mb_init(void)
+{
+ int ret;
+
+ ret = realtek_mdio_driver_register(&rtl8365mb_mdio_driver);
+ if (ret)
+ return ret;
+
+ ret = realtek_smi_driver_register(&rtl8365mb_smi_driver);
+ if (ret) {
+ realtek_mdio_driver_unregister(&rtl8365mb_mdio_driver);
+ return ret;
+ }
+
+ return 0;
+}
+module_init(rtl8365mb_init);
+
+static void __exit rtl8365mb_exit(void)
+{
+ realtek_smi_driver_unregister(&rtl8365mb_smi_driver);
+ realtek_mdio_driver_unregister(&rtl8365mb_mdio_driver);
+}
+module_exit(rtl8365mb_exit);
MODULE_AUTHOR("Alvin Å ipraga <alsi@bang-olufsen.dk>");
MODULE_DESCRIPTION("Driver for RTL8365MB-VC ethernet switch");
MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(REALTEK_DSA);
diff --git a/drivers/net/dsa/realtek/rtl8366-core.c b/drivers/net/dsa/realtek/rtl8366-core.c
index 59f98d2c8769..7c6520ba3a26 100644
--- a/drivers/net/dsa/realtek/rtl8366-core.c
+++ b/drivers/net/dsa/realtek/rtl8366-core.c
@@ -34,7 +34,7 @@ int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used)
return 0;
}
-EXPORT_SYMBOL_GPL(rtl8366_mc_is_used);
+EXPORT_SYMBOL_NS_GPL(rtl8366_mc_is_used, REALTEK_DSA);
/**
* rtl8366_obtain_mc() - retrieve or allocate a VLAN member configuration
@@ -187,7 +187,7 @@ int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member,
return ret;
}
-EXPORT_SYMBOL_GPL(rtl8366_set_vlan);
+EXPORT_SYMBOL_NS_GPL(rtl8366_set_vlan, REALTEK_DSA);
int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port,
unsigned int vid)
@@ -217,7 +217,7 @@ int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port,
return 0;
}
-EXPORT_SYMBOL_GPL(rtl8366_set_pvid);
+EXPORT_SYMBOL_NS_GPL(rtl8366_set_pvid, REALTEK_DSA);
int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable)
{
@@ -243,7 +243,7 @@ int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable)
priv->vlan4k_enabled = enable;
return 0;
}
-EXPORT_SYMBOL_GPL(rtl8366_enable_vlan4k);
+EXPORT_SYMBOL_NS_GPL(rtl8366_enable_vlan4k, REALTEK_DSA);
int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable)
{
@@ -265,7 +265,7 @@ int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable)
return ret;
}
-EXPORT_SYMBOL_GPL(rtl8366_enable_vlan);
+EXPORT_SYMBOL_NS_GPL(rtl8366_enable_vlan, REALTEK_DSA);
int rtl8366_reset_vlan(struct realtek_priv *priv)
{
@@ -290,7 +290,7 @@ int rtl8366_reset_vlan(struct realtek_priv *priv)
return 0;
}
-EXPORT_SYMBOL_GPL(rtl8366_reset_vlan);
+EXPORT_SYMBOL_NS_GPL(rtl8366_reset_vlan, REALTEK_DSA);
int rtl8366_vlan_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan,
@@ -345,7 +345,7 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
return 0;
}
-EXPORT_SYMBOL_GPL(rtl8366_vlan_add);
+EXPORT_SYMBOL_NS_GPL(rtl8366_vlan_add, REALTEK_DSA);
int rtl8366_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan)
@@ -389,7 +389,7 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port,
return 0;
}
-EXPORT_SYMBOL_GPL(rtl8366_vlan_del);
+EXPORT_SYMBOL_NS_GPL(rtl8366_vlan_del, REALTEK_DSA);
void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
uint8_t *data)
@@ -403,7 +403,7 @@ void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
for (i = 0; i < priv->num_mib_counters; i++)
ethtool_puts(&data, priv->mib_counters[i].name);
}
-EXPORT_SYMBOL_GPL(rtl8366_get_strings);
+EXPORT_SYMBOL_NS_GPL(rtl8366_get_strings, REALTEK_DSA);
int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
@@ -417,7 +417,7 @@ int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset)
return priv->num_mib_counters;
}
-EXPORT_SYMBOL_GPL(rtl8366_get_sset_count);
+EXPORT_SYMBOL_NS_GPL(rtl8366_get_sset_count, REALTEK_DSA);
void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
{
@@ -441,4 +441,4 @@ void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
data[i] = mibvalue;
}
}
-EXPORT_SYMBOL_GPL(rtl8366_get_ethtool_stats);
+EXPORT_SYMBOL_NS_GPL(rtl8366_get_ethtool_stats, REALTEK_DSA);
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index e3b6a470ca67..e10ae94cf771 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -23,6 +23,9 @@
#include <linux/regmap.h>
#include "realtek.h"
+#include "realtek-smi.h"
+#include "realtek-mdio.h"
+#include "rtl83xx.h"
#define RTL8366RB_PORT_NUM_CPU 5
#define RTL8366RB_NUM_PORTS 6
@@ -1030,12 +1033,10 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
if (ret)
dev_info(priv->dev, "no interrupt support\n");
- if (priv->setup_interface) {
- ret = priv->setup_interface(ds);
- if (ret) {
- dev_err(priv->dev, "could not set up MDIO bus\n");
- return -ENODEV;
- }
+ ret = rtl83xx_setup_user_mdio(ds);
+ if (ret) {
+ dev_err(priv->dev, "could not set up MDIO bus\n");
+ return -ENODEV;
}
return 0;
@@ -1650,6 +1651,7 @@ static int rtl8366rb_get_mc_index(struct realtek_priv *priv, int port, int *val)
static int rtl8366rb_set_mc_index(struct realtek_priv *priv, int port, int index)
{
+ struct dsa_switch *ds = &priv->ds;
struct rtl8366rb *rb;
bool pvid_enabled;
int ret;
@@ -1674,7 +1676,7 @@ static int rtl8366rb_set_mc_index(struct realtek_priv *priv, int port, int index
* not drop any untagged or C-tagged frames. Make sure to update the
* filtering setting.
*/
- if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port)))
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
ret = rtl8366rb_drop_untagged(priv, port, !pvid_enabled);
return ret;
@@ -1718,7 +1720,7 @@ static int rtl8366rb_phy_read(struct realtek_priv *priv, int phy, int regnum)
if (phy > RTL8366RB_PHY_NO_MAX)
return -EINVAL;
- mutex_lock(&priv->map_lock);
+ rtl83xx_lock(priv);
ret = regmap_write(priv->map_nolock, RTL8366RB_PHY_ACCESS_CTRL_REG,
RTL8366RB_PHY_CTRL_READ);
@@ -1746,7 +1748,7 @@ static int rtl8366rb_phy_read(struct realtek_priv *priv, int phy, int regnum)
phy, regnum, reg, val);
out:
- mutex_unlock(&priv->map_lock);
+ rtl83xx_unlock(priv);
return ret;
}
@@ -1760,7 +1762,7 @@ static int rtl8366rb_phy_write(struct realtek_priv *priv, int phy, int regnum,
if (phy > RTL8366RB_PHY_NO_MAX)
return -EINVAL;
- mutex_lock(&priv->map_lock);
+ rtl83xx_lock(priv);
ret = regmap_write(priv->map_nolock, RTL8366RB_PHY_ACCESS_CTRL_REG,
RTL8366RB_PHY_CTRL_WRITE);
@@ -1777,22 +1779,11 @@ static int rtl8366rb_phy_write(struct realtek_priv *priv, int phy, int regnum,
goto out;
out:
- mutex_unlock(&priv->map_lock);
+ rtl83xx_unlock(priv);
return ret;
}
-static int rtl8366rb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
-{
- return rtl8366rb_phy_read(ds->priv, phy, regnum);
-}
-
-static int rtl8366rb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
- u16 val)
-{
- return rtl8366rb_phy_write(ds->priv, phy, regnum, val);
-}
-
static int rtl8366rb_reset_chip(struct realtek_priv *priv)
{
int timeout = 10;
@@ -1858,7 +1849,7 @@ static int rtl8366rb_detect(struct realtek_priv *priv)
return 0;
}
-static const struct dsa_switch_ops rtl8366rb_switch_ops_smi = {
+static const struct dsa_switch_ops rtl8366rb_switch_ops = {
.get_tag_protocol = rtl8366_get_tag_protocol,
.setup = rtl8366rb_setup,
.phylink_get_caps = rtl8366rb_phylink_get_caps,
@@ -1882,32 +1873,6 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops_smi = {
.port_max_mtu = rtl8366rb_max_mtu,
};
-static const struct dsa_switch_ops rtl8366rb_switch_ops_mdio = {
- .get_tag_protocol = rtl8366_get_tag_protocol,
- .setup = rtl8366rb_setup,
- .phy_read = rtl8366rb_dsa_phy_read,
- .phy_write = rtl8366rb_dsa_phy_write,
- .phylink_get_caps = rtl8366rb_phylink_get_caps,
- .phylink_mac_link_up = rtl8366rb_mac_link_up,
- .phylink_mac_link_down = rtl8366rb_mac_link_down,
- .get_strings = rtl8366_get_strings,
- .get_ethtool_stats = rtl8366_get_ethtool_stats,
- .get_sset_count = rtl8366_get_sset_count,
- .port_bridge_join = rtl8366rb_port_bridge_join,
- .port_bridge_leave = rtl8366rb_port_bridge_leave,
- .port_vlan_filtering = rtl8366rb_vlan_filtering,
- .port_vlan_add = rtl8366_vlan_add,
- .port_vlan_del = rtl8366_vlan_del,
- .port_enable = rtl8366rb_port_enable,
- .port_disable = rtl8366rb_port_disable,
- .port_pre_bridge_flags = rtl8366rb_port_pre_bridge_flags,
- .port_bridge_flags = rtl8366rb_port_bridge_flags,
- .port_stp_state_set = rtl8366rb_port_stp_state_set,
- .port_fast_age = rtl8366rb_port_fast_age,
- .port_change_mtu = rtl8366rb_change_mtu,
- .port_max_mtu = rtl8366rb_max_mtu,
-};
-
static const struct realtek_ops rtl8366rb_ops = {
.detect = rtl8366rb_detect,
.get_vlan_mc = rtl8366rb_get_vlan_mc,
@@ -1925,16 +1890,66 @@ static const struct realtek_ops rtl8366rb_ops = {
};
const struct realtek_variant rtl8366rb_variant = {
- .ds_ops_smi = &rtl8366rb_switch_ops_smi,
- .ds_ops_mdio = &rtl8366rb_switch_ops_mdio,
+ .ds_ops = &rtl8366rb_switch_ops,
.ops = &rtl8366rb_ops,
.clk_delay = 10,
.cmd_read = 0xa9,
.cmd_write = 0xa8,
.chip_data_sz = sizeof(struct rtl8366rb),
};
-EXPORT_SYMBOL_GPL(rtl8366rb_variant);
+
+static const struct of_device_id rtl8366rb_of_match[] = {
+ { .compatible = "realtek,rtl8366rb", .data = &rtl8366rb_variant, },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, rtl8366rb_of_match);
+
+static struct platform_driver rtl8366rb_smi_driver = {
+ .driver = {
+ .name = "rtl8366rb-smi",
+ .of_match_table = rtl8366rb_of_match,
+ },
+ .probe = realtek_smi_probe,
+ .remove_new = realtek_smi_remove,
+ .shutdown = realtek_smi_shutdown,
+};
+
+static struct mdio_driver rtl8366rb_mdio_driver = {
+ .mdiodrv.driver = {
+ .name = "rtl8366rb-mdio",
+ .of_match_table = rtl8366rb_of_match,
+ },
+ .probe = realtek_mdio_probe,
+ .remove = realtek_mdio_remove,
+ .shutdown = realtek_mdio_shutdown,
+};
+
+static int rtl8366rb_init(void)
+{
+ int ret;
+
+ ret = realtek_mdio_driver_register(&rtl8366rb_mdio_driver);
+ if (ret)
+ return ret;
+
+ ret = realtek_smi_driver_register(&rtl8366rb_smi_driver);
+ if (ret) {
+ realtek_mdio_driver_unregister(&rtl8366rb_mdio_driver);
+ return ret;
+ }
+
+ return 0;
+}
+module_init(rtl8366rb_init);
+
+static void __exit rtl8366rb_exit(void)
+{
+ realtek_smi_driver_unregister(&rtl8366rb_smi_driver);
+ realtek_mdio_driver_unregister(&rtl8366rb_mdio_driver);
+}
+module_exit(rtl8366rb_exit);
MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
MODULE_DESCRIPTION("Driver for RTL8366RB ethernet switch");
MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(REALTEK_DSA);
diff --git a/drivers/net/dsa/realtek/rtl83xx.c b/drivers/net/dsa/realtek/rtl83xx.c
new file mode 100644
index 000000000000..d2e876805393
--- /dev/null
+++ b/drivers/net/dsa/realtek/rtl83xx.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/of_mdio.h>
+
+#include "realtek.h"
+#include "rtl83xx.h"
+
+/**
+ * rtl83xx_lock() - Locks the mutex used by regmaps
+ * @ctx: realtek_priv pointer
+ *
+ * This function is passed to regmap to be used as the lock function.
+ * It is also used externally to block regmap before executing multiple
+ * operations that must happen in sequence (which will use
+ * realtek_priv.map_nolock instead).
+ *
+ * Context: Can sleep. Holds priv->map_lock lock.
+ * Return: nothing
+ */
+void rtl83xx_lock(void *ctx)
+{
+ struct realtek_priv *priv = ctx;
+
+ mutex_lock(&priv->map_lock);
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_lock, REALTEK_DSA);
+
+/**
+ * rtl83xx_unlock() - Unlocks the mutex used by regmaps
+ * @ctx: realtek_priv pointer
+ *
+ * This function unlocks the lock acquired by rtl83xx_lock.
+ *
+ * Context: Releases priv->map_lock lock.
+ * Return: nothing
+ */
+void rtl83xx_unlock(void *ctx)
+{
+ struct realtek_priv *priv = ctx;
+
+ mutex_unlock(&priv->map_lock);
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_unlock, REALTEK_DSA);
+
+static int rtl83xx_user_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+ struct realtek_priv *priv = bus->priv;
+
+ return priv->ops->phy_read(priv, addr, regnum);
+}
+
+static int rtl83xx_user_mdio_write(struct mii_bus *bus, int addr, int regnum,
+ u16 val)
+{
+ struct realtek_priv *priv = bus->priv;
+
+ return priv->ops->phy_write(priv, addr, regnum, val);
+}
+
+/**
+ * rtl83xx_setup_user_mdio() - register the user mii bus driver
+ * @ds: DSA switch associated with this user_mii_bus
+ *
+ * Registers the MDIO bus for built-in Ethernet PHYs, and associates it with
+ * the mandatory 'mdio' child OF node of the switch.
+ *
+ * Context: Can sleep.
+ * Return: 0 on success, negative value for failure.
+ */
+int rtl83xx_setup_user_mdio(struct dsa_switch *ds)
+{
+ struct realtek_priv *priv = ds->priv;
+ struct device_node *mdio_np;
+ struct mii_bus *bus;
+ int ret = 0;
+
+ mdio_np = of_get_child_by_name(priv->dev->of_node, "mdio");
+ if (!mdio_np) {
+ dev_err(priv->dev, "no MDIO bus node\n");
+ return -ENODEV;
+ }
+
+ bus = devm_mdiobus_alloc(priv->dev);
+ if (!bus) {
+ ret = -ENOMEM;
+ goto err_put_node;
+ }
+
+ bus->priv = priv;
+ bus->name = "Realtek user MII";
+ bus->read = rtl83xx_user_mdio_read;
+ bus->write = rtl83xx_user_mdio_write;
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s:user_mii", dev_name(priv->dev));
+ bus->parent = priv->dev;
+
+ ret = devm_of_mdiobus_register(priv->dev, bus, mdio_np);
+ if (ret) {
+ dev_err(priv->dev, "unable to register MDIO bus %s\n",
+ bus->id);
+ goto err_put_node;
+ }
+
+ priv->user_mii_bus = bus;
+
+err_put_node:
+ of_node_put(mdio_np);
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_setup_user_mdio, REALTEK_DSA);
+
+/**
+ * rtl83xx_probe() - probe a Realtek switch
+ * @dev: the device being probed
+ * @interface_info: specific management interface info.
+ *
+ * This function initializes realtek_priv and reads data from the device tree
+ * node. The switch is hard resetted if a method is provided.
+ *
+ * Context: Can sleep.
+ * Return: Pointer to the realtek_priv or ERR_PTR() in case of failure.
+ *
+ * The realtek_priv pointer does not need to be freed as it is controlled by
+ * devres.
+ */
+struct realtek_priv *
+rtl83xx_probe(struct device *dev,
+ const struct realtek_interface_info *interface_info)
+{
+ const struct realtek_variant *var;
+ struct realtek_priv *priv;
+ struct regmap_config rc = {
+ .reg_bits = 10, /* A4..A0 R4..R0 */
+ .val_bits = 16,
+ .reg_stride = 1,
+ .max_register = 0xffff,
+ .reg_format_endian = REGMAP_ENDIAN_BIG,
+ .reg_read = interface_info->reg_read,
+ .reg_write = interface_info->reg_write,
+ .cache_type = REGCACHE_NONE,
+ .lock = rtl83xx_lock,
+ .unlock = rtl83xx_unlock,
+ };
+ int ret;
+
+ var = of_device_get_match_data(dev);
+ if (!var)
+ return ERR_PTR(-EINVAL);
+
+ priv = devm_kzalloc(dev, size_add(sizeof(*priv), var->chip_data_sz),
+ GFP_KERNEL);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&priv->map_lock);
+
+ rc.lock_arg = priv;
+ priv->map = devm_regmap_init(dev, NULL, priv, &rc);
+ if (IS_ERR(priv->map)) {
+ ret = PTR_ERR(priv->map);
+ dev_err(dev, "regmap init failed: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ rc.disable_locking = true;
+ priv->map_nolock = devm_regmap_init(dev, NULL, priv, &rc);
+ if (IS_ERR(priv->map_nolock)) {
+ ret = PTR_ERR(priv->map_nolock);
+ dev_err(dev, "regmap init failed: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ /* Link forward and backward */
+ priv->dev = dev;
+ priv->variant = var;
+ priv->ops = var->ops;
+ priv->chip_data = (void *)priv + sizeof(*priv);
+
+ spin_lock_init(&priv->lock);
+
+ priv->leds_disabled = of_property_read_bool(dev->of_node,
+ "realtek,disable-leds");
+
+ /* TODO: if power is software controlled, set up any regulators here */
+ priv->reset_ctl = devm_reset_control_get_optional(dev, NULL);
+ if (IS_ERR(priv->reset_ctl)) {
+ ret = PTR_ERR(priv->reset_ctl);
+ dev_err_probe(dev, ret, "failed to get reset control\n");
+ return ERR_CAST(priv->reset_ctl);
+ }
+
+ priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->reset)) {
+ dev_err(dev, "failed to get RESET GPIO\n");
+ return ERR_CAST(priv->reset);
+ }
+
+ dev_set_drvdata(dev, priv);
+
+ if (priv->reset_ctl || priv->reset) {
+ rtl83xx_reset_assert(priv);
+ dev_dbg(dev, "asserted RESET\n");
+ msleep(REALTEK_HW_STOP_DELAY);
+ rtl83xx_reset_deassert(priv);
+ msleep(REALTEK_HW_START_DELAY);
+ dev_dbg(dev, "deasserted RESET\n");
+ }
+
+ return priv;
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_probe, REALTEK_DSA);
+
+/**
+ * rtl83xx_register_switch() - detects and register a switch
+ * @priv: realtek_priv pointer
+ *
+ * This function first checks the switch chip ID and register a DSA
+ * switch.
+ *
+ * Context: Can sleep. Takes and releases priv->map_lock.
+ * Return: 0 on success, negative value for failure.
+ */
+int rtl83xx_register_switch(struct realtek_priv *priv)
+{
+ struct dsa_switch *ds = &priv->ds;
+ int ret;
+
+ ret = priv->ops->detect(priv);
+ if (ret) {
+ dev_err_probe(priv->dev, ret, "unable to detect switch\n");
+ return ret;
+ }
+
+ ds->priv = priv;
+ ds->dev = priv->dev;
+ ds->ops = priv->variant->ds_ops;
+ ds->num_ports = priv->num_ports;
+
+ ret = dsa_register_switch(ds);
+ if (ret) {
+ dev_err_probe(priv->dev, ret, "unable to register switch\n");
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_register_switch, REALTEK_DSA);
+
+/**
+ * rtl83xx_unregister_switch() - unregister a switch
+ * @priv: realtek_priv pointer
+ *
+ * This function unregister a DSA switch.
+ *
+ * Context: Can sleep.
+ * Return: Nothing.
+ */
+void rtl83xx_unregister_switch(struct realtek_priv *priv)
+{
+ struct dsa_switch *ds = &priv->ds;
+
+ dsa_unregister_switch(ds);
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_unregister_switch, REALTEK_DSA);
+
+/**
+ * rtl83xx_shutdown() - shutdown a switch
+ * @priv: realtek_priv pointer
+ *
+ * This function shuts down the DSA switch and cleans the platform driver data,
+ * to prevent realtek_{smi,mdio}_remove() from running afterwards, which is
+ * possible if the parent bus implements its own .shutdown() as .remove().
+ *
+ * Context: Can sleep.
+ * Return: Nothing.
+ */
+void rtl83xx_shutdown(struct realtek_priv *priv)
+{
+ struct dsa_switch *ds = &priv->ds;
+
+ dsa_switch_shutdown(ds);
+
+ dev_set_drvdata(priv->dev, NULL);
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_shutdown, REALTEK_DSA);
+
+/**
+ * rtl83xx_remove() - Cleanup a realtek switch driver
+ * @priv: realtek_priv pointer
+ *
+ * If a method is provided, this function asserts the hard reset of the switch
+ * in order to avoid leaking traffic when the driver is gone.
+ *
+ * Context: Might sleep if priv->gdev->chip->can_sleep.
+ * Return: nothing
+ */
+void rtl83xx_remove(struct realtek_priv *priv)
+{
+ /* leave the device reset asserted */
+ rtl83xx_reset_assert(priv);
+}
+EXPORT_SYMBOL_NS_GPL(rtl83xx_remove, REALTEK_DSA);
+
+void rtl83xx_reset_assert(struct realtek_priv *priv)
+{
+ int ret;
+
+ ret = reset_control_assert(priv->reset_ctl);
+ if (ret)
+ dev_warn(priv->dev,
+ "Failed to assert the switch reset control: %pe\n",
+ ERR_PTR(ret));
+
+ gpiod_set_value(priv->reset, true);
+}
+
+void rtl83xx_reset_deassert(struct realtek_priv *priv)
+{
+ int ret;
+
+ ret = reset_control_deassert(priv->reset_ctl);
+ if (ret)
+ dev_warn(priv->dev,
+ "Failed to deassert the switch reset control: %pe\n",
+ ERR_PTR(ret));
+
+ gpiod_set_value(priv->reset, false);
+}
+
+MODULE_AUTHOR("Luiz Angelo Daros de Luca <luizluca@gmail.com>");
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("Realtek DSA switches common module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek/rtl83xx.h b/drivers/net/dsa/realtek/rtl83xx.h
new file mode 100644
index 000000000000..c8a0ff8fd75e
--- /dev/null
+++ b/drivers/net/dsa/realtek/rtl83xx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RTL83XX_H
+#define _RTL83XX_H
+
+struct realtek_interface_info {
+ int (*reg_read)(void *ctx, u32 reg, u32 *val);
+ int (*reg_write)(void *ctx, u32 reg, u32 val);
+};
+
+void rtl83xx_lock(void *ctx);
+void rtl83xx_unlock(void *ctx);
+int rtl83xx_setup_user_mdio(struct dsa_switch *ds);
+struct realtek_priv *
+rtl83xx_probe(struct device *dev,
+ const struct realtek_interface_info *interface_info);
+int rtl83xx_register_switch(struct realtek_priv *priv);
+void rtl83xx_unregister_switch(struct realtek_priv *priv);
+void rtl83xx_shutdown(struct realtek_priv *priv);
+void rtl83xx_remove(struct realtek_priv *priv);
+void rtl83xx_reset_assert(struct realtek_priv *priv);
+void rtl83xx_reset_deassert(struct realtek_priv *priv);
+
+#endif /* _RTL83XX_H */
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 768454aa36d6..d29b5d7af0d7 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -67,18 +67,12 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
static int dummy_dev_init(struct net_device *dev)
{
- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
- if (!dev->lstats)
- return -ENOMEM;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
+ netdev_lockdep_set_classes(dev);
return 0;
}
-static void dummy_dev_uninit(struct net_device *dev)
-{
- free_percpu(dev->lstats);
-}
-
static int dummy_change_carrier(struct net_device *dev, bool new_carrier)
{
if (new_carrier)
@@ -90,7 +84,6 @@ static int dummy_change_carrier(struct net_device *dev, bool new_carrier)
static const struct net_device_ops dummy_netdev_ops = {
.ndo_init = dummy_dev_init,
- .ndo_uninit = dummy_dev_uninit,
.ndo_start_xmit = dummy_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = set_multicast_list,
diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c
index 0e0aa4016858..c5636245f1ca 100644
--- a/drivers/net/ethernet/8390/8390.c
+++ b/drivers/net/ethernet/8390/8390.c
@@ -100,4 +100,5 @@ static void __exit ns8390_module_exit(void)
module_init(ns8390_module_init);
module_exit(ns8390_module_exit);
#endif /* MODULE */
+MODULE_DESCRIPTION("National Semiconductor 8390 core driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c
index 6834742057b3..6d429b11e9c6 100644
--- a/drivers/net/ethernet/8390/8390p.c
+++ b/drivers/net/ethernet/8390/8390p.c
@@ -102,4 +102,5 @@ static void __exit NS8390p_cleanup_module(void)
module_init(NS8390p_init_module);
module_exit(NS8390p_cleanup_module);
+MODULE_DESCRIPTION("National Semiconductor 8390 core for ISA driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c
index a09f383dd249..828edca8d30c 100644
--- a/drivers/net/ethernet/8390/apne.c
+++ b/drivers/net/ethernet/8390/apne.c
@@ -610,4 +610,5 @@ static int init_pcmcia(void)
return 1;
}
+MODULE_DESCRIPTION("National Semiconductor 8390 Amiga PCMCIA ethernet driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 24f49a8ff903..fd9dcdc356e6 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -270,4 +270,5 @@ static void __exit hydra_cleanup_module(void)
module_init(hydra_init_module);
module_exit(hydra_cleanup_module);
+MODULE_DESCRIPTION("Zorro-II Hydra 8390 ethernet driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c
index 265976e3b64a..6cc0e190aa79 100644
--- a/drivers/net/ethernet/8390/stnic.c
+++ b/drivers/net/ethernet/8390/stnic.c
@@ -296,4 +296,5 @@ static void __exit stnic_cleanup(void)
module_init(stnic_probe);
module_exit(stnic_cleanup);
+MODULE_DESCRIPTION("National Semiconductor DP83902AV ethernet driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index d70390e9d03d..c24dd4fe7a10 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -443,4 +443,5 @@ static void __exit zorro8390_cleanup_module(void)
module_init(zorro8390_init_module);
module_exit(zorro8390_cleanup_module);
+MODULE_DESCRIPTION("Zorro NS8390-based ethernet driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 5a274b99f299..6a19b5393ed1 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -15,9 +15,6 @@ if ETHERNET
config MDIO
tristate
-config SUNGEM_PHY
- tristate
-
source "drivers/net/ethernet/3com/Kconfig"
source "drivers/net/ethernet/actions/Kconfig"
source "drivers/net/ethernet/adaptec/Kconfig"
diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig
index da3bdd302502..760a9a60bc15 100644
--- a/drivers/net/ethernet/adi/Kconfig
+++ b/drivers/net/ethernet/adi/Kconfig
@@ -21,6 +21,7 @@ config ADIN1110
tristate "Analog Devices ADIN1110 MAC-PHY"
depends on SPI && NET_SWITCHDEV
select CRC8
+ select PHYLIB
help
Say yes here to build support for Analog Devices ADIN1110
Low Power 10BASE-T1L Ethernet MAC-PHY.
diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c
index d7c274af6d4d..8b4ef5121308 100644
--- a/drivers/net/ethernet/adi/adin1110.c
+++ b/drivers/net/ethernet/adi/adin1110.c
@@ -464,8 +464,9 @@ static int adin1110_mdio_read(struct mii_bus *bus, int phy_id, int reg)
* bitfield of ADIN1110_MDIOACC register will contain
* the requested register value.
*/
- ret = readx_poll_timeout(adin1110_read_mdio_acc, priv, val,
- (val & ADIN1110_MDIO_TRDONE), 10000, 30000);
+ ret = readx_poll_timeout_atomic(adin1110_read_mdio_acc, priv, val,
+ (val & ADIN1110_MDIO_TRDONE),
+ 100, 30000);
if (ret < 0)
return ret;
@@ -495,8 +496,9 @@ static int adin1110_mdio_write(struct mii_bus *bus, int phy_id,
if (ret < 0)
return ret;
- return readx_poll_timeout(adin1110_read_mdio_acc, priv, val,
- (val & ADIN1110_MDIO_TRDONE), 10000, 30000);
+ return readx_poll_timeout_atomic(adin1110_read_mdio_acc, priv, val,
+ (val & ADIN1110_MDIO_TRDONE),
+ 100, 30000);
}
/* ADIN1110 MAC-PHY contains an ADIN1100 PHY.
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 633b321d7fdd..9e9e4a03f1a8 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -90,8 +90,7 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *admin_queue)
struct ena_com_admin_sq *sq = &admin_queue->sq;
u16 size = ADMIN_SQ_SIZE(admin_queue->q_depth);
- sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size,
- &sq->dma_addr, GFP_KERNEL);
+ sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, &sq->dma_addr, GFP_KERNEL);
if (!sq->entries) {
netdev_err(ena_dev->net_device, "Memory allocation failed\n");
@@ -113,8 +112,7 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *admin_queue)
struct ena_com_admin_cq *cq = &admin_queue->cq;
u16 size = ADMIN_CQ_SIZE(admin_queue->q_depth);
- cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size,
- &cq->dma_addr, GFP_KERNEL);
+ cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, &cq->dma_addr, GFP_KERNEL);
if (!cq->entries) {
netdev_err(ena_dev->net_device, "Memory allocation failed\n");
@@ -136,8 +134,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev,
ena_dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
- aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size,
- &aenq->dma_addr, GFP_KERNEL);
+ aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size, &aenq->dma_addr, GFP_KERNEL);
if (!aenq->entries) {
netdev_err(ena_dev->net_device, "Memory allocation failed\n");
@@ -155,14 +152,13 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev,
aenq_caps = 0;
aenq_caps |= ena_dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
- aenq_caps |= (sizeof(struct ena_admin_aenq_entry)
- << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
- ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+ aenq_caps |=
+ (sizeof(struct ena_admin_aenq_entry) << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+ ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
writel(aenq_caps, ena_dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF);
if (unlikely(!aenq_handlers)) {
- netdev_err(ena_dev->net_device,
- "AENQ handlers pointer is NULL\n");
+ netdev_err(ena_dev->net_device, "AENQ handlers pointer is NULL\n");
return -EINVAL;
}
@@ -189,14 +185,12 @@ static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *admin_queu
}
if (unlikely(!admin_queue->comp_ctx)) {
- netdev_err(admin_queue->ena_dev->net_device,
- "Completion context is NULL\n");
+ netdev_err(admin_queue->ena_dev->net_device, "Completion context is NULL\n");
return NULL;
}
if (unlikely(admin_queue->comp_ctx[command_id].occupied && capture)) {
- netdev_err(admin_queue->ena_dev->net_device,
- "Completion context is occupied\n");
+ netdev_err(admin_queue->ena_dev->net_device, "Completion context is occupied\n");
return NULL;
}
@@ -226,8 +220,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
/* In case of queue FULL */
cnt = (u16)atomic_read(&admin_queue->outstanding_cmds);
if (cnt >= admin_queue->q_depth) {
- netdev_dbg(admin_queue->ena_dev->net_device,
- "Admin queue is full.\n");
+ netdev_dbg(admin_queue->ena_dev->net_device, "Admin queue is full.\n");
admin_queue->stats.out_of_space++;
return ERR_PTR(-ENOSPC);
}
@@ -274,8 +267,7 @@ static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *admin_queue)
struct ena_comp_ctx *comp_ctx;
u16 i;
- admin_queue->comp_ctx =
- devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL);
+ admin_queue->comp_ctx = devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL);
if (unlikely(!admin_queue->comp_ctx)) {
netdev_err(ena_dev->net_device, "Memory allocation failed\n");
return -ENOMEM;
@@ -336,20 +328,17 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
dev_node = dev_to_node(ena_dev->dmadev);
set_dev_node(ena_dev->dmadev, ctx->numa_node);
io_sq->desc_addr.virt_addr =
- dma_alloc_coherent(ena_dev->dmadev, size,
- &io_sq->desc_addr.phys_addr,
+ dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr,
GFP_KERNEL);
set_dev_node(ena_dev->dmadev, dev_node);
if (!io_sq->desc_addr.virt_addr) {
io_sq->desc_addr.virt_addr =
dma_alloc_coherent(ena_dev->dmadev, size,
- &io_sq->desc_addr.phys_addr,
- GFP_KERNEL);
+ &io_sq->desc_addr.phys_addr, GFP_KERNEL);
}
if (!io_sq->desc_addr.virt_addr) {
- netdev_err(ena_dev->net_device,
- "Memory allocation failed\n");
+ netdev_err(ena_dev->net_device, "Memory allocation failed\n");
return -ENOMEM;
}
}
@@ -367,16 +356,14 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
dev_node = dev_to_node(ena_dev->dmadev);
set_dev_node(ena_dev->dmadev, ctx->numa_node);
- io_sq->bounce_buf_ctrl.base_buffer =
- devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+ io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
set_dev_node(ena_dev->dmadev, dev_node);
if (!io_sq->bounce_buf_ctrl.base_buffer)
io_sq->bounce_buf_ctrl.base_buffer =
devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
if (!io_sq->bounce_buf_ctrl.base_buffer) {
- netdev_err(ena_dev->net_device,
- "Bounce buffer memory allocation failed\n");
+ netdev_err(ena_dev->net_device, "Bounce buffer memory allocation failed\n");
return -ENOMEM;
}
@@ -425,13 +412,11 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
prev_node = dev_to_node(ena_dev->dmadev);
set_dev_node(ena_dev->dmadev, ctx->numa_node);
io_cq->cdesc_addr.virt_addr =
- dma_alloc_coherent(ena_dev->dmadev, size,
- &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+ dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
set_dev_node(ena_dev->dmadev, prev_node);
if (!io_cq->cdesc_addr.virt_addr) {
io_cq->cdesc_addr.virt_addr =
- dma_alloc_coherent(ena_dev->dmadev, size,
- &io_cq->cdesc_addr.phys_addr,
+ dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr,
GFP_KERNEL);
}
@@ -514,8 +499,8 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue,
u8 comp_status)
{
if (unlikely(comp_status != 0))
- netdev_err(admin_queue->ena_dev->net_device,
- "Admin command failed[%u]\n", comp_status);
+ netdev_err(admin_queue->ena_dev->net_device, "Admin command failed[%u]\n",
+ comp_status);
switch (comp_status) {
case ENA_ADMIN_SUCCESS:
@@ -580,8 +565,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
}
if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
- netdev_err(admin_queue->ena_dev->net_device,
- "Command was aborted\n");
+ netdev_err(admin_queue->ena_dev->net_device, "Command was aborted\n");
spin_lock_irqsave(&admin_queue->q_lock, flags);
admin_queue->stats.aborted_cmd++;
spin_unlock_irqrestore(&admin_queue->q_lock, flags);
@@ -589,8 +573,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
goto err;
}
- WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n",
- comp_ctx->status);
+ WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", comp_ctx->status);
ret = ena_com_comp_status_to_errno(admin_queue, comp_ctx->comp_status);
err:
@@ -634,8 +617,7 @@ static int ena_com_set_llq(struct ena_com_dev *ena_dev)
sizeof(resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to set LLQ configurations: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to set LLQ configurations: %d\n", ret);
return ret;
}
@@ -658,8 +640,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
llq_default_cfg->llq_header_location;
} else {
netdev_err(ena_dev->net_device,
- "Invalid header location control, supported: 0x%x\n",
- supported_feat);
+ "Invalid header location control, supported: 0x%x\n", supported_feat);
return -EINVAL;
}
@@ -681,8 +662,8 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
netdev_err(ena_dev->net_device,
"Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
- llq_default_cfg->llq_stride_ctrl,
- supported_feat, llq_info->desc_stride_ctrl);
+ llq_default_cfg->llq_stride_ctrl, supported_feat,
+ llq_info->desc_stride_ctrl);
}
} else {
llq_info->desc_stride_ctrl = 0;
@@ -704,8 +685,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
llq_info->desc_list_entry_size = 256;
} else {
netdev_err(ena_dev->net_device,
- "Invalid entry_size_ctrl, supported: 0x%x\n",
- supported_feat);
+ "Invalid entry_size_ctrl, supported: 0x%x\n", supported_feat);
return -EINVAL;
}
@@ -750,8 +730,8 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
netdev_err(ena_dev->net_device,
"Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
- llq_default_cfg->llq_num_decs_before_header,
- supported_feat, llq_info->descs_num_before_header);
+ llq_default_cfg->llq_num_decs_before_header, supported_feat,
+ llq_info->descs_num_before_header);
}
/* Check for accelerated queue supported */
llq_accel_mode_get = llq_features->accel_mode.u.get;
@@ -767,8 +747,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
rc = ena_com_set_llq(ena_dev);
if (rc)
- netdev_err(ena_dev->net_device,
- "Cannot set LLQ configuration: %d\n", rc);
+ netdev_err(ena_dev->net_device, "Cannot set LLQ configuration: %d\n", rc);
return rc;
}
@@ -780,8 +759,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com
int ret;
wait_for_completion_timeout(&comp_ctx->wait_event,
- usecs_to_jiffies(
- admin_queue->completion_timeout));
+ usecs_to_jiffies(admin_queue->completion_timeout));
/* In case the command wasn't completed find out the root cause.
* There might be 2 kinds of errors
@@ -797,8 +775,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com
if (comp_ctx->status == ENA_CMD_COMPLETED) {
netdev_err(admin_queue->ena_dev->net_device,
"The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n",
- comp_ctx->cmd_opcode,
- admin_queue->auto_polling ? "ON" : "OFF");
+ comp_ctx->cmd_opcode, admin_queue->auto_polling ? "ON" : "OFF");
/* Check if fallback to polling is enabled */
if (admin_queue->auto_polling)
admin_queue->polling = true;
@@ -867,15 +844,13 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
if (unlikely(i == timeout)) {
netdev_err(ena_dev->net_device,
"Reading reg failed for timeout. expected: req id[%u] offset[%u] actual: req id[%u] offset[%u]\n",
- mmio_read->seq_num, offset, read_resp->req_id,
- read_resp->reg_off);
+ mmio_read->seq_num, offset, read_resp->req_id, read_resp->reg_off);
ret = ENA_MMIO_READ_TIMEOUT;
goto err;
}
if (read_resp->reg_off != offset) {
- netdev_err(ena_dev->net_device,
- "Read failure: wrong offset provided\n");
+ netdev_err(ena_dev->net_device, "Read failure: wrong offset provided\n");
ret = ENA_MMIO_READ_TIMEOUT;
} else {
ret = read_resp->reg_val;
@@ -934,8 +909,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
sizeof(destroy_resp));
if (unlikely(ret && (ret != -ENODEV)))
- netdev_err(ena_dev->net_device,
- "Failed to destroy io sq error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to destroy io sq error: %d\n", ret);
return ret;
}
@@ -949,8 +923,7 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
if (io_cq->cdesc_addr.virt_addr) {
size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
- dma_free_coherent(ena_dev->dmadev, size,
- io_cq->cdesc_addr.virt_addr,
+ dma_free_coherent(ena_dev->dmadev, size, io_cq->cdesc_addr.virt_addr,
io_cq->cdesc_addr.phys_addr);
io_cq->cdesc_addr.virt_addr = NULL;
@@ -959,8 +932,7 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
if (io_sq->desc_addr.virt_addr) {
size = io_sq->desc_entry_size * io_sq->q_depth;
- dma_free_coherent(ena_dev->dmadev, size,
- io_sq->desc_addr.virt_addr,
+ dma_free_coherent(ena_dev->dmadev, size, io_sq->desc_addr.virt_addr,
io_sq->desc_addr.phys_addr);
io_sq->desc_addr.virt_addr = NULL;
@@ -985,8 +957,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
- netdev_err(ena_dev->net_device,
- "Reg read timeout occurred\n");
+ netdev_err(ena_dev->net_device, "Reg read timeout occurred\n");
return -ETIME;
}
@@ -1026,8 +997,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
int ret;
if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) {
- netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
- feature_id);
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", feature_id);
return -EOPNOTSUPP;
}
@@ -1064,8 +1034,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
if (unlikely(ret))
netdev_err(ena_dev->net_device,
- "Failed to submit get_feature command %d error: %d\n",
- feature_id, ret);
+ "Failed to submit get_feature command %d error: %d\n", feature_id, ret);
return ret;
}
@@ -1104,13 +1073,11 @@ static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
{
struct ena_rss *rss = &ena_dev->rss;
- if (!ena_com_check_supported_feature_id(ena_dev,
- ENA_ADMIN_RSS_HASH_FUNCTION))
+ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION))
return -EOPNOTSUPP;
- rss->hash_key =
- dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
- &rss->hash_key_dma_addr, GFP_KERNEL);
+ rss->hash_key = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+ &rss->hash_key_dma_addr, GFP_KERNEL);
if (unlikely(!rss->hash_key))
return -ENOMEM;
@@ -1123,8 +1090,8 @@ static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev)
struct ena_rss *rss = &ena_dev->rss;
if (rss->hash_key)
- dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
- rss->hash_key, rss->hash_key_dma_addr);
+ dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), rss->hash_key,
+ rss->hash_key_dma_addr);
rss->hash_key = NULL;
}
@@ -1132,9 +1099,8 @@ static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev)
{
struct ena_rss *rss = &ena_dev->rss;
- rss->hash_ctrl =
- dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
- &rss->hash_ctrl_dma_addr, GFP_KERNEL);
+ rss->hash_ctrl = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+ &rss->hash_ctrl_dma_addr, GFP_KERNEL);
if (unlikely(!rss->hash_ctrl))
return -ENOMEM;
@@ -1147,8 +1113,8 @@ static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev)
struct ena_rss *rss = &ena_dev->rss;
if (rss->hash_ctrl)
- dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
- rss->hash_ctrl, rss->hash_ctrl_dma_addr);
+ dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), rss->hash_ctrl,
+ rss->hash_ctrl_dma_addr);
rss->hash_ctrl = NULL;
}
@@ -1177,15 +1143,13 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev,
tbl_size = (1ULL << log_size) *
sizeof(struct ena_admin_rss_ind_table_entry);
- rss->rss_ind_tbl =
- dma_alloc_coherent(ena_dev->dmadev, tbl_size,
- &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
+ rss->rss_ind_tbl = dma_alloc_coherent(ena_dev->dmadev, tbl_size, &rss->rss_ind_tbl_dma_addr,
+ GFP_KERNEL);
if (unlikely(!rss->rss_ind_tbl))
goto mem_err1;
tbl_size = (1ULL << log_size) * sizeof(u16);
- rss->host_rss_ind_tbl =
- devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL);
+ rss->host_rss_ind_tbl = devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL);
if (unlikely(!rss->host_rss_ind_tbl))
goto mem_err2;
@@ -1197,8 +1161,7 @@ mem_err2:
tbl_size = (1ULL << log_size) *
sizeof(struct ena_admin_rss_ind_table_entry);
- dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
- rss->rss_ind_tbl_dma_addr);
+ dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, rss->rss_ind_tbl_dma_addr);
rss->rss_ind_tbl = NULL;
mem_err1:
rss->tbl_log_size = 0;
@@ -1261,8 +1224,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
&create_cmd.sq_ba,
io_sq->desc_addr.phys_addr);
if (unlikely(ret)) {
- netdev_err(ena_dev->net_device,
- "Memory address set failed\n");
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
return ret;
}
}
@@ -1273,8 +1235,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
(struct ena_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (unlikely(ret)) {
- netdev_err(ena_dev->net_device,
- "Failed to create IO SQ. error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to create IO SQ. error: %d\n", ret);
return ret;
}
@@ -1284,16 +1245,12 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
(uintptr_t)cmd_completion.sq_doorbell_offset);
if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar
- + cmd_completion.llq_headers_offset);
-
io_sq->desc_addr.pbuf_dev_addr =
(u8 __iomem *)((uintptr_t)ena_dev->mem_bar +
cmd_completion.llq_descriptors_offset);
}
- netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n",
- io_sq->idx, io_sq->q_depth);
+ netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth);
return ret;
}
@@ -1420,8 +1377,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
(struct ena_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (unlikely(ret)) {
- netdev_err(ena_dev->net_device,
- "Failed to create IO CQ. error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to create IO CQ. error: %d\n", ret);
return ret;
}
@@ -1430,18 +1386,12 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
cmd_completion.cq_interrupt_unmask_register_offset);
- if (cmd_completion.cq_head_db_register_offset)
- io_cq->cq_head_db_reg =
- (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
- cmd_completion.cq_head_db_register_offset);
-
if (cmd_completion.numa_node_register_offset)
io_cq->numa_node_cfg_reg =
(u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
cmd_completion.numa_node_register_offset);
- netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n",
- io_cq->idx, io_cq->q_depth);
+ netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth);
return ret;
}
@@ -1451,8 +1401,7 @@ int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
struct ena_com_io_cq **io_cq)
{
if (qid >= ENA_TOTAL_NUM_QUEUES) {
- netdev_err(ena_dev->net_device,
- "Invalid queue number %d but the max is %d\n", qid,
+ netdev_err(ena_dev->net_device, "Invalid queue number %d but the max is %d\n", qid,
ENA_TOTAL_NUM_QUEUES);
return -EINVAL;
}
@@ -1492,8 +1441,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
spin_lock_irqsave(&admin_queue->q_lock, flags);
while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
spin_unlock_irqrestore(&admin_queue->q_lock, flags);
- ena_delay_exponential_backoff_us(exp++,
- ena_dev->ena_min_poll_delay_us);
+ ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us);
spin_lock_irqsave(&admin_queue->q_lock, flags);
}
spin_unlock_irqrestore(&admin_queue->q_lock, flags);
@@ -1519,8 +1467,7 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
sizeof(destroy_resp));
if (unlikely(ret && (ret != -ENODEV)))
- netdev_err(ena_dev->net_device,
- "Failed to destroy IO CQ. error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to destroy IO CQ. error: %d\n", ret);
return ret;
}
@@ -1588,8 +1535,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag)
sizeof(resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to config AENQ ret: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to config AENQ ret: %d\n", ret);
return ret;
}
@@ -1610,8 +1556,7 @@ int ena_com_get_dma_width(struct ena_com_dev *ena_dev)
netdev_dbg(ena_dev->net_device, "ENA dma width: %d\n", width);
if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) {
- netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n",
- width);
+ netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n", width);
return -EINVAL;
}
@@ -1633,19 +1578,16 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev)
ctrl_ver = ena_com_reg_bar_read32(ena_dev,
ENA_REGS_CONTROLLER_VERSION_OFF);
- if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) ||
- (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) {
+ if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) {
netdev_err(ena_dev->net_device, "Reg read timeout occurred\n");
return -ETIME;
}
dev_info(ena_dev->dmadev, "ENA device version: %d.%d\n",
- (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >>
- ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+ (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
- dev_info(ena_dev->dmadev,
- "ENA controller version: %d.%d.%d implementation version %d\n",
+ dev_info(ena_dev->dmadev, "ENA controller version: %d.%d.%d implementation version %d\n",
(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
@@ -1694,20 +1636,17 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
size = ADMIN_SQ_SIZE(admin_queue->q_depth);
if (sq->entries)
- dma_free_coherent(ena_dev->dmadev, size, sq->entries,
- sq->dma_addr);
+ dma_free_coherent(ena_dev->dmadev, size, sq->entries, sq->dma_addr);
sq->entries = NULL;
size = ADMIN_CQ_SIZE(admin_queue->q_depth);
if (cq->entries)
- dma_free_coherent(ena_dev->dmadev, size, cq->entries,
- cq->dma_addr);
+ dma_free_coherent(ena_dev->dmadev, size, cq->entries, cq->dma_addr);
cq->entries = NULL;
size = ADMIN_AENQ_SIZE(aenq->q_depth);
if (ena_dev->aenq.entries)
- dma_free_coherent(ena_dev->dmadev, size, aenq->entries,
- aenq->dma_addr);
+ dma_free_coherent(ena_dev->dmadev, size, aenq->entries, aenq->dma_addr);
aenq->entries = NULL;
}
@@ -1733,10 +1672,8 @@ int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
spin_lock_init(&mmio_read->lock);
- mmio_read->read_resp =
- dma_alloc_coherent(ena_dev->dmadev,
- sizeof(*mmio_read->read_resp),
- &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+ mmio_read->read_resp = dma_alloc_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp),
+ &mmio_read->read_resp_dma_addr, GFP_KERNEL);
if (unlikely(!mmio_read->read_resp))
goto err;
@@ -1767,8 +1704,8 @@ void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev)
writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
- dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp),
- mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+ dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), mmio_read->read_resp,
+ mmio_read->read_resp_dma_addr);
mmio_read->read_resp = NULL;
}
@@ -1800,8 +1737,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev,
}
if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) {
- netdev_err(ena_dev->net_device,
- "Device isn't ready, abort com init\n");
+ netdev_err(ena_dev->net_device, "Device isn't ready, abort com init\n");
return -ENODEV;
}
@@ -1878,8 +1814,7 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
int ret;
if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) {
- netdev_err(ena_dev->net_device,
- "Qid (%d) is bigger than max num of queues (%d)\n",
+ netdev_err(ena_dev->net_device, "Qid (%d) is bigger than max num of queues (%d)\n",
ctx->qid, ENA_TOTAL_NUM_QUEUES);
return -EINVAL;
}
@@ -1905,8 +1840,7 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
/* header length is limited to 8 bits */
- io_sq->tx_max_header_size =
- min_t(u32, ena_dev->tx_max_header_size, SZ_256);
+ io_sq->tx_max_header_size = min_t(u32, ena_dev->tx_max_header_size, SZ_256);
ret = ena_com_init_io_sq(ena_dev, ctx, io_sq);
if (ret)
@@ -1938,8 +1872,7 @@ void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid)
struct ena_com_io_cq *io_cq;
if (qid >= ENA_TOTAL_NUM_QUEUES) {
- netdev_err(ena_dev->net_device,
- "Qid (%d) is bigger than max num of queues (%d)\n",
+ netdev_err(ena_dev->net_device, "Qid (%d) is bigger than max num of queues (%d)\n",
qid, ENA_TOTAL_NUM_QUEUES);
return;
}
@@ -1983,8 +1916,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
if (rc)
return rc;
- if (get_resp.u.max_queue_ext.version !=
- ENA_FEATURE_MAX_QUEUE_EXT_VER)
+ if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER)
return -EINVAL;
memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext,
@@ -2025,18 +1957,15 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0);
if (!rc)
- memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints,
- sizeof(get_resp.u.hw_hints));
+ memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, sizeof(get_resp.u.hw_hints));
else if (rc == -EOPNOTSUPP)
- memset(&get_feat_ctx->hw_hints, 0x0,
- sizeof(get_feat_ctx->hw_hints));
+ memset(&get_feat_ctx->hw_hints, 0x0, sizeof(get_feat_ctx->hw_hints));
else
return rc;
rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0);
if (!rc)
- memcpy(&get_feat_ctx->llq, &get_resp.u.llq,
- sizeof(get_resp.u.llq));
+ memcpy(&get_feat_ctx->llq, &get_resp.u.llq, sizeof(get_resp.u.llq));
else if (rc == -EOPNOTSUPP)
memset(&get_feat_ctx->llq, 0x0, sizeof(get_feat_ctx->llq));
else
@@ -2084,8 +2013,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data)
aenq_common = &aenq_e->aenq_common_desc;
/* Go over all the events */
- while ((READ_ONCE(aenq_common->flags) &
- ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+ while ((READ_ONCE(aenq_common->flags) & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
/* Make sure the phase bit (ownership) is as expected before
* reading the rest of the descriptor.
*/
@@ -2094,8 +2022,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data)
timestamp = (u64)aenq_common->timestamp_low |
((u64)aenq_common->timestamp_high << 32);
- netdev_dbg(ena_dev->net_device,
- "AENQ! Group[%x] Syndrome[%x] timestamp: [%llus]\n",
+ netdev_dbg(ena_dev->net_device, "AENQ! Group[%x] Syndrome[%x] timestamp: [%llus]\n",
aenq_common->group, aenq_common->syndrome, timestamp);
/* Handle specific event*/
@@ -2124,8 +2051,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data)
/* write the aenq doorbell after all AENQ descriptors were read */
mb();
- writel_relaxed((u32)aenq->head,
- ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ writel_relaxed((u32)aenq->head, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
}
int ena_com_dev_reset(struct ena_com_dev *ena_dev,
@@ -2137,15 +2063,13 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev,
stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
- if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) ||
- (cap == ENA_MMIO_READ_TIMEOUT))) {
+ if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || (cap == ENA_MMIO_READ_TIMEOUT))) {
netdev_err(ena_dev->net_device, "Reg read32 timeout occurred\n");
return -ETIME;
}
if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) {
- netdev_err(ena_dev->net_device,
- "Device isn't ready, can't reset device\n");
+ netdev_err(ena_dev->net_device, "Device isn't ready, can't reset device\n");
return -EINVAL;
}
@@ -2168,8 +2092,7 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev,
rc = wait_for_reset_state(ena_dev, timeout,
ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
if (rc != 0) {
- netdev_err(ena_dev->net_device,
- "Reset indication didn't turn on\n");
+ netdev_err(ena_dev->net_device, "Reset indication didn't turn on\n");
return rc;
}
@@ -2177,8 +2100,7 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev,
writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
rc = wait_for_reset_state(ena_dev, timeout, 0);
if (rc != 0) {
- netdev_err(ena_dev->net_device,
- "Reset indication didn't turn off\n");
+ netdev_err(ena_dev->net_device, "Reset indication didn't turn off\n");
return rc;
}
@@ -2215,8 +2137,7 @@ static int ena_get_dev_stats(struct ena_com_dev *ena_dev,
sizeof(*get_resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to get stats. error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to get stats. error: %d\n", ret);
return ret;
}
@@ -2228,8 +2149,7 @@ int ena_com_get_eni_stats(struct ena_com_dev *ena_dev,
int ret;
if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) {
- netdev_err(ena_dev->net_device,
- "Capability %d isn't supported\n",
+ netdev_err(ena_dev->net_device, "Capability %d isn't supported\n",
ENA_ADMIN_ENI_STATS);
return -EOPNOTSUPP;
}
@@ -2266,8 +2186,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu)
int ret;
if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) {
- netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
- ENA_ADMIN_MTU);
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_MTU);
return -EOPNOTSUPP;
}
@@ -2286,8 +2205,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu)
sizeof(resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to set mtu %d. error: %d\n", mtu, ret);
+ netdev_err(ena_dev->net_device, "Failed to set mtu %d. error: %d\n", mtu, ret);
return ret;
}
@@ -2301,8 +2219,7 @@ int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
ret = ena_com_get_feature(ena_dev, &resp,
ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
if (unlikely(ret)) {
- netdev_err(ena_dev->net_device,
- "Failed to get offload capabilities %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to get offload capabilities %d\n", ret);
return ret;
}
@@ -2320,8 +2237,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
struct ena_admin_get_feat_resp get_resp;
int ret;
- if (!ena_com_check_supported_feature_id(ena_dev,
- ENA_ADMIN_RSS_HASH_FUNCTION)) {
+ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION)) {
netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
ENA_ADMIN_RSS_HASH_FUNCTION);
return -EOPNOTSUPP;
@@ -2334,8 +2250,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
return ret;
if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) {
- netdev_err(ena_dev->net_device,
- "Func hash %d isn't supported by device, abort\n",
+ netdev_err(ena_dev->net_device, "Func hash %d isn't supported by device, abort\n",
rss->hash_func);
return -EOPNOTSUPP;
}
@@ -2365,8 +2280,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
(struct ena_admin_acq_entry *)&resp,
sizeof(resp));
if (unlikely(ret)) {
- netdev_err(ena_dev->net_device,
- "Failed to set hash function %d. error: %d\n",
+ netdev_err(ena_dev->net_device, "Failed to set hash function %d. error: %d\n",
rss->hash_func, ret);
return -EINVAL;
}
@@ -2398,16 +2312,15 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
return rc;
if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) {
- netdev_err(ena_dev->net_device,
- "Flow hash function %d isn't supported\n", func);
+ netdev_err(ena_dev->net_device, "Flow hash function %d isn't supported\n", func);
return -EOPNOTSUPP;
}
if ((func == ENA_ADMIN_TOEPLITZ) && key) {
if (key_len != sizeof(hash_key->key)) {
netdev_err(ena_dev->net_device,
- "key len (%u) doesn't equal the supported size (%zu)\n",
- key_len, sizeof(hash_key->key));
+ "key len (%u) doesn't equal the supported size (%zu)\n", key_len,
+ sizeof(hash_key->key));
return -EINVAL;
}
memcpy(hash_key->key, key, key_len);
@@ -2495,8 +2408,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
struct ena_admin_set_feat_resp resp;
int ret;
- if (!ena_com_check_supported_feature_id(ena_dev,
- ENA_ADMIN_RSS_HASH_INPUT)) {
+ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_INPUT)) {
netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
ENA_ADMIN_RSS_HASH_INPUT);
return -EOPNOTSUPP;
@@ -2527,8 +2439,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
(struct ena_admin_acq_entry *)&resp,
sizeof(resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to set hash input. error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to set hash input. error: %d\n", ret);
return ret;
}
@@ -2605,8 +2516,7 @@ int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
int rc;
if (proto >= ENA_ADMIN_RSS_PROTO_NUM) {
- netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n",
- proto);
+ netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n", proto);
return -EINVAL;
}
@@ -2658,8 +2568,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
struct ena_admin_set_feat_resp resp;
int ret;
- if (!ena_com_check_supported_feature_id(
- ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) {
+ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) {
netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG);
return -EOPNOTSUPP;
@@ -2699,8 +2608,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
sizeof(resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to set indirect table. error: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to set indirect table. error: %d\n", ret);
return ret;
}
@@ -2779,9 +2687,8 @@ int ena_com_allocate_host_info(struct ena_com_dev *ena_dev)
{
struct ena_host_attribute *host_attr = &ena_dev->host_attr;
- host_attr->host_info =
- dma_alloc_coherent(ena_dev->dmadev, SZ_4K,
- &host_attr->host_info_dma_addr, GFP_KERNEL);
+ host_attr->host_info = dma_alloc_coherent(ena_dev->dmadev, SZ_4K,
+ &host_attr->host_info_dma_addr, GFP_KERNEL);
if (unlikely(!host_attr->host_info))
return -ENOMEM;
@@ -2827,8 +2734,7 @@ void ena_com_delete_debug_area(struct ena_com_dev *ena_dev)
if (host_attr->debug_area_virt_addr) {
dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size,
- host_attr->debug_area_virt_addr,
- host_attr->debug_area_dma_addr);
+ host_attr->debug_area_virt_addr, host_attr->debug_area_dma_addr);
host_attr->debug_area_virt_addr = NULL;
}
}
@@ -2877,8 +2783,7 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
sizeof(resp));
if (unlikely(ret))
- netdev_err(ena_dev->net_device,
- "Failed to set host attributes: %d\n", ret);
+ netdev_err(ena_dev->net_device, "Failed to set host attributes: %d\n", ret);
return ret;
}
@@ -2896,8 +2801,7 @@ static int ena_com_update_nonadaptive_moderation_interval(struct ena_com_dev *en
u32 *intr_moder_interval)
{
if (!intr_delay_resolution) {
- netdev_err(ena_dev->net_device,
- "Illegal interrupt delay granularity value\n");
+ netdev_err(ena_dev->net_device, "Illegal interrupt delay granularity value\n");
return -EFAULT;
}
@@ -2935,14 +2839,12 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
if (rc) {
if (rc == -EOPNOTSUPP) {
- netdev_dbg(ena_dev->net_device,
- "Feature %d isn't supported\n",
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
ENA_ADMIN_INTERRUPT_MODERATION);
rc = 0;
} else {
netdev_err(ena_dev->net_device,
- "Failed to get interrupt moderation admin cmd. rc: %d\n",
- rc);
+ "Failed to get interrupt moderation admin cmd. rc: %d\n", rc);
}
/* no moderation supported, disable adaptive support */
@@ -2990,8 +2892,7 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
(llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc));
if (unlikely(ena_dev->tx_max_header_size == 0)) {
- netdev_err(ena_dev->net_device,
- "The size of the LLQ entry is smaller than needed\n");
+ netdev_err(ena_dev->net_device, "The size of the LLQ entry is smaller than needed\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 3c5081d9d25d..fea57eb8e58b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -109,16 +109,13 @@ struct ena_com_io_cq {
/* Interrupt unmask register */
u32 __iomem *unmask_reg;
- /* The completion queue head doorbell register */
- u32 __iomem *cq_head_db_reg;
-
/* numa configuration register (for TPH) */
u32 __iomem *numa_node_cfg_reg;
/* The value to write to the above register to unmask
* the interrupt of this queue
*/
- u32 msix_vector;
+ u32 msix_vector ____cacheline_aligned;
enum queue_direction direction;
@@ -134,7 +131,6 @@ struct ena_com_io_cq {
/* Device queue index */
u16 idx;
u16 head;
- u16 last_head_update;
u8 phase;
u8 cdesc_entry_size_in_bytes;
@@ -158,7 +154,6 @@ struct ena_com_io_sq {
struct ena_com_io_desc_addr desc_addr;
u32 __iomem *db_addr;
- u8 __iomem *header_addr;
enum queue_direction direction;
enum ena_admin_placement_policy_type mem_queue_type;
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index f9f886289b97..933e619b3a31 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -18,8 +18,7 @@ static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+ (head_masked * io_cq->cdesc_entry_size_in_bytes));
- desc_phase = (READ_ONCE(cdesc->status) &
- ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+ desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
if (desc_phase != expected_phase)
@@ -65,8 +64,8 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
io_sq->entries_in_tx_burst_left--;
netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Decreasing entries_in_tx_burst_left of queue %d to %d\n",
- io_sq->qid, io_sq->entries_in_tx_burst_left);
+ "Decreasing entries_in_tx_burst_left of queue %d to %d\n", io_sq->qid,
+ io_sq->entries_in_tx_burst_left);
}
/* Make sure everything was written into the bounce buffer before
@@ -75,8 +74,8 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
wmb();
/* The line is completed. Copy it to dev */
- __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset,
- bounce_buffer, (llq_info->desc_list_entry_size) / 8);
+ __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, bounce_buffer,
+ (llq_info->desc_list_entry_size) / 8);
io_sq->tail++;
@@ -102,16 +101,14 @@ static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
header_offset =
llq_info->descs_num_before_header * io_sq->desc_entry_size;
- if (unlikely((header_offset + header_len) >
- llq_info->desc_list_entry_size)) {
+ if (unlikely((header_offset + header_len) > llq_info->desc_list_entry_size)) {
netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
"Trying to write header larger than llq entry can accommodate\n");
return -EFAULT;
}
if (unlikely(!bounce_buffer)) {
- netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Bounce buffer is NULL\n");
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Bounce buffer is NULL\n");
return -EFAULT;
}
@@ -129,8 +126,7 @@ static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
bounce_buffer = pkt_ctrl->curr_bounce_buf;
if (unlikely(!bounce_buffer)) {
- netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Bounce buffer is NULL\n");
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Bounce buffer is NULL\n");
return NULL;
}
@@ -247,8 +243,7 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
ena_com_cq_inc_head(io_cq);
count++;
- last = (READ_ONCE(cdesc->status) &
- ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+ last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
} while (!last);
@@ -369,9 +364,8 @@ static void ena_com_rx_set_flags(struct ena_com_io_cq *io_cq,
netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
"l3_proto %d l4_proto %d l3_csum_err %d l4_csum_err %d hash %d frag %d cdesc_status %x\n",
- ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto,
- ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err,
- ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status);
+ ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, ena_rx_ctx->l3_csum_err,
+ ena_rx_ctx->l4_csum_err, ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status);
}
/*****************************************************************************/
@@ -403,13 +397,12 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
if (unlikely(header_len > io_sq->tx_max_header_size)) {
netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Header size is too large %d max header: %d\n",
- header_len, io_sq->tx_max_header_size);
+ "Header size is too large %d max header: %d\n", header_len,
+ io_sq->tx_max_header_size);
return -EINVAL;
}
- if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
- !buffer_to_push)) {
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && !buffer_to_push)) {
netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
"Push header wasn't provided in LLQ mode\n");
return -EINVAL;
@@ -556,13 +549,11 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
}
netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
- "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid,
- nb_hw_desc);
+ "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, nb_hw_desc);
if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) {
netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
- "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc,
- ena_rx_ctx->max_bufs);
+ "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, ena_rx_ctx->max_bufs);
return -ENOSPC;
}
@@ -586,8 +577,8 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
io_sq->next_to_comp += nb_hw_desc;
netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
- "[%s][QID#%d] Updating SQ head to: %d\n", __func__,
- io_sq->qid, io_sq->next_to_comp);
+ "[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid,
+ io_sq->next_to_comp);
/* Get rx flags from the last pkt */
ena_com_rx_set_flags(io_cq, ena_rx_ctx, cdesc);
@@ -624,8 +615,8 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
desc->req_id = req_id;
netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "[%s] Adding single RX desc, Queue: %u, req_id: %u\n",
- __func__, io_sq->qid, req_id);
+ "[%s] Adding single RX desc, Queue: %u, req_id: %u\n", __func__, io_sq->qid,
+ req_id);
desc->buff_addr_lo = (u32)ena_buf->paddr;
desc->buff_addr_hi =
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 372b259279ec..72b019758caa 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -8,8 +8,6 @@
#include "ena_com.h"
-/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */
-#define ENA_COMP_HEAD_THRESH 4
/* we allow 2 DMA descriptors per LLQ entry */
#define ENA_LLQ_ENTRY_DESC_CHUNK_SIZE (2 * sizeof(struct ena_eth_io_tx_desc))
#define ENA_LLQ_HEADER (128UL - ENA_LLQ_ENTRY_DESC_CHUNK_SIZE)
@@ -145,8 +143,8 @@ static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq,
}
netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Queue: %d num_descs: %d num_entries_needed: %d\n",
- io_sq->qid, num_descs, num_entries_needed);
+ "Queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, num_descs,
+ num_entries_needed);
return num_entries_needed > io_sq->entries_in_tx_burst_left;
}
@@ -157,43 +155,20 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
u16 tail = io_sq->tail;
netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Write submission queue doorbell for queue: %d tail: %d\n",
- io_sq->qid, tail);
+ "Write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail);
writel(tail, io_sq->db_addr);
if (is_llq_max_tx_burst_exists(io_sq)) {
netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
- "Reset available entries in tx burst for queue %d to %d\n",
- io_sq->qid, max_entries_in_tx_burst);
+ "Reset available entries in tx burst for queue %d to %d\n", io_sq->qid,
+ max_entries_in_tx_burst);
io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst;
}
return 0;
}
-static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq)
-{
- u16 unreported_comp, head;
- bool need_update;
-
- if (unlikely(io_cq->cq_head_db_reg)) {
- head = io_cq->head;
- unreported_comp = head - io_cq->last_head_update;
- need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
-
- if (unlikely(need_update)) {
- netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
- "Write completion queue doorbell for queue %d: head: %d\n",
- io_cq->qid, head);
- writel(head, io_cq->cq_head_db_reg);
- io_cq->last_head_update = head;
- }
- }
-
- return 0;
-}
-
static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq,
u8 numa_node)
{
@@ -248,8 +223,8 @@ static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq,
*req_id = READ_ONCE(cdesc->req_id);
if (unlikely(*req_id >= io_cq->q_depth)) {
- netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
- "Invalid req id %d\n", cdesc->req_id);
+ netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, "Invalid req id %d\n",
+ cdesc->req_id);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 1c0a7828d397..09e7da1a69c9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -32,7 +32,7 @@ MODULE_LICENSE("GPL");
#define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
- NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
+ NETIF_MSG_IFDOWN | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
static struct ena_aenq_handlers aenq_handlers;
@@ -47,19 +47,44 @@ static int ena_restore_device(struct ena_adapter *adapter);
static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
+ enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
struct ena_adapter *adapter = netdev_priv(dev);
+ unsigned int time_since_last_napi, threshold;
+ struct ena_ring *tx_ring;
+ int napi_scheduled;
+
+ if (txqueue >= adapter->num_io_queues) {
+ netdev_err(dev, "TX timeout on invalid queue %u\n", txqueue);
+ goto schedule_reset;
+ }
+
+ threshold = jiffies_to_usecs(dev->watchdog_timeo);
+ tx_ring = &adapter->tx_ring[txqueue];
+ time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
+ napi_scheduled = !!(tx_ring->napi->state & NAPIF_STATE_SCHED);
+
+ netdev_err(dev,
+ "TX q %d is paused for too long (threshold %u). Time since last napi %u usec. napi scheduled: %d\n",
+ txqueue,
+ threshold,
+ time_since_last_napi,
+ napi_scheduled);
+
+ if (threshold < time_since_last_napi && napi_scheduled) {
+ netdev_err(dev,
+ "napi handler hasn't been called for a long time but is scheduled\n");
+ reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
+ }
+schedule_reset:
/* Change the state of the device to trigger reset
* Check that we are not in the middle or a trigger already
*/
-
if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
return;
- ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD);
+ ena_reset_device(adapter, reset_reason);
ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
-
- netif_err(adapter, tx_err, dev, "Transmit time out\n");
}
static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
@@ -116,11 +141,9 @@ int ena_xmit_common(struct ena_adapter *adapter,
if (unlikely(rc)) {
netif_err(adapter, tx_queued, adapter->netdev,
"Failed to prepare tx bufs\n");
- ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
- &ring->syncp);
+ ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp);
if (rc != -ENOMEM)
- ena_reset_device(adapter,
- ENA_REGS_RESET_DRIVER_INVALID_STATE);
+ ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE);
return rc;
}
@@ -485,8 +508,7 @@ static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
*/
page = dev_alloc_page();
if (!page) {
- ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1,
- &rx_ring->syncp);
+ ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp);
return ERR_PTR(-ENOSPC);
}
@@ -523,7 +545,7 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
/* We handle DMA here */
page = ena_alloc_map_page(rx_ring, &dma);
- if (unlikely(IS_ERR(page)))
+ if (IS_ERR(page))
return PTR_ERR(page);
netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
@@ -545,8 +567,8 @@ static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring,
struct ena_rx_buffer *rx_info,
unsigned long attrs)
{
- dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE,
- DMA_BIDIRECTIONAL, attrs);
+ dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL,
+ attrs);
}
static void ena_free_rx_page(struct ena_ring *rx_ring,
@@ -819,8 +841,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
&req_id);
if (rc) {
if (unlikely(rc == -EINVAL))
- handle_invalid_req_id(tx_ring, req_id, NULL,
- false);
+ handle_invalid_req_id(tx_ring, req_id, NULL, false);
break;
}
@@ -856,7 +877,6 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
tx_ring->next_to_clean = next_to_clean;
ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
- ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
@@ -1046,8 +1066,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
DMA_FROM_DEVICE);
if (!reuse_rx_buf_page)
- ena_unmap_rx_buff_attrs(rx_ring, rx_info,
- DMA_ATTR_SKIP_CPU_SYNC);
+ ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
page_offset + buf_offset, len, buf_len);
@@ -1303,10 +1322,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
ENA_RX_REFILL_THRESH_PACKET);
/* Optimization, try to batch new rx buffers */
- if (refill_required > refill_threshold) {
- ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+ if (refill_required > refill_threshold)
ena_refill_rx_bufs(rx_ring, refill_required);
- }
if (xdp_flags & ENA_XDP_REDIRECT)
xdp_do_flush();
@@ -1320,8 +1337,7 @@ error:
adapter = netdev_priv(rx_ring->netdev);
if (rc == -ENOSPC) {
- ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1,
- &rx_ring->syncp);
+ ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp);
ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
} else {
ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
@@ -1811,8 +1827,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)
if (!ena_dev->rss.tbl_log_size) {
rc = ena_rss_init_default(adapter);
if (rc && (rc != -EOPNOTSUPP)) {
- netif_err(adapter, ifup, adapter->netdev,
- "Failed to init RSS rc: %d\n", rc);
+ netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc);
return rc;
}
}
@@ -2134,6 +2149,12 @@ int ena_up(struct ena_adapter *adapter)
*/
ena_init_napi_in_range(adapter, 0, io_queue_count);
+ /* Enabling DIM needs to happen before enabling IRQs since DIM
+ * is run from napi routine
+ */
+ if (ena_com_interrupt_moderation_supported(adapter->ena_dev))
+ ena_com_enable_adaptive_moderation(adapter->ena_dev);
+
rc = ena_request_io_irq(adapter);
if (rc)
goto err_req_irq;
@@ -2184,7 +2205,7 @@ void ena_down(struct ena_adapter *adapter)
{
int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
- netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
+ netif_dbg(adapter, ifdown, adapter->netdev, "%s\n", __func__);
clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
@@ -2197,8 +2218,6 @@ void ena_down(struct ena_adapter *adapter)
/* After this point the napi handler won't enable the tx queue */
ena_napi_disable_in_range(adapter, 0, io_queue_count);
- /* After destroy the queue there won't be any new interrupts */
-
if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
int rc;
@@ -2588,8 +2607,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(rc))
goto error_drop_packet;
- skb_tx_timestamp(skb);
-
next_to_use = tx_ring->next_to_use;
req_id = tx_ring->free_ids[next_to_use];
tx_info = &tx_ring->tx_buffer_info[req_id];
@@ -2653,6 +2670,8 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
+ skb_tx_timestamp(skb);
+
if (netif_xmit_stopped(txq) || !netdev_xmit_more())
/* trigger the dma engine. ena_ring_tx_doorbell()
* calls a memory barrier inside it.
@@ -2670,22 +2689,6 @@ error_drop_packet:
return NETDEV_TX_OK;
}
-static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- u16 qid;
- /* we suspect that this is good for in--kernel network services that
- * want to loop incoming skb rx to tx in normal user generated traffic,
- * most probably we will not get to this
- */
- if (skb_rx_queue_recorded(skb))
- qid = skb_get_rx_queue(skb);
- else
- qid = netdev_pick_tx(dev, skb, NULL);
-
- return qid;
-}
-
static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@@ -2764,8 +2767,7 @@ static void ena_config_debug_area(struct ena_adapter *adapter)
rc = ena_com_set_host_attributes(adapter->ena_dev);
if (rc) {
if (rc == -EOPNOTSUPP)
- netif_warn(adapter, drv, adapter->netdev,
- "Cannot set host attributes\n");
+ netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n");
else
netif_err(adapter, drv, adapter->netdev,
"Cannot set host attributes\n");
@@ -2863,18 +2865,16 @@ static const struct net_device_ops ena_netdev_ops = {
.ndo_open = ena_open,
.ndo_stop = ena_close,
.ndo_start_xmit = ena_start_xmit,
- .ndo_select_queue = ena_select_queue,
.ndo_get_stats64 = ena_get_stats64,
.ndo_tx_timeout = ena_tx_timeout,
.ndo_change_mtu = ena_change_mtu,
- .ndo_set_mac_address = NULL,
.ndo_validate_addr = eth_validate_addr,
.ndo_bpf = ena_xdp,
.ndo_xdp_xmit = ena_xdp_xmit,
};
-static void ena_calc_io_queue_size(struct ena_adapter *adapter,
- struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static int ena_calc_io_queue_size(struct ena_adapter *adapter,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
{
struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
struct ena_com_dev *ena_dev = adapter->ena_dev;
@@ -2933,6 +2933,18 @@ static void ena_calc_io_queue_size(struct ena_adapter *adapter,
max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
+ if (max_tx_queue_size < ENA_MIN_RING_SIZE) {
+ netdev_err(adapter->netdev, "Device max TX queue size: %d < minimum: %d\n",
+ max_tx_queue_size, ENA_MIN_RING_SIZE);
+ return -EINVAL;
+ }
+
+ if (max_rx_queue_size < ENA_MIN_RING_SIZE) {
+ netdev_err(adapter->netdev, "Device max RX queue size: %d < minimum: %d\n",
+ max_rx_queue_size, ENA_MIN_RING_SIZE);
+ return -EINVAL;
+ }
+
/* When forcing large headers, we multiply the entry size by 2, and therefore divide
* the queue size by 2, leaving the amount of memory used by the queues unchanged.
*/
@@ -2963,6 +2975,8 @@ static void ena_calc_io_queue_size(struct ena_adapter *adapter,
adapter->max_rx_ring_size = max_rx_queue_size;
adapter->requested_tx_ring_size = tx_queue_size;
adapter->requested_rx_ring_size = rx_queue_size;
+
+ return 0;
}
static int ena_device_validate_params(struct ena_adapter *adapter,
@@ -3070,6 +3084,7 @@ static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev,
bool *wd_state)
{
struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct net_device *netdev = adapter->netdev;
struct ena_llq_configurations llq_config;
struct device *dev = &pdev->dev;
bool readless_supported;
@@ -3159,15 +3174,19 @@ static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev,
rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
&llq_config);
if (rc) {
- dev_err(dev, "ENA device init failed\n");
+ netdev_err(netdev, "Cannot set queues placement policy rc= %d\n", rc);
goto err_admin_init;
}
- ena_calc_io_queue_size(adapter, get_feat_ctx);
+ rc = ena_calc_io_queue_size(adapter, get_feat_ctx);
+ if (unlikely(rc))
+ goto err_admin_init;
return 0;
err_admin_init:
+ ena_com_abort_admin_commands(ena_dev);
+ ena_com_wait_for_abort_completion(ena_dev);
ena_com_delete_host_info(ena_dev);
ena_com_admin_destroy(ena_dev);
err_mmio_read_less:
@@ -3226,7 +3245,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
if (!graceful)
ena_com_set_admin_running_state(ena_dev, false);
- if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ if (dev_up)
ena_down(adapter);
/* Stop the device from sending AENQ events (in case reset flag is set
@@ -3372,14 +3391,18 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
struct ena_ring *tx_ring)
{
struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
+ enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
unsigned int time_since_last_napi;
unsigned int missing_tx_comp_to;
bool is_tx_comp_time_expired;
struct ena_tx_buffer *tx_buf;
unsigned long last_jiffies;
+ int napi_scheduled;
u32 missed_tx = 0;
int i, rc = 0;
+ missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
+
for (i = 0; i < tx_ring->ring_size; i++) {
tx_buf = &tx_ring->tx_buffer_info[i];
last_jiffies = tx_buf->last_jiffies;
@@ -3406,25 +3429,45 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
adapter->missing_tx_completion_to);
if (unlikely(is_tx_comp_time_expired)) {
- if (!tx_buf->print_once) {
- time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
- missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
- netif_notice(adapter, tx_err, adapter->netdev,
- "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n",
- tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to);
+ time_since_last_napi =
+ jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
+ napi_scheduled = !!(ena_napi->napi.state & NAPIF_STATE_SCHED);
+
+ if (missing_tx_comp_to < time_since_last_napi && napi_scheduled) {
+ /* We suspect napi isn't called because the
+ * bottom half is not run. Require a bigger
+ * timeout for these cases
+ */
+ if (!time_is_before_jiffies(last_jiffies +
+ 2 * adapter->missing_tx_completion_to))
+ continue;
+
+ reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
}
- tx_buf->print_once = 1;
missed_tx++;
+
+ if (tx_buf->print_once)
+ continue;
+
+ netif_notice(adapter, tx_err, adapter->netdev,
+ "TX hasn't completed, qid %d, index %d. %u usecs from last napi execution, napi scheduled: %d\n",
+ tx_ring->qid, i, time_since_last_napi, napi_scheduled);
+
+ tx_buf->print_once = 1;
}
}
if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
netif_err(adapter, tx_err, adapter->netdev,
- "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+ "Lost TX completions are above the threshold (%d > %d). Completion transmission timeout: %u.\n",
missed_tx,
- adapter->missing_tx_completion_threshold);
- ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
+ adapter->missing_tx_completion_threshold,
+ missing_tx_comp_to);
+ netif_err(adapter, tx_err, adapter->netdev,
+ "Resetting the device\n");
+
+ ena_reset_device(adapter, reset_reason);
rc = -EIO;
}
@@ -3762,8 +3805,8 @@ static int ena_rss_init_default(struct ena_adapter *adapter)
}
}
- rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
- ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
+ rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE,
+ 0xFFFFFFFF);
if (unlikely(rc && (rc != -EOPNOTSUPP))) {
dev_err(dev, "Cannot fill hash function\n");
goto err_fill_indir;
@@ -4040,8 +4083,8 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
free_irq_cpu_rmap(netdev->rx_cpu_rmap);
netdev->rx_cpu_rmap = NULL;
}
-#endif /* CONFIG_RFS_ACCEL */
+#endif /* CONFIG_RFS_ACCEL */
/* Make sure timer and reset routine won't be called after
* freeing device resources.
*/
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
index 1e007a41a525..2c3d6a77ea79 100644
--- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -21,6 +21,7 @@ enum ena_regs_reset_reason_types {
ENA_REGS_RESET_USER_TRIGGER = 12,
ENA_REGS_RESET_GENERIC = 13,
ENA_REGS_RESET_MISS_INTERRUPT = 14,
+ ENA_REGS_RESET_SUSPECTED_POLL_STARVATION = 15,
};
/* ena_registers offsets */
diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
index fc1c4ef73ba3..337c435d3ce9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
+++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
@@ -412,7 +412,6 @@ static int ena_clean_xdp_irq(struct ena_ring *tx_ring, u32 budget)
tx_ring->next_to_clean = next_to_clean;
ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
- ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
"tx_poll: q %d done. total pkts: %d\n",
diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c
index 5beadabc2136..c83a0a80d533 100644
--- a/drivers/net/ethernet/amd/pds_core/adminq.c
+++ b/drivers/net/ethernet/amd/pds_core/adminq.c
@@ -63,6 +63,15 @@ static int pdsc_process_notifyq(struct pdsc_qcq *qcq)
return nq_work;
}
+static bool pdsc_adminq_inc_if_up(struct pdsc *pdsc)
+{
+ if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER) ||
+ pdsc->state & BIT_ULL(PDSC_S_FW_DEAD))
+ return false;
+
+ return refcount_inc_not_zero(&pdsc->adminq_refcnt);
+}
+
void pdsc_process_adminq(struct pdsc_qcq *qcq)
{
union pds_core_adminq_comp *comp;
@@ -73,11 +82,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq)
unsigned long irqflags;
int nq_work = 0;
int aq_work = 0;
- int credits;
- /* Don't process AdminQ when shutting down */
- if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) {
- dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n",
+ /* Don't process AdminQ when it's not up */
+ if (!pdsc_adminq_inc_if_up(pdsc)) {
+ dev_err(pdsc->dev, "%s: called while adminq is unavailable\n",
__func__);
return;
}
@@ -119,11 +127,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq)
credits:
/* Return the interrupt credits, one for each completion */
- credits = nq_work + aq_work;
- if (credits)
- pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx],
- credits,
- PDS_CORE_INTR_CRED_REARM);
+ pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx],
+ nq_work + aq_work,
+ PDS_CORE_INTR_CRED_REARM);
+ refcount_dec(&pdsc->adminq_refcnt);
}
void pdsc_work_thread(struct work_struct *work)
@@ -135,18 +142,19 @@ void pdsc_work_thread(struct work_struct *work)
irqreturn_t pdsc_adminq_isr(int irq, void *data)
{
- struct pdsc_qcq *qcq = data;
- struct pdsc *pdsc = qcq->pdsc;
+ struct pdsc *pdsc = data;
+ struct pdsc_qcq *qcq;
- /* Don't process AdminQ when shutting down */
- if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) {
- dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n",
+ /* Don't process AdminQ when it's not up */
+ if (!pdsc_adminq_inc_if_up(pdsc)) {
+ dev_err(pdsc->dev, "%s: called while adminq is unavailable\n",
__func__);
return IRQ_HANDLED;
}
+ qcq = &pdsc->adminqcq;
queue_work(pdsc->wq, &qcq->work);
- pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR);
+ refcount_dec(&pdsc->adminq_refcnt);
return IRQ_HANDLED;
}
@@ -179,10 +187,16 @@ static int __pdsc_adminq_post(struct pdsc *pdsc,
/* Check that the FW is running */
if (!pdsc_is_fw_running(pdsc)) {
- u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
-
- dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n",
- __func__, fw_status);
+ if (pdsc->info_regs) {
+ u8 fw_status =
+ ioread8(&pdsc->info_regs->fw_status);
+
+ dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n",
+ __func__, fw_status);
+ } else {
+ dev_info(pdsc->dev, "%s: post failed - BARs not setup\n",
+ __func__);
+ }
ret = -ENXIO;
goto err_out_unlock;
@@ -230,6 +244,12 @@ int pdsc_adminq_post(struct pdsc *pdsc,
int err = 0;
int index;
+ if (!pdsc_adminq_inc_if_up(pdsc)) {
+ dev_dbg(pdsc->dev, "%s: preventing adminq cmd %u\n",
+ __func__, cmd->opcode);
+ return -ENXIO;
+ }
+
wc.qcq = &pdsc->adminqcq;
index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc);
if (index < 0) {
@@ -248,10 +268,16 @@ int pdsc_adminq_post(struct pdsc *pdsc,
break;
if (!pdsc_is_fw_running(pdsc)) {
- u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
-
- dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n",
- __func__, fw_status);
+ if (pdsc->info_regs) {
+ u8 fw_status =
+ ioread8(&pdsc->info_regs->fw_status);
+
+ dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n",
+ __func__, fw_status);
+ } else {
+ dev_dbg(pdsc->dev, "%s: post wait failed - BARs not setup\n",
+ __func__);
+ }
err = -ENXIO;
break;
}
@@ -285,6 +311,8 @@ err_out:
queue_work(pdsc->wq, &pdsc->health_work);
}
+ refcount_dec(&pdsc->adminq_refcnt);
+
return err;
}
EXPORT_SYMBOL_GPL(pdsc_adminq_post);
diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c
index 11c23a7f3172..2babea110991 100644
--- a/drivers/net/ethernet/amd/pds_core/auxbus.c
+++ b/drivers/net/ethernet/amd/pds_core/auxbus.c
@@ -160,23 +160,19 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf,
if (err < 0) {
dev_warn(cf->dev, "auxiliary_device_init of %s failed: %pe\n",
name, ERR_PTR(err));
- goto err_out;
+ kfree(padev);
+ return ERR_PTR(err);
}
err = auxiliary_device_add(aux_dev);
if (err) {
dev_warn(cf->dev, "auxiliary_device_add of %s failed: %pe\n",
name, ERR_PTR(err));
- goto err_out_uninit;
+ auxiliary_device_uninit(aux_dev);
+ return ERR_PTR(err);
}
return padev;
-
-err_out_uninit:
- auxiliary_device_uninit(aux_dev);
-err_out:
- kfree(padev);
- return ERR_PTR(err);
}
int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
@@ -184,6 +180,9 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
struct pds_auxiliary_dev *padev;
int err = 0;
+ if (!cf)
+ return -ENODEV;
+
mutex_lock(&pf->config_lock);
padev = pf->vfs[cf->vf_id].padev;
@@ -202,14 +201,27 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
{
struct pds_auxiliary_dev *padev;
- enum pds_core_vif_types vt;
char devname[PDS_DEVNAME_LEN];
+ enum pds_core_vif_types vt;
+ unsigned long mask;
u16 vt_support;
int client_id;
int err = 0;
+ if (!cf)
+ return -ENODEV;
+
mutex_lock(&pf->config_lock);
+ mask = BIT_ULL(PDSC_S_FW_DEAD) |
+ BIT_ULL(PDSC_S_STOPPING_DRIVER);
+ if (cf->state & mask) {
+ dev_err(pf->dev, "%s: can't add dev, VF client in bad state %#lx\n",
+ __func__, cf->state);
+ err = -ENXIO;
+ goto out_unlock;
+ }
+
/* We only support vDPA so far, so it is the only one to
* be verified that it is available in the Core device and
* enabled in the devlink param. In the future this might
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 0d2091e9eb28..9662ee72814c 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -125,10 +125,11 @@ static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq)
snprintf(name, sizeof(name), "%s-%d-%s",
PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name);
- index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq);
+ index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc);
if (index < 0)
return index;
qcq->intx = index;
+ qcq->cq.bound_intr = &pdsc->intr_info[index];
return 0;
}
@@ -222,7 +223,6 @@ int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index,
goto err_out_free_irq;
}
- qcq->cq.bound_intr = &pdsc->intr_info[qcq->intx];
qcq->cq.num_descs = num_descs;
qcq->cq.desc_size = cq_desc_size;
qcq->cq.tail_idx = 0;
@@ -300,6 +300,17 @@ err_out:
return err;
}
+static void pdsc_core_uninit(struct pdsc *pdsc)
+{
+ pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
+ pdsc_qcq_free(pdsc, &pdsc->adminqcq);
+
+ if (pdsc->kern_dbpage) {
+ iounmap(pdsc->kern_dbpage);
+ pdsc->kern_dbpage = NULL;
+ }
+}
+
static int pdsc_core_init(struct pdsc *pdsc)
{
union pds_core_dev_comp comp = {};
@@ -310,9 +321,32 @@ static int pdsc_core_init(struct pdsc *pdsc)
struct pds_core_dev_init_data_in cidi;
u32 dbid_count;
u32 dbpage_num;
+ int numdescs;
size_t sz;
int err;
+ /* Scale the descriptor ring length based on number of CPUs and VFs */
+ numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus());
+ numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev);
+ numdescs = roundup_pow_of_two(numdescs);
+ err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq",
+ PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR,
+ numdescs,
+ sizeof(union pds_core_adminq_cmd),
+ sizeof(union pds_core_adminq_comp),
+ 0, &pdsc->adminqcq);
+ if (err)
+ return err;
+
+ err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_NOTIFYQ, 0, "notifyq",
+ PDS_CORE_QCQ_F_NOTIFYQ,
+ PDSC_NOTIFYQ_LENGTH,
+ sizeof(struct pds_core_notifyq_cmd),
+ sizeof(union pds_core_notifyq_comp),
+ 0, &pdsc->notifyqcq);
+ if (err)
+ goto err_out_uninit;
+
cidi.adminq_q_base = cpu_to_le64(pdsc->adminqcq.q_base_pa);
cidi.adminq_cq_base = cpu_to_le64(pdsc->adminqcq.cq_base_pa);
cidi.notifyq_cq_base = cpu_to_le64(pdsc->notifyqcq.cq.base_pa);
@@ -336,7 +370,7 @@ static int pdsc_core_init(struct pdsc *pdsc)
if (err) {
dev_err(pdsc->dev, "Device init command failed: %pe\n",
ERR_PTR(err));
- return err;
+ goto err_out_uninit;
}
pdsc->hw_index = le32_to_cpu(cido.core_hw_index);
@@ -346,7 +380,8 @@ static int pdsc_core_init(struct pdsc *pdsc)
pdsc->kern_dbpage = pdsc_map_dbpage(pdsc, dbpage_num);
if (!pdsc->kern_dbpage) {
dev_err(pdsc->dev, "Cannot map dbpage, aborting\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_out_uninit;
}
pdsc->adminqcq.q.hw_type = cido.adminq_hw_type;
@@ -359,6 +394,10 @@ static int pdsc_core_init(struct pdsc *pdsc)
pdsc->last_eid = 0;
+ return 0;
+
+err_out_uninit:
+ pdsc_core_uninit(pdsc);
return err;
}
@@ -401,41 +440,12 @@ static int pdsc_viftypes_init(struct pdsc *pdsc)
int pdsc_setup(struct pdsc *pdsc, bool init)
{
- int numdescs;
int err;
- if (init)
- err = pdsc_dev_init(pdsc);
- else
- err = pdsc_dev_reinit(pdsc);
+ err = pdsc_dev_init(pdsc);
if (err)
return err;
- /* Scale the descriptor ring length based on number of CPUs and VFs */
- numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus());
- numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev);
- numdescs = roundup_pow_of_two(numdescs);
- err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq",
- PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR,
- numdescs,
- sizeof(union pds_core_adminq_cmd),
- sizeof(union pds_core_adminq_comp),
- 0, &pdsc->adminqcq);
- if (err)
- goto err_out_teardown;
-
- err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_NOTIFYQ, 0, "notifyq",
- PDS_CORE_QCQ_F_NOTIFYQ,
- PDSC_NOTIFYQ_LENGTH,
- sizeof(struct pds_core_notifyq_cmd),
- sizeof(union pds_core_notifyq_comp),
- 0, &pdsc->notifyqcq);
- if (err)
- goto err_out_teardown;
-
- /* NotifyQ rides on the AdminQ interrupt */
- pdsc->notifyqcq.intx = pdsc->adminqcq.intx;
-
/* Set up the Core with the AdminQ and NotifyQ info */
err = pdsc_core_init(pdsc);
if (err)
@@ -450,6 +460,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
pdsc_debugfs_add_viftype(pdsc);
}
+ refcount_set(&pdsc->adminq_refcnt, 1);
clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
return 0;
@@ -460,32 +471,19 @@ err_out_teardown:
void pdsc_teardown(struct pdsc *pdsc, bool removing)
{
- int i;
-
if (!pdsc->pdev->is_virtfn)
pdsc_devcmd_reset(pdsc);
- pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
- pdsc_qcq_free(pdsc, &pdsc->adminqcq);
+ if (pdsc->adminqcq.work.func)
+ cancel_work_sync(&pdsc->adminqcq.work);
+
+ pdsc_core_uninit(pdsc);
if (removing) {
kfree(pdsc->viftype_status);
pdsc->viftype_status = NULL;
}
- if (pdsc->intr_info) {
- for (i = 0; i < pdsc->nintrs; i++)
- pdsc_intr_free(pdsc, i);
-
- if (removing) {
- kfree(pdsc->intr_info);
- pdsc->intr_info = NULL;
- }
- }
-
- if (pdsc->kern_dbpage) {
- iounmap(pdsc->kern_dbpage);
- pdsc->kern_dbpage = NULL;
- }
+ pdsc_dev_uninit(pdsc);
set_bit(PDSC_S_FW_DEAD, &pdsc->state);
}
@@ -512,6 +510,24 @@ void pdsc_stop(struct pdsc *pdsc)
PDS_CORE_INTR_MASK_SET);
}
+static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc)
+{
+ /* The driver initializes the adminq_refcnt to 1 when the adminq is
+ * allocated and ready for use. Other users/requesters will increment
+ * the refcnt while in use. If the refcnt is down to 1 then the adminq
+ * is not in use and the refcnt can be cleared and adminq freed. Before
+ * calling this function the driver will set PDSC_S_FW_DEAD, which
+ * prevent subsequent attempts to use the adminq and increment the
+ * refcnt to fail. This guarantees that this function will eventually
+ * exit.
+ */
+ while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) {
+ dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n",
+ __func__);
+ cpu_relax();
+ }
+}
+
void pdsc_fw_down(struct pdsc *pdsc)
{
union pds_core_notifyq_comp reset_event = {
@@ -527,6 +543,8 @@ void pdsc_fw_down(struct pdsc *pdsc)
if (pdsc->pdev->is_virtfn)
return;
+ pdsc_adminq_wait_and_dec_once_unused(pdsc);
+
/* Notify clients of fw_down */
if (pdsc->fw_reporter)
devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
@@ -577,14 +595,19 @@ err_out:
static void pdsc_check_pci_health(struct pdsc *pdsc)
{
- u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+ u8 fw_status;
+
+ /* some sort of teardown already in progress */
+ if (!pdsc->info_regs)
+ return;
+
+ fw_status = ioread8(&pdsc->info_regs->fw_status);
/* is PCI broken? */
if (fw_status != PDS_RC_BAD_PCI)
return;
- pdsc_reset_prepare(pdsc->pdev);
- pdsc_reset_done(pdsc->pdev);
+ pci_reset_function(pdsc->pdev);
}
void pdsc_health_thread(struct work_struct *work)
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index e35d3e7006bf..92d7657dd614 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -184,6 +184,7 @@ struct pdsc {
struct mutex devcmd_lock; /* lock for dev_cmd operations */
struct mutex config_lock; /* lock for configuration operations */
spinlock_t adminq_lock; /* lock for adminq operations */
+ refcount_t adminq_refcnt;
struct pds_core_dev_info_regs __iomem *info_regs;
struct pds_core_dev_cmd_regs __iomem *cmd_regs;
struct pds_core_intr __iomem *intr_ctrl;
@@ -280,11 +281,8 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
union pds_core_dev_comp *comp, int max_seconds);
int pdsc_devcmd_init(struct pdsc *pdsc);
int pdsc_devcmd_reset(struct pdsc *pdsc);
-int pdsc_dev_reinit(struct pdsc *pdsc);
int pdsc_dev_init(struct pdsc *pdsc);
-
-void pdsc_reset_prepare(struct pci_dev *pdev);
-void pdsc_reset_done(struct pci_dev *pdev);
+void pdsc_dev_uninit(struct pdsc *pdsc);
int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
irq_handler_t handler, void *data);
diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c
index 8ec392299b7d..6bdd02b7aa6d 100644
--- a/drivers/net/ethernet/amd/pds_core/debugfs.c
+++ b/drivers/net/ethernet/amd/pds_core/debugfs.c
@@ -32,8 +32,8 @@ void pdsc_debugfs_del_dev(struct pdsc *pdsc)
static int identity_show(struct seq_file *seq, void *v)
{
- struct pdsc *pdsc = seq->private;
struct pds_core_dev_identity *ident;
+ struct pdsc *pdsc = seq->private;
int vt;
ident = &pdsc->dev_ident;
@@ -64,6 +64,10 @@ DEFINE_SHOW_ATTRIBUTE(identity);
void pdsc_debugfs_add_ident(struct pdsc *pdsc)
{
+ /* This file will already exist in the reset flow */
+ if (debugfs_lookup("identity", pdsc->dentry))
+ return;
+
debugfs_create_file("identity", 0400, pdsc->dentry,
pdsc, &identity_fops);
}
@@ -102,10 +106,8 @@ static const struct debugfs_reg32 intr_ctrl_regs[] = {
void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq)
{
- struct dentry *qcq_dentry, *q_dentry, *cq_dentry;
- struct dentry *intr_dentry;
+ struct dentry *qcq_dentry, *q_dentry, *cq_dentry, *intr_dentry;
struct debugfs_regset32 *intr_ctrl_regset;
- struct pdsc_intr_info *intr = &pdsc->intr_info[qcq->intx];
struct pdsc_queue *q = &qcq->q;
struct pdsc_cq *cq = &qcq->cq;
@@ -143,6 +145,8 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq)
debugfs_create_u16("tail", 0400, cq_dentry, &cq->tail_idx);
if (qcq->flags & PDS_CORE_QCQ_F_INTR) {
+ struct pdsc_intr_info *intr = &pdsc->intr_info[qcq->intx];
+
intr_dentry = debugfs_create_dir("intr", qcq->dentry);
if (IS_ERR_OR_NULL(intr_dentry))
return;
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index 31940b857e0e..e494e1298dc9 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -57,6 +57,9 @@ int pdsc_err_to_errno(enum pds_core_status_code code)
bool pdsc_is_fw_running(struct pdsc *pdsc)
{
+ if (!pdsc->info_regs)
+ return false;
+
pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status);
pdsc->last_fw_time = jiffies;
pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat);
@@ -182,13 +185,17 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
{
int err;
+ if (!pdsc->cmd_regs)
+ return -ENXIO;
+
memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd));
pdsc_devcmd_dbell(pdsc);
err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds);
- memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq)
queue_work(pdsc->wq, &pdsc->health_work);
+ else
+ memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
return err;
}
@@ -309,11 +316,20 @@ static int pdsc_identify(struct pdsc *pdsc)
return 0;
}
-int pdsc_dev_reinit(struct pdsc *pdsc)
+void pdsc_dev_uninit(struct pdsc *pdsc)
{
- pdsc_init_devinfo(pdsc);
+ if (pdsc->intr_info) {
+ int i;
+
+ for (i = 0; i < pdsc->nintrs; i++)
+ pdsc_intr_free(pdsc, i);
- return pdsc_identify(pdsc);
+ kfree(pdsc->intr_info);
+ pdsc->intr_info = NULL;
+ pdsc->nintrs = 0;
+ }
+
+ pci_free_irq_vectors(pdsc->pdev);
}
int pdsc_dev_init(struct pdsc *pdsc)
@@ -341,10 +357,8 @@ int pdsc_dev_init(struct pdsc *pdsc)
/* Get intr_info struct array for tracking */
pdsc->intr_info = kcalloc(nintrs, sizeof(*pdsc->intr_info), GFP_KERNEL);
- if (!pdsc->intr_info) {
- err = -ENOMEM;
- goto err_out;
- }
+ if (!pdsc->intr_info)
+ return -ENOMEM;
err = pci_alloc_irq_vectors(pdsc->pdev, nintrs, nintrs, PCI_IRQ_MSIX);
if (err != nintrs) {
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index e9948ea5bbcd..54864f27c87a 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -111,7 +111,8 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
mutex_lock(&pdsc->devcmd_lock);
err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2);
- memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
+ if (!err)
+ memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
mutex_unlock(&pdsc->devcmd_lock);
if (err && err != -EIO)
return err;
diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c
index 90a811f3878a..fa626719e68d 100644
--- a/drivers/net/ethernet/amd/pds_core/fw.c
+++ b/drivers/net/ethernet/amd/pds_core/fw.c
@@ -107,6 +107,9 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
dev_info(pdsc->dev, "Installing firmware\n");
+ if (!pdsc->cmd_regs)
+ return -ENXIO;
+
dl = priv_to_devlink(pdsc);
devlink_flash_update_status_notify(dl, "Preparing to flash",
NULL, 0, 0);
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 3080898d7b95..ab6133e7db42 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -37,9 +37,15 @@ static void pdsc_unmap_bars(struct pdsc *pdsc)
struct pdsc_dev_bar *bars = pdsc->bars;
unsigned int i;
+ pdsc->info_regs = NULL;
+ pdsc->cmd_regs = NULL;
+ pdsc->intr_status = NULL;
+ pdsc->intr_ctrl = NULL;
+
for (i = 0; i < PDS_CORE_BARS_MAX; i++) {
if (bars[i].vaddr)
pci_iounmap(pdsc->pdev, bars[i].vaddr);
+ bars[i].vaddr = NULL;
}
}
@@ -293,7 +299,7 @@ err_out_stop:
err_out_teardown:
pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
err_out_unmap_bars:
- del_timer_sync(&pdsc->wdtimer);
+ timer_shutdown_sync(&pdsc->wdtimer);
if (pdsc->wq)
destroy_workqueue(pdsc->wq);
mutex_destroy(&pdsc->config_lock);
@@ -420,7 +426,7 @@ static void pdsc_remove(struct pci_dev *pdev)
*/
pdsc_sriov_configure(pdev, 0);
- del_timer_sync(&pdsc->wdtimer);
+ timer_shutdown_sync(&pdsc->wdtimer);
if (pdsc->wq)
destroy_workqueue(pdsc->wq);
@@ -433,7 +439,6 @@ static void pdsc_remove(struct pci_dev *pdev)
mutex_destroy(&pdsc->config_lock);
mutex_destroy(&pdsc->devcmd_lock);
- pci_free_irq_vectors(pdev);
pdsc_unmap_bars(pdsc);
pci_release_regions(pdev);
}
@@ -445,19 +450,47 @@ static void pdsc_remove(struct pci_dev *pdev)
devlink_free(dl);
}
-void pdsc_reset_prepare(struct pci_dev *pdev)
+static void pdsc_stop_health_thread(struct pdsc *pdsc)
+{
+ if (pdsc->pdev->is_virtfn)
+ return;
+
+ timer_shutdown_sync(&pdsc->wdtimer);
+ if (pdsc->health_work.func)
+ cancel_work_sync(&pdsc->health_work);
+}
+
+static void pdsc_restart_health_thread(struct pdsc *pdsc)
+{
+ if (pdsc->pdev->is_virtfn)
+ return;
+
+ timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+ mod_timer(&pdsc->wdtimer, jiffies + 1);
+}
+
+static void pdsc_reset_prepare(struct pci_dev *pdev)
{
struct pdsc *pdsc = pci_get_drvdata(pdev);
+ pdsc_stop_health_thread(pdsc);
pdsc_fw_down(pdsc);
- pci_free_irq_vectors(pdev);
+ if (pdev->is_virtfn) {
+ struct pdsc *pf;
+
+ pf = pdsc_get_pf_struct(pdsc->pdev);
+ if (!IS_ERR(pf))
+ pdsc_auxbus_dev_del(pdsc, pf);
+ }
+
pdsc_unmap_bars(pdsc);
pci_release_regions(pdev);
- pci_disable_device(pdev);
+ if (pci_is_enabled(pdev))
+ pci_disable_device(pdev);
}
-void pdsc_reset_done(struct pci_dev *pdev)
+static void pdsc_reset_done(struct pci_dev *pdev)
{
struct pdsc *pdsc = pci_get_drvdata(pdev);
struct device *dev = pdsc->dev;
@@ -486,12 +519,44 @@ void pdsc_reset_done(struct pci_dev *pdev)
}
pdsc_fw_up(pdsc);
+ pdsc_restart_health_thread(pdsc);
+
+ if (pdev->is_virtfn) {
+ struct pdsc *pf;
+
+ pf = pdsc_get_pf_struct(pdsc->pdev);
+ if (!IS_ERR(pf))
+ pdsc_auxbus_dev_add(pdsc, pf);
+ }
+}
+
+static pci_ers_result_t pdsc_pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t error)
+{
+ if (error == pci_channel_io_frozen) {
+ pdsc_reset_prepare(pdev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ return PCI_ERS_RESULT_NONE;
+}
+
+static void pdsc_pci_error_resume(struct pci_dev *pdev)
+{
+ struct pdsc *pdsc = pci_get_drvdata(pdev);
+
+ if (test_bit(PDSC_S_FW_DEAD, &pdsc->state))
+ pci_reset_function_locked(pdev);
}
static const struct pci_error_handlers pdsc_err_handler = {
/* FLR handling */
.reset_prepare = pdsc_reset_prepare,
.reset_done = pdsc_reset_done,
+
+ /* AER handling */
+ .error_detected = pdsc_pci_error_detected,
+ .resume = pdsc_pci_error_resume,
};
static struct pci_driver pdsc_driver = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 18a6c8d99fa0..a2606ee3b0a5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -15,6 +15,7 @@
#include "aq_macsec.h"
#include "aq_main.h"
+#include <linux/linkmode.h>
#include <linux/ptp_clock_kernel.h>
static void aq_ethtool_get_regs(struct net_device *ndev,
@@ -681,23 +682,19 @@ static int aq_ethtool_get_ts_info(struct net_device *ndev,
return 0;
}
-static u32 eee_mask_to_ethtool_mask(u32 speed)
+static void eee_mask_to_ethtool_mask(unsigned long *mode, u32 speed)
{
- u32 rate = 0;
-
if (speed & AQ_NIC_RATE_EEE_10G)
- rate |= SUPPORTED_10000baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, mode);
if (speed & AQ_NIC_RATE_EEE_1G)
- rate |= SUPPORTED_1000baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mode);
if (speed & AQ_NIC_RATE_EEE_100M)
- rate |= SUPPORTED_100baseT_Full;
-
- return rate;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mode);
}
-static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_eee *eee)
+static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_keee *eee)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
u32 rate, supported_rates;
@@ -713,14 +710,14 @@ static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_eee *eee)
if (err < 0)
return err;
- eee->supported = eee_mask_to_ethtool_mask(supported_rates);
+ eee_mask_to_ethtool_mask(eee->supported, supported_rates);
if (aq_nic->aq_nic_cfg.eee_speeds)
- eee->advertised = eee->supported;
+ linkmode_copy(eee->advertised, eee->supported);
- eee->lp_advertised = eee_mask_to_ethtool_mask(rate);
+ eee_mask_to_ethtool_mask(eee->lp_advertised, rate);
- eee->eee_enabled = !!eee->advertised;
+ eee->eee_enabled = !linkmode_empty(eee->advertised);
eee->tx_lpi_enabled = eee->eee_enabled;
if ((supported_rates & rate) & AQ_NIC_RATE_EEE_MSK)
@@ -729,7 +726,7 @@ static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_eee *eee)
return 0;
}
-static int aq_ethtool_set_eee(struct net_device *ndev, struct ethtool_eee *eee)
+static int aq_ethtool_set_eee(struct net_device *ndev, struct ethtool_keee *eee)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
u32 rate, supported_rates;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
index abd4832e4ed2..5acb3e16b567 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
@@ -993,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
return 0;
err_exit_hwts_rx:
- aq_ring_free(&aq_ptp->hwts_rx);
+ aq_ring_hwts_rx_free(&aq_ptp->hwts_rx);
err_exit_ptp_rx:
aq_ring_free(&aq_ptp->ptp_rx);
err_exit_ptp_tx:
@@ -1011,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic)
aq_ring_free(&aq_ptp->ptp_tx);
aq_ring_free(&aq_ptp->ptp_rx);
- aq_ring_free(&aq_ptp->hwts_rx);
+ aq_ring_hwts_rx_free(&aq_ptp->hwts_rx);
aq_ptp_skb_ring_release(&aq_ptp->skb_ring);
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index cda8597b4e14..f7433abd6591 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -919,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self)
}
}
+void aq_ring_hwts_rx_free(struct aq_ring_s *self)
+{
+ if (!self)
+ return;
+
+ if (self->dx_ring) {
+ dma_free_coherent(aq_nic_get_dev(self->aq_nic),
+ self->size * self->dx_size + AQ_CFG_RXDS_DEF,
+ self->dx_ring, self->dx_ring_pa);
+ self->dx_ring = NULL;
+ }
+}
+
unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data)
{
unsigned int count;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 52847310740a..d627ace850ff 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -210,6 +210,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self);
int aq_ring_hwts_rx_alloc(struct aq_ring_s *self,
struct aq_nic_s *aq_nic, unsigned int idx,
unsigned int size, unsigned int dx_size);
+void aq_ring_hwts_rx_free(struct aq_ring_s *self);
void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic);
unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data);
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index 29b04a274d07..a806dadc4196 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -31,6 +31,20 @@ static void _intr2_mask_set(struct bcmasp_priv *priv, u32 mask)
priv->irq_mask |= mask;
}
+void bcmasp_enable_phy_irq(struct bcmasp_intf *intf, int en)
+{
+ struct bcmasp_priv *priv = intf->parent;
+
+ /* Only supported with internal phys */
+ if (!intf->internal_phy)
+ return;
+
+ if (en)
+ _intr2_mask_clear(priv, ASP_INTR2_PHY_EVENT(intf->channel));
+ else
+ _intr2_mask_set(priv, ASP_INTR2_PHY_EVENT(intf->channel));
+}
+
void bcmasp_enable_tx_irq(struct bcmasp_intf *intf, int en)
{
struct bcmasp_priv *priv = intf->parent;
@@ -79,6 +93,9 @@ static void bcmasp_intr2_handling(struct bcmasp_intf *intf, u32 status)
__napi_schedule_irqoff(&intf->tx_napi);
}
}
+
+ if (status & ASP_INTR2_PHY_EVENT(intf->channel))
+ phy_mac_interrupt(intf->ndev->phydev);
}
static irqreturn_t bcmasp_isr(int irq, void *data)
@@ -535,9 +552,6 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
int j = 0, i;
for (i = 0; i < NUM_NET_FILTERS; i++) {
- if (j == *rule_cnt)
- return -EMSGSIZE;
-
if (!priv->net_filters[i].claimed ||
priv->net_filters[i].port != intf->port)
continue;
@@ -547,6 +561,9 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
priv->net_filters[i - 1].wake_filter)
continue;
+ if (j == *rule_cnt)
+ return -EMSGSIZE;
+
rule_locs[j++] = priv->net_filters[i].fs.location;
}
@@ -972,7 +989,26 @@ static void bcmasp_core_init(struct bcmasp_priv *priv)
ASP_INTR2_CLEAR);
}
-static void bcmasp_core_clock_select(struct bcmasp_priv *priv, bool slow)
+static void bcmasp_core_clock_select_many(struct bcmasp_priv *priv, bool slow)
+{
+ u32 reg;
+
+ reg = ctrl2_core_rl(priv, ASP_CTRL2_CORE_CLOCK_SELECT);
+ if (slow)
+ reg &= ~ASP_CTRL2_CORE_CLOCK_SELECT_MAIN;
+ else
+ reg |= ASP_CTRL2_CORE_CLOCK_SELECT_MAIN;
+ ctrl2_core_wl(priv, reg, ASP_CTRL2_CORE_CLOCK_SELECT);
+
+ reg = ctrl2_core_rl(priv, ASP_CTRL2_CPU_CLOCK_SELECT);
+ if (slow)
+ reg &= ~ASP_CTRL2_CPU_CLOCK_SELECT_MAIN;
+ else
+ reg |= ASP_CTRL2_CPU_CLOCK_SELECT_MAIN;
+ ctrl2_core_wl(priv, reg, ASP_CTRL2_CPU_CLOCK_SELECT);
+}
+
+static void bcmasp_core_clock_select_one(struct bcmasp_priv *priv, bool slow)
{
u32 reg;
@@ -1166,6 +1202,24 @@ static void bcmasp_wol_irq_destroy_per_intf(struct bcmasp_priv *priv)
}
}
+static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en)
+{
+ u32 reg, phy_lpi_overwrite;
+
+ reg = rx_edpkt_core_rl(intf->parent, ASP_EDPKT_SPARE_REG);
+ phy_lpi_overwrite = intf->internal_phy ? ASP_EDPKT_SPARE_REG_EPHY_LPI :
+ ASP_EDPKT_SPARE_REG_GPHY_LPI;
+
+ if (en)
+ reg |= phy_lpi_overwrite;
+ else
+ reg &= ~phy_lpi_overwrite;
+
+ rx_edpkt_core_wl(intf->parent, reg, ASP_EDPKT_SPARE_REG);
+
+ usleep_range(50, 100);
+}
+
static struct bcmasp_hw_info v20_hw_info = {
.rx_ctrl_flush = ASP_RX_CTRL_FLUSH,
.umac2fb = UMAC2FB_OFFSET,
@@ -1178,6 +1232,7 @@ static const struct bcmasp_plat_data v20_plat_data = {
.init_wol = bcmasp_init_wol_per_intf,
.enable_wol = bcmasp_enable_wol_per_intf,
.destroy_wol = bcmasp_wol_irq_destroy_per_intf,
+ .core_clock_select = bcmasp_core_clock_select_one,
.hw_info = &v20_hw_info,
};
@@ -1194,17 +1249,39 @@ static const struct bcmasp_plat_data v21_plat_data = {
.init_wol = bcmasp_init_wol_shared,
.enable_wol = bcmasp_enable_wol_shared,
.destroy_wol = bcmasp_wol_irq_destroy_shared,
+ .core_clock_select = bcmasp_core_clock_select_one,
+ .hw_info = &v21_hw_info,
+};
+
+static const struct bcmasp_plat_data v22_plat_data = {
+ .init_wol = bcmasp_init_wol_shared,
+ .enable_wol = bcmasp_enable_wol_shared,
+ .destroy_wol = bcmasp_wol_irq_destroy_shared,
+ .core_clock_select = bcmasp_core_clock_select_many,
.hw_info = &v21_hw_info,
+ .eee_fixup = bcmasp_eee_fixup,
};
+static void bcmasp_set_pdata(struct bcmasp_priv *priv, const struct bcmasp_plat_data *pdata)
+{
+ priv->init_wol = pdata->init_wol;
+ priv->enable_wol = pdata->enable_wol;
+ priv->destroy_wol = pdata->destroy_wol;
+ priv->core_clock_select = pdata->core_clock_select;
+ priv->eee_fixup = pdata->eee_fixup;
+ priv->hw_info = pdata->hw_info;
+}
+
static const struct of_device_id bcmasp_of_match[] = {
{ .compatible = "brcm,asp-v2.0", .data = &v20_plat_data },
{ .compatible = "brcm,asp-v2.1", .data = &v21_plat_data },
+ { .compatible = "brcm,asp-v2.2", .data = &v22_plat_data },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, bcmasp_of_match);
static const struct of_device_id bcmasp_mdio_of_match[] = {
+ { .compatible = "brcm,asp-v2.2-mdio", },
{ .compatible = "brcm,asp-v2.1-mdio", },
{ .compatible = "brcm,asp-v2.0-mdio", },
{ /* sentinel */ },
@@ -1265,16 +1342,13 @@ static int bcmasp_probe(struct platform_device *pdev)
if (!pdata)
return dev_err_probe(dev, -EINVAL, "unable to find platform data\n");
- priv->init_wol = pdata->init_wol;
- priv->enable_wol = pdata->enable_wol;
- priv->destroy_wol = pdata->destroy_wol;
- priv->hw_info = pdata->hw_info;
+ bcmasp_set_pdata(priv, pdata);
/* Enable all clocks to ensure successful probing */
bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0);
/* Switch to the main clock */
- bcmasp_core_clock_select(priv, false);
+ priv->core_clock_select(priv, false);
bcmasp_intr2_mask_set_all(priv);
bcmasp_intr2_clear_all(priv);
@@ -1381,7 +1455,7 @@ static int __maybe_unused bcmasp_suspend(struct device *d)
*/
bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_TX_DISABLE);
- bcmasp_core_clock_select(priv, true);
+ priv->core_clock_select(priv, true);
clk_disable_unprepare(priv->clk);
@@ -1399,7 +1473,7 @@ static int __maybe_unused bcmasp_resume(struct device *d)
return ret;
/* Switch to the main clock domain */
- bcmasp_core_clock_select(priv, false);
+ priv->core_clock_select(priv, false);
/* Re-enable all clocks for re-initialization */
bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0);
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
index ec90add6b03e..f93cb3da44b0 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
@@ -19,6 +19,8 @@
#define ASP_INTR2_TX_DESC(intr) BIT((intr) + 14)
#define ASP_INTR2_UMC0_WAKE BIT(22)
#define ASP_INTR2_UMC1_WAKE BIT(28)
+#define ASP_INTR2_PHY_EVENT(intr) ((intr) ? BIT(30) | BIT(31) : \
+ BIT(24) | BIT(25))
#define ASP_WAKEUP_INTR2_OFFSET 0x1200
#define ASP_WAKEUP_INTR2_STATUS 0x0
@@ -33,6 +35,12 @@
#define ASP_WAKEUP_INTR2_FILT_1 BIT(3)
#define ASP_WAKEUP_INTR2_FW BIT(4)
+#define ASP_CTRL2_OFFSET 0x2000
+#define ASP_CTRL2_CORE_CLOCK_SELECT 0x0
+#define ASP_CTRL2_CORE_CLOCK_SELECT_MAIN BIT(0)
+#define ASP_CTRL2_CPU_CLOCK_SELECT 0x4
+#define ASP_CTRL2_CPU_CLOCK_SELECT_MAIN BIT(0)
+
#define ASP_TX_ANALYTICS_OFFSET 0x4c000
#define ASP_TX_ANALYTICS_CTRL 0x0
@@ -134,8 +142,11 @@ enum asp_rx_net_filter_block {
#define ASP_EDPKT_RX_PKT_CNT 0x138
#define ASP_EDPKT_HDR_EXTR_CNT 0x13c
#define ASP_EDPKT_HDR_OUT_CNT 0x140
+#define ASP_EDPKT_SPARE_REG 0x174
+#define ASP_EDPKT_SPARE_REG_EPHY_LPI BIT(4)
+#define ASP_EDPKT_SPARE_REG_GPHY_LPI BIT(3)
-#define ASP_CTRL 0x101000
+#define ASP_CTRL_OFFSET 0x101000
#define ASP_CTRL_ASP_SW_INIT 0x04
#define ASP_CTRL_ASP_SW_INIT_ACPUSS_CORE BIT(0)
#define ASP_CTRL_ASP_SW_INIT_ASP_TX BIT(1)
@@ -306,6 +317,7 @@ struct bcmasp_intf {
struct bcmasp_desc *rx_edpkt_cpu;
dma_addr_t rx_edpkt_dma_addr;
dma_addr_t rx_edpkt_dma_read;
+ dma_addr_t rx_edpkt_dma_valid;
/* RX buffer prefetcher ring*/
void *rx_ring_cpu;
@@ -337,7 +349,7 @@ struct bcmasp_intf {
int wol_irq;
unsigned int wol_irq_enabled:1;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
};
#define NUM_NET_FILTERS 32
@@ -372,6 +384,8 @@ struct bcmasp_plat_data {
void (*init_wol)(struct bcmasp_priv *priv);
void (*enable_wol)(struct bcmasp_intf *intf, bool en);
void (*destroy_wol)(struct bcmasp_priv *priv);
+ void (*core_clock_select)(struct bcmasp_priv *priv, bool slow);
+ void (*eee_fixup)(struct bcmasp_intf *priv, bool en);
struct bcmasp_hw_info *hw_info;
};
@@ -390,6 +404,8 @@ struct bcmasp_priv {
void (*init_wol)(struct bcmasp_priv *priv);
void (*enable_wol)(struct bcmasp_intf *intf, bool en);
void (*destroy_wol)(struct bcmasp_priv *priv);
+ void (*core_clock_select)(struct bcmasp_priv *priv, bool slow);
+ void (*eee_fixup)(struct bcmasp_intf *intf, bool en);
void __iomem *base;
struct bcmasp_hw_info *hw_info;
@@ -530,7 +546,8 @@ BCMASP_CORE_IO_MACRO(rx_analytics, ASP_RX_ANALYTICS_OFFSET);
BCMASP_CORE_IO_MACRO(rx_ctrl, ASP_RX_CTRL_OFFSET);
BCMASP_CORE_IO_MACRO(rx_filter, ASP_RX_FILTER_OFFSET);
BCMASP_CORE_IO_MACRO(rx_edpkt, ASP_EDPKT_OFFSET);
-BCMASP_CORE_IO_MACRO(ctrl, ASP_CTRL);
+BCMASP_CORE_IO_MACRO(ctrl, ASP_CTRL_OFFSET);
+BCMASP_CORE_IO_MACRO(ctrl2, ASP_CTRL2_OFFSET);
struct bcmasp_intf *bcmasp_interface_create(struct bcmasp_priv *priv,
struct device_node *ndev_dn, int i);
@@ -541,6 +558,8 @@ void bcmasp_enable_tx_irq(struct bcmasp_intf *intf, int en);
void bcmasp_enable_rx_irq(struct bcmasp_intf *intf, int en);
+void bcmasp_enable_phy_irq(struct bcmasp_intf *intf, int en);
+
void bcmasp_flush_rx_port(struct bcmasp_intf *intf);
extern const struct ethtool_ops bcmasp_ethtool_ops;
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
index ce6a3d56fb23..484fc2b5626f 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
@@ -360,29 +360,26 @@ void bcmasp_eee_enable_set(struct bcmasp_intf *intf, bool enable)
umac_wl(intf, reg, UMC_EEE_CTRL);
intf->eee.eee_enabled = enable;
- intf->eee.eee_active = enable;
}
-static int bcmasp_get_eee(struct net_device *dev, struct ethtool_eee *e)
+static int bcmasp_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct bcmasp_intf *intf = netdev_priv(dev);
- struct ethtool_eee *p = &intf->eee;
+ struct ethtool_keee *p = &intf->eee;
if (!dev->phydev)
return -ENODEV;
- e->eee_enabled = p->eee_enabled;
- e->eee_active = p->eee_active;
e->tx_lpi_enabled = p->tx_lpi_enabled;
e->tx_lpi_timer = umac_rl(intf, UMC_EEE_LPI_TIMER);
return phy_ethtool_get_eee(dev->phydev, e);
}
-static int bcmasp_set_eee(struct net_device *dev, struct ethtool_eee *e)
+static int bcmasp_set_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct bcmasp_intf *intf = netdev_priv(dev);
- struct ethtool_eee *p = &intf->eee;
+ struct ethtool_keee *p = &intf->eee;
int ret;
if (!dev->phydev)
@@ -399,7 +396,6 @@ static int bcmasp_set_eee(struct net_device *dev, struct ethtool_eee *e)
}
umac_wl(intf, e->tx_lpi_timer, UMC_EEE_LPI_TIMER);
- intf->eee.eee_active = ret >= 0;
intf->eee.tx_lpi_enabled = e->tx_lpi_enabled;
bcmasp_eee_enable_set(intf, true);
}
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
index 53e542881255..dd06b68b33ed 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -382,6 +382,7 @@ static void bcmasp_netif_start(struct net_device *dev)
bcmasp_enable_rx_irq(intf, 1);
bcmasp_enable_tx_irq(intf, 1);
+ bcmasp_enable_phy_irq(intf, 1);
phy_start(dev->phydev);
}
@@ -607,6 +608,7 @@ static void bcmasp_adj_link(struct net_device *dev)
struct phy_device *phydev = dev->phydev;
u32 cmd_bits = 0, reg;
int changed = 0;
+ bool active;
if (intf->old_link != phydev->link) {
changed = 1;
@@ -658,8 +660,8 @@ static void bcmasp_adj_link(struct net_device *dev)
reg |= cmd_bits;
umac_wl(intf, reg, UMC_CMD);
- intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
- bcmasp_eee_enable_set(intf, intf->eee.eee_active);
+ active = phy_init_eee(phydev, 0) >= 0;
+ bcmasp_eee_enable_set(intf, active);
}
reg = rgmii_rl(intf, RGMII_OOB_CNTRL);
@@ -673,38 +675,78 @@ static void bcmasp_adj_link(struct net_device *dev)
phy_print_status(phydev);
}
-static int bcmasp_init_rx(struct bcmasp_intf *intf)
+static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
struct page *buffer_pg;
- dma_addr_t dma;
- void *p;
- u32 reg;
- int ret;
+ /* Alloc RX */
intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
-
- dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(kdev, dma)) {
- __free_pages(buffer_pg, intf->rx_buf_order);
+ if (!buffer_pg)
return -ENOMEM;
- }
+
intf->rx_ring_cpu = page_to_virt(buffer_pg);
- intf->rx_ring_dma = dma;
- intf->rx_ring_dma_valid = intf->rx_ring_dma + RING_BUFFER_SIZE - 1;
+ intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(kdev, intf->rx_ring_dma))
+ goto free_rx_buffer;
+
+ intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
+ &intf->rx_edpkt_dma_addr, GFP_KERNEL);
+ if (!intf->rx_edpkt_cpu)
+ goto free_rx_buffer_dma;
+
+ /* Alloc TX */
+ intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
+ &intf->tx_spb_dma_addr, GFP_KERNEL);
+ if (!intf->tx_spb_cpu)
+ goto free_rx_edpkt_dma;
- p = dma_alloc_coherent(kdev, DESC_RING_SIZE, &intf->rx_edpkt_dma_addr,
+ intf->tx_cbs = kcalloc(DESC_RING_COUNT, sizeof(struct bcmasp_tx_cb),
GFP_KERNEL);
- if (!p) {
- ret = -ENOMEM;
- goto free_rx_ring;
- }
- intf->rx_edpkt_cpu = p;
+ if (!intf->tx_cbs)
+ goto free_tx_spb_dma;
- netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll);
+ return 0;
+
+free_tx_spb_dma:
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
+ intf->tx_spb_dma_addr);
+free_rx_edpkt_dma:
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
+ intf->rx_edpkt_dma_addr);
+free_rx_buffer_dma:
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+free_rx_buffer:
+ __free_pages(buffer_pg, intf->rx_buf_order);
+
+ return -ENOMEM;
+}
+
+static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+
+ /* RX buffers */
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
+ intf->rx_edpkt_dma_addr);
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+
+ /* TX buffers */
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
+ intf->tx_spb_dma_addr);
+ kfree(intf->tx_cbs);
+}
+static void bcmasp_init_rx(struct bcmasp_intf *intf)
+{
+ /* Restart from index 0 */
+ intf->rx_ring_dma_valid = intf->rx_ring_dma + RING_BUFFER_SIZE - 1;
+ intf->rx_edpkt_dma_valid = intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1);
intf->rx_edpkt_dma_read = intf->rx_edpkt_dma_addr;
intf->rx_edpkt_index = 0;
@@ -730,64 +772,23 @@ static int bcmasp_init_rx(struct bcmasp_intf *intf)
rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_WRITE);
rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_READ);
rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_BASE);
- rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1),
- RX_EDPKT_DMA_END);
- rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1),
- RX_EDPKT_DMA_VALID);
-
- reg = UMAC2FB_CFG_DEFAULT_EN |
- ((intf->channel + 11) << UMAC2FB_CFG_CHID_SHIFT);
- reg |= (0xd << UMAC2FB_CFG_OK_SEND_SHIFT);
- umac2fb_wl(intf, reg, UMAC2FB_CFG);
-
- return 0;
-
-free_rx_ring:
- dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
- __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_valid, RX_EDPKT_DMA_END);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_valid, RX_EDPKT_DMA_VALID);
- return ret;
+ umac2fb_wl(intf, UMAC2FB_CFG_DEFAULT_EN | ((intf->channel + 11) <<
+ UMAC2FB_CFG_CHID_SHIFT) | (0xd << UMAC2FB_CFG_OK_SEND_SHIFT),
+ UMAC2FB_CFG);
}
-static void bcmasp_reclaim_free_all_rx(struct bcmasp_intf *intf)
-{
- struct device *kdev = &intf->parent->pdev->dev;
-
- dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
- intf->rx_edpkt_dma_addr);
- dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
- __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
-}
-static int bcmasp_init_tx(struct bcmasp_intf *intf)
+static void bcmasp_init_tx(struct bcmasp_intf *intf)
{
- struct device *kdev = &intf->parent->pdev->dev;
- void *p;
- int ret;
-
- p = dma_alloc_coherent(kdev, DESC_RING_SIZE, &intf->tx_spb_dma_addr,
- GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- intf->tx_spb_cpu = p;
+ /* Restart from index 0 */
intf->tx_spb_dma_valid = intf->tx_spb_dma_addr + DESC_RING_SIZE - 1;
intf->tx_spb_dma_read = intf->tx_spb_dma_addr;
-
- intf->tx_cbs = kcalloc(DESC_RING_COUNT, sizeof(struct bcmasp_tx_cb),
- GFP_KERNEL);
- if (!intf->tx_cbs) {
- ret = -ENOMEM;
- goto free_tx_spb;
- }
-
intf->tx_spb_index = 0;
intf->tx_spb_clean_index = 0;
- netif_napi_add_tx(intf->ndev, &intf->tx_napi, bcmasp_tx_poll);
-
/* Make sure channels are disabled */
tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE);
tx_epkt_core_wl(intf, 0x0, TX_EPKT_C_CFG_MISC);
@@ -803,26 +804,6 @@ static int bcmasp_init_tx(struct bcmasp_intf *intf)
tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_BASE);
tx_spb_dma_wq(intf, intf->tx_spb_dma_valid, TX_SPB_DMA_END);
tx_spb_dma_wq(intf, intf->tx_spb_dma_valid, TX_SPB_DMA_VALID);
-
- return 0;
-
-free_tx_spb:
- dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
- intf->tx_spb_dma_addr);
-
- return ret;
-}
-
-static void bcmasp_reclaim_free_all_tx(struct bcmasp_intf *intf)
-{
- struct device *kdev = &intf->parent->pdev->dev;
-
- /* Free descriptors */
- dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
- intf->tx_spb_dma_addr);
-
- /* Free cbs */
- kfree(intf->tx_cbs);
}
static void bcmasp_ephy_enable_set(struct bcmasp_intf *intf, bool enable)
@@ -910,12 +891,10 @@ static void bcmasp_netif_deinit(struct net_device *dev)
/* Disable interrupts */
bcmasp_enable_tx_irq(intf, 0);
bcmasp_enable_rx_irq(intf, 0);
+ bcmasp_enable_phy_irq(intf, 0);
netif_napi_del(&intf->tx_napi);
- bcmasp_reclaim_free_all_tx(intf);
-
netif_napi_del(&intf->rx_napi);
- bcmasp_reclaim_free_all_rx(intf);
}
static int bcmasp_stop(struct net_device *dev)
@@ -929,6 +908,8 @@ static int bcmasp_stop(struct net_device *dev)
bcmasp_netif_deinit(dev);
+ bcmasp_reclaim_free_buffers(intf);
+
phy_disconnect(dev->phydev);
/* Disable internal EPHY or external PHY */
@@ -1048,6 +1029,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
netdev_err(dev, "could not attach to PHY\n");
goto err_phy_disable;
}
+
+ if (intf->internal_phy)
+ dev->phydev->irq = PHY_MAC_INTERRUPT;
+
+ /* Indicate that the MAC is responsible for PHY PM */
+ phydev->mac_managed_pm = true;
} else if (!intf->wolopts) {
ret = phy_resume(dev->phydev);
if (ret)
@@ -1067,17 +1054,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
intf->old_link = -1;
intf->old_pause = -1;
- ret = bcmasp_init_tx(intf);
- if (ret)
- goto err_phy_disconnect;
-
- /* Turn on asp */
+ bcmasp_init_tx(intf);
+ netif_napi_add_tx(intf->ndev, &intf->tx_napi, bcmasp_tx_poll);
bcmasp_enable_tx(intf, 1);
- ret = bcmasp_init_rx(intf);
- if (ret)
- goto err_reclaim_tx;
-
+ bcmasp_init_rx(intf);
+ netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll);
bcmasp_enable_rx(intf, 1);
/* Turn on UniMAC TX/RX */
@@ -1091,11 +1073,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
return 0;
-err_reclaim_tx:
- bcmasp_reclaim_free_all_tx(intf);
-err_phy_disconnect:
- if (phydev)
- phy_disconnect(phydev);
err_phy_disable:
if (intf->internal_phy)
bcmasp_ephy_enable_set(intf, false);
@@ -1111,13 +1088,24 @@ static int bcmasp_open(struct net_device *dev)
netif_dbg(intf, ifup, dev, "bcmasp open\n");
- ret = clk_prepare_enable(intf->parent->clk);
+ ret = bcmasp_alloc_buffers(intf);
if (ret)
return ret;
- ret = bcmasp_netif_init(dev, true);
+ ret = clk_prepare_enable(intf->parent->clk);
if (ret)
+ goto err_free_mem;
+
+ ret = bcmasp_netif_init(dev, true);
+ if (ret) {
clk_disable_unprepare(intf->parent->clk);
+ goto err_free_mem;
+ }
+
+ return ret;
+
+err_free_mem:
+ bcmasp_reclaim_free_buffers(intf);
return ret;
}
@@ -1326,6 +1314,9 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf)
ASP_WAKEUP_INTR2_MASK_CLEAR);
}
+ if (intf->eee.eee_enabled && intf->parent->eee_fixup)
+ intf->parent->eee_fixup(intf, true);
+
netif_dbg(intf, wol, ndev, "entered WOL mode\n");
}
@@ -1374,6 +1365,9 @@ static void bcmasp_resume_from_wol(struct bcmasp_intf *intf)
{
u32 reg;
+ if (intf->eee.eee_enabled && intf->parent->eee_fixup)
+ intf->parent->eee_fixup(intf, false);
+
reg = umac_rl(intf, UMC_MPD_CTRL);
reg &= ~UMC_MPD_CTRL_MPD_EN;
umac_wl(intf, reg, UMC_MPD_CTRL);
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 3e7c8671cd11..72df1bb10172 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -793,5 +793,6 @@ static struct platform_driver bcm4908_enet_driver = {
};
module_platform_driver(bcm4908_enet_driver);
+MODULE_DESCRIPTION("Broadcom BCM4908 Gigabit Ethernet driver");
MODULE_LICENSE("GPL v2");
MODULE_DEVICE_TABLE(of, bcm4908_enet_of_match);
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
index 9b83d5361699..50b8e97a811d 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
@@ -260,4 +260,5 @@ void bcma_mdio_mii_unregister(struct mii_bus *mii_bus)
EXPORT_SYMBOL_GPL(bcma_mdio_mii_unregister);
MODULE_AUTHOR("Rafał Miłecki");
+MODULE_DESCRIPTION("Broadcom iProc GBit BCMA MDIO helpers");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 6e4f36aaf5db..36f9bad28e6a 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -362,4 +362,5 @@ module_init(bgmac_init)
module_exit(bgmac_exit)
MODULE_AUTHOR("Rafał Miłecki");
+MODULE_DESCRIPTION("Broadcom iProc GBit BCMA interface driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 0b21fd5bd457..77425c7a32db 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -298,4 +298,5 @@ static struct platform_driver bgmac_enet_driver = {
};
module_platform_driver(bgmac_enet_driver);
+MODULE_DESCRIPTION("Broadcom iProc GBit platform interface driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 448a1b90de5e..6ffdc4229407 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1626,4 +1626,5 @@ int bgmac_enet_resume(struct bgmac *bgmac)
EXPORT_SYMBOL_GPL(bgmac_enet_resume);
MODULE_AUTHOR("Rafał Miłecki");
+MODULE_DESCRIPTION("Broadcom iProc GBit driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index e9c1e1bb5580..c9b6acd8c892 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -147,10 +147,11 @@ void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len)
phy_fw_ver[0] = '\0';
bnx2x_get_ext_phy_fw_version(&bp->link_params,
- phy_fw_ver, PHY_FW_VER_LEN);
- strscpy(buf, bp->fw_ver, buf_len);
- snprintf(buf + strlen(bp->fw_ver), 32 - strlen(bp->fw_ver),
- "bc %d.%d.%d%s%s",
+ phy_fw_ver, sizeof(phy_fw_ver));
+ /* This may become truncated. */
+ scnprintf(buf, buf_len,
+ "%sbc %d.%d.%d%s%s",
+ bp->fw_ver,
(bp->common.bc_ver & 0xff0000) >> 16,
(bp->common.bc_ver & 0xff00) >> 8,
(bp->common.bc_ver & 0xff),
@@ -3537,7 +3538,7 @@ static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb,
u32 *parsing_data, u32 xmit_type)
{
*parsing_data |=
- ((((u8 *)skb_inner_transport_header(skb) - skb->data) >> 1) <<
+ ((skb_inner_transport_offset(skb) >> 1) <<
ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) &
ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W;
@@ -3569,7 +3570,7 @@ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
u32 *parsing_data, u32 xmit_type)
{
*parsing_data |=
- ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
+ ((skb_transport_offset(skb) >> 1) <<
ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) &
ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W;
@@ -3612,7 +3613,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
struct eth_tx_parse_bd_e1x *pbd,
u32 xmit_type)
{
- u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
+ u8 hlen = skb_network_offset(skb) >> 1;
/* for now NS flag is not used in Linux */
pbd->global_data =
@@ -3620,8 +3621,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
- pbd->ip_hlen_w = (skb_transport_header(skb) -
- skb_network_header(skb)) >> 1;
+ pbd->ip_hlen_w = skb_network_header_len(skb) >> 1;
hlen += pbd->ip_hlen_w;
@@ -3666,8 +3666,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb,
u8 outerip_off, outerip_len = 0;
/* from outer IP to transport */
- hlen_w = (skb_inner_transport_header(skb) -
- skb_network_header(skb)) >> 1;
+ hlen_w = skb_inner_transport_offset(skb) >> 1;
/* transport len */
hlen_w += inner_tcp_hdrlen(skb) >> 1;
@@ -3713,7 +3712,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb,
0, IPPROTO_TCP, 0));
}
- outerip_off = (skb_network_header(skb) - skb->data) >> 1;
+ outerip_off = (skb_network_offset(skb)) >> 1;
*global_data |=
outerip_off |
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 81d232e6d05f..58956ed8f531 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1132,7 +1132,7 @@ static void bnx2x_get_drvinfo(struct net_device *dev,
}
memset(version, 0, sizeof(version));
- bnx2x_fill_fw_str(bp, version, ETHTOOL_FWVERS_LEN);
+ bnx2x_fill_fw_str(bp, version, sizeof(version));
strlcat(info->fw_version, version, sizeof(info->fw_version));
strscpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info));
@@ -2081,34 +2081,31 @@ static const char bnx2x_private_arr[BNX2X_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
"Storage only interface"
};
-static u32 bnx2x_eee_to_adv(u32 eee_adv)
+static void bnx2x_eee_to_linkmode(unsigned long *mode, u32 eee_adv)
{
- u32 modes = 0;
-
if (eee_adv & SHMEM_EEE_100M_ADV)
- modes |= ADVERTISED_100baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mode);
if (eee_adv & SHMEM_EEE_1G_ADV)
- modes |= ADVERTISED_1000baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mode);
if (eee_adv & SHMEM_EEE_10G_ADV)
- modes |= ADVERTISED_10000baseT_Full;
-
- return modes;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, mode);
}
-static u32 bnx2x_adv_to_eee(u32 modes, u32 shift)
+static u32 bnx2x_linkmode_to_eee(const unsigned long *mode, u32 shift)
{
u32 eee_adv = 0;
- if (modes & ADVERTISED_100baseT_Full)
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mode))
eee_adv |= SHMEM_EEE_100M_ADV;
- if (modes & ADVERTISED_1000baseT_Full)
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mode))
eee_adv |= SHMEM_EEE_1G_ADV;
- if (modes & ADVERTISED_10000baseT_Full)
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, mode))
eee_adv |= SHMEM_EEE_10G_ADV;
return eee_adv << shift;
}
-static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int bnx2x_get_eee(struct net_device *dev, struct ethtool_keee *edata)
{
struct bnx2x *bp = netdev_priv(dev);
u32 eee_cfg;
@@ -2120,16 +2117,17 @@ static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata)
eee_cfg = bp->link_vars.eee_status;
- edata->supported =
- bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_SUPPORTED_MASK) >>
- SHMEM_EEE_SUPPORTED_SHIFT);
+ bnx2x_eee_to_linkmode(edata->supported,
+ (eee_cfg & SHMEM_EEE_SUPPORTED_MASK) >>
+ SHMEM_EEE_SUPPORTED_SHIFT);
+
+ bnx2x_eee_to_linkmode(edata->advertised,
+ (eee_cfg & SHMEM_EEE_ADV_STATUS_MASK) >>
+ SHMEM_EEE_ADV_STATUS_SHIFT);
- edata->advertised =
- bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_ADV_STATUS_MASK) >>
- SHMEM_EEE_ADV_STATUS_SHIFT);
- edata->lp_advertised =
- bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_LP_ADV_STATUS_MASK) >>
- SHMEM_EEE_LP_ADV_STATUS_SHIFT);
+ bnx2x_eee_to_linkmode(edata->lp_advertised,
+ (eee_cfg & SHMEM_EEE_LP_ADV_STATUS_MASK) >>
+ SHMEM_EEE_LP_ADV_STATUS_SHIFT);
/* SHMEM value is in 16u units --> Convert to 1u units. */
edata->tx_lpi_timer = (eee_cfg & SHMEM_EEE_TIMER_MASK) << 4;
@@ -2141,7 +2139,7 @@ static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata)
return 0;
}
-static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int bnx2x_set_eee(struct net_device *dev, struct ethtool_keee *edata)
{
struct bnx2x *bp = netdev_priv(dev);
u32 eee_cfg;
@@ -2162,8 +2160,8 @@ static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata)
return -EOPNOTSUPP;
}
- advertised = bnx2x_adv_to_eee(edata->advertised,
- SHMEM_EEE_ADV_STATUS_SHIFT);
+ advertised = bnx2x_linkmode_to_eee(edata->advertised,
+ SHMEM_EEE_ADV_STATUS_SHIFT);
if ((advertised != (eee_cfg & SHMEM_EEE_ADV_STATUS_MASK))) {
DP(BNX2X_MSG_ETHTOOL,
"Direct manipulation of EEE advertisement is not supported\n");
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 02808513ffe4..ea310057fe3a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -6163,8 +6163,8 @@ static void bnx2x_link_int_ack(struct link_params *params,
static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len)
{
- str[0] = '\0';
- (*len)--;
+ if (*len)
+ str[0] = '\0';
return 0;
}
@@ -6173,7 +6173,7 @@ static int bnx2x_format_ver(u32 num, u8 *str, u16 *len)
u16 ret;
if (*len < 10) {
- /* Need more than 10chars for this format */
+ /* Need more than 10 chars for this format */
bnx2x_null_format_ver(num, str, len);
return -EINVAL;
}
@@ -6188,8 +6188,8 @@ static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len)
{
u16 ret;
- if (*len < 10) {
- /* Need more than 10chars for this format */
+ if (*len < 9) {
+ /* Need more than 9 chars for this format */
bnx2x_null_format_ver(num, str, len);
return -EINVAL;
}
@@ -6208,7 +6208,7 @@ int bnx2x_get_ext_phy_fw_version(struct link_params *params, u8 *version,
int status = 0;
u8 *ver_p = version;
u16 remain_len = len;
- if (version == NULL || params == NULL)
+ if (version == NULL || params == NULL || len == 0)
return -EINVAL;
bp = params->bp;
@@ -11546,7 +11546,7 @@ static int bnx2x_7101_format_ver(u32 spirom_ver, u8 *str, u16 *len)
str[2] = (spirom_ver & 0xFF0000) >> 16;
str[3] = (spirom_ver & 0xFF000000) >> 24;
str[4] = '\0';
- *len -= 5;
+ *len -= 4;
return 0;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0aacd3c6ed5c..493b724848c8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -246,6 +246,49 @@ static const u16 bnxt_async_events_arr[] = {
static struct workqueue_struct *bnxt_pf_wq;
+#define BNXT_IPV6_MASK_ALL {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}}
+#define BNXT_IPV6_MASK_NONE {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}}
+
+const struct bnxt_flow_masks BNXT_FLOW_MASK_NONE = {
+ .ports = {
+ .src = 0,
+ .dst = 0,
+ },
+ .addrs = {
+ .v6addrs = {
+ .src = BNXT_IPV6_MASK_NONE,
+ .dst = BNXT_IPV6_MASK_NONE,
+ },
+ },
+};
+
+const struct bnxt_flow_masks BNXT_FLOW_IPV6_MASK_ALL = {
+ .ports = {
+ .src = cpu_to_be16(0xffff),
+ .dst = cpu_to_be16(0xffff),
+ },
+ .addrs = {
+ .v6addrs = {
+ .src = BNXT_IPV6_MASK_ALL,
+ .dst = BNXT_IPV6_MASK_ALL,
+ },
+ },
+};
+
+const struct bnxt_flow_masks BNXT_FLOW_IPV4_MASK_ALL = {
+ .ports = {
+ .src = cpu_to_be16(0xffff),
+ .dst = cpu_to_be16(0xffff),
+ },
+ .addrs = {
+ .v4addrs = {
+ .src = cpu_to_be32(0xffffffff),
+ .dst = cpu_to_be32(0xffffffff),
+ },
+ },
+};
+
static bool bnxt_vf_pciid(enum board_idx idx)
{
return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
@@ -3817,7 +3860,7 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp)
{
bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS);
int i, j, rc, ulp_base_vec, ulp_msix;
- int tcs = netdev_get_num_tc(bp->dev);
+ int tcs = bp->num_tc;
if (!tcs)
tcs = 1;
@@ -4168,8 +4211,12 @@ static int bnxt_alloc_vnics(struct bnxt *bp)
int num_vnics = 1;
#ifdef CONFIG_RFS_ACCEL
- if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) == BNXT_FLAG_RFS)
- num_vnics += bp->rx_nr_rings;
+ if (bp->flags & BNXT_FLAG_RFS) {
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp))
+ num_vnics++;
+ else if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
+ num_vnics += bp->rx_nr_rings;
+ }
#endif
if (BNXT_CHIP_TYPE_NITRO_A0(bp))
@@ -4186,6 +4233,7 @@ static int bnxt_alloc_vnics(struct bnxt *bp)
static void bnxt_init_vnics(struct bnxt *bp)
{
+ struct bnxt_vnic_info *vnic0 = &bp->vnic_info[BNXT_VNIC_DEFAULT];
int i;
for (i = 0; i < bp->nr_vnics; i++) {
@@ -4199,20 +4247,33 @@ static void bnxt_init_vnics(struct bnxt *bp)
vnic->fw_l2_ctx_id = INVALID_HW_RING_ID;
if (bp->vnic_info[i].rss_hash_key) {
- if (!i) {
+ if (i == BNXT_VNIC_DEFAULT) {
u8 *key = (void *)vnic->rss_hash_key;
int k;
+ if (!bp->rss_hash_key_valid &&
+ !bp->rss_hash_key_updated) {
+ get_random_bytes(bp->rss_hash_key,
+ HW_HASH_KEY_SIZE);
+ bp->rss_hash_key_updated = true;
+ }
+
+ memcpy(vnic->rss_hash_key, bp->rss_hash_key,
+ HW_HASH_KEY_SIZE);
+
+ if (!bp->rss_hash_key_updated)
+ continue;
+
+ bp->rss_hash_key_updated = false;
+ bp->rss_hash_key_valid = true;
+
bp->toeplitz_prefix = 0;
- get_random_bytes(vnic->rss_hash_key,
- HW_HASH_KEY_SIZE);
for (k = 0; k < 8; k++) {
bp->toeplitz_prefix <<= 8;
bp->toeplitz_prefix |= key[k];
}
} else {
- memcpy(vnic->rss_hash_key,
- bp->vnic_info[0].rss_hash_key,
+ memcpy(vnic->rss_hash_key, vnic0->rss_hash_key,
HW_HASH_KEY_SIZE);
}
}
@@ -4798,6 +4859,44 @@ static void bnxt_clear_ring_indices(struct bnxt *bp)
}
}
+void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr)
+{
+ u8 type = fltr->type, flags = fltr->flags;
+
+ INIT_LIST_HEAD(&fltr->list);
+ if ((type == BNXT_FLTR_TYPE_L2 && flags & BNXT_ACT_RING_DST) ||
+ (type == BNXT_FLTR_TYPE_NTUPLE && flags & BNXT_ACT_NO_AGING))
+ list_add_tail(&fltr->list, &bp->usr_fltr_list);
+}
+
+void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr)
+{
+ if (!list_empty(&fltr->list))
+ list_del_init(&fltr->list);
+}
+
+void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all)
+{
+ struct bnxt_filter_base *usr_fltr, *tmp;
+
+ list_for_each_entry_safe(usr_fltr, tmp, &bp->usr_fltr_list, list) {
+ if (!all && usr_fltr->type == BNXT_FLTR_TYPE_L2)
+ continue;
+ bnxt_del_one_usr_fltr(bp, usr_fltr);
+ }
+}
+
+static void bnxt_del_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr)
+{
+ hlist_del(&fltr->hash);
+ bnxt_del_one_usr_fltr(bp, fltr);
+ if (fltr->flags) {
+ clear_bit(fltr->sw_id, bp->ntp_fltr_bmap);
+ bp->ntp_fltr_count--;
+ }
+ kfree(fltr);
+}
+
static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all)
{
int i;
@@ -4813,12 +4912,10 @@ static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all)
head = &bp->ntp_fltr_hash_tbl[i];
hlist_for_each_entry_safe(fltr, tmp, head, base.hash) {
bnxt_del_l2_filter(bp, fltr->l2_fltr);
- if (!all && (fltr->base.flags & BNXT_ACT_FUNC_DST))
+ if (!all && ((fltr->base.flags & BNXT_ACT_FUNC_DST) ||
+ !list_empty(&fltr->base.list)))
continue;
- hlist_del(&fltr->base.hash);
- clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap);
- bp->ntp_fltr_count--;
- kfree(fltr);
+ bnxt_del_fltr(bp, &fltr->base);
}
}
if (!all)
@@ -4840,7 +4937,7 @@ static int bnxt_alloc_ntp_fltrs(struct bnxt *bp)
INIT_HLIST_HEAD(&bp->ntp_fltr_hash_tbl[i]);
bp->ntp_fltr_count = 0;
- bp->ntp_fltr_bmap = bitmap_zalloc(BNXT_MAX_FLTR, GFP_KERNEL);
+ bp->ntp_fltr_bmap = bitmap_zalloc(bp->max_fltr, GFP_KERNEL);
if (!bp->ntp_fltr_bmap)
rc = -ENOMEM;
@@ -4859,14 +4956,10 @@ static void bnxt_free_l2_filters(struct bnxt *bp, bool all)
head = &bp->l2_fltr_hash_tbl[i];
hlist_for_each_entry_safe(fltr, tmp, head, base.hash) {
- if (!all && (fltr->base.flags & BNXT_ACT_FUNC_DST))
+ if (!all && ((fltr->base.flags & BNXT_ACT_FUNC_DST) ||
+ !list_empty(&fltr->base.list)))
continue;
- hlist_del(&fltr->base.hash);
- if (fltr->base.flags) {
- clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap);
- bp->ntp_fltr_count--;
- }
- kfree(fltr);
+ bnxt_del_fltr(bp, &fltr->base);
}
}
}
@@ -5039,8 +5132,13 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
if (rc)
goto alloc_mem_err;
- bp->vnic_info[0].flags |= BNXT_VNIC_RSS_FLAG | BNXT_VNIC_MCAST_FLAG |
- BNXT_VNIC_UCAST_FLAG;
+ bp->vnic_info[BNXT_VNIC_DEFAULT].flags |= BNXT_VNIC_RSS_FLAG |
+ BNXT_VNIC_MCAST_FLAG |
+ BNXT_VNIC_UCAST_FLAG;
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp) && (bp->flags & BNXT_FLAG_RFS))
+ bp->vnic_info[BNXT_VNIC_NTUPLE].flags |=
+ BNXT_VNIC_RSS_FLAG | BNXT_VNIC_NTUPLE_FLAG;
+
rc = bnxt_alloc_vnic_attributes(bp);
if (rc)
goto alloc_mem_err;
@@ -5342,6 +5440,7 @@ void bnxt_del_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr)
return;
}
hlist_del_rcu(&fltr->base.hash);
+ bnxt_del_one_usr_fltr(bp, &fltr->base);
if (fltr->base.flags) {
clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap);
bp->ntp_fltr_count--;
@@ -5480,13 +5579,15 @@ static int bnxt_init_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr,
int bit_id;
bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap,
- BNXT_MAX_FLTR, 0);
+ bp->max_fltr, 0);
if (bit_id < 0)
return -ENOMEM;
fltr->base.sw_id = (u16)bit_id;
+ bp->ntp_fltr_count++;
}
head = &bp->l2_fltr_hash_tbl[idx];
hlist_add_head_rcu(&fltr->base.hash, head);
+ bnxt_insert_usr_fltr(bp, &fltr->base);
set_bit(BNXT_FLTR_INSERTED, &fltr->base.state);
atomic_set(&fltr->refcnt, 1);
return 0;
@@ -5519,6 +5620,40 @@ static struct bnxt_l2_filter *bnxt_alloc_l2_filter(struct bnxt *bp,
return fltr;
}
+struct bnxt_l2_filter *bnxt_alloc_new_l2_filter(struct bnxt *bp,
+ struct bnxt_l2_key *key,
+ u16 flags)
+{
+ struct bnxt_l2_filter *fltr;
+ u32 idx;
+ int rc;
+
+ idx = jhash2(&key->filter_key, BNXT_L2_KEY_SIZE, bp->hash_seed) &
+ BNXT_L2_FLTR_HASH_MASK;
+ spin_lock_bh(&bp->ntp_fltr_lock);
+ fltr = __bnxt_lookup_l2_filter(bp, key, idx);
+ if (fltr) {
+ fltr = ERR_PTR(-EEXIST);
+ goto l2_filter_exit;
+ }
+ fltr = kzalloc(sizeof(*fltr), GFP_ATOMIC);
+ if (!fltr) {
+ fltr = ERR_PTR(-ENOMEM);
+ goto l2_filter_exit;
+ }
+ fltr->base.flags = flags;
+ rc = bnxt_init_l2_filter(bp, fltr, key, idx);
+ if (rc) {
+ spin_unlock_bh(&bp->ntp_fltr_lock);
+ bnxt_del_l2_filter(bp, fltr);
+ return ERR_PTR(rc);
+ }
+
+l2_filter_exit:
+ spin_unlock_bh(&bp->ntp_fltr_lock);
+ return fltr;
+}
+
static u16 bnxt_vf_target_id(struct bnxt_pf_info *pf, u16 vf_idx)
{
#ifdef CONFIG_BNXT_SRIOV
@@ -5650,15 +5785,38 @@ void bnxt_fill_ipv6_mask(__be32 mask[4])
mask[i] = cpu_to_be32(~0);
}
+static void
+bnxt_cfg_rfs_ring_tbl_idx(struct bnxt *bp,
+ struct hwrm_cfa_ntuple_filter_alloc_input *req,
+ u16 rxq)
+{
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) {
+ struct bnxt_vnic_info *vnic;
+ u32 enables;
+
+ vnic = &bp->vnic_info[BNXT_VNIC_NTUPLE];
+ req->dst_id = cpu_to_le16(vnic->fw_vnic_id);
+ enables = CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_RFS_RING_TBL_IDX;
+ req->enables |= cpu_to_le32(enables);
+ req->rfs_ring_tbl_idx = cpu_to_le16(rxq);
+ } else {
+ u32 flags;
+
+ flags = CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX;
+ req->flags |= cpu_to_le32(flags);
+ req->dst_id = cpu_to_le16(rxq);
+ }
+}
+
int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
struct bnxt_ntuple_filter *fltr)
{
struct hwrm_cfa_ntuple_filter_alloc_output *resp;
struct hwrm_cfa_ntuple_filter_alloc_input *req;
+ struct bnxt_flow_masks *masks = &fltr->fmasks;
struct flow_keys *keys = &fltr->fkeys;
struct bnxt_l2_filter *l2_fltr;
struct bnxt_vnic_info *vnic;
- u32 flags = 0;
int rc;
rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_ALLOC);
@@ -5668,16 +5826,16 @@ int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
l2_fltr = fltr->l2_fltr;
req->l2_filter_id = l2_fltr->base.filter_id;
-
- if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) {
- flags = CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX;
- req->dst_id = cpu_to_le16(fltr->base.rxq);
+ if (fltr->base.flags & BNXT_ACT_DROP) {
+ req->flags =
+ cpu_to_le32(CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP);
+ } else if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) {
+ bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr->base.rxq);
} else {
vnic = &bp->vnic_info[fltr->base.rxq + 1];
req->dst_id = cpu_to_le16(vnic->fw_vnic_id);
}
- req->flags = cpu_to_le32(flags);
- req->enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
+ req->enables |= cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
req->ethertype = htons(ETH_P_IP);
req->ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
@@ -5687,25 +5845,15 @@ int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
req->ethertype = htons(ETH_P_IPV6);
req->ip_addr_type =
CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6;
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) {
- *(struct in6_addr *)&req->src_ipaddr[0] =
- keys->addrs.v6addrs.src;
- bnxt_fill_ipv6_mask(req->src_ipaddr_mask);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) {
- *(struct in6_addr *)&req->dst_ipaddr[0] =
- keys->addrs.v6addrs.dst;
- bnxt_fill_ipv6_mask(req->dst_ipaddr_mask);
- }
+ *(struct in6_addr *)&req->src_ipaddr[0] = keys->addrs.v6addrs.src;
+ *(struct in6_addr *)&req->src_ipaddr_mask[0] = masks->addrs.v6addrs.src;
+ *(struct in6_addr *)&req->dst_ipaddr[0] = keys->addrs.v6addrs.dst;
+ *(struct in6_addr *)&req->dst_ipaddr_mask[0] = masks->addrs.v6addrs.dst;
} else {
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) {
- req->src_ipaddr[0] = keys->addrs.v4addrs.src;
- req->src_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) {
- req->dst_ipaddr[0] = keys->addrs.v4addrs.dst;
- req->dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
- }
+ req->src_ipaddr[0] = keys->addrs.v4addrs.src;
+ req->src_ipaddr_mask[0] = masks->addrs.v4addrs.src;
+ req->dst_ipaddr[0] = keys->addrs.v4addrs.dst;
+ req->dst_ipaddr_mask[0] = masks->addrs.v4addrs.dst;
}
if (keys->control.flags & FLOW_DIS_ENCAPSULATION) {
req->enables |= cpu_to_le32(BNXT_NTP_TUNNEL_FLTR_FLAG);
@@ -5713,14 +5861,10 @@ int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL;
}
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) {
- req->src_port = keys->ports.src;
- req->src_port_mask = cpu_to_be16(0xffff);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) {
- req->dst_port = keys->ports.dst;
- req->dst_port_mask = cpu_to_be16(0xffff);
- }
+ req->src_port = keys->ports.src;
+ req->src_port_mask = masks->ports.src;
+ req->dst_port = keys->ports.dst;
+ req->dst_port_mask = masks->ports.dst;
resp = hwrm_req_hold(bp, req);
rc = hwrm_req_send(bp, req);
@@ -5935,8 +6079,12 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp)
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
- return DIV_ROUND_UP(rx_rings, BNXT_RSS_TABLE_ENTRIES_P5);
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ if (!rx_rings)
+ return 0;
+ return bnxt_calc_nr_ring_pages(rx_rings - 1,
+ BNXT_RSS_TABLE_ENTRIES_P5);
+ }
if (BNXT_CHIP_TYPE_NITRO_A0(bp))
return 2;
return 1;
@@ -5967,7 +6115,10 @@ static void bnxt_fill_hw_rss_tbl_p5(struct bnxt *bp,
for (i = 0; i < tbl_size; i++) {
u16 ring_id, j;
- j = bp->rss_indir_tbl[i];
+ if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG)
+ j = ethtool_rxfh_indir_default(i, bp->rx_nr_rings);
+ else
+ j = bp->rss_indir_tbl[i];
rxr = &bp->rx_ring[j];
ring_id = rxr->rx_ring_struct.fw_ring_id;
@@ -5981,10 +6132,13 @@ static void
__bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct hwrm_vnic_rss_cfg_input *req,
struct bnxt_vnic_info *vnic)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
bnxt_fill_hw_rss_tbl_p5(bp, vnic);
- else
+ if (bp->flags & BNXT_FLAG_CHIP_P7)
+ req->flags |= VNIC_RSS_CFG_REQ_FLAGS_IPSEC_HASH_TYPE_CFG_SUPPORT;
+ } else {
bnxt_fill_hw_rss_tbl(bp, vnic);
+ }
if (bp->rss_hash_delta) {
req->hash_type = cpu_to_le32(bp->rss_hash_delta);
@@ -6057,7 +6211,7 @@ exit:
static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp)
{
- struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
struct hwrm_vnic_rss_qcfg_output *resp;
struct hwrm_vnic_rss_qcfg_input *req;
@@ -6161,6 +6315,7 @@ static u32 bnxt_get_roce_vnic_mode(struct bnxt *bp)
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
{
+ struct bnxt_vnic_info *vnic0 = &bp->vnic_info[BNXT_VNIC_DEFAULT];
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
struct hwrm_vnic_cfg_input *req;
unsigned int ring = 0, grp_idx;
@@ -6190,8 +6345,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
VNIC_CFG_REQ_ENABLES_MRU);
} else if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) {
- req->rss_rule =
- cpu_to_le16(bp->vnic_info[0].fw_rss_cos_lb_ctx[0]);
+ req->rss_rule = cpu_to_le16(vnic0->fw_rss_cos_lb_ctx[0]);
req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
VNIC_CFG_REQ_ENABLES_MRU);
req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE);
@@ -6288,7 +6442,7 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id,
vnic_no_ring_grps:
for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++)
vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID;
- if (vnic_id == 0)
+ if (vnic_id == BNXT_VNIC_DEFAULT)
req->flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
resp = hwrm_req_hold(bp, req);
@@ -6347,6 +6501,14 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
}
if (flags & VNIC_QCAPS_RESP_FLAGS_HW_TUNNEL_TPA_CAP)
bp->fw_cap |= BNXT_FW_CAP_VNIC_TUNNEL_TPA;
+ if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_AH_SPI_IPV4_CAP)
+ bp->rss_cap |= BNXT_RSS_CAP_AH_V4_RSS_CAP;
+ if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_AH_SPI_IPV6_CAP)
+ bp->rss_cap |= BNXT_RSS_CAP_AH_V6_RSS_CAP;
+ if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV4_CAP)
+ bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP;
+ if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP)
+ bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP;
}
hwrm_req_drop(bp, req);
return rc;
@@ -6914,6 +7076,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
hw_resc->resv_hw_ring_grps =
le32_to_cpu(resp->alloc_hw_ring_grps);
hw_resc->resv_vnics = le16_to_cpu(resp->alloc_vnics);
+ hw_resc->resv_rsscos_ctxs = le16_to_cpu(resp->alloc_rsscos_ctx);
cp = le16_to_cpu(resp->alloc_cmpl_rings);
stats = le16_to_cpu(resp->alloc_stat_ctx);
hw_resc->resv_irqs = cp;
@@ -6926,7 +7089,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
if (cp < (rx + tx)) {
rc = __bnxt_trim_rings(bp, &rx, &tx, cp, false);
if (rc)
- return rc;
+ goto get_rings_exit;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
hw_resc->resv_rx_rings = rx;
@@ -6938,8 +7101,9 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
hw_resc->resv_cp_rings = cp;
hw_resc->resv_stat_ctxs = stats;
}
+get_rings_exit:
hwrm_req_drop(bp, req);
- return 0;
+ return rc;
}
int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
@@ -6968,8 +7132,7 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
static bool bnxt_rfs_supported(struct bnxt *bp);
static struct hwrm_func_cfg_input *
-__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats, int vnics)
+__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct hwrm_func_cfg_input *req;
u32 enables = 0;
@@ -6978,51 +7141,42 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return NULL;
req->fid = cpu_to_le16(0xffff);
- enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
- req->num_tx_rings = cpu_to_le16(tx_rings);
+ enables |= hwr->tx ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
+ req->num_tx_rings = cpu_to_le16(hwr->tx);
if (BNXT_NEW_RM(bp)) {
- enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
- enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ enables |= hwr->rx ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
+ enables |= hwr->stat ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0;
- enables |= tx_rings + ring_grps ?
+ enables |= hwr->cp ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0;
+ enables |= hwr->cp_p5 ?
FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
- enables |= rx_rings ?
- FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
} else {
- enables |= cp_rings ?
+ enables |= hwr->cp ?
FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
- enables |= ring_grps ?
- FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
- FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
- }
- enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
-
- req->num_rx_rings = cpu_to_le16(rx_rings);
+ enables |= hwr->grp ?
+ FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+ }
+ enables |= hwr->vnic ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
+ enables |= hwr->rss_ctx ? FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS :
+ 0;
+ req->num_rx_rings = cpu_to_le16(hwr->rx);
+ req->num_rsscos_ctxs = cpu_to_le16(hwr->rss_ctx);
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
- req->num_msix = cpu_to_le16(cp_rings);
- req->num_rsscos_ctxs =
- cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
+ req->num_cmpl_rings = cpu_to_le16(hwr->cp_p5);
+ req->num_msix = cpu_to_le16(hwr->cp);
} else {
- req->num_cmpl_rings = cpu_to_le16(cp_rings);
- req->num_hw_ring_grps = cpu_to_le16(ring_grps);
- req->num_rsscos_ctxs = cpu_to_le16(1);
- if (!(bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP) &&
- bnxt_rfs_supported(bp))
- req->num_rsscos_ctxs =
- cpu_to_le16(ring_grps + 1);
+ req->num_cmpl_rings = cpu_to_le16(hwr->cp);
+ req->num_hw_ring_grps = cpu_to_le16(hwr->grp);
}
- req->num_stat_ctxs = cpu_to_le16(stats);
- req->num_vnics = cpu_to_le16(vnics);
+ req->num_stat_ctxs = cpu_to_le16(hwr->stat);
+ req->num_vnics = cpu_to_le16(hwr->vnic);
}
req->enables = cpu_to_le32(enables);
return req;
}
static struct hwrm_func_vf_cfg_input *
-__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats, int vnics)
+__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct hwrm_func_vf_cfg_input *req;
u32 enables = 0;
@@ -7030,49 +7184,46 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
if (hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG))
return NULL;
- enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
- enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
- enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ enables |= hwr->tx ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
+ enables |= hwr->rx ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
+ FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
+ enables |= hwr->stat ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ enables |= hwr->rss_ctx ? FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- enables |= tx_rings + ring_grps ?
+ enables |= hwr->cp_p5 ?
FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
} else {
- enables |= cp_rings ?
- FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
- enables |= ring_grps ?
+ enables |= hwr->cp ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
+ enables |= hwr->grp ?
FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
}
- enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
+ enables |= hwr->vnic ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
enables |= FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS;
req->num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
- req->num_tx_rings = cpu_to_le16(tx_rings);
- req->num_rx_rings = cpu_to_le16(rx_rings);
+ req->num_tx_rings = cpu_to_le16(hwr->tx);
+ req->num_rx_rings = cpu_to_le16(hwr->rx);
+ req->num_rsscos_ctxs = cpu_to_le16(hwr->rss_ctx);
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
- req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
- req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
+ req->num_cmpl_rings = cpu_to_le16(hwr->cp_p5);
} else {
- req->num_cmpl_rings = cpu_to_le16(cp_rings);
- req->num_hw_ring_grps = cpu_to_le16(ring_grps);
- req->num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
+ req->num_cmpl_rings = cpu_to_le16(hwr->cp);
+ req->num_hw_ring_grps = cpu_to_le16(hwr->grp);
}
- req->num_stat_ctxs = cpu_to_le16(stats);
- req->num_vnics = cpu_to_le16(vnics);
+ req->num_stat_ctxs = cpu_to_le16(hwr->stat);
+ req->num_vnics = cpu_to_le16(hwr->vnic);
req->enables = cpu_to_le32(enables);
return req;
}
static int
-bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats, int vnics)
+bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct hwrm_func_cfg_input *req;
int rc;
- req = __bnxt_hwrm_reserve_pf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ req = __bnxt_hwrm_reserve_pf_rings(bp, hwr);
if (!req)
return -ENOMEM;
@@ -7086,25 +7237,23 @@ bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return rc;
if (bp->hwrm_spec_code < 0x10601)
- bp->hw_resc.resv_tx_rings = tx_rings;
+ bp->hw_resc.resv_tx_rings = hwr->tx;
return bnxt_hwrm_get_rings(bp);
}
static int
-bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats, int vnics)
+bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct hwrm_func_vf_cfg_input *req;
int rc;
if (!BNXT_NEW_RM(bp)) {
- bp->hw_resc.resv_tx_rings = tx_rings;
+ bp->hw_resc.resv_tx_rings = hwr->tx;
return 0;
}
- req = __bnxt_hwrm_reserve_vf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ req = __bnxt_hwrm_reserve_vf_rings(bp, hwr);
if (!req)
return -ENOMEM;
@@ -7115,15 +7264,12 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return bnxt_hwrm_get_rings(bp);
}
-static int bnxt_hwrm_reserve_rings(struct bnxt *bp, int tx, int rx, int grp,
- int cp, int stat, int vnic)
+static int bnxt_hwrm_reserve_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
if (BNXT_PF(bp))
- return bnxt_hwrm_reserve_pf_rings(bp, tx, rx, grp, cp, stat,
- vnic);
+ return bnxt_hwrm_reserve_pf_rings(bp, hwr);
else
- return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, stat,
- vnic);
+ return bnxt_hwrm_reserve_vf_rings(bp, hwr);
}
int bnxt_nq_rings_in_use(struct bnxt *bp)
@@ -7166,6 +7312,24 @@ static int bnxt_get_func_stat_ctxs(struct bnxt *bp)
return cp + ulp_stat;
}
+static int bnxt_get_total_rss_ctxs(struct bnxt *bp, struct bnxt_hw_rings *hwr)
+{
+ if (!hwr->grp)
+ return 0;
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ int rss_ctx = bnxt_get_nr_rss_ctxs(bp, hwr->grp);
+
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp))
+ rss_ctx *= hwr->vnic;
+ return rss_ctx;
+ }
+ if (BNXT_VF(bp))
+ return BNXT_VF_MAX_RSS_CTX;
+ if (!(bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP) && bnxt_rfs_supported(bp))
+ return hwr->grp + 1;
+ return 1;
+}
+
/* Check if a default RSS map needs to be setup. This function is only
* used on older firmware that does not require reserving RX rings.
*/
@@ -7181,13 +7345,24 @@ static void bnxt_check_rss_tbl_no_rmgr(struct bnxt *bp)
}
}
+static int bnxt_get_total_vnics(struct bnxt *bp, int rx_rings)
+{
+ if (bp->flags & BNXT_FLAG_RFS) {
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp))
+ return 2;
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
+ return rx_rings + 1;
+ }
+ return 1;
+}
+
static bool bnxt_need_reserve_rings(struct bnxt *bp)
{
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
int cp = bnxt_cp_rings_in_use(bp);
int nq = bnxt_nq_rings_in_use(bp);
int rx = bp->rx_nr_rings, stat;
- int vnic = 1, grp = rx;
+ int vnic, grp = rx;
if (hw_resc->resv_tx_rings != bp->tx_nr_rings &&
bp->hwrm_spec_code >= 0x10601)
@@ -7202,9 +7377,9 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
bnxt_check_rss_tbl_no_rmgr(bp);
return false;
}
- if ((bp->flags & BNXT_FLAG_RFS) &&
- !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
- vnic = rx + 1;
+
+ vnic = bnxt_get_total_vnics(bp, rx);
+
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
stat = bnxt_get_func_stat_ctxs(bp);
@@ -7219,47 +7394,65 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
return false;
}
-static int __bnxt_reserve_rings(struct bnxt *bp)
+static void bnxt_copy_reserved_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
- int cp = bnxt_nq_rings_in_use(bp);
- int tx = bp->tx_nr_rings;
- int rx = bp->rx_nr_rings;
- int grp, rx_rings, rc;
- int vnic = 1, stat;
+
+ hwr->tx = hw_resc->resv_tx_rings;
+ if (BNXT_NEW_RM(bp)) {
+ hwr->rx = hw_resc->resv_rx_rings;
+ hwr->cp = hw_resc->resv_irqs;
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ hwr->cp_p5 = hw_resc->resv_cp_rings;
+ hwr->grp = hw_resc->resv_hw_ring_grps;
+ hwr->vnic = hw_resc->resv_vnics;
+ hwr->stat = hw_resc->resv_stat_ctxs;
+ hwr->rss_ctx = hw_resc->resv_rsscos_ctxs;
+ }
+}
+
+static bool bnxt_rings_ok(struct bnxt *bp, struct bnxt_hw_rings *hwr)
+{
+ return hwr->tx && hwr->rx && hwr->cp && hwr->grp && hwr->vnic &&
+ hwr->stat && (hwr->cp_p5 || !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS));
+}
+
+static int __bnxt_reserve_rings(struct bnxt *bp)
+{
+ struct bnxt_hw_rings hwr = {0};
+ int rx_rings, rc;
bool sh = false;
int tx_cp;
if (!bnxt_need_reserve_rings(bp))
return 0;
+ hwr.cp = bnxt_nq_rings_in_use(bp);
+ hwr.tx = bp->tx_nr_rings;
+ hwr.rx = bp->rx_nr_rings;
if (bp->flags & BNXT_FLAG_SHARED_RINGS)
sh = true;
- if ((bp->flags & BNXT_FLAG_RFS) &&
- !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
- vnic = rx + 1;
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ hwr.cp_p5 = hwr.rx + hwr.tx;
+
+ hwr.vnic = bnxt_get_total_vnics(bp, hwr.rx);
+
if (bp->flags & BNXT_FLAG_AGG_RINGS)
- rx <<= 1;
- grp = bp->rx_nr_rings;
- stat = bnxt_get_func_stat_ctxs(bp);
+ hwr.rx <<= 1;
+ hwr.grp = bp->rx_nr_rings;
+ hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr);
+ hwr.stat = bnxt_get_func_stat_ctxs(bp);
- rc = bnxt_hwrm_reserve_rings(bp, tx, rx, grp, cp, stat, vnic);
+ rc = bnxt_hwrm_reserve_rings(bp, &hwr);
if (rc)
return rc;
- tx = hw_resc->resv_tx_rings;
- if (BNXT_NEW_RM(bp)) {
- rx = hw_resc->resv_rx_rings;
- cp = hw_resc->resv_irqs;
- grp = hw_resc->resv_hw_ring_grps;
- vnic = hw_resc->resv_vnics;
- stat = hw_resc->resv_stat_ctxs;
- }
+ bnxt_copy_reserved_rings(bp, &hwr);
- rx_rings = rx;
+ rx_rings = hwr.rx;
if (bp->flags & BNXT_FLAG_AGG_RINGS) {
- if (rx >= 2) {
- rx_rings = rx >> 1;
+ if (hwr.rx >= 2) {
+ rx_rings = hwr.rx >> 1;
} else {
if (netif_running(bp->dev))
return -ENOMEM;
@@ -7271,17 +7464,17 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
bnxt_set_ring_params(bp);
}
}
- rx_rings = min_t(int, rx_rings, grp);
- cp = min_t(int, cp, bp->cp_nr_rings);
- if (stat > bnxt_get_ulp_stat_ctxs(bp))
- stat -= bnxt_get_ulp_stat_ctxs(bp);
- cp = min_t(int, cp, stat);
- rc = bnxt_trim_rings(bp, &rx_rings, &tx, cp, sh);
+ rx_rings = min_t(int, rx_rings, hwr.grp);
+ hwr.cp = min_t(int, hwr.cp, bp->cp_nr_rings);
+ if (hwr.stat > bnxt_get_ulp_stat_ctxs(bp))
+ hwr.stat -= bnxt_get_ulp_stat_ctxs(bp);
+ hwr.cp = min_t(int, hwr.cp, hwr.stat);
+ rc = bnxt_trim_rings(bp, &rx_rings, &hwr.tx, hwr.cp, sh);
if (bp->flags & BNXT_FLAG_AGG_RINGS)
- rx = rx_rings << 1;
- tx_cp = bnxt_num_tx_to_cp(bp, tx);
- cp = sh ? max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings;
- bp->tx_nr_rings = tx;
+ hwr.rx = rx_rings << 1;
+ tx_cp = bnxt_num_tx_to_cp(bp, hwr.tx);
+ hwr.cp = sh ? max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings;
+ bp->tx_nr_rings = hwr.tx;
/* If we cannot reserve all the RX rings, reset the RSS map only
* if absolutely necessary
@@ -7298,9 +7491,9 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
}
}
bp->rx_nr_rings = rx_rings;
- bp->cp_nr_rings = cp;
+ bp->cp_nr_rings = hwr.cp;
- if (!tx || !rx || !cp || !grp || !vnic || !stat)
+ if (!bnxt_rings_ok(bp, &hwr))
return -ENOMEM;
if (!netif_is_rxfh_configured(bp->dev))
@@ -7309,9 +7502,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
return rc;
}
-static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats,
- int vnics)
+static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct hwrm_func_vf_cfg_input *req;
u32 flags;
@@ -7319,8 +7510,7 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
if (!BNXT_NEW_RM(bp))
return 0;
- req = __bnxt_hwrm_reserve_vf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ req = __bnxt_hwrm_reserve_vf_rings(bp, hwr);
flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
@@ -7334,15 +7524,12 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return hwrm_req_send_silent(bp, req);
}
-static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats,
- int vnics)
+static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
struct hwrm_func_cfg_input *req;
u32 flags;
- req = __bnxt_hwrm_reserve_pf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ req = __bnxt_hwrm_reserve_pf_rings(bp, hwr);
flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
if (BNXT_NEW_RM(bp)) {
flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
@@ -7360,20 +7547,15 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return hwrm_req_send_silent(bp, req);
}
-static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int stats,
- int vnics)
+static int bnxt_hwrm_check_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr)
{
if (bp->hwrm_spec_code < 0x10801)
return 0;
if (BNXT_PF(bp))
- return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings,
- ring_grps, cp_rings, stats,
- vnics);
+ return bnxt_hwrm_check_pf_rings(bp, hwr);
- return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings, stats, vnics);
+ return bnxt_hwrm_check_vf_rings(bp, hwr);
}
static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
@@ -8701,6 +8883,13 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
hw_resc->max_vnics = le16_to_cpu(resp->max_vnics);
hw_resc->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx);
+ hw_resc->max_encap_records = le32_to_cpu(resp->max_encap_records);
+ hw_resc->max_decap_records = le32_to_cpu(resp->max_decap_records);
+ hw_resc->max_tx_em_flows = le32_to_cpu(resp->max_tx_em_flows);
+ hw_resc->max_tx_wm_flows = le32_to_cpu(resp->max_tx_wm_flows);
+ hw_resc->max_rx_em_flows = le32_to_cpu(resp->max_rx_em_flows);
+ hw_resc->max_rx_wm_flows = le32_to_cpu(resp->max_rx_wm_flows);
+
if (BNXT_PF(bp)) {
struct bnxt_pf_info *pf = &bp->pf;
@@ -8709,12 +8898,6 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN);
pf->first_vf_id = le16_to_cpu(resp->first_vf_id);
pf->max_vfs = le16_to_cpu(resp->max_vfs);
- pf->max_encap_records = le32_to_cpu(resp->max_encap_records);
- pf->max_decap_records = le32_to_cpu(resp->max_decap_records);
- pf->max_tx_em_flows = le32_to_cpu(resp->max_tx_em_flows);
- pf->max_tx_wm_flows = le32_to_cpu(resp->max_tx_wm_flows);
- pf->max_rx_em_flows = le32_to_cpu(resp->max_rx_em_flows);
- pf->max_rx_wm_flows = le32_to_cpu(resp->max_rx_wm_flows);
bp->flags &= ~BNXT_FLAG_WOL_CAP;
if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED)
bp->flags |= BNXT_FLAG_WOL_CAP;
@@ -8817,6 +9000,14 @@ static int bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(struct bnxt *bp)
CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED)
bp->fw_cap |= BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2;
+ if (flags &
+ CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V3_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3;
+
+ if (flags &
+ CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_EXT_IP_PROTO_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO;
+
hwrm_cfa_adv_qcaps_exit:
hwrm_req_drop(bp, req);
return rc;
@@ -9681,10 +9872,28 @@ static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
return __bnxt_setup_vnic(bp, vnic_id);
}
+static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, u16 vnic_id,
+ u16 start_rx_ring_idx, int rx_rings)
+{
+ int rc;
+
+ rc = bnxt_hwrm_vnic_alloc(bp, vnic_id, start_rx_ring_idx, rx_rings);
+ if (rc) {
+ netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n",
+ vnic_id, rc);
+ return rc;
+ }
+ return bnxt_setup_vnic(bp, vnic_id);
+}
+
static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
{
int i, rc = 0;
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp))
+ return bnxt_alloc_and_setup_vnic(bp, BNXT_VNIC_NTUPLE, 0,
+ bp->rx_nr_rings);
+
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return 0;
@@ -9700,14 +9909,7 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
vnic->flags |= BNXT_VNIC_RFS_FLAG;
if (bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP)
vnic->flags |= BNXT_VNIC_RFS_NEW_RSS_FLAG;
- rc = bnxt_hwrm_vnic_alloc(bp, vnic_id, ring_id, 1);
- if (rc) {
- netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n",
- vnic_id, rc);
- break;
- }
- rc = bnxt_setup_vnic(bp, vnic_id);
- if (rc)
+ if (bnxt_alloc_and_setup_vnic(bp, vnic_id, ring_id, 1))
break;
}
return rc;
@@ -9748,7 +9950,7 @@ static bool bnxt_mc_list_updated(struct bnxt *, u32 *);
static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
{
- struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
int rc = 0;
unsigned int rx_nr_rings = bp->rx_nr_rings;
@@ -9777,7 +9979,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
rx_nr_rings--;
/* default vnic 0 */
- rc = bnxt_hwrm_vnic_alloc(bp, 0, 0, rx_nr_rings);
+ rc = bnxt_hwrm_vnic_alloc(bp, BNXT_VNIC_DEFAULT, 0, rx_nr_rings);
if (rc) {
netdev_err(bp->dev, "hwrm vnic alloc failure rc: %x\n", rc);
goto err_out;
@@ -9786,7 +9988,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
if (BNXT_VF(bp))
bnxt_hwrm_func_qcfg(bp);
- rc = bnxt_setup_vnic(bp, 0);
+ rc = bnxt_setup_vnic(bp, BNXT_VNIC_DEFAULT);
if (rc)
goto err_out;
if (bp->rss_cap & BNXT_RSS_CAP_RSS_HASH_TYPE_DELTA)
@@ -9938,7 +10140,7 @@ static int __bnxt_num_tx_to_cp(struct bnxt *bp, int tx, int tx_sets, int tx_xdp)
int bnxt_num_tx_to_cp(struct bnxt *bp, int tx)
{
- int tcs = netdev_get_num_tc(bp->dev);
+ int tcs = bp->num_tc;
if (!tcs)
tcs = 1;
@@ -9947,7 +10149,7 @@ int bnxt_num_tx_to_cp(struct bnxt *bp, int tx)
static int bnxt_num_cp_to_tx(struct bnxt *bp, int tx_cp)
{
- int tcs = netdev_get_num_tc(bp->dev);
+ int tcs = bp->num_tc;
return (tx_cp - bp->tx_nr_rings_xdp) * tcs +
bp->tx_nr_rings_xdp;
@@ -9977,7 +10179,7 @@ static void bnxt_setup_msix(struct bnxt *bp)
struct net_device *dev = bp->dev;
int tcs, i;
- tcs = netdev_get_num_tc(dev);
+ tcs = bp->num_tc;
if (tcs) {
int i, off, count;
@@ -10009,8 +10211,10 @@ static void bnxt_setup_inta(struct bnxt *bp)
{
const int len = sizeof(bp->irq_tbl[0].name);
- if (netdev_get_num_tc(bp->dev))
+ if (bp->num_tc) {
netdev_reset_tc(bp->dev);
+ bp->num_tc = 0;
+ }
snprintf(bp->irq_tbl[0].name, len, "%s-%s-%d", bp->dev->name, "TxRx",
0);
@@ -10236,8 +10440,8 @@ static void bnxt_clear_int_mode(struct bnxt *bp)
int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
{
- int tcs = netdev_get_num_tc(bp->dev);
bool irq_cleared = false;
+ int tcs = bp->num_tc;
int rc;
if (!bnxt_need_reserve_rings(bp))
@@ -10263,6 +10467,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
bp->tx_nr_rings - bp->tx_nr_rings_xdp)) {
netdev_err(bp->dev, "tx ring reservation failure\n");
netdev_reset_tc(bp->dev);
+ bp->num_tc = 0;
if (bp->tx_nr_rings_xdp)
bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp;
else
@@ -10610,10 +10815,10 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
bp->phy_flags = resp->flags | (le16_to_cpu(resp->flags2) << 8);
if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
- struct ethtool_eee *eee = &bp->eee;
+ struct ethtool_keee *eee = &bp->eee;
u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
- eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
+ _bnxt_fw_to_linkmode(eee->supported, fw_speeds);
bp->lpi_tmr_lo = le32_to_cpu(resp->tx_lpi_timer_low) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK;
bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
@@ -10755,7 +10960,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
link_info->module_status = resp->module_status;
if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) {
- struct ethtool_eee *eee = &bp->eee;
+ struct ethtool_keee *eee = &bp->eee;
u16 fw_speeds;
eee->eee_active = 0;
@@ -10764,8 +10969,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
eee->eee_active = 1;
fw_speeds = le16_to_cpu(
resp->link_partner_adv_eee_link_speed_mask);
- eee->lp_advertised =
- _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
+ _bnxt_fw_to_linkmode(eee->lp_advertised, fw_speeds);
}
/* Pull initial EEE config */
@@ -10775,8 +10979,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
eee->eee_enabled = 1;
fw_speeds = le16_to_cpu(resp->adv_eee_link_speed_mask);
- eee->advertised =
- _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
+ _bnxt_fw_to_linkmode(eee->advertised, fw_speeds);
if (resp->eee_config_phy_addr &
PORT_PHY_QCFG_RESP_EEE_CONFIG_EEE_TX_LPI) {
@@ -10946,7 +11149,7 @@ int bnxt_hwrm_set_pause(struct bnxt *bp)
static void bnxt_hwrm_set_eee(struct bnxt *bp,
struct hwrm_port_phy_cfg_input *req)
{
- struct ethtool_eee *eee = &bp->eee;
+ struct ethtool_keee *eee = &bp->eee;
if (eee->eee_enabled) {
u16 eee_speeds;
@@ -11076,6 +11279,7 @@ static void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset)
hw_resc->resv_rx_rings = 0;
hw_resc->resv_hw_ring_grps = 0;
hw_resc->resv_vnics = 0;
+ hw_resc->resv_rsscos_ctxs = 0;
if (!fw_reset) {
bp->tx_nr_rings = 0;
bp->rx_nr_rings = 0;
@@ -11311,22 +11515,25 @@ static void bnxt_get_wol_settings(struct bnxt *bp)
static bool bnxt_eee_config_ok(struct bnxt *bp)
{
- struct ethtool_eee *eee = &bp->eee;
+ struct ethtool_keee *eee = &bp->eee;
struct bnxt_link_info *link_info = &bp->link_info;
if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return true;
if (eee->eee_enabled) {
- u32 advertising =
- _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp);
+
+ _bnxt_fw_to_linkmode(advertising, link_info->advertising);
if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
eee->eee_enabled = 0;
return false;
}
- if (eee->advertised & ~advertising) {
- eee->advertised = advertising & eee->supported;
+ if (linkmode_andnot(tmp, eee->advertised, advertising)) {
+ linkmode_and(eee->advertised, advertising,
+ eee->supported);
return false;
}
}
@@ -11431,6 +11638,42 @@ static int bnxt_reinit_after_abort(struct bnxt *bp)
return rc;
}
+static void bnxt_cfg_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr)
+{
+ struct bnxt_ntuple_filter *ntp_fltr;
+ struct bnxt_l2_filter *l2_fltr;
+
+ if (list_empty(&fltr->list))
+ return;
+
+ if (fltr->type == BNXT_FLTR_TYPE_NTUPLE) {
+ ntp_fltr = container_of(fltr, struct bnxt_ntuple_filter, base);
+ l2_fltr = bp->vnic_info[BNXT_VNIC_DEFAULT].l2_filters[0];
+ atomic_inc(&l2_fltr->refcnt);
+ ntp_fltr->l2_fltr = l2_fltr;
+ if (bnxt_hwrm_cfa_ntuple_filter_alloc(bp, ntp_fltr)) {
+ bnxt_del_ntp_filter(bp, ntp_fltr);
+ netdev_err(bp->dev, "restoring previously configured ntuple filter id %d failed\n",
+ fltr->sw_id);
+ }
+ } else if (fltr->type == BNXT_FLTR_TYPE_L2) {
+ l2_fltr = container_of(fltr, struct bnxt_l2_filter, base);
+ if (bnxt_hwrm_l2_filter_alloc(bp, l2_fltr)) {
+ bnxt_del_l2_filter(bp, l2_fltr);
+ netdev_err(bp->dev, "restoring previously configured l2 filter id %d failed\n",
+ fltr->sw_id);
+ }
+ }
+}
+
+static void bnxt_cfg_usr_fltrs(struct bnxt *bp)
+{
+ struct bnxt_filter_base *usr_fltr, *tmp;
+
+ list_for_each_entry_safe(usr_fltr, tmp, &bp->usr_fltr_list, list)
+ bnxt_cfg_one_usr_fltr(bp, usr_fltr);
+}
+
static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
{
int rc = 0;
@@ -11517,6 +11760,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
bnxt_vf_reps_open(bp);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
+ bnxt_cfg_usr_fltrs(bp);
return 0;
open_err_irq:
@@ -11564,10 +11808,12 @@ int bnxt_half_open_nic(struct bnxt *bp)
netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
goto half_open_err;
}
+ bnxt_init_napi(bp);
set_bit(BNXT_STATE_HALF_OPEN, &bp->state);
rc = bnxt_init_nic(bp, true);
if (rc) {
clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
+ bnxt_del_napi(bp);
netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
goto half_open_err;
}
@@ -11586,6 +11832,7 @@ half_open_err:
void bnxt_half_close_nic(struct bnxt *bp)
{
bnxt_hwrm_resource_free(bp, false, true);
+ bnxt_del_napi(bp);
bnxt_free_skbs(bp);
bnxt_free_mem(bp, true);
clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
@@ -11955,8 +12202,8 @@ void bnxt_get_ring_err_stats(struct bnxt *bp,
static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask)
{
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
struct net_device *dev = bp->dev;
- struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
struct netdev_hw_addr *ha;
u8 *haddr;
int mc_count = 0;
@@ -11990,7 +12237,7 @@ static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask)
static bool bnxt_uc_list_updated(struct bnxt *bp)
{
struct net_device *dev = bp->dev;
- struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
struct netdev_hw_addr *ha;
int off = 0;
@@ -12017,7 +12264,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
if (!test_bit(BNXT_STATE_OPEN, &bp->state))
return;
- vnic = &bp->vnic_info[0];
+ vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
mask = vnic->rx_mask;
mask &= ~(CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS |
CFA_L2_SET_RX_MASK_REQ_MASK_MCAST |
@@ -12048,7 +12295,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
static int bnxt_cfg_rx_mode(struct bnxt *bp)
{
struct net_device *dev = bp->dev;
- struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
struct netdev_hw_addr *ha;
int i, off = 0, rc;
bool uc_update;
@@ -12160,21 +12407,32 @@ static bool bnxt_rfs_supported(struct bnxt *bp)
/* If runtime conditions support RFS */
static bool bnxt_rfs_capable(struct bnxt *bp)
{
- int vnics, max_vnics, max_rss_ctxs;
+ struct bnxt_hw_rings hwr = {0};
+ int max_vnics, max_rss_ctxs;
+ hwr.rss_ctx = 1;
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) {
+ /* 2 VNICS: default + Ntuple */
+ hwr.vnic = 2;
+ hwr.rss_ctx = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) *
+ hwr.vnic;
+ goto check_reserve_vnic;
+ }
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return bnxt_rfs_supported(bp);
if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings)
return false;
- vnics = 1 + bp->rx_nr_rings;
+ hwr.vnic = 1 + bp->rx_nr_rings;
+check_reserve_vnic:
max_vnics = bnxt_get_max_func_vnics(bp);
max_rss_ctxs = bnxt_get_max_func_rss_ctxs(bp);
- /* RSS contexts not a limiting factor */
- if (bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP)
- max_rss_ctxs = max_vnics;
- if (vnics > max_vnics || vnics > max_rss_ctxs) {
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
+ !(bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP))
+ hwr.rss_ctx = hwr.vnic;
+
+ if (hwr.vnic > max_vnics || hwr.rss_ctx > max_rss_ctxs) {
if (bp->rx_nr_rings > 1)
netdev_warn(bp->dev,
"Not enough resources to support NTUPLE filters, enough resources for up to %d rx rings\n",
@@ -12185,15 +12443,19 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
if (!BNXT_NEW_RM(bp))
return true;
- if (vnics == bp->hw_resc.resv_vnics)
+ if (hwr.vnic == bp->hw_resc.resv_vnics &&
+ hwr.rss_ctx <= bp->hw_resc.resv_rsscos_ctxs)
return true;
- bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, vnics);
- if (vnics <= bp->hw_resc.resv_vnics)
+ bnxt_hwrm_reserve_rings(bp, &hwr);
+ if (hwr.vnic <= bp->hw_resc.resv_vnics &&
+ hwr.rss_ctx <= bp->hw_resc.resv_rsscos_ctxs)
return true;
netdev_warn(bp->dev, "Unable to reserve resources to support NTUPLE filters.\n");
- bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, 1);
+ hwr.vnic = 1;
+ hwr.rss_ctx = 0;
+ bnxt_hwrm_reserve_rings(bp, &hwr);
return false;
}
@@ -12232,14 +12494,24 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
return features;
}
+static int bnxt_reinit_features(struct bnxt *bp, bool irq_re_init,
+ bool link_re_init, u32 flags, bool update_tpa)
+{
+ bnxt_close_nic(bp, irq_re_init, link_re_init);
+ bp->flags = flags;
+ if (update_tpa)
+ bnxt_set_ring_params(bp);
+ return bnxt_open_nic(bp, irq_re_init, link_re_init);
+}
+
static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
{
+ bool update_tpa = false, update_ntuple = false;
struct bnxt *bp = netdev_priv(dev);
u32 flags = bp->flags;
u32 changes;
int rc = 0;
bool re_init = false;
- bool update_tpa = false;
flags &= ~BNXT_FLAG_ALL_CONFIG_FEATS;
if (features & NETIF_F_GRO_HW)
@@ -12255,6 +12527,8 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
if (features & NETIF_F_NTUPLE)
flags |= BNXT_FLAG_RFS;
+ else
+ bnxt_clear_usr_fltrs(bp, true);
changes = flags ^ bp->flags;
if (changes & BNXT_FLAG_TPA) {
@@ -12268,6 +12542,9 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
if (changes & ~BNXT_FLAG_TPA)
re_init = true;
+ if (changes & BNXT_FLAG_RFS)
+ update_ntuple = true;
+
if (flags != bp->flags) {
u32 old_flags = bp->flags;
@@ -12278,14 +12555,12 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
return rc;
}
- if (re_init) {
- bnxt_close_nic(bp, false, false);
- bp->flags = flags;
- if (update_tpa)
- bnxt_set_ring_params(bp);
+ if (update_ntuple)
+ return bnxt_reinit_features(bp, true, false, flags, update_tpa);
+
+ if (re_init)
+ return bnxt_reinit_features(bp, false, false, flags, update_tpa);
- return bnxt_open_nic(bp, false, false);
- }
if (update_tpa) {
bp->flags = flags;
rc = bnxt_set_tpa(bp,
@@ -13115,9 +13390,8 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
int tx_xdp)
{
int max_rx, max_tx, max_cp, tx_sets = 1, tx_cp;
- int tx_rings_needed, stats;
+ struct bnxt_hw_rings hwr = {0};
int rx_rings = rx;
- int cp, vnics;
if (tcs)
tx_sets = tcs;
@@ -13130,26 +13404,27 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx_rings <<= 1;
- tx_rings_needed = tx * tx_sets + tx_xdp;
- if (max_tx < tx_rings_needed)
+ hwr.rx = rx_rings;
+ hwr.tx = tx * tx_sets + tx_xdp;
+ if (max_tx < hwr.tx)
return -ENOMEM;
- vnics = 1;
- if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) ==
- BNXT_FLAG_RFS)
- vnics += rx;
+ hwr.vnic = bnxt_get_total_vnics(bp, rx);
- tx_cp = __bnxt_num_tx_to_cp(bp, tx_rings_needed, tx_sets, tx_xdp);
- cp = sh ? max_t(int, tx_cp, rx) : tx_cp + rx;
- if (max_cp < cp)
+ tx_cp = __bnxt_num_tx_to_cp(bp, hwr.tx, tx_sets, tx_xdp);
+ hwr.cp = sh ? max_t(int, tx_cp, rx) : tx_cp + rx;
+ if (max_cp < hwr.cp)
return -ENOMEM;
- stats = cp;
+ hwr.stat = hwr.cp;
if (BNXT_NEW_RM(bp)) {
- cp += bnxt_get_ulp_msix_num(bp);
- stats += bnxt_get_ulp_stat_ctxs(bp);
+ hwr.cp += bnxt_get_ulp_msix_num(bp);
+ hwr.stat += bnxt_get_ulp_stat_ctxs(bp);
+ hwr.grp = rx;
+ hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr);
}
- return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
- stats, vnics);
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ hwr.cp_p5 = hwr.tx + rx;
+ return bnxt_hwrm_check_rings(bp, &hwr);
}
static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@ -13232,6 +13507,11 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp)
bp->fw_cap = 0;
rc = bnxt_hwrm_ver_get(bp);
+ /* FW may be unresponsive after FLR. FLR must complete within 100 msec
+ * so wait before continuing with recovery.
+ */
+ if (rc)
+ msleep(100);
bnxt_try_map_fw_health_reg(bp);
if (rc) {
rc = bnxt_try_recover_fw(bp);
@@ -13747,6 +14027,7 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p)
return rc;
eth_hw_addr_set(dev, addr->sa_data);
+ bnxt_clear_usr_fltrs(bp, true);
if (netif_running(dev)) {
bnxt_close_nic(bp, false, false);
rc = bnxt_open_nic(bp, false, false);
@@ -13784,7 +14065,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
return -EINVAL;
}
- if (netdev_get_num_tc(dev) == tc)
+ if (bp->num_tc == tc)
return 0;
if (bp->flags & BNXT_FLAG_SHARED_RINGS)
@@ -13802,9 +14083,11 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
if (tc) {
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc;
netdev_set_num_tc(dev, tc);
+ bp->num_tc = tc;
} else {
bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
netdev_reset_tc(dev);
+ bp->num_tc = 0;
}
bp->tx_nr_rings += bp->tx_nr_rings_xdp;
tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
@@ -13867,7 +14150,7 @@ u32 bnxt_get_ntp_filter_idx(struct bnxt *bp, struct flow_keys *fkeys,
if (skb)
return skb_get_hash_raw(skb) & BNXT_NTP_FLTR_HASH_MASK;
- vnic = &bp->vnic_info[0];
+ vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
return bnxt_toeplitz(bp, fkeys, (void *)vnic->rss_hash_key);
}
@@ -13878,7 +14161,7 @@ int bnxt_insert_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr,
int bit_id;
spin_lock_bh(&bp->ntp_fltr_lock);
- bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap, BNXT_MAX_FLTR, 0);
+ bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap, bp->max_fltr, 0);
if (bit_id < 0) {
spin_unlock_bh(&bp->ntp_fltr_lock);
return -ENOMEM;
@@ -13890,6 +14173,7 @@ int bnxt_insert_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr,
head = &bp->ntp_fltr_hash_tbl[idx];
hlist_add_head_rcu(&fltr->base.hash, head);
set_bit(BNXT_FLTR_INSERTED, &fltr->base.state);
+ bnxt_insert_usr_fltr(bp, &fltr->base);
bp->ntp_fltr_count++;
spin_unlock_bh(&bp->ntp_fltr_lock);
return 0;
@@ -13898,45 +14182,39 @@ int bnxt_insert_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr,
static bool bnxt_fltr_match(struct bnxt_ntuple_filter *f1,
struct bnxt_ntuple_filter *f2)
{
+ struct bnxt_flow_masks *masks1 = &f1->fmasks;
+ struct bnxt_flow_masks *masks2 = &f2->fmasks;
struct flow_keys *keys1 = &f1->fkeys;
struct flow_keys *keys2 = &f2->fkeys;
- if (f1->ntuple_flags != f2->ntuple_flags)
- return false;
-
if (keys1->basic.n_proto != keys2->basic.n_proto ||
keys1->basic.ip_proto != keys2->basic.ip_proto)
return false;
if (keys1->basic.n_proto == htons(ETH_P_IP)) {
- if (((f1->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) &&
- keys1->addrs.v4addrs.src != keys2->addrs.v4addrs.src) ||
- ((f1->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) &&
- keys1->addrs.v4addrs.dst != keys2->addrs.v4addrs.dst))
+ if (keys1->addrs.v4addrs.src != keys2->addrs.v4addrs.src ||
+ masks1->addrs.v4addrs.src != masks2->addrs.v4addrs.src ||
+ keys1->addrs.v4addrs.dst != keys2->addrs.v4addrs.dst ||
+ masks1->addrs.v4addrs.dst != masks2->addrs.v4addrs.dst)
return false;
} else {
- if (((f1->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) &&
- memcmp(&keys1->addrs.v6addrs.src,
- &keys2->addrs.v6addrs.src,
- sizeof(keys1->addrs.v6addrs.src))) ||
- ((f1->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) &&
- memcmp(&keys1->addrs.v6addrs.dst,
- &keys2->addrs.v6addrs.dst,
- sizeof(keys1->addrs.v6addrs.dst))))
+ if (!ipv6_addr_equal(&keys1->addrs.v6addrs.src,
+ &keys2->addrs.v6addrs.src) ||
+ !ipv6_addr_equal(&masks1->addrs.v6addrs.src,
+ &masks2->addrs.v6addrs.src) ||
+ !ipv6_addr_equal(&keys1->addrs.v6addrs.dst,
+ &keys2->addrs.v6addrs.dst) ||
+ !ipv6_addr_equal(&masks1->addrs.v6addrs.dst,
+ &masks2->addrs.v6addrs.dst))
return false;
}
- if (((f1->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) &&
- keys1->ports.src != keys2->ports.src) ||
- ((f1->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) &&
- keys1->ports.dst != keys2->ports.dst))
- return false;
-
- if (keys1->control.flags == keys2->control.flags &&
- f1->l2_fltr == f2->l2_fltr)
- return true;
-
- return false;
+ return keys1->ports.src == keys2->ports.src &&
+ masks1->ports.src == masks2->ports.src &&
+ keys1->ports.dst == keys2->ports.dst &&
+ masks1->ports.dst == masks2->ports.dst &&
+ keys1->control.flags == keys2->control.flags &&
+ f1->l2_fltr == f2->l2_fltr;
}
struct bnxt_ntuple_filter *
@@ -13967,7 +14245,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u32 flags;
if (ether_addr_equal(dev->dev_addr, eth->h_dest)) {
- l2_fltr = bp->vnic_info[0].l2_filters[0];
+ l2_fltr = bp->vnic_info[BNXT_VNIC_DEFAULT].l2_filters[0];
atomic_inc(&l2_fltr->refcnt);
} else {
struct bnxt_l2_key key;
@@ -14001,10 +14279,13 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
rc = -EPROTONOSUPPORT;
goto err_free;
}
- if (fkeys->basic.n_proto == htons(ETH_P_IPV6) &&
- bp->hwrm_spec_code < 0x10601) {
- rc = -EPROTONOSUPPORT;
- goto err_free;
+ new_fltr->fmasks = BNXT_FLOW_IPV4_MASK_ALL;
+ if (fkeys->basic.n_proto == htons(ETH_P_IPV6)) {
+ if (bp->hwrm_spec_code < 0x10601) {
+ rc = -EPROTONOSUPPORT;
+ goto err_free;
+ }
+ new_fltr->fmasks = BNXT_FLOW_IPV6_MASK_ALL;
}
flags = fkeys->control.flags;
if (((flags & FLOW_DIS_ENCAPSULATION) &&
@@ -14012,9 +14293,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
rc = -EPROTONOSUPPORT;
goto err_free;
}
-
new_fltr->l2_fltr = l2_fltr;
- new_fltr->ntuple_flags = BNXT_NTUPLE_MATCH_ALL;
idx = bnxt_get_ntp_filter_idx(bp, fkeys, skb);
rcu_read_lock();
@@ -14049,6 +14328,7 @@ void bnxt_del_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr)
return;
}
hlist_del_rcu(&fltr->base.hash);
+ bnxt_del_one_usr_fltr(bp, &fltr->base);
bp->ntp_fltr_count--;
spin_unlock_bh(&bp->ntp_fltr_lock);
bnxt_del_l2_filter(bp, fltr->l2_fltr);
@@ -14243,6 +14523,70 @@ static const struct net_device_ops bnxt_netdev_ops = {
.ndo_bridge_setlink = bnxt_bridge_setlink,
};
+static void bnxt_get_queue_stats_rx(struct net_device *dev, int i,
+ struct netdev_queue_stats_rx *stats)
+{
+ struct bnxt *bp = netdev_priv(dev);
+ struct bnxt_cp_ring_info *cpr;
+ u64 *sw;
+
+ cpr = &bp->bnapi[i]->cp_ring;
+ sw = cpr->stats.sw_stats;
+
+ stats->packets = 0;
+ stats->packets += BNXT_GET_RING_STATS64(sw, rx_ucast_pkts);
+ stats->packets += BNXT_GET_RING_STATS64(sw, rx_mcast_pkts);
+ stats->packets += BNXT_GET_RING_STATS64(sw, rx_bcast_pkts);
+
+ stats->bytes = 0;
+ stats->bytes += BNXT_GET_RING_STATS64(sw, rx_ucast_bytes);
+ stats->bytes += BNXT_GET_RING_STATS64(sw, rx_mcast_bytes);
+ stats->bytes += BNXT_GET_RING_STATS64(sw, rx_bcast_bytes);
+
+ stats->alloc_fail = cpr->sw_stats.rx.rx_oom_discards;
+}
+
+static void bnxt_get_queue_stats_tx(struct net_device *dev, int i,
+ struct netdev_queue_stats_tx *stats)
+{
+ struct bnxt *bp = netdev_priv(dev);
+ struct bnxt_napi *bnapi;
+ u64 *sw;
+
+ bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi;
+ sw = bnapi->cp_ring.stats.sw_stats;
+
+ stats->packets = 0;
+ stats->packets += BNXT_GET_RING_STATS64(sw, tx_ucast_pkts);
+ stats->packets += BNXT_GET_RING_STATS64(sw, tx_mcast_pkts);
+ stats->packets += BNXT_GET_RING_STATS64(sw, tx_bcast_pkts);
+
+ stats->bytes = 0;
+ stats->bytes += BNXT_GET_RING_STATS64(sw, tx_ucast_bytes);
+ stats->bytes += BNXT_GET_RING_STATS64(sw, tx_mcast_bytes);
+ stats->bytes += BNXT_GET_RING_STATS64(sw, tx_bcast_bytes);
+}
+
+static void bnxt_get_base_stats(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ struct bnxt *bp = netdev_priv(dev);
+
+ rx->packets = bp->net_stats_prev.rx_packets;
+ rx->bytes = bp->net_stats_prev.rx_bytes;
+ rx->alloc_fail = bp->ring_err_stats_prev.rx_total_oom_discards;
+
+ tx->packets = bp->net_stats_prev.tx_packets;
+ tx->bytes = bp->net_stats_prev.tx_bytes;
+}
+
+static const struct netdev_stat_ops bnxt_stat_ops = {
+ .get_queue_stats_rx = bnxt_get_queue_stats_rx,
+ .get_queue_stats_tx = bnxt_get_queue_stats_tx,
+ .get_base_stats = bnxt_get_base_stats,
+};
+
static void bnxt_remove_one(struct pci_dev *pdev)
{
struct net_device *dev = pci_get_drvdata(pdev);
@@ -14648,6 +14992,7 @@ void bnxt_print_device_info(struct bnxt *bp)
static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
+ struct bnxt_hw_resc *hw_resc;
struct net_device *dev;
struct bnxt *bp;
int rc, max_irqs;
@@ -14689,6 +15034,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto init_err_free;
dev->netdev_ops = &bnxt_netdev_ops;
+ dev->stat_ops = &bnxt_stat_ops;
dev->watchdog_timeo = BNXT_TX_TIMEOUT;
dev->ethtool_ops = &bnxt_ethtool_ops;
pci_set_drvdata(pdev, dev);
@@ -14806,6 +15152,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_pci_clean;
+ hw_resc = &bp->hw_resc;
+ bp->max_fltr = hw_resc->max_rx_em_flows + hw_resc->max_rx_wm_flows +
+ BNXT_L2_FLTR_MAX_FLTR;
+ /* Older firmware may not report these filters properly */
+ if (bp->max_fltr < BNXT_MAX_FLTR)
+ bp->max_fltr = BNXT_MAX_FLTR;
bnxt_init_l2_fltr_tbl(bp);
bnxt_set_rx_skb_mode(bp, false);
bnxt_set_tpa_flags(bp);
@@ -14858,6 +15210,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_dl;
+ INIT_LIST_HEAD(&bp->usr_fltr_list);
+
rc = register_netdev(dev);
if (rc)
goto init_err_cleanup;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index b8ef1717cb65..dd849e715c9b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1213,6 +1213,9 @@ struct bnxt_ring_grp_info {
u16 cp_fw_ring_id;
};
+#define BNXT_VNIC_DEFAULT 0
+#define BNXT_VNIC_NTUPLE 1
+
struct bnxt_vnic_info {
u16 fw_vnic_id; /* returned by Chimp during alloc */
#define BNXT_MAX_CTX_PER_VNIC 8
@@ -1252,11 +1255,24 @@ struct bnxt_vnic_info {
#define BNXT_VNIC_MCAST_FLAG 4
#define BNXT_VNIC_UCAST_FLAG 8
#define BNXT_VNIC_RFS_NEW_RSS_FLAG 0x10
+#define BNXT_VNIC_NTUPLE_FLAG 0x20
+};
+
+struct bnxt_hw_rings {
+ int tx;
+ int rx;
+ int grp;
+ int cp;
+ int cp_p5;
+ int stat;
+ int vnic;
+ int rss_ctx;
};
struct bnxt_hw_resc {
u16 min_rsscos_ctxs;
u16 max_rsscos_ctxs;
+ u16 resv_rsscos_ctxs;
u16 min_cp_rings;
u16 max_cp_rings;
u16 resv_cp_rings;
@@ -1281,6 +1297,12 @@ struct bnxt_hw_resc {
u16 max_nqs;
u16 max_irqs;
u16 resv_irqs;
+ u32 max_encap_records;
+ u32 max_decap_records;
+ u32 max_tx_em_flows;
+ u32 max_tx_wm_flows;
+ u32 max_rx_em_flows;
+ u32 max_rx_wm_flows;
};
#if defined(CONFIG_BNXT_SRIOV)
@@ -1315,12 +1337,6 @@ struct bnxt_pf_info {
u16 active_vfs;
u16 registered_vfs;
u16 max_vfs;
- u32 max_encap_records;
- u32 max_decap_records;
- u32 max_tx_em_flows;
- u32 max_tx_wm_flows;
- u32 max_rx_em_flows;
- u32 max_rx_wm_flows;
unsigned long *vf_event_bmap;
u16 hwrm_cmd_req_pages;
u8 vf_resv_strategy;
@@ -1334,6 +1350,7 @@ struct bnxt_pf_info {
struct bnxt_filter_base {
struct hlist_node hash;
+ struct list_head list;
__le64 filter_id;
u8 type;
#define BNXT_FLTR_TYPE_NTUPLE 1
@@ -1355,19 +1372,21 @@ struct bnxt_filter_base {
struct rcu_head rcu;
};
+struct bnxt_flow_masks {
+ struct flow_dissector_key_ports ports;
+ struct flow_dissector_key_addrs addrs;
+};
+
+extern const struct bnxt_flow_masks BNXT_FLOW_MASK_NONE;
+extern const struct bnxt_flow_masks BNXT_FLOW_IPV6_MASK_ALL;
+extern const struct bnxt_flow_masks BNXT_FLOW_IPV4_MASK_ALL;
+
struct bnxt_ntuple_filter {
+ /* base filter must be the first member */
struct bnxt_filter_base base;
struct flow_keys fkeys;
+ struct bnxt_flow_masks fmasks;
struct bnxt_l2_filter *l2_fltr;
- u32 ntuple_flags;
-#define BNXT_NTUPLE_MATCH_SRC_IP 1
-#define BNXT_NTUPLE_MATCH_DST_IP 2
-#define BNXT_NTUPLE_MATCH_SRC_PORT 4
-#define BNXT_NTUPLE_MATCH_DST_PORT 8
-#define BNXT_NTUPLE_MATCH_ALL (BNXT_NTUPLE_MATCH_SRC_IP | \
- BNXT_NTUPLE_MATCH_DST_IP | \
- BNXT_NTUPLE_MATCH_SRC_PORT | \
- BNXT_NTUPLE_MATCH_DST_PORT)
u32 flow_id;
};
@@ -1394,6 +1413,7 @@ struct bnxt_ipv6_tuple {
#define BNXT_L2_KEY_SIZE (sizeof(struct bnxt_l2_key) / 4)
struct bnxt_l2_filter {
+ /* base filter must be the first member */
struct bnxt_filter_base base;
struct bnxt_l2_key l2_key;
atomic_t refcnt;
@@ -2217,6 +2237,14 @@ struct bnxt {
#define BNXT_RSS_CAP_UDP_RSS_CAP BIT(1)
#define BNXT_RSS_CAP_NEW_RSS_CAP BIT(2)
#define BNXT_RSS_CAP_RSS_TCAM BIT(3)
+#define BNXT_RSS_CAP_AH_V4_RSS_CAP BIT(4)
+#define BNXT_RSS_CAP_AH_V6_RSS_CAP BIT(5)
+#define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6)
+#define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7)
+
+ u8 rss_hash_key[HW_HASH_KEY_SIZE];
+ u8 rss_hash_key_valid:1;
+ u8 rss_hash_key_updated:1;
u16 max_mtu;
u8 max_tc;
@@ -2225,6 +2253,7 @@ struct bnxt {
u8 tc_to_qidx[BNXT_MAX_QUEUE];
u8 q_ids[BNXT_MAX_QUEUE];
u8 max_q;
+ u8 num_tc;
unsigned int current_interval;
#define BNXT_TIMER_INTERVAL HZ
@@ -2300,12 +2329,17 @@ struct bnxt {
#define BNXT_FW_CAP_PRE_RESV_VNICS BIT_ULL(35)
#define BNXT_FW_CAP_BACKING_STORE_V2 BIT_ULL(36)
#define BNXT_FW_CAP_VNIC_TUNNEL_TPA BIT_ULL(37)
+ #define BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO BIT_ULL(38)
+ #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3 BIT_ULL(39)
u32 fw_dbg_cap;
#define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
#define BNXT_PTP_USE_RTC(bp) (!BNXT_MH(bp) && \
((bp)->fw_cap & BNXT_FW_CAP_PTP_RTC))
+#define BNXT_SUPPORTS_NTUPLE_VNIC(bp) \
+ (BNXT_PF(bp) && ((bp)->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3))
+
u32 hwrm_spec_code;
u16 hwrm_cmd_seq;
u16 hwrm_cmd_kong_seq;
@@ -2427,6 +2461,7 @@ struct bnxt {
unsigned long *ntp_fltr_bmap;
int ntp_fltr_count;
+ int max_fltr;
#define BNXT_L2_FLTR_MAX_FLTR 1024
#define BNXT_L2_FLTR_HASH_SIZE 32
@@ -2436,12 +2471,14 @@ struct bnxt {
u32 hash_seed;
u64 toeplitz_prefix;
+ struct list_head usr_fltr_list;
+
/* To protect link related settings during link changes and
* ethtool settings changes.
*/
struct mutex link_lock;
struct bnxt_link_info link_info;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
u32 lpi_tmr_lo;
u32 lpi_tmr_hi;
@@ -2640,10 +2677,16 @@ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
void bnxt_set_tpa_flags(struct bnxt *bp);
void bnxt_set_ring_params(struct bnxt *);
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
+void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr);
+void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr);
+void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all);
int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
int bmap_size, bool async_only);
int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp);
void bnxt_del_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr);
+struct bnxt_l2_filter *bnxt_alloc_new_l2_filter(struct bnxt *bp,
+ struct bnxt_l2_key *key,
+ u16 flags);
int bnxt_hwrm_l2_filter_free(struct bnxt *bp, struct bnxt_l2_filter *fltr);
int bnxt_hwrm_l2_filter_alloc(struct bnxt *bp, struct bnxt_l2_filter *fltr);
int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 63e067038385..0dbb880a7aa0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -228,7 +228,7 @@ static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
}
}
if (bp->ieee_ets) {
- int tc = netdev_get_num_tc(bp->dev);
+ int tc = bp->num_tc;
if (!tc)
tc = 1;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 27b983c0a8a9..1d240a27455a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -884,7 +884,7 @@ static void bnxt_get_channels(struct net_device *dev,
if (max_tx_sch_inputs)
max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs);
- tcs = netdev_get_num_tc(dev);
+ tcs = bp->num_tc;
tx_grps = max(tcs, 1);
if (bp->tx_nr_rings_xdp)
tx_grps++;
@@ -944,7 +944,7 @@ static int bnxt_set_channels(struct net_device *dev,
if (channel->combined_count)
sh = true;
- tcs = netdev_get_num_tc(dev);
+ tcs = bp->num_tc;
req_tx_rings = sh ? channel->combined_count : channel->tx_count;
req_rx_rings = sh ? channel->combined_count : channel->rx_count;
@@ -968,6 +968,7 @@ static int bnxt_set_channels(struct net_device *dev,
return -EINVAL;
}
+ bnxt_clear_usr_fltrs(bp, true);
if (netif_running(dev)) {
if (BNXT_PF(bp)) {
/* TODO CHIMP_FW: Send message to all VF's
@@ -1058,11 +1059,17 @@ static struct bnxt_filter_base *bnxt_get_one_fltr_rcu(struct bnxt *bp,
static int bnxt_grxclsrlall(struct bnxt *bp, struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
+ u32 count;
+
cmd->data = bp->ntp_fltr_count;
rcu_read_lock();
+ count = bnxt_get_all_fltr_ids_rcu(bp, bp->l2_fltr_hash_tbl,
+ BNXT_L2_FLTR_HASH_SIZE, rule_locs, 0,
+ cmd->rule_cnt);
cmd->rule_cnt = bnxt_get_all_fltr_ids_rcu(bp, bp->ntp_fltr_hash_tbl,
BNXT_NTP_FLTR_HASH_SIZE,
- rule_locs, 0, cmd->rule_cnt);
+ rule_locs, count,
+ cmd->rule_cnt);
rcu_read_unlock();
return 0;
@@ -1074,13 +1081,44 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd)
(struct ethtool_rx_flow_spec *)&cmd->fs;
struct bnxt_filter_base *fltr_base;
struct bnxt_ntuple_filter *fltr;
+ struct bnxt_flow_masks *fmasks;
struct flow_keys *fkeys;
int rc = -EINVAL;
- if (fs->location >= BNXT_NTP_FLTR_MAX_FLTR)
+ if (fs->location >= bp->max_fltr)
return rc;
rcu_read_lock();
+ fltr_base = bnxt_get_one_fltr_rcu(bp, bp->l2_fltr_hash_tbl,
+ BNXT_L2_FLTR_HASH_SIZE,
+ fs->location);
+ if (fltr_base) {
+ struct ethhdr *h_ether = &fs->h_u.ether_spec;
+ struct ethhdr *m_ether = &fs->m_u.ether_spec;
+ struct bnxt_l2_filter *l2_fltr;
+ struct bnxt_l2_key *l2_key;
+
+ l2_fltr = container_of(fltr_base, struct bnxt_l2_filter, base);
+ l2_key = &l2_fltr->l2_key;
+ fs->flow_type = ETHER_FLOW;
+ ether_addr_copy(h_ether->h_dest, l2_key->dst_mac_addr);
+ eth_broadcast_addr(m_ether->h_dest);
+ if (l2_key->vlan) {
+ struct ethtool_flow_ext *m_ext = &fs->m_ext;
+ struct ethtool_flow_ext *h_ext = &fs->h_ext;
+
+ fs->flow_type |= FLOW_EXT;
+ m_ext->vlan_tci = htons(0xfff);
+ h_ext->vlan_tci = htons(l2_key->vlan);
+ }
+ if (fltr_base->flags & BNXT_ACT_RING_DST)
+ fs->ring_cookie = fltr_base->rxq;
+ if (fltr_base->flags & BNXT_ACT_FUNC_DST)
+ fs->ring_cookie = (u64)(fltr_base->vf_idx + 1) <<
+ ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+ rcu_read_unlock();
+ return 0;
+ }
fltr_base = bnxt_get_one_fltr_rcu(bp, bp->ntp_fltr_hash_tbl,
BNXT_NTP_FLTR_HASH_SIZE,
fs->location);
@@ -1091,59 +1129,74 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd)
fltr = container_of(fltr_base, struct bnxt_ntuple_filter, base);
fkeys = &fltr->fkeys;
+ fmasks = &fltr->fmasks;
if (fkeys->basic.n_proto == htons(ETH_P_IP)) {
- if (fkeys->basic.ip_proto == IPPROTO_TCP)
+ if (fkeys->basic.ip_proto == IPPROTO_ICMP ||
+ fkeys->basic.ip_proto == IPPROTO_RAW) {
+ fs->flow_type = IP_USER_FLOW;
+ fs->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+ if (fkeys->basic.ip_proto == IPPROTO_ICMP)
+ fs->h_u.usr_ip4_spec.proto = IPPROTO_ICMP;
+ else
+ fs->h_u.usr_ip4_spec.proto = IPPROTO_RAW;
+ fs->m_u.usr_ip4_spec.proto = BNXT_IP_PROTO_FULL_MASK;
+ } else if (fkeys->basic.ip_proto == IPPROTO_TCP) {
fs->flow_type = TCP_V4_FLOW;
- else if (fkeys->basic.ip_proto == IPPROTO_UDP)
+ } else if (fkeys->basic.ip_proto == IPPROTO_UDP) {
fs->flow_type = UDP_V4_FLOW;
- else
+ } else {
goto fltr_err;
-
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) {
- fs->h_u.tcp_ip4_spec.ip4src = fkeys->addrs.v4addrs.src;
- fs->m_u.tcp_ip4_spec.ip4src = cpu_to_be32(~0);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) {
- fs->h_u.tcp_ip4_spec.ip4dst = fkeys->addrs.v4addrs.dst;
- fs->m_u.tcp_ip4_spec.ip4dst = cpu_to_be32(~0);
}
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) {
+
+ fs->h_u.tcp_ip4_spec.ip4src = fkeys->addrs.v4addrs.src;
+ fs->m_u.tcp_ip4_spec.ip4src = fmasks->addrs.v4addrs.src;
+ fs->h_u.tcp_ip4_spec.ip4dst = fkeys->addrs.v4addrs.dst;
+ fs->m_u.tcp_ip4_spec.ip4dst = fmasks->addrs.v4addrs.dst;
+ if (fs->flow_type == TCP_V4_FLOW ||
+ fs->flow_type == UDP_V4_FLOW) {
fs->h_u.tcp_ip4_spec.psrc = fkeys->ports.src;
- fs->m_u.tcp_ip4_spec.psrc = cpu_to_be16(~0);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) {
+ fs->m_u.tcp_ip4_spec.psrc = fmasks->ports.src;
fs->h_u.tcp_ip4_spec.pdst = fkeys->ports.dst;
- fs->m_u.tcp_ip4_spec.pdst = cpu_to_be16(~0);
+ fs->m_u.tcp_ip4_spec.pdst = fmasks->ports.dst;
}
} else {
- if (fkeys->basic.ip_proto == IPPROTO_TCP)
+ if (fkeys->basic.ip_proto == IPPROTO_ICMPV6 ||
+ fkeys->basic.ip_proto == IPPROTO_RAW) {
+ fs->flow_type = IPV6_USER_FLOW;
+ if (fkeys->basic.ip_proto == IPPROTO_ICMPV6)
+ fs->h_u.usr_ip6_spec.l4_proto = IPPROTO_ICMPV6;
+ else
+ fs->h_u.usr_ip6_spec.l4_proto = IPPROTO_RAW;
+ fs->m_u.usr_ip6_spec.l4_proto = BNXT_IP_PROTO_FULL_MASK;
+ } else if (fkeys->basic.ip_proto == IPPROTO_TCP) {
fs->flow_type = TCP_V6_FLOW;
- else if (fkeys->basic.ip_proto == IPPROTO_UDP)
+ } else if (fkeys->basic.ip_proto == IPPROTO_UDP) {
fs->flow_type = UDP_V6_FLOW;
- else
+ } else {
goto fltr_err;
-
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) {
- *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6src[0] =
- fkeys->addrs.v6addrs.src;
- bnxt_fill_ipv6_mask(fs->m_u.tcp_ip6_spec.ip6src);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) {
- *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6dst[0] =
- fkeys->addrs.v6addrs.dst;
- bnxt_fill_ipv6_mask(fs->m_u.tcp_ip6_spec.ip6dst);
}
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) {
+
+ *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6src[0] =
+ fkeys->addrs.v6addrs.src;
+ *(struct in6_addr *)&fs->m_u.tcp_ip6_spec.ip6src[0] =
+ fmasks->addrs.v6addrs.src;
+ *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6dst[0] =
+ fkeys->addrs.v6addrs.dst;
+ *(struct in6_addr *)&fs->m_u.tcp_ip6_spec.ip6dst[0] =
+ fmasks->addrs.v6addrs.dst;
+ if (fs->flow_type == TCP_V6_FLOW ||
+ fs->flow_type == UDP_V6_FLOW) {
fs->h_u.tcp_ip6_spec.psrc = fkeys->ports.src;
- fs->m_u.tcp_ip6_spec.psrc = cpu_to_be16(~0);
- }
- if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) {
+ fs->m_u.tcp_ip6_spec.psrc = fmasks->ports.src;
fs->h_u.tcp_ip6_spec.pdst = fkeys->ports.dst;
- fs->m_u.tcp_ip6_spec.pdst = cpu_to_be16(~0);
+ fs->m_u.tcp_ip6_spec.pdst = fmasks->ports.dst;
}
}
- fs->ring_cookie = fltr->base.rxq;
+ if (fltr->base.flags & BNXT_ACT_DROP)
+ fs->ring_cookie = RX_CLS_FLOW_DISC;
+ else
+ fs->ring_cookie = fltr->base.rxq;
rc = 0;
fltr_err:
@@ -1152,17 +1205,78 @@ fltr_err:
return rc;
}
-#define IPV4_ALL_MASK ((__force __be32)~0)
-#define L4_PORT_ALL_MASK ((__force __be16)~0)
+static int bnxt_add_l2_cls_rule(struct bnxt *bp,
+ struct ethtool_rx_flow_spec *fs)
+{
+ u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
+ u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
+ struct ethhdr *h_ether = &fs->h_u.ether_spec;
+ struct ethhdr *m_ether = &fs->m_u.ether_spec;
+ struct bnxt_l2_filter *fltr;
+ struct bnxt_l2_key key;
+ u16 vnic_id;
+ u8 flags;
+ int rc;
+
+ if (BNXT_CHIP_P5_PLUS(bp))
+ return -EOPNOTSUPP;
-static bool ipv6_mask_is_full(__be32 mask[4])
+ if (!is_broadcast_ether_addr(m_ether->h_dest))
+ return -EINVAL;
+ ether_addr_copy(key.dst_mac_addr, h_ether->h_dest);
+ key.vlan = 0;
+ if (fs->flow_type & FLOW_EXT) {
+ struct ethtool_flow_ext *m_ext = &fs->m_ext;
+ struct ethtool_flow_ext *h_ext = &fs->h_ext;
+
+ if (m_ext->vlan_tci != htons(0xfff) || !h_ext->vlan_tci)
+ return -EINVAL;
+ key.vlan = ntohs(h_ext->vlan_tci);
+ }
+
+ if (vf) {
+ flags = BNXT_ACT_FUNC_DST;
+ vnic_id = 0xffff;
+ vf--;
+ } else {
+ flags = BNXT_ACT_RING_DST;
+ vnic_id = bp->vnic_info[ring + 1].fw_vnic_id;
+ }
+ fltr = bnxt_alloc_new_l2_filter(bp, &key, flags);
+ if (IS_ERR(fltr))
+ return PTR_ERR(fltr);
+
+ fltr->base.fw_vnic_id = vnic_id;
+ fltr->base.rxq = ring;
+ fltr->base.vf_idx = vf;
+ rc = bnxt_hwrm_l2_filter_alloc(bp, fltr);
+ if (rc)
+ bnxt_del_l2_filter(bp, fltr);
+ else
+ fs->location = fltr->base.sw_id;
+ return rc;
+}
+
+static bool bnxt_verify_ntuple_ip4_flow(struct ethtool_usrip4_spec *ip_spec,
+ struct ethtool_usrip4_spec *ip_mask)
{
- return (mask[0] & mask[1] & mask[2] & mask[3]) == IPV4_ALL_MASK;
+ if (ip_mask->l4_4_bytes || ip_mask->tos ||
+ ip_spec->ip_ver != ETH_RX_NFC_IP4 ||
+ ip_mask->proto != BNXT_IP_PROTO_FULL_MASK ||
+ (ip_spec->proto != IPPROTO_RAW && ip_spec->proto != IPPROTO_ICMP))
+ return false;
+ return true;
}
-static bool ipv6_mask_is_zero(__be32 mask[4])
+static bool bnxt_verify_ntuple_ip6_flow(struct ethtool_usrip6_spec *ip_spec,
+ struct ethtool_usrip6_spec *ip_mask)
{
- return !(mask[0] | mask[1] | mask[2] | mask[3]);
+ if (ip_mask->l4_4_bytes || ip_mask->tclass ||
+ ip_mask->l4_proto != BNXT_IP_PROTO_FULL_MASK ||
+ (ip_spec->l4_proto != IPPROTO_RAW &&
+ ip_spec->l4_proto != IPPROTO_ICMPV6))
+ return false;
+ return true;
}
static int bnxt_add_ntuple_cls_rule(struct bnxt *bp,
@@ -1172,6 +1286,7 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp,
u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
struct bnxt_ntuple_filter *new_fltr, *fltr;
struct bnxt_l2_filter *l2_fltr;
+ struct bnxt_flow_masks *fmasks;
u32 flow_type = fs->flow_type;
struct flow_keys *fkeys;
u32 idx;
@@ -1183,17 +1298,42 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp,
if ((flow_type & (FLOW_MAC_EXT | FLOW_EXT)) || vf)
return -EOPNOTSUPP;
+ if (flow_type == IP_USER_FLOW) {
+ if (!bnxt_verify_ntuple_ip4_flow(&fs->h_u.usr_ip4_spec,
+ &fs->m_u.usr_ip4_spec))
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_type == IPV6_USER_FLOW) {
+ if (!bnxt_verify_ntuple_ip6_flow(&fs->h_u.usr_ip6_spec,
+ &fs->m_u.usr_ip6_spec))
+ return -EOPNOTSUPP;
+ }
+
new_fltr = kzalloc(sizeof(*new_fltr), GFP_KERNEL);
if (!new_fltr)
return -ENOMEM;
- l2_fltr = bp->vnic_info[0].l2_filters[0];
+ l2_fltr = bp->vnic_info[BNXT_VNIC_DEFAULT].l2_filters[0];
atomic_inc(&l2_fltr->refcnt);
new_fltr->l2_fltr = l2_fltr;
+ fmasks = &new_fltr->fmasks;
fkeys = &new_fltr->fkeys;
rc = -EOPNOTSUPP;
switch (flow_type) {
+ case IP_USER_FLOW: {
+ struct ethtool_usrip4_spec *ip_spec = &fs->h_u.usr_ip4_spec;
+ struct ethtool_usrip4_spec *ip_mask = &fs->m_u.usr_ip4_spec;
+
+ fkeys->basic.ip_proto = ip_spec->proto;
+ fkeys->basic.n_proto = htons(ETH_P_IP);
+ fkeys->addrs.v4addrs.src = ip_spec->ip4src;
+ fmasks->addrs.v4addrs.src = ip_mask->ip4src;
+ fkeys->addrs.v4addrs.dst = ip_spec->ip4dst;
+ fmasks->addrs.v4addrs.dst = ip_mask->ip4dst;
+ break;
+ }
case TCP_V4_FLOW:
case UDP_V4_FLOW: {
struct ethtool_tcpip4_spec *ip_spec = &fs->h_u.tcp_ip4_spec;
@@ -1203,32 +1343,26 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp,
if (flow_type == UDP_V4_FLOW)
fkeys->basic.ip_proto = IPPROTO_UDP;
fkeys->basic.n_proto = htons(ETH_P_IP);
+ fkeys->addrs.v4addrs.src = ip_spec->ip4src;
+ fmasks->addrs.v4addrs.src = ip_mask->ip4src;
+ fkeys->addrs.v4addrs.dst = ip_spec->ip4dst;
+ fmasks->addrs.v4addrs.dst = ip_mask->ip4dst;
+ fkeys->ports.src = ip_spec->psrc;
+ fmasks->ports.src = ip_mask->psrc;
+ fkeys->ports.dst = ip_spec->pdst;
+ fmasks->ports.dst = ip_mask->pdst;
+ break;
+ }
+ case IPV6_USER_FLOW: {
+ struct ethtool_usrip6_spec *ip_spec = &fs->h_u.usr_ip6_spec;
+ struct ethtool_usrip6_spec *ip_mask = &fs->m_u.usr_ip6_spec;
- if (ip_mask->ip4src == IPV4_ALL_MASK) {
- fkeys->addrs.v4addrs.src = ip_spec->ip4src;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_IP;
- } else if (ip_mask->ip4src) {
- goto ntuple_err;
- }
- if (ip_mask->ip4dst == IPV4_ALL_MASK) {
- fkeys->addrs.v4addrs.dst = ip_spec->ip4dst;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_IP;
- } else if (ip_mask->ip4dst) {
- goto ntuple_err;
- }
-
- if (ip_mask->psrc == L4_PORT_ALL_MASK) {
- fkeys->ports.src = ip_spec->psrc;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_PORT;
- } else if (ip_mask->psrc) {
- goto ntuple_err;
- }
- if (ip_mask->pdst == L4_PORT_ALL_MASK) {
- fkeys->ports.dst = ip_spec->pdst;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_PORT;
- } else if (ip_mask->pdst) {
- goto ntuple_err;
- }
+ fkeys->basic.ip_proto = ip_spec->l4_proto;
+ fkeys->basic.n_proto = htons(ETH_P_IPV6);
+ fkeys->addrs.v6addrs.src = *(struct in6_addr *)&ip_spec->ip6src;
+ fmasks->addrs.v6addrs.src = *(struct in6_addr *)&ip_mask->ip6src;
+ fkeys->addrs.v6addrs.dst = *(struct in6_addr *)&ip_spec->ip6dst;
+ fmasks->addrs.v6addrs.dst = *(struct in6_addr *)&ip_mask->ip6dst;
break;
}
case TCP_V6_FLOW:
@@ -1241,40 +1375,21 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp,
fkeys->basic.ip_proto = IPPROTO_UDP;
fkeys->basic.n_proto = htons(ETH_P_IPV6);
- if (ipv6_mask_is_full(ip_mask->ip6src)) {
- fkeys->addrs.v6addrs.src =
- *(struct in6_addr *)&ip_spec->ip6src;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_IP;
- } else if (!ipv6_mask_is_zero(ip_mask->ip6src)) {
- goto ntuple_err;
- }
- if (ipv6_mask_is_full(ip_mask->ip6dst)) {
- fkeys->addrs.v6addrs.dst =
- *(struct in6_addr *)&ip_spec->ip6dst;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_IP;
- } else if (!ipv6_mask_is_zero(ip_mask->ip6dst)) {
- goto ntuple_err;
- }
-
- if (ip_mask->psrc == L4_PORT_ALL_MASK) {
- fkeys->ports.src = ip_spec->psrc;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_PORT;
- } else if (ip_mask->psrc) {
- goto ntuple_err;
- }
- if (ip_mask->pdst == L4_PORT_ALL_MASK) {
- fkeys->ports.dst = ip_spec->pdst;
- new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_PORT;
- } else if (ip_mask->pdst) {
- goto ntuple_err;
- }
+ fkeys->addrs.v6addrs.src = *(struct in6_addr *)&ip_spec->ip6src;
+ fmasks->addrs.v6addrs.src = *(struct in6_addr *)&ip_mask->ip6src;
+ fkeys->addrs.v6addrs.dst = *(struct in6_addr *)&ip_spec->ip6dst;
+ fmasks->addrs.v6addrs.dst = *(struct in6_addr *)&ip_mask->ip6dst;
+ fkeys->ports.src = ip_spec->psrc;
+ fmasks->ports.src = ip_mask->psrc;
+ fkeys->ports.dst = ip_spec->pdst;
+ fmasks->ports.dst = ip_mask->pdst;
break;
}
default:
rc = -EOPNOTSUPP;
goto ntuple_err;
}
- if (!new_fltr->ntuple_flags)
+ if (!memcmp(&BNXT_FLOW_MASK_NONE, fmasks, sizeof(*fmasks)))
goto ntuple_err;
idx = bnxt_get_ntp_filter_idx(bp, fkeys, NULL);
@@ -1287,8 +1402,11 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp,
}
rcu_read_unlock();
- new_fltr->base.rxq = ring;
new_fltr->base.flags = BNXT_ACT_NO_AGING;
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC)
+ new_fltr->base.flags |= BNXT_ACT_DROP;
+ else
+ new_fltr->base.rxq = ring;
__set_bit(BNXT_FLTR_VALID, &new_fltr->base.state);
rc = bnxt_insert_ntp_filter(bp, new_fltr, idx);
if (!rc) {
@@ -1321,6 +1439,18 @@ static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd)
if (fs->location != RX_CLS_LOC_ANY)
return -EINVAL;
+ flow_type = fs->flow_type;
+ if ((flow_type == IP_USER_FLOW ||
+ flow_type == IPV6_USER_FLOW) &&
+ !(bp->fw_cap & BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO))
+ return -EOPNOTSUPP;
+ if (flow_type & (FLOW_MAC_EXT | FLOW_RSS))
+ return -EINVAL;
+ flow_type &= ~FLOW_EXT;
+
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC && flow_type != ETHER_FLOW)
+ return bnxt_add_ntuple_cls_rule(bp, fs);
+
ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
if (BNXT_VF(bp) && vf)
@@ -1330,12 +1460,8 @@ static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd)
if (!vf && ring >= bp->rx_nr_rings)
return -EINVAL;
- flow_type = fs->flow_type;
- if (flow_type & (FLOW_MAC_EXT | FLOW_RSS))
- return -EINVAL;
- flow_type &= ~FLOW_EXT;
if (flow_type == ETHER_FLOW)
- rc = -EOPNOTSUPP;
+ rc = bnxt_add_l2_cls_rule(bp, fs);
else
rc = bnxt_add_ntuple_cls_rule(bp, fs);
return rc;
@@ -1346,11 +1472,22 @@ static int bnxt_srxclsrldel(struct bnxt *bp, struct ethtool_rxnfc *cmd)
struct ethtool_rx_flow_spec *fs = &cmd->fs;
struct bnxt_filter_base *fltr_base;
struct bnxt_ntuple_filter *fltr;
+ u32 id = fs->location;
rcu_read_lock();
+ fltr_base = bnxt_get_one_fltr_rcu(bp, bp->l2_fltr_hash_tbl,
+ BNXT_L2_FLTR_HASH_SIZE, id);
+ if (fltr_base) {
+ struct bnxt_l2_filter *l2_fltr;
+
+ l2_fltr = container_of(fltr_base, struct bnxt_l2_filter, base);
+ rcu_read_unlock();
+ bnxt_hwrm_l2_filter_free(bp, l2_fltr);
+ bnxt_del_l2_filter(bp, l2_fltr);
+ return 0;
+ }
fltr_base = bnxt_get_one_fltr_rcu(bp, bp->ntp_fltr_hash_tbl,
- BNXT_NTP_FLTR_HASH_SIZE,
- fs->location);
+ BNXT_NTP_FLTR_HASH_SIZE, id);
if (!fltr_base) {
rcu_read_unlock();
return -ENOENT;
@@ -1396,8 +1533,14 @@ static int bnxt_grxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
cmd->data |= RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3;
fallthrough;
- case SCTP_V4_FLOW:
case AH_ESP_V4_FLOW:
+ if (bp->rss_hash_cfg &
+ (VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV4 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV4))
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ fallthrough;
+ case SCTP_V4_FLOW:
case AH_V4_FLOW:
case ESP_V4_FLOW:
case IPV4_FLOW:
@@ -1415,8 +1558,14 @@ static int bnxt_grxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
cmd->data |= RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3;
fallthrough;
- case SCTP_V6_FLOW:
case AH_ESP_V6_FLOW:
+ if (bp->rss_hash_cfg &
+ (VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV6 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV6))
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ fallthrough;
+ case SCTP_V6_FLOW:
case AH_V6_FLOW:
case ESP_V6_FLOW:
case IPV6_FLOW:
@@ -1463,6 +1612,24 @@ static int bnxt_srxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
if (tuple == 4)
rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+ } else if (cmd->flow_type == AH_ESP_V4_FLOW) {
+ if (tuple == 4 && (!(bp->rss_cap & BNXT_RSS_CAP_AH_V4_RSS_CAP) ||
+ !(bp->rss_cap & BNXT_RSS_CAP_ESP_V4_RSS_CAP)))
+ return -EINVAL;
+ rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV4 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV4);
+ if (tuple == 4)
+ rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV4 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV4;
+ } else if (cmd->flow_type == AH_ESP_V6_FLOW) {
+ if (tuple == 4 && (!(bp->rss_cap & BNXT_RSS_CAP_AH_V6_RSS_CAP) ||
+ !(bp->rss_cap & BNXT_RSS_CAP_ESP_V6_RSS_CAP)))
+ return -EINVAL;
+ rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV6 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV6);
+ if (tuple == 4)
+ rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_AH_SPI_IPV6 |
+ VNIC_RSS_CFG_REQ_HASH_TYPE_ESP_SPI_IPV6;
} else if (tuple == 4) {
return -EINVAL;
}
@@ -1521,7 +1688,7 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
case ETHTOOL_GRXCLSRLCNT:
cmd->rule_cnt = bp->ntp_fltr_count;
- cmd->data = BNXT_NTP_FLTR_MAX_FLTR | RX_CLS_LOC_SPECIAL;
+ cmd->data = bp->max_fltr | RX_CLS_LOC_SPECIAL;
break;
case ETHTOOL_GRXCLSRLALL:
@@ -1574,7 +1741,8 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev)
struct bnxt *bp = netdev_priv(dev);
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
- return ALIGN(bp->rx_nr_rings, BNXT_RSS_TABLE_ENTRIES_P5);
+ return bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) *
+ BNXT_RSS_TABLE_ENTRIES_P5;
return HW_HASH_INDEX_SIZE;
}
@@ -1595,7 +1763,7 @@ static int bnxt_get_rxfh(struct net_device *dev,
if (!bp->vnic_info)
return 0;
- vnic = &bp->vnic_info[0];
+ vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
if (rxfh->indir && bp->rss_indir_tbl) {
tbl_size = bnxt_get_rxfh_indir_size(dev);
for (i = 0; i < tbl_size; i++)
@@ -1618,8 +1786,10 @@ static int bnxt_set_rxfh(struct net_device *dev,
if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
- if (rxfh->key)
- return -EOPNOTSUPP;
+ if (rxfh->key) {
+ memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE);
+ bp->rss_hash_key_updated = true;
+ }
if (rxfh->indir) {
u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev);
@@ -1630,7 +1800,7 @@ static int bnxt_set_rxfh(struct net_device *dev,
if (pad)
memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16));
}
-
+ bnxt_clear_usr_fltrs(bp, false);
if (netif_running(bp->dev)) {
bnxt_close_nic(bp, false, false);
rc = bnxt_open_nic(bp, false, false);
@@ -1750,31 +1920,21 @@ static int bnxt_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
return 0;
}
-u32 _bnxt_fw_to_ethtool_adv_spds(u16 fw_speeds, u8 fw_pause)
+/* TODO: support 25GB, 40GB, 50GB with different cable type */
+void _bnxt_fw_to_linkmode(unsigned long *mode, u16 fw_speeds)
{
- u32 speed_mask = 0;
+ linkmode_zero(mode);
- /* TODO: support 25GB, 40GB, 50GB with different cable type */
- /* set the advertised speeds */
if (fw_speeds & BNXT_LINK_SPEED_MSK_100MB)
- speed_mask |= ADVERTISED_100baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mode);
if (fw_speeds & BNXT_LINK_SPEED_MSK_1GB)
- speed_mask |= ADVERTISED_1000baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mode);
if (fw_speeds & BNXT_LINK_SPEED_MSK_2_5GB)
- speed_mask |= ADVERTISED_2500baseX_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseX_Full_BIT, mode);
if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
- speed_mask |= ADVERTISED_10000baseT_Full;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, mode);
if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
- speed_mask |= ADVERTISED_40000baseCR4_Full;
-
- if ((fw_pause & BNXT_LINK_PAUSE_BOTH) == BNXT_LINK_PAUSE_BOTH)
- speed_mask |= ADVERTISED_Pause;
- else if (fw_pause & BNXT_LINK_PAUSE_TX)
- speed_mask |= ADVERTISED_Asym_Pause;
- else if (fw_pause & BNXT_LINK_PAUSE_RX)
- speed_mask |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-
- return speed_mask;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, mode);
}
enum bnxt_media_type {
@@ -2642,23 +2802,22 @@ bnxt_force_link_speed(struct net_device *dev, u32 ethtool_speed, u32 lanes)
return 0;
}
-u16 bnxt_get_fw_auto_link_speeds(u32 advertising)
+u16 bnxt_get_fw_auto_link_speeds(const unsigned long *mode)
{
u16 fw_speed_mask = 0;
- /* only support autoneg at speed 100, 1000, and 10000 */
- if (advertising & (ADVERTISED_100baseT_Full |
- ADVERTISED_100baseT_Half)) {
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mode) ||
+ linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mode))
fw_speed_mask |= BNXT_LINK_SPEED_MSK_100MB;
- }
- if (advertising & (ADVERTISED_1000baseT_Full |
- ADVERTISED_1000baseT_Half)) {
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mode) ||
+ linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mode))
fw_speed_mask |= BNXT_LINK_SPEED_MSK_1GB;
- }
- if (advertising & ADVERTISED_10000baseT_Full)
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, mode))
fw_speed_mask |= BNXT_LINK_SPEED_MSK_10GB;
- if (advertising & ADVERTISED_40000baseCR4_Full)
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, mode))
fw_speed_mask |= BNXT_LINK_SPEED_MSK_40GB;
return fw_speed_mask;
@@ -3883,12 +4042,13 @@ static int bnxt_set_eeprom(struct net_device *dev,
eeprom->len);
}
-static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int bnxt_set_eee(struct net_device *dev, struct ethtool_keee *edata)
{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp);
struct bnxt *bp = netdev_priv(dev);
- struct ethtool_eee *eee = &bp->eee;
+ struct ethtool_keee *eee = &bp->eee;
struct bnxt_link_info *link_info = &bp->link_info;
- u32 advertising;
int rc = 0;
if (!BNXT_PHY_CFG_ABLE(bp))
@@ -3898,7 +4058,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
return -EOPNOTSUPP;
mutex_lock(&bp->link_lock);
- advertising = _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0);
+ _bnxt_fw_to_linkmode(advertising, link_info->advertising);
if (!edata->eee_enabled)
goto eee_ok;
@@ -3918,16 +4078,15 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
edata->tx_lpi_timer = eee->tx_lpi_timer;
}
}
- if (!edata->advertised) {
- edata->advertised = advertising & eee->supported;
- } else if (edata->advertised & ~advertising) {
- netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n",
- edata->advertised, advertising);
+ if (linkmode_empty(edata->advertised)) {
+ linkmode_and(edata->advertised, advertising, eee->supported);
+ } else if (linkmode_andnot(tmp, edata->advertised, advertising)) {
+ netdev_warn(dev, "EEE advertised must be a subset of autoneg advertised speeds\n");
rc = -EINVAL;
goto eee_exit;
}
- eee->advertised = edata->advertised;
+ linkmode_copy(eee->advertised, edata->advertised);
eee->tx_lpi_enabled = edata->tx_lpi_enabled;
eee->tx_lpi_timer = edata->tx_lpi_timer;
eee_ok:
@@ -3941,7 +4100,7 @@ eee_exit:
return rc;
}
-static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata)
{
struct bnxt *bp = netdev_priv(dev);
@@ -3953,12 +4112,12 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata)
/* Preserve tx_lpi_timer so that the last value will be used
* by default when it is re-enabled.
*/
- edata->advertised = 0;
+ linkmode_zero(edata->advertised);
edata->tx_lpi_enabled = 0;
}
if (!bp->eee.eee_active)
- edata->lp_advertised = 0;
+ linkmode_zero(edata->lp_advertised);
return 0;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index a8ecef8ab82c..e2ee030237d4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -43,12 +43,14 @@ struct bnxt_led_cfg {
#define BNXT_PXP_REG_LEN 0x3110
+#define BNXT_IP_PROTO_FULL_MASK 0xFF
+
extern const struct ethtool_ops bnxt_ethtool_ops;
u32 bnxt_get_rxfh_indir_size(struct net_device *dev);
-u32 _bnxt_fw_to_ethtool_adv_spds(u16, u8);
+void _bnxt_fw_to_linkmode(unsigned long *mode, u16 fw_speeds);
u32 bnxt_fw_to_ethtool_speed(u16);
-u16 bnxt_get_fw_auto_link_speeds(u32);
+u16 bnxt_get_fw_auto_link_speeds(const unsigned long *mode);
int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp,
struct hwrm_nvm_get_dev_info_output *nvm_dev_info);
int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index adad188e38b8..cc07660330f5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -684,7 +684,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
timestamp.hwtstamp = ns_to_ktime(ns);
skb_tstamp_tx(ptp->tx_skb, &timestamp);
} else {
- netdev_WARN_ONCE(bp->dev,
+ netdev_warn_once(bp->dev,
"TS query for TX timer failed rc = %x\n", rc);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index c2b25fc623ec..4079538bc310 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -407,7 +407,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
if (prog)
tx_xdp = bp->rx_nr_rings;
- tc = netdev_get_num_tc(dev);
+ tc = bp->num_tc;
if (!tc)
tc = 1;
rc = bnxt_check_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 2d7ae71287b1..7396e2823e32 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1313,14 +1313,13 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable,
}
priv->eee.eee_enabled = enable;
- priv->eee.eee_active = enable;
priv->eee.tx_lpi_enabled = tx_lpi_enabled;
}
-static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
+static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
- struct ethtool_eee *p = &priv->eee;
+ struct ethtool_keee *p = &priv->eee;
if (GENET_IS_V1(priv))
return -EOPNOTSUPP;
@@ -1328,18 +1327,17 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
if (!dev->phydev)
return -ENODEV;
- e->eee_enabled = p->eee_enabled;
- e->eee_active = p->eee_active;
e->tx_lpi_enabled = p->tx_lpi_enabled;
e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
return phy_ethtool_get_eee(dev->phydev, e);
}
-static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
+static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
- struct ethtool_eee *p = &priv->eee;
+ struct ethtool_keee *p = &priv->eee;
+ bool active;
if (GENET_IS_V1(priv))
return -EOPNOTSUPP;
@@ -1352,9 +1350,9 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
if (!p->eee_enabled) {
bcmgenet_eee_enable_set(dev, false, false);
} else {
- p->eee_active = phy_init_eee(dev->phydev, false) >= 0;
+ active = phy_init_eee(dev->phydev, false) >= 0;
bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER);
- bcmgenet_eee_enable_set(dev, p->eee_active, e->tx_lpi_enabled);
+ bcmgenet_eee_enable_set(dev, active, e->tx_lpi_enabled);
}
return phy_ethtool_set_eee(dev->phydev, e);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 1985c0ec4da2..7523b60b3c1c 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -645,7 +645,7 @@ struct bcmgenet_priv {
struct bcmgenet_mib_counters mib;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
};
#define GENET_IO_MACRO(name, offset) \
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 97ea76d443ab..9ada89355747 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -30,6 +30,7 @@ static void bcmgenet_mac_config(struct net_device *dev)
struct bcmgenet_priv *priv = netdev_priv(dev);
struct phy_device *phydev = dev->phydev;
u32 reg, cmd_bits = 0;
+ bool active;
/* speed */
if (phydev->speed == SPEED_1000)
@@ -88,9 +89,9 @@ static void bcmgenet_mac_config(struct net_device *dev)
}
bcmgenet_umac_writel(priv, reg, UMAC_CMD);
- priv->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
+ active = phy_init_eee(phydev, 0) >= 0;
bcmgenet_eee_enable_set(dev,
- priv->eee.eee_enabled && priv->eee.eee_active,
+ priv->eee.eee_enabled && active,
priv->eee.tx_lpi_enabled);
}
@@ -475,6 +476,10 @@ static int bcmgenet_mii_register(struct bcmgenet_priv *priv)
ppd.wait_func = bcmgenet_mii_wait;
ppd.wait_func_data = priv;
ppd.bus_name = "bcmgenet MII bus";
+ /* Pass a reference to our "main" clock which is used for MDIO
+ * transfers
+ */
+ ppd.clk = priv->clk;
/* Unimac MDIO bus controller starts at UniMAC offset + MDIO_CMD
* and is 2 * 32-bits word long, 8 bytes total.
@@ -673,7 +678,5 @@ void bcmgenet_mii_exit(struct net_device *dev)
if (of_phy_is_fixed_link(dn))
of_phy_deregister_fixed_link(dn);
of_node_put(priv->phy_dn);
- clk_prepare_enable(priv->clk);
platform_device_unregister(priv->mii_pdev);
- clk_disable_unprepare(priv->clk);
}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 04964bbe08cf..eee759054aad 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -2338,10 +2338,10 @@ static void tg3_phy_apply_otp(struct tg3 *tp)
tg3_phy_toggle_auxctl_smdsp(tp, false);
}
-static void tg3_eee_pull_config(struct tg3 *tp, struct ethtool_eee *eee)
+static void tg3_eee_pull_config(struct tg3 *tp, struct ethtool_keee *eee)
{
u32 val;
- struct ethtool_eee *dest = &tp->eee;
+ struct ethtool_keee *dest = &tp->eee;
if (!(tp->phy_flags & TG3_PHYFLG_EEE_CAP))
return;
@@ -2362,13 +2362,13 @@ static void tg3_eee_pull_config(struct tg3 *tp, struct ethtool_eee *eee)
/* Pull lp advertised settings */
if (tg3_phy_cl45_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE, &val))
return;
- dest->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(dest->lp_advertised, val);
/* Pull advertised and eee_enabled settings */
if (tg3_phy_cl45_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, &val))
return;
dest->eee_enabled = !!val;
- dest->advertised = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(dest->advertised, val);
/* Pull tx_lpi_enabled */
val = tr32(TG3_CPMU_EEE_MODE);
@@ -4354,23 +4354,12 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
if (!err) {
u32 err2;
- val = 0;
- /* Advertise 100-BaseTX EEE ability */
- if (advertise & ADVERTISED_100baseT_Full)
- val |= MDIO_AN_EEE_ADV_100TX;
- /* Advertise 1000-BaseT EEE ability */
- if (advertise & ADVERTISED_1000baseT_Full)
- val |= MDIO_AN_EEE_ADV_1000T;
-
- if (!tp->eee.eee_enabled) {
+ if (!tp->eee.eee_enabled)
val = 0;
- tp->eee.advertised = 0;
- } else {
- tp->eee.advertised = advertise &
- (ADVERTISED_100baseT_Full |
- ADVERTISED_1000baseT_Full);
- }
+ else
+ val = ethtool_adv_to_mmd_eee_adv_t(advertise);
+ mii_eee_cap1_mod_linkmode_t(tp->eee.advertised, val);
err = tg3_phy_cl45_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val);
if (err)
val = 0;
@@ -4618,7 +4607,7 @@ static int tg3_init_5401phy_dsp(struct tg3 *tp)
static bool tg3_phy_eee_config_ok(struct tg3 *tp)
{
- struct ethtool_eee eee;
+ struct ethtool_keee eee = {};
if (!(tp->phy_flags & TG3_PHYFLG_EEE_CAP))
return true;
@@ -4626,13 +4615,13 @@ static bool tg3_phy_eee_config_ok(struct tg3 *tp)
tg3_eee_pull_config(tp, &eee);
if (tp->eee.eee_enabled) {
- if (tp->eee.advertised != eee.advertised ||
+ if (!linkmode_equal(tp->eee.advertised, eee.advertised) ||
tp->eee.tx_lpi_timer != eee.tx_lpi_timer ||
tp->eee.tx_lpi_enabled != eee.tx_lpi_enabled)
return false;
} else {
/* EEE is disabled but we're advertising */
- if (eee.advertised)
+ if (!linkmode_empty(eee.advertised))
return false;
}
@@ -14180,7 +14169,7 @@ static int tg3_set_coalesce(struct net_device *dev,
return 0;
}
-static int tg3_set_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int tg3_set_eee(struct net_device *dev, struct ethtool_keee *edata)
{
struct tg3 *tp = netdev_priv(dev);
@@ -14189,7 +14178,7 @@ static int tg3_set_eee(struct net_device *dev, struct ethtool_eee *edata)
return -EOPNOTSUPP;
}
- if (edata->advertised != tp->eee.advertised) {
+ if (!linkmode_equal(edata->advertised, tp->eee.advertised)) {
netdev_warn(tp->dev,
"Direct manipulation of EEE advertisement is not supported\n");
return -EINVAL;
@@ -14202,7 +14191,9 @@ static int tg3_set_eee(struct net_device *dev, struct ethtool_eee *edata)
return -EINVAL;
}
- tp->eee = *edata;
+ tp->eee.eee_enabled = edata->eee_enabled;
+ tp->eee.tx_lpi_enabled = edata->tx_lpi_enabled;
+ tp->eee.tx_lpi_timer = edata->tx_lpi_timer;
tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED;
tg3_warn_mgmt_link_flap(tp);
@@ -14217,7 +14208,7 @@ static int tg3_set_eee(struct net_device *dev, struct ethtool_eee *edata)
return 0;
}
-static int tg3_get_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int tg3_get_eee(struct net_device *dev, struct ethtool_keee *edata)
{
struct tg3 *tp = netdev_priv(dev);
@@ -15655,10 +15646,13 @@ static int tg3_phy_probe(struct tg3 *tp)
tg3_chip_rev_id(tp) != CHIPREV_ID_57765_A0))) {
tp->phy_flags |= TG3_PHYFLG_EEE_CAP;
- tp->eee.supported = SUPPORTED_100baseT_Full |
- SUPPORTED_1000baseT_Full;
- tp->eee.advertised = ADVERTISED_100baseT_Full |
- ADVERTISED_1000baseT_Full;
+ linkmode_zero(tp->eee.supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ tp->eee.supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ tp->eee.supported);
+ linkmode_copy(tp->eee.advertised, tp->eee.supported);
+
tp->eee.eee_enabled = 1;
tp->eee.tx_lpi_enabled = 1;
tp->eee.tx_lpi_timer = TG3_CPMU_DBTMR1_LNKIDLE_2047US;
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 5016475e5005..cf1b2b123c7e 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -3419,7 +3419,7 @@ struct tg3 {
unsigned int irq_cnt;
struct ethtool_coalesce coal;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
/* firmware info */
const char *fw_needed;
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 31191b520b58..c32174484a96 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1091,10 +1091,10 @@ bnad_cb_tx_resume(struct bnad *bnad, struct bna_tx *tx)
* Free all TxQs buffers and then notify TX_E_CLEANUP_DONE to Tx fsm.
*/
static void
-bnad_tx_cleanup(struct delayed_work *work)
+bnad_tx_cleanup(struct work_struct *work)
{
struct bnad_tx_info *tx_info =
- container_of(work, struct bnad_tx_info, tx_cleanup_work);
+ container_of(work, struct bnad_tx_info, tx_cleanup_work.work);
struct bnad *bnad = NULL;
struct bna_tcb *tcb;
unsigned long flags;
@@ -1170,7 +1170,7 @@ bnad_cb_rx_stall(struct bnad *bnad, struct bna_rx *rx)
* Free all RxQs buffers and then notify RX_E_CLEANUP_DONE to Rx fsm.
*/
static void
-bnad_rx_cleanup(void *work)
+bnad_rx_cleanup(struct work_struct *work)
{
struct bnad_rx_info *rx_info =
container_of(work, struct bnad_rx_info, rx_cleanup_work);
@@ -1991,8 +1991,7 @@ bnad_setup_tx(struct bnad *bnad, u32 tx_id)
}
tx_info->tx = tx;
- INIT_DELAYED_WORK(&tx_info->tx_cleanup_work,
- (work_func_t)bnad_tx_cleanup);
+ INIT_DELAYED_WORK(&tx_info->tx_cleanup_work, bnad_tx_cleanup);
/* Register ISR for the Tx object */
if (intr_info->intr_type == BNA_INTR_T_MSIX) {
@@ -2248,8 +2247,7 @@ bnad_setup_rx(struct bnad *bnad, u32 rx_id)
rx_info->rx = rx;
spin_unlock_irqrestore(&bnad->bna_lock, flags);
- INIT_WORK(&rx_info->rx_cleanup_work,
- (work_func_t)(bnad_rx_cleanup));
+ INIT_WORK(&rx_info->rx_cleanup_work, bnad_rx_cleanup);
/*
* Init NAPI, so that state is set to NAPI_STATE_SCHED,
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 9cc6303c82ff..f38d31bfab1b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -27,6 +27,7 @@
#include "octeon_network.h"
MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
+MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Core");
MODULE_LICENSE("GPL");
/* OOM task polling interval */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index b5ff2e1a9975..49d5808b7d11 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -804,20 +804,6 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb,
}
/**
- * calc_tx_descs - calculate the number of Tx descriptors for a packet
- * @skb: the packet
- * @chip_ver: chip version
- *
- * Returns the number of Tx descriptors needed for the given Ethernet
- * packet, including the needed WR and CPL headers.
- */
-static inline unsigned int calc_tx_descs(const struct sk_buff *skb,
- unsigned int chip_ver)
-{
- return flits_to_desc(calc_tx_flits(skb, chip_ver));
-}
-
-/**
* cxgb4_write_sgl - populate a scatter/gather list for a packet
* @skb: the packet
* @q: the Tx queue we are writing into
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 1c2a540db13d..1f495cfd7959 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -868,5 +868,6 @@ static struct platform_driver ep93xx_eth_driver = {
module_platform_driver(ep93xx_eth_driver);
+MODULE_DESCRIPTION("Cirrus EP93xx Ethernet driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:ep93xx-eth");
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 37bd38d772e8..d266a87297a5 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -872,7 +872,7 @@ error:
return NETDEV_TX_OK;
}
-/* dev_base_lock rwlock held, nominally process context */
+/* rcu_read_lock potentially held, nominally process context */
static void enic_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *net_stats)
{
diff --git a/drivers/net/ethernet/cisco/enic/vnic_vic.c b/drivers/net/ethernet/cisco/enic/vnic_vic.c
index 20fcb20b42ed..66b577835338 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_vic.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_vic.c
@@ -49,7 +49,8 @@ int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
tlv->type = htons(type);
tlv->length = htons(length);
- memcpy(tlv->value, value, length);
+ unsafe_memcpy(tlv->value, value, length,
+ /* Flexible array of flexible arrays */);
vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1);
vp->length = htonl(ntohl(vp->length) +
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index c2c5c589a5e3..44af1d13d931 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -590,5 +590,6 @@ module_pci_driver(pci_driver);
module_param(polling_frequency, long, 0444);
MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns");
+MODULE_DESCRIPTION("Beckhoff CX5020 EtherCAT Ethernet driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dariusz Marcinkiewicz <reksio@newterm.pl>");
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index df40c720e7b2..4b15af6b7122 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -229,8 +229,10 @@ static int tsnep_phy_loopback(struct tsnep_adapter *adapter, bool enable)
* would delay a working loopback anyway, let's ensure that loopback
* is working immediately by setting link mode directly
*/
- if (!retval && enable)
+ if (!retval && enable) {
+ netif_carrier_on(adapter->netdev);
tsnep_set_link_mode(adapter);
+ }
return retval;
}
@@ -238,7 +240,7 @@ static int tsnep_phy_loopback(struct tsnep_adapter *adapter, bool enable)
static int tsnep_phy_open(struct tsnep_adapter *adapter)
{
struct phy_device *phydev;
- struct ethtool_eee ethtool_eee;
+ struct ethtool_keee ethtool_keee;
int retval;
retval = phy_connect_direct(adapter->netdev, adapter->phydev,
@@ -257,8 +259,8 @@ static int tsnep_phy_open(struct tsnep_adapter *adapter)
phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
/* disable EEE autoneg, EEE not supported by TSNEP */
- memset(&ethtool_eee, 0, sizeof(ethtool_eee));
- phy_ethtool_set_eee(adapter->phydev, &ethtool_eee);
+ memset(&ethtool_keee, 0, sizeof(ethtool_keee));
+ phy_ethtool_set_eee(adapter->phydev, &ethtool_keee);
adapter->phydev->irq = PHY_MAC_INTERRUPT;
phy_start(adapter->phydev);
@@ -719,17 +721,25 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
struct xdp_buff *xdp,
- struct netdev_queue *tx_nq, struct tsnep_tx *tx)
+ struct netdev_queue *tx_nq, struct tsnep_tx *tx,
+ bool zc)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
bool xmit;
+ u32 type;
if (unlikely(!xdpf))
return false;
+ /* no page pool for zero copy */
+ if (zc)
+ type = TSNEP_TX_TYPE_XDP_NDO;
+ else
+ type = TSNEP_TX_TYPE_XDP_TX;
+
__netif_tx_lock(tx_nq, smp_processor_id());
- xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
+ xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, type);
/* Avoid transmit queue timeout since we share it with the slow path */
if (xmit)
@@ -1258,6 +1268,14 @@ static int tsnep_rx_refill_zc(struct tsnep_rx *rx, int count, bool reuse)
return desc_refilled;
}
+static void tsnep_xsk_rx_need_wakeup(struct tsnep_rx *rx, int desc_available)
+{
+ if (desc_available)
+ xsk_set_rx_need_wakeup(rx->xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(rx->xsk_pool);
+}
+
static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
struct xdp_buff *xdp, int *status,
struct netdev_queue *tx_nq, struct tsnep_tx *tx)
@@ -1273,7 +1291,7 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
case XDP_PASS:
return false;
case XDP_TX:
- if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+ if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, false))
goto out_failure;
*status |= TSNEP_XDP_TX;
return true;
@@ -1323,7 +1341,7 @@ static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog,
case XDP_PASS:
return false;
case XDP_TX:
- if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+ if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, true))
goto out_failure;
*status |= TSNEP_XDP_TX;
return true;
@@ -1485,7 +1503,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
xdp_prepare_buff(&xdp, page_address(entry->page),
XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE,
- length, false);
+ length - ETH_FCS_LEN, false);
consume = tsnep_xdp_run_prog(rx, prog, &xdp,
&xdp_status, tx_nq, tx);
@@ -1568,7 +1586,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
prefetch(entry->xdp->data);
length = __le32_to_cpu(entry->desc_wb->properties) &
TSNEP_DESC_LENGTH_MASK;
- xsk_buff_set_size(entry->xdp, length);
+ xsk_buff_set_size(entry->xdp, length - ETH_FCS_LEN);
xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool);
/* RX metadata with timestamps is in front of actual data,
@@ -1619,10 +1637,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
desc_available -= tsnep_rx_refill_zc(rx, desc_available, false);
if (xsk_uses_need_wakeup(rx->xsk_pool)) {
- if (desc_available)
- xsk_set_rx_need_wakeup(rx->xsk_pool);
- else
- xsk_clear_rx_need_wakeup(rx->xsk_pool);
+ tsnep_xsk_rx_need_wakeup(rx, desc_available);
return done;
}
@@ -1762,6 +1777,13 @@ static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx)
allocated--;
}
}
+
+ /* set need wakeup flag immediately if ring is not filled completely,
+ * first polling would be too late as need wakeup signalisation would
+ * be delayed for an indefinite time
+ */
+ if (xsk_uses_need_wakeup(rx->xsk_pool))
+ tsnep_xsk_rx_need_wakeup(rx, tsnep_rx_desc_available(rx));
}
static bool tsnep_pending(struct tsnep_queue *queue)
@@ -2549,8 +2571,7 @@ static int tsnep_probe(struct platform_device *pdev)
mutex_init(&adapter->rxnfc_lock);
INIT_LIST_HEAD(&adapter->rxnfc_rules);
- io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- adapter->addr = devm_ioremap_resource(&pdev->dev, io);
+ adapter->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &io);
if (IS_ERR(adapter->addr))
return PTR_ERR(adapter->addr);
netdev->mem_start = io->start;
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 07c2b701b5fa..9ebe751c1df0 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -661,4 +661,5 @@ static struct platform_driver nps_enet_driver = {
module_platform_driver(nps_enet_driver);
MODULE_AUTHOR("EZchip Semiconductor");
+MODULE_DESCRIPTION("EZchip NPS Ethernet driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index cffbf27c4656..9f07f4947b63 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2402,7 +2402,7 @@ static void enetc_clear_interrupts(struct enetc_ndev_priv *priv)
static int enetc_phylink_connect(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
- struct ethtool_eee edata;
+ struct ethtool_keee edata;
int err;
if (!priv->phylink) {
@@ -2418,7 +2418,7 @@ static int enetc_phylink_connect(struct net_device *ndev)
}
/* disable EEE autoneg, until ENETC driver supports it */
- memset(&edata, 0, sizeof(struct ethtool_eee));
+ memset(&edata, 0, sizeof(struct ethtool_keee));
phylink_ethtool_set_eee(priv->phylink, &edata);
phylink_start(priv->phylink);
@@ -3216,4 +3216,5 @@ void enetc_pci_remove(struct pci_dev *pdev)
}
EXPORT_SYMBOL_GPL(enetc_pci_remove);
+MODULE_DESCRIPTION("NXP ENETC Ethernet driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index a8fbcada6b01..a19cb2a786fd 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -672,7 +672,7 @@ struct fec_enet_private {
unsigned int itr_clk_rate;
/* tx lpi eee mode */
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
unsigned int clk_ref_rate;
/* ptp clock period in ns*/
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d42594f32275..d7693fdf640d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -85,8 +85,6 @@ static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0, 1, 1, 1, 2, 2, 2};
-/* Pause frame feild and FIFO threshold */
-#define FEC_ENET_FCE (1 << 5)
#define FEC_ENET_RSEM_V 0x84
#define FEC_ENET_RSFL_V 16
#define FEC_ENET_RAEM_V 0x8
@@ -240,8 +238,8 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define PKT_MINBUF_SIZE 64
/* FEC receive acceleration */
-#define FEC_RACC_IPDIS (1 << 1)
-#define FEC_RACC_PRODIS (1 << 2)
+#define FEC_RACC_IPDIS BIT(1)
+#define FEC_RACC_PRODIS BIT(2)
#define FEC_RACC_SHIFT16 BIT(7)
#define FEC_RACC_OPTIONS (FEC_RACC_IPDIS | FEC_RACC_PRODIS)
@@ -273,8 +271,23 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_MMFR_TA (2 << 16)
#define FEC_MMFR_DATA(v) (v & 0xffff)
/* FEC ECR bits definition */
-#define FEC_ECR_MAGICEN (1 << 2)
-#define FEC_ECR_SLEEP (1 << 3)
+#define FEC_ECR_RESET BIT(0)
+#define FEC_ECR_ETHEREN BIT(1)
+#define FEC_ECR_MAGICEN BIT(2)
+#define FEC_ECR_SLEEP BIT(3)
+#define FEC_ECR_EN1588 BIT(4)
+#define FEC_ECR_BYTESWP BIT(8)
+/* FEC RCR bits definition */
+#define FEC_RCR_LOOP BIT(0)
+#define FEC_RCR_HALFDPX BIT(1)
+#define FEC_RCR_MII BIT(2)
+#define FEC_RCR_PROMISC BIT(3)
+#define FEC_RCR_BC_REJ BIT(4)
+#define FEC_RCR_FLOWCTL BIT(5)
+#define FEC_RCR_RMII BIT(8)
+#define FEC_RCR_10BASET BIT(9)
+/* TX WMARK bits */
+#define FEC_TXWMRK_STRFWD BIT(8)
#define FEC_MII_TIMEOUT 30000 /* us */
@@ -1062,7 +1075,7 @@ fec_restart(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
u32 temp_mac[2];
u32 rcntl = OPT_FRAME_SIZE | 0x04;
- u32 ecntl = 0x2; /* ETHEREN */
+ u32 ecntl = FEC_ECR_ETHEREN;
/* Whack a reset. We should wait for this.
* For i.MX6SX SOC, enet use AXI bus, we use disable MAC
@@ -1137,18 +1150,18 @@ fec_restart(struct net_device *ndev)
fep->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID)
rcntl |= (1 << 6);
else if (fep->phy_interface == PHY_INTERFACE_MODE_RMII)
- rcntl |= (1 << 8);
+ rcntl |= FEC_RCR_RMII;
else
- rcntl &= ~(1 << 8);
+ rcntl &= ~FEC_RCR_RMII;
/* 1G, 100M or 10M */
if (ndev->phydev) {
if (ndev->phydev->speed == SPEED_1000)
ecntl |= (1 << 5);
else if (ndev->phydev->speed == SPEED_100)
- rcntl &= ~(1 << 9);
+ rcntl &= ~FEC_RCR_10BASET;
else
- rcntl |= (1 << 9);
+ rcntl |= FEC_RCR_10BASET;
}
} else {
#ifdef FEC_MIIGSK_ENR
@@ -1181,7 +1194,7 @@ fec_restart(struct net_device *ndev)
if ((fep->pause_flag & FEC_PAUSE_FLAG_ENABLE) ||
((fep->pause_flag & FEC_PAUSE_FLAG_AUTONEG) &&
ndev->phydev && ndev->phydev->pause)) {
- rcntl |= FEC_ENET_FCE;
+ rcntl |= FEC_RCR_FLOWCTL;
/* set FIFO threshold parameter to reduce overrun */
writel(FEC_ENET_RSEM_V, fep->hwp + FEC_R_FIFO_RSEM);
@@ -1192,7 +1205,7 @@ fec_restart(struct net_device *ndev)
/* OPD */
writel(FEC_ENET_OPD_V, fep->hwp + FEC_OPD);
} else {
- rcntl &= ~FEC_ENET_FCE;
+ rcntl &= ~FEC_RCR_FLOWCTL;
}
#endif /* !defined(CONFIG_M5272) */
@@ -1207,13 +1220,13 @@ fec_restart(struct net_device *ndev)
if (fep->quirks & FEC_QUIRK_ENET_MAC) {
/* enable ENET endian swap */
- ecntl |= (1 << 8);
+ ecntl |= FEC_ECR_BYTESWP;
/* enable ENET store and forward mode */
- writel(1 << 8, fep->hwp + FEC_X_WMRK);
+ writel(FEC_TXWMRK_STRFWD, fep->hwp + FEC_X_WMRK);
}
if (fep->bufdesc_ex)
- ecntl |= (1 << 4);
+ ecntl |= FEC_ECR_EN1588;
if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
fep->rgmii_txc_dly)
@@ -1312,7 +1325,7 @@ static void
fec_stop(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8);
+ u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & FEC_RCR_RMII;
u32 val;
/* We cannot expect a graceful transmit stop without link !!! */
@@ -1331,7 +1344,7 @@ fec_stop(struct net_device *ndev)
if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) {
writel(0, fep->hwp + FEC_ECNTRL);
} else {
- writel(1, fep->hwp + FEC_ECNTRL);
+ writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL);
udelay(10);
}
} else {
@@ -1345,12 +1358,11 @@ fec_stop(struct net_device *ndev)
/* We have to keep ENET enabled to have MII interrupt stay working */
if (fep->quirks & FEC_QUIRK_ENET_MAC &&
!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
- writel(2, fep->hwp + FEC_ECNTRL);
+ writel(FEC_ECR_ETHEREN, fep->hwp + FEC_ECNTRL);
writel(rmii_mode, fep->hwp + FEC_R_CNTRL);
}
}
-
static void
fec_timeout(struct net_device *ndev, unsigned int txqueue)
{
@@ -2005,6 +2017,37 @@ static int fec_get_mac(struct net_device *ndev)
/*
* Phy section
*/
+
+/* LPI Sleep Ts count base on tx clk (clk_ref).
+ * The lpi sleep cnt value = X us / (cycle_ns).
+ */
+static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+
+ return us * (fep->clk_ref_rate / 1000) / 1000;
+}
+
+static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct ethtool_keee *p = &fep->eee;
+ unsigned int sleep_cycle, wake_cycle;
+
+ if (enable) {
+ sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer);
+ wake_cycle = sleep_cycle;
+ } else {
+ sleep_cycle = 0;
+ wake_cycle = 0;
+ }
+
+ writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP);
+ writel(wake_cycle, fep->hwp + FEC_LPI_WAKE);
+
+ return 0;
+}
+
static void fec_enet_adjust_link(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -2036,6 +2079,7 @@ static void fec_enet_adjust_link(struct net_device *ndev)
/* if any of the above changed restart the FEC */
if (status_change) {
+ netif_stop_queue(ndev);
napi_disable(&fep->napi);
netif_tx_lock_bh(ndev);
fec_restart(ndev);
@@ -2043,8 +2087,11 @@ static void fec_enet_adjust_link(struct net_device *ndev)
netif_tx_unlock_bh(ndev);
napi_enable(&fep->napi);
}
+ if (fep->quirks & FEC_QUIRK_HAS_EEE)
+ fec_enet_eee_mode_set(ndev, phy_dev->enable_tx_lpi);
} else {
if (fep->link) {
+ netif_stop_queue(ndev);
napi_disable(&fep->napi);
netif_tx_lock_bh(ndev);
fec_stop(ndev);
@@ -2401,6 +2448,9 @@ static int fec_enet_mii_probe(struct net_device *ndev)
else
phy_set_max_speed(phy_dev, 100);
+ if (fep->quirks & FEC_QUIRK_HAS_EEE)
+ phy_support_eee(phy_dev);
+
fep->link = 0;
fep->full_duplex = 0;
@@ -3107,50 +3157,11 @@ static int fec_enet_set_coalesce(struct net_device *ndev,
return 0;
}
-/* LPI Sleep Ts count base on tx clk (clk_ref).
- * The lpi sleep cnt value = X us / (cycle_ns).
- */
-static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us)
-{
- struct fec_enet_private *fep = netdev_priv(ndev);
-
- return us * (fep->clk_ref_rate / 1000) / 1000;
-}
-
-static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
-{
- struct fec_enet_private *fep = netdev_priv(ndev);
- struct ethtool_eee *p = &fep->eee;
- unsigned int sleep_cycle, wake_cycle;
- int ret = 0;
-
- if (enable) {
- ret = phy_init_eee(ndev->phydev, false);
- if (ret)
- return ret;
-
- sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer);
- wake_cycle = sleep_cycle;
- } else {
- sleep_cycle = 0;
- wake_cycle = 0;
- }
-
- p->tx_lpi_enabled = enable;
- p->eee_enabled = enable;
- p->eee_active = enable;
-
- writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP);
- writel(wake_cycle, fep->hwp + FEC_LPI_WAKE);
-
- return 0;
-}
-
static int
-fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct ethtool_eee *p = &fep->eee;
+ struct ethtool_keee *p = &fep->eee;
if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
return -EOPNOTSUPP;
@@ -3158,20 +3169,16 @@ fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
if (!netif_running(ndev))
return -ENETDOWN;
- edata->eee_enabled = p->eee_enabled;
- edata->eee_active = p->eee_active;
edata->tx_lpi_timer = p->tx_lpi_timer;
- edata->tx_lpi_enabled = p->tx_lpi_enabled;
return phy_ethtool_get_eee(ndev->phydev, edata);
}
static int
-fec_enet_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct ethtool_eee *p = &fep->eee;
- int ret = 0;
+ struct ethtool_keee *p = &fep->eee;
if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
return -EOPNOTSUPP;
@@ -3181,15 +3188,6 @@ fec_enet_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
p->tx_lpi_timer = edata->tx_lpi_timer;
- if (!edata->eee_enabled || !edata->tx_lpi_enabled ||
- !edata->tx_lpi_timer)
- ret = fec_enet_eee_mode_set(ndev, false);
- else
- ret = fec_enet_eee_mode_set(ndev, true);
-
- if (ret)
- return ret;
-
return phy_ethtool_set_eee(ndev->phydev, edata);
}
@@ -4769,4 +4767,5 @@ static struct platform_driver fec_driver = {
module_platform_driver(fec_driver);
+MODULE_DESCRIPTION("NXP Fast Ethernet Controller (FEC) driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index 9ba15d3183d7..758535adc9ff 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -1073,6 +1073,14 @@ int memac_initialization(struct mac_device *mac_dev,
unsigned long capabilities;
unsigned long *supported;
+ /* The internal connection to the serdes is XGMII, but this isn't
+ * really correct for the phy mode (which is the external connection).
+ * However, this is how all older device trees say that they want
+ * 10GBASE-R (aka XFI), so just convert it for them.
+ */
+ if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
+ mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER;
+
mac_dev->phylink_ops = &memac_mac_ops;
mac_dev->set_promisc = memac_set_promiscuous;
mac_dev->change_addr = memac_modify_mac_address;
@@ -1139,7 +1147,7 @@ int memac_initialization(struct mac_device *mac_dev,
* (and therefore that xfi_pcs cannot be set). If we are defaulting to
* XGMII, assume this is for XFI. Otherwise, assume it is for SGMII.
*/
- if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
+ if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_10GBASER)
memac->xfi_pcs = pcs;
else
memac->sgmii_pcs = pcs;
@@ -1153,14 +1161,6 @@ int memac_initialization(struct mac_device *mac_dev,
goto _return_fm_mac_free;
}
- /* The internal connection to the serdes is XGMII, but this isn't
- * really correct for the phy mode (which is the external connection).
- * However, this is how all older device trees say that they want
- * 10GBASE-R (aka XFI), so just convert it for them.
- */
- if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
- mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER;
-
/* TODO: The following interface modes are supported by (some) hardware
* but not by this driver:
* - 1000BASE-KX
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 70dd982a5edc..026f7270a54d 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -531,4 +531,5 @@ static struct platform_driver fsl_pq_mdio_driver = {
module_platform_driver(fsl_pq_mdio_driver);
+MODULE_DESCRIPTION("Freescale PQ MDIO helpers");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index e3dfbd7a4236..a811238c018d 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1649,7 +1649,7 @@ static int init_phy(struct net_device *dev)
struct gfar_private *priv = netdev_priv(dev);
phy_interface_t interface = priv->interface;
struct phy_device *phydev;
- struct ethtool_eee edata;
+ struct ethtool_keee edata;
linkmode_set_bit_array(phy_10_100_features_array,
ARRAY_SIZE(phy_10_100_features_array),
@@ -1681,7 +1681,7 @@ static int init_phy(struct net_device *dev)
phy_support_asym_pause(phydev);
/* disable EEE autoneg, EEE not supported by eTSEC */
- memset(&edata, 0, sizeof(struct ethtool_eee));
+ memset(&edata, 0, sizeof(struct ethtool_keee));
phy_ethtool_set_eee(phydev, &edata);
return 0;
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index b80349154604..4814c96d5fe7 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -9,6 +9,7 @@
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/u64_stats_sync.h>
@@ -51,12 +52,16 @@
#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
+#define GVE_MAX_RX_BUFFER_SIZE 4096
+
#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048
#define GVE_XDP_ACTIONS 5
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
+#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128
+
#define DQO_QPL_DEFAULT_TX_PAGES 512
#define DQO_QPL_DEFAULT_RX_PAGES 2048
@@ -150,6 +155,11 @@ struct gve_rx_compl_queue_dqo {
u32 mask; /* Mask for indices to the size of the ring */
};
+struct gve_header_buf {
+ u8 *data;
+ dma_addr_t addr;
+};
+
/* Stores state for tracking buffers posted to HW */
struct gve_rx_buf_state_dqo {
/* The page posted to HW. */
@@ -252,19 +262,26 @@ struct gve_rx_ring {
/* track number of used buffers */
u16 used_buf_states_cnt;
+
+ /* Address info of the buffers for header-split */
+ struct gve_header_buf hdr_bufs;
} dqo;
};
u64 rbytes; /* free-running bytes received */
+ u64 rx_hsplit_bytes; /* free-running header bytes received */
u64 rpackets; /* free-running packets received */
u32 cnt; /* free-running total number of completed packets */
u32 fill_cnt; /* free-running total number of descs and buffs posted */
u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+ u64 rx_hsplit_pkt; /* free-running packets with headers split */
u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
u64 rx_copied_pkt; /* free-running total number of copied packets */
u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */
u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */
+ /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */
+ u64 rx_hsplit_unsplit_pkt;
u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */
u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
@@ -622,6 +639,56 @@ struct gve_ptype_lut {
struct gve_ptype ptypes[GVE_NUM_PTYPES];
};
+/* Parameters for allocating queue page lists */
+struct gve_qpls_alloc_cfg {
+ struct gve_qpl_config *qpl_cfg;
+ struct gve_queue_config *tx_cfg;
+ struct gve_queue_config *rx_cfg;
+
+ u16 num_xdp_queues;
+ bool raw_addressing;
+ bool is_gqi;
+
+ /* Allocated resources are returned here */
+ struct gve_queue_page_list *qpls;
+};
+
+/* Parameters for allocating resources for tx queues */
+struct gve_tx_alloc_rings_cfg {
+ struct gve_queue_config *qcfg;
+
+ /* qpls and qpl_cfg must already be allocated */
+ struct gve_queue_page_list *qpls;
+ struct gve_qpl_config *qpl_cfg;
+
+ u16 ring_size;
+ u16 start_idx;
+ u16 num_rings;
+ bool raw_addressing;
+
+ /* Allocated resources are returned here */
+ struct gve_tx_ring *tx;
+};
+
+/* Parameters for allocating resources for rx queues */
+struct gve_rx_alloc_rings_cfg {
+ /* tx config is also needed to determine QPL ids */
+ struct gve_queue_config *qcfg;
+ struct gve_queue_config *qcfg_tx;
+
+ /* qpls and qpl_cfg must already be allocated */
+ struct gve_queue_page_list *qpls;
+ struct gve_qpl_config *qpl_cfg;
+
+ u16 ring_size;
+ u16 packet_buffer_size;
+ bool raw_addressing;
+ bool enable_header_split;
+
+ /* Allocated resources are returned here */
+ struct gve_rx_ring *rx;
+};
+
/* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
* when the entire configure_device_resources command is zeroed out and the
* queue_format is not specified.
@@ -729,13 +796,17 @@ struct gve_priv {
struct gve_ptype_lut *ptype_lut_dqo;
/* Must be a power of two. */
- int data_buffer_size_dqo;
+ u16 data_buffer_size_dqo;
+ u16 max_rx_buffer_size; /* device limit */
enum gve_queue_format queue_format;
/* Interrupt coalescing settings */
u32 tx_coalesce_usecs;
u32 rx_coalesce_usecs;
+
+ u16 header_buf_size; /* device configured, header-split supported if non-zero */
+ bool header_split_enabled; /* True if the header split is enabled by the user */
};
enum gve_service_task_flags_bit {
@@ -917,14 +988,14 @@ static inline bool gve_is_qpl(struct gve_priv *priv)
priv->queue_format == GVE_DQO_QPL_FORMAT;
}
-/* Returns the number of tx queue page lists
- */
-static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
+/* Returns the number of tx queue page lists */
+static inline u32 gve_num_tx_qpls(const struct gve_queue_config *tx_cfg,
+ int num_xdp_queues,
+ bool is_qpl)
{
- if (!gve_is_qpl(priv))
+ if (!is_qpl)
return 0;
-
- return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+ return tx_cfg->num_queues + num_xdp_queues;
}
/* Returns the number of XDP tx queue page lists
@@ -937,14 +1008,13 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
return priv->num_xdp_queues;
}
-/* Returns the number of rx queue page lists
- */
-static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
+/* Returns the number of rx queue page lists */
+static inline u32 gve_num_rx_qpls(const struct gve_queue_config *rx_cfg,
+ bool is_qpl)
{
- if (!gve_is_qpl(priv))
+ if (!is_qpl)
return 0;
-
- return priv->rx_cfg.num_queues;
+ return rx_cfg->num_queues;
}
static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid)
@@ -957,59 +1027,59 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
return priv->tx_cfg.max_queues + rx_qid;
}
+/* Returns the index into priv->qpls where a certain rx queue's QPL resides */
+static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid)
+{
+ return tx_cfg->max_queues + rx_qid;
+}
+
static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
{
return gve_tx_qpl_id(priv, 0);
}
-static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv)
+/* Returns the index into priv->qpls where the first rx queue's QPL resides */
+static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg)
{
- return gve_rx_qpl_id(priv, 0);
+ return gve_get_rx_qpl_id(tx_cfg, 0);
}
-/* Returns a pointer to the next available tx qpl in the list of qpls
- */
+/* Returns a pointer to the next available tx qpl in the list of qpls */
static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid)
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_tx_alloc_rings_cfg *cfg,
+ int tx_qid)
{
- int id = gve_tx_qpl_id(priv, tx_qid);
-
/* QPL already in use */
- if (test_bit(id, priv->qpl_cfg.qpl_id_map))
+ if (test_bit(tx_qid, cfg->qpl_cfg->qpl_id_map))
return NULL;
-
- set_bit(id, priv->qpl_cfg.qpl_id_map);
- return &priv->qpls[id];
+ set_bit(tx_qid, cfg->qpl_cfg->qpl_id_map);
+ return &cfg->qpls[tx_qid];
}
-/* Returns a pointer to the next available rx qpl in the list of qpls
- */
+/* Returns a pointer to the next available rx qpl in the list of qpls */
static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid)
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_rx_alloc_rings_cfg *cfg,
+ int rx_qid)
{
- int id = gve_rx_qpl_id(priv, rx_qid);
-
+ int id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx_qid);
/* QPL already in use */
- if (test_bit(id, priv->qpl_cfg.qpl_id_map))
+ if (test_bit(id, cfg->qpl_cfg->qpl_id_map))
return NULL;
-
- set_bit(id, priv->qpl_cfg.qpl_id_map);
- return &priv->qpls[id];
+ set_bit(id, cfg->qpl_cfg->qpl_id_map);
+ return &cfg->qpls[id];
}
-/* Unassigns the qpl with the given id
- */
-static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
+/* Unassigns the qpl with the given id */
+static inline void gve_unassign_qpl(struct gve_qpl_config *qpl_cfg, int id)
{
- clear_bit(id, priv->qpl_cfg.qpl_id_map);
+ clear_bit(id, qpl_cfg->qpl_id_map);
}
-/* Returns the correct dma direction for tx and rx qpls
- */
+/* Returns the correct dma direction for tx and rx qpls */
static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
int id)
{
- if (id < gve_rx_start_qpl_id(priv))
+ if (id < gve_rx_start_qpl_id(&priv->tx_cfg))
return DMA_TO_DEVICE;
else
return DMA_FROM_DEVICE;
@@ -1036,6 +1106,9 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
return gve_xdp_tx_queue_id(priv, 0);
}
+/* gqi napi handler defined in gve_main.c */
+int gve_napi_poll(struct napi_struct *napi, int budget);
+
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
@@ -1051,8 +1124,12 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
bool gve_xdp_poll(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings);
-void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings);
+int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx);
+void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx);
u32 gve_tx_load_event_counter(struct gve_priv *priv,
struct gve_tx_ring *tx);
bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
@@ -1061,7 +1138,15 @@ void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
int gve_rx_poll(struct gve_notify_block *block, int budget);
bool gve_rx_work_pending(struct gve_rx_ring *rx);
int gve_rx_alloc_rings(struct gve_priv *priv);
-void gve_rx_free_rings_gqi(struct gve_priv *priv);
+int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx);
+void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx);
+u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit);
+bool gve_header_split_supported(const struct gve_priv *priv);
+int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split);
/* Reset */
void gve_schedule_reset(struct gve_priv *priv);
int gve_reset(struct gve_priv *priv, bool attempt_teardown);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 12fbd723ecc6..ae12ac38e18b 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -40,7 +40,8 @@ void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
- struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+ struct gve_device_option_buffer_sizes **dev_op_buffer_sizes)
{
u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
u16 option_length = be16_to_cpu(option->option_length);
@@ -147,6 +148,23 @@ void gve_parse_device_option(struct gve_priv *priv,
}
*dev_op_jumbo_frames = (void *)(option + 1);
break;
+ case GVE_DEV_OPT_ID_BUFFER_SIZES:
+ if (option_length < sizeof(**dev_op_buffer_sizes) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Buffer Sizes",
+ (int)sizeof(**dev_op_buffer_sizes),
+ GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_buffer_sizes))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "Buffer Sizes");
+ *dev_op_buffer_sizes = (void *)(option + 1);
+ break;
default:
/* If we don't recognize the option just continue
* without doing anything.
@@ -164,7 +182,8 @@ gve_process_device_options(struct gve_priv *priv,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
- struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+ struct gve_device_option_buffer_sizes **dev_op_buffer_sizes)
{
const int num_options = be16_to_cpu(descriptor->num_device_options);
struct gve_device_option *dev_opt;
@@ -185,7 +204,7 @@ gve_process_device_options(struct gve_priv *priv,
gve_parse_device_option(priv, descriptor, dev_opt,
dev_op_gqi_rda, dev_op_gqi_qpl,
dev_op_dqo_rda, dev_op_jumbo_frames,
- dev_op_dqo_qpl);
+ dev_op_dqo_qpl, dev_op_buffer_sizes);
dev_opt = next_opt;
}
@@ -640,6 +659,9 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
cpu_to_be16(rx_buff_ring_entries);
cmd.create_rx_queue.enable_rsc =
!!(priv->dev->features & NETIF_F_LRO);
+ if (priv->header_split_enabled)
+ cmd.create_rx_queue.header_buffer_size =
+ cpu_to_be16(priv->header_buf_size);
}
return gve_adminq_issue_cmd(priv, &cmd);
@@ -755,7 +777,9 @@ static void gve_enable_supported_features(struct gve_priv *priv,
const struct gve_device_option_jumbo_frames
*dev_op_jumbo_frames,
const struct gve_device_option_dqo_qpl
- *dev_op_dqo_qpl)
+ *dev_op_dqo_qpl,
+ const struct gve_device_option_buffer_sizes
+ *dev_op_buffer_sizes)
{
/* Before control reaches this point, the page-size-capped max MTU from
* the gve_device_descriptor field has already been stored in
@@ -779,10 +803,22 @@ static void gve_enable_supported_features(struct gve_priv *priv,
if (priv->rx_pages_per_qpl == 0)
priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
}
+
+ if (dev_op_buffer_sizes &&
+ (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) {
+ priv->max_rx_buffer_size =
+ be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size);
+ priv->header_buf_size =
+ be16_to_cpu(dev_op_buffer_sizes->header_buffer_size);
+ dev_info(&priv->pdev->dev,
+ "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n",
+ priv->max_rx_buffer_size, priv->header_buf_size);
+ }
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
+ struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL;
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
@@ -816,7 +852,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
&dev_op_gqi_qpl, &dev_op_dqo_rda,
&dev_op_jumbo_frames,
- &dev_op_dqo_qpl);
+ &dev_op_dqo_qpl,
+ &dev_op_buffer_sizes);
if (err)
goto free_device_descriptor;
@@ -885,7 +922,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
gve_enable_supported_features(priv, supported_features_mask,
- dev_op_jumbo_frames, dev_op_dqo_qpl);
+ dev_op_jumbo_frames, dev_op_dqo_qpl,
+ dev_op_buffer_sizes);
free_device_descriptor:
dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 5865ccdccbd0..5ac972e45ff8 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -125,6 +125,15 @@ struct gve_device_option_jumbo_frames {
static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8);
+struct gve_device_option_buffer_sizes {
+ /* GVE_SUP_BUFFER_SIZES_MASK bit should be set */
+ __be32 supported_features_mask;
+ __be16 packet_buffer_size;
+ __be16 header_buffer_size;
+};
+
+static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8);
+
/* Terminology:
*
* RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
@@ -140,6 +149,7 @@ enum gve_dev_opt_id {
GVE_DEV_OPT_ID_DQO_RDA = 0x4,
GVE_DEV_OPT_ID_DQO_QPL = 0x7,
GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
+ GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa,
};
enum gve_dev_opt_req_feat_mask {
@@ -149,10 +159,12 @@ enum gve_dev_opt_req_feat_mask {
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0,
};
enum gve_sup_feature_mask {
GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
+ GVE_SUP_BUFFER_SIZES_MASK = 1 << 4,
};
#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
@@ -165,6 +177,7 @@ enum gve_driver_capbility {
gve_driver_capability_dqo_qpl = 2, /* reserved for future use */
gve_driver_capability_dqo_rda = 3,
gve_driver_capability_alt_miss_compl = 4,
+ gve_driver_capability_flexible_buffer_size = 5,
};
#define GVE_CAP1(a) BIT((int)a)
@@ -176,7 +189,8 @@ enum gve_driver_capbility {
(GVE_CAP1(gve_driver_capability_gqi_qpl) | \
GVE_CAP1(gve_driver_capability_gqi_rda) | \
GVE_CAP1(gve_driver_capability_dqo_rda) | \
- GVE_CAP1(gve_driver_capability_alt_miss_compl))
+ GVE_CAP1(gve_driver_capability_alt_miss_compl) | \
+ GVE_CAP1(gve_driver_capability_flexible_buffer_size))
#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0
@@ -260,7 +274,9 @@ struct gve_adminq_create_rx_queue {
__be16 packet_buffer_size;
__be16 rx_buff_ring_size;
u8 enable_rsc;
- u8 padding[5];
+ u8 padding1;
+ __be16 header_buffer_size;
+ u8 padding2[2];
};
static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56);
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index c36b93f0de15..b81584829c40 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -38,10 +38,18 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
netdev_features_t features);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings_dqo(struct gve_priv *priv);
-void gve_tx_free_rings_dqo(struct gve_priv *priv);
-int gve_rx_alloc_rings_dqo(struct gve_priv *priv);
-void gve_rx_free_rings_dqo(struct gve_priv *priv);
+int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx);
+void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx);
+int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx);
+void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx);
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
struct napi_struct *napi);
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx);
@@ -93,4 +101,6 @@ gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv,
gve_write_irq_doorbell_dqo(priv, block,
gve_setup_itr_interval_dqo(usecs));
}
+
+int gve_napi_poll_dqo(struct napi_struct *napi, int budget);
#endif /* _GVE_DQO_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index e5397aa1e48f..9aebfb843d9d 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -4,7 +4,6 @@
* Copyright (C) 2015-2021 Google, Inc.
*/
-#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include "gve.h"
#include "gve_adminq.h"
@@ -40,17 +39,18 @@ static u32 gve_get_msglevel(struct net_device *netdev)
* as declared in enum xdp_action inside file uapi/linux/bpf.h .
*/
static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
- "rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
- "rx_dropped", "tx_dropped", "tx_timeouts",
+ "rx_packets", "rx_hsplit_pkt", "tx_packets", "rx_bytes",
+ "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts",
"rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt",
+ "rx_hsplit_unsplit_pkt",
"interface_up_cnt", "interface_down_cnt", "reset_cnt",
"page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt",
};
static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
- "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]",
- "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]",
- "rx_frag_alloc_cnt[%u]",
+ "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]",
+ "rx_bytes[%u]", "rx_hsplit_bytes[%u]", "rx_cont_packet_cnt[%u]",
+ "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_frag_alloc_cnt[%u]",
"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
@@ -154,11 +154,13 @@ static void
gve_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats, u64 *data)
{
- u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,
- tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt,
+ u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes,
+ tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail,
+ tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt,
tmp_tx_pkts, tmp_tx_bytes;
- u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts,
- rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped;
+ u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt,
+ rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes,
+ tx_dropped;
int stats_idx, base_stats_idx, max_stats_idx;
struct stats *report_stats;
int *rx_qid_to_stats_idx;
@@ -185,8 +187,10 @@ gve_get_ethtool_stats(struct net_device *netdev,
kfree(rx_qid_to_stats_idx);
return;
}
- for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0,
- rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0;
+ for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0,
+ rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0,
+ rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0,
+ ring = 0;
ring < priv->rx_cfg.num_queues; ring++) {
if (priv->rx) {
do {
@@ -195,18 +199,23 @@ gve_get_ethtool_stats(struct net_device *netdev,
start =
u64_stats_fetch_begin(&priv->rx[ring].statss);
tmp_rx_pkts = rx->rpackets;
+ tmp_rx_hsplit_pkt = rx->rx_hsplit_pkt;
tmp_rx_bytes = rx->rbytes;
tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
tmp_rx_desc_err_dropped_pkt =
rx->rx_desc_err_dropped_pkt;
+ tmp_rx_hsplit_unsplit_pkt =
+ rx->rx_hsplit_unsplit_pkt;
} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
start));
rx_pkts += tmp_rx_pkts;
+ rx_hsplit_pkt += tmp_rx_hsplit_pkt;
rx_bytes += tmp_rx_bytes;
rx_skb_alloc_fail += tmp_rx_skb_alloc_fail;
rx_buf_alloc_fail += tmp_rx_buf_alloc_fail;
rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt;
+ rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt;
}
}
for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
@@ -227,6 +236,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
i = 0;
data[i++] = rx_pkts;
+ data[i++] = rx_hsplit_pkt;
data[i++] = tx_pkts;
data[i++] = rx_bytes;
data[i++] = tx_bytes;
@@ -238,6 +248,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = rx_skb_alloc_fail;
data[i++] = rx_buf_alloc_fail;
data[i++] = rx_desc_err_dropped_pkt;
+ data[i++] = rx_hsplit_unsplit_pkt;
data[i++] = priv->interface_up_cnt;
data[i++] = priv->interface_down_cnt;
data[i++] = priv->reset_cnt;
@@ -277,6 +288,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
start =
u64_stats_fetch_begin(&priv->rx[ring].statss);
tmp_rx_bytes = rx->rbytes;
+ tmp_rx_hsplit_bytes = rx->rx_hsplit_bytes;
tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
tmp_rx_desc_err_dropped_pkt =
@@ -284,6 +296,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
start));
data[i++] = tmp_rx_bytes;
+ data[i++] = tmp_rx_hsplit_bytes;
data[i++] = rx->rx_cont_packet_cnt;
data[i++] = rx->rx_frag_flip_cnt;
data[i++] = rx->rx_frag_copy_cnt;
@@ -480,6 +493,29 @@ static void gve_get_ringparam(struct net_device *netdev,
cmd->tx_max_pending = priv->tx_desc_cnt;
cmd->rx_pending = priv->rx_desc_cnt;
cmd->tx_pending = priv->tx_desc_cnt;
+
+ if (!gve_header_split_supported(priv))
+ kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN;
+ else if (priv->header_split_enabled)
+ kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
+ else
+ kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED;
+}
+
+static int gve_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *cmd,
+ struct kernel_ethtool_ringparam *kernel_cmd,
+ struct netlink_ext_ack *extack)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ if (priv->tx_desc_cnt != cmd->tx_pending ||
+ priv->rx_desc_cnt != cmd->rx_pending) {
+ dev_info(&priv->pdev->dev, "Modify ring size is not supported.\n");
+ return -EOPNOTSUPP;
+ }
+
+ return gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split);
}
static int gve_user_reset(struct net_device *netdev, u32 *flags)
@@ -655,6 +691,7 @@ static int gve_set_coalesce(struct net_device *netdev,
const struct ethtool_ops gve_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+ .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT,
.get_drvinfo = gve_get_drvinfo,
.get_strings = gve_get_strings,
.get_sset_count = gve_get_sset_count,
@@ -667,6 +704,7 @@ const struct ethtool_ops gve_ethtool_ops = {
.get_coalesce = gve_get_coalesce,
.set_coalesce = gve_set_coalesce,
.get_ringparam = gve_get_ringparam,
+ .set_ringparam = gve_set_ringparam,
.reset = gve_user_reset,
.get_tunable = gve_get_tunable,
.set_tunable = gve_set_tunable,
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 619bf63ec935..166bd827a6d7 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -22,6 +22,7 @@
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
+#include "gve_utils.h"
#define GVE_DEFAULT_RX_COPYBREAK (256)
@@ -252,7 +253,7 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg)
return IRQ_HANDLED;
}
-static int gve_napi_poll(struct napi_struct *napi, int budget)
+int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
__be32 __iomem *irq_doorbell;
@@ -302,7 +303,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
return work_done;
}
-static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
+int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block =
container_of(napi, struct gve_notify_block, napi);
@@ -581,19 +582,59 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
gve_clear_device_resources_ok(priv);
}
-static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
- int (*gve_poll)(struct napi_struct *, int))
+static int gve_unregister_qpl(struct gve_priv *priv, u32 i)
{
- struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+ int err;
+
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
- netif_napi_add(priv->dev, &block->napi, gve_poll);
+ priv->num_registered_pages -= priv->qpls[i].num_entries;
+ return 0;
}
-static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+static int gve_register_qpl(struct gve_priv *priv, u32 i)
{
- struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+ int num_rx_qpls;
+ int pages;
+ int err;
+
+ /* Rx QPLs succeed Tx QPLs in the priv->qpls array. */
+ num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
+ if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot register nonexisting QPL at index %d\n", i);
+ return -EINVAL;
+ }
+
+ pages = priv->qpls[i].num_entries;
+
+ if (pages + priv->num_registered_pages > priv->max_registered_pages) {
+ netif_err(priv, drv, priv->dev,
+ "Reached max number of registered pages %llu > %llu\n",
+ pages + priv->num_registered_pages,
+ priv->max_registered_pages);
+ return -EINVAL;
+ }
+
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
- netif_napi_del(&block->napi);
+ priv->num_registered_pages += pages;
+ return 0;
}
static int gve_register_xdp_qpls(struct gve_priv *priv)
@@ -602,55 +643,41 @@ static int gve_register_xdp_qpls(struct gve_priv *priv)
int err;
int i;
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ start_id = gve_xdp_tx_start_queue_id(priv);
for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
+ err = gve_register_qpl(priv, i);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err)
return err;
- }
}
return 0;
}
static int gve_register_qpls(struct gve_priv *priv)
{
+ int num_tx_qpls, num_rx_qpls;
int start_id;
int err;
int i;
- start_id = gve_tx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
+ gve_is_qpl(priv));
+ num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
+
+ for (i = 0; i < num_tx_qpls; i++) {
+ err = gve_register_qpl(priv, i);
+ if (err)
return err;
- }
}
- start_id = gve_rx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
+ /* there might be a gap between the tx and rx qpl ids */
+ start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
+ for (i = 0; i < num_rx_qpls; i++) {
+ err = gve_register_qpl(priv, start_id + i);
+ if (err)
return err;
- }
}
+
return 0;
}
@@ -660,48 +687,40 @@ static int gve_unregister_xdp_qpls(struct gve_priv *priv)
int err;
int i;
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ start_id = gve_xdp_tx_start_queue_id(priv);
for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean up */
- if (err) {
- netif_err(priv, drv, priv->dev,
- "Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
+ err = gve_unregister_qpl(priv, i);
+ /* This failure will trigger a reset - no need to clean */
+ if (err)
return err;
- }
}
return 0;
}
static int gve_unregister_qpls(struct gve_priv *priv)
{
+ int num_tx_qpls, num_rx_qpls;
int start_id;
int err;
int i;
- start_id = gve_tx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean up */
- if (err) {
- netif_err(priv, drv, priv->dev,
- "Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
+ gve_is_qpl(priv));
+ num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
+
+ for (i = 0; i < num_tx_qpls; i++) {
+ err = gve_unregister_qpl(priv, i);
+ /* This failure will trigger a reset - no need to clean */
+ if (err)
return err;
- }
}
- start_id = gve_rx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean up */
- if (err) {
- netif_err(priv, drv, priv->dev,
- "Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
+ start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
+ for (i = 0; i < num_rx_qpls; i++) {
+ err = gve_unregister_qpl(priv, start_id + i);
+ /* This failure will trigger a reset - no need to clean */
+ if (err)
return err;
- }
}
return 0;
}
@@ -776,120 +795,124 @@ static int gve_create_rings(struct gve_priv *priv)
return 0;
}
-static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
- int (*napi_poll)(struct napi_struct *napi,
- int budget))
+static void init_xdp_sync_stats(struct gve_priv *priv)
{
int start_id = gve_xdp_tx_start_queue_id(priv);
int i;
- /* Add xdp tx napi & init sync stats*/
+ /* Init stats */
for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
u64_stats_init(&priv->tx[i].statss);
priv->tx[i].ntfy_id = ntfy_idx;
- gve_add_napi(priv, ntfy_idx, napi_poll);
}
}
-static void add_napi_init_sync_stats(struct gve_priv *priv,
- int (*napi_poll)(struct napi_struct *napi,
- int budget))
+static void gve_init_sync_stats(struct gve_priv *priv)
{
int i;
- /* Add tx napi & init sync stats*/
- for (i = 0; i < gve_num_tx_queues(priv); i++) {
- int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
-
+ for (i = 0; i < priv->tx_cfg.num_queues; i++)
u64_stats_init(&priv->tx[i].statss);
- priv->tx[i].ntfy_id = ntfy_idx;
- gve_add_napi(priv, ntfy_idx, napi_poll);
- }
- /* Add rx napi & init sync stats*/
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ /* Init stats for XDP TX queues */
+ init_xdp_sync_stats(priv);
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
u64_stats_init(&priv->rx[i].statss);
- priv->rx[i].ntfy_id = ntfy_idx;
- gve_add_napi(priv, ntfy_idx, napi_poll);
+}
+
+static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
+{
+ cfg->qcfg = &priv->tx_cfg;
+ cfg->raw_addressing = !gve_is_qpl(priv);
+ cfg->qpls = priv->qpls;
+ cfg->qpl_cfg = &priv->qpl_cfg;
+ cfg->ring_size = priv->tx_desc_cnt;
+ cfg->start_idx = 0;
+ cfg->num_rings = gve_num_tx_queues(priv);
+ cfg->tx = priv->tx;
+}
+
+static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
+{
+ int i;
+
+ if (!priv->tx)
+ return;
+
+ for (i = start_id; i < start_id + num_rings; i++) {
+ if (gve_is_gqi(priv))
+ gve_tx_stop_ring_gqi(priv, i);
+ else
+ gve_tx_stop_ring_dqo(priv, i);
}
}
-static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
+static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
+ int num_rings)
{
- if (gve_is_gqi(priv)) {
- gve_tx_free_rings_gqi(priv, start_id, num_rings);
- } else {
- gve_tx_free_rings_dqo(priv);
+ int i;
+
+ for (i = start_id; i < start_id + num_rings; i++) {
+ if (gve_is_gqi(priv))
+ gve_tx_start_ring_gqi(priv, i);
+ else
+ gve_tx_start_ring_dqo(priv, i);
}
}
static int gve_alloc_xdp_rings(struct gve_priv *priv)
{
- int start_id;
+ struct gve_tx_alloc_rings_cfg cfg = {0};
int err = 0;
if (!priv->num_xdp_queues)
return 0;
- start_id = gve_xdp_tx_start_queue_id(priv);
- err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
+ gve_tx_get_curr_alloc_cfg(priv, &cfg);
+ cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
+ cfg.num_rings = priv->num_xdp_queues;
+
+ err = gve_tx_alloc_rings_gqi(priv, &cfg);
if (err)
return err;
- add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
+
+ gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
+ init_xdp_sync_stats(priv);
return 0;
}
-static int gve_alloc_rings(struct gve_priv *priv)
+static int gve_alloc_rings(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
int err;
- /* Setup tx rings */
- priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
- GFP_KERNEL);
- if (!priv->tx)
- return -ENOMEM;
-
if (gve_is_gqi(priv))
- err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
+ err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
else
- err = gve_tx_alloc_rings_dqo(priv);
+ err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
if (err)
- goto free_tx;
-
- /* Setup rx rings */
- priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
- GFP_KERNEL);
- if (!priv->rx) {
- err = -ENOMEM;
- goto free_tx_queue;
- }
+ return err;
if (gve_is_gqi(priv))
- err = gve_rx_alloc_rings(priv);
+ err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
else
- err = gve_rx_alloc_rings_dqo(priv);
+ err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
if (err)
- goto free_rx;
-
- if (gve_is_gqi(priv))
- add_napi_init_sync_stats(priv, gve_napi_poll);
- else
- add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
+ goto free_tx;
return 0;
-free_rx:
- kvfree(priv->rx);
- priv->rx = NULL;
-free_tx_queue:
- gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
- kvfree(priv->tx);
- priv->tx = NULL;
+ if (gve_is_gqi(priv))
+ gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
+ else
+ gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
return err;
}
@@ -937,52 +960,30 @@ static int gve_destroy_rings(struct gve_priv *priv)
return 0;
}
-static void gve_rx_free_rings(struct gve_priv *priv)
-{
- if (gve_is_gqi(priv))
- gve_rx_free_rings_gqi(priv);
- else
- gve_rx_free_rings_dqo(priv);
-}
-
static void gve_free_xdp_rings(struct gve_priv *priv)
{
- int ntfy_idx, start_id;
- int i;
+ struct gve_tx_alloc_rings_cfg cfg = {0};
+
+ gve_tx_get_curr_alloc_cfg(priv, &cfg);
+ cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
+ cfg.num_rings = priv->num_xdp_queues;
- start_id = gve_xdp_tx_start_queue_id(priv);
if (priv->tx) {
- for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
- ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
- gve_remove_napi(priv, ntfy_idx);
- }
- gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
+ gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
+ gve_tx_free_rings_gqi(priv, &cfg);
}
}
-static void gve_free_rings(struct gve_priv *priv)
+static void gve_free_rings(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_cfg)
{
- int num_tx_queues = gve_num_tx_queues(priv);
- int ntfy_idx;
- int i;
-
- if (priv->tx) {
- for (i = 0; i < num_tx_queues; i++) {
- ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
- gve_remove_napi(priv, ntfy_idx);
- }
- gve_tx_free_rings(priv, 0, num_tx_queues);
- kvfree(priv->tx);
- priv->tx = NULL;
- }
- if (priv->rx) {
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
- gve_remove_napi(priv, ntfy_idx);
- }
- gve_rx_free_rings(priv);
- kvfree(priv->rx);
- priv->rx = NULL;
+ if (gve_is_gqi(priv)) {
+ gve_tx_free_rings_gqi(priv, tx_cfg);
+ gve_rx_free_rings_gqi(priv, rx_cfg);
+ } else {
+ gve_tx_free_rings_dqo(priv, tx_cfg);
+ gve_rx_free_rings_dqo(priv, rx_cfg);
}
}
@@ -1004,21 +1005,13 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
return 0;
}
-static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
- int pages)
+static int gve_alloc_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ u32 id, int pages)
{
- struct gve_queue_page_list *qpl = &priv->qpls[id];
int err;
int i;
- if (pages + priv->num_registered_pages > priv->max_registered_pages) {
- netif_err(priv, drv, priv->dev,
- "Reached max number of registered pages %llu > %llu\n",
- pages + priv->num_registered_pages,
- priv->max_registered_pages);
- return -EINVAL;
- }
-
qpl->id = id;
qpl->num_entries = 0;
qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
@@ -1039,7 +1032,6 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
return -ENOMEM;
qpl->num_entries++;
}
- priv->num_registered_pages += pages;
return 0;
}
@@ -1053,9 +1045,10 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
put_page(page);
}
-static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
+static void gve_free_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ int id)
{
- struct gve_queue_page_list *qpl = &priv->qpls[id];
int i;
if (!qpl->pages)
@@ -1072,19 +1065,30 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
free_pages:
kvfree(qpl->pages);
qpl->pages = NULL;
- priv->num_registered_pages -= qpl->num_entries;
}
-static int gve_alloc_xdp_qpls(struct gve_priv *priv)
+static void gve_free_n_qpls(struct gve_priv *priv,
+ struct gve_queue_page_list *qpls,
+ int start_id,
+ int num_qpls)
+{
+ int i;
+
+ for (i = start_id; i < start_id + num_qpls; i++)
+ gve_free_queue_page_list(priv, &qpls[i], i);
+}
+
+static int gve_alloc_n_qpls(struct gve_priv *priv,
+ struct gve_queue_page_list *qpls,
+ int page_count,
+ int start_id,
+ int num_qpls)
{
- int start_id;
- int i, j;
int err;
+ int i;
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_alloc_queue_page_list(priv, i,
- priv->tx_pages_per_qpl);
+ for (i = start_id; i < start_id + num_qpls; i++) {
+ err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count);
if (err)
goto free_qpls;
}
@@ -1092,95 +1096,89 @@ static int gve_alloc_xdp_qpls(struct gve_priv *priv)
return 0;
free_qpls:
- for (j = start_id; j <= i; j++)
- gve_free_queue_page_list(priv, j);
+ /* Must include the failing QPL too for gve_alloc_queue_page_list fails
+ * without cleaning up.
+ */
+ gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1);
return err;
}
-static int gve_alloc_qpls(struct gve_priv *priv)
+static int gve_alloc_qpls(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *cfg)
{
- int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
+ int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
+ int rx_start_id, tx_num_qpls, rx_num_qpls;
+ struct gve_queue_page_list *qpls;
int page_count;
- int start_id;
- int i, j;
int err;
- if (!gve_is_qpl(priv))
+ if (cfg->raw_addressing)
return 0;
- priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
- if (!priv->qpls)
+ qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL);
+ if (!qpls)
return -ENOMEM;
- start_id = gve_tx_start_qpl_id(priv);
- page_count = priv->tx_pages_per_qpl;
- for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
- err = gve_alloc_queue_page_list(priv, i,
- page_count);
- if (err)
- goto free_qpls;
+ cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) *
+ sizeof(unsigned long) * BITS_PER_BYTE;
+ cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!cfg->qpl_cfg->qpl_id_map) {
+ err = -ENOMEM;
+ goto free_qpl_array;
}
- start_id = gve_rx_start_qpl_id(priv);
+ /* Allocate TX QPLs */
+ page_count = priv->tx_pages_per_qpl;
+ tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues,
+ gve_is_qpl(priv));
+ err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls);
+ if (err)
+ goto free_qpl_map;
+ /* Allocate RX QPLs */
+ rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg);
/* For GQI_QPL number of pages allocated have 1:1 relationship with
* number of descriptors. For DQO, number of pages required are
* more than descriptors (because of out of order completions).
*/
- page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
- priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
- for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
- err = gve_alloc_queue_page_list(priv, i,
- page_count);
- if (err)
- goto free_qpls;
- }
-
- priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
- sizeof(unsigned long) * BITS_PER_BYTE;
- priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
- sizeof(unsigned long), GFP_KERNEL);
- if (!priv->qpl_cfg.qpl_id_map) {
- err = -ENOMEM;
- goto free_qpls;
- }
+ page_count = cfg->is_gqi ? priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
+ rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv));
+ err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls);
+ if (err)
+ goto free_tx_qpls;
+ cfg->qpls = qpls;
return 0;
-free_qpls:
- for (j = 0; j <= i; j++)
- gve_free_queue_page_list(priv, j);
- kvfree(priv->qpls);
- priv->qpls = NULL;
+free_tx_qpls:
+ gve_free_n_qpls(priv, qpls, 0, tx_num_qpls);
+free_qpl_map:
+ kvfree(cfg->qpl_cfg->qpl_id_map);
+ cfg->qpl_cfg->qpl_id_map = NULL;
+free_qpl_array:
+ kvfree(qpls);
return err;
}
-static void gve_free_xdp_qpls(struct gve_priv *priv)
-{
- int start_id;
- int i;
-
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
- gve_free_queue_page_list(priv, i);
-}
-
-static void gve_free_qpls(struct gve_priv *priv)
+static void gve_free_qpls(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *cfg)
{
- int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
+ int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
+ struct gve_queue_page_list *qpls = cfg->qpls;
int i;
- if (!priv->qpls)
+ if (!qpls)
return;
- kvfree(priv->qpl_cfg.qpl_id_map);
- priv->qpl_cfg.qpl_id_map = NULL;
+ kvfree(cfg->qpl_cfg->qpl_id_map);
+ cfg->qpl_cfg->qpl_id_map = NULL;
for (i = 0; i < max_queues; i++)
- gve_free_queue_page_list(priv, i);
+ gve_free_queue_page_list(priv, &qpls[i], i);
- kvfree(priv->qpls);
- priv->qpls = NULL;
+ kvfree(qpls);
+ cfg->qpls = NULL;
}
/* Use this to schedule a reset when the device is capable of continuing
@@ -1278,58 +1276,178 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
static void gve_drain_page_cache(struct gve_priv *priv)
{
- struct page_frag_cache *nc;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- nc = &priv->rx[i].page_cache;
- if (nc->va) {
- __page_frag_cache_drain(virt_to_page(nc->va),
- nc->pagecnt_bias);
- nc->va = NULL;
- }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
+ page_frag_cache_drain(&priv->rx[i].page_cache);
+}
+
+static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *cfg)
+{
+ cfg->raw_addressing = !gve_is_qpl(priv);
+ cfg->is_gqi = gve_is_gqi(priv);
+ cfg->num_xdp_queues = priv->num_xdp_queues;
+ cfg->qpl_cfg = &priv->qpl_cfg;
+ cfg->tx_cfg = &priv->tx_cfg;
+ cfg->rx_cfg = &priv->rx_cfg;
+ cfg->qpls = priv->qpls;
+}
+
+static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
+{
+ cfg->qcfg = &priv->rx_cfg;
+ cfg->qcfg_tx = &priv->tx_cfg;
+ cfg->raw_addressing = !gve_is_qpl(priv);
+ cfg->enable_header_split = priv->header_split_enabled;
+ cfg->qpls = priv->qpls;
+ cfg->qpl_cfg = &priv->qpl_cfg;
+ cfg->ring_size = priv->rx_desc_cnt;
+ cfg->packet_buffer_size = gve_is_gqi(priv) ?
+ GVE_DEFAULT_RX_BUFFER_SIZE :
+ priv->data_buffer_size_dqo;
+ cfg->rx = priv->rx;
+}
+
+static void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg);
+ gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
+ gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
+}
+
+static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
+{
+ int i;
+
+ for (i = 0; i < num_rings; i++) {
+ if (gve_is_gqi(priv))
+ gve_rx_start_ring_gqi(priv, i);
+ else
+ gve_rx_start_ring_dqo(priv, i);
}
}
-static int gve_open(struct net_device *dev)
+static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
{
- struct gve_priv *priv = netdev_priv(dev);
+ int i;
+
+ if (!priv->rx)
+ return;
+
+ for (i = 0; i < num_rings; i++) {
+ if (gve_is_gqi(priv))
+ gve_rx_stop_ring_gqi(priv, i);
+ else
+ gve_rx_stop_ring_dqo(priv, i);
+ }
+}
+
+static void gve_queues_mem_free(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ gve_free_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
+ gve_free_qpls(priv, qpls_alloc_cfg);
+}
+
+static int gve_queues_mem_alloc(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ int err;
+
+ err = gve_alloc_qpls(priv, qpls_alloc_cfg);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n");
+ return err;
+ }
+ tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
+ rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
+ err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "Failed to alloc rings\n");
+ goto free_qpls;
+ }
+
+ return 0;
+
+free_qpls:
+ gve_free_qpls(priv, qpls_alloc_cfg);
+ return err;
+}
+
+static void gve_queues_mem_remove(struct gve_priv *priv)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
+
+ gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ gve_queues_mem_free(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ priv->qpls = NULL;
+ priv->tx = NULL;
+ priv->rx = NULL;
+}
+
+/* The passed-in queue memory is stored into priv and the queues are made live.
+ * No memory is allocated. Passed-in memory is freed on errors.
+ */
+static int gve_queues_start(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ struct net_device *dev = priv->dev;
int err;
+ /* Record new resources into priv */
+ priv->qpls = qpls_alloc_cfg->qpls;
+ priv->tx = tx_alloc_cfg->tx;
+ priv->rx = rx_alloc_cfg->rx;
+
+ /* Record new configs into priv */
+ priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg;
+ priv->tx_cfg = *tx_alloc_cfg->qcfg;
+ priv->rx_cfg = *rx_alloc_cfg->qcfg;
+ priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
+ priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
+
if (priv->xdp_prog)
priv->num_xdp_queues = priv->rx_cfg.num_queues;
else
priv->num_xdp_queues = 0;
- err = gve_alloc_qpls(priv);
- if (err)
- return err;
-
- err = gve_alloc_rings(priv);
- if (err)
- goto free_qpls;
+ gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
+ gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
+ gve_init_sync_stats(priv);
err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
if (err)
- goto free_rings;
+ goto stop_and_free_rings;
err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
if (err)
- goto free_rings;
+ goto stop_and_free_rings;
err = gve_reg_xdp_info(priv, dev);
if (err)
- goto free_rings;
+ goto stop_and_free_rings;
err = gve_register_qpls(priv);
if (err)
goto reset;
- if (!gve_is_gqi(priv)) {
- /* Hard code this for now. This may be tuned in the future for
- * performance.
- */
- priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
- }
+ priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
+ priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
+
err = gve_create_rings(priv);
if (err)
goto reset;
@@ -1346,32 +1464,53 @@ static int gve_open(struct net_device *dev)
priv->interface_up_cnt++;
return 0;
-free_rings:
- gve_free_rings(priv);
-free_qpls:
- gve_free_qpls(priv);
- return err;
-
reset:
- /* This must have been called from a reset due to the rtnl lock
- * so just return at this point.
- */
if (gve_get_reset_in_progress(priv))
- return err;
- /* Otherwise reset before returning */
+ goto stop_and_free_rings;
gve_reset_and_teardown(priv, true);
/* if this fails there is nothing we can do so just ignore the return */
gve_reset_recovery(priv, false);
/* return the original error */
return err;
+stop_and_free_rings:
+ gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+ gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
+ gve_queues_mem_remove(priv);
+ return err;
}
-static int gve_close(struct net_device *dev)
+static int gve_open(struct net_device *dev)
{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
struct gve_priv *priv = netdev_priv(dev);
int err;
- netif_carrier_off(dev);
+ gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+
+ err = gve_queues_mem_alloc(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ return err;
+
+ /* No need to free on error: ownership of resources is lost after
+ * calling gve_queues_start.
+ */
+ err = gve_queues_start(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int gve_queues_stop(struct gve_priv *priv)
+{
+ int err;
+
+ netif_carrier_off(priv->dev);
if (gve_get_device_rings_ok(priv)) {
gve_turndown(priv);
gve_drain_page_cache(priv);
@@ -1386,8 +1525,10 @@ static int gve_close(struct net_device *dev)
del_timer_sync(&priv->stats_report_timer);
gve_unreg_xdp_info(priv);
- gve_free_rings(priv);
- gve_free_qpls(priv);
+
+ gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+ gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
+
priv->interface_down_cnt++;
return 0;
@@ -1402,10 +1543,26 @@ err:
return gve_reset_recovery(priv, false);
}
+static int gve_close(struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ int err;
+
+ err = gve_queues_stop(priv);
+ if (err)
+ return err;
+
+ gve_queues_mem_remove(priv);
+ return 0;
+}
+
static int gve_remove_xdp_queues(struct gve_priv *priv)
{
+ int qpl_start_id;
int err;
+ qpl_start_id = gve_xdp_tx_start_queue_id(priv);
+
err = gve_destroy_xdp_rings(priv);
if (err)
return err;
@@ -1416,18 +1573,22 @@ static int gve_remove_xdp_queues(struct gve_priv *priv)
gve_unreg_xdp_info(priv);
gve_free_xdp_rings(priv);
- gve_free_xdp_qpls(priv);
+
+ gve_free_n_qpls(priv, priv->qpls, qpl_start_id, gve_num_xdp_qpls(priv));
priv->num_xdp_queues = 0;
return 0;
}
static int gve_add_xdp_queues(struct gve_priv *priv)
{
+ int start_id;
int err;
- priv->num_xdp_queues = priv->tx_cfg.num_queues;
+ priv->num_xdp_queues = priv->rx_cfg.num_queues;
- err = gve_alloc_xdp_qpls(priv);
+ start_id = gve_xdp_tx_start_queue_id(priv);
+ err = gve_alloc_n_qpls(priv, priv->qpls, priv->tx_pages_per_qpl,
+ start_id, gve_num_xdp_qpls(priv));
if (err)
goto err;
@@ -1452,7 +1613,7 @@ static int gve_add_xdp_queues(struct gve_priv *priv)
free_xdp_rings:
gve_free_xdp_rings(priv);
free_xdp_qpls:
- gve_free_xdp_qpls(priv);
+ gve_free_n_qpls(priv, priv->qpls, start_id, gve_num_xdp_qpls(priv));
err:
priv->num_xdp_queues = 0;
return err;
@@ -1702,42 +1863,87 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+static int gve_adjust_config(struct gve_priv *priv,
+ struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ int err;
+
+ /* Allocate resources for the new confiugration */
+ err = gve_queues_mem_alloc(priv, qpls_alloc_cfg,
+ tx_alloc_cfg, rx_alloc_cfg);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Adjust config failed to alloc new queues");
+ return err;
+ }
+
+ /* Teardown the device and free existing resources */
+ err = gve_close(priv->dev);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Adjust config failed to close old queues");
+ gve_queues_mem_free(priv, qpls_alloc_cfg,
+ tx_alloc_cfg, rx_alloc_cfg);
+ return err;
+ }
+
+ /* Bring the device back up again with the new resources. */
+ err = gve_queues_start(priv, qpls_alloc_cfg,
+ tx_alloc_cfg, rx_alloc_cfg);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
+ /* No need to free on error: ownership of resources is lost after
+ * calling gve_queues_start.
+ */
+ gve_turndown(priv);
+ return err;
+ }
+
+ return 0;
+}
+
int gve_adjust_queues(struct gve_priv *priv,
struct gve_queue_config new_rx_config,
struct gve_queue_config new_tx_config)
{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
+ struct gve_qpl_config new_qpl_cfg;
int err;
- if (netif_carrier_ok(priv->dev)) {
- /* To make this process as simple as possible we teardown the
- * device, set the new configuration, and then bring the device
- * up again.
- */
- err = gve_close(priv->dev);
- /* we have already tried to reset in close,
- * just fail at this point
- */
- if (err)
- return err;
- priv->tx_cfg = new_tx_config;
- priv->rx_cfg = new_rx_config;
+ gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
- err = gve_open(priv->dev);
- if (err)
- goto err;
+ /* qpl_cfg is not read-only, it contains a map that gets updated as
+ * rings are allocated, which is why we cannot use the yet unreleased
+ * one in priv.
+ */
+ qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+ tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+ rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+
+ /* Relay the new config from ethtool */
+ qpls_alloc_cfg.tx_cfg = &new_tx_config;
+ tx_alloc_cfg.qcfg = &new_tx_config;
+ rx_alloc_cfg.qcfg_tx = &new_tx_config;
+ qpls_alloc_cfg.rx_cfg = &new_rx_config;
+ rx_alloc_cfg.qcfg = &new_rx_config;
+ tx_alloc_cfg.num_rings = new_tx_config.num_queues;
- return 0;
+ if (netif_carrier_ok(priv->dev)) {
+ err = gve_adjust_config(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ return err;
}
/* Set the config for the next up. */
priv->tx_cfg = new_tx_config;
priv->rx_cfg = new_rx_config;
return 0;
-err:
- netif_err(priv, drv, priv->dev,
- "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
- gve_turndown(priv);
- return err;
}
static void gve_turndown(struct gve_priv *priv)
@@ -1853,40 +2059,91 @@ out:
priv->tx_timeo_cnt++;
}
+u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
+{
+ if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
+ return GVE_MAX_RX_BUFFER_SIZE;
+ else
+ return GVE_DEFAULT_RX_BUFFER_SIZE;
+}
+
+/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
+bool gve_header_split_supported(const struct gve_priv *priv)
+{
+ return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
+}
+
+int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
+ bool enable_hdr_split;
+ int err = 0;
+
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
+ return 0;
+
+ if (!gve_header_split_supported(priv)) {
+ dev_err(&priv->pdev->dev, "Header-split not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
+ enable_hdr_split = true;
+ else
+ enable_hdr_split = false;
+
+ if (enable_hdr_split == priv->header_split_enabled)
+ return 0;
+
+ gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+
+ rx_alloc_cfg.enable_header_split = enable_hdr_split;
+ rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
+
+ if (netif_running(priv->dev))
+ err = gve_adjust_config(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ return err;
+}
+
static int gve_set_features(struct net_device *netdev,
netdev_features_t features)
{
const netdev_features_t orig_features = netdev->features;
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
struct gve_priv *priv = netdev_priv(netdev);
+ struct gve_qpl_config new_qpl_cfg;
int err;
+ gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ /* qpl_cfg is not read-only, it contains a map that gets updated as
+ * rings are allocated, which is why we cannot use the yet unreleased
+ * one in priv.
+ */
+ qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+ tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+ rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+
if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
netdev->features ^= NETIF_F_LRO;
if (netif_carrier_ok(netdev)) {
- /* To make this process as simple as possible we
- * teardown the device, set the new configuration,
- * and then bring the device up again.
- */
- err = gve_close(netdev);
- /* We have already tried to reset in close, just fail
- * at this point.
- */
- if (err)
- goto err;
-
- err = gve_open(netdev);
- if (err)
- goto err;
+ err = gve_adjust_config(priv, &qpls_alloc_cfg,
+ &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err) {
+ /* Revert the change on error. */
+ netdev->features = orig_features;
+ return err;
+ }
}
}
return 0;
-err:
- /* Reverts the change on error. */
- netdev->features = orig_features;
- netif_err(priv, drv, netdev,
- "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
- return err;
}
static const struct net_device_ops gve_netdev_ops = {
@@ -2051,6 +2308,8 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
goto err;
}
+ priv->num_registered_pages = 0;
+
if (skip_describe_device)
goto setup_device;
@@ -2080,7 +2339,6 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
if (!gve_is_gqi(priv))
netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
- priv->num_registered_pages = 0;
priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
/* gvnic has one Notification Block per MSI-x vector, except for the
* management vector
@@ -2297,6 +2555,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
priv->service_task_flags = 0x0;
priv->state_flags = 0x0;
priv->ethtool_flags = 0x0;
+ priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
+ priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_set_probe_in_progress(priv);
priv->gve_wq = alloc_ordered_workqueue("gve", 0);
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 7a8dc5386fff..20f5a9e7fae9 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -23,7 +23,9 @@ static void gve_rx_free_buffer(struct device *dev,
gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}
-static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
+static void gve_rx_unfill_pages(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
u32 slots = rx->mask + 1;
int i;
@@ -36,7 +38,7 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
for (i = 0; i < slots; i++)
page_ref_sub(rx->data.page_info[i].page,
rx->data.page_info[i].pagecnt_bias - 1);
- gve_unassign_qpl(priv, rx->data.qpl->id);
+ gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id);
rx->data.qpl = NULL;
for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) {
@@ -49,16 +51,26 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
rx->data.page_info = NULL;
}
-static void gve_rx_free_ring(struct gve_priv *priv, int idx)
+void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx)
+{
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ if (!gve_rx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ gve_rx_remove_from_block(priv, idx);
+}
+
+static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *dev = &priv->pdev->dev;
u32 slots = rx->mask + 1;
+ int idx = rx->q_num;
size_t bytes;
- gve_rx_remove_from_block(priv, idx);
-
- bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+ bytes = sizeof(struct gve_rx_desc) * cfg->ring_size;
dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
rx->desc.desc_ring = NULL;
@@ -66,7 +78,7 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx)
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
- gve_rx_unfill_pages(priv, rx);
+ gve_rx_unfill_pages(priv, rx, cfg);
bytes = sizeof(*rx->data.data_ring) * slots;
dma_free_coherent(dev, bytes, rx->data.data_ring,
@@ -93,7 +105,8 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
struct gve_rx_slot_page_info *page_info,
- union gve_rx_data_slot *data_slot)
+ union gve_rx_data_slot *data_slot,
+ struct gve_rx_ring *rx)
{
struct page *page;
dma_addr_t dma;
@@ -101,14 +114,19 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
GFP_ATOMIC);
- if (err)
+ if (err) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
return err;
+ }
gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
return 0;
}
-static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
+static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
struct gve_priv *priv = rx->gve;
u32 slots;
@@ -127,7 +145,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
return -ENOMEM;
if (!rx->data.raw_addressing) {
- rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num);
+ rx->data.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
if (!rx->data.qpl) {
kvfree(rx->data.page_info);
rx->data.page_info = NULL;
@@ -143,8 +161,9 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
&rx->data.data_ring[i].qpl_offset);
continue;
}
- err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
- &rx->data.data_ring[i]);
+ err = gve_rx_alloc_buffer(priv, &priv->pdev->dev,
+ &rx->data.page_info[i],
+ &rx->data.data_ring[i], rx);
if (err)
goto alloc_err_rda;
}
@@ -185,7 +204,7 @@ alloc_err_qpl:
page_ref_sub(rx->data.page_info[i].page,
rx->data.page_info[i].pagecnt_bias - 1);
- gve_unassign_qpl(priv, rx->data.qpl->id);
+ gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id);
rx->data.qpl = NULL;
return err;
@@ -207,13 +226,23 @@ static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
ctx->drop_pkt = false;
}
-static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
+void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx)
+{
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ gve_rx_add_to_block(priv, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll);
+}
+
+static int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *hdev = &priv->pdev->dev;
+ u32 slots = priv->rx_data_slot_cnt;
int filled_pages;
size_t bytes;
- u32 slots;
int err;
netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
@@ -223,9 +252,8 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
rx->gve = priv;
rx->q_num = idx;
- slots = priv->rx_data_slot_cnt;
rx->mask = slots - 1;
- rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
+ rx->data.raw_addressing = cfg->raw_addressing;
/* alloc rx data ring */
bytes = sizeof(*rx->data.data_ring) * slots;
@@ -246,7 +274,7 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
goto abort_with_slots;
}
- filled_pages = gve_prefill_rx_pages(rx);
+ filled_pages = gve_rx_prefill_pages(rx, cfg);
if (filled_pages < 0) {
err = -ENOMEM;
goto abort_with_copy_pool;
@@ -269,7 +297,7 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
(unsigned long)rx->data.data_bus);
/* alloc rx desc ring */
- bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+ bytes = sizeof(struct gve_rx_desc) * cfg->ring_size;
rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
GFP_KERNEL);
if (!rx->desc.desc_ring) {
@@ -277,15 +305,11 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
goto abort_with_q_resources;
}
rx->cnt = 0;
- rx->db_threshold = priv->rx_desc_cnt / 2;
+ rx->db_threshold = slots / 2;
rx->desc.seqno = 1;
- /* Allocating half-page buffers allows page-flipping which is faster
- * than copying or allocating new pages.
- */
rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_rx_ctx_clear(&rx->ctx);
- gve_rx_add_to_block(priv, idx);
return 0;
@@ -294,7 +318,7 @@ abort_with_q_resources:
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
abort_filled:
- gve_rx_unfill_pages(priv, rx);
+ gve_rx_unfill_pages(priv, rx, cfg);
abort_with_copy_pool:
kvfree(rx->qpl_copy_pool);
rx->qpl_copy_pool = NULL;
@@ -306,36 +330,58 @@ abort_with_slots:
return err;
}
-int gve_rx_alloc_rings(struct gve_priv *priv)
+int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
+ struct gve_rx_ring *rx;
int err = 0;
- int i;
+ int i, j;
+
+ if (!cfg->raw_addressing && !cfg->qpls) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc QPL ring before allocing QPLs\n");
+ return -EINVAL;
+ }
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- err = gve_rx_alloc_ring(priv, i);
+ rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ GFP_KERNEL);
+ if (!rx)
+ return -ENOMEM;
+
+ for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc rx ring=%d: err=%d\n",
i, err);
- break;
+ goto cleanup;
}
}
- /* Unallocate if there was an error */
- if (err) {
- int j;
- for (j = 0; j < i; j++)
- gve_rx_free_ring(priv, j);
- }
+ cfg->rx = rx;
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++)
+ gve_rx_free_ring_gqi(priv, &rx[j], cfg);
+ kvfree(rx);
return err;
}
-void gve_rx_free_rings_gqi(struct gve_priv *priv)
+void gve_rx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
+ struct gve_rx_ring *rx = cfg->rx;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++)
- gve_rx_free_ring(priv, i);
+ if (!rx)
+ return;
+
+ for (i = 0; i < cfg->qcfg->num_queues; i++)
+ gve_rx_free_ring_gqi(priv, &rx[i], cfg);
+
+ kvfree(rx);
+ cfg->rx = NULL;
}
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
@@ -356,7 +402,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info,
- u16 packet_buffer_size, u16 len,
+ unsigned int truesize, u16 len,
struct gve_rx_ctx *ctx)
{
u32 offset = page_info->page_offset + page_info->pad;
@@ -389,10 +435,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
if (skb != ctx->skb_head) {
ctx->skb_head->len += len;
ctx->skb_head->data_len += len;
- ctx->skb_head->truesize += packet_buffer_size;
+ ctx->skb_head->truesize += truesize;
}
skb_add_rx_frag(skb, num_frags, page_info->page,
- offset, len, packet_buffer_size);
+ offset, len, truesize);
return ctx->skb_head;
}
@@ -486,7 +532,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
memcpy(alloc_page_info.page_address, src, page_info->pad + len);
skb = gve_rx_add_frags(napi, &alloc_page_info,
- rx->packet_buffer_size,
+ PAGE_SIZE,
len, ctx);
u64_stats_update_begin(&rx->statss);
@@ -896,10 +942,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
gve_rx_free_buffer(dev, page_info, data_slot);
page_info->page = NULL;
if (gve_rx_alloc_buffer(priv, dev, page_info,
- data_slot)) {
- u64_stats_update_begin(&rx->statss);
- rx->rx_buf_alloc_fail++;
- u64_stats_update_end(&rx->statss);
+ data_slot, rx)) {
break;
}
}
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index f281e42a7ef9..8e8071308aeb 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -199,20 +199,42 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
return 0;
}
-static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
+static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ struct device *hdev = &priv->pdev->dev;
+ int buf_count = rx->dqo.bufq.mask + 1;
+
+ if (rx->dqo.hdr_bufs.data) {
+ dma_free_coherent(hdev, priv->header_buf_size * buf_count,
+ rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
+ rx->dqo.hdr_bufs.data = NULL;
+ }
+}
+
+void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
+{
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ if (!gve_rx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ gve_rx_remove_from_block(priv, idx);
+}
+
+static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *hdev = &priv->pdev->dev;
size_t completion_queue_slots;
size_t buffer_queue_slots;
+ int idx = rx->q_num;
size_t size;
int i;
completion_queue_slots = rx->dqo.complq.mask + 1;
buffer_queue_slots = rx->dqo.bufq.mask + 1;
- gve_rx_remove_from_block(priv, idx);
-
if (rx->q_resources) {
dma_free_coherent(hdev, sizeof(*rx->q_resources),
rx->q_resources, rx->q_resources_bus);
@@ -226,7 +248,7 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
}
if (rx->dqo.qpl) {
- gve_unassign_qpl(priv, rx->dqo.qpl->id);
+ gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
rx->dqo.qpl = NULL;
}
@@ -248,20 +270,44 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
kvfree(rx->dqo.buf_states);
rx->dqo.buf_states = NULL;
+ gve_rx_free_hdr_bufs(priv, rx);
+
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
+static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ struct device *hdev = &priv->pdev->dev;
+ int buf_count = rx->dqo.bufq.mask + 1;
+
+ rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
+ &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
+ if (!rx->dqo.hdr_bufs.data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
+{
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ gve_rx_add_to_block(priv, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
+}
+
+static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *hdev = &priv->pdev->dev;
size_t size;
int i;
- const u32 buffer_queue_slots =
- priv->queue_format == GVE_DQO_RDA_FORMAT ?
- priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
- const u32 completion_queue_slots = priv->rx_desc_cnt;
+ const u32 buffer_queue_slots = cfg->raw_addressing ?
+ priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
+ const u32 completion_queue_slots = cfg->ring_size;
netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
@@ -274,7 +320,7 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
rx->ctx.skb_head = NULL;
rx->ctx.skb_tail = NULL;
- rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
+ rx->dqo.num_buf_states = cfg->raw_addressing ?
min_t(s16, S16_MAX, buffer_queue_slots * 4) :
priv->rx_pages_per_qpl;
rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
@@ -283,6 +329,11 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!rx->dqo.buf_states)
return -ENOMEM;
+ /* Allocate header buffers for header-split */
+ if (cfg->enable_header_split)
+ if (gve_rx_alloc_hdr_bufs(priv, rx))
+ goto err;
+
/* Set up linked list of buffer IDs */
for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
rx->dqo.buf_states[i].next = i + 1;
@@ -308,8 +359,8 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!rx->dqo.bufq.desc_ring)
goto err;
- if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
- rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
+ if (!cfg->raw_addressing) {
+ rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
if (!rx->dqo.qpl)
goto err;
rx->dqo.next_qpl_page_idx = 0;
@@ -320,12 +371,10 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!rx->q_resources)
goto err;
- gve_rx_add_to_block(priv, idx);
-
return 0;
err:
- gve_rx_free_ring_dqo(priv, idx);
+ gve_rx_free_ring_dqo(priv, rx, cfg);
return -ENOMEM;
}
@@ -337,13 +386,26 @@ void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}
-int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
+int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
- int err = 0;
+ struct gve_rx_ring *rx;
+ int err;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- err = gve_rx_alloc_ring_dqo(priv, i);
+ if (!cfg->raw_addressing && !cfg->qpls) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc QPL ring before allocing QPLs\n");
+ return -EINVAL;
+ }
+
+ rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ GFP_KERNEL);
+ if (!rx)
+ return -ENOMEM;
+
+ for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc rx ring=%d: err=%d\n",
@@ -352,21 +414,30 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
}
}
+ cfg->rx = rx;
return 0;
err:
for (i--; i >= 0; i--)
- gve_rx_free_ring_dqo(priv, i);
-
+ gve_rx_free_ring_dqo(priv, &rx[i], cfg);
+ kvfree(rx);
return err;
}
-void gve_rx_free_rings_dqo(struct gve_priv *priv)
+void gve_rx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
+ struct gve_rx_ring *rx = cfg->rx;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++)
- gve_rx_free_ring_dqo(priv, i);
+ if (!rx)
+ return;
+
+ for (i = 0; i < cfg->qcfg->num_queues; i++)
+ gve_rx_free_ring_dqo(priv, &rx[i], cfg);
+
+ kvfree(rx);
+ cfg->rx = NULL;
}
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
@@ -404,6 +475,10 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
desc->buf_addr = cpu_to_le64(buf_state->addr +
buf_state->page_info.page_offset);
+ if (rx->dqo.hdr_bufs.data)
+ desc->header_buf_addr =
+ cpu_to_le64(rx->dqo.hdr_bufs.addr +
+ priv->header_buf_size * bufq->tail);
bufq->tail = (bufq->tail + 1) & bufq->mask;
complq->num_free_slots--;
@@ -419,7 +494,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state)
{
- const int data_buffer_size = priv->data_buffer_size_dqo;
+ const u16 data_buffer_size = priv->data_buffer_size_dqo;
int pagecount;
/* Can't reuse if we only fit one buffer per page */
@@ -606,13 +681,16 @@ static int gve_rx_append_frags(struct napi_struct *napi,
*/
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
const struct gve_rx_compl_desc_dqo *compl_desc,
- int queue_idx)
+ u32 desc_idx, int queue_idx)
{
const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
+ const bool hbo = compl_desc->header_buffer_overflow;
const bool eop = compl_desc->end_of_packet != 0;
+ const bool hsplit = compl_desc->split_header;
struct gve_rx_buf_state_dqo *buf_state;
struct gve_priv *priv = rx->gve;
u16 buf_len;
+ u16 hdr_len;
if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
@@ -633,12 +711,35 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
}
buf_len = compl_desc->packet_len;
+ hdr_len = compl_desc->header_len;
/* Page might have not been used for awhile and was likely last written
* by a different thread.
*/
prefetch(buf_state->page_info.page);
+ /* Copy the header into the skb in the case of header split */
+ if (hsplit) {
+ int unsplit = 0;
+
+ if (hdr_len && !hbo) {
+ rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
+ rx->dqo.hdr_bufs.data +
+ desc_idx * priv->header_buf_size,
+ hdr_len);
+ if (unlikely(!rx->ctx.skb_head))
+ goto error;
+ rx->ctx.skb_tail = rx->ctx.skb_head;
+ } else {
+ unsplit = 1;
+ }
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_hsplit_pkt++;
+ rx->rx_hsplit_unsplit_pkt += unsplit;
+ rx->rx_hsplit_bytes += hdr_len;
+ u64_stats_update_end(&rx->statss);
+ }
+
/* Sync the portion of dma buffer for CPU to read. */
dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
buf_state->page_info.page_offset,
@@ -781,7 +882,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
/* Do not read data until we own the descriptor */
dma_rmb();
- err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
+ err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
if (err < 0) {
gve_rx_free_skb(rx);
u64_stats_update_begin(&rx->statss);
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 07ba124780df..4b9853adc113 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -196,29 +196,36 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do, bool try_to_wake);
-static void gve_tx_free_ring(struct gve_priv *priv, int idx)
+void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
+
+ if (!gve_tx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
+ netdev_tx_reset_queue(tx->netdev_txq);
+ gve_tx_remove_from_block(priv, idx);
+}
+
+static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct gve_tx_alloc_rings_cfg *cfg)
+{
struct device *hdev = &priv->pdev->dev;
+ int idx = tx->q_num;
size_t bytes;
u32 slots;
- gve_tx_remove_from_block(priv, idx);
slots = tx->mask + 1;
- if (tx->q_num < priv->tx_cfg.num_queues) {
- gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
- netdev_tx_reset_queue(tx->netdev_txq);
- } else {
- gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
- }
-
dma_free_coherent(hdev, sizeof(*tx->q_resources),
tx->q_resources, tx->q_resources_bus);
tx->q_resources = NULL;
if (!tx->raw_addressing) {
gve_tx_fifo_release(priv, &tx->tx_fifo);
- gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id);
tx->tx_fifo.qpl = NULL;
}
@@ -232,11 +239,23 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}
-static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
+void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
+
+ gve_tx_add_to_block(priv, idx);
+
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll);
+}
+
+static int gve_tx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg,
+ struct gve_tx_ring *tx,
+ int idx)
+{
struct device *hdev = &priv->pdev->dev;
- u32 slots = priv->tx_desc_cnt;
size_t bytes;
/* Make sure everything is zeroed to start */
@@ -245,23 +264,23 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
spin_lock_init(&tx->xdp_lock);
tx->q_num = idx;
- tx->mask = slots - 1;
+ tx->mask = cfg->ring_size - 1;
/* alloc metadata */
- tx->info = vcalloc(slots, sizeof(*tx->info));
+ tx->info = vcalloc(cfg->ring_size, sizeof(*tx->info));
if (!tx->info)
return -ENOMEM;
/* alloc tx queue */
- bytes = sizeof(*tx->desc) * slots;
+ bytes = sizeof(*tx->desc) * cfg->ring_size;
tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
if (!tx->desc)
goto abort_with_info;
- tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
- tx->dev = &priv->pdev->dev;
+ tx->raw_addressing = cfg->raw_addressing;
+ tx->dev = hdev;
if (!tx->raw_addressing) {
- tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx);
+ tx->tx_fifo.qpl = gve_assign_tx_qpl(cfg, idx);
if (!tx->tx_fifo.qpl)
goto abort_with_desc;
/* map Tx FIFO */
@@ -277,12 +296,6 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
if (!tx->q_resources)
goto abort_with_fifo;
- netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
- (unsigned long)tx->bus);
- if (idx < priv->tx_cfg.num_queues)
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
- gve_tx_add_to_block(priv, idx);
-
return 0;
abort_with_fifo:
@@ -290,7 +303,7 @@ abort_with_fifo:
gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
if (!tx->raw_addressing)
- gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id);
abort_with_desc:
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
tx->desc = NULL;
@@ -300,36 +313,73 @@ abort_with_info:
return -ENOMEM;
}
-int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings)
+int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
int err = 0;
- int i;
+ int i, j;
+
+ if (!cfg->raw_addressing && !cfg->qpls) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc QPL ring before allocing QPLs\n");
+ return -EINVAL;
+ }
+
+ if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc more than the max num of Tx rings\n");
+ return -EINVAL;
+ }
+
+ if (cfg->start_idx == 0) {
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
+ } else if (!tx) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc tx rings from a nonzero start idx without tx array\n");
+ return -EINVAL;
+ }
- for (i = start_id; i < start_id + num_rings; i++) {
- err = gve_tx_alloc_ring(priv, i);
+ for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) {
+ err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc tx ring=%d: err=%d\n",
i, err);
- break;
+ goto cleanup;
}
}
- /* Unallocate if there was an error */
- if (err) {
- int j;
- for (j = start_id; j < i; j++)
- gve_tx_free_ring(priv, j);
- }
+ cfg->tx = tx;
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++)
+ gve_tx_free_ring_gqi(priv, &tx[j], cfg);
+ if (cfg->start_idx == 0)
+ kvfree(tx);
return err;
}
-void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings)
+void gve_tx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
int i;
- for (i = start_id; i < start_id + num_rings; i++)
- gve_tx_free_ring(priv, i);
+ if (!tx)
+ return;
+
+ for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++)
+ gve_tx_free_ring_gqi(priv, &tx[i], cfg);
+
+ if (cfg->start_idx == 0) {
+ kvfree(tx);
+ cfg->tx = NULL;
+ }
}
/* gve_tx_avail - Calculates the number of slots available in the ring
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index f59c4710f118..bc34b6cd3a3e 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -188,13 +188,27 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
}
}
-static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
+void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
- struct device *hdev = &priv->pdev->dev;
- size_t bytes;
+ if (!gve_tx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
+ netdev_tx_reset_queue(tx->netdev_txq);
+ gve_tx_clean_pending_packets(tx);
gve_tx_remove_from_block(priv, idx);
+}
+
+static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct gve_tx_alloc_rings_cfg *cfg)
+{
+ struct device *hdev = &priv->pdev->dev;
+ int idx = tx->q_num;
+ size_t bytes;
if (tx->q_resources) {
dma_free_coherent(hdev, sizeof(*tx->q_resources),
@@ -223,7 +237,7 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
tx->dqo.tx_qpl_buf_next = NULL;
if (tx->dqo.qpl) {
- gve_unassign_qpl(priv, tx->dqo.qpl->id);
+ gve_unassign_qpl(cfg->qpl_cfg, tx->dqo.qpl->id);
tx->dqo.qpl = NULL;
}
@@ -253,9 +267,22 @@ static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
return 0;
}
-static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
+void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
+
+ gve_tx_add_to_block(priv, idx);
+
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
+}
+
+static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg,
+ struct gve_tx_ring *tx,
+ int idx)
+{
struct device *hdev = &priv->pdev->dev;
int num_pending_packets;
size_t bytes;
@@ -263,12 +290,11 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
memset(tx, 0, sizeof(*tx));
tx->q_num = idx;
- tx->dev = &priv->pdev->dev;
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ tx->dev = hdev;
atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);
/* Queue sizes must be a power of 2 */
- tx->mask = priv->tx_desc_cnt - 1;
+ tx->mask = cfg->ring_size - 1;
tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ?
priv->options_dqo_rda.tx_comp_ring_entries - 1 :
tx->mask;
@@ -327,8 +353,8 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!tx->q_resources)
goto err;
- if (gve_is_qpl(priv)) {
- tx->dqo.qpl = gve_assign_tx_qpl(priv, idx);
+ if (!cfg->raw_addressing) {
+ tx->dqo.qpl = gve_assign_tx_qpl(cfg, idx);
if (!tx->dqo.qpl)
goto err;
@@ -336,22 +362,45 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
goto err;
}
- gve_tx_add_to_block(priv, idx);
-
return 0;
err:
- gve_tx_free_ring_dqo(priv, idx);
+ gve_tx_free_ring_dqo(priv, tx, cfg);
return -ENOMEM;
}
-int gve_tx_alloc_rings_dqo(struct gve_priv *priv)
+int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
int err = 0;
- int i;
+ int i, j;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- err = gve_tx_alloc_ring_dqo(priv, i);
+ if (!cfg->raw_addressing && !cfg->qpls) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc QPL ring before allocing QPLs\n");
+ return -EINVAL;
+ }
+
+ if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc more than the max num of Tx rings\n");
+ return -EINVAL;
+ }
+
+ if (cfg->start_idx == 0) {
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
+ } else if (!tx) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc tx rings from a nonzero start idx without tx array\n");
+ return -EINVAL;
+ }
+
+ for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) {
+ err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc tx ring=%d: err=%d\n",
@@ -360,27 +409,32 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv)
}
}
+ cfg->tx = tx;
return 0;
err:
- for (i--; i >= 0; i--)
- gve_tx_free_ring_dqo(priv, i);
-
+ for (j = 0; j < i; j++)
+ gve_tx_free_ring_dqo(priv, &tx[j], cfg);
+ if (cfg->start_idx == 0)
+ kvfree(tx);
return err;
}
-void gve_tx_free_rings_dqo(struct gve_priv *priv)
+void gve_tx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- struct gve_tx_ring *tx = &priv->tx[i];
+ if (!tx)
+ return;
- gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
- netdev_tx_reset_queue(tx->netdev_txq);
- gve_tx_clean_pending_packets(tx);
+ for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++)
+ gve_tx_free_ring_dqo(priv, &tx[i], cfg);
- gve_tx_free_ring_dqo(priv, i);
+ if (cfg->start_idx == 0) {
+ kvfree(tx);
+ cfg->tx = NULL;
}
}
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 26e08d753270..2349750075a5 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -8,6 +8,14 @@
#include "gve_adminq.h"
#include "gve_utils.h"
+bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx)
+{
+ struct gve_notify_block *block =
+ &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];
+
+ return block->tx != NULL;
+}
+
void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
struct gve_notify_block *block =
@@ -30,6 +38,14 @@ void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
queue_idx);
}
+bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx)
+{
+ struct gve_notify_block *block =
+ &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];
+
+ return block->rx != NULL;
+}
+
void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
struct gve_notify_block *block =
@@ -48,11 +64,9 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
rx->ntfy_id = ntfy_idx;
}
-struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
- struct gve_rx_slot_page_info *page_info, u16 len)
+struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi,
+ u8 *data, u16 len)
{
- void *va = page_info->page_address + page_info->page_offset +
- page_info->pad;
struct sk_buff *skb;
skb = napi_alloc_skb(napi, len);
@@ -60,12 +74,21 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
return NULL;
__skb_put(skb, len);
- skb_copy_to_linear_data_offset(skb, 0, va, len);
+ skb_copy_to_linear_data_offset(skb, 0, data, len);
skb->protocol = eth_type_trans(skb, dev);
return skb;
}
+struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
+ struct gve_rx_slot_page_info *page_info, u16 len)
+{
+ void *va = page_info->page_address + page_info->page_offset +
+ page_info->pad;
+
+ return gve_rx_copy_data(dev, napi, va, len);
+}
+
void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
{
page_info->pagecnt_bias--;
@@ -81,3 +104,18 @@ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
page_ref_add(page_info->page, INT_MAX - pagecount);
}
}
+
+void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+ int (*gve_poll)(struct napi_struct *, int))
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_add(priv->dev, &block->napi, gve_poll);
+}
+
+void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_del(&block->napi);
+}
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 324fd98a6112..bf2e9a0adb36 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -11,17 +11,25 @@
#include "gve.h"
+bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx);
void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx);
void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx);
+bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx);
void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
+struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi,
+ u8 *data, u16 len);
+
struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info, u16 len);
/* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */
void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);
+void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+ int (*gve_poll)(struct napi_struct *, int));
+void gve_remove_napi(struct gve_priv *priv, int ntfy_idx);
#endif /* _GVE_UTILS_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index 8a1027ad340d..d4293f76d69d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -12,7 +12,9 @@
#define cls_to_ae_dev(dev) container_of(dev, struct hnae_ae_dev, cls_dev)
-static struct class *hnae_class;
+static const struct class hnae_class = {
+ .name = "hnae",
+};
static void
hnae_list_add(spinlock_t *lock, struct list_head *node, struct list_head *head)
@@ -111,7 +113,7 @@ static struct hnae_ae_dev *find_ae(const struct fwnode_handle *fwnode)
WARN_ON(!fwnode);
- dev = class_find_device(hnae_class, NULL, fwnode, __ae_match);
+ dev = class_find_device(&hnae_class, NULL, fwnode, __ae_match);
return dev ? cls_to_ae_dev(dev) : NULL;
}
@@ -415,7 +417,7 @@ int hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner)
hdev->owner = owner;
hdev->id = (int)atomic_inc_return(&id);
hdev->cls_dev.parent = hdev->dev;
- hdev->cls_dev.class = hnae_class;
+ hdev->cls_dev.class = &hnae_class;
hdev->cls_dev.release = hnae_release;
(void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id);
ret = device_register(&hdev->cls_dev);
@@ -448,13 +450,12 @@ EXPORT_SYMBOL(hnae_ae_unregister);
static int __init hnae_init(void)
{
- hnae_class = class_create("hnae");
- return PTR_ERR_OR_ZERO(hnae_class);
+ return class_register(&hnae_class);
}
static void __exit hnae_exit(void)
{
- class_destroy(hnae_class);
+ class_unregister(&hnae_class);
}
subsys_initcall(hnae_init);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d7e175a9cb49..f19f1e1d1f9f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -388,6 +388,7 @@ struct hnae3_dev_specs {
u16 mc_mac_size;
u32 mac_stats_num;
u8 tnl_num;
+ u8 hilink_version;
};
struct hnae3_client_ops {
@@ -819,6 +820,7 @@ struct hnae3_tc_info {
u8 max_tc; /* Total number of TCs */
u8 num_tc; /* Total number of enabled TCs */
bool mqprio_active;
+ bool mqprio_destroy;
bool dcb_ets_active;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
index d92ad6082d8e..652d71326231 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
@@ -351,7 +351,7 @@ static int hclge_comm_cmd_csq_done(struct hclge_comm_hw *hw)
static u32 hclge_get_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
{
static const struct hclge_cmdq_tx_timeout_map cmdq_tx_timeout_map[] = {
- {HCLGE_OPC_CFG_RST_TRIGGER, HCLGE_COMM_CMDQ_TX_TIMEOUT_500MS},
+ {HCLGE_OPC_CFG_RST_TRIGGER, HCLGE_COMM_CMDQ_CFG_RST_TIMEOUT},
};
u32 i;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index 533c19d25e4f..552396518e08 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -55,7 +55,7 @@
#define HCLGE_COMM_NIC_CMQ_DESC_NUM_S 3
#define HCLGE_COMM_NIC_CMQ_DESC_NUM 1024
#define HCLGE_COMM_CMDQ_TX_TIMEOUT_DEFAULT 30000
-#define HCLGE_COMM_CMDQ_TX_TIMEOUT_500MS 500000
+#define HCLGE_COMM_CMDQ_CFG_RST_TIMEOUT 1000000
enum hclge_opcode_type {
/* Generic commands */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
index 3b6dbf158b98..f72dc0cee30e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
@@ -76,7 +76,7 @@ static int hns3_dcbnl_ieee_delapp(struct net_device *ndev, struct dcb_app *app)
if (hns3_nic_resetting(ndev))
return -EBUSY;
- if (h->kinfo.dcb_ops->ieee_setapp)
+ if (h->kinfo.dcb_ops->ieee_delapp)
return h->kinfo.dcb_ops->ieee_delapp(h, app);
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index c083d1d10767..807eb3bbb11c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -1097,6 +1097,8 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
*pos += scnprintf(buf + *pos, len - *pos,
"TX timeout threshold: %d seconds\n",
dev->watchdog_timeo / HZ);
+ *pos += scnprintf(buf + *pos, len - *pos, "Hilink Version: %u\n",
+ dev_specs->hilink_version);
}
static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f1695c889d3a..19668a8d22f7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2473,9 +2473,9 @@ static netdev_features_t hns3_features_check(struct sk_buff *skb,
return features;
if (skb->encapsulation)
- len = skb_inner_transport_header(skb) - skb->data;
+ len = skb_inner_transport_offset(skb);
else
- len = skb_transport_header(skb) - skb->data;
+ len = skb_transport_offset(skb);
/* Assume L4 is 60 byte as TCP is the only protocol with a
* a flexible value, and it's max len is 60 bytes.
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 4d15eb73b972..9bb708fa42f2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -828,7 +828,8 @@ struct hclge_dev_specs_1_cmd {
__le16 mc_mac_size;
u8 rsv1[6];
u8 tnl_num;
- u8 rsv2[5];
+ u8 hilink_version;
+ u8 rsv2[4];
};
/* mac speed type defined in firmware command */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index b98301e205f7..eabbacb1c714 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -619,6 +619,8 @@ static int hclge_setup_tc(struct hnae3_handle *h,
return ret;
}
+ kinfo->tc_info.mqprio_destroy = !tc;
+
ret = hclge_notify_down_uinit(hdev);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 5ea9e59569ef..b4afb66efe5c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -645,8 +645,12 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
handle->flags |= HNAE3_SUPPORT_APP_LOOPBACK;
}
- count += 1;
- handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
+ if (hdev->ae_dev->dev_specs.hilink_version !=
+ HCLGE_HILINK_H60) {
+ count += 1;
+ handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
+ }
+
count += 1;
handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
count += 1;
@@ -884,7 +888,7 @@ static const struct hclge_speed_bit_map speed_bit_map[] = {
{HCLGE_MAC_SPEED_40G, HCLGE_SUPPORT_40G_BIT},
{HCLGE_MAC_SPEED_50G, HCLGE_SUPPORT_50G_BITS},
{HCLGE_MAC_SPEED_100G, HCLGE_SUPPORT_100G_BITS},
- {HCLGE_MAC_SPEED_200G, HCLGE_SUPPORT_200G_BIT},
+ {HCLGE_MAC_SPEED_200G, HCLGE_SUPPORT_200G_BITS},
};
static int hclge_get_speed_bit(u32 speed, u32 *speed_bit)
@@ -940,7 +944,7 @@ static void hclge_update_fec_support(struct hclge_mac *mac)
mac->supported);
}
-static const struct hclge_link_mode_bmap hclge_sr_link_mode_bmap[8] = {
+static const struct hclge_link_mode_bmap hclge_sr_link_mode_bmap[] = {
{HCLGE_SUPPORT_10G_BIT, ETHTOOL_LINK_MODE_10000baseSR_Full_BIT},
{HCLGE_SUPPORT_25G_BIT, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT},
{HCLGE_SUPPORT_40G_BIT, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT},
@@ -948,10 +952,12 @@ static const struct hclge_link_mode_bmap hclge_sr_link_mode_bmap[8] = {
{HCLGE_SUPPORT_50G_R1_BIT, ETHTOOL_LINK_MODE_50000baseSR_Full_BIT},
{HCLGE_SUPPORT_100G_R4_BIT, ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT},
{HCLGE_SUPPORT_100G_R2_BIT, ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT},
- {HCLGE_SUPPORT_200G_BIT, ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_EXT_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_BIT, ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT},
};
-static const struct hclge_link_mode_bmap hclge_lr_link_mode_bmap[6] = {
+static const struct hclge_link_mode_bmap hclge_lr_link_mode_bmap[] = {
{HCLGE_SUPPORT_10G_BIT, ETHTOOL_LINK_MODE_10000baseLR_Full_BIT},
{HCLGE_SUPPORT_40G_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
{HCLGE_SUPPORT_50G_R1_BIT, ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT},
@@ -959,11 +965,13 @@ static const struct hclge_link_mode_bmap hclge_lr_link_mode_bmap[6] = {
ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT},
{HCLGE_SUPPORT_100G_R2_BIT,
ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT},
- {HCLGE_SUPPORT_200G_BIT,
+ {HCLGE_SUPPORT_200G_R4_EXT_BIT,
+ ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_BIT,
ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT},
};
-static const struct hclge_link_mode_bmap hclge_cr_link_mode_bmap[8] = {
+static const struct hclge_link_mode_bmap hclge_cr_link_mode_bmap[] = {
{HCLGE_SUPPORT_10G_BIT, ETHTOOL_LINK_MODE_10000baseCR_Full_BIT},
{HCLGE_SUPPORT_25G_BIT, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT},
{HCLGE_SUPPORT_40G_BIT, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT},
@@ -971,10 +979,12 @@ static const struct hclge_link_mode_bmap hclge_cr_link_mode_bmap[8] = {
{HCLGE_SUPPORT_50G_R1_BIT, ETHTOOL_LINK_MODE_50000baseCR_Full_BIT},
{HCLGE_SUPPORT_100G_R4_BIT, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT},
{HCLGE_SUPPORT_100G_R2_BIT, ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT},
- {HCLGE_SUPPORT_200G_BIT, ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_EXT_BIT,
+ ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_BIT, ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT},
};
-static const struct hclge_link_mode_bmap hclge_kr_link_mode_bmap[9] = {
+static const struct hclge_link_mode_bmap hclge_kr_link_mode_bmap[] = {
{HCLGE_SUPPORT_1G_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT},
{HCLGE_SUPPORT_10G_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
{HCLGE_SUPPORT_25G_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
@@ -983,7 +993,9 @@ static const struct hclge_link_mode_bmap hclge_kr_link_mode_bmap[9] = {
{HCLGE_SUPPORT_50G_R1_BIT, ETHTOOL_LINK_MODE_50000baseKR_Full_BIT},
{HCLGE_SUPPORT_100G_R4_BIT, ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
{HCLGE_SUPPORT_100G_R2_BIT, ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT},
- {HCLGE_SUPPORT_200G_BIT, ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_EXT_BIT,
+ ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT},
+ {HCLGE_SUPPORT_200G_R4_BIT, ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT},
};
static void hclge_convert_setting_sr(u16 speed_ability,
@@ -1154,7 +1166,7 @@ static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability)
static u32 hclge_get_max_speed(u16 speed_ability)
{
- if (speed_ability & HCLGE_SUPPORT_200G_BIT)
+ if (speed_ability & HCLGE_SUPPORT_200G_BITS)
return HCLGE_MAC_SPEED_200G;
if (speed_ability & HCLGE_SUPPORT_100G_BITS)
@@ -1350,6 +1362,7 @@ static void hclge_parse_dev_specs(struct hclge_dev *hdev,
ae_dev->dev_specs.umv_size = le16_to_cpu(req1->umv_size);
ae_dev->dev_specs.mc_mac_size = le16_to_cpu(req1->mc_mac_size);
ae_dev->dev_specs.tnl_num = req1->tnl_num;
+ ae_dev->dev_specs.hilink_version = req1->hilink_version;
}
static void hclge_check_dev_specs(struct hclge_dev *hdev)
@@ -2890,7 +2903,10 @@ static int hclge_mac_init(struct hclge_dev *hdev)
int ret;
hdev->support_sfp_query = true;
- hdev->hw.mac.duplex = HCLGE_MAC_FULL;
+
+ if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ hdev->hw.mac.duplex = HCLGE_MAC_FULL;
+
ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed,
hdev->hw.mac.duplex, hdev->hw.mac.lane_num);
if (ret)
@@ -12092,6 +12108,8 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ hclge_reset_tc_config(hdev);
+
ret = hclge_tm_init_hw(hdev, true);
if (ret) {
dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 51979cf71262..e821dd2f1528 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -191,9 +191,10 @@ enum HLCGE_PORT_TYPE {
#define HCLGE_SUPPORT_40G_BIT BIT(5)
#define HCLGE_SUPPORT_100M_BIT BIT(6)
#define HCLGE_SUPPORT_10M_BIT BIT(7)
-#define HCLGE_SUPPORT_200G_BIT BIT(8)
+#define HCLGE_SUPPORT_200G_R4_EXT_BIT BIT(8)
#define HCLGE_SUPPORT_50G_R1_BIT BIT(9)
#define HCLGE_SUPPORT_100G_R2_BIT BIT(10)
+#define HCLGE_SUPPORT_200G_R4_BIT BIT(11)
#define HCLGE_SUPPORT_GE \
(HCLGE_SUPPORT_1G_BIT | HCLGE_SUPPORT_100M_BIT | HCLGE_SUPPORT_10M_BIT)
@@ -201,6 +202,8 @@ enum HLCGE_PORT_TYPE {
(HCLGE_SUPPORT_50G_R2_BIT | HCLGE_SUPPORT_50G_R1_BIT)
#define HCLGE_SUPPORT_100G_BITS \
(HCLGE_SUPPORT_100G_R4_BIT | HCLGE_SUPPORT_100G_R2_BIT)
+#define HCLGE_SUPPORT_200G_BITS \
+ (HCLGE_SUPPORT_200G_R4_EXT_BIT | HCLGE_SUPPORT_200G_R4_BIT)
enum HCLGE_DEV_STATE {
HCLGE_STATE_REINITING,
@@ -253,6 +256,12 @@ enum HCLGE_MAC_DUPLEX {
HCLGE_MAC_FULL
};
+/* hilink version */
+enum hclge_hilink_version {
+ HCLGE_HILINK_H32 = 0,
+ HCLGE_HILINK_H60 = 1,
+};
+
#define QUERY_SFP_SPEED 0
#define QUERY_ACTIVE_SPEED 1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 4b0d07ca2505..d4a0e0be7a72 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -1123,10 +1123,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
- if (unlikely(!hnae3_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B))) {
+ if (unlikely(!hnae3_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B) ||
+ req->mbx_src_vfid > hdev->num_req_vfs)) {
dev_warn(&hdev->pdev->dev,
- "dropped invalid mailbox message, code = %u\n",
- req->msg.code);
+ "dropped invalid mailbox message, code = %u, vfid = %u\n",
+ req->msg.code, req->mbx_src_vfid);
/* dropping/not processing this invalid message */
crq->desc[crq->next_to_use].flag = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
index 80a2a0073d97..507d7ce26d83 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
@@ -108,7 +108,7 @@ void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb,
u64 ns = nsec;
u32 sec_h;
- if (!test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags))
+ if (!hdev->ptp || !test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags))
return;
/* Since the BD does not have enough space for the higher 16 bits of
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index c58c31221762..00c3f2548bf6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -2143,3 +2143,19 @@ int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable)
return ret;
}
+
+void hclge_reset_tc_config(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = &hdev->vport[0];
+ struct hnae3_knic_private_info *kinfo;
+
+ kinfo = &vport->nic.kinfo;
+
+ if (!kinfo->tc_info.mqprio_destroy)
+ return;
+
+ /* clear tc info, including mqprio_destroy and mqprio_active */
+ memset(&kinfo->tc_info, 0, sizeof(kinfo->tc_info));
+ hclge_tm_schd_info_update(hdev, 0);
+ hclge_comm_rss_indir_init_cfg(hdev->ae_dev, &hdev->rss_cfg);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 53eec6df5194..0985916629d3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -277,4 +277,5 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
int hclge_up_to_tc_map(struct hclge_dev *hdev);
int hclge_dscp_to_tc_map(struct hclge_dev *hdev);
int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable);
+void hclge_reset_tc_config(struct hclge_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
index 5e27470c6b1e..f2d4669c81cf 100644
--- a/drivers/net/ethernet/i825xx/sun3_82586.c
+++ b/drivers/net/ethernet/i825xx/sun3_82586.c
@@ -987,7 +987,7 @@ static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue)
{
#ifdef DEBUG
printk("%s: xmitter timed out, try to restart! stat: %02x\n",dev->name,p->scb->cus);
- printk("%s: command-stats: %04x %04x\n",dev->name,swab16(p->xmit_cmds[0]->cmd_status),swab16(p->xmit_cmds[1]->cmd_status));
+ printk("%s: command-stats: %04x\n", dev->name, swab16(p->xmit_cmds[0]->cmd_status));
printk("%s: check, whether you set the right interrupt number!\n",dev->name);
#endif
sun3_82586_close(dev);
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index d55638ad8704..639fbb12bd35 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -368,6 +368,15 @@ config IGC
To compile this driver as a module, choose M here. The module
will be called igc.
+
+config IGC_LEDS
+ def_bool LEDS_TRIGGER_NETDEV
+ depends on IGC && LEDS_CLASS
+ depends on LEDS_CLASS=y || IGC=m
+ help
+ Optional support for controlling the NIC LED's with the netdev
+ LED trigger.
+
config IDPF
tristate "Intel(R) Infrastructure Data Path Function Support"
depends on PCI_MSI
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 01f0f12035ca..3fcb8daaa243 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -171,8 +171,8 @@ static int debug = 3;
static int eeprom_bad_csum_allow = 0;
static int use_io = 0;
module_param(debug, int, 0);
-module_param(eeprom_bad_csum_allow, int, 0);
-module_param(use_io, int, 0);
+module_param(eeprom_bad_csum_allow, int, 0444);
+module_param(use_io, int, 0444);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums");
MODULE_PARM_DESC(use_io, "Force use of i/o access mode");
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index a187582d2299..ba9c19e6994c 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -360,23 +360,43 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
* As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of
* INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n)
* bits to count nanoseconds leaving the rest for fractional nonseconds.
+ *
+ * Any given INCVALUE also has an associated maximum adjustment value. This
+ * maximum adjustment value is the largest increase (or decrease) which can be
+ * safely applied without overflowing the INCVALUE. Since INCVALUE has
+ * a maximum range of 24 bits, its largest value is 0xFFFFFF.
+ *
+ * To understand where the maximum value comes from, consider the following
+ * equation:
+ *
+ * new_incval = base_incval + (base_incval * adjustment) / 1billion
+ *
+ * To avoid overflow that means:
+ * max_incval = base_incval + (base_incval * max_adj) / billion
+ *
+ * Re-arranging:
+ * max_adj = floor(((max_incval - base_incval) * 1billion) / 1billion)
*/
#define INCVALUE_96MHZ 125
#define INCVALUE_SHIFT_96MHZ 17
#define INCPERIOD_SHIFT_96MHZ 2
#define INCPERIOD_96MHZ (12 >> INCPERIOD_SHIFT_96MHZ)
+#define MAX_PPB_96MHZ 23999900 /* 23,999,900 ppb */
#define INCVALUE_25MHZ 40
#define INCVALUE_SHIFT_25MHZ 18
#define INCPERIOD_25MHZ 1
+#define MAX_PPB_25MHZ 599999900 /* 599,999,900 ppb */
#define INCVALUE_24MHZ 125
#define INCVALUE_SHIFT_24MHZ 14
#define INCPERIOD_24MHZ 3
+#define MAX_PPB_24MHZ 999999999 /* 999,999,999 ppb */
#define INCVALUE_38400KHZ 26
#define INCVALUE_SHIFT_38400KHZ 19
#define INCPERIOD_38400KHZ 1
+#define MAX_PPB_38400KHZ 230769100 /* 230,769,100 ppb */
/* Another drawback of scaling the incvalue by a large factor is the
* 64-bit SYSTIM register overflows more quickly. This is dealt with
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index fc0f98ea6133..dc553c51d79a 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -2186,7 +2186,7 @@ static int e1000_get_rxnfc(struct net_device *netdev,
}
}
-static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int e1000e_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
@@ -2223,16 +2223,16 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
ret_val = e1000_read_emi_reg_locked(hw, cap_addr, &phy_data);
if (ret_val)
goto release;
- edata->supported = mmd_eee_cap_to_ethtool_sup_t(phy_data);
+ mii_eee_cap1_mod_linkmode_t(edata->supported, phy_data);
/* EEE Advertised */
- edata->advertised = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+ mii_eee_cap1_mod_linkmode_t(edata->advertised, adapter->eee_advert);
/* EEE Link Partner Advertised */
ret_val = e1000_read_emi_reg_locked(hw, lpa_addr, &phy_data);
if (ret_val)
goto release;
- edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+ mii_eee_cap1_mod_linkmode_t(edata->lp_advertised, phy_data);
/* EEE PCS Status */
ret_val = e1000_read_emi_reg_locked(hw, pcs_stat_addr, &phy_data);
@@ -2262,11 +2262,13 @@ release:
return ret_val;
}
-static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {};
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = {};
struct e1000_hw *hw = &adapter->hw;
- struct ethtool_eee eee_curr;
+ struct ethtool_keee eee_curr;
s32 ret_val;
ret_val = e1000e_get_eee(netdev, &eee_curr);
@@ -2283,12 +2285,17 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
return -EINVAL;
}
- if (edata->advertised & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) {
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ supported);
+
+ if (linkmode_andnot(tmp, edata->advertised, supported)) {
e_err("EEE advertisement supports only 100TX and/or 1000T full-duplex\n");
return -EINVAL;
}
- adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+ adapter->eee_advert = linkmode_to_mii_eee_cap1_t(edata->advertised);
hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled;
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index a2788fd5f8bb..19e450a5bd31 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2559,7 +2559,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw)
hw->phy.ops.write_reg_page(hw, BM_RAR_H(i),
(u16)(mac_reg & 0xFFFF));
hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i),
- FIELD_GET(E1000_RAH_AV, mac_reg));
+ (u16)((mac_reg & E1000_RAH_AV) >> 16));
}
e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index af5d9d97a0d6..cc8c531ec3df 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6688,14 +6688,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
if (adapter->hw.phy.type == e1000_phy_igp_3) {
e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
} else if (hw->mac.type >= e1000_pch_lpt) {
- if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC)))
+ if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) {
/* ULP does not support wake from unicast, multicast
* or broadcast.
*/
retval = e1000_enable_ulp_lpt_lp(hw, !runtime);
-
- if (retval)
- return retval;
+ if (retval)
+ return retval;
+ }
}
/* Ensure that the appropriate bits are set in LPI_CTRL
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 02d871bc112a..bbcfd529399b 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -280,8 +280,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
switch (hw->mac.type) {
case e1000_pch2lan:
+ adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ;
+ break;
case e1000_pch_lpt:
+ if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)
+ adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ;
+ else
+ adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ;
+ break;
case e1000_pch_spt:
+ adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ;
+ break;
case e1000_pch_cnp:
case e1000_pch_tgp:
case e1000_pch_adp:
@@ -289,15 +298,14 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
case e1000_pch_lnp:
case e1000_pch_ptp:
case e1000_pch_nvp:
- if ((hw->mac.type < e1000_pch_lpt) ||
- (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
- adapter->ptp_clock_info.max_adj = 24000000 - 1;
- break;
- }
- fallthrough;
+ if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)
+ adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ;
+ else
+ adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ;
+ break;
case e1000_82574:
case e1000_82583:
- adapter->ptp_clock_info.max_adj = 600000000 - 1;
+ adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ;
break;
default:
break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 9b701615c7c6..ba24f3fa92c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -687,6 +687,54 @@ struct i40e_pf {
};
/**
+ * __i40e_pf_next_vsi - get next valid VSI
+ * @pf: pointer to the PF struct
+ * @idx: pointer to start position number
+ *
+ * Find and return next non-NULL VSI pointer in pf->vsi array and
+ * updates idx position. Returns NULL if no VSI is found.
+ **/
+static __always_inline struct i40e_vsi *
+__i40e_pf_next_vsi(struct i40e_pf *pf, int *idx)
+{
+ while (*idx < pf->num_alloc_vsi) {
+ if (pf->vsi[*idx])
+ return pf->vsi[*idx];
+ (*idx)++;
+ }
+ return NULL;
+}
+
+#define i40e_pf_for_each_vsi(_pf, _i, _vsi) \
+ for (_i = 0, _vsi = __i40e_pf_next_vsi(_pf, &_i); \
+ _vsi; \
+ _i++, _vsi = __i40e_pf_next_vsi(_pf, &_i))
+
+/**
+ * __i40e_pf_next_veb - get next valid VEB
+ * @pf: pointer to the PF struct
+ * @idx: pointer to start position number
+ *
+ * Find and return next non-NULL VEB pointer in pf->veb array and
+ * updates idx position. Returns NULL if no VEB is found.
+ **/
+static __always_inline struct i40e_veb *
+__i40e_pf_next_veb(struct i40e_pf *pf, int *idx)
+{
+ while (*idx < I40E_MAX_VEB) {
+ if (pf->veb[*idx])
+ return pf->veb[*idx];
+ (*idx)++;
+ }
+ return NULL;
+}
+
+#define i40e_pf_for_each_veb(_pf, _i, _veb) \
+ for (_i = 0, _veb = __i40e_pf_next_veb(_pf, &_i); \
+ _veb; \
+ _i++, _veb = __i40e_pf_next_veb(_pf, &_i))
+
+/**
* i40e_mac_to_hkey - Convert a 6-byte MAC Address to a u64 hash key
* @macaddr: the MAC Address as the base key
*
@@ -735,7 +783,6 @@ struct i40e_new_mac_filter {
struct i40e_veb {
struct i40e_pf *pf;
u16 idx;
- u16 veb_idx; /* index of VEB parent */
u16 seid;
u16 uplink_seid;
u16 stats_idx; /* index of VEB parent */
@@ -1120,14 +1167,12 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
static inline struct i40e_vsi *
i40e_find_vsi_by_type(struct i40e_pf *pf, u16 type)
{
+ struct i40e_vsi *vsi;
int i;
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- struct i40e_vsi *vsi = pf->vsi[i];
-
- if (vsi && vsi->type == type)
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->type == type)
return vsi;
- }
return NULL;
}
@@ -1309,4 +1354,40 @@ static inline struct i40e_pf *i40e_hw_to_pf(struct i40e_hw *hw)
struct device *i40e_hw_to_dev(struct i40e_hw *hw);
+/**
+ * i40e_pf_get_vsi_by_seid - find VSI by SEID
+ * @pf: pointer to a PF
+ * @seid: SEID of the VSI
+ **/
+static inline struct i40e_vsi *
+i40e_pf_get_vsi_by_seid(struct i40e_pf *pf, u16 seid)
+{
+ struct i40e_vsi *vsi;
+ int i;
+
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->seid == seid)
+ return vsi;
+
+ return NULL;
+}
+
+/**
+ * i40e_pf_get_veb_by_seid - find VEB by SEID
+ * @pf: pointer to a PF
+ * @seid: SEID of the VSI
+ **/
+static inline struct i40e_veb *
+i40e_pf_get_veb_by_seid(struct i40e_pf *pf, u16 seid)
+{
+ struct i40e_veb *veb;
+ int i;
+
+ i40e_pf_for_each_veb(pf, i, veb)
+ if (veb->seid == seid)
+ return veb;
+
+ return NULL;
+}
+
#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 306758428aef..b32071ee84af 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -148,8 +148,6 @@ static void i40e_client_release_qvlist(struct i40e_info *ldev)
u32 reg_idx;
qv_info = &qvlist_info->qv_info[i];
- if (!qv_info)
- continue;
reg_idx = I40E_PFINT_LNKLSTN(qv_info->v_idx - 1);
wr32(&pf->hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
}
@@ -576,8 +574,6 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev,
for (i = 0; i < qvlist_info->num_vectors; i++) {
qv_info = &qvlist_info->qv_info[i];
- if (!qv_info)
- continue;
v_idx = qv_info->v_idx;
/* Validate vector id belongs to this client */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 9d88ed6105fd..8db1eb0c1768 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1523,7 +1523,7 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
reg = rd32(hw, I40E_PRTDCB_RETSTCC(i));
reg &= ~(I40E_PRTDCB_RETSTCC_BWSHARE_MASK |
I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK |
- I40E_PRTDCB_RETSTCC_ETSTC_SHIFT);
+ I40E_PRTDCB_RETSTCC_ETSTC_MASK);
reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_BWSHARE_MASK,
bw_share[i]);
reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 6b60dc9b7736..d76497566e40 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -43,7 +43,7 @@
#define I40E_LLDP_TLV_SUBTYPE_SHIFT 0
#define I40E_LLDP_TLV_SUBTYPE_MASK (0xFF << I40E_LLDP_TLV_SUBTYPE_SHIFT)
#define I40E_LLDP_TLV_OUI_SHIFT 8
-#define I40E_LLDP_TLV_OUI_MASK (0xFFFFFF << I40E_LLDP_TLV_OUI_SHIFT)
+#define I40E_LLDP_TLV_OUI_MASK (0xFFFFFFU << I40E_LLDP_TLV_OUI_SHIFT)
/* Defines for IEEE ETS TLV */
#define I40E_IEEE_ETS_MAXTC_SHIFT 0
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index b96a92187ab3..8aa43aefe84c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -947,16 +947,16 @@ static int i40e_dcbnl_vsi_del_app(struct i40e_vsi *vsi,
static void i40e_dcbnl_del_app(struct i40e_pf *pf,
struct i40e_dcb_app_priority_table *app)
{
+ struct i40e_vsi *vsi;
int v, err;
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v] && pf->vsi[v]->netdev) {
- err = i40e_dcbnl_vsi_del_app(pf->vsi[v], app);
+ i40e_pf_for_each_vsi(pf, v, vsi)
+ if (vsi->netdev) {
+ err = i40e_dcbnl_vsi_del_app(vsi, app);
dev_dbg(&pf->pdev->dev, "Deleting app for VSI seid=%d err=%d sel=%d proto=0x%x prio=%d\n",
- pf->vsi[v]->seid, err, app->selector,
+ vsi->seid, err, app->selector,
app->protocolid, app->priority);
}
- }
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index ef70ddbe9c2f..f9ba45f596c9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -24,31 +24,13 @@ enum ring_type {
**/
static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
{
- int i;
-
- if (seid < 0)
+ if (seid < 0) {
dev_info(&pf->pdev->dev, "%d: bad seid\n", seid);
- else
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i] && (pf->vsi[i]->seid == seid))
- return pf->vsi[i];
-
- return NULL;
-}
-/**
- * i40e_dbg_find_veb - searches for the veb with the given seid
- * @pf: the PF structure to search for the veb
- * @seid: seid of the veb it is searching for
- **/
-static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid)
-{
- int i;
+ return NULL;
+ }
- for (i = 0; i < I40E_MAX_VEB; i++)
- if (pf->veb[i] && pf->veb[i]->seid == seid)
- return pf->veb[i];
- return NULL;
+ return i40e_pf_get_vsi_by_seid(pf, seid);
}
/**************************************************************
@@ -653,12 +635,11 @@ out:
**/
static void i40e_dbg_dump_vsi_no_seid(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int i;
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i])
- dev_info(&pf->pdev->dev, "dump vsi[%d]: %d\n",
- i, pf->vsi[i]->seid);
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ dev_info(&pf->pdev->dev, "dump vsi[%d]: %d\n", i, vsi->seid);
}
/**
@@ -696,15 +677,14 @@ static void i40e_dbg_dump_veb_seid(struct i40e_pf *pf, int seid)
{
struct i40e_veb *veb;
- veb = i40e_dbg_find_veb(pf, seid);
+ veb = i40e_pf_get_veb_by_seid(pf, seid);
if (!veb) {
dev_info(&pf->pdev->dev, "can't find veb %d\n", seid);
return;
}
dev_info(&pf->pdev->dev,
- "veb idx=%d,%d stats_ic=%d seid=%d uplink=%d mode=%s\n",
- veb->idx, veb->veb_idx, veb->stats_idx, veb->seid,
- veb->uplink_seid,
+ "veb idx=%d stats_ic=%d seid=%d uplink=%d mode=%s\n",
+ veb->idx, veb->stats_idx, veb->seid, veb->uplink_seid,
veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
i40e_dbg_dump_eth_stats(pf, &veb->stats);
}
@@ -718,11 +698,8 @@ static void i40e_dbg_dump_veb_all(struct i40e_pf *pf)
struct i40e_veb *veb;
int i;
- for (i = 0; i < I40E_MAX_VEB; i++) {
- veb = pf->veb[i];
- if (veb)
- i40e_dbg_dump_veb_seid(pf, veb->seid);
- }
+ i40e_pf_for_each_veb(pf, i, veb)
+ i40e_dbg_dump_veb_seid(pf, veb->seid);
}
/**
@@ -851,10 +828,14 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
} else if (strncmp(cmd_buf, "add relay", 9) == 0) {
struct i40e_veb *veb;
- int uplink_seid, i;
+ u8 enabled_tc = 0x1;
+ int uplink_seid;
cnt = sscanf(&cmd_buf[9], "%i %i", &uplink_seid, &vsi_seid);
- if (cnt != 2) {
+ if (cnt == 0) {
+ uplink_seid = 0;
+ vsi_seid = 0;
+ } else if (cnt != 2) {
dev_info(&pf->pdev->dev,
"add relay: bad command string, cnt=%d\n",
cnt);
@@ -866,33 +847,36 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
goto command_write_done;
}
- vsi = i40e_dbg_find_vsi(pf, vsi_seid);
- if (!vsi) {
- dev_info(&pf->pdev->dev,
- "add relay: VSI %d not found\n", vsi_seid);
- goto command_write_done;
- }
-
- for (i = 0; i < I40E_MAX_VEB; i++)
- if (pf->veb[i] && pf->veb[i]->seid == uplink_seid)
- break;
- if (i >= I40E_MAX_VEB && uplink_seid != 0 &&
- uplink_seid != pf->mac_seid) {
+ if (uplink_seid != 0 && uplink_seid != pf->mac_seid) {
dev_info(&pf->pdev->dev,
"add relay: relay uplink %d not found\n",
uplink_seid);
goto command_write_done;
+ } else if (uplink_seid) {
+ vsi = i40e_pf_get_vsi_by_seid(pf, vsi_seid);
+ if (!vsi) {
+ dev_info(&pf->pdev->dev,
+ "add relay: VSI %d not found\n",
+ vsi_seid);
+ goto command_write_done;
+ }
+ enabled_tc = vsi->tc_config.enabled_tc;
+ } else if (vsi_seid) {
+ dev_info(&pf->pdev->dev,
+ "add relay: VSI must be 0 for floating relay\n");
+ goto command_write_done;
}
- veb = i40e_veb_setup(pf, 0, uplink_seid, vsi_seid,
- vsi->tc_config.enabled_tc);
+ veb = i40e_veb_setup(pf, 0, uplink_seid, vsi_seid, enabled_tc);
if (veb)
dev_info(&pf->pdev->dev, "added relay %d\n", veb->seid);
else
dev_info(&pf->pdev->dev, "add relay failed\n");
} else if (strncmp(cmd_buf, "del relay", 9) == 0) {
+ struct i40e_veb *veb;
int i;
+
cnt = sscanf(&cmd_buf[9], "%i", &veb_seid);
if (cnt != 1) {
dev_info(&pf->pdev->dev,
@@ -906,9 +890,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
}
/* find the veb */
- for (i = 0; i < I40E_MAX_VEB; i++)
- if (pf->veb[i] && pf->veb[i]->seid == veb_seid)
+ i40e_pf_for_each_veb(pf, i, veb)
+ if (veb->seid == veb_seid)
break;
+
if (i >= I40E_MAX_VEB) {
dev_info(&pf->pdev->dev,
"del relay: relay %d not found\n", veb_seid);
@@ -916,7 +901,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
}
dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid);
- i40e_veb_release(pf->veb[i]);
+ i40e_veb_release(veb);
} else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
unsigned int v;
int ret;
@@ -1251,8 +1236,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
if (cnt == 0) {
int i;
- for (i = 0; i < pf->num_alloc_vsi; i++)
- i40e_vsi_reset_stats(pf->vsi[i]);
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ i40e_vsi_reset_stats(vsi);
dev_info(&pf->pdev->dev, "vsi clear stats called for all vsi's\n");
} else if (cnt == 1) {
vsi = i40e_dbg_find_vsi(pf, vsi_seid);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c841779713f6..42e7e6cdaa6d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -5644,7 +5644,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev,
return 0;
}
-static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int i40e_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_aq_get_phy_abilities_resp phy_cfg;
@@ -5664,16 +5664,12 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
if (phy_cfg.eee_capability == 0)
return -EOPNOTSUPP;
- edata->supported = SUPPORTED_Autoneg;
- edata->lp_advertised = edata->supported;
-
/* Get current configuration */
status = i40e_aq_get_phy_capabilities(hw, false, false, &phy_cfg, NULL);
if (status)
return -EAGAIN;
- edata->advertised = phy_cfg.eee_capability ? SUPPORTED_Autoneg : 0U;
- edata->eee_enabled = !!edata->advertised;
+ edata->eee_enabled = !!phy_cfg.eee_capability;
edata->tx_lpi_enabled = pf->stats.tx_lpi_status;
edata->eee_active = pf->stats.tx_lpi_status && pf->stats.rx_lpi_status;
@@ -5682,7 +5678,7 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
}
static int i40e_is_eee_param_supported(struct net_device *netdev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
@@ -5691,7 +5687,6 @@ static int i40e_is_eee_param_supported(struct net_device *netdev,
u32 value;
const char *name;
} param[] = {
- {edata->advertised & ~SUPPORTED_Autoneg, "advertise"},
{edata->tx_lpi_timer, "tx-timer"},
{edata->tx_lpi_enabled != pf->stats.tx_lpi_status, "tx-lpi"}
};
@@ -5709,7 +5704,7 @@ static int i40e_is_eee_param_supported(struct net_device *netdev,
return 0;
}
-static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int i40e_set_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_aq_get_phy_abilities_resp abilities;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ae8f9f135725..f86578857e8a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -310,11 +310,12 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
**/
struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
{
+ struct i40e_vsi *vsi;
int i;
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i] && (pf->vsi[i]->id == id))
- return pf->vsi[i];
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->id == id)
+ return vsi;
return NULL;
}
@@ -552,24 +553,19 @@ void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
**/
void i40e_pf_reset_stats(struct i40e_pf *pf)
{
+ struct i40e_veb *veb;
int i;
memset(&pf->stats, 0, sizeof(pf->stats));
memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets));
pf->stat_offsets_loaded = false;
- for (i = 0; i < I40E_MAX_VEB; i++) {
- if (pf->veb[i]) {
- memset(&pf->veb[i]->stats, 0,
- sizeof(pf->veb[i]->stats));
- memset(&pf->veb[i]->stats_offsets, 0,
- sizeof(pf->veb[i]->stats_offsets));
- memset(&pf->veb[i]->tc_stats, 0,
- sizeof(pf->veb[i]->tc_stats));
- memset(&pf->veb[i]->tc_stats_offsets, 0,
- sizeof(pf->veb[i]->tc_stats_offsets));
- pf->veb[i]->stat_offsets_loaded = false;
- }
+ i40e_pf_for_each_veb(pf, i, veb) {
+ memset(&veb->stats, 0, sizeof(veb->stats));
+ memset(&veb->stats_offsets, 0, sizeof(veb->stats_offsets));
+ memset(&veb->tc_stats, 0, sizeof(veb->tc_stats));
+ memset(&veb->tc_stats_offsets, 0, sizeof(veb->tc_stats_offsets));
+ veb->stat_offsets_loaded = false;
}
pf->hw_csum_rx_error = 0;
}
@@ -2879,6 +2875,7 @@ err_no_memory_locked:
**/
static void i40e_sync_filters_subtask(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int v;
if (!pf)
@@ -2890,11 +2887,10 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
return;
}
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v] &&
- (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
- !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
- int ret = i40e_sync_vsi_filters(pf->vsi[v]);
+ i40e_pf_for_each_vsi(pf, v, vsi) {
+ if ((vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
+ !test_bit(__I40E_VSI_RELEASING, vsi->state)) {
+ int ret = i40e_sync_vsi_filters(vsi);
if (ret) {
/* come back and try again later */
@@ -3588,40 +3584,55 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
struct i40e_hmc_obj_rxq rx_ctx;
int err = 0;
bool ok;
- int ret;
bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
/* clear the context structure first */
memset(&rx_ctx, 0, sizeof(rx_ctx));
- if (ring->vsi->type == I40E_VSI_MAIN)
- xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+ ring->rx_buf_len = vsi->rx_buf_len;
+
+ /* XDP RX-queue info only needed for RX rings exposed to XDP */
+ if (ring->vsi->type != I40E_VSI_MAIN)
+ goto skip;
+
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->queue_index,
+ ring->q_vector->napi.napi_id,
+ ring->rx_buf_len);
+ if (err)
+ return err;
+ }
ring->xsk_pool = i40e_xsk_pool(ring);
if (ring->xsk_pool) {
- ring->rx_buf_len =
- xsk_pool_get_rx_frame_size(ring->xsk_pool);
- ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+ ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->queue_index,
+ ring->q_vector->napi.napi_id,
+ ring->rx_buf_len);
+ if (err)
+ return err;
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL);
- if (ret)
- return ret;
+ if (err)
+ return err;
dev_info(&vsi->back->pdev->dev,
"Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->queue_index);
} else {
- ring->rx_buf_len = vsi->rx_buf_len;
- if (ring->vsi->type == I40E_VSI_MAIN) {
- ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_PAGE_SHARED,
- NULL);
- if (ret)
- return ret;
- }
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err)
+ return err;
}
+skip:
xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
@@ -4911,27 +4922,23 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi)
void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
- int pf_q, err, q_end;
+ u32 pf_q, tx_q_end, rx_q_end;
/* When port TX is suspended, don't wait */
if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
return i40e_vsi_stop_rings_no_wait(vsi);
- q_end = vsi->base_queue + vsi->num_queue_pairs;
- for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
- i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+ tx_q_end = vsi->base_queue +
+ vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
+ for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++)
+ i40e_pre_tx_queue_cfg(&pf->hw, pf_q, false);
- for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
- err = i40e_control_wait_rx_q(pf, pf_q, false);
- if (err)
- dev_info(&pf->pdev->dev,
- "VSI seid %d Rx ring %d disable timeout\n",
- vsi->seid, pf_q);
- }
+ rx_q_end = vsi->base_queue + vsi->num_queue_pairs;
+ for (pf_q = vsi->base_queue; pf_q < rx_q_end; pf_q++)
+ i40e_control_rx_q(pf, pf_q, false);
msleep(I40E_DISABLE_TX_GAP_MSEC);
- pf_q = vsi->base_queue;
- for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+ for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++)
wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
i40e_vsi_wait_queues_disabled(vsi);
@@ -5155,6 +5162,7 @@ static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
**/
static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int i;
if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state))
@@ -5164,9 +5172,10 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
I40E_IWARP_IRQ_PILE_ID);
i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i])
- i40e_vsi_free_q_vectors(pf->vsi[i]);
+
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ i40e_vsi_free_q_vectors(vsi);
+
i40e_reset_interrupt_capability(pf);
}
@@ -5263,12 +5272,11 @@ static void i40e_unquiesce_vsi(struct i40e_vsi *vsi)
**/
static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int v;
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v])
- i40e_quiesce_vsi(pf->vsi[v]);
- }
+ i40e_pf_for_each_vsi(pf, v, vsi)
+ i40e_quiesce_vsi(vsi);
}
/**
@@ -5277,12 +5285,11 @@ static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
**/
static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int v;
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v])
- i40e_unquiesce_vsi(pf->vsi[v]);
- }
+ i40e_pf_for_each_vsi(pf, v, vsi)
+ i40e_unquiesce_vsi(vsi);
}
/**
@@ -5343,14 +5350,13 @@ wait_rx:
**/
static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int v, ret = 0;
- for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
- if (pf->vsi[v]) {
- ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
- if (ret)
- break;
- }
+ i40e_pf_for_each_vsi(pf, v, vsi) {
+ ret = i40e_vsi_wait_queues_disabled(vsi);
+ if (ret)
+ break;
}
return ret;
@@ -6767,32 +6773,29 @@ out:
**/
static void i40e_dcb_reconfigure(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
+ struct i40e_veb *veb;
u8 tc_map = 0;
int ret;
- u8 v;
+ int v;
/* Enable the TCs available on PF to all VEBs */
tc_map = i40e_pf_get_tc_map(pf);
if (tc_map == I40E_DEFAULT_TRAFFIC_CLASS)
return;
- for (v = 0; v < I40E_MAX_VEB; v++) {
- if (!pf->veb[v])
- continue;
- ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+ i40e_pf_for_each_veb(pf, v, veb) {
+ ret = i40e_veb_config_tc(veb, tc_map);
if (ret) {
dev_info(&pf->pdev->dev,
"Failed configuring TC for VEB seid=%d\n",
- pf->veb[v]->seid);
+ veb->seid);
/* Will try to configure as many components */
}
}
/* Update each VSI */
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (!pf->vsi[v])
- continue;
-
+ i40e_pf_for_each_vsi(pf, v, vsi) {
/* - Enable all TCs for the LAN VSI
* - For all others keep them at TC0 for now
*/
@@ -6801,17 +6804,17 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
else
tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
- ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
+ ret = i40e_vsi_config_tc(vsi, tc_map);
if (ret) {
dev_info(&pf->pdev->dev,
"Failed configuring TC for VSI seid=%d\n",
- pf->vsi[v]->seid);
+ vsi->seid);
/* Will try to configure as many components */
} else {
/* Re-configure VSI vectors based on updated TC map */
- i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
- if (pf->vsi[v]->netdev)
- i40e_dcbnl_set_all(pf->vsi[v]);
+ i40e_vsi_map_rings_to_vectors(vsi);
+ if (vsi->netdev)
+ i40e_dcbnl_set_all(vsi);
}
}
}
@@ -9246,7 +9249,9 @@ int i40e_close(struct net_device *netdev)
**/
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
{
+ struct i40e_vsi *vsi;
u32 val;
+ int i;
/* do the biggest reset indicated */
if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) {
@@ -9302,29 +9307,20 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
"FW LLDP is enabled\n");
} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
- int v;
-
/* Find the VSI(s) that requested a re-init */
- dev_info(&pf->pdev->dev,
- "VSI reinit requested\n");
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- struct i40e_vsi *vsi = pf->vsi[v];
+ dev_info(&pf->pdev->dev, "VSI reinit requested\n");
- if (vsi != NULL &&
- test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED,
+ i40e_pf_for_each_vsi(pf, i, vsi) {
+ if (test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED,
vsi->state))
- i40e_vsi_reinit_locked(pf->vsi[v]);
+ i40e_vsi_reinit_locked(vsi);
}
} else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) {
- int v;
-
/* Find the VSI(s) that needs to be brought down */
dev_info(&pf->pdev->dev, "VSI down requested\n");
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- struct i40e_vsi *vsi = pf->vsi[v];
- if (vsi != NULL &&
- test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED,
+ i40e_pf_for_each_vsi(pf, i, vsi) {
+ if (test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED,
vsi->state)) {
set_bit(__I40E_VSI_DOWN, vsi->state);
i40e_down(vsi);
@@ -9877,6 +9873,7 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
**/
static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
{
+ struct i40e_vsi *vsi;
struct i40e_pf *pf;
int i;
@@ -9884,15 +9881,10 @@ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
return;
pf = veb->pf;
- /* depth first... */
- for (i = 0; i < I40E_MAX_VEB; i++)
- if (pf->veb[i] && (pf->veb[i]->uplink_seid == veb->seid))
- i40e_veb_link_event(pf->veb[i], link_up);
-
- /* ... now the local VSIs */
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid))
- i40e_vsi_link_event(pf->vsi[i], link_up);
+ /* Send link event to contained VSIs */
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->uplink_seid == veb->seid)
+ i40e_vsi_link_event(vsi, link_up);
}
/**
@@ -9984,6 +9976,8 @@ static void i40e_link_event(struct i40e_pf *pf)
**/
static void i40e_watchdog_subtask(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
+ struct i40e_veb *veb;
int i;
/* if interface is down do nothing */
@@ -10004,15 +9998,14 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
/* Update the stats for active netdevs so the network stack
* can look at updated numbers whenever it cares to
*/
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i] && pf->vsi[i]->netdev)
- i40e_update_stats(pf->vsi[i]);
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->netdev)
+ i40e_update_stats(vsi);
if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) {
/* Update the stats for the active switching components */
- for (i = 0; i < I40E_MAX_VEB; i++)
- if (pf->veb[i])
- i40e_update_veb_stats(pf->veb[i]);
+ i40e_pf_for_each_veb(pf, i, veb)
+ i40e_update_veb_stats(veb);
}
i40e_ptp_rx_hang(pf);
@@ -10357,89 +10350,84 @@ static void i40e_config_bridge_mode(struct i40e_veb *veb)
}
/**
- * i40e_reconstitute_veb - rebuild the VEB and anything connected to it
+ * i40e_reconstitute_veb - rebuild the VEB and VSIs connected to it
* @veb: pointer to the VEB instance
*
- * This is a recursive function that first builds the attached VSIs then
- * recurses in to build the next layer of VEB. We track the connections
- * through our own index numbers because the seid's from the HW could
- * change across the reset.
+ * This is a function that builds the attached VSIs. We track the connections
+ * through our own index numbers because the seid's from the HW could change
+ * across the reset.
**/
static int i40e_reconstitute_veb(struct i40e_veb *veb)
{
struct i40e_vsi *ctl_vsi = NULL;
struct i40e_pf *pf = veb->pf;
- int v, veb_idx;
- int ret;
+ struct i40e_vsi *vsi;
+ int v, ret;
- /* build VSI that owns this VEB, temporarily attached to base VEB */
- for (v = 0; v < pf->num_alloc_vsi && !ctl_vsi; v++) {
- if (pf->vsi[v] &&
- pf->vsi[v]->veb_idx == veb->idx &&
- pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) {
- ctl_vsi = pf->vsi[v];
- break;
- }
- }
- if (!ctl_vsi) {
- dev_info(&pf->pdev->dev,
- "missing owner VSI for veb_idx %d\n", veb->idx);
- ret = -ENOENT;
- goto end_reconstitute;
+ /* As we do not maintain PV (port virtualizer) switch element then
+ * there can be only one non-floating VEB that have uplink to MAC SEID
+ * and its control VSI is the main one.
+ */
+ if (WARN_ON(veb->uplink_seid && veb->uplink_seid != pf->mac_seid)) {
+ dev_err(&pf->pdev->dev,
+ "Invalid uplink SEID for VEB %d\n", veb->idx);
+ return -ENOENT;
}
- if (ctl_vsi != pf->vsi[pf->lan_vsi])
- ctl_vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
- ret = i40e_add_vsi(ctl_vsi);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "rebuild of veb_idx %d owner VSI failed: %d\n",
- veb->idx, ret);
- goto end_reconstitute;
+
+ if (veb->uplink_seid == pf->mac_seid) {
+ /* Check that the LAN VSI has VEB owning flag set */
+ ctl_vsi = pf->vsi[pf->lan_vsi];
+
+ if (WARN_ON(ctl_vsi->veb_idx != veb->idx ||
+ !(ctl_vsi->flags & I40E_VSI_FLAG_VEB_OWNER))) {
+ dev_err(&pf->pdev->dev,
+ "Invalid control VSI for VEB %d\n", veb->idx);
+ return -ENOENT;
+ }
+
+ /* Add the control VSI to switch */
+ ret = i40e_add_vsi(ctl_vsi);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "Rebuild of owner VSI for VEB %d failed: %d\n",
+ veb->idx, ret);
+ return ret;
+ }
+
+ i40e_vsi_reset_stats(ctl_vsi);
}
- i40e_vsi_reset_stats(ctl_vsi);
/* create the VEB in the switch and move the VSI onto the VEB */
ret = i40e_add_veb(veb, ctl_vsi);
if (ret)
- goto end_reconstitute;
+ return ret;
- if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
- veb->bridge_mode = BRIDGE_MODE_VEB;
- else
- veb->bridge_mode = BRIDGE_MODE_VEPA;
- i40e_config_bridge_mode(veb);
+ if (veb->uplink_seid) {
+ if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
+ veb->bridge_mode = BRIDGE_MODE_VEB;
+ else
+ veb->bridge_mode = BRIDGE_MODE_VEPA;
+ i40e_config_bridge_mode(veb);
+ }
/* create the remaining VSIs attached to this VEB */
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi)
+ i40e_pf_for_each_vsi(pf, v, vsi) {
+ if (vsi == ctl_vsi)
continue;
- if (pf->vsi[v]->veb_idx == veb->idx) {
- struct i40e_vsi *vsi = pf->vsi[v];
-
+ if (vsi->veb_idx == veb->idx) {
vsi->uplink_seid = veb->seid;
ret = i40e_add_vsi(vsi);
if (ret) {
dev_info(&pf->pdev->dev,
"rebuild of vsi_idx %d failed: %d\n",
v, ret);
- goto end_reconstitute;
+ return ret;
}
i40e_vsi_reset_stats(vsi);
}
}
- /* create any VEBs attached to this VEB - RECURSION */
- for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
- if (pf->veb[veb_idx] && pf->veb[veb_idx]->veb_idx == veb->idx) {
- pf->veb[veb_idx]->uplink_seid = veb->seid;
- ret = i40e_reconstitute_veb(pf->veb[veb_idx]);
- if (ret)
- break;
- }
- }
-
-end_reconstitute:
return ret;
}
@@ -10707,6 +10695,7 @@ static void i40e_clean_xps_state(struct i40e_vsi *vsi)
static void i40e_prep_for_reset(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
+ struct i40e_vsi *vsi;
int ret = 0;
u32 v;
@@ -10721,11 +10710,9 @@ static void i40e_prep_for_reset(struct i40e_pf *pf)
/* quiesce the VSIs and their queues that are not already DOWN */
i40e_pf_quiesce_all_vsi(pf);
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v]) {
- i40e_clean_xps_state(pf->vsi[v]);
- pf->vsi[v]->seid = 0;
- }
+ i40e_pf_for_each_vsi(pf, v, vsi) {
+ i40e_clean_xps_state(vsi);
+ vsi->seid = 0;
}
i40e_shutdown_adminq(&pf->hw);
@@ -10839,6 +10826,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
+ struct i40e_veb *veb;
int ret;
u32 val;
int v;
@@ -10980,35 +10968,29 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
*/
if (vsi->uplink_seid != pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
- /* find the one VEB connected to the MAC, and find orphans */
- for (v = 0; v < I40E_MAX_VEB; v++) {
- if (!pf->veb[v])
- continue;
-
- if (pf->veb[v]->uplink_seid == pf->mac_seid ||
- pf->veb[v]->uplink_seid == 0) {
- ret = i40e_reconstitute_veb(pf->veb[v]);
- if (!ret)
- continue;
+ /* Rebuild VEBs */
+ i40e_pf_for_each_veb(pf, v, veb) {
+ ret = i40e_reconstitute_veb(veb);
+ if (!ret)
+ continue;
- /* If Main VEB failed, we're in deep doodoo,
- * so give up rebuilding the switch and set up
- * for minimal rebuild of PF VSI.
- * If orphan failed, we'll report the error
- * but try to keep going.
- */
- if (pf->veb[v]->uplink_seid == pf->mac_seid) {
- dev_info(&pf->pdev->dev,
- "rebuild of switch failed: %d, will try to set up simple PF connection\n",
- ret);
- vsi->uplink_seid = pf->mac_seid;
- break;
- } else if (pf->veb[v]->uplink_seid == 0) {
- dev_info(&pf->pdev->dev,
- "rebuild of orphan VEB failed: %d\n",
- ret);
- }
+ /* If Main VEB failed, we're in deep doodoo,
+ * so give up rebuilding the switch and set up
+ * for minimal rebuild of PF VSI.
+ * If orphan failed, we'll report the error
+ * but try to keep going.
+ */
+ if (veb->uplink_seid == pf->mac_seid) {
+ dev_info(&pf->pdev->dev,
+ "rebuild of switch failed: %d, will try to set up simple PF connection\n",
+ ret);
+ vsi->uplink_seid = pf->mac_seid;
+ break;
+ } else if (veb->uplink_seid == 0) {
+ dev_info(&pf->pdev->dev,
+ "rebuild of orphan VEB failed: %d\n",
+ ret);
}
}
}
@@ -12087,6 +12069,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
*/
static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
{
+ struct i40e_vsi *vsi;
int err, i;
/* We cleared the MSI and MSI-X flags when disabling the old interrupt
@@ -12103,13 +12086,12 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
/* Now that we've re-acquired IRQs, we need to remap the vectors and
* rings together again.
*/
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- if (pf->vsi[i]) {
- err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
- if (err)
- goto err_unwind;
- i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
- }
+ i40e_pf_for_each_vsi(pf, i, vsi) {
+ err = i40e_vsi_alloc_q_vectors(vsi);
+ if (err)
+ goto err_unwind;
+
+ i40e_vsi_map_rings_to_vectors(vsi);
}
err = i40e_setup_misc_vector(pf);
@@ -13111,19 +13093,16 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- struct i40e_veb *veb = NULL;
struct nlattr *attr, *br_spec;
- int i, rem;
+ struct i40e_veb *veb;
+ int rem;
/* Only for PF VSI for now */
if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
return -EOPNOTSUPP;
/* Find the HW bridge for PF VSI */
- for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
- if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
- veb = pf->veb[i];
- }
+ veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid);
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (!br_spec)
@@ -13188,19 +13167,14 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- struct i40e_veb *veb = NULL;
- int i;
+ struct i40e_veb *veb;
/* Only for PF VSI for now */
if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
return -EOPNOTSUPP;
/* Find the HW bridge for the PF VSI */
- for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
- if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
- veb = pf->veb[i];
- }
-
+ veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid);
if (!veb)
return 0;
@@ -13234,12 +13208,12 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
features &= ~NETIF_F_GSO_MASK;
/* MACLEN can support at most 63 words */
- len = skb_network_header(skb) - skb->data;
+ len = skb_network_offset(skb);
if (len & ~(63 * 2))
goto out_err;
/* IPLEN and EIPLEN can support at most 127 dwords */
- len = skb_transport_header(skb) - skb_network_header(skb);
+ len = skb_network_header_len(skb);
if (len & ~(127 * 4))
goto out_err;
@@ -13549,9 +13523,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
return err;
i40e_queue_pair_disable_irq(vsi, queue_pair);
+ i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
- i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
i40e_queue_pair_clean_rings(vsi, queue_pair);
i40e_queue_pair_reset_stats(vsi, queue_pair);
@@ -14134,7 +14108,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
{
struct i40e_mac_filter *f;
struct hlist_node *h;
- struct i40e_veb *veb = NULL;
+ struct i40e_veb *veb;
struct i40e_pf *pf;
u16 uplink_seid;
int i, n, bkt;
@@ -14198,29 +14172,28 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
/* If this was the last thing on the VEB, except for the
* controlling VSI, remove the VEB, which puts the controlling
- * VSI onto the next level down in the switch.
+ * VSI onto the uplink port.
*
* Well, okay, there's one more exception here: don't remove
- * the orphan VEBs yet. We'll wait for an explicit remove request
+ * the floating VEBs yet. We'll wait for an explicit remove request
* from up the network stack.
*/
- for (n = 0, i = 0; i < pf->num_alloc_vsi; i++) {
- if (pf->vsi[i] &&
- pf->vsi[i]->uplink_seid == uplink_seid &&
- (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
- n++; /* count the VSIs */
- }
- }
- for (i = 0; i < I40E_MAX_VEB; i++) {
- if (!pf->veb[i])
- continue;
- if (pf->veb[i]->uplink_seid == uplink_seid)
- n++; /* count the VEBs */
- if (pf->veb[i]->seid == uplink_seid)
- veb = pf->veb[i];
+ veb = i40e_pf_get_veb_by_seid(pf, uplink_seid);
+ if (veb && veb->uplink_seid) {
+ n = 0;
+
+ /* Count non-controlling VSIs present on the VEB */
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->uplink_seid == uplink_seid &&
+ (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
+ n++;
+
+ /* If there is no VSI except the control one then release
+ * the VEB and put the control VSI onto VEB uplink.
+ */
+ if (!n)
+ i40e_veb_release(veb);
}
- if (n == 0 && veb && veb->uplink_seid != 0)
- i40e_veb_release(veb);
return 0;
}
@@ -14378,8 +14351,8 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
struct i40e_vsi *vsi = NULL;
struct i40e_veb *veb = NULL;
u16 alloc_queue_pairs;
- int ret, i;
int v_idx;
+ int ret;
/* The requested uplink_seid must be either
* - the PF's port seid
@@ -14394,21 +14367,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
*
* Find which uplink_seid we were given and create a new VEB if needed
*/
- for (i = 0; i < I40E_MAX_VEB; i++) {
- if (pf->veb[i] && pf->veb[i]->seid == uplink_seid) {
- veb = pf->veb[i];
- break;
- }
- }
-
+ veb = i40e_pf_get_veb_by_seid(pf, uplink_seid);
if (!veb && uplink_seid != pf->mac_seid) {
-
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) {
- vsi = pf->vsi[i];
- break;
- }
- }
+ vsi = i40e_pf_get_vsi_by_seid(pf, uplink_seid);
if (!vsi) {
dev_info(&pf->pdev->dev, "no such uplink_seid %d\n",
uplink_seid);
@@ -14437,10 +14398,7 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
}
i40e_config_bridge_mode(veb);
}
- for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
- if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
- veb = pf->veb[i];
- }
+ veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid);
if (!veb) {
dev_info(&pf->pdev->dev, "couldn't add VEB\n");
return NULL;
@@ -14670,29 +14628,24 @@ static void i40e_switch_branch_release(struct i40e_veb *branch)
struct i40e_pf *pf = branch->pf;
u16 branch_seid = branch->seid;
u16 veb_idx = branch->idx;
+ struct i40e_vsi *vsi;
+ struct i40e_veb *veb;
int i;
/* release any VEBs on this VEB - RECURSION */
- for (i = 0; i < I40E_MAX_VEB; i++) {
- if (!pf->veb[i])
- continue;
- if (pf->veb[i]->uplink_seid == branch->seid)
- i40e_switch_branch_release(pf->veb[i]);
- }
+ i40e_pf_for_each_veb(pf, i, veb)
+ if (veb->uplink_seid == branch->seid)
+ i40e_switch_branch_release(veb);
/* Release the VSIs on this VEB, but not the owner VSI.
*
* NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
* the VEB itself, so don't use (*branch) after this loop.
*/
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- if (!pf->vsi[i])
- continue;
- if (pf->vsi[i]->uplink_seid == branch_seid &&
- (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
- i40e_vsi_release(pf->vsi[i]);
- }
- }
+ i40e_pf_for_each_vsi(pf, i, vsi)
+ if (vsi->uplink_seid == branch_seid &&
+ (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
+ i40e_vsi_release(vsi);
/* There's one corner case where the VEB might not have been
* removed, so double check it here and remove it if needed.
@@ -14730,38 +14683,35 @@ static void i40e_veb_clear(struct i40e_veb *veb)
**/
void i40e_veb_release(struct i40e_veb *veb)
{
- struct i40e_vsi *vsi = NULL;
+ struct i40e_vsi *vsi, *vsi_it;
struct i40e_pf *pf;
int i, n = 0;
pf = veb->pf;
/* find the remaining VSI and check for extras */
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
+ i40e_pf_for_each_vsi(pf, i, vsi_it)
+ if (vsi_it->uplink_seid == veb->seid) {
+ if (vsi_it->flags & I40E_VSI_FLAG_VEB_OWNER)
+ vsi = vsi_it;
n++;
- vsi = pf->vsi[i];
}
- }
- if (n != 1) {
+
+ /* Floating VEB has to be empty and regular one must have
+ * single owner VSI.
+ */
+ if ((veb->uplink_seid && n != 1) || (!veb->uplink_seid && n != 0)) {
dev_info(&pf->pdev->dev,
"can't remove VEB %d with %d VSIs left\n",
veb->seid, n);
return;
}
- /* move the remaining VSI to uplink veb */
- vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
+ /* For regular VEB move the owner VSI to uplink port */
if (veb->uplink_seid) {
+ vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
vsi->uplink_seid = veb->uplink_seid;
- if (veb->uplink_seid == pf->mac_seid)
- vsi->veb_idx = I40E_NO_VEB;
- else
- vsi->veb_idx = veb->veb_idx;
- } else {
- /* floating VEB */
- vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
- vsi->veb_idx = pf->vsi[pf->lan_vsi]->veb_idx;
+ vsi->veb_idx = I40E_NO_VEB;
}
i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
@@ -14779,8 +14729,8 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags);
int ret;
- ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
- veb->enabled_tc, false,
+ ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi ? vsi->seid : 0,
+ veb->enabled_tc, vsi ? false : true,
&veb->seid, enable_stats, NULL);
/* get a VEB from the hardware */
@@ -14812,9 +14762,11 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
return -ENOENT;
}
- vsi->uplink_seid = veb->seid;
- vsi->veb_idx = veb->idx;
- vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
+ if (vsi) {
+ vsi->uplink_seid = veb->seid;
+ vsi->veb_idx = veb->idx;
+ vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
+ }
return 0;
}
@@ -14839,8 +14791,9 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
u16 uplink_seid, u16 vsi_seid,
u8 enabled_tc)
{
- struct i40e_veb *veb, *uplink_veb = NULL;
- int vsi_idx, veb_idx;
+ struct i40e_vsi *vsi = NULL;
+ struct i40e_veb *veb;
+ int veb_idx;
int ret;
/* if one seid is 0, the other must be 0 to create a floating relay */
@@ -14853,26 +14806,11 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
}
/* make sure there is such a vsi and uplink */
- for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
- if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
- break;
- if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) {
- dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
- vsi_seid);
- return NULL;
- }
-
- if (uplink_seid && uplink_seid != pf->mac_seid) {
- for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
- if (pf->veb[veb_idx] &&
- pf->veb[veb_idx]->seid == uplink_seid) {
- uplink_veb = pf->veb[veb_idx];
- break;
- }
- }
- if (!uplink_veb) {
- dev_info(&pf->pdev->dev,
- "uplink seid %d not found\n", uplink_seid);
+ if (vsi_seid) {
+ vsi = i40e_pf_get_vsi_by_seid(pf, vsi_seid);
+ if (!vsi) {
+ dev_err(&pf->pdev->dev, "vsi seid %d not found\n",
+ vsi_seid);
return NULL;
}
}
@@ -14884,14 +14822,14 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
veb = pf->veb[veb_idx];
veb->flags = flags;
veb->uplink_seid = uplink_seid;
- veb->veb_idx = (uplink_veb ? uplink_veb->idx : I40E_NO_VEB);
veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);
/* create the VEB in the switch */
- ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
+ ret = i40e_add_veb(veb, vsi);
if (ret)
goto err_veb;
- if (vsi_idx == pf->lan_vsi)
+
+ if (vsi && vsi->idx == pf->lan_vsi)
pf->lan_veb = veb->idx;
return veb;
@@ -14919,6 +14857,7 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
u8 element_type = ele->element_type;
u16 seid = le16_to_cpu(ele->seid);
+ struct i40e_veb *veb;
if (printconfig)
dev_info(&pf->pdev->dev,
@@ -14937,13 +14876,10 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
int v;
/* find existing or else empty VEB */
- for (v = 0; v < I40E_MAX_VEB; v++) {
- if (pf->veb[v] && (pf->veb[v]->seid == seid)) {
- pf->lan_veb = v;
- break;
- }
- }
- if (pf->lan_veb >= I40E_MAX_VEB) {
+ veb = i40e_pf_get_veb_by_seid(pf, seid);
+ if (veb) {
+ pf->lan_veb = veb->idx;
+ } else {
v = i40e_veb_mem_alloc(pf);
if (v < 0)
break;
@@ -14956,7 +14892,6 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
pf->veb[pf->lan_veb]->seid = seid;
pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
pf->veb[pf->lan_veb]->pf = pf;
- pf->veb[pf->lan_veb]->veb_idx = I40E_NO_VEB;
break;
case I40E_SWITCH_ELEMENT_TYPE_VSI:
if (num_reported != 1)
@@ -15619,6 +15554,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_I40E_DCB
enum i40e_get_fw_lldp_status_resp lldp_status;
#endif /* CONFIG_I40E_DCB */
+ struct i40e_vsi *vsi;
struct i40e_pf *pf;
struct i40e_hw *hw;
u16 wol_nvm_bits;
@@ -15629,7 +15565,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#endif /* CONFIG_I40E_DCB */
int err;
u32 val;
- u32 i;
err = pci_enable_device_mem(pdev);
if (err)
@@ -15979,12 +15914,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
/* if FDIR VSI was set up, start it now */
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
- i40e_vsi_open(pf->vsi[i]);
- break;
- }
- }
+ vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+ if (vsi)
+ i40e_vsi_open(vsi);
/* The driver only wants link up/down and module qualification
* reports from firmware. Note the negative logic.
@@ -16230,6 +16162,8 @@ static void i40e_remove(struct pci_dev *pdev)
{
struct i40e_pf *pf = pci_get_drvdata(pdev);
struct i40e_hw *hw = &pf->hw;
+ struct i40e_vsi *vsi;
+ struct i40e_veb *veb;
int ret_code;
int i;
@@ -16287,24 +16221,19 @@ static void i40e_remove(struct pci_dev *pdev)
/* If there is a switch structure or any orphans, remove them.
* This will leave only the PF's VSI remaining.
*/
- for (i = 0; i < I40E_MAX_VEB; i++) {
- if (!pf->veb[i])
- continue;
-
- if (pf->veb[i]->uplink_seid == pf->mac_seid ||
- pf->veb[i]->uplink_seid == 0)
- i40e_switch_branch_release(pf->veb[i]);
- }
+ i40e_pf_for_each_veb(pf, i, veb)
+ if (veb->uplink_seid == pf->mac_seid ||
+ veb->uplink_seid == 0)
+ i40e_switch_branch_release(veb);
/* Now we can shutdown the PF's VSIs, just before we kill
* adminq and hmc.
*/
- for (i = pf->num_alloc_vsi; i--;)
- if (pf->vsi[i]) {
- i40e_vsi_close(pf->vsi[i]);
- i40e_vsi_release(pf->vsi[i]);
- pf->vsi[i] = NULL;
- }
+ i40e_pf_for_each_vsi(pf, i, vsi) {
+ i40e_vsi_close(vsi);
+ i40e_vsi_release(vsi);
+ pf->vsi[i] = NULL;
+ }
i40e_cloud_filter_exit(pf);
@@ -16341,18 +16270,17 @@ unmap:
/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
rtnl_lock();
i40e_clear_interrupt_scheme(pf);
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- if (pf->vsi[i]) {
- if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
- i40e_vsi_clear_rings(pf->vsi[i]);
- i40e_vsi_clear(pf->vsi[i]);
- pf->vsi[i] = NULL;
- }
+ i40e_pf_for_each_vsi(pf, i, vsi) {
+ if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
+ i40e_vsi_clear_rings(vsi);
+
+ i40e_vsi_clear(vsi);
+ pf->vsi[i] = NULL;
}
rtnl_unlock();
- for (i = 0; i < I40E_MAX_VEB; i++) {
- kfree(pf->veb[i]);
+ i40e_pf_for_each_veb(pf, i, veb) {
+ kfree(veb);
pf->veb[i] = NULL;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index af4269330581..ce1f11b8ad65 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -567,8 +567,7 @@ static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min)
**/
static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min)
{
- return (hw->aq.fw_maj_ver > maj ||
- (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min));
+ return (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min);
}
#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 971ba3322038..0d7177083708 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1548,7 +1548,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
- int err;
u64_stats_init(&rx_ring->syncp);
@@ -1569,14 +1568,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_process = 0;
rx_ring->next_to_use = 0;
- /* XDP RX-queue info only needed for RX rings exposed to XDP */
- if (rx_ring->vsi->type == I40E_VSI_MAIN) {
- err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->queue_index, rx_ring->q_vector->napi.napi_id);
- if (err < 0)
- return err;
- }
-
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
rx_ring->rx_bi =
@@ -2087,7 +2078,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
struct xdp_buff *xdp)
{
- u32 next = rx_ring->next_to_clean;
+ u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+ u32 next = rx_ring->next_to_clean, i = 0;
struct i40e_rx_buffer *rx_buffer;
xdp->flags = 0;
@@ -2100,10 +2092,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
if (!rx_buffer->page)
continue;
- if (xdp_res == I40E_XDP_CONSUMED)
- rx_buffer->pagecnt_bias++;
- else
+ if (xdp_res != I40E_XDP_CONSUMED)
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+ else if (i++ <= nr_frags)
+ rx_buffer->pagecnt_bias++;
/* EOP buffer will be put in i40e_clean_rx_irq() */
if (next == rx_ring->next_to_process)
@@ -2117,20 +2109,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
* i40e_construct_skb - Allocate skb and populate it
* @rx_ring: rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
* skb correctly.
*/
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
- struct xdp_buff *xdp,
- u32 nr_frags)
+ struct xdp_buff *xdp)
{
unsigned int size = xdp->data_end - xdp->data;
struct i40e_rx_buffer *rx_buffer;
+ struct skb_shared_info *sinfo;
unsigned int headlen;
struct sk_buff *skb;
+ u32 nr_frags = 0;
/* prefetch first cache line of first page */
net_prefetch(xdp->data);
@@ -2168,6 +2160,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
memcpy(__skb_put(skb, headlen), xdp->data,
ALIGN(headlen, sizeof(long)));
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
/* update all of the pointers */
size -= headlen;
@@ -2187,9 +2183,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
}
if (unlikely(xdp_buff_has_frags(xdp))) {
- struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
+ struct skb_shared_info *skinfo = skb_shinfo(skb);
- sinfo = xdp_get_shared_info_from_buff(xdp);
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
@@ -2212,17 +2207,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
- struct xdp_buff *xdp,
- u32 nr_frags)
+ struct xdp_buff *xdp)
{
unsigned int metasize = xdp->data - xdp->data_meta;
+ struct skb_shared_info *sinfo;
struct sk_buff *skb;
+ u32 nr_frags;
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
@@ -2231,6 +2226,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
*/
net_prefetch(xdp->data_meta);
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
+
/* build an skb around the page buffer */
skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
@@ -2243,9 +2243,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp))) {
- struct skb_shared_info *sinfo;
-
- sinfo = xdp_get_shared_info_from_buff(xdp);
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
@@ -2589,9 +2586,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
total_rx_bytes += size;
} else {
if (ring_uses_build_skb(rx_ring))
- skb = i40e_build_skb(rx_ring, xdp, nfrags);
+ skb = i40e_build_skb(rx_ring, xdp);
else
- skb = i40e_construct_skb(rx_ring, xdp, nfrags);
+ skb = i40e_construct_skb(rx_ring, xdp);
/* drop if we failed to retrieve a buffer */
if (!skb) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 908cdbd3ec5d..83a34e98bdc7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -491,8 +491,6 @@ static void i40e_release_rdma_qvlist(struct i40e_vf *vf)
u32 v_idx, reg_idx, reg;
qv_info = &qvlist_info->qv_info[i];
- if (!qv_info)
- continue;
v_idx = qv_info->v_idx;
if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
/* Figure out the queue after CEQ and make that the
@@ -562,8 +560,6 @@ i40e_config_rdma_qvlist(struct i40e_vf *vf,
msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
for (i = 0; i < qvlist_info->num_vectors; i++) {
qv_info = &qvlist_info->qv_info[i];
- if (!qv_info)
- continue;
/* Validate vector id belongs to this vf */
if (!i40e_vc_isvalid_vector_id(vf, qv_info->v_idx)) {
@@ -2848,6 +2844,24 @@ error_param:
(u8 *)&stats, sizeof(stats));
}
+/**
+ * i40e_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool i40e_can_vf_change_mac(struct i40e_vf *vf)
+{
+ /* If the VF MAC address has been set administratively (via the
+ * ndo_set_vf_mac command), then deny permission to the VF to
+ * add/delete unicast MAC addresses, unless the VF is trusted
+ */
+ if (vf->pf_set_mac && !vf->trusted)
+ return false;
+
+ return true;
+}
+
#define I40E_MAX_MACVLAN_PER_HW 3072
#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \
(num_ports))
@@ -2907,8 +2921,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
- !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+ if (!i40e_can_vf_change_mac(vf) &&
+ !is_multicast_ether_addr(addr) &&
!ether_addr_equal(addr, vf->default_lan_addr.addr)) {
dev_err(&pf->pdev->dev,
"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
@@ -3114,19 +3128,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
ret = -EINVAL;
goto error_param;
}
- if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr))
- was_unimac_deleted = true;
}
vsi = pf->vsi[vf->lan_vsi_idx];
spin_lock_bh(&vsi->mac_filter_hash_lock);
/* delete addresses from the list */
- for (i = 0; i < al->num_elements; i++)
+ for (i = 0; i < al->num_elements; i++) {
+ const u8 *addr = al->list[i].addr;
+
+ /* Allow to delete VF primary MAC only if it was not set
+ * administratively by PF or if VF is trusted.
+ */
+ if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
+ i40e_can_vf_change_mac(vf))
+ was_unimac_deleted = true;
+ else
+ continue;
+
if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
ret = -EINVAL;
spin_unlock_bh(&vsi->mac_filter_hash_lock);
goto error_param;
}
+ }
spin_unlock_bh(&vsi->mac_filter_hash_lock);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index af7d5fa6cdc1..11500003af0d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -414,7 +414,8 @@ i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
}
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
- virt_to_page(xdp->data_hard_start), 0, size);
+ virt_to_page(xdp->data_hard_start),
+ XDP_PACKET_HEADROOM, size);
sinfo->xdp_frags_size += size;
xsk_buff_add_frag(xdp);
@@ -498,7 +499,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
&rx_bytes, xdp_res, &failure);
- first->flags = 0;
next_to_clean = next_to_process;
if (failure)
break;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 335fd13e86f7..ef2440f3abf8 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2170,19 +2170,10 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
iavf_add_cloud_filter(adapter);
return 0;
}
-
- if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
- iavf_del_cloud_filter(adapter);
- return 0;
- }
if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
iavf_del_cloud_filter(adapter);
return 0;
}
- if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) {
- iavf_add_cloud_filter(adapter);
- return 0;
- }
if (adapter->aq_required & IAVF_FLAG_AQ_ADD_FDIR_FILTER) {
iavf_add_fdir_filter(adapter);
return IAVF_SUCCESS;
@@ -4423,12 +4414,12 @@ static netdev_features_t iavf_features_check(struct sk_buff *skb,
features &= ~NETIF_F_GSO_MASK;
/* MACLEN can support at most 63 words */
- len = skb_network_header(skb) - skb->data;
+ len = skb_network_offset(skb);
if (len & ~(63 * 2))
goto out_err;
/* IPLEN and EIPLEN can support at most 127 dwords */
- len = skb_transport_header(skb) - skb_network_header(skb);
+ len = skb_network_header_len(skb);
if (len & ~(127 * 4))
goto out_err;
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 367b613d92c0..365c03d1c462 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -493,7 +493,6 @@ enum ice_pf_flags {
ICE_FLAG_DCB_ENA,
ICE_FLAG_FD_ENA,
ICE_FLAG_PTP_SUPPORTED, /* PTP is supported by NVM */
- ICE_FLAG_PTP, /* PTP is enabled by software */
ICE_FLAG_ADV_FEATURES,
ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */
ICE_FLAG_CLS_FLOWER,
@@ -606,6 +605,7 @@ struct ice_pf {
wait_queue_head_t reset_wait_queue;
u32 hw_csum_rx_error;
+ u32 hw_rx_eipe_error;
u32 oicr_err_reg;
struct msi_map oicr_irq; /* Other interrupt cause MSIX vector */
struct msi_map ll_ts_irq; /* LL_TS interrupt MSIX vector */
@@ -896,6 +896,7 @@ static inline bool ice_is_adq_active(struct ice_pf *pf)
}
void ice_debugfs_fwlog_init(struct ice_pf *pf);
+void ice_debugfs_pf_deinit(struct ice_pf *pf);
void ice_debugfs_init(void);
void ice_debugfs_exit(void);
void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module);
@@ -983,6 +984,8 @@ void ice_service_task_schedule(struct ice_pf *pf);
int ice_load(struct ice_pf *pf);
void ice_unload(struct ice_pf *pf);
void ice_adv_lnk_speed_maps_init(void);
+int ice_init_dev(struct ice_pf *pf);
+void ice_deinit_dev(struct ice_pf *pf);
/**
* ice_set_rdma_cap - enable RDMA support
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index cca0e753f38f..7cee365cc7d1 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2018-2020, Intel Corporation. */
#include "ice.h"
+#include <net/rps.h>
/**
* ice_is_arfs_active - helper to check is aRFS is active
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 533b923cae2d..d2fd315556a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -190,15 +190,13 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
q_vector = vsi->q_vectors[v_idx];
ice_for_each_tx_ring(tx_ring, q_vector->tx) {
- if (vsi->netdev)
- netif_queue_set_napi(vsi->netdev, tx_ring->q_index,
- NETDEV_QUEUE_TYPE_TX, NULL);
+ ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX,
+ NULL);
tx_ring->q_vector = NULL;
}
ice_for_each_rx_ring(rx_ring, q_vector->rx) {
- if (vsi->netdev)
- netif_queue_set_napi(vsi->netdev, rx_ring->q_index,
- NETDEV_QUEUE_TYPE_RX, NULL);
+ ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX,
+ NULL);
rx_ring->q_vector = NULL;
}
@@ -538,7 +536,7 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring)
*
* Return 0 on success and a negative value on error.
*/
-int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
+static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
@@ -547,19 +545,27 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
ring->rx_buf_len = ring->vsi->rx_buf_len;
if (ring->vsi->type == ICE_VSI_PF) {
- if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
- /* coverity[check_return] */
- __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->q_index,
- ring->q_vector->napi.napi_id,
- ring->vsi->rx_buf_len);
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->rx_buf_len);
+ if (err)
+ return err;
+ }
ring->xsk_pool = ice_xsk_pool(ring);
if (ring->xsk_pool) {
- xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
ring->rx_buf_len =
xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->rx_buf_len);
+ if (err)
+ return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL);
@@ -571,13 +577,14 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->q_index);
} else {
- if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
- /* coverity[check_return] */
- __xdp_rxq_info_reg(&ring->xdp_rxq,
- ring->netdev,
- ring->q_index,
- ring->q_vector->napi.napi_id,
- ring->vsi->rx_buf_len);
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->rx_buf_len);
+ if (err)
+ return err;
+ }
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
@@ -624,6 +631,62 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
return 0;
}
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
+{
+ if (q_idx >= vsi->num_rxq)
+ return -EINVAL;
+
+ return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+ vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
+ vsi->rx_buf_len = ICE_RXBUF_1664;
+#if (PAGE_SIZE < 8192)
+ } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+ (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+ vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+ vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+ } else {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+ vsi->rx_buf_len = ICE_RXBUF_3072;
+ }
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Rx VSI for operation.
+ */
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+ u16 i;
+
+ if (vsi->type == ICE_VSI_VF)
+ goto setup_rings;
+
+ ice_vsi_cfg_frame_size(vsi);
+setup_rings:
+ /* set up individual rings */
+ ice_for_each_rxq(vsi, i) {
+ int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
/**
* __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
* @qs_cfg: gathered variables needed for pf->vsi queues assignment
@@ -819,7 +882,7 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
* @ring: Tx ring to be configured
* @qg_buf: queue group buffer
*/
-int
+static int
ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
struct ice_aqc_add_tx_qgrp *qg_buf)
{
@@ -890,6 +953,80 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
return 0;
}
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings,
+ u16 q_idx)
+{
+ DEFINE_FLEX(struct ice_aqc_add_tx_qgrp, qg_buf, txqs, 1);
+
+ if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx])
+ return -EINVAL;
+
+ qg_buf->num_txqs = 1;
+
+ return ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf);
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+static int
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count)
+{
+ DEFINE_FLEX(struct ice_aqc_add_tx_qgrp, qg_buf, txqs, 1);
+ int err = 0;
+ u16 q_idx;
+
+ qg_buf->num_txqs = 1;
+
+ for (q_idx = 0; q_idx < count; q_idx++) {
+ err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+{
+ return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
+ */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+{
+ int ret;
+ int i;
+
+ ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
+ if (ret)
+ return ret;
+
+ ice_for_each_rxq(vsi, i)
+ ice_tx_xsk_pool(vsi, i);
+
+ return 0;
+}
+
/**
* ice_cfg_itr - configure the initial interrupt throttle values
* @hw: pointer to the HW structure
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
index 17321ba75602..b711bc921928 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.h
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -6,7 +6,8 @@
#include "ice.h"
-int ice_vsi_cfg_rxq(struct ice_rx_ring *ring);
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx);
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
int
ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait);
@@ -14,9 +15,10 @@ int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
-int
-ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
- struct ice_aqc_add_tx_qgrp *qg_buf);
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings,
+ u16 q_idx);
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
void
ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 10c32cd80fff..4d8111aeb0ff 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -154,6 +154,12 @@ static int ice_set_mac_type(struct ice_hw *hw)
case ICE_DEV_ID_E823L_SFP:
hw->mac_type = ICE_MAC_GENERIC;
break;
+ case ICE_DEV_ID_E825C_BACKPLANE:
+ case ICE_DEV_ID_E825C_QSFP:
+ case ICE_DEV_ID_E825C_SFP:
+ case ICE_DEV_ID_E825C_SGMII:
+ hw->mac_type = ICE_MAC_GENERIC_3K_E825;
+ break;
case ICE_DEV_ID_E830_BACKPLANE:
case ICE_DEV_ID_E830_QSFP56:
case ICE_DEV_ID_E830_SFP:
@@ -170,6 +176,18 @@ static int ice_set_mac_type(struct ice_hw *hw)
}
/**
+ * ice_is_generic_mac - check if device's mac_type is generic
+ * @hw: pointer to the hardware structure
+ *
+ * Return: true if mac_type is generic (with SBQ support), false if not
+ */
+bool ice_is_generic_mac(struct ice_hw *hw)
+{
+ return (hw->mac_type == ICE_MAC_GENERIC ||
+ hw->mac_type == ICE_MAC_GENERIC_3K_E825);
+}
+
+/**
* ice_is_e810
* @hw: pointer to the hardware structure
*
@@ -241,6 +259,25 @@ bool ice_is_e823(struct ice_hw *hw)
}
/**
+ * ice_is_e825c - Check if a device is E825C family device
+ * @hw: pointer to the hardware structure
+ *
+ * Return: true if the device is E825-C based, false if not.
+ */
+bool ice_is_e825c(struct ice_hw *hw)
+{
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E825C_BACKPLANE:
+ case ICE_DEV_ID_E825C_QSFP:
+ case ICE_DEV_ID_E825C_SFP:
+ case ICE_DEV_ID_E825C_SGMII:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
* ice_clear_pf_cfg - Clear PF configuration
* @hw: pointer to the hardware structure
*
@@ -965,9 +1002,9 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw)
*/
int ice_init_hw(struct ice_hw *hw)
{
- struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ void *mac_buf __free(kfree);
u16 mac_buf_len;
- void *mac_buf;
int status;
/* Set MAC type based on DeviceID */
@@ -1045,7 +1082,7 @@ int ice_init_hw(struct ice_hw *hw)
if (status)
goto err_unroll_sched;
- pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps) {
status = -ENOMEM;
goto err_unroll_sched;
@@ -1055,7 +1092,6 @@ int ice_init_hw(struct ice_hw *hw)
status = ice_aq_get_phy_caps(hw->port_info, false,
ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
NULL);
- devm_kfree(ice_hw_to_dev(hw), pcaps);
if (status)
dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n",
status);
@@ -1082,18 +1118,15 @@ int ice_init_hw(struct ice_hw *hw)
/* Get MAC information */
/* A single port can report up to two (LAN and WoL) addresses */
- mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
- sizeof(struct ice_aqc_manage_mac_read_resp),
- GFP_KERNEL);
- mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp);
-
+ mac_buf = kcalloc(2, sizeof(struct ice_aqc_manage_mac_read_resp),
+ GFP_KERNEL);
if (!mac_buf) {
status = -ENOMEM;
goto err_unroll_fltr_mgmt_struct;
}
+ mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp);
status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
- devm_kfree(ice_hw_to_dev(hw), mac_buf);
if (status)
goto err_unroll_fltr_mgmt_struct;
@@ -1362,9 +1395,8 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
* it to HW register space and enables the hardware to prefetch descriptors
* instead of only fetching them on demand
*/
-int
-ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
- u32 rxq_index)
+int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+ u32 rxq_index)
{
u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
@@ -3240,19 +3272,14 @@ int ice_update_link_info(struct ice_port_info *pi)
return status;
if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
- struct ice_aqc_get_phy_caps_data *pcaps;
- struct ice_hw *hw;
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
- hw = pi->hw;
- pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
- GFP_KERNEL);
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
return -ENOMEM;
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
pcaps, NULL);
-
- devm_kfree(ice_hw_to_dev(hw), pcaps);
}
return status;
@@ -3393,8 +3420,8 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
int
ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
{
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
struct ice_aqc_set_phy_cfg_data cfg = { 0 };
- struct ice_aqc_get_phy_caps_data *pcaps;
struct ice_hw *hw;
int status;
@@ -3404,7 +3431,7 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
*aq_failures = 0;
hw = pi->hw;
- pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
return -ENOMEM;
@@ -3456,7 +3483,6 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
}
out:
- devm_kfree(ice_hw_to_dev(hw), pcaps);
return status;
}
@@ -3535,7 +3561,7 @@ int
ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
enum ice_fec_mode fec)
{
- struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
struct ice_hw *hw;
int status;
@@ -3604,8 +3630,6 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
}
out:
- kfree(pcaps);
-
return status;
}
@@ -4325,13 +4349,13 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
/* End of FW Admin Queue command wrappers */
/**
- * ice_write_byte - write a byte to a packed context structure
- * @src_ctx: the context structure to read from
- * @dest_ctx: the context to be written to
- * @ce_info: a description of the struct to be filled
+ * ice_pack_ctx_byte - write a byte to a packed context structure
+ * @src_ctx: unpacked source context structure
+ * @dest_ctx: packed destination context data
+ * @ce_info: context element description
*/
-static void
-ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+static void ice_pack_ctx_byte(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
{
u8 src_byte, dest_byte, mask;
u8 *from, *dest;
@@ -4342,14 +4366,11 @@ ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
/* prepare the bits and mask */
shift_width = ce_info->lsb % 8;
- mask = (u8)(BIT(ce_info->width) - 1);
+ mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
src_byte = *from;
- src_byte &= mask;
-
- /* shift to correct alignment */
- mask <<= shift_width;
src_byte <<= shift_width;
+ src_byte &= mask;
/* get the current bits from the target bit string */
dest = dest_ctx + (ce_info->lsb / 8);
@@ -4364,13 +4385,13 @@ ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
}
/**
- * ice_write_word - write a word to a packed context structure
- * @src_ctx: the context structure to read from
- * @dest_ctx: the context to be written to
- * @ce_info: a description of the struct to be filled
+ * ice_pack_ctx_word - write a word to a packed context structure
+ * @src_ctx: unpacked source context structure
+ * @dest_ctx: packed destination context data
+ * @ce_info: context element description
*/
-static void
-ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+static void ice_pack_ctx_word(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
{
u16 src_word, mask;
__le16 dest_word;
@@ -4382,17 +4403,14 @@ ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
/* prepare the bits and mask */
shift_width = ce_info->lsb % 8;
- mask = BIT(ce_info->width) - 1;
+ mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
/* don't swizzle the bits until after the mask because the mask bits
* will be in a different bit position on big endian machines
*/
src_word = *(u16 *)from;
- src_word &= mask;
-
- /* shift to correct alignment */
- mask <<= shift_width;
src_word <<= shift_width;
+ src_word &= mask;
/* get the current bits from the target bit string */
dest = dest_ctx + (ce_info->lsb / 8);
@@ -4407,13 +4425,13 @@ ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
}
/**
- * ice_write_dword - write a dword to a packed context structure
- * @src_ctx: the context structure to read from
- * @dest_ctx: the context to be written to
- * @ce_info: a description of the struct to be filled
+ * ice_pack_ctx_dword - write a dword to a packed context structure
+ * @src_ctx: unpacked source context structure
+ * @dest_ctx: packed destination context data
+ * @ce_info: context element description
*/
-static void
-ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+static void ice_pack_ctx_dword(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
{
u32 src_dword, mask;
__le32 dest_dword;
@@ -4425,25 +4443,14 @@ ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
/* prepare the bits and mask */
shift_width = ce_info->lsb % 8;
-
- /* if the field width is exactly 32 on an x86 machine, then the shift
- * operation will not work because the SHL instructions count is masked
- * to 5 bits so the shift will do nothing
- */
- if (ce_info->width < 32)
- mask = BIT(ce_info->width) - 1;
- else
- mask = (u32)~0;
+ mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
/* don't swizzle the bits until after the mask because the mask bits
* will be in a different bit position on big endian machines
*/
src_dword = *(u32 *)from;
- src_dword &= mask;
-
- /* shift to correct alignment */
- mask <<= shift_width;
src_dword <<= shift_width;
+ src_dword &= mask;
/* get the current bits from the target bit string */
dest = dest_ctx + (ce_info->lsb / 8);
@@ -4458,13 +4465,13 @@ ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
}
/**
- * ice_write_qword - write a qword to a packed context structure
- * @src_ctx: the context structure to read from
- * @dest_ctx: the context to be written to
- * @ce_info: a description of the struct to be filled
+ * ice_pack_ctx_qword - write a qword to a packed context structure
+ * @src_ctx: unpacked source context structure
+ * @dest_ctx: packed destination context data
+ * @ce_info: context element description
*/
-static void
-ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+static void ice_pack_ctx_qword(u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
{
u64 src_qword, mask;
__le64 dest_qword;
@@ -4476,25 +4483,14 @@ ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
/* prepare the bits and mask */
shift_width = ce_info->lsb % 8;
-
- /* if the field width is exactly 64 on an x86 machine, then the shift
- * operation will not work because the SHL instructions count is masked
- * to 6 bits so the shift will do nothing
- */
- if (ce_info->width < 64)
- mask = BIT_ULL(ce_info->width) - 1;
- else
- mask = (u64)~0;
+ mask = GENMASK_ULL(ce_info->width - 1 + shift_width, shift_width);
/* don't swizzle the bits until after the mask because the mask bits
* will be in a different bit position on big endian machines
*/
src_qword = *(u64 *)from;
- src_qword &= mask;
-
- /* shift to correct alignment */
- mask <<= shift_width;
src_qword <<= shift_width;
+ src_qword &= mask;
/* get the current bits from the target bit string */
dest = dest_ctx + (ce_info->lsb / 8);
@@ -4513,11 +4509,10 @@ ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
* @hw: pointer to the hardware structure
* @src_ctx: pointer to a generic non-packed context structure
* @dest_ctx: pointer to memory for the packed structure
- * @ce_info: a description of the structure to be transformed
+ * @ce_info: List of Rx context elements
*/
-int
-ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info)
+int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
{
int f;
@@ -4533,16 +4528,16 @@ ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
}
switch (ce_info[f].size_of) {
case sizeof(u8):
- ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
+ ice_pack_ctx_byte(src_ctx, dest_ctx, &ce_info[f]);
break;
case sizeof(u16):
- ice_write_word(src_ctx, dest_ctx, &ce_info[f]);
+ ice_pack_ctx_word(src_ctx, dest_ctx, &ce_info[f]);
break;
case sizeof(u32):
- ice_write_dword(src_ctx, dest_ctx, &ce_info[f]);
+ ice_pack_ctx_dword(src_ctx, dest_ctx, &ce_info[f]);
break;
case sizeof(u64):
- ice_write_qword(src_ctx, dest_ctx, &ce_info[f]);
+ ice_pack_ctx_qword(src_ctx, dest_ctx, &ce_info[f]);
break;
default:
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 3e933f75e948..ffb22c7ce28b 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -53,9 +53,8 @@ int ice_get_caps(struct ice_hw *hw);
void ice_set_safe_mode_caps(struct ice_hw *hw);
-int
-ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
- u32 rxq_index);
+int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+ u32 rxq_index);
int
ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params);
@@ -72,9 +71,8 @@ bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
extern const struct ice_ctx_ele ice_tlan_ctx_info[];
-int
-ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info);
+int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info);
extern struct mutex ice_global_cfg_lock_sw;
@@ -112,6 +110,7 @@ ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
int
ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
struct ice_sq_cd *cd);
+bool ice_is_generic_mac(struct ice_hw *hw);
bool ice_is_e810(struct ice_hw *hw);
int ice_clear_pf_cfg(struct ice_hw *hw);
int
@@ -251,6 +250,7 @@ ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
u64 *prev_stat, u64 *cur_stat);
bool ice_is_e810t(struct ice_hw *hw);
bool ice_is_e823(struct ice_hw *hw);
+bool ice_is_e825c(struct ice_hw *hw);
int
ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
struct ice_aqc_txsched_elem_data *buf);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index e7d2474c431c..ffe660f34992 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -666,7 +666,7 @@ bool ice_is_sbq_supported(struct ice_hw *hw)
/* The device sideband queue is only supported on devices with the
* generic MAC type.
*/
- return hw->mac_type == ICE_MAC_GENERIC;
+ return ice_is_generic_mac(hw);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
index 8b7504a9df31..7532d11ad7f3 100644
--- a/drivers/net/ethernet/intel/ice/ice_ddp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
@@ -1825,6 +1825,7 @@ static u32 ice_get_pkg_segment_id(enum ice_mac_type mac_type)
seg_id = SEGMENT_TYPE_ICE_E830;
break;
case ICE_MAC_GENERIC:
+ case ICE_MAC_GENERIC_3K_E825:
default:
seg_id = SEGMENT_TYPE_ICE_E810;
break;
@@ -1845,6 +1846,9 @@ static u32 ice_get_pkg_sign_type(enum ice_mac_type mac_type)
case ICE_MAC_E830:
sign_type = SEGMENT_SIGN_TYPE_RSA3K_SBB;
break;
+ case ICE_MAC_GENERIC_3K_E825:
+ sign_type = SEGMENT_SIGN_TYPE_RSA3K_E825;
+ break;
case ICE_MAC_GENERIC:
default:
sign_type = SEGMENT_SIGN_TYPE_RSA2K;
diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c
index c2bfba6b9ead..d252d98218d0 100644
--- a/drivers/net/ethernet/intel/ice/ice_debugfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c
@@ -64,9 +64,6 @@ static const char * const ice_fwlog_level_string[] = {
"verbose",
};
-/* the order in this array is important. it matches the ordering of the
- * values in the FW so the index is the same value as in ice_fwlog_level
- */
static const char * const ice_fwlog_log_size[] = {
"128K",
"256K",
@@ -648,6 +645,16 @@ err_create_module_files:
}
/**
+ * ice_debugfs_pf_deinit - cleanup PF's debugfs
+ * @pf: pointer to the PF struct
+ */
+void ice_debugfs_pf_deinit(struct ice_pf *pf)
+{
+ debugfs_remove_recursive(pf->ice_debugfs_pf);
+ pf->ice_debugfs_pf = NULL;
+}
+
+/**
* ice_debugfs_init - create root directory for debugfs entries
*/
void ice_debugfs_init(void)
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
index a2d384dbfc76..9dfae9bce758 100644
--- a/drivers/net/ethernet/intel/ice/ice_devids.h
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -71,5 +71,13 @@
#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899
/* Intel(R) Ethernet Connection E822-L 1GbE */
#define ICE_DEV_ID_E822L_SGMII 0x189A
+/* Intel(R) Ethernet Connection E825-C for backplane */
+#define ICE_DEV_ID_E825C_BACKPLANE 0x579c
+/* Intel(R) Ethernet Connection E825-C for QSFP */
+#define ICE_DEV_ID_E825C_QSFP 0x579d
+/* Intel(R) Ethernet Connection E825-C for SFP */
+#define ICE_DEV_ID_E825C_SFP 0x579e
+/* Intel(R) Ethernet Connection E825-C 1GbE */
+#define ICE_DEV_ID_E825C_SGMII 0x579f
#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 65be56f2af9e..b516e42b41f0 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -445,6 +445,20 @@ ice_devlink_reload_empr_start(struct ice_pf *pf,
}
/**
+ * ice_devlink_reinit_down - unload given PF
+ * @pf: pointer to the PF struct
+ */
+static void ice_devlink_reinit_down(struct ice_pf *pf)
+{
+ /* No need to take devl_lock, it's already taken by devlink API */
+ ice_unload(pf);
+ rtnl_lock();
+ ice_vsi_decfg(ice_get_main_vsi(pf));
+ rtnl_unlock();
+ ice_deinit_dev(pf);
+}
+
+/**
* ice_devlink_reload_down - prepare for reload
* @devlink: pointer to the devlink instance to reload
* @netns_change: if true, the network namespace is changing
@@ -477,7 +491,7 @@ ice_devlink_reload_down(struct devlink *devlink, bool netns_change,
"Remove all VFs before doing reinit\n");
return -EOPNOTSUPP;
}
- ice_unload(pf);
+ ice_devlink_reinit_down(pf);
return 0;
case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
return ice_devlink_reload_empr_start(pf, extack);
@@ -1270,6 +1284,45 @@ static int ice_devlink_set_parent(struct devlink_rate *devlink_rate,
}
/**
+ * ice_devlink_reinit_up - do reinit of the given PF
+ * @pf: pointer to the PF struct
+ */
+static int ice_devlink_reinit_up(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+ struct ice_vsi_cfg_params params;
+ int err;
+
+ err = ice_init_dev(pf);
+ if (err)
+ return err;
+
+ params = ice_vsi_to_params(vsi);
+ params.flags = ICE_VSI_FLAG_INIT;
+
+ rtnl_lock();
+ err = ice_vsi_cfg(vsi, &params);
+ rtnl_unlock();
+ if (err)
+ goto err_vsi_cfg;
+
+ /* No need to take devl_lock, it's already taken by devlink API */
+ err = ice_load(pf);
+ if (err)
+ goto err_load;
+
+ return 0;
+
+err_load:
+ rtnl_lock();
+ ice_vsi_decfg(vsi);
+ rtnl_unlock();
+err_vsi_cfg:
+ ice_deinit_dev(pf);
+ return err;
+}
+
+/**
* ice_devlink_reload_up - do reload up after reinit
* @devlink: pointer to the devlink instance reloading
* @action: the action requested
@@ -1289,7 +1342,7 @@ ice_devlink_reload_up(struct devlink *devlink,
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
- return ice_load(pf);
+ return ice_devlink_reinit_up(pf);
case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
*actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
return ice_devlink_reload_empr_finish(pf, extack);
@@ -1569,6 +1622,7 @@ static const struct devlink_port_ops ice_devlink_port_ops = {
* @pf: the PF to create a devlink port for
*
* Create and register a devlink_port for this PF.
+ * This function has to be called under devl_lock.
*
* Return: zero on success or an error code on failure.
*/
@@ -1581,6 +1635,8 @@ int ice_devlink_create_pf_port(struct ice_pf *pf)
struct device *dev;
int err;
+ devlink = priv_to_devlink(pf);
+
dev = ice_pf_to_dev(pf);
devlink_port = &pf->devlink_port;
@@ -1601,10 +1657,9 @@ int ice_devlink_create_pf_port(struct ice_pf *pf)
ice_devlink_set_switch_id(pf, &attrs.switch_id);
devlink_port_attrs_set(devlink_port, &attrs);
- devlink = priv_to_devlink(pf);
- err = devlink_port_register_with_ops(devlink, devlink_port, vsi->idx,
- &ice_devlink_port_ops);
+ err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
+ &ice_devlink_port_ops);
if (err) {
dev_err(dev, "Failed to create devlink port for PF %d, error %d\n",
pf->hw.pf_id, err);
@@ -1619,10 +1674,11 @@ int ice_devlink_create_pf_port(struct ice_pf *pf)
* @pf: the PF to cleanup
*
* Unregisters the devlink_port structure associated with this PF.
+ * This function has to be called under devl_lock.
*/
void ice_devlink_destroy_pf_port(struct ice_pf *pf)
{
- devlink_port_unregister(&pf->devlink_port);
+ devl_port_unregister(&pf->devlink_port);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index b9c5eced6326..e92be6f130a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -31,6 +31,26 @@ static const char * const pin_type_name[] = {
};
/**
+ * ice_dpll_is_reset - check if reset is in progress
+ * @pf: private board structure
+ * @extack: error reporting
+ *
+ * If reset is in progress, fill extack with error.
+ *
+ * Return:
+ * * false - no reset in progress
+ * * true - reset in progress
+ */
+static bool ice_dpll_is_reset(struct ice_pf *pf, struct netlink_ext_ack *extack)
+{
+ if (ice_is_reset_in_progress(pf->state)) {
+ NL_SET_ERR_MSG(extack, "PF reset in progress");
+ return true;
+ }
+ return false;
+}
+
+/**
* ice_dpll_pin_freq_set - set pin's frequency
* @pf: private board structure
* @pin: pointer to a pin
@@ -109,6 +129,9 @@ ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv,
struct ice_pf *pf = d->pf;
int ret;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack);
mutex_unlock(&pf->dplls.lock);
@@ -254,6 +277,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv,
* ice_dpll_pin_enable - enable a pin on dplls
* @hw: board private hw structure
* @pin: pointer to a pin
+ * @dpll_idx: dpll index to connect to output pin
* @pin_type: type of pin being enabled
* @extack: error reporting
*
@@ -266,7 +290,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv,
*/
static int
ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin,
- enum ice_dpll_pin_type pin_type,
+ u8 dpll_idx, enum ice_dpll_pin_type pin_type,
struct netlink_ext_ack *extack)
{
u8 flags = 0;
@@ -280,10 +304,12 @@ ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin,
ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0);
break;
case ICE_DPLL_PIN_TYPE_OUTPUT:
+ flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL;
if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)
flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN;
flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN;
- ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0);
+ ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, dpll_idx,
+ 0, 0);
break;
default:
return -EINVAL;
@@ -370,7 +396,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin,
case ICE_DPLL_PIN_TYPE_INPUT:
ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL,
NULL, &pin->flags[0],
- &pin->freq, NULL);
+ &pin->freq, &pin->phase_adjust);
if (ret)
goto err;
if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) {
@@ -398,14 +424,27 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin,
break;
case ICE_DPLL_PIN_TYPE_OUTPUT:
ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx,
- &pin->flags[0], NULL,
+ &pin->flags[0], &parent,
&pin->freq, NULL);
if (ret)
goto err;
- if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0])
- pin->state[0] = DPLL_PIN_STATE_CONNECTED;
- else
- pin->state[0] = DPLL_PIN_STATE_DISCONNECTED;
+
+ parent &= ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL;
+ if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) {
+ pin->state[pf->dplls.eec.dpll_idx] =
+ parent == pf->dplls.eec.dpll_idx ?
+ DPLL_PIN_STATE_CONNECTED :
+ DPLL_PIN_STATE_DISCONNECTED;
+ pin->state[pf->dplls.pps.dpll_idx] =
+ parent == pf->dplls.pps.dpll_idx ?
+ DPLL_PIN_STATE_CONNECTED :
+ DPLL_PIN_STATE_DISCONNECTED;
+ } else {
+ pin->state[pf->dplls.eec.dpll_idx] =
+ DPLL_PIN_STATE_DISCONNECTED;
+ pin->state[pf->dplls.pps.dpll_idx] =
+ DPLL_PIN_STATE_DISCONNECTED;
+ }
break;
case ICE_DPLL_PIN_TYPE_RCLK_INPUT:
for (parent = 0; parent < pf->dplls.rclk.num_parents;
@@ -488,6 +527,7 @@ ice_dpll_hw_input_prio_set(struct ice_pf *pf, struct ice_dpll *dpll,
* @dpll: registered dpll pointer
* @dpll_priv: private data pointer passed on dpll registration
* @status: on success holds dpll's lock status
+ * @status_error: status error value
* @extack: error reporting
*
* Dpll subsystem callback, provides dpll's lock status.
@@ -500,6 +540,7 @@ ice_dpll_hw_input_prio_set(struct ice_pf *pf, struct ice_dpll *dpll,
static int
ice_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv,
enum dpll_lock_status *status,
+ enum dpll_lock_status_error *status_error,
struct netlink_ext_ack *extack)
{
struct ice_dpll *d = dpll_priv;
@@ -568,9 +609,13 @@ ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv,
struct ice_pf *pf = d->pf;
int ret;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
if (enable)
- ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack);
+ ret = ice_dpll_pin_enable(&pf->hw, p, d->dpll_idx, pin_type,
+ extack);
else
ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack);
if (!ret)
@@ -603,6 +648,11 @@ ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv,
struct netlink_ext_ack *extack)
{
bool enable = state == DPLL_PIN_STATE_CONNECTED;
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+
+ if (!enable && p->state[d->dpll_idx] == DPLL_PIN_STATE_DISCONNECTED)
+ return 0;
return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable,
extack, ICE_DPLL_PIN_TYPE_OUTPUT);
@@ -665,14 +715,16 @@ ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv,
struct ice_pf *pf = d->pf;
int ret;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
ret = ice_dpll_pin_state_update(pf, p, pin_type, extack);
if (ret)
goto unlock;
- if (pin_type == ICE_DPLL_PIN_TYPE_INPUT)
+ if (pin_type == ICE_DPLL_PIN_TYPE_INPUT ||
+ pin_type == ICE_DPLL_PIN_TYPE_OUTPUT)
*state = p->state[d->dpll_idx];
- else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT)
- *state = p->state[0];
ret = 0;
unlock:
mutex_unlock(&pf->dplls.lock);
@@ -790,6 +842,9 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
struct ice_pf *pf = d->pf;
int ret;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
mutex_unlock(&pf->dplls.lock);
@@ -910,6 +965,9 @@ ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv,
u8 flag, flags_en = 0;
int ret;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
switch (type) {
case ICE_DPLL_PIN_TYPE_INPUT:
@@ -1069,6 +1127,9 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv,
int ret = -EINVAL;
u32 hw_idx;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
hw_idx = parent->idx - pf->dplls.base_rclk_idx;
if (hw_idx >= pf->dplls.num_inputs)
@@ -1123,6 +1184,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv,
int ret = -EINVAL;
u32 hw_idx;
+ if (ice_dpll_is_reset(pf, extack))
+ return -EBUSY;
+
mutex_lock(&pf->dplls.lock);
hw_idx = parent->idx - pf->dplls.base_rclk_idx;
if (hw_idx >= pf->dplls.num_inputs)
@@ -1305,8 +1369,10 @@ static void ice_dpll_periodic_work(struct kthread_work *work)
struct ice_pf *pf = container_of(d, struct ice_pf, dplls);
struct ice_dpll *de = &pf->dplls.eec;
struct ice_dpll *dp = &pf->dplls.pps;
- int ret;
+ int ret = 0;
+ if (ice_is_reset_in_progress(pf->state))
+ goto resched;
mutex_lock(&pf->dplls.lock);
ret = ice_dpll_update_state(pf, de, false);
if (!ret)
@@ -1326,6 +1392,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work)
ice_dpll_notify_changes(de);
ice_dpll_notify_changes(dp);
+resched:
/* Run twice a second or reschedule if update failed */
kthread_queue_delayed_work(d->kworker, &d->work,
ret ? msecs_to_jiffies(10) :
@@ -1532,7 +1599,7 @@ static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf)
}
if (WARN_ON_ONCE(!vsi || !vsi->netdev))
return;
- netdev_dpll_pin_clear(vsi->netdev);
+ dpll_netdev_pin_clear(vsi->netdev);
dpll_pin_put(rclk->pin);
}
@@ -1576,7 +1643,7 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
}
if (WARN_ON((!vsi || !vsi->netdev)))
return -EINVAL;
- netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin);
+ dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin);
return 0;
@@ -2055,6 +2122,7 @@ void ice_dpll_init(struct ice_pf *pf)
struct ice_dplls *d = &pf->dplls;
int err = 0;
+ mutex_init(&d->lock);
err = ice_dpll_init_info(pf, cgu);
if (err)
goto err_exit;
@@ -2067,7 +2135,6 @@ void ice_dpll_init(struct ice_pf *pf)
err = ice_dpll_init_pins(pf, cgu);
if (err)
goto deinit_pps;
- mutex_init(&d->lock);
if (cgu) {
err = ice_dpll_init_worker(pf);
if (err)
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index a19b06f18e40..255a9c8151b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -129,6 +129,7 @@ static const struct ice_stats ice_gstrings_pf_stats[] = {
ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+ ICE_PF_STAT("rx_eipe_error.nic", hw_rx_eipe_error),
ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
@@ -801,7 +802,7 @@ static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
if (!pf)
return -EINVAL;
- data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
+ data = kzalloc(size, GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -944,11 +945,9 @@ static u64 ice_loopback_test(struct net_device *netdev)
int num_frames, valid_frames;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
- struct device *dev;
- u8 *tx_frame;
+ u8 *tx_frame __free(kfree);
int i;
- dev = ice_pf_to_dev(pf);
netdev_info(netdev, "loopback test\n");
test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
@@ -993,7 +992,7 @@ static u64 ice_loopback_test(struct net_device *netdev)
for (i = 0; i < num_frames; i++) {
if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
ret = 8;
- goto lbtest_free_frame;
+ goto remove_mac_filters;
}
}
@@ -1003,8 +1002,6 @@ static u64 ice_loopback_test(struct net_device *netdev)
else if (valid_frames != num_frames)
ret = 10;
-lbtest_free_frame:
- devm_kfree(dev, tx_frame);
remove_mac_filters:
if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI))
netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
@@ -2486,6 +2483,24 @@ static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc)
case SCTP_V4_FLOW:
hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4;
break;
+ case GTPU_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_IP | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case GTPC_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPC | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case GTPC_TEID_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case GTPU_EH_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_EH | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case GTPU_UL_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_UP | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case GTPU_DL_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_DWN | ICE_FLOW_SEG_HDR_IPV4;
+ break;
case TCP_V6_FLOW:
hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6;
break;
@@ -2495,6 +2510,24 @@ static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc)
case SCTP_V6_FLOW:
hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6;
break;
+ case GTPU_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_IP | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case GTPC_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPC | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case GTPC_TEID_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case GTPU_EH_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_EH | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case GTPU_UL_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_UP | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case GTPU_DL_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_GTPU_DWN | ICE_FLOW_SEG_HDR_IPV6;
+ break;
default:
break;
}
@@ -2518,6 +2551,12 @@ static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc, bool symm)
case TCP_V4_FLOW:
case UDP_V4_FLOW:
case SCTP_V4_FLOW:
+ case GTPU_V4_FLOW:
+ case GTPC_V4_FLOW:
+ case GTPC_TEID_V4_FLOW:
+ case GTPU_EH_V4_FLOW:
+ case GTPU_UL_V4_FLOW:
+ case GTPU_DL_V4_FLOW:
if (nfc->data & RXH_IP_SRC)
hfld |= ICE_FLOW_HASH_FLD_IPV4_SA;
if (nfc->data & RXH_IP_DST)
@@ -2526,6 +2565,12 @@ static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc, bool symm)
case TCP_V6_FLOW:
case UDP_V6_FLOW:
case SCTP_V6_FLOW:
+ case GTPU_V6_FLOW:
+ case GTPC_V6_FLOW:
+ case GTPC_TEID_V6_FLOW:
+ case GTPU_EH_V6_FLOW:
+ case GTPU_UL_V6_FLOW:
+ case GTPU_DL_V6_FLOW:
if (nfc->data & RXH_IP_SRC)
hfld |= ICE_FLOW_HASH_FLD_IPV6_SA;
if (nfc->data & RXH_IP_DST)
@@ -2564,6 +2609,33 @@ static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc, bool symm)
}
}
+ if (nfc->data & RXH_GTP_TEID) {
+ switch (nfc->flow_type) {
+ case GTPC_TEID_V4_FLOW:
+ case GTPC_TEID_V6_FLOW:
+ hfld |= ICE_FLOW_HASH_FLD_GTPC_TEID;
+ break;
+ case GTPU_V4_FLOW:
+ case GTPU_V6_FLOW:
+ hfld |= ICE_FLOW_HASH_FLD_GTPU_IP_TEID;
+ break;
+ case GTPU_EH_V4_FLOW:
+ case GTPU_EH_V6_FLOW:
+ hfld |= ICE_FLOW_HASH_FLD_GTPU_EH_TEID;
+ break;
+ case GTPU_UL_V4_FLOW:
+ case GTPU_UL_V6_FLOW:
+ hfld |= ICE_FLOW_HASH_FLD_GTPU_UP_TEID;
+ break;
+ case GTPU_DL_V4_FLOW:
+ case GTPU_DL_V6_FLOW:
+ hfld |= ICE_FLOW_HASH_FLD_GTPU_DWN_TEID;
+ break;
+ default:
+ break;
+ }
+ }
+
return hfld;
}
@@ -2676,6 +2748,13 @@ ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
hash_flds & ICE_FLOW_HASH_FLD_UDP_DST_PORT ||
hash_flds & ICE_FLOW_HASH_FLD_SCTP_DST_PORT)
nfc->data |= (u64)RXH_L4_B_2_3;
+
+ if (hash_flds & ICE_FLOW_HASH_FLD_GTPC_TEID ||
+ hash_flds & ICE_FLOW_HASH_FLD_GTPU_IP_TEID ||
+ hash_flds & ICE_FLOW_HASH_FLD_GTPU_EH_TEID ||
+ hash_flds & ICE_FLOW_HASH_FLD_GTPU_UP_TEID ||
+ hash_flds & ICE_FLOW_HASH_FLD_GTPU_DWN_TEID)
+ nfc->data |= (u64)RXH_GTP_TEID;
}
/**
@@ -3360,7 +3439,7 @@ ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
struct ice_pf *pf = ice_netdev_to_pf(dev);
/* only report timestamping if PTP is enabled */
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ if (pf->ptp.state != ICE_PTP_READY)
return ethtool_op_get_ts_info(dev, info);
info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index ff82915ab497..2fd2e0cb483d 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -37,13 +37,13 @@
#define ICE_HASH_SCTP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_SCTP_PORT)
#define ICE_HASH_SCTP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_SCTP_PORT)
-#define ICE_FLOW_HASH_GTP_TEID \
+#define ICE_FLOW_HASH_GTP_C_TEID \
(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID))
-#define ICE_FLOW_HASH_GTP_IPV4_TEID \
- (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID)
-#define ICE_FLOW_HASH_GTP_IPV6_TEID \
- (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID)
+#define ICE_FLOW_HASH_GTP_C_IPV4_TEID \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_C_TEID)
+#define ICE_FLOW_HASH_GTP_C_IPV6_TEID \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_C_TEID)
#define ICE_FLOW_HASH_GTP_U_TEID \
(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID))
@@ -66,6 +66,20 @@
(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
ICE_FLOW_HASH_GTP_U_EH_QFI)
+#define ICE_FLOW_HASH_GTP_U_UP \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_UP_TEID))
+#define ICE_FLOW_HASH_GTP_U_DWN \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_UP \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_UP)
+#define ICE_FLOW_HASH_GTP_U_IPV6_UP \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_UP)
+#define ICE_FLOW_HASH_GTP_U_IPV4_DWN \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_DWN)
+#define ICE_FLOW_HASH_GTP_U_IPV6_DWN \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_DWN)
+
#define ICE_FLOW_HASH_PPPOE_SESS_ID \
(BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID))
@@ -242,6 +256,13 @@ enum ice_flow_field {
#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT \
BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_GTPC_TEID BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)
+#define ICE_FLOW_HASH_FLD_GTPU_IP_TEID BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)
+#define ICE_FLOW_HASH_FLD_GTPU_EH_TEID BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_TEID)
+#define ICE_FLOW_HASH_FLD_GTPU_UP_TEID BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_UP_TEID)
+#define ICE_FLOW_HASH_FLD_GTPU_DWN_TEID \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID)
+
/* Flow headers and fields for AVF support */
enum ice_flow_avf_hdr_field {
/* Values 0 - 28 are reserved for future use */
diff --git a/drivers/net/ethernet/intel/ice/ice_fwlog.c b/drivers/net/ethernet/intel/ice/ice_fwlog.c
index 92b5dac481cd..4fd15387a7e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_fwlog.c
+++ b/drivers/net/ethernet/intel/ice/ice_fwlog.c
@@ -188,6 +188,8 @@ void ice_fwlog_deinit(struct ice_hw *hw)
if (hw->bus.func)
return;
+ ice_debugfs_pf_deinit(hw->back);
+
/* make sure FW logging is disabled to not put the FW in a weird state
* for the next driver load
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index 2a25323105e5..467372d541d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -152,6 +152,27 @@ ice_lag_find_hw_by_lport(struct ice_lag *lag, u8 lport)
}
/**
+ * ice_pkg_has_lport_extract - check if lport extraction supported
+ * @hw: HW struct
+ */
+static bool ice_pkg_has_lport_extract(struct ice_hw *hw)
+{
+ int i;
+
+ for (i = 0; i < hw->blk[ICE_BLK_SW].es.count; i++) {
+ u16 offset;
+ u8 fv_prot;
+
+ ice_find_prot_off(hw, ICE_BLK_SW, ICE_SW_DEFAULT_PROFILE, i,
+ &fv_prot, &offset);
+ if (fv_prot == ICE_FV_PROT_MDID &&
+ offset == ICE_LP_EXT_BUF_OFFSET)
+ return true;
+ }
+ return false;
+}
+
+/**
* ice_lag_find_primary - returns pointer to primary interfaces lag struct
* @lag: local interfaces lag struct
*/
@@ -1206,7 +1227,7 @@ static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
}
/**
- * ice_lag_init_feature_support_flag - Check for NVM support for LAG
+ * ice_lag_init_feature_support_flag - Check for package and NVM support for LAG
* @pf: PF struct
*/
static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
@@ -1219,7 +1240,7 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
else
ice_clear_feature_support(pf, ICE_F_ROCE_LAG);
- if (caps->sriov_lag)
+ if (caps->sriov_lag && ice_pkg_has_lport_extract(&pf->hw))
ice_set_feature_support(pf, ICE_F_SRIOV_LAG);
else
ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
index ede833dfa658..183b38792ef2 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.h
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -17,6 +17,9 @@ enum ice_lag_role {
#define ICE_LAG_INVALID_PORT 0xFF
#define ICE_LAG_RESET_RETRIES 5
+#define ICE_SW_DEFAULT_PROFILE 0
+#define ICE_FV_PROT_MDID 255
+#define ICE_LP_EXT_BUF_OFFSET 32
struct ice_pf;
struct ice_vf;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 9be724291ef8..ee3f0d3e3f6d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1618,6 +1618,25 @@ static const struct ice_rss_hash_cfg default_rss_cfgs[] = {
*/
{ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4,
ICE_HASH_SCTP_IPV4, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpc4 with input set IPv4 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPC | ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_HASH_IPV4, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpc4t with input set IPv4 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_HASH_GTP_C_IPV4_TEID, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu4 with input set IPv4 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPU_IP | ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_HASH_GTP_U_IPV4_TEID, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu4e with input set IPv4 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPU_EH | ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_HASH_GTP_U_IPV4_EH, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu4u with input set IPv4 src/dst */
+ { ICE_FLOW_SEG_HDR_GTPU_UP | ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_HASH_GTP_U_IPV4_UP, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu4d with input set IPv4 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPU_DWN | ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_HASH_GTP_U_IPV4_DWN, ICE_RSS_OUTER_HEADERS, false},
+
/* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */
{ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6,
ICE_HASH_TCP_IPV6, ICE_RSS_ANY_HEADERS, false},
@@ -1632,6 +1651,24 @@ static const struct ice_rss_hash_cfg default_rss_cfgs[] = {
/* configure RSS for IPSEC ESP SPI with input set MAC_IPV4_SPI */
{ICE_FLOW_SEG_HDR_ESP,
ICE_FLOW_HASH_ESP_SPI, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpc6 with input set IPv6 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPC | ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_HASH_IPV6, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpc6t with input set IPv6 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_HASH_GTP_C_IPV6_TEID, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu6 with input set IPv6 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPU_IP | ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_HASH_GTP_U_IPV6_TEID, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu6e with input set IPv6 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPU_EH | ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_HASH_GTP_U_IPV6_EH, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu6u with input set IPv6 src/dst */
+ { ICE_FLOW_SEG_HDR_GTPU_UP | ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_HASH_GTP_U_IPV6_UP, ICE_RSS_OUTER_HEADERS, false},
+ /* configure RSS for gtpu6d with input set IPv6 src/dst */
+ {ICE_FLOW_SEG_HDR_GTPU_DWN | ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_HASH_GTP_U_IPV6_DWN, ICE_RSS_OUTER_HEADERS, false},
};
/**
@@ -1672,27 +1709,6 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
}
/**
- * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
- * @vsi: VSI
- */
-static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
-{
- if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
- vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
- vsi->rx_buf_len = ICE_RXBUF_1664;
-#if (PAGE_SIZE < 8192)
- } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
- (vsi->netdev->mtu <= ETH_DATA_LEN)) {
- vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
- vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
-#endif
- } else {
- vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
- vsi->rx_buf_len = ICE_RXBUF_3072;
- }
-}
-
-/**
* ice_pf_state_is_nominal - checks the PF for nominal state
* @pf: pointer to PF to check
*
@@ -1795,114 +1811,6 @@ ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
}
-int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
-{
- if (q_idx >= vsi->num_rxq)
- return -EINVAL;
-
- return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]);
-}
-
-int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx)
-{
- DEFINE_FLEX(struct ice_aqc_add_tx_qgrp, qg_buf, txqs, 1);
-
- if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx])
- return -EINVAL;
-
- qg_buf->num_txqs = 1;
-
- return ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf);
-}
-
-/**
- * ice_vsi_cfg_rxqs - Configure the VSI for Rx
- * @vsi: the VSI being configured
- *
- * Return 0 on success and a negative value on error
- * Configure the Rx VSI for operation.
- */
-int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
-{
- u16 i;
-
- if (vsi->type == ICE_VSI_VF)
- goto setup_rings;
-
- ice_vsi_cfg_frame_size(vsi);
-setup_rings:
- /* set up individual rings */
- ice_for_each_rxq(vsi, i) {
- int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
-
- if (err)
- return err;
- }
-
- return 0;
-}
-
-/**
- * ice_vsi_cfg_txqs - Configure the VSI for Tx
- * @vsi: the VSI being configured
- * @rings: Tx ring array to be configured
- * @count: number of Tx ring array elements
- *
- * Return 0 on success and a negative value on error
- * Configure the Tx VSI for operation.
- */
-static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count)
-{
- DEFINE_FLEX(struct ice_aqc_add_tx_qgrp, qg_buf, txqs, 1);
- int err = 0;
- u16 q_idx;
-
- qg_buf->num_txqs = 1;
-
- for (q_idx = 0; q_idx < count; q_idx++) {
- err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
- if (err)
- break;
- }
-
- return err;
-}
-
-/**
- * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
- * @vsi: the VSI being configured
- *
- * Return 0 on success and a negative value on error
- * Configure the Tx VSI for operation.
- */
-int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
-{
- return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
-}
-
-/**
- * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
- * @vsi: the VSI being configured
- *
- * Return 0 on success and a negative value on error
- * Configure the Tx queues dedicated for XDP in given VSI for operation.
- */
-int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
-{
- int ret;
- int i;
-
- ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
- if (ret)
- return ret;
-
- ice_for_each_rxq(vsi, i)
- ice_tx_xsk_pool(vsi, i);
-
- return 0;
-}
-
/**
* ice_intrl_usec_to_reg - convert interrupt rate limit to register value
* @intrl: interrupt rate limit in usecs
@@ -2426,7 +2334,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
ice_vsi_map_rings_to_vectors(vsi);
/* Associate q_vector rings to napi */
- ice_vsi_set_napi_queues(vsi, true);
+ ice_vsi_set_napi_queues(vsi);
vsi->stat_offsets_loaded = false;
@@ -2849,74 +2757,19 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
}
/**
- * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
- * @vsi: the VSI being un-configured
- */
-void ice_vsi_dis_irq(struct ice_vsi *vsi)
-{
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- u32 val;
- int i;
-
- /* disable interrupt causation from each queue */
- if (vsi->tx_rings) {
- ice_for_each_txq(vsi, i) {
- if (vsi->tx_rings[i]) {
- u16 reg;
-
- reg = vsi->tx_rings[i]->reg_idx;
- val = rd32(hw, QINT_TQCTL(reg));
- val &= ~QINT_TQCTL_CAUSE_ENA_M;
- wr32(hw, QINT_TQCTL(reg), val);
- }
- }
- }
-
- if (vsi->rx_rings) {
- ice_for_each_rxq(vsi, i) {
- if (vsi->rx_rings[i]) {
- u16 reg;
-
- reg = vsi->rx_rings[i]->reg_idx;
- val = rd32(hw, QINT_RQCTL(reg));
- val &= ~QINT_RQCTL_CAUSE_ENA_M;
- wr32(hw, QINT_RQCTL(reg), val);
- }
- }
- }
-
- /* disable each interrupt */
- ice_for_each_q_vector(vsi, i) {
- if (!vsi->q_vectors[i])
- continue;
- wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
- }
-
- ice_flush(hw);
-
- /* don't call synchronize_irq() for VF's from the host */
- if (vsi->type == ICE_VSI_VF)
- return;
-
- ice_for_each_q_vector(vsi, i)
- synchronize_irq(vsi->q_vectors[i]->irq.virq);
-}
-
-/**
- * ice_queue_set_napi - Set the napi instance for the queue
+ * __ice_queue_set_napi - Set the napi instance for the queue
* @dev: device to which NAPI and queue belong
* @queue_index: Index of queue
* @type: queue type as RX or TX
* @napi: NAPI context
* @locked: is the rtnl_lock already held
*
- * Set the napi instance for the queue
+ * Set the napi instance for the queue. Caller indicates the lock status.
*/
static void
-ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
- enum netdev_queue_type type, struct napi_struct *napi,
- bool locked)
+__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+ enum netdev_queue_type type, struct napi_struct *napi,
+ bool locked)
{
if (!locked)
rtnl_lock();
@@ -2926,26 +2779,79 @@ ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
}
/**
- * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
+ * ice_queue_set_napi - Set the napi instance for the queue
+ * @vsi: VSI being configured
+ * @queue_index: Index of queue
+ * @type: queue type as RX or TX
+ * @napi: NAPI context
+ *
+ * Set the napi instance for the queue. The rtnl lock state is derived from the
+ * execution path.
+ */
+void
+ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
+ enum netdev_queue_type type, struct napi_struct *napi)
+{
+ struct ice_pf *pf = vsi->back;
+
+ if (!vsi->netdev)
+ return;
+
+ if (current_work() == &pf->serv_task ||
+ test_bit(ICE_PREPARED_FOR_RESET, pf->state) ||
+ test_bit(ICE_DOWN, pf->state) ||
+ test_bit(ICE_SUSPENDED, pf->state))
+ __ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
+ false);
+ else
+ __ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
+ true);
+}
+
+/**
+ * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
* @q_vector: q_vector pointer
* @locked: is the rtnl_lock already held
*
+ * Associate the q_vector napi with all the queue[s] on the vector.
+ * Caller indicates the lock status.
+ */
+void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
+{
+ struct ice_rx_ring *rx_ring;
+ struct ice_tx_ring *tx_ring;
+
+ ice_for_each_rx_ring(rx_ring, q_vector->rx)
+ __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index,
+ NETDEV_QUEUE_TYPE_RX, &q_vector->napi,
+ locked);
+
+ ice_for_each_tx_ring(tx_ring, q_vector->tx)
+ __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index,
+ NETDEV_QUEUE_TYPE_TX, &q_vector->napi,
+ locked);
+ /* Also set the interrupt number for the NAPI */
+ netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
+}
+
+/**
+ * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
+ * @q_vector: q_vector pointer
+ *
* Associate the q_vector napi with all the queue[s] on the vector
*/
-void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
+void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector)
{
struct ice_rx_ring *rx_ring;
struct ice_tx_ring *tx_ring;
ice_for_each_rx_ring(rx_ring, q_vector->rx)
- ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index,
- NETDEV_QUEUE_TYPE_RX, &q_vector->napi,
- locked);
+ ice_queue_set_napi(q_vector->vsi, rx_ring->q_index,
+ NETDEV_QUEUE_TYPE_RX, &q_vector->napi);
ice_for_each_tx_ring(tx_ring, q_vector->tx)
- ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index,
- NETDEV_QUEUE_TYPE_TX, &q_vector->napi,
- locked);
+ ice_queue_set_napi(q_vector->vsi, tx_ring->q_index,
+ NETDEV_QUEUE_TYPE_TX, &q_vector->napi);
/* Also set the interrupt number for the NAPI */
netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
}
@@ -2953,11 +2859,10 @@ void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
/**
* ice_vsi_set_napi_queues
* @vsi: VSI pointer
- * @locked: is the rtnl_lock already held
*
* Associate queue[s] with napi for all vectors
*/
-void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked)
+void ice_vsi_set_napi_queues(struct ice_vsi *vsi)
{
int i;
@@ -2965,7 +2870,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked)
return;
ice_for_each_q_vector(vsi, i)
- ice_q_vector_set_napi_queues(vsi->q_vectors[i], locked);
+ ice_q_vector_set_napi_queues(vsi->q_vectors[i]);
}
/**
@@ -3140,7 +3045,7 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi)
}
}
- tx_ring_stats = vsi_stat->rx_ring_stats;
+ tx_ring_stats = vsi_stat->tx_ring_stats;
vsi_stat->tx_ring_stats =
krealloc_array(vsi_stat->tx_ring_stats, req_txq,
sizeof(*vsi_stat->tx_ring_stats),
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 71bd27244941..9cd23afe5f15 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -54,14 +54,6 @@ bool ice_pf_state_is_nominal(struct ice_pf *pf);
void ice_update_eth_stats(struct ice_vsi *vsi);
-int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx);
-
-int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx);
-
-int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
-
-int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
-
void ice_vsi_cfg_msix(struct ice_vsi *vsi);
int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi);
@@ -72,8 +64,6 @@ int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num);
-int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
-
int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -91,9 +81,15 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params);
-void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked);
+void
+ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
+ enum netdev_queue_type type, struct napi_struct *napi);
+
+void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked);
-void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked);
+void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector);
+
+void ice_vsi_set_napi_queues(struct ice_vsi *vsi);
int ice_vsi_release(struct ice_vsi *vsi);
@@ -114,8 +110,6 @@ void
ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
bool ena_ts);
-void ice_vsi_dis_irq(struct ice_vsi *vsi);
-
void ice_vsi_free_irq(struct ice_vsi *vsi);
void ice_vsi_free_rx_rings(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index dd4a9bc0dfdc..33a164fa325a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -613,7 +613,7 @@ skip:
ice_pf_dis_all_vsi(pf, false);
if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
- ice_ptp_prepare_for_reset(pf);
+ ice_ptp_prepare_for_reset(pf, reset_type);
if (ice_is_feature_supported(pf, ICE_F_GNSS))
ice_gnss_exit(pf);
@@ -1649,8 +1649,10 @@ static void ice_clean_sbq_subtask(struct ice_pf *pf)
{
struct ice_hw *hw = &pf->hw;
- /* Nothing to do here if sideband queue is not supported */
- if (!ice_is_sbq_supported(hw)) {
+ /* if mac_type is not generic, sideband is not supported
+ * and there's nothing to do here
+ */
+ if (!ice_is_generic_mac(hw)) {
clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
return;
}
@@ -3495,7 +3497,7 @@ static void ice_napi_add(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, v_idx) {
netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
ice_napi_poll);
- ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false);
+ __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false);
}
}
@@ -4572,90 +4574,6 @@ static void ice_decfg_netdev(struct ice_vsi *vsi)
vsi->netdev = NULL;
}
-static int ice_start_eth(struct ice_vsi *vsi)
-{
- int err;
-
- err = ice_init_mac_fltr(vsi->back);
- if (err)
- return err;
-
- err = ice_vsi_open(vsi);
- if (err)
- ice_fltr_remove_all(vsi);
-
- return err;
-}
-
-static void ice_stop_eth(struct ice_vsi *vsi)
-{
- ice_fltr_remove_all(vsi);
- ice_vsi_close(vsi);
-}
-
-static int ice_init_eth(struct ice_pf *pf)
-{
- struct ice_vsi *vsi = ice_get_main_vsi(pf);
- int err;
-
- if (!vsi)
- return -EINVAL;
-
- /* init channel list */
- INIT_LIST_HEAD(&vsi->ch_list);
-
- err = ice_cfg_netdev(vsi);
- if (err)
- return err;
- /* Setup DCB netlink interface */
- ice_dcbnl_setup(vsi);
-
- err = ice_init_mac_fltr(pf);
- if (err)
- goto err_init_mac_fltr;
-
- err = ice_devlink_create_pf_port(pf);
- if (err)
- goto err_devlink_create_pf_port;
-
- SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
-
- err = ice_register_netdev(vsi);
- if (err)
- goto err_register_netdev;
-
- err = ice_tc_indir_block_register(vsi);
- if (err)
- goto err_tc_indir_block_register;
-
- ice_napi_add(vsi);
-
- return 0;
-
-err_tc_indir_block_register:
- ice_unregister_netdev(vsi);
-err_register_netdev:
- ice_devlink_destroy_pf_port(pf);
-err_devlink_create_pf_port:
-err_init_mac_fltr:
- ice_decfg_netdev(vsi);
- return err;
-}
-
-static void ice_deinit_eth(struct ice_pf *pf)
-{
- struct ice_vsi *vsi = ice_get_main_vsi(pf);
-
- if (!vsi)
- return;
-
- ice_vsi_close(vsi);
- ice_unregister_netdev(vsi);
- ice_devlink_destroy_pf_port(pf);
- ice_tc_indir_block_unregister(vsi);
- ice_decfg_netdev(vsi);
-}
-
/**
* ice_wait_for_fw - wait for full FW readiness
* @hw: pointer to the hardware structure
@@ -4681,7 +4599,7 @@ static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout)
return -ETIMEDOUT;
}
-static int ice_init_dev(struct ice_pf *pf)
+int ice_init_dev(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
@@ -4774,7 +4692,7 @@ err_init_pf:
return err;
}
-static void ice_deinit_dev(struct ice_pf *pf)
+void ice_deinit_dev(struct ice_pf *pf)
{
ice_free_irq_msix_misc(pf);
ice_deinit_pf(pf);
@@ -5079,31 +4997,47 @@ static void ice_deinit(struct ice_pf *pf)
/**
* ice_load - load pf by init hw and starting VSI
* @pf: pointer to the pf instance
+ *
+ * This function has to be called under devl_lock.
*/
int ice_load(struct ice_pf *pf)
{
- struct ice_vsi_cfg_params params = {};
struct ice_vsi *vsi;
int err;
- err = ice_init_dev(pf);
+ devl_assert_locked(priv_to_devlink(pf));
+
+ vsi = ice_get_main_vsi(pf);
+
+ /* init channel list */
+ INIT_LIST_HEAD(&vsi->ch_list);
+
+ err = ice_cfg_netdev(vsi);
if (err)
return err;
- vsi = ice_get_main_vsi(pf);
+ /* Setup DCB netlink interface */
+ ice_dcbnl_setup(vsi);
- params = ice_vsi_to_params(vsi);
- params.flags = ICE_VSI_FLAG_INIT;
+ err = ice_init_mac_fltr(pf);
+ if (err)
+ goto err_init_mac_fltr;
- rtnl_lock();
- err = ice_vsi_cfg(vsi, &params);
+ err = ice_devlink_create_pf_port(pf);
if (err)
- goto err_vsi_cfg;
+ goto err_devlink_create_pf_port;
+
+ SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
+
+ err = ice_register_netdev(vsi);
+ if (err)
+ goto err_register_netdev;
- err = ice_start_eth(ice_get_main_vsi(pf));
+ err = ice_tc_indir_block_register(vsi);
if (err)
- goto err_start_eth;
- rtnl_unlock();
+ goto err_tc_indir_block_register;
+
+ ice_napi_add(vsi);
err = ice_init_rdma(pf);
if (err)
@@ -5117,29 +5051,35 @@ int ice_load(struct ice_pf *pf)
return 0;
err_init_rdma:
- ice_vsi_close(ice_get_main_vsi(pf));
- rtnl_lock();
-err_start_eth:
- ice_vsi_decfg(ice_get_main_vsi(pf));
-err_vsi_cfg:
- rtnl_unlock();
- ice_deinit_dev(pf);
+ ice_tc_indir_block_unregister(vsi);
+err_tc_indir_block_register:
+ ice_unregister_netdev(vsi);
+err_register_netdev:
+ ice_devlink_destroy_pf_port(pf);
+err_devlink_create_pf_port:
+err_init_mac_fltr:
+ ice_decfg_netdev(vsi);
return err;
}
/**
* ice_unload - unload pf by stopping VSI and deinit hw
* @pf: pointer to the pf instance
+ *
+ * This function has to be called under devl_lock.
*/
void ice_unload(struct ice_pf *pf)
{
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+ devl_assert_locked(priv_to_devlink(pf));
+
ice_deinit_features(pf);
ice_deinit_rdma(pf);
- rtnl_lock();
- ice_stop_eth(ice_get_main_vsi(pf));
- ice_vsi_decfg(ice_get_main_vsi(pf));
- rtnl_unlock();
- ice_deinit_dev(pf);
+ ice_tc_indir_block_unregister(vsi);
+ ice_unregister_netdev(vsi);
+ ice_devlink_destroy_pf_port(pf);
+ ice_decfg_netdev(vsi);
}
/**
@@ -5237,27 +5177,23 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
if (err)
goto err_init;
- err = ice_init_eth(pf);
- if (err)
- goto err_init_eth;
-
- err = ice_init_rdma(pf);
+ devl_lock(priv_to_devlink(pf));
+ err = ice_load(pf);
+ devl_unlock(priv_to_devlink(pf));
if (err)
- goto err_init_rdma;
+ goto err_load;
err = ice_init_devlink(pf);
if (err)
goto err_init_devlink;
- ice_init_features(pf);
-
return 0;
err_init_devlink:
- ice_deinit_rdma(pf);
-err_init_rdma:
- ice_deinit_eth(pf);
-err_init_eth:
+ devl_lock(priv_to_devlink(pf));
+ ice_unload(pf);
+ devl_unlock(priv_to_devlink(pf));
+err_load:
ice_deinit(pf);
err_init:
pci_disable_device(pdev);
@@ -5340,8 +5276,6 @@ static void ice_remove(struct pci_dev *pdev)
msleep(100);
}
- ice_debugfs_exit();
-
if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
set_bit(ICE_VF_RESETS_DISABLED, pf->state);
ice_free_vfs(pf);
@@ -5355,12 +5289,14 @@ static void ice_remove(struct pci_dev *pdev)
if (!ice_is_safe_mode(pf))
ice_remove_arfs(pf);
- ice_deinit_features(pf);
+
ice_deinit_devlink(pf);
- ice_deinit_rdma(pf);
- ice_deinit_eth(pf);
- ice_deinit(pf);
+ devl_lock(priv_to_devlink(pf));
+ ice_unload(pf);
+ devl_unlock(priv_to_devlink(pf));
+
+ ice_deinit(pf);
ice_vsi_release_all(pf);
ice_setup_mc_magic_wake(pf);
@@ -5447,6 +5383,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
if (ret)
goto err_reinit;
ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+ ice_vsi_set_napi_queues(pf->vsi[v]);
}
ret = ice_req_irq_msix_misc(pf);
@@ -5752,6 +5689,10 @@ static const struct pci_device_id ice_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE) },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP) },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT) },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_BACKPLANE), },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_QSFP), },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SFP), },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SGMII), },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_BACKPLANE) },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_QSFP56) },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP) },
@@ -5841,6 +5782,7 @@ module_init(ice_module_init);
static void __exit ice_module_exit(void)
{
pci_unregister_driver(&ice_driver);
+ ice_debugfs_exit();
destroy_workqueue(ice_wq);
destroy_workqueue(ice_lag_wq);
pr_info("module unloaded\n");
@@ -6736,6 +6678,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
{
struct rtnl_link_stats64 *net_stats, *stats_prev;
struct rtnl_link_stats64 *vsi_stats;
+ struct ice_pf *pf = vsi->back;
u64 pkts, bytes;
int i;
@@ -6781,21 +6724,18 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
net_stats = &vsi->net_stats;
stats_prev = &vsi->net_stats_prev;
- /* clear prev counters after reset */
- if (vsi_stats->tx_packets < stats_prev->tx_packets ||
- vsi_stats->rx_packets < stats_prev->rx_packets) {
- stats_prev->tx_packets = 0;
- stats_prev->tx_bytes = 0;
- stats_prev->rx_packets = 0;
- stats_prev->rx_bytes = 0;
+ /* Update netdev counters, but keep in mind that values could start at
+ * random value after PF reset. And as we increase the reported stat by
+ * diff of Prev-Cur, we need to be sure that Prev is valid. If it's not,
+ * let's skip this round.
+ */
+ if (likely(pf->stat_prev_loaded)) {
+ net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
+ net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
+ net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
+ net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
}
- /* update netdev counters */
- net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
- net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
- net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
- net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
-
stats_prev->tx_packets = vsi_stats->tx_packets;
stats_prev->tx_bytes = vsi_stats->tx_bytes;
stats_prev->rx_packets = vsi_stats->rx_packets;
@@ -7060,6 +7000,50 @@ static void ice_napi_disable_all(struct ice_vsi *vsi)
}
/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+static void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+ int i;
+
+ /* disable interrupt causation from each Rx queue; Tx queues are
+ * handled in ice_vsi_stop_tx_ring()
+ */
+ if (vsi->rx_rings) {
+ ice_for_each_rxq(vsi, i) {
+ if (vsi->rx_rings[i]) {
+ u16 reg;
+
+ reg = vsi->rx_rings[i]->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+ }
+ }
+ }
+
+ /* disable each interrupt */
+ ice_for_each_q_vector(vsi, i) {
+ if (!vsi->q_vectors[i])
+ continue;
+ wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
+ }
+
+ ice_flush(hw);
+
+ /* don't call synchronize_irq() for VF's from the host */
+ if (vsi->type == ICE_VSI_VF)
+ return;
+
+ ice_for_each_q_vector(vsi, i)
+ synchronize_irq(vsi->q_vectors[i]->irq.virq);
+}
+
+/**
* ice_down - Shutdown the connection
* @vsi: The VSI being stopped
*
@@ -7548,7 +7532,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
* fail.
*/
if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
- ice_ptp_reset(pf);
+ ice_ptp_rebuild(pf, reset_type);
if (ice_is_feature_supported(pf, ICE_F_GNSS))
ice_gnss_init(pf);
@@ -8012,6 +7996,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
pf_sw = pf->first_sw;
/* find the attribute in the netlink message */
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (!br_spec)
+ return -EINVAL;
nla_for_each_nested(attr, br_spec, rem) {
__u16 mode;
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
index 82bc54fec7f3..a2562f04267f 100644
--- a/drivers/net/ethernet/intel/ice/ice_osdep.h
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -24,7 +24,7 @@
#define rd64(a, reg) readq((a)->hw_addr + (reg))
#define ice_flush(a) rd32((a), GLGEN_STAT)
-#define ICE_M(m, s) ((m) << (s))
+#define ICE_M(m, s) ((m ## U) << (s))
struct ice_dma_mem {
void *va;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 3b6605c8585e..c11eba07283c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -601,17 +601,13 @@ void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx)
/* Read the low 32 bit value */
raw_tstamp |= (u64)rd32(&pf->hw, PF_SB_ATQBAH);
- /* For PHYs which don't implement a proper timestamp ready bitmap,
- * verify that the timestamp value is different from the last cached
- * timestamp. If it is not, skip this for now assuming it hasn't yet
- * been captured by hardware.
+ /* Devices using this interface always verify the timestamp differs
+ * relative to the last cached timestamp value.
*/
- if (!drop_ts && tx->verify_cached &&
- raw_tstamp == tx->tstamps[idx].cached_tstamp)
+ if (raw_tstamp == tx->tstamps[idx].cached_tstamp)
return;
- if (tx->verify_cached && raw_tstamp)
- tx->tstamps[idx].cached_tstamp = raw_tstamp;
+ tx->tstamps[idx].cached_tstamp = raw_tstamp;
clear_bit(idx, tx->in_use);
skb = tx->tstamps[idx].skb;
tx->tstamps[idx].skb = NULL;
@@ -701,9 +697,11 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx)
hw = &pf->hw;
/* Read the Tx ready status first */
- err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
- if (err)
- return;
+ if (tx->has_ready_bitmap) {
+ err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
+ if (err)
+ return;
+ }
/* Drop packets if the link went down */
link_up = ptp_port->link_up;
@@ -731,7 +729,8 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx)
* If we do not, the hardware logic for generating a new
* interrupt can get stuck on some devices.
*/
- if (!(tstamp_ready & BIT_ULL(phy_idx))) {
+ if (tx->has_ready_bitmap &&
+ !(tstamp_ready & BIT_ULL(phy_idx))) {
if (drop_ts)
goto skip_ts_read;
@@ -751,7 +750,7 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx)
* from the last cached timestamp. If it is not, skip this for
* now assuming it hasn't yet been captured by hardware.
*/
- if (!drop_ts && tx->verify_cached &&
+ if (!drop_ts && !tx->has_ready_bitmap &&
raw_tstamp == tx->tstamps[idx].cached_tstamp)
continue;
@@ -761,7 +760,7 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx)
skip_ts_read:
spin_lock_irqsave(&tx->lock, flags);
- if (tx->verify_cached && raw_tstamp)
+ if (!tx->has_ready_bitmap && raw_tstamp)
tx->tstamps[idx].cached_tstamp = raw_tstamp;
clear_bit(idx, tx->in_use);
skb = tx->tstamps[idx].skb;
@@ -965,6 +964,22 @@ ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx)
}
/**
+ * ice_ptp_flush_all_tx_tracker - Flush all timestamp trackers on this clock
+ * @pf: Board private structure
+ *
+ * Called by the clock owner to flush all the Tx timestamp trackers associated
+ * with the clock.
+ */
+static void
+ice_ptp_flush_all_tx_tracker(struct ice_pf *pf)
+{
+ struct ice_ptp_port *port;
+
+ list_for_each_entry(port, &pf->ptp.ports_owner.ports, list_member)
+ ice_ptp_flush_tx_tracker(ptp_port_to_pf(port), &port->tx);
+}
+
+/**
* ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
* @pf: Board private structure
* @tx: Tx tracking structure to release
@@ -1014,7 +1029,7 @@ ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
tx->block = port / ICE_PORTS_PER_QUAD;
tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E82X;
tx->len = INDEX_PER_PORT_E82X;
- tx->verify_cached = 0;
+ tx->has_ready_bitmap = 1;
return ice_ptp_alloc_tx_tracker(tx);
}
@@ -1037,7 +1052,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
* verify new timestamps against cached copy of the last read
* timestamp.
*/
- tx->verify_cached = 1;
+ tx->has_ready_bitmap = 0;
return ice_ptp_alloc_tx_tracker(tx);
}
@@ -1430,7 +1445,7 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
struct ice_ptp_port *ptp_port;
struct ice_hw *hw = &pf->hw;
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ if (pf->ptp.state != ICE_PTP_READY)
return;
if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS))
@@ -1456,14 +1471,14 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
}
/**
- * ice_ptp_tx_ena_intr - Enable or disable the Tx timestamp interrupt
+ * ice_ptp_cfg_phy_interrupt - Configure PHY interrupt settings
* @pf: PF private structure
* @ena: bool value to enable or disable interrupt
* @threshold: Minimum number of packets at which intr is triggered
*
* Utility function to enable or disable Tx timestamp interrupt and threshold
*/
-static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold)
+static int ice_ptp_cfg_phy_interrupt(struct ice_pf *pf, bool ena, u32 threshold)
{
struct ice_hw *hw = &pf->hw;
int err = 0;
@@ -2162,7 +2177,7 @@ int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
{
struct hwtstamp_config *config;
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ if (pf->ptp.state != ICE_PTP_READY)
return -EIO;
config = &pf->ptp.tstamp_config;
@@ -2232,7 +2247,7 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
struct hwtstamp_config config;
int err;
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ if (pf->ptp.state != ICE_PTP_READY)
return -EAGAIN;
if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
@@ -2616,7 +2631,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
int err;
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ if (pf->ptp.state != ICE_PTP_READY)
return;
err = ice_ptp_update_cached_phctime(pf);
@@ -2629,36 +2644,72 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
}
/**
- * ice_ptp_reset - Initialize PTP hardware clock support after reset
+ * ice_ptp_prepare_for_reset - Prepare PTP for reset
+ * @pf: Board private structure
+ * @reset_type: the reset type being performed
+ */
+void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct ice_ptp *ptp = &pf->ptp;
+ u8 src_tmr;
+
+ if (ptp->state != ICE_PTP_READY)
+ return;
+
+ ptp->state = ICE_PTP_RESETTING;
+
+ /* Disable timestamping for both Tx and Rx */
+ ice_ptp_disable_timestamp_mode(pf);
+
+ kthread_cancel_delayed_work_sync(&ptp->work);
+
+ if (reset_type == ICE_RESET_PFR)
+ return;
+
+ ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
+ src_tmr = ice_get_ptp_src_clock_index(&pf->hw);
+
+ /* Disable source clock */
+ wr32(&pf->hw, GLTSYN_ENA(src_tmr), (u32)~GLTSYN_ENA_TSYN_ENA_M);
+
+ /* Acquire PHC and system timer to restore after reset */
+ ptp->reset_time = ktime_get_real_ns();
+}
+
+/**
+ * ice_ptp_rebuild_owner - Initialize PTP clock owner after reset
* @pf: Board private structure
+ *
+ * Companion function for ice_ptp_rebuild() which handles tasks that only the
+ * PTP clock owner instance should perform.
*/
-void ice_ptp_reset(struct ice_pf *pf)
+static int ice_ptp_rebuild_owner(struct ice_pf *pf)
{
struct ice_ptp *ptp = &pf->ptp;
struct ice_hw *hw = &pf->hw;
struct timespec64 ts;
- int err, itr = 1;
u64 time_diff;
-
- if (test_bit(ICE_PFR_REQ, pf->state) ||
- !ice_pf_src_tmr_owned(pf))
- goto pfr;
+ int err;
err = ice_ptp_init_phc(hw);
if (err)
- goto err;
+ return err;
/* Acquire the global hardware lock */
if (!ice_ptp_lock(hw)) {
err = -EBUSY;
- goto err;
+ return err;
}
/* Write the increment time value to PHY and LAN */
err = ice_ptp_write_incval(hw, ice_base_incval(pf));
if (err) {
ice_ptp_unlock(hw);
- goto err;
+ return err;
}
/* Write the initial Time value to PHY and LAN using the cached PHC
@@ -2674,38 +2725,54 @@ void ice_ptp_reset(struct ice_pf *pf)
err = ice_ptp_write_init(pf, &ts);
if (err) {
ice_ptp_unlock(hw);
- goto err;
+ return err;
}
/* Release the global hardware lock */
ice_ptp_unlock(hw);
+ /* Flush software tracking of any outstanding timestamps since we're
+ * about to flush the PHY timestamp block.
+ */
+ ice_ptp_flush_all_tx_tracker(pf);
+
if (!ice_is_e810(hw)) {
/* Enable quad interrupts */
- err = ice_ptp_tx_ena_intr(pf, true, itr);
+ err = ice_ptp_cfg_phy_interrupt(pf, true, 1);
if (err)
- goto err;
- }
+ return err;
-pfr:
- /* Init Tx structures */
- if (ice_is_e810(&pf->hw)) {
- err = ice_ptp_init_tx_e810(pf, &ptp->port.tx);
- } else {
- kthread_init_delayed_work(&ptp->port.ov_work,
- ice_ptp_wait_for_offsets);
- err = ice_ptp_init_tx_e82x(pf, &ptp->port.tx,
- ptp->port.port_num);
+ ice_ptp_restart_all_phy(pf);
}
- if (err)
+
+ return 0;
+}
+
+/**
+ * ice_ptp_rebuild - Initialize PTP hardware clock support after reset
+ * @pf: Board private structure
+ * @reset_type: the reset type being performed
+ */
+void ice_ptp_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct ice_ptp *ptp = &pf->ptp;
+ int err;
+
+ if (ptp->state == ICE_PTP_READY) {
+ ice_ptp_prepare_for_reset(pf, reset_type);
+ } else if (ptp->state != ICE_PTP_RESETTING) {
+ err = -EINVAL;
+ dev_err(ice_pf_to_dev(pf), "PTP was not initialized\n");
goto err;
+ }
- set_bit(ICE_FLAG_PTP, pf->flags);
+ if (ice_pf_src_tmr_owned(pf) && reset_type != ICE_RESET_PFR) {
+ err = ice_ptp_rebuild_owner(pf);
+ if (err)
+ goto err;
+ }
- /* Restart the PHY timestamping block */
- if (!test_bit(ICE_PFR_REQ, pf->state) &&
- ice_pf_src_tmr_owned(pf))
- ice_ptp_restart_all_phy(pf);
+ ptp->state = ICE_PTP_READY;
/* Start periodic work going */
kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0);
@@ -2714,6 +2781,7 @@ pfr:
return;
err:
+ ptp->state = ICE_PTP_ERROR;
dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err);
}
@@ -2923,39 +2991,6 @@ int ice_ptp_clock_index(struct ice_pf *pf)
}
/**
- * ice_ptp_prepare_for_reset - Prepare PTP for reset
- * @pf: Board private structure
- */
-void ice_ptp_prepare_for_reset(struct ice_pf *pf)
-{
- struct ice_ptp *ptp = &pf->ptp;
- u8 src_tmr;
-
- clear_bit(ICE_FLAG_PTP, pf->flags);
-
- /* Disable timestamping for both Tx and Rx */
- ice_ptp_disable_timestamp_mode(pf);
-
- kthread_cancel_delayed_work_sync(&ptp->work);
-
- if (test_bit(ICE_PFR_REQ, pf->state))
- return;
-
- ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
-
- /* Disable periodic outputs */
- ice_ptp_disable_all_clkout(pf);
-
- src_tmr = ice_get_ptp_src_clock_index(&pf->hw);
-
- /* Disable source clock */
- wr32(&pf->hw, GLTSYN_ENA(src_tmr), (u32)~GLTSYN_ENA_TSYN_ENA_M);
-
- /* Acquire PHC and system timer to restore after reset */
- ptp->reset_time = ktime_get_real_ns();
-}
-
-/**
* ice_ptp_init_owner - Initialize PTP_1588_CLOCK device
* @pf: Board private structure
*
@@ -2967,7 +3002,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf)
{
struct ice_hw *hw = &pf->hw;
struct timespec64 ts;
- int err, itr = 1;
+ int err;
err = ice_ptp_init_phc(hw);
if (err) {
@@ -3002,7 +3037,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf)
if (!ice_is_e810(hw)) {
/* Enable quad interrupts */
- err = ice_ptp_tx_ena_intr(pf, true, itr);
+ err = ice_ptp_cfg_phy_interrupt(pf, true, 1);
if (err)
goto err_exit;
}
@@ -3195,6 +3230,8 @@ void ice_ptp_init(struct ice_pf *pf)
struct ice_hw *hw = &pf->hw;
int err;
+ ptp->state = ICE_PTP_INITIALIZING;
+
ice_ptp_init_phy_model(hw);
ice_ptp_init_tx_interrupt_mode(pf);
@@ -3219,12 +3256,13 @@ void ice_ptp_init(struct ice_pf *pf)
/* Configure initial Tx interrupt settings */
ice_ptp_cfg_tx_interrupt(pf);
- set_bit(ICE_FLAG_PTP, pf->flags);
- err = ice_ptp_init_work(pf, ptp);
+ err = ice_ptp_create_auxbus_device(pf);
if (err)
goto err;
- err = ice_ptp_create_auxbus_device(pf);
+ ptp->state = ICE_PTP_READY;
+
+ err = ice_ptp_init_work(pf, ptp);
if (err)
goto err;
@@ -3237,7 +3275,7 @@ err:
ptp_clock_unregister(ptp->clock);
pf->ptp.clock = NULL;
}
- clear_bit(ICE_FLAG_PTP, pf->flags);
+ ptp->state = ICE_PTP_ERROR;
dev_err(ice_pf_to_dev(pf), "PTP failed %d\n", err);
}
@@ -3250,9 +3288,11 @@ err:
*/
void ice_ptp_release(struct ice_pf *pf)
{
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ if (pf->ptp.state != ICE_PTP_READY)
return;
+ pf->ptp.state = ICE_PTP_UNINIT;
+
/* Disable timestamping for both Tx and Rx */
ice_ptp_disable_timestamp_mode(pf);
@@ -3260,8 +3300,6 @@ void ice_ptp_release(struct ice_pf *pf)
ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
- clear_bit(ICE_FLAG_PTP, pf->flags);
-
kthread_cancel_delayed_work_sync(&pf->ptp.work);
ice_ptp_port_phy_stop(&pf->ptp.port);
@@ -3271,6 +3309,9 @@ void ice_ptp_release(struct ice_pf *pf)
pf->ptp.kworker = NULL;
}
+ if (ice_pf_src_tmr_owned(pf))
+ ice_ptp_unregister_auxbus_driver(pf);
+
if (!pf->ptp.clock)
return;
@@ -3280,7 +3321,5 @@ void ice_ptp_release(struct ice_pf *pf)
ptp_clock_unregister(pf->ptp.clock);
pf->ptp.clock = NULL;
- ice_ptp_unregister_auxbus_driver(pf);
-
dev_info(ice_pf_to_dev(pf), "Removed PTP clock\n");
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 087dd32d8762..3af20025043a 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -100,7 +100,7 @@ struct ice_perout_channel {
* the last timestamp we read for a given index. If the current timestamp
* value is the same as the cached value, we assume a new timestamp hasn't
* been captured. This avoids reporting stale timestamps to the stack. This is
- * only done if the verify_cached flag is set in ice_ptp_tx structure.
+ * only done if the has_ready_bitmap flag is not set in ice_ptp_tx structure.
*/
struct ice_tx_tstamp {
struct sk_buff *skb;
@@ -130,7 +130,9 @@ enum ice_tx_tstamp_work {
* @init: if true, the tracker is initialized;
* @calibrating: if true, the PHY is calibrating the Tx offset. During this
* window, timestamps are temporarily disabled.
- * @verify_cached: if true, verify new timestamp differs from last read value
+ * @has_ready_bitmap: if true, the hardware has a valid Tx timestamp ready
+ * bitmap register. If false, fall back to verifying new
+ * timestamp values against previously cached copy.
* @last_ll_ts_idx_read: index of the last LL TS read by the FW
*/
struct ice_ptp_tx {
@@ -143,7 +145,7 @@ struct ice_ptp_tx {
u8 len;
u8 init : 1;
u8 calibrating : 1;
- u8 verify_cached : 1;
+ u8 has_ready_bitmap : 1;
s8 last_ll_ts_idx_read;
};
@@ -203,8 +205,17 @@ struct ice_ptp_port_owner {
#define GLTSYN_TGT_H_IDX_MAX 4
+enum ice_ptp_state {
+ ICE_PTP_UNINIT = 0,
+ ICE_PTP_INITIALIZING,
+ ICE_PTP_READY,
+ ICE_PTP_RESETTING,
+ ICE_PTP_ERROR,
+};
+
/**
* struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
+ * @state: current state of PTP state machine
* @tx_interrupt_mode: the TX interrupt mode for the PTP clock
* @port: data for the PHY port initialization procedure
* @ports_owner: data for the auxiliary driver owner
@@ -227,6 +238,7 @@ struct ice_ptp_port_owner {
* @late_cached_phc_updates: number of times cached PHC update is late
*/
struct ice_ptp {
+ enum ice_ptp_state state;
enum ice_ptp_tx_interrupt tx_interrupt_mode;
struct ice_ptp_port port;
struct ice_ptp_port_owner ports_owner;
@@ -304,8 +316,9 @@ enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf);
u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
const struct ice_pkt_ctx *pkt_ctx);
-void ice_ptp_reset(struct ice_pf *pf);
-void ice_ptp_prepare_for_reset(struct ice_pf *pf);
+void ice_ptp_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
+void ice_ptp_prepare_for_reset(struct ice_pf *pf,
+ enum ice_reset_req reset_type);
void ice_ptp_init(struct ice_pf *pf);
void ice_ptp_release(struct ice_pf *pf);
void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
@@ -345,8 +358,15 @@ ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
return 0;
}
-static inline void ice_ptp_reset(struct ice_pf *pf) { }
-static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
+static inline void ice_ptp_rebuild(struct ice_pf *pf,
+ enum ice_reset_req reset_type)
+{
+}
+
+static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf,
+ enum ice_reset_req reset_type)
+{
+}
static inline void ice_ptp_init(struct ice_pf *pf) { }
static inline void ice_ptp_release(struct ice_pf *pf) { }
static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index a94a1c48c3de..a958fcf3e6be 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -240,7 +240,6 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
}
vf->lan_vsi_idx = vsi->idx;
- vf->lan_vsi_num = vsi->vsi_num;
return vsi;
}
@@ -1068,6 +1067,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count)
struct ice_pf *pf = pci_get_drvdata(pdev);
u16 prev_msix, prev_queues, queues;
bool needs_rebuild = false;
+ struct ice_vsi *vsi;
struct ice_vf *vf;
int id;
@@ -1102,6 +1102,10 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count)
if (!vf)
return -ENOENT;
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ return -ENOENT;
+
prev_msix = vf->num_msix;
prev_queues = vf->num_vf_qs;
@@ -1122,7 +1126,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count)
if (vf->first_vector_idx < 0)
goto unroll;
- if (ice_vf_reconfig_vsi(vf)) {
+ if (ice_vf_reconfig_vsi(vf) || ice_vf_init_host_cfg(vf, vsi)) {
/* Try to rebuild with previous values */
needs_rebuild = true;
goto unroll;
@@ -1148,8 +1152,10 @@ unroll:
if (vf->first_vector_idx < 0)
return -EINVAL;
- if (needs_rebuild)
+ if (needs_rebuild) {
ice_vf_reconfig_vsi(vf);
+ ice_vf_init_host_cfg(vf, vsi);
+ }
ice_ena_vf_mappings(vf);
ice_put_vf(vf);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 74d13cc5a3a7..97d41d6ebf1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -513,11 +513,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
if (ice_is_xdp_ena_vsi(rx_ring->vsi))
WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
- if (rx_ring->vsi->type == ICE_VSI_PF &&
- !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
- if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->q_index, rx_ring->q_vector->napi.napi_id))
- goto err;
return 0;
err:
@@ -603,9 +598,7 @@ out_failure:
ret = ICE_XDP_CONSUMED;
}
exit:
- rx_buf->act = ret;
- if (unlikely(xdp_buff_has_frags(xdp)))
- ice_set_rx_bufs_act(xdp, rx_ring, ret);
+ ice_set_rx_bufs_act(xdp, rx_ring, ret);
}
/**
@@ -893,14 +886,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
}
if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
- if (unlikely(xdp_buff_has_frags(xdp)))
- ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+ ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
return -ENOMEM;
}
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
rx_buf->page_offset, size);
sinfo->xdp_frags_size += size;
+ /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
+ * can pop off frags but driver has to handle it on its own
+ */
+ rx_ring->nr_frags = sinfo->nr_frags;
if (page_is_pfmemalloc(rx_buf->page))
xdp_buff_set_frag_pfmemalloc(xdp);
@@ -1251,6 +1247,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
xdp->data = NULL;
rx_ring->first_desc = ntc;
+ rx_ring->nr_frags = 0;
continue;
construct_skb:
if (likely(ice_ring_uses_build_skb(rx_ring)))
@@ -1266,10 +1263,12 @@ construct_skb:
ICE_XDP_CONSUMED);
xdp->data = NULL;
rx_ring->first_desc = ntc;
+ rx_ring->nr_frags = 0;
break;
}
xdp->data = NULL;
rx_ring->first_desc = ntc;
+ rx_ring->nr_frags = 0;
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b3379ff73674..af955b0e5dc5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -358,6 +358,7 @@ struct ice_rx_ring {
struct ice_tx_ring *xdp_ring;
struct ice_rx_ring *next; /* pointer to next ring in q_vector */
struct xsk_buff_pool *xsk_pool;
+ u32 nr_frags;
dma_addr_t dma; /* physical address of ring */
u16 rx_buf_len;
u8 dcb_tc; /* Traffic class of ring */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 839e5da24ad5..f8f1d2bdc1be 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -143,8 +143,12 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
(decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
- if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
- BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+ if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) {
+ ring->vsi->back->hw_rx_eipe_error++;
+ return;
+ }
+
+ if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S))))
goto checksum_fail;
if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 762047508619..afcead4baef4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -12,26 +12,39 @@
* act: action to store onto Rx buffers related to XDP buffer parts
*
* Set action that should be taken before putting Rx buffer from first frag
- * to one before last. Last one is handled by caller of this function as it
- * is the EOP frag that is currently being processed. This function is
- * supposed to be called only when XDP buffer contains frags.
+ * to the last.
*/
static inline void
ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
const unsigned int act)
{
- const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
- u32 first = rx_ring->first_desc;
- u32 nr_frags = sinfo->nr_frags;
+ u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+ u32 nr_frags = rx_ring->nr_frags + 1;
+ u32 idx = rx_ring->first_desc;
u32 cnt = rx_ring->count;
struct ice_rx_buf *buf;
for (int i = 0; i < nr_frags; i++) {
- buf = &rx_ring->rx_buf[first];
+ buf = &rx_ring->rx_buf[idx];
buf->act = act;
- if (++first == cnt)
- first = 0;
+ if (++idx == cnt)
+ idx = 0;
+ }
+
+ /* adjust pagecnt_bias on frags freed by XDP prog */
+ if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
+ u32 delta = rx_ring->nr_frags - sinfo_frags;
+
+ while (delta) {
+ if (idx == 0)
+ idx = cnt - 1;
+ else
+ idx--;
+ buf = &rx_ring->rx_buf[idx];
+ buf->pagecnt_bias--;
+ delta--;
+ }
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 41ab6d7bbd9e..9ff92dba5823 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -132,6 +132,7 @@ enum ice_mac_type {
ICE_MAC_E810,
ICE_MAC_E830,
ICE_MAC_GENERIC,
+ ICE_MAC_GENERIC_3K_E825,
};
/* Media Types */
@@ -1072,7 +1073,7 @@ struct ice_aq_get_set_rss_lut_params {
#define ICE_OROM_VER_BUILD_SHIFT 8
#define ICE_OROM_VER_BUILD_MASK (0xffff << ICE_OROM_VER_BUILD_SHIFT)
#define ICE_OROM_VER_SHIFT 24
-#define ICE_OROM_VER_MASK (0xff << ICE_OROM_VER_SHIFT)
+#define ICE_OROM_VER_MASK (0xffU << ICE_OROM_VER_SHIFT)
#define ICE_SR_PFA_PTR 0x40
#define ICE_SR_1ST_NVM_BANK_PTR 0x42
#define ICE_SR_NVM_BANK_SIZE 0x43
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 2ffdae9a82df..21d26e19338a 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -280,12 +280,6 @@ int ice_vf_reconfig_vsi(struct ice_vf *vf)
return err;
}
- /* Update the lan_vsi_num field since it might have been changed. The
- * PF lan_vsi_idx number remains the same so we don't need to change
- * that.
- */
- vf->lan_vsi_num = vsi->vsi_num;
-
return 0;
}
@@ -315,7 +309,6 @@ static int ice_vf_rebuild_vsi(struct ice_vf *vf)
* vf->lan_vsi_idx
*/
vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
- vf->lan_vsi_num = vsi->vsi_num;
return 0;
}
@@ -1315,13 +1308,12 @@ int ice_vf_init_host_cfg(struct ice_vf *vf, struct ice_vsi *vsi)
}
/**
- * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access
+ * ice_vf_invalidate_vsi - invalidate vsi_idx to remove VSI access
* @vf: VF to remove access to VSI for
*/
void ice_vf_invalidate_vsi(struct ice_vf *vf)
{
vf->lan_vsi_idx = ICE_NO_VSI;
- vf->lan_vsi_num = ICE_NO_VSI;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 0cc9034065c5..fec16919ec19 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -109,11 +109,6 @@ struct ice_vf {
u8 spoofchk:1;
u8 link_forced:1;
u8 link_up:1; /* only valid if VF link is forced */
- /* VSI indices - actual VSI pointers are maintained in the PF structure
- * When assigned, these will be non-zero, because VSI 0 is always
- * the main LAN VSI for the PF.
- */
- u16 lan_vsi_num; /* ID as used by firmware */
unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */
unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */
DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index c925813ec9ca..1ff9818b4c84 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
vf->driver_caps = *(u32 *)msg;
else
vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
- VIRTCHNL_VF_OFFLOAD_RSS_REG |
VIRTCHNL_VF_OFFLOAD_VLAN;
vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
@@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi,
vf->driver_caps);
- if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
- } else {
- if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)
- vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
- else
- vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
- }
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
@@ -506,7 +499,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
vfres->rss_lut_size = ICE_LUT_VSI_SIZE;
vfres->max_mtu = ice_vc_get_max_frame_size(vf);
- vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
+ vfres->vsi_res[0].vsi_id = ICE_VF_VSI_ID;
vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
@@ -552,27 +545,20 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf)
*/
bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
{
- struct ice_pf *pf = vf->pf;
- struct ice_vsi *vsi;
-
- vsi = ice_find_vsi(pf, vsi_id);
-
- return (vsi && (vsi->vf == vf));
+ return vsi_id == ICE_VF_VSI_ID;
}
/**
* ice_vc_isvalid_q_id
- * @vf: pointer to the VF info
- * @vsi_id: VSI ID
+ * @vsi: VSI to check queue ID against
* @qid: VSI relative queue ID
*
* check for the valid queue ID
*/
-static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
+static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid)
{
- struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id);
/* allocated Tx and Rx queues should be always equal for VF VSI */
- return (vsi && (qid < vsi->alloc_txq));
+ return qid < vsi->alloc_txq;
}
/**
@@ -1330,7 +1316,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
*/
q_map = vqs->rx_queues;
for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1352,7 +1338,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
q_map = vqs->tx_queues;
for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1457,7 +1443,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
q_map = vqs->tx_queues;
for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1483,7 +1469,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
} else if (q_map) {
for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1539,7 +1525,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
vsi_q_id = vsi_q_id_idx;
- if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
return VIRTCHNL_STATUS_ERR_PARAM;
q_vector->num_ring_rx++;
@@ -1553,7 +1539,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
vsi_q_id = vsi_q_id_idx;
- if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
return VIRTCHNL_STATUS_ERR_PARAM;
q_vector->num_ring_tx++;
@@ -1710,7 +1696,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
qpi->txq.headwb_enabled ||
!ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
!ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
- !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
+ !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) {
goto error_param;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
index 60dfbe05980a..3a4115869153 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
@@ -19,6 +19,15 @@
#define ICE_MAX_MACADDR_PER_VF 18
#define ICE_FLEX_DESC_RXDID_MAX_NUM 64
+/* VFs only get a single VSI. For ice hardware, the VF does not need to know
+ * its VSI index. However, the virtchnl interface requires a VSI number,
+ * mainly due to legacy hardware.
+ *
+ * Since the VF doesn't need this information, report a static value to the VF
+ * instead of leaking any information about the PF or hardware setup.
+ */
+#define ICE_VF_VSI_ID 1
+
struct ice_virtchnl_ops {
int (*get_ver_msg)(struct ice_vf *vf, u8 *msg);
int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
index 5e19d48a05b4..d796dbd2a440 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -13,8 +13,6 @@
* - opcodes needed by VF when caps are activated
*
* Caps that don't use new opcodes (no opcodes should be allowed):
- * - VIRTCHNL_VF_OFFLOAD_RSS_AQ
- * - VIRTCHNL_VF_OFFLOAD_RSS_REG
* - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR
* - VIRTCHNL_VF_OFFLOAD_CRC
* - VIRTCHNL_VF_OFFLOAD_RX_POLLING
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index f001553e1a1a..8e4ff3af86c6 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -94,9 +94,6 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id)
if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF))
return -EINVAL;
- if (vsi_id != vf->lan_vsi_num)
- return -EINVAL;
-
if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 5d1ae8e4058a..1857220d27fe 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -179,6 +179,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
return -EBUSY;
usleep_range(1000, 2000);
}
+
+ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+ ice_qvec_toggle_napi(vsi, q_vector, false);
+
netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
@@ -195,13 +199,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
if (err)
return err;
}
- ice_qvec_dis_irq(vsi, rx_ring, q_vector);
-
err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
if (err)
return err;
- ice_qvec_toggle_napi(vsi, q_vector, false);
ice_qp_clean_rings(vsi, q_idx);
ice_qp_reset_stats(vsi, q_idx);
@@ -217,53 +218,39 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
*/
static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
{
- DEFINE_FLEX(struct ice_aqc_add_tx_qgrp, qg_buf, txqs, 1);
- u16 size = __struct_size(qg_buf);
struct ice_q_vector *q_vector;
- struct ice_tx_ring *tx_ring;
- struct ice_rx_ring *rx_ring;
int err;
- if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
- return -EINVAL;
-
- qg_buf->num_txqs = 1;
-
- tx_ring = vsi->tx_rings[q_idx];
- rx_ring = vsi->rx_rings[q_idx];
- q_vector = rx_ring->q_vector;
-
- err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+ err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx);
if (err)
return err;
if (ice_is_xdp_ena_vsi(vsi)) {
struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
- memset(qg_buf, 0, size);
- qg_buf->num_txqs = 1;
- err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+ err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx);
if (err)
return err;
ice_set_ring_xdp(xdp_ring);
ice_tx_xsk_pool(vsi, q_idx);
}
- err = ice_vsi_cfg_rxq(rx_ring);
+ err = ice_vsi_cfg_single_rxq(vsi, q_idx);
if (err)
return err;
+ q_vector = vsi->rx_rings[q_idx]->q_vector;
ice_qvec_cfg_msix(vsi, q_vector);
err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
if (err)
return err;
- clear_bit(ICE_CFG_BUSY, vsi->state);
ice_qvec_toggle_napi(vsi, q_vector, true);
ice_qvec_ena_irq(vsi, q_vector);
netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+ clear_bit(ICE_CFG_BUSY, vsi->state);
return 0;
}
@@ -825,7 +812,8 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first,
}
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
- virt_to_page(xdp->data_hard_start), 0, size);
+ virt_to_page(xdp->data_hard_start),
+ XDP_PACKET_HEADROOM, size);
sinfo->xdp_frags_size += size;
xsk_buff_add_frag(xdp);
@@ -895,7 +883,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
if (!first) {
first = xdp;
- xdp_buff_clear_frags_flag(first);
} else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) {
break;
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index 0acc125decb3..e7a036538246 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -37,8 +37,6 @@ struct idpf_vport_max_q;
#define IDPF_MB_MAX_ERR 20
#define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \
((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz))
-#define IDPF_WAIT_FOR_EVENT_TIMEO_MIN 2000
-#define IDPF_WAIT_FOR_EVENT_TIMEO 60000
#define IDPF_MAX_WAIT 500
@@ -66,14 +64,12 @@ struct idpf_mac_filter {
/**
* enum idpf_state - State machine to handle bring up
- * @__IDPF_STARTUP: Start the state machine
* @__IDPF_VER_CHECK: Negotiate virtchnl version
* @__IDPF_GET_CAPS: Negotiate capabilities
* @__IDPF_INIT_SW: Init based on given capabilities
* @__IDPF_STATE_LAST: Must be last, used to determine size
*/
enum idpf_state {
- __IDPF_STARTUP,
__IDPF_VER_CHECK,
__IDPF_GET_CAPS,
__IDPF_INIT_SW,
@@ -87,6 +83,7 @@ enum idpf_state {
* @IDPF_HR_RESET_IN_PROG: Reset in progress
* @IDPF_REMOVE_IN_PROG: Driver remove in progress
* @IDPF_MB_INTR_MODE: Mailbox in interrupt mode
+ * @IDPF_VC_CORE_INIT: virtchnl core has been init
* @IDPF_FLAGS_NBITS: Must be last
*/
enum idpf_flags {
@@ -95,6 +92,7 @@ enum idpf_flags {
IDPF_HR_RESET_IN_PROG,
IDPF_REMOVE_IN_PROG,
IDPF_MB_INTR_MODE,
+ IDPF_VC_CORE_INIT,
IDPF_FLAGS_NBITS,
};
@@ -209,71 +207,6 @@ struct idpf_dev_ops {
struct idpf_reg_ops reg_ops;
};
-/* These macros allow us to generate an enum and a matching char * array of
- * stringified enums that are always in sync. Checkpatch issues a bogus warning
- * about this being a complex macro; but it's wrong, these are never used as a
- * statement and instead only used to define the enum and array.
- */
-#define IDPF_FOREACH_VPORT_VC_STATE(STATE) \
- STATE(IDPF_VC_CREATE_VPORT) \
- STATE(IDPF_VC_CREATE_VPORT_ERR) \
- STATE(IDPF_VC_ENA_VPORT) \
- STATE(IDPF_VC_ENA_VPORT_ERR) \
- STATE(IDPF_VC_DIS_VPORT) \
- STATE(IDPF_VC_DIS_VPORT_ERR) \
- STATE(IDPF_VC_DESTROY_VPORT) \
- STATE(IDPF_VC_DESTROY_VPORT_ERR) \
- STATE(IDPF_VC_CONFIG_TXQ) \
- STATE(IDPF_VC_CONFIG_TXQ_ERR) \
- STATE(IDPF_VC_CONFIG_RXQ) \
- STATE(IDPF_VC_CONFIG_RXQ_ERR) \
- STATE(IDPF_VC_ENA_QUEUES) \
- STATE(IDPF_VC_ENA_QUEUES_ERR) \
- STATE(IDPF_VC_DIS_QUEUES) \
- STATE(IDPF_VC_DIS_QUEUES_ERR) \
- STATE(IDPF_VC_MAP_IRQ) \
- STATE(IDPF_VC_MAP_IRQ_ERR) \
- STATE(IDPF_VC_UNMAP_IRQ) \
- STATE(IDPF_VC_UNMAP_IRQ_ERR) \
- STATE(IDPF_VC_ADD_QUEUES) \
- STATE(IDPF_VC_ADD_QUEUES_ERR) \
- STATE(IDPF_VC_DEL_QUEUES) \
- STATE(IDPF_VC_DEL_QUEUES_ERR) \
- STATE(IDPF_VC_ALLOC_VECTORS) \
- STATE(IDPF_VC_ALLOC_VECTORS_ERR) \
- STATE(IDPF_VC_DEALLOC_VECTORS) \
- STATE(IDPF_VC_DEALLOC_VECTORS_ERR) \
- STATE(IDPF_VC_SET_SRIOV_VFS) \
- STATE(IDPF_VC_SET_SRIOV_VFS_ERR) \
- STATE(IDPF_VC_GET_RSS_LUT) \
- STATE(IDPF_VC_GET_RSS_LUT_ERR) \
- STATE(IDPF_VC_SET_RSS_LUT) \
- STATE(IDPF_VC_SET_RSS_LUT_ERR) \
- STATE(IDPF_VC_GET_RSS_KEY) \
- STATE(IDPF_VC_GET_RSS_KEY_ERR) \
- STATE(IDPF_VC_SET_RSS_KEY) \
- STATE(IDPF_VC_SET_RSS_KEY_ERR) \
- STATE(IDPF_VC_GET_STATS) \
- STATE(IDPF_VC_GET_STATS_ERR) \
- STATE(IDPF_VC_ADD_MAC_ADDR) \
- STATE(IDPF_VC_ADD_MAC_ADDR_ERR) \
- STATE(IDPF_VC_DEL_MAC_ADDR) \
- STATE(IDPF_VC_DEL_MAC_ADDR_ERR) \
- STATE(IDPF_VC_GET_PTYPE_INFO) \
- STATE(IDPF_VC_GET_PTYPE_INFO_ERR) \
- STATE(IDPF_VC_LOOPBACK_STATE) \
- STATE(IDPF_VC_LOOPBACK_STATE_ERR) \
- STATE(IDPF_VC_NBITS)
-
-#define IDPF_GEN_ENUM(ENUM) ENUM,
-#define IDPF_GEN_STRING(STRING) #STRING,
-
-enum idpf_vport_vc_state {
- IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_ENUM)
-};
-
-extern const char * const idpf_vport_vc_state_str[];
-
/**
* enum idpf_vport_reset_cause - Vport soft reset causes
* @IDPF_SR_Q_CHANGE: Soft reset queue change
@@ -358,11 +291,7 @@ struct idpf_port_stats {
* @port_stats: per port csum, header split, and other offload stats
* @link_up: True if link is up
* @link_speed_mbps: Link speed in mbps
- * @vc_msg: Virtchnl message buffer
- * @vc_state: Virtchnl message state
- * @vchnl_wq: Wait queue for virtchnl messages
* @sw_marker_wq: workqueue for marker packets
- * @vc_buf_lock: Lock to protect virtchnl buffer
*/
struct idpf_vport {
u16 num_txq;
@@ -408,12 +337,7 @@ struct idpf_vport {
bool link_up;
u32 link_speed_mbps;
- char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
- DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
-
- wait_queue_head_t vchnl_wq;
wait_queue_head_t sw_marker_wq;
- struct mutex vc_buf_lock;
};
/**
@@ -476,15 +400,11 @@ struct idpf_vport_user_config_data {
* enum idpf_vport_config_flags - Vport config flags
* @IDPF_VPORT_REG_NETDEV: Register netdev
* @IDPF_VPORT_UP_REQUESTED: Set if interface up is requested on core reset
- * @IDPF_VPORT_ADD_MAC_REQ: Asynchronous add ether address in flight
- * @IDPF_VPORT_DEL_MAC_REQ: Asynchronous delete ether address in flight
* @IDPF_VPORT_CONFIG_FLAGS_NBITS: Must be last
*/
enum idpf_vport_config_flags {
IDPF_VPORT_REG_NETDEV,
IDPF_VPORT_UP_REQUESTED,
- IDPF_VPORT_ADD_MAC_REQ,
- IDPF_VPORT_DEL_MAC_REQ,
IDPF_VPORT_CONFIG_FLAGS_NBITS,
};
@@ -555,11 +475,13 @@ struct idpf_vector_lifo {
struct idpf_vport_config {
struct idpf_vport_user_config_data user_config;
struct idpf_vport_max_q max_q;
- void *req_qs_chunks;
+ struct virtchnl2_add_queues *req_qs_chunks;
spinlock_t mac_filter_list_lock;
DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS);
};
+struct idpf_vc_xn_manager;
+
/**
* struct idpf_adapter - Device data struct generated on probe
* @pdev: PCI device struct given on probe
@@ -601,9 +523,7 @@ struct idpf_vport_config {
* @stats_task: Periodic statistics retrieval task
* @stats_wq: Workqueue for statistics task
* @caps: Negotiated capabilities with device
- * @vchnl_wq: Wait queue for virtchnl messages
- * @vc_state: Virtchnl message state
- * @vc_msg: Virtchnl message buffer
+ * @vcxn_mngr: Virtchnl transaction manager
* @dev_ops: See idpf_dev_ops
* @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk
* to VFs but is used to initialize them
@@ -659,10 +579,8 @@ struct idpf_adapter {
struct delayed_work stats_task;
struct workqueue_struct *stats_wq;
struct virtchnl2_get_capabilities caps;
+ struct idpf_vc_xn_manager *vcxn_mngr;
- wait_queue_head_t vchnl_wq;
- DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
- char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
struct idpf_dev_ops dev_ops;
int num_vfs;
bool crc_enable;
@@ -903,68 +821,18 @@ void idpf_mbx_task(struct work_struct *work);
void idpf_vc_event_task(struct work_struct *work);
void idpf_dev_ops_init(struct idpf_adapter *adapter);
void idpf_vf_dev_ops_init(struct idpf_adapter *adapter);
-int idpf_vport_adjust_qs(struct idpf_vport *vport);
-int idpf_init_dflt_mbx(struct idpf_adapter *adapter);
-void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter);
-int idpf_vc_core_init(struct idpf_adapter *adapter);
-void idpf_vc_core_deinit(struct idpf_adapter *adapter);
int idpf_intr_req(struct idpf_adapter *adapter);
void idpf_intr_rel(struct idpf_adapter *adapter);
-int idpf_get_reg_intr_vecs(struct idpf_vport *vport,
- struct idpf_vec_regs *reg_vals);
u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter);
-int idpf_send_delete_queues_msg(struct idpf_vport *vport);
-int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
- u16 num_complq, u16 num_rx_q, u16 num_rx_bufq);
int idpf_initiate_soft_reset(struct idpf_vport *vport,
enum idpf_vport_reset_cause reset_cause);
-int idpf_send_enable_vport_msg(struct idpf_vport *vport);
-int idpf_send_disable_vport_msg(struct idpf_vport *vport);
-int idpf_send_destroy_vport_msg(struct idpf_vport *vport);
-int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport);
-int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport);
-int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
-int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
-int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter);
-int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors);
void idpf_deinit_task(struct idpf_adapter *adapter);
int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter,
u16 *q_vector_idxs,
struct idpf_vector_info *vec_info);
-int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport);
-int idpf_send_get_stats_msg(struct idpf_vport *vport);
-int idpf_get_vec_ids(struct idpf_adapter *adapter,
- u16 *vecids, int num_vecids,
- struct virtchnl2_vector_chunks *chunks);
-int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op,
- void *msg, int msg_size);
-int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
- u16 msg_size, u8 *msg);
void idpf_set_ethtool_ops(struct net_device *netdev);
-int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
- struct idpf_vport_max_q *max_q);
-void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter,
- struct idpf_vport_max_q *max_q);
-int idpf_add_del_mac_filters(struct idpf_vport *vport,
- struct idpf_netdev_priv *np,
- bool add, bool async);
-int idpf_set_promiscuous(struct idpf_adapter *adapter,
- struct idpf_vport_user_config_data *config_data,
- u32 vport_id);
-int idpf_send_disable_queues_msg(struct idpf_vport *vport);
-void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q);
-u32 idpf_get_vport_id(struct idpf_vport *vport);
-int idpf_vport_queue_ids_init(struct idpf_vport *vport);
-int idpf_queue_reg_init(struct idpf_vport *vport);
-int idpf_send_config_queues_msg(struct idpf_vport *vport);
-int idpf_send_enable_queues_msg(struct idpf_vport *vport);
-int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
- struct idpf_vport_max_q *max_q);
-int idpf_check_supported_desc_ids(struct idpf_vport *vport);
void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector,
u16 itr, bool tx);
-int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map);
-int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs);
u8 idpf_vport_get_hsplit(const struct idpf_vport *vport);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
index c7f43d2fcd13..4849590a5591 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
@@ -516,6 +516,8 @@ post_buffs_out:
/* Wrap to end of end ring since current ntp is 0 */
cq->next_to_post = cq->ring_size - 1;
+ dma_wmb();
+
wr32(hw, cq->reg.tail, cq->next_to_post);
}
@@ -546,11 +548,6 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
int err = 0;
u16 i;
- if (*num_q_msg == 0)
- return 0;
- else if (*num_q_msg > cq->ring_size)
- return -EBADR;
-
/* take the lock before we start messing with the ring */
mutex_lock(&cq->cq_lock);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
index 8dee098bbfb0..e8e046ef2f0d 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
@@ -69,6 +69,11 @@ struct idpf_ctlq_msg {
u8 context[IDPF_INDIRECT_CTX_SIZE];
struct idpf_dma_mem *payload;
} indirect;
+ struct {
+ u32 rsvd;
+ u16 data;
+ u16 flags;
+ } sw_cookie;
} ctx;
};
diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c
index 34ad1ac46b78..3df9935685e9 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_dev.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c
@@ -3,6 +3,7 @@
#include "idpf.h"
#include "idpf_lan_pf_regs.h"
+#include "idpf_virtchnl.h"
#define IDPF_PF_ITR_IDX_SPACING 0x4
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 5fea2fd957eb..5d3532c27d57 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -2,14 +2,11 @@
/* Copyright (C) 2023 Intel Corporation */
#include "idpf.h"
+#include "idpf_virtchnl.h"
static const struct net_device_ops idpf_netdev_ops_splitq;
static const struct net_device_ops idpf_netdev_ops_singleq;
-const char * const idpf_vport_vc_state_str[] = {
- IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_STRING)
-};
-
/**
* idpf_init_vector_stack - Fill the MSIX vector stack with vector index
* @adapter: private data struct
@@ -82,19 +79,12 @@ static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter)
*/
void idpf_intr_rel(struct idpf_adapter *adapter)
{
- int err;
-
if (!adapter->msix_entries)
return;
idpf_mb_intr_rel_irq(adapter);
pci_free_irq_vectors(adapter->pdev);
-
- err = idpf_send_dealloc_vectors_msg(adapter);
- if (err)
- dev_err(&adapter->pdev->dev,
- "Failed to deallocate vectors: %d\n", err);
-
+ idpf_send_dealloc_vectors_msg(adapter);
idpf_deinit_vector_stack(adapter);
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
@@ -783,6 +773,8 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
/* setup watchdog timeout value to be 5 second */
netdev->watchdog_timeo = 5 * HZ;
+ netdev->dev_port = idx;
+
/* configure default MTU size */
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = vport->max_mtu;
@@ -973,7 +965,6 @@ static void idpf_vport_rel(struct idpf_vport *vport)
struct idpf_rss_data *rss_data;
struct idpf_vport_max_q max_q;
u16 idx = vport->idx;
- int i;
vport_config = adapter->vport_config[vport->idx];
idpf_deinit_rss(vport);
@@ -983,20 +974,6 @@ static void idpf_vport_rel(struct idpf_vport *vport)
idpf_send_destroy_vport_msg(vport);
- /* Set all bits as we dont know on which vc_state the vport vhnl_wq
- * is waiting on and wakeup the virtchnl workqueue even if it is
- * waiting for the response as we are going down
- */
- for (i = 0; i < IDPF_VC_NBITS; i++)
- set_bit(i, vport->vc_state);
- wake_up(&vport->vchnl_wq);
-
- mutex_destroy(&vport->vc_buf_lock);
-
- /* Clear all the bits */
- for (i = 0; i < IDPF_VC_NBITS; i++)
- clear_bit(i, vport->vc_state);
-
/* Release all max queues allocated to the adapter's pool */
max_q.max_rxq = vport_config->max_q.max_rxq;
max_q.max_txq = vport_config->max_q.max_txq;
@@ -1251,7 +1228,7 @@ void idpf_mbx_task(struct work_struct *work)
queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task,
msecs_to_jiffies(300));
- idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_UNKNOWN, NULL, 0);
+ idpf_recv_mb_msg(adapter);
}
/**
@@ -1541,9 +1518,7 @@ void idpf_init_task(struct work_struct *work)
vport_config = adapter->vport_config[index];
init_waitqueue_head(&vport->sw_marker_wq);
- init_waitqueue_head(&vport->vchnl_wq);
- mutex_init(&vport->vc_buf_lock);
spin_lock_init(&vport_config->mac_filter_list_lock);
INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
@@ -1821,6 +1796,8 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
goto unlock_mutex;
}
+ queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+
/* Initialize the state machine, also allocate memory and request
* resources
*/
@@ -1900,7 +1877,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
* mess with. Nothing below should use those variables from new_vport
* and should instead always refer to them in vport if they need to.
*/
- memcpy(new_vport, vport, offsetof(struct idpf_vport, vc_state));
+ memcpy(new_vport, vport, offsetof(struct idpf_vport, link_speed_mbps));
/* Adjust resource parameters prior to reallocating resources */
switch (reset_cause) {
@@ -1949,7 +1926,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
/* Same comment as above regarding avoiding copying the wait_queues and
* mutexes applies here. We do not want to mess with those if possible.
*/
- memcpy(vport, new_vport, offsetof(struct idpf_vport, vc_state));
+ memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps));
/* Since idpf_vport_queues_alloc was called with new_port, the queue
* back pointers are currently pointing to the local new_vport. Reset
diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c
index e1febc74cefd..f784eea044bd 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_main.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_main.c
@@ -3,6 +3,7 @@
#include "idpf.h"
#include "idpf_devids.h"
+#include "idpf_virtchnl.h"
#define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver"
@@ -30,6 +31,7 @@ static void idpf_remove(struct pci_dev *pdev)
idpf_sriov_configure(pdev, 0);
idpf_vc_core_deinit(adapter);
+
/* Be a good citizen and leave the device clean on exit */
adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET);
idpf_deinit_dflt_mbx(adapter);
@@ -66,6 +68,8 @@ destroy_wqs:
adapter->vport_config = NULL;
kfree(adapter->netdevs);
adapter->netdevs = NULL;
+ kfree(adapter->vcxn_mngr);
+ adapter->vcxn_mngr = NULL;
mutex_destroy(&adapter->vport_ctrl_lock);
mutex_destroy(&adapter->vector_lock);
@@ -229,8 +233,6 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
mutex_init(&adapter->queue_lock);
mutex_init(&adapter->vc_buf_lock);
- init_waitqueue_head(&adapter->vchnl_wq);
-
INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task);
INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task);
INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 2f8ad79ae3f0..6dd7a66bb897 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2023 Intel Corporation */
#include "idpf.h"
+#include "idpf_virtchnl.h"
/**
* idpf_buf_lifo_push - push a buffer pointer onto stack
diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
index 8ade4e3a9fe1..629cb5cb7c9f 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
@@ -3,6 +3,7 @@
#include "idpf.h"
#include "idpf_lan_vf_regs.h"
+#include "idpf_virtchnl.h"
#define IDPF_VF_ITR_IDX_SPACING 0x40
@@ -137,7 +138,7 @@ static void idpf_vf_trigger_reset(struct idpf_adapter *adapter,
/* Do not send VIRTCHNL2_OP_RESET_VF message on driver unload */
if (trig_cause == IDPF_HR_FUNC_RESET &&
!test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
- idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL);
+ idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL, 0);
}
/**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index d0cdd63b3d5b..a5f9b7a5effe 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -2,46 +2,192 @@
/* Copyright (C) 2023 Intel Corporation */
#include "idpf.h"
+#include "idpf_virtchnl.h"
+
+#define IDPF_VC_XN_MIN_TIMEOUT_MSEC 2000
+#define IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC (60 * 1000)
+#define IDPF_VC_XN_IDX_M GENMASK(7, 0)
+#define IDPF_VC_XN_SALT_M GENMASK(15, 8)
+#define IDPF_VC_XN_RING_LEN U8_MAX
+
+/**
+ * enum idpf_vc_xn_state - Virtchnl transaction status
+ * @IDPF_VC_XN_IDLE: not expecting a reply, ready to be used
+ * @IDPF_VC_XN_WAITING: expecting a reply, not yet received
+ * @IDPF_VC_XN_COMPLETED_SUCCESS: a reply was expected and received,
+ * buffer updated
+ * @IDPF_VC_XN_COMPLETED_FAILED: a reply was expected and received, but there
+ * was an error, buffer not updated
+ * @IDPF_VC_XN_SHUTDOWN: transaction object cannot be used, VC torn down
+ * @IDPF_VC_XN_ASYNC: transaction sent asynchronously and doesn't have the
+ * return context; a callback may be provided to handle
+ * return
+ */
+enum idpf_vc_xn_state {
+ IDPF_VC_XN_IDLE = 1,
+ IDPF_VC_XN_WAITING,
+ IDPF_VC_XN_COMPLETED_SUCCESS,
+ IDPF_VC_XN_COMPLETED_FAILED,
+ IDPF_VC_XN_SHUTDOWN,
+ IDPF_VC_XN_ASYNC,
+};
+
+struct idpf_vc_xn;
+/* Callback for asynchronous messages */
+typedef int (*async_vc_cb) (struct idpf_adapter *, struct idpf_vc_xn *,
+ const struct idpf_ctlq_msg *);
+
+/**
+ * struct idpf_vc_xn - Data structure representing virtchnl transactions
+ * @completed: virtchnl event loop uses that to signal when a reply is
+ * available, uses kernel completion API
+ * @state: virtchnl event loop stores the data below, protected by the
+ * completion's lock.
+ * @reply_sz: Original size of reply, may be > reply_buf.iov_len; it will be
+ * truncated on its way to the receiver thread according to
+ * reply_buf.iov_len.
+ * @reply: Reference to the buffer(s) where the reply data should be written
+ * to. May be 0-length (then NULL address permitted) if the reply data
+ * should be ignored.
+ * @async_handler: if sent asynchronously, a callback can be provided to handle
+ * the reply when it's received
+ * @vc_op: corresponding opcode sent with this transaction
+ * @idx: index used as retrieval on reply receive, used for cookie
+ * @salt: changed every message to make unique, used for cookie
+ */
+struct idpf_vc_xn {
+ struct completion completed;
+ enum idpf_vc_xn_state state;
+ size_t reply_sz;
+ struct kvec reply;
+ async_vc_cb async_handler;
+ u32 vc_op;
+ u8 idx;
+ u8 salt;
+};
+
+/**
+ * struct idpf_vc_xn_params - Parameters for executing transaction
+ * @send_buf: kvec for send buffer
+ * @recv_buf: kvec for recv buffer, may be NULL, must then have zero length
+ * @timeout_ms: timeout to wait for reply
+ * @async: send message asynchronously, will not wait on completion
+ * @async_handler: If sent asynchronously, optional callback handler. The user
+ * must be careful when using async handlers as the memory for
+ * the recv_buf _cannot_ be on stack if this is async.
+ * @vc_op: virtchnl op to send
+ */
+struct idpf_vc_xn_params {
+ struct kvec send_buf;
+ struct kvec recv_buf;
+ int timeout_ms;
+ bool async;
+ async_vc_cb async_handler;
+ u32 vc_op;
+};
+
+/**
+ * struct idpf_vc_xn_manager - Manager for tracking transactions
+ * @ring: backing and lookup for transactions
+ * @free_xn_bm: bitmap for free transactions
+ * @xn_bm_lock: make bitmap access synchronous where necessary
+ * @salt: used to make cookie unique every message
+ */
+struct idpf_vc_xn_manager {
+ struct idpf_vc_xn ring[IDPF_VC_XN_RING_LEN];
+ DECLARE_BITMAP(free_xn_bm, IDPF_VC_XN_RING_LEN);
+ spinlock_t xn_bm_lock;
+ u8 salt;
+};
+
+/**
+ * idpf_vid_to_vport - Translate vport id to vport pointer
+ * @adapter: private data struct
+ * @v_id: vport id to translate
+ *
+ * Returns vport matching v_id, NULL if not found.
+ */
+static
+struct idpf_vport *idpf_vid_to_vport(struct idpf_adapter *adapter, u32 v_id)
+{
+ u16 num_max_vports = idpf_get_max_vports(adapter);
+ int i;
+
+ for (i = 0; i < num_max_vports; i++)
+ if (adapter->vport_ids[i] == v_id)
+ return adapter->vports[i];
+
+ return NULL;
+}
+
+/**
+ * idpf_handle_event_link - Handle link event message
+ * @adapter: private data struct
+ * @v2e: virtchnl event message
+ */
+static void idpf_handle_event_link(struct idpf_adapter *adapter,
+ const struct virtchnl2_event *v2e)
+{
+ struct idpf_netdev_priv *np;
+ struct idpf_vport *vport;
+
+ vport = idpf_vid_to_vport(adapter, le32_to_cpu(v2e->vport_id));
+ if (!vport) {
+ dev_err_ratelimited(&adapter->pdev->dev, "Failed to find vport_id %d for link event\n",
+ v2e->vport_id);
+ return;
+ }
+ np = netdev_priv(vport->netdev);
+
+ vport->link_speed_mbps = le32_to_cpu(v2e->link_speed);
+
+ if (vport->link_up == v2e->link_status)
+ return;
+
+ vport->link_up = v2e->link_status;
+
+ if (np->state != __IDPF_VPORT_UP)
+ return;
+
+ if (vport->link_up) {
+ netif_tx_start_all_queues(vport->netdev);
+ netif_carrier_on(vport->netdev);
+ } else {
+ netif_tx_stop_all_queues(vport->netdev);
+ netif_carrier_off(vport->netdev);
+ }
+}
/**
* idpf_recv_event_msg - Receive virtchnl event message
- * @vport: virtual port structure
+ * @adapter: Driver specific private structure
* @ctlq_msg: message to copy from
*
* Receive virtchnl event message
*/
-static void idpf_recv_event_msg(struct idpf_vport *vport,
+static void idpf_recv_event_msg(struct idpf_adapter *adapter,
struct idpf_ctlq_msg *ctlq_msg)
{
- struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+ int payload_size = ctlq_msg->ctx.indirect.payload->size;
struct virtchnl2_event *v2e;
- bool link_status;
u32 event;
+ if (payload_size < sizeof(*v2e)) {
+ dev_err_ratelimited(&adapter->pdev->dev, "Failed to receive valid payload for event msg (op %d len %d)\n",
+ ctlq_msg->cookie.mbx.chnl_opcode,
+ payload_size);
+ return;
+ }
+
v2e = (struct virtchnl2_event *)ctlq_msg->ctx.indirect.payload->va;
event = le32_to_cpu(v2e->event);
switch (event) {
case VIRTCHNL2_EVENT_LINK_CHANGE:
- vport->link_speed_mbps = le32_to_cpu(v2e->link_speed);
- link_status = v2e->link_status;
-
- if (vport->link_up == link_status)
- break;
-
- vport->link_up = link_status;
- if (np->state == __IDPF_VPORT_UP) {
- if (vport->link_up) {
- netif_carrier_on(vport->netdev);
- netif_tx_start_all_queues(vport->netdev);
- } else {
- netif_tx_stop_all_queues(vport->netdev);
- netif_carrier_off(vport->netdev);
- }
- }
- break;
+ idpf_handle_event_link(adapter, v2e);
+ return;
default:
- dev_err(&vport->adapter->pdev->dev,
+ dev_err(&adapter->pdev->dev,
"Unknown event %d from PF\n", event);
break;
}
@@ -93,13 +239,14 @@ err_kfree:
* @op: virtchnl opcode
* @msg_size: size of the payload
* @msg: pointer to buffer holding the payload
+ * @cookie: unique SW generated cookie per message
*
* Will prepare the control queue message and initiates the send api
*
* Returns 0 on success, negative on failure
*/
int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
- u16 msg_size, u8 *msg)
+ u16 msg_size, u8 *msg, u16 cookie)
{
struct idpf_ctlq_msg *ctlq_msg;
struct idpf_dma_mem *dma_mem;
@@ -139,8 +286,12 @@ int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
err = -ENOMEM;
goto dma_alloc_error;
}
- memcpy(dma_mem->va, msg, msg_size);
+
+ /* It's possible we're just sending an opcode but no buffer */
+ if (msg && msg_size)
+ memcpy(dma_mem->va, msg, msg_size);
ctlq_msg->ctx.indirect.payload = dma_mem;
+ ctlq_msg->ctx.sw_cookie.data = cookie;
err = idpf_ctlq_send(&adapter->hw, adapter->hw.asq, 1, ctlq_msg);
if (err)
@@ -159,592 +310,432 @@ dma_mem_error:
return err;
}
-/**
- * idpf_find_vport - Find vport pointer from control queue message
- * @adapter: driver specific private structure
- * @vport: address of vport pointer to copy the vport from adapters vport list
- * @ctlq_msg: control queue message
+/* API for virtchnl "transaction" support ("xn" for short).
*
- * Return 0 on success, error value on failure. Also this function does check
- * for the opcodes which expect to receive payload and return error value if
- * it is not the case.
+ * We are reusing the completion lock to serialize the accesses to the
+ * transaction state for simplicity, but it could be its own separate synchro
+ * as well. For now, this API is only used from within a workqueue context;
+ * raw_spin_lock() is enough.
*/
-static int idpf_find_vport(struct idpf_adapter *adapter,
- struct idpf_vport **vport,
- struct idpf_ctlq_msg *ctlq_msg)
-{
- bool no_op = false, vid_found = false;
- int i, err = 0;
- char *vc_msg;
- u32 v_id;
+/**
+ * idpf_vc_xn_lock - Request exclusive access to vc transaction
+ * @xn: struct idpf_vc_xn* to access
+ */
+#define idpf_vc_xn_lock(xn) \
+ raw_spin_lock(&(xn)->completed.wait.lock)
- vc_msg = kcalloc(IDPF_CTLQ_MAX_BUF_LEN, sizeof(char), GFP_KERNEL);
- if (!vc_msg)
- return -ENOMEM;
+/**
+ * idpf_vc_xn_unlock - Release exclusive access to vc transaction
+ * @xn: struct idpf_vc_xn* to access
+ */
+#define idpf_vc_xn_unlock(xn) \
+ raw_spin_unlock(&(xn)->completed.wait.lock)
- if (ctlq_msg->data_len) {
- size_t payload_size = ctlq_msg->ctx.indirect.payload->size;
+/**
+ * idpf_vc_xn_release_bufs - Release reference to reply buffer(s) and
+ * reset the transaction state.
+ * @xn: struct idpf_vc_xn to update
+ */
+static void idpf_vc_xn_release_bufs(struct idpf_vc_xn *xn)
+{
+ xn->reply.iov_base = NULL;
+ xn->reply.iov_len = 0;
- if (!payload_size) {
- dev_err(&adapter->pdev->dev, "Failed to receive payload buffer\n");
- kfree(vc_msg);
+ if (xn->state != IDPF_VC_XN_SHUTDOWN)
+ xn->state = IDPF_VC_XN_IDLE;
+}
- return -EINVAL;
- }
+/**
+ * idpf_vc_xn_init - Initialize virtchnl transaction object
+ * @vcxn_mngr: pointer to vc transaction manager struct
+ */
+static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr)
+{
+ int i;
- memcpy(vc_msg, ctlq_msg->ctx.indirect.payload->va,
- min_t(size_t, payload_size, IDPF_CTLQ_MAX_BUF_LEN));
- }
-
- switch (ctlq_msg->cookie.mbx.chnl_opcode) {
- case VIRTCHNL2_OP_VERSION:
- case VIRTCHNL2_OP_GET_CAPS:
- case VIRTCHNL2_OP_CREATE_VPORT:
- case VIRTCHNL2_OP_SET_SRIOV_VFS:
- case VIRTCHNL2_OP_ALLOC_VECTORS:
- case VIRTCHNL2_OP_DEALLOC_VECTORS:
- case VIRTCHNL2_OP_GET_PTYPE_INFO:
- goto free_vc_msg;
- case VIRTCHNL2_OP_ENABLE_VPORT:
- case VIRTCHNL2_OP_DISABLE_VPORT:
- case VIRTCHNL2_OP_DESTROY_VPORT:
- v_id = le32_to_cpu(((struct virtchnl2_vport *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
- v_id = le32_to_cpu(((struct virtchnl2_config_tx_queues *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
- v_id = le32_to_cpu(((struct virtchnl2_config_rx_queues *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_ENABLE_QUEUES:
- case VIRTCHNL2_OP_DISABLE_QUEUES:
- case VIRTCHNL2_OP_DEL_QUEUES:
- v_id = le32_to_cpu(((struct virtchnl2_del_ena_dis_queues *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_ADD_QUEUES:
- v_id = le32_to_cpu(((struct virtchnl2_add_queues *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
- case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
- v_id = le32_to_cpu(((struct virtchnl2_queue_vector_maps *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_GET_STATS:
- v_id = le32_to_cpu(((struct virtchnl2_vport_stats *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_GET_RSS_LUT:
- case VIRTCHNL2_OP_SET_RSS_LUT:
- v_id = le32_to_cpu(((struct virtchnl2_rss_lut *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_GET_RSS_KEY:
- case VIRTCHNL2_OP_SET_RSS_KEY:
- v_id = le32_to_cpu(((struct virtchnl2_rss_key *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_EVENT:
- v_id = le32_to_cpu(((struct virtchnl2_event *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_LOOPBACK:
- v_id = le32_to_cpu(((struct virtchnl2_loopback *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE:
- v_id = le32_to_cpu(((struct virtchnl2_promisc_info *)vc_msg)->vport_id);
- break;
- case VIRTCHNL2_OP_ADD_MAC_ADDR:
- case VIRTCHNL2_OP_DEL_MAC_ADDR:
- v_id = le32_to_cpu(((struct virtchnl2_mac_addr_list *)vc_msg)->vport_id);
- break;
- default:
- no_op = true;
- break;
- }
+ spin_lock_init(&vcxn_mngr->xn_bm_lock);
- if (no_op)
- goto free_vc_msg;
+ for (i = 0; i < ARRAY_SIZE(vcxn_mngr->ring); i++) {
+ struct idpf_vc_xn *xn = &vcxn_mngr->ring[i];
- for (i = 0; i < idpf_get_max_vports(adapter); i++) {
- if (adapter->vport_ids[i] == v_id) {
- vid_found = true;
- break;
- }
+ xn->state = IDPF_VC_XN_IDLE;
+ xn->idx = i;
+ idpf_vc_xn_release_bufs(xn);
+ init_completion(&xn->completed);
}
- if (vid_found)
- *vport = adapter->vports[i];
- else
- err = -EINVAL;
-
-free_vc_msg:
- kfree(vc_msg);
-
- return err;
+ bitmap_fill(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN);
}
/**
- * idpf_copy_data_to_vc_buf - Copy the virtchnl response data into the buffer.
- * @adapter: driver specific private structure
- * @vport: virtual port structure
- * @ctlq_msg: msg to copy from
- * @err_enum: err bit to set on error
+ * idpf_vc_xn_shutdown - Uninitialize virtchnl transaction object
+ * @vcxn_mngr: pointer to vc transaction manager struct
*
- * Copies the payload from ctlq_msg into virtchnl buffer. Returns 0 on success,
- * negative on failure.
+ * All waiting threads will be woken-up and their transaction aborted. Further
+ * operations on that object will fail.
*/
-static int idpf_copy_data_to_vc_buf(struct idpf_adapter *adapter,
- struct idpf_vport *vport,
- struct idpf_ctlq_msg *ctlq_msg,
- enum idpf_vport_vc_state err_enum)
+static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
{
- if (ctlq_msg->cookie.mbx.chnl_retval) {
- if (vport)
- set_bit(err_enum, vport->vc_state);
- else
- set_bit(err_enum, adapter->vc_state);
+ int i;
- return -EINVAL;
- }
+ spin_lock_bh(&vcxn_mngr->xn_bm_lock);
+ bitmap_zero(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN);
+ spin_unlock_bh(&vcxn_mngr->xn_bm_lock);
- if (vport)
- memcpy(vport->vc_msg, ctlq_msg->ctx.indirect.payload->va,
- min_t(int, ctlq_msg->ctx.indirect.payload->size,
- IDPF_CTLQ_MAX_BUF_LEN));
- else
- memcpy(adapter->vc_msg, ctlq_msg->ctx.indirect.payload->va,
- min_t(int, ctlq_msg->ctx.indirect.payload->size,
- IDPF_CTLQ_MAX_BUF_LEN));
+ for (i = 0; i < ARRAY_SIZE(vcxn_mngr->ring); i++) {
+ struct idpf_vc_xn *xn = &vcxn_mngr->ring[i];
- return 0;
+ idpf_vc_xn_lock(xn);
+ xn->state = IDPF_VC_XN_SHUTDOWN;
+ idpf_vc_xn_release_bufs(xn);
+ idpf_vc_xn_unlock(xn);
+ complete_all(&xn->completed);
+ }
}
/**
- * idpf_recv_vchnl_op - helper function with common logic when handling the
- * reception of VIRTCHNL OPs.
- * @adapter: driver specific private structure
- * @vport: virtual port structure
- * @ctlq_msg: msg to copy from
- * @state: state bit used on timeout check
- * @err_state: err bit to set on error
+ * idpf_vc_xn_pop_free - Pop a free transaction from free list
+ * @vcxn_mngr: transaction manager to pop from
+ *
+ * Returns NULL if no free transactions
*/
-static void idpf_recv_vchnl_op(struct idpf_adapter *adapter,
- struct idpf_vport *vport,
- struct idpf_ctlq_msg *ctlq_msg,
- enum idpf_vport_vc_state state,
- enum idpf_vport_vc_state err_state)
+static
+struct idpf_vc_xn *idpf_vc_xn_pop_free(struct idpf_vc_xn_manager *vcxn_mngr)
{
- wait_queue_head_t *vchnl_wq;
- int err;
+ struct idpf_vc_xn *xn = NULL;
+ unsigned long free_idx;
- if (vport)
- vchnl_wq = &vport->vchnl_wq;
- else
- vchnl_wq = &adapter->vchnl_wq;
+ spin_lock_bh(&vcxn_mngr->xn_bm_lock);
+ free_idx = find_first_bit(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN);
+ if (free_idx == IDPF_VC_XN_RING_LEN)
+ goto do_unlock;
- err = idpf_copy_data_to_vc_buf(adapter, vport, ctlq_msg, err_state);
- if (wq_has_sleeper(vchnl_wq)) {
- if (vport)
- set_bit(state, vport->vc_state);
- else
- set_bit(state, adapter->vc_state);
+ clear_bit(free_idx, vcxn_mngr->free_xn_bm);
+ xn = &vcxn_mngr->ring[free_idx];
+ xn->salt = vcxn_mngr->salt++;
- wake_up(vchnl_wq);
- } else {
- if (!err) {
- dev_warn(&adapter->pdev->dev, "opcode %d received without waiting thread\n",
- ctlq_msg->cookie.mbx.chnl_opcode);
- } else {
- /* Clear the errors since there is no sleeper to pass
- * them on
- */
- if (vport)
- clear_bit(err_state, vport->vc_state);
- else
- clear_bit(err_state, adapter->vc_state);
- }
- }
+do_unlock:
+ spin_unlock_bh(&vcxn_mngr->xn_bm_lock);
+
+ return xn;
}
/**
- * idpf_recv_mb_msg - Receive message over mailbox
- * @adapter: Driver specific private structure
- * @op: virtchannel operation code
- * @msg: Received message holding buffer
- * @msg_size: message size
- *
- * Will receive control queue message and posts the receive buffer. Returns 0
- * on success and negative on failure.
+ * idpf_vc_xn_push_free - Push a free transaction to free list
+ * @vcxn_mngr: transaction manager to push to
+ * @xn: transaction to push
*/
-int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op,
- void *msg, int msg_size)
+static void idpf_vc_xn_push_free(struct idpf_vc_xn_manager *vcxn_mngr,
+ struct idpf_vc_xn *xn)
{
- struct idpf_vport *vport = NULL;
- struct idpf_ctlq_msg ctlq_msg;
- struct idpf_dma_mem *dma_mem;
- bool work_done = false;
- int num_retry = 2000;
- u16 num_q_msg;
- int err;
-
- while (1) {
- struct idpf_vport_config *vport_config;
- int payload_size = 0;
-
- /* Try to get one message */
- num_q_msg = 1;
- dma_mem = NULL;
- err = idpf_ctlq_recv(adapter->hw.arq, &num_q_msg, &ctlq_msg);
- /* If no message then decide if we have to retry based on
- * opcode
- */
- if (err || !num_q_msg) {
- /* Increasing num_retry to consider the delayed
- * responses because of large number of VF's mailbox
- * messages. If the mailbox message is received from
- * the other side, we come out of the sleep cycle
- * immediately else we wait for more time.
- */
- if (!op || !num_retry--)
- break;
- if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) {
- err = -EIO;
- break;
- }
- msleep(20);
- continue;
- }
+ idpf_vc_xn_release_bufs(xn);
+ set_bit(xn->idx, vcxn_mngr->free_xn_bm);
+}
- /* If we are here a message is received. Check if we are looking
- * for a specific message based on opcode. If it is different
- * ignore and post buffers
+/**
+ * idpf_vc_xn_exec - Perform a send/recv virtchnl transaction
+ * @adapter: driver specific private structure with vcxn_mngr
+ * @params: parameters for this particular transaction including
+ * -vc_op: virtchannel operation to send
+ * -send_buf: kvec iov for send buf and len
+ * -recv_buf: kvec iov for recv buf and len (ignored if NULL)
+ * -timeout_ms: timeout waiting for a reply (milliseconds)
+ * -async: don't wait for message reply, will lose caller context
+ * -async_handler: callback to handle async replies
+ *
+ * @returns >= 0 for success, the size of the initial reply (may or may not be
+ * >= @recv_buf.iov_len, but we never overflow @@recv_buf_iov_base). < 0 for
+ * error.
+ */
+static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter,
+ const struct idpf_vc_xn_params *params)
+{
+ const struct kvec *send_buf = &params->send_buf;
+ struct idpf_vc_xn *xn;
+ ssize_t retval;
+ u16 cookie;
+
+ xn = idpf_vc_xn_pop_free(adapter->vcxn_mngr);
+ /* no free transactions available */
+ if (!xn)
+ return -ENOSPC;
+
+ idpf_vc_xn_lock(xn);
+ if (xn->state == IDPF_VC_XN_SHUTDOWN) {
+ retval = -ENXIO;
+ goto only_unlock;
+ } else if (xn->state != IDPF_VC_XN_IDLE) {
+ /* We're just going to clobber this transaction even though
+ * it's not IDLE. If we don't reuse it we could theoretically
+ * eventually leak all the free transactions and not be able to
+ * send any messages. At least this way we make an attempt to
+ * remain functional even though something really bad is
+ * happening that's corrupting what was supposed to be free
+ * transactions.
*/
- if (op && ctlq_msg.cookie.mbx.chnl_opcode != op)
- goto post_buffs;
+ WARN_ONCE(1, "There should only be idle transactions in free list (idx %d op %d)\n",
+ xn->idx, xn->vc_op);
+ }
- err = idpf_find_vport(adapter, &vport, &ctlq_msg);
- if (err)
- goto post_buffs;
+ xn->reply = params->recv_buf;
+ xn->reply_sz = 0;
+ xn->state = params->async ? IDPF_VC_XN_ASYNC : IDPF_VC_XN_WAITING;
+ xn->vc_op = params->vc_op;
+ xn->async_handler = params->async_handler;
+ idpf_vc_xn_unlock(xn);
- if (ctlq_msg.data_len)
- payload_size = ctlq_msg.ctx.indirect.payload->size;
+ if (!params->async)
+ reinit_completion(&xn->completed);
+ cookie = FIELD_PREP(IDPF_VC_XN_SALT_M, xn->salt) |
+ FIELD_PREP(IDPF_VC_XN_IDX_M, xn->idx);
- /* All conditions are met. Either a message requested is
- * received or we received a message to be processed
- */
- switch (ctlq_msg.cookie.mbx.chnl_opcode) {
- case VIRTCHNL2_OP_VERSION:
- case VIRTCHNL2_OP_GET_CAPS:
- if (ctlq_msg.cookie.mbx.chnl_retval) {
- dev_err(&adapter->pdev->dev, "Failure initializing, vc op: %u retval: %u\n",
- ctlq_msg.cookie.mbx.chnl_opcode,
- ctlq_msg.cookie.mbx.chnl_retval);
- err = -EBADMSG;
- } else if (msg) {
- memcpy(msg, ctlq_msg.ctx.indirect.payload->va,
- min_t(int, payload_size, msg_size));
- }
- work_done = true;
- break;
- case VIRTCHNL2_OP_CREATE_VPORT:
- idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
- IDPF_VC_CREATE_VPORT,
- IDPF_VC_CREATE_VPORT_ERR);
- break;
- case VIRTCHNL2_OP_ENABLE_VPORT:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_ENA_VPORT,
- IDPF_VC_ENA_VPORT_ERR);
- break;
- case VIRTCHNL2_OP_DISABLE_VPORT:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_DIS_VPORT,
- IDPF_VC_DIS_VPORT_ERR);
- break;
- case VIRTCHNL2_OP_DESTROY_VPORT:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_DESTROY_VPORT,
- IDPF_VC_DESTROY_VPORT_ERR);
- break;
- case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_CONFIG_TXQ,
- IDPF_VC_CONFIG_TXQ_ERR);
- break;
- case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_CONFIG_RXQ,
- IDPF_VC_CONFIG_RXQ_ERR);
- break;
- case VIRTCHNL2_OP_ENABLE_QUEUES:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_ENA_QUEUES,
- IDPF_VC_ENA_QUEUES_ERR);
- break;
- case VIRTCHNL2_OP_DISABLE_QUEUES:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_DIS_QUEUES,
- IDPF_VC_DIS_QUEUES_ERR);
- break;
- case VIRTCHNL2_OP_ADD_QUEUES:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_ADD_QUEUES,
- IDPF_VC_ADD_QUEUES_ERR);
- break;
- case VIRTCHNL2_OP_DEL_QUEUES:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_DEL_QUEUES,
- IDPF_VC_DEL_QUEUES_ERR);
- break;
- case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_MAP_IRQ,
- IDPF_VC_MAP_IRQ_ERR);
- break;
- case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_UNMAP_IRQ,
- IDPF_VC_UNMAP_IRQ_ERR);
- break;
- case VIRTCHNL2_OP_GET_STATS:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_GET_STATS,
- IDPF_VC_GET_STATS_ERR);
- break;
- case VIRTCHNL2_OP_GET_RSS_LUT:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_GET_RSS_LUT,
- IDPF_VC_GET_RSS_LUT_ERR);
- break;
- case VIRTCHNL2_OP_SET_RSS_LUT:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_SET_RSS_LUT,
- IDPF_VC_SET_RSS_LUT_ERR);
- break;
- case VIRTCHNL2_OP_GET_RSS_KEY:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_GET_RSS_KEY,
- IDPF_VC_GET_RSS_KEY_ERR);
- break;
- case VIRTCHNL2_OP_SET_RSS_KEY:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_SET_RSS_KEY,
- IDPF_VC_SET_RSS_KEY_ERR);
- break;
- case VIRTCHNL2_OP_SET_SRIOV_VFS:
- idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
- IDPF_VC_SET_SRIOV_VFS,
- IDPF_VC_SET_SRIOV_VFS_ERR);
- break;
- case VIRTCHNL2_OP_ALLOC_VECTORS:
- idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
- IDPF_VC_ALLOC_VECTORS,
- IDPF_VC_ALLOC_VECTORS_ERR);
- break;
- case VIRTCHNL2_OP_DEALLOC_VECTORS:
- idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
- IDPF_VC_DEALLOC_VECTORS,
- IDPF_VC_DEALLOC_VECTORS_ERR);
- break;
- case VIRTCHNL2_OP_GET_PTYPE_INFO:
- idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
- IDPF_VC_GET_PTYPE_INFO,
- IDPF_VC_GET_PTYPE_INFO_ERR);
- break;
- case VIRTCHNL2_OP_LOOPBACK:
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_LOOPBACK_STATE,
- IDPF_VC_LOOPBACK_STATE_ERR);
- break;
- case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE:
- /* This message can only be sent asynchronously. As
- * such we'll have lost the context in which it was
- * called and thus can only really report if it looks
- * like an error occurred. Don't bother setting ERR bit
- * or waking chnl_wq since no work queue will be waiting
- * to read the message.
- */
- if (ctlq_msg.cookie.mbx.chnl_retval) {
- dev_err(&adapter->pdev->dev, "Failed to set promiscuous mode: %d\n",
- ctlq_msg.cookie.mbx.chnl_retval);
- }
- break;
- case VIRTCHNL2_OP_ADD_MAC_ADDR:
- vport_config = adapter->vport_config[vport->idx];
- if (test_and_clear_bit(IDPF_VPORT_ADD_MAC_REQ,
- vport_config->flags)) {
- /* Message was sent asynchronously. We don't
- * normally print errors here, instead
- * prefer to handle errors in the function
- * calling wait_for_event. However, if
- * asynchronous, the context in which the
- * message was sent is lost. We can't really do
- * anything about at it this point, but we
- * should at a minimum indicate that it looks
- * like something went wrong. Also don't bother
- * setting ERR bit or waking vchnl_wq since no
- * one will be waiting to read the async
- * message.
- */
- if (ctlq_msg.cookie.mbx.chnl_retval)
- dev_err(&adapter->pdev->dev, "Failed to add MAC address: %d\n",
- ctlq_msg.cookie.mbx.chnl_retval);
- break;
- }
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_ADD_MAC_ADDR,
- IDPF_VC_ADD_MAC_ADDR_ERR);
- break;
- case VIRTCHNL2_OP_DEL_MAC_ADDR:
- vport_config = adapter->vport_config[vport->idx];
- if (test_and_clear_bit(IDPF_VPORT_DEL_MAC_REQ,
- vport_config->flags)) {
- /* Message was sent asynchronously like the
- * VIRTCHNL2_OP_ADD_MAC_ADDR
- */
- if (ctlq_msg.cookie.mbx.chnl_retval)
- dev_err(&adapter->pdev->dev, "Failed to delete MAC address: %d\n",
- ctlq_msg.cookie.mbx.chnl_retval);
- break;
- }
- idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
- IDPF_VC_DEL_MAC_ADDR,
- IDPF_VC_DEL_MAC_ADDR_ERR);
- break;
- case VIRTCHNL2_OP_EVENT:
- idpf_recv_event_msg(vport, &ctlq_msg);
- break;
- default:
- dev_warn(&adapter->pdev->dev,
- "Unhandled virtchnl response %d\n",
- ctlq_msg.cookie.mbx.chnl_opcode);
- break;
- }
+ retval = idpf_send_mb_msg(adapter, params->vc_op,
+ send_buf->iov_len, send_buf->iov_base,
+ cookie);
+ if (retval) {
+ idpf_vc_xn_lock(xn);
+ goto release_and_unlock;
+ }
-post_buffs:
- if (ctlq_msg.data_len)
- dma_mem = ctlq_msg.ctx.indirect.payload;
- else
- num_q_msg = 0;
+ if (params->async)
+ return 0;
- err = idpf_ctlq_post_rx_buffs(&adapter->hw, adapter->hw.arq,
- &num_q_msg, &dma_mem);
- /* If post failed clear the only buffer we supplied */
- if (err && dma_mem)
- dma_free_coherent(&adapter->pdev->dev, dma_mem->size,
- dma_mem->va, dma_mem->pa);
+ wait_for_completion_timeout(&xn->completed,
+ msecs_to_jiffies(params->timeout_ms));
- /* Applies only if we are looking for a specific opcode */
- if (work_done)
- break;
+ /* No need to check the return value; we check the final state of the
+ * transaction below. It's possible the transaction actually gets more
+ * timeout than specified if we get preempted here but after
+ * wait_for_completion_timeout returns. This should be non-issue
+ * however.
+ */
+ idpf_vc_xn_lock(xn);
+ switch (xn->state) {
+ case IDPF_VC_XN_SHUTDOWN:
+ retval = -ENXIO;
+ goto only_unlock;
+ case IDPF_VC_XN_WAITING:
+ dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n",
+ params->vc_op, params->timeout_ms);
+ retval = -ETIME;
+ break;
+ case IDPF_VC_XN_COMPLETED_SUCCESS:
+ retval = xn->reply_sz;
+ break;
+ case IDPF_VC_XN_COMPLETED_FAILED:
+ dev_notice_ratelimited(&adapter->pdev->dev, "Transaction failed (op %d)\n",
+ params->vc_op);
+ retval = -EIO;
+ break;
+ default:
+ /* Invalid state. */
+ WARN_ON_ONCE(1);
+ retval = -EIO;
+ break;
}
- return err;
+release_and_unlock:
+ idpf_vc_xn_push_free(adapter->vcxn_mngr, xn);
+ /* If we receive a VC reply after here, it will be dropped. */
+only_unlock:
+ idpf_vc_xn_unlock(xn);
+
+ return retval;
}
/**
- * __idpf_wait_for_event - wrapper function for wait on virtchannel response
- * @adapter: Driver private data structure
- * @vport: virtual port structure
- * @state: check on state upon timeout
- * @err_check: check if this specific error bit is set
- * @timeout: Max time to wait
+ * idpf_vc_xn_forward_async - Handle async reply receives
+ * @adapter: private data struct
+ * @xn: transaction to handle
+ * @ctlq_msg: corresponding ctlq_msg
*
- * Checks if state is set upon expiry of timeout. Returns 0 on success,
- * negative on failure.
+ * For async sends we're going to lose the caller's context so, if an
+ * async_handler was provided, it can deal with the reply, otherwise we'll just
+ * check and report if there is an error.
*/
-static int __idpf_wait_for_event(struct idpf_adapter *adapter,
- struct idpf_vport *vport,
- enum idpf_vport_vc_state state,
- enum idpf_vport_vc_state err_check,
- int timeout)
+static int
+idpf_vc_xn_forward_async(struct idpf_adapter *adapter, struct idpf_vc_xn *xn,
+ const struct idpf_ctlq_msg *ctlq_msg)
{
- int time_to_wait, num_waits;
- wait_queue_head_t *vchnl_wq;
- unsigned long *vc_state;
+ int err = 0;
- time_to_wait = ((timeout <= IDPF_MAX_WAIT) ? timeout : IDPF_MAX_WAIT);
- num_waits = ((timeout <= IDPF_MAX_WAIT) ? 1 : timeout / IDPF_MAX_WAIT);
+ if (ctlq_msg->cookie.mbx.chnl_opcode != xn->vc_op) {
+ dev_err_ratelimited(&adapter->pdev->dev, "Async message opcode does not match transaction opcode (msg: %d) (xn: %d)\n",
+ ctlq_msg->cookie.mbx.chnl_opcode, xn->vc_op);
+ xn->reply_sz = 0;
+ err = -EINVAL;
+ goto release_bufs;
+ }
- if (vport) {
- vchnl_wq = &vport->vchnl_wq;
- vc_state = vport->vc_state;
- } else {
- vchnl_wq = &adapter->vchnl_wq;
- vc_state = adapter->vc_state;
+ if (xn->async_handler) {
+ err = xn->async_handler(adapter, xn, ctlq_msg);
+ goto release_bufs;
+ }
+
+ if (ctlq_msg->cookie.mbx.chnl_retval) {
+ xn->reply_sz = 0;
+ dev_err_ratelimited(&adapter->pdev->dev, "Async message failure (op %d)\n",
+ ctlq_msg->cookie.mbx.chnl_opcode);
+ err = -EINVAL;
}
- while (num_waits) {
- int event;
+release_bufs:
+ idpf_vc_xn_push_free(adapter->vcxn_mngr, xn);
+
+ return err;
+}
+
+/**
+ * idpf_vc_xn_forward_reply - copy a reply back to receiving thread
+ * @adapter: driver specific private structure with vcxn_mngr
+ * @ctlq_msg: controlq message to send back to receiving thread
+ */
+static int
+idpf_vc_xn_forward_reply(struct idpf_adapter *adapter,
+ const struct idpf_ctlq_msg *ctlq_msg)
+{
+ const void *payload = NULL;
+ size_t payload_size = 0;
+ struct idpf_vc_xn *xn;
+ u16 msg_info;
+ int err = 0;
+ u16 xn_idx;
+ u16 salt;
+
+ msg_info = ctlq_msg->ctx.sw_cookie.data;
+ xn_idx = FIELD_GET(IDPF_VC_XN_IDX_M, msg_info);
+ if (xn_idx >= ARRAY_SIZE(adapter->vcxn_mngr->ring)) {
+ dev_err_ratelimited(&adapter->pdev->dev, "Out of bounds cookie received: %02x\n",
+ xn_idx);
+ return -EINVAL;
+ }
+ xn = &adapter->vcxn_mngr->ring[xn_idx];
+ salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info);
+ if (xn->salt != salt) {
+ dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n",
+ xn->salt, salt);
+ return -EINVAL;
+ }
- /* If we are here and a reset is detected do not wait but
- * return. Reset timing is out of drivers control. So
- * while we are cleaning resources as part of reset if the
- * underlying HW mailbox is gone, wait on mailbox messages
- * is not meaningful
+ idpf_vc_xn_lock(xn);
+ switch (xn->state) {
+ case IDPF_VC_XN_WAITING:
+ /* success */
+ break;
+ case IDPF_VC_XN_IDLE:
+ dev_err_ratelimited(&adapter->pdev->dev, "Unexpected or belated VC reply (op %d)\n",
+ ctlq_msg->cookie.mbx.chnl_opcode);
+ err = -EINVAL;
+ goto out_unlock;
+ case IDPF_VC_XN_SHUTDOWN:
+ /* ENXIO is a bit special here as the recv msg loop uses that
+ * know if it should stop trying to clean the ring if we lost
+ * the virtchnl. We need to stop playing with registers and
+ * yield.
*/
- if (idpf_is_reset_detected(adapter))
- return 0;
+ err = -ENXIO;
+ goto out_unlock;
+ case IDPF_VC_XN_ASYNC:
+ err = idpf_vc_xn_forward_async(adapter, xn, ctlq_msg);
+ idpf_vc_xn_unlock(xn);
+ return err;
+ default:
+ dev_err_ratelimited(&adapter->pdev->dev, "Overwriting VC reply (op %d)\n",
+ ctlq_msg->cookie.mbx.chnl_opcode);
+ err = -EBUSY;
+ goto out_unlock;
+ }
- event = wait_event_timeout(*vchnl_wq,
- test_and_clear_bit(state, vc_state),
- msecs_to_jiffies(time_to_wait));
- if (event) {
- if (test_and_clear_bit(err_check, vc_state)) {
- dev_err(&adapter->pdev->dev, "VC response error %s\n",
- idpf_vport_vc_state_str[err_check]);
+ if (ctlq_msg->cookie.mbx.chnl_opcode != xn->vc_op) {
+ dev_err_ratelimited(&adapter->pdev->dev, "Message opcode does not match transaction opcode (msg: %d) (xn: %d)\n",
+ ctlq_msg->cookie.mbx.chnl_opcode, xn->vc_op);
+ xn->reply_sz = 0;
+ xn->state = IDPF_VC_XN_COMPLETED_FAILED;
+ err = -EINVAL;
+ goto out_unlock;
+ }
- return -EINVAL;
- }
+ if (ctlq_msg->cookie.mbx.chnl_retval) {
+ xn->reply_sz = 0;
+ xn->state = IDPF_VC_XN_COMPLETED_FAILED;
+ err = -EINVAL;
+ goto out_unlock;
+ }
- return 0;
- }
- num_waits--;
+ if (ctlq_msg->data_len) {
+ payload = ctlq_msg->ctx.indirect.payload->va;
+ payload_size = ctlq_msg->ctx.indirect.payload->size;
}
- /* Timeout occurred */
- dev_err(&adapter->pdev->dev, "VC timeout, state = %s\n",
- idpf_vport_vc_state_str[state]);
+ xn->reply_sz = payload_size;
+ xn->state = IDPF_VC_XN_COMPLETED_SUCCESS;
- return -ETIMEDOUT;
+ if (xn->reply.iov_base && xn->reply.iov_len && payload_size)
+ memcpy(xn->reply.iov_base, payload,
+ min_t(size_t, xn->reply.iov_len, payload_size));
+
+out_unlock:
+ idpf_vc_xn_unlock(xn);
+ /* we _cannot_ hold lock while calling complete */
+ complete(&xn->completed);
+
+ return err;
}
/**
- * idpf_min_wait_for_event - wait for virtchannel response
- * @adapter: Driver private data structure
- * @vport: virtual port structure
- * @state: check on state upon timeout
- * @err_check: check if this specific error bit is set
+ * idpf_recv_mb_msg - Receive message over mailbox
+ * @adapter: Driver specific private structure
*
- * Returns 0 on success, negative on failure.
+ * Will receive control queue message and posts the receive buffer. Returns 0
+ * on success and negative on failure.
*/
-static int idpf_min_wait_for_event(struct idpf_adapter *adapter,
- struct idpf_vport *vport,
- enum idpf_vport_vc_state state,
- enum idpf_vport_vc_state err_check)
+int idpf_recv_mb_msg(struct idpf_adapter *adapter)
{
- return __idpf_wait_for_event(adapter, vport, state, err_check,
- IDPF_WAIT_FOR_EVENT_TIMEO_MIN);
-}
+ struct idpf_ctlq_msg ctlq_msg;
+ struct idpf_dma_mem *dma_mem;
+ int post_err, err;
+ u16 num_recv;
-/**
- * idpf_wait_for_event - wait for virtchannel response
- * @adapter: Driver private data structure
- * @vport: virtual port structure
- * @state: check on state upon timeout after 500ms
- * @err_check: check if this specific error bit is set
- *
- * Returns 0 on success, negative on failure.
- */
-static int idpf_wait_for_event(struct idpf_adapter *adapter,
- struct idpf_vport *vport,
- enum idpf_vport_vc_state state,
- enum idpf_vport_vc_state err_check)
-{
- /* Increasing the timeout in __IDPF_INIT_SW flow to consider large
- * number of VF's mailbox message responses. When a message is received
- * on mailbox, this thread is woken up by the idpf_recv_mb_msg before
- * the timeout expires. Only in the error case i.e. if no message is
- * received on mailbox, we wait for the complete timeout which is
- * less likely to happen.
- */
- return __idpf_wait_for_event(adapter, vport, state, err_check,
- IDPF_WAIT_FOR_EVENT_TIMEO);
+ while (1) {
+ /* This will get <= num_recv messages and output how many
+ * actually received on num_recv.
+ */
+ num_recv = 1;
+ err = idpf_ctlq_recv(adapter->hw.arq, &num_recv, &ctlq_msg);
+ if (err || !num_recv)
+ break;
+
+ if (ctlq_msg.data_len) {
+ dma_mem = ctlq_msg.ctx.indirect.payload;
+ } else {
+ dma_mem = NULL;
+ num_recv = 0;
+ }
+
+ if (ctlq_msg.cookie.mbx.chnl_opcode == VIRTCHNL2_OP_EVENT)
+ idpf_recv_event_msg(adapter, &ctlq_msg);
+ else
+ err = idpf_vc_xn_forward_reply(adapter, &ctlq_msg);
+
+ post_err = idpf_ctlq_post_rx_buffs(&adapter->hw,
+ adapter->hw.arq,
+ &num_recv, &dma_mem);
+
+ /* If post failed clear the only buffer we supplied */
+ if (post_err) {
+ if (dma_mem)
+ dmam_free_coherent(&adapter->pdev->dev,
+ dma_mem->size, dma_mem->va,
+ dma_mem->pa);
+ break;
+ }
+
+ /* virtchnl trying to shutdown, stop cleaning */
+ if (err == -ENXIO)
+ break;
+ }
+
+ return err;
}
/**
@@ -785,7 +776,11 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport)
*/
static int idpf_send_ver_msg(struct idpf_adapter *adapter)
{
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_version_info vvi;
+ ssize_t reply_sz;
+ u32 major, minor;
+ int err = 0;
if (adapter->virt_ver_maj) {
vvi.major = cpu_to_le32(adapter->virt_ver_maj);
@@ -795,43 +790,29 @@ static int idpf_send_ver_msg(struct idpf_adapter *adapter)
vvi.minor = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MINOR);
}
- return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_VERSION, sizeof(vvi),
- (u8 *)&vvi);
-}
-
-/**
- * idpf_recv_ver_msg - Receive virtchnl version message
- * @adapter: Driver specific private structure
- *
- * Receive virtchnl version message. Returns 0 on success, -EAGAIN if we need
- * to send version message again, otherwise negative on failure.
- */
-static int idpf_recv_ver_msg(struct idpf_adapter *adapter)
-{
- struct virtchnl2_version_info vvi;
- u32 major, minor;
- int err;
+ xn_params.vc_op = VIRTCHNL2_OP_VERSION;
+ xn_params.send_buf.iov_base = &vvi;
+ xn_params.send_buf.iov_len = sizeof(vvi);
+ xn_params.recv_buf = xn_params.send_buf;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
- err = idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_VERSION, &vvi,
- sizeof(vvi));
- if (err)
- return err;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
+ if (reply_sz < sizeof(vvi))
+ return -EIO;
major = le32_to_cpu(vvi.major);
minor = le32_to_cpu(vvi.minor);
if (major > IDPF_VIRTCHNL_VERSION_MAJOR) {
- dev_warn(&adapter->pdev->dev,
- "Virtchnl major version (%d) greater than supported\n",
- major);
-
+ dev_warn(&adapter->pdev->dev, "Virtchnl major version greater than supported\n");
return -EINVAL;
}
if (major == IDPF_VIRTCHNL_VERSION_MAJOR &&
minor > IDPF_VIRTCHNL_VERSION_MINOR)
- dev_warn(&adapter->pdev->dev,
- "Virtchnl minor version (%d) didn't match\n", minor);
+ dev_warn(&adapter->pdev->dev, "Virtchnl minor version didn't match\n");
/* If we have a mismatch, resend version to update receiver on what
* version we will use.
@@ -856,7 +837,9 @@ static int idpf_recv_ver_msg(struct idpf_adapter *adapter)
*/
static int idpf_send_get_caps_msg(struct idpf_adapter *adapter)
{
- struct virtchnl2_get_capabilities caps = { };
+ struct virtchnl2_get_capabilities caps = {};
+ struct idpf_vc_xn_params xn_params = {};
+ ssize_t reply_sz;
caps.csum_caps =
cpu_to_le32(VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 |
@@ -913,21 +896,20 @@ static int idpf_send_get_caps_msg(struct idpf_adapter *adapter)
VIRTCHNL2_CAP_PROMISC |
VIRTCHNL2_CAP_LOOPBACK);
- return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, sizeof(caps),
- (u8 *)&caps);
-}
+ xn_params.vc_op = VIRTCHNL2_OP_GET_CAPS;
+ xn_params.send_buf.iov_base = &caps;
+ xn_params.send_buf.iov_len = sizeof(caps);
+ xn_params.recv_buf.iov_base = &adapter->caps;
+ xn_params.recv_buf.iov_len = sizeof(adapter->caps);
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
-/**
- * idpf_recv_get_caps_msg - Receive virtchnl get capabilities message
- * @adapter: Driver specific private structure
- *
- * Receive virtchnl get capabilities message. Returns 0 on success, negative on
- * failure.
- */
-static int idpf_recv_get_caps_msg(struct idpf_adapter *adapter)
-{
- return idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, &adapter->caps,
- sizeof(struct virtchnl2_get_capabilities));
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
+ if (reply_sz < sizeof(adapter->caps))
+ return -EIO;
+
+ return 0;
}
/**
@@ -1254,8 +1236,10 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
struct idpf_vport_max_q *max_q)
{
struct virtchnl2_create_vport *vport_msg;
+ struct idpf_vc_xn_params xn_params = {};
u16 idx = adapter->next_vport;
int err, buf_size;
+ ssize_t reply_sz;
buf_size = sizeof(struct virtchnl2_create_vport);
if (!adapter->vport_params_reqd[idx]) {
@@ -1286,35 +1270,38 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
return err;
}
- mutex_lock(&adapter->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CREATE_VPORT, buf_size,
- (u8 *)vport_msg);
- if (err)
- goto rel_lock;
-
- err = idpf_wait_for_event(adapter, NULL, IDPF_VC_CREATE_VPORT,
- IDPF_VC_CREATE_VPORT_ERR);
- if (err) {
- dev_err(&adapter->pdev->dev, "Failed to receive create vport message");
-
- goto rel_lock;
- }
-
if (!adapter->vport_params_recvd[idx]) {
adapter->vport_params_recvd[idx] = kzalloc(IDPF_CTLQ_MAX_BUF_LEN,
GFP_KERNEL);
if (!adapter->vport_params_recvd[idx]) {
err = -ENOMEM;
- goto rel_lock;
+ goto free_vport_params;
}
}
- vport_msg = adapter->vport_params_recvd[idx];
- memcpy(vport_msg, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN);
+ xn_params.vc_op = VIRTCHNL2_OP_CREATE_VPORT;
+ xn_params.send_buf.iov_base = vport_msg;
+ xn_params.send_buf.iov_len = buf_size;
+ xn_params.recv_buf.iov_base = adapter->vport_params_recvd[idx];
+ xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0) {
+ err = reply_sz;
+ goto free_vport_params;
+ }
+ if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) {
+ err = -EIO;
+ goto free_vport_params;
+ }
-rel_lock:
- mutex_unlock(&adapter->vc_buf_lock);
+ return 0;
+
+free_vport_params:
+ kfree(adapter->vport_params_recvd[idx]);
+ adapter->vport_params_recvd[idx] = NULL;
+ kfree(adapter->vport_params_reqd[idx]);
+ adapter->vport_params_reqd[idx] = NULL;
return err;
}
@@ -1366,26 +1353,19 @@ int idpf_check_supported_desc_ids(struct idpf_vport *vport)
*/
int idpf_send_destroy_vport_msg(struct idpf_vport *vport)
{
- struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_vport v_id;
- int err;
+ ssize_t reply_sz;
v_id.vport_id = cpu_to_le32(vport->vport_id);
- mutex_lock(&vport->vc_buf_lock);
+ xn_params.vc_op = VIRTCHNL2_OP_DESTROY_VPORT;
+ xn_params.send_buf.iov_base = &v_id;
+ xn_params.send_buf.iov_len = sizeof(v_id);
+ xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DESTROY_VPORT,
- sizeof(v_id), (u8 *)&v_id);
- if (err)
- goto rel_lock;
-
- err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DESTROY_VPORT,
- IDPF_VC_DESTROY_VPORT_ERR);
-
-rel_lock:
- mutex_unlock(&vport->vc_buf_lock);
-
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
/**
@@ -1397,26 +1377,19 @@ rel_lock:
*/
int idpf_send_enable_vport_msg(struct idpf_vport *vport)
{
- struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_vport v_id;
- int err;
+ ssize_t reply_sz;
v_id.vport_id = cpu_to_le32(vport->vport_id);
- mutex_lock(&vport->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ENABLE_VPORT,
- sizeof(v_id), (u8 *)&v_id);
- if (err)
- goto rel_lock;
-
- err = idpf_wait_for_event(adapter, vport, IDPF_VC_ENA_VPORT,
- IDPF_VC_ENA_VPORT_ERR);
+ xn_params.vc_op = VIRTCHNL2_OP_ENABLE_VPORT;
+ xn_params.send_buf.iov_base = &v_id;
+ xn_params.send_buf.iov_len = sizeof(v_id);
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
-rel_lock:
- mutex_unlock(&vport->vc_buf_lock);
-
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
/**
@@ -1428,26 +1401,19 @@ rel_lock:
*/
int idpf_send_disable_vport_msg(struct idpf_vport *vport)
{
- struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_vport v_id;
- int err;
+ ssize_t reply_sz;
v_id.vport_id = cpu_to_le32(vport->vport_id);
- mutex_lock(&vport->vc_buf_lock);
+ xn_params.vc_op = VIRTCHNL2_OP_DISABLE_VPORT;
+ xn_params.send_buf.iov_base = &v_id;
+ xn_params.send_buf.iov_len = sizeof(v_id);
+ xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DISABLE_VPORT,
- sizeof(v_id), (u8 *)&v_id);
- if (err)
- goto rel_lock;
-
- err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DIS_VPORT,
- IDPF_VC_DIS_VPORT_ERR);
-
-rel_lock:
- mutex_unlock(&vport->vc_buf_lock);
-
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
/**
@@ -1459,11 +1425,13 @@ rel_lock:
*/
static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
{
- struct virtchnl2_config_tx_queues *ctq;
+ struct virtchnl2_config_tx_queues *ctq __free(kfree) = NULL;
+ struct virtchnl2_txq_info *qi __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
u32 config_sz, chunk_sz, buf_sz;
int totqs, num_msgs, num_chunks;
- struct virtchnl2_txq_info *qi;
- int err = 0, i, k = 0;
+ ssize_t reply_sz;
+ int i, k = 0;
totqs = vport->num_txq + vport->num_complq;
qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL);
@@ -1524,10 +1492,8 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
}
/* Make sure accounting agrees */
- if (k != totqs) {
- err = -EINVAL;
- goto error;
- }
+ if (k != totqs)
+ return -EINVAL;
/* Chunk up the queue contexts into multiple messages to avoid
* sending a control queue message buffer that is too large
@@ -1541,12 +1507,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
buf_sz = struct_size(ctq, qinfo, num_chunks);
ctq = kzalloc(buf_sz, GFP_KERNEL);
- if (!ctq) {
- err = -ENOMEM;
- goto error;
- }
+ if (!ctq)
+ return -ENOMEM;
- mutex_lock(&vport->vc_buf_lock);
+ xn_params.vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
for (i = 0, k = 0; i < num_msgs; i++) {
memset(ctq, 0, buf_sz);
@@ -1554,17 +1519,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
ctq->num_qinfo = cpu_to_le16(num_chunks);
memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks);
- err = idpf_send_mb_msg(vport->adapter,
- VIRTCHNL2_OP_CONFIG_TX_QUEUES,
- buf_sz, (u8 *)ctq);
- if (err)
- goto mbx_error;
-
- err = idpf_wait_for_event(vport->adapter, vport,
- IDPF_VC_CONFIG_TXQ,
- IDPF_VC_CONFIG_TXQ_ERR);
- if (err)
- goto mbx_error;
+ xn_params.send_buf.iov_base = ctq;
+ xn_params.send_buf.iov_len = buf_sz;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
k += num_chunks;
totqs -= num_chunks;
@@ -1573,13 +1532,7 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
buf_sz = struct_size(ctq, qinfo, num_chunks);
}
-mbx_error:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(ctq);
-error:
- kfree(qi);
-
- return err;
+ return 0;
}
/**
@@ -1591,11 +1544,13 @@ error:
*/
static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
{
- struct virtchnl2_config_rx_queues *crq;
+ struct virtchnl2_config_rx_queues *crq __free(kfree) = NULL;
+ struct virtchnl2_rxq_info *qi __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
u32 config_sz, chunk_sz, buf_sz;
int totqs, num_msgs, num_chunks;
- struct virtchnl2_rxq_info *qi;
- int err = 0, i, k = 0;
+ ssize_t reply_sz;
+ int i, k = 0;
totqs = vport->num_rxq + vport->num_bufq;
qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL);
@@ -1676,10 +1631,8 @@ common_qi_fields:
}
/* Make sure accounting agrees */
- if (k != totqs) {
- err = -EINVAL;
- goto error;
- }
+ if (k != totqs)
+ return -EINVAL;
/* Chunk up the queue contexts into multiple messages to avoid
* sending a control queue message buffer that is too large
@@ -1693,12 +1646,11 @@ common_qi_fields:
buf_sz = struct_size(crq, qinfo, num_chunks);
crq = kzalloc(buf_sz, GFP_KERNEL);
- if (!crq) {
- err = -ENOMEM;
- goto error;
- }
+ if (!crq)
+ return -ENOMEM;
- mutex_lock(&vport->vc_buf_lock);
+ xn_params.vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
for (i = 0, k = 0; i < num_msgs; i++) {
memset(crq, 0, buf_sz);
@@ -1706,17 +1658,11 @@ common_qi_fields:
crq->num_qinfo = cpu_to_le16(num_chunks);
memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks);
- err = idpf_send_mb_msg(vport->adapter,
- VIRTCHNL2_OP_CONFIG_RX_QUEUES,
- buf_sz, (u8 *)crq);
- if (err)
- goto mbx_error;
-
- err = idpf_wait_for_event(vport->adapter, vport,
- IDPF_VC_CONFIG_RXQ,
- IDPF_VC_CONFIG_RXQ_ERR);
- if (err)
- goto mbx_error;
+ xn_params.send_buf.iov_base = crq;
+ xn_params.send_buf.iov_len = buf_sz;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
k += num_chunks;
totqs -= num_chunks;
@@ -1725,42 +1671,28 @@ common_qi_fields:
buf_sz = struct_size(crq, qinfo, num_chunks);
}
-mbx_error:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(crq);
-error:
- kfree(qi);
-
- return err;
+ return 0;
}
/**
* idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable
* queues message
* @vport: virtual port data structure
- * @vc_op: virtchnl op code to send
+ * @ena: if true enable, false disable
*
* Send enable or disable queues virtchnl message. Returns 0 on success,
* negative on failure.
*/
-static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op)
+static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
{
+ struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL;
+ struct virtchnl2_queue_chunk *qc __free(kfree) = NULL;
u32 num_msgs, num_chunks, num_txq, num_rxq, num_q;
- struct idpf_adapter *adapter = vport->adapter;
- struct virtchnl2_del_ena_dis_queues *eq;
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_queue_chunks *qcs;
- struct virtchnl2_queue_chunk *qc;
u32 config_sz, chunk_sz, buf_sz;
- int i, j, k = 0, err = 0;
-
- /* validate virtchnl op */
- switch (vc_op) {
- case VIRTCHNL2_OP_ENABLE_QUEUES:
- case VIRTCHNL2_OP_DISABLE_QUEUES:
- break;
- default:
- return -EINVAL;
- }
+ ssize_t reply_sz;
+ int i, j, k = 0;
num_txq = vport->num_txq + vport->num_complq;
num_rxq = vport->num_rxq + vport->num_bufq;
@@ -1779,10 +1711,8 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op)
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
}
- if (vport->num_txq != k) {
- err = -EINVAL;
- goto error;
- }
+ if (vport->num_txq != k)
+ return -EINVAL;
if (!idpf_is_queue_model_split(vport->txq_model))
goto setup_rx;
@@ -1794,10 +1724,8 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op)
qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
- if (vport->num_complq != (k - vport->num_txq)) {
- err = -EINVAL;
- goto error;
- }
+ if (vport->num_complq != (k - vport->num_txq))
+ return -EINVAL;
setup_rx:
for (i = 0; i < vport->num_rxq_grp; i++) {
@@ -1823,10 +1751,8 @@ setup_rx:
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
}
- if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) {
- err = -EINVAL;
- goto error;
- }
+ if (vport->num_rxq != k - (vport->num_txq + vport->num_complq))
+ return -EINVAL;
if (!idpf_is_queue_model_split(vport->rxq_model))
goto send_msg;
@@ -1845,10 +1771,8 @@ setup_rx:
}
if (vport->num_bufq != k - (vport->num_txq +
vport->num_complq +
- vport->num_rxq)) {
- err = -EINVAL;
- goto error;
- }
+ vport->num_rxq))
+ return -EINVAL;
send_msg:
/* Chunk up the queue info into multiple messages */
@@ -1861,12 +1785,16 @@ send_msg:
buf_sz = struct_size(eq, chunks.chunks, num_chunks);
eq = kzalloc(buf_sz, GFP_KERNEL);
- if (!eq) {
- err = -ENOMEM;
- goto error;
- }
+ if (!eq)
+ return -ENOMEM;
- mutex_lock(&vport->vc_buf_lock);
+ if (ena) {
+ xn_params.vc_op = VIRTCHNL2_OP_ENABLE_QUEUES;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ } else {
+ xn_params.vc_op = VIRTCHNL2_OP_DISABLE_QUEUES;
+ xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
+ }
for (i = 0, k = 0; i < num_msgs; i++) {
memset(eq, 0, buf_sz);
@@ -1875,20 +1803,11 @@ send_msg:
qcs = &eq->chunks;
memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks);
- err = idpf_send_mb_msg(adapter, vc_op, buf_sz, (u8 *)eq);
- if (err)
- goto mbx_error;
-
- if (vc_op == VIRTCHNL2_OP_ENABLE_QUEUES)
- err = idpf_wait_for_event(adapter, vport,
- IDPF_VC_ENA_QUEUES,
- IDPF_VC_ENA_QUEUES_ERR);
- else
- err = idpf_min_wait_for_event(adapter, vport,
- IDPF_VC_DIS_QUEUES,
- IDPF_VC_DIS_QUEUES_ERR);
- if (err)
- goto mbx_error;
+ xn_params.send_buf.iov_base = eq;
+ xn_params.send_buf.iov_len = buf_sz;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
k += num_chunks;
num_q -= num_chunks;
@@ -1897,13 +1816,7 @@ send_msg:
buf_sz = struct_size(eq, chunks.chunks, num_chunks);
}
-mbx_error:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(eq);
-error:
- kfree(qc);
-
- return err;
+ return 0;
}
/**
@@ -1917,12 +1830,13 @@ error:
*/
int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
{
- struct idpf_adapter *adapter = vport->adapter;
- struct virtchnl2_queue_vector_maps *vqvm;
- struct virtchnl2_queue_vector *vqv;
+ struct virtchnl2_queue_vector_maps *vqvm __free(kfree) = NULL;
+ struct virtchnl2_queue_vector *vqv __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
u32 config_sz, chunk_sz, buf_sz;
u32 num_msgs, num_chunks, num_q;
- int i, j, k = 0, err = 0;
+ ssize_t reply_sz;
+ int i, j, k = 0;
num_q = vport->num_txq + vport->num_rxq;
@@ -1952,10 +1866,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
}
}
- if (vport->num_txq != k) {
- err = -EINVAL;
- goto error;
- }
+ if (vport->num_txq != k)
+ return -EINVAL;
for (i = 0; i < vport->num_rxq_grp; i++) {
struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
@@ -1982,15 +1894,11 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
}
if (idpf_is_queue_model_split(vport->txq_model)) {
- if (vport->num_rxq != k - vport->num_complq) {
- err = -EINVAL;
- goto error;
- }
+ if (vport->num_rxq != k - vport->num_complq)
+ return -EINVAL;
} else {
- if (vport->num_rxq != k - vport->num_txq) {
- err = -EINVAL;
- goto error;
- }
+ if (vport->num_rxq != k - vport->num_txq)
+ return -EINVAL;
}
/* Chunk up the vector info into multiple messages */
@@ -2003,39 +1911,28 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
buf_sz = struct_size(vqvm, qv_maps, num_chunks);
vqvm = kzalloc(buf_sz, GFP_KERNEL);
- if (!vqvm) {
- err = -ENOMEM;
- goto error;
- }
+ if (!vqvm)
+ return -ENOMEM;
- mutex_lock(&vport->vc_buf_lock);
+ if (map) {
+ xn_params.vc_op = VIRTCHNL2_OP_MAP_QUEUE_VECTOR;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ } else {
+ xn_params.vc_op = VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR;
+ xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
+ }
for (i = 0, k = 0; i < num_msgs; i++) {
memset(vqvm, 0, buf_sz);
+ xn_params.send_buf.iov_base = vqvm;
+ xn_params.send_buf.iov_len = buf_sz;
vqvm->vport_id = cpu_to_le32(vport->vport_id);
vqvm->num_qv_maps = cpu_to_le16(num_chunks);
memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks);
- if (map) {
- err = idpf_send_mb_msg(adapter,
- VIRTCHNL2_OP_MAP_QUEUE_VECTOR,
- buf_sz, (u8 *)vqvm);
- if (!err)
- err = idpf_wait_for_event(adapter, vport,
- IDPF_VC_MAP_IRQ,
- IDPF_VC_MAP_IRQ_ERR);
- } else {
- err = idpf_send_mb_msg(adapter,
- VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR,
- buf_sz, (u8 *)vqvm);
- if (!err)
- err =
- idpf_min_wait_for_event(adapter, vport,
- IDPF_VC_UNMAP_IRQ,
- IDPF_VC_UNMAP_IRQ_ERR);
- }
- if (err)
- goto mbx_error;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
k += num_chunks;
num_q -= num_chunks;
@@ -2044,13 +1941,7 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
buf_sz = struct_size(vqvm, qv_maps, num_chunks);
}
-mbx_error:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(vqvm);
-error:
- kfree(vqv);
-
- return err;
+ return 0;
}
/**
@@ -2062,7 +1953,7 @@ error:
*/
int idpf_send_enable_queues_msg(struct idpf_vport *vport)
{
- return idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_ENABLE_QUEUES);
+ return idpf_send_ena_dis_queues_msg(vport, true);
}
/**
@@ -2076,7 +1967,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport)
{
int err, i;
- err = idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_DISABLE_QUEUES);
+ err = idpf_send_ena_dis_queues_msg(vport, false);
if (err)
return err;
@@ -2087,8 +1978,10 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport)
set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
/* schedule the napi to receive all the marker packets */
+ local_bh_disable();
for (i = 0; i < vport->num_q_vectors; i++)
napi_schedule(&vport->q_vectors[i].napi);
+ local_bh_enable();
return idpf_wait_for_marker_event(vport);
}
@@ -2122,22 +2015,21 @@ static void idpf_convert_reg_to_queue_chunks(struct virtchnl2_queue_chunk *dchun
*/
int idpf_send_delete_queues_msg(struct idpf_vport *vport)
{
- struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL;
struct virtchnl2_create_vport *vport_params;
struct virtchnl2_queue_reg_chunks *chunks;
- struct virtchnl2_del_ena_dis_queues *eq;
+ struct idpf_vc_xn_params xn_params = {};
struct idpf_vport_config *vport_config;
u16 vport_idx = vport->idx;
- int buf_size, err;
+ ssize_t reply_sz;
u16 num_chunks;
+ int buf_size;
- vport_config = adapter->vport_config[vport_idx];
+ vport_config = vport->adapter->vport_config[vport_idx];
if (vport_config->req_qs_chunks) {
- struct virtchnl2_add_queues *vc_aq =
- (struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
- chunks = &vc_aq->chunks;
+ chunks = &vport_config->req_qs_chunks->chunks;
} else {
- vport_params = adapter->vport_params_recvd[vport_idx];
+ vport_params = vport->adapter->vport_params_recvd[vport_idx];
chunks = &vport_params->chunks;
}
@@ -2154,21 +2046,13 @@ int idpf_send_delete_queues_msg(struct idpf_vport *vport)
idpf_convert_reg_to_queue_chunks(eq->chunks.chunks, chunks->chunks,
num_chunks);
- mutex_lock(&vport->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEL_QUEUES,
- buf_size, (u8 *)eq);
- if (err)
- goto rel_lock;
-
- err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DEL_QUEUES,
- IDPF_VC_DEL_QUEUES_ERR);
-
-rel_lock:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(eq);
+ xn_params.vc_op = VIRTCHNL2_OP_DEL_QUEUES;
+ xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
+ xn_params.send_buf.iov_base = eq;
+ xn_params.send_buf.iov_len = buf_size;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
/**
@@ -2203,14 +2087,21 @@ int idpf_send_config_queues_msg(struct idpf_vport *vport)
int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
u16 num_complq, u16 num_rx_q, u16 num_rx_bufq)
{
- struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_add_queues *vc_msg __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
struct idpf_vport_config *vport_config;
- struct virtchnl2_add_queues aq = { };
- struct virtchnl2_add_queues *vc_msg;
+ struct virtchnl2_add_queues aq = {};
u16 vport_idx = vport->idx;
- int size, err;
+ ssize_t reply_sz;
+ int size;
+
+ vc_msg = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!vc_msg)
+ return -ENOMEM;
- vport_config = adapter->vport_config[vport_idx];
+ vport_config = vport->adapter->vport_config[vport_idx];
+ kfree(vport_config->req_qs_chunks);
+ vport_config->req_qs_chunks = NULL;
aq.vport_id = cpu_to_le32(vport->vport_id);
aq.num_tx_q = cpu_to_le16(num_tx_q);
@@ -2218,47 +2109,33 @@ int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
aq.num_rx_q = cpu_to_le16(num_rx_q);
aq.num_rx_bufq = cpu_to_le16(num_rx_bufq);
- mutex_lock(&((struct idpf_vport *)vport)->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ADD_QUEUES,
- sizeof(struct virtchnl2_add_queues), (u8 *)&aq);
- if (err)
- goto rel_lock;
-
- /* We want vport to be const to prevent incidental code changes making
- * changes to the vport config. We're making a special exception here
- * to discard const to use the virtchnl.
- */
- err = idpf_wait_for_event(adapter, (struct idpf_vport *)vport,
- IDPF_VC_ADD_QUEUES, IDPF_VC_ADD_QUEUES_ERR);
- if (err)
- goto rel_lock;
-
- kfree(vport_config->req_qs_chunks);
- vport_config->req_qs_chunks = NULL;
+ xn_params.vc_op = VIRTCHNL2_OP_ADD_QUEUES;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ xn_params.send_buf.iov_base = &aq;
+ xn_params.send_buf.iov_len = sizeof(aq);
+ xn_params.recv_buf.iov_base = vc_msg;
+ xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
- vc_msg = (struct virtchnl2_add_queues *)vport->vc_msg;
/* compare vc_msg num queues with vport num queues */
if (le16_to_cpu(vc_msg->num_tx_q) != num_tx_q ||
le16_to_cpu(vc_msg->num_rx_q) != num_rx_q ||
le16_to_cpu(vc_msg->num_tx_complq) != num_complq ||
- le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) {
- err = -EINVAL;
- goto rel_lock;
- }
+ le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq)
+ return -EINVAL;
size = struct_size(vc_msg, chunks.chunks,
le16_to_cpu(vc_msg->chunks.num_chunks));
- vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL);
- if (!vport_config->req_qs_chunks) {
- err = -ENOMEM;
- goto rel_lock;
- }
+ if (reply_sz < size)
+ return -EIO;
-rel_lock:
- mutex_unlock(&((struct idpf_vport *)vport)->vc_buf_lock);
+ vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL);
+ if (!vport_config->req_qs_chunks)
+ return -ENOMEM;
- return err;
+ return 0;
}
/**
@@ -2270,53 +2147,49 @@ rel_lock:
*/
int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors)
{
- struct virtchnl2_alloc_vectors *alloc_vec, *rcvd_vec;
- struct virtchnl2_alloc_vectors ac = { };
+ struct virtchnl2_alloc_vectors *rcvd_vec __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
+ struct virtchnl2_alloc_vectors ac = {};
+ ssize_t reply_sz;
u16 num_vchunks;
- int size, err;
+ int size;
ac.num_vectors = cpu_to_le16(num_vectors);
- mutex_lock(&adapter->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ALLOC_VECTORS,
- sizeof(ac), (u8 *)&ac);
- if (err)
- goto rel_lock;
+ rcvd_vec = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!rcvd_vec)
+ return -ENOMEM;
- err = idpf_wait_for_event(adapter, NULL, IDPF_VC_ALLOC_VECTORS,
- IDPF_VC_ALLOC_VECTORS_ERR);
- if (err)
- goto rel_lock;
+ xn_params.vc_op = VIRTCHNL2_OP_ALLOC_VECTORS;
+ xn_params.send_buf.iov_base = &ac;
+ xn_params.send_buf.iov_len = sizeof(ac);
+ xn_params.recv_buf.iov_base = rcvd_vec;
+ xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
- rcvd_vec = (struct virtchnl2_alloc_vectors *)adapter->vc_msg;
num_vchunks = le16_to_cpu(rcvd_vec->vchunks.num_vchunks);
-
size = struct_size(rcvd_vec, vchunks.vchunks, num_vchunks);
- if (size > sizeof(adapter->vc_msg)) {
- err = -EINVAL;
- goto rel_lock;
- }
+ if (reply_sz < size)
+ return -EIO;
+
+ if (size > IDPF_CTLQ_MAX_BUF_LEN)
+ return -EINVAL;
kfree(adapter->req_vec_chunks);
- adapter->req_vec_chunks = NULL;
- adapter->req_vec_chunks = kmemdup(adapter->vc_msg, size, GFP_KERNEL);
- if (!adapter->req_vec_chunks) {
- err = -ENOMEM;
- goto rel_lock;
- }
+ adapter->req_vec_chunks = kmemdup(rcvd_vec, size, GFP_KERNEL);
+ if (!adapter->req_vec_chunks)
+ return -ENOMEM;
- alloc_vec = adapter->req_vec_chunks;
- if (le16_to_cpu(alloc_vec->num_vectors) < num_vectors) {
+ if (le16_to_cpu(adapter->req_vec_chunks->num_vectors) < num_vectors) {
kfree(adapter->req_vec_chunks);
adapter->req_vec_chunks = NULL;
- err = -EINVAL;
+ return -EINVAL;
}
-rel_lock:
- mutex_unlock(&adapter->vc_buf_lock);
-
- return err;
+ return 0;
}
/**
@@ -2329,29 +2202,24 @@ int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter)
{
struct virtchnl2_alloc_vectors *ac = adapter->req_vec_chunks;
struct virtchnl2_vector_chunks *vcs = &ac->vchunks;
- int buf_size, err;
+ struct idpf_vc_xn_params xn_params = {};
+ ssize_t reply_sz;
+ int buf_size;
buf_size = struct_size(vcs, vchunks, le16_to_cpu(vcs->num_vchunks));
- mutex_lock(&adapter->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEALLOC_VECTORS, buf_size,
- (u8 *)vcs);
- if (err)
- goto rel_lock;
-
- err = idpf_min_wait_for_event(adapter, NULL, IDPF_VC_DEALLOC_VECTORS,
- IDPF_VC_DEALLOC_VECTORS_ERR);
- if (err)
- goto rel_lock;
+ xn_params.vc_op = VIRTCHNL2_OP_DEALLOC_VECTORS;
+ xn_params.send_buf.iov_base = vcs;
+ xn_params.send_buf.iov_len = buf_size;
+ xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
kfree(adapter->req_vec_chunks);
adapter->req_vec_chunks = NULL;
-rel_lock:
- mutex_unlock(&adapter->vc_buf_lock);
-
- return err;
+ return 0;
}
/**
@@ -2374,25 +2242,18 @@ static int idpf_get_max_vfs(struct idpf_adapter *adapter)
*/
int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs)
{
- struct virtchnl2_sriov_vfs_info svi = { };
- int err;
+ struct virtchnl2_sriov_vfs_info svi = {};
+ struct idpf_vc_xn_params xn_params = {};
+ ssize_t reply_sz;
svi.num_vfs = cpu_to_le16(num_vfs);
+ xn_params.vc_op = VIRTCHNL2_OP_SET_SRIOV_VFS;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ xn_params.send_buf.iov_base = &svi;
+ xn_params.send_buf.iov_len = sizeof(svi);
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
- mutex_lock(&adapter->vc_buf_lock);
-
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_SRIOV_VFS,
- sizeof(svi), (u8 *)&svi);
- if (err)
- goto rel_lock;
-
- err = idpf_wait_for_event(adapter, NULL, IDPF_VC_SET_SRIOV_VFS,
- IDPF_VC_SET_SRIOV_VFS_ERR);
-
-rel_lock:
- mutex_unlock(&adapter->vc_buf_lock);
-
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
/**
@@ -2405,10 +2266,10 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport)
{
struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
struct rtnl_link_stats64 *netstats = &np->netstats;
- struct idpf_adapter *adapter = vport->adapter;
- struct virtchnl2_vport_stats stats_msg = { };
- struct virtchnl2_vport_stats *stats;
- int err;
+ struct virtchnl2_vport_stats stats_msg = {};
+ struct idpf_vc_xn_params xn_params = {};
+ ssize_t reply_sz;
+
/* Don't send get_stats message if the link is down */
if (np->state <= __IDPF_VPORT_DOWN)
@@ -2416,46 +2277,38 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport)
stats_msg.vport_id = cpu_to_le32(vport->vport_id);
- mutex_lock(&vport->vc_buf_lock);
+ xn_params.vc_op = VIRTCHNL2_OP_GET_STATS;
+ xn_params.send_buf.iov_base = &stats_msg;
+ xn_params.send_buf.iov_len = sizeof(stats_msg);
+ xn_params.recv_buf = xn_params.send_buf;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_STATS,
- sizeof(struct virtchnl2_vport_stats),
- (u8 *)&stats_msg);
- if (err)
- goto rel_lock;
-
- err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_STATS,
- IDPF_VC_GET_STATS_ERR);
- if (err)
- goto rel_lock;
-
- stats = (struct virtchnl2_vport_stats *)vport->vc_msg;
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
+ if (reply_sz < sizeof(stats_msg))
+ return -EIO;
spin_lock_bh(&np->stats_lock);
- netstats->rx_packets = le64_to_cpu(stats->rx_unicast) +
- le64_to_cpu(stats->rx_multicast) +
- le64_to_cpu(stats->rx_broadcast);
- netstats->rx_bytes = le64_to_cpu(stats->rx_bytes);
- netstats->rx_dropped = le64_to_cpu(stats->rx_discards);
- netstats->rx_over_errors = le64_to_cpu(stats->rx_overflow_drop);
- netstats->rx_length_errors = le64_to_cpu(stats->rx_invalid_frame_length);
-
- netstats->tx_packets = le64_to_cpu(stats->tx_unicast) +
- le64_to_cpu(stats->tx_multicast) +
- le64_to_cpu(stats->tx_broadcast);
- netstats->tx_bytes = le64_to_cpu(stats->tx_bytes);
- netstats->tx_errors = le64_to_cpu(stats->tx_errors);
- netstats->tx_dropped = le64_to_cpu(stats->tx_discards);
-
- vport->port_stats.vport_stats = *stats;
+ netstats->rx_packets = le64_to_cpu(stats_msg.rx_unicast) +
+ le64_to_cpu(stats_msg.rx_multicast) +
+ le64_to_cpu(stats_msg.rx_broadcast);
+ netstats->tx_packets = le64_to_cpu(stats_msg.tx_unicast) +
+ le64_to_cpu(stats_msg.tx_multicast) +
+ le64_to_cpu(stats_msg.tx_broadcast);
+ netstats->rx_bytes = le64_to_cpu(stats_msg.rx_bytes);
+ netstats->tx_bytes = le64_to_cpu(stats_msg.tx_bytes);
+ netstats->rx_errors = le64_to_cpu(stats_msg.rx_errors);
+ netstats->tx_errors = le64_to_cpu(stats_msg.tx_errors);
+ netstats->rx_dropped = le64_to_cpu(stats_msg.rx_discards);
+ netstats->tx_dropped = le64_to_cpu(stats_msg.tx_discards);
+
+ vport->port_stats.vport_stats = stats_msg;
spin_unlock_bh(&np->stats_lock);
-rel_lock:
- mutex_unlock(&vport->vc_buf_lock);
-
- return err;
+ return 0;
}
/**
@@ -2467,70 +2320,70 @@ rel_lock:
*/
int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get)
{
- struct idpf_adapter *adapter = vport->adapter;
- struct virtchnl2_rss_lut *recv_rl;
+ struct virtchnl2_rss_lut *recv_rl __free(kfree) = NULL;
+ struct virtchnl2_rss_lut *rl __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
struct idpf_rss_data *rss_data;
- struct virtchnl2_rss_lut *rl;
int buf_size, lut_buf_size;
- int i, err;
+ ssize_t reply_sz;
+ int i;
- rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+ rss_data =
+ &vport->adapter->vport_config[vport->idx]->user_config.rss_data;
buf_size = struct_size(rl, lut, rss_data->rss_lut_size);
rl = kzalloc(buf_size, GFP_KERNEL);
if (!rl)
return -ENOMEM;
rl->vport_id = cpu_to_le32(vport->vport_id);
- mutex_lock(&vport->vc_buf_lock);
- if (!get) {
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ xn_params.send_buf.iov_base = rl;
+ xn_params.send_buf.iov_len = buf_size;
+
+ if (get) {
+ recv_rl = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!recv_rl)
+ return -ENOMEM;
+ xn_params.vc_op = VIRTCHNL2_OP_GET_RSS_LUT;
+ xn_params.recv_buf.iov_base = recv_rl;
+ xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
+ } else {
rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size);
for (i = 0; i < rss_data->rss_lut_size; i++)
rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]);
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_LUT,
- buf_size, (u8 *)rl);
- if (err)
- goto free_mem;
-
- err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_LUT,
- IDPF_VC_SET_RSS_LUT_ERR);
-
- goto free_mem;
+ xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_LUT;
}
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
+ if (!get)
+ return 0;
+ if (reply_sz < sizeof(struct virtchnl2_rss_lut))
+ return -EIO;
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_LUT,
- buf_size, (u8 *)rl);
- if (err)
- goto free_mem;
+ lut_buf_size = le16_to_cpu(recv_rl->lut_entries) * sizeof(u32);
+ if (reply_sz < lut_buf_size)
+ return -EIO;
- err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_LUT,
- IDPF_VC_GET_RSS_LUT_ERR);
- if (err)
- goto free_mem;
-
- recv_rl = (struct virtchnl2_rss_lut *)vport->vc_msg;
+ /* size didn't change, we can reuse existing lut buf */
if (rss_data->rss_lut_size == le16_to_cpu(recv_rl->lut_entries))
goto do_memcpy;
rss_data->rss_lut_size = le16_to_cpu(recv_rl->lut_entries);
kfree(rss_data->rss_lut);
- lut_buf_size = rss_data->rss_lut_size * sizeof(u32);
rss_data->rss_lut = kzalloc(lut_buf_size, GFP_KERNEL);
if (!rss_data->rss_lut) {
rss_data->rss_lut_size = 0;
- err = -ENOMEM;
- goto free_mem;
+ return -ENOMEM;
}
do_memcpy:
- memcpy(rss_data->rss_lut, vport->vc_msg, rss_data->rss_lut_size);
-free_mem:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(rl);
+ memcpy(rss_data->rss_lut, recv_rl->lut, rss_data->rss_lut_size);
- return err;
+ return 0;
}
/**
@@ -2542,68 +2395,70 @@ free_mem:
*/
int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get)
{
- struct idpf_adapter *adapter = vport->adapter;
- struct virtchnl2_rss_key *recv_rk;
+ struct virtchnl2_rss_key *recv_rk __free(kfree) = NULL;
+ struct virtchnl2_rss_key *rk __free(kfree) = NULL;
+ struct idpf_vc_xn_params xn_params = {};
struct idpf_rss_data *rss_data;
- struct virtchnl2_rss_key *rk;
- int i, buf_size, err;
+ ssize_t reply_sz;
+ int i, buf_size;
+ u16 key_size;
- rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+ rss_data =
+ &vport->adapter->vport_config[vport->idx]->user_config.rss_data;
buf_size = struct_size(rk, key_flex, rss_data->rss_key_size);
rk = kzalloc(buf_size, GFP_KERNEL);
if (!rk)
return -ENOMEM;
rk->vport_id = cpu_to_le32(vport->vport_id);
- mutex_lock(&vport->vc_buf_lock);
-
+ xn_params.send_buf.iov_base = rk;
+ xn_params.send_buf.iov_len = buf_size;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
if (get) {
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_KEY,
- buf_size, (u8 *)rk);
- if (err)
- goto error;
-
- err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_KEY,
- IDPF_VC_GET_RSS_KEY_ERR);
- if (err)
- goto error;
-
- recv_rk = (struct virtchnl2_rss_key *)vport->vc_msg;
- if (rss_data->rss_key_size !=
- le16_to_cpu(recv_rk->key_len)) {
- rss_data->rss_key_size =
- min_t(u16, NETDEV_RSS_KEY_LEN,
- le16_to_cpu(recv_rk->key_len));
- kfree(rss_data->rss_key);
- rss_data->rss_key = kzalloc(rss_data->rss_key_size,
- GFP_KERNEL);
- if (!rss_data->rss_key) {
- rss_data->rss_key_size = 0;
- err = -ENOMEM;
- goto error;
- }
- }
- memcpy(rss_data->rss_key, recv_rk->key_flex,
- rss_data->rss_key_size);
+ recv_rk = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!recv_rk)
+ return -ENOMEM;
+
+ xn_params.vc_op = VIRTCHNL2_OP_GET_RSS_KEY;
+ xn_params.recv_buf.iov_base = recv_rk;
+ xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
} else {
rk->key_len = cpu_to_le16(rss_data->rss_key_size);
for (i = 0; i < rss_data->rss_key_size; i++)
rk->key_flex[i] = rss_data->rss_key[i];
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_KEY,
- buf_size, (u8 *)rk);
- if (err)
- goto error;
+ xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_KEY;
+ }
+
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
+ if (!get)
+ return 0;
+ if (reply_sz < sizeof(struct virtchnl2_rss_key))
+ return -EIO;
+
+ key_size = min_t(u16, NETDEV_RSS_KEY_LEN,
+ le16_to_cpu(recv_rk->key_len));
+ if (reply_sz < key_size)
+ return -EIO;
- err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_KEY,
- IDPF_VC_SET_RSS_KEY_ERR);
+ /* key len didn't change, reuse existing buf */
+ if (rss_data->rss_key_size == key_size)
+ goto do_memcpy;
+
+ rss_data->rss_key_size = key_size;
+ kfree(rss_data->rss_key);
+ rss_data->rss_key = kzalloc(key_size, GFP_KERNEL);
+ if (!rss_data->rss_key) {
+ rss_data->rss_key_size = 0;
+ return -ENOMEM;
}
-error:
- mutex_unlock(&vport->vc_buf_lock);
- kfree(rk);
+do_memcpy:
+ memcpy(rss_data->rss_key, recv_rk->key_flex, rss_data->rss_key_size);
- return err;
+ return 0;
}
/**
@@ -2655,13 +2510,15 @@ static void idpf_fill_ptype_lookup(struct idpf_rx_ptype_decoded *ptype,
*/
int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport)
{
+ struct virtchnl2_get_ptype_info *get_ptype_info __free(kfree) = NULL;
+ struct virtchnl2_get_ptype_info *ptype_info __free(kfree) = NULL;
struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup;
- struct virtchnl2_get_ptype_info get_ptype_info;
int max_ptype, ptypes_recvd = 0, ptype_offset;
struct idpf_adapter *adapter = vport->adapter;
- struct virtchnl2_get_ptype_info *ptype_info;
+ struct idpf_vc_xn_params xn_params = {};
u16 next_ptype_id = 0;
- int err = 0, i, j, k;
+ ssize_t reply_sz;
+ int i, j, k;
if (idpf_is_queue_model_split(vport->rxq_model))
max_ptype = IDPF_RX_MAX_PTYPE;
@@ -2670,43 +2527,44 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport)
memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup));
+ get_ptype_info = kzalloc(sizeof(*get_ptype_info), GFP_KERNEL);
+ if (!get_ptype_info)
+ return -ENOMEM;
+
ptype_info = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
if (!ptype_info)
return -ENOMEM;
- mutex_lock(&adapter->vc_buf_lock);
+ xn_params.vc_op = VIRTCHNL2_OP_GET_PTYPE_INFO;
+ xn_params.send_buf.iov_base = get_ptype_info;
+ xn_params.send_buf.iov_len = sizeof(*get_ptype_info);
+ xn_params.recv_buf.iov_base = ptype_info;
+ xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
while (next_ptype_id < max_ptype) {
- get_ptype_info.start_ptype_id = cpu_to_le16(next_ptype_id);
+ get_ptype_info->start_ptype_id = cpu_to_le16(next_ptype_id);
if ((next_ptype_id + IDPF_RX_MAX_PTYPES_PER_BUF) > max_ptype)
- get_ptype_info.num_ptypes =
+ get_ptype_info->num_ptypes =
cpu_to_le16(max_ptype - next_ptype_id);
else
- get_ptype_info.num_ptypes =
+ get_ptype_info->num_ptypes =
cpu_to_le16(IDPF_RX_MAX_PTYPES_PER_BUF);
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_PTYPE_INFO,
- sizeof(struct virtchnl2_get_ptype_info),
- (u8 *)&get_ptype_info);
- if (err)
- goto vc_buf_unlock;
-
- err = idpf_wait_for_event(adapter, NULL, IDPF_VC_GET_PTYPE_INFO,
- IDPF_VC_GET_PTYPE_INFO_ERR);
- if (err)
- goto vc_buf_unlock;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
- memcpy(ptype_info, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN);
+ if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN)
+ return -EIO;
ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes);
- if (ptypes_recvd > max_ptype) {
- err = -EINVAL;
- goto vc_buf_unlock;
- }
+ if (ptypes_recvd > max_ptype)
+ return -EINVAL;
- next_ptype_id = le16_to_cpu(get_ptype_info.start_ptype_id) +
- le16_to_cpu(get_ptype_info.num_ptypes);
+ next_ptype_id = le16_to_cpu(get_ptype_info->start_ptype_id) +
+ le16_to_cpu(get_ptype_info->num_ptypes);
ptype_offset = IDPF_RX_PTYPE_HDR_SZ;
@@ -2719,17 +2577,13 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport)
((u8 *)ptype_info + ptype_offset);
ptype_offset += IDPF_GET_PTYPE_SIZE(ptype);
- if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) {
- err = -EINVAL;
- goto vc_buf_unlock;
- }
+ if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN)
+ return -EINVAL;
/* 0xFFFF indicates end of ptypes */
if (le16_to_cpu(ptype->ptype_id_10) ==
- IDPF_INVALID_PTYPE_ID) {
- err = 0;
- goto vc_buf_unlock;
- }
+ IDPF_INVALID_PTYPE_ID)
+ return 0;
if (idpf_is_queue_model_split(vport->rxq_model))
k = le16_to_cpu(ptype->ptype_id_10);
@@ -2857,11 +2711,7 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport)
}
}
-vc_buf_unlock:
- mutex_unlock(&adapter->vc_buf_lock);
- kfree(ptype_info);
-
- return err;
+ return 0;
}
/**
@@ -2873,27 +2723,20 @@ vc_buf_unlock:
*/
int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport)
{
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_loopback loopback;
- int err;
+ ssize_t reply_sz;
loopback.vport_id = cpu_to_le32(vport->vport_id);
loopback.enable = idpf_is_feature_ena(vport, NETIF_F_LOOPBACK);
- mutex_lock(&vport->vc_buf_lock);
-
- err = idpf_send_mb_msg(vport->adapter, VIRTCHNL2_OP_LOOPBACK,
- sizeof(loopback), (u8 *)&loopback);
- if (err)
- goto rel_lock;
+ xn_params.vc_op = VIRTCHNL2_OP_LOOPBACK;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ xn_params.send_buf.iov_base = &loopback;
+ xn_params.send_buf.iov_len = sizeof(loopback);
+ reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
- err = idpf_wait_for_event(vport->adapter, vport,
- IDPF_VC_LOOPBACK_STATE,
- IDPF_VC_LOOPBACK_STATE_ERR);
-
-rel_lock:
- mutex_unlock(&vport->vc_buf_lock);
-
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
/**
@@ -2958,7 +2801,7 @@ int idpf_init_dflt_mbx(struct idpf_adapter *adapter)
return -ENOENT;
}
- adapter->state = __IDPF_STARTUP;
+ adapter->state = __IDPF_VER_CHECK;
return 0;
}
@@ -3055,35 +2898,42 @@ int idpf_vc_core_init(struct idpf_adapter *adapter)
u16 num_max_vports;
int err = 0;
+ if (!adapter->vcxn_mngr) {
+ adapter->vcxn_mngr = kzalloc(sizeof(*adapter->vcxn_mngr), GFP_KERNEL);
+ if (!adapter->vcxn_mngr) {
+ err = -ENOMEM;
+ goto init_failed;
+ }
+ }
+ idpf_vc_xn_init(adapter->vcxn_mngr);
+
while (adapter->state != __IDPF_INIT_SW) {
switch (adapter->state) {
- case __IDPF_STARTUP:
- if (idpf_send_ver_msg(adapter))
- goto init_failed;
- adapter->state = __IDPF_VER_CHECK;
- goto restart;
case __IDPF_VER_CHECK:
- err = idpf_recv_ver_msg(adapter);
- if (err == -EIO) {
- return err;
- } else if (err == -EAGAIN) {
- adapter->state = __IDPF_STARTUP;
+ err = idpf_send_ver_msg(adapter);
+ switch (err) {
+ case 0:
+ /* success, move state machine forward */
+ adapter->state = __IDPF_GET_CAPS;
+ fallthrough;
+ case -EAGAIN:
goto restart;
- } else if (err) {
+ default:
+ /* Something bad happened, try again but only a
+ * few times.
+ */
goto init_failed;
}
- if (idpf_send_get_caps_msg(adapter))
- goto init_failed;
- adapter->state = __IDPF_GET_CAPS;
- goto restart;
case __IDPF_GET_CAPS:
- if (idpf_recv_get_caps_msg(adapter))
+ err = idpf_send_get_caps_msg(adapter);
+ if (err)
goto init_failed;
adapter->state = __IDPF_INIT_SW;
break;
default:
dev_err(&adapter->pdev->dev, "Device is in bad state: %d\n",
adapter->state);
+ err = -EINVAL;
goto init_failed;
}
break;
@@ -3142,7 +2992,9 @@ restart:
queue_delayed_work(adapter->init_wq, &adapter->init_task,
msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
- goto no_err;
+ set_bit(IDPF_VC_CORE_INIT, adapter->flags);
+
+ return 0;
err_intr_req:
cancel_delayed_work_sync(&adapter->serv_task);
@@ -3151,7 +3003,6 @@ err_intr_req:
err_netdev_alloc:
kfree(adapter->vports);
adapter->vports = NULL;
-no_err:
return err;
init_failed:
@@ -3168,7 +3019,9 @@ init_failed:
* register writes might not have taken effect. Retry to initialize
* the mailbox again
*/
- adapter->state = __IDPF_STARTUP;
+ adapter->state = __IDPF_VER_CHECK;
+ if (adapter->vcxn_mngr)
+ idpf_vc_xn_shutdown(adapter->vcxn_mngr);
idpf_deinit_dflt_mbx(adapter);
set_bit(IDPF_HR_DRV_LOAD, adapter->flags);
queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task,
@@ -3184,29 +3037,22 @@ init_failed:
*/
void idpf_vc_core_deinit(struct idpf_adapter *adapter)
{
- int i;
+ if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags))
+ return;
+ idpf_vc_xn_shutdown(adapter->vcxn_mngr);
idpf_deinit_task(adapter);
idpf_intr_rel(adapter);
- /* Set all bits as we dont know on which vc_state the vhnl_wq is
- * waiting on and wakeup the virtchnl workqueue even if it is waiting
- * for the response as we are going down
- */
- for (i = 0; i < IDPF_VC_NBITS; i++)
- set_bit(i, adapter->vc_state);
- wake_up(&adapter->vchnl_wq);
cancel_delayed_work_sync(&adapter->serv_task);
cancel_delayed_work_sync(&adapter->mbx_task);
idpf_vport_params_buf_rel(adapter);
- /* Clear all the bits */
- for (i = 0; i < IDPF_VC_NBITS; i++)
- clear_bit(i, adapter->vc_state);
-
kfree(adapter->vports);
adapter->vports = NULL;
+
+ clear_bit(IDPF_VC_CORE_INIT, adapter->flags);
}
/**
@@ -3622,6 +3468,75 @@ u32 idpf_get_vport_id(struct idpf_vport *vport)
}
/**
+ * idpf_mac_filter_async_handler - Async callback for mac filters
+ * @adapter: private data struct
+ * @xn: transaction for message
+ * @ctlq_msg: received message
+ *
+ * In some scenarios driver can't sleep and wait for a reply (e.g.: stack is
+ * holding rtnl_lock) when adding a new mac filter. It puts us in a difficult
+ * situation to deal with errors returned on the reply. The best we can
+ * ultimately do is remove it from our list of mac filters and report the
+ * error.
+ */
+static int idpf_mac_filter_async_handler(struct idpf_adapter *adapter,
+ struct idpf_vc_xn *xn,
+ const struct idpf_ctlq_msg *ctlq_msg)
+{
+ struct virtchnl2_mac_addr_list *ma_list;
+ struct idpf_vport_config *vport_config;
+ struct virtchnl2_mac_addr *mac_addr;
+ struct idpf_mac_filter *f, *tmp;
+ struct list_head *ma_list_head;
+ struct idpf_vport *vport;
+ u16 num_entries;
+ int i;
+
+ /* if success we're done, we're only here if something bad happened */
+ if (!ctlq_msg->cookie.mbx.chnl_retval)
+ return 0;
+
+ /* make sure at least struct is there */
+ if (xn->reply_sz < sizeof(*ma_list))
+ goto invalid_payload;
+
+ ma_list = ctlq_msg->ctx.indirect.payload->va;
+ mac_addr = ma_list->mac_addr_list;
+ num_entries = le16_to_cpu(ma_list->num_mac_addr);
+ /* we should have received a buffer at least this big */
+ if (xn->reply_sz < struct_size(ma_list, mac_addr_list, num_entries))
+ goto invalid_payload;
+
+ vport = idpf_vid_to_vport(adapter, le32_to_cpu(ma_list->vport_id));
+ if (!vport)
+ goto invalid_payload;
+
+ vport_config = adapter->vport_config[le32_to_cpu(ma_list->vport_id)];
+ ma_list_head = &vport_config->user_config.mac_filter_list;
+
+ /* We can't do much to reconcile bad filters at this point, however we
+ * should at least remove them from our list one way or the other so we
+ * have some idea what good filters we have.
+ */
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+ list_for_each_entry_safe(f, tmp, ma_list_head, list)
+ for (i = 0; i < num_entries; i++)
+ if (ether_addr_equal(mac_addr[i].addr, f->macaddr))
+ list_del(&f->list);
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+ dev_err_ratelimited(&adapter->pdev->dev, "Received error sending MAC filter request (op %d)\n",
+ xn->vc_op);
+
+ return 0;
+
+invalid_payload:
+ dev_err_ratelimited(&adapter->pdev->dev, "Received invalid MAC filter payload (op %d) (len %zd)\n",
+ xn->vc_op, xn->reply_sz);
+
+ return -EINVAL;
+}
+
+/**
* idpf_add_del_mac_filters - Add/del mac filters
* @vport: Virtual port data structure
* @np: Netdev private structure
@@ -3634,17 +3549,21 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport,
struct idpf_netdev_priv *np,
bool add, bool async)
{
- struct virtchnl2_mac_addr_list *ma_list = NULL;
+ struct virtchnl2_mac_addr_list *ma_list __free(kfree) = NULL;
+ struct virtchnl2_mac_addr *mac_addr __free(kfree) = NULL;
struct idpf_adapter *adapter = np->adapter;
+ struct idpf_vc_xn_params xn_params = {};
struct idpf_vport_config *vport_config;
- enum idpf_vport_config_flags mac_flag;
- struct pci_dev *pdev = adapter->pdev;
- enum idpf_vport_vc_state vc, vc_err;
- struct virtchnl2_mac_addr *mac_addr;
- struct idpf_mac_filter *f, *tmp;
u32 num_msgs, total_filters = 0;
- int i = 0, k, err = 0;
- u32 vop;
+ struct idpf_mac_filter *f;
+ ssize_t reply_sz;
+ int i = 0, k;
+
+ xn_params.vc_op = add ? VIRTCHNL2_OP_ADD_MAC_ADDR :
+ VIRTCHNL2_OP_DEL_MAC_ADDR;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ xn_params.async = async;
+ xn_params.async_handler = idpf_mac_filter_async_handler;
vport_config = adapter->vport_config[np->vport_idx];
spin_lock_bh(&vport_config->mac_filter_list_lock);
@@ -3668,13 +3587,13 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport,
mac_addr = kcalloc(total_filters, sizeof(struct virtchnl2_mac_addr),
GFP_ATOMIC);
if (!mac_addr) {
- err = -ENOMEM;
spin_unlock_bh(&vport_config->mac_filter_list_lock);
- goto error;
+
+ return -ENOMEM;
}
- list_for_each_entry_safe(f, tmp, &vport_config->user_config.mac_filter_list,
- list) {
+ list_for_each_entry(f, &vport_config->user_config.mac_filter_list,
+ list) {
if (add && f->add) {
ether_addr_copy(mac_addr[i].addr, f->macaddr);
i++;
@@ -3693,26 +3612,11 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport,
spin_unlock_bh(&vport_config->mac_filter_list_lock);
- if (add) {
- vop = VIRTCHNL2_OP_ADD_MAC_ADDR;
- vc = IDPF_VC_ADD_MAC_ADDR;
- vc_err = IDPF_VC_ADD_MAC_ADDR_ERR;
- mac_flag = IDPF_VPORT_ADD_MAC_REQ;
- } else {
- vop = VIRTCHNL2_OP_DEL_MAC_ADDR;
- vc = IDPF_VC_DEL_MAC_ADDR;
- vc_err = IDPF_VC_DEL_MAC_ADDR_ERR;
- mac_flag = IDPF_VPORT_DEL_MAC_REQ;
- }
-
/* Chunk up the filters into multiple messages to avoid
* sending a control queue message buffer that is too large
*/
num_msgs = DIV_ROUND_UP(total_filters, IDPF_NUM_FILTERS_PER_MSG);
- if (!async)
- mutex_lock(&vport->vc_buf_lock);
-
for (i = 0, k = 0; i < num_msgs; i++) {
u32 entries_size, buf_size, num_entries;
@@ -3724,10 +3628,8 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport,
if (!ma_list || num_entries != IDPF_NUM_FILTERS_PER_MSG) {
kfree(ma_list);
ma_list = kzalloc(buf_size, GFP_ATOMIC);
- if (!ma_list) {
- err = -ENOMEM;
- goto list_prep_error;
- }
+ if (!ma_list)
+ return -ENOMEM;
} else {
memset(ma_list, 0, buf_size);
}
@@ -3736,34 +3638,17 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport,
ma_list->num_mac_addr = cpu_to_le16(num_entries);
memcpy(ma_list->mac_addr_list, &mac_addr[k], entries_size);
- if (async)
- set_bit(mac_flag, vport_config->flags);
-
- err = idpf_send_mb_msg(adapter, vop, buf_size, (u8 *)ma_list);
- if (err)
- goto mbx_error;
-
- if (!async) {
- err = idpf_wait_for_event(adapter, vport, vc, vc_err);
- if (err)
- goto mbx_error;
- }
+ xn_params.send_buf.iov_base = ma_list;
+ xn_params.send_buf.iov_len = buf_size;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
+ if (reply_sz < 0)
+ return reply_sz;
k += num_entries;
total_filters -= num_entries;
}
-mbx_error:
- if (!async)
- mutex_unlock(&vport->vc_buf_lock);
- kfree(ma_list);
-list_prep_error:
- kfree(mac_addr);
-error:
- if (err)
- dev_err(&pdev->dev, "Failed to add or del mac filters %d", err);
-
- return err;
+ return 0;
}
/**
@@ -3780,9 +3665,10 @@ int idpf_set_promiscuous(struct idpf_adapter *adapter,
struct idpf_vport_user_config_data *config_data,
u32 vport_id)
{
+ struct idpf_vc_xn_params xn_params = {};
struct virtchnl2_promisc_info vpi;
+ ssize_t reply_sz;
u16 flags = 0;
- int err;
if (test_bit(__IDPF_PROMISC_UC, config_data->user_flags))
flags |= VIRTCHNL2_UNICAST_PROMISC;
@@ -3792,9 +3678,13 @@ int idpf_set_promiscuous(struct idpf_adapter *adapter,
vpi.vport_id = cpu_to_le32(vport_id);
vpi.flags = cpu_to_le16(flags);
- err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE,
- sizeof(struct virtchnl2_promisc_info),
- (u8 *)&vpi);
+ xn_params.vc_op = VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE;
+ xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
+ xn_params.send_buf.iov_base = &vpi;
+ xn_params.send_buf.iov_len = sizeof(vpi);
+ /* setting promiscuous is only ever done asynchronously */
+ xn_params.async = true;
+ reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
- return err;
+ return reply_sz < 0 ? reply_sz : 0;
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
new file mode 100644
index 000000000000..83da5d8da56b
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef _IDPF_VIRTCHNL_H_
+#define _IDPF_VIRTCHNL_H_
+
+struct idpf_adapter;
+struct idpf_netdev_priv;
+struct idpf_vec_regs;
+struct idpf_vport;
+struct idpf_vport_max_q;
+struct idpf_vport_user_config_data;
+
+int idpf_init_dflt_mbx(struct idpf_adapter *adapter);
+void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter);
+int idpf_vc_core_init(struct idpf_adapter *adapter);
+void idpf_vc_core_deinit(struct idpf_adapter *adapter);
+
+int idpf_get_reg_intr_vecs(struct idpf_vport *vport,
+ struct idpf_vec_regs *reg_vals);
+int idpf_queue_reg_init(struct idpf_vport *vport);
+int idpf_vport_queue_ids_init(struct idpf_vport *vport);
+
+int idpf_recv_mb_msg(struct idpf_adapter *adapter);
+int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
+ u16 msg_size, u8 *msg, u16 cookie);
+
+void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q);
+u32 idpf_get_vport_id(struct idpf_vport *vport);
+int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q);
+int idpf_send_destroy_vport_msg(struct idpf_vport *vport);
+int idpf_send_enable_vport_msg(struct idpf_vport *vport);
+int idpf_send_disable_vport_msg(struct idpf_vport *vport);
+
+int idpf_vport_adjust_qs(struct idpf_vport *vport);
+int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q);
+void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q);
+int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
+ u16 num_complq, u16 num_rx_q, u16 num_rx_bufq);
+int idpf_send_delete_queues_msg(struct idpf_vport *vport);
+int idpf_send_enable_queues_msg(struct idpf_vport *vport);
+int idpf_send_disable_queues_msg(struct idpf_vport *vport);
+int idpf_send_config_queues_msg(struct idpf_vport *vport);
+
+int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport);
+int idpf_get_vec_ids(struct idpf_adapter *adapter,
+ u16 *vecids, int num_vecids,
+ struct virtchnl2_vector_chunks *chunks);
+int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors);
+int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter);
+int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map);
+
+int idpf_add_del_mac_filters(struct idpf_vport *vport,
+ struct idpf_netdev_priv *np,
+ bool add, bool async);
+int idpf_set_promiscuous(struct idpf_adapter *adapter,
+ struct idpf_vport_user_config_data *config_data,
+ u32 vport_id);
+int idpf_check_supported_desc_ids(struct idpf_vport *vport);
+int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport);
+int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport);
+int idpf_send_get_stats_msg(struct idpf_vport *vport);
+int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
+int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
+int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
+
+#endif /* _IDPF_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h
index 8dc837889723..4a3c4454d25a 100644
--- a/drivers/net/ethernet/intel/idpf/virtchnl2.h
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h
@@ -978,7 +978,7 @@ struct virtchnl2_ptype {
u8 proto_id_count;
__le16 pad;
__le16 proto_id[];
-};
+} __packed __aligned(2);
VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype);
/**
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index a2b759531cb7..3c2dc7bdebb5 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -637,7 +637,7 @@ struct igb_adapter {
struct timespec64 period;
} perout[IGB_N_PEROUT];
- char fw_version[32];
+ char fw_version[48];
#ifdef CONFIG_IGB_HWMON
struct hwmon_buff *igb_hwmon_buff;
bool ets;
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index b66199c9bb3a..99977a22b843 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -3027,7 +3027,7 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
return ret;
}
-static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int igb_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
@@ -3038,11 +3038,13 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
(hw->phy.media_type != e1000_media_type_copper))
return -EOPNOTSUPP;
- edata->supported = (SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ edata->supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ edata->supported);
if (!hw->dev_spec._82575.eee_disable)
- edata->advertised =
- mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+ mii_eee_cap1_mod_linkmode_t(edata->advertised,
+ adapter->eee_advert);
/* The IPCNFG and EEER registers are not supported on I354. */
if (hw->mac.type == e1000_i354) {
@@ -3068,7 +3070,7 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
if (ret_val)
return -ENODATA;
- edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+ mii_eee_cap1_mod_linkmode_t(edata->lp_advertised, phy_data);
break;
case e1000_i354:
case e1000_i210:
@@ -3079,7 +3081,7 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
if (ret_val)
return -ENODATA;
- edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+ mii_eee_cap1_mod_linkmode_t(edata->lp_advertised, phy_data);
break;
default:
@@ -3099,18 +3101,20 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
edata->eee_enabled = false;
edata->eee_active = false;
edata->tx_lpi_enabled = false;
- edata->advertised &= ~edata->advertised;
+ linkmode_zero(edata->advertised);
}
return 0;
}
static int igb_set_eee(struct net_device *netdev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct igb_adapter *adapter = netdev_priv(netdev);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {};
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = {};
struct e1000_hw *hw = &adapter->hw;
- struct ethtool_eee eee_curr;
+ struct ethtool_keee eee_curr;
bool adv1g_eee = true, adv100m_eee = true;
s32 ret_val;
@@ -3118,7 +3122,7 @@ static int igb_set_eee(struct net_device *netdev,
(hw->phy.media_type != e1000_media_type_copper))
return -EOPNOTSUPP;
- memset(&eee_curr, 0, sizeof(struct ethtool_eee));
+ memset(&eee_curr, 0, sizeof(struct ethtool_keee));
ret_val = igb_get_eee(netdev, &eee_curr);
if (ret_val)
@@ -3138,14 +3142,21 @@ static int igb_set_eee(struct net_device *netdev,
return -EINVAL;
}
- if (!edata->advertised || (edata->advertised &
- ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL))) {
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ supported);
+ if (linkmode_andnot(tmp, edata->advertised, supported)) {
dev_err(&adapter->pdev->dev,
"EEE Advertisement supports only 100Tx and/or 100T full duplex\n");
return -EINVAL;
}
- adv100m_eee = !!(edata->advertised & ADVERTISE_100_FULL);
- adv1g_eee = !!(edata->advertised & ADVERTISE_1000_FULL);
+ adv100m_eee = linkmode_test_bit(
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ edata->advertised);
+ adv1g_eee = linkmode_test_bit(
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ edata->advertised);
} else if (!edata->eee_enabled) {
dev_err(&adapter->pdev->dev,
@@ -3153,7 +3164,7 @@ static int igb_set_eee(struct net_device *netdev,
return -EINVAL;
}
- adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+ adapter->eee_advert = linkmode_to_mii_eee_cap1_t(edata->advertised);
if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) {
hw->dev_spec._82575.eee_disable = !edata->eee_enabled;
adapter->flags |= IGB_FLAG_EEE;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 4df8d4153aa5..a3f100769e39 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -202,7 +202,7 @@ static struct notifier_block dca_notifier = {
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs;
-module_param(max_vfs, uint, 0);
+module_param(max_vfs, uint, 0444);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function");
#endif /* CONFIG_PCI_IOV */
@@ -2538,7 +2538,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
unsigned int network_hdr_len, mac_hdr_len;
/* Make certain the headers can be described by a context descriptor */
- mac_hdr_len = skb_network_header(skb) - skb->data;
+ mac_hdr_len = skb_network_offset(skb);
if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
@@ -3069,7 +3069,6 @@ void igb_set_fw_version(struct igb_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct e1000_fw_version fw;
- char *lbuf;
igb_get_fw_version(hw, &fw);
@@ -3077,34 +3076,36 @@ void igb_set_fw_version(struct igb_adapter *adapter)
case e1000_i210:
case e1000_i211:
if (!(igb_get_flash_presence_i210(hw))) {
- lbuf = kasprintf(GFP_KERNEL, "%2d.%2d-%d",
- fw.invm_major, fw.invm_minor,
- fw.invm_img_type);
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%2d.%2d-%d",
+ fw.invm_major, fw.invm_minor,
+ fw.invm_img_type);
break;
}
fallthrough;
default:
/* if option rom is valid, display its version too */
if (fw.or_valid) {
- lbuf = kasprintf(GFP_KERNEL, "%d.%d, 0x%08x, %d.%d.%d",
- fw.eep_major, fw.eep_minor,
- fw.etrack_id, fw.or_major, fw.or_build,
- fw.or_patch);
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d, 0x%08x, %d.%d.%d",
+ fw.eep_major, fw.eep_minor, fw.etrack_id,
+ fw.or_major, fw.or_build, fw.or_patch);
/* no option rom */
} else if (fw.etrack_id != 0X0000) {
- lbuf = kasprintf(GFP_KERNEL, "%d.%d, 0x%08x",
- fw.eep_major, fw.eep_minor,
- fw.etrack_id);
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d, 0x%08x",
+ fw.eep_major, fw.eep_minor, fw.etrack_id);
} else {
- lbuf = kasprintf(GFP_KERNEL, "%d.%d.%d", fw.eep_major,
- fw.eep_minor, fw.eep_build);
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d.%d",
+ fw.eep_major, fw.eep_minor, fw.eep_build);
}
break;
}
-
- /* the truncate happens here if it doesn't fit */
- strscpy(adapter->fw_version, lbuf, sizeof(adapter->fw_version));
- kfree(lbuf);
}
/**
@@ -6984,44 +6985,31 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt)
static void igb_tsync_interrupt(struct igb_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
- u32 ack = 0, tsicr = rd32(E1000_TSICR);
+ u32 tsicr = rd32(E1000_TSICR);
struct ptp_clock_event event;
if (tsicr & TSINTR_SYS_WRAP) {
event.type = PTP_CLOCK_PPS;
if (adapter->ptp_caps.pps)
ptp_clock_event(adapter->ptp_clock, &event);
- ack |= TSINTR_SYS_WRAP;
}
if (tsicr & E1000_TSICR_TXTS) {
/* retrieve hardware timestamp */
schedule_work(&adapter->ptp_tx_work);
- ack |= E1000_TSICR_TXTS;
}
- if (tsicr & TSINTR_TT0) {
+ if (tsicr & TSINTR_TT0)
igb_perout(adapter, 0);
- ack |= TSINTR_TT0;
- }
- if (tsicr & TSINTR_TT1) {
+ if (tsicr & TSINTR_TT1)
igb_perout(adapter, 1);
- ack |= TSINTR_TT1;
- }
- if (tsicr & TSINTR_AUTT0) {
+ if (tsicr & TSINTR_AUTT0)
igb_extts(adapter, 0);
- ack |= TSINTR_AUTT0;
- }
- if (tsicr & TSINTR_AUTT1) {
+ if (tsicr & TSINTR_AUTT1)
igb_extts(adapter, 1);
- ack |= TSINTR_AUTT1;
- }
-
- /* acknowledge the interrupts */
- wr32(E1000_TSICR, ack);
}
static irqreturn_t igb_msix_other(int irq, void *data)
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 319c544b9f04..f94570556120 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -957,7 +957,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
/* adjust timestamp for the TX latency based on link speed */
- if (adapter->hw.mac.type == e1000_i210) {
+ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
switch (adapter->link_speed) {
case SPEED_10:
adjust = IGB_I210_TX_LATENCY_10;
@@ -1003,6 +1003,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
ktime_t *timestamp)
{
struct igb_adapter *adapter = q_vector->adapter;
+ struct e1000_hw *hw = &adapter->hw;
struct skb_shared_hwtstamps ts;
__le64 *regval = (__le64 *)va;
int adjust = 0;
@@ -1022,7 +1023,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
/* adjust timestamp for the RX latency based on link speed */
- if (adapter->hw.mac.type == e1000_i210) {
+ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
switch (adapter->link_speed) {
case SPEED_10:
adjust = IGB_I210_RX_LATENCY_10;
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index a4d4f00e6a87..b0cf310e6f7b 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2655,7 +2655,7 @@ igbvf_features_check(struct sk_buff *skb, struct net_device *dev,
unsigned int network_hdr_len, mac_hdr_len;
/* Make certain the headers can be described by a context descriptor */
- mac_hdr_len = skb_network_header(skb) - skb->data;
+ mac_hdr_len = skb_network_offset(skb);
if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
index 95d1e8c490a4..ebffd3054285 100644
--- a/drivers/net/ethernet/intel/igc/Makefile
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -6,6 +6,7 @@
#
obj-$(CONFIG_IGC) += igc.o
+igc-$(CONFIG_IGC_LEDS) += igc_leds.o
igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 45430e246e9c..90316dc58630 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -168,7 +168,7 @@ struct igc_ring {
struct igc_adapter {
struct net_device *netdev;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
u16 eee_advert;
unsigned long state;
@@ -295,6 +295,9 @@ struct igc_adapter {
struct timespec64 start;
struct timespec64 period;
} perout[IGC_N_PEROUT];
+
+ /* LEDs */
+ struct mutex led_mutex;
};
void igc_up(struct igc_adapter *adapter);
@@ -567,7 +570,6 @@ struct igc_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[IFNAMSIZ + 9];
- struct net_device poll_dev;
/* for dynamic allocation of rings associated with this q_vector */
struct igc_ring ring[] ____cacheline_internodealigned_in_smp;
@@ -585,7 +587,7 @@ enum igc_filter_match_flags {
struct igc_nfc_filter {
u8 match_flags;
u16 etype;
- __be16 vlan_etype;
+ u16 vlan_etype;
u16 vlan_tci;
u16 vlan_tci_mask;
u8 src_addr[ETH_ALEN];
@@ -720,6 +722,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter);
void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
+int igc_led_setup(struct igc_adapter *adapter);
+
#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
#define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index b95d2c86e803..1a64f1ca6ca8 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -981,7 +981,7 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) {
fsp->flow_type |= FLOW_EXT;
- fsp->h_ext.vlan_etype = rule->filter.vlan_etype;
+ fsp->h_ext.vlan_etype = htons(rule->filter.vlan_etype);
fsp->m_ext.vlan_etype = ETHER_TYPE_FULL_MASK;
}
@@ -1249,7 +1249,7 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
/* VLAN etype matching */
if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) {
- rule->filter.vlan_etype = fsp->h_ext.vlan_etype;
+ rule->filter.vlan_etype = ntohs(fsp->h_ext.vlan_etype);
rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE;
}
@@ -1623,18 +1623,17 @@ static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags)
}
static int igc_ethtool_get_eee(struct net_device *netdev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct igc_adapter *adapter = netdev_priv(netdev);
struct igc_hw *hw = &adapter->hw;
u32 eeer;
if (hw->dev_spec._base.eee_enable)
- edata->advertised =
- mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+ mii_eee_cap1_mod_linkmode_t(edata->advertised,
+ adapter->eee_advert);
*edata = adapter->eee;
- edata->supported = SUPPORTED_Autoneg;
eeer = rd32(IGC_EEER);
@@ -1647,9 +1646,6 @@ static int igc_ethtool_get_eee(struct net_device *netdev,
edata->eee_enabled = hw->dev_spec._base.eee_enable;
- edata->advertised = SUPPORTED_Autoneg;
- edata->lp_advertised = SUPPORTED_Autoneg;
-
/* Report correct negotiated EEE status for devices that
* wrongly report EEE at half-duplex
*/
@@ -1657,21 +1653,21 @@ static int igc_ethtool_get_eee(struct net_device *netdev,
edata->eee_enabled = false;
edata->eee_active = false;
edata->tx_lpi_enabled = false;
- edata->advertised &= ~edata->advertised;
+ linkmode_zero(edata->advertised);
}
return 0;
}
static int igc_ethtool_set_eee(struct net_device *netdev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct igc_adapter *adapter = netdev_priv(netdev);
struct igc_hw *hw = &adapter->hw;
- struct ethtool_eee eee_curr;
+ struct ethtool_keee eee_curr;
s32 ret_val;
- memset(&eee_curr, 0, sizeof(struct ethtool_eee));
+ memset(&eee_curr, 0, sizeof(struct ethtool_keee));
ret_val = igc_ethtool_get_eee(netdev, &eee_curr);
if (ret_val) {
@@ -1699,7 +1695,8 @@ static int igc_ethtool_set_eee(struct net_device *netdev,
return -EINVAL;
}
- adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+ adapter->eee_advert = linkmode_to_mii_eee_cap1_t(edata->advertised);
+
if (hw->dev_spec._base.eee_enable != edata->eee_enabled) {
hw->dev_spec._base.eee_enable = edata->eee_enabled;
adapter->flags |= IGC_FLAG_EEE;
diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c
new file mode 100644
index 000000000000..bf240c5daf86
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_leds.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024 Linutronix GmbH */
+
+#include <linux/bits.h>
+#include <linux/leds.h>
+#include <linux/netdevice.h>
+#include <linux/pm_runtime.h>
+#include <uapi/linux/uleds.h>
+
+#include "igc.h"
+
+#define IGC_NUM_LEDS 3
+
+#define IGC_LEDCTL_LED0_MODE_SHIFT 0
+#define IGC_LEDCTL_LED0_MODE_MASK GENMASK(3, 0)
+#define IGC_LEDCTL_LED0_BLINK BIT(7)
+#define IGC_LEDCTL_LED1_MODE_SHIFT 8
+#define IGC_LEDCTL_LED1_MODE_MASK GENMASK(11, 8)
+#define IGC_LEDCTL_LED1_BLINK BIT(15)
+#define IGC_LEDCTL_LED2_MODE_SHIFT 16
+#define IGC_LEDCTL_LED2_MODE_MASK GENMASK(19, 16)
+#define IGC_LEDCTL_LED2_BLINK BIT(23)
+
+#define IGC_LEDCTL_MODE_ON 0x00
+#define IGC_LEDCTL_MODE_OFF 0x01
+#define IGC_LEDCTL_MODE_LINK_10 0x05
+#define IGC_LEDCTL_MODE_LINK_100 0x06
+#define IGC_LEDCTL_MODE_LINK_1000 0x07
+#define IGC_LEDCTL_MODE_LINK_2500 0x08
+#define IGC_LEDCTL_MODE_ACTIVITY 0x0b
+
+#define IGC_SUPPORTED_MODES \
+ (BIT(TRIGGER_NETDEV_LINK_2500) | BIT(TRIGGER_NETDEV_LINK_1000) | \
+ BIT(TRIGGER_NETDEV_LINK_100) | BIT(TRIGGER_NETDEV_LINK_10) | \
+ BIT(TRIGGER_NETDEV_RX) | BIT(TRIGGER_NETDEV_TX))
+
+#define IGC_ACTIVITY_MODES \
+ (BIT(TRIGGER_NETDEV_RX) | BIT(TRIGGER_NETDEV_TX))
+
+struct igc_led_classdev {
+ struct net_device *netdev;
+ struct led_classdev led;
+ int index;
+};
+
+#define lcdev_to_igc_ldev(lcdev) \
+ container_of(lcdev, struct igc_led_classdev, led)
+
+static void igc_led_select(struct igc_adapter *adapter, int led,
+ u32 *mask, u32 *shift, u32 *blink)
+{
+ switch (led) {
+ case 0:
+ *mask = IGC_LEDCTL_LED0_MODE_MASK;
+ *shift = IGC_LEDCTL_LED0_MODE_SHIFT;
+ *blink = IGC_LEDCTL_LED0_BLINK;
+ break;
+ case 1:
+ *mask = IGC_LEDCTL_LED1_MODE_MASK;
+ *shift = IGC_LEDCTL_LED1_MODE_SHIFT;
+ *blink = IGC_LEDCTL_LED1_BLINK;
+ break;
+ case 2:
+ *mask = IGC_LEDCTL_LED2_MODE_MASK;
+ *shift = IGC_LEDCTL_LED2_MODE_SHIFT;
+ *blink = IGC_LEDCTL_LED2_BLINK;
+ break;
+ default:
+ *mask = *shift = *blink = 0;
+ netdev_err(adapter->netdev, "Unknown LED %d selected!\n", led);
+ }
+}
+
+static void igc_led_set(struct igc_adapter *adapter, int led, u32 mode,
+ bool blink)
+{
+ u32 shift, mask, blink_bit, ledctl;
+ struct igc_hw *hw = &adapter->hw;
+
+ igc_led_select(adapter, led, &mask, &shift, &blink_bit);
+
+ pm_runtime_get_sync(&adapter->pdev->dev);
+ mutex_lock(&adapter->led_mutex);
+
+ /* Set mode */
+ ledctl = rd32(IGC_LEDCTL);
+ ledctl &= ~mask;
+ ledctl |= mode << shift;
+
+ /* Configure blinking */
+ if (blink)
+ ledctl |= blink_bit;
+ else
+ ledctl &= ~blink_bit;
+ wr32(IGC_LEDCTL, ledctl);
+
+ mutex_unlock(&adapter->led_mutex);
+ pm_runtime_put(&adapter->pdev->dev);
+}
+
+static u32 igc_led_get(struct igc_adapter *adapter, int led)
+{
+ u32 shift, mask, blink_bit, ledctl;
+ struct igc_hw *hw = &adapter->hw;
+
+ igc_led_select(adapter, led, &mask, &shift, &blink_bit);
+
+ pm_runtime_get_sync(&adapter->pdev->dev);
+ mutex_lock(&adapter->led_mutex);
+ ledctl = rd32(IGC_LEDCTL);
+ mutex_unlock(&adapter->led_mutex);
+ pm_runtime_put(&adapter->pdev->dev);
+
+ return (ledctl & mask) >> shift;
+}
+
+static int igc_led_brightness_set_blocking(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+{
+ struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev);
+ struct igc_adapter *adapter = netdev_priv(ldev->netdev);
+ u32 mode;
+
+ if (brightness)
+ mode = IGC_LEDCTL_MODE_ON;
+ else
+ mode = IGC_LEDCTL_MODE_OFF;
+
+ netdev_dbg(adapter->netdev, "Set brightness for LED %d to mode %u!\n",
+ ldev->index, mode);
+
+ igc_led_set(adapter, ldev->index, mode, false);
+
+ return 0;
+}
+
+static int igc_led_hw_control_is_supported(struct led_classdev *led_cdev,
+ unsigned long flags)
+{
+ if (flags & ~IGC_SUPPORTED_MODES)
+ return -EOPNOTSUPP;
+
+ /* If Tx and Rx selected, activity can be offloaded unless some other
+ * mode is selected as well.
+ */
+ if ((flags & BIT(TRIGGER_NETDEV_TX)) &&
+ (flags & BIT(TRIGGER_NETDEV_RX)) &&
+ !(flags & ~IGC_ACTIVITY_MODES))
+ return 0;
+
+ /* Single Rx or Tx activity is not supported. */
+ if (flags & IGC_ACTIVITY_MODES)
+ return -EOPNOTSUPP;
+
+ /* Only one mode can be active at a given time. */
+ if (flags & (flags - 1))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int igc_led_hw_control_set(struct led_classdev *led_cdev,
+ unsigned long flags)
+{
+ struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev);
+ struct igc_adapter *adapter = netdev_priv(ldev->netdev);
+ u32 mode = IGC_LEDCTL_MODE_OFF;
+ bool blink = false;
+
+ if (flags & BIT(TRIGGER_NETDEV_LINK_10))
+ mode = IGC_LEDCTL_MODE_LINK_10;
+ if (flags & BIT(TRIGGER_NETDEV_LINK_100))
+ mode = IGC_LEDCTL_MODE_LINK_100;
+ if (flags & BIT(TRIGGER_NETDEV_LINK_1000))
+ mode = IGC_LEDCTL_MODE_LINK_1000;
+ if (flags & BIT(TRIGGER_NETDEV_LINK_2500))
+ mode = IGC_LEDCTL_MODE_LINK_2500;
+ if ((flags & BIT(TRIGGER_NETDEV_TX)) &&
+ (flags & BIT(TRIGGER_NETDEV_RX)))
+ mode = IGC_LEDCTL_MODE_ACTIVITY;
+
+ netdev_dbg(adapter->netdev, "Set HW control for LED %d to mode %u!\n",
+ ldev->index, mode);
+
+ /* blink is recommended for activity */
+ if (mode == IGC_LEDCTL_MODE_ACTIVITY)
+ blink = true;
+
+ igc_led_set(adapter, ldev->index, mode, blink);
+
+ return 0;
+}
+
+static int igc_led_hw_control_get(struct led_classdev *led_cdev,
+ unsigned long *flags)
+{
+ struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev);
+ struct igc_adapter *adapter = netdev_priv(ldev->netdev);
+ u32 mode;
+
+ mode = igc_led_get(adapter, ldev->index);
+
+ switch (mode) {
+ case IGC_LEDCTL_MODE_ACTIVITY:
+ *flags = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX);
+ break;
+ case IGC_LEDCTL_MODE_LINK_10:
+ *flags = BIT(TRIGGER_NETDEV_LINK_10);
+ break;
+ case IGC_LEDCTL_MODE_LINK_100:
+ *flags = BIT(TRIGGER_NETDEV_LINK_100);
+ break;
+ case IGC_LEDCTL_MODE_LINK_1000:
+ *flags = BIT(TRIGGER_NETDEV_LINK_1000);
+ break;
+ case IGC_LEDCTL_MODE_LINK_2500:
+ *flags = BIT(TRIGGER_NETDEV_LINK_2500);
+ break;
+ }
+
+ return 0;
+}
+
+static struct device *igc_led_hw_control_get_device(struct led_classdev *led_cdev)
+{
+ struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev);
+
+ return &ldev->netdev->dev;
+}
+
+static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf,
+ size_t buf_len)
+{
+ snprintf(buf, buf_len, "igc-%x%x-led%d",
+ pci_domain_nr(adapter->pdev->bus),
+ pci_dev_id(adapter->pdev), index);
+}
+
+static void igc_setup_ldev(struct igc_led_classdev *ldev,
+ struct net_device *netdev, int index)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct led_classdev *led_cdev = &ldev->led;
+ char led_name[LED_MAX_NAME_SIZE];
+
+ ldev->netdev = netdev;
+ ldev->index = index;
+
+ igc_led_get_name(adapter, index, led_name, LED_MAX_NAME_SIZE);
+ led_cdev->name = led_name;
+ led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN;
+ led_cdev->max_brightness = 1;
+ led_cdev->brightness_set_blocking = igc_led_brightness_set_blocking;
+ led_cdev->hw_control_trigger = "netdev";
+ led_cdev->hw_control_is_supported = igc_led_hw_control_is_supported;
+ led_cdev->hw_control_set = igc_led_hw_control_set;
+ led_cdev->hw_control_get = igc_led_hw_control_get;
+ led_cdev->hw_control_get_device = igc_led_hw_control_get_device;
+
+ devm_led_classdev_register(&netdev->dev, led_cdev);
+}
+
+int igc_led_setup(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct device *dev = &netdev->dev;
+ struct igc_led_classdev *leds;
+ int i;
+
+ mutex_init(&adapter->led_mutex);
+
+ leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ if (!leds)
+ return -ENOMEM;
+
+ for (i = 0; i < IGC_NUM_LEDS; i++)
+ igc_setup_ldev(leds + i, netdev, i);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ba8d3fe186ae..2e1cfbd82f4f 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3385,7 +3385,7 @@ static int igc_flex_filter_select(struct igc_adapter *adapter,
u32 fhftsl;
if (input->index >= MAX_FLEX_FILTER) {
- dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
+ netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n");
return -EINVAL;
}
@@ -3420,7 +3420,6 @@ static int igc_flex_filter_select(struct igc_adapter *adapter,
static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
struct igc_flex_filter *input)
{
- struct device *dev = &adapter->pdev->dev;
struct igc_hw *hw = &adapter->hw;
u8 *data = input->data;
u8 *mask = input->mask;
@@ -3434,7 +3433,7 @@ static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
* out early to avoid surprises later.
*/
if (input->length % 8 != 0) {
- dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
+ netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n");
return -EINVAL;
}
@@ -3504,8 +3503,8 @@ static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
}
wr32(IGC_WUFC, wufc);
- dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
- input->index);
+ netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n",
+ input->index);
return 0;
}
@@ -3577,9 +3576,9 @@ static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
static int igc_add_flex_filter(struct igc_adapter *adapter,
struct igc_nfc_rule *rule)
{
- struct igc_flex_filter flex = { };
struct igc_nfc_filter *filter = &rule->filter;
unsigned int eth_offset, user_offset;
+ struct igc_flex_filter flex = { };
int ret, index;
bool vlan;
@@ -3615,10 +3614,12 @@ static int igc_add_flex_filter(struct igc_adapter *adapter,
ETH_ALEN, NULL);
/* Add VLAN etype */
- if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
- igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
- sizeof(filter->vlan_etype),
- NULL);
+ if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) {
+ __be16 vlan_etype = cpu_to_be16(filter->vlan_etype);
+
+ igc_flex_filter_add_field(&flex, &vlan_etype, 12,
+ sizeof(vlan_etype), NULL);
+ }
/* Add VLAN TCI */
if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
@@ -5276,7 +5277,7 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
unsigned int network_hdr_len, mac_hdr_len;
/* Make certain the headers can be described by a context descriptor */
- mac_hdr_len = skb_network_header(skb) - skb->data;
+ mac_hdr_len = skb_network_offset(skb);
if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
@@ -5302,25 +5303,22 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
static void igc_tsync_interrupt(struct igc_adapter *adapter)
{
- u32 ack, tsauxc, sec, nsec, tsicr;
struct igc_hw *hw = &adapter->hw;
+ u32 tsauxc, sec, nsec, tsicr;
struct ptp_clock_event event;
struct timespec64 ts;
tsicr = rd32(IGC_TSICR);
- ack = 0;
if (tsicr & IGC_TSICR_SYS_WRAP) {
event.type = PTP_CLOCK_PPS;
if (adapter->ptp_caps.pps)
ptp_clock_event(adapter->ptp_clock, &event);
- ack |= IGC_TSICR_SYS_WRAP;
}
if (tsicr & IGC_TSICR_TXTS) {
/* retrieve hardware timestamp */
igc_ptp_tx_tstamp_event(adapter);
- ack |= IGC_TSICR_TXTS;
}
if (tsicr & IGC_TSICR_TT0) {
@@ -5334,7 +5332,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
wr32(IGC_TSAUXC, tsauxc);
adapter->perout[0].start = ts;
spin_unlock(&adapter->tmreg_lock);
- ack |= IGC_TSICR_TT0;
}
if (tsicr & IGC_TSICR_TT1) {
@@ -5348,7 +5345,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
wr32(IGC_TSAUXC, tsauxc);
adapter->perout[1].start = ts;
spin_unlock(&adapter->tmreg_lock);
- ack |= IGC_TSICR_TT1;
}
if (tsicr & IGC_TSICR_AUTT0) {
@@ -5358,7 +5354,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
event.index = 0;
event.timestamp = sec * NSEC_PER_SEC + nsec;
ptp_clock_event(adapter->ptp_clock, &event);
- ack |= IGC_TSICR_AUTT0;
}
if (tsicr & IGC_TSICR_AUTT1) {
@@ -5368,11 +5363,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
event.index = 1;
event.timestamp = sec * NSEC_PER_SEC + nsec;
ptp_clock_event(adapter->ptp_clock, &event);
- ack |= IGC_TSICR_AUTT1;
}
-
- /* acknowledge the interrupts */
- wr32(IGC_TSICR, ack);
}
/**
@@ -6487,7 +6478,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
int cpu = smp_processor_id();
struct netdev_queue *nq;
struct igc_ring *ring;
- int i, drops;
+ int i, nxmit;
if (unlikely(!netif_carrier_ok(dev)))
return -ENETDOWN;
@@ -6503,16 +6494,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
/* Avoid transmit queue timeout since we share it with the slow path */
txq_trans_cond_update(nq);
- drops = 0;
+ nxmit = 0;
for (i = 0; i < num_frames; i++) {
int err;
struct xdp_frame *xdpf = frames[i];
err = igc_xdp_init_tx_descriptor(ring, xdpf);
- if (err) {
- xdp_return_frame_rx_napi(xdpf);
- drops++;
- }
+ if (err)
+ break;
+ nxmit++;
}
if (flags & XDP_XMIT_FLUSH)
@@ -6520,7 +6510,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
__netif_tx_unlock(nq);
- return num_frames - drops;
+ return nxmit;
}
static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
@@ -6977,6 +6967,12 @@ static int igc_probe(struct pci_dev *pdev,
pm_runtime_put_noidle(&pdev->dev);
+ if (IS_ENABLED(CONFIG_IGC_LEDS)) {
+ err = igc_led_setup(adapter);
+ if (err)
+ goto err_register;
+ }
+
return 0;
err_register:
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 7cd8716d2ffa..861f37076861 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -130,11 +130,7 @@ void igc_power_down_phy_copper(struct igc_hw *hw)
/* The PHY will retain its settings across a power down/up cycle */
hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
mii_reg |= MII_CR_POWER_DOWN;
-
- /* Temporary workaround - should be removed when PHY will implement
- * IEEE registers as properly
- */
- /* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
usleep_range(1000, 2000);
}
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index d38c87d7e5e8..e5b893fc5b66 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -12,6 +12,7 @@
#define IGC_MDIC 0x00020 /* MDI Control - RW */
#define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */
#define IGC_VET 0x00038 /* VLAN Ether Type - RW */
+#define IGC_LEDCTL 0x00E00 /* LED Control - RW */
#define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */
#define IGC_GPHY_VERSION 0x0001E /* I225 gPHY Firmware Version */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index b6f0376e42f4..559b443c409f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -949,19 +949,19 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
void ixgbe_write_eitr(struct ixgbe_q_vector *);
int ixgbe_poll(struct napi_struct *napi, int budget);
int ethtool_ioctl(struct ifreq *ifr);
-s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
-s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
-s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
-s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+int ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
+int ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+int ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+int ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
union ixgbe_atr_hash_dword input,
union ixgbe_atr_hash_dword common,
u8 queue);
-s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
union ixgbe_atr_input *input_mask);
-s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
union ixgbe_atr_input *input,
u16 soft_id, u8 queue);
-s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
union ixgbe_atr_input *input,
u16 soft_id);
void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
@@ -1059,7 +1059,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter);
void ixgbe_store_key(struct ixgbe_adapter *adapter);
void ixgbe_store_reta(struct ixgbe_adapter *adapter);
-s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+int ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
#ifdef CONFIG_IXGBE_IPSEC
void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 6835d5f18753..283a23150a4d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -15,10 +15,10 @@
#define IXGBE_82598_VFT_TBL_SIZE 128
#define IXGBE_82598_RX_PB_SIZE 512
-static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+static int ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
-static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
u8 *eeprom_data);
/**
@@ -66,7 +66,7 @@ out:
IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr);
}
-static s32 ixgbe_get_invariants_82598(struct ixgbe_hw *hw)
+static int ixgbe_get_invariants_82598(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
@@ -93,12 +93,12 @@ static s32 ixgbe_get_invariants_82598(struct ixgbe_hw *hw)
* not known. Perform the SFP init if necessary.
*
**/
-static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
+static int ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
struct ixgbe_phy_info *phy = &hw->phy;
- s32 ret_val;
u16 list_offset, data_offset;
+ int ret_val;
/* Identify the PHY */
phy->ops.identify(hw);
@@ -148,9 +148,9 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
* Then set pcie completion timeout
*
**/
-static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
+static int ixgbe_start_hw_82598(struct ixgbe_hw *hw)
{
- s32 ret_val;
+ int ret_val;
ret_val = ixgbe_start_hw_generic(hw);
if (ret_val)
@@ -170,7 +170,7 @@ static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
*
* Determines the link capabilities by reading the AUTOC register.
**/
-static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
+static int ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *autoneg)
{
@@ -271,7 +271,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw)
*
* Enable flow control according to the current settings.
**/
-static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
+static int ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
{
u32 fctrl_reg;
u32 rmcs_reg;
@@ -411,13 +411,13 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
* Configures link settings based on values in the ixgbe_hw struct.
* Restarts the link. Performs autonegotiation if needed.
**/
-static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
+static int ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
bool autoneg_wait_to_complete)
{
+ int status = 0;
u32 autoc_reg;
u32 links_reg;
u32 i;
- s32 status = 0;
/* Restart link */
autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
@@ -457,7 +457,7 @@ static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
* Function indicates success when phy link is available. If phy is not ready
* within 5 seconds of MAC indicating link, the function returns error.
**/
-static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw)
+static int ixgbe_validate_link_ready(struct ixgbe_hw *hw)
{
u32 timeout;
u16 an_reg;
@@ -493,7 +493,7 @@ static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw)
*
* Reads the links register to determine if link is up and the current speed
**/
-static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
+static int ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
ixgbe_link_speed *speed, bool *link_up,
bool link_up_wait_to_complete)
{
@@ -579,7 +579,7 @@ static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
*
* Set the link speed in the AUTOC register and restarts link.
**/
-static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
+static int ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete)
{
@@ -624,11 +624,11 @@ static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
*
* Sets the link speed in the AUTOC register in the MAC and restarts link.
**/
-static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg_wait_to_complete)
+static int ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete)
{
- s32 status;
+ int status;
/* Setup the PHY according to input speed */
status = hw->phy.ops.setup_link_speed(hw, speed,
@@ -647,15 +647,15 @@ static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
* clears all interrupts, performing a PHY reset, and performing a link (MAC)
* reset.
**/
-static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
+static int ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
{
- s32 status;
- s32 phy_status = 0;
- u32 ctrl;
+ int phy_status = 0;
+ u8 analog_val;
u32 gheccr;
- u32 i;
+ int status;
u32 autoc;
- u8 analog_val;
+ u32 ctrl;
+ u32 i;
/* Call adapter stop to disable tx/rx and clear interrupts */
status = hw->mac.ops.stop_adapter(hw);
@@ -781,7 +781,7 @@ mac_reset_top:
* @rar: receive address register index to associate with a VMDq index
* @vmdq: VMDq set index
**/
-static s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+static int ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
{
u32 rar_high;
u32 rar_entries = hw->mac.num_rar_entries;
@@ -805,7 +805,7 @@ static s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
* @rar: receive address register index to associate with a VMDq index
* @vmdq: VMDq clear index (not used in 82598, but elsewhere)
**/
-static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+static int ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
{
u32 rar_high;
u32 rar_entries = hw->mac.num_rar_entries;
@@ -836,7 +836,7 @@ static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
*
* Turn on/off specified VLAN in the VLAN filter table.
**/
-static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+static int ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
bool vlan_on, bool vlvf_bypass)
{
u32 regindex;
@@ -881,7 +881,7 @@ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
*
* Clears the VLAN filter table, and the VMDq index associated with the filter
**/
-static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
+static int ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
{
u32 offset;
u32 vlanbyte;
@@ -905,7 +905,7 @@ static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
*
* Performs read operation to Atlas analog register specified.
**/
-static s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
+static int ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
{
u32 atlas_ctl;
@@ -927,7 +927,7 @@ static s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
*
* Performs write operation to Atlas analog register specified.
**/
-static s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
+static int ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
{
u32 atlas_ctl;
@@ -948,13 +948,13 @@ static s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
*
* Performs 8 byte read operation to SFP module's data over I2C interface.
**/
-static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr,
+static int ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr,
u8 byte_offset, u8 *eeprom_data)
{
- s32 status = 0;
u16 sfp_addr = 0;
u16 sfp_data = 0;
u16 sfp_stat = 0;
+ int status = 0;
u16 gssr;
u32 i;
@@ -1019,7 +1019,7 @@ out:
*
* Performs 8 byte read operation to SFP module's EEPROM over I2C interface.
**/
-static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
u8 *eeprom_data)
{
return ixgbe_read_i2c_phy_82598(hw, IXGBE_I2C_EEPROM_DEV_ADDR,
@@ -1034,8 +1034,8 @@ static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
*
* Performs 8 byte read operation to SFP module's SFF-8472 data over I2C
**/
-static s32 ixgbe_read_i2c_sff8472_82598(struct ixgbe_hw *hw, u8 byte_offset,
- u8 *sff8472_data)
+static int ixgbe_read_i2c_sff8472_82598(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 *sff8472_data)
{
return ixgbe_read_i2c_phy_82598(hw, IXGBE_I2C_EEPROM_DEV_ADDR2,
byte_offset, sff8472_data);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 339e106a5732..cdaf087b4e85 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -21,24 +21,24 @@ static void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
static void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
static void
ixgbe_set_hard_rate_select_speed(struct ixgbe_hw *, ixgbe_link_speed);
-static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+static int ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
-static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+static int ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
bool autoneg_wait_to_complete);
-static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg_wait_to_complete);
-static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+static int ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete);
+static int ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
-static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
-static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
+static int ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data);
-static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data);
-static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw);
+static int ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw);
static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw);
bool ixgbe_mng_enabled(struct ixgbe_hw *hw)
@@ -98,10 +98,10 @@ static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
}
}
-static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw)
+static int ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw)
{
- s32 ret_val;
u16 list_offset, data_offset, data_value;
+ int ret_val;
if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) {
ixgbe_init_mac_link_ops_82599(hw);
@@ -173,10 +173,10 @@ setup_sfp_err:
* prot_autoc_write_82599(). Note, that locked can only be true in cases
* where this function doesn't return an error.
**/
-static s32 prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked,
+static int prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked,
u32 *reg_val)
{
- s32 ret_val;
+ int ret_val;
*locked = false;
/* If LESM is on then we need to hold the SW/FW semaphore. */
@@ -203,9 +203,9 @@ static s32 prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked,
* This part (82599) may need to hold a the SW/FW lock around all writes to
* AUTOC. Likewise after a write we need to do a pipeline reset.
**/
-static s32 prot_autoc_write_82599(struct ixgbe_hw *hw, u32 autoc, bool locked)
+static int prot_autoc_write_82599(struct ixgbe_hw *hw, u32 autoc, bool locked)
{
- s32 ret_val = 0;
+ int ret_val = 0;
/* Blocked by MNG FW so bail */
if (ixgbe_check_reset_blocked(hw))
@@ -237,7 +237,7 @@ out:
return ret_val;
}
-static s32 ixgbe_get_invariants_82599(struct ixgbe_hw *hw)
+static int ixgbe_get_invariants_82599(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
@@ -263,11 +263,11 @@ static s32 ixgbe_get_invariants_82599(struct ixgbe_hw *hw)
* not known. Perform the SFP init if necessary.
*
**/
-static s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
+static int ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
struct ixgbe_phy_info *phy = &hw->phy;
- s32 ret_val;
+ int ret_val;
u32 esdp;
if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) {
@@ -322,7 +322,7 @@ static s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
*
* Determines the link capabilities by reading the AUTOC register.
**/
-static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
+static int ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *autoneg)
{
@@ -334,7 +334,9 @@ static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1) {
*speed = IXGBE_LINK_SPEED_1GB_FULL;
*autoneg = true;
return 0;
@@ -500,14 +502,14 @@ static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw)
* Configures link settings based on values in the ixgbe_hw struct.
* Restarts the link. Performs autonegotiation if needed.
**/
-static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
- bool autoneg_wait_to_complete)
+static int ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+ bool autoneg_wait_to_complete)
{
+ bool got_lock = false;
+ int status = 0;
u32 autoc_reg;
u32 links_reg;
u32 i;
- s32 status = 0;
- bool got_lock = false;
if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
status = hw->mac.ops.acquire_swfw_sync(hw,
@@ -657,15 +659,15 @@ ixgbe_set_hard_rate_select_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed)
*
* Implements the Intel SmartSpeed algorithm.
**/
-static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg_wait_to_complete)
+static int ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete)
{
- s32 status = 0;
ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
- s32 i, j;
- bool link_up = false;
u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ bool link_up = false;
+ int status = 0;
+ s32 i, j;
/* Set autoneg_advertised value based on input link speed */
hw->phy.autoneg_advertised = 0;
@@ -767,16 +769,15 @@ out:
*
* Set the link speed in the AUTOC register and restarts link.
**/
-static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
+static int ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete)
{
- bool autoneg = false;
- s32 status;
- u32 pma_pmd_1g, link_mode, links_reg, i;
- u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
- u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+ u32 pma_pmd_10g_serial, pma_pmd_1g, link_mode, links_reg, i;
+ u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+ bool autoneg = false;
+ int status;
/* holds the value of AUTOC register at this current point in time */
u32 current_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
@@ -785,6 +786,8 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
/* temporary variable used for comparison purposes */
u32 autoc = current_autoc;
+ pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
+
/* Check to see if speed passed in is supported. */
status = hw->mac.ops.get_link_capabilities(hw, &link_capabilities,
&autoneg);
@@ -882,11 +885,11 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
*
* Restarts link on PHY and MAC based on settings passed in.
**/
-static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+static int ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete)
{
- s32 status;
+ int status;
/* Setup the PHY according to input speed */
status = hw->phy.ops.setup_link_speed(hw, speed,
@@ -905,13 +908,13 @@ static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
* and clears all interrupts, perform a PHY reset, and perform a link (MAC)
* reset.
**/
-static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
+static int ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
{
ixgbe_link_speed link_speed;
- s32 status;
u32 ctrl, i, autoc, autoc2;
- u32 curr_lms;
bool link_up = false;
+ u32 curr_lms;
+ int status;
/* Call adapter stop to disable tx/rx and clear interrupts */
status = hw->mac.ops.stop_adapter(hw);
@@ -1081,7 +1084,7 @@ mac_reset_top:
* @hw: pointer to hardware structure
* @fdircmd: current value of FDIRCMD register
*/
-static s32 ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd)
+static int ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd)
{
int i;
@@ -1099,12 +1102,12 @@ static s32 ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd)
* ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables.
* @hw: pointer to hardware structure
**/
-s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
+int ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
{
- int i;
u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
u32 fdircmd;
- s32 err;
+ int err;
+ int i;
fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
@@ -1212,7 +1215,7 @@ static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
* @fdirctrl: value to write to flow director control register, initially
* contains just the value of the Rx packet buffer allocation
**/
-s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+int ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
{
/*
* Continue setup of fdirctrl register bits:
@@ -1236,7 +1239,7 @@ s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
* @fdirctrl: value to write to flow director control register, initially
* contains just the value of the Rx packet buffer allocation
**/
-s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+int ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
{
/*
* Continue setup of fdirctrl register bits:
@@ -1359,7 +1362,7 @@ static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
* Note that the tunnel bit in input must not be set when the hardware
* tunneling support does not exist.
**/
-s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
union ixgbe_atr_hash_dword input,
union ixgbe_atr_hash_dword common,
u8 queue)
@@ -1515,7 +1518,7 @@ static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
#define IXGBE_STORE_AS_BE16(_value) __swab16(ntohs((_value)))
-s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
union ixgbe_atr_input *input_mask)
{
/* mask IPv6 since it is currently not supported */
@@ -1627,12 +1630,12 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
return 0;
}
-s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
union ixgbe_atr_input *input,
u16 soft_id, u8 queue)
{
u32 fdirport, fdirvlan, fdirhash, fdircmd;
- s32 err;
+ int err;
/* currently IPv6 is not supported, must be programmed with 0 */
IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
@@ -1690,13 +1693,13 @@ s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
return 0;
}
-s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+int ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
union ixgbe_atr_input *input,
u16 soft_id)
{
u32 fdirhash;
u32 fdircmd;
- s32 err;
+ int err;
/* configure FDIRHASH register */
fdirhash = (__force u32)input->formatted.bkt_hash;
@@ -1734,7 +1737,7 @@ s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
*
* Performs read operation to Omer analog register specified.
**/
-static s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
+static int ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
{
u32 core_ctl;
@@ -1756,7 +1759,7 @@ static s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
*
* Performs write operation to Omer analog register specified.
**/
-static s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
+static int ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
{
u32 core_ctl;
@@ -1776,9 +1779,9 @@ static s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
* and the generation start_hw function.
* Then performs revision-specific operations, if any.
**/
-static s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw)
+static int ixgbe_start_hw_82599(struct ixgbe_hw *hw)
{
- s32 ret_val = 0;
+ int ret_val = 0;
ret_val = ixgbe_start_hw_generic(hw);
if (ret_val)
@@ -1802,9 +1805,9 @@ static s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw)
* If PHY already detected, maintains current PHY type in hw struct,
* otherwise executes the PHY detection routine.
**/
-static s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
+static int ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
{
- s32 status;
+ int status;
/* Detect PHY if not unknown - returns success if already detected. */
status = ixgbe_identify_phy_generic(hw);
@@ -1835,7 +1838,7 @@ static s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
*
* Enables the Rx DMA unit for 82599
**/
-static s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
+static int ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
{
/*
* Workaround for 82599 silicon errata when enabling the Rx datapath.
@@ -1865,12 +1868,12 @@ static s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
* Return: -EACCES if the FW is not present or if the FW version is
* not supported.
**/
-static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
+static int ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
{
u16 fw_offset, fw_ptp_cfg_offset;
- s32 status = -EACCES;
- u16 offset;
+ int status = -EACCES;
u16 fw_version = 0;
+ u16 offset;
/* firmware check is only necessary for SFI devices */
if (hw->phy.media_type != ixgbe_media_type_fiber)
@@ -1917,7 +1920,7 @@ fw_version_err:
static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw)
{
u16 fw_offset, fw_lesm_param_offset, fw_lesm_state;
- s32 status;
+ int status;
/* get the offset to the Firmware Module block */
status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
@@ -1956,7 +1959,7 @@ static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw)
*
* Retrieves 16 bit word(s) read from EEPROM
**/
-static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
+static int ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
@@ -1982,7 +1985,7 @@ static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
*
* Reads a 16 bit word from the EEPROM
**/
-static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
+static int ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
u16 offset, u16 *data)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
@@ -2006,11 +2009,11 @@ static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
* full pipeline reset. Note - We must hold the SW/FW semaphore before writing
* to AUTOC, so this function assumes the semaphore is held.
**/
-static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw)
+static int ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw)
{
- s32 ret_val;
- u32 anlp1_reg = 0;
u32 i, autoc_reg, autoc2_reg;
+ u32 anlp1_reg = 0;
+ int ret_val;
/* Enable link if disabled in NVM */
autoc2_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
@@ -2061,12 +2064,12 @@ reset_pipeline_out:
* Performs byte read operation to SFP module's EEPROM over I2C interface at
* a specified device address.
**/
-static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data)
{
- u32 esdp;
- s32 status;
s32 timeout = 200;
+ int status;
+ u32 esdp;
if (hw->phy.qsfp_shared_i2c_bus == true) {
/* Acquire I2C bus ownership. */
@@ -2115,12 +2118,12 @@ release_i2c_access:
* Performs byte write operation to SFP module's EEPROM over I2C interface at
* a specified device address.
**/
-static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data)
{
- u32 esdp;
- s32 status;
s32 timeout = 200;
+ int status;
+ u32 esdp;
if (hw->phy.qsfp_shared_i2c_bus == true) {
/* Acquire I2C bus ownership. */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 2e6e0365154a..3be1bfb16498 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -10,10 +10,10 @@
#include "ixgbe_common.h"
#include "ixgbe_phy.h"
-static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw);
-static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw);
+static int ixgbe_acquire_eeprom(struct ixgbe_hw *hw);
+static int ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw);
static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw);
-static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw);
+static int ixgbe_ready_eeprom(struct ixgbe_hw *hw);
static void ixgbe_standby_eeprom(struct ixgbe_hw *hw);
static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
u16 count);
@@ -22,15 +22,15 @@ static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
static void ixgbe_release_eeprom(struct ixgbe_hw *hw);
-static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr);
-static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg);
-static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+static int ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr);
+static int ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg);
+static int ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data);
-static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+static int ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+static int ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
u16 offset);
-static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw);
+static int ixgbe_disable_pcie_primary(struct ixgbe_hw *hw);
/* Base table for registers values that change by MAC */
const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT] = {
@@ -111,12 +111,12 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
*
* Called at init time to set up flow control.
**/
-s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw)
+int ixgbe_setup_fc_generic(struct ixgbe_hw *hw)
{
- s32 ret_val = 0;
u32 reg = 0, reg_bp = 0;
- u16 reg_cu = 0;
bool locked = false;
+ int ret_val = 0;
+ u16 reg_cu = 0;
/*
* Validate the requested mode. Strict IEEE mode does not allow
@@ -267,11 +267,11 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw)
* table, VLAN filter table, calls routine to set up link and flow control
* settings, and leaves transmit and receive units disabled and uninitialized
**/
-s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
+int ixgbe_start_hw_generic(struct ixgbe_hw *hw)
{
- s32 ret_val;
- u32 ctrl_ext;
u16 device_caps;
+ u32 ctrl_ext;
+ int ret_val;
/* Set the media type */
hw->phy.media_type = hw->mac.ops.get_media_type(hw);
@@ -330,7 +330,7 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
* 82599
* X540
**/
-s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
+int ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
{
u32 i;
@@ -354,9 +354,9 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
* up link and flow control settings, and leaves transmit and receive units
* disabled and uninitialized
**/
-s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
+int ixgbe_init_hw_generic(struct ixgbe_hw *hw)
{
- s32 status;
+ int status;
/* Reset the hardware */
status = hw->mac.ops.reset_hw(hw);
@@ -380,7 +380,7 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
* Clears all hardware statistics counters by reading them from the hardware
* Statistics counters are clear on read.
**/
-s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
+int ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
{
u16 i = 0;
@@ -489,14 +489,14 @@ s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
*
* Reads the part number string from the EEPROM.
**/
-s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+int ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
u32 pba_num_size)
{
- s32 ret_val;
- u16 data;
+ int ret_val;
u16 pba_ptr;
u16 offset;
u16 length;
+ u16 data;
if (pba_num == NULL) {
hw_dbg(hw, "PBA string buffer was null\n");
@@ -599,7 +599,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
* A reset of the adapter must be performed prior to calling this function
* in order for the MAC address to have been loaded from the EEPROM into RAR0
**/
-s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
+int ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
{
u32 rar_high;
u32 rar_low;
@@ -653,7 +653,7 @@ enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status)
*
* Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
**/
-s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw)
+int ixgbe_get_bus_info_generic(struct ixgbe_hw *hw)
{
u16 link_status;
@@ -709,7 +709,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
* the shared code and drivers to determine if the adapter is in a stopped
* state and should not touch the hardware.
**/
-s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
+int ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
{
u32 reg_val;
u16 i;
@@ -759,7 +759,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
* Store the index for the link active LED. This will be used to support
* blinking the LED.
**/
-s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
+int ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
u32 led_reg, led_mode;
@@ -800,7 +800,7 @@ s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
* @hw: pointer to hardware structure
* @index: led number to turn on
**/
-s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
+int ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
{
u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
@@ -821,7 +821,7 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
* @hw: pointer to hardware structure
* @index: led number to turn off
**/
-s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
+int ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
{
u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
@@ -844,7 +844,7 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
* Initializes the EEPROM parameters ixgbe_eeprom_info within the
* ixgbe_hw struct in order to set up EEPROM access.
**/
-s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
+int ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
u32 eec;
@@ -895,11 +895,11 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
*
* Reads 16 bit word(s) from EEPROM through bit-bang method
**/
-s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
- s32 status;
u16 i, count;
+ int status;
hw->eeprom.ops.init_params(hw);
@@ -942,14 +942,14 @@ s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
* If ixgbe_eeprom_update_checksum is not called after this function, the
* EEPROM will most likely contain an invalid checksum.
**/
-static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+static int ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
- s32 status;
- u16 word;
+ u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI;
u16 page_size;
+ int status;
+ u16 word;
u16 i;
- u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI;
/* Prepare the EEPROM for writing */
status = ixgbe_acquire_eeprom(hw);
@@ -1019,7 +1019,7 @@ static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
* If ixgbe_eeprom_update_checksum is not called after this function, the
* EEPROM will most likely contain an invalid checksum.
**/
-s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+int ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
{
hw->eeprom.ops.init_params(hw);
@@ -1038,11 +1038,11 @@ s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
*
* Reads 16 bit word(s) from EEPROM through bit-bang method
**/
-s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
- s32 status;
u16 i, count;
+ int status;
hw->eeprom.ops.init_params(hw);
@@ -1077,12 +1077,12 @@ s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
*
* Reads 16 bit word(s) from EEPROM through bit-bang method
**/
-static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+static int ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
- s32 status;
- u16 word_in;
u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI;
+ u16 word_in;
+ int status;
u16 i;
/* Prepare the EEPROM for reading */
@@ -1129,7 +1129,7 @@ static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
*
* Reads 16 bit value from EEPROM through bit-bang method
**/
-s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
u16 *data)
{
hw->eeprom.ops.init_params(hw);
@@ -1149,11 +1149,11 @@ s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
*
* Reads a 16 bit word(s) from the EEPROM using the EERD register.
**/
-s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
+ int status;
u32 eerd;
- s32 status;
u32 i;
hw->eeprom.ops.init_params(hw);
@@ -1189,11 +1189,11 @@ s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
* This function is called only when we are writing a new large buffer
* at given offset so the data would be overwritten anyway.
**/
-static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+static int ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
u16 offset)
{
u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX];
- s32 status;
+ int status;
u16 i;
for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++)
@@ -1229,7 +1229,7 @@ static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
*
* Reads a 16 bit word from the EEPROM using the EERD register.
**/
-s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
+int ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
{
return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data);
}
@@ -1243,11 +1243,11 @@ s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
*
* Write a 16 bit word(s) to the EEPROM using the EEWR register.
**/
-s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data)
{
+ int status;
u32 eewr;
- s32 status;
u16 i;
hw->eeprom.ops.init_params(hw);
@@ -1286,7 +1286,7 @@ s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
*
* Write a 16 bit word to the EEPROM using the EEWR register.
**/
-s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+int ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
{
return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data);
}
@@ -1299,7 +1299,7 @@ s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
* Polls the status bit (bit 1) of the EERD or EEWR to determine when the
* read or write is done respectively.
**/
-static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
+static int ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
{
u32 i;
u32 reg;
@@ -1325,7 +1325,7 @@ static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
* Prepares EEPROM for access using bit-bang method. This function should
* be called before issuing a command to the EEPROM.
**/
-static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
+static int ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
{
u32 eec;
u32 i;
@@ -1371,7 +1371,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
*
* Sets the hardware semaphores so EEPROM access can occur for bit-bang method
**/
-static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
+static int ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
{
u32 timeout = 2000;
u32 i;
@@ -1462,7 +1462,7 @@ static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw)
* ixgbe_ready_eeprom - Polls for EEPROM ready
* @hw: pointer to hardware structure
**/
-static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw)
+static int ixgbe_ready_eeprom(struct ixgbe_hw *hw)
{
u16 i;
u8 spi_stat_reg;
@@ -1680,7 +1680,7 @@ static void ixgbe_release_eeprom(struct ixgbe_hw *hw)
* ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum
* @hw: pointer to hardware structure
**/
-s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
+int ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
{
u16 i;
u16 j;
@@ -1728,7 +1728,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
checksum = (u16)IXGBE_EEPROM_SUM - checksum;
- return (s32)checksum;
+ return (int)checksum;
}
/**
@@ -1739,12 +1739,12 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
* Performs checksum calculation and validates the EEPROM checksum. If the
* caller does not need checksum_val, the value can be NULL.
**/
-s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+int ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
u16 *checksum_val)
{
- s32 status;
- u16 checksum;
u16 read_checksum = 0;
+ u16 checksum;
+ int status;
/*
* Read the first word from the EEPROM. If this times out or fails, do
@@ -1786,10 +1786,10 @@ s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
* ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum
* @hw: pointer to hardware structure
**/
-s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
+int ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
{
- s32 status;
u16 checksum;
+ int status;
/*
* Read the first word from the EEPROM. If this times out or fails, do
@@ -1823,7 +1823,7 @@ s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
*
* Puts an ethernet address into a receive address register.
**/
-s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+int ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
u32 enable_addr)
{
u32 rar_low, rar_high;
@@ -1876,7 +1876,7 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
*
* Clears an ethernet address from a receive address register.
**/
-s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
+int ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
{
u32 rar_high;
u32 rar_entries = hw->mac.num_rar_entries;
@@ -1917,7 +1917,7 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
* of the receive address registers. Clears the multicast table. Assumes
* the receiver is in reset when the routine is called.
**/
-s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
+int ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
{
u32 i;
u32 rar_entries = hw->mac.num_rar_entries;
@@ -1980,7 +1980,7 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
* by the MO field of the MCSTCTRL. The MO field is set during initialization
* to mc_filter_type.
**/
-static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
+static int ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
{
u32 vector = 0;
@@ -2049,7 +2049,7 @@ static void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
* registers for the first multicast addresses, and hashes the rest into the
* multicast table.
**/
-s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
+int ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
struct net_device *netdev)
{
struct netdev_hw_addr *ha;
@@ -2091,7 +2091,7 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
*
* Enables multicast address in RAR and the use of the multicast hash table.
**/
-s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
+int ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
{
struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
@@ -2108,7 +2108,7 @@ s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
*
* Disables multicast address in RAR and the use of the multicast hash table.
**/
-s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
+int ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
{
struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
@@ -2124,7 +2124,7 @@ s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
*
* Enable flow control according to the current settings.
**/
-s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
+int ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
{
u32 mflcn_reg, fccfg_reg;
u32 reg;
@@ -2252,7 +2252,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
* Find the intersection between advertised settings and link partner's
* advertised settings
**/
-s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+int ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
{
if ((!(adv_reg)) || (!(lp_reg)))
@@ -2294,10 +2294,10 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
*
* Enable flow control according on 1 gig fiber.
**/
-static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
+static int ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
{
u32 pcs_anadv_reg, pcs_lpab_reg, linkstat;
- s32 ret_val;
+ int ret_val;
/*
* On multispeed fiber at 1g, bail out if
@@ -2328,10 +2328,10 @@ static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
*
* Enable flow control according to IEEE clause 37.
**/
-static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
+static int ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
{
u32 links2, anlp1_reg, autoc_reg, links;
- s32 ret_val;
+ int ret_val;
/*
* On backplane, bail out if
@@ -2367,7 +2367,7 @@ static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
*
* Enable flow control according to IEEE clause 37.
**/
-static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
+static int ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
{
u16 technology_ability_reg = 0;
u16 lp_technology_ability_reg = 0;
@@ -2395,7 +2395,7 @@ static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
void ixgbe_fc_autoneg(struct ixgbe_hw *hw)
{
ixgbe_link_speed speed;
- s32 ret_val = -EIO;
+ int ret_val = -EIO;
bool link_up;
/*
@@ -2501,7 +2501,7 @@ static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw)
* bit hasn't caused the primary requests to be disabled, else 0
* is returned signifying primary requests disabled.
**/
-static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw)
+static int ixgbe_disable_pcie_primary(struct ixgbe_hw *hw)
{
u32 i, poll;
u16 value;
@@ -2573,7 +2573,7 @@ gio_disable_fail:
* Acquires the SWFW semaphore through the GSSR register for the specified
* function (CSR, PHY0, PHY1, EEPROM, Flash)
**/
-s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask)
+int ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask)
{
u32 gssr = 0;
u32 swmask = mask;
@@ -2641,7 +2641,7 @@ void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u32 mask)
*
* The default case requires no protection so just to the register read.
**/
-s32 prot_autoc_read_generic(struct ixgbe_hw *hw, bool *locked, u32 *reg_val)
+int prot_autoc_read_generic(struct ixgbe_hw *hw, bool *locked, u32 *reg_val)
{
*locked = false;
*reg_val = IXGBE_READ_REG(hw, IXGBE_AUTOC);
@@ -2655,7 +2655,7 @@ s32 prot_autoc_read_generic(struct ixgbe_hw *hw, bool *locked, u32 *reg_val)
* @locked: bool to indicate whether the SW/FW lock was already taken by
* previous read.
**/
-s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked)
+int prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked)
{
IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_val);
return 0;
@@ -2668,7 +2668,7 @@ s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked)
* Stops the receive data path and waits for the HW to internally
* empty the Rx security block.
**/
-s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw)
+int ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw)
{
#define IXGBE_MAX_SECRX_POLL 40
int i;
@@ -2700,7 +2700,7 @@ s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw)
*
* Enables the receive data path
**/
-s32 ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw)
+int ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw)
{
u32 secrxreg;
@@ -2719,7 +2719,7 @@ s32 ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw)
*
* Enables the Rx DMA unit
**/
-s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
+int ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
{
if (regval & IXGBE_RXCTRL_RXEN)
hw->mac.ops.enable_rx(hw);
@@ -2734,14 +2734,14 @@ s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
* @hw: pointer to hardware structure
* @index: led number to blink
**/
-s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
+int ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
{
- ixgbe_link_speed speed = 0;
- bool link_up = false;
u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+ ixgbe_link_speed speed = 0;
+ bool link_up = false;
bool locked = false;
- s32 ret_val;
+ int ret_val;
if (index > 3)
return -EINVAL;
@@ -2782,12 +2782,12 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
* @hw: pointer to hardware structure
* @index: led number to stop blinking
**/
-s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
+int ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
{
- u32 autoc_reg = 0;
u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
bool locked = false;
- s32 ret_val;
+ u32 autoc_reg = 0;
+ int ret_val;
if (index > 3)
return -EINVAL;
@@ -2821,10 +2821,10 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
* pointer, and returns the value at that location. This is used in both
* get and set mac_addr routines.
**/
-static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
- u16 *san_mac_offset)
+static int ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
+ u16 *san_mac_offset)
{
- s32 ret_val;
+ int ret_val;
/*
* First read the EEPROM pointer to see if the MAC addresses are
@@ -2849,11 +2849,11 @@ static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
* set_lan_id() is called by identify_sfp(), but this cannot be relied
* upon for non-SFP connections, so we must call it here.
**/
-s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
+int ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
{
u16 san_mac_data, san_mac_offset;
+ int ret_val;
u8 i;
- s32 ret_val;
/*
* First read the EEPROM pointer to see if the MAC addresses are
@@ -2942,7 +2942,7 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
* @rar: receive address register index to disassociate
* @vmdq: VMDq pool index to remove from the rar
**/
-s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+int ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
{
u32 mpsar_lo, mpsar_hi;
u32 rar_entries = hw->mac.num_rar_entries;
@@ -2993,7 +2993,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
* @rar: receive address register index to associate with a VMDq index
* @vmdq: VMDq pool index
**/
-s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+int ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
{
u32 mpsar;
u32 rar_entries = hw->mac.num_rar_entries;
@@ -3026,7 +3026,7 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
* VFs advertized and not 0.
* MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
**/
-s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
+int ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
{
u32 rar = hw->mac.san_mac_rar_index;
@@ -3045,7 +3045,7 @@ s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
* ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array
* @hw: pointer to hardware structure
**/
-s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
+int ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
{
int i;
@@ -3065,9 +3065,9 @@ s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
* return the VLVF index where this VLAN id should be placed
*
**/
-static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass)
+static int ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass)
{
- s32 regindex, first_empty_slot;
+ int regindex, first_empty_slot;
u32 bits;
/* short cut the special case */
@@ -3115,11 +3115,11 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass)
*
* Turn on/off specified VLAN in the VLAN filter table.
**/
-s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+int ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
bool vlan_on, bool vlvf_bypass)
{
u32 regidx, vfta_delta, vfta, bits;
- s32 vlvf_index;
+ int vlvf_index;
if ((vlan > 4095) || (vind > 63))
return -EINVAL;
@@ -3226,7 +3226,7 @@ vfta_update:
*
* Clears the VLAN filter table, and the VMDq index associated with the filter
**/
-s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
+int ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
{
u32 offset;
@@ -3276,7 +3276,7 @@ static bool ixgbe_need_crosstalk_fix(struct ixgbe_hw *hw)
*
* Reads the links register to determine if link is up and the current speed
**/
-s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+int ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
bool *link_up, bool link_up_wait_to_complete)
{
bool crosstalk_fix_active = ixgbe_need_crosstalk_fix(hw);
@@ -3396,8 +3396,8 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
* This function will read the EEPROM from the alternative SAN MAC address
* block to check the support for the alternative WWNN/WWPN prefix support.
**/
-s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
- u16 *wwpn_prefix)
+int ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+ u16 *wwpn_prefix)
{
u16 offset, caps;
u16 alt_san_mac_blk_offset;
@@ -3494,7 +3494,7 @@ void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
* This function will read the EEPROM location for the device capabilities,
* and return the word through device_caps.
**/
-s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps)
+int ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps)
{
hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps);
@@ -3604,7 +3604,7 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
* This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held
* by the caller.
**/
-s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length,
+int ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length,
u32 timeout)
{
u32 hicr, i, fwsts;
@@ -3676,15 +3676,15 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length,
* Communicates with the manageability block. On success return 0
* else return -EIO or -EINVAL.
**/
-s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
+int ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
u32 length, u32 timeout,
bool return_data)
{
u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
struct ixgbe_hic_hdr *hdr = buffer;
- u32 *u32arr = buffer;
u16 buf_len, dword_len;
- s32 status;
+ u32 *u32arr = buffer;
+ int status;
u32 bi;
if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
@@ -3753,13 +3753,13 @@ rel_out:
* else returns -EBUSY when encountering an error acquiring
* semaphore or -EIO when command fails.
**/
-s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+int ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
u8 build, u8 sub, __always_unused u16 len,
__always_unused const char *driver_ver)
{
struct ixgbe_hic_drv_info fw_cmd;
+ int ret_val;
int i;
- s32 ret_val;
fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
@@ -3875,10 +3875,10 @@ static const u8 ixgbe_emc_therm_limit[4] = {
*
* Returns error code.
**/
-static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg,
+static int ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg,
u16 *ets_offset)
{
- s32 status;
+ int status;
status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, ets_offset);
if (status)
@@ -3903,13 +3903,13 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg,
*
* Returns the thermal sensor data structure
**/
-s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
+int ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
{
- s32 status;
u16 ets_offset;
- u16 ets_cfg;
u16 ets_sensor;
u8 num_sensors;
+ u16 ets_cfg;
+ int status;
u8 i;
struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
@@ -3959,17 +3959,17 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
* Inits the thermal sensor thresholds according to the NVM map
* and save off the threshold and location values into mac.thermal_sensor_data
**/
-s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
+int ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
{
- s32 status;
- u16 ets_offset;
- u16 ets_cfg;
- u16 ets_sensor;
+ struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
u8 low_thresh_delta;
u8 num_sensors;
u8 therm_limit;
+ u16 ets_sensor;
+ u16 ets_offset;
+ u16 ets_cfg;
+ int status;
u8 i;
- struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data));
@@ -4192,16 +4192,16 @@ bool ixgbe_mng_present(struct ixgbe_hw *hw)
*
* Set the link speed in the MAC and/or PHY register and restarts link.
*/
-s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+int ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete)
{
- ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN;
- s32 status = 0;
+ ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+ bool autoneg, link_up = false;
u32 speedcnt = 0;
+ int status = 0;
u32 i = 0;
- bool autoneg, link_up = false;
/* Mask off requested but non-supported speeds */
status = hw->mac.ops.get_link_capabilities(hw, &link_speed, &autoneg);
@@ -4340,8 +4340,8 @@ out:
void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
ixgbe_link_speed speed)
{
- s32 status;
u8 rs, eeprom_data;
+ int status;
switch (speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 34761e691d52..6493abf189de 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -8,89 +8,89 @@
#include "ixgbe.h"
u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw);
-s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
-s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
-s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+int ixgbe_init_hw_generic(struct ixgbe_hw *hw);
+int ixgbe_start_hw_generic(struct ixgbe_hw *hw);
+int ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
+int ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
+int ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
u32 pba_num_size);
-s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
+int ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
enum ixgbe_bus_width ixgbe_convert_bus_width(u16 link_status);
enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status);
-s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
+int ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw);
-s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
+int ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
-s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw);
+int ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
+int ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+int ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw);
-s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
-s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
-s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
+int ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+int ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data);
-s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
-s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
+int ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data);
-s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
-s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+int ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data);
-s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
u16 *data);
-s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+int ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data);
-s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
-s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+int ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
+int ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
u16 *checksum_val);
-s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
+int ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
-s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+int ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
u32 enable_addr);
-s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
-s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
+int ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
+int ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
+int ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
struct net_device *netdev);
-s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
-s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
-s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw);
-s32 ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw);
-s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
-s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
-s32 ixgbe_setup_fc_generic(struct ixgbe_hw *);
+int ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
+int ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
+int ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw);
+int ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw);
+int ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
+int ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
+int ixgbe_setup_fc_generic(struct ixgbe_hw *);
bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw);
void ixgbe_fc_autoneg(struct ixgbe_hw *hw);
-s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask);
+int ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask);
void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u32 mask);
-s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
-s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
-s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
-s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
+int ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
+int ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+int ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
+int ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+int ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
+int ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
u32 vind, bool vlan_on, bool vlvf_bypass);
-s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
-s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
+int ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
+int ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *link_up, bool link_up_wait_to_complete);
-s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+int ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
u16 *wwpn_prefix);
-s32 prot_autoc_read_generic(struct ixgbe_hw *hw, bool *, u32 *reg_val);
-s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked);
+int prot_autoc_read_generic(struct ixgbe_hw *hw, bool *, u32 *reg_val);
+int prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked);
-s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
+int ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
+int ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
-s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
-s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+int ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
+int ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
u8 build, u8 ver, u16 len, const char *str);
u8 ixgbe_calculate_checksum(u8 *buffer, u32 length);
-s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *, u32 length,
+int ixgbe_host_interface_command(struct ixgbe_hw *hw, void *, u32 length,
u32 timeout, bool return_data);
-s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 len, u32 timeout);
-s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
+int ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 len, u32 timeout);
+int ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
u32 (*data)[FW_PHY_ACT_DATA_COUNT]);
void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
bool ixgbe_mng_present(struct ixgbe_hw *hw);
@@ -111,8 +111,8 @@ extern const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT];
#define IXGBE_EMC_DIODE3_DATA 0x2A
#define IXGBE_EMC_DIODE3_THERM_LIMIT 0x30
-s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw);
-s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw);
+int ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw);
+int ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw);
void ixgbe_get_etk_id(struct ixgbe_hw *hw,
struct ixgbe_nvm_version *nvm_ver);
void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw,
@@ -121,7 +121,7 @@ void ixgbe_get_orom_version(struct ixgbe_hw *hw,
struct ixgbe_nvm_version *nvm_ver);
void ixgbe_disable_rx_generic(struct ixgbe_hw *hw);
void ixgbe_enable_rx_generic(struct ixgbe_hw *hw);
-s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+int ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
index d26cea5b43bd..502666f28124 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
@@ -18,7 +18,7 @@
* @max: max credits by traffic class
* @max_frame: maximum frame size
*/
-static s32 ixgbe_ieee_credits(__u8 *bw, __u16 *refill,
+static int ixgbe_ieee_credits(__u8 *bw, __u16 *refill,
__u16 *max, int max_frame)
{
int min_percent = 100;
@@ -59,7 +59,7 @@ static s32 ixgbe_ieee_credits(__u8 *bw, __u16 *refill,
* It should be called only after the rules are checked by
* ixgbe_dcb_check_config().
*/
-s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_hw *hw,
+int ixgbe_dcb_calculate_tc_credits(struct ixgbe_hw *hw,
struct ixgbe_dcb_config *dcb_config,
int max_frame, u8 direction)
{
@@ -247,7 +247,7 @@ void ixgbe_dcb_unpack_map(struct ixgbe_dcb_config *cfg, int direction, u8 *map)
*
* Configure dcb settings and enable dcb mode.
*/
-s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
+int ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
struct ixgbe_dcb_config *dcb_config)
{
u8 pfc_en;
@@ -283,7 +283,7 @@ s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
}
/* Helper routines to abstract HW specifics from DCB netlink ops */
-s32 ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
+int ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
{
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
@@ -300,7 +300,7 @@ s32 ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
return -EINVAL;
}
-s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame)
+int ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame)
{
__u16 refill[IEEE_8021QAZ_MAX_TCS], max[IEEE_8021QAZ_MAX_TCS];
__u8 prio_type[IEEE_8021QAZ_MAX_TCS];
@@ -333,7 +333,7 @@ s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame)
bwg_id, prio_type, ets->prio_tc);
}
-s32 ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw,
+int ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw,
u16 *refill, u16 *max, u8 *bwg_id,
u8 *prio_type, u8 *prio_tc)
{
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
index 60cd5863bf5e..91788e4c4e19 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
@@ -124,15 +124,15 @@ void ixgbe_dcb_unpack_map(struct ixgbe_dcb_config *, int, u8 *);
u8 ixgbe_dcb_get_tc_from_up(struct ixgbe_dcb_config *, int, u8);
/* DCB credits calculation */
-s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_hw *,
+int ixgbe_dcb_calculate_tc_credits(struct ixgbe_hw *,
struct ixgbe_dcb_config *, int, u8);
/* DCB hw initialization */
-s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max);
-s32 ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw, u16 *refill, u16 *max,
+int ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max);
+int ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw, u16 *refill, u16 *max,
u8 *bwg_id, u8 *prio_type, u8 *tc_prio);
-s32 ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *tc_prio);
-s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+int ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *tc_prio);
+int ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *);
void ixgbe_dcb_read_rtrup2tc(struct ixgbe_hw *hw, u8 *map);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
index 379ae747cdce..185c3e5f9837 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
@@ -15,10 +15,8 @@
*
* Configure Rx Data Arbiter and credits for each traffic class.
*/
-s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *prio_type)
+int ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *prio_type)
{
u32 reg = 0;
u32 credit_refill = 0;
@@ -75,11 +73,8 @@ s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
*
* Configure Tx Descriptor Arbiter and credits for each traffic class.
*/
-s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type)
+int ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type)
{
u32 reg, max_credits;
u8 i;
@@ -124,11 +119,8 @@ s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
*
* Configure Tx Data Arbiter and credits for each traffic class.
*/
-s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type)
+int ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type)
{
u32 reg;
u8 i;
@@ -171,7 +163,7 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
*
* Configure Priority Flow Control for each traffic class.
*/
-s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
+int ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
{
u32 fcrtl, reg;
u8 i;
@@ -224,7 +216,7 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
* Configure queue statistics registers, all queues belonging to same traffic
* class uses a single set of queue statistics counters.
*/
-static s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
+static int ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
{
u32 reg = 0;
u8 i = 0;
@@ -260,7 +252,7 @@ static s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
*
* Configure dcb settings and enable dcb mode.
*/
-s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+int ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
u16 *max, u8 *bwg_id, u8 *prio_type)
{
ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, prio_type);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
index fdca41abb44c..5bf3f13c6953 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
@@ -46,27 +46,19 @@
/* DCB hardware-specific driver APIs */
/* DCB PFC functions */
-s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, u8 pfc_en);
+int ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, u8 pfc_en);
/* DCB hw initialization */
-s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *prio_type);
-
-s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type);
-
-s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type);
-
-s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+int ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *prio_type);
+
+int ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type);
+
+int ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type);
+
+int ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
u16 *max, u8 *bwg_id, u8 *prio_type);
#endif /* _DCB_82598_CONFIG_H */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
index 7948849840a5..c61bd9059541 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
@@ -17,7 +17,7 @@
*
* Configure Rx Packet Arbiter and credits for each traffic class.
*/
-s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
+int ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
u16 *refill,
u16 *max,
u8 *bwg_id,
@@ -76,7 +76,7 @@ s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
*
* Configure Tx Descriptor Arbiter and credits for each traffic class.
*/
-s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
+int ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
u16 *refill,
u16 *max,
u8 *bwg_id,
@@ -128,7 +128,7 @@ s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
*
* Configure Tx Packet Arbiter and credits for each traffic class.
*/
-s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
+int ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
u16 *refill,
u16 *max,
u8 *bwg_id,
@@ -187,7 +187,7 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
*
* Configure Priority Flow Control (PFC) for each traffic class.
*/
-s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
+int ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
{
u32 i, j, fcrtl, reg;
u8 max_tc = 0;
@@ -272,7 +272,7 @@ s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
* Configure queue statistics registers, all queues belonging to same traffic
* class uses a single set of queue statistics counters.
*/
-static s32 ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw)
+static int ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw)
{
u32 reg = 0;
u8 i = 0;
@@ -330,7 +330,7 @@ static s32 ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw)
*
* Configure dcb settings and enable dcb mode.
*/
-s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+int ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
u16 *max, u8 *bwg_id, u8 *prio_type, u8 *prio_tc)
{
ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
index c6f084883cab..f6e5a87c03e3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
@@ -70,30 +70,21 @@
/* DCB hardware-specific driver APIs */
/* DCB PFC functions */
-s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc);
+int ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc);
/* DCB hw initialization */
-s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type,
- u8 *prio_tc);
-
-s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type);
-
-s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
- u16 *refill,
- u16 *max,
- u8 *bwg_id,
- u8 *prio_type,
- u8 *prio_tc);
-
-s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+int ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type,
+ u8 *prio_tc);
+
+int ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type);
+
+int ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw, u16 *refill,
+ u16 *max, u8 *bwg_id, u8 *prio_type,
+ u8 *prio_tc);
+
+int ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
u16 *max, u8 *bwg_id, u8 *prio_type,
u8 *prio_tc);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 9a63457712c7..6e6e6f1847b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -349,6 +349,8 @@ static int ixgbe_get_link_ksettings(struct net_device *netdev,
case ixgbe_sfp_type_1g_sx_core1:
case ixgbe_sfp_type_1g_lx_core0:
case ixgbe_sfp_type_1g_lx_core1:
+ case ixgbe_sfp_type_1g_bx_core0:
+ case ixgbe_sfp_type_1g_bx_core1:
ethtool_link_ksettings_add_link_mode(cmd, supported,
FIBRE);
ethtool_link_ksettings_add_link_mode(cmd, advertising,
@@ -459,7 +461,7 @@ static int ixgbe_set_link_ksettings(struct net_device *netdev,
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u32 advertised, old;
- s32 err = 0;
+ int err = 0;
if ((hw->phy.media_type == ixgbe_media_type_copper) ||
(hw->phy.multispeed_fiber)) {
@@ -3326,9 +3328,9 @@ static int ixgbe_get_module_info(struct net_device *dev,
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_hw *hw = &adapter->hw;
- s32 status;
u8 sff8472_rev, addr_mode;
bool page_swap = false;
+ int status;
if (hw->phy.type == ixgbe_phy_fw)
return -ENXIO;
@@ -3372,7 +3374,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev,
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_hw *hw = &adapter->hw;
- s32 status = -EFAULT;
+ int status = -EFAULT;
u8 databyte = 0xFF;
int i = 0;
@@ -3403,66 +3405,68 @@ static int ixgbe_get_module_eeprom(struct net_device *dev,
static const struct {
ixgbe_link_speed mac_speed;
- u32 supported;
+ u32 link_mode;
} ixgbe_ls_map[] = {
- { IXGBE_LINK_SPEED_10_FULL, SUPPORTED_10baseT_Full },
- { IXGBE_LINK_SPEED_100_FULL, SUPPORTED_100baseT_Full },
- { IXGBE_LINK_SPEED_1GB_FULL, SUPPORTED_1000baseT_Full },
- { IXGBE_LINK_SPEED_2_5GB_FULL, SUPPORTED_2500baseX_Full },
- { IXGBE_LINK_SPEED_10GB_FULL, SUPPORTED_10000baseT_Full },
+ { IXGBE_LINK_SPEED_10_FULL, ETHTOOL_LINK_MODE_10baseT_Full_BIT },
+ { IXGBE_LINK_SPEED_100_FULL, ETHTOOL_LINK_MODE_100baseT_Full_BIT },
+ { IXGBE_LINK_SPEED_1GB_FULL, ETHTOOL_LINK_MODE_1000baseT_Full_BIT },
+ { IXGBE_LINK_SPEED_2_5GB_FULL, ETHTOOL_LINK_MODE_2500baseX_Full_BIT },
+ { IXGBE_LINK_SPEED_10GB_FULL, ETHTOOL_LINK_MODE_10000baseT_Full_BIT },
};
static const struct {
u32 lp_advertised;
- u32 mac_speed;
+ u32 link_mode;
} ixgbe_lp_map[] = {
- { FW_PHY_ACT_UD_2_100M_TX_EEE, SUPPORTED_100baseT_Full },
- { FW_PHY_ACT_UD_2_1G_T_EEE, SUPPORTED_1000baseT_Full },
- { FW_PHY_ACT_UD_2_10G_T_EEE, SUPPORTED_10000baseT_Full },
- { FW_PHY_ACT_UD_2_1G_KX_EEE, SUPPORTED_1000baseKX_Full },
- { FW_PHY_ACT_UD_2_10G_KX4_EEE, SUPPORTED_10000baseKX4_Full },
- { FW_PHY_ACT_UD_2_10G_KR_EEE, SUPPORTED_10000baseKR_Full},
+ { FW_PHY_ACT_UD_2_100M_TX_EEE, ETHTOOL_LINK_MODE_100baseT_Full_BIT },
+ { FW_PHY_ACT_UD_2_1G_T_EEE, ETHTOOL_LINK_MODE_1000baseT_Full_BIT },
+ { FW_PHY_ACT_UD_2_10G_T_EEE, ETHTOOL_LINK_MODE_10000baseT_Full_BIT },
+ { FW_PHY_ACT_UD_2_1G_KX_EEE, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT },
+ { FW_PHY_ACT_UD_2_10G_KX4_EEE, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT },
+ { FW_PHY_ACT_UD_2_10G_KR_EEE, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
};
static int
-ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_eee *edata)
+ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata)
{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(common);
u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
struct ixgbe_hw *hw = &adapter->hw;
- s32 rc;
+ int rc;
u16 i;
rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_UD_2, &info);
if (rc)
return rc;
- edata->lp_advertised = 0;
for (i = 0; i < ARRAY_SIZE(ixgbe_lp_map); ++i) {
if (info[0] & ixgbe_lp_map[i].lp_advertised)
- edata->lp_advertised |= ixgbe_lp_map[i].mac_speed;
+ linkmode_set_bit(ixgbe_lp_map[i].link_mode,
+ edata->lp_advertised);
}
- edata->supported = 0;
for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) {
if (hw->phy.eee_speeds_supported & ixgbe_ls_map[i].mac_speed)
- edata->supported |= ixgbe_ls_map[i].supported;
+ linkmode_set_bit(ixgbe_lp_map[i].link_mode,
+ edata->supported);
}
- edata->advertised = 0;
for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) {
if (hw->phy.eee_speeds_advertised & ixgbe_ls_map[i].mac_speed)
- edata->advertised |= ixgbe_ls_map[i].supported;
+ linkmode_set_bit(ixgbe_lp_map[i].link_mode,
+ edata->advertised);
}
- edata->eee_enabled = !!edata->advertised;
+ edata->eee_enabled = !linkmode_empty(edata->advertised);
edata->tx_lpi_enabled = edata->eee_enabled;
- if (edata->advertised & edata->lp_advertised)
- edata->eee_active = true;
+
+ linkmode_and(common, edata->advertised, edata->lp_advertised);
+ edata->eee_active = !linkmode_empty(common);
return 0;
}
-static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_hw *hw = &adapter->hw;
@@ -3476,17 +3480,17 @@ static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
return -EOPNOTSUPP;
}
-static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_hw *hw = &adapter->hw;
- struct ethtool_eee eee_data;
- s32 ret_val;
+ struct ethtool_keee eee_data;
+ int ret_val;
if (!(adapter->flags2 & IXGBE_FLAG2_EEE_CAPABLE))
return -EOPNOTSUPP;
- memset(&eee_data, 0, sizeof(struct ethtool_eee));
+ memset(&eee_data, 0, sizeof(struct ethtool_keee));
ret_val = ixgbe_get_eee(netdev, &eee_data);
if (ret_val)
@@ -3504,7 +3508,7 @@ static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
return -EINVAL;
}
- if (eee_data.advertised != edata->advertised) {
+ if (!linkmode_equal(eee_data.advertised, edata->advertised)) {
e_err(drv,
"Setting EEE advertised speeds is not supported\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bd541527c8c7..f985252c8c8d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -153,7 +153,7 @@ MODULE_PARM_DESC(max_vfs,
#endif /* CONFIG_PCI_IOV */
static bool allow_unsupported_sfp;
-module_param(allow_unsupported_sfp, bool, 0);
+module_param(allow_unsupported_sfp, bool, 0444);
MODULE_PARM_DESC(allow_unsupported_sfp,
"Allow unsupported and untested SFP+ modules on 82599-based adapters");
@@ -205,7 +205,7 @@ static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
return 0;
}
-static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
+static int ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
u16 link_status = 0;
@@ -1106,6 +1106,44 @@ static int ixgbe_tx_maxrate(struct net_device *netdev,
}
/**
+ * ixgbe_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @q_vector: queue vector ring belongs to
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ixgbe_update_tx_ring_stats(struct ixgbe_ring *tx_ring,
+ struct ixgbe_q_vector *q_vector, u64 pkts,
+ u64 bytes)
+{
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->stats.bytes += bytes;
+ tx_ring->stats.packets += pkts;
+ u64_stats_update_end(&tx_ring->syncp);
+ q_vector->tx.total_bytes += bytes;
+ q_vector->tx.total_packets += pkts;
+}
+
+/**
+ * ixgbe_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @q_vector: queue vector ring belongs to
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ixgbe_update_rx_ring_stats(struct ixgbe_ring *rx_ring,
+ struct ixgbe_q_vector *q_vector, u64 pkts,
+ u64 bytes)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.bytes += bytes;
+ rx_ring->stats.packets += pkts;
+ u64_stats_update_end(&rx_ring->syncp);
+ q_vector->rx.total_bytes += bytes;
+ q_vector->rx.total_packets += pkts;
+}
+
+/**
* ixgbe_clean_tx_irq - Reclaim resources after transmit completes
* @q_vector: structure containing interrupt and ring information
* @tx_ring: tx ring to clean
@@ -1207,12 +1245,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
i += tx_ring->count;
tx_ring->next_to_clean = i;
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.packets += total_packets;
- u64_stats_update_end(&tx_ring->syncp);
- q_vector->tx.total_bytes += total_bytes;
- q_vector->tx.total_packets += total_packets;
+ ixgbe_update_tx_ring_stats(tx_ring, q_vector, total_packets,
+ total_bytes);
adapter->tx_ipsec += total_ipsec;
if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
@@ -2429,12 +2463,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
ixgbe_xdp_ring_update_tail_locked(ring);
}
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.packets += total_rx_packets;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- q_vector->rx.total_packets += total_rx_packets;
- q_vector->rx.total_bytes += total_rx_bytes;
+ ixgbe_update_rx_ring_stats(rx_ring, q_vector, total_rx_packets,
+ total_rx_bytes);
return total_rx_packets;
}
@@ -2939,8 +2969,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter)
static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter,
u64 qmask)
{
- u32 mask;
struct ixgbe_hw *hw = &adapter->hw;
+ u32 mask;
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
@@ -7809,7 +7839,7 @@ static void ixgbe_watchdog_subtask(struct ixgbe_adapter *adapter)
static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- s32 err;
+ int err;
/* not searching for SFP so there is nothing to do here */
if (!(adapter->flags2 & IXGBE_FLAG2_SEARCH_FOR_SFP) &&
@@ -10205,7 +10235,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
unsigned int network_hdr_len, mac_hdr_len;
/* Make certain the headers can be described by a context descriptor */
- mac_hdr_len = skb_network_header(skb) - skb->data;
+ mac_hdr_len = skb_network_offset(skb);
if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
@@ -10525,6 +10555,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring)
}
/**
+ * ixgbe_irq_disable_single - Disable single IRQ vector
+ * @adapter: adapter structure
+ * @ring: ring index
+ **/
+static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u64 qmask = BIT_ULL(ring);
+ u32 mask;
+
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82598EB:
+ mask = qmask & IXGBE_EIMC_RTX_QUEUE;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
+ mask = (qmask & 0xFFFFFFFF);
+ if (mask)
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+ mask = (qmask >> 32);
+ if (mask)
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+ break;
+ default:
+ break;
+ }
+ IXGBE_WRITE_FLUSH(&adapter->hw);
+ if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+ synchronize_irq(adapter->msix_entries[ring].vector);
+ else
+ synchronize_irq(adapter->pdev->irq);
+}
+
+/**
* ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings
* @adapter: adapter structure
* @ring: ring index
@@ -10540,6 +10608,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring)
tx_ring = adapter->tx_ring[ring];
xdp_ring = adapter->xdp_ring[ring];
+ ixgbe_irq_disable_single(adapter, ring);
+
+ /* Rx/Tx/XDP Tx share the same napi context. */
+ napi_disable(&rx_ring->q_vector->napi);
+
ixgbe_disable_txr(adapter, tx_ring);
if (xdp_ring)
ixgbe_disable_txr(adapter, xdp_ring);
@@ -10548,9 +10621,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring)
if (xdp_ring)
synchronize_rcu();
- /* Rx/Tx/XDP Tx share the same napi context. */
- napi_disable(&rx_ring->q_vector->napi);
-
ixgbe_clean_tx_ring(tx_ring);
if (xdp_ring)
ixgbe_clean_tx_ring(xdp_ring);
@@ -10578,9 +10648,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring)
tx_ring = adapter->tx_ring[ring];
xdp_ring = adapter->xdp_ring[ring];
- /* Rx/Tx/XDP Tx share the same napi context. */
- napi_enable(&rx_ring->q_vector->napi);
-
ixgbe_configure_tx_ring(adapter, tx_ring);
if (xdp_ring)
ixgbe_configure_tx_ring(adapter, xdp_ring);
@@ -10589,6 +10656,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring)
clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
if (xdp_ring)
clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state);
+
+ /* Rx/Tx/XDP Tx share the same napi context. */
+ napi_enable(&rx_ring->q_vector->napi);
+ ixgbe_irq_enable_queues(adapter, BIT_ULL(ring));
+ IXGBE_WRITE_FLUSH(&adapter->hw);
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
index fe7ef5773369..d67d77e5dacc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
@@ -15,7 +15,7 @@
*
* returns SUCCESS if it successfully read message from buffer
**/
-s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+int ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
@@ -38,7 +38,7 @@ s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
*
* returns SUCCESS if it successfully copied message into the buffer
**/
-s32 ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+int ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
@@ -58,7 +58,7 @@ s32 ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
*
* returns SUCCESS if the Status bit was found or else ERR_MBX
**/
-s32 ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
+int ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
@@ -75,7 +75,7 @@ s32 ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
*
* returns SUCCESS if the Status bit was found or else ERR_MBX
**/
-s32 ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
+int ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
@@ -92,7 +92,7 @@ s32 ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
*
* returns SUCCESS if the Status bit was found or else ERR_MBX
**/
-s32 ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id)
+int ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
@@ -109,7 +109,7 @@ s32 ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id)
*
* returns SUCCESS if it successfully received a message notification
**/
-static s32 ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
+static int ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
int countdown = mbx->timeout;
@@ -134,7 +134,7 @@ static s32 ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
*
* returns SUCCESS if it successfully received a message acknowledgement
**/
-static s32 ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
+static int ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
int countdown = mbx->timeout;
@@ -162,11 +162,11 @@ static s32 ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
* returns SUCCESS if it successfully received a message notification and
* copied it into the receive buffer.
**/
-static s32 ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
+static int ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
- s32 ret_val;
+ int ret_val;
if (!mbx->ops)
return -EIO;
@@ -189,11 +189,11 @@ static s32 ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
* returns SUCCESS if it successfully copied message into the buffer and
* received an ack to that message within delay * timeout period
**/
-static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
- u16 mbx_id)
+static int ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
+ u16 mbx_id)
{
struct ixgbe_mbx_info *mbx = &hw->mbx;
- s32 ret_val;
+ int ret_val;
/* exit if either we can't write or there isn't a defined timeout */
if (!mbx->ops || !mbx->timeout)
@@ -208,7 +208,7 @@ static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
return ixgbe_poll_for_ack(hw, mbx_id);
}
-static s32 ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index)
+static int ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index)
{
u32 mbvficr = IXGBE_READ_REG(hw, IXGBE_MBVFICR(index));
@@ -227,9 +227,9 @@ static s32 ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index)
*
* returns SUCCESS if the VF has set the Status bit or else ERR_MBX
**/
-static s32 ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number)
+static int ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number)
{
- s32 index = IXGBE_MBVFICR_INDEX(vf_number);
+ int index = IXGBE_MBVFICR_INDEX(vf_number);
u32 vf_bit = vf_number % 16;
if (!ixgbe_check_for_bit_pf(hw, IXGBE_MBVFICR_VFREQ_VF1 << vf_bit,
@@ -248,9 +248,9 @@ static s32 ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number)
*
* returns SUCCESS if the VF has set the Status bit or else ERR_MBX
**/
-static s32 ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number)
+static int ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number)
{
- s32 index = IXGBE_MBVFICR_INDEX(vf_number);
+ int index = IXGBE_MBVFICR_INDEX(vf_number);
u32 vf_bit = vf_number % 16;
if (!ixgbe_check_for_bit_pf(hw, IXGBE_MBVFICR_VFACK_VF1 << vf_bit,
@@ -269,7 +269,7 @@ static s32 ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number)
*
* returns SUCCESS if the VF has set the Status bit or else ERR_MBX
**/
-static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number)
+static int ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number)
{
u32 reg_offset = (vf_number < 32) ? 0 : 1;
u32 vf_shift = vf_number % 32;
@@ -305,7 +305,7 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number)
*
* return SUCCESS if we obtained the mailbox lock
**/
-static s32 ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number)
+static int ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number)
{
u32 p2v_mailbox;
@@ -329,10 +329,10 @@ static s32 ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number)
*
* returns SUCCESS if it successfully copied message into the buffer
**/
-static s32 ixgbe_write_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
+static int ixgbe_write_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
u16 vf_number)
{
- s32 ret_val;
+ int ret_val;
u16 i;
/* lock the mailbox to prevent pf/vf race condition */
@@ -368,10 +368,10 @@ static s32 ixgbe_write_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
* memory buffer. The presumption is that the caller knows that there was
* a message due to a VF request so no polling for message is needed.
**/
-static s32 ixgbe_read_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
+static int ixgbe_read_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
u16 vf_number)
{
- s32 ret_val;
+ int ret_val;
u16 i;
/* lock the mailbox to prevent pf/vf race condition */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index 6434c190e7a4..bd205306934b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -96,11 +96,11 @@ enum ixgbe_pfvf_api_rev {
#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
-s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16);
-s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16);
-s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16);
-s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16);
-s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16);
+int ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+int ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+int ixgbe_check_for_msg(struct ixgbe_hw *, u16);
+int ixgbe_check_for_ack(struct ixgbe_hw *, u16);
+int ixgbe_check_for_rst(struct ixgbe_hw *, u16);
#ifdef CONFIG_PCI_IOV
void ixgbe_init_mbx_params_pf(struct ixgbe_hw *);
#endif /* CONFIG_PCI_IOV */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index f28140a05f09..07eaa3c3f4d3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -11,19 +11,19 @@
static void ixgbe_i2c_start(struct ixgbe_hw *hw);
static void ixgbe_i2c_stop(struct ixgbe_hw *hw);
-static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data);
-static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data);
-static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw);
-static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data);
-static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data);
+static int ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data);
+static int ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data);
+static int ixgbe_get_i2c_ack(struct ixgbe_hw *hw);
+static int ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data);
+static int ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data);
static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
-static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data);
+static int ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data);
static bool ixgbe_get_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl);
static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw);
static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id);
-static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw);
-static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
+static int ixgbe_get_phy_id(struct ixgbe_hw *hw);
+static int ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
/**
* ixgbe_out_i2c_byte_ack - Send I2C byte with ack
@@ -32,9 +32,9 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
*
* Returns an error code on error.
**/
-static s32 ixgbe_out_i2c_byte_ack(struct ixgbe_hw *hw, u8 byte)
+static int ixgbe_out_i2c_byte_ack(struct ixgbe_hw *hw, u8 byte)
{
- s32 status;
+ int status;
status = ixgbe_clock_out_i2c_byte(hw, byte);
if (status)
@@ -49,9 +49,9 @@ static s32 ixgbe_out_i2c_byte_ack(struct ixgbe_hw *hw, u8 byte)
*
* Returns an error code on error.
**/
-static s32 ixgbe_in_i2c_byte_ack(struct ixgbe_hw *hw, u8 *byte)
+static int ixgbe_in_i2c_byte_ack(struct ixgbe_hw *hw, u8 *byte)
{
- s32 status;
+ int status;
status = ixgbe_clock_in_i2c_byte(hw, byte);
if (status)
@@ -85,7 +85,7 @@ static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2)
*
* Returns an error code on error.
*/
-s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+int ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
u16 reg, u16 *val, bool lock)
{
u32 swfw_mask = hw->phy.phy_semaphore_mask;
@@ -163,7 +163,7 @@ fail:
*
* Returns an error code on error.
*/
-s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+int ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
u16 reg, u16 val, bool lock)
{
u32 swfw_mask = hw->phy.phy_semaphore_mask;
@@ -260,7 +260,7 @@ static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr)
*
* Determines the physical layer module found on the current adapter.
**/
-s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
+int ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
{
u32 status = -EFAULT;
u32 phy_addr;
@@ -332,11 +332,11 @@ bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw)
* @hw: pointer to hardware structure
*
**/
-static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw)
+static int ixgbe_get_phy_id(struct ixgbe_hw *hw)
{
- s32 status;
u16 phy_id_high = 0;
u16 phy_id_low = 0;
+ int status;
status = hw->phy.ops.read_reg(hw, MDIO_DEVID1, MDIO_MMD_PMAPMD,
&phy_id_high);
@@ -394,11 +394,11 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
* ixgbe_reset_phy_generic - Performs a PHY reset
* @hw: pointer to hardware structure
**/
-s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
+int ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
{
u32 i;
u16 ctrl = 0;
- s32 status = 0;
+ int status = 0;
if (hw->phy.type == ixgbe_phy_unknown)
status = ixgbe_identify_phy_generic(hw);
@@ -470,8 +470,8 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
*
* Reads a value from a specified PHY register without the SWFW lock
**/
-s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
- u16 *phy_data)
+int ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+ u16 *phy_data)
{
u32 i, data, command;
@@ -546,11 +546,11 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
* @device_type: 5 bit device type
* @phy_data: Pointer to read data from PHY register
**/
-s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+int ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 *phy_data)
{
- s32 status;
u32 gssr = hw->phy.phy_semaphore_mask;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == 0) {
status = ixgbe_read_phy_reg_mdi(hw, reg_addr, device_type,
@@ -571,8 +571,8 @@ s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
* @device_type: 5 bit device type
* @phy_data: Data to write to the PHY register
**/
-s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
- u32 device_type, u16 phy_data)
+int ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+ u16 phy_data)
{
u32 i, command;
@@ -644,11 +644,11 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
* @device_type: 5 bit device type
* @phy_data: Data to write to the PHY register
**/
-s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+int ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 phy_data)
{
- s32 status;
u32 gssr = hw->phy.phy_semaphore_mask;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == 0) {
status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type,
@@ -668,7 +668,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
* @hw: pointer to hardware structure
* @cmd: command register value to write
**/
-static s32 ixgbe_msca_cmd(struct ixgbe_hw *hw, u32 cmd)
+static int ixgbe_msca_cmd(struct ixgbe_hw *hw, u32 cmd)
{
IXGBE_WRITE_REG(hw, IXGBE_MSCA, cmd);
@@ -684,11 +684,11 @@ static s32 ixgbe_msca_cmd(struct ixgbe_hw *hw, u32 cmd)
* @regnum: register number
* @gssr: semaphore flags to acquire
**/
-static s32 ixgbe_mii_bus_read_generic_c22(struct ixgbe_hw *hw, int addr,
+static int ixgbe_mii_bus_read_generic_c22(struct ixgbe_hw *hw, int addr,
int regnum, u32 gssr)
{
u32 hwaddr, cmd;
- s32 data;
+ int data;
if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
return -EBUSY;
@@ -718,11 +718,11 @@ mii_bus_read_done:
* @regnum: register number
* @gssr: semaphore flags to acquire
**/
-static s32 ixgbe_mii_bus_read_generic_c45(struct ixgbe_hw *hw, int addr,
+static int ixgbe_mii_bus_read_generic_c45(struct ixgbe_hw *hw, int addr,
int devad, int regnum, u32 gssr)
{
u32 hwaddr, cmd;
- s32 data;
+ int data;
if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
return -EBUSY;
@@ -756,11 +756,11 @@ mii_bus_read_done:
* @val: value to write
* @gssr: semaphore flags to acquire
**/
-static s32 ixgbe_mii_bus_write_generic_c22(struct ixgbe_hw *hw, int addr,
+static int ixgbe_mii_bus_write_generic_c22(struct ixgbe_hw *hw, int addr,
int regnum, u16 val, u32 gssr)
{
u32 hwaddr, cmd;
- s32 err;
+ int err;
if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
return -EBUSY;
@@ -787,12 +787,12 @@ static s32 ixgbe_mii_bus_write_generic_c22(struct ixgbe_hw *hw, int addr,
* @val: value to write
* @gssr: semaphore flags to acquire
**/
-static s32 ixgbe_mii_bus_write_generic_c45(struct ixgbe_hw *hw, int addr,
+static int ixgbe_mii_bus_write_generic_c45(struct ixgbe_hw *hw, int addr,
int devad, int regnum, u16 val,
u32 gssr)
{
u32 hwaddr, cmd;
- s32 err;
+ int err;
if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
return -EBUSY;
@@ -821,7 +821,7 @@ mii_bus_write_done:
* @addr: address
* @regnum: register number
**/
-static s32 ixgbe_mii_bus_read_c22(struct mii_bus *bus, int addr, int regnum)
+static int ixgbe_mii_bus_read_c22(struct mii_bus *bus, int addr, int regnum)
{
struct ixgbe_adapter *adapter = bus->priv;
struct ixgbe_hw *hw = &adapter->hw;
@@ -837,7 +837,7 @@ static s32 ixgbe_mii_bus_read_c22(struct mii_bus *bus, int addr, int regnum)
* @addr: address
* @regnum: register number
**/
-static s32 ixgbe_mii_bus_read_c45(struct mii_bus *bus, int devad, int addr,
+static int ixgbe_mii_bus_read_c45(struct mii_bus *bus, int devad, int addr,
int regnum)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -854,7 +854,7 @@ static s32 ixgbe_mii_bus_read_c45(struct mii_bus *bus, int devad, int addr,
* @regnum: register number
* @val: value to write
**/
-static s32 ixgbe_mii_bus_write_c22(struct mii_bus *bus, int addr, int regnum,
+static int ixgbe_mii_bus_write_c22(struct mii_bus *bus, int addr, int regnum,
u16 val)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -872,7 +872,7 @@ static s32 ixgbe_mii_bus_write_c22(struct mii_bus *bus, int addr, int regnum,
* @regnum: register number
* @val: value to write
**/
-static s32 ixgbe_mii_bus_write_c45(struct mii_bus *bus, int addr, int devad,
+static int ixgbe_mii_bus_write_c45(struct mii_bus *bus, int addr, int devad,
int regnum, u16 val)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -889,7 +889,7 @@ static s32 ixgbe_mii_bus_write_c45(struct mii_bus *bus, int addr, int devad,
* @addr: address
* @regnum: register number
**/
-static s32 ixgbe_x550em_a_mii_bus_read_c22(struct mii_bus *bus, int addr,
+static int ixgbe_x550em_a_mii_bus_read_c22(struct mii_bus *bus, int addr,
int regnum)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -907,7 +907,7 @@ static s32 ixgbe_x550em_a_mii_bus_read_c22(struct mii_bus *bus, int addr,
* @devad: device address to read
* @regnum: register number
**/
-static s32 ixgbe_x550em_a_mii_bus_read_c45(struct mii_bus *bus, int addr,
+static int ixgbe_x550em_a_mii_bus_read_c45(struct mii_bus *bus, int addr,
int devad, int regnum)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -925,7 +925,7 @@ static s32 ixgbe_x550em_a_mii_bus_read_c45(struct mii_bus *bus, int addr,
* @regnum: register number
* @val: value to write
**/
-static s32 ixgbe_x550em_a_mii_bus_write_c22(struct mii_bus *bus, int addr,
+static int ixgbe_x550em_a_mii_bus_write_c22(struct mii_bus *bus, int addr,
int regnum, u16 val)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -944,7 +944,7 @@ static s32 ixgbe_x550em_a_mii_bus_write_c22(struct mii_bus *bus, int addr,
* @regnum: register number
* @val: value to write
**/
-static s32 ixgbe_x550em_a_mii_bus_write_c45(struct mii_bus *bus, int addr,
+static int ixgbe_x550em_a_mii_bus_write_c45(struct mii_bus *bus, int addr,
int devad, int regnum, u16 val)
{
struct ixgbe_adapter *adapter = bus->priv;
@@ -1023,13 +1023,13 @@ out:
*
* ixgbe_mii_bus_init initializes a mii_bus structure in adapter
**/
-s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
+int ixgbe_mii_bus_init(struct ixgbe_hw *hw)
{
- s32 (*write_c22)(struct mii_bus *bus, int addr, int regnum, u16 val);
- s32 (*read_c22)(struct mii_bus *bus, int addr, int regnum);
- s32 (*write_c45)(struct mii_bus *bus, int addr, int devad, int regnum,
+ int (*write_c22)(struct mii_bus *bus, int addr, int regnum, u16 val);
+ int (*read_c22)(struct mii_bus *bus, int addr, int regnum);
+ int (*write_c45)(struct mii_bus *bus, int addr, int devad, int regnum,
u16 val);
- s32 (*read_c45)(struct mii_bus *bus, int addr, int devad, int regnum);
+ int (*read_c45)(struct mii_bus *bus, int addr, int devad, int regnum);
struct ixgbe_adapter *adapter = hw->back;
struct pci_dev *pdev = adapter->pdev;
struct device *dev = &adapter->netdev->dev;
@@ -1095,12 +1095,12 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
*
* Restart autonegotiation and PHY and waits for completion.
**/
-s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
+int ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
{
- s32 status = 0;
u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
- bool autoneg = false;
ixgbe_link_speed speed;
+ bool autoneg = false;
+ int status = 0;
ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
@@ -1173,7 +1173,7 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
* @speed: new link speed
* @autoneg_wait_to_complete: unused
**/
-s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+int ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete)
{
@@ -1214,10 +1214,10 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
* Determines the supported link capabilities by reading the PHY auto
* negotiation register.
*/
-static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
+static int ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
{
u16 speed_ability;
- s32 status;
+ int status;
status = hw->phy.ops.read_reg(hw, MDIO_SPEED, MDIO_MMD_PMAPMD,
&speed_ability);
@@ -1253,11 +1253,11 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
* @speed: pointer to link speed
* @autoneg: boolean auto-negotiation value
*/
-s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+int ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *autoneg)
{
- s32 status = 0;
+ int status = 0;
*autoneg = true;
if (!hw->phy.speeds_supported)
@@ -1276,15 +1276,15 @@ s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
* Reads the VS1 register to determine if link is up and the current speed for
* the PHY.
**/
-s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+int ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
bool *link_up)
{
- s32 status;
- u32 time_out;
u32 max_time_out = 10;
- u16 phy_link = 0;
u16 phy_speed = 0;
+ u16 phy_link = 0;
u16 phy_data = 0;
+ u32 time_out;
+ int status;
/* Initialize speed and link to default case */
*link_up = false;
@@ -1326,7 +1326,7 @@ s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
* it is called via a function pointer that could call other
* functions that could return an error.
**/
-s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
+int ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
{
u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
bool autoneg = false;
@@ -1399,13 +1399,13 @@ s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
* ixgbe_reset_phy_nl - Performs a PHY reset
* @hw: pointer to hardware structure
**/
-s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
+int ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
{
u16 phy_offset, control, eword, edata, block_crc;
- bool end_data = false;
u16 list_offset, data_offset;
+ bool end_data = false;
u16 phy_data = 0;
- s32 ret_val;
+ int ret_val;
u32 i;
/* Blocked by MNG FW so bail */
@@ -1506,7 +1506,7 @@ err_eeprom:
*
* Determines HW type and calls appropriate function.
**/
-s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw)
+int ixgbe_identify_module_generic(struct ixgbe_hw *hw)
{
switch (hw->mac.ops.get_media_type(hw)) {
case ixgbe_media_type_fiber:
@@ -1527,19 +1527,20 @@ s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw)
*
* Searches for and identifies the SFP module and assigns appropriate PHY type.
**/
-s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
+int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
{
+ enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
struct ixgbe_adapter *adapter = hw->back;
- s32 status;
+ u8 oui_bytes[3] = {0, 0, 0};
+ u8 bitrate_nominal = 0;
+ u8 comp_codes_10g = 0;
+ u8 comp_codes_1g = 0;
+ u16 enforce_sfp = 0;
u32 vendor_oui = 0;
- enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
u8 identifier = 0;
- u8 comp_codes_1g = 0;
- u8 comp_codes_10g = 0;
- u8 oui_bytes[3] = {0, 0, 0};
u8 cable_tech = 0;
u8 cable_spec = 0;
- u16 enforce_sfp = 0;
+ int status;
if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) {
hw->phy.sfp_type = ixgbe_sfp_type_not_present;
@@ -1576,7 +1577,12 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
status = hw->phy.ops.read_i2c_eeprom(hw,
IXGBE_SFF_CABLE_TECHNOLOGY,
&cable_tech);
+ if (status)
+ goto err_read_i2c_eeprom;
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_BITRATE_NOMINAL,
+ &bitrate_nominal);
if (status)
goto err_read_i2c_eeprom;
@@ -1659,6 +1665,18 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
else
hw->phy.sfp_type =
ixgbe_sfp_type_1g_lx_core1;
+ /* Support only Ethernet 1000BASE-BX10, checking the Bit Rate
+ * Nominal Value as per SFF-8472 by convention 1.25 Gb/s should
+ * be rounded up to 0Dh (13 in units of 100 MBd) for 1000BASE-BX
+ */
+ } else if ((comp_codes_1g & IXGBE_SFF_BASEBX10_CAPABLE) &&
+ (bitrate_nominal == 0xD)) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_bx_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_bx_core1;
} else {
hw->phy.sfp_type = ixgbe_sfp_type_unknown;
}
@@ -1747,7 +1765,9 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1)) {
hw->phy.type = ixgbe_phy_sfp_unsupported;
return -EOPNOTSUPP;
}
@@ -1763,7 +1783,9 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1)) {
/* Make sure we're a supported PHY type */
if (hw->phy.type == ixgbe_phy_sfp_intel)
return 0;
@@ -1792,10 +1814,10 @@ err_read_i2c_eeprom:
*
* Searches for and identifies the QSFP module and assigns appropriate PHY type
**/
-static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
+static int ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
{
struct ixgbe_adapter *adapter = hw->back;
- s32 status;
+ int status;
u32 vendor_oui = 0;
enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
u8 identifier = 0;
@@ -1975,7 +1997,7 @@ err_read_i2c_eeprom:
* Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if
* so it returns the offsets to the phy init sequence block.
**/
-s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+int ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
u16 *list_offset,
u16 *data_offset)
{
@@ -1999,12 +2021,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
- sfp_type == ixgbe_sfp_type_1g_sx_core0)
+ sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+ sfp_type == ixgbe_sfp_type_1g_bx_core0)
sfp_type = ixgbe_sfp_type_srlr_core0;
else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
- sfp_type == ixgbe_sfp_type_1g_sx_core1)
+ sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+ sfp_type == ixgbe_sfp_type_1g_bx_core1)
sfp_type = ixgbe_sfp_type_srlr_core1;
/* Read offset to PHY init contents */
@@ -2065,7 +2089,7 @@ err_phy:
*
* Performs byte read operation to SFP module's EEPROM over I2C interface.
**/
-s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 *eeprom_data)
{
return hw->phy.ops.read_i2c_byte(hw, byte_offset,
@@ -2081,7 +2105,7 @@ s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
*
* Performs byte read operation to SFP module's SFF-8472 data over I2C
**/
-s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 *sff8472_data)
{
return hw->phy.ops.read_i2c_byte(hw, byte_offset,
@@ -2097,7 +2121,7 @@ s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
*
* Performs byte write operation to SFP module's EEPROM over I2C interface.
**/
-s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 eeprom_data)
{
return hw->phy.ops.write_i2c_byte(hw, byte_offset,
@@ -2131,14 +2155,14 @@ static bool ixgbe_is_sfp_probe(struct ixgbe_hw *hw, u8 offset, u8 addr)
* Performs byte read operation to SFP module's EEPROM over I2C interface at
* a specified device address.
*/
-static s32 ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data, bool lock)
{
- s32 status;
- u32 max_retry = 10;
- u32 retry = 0;
u32 swfw_mask = hw->phy.phy_semaphore_mask;
+ u32 max_retry = 10;
bool nack = true;
+ u32 retry = 0;
+ int status;
if (hw->mac.type >= ixgbe_mac_X550)
max_retry = 3;
@@ -2221,7 +2245,7 @@ fail:
* Performs byte read operation to SFP module's EEPROM over I2C interface at
* a specified device address.
*/
-s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data)
{
return ixgbe_read_i2c_byte_generic_int(hw, byte_offset, dev_addr,
@@ -2238,7 +2262,7 @@ s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
* Performs byte read operation to SFP module's EEPROM over I2C interface at
* a specified device address.
*/
-s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data)
{
return ixgbe_read_i2c_byte_generic_int(hw, byte_offset, dev_addr,
@@ -2256,13 +2280,13 @@ s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
* Performs byte write operation to SFP module's EEPROM over I2C interface at
* a specified device address.
*/
-static s32 ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
+static int ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data, bool lock)
{
- s32 status;
+ u32 swfw_mask = hw->phy.phy_semaphore_mask;
u32 max_retry = 1;
u32 retry = 0;
- u32 swfw_mask = hw->phy.phy_semaphore_mask;
+ int status;
if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
return -EBUSY;
@@ -2324,7 +2348,7 @@ fail:
* Performs byte write operation to SFP module's EEPROM over I2C interface at
* a specified device address.
*/
-s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data)
{
return ixgbe_write_i2c_byte_generic_int(hw, byte_offset, dev_addr,
@@ -2341,7 +2365,7 @@ s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
* Performs byte write operation to SFP module's EEPROM over I2C interface at
* a specified device address.
*/
-s32 ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data)
{
return ixgbe_write_i2c_byte_generic_int(hw, byte_offset, dev_addr,
@@ -2422,10 +2446,10 @@ static void ixgbe_i2c_stop(struct ixgbe_hw *hw)
*
* Clocks in one byte data via I2C data/clock
**/
-static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
+static int ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
{
- s32 i;
bool bit = false;
+ int i;
*data = 0;
for (i = 7; i >= 0; i--) {
@@ -2443,12 +2467,12 @@ static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
*
* Clocks out one byte data via I2C data/clock
**/
-static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
+static int ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
{
- s32 status;
- s32 i;
- u32 i2cctl;
bool bit = false;
+ int status;
+ u32 i2cctl;
+ int i;
for (i = 7; i >= 0; i--) {
bit = (data >> i) & 0x1;
@@ -2474,14 +2498,14 @@ static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
*
* Clocks in/out one bit via I2C data/clock
**/
-static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
+static int ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
{
- u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
- s32 status = 0;
- u32 i = 0;
u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+ u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
u32 timeout = 10;
bool ack = true;
+ int status = 0;
+ u32 i = 0;
if (data_oe_bit) {
i2cctl |= IXGBE_I2C_DATA_OUT(hw);
@@ -2525,7 +2549,7 @@ static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
*
* Clocks in one bit via I2C data/clock
**/
-static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
+static int ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
{
u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
@@ -2559,10 +2583,10 @@ static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
*
* Clocks out one bit via I2C data/clock
**/
-static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data)
+static int ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data)
{
- s32 status;
u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+ int status;
status = ixgbe_set_i2c_data(hw, &i2cctl, data);
if (status == 0) {
@@ -2647,7 +2671,7 @@ static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
* Sets the I2C data bit
* Asserts the I2C data output enable on X550 hardware.
**/
-static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
+static int ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
{
u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
@@ -2769,7 +2793,7 @@ bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw)
* @hw: pointer to hardware structure
* @on: true for on, false for off
**/
-s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
+int ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
{
u32 status;
u16 reg;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index ef72729d7c93..14aa2ca51f70 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -17,6 +17,7 @@
#define IXGBE_SFF_1GBE_COMP_CODES 0x6
#define IXGBE_SFF_10GBE_COMP_CODES 0x3
#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8
+#define IXGBE_SFF_BITRATE_NOMINAL 0xC
#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C
#define IXGBE_SFF_SFF_8472_SWAP 0x5C
#define IXGBE_SFF_SFF_8472_COMP 0x5E
@@ -39,6 +40,7 @@
#define IXGBE_SFF_1GBASESX_CAPABLE 0x1
#define IXGBE_SFF_1GBASELX_CAPABLE 0x2
#define IXGBE_SFF_1GBASET_CAPABLE 0x8
+#define IXGBE_SFF_BASEBX10_CAPABLE 0x64
#define IXGBE_SFF_10GBASESR_CAPABLE 0x10
#define IXGBE_SFF_10GBASELR_CAPABLE 0x20
#define IXGBE_SFF_SOFT_RS_SELECT_MASK 0x8
@@ -121,57 +123,57 @@
/* SFP+ SFF-8472 Compliance code */
#define IXGBE_SFF_SFF_8472_UNSUP 0x00
-s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw);
+int ixgbe_mii_bus_init(struct ixgbe_hw *hw);
-s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
-s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
-s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+int ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
+int ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
+int ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 *phy_data);
-s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+int ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 phy_data);
-s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
+int ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 *phy_data);
-s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
+int ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 phy_data);
-s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
-s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+int ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
+int ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
-s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+int ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *autoneg);
bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw);
/* PHY specific */
-s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
+int ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *link_up);
-s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
+int ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
-s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
-s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on);
-s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw);
-s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
-s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+int ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
+int ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on);
+int ixgbe_identify_module_generic(struct ixgbe_hw *hw);
+int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
+int ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
u16 *list_offset,
u16 *data_offset);
bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
-s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data);
-s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data);
-s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data);
-s32 ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data);
-s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 *eeprom_data);
-s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 *sff8472_data);
-s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+int ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 eeprom_data);
-s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+int ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
u16 *val, bool lock);
-s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+int ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
u16 val, bool lock);
#endif /* _IXGBE_PHY_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 7299a830f6e4..fcfd0a075eee 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -492,7 +492,7 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf
struct net_device *dev = adapter->netdev;
int pf_max_frame = dev->mtu + ETH_HLEN;
u32 reg_offset, vf_shift, vfre;
- s32 err = 0;
+ int err = 0;
#ifdef CONFIG_FCOE
if (dev->features & NETIF_F_FCOE_MTU)
@@ -775,7 +775,7 @@ static void ixgbe_vf_clear_mbx(struct ixgbe_adapter *adapter, u32 vf)
static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
int vf, unsigned char *mac_addr)
{
- s32 retval;
+ int retval;
ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
@@ -1254,7 +1254,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
u32 mbx_size = IXGBE_VFMAILBOX_SIZE;
u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
struct ixgbe_hw *hw = &adapter->hw;
- s32 retval;
+ int retval;
retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf);
@@ -1418,7 +1418,7 @@ void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter)
int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
- s32 retval;
+ int retval;
if (vf >= adapter->num_vfs)
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index f1f69ce67420..78deea5ec536 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -46,4 +46,11 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
+void ixgbe_update_tx_ring_stats(struct ixgbe_ring *tx_ring,
+ struct ixgbe_q_vector *q_vector, u64 pkts,
+ u64 bytes);
+void ixgbe_update_rx_ring_stats(struct ixgbe_ring *rx_ring,
+ struct ixgbe_q_vector *q_vector, u64 pkts,
+ u64 bytes);
+
#endif /* #define _IXGBE_TXRX_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 61b9774b3d31..ed440dd0c4f9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3210,6 +3210,9 @@ enum ixgbe_sfp_type {
ixgbe_sfp_type_1g_sx_core1 = 12,
ixgbe_sfp_type_1g_lx_core0 = 13,
ixgbe_sfp_type_1g_lx_core1 = 14,
+ ixgbe_sfp_type_1g_bx_core0 = 15,
+ ixgbe_sfp_type_1g_bx_core1 = 16,
+
ixgbe_sfp_type_not_present = 0xFFFE,
ixgbe_sfp_type_unknown = 0xFFFF
};
@@ -3393,50 +3396,50 @@ struct ixgbe_hw;
/* Function pointer table */
struct ixgbe_eeprom_operations {
- s32 (*init_params)(struct ixgbe_hw *);
- s32 (*read)(struct ixgbe_hw *, u16, u16 *);
- s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
- s32 (*write)(struct ixgbe_hw *, u16, u16);
- s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
- s32 (*validate_checksum)(struct ixgbe_hw *, u16 *);
- s32 (*update_checksum)(struct ixgbe_hw *);
- s32 (*calc_checksum)(struct ixgbe_hw *);
+ int (*init_params)(struct ixgbe_hw *);
+ int (*read)(struct ixgbe_hw *, u16, u16 *);
+ int (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+ int (*write)(struct ixgbe_hw *, u16, u16);
+ int (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+ int (*validate_checksum)(struct ixgbe_hw *, u16 *);
+ int (*update_checksum)(struct ixgbe_hw *);
+ int (*calc_checksum)(struct ixgbe_hw *);
};
struct ixgbe_mac_operations {
- s32 (*init_hw)(struct ixgbe_hw *);
- s32 (*reset_hw)(struct ixgbe_hw *);
- s32 (*start_hw)(struct ixgbe_hw *);
- s32 (*clear_hw_cntrs)(struct ixgbe_hw *);
+ int (*init_hw)(struct ixgbe_hw *);
+ int (*reset_hw)(struct ixgbe_hw *);
+ int (*start_hw)(struct ixgbe_hw *);
+ int (*clear_hw_cntrs)(struct ixgbe_hw *);
enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
- s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *);
- s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
- s32 (*get_device_caps)(struct ixgbe_hw *, u16 *);
- s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *);
- s32 (*stop_adapter)(struct ixgbe_hw *);
- s32 (*get_bus_info)(struct ixgbe_hw *);
+ int (*get_mac_addr)(struct ixgbe_hw *, u8 *);
+ int (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
+ int (*get_device_caps)(struct ixgbe_hw *, u16 *);
+ int (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *);
+ int (*stop_adapter)(struct ixgbe_hw *);
+ int (*get_bus_info)(struct ixgbe_hw *);
void (*set_lan_id)(struct ixgbe_hw *);
- s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*);
- s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8);
- s32 (*setup_sfp)(struct ixgbe_hw *);
- s32 (*disable_rx_buff)(struct ixgbe_hw *);
- s32 (*enable_rx_buff)(struct ixgbe_hw *);
- s32 (*enable_rx_dma)(struct ixgbe_hw *, u32);
- s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u32);
+ int (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*);
+ int (*write_analog_reg8)(struct ixgbe_hw*, u32, u8);
+ int (*setup_sfp)(struct ixgbe_hw *);
+ int (*disable_rx_buff)(struct ixgbe_hw *);
+ int (*enable_rx_buff)(struct ixgbe_hw *);
+ int (*enable_rx_dma)(struct ixgbe_hw *, u32);
+ int (*acquire_swfw_sync)(struct ixgbe_hw *, u32);
void (*release_swfw_sync)(struct ixgbe_hw *, u32);
void (*init_swfw_sync)(struct ixgbe_hw *);
- s32 (*prot_autoc_read)(struct ixgbe_hw *, bool *, u32 *);
- s32 (*prot_autoc_write)(struct ixgbe_hw *, u32, bool);
+ int (*prot_autoc_read)(struct ixgbe_hw *, bool *, u32 *);
+ int (*prot_autoc_write)(struct ixgbe_hw *, u32, bool);
/* Link */
void (*disable_tx_laser)(struct ixgbe_hw *);
void (*enable_tx_laser)(struct ixgbe_hw *);
void (*flap_tx_laser)(struct ixgbe_hw *);
void (*stop_link_on_d3)(struct ixgbe_hw *);
- s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
- s32 (*setup_mac_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
- s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
- s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
+ int (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
+ int (*setup_mac_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
+ int (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
+ int (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
bool *);
void (*set_rate_select_speed)(struct ixgbe_hw *, ixgbe_link_speed);
@@ -3444,38 +3447,38 @@ struct ixgbe_mac_operations {
void (*set_rxpba)(struct ixgbe_hw *, int, u32, int);
/* LED */
- s32 (*led_on)(struct ixgbe_hw *, u32);
- s32 (*led_off)(struct ixgbe_hw *, u32);
- s32 (*blink_led_start)(struct ixgbe_hw *, u32);
- s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
- s32 (*init_led_link_act)(struct ixgbe_hw *);
+ int (*led_on)(struct ixgbe_hw *, u32);
+ int (*led_off)(struct ixgbe_hw *, u32);
+ int (*blink_led_start)(struct ixgbe_hw *, u32);
+ int (*blink_led_stop)(struct ixgbe_hw *, u32);
+ int (*init_led_link_act)(struct ixgbe_hw *);
/* RAR, Multicast, VLAN */
- s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
- s32 (*clear_rar)(struct ixgbe_hw *, u32);
- s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
- s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
- s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
- s32 (*init_rx_addrs)(struct ixgbe_hw *);
- s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *);
- s32 (*enable_mc)(struct ixgbe_hw *);
- s32 (*disable_mc)(struct ixgbe_hw *);
- s32 (*clear_vfta)(struct ixgbe_hw *);
- s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool, bool);
- s32 (*init_uta_tables)(struct ixgbe_hw *);
+ int (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
+ int (*clear_rar)(struct ixgbe_hw *, u32);
+ int (*set_vmdq)(struct ixgbe_hw *, u32, u32);
+ int (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
+ int (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
+ int (*init_rx_addrs)(struct ixgbe_hw *);
+ int (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *);
+ int (*enable_mc)(struct ixgbe_hw *);
+ int (*disable_mc)(struct ixgbe_hw *);
+ int (*clear_vfta)(struct ixgbe_hw *);
+ int (*set_vfta)(struct ixgbe_hw *, u32, u32, bool, bool);
+ int (*init_uta_tables)(struct ixgbe_hw *);
void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
/* Flow Control */
- s32 (*fc_enable)(struct ixgbe_hw *);
- s32 (*setup_fc)(struct ixgbe_hw *);
+ int (*fc_enable)(struct ixgbe_hw *);
+ int (*setup_fc)(struct ixgbe_hw *);
void (*fc_autoneg)(struct ixgbe_hw *);
/* Manageability interface */
- s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8, u16,
+ int (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8, u16,
const char *);
- s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
- s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
+ int (*get_thermal_sensor_data)(struct ixgbe_hw *);
+ int (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
bool (*fw_recovery_mode)(struct ixgbe_hw *hw);
void (*disable_rx)(struct ixgbe_hw *hw);
void (*enable_rx)(struct ixgbe_hw *hw);
@@ -3484,47 +3487,47 @@ struct ixgbe_mac_operations {
void (*set_ethertype_anti_spoofing)(struct ixgbe_hw *, bool, int);
/* DMA Coalescing */
- s32 (*dmac_config)(struct ixgbe_hw *hw);
- s32 (*dmac_update_tcs)(struct ixgbe_hw *hw);
- s32 (*dmac_config_tcs)(struct ixgbe_hw *hw);
- s32 (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *);
- s32 (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32);
+ int (*dmac_config)(struct ixgbe_hw *hw);
+ int (*dmac_update_tcs)(struct ixgbe_hw *hw);
+ int (*dmac_config_tcs)(struct ixgbe_hw *hw);
+ int (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *);
+ int (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32);
};
struct ixgbe_phy_operations {
- s32 (*identify)(struct ixgbe_hw *);
- s32 (*identify_sfp)(struct ixgbe_hw *);
- s32 (*init)(struct ixgbe_hw *);
- s32 (*reset)(struct ixgbe_hw *);
- s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *);
- s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16);
- s32 (*read_reg_mdi)(struct ixgbe_hw *, u32, u32, u16 *);
- s32 (*write_reg_mdi)(struct ixgbe_hw *, u32, u32, u16);
- s32 (*setup_link)(struct ixgbe_hw *);
- s32 (*setup_internal_link)(struct ixgbe_hw *);
- s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool);
- s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *);
- s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *);
- s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8);
- s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *);
- s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
- s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
+ int (*identify)(struct ixgbe_hw *);
+ int (*identify_sfp)(struct ixgbe_hw *);
+ int (*init)(struct ixgbe_hw *);
+ int (*reset)(struct ixgbe_hw *);
+ int (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *);
+ int (*write_reg)(struct ixgbe_hw *, u32, u32, u16);
+ int (*read_reg_mdi)(struct ixgbe_hw *, u32, u32, u16 *);
+ int (*write_reg_mdi)(struct ixgbe_hw *, u32, u32, u16);
+ int (*setup_link)(struct ixgbe_hw *);
+ int (*setup_internal_link)(struct ixgbe_hw *);
+ int (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool);
+ int (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *);
+ int (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *);
+ int (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8);
+ int (*read_i2c_sff8472)(struct ixgbe_hw *, u8, u8 *);
+ int (*read_i2c_eeprom)(struct ixgbe_hw *, u8, u8 *);
+ int (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
bool (*check_overtemp)(struct ixgbe_hw *);
- s32 (*set_phy_power)(struct ixgbe_hw *, bool on);
- s32 (*enter_lplu)(struct ixgbe_hw *);
- s32 (*handle_lasi)(struct ixgbe_hw *hw, bool *);
- s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+ int (*set_phy_power)(struct ixgbe_hw *, bool on);
+ int (*enter_lplu)(struct ixgbe_hw *);
+ int (*handle_lasi)(struct ixgbe_hw *hw, bool *);
+ int (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
u8 *value);
- s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+ int (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
u8 value);
};
struct ixgbe_link_operations {
- s32 (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
- s32 (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+ int (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
+ int (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
u16 *val);
- s32 (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
- s32 (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+ int (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
+ int (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
u16 val);
};
@@ -3602,14 +3605,14 @@ struct ixgbe_phy_info {
#include "ixgbe_mbx.h"
struct ixgbe_mbx_operations {
- s32 (*init_params)(struct ixgbe_hw *hw);
- s32 (*read)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*check_for_msg)(struct ixgbe_hw *, u16);
- s32 (*check_for_ack)(struct ixgbe_hw *, u16);
- s32 (*check_for_rst)(struct ixgbe_hw *, u16);
+ int (*init_params)(struct ixgbe_hw *hw);
+ int (*read)(struct ixgbe_hw *, u32 *, u16, u16);
+ int (*write)(struct ixgbe_hw *, u32 *, u16, u16);
+ int (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16);
+ int (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16);
+ int (*check_for_msg)(struct ixgbe_hw *, u16);
+ int (*check_for_ack)(struct ixgbe_hw *, u16);
+ int (*check_for_rst)(struct ixgbe_hw *, u16);
};
struct ixgbe_mbx_stats {
@@ -3656,7 +3659,7 @@ struct ixgbe_hw {
struct ixgbe_info {
enum ixgbe_mac_type mac;
- s32 (*get_invariants)(struct ixgbe_hw *);
+ int (*get_invariants)(struct ixgbe_hw *);
const struct ixgbe_mac_operations *mac_ops;
const struct ixgbe_eeprom_operations *eeprom_ops;
const struct ixgbe_phy_operations *phy_ops;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 57a912e4653f..f1ffa398f6df 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -16,9 +16,9 @@
#define IXGBE_X540_VFT_TBL_SIZE 128
#define IXGBE_X540_RX_PB_SIZE 384
-static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw);
-static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw);
-static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw);
+static int ixgbe_update_flash_X540(struct ixgbe_hw *hw);
+static int ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw);
+static int ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw);
static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw);
enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw)
@@ -26,7 +26,7 @@ enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw)
return ixgbe_media_type_copper;
}
-s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw)
+int ixgbe_get_invariants_X540(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
struct ixgbe_phy_info *phy = &hw->phy;
@@ -51,7 +51,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw)
* @speed: new link speed
* @autoneg_wait_to_complete: true when waiting for completion is needed
**/
-s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+int ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
bool autoneg_wait_to_complete)
{
return hw->phy.ops.setup_link_speed(hw, speed,
@@ -66,11 +66,11 @@ s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
* and clears all interrupts, perform a PHY reset, and perform a link (MAC)
* reset.
**/
-s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
+int ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
{
- s32 status;
- u32 ctrl, i;
u32 swfw_mask = hw->phy.phy_semaphore_mask;
+ u32 ctrl, i;
+ int status;
/* Call adapter stop to disable tx/rx and clear interrupts */
status = hw->mac.ops.stop_adapter(hw);
@@ -166,9 +166,9 @@ mac_reset_top:
* and the generation start_hw function.
* Then performs revision-specific operations, if any.
**/
-s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
+int ixgbe_start_hw_X540(struct ixgbe_hw *hw)
{
- s32 ret_val;
+ int ret_val;
ret_val = ixgbe_start_hw_generic(hw);
if (ret_val)
@@ -184,7 +184,7 @@ s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
* Initializes the EEPROM parameters ixgbe_eeprom_info within the
* ixgbe_hw struct in order to set up EEPROM access.
**/
-s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
+int ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
@@ -215,9 +215,9 @@ s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
*
* Reads a 16 bit word from the EEPROM using the EERD register.
**/
-static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
+static int ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
{
- s32 status;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
return -EBUSY;
@@ -237,10 +237,10 @@ static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
*
* Reads a 16 bit word(s) from the EEPROM using the EERD register.
**/
-static s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
+static int ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
u16 offset, u16 words, u16 *data)
{
- s32 status;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
return -EBUSY;
@@ -259,9 +259,9 @@ static s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
*
* Write a 16 bit word to the EEPROM using the EEWR register.
**/
-static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
+static int ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
{
- s32 status;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
return -EBUSY;
@@ -281,10 +281,10 @@ static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
*
* Write a 16 bit word(s) to the EEPROM using the EEWR register.
**/
-static s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
+static int ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
u16 offset, u16 words, u16 *data)
{
- s32 status;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
return -EBUSY;
@@ -303,7 +303,7 @@ static s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
*
* @hw: pointer to hardware structure
**/
-static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
+static int ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
{
u16 i;
u16 j;
@@ -368,7 +368,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
checksum = (u16)IXGBE_EEPROM_SUM - checksum;
- return (s32)checksum;
+ return (int)checksum;
}
/**
@@ -379,12 +379,12 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
* Performs checksum calculation and validates the EEPROM checksum. If the
* caller does not need checksum_val, the value can be NULL.
**/
-static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw,
+static int ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw,
u16 *checksum_val)
{
- s32 status;
- u16 checksum;
u16 read_checksum = 0;
+ u16 checksum;
+ int status;
/* Read the first word from the EEPROM. If this times out or fails, do
* not continue or we could be in for a very long wait while every
@@ -439,10 +439,10 @@ out:
* checksum and updates the EEPROM and instructs the hardware to update
* the flash.
**/
-static s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw)
+static int ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw)
{
- s32 status;
u16 checksum;
+ int status;
/* Read the first word from the EEPROM. If this times out or fails, do
* not continue or we could be in for a very long wait while every
@@ -484,10 +484,10 @@ out:
* Set FLUP (bit 23) of the EEC register to instruct Hardware to copy
* EEPROM from shadow RAM to the flash device.
**/
-static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw)
+static int ixgbe_update_flash_X540(struct ixgbe_hw *hw)
{
+ int status;
u32 flup;
- s32 status;
status = ixgbe_poll_flash_update_done_X540(hw);
if (status == -EIO) {
@@ -529,7 +529,7 @@ static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw)
* Polls the FLUDONE (bit 26) of the EEC Register to determine when the
* flash update is done.
**/
-static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
+static int ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
{
u32 i;
u32 reg;
@@ -551,7 +551,7 @@ static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
* Acquires the SWFW semaphore thought the SW_FW_SYNC register for
* the specified function (CSR, PHY0, PHY1, NVM, Flash)
**/
-s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
+int ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
{
u32 swmask = mask & IXGBE_GSSR_NVM_PHY_MASK;
u32 swi2c_mask = mask & IXGBE_GSSR_I2C_MASK;
@@ -660,7 +660,7 @@ void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
*
* Sets the hardware semaphores so SW/FW can gain control of shared resources
*/
-static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
+static int ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
{
u32 timeout = 2000;
u32 i;
@@ -760,7 +760,7 @@ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw)
* Devices that implement the version 2 interface:
* X540
**/
-s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
+int ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
{
u32 macc_reg;
u32 ledctl_reg;
@@ -798,7 +798,7 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
* Devices that implement the version 2 interface:
* X540
**/
-s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
+int ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
{
u32 macc_reg;
u32 ledctl_reg;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
index e246c0d2a427..b69a680d3ab5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
@@ -3,17 +3,17 @@
#include "ixgbe_type.h"
-s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw);
-s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+int ixgbe_get_invariants_X540(struct ixgbe_hw *hw);
+int ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
-s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw);
+int ixgbe_reset_hw_X540(struct ixgbe_hw *hw);
+int ixgbe_start_hw_X540(struct ixgbe_hw *hw);
enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw);
-s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+int ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
bool autoneg_wait_to_complete);
-s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask);
+int ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index);
+int ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index);
+int ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask);
void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask);
void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw);
-s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw);
+int ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 6208923e29a2..2decb0710b6e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -6,13 +6,13 @@
#include "ixgbe_common.h"
#include "ixgbe_phy.h"
-static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed);
-static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *);
+static int ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed);
+static int ixgbe_setup_fc_x550em(struct ixgbe_hw *);
static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *);
static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *);
-static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *);
+static int ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *);
-static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
+static int ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
struct ixgbe_phy_info *phy = &hw->phy;
@@ -29,7 +29,7 @@ static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
return 0;
}
-static s32 ixgbe_get_invariants_X550_x_fw(struct ixgbe_hw *hw)
+static int ixgbe_get_invariants_X550_x_fw(struct ixgbe_hw *hw)
{
struct ixgbe_phy_info *phy = &hw->phy;
@@ -41,7 +41,7 @@ static s32 ixgbe_get_invariants_X550_x_fw(struct ixgbe_hw *hw)
return 0;
}
-static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw)
+static int ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw)
{
struct ixgbe_mac_info *mac = &hw->mac;
struct ixgbe_phy_info *phy = &hw->phy;
@@ -55,7 +55,7 @@ static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw)
return 0;
}
-static s32 ixgbe_get_invariants_X550_a_fw(struct ixgbe_hw *hw)
+static int ixgbe_get_invariants_X550_a_fw(struct ixgbe_hw *hw)
{
struct ixgbe_phy_info *phy = &hw->phy;
@@ -91,7 +91,7 @@ static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw)
*
* Returns status code
*/
-static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
+static int ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
{
return hw->link.ops.read_link_unlocked(hw, hw->link.addr, reg, value);
}
@@ -104,7 +104,7 @@ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
*
* Returns status code
*/
-static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
+static int ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
{
return hw->link.ops.write_link_unlocked(hw, hw->link.addr, reg, value);
}
@@ -117,9 +117,9 @@ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
*
* Returns status code
*/
-static s32 ixgbe_read_pe(struct ixgbe_hw *hw, u8 reg, u8 *value)
+static int ixgbe_read_pe(struct ixgbe_hw *hw, u8 reg, u8 *value)
{
- s32 status;
+ int status;
status = ixgbe_read_i2c_byte_generic_unlocked(hw, reg, IXGBE_PE, value);
if (status)
@@ -135,9 +135,9 @@ static s32 ixgbe_read_pe(struct ixgbe_hw *hw, u8 reg, u8 *value)
*
* Returns status code
*/
-static s32 ixgbe_write_pe(struct ixgbe_hw *hw, u8 reg, u8 value)
+static int ixgbe_write_pe(struct ixgbe_hw *hw, u8 reg, u8 value)
{
- s32 status;
+ int status;
status = ixgbe_write_i2c_byte_generic_unlocked(hw, reg, IXGBE_PE,
value);
@@ -153,9 +153,9 @@ static s32 ixgbe_write_pe(struct ixgbe_hw *hw, u8 reg, u8 value)
* This function assumes that the caller has acquired the proper semaphore.
* Returns error code
*/
-static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw)
+static int ixgbe_reset_cs4227(struct ixgbe_hw *hw)
{
- s32 status;
+ int status;
u32 retry;
u16 value;
u8 reg;
@@ -225,7 +225,7 @@ static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw)
static void ixgbe_check_cs4227(struct ixgbe_hw *hw)
{
u32 swfw_mask = hw->phy.phy_semaphore_mask;
- s32 status;
+ int status;
u16 value;
u8 retry;
@@ -292,7 +292,7 @@ out:
*
* Returns error code
*/
-static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
+static int ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
{
switch (hw->device_id) {
case IXGBE_DEV_ID_X550EM_A_SFP:
@@ -347,13 +347,13 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
return 0;
}
-static s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 *phy_data)
{
return -EOPNOTSUPP;
}
-static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 phy_data)
{
return -EOPNOTSUPP;
@@ -368,7 +368,7 @@ static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
*
* Returns an error code on error.
**/
-static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
+static int ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
u16 reg, u16 *val)
{
return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true);
@@ -383,7 +383,7 @@ static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
*
* Returns an error code on error.
**/
-static s32
+static int
ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
u16 reg, u16 *val)
{
@@ -399,7 +399,7 @@ ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
*
* Returns an error code on error.
**/
-static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
+static int ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
u8 addr, u16 reg, u16 val)
{
return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true);
@@ -414,7 +414,7 @@ static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
*
* Returns an error code on error.
**/
-static s32
+static int
ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
u8 addr, u16 reg, u16 val)
{
@@ -427,7 +427,7 @@ ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
* @activity: activity to perform
* @data: Pointer to 4 32-bit words of data
*/
-s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
+int ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
u32 (*data)[FW_PHY_ACT_DATA_COUNT])
{
union {
@@ -435,7 +435,7 @@ s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
struct ixgbe_hic_phy_activity_resp rsp;
} hic;
u16 retries = FW_PHY_ACT_RETRIES;
- s32 rc;
+ int rc;
u32 i;
do {
@@ -484,12 +484,12 @@ static const struct {
*
* Returns error code
*/
-static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw)
+static int ixgbe_get_phy_id_fw(struct ixgbe_hw *hw)
{
u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
u16 phy_speeds;
u16 phy_id_lo;
- s32 rc;
+ int rc;
u16 i;
if (hw->phy.id)
@@ -526,7 +526,7 @@ static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw)
*
* Returns error code
*/
-static s32 ixgbe_identify_phy_fw(struct ixgbe_hw *hw)
+static int ixgbe_identify_phy_fw(struct ixgbe_hw *hw)
{
if (hw->bus.lan_id)
hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
@@ -545,7 +545,7 @@ static s32 ixgbe_identify_phy_fw(struct ixgbe_hw *hw)
*
* Returns error code
*/
-static s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw)
+static int ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw)
{
u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 };
@@ -557,10 +557,10 @@ static s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw)
* ixgbe_setup_fw_link - Setup firmware-controlled PHYs
* @hw: pointer to hardware structure
*/
-static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw)
+static int ixgbe_setup_fw_link(struct ixgbe_hw *hw)
{
u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 };
- s32 rc;
+ int rc;
u16 i;
if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw))
@@ -613,7 +613,7 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw)
*
* Called at init time to set up flow control.
*/
-static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
+static int ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
{
if (hw->fc.requested_mode == ixgbe_fc_default)
hw->fc.requested_mode = ixgbe_fc_full;
@@ -627,7 +627,7 @@ static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
* Initializes the EEPROM parameters ixgbe_eeprom_info within the
* ixgbe_hw struct in order to set up EEPROM access.
**/
-static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
+static int ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
@@ -659,7 +659,7 @@ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
*
* Note: ctrl can be NULL if the IOSF control register value is not needed
*/
-static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl)
+static int ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl)
{
u32 i, command;
@@ -690,12 +690,12 @@ static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl)
* @device_type: 3 bit device type
* @phy_data: Pointer to read data from the register
**/
-static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u32 *data)
{
u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
u32 command, error;
- s32 ret;
+ int ret;
ret = hw->mac.ops.acquire_swfw_sync(hw, gssr);
if (ret)
@@ -716,7 +716,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command);
hw_dbg(hw, "Failed to read, error %x\n", error);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
if (!ret)
@@ -731,10 +732,10 @@ out:
* ixgbe_get_phy_token - Get the token for shared PHY access
* @hw: Pointer to hardware structure
*/
-static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw)
+static int ixgbe_get_phy_token(struct ixgbe_hw *hw)
{
struct ixgbe_hic_phy_token_req token_cmd;
- s32 status;
+ int status;
token_cmd.hdr.cmd = FW_PHY_TOKEN_REQ_CMD;
token_cmd.hdr.buf_len = FW_PHY_TOKEN_REQ_LEN;
@@ -760,10 +761,10 @@ static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw)
* ixgbe_put_phy_token - Put the token for shared PHY access
* @hw: Pointer to hardware structure
*/
-static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw)
+static int ixgbe_put_phy_token(struct ixgbe_hw *hw)
{
struct ixgbe_hic_phy_token_req token_cmd;
- s32 status;
+ int status;
token_cmd.hdr.cmd = FW_PHY_TOKEN_REQ_CMD;
token_cmd.hdr.buf_len = FW_PHY_TOKEN_REQ_LEN;
@@ -789,7 +790,7 @@ static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw)
* @device_type: 3 bit device type
* @data: Data to write to the register
**/
-static s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
__always_unused u32 device_type,
u32 data)
{
@@ -815,7 +816,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
* @device_type: 3 bit device type
* @data: Pointer to read data from the register
**/
-static s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
__always_unused u32 device_type,
u32 *data)
{
@@ -823,7 +824,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
struct ixgbe_hic_internal_phy_req cmd;
struct ixgbe_hic_internal_phy_resp rsp;
} hic;
- s32 status;
+ int status;
memset(&hic, 0, sizeof(hic));
hic.cmd.hdr.cmd = FW_INT_PHY_REQ_CMD;
@@ -850,14 +851,14 @@ static s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
*
* Reads a 16 bit word(s) from the EEPROM using the hostif.
**/
-static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+static int ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
u16 offset, u16 words, u16 *data)
{
const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
struct ixgbe_hic_read_shadow_ram buffer;
u32 current_word = 0;
u16 words_to_read;
- s32 status;
+ int status;
u32 i;
/* Take semaphore for the entire operation. */
@@ -922,14 +923,14 @@ out:
*
* Returns error status for any failure
**/
-static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
+static int ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
u16 size, u16 *csum, u16 *buffer,
u32 buffer_size)
{
- u16 buf[256];
- s32 status;
u16 length, bufsz, i, start;
u16 *local_buffer;
+ u16 buf[256];
+ int status;
bufsz = ARRAY_SIZE(buf);
@@ -990,14 +991,14 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
*
* Returns a negative error code on error, or the 16-bit checksum
**/
-static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer,
+static int ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer,
u32 buffer_size)
{
u16 eeprom_ptrs[IXGBE_EEPROM_LAST_WORD + 1];
+ u16 pointer, i, size;
u16 *local_buffer;
- s32 status;
u16 checksum = 0;
- u16 pointer, i, size;
+ int status;
hw->eeprom.ops.init_params(hw);
@@ -1059,7 +1060,7 @@ static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer,
checksum = (u16)IXGBE_EEPROM_SUM - checksum;
- return (s32)checksum;
+ return (int)checksum;
}
/** ixgbe_calc_eeprom_checksum_X550 - Calculates and returns the checksum
@@ -1067,7 +1068,7 @@ static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer,
*
* Returns a negative error code on error, or the 16-bit checksum
**/
-static s32 ixgbe_calc_eeprom_checksum_X550(struct ixgbe_hw *hw)
+static int ixgbe_calc_eeprom_checksum_X550(struct ixgbe_hw *hw)
{
return ixgbe_calc_checksum_X550(hw, NULL, 0);
}
@@ -1079,11 +1080,11 @@ static s32 ixgbe_calc_eeprom_checksum_X550(struct ixgbe_hw *hw)
*
* Reads a 16 bit word from the EEPROM using the hostif.
**/
-static s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data)
+static int ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data)
{
const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
struct ixgbe_hic_read_shadow_ram buffer;
- s32 status;
+ int status;
buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
buffer.hdr.req.buf_lenh = 0;
@@ -1117,12 +1118,12 @@ static s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data)
* Performs checksum calculation and validates the EEPROM checksum. If the
* caller does not need checksum_val, the value can be NULL.
**/
-static s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw,
+static int ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw,
u16 *checksum_val)
{
- s32 status;
- u16 checksum;
u16 read_checksum = 0;
+ u16 checksum;
+ int status;
/* Read the first word from the EEPROM. If this times out or fails, do
* not continue or we could be in for a very long wait while every
@@ -1167,11 +1168,11 @@ static s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw,
*
* Write a 16 bit word to the EEPROM using the hostif.
**/
-static s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
+static int ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
u16 data)
{
- s32 status;
struct ixgbe_hic_write_shadow_ram buffer;
+ int status;
buffer.hdr.req.cmd = FW_WRITE_SHADOW_RAM_CMD;
buffer.hdr.req.buf_lenh = 0;
@@ -1195,9 +1196,9 @@ static s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
*
* Write a 16 bit word to the EEPROM using the hostif.
**/
-static s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data)
+static int ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data)
{
- s32 status = 0;
+ int status = 0;
if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == 0) {
status = ixgbe_write_ee_hostif_data_X550(hw, offset, data);
@@ -1215,10 +1216,10 @@ static s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data)
*
* Issue a shadow RAM dump to FW to copy EEPROM from shadow RAM to the flash.
**/
-static s32 ixgbe_update_flash_X550(struct ixgbe_hw *hw)
+static int ixgbe_update_flash_X550(struct ixgbe_hw *hw)
{
- s32 status = 0;
union ixgbe_hic_hdr2 buffer;
+ int status = 0;
buffer.req.cmd = FW_SHADOW_RAM_DUMP_CMD;
buffer.req.buf_lenh = 0;
@@ -1237,7 +1238,7 @@ static s32 ixgbe_update_flash_X550(struct ixgbe_hw *hw)
* Sets bus link width and speed to unknown because X550em is
* not a PCI device.
**/
-static s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
+static int ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
{
hw->bus.type = ixgbe_bus_type_internal;
hw->bus.width = ixgbe_bus_width_unknown;
@@ -1268,9 +1269,9 @@ static bool ixgbe_fw_recovery_mode_X550(struct ixgbe_hw *hw)
**/
static void ixgbe_disable_rx_x550(struct ixgbe_hw *hw)
{
- u32 rxctrl, pfdtxgswc;
- s32 status;
struct ixgbe_hic_disable_rxen fw_cmd;
+ u32 rxctrl, pfdtxgswc;
+ int status;
rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
if (rxctrl & IXGBE_RXCTRL_RXEN) {
@@ -1310,10 +1311,10 @@ static void ixgbe_disable_rx_x550(struct ixgbe_hw *hw)
* checksum and updates the EEPROM and instructs the hardware to update
* the flash.
**/
-static s32 ixgbe_update_eeprom_checksum_X550(struct ixgbe_hw *hw)
+static int ixgbe_update_eeprom_checksum_X550(struct ixgbe_hw *hw)
{
- s32 status;
u16 checksum = 0;
+ int status;
/* Read the first word from the EEPROM. If this times out or fails, do
* not continue or we could be in for a very long wait while every
@@ -1350,11 +1351,11 @@ static s32 ixgbe_update_eeprom_checksum_X550(struct ixgbe_hw *hw)
*
* Write a 16 bit word(s) to the EEPROM using the hostif.
**/
-static s32 ixgbe_write_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+static int ixgbe_write_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
u16 offset, u16 words,
u16 *data)
{
- s32 status = 0;
+ int status = 0;
u32 i = 0;
/* Take semaphore for the entire operation. */
@@ -1386,12 +1387,12 @@ static s32 ixgbe_write_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
* @device_type: 3 bit device type
* @data: Data to write to the register
**/
-static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u32 data)
{
u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
u32 command, error;
- s32 ret;
+ int ret;
ret = hw->mac.ops.acquire_swfw_sync(hw, gssr);
if (ret)
@@ -1429,10 +1430,10 @@ out:
*
* iXfI configuration needed for ixgbe_mac_X550EM_x devices.
**/
-static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
+static int ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
{
- s32 status;
u32 reg_val;
+ int status;
/* Disable training protocol FSM. */
status = ixgbe_read_iosf_sb_reg_x550(hw,
@@ -1501,10 +1502,10 @@ static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
* internal PHY
* @hw: pointer to hardware structure
**/
-static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
+static int ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
{
- s32 status;
u32 link_ctrl;
+ int status;
/* Restart auto-negotiation. */
status = hw->mac.ops.read_iosf_sb_reg(hw,
@@ -1550,11 +1551,11 @@ static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
* Configures the integrated KR PHY to use iXFI mode. Used to connect an
* internal and external PHY at a specific speed, without autonegotiation.
**/
-static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+static int ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
{
struct ixgbe_mac_info *mac = &hw->mac;
- s32 status;
u32 reg_val;
+ int status;
/* iXFI is only supported with X552 */
if (mac->type != ixgbe_mac_X550EM_x)
@@ -1607,7 +1608,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
* @hw: pointer to hardware structure
* @linear: true if SFP module is linear
*/
-static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear)
+static int ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear)
{
switch (hw->phy.sfp_type) {
case ixgbe_sfp_type_not_present:
@@ -1644,14 +1645,14 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear)
*
* Configures the extern PHY and the integrated KR PHY for SFP support.
*/
-static s32
+static int
ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
__always_unused bool autoneg_wait_to_complete)
{
- s32 status;
- u16 reg_slice, reg_val;
bool setup_linear = false;
+ u16 reg_slice, reg_val;
+ int status;
/* Check if SFP module is supported and linear */
status = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
@@ -1690,11 +1691,11 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
* Configures the integrated PHY for native SFI mode. Used to connect the
* internal PHY directly to an SFP cage, without autonegotiation.
**/
-static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+static int ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
{
struct ixgbe_mac_info *mac = &hw->mac;
- s32 status;
u32 reg_val;
+ int status;
/* Disable all AN and force speed to 10G Serial. */
status = mac->ops.read_iosf_sb_reg(hw,
@@ -1789,13 +1790,13 @@ static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
*
* Configure the integrated PHY for native SFP support.
*/
-static s32
+static int
ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed,
__always_unused bool autoneg_wait_to_complete)
{
bool setup_linear = false;
u32 reg_phy_int;
- s32 ret_val;
+ int ret_val;
/* Check if SFP module is supported and linear */
ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
@@ -1838,14 +1839,14 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed,
*
* Configure the integrated PHY for SFP support.
*/
-static s32
+static int
ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
__always_unused bool autoneg_wait_to_complete)
{
u32 reg_slice, slice_offset;
bool setup_linear = false;
u16 reg_phy_ext;
- s32 ret_val;
+ int ret_val;
/* Check if SFP module is supported and linear */
ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
@@ -1917,12 +1918,12 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
*
* Returns error status for any failure
**/
-static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
+static int ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
ixgbe_link_speed speed,
bool autoneg_wait)
{
- s32 status;
ixgbe_link_speed force_speed;
+ int status;
/* Setup internal/external PHY link speed to iXFI (10G), unless
* only 1G is auto advertised then setup KX link.
@@ -1953,7 +1954,7 @@ static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
*
* Check that both the MAC and X557 external PHY have link.
**/
-static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
+static int ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *link_up,
bool link_up_wait_to_complete)
@@ -1997,13 +1998,13 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
* @speed: unused
* @autoneg_wait_to_complete: unused
*/
-static s32
+static int
ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
__always_unused bool autoneg_wait_to_complete)
{
struct ixgbe_mac_info *mac = &hw->mac;
u32 lval, sval, flx_val;
- s32 rc;
+ int rc;
rc = mac->ops.read_iosf_sb_reg(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -2070,12 +2071,12 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
* @speed: the link speed to force
* @autoneg_wait: true when waiting for completion is needed
*/
-static s32 ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+static int ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed,
bool autoneg_wait)
{
struct ixgbe_mac_info *mac = &hw->mac;
u32 lval, sval, flx_val;
- s32 rc;
+ int rc;
rc = mac->ops.read_iosf_sb_reg(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -2147,7 +2148,7 @@ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw)
{
u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
ixgbe_link_speed speed;
- s32 status = -EIO;
+ int status = -EIO;
bool link_up;
/* AN should have completed when the cable was plugged in.
@@ -2275,10 +2276,10 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
/** ixgbe_setup_sfp_modules_X550em - Setup SFP module
* @hw: pointer to hardware structure
*/
-static s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw)
+static int ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw)
{
- s32 status;
bool linear;
+ int status;
/* Check if SFP module is supported */
status = ixgbe_supported_sfp_modules_X550em(hw, &linear);
@@ -2296,7 +2297,7 @@ static s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw)
* @speed: pointer to link speed
* @autoneg: true when autoneg or autotry is enabled
**/
-static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
+static int ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
bool *autoneg)
{
@@ -2374,7 +2375,7 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
* Determime if external Base T PHY interrupt cause is high temperature
* failure alarm or link status change.
**/
-static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc,
+static int ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc,
bool *is_overtemp)
{
u32 status;
@@ -2462,7 +2463,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc,
*
* Returns PHY access status
**/
-static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
+static int ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
{
bool lsc, overtemp;
u32 status;
@@ -2554,7 +2555,7 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
* failure alarm then return error, else if link status change
* then setup internal/external PHY link
**/
-static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw,
+static int ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw,
bool *is_overtemp)
{
struct ixgbe_phy_info *phy = &hw->phy;
@@ -2578,11 +2579,11 @@ static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw,
*
* Configures the integrated KR PHY.
**/
-static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
+static int ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
ixgbe_link_speed speed)
{
- s32 status;
u32 reg_val;
+ int status;
status = hw->mac.ops.read_iosf_sb_reg(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -2633,7 +2634,7 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
* ixgbe_setup_kr_x550em - Configure the KR PHY
* @hw: pointer to hardware structure
**/
-static s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw)
+static int ixgbe_setup_kr_x550em(struct ixgbe_hw *hw)
{
/* leave link alone for 2.5G */
if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL)
@@ -2651,7 +2652,7 @@ static s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw)
*
* Returns error code if unable to get link status.
**/
-static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
+static int ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
{
u32 ret;
u16 autoneg_status;
@@ -2685,7 +2686,7 @@ static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
* A return of a non-zero value indicates an error, and the base driver should
* not report link up.
**/
-static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
+static int ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
{
ixgbe_link_speed force_speed;
bool link_up;
@@ -2745,9 +2746,9 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
/** ixgbe_reset_phy_t_X550em - Performs X557 PHY reset and enables LASI
* @hw: pointer to hardware structure
**/
-static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
+static int ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
{
- s32 status;
+ int status;
status = ixgbe_reset_phy_generic(hw);
@@ -2763,7 +2764,7 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
* @hw: pointer to hardware structure
* @led_idx: led number to turn on
**/
-static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+static int ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
{
u16 phy_data;
@@ -2785,7 +2786,7 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
* @hw: pointer to hardware structure
* @led_idx: led number to turn off
**/
-static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+static int ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
{
u16 phy_data;
@@ -2818,12 +2819,12 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
* semaphore, -EIO when command fails or -ENIVAL when incorrect
* params passed.
**/
-static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
+static int ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
u8 build, u8 sub, u16 len,
const char *driver_ver)
{
struct ixgbe_hic_drv_info2 fw_cmd;
- s32 ret_val;
+ int ret_val;
int i;
if (!len || !driver_ver || (len > sizeof(fw_cmd.driver_string)))
@@ -2865,12 +2866,12 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
*
* Determine lowest common link speed with link partner.
**/
-static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw,
+static int ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw,
ixgbe_link_speed *lcd_speed)
{
- u16 an_lp_status;
- s32 status;
u16 word = hw->eeprom.ctrl_word_3;
+ u16 an_lp_status;
+ int status;
*lcd_speed = IXGBE_LINK_SPEED_UNKNOWN;
@@ -2883,28 +2884,28 @@ static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw,
/* If link partner advertised 1G, return 1G */
if (an_lp_status & IXGBE_AUTO_NEG_LP_1000BASE_CAP) {
*lcd_speed = IXGBE_LINK_SPEED_1GB_FULL;
- return status;
+ return 0;
}
/* If 10G disabled for LPLU via NVM D10GMP, then return no valid LCD */
if ((hw->bus.lan_id && (word & NVM_INIT_CTRL_3_D10GMP_PORT1)) ||
(word & NVM_INIT_CTRL_3_D10GMP_PORT0))
- return status;
+ return 0;
/* Link partner not capable of lower speeds, return 10G */
*lcd_speed = IXGBE_LINK_SPEED_10GB_FULL;
- return status;
+ return 0;
}
/**
* ixgbe_setup_fc_x550em - Set up flow control
* @hw: pointer to hardware structure
*/
-static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
+static int ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
{
bool pause, asm_dir;
u32 reg_val;
- s32 rc = 0;
+ int rc = 0;
/* Validate the requested mode */
if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
@@ -2989,7 +2990,7 @@ static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw)
{
u32 link_s1, lp_an_page_low, an_cntl_1;
ixgbe_link_speed speed;
- s32 status = -EIO;
+ int status = -EIO;
bool link_up;
/* AN should have completed when the cable was plugged in.
@@ -3072,13 +3073,13 @@ static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw)
* (from D0 to non-D0). Link is required to enter LPLU so avoid resetting
* the X557 PHY immediately prior to entering LPLU.
**/
-static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
+static int ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
{
u16 an_10g_cntl_reg, autoneg_reg, speed;
- s32 status;
ixgbe_link_speed lcd_speed;
u32 save_autoneg;
bool link_up;
+ int status;
/* If blocked by MNG FW, then don't restart AN */
if (ixgbe_check_reset_blocked(hw))
@@ -3129,7 +3130,7 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
(lcd_speed == IXGBE_LINK_SPEED_1GB_FULL)) ||
((speed == IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB) &&
(lcd_speed == IXGBE_LINK_SPEED_10GB_FULL)))
- return status;
+ return 0;
/* Clear AN completed indication */
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM,
@@ -3166,10 +3167,10 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
* ixgbe_reset_phy_fw - Reset firmware-controlled PHYs
* @hw: pointer to hardware structure
*/
-static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw)
+static int ixgbe_reset_phy_fw(struct ixgbe_hw *hw)
{
u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 };
- s32 rc;
+ int rc;
if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw))
return 0;
@@ -3195,7 +3196,7 @@ static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw)
static bool ixgbe_check_overtemp_fw(struct ixgbe_hw *hw)
{
u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 };
- s32 rc;
+ int rc;
rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store);
if (rc)
@@ -3238,10 +3239,10 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw)
* set during init_shared_code because the PHY/SFP type was
* not known. Perform the SFP init if necessary.
**/
-static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
+static int ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
{
struct ixgbe_phy_info *phy = &hw->phy;
- s32 ret_val;
+ int ret_val;
hw->mac.ops.set_lan_id(hw);
@@ -3366,9 +3367,9 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
/** ixgbe_init_ext_t_x550em - Start (unstall) the external Base T PHY.
** @hw: pointer to hardware structure
**/
-static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
+static int ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
{
- s32 status;
+ int status;
u16 reg;
status = hw->phy.ops.read_reg(hw,
@@ -3440,14 +3441,14 @@ static void ixgbe_set_mdio_speed(struct ixgbe_hw *hw)
** and clears all interrupts, perform a PHY reset, and perform a link (MAC)
** reset.
**/
-static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
+static int ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
{
+ u32 swfw_mask = hw->phy.phy_semaphore_mask;
ixgbe_link_speed link_speed;
- s32 status;
+ bool link_up = false;
u32 ctrl = 0;
+ int status;
u32 i;
- bool link_up = false;
- u32 swfw_mask = hw->phy.phy_semaphore_mask;
/* Call adapter stop to disable Tx/Rx and clear interrupts */
status = hw->mac.ops.stop_adapter(hw);
@@ -3608,10 +3609,10 @@ static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw,
*
* Called at init time to set up flow control.
**/
-static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw)
+static int ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw)
{
- s32 status = 0;
u32 an_cntl = 0;
+ int status = 0;
/* Validate the requested mode */
if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
@@ -3713,9 +3714,9 @@ static void ixgbe_set_mux(struct ixgbe_hw *hw, u8 state)
*
* Acquires the SWFW semaphore and sets the I2C MUX
*/
-static s32 ixgbe_acquire_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
+static int ixgbe_acquire_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
{
- s32 status;
+ int status;
status = ixgbe_acquire_swfw_sync_X540(hw, mask);
if (status)
@@ -3749,11 +3750,11 @@ static void ixgbe_release_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
*
* Acquires the SWFW semaphore and get the shared PHY token as needed
*/
-static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
+static int ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
{
u32 hmask = mask & ~IXGBE_GSSR_TOKEN_SM;
int retries = FW_PHY_TOKEN_RETRIES;
- s32 status;
+ int status;
while (--retries) {
status = 0;
@@ -3806,11 +3807,11 @@ static void ixgbe_release_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
* Token. The PHY Token is needed since the MDIO is shared between to MAC
* instances.
*/
-static s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 *phy_data)
{
u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
- s32 status;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, mask))
return -EBUSY;
@@ -3832,11 +3833,11 @@ static s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
* Writes a value to specified PHY register using the SWFW lock and PHY Token.
* The PHY Token is needed since the MDIO is shared between to MAC instances.
*/
-static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+static int ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
u32 device_type, u16 phy_data)
{
u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
- s32 status;
+ int status;
if (hw->mac.ops.acquire_swfw_sync(hw, mask))
return -EBUSY;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 59798bc33298..d34d715c59eb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -359,12 +359,8 @@ construct_skb:
ixgbe_xdp_ring_update_tail_locked(ring);
}
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.packets += total_rx_packets;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- q_vector->rx.total_packets += total_rx_packets;
- q_vector->rx.total_bytes += total_rx_bytes;
+ ixgbe_update_rx_ring_stats(rx_ring, q_vector, total_rx_packets,
+ total_rx_bytes);
if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
@@ -499,13 +495,8 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
}
tx_ring->next_to_clean = ntc;
-
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.packets += total_packets;
- u64_stats_update_end(&tx_ring->syncp);
- q_vector->tx.total_bytes += total_bytes;
- q_vector->tx.total_packets += total_packets;
+ ixgbe_update_tx_ring_stats(tx_ring, q_vector, total_packets,
+ total_bytes);
if (xsk_frames)
xsk_tx_completed(pool, xsk_frames);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index a44e4bd56142..9c960017a6de 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4413,7 +4413,7 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev,
unsigned int network_hdr_len, mac_hdr_len;
/* Make certain the headers can be described by a context descriptor */
- mac_hdr_len = skb_network_header(skb) - skb->data;
+ mac_hdr_len = skb_network_offset(skb);
if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN))
return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
index 5182fe737c37..ff54fbe41bcc 100644
--- a/drivers/net/ethernet/litex/litex_liteeth.c
+++ b/drivers/net/ethernet/litex/litex_liteeth.c
@@ -318,4 +318,5 @@ static struct platform_driver liteeth_driver = {
module_platform_driver(liteeth_driver);
MODULE_AUTHOR("Joel Stanley <joel@jms.id.au>");
+MODULE_DESCRIPTION("LiteX Liteeth Ethernet driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index 884d64114bff..837295fecd17 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -180,6 +180,7 @@ config SKY2_DEBUG
source "drivers/net/ethernet/marvell/octeontx2/Kconfig"
source "drivers/net/ethernet/marvell/octeon_ep/Kconfig"
+source "drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig"
source "drivers/net/ethernet/marvell/prestera/Kconfig"
endif # NET_VENDOR_MARVELL
diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile
index ceba4aa4f026..a399defe25fd 100644
--- a/drivers/net/ethernet/marvell/Makefile
+++ b/drivers/net/ethernet/marvell/Makefile
@@ -12,5 +12,6 @@ obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o
obj-$(CONFIG_SKGE) += skge.o
obj-$(CONFIG_SKY2) += sky2.o
obj-y += octeon_ep/
+obj-y += octeon_ep_vf/
obj-y += octeontx2/
obj-y += prestera/
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index a641b3534ca3..40a5f1431e4e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -5097,7 +5097,7 @@ static int mvneta_ethtool_set_wol(struct net_device *dev,
}
static int mvneta_ethtool_get_eee(struct net_device *dev,
- struct ethtool_eee *eee)
+ struct ethtool_keee *eee)
{
struct mvneta_port *pp = netdev_priv(dev);
u32 lpi_ctl0;
@@ -5113,7 +5113,7 @@ static int mvneta_ethtool_get_eee(struct net_device *dev,
}
static int mvneta_ethtool_set_eee(struct net_device *dev,
- struct ethtool_eee *eee)
+ struct ethtool_keee *eee)
{
struct mvneta_port *pp = netdev_priv(dev);
u32 lpi_ctl0;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 820b1fabe297..23adf53c2aa1 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -614,12 +614,38 @@ static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv)
mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val);
}
+/* Cleanup pool before actual initialization in the OS */
+static void mvpp2_bm_pool_cleanup(struct mvpp2 *priv, int pool_id)
+{
+ unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu());
+ u32 val;
+ int i;
+
+ /* Drain the BM from all possible residues left by firmware */
+ for (i = 0; i < MVPP2_BM_POOL_SIZE_MAX; i++)
+ mvpp2_thread_read(priv, thread, MVPP2_BM_PHY_ALLOC_REG(pool_id));
+
+ put_cpu();
+
+ /* Stop the BM pool */
+ val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(pool_id));
+ val |= MVPP2_BM_STOP_MASK;
+ mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(pool_id), val);
+}
+
static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
{
enum dma_data_direction dma_dir = DMA_FROM_DEVICE;
int i, err, poolnum = MVPP2_BM_POOLS_NUM;
struct mvpp2_port *port;
+ if (priv->percpu_pools)
+ poolnum = mvpp2_get_nrxqs(priv) * 2;
+
+ /* Clean up the pool state in case it contains stale state */
+ for (i = 0; i < poolnum; i++)
+ mvpp2_bm_pool_cleanup(priv, i);
+
if (priv->percpu_pools) {
for (i = 0; i < priv->port_count; i++) {
port = priv->port_list[i];
@@ -629,7 +655,6 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
}
}
- poolnum = mvpp2_get_nrxqs(priv) * 2;
for (i = 0; i < poolnum; i++) {
/* the pool in use */
int pn = i / (poolnum / 2);
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig b/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig
new file mode 100644
index 000000000000..e371a3ef0c49
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Marvell's Octeon PCI Endpoint NIC VF Driver Configuration
+#
+
+config OCTEON_EP_VF
+ tristate "Marvell Octeon PCI Endpoint NIC VF Driver"
+ depends on 64BIT
+ depends on PCI
+ help
+ This driver supports the networking functionality of Marvell's
+ Octeon PCI Endpoint NIC VF.
+
+ To know the list of devices supported by this driver, refer to the
+ documentation in
+ <file:Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst>.
+
+ To compile this driver as a module, choose M here.
+ The name of the module will be octeon_ep_vf.
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile b/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile
new file mode 100644
index 000000000000..4a5f9fcb0b40
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Network driver for Marvell's Octeon PCI Endpoint NIC VF
+#
+
+obj-$(CONFIG_OCTEON_EP_VF) += octeon_ep_vf.o
+
+octeon_ep_vf-y := octep_vf_main.o octep_vf_cn9k.o octep_vf_cnxk.o \
+ octep_vf_tx.o octep_vf_rx.o octep_vf_mbox.o \
+ octep_vf_ethtool.o
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c
new file mode 100644
index 000000000000..88937fce75f1
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+#include "octep_vf_regs_cn9k.h"
+
+/* Dump useful hardware IQ/OQ CSRs for debug purpose */
+static void cn93_vf_dump_q_regs(struct octep_vf_device *oct, int qno)
+{
+ struct device *dev = &oct->pdev->dev;
+
+ dev_info(dev, "IQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_INSTR_DBELL(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(qno)));
+ dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_CONTROL(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(qno)));
+ dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_ENABLE(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_INSTR_BADDR(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_BADDR(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_INSTR_RSIZE(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_RSIZE(qno)));
+ dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_CNTS(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CNTS(qno)));
+ dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_INT_LEVELS(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_PKT_CNT(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_IN_BYTE_CNT(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_BYTE_CNT(qno)));
+
+ dev_info(dev, "OQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_SLIST_DBELL(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_DBELL(qno)));
+ dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_CONTROL(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(qno)));
+ dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_ENABLE(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_SLIST_BADDR(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_BADDR(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_SLIST_RSIZE(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_RSIZE(qno)));
+ dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_CNTS(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CNTS(qno)));
+ dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_INT_LEVELS(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_PKT_CNT(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_VF_SDP_R_OUT_BYTE_CNT(qno),
+ octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_BYTE_CNT(qno)));
+}
+
+/* Reset Hardware Tx queue */
+static void cn93_vf_reset_iq(struct octep_vf_device *oct, int q_no)
+{
+ u64 val = ULL(0);
+
+ dev_dbg(&oct->pdev->dev, "Reset VF IQ-%d\n", q_no);
+
+ /* Disable the Tx/Instruction Ring */
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(q_no), val);
+
+ /* clear the Instruction Ring packet/byte counts and doorbell CSRs */
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q_no), val);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_PKT_CNT(q_no), val);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_BYTE_CNT(q_no), val);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_BADDR(q_no), val);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_RSIZE(q_no), val);
+
+ val = GENMASK_ULL(31, 0);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(q_no), val);
+
+ val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CNTS(q_no));
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_CNTS(q_no),
+ val & GENMASK_ULL(31, 0));
+}
+
+/* Reset Hardware Rx queue */
+static void cn93_vf_reset_oq(struct octep_vf_device *oct, int q_no)
+{
+ u64 val = ULL(0);
+
+ /* Disable Output (Rx) Ring */
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(q_no), val);
+
+ /* Clear count CSRs */
+ val = octep_vf_read_csr(oct, CN93_VF_SDP_R_OUT_CNTS(q_no));
+ octep_vf_write_csr(oct, CN93_VF_SDP_R_OUT_CNTS(q_no), val);
+
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_PKT_CNT(q_no), GENMASK_ULL(35, 0));
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_DBELL(q_no), GENMASK_ULL(31, 0));
+}
+
+/* Reset all hardware Tx/Rx queues */
+static void octep_vf_reset_io_queues_cn93(struct octep_vf_device *oct)
+{
+ struct pci_dev *pdev = oct->pdev;
+ int q;
+
+ dev_dbg(&pdev->dev, "Reset OCTEP_CN93 VF IO Queues\n");
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ cn93_vf_reset_iq(oct, q);
+ cn93_vf_reset_oq(oct, q);
+ }
+}
+
+/* Initialize configuration limits and initial active config */
+static void octep_vf_init_config_cn93_vf(struct octep_vf_device *oct)
+{
+ struct octep_vf_config *conf = oct->conf;
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(0));
+ conf->ring_cfg.max_io_rings = (reg_val >> CN93_VF_R_IN_CTL_RPVF_POS) &
+ CN93_VF_R_IN_CTL_RPVF_MASK;
+ conf->ring_cfg.active_io_rings = conf->ring_cfg.max_io_rings;
+
+ conf->iq.num_descs = OCTEP_VF_IQ_MAX_DESCRIPTORS;
+ conf->iq.instr_type = OCTEP_VF_64BYTE_INSTR;
+ conf->iq.db_min = OCTEP_VF_DB_MIN;
+ conf->iq.intr_threshold = OCTEP_VF_IQ_INTR_THRESHOLD;
+
+ conf->oq.num_descs = OCTEP_VF_OQ_MAX_DESCRIPTORS;
+ conf->oq.buf_size = OCTEP_VF_OQ_BUF_SIZE;
+ conf->oq.refill_threshold = OCTEP_VF_OQ_REFILL_THRESHOLD;
+ conf->oq.oq_intr_pkt = OCTEP_VF_OQ_INTR_PKT_THRESHOLD;
+ conf->oq.oq_intr_time = OCTEP_VF_OQ_INTR_TIME_THRESHOLD;
+
+ conf->msix_cfg.ioq_msix = conf->ring_cfg.active_io_rings;
+}
+
+/* Setup registers for a hardware Tx Queue */
+static void octep_vf_setup_iq_regs_cn93(struct octep_vf_device *oct, int iq_no)
+{
+ struct octep_vf_iq *iq = oct->iq[iq_no];
+ u32 reset_instr_cnt;
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(iq_no));
+
+ /* wait for IDLE to set to 1 */
+ if (!(reg_val & CN93_VF_R_IN_CTL_IDLE)) {
+ do {
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(iq_no));
+ } while (!(reg_val & CN93_VF_R_IN_CTL_IDLE));
+ }
+ reg_val |= CN93_VF_R_IN_CTL_RDSIZE;
+ reg_val |= CN93_VF_R_IN_CTL_IS_64B;
+ reg_val |= CN93_VF_R_IN_CTL_ESR;
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(iq_no), reg_val);
+
+ /* Write the start of the input queue's ring and its size */
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_BADDR(iq_no), iq->desc_ring_dma);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_RSIZE(iq_no), iq->max_count);
+
+ /* Remember the doorbell & instruction count register addr for this queue */
+ iq->doorbell_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_IN_INSTR_DBELL(iq_no);
+ iq->inst_cnt_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_IN_CNTS(iq_no);
+ iq->intr_lvl_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_IN_INT_LEVELS(iq_no);
+
+ /* Store the current instruction counter (used in flush_iq calculation) */
+ reset_instr_cnt = readl(iq->inst_cnt_reg);
+ writel(reset_instr_cnt, iq->inst_cnt_reg);
+
+ /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */
+ reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & GENMASK_ULL(31, 0);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+}
+
+/* Setup registers for a hardware Rx Queue */
+static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no)
+{
+ struct octep_vf_oq *oq = oct->oq[oq_no];
+ u32 time_threshold = 0;
+ u64 oq_ctl = ULL(0);
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no));
+
+ /* wait for IDLE to set to 1 */
+ if (!(reg_val & CN93_VF_R_OUT_CTL_IDLE)) {
+ do {
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no));
+ } while (!(reg_val & CN93_VF_R_OUT_CTL_IDLE));
+ }
+
+ reg_val &= ~(CN93_VF_R_OUT_CTL_IMODE);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_ROR_P);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_NSR_P);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_ROR_I);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_NSR_I);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_ES_I);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_ROR_D);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_NSR_D);
+ reg_val &= ~(CN93_VF_R_OUT_CTL_ES_D);
+ reg_val |= (CN93_VF_R_OUT_CTL_ES_P);
+
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no), reg_val);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_BADDR(oq_no), oq->desc_ring_dma);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), oq->max_count);
+
+ oq_ctl = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no));
+ oq_ctl &= ~GENMASK_ULL(22, 0); //clear the ISIZE and BSIZE (22-0)
+ oq_ctl |= (oq->buffer_size & GENMASK_ULL(15, 0)); //populate the BSIZE (15-0)
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no), oq_ctl);
+
+ /* Get the mapped address of the pkt_sent and pkts_credit regs */
+ oq->pkts_sent_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_OUT_CNTS(oq_no);
+ oq->pkts_credit_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_OUT_SLIST_DBELL(oq_no);
+
+ time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf);
+ reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+}
+
+/* Setup registers for a VF mailbox */
+static void octep_vf_setup_mbox_regs_cn93(struct octep_vf_device *oct, int q_no)
+{
+ struct octep_vf_mbox *mbox = oct->mbox;
+
+ /* PF to VF DATA reg. VF reads from this reg */
+ mbox->mbox_read_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_MBOX_PF_VF_DATA(q_no);
+
+ /* VF mbox interrupt reg */
+ mbox->mbox_int_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_MBOX_PF_VF_INT(q_no);
+
+ /* VF to PF DATA reg. VF writes into this reg */
+ mbox->mbox_write_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_MBOX_VF_PF_DATA(q_no);
+}
+
+/* Mailbox Interrupt handler */
+static void cn93_handle_vf_mbox_intr(struct octep_vf_device *oct)
+{
+ if (oct->mbox)
+ schedule_work(&oct->mbox->wk.work);
+ else
+ dev_err(&oct->pdev->dev, "cannot schedule work on invalid mbox\n");
+}
+
+/* Tx/Rx queue interrupt handler */
+static irqreturn_t octep_vf_ioq_intr_handler_cn93(void *data)
+{
+ struct octep_vf_ioq_vector *vector = data;
+ struct octep_vf_device *oct;
+ struct octep_vf_oq *oq;
+ u64 reg_val;
+
+ oct = vector->octep_vf_dev;
+ oq = vector->oq;
+ /* Mailbox interrupt arrives along with interrupt of tx/rx ring pair 0 */
+ if (oq->q_no == 0) {
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0));
+ if (reg_val & CN93_VF_SDP_R_MBOX_PF_VF_INT_STATUS) {
+ cn93_handle_vf_mbox_intr(oct);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0), reg_val);
+ }
+ }
+ napi_schedule_irqoff(oq->napi);
+ return IRQ_HANDLED;
+}
+
+/* Re-initialize Octeon hardware registers */
+static void octep_vf_reinit_regs_cn93(struct octep_vf_device *oct)
+{
+ u32 i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_iq_regs(oct, i);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_oq_regs(oct, i);
+
+ oct->hw_ops.enable_interrupts(oct);
+ oct->hw_ops.enable_io_queues(oct);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg);
+}
+
+/* Enable all interrupts */
+static void octep_vf_enable_interrupts_cn93(struct octep_vf_device *oct)
+{
+ int num_rings, q;
+ u64 reg_val;
+
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ for (q = 0; q < num_rings; q++) {
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q), reg_val);
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q), reg_val);
+ }
+ /* Enable PF to VF mbox interrupt by setting 2nd bit*/
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0),
+ CN93_VF_SDP_R_MBOX_PF_VF_INT_ENAB);
+}
+
+/* Disable all interrupts */
+static void octep_vf_disable_interrupts_cn93(struct octep_vf_device *oct)
+{
+ int num_rings, q;
+ u64 reg_val;
+
+ /* Disable PF to VF mbox interrupt by setting 2nd bit*/
+ if (oct->mbox)
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0), 0x0);
+
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ for (q = 0; q < num_rings; q++) {
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q));
+ reg_val &= ~BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q), reg_val);
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q));
+ reg_val &= ~BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q), reg_val);
+ }
+}
+
+/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */
+static u32 octep_vf_update_iq_read_index_cn93(struct octep_vf_iq *iq)
+{
+ u32 pkt_in_done = readl(iq->inst_cnt_reg);
+ u32 last_done, new_idx;
+
+ last_done = pkt_in_done - iq->pkt_in_done;
+ iq->pkt_in_done = pkt_in_done;
+
+ new_idx = (iq->octep_vf_read_index + last_done) % iq->max_count;
+
+ return new_idx;
+}
+
+/* Enable a hardware Tx Queue */
+static void octep_vf_enable_iq_cn93(struct octep_vf_device *oct, int iq_no)
+{
+ u64 loop = HZ;
+ u64 reg_val;
+
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(iq_no), GENMASK_ULL(31, 0));
+
+ while (octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(iq_no)) &&
+ loop--) {
+ schedule_timeout_interruptible(1);
+ }
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(iq_no));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no));
+ reg_val |= ULL(1);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Enable a hardware Rx Queue */
+static void octep_vf_enable_oq_cn93(struct octep_vf_device *oct, int oq_no)
+{
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_DBELL(oq_no), GENMASK_ULL(31, 0));
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no));
+ reg_val |= ULL(1);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no), reg_val);
+}
+
+/* Enable all hardware Tx/Rx Queues assigned to VF */
+static void octep_vf_enable_io_queues_cn93(struct octep_vf_device *oct)
+{
+ u8 q;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ octep_vf_enable_iq_cn93(oct, q);
+ octep_vf_enable_oq_cn93(oct, q);
+ }
+}
+
+/* Disable a hardware Tx Queue assigned to VF */
+static void octep_vf_disable_iq_cn93(struct octep_vf_device *oct, int iq_no)
+{
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no));
+ reg_val &= ~ULL(1);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Disable a hardware Rx Queue assigned to VF */
+static void octep_vf_disable_oq_cn93(struct octep_vf_device *oct, int oq_no)
+{
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no));
+ reg_val &= ~ULL(1);
+ octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no), reg_val);
+}
+
+/* Disable all hardware Tx/Rx Queues assigned to VF */
+static void octep_vf_disable_io_queues_cn93(struct octep_vf_device *oct)
+{
+ int q;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ octep_vf_disable_iq_cn93(oct, q);
+ octep_vf_disable_oq_cn93(oct, q);
+ }
+}
+
+/* Dump hardware registers (including Tx/Rx queues) for debugging. */
+static void octep_vf_dump_registers_cn93(struct octep_vf_device *oct)
+{
+ u8 num_rings, q;
+
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ for (q = 0; q < num_rings; q++)
+ cn93_vf_dump_q_regs(oct, q);
+}
+
+/**
+ * octep_vf_device_setup_cn93() - Setup Octeon device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * - initialize hardware operations.
+ * - get target side pcie port number for the device.
+ * - set initial configuration and max limits.
+ */
+void octep_vf_device_setup_cn93(struct octep_vf_device *oct)
+{
+ oct->hw_ops.setup_iq_regs = octep_vf_setup_iq_regs_cn93;
+ oct->hw_ops.setup_oq_regs = octep_vf_setup_oq_regs_cn93;
+ oct->hw_ops.setup_mbox_regs = octep_vf_setup_mbox_regs_cn93;
+
+ oct->hw_ops.ioq_intr_handler = octep_vf_ioq_intr_handler_cn93;
+ oct->hw_ops.reinit_regs = octep_vf_reinit_regs_cn93;
+
+ oct->hw_ops.enable_interrupts = octep_vf_enable_interrupts_cn93;
+ oct->hw_ops.disable_interrupts = octep_vf_disable_interrupts_cn93;
+
+ oct->hw_ops.update_iq_read_idx = octep_vf_update_iq_read_index_cn93;
+
+ oct->hw_ops.enable_iq = octep_vf_enable_iq_cn93;
+ oct->hw_ops.enable_oq = octep_vf_enable_oq_cn93;
+ oct->hw_ops.enable_io_queues = octep_vf_enable_io_queues_cn93;
+
+ oct->hw_ops.disable_iq = octep_vf_disable_iq_cn93;
+ oct->hw_ops.disable_oq = octep_vf_disable_oq_cn93;
+ oct->hw_ops.disable_io_queues = octep_vf_disable_io_queues_cn93;
+ oct->hw_ops.reset_io_queues = octep_vf_reset_io_queues_cn93;
+
+ oct->hw_ops.dump_registers = octep_vf_dump_registers_cn93;
+ octep_vf_init_config_cn93_vf(oct);
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c
new file mode 100644
index 000000000000..1f79dfad42c6
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+#include "octep_vf_regs_cnxk.h"
+
+/* Dump useful hardware IQ/OQ CSRs for debug purpose */
+static void cnxk_vf_dump_q_regs(struct octep_vf_device *oct, int qno)
+{
+ struct device *dev = &oct->pdev->dev;
+
+ dev_info(dev, "IQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_INSTR_DBELL(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(qno)));
+ dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_CONTROL(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(qno)));
+ dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_ENABLE(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_INSTR_BADDR(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_BADDR(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_INSTR_RSIZE(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_RSIZE(qno)));
+ dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_CNTS(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CNTS(qno)));
+ dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_INT_LEVELS(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_PKT_CNT(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_IN_BYTE_CNT(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_BYTE_CNT(qno)));
+
+ dev_info(dev, "OQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_SLIST_DBELL(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_DBELL(qno)));
+ dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_CONTROL(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(qno)));
+ dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_ENABLE(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_SLIST_BADDR(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(qno)));
+ dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_CNTS(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CNTS(qno)));
+ dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_INT_LEVELS(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_PKT_CNT(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_OUT_BYTE_CNT(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_BYTE_CNT(qno)));
+ dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_VF_SDP_R_ERR_TYPE(qno),
+ octep_vf_read_csr64(oct, CNXK_VF_SDP_R_ERR_TYPE(qno)));
+}
+
+/* Reset Hardware Tx queue */
+static void cnxk_vf_reset_iq(struct octep_vf_device *oct, int q_no)
+{
+ u64 val = ULL(0);
+
+ dev_dbg(&oct->pdev->dev, "Reset VF IQ-%d\n", q_no);
+
+ /* Disable the Tx/Instruction Ring */
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(q_no), val);
+
+ /* clear the Instruction Ring packet/byte counts and doorbell CSRs */
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q_no), val);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_PKT_CNT(q_no), val);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_BYTE_CNT(q_no), val);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_BADDR(q_no), val);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_RSIZE(q_no), val);
+
+ val = GENMASK_ULL(31, 0);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(q_no), val);
+
+ val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CNTS(q_no));
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_CNTS(q_no), val & GENMASK_ULL(31, 0));
+}
+
+/* Reset Hardware Rx queue */
+static void cnxk_vf_reset_oq(struct octep_vf_device *oct, int q_no)
+{
+ u64 val = ULL(0);
+
+ /* Disable Output (Rx) Ring */
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(q_no), val);
+
+ /* Clear count CSRs */
+ val = octep_vf_read_csr(oct, CNXK_VF_SDP_R_OUT_CNTS(q_no));
+ octep_vf_write_csr(oct, CNXK_VF_SDP_R_OUT_CNTS(q_no), val);
+
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_PKT_CNT(q_no), GENMASK_ULL(35, 0));
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_DBELL(q_no), GENMASK_ULL(31, 0));
+}
+
+/* Reset all hardware Tx/Rx queues */
+static void octep_vf_reset_io_queues_cnxk(struct octep_vf_device *oct)
+{
+ struct pci_dev *pdev = oct->pdev;
+ int q;
+
+ dev_dbg(&pdev->dev, "Reset OCTEP_CNXK VF IO Queues\n");
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ cnxk_vf_reset_iq(oct, q);
+ cnxk_vf_reset_oq(oct, q);
+ }
+}
+
+/* Initialize configuration limits and initial active config */
+static void octep_vf_init_config_cnxk_vf(struct octep_vf_device *oct)
+{
+ struct octep_vf_config *conf = oct->conf;
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(0));
+ conf->ring_cfg.max_io_rings = (reg_val >> CNXK_VF_R_IN_CTL_RPVF_POS) &
+ CNXK_VF_R_IN_CTL_RPVF_MASK;
+ conf->ring_cfg.active_io_rings = conf->ring_cfg.max_io_rings;
+
+ conf->iq.num_descs = OCTEP_VF_IQ_MAX_DESCRIPTORS;
+ conf->iq.instr_type = OCTEP_VF_64BYTE_INSTR;
+ conf->iq.db_min = OCTEP_VF_DB_MIN;
+ conf->iq.intr_threshold = OCTEP_VF_IQ_INTR_THRESHOLD;
+
+ conf->oq.num_descs = OCTEP_VF_OQ_MAX_DESCRIPTORS;
+ conf->oq.buf_size = OCTEP_VF_OQ_BUF_SIZE;
+ conf->oq.refill_threshold = OCTEP_VF_OQ_REFILL_THRESHOLD;
+ conf->oq.oq_intr_pkt = OCTEP_VF_OQ_INTR_PKT_THRESHOLD;
+ conf->oq.oq_intr_time = OCTEP_VF_OQ_INTR_TIME_THRESHOLD;
+ conf->oq.wmark = OCTEP_VF_OQ_WMARK_MIN;
+
+ conf->msix_cfg.ioq_msix = conf->ring_cfg.active_io_rings;
+}
+
+/* Setup registers for a hardware Tx Queue */
+static void octep_vf_setup_iq_regs_cnxk(struct octep_vf_device *oct, int iq_no)
+{
+ struct octep_vf_iq *iq = oct->iq[iq_no];
+ u32 reset_instr_cnt;
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(iq_no));
+
+ /* wait for IDLE to set to 1 */
+ if (!(reg_val & CNXK_VF_R_IN_CTL_IDLE)) {
+ do {
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(iq_no));
+ } while (!(reg_val & CNXK_VF_R_IN_CTL_IDLE));
+ }
+ reg_val |= CNXK_VF_R_IN_CTL_RDSIZE;
+ reg_val |= CNXK_VF_R_IN_CTL_IS_64B;
+ reg_val |= CNXK_VF_R_IN_CTL_ESR;
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(iq_no), reg_val);
+
+ /* Write the start of the input queue's ring and its size */
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_BADDR(iq_no), iq->desc_ring_dma);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_RSIZE(iq_no), iq->max_count);
+
+ /* Remember the doorbell & instruction count register addr for this queue */
+ iq->doorbell_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_IN_INSTR_DBELL(iq_no);
+ iq->inst_cnt_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_IN_CNTS(iq_no);
+ iq->intr_lvl_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no);
+
+ /* Store the current instruction counter (used in flush_iq calculation) */
+ reset_instr_cnt = readl(iq->inst_cnt_reg);
+ writel(reset_instr_cnt, iq->inst_cnt_reg);
+
+ /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */
+ reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & GENMASK_ULL(31, 0);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+}
+
+/* Setup registers for a hardware Rx Queue */
+static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no)
+{
+ struct octep_vf_oq *oq = oct->oq[oq_no];
+ u32 time_threshold = 0;
+ u64 oq_ctl = ULL(0);
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no));
+
+ /* wait for IDLE to set to 1 */
+ if (!(reg_val & CNXK_VF_R_OUT_CTL_IDLE)) {
+ do {
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no));
+ } while (!(reg_val & CNXK_VF_R_OUT_CTL_IDLE));
+ }
+
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_IMODE);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_P);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_NSR_P);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_I);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_NSR_I);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_ES_I);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_D);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_NSR_D);
+ reg_val &= ~(CNXK_VF_R_OUT_CTL_ES_D);
+ reg_val |= (CNXK_VF_R_OUT_CTL_ES_P);
+
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no), reg_val);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no), oq->desc_ring_dma);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), oq->max_count);
+
+ oq_ctl = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no));
+ /* Clear the ISIZE and BSIZE (22-0) */
+ oq_ctl &= ~GENMASK_ULL(22, 0);
+ /* Populate the BSIZE (15-0) */
+ oq_ctl |= (oq->buffer_size & GENMASK_ULL(15, 0));
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no), oq_ctl);
+
+ /* Get the mapped address of the pkt_sent and pkts_credit regs */
+ oq->pkts_sent_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_OUT_CNTS(oq_no);
+ oq->pkts_credit_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_OUT_SLIST_DBELL(oq_no);
+
+ time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf);
+ reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+
+ /* set watermark for backpressure */
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no));
+ reg_val &= ~GENMASK_ULL(31, 0);
+ reg_val |= CFG_GET_OQ_WMARK(oct->conf);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no), reg_val);
+}
+
+/* Setup registers for a VF mailbox */
+static void octep_vf_setup_mbox_regs_cnxk(struct octep_vf_device *oct, int q_no)
+{
+ struct octep_vf_mbox *mbox = oct->mbox;
+
+ /* PF to VF DATA reg. VF reads from this reg */
+ mbox->mbox_read_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_MBOX_PF_VF_DATA(q_no);
+
+ /* VF mbox interrupt reg */
+ mbox->mbox_int_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_MBOX_PF_VF_INT(q_no);
+
+ /* VF to PF DATA reg. VF writes into this reg */
+ mbox->mbox_write_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_MBOX_VF_PF_DATA(q_no);
+}
+
+/* Mailbox Interrupt handler */
+static void cnxk_handle_vf_mbox_intr(struct octep_vf_device *oct)
+{
+ if (oct->mbox)
+ schedule_work(&oct->mbox->wk.work);
+ else
+ dev_err(&oct->pdev->dev, "cannot schedule work on invalid mbox\n");
+}
+
+/* Tx/Rx queue interrupt handler */
+static irqreturn_t octep_vf_ioq_intr_handler_cnxk(void *data)
+{
+ struct octep_vf_ioq_vector *vector = data;
+ struct octep_vf_device *oct;
+ struct octep_vf_oq *oq;
+ u64 reg_val;
+
+ oct = vector->octep_vf_dev;
+ oq = vector->oq;
+ /* Mailbox interrupt arrives along with interrupt of tx/rx ring pair 0 */
+ if (oq->q_no == 0) {
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0));
+ if (reg_val & CNXK_VF_SDP_R_MBOX_PF_VF_INT_STATUS) {
+ cnxk_handle_vf_mbox_intr(oct);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0), reg_val);
+ }
+ }
+ napi_schedule_irqoff(oq->napi);
+ return IRQ_HANDLED;
+}
+
+/* Re-initialize Octeon hardware registers */
+static void octep_vf_reinit_regs_cnxk(struct octep_vf_device *oct)
+{
+ u32 i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_iq_regs(oct, i);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_oq_regs(oct, i);
+
+ oct->hw_ops.enable_interrupts(oct);
+ oct->hw_ops.enable_io_queues(oct);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg);
+}
+
+/* Enable all interrupts */
+static void octep_vf_enable_interrupts_cnxk(struct octep_vf_device *oct)
+{
+ int num_rings, q;
+ u64 reg_val;
+
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ for (q = 0; q < num_rings; q++) {
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q), reg_val);
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q), reg_val);
+ }
+ /* Enable PF to VF mbox interrupt by setting 2nd bit*/
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0),
+ CNXK_VF_SDP_R_MBOX_PF_VF_INT_ENAB);
+}
+
+/* Disable all interrupts */
+static void octep_vf_disable_interrupts_cnxk(struct octep_vf_device *oct)
+{
+ int num_rings, q;
+ u64 reg_val;
+
+ /* Disable PF to VF mbox interrupt by setting 2nd bit*/
+ if (oct->mbox)
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0), 0x0);
+
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ for (q = 0; q < num_rings; q++) {
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q));
+ reg_val &= ~BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q), reg_val);
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q));
+ reg_val &= ~BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q), reg_val);
+ }
+}
+
+/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */
+static u32 octep_vf_update_iq_read_index_cnxk(struct octep_vf_iq *iq)
+{
+ u32 pkt_in_done = readl(iq->inst_cnt_reg);
+ u32 last_done, new_idx;
+
+ last_done = pkt_in_done - iq->pkt_in_done;
+ iq->pkt_in_done = pkt_in_done;
+
+ new_idx = (iq->octep_vf_read_index + last_done) % iq->max_count;
+
+ return new_idx;
+}
+
+/* Enable a hardware Tx Queue */
+static void octep_vf_enable_iq_cnxk(struct octep_vf_device *oct, int iq_no)
+{
+ u64 loop = HZ;
+ u64 reg_val;
+
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(iq_no), GENMASK_ULL(31, 0));
+
+ while (octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(iq_no)) &&
+ loop--) {
+ schedule_timeout_interruptible(1);
+ }
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no));
+ reg_val |= ULL(1);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Enable a hardware Rx Queue */
+static void octep_vf_enable_oq_cnxk(struct octep_vf_device *oct, int oq_no)
+{
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(oq_no));
+ reg_val |= BIT_ULL_MASK(62);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_DBELL(oq_no), GENMASK_ULL(31, 0));
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no));
+ reg_val |= ULL(1);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no), reg_val);
+}
+
+/* Enable all hardware Tx/Rx Queues assigned to VF */
+static void octep_vf_enable_io_queues_cnxk(struct octep_vf_device *oct)
+{
+ u8 q;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ octep_vf_enable_iq_cnxk(oct, q);
+ octep_vf_enable_oq_cnxk(oct, q);
+ }
+}
+
+/* Disable a hardware Tx Queue assigned to VF */
+static void octep_vf_disable_iq_cnxk(struct octep_vf_device *oct, int iq_no)
+{
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no));
+ reg_val &= ~ULL(1);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Disable a hardware Rx Queue assigned to VF */
+static void octep_vf_disable_oq_cnxk(struct octep_vf_device *oct, int oq_no)
+{
+ u64 reg_val;
+
+ reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no));
+ reg_val &= ~ULL(1);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no), reg_val);
+}
+
+/* Disable all hardware Tx/Rx Queues assigned to VF */
+static void octep_vf_disable_io_queues_cnxk(struct octep_vf_device *oct)
+{
+ int q;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ octep_vf_disable_iq_cnxk(oct, q);
+ octep_vf_disable_oq_cnxk(oct, q);
+ }
+}
+
+/* Dump hardware registers (including Tx/Rx queues) for debugging. */
+static void octep_vf_dump_registers_cnxk(struct octep_vf_device *oct)
+{
+ u8 num_rings, q;
+
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ for (q = 0; q < num_rings; q++)
+ cnxk_vf_dump_q_regs(oct, q);
+}
+
+/**
+ * octep_vf_device_setup_cnxk() - Setup Octeon device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * - initialize hardware operations.
+ * - get target side pcie port number for the device.
+ * - set initial configuration and max limits.
+ */
+void octep_vf_device_setup_cnxk(struct octep_vf_device *oct)
+{
+ oct->hw_ops.setup_iq_regs = octep_vf_setup_iq_regs_cnxk;
+ oct->hw_ops.setup_oq_regs = octep_vf_setup_oq_regs_cnxk;
+ oct->hw_ops.setup_mbox_regs = octep_vf_setup_mbox_regs_cnxk;
+
+ oct->hw_ops.ioq_intr_handler = octep_vf_ioq_intr_handler_cnxk;
+ oct->hw_ops.reinit_regs = octep_vf_reinit_regs_cnxk;
+
+ oct->hw_ops.enable_interrupts = octep_vf_enable_interrupts_cnxk;
+ oct->hw_ops.disable_interrupts = octep_vf_disable_interrupts_cnxk;
+
+ oct->hw_ops.update_iq_read_idx = octep_vf_update_iq_read_index_cnxk;
+
+ oct->hw_ops.enable_iq = octep_vf_enable_iq_cnxk;
+ oct->hw_ops.enable_oq = octep_vf_enable_oq_cnxk;
+ oct->hw_ops.enable_io_queues = octep_vf_enable_io_queues_cnxk;
+
+ oct->hw_ops.disable_iq = octep_vf_disable_iq_cnxk;
+ oct->hw_ops.disable_oq = octep_vf_disable_oq_cnxk;
+ oct->hw_ops.disable_io_queues = octep_vf_disable_io_queues_cnxk;
+ oct->hw_ops.reset_io_queues = octep_vf_reset_io_queues_cnxk;
+
+ oct->hw_ops.dump_registers = octep_vf_dump_registers_cnxk;
+ octep_vf_init_config_cnxk_vf(oct);
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h
new file mode 100644
index 000000000000..e03a647b0110
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_VF_CONFIG_H_
+#define _OCTEP_VF_CONFIG_H_
+
+/* Tx instruction types by length */
+#define OCTEP_VF_32BYTE_INSTR 32
+#define OCTEP_VF_64BYTE_INSTR 64
+
+/* Tx Queue: maximum descriptors per ring */
+#define OCTEP_VF_IQ_MAX_DESCRIPTORS 1024
+/* Minimum input (Tx) requests to be enqueued to ring doorbell */
+#define OCTEP_VF_DB_MIN 8
+/* Packet threshold for Tx queue interrupt */
+#define OCTEP_VF_IQ_INTR_THRESHOLD 0x0
+
+/* Minimum watermark for backpressure */
+#define OCTEP_VF_OQ_WMARK_MIN 256
+
+/* Rx Queue: maximum descriptors per ring */
+#define OCTEP_VF_OQ_MAX_DESCRIPTORS 1024
+
+/* Rx buffer size: Use page size buffers.
+ * Build skb from allocated page buffer once the packet is received.
+ * When a gathered packet is received, make head page as skb head and
+ * page buffers in consecutive Rx descriptors as fragments.
+ */
+#define OCTEP_VF_OQ_BUF_SIZE (SKB_WITH_OVERHEAD(PAGE_SIZE))
+#define OCTEP_VF_OQ_PKTS_PER_INTR 128
+#define OCTEP_VF_OQ_REFILL_THRESHOLD (OCTEP_VF_OQ_MAX_DESCRIPTORS / 4)
+
+#define OCTEP_VF_OQ_INTR_PKT_THRESHOLD 1
+#define OCTEP_VF_OQ_INTR_TIME_THRESHOLD 10
+
+#define OCTEP_VF_MSIX_NAME_SIZE (IFNAMSIZ + 32)
+
+/* Tx Queue wake threshold
+ * wakeup a stopped Tx queue if minimum 2 descriptors are available.
+ * Even a skb with fragments consume only one Tx queue descriptor entry.
+ */
+#define OCTEP_VF_WAKE_QUEUE_THRESHOLD 2
+
+/* Minimum MTU supported by Octeon network interface */
+#define OCTEP_VF_MIN_MTU ETH_MIN_MTU
+/* Maximum MTU supported by Octeon interface*/
+#define OCTEP_VF_MAX_MTU (10000 - (ETH_HLEN + ETH_FCS_LEN))
+/* Default MTU */
+#define OCTEP_VF_DEFAULT_MTU 1500
+
+/* Macros to get octeon config params */
+#define CFG_GET_IQ_CFG(cfg) ((cfg)->iq)
+#define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs)
+#define CFG_GET_IQ_INSTR_TYPE(cfg) ((cfg)->iq.instr_type)
+#define CFG_GET_IQ_INSTR_SIZE(cfg) (64)
+#define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min)
+#define CFG_GET_IQ_INTR_THRESHOLD(cfg) ((cfg)->iq.intr_threshold)
+
+#define CFG_GET_OQ_NUM_DESC(cfg) ((cfg)->oq.num_descs)
+#define CFG_GET_OQ_BUF_SIZE(cfg) ((cfg)->oq.buf_size)
+#define CFG_GET_OQ_REFILL_THRESHOLD(cfg) ((cfg)->oq.refill_threshold)
+#define CFG_GET_OQ_INTR_PKT(cfg) ((cfg)->oq.oq_intr_pkt)
+#define CFG_GET_OQ_INTR_TIME(cfg) ((cfg)->oq.oq_intr_time)
+#define CFG_GET_OQ_WMARK(cfg) ((cfg)->oq.wmark)
+
+#define CFG_GET_PORTS_ACTIVE_IO_RINGS(cfg) ((cfg)->ring_cfg.active_io_rings)
+#define CFG_GET_PORTS_MAX_IO_RINGS(cfg) ((cfg)->ring_cfg.max_io_rings)
+
+#define CFG_GET_CORE_TICS_PER_US(cfg) ((cfg)->core_cfg.core_tics_per_us)
+#define CFG_GET_COPROC_TICS_PER_US(cfg) ((cfg)->core_cfg.coproc_tics_per_us)
+
+#define CFG_GET_IOQ_MSIX(cfg) ((cfg)->msix_cfg.ioq_msix)
+
+/* Hardware Tx Queue configuration. */
+struct octep_vf_iq_config {
+ /* Size of the Input queue (number of commands) */
+ u16 num_descs;
+
+ /* Command size - 32 or 64 bytes */
+ u16 instr_type;
+
+ /* Minimum number of commands pending to be posted to Octeon before driver
+ * hits the Input queue doorbell.
+ */
+ u16 db_min;
+
+ /* Trigger the IQ interrupt when processed cmd count reaches
+ * this level.
+ */
+ u32 intr_threshold;
+};
+
+/* Hardware Rx Queue configuration. */
+struct octep_vf_oq_config {
+ /* Size of Output queue (number of descriptors) */
+ u16 num_descs;
+
+ /* Size of buffer in this Output queue. */
+ u16 buf_size;
+
+ /* The number of buffers that were consumed during packet processing
+ * by the driver on this Output queue before the driver attempts to
+ * replenish the descriptor ring with new buffers.
+ */
+ u16 refill_threshold;
+
+ /* Interrupt Coalescing (Packet Count). Octeon will interrupt the host
+ * only if it sent as many packets as specified by this field.
+ * The driver usually does not use packet count interrupt coalescing.
+ */
+ u32 oq_intr_pkt;
+
+ /* Interrupt Coalescing (Time Interval). Octeon will interrupt the host
+ * if at least one packet was sent in the time interval specified by
+ * this field. The driver uses time interval interrupt coalescing by
+ * default. The time is specified in microseconds.
+ */
+ u32 oq_intr_time;
+
+ /* Water mark for backpressure.
+ * Output queue sends backpressure signal to source when
+ * free buffer count falls below wmark.
+ */
+ u32 wmark;
+};
+
+/* Tx/Rx configuration */
+struct octep_vf_ring_config {
+ /* Max number of IOQs */
+ u16 max_io_rings;
+
+ /* Number of active IOQs */
+ u16 active_io_rings;
+};
+
+/* Octeon MSI-x config. */
+struct octep_vf_msix_config {
+ /* Number of IOQ interrupts */
+ u16 ioq_msix;
+};
+
+/* Data Structure to hold configuration limits and active config */
+struct octep_vf_config {
+ /* Input Queue attributes. */
+ struct octep_vf_iq_config iq;
+
+ /* Output Queue attributes. */
+ struct octep_vf_oq_config oq;
+
+ /* MSI-X interrupt config */
+ struct octep_vf_msix_config msix_cfg;
+
+ /* NIC VF ring Configuration */
+ struct octep_vf_ring_config ring_cfg;
+};
+#endif /* _OCTEP_VF_CONFIG_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c
new file mode 100644
index 000000000000..a1979b45e355
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+
+static const char octep_vf_gstrings_global_stats[][ETH_GSTRING_LEN] = {
+ "rx_alloc_errors",
+ "tx_busy_errors",
+ "tx_hw_pkts",
+ "tx_hw_octs",
+ "tx_hw_bcast",
+ "tx_hw_mcast",
+ "rx_hw_pkts",
+ "rx_hw_bytes",
+ "rx_hw_bcast",
+ "rx_dropped_bytes_fifo_full",
+};
+
+#define OCTEP_VF_GLOBAL_STATS_CNT (sizeof(octep_vf_gstrings_global_stats) / ETH_GSTRING_LEN)
+
+static const char octep_vf_gstrings_tx_q_stats[][ETH_GSTRING_LEN] = {
+ "tx_packets_posted[Q-%u]",
+ "tx_packets_completed[Q-%u]",
+ "tx_bytes[Q-%u]",
+ "tx_busy[Q-%u]",
+};
+
+#define OCTEP_VF_TX_Q_STATS_CNT (sizeof(octep_vf_gstrings_tx_q_stats) / ETH_GSTRING_LEN)
+
+static const char octep_vf_gstrings_rx_q_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets[Q-%u]",
+ "rx_bytes[Q-%u]",
+ "rx_alloc_errors[Q-%u]",
+};
+
+#define OCTEP_VF_RX_Q_STATS_CNT (sizeof(octep_vf_gstrings_rx_q_stats) / ETH_GSTRING_LEN)
+
+static void octep_vf_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+
+ strscpy(info->driver, OCTEP_VF_DRV_NAME, sizeof(info->driver));
+ strscpy(info->bus_info, pci_name(oct->pdev), sizeof(info->bus_info));
+}
+
+static void octep_vf_get_strings(struct net_device *netdev,
+ u32 stringset, u8 *data)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ char *strings = (char *)data;
+ int i, j;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < OCTEP_VF_GLOBAL_STATS_CNT; i++) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ octep_vf_gstrings_global_stats[i]);
+ strings += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < num_queues; i++) {
+ for (j = 0; j < OCTEP_VF_TX_Q_STATS_CNT; j++) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ octep_vf_gstrings_tx_q_stats[j], i);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+
+ for (i = 0; i < num_queues; i++) {
+ for (j = 0; j < OCTEP_VF_RX_Q_STATS_CNT; j++) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ octep_vf_gstrings_rx_q_stats[j], i);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static int octep_vf_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return OCTEP_VF_GLOBAL_STATS_CNT + (num_queues *
+ (OCTEP_VF_TX_Q_STATS_CNT + OCTEP_VF_RX_Q_STATS_CNT));
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void octep_vf_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ struct octep_vf_iface_tx_stats *iface_tx_stats;
+ struct octep_vf_iface_rx_stats *iface_rx_stats;
+ u64 rx_alloc_errors, tx_busy_errors;
+ int q, i;
+
+ rx_alloc_errors = 0;
+ tx_busy_errors = 0;
+
+ octep_vf_get_if_stats(oct);
+ iface_tx_stats = &oct->iface_tx_stats;
+ iface_rx_stats = &oct->iface_rx_stats;
+
+ for (q = 0; q < oct->num_oqs; q++) {
+ struct octep_vf_iq *iq = oct->iq[q];
+ struct octep_vf_oq *oq = oct->oq[q];
+
+ tx_busy_errors += iq->stats.tx_busy;
+ rx_alloc_errors += oq->stats.alloc_failures;
+ }
+ i = 0;
+ data[i++] = rx_alloc_errors;
+ data[i++] = tx_busy_errors;
+ data[i++] = iface_tx_stats->pkts;
+ data[i++] = iface_tx_stats->octs;
+ data[i++] = iface_tx_stats->bcst;
+ data[i++] = iface_tx_stats->mcst;
+ data[i++] = iface_rx_stats->pkts;
+ data[i++] = iface_rx_stats->octets;
+ data[i++] = iface_rx_stats->bcast_pkts;
+ data[i++] = iface_rx_stats->dropped_octets_fifo_full;
+
+ /* Per Tx Queue stats */
+ for (q = 0; q < oct->num_iqs; q++) {
+ struct octep_vf_iq *iq = oct->iq[q];
+
+ data[i++] = iq->stats.instr_posted;
+ data[i++] = iq->stats.instr_completed;
+ data[i++] = iq->stats.bytes_sent;
+ data[i++] = iq->stats.tx_busy;
+ }
+
+ /* Per Rx Queue stats */
+ for (q = 0; q < oct->num_oqs; q++) {
+ struct octep_vf_oq *oq = oct->oq[q];
+
+ data[i++] = oq->stats.packets;
+ data[i++] = oq->stats.bytes;
+ data[i++] = oq->stats.alloc_failures;
+ }
+}
+
+#define OCTEP_VF_SET_ETHTOOL_LINK_MODES_BITMAP(octep_vf_speeds, ksettings, name) \
+{ \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_T)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseT_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_R)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseR_FEC); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_CR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseCR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_KR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseKR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_LR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseLR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_SR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseSR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_25GBASE_CR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseCR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_25GBASE_KR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseKR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_25GBASE_SR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseSR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_CR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseCR4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_KR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseKR4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_LR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseLR4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_SR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseSR4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_CR2)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR2_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_KR2)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR2_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_SR2)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR2_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_CR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_KR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_LR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseLR_ER_FR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_SR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_CR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseCR4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_KR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseKR4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_LR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseLR4_ER4_Full); \
+ if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_SR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseSR4_Full); \
+}
+
+static int octep_vf_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ struct octep_vf_iface_link_info *link_info;
+ u32 advertised_modes, supported_modes;
+
+ ethtool_link_ksettings_zero_link_mode(cmd, supported);
+ ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+
+ octep_vf_get_link_info(oct);
+
+ advertised_modes = oct->link_info.advertised_modes;
+ supported_modes = oct->link_info.supported_modes;
+ link_info = &oct->link_info;
+
+ OCTEP_VF_SET_ETHTOOL_LINK_MODES_BITMAP(supported_modes, cmd, supported);
+ OCTEP_VF_SET_ETHTOOL_LINK_MODES_BITMAP(advertised_modes, cmd, advertising);
+
+ if (link_info->autoneg) {
+ if (link_info->autoneg & OCTEP_VF_LINK_MODE_AUTONEG_SUPPORTED)
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+ if (link_info->autoneg & OCTEP_VF_LINK_MODE_AUTONEG_ADVERTISED) {
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+ cmd->base.autoneg = AUTONEG_ENABLE;
+ } else {
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ }
+ } else {
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ }
+
+ cmd->base.port = PORT_FIBRE;
+ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+
+ if (netif_carrier_ok(netdev)) {
+ cmd->base.speed = link_info->speed;
+ cmd->base.duplex = DUPLEX_FULL;
+ } else {
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ }
+ return 0;
+}
+
+static const struct ethtool_ops octep_vf_ethtool_ops = {
+ .get_drvinfo = octep_vf_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = octep_vf_get_strings,
+ .get_sset_count = octep_vf_get_sset_count,
+ .get_ethtool_stats = octep_vf_get_ethtool_stats,
+ .get_link_ksettings = octep_vf_get_link_ksettings,
+};
+
+void octep_vf_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &octep_vf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
new file mode 100644
index 000000000000..dd49d0b8b494
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
@@ -0,0 +1,1231 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/vmalloc.h>
+#include <net/netdev_queues.h>
+
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+
+struct workqueue_struct *octep_vf_wq;
+
+/* Supported Devices */
+static const struct pci_device_id octep_vf_pci_id_tbl[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_VF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_VF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN98_VF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_VF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_VF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_VF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_VF)},
+ {0, },
+};
+MODULE_DEVICE_TABLE(pci, octep_vf_pci_id_tbl);
+
+MODULE_AUTHOR("Veerasenareddy Burru <vburru@marvell.com>");
+MODULE_DESCRIPTION(OCTEP_VF_DRV_STRING);
+MODULE_LICENSE("GPL");
+
+/**
+ * octep_vf_alloc_ioq_vectors() - Allocate Tx/Rx Queue interrupt info.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Allocate resources to hold per Tx/Rx queue interrupt info.
+ * This is the information passed to interrupt handler, from which napi poll
+ * is scheduled and includes quick access to private data of Tx/Rx queue
+ * corresponding to the interrupt being handled.
+ *
+ * Return: 0, on successful allocation of resources for all queue interrupts.
+ * -1, if failed to allocate any resource.
+ */
+static int octep_vf_alloc_ioq_vectors(struct octep_vf_device *oct)
+{
+ struct octep_vf_ioq_vector *ioq_vector;
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ oct->ioq_vector[i] = vzalloc(sizeof(*oct->ioq_vector[i]));
+ if (!oct->ioq_vector[i])
+ goto free_ioq_vector;
+
+ ioq_vector = oct->ioq_vector[i];
+ ioq_vector->iq = oct->iq[i];
+ ioq_vector->oq = oct->oq[i];
+ ioq_vector->octep_vf_dev = oct;
+ }
+
+ dev_info(&oct->pdev->dev, "Allocated %d IOQ vectors\n", oct->num_oqs);
+ return 0;
+
+free_ioq_vector:
+ while (i) {
+ i--;
+ vfree(oct->ioq_vector[i]);
+ oct->ioq_vector[i] = NULL;
+ }
+ return -1;
+}
+
+/**
+ * octep_vf_free_ioq_vectors() - Free Tx/Rx Queue interrupt vector info.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_vf_free_ioq_vectors(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ if (oct->ioq_vector[i]) {
+ vfree(oct->ioq_vector[i]);
+ oct->ioq_vector[i] = NULL;
+ }
+ }
+ netdev_info(oct->netdev, "Freed IOQ Vectors\n");
+}
+
+/**
+ * octep_vf_enable_msix_range() - enable MSI-x interrupts.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Allocate and enable all MSI-x interrupts (queue and non-queue interrupts)
+ * for the Octeon device.
+ *
+ * Return: 0, on successfully enabling all MSI-x interrupts.
+ * -1, if failed to enable any MSI-x interrupt.
+ */
+static int octep_vf_enable_msix_range(struct octep_vf_device *oct)
+{
+ int num_msix, msix_allocated;
+ int i;
+
+ /* Generic interrupts apart from input/output queues */
+ //num_msix = oct->num_oqs + CFG_GET_NON_IOQ_MSIX(oct->conf);
+ num_msix = oct->num_oqs;
+ oct->msix_entries = kcalloc(num_msix, sizeof(struct msix_entry), GFP_KERNEL);
+ if (!oct->msix_entries)
+ goto msix_alloc_err;
+
+ for (i = 0; i < num_msix; i++)
+ oct->msix_entries[i].entry = i;
+
+ msix_allocated = pci_enable_msix_range(oct->pdev, oct->msix_entries,
+ num_msix, num_msix);
+ if (msix_allocated != num_msix) {
+ dev_err(&oct->pdev->dev,
+ "Failed to enable %d msix irqs; got only %d\n",
+ num_msix, msix_allocated);
+ goto enable_msix_err;
+ }
+ oct->num_irqs = msix_allocated;
+ dev_info(&oct->pdev->dev, "MSI-X enabled successfully\n");
+
+ return 0;
+
+enable_msix_err:
+ if (msix_allocated > 0)
+ pci_disable_msix(oct->pdev);
+ kfree(oct->msix_entries);
+ oct->msix_entries = NULL;
+msix_alloc_err:
+ return -1;
+}
+
+/**
+ * octep_vf_disable_msix() - disable MSI-x interrupts.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Disable MSI-x on the Octeon device.
+ */
+static void octep_vf_disable_msix(struct octep_vf_device *oct)
+{
+ pci_disable_msix(oct->pdev);
+ kfree(oct->msix_entries);
+ oct->msix_entries = NULL;
+ dev_info(&oct->pdev->dev, "Disabled MSI-X\n");
+}
+
+/**
+ * octep_vf_ioq_intr_handler() - handler for all Tx/Rx queue interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data contains pointers to Tx/Rx queue private data
+ * and correspong NAPI context.
+ *
+ * this is common handler for all non-queue (generic) interrupts.
+ */
+static irqreturn_t octep_vf_ioq_intr_handler(int irq, void *data)
+{
+ struct octep_vf_ioq_vector *ioq_vector = data;
+ struct octep_vf_device *oct = ioq_vector->octep_vf_dev;
+
+ return oct->hw_ops.ioq_intr_handler(ioq_vector);
+}
+
+/**
+ * octep_vf_request_irqs() - Register interrupt handlers.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Register handlers for all queue and non-queue interrupts.
+ *
+ * Return: 0, on successful registration of all interrupt handlers.
+ * -1, on any error.
+ */
+static int octep_vf_request_irqs(struct octep_vf_device *oct)
+{
+ struct net_device *netdev = oct->netdev;
+ struct octep_vf_ioq_vector *ioq_vector;
+ struct msix_entry *msix_entry;
+ int ret, i;
+
+ /* Request IRQs for Tx/Rx queues */
+ for (i = 0; i < oct->num_oqs; i++) {
+ ioq_vector = oct->ioq_vector[i];
+ msix_entry = &oct->msix_entries[i];
+
+ snprintf(ioq_vector->name, sizeof(ioq_vector->name),
+ "%s-q%d", netdev->name, i);
+ ret = request_irq(msix_entry->vector,
+ octep_vf_ioq_intr_handler, 0,
+ ioq_vector->name, ioq_vector);
+ if (ret) {
+ netdev_err(netdev,
+ "request_irq failed for Q-%d; err=%d",
+ i, ret);
+ goto ioq_irq_err;
+ }
+
+ cpumask_set_cpu(i % num_online_cpus(),
+ &ioq_vector->affinity_mask);
+ irq_set_affinity_hint(msix_entry->vector,
+ &ioq_vector->affinity_mask);
+ }
+
+ return 0;
+ioq_irq_err:
+ while (i) {
+ --i;
+ free_irq(oct->msix_entries[i].vector, oct);
+ }
+ return -1;
+}
+
+/**
+ * octep_vf_free_irqs() - free all registered interrupts.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Free all queue and non-queue interrupts of the Octeon device.
+ */
+static void octep_vf_free_irqs(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_irqs; i++) {
+ irq_set_affinity_hint(oct->msix_entries[i].vector, NULL);
+ free_irq(oct->msix_entries[i].vector, oct->ioq_vector[i]);
+ }
+ netdev_info(oct->netdev, "IRQs freed\n");
+}
+
+/**
+ * octep_vf_setup_irqs() - setup interrupts for the Octeon device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Allocate data structures to hold per interrupt information, allocate/enable
+ * MSI-x interrupt and register interrupt handlers.
+ *
+ * Return: 0, on successful allocation and registration of all interrupts.
+ * -1, on any error.
+ */
+static int octep_vf_setup_irqs(struct octep_vf_device *oct)
+{
+ if (octep_vf_alloc_ioq_vectors(oct))
+ goto ioq_vector_err;
+
+ if (octep_vf_enable_msix_range(oct))
+ goto enable_msix_err;
+
+ if (octep_vf_request_irqs(oct))
+ goto request_irq_err;
+
+ return 0;
+
+request_irq_err:
+ octep_vf_disable_msix(oct);
+enable_msix_err:
+ octep_vf_free_ioq_vectors(oct);
+ioq_vector_err:
+ return -1;
+}
+
+/**
+ * octep_vf_clean_irqs() - free all interrupts and its resources.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_vf_clean_irqs(struct octep_vf_device *oct)
+{
+ octep_vf_free_irqs(oct);
+ octep_vf_disable_msix(oct);
+ octep_vf_free_ioq_vectors(oct);
+}
+
+/**
+ * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @oq: Octeon Rx queue data structure.
+ */
+static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq)
+{
+ u32 pkts_pend = oq->pkts_pending;
+
+ netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no);
+ if (iq->pkts_processed) {
+ writel(iq->pkts_processed, iq->inst_cnt_reg);
+ iq->pkt_in_done -= iq->pkts_processed;
+ iq->pkts_processed = 0;
+ }
+ if (oq->last_pkt_count - pkts_pend) {
+ writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg);
+ oq->last_pkt_count = pkts_pend;
+ }
+
+ /* Flush the previous wrties before writing to RESEND bit */
+ smp_wmb();
+ writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg);
+ writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg);
+}
+
+/**
+ * octep_vf_napi_poll() - NAPI poll function for Tx/Rx.
+ *
+ * @napi: pointer to napi context.
+ * @budget: max number of packets to be processed in single invocation.
+ */
+static int octep_vf_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct octep_vf_ioq_vector *ioq_vector =
+ container_of(napi, struct octep_vf_ioq_vector, napi);
+ u32 tx_pending, rx_done;
+
+ tx_pending = octep_vf_iq_process_completions(ioq_vector->iq, 64);
+ rx_done = octep_vf_oq_process_rx(ioq_vector->oq, budget);
+
+ /* need more polling if tx completion processing is still pending or
+ * processed at least 'budget' number of rx packets.
+ */
+ if (tx_pending || rx_done >= budget)
+ return budget;
+
+ if (likely(napi_complete_done(napi, rx_done)))
+ octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);
+
+ return rx_done;
+}
+
+/**
+ * octep_vf_napi_add() - Add NAPI poll for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_vf_napi_add(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Adding NAPI on Q-%d\n", i);
+ netif_napi_add(oct->netdev, &oct->ioq_vector[i]->napi, octep_vf_napi_poll);
+ oct->oq[i]->napi = &oct->ioq_vector[i]->napi;
+ }
+}
+
+/**
+ * octep_vf_napi_delete() - delete NAPI poll callback for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_vf_napi_delete(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Deleting NAPI on Q-%d\n", i);
+ netif_napi_del(&oct->ioq_vector[i]->napi);
+ oct->oq[i]->napi = NULL;
+ }
+}
+
+/**
+ * octep_vf_napi_enable() - enable NAPI for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_vf_napi_enable(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Enabling NAPI on Q-%d\n", i);
+ napi_enable(&oct->ioq_vector[i]->napi);
+ }
+}
+
+/**
+ * octep_vf_napi_disable() - disable NAPI for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_vf_napi_disable(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Disabling NAPI on Q-%d\n", i);
+ napi_disable(&oct->ioq_vector[i]->napi);
+ }
+}
+
+static void octep_vf_link_up(struct net_device *netdev)
+{
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+}
+
+static void octep_vf_set_rx_state(struct octep_vf_device *oct, bool up)
+{
+ int err;
+
+ err = octep_vf_mbox_set_rx_state(oct, up);
+ if (err)
+ netdev_err(oct->netdev, "Set Rx state to %d failed with err:%d\n", up, err);
+}
+
+static int octep_vf_get_link_status(struct octep_vf_device *oct)
+{
+ int err;
+
+ err = octep_vf_mbox_get_link_status(oct, &oct->link_info.oper_up);
+ if (err)
+ netdev_err(oct->netdev, "Get link status failed with err:%d\n", err);
+ return oct->link_info.oper_up;
+}
+
+static void octep_vf_set_link_status(struct octep_vf_device *oct, bool up)
+{
+ int err;
+
+ err = octep_vf_mbox_set_link_status(oct, up);
+ if (err) {
+ netdev_err(oct->netdev, "Set link status to %d failed with err:%d\n", up, err);
+ return;
+ }
+ oct->link_info.oper_up = up;
+}
+
+/**
+ * octep_vf_open() - start the octeon network device.
+ *
+ * @netdev: pointer to kernel network device.
+ *
+ * setup Tx/Rx queues, interrupts and enable hardware operation of Tx/Rx queues
+ * and interrupts..
+ *
+ * Return: 0, on successfully setting up device and bring it up.
+ * -1, on any error.
+ */
+static int octep_vf_open(struct net_device *netdev)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ int err, ret;
+
+ netdev_info(netdev, "Starting netdev ...\n");
+ netif_carrier_off(netdev);
+
+ oct->hw_ops.reset_io_queues(oct);
+
+ if (octep_vf_setup_iqs(oct))
+ goto setup_iq_err;
+ if (octep_vf_setup_oqs(oct))
+ goto setup_oq_err;
+ if (octep_vf_setup_irqs(oct))
+ goto setup_irq_err;
+
+ err = netif_set_real_num_tx_queues(netdev, oct->num_oqs);
+ if (err)
+ goto set_queues_err;
+ err = netif_set_real_num_rx_queues(netdev, oct->num_iqs);
+ if (err)
+ goto set_queues_err;
+
+ octep_vf_napi_add(oct);
+ octep_vf_napi_enable(oct);
+
+ oct->link_info.admin_up = 1;
+ octep_vf_set_rx_state(oct, true);
+
+ ret = octep_vf_get_link_status(oct);
+ if (!ret)
+ octep_vf_set_link_status(oct, true);
+
+ /* Enable the input and output queues for this Octeon device */
+ oct->hw_ops.enable_io_queues(oct);
+
+ /* Enable Octeon device interrupts */
+ oct->hw_ops.enable_interrupts(oct);
+
+ octep_vf_oq_dbell_init(oct);
+
+ ret = octep_vf_get_link_status(oct);
+ if (ret)
+ octep_vf_link_up(netdev);
+
+ return 0;
+
+set_queues_err:
+ octep_vf_napi_disable(oct);
+ octep_vf_napi_delete(oct);
+ octep_vf_clean_irqs(oct);
+setup_irq_err:
+ octep_vf_free_oqs(oct);
+setup_oq_err:
+ octep_vf_free_iqs(oct);
+setup_iq_err:
+ return -1;
+}
+
+/**
+ * octep_vf_stop() - stop the octeon network device.
+ *
+ * @netdev: pointer to kernel network device.
+ *
+ * stop the device Tx/Rx operations, bring down the link and
+ * free up all resources allocated for Tx/Rx queues and interrupts.
+ */
+static int octep_vf_stop(struct net_device *netdev)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+
+ netdev_info(netdev, "Stopping the device ...\n");
+
+ /* Stop Tx from stack */
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+
+ octep_vf_set_link_status(oct, false);
+ octep_vf_set_rx_state(oct, false);
+
+ oct->link_info.admin_up = 0;
+ oct->link_info.oper_up = 0;
+
+ oct->hw_ops.disable_interrupts(oct);
+ octep_vf_napi_disable(oct);
+ octep_vf_napi_delete(oct);
+
+ octep_vf_clean_irqs(oct);
+ octep_vf_clean_iqs(oct);
+
+ oct->hw_ops.disable_io_queues(oct);
+ oct->hw_ops.reset_io_queues(oct);
+ octep_vf_free_oqs(oct);
+ octep_vf_free_iqs(oct);
+ netdev_info(netdev, "Device stopped !!\n");
+ return 0;
+}
+
+/**
+ * octep_vf_iq_full_check() - check if a Tx queue is full.
+ *
+ * @iq: Octeon Tx queue data structure.
+ *
+ * Return: 0, if the Tx queue is not full.
+ * 1, if the Tx queue is full.
+ */
+static int octep_vf_iq_full_check(struct octep_vf_iq *iq)
+{
+ int ret;
+
+ ret = netif_subqueue_maybe_stop(iq->netdev, iq->q_no, IQ_INSTR_SPACE(iq),
+ OCTEP_VF_WAKE_QUEUE_THRESHOLD,
+ OCTEP_VF_WAKE_QUEUE_THRESHOLD);
+ switch (ret) {
+ case 0: /* Stopped the queue, since IQ is full */
+ return 1;
+ case -1: /*
+ * Pending updates in write index from
+ * iq_process_completion in other cpus
+ * caused queues to get re-enabled after
+ * being stopped
+ */
+ iq->stats.restart_cnt++;
+ fallthrough;
+ case 1: /* Queue left enabled, since IQ is not yet full*/
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * octep_vf_start_xmit() - Enqueue packet to Octoen hardware Tx Queue.
+ *
+ * @skb: packet skbuff pointer.
+ * @netdev: kernel network device.
+ *
+ * Return: NETDEV_TX_BUSY, if Tx Queue is full.
+ * NETDEV_TX_OK, if successfully enqueued to hardware Tx queue.
+ */
+static netdev_tx_t octep_vf_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ netdev_features_t feat = netdev->features;
+ struct octep_vf_tx_sglist_desc *sglist;
+ struct octep_vf_tx_buffer *tx_buffer;
+ struct octep_vf_tx_desc_hw *hw_desc;
+ struct skb_shared_info *shinfo;
+ struct octep_vf_instr_hdr *ih;
+ struct octep_vf_iq *iq;
+ skb_frag_t *frag;
+ u16 nr_frags, si;
+ int xmit_more;
+ u16 q_no, wi;
+
+ if (skb_put_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ q_no = skb_get_queue_mapping(skb);
+ if (q_no >= oct->num_iqs) {
+ netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no);
+ q_no = q_no % oct->num_iqs;
+ }
+
+ iq = oct->iq[q_no];
+
+ shinfo = skb_shinfo(skb);
+ nr_frags = shinfo->nr_frags;
+
+ wi = iq->host_write_index;
+ hw_desc = &iq->desc_ring[wi];
+ hw_desc->ih64 = 0;
+
+ tx_buffer = iq->buff_info + wi;
+ tx_buffer->skb = skb;
+
+ ih = &hw_desc->ih;
+ ih->tlen = skb->len;
+ ih->pkind = oct->fw_info.pkind;
+ ih->fsz = oct->fw_info.fsz;
+ ih->tlen = skb->len + ih->fsz;
+
+ if (!nr_frags) {
+ tx_buffer->gather = 0;
+ tx_buffer->dma = dma_map_single(iq->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(iq->dev, tx_buffer->dma))
+ goto dma_map_err;
+ hw_desc->dptr = tx_buffer->dma;
+ } else {
+ /* Scatter/Gather */
+ dma_addr_t dma;
+ u16 len;
+
+ sglist = tx_buffer->sglist;
+
+ ih->gsz = nr_frags + 1;
+ ih->gather = 1;
+ tx_buffer->gather = 1;
+
+ len = skb_headlen(skb);
+ dma = dma_map_single(iq->dev, skb->data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(iq->dev, dma))
+ goto dma_map_err;
+
+ memset(sglist, 0, OCTEP_VF_SGLIST_SIZE_PER_PKT);
+ sglist[0].len[3] = len;
+ sglist[0].dma_ptr[0] = dma;
+
+ si = 1; /* entry 0 is main skb, mapped above */
+ frag = &shinfo->frags[0];
+ while (nr_frags--) {
+ len = skb_frag_size(frag);
+ dma = skb_frag_dma_map(iq->dev, frag, 0,
+ len, DMA_TO_DEVICE);
+ if (dma_mapping_error(iq->dev, dma))
+ goto dma_map_sg_err;
+
+ sglist[si >> 2].len[3 - (si & 3)] = len;
+ sglist[si >> 2].dma_ptr[si & 3] = dma;
+
+ frag++;
+ si++;
+ }
+ hw_desc->dptr = tx_buffer->sglist_dma;
+ }
+ if (oct->fw_info.tx_ol_flags) {
+ if ((feat & (NETIF_F_TSO)) && (skb_is_gso(skb))) {
+ hw_desc->txm.ol_flags = OCTEP_VF_TX_OFFLOAD_CKSUM;
+ hw_desc->txm.ol_flags |= OCTEP_VF_TX_OFFLOAD_TSO;
+ hw_desc->txm.gso_size = skb_shinfo(skb)->gso_size;
+ hw_desc->txm.gso_segs = skb_shinfo(skb)->gso_segs;
+ } else if (feat & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+ hw_desc->txm.ol_flags = OCTEP_VF_TX_OFFLOAD_CKSUM;
+ }
+ /* due to ESR txm will be swapped by hw */
+ hw_desc->txm64[0] = (__force u64)cpu_to_be64(hw_desc->txm64[0]);
+ }
+
+ xmit_more = netdev_xmit_more();
+
+ netdev_tx_sent_queue(iq->netdev_q, skb->len);
+
+ skb_tx_timestamp(skb);
+ iq->fill_cnt++;
+ wi++;
+ iq->host_write_index = wi & iq->ring_size_mask;
+
+ /* octep_iq_full_check stops the queue and returns
+ * true if so, in case the queue has become full
+ * by inserting current packet. If so, we can
+ * go ahead and ring doorbell.
+ */
+ if (!octep_vf_iq_full_check(iq) && xmit_more &&
+ iq->fill_cnt < iq->fill_threshold)
+ return NETDEV_TX_OK;
+
+ goto ring_dbell;
+
+dma_map_sg_err:
+ if (si > 0) {
+ dma_unmap_single(iq->dev, sglist[0].dma_ptr[0],
+ sglist[0].len[0], DMA_TO_DEVICE);
+ sglist[0].len[0] = 0;
+ }
+ while (si > 1) {
+ dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3],
+ sglist[si >> 2].len[si & 3], DMA_TO_DEVICE);
+ sglist[si >> 2].len[si & 3] = 0;
+ si--;
+ }
+ tx_buffer->gather = 0;
+dma_map_err:
+ dev_kfree_skb_any(skb);
+ring_dbell:
+ /* Flush the hw descriptors before writing to doorbell */
+ smp_wmb();
+ writel(iq->fill_cnt, iq->doorbell_reg);
+ iq->stats.instr_posted += iq->fill_cnt;
+ iq->fill_cnt = 0;
+ return NETDEV_TX_OK;
+}
+
+int octep_vf_get_if_stats(struct octep_vf_device *oct)
+{
+ struct octep_vf_iface_rxtx_stats vf_stats;
+ int ret, size;
+
+ memset(&vf_stats, 0, sizeof(struct octep_vf_iface_rxtx_stats));
+ ret = octep_vf_mbox_bulk_read(oct, OCTEP_PFVF_MBOX_CMD_GET_STATS,
+ (u8 *)&vf_stats, &size);
+
+ if (ret)
+ return ret;
+
+ memcpy(&oct->iface_rx_stats, &vf_stats.iface_rx_stats,
+ sizeof(struct octep_vf_iface_rx_stats));
+ memcpy(&oct->iface_tx_stats, &vf_stats.iface_tx_stats,
+ sizeof(struct octep_vf_iface_tx_stats));
+
+ return 0;
+}
+
+int octep_vf_get_link_info(struct octep_vf_device *oct)
+{
+ int ret, size;
+
+ ret = octep_vf_mbox_bulk_read(oct, OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO,
+ (u8 *)&oct->link_info, &size);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Get VF link info failed via VF Mbox\n");
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * octep_vf_get_stats64() - Get Octeon network device statistics.
+ *
+ * @netdev: kernel network device.
+ * @stats: pointer to stats structure to be filled in.
+ */
+static void octep_vf_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+ int q;
+
+ tx_packets = 0;
+ tx_bytes = 0;
+ rx_packets = 0;
+ rx_bytes = 0;
+ for (q = 0; q < oct->num_oqs; q++) {
+ struct octep_vf_iq *iq = oct->iq[q];
+ struct octep_vf_oq *oq = oct->oq[q];
+
+ tx_packets += iq->stats.instr_completed;
+ tx_bytes += iq->stats.bytes_sent;
+ rx_packets += oq->stats.packets;
+ rx_bytes += oq->stats.bytes;
+ }
+ stats->tx_packets = tx_packets;
+ stats->tx_bytes = tx_bytes;
+ stats->rx_packets = rx_packets;
+ stats->rx_bytes = rx_bytes;
+ if (!octep_vf_get_if_stats(oct)) {
+ stats->multicast = oct->iface_rx_stats.mcast_pkts;
+ stats->rx_errors = oct->iface_rx_stats.err_pkts;
+ stats->rx_dropped = oct->iface_rx_stats.dropped_pkts_fifo_full +
+ oct->iface_rx_stats.err_pkts;
+ stats->rx_missed_errors = oct->iface_rx_stats.dropped_pkts_fifo_full;
+ stats->tx_dropped = oct->iface_tx_stats.dropped;
+ }
+}
+
+/**
+ * octep_vf_tx_timeout_task - work queue task to Handle Tx queue timeout.
+ *
+ * @work: pointer to Tx queue timeout work_struct
+ *
+ * Stop and start the device so that it frees up all queue resources
+ * and restarts the queues, that potentially clears a Tx queue timeout
+ * condition.
+ **/
+static void octep_vf_tx_timeout_task(struct work_struct *work)
+{
+ struct octep_vf_device *oct = container_of(work, struct octep_vf_device,
+ tx_timeout_task);
+ struct net_device *netdev = oct->netdev;
+
+ rtnl_lock();
+ if (netif_running(netdev)) {
+ octep_vf_stop(netdev);
+ octep_vf_open(netdev);
+ }
+ rtnl_unlock();
+ netdev_put(netdev, NULL);
+}
+
+/**
+ * octep_vf_tx_timeout() - Handle Tx Queue timeout.
+ *
+ * @netdev: pointer to kernel network device.
+ * @txqueue: Timed out Tx queue number.
+ *
+ * Schedule a work to handle Tx queue timeout.
+ */
+static void octep_vf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+
+ netdev_hold(netdev, NULL, GFP_ATOMIC);
+ schedule_work(&oct->tx_timeout_task);
+}
+
+static int octep_vf_set_mac(struct net_device *netdev, void *p)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ struct sockaddr *addr = (struct sockaddr *)p;
+ int err;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ err = octep_vf_mbox_set_mac_addr(oct, addr->sa_data);
+ if (err)
+ return err;
+
+ memcpy(oct->mac_addr, addr->sa_data, ETH_ALEN);
+ eth_hw_addr_set(netdev, addr->sa_data);
+
+ return 0;
+}
+
+static int octep_vf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ struct octep_vf_iface_link_info *link_info;
+ int err;
+
+ link_info = &oct->link_info;
+ if (link_info->mtu == new_mtu)
+ return 0;
+
+ err = octep_vf_mbox_set_mtu(oct, new_mtu);
+ if (!err) {
+ oct->link_info.mtu = new_mtu;
+ netdev->mtu = new_mtu;
+ }
+ return err;
+}
+
+static int octep_vf_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct octep_vf_device *oct = netdev_priv(netdev);
+ u16 rx_offloads = 0, tx_offloads = 0;
+ int err;
+
+ /* We only support features received from firmware */
+ if ((features & netdev->hw_features) != features)
+ return -EINVAL;
+
+ if (features & NETIF_F_TSO)
+ tx_offloads |= OCTEP_VF_TX_OFFLOAD_TSO;
+
+ if (features & NETIF_F_TSO6)
+ tx_offloads |= OCTEP_VF_TX_OFFLOAD_TSO;
+
+ if (features & NETIF_F_IP_CSUM)
+ tx_offloads |= OCTEP_VF_TX_OFFLOAD_CKSUM;
+
+ if (features & NETIF_F_IPV6_CSUM)
+ tx_offloads |= OCTEP_VF_TX_OFFLOAD_CKSUM;
+
+ if (features & NETIF_F_RXCSUM)
+ rx_offloads |= OCTEP_VF_RX_OFFLOAD_CKSUM;
+
+ err = octep_vf_mbox_set_offloads(oct, tx_offloads, rx_offloads);
+ if (!err)
+ netdev->features = features;
+
+ return err;
+}
+
+static const struct net_device_ops octep_vf_netdev_ops = {
+ .ndo_open = octep_vf_open,
+ .ndo_stop = octep_vf_stop,
+ .ndo_start_xmit = octep_vf_start_xmit,
+ .ndo_get_stats64 = octep_vf_get_stats64,
+ .ndo_tx_timeout = octep_vf_tx_timeout,
+ .ndo_set_mac_address = octep_vf_set_mac,
+ .ndo_change_mtu = octep_vf_change_mtu,
+ .ndo_set_features = octep_vf_set_features,
+};
+
+static const char *octep_vf_devid_to_str(struct octep_vf_device *oct)
+{
+ switch (oct->chip_id) {
+ case OCTEP_PCI_DEVICE_ID_CN93_VF:
+ return "CN93XX";
+ case OCTEP_PCI_DEVICE_ID_CNF95N_VF:
+ return "CNF95N";
+ case OCTEP_PCI_DEVICE_ID_CN10KA_VF:
+ return "CN10KA";
+ case OCTEP_PCI_DEVICE_ID_CNF10KA_VF:
+ return "CNF10KA";
+ case OCTEP_PCI_DEVICE_ID_CNF10KB_VF:
+ return "CNF10KB";
+ case OCTEP_PCI_DEVICE_ID_CN10KB_VF:
+ return "CN10KB";
+ default:
+ return "Unsupported";
+ }
+}
+
+/**
+ * octep_vf_device_setup() - Setup Octeon Device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Setup Octeon device hardware operations, configuration, etc ...
+ */
+int octep_vf_device_setup(struct octep_vf_device *oct)
+{
+ struct pci_dev *pdev = oct->pdev;
+
+ /* allocate memory for oct->conf */
+ oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL);
+ if (!oct->conf)
+ return -ENOMEM;
+
+ /* Map BAR region 0 */
+ oct->mmio.hw_addr = ioremap(pci_resource_start(oct->pdev, 0),
+ pci_resource_len(oct->pdev, 0));
+ if (!oct->mmio.hw_addr) {
+ dev_err(&pdev->dev,
+ "Failed to remap BAR0; start=0x%llx len=0x%llx\n",
+ pci_resource_start(oct->pdev, 0),
+ pci_resource_len(oct->pdev, 0));
+ goto ioremap_err;
+ }
+ oct->mmio.mapped = 1;
+
+ oct->chip_id = pdev->device;
+ oct->rev_id = pdev->revision;
+ dev_info(&pdev->dev, "chip_id = 0x%x\n", pdev->device);
+
+ switch (oct->chip_id) {
+ case OCTEP_PCI_DEVICE_ID_CN93_VF:
+ case OCTEP_PCI_DEVICE_ID_CNF95N_VF:
+ case OCTEP_PCI_DEVICE_ID_CN98_VF:
+ dev_info(&pdev->dev, "Setting up OCTEON %s VF PASS%d.%d\n",
+ octep_vf_devid_to_str(oct), OCTEP_VF_MAJOR_REV(oct),
+ OCTEP_VF_MINOR_REV(oct));
+ octep_vf_device_setup_cn93(oct);
+ break;
+ case OCTEP_PCI_DEVICE_ID_CNF10KA_VF:
+ case OCTEP_PCI_DEVICE_ID_CN10KA_VF:
+ case OCTEP_PCI_DEVICE_ID_CNF10KB_VF:
+ case OCTEP_PCI_DEVICE_ID_CN10KB_VF:
+ dev_info(&pdev->dev, "Setting up OCTEON %s VF PASS%d.%d\n",
+ octep_vf_devid_to_str(oct), OCTEP_VF_MAJOR_REV(oct),
+ OCTEP_VF_MINOR_REV(oct));
+ octep_vf_device_setup_cnxk(oct);
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported device\n");
+ goto unsupported_dev;
+ }
+
+ return 0;
+
+unsupported_dev:
+ iounmap(oct->mmio.hw_addr);
+ioremap_err:
+ kfree(oct->conf);
+ return -EOPNOTSUPP;
+}
+
+/**
+ * octep_vf_device_cleanup() - Cleanup Octeon Device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Cleanup Octeon device allocated resources.
+ */
+static void octep_vf_device_cleanup(struct octep_vf_device *oct)
+{
+ dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n");
+
+ if (oct->mmio.mapped)
+ iounmap(oct->mmio.hw_addr);
+
+ kfree(oct->conf);
+ oct->conf = NULL;
+}
+
+static int octep_vf_get_mac_addr(struct octep_vf_device *oct, u8 *addr)
+{
+ return octep_vf_mbox_get_mac_addr(oct, addr);
+}
+
+/**
+ * octep_vf_probe() - Octeon PCI device probe handler.
+ *
+ * @pdev: PCI device structure.
+ * @ent: entry in Octeon PCI device ID table.
+ *
+ * Initializes and enables the Octeon PCI device for network operations.
+ * Initializes Octeon private data structure and registers a network device.
+ */
+static int octep_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct octep_vf_device *octep_vf_dev;
+ struct net_device *netdev;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable PCI device\n");
+ return err;
+ }
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set DMA mask !!\n");
+ goto disable_pci_device;
+ }
+
+ err = pci_request_mem_regions(pdev, OCTEP_VF_DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to map PCI memory regions\n");
+ goto disable_pci_device;
+ }
+
+ pci_set_master(pdev);
+
+ netdev = alloc_etherdev_mq(sizeof(struct octep_vf_device),
+ OCTEP_VF_MAX_QUEUES);
+ if (!netdev) {
+ dev_err(&pdev->dev, "Failed to allocate netdev\n");
+ err = -ENOMEM;
+ goto mem_regions_release;
+ }
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ octep_vf_dev = netdev_priv(netdev);
+ octep_vf_dev->netdev = netdev;
+ octep_vf_dev->pdev = pdev;
+ octep_vf_dev->dev = &pdev->dev;
+ pci_set_drvdata(pdev, octep_vf_dev);
+
+ err = octep_vf_device_setup(octep_vf_dev);
+ if (err) {
+ dev_err(&pdev->dev, "Device setup failed\n");
+ goto netdevice_free;
+ }
+ INIT_WORK(&octep_vf_dev->tx_timeout_task, octep_vf_tx_timeout_task);
+
+ netdev->netdev_ops = &octep_vf_netdev_ops;
+ octep_vf_set_ethtool_ops(netdev);
+ netif_carrier_off(netdev);
+
+ if (octep_vf_setup_mbox(octep_vf_dev)) {
+ dev_err(&pdev->dev, "VF Mailbox setup failed\n");
+ err = -ENOMEM;
+ goto device_cleanup;
+ }
+
+ if (octep_vf_mbox_version_check(octep_vf_dev)) {
+ dev_err(&pdev->dev, "PF VF Mailbox version mismatch\n");
+ err = -EINVAL;
+ goto delete_mbox;
+ }
+
+ if (octep_vf_mbox_get_fw_info(octep_vf_dev)) {
+ dev_err(&pdev->dev, "unable to get fw info\n");
+ err = -EINVAL;
+ goto delete_mbox;
+ }
+
+ netdev->hw_features = NETIF_F_SG;
+ if (OCTEP_VF_TX_IP_CSUM(octep_vf_dev->fw_info.tx_ol_flags))
+ netdev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+
+ if (OCTEP_VF_RX_IP_CSUM(octep_vf_dev->fw_info.rx_ol_flags))
+ netdev->hw_features |= NETIF_F_RXCSUM;
+
+ netdev->min_mtu = OCTEP_VF_MIN_MTU;
+ netdev->max_mtu = OCTEP_VF_MAX_MTU;
+ netdev->mtu = OCTEP_VF_DEFAULT_MTU;
+
+ if (OCTEP_VF_TX_TSO(octep_vf_dev->fw_info.tx_ol_flags)) {
+ netdev->hw_features |= NETIF_F_TSO;
+ netif_set_tso_max_size(netdev, netdev->max_mtu);
+ }
+
+ netdev->features |= netdev->hw_features;
+ octep_vf_get_mac_addr(octep_vf_dev, octep_vf_dev->mac_addr);
+ eth_hw_addr_set(netdev, octep_vf_dev->mac_addr);
+ err = register_netdev(netdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register netdev\n");
+ goto delete_mbox;
+ }
+ dev_info(&pdev->dev, "Device probe successful\n");
+ return 0;
+
+delete_mbox:
+ octep_vf_delete_mbox(octep_vf_dev);
+device_cleanup:
+ octep_vf_device_cleanup(octep_vf_dev);
+netdevice_free:
+ free_netdev(netdev);
+mem_regions_release:
+ pci_release_mem_regions(pdev);
+disable_pci_device:
+ pci_disable_device(pdev);
+ dev_err(&pdev->dev, "Device probe failed\n");
+ return err;
+}
+
+/**
+ * octep_vf_remove() - Remove Octeon PCI device from driver control.
+ *
+ * @pdev: PCI device structure of the Octeon device.
+ *
+ * Cleanup all resources allocated for the Octeon device.
+ * Unregister from network device and disable the PCI device.
+ */
+static void octep_vf_remove(struct pci_dev *pdev)
+{
+ struct octep_vf_device *oct = pci_get_drvdata(pdev);
+ struct net_device *netdev;
+
+ if (!oct)
+ return;
+
+ octep_vf_mbox_dev_remove(oct);
+ cancel_work_sync(&oct->tx_timeout_task);
+ netdev = oct->netdev;
+ if (netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(netdev);
+ octep_vf_delete_mbox(oct);
+ octep_vf_device_cleanup(oct);
+ pci_release_mem_regions(pdev);
+ free_netdev(netdev);
+ pci_disable_device(pdev);
+}
+
+static struct pci_driver octep_vf_driver = {
+ .name = OCTEP_VF_DRV_NAME,
+ .id_table = octep_vf_pci_id_tbl,
+ .probe = octep_vf_probe,
+ .remove = octep_vf_remove,
+};
+
+/**
+ * octep_vf_init_module() - Module initialization.
+ *
+ * create common resource for the driver and register PCI driver.
+ */
+static int __init octep_vf_init_module(void)
+{
+ int ret;
+
+ pr_info("%s: Loading %s ...\n", OCTEP_VF_DRV_NAME, OCTEP_VF_DRV_STRING);
+
+ ret = pci_register_driver(&octep_vf_driver);
+ if (ret < 0) {
+ pr_err("%s: Failed to register PCI driver; err=%d\n",
+ OCTEP_VF_DRV_NAME, ret);
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * octep_vf_exit_module() - Module exit routine.
+ *
+ * unregister the driver with PCI subsystem and cleanup common resources.
+ */
+static void __exit octep_vf_exit_module(void)
+{
+ pr_info("%s: Unloading ...\n", OCTEP_VF_DRV_NAME);
+
+ pci_unregister_driver(&octep_vf_driver);
+
+ pr_info("%s: Unloading complete\n", OCTEP_VF_DRV_NAME);
+}
+
+module_init(octep_vf_init_module);
+module_exit(octep_vf_exit_module);
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
new file mode 100644
index 000000000000..5769f62545cd
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_VF_MAIN_H_
+#define _OCTEP_VF_MAIN_H_
+
+#include "octep_vf_tx.h"
+#include "octep_vf_rx.h"
+#include "octep_vf_mbox.h"
+
+#define OCTEP_VF_DRV_NAME "octeon_ep_vf"
+#define OCTEP_VF_DRV_STRING "Marvell Octeon EndPoint NIC VF Driver"
+
+#define OCTEP_PCI_DEVICE_ID_CN93_VF 0xB203 //93xx VF
+#define OCTEP_PCI_DEVICE_ID_CNF95N_VF 0xB403 //95N VF
+#define OCTEP_PCI_DEVICE_ID_CN98_VF 0xB103
+#define OCTEP_PCI_DEVICE_ID_CN10KA_VF 0xB903
+#define OCTEP_PCI_DEVICE_ID_CNF10KA_VF 0xBA03
+#define OCTEP_PCI_DEVICE_ID_CNF10KB_VF 0xBC03
+#define OCTEP_PCI_DEVICE_ID_CN10KB_VF 0xBD03
+
+#define OCTEP_VF_MAX_QUEUES 63
+#define OCTEP_VF_MAX_IQ OCTEP_VF_MAX_QUEUES
+#define OCTEP_VF_MAX_OQ OCTEP_VF_MAX_QUEUES
+
+#define OCTEP_VF_MAX_MSIX_VECTORS OCTEP_VF_MAX_OQ
+
+#define OCTEP_VF_IQ_INTR_RESEND_BIT 59
+#define OCTEP_VF_OQ_INTR_RESEND_BIT 59
+
+#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \
+ ((iq__)->host_write_index - (iq__)->flush_index) & \
+ (iq__)->ring_size_mask; \
+ })
+#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \
+ (iq_)->max_count - IQ_INSTR_PENDING(iq_); \
+ })
+
+/* PCI address space mapping information.
+ * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
+ * Octeon gets mapped to different physical address spaces in
+ * the kernel.
+ */
+struct octep_vf_mmio {
+ /* The physical address to which the PCI address space is mapped. */
+ u8 __iomem *hw_addr;
+
+ /* Flag indicating the mapping was successful. */
+ int mapped;
+};
+
+struct octep_vf_hw_ops {
+ void (*setup_iq_regs)(struct octep_vf_device *oct, int q);
+ void (*setup_oq_regs)(struct octep_vf_device *oct, int q);
+ void (*setup_mbox_regs)(struct octep_vf_device *oct, int mbox);
+
+ irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector);
+ irqreturn_t (*ioq_intr_handler)(void *ioq_vector);
+ void (*reinit_regs)(struct octep_vf_device *oct);
+ u32 (*update_iq_read_idx)(struct octep_vf_iq *iq);
+
+ void (*enable_interrupts)(struct octep_vf_device *oct);
+ void (*disable_interrupts)(struct octep_vf_device *oct);
+
+ void (*enable_io_queues)(struct octep_vf_device *oct);
+ void (*disable_io_queues)(struct octep_vf_device *oct);
+ void (*enable_iq)(struct octep_vf_device *oct, int q);
+ void (*disable_iq)(struct octep_vf_device *oct, int q);
+ void (*enable_oq)(struct octep_vf_device *oct, int q);
+ void (*disable_oq)(struct octep_vf_device *oct, int q);
+ void (*reset_io_queues)(struct octep_vf_device *oct);
+ void (*dump_registers)(struct octep_vf_device *oct);
+};
+
+/* Octeon mailbox data */
+struct octep_vf_mbox_data {
+ /* Holds the offset of received data via mailbox. */
+ u32 data_index;
+
+ /* Holds the received data via mailbox. */
+ u8 recv_data[OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE];
+};
+
+/* wrappers around work structs */
+struct octep_vf_mbox_wk {
+ struct work_struct work;
+ void *ctxptr;
+};
+
+/* Octeon device mailbox */
+struct octep_vf_mbox {
+ /* A mutex to protect access to this q_mbox. */
+ struct mutex lock;
+
+ u32 state;
+
+ /* SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */
+ u8 __iomem *mbox_int_reg;
+
+ /* SLI_PKT_PF_VF_MBOX_SIG(0) for PF,
+ * SLI_PKT_PF_VF_MBOX_SIG(1) for VF.
+ */
+ u8 __iomem *mbox_write_reg;
+
+ /* SLI_PKT_PF_VF_MBOX_SIG(1) for PF,
+ * SLI_PKT_PF_VF_MBOX_SIG(0) for VF.
+ */
+ u8 __iomem *mbox_read_reg;
+
+ /* Octeon mailbox data */
+ struct octep_vf_mbox_data mbox_data;
+
+ /* Octeon mailbox work handler to process Mbox messages */
+ struct octep_vf_mbox_wk wk;
+};
+
+/* Tx/Rx queue vector per interrupt. */
+struct octep_vf_ioq_vector {
+ char name[OCTEP_VF_MSIX_NAME_SIZE];
+ struct napi_struct napi;
+ struct octep_vf_device *octep_vf_dev;
+ struct octep_vf_iq *iq;
+ struct octep_vf_oq *oq;
+ cpumask_t affinity_mask;
+};
+
+/* Octeon hardware/firmware offload capability flags. */
+#define OCTEP_VF_CAP_TX_CHECKSUM BIT(0)
+#define OCTEP_VF_CAP_RX_CHECKSUM BIT(1)
+#define OCTEP_VF_CAP_TSO BIT(2)
+
+/* Link modes */
+enum octep_vf_link_mode_bit_indices {
+ OCTEP_VF_LINK_MODE_10GBASE_T = 0,
+ OCTEP_VF_LINK_MODE_10GBASE_R,
+ OCTEP_VF_LINK_MODE_10GBASE_CR,
+ OCTEP_VF_LINK_MODE_10GBASE_KR,
+ OCTEP_VF_LINK_MODE_10GBASE_LR,
+ OCTEP_VF_LINK_MODE_10GBASE_SR,
+ OCTEP_VF_LINK_MODE_25GBASE_CR,
+ OCTEP_VF_LINK_MODE_25GBASE_KR,
+ OCTEP_VF_LINK_MODE_25GBASE_SR,
+ OCTEP_VF_LINK_MODE_40GBASE_CR4,
+ OCTEP_VF_LINK_MODE_40GBASE_KR4,
+ OCTEP_VF_LINK_MODE_40GBASE_LR4,
+ OCTEP_VF_LINK_MODE_40GBASE_SR4,
+ OCTEP_VF_LINK_MODE_50GBASE_CR2,
+ OCTEP_VF_LINK_MODE_50GBASE_KR2,
+ OCTEP_VF_LINK_MODE_50GBASE_SR2,
+ OCTEP_VF_LINK_MODE_50GBASE_CR,
+ OCTEP_VF_LINK_MODE_50GBASE_KR,
+ OCTEP_VF_LINK_MODE_50GBASE_LR,
+ OCTEP_VF_LINK_MODE_50GBASE_SR,
+ OCTEP_VF_LINK_MODE_100GBASE_CR4,
+ OCTEP_VF_LINK_MODE_100GBASE_KR4,
+ OCTEP_VF_LINK_MODE_100GBASE_LR4,
+ OCTEP_VF_LINK_MODE_100GBASE_SR4,
+ OCTEP_VF_LINK_MODE_NBITS
+};
+
+/* Hardware interface link state information. */
+struct octep_vf_iface_link_info {
+ /* Bitmap of Supported link speeds/modes. */
+ u64 supported_modes;
+
+ /* Bitmap of Advertised link speeds/modes. */
+ u64 advertised_modes;
+
+ /* Negotiated link speed in Mbps. */
+ u32 speed;
+
+ /* MTU */
+ u16 mtu;
+
+ /* Autonegotiation state. */
+#define OCTEP_VF_LINK_MODE_AUTONEG_SUPPORTED BIT(0)
+#define OCTEP_VF_LINK_MODE_AUTONEG_ADVERTISED BIT(1)
+ u8 autoneg;
+
+ /* Pause frames setting. */
+#define OCTEP_VF_LINK_MODE_PAUSE_SUPPORTED BIT(0)
+#define OCTEP_VF_LINK_MODE_PAUSE_ADVERTISED BIT(1)
+ u8 pause;
+
+ /* Admin state of the link (ifconfig <iface> up/down */
+ u8 admin_up;
+
+ /* Operational state of the link: physical link is up down */
+ u8 oper_up;
+};
+
+/* Hardware interface stats information. */
+struct octep_vf_iface_rxtx_stats {
+ /* Hardware Interface Rx statistics */
+ struct octep_vf_iface_rx_stats iface_rx_stats;
+
+ /* Hardware Interface Tx statistics */
+ struct octep_vf_iface_tx_stats iface_tx_stats;
+};
+
+struct octep_vf_fw_info {
+ /* pkind value to be used in every Tx hardware descriptor */
+ u8 pkind;
+ /* front size data */
+ u8 fsz;
+ /* supported rx offloads OCTEP_VF_RX_OFFLOAD_* */
+ u16 rx_ol_flags;
+ /* supported tx offloads OCTEP_VF_TX_OFFLOAD_* */
+ u16 tx_ol_flags;
+};
+
+/* The Octeon device specific private data structure.
+ * Each Octeon device has this structure to represent all its components.
+ */
+struct octep_vf_device {
+ struct octep_vf_config *conf;
+
+ /* Octeon Chip type. */
+ u16 chip_id;
+ u16 rev_id;
+
+ /* Device capabilities enabled */
+ u64 caps_enabled;
+ /* Device capabilities supported */
+ u64 caps_supported;
+
+ /* Pointer to basic Linux device */
+ struct device *dev;
+ /* Linux PCI device pointer */
+ struct pci_dev *pdev;
+ /* Netdev corresponding to the Octeon device */
+ struct net_device *netdev;
+
+ /* memory mapped io range */
+ struct octep_vf_mmio mmio;
+
+ /* MAC address */
+ u8 mac_addr[ETH_ALEN];
+
+ /* Tx queues (IQ: Instruction Queue) */
+ u16 num_iqs;
+ /* Pointers to Octeon Tx queues */
+ struct octep_vf_iq *iq[OCTEP_VF_MAX_IQ];
+
+ /* Rx queues (OQ: Output Queue) */
+ u16 num_oqs;
+ /* Pointers to Octeon Rx queues */
+ struct octep_vf_oq *oq[OCTEP_VF_MAX_OQ];
+
+ /* Hardware port number of the PCIe interface */
+ u16 pcie_port;
+
+ /* Hardware operations */
+ struct octep_vf_hw_ops hw_ops;
+
+ /* IRQ info */
+ u16 num_irqs;
+ u16 num_non_ioq_irqs;
+ char *non_ioq_irq_names;
+ struct msix_entry *msix_entries;
+ /* IOq information of it's corresponding MSI-X interrupt. */
+ struct octep_vf_ioq_vector *ioq_vector[OCTEP_VF_MAX_QUEUES];
+
+ /* Hardware Interface Tx statistics */
+ struct octep_vf_iface_tx_stats iface_tx_stats;
+ /* Hardware Interface Rx statistics */
+ struct octep_vf_iface_rx_stats iface_rx_stats;
+
+ /* Hardware Interface Link info like supported modes, aneg support */
+ struct octep_vf_iface_link_info link_info;
+
+ /* Mailbox to talk to VFs */
+ struct octep_vf_mbox *mbox;
+
+ /* Work entry to handle Tx timeout */
+ struct work_struct tx_timeout_task;
+
+ /* offset for iface stats */
+ u32 ctrl_mbox_ifstats_offset;
+
+ /* Negotiated Mbox version */
+ u32 mbox_neg_ver;
+
+ /* firmware info */
+ struct octep_vf_fw_info fw_info;
+};
+
+static inline u16 OCTEP_VF_MAJOR_REV(struct octep_vf_device *oct)
+{
+ u16 rev = (oct->rev_id & 0xC) >> 2;
+
+ return (rev == 0) ? 1 : rev;
+}
+
+static inline u16 OCTEP_VF_MINOR_REV(struct octep_vf_device *oct)
+{
+ return (oct->rev_id & 0x3);
+}
+
+/* Octeon CSR read/write access APIs */
+#define octep_vf_write_csr(octep_vf_dev, reg_off, value) \
+ writel(value, (octep_vf_dev)->mmio.hw_addr + (reg_off))
+
+#define octep_vf_write_csr64(octep_vf_dev, reg_off, val64) \
+ writeq(val64, (octep_vf_dev)->mmio.hw_addr + (reg_off))
+
+#define octep_vf_read_csr(octep_vf_dev, reg_off) \
+ readl((octep_vf_dev)->mmio.hw_addr + (reg_off))
+
+#define octep_vf_read_csr64(octep_vf_dev, reg_off) \
+ readq((octep_vf_dev)->mmio.hw_addr + (reg_off))
+
+extern struct workqueue_struct *octep_vf_wq;
+
+int octep_vf_device_setup(struct octep_vf_device *oct);
+int octep_vf_setup_iqs(struct octep_vf_device *oct);
+void octep_vf_free_iqs(struct octep_vf_device *oct);
+void octep_vf_clean_iqs(struct octep_vf_device *oct);
+int octep_vf_setup_oqs(struct octep_vf_device *oct);
+void octep_vf_free_oqs(struct octep_vf_device *oct);
+void octep_vf_oq_dbell_init(struct octep_vf_device *oct);
+void octep_vf_device_setup_cn93(struct octep_vf_device *oct);
+void octep_vf_device_setup_cnxk(struct octep_vf_device *oct);
+int octep_vf_iq_process_completions(struct octep_vf_iq *iq, u16 budget);
+int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget);
+void octep_vf_set_ethtool_ops(struct net_device *netdev);
+int octep_vf_get_link_info(struct octep_vf_device *oct);
+int octep_vf_get_if_stats(struct octep_vf_device *oct);
+void octep_vf_mbox_work(struct work_struct *work);
+#endif /* _OCTEP_VF_MAIN_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c
new file mode 100644
index 000000000000..2eab21e43048
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+
+/* When a new command is implemented, the below table should be updated
+ * with new command and it's version info.
+ */
+static u32 pfvf_cmd_versions[OCTEP_PFVF_MBOX_CMD_MAX] = {
+ [0 ... OCTEP_PFVF_MBOX_CMD_DEV_REMOVE] = OCTEP_PFVF_MBOX_VERSION_V1,
+ [OCTEP_PFVF_MBOX_CMD_GET_FW_INFO ... OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS] =
+ OCTEP_PFVF_MBOX_VERSION_V2
+};
+
+int octep_vf_setup_mbox(struct octep_vf_device *oct)
+{
+ int ring = 0;
+
+ oct->mbox = vzalloc(sizeof(*oct->mbox));
+ if (!oct->mbox)
+ return -1;
+
+ mutex_init(&oct->mbox->lock);
+
+ oct->hw_ops.setup_mbox_regs(oct, ring);
+ INIT_WORK(&oct->mbox->wk.work, octep_vf_mbox_work);
+ oct->mbox->wk.ctxptr = oct;
+ oct->mbox_neg_ver = OCTEP_PFVF_MBOX_VERSION_CURRENT;
+ dev_info(&oct->pdev->dev, "setup vf mbox successfully\n");
+ return 0;
+}
+
+void octep_vf_delete_mbox(struct octep_vf_device *oct)
+{
+ if (oct->mbox) {
+ if (work_pending(&oct->mbox->wk.work))
+ cancel_work_sync(&oct->mbox->wk.work);
+
+ mutex_destroy(&oct->mbox->lock);
+ vfree(oct->mbox);
+ oct->mbox = NULL;
+ dev_info(&oct->pdev->dev, "Deleted vf mbox successfully\n");
+ }
+}
+
+int octep_vf_mbox_version_check(struct octep_vf_device *oct)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s_version.opcode = OCTEP_PFVF_MBOX_CMD_VERSION;
+ cmd.s_version.version = OCTEP_PFVF_MBOX_VERSION_CURRENT;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret == OCTEP_PFVF_MBOX_CMD_STATUS_NACK) {
+ dev_err(&oct->pdev->dev,
+ "VF Mbox version is incompatible with PF\n");
+ return -EINVAL;
+ }
+ oct->mbox_neg_ver = (u32)rsp.s_version.version;
+ dev_dbg(&oct->pdev->dev,
+ "VF Mbox version:%u Negotiated VF version with PF:%u\n",
+ (u32)cmd.s_version.version,
+ (u32)rsp.s_version.version);
+ return 0;
+}
+
+void octep_vf_mbox_work(struct work_struct *work)
+{
+ struct octep_vf_mbox_wk *wk = container_of(work, struct octep_vf_mbox_wk, work);
+ struct octep_vf_iface_link_info *link_info;
+ struct octep_vf_device *oct = NULL;
+ struct octep_vf_mbox *mbox = NULL;
+ union octep_pfvf_mbox_word *notif;
+ u64 pf_vf_data;
+
+ oct = (struct octep_vf_device *)wk->ctxptr;
+ link_info = &oct->link_info;
+ mbox = oct->mbox;
+ pf_vf_data = readq(mbox->mbox_read_reg);
+
+ notif = (union octep_pfvf_mbox_word *)&pf_vf_data;
+
+ switch (notif->s.opcode) {
+ case OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS:
+ if (notif->s_link_status.status) {
+ link_info->oper_up = OCTEP_PFVF_LINK_STATUS_UP;
+ netif_carrier_on(oct->netdev);
+ dev_info(&oct->pdev->dev, "netif_carrier_on\n");
+ } else {
+ link_info->oper_up = OCTEP_PFVF_LINK_STATUS_DOWN;
+ netif_carrier_off(oct->netdev);
+ dev_info(&oct->pdev->dev, "netif_carrier_off\n");
+ }
+ break;
+ default:
+ dev_err(&oct->pdev->dev,
+ "Received unsupported notif %d\n", notif->s.opcode);
+ break;
+ }
+}
+
+static int __octep_vf_mbox_send_cmd(struct octep_vf_device *oct,
+ union octep_pfvf_mbox_word cmd,
+ union octep_pfvf_mbox_word *rsp)
+{
+ struct octep_vf_mbox *mbox = oct->mbox;
+ u64 reg_val = 0ull;
+ int count;
+
+ if (!mbox)
+ return OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP;
+
+ cmd.s.type = OCTEP_PFVF_MBOX_TYPE_CMD;
+ writeq(cmd.u64, mbox->mbox_write_reg);
+
+ /* No response for notification messages */
+ if (!rsp)
+ return 0;
+
+ for (count = 0; count < OCTEP_PFVF_MBOX_TIMEOUT_WAIT_COUNT; count++) {
+ usleep_range(1000, 1500);
+ reg_val = readq(mbox->mbox_write_reg);
+ if (reg_val != cmd.u64) {
+ rsp->u64 = reg_val;
+ break;
+ }
+ }
+ if (count == OCTEP_PFVF_MBOX_TIMEOUT_WAIT_COUNT) {
+ dev_err(&oct->pdev->dev, "mbox send command timed out\n");
+ return OCTEP_PFVF_MBOX_CMD_STATUS_TIMEDOUT;
+ }
+ if (rsp->s.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "mbox_send: Received NACK\n");
+ return OCTEP_PFVF_MBOX_CMD_STATUS_NACK;
+ }
+ rsp->u64 = reg_val;
+ return 0;
+}
+
+int octep_vf_mbox_send_cmd(struct octep_vf_device *oct, union octep_pfvf_mbox_word cmd,
+ union octep_pfvf_mbox_word *rsp)
+{
+ struct octep_vf_mbox *mbox = oct->mbox;
+ int ret;
+
+ if (!mbox)
+ return OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP;
+ mutex_lock(&mbox->lock);
+ if (pfvf_cmd_versions[cmd.s.opcode] > oct->mbox_neg_ver) {
+ dev_dbg(&oct->pdev->dev, "CMD:%d not supported in Version:%d\n",
+ cmd.s.opcode, oct->mbox_neg_ver);
+ mutex_unlock(&mbox->lock);
+ return -EOPNOTSUPP;
+ }
+ ret = __octep_vf_mbox_send_cmd(oct, cmd, rsp);
+ mutex_unlock(&mbox->lock);
+ return ret;
+}
+
+int octep_vf_mbox_bulk_read(struct octep_vf_device *oct, enum octep_pfvf_mbox_opcode opcode,
+ u8 *data, int *size)
+{
+ struct octep_vf_mbox *mbox = oct->mbox;
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int data_len = 0, tmp_len = 0;
+ int read_cnt, i = 0, ret;
+
+ if (!mbox)
+ return OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP;
+
+ mutex_lock(&mbox->lock);
+ cmd.u64 = 0;
+ cmd.s_data.opcode = opcode;
+ cmd.s_data.frag = 0;
+ /* Send cmd to read data from PF */
+ ret = __octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "send mbox cmd fail for data request\n");
+ mutex_unlock(&mbox->lock);
+ return ret;
+ }
+ /* PF sends the data length of requested CMD
+ * in ACK
+ */
+ data_len = *((int32_t *)rsp.s_data.data);
+ tmp_len = data_len;
+ cmd.u64 = 0;
+ rsp.u64 = 0;
+ cmd.s_data.opcode = opcode;
+ cmd.s_data.frag = 1;
+ while (data_len) {
+ ret = __octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "send mbox cmd fail for data request\n");
+ mutex_unlock(&mbox->lock);
+ mbox->mbox_data.data_index = 0;
+ memset(mbox->mbox_data.recv_data, 0, OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE);
+ return ret;
+ }
+ if (data_len > OCTEP_PFVF_MBOX_MAX_DATA_SIZE) {
+ data_len -= OCTEP_PFVF_MBOX_MAX_DATA_SIZE;
+ read_cnt = OCTEP_PFVF_MBOX_MAX_DATA_SIZE;
+ } else {
+ read_cnt = data_len;
+ data_len = 0;
+ }
+ for (i = 0; i < read_cnt; i++) {
+ mbox->mbox_data.recv_data[mbox->mbox_data.data_index] =
+ rsp.s_data.data[i];
+ mbox->mbox_data.data_index++;
+ }
+ cmd.u64 = 0;
+ rsp.u64 = 0;
+ cmd.s_data.opcode = opcode;
+ cmd.s_data.frag = 1;
+ }
+ memcpy(data, mbox->mbox_data.recv_data, tmp_len);
+ *size = tmp_len;
+ mbox->mbox_data.data_index = 0;
+ memset(mbox->mbox_data.recv_data, 0, OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE);
+ mutex_unlock(&mbox->lock);
+ return 0;
+}
+
+int octep_vf_mbox_set_mtu(struct octep_vf_device *oct, int mtu)
+{
+ int frame_size = mtu + ETH_HLEN + ETH_FCS_LEN;
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret = 0;
+
+ if (mtu < ETH_MIN_MTU || frame_size > ETH_MAX_MTU) {
+ dev_err(&oct->pdev->dev,
+ "Failed to set MTU to %d MIN MTU:%d MAX MTU:%d\n",
+ mtu, ETH_MIN_MTU, ETH_MAX_MTU);
+ return -EINVAL;
+ }
+
+ cmd.u64 = 0;
+ cmd.s_set_mtu.opcode = OCTEP_PFVF_MBOX_CMD_SET_MTU;
+ cmd.s_set_mtu.mtu = mtu;
+
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Mbox send failed; err=%d\n", ret);
+ return ret;
+ }
+ if (rsp.s_set_mtu.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "Received Mbox NACK from PF for MTU:%d\n", mtu);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int octep_vf_mbox_set_mac_addr(struct octep_vf_device *oct, char *mac_addr)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int i, ret;
+
+ cmd.u64 = 0;
+ cmd.s_set_mac.opcode = OCTEP_PFVF_MBOX_CMD_SET_MAC_ADDR;
+ for (i = 0; i < ETH_ALEN; i++)
+ cmd.s_set_mac.mac_addr[i] = mac_addr[i];
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Mbox send failed; err = %d\n", ret);
+ return ret;
+ }
+ if (rsp.s_set_mac.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "received NACK\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int octep_vf_mbox_get_mac_addr(struct octep_vf_device *oct, char *mac_addr)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int i, ret;
+
+ cmd.u64 = 0;
+ cmd.s_set_mac.opcode = OCTEP_PFVF_MBOX_CMD_GET_MAC_ADDR;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "get_mac: mbox send failed; err = %d\n", ret);
+ return ret;
+ }
+ if (rsp.s_set_mac.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "get_mac: received NACK\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < ETH_ALEN; i++)
+ mac_addr[i] = rsp.s_set_mac.mac_addr[i];
+ return 0;
+}
+
+int octep_vf_mbox_set_rx_state(struct octep_vf_device *oct, bool state)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s_link_state.opcode = OCTEP_PFVF_MBOX_CMD_SET_RX_STATE;
+ cmd.s_link_state.state = state;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Set Rx state via VF Mbox send failed\n");
+ return ret;
+ }
+ if (rsp.s_link_state.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "Set Rx state received NACK\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int octep_vf_mbox_set_link_status(struct octep_vf_device *oct, bool status)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s_link_status.opcode = OCTEP_PFVF_MBOX_CMD_SET_LINK_STATUS;
+ cmd.s_link_status.status = status;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Set link status via VF Mbox send failed\n");
+ return ret;
+ }
+ if (rsp.s_link_status.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "Set link status received NACK\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int octep_vf_mbox_get_link_status(struct octep_vf_device *oct, u8 *oper_up)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s_link_status.opcode = OCTEP_PFVF_MBOX_CMD_GET_LINK_STATUS;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Get link status via VF Mbox send failed\n");
+ return ret;
+ }
+ if (rsp.s_link_status.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "Get link status received NACK\n");
+ return -EINVAL;
+ }
+ *oper_up = rsp.s_link_status.status;
+ return 0;
+}
+
+int octep_vf_mbox_dev_remove(struct octep_vf_device *oct)
+{
+ union octep_pfvf_mbox_word cmd;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s.opcode = OCTEP_PFVF_MBOX_CMD_DEV_REMOVE;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, NULL);
+ return ret;
+}
+
+int octep_vf_mbox_get_fw_info(struct octep_vf_device *oct)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s_fw_info.opcode = OCTEP_PFVF_MBOX_CMD_GET_FW_INFO;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Get link status via VF Mbox send failed\n");
+ return ret;
+ }
+ if (rsp.s_fw_info.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "Get link status received NACK\n");
+ return -EINVAL;
+ }
+ oct->fw_info.pkind = rsp.s_fw_info.pkind;
+ oct->fw_info.fsz = rsp.s_fw_info.fsz;
+ oct->fw_info.rx_ol_flags = rsp.s_fw_info.rx_ol_flags;
+ oct->fw_info.tx_ol_flags = rsp.s_fw_info.tx_ol_flags;
+
+ return 0;
+}
+
+int octep_vf_mbox_set_offloads(struct octep_vf_device *oct, u16 tx_offloads,
+ u16 rx_offloads)
+{
+ union octep_pfvf_mbox_word cmd;
+ union octep_pfvf_mbox_word rsp;
+ int ret;
+
+ cmd.u64 = 0;
+ cmd.s_offloads.opcode = OCTEP_PFVF_MBOX_CMD_SET_OFFLOADS;
+ cmd.s_offloads.rx_ol_flags = rx_offloads;
+ cmd.s_offloads.tx_ol_flags = tx_offloads;
+ ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp);
+ if (ret) {
+ dev_err(&oct->pdev->dev, "Set offloads via VF Mbox send failed\n");
+ return ret;
+ }
+ if (rsp.s_link_state.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) {
+ dev_err(&oct->pdev->dev, "Set offloads received NACK\n");
+ return -EINVAL;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h
new file mode 100644
index 000000000000..9b5efad37eab
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#ifndef _OCTEP_VF_MBOX_H_
+#define _OCTEP_VF_MBOX_H_
+
+/* When a new command is implemented, VF Mbox version should be bumped.
+ */
+enum octep_pfvf_mbox_version {
+ OCTEP_PFVF_MBOX_VERSION_V0,
+ OCTEP_PFVF_MBOX_VERSION_V1,
+ OCTEP_PFVF_MBOX_VERSION_V2
+};
+
+#define OCTEP_PFVF_MBOX_VERSION_CURRENT OCTEP_PFVF_MBOX_VERSION_V2
+
+enum octep_pfvf_mbox_opcode {
+ OCTEP_PFVF_MBOX_CMD_VERSION,
+ OCTEP_PFVF_MBOX_CMD_SET_MTU,
+ OCTEP_PFVF_MBOX_CMD_SET_MAC_ADDR,
+ OCTEP_PFVF_MBOX_CMD_GET_MAC_ADDR,
+ OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO,
+ OCTEP_PFVF_MBOX_CMD_GET_STATS,
+ OCTEP_PFVF_MBOX_CMD_SET_RX_STATE,
+ OCTEP_PFVF_MBOX_CMD_SET_LINK_STATUS,
+ OCTEP_PFVF_MBOX_CMD_GET_LINK_STATUS,
+ OCTEP_PFVF_MBOX_CMD_GET_MTU,
+ OCTEP_PFVF_MBOX_CMD_DEV_REMOVE,
+ OCTEP_PFVF_MBOX_CMD_GET_FW_INFO,
+ OCTEP_PFVF_MBOX_CMD_SET_OFFLOADS,
+ OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS,
+ OCTEP_PFVF_MBOX_CMD_MAX,
+};
+
+enum octep_pfvf_mbox_word_type {
+ OCTEP_PFVF_MBOX_TYPE_CMD,
+ OCTEP_PFVF_MBOX_TYPE_RSP_ACK,
+ OCTEP_PFVF_MBOX_TYPE_RSP_NACK,
+};
+
+enum octep_pfvf_mbox_cmd_status {
+ OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP = 1,
+ OCTEP_PFVF_MBOX_CMD_STATUS_TIMEDOUT = 2,
+ OCTEP_PFVF_MBOX_CMD_STATUS_NACK = 3,
+ OCTEP_PFVF_MBOX_CMD_STATUS_BUSY = 4,
+ OCTEP_PFVF_MBOX_CMD_STATUS_ERR = 5
+};
+
+enum octep_pfvf_link_status {
+ OCTEP_PFVF_LINK_STATUS_DOWN,
+ OCTEP_PFVF_LINK_STATUS_UP,
+};
+
+enum octep_pfvf_link_speed {
+ OCTEP_PFVF_LINK_SPEED_NONE,
+ OCTEP_PFVF_LINK_SPEED_1000,
+ OCTEP_PFVF_LINK_SPEED_10000,
+ OCTEP_PFVF_LINK_SPEED_25000,
+ OCTEP_PFVF_LINK_SPEED_40000,
+ OCTEP_PFVF_LINK_SPEED_50000,
+ OCTEP_PFVF_LINK_SPEED_100000,
+ OCTEP_PFVF_LINK_SPEED_LAST,
+};
+
+enum octep_pfvf_link_duplex {
+ OCTEP_PFVF_LINK_HALF_DUPLEX,
+ OCTEP_PFVF_LINK_FULL_DUPLEX,
+};
+
+enum octep_pfvf_link_autoneg {
+ OCTEP_PFVF_LINK_AUTONEG,
+ OCTEP_PFVF_LINK_FIXED,
+};
+
+#define OCTEP_PFVF_MBOX_TIMEOUT_WAIT_COUNT 8000
+#define OCTEP_PFVF_MBOX_TIMEOUT_WAIT_UDELAY 1000
+#define OCTEP_PFVF_MBOX_MAX_RETRIES 2
+#define OCTEP_PFVF_MBOX_VERSION 0
+#define OCTEP_PFVF_MBOX_MAX_DATA_SIZE 6
+#define OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE 320
+#define OCTEP_PFVF_MBOX_MORE_FRAG_FLAG 1
+
+union octep_pfvf_mbox_word {
+ u64 u64;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 rsvd:6;
+ u64 data:48;
+ } s;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 frag:1;
+ u64 rsvd:5;
+ u8 data[6];
+ } s_data;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 rsvd:6;
+ u64 version:48;
+ } s_version;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 rsvd:6;
+ u8 mac_addr[6];
+ } s_set_mac;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 rsvd:6;
+ u64 mtu:48;
+ } s_set_mtu;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 state:1;
+ u64 rsvd:53;
+ } s_link_state;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 status:1;
+ u64 rsvd:53;
+ } s_link_status;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 pkind:8;
+ u64 fsz:8;
+ u64 rx_ol_flags:16;
+ u64 tx_ol_flags:16;
+ u64 rsvd:6;
+ } s_fw_info;
+ struct {
+ u64 opcode:8;
+ u64 type:2;
+ u64 rsvd:22;
+ u64 rx_ol_flags:16;
+ u64 tx_ol_flags:16;
+ } s_offloads;
+} __packed;
+
+int octep_vf_setup_mbox(struct octep_vf_device *oct);
+void octep_vf_delete_mbox(struct octep_vf_device *oct);
+int octep_vf_mbox_send_cmd(struct octep_vf_device *oct, union octep_pfvf_mbox_word cmd,
+ union octep_pfvf_mbox_word *rsp);
+int octep_vf_mbox_bulk_read(struct octep_vf_device *oct, enum octep_pfvf_mbox_opcode opcode,
+ u8 *data, int *size);
+int octep_vf_mbox_set_mtu(struct octep_vf_device *oct, int mtu);
+int octep_vf_mbox_set_mac_addr(struct octep_vf_device *oct, char *mac_addr);
+int octep_vf_mbox_get_mac_addr(struct octep_vf_device *oct, char *mac_addr);
+int octep_vf_mbox_version_check(struct octep_vf_device *oct);
+int octep_vf_mbox_set_rx_state(struct octep_vf_device *oct, bool state);
+int octep_vf_mbox_set_link_status(struct octep_vf_device *oct, bool status);
+int octep_vf_mbox_get_link_status(struct octep_vf_device *oct, u8 *oper_up);
+int octep_vf_mbox_dev_remove(struct octep_vf_device *oct);
+int octep_vf_mbox_get_fw_info(struct octep_vf_device *oct);
+int octep_vf_mbox_set_offloads(struct octep_vf_device *oct, u16 tx_offloads, u16 rx_offloads);
+
+#endif
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h
new file mode 100644
index 000000000000..25e2a876ebba
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#ifndef _OCTEP_VF_REGS_CN9K_H_
+#define _OCTEP_VF_REGS_CN9K_H_
+
+/*############################ RST #########################*/
+#define CN93_VF_CONFIG_XPANSION_BAR 0x38
+#define CN93_VF_CONFIG_PCIE_CAP 0x70
+#define CN93_VF_CONFIG_PCIE_DEVCAP 0x74
+#define CN93_VF_CONFIG_PCIE_DEVCTL 0x78
+#define CN93_VF_CONFIG_PCIE_LINKCAP 0x7C
+#define CN93_VF_CONFIG_PCIE_LINKCTL 0x80
+#define CN93_VF_CONFIG_PCIE_SLOTCAP 0x84
+#define CN93_VF_CONFIG_PCIE_SLOTCTL 0x88
+
+#define CN93_VF_RING_OFFSET BIT_ULL(17)
+
+/*###################### RING IN REGISTERS #########################*/
+#define CN93_VF_SDP_R_IN_CONTROL_START 0x10000
+#define CN93_VF_SDP_R_IN_ENABLE_START 0x10010
+#define CN93_VF_SDP_R_IN_INSTR_BADDR_START 0x10020
+#define CN93_VF_SDP_R_IN_INSTR_RSIZE_START 0x10030
+#define CN93_VF_SDP_R_IN_INSTR_DBELL_START 0x10040
+#define CN93_VF_SDP_R_IN_CNTS_START 0x10050
+#define CN93_VF_SDP_R_IN_INT_LEVELS_START 0x10060
+#define CN93_VF_SDP_R_IN_PKT_CNT_START 0x10080
+#define CN93_VF_SDP_R_IN_BYTE_CNT_START 0x10090
+
+#define CN93_VF_SDP_R_IN_CONTROL(ring) \
+ (CN93_VF_SDP_R_IN_CONTROL_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_ENABLE(ring) \
+ (CN93_VF_SDP_R_IN_ENABLE_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_INSTR_BADDR(ring) \
+ (CN93_VF_SDP_R_IN_INSTR_BADDR_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_INSTR_RSIZE(ring) \
+ (CN93_VF_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_INSTR_DBELL(ring) \
+ (CN93_VF_SDP_R_IN_INSTR_DBELL_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_CNTS(ring) \
+ (CN93_VF_SDP_R_IN_CNTS_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_INT_LEVELS(ring) \
+ (CN93_VF_SDP_R_IN_INT_LEVELS_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_PKT_CNT(ring) \
+ (CN93_VF_SDP_R_IN_PKT_CNT_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_IN_BYTE_CNT(ring) \
+ (CN93_VF_SDP_R_IN_BYTE_CNT_START + ((ring) * CN93_VF_RING_OFFSET))
+
+/*------------------ R_IN Masks ----------------*/
+
+/** Rings per Virtual Function **/
+#define CN93_VF_R_IN_CTL_RPVF_MASK (0xF)
+#define CN93_VF_R_IN_CTL_RPVF_POS (48)
+
+/* Number of instructions to be read in one MAC read request.
+ * setting to Max value(4)
+ **/
+#define CN93_VF_R_IN_CTL_IDLE BIT_ULL(28)
+#define CN93_VF_R_IN_CTL_RDSIZE (0x3ULL << 25)
+#define CN93_VF_R_IN_CTL_IS_64B BIT_ULL(24)
+#define CN93_VF_R_IN_CTL_D_NSR BIT_ULL(8)
+#define CN93_VF_R_IN_CTL_D_ESR BIT_ULL(6)
+#define CN93_VF_R_IN_CTL_D_ROR BIT_ULL(5)
+#define CN93_VF_R_IN_CTL_NSR BIT_ULL(3)
+#define CN93_VF_R_IN_CTL_ESR BIT_ULL(1)
+#define CN93_VF_R_IN_CTL_ROR BIT_ULL(0)
+
+#define CN93_VF_R_IN_CTL_MASK (CN93_VF_R_IN_CTL_RDSIZE | CN93_VF_R_IN_CTL_IS_64B)
+
+/*###################### RING OUT REGISTERS #########################*/
+#define CN93_VF_SDP_R_OUT_CNTS_START 0x10100
+#define CN93_VF_SDP_R_OUT_INT_LEVELS_START 0x10110
+#define CN93_VF_SDP_R_OUT_SLIST_BADDR_START 0x10120
+#define CN93_VF_SDP_R_OUT_SLIST_RSIZE_START 0x10130
+#define CN93_VF_SDP_R_OUT_SLIST_DBELL_START 0x10140
+#define CN93_VF_SDP_R_OUT_CONTROL_START 0x10150
+#define CN93_VF_SDP_R_OUT_ENABLE_START 0x10160
+#define CN93_VF_SDP_R_OUT_PKT_CNT_START 0x10180
+#define CN93_VF_SDP_R_OUT_BYTE_CNT_START 0x10190
+
+#define CN93_VF_SDP_R_OUT_CONTROL(ring) \
+ (CN93_VF_SDP_R_OUT_CONTROL_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_ENABLE(ring) \
+ (CN93_VF_SDP_R_OUT_ENABLE_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_SLIST_BADDR(ring) \
+ (CN93_VF_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_SLIST_RSIZE(ring) \
+ (CN93_VF_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_SLIST_DBELL(ring) \
+ (CN93_VF_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_CNTS(ring) \
+ (CN93_VF_SDP_R_OUT_CNTS_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_INT_LEVELS(ring) \
+ (CN93_VF_SDP_R_OUT_INT_LEVELS_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_PKT_CNT(ring) \
+ (CN93_VF_SDP_R_OUT_PKT_CNT_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_OUT_BYTE_CNT(ring) \
+ (CN93_VF_SDP_R_OUT_BYTE_CNT_START + ((ring) * CN93_VF_RING_OFFSET))
+
+/*------------------ R_OUT Masks ----------------*/
+#define CN93_VF_R_OUT_INT_LEVELS_BMODE BIT_ULL(63)
+#define CN93_VF_R_OUT_INT_LEVELS_TIMET (32)
+
+#define CN93_VF_R_OUT_CTL_IDLE BIT_ULL(40)
+#define CN93_VF_R_OUT_CTL_ES_I BIT_ULL(34)
+#define CN93_VF_R_OUT_CTL_NSR_I BIT_ULL(33)
+#define CN93_VF_R_OUT_CTL_ROR_I BIT_ULL(32)
+#define CN93_VF_R_OUT_CTL_ES_D BIT_ULL(30)
+#define CN93_VF_R_OUT_CTL_NSR_D BIT_ULL(29)
+#define CN93_VF_R_OUT_CTL_ROR_D BIT_ULL(28)
+#define CN93_VF_R_OUT_CTL_ES_P BIT_ULL(26)
+#define CN93_VF_R_OUT_CTL_NSR_P BIT_ULL(25)
+#define CN93_VF_R_OUT_CTL_ROR_P BIT_ULL(24)
+#define CN93_VF_R_OUT_CTL_IMODE BIT_ULL(23)
+
+/* ##################### Mail Box Registers ########################## */
+/* SDP PF to VF Mailbox Data Register */
+#define CN93_VF_SDP_R_MBOX_PF_VF_DATA_START 0x10210
+/* SDP Packet PF to VF Mailbox Interrupt Register */
+#define CN93_VF_SDP_R_MBOX_PF_VF_INT_START 0x10220
+/* SDP VF to PF Mailbox Data Register */
+#define CN93_VF_SDP_R_MBOX_VF_PF_DATA_START 0x10230
+
+#define CN93_VF_SDP_R_MBOX_PF_VF_INT_ENAB BIT_ULL(1)
+#define CN93_VF_SDP_R_MBOX_PF_VF_INT_STATUS BIT_ULL(0)
+
+#define CN93_VF_SDP_R_MBOX_PF_VF_DATA(ring) \
+ (CN93_VF_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_MBOX_PF_VF_INT(ring) \
+ (CN93_VF_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CN93_VF_RING_OFFSET))
+
+#define CN93_VF_SDP_R_MBOX_VF_PF_DATA(ring) \
+ (CN93_VF_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CN93_VF_RING_OFFSET))
+#endif /* _OCTEP_VF_REGS_CN9K_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h
new file mode 100644
index 000000000000..2e156745ef64
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#ifndef _OCTEP_VF_REGS_CNXK_H_
+#define _OCTEP_VF_REGS_CNXK_H_
+
+/*############################ RST #########################*/
+#define CNXK_VF_CONFIG_XPANSION_BAR 0x38
+#define CNXK_VF_CONFIG_PCIE_CAP 0x70
+#define CNXK_VF_CONFIG_PCIE_DEVCAP 0x74
+#define CNXK_VF_CONFIG_PCIE_DEVCTL 0x78
+#define CNXK_VF_CONFIG_PCIE_LINKCAP 0x7C
+#define CNXK_VF_CONFIG_PCIE_LINKCTL 0x80
+#define CNXK_VF_CONFIG_PCIE_SLOTCAP 0x84
+#define CNXK_VF_CONFIG_PCIE_SLOTCTL 0x88
+
+#define CNXK_VF_RING_OFFSET (0x1ULL << 17)
+
+/*###################### RING IN REGISTERS #########################*/
+#define CNXK_VF_SDP_R_IN_CONTROL_START 0x10000
+#define CNXK_VF_SDP_R_IN_ENABLE_START 0x10010
+#define CNXK_VF_SDP_R_IN_INSTR_BADDR_START 0x10020
+#define CNXK_VF_SDP_R_IN_INSTR_RSIZE_START 0x10030
+#define CNXK_VF_SDP_R_IN_INSTR_DBELL_START 0x10040
+#define CNXK_VF_SDP_R_IN_CNTS_START 0x10050
+#define CNXK_VF_SDP_R_IN_INT_LEVELS_START 0x10060
+#define CNXK_VF_SDP_R_IN_PKT_CNT_START 0x10080
+#define CNXK_VF_SDP_R_IN_BYTE_CNT_START 0x10090
+#define CNXK_VF_SDP_R_ERR_TYPE_START 0x10400
+
+#define CNXK_VF_SDP_R_ERR_TYPE(ring) \
+ (CNXK_VF_SDP_R_ERR_TYPE_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_CONTROL(ring) \
+ (CNXK_VF_SDP_R_IN_CONTROL_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_ENABLE(ring) \
+ (CNXK_VF_SDP_R_IN_ENABLE_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_INSTR_BADDR(ring) \
+ (CNXK_VF_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_INSTR_RSIZE(ring) \
+ (CNXK_VF_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_INSTR_DBELL(ring) \
+ (CNXK_VF_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_CNTS(ring) \
+ (CNXK_VF_SDP_R_IN_CNTS_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_INT_LEVELS(ring) \
+ (CNXK_VF_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_PKT_CNT(ring) \
+ (CNXK_VF_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_IN_BYTE_CNT(ring) \
+ (CNXK_VF_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+/*------------------ R_IN Masks ----------------*/
+
+/** Rings per Virtual Function **/
+#define CNXK_VF_R_IN_CTL_RPVF_MASK (0xF)
+#define CNXK_VF_R_IN_CTL_RPVF_POS (48)
+
+/* Number of instructions to be read in one MAC read request.
+ * setting to Max value(4)
+ **/
+#define CNXK_VF_R_IN_CTL_IDLE (0x1ULL << 28)
+#define CNXK_VF_R_IN_CTL_RDSIZE (0x3ULL << 25)
+#define CNXK_VF_R_IN_CTL_IS_64B (0x1ULL << 24)
+#define CNXK_VF_R_IN_CTL_D_NSR (0x1ULL << 8)
+#define CNXK_VF_R_IN_CTL_D_ESR (0x1ULL << 6)
+#define CNXK_VF_R_IN_CTL_D_ROR (0x1ULL << 5)
+#define CNXK_VF_R_IN_CTL_NSR (0x1ULL << 3)
+#define CNXK_VF_R_IN_CTL_ESR (0x1ULL << 1)
+#define CNXK_VF_R_IN_CTL_ROR (0x1ULL << 0)
+
+#define CNXK_VF_R_IN_CTL_MASK (CNXK_VF_R_IN_CTL_RDSIZE | CNXK_VF_R_IN_CTL_IS_64B)
+
+/*###################### RING OUT REGISTERS #########################*/
+#define CNXK_VF_SDP_R_OUT_CNTS_START 0x10100
+#define CNXK_VF_SDP_R_OUT_INT_LEVELS_START 0x10110
+#define CNXK_VF_SDP_R_OUT_SLIST_BADDR_START 0x10120
+#define CNXK_VF_SDP_R_OUT_SLIST_RSIZE_START 0x10130
+#define CNXK_VF_SDP_R_OUT_SLIST_DBELL_START 0x10140
+#define CNXK_VF_SDP_R_OUT_CONTROL_START 0x10150
+#define CNXK_VF_SDP_R_OUT_WMARK_START 0x10160
+#define CNXK_VF_SDP_R_OUT_ENABLE_START 0x10170
+#define CNXK_VF_SDP_R_OUT_PKT_CNT_START 0x10180
+#define CNXK_VF_SDP_R_OUT_BYTE_CNT_START 0x10190
+
+#define CNXK_VF_SDP_R_OUT_CONTROL(ring) \
+ (CNXK_VF_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_ENABLE(ring) \
+ (CNXK_VF_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_SLIST_BADDR(ring) \
+ (CNXK_VF_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_SLIST_RSIZE(ring) \
+ (CNXK_VF_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_SLIST_DBELL(ring) \
+ (CNXK_VF_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_WMARK(ring) \
+ (CNXK_VF_SDP_R_OUT_WMARK_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_CNTS(ring) \
+ (CNXK_VF_SDP_R_OUT_CNTS_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_INT_LEVELS(ring) \
+ (CNXK_VF_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_PKT_CNT(ring) \
+ (CNXK_VF_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_OUT_BYTE_CNT(ring) \
+ (CNXK_VF_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+/*------------------ R_OUT Masks ----------------*/
+#define CNXK_VF_R_OUT_INT_LEVELS_BMODE BIT_ULL(63)
+#define CNXK_VF_R_OUT_INT_LEVELS_TIMET (32)
+
+#define CNXK_VF_R_OUT_CTL_IDLE BIT_ULL(40)
+#define CNXK_VF_R_OUT_CTL_ES_I BIT_ULL(34)
+#define CNXK_VF_R_OUT_CTL_NSR_I BIT_ULL(33)
+#define CNXK_VF_R_OUT_CTL_ROR_I BIT_ULL(32)
+#define CNXK_VF_R_OUT_CTL_ES_D BIT_ULL(30)
+#define CNXK_VF_R_OUT_CTL_NSR_D BIT_ULL(29)
+#define CNXK_VF_R_OUT_CTL_ROR_D BIT_ULL(28)
+#define CNXK_VF_R_OUT_CTL_ES_P BIT_ULL(26)
+#define CNXK_VF_R_OUT_CTL_NSR_P BIT_ULL(25)
+#define CNXK_VF_R_OUT_CTL_ROR_P BIT_ULL(24)
+#define CNXK_VF_R_OUT_CTL_IMODE BIT_ULL(23)
+
+/* ##################### Mail Box Registers ########################## */
+/* SDP PF to VF Mailbox Data Register */
+#define CNXK_VF_SDP_R_MBOX_PF_VF_DATA_START 0x10210
+/* SDP Packet PF to VF Mailbox Interrupt Register */
+#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_START 0x10220
+/* SDP VF to PF Mailbox Data Register */
+#define CNXK_VF_SDP_R_MBOX_VF_PF_DATA_START 0x10230
+
+#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_ENAB BIT_ULL(1)
+#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_STATUS BIT_ULL(0)
+
+#define CNXK_VF_SDP_R_MBOX_PF_VF_DATA(ring) \
+ (CNXK_VF_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_MBOX_PF_VF_INT(ring) \
+ (CNXK_VF_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_VF_RING_OFFSET))
+
+#define CNXK_VF_SDP_R_MBOX_VF_PF_DATA(ring) \
+ (CNXK_VF_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_VF_RING_OFFSET))
+#endif /* _OCTEP_VF_REGS_CNXK_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
new file mode 100644
index 000000000000..82821bc28634
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
@@ -0,0 +1,510 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/vmalloc.h>
+
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+
+static void octep_vf_oq_reset_indices(struct octep_vf_oq *oq)
+{
+ oq->host_read_idx = 0;
+ oq->host_refill_idx = 0;
+ oq->refill_count = 0;
+ oq->last_pkt_count = 0;
+ oq->pkts_pending = 0;
+}
+
+/**
+ * octep_vf_oq_fill_ring_buffers() - fill initial receive buffers for Rx ring.
+ *
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Return: 0, if successfully filled receive buffers for all descriptors.
+ * -ENOMEM, if failed to allocate a buffer or failed to map for DMA.
+ */
+static int octep_vf_oq_fill_ring_buffers(struct octep_vf_oq *oq)
+{
+ struct octep_vf_oq_desc_hw *desc_ring = oq->desc_ring;
+ struct page *page;
+ u32 i;
+
+ for (i = 0; i < oq->max_count; i++) {
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ dev_err(oq->dev, "Rx buffer alloc failed\n");
+ goto rx_buf_alloc_err;
+ }
+ desc_ring[i].buffer_ptr = dma_map_page(oq->dev, page, 0,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(oq->dev, desc_ring[i].buffer_ptr)) {
+ dev_err(oq->dev,
+ "OQ-%d buffer alloc: DMA mapping error!\n",
+ oq->q_no);
+ goto dma_map_err;
+ }
+ oq->buff_info[i].page = page;
+ }
+
+ return 0;
+
+dma_map_err:
+ put_page(page);
+rx_buf_alloc_err:
+ while (i) {
+ i--;
+ dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr, PAGE_SIZE, DMA_FROM_DEVICE);
+ put_page(oq->buff_info[i].page);
+ oq->buff_info[i].page = NULL;
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * octep_vf_oq_refill() - refill buffers for used Rx ring descriptors.
+ *
+ * @oct: Octeon device private data structure.
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Return: number of descriptors successfully refilled with receive buffers.
+ */
+static int octep_vf_oq_refill(struct octep_vf_device *oct, struct octep_vf_oq *oq)
+{
+ struct octep_vf_oq_desc_hw *desc_ring = oq->desc_ring;
+ struct page *page;
+ u32 refill_idx, i;
+
+ refill_idx = oq->host_refill_idx;
+ for (i = 0; i < oq->refill_count; i++) {
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ dev_err(oq->dev, "refill: rx buffer alloc failed\n");
+ oq->stats.alloc_failures++;
+ break;
+ }
+
+ desc_ring[refill_idx].buffer_ptr = dma_map_page(oq->dev, page, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(oq->dev, desc_ring[refill_idx].buffer_ptr)) {
+ dev_err(oq->dev,
+ "OQ-%d buffer refill: DMA mapping error!\n",
+ oq->q_no);
+ put_page(page);
+ oq->stats.alloc_failures++;
+ break;
+ }
+ oq->buff_info[refill_idx].page = page;
+ refill_idx++;
+ if (refill_idx == oq->max_count)
+ refill_idx = 0;
+ }
+ oq->host_refill_idx = refill_idx;
+ oq->refill_count -= i;
+
+ return i;
+}
+
+/**
+ * octep_vf_setup_oq() - Setup a Rx queue.
+ *
+ * @oct: Octeon device private data structure.
+ * @q_no: Rx queue number to be setup.
+ *
+ * Allocate resources for a Rx queue.
+ */
+static int octep_vf_setup_oq(struct octep_vf_device *oct, int q_no)
+{
+ struct octep_vf_oq *oq;
+ u32 desc_ring_size;
+
+ oq = vzalloc(sizeof(*oq));
+ if (!oq)
+ goto create_oq_fail;
+ oct->oq[q_no] = oq;
+
+ oq->octep_vf_dev = oct;
+ oq->netdev = oct->netdev;
+ oq->dev = &oct->pdev->dev;
+ oq->q_no = q_no;
+ oq->max_count = CFG_GET_OQ_NUM_DESC(oct->conf);
+ oq->ring_size_mask = oq->max_count - 1;
+ oq->buffer_size = CFG_GET_OQ_BUF_SIZE(oct->conf);
+ oq->max_single_buffer_size = oq->buffer_size - OCTEP_VF_OQ_RESP_HW_SIZE;
+
+ /* When the hardware/firmware supports additional capabilities,
+ * additional header is filled-in by Octeon after length field in
+ * Rx packets. this header contains additional packet information.
+ */
+ if (oct->fw_info.rx_ol_flags)
+ oq->max_single_buffer_size -= OCTEP_VF_OQ_RESP_HW_EXT_SIZE;
+
+ oq->refill_threshold = CFG_GET_OQ_REFILL_THRESHOLD(oct->conf);
+
+ desc_ring_size = oq->max_count * OCTEP_VF_OQ_DESC_SIZE;
+ oq->desc_ring = dma_alloc_coherent(oq->dev, desc_ring_size,
+ &oq->desc_ring_dma, GFP_KERNEL);
+
+ if (unlikely(!oq->desc_ring)) {
+ dev_err(oq->dev,
+ "Failed to allocate DMA memory for OQ-%d !!\n", q_no);
+ goto desc_dma_alloc_err;
+ }
+
+ oq->buff_info = vzalloc(oq->max_count * OCTEP_VF_OQ_RECVBUF_SIZE);
+
+ if (unlikely(!oq->buff_info)) {
+ dev_err(&oct->pdev->dev,
+ "Failed to allocate buffer info for OQ-%d\n", q_no);
+ goto buf_list_err;
+ }
+
+ if (octep_vf_oq_fill_ring_buffers(oq))
+ goto oq_fill_buff_err;
+
+ octep_vf_oq_reset_indices(oq);
+ oct->hw_ops.setup_oq_regs(oct, q_no);
+ oct->num_oqs++;
+
+ return 0;
+
+oq_fill_buff_err:
+ vfree(oq->buff_info);
+ oq->buff_info = NULL;
+buf_list_err:
+ dma_free_coherent(oq->dev, desc_ring_size,
+ oq->desc_ring, oq->desc_ring_dma);
+ oq->desc_ring = NULL;
+desc_dma_alloc_err:
+ vfree(oq);
+ oct->oq[q_no] = NULL;
+create_oq_fail:
+ return -ENOMEM;
+}
+
+/**
+ * octep_vf_oq_free_ring_buffers() - Free ring buffers.
+ *
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Free receive buffers in unused Rx queue descriptors.
+ */
+static void octep_vf_oq_free_ring_buffers(struct octep_vf_oq *oq)
+{
+ struct octep_vf_oq_desc_hw *desc_ring = oq->desc_ring;
+ int i;
+
+ if (!oq->desc_ring || !oq->buff_info)
+ return;
+
+ for (i = 0; i < oq->max_count; i++) {
+ if (oq->buff_info[i].page) {
+ dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ put_page(oq->buff_info[i].page);
+ oq->buff_info[i].page = NULL;
+ desc_ring[i].buffer_ptr = 0;
+ }
+ }
+ octep_vf_oq_reset_indices(oq);
+}
+
+/**
+ * octep_vf_free_oq() - Free Rx queue resources.
+ *
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Free all resources of a Rx queue.
+ */
+static int octep_vf_free_oq(struct octep_vf_oq *oq)
+{
+ struct octep_vf_device *oct = oq->octep_vf_dev;
+ int q_no = oq->q_no;
+
+ octep_vf_oq_free_ring_buffers(oq);
+
+ vfree(oq->buff_info);
+
+ if (oq->desc_ring)
+ dma_free_coherent(oq->dev,
+ oq->max_count * OCTEP_VF_OQ_DESC_SIZE,
+ oq->desc_ring, oq->desc_ring_dma);
+
+ vfree(oq);
+ oct->oq[q_no] = NULL;
+ oct->num_oqs--;
+ return 0;
+}
+
+/**
+ * octep_vf_setup_oqs() - setup resources for all Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+int octep_vf_setup_oqs(struct octep_vf_device *oct)
+{
+ int i, retval = 0;
+
+ oct->num_oqs = 0;
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ retval = octep_vf_setup_oq(oct, i);
+ if (retval) {
+ dev_err(&oct->pdev->dev,
+ "Failed to setup OQ(RxQ)-%d.\n", i);
+ goto oq_setup_err;
+ }
+ dev_dbg(&oct->pdev->dev, "Successfully setup OQ(RxQ)-%d.\n", i);
+ }
+
+ return 0;
+
+oq_setup_err:
+ while (i) {
+ i--;
+ octep_vf_free_oq(oct->oq[i]);
+ }
+ return retval;
+}
+
+/**
+ * octep_vf_oq_dbell_init() - Initialize Rx queue doorbell.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Write number of descriptors to Rx queue doorbell register.
+ */
+void octep_vf_oq_dbell_init(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++)
+ writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg);
+}
+
+/**
+ * octep_vf_free_oqs() - Free resources of all Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+void octep_vf_free_oqs(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ if (!oct->oq[i])
+ continue;
+ octep_vf_free_oq(oct->oq[i]);
+ dev_dbg(&oct->pdev->dev,
+ "Successfully freed OQ(RxQ)-%d.\n", i);
+ }
+}
+
+/**
+ * octep_vf_oq_check_hw_for_pkts() - Check for new Rx packets.
+ *
+ * @oct: Octeon device private data structure.
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Return: packets received after previous check.
+ */
+static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct,
+ struct octep_vf_oq *oq)
+{
+ u32 pkt_count, new_pkts;
+
+ pkt_count = readl(oq->pkts_sent_reg);
+ new_pkts = pkt_count - oq->last_pkt_count;
+
+ /* Clear the hardware packets counter register if the rx queue is
+ * being processed continuously with-in a single interrupt and
+ * reached half its max value.
+ * this counter is not cleared every time read, to save write cycles.
+ */
+ if (unlikely(pkt_count > 0xF0000000U)) {
+ writel(pkt_count, oq->pkts_sent_reg);
+ pkt_count = readl(oq->pkts_sent_reg);
+ new_pkts += pkt_count;
+ }
+ oq->last_pkt_count = pkt_count;
+ oq->pkts_pending += new_pkts;
+ return new_pkts;
+}
+
+/**
+ * __octep_vf_oq_process_rx() - Process hardware Rx queue and push to stack.
+ *
+ * @oct: Octeon device private data structure.
+ * @oq: Octeon Rx queue data structure.
+ * @pkts_to_process: number of packets to be processed.
+ *
+ * Process the new packets in Rx queue.
+ * Packets larger than single Rx buffer arrive in consecutive descriptors.
+ * But, count returned by the API only accounts full packets, not fragments.
+ *
+ * Return: number of packets processed and pushed to stack.
+ */
+static int __octep_vf_oq_process_rx(struct octep_vf_device *oct,
+ struct octep_vf_oq *oq, u16 pkts_to_process)
+{
+ struct octep_vf_oq_resp_hw_ext *resp_hw_ext = NULL;
+ netdev_features_t feat = oq->netdev->features;
+ struct octep_vf_rx_buffer *buff_info;
+ struct octep_vf_oq_resp_hw *resp_hw;
+ u32 pkt, rx_bytes, desc_used;
+ u16 data_offset, rx_ol_flags;
+ struct sk_buff *skb;
+ u32 read_idx;
+
+ read_idx = oq->host_read_idx;
+ rx_bytes = 0;
+ desc_used = 0;
+ for (pkt = 0; pkt < pkts_to_process; pkt++) {
+ buff_info = (struct octep_vf_rx_buffer *)&oq->buff_info[read_idx];
+ dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ resp_hw = page_address(buff_info->page);
+ buff_info->page = NULL;
+
+ /* Swap the length field that is in Big-Endian to CPU */
+ buff_info->len = be64_to_cpu(resp_hw->length);
+ if (oct->fw_info.rx_ol_flags) {
+ /* Extended response header is immediately after
+ * response header (resp_hw)
+ */
+ resp_hw_ext = (struct octep_vf_oq_resp_hw_ext *)
+ (resp_hw + 1);
+ buff_info->len -= OCTEP_VF_OQ_RESP_HW_EXT_SIZE;
+ /* Packet Data is immediately after
+ * extended response header.
+ */
+ data_offset = OCTEP_VF_OQ_RESP_HW_SIZE +
+ OCTEP_VF_OQ_RESP_HW_EXT_SIZE;
+ rx_ol_flags = resp_hw_ext->rx_ol_flags;
+ } else {
+ /* Data is immediately after
+ * Hardware Rx response header.
+ */
+ data_offset = OCTEP_VF_OQ_RESP_HW_SIZE;
+ rx_ol_flags = 0;
+ }
+ rx_bytes += buff_info->len;
+
+ if (buff_info->len <= oq->max_single_buffer_size) {
+ skb = napi_build_skb((void *)resp_hw, PAGE_SIZE);
+ skb_reserve(skb, data_offset);
+ skb_put(skb, buff_info->len);
+ read_idx++;
+ desc_used++;
+ if (read_idx == oq->max_count)
+ read_idx = 0;
+ } else {
+ struct skb_shared_info *shinfo;
+ u16 data_len;
+
+ skb = napi_build_skb((void *)resp_hw, PAGE_SIZE);
+ skb_reserve(skb, data_offset);
+ /* Head fragment includes response header(s);
+ * subsequent fragments contains only data.
+ */
+ skb_put(skb, oq->max_single_buffer_size);
+ read_idx++;
+ desc_used++;
+ if (read_idx == oq->max_count)
+ read_idx = 0;
+
+ shinfo = skb_shinfo(skb);
+ data_len = buff_info->len - oq->max_single_buffer_size;
+ while (data_len) {
+ dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ buff_info = (struct octep_vf_rx_buffer *)
+ &oq->buff_info[read_idx];
+ if (data_len < oq->buffer_size) {
+ buff_info->len = data_len;
+ data_len = 0;
+ } else {
+ buff_info->len = oq->buffer_size;
+ data_len -= oq->buffer_size;
+ }
+
+ skb_add_rx_frag(skb, shinfo->nr_frags,
+ buff_info->page, 0,
+ buff_info->len,
+ buff_info->len);
+ buff_info->page = NULL;
+ read_idx++;
+ desc_used++;
+ if (read_idx == oq->max_count)
+ read_idx = 0;
+ }
+ }
+
+ skb->dev = oq->netdev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ if (feat & NETIF_F_RXCSUM &&
+ OCTEP_VF_RX_CSUM_VERIFIED(rx_ol_flags))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+ napi_gro_receive(oq->napi, skb);
+ }
+
+ oq->host_read_idx = read_idx;
+ oq->refill_count += desc_used;
+ oq->stats.packets += pkt;
+ oq->stats.bytes += rx_bytes;
+
+ return pkt;
+}
+
+/**
+ * octep_vf_oq_process_rx() - Process Rx queue.
+ *
+ * @oq: Octeon Rx queue data structure.
+ * @budget: max number of packets can be processed in one invocation.
+ *
+ * Check for newly received packets and process them.
+ * Keeps checking for new packets until budget is used or no new packets seen.
+ *
+ * Return: number of packets processed.
+ */
+int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget)
+{
+ u32 pkts_available, pkts_processed, total_pkts_processed;
+ struct octep_vf_device *oct = oq->octep_vf_dev;
+
+ pkts_available = 0;
+ pkts_processed = 0;
+ total_pkts_processed = 0;
+ while (total_pkts_processed < budget) {
+ /* update pending count only when current one exhausted */
+ if (oq->pkts_pending == 0)
+ octep_vf_oq_check_hw_for_pkts(oct, oq);
+ pkts_available = min(budget - total_pkts_processed,
+ oq->pkts_pending);
+ if (!pkts_available)
+ break;
+
+ pkts_processed = __octep_vf_oq_process_rx(oct, oq,
+ pkts_available);
+ oq->pkts_pending -= pkts_processed;
+ total_pkts_processed += pkts_processed;
+ }
+
+ if (oq->refill_count >= oq->refill_threshold) {
+ u32 desc_refilled = octep_vf_oq_refill(oct, oq);
+
+ /* flush pending writes before updating credits */
+ smp_wmb();
+ writel(desc_refilled, oq->pkts_credit_reg);
+ }
+
+ return total_pkts_processed;
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h
new file mode 100644
index 000000000000..fe46838b5200
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_VF_RX_H_
+#define _OCTEP_VF_RX_H_
+
+/* struct octep_vf_oq_desc_hw - Octeon Hardware OQ descriptor format.
+ *
+ * The descriptor ring is made of descriptors which have 2 64-bit values:
+ *
+ * @buffer_ptr: DMA address of the skb->data
+ * @info_ptr: DMA address of host memory, used to update pkt count by hw.
+ * This is currently unused to save pci writes.
+ */
+struct octep_vf_oq_desc_hw {
+ dma_addr_t buffer_ptr;
+ u64 info_ptr;
+};
+
+static_assert(sizeof(struct octep_vf_oq_desc_hw) == 16);
+
+#define OCTEP_VF_OQ_DESC_SIZE (sizeof(struct octep_vf_oq_desc_hw))
+
+/* Rx offload flags */
+#define OCTEP_VF_RX_OFFLOAD_VLAN_STRIP BIT(0)
+#define OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM BIT(1)
+#define OCTEP_VF_RX_OFFLOAD_UDP_CKSUM BIT(2)
+#define OCTEP_VF_RX_OFFLOAD_TCP_CKSUM BIT(3)
+
+#define OCTEP_VF_RX_OFFLOAD_CKSUM (OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM | \
+ OCTEP_VF_RX_OFFLOAD_UDP_CKSUM | \
+ OCTEP_VF_RX_OFFLOAD_TCP_CKSUM)
+
+#define OCTEP_VF_RX_IP_CSUM(flags) ((flags) & \
+ (OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM | \
+ OCTEP_VF_RX_OFFLOAD_TCP_CKSUM | \
+ OCTEP_VF_RX_OFFLOAD_UDP_CKSUM))
+
+/* bit 0 is vlan strip */
+#define OCTEP_VF_RX_CSUM_IP_VERIFIED BIT(1)
+#define OCTEP_VF_RX_CSUM_L4_VERIFIED BIT(2)
+
+#define OCTEP_VF_RX_CSUM_VERIFIED(flags) ((flags) & \
+ (OCTEP_VF_RX_CSUM_L4_VERIFIED | \
+ OCTEP_VF_RX_CSUM_IP_VERIFIED))
+
+/* Extended Response Header in packet data received from Hardware.
+ * Includes metadata like checksum status.
+ * this is valid only if hardware/firmware published support for this.
+ * This is at offset 0 of packet data (skb->data).
+ */
+struct octep_vf_oq_resp_hw_ext {
+ /* Reserved. */
+ u64 rsvd:48;
+
+ /* rx offload flags */
+ u16 rx_ol_flags;
+};
+
+static_assert(sizeof(struct octep_vf_oq_resp_hw_ext) == 8);
+
+#define OCTEP_VF_OQ_RESP_HW_EXT_SIZE (sizeof(struct octep_vf_oq_resp_hw_ext))
+
+/* Length of Rx packet DMA'ed by Octeon to Host.
+ * this is in bigendian; so need to be converted to cpu endian.
+ * Octeon writes this at the beginning of Rx buffer (skb->data).
+ */
+struct octep_vf_oq_resp_hw {
+ /* The Length of the packet. */
+ __be64 length;
+};
+
+static_assert(sizeof(struct octep_vf_oq_resp_hw) == 8);
+
+#define OCTEP_VF_OQ_RESP_HW_SIZE (sizeof(struct octep_vf_oq_resp_hw))
+
+/* Pointer to data buffer.
+ * Driver keeps a pointer to the data buffer that it made available to
+ * the Octeon device. Since the descriptor ring keeps physical (bus)
+ * addresses, this field is required for the driver to keep track of
+ * the virtual address pointers. The fields are operated by
+ * OS-dependent routines.
+ */
+struct octep_vf_rx_buffer {
+ struct page *page;
+
+ /* length from rx hardware descriptor after converting to cpu endian */
+ u64 len;
+};
+
+#define OCTEP_VF_OQ_RECVBUF_SIZE (sizeof(struct octep_vf_rx_buffer))
+
+/* Output Queue statistics. Each output queue has four stats fields. */
+struct octep_vf_oq_stats {
+ /* Number of packets received from the Device. */
+ u64 packets;
+
+ /* Number of bytes received from the Device. */
+ u64 bytes;
+
+ /* Number of times failed to allocate buffers. */
+ u64 alloc_failures;
+};
+
+#define OCTEP_VF_OQ_STATS_SIZE (sizeof(struct octep_vf_oq_stats))
+
+/* Hardware interface Rx statistics */
+struct octep_vf_iface_rx_stats {
+ /* Received packets */
+ u64 pkts;
+
+ /* Octets of received packets */
+ u64 octets;
+
+ /* Received PAUSE and Control packets */
+ u64 pause_pkts;
+
+ /* Received PAUSE and Control octets */
+ u64 pause_octets;
+
+ /* Filtered DMAC0 packets */
+ u64 dmac0_pkts;
+
+ /* Filtered DMAC0 octets */
+ u64 dmac0_octets;
+
+ /* Packets dropped due to RX FIFO full */
+ u64 dropped_pkts_fifo_full;
+
+ /* Octets dropped due to RX FIFO full */
+ u64 dropped_octets_fifo_full;
+
+ /* Error packets */
+ u64 err_pkts;
+
+ /* Filtered DMAC1 packets */
+ u64 dmac1_pkts;
+
+ /* Filtered DMAC1 octets */
+ u64 dmac1_octets;
+
+ /* NCSI-bound packets dropped */
+ u64 ncsi_dropped_pkts;
+
+ /* NCSI-bound octets dropped */
+ u64 ncsi_dropped_octets;
+
+ /* Multicast packets received. */
+ u64 mcast_pkts;
+
+ /* Broadcast packets received. */
+ u64 bcast_pkts;
+
+};
+
+/* The Descriptor Ring Output Queue structure.
+ * This structure has all the information required to implement a
+ * Octeon OQ.
+ */
+struct octep_vf_oq {
+ u32 q_no;
+
+ struct octep_vf_device *octep_vf_dev;
+ struct net_device *netdev;
+ struct device *dev;
+
+ struct napi_struct *napi;
+
+ /* The receive buffer list. This list has the virtual addresses
+ * of the buffers.
+ */
+ struct octep_vf_rx_buffer *buff_info;
+
+ /* Pointer to the mapped packet credit register.
+ * Host writes number of info/buffer ptrs available to this register
+ */
+ u8 __iomem *pkts_credit_reg;
+
+ /* Pointer to the mapped packet sent register.
+ * Octeon writes the number of packets DMA'ed to host memory
+ * in this register.
+ */
+ u8 __iomem *pkts_sent_reg;
+
+ /* Statistics for this OQ. */
+ struct octep_vf_oq_stats stats;
+
+ /* Packets pending to be processed */
+ u32 pkts_pending;
+ u32 last_pkt_count;
+
+ /* Index in the ring where the driver should read the next packet */
+ u32 host_read_idx;
+
+ /* Number of descriptors in this ring. */
+ u32 max_count;
+ u32 ring_size_mask;
+
+ /* The number of descriptors pending refill. */
+ u32 refill_count;
+
+ /* Index in the ring where the driver will refill the
+ * descriptor's buffer
+ */
+ u32 host_refill_idx;
+ u32 refill_threshold;
+
+ /* The size of each buffer pointed by the buffer pointer. */
+ u32 buffer_size;
+ u32 max_single_buffer_size;
+
+ /* The 8B aligned descriptor ring starts at this address. */
+ struct octep_vf_oq_desc_hw *desc_ring;
+
+ /* DMA mapped address of the OQ descriptor ring. */
+ dma_addr_t desc_ring_dma;
+};
+
+#define OCTEP_VF_OQ_SIZE (sizeof(struct octep_vf_oq))
+#endif /* _OCTEP_VF_RX_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c
new file mode 100644
index 000000000000..47a5c054fdb6
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/vmalloc.h>
+#include <net/netdev_queues.h>
+
+#include "octep_vf_config.h"
+#include "octep_vf_main.h"
+
+/* Reset various index of Tx queue data structure. */
+static void octep_vf_iq_reset_indices(struct octep_vf_iq *iq)
+{
+ iq->fill_cnt = 0;
+ iq->host_write_index = 0;
+ iq->octep_vf_read_index = 0;
+ iq->flush_index = 0;
+ iq->pkts_processed = 0;
+ iq->pkt_in_done = 0;
+}
+
+/**
+ * octep_vf_iq_process_completions() - Process Tx queue completions.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @budget: max number of completions to be processed in one invocation.
+ */
+int octep_vf_iq_process_completions(struct octep_vf_iq *iq, u16 budget)
+{
+ u32 compl_pkts, compl_bytes, compl_sg;
+ struct octep_vf_device *oct = iq->octep_vf_dev;
+ struct octep_vf_tx_buffer *tx_buffer;
+ struct skb_shared_info *shinfo;
+ u32 fi = iq->flush_index;
+ struct sk_buff *skb;
+ u8 frags, i;
+
+ compl_pkts = 0;
+ compl_sg = 0;
+ compl_bytes = 0;
+ iq->octep_vf_read_index = oct->hw_ops.update_iq_read_idx(iq);
+
+ while (likely(budget && (fi != iq->octep_vf_read_index))) {
+ tx_buffer = iq->buff_info + fi;
+ skb = tx_buffer->skb;
+
+ fi++;
+ if (unlikely(fi == iq->max_count))
+ fi = 0;
+ compl_bytes += skb->len;
+ compl_pkts++;
+ budget--;
+
+ if (!tx_buffer->gather) {
+ dma_unmap_single(iq->dev, tx_buffer->dma,
+ tx_buffer->skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ /* Scatter/Gather */
+ shinfo = skb_shinfo(skb);
+ frags = shinfo->nr_frags;
+ compl_sg++;
+
+ dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0],
+ tx_buffer->sglist[0].len[3], DMA_TO_DEVICE);
+
+ i = 1; /* entry 0 is main skb, unmapped above */
+ while (frags--) {
+ dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3],
+ tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE);
+ i++;
+ }
+
+ dev_kfree_skb_any(skb);
+ }
+
+ iq->pkts_processed += compl_pkts;
+ iq->stats.instr_completed += compl_pkts;
+ iq->stats.bytes_sent += compl_bytes;
+ iq->stats.sgentry_sent += compl_sg;
+ iq->flush_index = fi;
+
+ netif_subqueue_completed_wake(iq->netdev, iq->q_no, compl_pkts,
+ compl_bytes, IQ_INSTR_SPACE(iq),
+ OCTEP_VF_WAKE_QUEUE_THRESHOLD);
+
+ return !budget;
+}
+
+/**
+ * octep_vf_iq_free_pending() - Free Tx buffers for pending completions.
+ *
+ * @iq: Octeon Tx queue data structure.
+ */
+static void octep_vf_iq_free_pending(struct octep_vf_iq *iq)
+{
+ struct octep_vf_tx_buffer *tx_buffer;
+ struct skb_shared_info *shinfo;
+ u32 fi = iq->flush_index;
+ struct sk_buff *skb;
+ u8 frags, i;
+
+ while (fi != iq->host_write_index) {
+ tx_buffer = iq->buff_info + fi;
+ skb = tx_buffer->skb;
+
+ fi++;
+ if (unlikely(fi == iq->max_count))
+ fi = 0;
+
+ if (!tx_buffer->gather) {
+ dma_unmap_single(iq->dev, tx_buffer->dma,
+ tx_buffer->skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ /* Scatter/Gather */
+ shinfo = skb_shinfo(skb);
+ frags = shinfo->nr_frags;
+
+ dma_unmap_single(iq->dev,
+ tx_buffer->sglist[0].dma_ptr[0],
+ tx_buffer->sglist[0].len[0],
+ DMA_TO_DEVICE);
+
+ i = 1; /* entry 0 is main skb, unmapped above */
+ while (frags--) {
+ dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3],
+ tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE);
+ i++;
+ }
+
+ dev_kfree_skb_any(skb);
+ }
+
+ iq->flush_index = fi;
+ netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
+}
+
+/**
+ * octep_vf_clean_iqs() - Clean Tx queues to shutdown the device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Free the buffers in Tx queue descriptors pending completion and
+ * reset queue indices
+ */
+void octep_vf_clean_iqs(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_iqs; i++) {
+ octep_vf_iq_free_pending(oct->iq[i]);
+ octep_vf_iq_reset_indices(oct->iq[i]);
+ }
+}
+
+/**
+ * octep_vf_setup_iq() - Setup a Tx queue.
+ *
+ * @oct: Octeon device private data structure.
+ * @q_no: Tx queue number to be setup.
+ *
+ * Allocate resources for a Tx queue.
+ */
+static int octep_vf_setup_iq(struct octep_vf_device *oct, int q_no)
+{
+ u32 desc_ring_size, buff_info_size, sglist_size;
+ struct octep_vf_iq *iq;
+ int i;
+
+ iq = vzalloc(sizeof(*iq));
+ if (!iq)
+ goto iq_alloc_err;
+ oct->iq[q_no] = iq;
+
+ iq->octep_vf_dev = oct;
+ iq->netdev = oct->netdev;
+ iq->dev = &oct->pdev->dev;
+ iq->q_no = q_no;
+ iq->max_count = CFG_GET_IQ_NUM_DESC(oct->conf);
+ iq->ring_size_mask = iq->max_count - 1;
+ iq->fill_threshold = CFG_GET_IQ_DB_MIN(oct->conf);
+ iq->netdev_q = netdev_get_tx_queue(iq->netdev, q_no);
+
+ /* Allocate memory for hardware queue descriptors */
+ desc_ring_size = OCTEP_VF_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf);
+ iq->desc_ring = dma_alloc_coherent(iq->dev, desc_ring_size,
+ &iq->desc_ring_dma, GFP_KERNEL);
+ if (unlikely(!iq->desc_ring)) {
+ dev_err(iq->dev,
+ "Failed to allocate DMA memory for IQ-%d\n", q_no);
+ goto desc_dma_alloc_err;
+ }
+
+ /* Allocate memory for hardware SGLIST descriptors */
+ sglist_size = OCTEP_VF_SGLIST_SIZE_PER_PKT *
+ CFG_GET_IQ_NUM_DESC(oct->conf);
+ iq->sglist = dma_alloc_coherent(iq->dev, sglist_size,
+ &iq->sglist_dma, GFP_KERNEL);
+ if (unlikely(!iq->sglist)) {
+ dev_err(iq->dev,
+ "Failed to allocate DMA memory for IQ-%d SGLIST\n",
+ q_no);
+ goto sglist_alloc_err;
+ }
+
+ /* allocate memory to manage Tx packets pending completion */
+ buff_info_size = OCTEP_VF_IQ_TXBUFF_INFO_SIZE * iq->max_count;
+ iq->buff_info = vzalloc(buff_info_size);
+ if (!iq->buff_info) {
+ dev_err(iq->dev,
+ "Failed to allocate buff info for IQ-%d\n", q_no);
+ goto buff_info_err;
+ }
+
+ /* Setup sglist addresses in tx_buffer entries */
+ for (i = 0; i < CFG_GET_IQ_NUM_DESC(oct->conf); i++) {
+ struct octep_vf_tx_buffer *tx_buffer;
+
+ tx_buffer = &iq->buff_info[i];
+ tx_buffer->sglist =
+ &iq->sglist[i * OCTEP_VF_SGLIST_ENTRIES_PER_PKT];
+ tx_buffer->sglist_dma =
+ iq->sglist_dma + (i * OCTEP_VF_SGLIST_SIZE_PER_PKT);
+ }
+
+ octep_vf_iq_reset_indices(iq);
+ oct->hw_ops.setup_iq_regs(oct, q_no);
+
+ oct->num_iqs++;
+ return 0;
+
+buff_info_err:
+ dma_free_coherent(iq->dev, sglist_size, iq->sglist, iq->sglist_dma);
+sglist_alloc_err:
+ dma_free_coherent(iq->dev, desc_ring_size,
+ iq->desc_ring, iq->desc_ring_dma);
+desc_dma_alloc_err:
+ vfree(iq);
+ oct->iq[q_no] = NULL;
+iq_alloc_err:
+ return -1;
+}
+
+/**
+ * octep_vf_free_iq() - Free Tx queue resources.
+ *
+ * @iq: Octeon Tx queue data structure.
+ *
+ * Free all the resources allocated for a Tx queue.
+ */
+static void octep_vf_free_iq(struct octep_vf_iq *iq)
+{
+ struct octep_vf_device *oct = iq->octep_vf_dev;
+ u64 desc_ring_size, sglist_size;
+ int q_no = iq->q_no;
+
+ desc_ring_size = OCTEP_VF_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf);
+
+ vfree(iq->buff_info);
+
+ if (iq->desc_ring)
+ dma_free_coherent(iq->dev, desc_ring_size,
+ iq->desc_ring, iq->desc_ring_dma);
+
+ sglist_size = OCTEP_VF_SGLIST_SIZE_PER_PKT *
+ CFG_GET_IQ_NUM_DESC(oct->conf);
+ if (iq->sglist)
+ dma_free_coherent(iq->dev, sglist_size,
+ iq->sglist, iq->sglist_dma);
+
+ vfree(iq);
+ oct->iq[q_no] = NULL;
+ oct->num_iqs--;
+}
+
+/**
+ * octep_vf_setup_iqs() - setup resources for all Tx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+int octep_vf_setup_iqs(struct octep_vf_device *oct)
+{
+ int i;
+
+ oct->num_iqs = 0;
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ if (octep_vf_setup_iq(oct, i)) {
+ dev_err(&oct->pdev->dev,
+ "Failed to setup IQ(TxQ)-%d.\n", i);
+ goto iq_setup_err;
+ }
+ dev_dbg(&oct->pdev->dev, "Successfully setup IQ(TxQ)-%d.\n", i);
+ }
+
+ return 0;
+
+iq_setup_err:
+ while (i) {
+ i--;
+ octep_vf_free_iq(oct->iq[i]);
+ }
+ return -1;
+}
+
+/**
+ * octep_vf_free_iqs() - Free resources of all Tx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+void octep_vf_free_iqs(struct octep_vf_device *oct)
+{
+ int i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ octep_vf_free_iq(oct->iq[i]);
+ dev_dbg(&oct->pdev->dev,
+ "Successfully destroyed IQ(TxQ)-%d.\n", i);
+ }
+ oct->num_iqs = 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h
new file mode 100644
index 000000000000..f338b975103c
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) VF Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_VF_TX_H_
+#define _OCTEP_VF_TX_H_
+
+#define IQ_SEND_OK 0
+#define IQ_SEND_STOP 1
+#define IQ_SEND_FAILED -1
+
+#define TX_BUFTYPE_NONE 0
+#define TX_BUFTYPE_NET 1
+#define TX_BUFTYPE_NET_SG 2
+#define NUM_TX_BUFTYPES 3
+
+/* Hardware format for Scatter/Gather list
+ *
+ * 63 48|47 32|31 16|15 0
+ * -----------------------------------------
+ * | Len 0 | Len 1 | Len 2 | Len 3 |
+ * -----------------------------------------
+ * | Ptr 0 |
+ * -----------------------------------------
+ * | Ptr 1 |
+ * -----------------------------------------
+ * | Ptr 2 |
+ * -----------------------------------------
+ * | Ptr 3 |
+ * -----------------------------------------
+ */
+struct octep_vf_tx_sglist_desc {
+ u16 len[4];
+ dma_addr_t dma_ptr[4];
+};
+
+static_assert(sizeof(struct octep_vf_tx_sglist_desc) == 40);
+
+/* Each Scatter/Gather entry sent to hardwar hold four pointers.
+ * So, number of entries required is (MAX_SKB_FRAGS + 1)/4, where '+1'
+ * is for main skb which also goes as a gather buffer to Octeon hardware.
+ * To allocate sufficient SGLIST entries for a packet with max fragments,
+ * align by adding 3 before calcuating max SGLIST entries per packet.
+ */
+#define OCTEP_VF_SGLIST_ENTRIES_PER_PKT ((MAX_SKB_FRAGS + 1 + 3) / 4)
+#define OCTEP_VF_SGLIST_SIZE_PER_PKT \
+ (OCTEP_VF_SGLIST_ENTRIES_PER_PKT * sizeof(struct octep_vf_tx_sglist_desc))
+
+struct octep_vf_tx_buffer {
+ struct sk_buff *skb;
+ dma_addr_t dma;
+ struct octep_vf_tx_sglist_desc *sglist;
+ dma_addr_t sglist_dma;
+ u8 gather;
+};
+
+#define OCTEP_VF_IQ_TXBUFF_INFO_SIZE (sizeof(struct octep_vf_tx_buffer))
+
+/* VF Hardware interface Tx statistics */
+struct octep_vf_iface_tx_stats {
+ /* Total frames sent on the interface */
+ u64 pkts;
+
+ /* Total octets sent on the interface */
+ u64 octs;
+
+ /* Packets sent to a broadcast DMAC */
+ u64 bcst;
+
+ /* Packets sent to the multicast DMAC */
+ u64 mcst;
+
+ /* Packets dropped */
+ u64 dropped;
+
+ /* Reserved */
+ u64 reserved[13];
+};
+
+/* VF Input Queue statistics */
+struct octep_vf_iq_stats {
+ /* Instructions posted to this queue. */
+ u64 instr_posted;
+
+ /* Instructions copied by hardware for processing. */
+ u64 instr_completed;
+
+ /* Instructions that could not be processed. */
+ u64 instr_dropped;
+
+ /* Bytes sent through this queue. */
+ u64 bytes_sent;
+
+ /* Gather entries sent through this queue. */
+ u64 sgentry_sent;
+
+ /* Number of transmit failures due to TX_BUSY */
+ u64 tx_busy;
+
+ /* Number of times the queue is restarted */
+ u64 restart_cnt;
+};
+
+/* The instruction (input) queue.
+ * The input queue is used to post raw (instruction) mode data or packet
+ * data to Octeon device from the host. Each input queue (up to 4) for
+ * a Octeon device has one such structure to represent it.
+ */
+struct octep_vf_iq {
+ u32 q_no;
+
+ struct octep_vf_device *octep_vf_dev;
+ struct net_device *netdev;
+ struct device *dev;
+ struct netdev_queue *netdev_q;
+
+ /* Index in input ring where driver should write the next packet */
+ u16 host_write_index;
+
+ /* Index in input ring where Octeon is expected to read next packet */
+ u16 octep_vf_read_index;
+
+ /* This index aids in finding the window in the queue where Octeon
+ * has read the commands.
+ */
+ u16 flush_index;
+
+ /* Statistics for this input queue. */
+ struct octep_vf_iq_stats stats;
+
+ /* Pointer to the Virtual Base addr of the input ring. */
+ struct octep_vf_tx_desc_hw *desc_ring;
+
+ /* DMA mapped base address of the input descriptor ring. */
+ dma_addr_t desc_ring_dma;
+
+ /* Info of Tx buffers pending completion. */
+ struct octep_vf_tx_buffer *buff_info;
+
+ /* Base pointer to Scatter/Gather lists for all ring descriptors. */
+ struct octep_vf_tx_sglist_desc *sglist;
+
+ /* DMA mapped addr of Scatter Gather Lists */
+ dma_addr_t sglist_dma;
+
+ /* Octeon doorbell register for the ring. */
+ u8 __iomem *doorbell_reg;
+
+ /* Octeon instruction count register for this ring. */
+ u8 __iomem *inst_cnt_reg;
+
+ /* interrupt level register for this ring */
+ u8 __iomem *intr_lvl_reg;
+
+ /* Maximum no. of instructions in this queue. */
+ u32 max_count;
+ u32 ring_size_mask;
+
+ u32 pkt_in_done;
+ u32 pkts_processed;
+
+ u32 status;
+
+ /* Number of instructions pending to be posted to Octeon. */
+ u32 fill_cnt;
+
+ /* The max. number of instructions that can be held pending by the
+ * driver before ringing doorbell.
+ */
+ u32 fill_threshold;
+};
+
+/* Hardware Tx Instruction Header */
+struct octep_vf_instr_hdr {
+ /* Data Len */
+ u64 tlen:16;
+
+ /* Reserved */
+ u64 rsvd:20;
+
+ /* PKIND for SDP */
+ u64 pkind:6;
+
+ /* Front Data size */
+ u64 fsz:6;
+
+ /* No. of entries in gather list */
+ u64 gsz:14;
+
+ /* Gather indicator 1=gather*/
+ u64 gather:1;
+
+ /* Reserved3 */
+ u64 reserved3:1;
+};
+
+static_assert(sizeof(struct octep_vf_instr_hdr) == 8);
+
+/* Tx offload flags */
+#define OCTEP_VF_TX_OFFLOAD_VLAN_INSERT BIT(0)
+#define OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM BIT(1)
+#define OCTEP_VF_TX_OFFLOAD_UDP_CKSUM BIT(2)
+#define OCTEP_VF_TX_OFFLOAD_TCP_CKSUM BIT(3)
+#define OCTEP_VF_TX_OFFLOAD_SCTP_CKSUM BIT(4)
+#define OCTEP_VF_TX_OFFLOAD_TCP_TSO BIT(5)
+#define OCTEP_VF_TX_OFFLOAD_UDP_TSO BIT(6)
+
+#define OCTEP_VF_TX_OFFLOAD_CKSUM (OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM | \
+ OCTEP_VF_TX_OFFLOAD_UDP_CKSUM | \
+ OCTEP_VF_TX_OFFLOAD_TCP_CKSUM)
+
+#define OCTEP_VF_TX_OFFLOAD_TSO (OCTEP_VF_TX_OFFLOAD_TCP_TSO | \
+ OCTEP_VF_TX_OFFLOAD_UDP_TSO)
+
+#define OCTEP_VF_TX_IP_CSUM(flags) ((flags) & \
+ (OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM | \
+ OCTEP_VF_TX_OFFLOAD_TCP_CKSUM | \
+ OCTEP_VF_TX_OFFLOAD_UDP_CKSUM))
+
+#define OCTEP_VF_TX_TSO(flags) ((flags) & \
+ (OCTEP_VF_TX_OFFLOAD_TCP_TSO | \
+ OCTEP_VF_TX_OFFLOAD_UDP_TSO))
+
+struct tx_mdata {
+ /* offload flags */
+ u16 ol_flags;
+
+ /* gso size */
+ u16 gso_size;
+
+ /* gso flags */
+ u16 gso_segs;
+
+ /* reserved */
+ u16 rsvd1;
+
+ /* reserved */
+ u64 rsvd2;
+};
+
+static_assert(sizeof(struct tx_mdata) == 16);
+
+/* 64-byte Tx instruction format.
+ * Format of instruction for a 64-byte mode input queue.
+ *
+ * only first 16-bytes (dptr and ih) are mandatory; rest are optional
+ * and filled by the driver based on firmware/hardware capabilities.
+ * These optional headers together called Front Data and its size is
+ * described by ih->fsz.
+ */
+struct octep_vf_tx_desc_hw {
+ /* Pointer where the input data is available. */
+ u64 dptr;
+
+ /* Instruction Header. */
+ union {
+ struct octep_vf_instr_hdr ih;
+ u64 ih64;
+ };
+
+ union {
+ u64 txm64[2];
+ struct tx_mdata txm;
+ };
+
+ /* Additional headers available in a 64-byte instruction. */
+ u64 exhdr[4];
+};
+
+static_assert(sizeof(struct octep_vf_tx_desc_hw) == 64);
+
+#define OCTEP_VF_IQ_DESC_SIZE (sizeof(struct octep_vf_tx_desc_hw))
+#endif /* _OCTEP_VF_TX_H_ */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
index 9690ac01f02c..b92264d0a77e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -413,4 +413,5 @@ const char *otx2_mbox_id2name(u16 id)
EXPORT_SYMBOL(otx2_mbox_id2name);
MODULE_AUTHOR("Marvell.");
+MODULE_DESCRIPTION("Marvell RVU NIC Mbox helpers");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index edeb0f737312..61ab7f66f053 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -837,6 +837,8 @@ enum nix_af_status {
NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
NIX_AF_ERR_LINK_CREDITS = -431,
+ NIX_AF_ERR_INVALID_BPID = -434,
+ NIX_AF_ERR_INVALID_BPID_REQ = -435,
NIX_AF_ERR_INVALID_MCAST_GRP = -436,
NIX_AF_ERR_INVALID_MCAST_DEL_REQ = -437,
NIX_AF_ERR_NON_CONTIG_MCE_LIST = -438,
@@ -1114,6 +1116,7 @@ struct nix_rss_flowkey_cfg {
#define NIX_FLOW_KEY_TYPE_INNR_UDP BIT(15)
#define NIX_FLOW_KEY_TYPE_INNR_SCTP BIT(16)
#define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17)
+#define NIX_FLOW_KEY_TYPE_CUSTOM0 BIT(19)
#define NIX_FLOW_KEY_TYPE_VLAN BIT(20)
#define NIX_FLOW_KEY_TYPE_IPV4_PROTO BIT(21)
#define NIX_FLOW_KEY_TYPE_AH BIT(22)
@@ -1553,6 +1556,7 @@ struct flow_msg {
u32 mpls_lse[4];
u8 icmp_type;
u8 icmp_code;
+ __be16 tcp_flags;
};
struct npc_install_flow_req {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index b0b4dea548e1..d883157393ea 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -85,8 +85,7 @@ enum npc_kpu_lc_ltype {
enum npc_kpu_ld_ltype {
NPC_LT_LD_TCP = 1,
NPC_LT_LD_UDP,
- NPC_LT_LD_ICMP,
- NPC_LT_LD_SCTP,
+ NPC_LT_LD_SCTP = 4,
NPC_LT_LD_ICMP6,
NPC_LT_LD_CUSTOM0,
NPC_LT_LD_CUSTOM1,
@@ -97,6 +96,7 @@ enum npc_kpu_ld_ltype {
NPC_LT_LD_NSH,
NPC_LT_LD_TU_MPLS_IN_NSH,
NPC_LT_LD_TU_MPLS_IN_IP,
+ NPC_LT_LD_ICMP,
};
enum npc_kpu_le_ltype {
@@ -140,14 +140,14 @@ enum npc_kpu_lg_ltype {
enum npc_kpu_lh_ltype {
NPC_LT_LH_TU_TCP = 1,
NPC_LT_LH_TU_UDP,
- NPC_LT_LH_TU_ICMP,
- NPC_LT_LH_TU_SCTP,
+ NPC_LT_LH_TU_SCTP = 4,
NPC_LT_LH_TU_ICMP6,
+ NPC_LT_LH_CUSTOM0,
+ NPC_LT_LH_CUSTOM1,
NPC_LT_LH_TU_IGMP = 8,
NPC_LT_LH_TU_ESP,
NPC_LT_LH_TU_AH,
- NPC_LT_LH_CUSTOM0 = 0xE,
- NPC_LT_LH_CUSTOM1 = 0xF,
+ NPC_LT_LH_TU_ICMP = 0xF,
};
/* NPC port kind defines how the incoming or outgoing packets
@@ -155,10 +155,11 @@ enum npc_kpu_lh_ltype {
* Software assigns pkind for each incoming port such as CGX
* Ethernet interfaces, LBK interfaces, etc.
*/
-#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_CUSTOM_PRE_L2_PKIND
+#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_CPT_HDR_PTP_PKIND
enum npc_pkind_type {
NPC_RX_LBK_PKIND = 0ULL,
+ NPC_RX_CPT_HDR_PTP_PKIND = 54ULL,
NPC_RX_CUSTOM_PRE_L2_PKIND = 55ULL,
NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
NPC_RX_CHLEN24B_PKIND = 57ULL,
@@ -216,6 +217,7 @@ enum key_fields {
NPC_MPLS4_TTL,
NPC_TYPE_ICMP,
NPC_CODE_ICMP,
+ NPC_TCP_FLAGS,
NPC_HEADER_FIELDS_MAX,
NPC_CHAN = NPC_HEADER_FIELDS_MAX, /* Valid when Rx */
NPC_PF_FUNC, /* Valid when Tx */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index a820bad3abb2..41de72c8607f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -35,6 +35,7 @@
#define NPC_ETYPE_NSH 0x894f
#define NPC_ETYPE_DSA 0xdada
#define NPC_ETYPE_PPPOE 0x8864
+#define NPC_ETYPE_ERSPA 0x88be
#define NPC_PPP_IP 0x0021
#define NPC_PPP_IP6 0x0057
@@ -59,6 +60,9 @@
#define NPC_IPNH_MPLS 137
#define NPC_IPNH_HOSTID 139
#define NPC_IPNH_SHIM6 140
+#define NPC_IPNH_CUSTOM 253
+
+#define NPC_IP6_ROUTE_TYPE 4
#define NPC_UDP_PORT_PTP_E 319
#define NPC_UDP_PORT_PTP_G 320
@@ -187,6 +191,7 @@ enum npc_kpu_parser_state {
NPC_S_KPU2_EXDSA,
NPC_S_KPU2_CPT_CTAG,
NPC_S_KPU2_CPT_QINQ,
+ NPC_S_KPU2_MT,
NPC_S_KPU3_CTAG,
NPC_S_KPU3_STAG,
NPC_S_KPU3_QINQ,
@@ -231,6 +236,7 @@ enum npc_kpu_parser_state {
NPC_S_KPU8_ICMP6,
NPC_S_KPU8_GRE,
NPC_S_KPU8_AH,
+ NPC_S_KPU8_CUSTOM,
NPC_S_KPU9_TU_MPLS_IN_GRE,
NPC_S_KPU9_TU_MPLS_IN_NSH,
NPC_S_KPU9_TU_MPLS_IN_IP,
@@ -242,6 +248,7 @@ enum npc_kpu_parser_state {
NPC_S_KPU9_GTPC,
NPC_S_KPU9_GTPU,
NPC_S_KPU9_ESP,
+ NPC_S_KPU9_CUSTOM,
NPC_S_KPU10_TU_MPLS_IN_VXLANGPE,
NPC_S_KPU10_TU_MPLS_PL,
NPC_S_KPU10_TU_MPLS,
@@ -318,10 +325,10 @@ enum npc_kpu_lc_uflag {
NPC_F_LC_U_UNK_PROTO = 0x10,
NPC_F_LC_U_IP_FRAG = 0x20,
NPC_F_LC_U_IP6_FRAG = 0x40,
+ NPC_F_LC_L_6TO4 = 0x80,
};
enum npc_kpu_lc_lflag {
NPC_F_LC_L_IP_IN_IP = 1,
- NPC_F_LC_L_6TO4,
NPC_F_LC_L_MPLS_IN_IP,
NPC_F_LC_L_IP6_TUN_IP6,
NPC_F_LC_L_IP6_MPLS_IN_IP,
@@ -334,6 +341,8 @@ enum npc_kpu_lc_lflag {
NPC_F_LC_L_EXT_MOBILITY,
NPC_F_LC_L_EXT_HOSTID,
NPC_F_LC_L_EXT_SHIM6,
+ NPC_F_LC_L_IP6_SRH_SEG_1,
+ NPC_F_LC_L_IP6_SRH_SEG_2,
};
enum npc_kpu_ld_lflag {
@@ -970,10 +979,10 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
12, 16, 20, 0, 0,
- NPC_S_KPU1_ETHER, 0, 0,
+ NPC_S_KPU1_CPT_HDR, 48, 0,
NPC_LID_LA, NPC_LT_NA,
0,
- 0, 0, 0, 0,
+ 0, 7, 0, 0,
},
{
@@ -2786,6 +2795,24 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
0x0000,
},
{
+ NPC_S_KPU2_MT, 0xff,
+ NPC_ETYPE_CTAG,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU2_MT, 0xff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
NPC_S_NA, 0X00,
0x0000,
0x0000,
@@ -4501,6 +4528,24 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
0xff00,
NPC_IP_VER_6,
NPC_IP_VER_MASK,
+ (NPC_IP6_ROUTE_TYPE << 8) | 1,
+ 0xffff,
+ },
+ {
+ NPC_S_KPU5_IP6, 0xff,
+ NPC_IPNH_ROUT << 8,
+ 0xff00,
+ NPC_IP_VER_6,
+ NPC_IP_VER_MASK,
+ (NPC_IP6_ROUTE_TYPE << 8) | 2,
+ 0xffff,
+ },
+ {
+ NPC_S_KPU5_IP6, 0xff,
+ NPC_IPNH_ROUT << 8,
+ 0xff00,
+ NPC_IP_VER_6,
+ NPC_IP_VER_MASK,
0x0000,
0x0000,
},
@@ -4776,6 +4821,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
},
{
NPC_S_KPU5_CPT_IP, 0xff,
+ NPC_IPNH_CUSTOM,
+ 0x00ff,
+ NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+ NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU5_CPT_IP, 0xff,
0x0000,
0x0000,
NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
@@ -4884,6 +4938,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
},
{
NPC_S_KPU5_CPT_IP, 0xff,
+ NPC_IPNH_CUSTOM,
+ 0x00ff,
+ NPC_IP_VER_4,
+ NPC_IP_VER_MASK,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU5_CPT_IP, 0xff,
0x0000,
0x0000,
NPC_IP_VER_4,
@@ -5064,6 +5127,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
},
{
NPC_S_KPU5_CPT_IP6, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ NPC_IP_VER_6,
+ NPC_IP_VER_MASK,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU5_CPT_IP6, 0xff,
0x0000,
0x0000,
NPC_IP_VER_6,
@@ -5208,6 +5280,15 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
},
{
NPC_S_KPU6_IP6_FRAG, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ NPC_IP6_FRAG_FRAGOFF,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU6_IP6_FRAG, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5325,6 +5406,15 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
},
{
NPC_S_KPU6_IP6_HOP_DEST, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU6_IP6_HOP_DEST, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5433,6 +5523,15 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
},
{
NPC_S_KPU6_IP6_ROUT, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU6_IP6_ROUT, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5532,6 +5631,15 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
},
{
NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5649,6 +5757,15 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
},
{
NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5757,6 +5874,15 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
},
{
NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5883,6 +6009,15 @@ static struct npc_kpu_profile_cam kpu7_cam_entries[] = {
},
{
NPC_S_KPU7_IP6_ROUT, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU7_IP6_ROUT, 0xff,
0x0000,
0x0000,
0x0000,
@@ -5982,6 +6117,15 @@ static struct npc_kpu_profile_cam kpu7_cam_entries[] = {
},
{
NPC_S_KPU7_IP6_FRAG, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ NPC_IP6_FRAG_FRAGOFF,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU7_IP6_FRAG, 0xff,
0x0000,
0x0000,
0x0000,
@@ -6081,6 +6225,15 @@ static struct npc_kpu_profile_cam kpu7_cam_entries[] = {
},
{
NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+ NPC_IPNH_CUSTOM << 8,
+ 0xff00,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
0x0000,
0x0000,
0x0000,
@@ -6310,6 +6463,15 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
0xffff,
0x0000,
0x0000,
+ 0x0009,
+ 0xffff,
+ },
+ {
+ NPC_S_KPU8_UDP, 0xff,
+ NPC_UDP_PORT_ESP,
+ 0xffff,
+ 0x0000,
+ 0x0000,
0x0000,
0x0000,
},
@@ -6756,6 +6918,78 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
},
{
NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ 0x0000,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_CSUM,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_KEY,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_SEQ,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_CSUM | NPC_GRE_F_KEY,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_CSUM | NPC_GRE_F_SEQ,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
+ NPC_ETYPE_ERSPA,
+ 0xffff,
+ NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU8_GRE, 0xff,
0x0000,
0xffff,
NPC_GRE_F_ROUTE,
@@ -6836,6 +7070,15 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
0x0000,
},
{
+ NPC_S_KPU8_CUSTOM, 0xff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
NPC_S_NA, 0X00,
0x0000,
0x0000,
@@ -7304,6 +7547,24 @@ static struct npc_kpu_profile_cam kpu9_cam_entries[] = {
0x0000,
},
{
+ NPC_S_KPU9_CUSTOM, 0xff,
+ 0x4000,
+ 0xf000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU9_CUSTOM, 0xff,
+ 0x6000,
+ 0xf000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
NPC_S_NA, 0X00,
0x0000,
0x0000,
@@ -8384,7 +8645,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 3, 0,
+ 6, 0, 42, 3, 0,
NPC_S_KPU5_IP6, 14, 1,
NPC_LID_LA, NPC_LT_LA_ETHER,
0,
@@ -8536,7 +8797,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 3, 0,
+ 6, 0, 42, 3, 0,
NPC_S_KPU5_IP6, 22, 1,
NPC_LID_LA, NPC_LT_LA_IH_NIX_ETHER,
NPC_F_LA_U_HAS_IH_NIX,
@@ -8693,7 +8954,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 3, 0,
+ 6, 0, 42, 3, 0,
NPC_S_KPU5_IP6, 30, 1,
NPC_LID_LA, NPC_LT_LA_HIGIG2_ETHER,
NPC_F_LA_U_HAS_HIGIG2,
@@ -8818,7 +9079,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 3, 0,
+ 6, 0, 42, 3, 0,
NPC_S_KPU5_IP6, 38, 1,
NPC_LID_LA, NPC_LT_LA_IH_NIX_HIGIG2_ETHER,
NPC_F_LA_U_HAS_IH_NIX | NPC_F_LA_U_HAS_HIGIG2,
@@ -8947,7 +9208,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 3, 0,
+ 6, 0, 42, 3, 0,
NPC_S_KPU5_IP6, 14, 0,
NPC_LID_LA, NPC_LT_NA,
0,
@@ -9124,7 +9385,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 6, 1,
NPC_LID_LB, NPC_LT_LB_CTAG,
0,
@@ -9204,7 +9465,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 14, 1,
NPC_LID_LB, NPC_LT_LB_PPPOE,
0,
@@ -9213,7 +9474,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
- NPC_S_NA, 0, 1,
+ NPC_S_NA, 6, 1,
NPC_LID_LB, NPC_LT_LB_CTAG,
NPC_F_LB_U_UNK_ETYPE,
0, 0, 0, 0,
@@ -9228,7 +9489,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
@@ -9324,7 +9585,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 24, 1,
NPC_LID_LB, NPC_LT_LB_BTAG,
NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
@@ -9428,7 +9689,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
@@ -9532,7 +9793,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_ETAG,
0,
@@ -9628,7 +9889,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 28, 1,
NPC_LID_LB, NPC_LT_LB_ETAG,
NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
@@ -9684,7 +9945,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
0,
@@ -9757,7 +10018,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
- NPC_S_NA, 0, 1,
+ NPC_S_NA, 8, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
NPC_F_LB_U_UNK_ETYPE,
0, 0, 0, 0,
@@ -9772,7 +10033,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 18, 1,
NPC_LID_LB, NPC_LT_LB_EDSA,
NPC_F_LB_L_EDSA,
@@ -9836,7 +10097,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 2, 0,
+ 6, 0, 42, 2, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_EXDSA,
NPC_F_LB_L_EXDSA,
@@ -9923,6 +10184,22 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
0, 0, 0, 0,
},
{
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 4, 0, 0, 0, 0,
+ NPC_S_KPU3_CTAG, 0, 1,
+ NPC_LID_LB, NPC_LT_LB_CTAG,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU3_CTAG_C, 0, 0,
+ NPC_LID_LB, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
NPC_ERRLEV_LB, NPC_EC_L2_K3,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
@@ -9949,7 +10226,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 6, 0,
NPC_LID_LB, NPC_LT_NA,
0,
@@ -10029,7 +10306,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 8, 0,
NPC_LID_LB, NPC_LT_NA,
0,
@@ -10101,7 +10378,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 4, 0,
NPC_LID_LB, NPC_LT_NA,
0,
@@ -10165,7 +10442,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 8, 0,
NPC_LID_LB, NPC_LT_NA,
0,
@@ -10237,7 +10514,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 4, 0,
NPC_LID_LB, NPC_LT_NA,
0,
@@ -10310,80 +10587,80 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 0, 6, 1, 0,
- NPC_S_KPU5_IP, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU5_IP, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
- NPC_S_KPU5_IP6, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ 6, 0, 42, 1, 0,
+ NPC_S_KPU5_IP6, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 1, 0,
- NPC_S_KPU5_ARP, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU5_ARP, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 1, 0,
- NPC_S_KPU5_RARP, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU5_RARP, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 1, 0,
- NPC_S_KPU5_PTP, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU5_PTP, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 1, 0,
- NPC_S_KPU5_FCOE, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU5_FCOE, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
2, 6, 10, 0, 0,
- NPC_S_KPU4_MPLS, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU4_MPLS, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
2, 6, 10, 0, 0,
- NPC_S_KPU4_MPLS, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU4_MPLS, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
2, 0, 0, 0, 0,
- NPC_S_KPU4_NSH, 4, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_KPU4_NSH, 2, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
0, 0, 0, 0, 1,
- NPC_S_NA, 0, 1,
- NPC_LID_LB, NPC_LT_LB_CTAG,
+ NPC_S_NA, 0, 0,
+ NPC_LID_LB, NPC_LT_NA,
0,
0, 0, 0, 0,
},
@@ -10397,7 +10674,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 8, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
0,
@@ -10469,7 +10746,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 4, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
0,
@@ -10533,7 +10810,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 8, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
0,
@@ -10605,7 +10882,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 4, 1,
NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
0,
@@ -10685,7 +10962,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_DSA,
NPC_F_LB_L_DSA,
@@ -10733,7 +11010,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 1, 0,
+ 6, 0, 42, 1, 0,
NPC_S_KPU5_IP6, 14, 1,
NPC_LID_LB, NPC_LT_LB_DSA_VLAN,
NPC_F_LB_L_DSA_VLAN,
@@ -10894,7 +11171,7 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 0, 0,
+ 6, 0, 42, 0, 0,
NPC_S_KPU5_IP6, 6, 1,
NPC_LID_LB, NPC_LT_LB_FDSA,
NPC_F_LB_L_FDSA,
@@ -10942,7 +11219,7 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 0, 0,
+ 6, 0, 42, 0, 0,
NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_FDSA,
NPC_F_LB_L_FDSA,
@@ -10990,7 +11267,7 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 0, 0,
+ 6, 0, 42, 0, 0,
NPC_S_KPU5_IP6, 14, 1,
NPC_LID_LB, NPC_LT_LB_PPPOE,
0,
@@ -11014,7 +11291,7 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 0, 0,
+ 6, 0, 42, 0, 0,
NPC_S_KPU5_IP6, 2, 0,
NPC_LID_LC, NPC_LT_NA,
0,
@@ -11063,15 +11340,15 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 0, 6, 0, 0,
- NPC_S_KPU5_IP, 10, 0,
+ NPC_S_KPU5_IP, 10, 1,
NPC_LID_LB, NPC_LT_LB_PPPOE,
0,
0, 0, 0, 0,
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 6, 0, 0, 0, 0,
- NPC_S_KPU5_IP6, 10, 0,
+ 6, 0, 42, 0, 0,
+ NPC_S_KPU5_IP6, 10, 1,
NPC_LID_LB, NPC_LT_LB_PPPOE,
0,
0, 0, 0, 0,
@@ -11119,7 +11396,7 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 2, 0, 0, 2, 0,
+ 2, 0, 4, 2, 0,
NPC_S_KPU8_UDP, 20, 1,
NPC_LID_LC, NPC_LT_LC_IP,
0,
@@ -11223,7 +11500,7 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 2, 8, 10, 2, 0,
+ 2, 8, 4, 2, 0,
NPC_S_KPU8_UDP, 0, 1,
NPC_LID_LC, NPC_LT_LC_IP_OPT,
0,
@@ -11450,6 +11727,22 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
0, 0, 0, 0, 0,
NPC_S_KPU6_IP6_ROUT, 40, 1,
NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+ NPC_F_LC_L_IP6_SRH_SEG_1,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU6_IP6_ROUT, 40, 1,
+ NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+ NPC_F_LC_L_IP6_SRH_SEG_2,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU6_IP6_ROUT, 40, 1,
+ NPC_LID_LC, NPC_LT_LC_IP6_EXT,
NPC_F_LC_L_EXT_ROUT,
0, 0, 0, 0,
},
@@ -11695,6 +11988,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 2, 0,
+ NPC_S_KPU8_CUSTOM, 20, 1,
+ NPC_LID_LC, NPC_LT_LC_IP,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 1,
NPC_LID_LC, NPC_LT_LC_IP,
@@ -11791,6 +12092,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 2, 0,
+ NPC_S_KPU8_CUSTOM, 0, 1,
+ NPC_LID_LC, NPC_LT_LC_IP_OPT,
+ 0,
+ 0, 0xf, 0, 2,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 1,
NPC_LID_LC, NPC_LT_LC_IP_OPT,
@@ -11951,6 +12260,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 2, 0,
+ NPC_S_KPU8_CUSTOM, 40, 1,
+ NPC_LID_LC, NPC_LT_LC_IP6,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 1,
NPC_LID_LC, NPC_LT_LC_IP6,
@@ -12080,6 +12397,14 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 1, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12184,6 +12509,14 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 1, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 1, 0xff, 0, 3,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12280,6 +12613,14 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 1, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 1, 0xff, 0, 3,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12368,6 +12709,14 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 1, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12472,6 +12821,14 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 1, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 1, 0xff, 0, 3,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12568,6 +12925,14 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 1, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 1, 0xff, 0, 3,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12681,6 +13046,14 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 1, 0xff, 0, 3,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12769,6 +13142,14 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -12857,6 +13238,14 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU8_CUSTOM, 8, 0,
+ NPC_LID_LC, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
NPC_LID_LC, NPC_LT_NA,
@@ -13058,6 +13447,14 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 1,
+ NPC_S_NA, 8, 1,
+ NPC_LID_LD, NPC_LT_LD_UDP,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 0,
NPC_S_KPU9_ESP, 8, 1,
NPC_LID_LD, NPC_LT_LD_UDP,
@@ -13458,6 +13855,70 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 12, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 16, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_CSUM,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 16, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_KEY,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 16, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_SEQ,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 20, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_CSUM_KEY,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 20, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_CSUM_SEQ,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 20, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_KEY_SEQ,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 2, 0,
+ NPC_S_KPU11_TU_ETHER, 24, 1,
+ NPC_LID_LD, NPC_LT_LD_GRE,
+ NPC_F_LD_L_GRE_HAS_CSUM_KEY_SEQ,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 1,
NPC_LID_LD, NPC_LT_LD_GRE,
@@ -13529,6 +13990,14 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = {
0, 0, 0, 0,
},
{
+ NPC_ERRLEV_LD, NPC_EC_NOERR,
+ 0, 0, 0, 0, 0,
+ NPC_S_KPU9_CUSTOM, 0, 1,
+ NPC_LID_LF, NPC_LT_LF_CUSTOM0,
+ 0,
+ 0, 0xff, 0, 0,
+ },
+ {
NPC_ERRLEV_LD, NPC_EC_UNK,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
@@ -13946,6 +14415,22 @@ static struct npc_kpu_profile_action kpu9_action_entries[] = {
0, 0, 0, 0,
},
{
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 0, 6, 2, 0,
+ NPC_S_KPU12_TU_IP, 0, 0,
+ NPC_LID_LE, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 6, 0, 0, 2, 0,
+ NPC_S_KPU12_TU_IP6, 0, 0,
+ NPC_LID_LE, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
NPC_ERRLEV_LE, NPC_EC_UNK,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
@@ -15105,7 +15590,9 @@ static struct npc_lt_def_cfg npc_lt_defaults = {
},
.rx_et = {
{
- .lid = NPC_LID_LB,
+ .offset = -2,
+ .valid = 1,
+ .lid = NPC_LID_LC,
.ltype_match = NPC_LT_NA,
.ltype_mask = 0x0,
},
@@ -15139,6 +15626,12 @@ static struct npc_mcam_kex npc_mkex_default = {
/* Ethertype: 2 bytes, KW0[55:40] */
KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x5),
},
+ [NPC_LT_LA_CPT_HDR] = {
+ /* DMAC: 6 bytes, KW1[55:8] */
+ KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, NPC_KEXOF_DMAC),
+ /* Ethertype: 2 bytes, KW0[55:40] */
+ KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x5),
+ },
/* Layer A: HiGig2: */
[NPC_LT_LA_HIGIG2_ETHER] = {
/* Classification: 2 bytes, KW1[23:8] */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 5c1d04a3c559..07d4859de53a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -817,6 +817,8 @@ static int rvu_fwdata_init(struct rvu *rvu)
err = cgx_get_fwdata_base(&fwdbase);
if (err)
goto fail;
+
+ BUILD_BUG_ON(offsetof(struct rvu_fwdata, cgx_fw_data) > FWDATA_CGX_LMAC_OFFSET);
rvu->fwdata = ioremap_wc(fwdbase, sizeof(struct rvu_fwdata));
if (!rvu->fwdata)
goto fail;
@@ -1484,7 +1486,7 @@ int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
/* All CGX mapped PFs are set with assigned NIX block during init */
if (is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
blkaddr = pf->nix_blkaddr;
- } else if (is_afvf(pcifunc)) {
+ } else if (is_lbk_vf(rvu, pcifunc)) {
vf = pcifunc - 1;
/* Assign NIX based on VF number. All even numbered VFs get
* NIX0 and odd numbered gets NIX1
@@ -2034,7 +2036,7 @@ int rvu_mbox_handler_set_vf_perm(struct rvu *rvu, struct set_vf_perm *req,
u16 target;
/* Only PF can add VF permissions */
- if ((pcifunc & RVU_PFVF_FUNC_MASK) || is_afvf(pcifunc))
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) || is_lbk_vf(rvu, pcifunc))
return -EOPNOTSUPP;
target = (pcifunc & ~RVU_PFVF_FUNC_MASK) | (req->vf + 1);
@@ -2618,6 +2620,9 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
* 3. Cleanup pools (NPA)
*/
+ /* Free allocated BPIDs */
+ rvu_nix_flr_free_bpids(rvu, pcifunc);
+
/* Free multicast/mirror node associated with the 'pcifunc' */
rvu_nix_mcast_flr_free_entries(rvu, pcifunc);
@@ -3151,6 +3156,7 @@ static int rvu_enable_sriov(struct rvu *rvu)
{
struct pci_dev *pdev = rvu->pdev;
int err, chans, vfs;
+ int pos = 0;
if (!rvu_afvf_msix_vectors_num_ok(rvu)) {
dev_warn(&pdev->dev,
@@ -3158,6 +3164,12 @@ static int rvu_enable_sriov(struct rvu *rvu)
return 0;
}
+ /* Get RVU VFs device id */
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos)
+ return 0;
+ pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &rvu->vf_devid);
+
chans = rvu_get_num_lbk_chans();
if (chans < 0)
return chans;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 43be37dd1f32..f390525a6217 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -288,6 +288,16 @@ enum rvu_pfvf_flags {
#define RVU_CLEAR_VF_PERM ~GENMASK(PF_SET_VF_TRUSTED, PF_SET_VF_MAC)
+struct nix_bp {
+ struct rsrc_bmap bpids; /* free bpids bitmap */
+ u16 cgx_bpid_cnt;
+ u16 sdp_bpid_cnt;
+ u16 free_pool_base;
+ u16 *fn_map; /* pcifunc mapping */
+ u8 *intf_map; /* interface type map */
+ u8 *ref_cnt;
+};
+
struct nix_txsch {
struct rsrc_bmap schq;
u8 lvl;
@@ -363,6 +373,7 @@ struct nix_hw {
struct nix_lso lso;
struct nix_txvlan txvlan;
struct nix_ipolicer *ipolicer;
+ struct nix_bp bp;
u64 *tx_credits;
u8 cc_mcs_cnt;
};
@@ -432,6 +443,13 @@ struct mbox_wq_info {
struct workqueue_struct *mbox_wq;
};
+struct channel_fwdata {
+ struct sdp_node_info info;
+ u8 valid;
+#define RVU_CHANL_INFO_RESERVED 379
+ u8 reserved[RVU_CHANL_INFO_RESERVED];
+};
+
struct rvu_fwdata {
#define RVU_FWDATA_HEADER_MAGIC 0xCFDA /* Custom Firmware Data*/
#define RVU_FWDATA_VERSION 0x0001
@@ -450,11 +468,13 @@ struct rvu_fwdata {
u64 msixtr_base;
u32 ptp_ext_clk_rate;
u32 ptp_ext_tstamp;
-#define FWDATA_RESERVED_MEM 1022
+ struct channel_fwdata channel_data;
+#define FWDATA_RESERVED_MEM 958
u64 reserved[FWDATA_RESERVED_MEM];
#define CGX_MAX 9
#define CGX_LMACS_MAX 4
#define CGX_LMACS_USX 8
+#define FWDATA_CGX_LMAC_OFFSET 10536
union {
struct cgx_lmac_fwdata_s
cgx_fw_data[CGX_MAX][CGX_LMACS_MAX];
@@ -503,6 +523,7 @@ struct rvu {
struct mutex rsrc_lock; /* Serialize resource alloc/free */
struct mutex alias_lock; /* Serialize bar2 alias access */
int vfs; /* Number of VFs attached to RVU */
+ u16 vf_devid; /* VF devices id */
int nix_blkaddr[MAX_NIX_BLKS];
/* Mbox */
@@ -732,9 +753,11 @@ static inline bool is_rvu_supports_nix1(struct rvu *rvu)
/* Function Prototypes
* RVU
*/
-static inline bool is_afvf(u16 pcifunc)
+#define RVU_LBK_VF_DEVID 0xA0F8
+static inline bool is_lbk_vf(struct rvu *rvu, u16 pcifunc)
{
- return !(pcifunc & ~RVU_PFVF_FUNC_MASK);
+ return (!(pcifunc & ~RVU_PFVF_FUNC_MASK) &&
+ (rvu->vf_devid == RVU_LBK_VF_DEVID));
}
static inline bool is_vf(u16 pcifunc)
@@ -794,7 +817,7 @@ void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq);
int rvu_sdp_init(struct rvu *rvu);
bool is_sdp_pfvf(u16 pcifunc);
bool is_sdp_pf(u16 pcifunc);
-bool is_sdp_vf(u16 pcifunc);
+bool is_sdp_vf(struct rvu *rvu, u16 pcifunc);
/* CGX APIs */
static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf)
@@ -873,6 +896,7 @@ int rvu_nix_mcast_get_mce_index(struct rvu *rvu, u16 pcifunc,
u32 mcast_grp_idx);
int rvu_nix_mcast_update_mcam_entry(struct rvu *rvu, u16 pcifunc,
u32 mcast_grp_idx, u16 mcam_index);
+void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc);
/* NPC APIs */
void rvu_npc_freemem(struct rvu *rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index e6d7914ce61c..2500f5ba4f5a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2870,6 +2870,10 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s,
seq_printf(s, "%d ", ntohs(rule->packet.dport));
seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.dport));
break;
+ case NPC_TCP_FLAGS:
+ seq_printf(s, "%d ", rule->packet.tcp_flags);
+ seq_printf(s, "mask 0x%x\n", rule->mask.tcp_flags);
+ break;
case NPC_IPSEC_SPI:
seq_printf(s, "0x%x ", ntohl(rule->packet.spi));
seq_printf(s, "mask 0x%x\n", ntohl(rule->mask.spi));
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index 1e6fbd98423d..96c04f7d93f8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -1235,8 +1235,8 @@ static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id,
enum rvu_af_dl_param_id {
RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
- RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE,
RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT,
+ RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE,
RVU_AF_DEVLINK_PARAM_ID_NIX_MAXLF,
};
@@ -1434,15 +1434,6 @@ static const struct devlink_param rvu_af_dl_params[] = {
BIT(DEVLINK_PARAM_CMODE_RUNTIME),
rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
rvu_af_dl_dwrr_mtu_validate),
-};
-
-static const struct devlink_param rvu_af_dl_param_exact_match[] = {
- DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE,
- "npc_exact_feature_disable", DEVLINK_PARAM_TYPE_STRING,
- BIT(DEVLINK_PARAM_CMODE_RUNTIME),
- rvu_af_npc_exact_feature_get,
- rvu_af_npc_exact_feature_disable,
- rvu_af_npc_exact_feature_validate),
DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT,
"npc_mcam_high_zone_percent", DEVLINK_PARAM_TYPE_U8,
BIT(DEVLINK_PARAM_CMODE_RUNTIME),
@@ -1457,6 +1448,15 @@ static const struct devlink_param rvu_af_dl_param_exact_match[] = {
rvu_af_dl_nix_maxlf_validate),
};
+static const struct devlink_param rvu_af_dl_param_exact_match[] = {
+ DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE,
+ "npc_exact_feature_disable", DEVLINK_PARAM_TYPE_STRING,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ rvu_af_npc_exact_feature_get,
+ rvu_af_npc_exact_feature_disable,
+ rvu_af_npc_exact_feature_validate),
+};
+
/* Devlink switch mode */
static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 66203a90f052..d39001cdc707 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -499,29 +499,115 @@ static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf)
rvu_cgx_disable_dmac_entries(rvu, pcifunc);
}
+#define NIX_BPIDS_PER_LMAC 8
+#define NIX_BPIDS_PER_CPT 1
+static int nix_setup_bpids(struct rvu *rvu, struct nix_hw *hw, int blkaddr)
+{
+ struct nix_bp *bp = &hw->bp;
+ int err, max_bpids;
+ u64 cfg;
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+ max_bpids = FIELD_GET(NIX_CONST_MAX_BPIDS, cfg);
+
+ /* Reserve the BPIds for CGX and SDP */
+ bp->cgx_bpid_cnt = rvu->hw->cgx_links * NIX_BPIDS_PER_LMAC;
+ bp->sdp_bpid_cnt = rvu->hw->sdp_links * FIELD_GET(NIX_CONST_SDP_CHANS, cfg);
+ bp->free_pool_base = bp->cgx_bpid_cnt + bp->sdp_bpid_cnt +
+ NIX_BPIDS_PER_CPT;
+ bp->bpids.max = max_bpids - bp->free_pool_base;
+
+ err = rvu_alloc_bitmap(&bp->bpids);
+ if (err)
+ return err;
+
+ bp->fn_map = devm_kcalloc(rvu->dev, bp->bpids.max,
+ sizeof(u16), GFP_KERNEL);
+ if (!bp->fn_map)
+ return -ENOMEM;
+
+ bp->intf_map = devm_kcalloc(rvu->dev, bp->bpids.max,
+ sizeof(u8), GFP_KERNEL);
+ if (!bp->intf_map)
+ return -ENOMEM;
+
+ bp->ref_cnt = devm_kcalloc(rvu->dev, bp->bpids.max,
+ sizeof(u8), GFP_KERNEL);
+ if (!bp->ref_cnt)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc)
+{
+ int blkaddr, bpid, err;
+ struct nix_hw *nix_hw;
+ struct nix_bp *bp;
+
+ if (!is_lbk_vf(rvu, pcifunc))
+ return;
+
+ err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+ if (err)
+ return;
+
+ bp = &nix_hw->bp;
+
+ mutex_lock(&rvu->rsrc_lock);
+ for (bpid = 0; bpid < bp->bpids.max; bpid++) {
+ if (bp->fn_map[bpid] == pcifunc) {
+ bp->ref_cnt[bpid]--;
+ if (bp->ref_cnt[bpid])
+ continue;
+ rvu_free_rsrc(&bp->bpids, bpid);
+ bp->fn_map[bpid] = 0;
+ }
+ }
+ mutex_unlock(&rvu->rsrc_lock);
+}
+
int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
struct nix_bp_cfg_req *req,
struct msg_rsp *rsp)
{
u16 pcifunc = req->hdr.pcifunc;
+ int blkaddr, pf, type, err;
+ u16 chan_base, chan, bpid;
struct rvu_pfvf *pfvf;
- int blkaddr, pf, type;
- u16 chan_base, chan;
+ struct nix_hw *nix_hw;
+ struct nix_bp *bp;
u64 cfg;
pf = rvu_get_pf(pcifunc);
- type = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+ type = is_lbk_vf(rvu, pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK)
return 0;
pfvf = rvu_get_pfvf(rvu, pcifunc);
- blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+ if (err)
+ return err;
+ bp = &nix_hw->bp;
chan_base = pfvf->rx_chan_base + req->chan_base;
for (chan = chan_base; chan < (chan_base + req->chan_cnt); chan++) {
cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan));
rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan),
cfg & ~BIT_ULL(16));
+
+ if (type == NIX_INTF_TYPE_LBK) {
+ bpid = cfg & GENMASK(8, 0);
+ mutex_lock(&rvu->rsrc_lock);
+ rvu_free_rsrc(&bp->bpids, bpid - bp->free_pool_base);
+ for (bpid = 0; bpid < bp->bpids.max; bpid++) {
+ if (bp->fn_map[bpid] == pcifunc) {
+ bp->fn_map[bpid] = 0;
+ bp->ref_cnt[bpid] = 0;
+ }
+ }
+ mutex_unlock(&rvu->rsrc_lock);
+ }
}
return 0;
}
@@ -529,25 +615,20 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
int type, int chan_id)
{
- int bpid, blkaddr, lmac_chan_cnt, sdp_chan_cnt;
- u16 cgx_bpid_cnt, lbk_bpid_cnt, sdp_bpid_cnt;
+ int bpid, blkaddr, sdp_chan_base, err;
struct rvu_hwinfo *hw = rvu->hw;
struct rvu_pfvf *pfvf;
+ struct nix_hw *nix_hw;
u8 cgx_id, lmac_id;
- u64 cfg;
-
- blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc);
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
- lmac_chan_cnt = cfg & 0xFF;
+ struct nix_bp *bp;
- cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
- lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
+ pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
- sdp_chan_cnt = cfg & 0xFFF;
- sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
+ err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr);
+ if (err)
+ return err;
- pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+ bp = &nix_hw->bp;
/* Backpressure IDs range division
* CGX channles are mapped to (0 - 191) BPIDs
@@ -561,38 +642,48 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
*/
switch (type) {
case NIX_INTF_TYPE_CGX:
- if ((req->chan_base + req->chan_cnt) > 16)
- return -EINVAL;
+ if ((req->chan_base + req->chan_cnt) > NIX_BPIDS_PER_LMAC)
+ return NIX_AF_ERR_INVALID_BPID_REQ;
rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id);
/* Assign bpid based on cgx, lmac and chan id */
- bpid = (cgx_id * hw->lmac_per_cgx * lmac_chan_cnt) +
- (lmac_id * lmac_chan_cnt) + req->chan_base;
+ bpid = (cgx_id * hw->lmac_per_cgx * NIX_BPIDS_PER_LMAC) +
+ (lmac_id * NIX_BPIDS_PER_LMAC) + req->chan_base;
if (req->bpid_per_chan)
bpid += chan_id;
- if (bpid > cgx_bpid_cnt)
- return -EINVAL;
+ if (bpid > bp->cgx_bpid_cnt)
+ return NIX_AF_ERR_INVALID_BPID;
break;
case NIX_INTF_TYPE_LBK:
- if ((req->chan_base + req->chan_cnt) > 63)
- return -EINVAL;
- bpid = cgx_bpid_cnt + req->chan_base;
- if (req->bpid_per_chan)
- bpid += chan_id;
- if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt))
- return -EINVAL;
+ /* Alloc bpid from the free pool */
+ mutex_lock(&rvu->rsrc_lock);
+ bpid = rvu_alloc_rsrc(&bp->bpids);
+ if (bpid < 0) {
+ mutex_unlock(&rvu->rsrc_lock);
+ return NIX_AF_ERR_INVALID_BPID;
+ }
+ bp->fn_map[bpid] = req->hdr.pcifunc;
+ bp->ref_cnt[bpid]++;
+ bpid += bp->free_pool_base;
+ mutex_unlock(&rvu->rsrc_lock);
break;
case NIX_INTF_TYPE_SDP:
- if ((req->chan_base + req->chan_cnt) > 255)
- return -EINVAL;
+ if ((req->chan_base + req->chan_cnt) > bp->sdp_bpid_cnt)
+ return NIX_AF_ERR_INVALID_BPID_REQ;
- bpid = sdp_bpid_cnt + req->chan_base;
+ /* Handle usecase of 2 SDP blocks */
+ if (!hw->cap.programmable_chans)
+ sdp_chan_base = pfvf->rx_chan_base - NIX_CHAN_SDP_CH_START;
+ else
+ sdp_chan_base = pfvf->rx_chan_base - hw->sdp_chan_base;
+
+ bpid = bp->cgx_bpid_cnt + req->chan_base + sdp_chan_base;
if (req->bpid_per_chan)
bpid += chan_id;
- if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt + sdp_bpid_cnt))
- return -EINVAL;
+ if (bpid > (bp->cgx_bpid_cnt + bp->sdp_bpid_cnt))
+ return NIX_AF_ERR_INVALID_BPID;
break;
default:
return -EINVAL;
@@ -612,7 +703,7 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
u64 cfg;
pf = rvu_get_pf(pcifunc);
- type = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+ type = is_lbk_vf(rvu, pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
if (is_sdp_pfvf(pcifunc))
type = NIX_INTF_TYPE_SDP;
@@ -1523,7 +1614,7 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
cfg = NPC_TX_DEF_PKIND;
rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf), cfg);
- intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+ intf = is_lbk_vf(rvu, pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
if (is_sdp_pfvf(pcifunc))
intf = NIX_INTF_TYPE_SDP;
@@ -1899,7 +1990,7 @@ static int nix_get_tx_link(struct rvu *rvu, u16 pcifunc)
int pf = rvu_get_pf(pcifunc);
u8 cgx_id = 0, lmac_id = 0;
- if (is_afvf(pcifunc)) {/* LBK links */
+ if (is_lbk_vf(rvu, pcifunc)) {/* LBK links */
return hw->cgx_links;
} else if (is_pf_cgxmapped(rvu, pf)) {
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
@@ -1916,7 +2007,7 @@ static void nix_get_txschq_range(struct rvu *rvu, u16 pcifunc,
struct rvu_hwinfo *hw = rvu->hw;
int pf = rvu_get_pf(pcifunc);
- if (is_afvf(pcifunc)) { /* LBK links */
+ if (is_lbk_vf(rvu, pcifunc)) { /* LBK links */
*start = hw->cap.nix_txsch_per_cgx_lmac * link;
*end = *start + hw->cap.nix_txsch_per_lbk_lmac;
} else if (is_pf_cgxmapped(rvu, pf)) { /* CGX links */
@@ -3356,7 +3447,7 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
int pf;
/* skip multicast pkt replication for AF's VFs & SDP links */
- if (is_afvf(pcifunc) || is_sdp_pfvf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc) || is_sdp_pfvf(pcifunc))
return 0;
if (!hw->cap.nix_rx_multicast)
@@ -3703,7 +3794,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
if (blkaddr < 0)
return NIX_AF_ERR_AF_LF_INVALID;
- if (is_afvf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc))
rvu_get_lbk_link_max_frs(rvu, &rsp->max_mtu);
else
rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
@@ -4039,6 +4130,13 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
field->ltype_match = NPC_LT_LE_GTPU;
field->ltype_mask = 0xF;
break;
+ case NIX_FLOW_KEY_TYPE_CUSTOM0:
+ field->lid = NPC_LID_LC;
+ field->hdr_offset = 6;
+ field->bytesm1 = 1; /* 2 Bytes*/
+ field->ltype_match = NPC_LT_LC_CUSTOM0;
+ field->ltype_mask = 0xF;
+ break;
case NIX_FLOW_KEY_TYPE_VLAN:
field->lid = NPC_LID_LB;
field->hdr_offset = 2; /* Skip TPID (2-bytes) */
@@ -4420,7 +4518,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
if (!nix_hw)
return NIX_AF_ERR_INVALID_NIXBLK;
- if (is_afvf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc))
rvu_get_lbk_link_max_frs(rvu, &max_mtu);
else
rvu_get_lmac_link_max_frs(rvu, &max_mtu);
@@ -4784,6 +4882,10 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
if (err)
return err;
+ err = nix_setup_bpids(rvu, nix_hw, blkaddr);
+ if (err)
+ return err;
+
/* Configure segmentation offload formats */
nix_setup_lso(rvu, nix_hw, blkaddr);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 167145bdcb75..e350242bbafb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -61,28 +61,6 @@ int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena)
return 0;
}
-static int npc_mcam_verify_pf_func(struct rvu *rvu,
- struct mcam_entry *entry_data, u8 intf,
- u16 pcifunc)
-{
- u16 pf_func, pf_func_mask;
-
- if (is_npc_intf_rx(intf))
- return 0;
-
- pf_func_mask = (entry_data->kw_mask[0] >> 32) &
- NPC_KEX_PF_FUNC_MASK;
- pf_func = (entry_data->kw[0] >> 32) & NPC_KEX_PF_FUNC_MASK;
-
- pf_func = be16_to_cpu((__force __be16)pf_func);
- if (pf_func_mask != NPC_KEX_PF_FUNC_MASK ||
- ((pf_func & ~RVU_PFVF_FUNC_MASK) !=
- (pcifunc & ~RVU_PFVF_FUNC_MASK)))
- return -EINVAL;
-
- return 0;
-}
-
void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf)
{
int blkaddr;
@@ -417,7 +395,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
owner = mcam->entry2pfvf_map[index];
target_func = (entry->action >> 4) & 0xffff;
/* do nothing when target is LBK/PF or owner is not PF */
- if (is_pffunc_af(owner) || is_afvf(target_func) ||
+ if (is_pffunc_af(owner) || is_lbk_vf(rvu, target_func) ||
(owner & RVU_PFVF_FUNC_MASK) ||
!(target_func & RVU_PFVF_FUNC_MASK))
return;
@@ -437,6 +415,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
return;
}
+ /* AF modifies given action iff PF/VF has requested for it */
+ if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT)
+ return;
+
/* copy VF default entry action to the VF mcam entry */
rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
target_func);
@@ -626,7 +608,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
int blkaddr, index;
/* AF's and SDP VFs work in promiscuous mode */
- if (is_afvf(pcifunc) || is_sdp_vf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc) || is_sdp_vf(rvu, pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -791,7 +773,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
return;
/* Skip LBK VFs */
- if (is_afvf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc))
return;
/* If pkt replication is not supported,
@@ -871,7 +853,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
u16 vf_func;
/* Only CGX PF/VF can add allmulticast entry */
- if (is_afvf(pcifunc) && is_sdp_vf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc) && is_sdp_vf(rvu, pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -1850,8 +1832,8 @@ void npc_mcam_rsrcs_deinit(struct rvu *rvu)
{
struct npc_mcam *mcam = &rvu->hw->mcam;
- kfree(mcam->bmap);
- kfree(mcam->bmap_reverse);
+ bitmap_free(mcam->bmap);
+ bitmap_free(mcam->bmap_reverse);
kfree(mcam->entry2pfvf_map);
kfree(mcam->cntr2pfvf_map);
kfree(mcam->entry2cntr_map);
@@ -1904,21 +1886,20 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
mcam->pf_offset = mcam->nixlf_offset + nixlf_count;
/* Allocate bitmaps for managing MCAM entries */
- mcam->bmap = kmalloc_array(BITS_TO_LONGS(mcam->bmap_entries),
- sizeof(long), GFP_KERNEL);
+ mcam->bmap = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL);
if (!mcam->bmap)
return -ENOMEM;
- mcam->bmap_reverse = kmalloc_array(BITS_TO_LONGS(mcam->bmap_entries),
- sizeof(long), GFP_KERNEL);
+ mcam->bmap_reverse = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL);
if (!mcam->bmap_reverse)
goto free_bmap;
mcam->bmap_fcnt = mcam->bmap_entries;
/* Alloc memory for saving entry to RVU PFFUNC allocation mapping */
- mcam->entry2pfvf_map = kmalloc_array(mcam->bmap_entries,
- sizeof(u16), GFP_KERNEL);
+ mcam->entry2pfvf_map = kcalloc(mcam->bmap_entries, sizeof(u16),
+ GFP_KERNEL);
+
if (!mcam->entry2pfvf_map)
goto free_bmap_reverse;
@@ -1941,21 +1922,21 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
if (err)
goto free_entry_map;
- mcam->cntr2pfvf_map = kmalloc_array(mcam->counters.max,
- sizeof(u16), GFP_KERNEL);
+ mcam->cntr2pfvf_map = kcalloc(mcam->counters.max, sizeof(u16),
+ GFP_KERNEL);
if (!mcam->cntr2pfvf_map)
goto free_cntr_bmap;
/* Alloc memory for MCAM entry to counter mapping and for tracking
* counter's reference count.
*/
- mcam->entry2cntr_map = kmalloc_array(mcam->bmap_entries,
- sizeof(u16), GFP_KERNEL);
+ mcam->entry2cntr_map = kcalloc(mcam->bmap_entries, sizeof(u16),
+ GFP_KERNEL);
if (!mcam->entry2cntr_map)
goto free_cntr_map;
- mcam->cntr_refcnt = kmalloc_array(mcam->counters.max,
- sizeof(u16), GFP_KERNEL);
+ mcam->cntr_refcnt = kcalloc(mcam->counters.max, sizeof(u16),
+ GFP_KERNEL);
if (!mcam->cntr_refcnt)
goto free_entry_cntr_map;
@@ -1988,9 +1969,9 @@ free_cntr_bmap:
free_entry_map:
kfree(mcam->entry2pfvf_map);
free_bmap_reverse:
- kfree(mcam->bmap_reverse);
+ bitmap_free(mcam->bmap_reverse);
free_bmap:
- kfree(mcam->bmap);
+ bitmap_free(mcam->bmap);
return -ENOMEM;
}
@@ -2852,12 +2833,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
else
nix_intf = pfvf->nix_rx_intf;
- if (!is_pffunc_af(pcifunc) &&
- npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) {
- rc = NPC_MCAM_INVALID_REQ;
- goto exit;
- }
-
/* For AF installed rules, the nix_intf should be set to target NIX */
if (is_pffunc_af(req->hdr.pcifunc))
nix_intf = req->intf;
@@ -3209,10 +3184,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
if (!is_npc_interface_valid(rvu, req->intf))
return NPC_MCAM_INVALID_REQ;
- if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf,
- req->hdr.pcifunc))
- return NPC_MCAM_INVALID_REQ;
-
/* Try to allocate a MCAM entry */
entry_req.hdr.pcifunc = req->hdr.pcifunc;
entry_req.contig = true;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index c75669c8fde7..c181e7aa9eb6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -53,6 +53,7 @@ static const char * const npc_flow_names[] = {
[NPC_MPLS4_TTL] = "lse depth 4",
[NPC_TYPE_ICMP] = "icmp type",
[NPC_CODE_ICMP] = "icmp code",
+ [NPC_TCP_FLAGS] = "tcp flags",
[NPC_UNKNOWN] = "unknown",
};
@@ -530,6 +531,7 @@ do { \
NPC_SCAN_HDR(NPC_DPORT_SCTP, NPC_LID_LD, NPC_LT_LD_SCTP, 2, 2);
NPC_SCAN_HDR(NPC_TYPE_ICMP, NPC_LID_LD, NPC_LT_LD_ICMP, 0, 1);
NPC_SCAN_HDR(NPC_CODE_ICMP, NPC_LID_LD, NPC_LT_LD_ICMP, 1, 1);
+ NPC_SCAN_HDR(NPC_TCP_FLAGS, NPC_LID_LD, NPC_LT_LD_TCP, 12, 2);
NPC_SCAN_HDR(NPC_ETYPE_ETHER, NPC_LID_LA, NPC_LT_LA_ETHER, 12, 2);
NPC_SCAN_HDR(NPC_ETYPE_TAG1, NPC_LID_LB, NPC_LT_LB_CTAG, 4, 2);
NPC_SCAN_HDR(NPC_ETYPE_TAG2, NPC_LID_LB, NPC_LT_LB_STAG_QINQ, 8, 2);
@@ -574,7 +576,8 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
BIT_ULL(NPC_DPORT_TCP) | BIT_ULL(NPC_DPORT_UDP) |
BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP) |
BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP) |
- BIT_ULL(NPC_TYPE_ICMP) | BIT_ULL(NPC_CODE_ICMP);
+ BIT_ULL(NPC_TYPE_ICMP) | BIT_ULL(NPC_CODE_ICMP) |
+ BIT_ULL(NPC_TCP_FLAGS);
/* for tcp/udp/sctp corresponding layer type should be in the key */
if (*features & proto_flags) {
@@ -982,7 +985,8 @@ do { \
mask->icmp_type, 0);
NPC_WRITE_FLOW(NPC_CODE_ICMP, icmp_code, pkt->icmp_code, 0,
mask->icmp_code, 0);
-
+ NPC_WRITE_FLOW(NPC_TCP_FLAGS, tcp_flags, ntohs(pkt->tcp_flags), 0,
+ ntohs(mask->tcp_flags), 0);
NPC_WRITE_FLOW(NPC_IPSEC_SPI, spi, ntohl(pkt->spi), 0,
ntohl(mask->spi), 0);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 6f73ad9807f0..086f05c0376f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -439,6 +439,9 @@
#define NIX_AF_LINKX_RANGE_MASK GENMASK_ULL(19, 16)
#define NIX_AF_LINKX_MCS_CNT_MASK GENMASK_ULL(33, 32)
+#define NIX_CONST_MAX_BPIDS GENMASK_ULL(23, 12)
+#define NIX_CONST_SDP_CHANS GENMASK_ULL(11, 0)
+
/* SSO */
#define SSO_AF_CONST (0x1000)
#define SSO_AF_CONST1 (0x1008)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
index ae50d56258ec..38cfe148f4b7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
@@ -40,8 +40,12 @@ bool is_sdp_pf(u16 pcifunc)
!(pcifunc & RVU_PFVF_FUNC_MASK));
}
-bool is_sdp_vf(u16 pcifunc)
+#define RVU_SDP_VF_DEVID 0xA0F7
+bool is_sdp_vf(struct rvu *rvu, u16 pcifunc)
{
+ if (!(pcifunc & ~RVU_PFVF_FUNC_MASK))
+ return (rvu->vf_devid == RVU_SDP_VF_DEVID);
+
return (is_sdp_pfvf(pcifunc) &&
!!(pcifunc & RVU_PFVF_FUNC_MASK));
}
@@ -52,6 +56,14 @@ int rvu_sdp_init(struct rvu *rvu)
struct rvu_pfvf *pfvf;
u32 i = 0;
+ if (rvu->fwdata->channel_data.valid) {
+ sdp_pf_num[0] = 0;
+ pfvf = &rvu->pf[sdp_pf_num[0]];
+ pfvf->sdp_info = &rvu->fwdata->channel_data.info;
+
+ return 0;
+ }
+
while ((i < MAX_SDP) && (pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
PCI_DEVID_OTX2_SDP_PF,
pdev)) != NULL) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 7ca6941ea0b9..02d0b707aea5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -951,8 +951,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
if (pfvf->ptp && qidx < pfvf->hw.tx_queues) {
err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt,
sizeof(*sq->timestamps));
- if (err)
+ if (err) {
+ kfree(sq->sg);
+ sq->sg = NULL;
return err;
+ }
}
sq->head = 0;
@@ -968,7 +971,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
sq->stats.bytes = 0;
sq->stats.pkts = 0;
- return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura);
+ err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura);
+ if (err) {
+ kfree(sq->sg);
+ sq->sg = NULL;
+ return err;
+ }
+
+ return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 2928898c7f8d..7f786de61014 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -314,7 +314,6 @@ static int otx2_set_channels(struct net_device *dev,
pfvf->hw.tx_queues = channel->tx_count;
if (pfvf->xdp_prog)
pfvf->hw.xdp_queues = channel->rx_count;
- pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues;
if (if_up)
err = dev->netdev_ops->ndo_open(dev);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index a57455aebff6..e5fe67e73865 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1744,6 +1744,7 @@ int otx2_open(struct net_device *netdev)
/* RQ and SQs are mapped to different CQs,
* so find out max CQ IRQs (i.e CINTs) needed.
*/
+ pf->hw.non_qos_queues = pf->hw.tx_queues + pf->hw.xdp_queues;
pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues,
pf->hw.tc_tx_queues);
@@ -2643,8 +2644,6 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog)
xdp_features_clear_redirect_target(dev);
}
- pf->hw.non_qos_queues += pf->hw.xdp_queues;
-
if (if_up)
otx2_open(pf->netdev);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 4fd44b6eecea..87bdb93cb066 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -638,6 +638,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
BIT(FLOW_DISSECTOR_KEY_IPSEC) |
BIT_ULL(FLOW_DISSECTOR_KEY_MPLS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ICMP) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
BIT_ULL(FLOW_DISSECTOR_KEY_IP)))) {
netdev_info(nic->netdev, "unsupported flow used key 0x%llx",
dissector->used_keys);
@@ -857,6 +858,16 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
}
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+
+ flow_spec->tcp_flags = match.key->flags;
+ flow_mask->tcp_flags = match.mask->flags;
+ req->features |= BIT_ULL(NPC_TCP_FLAGS);
+ }
+
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
struct flow_match_mpls match;
u8 bit;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 4d519ea833b2..f828d32737af 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -1403,7 +1403,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
struct otx2_cq_queue *cq,
bool *need_xdp_flush)
{
- unsigned char *hard_start, *data;
+ unsigned char *hard_start;
int qidx = cq->cq_idx;
struct xdp_buff xdp;
struct page *page;
@@ -1417,9 +1417,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
xdp_init_buff(&xdp, pfvf->rbsize, &cq->xdp_rxq);
- data = (unsigned char *)phys_to_virt(pa);
- hard_start = page_address(page);
- xdp_prepare_buff(&xdp, hard_start, data - hard_start,
+ hard_start = (unsigned char *)phys_to_virt(pa);
+ xdp_prepare_buff(&xdp, hard_start, OTX2_HEAD_ROOM,
cqe->sg.seg_size, false);
act = bpf_prog_run_xdp(prog, &xdp);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index a6e91573f8da..de123350bd46 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -4761,7 +4761,10 @@ static int mtk_probe(struct platform_device *pdev)
}
if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36));
+ if (!err)
+ err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+
if (err) {
dev_err(&pdev->dev, "Wrong DMA config\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
index d58b07e7e123..7063c78bd35f 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
@@ -286,7 +286,6 @@ mtk_wed_wo_queue_free(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q)
static void
mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q)
{
- struct page *page;
int i;
for (i = 0; i < q->n_desc; i++) {
@@ -301,19 +300,12 @@ mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q)
entry->buf = NULL;
}
- if (!q->cache.va)
- return;
-
- page = virt_to_page(q->cache.va);
- __page_frag_cache_drain(page, q->cache.pagecnt_bias);
- memset(&q->cache, 0, sizeof(q->cache));
+ page_frag_cache_drain(&q->cache);
}
static void
mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q)
{
- struct page *page;
-
for (;;) {
void *buf = mtk_wed_wo_dequeue(wo, q, NULL, true);
@@ -323,12 +315,7 @@ mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q)
skb_free_frag(buf);
}
- if (!q->cache.va)
- return;
-
- page = virt_to_page(q->cache.va);
- __page_frag_cache_drain(page, q->cache.pagecnt_bias);
- memset(&q->cache, 0, sizeof(q->cache));
+ page_frag_cache_drain(&q->cache);
}
static void
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index f5b1f8c7834f..7f20813456e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2199,8 +2199,9 @@ reset_slave:
if (cmd != MLX4_COMM_CMD_RESET) {
mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
slave, cmd);
- /* Turn on internal error letting slave reset itself immeditaly,
- * otherwise it might take till timeout on command is passed
+ /* Turn on internal error letting slave reset itself
+ * immediately, otherwise it might take till timeout on
+ * command is passed
*/
reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
}
@@ -2954,7 +2955,7 @@ static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port,
dummy_admin.default_vlan = vlan;
/* VF wants to move to other VST state which is valid with current
- * rate limit. Either differnt default vlan in VST or other
+ * rate limit. Either different default vlan in VST or other
* supported QoS priority. Otherwise we don't allow this change when
* the TX rate is still configured.
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 4d4f9cf9facb..e130e7259275 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -115,7 +115,7 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
return;
}
- /* Acessing the CQ outside of rcu_read_lock is safe, because
+ /* Accessing the CQ outside of rcu_read_lock is safe, because
* the CQ is freed only after interrupt handling is completed.
*/
++cq->arm_sn;
@@ -137,7 +137,7 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
return;
}
- /* Acessing the CQ outside of rcu_read_lock is safe, because
+ /* Accessing the CQ outside of rcu_read_lock is safe, because
* the CQ is freed only after interrupt handling is completed.
*/
cq->event(cq, event_type);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 9e3b76182088..cd754cd76bde 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -96,8 +96,8 @@ void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev)
#define MLX4_EN_WRAP_AROUND_SEC 10UL
/* By scheduling the overflow check every 5 seconds, we have a reasonably
- * good chance we wont miss a wrap around.
- * TOTO: Use a timer instead of a work queue to increase the guarantee.
+ * good chance we won't miss a wrap around.
+ * TODO: Use a timer instead of a work queue to increase the guarantee.
*/
#define MLX4_EN_OVERFLOW_PERIOD (MLX4_EN_WRAP_AROUND_SEC * HZ / 2)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 33bbcced8105..5d3fde63b273 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -42,6 +42,7 @@
#include <net/ip.h>
#include <net/vxlan.h>
#include <net/devlink.h>
+#include <net/rps.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
@@ -1072,7 +1073,8 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
1, MLX4_MCAST_CONFIG);
/* Update multicast list - we cache all addresses so they won't
- * change while HW is updated holding the command semaphor */
+ * change while HW is updated holding the command semaphore
+ */
netif_addr_lock_bh(dev);
mlx4_en_cache_mclist(dev);
netif_addr_unlock_bh(dev);
@@ -1817,7 +1819,7 @@ int mlx4_en_start_port(struct net_device *dev)
mlx4_en_set_rss_steer_rules(priv))
mlx4_warn(mdev, "Failed setting steering rules\n");
- /* Attach rx QP to bradcast address */
+ /* Attach rx QP to broadcast address */
eth_broadcast_addr(&mc_list[10]);
mc_list[5] = priv->port; /* needed for B0 steering support */
if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a09b6e05337d..eac49657bd07 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -762,7 +762,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Drop packet on bad receive or bad checksum */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR)) {
- en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n",
+ en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
((struct mlx4_err_cqe *)cqe)->syndrome);
goto next;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 65cb63f6c465..1ddb11cb25f9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -992,7 +992,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_info->ts_requested = 1;
}
- /* Prepare ctrl segement apart opcode+ownership, which depends on
+ /* Prepare ctrl segment apart opcode+ownership, which depends on
* whether LSO is used */
tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 6598b10a9ff4..9572a45f6143 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -210,7 +210,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
s_eqe->slave_id = slave;
- /* ensure all information is written before setting the ownersip bit */
+ /* ensure all information is written before setting the ownership bit */
dma_wmb();
s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
++slave_eq->prod;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
index 954b86faac29..40ca29bb928c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -44,7 +44,7 @@
/* Default supported priorities for VPP allocation */
#define MLX4_DEFAULT_QOS_PRIO (0)
-/* Derived from FW feature definition, 0 is the default vport fo all QPs */
+/* Derived from FW feature definition, 0 is the default vport for all QPs */
#define MLX4_VPP_DEFAULT_VPORT (0)
struct mlx4_vport_qos_param {
@@ -98,7 +98,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
u16 *available_vpp, u8 *vpp_p_up);
/**
- * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities.
+ * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among different priorities.
* The total number of VPPs assigned to all for a port must not exceed
* the value reported by available_vpp in mlx4_ALLOCATE_VPP_get.
* VPP allocation is allowed only after the port type has been set,
@@ -113,7 +113,7 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up);
/**
- * mlx4_SET_VPORT_QOS_get - Query QoS proporties of a Vport.
+ * mlx4_SET_VPORT_QOS_get - Query QoS properties of a Vport.
* Each priority allowed for the Vport is assigned with a share of the BW,
* and a BW limitation. This commands query the current QoS values.
*
@@ -128,7 +128,7 @@ int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport,
struct mlx4_vport_qos_param *out_param);
/**
- * mlx4_SET_VPORT_QOS_set - Set QoS proporties of a Vport.
+ * mlx4_SET_VPORT_QOS_set - Set QoS properties of a Vport.
* QoS parameters can be modified at any time, but must be initialized
* before any QP is associated with the VPort.
*
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2581226836b5..7b02ff61126d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -129,7 +129,7 @@ static const struct mlx4_profile default_profile = {
.num_cq = 1 << 16,
.num_mcg = 1 << 13,
.num_mpt = 1 << 19,
- .num_mtt = 1 << 20, /* It is really num mtt segements */
+ .num_mtt = 1 << 20, /* It is really num mtt segments */
};
static const struct mlx4_profile low_mem_profile = {
@@ -1508,7 +1508,7 @@ static int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
priv->v2p.port1 = port1;
priv->v2p.port2 = port2;
} else {
- mlx4_err(dev, "Failed to change port mape: %d\n", err);
+ mlx4_err(dev, "Failed to change port map: %d\n", err);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index e9cd4bb6f83d..d3d9ec042d2c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -112,7 +112,7 @@ struct mlx4_en_stat_out_flow_control_mbox {
__be64 tx_pause_duration;
/* Number of transmitter transitions from XOFF state to XON state */
__be64 tx_pause_transition;
- /* Reserverd */
+ /* Reserved */
__be64 reserved[2];
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 256a06b3c096..4e43f4a7d246 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -2118,7 +2118,7 @@ static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
* @data: output buffer to put the requested data into.
*
* Reads cable module eeprom data, puts the outcome data into
- * data pointer paramer.
+ * data pointer parameter.
* Returns num of read bytes on success or a negative error
* code.
*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index c44870b175f9..76dc5a9b9648 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -29,7 +29,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \
- lib/crypto.o
+ lib/crypto.o lib/sd.o
#
# Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a7b1f9686c09..4957412ff1f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1923,6 +1923,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
{
const char *namep = mlx5_command_str(opcode);
struct mlx5_cmd_stats *stats;
+ unsigned long flags;
if (!err || !(strcmp(namep, "unknown command opcode")))
return;
@@ -1930,7 +1931,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
stats = xa_load(&dev->cmd.stats, opcode);
if (!stats)
return;
- spin_lock_irq(&stats->lock);
+ spin_lock_irqsave(&stats->lock, flags);
stats->failed++;
if (err < 0)
stats->last_failed_errno = -err;
@@ -1939,7 +1940,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
stats->last_failed_mbox_status = status;
stats->last_failed_syndrome = syndrome;
}
- spin_unlock_irq(&stats->lock);
+ spin_unlock_irqrestore(&stats->lock, flags);
}
/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index cf0477f53dc4..47e7c2639774 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -210,7 +210,7 @@ static bool is_dpll_supported(struct mlx5_core_dev *dev)
return false;
if (!MLX5_CAP_MCAM_REG2(dev, synce_registers)) {
- mlx5_core_warn(dev, "Missing SyncE capability\n");
+ mlx5_core_dbg(dev, "Missing SyncE capability\n");
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 3e064234f6fe..98d4306929f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -157,6 +157,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
return -EOPNOTSUPP;
}
+ if (action == DEVLINK_RELOAD_ACTION_FW_ACTIVATE &&
+ !dev->priv.fw_reset) {
+ NL_SET_ERR_MSG_MOD(extack, "FW activate is unsupported for this function");
+ return -EOPNOTSUPP;
+ }
+
if (mlx5_core_is_pf(dev) && pci_num_vf(pdev))
NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
index 18fed2b34fb1..904e08de852e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
@@ -41,6 +41,7 @@ struct mlx5_dpll_synce_status {
enum mlx5_msees_oper_status oper_status;
bool ho_acq;
bool oper_freq_measure;
+ enum mlx5_msees_failure_reason failure_reason;
s32 frequency_diff;
};
@@ -60,6 +61,7 @@ mlx5_dpll_synce_status_get(struct mlx5_core_dev *mdev,
synce_status->oper_status = MLX5_GET(msees_reg, out, oper_status);
synce_status->ho_acq = MLX5_GET(msees_reg, out, ho_acq);
synce_status->oper_freq_measure = MLX5_GET(msees_reg, out, oper_freq_measure);
+ synce_status->failure_reason = MLX5_GET(msees_reg, out, failure_reason);
synce_status->frequency_diff = MLX5_GET(msees_reg, out, frequency_diff);
return 0;
}
@@ -99,6 +101,26 @@ mlx5_dpll_lock_status_get(struct mlx5_dpll_synce_status *synce_status)
}
}
+static enum dpll_lock_status_error
+mlx5_dpll_lock_status_error_get(struct mlx5_dpll_synce_status *synce_status)
+{
+ switch (synce_status->oper_status) {
+ case MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER:
+ fallthrough;
+ case MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING:
+ switch (synce_status->failure_reason) {
+ case MLX5_MSEES_FAILURE_REASON_PORT_DOWN:
+ return DPLL_LOCK_STATUS_ERROR_MEDIA_DOWN;
+ case MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF:
+ return DPLL_LOCK_STATUS_ERROR_FRACTIONAL_FREQUENCY_OFFSET_TOO_HIGH;
+ default:
+ return DPLL_LOCK_STATUS_ERROR_UNDEFINED;
+ }
+ default:
+ return DPLL_LOCK_STATUS_ERROR_NONE;
+ }
+}
+
static enum dpll_pin_state
mlx5_dpll_pin_state_get(struct mlx5_dpll_synce_status *synce_status)
{
@@ -118,10 +140,11 @@ mlx5_dpll_pin_ffo_get(struct mlx5_dpll_synce_status *synce_status,
return 0;
}
-static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll,
- void *priv,
- enum dpll_lock_status *status,
- struct netlink_ext_ack *extack)
+static int
+mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll, void *priv,
+ enum dpll_lock_status *status,
+ enum dpll_lock_status_error *status_error,
+ struct netlink_ext_ack *extack)
{
struct mlx5_dpll_synce_status synce_status;
struct mlx5_dpll *mdpll = priv;
@@ -131,6 +154,7 @@ static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll,
if (err)
return err;
*status = mlx5_dpll_lock_status_get(&synce_status);
+ *status_error = mlx5_dpll_lock_status_error_get(&synce_status);
return 0;
}
@@ -261,7 +285,7 @@ static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll,
{
if (mdpll->tracking_netdev)
return;
- netdev_dpll_pin_set(netdev, mdpll->dpll_pin);
+ dpll_netdev_pin_set(netdev, mdpll->dpll_pin);
mdpll->tracking_netdev = netdev;
}
@@ -269,7 +293,7 @@ static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll)
{
if (!mdpll->tracking_netdev)
return;
- netdev_dpll_pin_clear(mdpll->tracking_netdev);
+ dpll_netdev_pin_clear(mdpll->tracking_netdev);
mdpll->tracking_netdev = NULL;
}
@@ -389,7 +413,7 @@ static void mlx5_dpll_remove(struct auxiliary_device *adev)
struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev);
struct mlx5_core_dev *mdev = mdpll->mdev;
- cancel_delayed_work(&mdpll->work);
+ cancel_delayed_work_sync(&mdpll->work);
mlx5_dpll_mdev_netdev_untrack(mdpll, mdev);
destroy_workqueue(mdpll->wq);
dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0bfe1ca8a364..84db05fb9389 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -60,6 +60,7 @@
#include "lib/clock.h"
#include "en/rx_res.h"
#include "en/selq.h"
+#include "lib/sd.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -791,6 +792,8 @@ struct mlx5e_channel {
struct hwtstamp_config *tstamp;
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
+ int vec_ix;
+ int sd_ix;
int cpu;
/* Sync between icosq recovery and XSK enable/disable. */
struct mutex icosq_recovery_lock;
@@ -914,7 +917,7 @@ struct mlx5e_priv {
bool tx_ptp_opened;
bool rx_ptp_opened;
struct hwtstamp_config tstamp;
- u16 q_counter;
+ u16 q_counter[MLX5_SD_MAX_GROUP_SZ];
u16 drop_rq_q_counter;
struct notifier_block events_nb;
struct notifier_block blocking_events_nb;
@@ -1029,12 +1032,12 @@ struct mlx5e_xsk_param;
struct mlx5e_rq_param;
int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
- struct mlx5e_xsk_param *xsk, int node,
+ struct mlx5e_xsk_param *xsk, int node, u16 q_counter,
struct mlx5e_rq *rq);
#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
void mlx5e_close_rq(struct mlx5e_rq *rq);
-int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter);
void mlx5e_destroy_rq(struct mlx5e_rq *rq);
struct mlx5e_sq_param;
@@ -1124,7 +1127,7 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
extern const struct ethtool_ops mlx5e_ethtool_ops;
int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey);
-int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
bool enable_mc_lb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
index 48581ea3adcb..874a1016623c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -23,20 +23,26 @@ bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
}
-void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn,
+ u32 *vhca_id)
{
struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
*rqn = c->rq.rqn;
+ if (vhca_id)
+ *vhca_id = MLX5_CAP_GEN(c->mdev, vhca_id);
}
-void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn,
+ u32 *vhca_id)
{
struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
*rqn = c->xskrq.rqn;
+ if (vhca_id)
+ *vhca_id = MLX5_CAP_GEN(c->mdev, vhca_id);
}
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
index 637ca90daaa8..6715aa9383b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -10,8 +10,10 @@ struct mlx5e_channels;
unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
-void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
-void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn,
+ u32 *vhca_id);
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn,
+ u32 *vhca_id);
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index e1283531e0b8..671adbad0a40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -436,6 +436,7 @@ static int fs_any_create_groups(struct mlx5e_flow_table *ft)
in = kvzalloc(inlen, GFP_KERNEL);
if (!in || !ft->g) {
kfree(ft->g);
+ ft->g = NULL;
kvfree(in);
return -ENOMEM;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
index 40c8df111754..e2d8d2754be0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -20,10 +20,8 @@
#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
-int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+static int mlx5e_monitor_counter_cap(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = priv->mdev;
-
if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
return false;
if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
@@ -36,24 +34,38 @@ int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
return true;
}
-static void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *pos;
+ int i;
+
+ mlx5_sd_for_each_dev(i, priv->mdev, pos)
+ if (!mlx5e_monitor_counter_cap(pos))
+ return false;
+ return true;
+}
+
+static void mlx5e_monitor_counter_arm(struct mlx5_core_dev *mdev)
{
u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
MLX5_SET(arm_monitor_counter_in, in, opcode,
MLX5_CMD_OP_ARM_MONITOR_COUNTER);
- mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in);
+ mlx5_cmd_exec_in(mdev, arm_monitor_counter, in);
}
static void mlx5e_monitor_counters_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
monitor_counters_work);
+ struct mlx5_core_dev *pos;
+ int i;
mutex_lock(&priv->state_lock);
mlx5e_stats_update_ndo_stats(priv);
mutex_unlock(&priv->state_lock);
- mlx5e_monitor_counter_arm(priv);
+ mlx5_sd_for_each_dev(i, priv->mdev, pos)
+ mlx5e_monitor_counter_arm(pos);
}
static int mlx5e_monitor_event_handler(struct notifier_block *nb,
@@ -97,15 +109,13 @@ static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
}
/* check if mlx5e_monitor_counter_supported before calling this function*/
-static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+static void mlx5e_set_monitor_counter(struct mlx5_core_dev *mdev, int q_counter)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- int q_counter = priv->q_counter;
int cnt = 0;
if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 &&
@@ -127,13 +137,17 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
/* check if mlx5e_monitor_counter_supported before calling this function*/
void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
{
+ struct mlx5_core_dev *pos;
+ int i;
+
INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
MONITOR_COUNTER);
- mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
-
- mlx5e_set_monitor_counter(priv);
- mlx5e_monitor_counter_arm(priv);
+ mlx5_sd_for_each_dev(i, priv->mdev, pos) {
+ mlx5_eq_notifier_register(pos, &priv->monitor_counters_nb);
+ mlx5e_set_monitor_counter(pos, priv->q_counter[i]);
+ mlx5e_monitor_counter_arm(pos);
+ }
queue_work(priv->wq, &priv->update_stats_work);
}
@@ -141,11 +155,15 @@ void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
{
u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ struct mlx5_core_dev *pos;
+ int i;
MLX5_SET(set_monitor_counter_in, in, opcode,
MLX5_CMD_OP_SET_MONITOR_COUNTER);
- mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in);
- mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ mlx5_sd_for_each_dev(i, priv->mdev, pos) {
+ mlx5_cmd_exec_in(pos, set_monitor_counter, in);
+ mlx5_eq_notifier_unregister(pos, &priv->monitor_counters_nb);
+ }
cancel_work_sync(&priv->monitor_counters_work);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 284253b79266..a3f31d9d527e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -240,11 +240,14 @@ static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
return xsk->headroom + hw_mtu;
}
-static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool no_head_tail_room)
{
- /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
- u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 headroom;
+
+ if (no_head_tail_room)
+ return SKB_DATA_ALIGN(hw_mtu);
+ headroom = mlx5e_get_linear_rq_headroom(params, NULL);
return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
}
@@ -254,6 +257,7 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
bool mpwqe)
{
+ bool no_head_tail_room;
u32 sz;
/* XSK frames are mapped as individual pages, because frames may come in
@@ -262,7 +266,13 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
if (xsk)
return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
- sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
+ no_head_tail_room = params->xdp_prog && mpwqe && !mlx5e_rx_is_linear_skb(mdev, params, xsk);
+
+ /* When no_head_tail_room is set, headroom and tailroom are excluded from skb calculations.
+ * no_head_tail_room should be set in the case of XDP with Striding RQ
+ * when SKB is not linear. This is because another page is allocated for the linear part.
+ */
+ sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, no_head_tail_room));
/* XDP in mlx5e doesn't support multiple packets per page.
* Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
@@ -289,7 +299,11 @@ bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
return false;
- /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
+ /* Call mlx5e_rx_get_linear_sz_skb with the no_head_tail_room parameter set
+ * to exclude headroom and tailroom from calculations.
+ * no_head_tail_room is true when SKB is built on XDP_PASS on XSK RQs
+ * since packet data buffers don't have headroom and tailroom resreved for the SKB.
+ * Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
* must fit into a CPU page.
*/
if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
@@ -674,7 +688,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
.napi = &c->napi,
.ch_stats = c->stats,
.node = cpu_to_node(c->cpu),
- .ix = c->ix,
+ .ix = c->vec_ix,
};
}
@@ -945,7 +959,6 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- u16 q_counter,
struct mlx5e_rq_param *param)
{
void *rqc = param->rqc;
@@ -1007,7 +1020,6 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
MLX5_SET(wq, wq, log_wq_stride,
mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
- MLX5_SET(rqc, rqc, counter_set_id, q_counter);
MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
@@ -1018,7 +1030,6 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
}
void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
- u16 q_counter,
struct mlx5e_rq_param *param)
{
void *rqc = param->rqc;
@@ -1027,7 +1038,6 @@ void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, log_wq_stride,
mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
- MLX5_SET(rqc, rqc, counter_set_id, q_counter);
param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
}
@@ -1064,8 +1074,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
bool allow_swp;
- allow_swp =
- mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev);
+ allow_swp = mlx5_geneve_tx_allowed(mdev) ||
+ (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO);
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
@@ -1292,13 +1302,12 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- u16 q_counter,
struct mlx5e_channel_param *cparam)
{
u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
int err;
- err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
+ err = mlx5e_build_rq_param(mdev, params, NULL, &cparam->rq);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 6800949dafbc..9a781f18b57f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -130,10 +130,8 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- u16 q_counter,
struct mlx5e_rq_param *param);
void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
- u16 q_counter,
struct mlx5e_rq_param *param);
void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param);
@@ -149,7 +147,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param);
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
- u16 q_counter,
struct mlx5e_channel_param *cparam);
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index c206cc0a8483..d0af7271da34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -42,9 +42,9 @@ mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metad
WARN_ON_ONCE(tracker->inuse);
tracker->inuse = true;
- spin_lock(&list->tracker_list_lock);
+ spin_lock_bh(&list->tracker_list_lock);
list_add_tail(&tracker->entry, &list->tracker_list_head);
- spin_unlock(&list->tracker_list_lock);
+ spin_unlock_bh(&list->tracker_list_lock);
}
static void
@@ -54,9 +54,9 @@ mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 me
WARN_ON_ONCE(!tracker->inuse);
tracker->inuse = false;
- spin_lock(&list->tracker_list_lock);
+ spin_lock_bh(&list->tracker_list_lock);
list_del(&tracker->entry);
- spin_unlock(&list->tracker_list_lock);
+ spin_unlock_bh(&list->tracker_list_lock);
}
void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata)
@@ -155,7 +155,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq,
struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map;
struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n;
- spin_lock(&cqe_list->tracker_list_lock);
+ spin_lock_bh(&cqe_list->tracker_list_lock);
list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) {
struct sk_buff *skb =
mlx5e_ptp_metadata_map_lookup(metadata_map, pos->metadata_id);
@@ -170,7 +170,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq,
pos->inuse = false;
list_del(&pos->entry);
}
- spin_unlock(&cqe_list->tracker_list_lock);
+ spin_unlock_bh(&cqe_list->tracker_list_lock);
}
#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
@@ -213,7 +213,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
out:
napi_consume_skb(skb, budget);
- md_buff[*md_buff_sz++] = metadata_id;
+ md_buff[(*md_buff_sz)++] = metadata_id;
if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) &&
!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work);
@@ -646,7 +646,6 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
struct net_device *netdev,
- u16 q_counter,
struct mlx5e_ptp_params *ptp_params)
{
struct mlx5e_rq_param *rq_params = &ptp_params->rq_param;
@@ -655,7 +654,7 @@ static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
mlx5e_init_rq_type_params(mdev, params);
params->sw_mtu = netdev->max_mtu;
- mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params);
+ mlx5e_build_rq_param(mdev, params, NULL, rq_params);
}
static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
@@ -681,7 +680,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
/* RQ */
if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
params->vlan_strip_disable = orig->vlan_strip_disable;
- mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+ mlx5e_ptp_build_rq_param(c->mdev, c->netdev, cparams);
}
}
@@ -714,13 +713,16 @@ static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
struct mlx5e_rq_param *rq_param)
{
int node = dev_to_node(c->mdev->device);
- int err;
+ int err, sd_ix;
+ u16 q_counter;
err = mlx5e_init_ptp_rq(c, params, &c->rq);
if (err)
return err;
- return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq);
+ sd_ix = mlx5_sd_ch_ix_get_dev_ix(c->mdev, MLX5E_PTP_CHANNEL_IX);
+ q_counter = c->priv->q_counter[sd_ix];
+ return mlx5e_open_rq(params, rq_param, NULL, node, q_counter, &c->rq);
}
static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
@@ -935,6 +937,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
mlx5e_ptp_rx_set_fs(c->priv);
mlx5e_activate_rq(&c->rq);
+ netif_queue_set_napi(c->netdev, c->rq.ix, NETDEV_QUEUE_TYPE_RX, &c->napi);
}
mlx5e_trigger_napi_sched(&c->napi);
}
@@ -943,8 +946,10 @@ void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
{
int tc;
- if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netif_queue_set_napi(c->netdev, c->rq.ix, NETDEV_QUEUE_TYPE_RX, NULL);
mlx5e_deactivate_rq(&c->rq);
+ }
if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
for (tc = 0; tc < c->num_tc; tc++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 34adf8c3f81a..e87e26f2c669 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -122,8 +122,8 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
memset(&param_sq, 0, sizeof(param_sq));
memset(&param_cq, 0, sizeof(param_cq));
- mlx5e_build_sq_param(priv->mdev, params, &param_sq);
- mlx5e_build_tx_cq_param(priv->mdev, params, &param_cq);
+ mlx5e_build_sq_param(c->mdev, params, &param_sq);
+ mlx5e_build_tx_cq_param(c->mdev, params, &param_cq);
err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
if (err)
goto err_free_sq;
@@ -176,7 +176,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
*/
smp_wmb();
- qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid);
+ qos_dbg(sq->mdev, "Activate QoS SQ qid %u\n", node_qid);
mlx5e_activate_txqsq(sq);
return 0;
@@ -190,7 +190,7 @@ void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
if (!sq) /* Handle the case when the SQ failed to open. */
return;
- qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ qos_dbg(sq->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 4358798d6ce1..25d751eba99b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -294,8 +294,8 @@ static void mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
params = &priv->channels.params;
rq_sz = mlx5e_rqwq_get_size(rq);
- real_time = mlx5_is_real_time_rq(priv->mdev);
- rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
+ real_time = mlx5_is_real_time_rq(rq->mdev);
+ rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(rq->mdev, params, NULL));
mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 6b44ddce14e9..0ab9db319530 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -219,7 +219,6 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
struct mlx5e_txqsq *sq, int tc)
{
bool stopped = netif_xmit_stopped(sq->txq);
- struct mlx5e_priv *priv = sq->priv;
u8 state;
int err;
@@ -227,7 +226,7 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
- err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+ err = mlx5_core_query_sq_state(sq->mdev, sq->sqn, &state);
if (!err)
devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
index 7b8ff7a71003..bcafb4bf9415 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -4,6 +4,33 @@
#include "rqt.h"
#include <linux/mlx5/transobj.h>
+static bool verify_num_vhca_ids(struct mlx5_core_dev *mdev, u32 *vhca_ids,
+ unsigned int size)
+{
+ unsigned int max_num_vhca_id = MLX5_CAP_GEN_2(mdev, max_rqt_vhca_id);
+ int i;
+
+ /* Verify that all vhca_ids are in range [0, max_num_vhca_ids - 1] */
+ for (i = 0; i < size; i++)
+ if (vhca_ids[i] >= max_num_vhca_id)
+ return false;
+ return true;
+}
+
+static bool rqt_verify_vhca_ids(struct mlx5_core_dev *mdev, u32 *vhca_ids,
+ unsigned int size)
+{
+ if (!vhca_ids)
+ return true;
+
+ if (!MLX5_CAP_GEN(mdev, cross_vhca_rqt))
+ return false;
+ if (!verify_num_vhca_ids(mdev, vhca_ids, size))
+ return false;
+
+ return true;
+}
+
void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
unsigned int num_channels)
{
@@ -13,19 +40,38 @@ void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
indir->table[i] = i % num_channels;
}
+static void fill_rqn_list(void *rqtc, u32 *rqns, u32 *vhca_ids, unsigned int size)
+{
+ unsigned int i;
+
+ if (vhca_ids) {
+ MLX5_SET(rqtc, rqtc, rq_vhca_id_format, 1);
+ for (i = 0; i < size; i++) {
+ MLX5_SET(rqtc, rqtc, rq_vhca[i].rq_num, rqns[i]);
+ MLX5_SET(rqtc, rqtc, rq_vhca[i].rq_vhca_id, vhca_ids[i]);
+ }
+ } else {
+ for (i = 0; i < size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+ }
+}
static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
- u16 max_size, u32 *init_rqns, u16 init_size)
+ u16 max_size, u32 *init_rqns, u32 *init_vhca_ids, u16 init_size)
{
+ int entry_sz;
void *rqtc;
int inlen;
int err;
u32 *in;
- int i;
+
+ if (!rqt_verify_vhca_ids(mdev, init_vhca_ids, init_size))
+ return -EOPNOTSUPP;
rqt->mdev = mdev;
rqt->size = max_size;
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size;
+ entry_sz = init_vhca_ids ? MLX5_ST_SZ_BYTES(rq_vhca) : MLX5_ST_SZ_BYTES(rq_num);
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + entry_sz * init_size;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -33,10 +79,9 @@ static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size);
-
MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size);
- for (i = 0; i < init_size; i++)
- MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]);
+
+ fill_rqn_list(rqtc, init_rqns, init_vhca_ids, init_size);
err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn);
@@ -49,7 +94,7 @@ int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
{
u16 max_size = indir_enabled ? indir_table_size : 1;
- return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
+ return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, NULL, 1);
}
static int mlx5e_bits_invert(unsigned long a, int size)
@@ -63,7 +108,8 @@ static int mlx5e_bits_invert(unsigned long a, int size)
return inv;
}
-static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns,
+static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, u32 *rss_vhca_ids, u32 *vhca_ids,
+ unsigned int num_rqns,
u8 hfunc, struct mlx5e_rss_params_indir *indir)
{
unsigned int i;
@@ -82,30 +128,42 @@ static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns
*/
return -EINVAL;
rss_rqns[i] = rqns[ix];
+ if (vhca_ids)
+ rss_vhca_ids[i] = vhca_ids[ix];
}
return 0;
}
int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
- u32 *rqns, unsigned int num_rqns,
+ u32 *rqns, u32 *vhca_ids, unsigned int num_rqns,
u8 hfunc, struct mlx5e_rss_params_indir *indir)
{
- u32 *rss_rqns;
+ u32 *rss_rqns, *rss_vhca_ids = NULL;
int err;
rss_rqns = kvmalloc_array(indir->actual_table_size, sizeof(*rss_rqns), GFP_KERNEL);
if (!rss_rqns)
return -ENOMEM;
- err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (vhca_ids) {
+ rss_vhca_ids = kvmalloc_array(indir->actual_table_size, sizeof(*rss_vhca_ids),
+ GFP_KERNEL);
+ if (!rss_vhca_ids) {
+ kvfree(rss_rqns);
+ return -ENOMEM;
+ }
+ }
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, rss_vhca_ids, vhca_ids, num_rqns, hfunc, indir);
if (err)
goto out;
- err = mlx5e_rqt_init(rqt, mdev, indir->max_table_size, rss_rqns,
+ err = mlx5e_rqt_init(rqt, mdev, indir->max_table_size, rss_rqns, rss_vhca_ids,
indir->actual_table_size);
out:
+ kvfree(rss_vhca_ids);
kvfree(rss_rqns);
return err;
}
@@ -126,15 +184,20 @@ void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
}
-static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size)
+static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
+ unsigned int size)
{
- unsigned int i;
+ int entry_sz;
void *rqtc;
int inlen;
u32 *in;
int err;
- inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size;
+ if (!rqt_verify_vhca_ids(rqt->mdev, vhca_ids, size))
+ return -EINVAL;
+
+ entry_sz = vhca_ids ? MLX5_ST_SZ_BYTES(rq_vhca) : MLX5_ST_SZ_BYTES(rq_num);
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + entry_sz * size;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -143,8 +206,8 @@ static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int siz
MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
MLX5_SET(rqtc, rqtc, rqt_actual_size, size);
- for (i = 0; i < size; i++)
- MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+
+ fill_rqn_list(rqtc, rqns, vhca_ids, size);
err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen);
@@ -152,17 +215,21 @@ static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int siz
return err;
}
-int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn)
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id)
{
- return mlx5e_rqt_redirect(rqt, &rqn, 1);
+ return mlx5e_rqt_redirect(rqt, &rqn, vhca_id, 1);
}
-int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
+ unsigned int num_rqns,
u8 hfunc, struct mlx5e_rss_params_indir *indir)
{
- u32 *rss_rqns;
+ u32 *rss_rqns, *rss_vhca_ids = NULL;
int err;
+ if (!rqt_verify_vhca_ids(rqt->mdev, vhca_ids, num_rqns))
+ return -EINVAL;
+
if (WARN_ON(rqt->size != indir->max_table_size))
return -EINVAL;
@@ -170,13 +237,23 @@ int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_
if (!rss_rqns)
return -ENOMEM;
- err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (vhca_ids) {
+ rss_vhca_ids = kvmalloc_array(indir->actual_table_size, sizeof(*rss_vhca_ids),
+ GFP_KERNEL);
+ if (!rss_vhca_ids) {
+ kvfree(rss_rqns);
+ return -ENOMEM;
+ }
+ }
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, rss_vhca_ids, vhca_ids, num_rqns, hfunc, indir);
if (err)
goto out;
- err = mlx5e_rqt_redirect(rqt, rss_rqns, indir->actual_table_size);
+ err = mlx5e_rqt_redirect(rqt, rss_rqns, rss_vhca_ids, indir->actual_table_size);
out:
+ kvfree(rss_vhca_ids);
kvfree(rss_rqns);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
index 77fba3ebd18d..e0bc30308c77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -20,7 +20,7 @@ void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
unsigned int num_channels);
struct mlx5e_rqt {
- struct mlx5_core_dev *mdev;
+ struct mlx5_core_dev *mdev; /* primary */
u32 rqtn;
u16 size;
};
@@ -28,7 +28,7 @@ struct mlx5e_rqt {
int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
bool indir_enabled, u32 init_rqn, u32 indir_table_size);
int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
- u32 *rqns, unsigned int num_rqns,
+ u32 *rqns, u32 *vhca_ids, unsigned int num_rqns,
u8 hfunc, struct mlx5e_rss_params_indir *indir);
void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt);
@@ -38,8 +38,9 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
}
u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
-int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn);
-int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
+ unsigned int num_rqns,
u8 hfunc, struct mlx5e_rss_params_indir *indir);
#endif /* __MLX5_EN_RQT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
index c1545a2e8d6d..5f742f896600 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -74,7 +74,7 @@ struct mlx5e_rss {
struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_rqt rqt;
- struct mlx5_core_dev *mdev;
+ struct mlx5_core_dev *mdev; /* primary */
u32 drop_rqn;
bool inner_ft_support;
bool enabled;
@@ -473,21 +473,22 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
return 0;
}
-static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns)
{
int err;
- err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir);
+ err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, vhca_ids, num_rqns, rss->hash.hfunc,
+ &rss->indir);
if (err)
mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n",
mlx5e_rqt_get_rqtn(&rss->rqt), err);
return err;
}
-void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns)
{
rss->enabled = true;
- mlx5e_rss_apply(rss, rqns, num_rqns);
+ mlx5e_rss_apply(rss, rqns, vhca_ids, num_rqns);
}
void mlx5e_rss_disable(struct mlx5e_rss *rss)
@@ -495,7 +496,7 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss)
int err;
rss->enabled = false;
- err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn);
+ err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn, NULL);
if (err)
mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n",
mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
@@ -568,7 +569,7 @@ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
const u8 *key, const u8 *hfunc,
- u32 *rqns, unsigned int num_rqns)
+ u32 *rqns, u32 *vhca_ids, unsigned int num_rqns)
{
bool changed_indir = false;
bool changed_hash = false;
@@ -608,7 +609,7 @@ int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
}
if (changed_indir && rss->enabled) {
- err = mlx5e_rss_apply(rss, rqns, num_rqns);
+ err = mlx5e_rss_apply(rss, rqns, vhca_ids, num_rqns);
if (err) {
mlx5e_rss_copy(rss, old_rss);
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
index d1d0bc350e92..d0df98963c8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -39,7 +39,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
const struct mlx5e_packet_merge_param *init_pkt_merge_param,
bool inner, u32 *tirn);
-void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns);
void mlx5e_rss_disable(struct mlx5e_rss *rss);
int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
@@ -47,7 +47,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
const u8 *key, const u8 *hfunc,
- u32 *rqns, unsigned int num_rqns);
+ u32 *rqns, u32 *vhca_ids, unsigned int num_rqns);
struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
index b23e224e3763..a86eade9a9e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -8,7 +8,7 @@
#define MLX5E_MAX_NUM_RSS 16
struct mlx5e_rx_res {
- struct mlx5_core_dev *mdev;
+ struct mlx5_core_dev *mdev; /* primary */
enum mlx5e_rx_res_features features;
unsigned int max_nch;
u32 drop_rqn;
@@ -19,6 +19,7 @@ struct mlx5e_rx_res {
struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
bool rss_active;
u32 *rss_rqns;
+ u32 *rss_vhca_ids;
unsigned int rss_nch;
struct {
@@ -34,6 +35,13 @@ struct mlx5e_rx_res {
/* API for rx_res_rss_* */
+static u32 *get_vhca_ids(struct mlx5e_rx_res *res, int offset)
+{
+ bool multi_vhca = res->features & MLX5E_RX_RES_FEATURE_MULTI_VHCA;
+
+ return multi_vhca ? res->rss_vhca_ids + offset : NULL;
+}
+
void mlx5e_rx_res_rss_update_num_channels(struct mlx5e_rx_res *res, u32 nch)
{
int i;
@@ -85,8 +93,11 @@ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int i
return PTR_ERR(rss);
mlx5e_rss_set_indir_uniform(rss, init_nch);
- if (res->rss_active)
- mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+ if (res->rss_active) {
+ u32 *vhca_ids = get_vhca_ids(res, 0);
+
+ mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch);
+ }
res->rss[i] = rss;
*rss_idx = i;
@@ -153,10 +164,12 @@ static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
struct mlx5e_rss *rss = res->rss[i];
+ u32 *vhca_ids;
if (!rss)
continue;
- mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+ vhca_ids = get_vhca_ids(res, 0);
+ mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch);
}
}
@@ -200,6 +213,7 @@ int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
const u32 *indir, const u8 *key, const u8 *hfunc)
{
+ u32 *vhca_ids = get_vhca_ids(res, 0);
struct mlx5e_rss *rss;
if (rss_idx >= MLX5E_MAX_NUM_RSS)
@@ -209,7 +223,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
if (!rss)
return -ENOENT;
- return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch);
+ return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, vhca_ids,
+ res->rss_nch);
}
int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx,
@@ -280,11 +295,13 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx)
static void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
{
+ kvfree(res->rss_vhca_ids);
kvfree(res->rss_rqns);
kvfree(res);
}
-static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsigned int max_nch)
+static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsigned int max_nch,
+ bool multi_vhca)
{
struct mlx5e_rx_res *rx_res;
@@ -298,6 +315,15 @@ static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsig
return NULL;
}
+ if (multi_vhca) {
+ rx_res->rss_vhca_ids = kvcalloc(max_nch, sizeof(*rx_res->rss_vhca_ids), GFP_KERNEL);
+ if (!rx_res->rss_vhca_ids) {
+ kvfree(rx_res->rss_rqns);
+ kvfree(rx_res);
+ return NULL;
+ }
+ }
+
return rx_res;
}
@@ -424,10 +450,11 @@ mlx5e_rx_res_create(struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features featu
const struct mlx5e_packet_merge_param *init_pkt_merge_param,
unsigned int init_nch)
{
+ bool multi_vhca = features & MLX5E_RX_RES_FEATURE_MULTI_VHCA;
struct mlx5e_rx_res *res;
int err;
- res = mlx5e_rx_res_alloc(mdev, max_nch);
+ res = mlx5e_rx_res_alloc(mdev, max_nch, multi_vhca);
if (!res)
return ERR_PTR(-ENOMEM);
@@ -504,10 +531,11 @@ static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
struct mlx5e_channels *chs,
unsigned int ix)
{
+ u32 *vhca_id = get_vhca_ids(res, ix);
u32 rqn = res->rss_rqns[ix];
int err;
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn, vhca_id);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
@@ -519,7 +547,7 @@ static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
{
int err;
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn, NULL);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
@@ -534,10 +562,12 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann
nch = mlx5e_channels_get_num(chs);
for (ix = 0; ix < chs->num; ix++) {
+ u32 *vhca_id = get_vhca_ids(res, ix);
+
if (mlx5e_channels_is_xsk(chs, ix))
- mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix], vhca_id);
else
- mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix], vhca_id);
}
res->rss_nch = chs->num;
@@ -554,7 +584,7 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann
if (!mlx5e_channels_get_ptp_rqn(chs, &rqn))
rqn = res->drop_rqn;
- err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn, NULL);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
mlx5e_rqt_get_rqtn(&res->ptp.rqt),
@@ -573,7 +603,7 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
mlx5e_rx_res_channel_deactivate_direct(res, ix);
if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
- err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn, NULL);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
mlx5e_rqt_get_rqtn(&res->ptp.rqt),
@@ -584,10 +614,12 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
unsigned int ix, bool xsk)
{
+ u32 *vhca_id = get_vhca_ids(res, ix);
+
if (xsk)
- mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix], vhca_id);
else
- mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix], vhca_id);
mlx5e_rx_res_rss_enable(res);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
index 82aaba8a82b3..7b1a9f0f1874 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -18,6 +18,7 @@ struct mlx5e_rss_params_hash;
enum mlx5e_rx_res_features {
MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
MLX5E_RX_RES_FEATURE_PTP = BIT(1),
+ MLX5E_RX_RES_FEATURE_MULTI_VHCA = BIT(2),
};
/* Setup */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
index 86bf007fd05b..b500cc2c9689 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -37,7 +37,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) {
if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
- mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+ mlx5_core_dbg(priv->mdev, "firmware flow level support is missing\n");
err = -EOPNOTSUPP;
goto err_check;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index ac458a8d10e0..53ca16cb9c41 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -63,10 +63,12 @@ static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t)
struct mlx5e_create_cq_param ccp = {};
struct dim_cq_moder trap_moder = {};
struct mlx5e_rq *rq = &t->rq;
+ u16 q_counter;
int node;
int err;
node = dev_to_node(mdev->device);
+ q_counter = priv->q_counter[0];
ccp.netdev = priv->netdev;
ccp.wq = priv->wq;
@@ -79,7 +81,7 @@ static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t)
return err;
mlx5e_init_trap_rq(t, &t->params, rq);
- err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq);
+ err = mlx5e_open_rq(&t->params, rq_param, NULL, node, q_counter, rq);
if (err)
goto err_destroy_cq;
@@ -116,15 +118,14 @@ static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct ml
}
static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
- int max_mtu, u16 q_counter,
- struct mlx5e_trap *t)
+ int max_mtu, struct mlx5e_trap *t)
{
struct mlx5e_params *params = &t->params;
params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
mlx5e_init_rq_type_params(mdev, params);
params->sw_mtu = max_mtu;
- mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param);
+ mlx5e_build_rq_param(mdev, params, NULL, &t->rq_param);
}
static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
@@ -138,7 +139,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
if (!t)
return ERR_PTR(-ENOMEM);
- mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t);
+ mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, t);
t->priv = priv;
t->mdev = priv->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index ebada0c5af3c..db776e515b6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -6,10 +6,10 @@
#include "setup.h"
#include "en/params.h"
-static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+static int mlx5e_xsk_map_pool(struct mlx5_core_dev *mdev,
struct xsk_buff_pool *pool)
{
- struct device *dev = mlx5_core_dma_dev(priv->mdev);
+ struct device *dev = mlx5_core_dma_dev(mdev);
return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
}
@@ -89,7 +89,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
return -EINVAL;
- err = mlx5e_xsk_map_pool(priv, pool);
+ err = mlx5e_xsk_map_pool(mlx5_sd_ch_ix_get_dev(priv->mdev, ix), pool);
if (unlikely(err))
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 82e6abbc1734..06592b9f0424 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -49,10 +49,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- u16 q_counter,
struct mlx5e_channel_param *cparam)
{
- mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
+ mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq);
mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
}
@@ -93,6 +92,7 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param
struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
struct mlx5e_xsk_param *xsk)
{
+ u16 q_counter = c->priv->q_counter[c->sd_ix];
struct mlx5e_rq *xskrq = &c->xskrq;
int err;
@@ -100,7 +100,7 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param
if (err)
return err;
- err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq);
+ err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), q_counter, xskrq);
if (err)
return err;
@@ -125,7 +125,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (!cparam)
return -ENOMEM;
- mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam);
+ mlx5e_build_xsk_cparam(priv->mdev, params, xsk, cparam);
err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
&c->xskrq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 161c5190c236..c54fd01ea635 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -336,12 +336,17 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
/* iv len */
aes_gcm->icv_len = x->aead->alg_icv_len;
+ attrs->dir = x->xso.dir;
+
/* esn */
if (x->props.flags & XFRM_STATE_ESN) {
attrs->replay_esn.trigger = true;
attrs->replay_esn.esn = sa_entry->esn_state.esn;
attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
+ if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
+ goto skip_replay_window;
+
switch (x->replay_esn->replay_window) {
case 32:
attrs->replay_esn.replay_window =
@@ -365,7 +370,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
}
}
- attrs->dir = x->xso.dir;
+skip_replay_window:
/* spi */
attrs->spi = be32_to_cpu(x->id.spi);
@@ -501,7 +506,8 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->replay_esn && x->replay_esn->replay_window != 32 &&
+ if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
+ x->replay_esn->replay_window != 32 &&
x->replay_esn->replay_window != 64 &&
x->replay_esn->replay_window != 128 &&
x->replay_esn->replay_window != 256) {
@@ -978,21 +984,41 @@ static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
queue_work(sa_entry->ipsec->wq, &work->work);
}
-static void mlx5e_xfrm_update_curlft(struct xfrm_state *x)
+static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct net *net = dev_net(x->xso.dev);
u64 packets, bytes, lastuse;
lockdep_assert(lockdep_is_held(&x->lock) ||
- lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex));
+ lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
+ lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock));
if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
return;
+ if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
+ mlx5_fc_query_cached(ipsec_rule->auth.fc, &bytes, &packets, &lastuse);
+ x->stats.integrity_failed += packets;
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, packets);
+
+ mlx5_fc_query_cached(ipsec_rule->trailer.fc, &bytes, &packets, &lastuse);
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, packets);
+ }
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ return;
+
mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
x->curlft.packets += packets;
x->curlft.bytes += bytes;
+
+ if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
+ mlx5_fc_query_cached(ipsec_rule->replay.fc, &bytes, &packets, &lastuse);
+ x->stats.replay += packets;
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, packets);
+ }
}
static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
@@ -1150,7 +1176,7 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
.xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
- .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft,
+ .xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index adaea3493193..7d943e93cf6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -137,7 +137,6 @@ struct mlx5e_ipsec_hw_stats {
struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_rx_drop_sp_alloc;
atomic64_t ipsec_rx_drop_sadb_miss;
- atomic64_t ipsec_rx_drop_syndrome;
atomic64_t ipsec_tx_drop_bundle;
atomic64_t ipsec_tx_drop_no_state;
atomic64_t ipsec_tx_drop_not_ip;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 51a144246ea6..727fa7c18523 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -304,12 +304,6 @@ drop:
return false;
}
-enum {
- MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED,
- MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED,
- MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER,
-};
-
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
u32 ipsec_meta_data)
@@ -343,20 +337,7 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
-
- switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
- case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
- xo->status = CRYPTO_SUCCESS;
- break;
- case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED:
- xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
- break;
- case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER:
- xo->status = CRYPTO_INVALID_PACKET_SYNTAX;
- break;
- default:
- atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_syndrome);
- }
+ xo->status = CRYPTO_SUCCESS;
}
int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata)
@@ -374,8 +355,6 @@ int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metada
return err;
}
- *metadata = MLX5_IPSEC_METADATA_CREATE(ipsec_obj_id,
- MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED);
-
+ *metadata = ipsec_obj_id;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 2ed99772f168..82064614846f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -43,7 +43,6 @@
#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1)
#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0))
#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0))
-#define MLX5_IPSEC_METADATA_CREATE(id, syndrome) ((id) | ((syndrome) << 24))
struct mlx5e_accel_tx_ipsec_state {
struct xfrm_offload *xo;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
index e0e36a09721c..dd36b04e30a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -51,7 +51,6 @@ static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index 984fa04bd331..e3e57c849436 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -96,7 +96,7 @@ bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
{
u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
- if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx))
+ if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx) || mlx5_get_sd(mdev))
return false;
/* Check the possibility to post the required ICOSQ WQEs. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index f11075e67658..adc6d8ea0960 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -11,6 +11,7 @@
#ifdef CONFIG_MLX5_EN_TLS
#include "lib/crypto.h"
+#include "lib/mlx5.h"
struct mlx5_crypto_dek *mlx5_ktls_create_key(struct mlx5_crypto_dek_pool *dek_pool,
struct tls_crypto_info *crypto_info);
@@ -61,7 +62,8 @@ void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_
static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev)
{
- return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx);
+ return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx) &&
+ !mlx5_get_sd(mdev);
}
bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 9b597cb24598..65ccb33edafb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -267,7 +267,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
goto err_out;
}
- pdev = mlx5_core_dma_dev(sq->channel->priv->mdev);
+ pdev = mlx5_core_dma_dev(sq->channel->mdev);
buf->dma_addr = dma_map_single(pdev, &buf->progress,
PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) {
@@ -425,14 +425,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
{
struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf;
struct mlx5e_ktls_offload_context_rx *priv_rx;
- struct mlx5e_ktls_rx_resync_ctx *resync;
u8 tracker_state, auth_state, *ctx;
struct device *dev;
u32 hw_seq;
priv_rx = buf->priv_rx;
- resync = &priv_rx->resync;
- dev = mlx5_core_dma_dev(resync->priv->mdev);
+ dev = mlx5_core_dma_dev(sq->channel->mdev);
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index d4ebd8743114..b2cabd6ab86c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -310,9 +310,9 @@ static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_o
mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}
-static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
- struct mlx5e_macsec_sa *sa,
- bool is_tx, struct net_device *netdev, u32 fs_id)
+static void mlx5e_macsec_cleanup_sa_fs(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *sa, bool is_tx,
+ struct net_device *netdev, u32 fs_id)
{
int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
@@ -322,20 +322,49 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev,
fs_id);
- mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id);
sa->macsec_rule = NULL;
}
+static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *sa, bool is_tx,
+ struct net_device *netdev, u32 fs_id)
+{
+ mlx5e_macsec_cleanup_sa_fs(macsec, sa, is_tx, netdev, fs_id);
+ mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id);
+}
+
+static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx,
+ struct mlx5e_macsec_sa *sa, bool encrypt,
+ bool is_tx, u32 *fs_id)
+{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
+ struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs;
+ struct mlx5_macsec_rule_attrs rule_attrs;
+ union mlx5_macsec_rule *macsec_rule;
+
+ rule_attrs.macsec_obj_id = sa->macsec_obj_id;
+ rule_attrs.sci = sa->sci;
+ rule_attrs.assoc_num = sa->assoc_num;
+ rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ macsec_rule = mlx5_macsec_fs_add_rule(macsec_fs, ctx, &rule_attrs, fs_id);
+ if (!macsec_rule)
+ return -ENOMEM;
+
+ sa->macsec_rule = macsec_rule;
+
+ return 0;
+}
+
static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
struct mlx5e_macsec_sa *sa,
bool encrypt, bool is_tx, u32 *fs_id)
{
struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec *macsec = priv->macsec;
- struct mlx5_macsec_rule_attrs rule_attrs;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_macsec_obj_attrs obj_attrs;
- union mlx5_macsec_rule *macsec_rule;
int err;
obj_attrs.next_pn = sa->next_pn;
@@ -357,20 +386,12 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
if (err)
return err;
- rule_attrs.macsec_obj_id = sa->macsec_obj_id;
- rule_attrs.sci = sa->sci;
- rule_attrs.assoc_num = sa->assoc_num;
- rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
- MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
-
- macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id);
- if (!macsec_rule) {
- err = -ENOMEM;
- goto destroy_macsec_object;
+ if (sa->active) {
+ err = mlx5e_macsec_init_sa_fs(ctx, sa, encrypt, is_tx, fs_id);
+ if (err)
+ goto destroy_macsec_object;
}
- sa->macsec_rule = macsec_rule;
-
return 0;
destroy_macsec_object:
@@ -526,9 +547,7 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
goto destroy_sa;
macsec_device->tx_sa[assoc_num] = tx_sa;
- if (!secy->operational ||
- assoc_num != tx_sc->encoding_sa ||
- !tx_sa->active)
+ if (!secy->operational)
goto out;
err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL);
@@ -595,7 +614,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
goto out;
if (ctx_tx_sa->active) {
- err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL);
+ err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL);
if (err)
goto out;
} else {
@@ -604,7 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
goto out;
}
- mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0);
+ mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0);
}
out:
mutex_unlock(&macsec->lock);
@@ -1030,8 +1049,9 @@ static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx)
goto out;
}
- mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev,
- rx_sc->sc_xarray_element->fs_id);
+ if (rx_sa->active)
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev,
+ rx_sc->sc_xarray_element->fs_id);
mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
kfree(rx_sa);
rx_sc->rx_sa[assoc_num] = NULL;
@@ -1112,8 +1132,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
if (!rx_sa || !rx_sa->macsec_rule)
continue;
- mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev,
- rx_sc->sc_xarray_element->fs_id);
+ mlx5e_macsec_cleanup_sa_fs(macsec, rx_sa, false, ctx->secy->netdev,
+ rx_sc->sc_xarray_element->fs_id);
}
}
@@ -1124,8 +1144,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
continue;
if (rx_sa->active) {
- err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false,
- &rx_sc->sc_xarray_element->fs_id);
+ err = mlx5e_macsec_init_sa_fs(ctx, rx_sa, true, false,
+ &rx_sc->sc_xarray_element->fs_id);
if (err)
goto out;
}
@@ -1178,7 +1198,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
if (!tx_sa)
continue;
- mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0);
+ mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0);
}
for (i = 0; i < MACSEC_NUM_AN; ++i) {
@@ -1187,7 +1207,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
continue;
if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) {
- err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL);
+ err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL);
if (err)
goto out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index bb7f86c993e5..c7f542d0b8f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -34,6 +34,7 @@
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <net/rps.h>
#include "en.h"
#define ARFS_HASH_SHIFT BITS_PER_BYTE
@@ -254,11 +255,13 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
sizeof(*ft->g), GFP_KERNEL);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in || !ft->g) {
- kfree(ft->g);
- kvfree(in);
+ if (!ft->g)
return -ENOMEM;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free_g;
}
mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
@@ -278,7 +281,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
break;
default:
err = -EINVAL;
- goto out;
+ goto err_free_in;
}
switch (type) {
@@ -300,7 +303,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
break;
default:
err = -EINVAL;
- goto out;
+ goto err_free_in;
}
MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
@@ -309,7 +312,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
MLX5_SET_CFG(in, end_flow_index, ix - 1);
ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
+ goto err_clean_group;
ft->num_groups++;
memset(in, 0, inlen);
@@ -318,18 +321,20 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
MLX5_SET_CFG(in, end_flow_index, ix - 1);
ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
+ goto err_clean_group;
ft->num_groups++;
kvfree(in);
return 0;
-err:
+err_clean_group:
err = PTR_ERR(ft->g[ft->num_groups]);
ft->g[ft->num_groups] = NULL;
-out:
+err_free_in:
kvfree(in);
-
+err_free_g:
+ kfree(ft->g);
+ ft->g = NULL;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 67f546683e85..6ed3a32b7e22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -95,7 +95,7 @@ static void mlx5e_destroy_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PO
{
int tc, i;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++)
for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++)
mlx5e_destroy_tis(mdev, tisn[i][tc]);
}
@@ -110,7 +110,7 @@ static int mlx5e_create_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PORT
int tc, i;
int err;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++) {
for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++) {
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
void *tisc;
@@ -140,7 +140,7 @@ err_close_tises:
return err;
}
-int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises)
{
struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
int err;
@@ -169,11 +169,15 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
goto err_destroy_mkey;
}
- err = mlx5e_create_tises(mdev, res->tisn);
- if (err) {
- mlx5_core_err(mdev, "alloc tises failed, %d\n", err);
- goto err_destroy_bfreg;
+ if (create_tises) {
+ err = mlx5e_create_tises(mdev, res->tisn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc tises failed, %d\n", err);
+ goto err_destroy_bfreg;
+ }
+ res->tisn_valid = true;
}
+
INIT_LIST_HEAD(&res->td.tirs_list);
mutex_init(&res->td.list_lock);
@@ -203,7 +207,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
mlx5_crypto_dek_cleanup(mdev->mlx5e_res.dek_priv);
mdev->mlx5e_res.dek_priv = NULL;
- mlx5e_destroy_tises(mdev, res->tisn);
+ if (res->tisn_valid)
+ mlx5e_destroy_tises(mdev, res->tisn);
mlx5_free_bfreg(mdev, &res->bfreg);
mlx5_core_destroy_mkey(mdev, res->mkey);
mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b5f1c4ca38ba..91848eae4565 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -70,6 +70,7 @@
#include "qos.h"
#include "en/trap.h"
#include "lib/devcom.h"
+#include "lib/sd.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
enum mlx5e_mpwrq_umr_mode umr_mode)
@@ -1024,7 +1025,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
mlx5_wq_destroy(&rq->wq_ctrl);
}
-int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter)
{
struct mlx5_core_dev *mdev = rq->mdev;
u8 ts_format;
@@ -1051,6 +1052,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, ts_format, ts_format);
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
@@ -1274,7 +1276,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
}
int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
- struct mlx5e_xsk_param *xsk, int node,
+ struct mlx5e_xsk_param *xsk, int node, u16 q_counter,
struct mlx5e_rq *rq)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -1287,7 +1289,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
if (err)
return err;
- err = mlx5e_create_rq(rq, param);
+ err = mlx5e_create_rq(rq, param, q_counter);
if (err)
goto err_free_rq;
@@ -1806,6 +1808,7 @@ void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
netdev_tx_reset_queue(sq->txq);
netif_tx_start_queue(sq->txq);
+ netif_queue_set_napi(sq->netdev, sq->txq_ix, NETDEV_QUEUE_TYPE_TX, sq->cq.napi);
}
void mlx5e_tx_disable_queue(struct netdev_queue *txq)
@@ -1819,6 +1822,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
+ netif_queue_set_napi(sq->netdev, sq->txq_ix, NETDEV_QUEUE_TYPE_TX, NULL);
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
@@ -2333,13 +2337,14 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_rq_param *rq_params)
{
+ u16 q_counter = c->priv->q_counter[c->sd_ix];
int err;
err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq);
if (err)
return err;
- return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
+ return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq);
}
static int mlx5e_open_queues(struct mlx5e_channel *c,
@@ -2526,14 +2531,20 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct xsk_buff_pool *xsk_pool,
struct mlx5e_channel **cp)
{
- int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix);
struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev;
struct mlx5e_xsk_param xsk;
struct mlx5e_channel *c;
unsigned int irq;
+ int vec_ix;
+ int cpu;
int err;
- err = mlx5_comp_irqn_get(priv->mdev, ix, &irq);
+ mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix);
+ vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix);
+ cpu = mlx5_comp_vector_get_cpu(mdev, vec_ix);
+
+ err = mlx5_comp_irqn_get(mdev, vec_ix, &irq);
if (err)
return err;
@@ -2546,20 +2557,23 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
return -ENOMEM;
c->priv = priv;
- c->mdev = priv->mdev;
+ c->mdev = mdev;
c->tstamp = &priv->tstamp;
c->ix = ix;
+ c->vec_ix = vec_ix;
+ c->sd_ix = mlx5_sd_ch_ix_get_dev_ix(mdev, ix);
c->cpu = cpu;
- c->pdev = mlx5_core_dma_dev(priv->mdev);
+ c->pdev = mlx5_core_dma_dev(mdev);
c->netdev = priv->netdev;
- c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ c->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
c->num_tc = mlx5e_get_dcb_num_tc(params);
c->xdp = !!params->xdp_prog;
c->stats = &priv->channel_stats[ix]->ch;
c->aff_mask = irq_get_effective_affinity_mask(irq);
- c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
+ c->lag_port = mlx5e_enumerate_lag_port(mdev, ix);
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
+ netif_napi_set_irq(&c->napi, irq);
err = mlx5e_open_queues(c, params, cparam);
if (unlikely(err))
@@ -2602,12 +2616,16 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
mlx5e_activate_xsk(c);
else
mlx5e_activate_rq(&c->rq);
+
+ netif_queue_set_napi(c->netdev, c->ix, NETDEV_QUEUE_TYPE_RX, &c->napi);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
{
int tc;
+ netif_queue_set_napi(c->netdev, c->ix, NETDEV_QUEUE_TYPE_RX, NULL);
+
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_deactivate_xsk(c);
else
@@ -2647,7 +2665,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
if (!chs->c || !cparam)
goto err_free;
- err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
+ err = mlx5e_build_channel_param(priv->mdev, &chs->params, cparam);
if (err)
goto err_free;
@@ -2935,15 +2953,18 @@ static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
struct mlx5e_params *params)
{
- struct mlx5_core_dev *mdev = priv->mdev;
- int num_comp_vectors, ix, irq;
-
- num_comp_vectors = mlx5_comp_vectors_max(mdev);
+ int ix;
for (ix = 0; ix < params->num_channels; ix++) {
+ int num_comp_vectors, irq, vec_ix;
+ struct mlx5_core_dev *mdev;
+
+ mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix);
+ num_comp_vectors = mlx5_comp_vectors_max(mdev);
cpumask_clear(priv->scratchpad.cpumask);
+ vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix);
- for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
+ for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) {
int cpu = mlx5_comp_vector_get_cpu(mdev, irq);
cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
@@ -3335,7 +3356,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
struct mlx5e_cq *cq = &drop_rq->cq;
int err;
- mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
+ mlx5e_build_drop_rq_param(mdev, &rq_param);
err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
if (err)
@@ -3349,7 +3370,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
if (err)
goto err_destroy_cq;
- err = mlx5e_create_rq(drop_rq, &rq_param);
+ err = mlx5e_create_rq(drop_rq, &rq_param, priv->drop_rq_q_counter);
if (err)
goto err_free_rq;
@@ -5264,13 +5285,17 @@ void mlx5e_create_q_counters(struct mlx5e_priv *priv)
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
- int err;
+ struct mlx5_core_dev *pos;
+ int err, i;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
- if (!err)
- priv->q_counter =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+
+ mlx5_sd_for_each_dev(i, mdev, pos) {
+ err = mlx5_cmd_exec_inout(pos, alloc_q_counter, in, out);
+ if (!err)
+ priv->q_counter[i] =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
if (!err)
@@ -5281,13 +5306,17 @@ void mlx5e_create_q_counters(struct mlx5e_priv *priv)
void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+ struct mlx5_core_dev *pos;
+ int i;
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
- if (priv->q_counter) {
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
- priv->q_counter);
- mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+ mlx5_sd_for_each_dev(i, priv->mdev, pos) {
+ if (priv->q_counter[i]) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ priv->q_counter[i]);
+ mlx5_cmd_exec_in(pos, dealloc_q_counter, in);
+ }
}
if (priv->drop_rq_q_counter) {
@@ -5371,6 +5400,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
features = MLX5E_RX_RES_FEATURE_PTP;
if (mlx5_tunnel_inner_ft_supported(mdev))
features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+ if (mlx5_get_sd(priv->mdev))
+ features |= MLX5E_RX_RES_FEATURE_MULTI_VHCA;
priv->rx_res = mlx5e_rx_res_create(priv->mdev, features, priv->max_nch, priv->drop_rq.rqn,
&priv->channels.params.packet_merge,
@@ -5980,28 +6011,52 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
free_netdev(netdev);
}
-static int mlx5e_resume(struct auxiliary_device *adev)
+static int _mlx5e_resume(struct auxiliary_device *adev)
{
struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
struct mlx5e_priv *priv = mlx5e_dev->priv;
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = edev->mdev;
- int err;
+ struct mlx5_core_dev *pos, *to;
+ int err, i;
if (netif_device_present(netdev))
return 0;
- err = mlx5e_create_mdev_resources(mdev);
- if (err)
- return err;
+ mlx5_sd_for_each_dev(i, mdev, pos) {
+ err = mlx5e_create_mdev_resources(pos, true);
+ if (err)
+ goto err_destroy_mdev_res;
+ }
err = mlx5e_attach_netdev(priv);
- if (err) {
- mlx5e_destroy_mdev_resources(mdev);
+ if (err)
+ goto err_destroy_mdev_res;
+
+ return 0;
+
+err_destroy_mdev_res:
+ to = pos;
+ mlx5_sd_for_each_dev_to(i, mdev, to, pos)
+ mlx5e_destroy_mdev_resources(pos);
+ return err;
+}
+
+static int mlx5e_resume(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct auxiliary_device *actual_adev;
+ int err;
+
+ err = mlx5_sd_init(mdev);
+ if (err)
return err;
- }
+ actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
+ if (actual_adev)
+ return _mlx5e_resume(actual_adev);
return 0;
}
@@ -6011,21 +6066,36 @@ static int _mlx5e_suspend(struct auxiliary_device *adev)
struct mlx5e_priv *priv = mlx5e_dev->priv;
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_core_dev *pos;
+ int i;
if (!netif_device_present(netdev)) {
if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
- mlx5e_destroy_mdev_resources(mdev);
+ mlx5_sd_for_each_dev(i, mdev, pos)
+ mlx5e_destroy_mdev_resources(pos);
return -ENODEV;
}
mlx5e_detach_netdev(priv);
- mlx5e_destroy_mdev_resources(mdev);
+ mlx5_sd_for_each_dev(i, mdev, pos)
+ mlx5e_destroy_mdev_resources(pos);
+
return 0;
}
static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
{
- return _mlx5e_suspend(adev);
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct auxiliary_device *actual_adev;
+ int err = 0;
+
+ actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
+ if (actual_adev)
+ err = _mlx5e_suspend(actual_adev);
+
+ mlx5_sd_cleanup(mdev);
+ return err;
}
static int _mlx5e_probe(struct auxiliary_device *adev)
@@ -6071,9 +6141,9 @@ static int _mlx5e_probe(struct auxiliary_device *adev)
goto err_destroy_netdev;
}
- err = mlx5e_resume(adev);
+ err = _mlx5e_resume(adev);
if (err) {
- mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err);
+ mlx5_core_err(mdev, "_mlx5e_resume failed, %d\n", err);
goto err_profile_cleanup;
}
@@ -6104,15 +6174,29 @@ err_devlink_unregister:
static int mlx5e_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
- return _mlx5e_probe(adev);
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct auxiliary_device *actual_adev;
+ int err;
+
+ err = mlx5_sd_init(mdev);
+ if (err)
+ return err;
+
+ actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
+ if (actual_adev)
+ return _mlx5e_probe(actual_adev);
+ return 0;
}
-static void mlx5e_remove(struct auxiliary_device *adev)
+static void _mlx5e_remove(struct auxiliary_device *adev)
{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
struct mlx5e_priv *priv = mlx5e_dev->priv;
+ struct mlx5_core_dev *mdev = edev->mdev;
- mlx5_core_uplink_netdev_set(priv->mdev, NULL);
+ mlx5_core_uplink_netdev_set(mdev, NULL);
mlx5e_dcbnl_delete_app(priv);
unregister_netdev(priv->netdev);
_mlx5e_suspend(adev);
@@ -6122,6 +6206,19 @@ static void mlx5e_remove(struct auxiliary_device *adev)
mlx5e_destroy_devlink(mlx5e_dev);
}
+static void mlx5e_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct auxiliary_device *actual_adev;
+
+ actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
+ if (actual_adev)
+ _mlx5e_remove(actual_adev);
+
+ mlx5_sd_cleanup(mdev);
+}
+
static const struct auxiliary_device_id mlx5e_id_table[] = {
{ .name = MLX5_ADEV_NAME ".eth", },
{},
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4b96ad657145..f3d0898bdbc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -561,11 +561,23 @@ static const struct counter_desc drop_rq_stats_desc[] = {
#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc)
#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc)
+static bool q_counter_any(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *pos;
+ int i;
+
+ mlx5_sd_for_each_dev(i, priv->mdev, pos)
+ if (priv->q_counter[i++])
+ return true;
+
+ return false;
+}
+
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt)
{
int num_stats = 0;
- if (priv->q_counter)
+ if (q_counter_any(priv))
num_stats += NUM_Q_COUNTERS;
if (priv->drop_rq_q_counter)
@@ -578,7 +590,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt)
{
int i;
- for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ for (i = 0; i < NUM_Q_COUNTERS && q_counter_any(priv); i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
q_stats_desc[i].format);
@@ -593,7 +605,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt)
{
int i;
- for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ for (i = 0; i < NUM_Q_COUNTERS && q_counter_any(priv); i++)
data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
q_stats_desc, i);
for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
@@ -607,18 +619,23 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt)
struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
- int ret;
+ struct mlx5_core_dev *pos;
+ u32 rx_out_of_buffer = 0;
+ int ret, i;
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
- if (priv->q_counter) {
- MLX5_SET(query_q_counter_in, in, counter_set_id,
- priv->q_counter);
- ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
- if (!ret)
- qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
- out, out_of_buffer);
+ mlx5_sd_for_each_dev(i, priv->mdev, pos) {
+ if (priv->q_counter[i]) {
+ MLX5_SET(query_q_counter_in, in, counter_set_id,
+ priv->q_counter[i]);
+ ret = mlx5_cmd_exec_inout(pos, query_q_counter, in, out);
+ if (!ret)
+ rx_out_of_buffer += MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ }
}
+ qcnt->rx_out_of_buffer = rx_out_of_buffer;
if (priv->drop_rq_q_counter) {
MLX5_SET(query_q_counter_in, in, counter_set_id,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 30932c9c9a8f..31ed26cac9bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -761,12 +761,12 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
err = mlx5e_rss_params_indir_init(&indir, mdev,
mlx5e_rqt_size(mdev, hp->num_channels),
- mlx5e_rqt_size(mdev, priv->max_nch));
+ mlx5e_rqt_size(mdev, hp->num_channels));
if (err)
return err;
mlx5e_rss_params_indir_init_uniform(&indir, hp->num_channels);
- err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
+ err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, NULL, hp->num_channels,
mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
&indir);
@@ -1169,7 +1169,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
- params.q_counter = priv->q_counter;
+ params.q_counter = priv->q_counter[0];
err = devl_param_driverinit_value_get(
devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
if (err) {
@@ -2014,9 +2014,10 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
continue;
+
+ list_del(&peer_flow->peer_flows);
if (refcount_dec_and_test(&peer_flow->refcnt)) {
mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow);
- list_del(&peer_flow->peer_flows);
kfree(peer_flow);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 5c166d9d2dca..2fa076b23fbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -401,6 +401,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
+ /* ensure skb is put on metadata_map before tracking the index */
+ wmb();
mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
if (!netif_tx_queue_stopped(sq->txq) &&
mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
index a7ed87e9d842..22dd30cf8033 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
@@ -83,6 +83,7 @@ mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_md
i++;
}
+ rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN;
rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16);
ether_addr_copy(dmac_v, entry->key.addr);
@@ -587,6 +588,7 @@ mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_po
if (!rule_spec)
return ERR_PTR(-ENOMEM);
+ rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN;
rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
@@ -662,6 +664,7 @@ mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port)
dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
dest.vport.vhca_id = port->esw_owner_vhca_id;
}
+ rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN;
handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
kvfree(rule_spec);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
index 190f10aba170..5a0047bdcb51 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
@@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev)
xa_for_each(&esw->offloads.vport_reps, i, rep) {
rpriv = rep->rep_data[REP_ETH].priv;
- if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems))
+ if (!rpriv || !rpriv->netdev)
continue;
rhashtable_walk_enter(&rpriv->tc_ht, &iter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index b0455134c98e..baaae628b0a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -535,21 +535,26 @@ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw)
}
static bool
-esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest)
+esw_dests_to_int_external(struct mlx5_flow_destination *dests, int max_dest)
{
- bool vf_dest = false, pf_dest = false;
+ bool internal_dest = false, external_dest = false;
int i;
for (i = 0; i < max_dest; i++) {
- if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
+ if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ dests[i].type != MLX5_FLOW_DESTINATION_TYPE_UPLINK)
continue;
- if (dests[i].vport.num == MLX5_VPORT_UPLINK)
- pf_dest = true;
+ /* Uplink dest is external, but considered as internal
+ * if there is reformat because firmware uses LB+hairpin to support it.
+ */
+ if (dests[i].vport.num == MLX5_VPORT_UPLINK &&
+ !(dests[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID))
+ external_dest = true;
else
- vf_dest = true;
+ internal_dest = true;
- if (vf_dest && pf_dest)
+ if (internal_dest && external_dest)
return true;
}
@@ -695,9 +700,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
/* Header rewrite with combined wire+loopback in FDB is not allowed */
if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) &&
- esw_dests_to_vf_pf_vports(dest, i)) {
+ esw_dests_to_int_external(dest, i)) {
esw_warn(esw->dev,
- "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n");
+ "FDB: Header rewrite with forwarding to both internal and external dests is not allowed\n");
rule = ERR_PTR(-EINVAL);
goto err_esw_get;
}
@@ -3658,22 +3663,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
return 0;
}
-static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink)
-{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
- struct net *devl_net, *netdev_net;
- bool ret = false;
-
- mutex_lock(&dev->mlx5e_res.uplink_netdev_lock);
- if (dev->mlx5e_res.uplink_netdev) {
- netdev_net = dev_net(dev->mlx5e_res.uplink_netdev);
- devl_net = devlink_net(devlink);
- ret = net_eq(devl_net, netdev_net);
- }
- mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock);
- return ret;
-}
-
int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev)
{
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -3718,13 +3707,6 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
- if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV &&
- !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's.");
- return -EPERM;
- }
-
mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1616a6144f7b..9b8599c200e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -566,6 +566,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
fte->flow_context.flow_tag);
MLX5_SET(flow_context, in_flow_context, flow_source,
fte->flow_context.flow_source);
+ MLX5_SET(flow_context, in_flow_context, uplink_hairpin_en,
+ !!(fte->flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN));
MLX5_SET(flow_context, in_flow_context, extended_destination,
extended_dest);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 58f4c0d0fafa..e7faf7e73ca4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -366,18 +366,18 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
return -EIO;
}
- mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED);
+ mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED);
/* Loop until device state turns to disable */
end = jiffies + msecs_to_jiffies(delay_ms);
do {
- if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
break;
cond_resched();
} while (!time_after(jiffies, end));
- if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+ if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
mlx5_get_nic_state(dev), delay_ms);
return -EIO;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index f27eab6e4929..2911aa34a5be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -703,19 +703,30 @@ void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!fw_reset)
+ return;
+
MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT);
mlx5_eq_notifier_register(dev, &fw_reset->nb);
}
void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
{
- mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (!fw_reset)
+ return;
+
+ mlx5_eq_notifier_unregister(dev, &fw_reset->nb);
}
void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!fw_reset)
+ return;
+
set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
cancel_work_sync(&fw_reset->fw_live_patch_work);
cancel_work_sync(&fw_reset->reset_request_work);
@@ -733,9 +744,13 @@ static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
{
- struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
+ struct mlx5_fw_reset *fw_reset;
int err;
+ if (!MLX5_CAP_MCAM_REG(dev, mfrl))
+ return 0;
+
+ fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
if (!fw_reset)
return -ENOMEM;
fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events");
@@ -771,6 +786,9 @@ void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!fw_reset)
+ return;
+
devl_params_unregister(priv_to_devlink(dev),
mlx5_fw_reset_devlink_params,
ARRAY_SIZE(mlx5_fw_reset_devlink_params));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 8ff6dc9bc803..ad38e31822df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -116,9 +116,9 @@ u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
return MLX5_SENSOR_PCI_COMM_ERR;
if (pci_channel_offline(dev->pdev))
return MLX5_SENSOR_PCI_ERR;
- if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
return MLX5_SENSOR_NIC_DISABLED;
- if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
+ if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET)
return MLX5_SENSOR_NIC_SW_RESET;
if (sensor_fw_synd_rfr(dev))
return MLX5_SENSOR_FW_SYND_RFR;
@@ -185,7 +185,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
/* Write the NIC interface field to initiate the reset, the command
* interface address also resides here, don't overwrite it.
*/
- mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
+ mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET);
return true;
}
@@ -246,13 +246,13 @@ recover_from_sw_reset:
/* Recover from SW reset */
end = jiffies + msecs_to_jiffies(delay_ms);
do {
- if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
break;
msleep(20);
} while (!time_after(jiffies, end));
- if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+ if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
mlx5_get_nic_state(dev), delay_ms);
}
@@ -272,26 +272,26 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
u8 nic_interface = mlx5_get_nic_state(dev);
switch (nic_interface) {
- case MLX5_NIC_IFC_FULL:
+ case MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER:
mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
break;
- case MLX5_NIC_IFC_DISABLED:
+ case MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED:
mlx5_core_warn(dev, "starting teardown\n");
break;
- case MLX5_NIC_IFC_NO_DRAM_NIC:
+ case MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC:
mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
break;
- case MLX5_NIC_IFC_SW_RESET:
+ case MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET:
/* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
* 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
* and this is a VF), this is not recoverable by SW reset.
* Logging of this is handled elsewhere.
* 2. FW reset has been issued by another function, driver can
* be reloaded to recover after the mode switches to
- * MLX5_NIC_IFC_DISABLED.
+ * MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED.
*/
if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
mlx5_core_warn(dev, "NIC SW reset in progress\n");
@@ -452,10 +452,10 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
struct health_buffer __iomem *h = health->health;
u8 synd = ioread8(&h->synd);
+ devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
if (!synd)
return 0;
- devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
return 0;
@@ -555,12 +555,17 @@ static void mlx5_fw_reporter_err_work(struct work_struct *work)
&fw_reporter_ctx);
}
-static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_pf_ops = {
.name = "fw",
.diagnose = mlx5_fw_reporter_diagnose,
.dump = mlx5_fw_reporter_dump,
};
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+ .name = "fw",
+ .diagnose = mlx5_fw_reporter_diagnose,
+};
+
static int
mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
void *priv_ctx,
@@ -646,12 +651,17 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
}
}
-static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
+static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = {
.name = "fw_fatal",
.recover = mlx5_fw_fatal_reporter_recover,
.dump = mlx5_fw_fatal_reporter_dump,
};
+static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
+ .name = "fw_fatal",
+ .recover = mlx5_fw_fatal_reporter_recover,
+};
+
#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
@@ -659,10 +669,14 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
+ const struct devlink_health_reporter_ops *fw_fatal_ops;
struct mlx5_core_health *health = &dev->priv.health;
+ const struct devlink_health_reporter_ops *fw_ops;
struct devlink *devlink = priv_to_devlink(dev);
u64 grace_period;
+ fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
+ fw_ops = &mlx5_fw_reporter_pf_ops;
if (mlx5_core_is_ecpf(dev)) {
grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
} else if (mlx5_core_is_pf(dev)) {
@@ -670,18 +684,19 @@ void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
} else {
/* VF or SF */
grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
+ fw_fatal_ops = &mlx5_fw_fatal_reporter_ops;
+ fw_ops = &mlx5_fw_reporter_ops;
}
health->fw_reporter =
- devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
- 0, dev);
+ devl_health_reporter_create(devlink, fw_ops, 0, dev);
if (IS_ERR(health->fw_reporter))
mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(health->fw_reporter));
health->fw_fatal_reporter =
devl_health_reporter_create(devlink,
- &mlx5_fw_fatal_reporter_ops,
+ fw_fatal_ops,
grace_period,
dev);
if (IS_ERR(health->fw_fatal_reporter))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 58845121954c..d77be1b4dd9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -783,7 +783,7 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num,
}
/* This should only be called once per mdev */
- err = mlx5e_create_mdev_resources(mdev);
+ err = mlx5e_create_mdev_resources(mdev, false);
if (err)
goto destroy_ht;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
index 40c7be124041..58bd749b5e4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -98,7 +98,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
(__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
- MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE);
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index ec32b686f586..d58032dd0df7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -10,6 +10,7 @@ enum mlx5_devcom_component {
MLX5_DEVCOM_ESW_OFFLOADS,
MLX5_DEVCOM_MPV,
MLX5_DEVCOM_HCA_PORTS,
+ MLX5_DEVCOM_SD_GROUP,
MLX5_DEVCOM_NUM_COMPONENTS,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 2b5826a785c4..37d5f445598c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -54,4 +54,16 @@ static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *md
{
return mdev->mlx5e_res.uplink_netdev;
}
+
+struct mlx5_sd;
+
+static inline struct mlx5_sd *mlx5_get_sd(struct mlx5_core_dev *dev)
+{
+ return dev->sd;
+}
+
+static inline void mlx5_set_sd(struct mlx5_core_dev *dev, struct mlx5_sd *sd)
+{
+ dev->sd = sd;
+}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
new file mode 100644
index 000000000000..5b28084e8a03
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lib/sd.h"
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fs_cmd.h"
+#include <linux/mlx5/vport.h>
+#include <linux/debugfs.h>
+
+#define sd_info(__dev, format, ...) \
+ dev_info((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__)
+#define sd_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__)
+
+struct mlx5_sd {
+ u32 group_id;
+ u8 host_buses;
+ struct mlx5_devcom_comp_dev *devcom;
+ struct dentry *dfs;
+ bool primary;
+ union {
+ struct { /* primary */
+ struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1];
+ struct mlx5_flow_table *tx_ft;
+ };
+ struct { /* secondary */
+ struct mlx5_core_dev *primary_dev;
+ u32 alias_obj_id;
+ };
+ };
+};
+
+static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+
+ if (!sd)
+ return 1;
+
+ return sd->host_buses;
+}
+
+static struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+
+ if (!sd)
+ return dev;
+
+ return sd->primary ? dev : sd->primary_dev;
+}
+
+struct mlx5_core_dev *
+mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx)
+{
+ struct mlx5_sd *sd;
+
+ if (idx == 0)
+ return primary;
+
+ if (idx >= mlx5_sd_get_host_buses(primary))
+ return NULL;
+
+ sd = mlx5_get_sd(primary);
+ return sd->secondaries[idx - 1];
+}
+
+int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix)
+{
+ return ch_ix % mlx5_sd_get_host_buses(dev);
+}
+
+int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix)
+{
+ return ch_ix / mlx5_sd_get_host_buses(dev);
+}
+
+struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int ch_ix)
+{
+ int mdev_idx = mlx5_sd_ch_ix_get_dev_ix(primary, ch_ix);
+
+ return mlx5_sd_primary_get_peer(primary, mdev_idx);
+}
+
+static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
+{
+ u64 obj_allowed = MLX5_CAP_GEN_2_64(dev, allowed_object_for_other_vhca_access);
+ u32 obj_supp = MLX5_CAP_GEN_2(dev, cross_vhca_object_to_object_supported);
+
+ if (!(obj_supp &
+ MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE))
+ return false;
+
+ if (!(obj_allowed & MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
+{
+ /* Feature is currently implemented for PFs only */
+ if (!mlx5_core_is_pf(dev))
+ return false;
+
+ /* Honor the SW implementation limit */
+ if (host_buses > MLX5_SD_MAX_GROUP_SZ)
+ return false;
+
+ /* Disconnect secondaries from the network */
+ if (!MLX5_CAP_GEN(dev, eswitch_manager))
+ return false;
+ if (!MLX5_CAP_GEN(dev, silent_mode))
+ return false;
+
+ /* RX steering from primary to secondaries */
+ if (!MLX5_CAP_GEN(dev, cross_vhca_rqt))
+ return false;
+ if (host_buses > MLX5_CAP_GEN_2(dev, max_rqt_vhca_id))
+ return false;
+
+ /* TX steering from secondaries to primary */
+ if (!ft_create_alias_supported(dev))
+ return false;
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(dev, reset_root_to_default))
+ return false;
+
+ return true;
+}
+
+static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm,
+ u8 *host_buses, u8 *sd_group)
+{
+ u32 out[MLX5_ST_SZ_DW(mpir_reg)];
+ int err;
+
+ err = mlx5_query_mpir_reg(dev, out);
+ if (err)
+ return err;
+
+ err = mlx5_query_nic_vport_sd_group(dev, sd_group);
+ if (err)
+ return err;
+
+ *sdm = MLX5_GET(mpir_reg, out, sdm);
+ *host_buses = MLX5_GET(mpir_reg, out, host_buses);
+
+ return 0;
+}
+
+static u32 mlx5_sd_group_id(struct mlx5_core_dev *dev, u8 sd_group)
+{
+ return (u32)((MLX5_CAP_GEN(dev, native_port_num) << 8) | sd_group);
+}
+
+static int sd_init(struct mlx5_core_dev *dev)
+{
+ u8 host_buses, sd_group;
+ struct mlx5_sd *sd;
+ u32 group_id;
+ bool sdm;
+ int err;
+
+ if (!MLX5_CAP_MCAM_REG(dev, mpir))
+ return 0;
+
+ err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group);
+ if (err)
+ return err;
+
+ if (!sdm)
+ return 0;
+
+ if (!sd_group)
+ return 0;
+
+ group_id = mlx5_sd_group_id(dev, sd_group);
+
+ if (!mlx5_sd_is_supported(dev, host_buses)) {
+ sd_warn(dev, "can't support requested netdev combining for group id 0x%x), skipping\n",
+ group_id);
+ return 0;
+ }
+
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd)
+ return -ENOMEM;
+
+ sd->host_buses = host_buses;
+ sd->group_id = group_id;
+
+ mlx5_set_sd(dev, sd);
+
+ return 0;
+}
+
+static void sd_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+
+ mlx5_set_sd(dev, NULL);
+ kfree(sd);
+}
+
+static int sd_register(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devcom_comp_dev *devcom, *pos;
+ struct mlx5_core_dev *peer, *primary;
+ struct mlx5_sd *sd, *primary_sd;
+ int err, i;
+
+ sd = mlx5_get_sd(dev);
+ devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
+ sd->group_id, NULL, dev);
+ if (!devcom)
+ return -ENOMEM;
+
+ sd->devcom = devcom;
+
+ if (mlx5_devcom_comp_get_size(devcom) != sd->host_buses)
+ return 0;
+
+ mlx5_devcom_comp_lock(devcom);
+ mlx5_devcom_comp_set_ready(devcom, true);
+ mlx5_devcom_comp_unlock(devcom);
+
+ if (!mlx5_devcom_for_each_peer_begin(devcom)) {
+ err = -ENODEV;
+ goto err_devcom_unreg;
+ }
+
+ primary = dev;
+ mlx5_devcom_for_each_peer_entry(devcom, peer, pos)
+ if (peer->pdev->bus->number < primary->pdev->bus->number)
+ primary = peer;
+
+ primary_sd = mlx5_get_sd(primary);
+ primary_sd->primary = true;
+ i = 0;
+ /* loop the secondaries */
+ mlx5_devcom_for_each_peer_entry(primary_sd->devcom, peer, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(peer);
+
+ primary_sd->secondaries[i++] = peer;
+ peer_sd->primary = false;
+ peer_sd->primary_dev = primary;
+ }
+
+ mlx5_devcom_for_each_peer_end(devcom);
+ return 0;
+
+err_devcom_unreg:
+ mlx5_devcom_comp_lock(sd->devcom);
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+ mlx5_devcom_comp_unlock(sd->devcom);
+ mlx5_devcom_unregister_component(sd->devcom);
+ return err;
+}
+
+static void sd_unregister(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+
+ mlx5_devcom_comp_lock(sd->devcom);
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+ mlx5_devcom_comp_unlock(sd->devcom);
+ mlx5_devcom_unregister_component(sd->devcom);
+}
+
+static int sd_cmd_set_primary(struct mlx5_core_dev *primary, u8 *alias_key)
+{
+ struct mlx5_cmd_allow_other_vhca_access_attr allow_attr = {};
+ struct mlx5_sd *sd = mlx5_get_sd(primary);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *nic_ns;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ nic_ns = mlx5_get_flow_namespace(primary, MLX5_FLOW_NAMESPACE_EGRESS);
+ if (!nic_ns)
+ return -EOPNOTSUPP;
+
+ ft = mlx5_create_flow_table(nic_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ return err;
+ }
+ sd->tx_ft = ft;
+ memcpy(allow_attr.access_key, alias_key, ACCESS_KEY_LEN);
+ allow_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS;
+ allow_attr.obj_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id;
+
+ err = mlx5_cmd_allow_other_vhca_access(primary, &allow_attr);
+ if (err) {
+ mlx5_core_err(primary, "Failed to allow other vhca access err=%d\n",
+ err);
+ mlx5_destroy_flow_table(ft);
+ return err;
+ }
+
+ return 0;
+}
+
+static void sd_cmd_unset_primary(struct mlx5_core_dev *primary)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(primary);
+
+ mlx5_destroy_flow_table(sd->tx_ft);
+}
+
+static int sd_secondary_create_alias_ft(struct mlx5_core_dev *secondary,
+ struct mlx5_core_dev *primary,
+ struct mlx5_flow_table *ft,
+ u32 *obj_id, u8 *alias_key)
+{
+ u32 aliased_object_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id;
+ u16 vhca_id_to_be_accessed = MLX5_CAP_GEN(primary, vhca_id);
+ struct mlx5_cmd_alias_obj_create_attr alias_attr = {};
+ int ret;
+
+ memcpy(alias_attr.access_key, alias_key, ACCESS_KEY_LEN);
+ alias_attr.obj_id = aliased_object_id;
+ alias_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS;
+ alias_attr.vhca_id = vhca_id_to_be_accessed;
+ ret = mlx5_cmd_alias_obj_create(secondary, &alias_attr, obj_id);
+ if (ret) {
+ mlx5_core_err(secondary, "Failed to create alias object err=%d\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void sd_secondary_destroy_alias_ft(struct mlx5_core_dev *secondary)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(secondary);
+
+ mlx5_cmd_alias_obj_destroy(secondary, sd->alias_obj_id,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS);
+}
+
+static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
+ struct mlx5_core_dev *primary,
+ u8 *alias_key)
+{
+ struct mlx5_sd *primary_sd = mlx5_get_sd(primary);
+ struct mlx5_sd *sd = mlx5_get_sd(secondary);
+ int err;
+
+ err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1);
+ if (err)
+ return err;
+
+ err = sd_secondary_create_alias_ft(secondary, primary, primary_sd->tx_ft,
+ &sd->alias_obj_id, alias_key);
+ if (err)
+ goto err_unset_silent;
+
+ err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id, false);
+ if (err)
+ goto err_destroy_alias_ft;
+
+ return 0;
+
+err_destroy_alias_ft:
+ sd_secondary_destroy_alias_ft(secondary);
+err_unset_silent:
+ mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
+ return err;
+}
+
+static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary)
+{
+ mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true);
+ sd_secondary_destroy_alias_ft(secondary);
+ mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
+}
+
+static void sd_print_group(struct mlx5_core_dev *primary)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(primary);
+ struct mlx5_core_dev *pos;
+ int i;
+
+ sd_info(primary, "group id %#x, primary %s, vhca %#x\n",
+ sd->group_id, pci_name(primary->pdev),
+ MLX5_CAP_GEN(primary, vhca_id));
+ mlx5_sd_for_each_secondary(i, primary, pos)
+ sd_info(primary, "group id %#x, secondary_%d %s, vhca %#x\n",
+ sd->group_id, i - 1, pci_name(pos->pdev),
+ MLX5_CAP_GEN(pos, vhca_id));
+}
+
+static ssize_t dev_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev;
+ char tbuf[32];
+ int ret;
+
+ dev = filp->private_data;
+ ret = snprintf(tbuf, sizeof(tbuf), "%s vhca %#x\n", pci_name(dev->pdev),
+ MLX5_CAP_GEN(dev, vhca_id));
+
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations dev_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = dev_read,
+};
+
+int mlx5_sd_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *primary, *pos, *to;
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+ u8 alias_key[ACCESS_KEY_LEN];
+ int err, i;
+
+ err = sd_init(dev);
+ if (err)
+ return err;
+
+ sd = mlx5_get_sd(dev);
+ if (!sd)
+ return 0;
+
+ err = sd_register(dev);
+ if (err)
+ goto err_sd_cleanup;
+
+ if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ return 0;
+
+ primary = mlx5_sd_get_primary(dev);
+
+ for (i = 0; i < ACCESS_KEY_LEN; i++)
+ alias_key[i] = get_random_u8();
+
+ err = sd_cmd_set_primary(primary, alias_key);
+ if (err)
+ goto err_sd_unregister;
+
+ sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary));
+ debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id);
+ debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops);
+
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ char name[32];
+
+ err = sd_cmd_set_secondary(pos, primary, alias_key);
+ if (err)
+ goto err_unset_secondaries;
+
+ snprintf(name, sizeof(name), "secondary_%d", i - 1);
+ debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops);
+
+ }
+
+ sd_info(primary, "group id %#x, size %d, combined\n",
+ sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
+ sd_print_group(primary);
+
+ return 0;
+
+err_unset_secondaries:
+ to = pos;
+ mlx5_sd_for_each_secondary_to(i, primary, to, pos)
+ sd_cmd_unset_secondary(pos);
+ sd_cmd_unset_primary(primary);
+ debugfs_remove_recursive(sd->dfs);
+err_sd_unregister:
+ sd_unregister(dev);
+err_sd_cleanup:
+ sd_cleanup(dev);
+ return err;
+}
+
+void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+ struct mlx5_core_dev *primary, *pos;
+ int i;
+
+ if (!sd)
+ return;
+
+ if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ goto out;
+
+ primary = mlx5_sd_get_primary(dev);
+ mlx5_sd_for_each_secondary(i, primary, pos)
+ sd_cmd_unset_secondary(pos);
+ sd_cmd_unset_primary(primary);
+ debugfs_remove_recursive(sd->dfs);
+
+ sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
+out:
+ sd_unregister(dev);
+ sd_cleanup(dev);
+}
+
+struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
+ struct auxiliary_device *adev,
+ int idx)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(dev);
+ struct mlx5_core_dev *primary;
+
+ if (!sd)
+ return adev;
+
+ if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ return NULL;
+
+ primary = mlx5_sd_get_primary(dev);
+ if (dev == primary)
+ return adev;
+
+ return &primary->priv.adev[idx]->adev;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
new file mode 100644
index 000000000000..137efaf9aabc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_SD_H__
+#define __MLX5_LIB_SD_H__
+
+#define MLX5_SD_MAX_GROUP_SZ 2
+
+struct mlx5_sd;
+
+struct mlx5_core_dev *mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx);
+int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix);
+int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix);
+struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int ch_ix);
+struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
+ struct auxiliary_device *adev,
+ int idx);
+
+int mlx5_sd_init(struct mlx5_core_dev *dev);
+void mlx5_sd_cleanup(struct mlx5_core_dev *dev);
+
+#define mlx5_sd_for_each_dev_from_to(i, primary, ix_from, to, pos) \
+ for (i = ix_from; \
+ (pos = mlx5_sd_primary_get_peer(primary, i)) && pos != (to); i++)
+
+#define mlx5_sd_for_each_dev(i, primary, pos) \
+ mlx5_sd_for_each_dev_from_to(i, primary, 0, NULL, pos)
+
+#define mlx5_sd_for_each_dev_to(i, primary, to, pos) \
+ mlx5_sd_for_each_dev_from_to(i, primary, 0, to, pos)
+
+#define mlx5_sd_for_each_secondary(i, primary, pos) \
+ mlx5_sd_for_each_dev_from_to(i, primary, 1, NULL, pos)
+
+#define mlx5_sd_for_each_secondary_to(i, primary, to, pos) \
+ mlx5_sd_for_each_dev_from_to(i, primary, 1, to, pos)
+
+#endif /* __MLX5_LIB_SD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bccf6e53556c..c2593625c09a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -187,31 +187,36 @@ static struct mlx5_profile profile[] = {
};
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
- u32 warn_time_mili)
+ u32 warn_time_mili, const char *init_state)
{
unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
u32 fw_initializing;
- int err = 0;
do {
fw_initializing = ioread32be(&dev->iseg->initializing);
if (!(fw_initializing >> 31))
break;
- if (time_after(jiffies, end) ||
- test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
- err = -EBUSY;
- break;
+ if (time_after(jiffies, end)) {
+ mlx5_core_err(dev, "Firmware over %u MS in %s state, aborting\n",
+ max_wait_mili, init_state);
+ return -ETIMEDOUT;
+ }
+ if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
+ mlx5_core_warn(dev, "device is being removed, stop waiting for FW %s\n",
+ init_state);
+ return -ENODEV;
}
if (warn_time_mili && time_after(jiffies, warn)) {
- mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n",
- jiffies_to_msecs(end - warn) / 1000, fw_initializing);
+ mlx5_core_warn(dev, "Waiting for FW %s, timeout abort in %ds (0x%x)\n",
+ init_state, jiffies_to_msecs(end - warn) / 1000,
+ fw_initializing);
warn = jiffies + msecs_to_jiffies(warn_time_mili);
}
msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
} while (true);
- return err;
+ return 0;
}
static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
@@ -1151,12 +1156,10 @@ static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeou
/* wait for firmware to accept initialization segments configurations
*/
err = wait_fw_init(dev, timeout,
- mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
- if (err) {
- mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
- timeout);
+ mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL),
+ "pre-initializing");
+ if (err)
return err;
- }
err = mlx5_cmd_enable(dev);
if (err) {
@@ -1166,12 +1169,9 @@ static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeou
mlx5_tout_query_iseg(dev);
- err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0);
- if (err) {
- mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n",
- mlx5_tout_ms(dev, FW_INIT));
+ err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0, "initializing");
+ if (err)
goto err_cmd_cleanup;
- }
dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a79b7959361b..58732f44940f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -312,13 +312,6 @@ static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev)
return ret;
}
-enum {
- MLX5_NIC_IFC_FULL = 0,
- MLX5_NIC_IFC_DISABLED = 1,
- MLX5_NIC_IFC_NO_DRAM_NIC = 2,
- MLX5_NIC_IFC_SW_RESET = 7
-};
-
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index c93492b67788..99219ea52c4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -74,7 +74,8 @@ static void mlx5_sf_dev_release(struct device *device)
kfree(sf_dev);
}
-static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev)
+static void mlx5_sf_dev_remove_aux(struct mlx5_core_dev *dev,
+ struct mlx5_sf_dev *sf_dev)
{
int id;
@@ -138,7 +139,7 @@ static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id,
return;
xa_err:
- mlx5_sf_dev_remove(dev, sf_dev);
+ mlx5_sf_dev_remove_aux(dev, sf_dev);
add_err:
mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
sf_index, sfnum, err);
@@ -149,7 +150,7 @@ static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_de
struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
xa_erase(&table->devices, sf_index);
- mlx5_sf_dev_remove(dev, sf_dev);
+ mlx5_sf_dev_remove_aux(dev, sf_dev);
}
static int
@@ -367,7 +368,7 @@ static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
xa_for_each(&table->devices, index, sf_dev) {
xa_erase(&table->devices, index);
- mlx5_sf_dev_remove(table->dev, sf_dev);
+ mlx5_sf_dev_remove_aux(table->dev, sf_dev);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index 169c2c68ed5c..bc863e1f062e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -95,24 +95,29 @@ mdev_err:
static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
{
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
- struct devlink *devlink = priv_to_devlink(sf_dev->mdev);
+ struct mlx5_core_dev *mdev = sf_dev->mdev;
+ struct devlink *devlink;
- mlx5_drain_health_wq(sf_dev->mdev);
+ devlink = priv_to_devlink(mdev);
+ set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+ mlx5_drain_health_wq(mdev);
devlink_unregister(devlink);
- if (mlx5_dev_is_lightweight(sf_dev->mdev))
- mlx5_uninit_one_light(sf_dev->mdev);
+ if (mlx5_dev_is_lightweight(mdev))
+ mlx5_uninit_one_light(mdev);
else
- mlx5_uninit_one(sf_dev->mdev);
- iounmap(sf_dev->mdev->iseg);
- mlx5_mdev_uninit(sf_dev->mdev);
+ mlx5_uninit_one(mdev);
+ iounmap(mdev->iseg);
+ mlx5_mdev_uninit(mdev);
mlx5_devlink_free(devlink);
}
static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
{
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+ struct mlx5_core_dev *mdev = sf_dev->mdev;
- mlx5_unload_one(sf_dev->mdev, false);
+ set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+ mlx5_unload_one(mdev, false);
}
static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 6f9790e97fed..2ebb61ef3ea9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -788,6 +788,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
switch (action_type) {
case DR_ACTION_TYP_DROP:
attr.final_icm_addr = nic_dmn->drop_icm_addr;
+ attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48;
break;
case DR_ACTION_TYP_FT:
dest_action = action;
@@ -873,11 +874,17 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
action->sampler->tx_icm_addr;
break;
case DR_ACTION_TYP_VPORT:
- attr.hit_gvmi = action->vport->caps->vhca_gvmi;
- dest_action = action;
- attr.final_icm_addr = rx_rule ?
- action->vport->caps->icm_address_rx :
- action->vport->caps->icm_address_tx;
+ if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) {
+ /* can't go to uplink on RX rule - dropping instead */
+ attr.final_icm_addr = nic_dmn->drop_icm_addr;
+ attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48;
+ } else {
+ attr.hit_gvmi = action->vport->caps->vhca_gvmi;
+ dest_action = action;
+ attr.final_icm_addr = rx_rule ?
+ action->vport->caps->icm_address_rx :
+ action->vport->caps->icm_address_tx;
+ }
break;
case DR_ACTION_TYP_POP_VLAN:
if (!rx_rule && !(dmn->ste_ctx->actions_caps &
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index 7e36e1062139..64f4cc284aea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -54,6 +54,107 @@ enum dr_dump_rec_type {
DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE = 3425,
};
+static struct mlx5dr_dbg_dump_buff *
+mlx5dr_dbg_dump_data_init_new_buff(struct mlx5dr_dbg_dump_data *dump_data)
+{
+ struct mlx5dr_dbg_dump_buff *new_buff;
+
+ new_buff = kzalloc(sizeof(*new_buff), GFP_KERNEL);
+ if (!new_buff)
+ return NULL;
+
+ new_buff->buff = kvzalloc(MLX5DR_DEBUG_DUMP_BUFF_SIZE, GFP_KERNEL);
+ if (!new_buff->buff) {
+ kfree(new_buff);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&new_buff->node);
+ list_add_tail(&new_buff->node, &dump_data->buff_list);
+
+ return new_buff;
+}
+
+static struct mlx5dr_dbg_dump_data *
+mlx5dr_dbg_create_dump_data(void)
+{
+ struct mlx5dr_dbg_dump_data *dump_data;
+
+ dump_data = kzalloc(sizeof(*dump_data), GFP_KERNEL);
+ if (!dump_data)
+ return NULL;
+
+ INIT_LIST_HEAD(&dump_data->buff_list);
+
+ if (!mlx5dr_dbg_dump_data_init_new_buff(dump_data)) {
+ kfree(dump_data);
+ return NULL;
+ }
+
+ return dump_data;
+}
+
+static void
+mlx5dr_dbg_destroy_dump_data(struct mlx5dr_dbg_dump_data *dump_data)
+{
+ struct mlx5dr_dbg_dump_buff *dump_buff, *tmp_buff;
+
+ if (!dump_data)
+ return;
+
+ list_for_each_entry_safe(dump_buff, tmp_buff, &dump_data->buff_list, node) {
+ kvfree(dump_buff->buff);
+ list_del(&dump_buff->node);
+ kfree(dump_buff);
+ }
+
+ kfree(dump_data);
+}
+
+static int
+mlx5dr_dbg_dump_data_print(struct seq_file *file, char *str, u32 size)
+{
+ struct mlx5dr_domain *dmn = file->private;
+ struct mlx5dr_dbg_dump_data *dump_data;
+ struct mlx5dr_dbg_dump_buff *buff;
+ u32 buff_capacity, write_size;
+ int remain_size, ret;
+
+ if (size >= MLX5DR_DEBUG_DUMP_BUFF_SIZE)
+ return -EINVAL;
+
+ dump_data = dmn->dump_info.dump_data;
+ buff = list_last_entry(&dump_data->buff_list,
+ struct mlx5dr_dbg_dump_buff, node);
+
+ buff_capacity = (MLX5DR_DEBUG_DUMP_BUFF_SIZE - 1) - buff->index;
+ remain_size = buff_capacity - size;
+ write_size = (remain_size > 0) ? size : buff_capacity;
+
+ if (likely(write_size)) {
+ ret = snprintf(buff->buff + buff->index, write_size + 1, "%s", str);
+ if (ret < 0)
+ return ret;
+
+ buff->index += write_size;
+ }
+
+ if (remain_size < 0) {
+ remain_size *= -1;
+ buff = mlx5dr_dbg_dump_data_init_new_buff(dump_data);
+ if (!buff)
+ return -ENOMEM;
+
+ ret = snprintf(buff->buff, remain_size + 1, "%s", str + write_size);
+ if (ret < 0)
+ return ret;
+
+ buff->index += remain_size;
+ }
+
+ return 0;
+}
+
void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl)
{
mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
@@ -109,36 +210,68 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
{
struct mlx5dr_action *action = action_mem->action;
const u64 action_id = DR_DBG_PTR_TO_ID(action);
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 hit_tbl_ptr, miss_tbl_ptr;
u32 hit_tbl_id, miss_tbl_id;
+ int ret;
switch (action->action_type) {
case DR_ACTION_TYP_DROP:
- seq_printf(file, "%d,0x%llx,0x%llx\n",
- DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DROP, action_id,
+ rule_id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_FT:
if (action->dest_tbl->is_fw_tbl)
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_FT, action_id,
- rule_id, action->dest_tbl->fw_tbl.id,
- -1);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->fw_tbl.id,
+ -1);
else
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n",
- DR_DUMP_REC_TYPE_ACTION_FT, action_id,
- rule_id, action->dest_tbl->tbl->table_id,
- DR_DBG_PTR_TO_ID(action->dest_tbl->tbl));
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->tbl->table_id,
+ DR_DBG_PTR_TO_ID(action->dest_tbl->tbl));
+
+ if (ret < 0)
+ return ret;
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_CTR:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id,
- action->ctr->ctr_id + action->ctr->offset);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id,
+ action->ctr->ctr_id + action->ctr->offset);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_TAG:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id,
- action->flow_tag->flow_tag);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id,
+ action->flow_tag->flow_tag);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_MODIFY_HDR:
{
@@ -150,83 +283,171 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
ptrn_arg = !action->rewrite->single_action_opt && ptrn && arg;
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,%d,0x%x,0x%x,0x%x",
- DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
- rule_id, action->rewrite->index,
- action->rewrite->single_action_opt,
- ptrn_arg ? action->rewrite->num_of_actions : 0,
- ptrn_arg ? ptrn->index : 0,
- ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,%d,0x%x,0x%x,0x%x",
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
+ rule_id, action->rewrite->index,
+ action->rewrite->single_action_opt,
+ ptrn_arg ? action->rewrite->num_of_actions : 0,
+ ptrn_arg ? ptrn->index : 0,
+ ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
if (ptrn_arg) {
for (i = 0; i < action->rewrite->num_of_actions; i++) {
- seq_printf(file, ",0x%016llx",
- be64_to_cpu(((__be64 *)rewrite_data)[i]));
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ ",0x%016llx",
+ be64_to_cpu(((__be64 *)rewrite_data)[i]));
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
}
}
- seq_puts(file, "\n");
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, "\n");
+ if (ret < 0)
+ return ret;
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
}
case DR_ACTION_TYP_VPORT:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
- action->vport->caps->num);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
+ action->vport->caps->num);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_TNL_L2_TO_L2:
- seq_printf(file, "%d,0x%llx,0x%llx\n",
- DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id,
- rule_id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id,
+ rule_id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
- rule_id,
- (action->rewrite->ptrn && action->rewrite->arg) ?
- mlx5dr_arg_get_obj_id(action->rewrite->arg) :
- action->rewrite->index);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
+ rule_id,
+ (action->rewrite->ptrn && action->rewrite->arg) ?
+ mlx5dr_arg_get_obj_id(action->rewrite->arg) :
+ action->rewrite->index);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id,
- rule_id, action->reformat->id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id,
+ rule_id, action->reformat->id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_L2_TO_TNL_L3:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id,
- rule_id, action->reformat->id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id,
+ rule_id, action->reformat->id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_POP_VLAN:
- seq_printf(file, "%d,0x%llx,0x%llx\n",
- DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id,
- rule_id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id,
+ rule_id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_PUSH_VLAN:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id,
- rule_id, action->push_vlan->vlan_hdr);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id,
+ rule_id, action->push_vlan->vlan_hdr);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_INSERT_HDR:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id,
- rule_id, action->reformat->id,
- action->reformat->param_0,
- action->reformat->param_1);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_REMOVE_HDR:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id,
- rule_id, action->reformat->id,
- action->reformat->param_0,
- action->reformat->param_1);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_SAMPLER:
- seq_printf(file,
- "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
- DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id,
- 0, 0, action->sampler->sampler_id,
- action->sampler->rx_icm_addr,
- action->sampler->tx_icm_addr);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id,
+ rule_id, 0, 0, action->sampler->sampler_id,
+ action->sampler->rx_icm_addr,
+ action->sampler->tx_icm_addr);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
case DR_ACTION_TYP_RANGE:
if (action->range->hit_tbl_action->dest_tbl->is_fw_tbl) {
@@ -247,10 +468,17 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
DR_DBG_PTR_TO_ID(action->range->miss_tbl_action->dest_tbl->tbl);
}
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx,0x%x,0x%llx,0x%x\n",
- DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE, action_id, rule_id,
- hit_tbl_id, hit_tbl_ptr, miss_tbl_id, miss_tbl_ptr,
- action->range->definer_id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%llx,0x%x,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE, action_id,
+ rule_id, hit_tbl_id, hit_tbl_ptr, miss_tbl_id,
+ miss_tbl_ptr, action->range->definer_id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
break;
default:
return 0;
@@ -263,8 +491,10 @@ static int
dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
bool is_rx, const u64 rule_id, u8 format_ver)
{
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char hw_ste_dump[DR_HEX_SIZE];
u32 mem_rec_type;
+ int ret;
if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) {
mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 :
@@ -277,9 +507,16 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste),
DR_STE_SIZE_REDUCED);
- seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type,
- dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id,
- hw_ste_dump);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,%s\n", mem_rec_type,
+ dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)),
+ rule_id, hw_ste_dump);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
return 0;
}
@@ -309,6 +546,7 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
{
struct mlx5dr_rule_action_member *action_mem;
const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
struct mlx5dr_rule_rx_tx *rx = &rule->rx;
struct mlx5dr_rule_rx_tx *tx = &rule->tx;
u8 format_ver;
@@ -316,8 +554,15 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver;
- seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id,
- DR_DBG_PTR_TO_ID(rule->matcher));
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE,
+ rule_id, DR_DBG_PTR_TO_ID(rule->matcher));
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
if (rx->nic_matcher) {
ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
@@ -344,46 +589,94 @@ static int
dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
u8 criteria, const u64 matcher_id)
{
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char dump[DR_HEX_SIZE];
+ int ret;
- seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK,
- matcher_id);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, "%d,0x%llx,",
+ DR_DUMP_REC_TYPE_MATCHER_MASK, matcher_id);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
if (criteria & DR_MATCHER_CRITERIA_OUTER) {
dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer));
- seq_printf(file, "%s,", dump);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%s,", dump);
} else {
- seq_puts(file, ",");
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, ",");
}
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
if (criteria & DR_MATCHER_CRITERIA_INNER) {
dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner));
- seq_printf(file, "%s,", dump);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%s,", dump);
} else {
- seq_puts(file, ",");
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, ",");
}
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
if (criteria & DR_MATCHER_CRITERIA_MISC) {
dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc));
- seq_printf(file, "%s,", dump);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%s,", dump);
} else {
- seq_puts(file, ",");
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, ",");
}
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
if (criteria & DR_MATCHER_CRITERIA_MISC2) {
dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2));
- seq_printf(file, "%s,", dump);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%s,", dump);
} else {
- seq_puts(file, ",");
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, ",");
}
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
if (criteria & DR_MATCHER_CRITERIA_MISC3) {
dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3));
- seq_printf(file, "%s\n", dump);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%s\n", dump);
} else {
- seq_puts(file, ",\n");
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, ",\n");
}
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
return 0;
}
@@ -391,9 +684,19 @@ static int
dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
u32 index, bool is_rx, const u64 matcher_id)
{
- seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n",
- DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx,
- builder->lu_type);
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
+ int ret;
+
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,%d,%d,0x%x\n",
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index,
+ is_rx, builder->lu_type);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
return 0;
}
@@ -403,6 +706,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
const u64 matcher_id)
{
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr, e_icm_addr;
int i, ret;
@@ -412,11 +716,19 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk);
e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk);
- seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n",
- rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx),
- matcher_id, matcher_rx_tx->num_of_builders,
- dr_dump_icm_to_idx(s_icm_addr),
- dr_dump_icm_to_idx(e_icm_addr));
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n",
+ rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx),
+ matcher_id, matcher_rx_tx->num_of_builders,
+ dr_dump_icm_to_idx(s_icm_addr),
+ dr_dump_icm_to_idx(e_icm_addr));
+
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
ret = dr_dump_matcher_builder(file,
@@ -434,13 +746,22 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
{
struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
struct mlx5dr_matcher_rx_tx *tx = &matcher->tx;
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 matcher_id;
int ret;
matcher_id = DR_DBG_PTR_TO_ID(matcher);
- seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER,
- matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER,
+ matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl),
+ matcher->prio);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
ret = dr_dump_matcher_mask(file, &matcher->mask,
matcher->match_criteria, matcher_id);
@@ -486,15 +807,24 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
struct mlx5dr_table_rx_tx *table_rx_tx,
const u64 table_id)
{
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr;
+ int ret;
rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX :
DR_DUMP_REC_TYPE_TABLE_TX;
s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk);
- seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id,
- dr_dump_icm_to_idx(s_icm_addr));
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx\n", rec_type, table_id,
+ dr_dump_icm_to_idx(s_icm_addr));
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
return 0;
}
@@ -503,11 +833,19 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
{
struct mlx5dr_table_rx_tx *rx = &table->rx;
struct mlx5dr_table_rx_tx *tx = &table->tx;
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
- seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE,
- DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn),
- table->table_type, table->level);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE,
+ DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn),
+ table->table_type, table->level);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
if (rx->nic_dmn) {
ret = dr_dump_table_rx_tx(file, true, rx,
@@ -546,46 +884,86 @@ static int
dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
const u64 domain_id)
{
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
- DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring),
- domain_id, ring->cq->mcq.cqn, ring->qp->qpn);
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
+ int ret;
+
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING,
+ DR_DBG_PTR_TO_ID(ring), domain_id,
+ ring->cq->mcq.cqn, ring->qp->qpn);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_domain_info_flex_parser(struct seq_file *file,
const char *flex_parser_name,
const u8 flex_parser_value,
const u64 domain_id)
{
- seq_printf(file, "%d,0x%llx,%s,0x%x\n",
- DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id,
- flex_parser_name, flex_parser_value);
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
+ int ret;
+
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,%s,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id,
+ flex_parser_name, flex_parser_value);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
+
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
const u64 domain_id)
{
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
struct mlx5dr_cmd_vport_cap *vport_caps;
unsigned long i, vports_num;
+ int ret;
xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps)
; /* count the number of vports in xarray */
- seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n",
- DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi,
- caps->nic_rx_drop_address, caps->nic_tx_drop_address,
- caps->flex_protocols, vports_num, caps->eswitch_manager);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi,
+ caps->nic_rx_drop_address, caps->nic_tx_drop_address,
+ caps->flex_protocols, vports_num, caps->eswitch_manager);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) {
vport_caps = xa_load(&caps->vports.vports_caps_xa, i);
- seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n",
- DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i,
- vport_caps->vport_gvmi, vport_caps->icm_address_rx,
- vport_caps->icm_address_tx);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT,
+ domain_id, i, vport_caps->vport_gvmi,
+ vport_caps->icm_address_rx,
+ vport_caps->icm_address_tx);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -627,24 +1005,32 @@ dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
{
+ char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 domain_id = DR_DBG_PTR_TO_ID(dmn);
int ret;
- seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d,%u,%u,%u\n",
- DR_DUMP_REC_TYPE_DOMAIN,
- domain_id, dmn->type, dmn->info.caps.gvmi,
- dmn->info.supp_sw_steering,
- /* package version */
- LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
- LINUX_VERSION_SUBLEVEL,
- pci_name(dmn->mdev->pdev),
- 0, /* domain flags */
- dmn->num_buddies[DR_ICM_TYPE_STE],
- dmn->num_buddies[DR_ICM_TYPE_MODIFY_ACTION],
- dmn->num_buddies[DR_ICM_TYPE_MODIFY_HDR_PTRN]);
+ ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
+ "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d,%u,%u,%u\n",
+ DR_DUMP_REC_TYPE_DOMAIN,
+ domain_id, dmn->type, dmn->info.caps.gvmi,
+ dmn->info.supp_sw_steering,
+ /* package version */
+ LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
+ LINUX_VERSION_SUBLEVEL,
+ pci_name(dmn->mdev->pdev),
+ 0, /* domain flags */
+ dmn->num_buddies[DR_ICM_TYPE_STE],
+ dmn->num_buddies[DR_ICM_TYPE_MODIFY_ACTION],
+ dmn->num_buddies[DR_ICM_TYPE_MODIFY_HDR_PTRN]);
+ if (ret < 0)
+ return ret;
+
+ ret = mlx5dr_dbg_dump_data_print(file, buff, ret);
+ if (ret)
+ return ret;
ret = dr_dump_domain_info(file, &dmn->info, domain_id);
if (ret < 0)
@@ -683,11 +1069,91 @@ unlock_mutex:
return ret;
}
-static int dr_dump_show(struct seq_file *file, void *priv)
+static void *
+dr_dump_start(struct seq_file *file, loff_t *pos)
{
- return dr_dump_domain_all(file, file->private);
+ struct mlx5dr_domain *dmn = file->private;
+ struct mlx5dr_dbg_dump_data *dump_data;
+
+ if (atomic_read(&dmn->dump_info.state) != MLX5DR_DEBUG_DUMP_STATE_FREE) {
+ mlx5_core_warn(dmn->mdev, "Dump already in progress\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ atomic_set(&dmn->dump_info.state, MLX5DR_DEBUG_DUMP_STATE_IN_PROGRESS);
+ dump_data = dmn->dump_info.dump_data;
+
+ if (dump_data) {
+ return seq_list_start(&dump_data->buff_list, *pos);
+ } else if (*pos == 0) {
+ dump_data = mlx5dr_dbg_create_dump_data();
+ if (!dump_data)
+ goto exit;
+
+ dmn->dump_info.dump_data = dump_data;
+ if (dr_dump_domain_all(file, dmn)) {
+ mlx5dr_dbg_destroy_dump_data(dump_data);
+ dmn->dump_info.dump_data = NULL;
+ goto exit;
+ }
+
+ return seq_list_start(&dump_data->buff_list, *pos);
+ }
+
+exit:
+ atomic_set(&dmn->dump_info.state, MLX5DR_DEBUG_DUMP_STATE_FREE);
+ return NULL;
}
-DEFINE_SHOW_ATTRIBUTE(dr_dump);
+
+static void *
+dr_dump_next(struct seq_file *file, void *v, loff_t *pos)
+{
+ struct mlx5dr_domain *dmn = file->private;
+ struct mlx5dr_dbg_dump_data *dump_data;
+
+ dump_data = dmn->dump_info.dump_data;
+
+ return seq_list_next(v, &dump_data->buff_list, pos);
+}
+
+static void
+dr_dump_stop(struct seq_file *file, void *v)
+{
+ struct mlx5dr_domain *dmn = file->private;
+ struct mlx5dr_dbg_dump_data *dump_data;
+
+ if (v && IS_ERR(v))
+ return;
+
+ if (!v) {
+ dump_data = dmn->dump_info.dump_data;
+ if (dump_data) {
+ mlx5dr_dbg_destroy_dump_data(dump_data);
+ dmn->dump_info.dump_data = NULL;
+ }
+ }
+
+ atomic_set(&dmn->dump_info.state, MLX5DR_DEBUG_DUMP_STATE_FREE);
+}
+
+static int
+dr_dump_show(struct seq_file *file, void *v)
+{
+ struct mlx5dr_dbg_dump_buff *entry;
+
+ entry = list_entry(v, struct mlx5dr_dbg_dump_buff, node);
+ seq_printf(file, "%s", entry->buff);
+
+ return 0;
+}
+
+static const struct seq_operations dr_dump_sops = {
+ .start = dr_dump_start,
+ .next = dr_dump_next,
+ .stop = dr_dump_stop,
+ .show = dr_dump_show,
+};
+DEFINE_SEQ_ATTRIBUTE(dr_dump);
void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
index def6cf853eea..57c6b363b870 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
@@ -1,10 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+#define MLX5DR_DEBUG_DUMP_BUFF_SIZE (64 * 1024 * 1024)
+#define MLX5DR_DEBUG_DUMP_BUFF_LENGTH 512
+
+enum {
+ MLX5DR_DEBUG_DUMP_STATE_FREE,
+ MLX5DR_DEBUG_DUMP_STATE_IN_PROGRESS,
+};
+
+struct mlx5dr_dbg_dump_buff {
+ char *buff;
+ u32 index;
+ struct list_head node;
+};
+
+struct mlx5dr_dbg_dump_data {
+ struct list_head buff_list;
+};
+
struct mlx5dr_dbg_dump_info {
struct mutex dbg_mutex; /* protect dbg lists */
struct dentry *steering_debugfs;
struct dentry *fdb_debugfs;
+ struct mlx5dr_dbg_dump_data *dump_data;
+ atomic_t state;
};
void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 21753f327868..1005bb6935b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -440,6 +440,27 @@ out:
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
+ if (err)
+ goto out;
+
+ *sd_group = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.sd_group);
+out:
+ kvfree(out);
+ return err;
+}
+
int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
{
u32 *out;
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
index 253d7ad9b809..8b63968bbee9 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
@@ -124,6 +124,41 @@ static void mlxbf_gige_get_pauseparam(struct net_device *netdev,
pause->tx_pause = 1;
}
+static bool mlxbf_gige_llu_counters_enabled(struct mlxbf_gige *priv)
+{
+ u32 data;
+
+ if (priv->hw_version == MLXBF_GIGE_VERSION_BF2) {
+ data = readl(priv->llu_base + MLXBF_GIGE_BF2_LLU_GENERAL_CONFIG);
+ if (data & MLXBF_GIGE_BF2_LLU_COUNTERS_EN)
+ return true;
+ } else {
+ data = readl(priv->llu_base + MLXBF_GIGE_BF3_LLU_GENERAL_CONFIG);
+ if (data & MLXBF_GIGE_BF3_LLU_COUNTERS_EN)
+ return true;
+ }
+
+ return false;
+}
+
+static void mlxbf_gige_get_pause_stats(struct net_device *netdev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct mlxbf_gige *priv = netdev_priv(netdev);
+ u64 data_lo, data_hi;
+
+ /* Read LLU counters to provide stats only if counters are enabled */
+ if (mlxbf_gige_llu_counters_enabled(priv)) {
+ data_lo = readl(priv->llu_base + MLXBF_GIGE_TX_PAUSE_CNT_LO);
+ data_hi = readl(priv->llu_base + MLXBF_GIGE_TX_PAUSE_CNT_HI);
+ pause_stats->tx_pause_frames = (data_hi << 32) | data_lo;
+
+ data_lo = readl(priv->llu_base + MLXBF_GIGE_RX_PAUSE_CNT_LO);
+ data_hi = readl(priv->llu_base + MLXBF_GIGE_RX_PAUSE_CNT_HI);
+ pause_stats->rx_pause_frames = (data_hi << 32) | data_lo;
+ }
+}
+
const struct ethtool_ops mlxbf_gige_ethtool_ops = {
.get_link = ethtool_op_get_link,
.get_ringparam = mlxbf_gige_get_ringparam,
@@ -134,6 +169,7 @@ const struct ethtool_ops mlxbf_gige_ethtool_ops = {
.get_ethtool_stats = mlxbf_gige_get_ethtool_stats,
.nway_reset = phy_ethtool_nway_reset,
.get_pauseparam = mlxbf_gige_get_pauseparam,
+ .get_pause_stats = mlxbf_gige_get_pause_stats,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
};
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
index cd0973229c9b..98a8681c21b9 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
@@ -99,4 +99,34 @@
#define MLXBF_GIGE_100M_IPG_SIZE 119
#define MLXBF_GIGE_10M_IPG_SIZE 1199
+/* Offsets into OOB LLU block for pause frame counters */
+#define MLXBF_GIGE_BF2_TX_PAUSE_CNT_HI 0x33d8
+#define MLXBF_GIGE_BF2_TX_PAUSE_CNT_LO 0x33dc
+#define MLXBF_GIGE_BF2_RX_PAUSE_CNT_HI 0x3210
+#define MLXBF_GIGE_BF2_RX_PAUSE_CNT_LO 0x3214
+
+#define MLXBF_GIGE_BF3_TX_PAUSE_CNT_HI 0x3a88
+#define MLXBF_GIGE_BF3_TX_PAUSE_CNT_LO 0x3a8c
+#define MLXBF_GIGE_BF3_RX_PAUSE_CNT_HI 0x38c0
+#define MLXBF_GIGE_BF3_RX_PAUSE_CNT_LO 0x38c4
+
+#define MLXBF_GIGE_TX_PAUSE_CNT_HI ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \
+ MLXBF_GIGE_BF2_TX_PAUSE_CNT_HI : \
+ MLXBF_GIGE_BF3_TX_PAUSE_CNT_HI)
+#define MLXBF_GIGE_TX_PAUSE_CNT_LO ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \
+ MLXBF_GIGE_BF2_TX_PAUSE_CNT_LO : \
+ MLXBF_GIGE_BF3_TX_PAUSE_CNT_LO)
+#define MLXBF_GIGE_RX_PAUSE_CNT_HI ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \
+ MLXBF_GIGE_BF2_RX_PAUSE_CNT_HI : \
+ MLXBF_GIGE_BF3_RX_PAUSE_CNT_HI)
+#define MLXBF_GIGE_RX_PAUSE_CNT_LO ((priv->hw_version == MLXBF_GIGE_VERSION_BF2) ? \
+ MLXBF_GIGE_BF2_RX_PAUSE_CNT_LO : \
+ MLXBF_GIGE_BF3_RX_PAUSE_CNT_LO)
+
+#define MLXBF_GIGE_BF2_LLU_GENERAL_CONFIG 0x2110
+#define MLXBF_GIGE_BF3_LLU_GENERAL_CONFIG 0x2030
+
+#define MLXBF_GIGE_BF2_LLU_COUNTERS_EN BIT(0)
+#define MLXBF_GIGE_BF3_LLU_COUNTERS_EN BIT(4)
+
#endif /* !defined(__MLXBF_GIGE_REGS_H__) */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index faa63ea9b83e..1915fa41c622 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -95,7 +95,7 @@ struct mlxsw_afa_set {
*/
has_trap:1,
has_police:1;
- unsigned int ref_count;
+ refcount_t ref_count;
struct mlxsw_afa_set *next; /* Pointer to the next set. */
struct mlxsw_afa_set *prev; /* Pointer to the previous set,
* note that set may have multiple
@@ -120,7 +120,7 @@ struct mlxsw_afa_fwd_entry {
struct rhash_head ht_node;
struct mlxsw_afa_fwd_entry_ht_key ht_key;
u32 kvdl_index;
- unsigned int ref_count;
+ refcount_t ref_count;
};
static const struct rhashtable_params mlxsw_afa_fwd_entry_ht_params = {
@@ -282,7 +282,7 @@ static struct mlxsw_afa_set *mlxsw_afa_set_create(bool is_first)
/* Need to initialize the set to pass by default */
mlxsw_afa_set_goto_set(set, MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0);
set->ht_key.is_first = is_first;
- set->ref_count = 1;
+ refcount_set(&set->ref_count, 1);
return set;
}
@@ -330,7 +330,7 @@ static void mlxsw_afa_set_unshare(struct mlxsw_afa *mlxsw_afa,
static void mlxsw_afa_set_put(struct mlxsw_afa *mlxsw_afa,
struct mlxsw_afa_set *set)
{
- if (--set->ref_count)
+ if (!refcount_dec_and_test(&set->ref_count))
return;
if (set->shared)
mlxsw_afa_set_unshare(mlxsw_afa, set);
@@ -350,7 +350,7 @@ static struct mlxsw_afa_set *mlxsw_afa_set_get(struct mlxsw_afa *mlxsw_afa,
set = rhashtable_lookup_fast(&mlxsw_afa->set_ht, &orig_set->ht_key,
mlxsw_afa_set_ht_params);
if (set) {
- set->ref_count++;
+ refcount_inc(&set->ref_count);
mlxsw_afa_set_put(mlxsw_afa, orig_set);
} else {
set = orig_set;
@@ -564,7 +564,7 @@ mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u16 local_port)
if (!fwd_entry)
return ERR_PTR(-ENOMEM);
fwd_entry->ht_key.local_port = local_port;
- fwd_entry->ref_count = 1;
+ refcount_set(&fwd_entry->ref_count, 1);
err = rhashtable_insert_fast(&mlxsw_afa->fwd_entry_ht,
&fwd_entry->ht_node,
@@ -607,7 +607,7 @@ mlxsw_afa_fwd_entry_get(struct mlxsw_afa *mlxsw_afa, u16 local_port)
fwd_entry = rhashtable_lookup_fast(&mlxsw_afa->fwd_entry_ht, &ht_key,
mlxsw_afa_fwd_entry_ht_params);
if (fwd_entry) {
- fwd_entry->ref_count++;
+ refcount_inc(&fwd_entry->ref_count);
return fwd_entry;
}
return mlxsw_afa_fwd_entry_create(mlxsw_afa, local_port);
@@ -616,7 +616,7 @@ mlxsw_afa_fwd_entry_get(struct mlxsw_afa *mlxsw_afa, u16 local_port)
static void mlxsw_afa_fwd_entry_put(struct mlxsw_afa *mlxsw_afa,
struct mlxsw_afa_fwd_entry *fwd_entry)
{
- if (--fwd_entry->ref_count)
+ if (!refcount_dec_and_test(&fwd_entry->ref_count))
return;
mlxsw_afa_fwd_entry_destroy(mlxsw_afa, fwd_entry);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index 0d5e6f9b466e..947500f8ed71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -5,6 +5,7 @@
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/errno.h>
+#include <linux/refcount.h>
#include "item.h"
#include "core_acl_flex_keys.h"
@@ -107,7 +108,7 @@ EXPORT_SYMBOL(mlxsw_afk_destroy);
struct mlxsw_afk_key_info {
struct list_head list;
- unsigned int ref_count;
+ refcount_t ref_count;
unsigned int blocks_count;
int element_to_block[MLXSW_AFK_ELEMENT_MAX]; /* index is element, value
* is index inside "blocks"
@@ -334,7 +335,7 @@ mlxsw_afk_key_info_create(struct mlxsw_afk *mlxsw_afk,
if (err)
goto err_picker;
list_add(&key_info->list, &mlxsw_afk->key_info_list);
- key_info->ref_count = 1;
+ refcount_set(&key_info->ref_count, 1);
return key_info;
err_picker:
@@ -356,7 +357,7 @@ mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk,
key_info = mlxsw_afk_key_info_find(mlxsw_afk, elusage);
if (key_info) {
- key_info->ref_count++;
+ refcount_inc(&key_info->ref_count);
return key_info;
}
return mlxsw_afk_key_info_create(mlxsw_afk, elusage);
@@ -365,7 +366,7 @@ EXPORT_SYMBOL(mlxsw_afk_key_info_get);
void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info)
{
- if (--key_info->ref_count)
+ if (!refcount_dec_and_test(&key_info->ref_count))
return;
mlxsw_afk_key_info_destroy(key_info);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index 6b98c3287b49..f0ceb196a6ce 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -708,7 +708,6 @@ static const struct i2c_device_id mlxsw_m_i2c_id[] = {
static struct i2c_driver mlxsw_m_i2c_driver = {
.driver.name = "mlxsw_minimal",
- .class = I2C_CLASS_HWMON,
.id_table = mlxsw_m_i2c_id,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5d3413636a62..bb642e9bb6cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -176,13 +176,15 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16);
MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
- unsigned int counter_index, u64 *packets,
- u64 *bytes)
+ unsigned int counter_index, bool clear,
+ u64 *packets, u64 *bytes)
{
+ enum mlxsw_reg_mgpc_opcode op = clear ? MLXSW_REG_MGPC_OPCODE_CLEAR :
+ MLXSW_REG_MGPC_OPCODE_NOP;
char mgpc_pl[MLXSW_REG_MGPC_LEN];
int err;
- mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
+ mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, op,
MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
if (err)
@@ -2695,23 +2697,18 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_lag_pgt_init(struct mlxsw_sp *mlxsw_sp)
{
char sgcr_pl[MLXSW_REG_SGCR_LEN];
- u16 max_lag;
int err;
if (mlxsw_core_lag_mode(mlxsw_sp->core) !=
MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW)
return 0;
- err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag);
- if (err)
- return err;
-
/* In DDD mode, which we by default use, each LAG entry is 8 PGT
* entries. The LAG table address needs to be 8-aligned, but that ought
* to be the case, since the LAG table is allocated first.
*/
err = mlxsw_sp_pgt_mid_alloc_range(mlxsw_sp, &mlxsw_sp->lag_pgt_base,
- max_lag * 8);
+ mlxsw_sp->max_lag * 8);
if (err)
return err;
if (WARN_ON_ONCE(mlxsw_sp->lag_pgt_base % 8)) {
@@ -2728,33 +2725,31 @@ static int mlxsw_sp_lag_pgt_init(struct mlxsw_sp *mlxsw_sp)
err_mid_alloc_range:
mlxsw_sp_pgt_mid_free_range(mlxsw_sp, mlxsw_sp->lag_pgt_base,
- max_lag * 8);
+ mlxsw_sp->max_lag * 8);
return err;
}
static void mlxsw_sp_lag_pgt_fini(struct mlxsw_sp *mlxsw_sp)
{
- u16 max_lag;
- int err;
-
if (mlxsw_core_lag_mode(mlxsw_sp->core) !=
MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW)
return;
- err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag);
- if (err)
- return;
-
mlxsw_sp_pgt_mid_free_range(mlxsw_sp, mlxsw_sp->lag_pgt_base,
- max_lag * 8);
+ mlxsw_sp->max_lag * 8);
}
#define MLXSW_SP_LAG_SEED_INIT 0xcafecafe
+struct mlxsw_sp_lag {
+ struct net_device *dev;
+ refcount_t ref_count;
+ u16 lag_id;
+};
+
static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
{
char slcr_pl[MLXSW_REG_SLCR_LEN];
- u16 max_lag;
u32 seed;
int err;
@@ -2773,7 +2768,7 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
if (err)
return err;
- err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag);
+ err = mlxsw_core_max_lag(mlxsw_sp->core, &mlxsw_sp->max_lag);
if (err)
return err;
@@ -2784,7 +2779,7 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
if (err)
return err;
- mlxsw_sp->lags = kcalloc(max_lag, sizeof(struct mlxsw_sp_upper),
+ mlxsw_sp->lags = kcalloc(mlxsw_sp->max_lag, sizeof(struct mlxsw_sp_lag),
GFP_KERNEL);
if (!mlxsw_sp->lags) {
err = -ENOMEM;
@@ -4269,19 +4264,48 @@ mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port,
}
}
-static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+static struct mlxsw_sp_lag *
+mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev,
+ struct netlink_ext_ack *extack)
{
char sldr_pl[MLXSW_REG_SLDR_LEN];
+ struct mlxsw_sp_lag *lag;
+ u16 lag_id;
+ int i, err;
+
+ for (i = 0; i < mlxsw_sp->max_lag; i++) {
+ if (!mlxsw_sp->lags[i].dev)
+ break;
+ }
+
+ if (i == mlxsw_sp->max_lag) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Exceeded number of supported LAG devices");
+ return ERR_PTR(-EBUSY);
+ }
+ lag_id = i;
mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+ if (err)
+ return ERR_PTR(err);
+
+ lag = &mlxsw_sp->lags[lag_id];
+ lag->lag_id = lag_id;
+ lag->dev = lag_dev;
+ refcount_set(&lag->ref_count, 1);
+
+ return lag;
}
-static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+static int
+mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lag *lag)
{
char sldr_pl[MLXSW_REG_SLDR_LEN];
- mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id);
+ lag->dev = NULL;
+
+ mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag->lag_id);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
}
@@ -4329,34 +4353,44 @@ static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
}
-static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
- struct net_device *lag_dev,
- u16 *p_lag_id)
+static struct mlxsw_sp_lag *
+mlxsw_sp_lag_find(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev)
{
- struct mlxsw_sp_upper *lag;
- int free_lag_id = -1;
- u16 max_lag;
- int err, i;
+ int i;
- err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag);
- if (err)
- return err;
+ for (i = 0; i < mlxsw_sp->max_lag; i++) {
+ if (!mlxsw_sp->lags[i].dev)
+ continue;
- for (i = 0; i < max_lag; i++) {
- lag = mlxsw_sp_lag_get(mlxsw_sp, i);
- if (lag->ref_count) {
- if (lag->dev == lag_dev) {
- *p_lag_id = i;
- return 0;
- }
- } else if (free_lag_id < 0) {
- free_lag_id = i;
- }
+ if (mlxsw_sp->lags[i].dev == lag_dev)
+ return &mlxsw_sp->lags[i];
}
- if (free_lag_id < 0)
- return -EBUSY;
- *p_lag_id = free_lag_id;
- return 0;
+
+ return NULL;
+}
+
+static struct mlxsw_sp_lag *
+mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_lag *lag;
+
+ lag = mlxsw_sp_lag_find(mlxsw_sp, lag_dev);
+ if (lag) {
+ refcount_inc(&lag->ref_count);
+ return lag;
+ }
+
+ return mlxsw_sp_lag_create(mlxsw_sp, lag_dev, extack);
+}
+
+static void
+mlxsw_sp_lag_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lag *lag)
+{
+ if (!refcount_dec_and_test(&lag->ref_count))
+ return;
+
+ mlxsw_sp_lag_destroy(mlxsw_sp, lag);
}
static bool
@@ -4365,12 +4399,6 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
struct netdev_lag_upper_info *lag_upper_info,
struct netlink_ext_ack *extack)
{
- u16 lag_id;
-
- if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
- NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
- return false;
- }
if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
return false;
@@ -4482,22 +4510,16 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- struct mlxsw_sp_upper *lag;
+ struct mlxsw_sp_lag *lag;
u16 lag_id;
u8 port_index;
int err;
- err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id);
- if (err)
- return err;
- lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
- if (!lag->ref_count) {
- err = mlxsw_sp_lag_create(mlxsw_sp, lag_id);
- if (err)
- return err;
- lag->dev = lag_dev;
- }
+ lag = mlxsw_sp_lag_get(mlxsw_sp, lag_dev, extack);
+ if (IS_ERR(lag))
+ return PTR_ERR(lag);
+ lag_id = lag->lag_id;
err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index);
if (err)
return err;
@@ -4515,7 +4537,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_port->local_port);
mlxsw_sp_port->lag_id = lag_id;
mlxsw_sp_port->lagged = 1;
- lag->ref_count++;
err = mlxsw_sp_fid_port_join_lag(mlxsw_sp_port);
if (err)
@@ -4542,7 +4563,6 @@ err_replay:
err_router_join:
mlxsw_sp_fid_port_leave_lag(mlxsw_sp_port);
err_fid_port_join_lag:
- lag->ref_count--;
mlxsw_sp_port->lagged = 0;
mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
mlxsw_sp_port->local_port);
@@ -4550,8 +4570,7 @@ err_fid_port_join_lag:
err_col_port_add:
mlxsw_sp_lag_uppers_bridge_leave(mlxsw_sp_port, lag_dev);
err_lag_uppers_bridge_join:
- if (!lag->ref_count)
- mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+ mlxsw_sp_lag_put(mlxsw_sp, lag);
return err;
}
@@ -4560,12 +4579,11 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u16 lag_id = mlxsw_sp_port->lag_id;
- struct mlxsw_sp_upper *lag;
+ struct mlxsw_sp_lag *lag;
if (!mlxsw_sp_port->lagged)
return;
- lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
- WARN_ON(lag->ref_count == 0);
+ lag = &mlxsw_sp->lags[lag_id];
mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
@@ -4579,13 +4597,11 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_fid_port_leave_lag(mlxsw_sp_port);
- if (lag->ref_count == 1)
- mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+ mlxsw_sp_lag_put(mlxsw_sp, lag);
mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
mlxsw_sp_port->local_port);
mlxsw_sp_port->lagged = 0;
- lag->ref_count--;
/* Make sure untagged frames are allowed to ingress */
mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a0c9775fa955..3beb5d0847ab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -78,11 +78,6 @@ struct mlxsw_sp_span_entry;
enum mlxsw_sp_l3proto;
union mlxsw_sp_l3addr;
-struct mlxsw_sp_upper {
- struct net_device *dev;
- unsigned int ref_count;
-};
-
enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
MLXSW_SP_RIF_TYPE_VLAN,
@@ -136,6 +131,7 @@ struct mlxsw_sp_span_ops;
struct mlxsw_sp_qdisc_state;
struct mlxsw_sp_mall_entry;
struct mlxsw_sp_pgt;
+struct mlxsw_sp_lag;
struct mlxsw_sp_port_mapping {
u8 module;
@@ -164,7 +160,8 @@ struct mlxsw_sp {
const struct mlxsw_bus_info *bus_info;
unsigned char base_mac[ETH_ALEN];
const unsigned char *mac_mask;
- struct mlxsw_sp_upper *lags;
+ struct mlxsw_sp_lag *lags;
+ u16 max_lag;
struct mlxsw_sp_port_mapping *port_mapping;
struct mlxsw_sp_port_mapping_events port_mapping_events;
struct rhashtable sample_trigger_ht;
@@ -257,12 +254,6 @@ struct mlxsw_sp_fid_core_ops {
void (*fini)(struct mlxsw_sp *mlxsw_sp);
};
-static inline struct mlxsw_sp_upper *
-mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
-{
- return &mlxsw_sp->lags[lag_id];
-}
-
struct mlxsw_sp_port_pcpu_stats {
u64 rx_packets;
u64 rx_bytes;
@@ -715,8 +706,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
u16 vid_end, bool is_member, bool untagged);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
- unsigned int counter_index, u64 *packets,
- u64 *bytes);
+ unsigned int counter_index, bool clear,
+ u64 *packets, u64 *bytes);
int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
unsigned int *p_counter_index);
void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 7c59c8a13584..3e70cee4d2f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -9,6 +9,7 @@
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
+#include <linux/refcount.h>
#include <net/net_namespace.h>
#include <net/tc_act/tc_vlan.h>
@@ -55,7 +56,7 @@ struct mlxsw_sp_acl_ruleset {
struct rhash_head ht_node; /* Member of acl HT */
struct mlxsw_sp_acl_ruleset_ht_key ht_key;
struct rhashtable rule_ht;
- unsigned int ref_count;
+ refcount_t ref_count;
unsigned int min_prio;
unsigned int max_prio;
unsigned long priv[];
@@ -99,7 +100,7 @@ static bool
mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
{
/* We hold a reference on ruleset ourselves */
- return ruleset->ref_count == 2;
+ return refcount_read(&ruleset->ref_count) == 2;
}
int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
@@ -176,7 +177,7 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
ruleset = kzalloc(alloc_size, GFP_KERNEL);
if (!ruleset)
return ERR_PTR(-ENOMEM);
- ruleset->ref_count = 1;
+ refcount_set(&ruleset->ref_count, 1);
ruleset->ht_key.block = block;
ruleset->ht_key.chain_index = chain_index;
ruleset->ht_key.ops = ops;
@@ -222,13 +223,13 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_acl_ruleset_ref_inc(struct mlxsw_sp_acl_ruleset *ruleset)
{
- ruleset->ref_count++;
+ refcount_inc(&ruleset->ref_count);
}
static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_ruleset *ruleset)
{
- if (--ruleset->ref_count)
+ if (!refcount_dec_and_test(&ruleset->ref_count))
return;
mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset);
}
@@ -1023,7 +1024,7 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
rulei = mlxsw_sp_acl_rule_rulei(rule);
if (rulei->counter_valid) {
err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index,
- &current_packets,
+ false, &current_packets,
&current_bytes);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 50ea1eff02b2..f20052776b3f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -9,6 +9,7 @@
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
+#include <linux/refcount.h>
#include <net/devlink.h>
#include <trace/events/mlxsw.h>
@@ -155,7 +156,7 @@ struct mlxsw_sp_acl_tcam_vregion {
struct mlxsw_sp_acl_tcam_rehash_ctx ctx;
} rehash;
struct mlxsw_sp *mlxsw_sp;
- unsigned int ref_count;
+ refcount_t ref_count;
};
struct mlxsw_sp_acl_tcam_vchunk;
@@ -176,7 +177,7 @@ struct mlxsw_sp_acl_tcam_vchunk {
unsigned int priority; /* Priority within the vregion and group */
struct mlxsw_sp_acl_tcam_vgroup *vgroup;
struct mlxsw_sp_acl_tcam_vregion *vregion;
- unsigned int ref_count;
+ refcount_t ref_count;
};
struct mlxsw_sp_acl_tcam_entry {
@@ -769,7 +770,7 @@ mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp,
vregion->tcam = tcam;
vregion->mlxsw_sp = mlxsw_sp;
vregion->vgroup = vgroup;
- vregion->ref_count = 1;
+ refcount_set(&vregion->ref_count, 1);
vregion->key_info = mlxsw_afk_key_info_get(afk, elusage);
if (IS_ERR(vregion->key_info)) {
@@ -856,7 +857,7 @@ mlxsw_sp_acl_tcam_vregion_get(struct mlxsw_sp *mlxsw_sp,
*/
return ERR_PTR(-EOPNOTSUPP);
}
- vregion->ref_count++;
+ refcount_inc(&vregion->ref_count);
return vregion;
}
@@ -871,7 +872,7 @@ static void
mlxsw_sp_acl_tcam_vregion_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_tcam_vregion *vregion)
{
- if (--vregion->ref_count)
+ if (!refcount_dec_and_test(&vregion->ref_count))
return;
mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion);
}
@@ -924,7 +925,7 @@ mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp,
INIT_LIST_HEAD(&vchunk->ventry_list);
vchunk->priority = priority;
vchunk->vgroup = vgroup;
- vchunk->ref_count = 1;
+ refcount_set(&vchunk->ref_count, 1);
vregion = mlxsw_sp_acl_tcam_vregion_get(mlxsw_sp, vgroup,
priority, elusage);
@@ -1008,7 +1009,7 @@ mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp,
if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info,
elusage)))
return ERR_PTR(-EINVAL);
- vchunk->ref_count++;
+ refcount_inc(&vchunk->ref_count);
return vchunk;
}
return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, vgroup,
@@ -1019,7 +1020,7 @@ static void
mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_tcam_vchunk *vchunk)
{
- if (--vchunk->ref_count)
+ if (!refcount_dec_and_test(&vchunk->ref_count))
return;
mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index c8a356accdf8..ca80af06465f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -1181,9 +1181,11 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
char ratr_pl[MLXSW_REG_RATR_LEN];
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_nexthop *nh;
+ unsigned int n_done = 0;
u32 adj_hash_index = 0;
u32 adj_index = 0;
u32 adj_size = 0;
+ int err;
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_is_forward(nh) ||
@@ -1192,15 +1194,27 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
&adj_hash_index);
- if (enable)
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
- else
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ if (enable) {
+ err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ if (err)
+ goto err_counter_enable;
+ } else {
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+ }
mlxsw_sp_nexthop_eth_update(mlxsw_sp,
adj_index + adj_hash_index, nh,
true, ratr_pl);
+ n_done++;
}
return 0;
+
+err_counter_enable:
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+ if (!n_done--)
+ break;
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+ }
+ return err;
}
static u64
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
index 221aa6a474eb..01d81ae3662a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -361,7 +361,7 @@ static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_mr_tcam_route *route = route_priv;
return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index,
- packets, bytes);
+ false, packets, bytes);
}
static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7164f9e6370f..40ba314fbc72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -19,6 +19,7 @@
#include <linux/net_namespace.h>
#include <linux/mutex.h>
#include <linux/genalloc.h>
+#include <linux/xarray.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
@@ -501,7 +502,7 @@ struct mlxsw_sp_rt6 {
struct mlxsw_sp_lpm_tree {
u8 id; /* tree ID */
- unsigned int ref_count;
+ refcount_t ref_count;
enum mlxsw_sp_l3proto proto;
unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
struct mlxsw_sp_prefix_usage prefix_usage;
@@ -578,7 +579,7 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
lpm_tree = &mlxsw_sp->router->lpm.trees[i];
- if (lpm_tree->ref_count == 0)
+ if (refcount_read(&lpm_tree->ref_count) == 0)
return lpm_tree;
}
return NULL;
@@ -654,7 +655,7 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
sizeof(lpm_tree->prefix_usage));
memset(&lpm_tree->prefix_ref_count, 0,
sizeof(lpm_tree->prefix_ref_count));
- lpm_tree->ref_count = 1;
+ refcount_set(&lpm_tree->ref_count, 1);
return lpm_tree;
err_left_struct_set:
@@ -678,7 +679,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
lpm_tree = &mlxsw_sp->router->lpm.trees[i];
- if (lpm_tree->ref_count != 0 &&
+ if (refcount_read(&lpm_tree->ref_count) &&
lpm_tree->proto == proto &&
mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
prefix_usage)) {
@@ -691,14 +692,15 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
- lpm_tree->ref_count++;
+ refcount_inc(&lpm_tree->ref_count);
}
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_lpm_tree *lpm_tree)
{
- if (--lpm_tree->ref_count == 0)
- mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
+ if (!refcount_dec_and_test(&lpm_tree->ref_count))
+ return;
+ mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
@@ -2250,7 +2252,7 @@ int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
return -EINVAL;
return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
- p_counter, NULL);
+ false, p_counter, NULL);
}
static struct mlxsw_sp_neigh_entry *
@@ -3048,6 +3050,8 @@ struct mlxsw_sp_nexthop_key {
struct fib_nh *fib_nh;
};
+struct mlxsw_sp_nexthop_counter;
+
struct mlxsw_sp_nexthop {
struct list_head neigh_list_node; /* member of neigh entry list */
struct list_head crif_list_node;
@@ -3079,8 +3083,8 @@ struct mlxsw_sp_nexthop {
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
};
- unsigned int counter_index;
- bool counter_valid;
+ struct mlxsw_sp_nexthop_counter *counter;
+ u32 id; /* NH ID for members of a NH object group. */
};
static struct net_device *
@@ -3105,8 +3109,10 @@ struct mlxsw_sp_nexthop_group_info {
int sum_norm_weight;
u8 adj_index_valid:1,
gateway:1, /* routes using the group use a gateway */
- is_resilient:1;
+ is_resilient:1,
+ hw_stats:1;
struct list_head list; /* member in nh_res_grp_list */
+ struct xarray nexthop_counters;
struct mlxsw_sp_nexthop nexthops[] __counted_by(count);
};
@@ -3150,39 +3156,148 @@ struct mlxsw_sp_nexthop_group {
bool can_destroy;
};
-void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+struct mlxsw_sp_nexthop_counter {
+ unsigned int counter_index;
+ refcount_t ref_count;
+};
+
+static struct mlxsw_sp_nexthop_counter *
+mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nexthop_counter *nhct;
+ int err;
+
+ nhct = kzalloc(sizeof(*nhct), GFP_KERNEL);
+ if (!nhct)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nhct->counter_index);
+ if (err)
+ goto err_counter_alloc;
+
+ refcount_set(&nhct->ref_count, 1);
+ return nhct;
+
+err_counter_alloc:
+ kfree(nhct);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_counter *nhct)
+{
+ mlxsw_sp_flow_counter_free(mlxsw_sp, nhct->counter_index);
+ kfree(nhct);
+}
+
+static struct mlxsw_sp_nexthop_counter *
+mlxsw_sp_nexthop_sh_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
+ struct mlxsw_sp_nexthop_counter *nhct;
+ void *ptr;
+ int err;
+
+ nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id);
+ if (nhct) {
+ refcount_inc(&nhct->ref_count);
+ return nhct;
+ }
+
+ nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp);
+ if (IS_ERR(nhct))
+ return nhct;
+
+ ptr = xa_store(&nh_grp->nhgi->nexthop_counters, nh->id, nhct,
+ GFP_KERNEL);
+ if (IS_ERR(ptr)) {
+ err = PTR_ERR(ptr);
+ goto err_store;
+ }
+
+ return nhct;
+
+err_store:
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_nexthop_sh_counter_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
+ struct mlxsw_sp_nexthop_counter *nhct;
+
+ nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id);
+ if (WARN_ON(!nhct))
+ return;
+
+ if (!refcount_dec_and_test(&nhct->ref_count))
+ return;
+
+ xa_erase(&nh_grp->nhgi->nexthop_counters, nh->id);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct);
+}
+
+int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
+ const char *table_adj = MLXSW_SP_DPIPE_TABLE_NAME_ADJ;
+ struct mlxsw_sp_nexthop_counter *nhct;
struct devlink *devlink;
+ bool dpipe_stats;
+
+ if (nh->counter)
+ return 0;
devlink = priv_to_devlink(mlxsw_sp->core);
- if (!devlink_dpipe_table_counter_enabled(devlink,
- MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
- return;
+ dpipe_stats = devlink_dpipe_table_counter_enabled(devlink, table_adj);
+ if (!(nh->nhgi->hw_stats || dpipe_stats))
+ return 0;
- if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
- return;
+ if (nh->id)
+ nhct = mlxsw_sp_nexthop_sh_counter_get(mlxsw_sp, nh);
+ else
+ nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp);
+ if (IS_ERR(nhct))
+ return PTR_ERR(nhct);
- nh->counter_valid = true;
+ nh->counter = nhct;
+ return 0;
}
-void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop *nh)
+void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
{
- if (!nh->counter_valid)
+ if (!nh->counter)
return;
- mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
- nh->counter_valid = false;
+
+ if (nh->id)
+ mlxsw_sp_nexthop_sh_counter_put(mlxsw_sp, nh);
+ else
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh->counter);
+ nh->counter = NULL;
+}
+
+static int mlxsw_sp_nexthop_counter_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ if (nh->nhgi->hw_stats)
+ return mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+ return 0;
}
int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh, u64 *p_counter)
{
- if (!nh->counter_valid)
+ if (!nh->counter)
return -EINVAL;
- return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
- p_counter, NULL);
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter->counter_index,
+ true, p_counter, NULL);
}
struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
@@ -3655,8 +3770,9 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (nh->counter_valid)
- mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
+ if (nh->counter)
+ mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter->counter_index,
+ true);
else
mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
@@ -3743,6 +3859,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
nh = &nhgi->nexthops[i];
if (!nh->should_offload) {
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
nh->offloaded = 0;
continue;
}
@@ -3750,6 +3867,10 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
if (nh->update || reallocate) {
int err = 0;
+ err = mlxsw_sp_nexthop_counter_update(mlxsw_sp, nh);
+ if (err)
+ return err;
+
err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
true, ratr_pl);
if (err)
@@ -4506,7 +4627,10 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+ err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ if (err)
+ goto err_counter_enable;
+
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
if (!dev)
@@ -4530,7 +4654,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
err_nexthop_neigh_init:
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
+err_counter_enable:
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
return err;
}
@@ -4540,7 +4665,7 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
{
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
@@ -5005,9 +5130,9 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
break;
}
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
nh->ifindex = dev->ifindex;
+ nh->id = nh_obj->id;
err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
if (err)
@@ -5029,7 +5154,6 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
err_type_init:
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
return err;
}
@@ -5040,7 +5164,7 @@ static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
nh->should_offload = 0;
}
@@ -5052,6 +5176,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group_info *nhgi;
struct mlxsw_sp_nexthop *nh;
bool is_resilient = false;
+ bool hw_stats = false;
unsigned int nhs;
int err, i;
@@ -5061,9 +5186,11 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
break;
case NH_NOTIFIER_INFO_TYPE_GRP:
nhs = info->nh_grp->num_nh;
+ hw_stats = info->nh_grp->hw_stats;
break;
case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
nhs = info->nh_res_table->num_nh_buckets;
+ hw_stats = info->nh_res_table->hw_stats;
is_resilient = true;
break;
default:
@@ -5078,6 +5205,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
nhgi->is_resilient = is_resilient;
nhgi->count = nhs;
+ nhgi->hw_stats = hw_stats;
+
+ xa_init_flags(&nhgi->nexthop_counters, XA_FLAGS_ALLOC1);
+
for (i = 0; i < nhgi->count; i++) {
struct nh_notifier_single_info *nh_obj;
int weight;
@@ -5160,6 +5291,8 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
}
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
WARN_ON_ONCE(nhgi->adj_index_valid);
+ WARN_ON(!xa_empty(&nhgi->nexthop_counters));
+ xa_destroy(&nhgi->nexthop_counters);
kfree(nhgi);
}
@@ -5299,6 +5432,43 @@ err_out:
return err;
}
+static int mlxsw_sp_nexthop_obj_res_group_pre(struct mlxsw_sp *mlxsw_sp,
+ struct nh_notifier_info *info)
+{
+ struct nh_notifier_grp_info *grp_info = info->nh_grp;
+ struct mlxsw_sp_nexthop_group_info *nhgi;
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ int err;
+ int i;
+
+ nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
+ if (!nh_grp)
+ return 0;
+ nhgi = nh_grp->nhgi;
+
+ if (nhgi->hw_stats == grp_info->hw_stats)
+ return 0;
+
+ nhgi->hw_stats = grp_info->hw_stats;
+
+ for (i = 0; i < nhgi->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
+
+ if (nh->offloaded)
+ nh->update = 1;
+ }
+
+ err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ if (err)
+ goto err_group_refresh;
+
+ return 0;
+
+err_group_refresh:
+ nhgi->hw_stats = !grp_info->hw_stats;
+ return err;
+}
+
static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
struct nh_notifier_info *info)
{
@@ -5475,6 +5645,79 @@ err_nexthop_obj_init:
return err;
}
+static void
+mlxsw_sp_nexthop_obj_mp_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group_info *nhgi,
+ struct nh_notifier_grp_hw_stats_info *info)
+{
+ int nhi;
+
+ for (nhi = 0; nhi < info->num_nh; nhi++) {
+ struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[nhi];
+ u64 packets;
+ int err;
+
+ err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets);
+ if (err)
+ continue;
+
+ nh_grp_hw_stats_report_delta(info, nhi, packets);
+ }
+}
+
+static void
+mlxsw_sp_nexthop_obj_res_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group_info *nhgi,
+ struct nh_notifier_grp_hw_stats_info *info)
+{
+ int nhi = -1;
+ int bucket;
+
+ for (bucket = 0; bucket < nhgi->count; bucket++) {
+ struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[bucket];
+ u64 packets;
+ int err;
+
+ if (nhi == -1 || info->stats[nhi].id != nh->id) {
+ for (nhi = 0; nhi < info->num_nh; nhi++)
+ if (info->stats[nhi].id == nh->id)
+ break;
+ if (WARN_ON_ONCE(nhi == info->num_nh)) {
+ nhi = -1;
+ continue;
+ }
+ }
+
+ err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets);
+ if (err)
+ continue;
+
+ nh_grp_hw_stats_report_delta(info, nhi, packets);
+ }
+}
+
+static void mlxsw_sp_nexthop_obj_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
+ struct nh_notifier_info *info)
+{
+ struct mlxsw_sp_nexthop_group_info *nhgi;
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ if (info->type != NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS)
+ return;
+
+ nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
+ if (!nh_grp)
+ return;
+ nhgi = nh_grp->nhgi;
+
+ if (nhgi->is_resilient)
+ mlxsw_sp_nexthop_obj_res_hw_stats_get(mlxsw_sp, nhgi,
+ info->nh_grp_hw_stats);
+ else
+ mlxsw_sp_nexthop_obj_mp_hw_stats_get(mlxsw_sp, nhgi,
+ info->nh_grp_hw_stats);
+}
+
static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -5490,6 +5733,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
mutex_lock(&router->lock);
switch (event) {
+ case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE:
+ err = mlxsw_sp_nexthop_obj_res_group_pre(router->mlxsw_sp,
+ info);
+ break;
case NEXTHOP_EVENT_REPLACE:
err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
break;
@@ -5500,6 +5747,9 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
info);
break;
+ case NEXTHOP_EVENT_HW_STATS_REPORT_DELTA:
+ mlxsw_sp_nexthop_obj_hw_stats_get(router->mlxsw_sp, info);
+ break;
default:
break;
}
@@ -6733,7 +6983,10 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
#if IS_ENABLED(CONFIG_IPV6)
nh->neigh_tbl = &nd_tbl;
#endif
- mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+
+ err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
+ if (err)
+ return err;
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -6749,7 +7002,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
err_nexthop_type_init:
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
return err;
}
@@ -6758,7 +7011,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
{
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
- mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
}
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index ed3b628caafe..0432c7cc6b07 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -156,10 +156,10 @@ int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl);
-void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh);
-void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh);
static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
const union mlxsw_sp_l3addr *addr2)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 6c749c148148..6397ff0dc951 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -61,7 +61,7 @@ struct mlxsw_sp_bridge_port {
struct mlxsw_sp_bridge_device *bridge_device;
struct list_head list;
struct list_head vlans_list;
- unsigned int ref_count;
+ refcount_t ref_count;
u8 stp_state;
unsigned long flags;
bool mrouter;
@@ -495,7 +495,7 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
BR_MCAST_FLOOD;
INIT_LIST_HEAD(&bridge_port->vlans_list);
list_add(&bridge_port->list, &bridge_device->ports_list);
- bridge_port->ref_count = 1;
+ refcount_set(&bridge_port->ref_count, 1);
err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev,
NULL, NULL, NULL, false, extack);
@@ -531,7 +531,7 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge,
bridge_port = mlxsw_sp_bridge_port_find(bridge, brport_dev);
if (bridge_port) {
- bridge_port->ref_count++;
+ refcount_inc(&bridge_port->ref_count);
return bridge_port;
}
@@ -558,7 +558,7 @@ static void mlxsw_sp_bridge_port_put(struct mlxsw_sp_bridge *bridge,
{
struct mlxsw_sp_bridge_device *bridge_device;
- if (--bridge_port->ref_count != 0)
+ if (!refcount_dec_and_test(&bridge_port->ref_count))
return;
bridge_device = bridge_port->bridge_device;
mlxsw_sp_bridge_port_destroy(bridge_port);
diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index 5693784eec5b..443128adbcb6 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -464,7 +464,7 @@ static struct regmap_config regcfg = {
.val_bits = 16,
.max_register = 0xee,
.reg_stride = 2,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.val_format_endian = REGMAP_ENDIAN_LITTLE,
.readable_reg = encx24j600_regmap_readable,
.writeable_reg = encx24j600_regmap_writeable,
@@ -485,7 +485,7 @@ static struct regmap_config phycfg = {
.reg_bits = 8,
.val_bits = 16,
.max_register = 0x1f,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.val_format_endian = REGMAP_ENDIAN_LITTLE,
.readable_reg = encx24j600_phymap_readable,
.writeable_reg = encx24j600_phymap_writeable,
@@ -513,4 +513,5 @@ int devm_regmap_init_encx24j600(struct device *dev,
}
EXPORT_SYMBOL_GPL(devm_regmap_init_encx24j600);
+MODULE_DESCRIPTION("Microchip ENCX24J600 helpers");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index a2b3f4433ca8..8a6ae171e375 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -1055,7 +1055,7 @@ static int lan743x_ethtool_get_ts_info(struct net_device *netdev,
}
static int lan743x_ethtool_get_eee(struct net_device *netdev,
- struct ethtool_eee *eee)
+ struct ethtool_keee *eee)
{
struct lan743x_adapter *adapter = netdev_priv(netdev);
struct phy_device *phydev = netdev->phydev;
@@ -1092,7 +1092,7 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev,
}
static int lan743x_ethtool_set_eee(struct net_device *netdev,
- struct ethtool_eee *eee)
+ struct ethtool_keee *eee)
{
struct lan743x_adapter *adapter;
struct phy_device *phydev;
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 45e209a7d083..bd8aa83b47e5 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1196,7 +1196,7 @@ static int lan743x_sgmii_config(struct lan743x_adapter *adapter)
ret = lan743x_is_sgmii_2_5G_mode(adapter, &status);
if (ret < 0) {
netif_err(adapter, drv, adapter->netdev,
- "erro %d SGMII get mode failed\n", ret);
+ "error %d SGMII get mode failed\n", ret);
return ret;
}
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index 2f04bc77a118..2801f08bf1c9 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -1712,13 +1712,13 @@ bool lan743x_ptp_request_tx_timestamp(struct lan743x_adapter *adapter)
struct lan743x_ptp *ptp = &adapter->ptp;
bool result = false;
- spin_lock_bh(&ptp->tx_ts_lock);
+ spin_lock(&ptp->tx_ts_lock);
if (ptp->pending_tx_timestamps < LAN743X_PTP_NUMBER_OF_TX_TIMESTAMPS) {
/* request granted */
ptp->pending_tx_timestamps++;
result = true;
}
- spin_unlock_bh(&ptp->tx_ts_lock);
+ spin_unlock(&ptp->tx_ts_lock);
return result;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
index 41fa2523d91d..5f2cd9a8cf8f 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
/* Now, set PGIDs for each active LAG */
for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
- struct net_device *bond = lan966x->ports[lag]->bond;
+ struct lan966x_port *port = lan966x->ports[lag];
int num_active_ports = 0;
+ struct net_device *bond;
unsigned long bond_mask;
u8 aggr_idx[16];
- if (!bond || (visited & BIT(lag)))
+ if (!port || !port->bond || (visited & BIT(lag)))
continue;
+ bond = port->bond;
bond_mask = lan966x_lag_get_mask(lan966x, bond);
for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
struct lan966x_port *port = lan966x->ports[p];
+ if (!port)
+ continue;
+
lan_wr(ANA_PGID_PGID_SET(bond_mask),
lan966x, ANA_PGID(p));
if (port->lag_tx_active)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
index 92108d354051..2e83bbb9477e 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
@@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port)
lan966x_taprio_speed_set(port, config->speed);
/* Also the GIGA_MODE_ENA(1) needs to be set regardless of the
- * port speed for QSGMII ports.
+ * port speed for QSGMII or SGMII ports.
*/
- if (phy_interface_num_ports(config->portmode) == 4)
+ if (phy_interface_num_ports(config->portmode) == 4 ||
+ config->portmode == PHY_INTERFACE_MODE_SGMII)
mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1);
lan_wr(config->duplex | mode,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
index ac525ff1503e..3a01e13bd10b 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
@@ -25,6 +25,8 @@ static void lan966x_vcap_is1_port_keys(struct lan966x_port *port,
for (int l = 0; l < admin->lookups; ++l) {
out->prf(out->dst, "\n Lookup %d: ", l);
+ val = lan_rd(lan966x, ANA_VCAP_S1_CFG(port->chip_port, l));
+
out->prf(out->dst, "\n other: ");
switch (ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(val)) {
case VCAP_IS1_PS_OTHER_NORMAL:
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
index 4af285918ea2..75868b3f548e 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
@@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5,
list) {
if ((vid == 0 || mact_entry->vid == vid) &&
ether_addr_equal(addr, mact_entry->mac)) {
+ sparx5_mact_forget(sparx5, addr, mact_entry->vid);
+
list_del(&mact_entry->list);
devm_kfree(sparx5->dev, mact_entry);
-
- sparx5_mact_forget(sparx5, addr, mact_entry->vid);
}
}
mutex_unlock(&sparx5->mact_lock);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index d1f7fc8b1b71..3c066b62e689 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -757,6 +757,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sparx5);
sparx5->pdev = pdev;
sparx5->dev = &pdev->dev;
+ spin_lock_init(&sparx5->tx_lock);
/* Do switch core reset if available */
reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch");
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 6f565c0c0c3d..316fed5f2735 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -280,6 +280,7 @@ struct sparx5 {
int xtr_irq;
/* Frame DMA */
int fdma_irq;
+ spinlock_t tx_lock; /* lock for frame transmission */
struct sparx5_rx rx;
struct sparx5_tx tx;
/* PTP */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
index 6db6ac6a3bbc..ac7e1cffbcec 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
+ spin_lock(&sparx5->tx_lock);
if (sparx5->fdma_irq > 0)
ret = sparx5_fdma_xmit(sparx5, ifh, skb);
else
ret = sparx5_inject(sparx5, ifh, skb, dev);
+ spin_unlock(&sparx5->tx_lock);
if (ret == -EBUSY)
goto busy;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index d33b27214539..1332db9a08eb 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1249,15 +1249,47 @@ void mana_gd_free_res_map(struct gdma_resource *r)
r->size = 0;
}
+static int irq_setup(unsigned int *irqs, unsigned int len, int node)
+{
+ const struct cpumask *next, *prev = cpu_none_mask;
+ cpumask_var_t cpus __free(free_cpumask_var);
+ int cpu, weight;
+
+ if (!alloc_cpumask_var(&cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ rcu_read_lock();
+ for_each_numa_hop_mask(next, node) {
+ weight = cpumask_weight_andnot(next, prev);
+ while (weight > 0) {
+ cpumask_andnot(cpus, next, prev);
+ for_each_cpu(cpu, cpus) {
+ if (len-- == 0)
+ goto done;
+ irq_set_affinity_and_hint(*irqs++, topology_sibling_cpumask(cpu));
+ cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu));
+ --weight;
+ }
+ }
+ prev = next;
+ }
+done:
+ rcu_read_unlock();
+ return 0;
+}
+
static int mana_gd_setup_irqs(struct pci_dev *pdev)
{
- unsigned int max_queues_per_port = num_online_cpus();
struct gdma_context *gc = pci_get_drvdata(pdev);
+ unsigned int max_queues_per_port;
struct gdma_irq_context *gic;
unsigned int max_irqs, cpu;
- int nvec, irq;
+ int start_irq_index = 1;
+ int nvec, *irqs, irq;
int err, i = 0, j;
+ cpus_read_lock();
+ max_queues_per_port = num_online_cpus();
if (max_queues_per_port > MANA_MAX_NUM_QUEUES)
max_queues_per_port = MANA_MAX_NUM_QUEUES;
@@ -1265,8 +1297,18 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
max_irqs = max_queues_per_port + 1;
nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX);
- if (nvec < 0)
+ if (nvec < 0) {
+ cpus_read_unlock();
return nvec;
+ }
+ if (nvec <= num_online_cpus())
+ start_irq_index = 0;
+
+ irqs = kmalloc_array((nvec - start_irq_index), sizeof(int), GFP_KERNEL);
+ if (!irqs) {
+ err = -ENOMEM;
+ goto free_irq_vector;
+ }
gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context),
GFP_KERNEL);
@@ -1294,17 +1336,41 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
goto free_irq;
}
- err = request_irq(irq, mana_gd_intr, 0, gic->name, gic);
- if (err)
- goto free_irq;
-
- cpu = cpumask_local_spread(i, gc->numa_node);
- irq_set_affinity_and_hint(irq, cpumask_of(cpu));
+ if (!i) {
+ err = request_irq(irq, mana_gd_intr, 0, gic->name, gic);
+ if (err)
+ goto free_irq;
+
+ /* If number of IRQ is one extra than number of online CPUs,
+ * then we need to assign IRQ0 (hwc irq) and IRQ1 to
+ * same CPU.
+ * Else we will use different CPUs for IRQ0 and IRQ1.
+ * Also we are using cpumask_local_spread instead of
+ * cpumask_first for the node, because the node can be
+ * mem only.
+ */
+ if (start_irq_index) {
+ cpu = cpumask_local_spread(i, gc->numa_node);
+ irq_set_affinity_and_hint(irq, cpumask_of(cpu));
+ } else {
+ irqs[start_irq_index] = irq;
+ }
+ } else {
+ irqs[i - start_irq_index] = irq;
+ err = request_irq(irqs[i - start_irq_index], mana_gd_intr, 0,
+ gic->name, gic);
+ if (err)
+ goto free_irq;
+ }
}
+ err = irq_setup(irqs, (nvec - start_irq_index), gc->numa_node);
+ if (err)
+ goto free_irq;
+
gc->max_num_msix = nvec;
gc->num_msix_usable = nvec;
-
+ cpus_read_unlock();
return 0;
free_irq:
@@ -1317,8 +1383,10 @@ free_irq:
}
kfree(gc->irq_contexts);
+ kfree(irqs);
gc->irq_contexts = NULL;
free_irq_vector:
+ cpus_read_unlock();
pci_free_irq_vectors(pdev);
return err;
}
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 56ccbd4c37fe..ed2fb44500b0 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -3078,4 +3078,5 @@ void ocelot_deinit_port(struct ocelot *ocelot, int port)
}
EXPORT_SYMBOL(ocelot_deinit_port);
+MODULE_DESCRIPTION("Microsemi Ocelot switch family library");
MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 2b383d92d7f5..2c3f62907958 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -460,7 +460,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
set_tun->ttl = ip6_dst_hoplimit(dst);
dst_release(dst);
} else {
- set_tun->ttl = net->ipv6.devconf_all->hop_limit;
+ set_tun->ttl = READ_ONCE(net->ipv6.devconf_all->hop_limit);
}
#endif
} else {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 2967bab72505..15180538b80a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_
mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask);
return;
+ /* Both struct tcphdr and struct udphdr start with
+ * __be16 source;
+ * __be16 dest;
+ * so we can use the same code for both.
+ */
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
- mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val);
- mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask);
+ if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) {
+ mangle_action->mangle.val =
+ (__force u32)cpu_to_be32(mangle_action->mangle.val << 16);
+ /* The mask of mangle action is inverse mask,
+ * so clear the dest tp port with 0xFFFF to
+ * instead of rotate-left operation.
+ */
+ mangle_action->mangle.mask =
+ (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF);
+ }
+ if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) {
+ mangle_action->mangle.offset = 0;
+ mangle_action->mangle.val =
+ (__force u32)cpu_to_be32(mangle_action->mangle.val);
+ mangle_action->mangle.mask =
+ (__force u32)cpu_to_be32(mangle_action->mangle.mask);
+ }
return;
default:
@@ -1864,10 +1884,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
{
struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
struct nfp_fl_ct_flow_entry *ct_entry;
+ struct flow_action_entry *ct_goto;
struct nfp_fl_ct_zone_entry *zt;
+ struct flow_action_entry *act;
bool wildcarded = false;
struct flow_match_ct ct;
- struct flow_action_entry *ct_goto;
+ int i;
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_REDIRECT_INGRESS:
+ case FLOW_ACTION_MIRRED:
+ case FLOW_ACTION_MIRRED_INGRESS:
+ if (act->dev->rtnl_link_ops &&
+ !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "unsupported offload: out port is openvswitch internal port");
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ break;
+ }
+ }
flow_rule_match_ct(rule, &ct);
if (!ct.mask->ct_zone) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
index 361d7c495e2d..2c7bd6e80d99 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
@@ -337,6 +337,11 @@ static void nfp_fl_lag_do_work(struct work_struct *work)
acti_netdevs = kmalloc_array(entry->slave_cnt,
sizeof(*acti_netdevs), GFP_KERNEL);
+ if (!acti_netdevs) {
+ schedule_delayed_work(&lag->work,
+ NFP_FL_LAG_DELAY);
+ continue;
+ }
/* Include sanity check in the loop. It may be that a bond has
* changed between processing the last notification and the
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index e522845c7c21..0d7d138d6e0d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -1084,7 +1084,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
u16 nfp_mac_idx = 0;
entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
- if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) {
+ if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) {
if (entry->bridge_count ||
!nfp_flower_is_supported_bridge(netdev)) {
nfp_tunnel_offloaded_macs_inc_ref_and_link(entry,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 3b3210d823e8..f28e769e6fda 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2776,6 +2776,7 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
case NFP_NFD_VER_NFD3:
netdev->netdev_ops = &nfp_nfd3_netdev_ops;
netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ netdev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
break;
case NFP_NFD_VER_NFDK:
netdev->netdev_ops = &nfp_nfdk_netdev_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index 33b4c2856316..3f10c5365c80 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
const u32 barcfg_msix_general =
NFP_PCIE_BAR_PCIE2CPP_MapType(
NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) |
- NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT;
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect(
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT);
const u32 barcfg_msix_xpb =
NFP_PCIE_BAR_PCIE2CPP_MapType(
NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) |
- NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT |
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect(
+ NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) |
NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress(
NFP_CPP_TARGET_ISLAND_XPB);
const u32 barcfg_explicit[4] = {
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 7a549b834e97..31f896c4aa26 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1761,7 +1761,7 @@ static void nv_get_stats(int cpu, struct fe_priv *np,
/*
* nv_get_stats64: dev->ndo_get_stats64 function
* Get latest stats value from the nic.
- * Called with read_lock(&dev_base_lock) held for read -
+ * Called with rcu_read_lock() held -
* only synchronized against unregister_netdevice.
*/
static void
@@ -3090,7 +3090,7 @@ static void set_bufsize(struct net_device *dev)
/*
* nv_change_mtu: dev->change_mtu function
- * Called with dev_base_lock held for read.
+ * Called with RTNL held for read.
*/
static int nv_change_mtu(struct net_device *dev, int new_mtu)
{
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 9ffef2e06885..2ccc2c2a06e3 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -76,6 +76,8 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
u8 status, int err);
+bool ionic_notifyq_service(struct ionic_cq *cq);
+bool ionic_adminq_service(struct ionic_cq *cq);
int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index c49aa358e424..6ba8d4aca0a0 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -93,6 +93,7 @@ static void ionic_unmap_bars(struct ionic *ionic)
bars[i].len = 0;
}
}
+ ionic->num_bars = 0;
}
void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num)
@@ -215,15 +216,17 @@ out:
static void ionic_clear_pci(struct ionic *ionic)
{
- ionic->idev.dev_info_regs = NULL;
- ionic->idev.dev_cmd_regs = NULL;
- ionic->idev.intr_status = NULL;
- ionic->idev.intr_ctrl = NULL;
-
- ionic_unmap_bars(ionic);
- pci_release_regions(ionic->pdev);
+ if (ionic->num_bars) {
+ ionic->idev.dev_info_regs = NULL;
+ ionic->idev.dev_cmd_regs = NULL;
+ ionic->idev.intr_status = NULL;
+ ionic->idev.intr_ctrl = NULL;
+
+ ionic_unmap_bars(ionic);
+ pci_release_regions(ionic->pdev);
+ }
- if (atomic_read(&ionic->pdev->enable_cnt) > 0)
+ if (pci_is_enabled(ionic->pdev))
pci_disable_device(ionic->pdev);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
index 91327ef670c7..c3ae11a48024 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
@@ -113,8 +113,8 @@ static const struct debugfs_reg32 intr_ctrl_regs[] = {
void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq)
{
struct dentry *qcq_dentry, *q_dentry, *cq_dentry;
- struct dentry *intr_dentry, *stats_dentry;
struct ionic_dev *idev = &lif->ionic->idev;
+ struct dentry *intr_dentry, *stats_dentry;
struct debugfs_regset32 *intr_ctrl_regset;
struct ionic_intr_info *intr = &qcq->intr;
struct debugfs_blob_wrapper *desc_blob;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 1e7c71f7f081..874499337132 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -319,22 +319,32 @@ do_check_time:
u8 ionic_dev_cmd_status(struct ionic_dev *idev)
{
+ if (!idev->dev_cmd_regs)
+ return (u8)PCI_ERROR_RESPONSE;
return ioread8(&idev->dev_cmd_regs->comp.comp.status);
}
bool ionic_dev_cmd_done(struct ionic_dev *idev)
{
+ if (!idev->dev_cmd_regs)
+ return false;
return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE;
}
void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp)
{
+ if (!idev->dev_cmd_regs)
+ return;
memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp));
}
void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd)
{
idev->opcode = cmd->cmd.opcode;
+
+ if (!idev->dev_cmd_regs)
+ return;
+
memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd));
iowrite32(0, &idev->dev_cmd_regs->done);
iowrite32(1, &idev->dev_cmd_regs->doorbell);
@@ -619,43 +629,25 @@ int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
cq->desc_size = desc_size;
cq->tail_idx = 0;
cq->done_color = 1;
+ cq->idev = &lif->ionic->idev;
return 0;
}
-void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa)
-{
- struct ionic_cq_info *cur;
- unsigned int i;
-
- cq->base = base;
- cq->base_pa = base_pa;
-
- for (i = 0, cur = cq->info; i < cq->num_descs; i++, cur++)
- cur->cq_desc = base + (i * cq->desc_size);
-}
-
-void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q)
-{
- cq->bound_q = q;
-}
-
unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
ionic_cq_cb cb, ionic_cq_done_cb done_cb,
void *done_arg)
{
- struct ionic_cq_info *cq_info;
unsigned int work_done = 0;
if (work_to_do == 0)
return 0;
- cq_info = &cq->info[cq->tail_idx];
- while (cb(cq, cq_info)) {
+ while (cb(cq)) {
if (cq->tail_idx == cq->num_descs - 1)
cq->done_color = !cq->done_color;
+
cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
- cq_info = &cq->info[cq->tail_idx];
if (++work_done >= work_to_do)
break;
@@ -682,7 +674,6 @@ int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
return -EINVAL;
q->lif = lif;
- q->idev = idev;
q->index = index;
q->num_descs = num_descs;
q->desc_size = desc_size;
@@ -696,53 +687,11 @@ int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
return 0;
}
-void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
-{
- struct ionic_desc_info *cur;
- unsigned int i;
-
- q->base = base;
- q->base_pa = base_pa;
-
- for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
- cur->desc = base + (i * q->desc_size);
-}
-
-void ionic_q_cmb_map(struct ionic_queue *q, void __iomem *base, dma_addr_t base_pa)
-{
- struct ionic_desc_info *cur;
- unsigned int i;
-
- q->cmb_base = base;
- q->cmb_base_pa = base_pa;
-
- for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
- cur->cmb_desc = base + (i * q->desc_size);
-}
-
-void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
-{
- struct ionic_desc_info *cur;
- unsigned int i;
-
- q->sg_base = base;
- q->sg_base_pa = base_pa;
-
- for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
- cur->sg_desc = base + (i * q->sg_desc_size);
-}
-
-void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
- void *cb_arg)
+void ionic_q_post(struct ionic_queue *q, bool ring_doorbell)
{
- struct ionic_desc_info *desc_info;
struct ionic_lif *lif = q->lif;
struct device *dev = q->dev;
- desc_info = &q->info[q->head_idx];
- desc_info->cb = cb;
- desc_info->cb_arg = cb_arg;
-
q->head_idx = (q->head_idx + 1) & (q->num_descs - 1);
dev_dbg(dev, "lif=%d qname=%s qid=%d qtype=%d p_index=%d ringdb=%d\n",
@@ -761,7 +710,7 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
}
}
-static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
+bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
{
unsigned int mask, tail, head;
@@ -771,37 +720,3 @@ static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
return ((pos - tail) & mask) < ((head - tail) & mask);
}
-
-void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
- unsigned int stop_index)
-{
- struct ionic_desc_info *desc_info;
- ionic_desc_cb cb;
- void *cb_arg;
- u16 index;
-
- /* check for empty queue */
- if (q->tail_idx == q->head_idx)
- return;
-
- /* stop index must be for a descriptor that is not yet completed */
- if (unlikely(!ionic_q_is_posted(q, stop_index)))
- dev_err(q->dev,
- "ionic stop is not posted %s stop %u tail %u head %u\n",
- q->name, stop_index, q->tail_idx, q->head_idx);
-
- do {
- desc_info = &q->info[q->tail_idx];
- index = q->tail_idx;
- q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
-
- cb = desc_info->cb;
- cb_arg = desc_info->cb_arg;
-
- desc_info->cb = NULL;
- desc_info->cb_arg = NULL;
-
- if (cb)
- cb(q, desc_info, cq_info, cb_arg);
- } while (index != stop_index);
-}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 2667e1cde16b..f30eee4a5a80 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
+#include <linux/bpf_trace.h>
#include "ionic_if.h"
#include "ionic_regs.h"
@@ -15,9 +16,10 @@
#define IONIC_MAX_TX_DESC 8192
#define IONIC_MAX_RX_DESC 16384
#define IONIC_MIN_TXRX_DESC 64
-#define IONIC_DEF_TXRX_DESC 4096
+#define IONIC_DEF_TXRX_DESC 1024
#define IONIC_RX_FILL_THRESHOLD 16
#define IONIC_RX_FILL_DIV 8
+#define IONIC_TSO_DESCS_NEEDED 44 /* 64K TSO @1500B */
#define IONIC_LIFS_MAX 1024
#define IONIC_WATCHDOG_SECS 5
#define IONIC_ITR_COAL_USEC_DEFAULT 64
@@ -120,11 +122,13 @@ static_assert(sizeof(struct ionic_log_event) == 64);
/* I/O */
static_assert(sizeof(struct ionic_txq_desc) == 16);
static_assert(sizeof(struct ionic_txq_sg_desc) == 128);
+static_assert(sizeof(struct ionic_txq_sg_desc_v1) == 256);
static_assert(sizeof(struct ionic_txq_comp) == 16);
static_assert(sizeof(struct ionic_rxq_desc) == 16);
static_assert(sizeof(struct ionic_rxq_sg_desc) == 128);
static_assert(sizeof(struct ionic_rxq_comp) == 16);
+static_assert(sizeof(struct ionic_rxq_comp) == sizeof(struct ionic_txq_comp));
/* SR/IOV */
static_assert(sizeof(struct ionic_vf_setattr_cmd) == 64);
@@ -173,21 +177,8 @@ struct ionic_dev {
struct ionic_devinfo dev_info;
};
-struct ionic_cq_info {
- union {
- void *cq_desc;
- struct ionic_admin_comp *admincq;
- struct ionic_notifyq_event *notifyq;
- };
-};
-
struct ionic_queue;
struct ionic_qcq;
-struct ionic_desc_info;
-
-typedef void (*ionic_desc_cb)(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
- struct ionic_cq_info *cq_info, void *cb_arg);
#define IONIC_MAX_BUF_LEN ((u16)-1)
#define IONIC_PAGE_SIZE PAGE_SIZE
@@ -195,6 +186,11 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
#define IONIC_PAGE_GFP_MASK (GFP_ATOMIC | __GFP_NOWARN |\
__GFP_COMP | __GFP_MEMALLOC)
+#define IONIC_XDP_MAX_LINEAR_MTU (IONIC_PAGE_SIZE - \
+ (VLAN_ETH_HLEN + \
+ XDP_PACKET_HEADROOM + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))))
+
struct ionic_buf_info {
struct page *page;
dma_addr_t dma_addr;
@@ -202,26 +198,25 @@ struct ionic_buf_info {
u32 len;
};
-#define IONIC_MAX_FRAGS (1 + IONIC_TX_MAX_SG_ELEMS_V1)
+#define IONIC_TX_MAX_FRAGS (1 + IONIC_TX_MAX_SG_ELEMS_V1)
+#define IONIC_RX_MAX_FRAGS (1 + IONIC_RX_MAX_SG_ELEMS)
-struct ionic_desc_info {
- union {
- void *desc;
- struct ionic_txq_desc *txq_desc;
- struct ionic_rxq_desc *rxq_desc;
- struct ionic_admin_cmd *adminq_desc;
- };
- void __iomem *cmb_desc;
- union {
- void *sg_desc;
- struct ionic_txq_sg_desc *txq_sg_desc;
- struct ionic_rxq_sg_desc *rxq_sgl_desc;
- };
+struct ionic_tx_desc_info {
unsigned int bytes;
unsigned int nbufs;
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ enum xdp_action act;
struct ionic_buf_info bufs[MAX_SKB_FRAGS + 1];
- ionic_desc_cb cb;
- void *cb_arg;
+};
+
+struct ionic_rx_desc_info {
+ unsigned int nbufs;
+ struct ionic_buf_info bufs[IONIC_RX_MAX_FRAGS];
+};
+
+struct ionic_admin_desc_info {
+ void *ctx;
};
#define IONIC_QUEUE_NAME_MAX_SZ 16
@@ -229,7 +224,12 @@ struct ionic_desc_info {
struct ionic_queue {
struct device *dev;
struct ionic_lif *lif;
- struct ionic_desc_info *info;
+ union {
+ void *info;
+ struct ionic_tx_desc_info *tx_info;
+ struct ionic_rx_desc_info *rx_info;
+ struct ionic_admin_desc_info *admin_info;
+ };
u64 dbval;
unsigned long dbell_deadline;
unsigned long dbell_jiffies;
@@ -239,26 +239,33 @@ struct ionic_queue {
unsigned int num_descs;
unsigned int max_sg_elems;
u64 features;
- u64 drop;
- struct ionic_dev *idev;
unsigned int type;
unsigned int hw_index;
unsigned int hw_type;
+ bool xdp_flush;
union {
void *base;
struct ionic_txq_desc *txq;
struct ionic_rxq_desc *rxq;
struct ionic_admin_cmd *adminq;
};
- void __iomem *cmb_base;
+ union {
+ void __iomem *cmb_base;
+ struct ionic_txq_desc __iomem *cmb_txq;
+ struct ionic_rxq_desc __iomem *cmb_rxq;
+ };
union {
void *sg_base;
struct ionic_txq_sg_desc *txq_sgl;
+ struct ionic_txq_sg_desc_v1 *txq_sgl_v1;
struct ionic_rxq_sg_desc *rxq_sgl;
};
+ struct xdp_rxq_info *xdp_rxq_info;
+ struct ionic_queue *partner;
dma_addr_t base_pa;
dma_addr_t cmb_base_pa;
dma_addr_t sg_base_pa;
+ u64 drop;
unsigned int desc_size;
unsigned int sg_desc_size;
unsigned int pid;
@@ -280,7 +287,6 @@ struct ionic_intr_info {
struct ionic_cq {
struct ionic_lif *lif;
- struct ionic_cq_info *info;
struct ionic_queue *bound_q;
struct ionic_intr_info *bound_intr;
u16 tail_idx;
@@ -289,6 +295,7 @@ struct ionic_cq {
unsigned int desc_size;
void *base;
dma_addr_t base_pa;
+ struct ionic_dev *idev;
} ____cacheline_aligned_in_smp;
struct ionic;
@@ -363,23 +370,20 @@ int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
unsigned int num_descs, size_t desc_size);
void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa);
void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q);
-typedef bool (*ionic_cq_cb)(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
+typedef bool (*ionic_cq_cb)(struct ionic_cq *cq);
typedef void (*ionic_cq_done_cb)(void *done_arg);
unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
ionic_cq_cb cb, ionic_cq_done_cb done_cb,
void *done_arg);
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do);
int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
struct ionic_queue *q, unsigned int index, const char *name,
unsigned int num_descs, size_t desc_size,
size_t sg_desc_size, unsigned int pid);
-void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa);
-void ionic_q_cmb_map(struct ionic_queue *q, void __iomem *base, dma_addr_t base_pa);
-void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa);
-void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
- void *cb_arg);
-void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
- unsigned int stop_index);
+void ionic_q_post(struct ionic_queue *q, bool ring_doorbell);
+bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos);
+
int ionic_heartbeat_check(struct ionic *ionic);
bool ionic_is_fw_running(struct ionic_dev *idev);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index cd3c0b01402e..91183965a6b7 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -90,18 +90,23 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
void *p)
{
struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_dev *idev;
unsigned int offset;
unsigned int size;
regs->version = IONIC_DEV_CMD_REG_VERSION;
+ idev = &lif->ionic->idev;
+ if (!idev->dev_info_regs)
+ return;
+
offset = 0;
size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32);
memcpy_fromio(p + offset, lif->ionic->idev.dev_info_regs->words, size);
offset += size;
size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32);
- memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size);
+ memcpy_fromio(p + offset, idev->dev_cmd_regs->words, size);
}
static void ionic_get_link_ext_stats(struct net_device *netdev,
@@ -721,6 +726,11 @@ static int ionic_set_channels(struct net_device *netdev,
ionic_init_queue_params(lif, &qparam);
+ if ((ch->rx_count || ch->tx_count) && lif->xdp_prog) {
+ netdev_info(lif->netdev, "Split Tx/Rx interrupts not available when using XDP\n");
+ return -EOPNOTSUPP;
+ }
+
if (ch->rx_count != ch->tx_count) {
netdev_info(netdev, "The rx and tx count must be equal\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_fw.c b/drivers/net/ethernet/pensando/ionic/ionic_fw.c
index 5f40324cd243..3c209c1a2337 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_fw.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_fw.c
@@ -109,6 +109,11 @@ int ionic_firmware_update(struct ionic_lif *lif, const struct firmware *fw,
dl = priv_to_devlink(ionic);
devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0);
+ if (!idev->dev_cmd_regs) {
+ err = -ENXIO;
+ goto err_out;
+ }
+
buf_sz = sizeof(idev->dev_cmd_regs->data);
netdev_dbg(netdev,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index cf2d5ad7b68c..7f0c6cdc375e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -46,18 +46,26 @@ static int ionic_start_queues(struct ionic_lif *lif);
static void ionic_stop_queues(struct ionic_lif *lif);
static void ionic_lif_queue_identify(struct ionic_lif *lif);
+static int ionic_xdp_queues_config(struct ionic_lif *lif);
+static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q);
+
static void ionic_dim_work(struct work_struct *work)
{
struct dim *dim = container_of(work, struct dim, work);
- struct ionic_intr_info *intr;
struct dim_cq_moder cur_moder;
+ struct ionic_intr_info *intr;
struct ionic_qcq *qcq;
struct ionic_lif *lif;
+ struct ionic_queue *q;
u32 new_coal;
- cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
qcq = container_of(dim, struct ionic_qcq, dim);
- lif = qcq->q.lif;
+ q = &qcq->q;
+ if (q->type == IONIC_QTYPE_RXQ)
+ cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+ else
+ cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+ lif = q->lif;
new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec);
new_coal = new_coal ? new_coal : 1;
@@ -422,10 +430,9 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
qcq->sg_base_pa = 0;
}
+ ionic_xdp_unregister_rxq_info(&qcq->q);
ionic_qcq_intr_free(lif, qcq);
- vfree(qcq->cq.info);
- qcq->cq.info = NULL;
vfree(qcq->q.info);
qcq->q.info = NULL;
}
@@ -529,14 +536,11 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
unsigned int num_descs, unsigned int desc_size,
unsigned int cq_desc_size,
unsigned int sg_desc_size,
+ unsigned int desc_info_size,
unsigned int pid, struct ionic_qcq **qcq)
{
struct ionic_dev *idev = &lif->ionic->idev;
struct device *dev = lif->ionic->dev;
- void *q_base, *cq_base, *sg_base;
- dma_addr_t cq_base_pa = 0;
- dma_addr_t sg_base_pa = 0;
- dma_addr_t q_base_pa = 0;
struct ionic_qcq *new;
int err;
@@ -552,7 +556,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
new->q.dev = dev;
new->flags = flags;
- new->q.info = vcalloc(num_descs, sizeof(*new->q.info));
+ new->q.info = vcalloc(num_descs, desc_info_size);
if (!new->q.info) {
netdev_err(lif->netdev, "Cannot allocate queue info\n");
err = -ENOMEM;
@@ -571,19 +575,12 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
err = ionic_alloc_qcq_interrupt(lif, new);
if (err)
- goto err_out;
-
- new->cq.info = vcalloc(num_descs, sizeof(*new->cq.info));
- if (!new->cq.info) {
- netdev_err(lif->netdev, "Cannot allocate completion queue info\n");
- err = -ENOMEM;
- goto err_out_free_irq;
- }
+ goto err_out_free_q_info;
err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size);
if (err) {
netdev_err(lif->netdev, "Cannot initialize completion queue\n");
- goto err_out_free_cq_info;
+ goto err_out_free_irq;
}
if (flags & IONIC_QCQ_F_NOTIFYQ) {
@@ -601,16 +598,15 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
if (!new->q_base) {
netdev_err(lif->netdev, "Cannot allocate qcq DMA memory\n");
err = -ENOMEM;
- goto err_out_free_cq_info;
+ goto err_out_free_irq;
}
- q_base = PTR_ALIGN(new->q_base, PAGE_SIZE);
- q_base_pa = ALIGN(new->q_base_pa, PAGE_SIZE);
- ionic_q_map(&new->q, q_base, q_base_pa);
-
- cq_base = PTR_ALIGN(q_base + q_size, PAGE_SIZE);
- cq_base_pa = ALIGN(new->q_base_pa + q_size, PAGE_SIZE);
- ionic_cq_map(&new->cq, cq_base, cq_base_pa);
- ionic_cq_bind(&new->cq, &new->q);
+ new->q.base = PTR_ALIGN(new->q_base, PAGE_SIZE);
+ new->q.base_pa = ALIGN(new->q_base_pa, PAGE_SIZE);
+
+ /* Base the NotifyQ cq.base off of the ALIGNed q.base */
+ new->cq.base = PTR_ALIGN(new->q.base + q_size, PAGE_SIZE);
+ new->cq.base_pa = ALIGN(new->q_base_pa + q_size, PAGE_SIZE);
+ new->cq.bound_q = &new->q;
} else {
/* regular DMA q descriptors */
new->q_size = PAGE_SIZE + (num_descs * desc_size);
@@ -619,11 +615,10 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
if (!new->q_base) {
netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n");
err = -ENOMEM;
- goto err_out_free_cq_info;
+ goto err_out_free_irq;
}
- q_base = PTR_ALIGN(new->q_base, PAGE_SIZE);
- q_base_pa = ALIGN(new->q_base_pa, PAGE_SIZE);
- ionic_q_map(&new->q, q_base, q_base_pa);
+ new->q.base = PTR_ALIGN(new->q_base, PAGE_SIZE);
+ new->q.base_pa = ALIGN(new->q_base_pa, PAGE_SIZE);
if (flags & IONIC_QCQ_F_CMB_RINGS) {
/* on-chip CMB q descriptors */
@@ -648,7 +643,8 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
}
new->cmb_q_base_pa -= idev->phy_cmb_pages;
- ionic_q_cmb_map(&new->q, new->cmb_q_base, new->cmb_q_base_pa);
+ new->q.cmb_base = new->cmb_q_base;
+ new->q.cmb_base_pa = new->cmb_q_base_pa;
}
/* cq DMA descriptors */
@@ -660,10 +656,9 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
err = -ENOMEM;
goto err_out_free_q;
}
- cq_base = PTR_ALIGN(new->cq_base, PAGE_SIZE);
- cq_base_pa = ALIGN(new->cq_base_pa, PAGE_SIZE);
- ionic_cq_map(&new->cq, cq_base, cq_base_pa);
- ionic_cq_bind(&new->cq, &new->q);
+ new->cq.base = PTR_ALIGN(new->cq_base, PAGE_SIZE);
+ new->cq.base_pa = ALIGN(new->cq_base_pa, PAGE_SIZE);
+ new->cq.bound_q = &new->q;
}
if (flags & IONIC_QCQ_F_SG) {
@@ -675,13 +670,12 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
err = -ENOMEM;
goto err_out_free_cq;
}
- sg_base = PTR_ALIGN(new->sg_base, PAGE_SIZE);
- sg_base_pa = ALIGN(new->sg_base_pa, PAGE_SIZE);
- ionic_q_sg_map(&new->q, sg_base, sg_base_pa);
+ new->q.sg_base = PTR_ALIGN(new->sg_base, PAGE_SIZE);
+ new->q.sg_base_pa = ALIGN(new->sg_base_pa, PAGE_SIZE);
}
INIT_WORK(&new->dim.work, ionic_dim_work);
- new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
*qcq = new;
@@ -695,8 +689,6 @@ err_out_free_q:
ionic_put_cmb(lif, new->cmb_pgid, new->cmb_order);
}
dma_free_coherent(dev, new->q_size, new->q_base, new->q_base_pa);
-err_out_free_cq_info:
- vfree(new->cq.info);
err_out_free_irq:
if (flags & IONIC_QCQ_F_INTR) {
devm_free_irq(dev, new->intr.vector, &new->napi);
@@ -722,7 +714,9 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif)
IONIC_ADMINQ_LENGTH,
sizeof(struct ionic_admin_cmd),
sizeof(struct ionic_admin_comp),
- 0, lif->kern_pid, &lif->adminqcq);
+ 0,
+ sizeof(struct ionic_admin_desc_info),
+ lif->kern_pid, &lif->adminqcq);
if (err)
return err;
ionic_debugfs_add_qcq(lif, lif->adminqcq);
@@ -733,7 +727,9 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif)
flags, IONIC_NOTIFYQ_LENGTH,
sizeof(struct ionic_notifyq_cmd),
sizeof(union ionic_notifyq_comp),
- 0, lif->kern_pid, &lif->notifyqcq);
+ 0,
+ sizeof(struct ionic_admin_desc_info),
+ lif->kern_pid, &lif->notifyqcq);
if (err)
goto err_out;
ionic_debugfs_add_qcq(lif, lif->notifyqcq);
@@ -862,8 +858,7 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
.type = q->type,
.ver = lif->qtype_info[q->type].version,
.index = cpu_to_le32(q->index),
- .flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
- IONIC_QINIT_F_SG),
+ .flags = cpu_to_le16(IONIC_QINIT_F_IRQ),
.intr_index = cpu_to_le16(cq->bound_intr->index),
.pid = cpu_to_le16(q->pid),
.ring_size = ilog2(q->num_descs),
@@ -875,6 +870,13 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
};
int err;
+ q->partner = &lif->txqcqs[q->index]->q;
+ q->partner->partner = q;
+
+ if (!lif->xdp_prog ||
+ (lif->xdp_prog->aux && lif->xdp_prog->aux->xdp_has_frags))
+ ctx.cmd.q_init.flags |= cpu_to_le16(IONIC_QINIT_F_SG);
+
if (qcq->flags & IONIC_QCQ_F_CMB_RINGS) {
ctx.cmd.q_init.flags |= cpu_to_le16(IONIC_QINIT_F_CMB);
ctx.cmd.q_init.ring_base = cpu_to_le64(qcq->cmb_q_base_pa);
@@ -945,6 +947,7 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, txq_i, "hwstamp_tx", flags,
num_desc, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_tx_desc_info),
lif->kern_pid, &txq);
if (err)
goto err_qcq_alloc;
@@ -1004,6 +1007,7 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, rxq_i, "hwstamp_rx", flags,
num_desc, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_rx_desc_info),
lif->kern_pid, &rxq);
if (err)
goto err_qcq_alloc;
@@ -1157,71 +1161,6 @@ int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
return ionic_lif_add_hwstamp_rxfilt(lif, pkt_class);
}
-static bool ionic_notifyq_service(struct ionic_cq *cq,
- struct ionic_cq_info *cq_info)
-{
- union ionic_notifyq_comp *comp = cq_info->cq_desc;
- struct ionic_deferred_work *work;
- struct net_device *netdev;
- struct ionic_queue *q;
- struct ionic_lif *lif;
- u64 eid;
-
- q = cq->bound_q;
- lif = q->info[0].cb_arg;
- netdev = lif->netdev;
- eid = le64_to_cpu(comp->event.eid);
-
- /* Have we run out of new completions to process? */
- if ((s64)(eid - lif->last_eid) <= 0)
- return false;
-
- lif->last_eid = eid;
-
- dev_dbg(lif->ionic->dev, "notifyq event:\n");
- dynamic_hex_dump("event ", DUMP_PREFIX_OFFSET, 16, 1,
- comp, sizeof(*comp), true);
-
- switch (le16_to_cpu(comp->event.ecode)) {
- case IONIC_EVENT_LINK_CHANGE:
- ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
- break;
- case IONIC_EVENT_RESET:
- if (lif->ionic->idev.fw_status_ready &&
- !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
- !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- netdev_err(lif->netdev, "Reset event dropped\n");
- clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
- } else {
- work->type = IONIC_DW_TYPE_LIF_RESET;
- ionic_lif_deferred_enqueue(&lif->deferred, work);
- }
- }
- break;
- default:
- netdev_warn(netdev, "Notifyq event ecode=%d eid=%lld\n",
- comp->event.ecode, eid);
- break;
- }
-
- return true;
-}
-
-static bool ionic_adminq_service(struct ionic_cq *cq,
- struct ionic_cq_info *cq_info)
-{
- struct ionic_admin_comp *comp = cq_info->cq_desc;
-
- if (!color_match(comp->color, cq->done_color))
- return false;
-
- ionic_q_service(cq->bound_q, cq_info, le16_to_cpu(comp->comp_index));
-
- return true;
-}
-
static int ionic_adminq_napi(struct napi_struct *napi, int budget)
{
struct ionic_intr_info *intr = napi_to_cq(napi)->bound_intr;
@@ -1252,8 +1191,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
ionic_rx_service, NULL, NULL);
if (lif->hwstamp_txq)
- tx_work = ionic_cq_service(&lif->hwstamp_txq->cq, budget,
- ionic_tx_service, NULL, NULL);
+ tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget);
work_done = max(max(n_work, a_work), max(rx_work, tx_work));
if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1640,6 +1578,12 @@ static int ionic_init_nic_features(struct ionic_lif *lif)
netdev->priv_flags |= IFF_UNICAST_FLT |
IFF_LIVE_ADDR_CHANGE;
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
+ NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG;
+
return 0;
}
@@ -1777,6 +1721,21 @@ static int ionic_start_queues_reconfig(struct ionic_lif *lif)
return err;
}
+static bool ionic_xdp_is_valid_mtu(struct ionic_lif *lif, u32 mtu,
+ struct bpf_prog *xdp_prog)
+{
+ if (!xdp_prog)
+ return true;
+
+ if (mtu <= IONIC_XDP_MAX_LINEAR_MTU)
+ return true;
+
+ if (xdp_prog->aux && xdp_prog->aux->xdp_has_frags)
+ return true;
+
+ return false;
+}
+
static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
{
struct ionic_lif *lif = netdev_priv(netdev);
@@ -1789,8 +1748,13 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
.mtu = cpu_to_le32(new_mtu),
},
};
+ struct bpf_prog *xdp_prog;
int err;
+ xdp_prog = READ_ONCE(lif->xdp_prog);
+ if (!ionic_xdp_is_valid_mtu(lif, new_mtu, xdp_prog))
+ return -EINVAL;
+
err = ionic_adminq_post_wait(lif, &ctx);
if (err)
return err;
@@ -2070,6 +2034,7 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
for (i = 0; i < lif->nxqs; i++) {
err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
num_desc, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_tx_desc_info),
lif->kern_pid, &lif->txqcqs[i]);
if (err)
goto err_out;
@@ -2101,6 +2066,7 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
for (i = 0; i < lif->nxqs; i++) {
err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
num_desc, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_rx_desc_info),
lif->kern_pid, &lif->rxqcqs[i]);
if (err)
goto err_out;
@@ -2166,6 +2132,10 @@ static int ionic_txrx_enable(struct ionic_lif *lif)
int derr = 0;
int i, err;
+ err = ionic_xdp_queues_config(lif);
+ if (err)
+ return err;
+
for (i = 0; i < lif->nxqs; i++) {
if (!(lif->rxqcqs[i] && lif->txqcqs[i])) {
dev_err(lif->ionic->dev, "%s: bad qcq %d\n", __func__, i);
@@ -2211,6 +2181,8 @@ err_out:
derr = ionic_qcq_disable(lif, lif->rxqcqs[i], derr);
}
+ ionic_xdp_queues_config(lif);
+
return err;
}
@@ -2668,11 +2640,151 @@ static void ionic_vf_attr_replay(struct ionic_lif *lif)
ionic_vf_start(ionic);
}
+static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q)
+{
+ struct xdp_rxq_info *xi;
+
+ if (!q->xdp_rxq_info)
+ return;
+
+ xi = q->xdp_rxq_info;
+ q->xdp_rxq_info = NULL;
+
+ xdp_rxq_info_unreg(xi);
+ kfree(xi);
+}
+
+static int ionic_xdp_register_rxq_info(struct ionic_queue *q, unsigned int napi_id)
+{
+ struct xdp_rxq_info *rxq_info;
+ int err;
+
+ rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL);
+ if (!rxq_info)
+ return -ENOMEM;
+
+ err = xdp_rxq_info_reg(rxq_info, q->lif->netdev, q->index, napi_id);
+ if (err) {
+ dev_err(q->dev, "Queue %d xdp_rxq_info_reg failed, err %d\n",
+ q->index, err);
+ goto err_out;
+ }
+
+ err = xdp_rxq_info_reg_mem_model(rxq_info, MEM_TYPE_PAGE_ORDER0, NULL);
+ if (err) {
+ dev_err(q->dev, "Queue %d xdp_rxq_info_reg_mem_model failed, err %d\n",
+ q->index, err);
+ xdp_rxq_info_unreg(rxq_info);
+ goto err_out;
+ }
+
+ q->xdp_rxq_info = rxq_info;
+
+ return 0;
+
+err_out:
+ kfree(rxq_info);
+ return err;
+}
+
+static int ionic_xdp_queues_config(struct ionic_lif *lif)
+{
+ unsigned int i;
+ int err;
+
+ if (!lif->rxqcqs)
+ return 0;
+
+ /* There's no need to rework memory if not going to/from NULL program.
+ * If there is no lif->xdp_prog, there should also be no q.xdp_rxq_info
+ * This way we don't need to keep an *xdp_prog in every queue struct.
+ */
+ if (!lif->xdp_prog == !lif->rxqcqs[0]->q.xdp_rxq_info)
+ return 0;
+
+ for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) {
+ struct ionic_queue *q = &lif->rxqcqs[i]->q;
+
+ if (q->xdp_rxq_info) {
+ ionic_xdp_unregister_rxq_info(q);
+ continue;
+ }
+
+ err = ionic_xdp_register_rxq_info(q, lif->rxqcqs[i]->napi.napi_id);
+ if (err) {
+ dev_err(lif->ionic->dev, "failed to register RX queue %d info for XDP, err %d\n",
+ i, err);
+ goto err_out;
+ }
+ }
+
+ return 0;
+
+err_out:
+ for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++)
+ ionic_xdp_unregister_rxq_info(&lif->rxqcqs[i]->q);
+
+ return err;
+}
+
+static int ionic_xdp_config(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct bpf_prog *old_prog;
+ u32 maxfs;
+
+ if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) {
+#define XDP_ERR_SPLIT "XDP not available with split Tx/Rx interrupts"
+ NL_SET_ERR_MSG_MOD(bpf->extack, XDP_ERR_SPLIT);
+ netdev_info(lif->netdev, XDP_ERR_SPLIT);
+ return -EOPNOTSUPP;
+ }
+
+ if (!ionic_xdp_is_valid_mtu(lif, netdev->mtu, bpf->prog)) {
+#define XDP_ERR_MTU "MTU is too large for XDP without frags support"
+ NL_SET_ERR_MSG_MOD(bpf->extack, XDP_ERR_MTU);
+ netdev_info(lif->netdev, XDP_ERR_MTU);
+ return -EINVAL;
+ }
+
+ maxfs = __le32_to_cpu(lif->identity->eth.max_frame_size) - VLAN_ETH_HLEN;
+ if (bpf->prog && !(bpf->prog->aux && bpf->prog->aux->xdp_has_frags))
+ maxfs = min_t(u32, maxfs, IONIC_XDP_MAX_LINEAR_MTU);
+ netdev->max_mtu = maxfs;
+
+ if (!netif_running(netdev)) {
+ old_prog = xchg(&lif->xdp_prog, bpf->prog);
+ } else {
+ mutex_lock(&lif->queue_lock);
+ ionic_stop_queues_reconfig(lif);
+ old_prog = xchg(&lif->xdp_prog, bpf->prog);
+ ionic_start_queues_reconfig(lif);
+ mutex_unlock(&lif->queue_lock);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+static int ionic_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return ionic_xdp_config(netdev, bpf);
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops ionic_netdev_ops = {
.ndo_open = ionic_open,
.ndo_stop = ionic_stop,
.ndo_eth_ioctl = ionic_eth_ioctl,
.ndo_start_xmit = ionic_start_xmit,
+ .ndo_bpf = ionic_xdp,
+ .ndo_xdp_xmit = ionic_xdp_xmit,
.ndo_get_stats64 = ionic_get_stats64,
.ndo_set_rx_mode = ionic_ndo_set_rx_mode,
.ndo_set_features = ionic_set_features,
@@ -2755,6 +2867,8 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
swap(a->q.base, b->q.base);
swap(a->q.base_pa, b->q.base_pa);
swap(a->q.info, b->q.info);
+ swap(a->q.xdp_rxq_info, b->q.xdp_rxq_info);
+ swap(a->q.partner, b->q.partner);
swap(a->q_base, b->q_base);
swap(a->q_base_pa, b->q_base_pa);
swap(a->q_size, b->q_size);
@@ -2770,7 +2884,6 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
swap(a->cq.desc_size, b->cq.desc_size);
swap(a->cq.base, b->cq.base);
swap(a->cq.base_pa, b->cq.base_pa);
- swap(a->cq.info, b->cq.info);
swap(a->cq_base, b->cq_base);
swap(a->cq_base_pa, b->cq_base_pa);
swap(a->cq_size, b->cq_size);
@@ -2834,6 +2947,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
4, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_tx_desc_info),
lif->kern_pid, &lif->txqcqs[i]);
if (err)
goto err_out;
@@ -2842,6 +2956,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
flags = lif->txqcqs[i]->flags & ~IONIC_QCQ_F_INTR;
err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
num_desc, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_tx_desc_info),
lif->kern_pid, &tx_qcqs[i]);
if (err)
goto err_out;
@@ -2863,6 +2978,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG;
err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
4, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_rx_desc_info),
lif->kern_pid, &lif->rxqcqs[i]);
if (err)
goto err_out;
@@ -2871,6 +2987,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
flags = lif->rxqcqs[i]->flags & ~IONIC_QCQ_F_INTR;
err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
num_desc, desc_sz, comp_sz, sg_desc_sz,
+ sizeof(struct ionic_rx_desc_info),
lif->kern_pid, &rx_qcqs[i]);
if (err)
goto err_out;
@@ -3391,9 +3508,12 @@ static int ionic_lif_adminq_init(struct ionic_lif *lif)
napi_enable(&qcq->napi);
- if (qcq->flags & IONIC_QCQ_F_INTR)
+ if (qcq->flags & IONIC_QCQ_F_INTR) {
+ irq_set_affinity_hint(qcq->intr.vector,
+ &qcq->intr.affinity_mask);
ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
IONIC_INTR_MASK_CLEAR);
+ }
qcq->flags |= IONIC_QCQ_F_INITED;
@@ -3442,7 +3562,7 @@ static int ionic_lif_notifyq_init(struct ionic_lif *lif)
dev_dbg(dev, "notifyq->hw_index %d\n", q->hw_index);
/* preset the callback info */
- q->info[0].cb_arg = lif;
+ q->admin_info[0].ctx = lif;
qcq->flags |= IONIC_QCQ_F_INITED;
@@ -3559,7 +3679,10 @@ int ionic_lif_init(struct ionic_lif *lif)
goto err_out_notifyq_deinit;
}
- err = ionic_init_nic_features(lif);
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ err = ionic_set_nic_features(lif, lif->netdev->features);
+ else
+ err = ionic_init_nic_features(lif);
if (err)
goto err_out_notifyq_deinit;
@@ -3691,6 +3814,7 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif)
union ionic_q_identity __iomem *q_ident;
struct ionic *ionic = lif->ionic;
struct ionic_dev *idev;
+ u16 max_frags;
int qtype;
int err;
@@ -3758,17 +3882,16 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif)
dev_dbg(ionic->dev, " qtype[%d].sg_desc_stride = %d\n",
qtype, qti->sg_desc_stride);
- if (qti->max_sg_elems >= IONIC_MAX_FRAGS) {
- qti->max_sg_elems = IONIC_MAX_FRAGS - 1;
- dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to IONIC_MAX_FRAGS-1 %d\n",
- qtype, qti->max_sg_elems);
- }
+ if (qtype == IONIC_QTYPE_TXQ)
+ max_frags = IONIC_TX_MAX_FRAGS;
+ else if (qtype == IONIC_QTYPE_RXQ)
+ max_frags = IONIC_RX_MAX_FRAGS;
+ else
+ max_frags = 1;
- if (qti->max_sg_elems > MAX_SKB_FRAGS) {
- qti->max_sg_elems = MAX_SKB_FRAGS;
- dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to MAX_SKB_FRAGS %d\n",
- qtype, qti->max_sg_elems);
- }
+ qti->max_sg_elems = min_t(u16, max_frags - 1, MAX_SKB_FRAGS);
+ dev_dbg(ionic->dev, "qtype %d max_sg_elems %d\n",
+ qtype, qti->max_sg_elems);
}
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 61548b3eea93..08f4266fe2aa 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -37,6 +37,7 @@ struct ionic_tx_stats {
u64 dma_map_err;
u64 hwstamp_valid;
u64 hwstamp_invalid;
+ u64 xdp_frames;
};
struct ionic_rx_stats {
@@ -51,6 +52,11 @@ struct ionic_rx_stats {
u64 alloc_err;
u64 hwstamp_valid;
u64 hwstamp_invalid;
+ u64 xdp_drop;
+ u64 xdp_aborted;
+ u64 xdp_pass;
+ u64 xdp_tx;
+ u64 xdp_redirect;
};
#define IONIC_QCQ_F_INITED BIT(0)
@@ -65,25 +71,25 @@ struct ionic_qcq {
void *q_base;
dma_addr_t q_base_pa;
u32 q_size;
+ u32 cq_size;
void *cq_base;
dma_addr_t cq_base_pa;
- u32 cq_size;
void *sg_base;
dma_addr_t sg_base_pa;
u32 sg_size;
+ unsigned int flags;
void __iomem *cmb_q_base;
phys_addr_t cmb_q_base_pa;
u32 cmb_q_size;
u32 cmb_pgid;
u32 cmb_order;
struct dim dim;
+ struct timer_list napi_deadline;
struct ionic_queue q;
struct ionic_cq cq;
- struct ionic_intr_info intr;
- struct timer_list napi_deadline;
struct napi_struct napi;
- unsigned int flags;
struct ionic_qcq *napi_qcq;
+ struct ionic_intr_info intr;
struct dentry *dentry;
};
@@ -135,6 +141,12 @@ struct ionic_lif_sw_stats {
u64 hw_rx_over_errors;
u64 hw_rx_missed_errors;
u64 hw_tx_aborted_errors;
+ u64 xdp_drop;
+ u64 xdp_aborted;
+ u64 xdp_pass;
+ u64 xdp_tx;
+ u64 xdp_redirect;
+ u64 xdp_frames;
};
enum ionic_lif_state_flags {
@@ -230,6 +242,7 @@ struct ionic_lif {
struct ionic_phc *phc;
struct dentry *dentry;
+ struct bpf_prog *xdp_prog;
};
struct ionic_phc {
@@ -314,7 +327,7 @@ static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
static inline bool ionic_txq_hwstamp_enabled(struct ionic_queue *q)
{
- return unlikely(q->features & IONIC_TXQ_F_HWSTAMP);
+ return q->features & IONIC_TXQ_F_HWSTAMP;
}
void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 165ab08ad2dd..c1259324b0be 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -15,7 +15,7 @@
#include "ionic_debugfs.h"
MODULE_DESCRIPTION(IONIC_DRV_DESCRIPTION);
-MODULE_AUTHOR("Pensando Systems, Inc");
+MODULE_AUTHOR("Shannon Nelson <shannon.nelson@amd.com>");
MODULE_LICENSE("GPL");
static const char *ionic_error_to_str(enum ionic_status_code code)
@@ -190,7 +190,8 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
static void ionic_adminq_flush(struct ionic_lif *lif)
{
- struct ionic_desc_info *desc_info;
+ struct ionic_admin_desc_info *desc_info;
+ struct ionic_admin_cmd *desc;
unsigned long irqflags;
struct ionic_queue *q;
@@ -203,10 +204,10 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
q = &lif->adminqcq->q;
while (q->tail_idx != q->head_idx) {
- desc_info = &q->info[q->tail_idx];
- memset(desc_info->desc, 0, sizeof(union ionic_adminq_cmd));
- desc_info->cb = NULL;
- desc_info->cb_arg = NULL;
+ desc = &q->adminq[q->tail_idx];
+ desc_info = &q->admin_info[q->tail_idx];
+ memset(desc, 0, sizeof(union ionic_adminq_cmd));
+ desc_info->ctx = NULL;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
}
spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
@@ -246,25 +247,93 @@ static int ionic_adminq_check_err(struct ionic_lif *lif,
return err;
}
-static void ionic_adminq_cb(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
- struct ionic_cq_info *cq_info, void *cb_arg)
+bool ionic_notifyq_service(struct ionic_cq *cq)
{
- struct ionic_admin_ctx *ctx = cb_arg;
+ struct ionic_deferred_work *work;
+ union ionic_notifyq_comp *comp;
+ struct net_device *netdev;
+ struct ionic_queue *q;
+ struct ionic_lif *lif;
+ u64 eid;
+
+ comp = &((union ionic_notifyq_comp *)cq->base)[cq->tail_idx];
+
+ q = cq->bound_q;
+ lif = q->admin_info[0].ctx;
+ netdev = lif->netdev;
+ eid = le64_to_cpu(comp->event.eid);
+
+ /* Have we run out of new completions to process? */
+ if ((s64)(eid - lif->last_eid) <= 0)
+ return false;
+
+ lif->last_eid = eid;
+
+ dev_dbg(lif->ionic->dev, "notifyq event:\n");
+ dynamic_hex_dump("event ", DUMP_PREFIX_OFFSET, 16, 1,
+ comp, sizeof(*comp), true);
+
+ switch (le16_to_cpu(comp->event.ecode)) {
+ case IONIC_EVENT_LINK_CHANGE:
+ ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
+ break;
+ case IONIC_EVENT_RESET:
+ if (lif->ionic->idev.fw_status_ready &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "Reset event dropped\n");
+ clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
+ } else {
+ work->type = IONIC_DW_TYPE_LIF_RESET;
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ }
+ }
+ break;
+ default:
+ netdev_warn(netdev, "Notifyq event ecode=%d eid=%lld\n",
+ comp->event.ecode, eid);
+ break;
+ }
+
+ return true;
+}
+
+bool ionic_adminq_service(struct ionic_cq *cq)
+{
+ struct ionic_admin_desc_info *desc_info;
+ struct ionic_queue *q = cq->bound_q;
struct ionic_admin_comp *comp;
+ u16 index;
- if (!ctx)
- return;
+ comp = &((struct ionic_admin_comp *)cq->base)[cq->tail_idx];
- comp = cq_info->cq_desc;
+ if (!color_match(comp->color, cq->done_color))
+ return false;
- memcpy(&ctx->comp, comp, sizeof(*comp));
+ /* check for empty queue */
+ if (q->tail_idx == q->head_idx)
+ return false;
+
+ do {
+ desc_info = &q->admin_info[q->tail_idx];
+ index = q->tail_idx;
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
+ if (likely(desc_info->ctx)) {
+ struct ionic_admin_ctx *ctx = desc_info->ctx;
- dev_dbg(q->dev, "comp admin queue command:\n");
- dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
- &ctx->comp, sizeof(ctx->comp), true);
+ memcpy(&ctx->comp, comp, sizeof(*comp));
- complete_all(&ctx->work);
+ dev_dbg(q->dev, "comp admin queue command:\n");
+ dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
+ &ctx->comp, sizeof(ctx->comp), true);
+ complete_all(&ctx->work);
+ desc_info->ctx = NULL;
+ }
+ } while (index != le16_to_cpu(comp->comp_index));
+
+ return true;
}
bool ionic_adminq_poke_doorbell(struct ionic_queue *q)
@@ -298,7 +367,8 @@ bool ionic_adminq_poke_doorbell(struct ionic_queue *q)
int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
{
- struct ionic_desc_info *desc_info;
+ struct ionic_admin_desc_info *desc_info;
+ struct ionic_admin_cmd *desc;
unsigned long irqflags;
struct ionic_queue *q;
int err = 0;
@@ -320,14 +390,17 @@ int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
if (err)
goto err_out;
- desc_info = &q->info[q->head_idx];
- memcpy(desc_info->desc, &ctx->cmd, sizeof(ctx->cmd));
+ desc_info = &q->admin_info[q->head_idx];
+ desc_info->ctx = ctx;
+
+ desc = &q->adminq[q->head_idx];
+ memcpy(desc, &ctx->cmd, sizeof(ctx->cmd));
dev_dbg(&lif->netdev->dev, "post admin queue command:\n");
dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1,
&ctx->cmd, sizeof(ctx->cmd), true);
- ionic_q_post(q, true, ionic_adminq_cb, ctx);
+ ionic_q_post(q, true);
err_out:
spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
@@ -416,6 +489,9 @@ static void ionic_dev_cmd_clean(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
+ if (!idev->dev_cmd_regs)
+ return;
+
iowrite32(0, &idev->dev_cmd_regs->doorbell);
memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 1f6022fb7679..0107599a9dd4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -27,6 +27,12 @@ static const struct ionic_stat_desc ionic_lif_stats_desc[] = {
IONIC_LIF_STAT_DESC(hw_rx_over_errors),
IONIC_LIF_STAT_DESC(hw_rx_missed_errors),
IONIC_LIF_STAT_DESC(hw_tx_aborted_errors),
+ IONIC_LIF_STAT_DESC(xdp_drop),
+ IONIC_LIF_STAT_DESC(xdp_aborted),
+ IONIC_LIF_STAT_DESC(xdp_pass),
+ IONIC_LIF_STAT_DESC(xdp_tx),
+ IONIC_LIF_STAT_DESC(xdp_redirect),
+ IONIC_LIF_STAT_DESC(xdp_frames),
};
static const struct ionic_stat_desc ionic_port_stats_desc[] = {
@@ -135,6 +141,7 @@ static const struct ionic_stat_desc ionic_tx_stats_desc[] = {
IONIC_TX_STAT_DESC(csum_none),
IONIC_TX_STAT_DESC(csum),
IONIC_TX_STAT_DESC(vlan_inserted),
+ IONIC_TX_STAT_DESC(xdp_frames),
};
static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
@@ -149,6 +156,11 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
IONIC_RX_STAT_DESC(hwstamp_invalid),
IONIC_RX_STAT_DESC(dropped),
IONIC_RX_STAT_DESC(vlan_stripped),
+ IONIC_RX_STAT_DESC(xdp_drop),
+ IONIC_RX_STAT_DESC(xdp_aborted),
+ IONIC_RX_STAT_DESC(xdp_pass),
+ IONIC_RX_STAT_DESC(xdp_tx),
+ IONIC_RX_STAT_DESC(xdp_redirect),
};
#define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc)
@@ -171,6 +183,7 @@ static void ionic_add_lif_txq_stats(struct ionic_lif *lif, int q_num,
stats->tx_csum += txstats->csum;
stats->tx_hwstamp_valid += txstats->hwstamp_valid;
stats->tx_hwstamp_invalid += txstats->hwstamp_invalid;
+ stats->xdp_frames += txstats->xdp_frames;
}
static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num,
@@ -185,6 +198,11 @@ static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num,
stats->rx_csum_error += rxstats->csum_error;
stats->rx_hwstamp_valid += rxstats->hwstamp_valid;
stats->rx_hwstamp_invalid += rxstats->hwstamp_invalid;
+ stats->xdp_drop += rxstats->xdp_drop;
+ stats->xdp_aborted += rxstats->xdp_aborted;
+ stats->xdp_pass += rxstats->xdp_pass;
+ stats->xdp_tx += rxstats->xdp_tx;
+ stats->xdp_redirect += rxstats->xdp_redirect;
}
static void ionic_get_lif_stats(struct ionic_lif *lif,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 54cd96b035d6..5dba6d2d633c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -5,27 +5,40 @@
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip6_checksum.h>
+#include <net/netdev_queues.h>
#include "ionic.h"
#include "ionic_lif.h"
#include "ionic_txrx.h"
-static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell,
- ionic_desc_cb cb_func, void *cb_arg)
+static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
+ void *data, size_t len);
+
+static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
+ const skb_frag_t *frag,
+ size_t offset, size_t len);
+
+static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
+ struct ionic_tx_desc_info *desc_info);
+
+static void ionic_tx_clean(struct ionic_queue *q,
+ struct ionic_tx_desc_info *desc_info,
+ struct ionic_txq_comp *comp);
+
+static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell)
{
- ionic_q_post(q, ring_dbell, cb_func, cb_arg);
+ ionic_q_post(q, ring_dbell);
}
-static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell,
- ionic_desc_cb cb_func, void *cb_arg)
+static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell)
{
- ionic_q_post(q, ring_dbell, cb_func, cb_arg);
+ ionic_q_post(q, ring_dbell);
}
bool ionic_txq_poke_doorbell(struct ionic_queue *q)
{
- unsigned long now, then, dif;
struct netdev_queue *netdev_txq;
+ unsigned long now, then, dif;
struct net_device *netdev;
netdev = q->lif->netdev;
@@ -83,46 +96,61 @@ bool ionic_rxq_poke_doorbell(struct ionic_queue *q)
return true;
}
-static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
+static inline struct ionic_txq_sg_elem *ionic_tx_sg_elems(struct ionic_queue *q)
+{
+ if (likely(q->sg_desc_size == sizeof(struct ionic_txq_sg_desc_v1)))
+ return q->txq_sgl_v1[q->head_idx].elems;
+ else
+ return q->txq_sgl[q->head_idx].elems;
+}
+
+static inline struct netdev_queue *q_to_ndq(struct net_device *netdev,
+ struct ionic_queue *q)
{
- return netdev_get_tx_queue(q->lif->netdev, q->index);
+ return netdev_get_tx_queue(netdev, q->index);
+}
+
+static void *ionic_rx_buf_va(struct ionic_buf_info *buf_info)
+{
+ return page_address(buf_info->page) + buf_info->page_offset;
+}
+
+static dma_addr_t ionic_rx_buf_pa(struct ionic_buf_info *buf_info)
+{
+ return buf_info->dma_addr + buf_info->page_offset;
+}
+
+static unsigned int ionic_rx_buf_size(struct ionic_buf_info *buf_info)
+{
+ return min_t(u32, IONIC_MAX_BUF_LEN, IONIC_PAGE_SIZE - buf_info->page_offset);
}
static int ionic_rx_page_alloc(struct ionic_queue *q,
struct ionic_buf_info *buf_info)
{
- struct net_device *netdev = q->lif->netdev;
- struct ionic_rx_stats *stats;
- struct device *dev;
+ struct device *dev = q->dev;
+ dma_addr_t dma_addr;
struct page *page;
- dev = q->dev;
- stats = q_to_rx_stats(q);
-
- if (unlikely(!buf_info)) {
- net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
- netdev->name, q->name);
- return -EINVAL;
- }
-
page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
if (unlikely(!page)) {
net_err_ratelimited("%s: %s page alloc failed\n",
- netdev->name, q->name);
- stats->alloc_err++;
+ dev_name(dev), q->name);
+ q_to_rx_stats(q)->alloc_err++;
return -ENOMEM;
}
- buf_info->dma_addr = dma_map_page(dev, page, 0,
- IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
+ dma_addr = dma_map_page(dev, page, 0,
+ IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(dev, dma_addr))) {
__free_pages(page, 0);
net_err_ratelimited("%s: %s dma map failed\n",
- netdev->name, q->name);
- stats->dma_map_err++;
+ dev_name(dev), q->name);
+ q_to_rx_stats(q)->dma_map_err++;
return -EIO;
}
+ buf_info->dma_addr = dma_addr;
buf_info->page = page;
buf_info->page_offset = 0;
@@ -132,12 +160,11 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
static void ionic_rx_page_free(struct ionic_queue *q,
struct ionic_buf_info *buf_info)
{
- struct net_device *netdev = q->lif->netdev;
struct device *dev = q->dev;
if (unlikely(!buf_info)) {
net_err_ratelimited("%s: %s invalid buf_info in free\n",
- netdev->name, q->name);
+ dev_name(dev), q->name);
return;
}
@@ -150,7 +177,7 @@ static void ionic_rx_page_free(struct ionic_queue *q,
}
static bool ionic_rx_buf_recycle(struct ionic_queue *q,
- struct ionic_buf_info *buf_info, u32 used)
+ struct ionic_buf_info *buf_info, u32 len)
{
u32 size;
@@ -162,7 +189,7 @@ static bool ionic_rx_buf_recycle(struct ionic_queue *q,
if (page_to_nid(buf_info->page) != numa_mem_id())
return false;
- size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
+ size = ALIGN(len, q->xdp_rxq_info ? IONIC_PAGE_SIZE : IONIC_PAGE_SPLIT_SZ);
buf_info->page_offset += size;
if (buf_info->page_offset >= IONIC_PAGE_SIZE)
return false;
@@ -172,88 +199,96 @@ static bool ionic_rx_buf_recycle(struct ionic_queue *q,
return true;
}
-static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
- struct ionic_rxq_comp *comp)
+static void ionic_rx_add_skb_frag(struct ionic_queue *q,
+ struct sk_buff *skb,
+ struct ionic_buf_info *buf_info,
+ u32 off, u32 len,
+ bool synced)
+{
+ if (!synced)
+ dma_sync_single_range_for_cpu(q->dev, ionic_rx_buf_pa(buf_info),
+ off, len, DMA_FROM_DEVICE);
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ buf_info->page, buf_info->page_offset + off,
+ len,
+ IONIC_PAGE_SIZE);
+
+ if (!ionic_rx_buf_recycle(q, buf_info, len)) {
+ dma_unmap_page(q->dev, buf_info->dma_addr,
+ IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+ buf_info->page = NULL;
+ }
+}
+
+static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q,
+ struct ionic_rx_desc_info *desc_info,
+ unsigned int headroom,
+ unsigned int len,
+ unsigned int num_sg_elems,
+ bool synced)
{
- struct net_device *netdev = q->lif->netdev;
struct ionic_buf_info *buf_info;
- struct ionic_rx_stats *stats;
- struct device *dev = q->dev;
struct sk_buff *skb;
unsigned int i;
u16 frag_len;
- u16 len;
-
- stats = q_to_rx_stats(q);
buf_info = &desc_info->bufs[0];
- len = le16_to_cpu(comp->len);
-
prefetchw(buf_info->page);
skb = napi_get_frags(&q_to_qcq(q)->napi);
if (unlikely(!skb)) {
net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
- netdev->name, q->name);
- stats->alloc_err++;
+ dev_name(q->dev), q->name);
+ q_to_rx_stats(q)->alloc_err++;
return NULL;
}
- i = comp->num_sg_elems + 1;
- do {
- if (unlikely(!buf_info->page)) {
- dev_kfree_skb(skb);
- return NULL;
- }
-
- frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN,
- IONIC_PAGE_SIZE - buf_info->page_offset));
- len -= frag_len;
-
- dma_sync_single_for_cpu(dev,
- buf_info->dma_addr + buf_info->page_offset,
- frag_len, DMA_FROM_DEVICE);
-
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- buf_info->page, buf_info->page_offset, frag_len,
- IONIC_PAGE_SIZE);
-
- if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
- dma_unmap_page(dev, buf_info->dma_addr,
- IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
- buf_info->page = NULL;
- }
+ if (headroom)
+ frag_len = min_t(u16, len,
+ IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN);
+ else
+ frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info));
- buf_info++;
+ if (unlikely(!buf_info->page))
+ goto err_bad_buf_page;
+ ionic_rx_add_skb_frag(q, skb, buf_info, headroom, frag_len, synced);
+ len -= frag_len;
+ buf_info++;
- i--;
- } while (i > 0);
+ for (i = 0; i < num_sg_elems; i++, buf_info++) {
+ if (unlikely(!buf_info->page))
+ goto err_bad_buf_page;
+ frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info));
+ ionic_rx_add_skb_frag(q, skb, buf_info, 0, frag_len, synced);
+ len -= frag_len;
+ }
return skb;
+
+err_bad_buf_page:
+ dev_kfree_skb(skb);
+ return NULL;
}
-static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
- struct ionic_rxq_comp *comp)
+static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev,
+ struct ionic_queue *q,
+ struct ionic_rx_desc_info *desc_info,
+ unsigned int headroom,
+ unsigned int len,
+ bool synced)
{
- struct net_device *netdev = q->lif->netdev;
struct ionic_buf_info *buf_info;
- struct ionic_rx_stats *stats;
struct device *dev = q->dev;
struct sk_buff *skb;
- u16 len;
-
- stats = q_to_rx_stats(q);
buf_info = &desc_info->bufs[0];
- len = le16_to_cpu(comp->len);
skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
if (unlikely(!skb)) {
net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
- netdev->name, q->name);
- stats->alloc_err++;
+ dev_name(dev), q->name);
+ q_to_rx_stats(q)->alloc_err++;
return NULL;
}
@@ -262,30 +297,343 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
return NULL;
}
- dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
- len, DMA_FROM_DEVICE);
- skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
- dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
- len, DMA_FROM_DEVICE);
+ if (!synced)
+ dma_sync_single_range_for_cpu(dev, ionic_rx_buf_pa(buf_info),
+ headroom, len, DMA_FROM_DEVICE);
+ skb_copy_to_linear_data(skb, ionic_rx_buf_va(buf_info) + headroom, len);
+ dma_sync_single_range_for_device(dev, ionic_rx_buf_pa(buf_info),
+ headroom, len, DMA_FROM_DEVICE);
skb_put(skb, len);
- skb->protocol = eth_type_trans(skb, q->lif->netdev);
+ skb->protocol = eth_type_trans(skb, netdev);
return skb;
}
+static void ionic_xdp_tx_desc_clean(struct ionic_queue *q,
+ struct ionic_tx_desc_info *desc_info)
+{
+ unsigned int nbufs = desc_info->nbufs;
+ struct ionic_buf_info *buf_info;
+ struct device *dev = q->dev;
+ int i;
+
+ if (!nbufs)
+ return;
+
+ buf_info = desc_info->bufs;
+ dma_unmap_single(dev, buf_info->dma_addr,
+ buf_info->len, DMA_TO_DEVICE);
+ if (desc_info->act == XDP_TX)
+ __free_pages(buf_info->page, 0);
+ buf_info->page = NULL;
+
+ buf_info++;
+ for (i = 1; i < nbufs + 1 && buf_info->page; i++, buf_info++) {
+ dma_unmap_page(dev, buf_info->dma_addr,
+ buf_info->len, DMA_TO_DEVICE);
+ if (desc_info->act == XDP_TX)
+ __free_pages(buf_info->page, 0);
+ buf_info->page = NULL;
+ }
+
+ if (desc_info->act == XDP_REDIRECT)
+ xdp_return_frame(desc_info->xdpf);
+
+ desc_info->nbufs = 0;
+ desc_info->xdpf = NULL;
+ desc_info->act = 0;
+}
+
+static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame,
+ enum xdp_action act, struct page *page, int off,
+ bool ring_doorbell)
+{
+ struct ionic_tx_desc_info *desc_info;
+ struct ionic_buf_info *buf_info;
+ struct ionic_tx_stats *stats;
+ struct ionic_txq_desc *desc;
+ size_t len = frame->len;
+ dma_addr_t dma_addr;
+ u64 cmd;
+
+ desc_info = &q->tx_info[q->head_idx];
+ desc = &q->txq[q->head_idx];
+ buf_info = desc_info->bufs;
+ stats = q_to_tx_stats(q);
+
+ dma_addr = ionic_tx_map_single(q, frame->data, len);
+ if (!dma_addr)
+ return -EIO;
+ buf_info->dma_addr = dma_addr;
+ buf_info->len = len;
+ buf_info->page = page;
+ buf_info->page_offset = off;
+
+ desc_info->nbufs = 1;
+ desc_info->xdpf = frame;
+ desc_info->act = act;
+
+ if (xdp_frame_has_frags(frame)) {
+ struct ionic_txq_sg_elem *elem;
+ struct skb_shared_info *sinfo;
+ struct ionic_buf_info *bi;
+ skb_frag_t *frag;
+ int i;
+
+ bi = &buf_info[1];
+ sinfo = xdp_get_shared_info_from_frame(frame);
+ frag = sinfo->frags;
+ elem = ionic_tx_sg_elems(q);
+ for (i = 0; i < sinfo->nr_frags; i++, frag++, bi++) {
+ dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
+ if (!dma_addr) {
+ ionic_tx_desc_unmap_bufs(q, desc_info);
+ return -EIO;
+ }
+ bi->dma_addr = dma_addr;
+ bi->len = skb_frag_size(frag);
+ bi->page = skb_frag_page(frag);
+
+ elem->addr = cpu_to_le64(bi->dma_addr);
+ elem->len = cpu_to_le16(bi->len);
+ elem++;
+
+ desc_info->nbufs++;
+ }
+ }
+
+ cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE,
+ 0, (desc_info->nbufs - 1), buf_info->dma_addr);
+ desc->cmd = cpu_to_le64(cmd);
+ desc->len = cpu_to_le16(len);
+ desc->csum_start = 0;
+ desc->csum_offset = 0;
+
+ stats->xdp_frames++;
+ stats->pkts++;
+ stats->bytes += len;
+
+ ionic_txq_post(q, ring_doorbell);
+
+ return 0;
+}
+
+int ionic_xdp_xmit(struct net_device *netdev, int n,
+ struct xdp_frame **xdp_frames, u32 flags)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_queue *txq;
+ struct netdev_queue *nq;
+ int nxmit;
+ int space;
+ int cpu;
+ int qi;
+
+ if (unlikely(!test_bit(IONIC_LIF_F_UP, lif->state)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ /* AdminQ is assumed on cpu 0, while we attempt to affinitize the
+ * TxRx queue pairs 0..n-1 on cpus 1..n. We try to keep with that
+ * affinitization here, but of course irqbalance and friends might
+ * have juggled things anyway, so we have to check for the 0 case.
+ */
+ cpu = smp_processor_id();
+ qi = cpu ? (cpu - 1) % lif->nxqs : cpu;
+
+ txq = &lif->txqcqs[qi]->q;
+ nq = netdev_get_tx_queue(netdev, txq->index);
+ __netif_tx_lock(nq, cpu);
+ txq_trans_cond_update(nq);
+
+ if (netif_tx_queue_stopped(nq) ||
+ !netif_txq_maybe_stop(q_to_ndq(netdev, txq),
+ ionic_q_space_avail(txq),
+ 1, 1)) {
+ __netif_tx_unlock(nq);
+ return -EIO;
+ }
+
+ space = min_t(int, n, ionic_q_space_avail(txq));
+ for (nxmit = 0; nxmit < space ; nxmit++) {
+ if (ionic_xdp_post_frame(txq, xdp_frames[nxmit],
+ XDP_REDIRECT,
+ virt_to_page(xdp_frames[nxmit]->data),
+ 0, false)) {
+ nxmit--;
+ break;
+ }
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ ionic_dbell_ring(lif->kern_dbpage, txq->hw_type,
+ txq->dbval | txq->head_idx);
+
+ netif_txq_maybe_stop(q_to_ndq(netdev, txq),
+ ionic_q_space_avail(txq),
+ 4, 4);
+ __netif_tx_unlock(nq);
+
+ return nxmit;
+}
+
+static bool ionic_run_xdp(struct ionic_rx_stats *stats,
+ struct net_device *netdev,
+ struct bpf_prog *xdp_prog,
+ struct ionic_queue *rxq,
+ struct ionic_buf_info *buf_info,
+ int len)
+{
+ u32 xdp_action = XDP_ABORTED;
+ struct xdp_buff xdp_buf;
+ struct ionic_queue *txq;
+ struct netdev_queue *nq;
+ struct xdp_frame *xdpf;
+ int remain_len;
+ int frag_len;
+ int err = 0;
+
+ xdp_init_buff(&xdp_buf, IONIC_PAGE_SIZE, rxq->xdp_rxq_info);
+ frag_len = min_t(u16, len, IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN);
+ xdp_prepare_buff(&xdp_buf, ionic_rx_buf_va(buf_info),
+ XDP_PACKET_HEADROOM, frag_len, false);
+
+ dma_sync_single_range_for_cpu(rxq->dev, ionic_rx_buf_pa(buf_info),
+ XDP_PACKET_HEADROOM, len,
+ DMA_FROM_DEVICE);
+
+ prefetchw(&xdp_buf.data_hard_start);
+
+ /* We limit MTU size to one buffer if !xdp_has_frags, so
+ * if the recv len is bigger than one buffer
+ * then we know we have frag info to gather
+ */
+ remain_len = len - frag_len;
+ if (remain_len) {
+ struct skb_shared_info *sinfo;
+ struct ionic_buf_info *bi;
+ skb_frag_t *frag;
+
+ bi = buf_info;
+ sinfo = xdp_get_shared_info_from_buff(&xdp_buf);
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(&xdp_buf);
+
+ do {
+ if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) {
+ err = -ENOSPC;
+ goto out_xdp_abort;
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags];
+ sinfo->nr_frags++;
+ bi++;
+ frag_len = min_t(u16, remain_len, ionic_rx_buf_size(bi));
+ dma_sync_single_range_for_cpu(rxq->dev, ionic_rx_buf_pa(bi),
+ 0, frag_len, DMA_FROM_DEVICE);
+ skb_frag_fill_page_desc(frag, bi->page, 0, frag_len);
+ sinfo->xdp_frags_size += frag_len;
+ remain_len -= frag_len;
+
+ if (page_is_pfmemalloc(bi->page))
+ xdp_buff_set_frag_pfmemalloc(&xdp_buf);
+ } while (remain_len > 0);
+ }
+
+ xdp_action = bpf_prog_run_xdp(xdp_prog, &xdp_buf);
+
+ switch (xdp_action) {
+ case XDP_PASS:
+ stats->xdp_pass++;
+ return false; /* false = we didn't consume the packet */
+
+ case XDP_DROP:
+ ionic_rx_page_free(rxq, buf_info);
+ stats->xdp_drop++;
+ break;
+
+ case XDP_TX:
+ xdpf = xdp_convert_buff_to_frame(&xdp_buf);
+ if (!xdpf)
+ goto out_xdp_abort;
+
+ txq = rxq->partner;
+ nq = netdev_get_tx_queue(netdev, txq->index);
+ __netif_tx_lock(nq, smp_processor_id());
+ txq_trans_cond_update(nq);
+
+ if (netif_tx_queue_stopped(nq) ||
+ !netif_txq_maybe_stop(q_to_ndq(netdev, txq),
+ ionic_q_space_avail(txq),
+ 1, 1)) {
+ __netif_tx_unlock(nq);
+ goto out_xdp_abort;
+ }
+
+ dma_unmap_page(rxq->dev, buf_info->dma_addr,
+ IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+
+ err = ionic_xdp_post_frame(txq, xdpf, XDP_TX,
+ buf_info->page,
+ buf_info->page_offset,
+ true);
+ __netif_tx_unlock(nq);
+ if (err) {
+ netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err);
+ goto out_xdp_abort;
+ }
+ stats->xdp_tx++;
+
+ /* the Tx completion will free the buffers */
+ break;
+
+ case XDP_REDIRECT:
+ /* unmap the pages before handing them to a different device */
+ dma_unmap_page(rxq->dev, buf_info->dma_addr,
+ IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+
+ err = xdp_do_redirect(netdev, &xdp_buf, xdp_prog);
+ if (err) {
+ netdev_dbg(netdev, "xdp_do_redirect err %d\n", err);
+ goto out_xdp_abort;
+ }
+ buf_info->page = NULL;
+ rxq->xdp_flush = true;
+ stats->xdp_redirect++;
+ break;
+
+ case XDP_ABORTED:
+ default:
+ goto out_xdp_abort;
+ }
+
+ return true;
+
+out_xdp_abort:
+ trace_xdp_exception(netdev, xdp_prog, xdp_action);
+ ionic_rx_page_free(rxq, buf_info);
+ stats->xdp_aborted++;
+
+ return true;
+}
+
static void ionic_rx_clean(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
- struct ionic_cq_info *cq_info,
- void *cb_arg)
+ struct ionic_rx_desc_info *desc_info,
+ struct ionic_rxq_comp *comp)
{
struct net_device *netdev = q->lif->netdev;
struct ionic_qcq *qcq = q_to_qcq(q);
struct ionic_rx_stats *stats;
- struct ionic_rxq_comp *comp;
+ struct bpf_prog *xdp_prog;
+ unsigned int headroom;
struct sk_buff *skb;
-
- comp = cq_info->cq_desc + qcq->cq.desc_size - sizeof(*comp);
+ bool synced = false;
+ bool use_copybreak;
+ u16 len;
stats = q_to_rx_stats(q);
@@ -294,13 +642,25 @@ static void ionic_rx_clean(struct ionic_queue *q,
return;
}
+ len = le16_to_cpu(comp->len);
stats->pkts++;
- stats->bytes += le16_to_cpu(comp->len);
+ stats->bytes += len;
+
+ xdp_prog = READ_ONCE(q->lif->xdp_prog);
+ if (xdp_prog) {
+ if (ionic_run_xdp(stats, netdev, xdp_prog, q, desc_info->bufs, len))
+ return;
+ synced = true;
+ }
- if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
- skb = ionic_rx_copybreak(q, desc_info, comp);
+ headroom = q->xdp_rxq_info ? XDP_PACKET_HEADROOM : 0;
+ use_copybreak = len <= q->lif->rx_copybreak;
+ if (use_copybreak)
+ skb = ionic_rx_copybreak(netdev, q, desc_info,
+ headroom, len, synced);
else
- skb = ionic_rx_frags(q, desc_info, comp);
+ skb = ionic_rx_build_skb(q, desc_info, headroom, len,
+ comp->num_sg_elems, synced);
if (unlikely(!skb)) {
stats->dropped++;
@@ -352,7 +712,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
u64 hwstamp;
cq_desc_hwstamp =
- cq_info->cq_desc +
+ (void *)comp +
qcq->cq.desc_size -
sizeof(struct ionic_rxq_comp) -
IONIC_HWSTAMP_CQ_NEGOFFSET;
@@ -367,19 +727,19 @@ static void ionic_rx_clean(struct ionic_queue *q,
}
}
- if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
+ if (use_copybreak)
napi_gro_receive(&qcq->napi, skb);
else
napi_gro_frags(&qcq->napi);
}
-bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
+bool ionic_rx_service(struct ionic_cq *cq)
{
+ struct ionic_rx_desc_info *desc_info;
struct ionic_queue *q = cq->bound_q;
- struct ionic_desc_info *desc_info;
struct ionic_rxq_comp *comp;
- comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp);
+ comp = &((struct ionic_rxq_comp *)cq->base)[cq->tail_idx];
if (!color_match(comp->pkt_type_color, cq->done_color))
return false;
@@ -391,31 +751,29 @@ bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
if (q->tail_idx != le16_to_cpu(comp->comp_index))
return false;
- desc_info = &q->info[q->tail_idx];
+ desc_info = &q->rx_info[q->tail_idx];
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
/* clean the related q entry, only one per qc completion */
- ionic_rx_clean(q, desc_info, cq_info, desc_info->cb_arg);
-
- desc_info->cb = NULL;
- desc_info->cb_arg = NULL;
+ ionic_rx_clean(q, desc_info, comp);
return true;
}
static inline void ionic_write_cmb_desc(struct ionic_queue *q,
- void __iomem *cmb_desc,
void *desc)
{
- if (q_to_qcq(q)->flags & IONIC_QCQ_F_CMB_RINGS)
- memcpy_toio(cmb_desc, desc, q->desc_size);
+ /* Since Rx and Tx descriptors are the same size, we can
+ * save an instruction or two and skip the qtype check.
+ */
+ if (unlikely(q_to_qcq(q)->flags & IONIC_QCQ_F_CMB_RINGS))
+ memcpy_toio(&q->cmb_txq[q->head_idx], desc, sizeof(q->cmb_txq[0]));
}
void ionic_rx_fill(struct ionic_queue *q)
{
struct net_device *netdev = q->lif->netdev;
- struct ionic_desc_info *desc_info;
- struct ionic_rxq_sg_desc *sg_desc;
+ struct ionic_rx_desc_info *desc_info;
struct ionic_rxq_sg_elem *sg_elem;
struct ionic_buf_info *buf_info;
unsigned int fill_threshold;
@@ -424,8 +782,9 @@ void ionic_rx_fill(struct ionic_queue *q)
unsigned int frag_len;
unsigned int nfrags;
unsigned int n_fill;
- unsigned int i, j;
unsigned int len;
+ unsigned int i;
+ unsigned int j;
n_fill = ionic_q_space_avail(q);
@@ -434,13 +793,16 @@ void ionic_rx_fill(struct ionic_queue *q)
if (n_fill < fill_threshold)
return;
- len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
+ len = netdev->mtu + VLAN_ETH_HLEN;
for (i = n_fill; i; i--) {
+ unsigned int headroom;
+ unsigned int buf_len;
+
nfrags = 0;
remain_len = len;
- desc_info = &q->info[q->head_idx];
- desc = desc_info->desc;
+ desc = &q->rxq[q->head_idx];
+ desc_info = &q->rx_info[q->head_idx];
buf_info = &desc_info->bufs[0];
if (!buf_info->page) { /* alloc a new buffer? */
@@ -451,19 +813,26 @@ void ionic_rx_fill(struct ionic_queue *q)
}
}
- /* fill main descriptor - buf[0] */
- desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
- frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN,
- IONIC_PAGE_SIZE - buf_info->page_offset));
+ /* fill main descriptor - buf[0]
+ * XDP uses space in the first buffer, so account for
+ * head room, tail room, and ip header in the first frag size.
+ */
+ headroom = q->xdp_rxq_info ? XDP_PACKET_HEADROOM : 0;
+ if (q->xdp_rxq_info)
+ buf_len = IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN;
+ else
+ buf_len = ionic_rx_buf_size(buf_info);
+ frag_len = min_t(u16, len, buf_len);
+
+ desc->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info) + headroom);
desc->len = cpu_to_le16(frag_len);
remain_len -= frag_len;
buf_info++;
nfrags++;
/* fill sg descriptors - buf[1..n] */
- sg_desc = desc_info->sg_desc;
- for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) {
- sg_elem = &sg_desc->elems[j];
+ sg_elem = q->rxq_sgl[q->head_idx].elems;
+ for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++, sg_elem++) {
if (!buf_info->page) { /* alloc a new sg buffer? */
if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
sg_elem->addr = 0;
@@ -472,10 +841,8 @@ void ionic_rx_fill(struct ionic_queue *q)
}
}
- sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
- frag_len = min_t(u16, remain_len, min_t(u32, IONIC_MAX_BUF_LEN,
- IONIC_PAGE_SIZE -
- buf_info->page_offset));
+ sg_elem->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info));
+ frag_len = min_t(u16, remain_len, ionic_rx_buf_size(buf_info));
sg_elem->len = cpu_to_le16(frag_len);
remain_len -= frag_len;
buf_info++;
@@ -483,18 +850,16 @@ void ionic_rx_fill(struct ionic_queue *q)
}
/* clear end sg element as a sentinel */
- if (j < q->max_sg_elems) {
- sg_elem = &sg_desc->elems[j];
+ if (j < q->max_sg_elems)
memset(sg_elem, 0, sizeof(*sg_elem));
- }
desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
IONIC_RXQ_DESC_OPCODE_SIMPLE;
desc_info->nbufs = nfrags;
- ionic_write_cmb_desc(q, desc_info->cmb_desc, desc);
+ ionic_write_cmb_desc(q, desc);
- ionic_rxq_post(q, false, ionic_rx_clean, NULL);
+ ionic_rxq_post(q, false);
}
ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type,
@@ -509,21 +874,19 @@ void ionic_rx_fill(struct ionic_queue *q)
void ionic_rx_empty(struct ionic_queue *q)
{
- struct ionic_desc_info *desc_info;
+ struct ionic_rx_desc_info *desc_info;
struct ionic_buf_info *buf_info;
unsigned int i, j;
for (i = 0; i < q->num_descs; i++) {
- desc_info = &q->info[i];
- for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
+ desc_info = &q->rx_info[i];
+ for (j = 0; j < ARRAY_SIZE(desc_info->bufs); j++) {
buf_info = &desc_info->bufs[j];
if (buf_info->page)
ionic_rx_page_free(q, buf_info);
}
desc_info->nbufs = 0;
- desc_info->cb = NULL;
- desc_info->cb_arg = NULL;
}
q->head_idx = 0;
@@ -568,16 +931,13 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
{
struct ionic_qcq *qcq = napi_to_qcq(napi);
struct ionic_cq *cq = napi_to_cq(napi);
- struct ionic_dev *idev;
- struct ionic_lif *lif;
u32 work_done = 0;
u32 flags = 0;
- lif = cq->bound_q->lif;
- idev = &lif->ionic->idev;
+ work_done = ionic_tx_cq_service(cq, budget);
- work_done = ionic_cq_service(cq, budget,
- ionic_tx_service, NULL, NULL);
+ if (unlikely(!budget))
+ return budget;
if (work_done < budget && napi_complete_done(napi, work_done)) {
ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR);
@@ -587,7 +947,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
if (work_done || flags) {
flags |= IONIC_INTR_CRED_RESET_COALESCE;
- ionic_intr_credits(idev->intr_ctrl,
+ ionic_intr_credits(cq->idev->intr_ctrl,
cq->bound_intr->index,
work_done, flags);
}
@@ -598,23 +958,30 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
return work_done;
}
+static void ionic_xdp_do_flush(struct ionic_cq *cq)
+{
+ if (cq->bound_q->xdp_flush) {
+ xdp_do_flush();
+ cq->bound_q->xdp_flush = false;
+ }
+}
+
int ionic_rx_napi(struct napi_struct *napi, int budget)
{
struct ionic_qcq *qcq = napi_to_qcq(napi);
struct ionic_cq *cq = napi_to_cq(napi);
- struct ionic_dev *idev;
- struct ionic_lif *lif;
u32 work_done = 0;
u32 flags = 0;
- lif = cq->bound_q->lif;
- idev = &lif->ionic->idev;
+ if (unlikely(!budget))
+ return budget;
work_done = ionic_cq_service(cq, budget,
ionic_rx_service, NULL, NULL);
ionic_rx_fill(cq->bound_q);
+ ionic_xdp_do_flush(cq);
if (work_done < budget && napi_complete_done(napi, work_done)) {
ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR);
flags |= IONIC_INTR_CRED_UNMASK;
@@ -623,7 +990,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
if (work_done || flags) {
flags |= IONIC_INTR_CRED_RESET_COALESCE;
- ionic_intr_credits(idev->intr_ctrl,
+ ionic_intr_credits(cq->idev->intr_ctrl,
cq->bound_intr->index,
work_done, flags);
}
@@ -640,7 +1007,6 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
struct ionic_cq *rxcq = napi_to_cq(napi);
unsigned int qi = rxcq->bound_q->index;
struct ionic_qcq *txqcq;
- struct ionic_dev *idev;
struct ionic_lif *lif;
struct ionic_cq *txcq;
bool resched = false;
@@ -649,18 +1015,20 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
u32 flags = 0;
lif = rxcq->bound_q->lif;
- idev = &lif->ionic->idev;
txqcq = lif->txqcqs[qi];
txcq = &lif->txqcqs[qi]->cq;
- tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT,
- ionic_tx_service, NULL, NULL);
+ tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT);
+
+ if (unlikely(!budget))
+ return budget;
rx_work_done = ionic_cq_service(rxcq, budget,
ionic_rx_service, NULL, NULL);
ionic_rx_fill(rxcq->bound_q);
+ ionic_xdp_do_flush(rxcq);
if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
ionic_dim_update(rxqcq, 0);
flags |= IONIC_INTR_CRED_UNMASK;
@@ -669,7 +1037,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
if (rx_work_done || flags) {
flags |= IONIC_INTR_CRED_RESET_COALESCE;
- ionic_intr_credits(idev->intr_ctrl, rxcq->bound_intr->index,
+ ionic_intr_credits(rxcq->idev->intr_ctrl, rxcq->bound_intr->index,
tx_work_done + rx_work_done, flags);
}
@@ -686,15 +1054,14 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
void *data, size_t len)
{
- struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct device *dev = q->dev;
dma_addr_t dma_addr;
dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma_addr)) {
net_warn_ratelimited("%s: DMA single map failed on %s!\n",
- q->lif->netdev->name, q->name);
- stats->dma_map_err++;
+ dev_name(dev), q->name);
+ q_to_tx_stats(q)->dma_map_err++;
return 0;
}
return dma_addr;
@@ -704,24 +1071,23 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
const skb_frag_t *frag,
size_t offset, size_t len)
{
- struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct device *dev = q->dev;
dma_addr_t dma_addr;
dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma_addr)) {
net_warn_ratelimited("%s: DMA frag map failed on %s!\n",
- q->lif->netdev->name, q->name);
- stats->dma_map_err++;
+ dev_name(dev), q->name);
+ q_to_tx_stats(q)->dma_map_err++;
+ return 0;
}
return dma_addr;
}
static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+ struct ionic_tx_desc_info *desc_info)
{
struct ionic_buf_info *buf_info = desc_info->bufs;
- struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct device *dev = q->dev;
dma_addr_t dma_addr;
unsigned int nfrags;
@@ -729,10 +1095,8 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
int frag_idx;
dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
- if (dma_mapping_error(dev, dma_addr)) {
- stats->dma_map_err++;
+ if (!dma_addr)
return -EIO;
- }
buf_info->dma_addr = dma_addr;
buf_info->len = skb_headlen(skb);
buf_info++;
@@ -741,10 +1105,8 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
nfrags = skb_shinfo(skb)->nr_frags;
for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) {
dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
- if (dma_mapping_error(dev, dma_addr)) {
- stats->dma_map_err++;
+ if (!dma_addr)
goto dma_fail;
- }
buf_info->dma_addr = dma_addr;
buf_info->len = skb_frag_size(frag);
buf_info++;
@@ -762,12 +1124,13 @@ dma_fail:
dma_unmap_page(dev, buf_info->dma_addr,
buf_info->len, DMA_TO_DEVICE);
}
- dma_unmap_single(dev, buf_info->dma_addr, buf_info->len, DMA_TO_DEVICE);
+ dma_unmap_single(dev, desc_info->bufs[0].dma_addr,
+ desc_info->bufs[0].len, DMA_TO_DEVICE);
return -EIO;
}
static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
- struct ionic_desc_info *desc_info)
+ struct ionic_tx_desc_info *desc_info)
{
struct ionic_buf_info *buf_info = desc_info->bufs;
struct device *dev = q->dev;
@@ -776,41 +1139,48 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
if (!desc_info->nbufs)
return;
- dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+ dma_unmap_single(dev, buf_info->dma_addr,
buf_info->len, DMA_TO_DEVICE);
buf_info++;
for (i = 1; i < desc_info->nbufs; i++, buf_info++)
- dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+ dma_unmap_page(dev, buf_info->dma_addr,
buf_info->len, DMA_TO_DEVICE);
desc_info->nbufs = 0;
}
static void ionic_tx_clean(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
- struct ionic_cq_info *cq_info,
- void *cb_arg)
+ struct ionic_tx_desc_info *desc_info,
+ struct ionic_txq_comp *comp)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct ionic_qcq *qcq = q_to_qcq(q);
- struct sk_buff *skb = cb_arg;
- u16 qi;
+ struct sk_buff *skb;
+
+ if (desc_info->xdpf) {
+ ionic_xdp_tx_desc_clean(q->partner, desc_info);
+ stats->clean++;
+
+ if (unlikely(__netif_subqueue_stopped(q->lif->netdev, q->index)))
+ netif_wake_subqueue(q->lif->netdev, q->index);
+
+ return;
+ }
ionic_tx_desc_unmap_bufs(q, desc_info);
+ skb = desc_info->skb;
if (!skb)
return;
- qi = skb_get_queue_mapping(skb);
-
- if (ionic_txq_hwstamp_enabled(q)) {
- if (cq_info) {
+ if (unlikely(ionic_txq_hwstamp_enabled(q))) {
+ if (comp) {
struct skb_shared_hwtstamps hwts = {};
__le64 *cq_desc_hwstamp;
u64 hwstamp;
cq_desc_hwstamp =
- cq_info->cq_desc +
+ (void *)comp +
qcq->cq.desc_size -
sizeof(struct ionic_txq_comp) -
IONIC_HWSTAMP_CQ_NEGOFFSET;
@@ -828,27 +1198,25 @@ static void ionic_tx_clean(struct ionic_queue *q,
stats->hwstamp_invalid++;
}
}
-
- } else if (unlikely(__netif_subqueue_stopped(q->lif->netdev, qi))) {
- netif_wake_subqueue(q->lif->netdev, qi);
}
desc_info->bytes = skb->len;
stats->clean++;
- dev_consume_skb_any(skb);
+ napi_consume_skb(skb, 1);
}
-bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
+static bool ionic_tx_service(struct ionic_cq *cq,
+ unsigned int *total_pkts, unsigned int *total_bytes)
{
+ struct ionic_tx_desc_info *desc_info;
struct ionic_queue *q = cq->bound_q;
- struct ionic_desc_info *desc_info;
struct ionic_txq_comp *comp;
- int bytes = 0;
- int pkts = 0;
+ unsigned int bytes = 0;
+ unsigned int pkts = 0;
u16 index;
- comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp);
+ comp = &((struct ionic_txq_comp *)cq->base)[cq->tail_idx];
if (!color_match(comp->color, cq->done_color))
return false;
@@ -857,59 +1225,90 @@ bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
* several q entries completed for each cq completion
*/
do {
- desc_info = &q->info[q->tail_idx];
+ desc_info = &q->tx_info[q->tail_idx];
desc_info->bytes = 0;
index = q->tail_idx;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg);
- if (desc_info->cb_arg) {
+ ionic_tx_clean(q, desc_info, comp);
+ if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
+ desc_info->skb = NULL;
}
- desc_info->cb = NULL;
- desc_info->cb_arg = NULL;
} while (index != le16_to_cpu(comp->comp_index));
- if (pkts && bytes && !ionic_txq_hwstamp_enabled(q))
- netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
+ (*total_pkts) += pkts;
+ (*total_bytes) += bytes;
return true;
}
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
+{
+ unsigned int work_done = 0;
+ unsigned int bytes = 0;
+ unsigned int pkts = 0;
+
+ if (work_to_do == 0)
+ return 0;
+
+ while (ionic_tx_service(cq, &pkts, &bytes)) {
+ if (cq->tail_idx == cq->num_descs - 1)
+ cq->done_color = !cq->done_color;
+ cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
+
+ if (++work_done >= work_to_do)
+ break;
+ }
+
+ if (work_done) {
+ struct ionic_queue *q = cq->bound_q;
+
+ if (likely(!ionic_txq_hwstamp_enabled(q)))
+ netif_txq_completed_wake(q_to_ndq(q->lif->netdev, q),
+ pkts, bytes,
+ ionic_q_space_avail(q),
+ IONIC_TSO_DESCS_NEEDED);
+ }
+
+ return work_done;
+}
+
void ionic_tx_flush(struct ionic_cq *cq)
{
- struct ionic_dev *idev = &cq->lif->ionic->idev;
u32 work_done;
- work_done = ionic_cq_service(cq, cq->num_descs,
- ionic_tx_service, NULL, NULL);
+ work_done = ionic_tx_cq_service(cq, cq->num_descs);
if (work_done)
- ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+ ionic_intr_credits(cq->idev->intr_ctrl, cq->bound_intr->index,
work_done, IONIC_INTR_CRED_RESET_COALESCE);
}
void ionic_tx_empty(struct ionic_queue *q)
{
- struct ionic_desc_info *desc_info;
+ struct ionic_tx_desc_info *desc_info;
int bytes = 0;
int pkts = 0;
/* walk the not completed tx entries, if any */
while (q->head_idx != q->tail_idx) {
- desc_info = &q->info[q->tail_idx];
+ desc_info = &q->tx_info[q->tail_idx];
desc_info->bytes = 0;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg);
- if (desc_info->cb_arg) {
+ ionic_tx_clean(q, desc_info, NULL);
+ if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
+ desc_info->skb = NULL;
}
- desc_info->cb = NULL;
- desc_info->cb_arg = NULL;
}
- if (pkts && bytes && !ionic_txq_hwstamp_enabled(q))
- netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
+ if (likely(!ionic_txq_hwstamp_enabled(q))) {
+ struct netdev_queue *ndq = q_to_ndq(q->lif->netdev, q);
+
+ netdev_tx_completed_queue(ndq, pkts, bytes);
+ netdev_tx_reset_queue(ndq);
+ }
}
static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb)
@@ -957,8 +1356,8 @@ static int ionic_tx_tcp_pseudo_csum(struct sk_buff *skb)
return 0;
}
-static void ionic_tx_tso_post(struct ionic_queue *q,
- struct ionic_desc_info *desc_info,
+static void ionic_tx_tso_post(struct net_device *netdev, struct ionic_queue *q,
+ struct ionic_tx_desc_info *desc_info,
struct sk_buff *skb,
dma_addr_t addr, u8 nsge, u16 len,
unsigned int hdrlen, unsigned int mss,
@@ -966,7 +1365,7 @@ static void ionic_tx_tso_post(struct ionic_queue *q,
u16 vlan_tci, bool has_vlan,
bool start, bool done)
{
- struct ionic_txq_desc *desc = desc_info->desc;
+ struct ionic_txq_desc *desc = &q->txq[q->head_idx];
u8 flags = 0;
u64 cmd;
@@ -982,22 +1381,23 @@ static void ionic_tx_tso_post(struct ionic_queue *q,
desc->hdr_len = cpu_to_le16(hdrlen);
desc->mss = cpu_to_le16(mss);
- ionic_write_cmb_desc(q, desc_info->cmb_desc, desc);
+ ionic_write_cmb_desc(q, desc);
if (start) {
skb_tx_timestamp(skb);
- if (!ionic_txq_hwstamp_enabled(q))
- netdev_tx_sent_queue(q_to_ndq(q), skb->len);
- ionic_txq_post(q, false, ionic_tx_clean, skb);
+ if (likely(!ionic_txq_hwstamp_enabled(q)))
+ netdev_tx_sent_queue(q_to_ndq(netdev, q), skb->len);
+ ionic_txq_post(q, false);
} else {
- ionic_txq_post(q, done, NULL, NULL);
+ ionic_txq_post(q, done);
}
}
-static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q,
+ struct sk_buff *skb)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
- struct ionic_desc_info *desc_info;
+ struct ionic_tx_desc_info *desc_info;
struct ionic_buf_info *buf_info;
struct ionic_txq_sg_elem *elem;
struct ionic_txq_desc *desc;
@@ -1019,8 +1419,7 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
bool encap;
int err;
- desc_info = &q->info[q->head_idx];
- buf_info = desc_info->bufs;
+ desc_info = &q->tx_info[q->head_idx];
if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
return -EIO;
@@ -1057,6 +1456,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
else
hdrlen = skb_tcp_all_headers(skb);
+ desc_info->skb = skb;
+ buf_info = desc_info->bufs;
tso_rem = len;
seg_rem = min(tso_rem, hdrlen + mss);
@@ -1083,8 +1484,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
chunk_len = min(frag_rem, seg_rem);
if (!desc) {
/* fill main descriptor */
- desc = desc_info->txq_desc;
- elem = desc_info->txq_sg_desc->elems;
+ desc = &q->txq[q->head_idx];
+ elem = ionic_tx_sg_elems(q);
desc_addr = frag_addr;
desc_len = chunk_len;
} else {
@@ -1102,13 +1503,13 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
seg_rem = min(tso_rem, mss);
done = (tso_rem == 0);
/* post descriptor */
- ionic_tx_tso_post(q, desc_info, skb,
+ ionic_tx_tso_post(netdev, q, desc_info, skb,
desc_addr, desc_nsge, desc_len,
hdrlen, mss, outer_csum, vlan_tci, has_vlan,
start, done);
start = false;
/* Buffer information is stored with the first tso descriptor */
- desc_info = &q->info[q->head_idx];
+ desc_info = &q->tx_info[q->head_idx];
desc_info->nbufs = 0;
}
@@ -1121,9 +1522,9 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
}
static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+ struct ionic_tx_desc_info *desc_info)
{
- struct ionic_txq_desc *desc = desc_info->txq_desc;
+ struct ionic_txq_desc *desc = &q->txq[q->head_idx];
struct ionic_buf_info *buf_info = desc_info->bufs;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
bool has_vlan;
@@ -1151,7 +1552,7 @@ static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
desc->csum_offset = cpu_to_le16(skb->csum_offset);
- ionic_write_cmb_desc(q, desc_info->cmb_desc, desc);
+ ionic_write_cmb_desc(q, desc);
if (skb_csum_is_sctp(skb))
stats->crc32_csum++;
@@ -1160,9 +1561,9 @@ static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
}
static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+ struct ionic_tx_desc_info *desc_info)
{
- struct ionic_txq_desc *desc = desc_info->txq_desc;
+ struct ionic_txq_desc *desc = &q->txq[q->head_idx];
struct ionic_buf_info *buf_info = desc_info->bufs;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
bool has_vlan;
@@ -1190,20 +1591,20 @@ static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
desc->csum_start = 0;
desc->csum_offset = 0;
- ionic_write_cmb_desc(q, desc_info->cmb_desc, desc);
+ ionic_write_cmb_desc(q, desc);
stats->csum_none++;
}
static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+ struct ionic_tx_desc_info *desc_info)
{
- struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
struct ionic_buf_info *buf_info = &desc_info->bufs[1];
- struct ionic_txq_sg_elem *elem = sg_desc->elems;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct ionic_txq_sg_elem *elem;
unsigned int i;
+ elem = ionic_tx_sg_elems(q);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, buf_info++, elem++) {
elem->addr = cpu_to_le64(buf_info->dma_addr);
elem->len = cpu_to_le16(buf_info->len);
@@ -1212,14 +1613,18 @@ static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
stats->frags += skb_shinfo(skb)->nr_frags;
}
-static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx(struct net_device *netdev, struct ionic_queue *q,
+ struct sk_buff *skb)
{
- struct ionic_desc_info *desc_info = &q->info[q->head_idx];
+ struct ionic_tx_desc_info *desc_info = &q->tx_info[q->head_idx];
struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ bool ring_dbell = true;
if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
return -EIO;
+ desc_info->skb = skb;
+
/* set up the initial descriptor */
if (skb->ip_summed == CHECKSUM_PARTIAL)
ionic_tx_calc_csum(q, skb, desc_info);
@@ -1233,16 +1638,22 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
stats->pkts++;
stats->bytes += skb->len;
- if (!ionic_txq_hwstamp_enabled(q))
- netdev_tx_sent_queue(q_to_ndq(q), skb->len);
- ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+ if (likely(!ionic_txq_hwstamp_enabled(q))) {
+ struct netdev_queue *ndq = q_to_ndq(netdev, q);
+
+ if (unlikely(!ionic_q_has_space(q, MAX_SKB_FRAGS + 1)))
+ netif_tx_stop_queue(ndq);
+ ring_dbell = __netdev_tx_sent_queue(ndq, skb->len,
+ netdev_xmit_more());
+ }
+ ionic_txq_post(q, ring_dbell);
return 0;
}
static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
{
- struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
bool too_many_frags = false;
skb_frag_t *frag;
int desc_bufs;
@@ -1258,17 +1669,20 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
/* Each desc is mss long max, so a descriptor for each gso_seg */
if (skb_is_gso(skb)) {
ndescs = skb_shinfo(skb)->gso_segs;
+ if (!nr_frags)
+ return ndescs;
} else {
ndescs = 1;
- if (skb_shinfo(skb)->nr_frags > q->max_sg_elems) {
+ if (!nr_frags)
+ return ndescs;
+
+ if (unlikely(nr_frags > q->max_sg_elems)) {
too_many_frags = true;
goto linearize;
}
- }
- /* If non-TSO, or no frags to check, we're done */
- if (!skb_is_gso(skb) || !skb_shinfo(skb)->nr_frags)
return ndescs;
+ }
/* We need to scan the skb to be sure that none of the MTU sized
* packets in the TSO will require more sgs per descriptor than we
@@ -1319,36 +1733,17 @@ linearize:
err = skb_linearize(skb);
if (err)
return err;
- stats->linearize++;
+ q_to_tx_stats(q)->linearize++;
}
return ndescs;
}
-static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs)
-{
- int stopped = 0;
-
- if (unlikely(!ionic_q_has_space(q, ndescs))) {
- netif_stop_subqueue(q->lif->netdev, q->index);
- stopped = 1;
-
- /* Might race with ionic_tx_clean, check again */
- smp_rmb();
- if (ionic_q_has_space(q, ndescs)) {
- netif_wake_subqueue(q->lif->netdev, q->index);
- stopped = 0;
- }
- }
-
- return stopped;
-}
-
static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct ionic_lif *lif = netdev_priv(netdev);
- struct ionic_queue *q = &lif->hwstamp_txq->q;
+ struct ionic_queue *q;
int err, ndescs;
/* Does not stop/start txq, because we post to a separate tx queue
@@ -1356,6 +1751,7 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb,
* the timestamping queue, it is dropped.
*/
+ q = &lif->hwstamp_txq->q;
ndescs = ionic_tx_descs_needed(q, skb);
if (unlikely(ndescs < 0))
goto err_out_drop;
@@ -1365,9 +1761,9 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb,
skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP;
if (skb_is_gso(skb))
- err = ionic_tx_tso(q, skb);
+ err = ionic_tx_tso(netdev, q, skb);
else
- err = ionic_tx(q, skb);
+ err = ionic_tx(netdev, q, skb);
if (err)
goto err_out_drop;
@@ -1405,23 +1801,19 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
if (ndescs < 0)
goto err_out_drop;
- if (unlikely(ionic_maybe_stop_tx(q, ndescs)))
+ if (!netif_txq_maybe_stop(q_to_ndq(netdev, q),
+ ionic_q_space_avail(q),
+ ndescs, ndescs))
return NETDEV_TX_BUSY;
if (skb_is_gso(skb))
- err = ionic_tx_tso(q, skb);
+ err = ionic_tx_tso(netdev, q, skb);
else
- err = ionic_tx(q, skb);
+ err = ionic_tx(netdev, q, skb);
if (err)
goto err_out_drop;
- /* Stop the queue if there aren't descriptors for the next packet.
- * Since our SG lists per descriptor take care of most of the possible
- * fragmentation, we don't need to have many descriptors available.
- */
- ionic_maybe_stop_tx(q, 4);
-
return NETDEV_TX_OK;
err_out_drop:
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
index d7cbaad8a6fb..9e73e324e7a1 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
@@ -14,7 +14,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget);
int ionic_txrx_napi(struct napi_struct *napi, int budget);
netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev);
-bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
-bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
+bool ionic_rx_service(struct ionic_cq *cq);
+int ionic_xdp_xmit(struct net_device *netdev, int n, struct xdp_frame **xdp, u32 flags);
#endif /* _IONIC_TXRX_H_ */
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
index 35ec9aab3dc7..51fa880eaf6c 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
@@ -1186,7 +1186,6 @@ static int
netxen_p3_has_mn(struct netxen_adapter *adapter)
{
u32 capability, flashed_ver;
- capability = 0;
/* NX2031 always had MN */
if (NX_IS_REVISION_P2(adapter->ahw.revision_id))
@@ -1197,7 +1196,6 @@ netxen_p3_has_mn(struct netxen_adapter *adapter)
flashed_ver = NETXEN_DECODE_VERSION(flashed_ver);
if (flashed_ver >= NETXEN_VERSION_CODE(4, 0, 220)) {
-
capability = NXRD32(adapter, NX_PEG_TUNE_CAPABILITY);
if (capability & NX_PEG_TUNE_MN_PRESENT)
return 1;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 5a5dbbb8d8aa..9a1660a12c57 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1794,8 +1794,6 @@ qed_rdma_create_srq(void *rdma_cxt,
goto err;
opaque_fid = p_hwfn->hw_info.opaque_fid;
-
- opaque_fid = p_hwfn->hw_info.opaque_fid;
init_data.opaque_fid = opaque_fid;
init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 0e240b5ab8d4..ae3ebf0cf999 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -1776,7 +1776,7 @@ static int qede_get_tunable(struct net_device *dev,
return 0;
}
-static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int qede_get_eee(struct net_device *dev, struct ethtool_keee *edata)
{
struct qede_dev *edev = netdev_priv(dev);
struct qed_link_output current_link;
@@ -1789,18 +1789,26 @@ static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata)
return -EOPNOTSUPP;
}
- if (current_link.eee.adv_caps & QED_EEE_1G_ADV)
- edata->advertised = ADVERTISED_1000baseT_Full;
- if (current_link.eee.adv_caps & QED_EEE_10G_ADV)
- edata->advertised |= ADVERTISED_10000baseT_Full;
- if (current_link.sup_caps & QED_EEE_1G_ADV)
- edata->supported = ADVERTISED_1000baseT_Full;
- if (current_link.sup_caps & QED_EEE_10G_ADV)
- edata->supported |= ADVERTISED_10000baseT_Full;
- if (current_link.eee.lp_adv_caps & QED_EEE_1G_ADV)
- edata->lp_advertised = ADVERTISED_1000baseT_Full;
- if (current_link.eee.lp_adv_caps & QED_EEE_10G_ADV)
- edata->lp_advertised |= ADVERTISED_10000baseT_Full;
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ edata->advertised,
+ current_link.eee.adv_caps & QED_EEE_1G_ADV);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ edata->advertised,
+ current_link.eee.adv_caps & QED_EEE_10G_ADV);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ edata->supported,
+ current_link.sup_caps & QED_EEE_1G_ADV);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ edata->supported,
+ current_link.sup_caps & QED_EEE_10G_ADV);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ edata->lp_advertised,
+ current_link.eee.lp_adv_caps & QED_EEE_1G_ADV);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ edata->lp_advertised,
+ current_link.eee.lp_adv_caps & QED_EEE_10G_ADV);
edata->tx_lpi_timer = current_link.eee.tx_lpi_timer;
edata->eee_enabled = current_link.eee.enable;
@@ -1810,11 +1818,14 @@ static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata)
return 0;
}
-static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata)
+static int qede_set_eee(struct net_device *dev, struct ethtool_keee *edata)
{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {};
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = {};
struct qede_dev *edev = netdev_priv(dev);
struct qed_link_output current_link;
struct qed_link_params params;
+ bool unsupp;
if (!edev->ops->common->can_link_change(edev->cdev)) {
DP_INFO(edev, "Link settings are not allowed to be changed\n");
@@ -1832,21 +1843,26 @@ static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata)
memset(&params, 0, sizeof(params));
params.override_flags |= QED_LINK_OVERRIDE_EEE_CONFIG;
- if (!(edata->advertised & (ADVERTISED_1000baseT_Full |
- ADVERTISED_10000baseT_Full)) ||
- ((edata->advertised & (ADVERTISED_1000baseT_Full |
- ADVERTISED_10000baseT_Full)) !=
- edata->advertised)) {
+ linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ supported);
+
+ unsupp = linkmode_andnot(tmp, edata->advertised, supported);
+ if (unsupp) {
DP_VERBOSE(edev, QED_MSG_DEBUG,
- "Invalid advertised capabilities %d\n",
- edata->advertised);
+ "Invalid advertised capabilities %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, edata->advertised);
return -EINVAL;
}
- if (edata->advertised & ADVERTISED_1000baseT_Full)
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ edata->advertised))
params.eee.adv_caps = QED_EEE_1G_ADV;
- if (edata->advertised & ADVERTISED_10000baseT_Full)
- params.eee.adv_caps |= QED_EEE_10G_ADV;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ edata->advertised))
+ params.eee.adv_caps = QED_EEE_10G_ADV;
+
params.eee.enable = edata->eee_enabled;
params.eee.tx_lpi_enable = edata->tx_lpi_enabled;
params.eee.tx_lpi_timer = edata->tx_lpi_timer;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index cb1746bc0e0c..847fa62c80df 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -215,7 +215,7 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb,
bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT);
- bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) &
+ bd2_bits2 |= ((skb_transport_offset(skb) >> 1) &
ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK)
<< ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 41894d154013..b9dc0071c5de 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -446,8 +446,7 @@ static int qlcnic_tx_encap_pkt(struct qlcnic_adapter *adapter,
encap_descr |= skb_network_offset(skb) << 10;
first_desc->encap_descr = cpu_to_le16(encap_descr);
- first_desc->tcp_hdr_offset = skb_inner_transport_header(skb) -
- skb->data;
+ first_desc->tcp_hdr_offset = skb_inner_transport_offset(skb);
first_desc->ip_hdr_offset = skb_inner_network_offset(skb);
qlcnic_set_tx_flags_opcode(first_desc, flags, opcode);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 3270df72541b..4c06f55878de 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -771,5 +771,6 @@ static struct platform_driver emac_platform_driver = {
module_platform_driver(emac_platform_driver);
+MODULE_DESCRIPTION("Qualcomm EMAC Gigabit Ethernet driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:qcom-emac");
diff --git a/drivers/net/ethernet/qualcomm/qca_7k.c b/drivers/net/ethernet/qualcomm/qca_7k.c
index 4292c89bd35c..6263e4cf47fa 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k.c
+++ b/drivers/net/ethernet/qualcomm/qca_7k.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- *
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
*/
/* This module implements the Qualcomm Atheros SPI protocol for
diff --git a/drivers/net/ethernet/qualcomm/qca_7k.h b/drivers/net/ethernet/qualcomm/qca_7k.h
index 356de8ec5d48..828ee9c27578 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k.h
+++ b/drivers/net/ethernet/qualcomm/qca_7k.h
@@ -1,21 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
*/
/* Qualcomm Atheros SPI register definition.
diff --git a/drivers/net/ethernet/qualcomm/qca_7k_common.c b/drivers/net/ethernet/qualcomm/qca_7k_common.c
index 6b511f05df61..5302da587620 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k_common.c
+++ b/drivers/net/ethernet/qualcomm/qca_7k_common.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright (c) 2011, 2012, Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Atheros ethernet framing. Every Ethernet frame is surrounded
@@ -162,5 +149,5 @@ EXPORT_SYMBOL_GPL(qcafrm_fsm_decode);
MODULE_DESCRIPTION("Qualcomm Atheros QCA7000 common");
MODULE_AUTHOR("Qualcomm Atheros Communications");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <wahrenst@gmx.net>");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/qualcomm/qca_7k_common.h b/drivers/net/ethernet/qualcomm/qca_7k_common.h
index 928554f11e35..44ed66fdb407 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k_common.h
+++ b/drivers/net/ethernet/qualcomm/qca_7k_common.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright (c) 2011, 2012, Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Atheros Ethernet framing. Every Ethernet frame is surrounded by an atheros
@@ -107,9 +94,6 @@ struct qcafrm_handle {
/* Offset in buffer (borrowed for length too) */
u16 offset;
-
- /* Frame length as kept by this module */
- u16 len;
};
u16 qcafrm_create_header(u8 *buf, u16 len);
@@ -128,17 +112,6 @@ static inline void qcafrm_fsm_init_uart(struct qcafrm_handle *handle)
handle->state = handle->init;
}
-/* Gather received bytes and try to extract a full Ethernet frame
- * by following a simple state machine.
- *
- * Return: QCAFRM_GATHER No Ethernet frame fully received yet.
- * QCAFRM_NOHEAD Header expected but not found.
- * QCAFRM_INVLEN QCA7K frame length is invalid
- * QCAFRM_NOTAIL Footer expected but not found.
- * > 0 Number of byte in the fully received
- * Ethernet frame
- */
-
s32 qcafrm_fsm_decode(struct qcafrm_handle *handle, u8 *buf, u16 buf_len, u8 recv_byte);
#endif /* _QCA_FRAMING_H */
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
index 1822f2ad8f0d..ff3b89e9028e 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* This file contains debugging routines for use in the QCA7K driver.
@@ -255,7 +242,7 @@ qcaspi_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring,
struct qcaspi *qca = netdev_priv(dev);
ring->rx_max_pending = QCASPI_RX_MAX_FRAMES;
- ring->tx_max_pending = TX_RING_MAX_LEN;
+ ring->tx_max_pending = QCASPI_TX_RING_MAX_LEN;
ring->rx_pending = QCASPI_RX_MAX_FRAMES;
ring->tx_pending = qca->txr.count;
}
@@ -275,8 +262,8 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring,
if (qca->spi_thread)
kthread_park(qca->spi_thread);
- qca->txr.count = max_t(u32, ring->tx_pending, TX_RING_MIN_LEN);
- qca->txr.count = min_t(u16, qca->txr.count, TX_RING_MAX_LEN);
+ qca->txr.count = max_t(u32, ring->tx_pending, QCASPI_TX_RING_MIN_LEN);
+ qca->txr.count = min_t(u16, qca->txr.count, QCASPI_TX_RING_MAX_LEN);
if (qca->spi_thread)
kthread_unpark(qca->spi_thread);
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.h b/drivers/net/ethernet/qualcomm/qca_debug.h
index 46a785844421..0d98cef3abc4 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.h
+++ b/drivers/net/ethernet/qualcomm/qca_debug.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* This file contains debugging routines for use in the QCA7K driver.
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 5f3c11fb3fa2..5799ecc88a87 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* This module implements the Qualcomm Atheros SPI protocol for
@@ -359,7 +346,7 @@ qcaspi_receive(struct qcaspi *qca)
/* Read the packet size. */
qcaspi_read_register(qca, SPI_REG_RDBUF_BYTE_AVA, &available);
- netdev_dbg(net_dev, "qcaspi_receive: SPI_REG_RDBUF_BYTE_AVA: Value: %08x\n",
+ netdev_dbg(net_dev, "qcaspi_receive: SPI_REG_RDBUF_BYTE_AVA: Value: %04x\n",
available);
if (available > QCASPI_HW_BUF_LEN + QCASPI_HW_PKT_LEN) {
@@ -476,7 +463,7 @@ qcaspi_flush_tx_ring(struct qcaspi *qca)
* has been replaced by netif_tx_lock_bh() and so on.
*/
netif_tx_lock_bh(qca->net_dev);
- for (i = 0; i < TX_RING_MAX_LEN; i++) {
+ for (i = 0; i < QCASPI_TX_RING_MAX_LEN; i++) {
if (qca->txr.skb[i]) {
dev_kfree_skb(qca->txr.skb[i]);
qca->txr.skb[i] = NULL;
@@ -687,7 +674,7 @@ static int
qcaspi_netdev_open(struct net_device *dev)
{
struct qcaspi *qca = netdev_priv(dev);
- int ret = 0;
+ struct task_struct *thread;
if (!qca)
return -EINVAL;
@@ -697,23 +684,18 @@ qcaspi_netdev_open(struct net_device *dev)
qca->sync = QCASPI_SYNC_UNKNOWN;
qcafrm_fsm_init_spi(&qca->frm_handle);
- qca->spi_thread = kthread_run((void *)qcaspi_spi_thread,
- qca, "%s", dev->name);
+ thread = kthread_run((void *)qcaspi_spi_thread,
+ qca, "%s", dev->name);
- if (IS_ERR(qca->spi_thread)) {
+ if (IS_ERR(thread)) {
netdev_err(dev, "%s: unable to start kernel thread.\n",
QCASPI_DRV_NAME);
- return PTR_ERR(qca->spi_thread);
+ return PTR_ERR(thread);
}
- ret = request_irq(qca->spi_dev->irq, qcaspi_intr_handler, 0,
- dev->name, qca);
- if (ret) {
- netdev_err(dev, "%s: unable to get IRQ %d (irqval=%d).\n",
- QCASPI_DRV_NAME, qca->spi_dev->irq, ret);
- kthread_stop(qca->spi_thread);
- return ret;
- }
+ qca->spi_thread = thread;
+
+ enable_irq(qca->spi_dev->irq);
/* SPI thread takes care of TX queue */
@@ -728,10 +710,12 @@ qcaspi_netdev_close(struct net_device *dev)
netif_stop_queue(dev);
qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, 0, wr_verify);
- free_irq(qca->spi_dev->irq, qca);
+ disable_irq(qca->spi_dev->irq);
- kthread_stop(qca->spi_thread);
- qca->spi_thread = NULL;
+ if (qca->spi_thread) {
+ kthread_stop(qca->spi_thread);
+ qca->spi_thread = NULL;
+ }
qcaspi_flush_tx_ring(qca);
return 0;
@@ -831,8 +815,8 @@ qcaspi_netdev_init(struct net_device *dev)
qca->clkspeed = qcaspi_clkspeed;
qca->burst_len = qcaspi_burst_len;
qca->spi_thread = NULL;
- qca->buffer_size = (dev->mtu + VLAN_ETH_HLEN + QCAFRM_HEADER_LEN +
- QCAFRM_FOOTER_LEN + 4) * 4;
+ qca->buffer_size = (QCAFRM_MAX_MTU + VLAN_ETH_HLEN + QCAFRM_HEADER_LEN +
+ QCAFRM_FOOTER_LEN + QCASPI_HW_PKT_LEN) * QCASPI_RX_MAX_FRAMES;
memset(&qca->stats, 0, sizeof(struct qcaspi_stats));
@@ -881,6 +865,8 @@ qcaspi_netdev_setup(struct net_device *dev)
qcaspi_set_ethtool_ops(dev);
dev->watchdog_timeo = QCASPI_TX_TIMEOUT;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->needed_tailroom = ALIGN(QCAFRM_FOOTER_LEN + QCAFRM_MIN_LEN, 4);
+ dev->needed_headroom = ALIGN(QCAFRM_HEADER_LEN, 4);
dev->tx_queue_len = 100;
/* MTU range: 46 - 1500 */
@@ -891,7 +877,7 @@ qcaspi_netdev_setup(struct net_device *dev)
memset(qca, 0, sizeof(struct qcaspi));
memset(&qca->txr, 0, sizeof(qca->txr));
- qca->txr.count = TX_RING_MAX_LEN;
+ qca->txr.count = QCASPI_TX_RING_MAX_LEN;
}
static const struct of_device_id qca_spi_of_match[] = {
@@ -984,6 +970,15 @@ qca_spi_probe(struct spi_device *spi)
spi_set_drvdata(spi, qcaspi_devs);
+ ret = devm_request_irq(&spi->dev, spi->irq, qcaspi_intr_handler,
+ IRQF_NO_AUTOEN, qca->net_dev->name, qca);
+ if (ret) {
+ dev_err(&spi->dev, "Unable to get IRQ %d (irqval=%d).\n",
+ spi->irq, ret);
+ free_netdev(qcaspi_devs);
+ return ret;
+ }
+
ret = of_get_ethdev_address(spi->dev.of_node, qca->net_dev);
if (ret) {
eth_hw_addr_random(qca->net_dev);
@@ -998,8 +993,8 @@ qca_spi_probe(struct spi_device *spi)
qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature);
if (signature != QCASPI_GOOD_SIGNATURE) {
- dev_err(&spi->dev, "Invalid signature (0x%04X)\n",
- signature);
+ dev_err(&spi->dev, "Invalid signature (expected 0x%04x, read 0x%04x)\n",
+ QCASPI_GOOD_SIGNATURE, signature);
free_netdev(qcaspi_devs);
return -EFAULT;
}
@@ -1048,6 +1043,6 @@ module_spi_driver(qca_spi_driver);
MODULE_DESCRIPTION("Qualcomm Atheros QCA7000 SPI Driver");
MODULE_AUTHOR("Qualcomm Atheros Communications");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <wahrenst@gmx.net>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(QCASPI_DRV_VERSION);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h
index 3067356106f0..d59cb2352cee 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2014, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Qualcomm Atheros SPI register definition.
@@ -39,8 +26,9 @@
#define QCASPI_GOOD_SIGNATURE 0xAA55
-#define TX_RING_MAX_LEN 10
-#define TX_RING_MIN_LEN 2
+#define QCASPI_TX_RING_MAX_LEN 10
+#define QCASPI_TX_RING_MIN_LEN 2
+#define QCASPI_RX_MAX_FRAMES 4
/* sync related constants */
#define QCASPI_SYNC_UNKNOWN 0
@@ -54,7 +42,7 @@
#define QCASPI_EVENT_CPUON 1
struct tx_ring {
- struct sk_buff *skb[TX_RING_MAX_LEN];
+ struct sk_buff *skb[QCASPI_TX_RING_MAX_LEN];
u16 head;
u16 tail;
u16 size;
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index 223321897b96..321fd8d00730 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright (c) 2011, 2012, Qualcomm Atheros Communications Inc.
* Copyright (c) 2017, I2SE GmbH
- *
- * Permission to use, copy, modify, and/or distribute this software
- * for any purpose with or without fee is hereby granted, provided
- * that the above copyright notice and this permission notice appear
- * in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* This module implements the Qualcomm Atheros UART protocol for
@@ -410,6 +397,6 @@ module_serdev_device_driver(qca_uart_driver);
MODULE_DESCRIPTION("Qualcomm Atheros QCA7000 UART Driver");
MODULE_AUTHOR("Qualcomm Atheros Communications");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <wahrenst@gmx.net>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(QCAUART_DRV_VERSION);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 5b69b9268c75..f3bea196a8f9 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -520,4 +520,5 @@ static void __exit rmnet_exit(void)
module_init(rmnet_init)
module_exit(rmnet_exit)
MODULE_ALIAS_RTNL_LINK("rmnet");
+MODULE_DESCRIPTION("Qualcomm RmNet MAP driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 046b5f7d8e7c..9d2a9562c96f 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -98,7 +98,7 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev)
{
struct rmnet_priv *priv = netdev_priv(dev);
- return priv->real_dev->ifindex;
+ return READ_ONCE(priv->real_dev->ifindex);
}
static int rmnet_vnd_init(struct net_device *dev)
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index 81567fcf3957..4c043052198d 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -68,6 +68,7 @@ enum mac_version {
/* support for RTL_GIGA_MAC_VER_60 has been removed */
RTL_GIGA_MAC_VER_61,
RTL_GIGA_MAC_VER_63,
+ RTL_GIGA_MAC_VER_65,
RTL_GIGA_MAC_NONE
};
@@ -84,3 +85,6 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx,
int rtl8168_get_led_mode(struct rtl8169_private *tp);
int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
void rtl8168_init_leds(struct net_device *ndev);
+int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
+int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
+void rtl8125_init_leds(struct net_device *ndev);
diff --git a/drivers/net/ethernet/realtek/r8169_leds.c b/drivers/net/ethernet/realtek/r8169_leds.c
index 007d077edcad..7c5dc9d0df85 100644
--- a/drivers/net/ethernet/realtek/r8169_leds.c
+++ b/drivers/net/ethernet/realtek/r8169_leds.c
@@ -18,12 +18,14 @@
#define RTL8168_LED_CTRL_LINK_100 BIT(1)
#define RTL8168_LED_CTRL_LINK_10 BIT(0)
-#define RTL8168_NUM_LEDS 3
+#define RTL8125_LED_CTRL_ACT BIT(9)
+#define RTL8125_LED_CTRL_LINK_2500 BIT(5)
+#define RTL8125_LED_CTRL_LINK_1000 BIT(3)
+#define RTL8125_LED_CTRL_LINK_100 BIT(1)
+#define RTL8125_LED_CTRL_LINK_10 BIT(0)
-#define RTL8168_SUPPORTED_MODES \
- (BIT(TRIGGER_NETDEV_LINK_1000) | BIT(TRIGGER_NETDEV_LINK_100) | \
- BIT(TRIGGER_NETDEV_LINK_10) | BIT(TRIGGER_NETDEV_RX) | \
- BIT(TRIGGER_NETDEV_TX))
+#define RTL8168_NUM_LEDS 3
+#define RTL8125_NUM_LEDS 4
struct r8169_led_classdev {
struct led_classdev led;
@@ -33,28 +35,35 @@ struct r8169_led_classdev {
#define lcdev_to_r8169_ldev(lcdev) container_of(lcdev, struct r8169_led_classdev, led)
+static bool r8169_trigger_mode_is_valid(unsigned long flags)
+{
+ bool rx, tx;
+
+ if (flags & BIT(TRIGGER_NETDEV_HALF_DUPLEX))
+ return false;
+ if (flags & BIT(TRIGGER_NETDEV_FULL_DUPLEX))
+ return false;
+
+ rx = flags & BIT(TRIGGER_NETDEV_RX);
+ tx = flags & BIT(TRIGGER_NETDEV_TX);
+
+ return rx == tx;
+}
+
static int rtl8168_led_hw_control_is_supported(struct led_classdev *led_cdev,
unsigned long flags)
{
struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev);
struct rtl8169_private *tp = netdev_priv(ldev->ndev);
int shift = ldev->index * 4;
- bool rx, tx;
-
- if (flags & ~RTL8168_SUPPORTED_MODES)
- goto nosupp;
- rx = flags & BIT(TRIGGER_NETDEV_RX);
- tx = flags & BIT(TRIGGER_NETDEV_TX);
- if (rx != tx)
- goto nosupp;
+ if (!r8169_trigger_mode_is_valid(flags)) {
+ /* Switch LED off to indicate that mode isn't supported */
+ rtl8168_led_mod_ctrl(tp, 0x000f << shift, 0);
+ return -EOPNOTSUPP;
+ }
return 0;
-
-nosupp:
- /* Switch LED off to indicate that mode isn't supported */
- rtl8168_led_mod_ctrl(tp, 0x000f << shift, 0);
- return -EOPNOTSUPP;
}
static int rtl8168_led_hw_control_set(struct led_classdev *led_cdev,
@@ -129,7 +138,6 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev,
r8169_get_led_name(tp, index, led_name, LED_MAX_NAME_SIZE);
led_cdev->name = led_name;
- led_cdev->default_trigger = "netdev";
led_cdev->hw_control_trigger = "netdev";
led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN;
led_cdev->hw_control_is_supported = rtl8168_led_hw_control_is_supported;
@@ -155,3 +163,102 @@ void rtl8168_init_leds(struct net_device *ndev)
for (i = 0; i < RTL8168_NUM_LEDS; i++)
rtl8168_setup_ldev(leds + i, ndev, i);
}
+
+static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
+ unsigned long flags)
+{
+ struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev);
+ struct rtl8169_private *tp = netdev_priv(ldev->ndev);
+
+ if (!r8169_trigger_mode_is_valid(flags)) {
+ /* Switch LED off to indicate that mode isn't supported */
+ rtl8125_set_led_mode(tp, ldev->index, 0);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int rtl8125_led_hw_control_set(struct led_classdev *led_cdev,
+ unsigned long flags)
+{
+ struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev);
+ struct rtl8169_private *tp = netdev_priv(ldev->ndev);
+ u16 mode = 0;
+
+ if (flags & BIT(TRIGGER_NETDEV_LINK_10))
+ mode |= RTL8125_LED_CTRL_LINK_10;
+ if (flags & BIT(TRIGGER_NETDEV_LINK_100))
+ mode |= RTL8125_LED_CTRL_LINK_100;
+ if (flags & BIT(TRIGGER_NETDEV_LINK_1000))
+ mode |= RTL8125_LED_CTRL_LINK_1000;
+ if (flags & BIT(TRIGGER_NETDEV_LINK_2500))
+ mode |= RTL8125_LED_CTRL_LINK_2500;
+ if (flags & (BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX)))
+ mode |= RTL8125_LED_CTRL_ACT;
+
+ return rtl8125_set_led_mode(tp, ldev->index, mode);
+}
+
+static int rtl8125_led_hw_control_get(struct led_classdev *led_cdev,
+ unsigned long *flags)
+{
+ struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev);
+ struct rtl8169_private *tp = netdev_priv(ldev->ndev);
+ int mode;
+
+ mode = rtl8125_get_led_mode(tp, ldev->index);
+ if (mode < 0)
+ return mode;
+
+ if (mode & RTL8125_LED_CTRL_LINK_10)
+ *flags |= BIT(TRIGGER_NETDEV_LINK_10);
+ if (mode & RTL8125_LED_CTRL_LINK_100)
+ *flags |= BIT(TRIGGER_NETDEV_LINK_100);
+ if (mode & RTL8125_LED_CTRL_LINK_1000)
+ *flags |= BIT(TRIGGER_NETDEV_LINK_1000);
+ if (mode & RTL8125_LED_CTRL_LINK_2500)
+ *flags |= BIT(TRIGGER_NETDEV_LINK_2500);
+ if (mode & RTL8125_LED_CTRL_ACT)
+ *flags |= BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX);
+
+ return 0;
+}
+
+static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev,
+ struct net_device *ndev, int index)
+{
+ struct rtl8169_private *tp = netdev_priv(ndev);
+ struct led_classdev *led_cdev = &ldev->led;
+ char led_name[LED_MAX_NAME_SIZE];
+
+ ldev->ndev = ndev;
+ ldev->index = index;
+
+ r8169_get_led_name(tp, index, led_name, LED_MAX_NAME_SIZE);
+ led_cdev->name = led_name;
+ led_cdev->hw_control_trigger = "netdev";
+ led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN;
+ led_cdev->hw_control_is_supported = rtl8125_led_hw_control_is_supported;
+ led_cdev->hw_control_set = rtl8125_led_hw_control_set;
+ led_cdev->hw_control_get = rtl8125_led_hw_control_get;
+ led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
+
+ /* ignore errors */
+ devm_led_classdev_register(&ndev->dev, led_cdev);
+}
+
+void rtl8125_init_leds(struct net_device *ndev)
+{
+ /* bind resource mgmt to netdev */
+ struct device *dev = &ndev->dev;
+ struct r8169_led_classdev *leds;
+ int i;
+
+ leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ if (!leds)
+ return;
+
+ for (i = 0; i < RTL8125_NUM_LEDS; i++)
+ rtl8125_setup_led_ldev(leds + i, ndev, i);
+}
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index dd73df6b17b0..5c879a5c86d7 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -55,6 +55,7 @@
#define FIRMWARE_8107E_2 "rtl_nic/rtl8107e-2.fw"
#define FIRMWARE_8125A_3 "rtl_nic/rtl8125a-3.fw"
#define FIRMWARE_8125B_2 "rtl_nic/rtl8125b-2.fw"
+#define FIRMWARE_8126A_2 "rtl_nic/rtl8126a-2.fw"
#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */
#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
@@ -136,6 +137,7 @@ static const struct {
[RTL_GIGA_MAC_VER_61] = {"RTL8125A", FIRMWARE_8125A_3},
/* reserve 62 for CFG_METHOD_4 in the vendor driver */
[RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2},
+ [RTL_GIGA_MAC_VER_65] = {"RTL8126A", FIRMWARE_8126A_2},
};
static const struct pci_device_id rtl8169_pci_tbl[] = {
@@ -158,6 +160,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
{ PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024 },
{ 0x0001, 0x8168, PCI_ANY_ID, 0x2410 },
{ PCI_VDEVICE(REALTEK, 0x8125) },
+ { PCI_VDEVICE(REALTEK, 0x8126) },
{ PCI_VDEVICE(REALTEK, 0x3000) },
{}
};
@@ -327,13 +330,23 @@ enum rtl8168_registers {
};
enum rtl8125_registers {
+ LEDSEL0 = 0x18,
+ INT_CFG0_8125 = 0x34,
+#define INT_CFG0_ENABLE_8125 BIT(0)
+#define INT_CFG0_CLKREQEN BIT(3)
IntrMask_8125 = 0x38,
IntrStatus_8125 = 0x3c,
+ INT_CFG1_8125 = 0x7a,
+ LEDSEL2 = 0x84,
+ LEDSEL1 = 0x86,
TxPoll_8125 = 0x90,
+ LEDSEL3 = 0x96,
MAC0_BKP = 0x19e0,
EEE_TXIDLE_TIMER_8125 = 0x6048,
};
+#define LEDSEL_MASK_8125 0x23f
+
#define RX_VLAN_INNER_8125 BIT(22)
#define RX_VLAN_OUTER_8125 BIT(23)
#define RX_VLAN_8125 (RX_VLAN_INNER_8125 | RX_VLAN_OUTER_8125)
@@ -606,6 +619,7 @@ struct rtl8169_private {
struct page *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */
struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
u16 cp_cmd;
+ u16 tx_lpi_timer;
u32 irq_mask;
int irq;
struct clk *clk;
@@ -629,7 +643,6 @@ struct rtl8169_private {
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
u32 saved_wolopts;
- int eee_adv;
const char *fw_name;
struct rtl_fw *rtl_fw;
@@ -663,6 +676,7 @@ MODULE_FIRMWARE(FIRMWARE_8168FP_3);
MODULE_FIRMWARE(FIRMWARE_8107E_2);
MODULE_FIRMWARE(FIRMWARE_8125A_3);
MODULE_FIRMWARE(FIRMWARE_8125B_2);
+MODULE_FIRMWARE(FIRMWARE_8126A_2);
static inline struct device *tp_to_dev(struct rtl8169_private *tp)
{
@@ -824,6 +838,51 @@ int rtl8168_get_led_mode(struct rtl8169_private *tp)
return ret;
}
+static int rtl8125_get_led_reg(int index)
+{
+ static const int led_regs[] = { LEDSEL0, LEDSEL1, LEDSEL2, LEDSEL3 };
+
+ return led_regs[index];
+}
+
+int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode)
+{
+ int reg = rtl8125_get_led_reg(index);
+ struct device *dev = tp_to_dev(tp);
+ int ret;
+ u16 val;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&tp->led_lock);
+ val = RTL_R16(tp, reg) & ~LEDSEL_MASK_8125;
+ RTL_W16(tp, reg, val | mode);
+ mutex_unlock(&tp->led_lock);
+
+ pm_runtime_put_sync(dev);
+
+ return 0;
+}
+
+int rtl8125_get_led_mode(struct rtl8169_private *tp, int index)
+{
+ int reg = rtl8125_get_led_reg(index);
+ struct device *dev = tp_to_dev(tp);
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = RTL_R16(tp, reg);
+
+ pm_runtime_put_sync(dev);
+
+ return ret;
+}
+
void r8169_get_led_name(struct rtl8169_private *tp, int idx,
char *buf, int buf_len)
{
@@ -1140,7 +1199,7 @@ static void rtl_writephy(struct rtl8169_private *tp, int location, int val)
case RTL_GIGA_MAC_VER_31:
r8168dp_2_mdio_write(tp, location, val);
break;
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65:
r8168g_mdio_write(tp, location, val);
break;
default:
@@ -1155,7 +1214,7 @@ static int rtl_readphy(struct rtl8169_private *tp, int location)
case RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
return r8168dp_2_mdio_read(tp, location);
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65:
return r8168g_mdio_read(tp, location);
default:
return r8169_mdio_read(tp, location);
@@ -1341,7 +1400,7 @@ static void rtl_set_d3_pll_down(struct rtl8169_private *tp, bool enable)
case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26:
case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30:
case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_65:
if (enable)
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~D3_NO_PLL_DOWN);
else
@@ -1508,7 +1567,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
break;
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_65:
if (wolopts)
rtl_mod_config2(tp, 0, PME_SIGNAL);
else
@@ -1974,30 +2033,64 @@ static int rtl_set_coalesce(struct net_device *dev,
return 0;
}
-static int rtl8169_get_eee(struct net_device *dev, struct ethtool_eee *data)
+static void rtl_set_eee_txidle_timer(struct rtl8169_private *tp)
+{
+ unsigned int timer_val = READ_ONCE(tp->dev->mtu) + ETH_HLEN + 0x20;
+
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_46:
+ case RTL_GIGA_MAC_VER_48:
+ tp->tx_lpi_timer = timer_val;
+ r8168_mac_ocp_write(tp, 0xe048, timer_val);
+ break;
+ case RTL_GIGA_MAC_VER_61:
+ case RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_65:
+ tp->tx_lpi_timer = timer_val;
+ RTL_W16(tp, EEE_TXIDLE_TIMER_8125, timer_val);
+ break;
+ default:
+ break;
+ }
+}
+
+static unsigned int r8169_get_tx_lpi_timer_us(struct rtl8169_private *tp)
+{
+ unsigned int speed = tp->phydev->speed;
+ unsigned int timer = tp->tx_lpi_timer;
+
+ if (!timer || speed == SPEED_UNKNOWN)
+ return 0;
+
+ /* tx_lpi_timer value is in bytes */
+ return DIV_ROUND_CLOSEST(timer * BITS_PER_BYTE, speed);
+}
+
+static int rtl8169_get_eee(struct net_device *dev, struct ethtool_keee *data)
{
struct rtl8169_private *tp = netdev_priv(dev);
+ int ret;
if (!rtl_supports_eee(tp))
return -EOPNOTSUPP;
- return phy_ethtool_get_eee(tp->phydev, data);
+ ret = phy_ethtool_get_eee(tp->phydev, data);
+ if (ret)
+ return ret;
+
+ data->tx_lpi_timer = r8169_get_tx_lpi_timer_us(tp);
+
+ return 0;
}
-static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data)
+static int rtl8169_set_eee(struct net_device *dev, struct ethtool_keee *data)
{
struct rtl8169_private *tp = netdev_priv(dev);
- int ret;
if (!rtl_supports_eee(tp))
return -EOPNOTSUPP;
- ret = phy_ethtool_set_eee(tp->phydev, data);
-
- if (!ret)
- tp->eee_adv = phy_read_mmd(dev->phydev, MDIO_MMD_AN,
- MDIO_AN_EEE_ADV);
- return ret;
+ return phy_ethtool_set_eee(tp->phydev, data);
}
static void rtl8169_get_ringparam(struct net_device *dev,
@@ -2062,21 +2155,6 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
.set_pauseparam = rtl8169_set_pauseparam,
};
-static void rtl_enable_eee(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
- int adv;
-
- /* respect EEE advertisement the user may have set */
- if (tp->eee_adv >= 0)
- adv = tp->eee_adv;
- else
- adv = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
-
- if (adv >= 0)
- phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv);
-}
-
static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii)
{
/*
@@ -2095,6 +2173,9 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii)
u16 val;
enum mac_version ver;
} mac_info[] = {
+ /* 8126A family. */
+ { 0x7cf, 0x649, RTL_GIGA_MAC_VER_65 },
+
/* 8125B family. */
{ 0x7cf, 0x641, RTL_GIGA_MAC_VER_63 },
@@ -2250,14 +2331,8 @@ static void rtl8125a_config_eee_mac(struct rtl8169_private *tp)
r8168_mac_ocp_modify(tp, 0xeb62, 0, BIT(2) | BIT(1));
}
-static void rtl8125_set_eee_txidle_timer(struct rtl8169_private *tp)
-{
- RTL_W16(tp, EEE_TXIDLE_TIMER_8125, tp->dev->mtu + ETH_HLEN + 0x20);
-}
-
static void rtl8125b_config_eee_mac(struct rtl8169_private *tp)
{
- rtl8125_set_eee_txidle_timer(tp);
r8168_mac_ocp_modify(tp, 0xe040, 0, BIT(1) | BIT(0));
}
@@ -2313,9 +2388,6 @@ static void rtl8169_init_phy(struct rtl8169_private *tp)
/* We may have called phy_speed_down before */
phy_speed_up(tp->phydev);
- if (rtl_supports_eee(tp))
- rtl_enable_eee(tp);
-
genphy_soft_reset(tp->phydev);
}
@@ -2368,6 +2440,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_65:
RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST |
RX_PAUSE_SLOT_ON);
break;
@@ -2554,7 +2627,7 @@ static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_61:
rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42);
break;
- case RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_65:
RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42);
rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond_2, 100, 42);
@@ -2797,7 +2870,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38:
rtl_eri_set_bits(tp, 0xd4, 0x0c00);
break;
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65:
r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80);
break;
default:
@@ -2811,7 +2884,7 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
rtl_eri_clear_bits(tp, 0xd4, 0x1f00);
break;
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65:
r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0);
break;
default:
@@ -2821,6 +2894,8 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp)
static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
{
+ u8 val8;
+
if (tp->mac_version < RTL_GIGA_MAC_VER_32)
return;
@@ -2834,11 +2909,19 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
return;
rtl_mod_config5(tp, 0, ASPM_en);
- rtl_mod_config2(tp, 0, ClkReqEn);
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_65:
+ val8 = RTL_R8(tp, INT_CFG0_8125) | INT_CFG0_CLKREQEN;
+ RTL_W8(tp, INT_CFG0_8125, val8);
+ break;
+ default:
+ rtl_mod_config2(tp, 0, ClkReqEn);
+ break;
+ }
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65:
/* reset ephy tx/rx disable timer */
r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
/* chip can trigger L1.2 */
@@ -2850,14 +2933,22 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
} else {
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65:
r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
break;
default:
break;
}
- rtl_mod_config2(tp, ClkReqEn, 0);
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_65:
+ val8 = RTL_R8(tp, INT_CFG0_8125) & ~INT_CFG0_CLKREQEN;
+ RTL_W8(tp, INT_CFG0_8125, val8);
+ break;
+ default:
+ rtl_mod_config2(tp, ClkReqEn, 0);
+ break;
+ }
rtl_mod_config5(tp, ASPM_en, 0);
}
}
@@ -3570,10 +3661,15 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
/* disable new tx descriptor format */
r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000);
- if (tp->mac_version == RTL_GIGA_MAC_VER_63)
+ if (tp->mac_version == RTL_GIGA_MAC_VER_65)
+ RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02);
+
+ if (tp->mac_version == RTL_GIGA_MAC_VER_65)
+ r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400);
+ else if (tp->mac_version == RTL_GIGA_MAC_VER_63)
r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0200);
else
- r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400);
+ r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0300);
if (tp->mac_version == RTL_GIGA_MAC_VER_63)
r8168_mac_ocp_modify(tp, 0xe63e, 0x0c30, 0x0000);
@@ -3586,6 +3682,10 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030);
r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000);
r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001);
+ if (tp->mac_version == RTL_GIGA_MAC_VER_65)
+ r8168_mac_ocp_modify(tp, 0xea1c, 0x0300, 0x0000);
+ else
+ r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000);
r8168_mac_ocp_modify(tp, 0xe0c0, 0x4f0f, 0x4403);
r8168_mac_ocp_modify(tp, 0xe052, 0x0080, 0x0068);
r8168_mac_ocp_modify(tp, 0xd430, 0x0fff, 0x047f);
@@ -3600,10 +3700,10 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
rtl_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);
- if (tp->mac_version == RTL_GIGA_MAC_VER_63)
- rtl8125b_config_eee_mac(tp);
- else
+ if (tp->mac_version == RTL_GIGA_MAC_VER_61)
rtl8125a_config_eee_mac(tp);
+ else
+ rtl8125b_config_eee_mac(tp);
rtl_disable_rxdvgate(tp);
}
@@ -3647,6 +3747,12 @@ static void rtl_hw_start_8125b(struct rtl8169_private *tp)
rtl_hw_start_8125_common(tp);
}
+static void rtl_hw_start_8126a(struct rtl8169_private *tp)
+{
+ rtl_set_def_aspm_entry_latency(tp);
+ rtl_hw_start_8125_common(tp);
+}
+
static void rtl_hw_config(struct rtl8169_private *tp)
{
static const rtl_generic_fct hw_configs[] = {
@@ -3689,6 +3795,7 @@ static void rtl_hw_config(struct rtl8169_private *tp)
[RTL_GIGA_MAC_VER_53] = rtl_hw_start_8117,
[RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2,
[RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b,
+ [RTL_GIGA_MAC_VER_65] = rtl_hw_start_8126a,
};
if (hw_configs[tp->mac_version])
@@ -3699,9 +3806,23 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp)
{
int i;
+ RTL_W8(tp, INT_CFG0_8125, 0x00);
+
/* disable interrupt coalescing */
- for (i = 0xa00; i < 0xb00; i += 4)
- RTL_W32(tp, i, 0);
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_61:
+ for (i = 0xa00; i < 0xb00; i += 4)
+ RTL_W32(tp, i, 0);
+ break;
+ case RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_65:
+ for (i = 0xa00; i < 0xa80; i += 4)
+ RTL_W32(tp, i, 0);
+ RTL_W16(tp, INT_CFG1_8125, 0x0000);
+ break;
+ default:
+ break;
+ }
rtl_hw_config(tp);
}
@@ -3744,6 +3865,8 @@ static void rtl_hw_start(struct rtl8169_private *tp)
rtl_hw_aspm_clkreq_enable(tp, false);
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ rtl_set_eee_txidle_timer(tp);
+
if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
rtl_hw_start_8169(tp);
else if (rtl_is_8125(tp))
@@ -3777,15 +3900,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
netdev_update_features(dev);
rtl_jumbo_config(tp);
-
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_61:
- case RTL_GIGA_MAC_VER_63:
- rtl8125_set_eee_txidle_timer(tp);
- break;
- default:
- break;
- }
+ rtl_set_eee_txidle_timer(tp);
return 0;
}
@@ -3929,7 +4044,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp)
RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
break;
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65:
rtl_enable_rxdvgate(tp);
fsleep(2000);
break;
@@ -4080,8 +4195,7 @@ static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp,
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_61:
- case RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65:
padto = max_t(unsigned int, padto, ETH_ZLEN);
break;
default:
@@ -5058,7 +5172,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
}
tp->phydev->mac_managed_pm = true;
-
+ if (rtl_supports_eee(tp))
+ phy_support_eee(tp->phydev);
phy_support_asym_pause(tp->phydev);
/* PHY will be woken up in rtl_open() */
@@ -5108,7 +5223,7 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
rtl_hw_init_8168g(tp);
break;
- case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65:
rtl_hw_init_8125(tp);
break;
default:
@@ -5193,7 +5308,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->dev = dev;
tp->pci_dev = pdev;
tp->supports_gmii = ent->driver_data == RTL_CFG_NO_GBIT ? 0 : 1;
- tp->eee_adv = -1;
tp->ocp_base = OCP_STD_PHY_BASE;
raw_spin_lock_init(&tp->cfg9346_usage_lock);
@@ -5201,11 +5315,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
raw_spin_lock_init(&tp->mac_ocp_lock);
mutex_init(&tp->led_lock);
- dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
- struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
/* Get the *optional* external "ether_clk" used on some boards */
tp->clk = devm_clk_get_optional_enabled(&pdev->dev, "ether_clk");
if (IS_ERR(tp->clk))
@@ -5320,6 +5429,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->hw_features |= NETIF_F_RXALL;
dev->hw_features |= NETIF_F_RXFCS;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+
netdev_sw_irq_coalesce_default_on(dev);
/* configure chip for default features */
@@ -5356,10 +5467,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
return rc;
- if (IS_ENABLED(CONFIG_R8169_LEDS) &&
- tp->mac_version > RTL_GIGA_MAC_VER_06 &&
- tp->mac_version < RTL_GIGA_MAC_VER_61)
- rtl8168_init_leds(dev);
+ if (IS_ENABLED(CONFIG_R8169_LEDS)) {
+ if (rtl_is_8125(tp))
+ rtl8125_init_leds(dev);
+ else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
+ rtl8168_init_leds(dev);
+ }
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
rtl_chip_infos[chipset].name, dev->dev_addr, xid, tp->irq);
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
index b50f16786c24..1f74317beb88 100644
--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -1102,6 +1102,12 @@ static void rtl8125b_hw_phy_config(struct rtl8169_private *tp,
rtl8125b_config_eee_phy(phydev);
}
+static void rtl8126a_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+}
+
void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
enum mac_version ver)
{
@@ -1152,6 +1158,7 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
[RTL_GIGA_MAC_VER_53] = rtl8117_hw_phy_config,
[RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config,
[RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config,
+ [RTL_GIGA_MAC_VER_65] = rtl8126a_hw_phy_config,
};
if (phy_configs[ver])
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index d6136fe5c206..b03fae7a0f72 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -34,6 +34,7 @@ config RAVB
select MII
select MDIO_BITBANG
select PHYLIB
+ select RESET_CONTROLLER
help
Renesas Ethernet AVB device driver.
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index e0f8276cffed..b48935ec7e28 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -205,7 +205,11 @@ enum ravb_reg {
TLFRCR = 0x0758,
RFCR = 0x0760,
MAFCR = 0x0778,
- CSR0 = 0x0800, /* RZ/G2L only */
+
+ /* TOE registers (RZ/G2L only) */
+ CSR0 = 0x0800,
+ CSR1 = 0x0804,
+ CSR2 = 0x0808,
};
@@ -978,16 +982,39 @@ enum CSR0_BIT {
CSR0_RPE = 0x00000020,
};
+enum CSR1_BIT {
+ CSR1_TIP4 = 0x00000001,
+ CSR1_TTCP4 = 0x00000010,
+ CSR1_TUDP4 = 0x00000020,
+ CSR1_TICMP4 = 0x00000040,
+ CSR1_TTCP6 = 0x00100000,
+ CSR1_TUDP6 = 0x00200000,
+ CSR1_TICMP6 = 0x00400000,
+ CSR1_THOP = 0x01000000,
+ CSR1_TROUT = 0x02000000,
+ CSR1_TAHD = 0x04000000,
+ CSR1_TDHD = 0x08000000,
+};
+
+enum CSR2_BIT {
+ CSR2_RIP4 = 0x00000001,
+ CSR2_RTCP4 = 0x00000010,
+ CSR2_RUDP4 = 0x00000020,
+ CSR2_RICMP4 = 0x00000040,
+ CSR2_RTCP6 = 0x00100000,
+ CSR2_RUDP6 = 0x00200000,
+ CSR2_RICMP6 = 0x00400000,
+ CSR2_RHOP = 0x01000000,
+ CSR2_RROUT = 0x02000000,
+ CSR2_RAHD = 0x04000000,
+ CSR2_RDHD = 0x08000000,
+};
+
#define DBAT_ENTRY_NUM 22
#define RX_QUEUE_OFFSET 4
#define NUM_RX_QUEUE 2
#define NUM_TX_QUEUE 2
-#define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16))
-
-#define GBETH_RX_BUFF_MAX 8192
-#define GBETH_RX_DESC_DATA_SIZE 4080
-
struct ravb_tstamp_skb {
struct list_head list;
struct sk_buff *skb;
@@ -1012,9 +1039,6 @@ struct ravb_ptp {
};
struct ravb_hw_info {
- void (*rx_ring_free)(struct net_device *ndev, int q);
- void (*rx_ring_format)(struct net_device *ndev, int q);
- void *(*alloc_rx_desc)(struct net_device *ndev, int q);
bool (*receive)(struct net_device *ndev, int *quota, int q);
void (*set_rate)(struct net_device *ndev);
int (*set_feature)(struct net_device *ndev, netdev_features_t features);
@@ -1025,9 +1049,10 @@ struct ravb_hw_info {
netdev_features_t net_hw_features;
netdev_features_t net_features;
int stats_len;
- size_t max_rx_len;
u32 tccr_mask;
- u32 rx_max_buf_size;
+ u32 rx_max_frame_size;
+ u32 rx_max_desc_use;
+ u32 rx_desc_size;
unsigned aligned_tx: 1;
/* hardware features */
@@ -1060,8 +1085,11 @@ struct ravb_private {
struct ravb_desc *desc_bat;
dma_addr_t rx_desc_dma[NUM_RX_QUEUE];
dma_addr_t tx_desc_dma[NUM_TX_QUEUE];
- struct ravb_rx_desc *gbeth_rx_ring;
- struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE];
+ union {
+ struct ravb_rx_desc *desc;
+ struct ravb_ex_rx_desc *ex_desc;
+ void *raw;
+ } rx_ring[NUM_RX_QUEUE];
struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE];
void *tx_align[NUM_TX_QUEUE];
struct sk_buff *rx_1st_skb;
@@ -1089,10 +1117,6 @@ struct ravb_private {
int msg_enable;
int speed;
int emac_irq;
- int erra_irq;
- int mgmta_irq;
- int rx_irqs[NUM_RX_QUEUE];
- int tx_irqs[NUM_TX_QUEUE];
unsigned no_avb_link:1;
unsigned avb_link_active_low:1;
@@ -1106,6 +1130,8 @@ struct ravb_private {
const struct ravb_hw_info *info;
struct reset_control *rstc;
+
+ u32 gti_tiv;
};
static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 0e3731f50fc2..d1be030c8848 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/reset.h>
#include <linux/math64.h>
+#include <net/ip.h>
#include "ravb.h"
@@ -38,16 +39,6 @@
NETIF_MSG_RX_ERR | \
NETIF_MSG_TX_ERR)
-static const char *ravb_rx_irqs[NUM_RX_QUEUE] = {
- "ch0", /* RAVB_BE */
- "ch1", /* RAVB_NC */
-};
-
-static const char *ravb_tx_irqs[NUM_TX_QUEUE] = {
- "ch18", /* RAVB_BE */
- "ch19", /* RAVB_NC */
-};
-
void ravb_modify(struct net_device *ndev, enum ravb_reg reg, u32 clear,
u32 set)
{
@@ -96,13 +87,13 @@ static void ravb_set_rate_gbeth(struct net_device *ndev)
struct ravb_private *priv = netdev_priv(ndev);
switch (priv->speed) {
- case 10: /* 10BASE */
+ case 10: /* 10BASE */
ravb_write(ndev, GBETH_GECMR_SPEED_10, GECMR);
break;
- case 100: /* 100BASE */
+ case 100: /* 100BASE */
ravb_write(ndev, GBETH_GECMR_SPEED_100, GECMR);
break;
- case 1000: /* 1000BASE */
+ case 1000: /* 1000BASE */
ravb_write(ndev, GBETH_GECMR_SPEED_1000, GECMR);
break;
}
@@ -122,12 +113,23 @@ static void ravb_set_rate_rcar(struct net_device *ndev)
}
}
-static void ravb_set_buffer_align(struct sk_buff *skb)
+static struct sk_buff *
+ravb_alloc_skb(struct net_device *ndev, const struct ravb_hw_info *info,
+ gfp_t gfp_mask)
{
- u32 reserve = (unsigned long)skb->data & (RAVB_ALIGN - 1);
+ struct sk_buff *skb;
+ u32 reserve;
+
+ skb = __netdev_alloc_skb(ndev, info->rx_max_frame_size + RAVB_ALIGN - 1,
+ gfp_mask);
+ if (!skb)
+ return NULL;
+ reserve = (unsigned long)skb->data & (RAVB_ALIGN - 1);
if (reserve)
skb_reserve(skb, RAVB_ALIGN - reserve);
+
+ return skb;
}
/* Get MAC address from the MAC address registers
@@ -200,6 +202,13 @@ static const struct mdiobb_ops bb_ops = {
.get_mdio_data = ravb_get_mdio_data,
};
+static struct ravb_rx_desc *
+ravb_rx_get_desc(struct ravb_private *priv, unsigned int q,
+ unsigned int i)
+{
+ return priv->rx_ring[q].raw + priv->info->rx_desc_size * i;
+}
+
/* Free TX skb function for AVB-IP */
static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only)
{
@@ -244,67 +253,40 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only)
return free_num;
}
-static void ravb_rx_ring_free_gbeth(struct net_device *ndev, int q)
+static void ravb_rx_ring_free(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
unsigned int ring_size;
unsigned int i;
- if (!priv->gbeth_rx_ring)
+ if (!priv->rx_ring[q].raw)
return;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- struct ravb_rx_desc *desc = &priv->gbeth_rx_ring[i];
+ struct ravb_rx_desc *desc = ravb_rx_get_desc(priv, q, i);
if (!dma_mapping_error(ndev->dev.parent,
le32_to_cpu(desc->dptr)))
dma_unmap_single(ndev->dev.parent,
le32_to_cpu(desc->dptr),
- GBETH_RX_BUFF_MAX,
+ priv->info->rx_max_frame_size,
DMA_FROM_DEVICE);
}
- ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1);
- dma_free_coherent(ndev->dev.parent, ring_size, priv->gbeth_rx_ring,
+ ring_size = priv->info->rx_desc_size * (priv->num_rx_ring[q] + 1);
+ dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q].raw,
priv->rx_desc_dma[q]);
- priv->gbeth_rx_ring = NULL;
-}
-
-static void ravb_rx_ring_free_rcar(struct net_device *ndev, int q)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- unsigned int ring_size;
- unsigned int i;
-
- if (!priv->rx_ring[q])
- return;
-
- for (i = 0; i < priv->num_rx_ring[q]; i++) {
- struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
-
- if (!dma_mapping_error(ndev->dev.parent,
- le32_to_cpu(desc->dptr)))
- dma_unmap_single(ndev->dev.parent,
- le32_to_cpu(desc->dptr),
- RX_BUF_SZ,
- DMA_FROM_DEVICE);
- }
- ring_size = sizeof(struct ravb_ex_rx_desc) *
- (priv->num_rx_ring[q] + 1);
- dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q],
- priv->rx_desc_dma[q]);
- priv->rx_ring[q] = NULL;
+ priv->rx_ring[q].raw = NULL;
}
/* Free skb's and DMA buffers for Ethernet AVB */
static void ravb_ring_free(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
- const struct ravb_hw_info *info = priv->info;
unsigned int num_tx_desc = priv->num_tx_desc;
unsigned int ring_size;
unsigned int i;
- info->rx_ring_free(ndev, q);
+ ravb_rx_ring_free(ndev, q);
if (priv->tx_ring[q]) {
ravb_tx_free(ndev, q, false);
@@ -335,7 +317,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
priv->tx_skb[q] = NULL;
}
-static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q)
+static void ravb_rx_ring_format(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
struct ravb_rx_desc *rx_desc;
@@ -343,45 +325,15 @@ static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q)
dma_addr_t dma_addr;
unsigned int i;
- rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
- memset(priv->gbeth_rx_ring, 0, rx_ring_size);
- /* Build RX ring buffer */
- for (i = 0; i < priv->num_rx_ring[q]; i++) {
- /* RX descriptor */
- rx_desc = &priv->gbeth_rx_ring[i];
- rx_desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE);
- dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- GBETH_RX_BUFF_MAX,
- DMA_FROM_DEVICE);
- /* We just set the data size to 0 for a failed mapping which
- * should prevent DMA from happening...
- */
- if (dma_mapping_error(ndev->dev.parent, dma_addr))
- rx_desc->ds_cc = cpu_to_le16(0);
- rx_desc->dptr = cpu_to_le32(dma_addr);
- rx_desc->die_dt = DT_FEMPTY;
- }
- rx_desc = &priv->gbeth_rx_ring[i];
- rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]);
- rx_desc->die_dt = DT_LINKFIX; /* type */
-}
-
-static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- struct ravb_ex_rx_desc *rx_desc;
- unsigned int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
- dma_addr_t dma_addr;
- unsigned int i;
-
- memset(priv->rx_ring[q], 0, rx_ring_size);
+ rx_ring_size = priv->info->rx_desc_size * priv->num_rx_ring[q];
+ memset(priv->rx_ring[q].raw, 0, rx_ring_size);
/* Build RX ring buffer */
for (i = 0; i < priv->num_rx_ring[q]; i++) {
/* RX descriptor */
- rx_desc = &priv->rx_ring[q][i];
- rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
+ rx_desc = ravb_rx_get_desc(priv, q, i);
+ rx_desc->ds_cc = cpu_to_le16(priv->info->rx_max_desc_use);
dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- RX_BUF_SZ,
+ priv->info->rx_max_frame_size,
DMA_FROM_DEVICE);
/* We just set the data size to 0 for a failed mapping which
* should prevent DMA from happening...
@@ -391,7 +343,7 @@ static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q)
rx_desc->dptr = cpu_to_le32(dma_addr);
rx_desc->die_dt = DT_FEMPTY;
}
- rx_desc = &priv->rx_ring[q][i];
+ rx_desc = ravb_rx_get_desc(priv, q, i);
rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]);
rx_desc->die_dt = DT_LINKFIX; /* type */
}
@@ -400,7 +352,6 @@ static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q)
static void ravb_ring_format(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
- const struct ravb_hw_info *info = priv->info;
unsigned int num_tx_desc = priv->num_tx_desc;
struct ravb_tx_desc *tx_desc;
struct ravb_desc *desc;
@@ -413,7 +364,7 @@ static void ravb_ring_format(struct net_device *ndev, int q)
priv->dirty_rx[q] = 0;
priv->dirty_tx[q] = 0;
- info->rx_ring_format(ndev, q);
+ ravb_rx_ring_format(ndev, q);
memset(priv->tx_ring[q], 0, tx_ring_size);
/* Build TX ring buffer */
@@ -439,30 +390,18 @@ static void ravb_ring_format(struct net_device *ndev, int q)
desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]);
}
-static void *ravb_alloc_rx_desc_gbeth(struct net_device *ndev, int q)
+static void *ravb_alloc_rx_desc(struct net_device *ndev, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
unsigned int ring_size;
- ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1);
+ ring_size = priv->info->rx_desc_size * (priv->num_rx_ring[q] + 1);
- priv->gbeth_rx_ring = dma_alloc_coherent(ndev->dev.parent, ring_size,
- &priv->rx_desc_dma[q],
- GFP_KERNEL);
- return priv->gbeth_rx_ring;
-}
+ priv->rx_ring[q].raw = dma_alloc_coherent(ndev->dev.parent, ring_size,
+ &priv->rx_desc_dma[q],
+ GFP_KERNEL);
-static void *ravb_alloc_rx_desc_rcar(struct net_device *ndev, int q)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- unsigned int ring_size;
-
- ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
-
- priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size,
- &priv->rx_desc_dma[q],
- GFP_KERNEL);
- return priv->rx_ring[q];
+ return priv->rx_ring[q].raw;
}
/* Init skb and descriptor buffer for Ethernet AVB */
@@ -484,10 +423,9 @@ static int ravb_ring_init(struct net_device *ndev, int q)
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = __netdev_alloc_skb(ndev, info->max_rx_len, GFP_KERNEL);
+ skb = ravb_alloc_skb(ndev, info, GFP_KERNEL);
if (!skb)
goto error;
- ravb_set_buffer_align(skb);
priv->rx_skb[q][i] = skb;
}
@@ -500,7 +438,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
}
/* Allocate all RX descriptors. */
- if (!info->alloc_rx_desc(ndev, q))
+ if (!ravb_alloc_rx_desc(ndev, q))
goto error;
priv->dirty_rx[q] = 0;
@@ -522,6 +460,36 @@ error:
return -ENOMEM;
}
+static void ravb_csum_init_gbeth(struct net_device *ndev)
+{
+ bool tx_enable = ndev->features & NETIF_F_HW_CSUM;
+ bool rx_enable = ndev->features & NETIF_F_RXCSUM;
+
+ if (!(tx_enable || rx_enable))
+ goto done;
+
+ ravb_write(ndev, 0, CSR0);
+ if (ravb_wait(ndev, CSR0, CSR0_TPE | CSR0_RPE, 0)) {
+ netdev_err(ndev, "Timeout enabling hardware checksum\n");
+
+ if (tx_enable)
+ ndev->features &= ~NETIF_F_HW_CSUM;
+
+ if (rx_enable)
+ ndev->features &= ~NETIF_F_RXCSUM;
+ } else {
+ if (tx_enable)
+ ravb_write(ndev, CSR1_TIP4 | CSR1_TTCP4 | CSR1_TUDP4, CSR1);
+
+ if (rx_enable)
+ ravb_write(ndev, CSR2_RIP4 | CSR2_RTCP4 | CSR2_RUDP4 | CSR2_RICMP4,
+ CSR2);
+ }
+
+done:
+ ravb_write(ndev, CSR0_TPE | CSR0_RPE, CSR0);
+}
+
static void ravb_emac_init_gbeth(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
@@ -536,7 +504,7 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
}
/* Receive frame limit set register */
- ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR);
+ ravb_write(ndev, priv->info->rx_max_frame_size + ETH_FCS_LEN, RFLR);
/* EMAC Mode: PAUSE prohibition; Duplex; TX; RX; CRC Pass Through */
ravb_write(ndev, ECMR_ZPF | ((priv->duplex > 0) ? ECMR_DM : 0) |
@@ -553,7 +521,8 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
/* E-MAC status register clear */
ravb_write(ndev, ECSR_ICD | ECSR_LCHNG | ECSR_PFRI, ECSR);
- ravb_write(ndev, CSR0_TPE | CSR0_RPE, CSR0);
+
+ ravb_csum_init_gbeth(ndev);
/* E-MAC interrupt enable register */
ravb_write(ndev, ECSIPR_ICDIP, ECSIPR);
@@ -596,6 +565,7 @@ static void ravb_emac_init(struct net_device *ndev)
static int ravb_dmac_init_gbeth(struct net_device *ndev)
{
+ struct ravb_private *priv = netdev_priv(ndev);
int error;
error = ravb_ring_init(ndev, RAVB_BE);
@@ -609,7 +579,7 @@ static int ravb_dmac_init_gbeth(struct net_device *ndev)
ravb_write(ndev, 0x60000000, RCR);
/* Set Max Frame Length (RTC) */
- ravb_write(ndev, 0x7ffc0000 | GBETH_RX_BUFF_MAX, RTC);
+ ravb_write(ndev, 0x7ffc0000 | priv->info->rx_max_frame_size, RTC);
/* Set FIFO size */
ravb_write(ndev, 0x00222200, TGC);
@@ -734,6 +704,30 @@ static void ravb_get_tx_tstamp(struct net_device *ndev)
}
}
+static void ravb_rx_csum_gbeth(struct sk_buff *skb)
+{
+ __wsum csum_ip_hdr, csum_proto;
+ u8 *hw_csum;
+
+ /* The hardware checksum status is contained in sizeof(__sum16) * 2 = 4
+ * bytes appended to packet data. First 2 bytes is ip header checksum
+ * and last 2 bytes is protocol checksum.
+ */
+ if (unlikely(skb->len < sizeof(__sum16) * 2))
+ return;
+
+ hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
+ csum_proto = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+
+ hw_csum -= sizeof(__sum16);
+ csum_ip_hdr = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+ skb_trim(skb, skb->len - 2 * sizeof(__sum16));
+
+ /* TODO: IPV6 Rx checksum */
+ if (skb->protocol == htons(ETH_P_IP) && !csum_ip_hdr && !csum_proto)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
static void ravb_rx_csum(struct sk_buff *skb)
{
u8 *hw_csum;
@@ -758,7 +752,8 @@ static struct sk_buff *ravb_get_skb_gbeth(struct net_device *ndev, int entry,
skb = priv->rx_skb[RAVB_BE][entry];
priv->rx_skb[RAVB_BE][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- ALIGN(GBETH_RX_BUFF_MAX, 16), DMA_FROM_DEVICE);
+ ALIGN(priv->info->rx_max_frame_size, 16),
+ DMA_FROM_DEVICE);
return skb;
}
@@ -772,29 +767,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
struct ravb_rx_desc *desc;
struct sk_buff *skb;
dma_addr_t dma_addr;
+ int rx_packets = 0;
u8 desc_status;
- int boguscnt;
u16 pkt_len;
u8 die_dt;
int entry;
int limit;
+ int i;
entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- boguscnt = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+ limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
stats = &priv->stats[q];
- boguscnt = min(boguscnt, *quota);
- limit = boguscnt;
- desc = &priv->gbeth_rx_ring[entry];
- while (desc->die_dt != DT_FEMPTY) {
+ desc = &priv->rx_ring[q].desc[entry];
+ for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
- if (--boguscnt < 0)
- break;
-
/* We use 0-byte descriptors to mark the DMA mapping errors */
if (!pkt_len)
continue;
@@ -819,8 +810,10 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
skb = ravb_get_skb_gbeth(ndev, entry, desc);
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, ndev);
+ if (ndev->features & NETIF_F_RXCSUM)
+ ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q], skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
break;
case DT_FSTART:
@@ -846,32 +839,33 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
dev_kfree_skb(skb);
priv->rx_1st_skb->protocol =
eth_type_trans(priv->rx_1st_skb, ndev);
+ if (ndev->features & NETIF_F_RXCSUM)
+ ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
break;
}
}
entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->gbeth_rx_ring[entry];
+ desc = &priv->rx_ring[q].desc[entry];
}
/* Refill the RX ring buffers. */
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
- desc = &priv->gbeth_rx_ring[entry];
- desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE);
+ desc = &priv->rx_ring[q].desc[entry];
+ desc->ds_cc = cpu_to_le16(priv->info->rx_max_desc_use);
if (!priv->rx_skb[q][entry]) {
- skb = netdev_alloc_skb(ndev, info->max_rx_len);
+ skb = ravb_alloc_skb(ndev, info, GFP_ATOMIC);
if (!skb)
break;
- ravb_set_buffer_align(skb);
dma_addr = dma_map_single(ndev->dev.parent,
skb->data,
- GBETH_RX_BUFF_MAX,
+ priv->info->rx_max_frame_size,
DMA_FROM_DEVICE);
skb_checksum_none_assert(skb);
/* We just set the data size to 0 for a failed mapping
@@ -887,9 +881,9 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
desc->die_dt = DT_FEMPTY;
}
- *quota -= limit - (++boguscnt);
-
- return boguscnt <= 0;
+ stats->rx_packets += rx_packets;
+ *quota -= rx_packets;
+ return *quota == 0;
}
/* Packet receive function for Ethernet AVB */
@@ -911,7 +905,7 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
boguscnt = min(boguscnt, *quota);
limit = boguscnt;
- desc = &priv->rx_ring[q][entry];
+ desc = &priv->rx_ring[q].ex_desc[entry];
while (desc->die_dt != DT_FEMPTY) {
/* Descriptor type must be checked before all other reads */
dma_rmb();
@@ -945,7 +939,7 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
skb = priv->rx_skb[q][entry];
priv->rx_skb[q][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- RX_BUF_SZ,
+ priv->info->rx_max_frame_size,
DMA_FROM_DEVICE);
get_ts &= (q == RAVB_NC) ?
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -971,22 +965,21 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
}
entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q][entry];
+ desc = &priv->rx_ring[q].ex_desc[entry];
}
/* Refill the RX ring buffers. */
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q][entry];
- desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
+ desc = &priv->rx_ring[q].ex_desc[entry];
+ desc->ds_cc = cpu_to_le16(priv->info->rx_max_desc_use);
if (!priv->rx_skb[q][entry]) {
- skb = netdev_alloc_skb(ndev, info->max_rx_len);
+ skb = ravb_alloc_skb(ndev, info, GFP_ATOMIC);
if (!skb)
break; /* Better luck next round. */
- ravb_set_buffer_align(skb);
dma_addr = dma_map_single(ndev->dev.parent, skb->data,
- le16_to_cpu(desc->ds_cc),
+ priv->info->rx_max_frame_size,
DMA_FROM_DEVICE);
skb_checksum_none_assert(skb);
/* We just set the data size to 0 for a failed mapping
@@ -1092,11 +1085,23 @@ static irqreturn_t ravb_emac_interrupt(int irq, void *dev_id)
{
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
+ struct device *dev = &priv->pdev->dev;
+ irqreturn_t result = IRQ_HANDLED;
+
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev))) {
+ result = IRQ_NONE;
+ goto out_rpm_put;
+ }
spin_lock(&priv->lock);
ravb_emac_interrupt_unlocked(ndev);
spin_unlock(&priv->lock);
- return IRQ_HANDLED;
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
+ return result;
}
/* Error interrupt handler */
@@ -1176,9 +1181,15 @@ static irqreturn_t ravb_interrupt(int irq, void *dev_id)
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ struct device *dev = &priv->pdev->dev;
irqreturn_t result = IRQ_NONE;
u32 iss;
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev)))
+ goto out_rpm_put;
+
spin_lock(&priv->lock);
/* Get interrupt status */
iss = ravb_read(ndev, ISS);
@@ -1222,6 +1233,9 @@ static irqreturn_t ravb_interrupt(int irq, void *dev_id)
}
spin_unlock(&priv->lock);
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return result;
}
@@ -1230,9 +1244,15 @@ static irqreturn_t ravb_multi_interrupt(int irq, void *dev_id)
{
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
+ struct device *dev = &priv->pdev->dev;
irqreturn_t result = IRQ_NONE;
u32 iss;
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev)))
+ goto out_rpm_put;
+
spin_lock(&priv->lock);
/* Get interrupt status */
iss = ravb_read(ndev, ISS);
@@ -1254,6 +1274,9 @@ static irqreturn_t ravb_multi_interrupt(int irq, void *dev_id)
}
spin_unlock(&priv->lock);
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return result;
}
@@ -1261,8 +1284,14 @@ static irqreturn_t ravb_dma_interrupt(int irq, void *dev_id, int q)
{
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
+ struct device *dev = &priv->pdev->dev;
irqreturn_t result = IRQ_NONE;
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev)))
+ goto out_rpm_put;
+
spin_lock(&priv->lock);
/* Network control/Best effort queue RX/TX */
@@ -1270,6 +1299,9 @@ static irqreturn_t ravb_dma_interrupt(int irq, void *dev_id, int q)
result = IRQ_HANDLED;
spin_unlock(&priv->lock);
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return result;
}
@@ -1288,25 +1320,16 @@ static int ravb_poll(struct napi_struct *napi, int budget)
struct net_device *ndev = napi->dev;
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- bool gptp = info->gptp || info->ccc_gac;
- struct ravb_rx_desc *desc;
unsigned long flags;
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
- unsigned int entry;
- if (!gptp) {
- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- desc = &priv->gbeth_rx_ring[entry];
- }
/* Processing RX Descriptor Ring */
/* Clear RX interrupt */
ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (gptp || desc->die_dt != DT_FEMPTY) {
- if (ravb_rx(ndev, &quota, q))
- goto out;
- }
+ if (ravb_rx(ndev, &quota, q))
+ goto out;
/* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
@@ -1736,89 +1759,159 @@ static const struct ethtool_ops ravb_ethtool_ops = {
.set_wol = ravb_set_wol,
};
-static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
- struct net_device *ndev, struct device *dev,
- const char *ch)
+static int ravb_set_config_mode(struct net_device *ndev)
{
- char *name;
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
int error;
- name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch);
- if (!name)
- return -ENOMEM;
- error = request_irq(irq, handler, 0, name, ndev);
- if (error)
- netdev_err(ndev, "cannot request IRQ %s\n", name);
+ if (info->gptp) {
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ if (error)
+ return error;
+ /* Set CSEL value */
+ ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+ } else if (info->ccc_gac) {
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB);
+ } else {
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ }
return error;
}
+static void ravb_set_gti(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+
+ if (!(info->gptp || info->ccc_gac))
+ return;
+
+ ravb_write(ndev, priv->gti_tiv, GTI);
+
+ /* Request GTI loading */
+ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
+}
+
+static int ravb_compute_gti(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+ struct device *dev = ndev->dev.parent;
+ unsigned long rate;
+ u64 inc;
+
+ if (!(info->gptp || info->ccc_gac))
+ return 0;
+
+ if (info->gptp_ref_clk)
+ rate = clk_get_rate(priv->gptp_clk);
+ else
+ rate = clk_get_rate(priv->clk);
+ if (!rate)
+ return -EINVAL;
+
+ inc = div64_ul(1000000000ULL << 20, rate);
+
+ if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) {
+ dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n",
+ inc, GTI_TIV_MIN, GTI_TIV_MAX);
+ return -EINVAL;
+ }
+ priv->gti_tiv = inc;
+
+ return 0;
+}
+
+/* Set tx and rx clock internal delay modes */
+static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ bool explicit_delay = false;
+ u32 delay;
+
+ if (!priv->info->internal_delay)
+ return;
+
+ if (!of_property_read_u32(np, "rx-internal-delay-ps", &delay)) {
+ /* Valid values are 0 and 1800, according to DT bindings */
+ priv->rxcidm = !!delay;
+ explicit_delay = true;
+ }
+ if (!of_property_read_u32(np, "tx-internal-delay-ps", &delay)) {
+ /* Valid values are 0 and 2000, according to DT bindings */
+ priv->txcidm = !!delay;
+ explicit_delay = true;
+ }
+
+ if (explicit_delay)
+ return;
+
+ /* Fall back to legacy rgmii-*id behavior */
+ if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ priv->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+ priv->rxcidm = 1;
+ priv->rgmii_override = 1;
+ }
+
+ if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+ priv->txcidm = 1;
+ priv->rgmii_override = 1;
+ }
+}
+
+static void ravb_set_delay_mode(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ u32 set = 0;
+
+ if (!priv->info->internal_delay)
+ return;
+
+ if (priv->rxcidm)
+ set |= APSR_RDM;
+ if (priv->txcidm)
+ set |= APSR_TDM;
+ ravb_modify(ndev, APSR, APSR_RDM | APSR_TDM, set);
+}
+
/* Network device open function for Ethernet AVB */
static int ravb_open(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- struct platform_device *pdev = priv->pdev;
- struct device *dev = &pdev->dev;
+ struct device *dev = &priv->pdev->dev;
int error;
napi_enable(&priv->napi[RAVB_BE]);
if (info->nc_queues)
napi_enable(&priv->napi[RAVB_NC]);
- if (!info->multi_irqs) {
- error = request_irq(ndev->irq, ravb_interrupt, IRQF_SHARED,
- ndev->name, ndev);
- if (error) {
- netdev_err(ndev, "cannot request IRQ\n");
- goto out_napi_off;
- }
- } else {
- error = ravb_hook_irq(ndev->irq, ravb_multi_interrupt, ndev,
- dev, "ch22:multi");
- if (error)
- goto out_napi_off;
- error = ravb_hook_irq(priv->emac_irq, ravb_emac_interrupt, ndev,
- dev, "ch24:emac");
- if (error)
- goto out_free_irq;
- error = ravb_hook_irq(priv->rx_irqs[RAVB_BE], ravb_be_interrupt,
- ndev, dev, "ch0:rx_be");
- if (error)
- goto out_free_irq_emac;
- error = ravb_hook_irq(priv->tx_irqs[RAVB_BE], ravb_be_interrupt,
- ndev, dev, "ch18:tx_be");
- if (error)
- goto out_free_irq_be_rx;
- error = ravb_hook_irq(priv->rx_irqs[RAVB_NC], ravb_nc_interrupt,
- ndev, dev, "ch1:rx_nc");
- if (error)
- goto out_free_irq_be_tx;
- error = ravb_hook_irq(priv->tx_irqs[RAVB_NC], ravb_nc_interrupt,
- ndev, dev, "ch19:tx_nc");
- if (error)
- goto out_free_irq_nc_rx;
-
- if (info->err_mgmt_irqs) {
- error = ravb_hook_irq(priv->erra_irq, ravb_multi_interrupt,
- ndev, dev, "err_a");
- if (error)
- goto out_free_irq_nc_tx;
- error = ravb_hook_irq(priv->mgmta_irq, ravb_multi_interrupt,
- ndev, dev, "mgmt_a");
- if (error)
- goto out_free_irq_erra;
- }
- }
+ error = pm_runtime_resume_and_get(dev);
+ if (error < 0)
+ goto out_napi_off;
+
+ /* Set AVB config mode */
+ error = ravb_set_config_mode(ndev);
+ if (error)
+ goto out_rpm_put;
+
+ ravb_set_delay_mode(ndev);
+ ravb_write(ndev, priv->desc_bat_dma, DBAT);
/* Device init */
error = ravb_dmac_init(ndev);
if (error)
- goto out_free_irq_mgmta;
+ goto out_set_reset;
+
ravb_emac_init(ndev);
+ ravb_set_gti(ndev);
+
/* Initialise PTP Clock driver */
- if (info->gptp)
+ if (info->gptp || info->ccc_gac)
ravb_ptp_init(ndev, priv->pdev);
/* PHY control start */
@@ -1832,29 +1925,14 @@ static int ravb_open(struct net_device *ndev)
out_ptp_stop:
/* Stop PTP Clock driver */
- if (info->gptp)
+ if (info->gptp || info->ccc_gac)
ravb_ptp_stop(ndev);
ravb_stop_dma(ndev);
-out_free_irq_mgmta:
- if (!info->multi_irqs)
- goto out_free_irq;
- if (info->err_mgmt_irqs)
- free_irq(priv->mgmta_irq, ndev);
-out_free_irq_erra:
- if (info->err_mgmt_irqs)
- free_irq(priv->erra_irq, ndev);
-out_free_irq_nc_tx:
- free_irq(priv->tx_irqs[RAVB_NC], ndev);
-out_free_irq_nc_rx:
- free_irq(priv->rx_irqs[RAVB_NC], ndev);
-out_free_irq_be_tx:
- free_irq(priv->tx_irqs[RAVB_BE], ndev);
-out_free_irq_be_rx:
- free_irq(priv->rx_irqs[RAVB_BE], ndev);
-out_free_irq_emac:
- free_irq(priv->emac_irq, ndev);
-out_free_irq:
- free_irq(ndev->irq, ndev);
+out_set_reset:
+ ravb_set_opmode(ndev, CCC_OPC_RESET);
+out_rpm_put:
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
out_napi_off:
if (info->nc_queues)
napi_disable(&priv->napi[RAVB_NC]);
@@ -1939,6 +2017,36 @@ out_unlock:
rtnl_unlock();
}
+static bool ravb_can_tx_csum_gbeth(struct sk_buff *skb)
+{
+ struct iphdr *ip = ip_hdr(skb);
+
+ /* TODO: Need to add support for VLAN tag 802.1Q */
+ if (skb_vlan_tag_present(skb))
+ return false;
+
+ /* TODO: Need to add hardware checksum for IPv6 */
+ if (skb->protocol != htons(ETH_P_IP))
+ return false;
+
+ switch (ip->protocol) {
+ case IPPROTO_TCP:
+ break;
+ case IPPROTO_UDP:
+ /* If the checksum value in the UDP header field is 0, TOE does
+ * not calculate checksum for UDP part of this frame as it is
+ * optional function as per standards.
+ */
+ if (udp_hdr(skb)->check == 0)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
/* Packet transmit function for Ethernet AVB */
static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
@@ -1954,6 +2062,9 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
u32 entry;
u32 len;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && !ravb_can_tx_csum_gbeth(skb))
+ skb_checksum_help(skb);
+
spin_lock_irqsave(&priv->lock, flags);
if (priv->cur_tx[q] - priv->dirty_tx[q] > (priv->num_tx_ring[q] - 1) *
num_tx_desc) {
@@ -2088,8 +2199,15 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
struct net_device_stats *nstats, *stats0, *stats1;
+ struct device *dev = &priv->pdev->dev;
nstats = &ndev->stats;
+
+ pm_runtime_get_noresume(dev);
+
+ if (!pm_runtime_active(dev))
+ goto out_rpm_put;
+
stats0 = &priv->stats[RAVB_BE];
if (info->tx_counters) {
@@ -2131,6 +2249,8 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
nstats->rx_over_errors += stats1->rx_over_errors;
}
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return nstats;
}
@@ -2153,6 +2273,8 @@ static int ravb_close(struct net_device *ndev)
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
struct ravb_tstamp_skb *ts_skb, *ts_skb2;
+ struct device *dev = &priv->pdev->dev;
+ int error;
netif_tx_stop_all_queues(ndev);
@@ -2161,8 +2283,16 @@ static int ravb_close(struct net_device *ndev)
ravb_write(ndev, 0, RIC2);
ravb_write(ndev, 0, TIC);
+ /* PHY disconnect */
+ if (ndev->phydev) {
+ phy_stop(ndev->phydev);
+ phy_disconnect(ndev->phydev);
+ if (of_phy_is_fixed_link(np))
+ of_phy_deregister_fixed_link(np);
+ }
+
/* Stop PTP Clock driver */
- if (info->gptp)
+ if (info->gptp || info->ccc_gac)
ravb_ptp_stop(ndev);
/* Set the config mode to stop the AVB-DMAC's processes */
@@ -2179,29 +2309,8 @@ static int ravb_close(struct net_device *ndev)
}
}
- /* PHY disconnect */
- if (ndev->phydev) {
- phy_stop(ndev->phydev);
- phy_disconnect(ndev->phydev);
- if (of_phy_is_fixed_link(np))
- of_phy_deregister_fixed_link(np);
- }
-
cancel_work_sync(&priv->work);
- if (info->multi_irqs) {
- free_irq(priv->tx_irqs[RAVB_NC], ndev);
- free_irq(priv->rx_irqs[RAVB_NC], ndev);
- free_irq(priv->tx_irqs[RAVB_BE], ndev);
- free_irq(priv->rx_irqs[RAVB_BE], ndev);
- free_irq(priv->emac_irq, ndev);
- if (info->err_mgmt_irqs) {
- free_irq(priv->erra_irq, ndev);
- free_irq(priv->mgmta_irq, ndev);
- }
- }
- free_irq(ndev->irq, ndev);
-
if (info->nc_queues)
napi_disable(&priv->napi[RAVB_NC]);
napi_disable(&priv->napi[RAVB_BE]);
@@ -2211,6 +2320,17 @@ static int ravb_close(struct net_device *ndev)
if (info->nc_queues)
ravb_ring_free(ndev, RAVB_NC);
+ /* Update statistics. */
+ ravb_get_stats(ndev);
+
+ /* Set reset mode. */
+ error = ravb_set_opmode(ndev, CCC_OPC_RESET);
+ if (error)
+ return error;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+
return 0;
}
@@ -2334,11 +2454,58 @@ static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
spin_unlock_irqrestore(&priv->lock, flags);
}
+static int ravb_endisable_csum_gbeth(struct net_device *ndev, enum ravb_reg reg,
+ u32 val, u32 mask)
+{
+ u32 csr0 = CSR0_TPE | CSR0_RPE;
+ int ret;
+
+ ravb_write(ndev, csr0 & ~mask, CSR0);
+ ret = ravb_wait(ndev, CSR0, mask, 0);
+ if (!ret)
+ ravb_write(ndev, val, reg);
+
+ ravb_write(ndev, csr0, CSR0);
+
+ return ret;
+}
+
static int ravb_set_features_gbeth(struct net_device *ndev,
netdev_features_t features)
{
- /* Place holder */
- return 0;
+ netdev_features_t changed = ndev->features ^ features;
+ struct ravb_private *priv = netdev_priv(ndev);
+ unsigned long flags;
+ int ret = 0;
+ u32 val;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (changed & NETIF_F_RXCSUM) {
+ if (features & NETIF_F_RXCSUM)
+ val = CSR2_RIP4 | CSR2_RTCP4 | CSR2_RUDP4 | CSR2_RICMP4;
+ else
+ val = 0;
+
+ ret = ravb_endisable_csum_gbeth(ndev, CSR2, val, CSR0_RPE);
+ if (ret)
+ goto done;
+ }
+
+ if (changed & NETIF_F_HW_CSUM) {
+ if (features & NETIF_F_HW_CSUM)
+ val = CSR1_TIP4 | CSR1_TTCP4 | CSR1_TUDP4;
+ else
+ val = 0;
+
+ ret = ravb_endisable_csum_gbeth(ndev, CSR1, val, CSR0_TPE);
+ if (ret)
+ goto done;
+ }
+
+done:
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
}
static int ravb_set_features_rcar(struct net_device *ndev,
@@ -2349,8 +2516,6 @@ static int ravb_set_features_rcar(struct net_device *ndev,
if (changed & NETIF_F_RXCSUM)
ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
- ndev->features = features;
-
return 0;
}
@@ -2359,8 +2524,24 @@ static int ravb_set_features(struct net_device *ndev,
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ struct device *dev = &priv->pdev->dev;
+ int ret;
+
+ pm_runtime_get_noresume(dev);
+
+ if (pm_runtime_active(dev))
+ ret = info->set_feature(ndev, features);
+ else
+ ret = 0;
+
+ pm_runtime_put_noidle(dev);
+
+ if (ret)
+ return ret;
- return info->set_feature(ndev, features);
+ ndev->features = features;
+
+ return 0;
}
static const struct net_device_ops ravb_netdev_ops = {
@@ -2434,9 +2615,6 @@ static int ravb_mdio_release(struct ravb_private *priv)
}
static const struct ravb_hw_info ravb_gen3_hw_info = {
- .rx_ring_free = ravb_rx_ring_free_rcar,
- .rx_ring_format = ravb_rx_ring_format_rcar,
- .alloc_rx_desc = ravb_alloc_rx_desc_rcar,
.receive = ravb_rx_rcar,
.set_rate = ravb_set_rate_rcar,
.set_feature = ravb_set_features_rcar,
@@ -2447,9 +2625,10 @@ static const struct ravb_hw_info ravb_gen3_hw_info = {
.net_hw_features = NETIF_F_RXCSUM,
.net_features = NETIF_F_RXCSUM,
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
- .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
- .rx_max_buf_size = SZ_2K,
+ .rx_max_frame_size = SZ_2K,
+ .rx_max_desc_use = SZ_2K - ETH_FCS_LEN + sizeof(__sum16),
+ .rx_desc_size = sizeof(struct ravb_ex_rx_desc),
.internal_delay = 1,
.tx_counters = 1,
.multi_irqs = 1,
@@ -2460,9 +2639,6 @@ static const struct ravb_hw_info ravb_gen3_hw_info = {
};
static const struct ravb_hw_info ravb_gen2_hw_info = {
- .rx_ring_free = ravb_rx_ring_free_rcar,
- .rx_ring_format = ravb_rx_ring_format_rcar,
- .alloc_rx_desc = ravb_alloc_rx_desc_rcar,
.receive = ravb_rx_rcar,
.set_rate = ravb_set_rate_rcar,
.set_feature = ravb_set_features_rcar,
@@ -2473,9 +2649,10 @@ static const struct ravb_hw_info ravb_gen2_hw_info = {
.net_hw_features = NETIF_F_RXCSUM,
.net_features = NETIF_F_RXCSUM,
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
- .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
- .rx_max_buf_size = SZ_2K,
+ .rx_max_frame_size = SZ_2K,
+ .rx_max_desc_use = SZ_2K - ETH_FCS_LEN + sizeof(__sum16),
+ .rx_desc_size = sizeof(struct ravb_ex_rx_desc),
.aligned_tx = 1,
.gptp = 1,
.nc_queues = 1,
@@ -2483,9 +2660,6 @@ static const struct ravb_hw_info ravb_gen2_hw_info = {
};
static const struct ravb_hw_info ravb_rzv2m_hw_info = {
- .rx_ring_free = ravb_rx_ring_free_rcar,
- .rx_ring_format = ravb_rx_ring_format_rcar,
- .alloc_rx_desc = ravb_alloc_rx_desc_rcar,
.receive = ravb_rx_rcar,
.set_rate = ravb_set_rate_rcar,
.set_feature = ravb_set_features_rcar,
@@ -2496,9 +2670,10 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = {
.net_hw_features = NETIF_F_RXCSUM,
.net_features = NETIF_F_RXCSUM,
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
- .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
- .rx_max_buf_size = SZ_2K,
+ .rx_max_frame_size = SZ_2K,
+ .rx_max_desc_use = SZ_2K - ETH_FCS_LEN + sizeof(__sum16),
+ .rx_desc_size = sizeof(struct ravb_ex_rx_desc),
.multi_irqs = 1,
.err_mgmt_irqs = 1,
.gptp = 1,
@@ -2508,9 +2683,6 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = {
};
static const struct ravb_hw_info gbeth_hw_info = {
- .rx_ring_free = ravb_rx_ring_free_gbeth,
- .rx_ring_format = ravb_rx_ring_format_gbeth,
- .alloc_rx_desc = ravb_alloc_rx_desc_gbeth,
.receive = ravb_rx_gbeth,
.set_rate = ravb_set_rate_gbeth,
.set_feature = ravb_set_features_gbeth,
@@ -2518,10 +2690,13 @@ static const struct ravb_hw_info gbeth_hw_info = {
.emac_init = ravb_emac_init_gbeth,
.gstrings_stats = ravb_gstrings_stats_gbeth,
.gstrings_size = sizeof(ravb_gstrings_stats_gbeth),
+ .net_hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM,
+ .net_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM,
.stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth),
- .max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN),
.tccr_mask = TCCR_TSRQ0,
- .rx_max_buf_size = SZ_8K,
+ .rx_max_frame_size = SZ_8K,
+ .rx_max_desc_use = 4080,
+ .rx_desc_size = sizeof(struct ravb_rx_desc),
.aligned_tx = 1,
.tx_counters = 1,
.carrier_counters = 1,
@@ -2541,100 +2716,91 @@ static const struct of_device_id ravb_match_table[] = {
};
MODULE_DEVICE_TABLE(of, ravb_match_table);
-static int ravb_set_gti(struct net_device *ndev)
+static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name,
+ const char *ch, int *irq, irq_handler_t handler)
{
- struct ravb_private *priv = netdev_priv(ndev);
- const struct ravb_hw_info *info = priv->info;
- struct device *dev = ndev->dev.parent;
- unsigned long rate;
- uint64_t inc;
-
- if (info->gptp_ref_clk)
- rate = clk_get_rate(priv->gptp_clk);
- else
- rate = clk_get_rate(priv->clk);
- if (!rate)
- return -EINVAL;
+ struct platform_device *pdev = priv->pdev;
+ struct net_device *ndev = priv->ndev;
+ struct device *dev = &pdev->dev;
+ const char *dev_name;
+ unsigned long flags;
+ int error, irq_num;
- inc = div64_ul(1000000000ULL << 20, rate);
+ if (irq_name) {
+ dev_name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch);
+ if (!dev_name)
+ return -ENOMEM;
- if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) {
- dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n",
- inc, GTI_TIV_MIN, GTI_TIV_MAX);
- return -EINVAL;
+ irq_num = platform_get_irq_byname(pdev, irq_name);
+ flags = 0;
+ } else {
+ dev_name = ndev->name;
+ irq_num = platform_get_irq(pdev, 0);
+ flags = IRQF_SHARED;
}
+ if (irq_num < 0)
+ return irq_num;
- ravb_write(ndev, inc, GTI);
+ if (irq)
+ *irq = irq_num;
- return 0;
+ error = devm_request_irq(dev, irq_num, handler, flags, dev_name, ndev);
+ if (error)
+ netdev_err(ndev, "cannot request IRQ %s\n", dev_name);
+
+ return error;
}
-static int ravb_set_config_mode(struct net_device *ndev)
+static int ravb_setup_irqs(struct ravb_private *priv)
{
- struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ struct net_device *ndev = priv->ndev;
+ const char *irq_name, *emac_irq_name;
int error;
- if (info->gptp) {
- error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
- if (error)
- return error;
- /* Set CSEL value */
- ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
- } else if (info->ccc_gac) {
- error = ravb_set_opmode(ndev, CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB);
+ if (!info->multi_irqs)
+ return ravb_setup_irq(priv, NULL, NULL, &ndev->irq, ravb_interrupt);
+
+ if (info->err_mgmt_irqs) {
+ irq_name = "dia";
+ emac_irq_name = "line3";
} else {
- error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ irq_name = "ch22";
+ emac_irq_name = "ch24";
}
- return error;
-}
-
-/* Set tx and rx clock internal delay modes */
-static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- bool explicit_delay = false;
- u32 delay;
+ error = ravb_setup_irq(priv, irq_name, "ch22:multi", &ndev->irq, ravb_multi_interrupt);
+ if (error)
+ return error;
- if (!of_property_read_u32(np, "rx-internal-delay-ps", &delay)) {
- /* Valid values are 0 and 1800, according to DT bindings */
- priv->rxcidm = !!delay;
- explicit_delay = true;
- }
- if (!of_property_read_u32(np, "tx-internal-delay-ps", &delay)) {
- /* Valid values are 0 and 2000, according to DT bindings */
- priv->txcidm = !!delay;
- explicit_delay = true;
- }
+ error = ravb_setup_irq(priv, emac_irq_name, "ch24:emac", &priv->emac_irq,
+ ravb_emac_interrupt);
+ if (error)
+ return error;
- if (explicit_delay)
- return;
+ if (info->err_mgmt_irqs) {
+ error = ravb_setup_irq(priv, "err_a", "err_a", NULL, ravb_multi_interrupt);
+ if (error)
+ return error;
- /* Fall back to legacy rgmii-*id behavior */
- if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
- priv->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) {
- priv->rxcidm = 1;
- priv->rgmii_override = 1;
+ error = ravb_setup_irq(priv, "mgmt_a", "mgmt_a", NULL, ravb_multi_interrupt);
+ if (error)
+ return error;
}
- if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
- priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
- priv->txcidm = 1;
- priv->rgmii_override = 1;
- }
-}
+ error = ravb_setup_irq(priv, "ch0", "ch0:rx_be", NULL, ravb_be_interrupt);
+ if (error)
+ return error;
-static void ravb_set_delay_mode(struct net_device *ndev)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- u32 set = 0;
+ error = ravb_setup_irq(priv, "ch1", "ch1:rx_nc", NULL, ravb_nc_interrupt);
+ if (error)
+ return error;
- if (priv->rxcidm)
- set |= APSR_RDM;
- if (priv->txcidm)
- set |= APSR_TDM;
- ravb_modify(ndev, APSR, APSR_RDM | APSR_TDM, set);
+ error = ravb_setup_irq(priv, "ch18", "ch18:tx_be", NULL, ravb_be_interrupt);
+ if (error)
+ return error;
+
+ return ravb_setup_irq(priv, "ch19", "ch19:tx_nc", NULL, ravb_nc_interrupt);
}
static int ravb_probe(struct platform_device *pdev)
@@ -2644,9 +2810,8 @@ static int ravb_probe(struct platform_device *pdev)
struct reset_control *rstc;
struct ravb_private *priv;
struct net_device *ndev;
- int error, irq, q;
struct resource *res;
- int i;
+ int error, q;
if (!np) {
dev_err(&pdev->dev,
@@ -2654,7 +2819,7 @@ static int ravb_probe(struct platform_device *pdev)
return -EINVAL;
}
- rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+ rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
if (IS_ERR(rstc))
return dev_err_probe(&pdev->dev, PTR_ERR(rstc),
"failed to get cpg reset\n");
@@ -2673,25 +2838,6 @@ static int ravb_probe(struct platform_device *pdev)
if (error)
goto out_free_netdev;
- pm_runtime_enable(&pdev->dev);
- error = pm_runtime_resume_and_get(&pdev->dev);
- if (error < 0)
- goto out_rpm_disable;
-
- if (info->multi_irqs) {
- if (info->err_mgmt_irqs)
- irq = platform_get_irq_byname(pdev, "dia");
- else
- irq = platform_get_irq_byname(pdev, "ch22");
- } else {
- irq = platform_get_irq(pdev, 0);
- }
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- ndev->irq = irq;
-
SET_NETDEV_DEV(ndev, &pdev->dev);
priv = netdev_priv(ndev);
@@ -2706,10 +2852,43 @@ static int ravb_probe(struct platform_device *pdev)
priv->num_rx_ring[RAVB_NC] = NC_RX_RING_SIZE;
}
+ error = ravb_setup_irqs(priv);
+ if (error)
+ goto out_reset_assert;
+
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk)) {
+ error = PTR_ERR(priv->clk);
+ goto out_reset_assert;
+ }
+
+ if (info->gptp_ref_clk) {
+ priv->gptp_clk = devm_clk_get(&pdev->dev, "gptp");
+ if (IS_ERR(priv->gptp_clk)) {
+ error = PTR_ERR(priv->gptp_clk);
+ goto out_reset_assert;
+ }
+ }
+
+ priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk");
+ if (IS_ERR(priv->refclk)) {
+ error = PTR_ERR(priv->refclk);
+ goto out_reset_assert;
+ }
+ clk_prepare(priv->refclk);
+
+ platform_set_drvdata(pdev, ndev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+ error = pm_runtime_resume_and_get(&pdev->dev);
+ if (error < 0)
+ goto out_rpm_disable;
+
priv->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(priv->addr)) {
error = PTR_ERR(priv->addr);
- goto out_release;
+ goto out_rpm_put;
}
/* The Ether-specific entries in the device structure. */
@@ -2720,79 +2899,14 @@ static int ravb_probe(struct platform_device *pdev)
error = of_get_phy_mode(np, &priv->phy_interface);
if (error && error != -ENODEV)
- goto out_release;
+ goto out_rpm_put;
priv->no_avb_link = of_property_read_bool(np, "renesas,no-ether-link");
priv->avb_link_active_low =
of_property_read_bool(np, "renesas,ether-link-active-low");
- if (info->multi_irqs) {
- if (info->err_mgmt_irqs)
- irq = platform_get_irq_byname(pdev, "line3");
- else
- irq = platform_get_irq_byname(pdev, "ch24");
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->emac_irq = irq;
- for (i = 0; i < NUM_RX_QUEUE; i++) {
- irq = platform_get_irq_byname(pdev, ravb_rx_irqs[i]);
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->rx_irqs[i] = irq;
- }
- for (i = 0; i < NUM_TX_QUEUE; i++) {
- irq = platform_get_irq_byname(pdev, ravb_tx_irqs[i]);
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->tx_irqs[i] = irq;
- }
-
- if (info->err_mgmt_irqs) {
- irq = platform_get_irq_byname(pdev, "err_a");
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->erra_irq = irq;
-
- irq = platform_get_irq_byname(pdev, "mgmt_a");
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->mgmta_irq = irq;
- }
- }
-
- priv->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(priv->clk)) {
- error = PTR_ERR(priv->clk);
- goto out_release;
- }
-
- priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk");
- if (IS_ERR(priv->refclk)) {
- error = PTR_ERR(priv->refclk);
- goto out_release;
- }
- clk_prepare_enable(priv->refclk);
-
- if (info->gptp_ref_clk) {
- priv->gptp_clk = devm_clk_get(&pdev->dev, "gptp");
- if (IS_ERR(priv->gptp_clk)) {
- error = PTR_ERR(priv->gptp_clk);
- goto out_disable_refclk;
- }
- clk_prepare_enable(priv->gptp_clk);
- }
-
- ndev->max_mtu = info->rx_max_buf_size - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+ ndev->max_mtu = info->rx_max_frame_size -
+ (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
ndev->min_mtu = ETH_MIN_MTU;
/* FIXME: R-Car Gen2 has 4byte alignment restriction for tx buffer
@@ -2806,25 +2920,11 @@ static int ravb_probe(struct platform_device *pdev)
ndev->netdev_ops = &ravb_netdev_ops;
ndev->ethtool_ops = &ravb_ethtool_ops;
- /* Set AVB config mode */
- error = ravb_set_config_mode(ndev);
+ error = ravb_compute_gti(ndev);
if (error)
- goto out_disable_gptp_clk;
-
- if (info->gptp || info->ccc_gac) {
- /* Set GTI value */
- error = ravb_set_gti(ndev);
- if (error)
- goto out_disable_gptp_clk;
+ goto out_rpm_put;
- /* Request GTI loading */
- ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
- }
-
- if (info->internal_delay) {
- ravb_parse_delay_mode(np, ndev);
- ravb_set_delay_mode(ndev);
- }
+ ravb_parse_delay_mode(np, ndev);
/* Allocate descriptor base address table */
priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM;
@@ -2835,22 +2935,22 @@ static int ravb_probe(struct platform_device *pdev)
"Cannot allocate desc base address table (size %d bytes)\n",
priv->desc_bat_size);
error = -ENOMEM;
- goto out_disable_gptp_clk;
+ goto out_rpm_put;
}
for (q = RAVB_BE; q < DBAT_ENTRY_NUM; q++)
priv->desc_bat[q].die_dt = DT_EOS;
- ravb_write(ndev, priv->desc_bat_dma, DBAT);
/* Initialise HW timestamp list */
INIT_LIST_HEAD(&priv->ts_skb_list);
- /* Initialise PTP Clock driver */
- if (info->ccc_gac)
- ravb_ptp_init(ndev, pdev);
-
/* Debug message level */
priv->msg_enable = RAVB_DEF_MSG_ENABLE;
+ /* Set config mode as this is needed for PHY initialization. */
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ if (error)
+ goto out_rpm_put;
+
/* Read and set MAC address */
ravb_read_mac_address(np, ndev);
if (!is_valid_ether_addr(ndev->dev_addr)) {
@@ -2863,9 +2963,14 @@ static int ravb_probe(struct platform_device *pdev)
error = ravb_mdio_init(priv);
if (error) {
dev_err(&pdev->dev, "failed to initialize MDIO\n");
- goto out_dma_free;
+ goto out_reset_mode;
}
+ /* Undo previous switch to config opmode. */
+ error = ravb_set_opmode(ndev, CCC_OPC_RESET);
+ if (error)
+ goto out_mdio_release;
+
netif_napi_add(ndev, &priv->napi[RAVB_BE], ravb_poll);
if (info->nc_queues)
netif_napi_add(ndev, &priv->napi[RAVB_NC], ravb_poll);
@@ -2881,7 +2986,8 @@ static int ravb_probe(struct platform_device *pdev)
netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n",
(u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
- platform_set_drvdata(pdev, ndev);
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
return 0;
@@ -2890,22 +2996,19 @@ out_napi_del:
netif_napi_del(&priv->napi[RAVB_NC]);
netif_napi_del(&priv->napi[RAVB_BE]);
+out_mdio_release:
ravb_mdio_release(priv);
-out_dma_free:
+out_reset_mode:
+ ravb_set_opmode(ndev, CCC_OPC_RESET);
dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
priv->desc_bat_dma);
-
- /* Stop PTP Clock driver */
- if (info->ccc_gac)
- ravb_ptp_stop(ndev);
-out_disable_gptp_clk:
- clk_disable_unprepare(priv->gptp_clk);
-out_disable_refclk:
- clk_disable_unprepare(priv->refclk);
-out_release:
+out_rpm_put:
pm_runtime_put(&pdev->dev);
out_rpm_disable:
pm_runtime_disable(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ clk_unprepare(priv->refclk);
+out_reset_assert:
reset_control_assert(rstc);
out_free_netdev:
free_netdev(ndev);
@@ -2917,6 +3020,12 @@ static void ravb_remove(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ struct device *dev = &priv->pdev->dev;
+ int error;
+
+ error = pm_runtime_resume_and_get(dev);
+ if (error < 0)
+ return;
unregister_netdev(ndev);
if (info->nc_queues)
@@ -2925,20 +3034,13 @@ static void ravb_remove(struct platform_device *pdev)
ravb_mdio_release(priv);
- /* Stop PTP Clock driver */
- if (info->ccc_gac)
- ravb_ptp_stop(ndev);
-
dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
priv->desc_bat_dma);
- ravb_set_opmode(ndev, CCC_OPC_RESET);
-
- clk_disable_unprepare(priv->gptp_clk);
- clk_disable_unprepare(priv->refclk);
-
- pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_put_sync_suspend(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(dev);
+ clk_unprepare(priv->refclk);
reset_control_assert(priv->rstc);
free_netdev(ndev);
platform_set_drvdata(pdev, NULL);
@@ -2964,6 +3066,9 @@ static int ravb_wol_setup(struct net_device *ndev)
/* Enable MagicPacket */
ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
+ if (priv->info->ccc_gac)
+ ravb_ptp_stop(ndev);
+
return enable_irq_wake(priv->emac_irq);
}
@@ -2971,6 +3076,20 @@ static int ravb_wol_restore(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ int error;
+
+ /* Set reset mode to rearm the WoL logic. */
+ error = ravb_set_opmode(ndev, CCC_OPC_RESET);
+ if (error)
+ return error;
+
+ /* Set AVB config mode. */
+ error = ravb_set_config_mode(ndev);
+ if (error)
+ return error;
+
+ if (priv->info->ccc_gac)
+ ravb_ptp_init(ndev, priv->pdev);
if (info->nc_queues)
napi_enable(&priv->napi[RAVB_NC]);
@@ -2984,102 +3103,96 @@ static int ravb_wol_restore(struct net_device *ndev)
return disable_irq_wake(priv->emac_irq);
}
-static int __maybe_unused ravb_suspend(struct device *dev)
+static int ravb_suspend(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct ravb_private *priv = netdev_priv(ndev);
int ret;
if (!netif_running(ndev))
- return 0;
+ goto reset_assert;
netif_device_detach(ndev);
if (priv->wol_enabled)
- ret = ravb_wol_setup(ndev);
- else
- ret = ravb_close(ndev);
+ return ravb_wol_setup(ndev);
- if (priv->info->ccc_gac)
- ravb_ptp_stop(ndev);
+ ret = ravb_close(ndev);
+ if (ret)
+ return ret;
- return ret;
+ ret = pm_runtime_force_suspend(&priv->pdev->dev);
+ if (ret)
+ return ret;
+
+reset_assert:
+ return reset_control_assert(priv->rstc);
}
-static int __maybe_unused ravb_resume(struct device *dev)
+static int ravb_resume(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct ravb_private *priv = netdev_priv(ndev);
- const struct ravb_hw_info *info = priv->info;
- int ret = 0;
-
- /* If WoL is enabled set reset mode to rearm the WoL logic */
- if (priv->wol_enabled) {
- ret = ravb_set_opmode(ndev, CCC_OPC_RESET);
- if (ret)
- return ret;
- }
-
- /* All register have been reset to default values.
- * Restore all registers which where setup at probe time and
- * reopen device if it was running before system suspended.
- */
+ int ret;
- /* Set AVB config mode */
- ret = ravb_set_config_mode(ndev);
+ ret = reset_control_deassert(priv->rstc);
if (ret)
return ret;
- if (info->gptp || info->ccc_gac) {
- /* Set GTI value */
- ret = ravb_set_gti(ndev);
+ if (!netif_running(ndev))
+ return 0;
+
+ /* If WoL is enabled restore the interface. */
+ if (priv->wol_enabled) {
+ ret = ravb_wol_restore(ndev);
+ if (ret)
+ return ret;
+ } else {
+ ret = pm_runtime_force_resume(dev);
if (ret)
return ret;
-
- /* Request GTI loading */
- ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
}
- if (info->internal_delay)
- ravb_set_delay_mode(ndev);
+ /* Reopening the interface will restore the device to the working state. */
+ ret = ravb_open(ndev);
+ if (ret < 0)
+ goto out_rpm_put;
- /* Restore descriptor base address table */
- ravb_write(ndev, priv->desc_bat_dma, DBAT);
+ ravb_set_rx_mode(ndev);
+ netif_device_attach(ndev);
- if (priv->info->ccc_gac)
- ravb_ptp_init(ndev, priv->pdev);
+ return 0;
- if (netif_running(ndev)) {
- if (priv->wol_enabled) {
- ret = ravb_wol_restore(ndev);
- if (ret)
- return ret;
- }
- ret = ravb_open(ndev);
- if (ret < 0)
- return ret;
- ravb_set_rx_mode(ndev);
- netif_device_attach(ndev);
+out_rpm_put:
+ if (!priv->wol_enabled) {
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
}
return ret;
}
-static int __maybe_unused ravb_runtime_nop(struct device *dev)
+static int ravb_runtime_suspend(struct device *dev)
{
- /* Runtime PM callback shared between ->runtime_suspend()
- * and ->runtime_resume(). Simply returns success.
- *
- * This driver re-initializes all registers after
- * pm_runtime_get_sync() anyway so there is no need
- * to save and restore registers here.
- */
+ struct net_device *ndev = dev_get_drvdata(dev);
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ clk_disable(priv->refclk);
+
return 0;
}
+static int ravb_runtime_resume(struct device *dev)
+{
+ struct net_device *ndev = dev_get_drvdata(dev);
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ return clk_enable(priv->refclk);
+}
+
static const struct dev_pm_ops ravb_dev_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
- SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL)
+ SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
+ RUNTIME_PM_OPS(ravb_runtime_suspend, ravb_runtime_resume, NULL)
};
static struct platform_driver ravb_driver = {
@@ -3087,7 +3200,7 @@ static struct platform_driver ravb_driver = {
.remove_new = ravb_remove,
.driver = {
.name = "ravb",
- .pm = &ravb_dev_pm_ops,
+ .pm = pm_ptr(&ravb_dev_pm_ops),
.of_match_table = ravb_match_table,
},
};
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 9e59669a93dd..755db89db909 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -32,7 +32,6 @@
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <linux/io-64-nonatomic-lo-hi.h>
-#include <generated/utsrelease.h>
#include "rocker_hw.h"
#include "rocker.h"
@@ -2227,7 +2226,6 @@ static void rocker_port_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
strscpy(drvinfo->driver, rocker_driver_name, sizeof(drvinfo->driver));
- strscpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
}
static struct rocker_port_stats {
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
index d14e0cfc3a6b..1458939c3bf5 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
@@ -503,7 +503,6 @@ struct sxgbe_priv_data {
bool tx_path_in_lpi_mode;
int lpi_irq;
int eee_enabled;
- int eee_active;
int tx_lpi_timer;
};
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index 8ba017ec9849..4a439b34114d 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
@@ -133,22 +133,20 @@ static const struct sxgbe_stats sxgbe_gstrings_stats[] = {
#define SXGBE_STATS_LEN ARRAY_SIZE(sxgbe_gstrings_stats)
static int sxgbe_get_eee(struct net_device *dev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct sxgbe_priv_data *priv = netdev_priv(dev);
if (!priv->hw_cap.eee)
return -EOPNOTSUPP;
- edata->eee_enabled = priv->eee_enabled;
- edata->eee_active = priv->eee_active;
edata->tx_lpi_timer = priv->tx_lpi_timer;
return phy_ethtool_get_eee(dev->phydev, edata);
}
static int sxgbe_set_eee(struct net_device *dev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct sxgbe_priv_data *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 71439825ea4e..ecbe3994f2b1 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -130,7 +130,6 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv)
if (phy_init_eee(ndev->phydev, true))
return false;
- priv->eee_active = 1;
timer_setup(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer, 0);
priv->eee_ctrl_timer.expires = SXGBE_LPI_TIMER(eee_timer);
add_timer(&priv->eee_ctrl_timer);
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index 175bd9cdfdac..551f890db90a 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -595,7 +595,7 @@ void efx_stop_all(struct efx_nic *efx)
efx_stop_datapath(efx);
}
-/* Context: process, dev_base_lock or RTNL held, non-blocking. */
+/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
{
struct efx_nic *efx = efx_netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index e001f27085c6..1cb32aedd89c 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -2085,7 +2085,7 @@ int ef4_net_stop(struct net_device *net_dev)
return 0;
}
-/* Context: process, dev_base_lock or RTNL held, non-blocking. */
+/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
static void ef4_net_stats(struct net_device *net_dev,
struct rtnl_link_stats64 *stats)
{
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
index fac227d372db..dcd901eccfc8 100644
--- a/drivers/net/ethernet/sfc/rx_common.c
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
+#include <net/rps.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"
diff --git a/drivers/net/ethernet/sfc/siena/efx_common.c b/drivers/net/ethernet/sfc/siena/efx_common.c
index e4b294b8e9ac..88e5bc347a44 100644
--- a/drivers/net/ethernet/sfc/siena/efx_common.c
+++ b/drivers/net/ethernet/sfc/siena/efx_common.c
@@ -605,7 +605,7 @@ static size_t efx_siena_update_stats_atomic(struct efx_nic *efx, u64 *full_stats
return efx->type->update_stats(efx, full_stats, core_stats);
}
-/* Context: process, dev_base_lock or RTNL held, non-blocking. */
+/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
void efx_siena_net_stats(struct net_device *net_dev,
struct rtnl_link_stats64 *stats)
{
diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c
index 4579f43484c3..219fb358a646 100644
--- a/drivers/net/ethernet/sfc/siena/rx_common.c
+++ b/drivers/net/ethernet/sfc/siena/rx_common.c
@@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
+#include <net/rps.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"
diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c
index a7a9ab304e13..71f9b5ec5ae4 100644
--- a/drivers/net/ethernet/sfc/siena/tx_common.c
+++ b/drivers/net/ethernet/sfc/siena/tx_common.c
@@ -317,11 +317,10 @@ static int efx_tx_tso_header_length(struct sk_buff *skb)
size_t header_len;
if (skb->encapsulation)
- header_len = skb_inner_transport_header(skb) -
- skb->data +
+ header_len = skb_inner_transport_offset(skb) +
(inner_tcp_hdr(skb)->doff << 2u);
else
- header_len = skb_transport_header(skb) - skb->data +
+ header_len = skb_transport_offset(skb) +
(tcp_hdr(skb)->doff << 2u);
return header_len;
}
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
index 9f2393d34371..2adb132b2f7e 100644
--- a/drivers/net/ethernet/sfc/tx_common.c
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -336,11 +336,10 @@ int efx_tx_tso_header_length(struct sk_buff *skb)
size_t header_len;
if (skb->encapsulation)
- header_len = skb_inner_transport_header(skb) -
- skb->data +
+ header_len = skb_inner_transport_offset(skb) +
(inner_tcp_hdr(skb)->doff << 2u);
else
- header_len = skb_transport_header(skb) - skb->data +
+ header_len = skb_transport_offset(skb) +
(tcp_hdr(skb)->doff << 2u);
return header_len;
}
diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c
index 64a6768f75ea..ddf149db8180 100644
--- a/drivers/net/ethernet/sfc/tx_tso.c
+++ b/drivers/net/ethernet/sfc/tx_tso.c
@@ -174,8 +174,8 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx,
unsigned int header_len, in_len;
dma_addr_t dma_addr;
- st->ip_off = skb_network_header(skb) - skb->data;
- st->tcp_off = skb_transport_header(skb) - skb->data;
+ st->ip_off = skb_network_offset(skb);
+ st->tcp_off = skb_transport_offset(skb);
header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
in_len = skb_headlen(skb) - header_len;
st->header_len = header_len;
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 758347616535..78ff3af7911a 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -98,6 +98,7 @@ static int watchdog = 1000;
module_param(watchdog, int, 0400);
MODULE_PARM_DESC(watchdog, "transmit timeout in milliseconds");
+MODULE_DESCRIPTION("SMC 91C9x/91C1xxx Ethernet driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:smc91x");
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 31cb7d0166f0..74f1ccc96459 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -56,6 +56,7 @@
#define SMSC_MDIONAME "smsc911x-mdio"
#define SMSC_DRV_VERSION "2008-10-21"
+MODULE_DESCRIPTION("SMSC LAN911x/LAN921x Ethernet driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(SMSC_DRV_VERSION);
MODULE_ALIAS("platform:smsc911x");
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index e1c4a11c1f18..15cb96c2506d 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -26,6 +26,7 @@
#define DRV_DESCRIPTION "SMSC LAN9420 driver"
#define DRV_VERSION "1.01"
+MODULE_DESCRIPTION("SMSC LAN9420 Ethernet driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 85dcda51df05..4ec61f1ee71a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -165,9 +165,9 @@ config DWMAC_STARFIVE
help
Support for ethernet controllers on StarFive RISC-V SoCs
- This selects the StarFive platform specific glue layer support for
- the stmmac device driver. This driver is used for StarFive JH7110
- ethernet controller.
+ This selects the StarFive platform specific glue layer support
+ for the stmmac device driver. This driver is used for the
+ StarFive JH7100 and JH7110 ethernet controllers.
config DWMAC_STI
tristate "STi GMAC support"
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 721c1f8e892f..a6fefe675ef1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -59,28 +59,51 @@
#undef FRAME_FILTER_DEBUG
/* #define FRAME_FILTER_DEBUG */
+struct stmmac_q_tx_stats {
+ u64_stats_t tx_bytes;
+ u64_stats_t tx_set_ic_bit;
+ u64_stats_t tx_tso_frames;
+ u64_stats_t tx_tso_nfrags;
+};
+
+struct stmmac_napi_tx_stats {
+ u64_stats_t tx_packets;
+ u64_stats_t tx_pkt_n;
+ u64_stats_t poll;
+ u64_stats_t tx_clean;
+ u64_stats_t tx_set_ic_bit;
+};
+
struct stmmac_txq_stats {
- u64 tx_bytes;
- u64 tx_packets;
- u64 tx_pkt_n;
- u64 tx_normal_irq_n;
- u64 napi_poll;
- u64 tx_clean;
- u64 tx_set_ic_bit;
- u64 tx_tso_frames;
- u64 tx_tso_nfrags;
- struct u64_stats_sync syncp;
+ /* Updates protected by tx queue lock. */
+ struct u64_stats_sync q_syncp;
+ struct stmmac_q_tx_stats q;
+
+ /* Updates protected by NAPI poll logic. */
+ struct u64_stats_sync napi_syncp;
+ struct stmmac_napi_tx_stats napi;
} ____cacheline_aligned_in_smp;
+struct stmmac_napi_rx_stats {
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_packets;
+ u64_stats_t rx_pkt_n;
+ u64_stats_t poll;
+};
+
struct stmmac_rxq_stats {
- u64 rx_bytes;
- u64 rx_packets;
- u64 rx_pkt_n;
- u64 rx_normal_irq_n;
- u64 napi_poll;
- struct u64_stats_sync syncp;
+ /* Updates protected by NAPI poll logic. */
+ struct u64_stats_sync napi_syncp;
+ struct stmmac_napi_rx_stats napi;
} ____cacheline_aligned_in_smp;
+/* Updates on each CPU protected by not allowing nested irqs. */
+struct stmmac_pcpu_stats {
+ struct u64_stats_sync syncp;
+ u64_stats_t rx_normal_irq_n[MTL_MAX_TX_QUEUES];
+ u64_stats_t tx_normal_irq_n[MTL_MAX_RX_QUEUES];
+};
+
/* Extra statistic and debug information exposed by ethtool */
struct stmmac_extra_stats {
/* Transmit errors */
@@ -202,9 +225,12 @@ struct stmmac_extra_stats {
unsigned long mtl_est_hlbf;
unsigned long mtl_est_btre;
unsigned long mtl_est_btrlm;
+ unsigned long max_sdu_txq_drop[MTL_MAX_TX_QUEUES];
+ unsigned long mtl_est_txq_hlbf[MTL_MAX_TX_QUEUES];
/* per queue statistics */
struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
+ struct stmmac_pcpu_stats __percpu *pcpu_stats;
unsigned long rx_dropped;
unsigned long rx_errors;
unsigned long tx_dropped;
@@ -216,6 +242,7 @@ struct stmmac_safety_stats {
unsigned long mac_errors[32];
unsigned long mtl_errors[32];
unsigned long dma_errors[32];
+ unsigned long dma_dpp_errors[32];
};
/* Number of fields in Safety Stats */
@@ -344,6 +371,7 @@ enum request_irq_err {
REQ_IRQ_ERR_ALL,
REQ_IRQ_ERR_TX,
REQ_IRQ_ERR_RX,
+ REQ_IRQ_ERR_SFTY,
REQ_IRQ_ERR_SFTY_UE,
REQ_IRQ_ERR_SFTY_CE,
REQ_IRQ_ERR_LPI,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 8f730ada71f9..6b65420e11b5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -353,6 +353,10 @@ static int imx_dwmac_probe(struct platform_device *pdev)
if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY)
plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY;
+ /* Default TX Q0 to use TSO and rest TXQ for TBS */
+ for (int i = 1; i < plat_dat->tx_queues_to_use; i++)
+ plat_dat->tx_queues_cfg[i].tbs_en = 1;
+
plat_dat->host_dma_width = dwmac->ops->addr_width;
plat_dat->init = imx_dwmac_init;
plat_dat->exit = imx_dwmac_exit;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 31631e3f89d0..e254b21fdb59 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -106,6 +106,7 @@ struct qcom_ethqos {
struct clk *link_clk;
struct phy *serdes_phy;
unsigned int speed;
+ int serdes_speed;
phy_interface_t phy_mode;
const struct ethqos_emac_por *por;
@@ -169,6 +170,9 @@ static void rgmii_dump(void *priv)
static void
ethqos_update_link_clk(struct qcom_ethqos *ethqos, unsigned int speed)
{
+ if (!phy_interface_mode_is_rgmii(ethqos->phy_mode))
+ return;
+
switch (speed) {
case SPEED_1000:
ethqos->link_clk_rate = RGMII_1000_NOM_CLK_FREQ;
@@ -606,19 +610,39 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos)
*/
static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
{
+ struct net_device *dev = platform_get_drvdata(ethqos->pdev);
+ struct stmmac_priv *priv = netdev_priv(dev);
int val;
val = readl(ethqos->mac_base + MAC_CTRL_REG);
switch (ethqos->speed) {
+ case SPEED_2500:
+ val &= ~ETHQOS_MAC_CTRL_PORT_SEL;
+ rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG,
+ RGMII_CONFIG2_RGMII_CLK_SEL_CFG,
+ RGMII_IO_MACRO_CONFIG2);
+ if (ethqos->serdes_speed != SPEED_2500)
+ phy_set_speed(ethqos->serdes_phy, SPEED_2500);
+ ethqos->serdes_speed = SPEED_2500;
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 0, 0, 0);
+ break;
case SPEED_1000:
val &= ~ETHQOS_MAC_CTRL_PORT_SEL;
rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG,
RGMII_CONFIG2_RGMII_CLK_SEL_CFG,
RGMII_IO_MACRO_CONFIG2);
+ if (ethqos->serdes_speed != SPEED_1000)
+ phy_set_speed(ethqos->serdes_phy, SPEED_1000);
+ ethqos->serdes_speed = SPEED_1000;
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, 0, 0);
break;
case SPEED_100:
val |= ETHQOS_MAC_CTRL_PORT_SEL | ETHQOS_MAC_CTRL_SPEED_MODE;
+ if (ethqos->serdes_speed != SPEED_1000)
+ phy_set_speed(ethqos->serdes_phy, SPEED_1000);
+ ethqos->serdes_speed = SPEED_1000;
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, 0, 0);
break;
case SPEED_10:
val |= ETHQOS_MAC_CTRL_PORT_SEL;
@@ -627,6 +651,10 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR,
SGMII_10M_RX_CLK_DVDR),
RGMII_IO_MACRO_CONFIG);
+ if (ethqos->serdes_speed != SPEED_1000)
+ phy_set_speed(ethqos->serdes_phy, ethqos->speed);
+ ethqos->serdes_speed = SPEED_1000;
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, 0, 0);
break;
}
@@ -728,7 +756,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
struct stmmac_resources stmmac_res;
struct device *dev = &pdev->dev;
struct qcom_ethqos *ethqos;
- int ret;
+ int ret, i;
ret = stmmac_get_platform_resources(pdev, &stmmac_res);
if (ret)
@@ -799,6 +827,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
"Failed to get serdes phy\n");
ethqos->speed = SPEED_1000;
+ ethqos->serdes_speed = SPEED_1000;
ethqos_update_link_clk(ethqos, SPEED_1000);
ethqos_set_func_clk_en(ethqos);
@@ -822,6 +851,10 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown;
}
+ /* Enable TSO on queue0 and enable TBS on rest of the queues */
+ for (i = 1; i < plat_dat->tx_queues_to_use; i++)
+ plat_dat->tx_queues_cfg[i].tbs_en = 1;
+
return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index ba2ce776bd4d..68f85e4605cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -585,4 +585,5 @@ static struct platform_driver socfpga_dwmac_driver = {
};
module_platform_driver(socfpga_dwmac_driver);
+MODULE_DESCRIPTION("Altera SOC DWMAC Specific Glue layer");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
index 5d630affb4d1..4e1076faee0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -15,13 +15,20 @@
#include "stmmac_platform.h"
-#define STARFIVE_DWMAC_PHY_INFT_RGMII 0x1
-#define STARFIVE_DWMAC_PHY_INFT_RMII 0x4
-#define STARFIVE_DWMAC_PHY_INFT_FIELD 0x7U
+#define STARFIVE_DWMAC_PHY_INFT_RGMII 0x1
+#define STARFIVE_DWMAC_PHY_INFT_RMII 0x4
+#define STARFIVE_DWMAC_PHY_INFT_FIELD 0x7U
+
+#define JH7100_SYSMAIN_REGISTER49_DLYCHAIN 0xc8
+
+struct starfive_dwmac_data {
+ unsigned int gtxclk_dlychain;
+};
struct starfive_dwmac {
struct device *dev;
struct clk *clk_tx;
+ const struct starfive_dwmac_data *data;
};
static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
@@ -67,6 +74,8 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat)
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
mode = STARFIVE_DWMAC_PHY_INFT_RGMII;
break;
@@ -89,6 +98,14 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat)
if (err)
return dev_err_probe(dwmac->dev, err, "error setting phy mode\n");
+ if (dwmac->data) {
+ err = regmap_write(regmap, JH7100_SYSMAIN_REGISTER49_DLYCHAIN,
+ dwmac->data->gtxclk_dlychain);
+ if (err)
+ return dev_err_probe(dwmac->dev, err,
+ "error selecting gtxclk delay chain\n");
+ }
+
return 0;
}
@@ -114,6 +131,8 @@ static int starfive_dwmac_probe(struct platform_device *pdev)
if (!dwmac)
return -ENOMEM;
+ dwmac->data = device_get_match_data(&pdev->dev);
+
dwmac->clk_tx = devm_clk_get_enabled(&pdev->dev, "tx");
if (IS_ERR(dwmac->clk_tx))
return dev_err_probe(&pdev->dev, PTR_ERR(dwmac->clk_tx),
@@ -144,8 +163,13 @@ static int starfive_dwmac_probe(struct platform_device *pdev)
return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
+static const struct starfive_dwmac_data jh7100_data = {
+ .gtxclk_dlychain = 4,
+};
+
static const struct of_device_id starfive_dwmac_match[] = {
- { .compatible = "starfive,jh7110-dwmac" },
+ { .compatible = "starfive,jh7100-dwmac", .data = &jh7100_data },
+ { .compatible = "starfive,jh7110-dwmac" },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, starfive_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 137741b94122..b21d99faa2d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -441,8 +441,7 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
int ret = 0;
u32 v;
@@ -455,9 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
if (v & EMAC_TX_INT) {
ret |= handle_tx;
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
}
if (v & EMAC_TX_DMA_STOP_INT)
@@ -479,9 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
if (v & EMAC_RX_INT) {
ret |= handle_rx;
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
}
if (v & EMAC_RX_BUF_UA_INT)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 358e7dcb6a9a..17d9120db5fe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -92,7 +92,7 @@
#define DMA_TBS_FTOV BIT(0)
#define DMA_TBS_DEF_FTOS (DMA_TBS_FTOS | DMA_TBS_FTOV)
-/* Following DMA defines are chanels oriented */
+/* Following DMA defines are channel-oriented */
#define DMA_CHAN_BASE_ADDR 0x00001100
#define DMA_CHAN_BASE_OFFSET 0x80
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 9470d3fd2ded..0d185e54eb7e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,8 +171,7 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
int ret = 0;
if (dir == DMA_DIR_RX)
@@ -201,15 +200,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
}
/* TX/RX NORMAL interrupts */
if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_rx;
}
if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_tx;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 7907d62d3437..85e18f9a22f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -162,8 +162,7 @@ static void show_rx_process_state(unsigned int status)
int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan, u32 dir)
{
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
int ret = 0;
/* read the status register (CSR5) */
u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -215,16 +214,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
u32 value = readl(ioaddr + DMA_INTR_ENA);
/* to schedule NAPI on real RIE event. */
if (likely(value & DMA_INTR_ENA_RIE)) {
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_rx;
}
}
if (likely(intr_status & DMA_STATUS_TI)) {
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_tx;
}
if (unlikely(intr_status & DMA_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 207ff1799f2c..6a2c7d22df1e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -303,6 +303,8 @@
#define XGMAC_RXCEIE BIT(4)
#define XGMAC_TXCEIE BIT(0)
#define XGMAC_MTL_ECC_INT_STATUS 0x000010cc
+#define XGMAC_MTL_DPP_CONTROL 0x000010e0
+#define XGMAC_DPP_DISABLE BIT(0)
#define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x)))
#define XGMAC_TQS GENMASK(25, 16)
#define XGMAC_TQS_SHIFT 16
@@ -385,6 +387,7 @@
#define XGMAC_DCEIE BIT(1)
#define XGMAC_TCEIE BIT(0)
#define XGMAC_DMA_ECC_INT_STATUS 0x0000306c
+#define XGMAC_DMA_DPP_INT_STATUS 0x00003074
#define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x)))
#define XGMAC_SPH BIT(24)
#define XGMAC_PBLx8 BIT(16)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index eb48211d9b0e..1af2f89a0504 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -830,6 +830,44 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
};
+#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error"
+#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error"
+
+static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
+ { true, "TDPES0", DPP_TX_ERR },
+ { true, "TDPES1", DPP_TX_ERR },
+ { true, "TDPES2", DPP_TX_ERR },
+ { true, "TDPES3", DPP_TX_ERR },
+ { true, "TDPES4", DPP_TX_ERR },
+ { true, "TDPES5", DPP_TX_ERR },
+ { true, "TDPES6", DPP_TX_ERR },
+ { true, "TDPES7", DPP_TX_ERR },
+ { true, "TDPES8", DPP_TX_ERR },
+ { true, "TDPES9", DPP_TX_ERR },
+ { true, "TDPES10", DPP_TX_ERR },
+ { true, "TDPES11", DPP_TX_ERR },
+ { true, "TDPES12", DPP_TX_ERR },
+ { true, "TDPES13", DPP_TX_ERR },
+ { true, "TDPES14", DPP_TX_ERR },
+ { true, "TDPES15", DPP_TX_ERR },
+ { true, "RDPES0", DPP_RX_ERR },
+ { true, "RDPES1", DPP_RX_ERR },
+ { true, "RDPES2", DPP_RX_ERR },
+ { true, "RDPES3", DPP_RX_ERR },
+ { true, "RDPES4", DPP_RX_ERR },
+ { true, "RDPES5", DPP_RX_ERR },
+ { true, "RDPES6", DPP_RX_ERR },
+ { true, "RDPES7", DPP_RX_ERR },
+ { true, "RDPES8", DPP_RX_ERR },
+ { true, "RDPES9", DPP_RX_ERR },
+ { true, "RDPES10", DPP_RX_ERR },
+ { true, "RDPES11", DPP_RX_ERR },
+ { true, "RDPES12", DPP_RX_ERR },
+ { true, "RDPES13", DPP_RX_ERR },
+ { true, "RDPES14", DPP_RX_ERR },
+ { true, "RDPES15", DPP_RX_ERR },
+};
+
static void dwxgmac3_handle_dma_err(struct net_device *ndev,
void __iomem *ioaddr, bool correctable,
struct stmmac_safety_stats *stats)
@@ -841,6 +879,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
dwxgmac3_log_error(ndev, value, correctable, "DMA",
dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
+
+ value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
+ writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
+
+ dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
+ dwxgmac3_dma_dpp_errors,
+ STAT_OFF(dma_dpp_errors), stats);
}
static int
@@ -881,6 +926,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
+ /* 5. Enable Data Path Parity Protection */
+ value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
+ /* already enabled by default, explicit enable it again */
+ value &= ~XGMAC_DPP_DISABLE;
+ writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
+
return 0;
}
@@ -914,7 +965,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
ret |= !corr;
}
- err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
+ /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
+ * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
+ * Parity Errors here
+ */
+ err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
corr = dma & XGMAC_DECIS;
if (err) {
dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
@@ -930,6 +985,7 @@ static const struct dwxgmac3_error {
{ dwxgmac3_mac_errors },
{ dwxgmac3_mtl_errors },
{ dwxgmac3_dma_errors },
+ { dwxgmac3_dma_dpp_errors },
};
static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 3cde695fec91..dd2ab6185c40 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,8 +337,7 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
int ret = 0;
@@ -367,15 +366,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
/* TX/RX NORMAL interrupts */
if (likely(intr_status & XGMAC_NIS)) {
if (likely(intr_status & XGMAC_RI)) {
- u64_stats_update_begin(&rxq_stats->syncp);
- rxq_stats->rx_normal_irq_n++;
- u64_stats_update_end(&rxq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_rx;
}
if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
- u64_stats_update_begin(&txq_stats->syncp);
- txq_stats->tx_normal_irq_n++;
- u64_stats_update_end(&txq_stats->syncp);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
ret |= handle_tx;
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 1bd34b2a47e8..29367105df54 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -224,7 +224,7 @@ static const struct stmmac_hwif_entry {
.regs = {
.ptp_off = PTP_GMAC4_OFFSET,
.mmc_off = MMC_GMAC4_OFFSET,
- .est_off = EST_XGMAC_OFFSET,
+ .est_off = EST_GMAC4_OFFSET,
},
.desc = &dwmac4_desc_ops,
.dma = &dwmac410_dma_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index 14c9d2637dfe..dff02d75d519 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -86,10 +86,6 @@ struct stmmac_counters {
unsigned int mmc_rx_discard_octets_gb;
unsigned int mmc_rx_align_err_frames;
- /* IPC */
- unsigned int mmc_rx_ipc_intr_mask;
- unsigned int mmc_rx_ipc_intr;
-
/* IPv4 */
unsigned int mmc_rx_ipv4_gd;
unsigned int mmc_rx_ipv4_hderr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 8597c6abae8d..7eb477faa75a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -316,9 +316,6 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
- /* IPC */
- mmc->mmc_rx_ipc_intr_mask += readl(mmcaddr + MMC_RX_IPC_INTR_MASK);
- mmc->mmc_rx_ipc_intr += readl(mmcaddr + MMC_RX_IPC_INTR);
/* IPv4 */
mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index f155e4841c62..dddcaa9220cc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -31,6 +31,7 @@ struct stmmac_resources {
int wol_irq;
int lpi_irq;
int irq;
+ int sfty_irq;
int sfty_ce_irq;
int sfty_ue_irq;
int rx_irq[MTL_MAX_RX_QUEUES];
@@ -298,6 +299,7 @@ struct stmmac_priv {
void __iomem *ptpaddr;
void __iomem *estaddr;
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ int sfty_irq;
int sfty_ce_irq;
int sfty_ue_irq;
int rx_irq[MTL_MAX_RX_QUEUES];
@@ -306,6 +308,7 @@ struct stmmac_priv {
char int_name_mac[IFNAMSIZ + 9];
char int_name_wol[IFNAMSIZ + 9];
char int_name_lpi[IFNAMSIZ + 9];
+ char int_name_sfty[IFNAMSIZ + 10];
char int_name_sfty_ce[IFNAMSIZ + 10];
char int_name_sfty_ue[IFNAMSIZ + 10];
char int_name_rx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 14];
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
index 4da6ccc17c20..c9693f77e1f6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
@@ -81,6 +81,7 @@ static void est_irq_status(struct stmmac_priv *priv, struct net_device *dev,
u32 status, value, feqn, hbfq, hbfs, btrl, btrl_max;
void __iomem *est_addr = priv->estaddr;
u32 txqcnt_mask = BIT(txqcnt) - 1;
+ int i;
status = readl(est_addr + EST_STATUS);
@@ -125,6 +126,11 @@ static void est_irq_status(struct stmmac_priv *priv, struct net_device *dev,
x->mtl_est_hlbf++;
+ for (i = 0; i < txqcnt; i++) {
+ if (feqn & BIT(i))
+ x->mtl_est_txq_hlbf[i]++;
+ }
+
/* Clear Interrupt */
writel(feqn, est_addr + EST_FRM_SZ_ERR);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 42d27b97dd1d..e1537a57815f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -243,8 +243,6 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
STMMAC_MMC_STAT(mmc_rx_discard_octets_gb),
STMMAC_MMC_STAT(mmc_rx_align_err_frames),
- STMMAC_MMC_STAT(mmc_rx_ipc_intr_mask),
- STMMAC_MMC_STAT(mmc_rx_ipc_intr),
STMMAC_MMC_STAT(mmc_rx_ipv4_gd),
STMMAC_MMC_STAT(mmc_rx_ipv4_hderr),
STMMAC_MMC_STAT(mmc_rx_ipv4_nopay),
@@ -549,44 +547,79 @@ stmmac_set_pauseparam(struct net_device *netdev,
}
}
+static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+ u64 total;
+ int cpu;
+
+ total = 0;
+ for_each_possible_cpu(cpu) {
+ struct stmmac_pcpu_stats *pcpu;
+ unsigned int start;
+ u64 irq_n;
+
+ pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+ do {
+ start = u64_stats_fetch_begin(&pcpu->syncp);
+ irq_n = u64_stats_read(&pcpu->rx_normal_irq_n[q]);
+ } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+ total += irq_n;
+ }
+ return total;
+}
+
+static u64 stmmac_get_tx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+ u64 total;
+ int cpu;
+
+ total = 0;
+ for_each_possible_cpu(cpu) {
+ struct stmmac_pcpu_stats *pcpu;
+ unsigned int start;
+ u64 irq_n;
+
+ pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+ do {
+ start = u64_stats_fetch_begin(&pcpu->syncp);
+ irq_n = u64_stats_read(&pcpu->tx_normal_irq_n[q]);
+ } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+ total += irq_n;
+ }
+ return total;
+}
+
static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
u32 rx_cnt = priv->plat->rx_queues_to_use;
unsigned int start;
- int q, stat;
- char *p;
+ int q;
for (q = 0; q < tx_cnt; q++) {
struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
- struct stmmac_txq_stats snapshot;
+ u64 pkt_n;
do {
- start = u64_stats_fetch_begin(&txq_stats->syncp);
- snapshot = *txq_stats;
- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+ pkt_n = u64_stats_read(&txq_stats->napi.tx_pkt_n);
+ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
- p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
- for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
- *data++ = (*(u64 *)p);
- p += sizeof(u64);
- }
+ *data++ = pkt_n;
+ *data++ = stmmac_get_tx_normal_irq_n(priv, q);
}
for (q = 0; q < rx_cnt; q++) {
struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
- struct stmmac_rxq_stats snapshot;
+ u64 pkt_n;
do {
- start = u64_stats_fetch_begin(&rxq_stats->syncp);
- snapshot = *rxq_stats;
- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+ pkt_n = u64_stats_read(&rxq_stats->napi.rx_pkt_n);
+ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
- p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
- for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
- *data++ = (*(u64 *)p);
- p += sizeof(u64);
- }
+ *data++ = pkt_n;
+ *data++ = stmmac_get_rx_normal_irq_n(priv, q);
}
}
@@ -645,39 +678,49 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
pos = j;
for (i = 0; i < rx_queues_count; i++) {
struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i];
- struct stmmac_rxq_stats snapshot;
+ struct stmmac_napi_rx_stats snapshot;
+ u64 n_irq;
j = pos;
do {
- start = u64_stats_fetch_begin(&rxq_stats->syncp);
- snapshot = *rxq_stats;
- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
-
- data[j++] += snapshot.rx_pkt_n;
- data[j++] += snapshot.rx_normal_irq_n;
- normal_irq_n += snapshot.rx_normal_irq_n;
- napi_poll += snapshot.napi_poll;
+ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+ snapshot = rxq_stats->napi;
+ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
+
+ data[j++] += u64_stats_read(&snapshot.rx_pkt_n);
+ n_irq = stmmac_get_rx_normal_irq_n(priv, i);
+ data[j++] += n_irq;
+ normal_irq_n += n_irq;
+ napi_poll += u64_stats_read(&snapshot.poll);
}
pos = j;
for (i = 0; i < tx_queues_count; i++) {
struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i];
- struct stmmac_txq_stats snapshot;
+ struct stmmac_napi_tx_stats napi_snapshot;
+ struct stmmac_q_tx_stats q_snapshot;
+ u64 n_irq;
j = pos;
do {
- start = u64_stats_fetch_begin(&txq_stats->syncp);
- snapshot = *txq_stats;
- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
-
- data[j++] += snapshot.tx_pkt_n;
- data[j++] += snapshot.tx_normal_irq_n;
- normal_irq_n += snapshot.tx_normal_irq_n;
- data[j++] += snapshot.tx_clean;
- data[j++] += snapshot.tx_set_ic_bit;
- data[j++] += snapshot.tx_tso_frames;
- data[j++] += snapshot.tx_tso_nfrags;
- napi_poll += snapshot.napi_poll;
+ start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+ q_snapshot = txq_stats->q;
+ } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+ do {
+ start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+ napi_snapshot = txq_stats->napi;
+ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
+
+ data[j++] += u64_stats_read(&napi_snapshot.tx_pkt_n);
+ n_irq = stmmac_get_tx_normal_irq_n(priv, i);
+ data[j++] += n_irq;
+ normal_irq_n += n_irq;
+ data[j++] += u64_stats_read(&napi_snapshot.tx_clean);
+ data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) +
+ u64_stats_read(&napi_snapshot.tx_set_ic_bit);
+ data[j++] += u64_stats_read(&q_snapshot.tx_tso_frames);
+ data[j++] += u64_stats_read(&q_snapshot.tx_tso_nfrags);
+ napi_poll += u64_stats_read(&napi_snapshot.poll);
}
normal_irq_n += priv->xstats.rx_early_irq;
data[j++] = normal_irq_n;
@@ -852,15 +895,13 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
}
static int stmmac_ethtool_op_get_eee(struct net_device *dev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct stmmac_priv *priv = netdev_priv(dev);
if (!priv->dma_cap.eee)
return -EOPNOTSUPP;
- edata->eee_enabled = priv->eee_enabled;
- edata->eee_active = priv->eee_active;
edata->tx_lpi_timer = priv->tx_lpi_timer;
edata->tx_lpi_enabled = priv->tx_lpi_enabled;
@@ -868,7 +909,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev,
}
static int stmmac_ethtool_op_set_eee(struct net_device *dev,
- struct ethtool_eee *edata)
+ struct ethtool_keee *edata)
{
struct stmmac_priv *priv = netdev_priv(dev);
int ret;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a0e46369ae15..24cd80490d19 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2482,7 +2482,6 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
struct xdp_desc xdp_desc;
bool work_done = true;
u32 tx_set_ic_bit = 0;
- unsigned long flags;
/* Avoids TX time-out as we are sharing with slow path */
txq_trans_cond_update(nq);
@@ -2507,6 +2506,13 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
if (!xsk_tx_peek_desc(pool, &xdp_desc))
break;
+ if (priv->plat->est && priv->plat->est->enable &&
+ priv->plat->est->max_sdu[queue] &&
+ xdp_desc.len > priv->plat->est->max_sdu[queue]) {
+ priv->xstats.max_sdu_txq_drop[queue]++;
+ continue;
+ }
+
if (likely(priv->extend_desc))
tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry);
else if (tx_q->tbs & STMMAC_TBS_AVAIL)
@@ -2566,9 +2572,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
entry = tx_q->cur_tx;
}
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_set_ic_bit += tx_set_ic_bit;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_add(&txq_stats->napi.tx_set_ic_bit, tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->napi_syncp);
if (tx_desc) {
stmmac_flush_tx_descriptors(priv, queue);
@@ -2616,7 +2622,6 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
unsigned int bytes_compl = 0, pkts_compl = 0;
unsigned int entry, xmits = 0, count = 0;
u32 tx_packets = 0, tx_errors = 0;
- unsigned long flags;
__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
@@ -2674,7 +2679,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
}
if (skb) {
stmmac_get_tx_hwtstamp(priv, p, skb);
- } else {
+ } else if (tx_q->xsk_pool &&
+ xp_tx_metadata_enabled(tx_q->xsk_pool)) {
struct stmmac_xsk_tx_complete tx_compl = {
.priv = priv,
.desc = p,
@@ -2782,11 +2788,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
if (tx_q->dirty_tx != tx_q->cur_tx)
*pending_packets = true;
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_packets += tx_packets;
- txq_stats->tx_pkt_n += tx_packets;
- txq_stats->tx_clean++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_add(&txq_stats->napi.tx_packets, tx_packets);
+ u64_stats_add(&txq_stats->napi.tx_pkt_n, tx_packets);
+ u64_stats_inc(&txq_stats->napi.tx_clean);
+ u64_stats_update_end(&txq_stats->napi_syncp);
priv->xstats.tx_errors += tx_errors;
@@ -3592,6 +3598,10 @@ static void stmmac_free_irq(struct net_device *dev,
if (priv->wol_irq > 0 && priv->wol_irq != dev->irq)
free_irq(priv->wol_irq, dev);
fallthrough;
+ case REQ_IRQ_ERR_SFTY:
+ if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq)
+ free_irq(priv->sfty_irq, dev);
+ fallthrough;
case REQ_IRQ_ERR_WOL:
free_irq(dev->irq, dev);
fallthrough;
@@ -3662,6 +3672,23 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
}
}
+ /* Request the common Safety Feature Correctible/Uncorrectible
+ * Error line in case of another line is used
+ */
+ if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq) {
+ int_name = priv->int_name_sfty;
+ sprintf(int_name, "%s:%s", dev->name, "safety");
+ ret = request_irq(priv->sfty_irq, stmmac_safety_interrupt,
+ 0, int_name, dev);
+ if (unlikely(ret < 0)) {
+ netdev_err(priv->dev,
+ "%s: alloc sfty MSI %d (error: %d)\n",
+ __func__, priv->sfty_irq, ret);
+ irq_err = REQ_IRQ_ERR_SFTY;
+ goto irq_error;
+ }
+ }
+
/* Request the Safety Feature Correctible Error line in
* case of another line is used
*/
@@ -3799,6 +3826,21 @@ static int stmmac_request_irq_single(struct net_device *dev)
}
}
+ /* Request the common Safety Feature Correctible/Uncorrectible
+ * Error line in case of another line is used
+ */
+ if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq) {
+ ret = request_irq(priv->sfty_irq, stmmac_safety_interrupt,
+ IRQF_SHARED, dev->name, dev);
+ if (unlikely(ret < 0)) {
+ netdev_err(priv->dev,
+ "%s: ERROR: allocating the sfty IRQ %d (%d)\n",
+ __func__, priv->sfty_irq, ret);
+ irq_err = REQ_IRQ_ERR_SFTY;
+ goto irq_error;
+ }
+ }
+
return 0;
irq_error:
@@ -3932,6 +3974,9 @@ static int __stmmac_open(struct net_device *dev,
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
buf_sz = dma_conf->dma_buf_sz;
+ for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN)
+ dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs;
memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));
stmmac_reset_queues_param(priv);
@@ -4004,8 +4049,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
{
set_bit(__FPE_REMOVING, &priv->fpe_task_state);
- if (priv->fpe_wq)
+ if (priv->fpe_wq) {
destroy_workqueue(priv->fpe_wq);
+ priv->fpe_wq = NULL;
+ }
netdev_info(priv->dev, "FPE workqueue stop");
}
@@ -4210,7 +4257,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_tx_queue *tx_q;
bool has_vlan, set_ic;
u8 proto_hdr_len, hdr;
- unsigned long flags;
u32 pay_len, mss;
dma_addr_t des;
int i;
@@ -4375,13 +4421,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
}
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_bytes += skb->len;
- txq_stats->tx_tso_frames++;
- txq_stats->tx_tso_nfrags += nfrags;
+ u64_stats_update_begin(&txq_stats->q_syncp);
+ u64_stats_add(&txq_stats->q.tx_bytes, skb->len);
+ u64_stats_inc(&txq_stats->q.tx_tso_frames);
+ u64_stats_add(&txq_stats->q.tx_tso_nfrags, nfrags);
if (set_ic)
- txq_stats->tx_set_ic_bit++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->q_syncp);
if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4480,7 +4526,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_tx_queue *tx_q;
bool has_vlan, set_ic;
int entry, first_tx;
- unsigned long flags;
dma_addr_t des;
tx_q = &priv->dma_conf.tx_queue[queue];
@@ -4498,6 +4543,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
return stmmac_tso_xmit(skb, dev);
}
+ if (priv->plat->est && priv->plat->est->enable &&
+ priv->plat->est->max_sdu[queue] &&
+ skb->len > priv->plat->est->max_sdu[queue]){
+ priv->xstats.max_sdu_txq_drop[queue]++;
+ goto max_sdu_err;
+ }
+
if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
@@ -4650,11 +4702,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
}
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_bytes += skb->len;
+ u64_stats_update_begin(&txq_stats->q_syncp);
+ u64_stats_add(&txq_stats->q.tx_bytes, skb->len);
if (set_ic)
- txq_stats->tx_set_ic_bit++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->q_syncp);
if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4715,6 +4767,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
dma_map_err:
netdev_err(priv->dev, "Tx DMA map failed\n");
+max_sdu_err:
dev_kfree_skb(skb);
priv->xstats.tx_dropped++;
return NETDEV_TX_OK;
@@ -4871,6 +4924,13 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
if (stmmac_tx_avail(priv, queue) < STMMAC_TX_THRESH(priv))
return STMMAC_XDP_CONSUMED;
+ if (priv->plat->est && priv->plat->est->enable &&
+ priv->plat->est->max_sdu[queue] &&
+ xdpf->len > priv->plat->est->max_sdu[queue]) {
+ priv->xstats.max_sdu_txq_drop[queue]++;
+ return STMMAC_XDP_CONSUMED;
+ }
+
if (likely(priv->extend_desc))
tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry);
else if (tx_q->tbs & STMMAC_TBS_AVAIL)
@@ -4918,12 +4978,11 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
set_ic = false;
if (set_ic) {
- unsigned long flags;
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, tx_desc);
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->tx_set_ic_bit++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->q_syncp);
+ u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+ u64_stats_update_end(&txq_stats->q_syncp);
}
stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -5073,7 +5132,6 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
unsigned int len = xdp->data_end - xdp->data;
enum pkt_hash_types hash_type;
int coe = priv->hw->rx_csum;
- unsigned long flags;
struct sk_buff *skb;
u32 hash;
@@ -5103,10 +5161,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
skb_record_rx_queue(skb, queue);
napi_gro_receive(&ch->rxtx_napi, skb);
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->rx_pkt_n++;
- rxq_stats->rx_bytes += len;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_inc(&rxq_stats->napi.rx_pkt_n);
+ u64_stats_add(&rxq_stats->napi.rx_bytes, len);
+ u64_stats_update_end(&rxq_stats->napi_syncp);
}
static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
@@ -5188,7 +5246,6 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
unsigned int desc_size;
struct bpf_prog *prog;
bool failure = false;
- unsigned long flags;
int xdp_status = 0;
int status = 0;
@@ -5343,9 +5400,9 @@ read_again:
stmmac_finalize_xdp_rx(priv, xdp_status);
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->rx_pkt_n += count;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count);
+ u64_stats_update_end(&rxq_stats->napi_syncp);
priv->xstats.rx_dropped += rx_dropped;
priv->xstats.rx_errors += rx_errors;
@@ -5383,7 +5440,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
unsigned int desc_size;
struct sk_buff *skb = NULL;
struct stmmac_xdp_buff ctx;
- unsigned long flags;
int xdp_status = 0;
int buf_sz;
@@ -5643,11 +5699,11 @@ drain_data:
stmmac_rx_refill(priv, queue);
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->rx_packets += rx_packets;
- rxq_stats->rx_bytes += rx_bytes;
- rxq_stats->rx_pkt_n += count;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_add(&rxq_stats->napi.rx_packets, rx_packets);
+ u64_stats_add(&rxq_stats->napi.rx_bytes, rx_bytes);
+ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count);
+ u64_stats_update_end(&rxq_stats->napi_syncp);
priv->xstats.rx_dropped += rx_dropped;
priv->xstats.rx_errors += rx_errors;
@@ -5662,13 +5718,12 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
struct stmmac_priv *priv = ch->priv_data;
struct stmmac_rxq_stats *rxq_stats;
u32 chan = ch->index;
- unsigned long flags;
int work_done;
rxq_stats = &priv->xstats.rxq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_inc(&rxq_stats->napi.poll);
+ u64_stats_update_end(&rxq_stats->napi_syncp);
work_done = stmmac_rx(priv, budget, chan);
if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5690,13 +5745,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
struct stmmac_txq_stats *txq_stats;
bool pending_packets = false;
u32 chan = ch->index;
- unsigned long flags;
int work_done;
txq_stats = &priv->xstats.txq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_inc(&txq_stats->napi.poll);
+ u64_stats_update_end(&txq_stats->napi_syncp);
work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets);
work_done = min(work_done, budget);
@@ -5726,17 +5780,16 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
struct stmmac_rxq_stats *rxq_stats;
struct stmmac_txq_stats *txq_stats;
u32 chan = ch->index;
- unsigned long flags;
rxq_stats = &priv->xstats.rxq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
- rxq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+ u64_stats_update_begin(&rxq_stats->napi_syncp);
+ u64_stats_inc(&rxq_stats->napi.poll);
+ u64_stats_update_end(&rxq_stats->napi_syncp);
txq_stats = &priv->xstats.txq_stats[chan];
- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
- txq_stats->napi_poll++;
- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+ u64_stats_update_begin(&txq_stats->napi_syncp);
+ u64_stats_inc(&txq_stats->napi.poll);
+ u64_stats_update_end(&txq_stats->napi_syncp);
tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets);
tx_done = min(tx_done, budget);
@@ -6011,10 +6064,8 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
priv->tx_path_in_lpi_mode = false;
}
- for (queue = 0; queue < queues_count; queue++) {
- status = stmmac_host_mtl_irq_status(priv, priv->hw,
- queue);
- }
+ for (queue = 0; queue < queues_count; queue++)
+ stmmac_host_mtl_irq_status(priv, priv->hw, queue);
/* PCS link status */
if (priv->hw->pcs &&
@@ -6049,8 +6100,8 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
if (test_bit(STMMAC_DOWN, &priv->state))
return IRQ_HANDLED;
- /* Check if a fatal error happened */
- if (stmmac_safety_feat_interrupt(priv))
+ /* Check ASP error if it isn't delivered via an individual IRQ */
+ if (priv->sfty_irq <= 0 && stmmac_safety_feat_interrupt(priv))
return IRQ_HANDLED;
/* To handle Common interrupts */
@@ -6067,11 +6118,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id)
struct net_device *dev = (struct net_device *)dev_id;
struct stmmac_priv *priv = netdev_priv(dev);
- if (unlikely(!dev)) {
- netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
- return IRQ_NONE;
- }
-
/* Check if adapter is up */
if (test_bit(STMMAC_DOWN, &priv->state))
return IRQ_HANDLED;
@@ -6087,11 +6133,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id)
struct net_device *dev = (struct net_device *)dev_id;
struct stmmac_priv *priv = netdev_priv(dev);
- if (unlikely(!dev)) {
- netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
- return IRQ_NONE;
- }
-
/* Check if adapter is up */
if (test_bit(STMMAC_DOWN, &priv->state))
return IRQ_HANDLED;
@@ -6113,11 +6154,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data)
dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]);
priv = container_of(dma_conf, struct stmmac_priv, dma_conf);
- if (unlikely(!data)) {
- netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
- return IRQ_NONE;
- }
-
/* Check if adapter is up */
if (test_bit(STMMAC_DOWN, &priv->state))
return IRQ_HANDLED;
@@ -6144,11 +6180,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data)
dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]);
priv = container_of(dma_conf, struct stmmac_priv, dma_conf);
- if (unlikely(!data)) {
- netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
- return IRQ_NONE;
- }
-
/* Check if adapter is up */
if (test_bit(STMMAC_DOWN, &priv->state))
return IRQ_HANDLED;
@@ -7062,10 +7093,13 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
u64 tx_bytes;
do {
- start = u64_stats_fetch_begin(&txq_stats->syncp);
- tx_packets = txq_stats->tx_packets;
- tx_bytes = txq_stats->tx_bytes;
- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+ tx_bytes = u64_stats_read(&txq_stats->q.tx_bytes);
+ } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+ do {
+ start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+ tx_packets = u64_stats_read(&txq_stats->napi.tx_packets);
+ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
stats->tx_packets += tx_packets;
stats->tx_bytes += tx_bytes;
@@ -7077,10 +7111,10 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
u64 rx_bytes;
do {
- start = u64_stats_fetch_begin(&rxq_stats->syncp);
- rx_packets = rxq_stats->rx_packets;
- rx_bytes = rxq_stats->rx_bytes;
- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+ rx_packets = u64_stats_read(&rxq_stats->napi.rx_packets);
+ rx_bytes = u64_stats_read(&rxq_stats->napi.rx_bytes);
+ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
stats->rx_packets += rx_packets;
stats->rx_bytes += rx_bytes;
@@ -7474,9 +7508,16 @@ int stmmac_dvr_probe(struct device *device,
priv->dev = ndev;
for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
- u64_stats_init(&priv->xstats.rxq_stats[i].syncp);
- for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
- u64_stats_init(&priv->xstats.txq_stats[i].syncp);
+ u64_stats_init(&priv->xstats.rxq_stats[i].napi_syncp);
+ for (i = 0; i < MTL_MAX_TX_QUEUES; i++) {
+ u64_stats_init(&priv->xstats.txq_stats[i].q_syncp);
+ u64_stats_init(&priv->xstats.txq_stats[i].napi_syncp);
+ }
+
+ priv->xstats.pcpu_stats =
+ devm_netdev_alloc_pcpu_stats(device, struct stmmac_pcpu_stats);
+ if (!priv->xstats.pcpu_stats)
+ return -ENOMEM;
stmmac_set_ethtool_ops(ndev);
priv->pause = pause;
@@ -7489,6 +7530,7 @@ int stmmac_dvr_probe(struct device *device,
priv->dev->irq = res->irq;
priv->wol_irq = res->wol_irq;
priv->lpi_irq = res->lpi_irq;
+ priv->sfty_irq = res->sfty_irq;
priv->sfty_ce_irq = res->sfty_ce_irq;
priv->sfty_ue_irq = res->sfty_ue_irq;
for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
@@ -7542,6 +7584,9 @@ int stmmac_dvr_probe(struct device *device,
dev_err(priv->device, "unable to bring out of ahb reset: %pe\n",
ERR_PTR(ret));
+ /* Wait a bit for the reset to take effect */
+ udelay(10);
+
/* Init MAC and get the capabilities */
ret = stmmac_hw_init(priv);
if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
index aefc121464b5..13a30e6df4c1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
@@ -110,6 +110,8 @@ static inline void dwmac_ctrl_ane(void __iomem *ioaddr, u32 reg, bool ane,
/* Enable and restart the Auto-Negotiation */
if (ane)
value |= GMAC_AN_CTRL_ANE | GMAC_AN_CTRL_RAN;
+ else
+ value &= ~GMAC_AN_CTRL_ANE;
/* In case of MAC-2-MAC connection, block is configured to operate
* according to MAC conf register.
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 70eadc83ca68..54797edc9b38 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -743,6 +743,14 @@ int stmmac_get_platform_resources(struct platform_device *pdev,
dev_info(&pdev->dev, "IRQ eth_lpi not found\n");
}
+ stmmac_res->sfty_irq =
+ platform_get_irq_byname_optional(pdev, "sfty");
+ if (stmmac_res->sfty_irq < 0) {
+ if (stmmac_res->sfty_irq == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ dev_info(&pdev->dev, "IRQ sfty not found\n");
+ }
+
stmmac_res->addr = devm_platform_ioremap_resource(pdev, 0);
return PTR_ERR_OR_ZERO(stmmac_res->addr);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 26fa33e5ec34..cce00719937d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -915,8 +915,30 @@ struct timespec64 stmmac_calc_tas_basetime(ktime_t old_base_time,
return time;
}
-static int tc_setup_taprio(struct stmmac_priv *priv,
- struct tc_taprio_qopt_offload *qopt)
+static void tc_taprio_map_maxsdu_txq(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt)
+{
+ struct plat_stmmacenet_data *plat = priv->plat;
+ u32 num_tc = qopt->mqprio.qopt.num_tc;
+ u32 offset, count, i, j;
+
+ /* QueueMaxSDU received from the driver corresponds to the Linux traffic
+ * class. Map queueMaxSDU per Linux traffic class to DWMAC Tx queues.
+ */
+ for (i = 0; i < num_tc; i++) {
+ if (!qopt->max_sdu[i])
+ continue;
+
+ offset = qopt->mqprio.qopt.offset[i];
+ count = qopt->mqprio.qopt.count[i];
+
+ for (j = offset; j < offset + count; j++)
+ plat->est->max_sdu[j] = qopt->max_sdu[i] + ETH_HLEN - ETH_TLEN;
+ }
+}
+
+static int tc_taprio_configure(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt)
{
u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep;
struct plat_stmmacenet_data *plat = priv->plat;
@@ -968,8 +990,6 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
if (qopt->cmd == TAPRIO_CMD_DESTROY)
goto disable;
- else if (qopt->cmd != TAPRIO_CMD_REPLACE)
- return -EOPNOTSUPP;
if (qopt->num_entries >= dep)
return -EINVAL;
@@ -1045,6 +1065,8 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
priv->plat->est->ter = qopt->cycle_time_extension;
+ tc_taprio_map_maxsdu_txq(priv, qopt);
+
if (fpe && !priv->dma_cap.fpesel) {
mutex_unlock(&priv->plat->est->lock);
return -EOPNOTSUPP;
@@ -1078,6 +1100,11 @@ disable:
priv->plat->est->enable = false;
stmmac_est_configure(priv, priv, priv->plat->est,
priv->plat->clk_ptp_rate);
+ /* Reset taprio status */
+ for (i = 0; i < priv->plat->tx_queues_to_use; i++) {
+ priv->xstats.max_sdu_txq_drop[i] = 0;
+ priv->xstats.mtl_est_txq_hlbf[i] = 0;
+ }
mutex_unlock(&priv->plat->est->lock);
}
@@ -1095,6 +1122,57 @@ disable:
return ret;
}
+static void tc_taprio_stats(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt)
+{
+ u64 window_drops = 0;
+ int i = 0;
+
+ for (i = 0; i < priv->plat->tx_queues_to_use; i++)
+ window_drops += priv->xstats.max_sdu_txq_drop[i] +
+ priv->xstats.mtl_est_txq_hlbf[i];
+ qopt->stats.window_drops = window_drops;
+
+ /* Transmission overrun doesn't happen for stmmac, hence always 0 */
+ qopt->stats.tx_overruns = 0;
+}
+
+static void tc_taprio_queue_stats(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt)
+{
+ struct tc_taprio_qopt_queue_stats *q_stats = &qopt->queue_stats;
+ int queue = qopt->queue_stats.queue;
+
+ q_stats->stats.window_drops = priv->xstats.max_sdu_txq_drop[queue] +
+ priv->xstats.mtl_est_txq_hlbf[queue];
+
+ /* Transmission overrun doesn't happen for stmmac, hence always 0 */
+ q_stats->stats.tx_overruns = 0;
+}
+
+static int tc_setup_taprio(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt)
+{
+ int err = 0;
+
+ switch (qopt->cmd) {
+ case TAPRIO_CMD_REPLACE:
+ case TAPRIO_CMD_DESTROY:
+ err = tc_taprio_configure(priv, qopt);
+ break;
+ case TAPRIO_CMD_STATS:
+ tc_taprio_stats(priv, qopt);
+ break;
+ case TAPRIO_CMD_QUEUE_STATS:
+ tc_taprio_queue_stats(priv, qopt);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
static int tc_setup_etf(struct stmmac_priv *priv,
struct tc_etf_qopt_offload *qopt)
{
@@ -1126,6 +1204,7 @@ static int tc_query_caps(struct stmmac_priv *priv,
return -EOPNOTSUPP;
caps->gate_mask_per_txq = true;
+ caps->supports_queue_max_sdu = true;
return 0;
}
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index 3525d5c0d694..351609f4f011 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1144,9 +1144,9 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
nskb->protocol = skb->protocol;
offset = skb_mac_header(skb) - skb->data;
skb_set_mac_header(nskb, offset);
- offset = skb_network_header(skb) - skb->data;
+ offset = skb_network_offset(skb);
skb_set_network_header(nskb, offset);
- offset = skb_transport_header(skb) - skb->data;
+ offset = skb_transport_offset(skb);
skb_set_transport_header(nskb, offset);
offset = 0;
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index be01450c20dc..1530d13984d4 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -189,6 +189,7 @@ config TI_ICSSG_PRUETH
select TI_K3_CPPI_DESC_POOL
depends on PRU_REMOTEPROC
depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
+ depends on PTP_1588_CLOCK_OPTIONAL
help
Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem.
This subsystem is available starting with the AM65 platform.
diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
index 35fceba01ea4..d6ce2c9f0a8d 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
@@ -514,14 +514,14 @@ am65_cpsw_set_link_ksettings(struct net_device *ndev,
return phylink_ethtool_ksettings_set(salve->phylink, ecmd);
}
-static int am65_cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+static int am65_cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct am65_cpsw_slave_data *salve = am65_ndev_to_slave(ndev);
return phylink_ethtool_get_eee(salve->phylink, edata);
}
-static int am65_cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+static int am65_cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct am65_cpsw_slave_data *salve = am65_ndev_to_slave(ndev);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 9d2f4ac783e4..2939a21ca74f 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -294,7 +294,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev,
txqueue,
netif_tx_queue_stopped(netif_txq),
jiffies_to_msecs(jiffies - trans_start),
- dql_avail(&netif_txq->dql),
+ netdev_queue_dql_avail(netif_txq),
k3_cppi_desc_pool_avail(tx_chn->desc_pool));
if (netif_tx_queue_stopped(netif_txq)) {
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index 26dc906eae90..57fe936bb177 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -90,4 +90,5 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
}
EXPORT_SYMBOL_GPL(ti_cm_get_macid);
+MODULE_DESCRIPTION("TI CPSW Switch common module");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index ea85c6dd5484..c0a5abd8d9a8 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
}
}
+ phy->mac_managed_pm = true;
+
slave->phy = phy;
phy_attached_info(slave->phy);
diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c
index a557a477d039..f7b283353ba2 100644
--- a/drivers/net/ethernet/ti/cpsw_ethtool.c
+++ b/drivers/net/ethernet/ti/cpsw_ethtool.c
@@ -422,7 +422,7 @@ int cpsw_set_link_ksettings(struct net_device *ndev,
return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy, ecmd);
}
-int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
@@ -434,7 +434,7 @@ int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
return -EOPNOTSUPP;
}
-int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+int cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 498c50c6d1a7..087dcb67505a 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -773,6 +773,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
slave->slave_num);
return;
}
+
+ phy->mac_managed_pm = true;
+
slave->phy = phy;
phy_attached_info(slave->phy);
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
index 0e27c433098d..7efa72502c86 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -496,8 +496,8 @@ int cpsw_get_link_ksettings(struct net_device *ndev,
struct ethtool_link_ksettings *ecmd);
int cpsw_set_link_ksettings(struct net_device *ndev,
const struct ethtool_link_ksettings *ecmd);
-int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata);
-int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata);
+int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata);
+int cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata);
int cpsw_nway_reset(struct net_device *ndev);
void cpsw_get_ringparam(struct net_device *ndev,
struct ethtool_ringparam *ering,
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index bcccf43d368b..dbbea9146040 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -638,6 +638,16 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
}
+static void cpts_clk_unregister(void *clk)
+{
+ clk_hw_unregister_mux(clk);
+}
+
+static void cpts_clk_del_provider(void *np)
+{
+ of_clk_del_provider(np);
+}
+
static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
{
struct device_node *refclk_np;
@@ -687,9 +697,7 @@ static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
goto mux_fail;
}
- ret = devm_add_action_or_reset(cpts->dev,
- (void(*)(void *))clk_hw_unregister_mux,
- clk_hw);
+ ret = devm_add_action_or_reset(cpts->dev, cpts_clk_unregister, clk_hw);
if (ret) {
dev_err(cpts->dev, "add clkmux unreg action %d", ret);
goto mux_fail;
@@ -699,8 +707,7 @@ static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
if (ret)
goto mux_fail;
- ret = devm_add_action_or_reset(cpts->dev,
- (void(*)(void *))of_clk_del_provider,
+ ret = devm_add_action_or_reset(cpts->dev, cpts_clk_del_provider,
refclk_np);
if (ret) {
dev_err(cpts->dev, "add clkmux provider unreg action %d", ret);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
index a27ec1dcc8d5..9a7dd7efcf69 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
@@ -45,7 +45,7 @@ static int emac_set_link_ksettings(struct net_device *ndev,
return phy_ethtool_set_link_ksettings(ndev, ecmd);
}
-static int emac_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+static int emac_get_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
if (!ndev->phydev)
return -EOPNOTSUPP;
@@ -53,7 +53,7 @@ static int emac_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
return phy_ethtool_get_eee(ndev->phydev, edata);
}
-static int emac_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+static int emac_set_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
if (!ndev->phydev)
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 411898a4f38c..cf7b73f8f450 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -1489,9 +1489,6 @@ static int emac_ndo_stop(struct net_device *ndev)
/* Destroying the queued work in ndo_stop() */
cancel_delayed_work_sync(&emac->stats_work);
- /* stop PRUs */
- prueth_emac_stop(emac);
-
if (prueth->emacs_initialized == 1)
icss_iep_exit(emac->iep);
@@ -1502,7 +1499,6 @@ static int emac_ndo_stop(struct net_device *ndev)
free_irq(emac->rx_chns.irq[rx_flow], emac);
prueth_ndev_del_tx_napi(emac, emac->tx_ch_num);
- prueth_cleanup_tx_chns(emac);
prueth_cleanup_rx_chns(emac, &emac->rx_chns, max_rx_flows);
prueth_cleanup_tx_chns(emac);
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index d5b75af163d3..5ee8e8980393 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -384,18 +384,18 @@ static int gelic_descr_prepare_rx(struct gelic_card *card,
if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE)
dev_info(ctodev(card), "%s: ERROR status\n", __func__);
- descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size);
- if (!descr->skb) {
- descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */
- return -ENOMEM;
- }
descr->hw_regs.dmac_cmd_status = 0;
descr->hw_regs.result_size = 0;
descr->hw_regs.valid_size = 0;
descr->hw_regs.data_error = 0;
descr->hw_regs.payload.dev_addr = 0;
descr->hw_regs.payload.size = 0;
- descr->skb = NULL;
+
+ descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size);
+ if (!descr->skb) {
+ descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */
+ return -ENOMEM;
+ }
offset = ((unsigned long)descr->skb->data) &
(GELIC_NET_RXBUF_ALIGN - 1);
@@ -698,7 +698,7 @@ gelic_card_get_next_tx_descr(struct gelic_card *card)
}
/**
- * gelic_net_set_txdescr_cmdstat - sets the tx descriptor command field
+ * gelic_descr_set_tx_cmdstat - sets the tx descriptor command field
* @descr: descriptor structure to fill out
* @skb: packet to consider
*
@@ -1461,7 +1461,7 @@ static void gelic_ether_setup_netdev_ops(struct net_device *netdev,
}
/**
- * gelic_ether_setup_netdev - initialization of net_device
+ * gelic_net_setup_netdev - initialization of net_device
* @netdev: net_device structure
* @card: card structure
*
@@ -1518,14 +1518,16 @@ int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card)
return 0;
}
+#define GELIC_ALIGN (32)
+
/**
* gelic_alloc_card_net - allocates net_device and card structure
+ * @netdev: interface device structure
*
* returns the card structure or NULL in case of errors
*
* the card and net_device structures are linked to each other
*/
-#define GELIC_ALIGN (32)
static struct gelic_card *gelic_alloc_card_net(struct net_device **netdev)
{
struct gelic_card *card;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 1db754615cca..945c13d1a982 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1958,8 +1958,6 @@ int wx_sw_init(struct wx *wx)
return -ENOMEM;
}
- wx->msix_in_use = false;
-
return 0;
}
EXPORT_SYMBOL(wx_sw_init);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 8706223a6e5a..6dff2c85682d 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1257,7 +1257,7 @@ static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first,
/* compute header lengths */
l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
- *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) :
+ *hdr_len = enc ? skb_inner_transport_offset(skb) :
skb_transport_offset(skb);
*hdr_len += l4len;
@@ -1614,14 +1614,12 @@ static int wx_acquire_msix_vectors(struct wx *wx)
/* One for non-queue interrupts */
nvecs += 1;
- if (!wx->msix_in_use) {
- wx->msix_entry = kcalloc(1, sizeof(struct msix_entry),
- GFP_KERNEL);
- if (!wx->msix_entry) {
- kfree(wx->msix_q_entries);
- wx->msix_q_entries = NULL;
- return -ENOMEM;
- }
+ wx->msix_entry = kcalloc(1, sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!wx->msix_entry) {
+ kfree(wx->msix_q_entries);
+ wx->msix_q_entries = NULL;
+ return -ENOMEM;
}
nvecs = pci_alloc_irq_vectors_affinity(wx->pdev, nvecs,
@@ -1931,10 +1929,8 @@ void wx_reset_interrupt_capability(struct wx *wx)
if (pdev->msix_enabled) {
kfree(wx->msix_q_entries);
wx->msix_q_entries = NULL;
- if (!wx->msix_in_use) {
- kfree(wx->msix_entry);
- wx->msix_entry = NULL;
- }
+ kfree(wx->msix_entry);
+ wx->msix_entry = NULL;
}
pci_free_irq_vectors(wx->pdev);
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index b4dc4f341117..1fdeb464d5f4 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1047,7 +1047,6 @@ struct wx {
unsigned int queues_per_pool;
struct msix_entry *msix_q_entries;
struct msix_entry *msix_entry;
- bool msix_in_use;
struct wx_ring_feature ring_feature[RING_F_ARRAY_SIZE];
/* misc interrupt status block */
diff --git a/drivers/net/ethernet/wangxun/txgbe/Makefile b/drivers/net/ethernet/wangxun/txgbe/Makefile
index 7507f762edfe..42718875277c 100644
--- a/drivers/net/ethernet/wangxun/txgbe/Makefile
+++ b/drivers/net/ethernet/wangxun/txgbe/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_TXGBE) += txgbe.o
txgbe-objs := txgbe_main.o \
txgbe_hw.o \
txgbe_phy.o \
+ txgbe_irq.o \
txgbe_ethtool.o
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
new file mode 100644
index 000000000000..b3e3605d1edb
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2015 - 2024 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/irqdomain.h>
+#include <linux/pci.h>
+
+#include "../libwx/wx_type.h"
+#include "../libwx/wx_lib.h"
+#include "../libwx/wx_hw.h"
+#include "txgbe_type.h"
+#include "txgbe_phy.h"
+#include "txgbe_irq.h"
+
+/**
+ * txgbe_irq_enable - Enable default interrupt generation settings
+ * @wx: pointer to private structure
+ * @queues: enable irqs for queues
+ **/
+void txgbe_irq_enable(struct wx *wx, bool queues)
+{
+ wr32(wx, WX_PX_MISC_IEN, TXGBE_PX_MISC_IEN_MASK);
+
+ /* unmask interrupt */
+ wx_intr_enable(wx, TXGBE_INTR_MISC);
+ if (queues)
+ wx_intr_enable(wx, TXGBE_INTR_QALL(wx));
+}
+
+/**
+ * txgbe_intr - msi/legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
+{
+ struct wx_q_vector *q_vector;
+ struct wx *wx = data;
+ struct pci_dev *pdev;
+ u32 eicr;
+
+ q_vector = wx->q_vector[0];
+ pdev = wx->pdev;
+
+ eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+ if (!eicr) {
+ /* shared interrupt alert!
+ * the interrupt that we masked before the ICR read.
+ */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, true);
+ return IRQ_NONE; /* Not our interrupt */
+ }
+ wx->isb_mem[WX_ISB_VEC0] = 0;
+ if (!(pdev->msi_enabled))
+ wr32(wx, WX_PX_INTA, 1);
+
+ wx->isb_mem[WX_ISB_MISC] = 0;
+ /* would disable interrupts here but it is auto disabled */
+ napi_schedule_irqoff(&q_vector->napi);
+
+ /* re-enable link(maybe) and non-queue interrupts, no flush.
+ * txgbe_poll will re-enable the queue interrupts
+ */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, false);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * txgbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @wx: board private structure
+ *
+ * Allocate MSI-X vectors and request interrupts from the kernel.
+ **/
+static int txgbe_request_msix_irqs(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ int vector, err;
+
+ for (vector = 0; vector < wx->num_q_vectors; vector++) {
+ struct wx_q_vector *q_vector = wx->q_vector[vector];
+ struct msix_entry *entry = &wx->msix_q_entries[vector];
+
+ if (q_vector->tx.ring && q_vector->rx.ring)
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-TxRx-%d", netdev->name, entry->entry);
+ else
+ /* skip this unused q_vector */
+ continue;
+
+ err = request_irq(entry->vector, wx_msix_clean_rings, 0,
+ q_vector->name, q_vector);
+ if (err) {
+ wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n",
+ q_vector->name, err);
+ goto free_queue_irqs;
+ }
+ }
+
+ return 0;
+
+free_queue_irqs:
+ while (vector) {
+ vector--;
+ free_irq(wx->msix_q_entries[vector].vector,
+ wx->q_vector[vector]);
+ }
+ wx_reset_interrupt_capability(wx);
+ return err;
+}
+
+/**
+ * txgbe_request_irq - initialize interrupts
+ * @wx: board private structure
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+int txgbe_request_irq(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ struct pci_dev *pdev = wx->pdev;
+ int err;
+
+ if (pdev->msix_enabled)
+ err = txgbe_request_msix_irqs(wx);
+ else if (pdev->msi_enabled)
+ err = request_irq(wx->pdev->irq, &txgbe_intr, 0,
+ netdev->name, wx);
+ else
+ err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED,
+ netdev->name, wx);
+
+ if (err)
+ wx_err(wx, "request_irq failed, Error %d\n", err);
+
+ return err;
+}
+
+static int txgbe_request_gpio_irq(struct txgbe *txgbe)
+{
+ txgbe->gpio_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_GPIO);
+ return request_threaded_irq(txgbe->gpio_irq, NULL,
+ txgbe_gpio_irq_handler,
+ IRQF_ONESHOT, "txgbe-gpio-irq", txgbe);
+}
+
+static int txgbe_request_link_irq(struct txgbe *txgbe)
+{
+ txgbe->link_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_LINK);
+ return request_threaded_irq(txgbe->link_irq, NULL,
+ txgbe_link_irq_handler,
+ IRQF_ONESHOT, "txgbe-link-irq", txgbe);
+}
+
+static const struct irq_chip txgbe_irq_chip = {
+ .name = "txgbe-misc-irq",
+};
+
+static int txgbe_misc_irq_domain_map(struct irq_domain *d,
+ unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct txgbe *txgbe = d->host_data;
+
+ irq_set_chip_data(irq, txgbe);
+ irq_set_chip(irq, &txgbe->misc.chip);
+ irq_set_nested_thread(irq, true);
+ irq_set_noprobe(irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops txgbe_misc_irq_domain_ops = {
+ .map = txgbe_misc_irq_domain_map,
+};
+
+static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
+{
+ struct txgbe *txgbe = data;
+ struct wx *wx = txgbe->wx;
+ unsigned int nhandled = 0;
+ unsigned int sub_irq;
+ u32 eicr;
+
+ eicr = wx_misc_isb(wx, WX_ISB_MISC);
+ if (eicr & TXGBE_PX_MISC_GPIO) {
+ sub_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_GPIO);
+ handle_nested_irq(sub_irq);
+ nhandled++;
+ }
+ if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN |
+ TXGBE_PX_MISC_ETH_AN)) {
+ sub_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_LINK);
+ handle_nested_irq(sub_irq);
+ nhandled++;
+ }
+
+ wx_intr_enable(wx, TXGBE_INTR_MISC);
+ return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
+}
+
+static void txgbe_del_irq_domain(struct txgbe *txgbe)
+{
+ int hwirq, virq;
+
+ for (hwirq = 0; hwirq < txgbe->misc.nirqs; hwirq++) {
+ virq = irq_find_mapping(txgbe->misc.domain, hwirq);
+ irq_dispose_mapping(virq);
+ }
+
+ irq_domain_remove(txgbe->misc.domain);
+}
+
+void txgbe_free_misc_irq(struct txgbe *txgbe)
+{
+ free_irq(txgbe->gpio_irq, txgbe);
+ free_irq(txgbe->link_irq, txgbe);
+ free_irq(txgbe->misc.irq, txgbe);
+ txgbe_del_irq_domain(txgbe);
+}
+
+int txgbe_setup_misc_irq(struct txgbe *txgbe)
+{
+ struct wx *wx = txgbe->wx;
+ int hwirq, err;
+
+ txgbe->misc.nirqs = 2;
+ txgbe->misc.domain = irq_domain_add_simple(NULL, txgbe->misc.nirqs, 0,
+ &txgbe_misc_irq_domain_ops, txgbe);
+ if (!txgbe->misc.domain)
+ return -ENOMEM;
+
+ for (hwirq = 0; hwirq < txgbe->misc.nirqs; hwirq++)
+ irq_create_mapping(txgbe->misc.domain, hwirq);
+
+ txgbe->misc.chip = txgbe_irq_chip;
+ if (wx->pdev->msix_enabled)
+ txgbe->misc.irq = wx->msix_entry->vector;
+ else
+ txgbe->misc.irq = wx->pdev->irq;
+
+ err = request_threaded_irq(txgbe->misc.irq, NULL,
+ txgbe_misc_irq_handle,
+ IRQF_ONESHOT,
+ wx->netdev->name, txgbe);
+ if (err)
+ goto del_misc_irq;
+
+ err = txgbe_request_gpio_irq(txgbe);
+ if (err)
+ goto free_msic_irq;
+
+ err = txgbe_request_link_irq(txgbe);
+ if (err)
+ goto free_gpio_irq;
+
+ return 0;
+
+free_gpio_irq:
+ free_irq(txgbe->gpio_irq, txgbe);
+free_msic_irq:
+ free_irq(txgbe->misc.irq, txgbe);
+del_misc_irq:
+ txgbe_del_irq_domain(txgbe);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
new file mode 100644
index 000000000000..b77945e7a0f2
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2024 Beijing WangXun Technology Co., Ltd. */
+
+void txgbe_irq_enable(struct wx *wx, bool queues);
+int txgbe_request_irq(struct wx *wx);
+void txgbe_free_misc_irq(struct txgbe *txgbe);
+int txgbe_setup_misc_irq(struct txgbe *txgbe);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 3b151c410a5c..bd4624d14ca0 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -17,6 +17,7 @@
#include "txgbe_type.h"
#include "txgbe_hw.h"
#include "txgbe_phy.h"
+#include "txgbe_irq.h"
#include "txgbe_ethtool.h"
char txgbe_driver_name[] = "txgbe";
@@ -76,137 +77,11 @@ static int txgbe_enumerate_functions(struct wx *wx)
return physfns;
}
-/**
- * txgbe_irq_enable - Enable default interrupt generation settings
- * @wx: pointer to private structure
- * @queues: enable irqs for queues
- **/
-static void txgbe_irq_enable(struct wx *wx, bool queues)
-{
- wr32(wx, WX_PX_MISC_IEN, TXGBE_PX_MISC_IEN_MASK);
-
- /* unmask interrupt */
- wx_intr_enable(wx, TXGBE_INTR_MISC);
- if (queues)
- wx_intr_enable(wx, TXGBE_INTR_QALL(wx));
-}
-
-/**
- * txgbe_intr - msi/legacy mode Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a network interface device structure
- **/
-static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
-{
- struct wx_q_vector *q_vector;
- struct wx *wx = data;
- struct pci_dev *pdev;
- u32 eicr;
-
- q_vector = wx->q_vector[0];
- pdev = wx->pdev;
-
- eicr = wx_misc_isb(wx, WX_ISB_VEC0);
- if (!eicr) {
- /* shared interrupt alert!
- * the interrupt that we masked before the ICR read.
- */
- if (netif_running(wx->netdev))
- txgbe_irq_enable(wx, true);
- return IRQ_NONE; /* Not our interrupt */
- }
- wx->isb_mem[WX_ISB_VEC0] = 0;
- if (!(pdev->msi_enabled))
- wr32(wx, WX_PX_INTA, 1);
-
- wx->isb_mem[WX_ISB_MISC] = 0;
- /* would disable interrupts here but it is auto disabled */
- napi_schedule_irqoff(&q_vector->napi);
-
- /* re-enable link(maybe) and non-queue interrupts, no flush.
- * txgbe_poll will re-enable the queue interrupts
- */
- if (netif_running(wx->netdev))
- txgbe_irq_enable(wx, false);
-
- return IRQ_HANDLED;
-}
-
-/**
- * txgbe_request_msix_irqs - Initialize MSI-X interrupts
- * @wx: board private structure
- *
- * Allocate MSI-X vectors and request interrupts from the kernel.
- **/
-static int txgbe_request_msix_irqs(struct wx *wx)
-{
- struct net_device *netdev = wx->netdev;
- int vector, err;
-
- for (vector = 0; vector < wx->num_q_vectors; vector++) {
- struct wx_q_vector *q_vector = wx->q_vector[vector];
- struct msix_entry *entry = &wx->msix_q_entries[vector];
-
- if (q_vector->tx.ring && q_vector->rx.ring)
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "%s-TxRx-%d", netdev->name, entry->entry);
- else
- /* skip this unused q_vector */
- continue;
-
- err = request_irq(entry->vector, wx_msix_clean_rings, 0,
- q_vector->name, q_vector);
- if (err) {
- wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n",
- q_vector->name, err);
- goto free_queue_irqs;
- }
- }
-
- return 0;
-
-free_queue_irqs:
- while (vector) {
- vector--;
- free_irq(wx->msix_q_entries[vector].vector,
- wx->q_vector[vector]);
- }
- wx_reset_interrupt_capability(wx);
- return err;
-}
-
-/**
- * txgbe_request_irq - initialize interrupts
- * @wx: board private structure
- *
- * Attempt to configure interrupts using the best available
- * capabilities of the hardware and kernel.
- **/
-static int txgbe_request_irq(struct wx *wx)
-{
- struct net_device *netdev = wx->netdev;
- struct pci_dev *pdev = wx->pdev;
- int err;
-
- if (pdev->msix_enabled)
- err = txgbe_request_msix_irqs(wx);
- else if (pdev->msi_enabled)
- err = request_irq(wx->pdev->irq, &txgbe_intr, 0,
- netdev->name, wx);
- else
- err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED,
- netdev->name, wx);
-
- if (err)
- wx_err(wx, "request_irq failed, Error %d\n", err);
-
- return err;
-}
-
static void txgbe_up_complete(struct wx *wx)
{
struct net_device *netdev = wx->netdev;
+ txgbe_reinit_gpio_intr(wx);
wx_control_hw(wx, true);
wx_configure_vectors(wx);
@@ -518,6 +393,7 @@ static void txgbe_shutdown(struct pci_dev *pdev)
int txgbe_setup_tc(struct net_device *dev, u8 tc)
{
struct wx *wx = netdev_priv(dev);
+ struct txgbe *txgbe = wx->priv;
/* Hardware has to reinitialize queues and interrupts to
* match packet buffer alignment. Unfortunately, the
@@ -528,6 +404,7 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc)
else
txgbe_reset(wx);
+ txgbe_free_misc_irq(txgbe);
wx_clear_interrupt_scheme(wx);
if (tc)
@@ -536,6 +413,7 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc)
netdev_reset_tc(dev);
wx_init_interrupt_scheme(wx);
+ txgbe_setup_misc_irq(txgbe);
if (netif_running(dev))
txgbe_open(dev);
@@ -751,10 +629,14 @@ static int txgbe_probe(struct pci_dev *pdev,
txgbe->wx = wx;
wx->priv = txgbe;
- err = txgbe_init_phy(txgbe);
+ err = txgbe_setup_misc_irq(txgbe);
if (err)
goto err_release_hw;
+ err = txgbe_init_phy(txgbe);
+ if (err)
+ goto err_free_misc_irq;
+
err = register_netdev(netdev);
if (err)
goto err_remove_phy;
@@ -781,6 +663,8 @@ static int txgbe_probe(struct pci_dev *pdev,
err_remove_phy:
txgbe_remove_phy(txgbe);
+err_free_misc_irq:
+ txgbe_free_misc_irq(txgbe);
err_release_hw:
wx_clear_interrupt_scheme(wx);
wx_control_hw(wx, false);
@@ -813,6 +697,7 @@ static void txgbe_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
txgbe_remove_phy(txgbe);
+ txgbe_free_misc_irq(txgbe);
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index 1b84d495d14e..93295916b1d2 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -292,6 +292,21 @@ static int txgbe_phylink_init(struct txgbe *txgbe)
return 0;
}
+irqreturn_t txgbe_link_irq_handler(int irq, void *data)
+{
+ struct txgbe *txgbe = data;
+ struct wx *wx = txgbe->wx;
+ u32 status;
+ bool up;
+
+ status = rd32(wx, TXGBE_CFG_PORT_ST);
+ up = !!(status & TXGBE_CFG_PORT_ST_LINK_UP);
+
+ phylink_mac_change(wx->phylink, up);
+
+ return IRQ_HANDLED;
+}
+
static int txgbe_gpio_get(struct gpio_chip *chip, unsigned int offset)
{
struct wx *wx = gpiochip_get_data(chip);
@@ -437,7 +452,7 @@ static int txgbe_gpio_set_type(struct irq_data *d, unsigned int type)
}
static const struct irq_chip txgbe_gpio_irq_chip = {
- .name = "txgbe_gpio_irq",
+ .name = "txgbe-gpio-irq",
.irq_ack = txgbe_gpio_irq_ack,
.irq_mask = txgbe_gpio_irq_mask,
.irq_unmask = txgbe_gpio_irq_unmask,
@@ -446,29 +461,25 @@ static const struct irq_chip txgbe_gpio_irq_chip = {
GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
-static void txgbe_irq_handler(struct irq_desc *desc)
+irqreturn_t txgbe_gpio_irq_handler(int irq, void *data)
{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- struct wx *wx = irq_desc_get_handler_data(desc);
- struct txgbe *txgbe = wx->priv;
+ struct txgbe *txgbe = data;
+ struct wx *wx = txgbe->wx;
irq_hw_number_t hwirq;
unsigned long gpioirq;
struct gpio_chip *gc;
unsigned long flags;
- u32 eicr;
-
- eicr = wx_misc_isb(wx, WX_ISB_MISC);
-
- chained_irq_enter(chip, desc);
gpioirq = rd32(wx, WX_GPIO_INTSTATUS);
gc = txgbe->gpio;
for_each_set_bit(hwirq, &gpioirq, gc->ngpio) {
int gpio = irq_find_mapping(gc->irq.domain, hwirq);
+ struct irq_data *d = irq_get_irq_data(gpio);
u32 irq_type = irq_get_trigger_type(gpio);
- generic_handle_domain_irq(gc->irq.domain, hwirq);
+ txgbe_gpio_irq_ack(d);
+ handle_nested_irq(gpio);
if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) {
raw_spin_lock_irqsave(&wx->gpio_lock, flags);
@@ -477,17 +488,34 @@ static void txgbe_irq_handler(struct irq_desc *desc)
}
}
- chained_irq_exit(chip, desc);
+ return IRQ_HANDLED;
+}
+
+void txgbe_reinit_gpio_intr(struct wx *wx)
+{
+ struct txgbe *txgbe = wx->priv;
+ irq_hw_number_t hwirq;
+ unsigned long gpioirq;
+ struct gpio_chip *gc;
+ unsigned long flags;
+
+ /* for gpio interrupt pending before irq enable */
+ gpioirq = rd32(wx, WX_GPIO_INTSTATUS);
- if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN |
- TXGBE_PX_MISC_ETH_AN)) {
- u32 reg = rd32(wx, TXGBE_CFG_PORT_ST);
+ gc = txgbe->gpio;
+ for_each_set_bit(hwirq, &gpioirq, gc->ngpio) {
+ int gpio = irq_find_mapping(gc->irq.domain, hwirq);
+ struct irq_data *d = irq_get_irq_data(gpio);
+ u32 irq_type = irq_get_trigger_type(gpio);
- phylink_mac_change(wx->phylink, !!(reg & TXGBE_CFG_PORT_ST_LINK_UP));
- }
+ txgbe_gpio_irq_ack(d);
- /* unmask interrupt */
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) {
+ raw_spin_lock_irqsave(&wx->gpio_lock, flags);
+ txgbe_toggle_trigger(gc, hwirq);
+ raw_spin_unlock_irqrestore(&wx->gpio_lock, flags);
+ }
+ }
}
static int txgbe_gpio_init(struct txgbe *txgbe)
@@ -524,19 +552,6 @@ static int txgbe_gpio_init(struct txgbe *txgbe)
girq = &gc->irq;
gpio_irq_chip_set_chip(girq, &txgbe_gpio_irq_chip);
- girq->parent_handler = txgbe_irq_handler;
- girq->parent_handler_data = wx;
- girq->num_parents = 1;
- girq->parents = devm_kcalloc(dev, girq->num_parents,
- sizeof(*girq->parents), GFP_KERNEL);
- if (!girq->parents)
- return -ENOMEM;
-
- /* now only suuported on MSI-X interrupt */
- if (!wx->msix_entry)
- return -EPERM;
-
- girq->parents[0] = wx->msix_entry->vector;
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
@@ -754,8 +769,6 @@ int txgbe_init_phy(struct txgbe *txgbe)
goto err_unregister_i2c;
}
- wx->msix_in_use = true;
-
return 0;
err_unregister_i2c:
@@ -788,5 +801,4 @@ void txgbe_remove_phy(struct txgbe *txgbe)
phylink_destroy(txgbe->wx->phylink);
xpcs_destroy(txgbe->xpcs);
software_node_unregister_node_group(txgbe->nodes.group);
- txgbe->wx->msix_in_use = false;
}
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h
index 1ab592124986..8a026d804fe2 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h
@@ -4,6 +4,9 @@
#ifndef _TXGBE_PHY_H_
#define _TXGBE_PHY_H_
+irqreturn_t txgbe_gpio_irq_handler(int irq, void *data);
+void txgbe_reinit_gpio_intr(struct wx *wx);
+irqreturn_t txgbe_link_irq_handler(int irq, void *data);
int txgbe_init_phy(struct txgbe *txgbe);
void txgbe_remove_phy(struct txgbe *txgbe);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 270a6fd9ad0b..1b4ff50d5857 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -5,6 +5,7 @@
#define _TXGBE_TYPE_H_
#include <linux/property.h>
+#include <linux/irq.h>
/* Device IDs */
#define TXGBE_DEV_ID_SP1000 0x1001
@@ -169,15 +170,31 @@ struct txgbe_nodes {
const struct software_node *group[SWNODE_MAX + 1];
};
+enum txgbe_misc_irqs {
+ TXGBE_IRQ_GPIO = 0,
+ TXGBE_IRQ_LINK,
+ TXGBE_IRQ_MAX
+};
+
+struct txgbe_irq {
+ struct irq_chip chip;
+ struct irq_domain *domain;
+ int nirqs;
+ int irq;
+};
+
struct txgbe {
struct wx *wx;
struct txgbe_nodes nodes;
+ struct txgbe_irq misc;
struct dw_xpcs *xpcs;
struct platform_device *sfp_dev;
struct platform_device *i2c_dev;
struct clk_lookup *clock;
struct clk *clk;
struct gpio_chip *gpio;
+ unsigned int gpio_irq;
+ unsigned int link_irq;
};
#endif /* _TXGBE_TYPE_H_ */
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index 3318b50a5911..f165616f36fe 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -539,8 +539,7 @@ static int w5300_hw_probe(struct platform_device *pdev)
eth_hw_addr_random(ndev);
}
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- priv->base = devm_ioremap_resource(&pdev->dev, mem);
+ priv->base = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 765aa516aada..940452d0a4d2 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1114,8 +1114,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
ndev->irq = rc;
- res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
- lp->base_addr = devm_ioremap_resource(&ofdev->dev, res);
+ lp->base_addr = devm_platform_get_and_ioremap_resource(ofdev, 0, &res);
if (IS_ERR(lp->base_addr)) {
rc = PTR_ERR(lp->base_addr);
goto error;
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index 9f505cf02d96..e9bc38fd2025 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -1240,9 +1240,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif_stop_queue(dev);
SelectPage(0);
PutWord(XIRCREG0_TRS, (u_short)pktlen+2);
- freespace = GetWord(XIRCREG0_TSO);
- okay = freespace & 0x8000;
- freespace &= 0x7fff;
+ freespace = GetWord(XIRCREG0_TSO) & 0x7fff;
/* TRS doesn't work - (indeed it is eliminated with sil-rev 1) */
okay = pktlen +2 < freespace;
pr_debug("%s: avail. tx space=%u%s\n",
diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c
index 2b6a607ac0b7..a273362c9e70 100644
--- a/drivers/net/fddi/skfp/skfddi.c
+++ b/drivers/net/fddi/skfp/skfddi.c
@@ -153,6 +153,7 @@ static const struct pci_device_id skfddi_pci_tbl[] = {
{ } /* Terminating entry */
};
MODULE_DEVICE_TABLE(pci, skfddi_pci_tbl);
+MODULE_DESCRIPTION("SysKonnect FDDI PCI driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mirko Lindner <mlindner@syskonnect.de>");
diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c
index 704e949484d0..b9b5554ea862 100644
--- a/drivers/net/fjes/fjes_hw.c
+++ b/drivers/net/fjes/fjes_hw.c
@@ -221,21 +221,25 @@ static int fjes_hw_setup(struct fjes_hw *hw)
mem_size = FJES_DEV_REQ_BUF_SIZE(hw->max_epid);
hw->hw_info.req_buf = kzalloc(mem_size, GFP_KERNEL);
- if (!(hw->hw_info.req_buf))
- return -ENOMEM;
+ if (!(hw->hw_info.req_buf)) {
+ result = -ENOMEM;
+ goto free_ep_info;
+ }
hw->hw_info.req_buf_size = mem_size;
mem_size = FJES_DEV_RES_BUF_SIZE(hw->max_epid);
hw->hw_info.res_buf = kzalloc(mem_size, GFP_KERNEL);
- if (!(hw->hw_info.res_buf))
- return -ENOMEM;
+ if (!(hw->hw_info.res_buf)) {
+ result = -ENOMEM;
+ goto free_req_buf;
+ }
hw->hw_info.res_buf_size = mem_size;
result = fjes_hw_alloc_shared_status_region(hw);
if (result)
- return result;
+ goto free_res_buf;
hw->hw_info.buffer_share_bit = 0;
hw->hw_info.buffer_unshare_reserve_bit = 0;
@@ -246,11 +250,11 @@ static int fjes_hw_setup(struct fjes_hw *hw)
result = fjes_hw_alloc_epbuf(&buf_pair->tx);
if (result)
- return result;
+ goto free_epbuf;
result = fjes_hw_alloc_epbuf(&buf_pair->rx);
if (result)
- return result;
+ goto free_epbuf;
spin_lock_irqsave(&hw->rx_status_lock, flags);
fjes_hw_setup_epbuf(&buf_pair->tx, mac,
@@ -273,6 +277,25 @@ static int fjes_hw_setup(struct fjes_hw *hw)
fjes_hw_init_command_registers(hw, &param);
return 0;
+
+free_epbuf:
+ for (epidx = 0; epidx < hw->max_epid ; epidx++) {
+ if (epidx == hw->my_epid)
+ continue;
+ fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].tx);
+ fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].rx);
+ }
+ fjes_hw_free_shared_status_region(hw);
+free_res_buf:
+ kfree(hw->hw_info.res_buf);
+ hw->hw_info.res_buf = NULL;
+free_req_buf:
+ kfree(hw->hw_info.req_buf);
+ hw->hw_info.req_buf = NULL;
+free_ep_info:
+ kfree(hw->ep_shm_info);
+ hw->ep_shm_info = NULL;
+ return result;
}
static void fjes_hw_cleanup(struct fjes_hw *hw)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 32c51c244153..2f6739fe78af 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
struct genevehdr *gnvh = geneve_hdr(skb);
struct metadata_dst *tun_dst = NULL;
unsigned int len;
- int err = 0;
+ int nh, err = 0;
void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) {
@@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
skb->pkt_type = PACKET_HOST;
}
- oiph = skb_network_header(skb);
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(geneve->dev, rx_length_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ oiph = skb->head + nh;
+
if (geneve_get_sk_family(gs) == AF_INET)
err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -319,22 +333,16 @@ static int geneve_init(struct net_device *dev)
struct geneve_dev *geneve = netdev_priv(dev);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&geneve->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
if (err) {
- free_percpu(dev->tstats);
gro_cells_destroy(&geneve->gro_cells);
return err;
}
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -344,7 +352,6 @@ static void geneve_uninit(struct net_device *dev)
dst_cache_destroy(&geneve->cfg.info.dst_cache);
gro_cells_destroy(&geneve->gro_cells);
- free_percpu(dev->tstats);
}
/* Callback from net/ipv4/udp.c to receive packets */
@@ -507,7 +514,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
gh_len = geneve_hlen(gh);
hlen = off_gnv + gh_len;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
gh = skb_gro_header_slow(skb, hlen, off_gnv);
if (unlikely(!gh))
goto out;
@@ -1121,7 +1128,6 @@ static const struct net_device_ops geneve_netdev_ops = {
.ndo_open = geneve_open,
.ndo_stop = geneve_stop,
.ndo_start_xmit = geneve_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_change_mtu = geneve_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
@@ -1141,7 +1147,7 @@ static const struct ethtool_ops geneve_ethtool_ops = {
};
/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type geneve_type = {
+static const struct device_type geneve_type = {
.name = "geneve",
};
@@ -1188,6 +1194,7 @@ static void geneve_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
/* MTU range: 68 - (something less than 65535) */
dev->min_mtu = ETH_MIN_MTU;
/* The max_mtu calculation does not take account of GENEVE
@@ -1900,29 +1907,26 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
}
}
-static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
+static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
- geneve_destroy_tunnels(net, &list);
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ geneve_destroy_tunnels(net, dev_to_kill);
+}
- list_for_each_entry(net, net_list, exit_list) {
- const struct geneve_net *gn = net_generic(net, geneve_net_id);
+static void __net_exit geneve_exit_net(struct net *net)
+{
+ const struct geneve_net *gn = net_generic(net, geneve_net_id);
- WARN_ON_ONCE(!list_empty(&gn->sock_list));
- }
+ WARN_ON_ONCE(!list_empty(&gn->sock_list));
}
static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
- .exit_batch = geneve_exit_batch_net,
+ .exit_batch_rtnl = geneve_exit_batch_rtnl,
+ .exit = geneve_exit_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
};
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index b1919278e931..ba4704c2c640 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -711,25 +711,11 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
return ret;
}
-static int gtp_dev_init(struct net_device *dev)
-{
- struct gtp_dev *gtp = netdev_priv(dev);
-
- gtp->dev = dev;
-
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
-}
-
static void gtp_dev_uninit(struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
gtp_encap_disable(gtp);
- free_percpu(dev->tstats);
}
static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
@@ -942,10 +928,8 @@ tx_err:
}
static const struct net_device_ops gtp_netdev_ops = {
- .ndo_init = gtp_dev_init,
.ndo_uninit = gtp_dev_uninit,
.ndo_start_xmit = gtp_dev_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
};
static const struct device_type gtp_type = {
@@ -957,6 +941,7 @@ static void gtp_link_setup(struct net_device *dev)
unsigned int max_gtp_header_len = sizeof(struct iphdr) +
sizeof(struct udphdr) +
sizeof(struct gtp0_header);
+ struct gtp_dev *gtp = netdev_priv(dev);
dev->netdev_ops = &gtp_netdev_ops;
dev->needs_free_netdev = true;
@@ -970,11 +955,13 @@ static void gtp_link_setup(struct net_device *dev)
dev->type = ARPHRD_NONE;
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags |= IFF_NO_QUEUE;
dev->features |= NETIF_F_LLTX;
netif_keep_dst(dev);
dev->needed_headroom = LL_MAX_HEADER + max_gtp_header_len;
+ gtp->dev = dev;
}
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
@@ -1876,23 +1863,23 @@ static int __net_init gtp_net_init(struct net *net)
return 0;
}
-static void __net_exit gtp_net_exit(struct net *net)
+static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- struct gtp_net *gn = net_generic(net, gtp_net_id);
- struct gtp_dev *gtp;
- LIST_HEAD(list);
+ struct net *net;
- rtnl_lock();
- list_for_each_entry(gtp, &gn->gtp_dev_list, list)
- gtp_dellink(gtp->dev, &list);
+ list_for_each_entry(net, net_list, exit_list) {
+ struct gtp_net *gn = net_generic(net, gtp_net_id);
+ struct gtp_dev *gtp;
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ list_for_each_entry(gtp, &gn->gtp_dev_list, list)
+ gtp_dellink(gtp->dev, dev_to_kill);
+ }
}
static struct pernet_operations gtp_net_ops = {
.init = gtp_net_init,
- .exit = gtp_net_exit,
+ .exit_batch_rtnl = gtp_net_exit_batch_rtnl,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
};
@@ -1903,26 +1890,26 @@ static int __init gtp_init(void)
get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval));
- err = rtnl_link_register(&gtp_link_ops);
+ err = register_pernet_subsys(&gtp_net_ops);
if (err < 0)
goto error_out;
- err = genl_register_family(&gtp_genl_family);
+ err = rtnl_link_register(&gtp_link_ops);
if (err < 0)
- goto unreg_rtnl_link;
+ goto unreg_pernet_subsys;
- err = register_pernet_subsys(&gtp_net_ops);
+ err = genl_register_family(&gtp_genl_family);
if (err < 0)
- goto unreg_genl_family;
+ goto unreg_rtnl_link;
pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
sizeof(struct pdp_ctx));
return 0;
-unreg_genl_family:
- genl_unregister_family(&gtp_genl_family);
unreg_rtnl_link:
rtnl_link_unregister(&gtp_link_ops);
+unreg_pernet_subsys:
+ unregister_pernet_subsys(&gtp_net_ops);
error_out:
pr_err("error loading GTP module loaded\n");
return err;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 1dafa44155d0..a6fcbda64ecc 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device)
/* Disable NAPI and disassociate its context from the device. */
for (i = 0; i < net_device->num_chn; i++) {
/* See also vmbus_reset_channel_cb(). */
- napi_disable(&net_device->chan_table[i].napi);
+ /* only disable enabled NAPI channel */
+ if (i < ndev->real_num_rx_queues)
+ napi_disable(&net_device->chan_table[i].napi);
+
netif_napi_del(&net_device->chan_table[i].napi);
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4406427d4617..11831a1c9762 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -42,9 +42,13 @@
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
+/* Macros to define the context of vf registration */
+#define VF_REG_IN_PROBE 1
+#define VF_REG_IN_NOTIFIER 2
+
static unsigned int ring_size __ro_after_init = 128;
module_param(ring_size, uint, 0444);
-MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
unsigned int netvsc_ring_bytes __ro_after_init;
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -2185,7 +2189,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
}
static int netvsc_vf_join(struct net_device *vf_netdev,
- struct net_device *ndev)
+ struct net_device *ndev, int context)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
int ret;
@@ -2208,7 +2212,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
goto upper_link_failed;
}
- schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+ /* If this registration is called from probe context vf_takeover
+ * is taken care of later in probe itself.
+ */
+ if (context == VF_REG_IN_NOTIFIER)
+ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
@@ -2346,7 +2354,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev)
return NOTIFY_DONE;
}
-static int netvsc_register_vf(struct net_device *vf_netdev)
+static int netvsc_register_vf(struct net_device *vf_netdev, int context)
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
@@ -2386,7 +2394,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
- if (netvsc_vf_join(vf_netdev, ndev) != 0)
+ if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
return NOTIFY_DONE;
dev_hold(vf_netdev);
@@ -2484,10 +2492,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_OK;
}
+static int check_dev_is_matching_vf(struct net_device *event_ndev)
+{
+ /* Skip NetVSC interfaces */
+ if (event_ndev->netdev_ops == &device_ops)
+ return -ENODEV;
+
+ /* Avoid non-Ethernet type devices */
+ if (event_ndev->type != ARPHRD_ETHER)
+ return -ENODEV;
+
+ /* Avoid Vlan dev with same MAC registering as VF */
+ if (is_vlan_dev(event_ndev))
+ return -ENODEV;
+
+ /* Avoid Bonding master dev with same MAC registering as VF */
+ if (netif_is_bond_master(event_ndev))
+ return -ENODEV;
+
+ return 0;
+}
+
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
- struct net_device *net = NULL;
+ struct net_device *net = NULL, *vf_netdev;
struct net_device_context *net_device_ctx;
struct netvsc_device_info *device_info = NULL;
struct netvsc_device *nvdev;
@@ -2599,6 +2628,30 @@ static int netvsc_probe(struct hv_device *dev,
}
list_add(&net_device_ctx->list, &netvsc_dev_list);
+
+ /* When the hv_netvsc driver is unloaded and reloaded, the
+ * NET_DEVICE_REGISTER for the vf device is replayed before probe
+ * is complete. This is because register_netdevice_notifier() gets
+ * registered before vmbus_driver_register() so that callback func
+ * is set before probe and we don't miss events like NETDEV_POST_INIT
+ * So, in this section we try to register the matching vf device that
+ * is present as a netdevice, knowing that its register call is not
+ * processed in the netvsc_netdev_notifier(as probing is progress and
+ * get_netvsc_byslot fails).
+ */
+ for_each_netdev(dev_net(net), vf_netdev) {
+ ret = check_dev_is_matching_vf(vf_netdev);
+ if (ret != 0)
+ continue;
+
+ if (net != get_netvsc_byslot(vf_netdev))
+ continue;
+
+ netvsc_prepare_bonding(vf_netdev);
+ netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+ __netvsc_vf_setup(net, vf_netdev);
+ break;
+ }
rtnl_unlock();
netvsc_devinfo_put(device_info);
@@ -2754,28 +2807,17 @@ static int netvsc_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ int ret = 0;
- /* Skip our own events */
- if (event_dev->netdev_ops == &device_ops)
- return NOTIFY_DONE;
-
- /* Avoid non-Ethernet type devices */
- if (event_dev->type != ARPHRD_ETHER)
- return NOTIFY_DONE;
-
- /* Avoid Vlan dev with same MAC registering as VF */
- if (is_vlan_dev(event_dev))
- return NOTIFY_DONE;
-
- /* Avoid Bonding master dev with same MAC registering as VF */
- if (netif_is_bond_master(event_dev))
+ ret = check_dev_is_matching_vf(event_dev);
+ if (ret != 0)
return NOTIFY_DONE;
switch (event) {
case NETDEV_POST_INIT:
return netvsc_prepare_bonding(event_dev);
case NETDEV_REGISTER:
- return netvsc_register_vf(event_dev);
+ return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
case NETDEV_UNREGISTER:
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
@@ -2807,7 +2849,7 @@ static int __init netvsc_drv_init(void)
pr_info("Increased ring_size to %u (min allowed)\n",
ring_size);
}
- netvsc_ring_bytes = ring_size * PAGE_SIZE;
+ netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
register_netdevice_notifier(&netvsc_netdev_notifier);
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 164c7f605af5..f632b0cfd5ae 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -11,17 +11,16 @@
*/
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/gpio/consumer.h>
#include <linux/hrtimer.h>
#include <linux/jiffies.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
-#include <linux/gpio.h>
#include <linux/delay.h>
#include <linux/property.h>
#include <linux/spi/spi.h>
#include <linux/regmap.h>
#include <linux/skbuff.h>
-#include <linux/of_gpio.h>
#include <linux/ieee802154.h>
#include <net/mac802154.h>
@@ -316,7 +315,7 @@ static const struct regmap_config at86rf230_regmap_spi_config = {
.val_bits = 8,
.write_flag_mask = CMD_REG | CMD_WRITE,
.read_flag_mask = CMD_REG,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.max_register = AT86RF2XX_NUMREGS,
.writeable_reg = at86rf230_reg_writeable,
.readable_reg = at86rf230_reg_readable,
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 4ec0dab38872..f102f26cb0e3 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -2857,19 +2857,13 @@ static int ca8210_interrupt_init(struct spi_device *spi)
*/
static int ca8210_dev_com_init(struct ca8210_priv *priv)
{
- priv->mlme_workqueue = alloc_ordered_workqueue(
- "MLME work queue",
- WQ_UNBOUND
- );
+ priv->mlme_workqueue = alloc_ordered_workqueue("MLME work queue", 0);
if (!priv->mlme_workqueue) {
dev_crit(&priv->spi->dev, "alloc of mlme_workqueue failed!\n");
return -ENOMEM;
}
- priv->irq_workqueue = alloc_ordered_workqueue(
- "ca8210 irq worker",
- WQ_UNBOUND
- );
+ priv->irq_workqueue = alloc_ordered_workqueue("ca8210 irq worker", 0);
if (!priv->irq_workqueue) {
dev_crit(&priv->spi->dev, "alloc of irq_workqueue failed!\n");
destroy_workqueue(priv->mlme_workqueue);
diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c
index 35e55f198e05..2930141d7dd2 100644
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c
@@ -259,4 +259,5 @@ static __exit void fake_remove_module(void)
module_init(fakelb_init_module);
module_exit(fake_remove_module);
+MODULE_DESCRIPTION("IEEE 802.15.4 loopback driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index 87abe3b46316..433fb5839203 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -12,7 +12,6 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/skbuff.h>
-#include <linux/of_gpio.h>
#include <linux/regmap.h>
#include <linux/ieee802154.h>
#include <linux/debugfs.h>
@@ -251,7 +250,7 @@ static const struct regmap_config mcr20a_dar_regmap = {
.val_bits = 8,
.write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE,
.read_flag_mask = REGISTER_ACCESS | REGISTER_READ,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.writeable_reg = mcr20a_dar_writeable,
.readable_reg = mcr20a_dar_readable,
.volatile_reg = mcr20a_dar_volatile,
@@ -387,7 +386,7 @@ static const struct regmap_config mcr20a_iar_regmap = {
.val_bits = 8,
.write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
.read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.writeable_reg = mcr20a_iar_writeable,
.readable_reg = mcr20a_iar_readable,
.volatile_reg = mcr20a_iar_volatile,
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index ee4cfbf2c5cc..d3f42efc5d1a 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -388,7 +388,7 @@ static const struct regmap_config mrf24j40_short_regmap = {
.pad_bits = 1,
.write_flag_mask = MRF24J40_SHORT_WRITE,
.read_flag_mask = MRF24J40_SHORT_READ,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.max_register = MRF24J40_SHORT_NUMREGS,
.writeable_reg = mrf24j40_short_reg_writeable,
.readable_reg = mrf24j40_short_reg_readable,
@@ -495,7 +495,7 @@ static const struct regmap_config mrf24j40_long_regmap = {
.pad_bits = 5,
.write_flag_mask = MRF24J40_LONG_ACCESS,
.read_flag_mask = MRF24J40_LONG_ACCESS,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
.max_register = MRF24J40_LONG_NUMREGS,
.writeable_reg = mrf24j40_long_reg_writeable,
.readable_reg = mrf24j40_long_reg_readable,
diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index f3355e040a9e..334cd62cf286 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -21,7 +21,6 @@
struct clk;
struct icc_path;
struct net_device;
-struct platform_device;
struct ipa_power;
struct ipa_smp2p;
@@ -31,7 +30,7 @@ struct ipa_interrupt;
* struct ipa - IPA information
* @gsi: Embedded GSI structure
* @version: IPA hardware version
- * @pdev: Platform device
+ * @dev: IPA device pointer
* @completion: Used to signal pipeline clear transfer complete
* @nb: Notifier block used for remoteproc SSR
* @notifier: Remoteproc SSR notifier
@@ -79,7 +78,7 @@ struct ipa_interrupt;
struct ipa {
struct gsi gsi;
enum ipa_version version;
- struct platform_device *pdev;
+ struct device *dev;
struct completion completion;
struct notifier_block nb;
void *notifier;
diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index f1419fbd776c..39219963dbb3 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -174,7 +174,7 @@ bool ipa_cmd_table_init_valid(struct ipa *ipa, const struct ipa_mem *mem,
u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK);
u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK);
const char *table = route ? "route" : "filter";
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
u32 size;
size = route ? ipa->route_count : ipa->filter_count + 1;
@@ -204,7 +204,7 @@ bool ipa_cmd_table_init_valid(struct ipa *ipa, const struct ipa_mem *mem,
/* Validate the memory region that holds headers */
static bool ipa_cmd_header_init_local_valid(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
const struct ipa_mem *mem;
u32 offset_max;
u32 size_max;
@@ -256,7 +256,7 @@ static bool ipa_cmd_register_write_offset_valid(struct ipa *ipa,
const char *name, u32 offset)
{
struct ipa_cmd_register_write *payload;
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
u32 offset_max;
u32 bit_count;
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index afa1d56d9095..dd490941615e 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -233,8 +233,8 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
const struct ipa_gsi_endpoint_data *data)
{
const struct ipa_gsi_endpoint_data *other_data;
- struct device *dev = &ipa->pdev->dev;
enum ipa_endpoint_name other_name;
+ struct device *dev = ipa->dev;
if (ipa_gsi_endpoint_data_empty(data))
return true;
@@ -388,7 +388,7 @@ static u32 ipa_endpoint_max(struct ipa *ipa, u32 count,
const struct ipa_gsi_endpoint_data *data)
{
const struct ipa_gsi_endpoint_data *dp = data;
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
enum ipa_endpoint_name name;
u32 max;
@@ -606,7 +606,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa)
count = ipa->modem_tx_count + ipa_cmd_pipeline_clear_count();
trans = ipa_cmd_trans_alloc(ipa, count);
if (!trans) {
- dev_err(&ipa->pdev->dev,
+ dev_err(ipa->dev,
"no transaction to reset modem exception endpoints\n");
return -EBUSY;
}
@@ -1498,8 +1498,7 @@ ipa_endpoint_status_tag_valid(struct ipa_endpoint *endpoint, const void *data)
if (endpoint_id == command_endpoint->endpoint_id) {
complete(&ipa->completion);
} else {
- dev_err(&ipa->pdev->dev,
- "unexpected tagged packet from endpoint %u\n",
+ dev_err(ipa->dev, "unexpected tagged packet from endpoint %u\n",
endpoint_id);
}
@@ -1536,6 +1535,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint,
void *data = page_address(page) + NET_SKB_PAD;
u32 unused = buffer_size - total_len;
struct ipa *ipa = endpoint->ipa;
+ struct device *dev = ipa->dev;
u32 resid = total_len;
while (resid) {
@@ -1544,7 +1544,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint,
u32 len;
if (resid < IPA_STATUS_SIZE) {
- dev_err(&endpoint->ipa->pdev->dev,
+ dev_err(dev,
"short message (%u bytes < %zu byte status)\n",
resid, IPA_STATUS_SIZE);
break;
@@ -1666,8 +1666,8 @@ void ipa_endpoint_default_route_clear(struct ipa *ipa)
*/
static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
{
- struct device *dev = &endpoint->ipa->pdev->dev;
struct ipa *ipa = endpoint->ipa;
+ struct device *dev = ipa->dev;
struct gsi *gsi = &ipa->gsi;
bool suspended = false;
dma_addr_t addr;
@@ -1769,7 +1769,7 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
gsi_channel_reset(&ipa->gsi, channel_id, true);
if (ret)
- dev_err(&ipa->pdev->dev,
+ dev_err(ipa->dev,
"error %d resetting channel %u for endpoint %u\n",
ret, endpoint->channel_id, endpoint->endpoint_id);
}
@@ -1817,7 +1817,7 @@ int ipa_endpoint_enable_one(struct ipa_endpoint *endpoint)
ret = gsi_channel_start(gsi, endpoint->channel_id);
if (ret) {
- dev_err(&ipa->pdev->dev,
+ dev_err(ipa->dev,
"error %d starting %cX channel %u for endpoint %u\n",
ret, endpoint->toward_ipa ? 'T' : 'R',
endpoint->channel_id, endpoint_id);
@@ -1854,14 +1854,13 @@ void ipa_endpoint_disable_one(struct ipa_endpoint *endpoint)
/* Note that if stop fails, the channel's state is not well-defined */
ret = gsi_channel_stop(gsi, endpoint->channel_id);
if (ret)
- dev_err(&ipa->pdev->dev,
- "error %d attempting to stop endpoint %u\n", ret,
- endpoint_id);
+ dev_err(ipa->dev, "error %d attempting to stop endpoint %u\n",
+ ret, endpoint_id);
}
void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
{
- struct device *dev = &endpoint->ipa->pdev->dev;
+ struct device *dev = endpoint->ipa->dev;
struct gsi *gsi = &endpoint->ipa->gsi;
int ret;
@@ -1881,7 +1880,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
{
- struct device *dev = &endpoint->ipa->pdev->dev;
+ struct device *dev = endpoint->ipa->dev;
struct gsi *gsi = &endpoint->ipa->gsi;
int ret;
@@ -1983,7 +1982,7 @@ void ipa_endpoint_deconfig(struct ipa *ipa)
int ipa_endpoint_config(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
const struct reg *reg;
u32 endpoint_id;
u32 hw_limit;
diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c
index 4bc05948f772..c3e8784d51d9 100644
--- a/drivers/net/ipa/ipa_interrupt.c
+++ b/drivers/net/ipa/ipa_interrupt.c
@@ -19,6 +19,7 @@
* time only these three are supported.
*/
+#include <linux/platform_device.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
@@ -43,6 +44,30 @@ struct ipa_interrupt {
u32 enabled;
};
+/* Clear the suspend interrupt for all endpoints that signaled it */
+static void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt)
+{
+ struct ipa *ipa = interrupt->ipa;
+ u32 unit_count;
+ u32 unit;
+
+ unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32);
+ for (unit = 0; unit < unit_count; unit++) {
+ const struct reg *reg;
+ u32 val;
+
+ reg = ipa_reg(ipa, IRQ_SUSPEND_INFO);
+ val = ioread32(ipa->reg_virt + reg_n_offset(reg, unit));
+
+ /* SUSPEND interrupt status isn't cleared on IPA version 3.0 */
+ if (!val || ipa->version == IPA_VERSION_3_0)
+ continue;
+
+ reg = ipa_reg(ipa, IRQ_SUSPEND_CLR);
+ iowrite32(val, ipa->reg_virt + reg_n_offset(reg, unit));
+ }
+}
+
/* Process a particular interrupt type that has been received */
static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id)
{
@@ -70,7 +95,7 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id)
* caused the interrupt, so defer clearing until after
* the handler has been called.
*/
- ipa_power_suspend_handler(ipa, irq_id);
+ ipa_interrupt_suspend_clear_all(interrupt);
fallthrough;
default: /* Silently ignore (and clear) any other condition */
@@ -85,14 +110,13 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id)
struct ipa_interrupt *interrupt = dev_id;
struct ipa *ipa = interrupt->ipa;
u32 enabled = interrupt->enabled;
+ struct device *dev = ipa->dev;
const struct reg *reg;
- struct device *dev;
u32 pending;
u32 offset;
u32 mask;
int ret;
- dev = &ipa->pdev->dev;
ret = pm_runtime_get_sync(dev);
if (WARN_ON(ret < 0))
goto out_power_put;
@@ -205,30 +229,6 @@ ipa_interrupt_suspend_disable(struct ipa_interrupt *interrupt, u32 endpoint_id)
ipa_interrupt_suspend_control(interrupt, endpoint_id, false);
}
-/* Clear the suspend interrupt for all endpoints that signaled it */
-void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt)
-{
- struct ipa *ipa = interrupt->ipa;
- u32 unit_count;
- u32 unit;
-
- unit_count = roundup(ipa->endpoint_count, 32);
- for (unit = 0; unit < unit_count; unit++) {
- const struct reg *reg;
- u32 val;
-
- reg = ipa_reg(ipa, IRQ_SUSPEND_INFO);
- val = ioread32(ipa->reg_virt + reg_n_offset(reg, unit));
-
- /* SUSPEND interrupt status isn't cleared on IPA version 3.0 */
- if (ipa->version == IPA_VERSION_3_0)
- continue;
-
- reg = ipa_reg(ipa, IRQ_SUSPEND_CLR);
- iowrite32(val, ipa->reg_virt + reg_n_offset(reg, unit));
- }
-}
-
/* Simulate arrival of an IPA TX_SUSPEND interrupt */
void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt)
{
@@ -236,29 +236,17 @@ void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt)
}
/* Configure the IPA interrupt framework */
-struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa)
+int ipa_interrupt_config(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
- struct ipa_interrupt *interrupt;
+ struct ipa_interrupt *interrupt = ipa->interrupt;
+ unsigned int irq = interrupt->irq;
+ struct device *dev = ipa->dev;
const struct reg *reg;
- unsigned int irq;
int ret;
- ret = platform_get_irq_byname(ipa->pdev, "ipa");
- if (ret <= 0) {
- dev_err(dev, "DT error %d getting \"ipa\" IRQ property\n",
- ret);
- return ERR_PTR(ret ? : -EINVAL);
- }
- irq = ret;
-
- interrupt = kzalloc(sizeof(*interrupt), GFP_KERNEL);
- if (!interrupt)
- return ERR_PTR(-ENOMEM);
interrupt->ipa = ipa;
- interrupt->irq = irq;
- /* Start with all IPA interrupts disabled */
+ /* Disable all IPA interrupt types */
reg = ipa_reg(ipa, IPA_IRQ_EN);
iowrite32(0, ipa->reg_virt + reg_offset(reg));
@@ -271,26 +259,59 @@ struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa)
ret = dev_pm_set_wake_irq(dev, irq);
if (ret) {
- dev_err(dev, "error %d registering \"ipa\" IRQ as wakeirq\n", ret);
+ dev_err(dev, "error %d registering \"ipa\" IRQ as wakeirq\n",
+ ret);
goto err_free_irq;
}
- return interrupt;
+ ipa->interrupt = interrupt;
+
+ return 0;
err_free_irq:
free_irq(interrupt->irq, interrupt);
err_kfree:
kfree(interrupt);
- return ERR_PTR(ret);
+ return ret;
}
/* Inverse of ipa_interrupt_config() */
-void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt)
+void ipa_interrupt_deconfig(struct ipa *ipa)
{
- struct device *dev = &interrupt->ipa->pdev->dev;
+ struct ipa_interrupt *interrupt = ipa->interrupt;
+ struct device *dev = ipa->dev;
+
+ ipa->interrupt = NULL;
dev_pm_clear_wake_irq(dev);
free_irq(interrupt->irq, interrupt);
+}
+
+/* Initialize the IPA interrupt structure */
+struct ipa_interrupt *ipa_interrupt_init(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct ipa_interrupt *interrupt;
+ int irq;
+
+ irq = platform_get_irq_byname(pdev, "ipa");
+ if (irq <= 0) {
+ dev_err(dev, "DT error %d getting \"ipa\" IRQ property\n", irq);
+
+ return ERR_PTR(irq ? : -EINVAL);
+ }
+
+ interrupt = kzalloc(sizeof(*interrupt), GFP_KERNEL);
+ if (!interrupt)
+ return ERR_PTR(-ENOMEM);
+ interrupt->irq = irq;
+
+ return interrupt;
+}
+
+/* Inverse of ipa_interrupt_init() */
+void ipa_interrupt_exit(struct ipa_interrupt *interrupt)
+{
kfree(interrupt);
}
diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h
index 12e3e798ccb3..f3f4f4330a59 100644
--- a/drivers/net/ipa/ipa_interrupt.h
+++ b/drivers/net/ipa/ipa_interrupt.h
@@ -35,14 +35,6 @@ void ipa_interrupt_suspend_disable(struct ipa_interrupt *interrupt,
u32 endpoint_id);
/**
- * ipa_interrupt_suspend_clear_all - clear all suspend interrupts
- * @interrupt: IPA interrupt structure
- *
- * Clear the TX_SUSPEND interrupt for all endpoints that signaled it.
- */
-void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt);
-
-/**
* ipa_interrupt_simulate_suspend() - Simulate TX_SUSPEND IPA interrupt
* @interrupt: IPA interrupt structure
*
@@ -84,17 +76,31 @@ void ipa_interrupt_irq_enable(struct ipa *ipa);
void ipa_interrupt_irq_disable(struct ipa *ipa);
/**
- * ipa_interrupt_config() - Configure the IPA interrupt framework
+ * ipa_interrupt_config() - Configure IPA interrupts
* @ipa: IPA pointer
*
- * Return: Pointer to IPA SMP2P info, or a pointer-coded error
+ * Return: 0 if successful, or a negative error code
*/
-struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa);
+int ipa_interrupt_config(struct ipa *ipa);
/**
* ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config()
+ * @ipa: IPA pointer
+ */
+void ipa_interrupt_deconfig(struct ipa *ipa);
+
+/**
+ * ipa_interrupt_init() - Initialize the IPA interrupt structure
+ * @pdev: IPA platform device pointer
+ *
+ * Return: Pointer to an IPA interrupt structure, or a pointer-coded error
+ */
+struct ipa_interrupt *ipa_interrupt_init(struct platform_device *pdev);
+
+/**
+ * ipa_interrupt_exit() - Inverse of ipa_interrupt_init()
* @interrupt: IPA interrupt structure
*/
-void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt);
+void ipa_interrupt_exit(struct ipa_interrupt *interrupt);
#endif /* _IPA_INTERRUPT_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 00475fd7a205..57b241417e8c 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -7,7 +7,6 @@
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bitfield.h>
-#include <linux/device.h>
#include <linux/bug.h>
#include <linux/io.h>
#include <linux/firmware.h>
@@ -114,7 +113,7 @@ int ipa_setup(struct ipa *ipa)
{
struct ipa_endpoint *exception_endpoint;
struct ipa_endpoint *command_endpoint;
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
int ret;
ret = gsi_setup(&ipa->gsi);
@@ -542,12 +541,9 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
if (ret)
goto err_hardware_deconfig;
- ipa->interrupt = ipa_interrupt_config(ipa);
- if (IS_ERR(ipa->interrupt)) {
- ret = PTR_ERR(ipa->interrupt);
- ipa->interrupt = NULL;
+ ret = ipa_interrupt_config(ipa);
+ if (ret)
goto err_mem_deconfig;
- }
ipa_uc_config(ipa);
@@ -572,8 +568,7 @@ err_endpoint_deconfig:
ipa_endpoint_deconfig(ipa);
err_uc_deconfig:
ipa_uc_deconfig(ipa);
- ipa_interrupt_deconfig(ipa->interrupt);
- ipa->interrupt = NULL;
+ ipa_interrupt_deconfig(ipa);
err_mem_deconfig:
ipa_mem_deconfig(ipa);
err_hardware_deconfig:
@@ -591,8 +586,7 @@ static void ipa_deconfig(struct ipa *ipa)
ipa_modem_deconfig(ipa);
ipa_endpoint_deconfig(ipa);
ipa_uc_deconfig(ipa);
- ipa_interrupt_deconfig(ipa->interrupt);
- ipa->interrupt = NULL;
+ ipa_interrupt_deconfig(ipa);
ipa_mem_deconfig(ipa);
ipa_hardware_deconfig(ipa);
}
@@ -808,6 +802,7 @@ out_self:
static int ipa_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct ipa_interrupt *interrupt;
enum ipa_firmware_loader loader;
const struct ipa_data *data;
struct ipa_power *power;
@@ -839,12 +834,21 @@ static int ipa_probe(struct platform_device *pdev)
if (loader == IPA_LOADER_DEFER)
return -EPROBE_DEFER;
- /* The clock and interconnects might not be ready when we're
- * probed, so might return -EPROBE_DEFER.
+ /* The IPA interrupt might not be ready when we're probed, so this
+ * might return -EPROBE_DEFER.
+ */
+ interrupt = ipa_interrupt_init(pdev);
+ if (IS_ERR(interrupt))
+ return PTR_ERR(interrupt);
+
+ /* The clock and interconnects might not be ready when we're probed,
+ * so this might return -EPROBE_DEFER.
*/
power = ipa_power_init(dev, data->power_data);
- if (IS_ERR(power))
- return PTR_ERR(power);
+ if (IS_ERR(power)) {
+ ret = PTR_ERR(power);
+ goto err_interrupt_exit;
+ }
/* No more EPROBE_DEFER. Allocate and initialize the IPA structure */
ipa = kzalloc(sizeof(*ipa), GFP_KERNEL);
@@ -853,18 +857,19 @@ static int ipa_probe(struct platform_device *pdev)
goto err_power_exit;
}
- ipa->pdev = pdev;
+ ipa->dev = dev;
dev_set_drvdata(dev, ipa);
+ ipa->interrupt = interrupt;
ipa->power = power;
ipa->version = data->version;
ipa->modem_route_count = data->modem_route_count;
init_completion(&ipa->completion);
- ret = ipa_reg_init(ipa);
+ ret = ipa_reg_init(ipa, pdev);
if (ret)
goto err_kfree_ipa;
- ret = ipa_mem_init(ipa, data->mem_data);
+ ret = ipa_mem_init(ipa, pdev, data->mem_data);
if (ret)
goto err_reg_exit;
@@ -882,7 +887,7 @@ static int ipa_probe(struct platform_device *pdev)
if (ret)
goto err_endpoint_exit;
- ret = ipa_smp2p_init(ipa, loader == IPA_LOADER_MODEM);
+ ret = ipa_smp2p_init(ipa, pdev, loader == IPA_LOADER_MODEM);
if (ret)
goto err_table_exit;
@@ -939,17 +944,27 @@ err_kfree_ipa:
kfree(ipa);
err_power_exit:
ipa_power_exit(power);
+err_interrupt_exit:
+ ipa_interrupt_exit(interrupt);
return ret;
}
static void ipa_remove(struct platform_device *pdev)
{
- struct ipa *ipa = dev_get_drvdata(&pdev->dev);
- struct ipa_power *power = ipa->power;
- struct device *dev = &pdev->dev;
+ struct ipa_interrupt *interrupt;
+ struct ipa_power *power;
+ struct device *dev;
+ struct ipa *ipa;
int ret;
+ ipa = dev_get_drvdata(&pdev->dev);
+ dev = ipa->dev;
+ WARN_ON(dev != &pdev->dev);
+
+ power = ipa->power;
+ interrupt = ipa->interrupt;
+
/* Prevent the modem from triggering a call to ipa_setup(). This
* also ensures a modem-initiated setup that's underway completes.
*/
@@ -991,6 +1006,7 @@ out_power_put:
ipa_reg_exit(ipa);
kfree(ipa);
ipa_power_exit(power);
+ ipa_interrupt_exit(interrupt);
dev_info(dev, "IPA driver removed");
}
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 694960537ecd..709f061ede61 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -9,6 +9,7 @@
#include <linux/bug.h>
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
+#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/soc/qcom/smem.h>
@@ -75,9 +76,9 @@ ipa_mem_zero_region_add(struct gsi_trans *trans, enum ipa_mem_id mem_id)
int ipa_mem_setup(struct ipa *ipa)
{
dma_addr_t addr = ipa->zero_addr;
- const struct reg *reg;
const struct ipa_mem *mem;
struct gsi_trans *trans;
+ const struct reg *reg;
u32 offset;
u16 size;
u32 val;
@@ -87,7 +88,7 @@ int ipa_mem_setup(struct ipa *ipa)
*/
trans = ipa_cmd_trans_alloc(ipa, 4);
if (!trans) {
- dev_err(&ipa->pdev->dev, "no transaction for memory setup\n");
+ dev_err(ipa->dev, "no transaction for memory setup\n");
return -EBUSY;
}
@@ -217,8 +218,8 @@ static bool ipa_mem_id_required(struct ipa *ipa, enum ipa_mem_id mem_id)
static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
{
- struct device *dev = &ipa->pdev->dev;
enum ipa_mem_id mem_id = mem->id;
+ struct device *dev = ipa->dev;
u16 size_multiple;
/* Make sure the memory region is valid for this version of IPA */
@@ -254,7 +255,7 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
{
DECLARE_BITMAP(regions, IPA_MEM_COUNT) = { };
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
enum ipa_mem_id mem_id;
u32 i;
@@ -290,7 +291,7 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
/* Do all memory regions fit within the IPA local memory? */
static bool ipa_mem_size_valid(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
u32 limit = ipa->mem_size;
u32 i;
@@ -317,7 +318,7 @@ static bool ipa_mem_size_valid(struct ipa *ipa)
*/
int ipa_mem_config(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
const struct ipa_mem *mem;
const struct reg *reg;
dma_addr_t addr;
@@ -393,7 +394,7 @@ err_dma_free:
/* Inverse of ipa_mem_config() */
void ipa_mem_deconfig(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
dma_free_coherent(dev, ipa->zero_size, ipa->zero_virt, ipa->zero_addr);
ipa->zero_size = 0;
@@ -420,8 +421,7 @@ int ipa_mem_zero_modem(struct ipa *ipa)
*/
trans = ipa_cmd_trans_alloc(ipa, 3);
if (!trans) {
- dev_err(&ipa->pdev->dev,
- "no transaction to zero modem memory\n");
+ dev_err(ipa->dev, "no transaction to zero modem memory\n");
return -EBUSY;
}
@@ -452,7 +452,7 @@ int ipa_mem_zero_modem(struct ipa *ipa)
*/
static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
struct iommu_domain *domain;
unsigned long iova;
phys_addr_t phys;
@@ -485,13 +485,12 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size)
static void ipa_imem_exit(struct ipa *ipa)
{
+ struct device *dev = ipa->dev;
struct iommu_domain *domain;
- struct device *dev;
if (!ipa->imem_size)
return;
- dev = &ipa->pdev->dev;
domain = iommu_get_domain_for_dev(dev);
if (domain) {
size_t size;
@@ -527,7 +526,7 @@ static void ipa_imem_exit(struct ipa *ipa)
*/
static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
struct iommu_domain *domain;
unsigned long iova;
phys_addr_t phys;
@@ -594,7 +593,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
static void ipa_smem_exit(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
struct iommu_domain *domain;
domain = iommu_get_domain_for_dev(dev);
@@ -615,9 +614,10 @@ static void ipa_smem_exit(struct ipa *ipa)
}
/* Perform memory region-related initialization */
-int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
+int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev,
+ const struct ipa_mem_data *mem_data)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = &pdev->dev;
struct resource *res;
int ret;
@@ -634,14 +634,13 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
if (!ipa_table_mem_valid(ipa, true))
return -EINVAL;
- ret = dma_set_mask_and_coherent(&ipa->pdev->dev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) {
dev_err(dev, "error %d setting DMA mask\n", ret);
return ret;
}
- res = platform_get_resource_byname(ipa->pdev, IORESOURCE_MEM,
- "ipa-shared");
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ipa-shared");
if (!res) {
dev_err(dev,
"DT error getting \"ipa-shared\" memory property\n");
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index 868e9c20e8c4..28aad00a151d 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -6,6 +6,8 @@
#ifndef _IPA_MEM_H_
#define _IPA_MEM_H_
+struct platform_device;
+
struct ipa;
struct ipa_mem_data;
@@ -100,7 +102,8 @@ int ipa_mem_setup(struct ipa *ipa); /* No ipa_mem_teardown() needed */
int ipa_mem_zero_modem(struct ipa *ipa);
-int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data);
+int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev,
+ const struct ipa_mem_data *mem_data);
void ipa_mem_exit(struct ipa *ipa);
#endif /* _IPA_MEM_H_ */
diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c
index 423422a2a445..c27ca3f27f7d 100644
--- a/drivers/net/ipa/ipa_modem.c
+++ b/drivers/net/ipa/ipa_modem.c
@@ -39,10 +39,14 @@ enum ipa_modem_state {
/**
* struct ipa_priv - IPA network device private data
* @ipa: IPA pointer
+ * @tx: Transmit endpoint pointer
+ * @rx: Receive endpoint pointer
* @work: Work structure used to wake the modem netdev TX queue
*/
struct ipa_priv {
struct ipa *ipa;
+ struct ipa_endpoint *tx;
+ struct ipa_endpoint *rx;
struct work_struct work;
};
@@ -54,16 +58,16 @@ static int ipa_open(struct net_device *netdev)
struct device *dev;
int ret;
- dev = &ipa->pdev->dev;
+ dev = ipa->dev;
ret = pm_runtime_get_sync(dev);
if (ret < 0)
goto err_power_put;
- ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+ ret = ipa_endpoint_enable_one(priv->tx);
if (ret)
goto err_power_put;
- ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
+ ret = ipa_endpoint_enable_one(priv->rx);
if (ret)
goto err_disable_tx;
@@ -75,7 +79,7 @@ static int ipa_open(struct net_device *netdev)
return 0;
err_disable_tx:
- ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+ ipa_endpoint_disable_one(priv->tx);
err_power_put:
pm_runtime_put_noidle(dev);
@@ -90,15 +94,15 @@ static int ipa_stop(struct net_device *netdev)
struct device *dev;
int ret;
- dev = &ipa->pdev->dev;
+ dev = ipa->dev;
ret = pm_runtime_get_sync(dev);
if (ret < 0)
goto out_power_put;
netif_stop_queue(netdev);
- ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
- ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+ ipa_endpoint_disable_one(priv->rx);
+ ipa_endpoint_disable_one(priv->tx);
out_power_put:
pm_runtime_mark_last_busy(dev);
(void)pm_runtime_put_autosuspend(dev);
@@ -106,13 +110,16 @@ out_power_put:
return 0;
}
-/** ipa_start_xmit() - Transmits an skb.
- * @skb: skb to be transmitted
- * @dev: network device
+/** ipa_start_xmit() - Transmit an skb
+ * @skb: Socket buffer to be transmitted
+ * @netdev: Network device
*
- * Return codes:
- * NETDEV_TX_OK: Success
- * NETDEV_TX_BUSY: Error while transmitting the skb. Try again later
+ * Return: NETDEV_TX_OK if successful (or dropped), NETDEV_TX_BUSY otherwise
+
+ * Normally NETDEV_TX_OK indicates the buffer was successfully transmitted.
+ * If the buffer has an unexpected protocol or its size is out of range it
+ * is quietly dropped, returning NETDEV_TX_OK. NETDEV_TX_BUSY indicates
+ * the buffer cannot be sent at this time and should retried later.
*/
static netdev_tx_t
ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
@@ -132,29 +139,41 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
if (endpoint->config.qmap && skb->protocol != htons(ETH_P_MAP))
goto err_drop_skb;
- /* The hardware must be powered for us to transmit */
- dev = &ipa->pdev->dev;
+ /* The hardware must be powered for us to transmit, so if we're not
+ * ready we want the network stack to stop queueing until power is
+ * ACTIVE. Once runtime resume has completed, we inform the network
+ * stack it's OK to try transmitting again.
+ *
+ * We learn from pm_runtime_get() whether the hardware is powered.
+ * If it was not, powering up is either started or already underway.
+ * And in that case we want to disable queueing, expecting it to be
+ * re-enabled once power is ACTIVE. But runtime PM and network
+ * transmit run concurrently, and if we're not careful the requests
+ * to stop and start queueing could occur in the wrong order.
+ *
+ * For that reason we *always* stop queueing here, *before* the call
+ * to pm_runtime_get(). If we determine here that power is ACTIVE,
+ * we restart queueing before transmitting the SKB. Otherwise
+ * queueing will eventually be enabled after resume completes.
+ */
+ netif_stop_queue(netdev);
+
+ dev = ipa->dev;
ret = pm_runtime_get(dev);
if (ret < 1) {
/* If a resume won't happen, just drop the packet */
if (ret < 0 && ret != -EINPROGRESS) {
- ipa_power_modem_queue_active(ipa);
+ netif_wake_queue(netdev);
pm_runtime_put_noidle(dev);
goto err_drop_skb;
}
- /* No power (yet). Stop the network stack from transmitting
- * until we're resumed; ipa_modem_resume() arranges for the
- * TX queue to be started again.
- */
- ipa_power_modem_queue_stop(ipa);
-
pm_runtime_put_noidle(dev);
return NETDEV_TX_BUSY;
}
- ipa_power_modem_queue_active(ipa);
+ netif_wake_queue(netdev);
ret = ipa_endpoint_skb_tx(endpoint, skb);
@@ -233,14 +252,14 @@ static void ipa_modem_netdev_setup(struct net_device *netdev)
*/
void ipa_modem_suspend(struct net_device *netdev)
{
- struct ipa_priv *priv = netdev_priv(netdev);
- struct ipa *ipa = priv->ipa;
+ struct ipa_priv *priv;
if (!(netdev->flags & IFF_UP))
return;
- ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
- ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+ priv = netdev_priv(netdev);
+ ipa_endpoint_suspend_one(priv->rx);
+ ipa_endpoint_suspend_one(priv->tx);
}
/**
@@ -258,7 +277,7 @@ static void ipa_modem_wake_queue_work(struct work_struct *work)
{
struct ipa_priv *priv = container_of(work, struct ipa_priv, work);
- ipa_power_modem_queue_wake(priv->ipa);
+ netif_wake_queue(priv->tx->netdev);
}
/** ipa_modem_resume() - resume callback for runtime_pm
@@ -268,14 +287,14 @@ static void ipa_modem_wake_queue_work(struct work_struct *work)
*/
void ipa_modem_resume(struct net_device *netdev)
{
- struct ipa_priv *priv = netdev_priv(netdev);
- struct ipa *ipa = priv->ipa;
+ struct ipa_priv *priv;
if (!(netdev->flags & IFF_UP))
return;
- ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
- ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
+ priv = netdev_priv(netdev);
+ ipa_endpoint_resume_one(priv->tx);
+ ipa_endpoint_resume_one(priv->rx);
/* Arrange for the TX queue to be restarted */
(void)queue_pm_work(&priv->work);
@@ -303,19 +322,24 @@ int ipa_modem_start(struct ipa *ipa)
goto out_set_state;
}
- SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
+ SET_NETDEV_DEV(netdev, ipa->dev);
priv = netdev_priv(netdev);
priv->ipa = ipa;
+ priv->tx = ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX];
+ priv->rx = ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX];
INIT_WORK(&priv->work, ipa_modem_wake_queue_work);
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+
+ priv->tx->netdev = netdev;
+ priv->rx->netdev = netdev;
+
ipa->modem_netdev = netdev;
ret = register_netdev(netdev);
if (ret) {
ipa->modem_netdev = NULL;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
+ priv->rx->netdev = NULL;
+ priv->tx->netdev = NULL;
+
free_netdev(netdev);
}
@@ -355,9 +379,11 @@ int ipa_modem_stop(struct ipa *ipa)
if (netdev->flags & IFF_UP)
(void)ipa_stop(netdev);
unregister_netdev(netdev);
+
ipa->modem_netdev = NULL;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
+ priv->rx->netdev = NULL;
+ priv->tx->netdev = NULL;
+
free_netdev(netdev);
}
@@ -370,7 +396,7 @@ int ipa_modem_stop(struct ipa *ipa)
/* Treat a "clean" modem stop the same as a crash */
static void ipa_modem_crashed(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
int ret;
/* Prevent the modem from triggering a call to ipa_setup() */
@@ -417,7 +443,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action,
{
struct ipa *ipa = container_of(nb, struct ipa, nb);
struct qcom_ssr_notify_data *notify_data = data;
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
switch (action) {
case QCOM_SSR_BEFORE_POWERUP:
@@ -466,7 +492,7 @@ int ipa_modem_config(struct ipa *ipa)
void ipa_modem_deconfig(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
int ret;
ret = qcom_unregister_ssr_notifier(ipa->notifier, &ipa->nb);
diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c
index e223886123ce..41ca7ef5e20f 100644
--- a/drivers/net/ipa/ipa_power.c
+++ b/drivers/net/ipa/ipa_power.c
@@ -35,28 +35,10 @@
#define IPA_AUTOSUSPEND_DELAY 500 /* milliseconds */
/**
- * enum ipa_power_flag - IPA power flags
- * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled
- * @IPA_POWER_FLAG_SYSTEM: Hardware is system (not runtime) suspended
- * @IPA_POWER_FLAG_STOPPED: Modem TX is disabled by ipa_start_xmit()
- * @IPA_POWER_FLAG_STARTED: Modem TX was enabled by ipa_runtime_resume()
- * @IPA_POWER_FLAG_COUNT: Number of defined power flags
- */
-enum ipa_power_flag {
- IPA_POWER_FLAG_RESUMED,
- IPA_POWER_FLAG_SYSTEM,
- IPA_POWER_FLAG_STOPPED,
- IPA_POWER_FLAG_STARTED,
- IPA_POWER_FLAG_COUNT, /* Last; not a flag */
-};
-
-/**
* struct ipa_power - IPA power management information
* @dev: IPA device pointer
* @core: IPA core clock
* @qmp: QMP handle for AOSS communication
- * @spinlock: Protects modem TX queue enable/disable
- * @flags: Boolean state flags
* @interconnect_count: Number of elements in interconnect[]
* @interconnect: Interconnect array
*/
@@ -64,8 +46,6 @@ struct ipa_power {
struct device *dev;
struct clk *core;
struct qmp *qmp;
- spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
- DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
u32 interconnect_count;
struct icc_bulk_data interconnect[] __counted_by(interconnect_count);
};
@@ -147,7 +127,6 @@ static int ipa_runtime_suspend(struct device *dev)
/* Endpoints aren't usable until setup is complete */
if (ipa->setup_complete) {
- __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->power->flags);
ipa_endpoint_suspend(ipa);
gsi_suspend(&ipa->gsi);
}
@@ -179,8 +158,6 @@ static int ipa_suspend(struct device *dev)
{
struct ipa *ipa = dev_get_drvdata(dev);
- __set_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
-
/* Increment the disable depth to ensure that the IRQ won't
* be re-enabled until the matching _enable call in
* ipa_resume(). We do this to ensure that the interrupt
@@ -202,8 +179,6 @@ static int ipa_resume(struct device *dev)
ret = pm_runtime_force_resume(dev);
- __clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
-
/* Now that PM runtime is enabled again it's safe
* to turn the IRQ back on and process any data
* that was received during suspend.
@@ -219,84 +194,6 @@ u32 ipa_core_clock_rate(struct ipa *ipa)
return ipa->power ? (u32)clk_get_rate(ipa->power->core) : 0;
}
-void ipa_power_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
-{
- /* To handle an IPA interrupt we will have resumed the hardware
- * just to handle the interrupt, so we're done. If we are in a
- * system suspend, trigger a system resume.
- */
- if (!__test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->power->flags))
- if (test_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags))
- pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
-
- /* Acknowledge/clear the suspend interrupt on all endpoints */
- ipa_interrupt_suspend_clear_all(ipa->interrupt);
-}
-
-/* The next few functions coordinate stopping and starting the modem
- * network device transmit queue.
- *
- * Transmit can be running concurrent with power resume, and there's a
- * chance the resume completes before the transmit path stops the queue,
- * leaving the queue in a stopped state. The next two functions are used
- * to avoid this: ipa_power_modem_queue_stop() is used by ipa_start_xmit()
- * to conditionally stop the TX queue; and ipa_power_modem_queue_start()
- * is used by ipa_runtime_resume() to conditionally restart it.
- *
- * Two flags and a spinlock are used. If the queue is stopped, the STOPPED
- * power flag is set. And if the queue is started, the STARTED flag is set.
- * The queue is only started on resume if the STOPPED flag is set. And the
- * queue is only started in ipa_start_xmit() if the STARTED flag is *not*
- * set. As a result, the queue remains operational if the two activites
- * happen concurrently regardless of the order they complete. The spinlock
- * ensures the flag and TX queue operations are done atomically.
- *
- * The first function stops the modem netdev transmit queue, but only if
- * the STARTED flag is *not* set. That flag is cleared if it was set.
- * If the queue is stopped, the STOPPED flag is set. This is called only
- * from the power ->runtime_resume operation.
- */
-void ipa_power_modem_queue_stop(struct ipa *ipa)
-{
- struct ipa_power *power = ipa->power;
- unsigned long flags;
-
- spin_lock_irqsave(&power->spinlock, flags);
-
- if (!__test_and_clear_bit(IPA_POWER_FLAG_STARTED, power->flags)) {
- netif_stop_queue(ipa->modem_netdev);
- __set_bit(IPA_POWER_FLAG_STOPPED, power->flags);
- }
-
- spin_unlock_irqrestore(&power->spinlock, flags);
-}
-
-/* This function starts the modem netdev transmit queue, but only if the
- * STOPPED flag is set. That flag is cleared if it was set. If the queue
- * was restarted, the STARTED flag is set; this allows ipa_start_xmit()
- * to skip stopping the queue in the event of a race.
- */
-void ipa_power_modem_queue_wake(struct ipa *ipa)
-{
- struct ipa_power *power = ipa->power;
- unsigned long flags;
-
- spin_lock_irqsave(&power->spinlock, flags);
-
- if (__test_and_clear_bit(IPA_POWER_FLAG_STOPPED, power->flags)) {
- __set_bit(IPA_POWER_FLAG_STARTED, power->flags);
- netif_wake_queue(ipa->modem_netdev);
- }
-
- spin_unlock_irqrestore(&power->spinlock, flags);
-}
-
-/* This function clears the STARTED flag once the TX queue is operating */
-void ipa_power_modem_queue_active(struct ipa *ipa)
-{
- clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
-}
-
static int ipa_power_retention_init(struct ipa_power *power)
{
struct qmp *qmp = qmp_get(power->dev);
@@ -341,7 +238,7 @@ int ipa_power_setup(struct ipa *ipa)
ipa_interrupt_enable(ipa, IPA_IRQ_TX_SUSPEND);
- ret = device_init_wakeup(&ipa->pdev->dev, true);
+ ret = device_init_wakeup(ipa->dev, true);
if (ret)
ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND);
@@ -350,7 +247,7 @@ int ipa_power_setup(struct ipa *ipa)
void ipa_power_teardown(struct ipa *ipa)
{
- (void)device_init_wakeup(&ipa->pdev->dev, false);
+ (void)device_init_wakeup(ipa->dev, false);
ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND);
}
@@ -385,7 +282,6 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data)
}
power->dev = dev;
power->core = clk;
- spin_lock_init(&power->spinlock);
power->interconnect_count = data->interconnect_count;
ret = ipa_interconnect_init(power, data->interconnect_data);
diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h
index 3a4c59ea1222..227cc04bea80 100644
--- a/drivers/net/ipa/ipa_power.h
+++ b/drivers/net/ipa/ipa_power.h
@@ -24,24 +24,6 @@ extern const struct dev_pm_ops ipa_pm_ops;
u32 ipa_core_clock_rate(struct ipa *ipa);
/**
- * ipa_power_modem_queue_stop() - Possibly stop the modem netdev TX queue
- * @ipa: IPA pointer
- */
-void ipa_power_modem_queue_stop(struct ipa *ipa);
-
-/**
- * ipa_power_modem_queue_wake() - Possibly wake the modem netdev TX queue
- * @ipa: IPA pointer
- */
-void ipa_power_modem_queue_wake(struct ipa *ipa);
-
-/**
- * ipa_power_modem_queue_active() - Report modem netdev TX queue active
- * @ipa: IPA pointer
- */
-void ipa_power_modem_queue_active(struct ipa *ipa);
-
-/**
* ipa_power_retention() - Control register retention on power collapse
* @ipa: IPA pointer
* @enable: Whether retention should be enabled or disabled
@@ -49,17 +31,6 @@ void ipa_power_modem_queue_active(struct ipa *ipa);
void ipa_power_retention(struct ipa *ipa, bool enable);
/**
- * ipa_power_suspend_handler() - Handler for SUSPEND IPA interrupts
- * @ipa: IPA pointer
- * @irq_id: IPA interrupt ID (unused)
- *
- * If an RX endpoint is suspended, and the IPA has a packet destined for
- * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP
- * that it should resume the endpoint.
- */
-void ipa_power_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id);
-
-/**
* ipa_power_setup() - Set up IPA power management
* @ipa: IPA pointer
*
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index f70f0a1d1cda..65c40e207802 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -96,7 +96,7 @@ static void ipa_server_init_complete(struct ipa_qmi *ipa_qmi)
IPA_QMI_INIT_COMPLETE_IND_SZ,
ipa_init_complete_ind_ei, &ind);
if (ret)
- dev_err(&ipa->pdev->dev,
+ dev_err(ipa->dev,
"error %d sending init complete indication\n", ret);
else
ipa_qmi->indication_sent = true;
@@ -148,7 +148,7 @@ static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi)
ipa = container_of(ipa_qmi, struct ipa, qmi);
ret = ipa_modem_start(ipa);
if (ret)
- dev_err(&ipa->pdev->dev, "error %d starting modem\n", ret);
+ dev_err(ipa->dev, "error %d starting modem\n", ret);
}
/* All QMI clients from the modem node are gone (modem shut down or crashed). */
@@ -199,7 +199,7 @@ static void ipa_server_indication_register(struct qmi_handle *qmi,
ipa_qmi->indication_requested = true;
ipa_qmi_ready(ipa_qmi); /* We might be ready now */
} else {
- dev_err(&ipa->pdev->dev,
+ dev_err(ipa->dev,
"error %d sending register indication response\n", ret);
}
}
@@ -228,7 +228,7 @@ static void ipa_server_driver_init_complete(struct qmi_handle *qmi,
ipa_qmi->uc_ready = true;
ipa_qmi_ready(ipa_qmi); /* We might be ready now */
} else {
- dev_err(&ipa->pdev->dev,
+ dev_err(ipa->dev,
"error %d sending init complete response\n", ret);
}
}
@@ -417,7 +417,7 @@ static void ipa_client_init_driver_work(struct work_struct *work)
qmi = &ipa_qmi->client_handle;
ipa = container_of(ipa_qmi, struct ipa, qmi);
- dev = &ipa->pdev->dev;
+ dev = ipa->dev;
ret = qmi_txn_init(qmi, &txn, NULL, NULL);
if (ret < 0) {
diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c
index 6a3203ae6f1e..98625956e0bb 100644
--- a/drivers/net/ipa/ipa_reg.c
+++ b/drivers/net/ipa/ipa_reg.c
@@ -4,6 +4,7 @@
* Copyright (C) 2019-2023 Linaro Ltd.
*/
+#include <linux/platform_device.h>
#include <linux/io.h>
#include "ipa.h"
@@ -132,9 +133,9 @@ static const struct regs *ipa_regs(enum ipa_version version)
}
}
-int ipa_reg_init(struct ipa *ipa)
+int ipa_reg_init(struct ipa *ipa, struct platform_device *pdev)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = &pdev->dev;
const struct regs *regs;
struct resource *res;
@@ -146,8 +147,7 @@ int ipa_reg_init(struct ipa *ipa)
return -EINVAL;
/* Setup IPA register memory */
- res = platform_get_resource_byname(ipa->pdev, IORESOURCE_MEM,
- "ipa-reg");
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ipa-reg");
if (!res) {
dev_err(dev, "DT error getting \"ipa-reg\" memory property\n");
return -ENODEV;
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 2998f115f12c..62c62495b796 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -12,6 +12,8 @@
#include "ipa_version.h"
#include "reg.h"
+struct platform_device;
+
struct ipa;
/**
@@ -643,7 +645,7 @@ extern const struct regs ipa_regs_v5_5;
const struct reg *ipa_reg(struct ipa *ipa, enum ipa_reg_id reg_id);
-int ipa_reg_init(struct ipa *ipa);
+int ipa_reg_init(struct ipa *ipa, struct platform_device *pdev);
void ipa_reg_exit(struct ipa *ipa);
#endif /* _IPA_REG_H_ */
diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c
index 5620dc271fac..aeccce9fab72 100644
--- a/drivers/net/ipa/ipa_smp2p.c
+++ b/drivers/net/ipa/ipa_smp2p.c
@@ -5,7 +5,7 @@
*/
#include <linux/types.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
@@ -84,15 +84,13 @@ struct ipa_smp2p {
*/
static void ipa_smp2p_notify(struct ipa_smp2p *smp2p)
{
- struct device *dev;
u32 value;
u32 mask;
if (smp2p->notified)
return;
- dev = &smp2p->ipa->pdev->dev;
- smp2p->power_on = pm_runtime_get_if_active(dev, true) > 0;
+ smp2p->power_on = pm_runtime_get_if_active(smp2p->ipa->dev, true) > 0;
/* Signal whether the IPA power is enabled */
mask = BIT(smp2p->enabled_bit);
@@ -152,15 +150,16 @@ static void ipa_smp2p_panic_notifier_unregister(struct ipa_smp2p *smp2p)
static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
{
struct ipa_smp2p *smp2p = dev_id;
+ struct ipa *ipa = smp2p->ipa;
struct device *dev;
int ret;
/* Ignore any (spurious) interrupts received after the first */
- if (smp2p->ipa->setup_complete)
+ if (ipa->setup_complete)
return IRQ_HANDLED;
/* Power needs to be active for setup */
- dev = &smp2p->ipa->pdev->dev;
+ dev = ipa->dev;
ret = pm_runtime_get_sync(dev);
if (ret < 0) {
dev_err(dev, "error %d getting power for setup\n", ret);
@@ -168,7 +167,7 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
}
/* An error here won't cause driver shutdown, so warn if one occurs */
- ret = ipa_setup(smp2p->ipa);
+ ret = ipa_setup(ipa);
WARN(ret != 0, "error %d from ipa_setup()\n", ret);
out_power_put:
@@ -179,14 +178,15 @@ out_power_put:
}
/* Initialize SMP2P interrupts */
-static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p, const char *name,
- irq_handler_t handler)
+static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p,
+ struct platform_device *pdev,
+ const char *name, irq_handler_t handler)
{
- struct device *dev = &smp2p->ipa->pdev->dev;
+ struct device *dev = &pdev->dev;
unsigned int irq;
int ret;
- ret = platform_get_irq_byname(smp2p->ipa->pdev, name);
+ ret = platform_get_irq_byname(pdev, name);
if (ret <= 0)
return ret ? : -EINVAL;
irq = ret;
@@ -208,7 +208,7 @@ static void ipa_smp2p_irq_exit(struct ipa_smp2p *smp2p, u32 irq)
/* Drop the power reference if it was taken in ipa_smp2p_notify() */
static void ipa_smp2p_power_release(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
if (!ipa->smp2p->power_on)
return;
@@ -219,10 +219,11 @@ static void ipa_smp2p_power_release(struct ipa *ipa)
}
/* Initialize the IPA SMP2P subsystem */
-int ipa_smp2p_init(struct ipa *ipa, bool modem_init)
+int
+ipa_smp2p_init(struct ipa *ipa, struct platform_device *pdev, bool modem_init)
{
struct qcom_smem_state *enabled_state;
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = &pdev->dev;
struct qcom_smem_state *valid_state;
struct ipa_smp2p *smp2p;
u32 enabled_bit;
@@ -261,7 +262,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init)
/* We have enough information saved to handle notifications */
ipa->smp2p = smp2p;
- ret = ipa_smp2p_irq_init(smp2p, "ipa-clock-query",
+ ret = ipa_smp2p_irq_init(smp2p, pdev, "ipa-clock-query",
ipa_smp2p_modem_clk_query_isr);
if (ret < 0)
goto err_null_smp2p;
@@ -273,7 +274,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init)
if (modem_init) {
/* Result will be non-zero (negative for error) */
- ret = ipa_smp2p_irq_init(smp2p, "ipa-setup-ready",
+ ret = ipa_smp2p_irq_init(smp2p, pdev, "ipa-setup-ready",
ipa_smp2p_modem_setup_ready_isr);
if (ret < 0)
goto err_notifier_unregister;
diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h
index 9b969b03d1a4..2a3d8eefb13b 100644
--- a/drivers/net/ipa/ipa_smp2p.h
+++ b/drivers/net/ipa/ipa_smp2p.h
@@ -8,17 +8,20 @@
#include <linux/types.h>
+struct platform_device;
+
struct ipa;
/**
* ipa_smp2p_init() - Initialize the IPA SMP2P subsystem
* @ipa: IPA pointer
+ * @pdev: Platform device pointer
* @modem_init: Whether the modem is responsible for GSI initialization
*
* Return: 0 if successful, or a negative error code
- *
*/
-int ipa_smp2p_init(struct ipa *ipa, bool modem_init);
+int ipa_smp2p_init(struct ipa *ipa, struct platform_device *pdev,
+ bool modem_init);
/**
* ipa_smp2p_exit() - Inverse of ipa_smp2p_init()
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 7b637bb8b41c..a24ac11b8893 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -163,7 +163,7 @@ ipa_table_mem(struct ipa *ipa, bool filter, bool hashed, bool ipv6)
bool ipa_filtered_valid(struct ipa *ipa, u64 filtered)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
u32 count;
if (!filtered) {
@@ -236,8 +236,7 @@ ipa_filter_reset_table(struct ipa *ipa, bool hashed, bool ipv6, bool modem)
trans = ipa_cmd_trans_alloc(ipa, hweight64(ep_mask));
if (!trans) {
- dev_err(&ipa->pdev->dev,
- "no transaction for %s filter reset\n",
+ dev_err(ipa->dev, "no transaction for %s filter reset\n",
modem ? "modem" : "AP");
return -EBUSY;
}
@@ -298,8 +297,7 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2);
if (!trans) {
- dev_err(&ipa->pdev->dev,
- "no transaction for %s route reset\n",
+ dev_err(ipa->dev, "no transaction for %s route reset\n",
modem ? "modem" : "AP");
return -EBUSY;
}
@@ -327,7 +325,7 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
void ipa_table_reset(struct ipa *ipa, bool modem)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
const char *ee_name;
int ret;
@@ -356,7 +354,7 @@ int ipa_table_hash_flush(struct ipa *ipa)
trans = ipa_cmd_trans_alloc(ipa, 1);
if (!trans) {
- dev_err(&ipa->pdev->dev, "no transaction for hash flush\n");
+ dev_err(ipa->dev, "no transaction for hash flush\n");
return -EBUSY;
}
@@ -469,7 +467,7 @@ int ipa_table_setup(struct ipa *ipa)
*/
trans = ipa_cmd_trans_alloc(ipa, 8);
if (!trans) {
- dev_err(&ipa->pdev->dev, "no transaction for table setup\n");
+ dev_err(ipa->dev, "no transaction for table setup\n");
return -EBUSY;
}
@@ -713,7 +711,7 @@ bool ipa_table_mem_valid(struct ipa *ipa, bool filter)
*/
int ipa_table_init(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
dma_addr_t addr;
__le64 le_addr;
__le64 *virt;
@@ -763,7 +761,7 @@ int ipa_table_init(struct ipa *ipa)
void ipa_table_exit(struct ipa *ipa)
{
u32 count = max_t(u32, 1 + ipa->filter_count, ipa->route_count);
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
size_t size;
size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64);
diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c
index 7eaa0b4ebed9..bfd5dc6dab43 100644
--- a/drivers/net/ipa/ipa_uc.c
+++ b/drivers/net/ipa/ipa_uc.c
@@ -127,7 +127,7 @@ static struct ipa_uc_mem_area *ipa_uc_shared(struct ipa *ipa)
static void ipa_uc_event_handler(struct ipa *ipa)
{
struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa);
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
if (shared->event == IPA_UC_EVENT_ERROR)
dev_err(dev, "microcontroller error event\n");
@@ -141,7 +141,7 @@ static void ipa_uc_event_handler(struct ipa *ipa)
static void ipa_uc_response_hdlr(struct ipa *ipa)
{
struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa);
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
/* An INIT_COMPLETED response message is sent to the AP by the
* microcontroller when it is operational. Other than this, the AP
@@ -191,7 +191,7 @@ void ipa_uc_config(struct ipa *ipa)
/* Inverse of ipa_uc_config() */
void ipa_uc_deconfig(struct ipa *ipa)
{
- struct device *dev = &ipa->pdev->dev;
+ struct device *dev = ipa->dev;
ipa_interrupt_disable(ipa, IPA_IRQ_UC_1);
ipa_interrupt_disable(ipa, IPA_IRQ_UC_0);
@@ -208,8 +208,8 @@ void ipa_uc_deconfig(struct ipa *ipa)
/* Take a proxy power reference for the microcontroller */
void ipa_uc_power(struct ipa *ipa)
{
+ struct device *dev = ipa->dev;
static bool already;
- struct device *dev;
int ret;
if (already)
@@ -217,7 +217,6 @@ void ipa_uc_power(struct ipa *ipa)
already = true; /* Only do this on first boot */
/* This power reference dropped in ipa_uc_response_hdlr() above */
- dev = &ipa->pdev->dev;
ret = pm_runtime_get_sync(dev);
if (ret < 0) {
pm_runtime_put_noidle(dev);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index df7c43a109e1..5920f7e63352 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -349,7 +349,7 @@ static int ipvlan_get_iflink(const struct net_device *dev)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
- return ipvlan->phy_dev->ifindex;
+ return READ_ONCE(ipvlan->phy_dev->ifindex);
}
static const struct net_device_ops ipvlan_netdev_ops = {
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 60944a4beada..1afc4c47be73 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -237,4 +237,5 @@ static void __exit ipvtap_exit(void)
module_exit(ipvtap_exit);
MODULE_ALIAS_RTNL_LINK("ipvtap");
MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>");
+MODULE_DESCRIPTION("IP-VLAN based tap driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index f6d53e63ef4e..f6eab66c2660 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -144,6 +144,7 @@ static int loopback_dev_init(struct net_device *dev)
dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
if (!dev->lstats)
return -ENOMEM;
+ netdev_lockdep_set_classes(dev);
return 0;
}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index e34816638569..0206b84284ab 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -607,11 +607,26 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
return ERR_PTR(-EINVAL);
}
- ret = skb_ensure_writable_head_tail(skb, dev);
- if (unlikely(ret < 0)) {
- macsec_txsa_put(tx_sa);
- kfree_skb(skb);
- return ERR_PTR(ret);
+ if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM ||
+ skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) {
+ struct sk_buff *nskb = skb_copy_expand(skb,
+ MACSEC_NEEDED_HEADROOM,
+ MACSEC_NEEDED_TAILROOM,
+ GFP_ATOMIC);
+ if (likely(nskb)) {
+ consume_skb(skb);
+ skb = nskb;
+ } else {
+ macsec_txsa_put(tx_sa);
+ kfree_skb(skb);
+ return ERR_PTR(-ENOMEM);
+ }
+ } else {
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb) {
+ macsec_txsa_put(tx_sa);
+ return ERR_PTR(-ENOMEM);
+ }
}
unprotected_len = skb->len;
@@ -3504,18 +3519,13 @@ static int macsec_dev_init(struct net_device *dev)
struct net_device *real_dev = macsec->real_dev;
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&macsec->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
dev->features = real_dev->features & MACSEC_FEATURES;
dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
macsec_set_head_tail_room(dev);
@@ -3535,7 +3545,6 @@ static void macsec_dev_uninit(struct net_device *dev)
struct macsec_dev *macsec = macsec_priv(dev);
gro_cells_destroy(&macsec->gro_cells);
- free_percpu(dev->tstats);
}
static netdev_features_t macsec_fix_features(struct net_device *dev,
@@ -3738,7 +3747,7 @@ static void macsec_get_stats64(struct net_device *dev,
static int macsec_get_iflink(const struct net_device *dev)
{
- return macsec_priv(dev)->real_dev->ifindex;
+ return READ_ONCE(macsec_priv(dev)->real_dev->ifindex);
}
static const struct net_device_ops macsec_netdev_ops = {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a3cc665757e8..0cec2783a3e7 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1158,7 +1158,7 @@ static int macvlan_dev_get_iflink(const struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
- return vlan->lowerdev->ifindex;
+ return READ_ONCE(vlan->lowerdev->ifindex);
}
static const struct ethtool_ops macvlan_ethtool_ops = {
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index 68f8ee0ec8ba..f40eb50bb978 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -94,6 +94,10 @@ static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
int ret;
u32 cmd;
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
+
/* Prepare the read operation */
cmd = MDIO_RD | (phy_id << MDIO_PMD_SHIFT) | (reg << MDIO_REG_SHIFT);
unimac_mdio_writel(priv, cmd, MDIO_CMD);
@@ -103,7 +107,7 @@ static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
ret = priv->wait_func(priv->wait_func_data);
if (ret)
- return ret;
+ goto out;
cmd = unimac_mdio_readl(priv, MDIO_CMD);
@@ -112,10 +116,15 @@ static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
* that condition here and ignore the MDIO controller read failure
* indication.
*/
- if (!(bus->phy_ignore_ta_mask & 1 << phy_id) && (cmd & MDIO_READ_FAIL))
- return -EIO;
+ if (!(bus->phy_ignore_ta_mask & 1 << phy_id) && (cmd & MDIO_READ_FAIL)) {
+ ret = -EIO;
+ goto out;
+ }
- return cmd & 0xffff;
+ ret = cmd & 0xffff;
+out:
+ clk_disable_unprepare(priv->clk);
+ return ret;
}
static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
@@ -123,6 +132,11 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
{
struct unimac_mdio_priv *priv = bus->priv;
u32 cmd;
+ int ret;
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
/* Prepare the write operation */
cmd = MDIO_WR | (phy_id << MDIO_PMD_SHIFT) |
@@ -131,7 +145,10 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
unimac_mdio_start(priv);
- return priv->wait_func(priv->wait_func_data);
+ ret = priv->wait_func(priv->wait_func_data);
+ clk_disable_unprepare(priv->clk);
+
+ return ret;
}
/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
@@ -178,14 +195,19 @@ static int unimac_mdio_reset(struct mii_bus *bus)
return 0;
}
-static void unimac_mdio_clk_set(struct unimac_mdio_priv *priv)
+static int unimac_mdio_clk_set(struct unimac_mdio_priv *priv)
{
unsigned long rate;
u32 reg, div;
+ int ret;
/* Keep the hardware default values */
if (!priv->clk_freq)
- return;
+ return 0;
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
if (!priv->clk)
rate = 250000000;
@@ -195,7 +217,8 @@ static void unimac_mdio_clk_set(struct unimac_mdio_priv *priv)
div = (rate / (2 * priv->clk_freq)) - 1;
if (div & ~MDIO_CLK_DIV_MASK) {
pr_warn("Incorrect MDIO clock frequency, ignoring\n");
- return;
+ ret = 0;
+ goto out;
}
/* The MDIO clock is the reference clock (typically 250Mhz) divided by
@@ -205,6 +228,9 @@ static void unimac_mdio_clk_set(struct unimac_mdio_priv *priv)
reg &= ~(MDIO_CLK_DIV_MASK << MDIO_CLK_DIV_SHIFT);
reg |= div << MDIO_CLK_DIV_SHIFT;
unimac_mdio_writel(priv, reg, MDIO_CFG);
+out:
+ clk_disable_unprepare(priv->clk);
+ return ret;
}
static int unimac_mdio_probe(struct platform_device *pdev)
@@ -235,24 +261,12 @@ static int unimac_mdio_probe(struct platform_device *pdev)
return -ENOMEM;
}
- priv->clk = devm_clk_get_optional(&pdev->dev, NULL);
- if (IS_ERR(priv->clk))
- return PTR_ERR(priv->clk);
-
- ret = clk_prepare_enable(priv->clk);
- if (ret)
- return ret;
-
if (of_property_read_u32(np, "clock-frequency", &priv->clk_freq))
priv->clk_freq = 0;
- unimac_mdio_clk_set(priv);
-
priv->mii_bus = mdiobus_alloc();
- if (!priv->mii_bus) {
- ret = -ENOMEM;
- goto out_clk_disable;
- }
+ if (!priv->mii_bus)
+ return -ENOMEM;
bus = priv->mii_bus;
bus->priv = priv;
@@ -261,17 +275,29 @@ static int unimac_mdio_probe(struct platform_device *pdev)
priv->wait_func = pdata->wait_func;
priv->wait_func_data = pdata->wait_func_data;
bus->phy_mask = ~pdata->phy_mask;
+ priv->clk = pdata->clk;
} else {
bus->name = "unimac MII bus";
priv->wait_func_data = priv;
priv->wait_func = unimac_mdio_poll;
+ priv->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ }
+
+ if (IS_ERR(priv->clk)) {
+ ret = PTR_ERR(priv->clk);
+ goto out_mdio_free;
}
+
bus->parent = &pdev->dev;
bus->read = unimac_mdio_read;
bus->write = unimac_mdio_write;
bus->reset = unimac_mdio_reset;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id);
+ ret = unimac_mdio_clk_set(priv);
+ if (ret)
+ goto out_mdio_free;
+
ret = of_mdiobus_register(bus, np);
if (ret) {
dev_err(&pdev->dev, "MDIO bus registration failed\n");
@@ -286,8 +312,6 @@ static int unimac_mdio_probe(struct platform_device *pdev)
out_mdio_free:
mdiobus_free(bus);
-out_clk_disable:
- clk_disable_unprepare(priv->clk);
return ret;
}
@@ -297,36 +321,20 @@ static void unimac_mdio_remove(struct platform_device *pdev)
mdiobus_unregister(priv->mii_bus);
mdiobus_free(priv->mii_bus);
- clk_disable_unprepare(priv->clk);
-}
-
-static int __maybe_unused unimac_mdio_suspend(struct device *d)
-{
- struct unimac_mdio_priv *priv = dev_get_drvdata(d);
-
- clk_disable_unprepare(priv->clk);
-
- return 0;
}
static int __maybe_unused unimac_mdio_resume(struct device *d)
{
struct unimac_mdio_priv *priv = dev_get_drvdata(d);
- int ret;
- ret = clk_prepare_enable(priv->clk);
- if (ret)
- return ret;
-
- unimac_mdio_clk_set(priv);
-
- return 0;
+ return unimac_mdio_clk_set(priv);
}
static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops,
- unimac_mdio_suspend, unimac_mdio_resume);
+ NULL, unimac_mdio_resume);
static const struct of_device_id unimac_mdio_ids[] = {
+ { .compatible = "brcm,asp-v2.2-mdio", },
{ .compatible = "brcm,asp-v2.1-mdio", },
{ .compatible = "brcm,asp-v2.0-mdio", },
{ .compatible = "brcm,genet-mdio-v5", },
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index abd8b508ec16..9d8f43b28aac 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -14,6 +14,20 @@
#include <linux/clk.h>
#define MDIO_MODE_REG 0x40
+#define MDIO_MODE_MDC_MODE BIT(12)
+/* 0 = Clause 22, 1 = Clause 45 */
+#define MDIO_MODE_C45 BIT(8)
+#define MDIO_MODE_DIV_MASK GENMASK(7, 0)
+#define MDIO_MODE_DIV(x) FIELD_PREP(MDIO_MODE_DIV_MASK, (x) - 1)
+#define MDIO_MODE_DIV_1 0x0
+#define MDIO_MODE_DIV_2 0x1
+#define MDIO_MODE_DIV_4 0x3
+#define MDIO_MODE_DIV_8 0x7
+#define MDIO_MODE_DIV_16 0xf
+#define MDIO_MODE_DIV_32 0x1f
+#define MDIO_MODE_DIV_64 0x3f
+#define MDIO_MODE_DIV_128 0x7f
+#define MDIO_MODE_DIV_256 0xff
#define MDIO_ADDR_REG 0x44
#define MDIO_DATA_WRITE_REG 0x48
#define MDIO_DATA_READ_REG 0x4c
@@ -26,9 +40,6 @@
#define MDIO_CMD_ACCESS_CODE_C45_WRITE 1
#define MDIO_CMD_ACCESS_CODE_C45_READ 2
-/* 0 = Clause 22, 1 = Clause 45 */
-#define MDIO_MODE_C45 BIT(8)
-
#define IPQ4019_MDIO_TIMEOUT 10000
#define IPQ4019_MDIO_SLEEP 10
@@ -41,6 +52,7 @@ struct ipq4019_mdio_data {
void __iomem *membase;
void __iomem *eth_ldo_rdy;
struct clk *mdio_clk;
+ unsigned int mdc_rate;
};
static int ipq4019_mdio_wait_busy(struct mii_bus *bus)
@@ -203,6 +215,38 @@ static int ipq4019_mdio_write_c22(struct mii_bus *bus, int mii_id, int regnum,
return 0;
}
+static int ipq4019_mdio_set_div(struct ipq4019_mdio_data *priv)
+{
+ unsigned long ahb_rate;
+ int div;
+ u32 val;
+
+ /* If we don't have a clock for AHB use the fixed value */
+ ahb_rate = IPQ_MDIO_CLK_RATE;
+ if (priv->mdio_clk)
+ ahb_rate = clk_get_rate(priv->mdio_clk);
+
+ /* MDC rate is ahb_rate/(MDIO_MODE_DIV + 1)
+ * While supported, internal documentation doesn't
+ * assure correct functionality of the MDIO bus
+ * with divider of 1, 2 or 4.
+ */
+ for (div = 8; div <= 256; div *= 2) {
+ /* The requested rate is supported by the div */
+ if (priv->mdc_rate == DIV_ROUND_UP(ahb_rate, div)) {
+ val = readl(priv->membase + MDIO_MODE_REG);
+ val &= ~MDIO_MODE_DIV_MASK;
+ val |= MDIO_MODE_DIV(div);
+ writel(val, priv->membase + MDIO_MODE_REG);
+
+ return 0;
+ }
+ }
+
+ /* The requested rate is not supported */
+ return -EINVAL;
+}
+
static int ipq_mdio_reset(struct mii_bus *bus)
{
struct ipq4019_mdio_data *priv = bus->priv;
@@ -225,10 +269,58 @@ static int ipq_mdio_reset(struct mii_bus *bus)
return ret;
ret = clk_prepare_enable(priv->mdio_clk);
- if (ret == 0)
- mdelay(10);
+ if (ret)
+ return ret;
- return ret;
+ mdelay(10);
+
+ /* Restore MDC rate */
+ return ipq4019_mdio_set_div(priv);
+}
+
+static void ipq4019_mdio_select_mdc_rate(struct platform_device *pdev,
+ struct ipq4019_mdio_data *priv)
+{
+ unsigned long ahb_rate;
+ int div;
+ u32 val;
+
+ /* MDC rate defined in DT, we don't have to decide a default value */
+ if (!of_property_read_u32(pdev->dev.of_node, "clock-frequency",
+ &priv->mdc_rate))
+ return;
+
+ /* If we don't have a clock for AHB use the fixed value */
+ ahb_rate = IPQ_MDIO_CLK_RATE;
+ if (priv->mdio_clk)
+ ahb_rate = clk_get_rate(priv->mdio_clk);
+
+ /* Check what is the current div set */
+ val = readl(priv->membase + MDIO_MODE_REG);
+ div = FIELD_GET(MDIO_MODE_DIV_MASK, val);
+
+ /* div is not set to the default value of /256
+ * Probably someone changed that (bootloader, other drivers)
+ * Keep this and don't overwrite it.
+ */
+ if (div != MDIO_MODE_DIV_256) {
+ priv->mdc_rate = DIV_ROUND_UP(ahb_rate, div + 1);
+ return;
+ }
+
+ /* If div is /256 assume nobody have set this value and
+ * try to find one MDC rate that is close the 802.3 spec of
+ * 2.5MHz
+ */
+ for (div = 256; div >= 8; div /= 2) {
+ /* Stop as soon as we found a divider that
+ * reached the closest value to 2.5MHz
+ */
+ if (DIV_ROUND_UP(ahb_rate, div) > 2500000)
+ break;
+
+ priv->mdc_rate = DIV_ROUND_UP(ahb_rate, div);
+ }
}
static int ipq4019_mdio_probe(struct platform_device *pdev)
@@ -252,6 +344,11 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
if (IS_ERR(priv->mdio_clk))
return PTR_ERR(priv->mdio_clk);
+ ipq4019_mdio_select_mdc_rate(pdev, priv);
+ ret = ipq4019_mdio_set_div(priv);
+ if (ret)
+ return ret;
+
/* The platform resource is provided on the chipset IPQ5018 */
/* This resource is optional */
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 64ebcb6d235c..08e607f62e10 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -139,6 +139,53 @@ bool of_mdiobus_child_is_phy(struct device_node *child)
}
EXPORT_SYMBOL(of_mdiobus_child_is_phy);
+static int __of_mdiobus_parse_phys(struct mii_bus *mdio, struct device_node *np,
+ bool *scanphys)
+{
+ struct device_node *child;
+ int addr, rc = 0;
+
+ /* Loop over the child nodes and register a phy_device for each phy */
+ for_each_available_child_of_node(np, child) {
+ if (of_node_name_eq(child, "ethernet-phy-package")) {
+ /* Ignore invalid ethernet-phy-package node */
+ if (!of_property_present(child, "reg"))
+ continue;
+
+ rc = __of_mdiobus_parse_phys(mdio, child, NULL);
+ if (rc && rc != -ENODEV)
+ goto exit;
+
+ continue;
+ }
+
+ addr = of_mdio_parse_addr(&mdio->dev, child);
+ if (addr < 0) {
+ /* Skip scanning for invalid ethernet-phy-package node */
+ if (scanphys)
+ *scanphys = true;
+ continue;
+ }
+
+ if (of_mdiobus_child_is_phy(child))
+ rc = of_mdiobus_register_phy(mdio, child, addr);
+ else
+ rc = of_mdiobus_register_device(mdio, child, addr);
+
+ if (rc == -ENODEV)
+ dev_err(&mdio->dev,
+ "MDIO device at address %d is missing.\n",
+ addr);
+ else if (rc)
+ goto exit;
+ }
+
+ return 0;
+exit:
+ of_node_put(child);
+ return rc;
+}
+
/**
* __of_mdiobus_register - Register mii_bus and create PHYs from the device tree
* @mdio: pointer to mii_bus structure
@@ -180,33 +227,18 @@ int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np,
return rc;
/* Loop over the child nodes and register a phy_device for each phy */
- for_each_available_child_of_node(np, child) {
- addr = of_mdio_parse_addr(&mdio->dev, child);
- if (addr < 0) {
- scanphys = true;
- continue;
- }
-
- if (of_mdiobus_child_is_phy(child))
- rc = of_mdiobus_register_phy(mdio, child, addr);
- else
- rc = of_mdiobus_register_device(mdio, child, addr);
-
- if (rc == -ENODEV)
- dev_err(&mdio->dev,
- "MDIO device at address %d is missing.\n",
- addr);
- else if (rc)
- goto unregister;
- }
+ rc = __of_mdiobus_parse_phys(mdio, np, &scanphys);
+ if (rc)
+ goto unregister;
if (!scanphys)
return 0;
/* auto scan for PHYs with empty reg property */
for_each_available_child_of_node(np, child) {
- /* Skip PHYs with reg property set */
- if (of_property_present(child, "reg"))
+ /* Skip PHYs with reg property set or ethernet-phy-package node */
+ if (of_property_present(child, "reg") ||
+ of_node_name_eq(child, "ethernet-phy-package"))
continue;
for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
@@ -227,15 +259,16 @@ int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np,
if (!rc)
break;
if (rc != -ENODEV)
- goto unregister;
+ goto put_unregister;
}
}
}
return 0;
-unregister:
+put_unregister:
of_node_put(child);
+unregister:
mdiobus_unregister(mdio);
return rc;
}
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 6e14ba5e06c8..d7070dd4fe73 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -42,14 +42,20 @@ MODULE_AUTHOR("Maintainer: Matt Mackall <mpm@selenic.com>");
MODULE_DESCRIPTION("Console driver for network interfaces");
MODULE_LICENSE("GPL");
-#define MAX_PARAM_LENGTH 256
-#define MAX_PRINT_CHUNK 1000
+#define MAX_PARAM_LENGTH 256
+#define MAX_USERDATA_ENTRY_LENGTH 256
+#define MAX_USERDATA_VALUE_LENGTH 200
+/* The number 3 comes from userdata entry format characters (' ', '=', '\n') */
+#define MAX_USERDATA_NAME_LENGTH (MAX_USERDATA_ENTRY_LENGTH - \
+ MAX_USERDATA_VALUE_LENGTH - 3)
+#define MAX_USERDATA_ITEMS 16
+#define MAX_PRINT_CHUNK 1000
static char config[MAX_PARAM_LENGTH];
module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0);
MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]");
-static bool oops_only = false;
+static bool oops_only;
module_param(oops_only, bool, 0600);
MODULE_PARM_DESC(oops_only, "Only log oops messages");
@@ -79,7 +85,10 @@ static struct console netconsole_ext;
/**
* struct netconsole_target - Represents a configured netconsole target.
* @list: Links this target into the target_list.
- * @item: Links us into the configfs subsystem hierarchy.
+ * @group: Links us into the configfs subsystem hierarchy.
+ * @userdata_group: Links to the userdata configfs hierarchy
+ * @userdata_complete: Cached, formatted string of append
+ * @userdata_length: String length of userdata_complete
* @enabled: On / off knob to enable / disable target.
* Visible from userspace (read-write).
* We maintain a strict 1:1 correspondence between this and
@@ -102,7 +111,10 @@ static struct console netconsole_ext;
struct netconsole_target {
struct list_head list;
#ifdef CONFIG_NETCONSOLE_DYNAMIC
- struct config_item item;
+ struct config_group group;
+ struct config_group userdata_group;
+ char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS];
+ size_t userdata_length;
#endif
bool enabled;
bool extended;
@@ -134,14 +146,14 @@ static void __exit dynamic_netconsole_exit(void)
*/
static void netconsole_target_get(struct netconsole_target *nt)
{
- if (config_item_name(&nt->item))
- config_item_get(&nt->item);
+ if (config_item_name(&nt->group.cg_item))
+ config_group_get(&nt->group);
}
static void netconsole_target_put(struct netconsole_target *nt)
{
- if (config_item_name(&nt->item))
- config_item_put(&nt->item);
+ if (config_item_name(&nt->group.cg_item))
+ config_group_put(&nt->group);
}
#else /* !CONFIG_NETCONSOLE_DYNAMIC */
@@ -215,15 +227,33 @@ static struct netconsole_target *alloc_and_init(void)
* | remote_ip
* | local_mac
* | remote_mac
+ * | userdata/
+ * | <key>/
+ * | value
+ * | ...
* |
* <target>/...
*/
static struct netconsole_target *to_target(struct config_item *item)
{
- return item ?
- container_of(item, struct netconsole_target, item) :
- NULL;
+ struct config_group *cfg_group;
+
+ cfg_group = to_config_group(item);
+ if (!cfg_group)
+ return NULL;
+ return container_of(to_config_group(item),
+ struct netconsole_target, group);
+}
+
+/* Get rid of possible trailing newline, returning the new length */
+static void trim_newline(char *s, size_t maxlen)
+{
+ size_t len;
+
+ len = strnlen(s, maxlen);
+ if (s[len - 1] == '\n')
+ s[len - 1] = '\0';
}
/*
@@ -370,7 +400,7 @@ static ssize_t release_store(struct config_item *item, const char *buf,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
err = -EINVAL;
goto out_unlock;
}
@@ -398,7 +428,7 @@ static ssize_t extended_store(struct config_item *item, const char *buf,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
err = -EINVAL;
goto out_unlock;
}
@@ -420,22 +450,17 @@ static ssize_t dev_name_store(struct config_item *item, const char *buf,
size_t count)
{
struct netconsole_target *nt = to_target(item);
- size_t len;
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
mutex_unlock(&dynamic_netconsole_mutex);
return -EINVAL;
}
strscpy(nt->np.dev_name, buf, IFNAMSIZ);
-
- /* Get rid of possible trailing newline from echo(1) */
- len = strnlen(nt->np.dev_name, IFNAMSIZ);
- if (nt->np.dev_name[len - 1] == '\n')
- nt->np.dev_name[len - 1] = '\0';
+ trim_newline(nt->np.dev_name, IFNAMSIZ);
mutex_unlock(&dynamic_netconsole_mutex);
return strnlen(buf, count);
@@ -450,7 +475,7 @@ static ssize_t local_port_store(struct config_item *item, const char *buf,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
goto out_unlock;
}
@@ -473,7 +498,7 @@ static ssize_t remote_port_store(struct config_item *item,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
goto out_unlock;
}
@@ -495,12 +520,13 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
goto out_unlock;
}
if (strnchr(buf, count, ':')) {
const char *end;
+
if (in6_pton(buf, count, nt->np.local_ip.in6.s6_addr, -1, &end) > 0) {
if (*end && *end != '\n') {
pr_err("invalid IPv6 address at: <%c>\n", *end);
@@ -510,9 +536,9 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf,
} else
goto out_unlock;
} else {
- if (!nt->np.ipv6) {
+ if (!nt->np.ipv6)
nt->np.local_ip.ip = in_aton(buf);
- } else
+ else
goto out_unlock;
}
@@ -531,12 +557,13 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
goto out_unlock;
}
if (strnchr(buf, count, ':')) {
const char *end;
+
if (in6_pton(buf, count, nt->np.remote_ip.in6.s6_addr, -1, &end) > 0) {
if (*end && *end != '\n') {
pr_err("invalid IPv6 address at: <%c>\n", *end);
@@ -546,9 +573,9 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf,
} else
goto out_unlock;
} else {
- if (!nt->np.ipv6) {
+ if (!nt->np.ipv6)
nt->np.remote_ip.ip = in_aton(buf);
- } else
+ else
goto out_unlock;
}
@@ -568,7 +595,7 @@ static ssize_t remote_mac_store(struct config_item *item, const char *buf,
mutex_lock(&dynamic_netconsole_mutex);
if (nt->enabled) {
pr_err("target (%s) is enabled, disable to update parameters\n",
- config_item_name(&nt->item));
+ config_item_name(&nt->group.cg_item));
goto out_unlock;
}
@@ -585,6 +612,180 @@ out_unlock:
return -EINVAL;
}
+struct userdatum {
+ struct config_item item;
+ char value[MAX_USERDATA_VALUE_LENGTH];
+};
+
+static struct userdatum *to_userdatum(struct config_item *item)
+{
+ return container_of(item, struct userdatum, item);
+}
+
+struct userdata {
+ struct config_group group;
+};
+
+static struct userdata *to_userdata(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct userdata, group);
+}
+
+static struct netconsole_target *userdata_to_target(struct userdata *ud)
+{
+ struct config_group *netconsole_group;
+
+ netconsole_group = to_config_group(ud->group.cg_item.ci_parent);
+ return to_target(&netconsole_group->cg_item);
+}
+
+static ssize_t userdatum_value_show(struct config_item *item, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", &(to_userdatum(item)->value[0]));
+}
+
+static void update_userdata(struct netconsole_target *nt)
+{
+ int complete_idx = 0, child_count = 0;
+ struct list_head *entry;
+
+ /* Clear the current string in case the last userdatum was deleted */
+ nt->userdata_length = 0;
+ nt->userdata_complete[0] = 0;
+
+ list_for_each(entry, &nt->userdata_group.cg_children) {
+ struct userdatum *udm_item;
+ struct config_item *item;
+
+ if (child_count >= MAX_USERDATA_ITEMS)
+ break;
+ child_count++;
+
+ item = container_of(entry, struct config_item, ci_entry);
+ udm_item = to_userdatum(item);
+
+ /* Skip userdata with no value set */
+ if (strnlen(udm_item->value, MAX_USERDATA_VALUE_LENGTH) == 0)
+ continue;
+
+ /* This doesn't overflow userdata_complete since it will write
+ * one entry length (1/MAX_USERDATA_ITEMS long), entry count is
+ * checked to not exceed MAX items with child_count above
+ */
+ complete_idx += scnprintf(&nt->userdata_complete[complete_idx],
+ MAX_USERDATA_ENTRY_LENGTH, " %s=%s\n",
+ item->ci_name, udm_item->value);
+ }
+ nt->userdata_length = strnlen(nt->userdata_complete,
+ sizeof(nt->userdata_complete));
+}
+
+static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
+ size_t count)
+{
+ struct userdatum *udm = to_userdatum(item);
+ struct netconsole_target *nt;
+ struct userdata *ud;
+ int ret;
+
+ if (count > MAX_USERDATA_VALUE_LENGTH)
+ return -EMSGSIZE;
+
+ mutex_lock(&dynamic_netconsole_mutex);
+
+ ret = strscpy(udm->value, buf, sizeof(udm->value));
+ if (ret < 0)
+ goto out_unlock;
+ trim_newline(udm->value, sizeof(udm->value));
+
+ ud = to_userdata(item->ci_parent);
+ nt = userdata_to_target(ud);
+ update_userdata(nt);
+
+ mutex_unlock(&dynamic_netconsole_mutex);
+ return count;
+out_unlock:
+ mutex_unlock(&dynamic_netconsole_mutex);
+ return ret;
+}
+
+CONFIGFS_ATTR(userdatum_, value);
+
+static struct configfs_attribute *userdatum_attrs[] = {
+ &userdatum_attr_value,
+ NULL,
+};
+
+static void userdatum_release(struct config_item *item)
+{
+ kfree(to_userdatum(item));
+}
+
+static struct configfs_item_operations userdatum_ops = {
+ .release = userdatum_release,
+};
+
+static const struct config_item_type userdatum_type = {
+ .ct_item_ops = &userdatum_ops,
+ .ct_attrs = userdatum_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_item *userdatum_make_item(struct config_group *group,
+ const char *name)
+{
+ struct netconsole_target *nt;
+ struct userdatum *udm;
+ struct userdata *ud;
+ size_t child_count;
+
+ if (strlen(name) > MAX_USERDATA_NAME_LENGTH)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ ud = to_userdata(&group->cg_item);
+ nt = userdata_to_target(ud);
+ child_count = list_count_nodes(&nt->userdata_group.cg_children);
+ if (child_count >= MAX_USERDATA_ITEMS)
+ return ERR_PTR(-ENOSPC);
+
+ udm = kzalloc(sizeof(*udm), GFP_KERNEL);
+ if (!udm)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&udm->item, name, &userdatum_type);
+ return &udm->item;
+}
+
+static void userdatum_drop(struct config_group *group, struct config_item *item)
+{
+ struct netconsole_target *nt;
+ struct userdata *ud;
+
+ ud = to_userdata(&group->cg_item);
+ nt = userdata_to_target(ud);
+
+ mutex_lock(&dynamic_netconsole_mutex);
+ update_userdata(nt);
+ config_item_put(item);
+ mutex_unlock(&dynamic_netconsole_mutex);
+}
+
+static struct configfs_attribute *userdata_attrs[] = {
+ NULL,
+};
+
+static struct configfs_group_operations userdata_ops = {
+ .make_item = userdatum_make_item,
+ .drop_item = userdatum_drop,
+};
+
+static struct config_item_type userdata_type = {
+ .ct_item_ops = &userdatum_ops,
+ .ct_group_ops = &userdata_ops,
+ .ct_attrs = userdata_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
CONFIGFS_ATTR(, enabled);
CONFIGFS_ATTR(, extended);
CONFIGFS_ATTR(, dev_name);
@@ -629,6 +830,15 @@ static const struct config_item_type netconsole_target_type = {
.ct_owner = THIS_MODULE,
};
+static void init_target_config_group(struct netconsole_target *nt,
+ const char *name)
+{
+ config_group_init_type_name(&nt->group, name, &netconsole_target_type);
+ config_group_init_type_name(&nt->userdata_group, "userdata",
+ &userdata_type);
+ configfs_add_default_group(&nt->userdata_group, &nt->group);
+}
+
static struct netconsole_target *find_cmdline_target(const char *name)
{
struct netconsole_target *nt, *ret = NULL;
@@ -636,7 +846,7 @@ static struct netconsole_target *find_cmdline_target(const char *name)
spin_lock_irqsave(&target_list_lock, flags);
list_for_each_entry(nt, &target_list, list) {
- if (!strcmp(nt->item.ci_name, name)) {
+ if (!strcmp(nt->group.cg_item.ci_name, name)) {
ret = nt;
break;
}
@@ -650,8 +860,8 @@ static struct netconsole_target *find_cmdline_target(const char *name)
* Group operations and type for netconsole_subsys.
*/
-static struct config_item *make_netconsole_target(struct config_group *group,
- const char *name)
+static struct config_group *make_netconsole_target(struct config_group *group,
+ const char *name)
{
struct netconsole_target *nt;
unsigned long flags;
@@ -663,23 +873,25 @@ static struct config_item *make_netconsole_target(struct config_group *group,
if (!strncmp(name, NETCONSOLE_PARAM_TARGET_PREFIX,
strlen(NETCONSOLE_PARAM_TARGET_PREFIX))) {
nt = find_cmdline_target(name);
- if (nt)
- return &nt->item;
+ if (nt) {
+ init_target_config_group(nt, name);
+ return &nt->group;
+ }
}
nt = alloc_and_init();
if (!nt)
return ERR_PTR(-ENOMEM);
- /* Initialize the config_item member */
- config_item_init_type_name(&nt->item, name, &netconsole_target_type);
+ /* Initialize the config_group member */
+ init_target_config_group(nt, name);
/* Adding, but it is disabled */
spin_lock_irqsave(&target_list_lock, flags);
list_add(&nt->list, &target_list);
spin_unlock_irqrestore(&target_list_lock, flags);
- return &nt->item;
+ return &nt->group;
}
static void drop_netconsole_target(struct config_group *group,
@@ -699,11 +911,11 @@ static void drop_netconsole_target(struct config_group *group,
if (nt->enabled)
netpoll_cleanup(&nt->np);
- config_item_put(&nt->item);
+ config_item_put(&nt->group.cg_item);
}
static struct configfs_group_operations netconsole_subsys_group_ops = {
- .make_item = make_netconsole_target,
+ .make_group = make_netconsole_target,
.drop_item = drop_netconsole_target,
};
@@ -729,8 +941,7 @@ static void populate_configfs_item(struct netconsole_target *nt,
snprintf(target_name, sizeof(target_name), "%s%d",
NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count);
- config_item_init_type_name(&nt->item, target_name,
- &netconsole_target_type);
+ init_target_config_group(nt, target_name);
}
#endif /* CONFIG_NETCONSOLE_DYNAMIC */
@@ -781,6 +992,7 @@ restart:
spin_unlock_irqrestore(&target_list_lock, flags);
if (stopped) {
const char *msg = "had an event";
+
switch (event) {
case NETDEV_UNREGISTER:
msg = "unregistered";
@@ -824,19 +1036,34 @@ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg,
const char *msg_ready = msg;
const char *release;
int release_len = 0;
+ int userdata_len = 0;
+ char *userdata = NULL;
+
+#ifdef CONFIG_NETCONSOLE_DYNAMIC
+ userdata = nt->userdata_complete;
+ userdata_len = nt->userdata_length;
+#endif
if (nt->release) {
release = init_utsname()->release;
release_len = strlen(release) + 1;
}
- if (msg_len + release_len <= MAX_PRINT_CHUNK) {
+ if (msg_len + release_len + userdata_len <= MAX_PRINT_CHUNK) {
/* No fragmentation needed */
if (nt->release) {
scnprintf(buf, MAX_PRINT_CHUNK, "%s,%s", release, msg);
msg_len += release_len;
- msg_ready = buf;
+ } else {
+ memcpy(buf, msg, msg_len);
}
+
+ if (userdata)
+ msg_len += scnprintf(&buf[msg_len],
+ MAX_PRINT_CHUNK - msg_len,
+ "%s", userdata);
+
+ msg_ready = buf;
netpoll_send_udp(&nt->np, msg_ready, msg_len);
return;
}
@@ -860,24 +1087,48 @@ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg,
memcpy(buf + release_len, header, header_len);
header_len += release_len;
- while (offset < body_len) {
+ while (offset < body_len + userdata_len) {
int this_header = header_len;
- int this_chunk;
+ int this_offset = 0;
+ int this_chunk = 0;
this_header += scnprintf(buf + this_header,
sizeof(buf) - this_header,
- ",ncfrag=%d/%d;", offset, body_len);
-
- this_chunk = min(body_len - offset,
- MAX_PRINT_CHUNK - this_header);
- if (WARN_ON_ONCE(this_chunk <= 0))
- return;
-
- memcpy(buf + this_header, body + offset, this_chunk);
-
- netpoll_send_udp(&nt->np, buf, this_header + this_chunk);
+ ",ncfrag=%d/%d;", offset,
+ body_len + userdata_len);
+
+ /* Not all body data has been written yet */
+ if (offset < body_len) {
+ this_chunk = min(body_len - offset,
+ MAX_PRINT_CHUNK - this_header);
+ if (WARN_ON_ONCE(this_chunk <= 0))
+ return;
+ memcpy(buf + this_header, body + offset, this_chunk);
+ this_offset += this_chunk;
+ }
+ /* Body is fully written and there is pending userdata to write,
+ * append userdata in this chunk
+ */
+ if (offset + this_offset >= body_len &&
+ offset + this_offset < userdata_len + body_len) {
+ int sent_userdata = (offset + this_offset) - body_len;
+ int preceding_bytes = this_chunk + this_header;
+
+ if (WARN_ON_ONCE(sent_userdata < 0))
+ return;
+
+ this_chunk = min(userdata_len - sent_userdata,
+ MAX_PRINT_CHUNK - preceding_bytes);
+ if (WARN_ON_ONCE(this_chunk <= 0))
+ return;
+ memcpy(buf + this_header + this_offset,
+ userdata + sent_userdata,
+ this_chunk);
+ this_offset += this_chunk;
+ }
- offset += this_chunk;
+ netpoll_send_udp(&nt->np, buf, this_header + this_offset);
+ offset += this_offset;
}
}
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index bcbc1e19edde..64c0cdd31bf8 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -129,7 +129,7 @@ static void nsim_bus_dev_release(struct device *dev)
complete(&nsim_bus_devs_released);
}
-static struct device_type nsim_bus_dev_type = {
+static const struct device_type nsim_bus_dev_type = {
.groups = nsim_bus_dev_attr_groups,
.release = nsim_bus_dev_release,
};
@@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
}
static BUS_ATTR_WO(del_device);
+static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+ struct netdevsim *nsim_a, *nsim_b, *peer;
+ struct net_device *dev_a, *dev_b;
+ unsigned int ifidx_a, ifidx_b;
+ int netnsfd_a, netnsfd_b, err;
+ struct net *ns_a, *ns_b;
+
+ err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b,
+ &ifidx_b);
+ if (err != 4) {
+ pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n");
+ return -EINVAL;
+ }
+
+ ns_a = get_net_ns_by_fd(netnsfd_a);
+ if (IS_ERR(ns_a)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd_a);
+ return -EINVAL;
+ }
+
+ ns_b = get_net_ns_by_fd(netnsfd_b);
+ if (IS_ERR(ns_b)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd_b);
+ put_net(ns_a);
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+ rtnl_lock();
+ dev_a = __dev_get_by_index(ns_a, ifidx_a);
+ if (!dev_a) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n",
+ ifidx_a, netnsfd_a);
+ goto out_err;
+ }
+
+ if (!netdev_is_nsim(dev_a)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
+ ifidx_a, netnsfd_a);
+ goto out_err;
+ }
+
+ dev_b = __dev_get_by_index(ns_b, ifidx_b);
+ if (!dev_b) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n",
+ ifidx_b, netnsfd_b);
+ goto out_err;
+ }
+
+ if (!netdev_is_nsim(dev_b)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
+ ifidx_b, netnsfd_b);
+ goto out_err;
+ }
+
+ if (dev_a == dev_b) {
+ pr_err("Cannot link a netdevsim to itself\n");
+ goto out_err;
+ }
+
+ err = -EBUSY;
+ nsim_a = netdev_priv(dev_a);
+ peer = rtnl_dereference(nsim_a->peer);
+ if (peer) {
+ pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a,
+ ifidx_a);
+ goto out_err;
+ }
+
+ nsim_b = netdev_priv(dev_b);
+ peer = rtnl_dereference(nsim_b->peer);
+ if (peer) {
+ pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b,
+ ifidx_b);
+ goto out_err;
+ }
+
+ err = 0;
+ rcu_assign_pointer(nsim_a->peer, nsim_b);
+ rcu_assign_pointer(nsim_b->peer, nsim_a);
+
+out_err:
+ put_net(ns_b);
+ put_net(ns_a);
+ rtnl_unlock();
+
+ return !err ? count : err;
+}
+static BUS_ATTR_WO(link_device);
+
+static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+ struct netdevsim *nsim, *peer;
+ struct net_device *dev;
+ unsigned int ifidx;
+ int netnsfd, err;
+ struct net *ns;
+
+ err = sscanf(buf, "%u:%u", &netnsfd, &ifidx);
+ if (err != 2) {
+ pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n");
+ return -EINVAL;
+ }
+
+ ns = get_net_ns_by_fd(netnsfd);
+ if (IS_ERR(ns)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd);
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+ rtnl_lock();
+ dev = __dev_get_by_index(ns, ifidx);
+ if (!dev) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n",
+ ifidx, netnsfd);
+ goto out_put_netns;
+ }
+
+ if (!netdev_is_nsim(dev)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
+ ifidx, netnsfd);
+ goto out_put_netns;
+ }
+
+ nsim = netdev_priv(dev);
+ peer = rtnl_dereference(nsim->peer);
+ if (!peer)
+ goto out_put_netns;
+
+ err = 0;
+ RCU_INIT_POINTER(nsim->peer, NULL);
+ RCU_INIT_POINTER(peer->peer, NULL);
+
+out_put_netns:
+ put_net(ns);
+ rtnl_unlock();
+
+ return !err ? count : err;
+}
+static BUS_ATTR_WO(unlink_device);
+
static struct attribute *nsim_bus_attrs[] = {
&bus_attr_new_device.attr,
&bus_attr_del_device.attr,
+ &bus_attr_link_device.attr,
+ &bus_attr_unlink_device.attr,
NULL
};
ATTRIBUTE_GROUPS(nsim_bus);
@@ -260,7 +405,7 @@ static int nsim_num_vf(struct device *dev)
return nsim_bus_dev->num_vfs;
}
-static struct bus_type nsim_bus = {
+static const struct bus_type nsim_bus = {
.name = DRV_NAME,
.dev_name = DRV_NAME,
.bus_groups = nsim_bus_groups,
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index b4d3b9cde8bd..92a7a36b93ac 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work)
trap_report_dw.work);
nsim_dev = nsim_trap_data->nsim_dev;
- /* For each running port and enabled packet trap, generate a UDP
- * packet with a random 5-tuple and report it.
- */
if (!devl_trylock(priv_to_devlink(nsim_dev))) {
- schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0);
+ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1);
return;
}
+ /* For each running port and enabled packet trap, generate a UDP
+ * packet with a random 5-tuple and report it.
+ */
list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
if (!netif_running(nsim_dev_port->ns->netdev))
continue;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 77e8250282a5..8330bc0bcb7e 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -29,18 +29,35 @@
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct netdevsim *ns = netdev_priv(dev);
+ unsigned int len = skb->len;
+ struct netdevsim *peer_ns;
+ rcu_read_lock();
if (!nsim_ipsec_tx(ns, skb))
- goto out;
+ goto out_drop_free;
+ peer_ns = rcu_dereference(ns->peer);
+ if (!peer_ns)
+ goto out_drop_free;
+
+ skb_tx_timestamp(skb);
+ if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
+ goto out_drop_cnt;
+
+ rcu_read_unlock();
u64_stats_update_begin(&ns->syncp);
ns->tx_packets++;
- ns->tx_bytes += skb->len;
+ ns->tx_bytes += len;
u64_stats_update_end(&ns->syncp);
+ return NETDEV_TX_OK;
-out:
+out_drop_free:
dev_kfree_skb(skb);
-
+out_drop_cnt:
+ rcu_read_unlock();
+ u64_stats_update_begin(&ns->syncp);
+ ns->tx_dropped++;
+ u64_stats_update_end(&ns->syncp);
return NETDEV_TX_OK;
}
@@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
start = u64_stats_fetch_begin(&ns->syncp);
stats->tx_bytes = ns->tx_bytes;
stats->tx_packets = ns->tx_packets;
+ stats->tx_dropped = ns->tx_dropped;
} while (u64_stats_fetch_retry(&ns->syncp, start));
}
@@ -265,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features)
return 0;
}
+static int nsim_get_iflink(const struct net_device *dev)
+{
+ struct netdevsim *nsim, *peer;
+ int iflink;
+
+ nsim = netdev_priv(dev);
+
+ rcu_read_lock();
+ peer = rcu_dereference(nsim->peer);
+ iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0;
+ rcu_read_unlock();
+
+ return iflink;
+}
+
static const struct net_device_ops nsim_netdev_ops = {
.ndo_start_xmit = nsim_start_xmit,
.ndo_set_rx_mode = nsim_set_rx_mode,
@@ -282,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = {
.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
.ndo_setup_tc = nsim_setup_tc,
.ndo_set_features = nsim_set_features,
+ .ndo_get_iflink = nsim_get_iflink,
.ndo_bpf = nsim_bpf,
};
@@ -302,7 +336,6 @@ static void nsim_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->tx_queue_len = 0;
- dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
IFF_NO_QUEUE;
@@ -413,8 +446,13 @@ err_free_netdev:
void nsim_destroy(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
+ struct netdevsim *peer;
rtnl_lock();
+ peer = rtnl_dereference(ns->peer);
+ if (peer)
+ RCU_INIT_POINTER(peer->peer, NULL);
+ RCU_INIT_POINTER(ns->peer, NULL);
unregister_netdevice(dev);
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
nsim_macsec_teardown(ns);
@@ -427,6 +465,11 @@ void nsim_destroy(struct netdevsim *ns)
free_netdev(dev);
}
+bool netdev_is_nsim(struct net_device *dev)
+{
+ return dev->netdev_ops == &nsim_netdev_ops;
+}
+
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 028c825b86db..553c4b9b4f63 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -98,6 +98,7 @@ struct netdevsim {
u64 tx_packets;
u64 tx_bytes;
+ u64 tx_dropped;
struct u64_stats_sync syncp;
struct nsim_bus_dev *nsim_bus_dev;
@@ -125,11 +126,13 @@ struct netdevsim {
} udp_ports;
struct nsim_ethtool ethtool;
+ struct netdevsim __rcu *peer;
};
struct netdevsim *
nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
void nsim_destroy(struct netdevsim *ns);
+bool netdev_is_nsim(struct net_device *dev);
void nsim_ethtool_init(struct netdevsim *ns);
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 39171380ccf2..a4d2e76a8d58 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -145,7 +145,7 @@ static int netkit_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(nk->peer);
if (peer)
- iflink = peer->ifindex;
+ iflink = READ_ONCE(peer->ifindex);
rcu_read_unlock();
return iflink;
}
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
index 5e19a6839dea..e5a0987a263e 100644
--- a/drivers/net/nlmon.c
+++ b/drivers/net/nlmon.c
@@ -17,17 +17,6 @@ static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static int nlmon_dev_init(struct net_device *dev)
-{
- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
- return dev->lstats == NULL ? -ENOMEM : 0;
-}
-
-static void nlmon_dev_uninit(struct net_device *dev)
-{
- free_percpu(dev->lstats);
-}
-
struct nlmon {
struct netlink_tap nt;
};
@@ -51,15 +40,7 @@ static int nlmon_close(struct net_device *dev)
static void
nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
- u64 packets, bytes;
-
- dev_lstats_read(dev, &packets, &bytes);
-
- stats->rx_packets = packets;
- stats->tx_packets = 0;
-
- stats->rx_bytes = bytes;
- stats->tx_bytes = 0;
+ dev_lstats_read(dev, &stats->rx_packets, &stats->rx_bytes);
}
static u32 always_on(struct net_device *dev)
@@ -72,8 +53,6 @@ static const struct ethtool_ops nlmon_ethtool_ops = {
};
static const struct net_device_ops nlmon_ops = {
- .ndo_init = nlmon_dev_init,
- .ndo_uninit = nlmon_dev_uninit,
.ndo_open = nlmon_open,
.ndo_stop = nlmon_close,
.ndo_start_xmit = nlmon_xmit,
@@ -92,6 +71,7 @@ static void nlmon_setup(struct net_device *dev)
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HIGHDMA | NETIF_F_LLTX;
dev->flags = IFF_NOARP;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
/* That's rather a softlimit here, which, of course,
* can be altered. Not a real MTU, but what is to be
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index dc3962b2aa6b..853b8c138718 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -398,4 +398,5 @@ void lynx_pcs_destroy(struct phylink_pcs *pcs)
}
EXPORT_SYMBOL(lynx_pcs_destroy);
+MODULE_DESCRIPTION("NXP Lynx PCS phylink library");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c
index 8501dd365279..4f63abe638c4 100644
--- a/drivers/net/pcs/pcs-mtk-lynxi.c
+++ b/drivers/net/pcs/pcs-mtk-lynxi.c
@@ -303,4 +303,5 @@ void mtk_pcs_lynxi_destroy(struct phylink_pcs *pcs)
}
EXPORT_SYMBOL(mtk_pcs_lynxi_destroy);
+MODULE_DESCRIPTION("MediaTek SGMII library for LynxI");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c
index d93f84fbb1fd..4bd66fdde367 100644
--- a/drivers/net/pcs/pcs-rzn1-miic.c
+++ b/drivers/net/pcs/pcs-rzn1-miic.c
@@ -183,7 +183,7 @@ static void miic_converter_enable(struct miic *miic, int port, int enable)
miic_reg_rmw(miic, MIIC_CONVRST, MIIC_CONVRST_PHYIF_RST(port), val);
}
-static int miic_config(struct phylink_pcs *pcs, unsigned int mode,
+static int miic_config(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface,
const unsigned long *advertising, bool permit)
{
@@ -234,7 +234,7 @@ static int miic_config(struct phylink_pcs *pcs, unsigned int mode,
return 0;
}
-static void miic_link_up(struct phylink_pcs *pcs, unsigned int mode,
+static void miic_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, int speed, int duplex)
{
struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs);
@@ -333,6 +333,7 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np)
miic_port->miic = miic;
miic_port->port = port - 1;
miic_port->pcs.ops = &miic_phylink_ops;
+ miic_port->pcs.neg_mode = true;
return &miic_port->pcs;
}
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 31f0beba638a..31525fe9c32e 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -10,7 +10,7 @@
#include <linux/pcs/pcs-xpcs.h>
#include <linux/mdio.h>
#include <linux/phylink.h>
-#include <linux/workqueue.h>
+
#include "pcs-xpcs.h"
#define phylink_pcs_to_xpcs(pl_pcs) \
@@ -130,7 +130,6 @@ static const phy_interface_t xpcs_1000basex_interfaces[] = {
static const phy_interface_t xpcs_2500basex_interfaces[] = {
PHY_INTERFACE_MODE_2500BASEX,
- PHY_INTERFACE_MODE_MAX,
};
enum {
@@ -293,7 +292,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs,
dev = MDIO_MMD_VEND2;
break;
default:
- return -1;
+ return -EINVAL;
}
ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET);
@@ -614,14 +613,15 @@ static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
xpcs = phylink_pcs_to_xpcs(pcs);
compat = xpcs_find_compat(xpcs->id, state->interface);
+ if (!compat)
+ return -EINVAL;
/* Populate the supported link modes for this PHY interface type.
* FIXME: what about the port modes and autoneg bit? This masks
* all those away.
*/
- if (compat)
- for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
- set_bit(compat->supported[i], xpcs_supported);
+ for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
+ set_bit(compat->supported[i], xpcs_supported);
linkmode_and(supported, supported, xpcs_supported);
@@ -636,8 +636,7 @@ void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
const struct xpcs_compat *compat = &xpcs->id->compat[i];
for (j = 0; j < compat->num_interfaces; j++)
- if (compat->interface[j] < PHY_INTERFACE_MODE_MAX)
- __set_bit(compat->interface[j], interfaces);
+ __set_bit(compat->interface[j], interfaces);
}
}
EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
@@ -891,7 +890,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
return ret;
break;
default:
- return -1;
+ return -EINVAL;
}
if (compat->pma_config) {
@@ -1456,4 +1455,5 @@ struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
}
EXPORT_SYMBOL_GPL(xpcs_create_mdiodev);
+MODULE_DESCRIPTION("Synopsys DesignWare XPCS library");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 9e2672800f0b..1df0595c5ba9 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -232,6 +232,7 @@ config MARVELL_10G_PHY
config MARVELL_88Q2XXX_PHY
tristate "Marvell 88Q2XXX PHY"
+ depends on HWMON || HWMON=n
help
Support for the Marvell 88Q2XXX 100/1000BASE-T1 Automotive Ethernet
PHYs.
@@ -335,12 +336,7 @@ config NCN26000_PHY
Currently supports the NCN26000 10BASE-T1S Industrial PHY
with MII interface.
-config AT803X_PHY
- tristate "Qualcomm Atheros AR803X PHYs and QCA833x PHYs"
- depends on REGULATOR
- help
- Currently supports the AR8030, AR8031, AR8033, AR8035 and internal
- QCA8337(Internal qca8k PHY) model
+source "drivers/net/phy/qcom/Kconfig"
config QSEMI_PHY
tristate "Quality Semiconductor PHYs"
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 6097afd44392..197acfa0b412 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_ADIN_PHY) += adin.o
obj-$(CONFIG_ADIN1100_PHY) += adin1100.o
obj-$(CONFIG_AMD_PHY) += amd.o
obj-$(CONFIG_AQUANTIA_PHY) += aquantia/
-obj-$(CONFIG_AT803X_PHY) += at803x.o
ifdef CONFIG_AX88796B_RUST_PHY
obj-$(CONFIG_AX88796B_PHY) += ax88796b_rust.o
else
@@ -91,6 +90,7 @@ endif
obj-$(CONFIG_NXP_C45_TJA11XX_PHY) += nxp-c45-tja.o
obj-$(CONFIG_NXP_CBTX_PHY) += nxp-cbtx.o
obj-$(CONFIG_NXP_TJA11XX_PHY) += nxp-tja11xx.o
+obj-y += qcom/
obj-$(CONFIG_QSEMI_PHY) += qsemi.o
obj-$(CONFIG_REALTEK_PHY) += realtek.o
obj-$(CONFIG_RENESAS_PHY) += uPD60620.o
diff --git a/drivers/net/phy/adin1100.c b/drivers/net/phy/adin1100.c
index 7619d6185801..85f910e2d4fb 100644
--- a/drivers/net/phy/adin1100.c
+++ b/drivers/net/phy/adin1100.c
@@ -18,6 +18,12 @@
#define PHY_ID_ADIN1110 0x0283bc91
#define PHY_ID_ADIN2111 0x0283bca1
+#define ADIN_PHY_SUBSYS_IRQ_MASK 0x0021
+#define ADIN_LINK_STAT_CHNG_IRQ_EN BIT(1)
+
+#define ADIN_PHY_SUBSYS_IRQ_STATUS 0x0011
+#define ADIN_LINK_STAT_CHNG BIT(1)
+
#define ADIN_FORCED_MODE 0x8000
#define ADIN_FORCED_MODE_EN BIT(0)
@@ -136,6 +142,53 @@ static int adin_config_aneg(struct phy_device *phydev)
return genphy_c45_config_aneg(phydev);
}
+static int adin_phy_ack_intr(struct phy_device *phydev)
+{
+ /* Clear pending interrupts */
+ int rc = phy_read_mmd(phydev, MDIO_MMD_VEND2,
+ ADIN_PHY_SUBSYS_IRQ_STATUS);
+
+ return rc < 0 ? rc : 0;
+}
+
+static int adin_config_intr(struct phy_device *phydev)
+{
+ u16 irq_mask;
+ int ret;
+
+ ret = adin_phy_ack_intr(phydev);
+ if (ret)
+ return ret;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+ irq_mask = ADIN_LINK_STAT_CHNG_IRQ_EN;
+ else
+ irq_mask = 0;
+
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+ ADIN_PHY_SUBSYS_IRQ_MASK,
+ ADIN_LINK_STAT_CHNG_IRQ_EN, irq_mask);
+}
+
+static irqreturn_t adin_phy_handle_interrupt(struct phy_device *phydev)
+{
+ int irq_status;
+
+ irq_status = phy_read_mmd(phydev, MDIO_MMD_VEND2,
+ ADIN_PHY_SUBSYS_IRQ_STATUS);
+ if (irq_status < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ if (!(irq_status & ADIN_LINK_STAT_CHNG))
+ return IRQ_NONE;
+
+ phy_trigger_machine(phydev);
+
+ return IRQ_HANDLED;
+}
+
static int adin_set_powerdown_mode(struct phy_device *phydev, bool en)
{
int ret;
@@ -275,6 +328,8 @@ static struct phy_driver adin_driver[] = {
.probe = adin_probe,
.config_aneg = adin_config_aneg,
.read_status = adin_read_status,
+ .config_intr = adin_config_intr,
+ .handle_interrupt = adin_phy_handle_interrupt,
.set_loopback = adin_set_loopback,
.suspend = adin_suspend,
.resume = adin_resume,
diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c
index 97a2fafa15ca..71bfddb8f453 100644
--- a/drivers/net/phy/aquantia/aquantia_main.c
+++ b/drivers/net/phy/aquantia/aquantia_main.c
@@ -22,9 +22,13 @@
#define PHY_ID_AQR107 0x03a1b4e0
#define PHY_ID_AQCS109 0x03a1b5c2
#define PHY_ID_AQR405 0x03a1b4b0
+#define PHY_ID_AQR111 0x03a1b610
+#define PHY_ID_AQR111B0 0x03a1b612
#define PHY_ID_AQR112 0x03a1b662
#define PHY_ID_AQR412 0x03a1b712
+#define PHY_ID_AQR113 0x31c31c40
#define PHY_ID_AQR113C 0x31c31c12
+#define PHY_ID_AQR813 0x31c31cb2
#define MDIO_PHYXS_VEND_IF_STATUS 0xe812
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3)
@@ -727,6 +731,15 @@ static int aqr113c_config_init(struct phy_device *phydev)
if (ret < 0)
return ret;
+ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_TXDIS,
+ MDIO_PMD_TXDIS_GLOBAL);
+ if (ret)
+ return ret;
+
+ ret = aqr107_wait_processor_intensive_op(phydev);
+ if (ret)
+ return ret;
+
return aqr107_fill_interface_modes(phydev);
}
@@ -746,6 +759,16 @@ static int aqr107_probe(struct phy_device *phydev)
return aqr_hwmon_probe(phydev);
}
+static int aqr111_config_init(struct phy_device *phydev)
+{
+ /* AQR111 reports supporting speed up to 10G,
+ * however only speeds up to 5G are supported.
+ */
+ phy_set_max_speed(phydev, SPEED_5000);
+
+ return aqr107_config_init(phydev);
+}
+
static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQ1202),
@@ -820,6 +843,44 @@ static struct phy_driver aqr_driver[] = {
.link_change_notify = aqr107_link_change_notify,
},
{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR111),
+ .name = "Aquantia AQR111",
+ .probe = aqr107_probe,
+ .get_rate_matching = aqr107_get_rate_matching,
+ .config_init = aqr111_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr107_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+},
+{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0),
+ .name = "Aquantia AQR111B0",
+ .probe = aqr107_probe,
+ .get_rate_matching = aqr107_get_rate_matching,
+ .config_init = aqr111_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr107_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+},
+{
PHY_ID_MATCH_MODEL(PHY_ID_AQR405),
.name = "Aquantia AQR405",
.config_aneg = aqr_config_aneg,
@@ -864,6 +925,25 @@ static struct phy_driver aqr_driver[] = {
.link_change_notify = aqr107_link_change_notify,
},
{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR113),
+ .name = "Aquantia AQR113",
+ .probe = aqr107_probe,
+ .get_rate_matching = aqr107_get_rate_matching,
+ .config_init = aqr113c_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr107_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+},
+{
PHY_ID_MATCH_MODEL(PHY_ID_AQR113C),
.name = "Aquantia AQR113C",
.probe = aqr107_probe,
@@ -882,6 +962,25 @@ static struct phy_driver aqr_driver[] = {
.get_stats = aqr107_get_stats,
.link_change_notify = aqr107_link_change_notify,
},
+{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR813),
+ .name = "Aquantia AQR813",
+ .probe = aqr107_probe,
+ .get_rate_matching = aqr107_get_rate_matching,
+ .config_init = aqr107_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr107_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+},
};
module_phy_driver(aqr_driver);
@@ -894,9 +993,13 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = {
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR107) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR405) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR111) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR112) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR412) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR113) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR813) },
{ }
};
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
deleted file mode 100644
index a62442a55774..000000000000
--- a/drivers/net/phy/at803x.c
+++ /dev/null
@@ -1,2432 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * drivers/net/phy/at803x.c
- *
- * Driver for Qualcomm Atheros AR803x PHY
- *
- * Author: Matus Ujhelyi <ujhelyi.m@gmail.com>
- */
-
-#include <linux/phy.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool_netlink.h>
-#include <linux/bitfield.h>
-#include <linux/regulator/of_regulator.h>
-#include <linux/regulator/driver.h>
-#include <linux/regulator/consumer.h>
-#include <linux/of.h>
-#include <linux/phylink.h>
-#include <linux/sfp.h>
-#include <dt-bindings/net/qca-ar803x.h>
-
-#define AT803X_SPECIFIC_FUNCTION_CONTROL 0x10
-#define AT803X_SFC_ASSERT_CRS BIT(11)
-#define AT803X_SFC_FORCE_LINK BIT(10)
-#define AT803X_SFC_MDI_CROSSOVER_MODE_M GENMASK(6, 5)
-#define AT803X_SFC_AUTOMATIC_CROSSOVER 0x3
-#define AT803X_SFC_MANUAL_MDIX 0x1
-#define AT803X_SFC_MANUAL_MDI 0x0
-#define AT803X_SFC_SQE_TEST BIT(2)
-#define AT803X_SFC_POLARITY_REVERSAL BIT(1)
-#define AT803X_SFC_DISABLE_JABBER BIT(0)
-
-#define AT803X_SPECIFIC_STATUS 0x11
-#define AT803X_SS_SPEED_MASK GENMASK(15, 14)
-#define AT803X_SS_SPEED_1000 2
-#define AT803X_SS_SPEED_100 1
-#define AT803X_SS_SPEED_10 0
-#define AT803X_SS_DUPLEX BIT(13)
-#define AT803X_SS_SPEED_DUPLEX_RESOLVED BIT(11)
-#define AT803X_SS_MDIX BIT(6)
-
-#define QCA808X_SS_SPEED_MASK GENMASK(9, 7)
-#define QCA808X_SS_SPEED_2500 4
-
-#define AT803X_INTR_ENABLE 0x12
-#define AT803X_INTR_ENABLE_AUTONEG_ERR BIT(15)
-#define AT803X_INTR_ENABLE_SPEED_CHANGED BIT(14)
-#define AT803X_INTR_ENABLE_DUPLEX_CHANGED BIT(13)
-#define AT803X_INTR_ENABLE_PAGE_RECEIVED BIT(12)
-#define AT803X_INTR_ENABLE_LINK_FAIL BIT(11)
-#define AT803X_INTR_ENABLE_LINK_SUCCESS BIT(10)
-#define AT803X_INTR_ENABLE_LINK_FAIL_BX BIT(8)
-#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX BIT(7)
-#define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
-#define AT803X_INTR_ENABLE_POLARITY_CHANGED BIT(1)
-#define AT803X_INTR_ENABLE_WOL BIT(0)
-
-#define AT803X_INTR_STATUS 0x13
-
-#define AT803X_SMART_SPEED 0x14
-#define AT803X_SMART_SPEED_ENABLE BIT(5)
-#define AT803X_SMART_SPEED_RETRY_LIMIT_MASK GENMASK(4, 2)
-#define AT803X_SMART_SPEED_BYPASS_TIMER BIT(1)
-#define AT803X_CDT 0x16
-#define AT803X_CDT_MDI_PAIR_MASK GENMASK(9, 8)
-#define AT803X_CDT_ENABLE_TEST BIT(0)
-#define AT803X_CDT_STATUS 0x1c
-#define AT803X_CDT_STATUS_STAT_NORMAL 0
-#define AT803X_CDT_STATUS_STAT_SHORT 1
-#define AT803X_CDT_STATUS_STAT_OPEN 2
-#define AT803X_CDT_STATUS_STAT_FAIL 3
-#define AT803X_CDT_STATUS_STAT_MASK GENMASK(9, 8)
-#define AT803X_CDT_STATUS_DELTA_TIME_MASK GENMASK(7, 0)
-#define AT803X_LED_CONTROL 0x18
-
-#define AT803X_PHY_MMD3_WOL_CTRL 0x8012
-#define AT803X_WOL_EN BIT(5)
-#define AT803X_LOC_MAC_ADDR_0_15_OFFSET 0x804C
-#define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B
-#define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A
-#define AT803X_REG_CHIP_CONFIG 0x1f
-#define AT803X_BT_BX_REG_SEL 0x8000
-
-#define AT803X_DEBUG_ADDR 0x1D
-#define AT803X_DEBUG_DATA 0x1E
-
-#define AT803X_MODE_CFG_MASK 0x0F
-#define AT803X_MODE_CFG_BASET_RGMII 0x00
-#define AT803X_MODE_CFG_BASET_SGMII 0x01
-#define AT803X_MODE_CFG_BX1000_RGMII_50OHM 0x02
-#define AT803X_MODE_CFG_BX1000_RGMII_75OHM 0x03
-#define AT803X_MODE_CFG_BX1000_CONV_50OHM 0x04
-#define AT803X_MODE_CFG_BX1000_CONV_75OHM 0x05
-#define AT803X_MODE_CFG_FX100_RGMII_50OHM 0x06
-#define AT803X_MODE_CFG_FX100_CONV_50OHM 0x07
-#define AT803X_MODE_CFG_RGMII_AUTO_MDET 0x0B
-#define AT803X_MODE_CFG_FX100_RGMII_75OHM 0x0E
-#define AT803X_MODE_CFG_FX100_CONV_75OHM 0x0F
-
-#define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/
-#define AT803X_PSSR_MR_AN_COMPLETE 0x0200
-
-#define AT803X_DEBUG_ANALOG_TEST_CTRL 0x00
-#define QCA8327_DEBUG_MANU_CTRL_EN BIT(2)
-#define QCA8337_DEBUG_MANU_CTRL_EN GENMASK(3, 2)
-#define AT803X_DEBUG_RX_CLK_DLY_EN BIT(15)
-
-#define AT803X_DEBUG_SYSTEM_CTRL_MODE 0x05
-#define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8)
-
-#define AT803X_DEBUG_REG_HIB_CTRL 0x0b
-#define AT803X_DEBUG_HIB_CTRL_SEL_RST_80U BIT(10)
-#define AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE BIT(13)
-#define AT803X_DEBUG_HIB_CTRL_PS_HIB_EN BIT(15)
-
-#define AT803X_DEBUG_REG_3C 0x3C
-
-#define AT803X_DEBUG_REG_GREEN 0x3D
-#define AT803X_DEBUG_GATE_CLK_IN1000 BIT(6)
-
-#define AT803X_DEBUG_REG_1F 0x1F
-#define AT803X_DEBUG_PLL_ON BIT(2)
-#define AT803X_DEBUG_RGMII_1V8 BIT(3)
-
-#define MDIO_AZ_DEBUG 0x800D
-
-/* AT803x supports either the XTAL input pad, an internal PLL or the
- * DSP as clock reference for the clock output pad. The XTAL reference
- * is only used for 25 MHz output, all other frequencies need the PLL.
- * The DSP as a clock reference is used in synchronous ethernet
- * applications.
- *
- * By default the PLL is only enabled if there is a link. Otherwise
- * the PHY will go into low power state and disabled the PLL. You can
- * set the PLL_ON bit (see debug register 0x1f) to keep the PLL always
- * enabled.
- */
-#define AT803X_MMD7_CLK25M 0x8016
-#define AT803X_CLK_OUT_MASK GENMASK(4, 2)
-#define AT803X_CLK_OUT_25MHZ_XTAL 0
-#define AT803X_CLK_OUT_25MHZ_DSP 1
-#define AT803X_CLK_OUT_50MHZ_PLL 2
-#define AT803X_CLK_OUT_50MHZ_DSP 3
-#define AT803X_CLK_OUT_62_5MHZ_PLL 4
-#define AT803X_CLK_OUT_62_5MHZ_DSP 5
-#define AT803X_CLK_OUT_125MHZ_PLL 6
-#define AT803X_CLK_OUT_125MHZ_DSP 7
-
-/* The AR8035 has another mask which is compatible with the AR8031/AR8033 mask
- * but doesn't support choosing between XTAL/PLL and DSP.
- */
-#define AT8035_CLK_OUT_MASK GENMASK(4, 3)
-
-#define AT803X_CLK_OUT_STRENGTH_MASK GENMASK(8, 7)
-#define AT803X_CLK_OUT_STRENGTH_FULL 0
-#define AT803X_CLK_OUT_STRENGTH_HALF 1
-#define AT803X_CLK_OUT_STRENGTH_QUARTER 2
-
-#define AT803X_DEFAULT_DOWNSHIFT 5
-#define AT803X_MIN_DOWNSHIFT 2
-#define AT803X_MAX_DOWNSHIFT 9
-
-#define AT803X_MMD3_SMARTEEE_CTL1 0x805b
-#define AT803X_MMD3_SMARTEEE_CTL2 0x805c
-#define AT803X_MMD3_SMARTEEE_CTL3 0x805d
-#define AT803X_MMD3_SMARTEEE_CTL3_LPI_EN BIT(8)
-
-#define ATH9331_PHY_ID 0x004dd041
-#define ATH8030_PHY_ID 0x004dd076
-#define ATH8031_PHY_ID 0x004dd074
-#define ATH8032_PHY_ID 0x004dd023
-#define ATH8035_PHY_ID 0x004dd072
-#define AT8030_PHY_ID_MASK 0xffffffef
-
-#define QCA8081_PHY_ID 0x004dd101
-
-#define QCA8327_A_PHY_ID 0x004dd033
-#define QCA8327_B_PHY_ID 0x004dd034
-#define QCA8337_PHY_ID 0x004dd036
-#define QCA9561_PHY_ID 0x004dd042
-#define QCA8K_PHY_ID_MASK 0xffffffff
-
-#define QCA8K_DEVFLAGS_REVISION_MASK GENMASK(2, 0)
-
-#define AT803X_PAGE_FIBER 0
-#define AT803X_PAGE_COPPER 1
-
-/* don't turn off internal PLL */
-#define AT803X_KEEP_PLL_ENABLED BIT(0)
-#define AT803X_DISABLE_SMARTEEE BIT(1)
-
-/* disable hibernation mode */
-#define AT803X_DISABLE_HIBERNATION_MODE BIT(2)
-
-/* ADC threshold */
-#define QCA808X_PHY_DEBUG_ADC_THRESHOLD 0x2c80
-#define QCA808X_ADC_THRESHOLD_MASK GENMASK(7, 0)
-#define QCA808X_ADC_THRESHOLD_80MV 0
-#define QCA808X_ADC_THRESHOLD_100MV 0xf0
-#define QCA808X_ADC_THRESHOLD_200MV 0x0f
-#define QCA808X_ADC_THRESHOLD_300MV 0xff
-
-/* CLD control */
-#define QCA808X_PHY_MMD3_ADDR_CLD_CTRL7 0x8007
-#define QCA808X_8023AZ_AFE_CTRL_MASK GENMASK(8, 4)
-#define QCA808X_8023AZ_AFE_EN 0x90
-
-/* AZ control */
-#define QCA808X_PHY_MMD3_AZ_TRAINING_CTRL 0x8008
-#define QCA808X_MMD3_AZ_TRAINING_VAL 0x1c32
-
-#define QCA808X_PHY_MMD1_MSE_THRESHOLD_20DB 0x8014
-#define QCA808X_MSE_THRESHOLD_20DB_VALUE 0x529
-
-#define QCA808X_PHY_MMD1_MSE_THRESHOLD_17DB 0x800E
-#define QCA808X_MSE_THRESHOLD_17DB_VALUE 0x341
-
-#define QCA808X_PHY_MMD1_MSE_THRESHOLD_27DB 0x801E
-#define QCA808X_MSE_THRESHOLD_27DB_VALUE 0x419
-
-#define QCA808X_PHY_MMD1_MSE_THRESHOLD_28DB 0x8020
-#define QCA808X_MSE_THRESHOLD_28DB_VALUE 0x341
-
-#define QCA808X_PHY_MMD7_TOP_OPTION1 0x901c
-#define QCA808X_TOP_OPTION1_DATA 0x0
-
-#define QCA808X_PHY_MMD3_DEBUG_1 0xa100
-#define QCA808X_MMD3_DEBUG_1_VALUE 0x9203
-#define QCA808X_PHY_MMD3_DEBUG_2 0xa101
-#define QCA808X_MMD3_DEBUG_2_VALUE 0x48ad
-#define QCA808X_PHY_MMD3_DEBUG_3 0xa103
-#define QCA808X_MMD3_DEBUG_3_VALUE 0x1698
-#define QCA808X_PHY_MMD3_DEBUG_4 0xa105
-#define QCA808X_MMD3_DEBUG_4_VALUE 0x8001
-#define QCA808X_PHY_MMD3_DEBUG_5 0xa106
-#define QCA808X_MMD3_DEBUG_5_VALUE 0x1111
-#define QCA808X_PHY_MMD3_DEBUG_6 0xa011
-#define QCA808X_MMD3_DEBUG_6_VALUE 0x5f85
-
-/* master/slave seed config */
-#define QCA808X_PHY_DEBUG_LOCAL_SEED 9
-#define QCA808X_MASTER_SLAVE_SEED_ENABLE BIT(1)
-#define QCA808X_MASTER_SLAVE_SEED_CFG GENMASK(12, 2)
-#define QCA808X_MASTER_SLAVE_SEED_RANGE 0x32
-
-/* Hibernation yields lower power consumpiton in contrast with normal operation mode.
- * when the copper cable is unplugged, the PHY enters into hibernation mode in about 10s.
- */
-#define QCA808X_DBG_AN_TEST 0xb
-#define QCA808X_HIBERNATION_EN BIT(15)
-
-#define QCA808X_CDT_ENABLE_TEST BIT(15)
-#define QCA808X_CDT_INTER_CHECK_DIS BIT(13)
-#define QCA808X_CDT_STATUS BIT(11)
-#define QCA808X_CDT_LENGTH_UNIT BIT(10)
-
-#define QCA808X_MMD3_CDT_STATUS 0x8064
-#define QCA808X_MMD3_CDT_DIAG_PAIR_A 0x8065
-#define QCA808X_MMD3_CDT_DIAG_PAIR_B 0x8066
-#define QCA808X_MMD3_CDT_DIAG_PAIR_C 0x8067
-#define QCA808X_MMD3_CDT_DIAG_PAIR_D 0x8068
-#define QCA808X_CDT_DIAG_LENGTH_SAME_SHORT GENMASK(15, 8)
-#define QCA808X_CDT_DIAG_LENGTH_CROSS_SHORT GENMASK(7, 0)
-
-#define QCA808X_CDT_CODE_PAIR_A GENMASK(15, 12)
-#define QCA808X_CDT_CODE_PAIR_B GENMASK(11, 8)
-#define QCA808X_CDT_CODE_PAIR_C GENMASK(7, 4)
-#define QCA808X_CDT_CODE_PAIR_D GENMASK(3, 0)
-
-#define QCA808X_CDT_STATUS_STAT_TYPE GENMASK(1, 0)
-#define QCA808X_CDT_STATUS_STAT_FAIL FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 0)
-#define QCA808X_CDT_STATUS_STAT_NORMAL FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 1)
-#define QCA808X_CDT_STATUS_STAT_SAME_OPEN FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 2)
-#define QCA808X_CDT_STATUS_STAT_SAME_SHORT FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 3)
-
-#define QCA808X_CDT_STATUS_STAT_MDI GENMASK(3, 2)
-#define QCA808X_CDT_STATUS_STAT_MDI1 FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_MDI, 1)
-#define QCA808X_CDT_STATUS_STAT_MDI2 FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_MDI, 2)
-#define QCA808X_CDT_STATUS_STAT_MDI3 FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_MDI, 3)
-
-/* NORMAL are MDI with type set to 0 */
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_NORMAL QCA808X_CDT_STATUS_STAT_MDI1
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_OPEN (QCA808X_CDT_STATUS_STAT_SAME_OPEN |\
- QCA808X_CDT_STATUS_STAT_MDI1)
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_SHORT (QCA808X_CDT_STATUS_STAT_SAME_SHORT |\
- QCA808X_CDT_STATUS_STAT_MDI1)
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_NORMAL QCA808X_CDT_STATUS_STAT_MDI2
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_OPEN (QCA808X_CDT_STATUS_STAT_SAME_OPEN |\
- QCA808X_CDT_STATUS_STAT_MDI2)
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_SHORT (QCA808X_CDT_STATUS_STAT_SAME_SHORT |\
- QCA808X_CDT_STATUS_STAT_MDI2)
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_NORMAL QCA808X_CDT_STATUS_STAT_MDI3
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_OPEN (QCA808X_CDT_STATUS_STAT_SAME_OPEN |\
- QCA808X_CDT_STATUS_STAT_MDI3)
-#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_SHORT (QCA808X_CDT_STATUS_STAT_SAME_SHORT |\
- QCA808X_CDT_STATUS_STAT_MDI3)
-
-/* Added for reference of existence but should be handled by wait_for_completion already */
-#define QCA808X_CDT_STATUS_STAT_BUSY (BIT(1) | BIT(3))
-
-/* QCA808X 1G chip type */
-#define QCA808X_PHY_MMD7_CHIP_TYPE 0x901d
-#define QCA808X_PHY_CHIP_TYPE_1G BIT(0)
-
-#define QCA8081_PHY_SERDES_MMD1_FIFO_CTRL 0x9072
-#define QCA8081_PHY_FIFO_RSTN BIT(11)
-
-MODULE_DESCRIPTION("Qualcomm Atheros AR803x and QCA808X PHY driver");
-MODULE_AUTHOR("Matus Ujhelyi");
-MODULE_LICENSE("GPL");
-
-enum stat_access_type {
- PHY,
- MMD
-};
-
-struct at803x_hw_stat {
- const char *string;
- u8 reg;
- u32 mask;
- enum stat_access_type access_type;
-};
-
-static struct at803x_hw_stat qca83xx_hw_stats[] = {
- { "phy_idle_errors", 0xa, GENMASK(7, 0), PHY},
- { "phy_receive_errors", 0x15, GENMASK(15, 0), PHY},
- { "eee_wake_errors", 0x16, GENMASK(15, 0), MMD},
-};
-
-struct at803x_ss_mask {
- u16 speed_mask;
- u8 speed_shift;
-};
-
-struct at803x_priv {
- int flags;
- u16 clk_25m_reg;
- u16 clk_25m_mask;
- u8 smarteee_lpi_tw_1g;
- u8 smarteee_lpi_tw_100m;
- bool is_fiber;
- bool is_1000basex;
- struct regulator_dev *vddio_rdev;
- struct regulator_dev *vddh_rdev;
- u64 stats[ARRAY_SIZE(qca83xx_hw_stats)];
-};
-
-struct at803x_context {
- u16 bmcr;
- u16 advertise;
- u16 control1000;
- u16 int_enable;
- u16 smart_speed;
- u16 led_control;
-};
-
-static int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data)
-{
- int ret;
-
- ret = phy_write(phydev, AT803X_DEBUG_ADDR, reg);
- if (ret < 0)
- return ret;
-
- return phy_write(phydev, AT803X_DEBUG_DATA, data);
-}
-
-static int at803x_debug_reg_read(struct phy_device *phydev, u16 reg)
-{
- int ret;
-
- ret = phy_write(phydev, AT803X_DEBUG_ADDR, reg);
- if (ret < 0)
- return ret;
-
- return phy_read(phydev, AT803X_DEBUG_DATA);
-}
-
-static int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg,
- u16 clear, u16 set)
-{
- u16 val;
- int ret;
-
- ret = at803x_debug_reg_read(phydev, reg);
- if (ret < 0)
- return ret;
-
- val = ret & 0xffff;
- val &= ~clear;
- val |= set;
-
- return phy_write(phydev, AT803X_DEBUG_DATA, val);
-}
-
-static int at803x_write_page(struct phy_device *phydev, int page)
-{
- int mask;
- int set;
-
- if (page == AT803X_PAGE_COPPER) {
- set = AT803X_BT_BX_REG_SEL;
- mask = 0;
- } else {
- set = 0;
- mask = AT803X_BT_BX_REG_SEL;
- }
-
- return __phy_modify(phydev, AT803X_REG_CHIP_CONFIG, mask, set);
-}
-
-static int at803x_read_page(struct phy_device *phydev)
-{
- int ccr = __phy_read(phydev, AT803X_REG_CHIP_CONFIG);
-
- if (ccr < 0)
- return ccr;
-
- if (ccr & AT803X_BT_BX_REG_SEL)
- return AT803X_PAGE_COPPER;
-
- return AT803X_PAGE_FIBER;
-}
-
-static int at803x_enable_rx_delay(struct phy_device *phydev)
-{
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, 0,
- AT803X_DEBUG_RX_CLK_DLY_EN);
-}
-
-static int at803x_enable_tx_delay(struct phy_device *phydev)
-{
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, 0,
- AT803X_DEBUG_TX_CLK_DLY_EN);
-}
-
-static int at803x_disable_rx_delay(struct phy_device *phydev)
-{
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
- AT803X_DEBUG_RX_CLK_DLY_EN, 0);
-}
-
-static int at803x_disable_tx_delay(struct phy_device *phydev)
-{
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE,
- AT803X_DEBUG_TX_CLK_DLY_EN, 0);
-}
-
-/* save relevant PHY registers to private copy */
-static void at803x_context_save(struct phy_device *phydev,
- struct at803x_context *context)
-{
- context->bmcr = phy_read(phydev, MII_BMCR);
- context->advertise = phy_read(phydev, MII_ADVERTISE);
- context->control1000 = phy_read(phydev, MII_CTRL1000);
- context->int_enable = phy_read(phydev, AT803X_INTR_ENABLE);
- context->smart_speed = phy_read(phydev, AT803X_SMART_SPEED);
- context->led_control = phy_read(phydev, AT803X_LED_CONTROL);
-}
-
-/* restore relevant PHY registers from private copy */
-static void at803x_context_restore(struct phy_device *phydev,
- const struct at803x_context *context)
-{
- phy_write(phydev, MII_BMCR, context->bmcr);
- phy_write(phydev, MII_ADVERTISE, context->advertise);
- phy_write(phydev, MII_CTRL1000, context->control1000);
- phy_write(phydev, AT803X_INTR_ENABLE, context->int_enable);
- phy_write(phydev, AT803X_SMART_SPEED, context->smart_speed);
- phy_write(phydev, AT803X_LED_CONTROL, context->led_control);
-}
-
-static int at803x_set_wol(struct phy_device *phydev,
- struct ethtool_wolinfo *wol)
-{
- int ret, irq_enabled;
-
- if (wol->wolopts & WAKE_MAGIC) {
- struct net_device *ndev = phydev->attached_dev;
- const u8 *mac;
- unsigned int i;
- static const unsigned int offsets[] = {
- AT803X_LOC_MAC_ADDR_32_47_OFFSET,
- AT803X_LOC_MAC_ADDR_16_31_OFFSET,
- AT803X_LOC_MAC_ADDR_0_15_OFFSET,
- };
-
- if (!ndev)
- return -ENODEV;
-
- mac = (const u8 *)ndev->dev_addr;
-
- if (!is_valid_ether_addr(mac))
- return -EINVAL;
-
- for (i = 0; i < 3; i++)
- phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
- mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
-
- /* Enable WOL interrupt */
- ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
- if (ret)
- return ret;
- } else {
- /* Disable WOL interrupt */
- ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
- if (ret)
- return ret;
- }
-
- /* Clear WOL status */
- ret = phy_read(phydev, AT803X_INTR_STATUS);
- if (ret < 0)
- return ret;
-
- /* Check if there are other interrupts except for WOL triggered when PHY is
- * in interrupt mode, only the interrupts enabled by AT803X_INTR_ENABLE can
- * be passed up to the interrupt PIN.
- */
- irq_enabled = phy_read(phydev, AT803X_INTR_ENABLE);
- if (irq_enabled < 0)
- return irq_enabled;
-
- irq_enabled &= ~AT803X_INTR_ENABLE_WOL;
- if (ret & irq_enabled && !phy_polling_mode(phydev))
- phy_trigger_machine(phydev);
-
- return 0;
-}
-
-static void at803x_get_wol(struct phy_device *phydev,
- struct ethtool_wolinfo *wol)
-{
- int value;
-
- wol->supported = WAKE_MAGIC;
- wol->wolopts = 0;
-
- value = phy_read(phydev, AT803X_INTR_ENABLE);
- if (value < 0)
- return;
-
- if (value & AT803X_INTR_ENABLE_WOL)
- wol->wolopts |= WAKE_MAGIC;
-}
-
-static int qca83xx_get_sset_count(struct phy_device *phydev)
-{
- return ARRAY_SIZE(qca83xx_hw_stats);
-}
-
-static void qca83xx_get_strings(struct phy_device *phydev, u8 *data)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(qca83xx_hw_stats); i++) {
- strscpy(data + i * ETH_GSTRING_LEN,
- qca83xx_hw_stats[i].string, ETH_GSTRING_LEN);
- }
-}
-
-static u64 qca83xx_get_stat(struct phy_device *phydev, int i)
-{
- struct at803x_hw_stat stat = qca83xx_hw_stats[i];
- struct at803x_priv *priv = phydev->priv;
- int val;
- u64 ret;
-
- if (stat.access_type == MMD)
- val = phy_read_mmd(phydev, MDIO_MMD_PCS, stat.reg);
- else
- val = phy_read(phydev, stat.reg);
-
- if (val < 0) {
- ret = U64_MAX;
- } else {
- val = val & stat.mask;
- priv->stats[i] += val;
- ret = priv->stats[i];
- }
-
- return ret;
-}
-
-static void qca83xx_get_stats(struct phy_device *phydev,
- struct ethtool_stats *stats, u64 *data)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(qca83xx_hw_stats); i++)
- data[i] = qca83xx_get_stat(phydev, i);
-}
-
-static int at803x_suspend(struct phy_device *phydev)
-{
- int value;
- int wol_enabled;
-
- value = phy_read(phydev, AT803X_INTR_ENABLE);
- wol_enabled = value & AT803X_INTR_ENABLE_WOL;
-
- if (wol_enabled)
- value = BMCR_ISOLATE;
- else
- value = BMCR_PDOWN;
-
- phy_modify(phydev, MII_BMCR, 0, value);
-
- return 0;
-}
-
-static int at803x_resume(struct phy_device *phydev)
-{
- return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0);
-}
-
-static int at803x_parse_dt(struct phy_device *phydev)
-{
- struct device_node *node = phydev->mdio.dev.of_node;
- struct at803x_priv *priv = phydev->priv;
- u32 freq, strength, tw;
- unsigned int sel;
- int ret;
-
- if (!IS_ENABLED(CONFIG_OF_MDIO))
- return 0;
-
- if (of_property_read_bool(node, "qca,disable-smarteee"))
- priv->flags |= AT803X_DISABLE_SMARTEEE;
-
- if (of_property_read_bool(node, "qca,disable-hibernation-mode"))
- priv->flags |= AT803X_DISABLE_HIBERNATION_MODE;
-
- if (!of_property_read_u32(node, "qca,smarteee-tw-us-1g", &tw)) {
- if (!tw || tw > 255) {
- phydev_err(phydev, "invalid qca,smarteee-tw-us-1g\n");
- return -EINVAL;
- }
- priv->smarteee_lpi_tw_1g = tw;
- }
-
- if (!of_property_read_u32(node, "qca,smarteee-tw-us-100m", &tw)) {
- if (!tw || tw > 255) {
- phydev_err(phydev, "invalid qca,smarteee-tw-us-100m\n");
- return -EINVAL;
- }
- priv->smarteee_lpi_tw_100m = tw;
- }
-
- ret = of_property_read_u32(node, "qca,clk-out-frequency", &freq);
- if (!ret) {
- switch (freq) {
- case 25000000:
- sel = AT803X_CLK_OUT_25MHZ_XTAL;
- break;
- case 50000000:
- sel = AT803X_CLK_OUT_50MHZ_PLL;
- break;
- case 62500000:
- sel = AT803X_CLK_OUT_62_5MHZ_PLL;
- break;
- case 125000000:
- sel = AT803X_CLK_OUT_125MHZ_PLL;
- break;
- default:
- phydev_err(phydev, "invalid qca,clk-out-frequency\n");
- return -EINVAL;
- }
-
- priv->clk_25m_reg |= FIELD_PREP(AT803X_CLK_OUT_MASK, sel);
- priv->clk_25m_mask |= AT803X_CLK_OUT_MASK;
- }
-
- ret = of_property_read_u32(node, "qca,clk-out-strength", &strength);
- if (!ret) {
- priv->clk_25m_mask |= AT803X_CLK_OUT_STRENGTH_MASK;
- switch (strength) {
- case AR803X_STRENGTH_FULL:
- priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_FULL;
- break;
- case AR803X_STRENGTH_HALF:
- priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_HALF;
- break;
- case AR803X_STRENGTH_QUARTER:
- priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_QUARTER;
- break;
- default:
- phydev_err(phydev, "invalid qca,clk-out-strength\n");
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static int at803x_probe(struct phy_device *phydev)
-{
- struct device *dev = &phydev->mdio.dev;
- struct at803x_priv *priv;
- int ret;
-
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- phydev->priv = priv;
-
- ret = at803x_parse_dt(phydev);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int at803x_get_features(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- int err;
-
- err = genphy_read_abilities(phydev);
- if (err)
- return err;
-
- if (phydev->drv->phy_id != ATH8031_PHY_ID)
- return 0;
-
- /* AR8031/AR8033 have different status registers
- * for copper and fiber operation. However, the
- * extended status register is the same for both
- * operation modes.
- *
- * As a result of that, ESTATUS_1000_XFULL is set
- * to 1 even when operating in copper TP mode.
- *
- * Remove this mode from the supported link modes
- * when not operating in 1000BaseX mode.
- */
- if (!priv->is_1000basex)
- linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
- phydev->supported);
-
- return 0;
-}
-
-static int at803x_smarteee_config(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- u16 mask = 0, val = 0;
- int ret;
-
- if (priv->flags & AT803X_DISABLE_SMARTEEE)
- return phy_modify_mmd(phydev, MDIO_MMD_PCS,
- AT803X_MMD3_SMARTEEE_CTL3,
- AT803X_MMD3_SMARTEEE_CTL3_LPI_EN, 0);
-
- if (priv->smarteee_lpi_tw_1g) {
- mask |= 0xff00;
- val |= priv->smarteee_lpi_tw_1g << 8;
- }
- if (priv->smarteee_lpi_tw_100m) {
- mask |= 0x00ff;
- val |= priv->smarteee_lpi_tw_100m;
- }
- if (!mask)
- return 0;
-
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_MMD3_SMARTEEE_CTL1,
- mask, val);
- if (ret)
- return ret;
-
- return phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_MMD3_SMARTEEE_CTL3,
- AT803X_MMD3_SMARTEEE_CTL3_LPI_EN,
- AT803X_MMD3_SMARTEEE_CTL3_LPI_EN);
-}
-
-static int at803x_clk_out_config(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
-
- if (!priv->clk_25m_mask)
- return 0;
-
- return phy_modify_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M,
- priv->clk_25m_mask, priv->clk_25m_reg);
-}
-
-static int at8031_pll_config(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
-
- /* The default after hardware reset is PLL OFF. After a soft reset, the
- * values are retained.
- */
- if (priv->flags & AT803X_KEEP_PLL_ENABLED)
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
- 0, AT803X_DEBUG_PLL_ON);
- else
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
- AT803X_DEBUG_PLL_ON, 0);
-}
-
-static int at803x_hibernation_mode_config(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
-
- /* The default after hardware reset is hibernation mode enabled. After
- * software reset, the value is retained.
- */
- if (!(priv->flags & AT803X_DISABLE_HIBERNATION_MODE))
- return 0;
-
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL,
- AT803X_DEBUG_HIB_CTRL_PS_HIB_EN, 0);
-}
-
-static int at803x_config_init(struct phy_device *phydev)
-{
- int ret;
-
- /* The RX and TX delay default is:
- * after HW reset: RX delay enabled and TX delay disabled
- * after SW reset: RX delay enabled, while TX delay retains the
- * value before reset.
- */
- if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
- ret = at803x_enable_rx_delay(phydev);
- else
- ret = at803x_disable_rx_delay(phydev);
- if (ret < 0)
- return ret;
-
- if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
- ret = at803x_enable_tx_delay(phydev);
- else
- ret = at803x_disable_tx_delay(phydev);
- if (ret < 0)
- return ret;
-
- ret = at803x_smarteee_config(phydev);
- if (ret < 0)
- return ret;
-
- ret = at803x_clk_out_config(phydev);
- if (ret < 0)
- return ret;
-
- ret = at803x_hibernation_mode_config(phydev);
- if (ret < 0)
- return ret;
-
- /* Ar803x extended next page bit is enabled by default. Cisco
- * multigig switches read this bit and attempt to negotiate 10Gbps
- * rates even if the next page bit is disabled. This is incorrect
- * behaviour but we still need to accommodate it. XNP is only needed
- * for 10Gbps support, so disable XNP.
- */
- return phy_modify(phydev, MII_ADVERTISE, MDIO_AN_CTRL1_XNP, 0);
-}
-
-static int at803x_ack_interrupt(struct phy_device *phydev)
-{
- int err;
-
- err = phy_read(phydev, AT803X_INTR_STATUS);
-
- return (err < 0) ? err : 0;
-}
-
-static int at803x_config_intr(struct phy_device *phydev)
-{
- int err;
- int value;
-
- value = phy_read(phydev, AT803X_INTR_ENABLE);
-
- if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
- /* Clear any pending interrupts */
- err = at803x_ack_interrupt(phydev);
- if (err)
- return err;
-
- value |= AT803X_INTR_ENABLE_AUTONEG_ERR;
- value |= AT803X_INTR_ENABLE_SPEED_CHANGED;
- value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
- value |= AT803X_INTR_ENABLE_LINK_FAIL;
- value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
-
- err = phy_write(phydev, AT803X_INTR_ENABLE, value);
- } else {
- err = phy_write(phydev, AT803X_INTR_ENABLE, 0);
- if (err)
- return err;
-
- /* Clear any pending interrupts */
- err = at803x_ack_interrupt(phydev);
- }
-
- return err;
-}
-
-static irqreturn_t at803x_handle_interrupt(struct phy_device *phydev)
-{
- int irq_status, int_enabled;
-
- irq_status = phy_read(phydev, AT803X_INTR_STATUS);
- if (irq_status < 0) {
- phy_error(phydev);
- return IRQ_NONE;
- }
-
- /* Read the current enabled interrupts */
- int_enabled = phy_read(phydev, AT803X_INTR_ENABLE);
- if (int_enabled < 0) {
- phy_error(phydev);
- return IRQ_NONE;
- }
-
- /* See if this was one of our enabled interrupts */
- if (!(irq_status & int_enabled))
- return IRQ_NONE;
-
- phy_trigger_machine(phydev);
-
- return IRQ_HANDLED;
-}
-
-static void at803x_link_change_notify(struct phy_device *phydev)
-{
- /*
- * Conduct a hardware reset for AT8030 every time a link loss is
- * signalled. This is necessary to circumvent a hardware bug that
- * occurs when the cable is unplugged while TX packets are pending
- * in the FIFO. In such cases, the FIFO enters an error mode it
- * cannot recover from by software.
- */
- if (phydev->state == PHY_NOLINK && phydev->mdio.reset_gpio) {
- struct at803x_context context;
-
- at803x_context_save(phydev, &context);
-
- phy_device_reset(phydev, 1);
- usleep_range(1000, 2000);
- phy_device_reset(phydev, 0);
- usleep_range(1000, 2000);
-
- at803x_context_restore(phydev, &context);
-
- phydev_dbg(phydev, "%s(): phy was reset\n", __func__);
- }
-}
-
-static int at803x_read_specific_status(struct phy_device *phydev,
- struct at803x_ss_mask ss_mask)
-{
- int ss;
-
- /* Read the AT8035 PHY-Specific Status register, which indicates the
- * speed and duplex that the PHY is actually using, irrespective of
- * whether we are in autoneg mode or not.
- */
- ss = phy_read(phydev, AT803X_SPECIFIC_STATUS);
- if (ss < 0)
- return ss;
-
- if (ss & AT803X_SS_SPEED_DUPLEX_RESOLVED) {
- int sfc, speed;
-
- sfc = phy_read(phydev, AT803X_SPECIFIC_FUNCTION_CONTROL);
- if (sfc < 0)
- return sfc;
-
- speed = ss & ss_mask.speed_mask;
- speed >>= ss_mask.speed_shift;
-
- switch (speed) {
- case AT803X_SS_SPEED_10:
- phydev->speed = SPEED_10;
- break;
- case AT803X_SS_SPEED_100:
- phydev->speed = SPEED_100;
- break;
- case AT803X_SS_SPEED_1000:
- phydev->speed = SPEED_1000;
- break;
- case QCA808X_SS_SPEED_2500:
- phydev->speed = SPEED_2500;
- break;
- }
- if (ss & AT803X_SS_DUPLEX)
- phydev->duplex = DUPLEX_FULL;
- else
- phydev->duplex = DUPLEX_HALF;
-
- if (ss & AT803X_SS_MDIX)
- phydev->mdix = ETH_TP_MDI_X;
- else
- phydev->mdix = ETH_TP_MDI;
-
- switch (FIELD_GET(AT803X_SFC_MDI_CROSSOVER_MODE_M, sfc)) {
- case AT803X_SFC_MANUAL_MDI:
- phydev->mdix_ctrl = ETH_TP_MDI;
- break;
- case AT803X_SFC_MANUAL_MDIX:
- phydev->mdix_ctrl = ETH_TP_MDI_X;
- break;
- case AT803X_SFC_AUTOMATIC_CROSSOVER:
- phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
- break;
- }
- }
-
- return 0;
-}
-
-static int at803x_read_status(struct phy_device *phydev)
-{
- struct at803x_ss_mask ss_mask = { 0 };
- int err, old_link = phydev->link;
-
- /* Update the link, but return if there was an error */
- err = genphy_update_link(phydev);
- if (err)
- return err;
-
- /* why bother the PHY if nothing can have changed */
- if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
- return 0;
-
- phydev->speed = SPEED_UNKNOWN;
- phydev->duplex = DUPLEX_UNKNOWN;
- phydev->pause = 0;
- phydev->asym_pause = 0;
-
- err = genphy_read_lpa(phydev);
- if (err < 0)
- return err;
-
- ss_mask.speed_mask = AT803X_SS_SPEED_MASK;
- ss_mask.speed_shift = __bf_shf(AT803X_SS_SPEED_MASK);
- err = at803x_read_specific_status(phydev, ss_mask);
- if (err < 0)
- return err;
-
- if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete)
- phy_resolve_aneg_pause(phydev);
-
- return 0;
-}
-
-static int at803x_config_mdix(struct phy_device *phydev, u8 ctrl)
-{
- u16 val;
-
- switch (ctrl) {
- case ETH_TP_MDI:
- val = AT803X_SFC_MANUAL_MDI;
- break;
- case ETH_TP_MDI_X:
- val = AT803X_SFC_MANUAL_MDIX;
- break;
- case ETH_TP_MDI_AUTO:
- val = AT803X_SFC_AUTOMATIC_CROSSOVER;
- break;
- default:
- return 0;
- }
-
- return phy_modify_changed(phydev, AT803X_SPECIFIC_FUNCTION_CONTROL,
- AT803X_SFC_MDI_CROSSOVER_MODE_M,
- FIELD_PREP(AT803X_SFC_MDI_CROSSOVER_MODE_M, val));
-}
-
-static int at803x_prepare_config_aneg(struct phy_device *phydev)
-{
- int ret;
-
- ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
- if (ret < 0)
- return ret;
-
- /* Changes of the midx bits are disruptive to the normal operation;
- * therefore any changes to these registers must be followed by a
- * software reset to take effect.
- */
- if (ret == 1) {
- ret = genphy_soft_reset(phydev);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-
-static int at803x_config_aneg(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- int ret;
-
- ret = at803x_prepare_config_aneg(phydev);
- if (ret)
- return ret;
-
- if (priv->is_1000basex)
- return genphy_c37_config_aneg(phydev);
-
- return genphy_config_aneg(phydev);
-}
-
-static int at803x_get_downshift(struct phy_device *phydev, u8 *d)
-{
- int val;
-
- val = phy_read(phydev, AT803X_SMART_SPEED);
- if (val < 0)
- return val;
-
- if (val & AT803X_SMART_SPEED_ENABLE)
- *d = FIELD_GET(AT803X_SMART_SPEED_RETRY_LIMIT_MASK, val) + 2;
- else
- *d = DOWNSHIFT_DEV_DISABLE;
-
- return 0;
-}
-
-static int at803x_set_downshift(struct phy_device *phydev, u8 cnt)
-{
- u16 mask, set;
- int ret;
-
- switch (cnt) {
- case DOWNSHIFT_DEV_DEFAULT_COUNT:
- cnt = AT803X_DEFAULT_DOWNSHIFT;
- fallthrough;
- case AT803X_MIN_DOWNSHIFT ... AT803X_MAX_DOWNSHIFT:
- set = AT803X_SMART_SPEED_ENABLE |
- AT803X_SMART_SPEED_BYPASS_TIMER |
- FIELD_PREP(AT803X_SMART_SPEED_RETRY_LIMIT_MASK, cnt - 2);
- mask = AT803X_SMART_SPEED_RETRY_LIMIT_MASK;
- break;
- case DOWNSHIFT_DEV_DISABLE:
- set = 0;
- mask = AT803X_SMART_SPEED_ENABLE |
- AT803X_SMART_SPEED_BYPASS_TIMER;
- break;
- default:
- return -EINVAL;
- }
-
- ret = phy_modify_changed(phydev, AT803X_SMART_SPEED, mask, set);
-
- /* After changing the smart speed settings, we need to perform a
- * software reset, use phy_init_hw() to make sure we set the
- * reapply any values which might got lost during software reset.
- */
- if (ret == 1)
- ret = phy_init_hw(phydev);
-
- return ret;
-}
-
-static int at803x_get_tunable(struct phy_device *phydev,
- struct ethtool_tunable *tuna, void *data)
-{
- switch (tuna->id) {
- case ETHTOOL_PHY_DOWNSHIFT:
- return at803x_get_downshift(phydev, data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int at803x_set_tunable(struct phy_device *phydev,
- struct ethtool_tunable *tuna, const void *data)
-{
- switch (tuna->id) {
- case ETHTOOL_PHY_DOWNSHIFT:
- return at803x_set_downshift(phydev, *(const u8 *)data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int at803x_cable_test_result_trans(u16 status)
-{
- switch (FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status)) {
- case AT803X_CDT_STATUS_STAT_NORMAL:
- return ETHTOOL_A_CABLE_RESULT_CODE_OK;
- case AT803X_CDT_STATUS_STAT_SHORT:
- return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
- case AT803X_CDT_STATUS_STAT_OPEN:
- return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
- case AT803X_CDT_STATUS_STAT_FAIL:
- default:
- return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
- }
-}
-
-static bool at803x_cdt_test_failed(u16 status)
-{
- return FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status) ==
- AT803X_CDT_STATUS_STAT_FAIL;
-}
-
-static bool at803x_cdt_fault_length_valid(u16 status)
-{
- switch (FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status)) {
- case AT803X_CDT_STATUS_STAT_OPEN:
- case AT803X_CDT_STATUS_STAT_SHORT:
- return true;
- }
- return false;
-}
-
-static int at803x_cdt_fault_length(int dt)
-{
- /* According to the datasheet the distance to the fault is
- * DELTA_TIME * 0.824 meters.
- *
- * The author suspect the correct formula is:
- *
- * fault_distance = DELTA_TIME * (c * VF) / 125MHz / 2
- *
- * where c is the speed of light, VF is the velocity factor of
- * the twisted pair cable, 125MHz the counter frequency and
- * we need to divide by 2 because the hardware will measure the
- * round trip time to the fault and back to the PHY.
- *
- * With a VF of 0.69 we get the factor 0.824 mentioned in the
- * datasheet.
- */
- return (dt * 824) / 10;
-}
-
-static int at803x_cdt_start(struct phy_device *phydev,
- u32 cdt_start)
-{
- return phy_write(phydev, AT803X_CDT, cdt_start);
-}
-
-static int at803x_cdt_wait_for_completion(struct phy_device *phydev,
- u32 cdt_en)
-{
- int val, ret;
-
- /* One test run takes about 25ms */
- ret = phy_read_poll_timeout(phydev, AT803X_CDT, val,
- !(val & cdt_en),
- 30000, 100000, true);
-
- return ret < 0 ? ret : 0;
-}
-
-static int at803x_cable_test_one_pair(struct phy_device *phydev, int pair)
-{
- static const int ethtool_pair[] = {
- ETHTOOL_A_CABLE_PAIR_A,
- ETHTOOL_A_CABLE_PAIR_B,
- ETHTOOL_A_CABLE_PAIR_C,
- ETHTOOL_A_CABLE_PAIR_D,
- };
- int ret, val;
-
- val = FIELD_PREP(AT803X_CDT_MDI_PAIR_MASK, pair) |
- AT803X_CDT_ENABLE_TEST;
- ret = at803x_cdt_start(phydev, val);
- if (ret)
- return ret;
-
- ret = at803x_cdt_wait_for_completion(phydev, AT803X_CDT_ENABLE_TEST);
- if (ret)
- return ret;
-
- val = phy_read(phydev, AT803X_CDT_STATUS);
- if (val < 0)
- return val;
-
- if (at803x_cdt_test_failed(val))
- return 0;
-
- ethnl_cable_test_result(phydev, ethtool_pair[pair],
- at803x_cable_test_result_trans(val));
-
- if (at803x_cdt_fault_length_valid(val)) {
- val = FIELD_GET(AT803X_CDT_STATUS_DELTA_TIME_MASK, val);
- ethnl_cable_test_fault_length(phydev, ethtool_pair[pair],
- at803x_cdt_fault_length(val));
- }
-
- return 1;
-}
-
-static int at803x_cable_test_get_status(struct phy_device *phydev,
- bool *finished, unsigned long pair_mask)
-{
- int retries = 20;
- int pair, ret;
-
- *finished = false;
-
- /* According to the datasheet the CDT can be performed when
- * there is no link partner or when the link partner is
- * auto-negotiating. Starting the test will restart the AN
- * automatically. It seems that doing this repeatedly we will
- * get a slot where our link partner won't disturb our
- * measurement.
- */
- while (pair_mask && retries--) {
- for_each_set_bit(pair, &pair_mask, 4) {
- ret = at803x_cable_test_one_pair(phydev, pair);
- if (ret < 0)
- return ret;
- if (ret)
- clear_bit(pair, &pair_mask);
- }
- if (pair_mask)
- msleep(250);
- }
-
- *finished = true;
-
- return 0;
-}
-
-static void at803x_cable_test_autoneg(struct phy_device *phydev)
-{
- /* Enable auto-negotiation, but advertise no capabilities, no link
- * will be established. A restart of the auto-negotiation is not
- * required, because the cable test will automatically break the link.
- */
- phy_write(phydev, MII_BMCR, BMCR_ANENABLE);
- phy_write(phydev, MII_ADVERTISE, ADVERTISE_CSMA);
-}
-
-static int at803x_cable_test_start(struct phy_device *phydev)
-{
- at803x_cable_test_autoneg(phydev);
- /* we do all the (time consuming) work later */
- return 0;
-}
-
-static int at8031_rgmii_reg_set_voltage_sel(struct regulator_dev *rdev,
- unsigned int selector)
-{
- struct phy_device *phydev = rdev_get_drvdata(rdev);
-
- if (selector)
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
- 0, AT803X_DEBUG_RGMII_1V8);
- else
- return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
- AT803X_DEBUG_RGMII_1V8, 0);
-}
-
-static int at8031_rgmii_reg_get_voltage_sel(struct regulator_dev *rdev)
-{
- struct phy_device *phydev = rdev_get_drvdata(rdev);
- int val;
-
- val = at803x_debug_reg_read(phydev, AT803X_DEBUG_REG_1F);
- if (val < 0)
- return val;
-
- return (val & AT803X_DEBUG_RGMII_1V8) ? 1 : 0;
-}
-
-static const struct regulator_ops vddio_regulator_ops = {
- .list_voltage = regulator_list_voltage_table,
- .set_voltage_sel = at8031_rgmii_reg_set_voltage_sel,
- .get_voltage_sel = at8031_rgmii_reg_get_voltage_sel,
-};
-
-static const unsigned int vddio_voltage_table[] = {
- 1500000,
- 1800000,
-};
-
-static const struct regulator_desc vddio_desc = {
- .name = "vddio",
- .of_match = of_match_ptr("vddio-regulator"),
- .n_voltages = ARRAY_SIZE(vddio_voltage_table),
- .volt_table = vddio_voltage_table,
- .ops = &vddio_regulator_ops,
- .type = REGULATOR_VOLTAGE,
- .owner = THIS_MODULE,
-};
-
-static const struct regulator_ops vddh_regulator_ops = {
-};
-
-static const struct regulator_desc vddh_desc = {
- .name = "vddh",
- .of_match = of_match_ptr("vddh-regulator"),
- .n_voltages = 1,
- .fixed_uV = 2500000,
- .ops = &vddh_regulator_ops,
- .type = REGULATOR_VOLTAGE,
- .owner = THIS_MODULE,
-};
-
-static int at8031_register_regulators(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- struct device *dev = &phydev->mdio.dev;
- struct regulator_config config = { };
-
- config.dev = dev;
- config.driver_data = phydev;
-
- priv->vddio_rdev = devm_regulator_register(dev, &vddio_desc, &config);
- if (IS_ERR(priv->vddio_rdev)) {
- phydev_err(phydev, "failed to register VDDIO regulator\n");
- return PTR_ERR(priv->vddio_rdev);
- }
-
- priv->vddh_rdev = devm_regulator_register(dev, &vddh_desc, &config);
- if (IS_ERR(priv->vddh_rdev)) {
- phydev_err(phydev, "failed to register VDDH regulator\n");
- return PTR_ERR(priv->vddh_rdev);
- }
-
- return 0;
-}
-
-static int at8031_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
-{
- struct phy_device *phydev = upstream;
- __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
- __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
- DECLARE_PHY_INTERFACE_MASK(interfaces);
- phy_interface_t iface;
-
- linkmode_zero(phy_support);
- phylink_set(phy_support, 1000baseX_Full);
- phylink_set(phy_support, 1000baseT_Full);
- phylink_set(phy_support, Autoneg);
- phylink_set(phy_support, Pause);
- phylink_set(phy_support, Asym_Pause);
-
- linkmode_zero(sfp_support);
- sfp_parse_support(phydev->sfp_bus, id, sfp_support, interfaces);
- /* Some modules support 10G modes as well as others we support.
- * Mask out non-supported modes so the correct interface is picked.
- */
- linkmode_and(sfp_support, phy_support, sfp_support);
-
- if (linkmode_empty(sfp_support)) {
- dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
- return -EINVAL;
- }
-
- iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
-
- /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
- * interface for use with SFP modules.
- * However, some copper modules detected as having a preferred SGMII
- * interface do default to and function in 1000Base-X mode, so just
- * print a warning and allow such modules, as they may have some chance
- * of working.
- */
- if (iface == PHY_INTERFACE_MODE_SGMII)
- dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
- else if (iface != PHY_INTERFACE_MODE_1000BASEX)
- return -EINVAL;
-
- return 0;
-}
-
-static const struct sfp_upstream_ops at8031_sfp_ops = {
- .attach = phy_sfp_attach,
- .detach = phy_sfp_detach,
- .module_insert = at8031_sfp_insert,
-};
-
-static int at8031_parse_dt(struct phy_device *phydev)
-{
- struct device_node *node = phydev->mdio.dev.of_node;
- struct at803x_priv *priv = phydev->priv;
- int ret;
-
- if (of_property_read_bool(node, "qca,keep-pll-enabled"))
- priv->flags |= AT803X_KEEP_PLL_ENABLED;
-
- ret = at8031_register_regulators(phydev);
- if (ret < 0)
- return ret;
-
- ret = devm_regulator_get_enable_optional(&phydev->mdio.dev,
- "vddio");
- if (ret) {
- phydev_err(phydev, "failed to get VDDIO regulator\n");
- return ret;
- }
-
- /* Only AR8031/8033 support 1000Base-X for SFP modules */
- return phy_sfp_probe(phydev, &at8031_sfp_ops);
-}
-
-static int at8031_probe(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- int mode_cfg;
- int ccr;
- int ret;
-
- ret = at803x_probe(phydev);
- if (ret)
- return ret;
-
- /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
- * options.
- */
- ret = at8031_parse_dt(phydev);
- if (ret)
- return ret;
-
- ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
- if (ccr < 0)
- return ccr;
- mode_cfg = ccr & AT803X_MODE_CFG_MASK;
-
- switch (mode_cfg) {
- case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
- case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
- priv->is_1000basex = true;
- fallthrough;
- case AT803X_MODE_CFG_FX100_RGMII_50OHM:
- case AT803X_MODE_CFG_FX100_RGMII_75OHM:
- priv->is_fiber = true;
- break;
- }
-
- /* Disable WoL in 1588 register which is enabled
- * by default
- */
- return phy_modify_mmd(phydev, MDIO_MMD_PCS,
- AT803X_PHY_MMD3_WOL_CTRL,
- AT803X_WOL_EN, 0);
-}
-
-static int at8031_config_init(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- int ret;
-
- /* Some bootloaders leave the fiber page selected.
- * Switch to the appropriate page (fiber or copper), as otherwise we
- * read the PHY capabilities from the wrong page.
- */
- phy_lock_mdio_bus(phydev);
- ret = at803x_write_page(phydev,
- priv->is_fiber ? AT803X_PAGE_FIBER :
- AT803X_PAGE_COPPER);
- phy_unlock_mdio_bus(phydev);
- if (ret)
- return ret;
-
- ret = at8031_pll_config(phydev);
- if (ret < 0)
- return ret;
-
- return at803x_config_init(phydev);
-}
-
-static int at8031_set_wol(struct phy_device *phydev,
- struct ethtool_wolinfo *wol)
-{
- int ret;
-
- /* First setup MAC address and enable WOL interrupt */
- ret = at803x_set_wol(phydev, wol);
- if (ret)
- return ret;
-
- if (wol->wolopts & WAKE_MAGIC)
- /* Enable WOL function for 1588 */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
- AT803X_PHY_MMD3_WOL_CTRL,
- 0, AT803X_WOL_EN);
- else
- /* Disable WoL function for 1588 */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
- AT803X_PHY_MMD3_WOL_CTRL,
- AT803X_WOL_EN, 0);
-
- return ret;
-}
-
-static int at8031_config_intr(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
- int err, value = 0;
-
- if (phydev->interrupts == PHY_INTERRUPT_ENABLED &&
- priv->is_fiber) {
- /* Clear any pending interrupts */
- err = at803x_ack_interrupt(phydev);
- if (err)
- return err;
-
- value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
- value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
-
- err = phy_set_bits(phydev, AT803X_INTR_ENABLE, value);
- if (err)
- return err;
- }
-
- return at803x_config_intr(phydev);
-}
-
-/* AR8031 and AR8033 share the same read status logic */
-static int at8031_read_status(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
-
- if (priv->is_1000basex)
- return genphy_c37_read_status(phydev);
-
- return at803x_read_status(phydev);
-}
-
-/* AR8031 and AR8035 share the same cable test get status reg */
-static int at8031_cable_test_get_status(struct phy_device *phydev,
- bool *finished)
-{
- return at803x_cable_test_get_status(phydev, finished, 0xf);
-}
-
-/* AR8031 and AR8035 share the same cable test start logic */
-static int at8031_cable_test_start(struct phy_device *phydev)
-{
- at803x_cable_test_autoneg(phydev);
- phy_write(phydev, MII_CTRL1000, 0);
- /* we do all the (time consuming) work later */
- return 0;
-}
-
-/* AR8032, AR9331 and QCA9561 share the same cable test get status reg */
-static int at8032_cable_test_get_status(struct phy_device *phydev,
- bool *finished)
-{
- return at803x_cable_test_get_status(phydev, finished, 0x3);
-}
-
-static int at8035_parse_dt(struct phy_device *phydev)
-{
- struct at803x_priv *priv = phydev->priv;
-
- /* Mask is set by the generic at803x_parse_dt
- * if property is set. Assume property is set
- * with the mask not zero.
- */
- if (priv->clk_25m_mask) {
- /* Fixup for the AR8030/AR8035. This chip has another mask and
- * doesn't support the DSP reference. Eg. the lowest bit of the
- * mask. The upper two bits select the same frequencies. Mask
- * the lowest bit here.
- *
- * Warning:
- * There was no datasheet for the AR8030 available so this is
- * just a guess. But the AR8035 is listed as pin compatible
- * to the AR8030 so there might be a good chance it works on
- * the AR8030 too.
- */
- priv->clk_25m_reg &= AT8035_CLK_OUT_MASK;
- priv->clk_25m_mask &= AT8035_CLK_OUT_MASK;
- }
-
- return 0;
-}
-
-/* AR8030 and AR8035 shared the same special mask for clk_25m */
-static int at8035_probe(struct phy_device *phydev)
-{
- int ret;
-
- ret = at803x_probe(phydev);
- if (ret)
- return ret;
-
- return at8035_parse_dt(phydev);
-}
-
-static int qca83xx_config_init(struct phy_device *phydev)
-{
- u8 switch_revision;
-
- switch_revision = phydev->dev_flags & QCA8K_DEVFLAGS_REVISION_MASK;
-
- switch (switch_revision) {
- case 1:
- /* For 100M waveform */
- at803x_debug_reg_write(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, 0x02ea);
- /* Turn on Gigabit clock */
- at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_GREEN, 0x68a0);
- break;
-
- case 2:
- phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0);
- fallthrough;
- case 4:
- phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_AZ_DEBUG, 0x803f);
- at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_GREEN, 0x6860);
- at803x_debug_reg_write(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, 0x2c46);
- at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3C, 0x6000);
- break;
- }
-
- /* Following original QCA sourcecode set port to prefer master */
- phy_set_bits(phydev, MII_CTRL1000, CTL1000_PREFER_MASTER);
-
- return 0;
-}
-
-static int qca8327_config_init(struct phy_device *phydev)
-{
- /* QCA8327 require DAC amplitude adjustment for 100m set to +6%.
- * Disable on init and enable only with 100m speed following
- * qca original source code.
- */
- at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
- QCA8327_DEBUG_MANU_CTRL_EN, 0);
-
- return qca83xx_config_init(phydev);
-}
-
-static void qca83xx_link_change_notify(struct phy_device *phydev)
-{
- /* Set DAC Amplitude adjustment to +6% for 100m on link running */
- if (phydev->state == PHY_RUNNING) {
- if (phydev->speed == SPEED_100)
- at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
- QCA8327_DEBUG_MANU_CTRL_EN,
- QCA8327_DEBUG_MANU_CTRL_EN);
- } else {
- /* Reset DAC Amplitude adjustment */
- at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
- QCA8327_DEBUG_MANU_CTRL_EN, 0);
- }
-}
-
-static int qca83xx_resume(struct phy_device *phydev)
-{
- int ret, val;
-
- /* Skip reset if not suspended */
- if (!phydev->suspended)
- return 0;
-
- /* Reinit the port, reset values set by suspend */
- qca83xx_config_init(phydev);
-
- /* Reset the port on port resume */
- phy_set_bits(phydev, MII_BMCR, BMCR_RESET | BMCR_ANENABLE);
-
- /* On resume from suspend the switch execute a reset and
- * restart auto-negotiation. Wait for reset to complete.
- */
- ret = phy_read_poll_timeout(phydev, MII_BMCR, val, !(val & BMCR_RESET),
- 50000, 600000, true);
- if (ret)
- return ret;
-
- usleep_range(1000, 2000);
-
- return 0;
-}
-
-static int qca83xx_suspend(struct phy_device *phydev)
-{
- at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_GREEN,
- AT803X_DEBUG_GATE_CLK_IN1000, 0);
-
- at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL,
- AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE |
- AT803X_DEBUG_HIB_CTRL_SEL_RST_80U, 0);
-
- return 0;
-}
-
-static int qca8337_suspend(struct phy_device *phydev)
-{
- /* Only QCA8337 support actual suspend. */
- genphy_suspend(phydev);
-
- return qca83xx_suspend(phydev);
-}
-
-static int qca8327_suspend(struct phy_device *phydev)
-{
- u16 mask = 0;
-
- /* QCA8327 cause port unreliability when phy suspend
- * is set.
- */
- mask |= ~(BMCR_SPEED1000 | BMCR_FULLDPLX);
- phy_modify(phydev, MII_BMCR, mask, 0);
-
- return qca83xx_suspend(phydev);
-}
-
-static int qca808x_phy_fast_retrain_config(struct phy_device *phydev)
-{
- int ret;
-
- /* Enable fast retrain */
- ret = genphy_c45_fast_retrain(phydev, true);
- if (ret)
- return ret;
-
- phy_write_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_TOP_OPTION1,
- QCA808X_TOP_OPTION1_DATA);
- phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_20DB,
- QCA808X_MSE_THRESHOLD_20DB_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_17DB,
- QCA808X_MSE_THRESHOLD_17DB_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_27DB,
- QCA808X_MSE_THRESHOLD_27DB_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_28DB,
- QCA808X_MSE_THRESHOLD_28DB_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_1,
- QCA808X_MMD3_DEBUG_1_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_4,
- QCA808X_MMD3_DEBUG_4_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_5,
- QCA808X_MMD3_DEBUG_5_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_3,
- QCA808X_MMD3_DEBUG_3_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_6,
- QCA808X_MMD3_DEBUG_6_VALUE);
- phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_2,
- QCA808X_MMD3_DEBUG_2_VALUE);
-
- return 0;
-}
-
-static int qca808x_phy_ms_seed_enable(struct phy_device *phydev, bool enable)
-{
- u16 seed_value;
-
- if (!enable)
- return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
- QCA808X_MASTER_SLAVE_SEED_ENABLE, 0);
-
- seed_value = get_random_u32_below(QCA808X_MASTER_SLAVE_SEED_RANGE);
- return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
- QCA808X_MASTER_SLAVE_SEED_CFG | QCA808X_MASTER_SLAVE_SEED_ENABLE,
- FIELD_PREP(QCA808X_MASTER_SLAVE_SEED_CFG, seed_value) |
- QCA808X_MASTER_SLAVE_SEED_ENABLE);
-}
-
-static bool qca808x_is_prefer_master(struct phy_device *phydev)
-{
- return (phydev->master_slave_get == MASTER_SLAVE_CFG_MASTER_FORCE) ||
- (phydev->master_slave_get == MASTER_SLAVE_CFG_MASTER_PREFERRED);
-}
-
-static bool qca808x_has_fast_retrain_or_slave_seed(struct phy_device *phydev)
-{
- return linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported);
-}
-
-static int qca808x_config_init(struct phy_device *phydev)
-{
- int ret;
-
- /* Active adc&vga on 802.3az for the link 1000M and 100M */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_ADDR_CLD_CTRL7,
- QCA808X_8023AZ_AFE_CTRL_MASK, QCA808X_8023AZ_AFE_EN);
- if (ret)
- return ret;
-
- /* Adjust the threshold on 802.3az for the link 1000M */
- ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
- QCA808X_PHY_MMD3_AZ_TRAINING_CTRL,
- QCA808X_MMD3_AZ_TRAINING_VAL);
- if (ret)
- return ret;
-
- if (qca808x_has_fast_retrain_or_slave_seed(phydev)) {
- /* Config the fast retrain for the link 2500M */
- ret = qca808x_phy_fast_retrain_config(phydev);
- if (ret)
- return ret;
-
- ret = genphy_read_master_slave(phydev);
- if (ret < 0)
- return ret;
-
- if (!qca808x_is_prefer_master(phydev)) {
- /* Enable seed and configure lower ramdom seed to make phy
- * linked as slave mode.
- */
- ret = qca808x_phy_ms_seed_enable(phydev, true);
- if (ret)
- return ret;
- }
- }
-
- /* Configure adc threshold as 100mv for the link 10M */
- return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_ADC_THRESHOLD,
- QCA808X_ADC_THRESHOLD_MASK,
- QCA808X_ADC_THRESHOLD_100MV);
-}
-
-static int qca808x_read_status(struct phy_device *phydev)
-{
- struct at803x_ss_mask ss_mask = { 0 };
- int ret;
-
- ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
- if (ret < 0)
- return ret;
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->lp_advertising,
- ret & MDIO_AN_10GBT_STAT_LP2_5G);
-
- ret = genphy_read_status(phydev);
- if (ret)
- return ret;
-
- /* qca8081 takes the different bits for speed value from at803x */
- ss_mask.speed_mask = QCA808X_SS_SPEED_MASK;
- ss_mask.speed_shift = __bf_shf(QCA808X_SS_SPEED_MASK);
- ret = at803x_read_specific_status(phydev, ss_mask);
- if (ret < 0)
- return ret;
-
- if (phydev->link) {
- if (phydev->speed == SPEED_2500)
- phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
- else
- phydev->interface = PHY_INTERFACE_MODE_SGMII;
- } else {
- /* generate seed as a lower random value to make PHY linked as SLAVE easily,
- * except for master/slave configuration fault detected or the master mode
- * preferred.
- *
- * the reason for not putting this code into the function link_change_notify is
- * the corner case where the link partner is also the qca8081 PHY and the seed
- * value is configured as the same value, the link can't be up and no link change
- * occurs.
- */
- if (qca808x_has_fast_retrain_or_slave_seed(phydev)) {
- if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR ||
- qca808x_is_prefer_master(phydev)) {
- qca808x_phy_ms_seed_enable(phydev, false);
- } else {
- qca808x_phy_ms_seed_enable(phydev, true);
- }
- }
- }
-
- return 0;
-}
-
-static int qca808x_soft_reset(struct phy_device *phydev)
-{
- int ret;
-
- ret = genphy_soft_reset(phydev);
- if (ret < 0)
- return ret;
-
- if (qca808x_has_fast_retrain_or_slave_seed(phydev))
- ret = qca808x_phy_ms_seed_enable(phydev, true);
-
- return ret;
-}
-
-static bool qca808x_cdt_fault_length_valid(int cdt_code)
-{
- switch (cdt_code) {
- case QCA808X_CDT_STATUS_STAT_SAME_SHORT:
- case QCA808X_CDT_STATUS_STAT_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_NORMAL:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_SHORT:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_NORMAL:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_SHORT:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_NORMAL:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_SHORT:
- return true;
- default:
- return false;
- }
-}
-
-static int qca808x_cable_test_result_trans(int cdt_code)
-{
- switch (cdt_code) {
- case QCA808X_CDT_STATUS_STAT_NORMAL:
- return ETHTOOL_A_CABLE_RESULT_CODE_OK;
- case QCA808X_CDT_STATUS_STAT_SAME_SHORT:
- return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
- case QCA808X_CDT_STATUS_STAT_SAME_OPEN:
- return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_NORMAL:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_SHORT:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_NORMAL:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_SHORT:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_NORMAL:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_OPEN:
- case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_SHORT:
- return ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT;
- case QCA808X_CDT_STATUS_STAT_FAIL:
- default:
- return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
- }
-}
-
-static int qca808x_cdt_fault_length(struct phy_device *phydev, int pair,
- int result)
-{
- int val;
- u32 cdt_length_reg = 0;
-
- switch (pair) {
- case ETHTOOL_A_CABLE_PAIR_A:
- cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_A;
- break;
- case ETHTOOL_A_CABLE_PAIR_B:
- cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_B;
- break;
- case ETHTOOL_A_CABLE_PAIR_C:
- cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_C;
- break;
- case ETHTOOL_A_CABLE_PAIR_D:
- cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_D;
- break;
- default:
- return -EINVAL;
- }
-
- val = phy_read_mmd(phydev, MDIO_MMD_PCS, cdt_length_reg);
- if (val < 0)
- return val;
-
- if (result == ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT)
- val = FIELD_GET(QCA808X_CDT_DIAG_LENGTH_SAME_SHORT, val);
- else
- val = FIELD_GET(QCA808X_CDT_DIAG_LENGTH_CROSS_SHORT, val);
-
- return at803x_cdt_fault_length(val);
-}
-
-static int qca808x_cable_test_start(struct phy_device *phydev)
-{
- int ret;
-
- /* perform CDT with the following configs:
- * 1. disable hibernation.
- * 2. force PHY working in MDI mode.
- * 3. for PHY working in 1000BaseT.
- * 4. configure the threshold.
- */
-
- ret = at803x_debug_reg_mask(phydev, QCA808X_DBG_AN_TEST, QCA808X_HIBERNATION_EN, 0);
- if (ret < 0)
- return ret;
-
- ret = at803x_config_mdix(phydev, ETH_TP_MDI);
- if (ret < 0)
- return ret;
-
- /* Force 1000base-T needs to configure PMA/PMD and MII_BMCR */
- phydev->duplex = DUPLEX_FULL;
- phydev->speed = SPEED_1000;
- ret = genphy_c45_pma_setup_forced(phydev);
- if (ret < 0)
- return ret;
-
- ret = genphy_setup_forced(phydev);
- if (ret < 0)
- return ret;
-
- /* configure the thresholds for open, short, pair ok test */
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8074, 0xc040);
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8076, 0xc040);
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8077, 0xa060);
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8078, 0xc050);
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x807a, 0xc060);
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x807e, 0xb060);
-
- return 0;
-}
-
-static int qca808x_cable_test_get_pair_status(struct phy_device *phydev, u8 pair,
- u16 status)
-{
- int length, result;
- u16 pair_code;
-
- switch (pair) {
- case ETHTOOL_A_CABLE_PAIR_A:
- pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_A, status);
- break;
- case ETHTOOL_A_CABLE_PAIR_B:
- pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_B, status);
- break;
- case ETHTOOL_A_CABLE_PAIR_C:
- pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_C, status);
- break;
- case ETHTOOL_A_CABLE_PAIR_D:
- pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_D, status);
- break;
- default:
- return -EINVAL;
- }
-
- result = qca808x_cable_test_result_trans(pair_code);
- ethnl_cable_test_result(phydev, pair, result);
-
- if (qca808x_cdt_fault_length_valid(pair_code)) {
- length = qca808x_cdt_fault_length(phydev, pair, result);
- ethnl_cable_test_fault_length(phydev, pair, length);
- }
-
- return 0;
-}
-
-static int qca808x_cable_test_get_status(struct phy_device *phydev, bool *finished)
-{
- int ret, val;
-
- *finished = false;
-
- val = QCA808X_CDT_ENABLE_TEST |
- QCA808X_CDT_LENGTH_UNIT;
- ret = at803x_cdt_start(phydev, val);
- if (ret)
- return ret;
-
- ret = at803x_cdt_wait_for_completion(phydev, QCA808X_CDT_ENABLE_TEST);
- if (ret)
- return ret;
-
- val = phy_read_mmd(phydev, MDIO_MMD_PCS, QCA808X_MMD3_CDT_STATUS);
- if (val < 0)
- return val;
-
- ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_A, val);
- if (ret)
- return ret;
-
- ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_B, val);
- if (ret)
- return ret;
-
- ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_C, val);
- if (ret)
- return ret;
-
- ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_D, val);
- if (ret)
- return ret;
-
- *finished = true;
-
- return 0;
-}
-
-static int qca808x_get_features(struct phy_device *phydev)
-{
- int ret;
-
- ret = genphy_c45_pma_read_abilities(phydev);
- if (ret)
- return ret;
-
- /* The autoneg ability is not existed in bit3 of MMD7.1,
- * but it is supported by qca808x PHY, so we add it here
- * manually.
- */
- linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported);
-
- /* As for the qca8081 1G version chip, the 2500baseT ability is also
- * existed in the bit0 of MMD1.21, we need to remove it manually if
- * it is the qca8081 1G chip according to the bit0 of MMD7.0x901d.
- */
- ret = phy_read_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_CHIP_TYPE);
- if (ret < 0)
- return ret;
-
- if (QCA808X_PHY_CHIP_TYPE_1G & ret)
- linkmode_clear_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported);
-
- return 0;
-}
-
-static int qca808x_config_aneg(struct phy_device *phydev)
-{
- int phy_ctrl = 0;
- int ret;
-
- ret = at803x_prepare_config_aneg(phydev);
- if (ret)
- return ret;
-
- /* The reg MII_BMCR also needs to be configured for force mode, the
- * genphy_config_aneg is also needed.
- */
- if (phydev->autoneg == AUTONEG_DISABLE)
- genphy_c45_pma_setup_forced(phydev);
-
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->advertising))
- phy_ctrl = MDIO_AN_10GBT_CTRL_ADV2_5G;
-
- ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
- MDIO_AN_10GBT_CTRL_ADV2_5G, phy_ctrl);
- if (ret < 0)
- return ret;
-
- return __genphy_config_aneg(phydev, ret);
-}
-
-static void qca808x_link_change_notify(struct phy_device *phydev)
-{
- /* Assert interface sgmii fifo on link down, deassert it on link up,
- * the interface device address is always phy address added by 1.
- */
- mdiobus_c45_modify_changed(phydev->mdio.bus, phydev->mdio.addr + 1,
- MDIO_MMD_PMAPMD, QCA8081_PHY_SERDES_MMD1_FIFO_CTRL,
- QCA8081_PHY_FIFO_RSTN,
- phydev->link ? QCA8081_PHY_FIFO_RSTN : 0);
-}
-
-static struct phy_driver at803x_driver[] = {
-{
- /* Qualcomm Atheros AR8035 */
- PHY_ID_MATCH_EXACT(ATH8035_PHY_ID),
- .name = "Qualcomm Atheros AR8035",
- .flags = PHY_POLL_CABLE_TEST,
- .probe = at8035_probe,
- .config_aneg = at803x_config_aneg,
- .config_init = at803x_config_init,
- .soft_reset = genphy_soft_reset,
- .set_wol = at803x_set_wol,
- .get_wol = at803x_get_wol,
- .suspend = at803x_suspend,
- .resume = at803x_resume,
- /* PHY_GBIT_FEATURES */
- .read_status = at803x_read_status,
- .config_intr = at803x_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
- .get_tunable = at803x_get_tunable,
- .set_tunable = at803x_set_tunable,
- .cable_test_start = at8031_cable_test_start,
- .cable_test_get_status = at8031_cable_test_get_status,
-}, {
- /* Qualcomm Atheros AR8030 */
- .phy_id = ATH8030_PHY_ID,
- .name = "Qualcomm Atheros AR8030",
- .phy_id_mask = AT8030_PHY_ID_MASK,
- .probe = at8035_probe,
- .config_init = at803x_config_init,
- .link_change_notify = at803x_link_change_notify,
- .set_wol = at803x_set_wol,
- .get_wol = at803x_get_wol,
- .suspend = at803x_suspend,
- .resume = at803x_resume,
- /* PHY_BASIC_FEATURES */
- .config_intr = at803x_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
-}, {
- /* Qualcomm Atheros AR8031/AR8033 */
- PHY_ID_MATCH_EXACT(ATH8031_PHY_ID),
- .name = "Qualcomm Atheros AR8031/AR8033",
- .flags = PHY_POLL_CABLE_TEST,
- .probe = at8031_probe,
- .config_init = at8031_config_init,
- .config_aneg = at803x_config_aneg,
- .soft_reset = genphy_soft_reset,
- .set_wol = at8031_set_wol,
- .get_wol = at803x_get_wol,
- .suspend = at803x_suspend,
- .resume = at803x_resume,
- .read_page = at803x_read_page,
- .write_page = at803x_write_page,
- .get_features = at803x_get_features,
- .read_status = at8031_read_status,
- .config_intr = at8031_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
- .get_tunable = at803x_get_tunable,
- .set_tunable = at803x_set_tunable,
- .cable_test_start = at8031_cable_test_start,
- .cable_test_get_status = at8031_cable_test_get_status,
-}, {
- /* Qualcomm Atheros AR8032 */
- PHY_ID_MATCH_EXACT(ATH8032_PHY_ID),
- .name = "Qualcomm Atheros AR8032",
- .probe = at803x_probe,
- .flags = PHY_POLL_CABLE_TEST,
- .config_init = at803x_config_init,
- .link_change_notify = at803x_link_change_notify,
- .suspend = at803x_suspend,
- .resume = at803x_resume,
- /* PHY_BASIC_FEATURES */
- .config_intr = at803x_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
- .cable_test_start = at803x_cable_test_start,
- .cable_test_get_status = at8032_cable_test_get_status,
-}, {
- /* ATHEROS AR9331 */
- PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
- .name = "Qualcomm Atheros AR9331 built-in PHY",
- .probe = at803x_probe,
- .suspend = at803x_suspend,
- .resume = at803x_resume,
- .flags = PHY_POLL_CABLE_TEST,
- /* PHY_BASIC_FEATURES */
- .config_intr = at803x_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
- .cable_test_start = at803x_cable_test_start,
- .cable_test_get_status = at8032_cable_test_get_status,
- .read_status = at803x_read_status,
- .soft_reset = genphy_soft_reset,
- .config_aneg = at803x_config_aneg,
-}, {
- /* Qualcomm Atheros QCA9561 */
- PHY_ID_MATCH_EXACT(QCA9561_PHY_ID),
- .name = "Qualcomm Atheros QCA9561 built-in PHY",
- .probe = at803x_probe,
- .suspend = at803x_suspend,
- .resume = at803x_resume,
- .flags = PHY_POLL_CABLE_TEST,
- /* PHY_BASIC_FEATURES */
- .config_intr = at803x_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
- .cable_test_start = at803x_cable_test_start,
- .cable_test_get_status = at8032_cable_test_get_status,
- .read_status = at803x_read_status,
- .soft_reset = genphy_soft_reset,
- .config_aneg = at803x_config_aneg,
-}, {
- /* QCA8337 */
- .phy_id = QCA8337_PHY_ID,
- .phy_id_mask = QCA8K_PHY_ID_MASK,
- .name = "Qualcomm Atheros 8337 internal PHY",
- /* PHY_GBIT_FEATURES */
- .probe = at803x_probe,
- .flags = PHY_IS_INTERNAL,
- .config_init = qca83xx_config_init,
- .soft_reset = genphy_soft_reset,
- .get_sset_count = qca83xx_get_sset_count,
- .get_strings = qca83xx_get_strings,
- .get_stats = qca83xx_get_stats,
- .suspend = qca8337_suspend,
- .resume = qca83xx_resume,
-}, {
- /* QCA8327-A from switch QCA8327-AL1A */
- .phy_id = QCA8327_A_PHY_ID,
- .phy_id_mask = QCA8K_PHY_ID_MASK,
- .name = "Qualcomm Atheros 8327-A internal PHY",
- /* PHY_GBIT_FEATURES */
- .link_change_notify = qca83xx_link_change_notify,
- .probe = at803x_probe,
- .flags = PHY_IS_INTERNAL,
- .config_init = qca8327_config_init,
- .soft_reset = genphy_soft_reset,
- .get_sset_count = qca83xx_get_sset_count,
- .get_strings = qca83xx_get_strings,
- .get_stats = qca83xx_get_stats,
- .suspend = qca8327_suspend,
- .resume = qca83xx_resume,
-}, {
- /* QCA8327-B from switch QCA8327-BL1A */
- .phy_id = QCA8327_B_PHY_ID,
- .phy_id_mask = QCA8K_PHY_ID_MASK,
- .name = "Qualcomm Atheros 8327-B internal PHY",
- /* PHY_GBIT_FEATURES */
- .link_change_notify = qca83xx_link_change_notify,
- .probe = at803x_probe,
- .flags = PHY_IS_INTERNAL,
- .config_init = qca8327_config_init,
- .soft_reset = genphy_soft_reset,
- .get_sset_count = qca83xx_get_sset_count,
- .get_strings = qca83xx_get_strings,
- .get_stats = qca83xx_get_stats,
- .suspend = qca8327_suspend,
- .resume = qca83xx_resume,
-}, {
- /* Qualcomm QCA8081 */
- PHY_ID_MATCH_EXACT(QCA8081_PHY_ID),
- .name = "Qualcomm QCA8081",
- .flags = PHY_POLL_CABLE_TEST,
- .probe = at803x_probe,
- .config_intr = at803x_config_intr,
- .handle_interrupt = at803x_handle_interrupt,
- .get_tunable = at803x_get_tunable,
- .set_tunable = at803x_set_tunable,
- .set_wol = at803x_set_wol,
- .get_wol = at803x_get_wol,
- .get_features = qca808x_get_features,
- .config_aneg = qca808x_config_aneg,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
- .read_status = qca808x_read_status,
- .config_init = qca808x_config_init,
- .soft_reset = qca808x_soft_reset,
- .cable_test_start = qca808x_cable_test_start,
- .cable_test_get_status = qca808x_cable_test_get_status,
- .link_change_notify = qca808x_link_change_notify,
-}, };
-
-module_phy_driver(at803x_driver);
-
-static struct mdio_device_id __maybe_unused atheros_tbl[] = {
- { ATH8030_PHY_ID, AT8030_PHY_ID_MASK },
- { PHY_ID_MATCH_EXACT(ATH8031_PHY_ID) },
- { PHY_ID_MATCH_EXACT(ATH8032_PHY_ID) },
- { PHY_ID_MATCH_EXACT(ATH8035_PHY_ID) },
- { PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) },
- { PHY_ID_MATCH_EXACT(QCA8337_PHY_ID) },
- { PHY_ID_MATCH_EXACT(QCA8327_A_PHY_ID) },
- { PHY_ID_MATCH_EXACT(QCA8327_B_PHY_ID) },
- { PHY_ID_MATCH_EXACT(QCA9561_PHY_ID) },
- { PHY_ID_MATCH_EXACT(QCA8081_PHY_ID) },
- { }
-};
-
-MODULE_DEVICE_TABLE(mdio, atheros_tbl);
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 312a8bb35d78..370e4ed45098 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -665,10 +665,11 @@ static int bcm54616s_config_aneg(struct phy_device *phydev)
static int bcm54616s_read_status(struct phy_device *phydev)
{
struct bcm54616s_phy_priv *priv = phydev->priv;
+ bool changed;
int err;
if (priv->mode_1000bx_en)
- err = genphy_c37_read_status(phydev);
+ err = genphy_c37_read_status(phydev, &changed);
else
err = genphy_read_status(phydev);
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index b7cb71817780..c3426a17e6d0 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -12,6 +12,7 @@
#include <linux/of.h>
#include <linux/phy.h>
#include <linux/netdevice.h>
+#include <linux/bitfield.h>
#define DP83822_PHY_ID 0x2000a240
#define DP83825S_PHY_ID 0x2000a140
@@ -34,6 +35,10 @@
#define MII_DP83822_GENCFG 0x465
#define MII_DP83822_SOR1 0x467
+/* DP83826 specific registers */
+#define MII_DP83826_VOD_CFG1 0x30b
+#define MII_DP83826_VOD_CFG2 0x30c
+
/* GENCFG */
#define DP83822_SIG_DET_LOW BIT(0)
@@ -95,6 +100,8 @@
#define DP83822_WOL_CLR_INDICATION BIT(11)
/* RCSR bits */
+#define DP83822_RMII_MODE_EN BIT(5)
+#define DP83822_RMII_MODE_SEL BIT(7)
#define DP83822_RGMII_MODE_EN BIT(9)
#define DP83822_RX_CLK_SHIFT BIT(12)
#define DP83822_TX_CLK_SHIFT BIT(11)
@@ -110,6 +117,19 @@
#define DP83822_RX_ER_STR_MASK GENMASK(9, 8)
#define DP83822_RX_ER_SHIFT 8
+/* DP83826: VOD_CFG1 & VOD_CFG2 */
+#define DP83826_VOD_CFG1_MINUS_MDIX_MASK GENMASK(13, 12)
+#define DP83826_VOD_CFG1_MINUS_MDI_MASK GENMASK(11, 6)
+#define DP83826_VOD_CFG2_MINUS_MDIX_MASK GENMASK(15, 12)
+#define DP83826_VOD_CFG2_PLUS_MDIX_MASK GENMASK(11, 6)
+#define DP83826_VOD_CFG2_PLUS_MDI_MASK GENMASK(5, 0)
+#define DP83826_CFG_DAC_MINUS_MDIX_5_TO_4 GENMASK(5, 4)
+#define DP83826_CFG_DAC_MINUS_MDIX_3_TO_0 GENMASK(3, 0)
+#define DP83826_CFG_DAC_PERCENT_PER_STEP 625
+#define DP83826_CFG_DAC_PERCENT_DEFAULT 10000
+#define DP83826_CFG_DAC_MINUS_DEFAULT 0x30
+#define DP83826_CFG_DAC_PLUS_DEFAULT 0x10
+
#define MII_DP83822_FIBER_ADVERTISE (ADVERTISED_TP | ADVERTISED_MII | \
ADVERTISED_FIBRE | \
ADVERTISED_Pause | ADVERTISED_Asym_Pause)
@@ -118,6 +138,8 @@ struct dp83822_private {
bool fx_signal_det_low;
int fx_enabled;
u16 fx_sd_enable;
+ u8 cfg_dac_minus;
+ u8 cfg_dac_plus;
};
static int dp83822_set_wol(struct phy_device *phydev,
@@ -233,7 +255,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
DP83822_ENERGY_DET_INT_EN |
DP83822_LINK_QUAL_INT_EN);
- /* Private data pointer is NULL on DP83825/26 */
+ /* Private data pointer is NULL on DP83825 */
if (!dp83822 || !dp83822->fx_enabled)
misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
DP83822_DUP_MODE_CHANGE_INT_EN |
@@ -254,7 +276,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
DP83822_PAGE_RX_INT_EN |
DP83822_EEE_ERROR_CHANGE_INT_EN);
- /* Private data pointer is NULL on DP83825/26 */
+ /* Private data pointer is NULL on DP83825 */
if (!dp83822 || !dp83822->fx_enabled)
misr_status |= DP83822_ANEG_ERR_INT_EN |
DP83822_WOL_PKT_INT_EN;
@@ -380,7 +402,7 @@ static int dp83822_config_init(struct phy_device *phydev)
{
struct dp83822_private *dp83822 = phydev->priv;
struct device *dev = &phydev->mdio.dev;
- int rgmii_delay;
+ int rgmii_delay = 0;
s32 rx_int_delay;
s32 tx_int_delay;
int err = 0;
@@ -390,30 +412,33 @@ static int dp83822_config_init(struct phy_device *phydev)
rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0,
true);
- if (rx_int_delay <= 0)
- rgmii_delay = 0;
- else
- rgmii_delay = DP83822_RX_CLK_SHIFT;
+ /* Set DP83822_RX_CLK_SHIFT to enable rx clk internal delay */
+ if (rx_int_delay > 0)
+ rgmii_delay |= DP83822_RX_CLK_SHIFT;
tx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0,
false);
+
+ /* Set DP83822_TX_CLK_SHIFT to disable tx clk internal delay */
if (tx_int_delay <= 0)
- rgmii_delay &= ~DP83822_TX_CLK_SHIFT;
- else
rgmii_delay |= DP83822_TX_CLK_SHIFT;
- if (rgmii_delay) {
- err = phy_set_bits_mmd(phydev, DP83822_DEVADDR,
- MII_DP83822_RCSR, rgmii_delay);
- if (err)
- return err;
- }
+ err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay);
+ if (err)
+ return err;
+
+ err = phy_set_bits_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RCSR, DP83822_RGMII_MODE_EN);
- phy_set_bits_mmd(phydev, DP83822_DEVADDR,
- MII_DP83822_RCSR, DP83822_RGMII_MODE_EN);
+ if (err)
+ return err;
} else {
- phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
- MII_DP83822_RCSR, DP83822_RGMII_MODE_EN);
+ err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RCSR, DP83822_RGMII_MODE_EN);
+
+ if (err)
+ return err;
}
if (dp83822->fx_enabled) {
@@ -474,6 +499,85 @@ static int dp83822_config_init(struct phy_device *phydev)
return dp8382x_disable_wol(phydev);
}
+static int dp83826_config_rmii_mode(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ const char *of_val;
+ int ret;
+
+ if (!device_property_read_string(dev, "ti,rmii-mode", &of_val)) {
+ if (strcmp(of_val, "master") == 0) {
+ ret = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ DP83822_RMII_MODE_SEL);
+ } else if (strcmp(of_val, "slave") == 0) {
+ ret = phy_set_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ DP83822_RMII_MODE_SEL);
+ } else {
+ phydev_err(phydev, "Invalid value for ti,rmii-mode property (%s)\n",
+ of_val);
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dp83826_config_init(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822 = phydev->priv;
+ u16 val, mask;
+ int ret;
+
+ if (phydev->interface == PHY_INTERFACE_MODE_RMII) {
+ ret = phy_set_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ DP83822_RMII_MODE_EN);
+ if (ret)
+ return ret;
+
+ ret = dp83826_config_rmii_mode(phydev);
+ if (ret)
+ return ret;
+ } else {
+ ret = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ DP83822_RMII_MODE_EN);
+ if (ret)
+ return ret;
+ }
+
+ if (dp83822->cfg_dac_minus != DP83826_CFG_DAC_MINUS_DEFAULT) {
+ val = FIELD_PREP(DP83826_VOD_CFG1_MINUS_MDI_MASK, dp83822->cfg_dac_minus) |
+ FIELD_PREP(DP83826_VOD_CFG1_MINUS_MDIX_MASK,
+ FIELD_GET(DP83826_CFG_DAC_MINUS_MDIX_5_TO_4,
+ dp83822->cfg_dac_minus));
+ mask = DP83826_VOD_CFG1_MINUS_MDIX_MASK | DP83826_VOD_CFG1_MINUS_MDI_MASK;
+ ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG1, mask, val);
+ if (ret)
+ return ret;
+
+ val = FIELD_PREP(DP83826_VOD_CFG2_MINUS_MDIX_MASK,
+ FIELD_GET(DP83826_CFG_DAC_MINUS_MDIX_3_TO_0,
+ dp83822->cfg_dac_minus));
+ mask = DP83826_VOD_CFG2_MINUS_MDIX_MASK;
+ ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG2, mask, val);
+ if (ret)
+ return ret;
+ }
+
+ if (dp83822->cfg_dac_plus != DP83826_CFG_DAC_PLUS_DEFAULT) {
+ val = FIELD_PREP(DP83826_VOD_CFG2_PLUS_MDIX_MASK, dp83822->cfg_dac_plus) |
+ FIELD_PREP(DP83826_VOD_CFG2_PLUS_MDI_MASK, dp83822->cfg_dac_plus);
+ mask = DP83826_VOD_CFG2_PLUS_MDIX_MASK | DP83826_VOD_CFG2_PLUS_MDI_MASK;
+ ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG2, mask, val);
+ if (ret)
+ return ret;
+ }
+
+ return dp8382x_disable_wol(phydev);
+}
+
static int dp8382x_config_init(struct phy_device *phydev)
{
return dp8382x_disable_wol(phydev);
@@ -509,11 +613,44 @@ static int dp83822_of_init(struct phy_device *phydev)
return 0;
}
+
+static int dp83826_to_dac_minus_one_regval(int percent)
+{
+ int tmp = DP83826_CFG_DAC_PERCENT_DEFAULT - percent;
+
+ return tmp / DP83826_CFG_DAC_PERCENT_PER_STEP;
+}
+
+static int dp83826_to_dac_plus_one_regval(int percent)
+{
+ int tmp = percent - DP83826_CFG_DAC_PERCENT_DEFAULT;
+
+ return tmp / DP83826_CFG_DAC_PERCENT_PER_STEP;
+}
+
+static void dp83826_of_init(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822 = phydev->priv;
+ struct device *dev = &phydev->mdio.dev;
+ u32 val;
+
+ dp83822->cfg_dac_minus = DP83826_CFG_DAC_MINUS_DEFAULT;
+ if (!device_property_read_u32(dev, "ti,cfg-dac-minus-one-bp", &val))
+ dp83822->cfg_dac_minus += dp83826_to_dac_minus_one_regval(val);
+
+ dp83822->cfg_dac_plus = DP83826_CFG_DAC_PLUS_DEFAULT;
+ if (!device_property_read_u32(dev, "ti,cfg-dac-plus-one-bp", &val))
+ dp83822->cfg_dac_plus += dp83826_to_dac_plus_one_regval(val);
+}
#else
static int dp83822_of_init(struct phy_device *phydev)
{
return 0;
}
+
+static void dp83826_of_init(struct phy_device *phydev)
+{
+}
#endif /* CONFIG_OF_MDIO */
static int dp83822_read_straps(struct phy_device *phydev)
@@ -567,6 +704,22 @@ static int dp83822_probe(struct phy_device *phydev)
return 0;
}
+static int dp83826_probe(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822;
+
+ dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
+ GFP_KERNEL);
+ if (!dp83822)
+ return -ENOMEM;
+
+ phydev->priv = dp83822;
+
+ dp83826_of_init(phydev);
+
+ return 0;
+}
+
static int dp83822_suspend(struct phy_device *phydev)
{
int value;
@@ -610,6 +763,22 @@ static int dp83822_resume(struct phy_device *phydev)
.resume = dp83822_resume, \
}
+#define DP83826_PHY_DRIVER(_id, _name) \
+ { \
+ PHY_ID_MATCH_MODEL(_id), \
+ .name = (_name), \
+ /* PHY_BASIC_FEATURES */ \
+ .probe = dp83826_probe, \
+ .soft_reset = dp83822_phy_reset, \
+ .config_init = dp83826_config_init, \
+ .get_wol = dp83822_get_wol, \
+ .set_wol = dp83822_set_wol, \
+ .config_intr = dp83822_config_intr, \
+ .handle_interrupt = dp83822_handle_interrupt, \
+ .suspend = dp83822_suspend, \
+ .resume = dp83822_resume, \
+ }
+
#define DP8382X_PHY_DRIVER(_id, _name) \
{ \
PHY_ID_MATCH_MODEL(_id), \
@@ -628,8 +797,8 @@ static int dp83822_resume(struct phy_device *phydev)
static struct phy_driver dp83822_driver[] = {
DP83822_PHY_DRIVER(DP83822_PHY_ID, "TI DP83822"),
DP8382X_PHY_DRIVER(DP83825I_PHY_ID, "TI DP83825I"),
- DP8382X_PHY_DRIVER(DP83826C_PHY_ID, "TI DP83826C"),
- DP8382X_PHY_DRIVER(DP83826NC_PHY_ID, "TI DP83826NC"),
+ DP83826_PHY_DRIVER(DP83826C_PHY_ID, "TI DP83826C"),
+ DP83826_PHY_DRIVER(DP83826NC_PHY_ID, "TI DP83826NC"),
DP8382X_PHY_DRIVER(DP83825S_PHY_ID, "TI DP83825S"),
DP8382X_PHY_DRIVER(DP83825CM_PHY_ID, "TI DP83825M"),
DP8382X_PHY_DRIVER(DP83825CS_PHY_ID, "TI DP83825CS"),
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 5f08f9d38bd7..4120385c5a79 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -158,6 +158,7 @@
/* LED_DRV bits */
#define DP83867_LED_DRV_EN(x) BIT((x) * 4)
#define DP83867_LED_DRV_VAL(x) BIT((x) * 4 + 1)
+#define DP83867_LED_POLARITY(x) BIT((x) * 4 + 2)
#define DP83867_LED_FN(idx, val) (((val) & 0xf) << ((idx) * 4))
#define DP83867_LED_FN_MASK(idx) (0xf << ((idx) * 4))
@@ -1152,6 +1153,26 @@ static int dp83867_led_hw_control_get(struct phy_device *phydev, u8 index,
return 0;
}
+static int dp83867_led_polarity_set(struct phy_device *phydev, int index,
+ unsigned long modes)
+{
+ /* Default active high */
+ u16 polarity = DP83867_LED_POLARITY(index);
+ u32 mode;
+
+ for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) {
+ switch (mode) {
+ case PHY_LED_ACTIVE_LOW:
+ polarity = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return phy_modify(phydev, DP83867_LEDCR2,
+ DP83867_LED_POLARITY(index), polarity);
+}
+
static struct phy_driver dp83867_driver[] = {
{
.phy_id = DP83867_PHY_ID,
@@ -1184,6 +1205,7 @@ static struct phy_driver dp83867_driver[] = {
.led_hw_is_supported = dp83867_led_hw_is_supported,
.led_hw_control_set = dp83867_led_hw_control_set,
.led_hw_control_get = dp83867_led_hw_control_get,
+ .led_polarity_set = dp83867_led_polarity_set,
},
};
module_phy_driver(dp83867_driver);
diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c
index 1c3ff77de56b..6b4bd9883304 100644
--- a/drivers/net/phy/marvell-88q2xxx.c
+++ b/drivers/net/phy/marvell-88q2xxx.c
@@ -1,10 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Marvell 88Q2XXX automotive 100BASE-T1/1000BASE-T1 PHY driver
+ *
+ * Derived from Marvell Q222x API
+ *
+ * Copyright (C) 2024 Liebherr-Electronics and Drives GmbH
*/
#include <linux/ethtool_netlink.h>
#include <linux/marvell_phy.h>
#include <linux/phy.h>
+#include <linux/hwmon.h>
+
+#define PHY_ID_88Q2220_REVB0 (MARVELL_PHY_ID_88Q2220 | 0x1)
#define MDIO_MMD_AN_MV_STAT 32769
#define MDIO_MMD_AN_MV_STAT_ANEG 0x0100
@@ -13,8 +20,38 @@
#define MDIO_MMD_AN_MV_STAT_LOCAL_MASTER 0x4000
#define MDIO_MMD_AN_MV_STAT_MS_CONF_FAULT 0x8000
+#define MDIO_MMD_AN_MV_STAT2 32794
+#define MDIO_MMD_AN_MV_STAT2_AN_RESOLVED 0x0800
+#define MDIO_MMD_AN_MV_STAT2_100BT1 0x2000
+#define MDIO_MMD_AN_MV_STAT2_1000BT1 0x4000
+
+#define MDIO_MMD_PCS_MV_INT_EN 32784
+#define MDIO_MMD_PCS_MV_INT_EN_LINK_UP 0x0040
+#define MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN 0x0080
+#define MDIO_MMD_PCS_MV_INT_EN_100BT1 0x1000
+
+#define MDIO_MMD_PCS_MV_GPIO_INT_STAT 32785
+#define MDIO_MMD_PCS_MV_GPIO_INT_STAT_LINK_UP 0x0040
+#define MDIO_MMD_PCS_MV_GPIO_INT_STAT_LINK_DOWN 0x0080
+#define MDIO_MMD_PCS_MV_GPIO_INT_STAT_100BT1_GEN 0x1000
+
+#define MDIO_MMD_PCS_MV_GPIO_INT_CTRL 32787
+#define MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS 0x0800
+
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR1 32833
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR1_RAW_INT 0x0001
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR1_INT 0x0040
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR1_INT_EN 0x0080
+
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR2 32834
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK 0xc000
+
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR3 32835
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK 0xff00
+#define MDIO_MMD_PCS_MV_TEMP_SENSOR3_MASK 0x00ff
+
#define MDIO_MMD_PCS_MV_100BT1_STAT1 33032
-#define MDIO_MMD_PCS_MV_100BT1_STAT1_IDLE_ERROR 0x00FF
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_IDLE_ERROR 0x00ff
#define MDIO_MMD_PCS_MV_100BT1_STAT1_JABBER 0x0100
#define MDIO_MMD_PCS_MV_100BT1_STAT1_LINK 0x0200
#define MDIO_MMD_PCS_MV_100BT1_STAT1_LOCAL_RX 0x1000
@@ -27,6 +64,71 @@
#define MDIO_MMD_PCS_MV_100BT1_STAT2_LINK 0x0004
#define MDIO_MMD_PCS_MV_100BT1_STAT2_ANGE 0x0008
+#define MDIO_MMD_PCS_MV_100BT1_INT_EN 33042
+#define MDIO_MMD_PCS_MV_100BT1_INT_EN_LINKEVENT 0x0400
+
+#define MDIO_MMD_PCS_MV_COPPER_INT_STAT 33043
+#define MDIO_MMD_PCS_MV_COPPER_INT_STAT_LINKEVENT 0x0400
+
+#define MDIO_MMD_PCS_MV_RX_STAT 33328
+
+#define MDIO_MMD_PCS_MV_TDR_RESET 65226
+#define MDIO_MMD_PCS_MV_TDR_RESET_TDR_RST 0x1000
+
+#define MDIO_MMD_PCS_MV_TDR_OFF_SHORT_CABLE 65241
+
+#define MDIO_MMD_PCS_MV_TDR_OFF_LONG_CABLE 65242
+
+#define MDIO_MMD_PCS_MV_TDR_STATUS 65245
+#define MDIO_MMD_PCS_MV_TDR_STATUS_MASK 0x0003
+#define MDIO_MMD_PCS_MV_TDR_STATUS_OFF 0x0001
+#define MDIO_MMD_PCS_MV_TDR_STATUS_ON 0x0002
+#define MDIO_MMD_PCS_MV_TDR_STATUS_DIST_MASK 0xff00
+#define MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_MASK 0x00f0
+#define MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_SHORT 0x0030
+#define MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_OPEN 0x00e0
+#define MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_OK 0x0070
+#define MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_IN_PROGR 0x0080
+#define MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_NOISE 0x0050
+
+#define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246
+
+struct mmd_val {
+ int devad;
+ u32 regnum;
+ u16 val;
+};
+
+static const struct mmd_val mv88q222x_revb0_init_seq0[] = {
+ { MDIO_MMD_PCS, 0x8033, 0x6801 },
+ { MDIO_MMD_AN, MDIO_AN_T1_CTRL, 0x0 },
+ { MDIO_MMD_PMAPMD, MDIO_CTRL1,
+ MDIO_CTRL1_LPOWER | MDIO_PMA_CTRL1_SPEED1000 },
+ { MDIO_MMD_PCS, 0xfe1b, 0x48 },
+ { MDIO_MMD_PCS, 0xffe4, 0x6b6 },
+ { MDIO_MMD_PMAPMD, MDIO_CTRL1, 0x0 },
+ { MDIO_MMD_PCS, MDIO_CTRL1, 0x0 },
+};
+
+static const struct mmd_val mv88q222x_revb0_init_seq1[] = {
+ { MDIO_MMD_PCS, 0xfe79, 0x0 },
+ { MDIO_MMD_PCS, 0xfe07, 0x125a },
+ { MDIO_MMD_PCS, 0xfe09, 0x1288 },
+ { MDIO_MMD_PCS, 0xfe08, 0x2588 },
+ { MDIO_MMD_PCS, 0xfe11, 0x1105 },
+ { MDIO_MMD_PCS, 0xfe72, 0x042c },
+ { MDIO_MMD_PCS, 0xfbba, 0xcb2 },
+ { MDIO_MMD_PCS, 0xfbbb, 0xc4a },
+ { MDIO_MMD_AN, 0x8032, 0x2020 },
+ { MDIO_MMD_AN, 0x8031, 0xa28 },
+ { MDIO_MMD_AN, 0x8031, 0xc28 },
+ { MDIO_MMD_PCS, 0xffdb, 0xfc10 },
+ { MDIO_MMD_PCS, 0xfe1b, 0x58 },
+ { MDIO_MMD_PCS, 0xfe79, 0x4 },
+ { MDIO_MMD_PCS, 0xfe5f, 0xe8 },
+ { MDIO_MMD_PCS, 0xfe05, 0x755c },
+};
+
static int mv88q2xxx_soft_reset(struct phy_device *phydev)
{
int ret;
@@ -50,20 +152,23 @@ static int mv88q2xxx_read_link_gbit(struct phy_device *phydev)
/* Read vendor specific Auto-Negotiation status register to get local
* and remote receiver status according to software initialization
- * guide.
+ * guide. However, when not in polling mode the local and remote
+ * receiver status are not evaluated due to the Marvell 88Q2xxx APIs.
*/
ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_MMD_AN_MV_STAT);
if (ret < 0) {
return ret;
- } else if ((ret & MDIO_MMD_AN_MV_STAT_LOCAL_RX) &&
- (ret & MDIO_MMD_AN_MV_STAT_REMOTE_RX)) {
+ } else if (((ret & MDIO_MMD_AN_MV_STAT_LOCAL_RX) &&
+ (ret & MDIO_MMD_AN_MV_STAT_REMOTE_RX)) ||
+ !phy_polling_mode(phydev)) {
/* The link state is latched low so that momentary link
* drops can be detected. Do not double-read the status
* in polling mode to detect such short link drops except
* the link was already down.
*/
if (!phy_polling_mode(phydev) || !phydev->link) {
- ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_1000BT1_STAT);
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_PCS_1000BT1_STAT);
if (ret < 0)
return ret;
else if (ret & MDIO_PCS_1000BT1_STAT_LINK)
@@ -71,7 +176,8 @@ static int mv88q2xxx_read_link_gbit(struct phy_device *phydev)
}
if (!link) {
- ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_1000BT1_STAT);
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_PCS_1000BT1_STAT);
if (ret < 0)
return ret;
else if (ret & MDIO_PCS_1000BT1_STAT_LINK)
@@ -94,8 +200,20 @@ static int mv88q2xxx_read_link_100m(struct phy_device *phydev)
* the link was already down. In case we are not polling,
* we always read the realtime status.
*/
- if (!phy_polling_mode(phydev) || !phydev->link) {
- ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_100BT1_STAT1);
+ if (!phy_polling_mode(phydev)) {
+ phydev->link = false;
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_100BT1_STAT2);
+ if (ret < 0)
+ return ret;
+
+ if (ret & MDIO_MMD_PCS_MV_100BT1_STAT2_LINK)
+ phydev->link = true;
+
+ return 0;
+ } else if (!phydev->link) {
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_100BT1_STAT1);
if (ret < 0)
return ret;
else if (ret & MDIO_MMD_PCS_MV_100BT1_STAT1_LINK)
@@ -120,24 +238,90 @@ out:
static int mv88q2xxx_read_link(struct phy_device *phydev)
{
- int ret;
-
/* The 88Q2XXX PHYs do not have the PMA/PMD status register available,
* therefore we need to read the link status from the vendor specific
* registers depending on the speed.
*/
+
if (phydev->speed == SPEED_1000)
- ret = mv88q2xxx_read_link_gbit(phydev);
+ return mv88q2xxx_read_link_gbit(phydev);
+ else if (phydev->speed == SPEED_100)
+ return mv88q2xxx_read_link_100m(phydev);
+
+ phydev->link = false;
+ return 0;
+}
+
+static int mv88q2xxx_read_master_slave_state(struct phy_device *phydev)
+{
+ int ret;
+
+ phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN;
+ ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_MMD_AN_MV_STAT);
+ if (ret < 0)
+ return ret;
+
+ if (ret & MDIO_MMD_AN_MV_STAT_LOCAL_MASTER)
+ phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER;
else
- ret = mv88q2xxx_read_link_100m(phydev);
+ phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE;
+
+ return 0;
+}
+
+static int mv88q2xxx_read_aneg_speed(struct phy_device *phydev)
+{
+ int ret;
+
+ phydev->speed = SPEED_UNKNOWN;
+ ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_MMD_AN_MV_STAT2);
+ if (ret < 0)
+ return ret;
+
+ if (!(ret & MDIO_MMD_AN_MV_STAT2_AN_RESOLVED))
+ return 0;
- return ret;
+ if (ret & MDIO_MMD_AN_MV_STAT2_100BT1)
+ phydev->speed = SPEED_100;
+ else if (ret & MDIO_MMD_AN_MV_STAT2_1000BT1)
+ phydev->speed = SPEED_1000;
+
+ return 0;
}
static int mv88q2xxx_read_status(struct phy_device *phydev)
{
int ret;
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ /* We have to get the negotiated speed first, otherwise we are
+ * not able to read the link.
+ */
+ ret = mv88q2xxx_read_aneg_speed(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = mv88q2xxx_read_link(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = genphy_c45_read_lpa(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = genphy_c45_baset1_read_status(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = mv88q2xxx_read_master_slave_state(phydev);
+ if (ret < 0)
+ return ret;
+
+ phy_resolve_aneg_linkmode(phydev);
+
+ return 0;
+ }
+
ret = mv88q2xxx_read_link(phydev);
if (ret < 0)
return ret;
@@ -166,7 +350,9 @@ static int mv88q2xxx_get_features(struct phy_device *phydev)
* sequence provided by Marvell. Disable it for now until a proper
* workaround is found or a new PHY revision is released.
*/
- linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported);
+ if (phydev->drv->phy_id == MARVELL_PHY_ID_88Q2110)
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ phydev->supported);
return 0;
}
@@ -179,28 +365,29 @@ static int mv88q2xxx_config_aneg(struct phy_device *phydev)
if (ret)
return ret;
- return mv88q2xxx_soft_reset(phydev);
+ return phydev->drv->soft_reset(phydev);
}
static int mv88q2xxx_config_init(struct phy_device *phydev)
{
- int ret;
-
/* The 88Q2XXX PHYs do have the extended ability register available, but
* register MDIO_PMA_EXTABLE where they should signalize it does not
* work according to specification. Therefore, we force it here.
*/
phydev->pma_extable = MDIO_PMA_EXTABLE_BT1;
- /* Read the current PHY configuration */
- ret = genphy_c45_read_pma(phydev);
- if (ret)
- return ret;
+ /* Configure interrupt with default settings, output is driven low for
+ * active interrupt and high for inactive.
+ */
+ if (phy_interrupt_is_valid(phydev))
+ return phy_set_bits_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_GPIO_INT_CTRL,
+ MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS);
- return mv88q2xxx_config_aneg(phydev);
+ return 0;
}
-static int mv88q2xxxx_get_sqi(struct phy_device *phydev)
+static int mv88q2xxx_get_sqi(struct phy_device *phydev)
{
int ret;
@@ -208,7 +395,8 @@ static int mv88q2xxxx_get_sqi(struct phy_device *phydev)
/* Read the SQI from the vendor specific receiver status
* register
*/
- ret = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x8230);
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_RX_STAT);
if (ret < 0)
return ret;
@@ -218,7 +406,7 @@ static int mv88q2xxxx_get_sqi(struct phy_device *phydev)
* but can be found in the Software Initialization Guide. Only
* revisions >= A0 are supported.
*/
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, 0xFC5D, 0x00FF, 0x00AC);
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, 0xfc5d, 0xff, 0xac);
if (ret < 0)
return ret;
@@ -227,14 +415,386 @@ static int mv88q2xxxx_get_sqi(struct phy_device *phydev)
return ret;
}
- return ret & 0x0F;
+ return ret & 0x0f;
}
-static int mv88q2xxxx_get_sqi_max(struct phy_device *phydev)
+static int mv88q2xxx_get_sqi_max(struct phy_device *phydev)
{
return 15;
}
+static int mv88q2xxx_config_intr(struct phy_device *phydev)
+{
+ int ret;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+ /* Enable interrupts for 1000BASE-T1 link up and down events
+ * and enable general interrupts for 100BASE-T1.
+ */
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_INT_EN,
+ MDIO_MMD_PCS_MV_INT_EN_LINK_UP |
+ MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN |
+ MDIO_MMD_PCS_MV_INT_EN_100BT1);
+ if (ret < 0)
+ return ret;
+
+ /* Enable interrupts for 100BASE-T1 link events */
+ return phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_100BT1_INT_EN,
+ MDIO_MMD_PCS_MV_100BT1_INT_EN_LINKEVENT);
+ } else {
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_INT_EN, 0);
+ if (ret < 0)
+ return ret;
+
+ return phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_100BT1_INT_EN, 0);
+ }
+}
+
+static irqreturn_t mv88q2xxx_handle_interrupt(struct phy_device *phydev)
+{
+ bool trigger_machine = false;
+ int irq;
+
+ /* Before we can acknowledge the 100BT1 general interrupt, that is in
+ * the 1000BT1 interrupt status register, we have to acknowledge any
+ * interrupts that are related to it. Therefore we read first the 100BT1
+ * interrupt status register, followed by reading the 1000BT1 interrupt
+ * status register.
+ */
+
+ irq = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_COPPER_INT_STAT);
+ if (irq < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ /* Check link status for 100BT1 */
+ if (irq & MDIO_MMD_PCS_MV_COPPER_INT_STAT_LINKEVENT)
+ trigger_machine = true;
+
+ irq = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_GPIO_INT_STAT);
+ if (irq < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ /* Check link status for 1000BT1 */
+ if ((irq & MDIO_MMD_PCS_MV_GPIO_INT_STAT_LINK_UP) ||
+ (irq & MDIO_MMD_PCS_MV_GPIO_INT_STAT_LINK_DOWN))
+ trigger_machine = true;
+
+ if (!trigger_machine)
+ return IRQ_NONE;
+
+ phy_trigger_machine(phydev);
+
+ return IRQ_HANDLED;
+}
+
+static int mv88q2xxx_suspend(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Disable PHY interrupts */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->interrupts = PHY_INTERRUPT_DISABLED;
+ ret = mv88q2xxx_config_intr(phydev);
+ if (ret)
+ return ret;
+ }
+
+ return phy_set_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+ MDIO_CTRL1_LPOWER);
+}
+
+static int mv88q2xxx_resume(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Enable PHY interrupts */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->interrupts = PHY_INTERRUPT_ENABLED;
+ ret = mv88q2xxx_config_intr(phydev);
+ if (ret)
+ return ret;
+ }
+
+ return phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+ MDIO_CTRL1_LPOWER);
+}
+
+#if IS_ENABLED(CONFIG_HWMON)
+static const struct hwmon_channel_info * const mv88q2xxx_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_ALARM),
+ NULL
+};
+
+static umode_t mv88q2xxx_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ switch (attr) {
+ case hwmon_temp_input:
+ return 0444;
+ case hwmon_temp_max:
+ return 0644;
+ case hwmon_temp_alarm:
+ return 0444;
+ default:
+ return 0;
+ }
+}
+
+static int mv88q2xxx_hwmon_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct phy_device *phydev = dev_get_drvdata(dev);
+ int ret;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR3);
+ if (ret < 0)
+ return ret;
+
+ ret = FIELD_GET(MDIO_MMD_PCS_MV_TEMP_SENSOR3_MASK, ret);
+ *val = (ret - 75) * 1000;
+ return 0;
+ case hwmon_temp_max:
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR3);
+ if (ret < 0)
+ return ret;
+
+ ret = FIELD_GET(MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK,
+ ret);
+ *val = (ret - 75) * 1000;
+ return 0;
+ case hwmon_temp_alarm:
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR1);
+ if (ret < 0)
+ return ret;
+
+ *val = !!(ret & MDIO_MMD_PCS_MV_TEMP_SENSOR1_RAW_INT);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mv88q2xxx_hwmon_write(struct device *dev,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel, long val)
+{
+ struct phy_device *phydev = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_max:
+ clamp_val(val, -75000, 180000);
+ val = (val / 1000) + 75;
+ val = FIELD_PREP(MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK,
+ val);
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR3,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK,
+ val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct hwmon_ops mv88q2xxx_hwmon_hwmon_ops = {
+ .is_visible = mv88q2xxx_hwmon_is_visible,
+ .read = mv88q2xxx_hwmon_read,
+ .write = mv88q2xxx_hwmon_write,
+};
+
+static const struct hwmon_chip_info mv88q2xxx_hwmon_chip_info = {
+ .ops = &mv88q2xxx_hwmon_hwmon_ops,
+ .info = mv88q2xxx_hwmon_info,
+};
+
+static int mv88q2xxx_hwmon_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct device *hwmon;
+ char *hwmon_name;
+ int ret;
+
+ /* Enable temperature sense */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TEMP_SENSOR2,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0);
+ if (ret < 0)
+ return ret;
+
+ hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev));
+ if (IS_ERR(hwmon_name))
+ return PTR_ERR(hwmon_name);
+
+ hwmon = devm_hwmon_device_register_with_info(dev,
+ hwmon_name,
+ phydev,
+ &mv88q2xxx_hwmon_chip_info,
+ NULL);
+
+ return PTR_ERR_OR_ZERO(hwmon);
+}
+
+#else
+static int mv88q2xxx_hwmon_probe(struct phy_device *phydev)
+{
+ return 0;
+}
+#endif
+
+static int mv88q2xxx_probe(struct phy_device *phydev)
+{
+ return mv88q2xxx_hwmon_probe(phydev);
+}
+
+static int mv88q222x_soft_reset(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Enable RESET of DCL */
+ if (phydev->autoneg == AUTONEG_ENABLE || phydev->speed == SPEED_1000) {
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, 0xfe1b, 0x48);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_1000BT1_CTRL,
+ MDIO_PCS_1000BT1_CTRL_RESET);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, 0xffe4, 0xc);
+ if (ret < 0)
+ return ret;
+
+ /* Disable RESET of DCL */
+ if (phydev->autoneg == AUTONEG_ENABLE || phydev->speed == SPEED_1000)
+ return phy_write_mmd(phydev, MDIO_MMD_PCS, 0xfe1b, 0x58);
+
+ return 0;
+}
+
+static int mv88q222x_revb0_config_init(struct phy_device *phydev)
+{
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(mv88q222x_revb0_init_seq0); i++) {
+ ret = phy_write_mmd(phydev, mv88q222x_revb0_init_seq0[i].devad,
+ mv88q222x_revb0_init_seq0[i].regnum,
+ mv88q222x_revb0_init_seq0[i].val);
+ if (ret < 0)
+ return ret;
+ }
+
+ usleep_range(5000, 10000);
+
+ for (i = 0; i < ARRAY_SIZE(mv88q222x_revb0_init_seq1); i++) {
+ ret = phy_write_mmd(phydev, mv88q222x_revb0_init_seq1[i].devad,
+ mv88q222x_revb0_init_seq1[i].regnum,
+ mv88q222x_revb0_init_seq1[i].val);
+ if (ret < 0)
+ return ret;
+ }
+
+ return mv88q2xxx_config_init(phydev);
+}
+
+static int mv88q222x_cable_test_start(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF, 0x0058);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TDR_OFF_LONG_CABLE, 0x00eb);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TDR_OFF_SHORT_CABLE, 0x010e);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TDR_RESET,
+ 0x0d90);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TDR_STATUS,
+ MDIO_MMD_PCS_MV_TDR_STATUS_ON);
+ if (ret < 0)
+ return ret;
+
+ /* According to the Marvell API the test is finished within 500 ms */
+ msleep(500);
+
+ return 0;
+}
+
+static int mv88q222x_cable_test_get_status(struct phy_device *phydev,
+ bool *finished)
+{
+ int ret, status;
+ u32 dist;
+
+ status = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TDR_STATUS);
+ if (status < 0)
+ return status;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TDR_RESET,
+ MDIO_MMD_PCS_MV_TDR_RESET_TDR_RST | 0xd90);
+ if (ret < 0)
+ return ret;
+
+ /* Test could not be finished */
+ if (FIELD_GET(MDIO_MMD_PCS_MV_TDR_STATUS_MASK, status) !=
+ MDIO_MMD_PCS_MV_TDR_STATUS_OFF)
+ return -ETIMEDOUT;
+
+ *finished = true;
+ /* Fault length reported in meters, convert to centimeters */
+ dist = FIELD_GET(MDIO_MMD_PCS_MV_TDR_STATUS_DIST_MASK, status) * 100;
+ switch (status & MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_MASK) {
+ case MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_OPEN:
+ ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+ ETHTOOL_A_CABLE_RESULT_CODE_OPEN);
+ ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_A,
+ dist);
+ break;
+ case MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_SHORT:
+ ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+ ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT);
+ ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_A,
+ dist);
+ break;
+ case MDIO_MMD_PCS_MV_TDR_STATUS_VCT_STAT_OK:
+ ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+ ETHTOOL_A_CABLE_RESULT_CODE_OK);
+ break;
+ default:
+ ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+ ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC);
+ }
+
+ return 0;
+}
+
static struct phy_driver mv88q2xxx_driver[] = {
{
.phy_id = MARVELL_PHY_ID_88Q2110,
@@ -246,8 +806,29 @@ static struct phy_driver mv88q2xxx_driver[] = {
.read_status = mv88q2xxx_read_status,
.soft_reset = mv88q2xxx_soft_reset,
.set_loopback = genphy_c45_loopback,
- .get_sqi = mv88q2xxxx_get_sqi,
- .get_sqi_max = mv88q2xxxx_get_sqi_max,
+ .get_sqi = mv88q2xxx_get_sqi,
+ .get_sqi_max = mv88q2xxx_get_sqi_max,
+ },
+ {
+ PHY_ID_MATCH_EXACT(PHY_ID_88Q2220_REVB0),
+ .name = "mv88q2220",
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = mv88q2xxx_probe,
+ .get_features = mv88q2xxx_get_features,
+ .config_aneg = mv88q2xxx_config_aneg,
+ .aneg_done = genphy_c45_aneg_done,
+ .config_init = mv88q222x_revb0_config_init,
+ .read_status = mv88q2xxx_read_status,
+ .soft_reset = mv88q222x_soft_reset,
+ .config_intr = mv88q2xxx_config_intr,
+ .handle_interrupt = mv88q2xxx_handle_interrupt,
+ .set_loopback = genphy_c45_loopback,
+ .cable_test_start = mv88q222x_cable_test_start,
+ .cable_test_get_status = mv88q222x_cable_test_get_status,
+ .get_sqi = mv88q2xxx_get_sqi,
+ .get_sqi_max = mv88q2xxx_get_sqi_max,
+ .suspend = mv88q2xxx_suspend,
+ .resume = mv88q2xxx_resume,
},
};
@@ -255,6 +836,7 @@ module_phy_driver(mv88q2xxx_driver);
static struct mdio_device_id __maybe_unused mv88q2xxx_tbl[] = {
{ MARVELL_PHY_ID_88Q2110, MARVELL_PHY_ID_MASK },
+ { PHY_ID_MATCH_EXACT(PHY_ID_88Q2220_REVB0), },
{ /*sentinel*/ }
};
MODULE_DEVICE_TABLE(mdio, mv88q2xxx_tbl);
diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index e3aa30dad2e6..b88398e6872b 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -9,12 +9,10 @@
*/
#include <linux/module.h>
#include <linux/phy.h>
-#include <linux/gpio.h>
#include <linux/delay.h>
#include <linux/mdio.h>
#include <linux/marvell_phy.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/sfp.h>
#include <linux/netdevice.h>
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index eba652a4c1d8..42ed013385bf 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -919,7 +919,10 @@ static int m88e1111_config_init_1000basex(struct phy_device *phydev)
if (extsr < 0)
return extsr;
- /* If using copper mode, ensure 1000BaseX auto-negotiation is enabled */
+ /* If using copper mode, ensure 1000BaseX auto-negotiation is enabled.
+ * FIXME: this does not actually enable 1000BaseX auto-negotiation if
+ * it was previously disabled in the Fiber BMCR!
+ */
mode = extsr & MII_M1111_HWCFG_MODE_MASK;
if (mode == MII_M1111_HWCFG_MODE_COPPER_1000X_NOAN) {
err = phy_modify(phydev, MII_M1111_PHY_EXT_SR,
@@ -1461,7 +1464,7 @@ static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs)
static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs)
{
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
int val, ret;
if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF)
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index afbad1ad8683..8b9ead76e40e 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -13,7 +13,6 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
-#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -25,7 +24,6 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/of_mdio.h>
#include <linux/phy.h>
#include <linux/reset.h>
@@ -459,19 +457,34 @@ EXPORT_SYMBOL(of_mdio_find_bus);
* found, set the of_node pointer for the mdio device. This allows
* auto-probed phy devices to be supplied with information passed in
* via DT.
+ * If a PHY package is found, PHY is searched also there.
*/
-static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
- struct mdio_device *mdiodev)
+static int of_mdiobus_find_phy(struct device *dev, struct mdio_device *mdiodev,
+ struct device_node *np)
{
- struct device *dev = &mdiodev->dev;
struct device_node *child;
- if (dev->of_node || !bus->dev.of_node)
- return;
-
- for_each_available_child_of_node(bus->dev.of_node, child) {
+ for_each_available_child_of_node(np, child) {
int addr;
+ if (of_node_name_eq(child, "ethernet-phy-package")) {
+ /* Validate PHY package reg presence */
+ if (!of_property_present(child, "reg")) {
+ of_node_put(child);
+ return -EINVAL;
+ }
+
+ if (!of_mdiobus_find_phy(dev, mdiodev, child)) {
+ /* The refcount for the PHY package will be
+ * incremented later when PHY join the Package.
+ */
+ of_node_put(child);
+ return 0;
+ }
+
+ continue;
+ }
+
addr = of_mdio_parse_addr(dev, child);
if (addr < 0)
continue;
@@ -481,9 +494,22 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
/* The refcount on "child" is passed to the mdio
* device. Do _not_ use of_node_put(child) here.
*/
- return;
+ return 0;
}
}
+
+ return -ENODEV;
+}
+
+static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
+ struct mdio_device *mdiodev)
+{
+ struct device *dev = &mdiodev->dev;
+
+ if (dev->of_node || !bus->dev.of_node)
+ return;
+
+ of_mdiobus_find_phy(dev, mdiodev, bus->dev.of_node);
}
#else /* !IS_ENABLED(CONFIG_OF_MDIO) */
static inline void of_mdiobus_link_mdiodev(struct mii_bus *mdio,
@@ -1398,7 +1424,7 @@ static const struct attribute_group *mdio_bus_dev_groups[] = {
NULL,
};
-struct bus_type mdio_bus_type = {
+const struct bus_type mdio_bus_type = {
.name = "mdio_bus",
.dev_groups = mdio_bus_dev_groups,
.match = mdio_bus_match,
diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c
index 69b829e6ab35..7fd3377dbd79 100644
--- a/drivers/net/phy/mdio_devres.c
+++ b/drivers/net/phy/mdio_devres.c
@@ -131,4 +131,5 @@ int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
EXPORT_SYMBOL(__devm_of_mdiobus_register);
#endif /* CONFIG_OF_MDIO */
+MODULE_DESCRIPTION("Network MDIO bus devres helpers");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c
index 8a20d9889f10..0f3a1538a8b8 100644
--- a/drivers/net/phy/mediatek-ge-soc.c
+++ b/drivers/net/phy/mediatek-ge-soc.c
@@ -489,7 +489,7 @@ static int tx_r50_fill_result(struct phy_device *phydev, u16 tx_r50_cal_val,
u16 reg, val;
if (phydev->drv->phy_id == MTK_GPHY_ID_MT7988)
- bias = -2;
+ bias = -1;
val = clamp_val(bias + tx_r50_cal_val, 0, 63);
@@ -705,6 +705,11 @@ restore:
static void mt798x_phy_common_finetune(struct phy_device *phydev)
{
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
+ /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */
+ __phy_write(phydev, 0x11, 0xc71);
+ __phy_write(phydev, 0x12, 0xc);
+ __phy_write(phydev, 0x10, 0x8fae);
+
/* EnabRandUpdTrig = 1 */
__phy_write(phydev, 0x11, 0x2f00);
__phy_write(phydev, 0x12, 0xe);
@@ -715,15 +720,56 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev)
__phy_write(phydev, 0x12, 0x0);
__phy_write(phydev, 0x10, 0x83aa);
- /* TrFreeze = 0 */
+ /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */
+ __phy_write(phydev, 0x11, 0x240);
+ __phy_write(phydev, 0x12, 0x0);
+ __phy_write(phydev, 0x10, 0x9680);
+
+ /* TrFreeze = 0 (mt7988 default) */
__phy_write(phydev, 0x11, 0x0);
__phy_write(phydev, 0x12, 0x0);
__phy_write(phydev, 0x10, 0x9686);
+ /* SSTrKp100 = 5 */
+ /* SSTrKf100 = 6 */
+ /* SSTrKp1000Mas = 5 */
+ /* SSTrKf1000Mas = 6 */
/* SSTrKp1000Slv = 5 */
+ /* SSTrKf1000Slv = 6 */
__phy_write(phydev, 0x11, 0xbaef);
__phy_write(phydev, 0x12, 0x2e);
__phy_write(phydev, 0x10, 0x968c);
+ phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+}
+
+static void mt7981_phy_finetune(struct phy_device *phydev)
+{
+ u16 val[8] = { 0x01ce, 0x01c1,
+ 0x020f, 0x0202,
+ 0x03d0, 0x03c0,
+ 0x0013, 0x0005 };
+ int i, k;
+
+ /* 100M eye finetune:
+ * Keep middle level of TX MLT3 shapper as default.
+ * Only change TX MLT3 overshoot level here.
+ */
+ for (k = 0, i = 1; i < 12; i++) {
+ if (i % 3 == 0)
+ continue;
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]);
+ }
+
+ phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
+ /* ResetSyncOffset = 6 */
+ __phy_write(phydev, 0x11, 0x600);
+ __phy_write(phydev, 0x12, 0x0);
+ __phy_write(phydev, 0x10, 0x8fc0);
+
+ /* VgaDecRate = 1 */
+ __phy_write(phydev, 0x11, 0x4c2a);
+ __phy_write(phydev, 0x12, 0x3e);
+ __phy_write(phydev, 0x10, 0x8fa4);
/* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2,
* MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2
@@ -738,7 +784,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev)
__phy_write(phydev, 0x10, 0x8ec0);
phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
- /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9*/
+ /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */
phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234,
MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK,
BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0x9));
@@ -771,48 +817,6 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev)
phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LDO_OUTPUT_V, 0x2222);
}
-static void mt7981_phy_finetune(struct phy_device *phydev)
-{
- u16 val[8] = { 0x01ce, 0x01c1,
- 0x020f, 0x0202,
- 0x03d0, 0x03c0,
- 0x0013, 0x0005 };
- int i, k;
-
- /* 100M eye finetune:
- * Keep middle level of TX MLT3 shapper as default.
- * Only change TX MLT3 overshoot level here.
- */
- for (k = 0, i = 1; i < 12; i++) {
- if (i % 3 == 0)
- continue;
- phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]);
- }
-
- phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
- /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */
- __phy_write(phydev, 0x11, 0xc71);
- __phy_write(phydev, 0x12, 0xc);
- __phy_write(phydev, 0x10, 0x8fae);
-
- /* ResetSyncOffset = 6 */
- __phy_write(phydev, 0x11, 0x600);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x8fc0);
-
- /* VgaDecRate = 1 */
- __phy_write(phydev, 0x11, 0x4c2a);
- __phy_write(phydev, 0x12, 0x3e);
- __phy_write(phydev, 0x10, 0x8fa4);
-
- /* FfeUpdGainForce = 4 */
- __phy_write(phydev, 0x11, 0x240);
- __phy_write(phydev, 0x12, 0x0);
- __phy_write(phydev, 0x10, 0x9680);
-
- phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
-}
-
static void mt7988_phy_finetune(struct phy_device *phydev)
{
u16 val[12] = { 0x0187, 0x01cd, 0x01c8, 0x0182,
@@ -827,17 +831,7 @@ static void mt7988_phy_finetune(struct phy_device *phydev)
/* TCT finetune */
phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5);
- /* Disable TX power saving */
- phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7,
- MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8);
-
phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
-
- /* SlvDSPreadyTime = 24, MasDSPreadyTime = 12 */
- __phy_write(phydev, 0x11, 0x671);
- __phy_write(phydev, 0x12, 0xc);
- __phy_write(phydev, 0x10, 0x8fae);
-
/* ResetSyncOffset = 5 */
__phy_write(phydev, 0x11, 0x500);
__phy_write(phydev, 0x12, 0x0);
@@ -845,13 +839,27 @@ static void mt7988_phy_finetune(struct phy_device *phydev)
/* VgaDecRate is 1 at default on mt7988 */
- phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+ /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7,
+ * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7
+ */
+ __phy_write(phydev, 0x11, 0xb90a);
+ __phy_write(phydev, 0x12, 0x6f);
+ __phy_write(phydev, 0x10, 0x8f82);
+
+ /* RemAckCntLimitCtrl = 1 */
+ __phy_write(phydev, 0x11, 0xfbba);
+ __phy_write(phydev, 0x12, 0xc3);
+ __phy_write(phydev, 0x10, 0x87f8);
- phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_2A30);
- /* TxClkOffset = 2 */
- __phy_modify(phydev, MTK_PHY_ANARG_RG, MTK_PHY_TCLKOFFSET_MASK,
- FIELD_PREP(MTK_PHY_TCLKOFFSET_MASK, 0x2));
phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+
+ /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234,
+ MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK,
+ BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0xa));
+
+ /* rg_tr_lpf_cnt_val = 1023 */
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_LPF_CNT_VAL, 0x3ff);
}
static void mt798x_phy_eee(struct phy_device *phydev)
@@ -884,11 +892,11 @@ static void mt798x_phy_eee(struct phy_device *phydev)
MTK_PHY_LPI_SLV_SEND_TX_EN,
FIELD_PREP(MTK_PHY_LPI_SLV_SEND_TX_TIMER_MASK, 0x120));
- phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239,
- MTK_PHY_LPI_SEND_LOC_TIMER_MASK |
- MTK_PHY_LPI_TXPCS_LOC_RCV,
- FIELD_PREP(MTK_PHY_LPI_SEND_LOC_TIMER_MASK, 0x117));
+ /* Keep MTK_PHY_LPI_SEND_LOC_TIMER as 375 */
+ phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239,
+ MTK_PHY_LPI_TXPCS_LOC_RCV);
+ /* This also fixes some IoT issues, such as CH340 */
phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG2C7,
MTK_PHY_MAX_GAIN_MASK | MTK_PHY_MIN_GAIN_MASK,
FIELD_PREP(MTK_PHY_MAX_GAIN_MASK, 0x8) |
@@ -922,7 +930,7 @@ static void mt798x_phy_eee(struct phy_device *phydev)
__phy_write(phydev, 0x12, 0x0);
__phy_write(phydev, 0x10, 0x9690);
- /* REG_EEE_st2TrKf1000 = 3 */
+ /* REG_EEE_st2TrKf1000 = 2 */
__phy_write(phydev, 0x11, 0x114f);
__phy_write(phydev, 0x12, 0x2);
__phy_write(phydev, 0x10, 0x969a);
@@ -947,7 +955,7 @@ static void mt798x_phy_eee(struct phy_device *phydev)
__phy_write(phydev, 0x12, 0x0);
__phy_write(phydev, 0x10, 0x96b8);
- /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 1 */
+ /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */
__phy_write(phydev, 0x11, 0x1463);
__phy_write(phydev, 0x12, 0x0);
__phy_write(phydev, 0x10, 0x96ca);
@@ -1459,6 +1467,13 @@ static int mt7988_phy_probe(struct phy_device *phydev)
if (err)
return err;
+ /* Disable TX power saving at probing to:
+ * 1. Meet common mode compliance test criteria
+ * 2. Make sure that TX-VCM calibration works fine
+ */
+ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7,
+ MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8);
+
return mt798x_phy_calibration(phydev);
}
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 81c20eb4b54b..8b8634600c51 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -114,12 +114,30 @@
#define LAN8814_INTR_CTRL_REG_POLARITY BIT(1)
#define LAN8814_INTR_CTRL_REG_INTR_ENABLE BIT(0)
+#define LAN8814_EEE_STATE 0x38
+#define LAN8814_EEE_STATE_MASK2P5P BIT(10)
+
+#define LAN8814_PD_CONTROLS 0x9d
+#define LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK GENMASK(3, 0)
+#define LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL 0xb
+
/* Represents 1ppm adjustment in 2^32 format with
* each nsec contains 4 clock cycles.
* The value is calculated as following: (1/1000000)/((2^-32)/4)
*/
#define LAN8814_1PPM_FORMAT 17179
+/* Represents 1ppm adjustment in 2^32 format with
+ * each nsec contains 8 clock cycles.
+ * The value is calculated as following: (1/1000000)/((2^-32)/8)
+ */
+#define LAN8841_1PPM_FORMAT 34360
+
+#define PTP_RX_VERSION 0x0248
+#define PTP_TX_VERSION 0x0288
+#define PTP_MAX_VERSION(x) (((x) & GENMASK(7, 0)) << 8)
+#define PTP_MIN_VERSION(x) ((x) & GENMASK(7, 0))
+
#define PTP_RX_MOD 0x024F
#define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3)
#define PTP_RX_TIMESTAMP_EN 0x024D
@@ -149,11 +167,13 @@
#define PTP_CMD_CTL_PTP_LTC_STEP_SEC_ BIT(5)
#define PTP_CMD_CTL_PTP_LTC_STEP_NSEC_ BIT(6)
+#define PTP_CLOCK_SET_SEC_HI 0x0205
#define PTP_CLOCK_SET_SEC_MID 0x0206
#define PTP_CLOCK_SET_SEC_LO 0x0207
#define PTP_CLOCK_SET_NS_HI 0x0208
#define PTP_CLOCK_SET_NS_LO 0x0209
+#define PTP_CLOCK_READ_SEC_HI 0x0229
#define PTP_CLOCK_READ_SEC_MID 0x022A
#define PTP_CLOCK_READ_SEC_LO 0x022B
#define PTP_CLOCK_READ_NS_HI 0x022C
@@ -2587,35 +2607,31 @@ static bool lan8814_rxtstamp(struct mii_timestamper *mii_ts, struct sk_buff *skb
}
static void lan8814_ptp_clock_set(struct phy_device *phydev,
- u32 seconds, u32 nano_seconds)
+ time64_t sec, u32 nsec)
{
- u32 sec_low, sec_high, nsec_low, nsec_high;
-
- sec_low = seconds & 0xffff;
- sec_high = (seconds >> 16) & 0xffff;
- nsec_low = nano_seconds & 0xffff;
- nsec_high = (nano_seconds >> 16) & 0x3fff;
-
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_LO, sec_low);
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_MID, sec_high);
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_LO, nsec_low);
- lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_HI, nsec_high);
+ lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_LO, lower_16_bits(sec));
+ lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_MID, upper_16_bits(sec));
+ lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_SEC_HI, upper_32_bits(sec));
+ lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_LO, lower_16_bits(nsec));
+ lanphy_write_page_reg(phydev, 4, PTP_CLOCK_SET_NS_HI, upper_16_bits(nsec));
lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_LOAD_);
}
static void lan8814_ptp_clock_get(struct phy_device *phydev,
- u32 *seconds, u32 *nano_seconds)
+ time64_t *sec, u32 *nsec)
{
lanphy_write_page_reg(phydev, 4, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_READ_);
- *seconds = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_MID);
- *seconds = (*seconds << 16) |
- lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_LO);
+ *sec = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_HI);
+ *sec <<= 16;
+ *sec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_MID);
+ *sec <<= 16;
+ *sec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_SEC_LO);
- *nano_seconds = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_HI);
- *nano_seconds = ((*nano_seconds & 0x3fff) << 16) |
- lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_LO);
+ *nsec = lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_HI);
+ *nsec <<= 16;
+ *nsec |= lanphy_read_page_reg(phydev, 4, PTP_CLOCK_READ_NS_LO);
}
static int lan8814_ptpci_gettime64(struct ptp_clock_info *ptpci,
@@ -2625,7 +2641,7 @@ static int lan8814_ptpci_gettime64(struct ptp_clock_info *ptpci,
ptp_clock_info);
struct phy_device *phydev = shared->phydev;
u32 nano_seconds;
- u32 seconds;
+ time64_t seconds;
mutex_lock(&shared->shared_lock);
lan8814_ptp_clock_get(phydev, &seconds, &nano_seconds);
@@ -2655,38 +2671,37 @@ static void lan8814_ptp_clock_step(struct phy_device *phydev,
{
u32 nano_seconds_step;
u64 abs_time_step_ns;
- u32 unsigned_seconds;
+ time64_t set_seconds;
u32 nano_seconds;
u32 remainder;
s32 seconds;
if (time_step_ns > 15000000000LL) {
/* convert to clock set */
- lan8814_ptp_clock_get(phydev, &unsigned_seconds, &nano_seconds);
- unsigned_seconds += div_u64_rem(time_step_ns, 1000000000LL,
- &remainder);
+ lan8814_ptp_clock_get(phydev, &set_seconds, &nano_seconds);
+ set_seconds += div_u64_rem(time_step_ns, 1000000000LL,
+ &remainder);
nano_seconds += remainder;
if (nano_seconds >= 1000000000) {
- unsigned_seconds++;
+ set_seconds++;
nano_seconds -= 1000000000;
}
- lan8814_ptp_clock_set(phydev, unsigned_seconds, nano_seconds);
+ lan8814_ptp_clock_set(phydev, set_seconds, nano_seconds);
return;
} else if (time_step_ns < -15000000000LL) {
/* convert to clock set */
time_step_ns = -time_step_ns;
- lan8814_ptp_clock_get(phydev, &unsigned_seconds, &nano_seconds);
- unsigned_seconds -= div_u64_rem(time_step_ns, 1000000000LL,
- &remainder);
+ lan8814_ptp_clock_get(phydev, &set_seconds, &nano_seconds);
+ set_seconds -= div_u64_rem(time_step_ns, 1000000000LL,
+ &remainder);
nano_seconds_step = remainder;
if (nano_seconds < nano_seconds_step) {
- unsigned_seconds--;
+ set_seconds--;
nano_seconds += 1000000000;
}
nano_seconds -= nano_seconds_step;
- lan8814_ptp_clock_set(phydev, unsigned_seconds,
- nano_seconds);
+ lan8814_ptp_clock_set(phydev, set_seconds, nano_seconds);
return;
}
@@ -3150,6 +3165,12 @@ static void lan8814_ptp_init(struct phy_device *phydev)
lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0);
lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0);
+ /* Disable checking for minorVersionPTP field */
+ lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION,
+ PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0));
+ lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION,
+ PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0));
+
skb_queue_head_init(&ptp_priv->tx_queue);
skb_queue_head_init(&ptp_priv->rx_queue);
INIT_LIST_HEAD(&ptp_priv->rx_ts_list);
@@ -3274,6 +3295,33 @@ static int lan8814_release_coma_mode(struct phy_device *phydev)
return 0;
}
+static void lan8814_clear_2psp_bit(struct phy_device *phydev)
+{
+ u16 val;
+
+ /* It was noticed that when traffic is passing through the PHY and the
+ * cable is removed then the LED was still one even though there is no
+ * link
+ */
+ val = lanphy_read_page_reg(phydev, 2, LAN8814_EEE_STATE);
+ val &= ~LAN8814_EEE_STATE_MASK2P5P;
+ lanphy_write_page_reg(phydev, 2, LAN8814_EEE_STATE, val);
+}
+
+static void lan8814_update_meas_time(struct phy_device *phydev)
+{
+ u16 val;
+
+ /* By setting the measure time to a value of 0xb this will allow cables
+ * longer than 100m to be used. This configuration can be used
+ * regardless of the mode of operation of the PHY
+ */
+ val = lanphy_read_page_reg(phydev, 1, LAN8814_PD_CONTROLS);
+ val &= ~LAN8814_PD_CONTROLS_PD_MEAS_TIME_MASK;
+ val |= LAN8814_PD_CONTROLS_PD_MEAS_TIME_VAL;
+ lanphy_write_page_reg(phydev, 1, LAN8814_PD_CONTROLS, val);
+}
+
static int lan8814_probe(struct phy_device *phydev)
{
const struct kszphy_type *type = phydev->drv->driver_data;
@@ -3310,6 +3358,10 @@ static int lan8814_probe(struct phy_device *phydev)
lan8814_ptp_init(phydev);
+ /* Errata workarounds */
+ lan8814_clear_2psp_bit(phydev);
+ lan8814_update_meas_time(phydev);
+
return 0;
}
@@ -4107,8 +4159,8 @@ static int lan8841_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
faster = false;
}
- rate = LAN8814_1PPM_FORMAT * (upper_16_bits(scaled_ppm));
- rate += (LAN8814_1PPM_FORMAT * (lower_16_bits(scaled_ppm))) >> 16;
+ rate = LAN8841_1PPM_FORMAT * (upper_16_bits(scaled_ppm));
+ rate += (LAN8841_1PPM_FORMAT * (lower_16_bits(scaled_ppm))) >> 16;
mutex_lock(&ptp_priv->ptp_lock);
phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_RATE_ADJ_HI,
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index ea1073adc5a1..b2d36a3a96f1 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -274,6 +274,14 @@ static int gpy_config_init(struct phy_device *phydev)
return ret < 0 ? ret : 0;
}
+static int gpy21x_config_init(struct phy_device *phydev)
+{
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX, phydev->possible_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_SGMII, phydev->possible_interfaces);
+
+ return gpy_config_init(phydev);
+}
+
static int gpy_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
@@ -867,7 +875,7 @@ static struct phy_driver gpy_drivers[] = {
.phy_id_mask = PHY_ID_GPY21xB_MASK,
.name = "Maxlinear Ethernet GPY211B",
.get_features = genphy_c45_pma_read_abilities,
- .config_init = gpy_config_init,
+ .config_init = gpy21x_config_init,
.probe = gpy_probe,
.suspend = genphy_suspend,
.resume = genphy_resume,
@@ -884,7 +892,7 @@ static struct phy_driver gpy_drivers[] = {
PHY_ID_MATCH_MODEL(PHY_ID_GPY211C),
.name = "Maxlinear Ethernet GPY211C",
.get_features = genphy_c45_pma_read_abilities,
- .config_init = gpy_config_init,
+ .config_init = gpy21x_config_init,
.probe = gpy_probe,
.suspend = genphy_suspend,
.resume = genphy_resume,
@@ -902,7 +910,7 @@ static struct phy_driver gpy_drivers[] = {
.phy_id_mask = PHY_ID_GPY21xB_MASK,
.name = "Maxlinear Ethernet GPY212B",
.get_features = genphy_c45_pma_read_abilities,
- .config_init = gpy_config_init,
+ .config_init = gpy21x_config_init,
.probe = gpy_probe,
.suspend = genphy_suspend,
.resume = genphy_resume,
@@ -919,7 +927,7 @@ static struct phy_driver gpy_drivers[] = {
PHY_ID_MATCH_MODEL(PHY_ID_GPY212C),
.name = "Maxlinear Ethernet GPY212C",
.get_features = genphy_c45_pma_read_abilities,
- .config_init = gpy_config_init,
+ .config_init = gpy21x_config_init,
.probe = gpy_probe,
.suspend = genphy_suspend,
.resume = genphy_resume,
@@ -937,7 +945,7 @@ static struct phy_driver gpy_drivers[] = {
.phy_id_mask = PHY_ID_GPYx15B_MASK,
.name = "Maxlinear Ethernet GPY215B",
.get_features = genphy_c45_pma_read_abilities,
- .config_init = gpy_config_init,
+ .config_init = gpy21x_config_init,
.probe = gpy_probe,
.suspend = genphy_suspend,
.resume = genphy_resume,
@@ -954,7 +962,7 @@ static struct phy_driver gpy_drivers[] = {
PHY_ID_MATCH_MODEL(PHY_ID_GPY215C),
.name = "Maxlinear Ethernet GPY215C",
.get_features = genphy_c45_pma_read_abilities,
- .config_init = gpy_config_init,
+ .config_init = gpy21x_config_init,
.probe = gpy_probe,
.suspend = genphy_suspend,
.resume = genphy_resume,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 747d14bf152c..5695935fdce9 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -208,7 +208,8 @@ static int genphy_c45_baset1_an_config_aneg(struct phy_device *phydev)
adv_l_mask = MDIO_AN_T1_ADV_L_FORCE_MS | MDIO_AN_T1_ADV_L_PAUSE_CAP |
MDIO_AN_T1_ADV_L_PAUSE_ASYM;
- adv_m_mask = MDIO_AN_T1_ADV_M_MST | MDIO_AN_T1_ADV_M_B10L;
+ adv_m_mask = MDIO_AN_T1_ADV_M_1000BT1 | MDIO_AN_T1_ADV_M_100BT1 |
+ MDIO_AN_T1_ADV_M_MST | MDIO_AN_T1_ADV_M_B10L;
switch (phydev->master_slave_set) {
case MASTER_SLAVE_CFG_MASTER_FORCE:
@@ -706,6 +707,22 @@ int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv)
changed = 1;
}
+ if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP2_FEATURES)) {
+ val = linkmode_to_mii_eee_cap2_t(adv);
+
+ /* IEEE 802.3-2022 45.2.7.16 EEE advertisement 2
+ * (Register 7.62)
+ */
+ val = phy_modify_mmd_changed(phydev, MDIO_MMD_AN,
+ MDIO_AN_EEE_ADV2,
+ MDIO_EEE_2_5GT | MDIO_EEE_5GT,
+ val);
+ if (val < 0)
+ return val;
+ if (val > 0)
+ changed = 1;
+ }
+
if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
phydev->supported_eee)) {
val = linkmode_adv_to_mii_10base_t1_t(adv);
@@ -745,6 +762,17 @@ int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv)
mii_eee_cap1_mod_linkmode_t(adv, val);
}
+ if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP2_FEATURES)) {
+ /* IEEE 802.3-2022 45.2.7.16 EEE advertisement 2
+ * (Register 7.62)
+ */
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV2);
+ if (val < 0)
+ return val;
+
+ mii_eee_cap2_mod_linkmode_adv_t(adv, val);
+ }
+
if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
phydev->supported_eee)) {
/* IEEE 802.3cg-2019 45.2.7.25 10BASE-T1 AN control register
@@ -781,6 +809,17 @@ static int genphy_c45_read_eee_lpa(struct phy_device *phydev,
mii_eee_cap1_mod_linkmode_t(lpa, val);
}
+ if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP2_FEATURES)) {
+ /* IEEE 802.3-2022 45.2.7.17 EEE link partner ability 2
+ * (Register 7.63)
+ */
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE2);
+ if (val < 0)
+ return val;
+
+ mii_eee_cap2_mod_linkmode_adv_t(lpa, val);
+ }
+
if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
phydev->supported_eee)) {
/* IEEE 802.3cg-2019 45.2.7.26 10BASE-T1 AN status register
@@ -831,6 +870,30 @@ static int genphy_c45_read_eee_cap1(struct phy_device *phydev)
}
/**
+ * genphy_c45_read_eee_cap2 - read supported EEE link modes from register 3.21
+ * @phydev: target phy_device struct
+ */
+static int genphy_c45_read_eee_cap2(struct phy_device *phydev)
+{
+ int val;
+
+ /* IEEE 802.3-2022 45.2.3.11 EEE control and capability 2
+ * (Register 3.21)
+ */
+ val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE2);
+ if (val < 0)
+ return val;
+
+ /* IEEE 802.3-2022 45.2.3.11 says 9 bits are reserved. */
+ if (val == 0xffff)
+ return 0;
+
+ mii_eee_cap2_mod_linkmode_sup_t(phydev->supported_eee, val);
+
+ return 0;
+}
+
+/**
* genphy_c45_read_eee_abilities - read supported EEE link modes
* @phydev: target phy_device struct
*/
@@ -848,6 +911,13 @@ int genphy_c45_read_eee_abilities(struct phy_device *phydev)
return val;
}
+ /* Same for cap2 (3.21) */
+ if (linkmode_intersects(phydev->supported, PHY_EEE_CAP2_FEATURES)) {
+ val = genphy_c45_read_eee_cap2(phydev);
+ if (val)
+ return val;
+ }
+
if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
phydev->supported)) {
/* IEEE 802.3cg-2019 45.2.1.186b 10BASE-T1L PMA status register
@@ -1443,17 +1513,17 @@ EXPORT_SYMBOL(genphy_c45_eee_is_active);
/**
* genphy_c45_ethtool_get_eee - get EEE supported and status
* @phydev: target phy_device struct
- * @data: ethtool_eee data
+ * @data: ethtool_keee data
*
* Description: it reports the Supported/Advertisement/LP Advertisement
* capabilities.
*/
int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
- struct ethtool_eee *data)
+ struct ethtool_keee *data)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(adv) = {};
__ETHTOOL_DECLARE_LINK_MODE_MASK(lp) = {};
- bool overflow = false, is_enabled;
+ bool is_enabled;
int ret;
ret = genphy_c45_eee_is_active(phydev, adv, lp, &is_enabled);
@@ -1462,17 +1532,9 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
data->eee_enabled = is_enabled;
data->eee_active = ret;
-
- if (!ethtool_convert_link_mode_to_legacy_u32(&data->supported,
- phydev->supported_eee))
- overflow = true;
- if (!ethtool_convert_link_mode_to_legacy_u32(&data->advertised, adv))
- overflow = true;
- if (!ethtool_convert_link_mode_to_legacy_u32(&data->lp_advertised, lp))
- overflow = true;
-
- if (overflow)
- phydev_warn(phydev, "Not all supported or advertised EEE link modes were passed to the user space\n");
+ linkmode_copy(data->supported, phydev->supported_eee);
+ linkmode_copy(data->advertised, adv);
+ linkmode_copy(data->lp_advertised, lp);
return 0;
}
@@ -1481,50 +1543,53 @@ EXPORT_SYMBOL(genphy_c45_ethtool_get_eee);
/**
* genphy_c45_ethtool_set_eee - set EEE supported and status
* @phydev: target phy_device struct
- * @data: ethtool_eee data
+ * @data: ethtool_keee data
*
* Description: sets the Supported/Advertisement/LP Advertisement
* capabilities. If eee_enabled is false, no links modes are
* advertised, but the previously advertised link modes are
* retained. This allows EEE to be enabled/disabled in a
* non-destructive way.
+ * Returns either error code, 0 if there was no change, or positive
+ * value if there was a change which triggered auto-neg.
*/
int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
- struct ethtool_eee *data)
+ struct ethtool_keee *data)
{
int ret;
if (data->eee_enabled) {
- if (data->advertised) {
- __ETHTOOL_DECLARE_LINK_MODE_MASK(adv);
+ unsigned long *adv = data->advertised;
- ethtool_convert_legacy_u32_to_link_mode(adv,
- data->advertised);
- linkmode_andnot(adv, adv, phydev->supported_eee);
- if (!linkmode_empty(adv)) {
+ if (!linkmode_empty(adv)) {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp);
+
+ if (linkmode_andnot(tmp, adv, phydev->supported_eee)) {
phydev_warn(phydev, "At least some EEE link modes are not supported.\n");
return -EINVAL;
}
-
- ethtool_convert_legacy_u32_to_link_mode(phydev->advertising_eee,
- data->advertised);
} else {
- linkmode_copy(phydev->advertising_eee,
- phydev->supported_eee);
+ adv = phydev->supported_eee;
}
- phydev->eee_enabled = true;
- } else {
- phydev->eee_enabled = false;
+ linkmode_copy(phydev->advertising_eee, adv);
}
+ phydev->eee_enabled = data->eee_enabled;
+
ret = genphy_c45_an_config_eee_aneg(phydev);
- if (ret < 0)
- return ret;
- if (ret > 0)
- return phy_restart_aneg(phydev);
+ if (ret > 0) {
+ ret = phy_restart_aneg(phydev);
+ if (ret < 0)
+ return ret;
- return 0;
+ /* explicitly return 1, otherwise (ret > 0) value will be
+ * overwritten by phy_restart_aneg().
+ */
+ return 1;
+ }
+
+ return ret;
}
EXPORT_SYMBOL(genphy_c45_ethtool_set_eee);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 3376e58e2b88..c4236564c1cd 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -983,9 +983,17 @@ static int phy_check_link_status(struct phy_device *phydev)
if (phydev->link && phydev->state != PHY_RUNNING) {
phy_check_downshift(phydev);
phydev->state = PHY_RUNNING;
+ err = genphy_c45_eee_is_active(phydev,
+ NULL, NULL, NULL);
+ if (err <= 0)
+ phydev->enable_tx_lpi = false;
+ else
+ phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled;
+
phy_link_up(phydev);
} else if (!phydev->link && phydev->state != PHY_NOLINK) {
phydev->state = PHY_NOLINK;
+ phydev->enable_tx_lpi = false;
phy_link_down(phydev);
}
@@ -1290,7 +1298,6 @@ int phy_disable_interrupts(struct phy_device *phydev)
static irqreturn_t phy_interrupt(int irq, void *phy_dat)
{
struct phy_device *phydev = phy_dat;
- struct phy_driver *drv = phydev->drv;
irqreturn_t ret;
/* Wakeup interrupts may occur during a system sleep transition.
@@ -1316,7 +1323,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
}
mutex_lock(&phydev->lock);
- ret = drv->handle_interrupt(phydev);
+ ret = phydev->drv->handle_interrupt(phydev);
mutex_unlock(&phydev->lock);
return ret;
@@ -1632,12 +1639,12 @@ EXPORT_SYMBOL(phy_get_eee_err);
/**
* phy_ethtool_get_eee - get EEE supported and status
* @phydev: target phy_device struct
- * @data: ethtool_eee data
+ * @data: ethtool_keee data
*
- * Description: it reportes the Supported/Advertisement/LP Advertisement
- * capabilities.
+ * Description: reports the Supported/Advertisement/LP Advertisement
+ * capabilities, etc.
*/
-int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data)
+int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data)
{
int ret;
@@ -1646,6 +1653,7 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data)
mutex_lock(&phydev->lock);
ret = genphy_c45_ethtool_get_eee(phydev, data);
+ eeecfg_to_eee(data, &phydev->eee_cfg);
mutex_unlock(&phydev->lock);
return ret;
@@ -1653,13 +1661,43 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data)
EXPORT_SYMBOL(phy_ethtool_get_eee);
/**
+ * phy_ethtool_set_eee_noneg - Adjusts MAC LPI configuration without PHY
+ * renegotiation
+ * @phydev: pointer to the target PHY device structure
+ * @data: pointer to the ethtool_keee structure containing the new EEE settings
+ *
+ * This function updates the Energy Efficient Ethernet (EEE) configuration
+ * for cases where only the MAC's Low Power Idle (LPI) configuration changes,
+ * without triggering PHY renegotiation. It ensures that the MAC is properly
+ * informed of the new LPI settings by cycling the link down and up, which
+ * is necessary for the MAC to adopt the new configuration. This adjustment
+ * is done only if there is a change in the tx_lpi_enabled or tx_lpi_timer
+ * configuration.
+ */
+static void phy_ethtool_set_eee_noneg(struct phy_device *phydev,
+ struct ethtool_keee *data)
+{
+ if (phydev->eee_cfg.tx_lpi_enabled != data->tx_lpi_enabled ||
+ phydev->eee_cfg.tx_lpi_timer != data->tx_lpi_timer) {
+ eee_to_eeecfg(&phydev->eee_cfg, data);
+ phydev->enable_tx_lpi = eeecfg_mac_can_tx_lpi(&phydev->eee_cfg);
+ if (phydev->link) {
+ phydev->link = false;
+ phy_link_down(phydev);
+ phydev->link = true;
+ phy_link_up(phydev);
+ }
+ }
+}
+
+/**
* phy_ethtool_set_eee - set EEE supported and status
* @phydev: target phy_device struct
- * @data: ethtool_eee data
+ * @data: ethtool_keee data
*
* Description: it is to program the Advertisement EEE register.
*/
-int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
+int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data)
{
int ret;
@@ -1668,9 +1706,14 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
mutex_lock(&phydev->lock);
ret = genphy_c45_ethtool_set_eee(phydev, data);
+ if (ret >= 0) {
+ if (ret == 0)
+ phy_ethtool_set_eee_noneg(phydev, data);
+ eee_to_eeecfg(&phydev->eee_cfg, data);
+ }
mutex_unlock(&phydev->lock);
- return ret;
+ return ret < 0 ? ret : 0;
}
EXPORT_SYMBOL(phy_ethtool_set_eee);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 3611ea64875e..8297ef681bf5 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -148,6 +148,14 @@ static const int phy_eee_cap1_features_array[] = {
__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init;
EXPORT_SYMBOL_GPL(phy_eee_cap1_features);
+static const int phy_eee_cap2_features_array[] = {
+ ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+};
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_eee_cap2_features);
+
static void features_init(void)
{
/* 10/100 half/full*/
@@ -232,6 +240,9 @@ static void features_init(void)
linkmode_set_bit_array(phy_eee_cap1_features_array,
ARRAY_SIZE(phy_eee_cap1_features_array),
phy_eee_cap1_features);
+ linkmode_set_bit_array(phy_eee_cap2_features_array,
+ ARRAY_SIZE(phy_eee_cap2_features_array),
+ phy_eee_cap2_features);
}
@@ -780,7 +791,7 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr,
* and identifiers in @c45_ids.
*
* Returns zero on success, %-EIO on bus access error, or %-ENODEV if
- * the "devices in package" is invalid.
+ * the "devices in package" is invalid or no device responds.
*/
static int get_phy_c45_ids(struct mii_bus *bus, int addr,
struct phy_c45_device_ids *c45_ids)
@@ -803,7 +814,11 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr,
*/
ret = phy_c45_probe_present(bus, addr, i);
if (ret < 0)
- return -EIO;
+ /* returning -ENODEV doesn't stop bus
+ * scanning
+ */
+ return (phy_reg == -EIO ||
+ phy_reg == -ENODEV) ? -ENODEV : -EIO;
if (!ret)
continue;
@@ -1413,6 +1428,11 @@ int phy_sfp_probe(struct phy_device *phydev,
}
EXPORT_SYMBOL(phy_sfp_probe);
+static bool phy_drv_supports_irq(const struct phy_driver *phydrv)
+{
+ return phydrv->config_intr && phydrv->handle_interrupt;
+}
+
/**
* phy_attach_direct - attach a network device to a given PHY device pointer
* @dev: network device to attach
@@ -1527,6 +1547,9 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
if (phydev->dev_flags & PHY_F_NO_IRQ)
phydev->irq = PHY_POLL;
+ if (!phy_drv_supports_irq(phydev->drv) && phy_interrupt_is_valid(phydev))
+ phydev->irq = PHY_POLL;
+
/* Port is set to PORT_TP by default and the actual PHY driver will set
* it to different value depending on the PHY configuration. If we have
* the generic PHY driver we can't figure it out, thus set the old
@@ -1592,7 +1615,6 @@ EXPORT_SYMBOL(phy_attach_direct);
struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
phy_interface_t interface)
{
- struct bus_type *bus = &mdio_bus_type;
struct phy_device *phydev;
struct device *d;
int rc;
@@ -1603,7 +1625,7 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
/* Search the list of PHY devices on the mdio bus for the
* PHY with the requested name
*/
- d = bus_find_device_by_name(bus, NULL, bus_id);
+ d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id);
if (!d) {
pr_err("PHY %s not found\n", bus_id);
return ERR_PTR(-ENODEV);
@@ -1700,6 +1722,7 @@ int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size)
shared->priv_size = priv_size;
}
shared->base_addr = base_addr;
+ shared->np = NULL;
refcount_set(&shared->refcnt, 1);
bus->shared[base_addr] = shared;
} else {
@@ -1723,6 +1746,63 @@ err_unlock:
EXPORT_SYMBOL_GPL(phy_package_join);
/**
+ * of_phy_package_join - join a common PHY group in PHY package
+ * @phydev: target phy_device struct
+ * @priv_size: if non-zero allocate this amount of bytes for private data
+ *
+ * This is a variant of phy_package_join for PHY package defined in DT.
+ *
+ * The parent node of the @phydev is checked as a valid PHY package node
+ * structure (by matching the node name "ethernet-phy-package") and the
+ * base_addr for the PHY package is passed to phy_package_join.
+ *
+ * With this configuration the shared struct will also have the np value
+ * filled to use additional DT defined properties in PHY specific
+ * probe_once and config_init_once PHY package OPs.
+ *
+ * Returns < 0 on error, 0 on success. Esp. calling phy_package_join()
+ * with the same cookie but a different priv_size is an error. Or a parent
+ * node is not detected or is not valid or doesn't match the expected node
+ * name for PHY package.
+ */
+int of_phy_package_join(struct phy_device *phydev, size_t priv_size)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct device_node *package_node;
+ u32 base_addr;
+ int ret;
+
+ if (!node)
+ return -EINVAL;
+
+ package_node = of_get_parent(node);
+ if (!package_node)
+ return -EINVAL;
+
+ if (!of_node_name_eq(package_node, "ethernet-phy-package")) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (of_property_read_u32(package_node, "reg", &base_addr)) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = phy_package_join(phydev, base_addr, priv_size);
+ if (ret)
+ goto exit;
+
+ phydev->shared->np = package_node;
+
+ return 0;
+exit:
+ of_node_put(package_node);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(of_phy_package_join);
+
+/**
* phy_package_leave - leave a common PHY group
* @phydev: target phy_device struct
*
@@ -1738,6 +1818,10 @@ void phy_package_leave(struct phy_device *phydev)
if (!shared)
return;
+ /* Decrease the node refcount on leave if present */
+ if (shared->np)
+ of_node_put(shared->np);
+
if (refcount_dec_and_mutex_lock(&shared->refcnt, &bus->shared_lock)) {
bus->shared[shared->base_addr] = NULL;
mutex_unlock(&bus->shared_lock);
@@ -1791,6 +1875,40 @@ int devm_phy_package_join(struct device *dev, struct phy_device *phydev,
EXPORT_SYMBOL_GPL(devm_phy_package_join);
/**
+ * devm_of_phy_package_join - resource managed of_phy_package_join()
+ * @dev: device that is registering this PHY package
+ * @phydev: target phy_device struct
+ * @priv_size: if non-zero allocate this amount of bytes for private data
+ *
+ * Managed of_phy_package_join(). Shared storage fetched by this function,
+ * phy_package_leave() is automatically called on driver detach. See
+ * of_phy_package_join() for more information.
+ */
+int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev,
+ size_t priv_size)
+{
+ struct phy_device **ptr;
+ int ret;
+
+ ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr),
+ GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ ret = of_phy_package_join(phydev, priv_size);
+
+ if (!ret) {
+ *ptr = phydev;
+ devres_add(dev, ptr);
+ } else {
+ devres_free(ptr);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(devm_of_phy_package_join);
+
+/**
* phy_detach - detach a PHY device from its network device
* @phydev: target phy_device struct
*
@@ -1859,7 +1977,7 @@ int phy_suspend(struct phy_device *phydev)
{
struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
struct net_device *netdev = phydev->attached_dev;
- struct phy_driver *phydrv = phydev->drv;
+ const struct phy_driver *phydrv = phydev->drv;
int ret;
if (phydev->suspended)
@@ -1884,7 +2002,7 @@ EXPORT_SYMBOL(phy_suspend);
int __phy_resume(struct phy_device *phydev)
{
- struct phy_driver *phydrv = phydev->drv;
+ const struct phy_driver *phydrv = phydev->drv;
int ret;
lockdep_assert_held(&phydev->lock);
@@ -2513,12 +2631,15 @@ EXPORT_SYMBOL(genphy_read_status);
/**
* genphy_c37_read_status - check the link status and update current link state
* @phydev: target phy_device struct
+ * @changed: pointer where to store if link changed
*
* Description: Check the link, then figure out the current state
* by comparing what we advertise with what the link partner
* advertises. This function is for Clause 37 1000Base-X mode.
+ *
+ * If link has changed, @changed is set to true, false otherwise.
*/
-int genphy_c37_read_status(struct phy_device *phydev)
+int genphy_c37_read_status(struct phy_device *phydev, bool *changed)
{
int lpa, err, old_link = phydev->link;
@@ -2528,9 +2649,13 @@ int genphy_c37_read_status(struct phy_device *phydev)
return err;
/* why bother the PHY if nothing can have changed */
- if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
+ if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) {
+ *changed = false;
return 0;
+ }
+ /* Signal link has changed */
+ *changed = true;
phydev->duplex = DUPLEX_UNKNOWN;
phydev->pause = 0;
phydev->asym_pause = 0;
@@ -2770,6 +2895,50 @@ void phy_advertise_supported(struct phy_device *phydev)
EXPORT_SYMBOL(phy_advertise_supported);
/**
+ * phy_advertise_eee_all - Advertise all supported EEE modes
+ * @phydev: target phy_device struct
+ *
+ * Description: Per default phylib preserves the EEE advertising at the time of
+ * phy probing, which might be a subset of the supported EEE modes. Use this
+ * function when all supported EEE modes should be advertised. This does not
+ * trigger auto-negotiation, so must be called before phy_start()/
+ * phylink_start() which will start auto-negotiation.
+ */
+void phy_advertise_eee_all(struct phy_device *phydev)
+{
+ linkmode_copy(phydev->advertising_eee, phydev->supported_eee);
+}
+EXPORT_SYMBOL_GPL(phy_advertise_eee_all);
+
+/**
+ * phy_support_eee - Set initial EEE policy configuration
+ * @phydev: Target phy_device struct
+ *
+ * This function configures the initial policy for Energy Efficient Ethernet
+ * (EEE) on the specified PHY device, influencing that EEE capabilities are
+ * advertised before the link is established. It should be called during PHY
+ * registration by the MAC driver and/or the PHY driver (for SmartEEE PHYs)
+ * if MAC supports LPI or PHY is capable to compensate missing LPI functionality
+ * of the MAC.
+ *
+ * The function sets default EEE policy parameters, including preparing the PHY
+ * to advertise EEE capabilities based on hardware support.
+ *
+ * It also sets the expected configuration for Low Power Idle (LPI) in the MAC
+ * driver. If the PHY framework determines that both local and remote
+ * advertisements support EEE, and the negotiated link mode is compatible with
+ * EEE, it will set enable_tx_lpi = true. The MAC driver is expected to act on
+ * this setting by enabling the LPI timer if enable_tx_lpi is set.
+ */
+void phy_support_eee(struct phy_device *phydev)
+{
+ linkmode_copy(phydev->advertising_eee, phydev->supported_eee);
+ phydev->eee_cfg.tx_lpi_enabled = true;
+ phydev->eee_cfg.eee_enabled = true;
+}
+EXPORT_SYMBOL(phy_support_eee);
+
+/**
* phy_support_sym_pause - Enable support of symmetrical pause
* @phydev: target phy_device struct
*
@@ -2959,7 +3128,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
if (delay < 0)
return delay;
- if (delay && size == 0)
+ if (size == 0)
return delay;
if (delay < delay_values[0] || delay > delay_values[size - 1]) {
@@ -2992,11 +3161,6 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
}
EXPORT_SYMBOL(phy_get_internal_delay);
-static bool phy_drv_supports_irq(struct phy_driver *phydrv)
-{
- return phydrv->config_intr && phydrv->handle_interrupt;
-}
-
static int phy_led_set_brightness(struct led_classdev *led_cdev,
enum led_brightness value)
{
@@ -3097,6 +3261,7 @@ static int of_phy_led(struct phy_device *phydev,
struct device *dev = &phydev->mdio.dev;
struct led_init_data init_data = {};
struct led_classdev *cdev;
+ unsigned long modes = 0;
struct phy_led *phyled;
u32 index;
int err;
@@ -3114,6 +3279,21 @@ static int of_phy_led(struct phy_device *phydev,
if (index > U8_MAX)
return -EINVAL;
+ if (of_property_read_bool(led, "active-low"))
+ set_bit(PHY_LED_ACTIVE_LOW, &modes);
+ if (of_property_read_bool(led, "inactive-high-impedance"))
+ set_bit(PHY_LED_INACTIVE_HIGH_IMPEDANCE, &modes);
+
+ if (modes) {
+ /* Return error if asked to set polarity modes but not supported */
+ if (!phydev->drv->led_polarity_set)
+ return -EINVAL;
+
+ err = phydev->drv->led_polarity_set(phydev, index, modes);
+ if (err)
+ return err;
+ }
+
phyled->index = index;
if (phydev->drv->led_brightness_set)
cdev->brightness_set_blocking = phy_led_set_brightness;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index ed0b4ccaa6a6..503fd7c40523 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -2764,9 +2764,9 @@ EXPORT_SYMBOL_GPL(phylink_init_eee);
/**
* phylink_ethtool_get_eee() - read the energy efficient ethernet parameters
* @pl: a pointer to a &struct phylink returned from phylink_create()
- * @eee: a pointer to a &struct ethtool_eee for the read parameters
+ * @eee: a pointer to a &struct ethtool_keee for the read parameters
*/
-int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_eee *eee)
+int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_keee *eee)
{
int ret = -EOPNOTSUPP;
@@ -2782,9 +2782,9 @@ EXPORT_SYMBOL_GPL(phylink_ethtool_get_eee);
/**
* phylink_ethtool_set_eee() - set the energy efficient ethernet parameters
* @pl: a pointer to a &struct phylink returned from phylink_create()
- * @eee: a pointer to a &struct ethtool_eee for the desired parameters
+ * @eee: a pointer to a &struct ethtool_keee for the desired parameters
*/
-int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_eee *eee)
+int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_keee *eee)
{
int ret = -EOPNOTSUPP;
diff --git a/drivers/net/phy/qcom/Kconfig b/drivers/net/phy/qcom/Kconfig
new file mode 100644
index 000000000000..570626cc8e14
--- /dev/null
+++ b/drivers/net/phy/qcom/Kconfig
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config QCOM_NET_PHYLIB
+ tristate
+
+config AT803X_PHY
+ tristate "Qualcomm Atheros AR803X PHYs"
+ select QCOM_NET_PHYLIB
+ depends on REGULATOR
+ help
+ Currently supports the AR8030, AR8031, AR8033, AR8035 model
+
+config QCA83XX_PHY
+ tristate "Qualcomm Atheros QCA833x PHYs"
+ select QCOM_NET_PHYLIB
+ help
+ Currently supports the internal QCA8337(Internal qca8k PHY) model
+
+config QCA808X_PHY
+ tristate "Qualcomm QCA808x PHYs"
+ select QCOM_NET_PHYLIB
+ help
+ Currently supports the QCA8081 model
+
+config QCA807X_PHY
+ tristate "Qualcomm QCA807x PHYs"
+ select QCOM_NET_PHYLIB
+ depends on OF_MDIO
+ help
+ Currently supports the Qualcomm QCA8072, QCA8075 and the PSGMII
+ control PHY.
diff --git a/drivers/net/phy/qcom/Makefile b/drivers/net/phy/qcom/Makefile
new file mode 100644
index 000000000000..f24fb550babd
--- /dev/null
+++ b/drivers/net/phy/qcom/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_QCOM_NET_PHYLIB) += qcom-phy-lib.o
+obj-$(CONFIG_AT803X_PHY) += at803x.o
+obj-$(CONFIG_QCA83XX_PHY) += qca83xx.o
+obj-$(CONFIG_QCA808X_PHY) += qca808x.o
+obj-$(CONFIG_QCA807X_PHY) += qca807x.o
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
new file mode 100644
index 000000000000..4717c59d51d0
--- /dev/null
+++ b/drivers/net/phy/qcom/at803x.c
@@ -0,0 +1,1106 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * drivers/net/phy/at803x.c
+ *
+ * Driver for Qualcomm Atheros AR803x PHY
+ *
+ * Author: Matus Ujhelyi <ujhelyi.m@gmail.com>
+ */
+
+#include <linux/phy.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool_netlink.h>
+#include <linux/bitfield.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/consumer.h>
+#include <linux/of.h>
+#include <linux/phylink.h>
+#include <linux/sfp.h>
+#include <dt-bindings/net/qca-ar803x.h>
+
+#include "qcom.h"
+
+#define AT803X_LED_CONTROL 0x18
+
+#define AT803X_PHY_MMD3_WOL_CTRL 0x8012
+#define AT803X_WOL_EN BIT(5)
+
+#define AT803X_REG_CHIP_CONFIG 0x1f
+#define AT803X_BT_BX_REG_SEL 0x8000
+
+#define AT803X_MODE_CFG_MASK 0x0F
+#define AT803X_MODE_CFG_BASET_RGMII 0x00
+#define AT803X_MODE_CFG_BASET_SGMII 0x01
+#define AT803X_MODE_CFG_BX1000_RGMII_50OHM 0x02
+#define AT803X_MODE_CFG_BX1000_RGMII_75OHM 0x03
+#define AT803X_MODE_CFG_BX1000_CONV_50OHM 0x04
+#define AT803X_MODE_CFG_BX1000_CONV_75OHM 0x05
+#define AT803X_MODE_CFG_FX100_RGMII_50OHM 0x06
+#define AT803X_MODE_CFG_FX100_CONV_50OHM 0x07
+#define AT803X_MODE_CFG_RGMII_AUTO_MDET 0x0B
+#define AT803X_MODE_CFG_FX100_RGMII_75OHM 0x0E
+#define AT803X_MODE_CFG_FX100_CONV_75OHM 0x0F
+
+#define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/
+#define AT803X_PSSR_MR_AN_COMPLETE 0x0200
+
+#define AT803X_DEBUG_REG_1F 0x1F
+#define AT803X_DEBUG_PLL_ON BIT(2)
+#define AT803X_DEBUG_RGMII_1V8 BIT(3)
+
+/* AT803x supports either the XTAL input pad, an internal PLL or the
+ * DSP as clock reference for the clock output pad. The XTAL reference
+ * is only used for 25 MHz output, all other frequencies need the PLL.
+ * The DSP as a clock reference is used in synchronous ethernet
+ * applications.
+ *
+ * By default the PLL is only enabled if there is a link. Otherwise
+ * the PHY will go into low power state and disabled the PLL. You can
+ * set the PLL_ON bit (see debug register 0x1f) to keep the PLL always
+ * enabled.
+ */
+#define AT803X_MMD7_CLK25M 0x8016
+#define AT803X_CLK_OUT_MASK GENMASK(4, 2)
+#define AT803X_CLK_OUT_25MHZ_XTAL 0
+#define AT803X_CLK_OUT_25MHZ_DSP 1
+#define AT803X_CLK_OUT_50MHZ_PLL 2
+#define AT803X_CLK_OUT_50MHZ_DSP 3
+#define AT803X_CLK_OUT_62_5MHZ_PLL 4
+#define AT803X_CLK_OUT_62_5MHZ_DSP 5
+#define AT803X_CLK_OUT_125MHZ_PLL 6
+#define AT803X_CLK_OUT_125MHZ_DSP 7
+
+/* The AR8035 has another mask which is compatible with the AR8031/AR8033 mask
+ * but doesn't support choosing between XTAL/PLL and DSP.
+ */
+#define AT8035_CLK_OUT_MASK GENMASK(4, 3)
+
+#define AT803X_CLK_OUT_STRENGTH_MASK GENMASK(8, 7)
+#define AT803X_CLK_OUT_STRENGTH_FULL 0
+#define AT803X_CLK_OUT_STRENGTH_HALF 1
+#define AT803X_CLK_OUT_STRENGTH_QUARTER 2
+
+#define AT803X_MMD3_SMARTEEE_CTL1 0x805b
+#define AT803X_MMD3_SMARTEEE_CTL2 0x805c
+#define AT803X_MMD3_SMARTEEE_CTL3 0x805d
+#define AT803X_MMD3_SMARTEEE_CTL3_LPI_EN BIT(8)
+
+#define ATH9331_PHY_ID 0x004dd041
+#define ATH8030_PHY_ID 0x004dd076
+#define ATH8031_PHY_ID 0x004dd074
+#define ATH8032_PHY_ID 0x004dd023
+#define ATH8035_PHY_ID 0x004dd072
+#define AT8030_PHY_ID_MASK 0xffffffef
+
+#define QCA9561_PHY_ID 0x004dd042
+
+#define AT803X_PAGE_FIBER 0
+#define AT803X_PAGE_COPPER 1
+
+/* don't turn off internal PLL */
+#define AT803X_KEEP_PLL_ENABLED BIT(0)
+#define AT803X_DISABLE_SMARTEEE BIT(1)
+
+/* disable hibernation mode */
+#define AT803X_DISABLE_HIBERNATION_MODE BIT(2)
+
+MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
+MODULE_AUTHOR("Matus Ujhelyi");
+MODULE_LICENSE("GPL");
+
+struct at803x_priv {
+ int flags;
+ u16 clk_25m_reg;
+ u16 clk_25m_mask;
+ u8 smarteee_lpi_tw_1g;
+ u8 smarteee_lpi_tw_100m;
+ bool is_fiber;
+ bool is_1000basex;
+ struct regulator_dev *vddio_rdev;
+ struct regulator_dev *vddh_rdev;
+};
+
+struct at803x_context {
+ u16 bmcr;
+ u16 advertise;
+ u16 control1000;
+ u16 int_enable;
+ u16 smart_speed;
+ u16 led_control;
+};
+
+static int at803x_write_page(struct phy_device *phydev, int page)
+{
+ int mask;
+ int set;
+
+ if (page == AT803X_PAGE_COPPER) {
+ set = AT803X_BT_BX_REG_SEL;
+ mask = 0;
+ } else {
+ set = 0;
+ mask = AT803X_BT_BX_REG_SEL;
+ }
+
+ return __phy_modify(phydev, AT803X_REG_CHIP_CONFIG, mask, set);
+}
+
+static int at803x_read_page(struct phy_device *phydev)
+{
+ int ccr = __phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+
+ if (ccr < 0)
+ return ccr;
+
+ if (ccr & AT803X_BT_BX_REG_SEL)
+ return AT803X_PAGE_COPPER;
+
+ return AT803X_PAGE_FIBER;
+}
+
+static int at803x_enable_rx_delay(struct phy_device *phydev)
+{
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, 0,
+ AT803X_DEBUG_RX_CLK_DLY_EN);
+}
+
+static int at803x_enable_tx_delay(struct phy_device *phydev)
+{
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, 0,
+ AT803X_DEBUG_TX_CLK_DLY_EN);
+}
+
+static int at803x_disable_rx_delay(struct phy_device *phydev)
+{
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
+ AT803X_DEBUG_RX_CLK_DLY_EN, 0);
+}
+
+static int at803x_disable_tx_delay(struct phy_device *phydev)
+{
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE,
+ AT803X_DEBUG_TX_CLK_DLY_EN, 0);
+}
+
+/* save relevant PHY registers to private copy */
+static void at803x_context_save(struct phy_device *phydev,
+ struct at803x_context *context)
+{
+ context->bmcr = phy_read(phydev, MII_BMCR);
+ context->advertise = phy_read(phydev, MII_ADVERTISE);
+ context->control1000 = phy_read(phydev, MII_CTRL1000);
+ context->int_enable = phy_read(phydev, AT803X_INTR_ENABLE);
+ context->smart_speed = phy_read(phydev, AT803X_SMART_SPEED);
+ context->led_control = phy_read(phydev, AT803X_LED_CONTROL);
+}
+
+/* restore relevant PHY registers from private copy */
+static void at803x_context_restore(struct phy_device *phydev,
+ const struct at803x_context *context)
+{
+ phy_write(phydev, MII_BMCR, context->bmcr);
+ phy_write(phydev, MII_ADVERTISE, context->advertise);
+ phy_write(phydev, MII_CTRL1000, context->control1000);
+ phy_write(phydev, AT803X_INTR_ENABLE, context->int_enable);
+ phy_write(phydev, AT803X_SMART_SPEED, context->smart_speed);
+ phy_write(phydev, AT803X_LED_CONTROL, context->led_control);
+}
+
+static int at803x_suspend(struct phy_device *phydev)
+{
+ int value;
+ int wol_enabled;
+
+ value = phy_read(phydev, AT803X_INTR_ENABLE);
+ wol_enabled = value & AT803X_INTR_ENABLE_WOL;
+
+ if (wol_enabled)
+ value = BMCR_ISOLATE;
+ else
+ value = BMCR_PDOWN;
+
+ phy_modify(phydev, MII_BMCR, 0, value);
+
+ return 0;
+}
+
+static int at803x_resume(struct phy_device *phydev)
+{
+ return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0);
+}
+
+static int at803x_parse_dt(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct at803x_priv *priv = phydev->priv;
+ u32 freq, strength, tw;
+ unsigned int sel;
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_OF_MDIO))
+ return 0;
+
+ if (of_property_read_bool(node, "qca,disable-smarteee"))
+ priv->flags |= AT803X_DISABLE_SMARTEEE;
+
+ if (of_property_read_bool(node, "qca,disable-hibernation-mode"))
+ priv->flags |= AT803X_DISABLE_HIBERNATION_MODE;
+
+ if (!of_property_read_u32(node, "qca,smarteee-tw-us-1g", &tw)) {
+ if (!tw || tw > 255) {
+ phydev_err(phydev, "invalid qca,smarteee-tw-us-1g\n");
+ return -EINVAL;
+ }
+ priv->smarteee_lpi_tw_1g = tw;
+ }
+
+ if (!of_property_read_u32(node, "qca,smarteee-tw-us-100m", &tw)) {
+ if (!tw || tw > 255) {
+ phydev_err(phydev, "invalid qca,smarteee-tw-us-100m\n");
+ return -EINVAL;
+ }
+ priv->smarteee_lpi_tw_100m = tw;
+ }
+
+ ret = of_property_read_u32(node, "qca,clk-out-frequency", &freq);
+ if (!ret) {
+ switch (freq) {
+ case 25000000:
+ sel = AT803X_CLK_OUT_25MHZ_XTAL;
+ break;
+ case 50000000:
+ sel = AT803X_CLK_OUT_50MHZ_PLL;
+ break;
+ case 62500000:
+ sel = AT803X_CLK_OUT_62_5MHZ_PLL;
+ break;
+ case 125000000:
+ sel = AT803X_CLK_OUT_125MHZ_PLL;
+ break;
+ default:
+ phydev_err(phydev, "invalid qca,clk-out-frequency\n");
+ return -EINVAL;
+ }
+
+ priv->clk_25m_reg |= FIELD_PREP(AT803X_CLK_OUT_MASK, sel);
+ priv->clk_25m_mask |= AT803X_CLK_OUT_MASK;
+ }
+
+ ret = of_property_read_u32(node, "qca,clk-out-strength", &strength);
+ if (!ret) {
+ priv->clk_25m_mask |= AT803X_CLK_OUT_STRENGTH_MASK;
+ switch (strength) {
+ case AR803X_STRENGTH_FULL:
+ priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_FULL;
+ break;
+ case AR803X_STRENGTH_HALF:
+ priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_HALF;
+ break;
+ case AR803X_STRENGTH_QUARTER:
+ priv->clk_25m_reg |= AT803X_CLK_OUT_STRENGTH_QUARTER;
+ break;
+ default:
+ phydev_err(phydev, "invalid qca,clk-out-strength\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int at803x_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct at803x_priv *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ ret = at803x_parse_dt(phydev);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int at803x_get_features(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ int err;
+
+ err = genphy_read_abilities(phydev);
+ if (err)
+ return err;
+
+ if (phydev->drv->phy_id != ATH8031_PHY_ID)
+ return 0;
+
+ /* AR8031/AR8033 have different status registers
+ * for copper and fiber operation. However, the
+ * extended status register is the same for both
+ * operation modes.
+ *
+ * As a result of that, ESTATUS_1000_XFULL is set
+ * to 1 even when operating in copper TP mode.
+ *
+ * Remove this mode from the supported link modes
+ * when not operating in 1000BaseX mode.
+ */
+ if (!priv->is_1000basex)
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ phydev->supported);
+
+ return 0;
+}
+
+static int at803x_smarteee_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ u16 mask = 0, val = 0;
+ int ret;
+
+ if (priv->flags & AT803X_DISABLE_SMARTEEE)
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_MMD3_SMARTEEE_CTL3,
+ AT803X_MMD3_SMARTEEE_CTL3_LPI_EN, 0);
+
+ if (priv->smarteee_lpi_tw_1g) {
+ mask |= 0xff00;
+ val |= priv->smarteee_lpi_tw_1g << 8;
+ }
+ if (priv->smarteee_lpi_tw_100m) {
+ mask |= 0x00ff;
+ val |= priv->smarteee_lpi_tw_100m;
+ }
+ if (!mask)
+ return 0;
+
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_MMD3_SMARTEEE_CTL1,
+ mask, val);
+ if (ret)
+ return ret;
+
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_MMD3_SMARTEEE_CTL3,
+ AT803X_MMD3_SMARTEEE_CTL3_LPI_EN,
+ AT803X_MMD3_SMARTEEE_CTL3_LPI_EN);
+}
+
+static int at803x_clk_out_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+
+ if (!priv->clk_25m_mask)
+ return 0;
+
+ return phy_modify_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M,
+ priv->clk_25m_mask, priv->clk_25m_reg);
+}
+
+static int at8031_pll_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+
+ /* The default after hardware reset is PLL OFF. After a soft reset, the
+ * values are retained.
+ */
+ if (priv->flags & AT803X_KEEP_PLL_ENABLED)
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ 0, AT803X_DEBUG_PLL_ON);
+ else
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ AT803X_DEBUG_PLL_ON, 0);
+}
+
+static int at803x_hibernation_mode_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+
+ /* The default after hardware reset is hibernation mode enabled. After
+ * software reset, the value is retained.
+ */
+ if (!(priv->flags & AT803X_DISABLE_HIBERNATION_MODE))
+ return 0;
+
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL,
+ AT803X_DEBUG_HIB_CTRL_PS_HIB_EN, 0);
+}
+
+static int at803x_config_init(struct phy_device *phydev)
+{
+ int ret;
+
+ /* The RX and TX delay default is:
+ * after HW reset: RX delay enabled and TX delay disabled
+ * after SW reset: RX delay enabled, while TX delay retains the
+ * value before reset.
+ */
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+ ret = at803x_enable_rx_delay(phydev);
+ else
+ ret = at803x_disable_rx_delay(phydev);
+ if (ret < 0)
+ return ret;
+
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+ ret = at803x_enable_tx_delay(phydev);
+ else
+ ret = at803x_disable_tx_delay(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = at803x_smarteee_config(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = at803x_clk_out_config(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = at803x_hibernation_mode_config(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* Ar803x extended next page bit is enabled by default. Cisco
+ * multigig switches read this bit and attempt to negotiate 10Gbps
+ * rates even if the next page bit is disabled. This is incorrect
+ * behaviour but we still need to accommodate it. XNP is only needed
+ * for 10Gbps support, so disable XNP.
+ */
+ return phy_modify(phydev, MII_ADVERTISE, MDIO_AN_CTRL1_XNP, 0);
+}
+
+static void at803x_link_change_notify(struct phy_device *phydev)
+{
+ /*
+ * Conduct a hardware reset for AT8030 every time a link loss is
+ * signalled. This is necessary to circumvent a hardware bug that
+ * occurs when the cable is unplugged while TX packets are pending
+ * in the FIFO. In such cases, the FIFO enters an error mode it
+ * cannot recover from by software.
+ */
+ if (phydev->state == PHY_NOLINK && phydev->mdio.reset_gpio) {
+ struct at803x_context context;
+
+ at803x_context_save(phydev, &context);
+
+ phy_device_reset(phydev, 1);
+ usleep_range(1000, 2000);
+ phy_device_reset(phydev, 0);
+ usleep_range(1000, 2000);
+
+ at803x_context_restore(phydev, &context);
+
+ phydev_dbg(phydev, "%s(): phy was reset\n", __func__);
+ }
+}
+
+static int at803x_config_aneg(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ int ret;
+
+ ret = at803x_prepare_config_aneg(phydev);
+ if (ret)
+ return ret;
+
+ if (priv->is_1000basex)
+ return genphy_c37_config_aneg(phydev);
+
+ return genphy_config_aneg(phydev);
+}
+
+static int at803x_cable_test_result_trans(u16 status)
+{
+ switch (FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status)) {
+ case AT803X_CDT_STATUS_STAT_NORMAL:
+ return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+ case AT803X_CDT_STATUS_STAT_SHORT:
+ return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+ case AT803X_CDT_STATUS_STAT_OPEN:
+ return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+ case AT803X_CDT_STATUS_STAT_FAIL:
+ default:
+ return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+ }
+}
+
+static bool at803x_cdt_test_failed(u16 status)
+{
+ return FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status) ==
+ AT803X_CDT_STATUS_STAT_FAIL;
+}
+
+static bool at803x_cdt_fault_length_valid(u16 status)
+{
+ switch (FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status)) {
+ case AT803X_CDT_STATUS_STAT_OPEN:
+ case AT803X_CDT_STATUS_STAT_SHORT:
+ return true;
+ }
+ return false;
+}
+
+static int at803x_cable_test_one_pair(struct phy_device *phydev, int pair)
+{
+ static const int ethtool_pair[] = {
+ ETHTOOL_A_CABLE_PAIR_A,
+ ETHTOOL_A_CABLE_PAIR_B,
+ ETHTOOL_A_CABLE_PAIR_C,
+ ETHTOOL_A_CABLE_PAIR_D,
+ };
+ int ret, val;
+
+ val = FIELD_PREP(AT803X_CDT_MDI_PAIR_MASK, pair) |
+ AT803X_CDT_ENABLE_TEST;
+ ret = at803x_cdt_start(phydev, val);
+ if (ret)
+ return ret;
+
+ ret = at803x_cdt_wait_for_completion(phydev, AT803X_CDT_ENABLE_TEST);
+ if (ret)
+ return ret;
+
+ val = phy_read(phydev, AT803X_CDT_STATUS);
+ if (val < 0)
+ return val;
+
+ if (at803x_cdt_test_failed(val))
+ return 0;
+
+ ethnl_cable_test_result(phydev, ethtool_pair[pair],
+ at803x_cable_test_result_trans(val));
+
+ if (at803x_cdt_fault_length_valid(val)) {
+ val = FIELD_GET(AT803X_CDT_STATUS_DELTA_TIME_MASK, val);
+ ethnl_cable_test_fault_length(phydev, ethtool_pair[pair],
+ at803x_cdt_fault_length(val));
+ }
+
+ return 1;
+}
+
+static int at803x_cable_test_get_status(struct phy_device *phydev,
+ bool *finished, unsigned long pair_mask)
+{
+ int retries = 20;
+ int pair, ret;
+
+ *finished = false;
+
+ /* According to the datasheet the CDT can be performed when
+ * there is no link partner or when the link partner is
+ * auto-negotiating. Starting the test will restart the AN
+ * automatically. It seems that doing this repeatedly we will
+ * get a slot where our link partner won't disturb our
+ * measurement.
+ */
+ while (pair_mask && retries--) {
+ for_each_set_bit(pair, &pair_mask, 4) {
+ ret = at803x_cable_test_one_pair(phydev, pair);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ clear_bit(pair, &pair_mask);
+ }
+ if (pair_mask)
+ msleep(250);
+ }
+
+ *finished = true;
+
+ return 0;
+}
+
+static void at803x_cable_test_autoneg(struct phy_device *phydev)
+{
+ /* Enable auto-negotiation, but advertise no capabilities, no link
+ * will be established. A restart of the auto-negotiation is not
+ * required, because the cable test will automatically break the link.
+ */
+ phy_write(phydev, MII_BMCR, BMCR_ANENABLE);
+ phy_write(phydev, MII_ADVERTISE, ADVERTISE_CSMA);
+}
+
+static int at803x_cable_test_start(struct phy_device *phydev)
+{
+ at803x_cable_test_autoneg(phydev);
+ /* we do all the (time consuming) work later */
+ return 0;
+}
+
+static int at8031_rgmii_reg_set_voltage_sel(struct regulator_dev *rdev,
+ unsigned int selector)
+{
+ struct phy_device *phydev = rdev_get_drvdata(rdev);
+
+ if (selector)
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ 0, AT803X_DEBUG_RGMII_1V8);
+ else
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_1F,
+ AT803X_DEBUG_RGMII_1V8, 0);
+}
+
+static int at8031_rgmii_reg_get_voltage_sel(struct regulator_dev *rdev)
+{
+ struct phy_device *phydev = rdev_get_drvdata(rdev);
+ int val;
+
+ val = at803x_debug_reg_read(phydev, AT803X_DEBUG_REG_1F);
+ if (val < 0)
+ return val;
+
+ return (val & AT803X_DEBUG_RGMII_1V8) ? 1 : 0;
+}
+
+static const struct regulator_ops vddio_regulator_ops = {
+ .list_voltage = regulator_list_voltage_table,
+ .set_voltage_sel = at8031_rgmii_reg_set_voltage_sel,
+ .get_voltage_sel = at8031_rgmii_reg_get_voltage_sel,
+};
+
+static const unsigned int vddio_voltage_table[] = {
+ 1500000,
+ 1800000,
+};
+
+static const struct regulator_desc vddio_desc = {
+ .name = "vddio",
+ .of_match = of_match_ptr("vddio-regulator"),
+ .n_voltages = ARRAY_SIZE(vddio_voltage_table),
+ .volt_table = vddio_voltage_table,
+ .ops = &vddio_regulator_ops,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+};
+
+static const struct regulator_ops vddh_regulator_ops = {
+};
+
+static const struct regulator_desc vddh_desc = {
+ .name = "vddh",
+ .of_match = of_match_ptr("vddh-regulator"),
+ .n_voltages = 1,
+ .fixed_uV = 2500000,
+ .ops = &vddh_regulator_ops,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+};
+
+static int at8031_register_regulators(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ struct device *dev = &phydev->mdio.dev;
+ struct regulator_config config = { };
+
+ config.dev = dev;
+ config.driver_data = phydev;
+
+ priv->vddio_rdev = devm_regulator_register(dev, &vddio_desc, &config);
+ if (IS_ERR(priv->vddio_rdev)) {
+ phydev_err(phydev, "failed to register VDDIO regulator\n");
+ return PTR_ERR(priv->vddio_rdev);
+ }
+
+ priv->vddh_rdev = devm_regulator_register(dev, &vddh_desc, &config);
+ if (IS_ERR(priv->vddh_rdev)) {
+ phydev_err(phydev, "failed to register VDDH regulator\n");
+ return PTR_ERR(priv->vddh_rdev);
+ }
+
+ return 0;
+}
+
+static int at8031_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+ struct phy_device *phydev = upstream;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+ DECLARE_PHY_INTERFACE_MASK(interfaces);
+ phy_interface_t iface;
+
+ linkmode_zero(phy_support);
+ phylink_set(phy_support, 1000baseX_Full);
+ phylink_set(phy_support, 1000baseT_Full);
+ phylink_set(phy_support, Autoneg);
+ phylink_set(phy_support, Pause);
+ phylink_set(phy_support, Asym_Pause);
+
+ linkmode_zero(sfp_support);
+ sfp_parse_support(phydev->sfp_bus, id, sfp_support, interfaces);
+ /* Some modules support 10G modes as well as others we support.
+ * Mask out non-supported modes so the correct interface is picked.
+ */
+ linkmode_and(sfp_support, phy_support, sfp_support);
+
+ if (linkmode_empty(sfp_support)) {
+ dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
+ return -EINVAL;
+ }
+
+ iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
+
+ /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
+ * interface for use with SFP modules.
+ * However, some copper modules detected as having a preferred SGMII
+ * interface do default to and function in 1000Base-X mode, so just
+ * print a warning and allow such modules, as they may have some chance
+ * of working.
+ */
+ if (iface == PHY_INTERFACE_MODE_SGMII)
+ dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
+ else if (iface != PHY_INTERFACE_MODE_1000BASEX)
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct sfp_upstream_ops at8031_sfp_ops = {
+ .attach = phy_sfp_attach,
+ .detach = phy_sfp_detach,
+ .module_insert = at8031_sfp_insert,
+};
+
+static int at8031_parse_dt(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct at803x_priv *priv = phydev->priv;
+ int ret;
+
+ if (of_property_read_bool(node, "qca,keep-pll-enabled"))
+ priv->flags |= AT803X_KEEP_PLL_ENABLED;
+
+ ret = at8031_register_regulators(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_regulator_get_enable_optional(&phydev->mdio.dev,
+ "vddio");
+ if (ret) {
+ phydev_err(phydev, "failed to get VDDIO regulator\n");
+ return ret;
+ }
+
+ /* Only AR8031/8033 support 1000Base-X for SFP modules */
+ return phy_sfp_probe(phydev, &at8031_sfp_ops);
+}
+
+static int at8031_probe(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ int mode_cfg;
+ int ccr;
+ int ret;
+
+ ret = at803x_probe(phydev);
+ if (ret)
+ return ret;
+
+ /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
+ * options.
+ */
+ ret = at8031_parse_dt(phydev);
+ if (ret)
+ return ret;
+
+ ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+ if (ccr < 0)
+ return ccr;
+ mode_cfg = ccr & AT803X_MODE_CFG_MASK;
+
+ switch (mode_cfg) {
+ case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
+ case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
+ priv->is_1000basex = true;
+ fallthrough;
+ case AT803X_MODE_CFG_FX100_RGMII_50OHM:
+ case AT803X_MODE_CFG_FX100_RGMII_75OHM:
+ priv->is_fiber = true;
+ break;
+ }
+
+ /* Disable WoL in 1588 register which is enabled
+ * by default
+ */
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+}
+
+static int at8031_config_init(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ int ret;
+
+ /* Some bootloaders leave the fiber page selected.
+ * Switch to the appropriate page (fiber or copper), as otherwise we
+ * read the PHY capabilities from the wrong page.
+ */
+ phy_lock_mdio_bus(phydev);
+ ret = at803x_write_page(phydev,
+ priv->is_fiber ? AT803X_PAGE_FIBER :
+ AT803X_PAGE_COPPER);
+ phy_unlock_mdio_bus(phydev);
+ if (ret)
+ return ret;
+
+ ret = at8031_pll_config(phydev);
+ if (ret < 0)
+ return ret;
+
+ return at803x_config_init(phydev);
+}
+
+static int at8031_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ int ret;
+
+ /* First setup MAC address and enable WOL interrupt */
+ ret = at803x_set_wol(phydev, wol);
+ if (ret)
+ return ret;
+
+ if (wol->wolopts & WAKE_MAGIC)
+ /* Enable WOL function for 1588 */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ 0, AT803X_WOL_EN);
+ else
+ /* Disable WoL function for 1588 */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+
+ return ret;
+}
+
+static int at8031_config_intr(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ int err, value = 0;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED &&
+ priv->is_fiber) {
+ /* Clear any pending interrupts */
+ err = at803x_ack_interrupt(phydev);
+ if (err)
+ return err;
+
+ value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
+ value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
+
+ err = phy_set_bits(phydev, AT803X_INTR_ENABLE, value);
+ if (err)
+ return err;
+ }
+
+ return at803x_config_intr(phydev);
+}
+
+/* AR8031 and AR8033 share the same read status logic */
+static int at8031_read_status(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+ bool changed;
+
+ if (priv->is_1000basex)
+ return genphy_c37_read_status(phydev, &changed);
+
+ return at803x_read_status(phydev);
+}
+
+/* AR8031 and AR8035 share the same cable test get status reg */
+static int at8031_cable_test_get_status(struct phy_device *phydev,
+ bool *finished)
+{
+ return at803x_cable_test_get_status(phydev, finished, 0xf);
+}
+
+/* AR8031 and AR8035 share the same cable test start logic */
+static int at8031_cable_test_start(struct phy_device *phydev)
+{
+ at803x_cable_test_autoneg(phydev);
+ phy_write(phydev, MII_CTRL1000, 0);
+ /* we do all the (time consuming) work later */
+ return 0;
+}
+
+/* AR8032, AR9331 and QCA9561 share the same cable test get status reg */
+static int at8032_cable_test_get_status(struct phy_device *phydev,
+ bool *finished)
+{
+ return at803x_cable_test_get_status(phydev, finished, 0x3);
+}
+
+static int at8035_parse_dt(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+
+ /* Mask is set by the generic at803x_parse_dt
+ * if property is set. Assume property is set
+ * with the mask not zero.
+ */
+ if (priv->clk_25m_mask) {
+ /* Fixup for the AR8030/AR8035. This chip has another mask and
+ * doesn't support the DSP reference. Eg. the lowest bit of the
+ * mask. The upper two bits select the same frequencies. Mask
+ * the lowest bit here.
+ *
+ * Warning:
+ * There was no datasheet for the AR8030 available so this is
+ * just a guess. But the AR8035 is listed as pin compatible
+ * to the AR8030 so there might be a good chance it works on
+ * the AR8030 too.
+ */
+ priv->clk_25m_reg &= AT8035_CLK_OUT_MASK;
+ priv->clk_25m_mask &= AT8035_CLK_OUT_MASK;
+ }
+
+ return 0;
+}
+
+/* AR8030 and AR8035 shared the same special mask for clk_25m */
+static int at8035_probe(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = at803x_probe(phydev);
+ if (ret)
+ return ret;
+
+ return at8035_parse_dt(phydev);
+}
+
+static struct phy_driver at803x_driver[] = {
+{
+ /* Qualcomm Atheros AR8035 */
+ PHY_ID_MATCH_EXACT(ATH8035_PHY_ID),
+ .name = "Qualcomm Atheros AR8035",
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = at8035_probe,
+ .config_aneg = at803x_config_aneg,
+ .config_init = at803x_config_init,
+ .soft_reset = genphy_soft_reset,
+ .set_wol = at803x_set_wol,
+ .get_wol = at803x_get_wol,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ /* PHY_GBIT_FEATURES */
+ .read_status = at803x_read_status,
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .get_tunable = at803x_get_tunable,
+ .set_tunable = at803x_set_tunable,
+ .cable_test_start = at8031_cable_test_start,
+ .cable_test_get_status = at8031_cable_test_get_status,
+}, {
+ /* Qualcomm Atheros AR8030 */
+ .phy_id = ATH8030_PHY_ID,
+ .name = "Qualcomm Atheros AR8030",
+ .phy_id_mask = AT8030_PHY_ID_MASK,
+ .probe = at8035_probe,
+ .config_init = at803x_config_init,
+ .link_change_notify = at803x_link_change_notify,
+ .set_wol = at803x_set_wol,
+ .get_wol = at803x_get_wol,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ /* PHY_BASIC_FEATURES */
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+}, {
+ /* Qualcomm Atheros AR8031/AR8033 */
+ PHY_ID_MATCH_EXACT(ATH8031_PHY_ID),
+ .name = "Qualcomm Atheros AR8031/AR8033",
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = at8031_probe,
+ .config_init = at8031_config_init,
+ .config_aneg = at803x_config_aneg,
+ .soft_reset = genphy_soft_reset,
+ .set_wol = at8031_set_wol,
+ .get_wol = at803x_get_wol,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ .read_page = at803x_read_page,
+ .write_page = at803x_write_page,
+ .get_features = at803x_get_features,
+ .read_status = at8031_read_status,
+ .config_intr = at8031_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .get_tunable = at803x_get_tunable,
+ .set_tunable = at803x_set_tunable,
+ .cable_test_start = at8031_cable_test_start,
+ .cable_test_get_status = at8031_cable_test_get_status,
+}, {
+ /* Qualcomm Atheros AR8032 */
+ PHY_ID_MATCH_EXACT(ATH8032_PHY_ID),
+ .name = "Qualcomm Atheros AR8032",
+ .probe = at803x_probe,
+ .flags = PHY_POLL_CABLE_TEST,
+ .config_init = at803x_config_init,
+ .link_change_notify = at803x_link_change_notify,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ /* PHY_BASIC_FEATURES */
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .cable_test_start = at803x_cable_test_start,
+ .cable_test_get_status = at8032_cable_test_get_status,
+}, {
+ /* ATHEROS AR9331 */
+ PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
+ .name = "Qualcomm Atheros AR9331 built-in PHY",
+ .probe = at803x_probe,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ .flags = PHY_POLL_CABLE_TEST,
+ /* PHY_BASIC_FEATURES */
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .cable_test_start = at803x_cable_test_start,
+ .cable_test_get_status = at8032_cable_test_get_status,
+ .read_status = at803x_read_status,
+ .soft_reset = genphy_soft_reset,
+ .config_aneg = at803x_config_aneg,
+}, {
+ /* Qualcomm Atheros QCA9561 */
+ PHY_ID_MATCH_EXACT(QCA9561_PHY_ID),
+ .name = "Qualcomm Atheros QCA9561 built-in PHY",
+ .probe = at803x_probe,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ .flags = PHY_POLL_CABLE_TEST,
+ /* PHY_BASIC_FEATURES */
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .cable_test_start = at803x_cable_test_start,
+ .cable_test_get_status = at8032_cable_test_get_status,
+ .read_status = at803x_read_status,
+ .soft_reset = genphy_soft_reset,
+ .config_aneg = at803x_config_aneg,
+}, };
+
+module_phy_driver(at803x_driver);
+
+static struct mdio_device_id __maybe_unused atheros_tbl[] = {
+ { ATH8030_PHY_ID, AT8030_PHY_ID_MASK },
+ { PHY_ID_MATCH_EXACT(ATH8031_PHY_ID) },
+ { PHY_ID_MATCH_EXACT(ATH8032_PHY_ID) },
+ { PHY_ID_MATCH_EXACT(ATH8035_PHY_ID) },
+ { PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) },
+ { PHY_ID_MATCH_EXACT(QCA9561_PHY_ID) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, atheros_tbl);
diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c
new file mode 100644
index 000000000000..672c6929119a
--- /dev/null
+++ b/drivers/net/phy/qcom/qca807x.c
@@ -0,0 +1,849 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023 Sartura Ltd.
+ *
+ * Author: Robert Marko <robert.marko@sartura.hr>
+ * Christian Marangi <ansuelsmth@gmail.com>
+ *
+ * Qualcomm QCA8072 and QCA8075 PHY driver
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/bitfield.h>
+#include <linux/gpio/driver.h>
+#include <linux/sfp.h>
+
+#include "qcom.h"
+
+#define QCA807X_CHIP_CONFIGURATION 0x1f
+#define QCA807X_BT_BX_REG_SEL BIT(15)
+#define QCA807X_BT_BX_REG_SEL_FIBER 0
+#define QCA807X_BT_BX_REG_SEL_COPPER 1
+#define QCA807X_CHIP_CONFIGURATION_MODE_CFG_MASK GENMASK(3, 0)
+#define QCA807X_CHIP_CONFIGURATION_MODE_QSGMII_SGMII 4
+#define QCA807X_CHIP_CONFIGURATION_MODE_PSGMII_FIBER 3
+#define QCA807X_CHIP_CONFIGURATION_MODE_PSGMII_ALL_COPPER 0
+
+#define QCA807X_MEDIA_SELECT_STATUS 0x1a
+#define QCA807X_MEDIA_DETECTED_COPPER BIT(5)
+#define QCA807X_MEDIA_DETECTED_1000_BASE_X BIT(4)
+#define QCA807X_MEDIA_DETECTED_100_BASE_FX BIT(3)
+
+#define QCA807X_MMD7_FIBER_MODE_AUTO_DETECTION 0x807e
+#define QCA807X_MMD7_FIBER_MODE_AUTO_DETECTION_EN BIT(0)
+
+#define QCA807X_MMD7_1000BASE_T_POWER_SAVE_PER_CABLE_LENGTH 0x801a
+#define QCA807X_CONTROL_DAC_MASK GENMASK(2, 0)
+/* List of tweaks enabled by this bit:
+ * - With both FULL amplitude and FULL bias current: bias current
+ * is set to half.
+ * - With only DSP amplitude: bias current is set to half and
+ * is set to 1/4 with cable < 10m.
+ * - With DSP bias current (included both DSP amplitude and
+ * DSP bias current): bias current is half the detected current
+ * with cable < 10m.
+ */
+#define QCA807X_CONTROL_DAC_BIAS_CURRENT_TWEAK BIT(2)
+#define QCA807X_CONTROL_DAC_DSP_BIAS_CURRENT BIT(1)
+#define QCA807X_CONTROL_DAC_DSP_AMPLITUDE BIT(0)
+
+#define QCA807X_MMD7_LED_100N_1 0x8074
+#define QCA807X_MMD7_LED_100N_2 0x8075
+#define QCA807X_MMD7_LED_1000N_1 0x8076
+#define QCA807X_MMD7_LED_1000N_2 0x8077
+
+#define QCA807X_MMD7_LED_CTRL(x) (0x8074 + ((x) * 2))
+#define QCA807X_MMD7_LED_FORCE_CTRL(x) (0x8075 + ((x) * 2))
+
+/* LED hw control pattern for fiber port */
+#define QCA807X_LED_FIBER_PATTERN_MASK GENMASK(11, 1)
+#define QCA807X_LED_FIBER_TXACT_BLK_EN BIT(10)
+#define QCA807X_LED_FIBER_RXACT_BLK_EN BIT(9)
+#define QCA807X_LED_FIBER_FDX_ON_EN BIT(6)
+#define QCA807X_LED_FIBER_HDX_ON_EN BIT(5)
+#define QCA807X_LED_FIBER_1000BX_ON_EN BIT(2)
+#define QCA807X_LED_FIBER_100FX_ON_EN BIT(1)
+
+/* Some device repurpose the LED as GPIO out */
+#define QCA807X_GPIO_FORCE_EN QCA808X_LED_FORCE_EN
+#define QCA807X_GPIO_FORCE_MODE_MASK QCA808X_LED_FORCE_MODE_MASK
+
+#define QCA807X_FUNCTION_CONTROL 0x10
+#define QCA807X_FC_MDI_CROSSOVER_MODE_MASK GENMASK(6, 5)
+#define QCA807X_FC_MDI_CROSSOVER_AUTO 3
+#define QCA807X_FC_MDI_CROSSOVER_MANUAL_MDIX 1
+#define QCA807X_FC_MDI_CROSSOVER_MANUAL_MDI 0
+
+/* PQSGMII Analog PHY specific */
+#define PQSGMII_CTRL_REG 0x0
+#define PQSGMII_ANALOG_SW_RESET BIT(6)
+#define PQSGMII_DRIVE_CONTROL_1 0xb
+#define PQSGMII_TX_DRIVER_MASK GENMASK(7, 4)
+#define PQSGMII_TX_DRIVER_140MV 0x0
+#define PQSGMII_TX_DRIVER_160MV 0x1
+#define PQSGMII_TX_DRIVER_180MV 0x2
+#define PQSGMII_TX_DRIVER_200MV 0x3
+#define PQSGMII_TX_DRIVER_220MV 0x4
+#define PQSGMII_TX_DRIVER_240MV 0x5
+#define PQSGMII_TX_DRIVER_260MV 0x6
+#define PQSGMII_TX_DRIVER_280MV 0x7
+#define PQSGMII_TX_DRIVER_300MV 0x8
+#define PQSGMII_TX_DRIVER_320MV 0x9
+#define PQSGMII_TX_DRIVER_400MV 0xa
+#define PQSGMII_TX_DRIVER_500MV 0xb
+#define PQSGMII_TX_DRIVER_600MV 0xc
+#define PQSGMII_MODE_CTRL 0x6d
+#define PQSGMII_MODE_CTRL_AZ_WORKAROUND_MASK BIT(0)
+#define PQSGMII_MMD3_SERDES_CONTROL 0x805a
+
+#define PHY_ID_QCA8072 0x004dd0b2
+#define PHY_ID_QCA8075 0x004dd0b1
+
+#define QCA807X_COMBO_ADDR_OFFSET 4
+#define QCA807X_PQSGMII_ADDR_OFFSET 5
+#define SERDES_RESET_SLEEP 100
+
+enum qca807x_global_phy {
+ QCA807X_COMBO_ADDR = 4,
+ QCA807X_PQSGMII_ADDR = 5,
+};
+
+struct qca807x_shared_priv {
+ unsigned int package_mode;
+ u32 tx_drive_strength;
+};
+
+struct qca807x_gpio_priv {
+ struct phy_device *phy;
+};
+
+struct qca807x_priv {
+ bool dac_full_amplitude;
+ bool dac_full_bias_current;
+ bool dac_disable_bias_current_tweak;
+};
+
+static int qca807x_cable_test_start(struct phy_device *phydev)
+{
+ /* we do all the (time consuming) work later */
+ return 0;
+}
+
+static int qca807x_led_parse_netdev(struct phy_device *phydev, unsigned long rules,
+ u16 *offload_trigger)
+{
+ /* Parsing specific to netdev trigger */
+ switch (phydev->port) {
+ case PORT_TP:
+ if (test_bit(TRIGGER_NETDEV_TX, &rules))
+ *offload_trigger |= QCA808X_LED_TX_BLINK;
+ if (test_bit(TRIGGER_NETDEV_RX, &rules))
+ *offload_trigger |= QCA808X_LED_RX_BLINK;
+ if (test_bit(TRIGGER_NETDEV_LINK_10, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED10_ON;
+ if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED100_ON;
+ if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED1000_ON;
+ if (test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &rules))
+ *offload_trigger |= QCA808X_LED_HALF_DUPLEX_ON;
+ if (test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &rules))
+ *offload_trigger |= QCA808X_LED_FULL_DUPLEX_ON;
+ break;
+ case PORT_FIBRE:
+ if (test_bit(TRIGGER_NETDEV_TX, &rules))
+ *offload_trigger |= QCA807X_LED_FIBER_TXACT_BLK_EN;
+ if (test_bit(TRIGGER_NETDEV_RX, &rules))
+ *offload_trigger |= QCA807X_LED_FIBER_RXACT_BLK_EN;
+ if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ *offload_trigger |= QCA807X_LED_FIBER_100FX_ON_EN;
+ if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ *offload_trigger |= QCA807X_LED_FIBER_1000BX_ON_EN;
+ if (test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &rules))
+ *offload_trigger |= QCA807X_LED_FIBER_HDX_ON_EN;
+ if (test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &rules))
+ *offload_trigger |= QCA807X_LED_FIBER_FDX_ON_EN;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (rules && !*offload_trigger)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int qca807x_led_hw_control_enable(struct phy_device *phydev, u8 index)
+{
+ u16 reg;
+
+ if (index > 1)
+ return -EINVAL;
+
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_hw_control_enable(phydev, reg);
+}
+
+static int qca807x_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ u16 offload_trigger = 0;
+
+ if (index > 1)
+ return -EINVAL;
+
+ return qca807x_led_parse_netdev(phydev, rules, &offload_trigger);
+}
+
+static int qca807x_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ u16 reg, mask, offload_trigger = 0;
+ int ret;
+
+ if (index > 1)
+ return -EINVAL;
+
+ ret = qca807x_led_parse_netdev(phydev, rules, &offload_trigger);
+ if (ret)
+ return ret;
+
+ ret = qca807x_led_hw_control_enable(phydev, index);
+ if (ret)
+ return ret;
+
+ switch (phydev->port) {
+ case PORT_TP:
+ reg = QCA807X_MMD7_LED_CTRL(index);
+ mask = QCA808X_LED_PATTERN_MASK;
+ break;
+ case PORT_FIBRE:
+ /* HW control pattern bits are in LED FORCE reg */
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ mask = QCA807X_LED_FIBER_PATTERN_MASK;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return phy_modify_mmd(phydev, MDIO_MMD_AN, reg, mask,
+ offload_trigger);
+}
+
+static bool qca807x_led_hw_control_status(struct phy_device *phydev, u8 index)
+{
+ u16 reg;
+
+ if (index > 1)
+ return false;
+
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_hw_control_status(phydev, reg);
+}
+
+static int qca807x_led_hw_control_get(struct phy_device *phydev, u8 index,
+ unsigned long *rules)
+{
+ u16 reg;
+ int val;
+
+ if (index > 1)
+ return -EINVAL;
+
+ /* Check if we have hw control enabled */
+ if (qca807x_led_hw_control_status(phydev, index))
+ return -EINVAL;
+
+ /* Parsing specific to netdev trigger */
+ switch (phydev->port) {
+ case PORT_TP:
+ reg = QCA807X_MMD7_LED_CTRL(index);
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, reg);
+ if (val & QCA808X_LED_TX_BLINK)
+ set_bit(TRIGGER_NETDEV_TX, rules);
+ if (val & QCA808X_LED_RX_BLINK)
+ set_bit(TRIGGER_NETDEV_RX, rules);
+ if (val & QCA808X_LED_SPEED10_ON)
+ set_bit(TRIGGER_NETDEV_LINK_10, rules);
+ if (val & QCA808X_LED_SPEED100_ON)
+ set_bit(TRIGGER_NETDEV_LINK_100, rules);
+ if (val & QCA808X_LED_SPEED1000_ON)
+ set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+ if (val & QCA808X_LED_HALF_DUPLEX_ON)
+ set_bit(TRIGGER_NETDEV_HALF_DUPLEX, rules);
+ if (val & QCA808X_LED_FULL_DUPLEX_ON)
+ set_bit(TRIGGER_NETDEV_FULL_DUPLEX, rules);
+ break;
+ case PORT_FIBRE:
+ /* HW control pattern bits are in LED FORCE reg */
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, reg);
+ if (val & QCA807X_LED_FIBER_TXACT_BLK_EN)
+ set_bit(TRIGGER_NETDEV_TX, rules);
+ if (val & QCA807X_LED_FIBER_RXACT_BLK_EN)
+ set_bit(TRIGGER_NETDEV_RX, rules);
+ if (val & QCA807X_LED_FIBER_100FX_ON_EN)
+ set_bit(TRIGGER_NETDEV_LINK_100, rules);
+ if (val & QCA807X_LED_FIBER_1000BX_ON_EN)
+ set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+ if (val & QCA807X_LED_FIBER_HDX_ON_EN)
+ set_bit(TRIGGER_NETDEV_HALF_DUPLEX, rules);
+ if (val & QCA807X_LED_FIBER_FDX_ON_EN)
+ set_bit(TRIGGER_NETDEV_FULL_DUPLEX, rules);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int qca807x_led_hw_control_reset(struct phy_device *phydev, u8 index)
+{
+ u16 reg, mask;
+
+ if (index > 1)
+ return -EINVAL;
+
+ switch (phydev->port) {
+ case PORT_TP:
+ reg = QCA807X_MMD7_LED_CTRL(index);
+ mask = QCA808X_LED_PATTERN_MASK;
+ break;
+ case PORT_FIBRE:
+ /* HW control pattern bits are in LED FORCE reg */
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ mask = QCA807X_LED_FIBER_PATTERN_MASK;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return phy_clear_bits_mmd(phydev, MDIO_MMD_AN, reg, mask);
+}
+
+static int qca807x_led_brightness_set(struct phy_device *phydev,
+ u8 index, enum led_brightness value)
+{
+ u16 reg;
+ int ret;
+
+ if (index > 1)
+ return -EINVAL;
+
+ /* If we are setting off the LED reset any hw control rule */
+ if (!value) {
+ ret = qca807x_led_hw_control_reset(phydev, index);
+ if (ret)
+ return ret;
+ }
+
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_brightness_set(phydev, reg, value);
+}
+
+static int qca807x_led_blink_set(struct phy_device *phydev, u8 index,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ u16 reg;
+
+ if (index > 1)
+ return -EINVAL;
+
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_blink_set(phydev, reg, delay_on, delay_off);
+}
+
+#ifdef CONFIG_GPIOLIB
+static int qca807x_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
+{
+ return GPIO_LINE_DIRECTION_OUT;
+}
+
+static int qca807x_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+ struct qca807x_gpio_priv *priv = gpiochip_get_data(gc);
+ u16 reg;
+ int val;
+
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(offset);
+ val = phy_read_mmd(priv->phy, MDIO_MMD_AN, reg);
+
+ return FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val);
+}
+
+static void qca807x_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
+{
+ struct qca807x_gpio_priv *priv = gpiochip_get_data(gc);
+ u16 reg;
+ int val;
+
+ reg = QCA807X_MMD7_LED_FORCE_CTRL(offset);
+
+ val = phy_read_mmd(priv->phy, MDIO_MMD_AN, reg);
+ val &= ~QCA807X_GPIO_FORCE_MODE_MASK;
+ val |= QCA807X_GPIO_FORCE_EN;
+ val |= FIELD_PREP(QCA807X_GPIO_FORCE_MODE_MASK, value);
+
+ phy_write_mmd(priv->phy, MDIO_MMD_AN, reg, val);
+}
+
+static int qca807x_gpio_dir_out(struct gpio_chip *gc, unsigned int offset, int value)
+{
+ qca807x_gpio_set(gc, offset, value);
+
+ return 0;
+}
+
+static int qca807x_gpio(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct qca807x_gpio_priv *priv;
+ struct gpio_chip *gc;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->phy = phydev;
+
+ gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
+ if (!gc)
+ return -ENOMEM;
+
+ gc->label = dev_name(dev);
+ gc->base = -1;
+ gc->ngpio = 2;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
+ gc->can_sleep = true;
+ gc->get_direction = qca807x_gpio_get_direction;
+ gc->direction_output = qca807x_gpio_dir_out;
+ gc->get = qca807x_gpio_get;
+ gc->set = qca807x_gpio_set;
+
+ return devm_gpiochip_add_data(dev, gc, priv);
+}
+#endif
+
+static int qca807x_read_fiber_status(struct phy_device *phydev)
+{
+ bool changed;
+ int ss, err;
+
+ err = genphy_c37_read_status(phydev, &changed);
+ if (err || !changed)
+ return err;
+
+ /* Read the QCA807x PHY-Specific Status register fiber page,
+ * which indicates the speed and duplex that the PHY is actually
+ * using, irrespective of whether we are in autoneg mode or not.
+ */
+ ss = phy_read(phydev, AT803X_SPECIFIC_STATUS);
+ if (ss < 0)
+ return ss;
+
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
+ if (ss & AT803X_SS_SPEED_DUPLEX_RESOLVED) {
+ switch (FIELD_GET(AT803X_SS_SPEED_MASK, ss)) {
+ case AT803X_SS_SPEED_100:
+ phydev->speed = SPEED_100;
+ break;
+ case AT803X_SS_SPEED_1000:
+ phydev->speed = SPEED_1000;
+ break;
+ }
+
+ if (ss & AT803X_SS_DUPLEX)
+ phydev->duplex = DUPLEX_FULL;
+ else
+ phydev->duplex = DUPLEX_HALF;
+ }
+
+ return 0;
+}
+
+static int qca807x_read_status(struct phy_device *phydev)
+{
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported)) {
+ switch (phydev->port) {
+ case PORT_FIBRE:
+ return qca807x_read_fiber_status(phydev);
+ case PORT_TP:
+ return at803x_read_status(phydev);
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return at803x_read_status(phydev);
+}
+
+static int qca807x_phy_package_probe_once(struct phy_device *phydev)
+{
+ struct phy_package_shared *shared = phydev->shared;
+ struct qca807x_shared_priv *priv = shared->priv;
+ unsigned int tx_drive_strength;
+ const char *package_mode_name;
+
+ /* Default to 600mw if not defined */
+ if (of_property_read_u32(shared->np, "qcom,tx-drive-strength-milliwatt",
+ &tx_drive_strength))
+ tx_drive_strength = 600;
+
+ switch (tx_drive_strength) {
+ case 140:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_140MV;
+ break;
+ case 160:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_160MV;
+ break;
+ case 180:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_180MV;
+ break;
+ case 200:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_200MV;
+ break;
+ case 220:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_220MV;
+ break;
+ case 240:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_240MV;
+ break;
+ case 260:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_260MV;
+ break;
+ case 280:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_280MV;
+ break;
+ case 300:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_300MV;
+ break;
+ case 320:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_320MV;
+ break;
+ case 400:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_400MV;
+ break;
+ case 500:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_500MV;
+ break;
+ case 600:
+ priv->tx_drive_strength = PQSGMII_TX_DRIVER_600MV;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ priv->package_mode = PHY_INTERFACE_MODE_NA;
+ if (!of_property_read_string(shared->np, "qcom,package-mode",
+ &package_mode_name)) {
+ if (!strcasecmp(package_mode_name,
+ phy_modes(PHY_INTERFACE_MODE_PSGMII)))
+ priv->package_mode = PHY_INTERFACE_MODE_PSGMII;
+ else if (!strcasecmp(package_mode_name,
+ phy_modes(PHY_INTERFACE_MODE_QSGMII)))
+ priv->package_mode = PHY_INTERFACE_MODE_QSGMII;
+ else
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int qca807x_phy_package_config_init_once(struct phy_device *phydev)
+{
+ struct phy_package_shared *shared = phydev->shared;
+ struct qca807x_shared_priv *priv = shared->priv;
+ int val, ret;
+
+ /* Make sure PHY follow PHY package mode if enforced */
+ if (priv->package_mode != PHY_INTERFACE_MODE_NA &&
+ phydev->interface != priv->package_mode)
+ return -EINVAL;
+
+ phy_lock_mdio_bus(phydev);
+
+ /* Set correct PHY package mode */
+ val = __phy_package_read(phydev, QCA807X_COMBO_ADDR,
+ QCA807X_CHIP_CONFIGURATION);
+ val &= ~QCA807X_CHIP_CONFIGURATION_MODE_CFG_MASK;
+ /* package_mode can be QSGMII or PSGMII and we validate
+ * this in probe_once.
+ * With package_mode to NA, we default to PSGMII.
+ */
+ switch (priv->package_mode) {
+ case PHY_INTERFACE_MODE_QSGMII:
+ val |= QCA807X_CHIP_CONFIGURATION_MODE_QSGMII_SGMII;
+ break;
+ case PHY_INTERFACE_MODE_PSGMII:
+ default:
+ val |= QCA807X_CHIP_CONFIGURATION_MODE_PSGMII_ALL_COPPER;
+ }
+ ret = __phy_package_write(phydev, QCA807X_COMBO_ADDR,
+ QCA807X_CHIP_CONFIGURATION, val);
+ if (ret)
+ goto exit;
+
+ /* After mode change Serdes reset is required */
+ val = __phy_package_read(phydev, QCA807X_PQSGMII_ADDR,
+ PQSGMII_CTRL_REG);
+ val &= ~PQSGMII_ANALOG_SW_RESET;
+ ret = __phy_package_write(phydev, QCA807X_PQSGMII_ADDR,
+ PQSGMII_CTRL_REG, val);
+ if (ret)
+ goto exit;
+
+ msleep(SERDES_RESET_SLEEP);
+
+ val = __phy_package_read(phydev, QCA807X_PQSGMII_ADDR,
+ PQSGMII_CTRL_REG);
+ val |= PQSGMII_ANALOG_SW_RESET;
+ ret = __phy_package_write(phydev, QCA807X_PQSGMII_ADDR,
+ PQSGMII_CTRL_REG, val);
+ if (ret)
+ goto exit;
+
+ /* Workaround to enable AZ transmitting ability */
+ val = __phy_package_read_mmd(phydev, QCA807X_PQSGMII_ADDR,
+ MDIO_MMD_PMAPMD, PQSGMII_MODE_CTRL);
+ val &= ~PQSGMII_MODE_CTRL_AZ_WORKAROUND_MASK;
+ ret = __phy_package_write_mmd(phydev, QCA807X_PQSGMII_ADDR,
+ MDIO_MMD_PMAPMD, PQSGMII_MODE_CTRL, val);
+ if (ret)
+ goto exit;
+
+ /* Set PQSGMII TX AMP strength */
+ val = __phy_package_read(phydev, QCA807X_PQSGMII_ADDR,
+ PQSGMII_DRIVE_CONTROL_1);
+ val &= ~PQSGMII_TX_DRIVER_MASK;
+ val |= FIELD_PREP(PQSGMII_TX_DRIVER_MASK, priv->tx_drive_strength);
+ ret = __phy_package_write(phydev, QCA807X_PQSGMII_ADDR,
+ PQSGMII_DRIVE_CONTROL_1, val);
+ if (ret)
+ goto exit;
+
+ /* Prevent PSGMII going into hibernation via PSGMII self test */
+ val = __phy_package_read_mmd(phydev, QCA807X_COMBO_ADDR,
+ MDIO_MMD_PCS, PQSGMII_MMD3_SERDES_CONTROL);
+ val &= ~BIT(1);
+ ret = __phy_package_write_mmd(phydev, QCA807X_COMBO_ADDR,
+ MDIO_MMD_PCS, PQSGMII_MMD3_SERDES_CONTROL, val);
+
+exit:
+ phy_unlock_mdio_bus(phydev);
+
+ return ret;
+}
+
+static int qca807x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+ struct phy_device *phydev = upstream;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, };
+ phy_interface_t iface;
+ int ret;
+ DECLARE_PHY_INTERFACE_MASK(interfaces);
+
+ sfp_parse_support(phydev->sfp_bus, id, support, interfaces);
+ iface = sfp_select_interface(phydev->sfp_bus, support);
+
+ dev_info(&phydev->mdio.dev, "%s SFP module inserted\n", phy_modes(iface));
+
+ switch (iface) {
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_100BASEX:
+ /* Set PHY mode to PSGMII combo (1/4 copper + combo ports) mode */
+ ret = phy_modify(phydev,
+ QCA807X_CHIP_CONFIGURATION,
+ QCA807X_CHIP_CONFIGURATION_MODE_CFG_MASK,
+ QCA807X_CHIP_CONFIGURATION_MODE_PSGMII_FIBER);
+ /* Enable fiber mode autodection (1000Base-X or 100Base-FX) */
+ ret = phy_set_bits_mmd(phydev,
+ MDIO_MMD_AN,
+ QCA807X_MMD7_FIBER_MODE_AUTO_DETECTION,
+ QCA807X_MMD7_FIBER_MODE_AUTO_DETECTION_EN);
+ /* Select fiber page */
+ ret = phy_clear_bits(phydev,
+ QCA807X_CHIP_CONFIGURATION,
+ QCA807X_BT_BX_REG_SEL);
+
+ phydev->port = PORT_FIBRE;
+ break;
+ default:
+ dev_err(&phydev->mdio.dev, "Incompatible SFP module inserted\n");
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static void qca807x_sfp_remove(void *upstream)
+{
+ struct phy_device *phydev = upstream;
+
+ /* Select copper page */
+ phy_set_bits(phydev,
+ QCA807X_CHIP_CONFIGURATION,
+ QCA807X_BT_BX_REG_SEL);
+
+ phydev->port = PORT_TP;
+}
+
+static const struct sfp_upstream_ops qca807x_sfp_ops = {
+ .attach = phy_sfp_attach,
+ .detach = phy_sfp_detach,
+ .module_insert = qca807x_sfp_insert,
+ .module_remove = qca807x_sfp_remove,
+};
+
+static int qca807x_probe(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct qca807x_shared_priv *shared_priv;
+ struct device *dev = &phydev->mdio.dev;
+ struct phy_package_shared *shared;
+ struct qca807x_priv *priv;
+ int ret;
+
+ ret = devm_of_phy_package_join(dev, phydev, sizeof(*shared_priv));
+ if (ret)
+ return ret;
+
+ if (phy_package_probe_once(phydev)) {
+ ret = qca807x_phy_package_probe_once(phydev);
+ if (ret)
+ return ret;
+ }
+
+ shared = phydev->shared;
+ shared_priv = shared->priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dac_full_amplitude = of_property_read_bool(node, "qcom,dac-full-amplitude");
+ priv->dac_full_bias_current = of_property_read_bool(node, "qcom,dac-full-bias-current");
+ priv->dac_disable_bias_current_tweak = of_property_read_bool(node,
+ "qcom,dac-disable-bias-current-tweak");
+
+#if IS_ENABLED(CONFIG_GPIOLIB)
+ /* Make sure we don't have mixed leds node and gpio-controller
+ * to prevent registering leds and having gpio-controller usage
+ * conflicting with them.
+ */
+ if (of_find_property(node, "leds", NULL) &&
+ of_find_property(node, "gpio-controller", NULL)) {
+ phydev_err(phydev, "Invalid property detected. LEDs and gpio-controller are mutually exclusive.");
+ return -EINVAL;
+ }
+
+ /* Do not register a GPIO controller unless flagged for it */
+ if (of_property_read_bool(node, "gpio-controller")) {
+ ret = qca807x_gpio(phydev);
+ if (ret)
+ return ret;
+ }
+#endif
+
+ /* Attach SFP bus on combo port*/
+ if (phy_read(phydev, QCA807X_CHIP_CONFIGURATION)) {
+ ret = phy_sfp_probe(phydev, &qca807x_sfp_ops);
+ if (ret)
+ return ret;
+ linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->advertising);
+ }
+
+ phydev->priv = priv;
+
+ return 0;
+}
+
+static int qca807x_config_init(struct phy_device *phydev)
+{
+ struct qca807x_priv *priv = phydev->priv;
+ u16 control_dac;
+ int ret;
+
+ if (phy_package_init_once(phydev)) {
+ ret = qca807x_phy_package_config_init_once(phydev);
+ if (ret)
+ return ret;
+ }
+
+ control_dac = phy_read_mmd(phydev, MDIO_MMD_AN,
+ QCA807X_MMD7_1000BASE_T_POWER_SAVE_PER_CABLE_LENGTH);
+ control_dac &= ~QCA807X_CONTROL_DAC_MASK;
+ if (!priv->dac_full_amplitude)
+ control_dac |= QCA807X_CONTROL_DAC_DSP_AMPLITUDE;
+ if (!priv->dac_full_amplitude)
+ control_dac |= QCA807X_CONTROL_DAC_DSP_BIAS_CURRENT;
+ if (!priv->dac_disable_bias_current_tweak)
+ control_dac |= QCA807X_CONTROL_DAC_BIAS_CURRENT_TWEAK;
+ return phy_write_mmd(phydev, MDIO_MMD_AN,
+ QCA807X_MMD7_1000BASE_T_POWER_SAVE_PER_CABLE_LENGTH,
+ control_dac);
+}
+
+static struct phy_driver qca807x_drivers[] = {
+ {
+ PHY_ID_MATCH_EXACT(PHY_ID_QCA8072),
+ .name = "Qualcomm QCA8072",
+ .flags = PHY_POLL_CABLE_TEST,
+ /* PHY_GBIT_FEATURES */
+ .probe = qca807x_probe,
+ .config_init = qca807x_config_init,
+ .read_status = qca807x_read_status,
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .soft_reset = genphy_soft_reset,
+ .get_tunable = at803x_get_tunable,
+ .set_tunable = at803x_set_tunable,
+ .resume = genphy_resume,
+ .suspend = genphy_suspend,
+ .cable_test_start = qca807x_cable_test_start,
+ .cable_test_get_status = qca808x_cable_test_get_status,
+ },
+ {
+ PHY_ID_MATCH_EXACT(PHY_ID_QCA8075),
+ .name = "Qualcomm QCA8075",
+ .flags = PHY_POLL_CABLE_TEST,
+ /* PHY_GBIT_FEATURES */
+ .probe = qca807x_probe,
+ .config_init = qca807x_config_init,
+ .read_status = qca807x_read_status,
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .soft_reset = genphy_soft_reset,
+ .get_tunable = at803x_get_tunable,
+ .set_tunable = at803x_set_tunable,
+ .resume = genphy_resume,
+ .suspend = genphy_suspend,
+ .cable_test_start = qca807x_cable_test_start,
+ .cable_test_get_status = qca808x_cable_test_get_status,
+ .led_brightness_set = qca807x_led_brightness_set,
+ .led_blink_set = qca807x_led_blink_set,
+ .led_hw_is_supported = qca807x_led_hw_is_supported,
+ .led_hw_control_set = qca807x_led_hw_control_set,
+ .led_hw_control_get = qca807x_led_hw_control_get,
+ },
+};
+module_phy_driver(qca807x_drivers);
+
+static struct mdio_device_id __maybe_unused qca807x_tbl[] = {
+ { PHY_ID_MATCH_EXACT(PHY_ID_QCA8072) },
+ { PHY_ID_MATCH_EXACT(PHY_ID_QCA8075) },
+ { }
+};
+
+MODULE_AUTHOR("Robert Marko <robert.marko@sartura.hr>");
+MODULE_AUTHOR("Christian Marangi <ansuelsmth@gmail.com>");
+MODULE_DESCRIPTION("Qualcomm QCA807x PHY driver");
+MODULE_DEVICE_TABLE(mdio, qca807x_tbl);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c
new file mode 100644
index 000000000000..5048304ccc9e
--- /dev/null
+++ b/drivers/net/phy/qcom/qca808x.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/phy.h>
+#include <linux/module.h>
+
+#include "qcom.h"
+
+/* ADC threshold */
+#define QCA808X_PHY_DEBUG_ADC_THRESHOLD 0x2c80
+#define QCA808X_ADC_THRESHOLD_MASK GENMASK(7, 0)
+#define QCA808X_ADC_THRESHOLD_80MV 0
+#define QCA808X_ADC_THRESHOLD_100MV 0xf0
+#define QCA808X_ADC_THRESHOLD_200MV 0x0f
+#define QCA808X_ADC_THRESHOLD_300MV 0xff
+
+/* CLD control */
+#define QCA808X_PHY_MMD3_ADDR_CLD_CTRL7 0x8007
+#define QCA808X_8023AZ_AFE_CTRL_MASK GENMASK(8, 4)
+#define QCA808X_8023AZ_AFE_EN 0x90
+
+/* AZ control */
+#define QCA808X_PHY_MMD3_AZ_TRAINING_CTRL 0x8008
+#define QCA808X_MMD3_AZ_TRAINING_VAL 0x1c32
+
+#define QCA808X_PHY_MMD1_MSE_THRESHOLD_20DB 0x8014
+#define QCA808X_MSE_THRESHOLD_20DB_VALUE 0x529
+
+#define QCA808X_PHY_MMD1_MSE_THRESHOLD_17DB 0x800E
+#define QCA808X_MSE_THRESHOLD_17DB_VALUE 0x341
+
+#define QCA808X_PHY_MMD1_MSE_THRESHOLD_27DB 0x801E
+#define QCA808X_MSE_THRESHOLD_27DB_VALUE 0x419
+
+#define QCA808X_PHY_MMD1_MSE_THRESHOLD_28DB 0x8020
+#define QCA808X_MSE_THRESHOLD_28DB_VALUE 0x341
+
+#define QCA808X_PHY_MMD7_TOP_OPTION1 0x901c
+#define QCA808X_TOP_OPTION1_DATA 0x0
+
+#define QCA808X_PHY_MMD3_DEBUG_1 0xa100
+#define QCA808X_MMD3_DEBUG_1_VALUE 0x9203
+#define QCA808X_PHY_MMD3_DEBUG_2 0xa101
+#define QCA808X_MMD3_DEBUG_2_VALUE 0x48ad
+#define QCA808X_PHY_MMD3_DEBUG_3 0xa103
+#define QCA808X_MMD3_DEBUG_3_VALUE 0x1698
+#define QCA808X_PHY_MMD3_DEBUG_4 0xa105
+#define QCA808X_MMD3_DEBUG_4_VALUE 0x8001
+#define QCA808X_PHY_MMD3_DEBUG_5 0xa106
+#define QCA808X_MMD3_DEBUG_5_VALUE 0x1111
+#define QCA808X_PHY_MMD3_DEBUG_6 0xa011
+#define QCA808X_MMD3_DEBUG_6_VALUE 0x5f85
+
+/* master/slave seed config */
+#define QCA808X_PHY_DEBUG_LOCAL_SEED 9
+#define QCA808X_MASTER_SLAVE_SEED_ENABLE BIT(1)
+#define QCA808X_MASTER_SLAVE_SEED_CFG GENMASK(12, 2)
+#define QCA808X_MASTER_SLAVE_SEED_RANGE 0x32
+
+/* Hibernation yields lower power consumpiton in contrast with normal operation mode.
+ * when the copper cable is unplugged, the PHY enters into hibernation mode in about 10s.
+ */
+#define QCA808X_DBG_AN_TEST 0xb
+#define QCA808X_HIBERNATION_EN BIT(15)
+
+#define QCA808X_MMD7_LED2_CTRL 0x8074
+#define QCA808X_MMD7_LED2_FORCE_CTRL 0x8075
+#define QCA808X_MMD7_LED1_CTRL 0x8076
+#define QCA808X_MMD7_LED1_FORCE_CTRL 0x8077
+#define QCA808X_MMD7_LED0_CTRL 0x8078
+#define QCA808X_MMD7_LED_CTRL(x) (0x8078 - ((x) * 2))
+
+#define QCA808X_MMD7_LED0_FORCE_CTRL 0x8079
+#define QCA808X_MMD7_LED_FORCE_CTRL(x) (0x8079 - ((x) * 2))
+
+#define QCA808X_MMD7_LED_POLARITY_CTRL 0x901a
+/* QSDK sets by default 0x46 to this reg that sets BIT 6 for
+ * LED to active high. It's not clear what BIT 3 and BIT 4 does.
+ */
+#define QCA808X_LED_ACTIVE_HIGH BIT(6)
+
+/* QCA808X 1G chip type */
+#define QCA808X_PHY_MMD7_CHIP_TYPE 0x901d
+#define QCA808X_PHY_CHIP_TYPE_1G BIT(0)
+
+#define QCA8081_PHY_SERDES_MMD1_FIFO_CTRL 0x9072
+#define QCA8081_PHY_FIFO_RSTN BIT(11)
+
+#define QCA8081_PHY_ID 0x004dd101
+
+MODULE_DESCRIPTION("Qualcomm Atheros QCA808X PHY driver");
+MODULE_AUTHOR("Matus Ujhelyi");
+MODULE_LICENSE("GPL");
+
+struct qca808x_priv {
+ int led_polarity_mode;
+};
+
+static int qca808x_phy_fast_retrain_config(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Enable fast retrain */
+ ret = genphy_c45_fast_retrain(phydev, true);
+ if (ret)
+ return ret;
+
+ phy_write_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_TOP_OPTION1,
+ QCA808X_TOP_OPTION1_DATA);
+ phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_20DB,
+ QCA808X_MSE_THRESHOLD_20DB_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_17DB,
+ QCA808X_MSE_THRESHOLD_17DB_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_27DB,
+ QCA808X_MSE_THRESHOLD_27DB_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_28DB,
+ QCA808X_MSE_THRESHOLD_28DB_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_1,
+ QCA808X_MMD3_DEBUG_1_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_4,
+ QCA808X_MMD3_DEBUG_4_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_5,
+ QCA808X_MMD3_DEBUG_5_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_3,
+ QCA808X_MMD3_DEBUG_3_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_6,
+ QCA808X_MMD3_DEBUG_6_VALUE);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_2,
+ QCA808X_MMD3_DEBUG_2_VALUE);
+
+ return 0;
+}
+
+static int qca808x_phy_ms_seed_enable(struct phy_device *phydev, bool enable)
+{
+ u16 seed_value;
+
+ if (!enable)
+ return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
+ QCA808X_MASTER_SLAVE_SEED_ENABLE, 0);
+
+ seed_value = get_random_u32_below(QCA808X_MASTER_SLAVE_SEED_RANGE);
+ return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
+ QCA808X_MASTER_SLAVE_SEED_CFG | QCA808X_MASTER_SLAVE_SEED_ENABLE,
+ FIELD_PREP(QCA808X_MASTER_SLAVE_SEED_CFG, seed_value) |
+ QCA808X_MASTER_SLAVE_SEED_ENABLE);
+}
+
+static bool qca808x_is_prefer_master(struct phy_device *phydev)
+{
+ return (phydev->master_slave_get == MASTER_SLAVE_CFG_MASTER_FORCE) ||
+ (phydev->master_slave_get == MASTER_SLAVE_CFG_MASTER_PREFERRED);
+}
+
+static bool qca808x_has_fast_retrain_or_slave_seed(struct phy_device *phydev)
+{
+ return linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported);
+}
+
+static bool qca808x_is_1g_only(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_CHIP_TYPE);
+ if (ret < 0)
+ return true;
+
+ return !!(QCA808X_PHY_CHIP_TYPE_1G & ret);
+}
+
+static void qca808x_fill_possible_interfaces(struct phy_device *phydev)
+{
+ unsigned long *possible = phydev->possible_interfaces;
+
+ __set_bit(PHY_INTERFACE_MODE_SGMII, possible);
+
+ if (!qca808x_is_1g_only(phydev))
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX, possible);
+}
+
+static int qca808x_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct qca808x_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ /* Init LED polarity mode to -1 */
+ priv->led_polarity_mode = -1;
+
+ phydev->priv = priv;
+
+ return 0;
+}
+
+static int qca808x_config_init(struct phy_device *phydev)
+{
+ struct qca808x_priv *priv = phydev->priv;
+ int ret;
+
+ /* Default to LED Active High if active-low not in DT */
+ if (priv->led_polarity_mode == -1) {
+ ret = phy_set_bits_mmd(phydev, MDIO_MMD_AN,
+ QCA808X_MMD7_LED_POLARITY_CTRL,
+ QCA808X_LED_ACTIVE_HIGH);
+ if (ret)
+ return ret;
+ }
+
+ /* Active adc&vga on 802.3az for the link 1000M and 100M */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_ADDR_CLD_CTRL7,
+ QCA808X_8023AZ_AFE_CTRL_MASK, QCA808X_8023AZ_AFE_EN);
+ if (ret)
+ return ret;
+
+ /* Adjust the threshold on 802.3az for the link 1000M */
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ QCA808X_PHY_MMD3_AZ_TRAINING_CTRL,
+ QCA808X_MMD3_AZ_TRAINING_VAL);
+ if (ret)
+ return ret;
+
+ if (qca808x_has_fast_retrain_or_slave_seed(phydev)) {
+ /* Config the fast retrain for the link 2500M */
+ ret = qca808x_phy_fast_retrain_config(phydev);
+ if (ret)
+ return ret;
+
+ ret = genphy_read_master_slave(phydev);
+ if (ret < 0)
+ return ret;
+
+ if (!qca808x_is_prefer_master(phydev)) {
+ /* Enable seed and configure lower ramdom seed to make phy
+ * linked as slave mode.
+ */
+ ret = qca808x_phy_ms_seed_enable(phydev, true);
+ if (ret)
+ return ret;
+ }
+ }
+
+ qca808x_fill_possible_interfaces(phydev);
+
+ /* Configure adc threshold as 100mv for the link 10M */
+ return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_ADC_THRESHOLD,
+ QCA808X_ADC_THRESHOLD_MASK,
+ QCA808X_ADC_THRESHOLD_100MV);
+}
+
+static int qca808x_read_status(struct phy_device *phydev)
+{
+ struct at803x_ss_mask ss_mask = { 0 };
+ int ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
+ if (ret < 0)
+ return ret;
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->lp_advertising,
+ ret & MDIO_AN_10GBT_STAT_LP2_5G);
+
+ ret = genphy_read_status(phydev);
+ if (ret)
+ return ret;
+
+ /* qca8081 takes the different bits for speed value from at803x */
+ ss_mask.speed_mask = QCA808X_SS_SPEED_MASK;
+ ss_mask.speed_shift = __bf_shf(QCA808X_SS_SPEED_MASK);
+ ret = at803x_read_specific_status(phydev, ss_mask);
+ if (ret < 0)
+ return ret;
+
+ if (phydev->link) {
+ if (phydev->speed == SPEED_2500)
+ phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+ else
+ phydev->interface = PHY_INTERFACE_MODE_SGMII;
+ } else {
+ /* generate seed as a lower random value to make PHY linked as SLAVE easily,
+ * except for master/slave configuration fault detected or the master mode
+ * preferred.
+ *
+ * the reason for not putting this code into the function link_change_notify is
+ * the corner case where the link partner is also the qca8081 PHY and the seed
+ * value is configured as the same value, the link can't be up and no link change
+ * occurs.
+ */
+ if (qca808x_has_fast_retrain_or_slave_seed(phydev)) {
+ if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR ||
+ qca808x_is_prefer_master(phydev)) {
+ qca808x_phy_ms_seed_enable(phydev, false);
+ } else {
+ qca808x_phy_ms_seed_enable(phydev, true);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int qca808x_soft_reset(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_soft_reset(phydev);
+ if (ret < 0)
+ return ret;
+
+ if (qca808x_has_fast_retrain_or_slave_seed(phydev))
+ ret = qca808x_phy_ms_seed_enable(phydev, true);
+
+ return ret;
+}
+
+static int qca808x_cable_test_start(struct phy_device *phydev)
+{
+ int ret;
+
+ /* perform CDT with the following configs:
+ * 1. disable hibernation.
+ * 2. force PHY working in MDI mode.
+ * 3. for PHY working in 1000BaseT.
+ * 4. configure the threshold.
+ */
+
+ ret = at803x_debug_reg_mask(phydev, QCA808X_DBG_AN_TEST, QCA808X_HIBERNATION_EN, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = at803x_config_mdix(phydev, ETH_TP_MDI);
+ if (ret < 0)
+ return ret;
+
+ /* Force 1000base-T needs to configure PMA/PMD and MII_BMCR */
+ phydev->duplex = DUPLEX_FULL;
+ phydev->speed = SPEED_1000;
+ ret = genphy_c45_pma_setup_forced(phydev);
+ if (ret < 0)
+ return ret;
+
+ ret = genphy_setup_forced(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* configure the thresholds for open, short, pair ok test */
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8074, 0xc040);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8076, 0xc040);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8077, 0xa060);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8078, 0xc050);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x807a, 0xc060);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x807e, 0xb060);
+
+ return 0;
+}
+
+static int qca808x_get_features(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_c45_pma_read_abilities(phydev);
+ if (ret)
+ return ret;
+
+ /* The autoneg ability is not existed in bit3 of MMD7.1,
+ * but it is supported by qca808x PHY, so we add it here
+ * manually.
+ */
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported);
+
+ /* As for the qca8081 1G version chip, the 2500baseT ability is also
+ * existed in the bit0 of MMD1.21, we need to remove it manually if
+ * it is the qca8081 1G chip according to the bit0 of MMD7.0x901d.
+ */
+ if (qca808x_is_1g_only(phydev))
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported);
+
+ return 0;
+}
+
+static int qca808x_config_aneg(struct phy_device *phydev)
+{
+ int phy_ctrl = 0;
+ int ret;
+
+ ret = at803x_prepare_config_aneg(phydev);
+ if (ret)
+ return ret;
+
+ /* The reg MII_BMCR also needs to be configured for force mode, the
+ * genphy_config_aneg is also needed.
+ */
+ if (phydev->autoneg == AUTONEG_DISABLE)
+ genphy_c45_pma_setup_forced(phydev);
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->advertising))
+ phy_ctrl = MDIO_AN_10GBT_CTRL_ADV2_5G;
+
+ ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
+ MDIO_AN_10GBT_CTRL_ADV2_5G, phy_ctrl);
+ if (ret < 0)
+ return ret;
+
+ return __genphy_config_aneg(phydev, ret);
+}
+
+static void qca808x_link_change_notify(struct phy_device *phydev)
+{
+ /* Assert interface sgmii fifo on link down, deassert it on link up,
+ * the interface device address is always phy address added by 1.
+ */
+ mdiobus_c45_modify_changed(phydev->mdio.bus, phydev->mdio.addr + 1,
+ MDIO_MMD_PMAPMD, QCA8081_PHY_SERDES_MMD1_FIFO_CTRL,
+ QCA8081_PHY_FIFO_RSTN,
+ phydev->link ? QCA8081_PHY_FIFO_RSTN : 0);
+}
+
+static int qca808x_led_parse_netdev(struct phy_device *phydev, unsigned long rules,
+ u16 *offload_trigger)
+{
+ /* Parsing specific to netdev trigger */
+ if (test_bit(TRIGGER_NETDEV_TX, &rules))
+ *offload_trigger |= QCA808X_LED_TX_BLINK;
+ if (test_bit(TRIGGER_NETDEV_RX, &rules))
+ *offload_trigger |= QCA808X_LED_RX_BLINK;
+ if (test_bit(TRIGGER_NETDEV_LINK_10, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED10_ON;
+ if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED100_ON;
+ if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED1000_ON;
+ if (test_bit(TRIGGER_NETDEV_LINK_2500, &rules))
+ *offload_trigger |= QCA808X_LED_SPEED2500_ON;
+ if (test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &rules))
+ *offload_trigger |= QCA808X_LED_HALF_DUPLEX_ON;
+ if (test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &rules))
+ *offload_trigger |= QCA808X_LED_FULL_DUPLEX_ON;
+
+ if (rules && !*offload_trigger)
+ return -EOPNOTSUPP;
+
+ /* Enable BLINK_CHECK_BYPASS by default to make the LED
+ * blink even with duplex or speed mode not enabled.
+ */
+ *offload_trigger |= QCA808X_LED_BLINK_CHECK_BYPASS;
+
+ return 0;
+}
+
+static int qca808x_led_hw_control_enable(struct phy_device *phydev, u8 index)
+{
+ u16 reg;
+
+ if (index > 2)
+ return -EINVAL;
+
+ reg = QCA808X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_hw_control_enable(phydev, reg);
+}
+
+static int qca808x_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ u16 offload_trigger = 0;
+
+ if (index > 2)
+ return -EINVAL;
+
+ return qca808x_led_parse_netdev(phydev, rules, &offload_trigger);
+}
+
+static int qca808x_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ u16 reg, offload_trigger = 0;
+ int ret;
+
+ if (index > 2)
+ return -EINVAL;
+
+ reg = QCA808X_MMD7_LED_CTRL(index);
+
+ ret = qca808x_led_parse_netdev(phydev, rules, &offload_trigger);
+ if (ret)
+ return ret;
+
+ ret = qca808x_led_hw_control_enable(phydev, index);
+ if (ret)
+ return ret;
+
+ return phy_modify_mmd(phydev, MDIO_MMD_AN, reg,
+ QCA808X_LED_PATTERN_MASK,
+ offload_trigger);
+}
+
+static bool qca808x_led_hw_control_status(struct phy_device *phydev, u8 index)
+{
+ u16 reg;
+
+ if (index > 2)
+ return false;
+
+ reg = QCA808X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_hw_control_status(phydev, reg);
+}
+
+static int qca808x_led_hw_control_get(struct phy_device *phydev, u8 index,
+ unsigned long *rules)
+{
+ u16 reg;
+ int val;
+
+ if (index > 2)
+ return -EINVAL;
+
+ /* Check if we have hw control enabled */
+ if (qca808x_led_hw_control_status(phydev, index))
+ return -EINVAL;
+
+ reg = QCA808X_MMD7_LED_CTRL(index);
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, reg);
+ if (val & QCA808X_LED_TX_BLINK)
+ set_bit(TRIGGER_NETDEV_TX, rules);
+ if (val & QCA808X_LED_RX_BLINK)
+ set_bit(TRIGGER_NETDEV_RX, rules);
+ if (val & QCA808X_LED_SPEED10_ON)
+ set_bit(TRIGGER_NETDEV_LINK_10, rules);
+ if (val & QCA808X_LED_SPEED100_ON)
+ set_bit(TRIGGER_NETDEV_LINK_100, rules);
+ if (val & QCA808X_LED_SPEED1000_ON)
+ set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+ if (val & QCA808X_LED_SPEED2500_ON)
+ set_bit(TRIGGER_NETDEV_LINK_2500, rules);
+ if (val & QCA808X_LED_HALF_DUPLEX_ON)
+ set_bit(TRIGGER_NETDEV_HALF_DUPLEX, rules);
+ if (val & QCA808X_LED_FULL_DUPLEX_ON)
+ set_bit(TRIGGER_NETDEV_FULL_DUPLEX, rules);
+
+ return 0;
+}
+
+static int qca808x_led_hw_control_reset(struct phy_device *phydev, u8 index)
+{
+ u16 reg;
+
+ if (index > 2)
+ return -EINVAL;
+
+ reg = QCA808X_MMD7_LED_CTRL(index);
+
+ return phy_clear_bits_mmd(phydev, MDIO_MMD_AN, reg,
+ QCA808X_LED_PATTERN_MASK);
+}
+
+static int qca808x_led_brightness_set(struct phy_device *phydev,
+ u8 index, enum led_brightness value)
+{
+ u16 reg;
+ int ret;
+
+ if (index > 2)
+ return -EINVAL;
+
+ if (!value) {
+ ret = qca808x_led_hw_control_reset(phydev, index);
+ if (ret)
+ return ret;
+ }
+
+ reg = QCA808X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_brightness_set(phydev, reg, value);
+}
+
+static int qca808x_led_blink_set(struct phy_device *phydev, u8 index,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ u16 reg;
+
+ if (index > 2)
+ return -EINVAL;
+
+ reg = QCA808X_MMD7_LED_FORCE_CTRL(index);
+ return qca808x_led_reg_blink_set(phydev, reg, delay_on, delay_off);
+}
+
+static int qca808x_led_polarity_set(struct phy_device *phydev, int index,
+ unsigned long modes)
+{
+ struct qca808x_priv *priv = phydev->priv;
+ bool active_low = false;
+ u32 mode;
+
+ for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) {
+ switch (mode) {
+ case PHY_LED_ACTIVE_LOW:
+ active_low = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* PHY polarity is global and can't be set per LED.
+ * To detect this, check if last requested polarity mode
+ * match the new one.
+ */
+ if (priv->led_polarity_mode >= 0 &&
+ priv->led_polarity_mode != active_low) {
+ phydev_err(phydev, "PHY polarity is global. Mismatched polarity on different LED\n");
+ return -EINVAL;
+ }
+
+ /* Save the last PHY polarity mode */
+ priv->led_polarity_mode = active_low;
+
+ return phy_modify_mmd(phydev, MDIO_MMD_AN,
+ QCA808X_MMD7_LED_POLARITY_CTRL,
+ QCA808X_LED_ACTIVE_HIGH,
+ active_low ? 0 : QCA808X_LED_ACTIVE_HIGH);
+}
+
+static struct phy_driver qca808x_driver[] = {
+{
+ /* Qualcomm QCA8081 */
+ PHY_ID_MATCH_EXACT(QCA8081_PHY_ID),
+ .name = "Qualcomm QCA8081",
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = qca808x_probe,
+ .config_intr = at803x_config_intr,
+ .handle_interrupt = at803x_handle_interrupt,
+ .get_tunable = at803x_get_tunable,
+ .set_tunable = at803x_set_tunable,
+ .set_wol = at803x_set_wol,
+ .get_wol = at803x_get_wol,
+ .get_features = qca808x_get_features,
+ .config_aneg = qca808x_config_aneg,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .read_status = qca808x_read_status,
+ .config_init = qca808x_config_init,
+ .soft_reset = qca808x_soft_reset,
+ .cable_test_start = qca808x_cable_test_start,
+ .cable_test_get_status = qca808x_cable_test_get_status,
+ .link_change_notify = qca808x_link_change_notify,
+ .led_brightness_set = qca808x_led_brightness_set,
+ .led_blink_set = qca808x_led_blink_set,
+ .led_hw_is_supported = qca808x_led_hw_is_supported,
+ .led_hw_control_set = qca808x_led_hw_control_set,
+ .led_hw_control_get = qca808x_led_hw_control_get,
+ .led_polarity_set = qca808x_led_polarity_set,
+}, };
+
+module_phy_driver(qca808x_driver);
+
+static struct mdio_device_id __maybe_unused qca808x_tbl[] = {
+ { PHY_ID_MATCH_EXACT(QCA8081_PHY_ID) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, qca808x_tbl);
diff --git a/drivers/net/phy/qcom/qca83xx.c b/drivers/net/phy/qcom/qca83xx.c
new file mode 100644
index 000000000000..5d083ef0250e
--- /dev/null
+++ b/drivers/net/phy/qcom/qca83xx.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/phy.h>
+#include <linux/module.h>
+
+#include "qcom.h"
+
+#define AT803X_DEBUG_REG_3C 0x3C
+
+#define AT803X_DEBUG_REG_GREEN 0x3D
+#define AT803X_DEBUG_GATE_CLK_IN1000 BIT(6)
+
+#define MDIO_AZ_DEBUG 0x800D
+
+#define QCA8327_A_PHY_ID 0x004dd033
+#define QCA8327_B_PHY_ID 0x004dd034
+#define QCA8337_PHY_ID 0x004dd036
+#define QCA8K_PHY_ID_MASK 0xffffffff
+
+#define QCA8K_DEVFLAGS_REVISION_MASK GENMASK(2, 0)
+
+static struct at803x_hw_stat qca83xx_hw_stats[] = {
+ { "phy_idle_errors", 0xa, GENMASK(7, 0), PHY},
+ { "phy_receive_errors", 0x15, GENMASK(15, 0), PHY},
+ { "eee_wake_errors", 0x16, GENMASK(15, 0), MMD},
+};
+
+struct qca83xx_priv {
+ u64 stats[ARRAY_SIZE(qca83xx_hw_stats)];
+};
+
+MODULE_DESCRIPTION("Qualcomm Atheros QCA83XX PHY driver");
+MODULE_AUTHOR("Matus Ujhelyi");
+MODULE_AUTHOR("Christian Marangi <ansuelsmth@gmail.com>");
+MODULE_LICENSE("GPL");
+
+static int qca83xx_get_sset_count(struct phy_device *phydev)
+{
+ return ARRAY_SIZE(qca83xx_hw_stats);
+}
+
+static void qca83xx_get_strings(struct phy_device *phydev, u8 *data)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qca83xx_hw_stats); i++) {
+ strscpy(data + i * ETH_GSTRING_LEN,
+ qca83xx_hw_stats[i].string, ETH_GSTRING_LEN);
+ }
+}
+
+static u64 qca83xx_get_stat(struct phy_device *phydev, int i)
+{
+ struct at803x_hw_stat stat = qca83xx_hw_stats[i];
+ struct qca83xx_priv *priv = phydev->priv;
+ int val;
+ u64 ret;
+
+ if (stat.access_type == MMD)
+ val = phy_read_mmd(phydev, MDIO_MMD_PCS, stat.reg);
+ else
+ val = phy_read(phydev, stat.reg);
+
+ if (val < 0) {
+ ret = U64_MAX;
+ } else {
+ val = val & stat.mask;
+ priv->stats[i] += val;
+ ret = priv->stats[i];
+ }
+
+ return ret;
+}
+
+static void qca83xx_get_stats(struct phy_device *phydev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qca83xx_hw_stats); i++)
+ data[i] = qca83xx_get_stat(phydev, i);
+}
+
+static int qca83xx_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct qca83xx_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ return 0;
+}
+
+static int qca83xx_config_init(struct phy_device *phydev)
+{
+ u8 switch_revision;
+
+ switch_revision = phydev->dev_flags & QCA8K_DEVFLAGS_REVISION_MASK;
+
+ switch (switch_revision) {
+ case 1:
+ /* For 100M waveform */
+ at803x_debug_reg_write(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, 0x02ea);
+ /* Turn on Gigabit clock */
+ at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_GREEN, 0x68a0);
+ break;
+
+ case 2:
+ phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0);
+ fallthrough;
+ case 4:
+ phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_AZ_DEBUG, 0x803f);
+ at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_GREEN, 0x6860);
+ at803x_debug_reg_write(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, 0x2c46);
+ at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3C, 0x6000);
+ break;
+ }
+
+ /* Following original QCA sourcecode set port to prefer master */
+ phy_set_bits(phydev, MII_CTRL1000, CTL1000_PREFER_MASTER);
+
+ return 0;
+}
+
+static int qca8327_config_init(struct phy_device *phydev)
+{
+ /* QCA8327 require DAC amplitude adjustment for 100m set to +6%.
+ * Disable on init and enable only with 100m speed following
+ * qca original source code.
+ */
+ at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
+ QCA8327_DEBUG_MANU_CTRL_EN, 0);
+
+ return qca83xx_config_init(phydev);
+}
+
+static void qca83xx_link_change_notify(struct phy_device *phydev)
+{
+ /* Set DAC Amplitude adjustment to +6% for 100m on link running */
+ if (phydev->state == PHY_RUNNING) {
+ if (phydev->speed == SPEED_100)
+ at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
+ QCA8327_DEBUG_MANU_CTRL_EN,
+ QCA8327_DEBUG_MANU_CTRL_EN);
+ } else {
+ /* Reset DAC Amplitude adjustment */
+ at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL,
+ QCA8327_DEBUG_MANU_CTRL_EN, 0);
+ }
+}
+
+static int qca83xx_resume(struct phy_device *phydev)
+{
+ int ret, val;
+
+ /* Skip reset if not suspended */
+ if (!phydev->suspended)
+ return 0;
+
+ /* Reinit the port, reset values set by suspend */
+ qca83xx_config_init(phydev);
+
+ /* Reset the port on port resume */
+ phy_set_bits(phydev, MII_BMCR, BMCR_RESET | BMCR_ANENABLE);
+
+ /* On resume from suspend the switch execute a reset and
+ * restart auto-negotiation. Wait for reset to complete.
+ */
+ ret = phy_read_poll_timeout(phydev, MII_BMCR, val, !(val & BMCR_RESET),
+ 50000, 600000, true);
+ if (ret)
+ return ret;
+
+ usleep_range(1000, 2000);
+
+ return 0;
+}
+
+static int qca83xx_suspend(struct phy_device *phydev)
+{
+ at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_GREEN,
+ AT803X_DEBUG_GATE_CLK_IN1000, 0);
+
+ at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL,
+ AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE |
+ AT803X_DEBUG_HIB_CTRL_SEL_RST_80U, 0);
+
+ return 0;
+}
+
+static int qca8337_suspend(struct phy_device *phydev)
+{
+ /* Only QCA8337 support actual suspend. */
+ genphy_suspend(phydev);
+
+ return qca83xx_suspend(phydev);
+}
+
+static int qca8327_suspend(struct phy_device *phydev)
+{
+ u16 mask = 0;
+
+ /* QCA8327 cause port unreliability when phy suspend
+ * is set.
+ */
+ mask |= ~(BMCR_SPEED1000 | BMCR_FULLDPLX);
+ phy_modify(phydev, MII_BMCR, mask, 0);
+
+ return qca83xx_suspend(phydev);
+}
+
+static struct phy_driver qca83xx_driver[] = {
+{
+ /* QCA8337 */
+ .phy_id = QCA8337_PHY_ID,
+ .phy_id_mask = QCA8K_PHY_ID_MASK,
+ .name = "Qualcomm Atheros 8337 internal PHY",
+ /* PHY_GBIT_FEATURES */
+ .probe = qca83xx_probe,
+ .flags = PHY_IS_INTERNAL,
+ .config_init = qca83xx_config_init,
+ .soft_reset = genphy_soft_reset,
+ .get_sset_count = qca83xx_get_sset_count,
+ .get_strings = qca83xx_get_strings,
+ .get_stats = qca83xx_get_stats,
+ .suspend = qca8337_suspend,
+ .resume = qca83xx_resume,
+}, {
+ /* QCA8327-A from switch QCA8327-AL1A */
+ .phy_id = QCA8327_A_PHY_ID,
+ .phy_id_mask = QCA8K_PHY_ID_MASK,
+ .name = "Qualcomm Atheros 8327-A internal PHY",
+ /* PHY_GBIT_FEATURES */
+ .link_change_notify = qca83xx_link_change_notify,
+ .probe = qca83xx_probe,
+ .flags = PHY_IS_INTERNAL,
+ .config_init = qca8327_config_init,
+ .soft_reset = genphy_soft_reset,
+ .get_sset_count = qca83xx_get_sset_count,
+ .get_strings = qca83xx_get_strings,
+ .get_stats = qca83xx_get_stats,
+ .suspend = qca8327_suspend,
+ .resume = qca83xx_resume,
+}, {
+ /* QCA8327-B from switch QCA8327-BL1A */
+ .phy_id = QCA8327_B_PHY_ID,
+ .phy_id_mask = QCA8K_PHY_ID_MASK,
+ .name = "Qualcomm Atheros 8327-B internal PHY",
+ /* PHY_GBIT_FEATURES */
+ .link_change_notify = qca83xx_link_change_notify,
+ .probe = qca83xx_probe,
+ .flags = PHY_IS_INTERNAL,
+ .config_init = qca8327_config_init,
+ .soft_reset = genphy_soft_reset,
+ .get_sset_count = qca83xx_get_sset_count,
+ .get_strings = qca83xx_get_strings,
+ .get_stats = qca83xx_get_stats,
+ .suspend = qca8327_suspend,
+ .resume = qca83xx_resume,
+}, };
+
+module_phy_driver(qca83xx_driver);
+
+static struct mdio_device_id __maybe_unused qca83xx_tbl[] = {
+ { PHY_ID_MATCH_EXACT(QCA8337_PHY_ID) },
+ { PHY_ID_MATCH_EXACT(QCA8327_A_PHY_ID) },
+ { PHY_ID_MATCH_EXACT(QCA8327_B_PHY_ID) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, qca83xx_tbl);
diff --git a/drivers/net/phy/qcom/qcom-phy-lib.c b/drivers/net/phy/qcom/qcom-phy-lib.c
new file mode 100644
index 000000000000..d28815ef56bb
--- /dev/null
+++ b/drivers/net/phy/qcom/qcom-phy-lib.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/phy.h>
+#include <linux/module.h>
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool_netlink.h>
+
+#include "qcom.h"
+
+MODULE_DESCRIPTION("Qualcomm PHY driver Common Functions");
+MODULE_AUTHOR("Matus Ujhelyi");
+MODULE_AUTHOR("Christian Marangi <ansuelsmth@gmail.com>");
+MODULE_LICENSE("GPL");
+
+int at803x_debug_reg_read(struct phy_device *phydev, u16 reg)
+{
+ int ret;
+
+ ret = phy_write(phydev, AT803X_DEBUG_ADDR, reg);
+ if (ret < 0)
+ return ret;
+
+ return phy_read(phydev, AT803X_DEBUG_DATA);
+}
+EXPORT_SYMBOL_GPL(at803x_debug_reg_read);
+
+int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg,
+ u16 clear, u16 set)
+{
+ u16 val;
+ int ret;
+
+ ret = at803x_debug_reg_read(phydev, reg);
+ if (ret < 0)
+ return ret;
+
+ val = ret & 0xffff;
+ val &= ~clear;
+ val |= set;
+
+ return phy_write(phydev, AT803X_DEBUG_DATA, val);
+}
+EXPORT_SYMBOL_GPL(at803x_debug_reg_mask);
+
+int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data)
+{
+ int ret;
+
+ ret = phy_write(phydev, AT803X_DEBUG_ADDR, reg);
+ if (ret < 0)
+ return ret;
+
+ return phy_write(phydev, AT803X_DEBUG_DATA, data);
+}
+EXPORT_SYMBOL_GPL(at803x_debug_reg_write);
+
+int at803x_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ int ret, irq_enabled;
+
+ if (wol->wolopts & WAKE_MAGIC) {
+ struct net_device *ndev = phydev->attached_dev;
+ const u8 *mac;
+ unsigned int i;
+ static const unsigned int offsets[] = {
+ AT803X_LOC_MAC_ADDR_32_47_OFFSET,
+ AT803X_LOC_MAC_ADDR_16_31_OFFSET,
+ AT803X_LOC_MAC_ADDR_0_15_OFFSET,
+ };
+
+ if (!ndev)
+ return -ENODEV;
+
+ mac = (const u8 *)ndev->dev_addr;
+
+ if (!is_valid_ether_addr(mac))
+ return -EINVAL;
+
+ for (i = 0; i < 3; i++)
+ phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
+ mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
+
+ /* Enable WOL interrupt */
+ ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
+ if (ret)
+ return ret;
+ } else {
+ /* Disable WOL interrupt */
+ ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
+ if (ret)
+ return ret;
+ }
+
+ /* Clear WOL status */
+ ret = phy_read(phydev, AT803X_INTR_STATUS);
+ if (ret < 0)
+ return ret;
+
+ /* Check if there are other interrupts except for WOL triggered when PHY is
+ * in interrupt mode, only the interrupts enabled by AT803X_INTR_ENABLE can
+ * be passed up to the interrupt PIN.
+ */
+ irq_enabled = phy_read(phydev, AT803X_INTR_ENABLE);
+ if (irq_enabled < 0)
+ return irq_enabled;
+
+ irq_enabled &= ~AT803X_INTR_ENABLE_WOL;
+ if (ret & irq_enabled && !phy_polling_mode(phydev))
+ phy_trigger_machine(phydev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(at803x_set_wol);
+
+void at803x_get_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ int value;
+
+ wol->supported = WAKE_MAGIC;
+ wol->wolopts = 0;
+
+ value = phy_read(phydev, AT803X_INTR_ENABLE);
+ if (value < 0)
+ return;
+
+ if (value & AT803X_INTR_ENABLE_WOL)
+ wol->wolopts |= WAKE_MAGIC;
+}
+EXPORT_SYMBOL_GPL(at803x_get_wol);
+
+int at803x_ack_interrupt(struct phy_device *phydev)
+{
+ int err;
+
+ err = phy_read(phydev, AT803X_INTR_STATUS);
+
+ return (err < 0) ? err : 0;
+}
+EXPORT_SYMBOL_GPL(at803x_ack_interrupt);
+
+int at803x_config_intr(struct phy_device *phydev)
+{
+ int err;
+ int value;
+
+ value = phy_read(phydev, AT803X_INTR_ENABLE);
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+ /* Clear any pending interrupts */
+ err = at803x_ack_interrupt(phydev);
+ if (err)
+ return err;
+
+ value |= AT803X_INTR_ENABLE_AUTONEG_ERR;
+ value |= AT803X_INTR_ENABLE_SPEED_CHANGED;
+ value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
+ value |= AT803X_INTR_ENABLE_LINK_FAIL;
+ value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
+
+ err = phy_write(phydev, AT803X_INTR_ENABLE, value);
+ } else {
+ err = phy_write(phydev, AT803X_INTR_ENABLE, 0);
+ if (err)
+ return err;
+
+ /* Clear any pending interrupts */
+ err = at803x_ack_interrupt(phydev);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(at803x_config_intr);
+
+irqreturn_t at803x_handle_interrupt(struct phy_device *phydev)
+{
+ int irq_status, int_enabled;
+
+ irq_status = phy_read(phydev, AT803X_INTR_STATUS);
+ if (irq_status < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ /* Read the current enabled interrupts */
+ int_enabled = phy_read(phydev, AT803X_INTR_ENABLE);
+ if (int_enabled < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+
+ /* See if this was one of our enabled interrupts */
+ if (!(irq_status & int_enabled))
+ return IRQ_NONE;
+
+ phy_trigger_machine(phydev);
+
+ return IRQ_HANDLED;
+}
+EXPORT_SYMBOL_GPL(at803x_handle_interrupt);
+
+int at803x_read_specific_status(struct phy_device *phydev,
+ struct at803x_ss_mask ss_mask)
+{
+ int ss;
+
+ /* Read the AT8035 PHY-Specific Status register, which indicates the
+ * speed and duplex that the PHY is actually using, irrespective of
+ * whether we are in autoneg mode or not.
+ */
+ ss = phy_read(phydev, AT803X_SPECIFIC_STATUS);
+ if (ss < 0)
+ return ss;
+
+ if (ss & AT803X_SS_SPEED_DUPLEX_RESOLVED) {
+ int sfc, speed;
+
+ sfc = phy_read(phydev, AT803X_SPECIFIC_FUNCTION_CONTROL);
+ if (sfc < 0)
+ return sfc;
+
+ speed = ss & ss_mask.speed_mask;
+ speed >>= ss_mask.speed_shift;
+
+ switch (speed) {
+ case AT803X_SS_SPEED_10:
+ phydev->speed = SPEED_10;
+ break;
+ case AT803X_SS_SPEED_100:
+ phydev->speed = SPEED_100;
+ break;
+ case AT803X_SS_SPEED_1000:
+ phydev->speed = SPEED_1000;
+ break;
+ case QCA808X_SS_SPEED_2500:
+ phydev->speed = SPEED_2500;
+ break;
+ }
+ if (ss & AT803X_SS_DUPLEX)
+ phydev->duplex = DUPLEX_FULL;
+ else
+ phydev->duplex = DUPLEX_HALF;
+
+ if (ss & AT803X_SS_MDIX)
+ phydev->mdix = ETH_TP_MDI_X;
+ else
+ phydev->mdix = ETH_TP_MDI;
+
+ switch (FIELD_GET(AT803X_SFC_MDI_CROSSOVER_MODE_M, sfc)) {
+ case AT803X_SFC_MANUAL_MDI:
+ phydev->mdix_ctrl = ETH_TP_MDI;
+ break;
+ case AT803X_SFC_MANUAL_MDIX:
+ phydev->mdix_ctrl = ETH_TP_MDI_X;
+ break;
+ case AT803X_SFC_AUTOMATIC_CROSSOVER:
+ phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+ break;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(at803x_read_specific_status);
+
+int at803x_config_mdix(struct phy_device *phydev, u8 ctrl)
+{
+ u16 val;
+
+ switch (ctrl) {
+ case ETH_TP_MDI:
+ val = AT803X_SFC_MANUAL_MDI;
+ break;
+ case ETH_TP_MDI_X:
+ val = AT803X_SFC_MANUAL_MDIX;
+ break;
+ case ETH_TP_MDI_AUTO:
+ val = AT803X_SFC_AUTOMATIC_CROSSOVER;
+ break;
+ default:
+ return 0;
+ }
+
+ return phy_modify_changed(phydev, AT803X_SPECIFIC_FUNCTION_CONTROL,
+ AT803X_SFC_MDI_CROSSOVER_MODE_M,
+ FIELD_PREP(AT803X_SFC_MDI_CROSSOVER_MODE_M, val));
+}
+EXPORT_SYMBOL_GPL(at803x_config_mdix);
+
+int at803x_prepare_config_aneg(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
+ if (ret < 0)
+ return ret;
+
+ /* Changes of the midx bits are disruptive to the normal operation;
+ * therefore any changes to these registers must be followed by a
+ * software reset to take effect.
+ */
+ if (ret == 1) {
+ ret = genphy_soft_reset(phydev);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(at803x_prepare_config_aneg);
+
+int at803x_read_status(struct phy_device *phydev)
+{
+ struct at803x_ss_mask ss_mask = { 0 };
+ int err, old_link = phydev->link;
+
+ /* Update the link, but return if there was an error */
+ err = genphy_update_link(phydev);
+ if (err)
+ return err;
+
+ /* why bother the PHY if nothing can have changed */
+ if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link)
+ return 0;
+
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
+ phydev->pause = 0;
+ phydev->asym_pause = 0;
+
+ err = genphy_read_lpa(phydev);
+ if (err < 0)
+ return err;
+
+ ss_mask.speed_mask = AT803X_SS_SPEED_MASK;
+ ss_mask.speed_shift = __bf_shf(AT803X_SS_SPEED_MASK);
+ err = at803x_read_specific_status(phydev, ss_mask);
+ if (err < 0)
+ return err;
+
+ if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete)
+ phy_resolve_aneg_pause(phydev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(at803x_read_status);
+
+static int at803x_get_downshift(struct phy_device *phydev, u8 *d)
+{
+ int val;
+
+ val = phy_read(phydev, AT803X_SMART_SPEED);
+ if (val < 0)
+ return val;
+
+ if (val & AT803X_SMART_SPEED_ENABLE)
+ *d = FIELD_GET(AT803X_SMART_SPEED_RETRY_LIMIT_MASK, val) + 2;
+ else
+ *d = DOWNSHIFT_DEV_DISABLE;
+
+ return 0;
+}
+
+static int at803x_set_downshift(struct phy_device *phydev, u8 cnt)
+{
+ u16 mask, set;
+ int ret;
+
+ switch (cnt) {
+ case DOWNSHIFT_DEV_DEFAULT_COUNT:
+ cnt = AT803X_DEFAULT_DOWNSHIFT;
+ fallthrough;
+ case AT803X_MIN_DOWNSHIFT ... AT803X_MAX_DOWNSHIFT:
+ set = AT803X_SMART_SPEED_ENABLE |
+ AT803X_SMART_SPEED_BYPASS_TIMER |
+ FIELD_PREP(AT803X_SMART_SPEED_RETRY_LIMIT_MASK, cnt - 2);
+ mask = AT803X_SMART_SPEED_RETRY_LIMIT_MASK;
+ break;
+ case DOWNSHIFT_DEV_DISABLE:
+ set = 0;
+ mask = AT803X_SMART_SPEED_ENABLE |
+ AT803X_SMART_SPEED_BYPASS_TIMER;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = phy_modify_changed(phydev, AT803X_SMART_SPEED, mask, set);
+
+ /* After changing the smart speed settings, we need to perform a
+ * software reset, use phy_init_hw() to make sure we set the
+ * reapply any values which might got lost during software reset.
+ */
+ if (ret == 1)
+ ret = phy_init_hw(phydev);
+
+ return ret;
+}
+
+int at803x_get_tunable(struct phy_device *phydev,
+ struct ethtool_tunable *tuna, void *data)
+{
+ switch (tuna->id) {
+ case ETHTOOL_PHY_DOWNSHIFT:
+ return at803x_get_downshift(phydev, data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+EXPORT_SYMBOL_GPL(at803x_get_tunable);
+
+int at803x_set_tunable(struct phy_device *phydev,
+ struct ethtool_tunable *tuna, const void *data)
+{
+ switch (tuna->id) {
+ case ETHTOOL_PHY_DOWNSHIFT:
+ return at803x_set_downshift(phydev, *(const u8 *)data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+EXPORT_SYMBOL_GPL(at803x_set_tunable);
+
+int at803x_cdt_fault_length(int dt)
+{
+ /* According to the datasheet the distance to the fault is
+ * DELTA_TIME * 0.824 meters.
+ *
+ * The author suspect the correct formula is:
+ *
+ * fault_distance = DELTA_TIME * (c * VF) / 125MHz / 2
+ *
+ * where c is the speed of light, VF is the velocity factor of
+ * the twisted pair cable, 125MHz the counter frequency and
+ * we need to divide by 2 because the hardware will measure the
+ * round trip time to the fault and back to the PHY.
+ *
+ * With a VF of 0.69 we get the factor 0.824 mentioned in the
+ * datasheet.
+ */
+ return (dt * 824) / 10;
+}
+EXPORT_SYMBOL_GPL(at803x_cdt_fault_length);
+
+int at803x_cdt_start(struct phy_device *phydev, u32 cdt_start)
+{
+ return phy_write(phydev, AT803X_CDT, cdt_start);
+}
+EXPORT_SYMBOL_GPL(at803x_cdt_start);
+
+int at803x_cdt_wait_for_completion(struct phy_device *phydev,
+ u32 cdt_en)
+{
+ int val, ret;
+
+ /* One test run takes about 25ms */
+ ret = phy_read_poll_timeout(phydev, AT803X_CDT, val,
+ !(val & cdt_en),
+ 30000, 100000, true);
+
+ return ret < 0 ? ret : 0;
+}
+EXPORT_SYMBOL_GPL(at803x_cdt_wait_for_completion);
+
+static bool qca808x_cdt_fault_length_valid(int cdt_code)
+{
+ switch (cdt_code) {
+ case QCA808X_CDT_STATUS_STAT_SAME_SHORT:
+ case QCA808X_CDT_STATUS_STAT_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_NORMAL:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_SHORT:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_NORMAL:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_SHORT:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_NORMAL:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_SHORT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int qca808x_cable_test_result_trans(int cdt_code)
+{
+ switch (cdt_code) {
+ case QCA808X_CDT_STATUS_STAT_NORMAL:
+ return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+ case QCA808X_CDT_STATUS_STAT_SAME_SHORT:
+ return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+ case QCA808X_CDT_STATUS_STAT_SAME_OPEN:
+ return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_NORMAL:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_SHORT:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_NORMAL:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_SHORT:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_NORMAL:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_OPEN:
+ case QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_SHORT:
+ return ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT;
+ case QCA808X_CDT_STATUS_STAT_FAIL:
+ default:
+ return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+ }
+}
+
+static int qca808x_cdt_fault_length(struct phy_device *phydev, int pair,
+ int result)
+{
+ int val;
+ u32 cdt_length_reg = 0;
+
+ switch (pair) {
+ case ETHTOOL_A_CABLE_PAIR_A:
+ cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_A;
+ break;
+ case ETHTOOL_A_CABLE_PAIR_B:
+ cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_B;
+ break;
+ case ETHTOOL_A_CABLE_PAIR_C:
+ cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_C;
+ break;
+ case ETHTOOL_A_CABLE_PAIR_D:
+ cdt_length_reg = QCA808X_MMD3_CDT_DIAG_PAIR_D;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ val = phy_read_mmd(phydev, MDIO_MMD_PCS, cdt_length_reg);
+ if (val < 0)
+ return val;
+
+ if (result == ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT)
+ val = FIELD_GET(QCA808X_CDT_DIAG_LENGTH_SAME_SHORT, val);
+ else
+ val = FIELD_GET(QCA808X_CDT_DIAG_LENGTH_CROSS_SHORT, val);
+
+ return at803x_cdt_fault_length(val);
+}
+
+static int qca808x_cable_test_get_pair_status(struct phy_device *phydev, u8 pair,
+ u16 status)
+{
+ int length, result;
+ u16 pair_code;
+
+ switch (pair) {
+ case ETHTOOL_A_CABLE_PAIR_A:
+ pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_A, status);
+ break;
+ case ETHTOOL_A_CABLE_PAIR_B:
+ pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_B, status);
+ break;
+ case ETHTOOL_A_CABLE_PAIR_C:
+ pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_C, status);
+ break;
+ case ETHTOOL_A_CABLE_PAIR_D:
+ pair_code = FIELD_GET(QCA808X_CDT_CODE_PAIR_D, status);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ result = qca808x_cable_test_result_trans(pair_code);
+ ethnl_cable_test_result(phydev, pair, result);
+
+ if (qca808x_cdt_fault_length_valid(pair_code)) {
+ length = qca808x_cdt_fault_length(phydev, pair, result);
+ ethnl_cable_test_fault_length(phydev, pair, length);
+ }
+
+ return 0;
+}
+
+int qca808x_cable_test_get_status(struct phy_device *phydev, bool *finished)
+{
+ int ret, val;
+
+ *finished = false;
+
+ val = QCA808X_CDT_ENABLE_TEST |
+ QCA808X_CDT_LENGTH_UNIT;
+ ret = at803x_cdt_start(phydev, val);
+ if (ret)
+ return ret;
+
+ ret = at803x_cdt_wait_for_completion(phydev, QCA808X_CDT_ENABLE_TEST);
+ if (ret)
+ return ret;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_PCS, QCA808X_MMD3_CDT_STATUS);
+ if (val < 0)
+ return val;
+
+ ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_A, val);
+ if (ret)
+ return ret;
+
+ ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_B, val);
+ if (ret)
+ return ret;
+
+ ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_C, val);
+ if (ret)
+ return ret;
+
+ ret = qca808x_cable_test_get_pair_status(phydev, ETHTOOL_A_CABLE_PAIR_D, val);
+ if (ret)
+ return ret;
+
+ *finished = true;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qca808x_cable_test_get_status);
+
+int qca808x_led_reg_hw_control_enable(struct phy_device *phydev, u16 reg)
+{
+ return phy_clear_bits_mmd(phydev, MDIO_MMD_AN, reg,
+ QCA808X_LED_FORCE_EN);
+}
+EXPORT_SYMBOL_GPL(qca808x_led_reg_hw_control_enable);
+
+bool qca808x_led_reg_hw_control_status(struct phy_device *phydev, u16 reg)
+{
+ int val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, reg);
+ return !(val & QCA808X_LED_FORCE_EN);
+}
+EXPORT_SYMBOL_GPL(qca808x_led_reg_hw_control_status);
+
+int qca808x_led_reg_brightness_set(struct phy_device *phydev,
+ u16 reg, enum led_brightness value)
+{
+ return phy_modify_mmd(phydev, MDIO_MMD_AN, reg,
+ QCA808X_LED_FORCE_EN | QCA808X_LED_FORCE_MODE_MASK,
+ QCA808X_LED_FORCE_EN | (value ? QCA808X_LED_FORCE_ON :
+ QCA808X_LED_FORCE_OFF));
+}
+EXPORT_SYMBOL_GPL(qca808x_led_reg_brightness_set);
+
+int qca808x_led_reg_blink_set(struct phy_device *phydev, u16 reg,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ int ret;
+
+ /* Set blink to 50% off, 50% on at 4Hz by default */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_AN, QCA808X_MMD7_LED_GLOBAL,
+ QCA808X_LED_BLINK_FREQ_MASK | QCA808X_LED_BLINK_DUTY_MASK,
+ QCA808X_LED_BLINK_FREQ_4HZ | QCA808X_LED_BLINK_DUTY_50_50);
+ if (ret)
+ return ret;
+
+ /* We use BLINK_1 for normal blinking */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_AN, reg,
+ QCA808X_LED_FORCE_EN | QCA808X_LED_FORCE_MODE_MASK,
+ QCA808X_LED_FORCE_EN | QCA808X_LED_FORCE_BLINK_1);
+ if (ret)
+ return ret;
+
+ /* We set blink to 4Hz, aka 250ms */
+ *delay_on = 250 / 2;
+ *delay_off = 250 / 2;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qca808x_led_reg_blink_set);
diff --git a/drivers/net/phy/qcom/qcom.h b/drivers/net/phy/qcom/qcom.h
new file mode 100644
index 000000000000..4bb541728846
--- /dev/null
+++ b/drivers/net/phy/qcom/qcom.h
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define AT803X_SPECIFIC_FUNCTION_CONTROL 0x10
+#define AT803X_SFC_ASSERT_CRS BIT(11)
+#define AT803X_SFC_FORCE_LINK BIT(10)
+#define AT803X_SFC_MDI_CROSSOVER_MODE_M GENMASK(6, 5)
+#define AT803X_SFC_AUTOMATIC_CROSSOVER 0x3
+#define AT803X_SFC_MANUAL_MDIX 0x1
+#define AT803X_SFC_MANUAL_MDI 0x0
+#define AT803X_SFC_SQE_TEST BIT(2)
+#define AT803X_SFC_POLARITY_REVERSAL BIT(1)
+#define AT803X_SFC_DISABLE_JABBER BIT(0)
+
+#define AT803X_SPECIFIC_STATUS 0x11
+#define AT803X_SS_SPEED_MASK GENMASK(15, 14)
+#define AT803X_SS_SPEED_1000 2
+#define AT803X_SS_SPEED_100 1
+#define AT803X_SS_SPEED_10 0
+#define AT803X_SS_DUPLEX BIT(13)
+#define AT803X_SS_SPEED_DUPLEX_RESOLVED BIT(11)
+#define AT803X_SS_MDIX BIT(6)
+
+#define QCA808X_SS_SPEED_MASK GENMASK(9, 7)
+#define QCA808X_SS_SPEED_2500 4
+
+#define AT803X_INTR_ENABLE 0x12
+#define AT803X_INTR_ENABLE_AUTONEG_ERR BIT(15)
+#define AT803X_INTR_ENABLE_SPEED_CHANGED BIT(14)
+#define AT803X_INTR_ENABLE_DUPLEX_CHANGED BIT(13)
+#define AT803X_INTR_ENABLE_PAGE_RECEIVED BIT(12)
+#define AT803X_INTR_ENABLE_LINK_FAIL BIT(11)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS BIT(10)
+#define AT803X_INTR_ENABLE_LINK_FAIL_BX BIT(8)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX BIT(7)
+#define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
+#define AT803X_INTR_ENABLE_POLARITY_CHANGED BIT(1)
+#define AT803X_INTR_ENABLE_WOL BIT(0)
+
+#define AT803X_INTR_STATUS 0x13
+
+#define AT803X_SMART_SPEED 0x14
+#define AT803X_SMART_SPEED_ENABLE BIT(5)
+#define AT803X_SMART_SPEED_RETRY_LIMIT_MASK GENMASK(4, 2)
+#define AT803X_SMART_SPEED_BYPASS_TIMER BIT(1)
+
+#define AT803X_CDT 0x16
+#define AT803X_CDT_MDI_PAIR_MASK GENMASK(9, 8)
+#define AT803X_CDT_ENABLE_TEST BIT(0)
+#define AT803X_CDT_STATUS 0x1c
+#define AT803X_CDT_STATUS_STAT_NORMAL 0
+#define AT803X_CDT_STATUS_STAT_SHORT 1
+#define AT803X_CDT_STATUS_STAT_OPEN 2
+#define AT803X_CDT_STATUS_STAT_FAIL 3
+#define AT803X_CDT_STATUS_STAT_MASK GENMASK(9, 8)
+#define AT803X_CDT_STATUS_DELTA_TIME_MASK GENMASK(7, 0)
+
+#define QCA808X_CDT_ENABLE_TEST BIT(15)
+#define QCA808X_CDT_INTER_CHECK_DIS BIT(13)
+#define QCA808X_CDT_STATUS BIT(11)
+#define QCA808X_CDT_LENGTH_UNIT BIT(10)
+
+#define QCA808X_MMD3_CDT_STATUS 0x8064
+#define QCA808X_MMD3_CDT_DIAG_PAIR_A 0x8065
+#define QCA808X_MMD3_CDT_DIAG_PAIR_B 0x8066
+#define QCA808X_MMD3_CDT_DIAG_PAIR_C 0x8067
+#define QCA808X_MMD3_CDT_DIAG_PAIR_D 0x8068
+#define QCA808X_CDT_DIAG_LENGTH_SAME_SHORT GENMASK(15, 8)
+#define QCA808X_CDT_DIAG_LENGTH_CROSS_SHORT GENMASK(7, 0)
+
+#define QCA808X_CDT_CODE_PAIR_A GENMASK(15, 12)
+#define QCA808X_CDT_CODE_PAIR_B GENMASK(11, 8)
+#define QCA808X_CDT_CODE_PAIR_C GENMASK(7, 4)
+#define QCA808X_CDT_CODE_PAIR_D GENMASK(3, 0)
+
+#define QCA808X_CDT_STATUS_STAT_TYPE GENMASK(1, 0)
+#define QCA808X_CDT_STATUS_STAT_FAIL FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 0)
+#define QCA808X_CDT_STATUS_STAT_NORMAL FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 1)
+#define QCA808X_CDT_STATUS_STAT_SAME_OPEN FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 2)
+#define QCA808X_CDT_STATUS_STAT_SAME_SHORT FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_TYPE, 3)
+
+#define QCA808X_CDT_STATUS_STAT_MDI GENMASK(3, 2)
+#define QCA808X_CDT_STATUS_STAT_MDI1 FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_MDI, 1)
+#define QCA808X_CDT_STATUS_STAT_MDI2 FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_MDI, 2)
+#define QCA808X_CDT_STATUS_STAT_MDI3 FIELD_PREP_CONST(QCA808X_CDT_STATUS_STAT_MDI, 3)
+
+/* NORMAL are MDI with type set to 0 */
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_NORMAL QCA808X_CDT_STATUS_STAT_MDI1
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_OPEN (QCA808X_CDT_STATUS_STAT_SAME_OPEN |\
+ QCA808X_CDT_STATUS_STAT_MDI1)
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI1_SAME_SHORT (QCA808X_CDT_STATUS_STAT_SAME_SHORT |\
+ QCA808X_CDT_STATUS_STAT_MDI1)
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_NORMAL QCA808X_CDT_STATUS_STAT_MDI2
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_OPEN (QCA808X_CDT_STATUS_STAT_SAME_OPEN |\
+ QCA808X_CDT_STATUS_STAT_MDI2)
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI2_SAME_SHORT (QCA808X_CDT_STATUS_STAT_SAME_SHORT |\
+ QCA808X_CDT_STATUS_STAT_MDI2)
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_NORMAL QCA808X_CDT_STATUS_STAT_MDI3
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_OPEN (QCA808X_CDT_STATUS_STAT_SAME_OPEN |\
+ QCA808X_CDT_STATUS_STAT_MDI3)
+#define QCA808X_CDT_STATUS_STAT_CROSS_SHORT_WITH_MDI3_SAME_SHORT (QCA808X_CDT_STATUS_STAT_SAME_SHORT |\
+ QCA808X_CDT_STATUS_STAT_MDI3)
+
+/* Added for reference of existence but should be handled by wait_for_completion already */
+#define QCA808X_CDT_STATUS_STAT_BUSY (BIT(1) | BIT(3))
+
+#define QCA808X_MMD7_LED_GLOBAL 0x8073
+#define QCA808X_LED_BLINK_1 GENMASK(11, 6)
+#define QCA808X_LED_BLINK_2 GENMASK(5, 0)
+/* Values are the same for both BLINK_1 and BLINK_2 */
+#define QCA808X_LED_BLINK_FREQ_MASK GENMASK(5, 3)
+#define QCA808X_LED_BLINK_FREQ_2HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x0)
+#define QCA808X_LED_BLINK_FREQ_4HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x1)
+#define QCA808X_LED_BLINK_FREQ_8HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x2)
+#define QCA808X_LED_BLINK_FREQ_16HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x3)
+#define QCA808X_LED_BLINK_FREQ_32HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x4)
+#define QCA808X_LED_BLINK_FREQ_64HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x5)
+#define QCA808X_LED_BLINK_FREQ_128HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x6)
+#define QCA808X_LED_BLINK_FREQ_256HZ FIELD_PREP(QCA808X_LED_BLINK_FREQ_MASK, 0x7)
+#define QCA808X_LED_BLINK_DUTY_MASK GENMASK(2, 0)
+#define QCA808X_LED_BLINK_DUTY_50_50 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x0)
+#define QCA808X_LED_BLINK_DUTY_75_25 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x1)
+#define QCA808X_LED_BLINK_DUTY_25_75 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x2)
+#define QCA808X_LED_BLINK_DUTY_33_67 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x3)
+#define QCA808X_LED_BLINK_DUTY_67_33 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x4)
+#define QCA808X_LED_BLINK_DUTY_17_83 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x5)
+#define QCA808X_LED_BLINK_DUTY_83_17 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x6)
+#define QCA808X_LED_BLINK_DUTY_8_92 FIELD_PREP(QCA808X_LED_BLINK_DUTY_MASK, 0x7)
+
+/* LED hw control pattern is the same for every LED */
+#define QCA808X_LED_PATTERN_MASK GENMASK(15, 0)
+#define QCA808X_LED_SPEED2500_ON BIT(15)
+#define QCA808X_LED_SPEED2500_BLINK BIT(14)
+/* Follow blink trigger even if duplex or speed condition doesn't match */
+#define QCA808X_LED_BLINK_CHECK_BYPASS BIT(13)
+#define QCA808X_LED_FULL_DUPLEX_ON BIT(12)
+#define QCA808X_LED_HALF_DUPLEX_ON BIT(11)
+#define QCA808X_LED_TX_BLINK BIT(10)
+#define QCA808X_LED_RX_BLINK BIT(9)
+#define QCA808X_LED_TX_ON_10MS BIT(8)
+#define QCA808X_LED_RX_ON_10MS BIT(7)
+#define QCA808X_LED_SPEED1000_ON BIT(6)
+#define QCA808X_LED_SPEED100_ON BIT(5)
+#define QCA808X_LED_SPEED10_ON BIT(4)
+#define QCA808X_LED_COLLISION_BLINK BIT(3)
+#define QCA808X_LED_SPEED1000_BLINK BIT(2)
+#define QCA808X_LED_SPEED100_BLINK BIT(1)
+#define QCA808X_LED_SPEED10_BLINK BIT(0)
+
+/* LED force ctrl is the same for every LED
+ * No documentation exist for this, not even internal one
+ * with NDA as QCOM gives only info about configuring
+ * hw control pattern rules and doesn't indicate any way
+ * to force the LED to specific mode.
+ * These define comes from reverse and testing and maybe
+ * lack of some info or some info are not entirely correct.
+ * For the basic LED control and hw control these finding
+ * are enough to support LED control in all the required APIs.
+ *
+ * On doing some comparison with implementation with qca807x,
+ * it was found that it's 1:1 equal to it and confirms all the
+ * reverse done. It was also found further specification with the
+ * force mode and the blink modes.
+ */
+#define QCA808X_LED_FORCE_EN BIT(15)
+#define QCA808X_LED_FORCE_MODE_MASK GENMASK(14, 13)
+#define QCA808X_LED_FORCE_BLINK_1 FIELD_PREP(QCA808X_LED_FORCE_MODE_MASK, 0x3)
+#define QCA808X_LED_FORCE_BLINK_2 FIELD_PREP(QCA808X_LED_FORCE_MODE_MASK, 0x2)
+#define QCA808X_LED_FORCE_ON FIELD_PREP(QCA808X_LED_FORCE_MODE_MASK, 0x1)
+#define QCA808X_LED_FORCE_OFF FIELD_PREP(QCA808X_LED_FORCE_MODE_MASK, 0x0)
+
+#define AT803X_LOC_MAC_ADDR_0_15_OFFSET 0x804C
+#define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B
+#define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A
+
+#define AT803X_DEBUG_ADDR 0x1D
+#define AT803X_DEBUG_DATA 0x1E
+
+#define AT803X_DEBUG_ANALOG_TEST_CTRL 0x00
+#define QCA8327_DEBUG_MANU_CTRL_EN BIT(2)
+#define QCA8337_DEBUG_MANU_CTRL_EN GENMASK(3, 2)
+#define AT803X_DEBUG_RX_CLK_DLY_EN BIT(15)
+
+#define AT803X_DEBUG_SYSTEM_CTRL_MODE 0x05
+#define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8)
+
+#define AT803X_DEBUG_REG_HIB_CTRL 0x0b
+#define AT803X_DEBUG_HIB_CTRL_SEL_RST_80U BIT(10)
+#define AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE BIT(13)
+#define AT803X_DEBUG_HIB_CTRL_PS_HIB_EN BIT(15)
+
+#define AT803X_DEFAULT_DOWNSHIFT 5
+#define AT803X_MIN_DOWNSHIFT 2
+#define AT803X_MAX_DOWNSHIFT 9
+
+enum stat_access_type {
+ PHY,
+ MMD
+};
+
+struct at803x_hw_stat {
+ const char *string;
+ u8 reg;
+ u32 mask;
+ enum stat_access_type access_type;
+};
+
+struct at803x_ss_mask {
+ u16 speed_mask;
+ u8 speed_shift;
+};
+
+int at803x_debug_reg_read(struct phy_device *phydev, u16 reg);
+int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg,
+ u16 clear, u16 set);
+int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data);
+int at803x_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol);
+void at803x_get_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol);
+int at803x_ack_interrupt(struct phy_device *phydev);
+int at803x_config_intr(struct phy_device *phydev);
+irqreturn_t at803x_handle_interrupt(struct phy_device *phydev);
+int at803x_read_specific_status(struct phy_device *phydev,
+ struct at803x_ss_mask ss_mask);
+int at803x_config_mdix(struct phy_device *phydev, u8 ctrl);
+int at803x_prepare_config_aneg(struct phy_device *phydev);
+int at803x_read_status(struct phy_device *phydev);
+int at803x_get_tunable(struct phy_device *phydev,
+ struct ethtool_tunable *tuna, void *data);
+int at803x_set_tunable(struct phy_device *phydev,
+ struct ethtool_tunable *tuna, const void *data);
+int at803x_cdt_fault_length(int dt);
+int at803x_cdt_start(struct phy_device *phydev, u32 cdt_start);
+int at803x_cdt_wait_for_completion(struct phy_device *phydev,
+ u32 cdt_en);
+int qca808x_cable_test_get_status(struct phy_device *phydev, bool *finished);
+int qca808x_led_reg_hw_control_enable(struct phy_device *phydev, u16 reg);
+bool qca808x_led_reg_hw_control_status(struct phy_device *phydev, u16 reg);
+int qca808x_led_reg_brightness_set(struct phy_device *phydev,
+ u16 reg, enum led_brightness value);
+int qca808x_led_reg_blink_set(struct phy_device *phydev, u16 reg,
+ unsigned long *delay_on,
+ unsigned long *delay_off);
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 894172a3e15f..1fa70427b2a2 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -57,14 +57,6 @@
#define RTL8366RB_POWER_SAVE 0x15
#define RTL8366RB_POWER_SAVE_ON BIT(12)
-#define RTL_SUPPORTS_5000FULL BIT(14)
-#define RTL_SUPPORTS_2500FULL BIT(13)
-#define RTL_SUPPORTS_10000FULL BIT(0)
-#define RTL_ADV_2500FULL BIT(7)
-#define RTL_LPADV_10000FULL BIT(11)
-#define RTL_LPADV_5000FULL BIT(6)
-#define RTL_LPADV_2500FULL BIT(5)
-
#define RTL9000A_GINMR 0x14
#define RTL9000A_GINMR_LINK_STATUS BIT(4)
@@ -421,9 +413,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
ERR_PTR(ret));
return ret;
}
+
+ return genphy_soft_reset(phydev);
}
- return genphy_soft_reset(phydev);
+ return 0;
}
static int rtl821x_suspend(struct phy_device *phydev)
@@ -674,11 +668,11 @@ static int rtl822x_get_features(struct phy_device *phydev)
return val;
linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
- phydev->supported, val & RTL_SUPPORTS_2500FULL);
+ phydev->supported, val & MDIO_PMA_SPEED_2_5G);
linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
- phydev->supported, val & RTL_SUPPORTS_5000FULL);
+ phydev->supported, val & MDIO_PMA_SPEED_5G);
linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
- phydev->supported, val & RTL_SUPPORTS_10000FULL);
+ phydev->supported, val & MDIO_SPEED_10G);
return genphy_read_abilities(phydev);
}
@@ -688,14 +682,12 @@ static int rtl822x_config_aneg(struct phy_device *phydev)
int ret = 0;
if (phydev->autoneg == AUTONEG_ENABLE) {
- u16 adv2500 = 0;
-
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
- phydev->advertising))
- adv2500 = RTL_ADV_2500FULL;
+ u16 adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising);
ret = phy_modify_paged_changed(phydev, 0xa5d, 0x12,
- RTL_ADV_2500FULL, adv2500);
+ MDIO_AN_10GBT_CTRL_ADV2_5G |
+ MDIO_AN_10GBT_CTRL_ADV5G,
+ adv);
if (ret < 0)
return ret;
}
@@ -713,12 +705,8 @@ static int rtl822x_read_status(struct phy_device *phydev)
if (lpadv < 0)
return lpadv;
- linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
- phydev->lp_advertising, lpadv & RTL_LPADV_10000FULL);
- linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
- phydev->lp_advertising, lpadv & RTL_LPADV_5000FULL);
- linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
- phydev->lp_advertising, lpadv & RTL_LPADV_2500FULL);
+ mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising,
+ lpadv);
}
ret = genphy_read_status(phydev);
@@ -736,7 +724,7 @@ static bool rtlgen_supports_2_5gbps(struct phy_device *phydev)
val = phy_read(phydev, 0x13);
phy_write(phydev, RTL821x_PAGE_SELECT, 0);
- return val >= 0 && val & RTL_SUPPORTS_2500FULL;
+ return val >= 0 && val & MDIO_PMA_SPEED_2_5G;
}
static int rtlgen_match_phy_device(struct phy_device *phydev)
@@ -1048,6 +1036,16 @@ static struct phy_driver realtek_drvs[] = {
.read_page = rtl821x_read_page,
.write_page = rtl821x_write_page,
}, {
+ PHY_ID_MATCH_EXACT(0x001cc862),
+ .name = "RTL8251B 5Gbps PHY",
+ .get_features = rtl822x_get_features,
+ .config_aneg = rtl822x_config_aneg,
+ .read_status = rtl822x_read_status,
+ .suspend = genphy_suspend,
+ .resume = rtlgen_resume,
+ .read_page = rtl821x_read_page,
+ .write_page = rtl821x_write_page,
+ }, {
PHY_ID_MATCH_EXACT(0x001cc961),
.name = "RTL8366RB Gigabit Ethernet",
.config_init = &rtl8366rb_config_init,
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
index 7fd9fe6a602b..7b1bc5fcef9b 100644
--- a/drivers/net/phy/xilinx_gmii2rgmii.c
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -22,7 +22,7 @@
struct gmii2rgmii {
struct phy_device *phy_dev;
- struct phy_driver *phy_drv;
+ const struct phy_driver *phy_drv;
struct phy_driver conv_phy_drv;
struct mdio_device *mdio;
};
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index 40ce8abe6999..cc7d1113ece0 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -1437,4 +1437,5 @@ static int __init plip_init (void)
module_init(plip_init);
module_exit(plip_cleanup_module);
+MODULE_DESCRIPTION("PLIP (parallel port) network module");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ppp/bsd_comp.c b/drivers/net/ppp/bsd_comp.c
index db0dc36d12e3..55954594e157 100644
--- a/drivers/net/ppp/bsd_comp.c
+++ b/drivers/net/ppp/bsd_comp.c
@@ -1166,5 +1166,6 @@ static void __exit bsdcomp_cleanup(void)
module_init(bsdcomp_init);
module_exit(bsdcomp_cleanup);
+MODULE_DESCRIPTION("PPP BSD-Compress compression module");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("ppp-compress-" __stringify(CI_BSD_COMPRESS));
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 840da924708b..c33c3db3cc08 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -87,6 +87,7 @@ struct asyncppp {
static int flag_time = HZ;
module_param(flag_time, int, 0);
MODULE_PARM_DESC(flag_time, "ppp_async: interval between flagged packets (in clock ticks)");
+MODULE_DESCRIPTION("PPP async serial channel module");
MODULE_LICENSE("GPL");
MODULE_ALIAS_LDISC(N_PPP);
@@ -460,6 +461,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
case PPPIOCSMRU:
if (get_user(val, p))
break;
+ if (val > U16_MAX) {
+ err = -EINVAL;
+ break;
+ }
if (val < PPP_MRU)
val = PPP_MRU;
ap->mru = val;
diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c
index e6d48e5c65a3..4d2ff63f2ee2 100644
--- a/drivers/net/ppp/ppp_deflate.c
+++ b/drivers/net/ppp/ppp_deflate.c
@@ -630,6 +630,7 @@ static void __exit deflate_cleanup(void)
module_init(deflate_init);
module_exit(deflate_cleanup);
+MODULE_DESCRIPTION("PPP Deflate compression module");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE));
MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE_DRAFT));
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 0193af2d31c9..fe380fe196e7 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -295,7 +295,9 @@ static void ppp_setup(struct net_device *dev);
static const struct net_device_ops ppp_netdev_ops;
-static struct class *ppp_class;
+static const struct class ppp_class = {
+ .name = "ppp",
+};
/* per net-namespace data */
static inline struct ppp_net *ppp_pernet(struct net *net)
@@ -1394,11 +1396,9 @@ static int __init ppp_init(void)
goto out_net;
}
- ppp_class = class_create("ppp");
- if (IS_ERR(ppp_class)) {
- err = PTR_ERR(ppp_class);
+ err = class_register(&ppp_class);
+ if (err)
goto out_chrdev;
- }
err = rtnl_link_register(&ppp_link_ops);
if (err) {
@@ -1407,12 +1407,12 @@ static int __init ppp_init(void)
}
/* not a big deal if we fail here :-) */
- device_create(ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp");
+ device_create(&ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp");
return 0;
out_class:
- class_destroy(ppp_class);
+ class_unregister(&ppp_class);
out_chrdev:
unregister_chrdev(PPP_MAJOR, "ppp");
out_net:
@@ -1607,7 +1607,7 @@ static const struct net_device_ops ppp_netdev_ops = {
.ndo_fill_forward_path = ppp_fill_forward_path,
};
-static struct device_type ppp_type = {
+static const struct device_type ppp_type = {
.name = "ppp",
};
@@ -3549,8 +3549,8 @@ static void __exit ppp_cleanup(void)
pr_err("PPP: removing module but units remain!\n");
rtnl_link_unregister(&ppp_link_ops);
unregister_chrdev(PPP_MAJOR, "ppp");
- device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0));
- class_destroy(ppp_class);
+ device_destroy(&ppp_class, MKDEV(PPP_MAJOR, 0));
+ class_unregister(&ppp_class);
unregister_pernet_device(&ppp_net_ops);
}
@@ -3604,6 +3604,7 @@ EXPORT_SYMBOL(ppp_input_error);
EXPORT_SYMBOL(ppp_output_wakeup);
EXPORT_SYMBOL(ppp_register_compressor);
EXPORT_SYMBOL(ppp_unregister_compressor);
+MODULE_DESCRIPTION("Generic PPP layer driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CHARDEV(PPP_MAJOR, 0);
MODULE_ALIAS_RTNL_LINK("ppp");
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 52d05ce4a281..45bf59ac8f57 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -724,5 +724,6 @@ ppp_sync_cleanup(void)
module_init(ppp_sync_init);
module_exit(ppp_sync_cleanup);
+MODULE_DESCRIPTION("PPP synchronous TTY channel module");
MODULE_LICENSE("GPL");
MODULE_ALIAS_LDISC(N_SYNC_PPP);
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 8e7238e97d0a..2ea4f4890d23 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1007,26 +1007,21 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
struct sk_buff *skb;
int error = 0;
- if (sk->sk_state & PPPOX_BOUND) {
- error = -EIO;
- goto end;
- }
+ if (sk->sk_state & PPPOX_BOUND)
+ return -EIO;
skb = skb_recv_datagram(sk, flags, &error);
- if (error < 0)
- goto end;
+ if (!skb)
+ return error;
- if (skb) {
- total_len = min_t(size_t, total_len, skb->len);
- error = skb_copy_datagram_msg(skb, 0, m, total_len);
- if (error == 0) {
- consume_skb(skb);
- return total_len;
- }
+ total_len = min_t(size_t, total_len, skb->len);
+ error = skb_copy_datagram_msg(skb, 0, m, total_len);
+ if (error == 0) {
+ consume_skb(skb);
+ return total_len;
}
kfree_skb(skb);
-end:
return error;
}
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f575f225d417..0a44bbdcfb7b 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -25,7 +25,6 @@
#include <net/genetlink.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
-#include <generated/utsrelease.h>
#include <linux/if_team.h>
#define DRV_NAME "team"
@@ -2074,7 +2073,6 @@ static void team_ethtool_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
strscpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
- strscpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
}
static int team_ethtool_get_link_ksettings(struct net_device *dev,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index afa5497f7c35..0b3f21cba552 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -54,6 +54,7 @@
#include <linux/if_tun.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
+#include <linux/math.h>
#include <linux/nsproxy.h>
#include <linux/virtio_net.h>
#include <linux/rcupdate.h>
@@ -77,6 +78,7 @@
#include <net/ax25.h>
#include <net/rose.h>
#include <net/6lowpan.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
@@ -523,8 +525,7 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{
struct tun_flow_entry *e;
- u32 txq = 0;
- u32 numqueues = 0;
+ u32 txq, numqueues;
numqueues = READ_ONCE(tun->numqueues);
@@ -534,8 +535,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
tun_flow_save_rps_rxhash(e, txq);
txq = e->queue_index;
} else {
- /* use multiply and shift instead of expensive divide */
- txq = ((u64)txq * numqueues) >> 32;
+ txq = reciprocal_scale(txq, numqueues);
}
return txq;
@@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->tfiles[tun->numqueues - 1]);
ntfile = rtnl_dereference(tun->tfiles[index]);
ntfile->queue_index = index;
+ ntfile->xdp_rxq.queue_index = index;
rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
NULL);
@@ -977,20 +978,15 @@ static int tun_net_init(struct net_device *dev)
struct ifreq *ifr = tun->ifr;
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
spin_lock_init(&tun->lock);
err = security_tun_dev_alloc_security(&tun->security);
- if (err < 0) {
- free_percpu(dev->tstats);
+ if (err < 0)
return err;
- }
tun_flow_init(tun);
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
@@ -1008,7 +1004,6 @@ static int tun_net_init(struct net_device *dev)
if (err < 0) {
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
- free_percpu(dev->tstats);
return err;
}
return 0;
@@ -1344,7 +1339,6 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_select_queue = tun_select_queue,
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = tun_set_headroom,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_bpf = tun_xdp,
.ndo_xdp_xmit = tun_xdp_xmit,
.ndo_change_carrier = tun_net_change_carrier,
@@ -1630,13 +1624,19 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
switch (act) {
case XDP_REDIRECT:
err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
- if (err)
+ if (err) {
+ dev_core_stats_rx_dropped_inc(tun->dev);
return err;
+ }
+ dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
break;
case XDP_TX:
err = tun_xdp_tx(tun->dev, xdp);
- if (err < 0)
+ if (err < 0) {
+ dev_core_stats_rx_dropped_inc(tun->dev);
return err;
+ }
+ dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
break;
case XDP_PASS:
break;
@@ -1921,7 +1921,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
rcu_read_lock();
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
- ret = do_xdp_generic(xdp_prog, skb);
+ ret = do_xdp_generic(xdp_prog, &skb);
if (ret != XDP_PASS) {
rcu_read_unlock();
local_bh_enable();
@@ -2311,7 +2311,6 @@ static void tun_free_netdev(struct net_device *dev)
BUG_ON(!(list_empty(&tun->disabled)));
- free_percpu(dev->tstats);
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
__tun_set_ebpf(tun, &tun->steering_prog, NULL);
@@ -2511,7 +2510,7 @@ build:
skb_record_rx_queue(skb, tfile->queue_index);
if (skb_xdp) {
- ret = do_xdp_generic(xdp_prog, skb);
+ ret = do_xdp_generic(xdp_prog, &skb);
if (ret != XDP_PASS) {
ret = 0;
goto out;
@@ -3638,12 +3637,22 @@ static int tun_set_coalesce(struct net_device *dev,
return 0;
}
+static void tun_get_channels(struct net_device *dev,
+ struct ethtool_channels *channels)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ channels->combined_count = tun->numqueues;
+ channels->max_combined = tun->flags & IFF_MULTI_QUEUE ? MAX_TAP_QUEUES : 1;
+}
+
static const struct ethtool_ops tun_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES,
.get_drvinfo = tun_get_drvinfo,
.get_msglevel = tun_get_msglevel,
.set_msglevel = tun_set_msglevel,
.get_link = ethtool_op_get_link,
+ .get_channels = tun_get_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_coalesce = tun_get_coalesce,
.set_coalesce = tun_set_coalesce,
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 3fd7dccf0f9c..3c360d4f0635 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -99,6 +99,7 @@ config USB_RTL8150
config USB_RTL8152
tristate "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
select MII
+ select PHYLIB
select CRC32
select CRYPTO
select CRYPTO_HASH
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index d837c1887416..88e084534853 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -667,7 +667,7 @@ static int ax88179_set_link_ksettings(struct net_device *net,
}
static int
-ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_eee *data)
+ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_keee *data)
{
int val;
@@ -676,29 +676,29 @@ ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_eee *data)
MDIO_MMD_PCS);
if (val < 0)
return val;
- data->supported = mmd_eee_cap_to_ethtool_sup_t(val);
+ mii_eee_cap1_mod_linkmode_t(data->supported, val);
/* Get advertisement EEE */
val = ax88179_phy_read_mmd_indirect(dev, MDIO_AN_EEE_ADV,
MDIO_MMD_AN);
if (val < 0)
return val;
- data->advertised = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(data->advertised, val);
/* Get LP advertisement EEE */
val = ax88179_phy_read_mmd_indirect(dev, MDIO_AN_EEE_LPABLE,
MDIO_MMD_AN);
if (val < 0)
return val;
- data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(data->lp_advertised, val);
return 0;
}
static int
-ax88179_ethtool_set_eee(struct usbnet *dev, struct ethtool_eee *data)
+ax88179_ethtool_set_eee(struct usbnet *dev, struct ethtool_keee *data)
{
- u16 tmp16 = ethtool_adv_to_mmd_eee_adv_t(data->advertised);
+ u16 tmp16 = linkmode_to_mii_eee_cap1_t(data->advertised);
return ax88179_phy_write_mmd_indirect(dev, MDIO_AN_EEE_ADV,
MDIO_MMD_AN, tmp16);
@@ -807,7 +807,7 @@ static void ax88179_enable_eee(struct usbnet *dev)
GMII_PHY_PAGE_SELECT, 2, &tmp16);
}
-static int ax88179_get_eee(struct net_device *net, struct ethtool_eee *edata)
+static int ax88179_get_eee(struct net_device *net, struct ethtool_keee *edata)
{
struct usbnet *dev = netdev_priv(net);
struct ax88179_data *priv = dev->driver_priv;
@@ -818,7 +818,7 @@ static int ax88179_get_eee(struct net_device *net, struct ethtool_eee *edata)
return ax88179_ethtool_get_eee(dev, edata);
}
-static int ax88179_set_eee(struct net_device *net, struct ethtool_eee *edata)
+static int ax88179_set_eee(struct net_device *net, struct ethtool_keee *edata)
{
struct usbnet *dev = netdev_priv(net);
struct ax88179_data *priv = dev->driver_priv;
@@ -1587,7 +1587,7 @@ static int ax88179_reset(struct usbnet *dev)
u16 *tmp16;
u8 *tmp;
struct ax88179_data *ax179_data = dev->driver_priv;
- struct ethtool_eee eee_data;
+ struct ethtool_keee eee_data;
tmp16 = (u16 *)buf;
tmp = (u8 *)buf;
@@ -1663,7 +1663,7 @@ static int ax88179_reset(struct usbnet *dev)
ax88179_disable_eee(dev);
ax88179_ethtool_get_eee(dev, &eee_data);
- eee_data.advertised = 0;
+ linkmode_zero(eee_data.advertised);
ax88179_ethtool_set_eee(dev, &eee_data);
/* Restart autoneg */
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index cd4083e0b3b9..e13e4920ee9b 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -339,7 +339,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci)
in6_dev = in6_dev_get(netdev);
if (!in6_dev)
goto out;
- is_router = !!in6_dev->cnf.forwarding;
+ is_router = !!READ_ONCE(in6_dev->cnf.forwarding);
in6_dev_put(in6_dev);
/* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 99ec1d4a972d..8b6d6a1b3c2e 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc)
err = dm_read_shared_word(dev, 1, loc, &res);
if (err < 0) {
netdev_err(dev->net, "MDIO read error: %d\n", err);
- return err;
+ return 0;
}
netdev_dbg(dev->net,
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index f088ea2ba6f3..1aeb36119d3f 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2465,7 +2465,7 @@ static void hso_create_rfkill(struct hso_device *hso_dev,
}
}
-static struct device_type hso_type = {
+static const struct device_type hso_type = {
.name = "wwan",
};
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index a6d653ff552a..80ee4fcdfb36 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
lan78xx_rx_urb_submit_all(dev);
+ local_bh_disable();
napi_schedule(&dev->napi);
+ local_bh_enable();
}
return 0;
@@ -1673,7 +1675,7 @@ static int lan78xx_set_wol(struct net_device *netdev,
return ret;
}
-static int lan78xx_get_eee(struct net_device *net, struct ethtool_eee *edata)
+static int lan78xx_get_eee(struct net_device *net, struct ethtool_keee *edata)
{
struct lan78xx_net *dev = netdev_priv(net);
struct phy_device *phydev = net->phydev;
@@ -1709,7 +1711,7 @@ exit:
return ret;
}
-static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata)
+static int lan78xx_set_eee(struct net_device *net, struct ethtool_keee *edata)
{
struct lan78xx_net *dev = netdev_priv(net);
int ret;
@@ -3033,7 +3035,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
if (dev->chipid == ID_REV_CHIP_ID_7801_)
buf &= ~MAC_CR_GMII_EN_;
- if (dev->chipid == ID_REV_CHIP_ID_7800_) {
+ if (dev->chipid == ID_REV_CHIP_ID_7800_ ||
+ dev->chipid == ID_REV_CHIP_ID_7850_) {
ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
if (!ret && sig != EEPROM_INDICATOR) {
/* Implies there is no external eeprom. Set mac speed */
@@ -3132,7 +3135,8 @@ static int lan78xx_open(struct net_device *net)
done:
mutex_unlock(&dev->dev_mutex);
- usb_autopm_put_interface(dev->intf);
+ if (ret < 0)
+ usb_autopm_put_interface(dev->intf);
return ret;
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0d0672d2a654..5d6aeb086fc7 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -10,6 +10,7 @@
#include <linux/etherdevice.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
+#include <linux/phy.h>
#include <linux/usb.h>
#include <linux/crc32.h>
#include <linux/if_vlan.h>
@@ -891,8 +892,8 @@ struct r8152 {
void (*up)(struct r8152 *tp);
void (*down)(struct r8152 *tp);
void (*unload)(struct r8152 *tp);
- int (*eee_get)(struct r8152 *tp, struct ethtool_eee *eee);
- int (*eee_set)(struct r8152 *tp, struct ethtool_eee *eee);
+ int (*eee_get)(struct r8152 *tp, struct ethtool_keee *eee);
+ int (*eee_set)(struct r8152 *tp, struct ethtool_keee *eee);
bool (*in_nway)(struct r8152 *tp);
void (*hw_phy_cfg)(struct r8152 *tp);
void (*autosuspend_en)(struct r8152 *tp, bool enable);
@@ -8922,32 +8923,31 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
}
}
-static int r8152_get_eee(struct r8152 *tp, struct ethtool_eee *eee)
+static int r8152_get_eee(struct r8152 *tp, struct ethtool_keee *eee)
{
- u32 lp, adv, supported = 0;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(common);
u16 val;
val = r8152_mmd_read(tp, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
- supported = mmd_eee_cap_to_ethtool_sup_t(val);
+ mii_eee_cap1_mod_linkmode_t(eee->supported, val);
val = r8152_mmd_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
- adv = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(eee->advertised, val);
val = r8152_mmd_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE);
- lp = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(eee->lp_advertised, val);
eee->eee_enabled = tp->eee_en;
- eee->eee_active = !!(supported & adv & lp);
- eee->supported = supported;
- eee->advertised = tp->eee_adv;
- eee->lp_advertised = lp;
+
+ linkmode_and(common, eee->advertised, eee->lp_advertised);
+ eee->eee_active = phy_check_valid(tp->speed, tp->duplex, common);
return 0;
}
-static int r8152_set_eee(struct r8152 *tp, struct ethtool_eee *eee)
+static int r8152_set_eee(struct r8152 *tp, struct ethtool_keee *eee)
{
- u16 val = ethtool_adv_to_mmd_eee_adv_t(eee->advertised);
+ u16 val = linkmode_to_mii_eee_cap1_t(eee->advertised);
tp->eee_en = eee->eee_enabled;
tp->eee_adv = val;
@@ -8957,31 +8957,30 @@ static int r8152_set_eee(struct r8152 *tp, struct ethtool_eee *eee)
return 0;
}
-static int r8153_get_eee(struct r8152 *tp, struct ethtool_eee *eee)
+static int r8153_get_eee(struct r8152 *tp, struct ethtool_keee *eee)
{
- u32 lp, adv, supported = 0;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(common);
u16 val;
val = ocp_reg_read(tp, OCP_EEE_ABLE);
- supported = mmd_eee_cap_to_ethtool_sup_t(val);
+ mii_eee_cap1_mod_linkmode_t(eee->supported, val);
val = ocp_reg_read(tp, OCP_EEE_ADV);
- adv = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(eee->advertised, val);
val = ocp_reg_read(tp, OCP_EEE_LPABLE);
- lp = mmd_eee_adv_to_ethtool_adv_t(val);
+ mii_eee_cap1_mod_linkmode_t(eee->lp_advertised, val);
eee->eee_enabled = tp->eee_en;
- eee->eee_active = !!(supported & adv & lp);
- eee->supported = supported;
- eee->advertised = tp->eee_adv;
- eee->lp_advertised = lp;
+
+ linkmode_and(common, eee->advertised, eee->lp_advertised);
+ eee->eee_active = phy_check_valid(tp->speed, tp->duplex, common);
return 0;
}
static int
-rtl_ethtool_get_eee(struct net_device *net, struct ethtool_eee *edata)
+rtl_ethtool_get_eee(struct net_device *net, struct ethtool_keee *edata)
{
struct r8152 *tp = netdev_priv(net);
int ret;
@@ -9008,7 +9007,7 @@ out:
}
static int
-rtl_ethtool_set_eee(struct net_device *net, struct ethtool_eee *edata)
+rtl_ethtool_set_eee(struct net_device *net, struct ethtool_keee *edata)
{
struct r8152 *tp = netdev_priv(net);
int ret;
@@ -10078,7 +10077,7 @@ static int rtl8152_cfgselector_choose_configuration(struct usb_device *udev)
* driver supports it.
*/
if (__rtl_get_hw_ver(udev) == RTL_VER_UNKNOWN)
- return 0;
+ return -ENODEV;
/* The vendor mode is not always config #1, so to find it out. */
c = udev->config;
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index a530f20ee257..2fa46baa589e 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -2105,6 +2105,11 @@ static const struct usb_device_id products[] = {
.driver_info = (unsigned long) &smsc95xx_info,
},
{
+ /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */
+ USB_DEVICE(0x0878, 0x1400),
+ .driver_info = (unsigned long)&smsc95xx_info,
+ },
+ {
/* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */
USB_DEVICE(0x184F, 0x0051),
.driver_info = (unsigned long)&smsc95xx_info,
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index 143bd4ab160d..57947a5590cc 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -737,7 +737,9 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf)
data->eeprom_len = SR9800_EEPROM_LEN;
- usbnet_get_endpoints(dev, intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ goto out;
/* LED Setting Rule :
* AABB:CCDD
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 2d14b0d78541..e84efa661589 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1633,7 +1633,6 @@ void usbnet_disconnect (struct usb_interface *intf)
usb_free_urb(dev->interrupt);
kfree(dev->padding_pkt);
- free_percpu(net->tstats);
free_netdev(net);
}
EXPORT_SYMBOL_GPL(usbnet_disconnect);
@@ -1645,7 +1644,6 @@ static const struct net_device_ops usbnet_netdev_ops = {
.ndo_tx_timeout = usbnet_tx_timeout,
.ndo_set_rx_mode = usbnet_set_rx_mode,
.ndo_change_mtu = usbnet_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
@@ -1654,11 +1652,11 @@ static const struct net_device_ops usbnet_netdev_ops = {
// precondition: never called in_interrupt
-static struct device_type wlan_type = {
+static const struct device_type wlan_type = {
.name = "wlan",
};
-static struct device_type wwan_type = {
+static const struct device_type wwan_type = {
.name = "wwan",
};
@@ -1710,10 +1708,6 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
dev->rx_speed = SPEED_UNSET;
dev->tx_speed = SPEED_UNSET;
- net->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!net->tstats)
- goto out0;
-
dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV
| NETIF_MSG_PROBE | NETIF_MSG_LINK);
init_waitqueue_head(&dev->wait);
@@ -1743,6 +1737,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
net->netdev_ops = &usbnet_netdev_ops;
net->watchdog_timeo = TX_TIMEOUT_JIFFIES;
net->ethtool_ops = &usbnet_ethtool_ops;
+ net->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
// allow device-specific bind/init procedures
// NOTE net->name still not usable ...
@@ -1861,8 +1856,6 @@ out1:
*/
cancel_work_sync(&dev->kevent);
del_timer_sync(&dev->delay);
- free_percpu(net->tstats);
-out0:
free_netdev(net);
out:
return status;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 578e36ea1589..13d902462d8e 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -729,80 +729,10 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
if (skb_shared(skb) || skb_head_is_locked(skb) ||
skb_shinfo(skb)->nr_frags ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- u32 size, len, max_head_size, off, truesize, page_offset;
- struct sk_buff *nskb;
- struct page *page;
- int i, head_off;
- void *va;
-
- /* We need a private copy of the skb and data buffers since
- * the ebpf program can modify it. We segment the original skb
- * into order-0 pages without linearize it.
- *
- * Make sure we have enough space for linear and paged area
- */
- max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
- VETH_XDP_HEADROOM);
- if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
- goto drop;
-
- size = min_t(u32, skb->len, max_head_size);
- truesize = SKB_HEAD_ALIGN(size) + VETH_XDP_HEADROOM;
-
- /* Allocate skb head */
- va = page_pool_dev_alloc_va(rq->page_pool, &truesize);
- if (!va)
- goto drop;
-
- nskb = napi_build_skb(va, truesize);
- if (!nskb) {
- page_pool_free_va(rq->page_pool, va, true);
+ if (skb_pp_cow_data(rq->page_pool, pskb, XDP_PACKET_HEADROOM))
goto drop;
- }
-
- skb_reserve(nskb, VETH_XDP_HEADROOM);
- skb_copy_header(nskb, skb);
- skb_mark_for_recycle(nskb);
-
- if (skb_copy_bits(skb, 0, nskb->data, size)) {
- consume_skb(nskb);
- goto drop;
- }
- skb_put(nskb, size);
- head_off = skb_headroom(nskb) - skb_headroom(skb);
- skb_headers_offset_update(nskb, head_off);
-
- /* Allocate paged area of new skb */
- off = size;
- len = skb->len - off;
-
- for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
- size = min_t(u32, len, PAGE_SIZE);
- truesize = size;
-
- page = page_pool_dev_alloc(rq->page_pool, &page_offset,
- &truesize);
- if (!page) {
- consume_skb(nskb);
- goto drop;
- }
-
- skb_add_rx_frag(nskb, i, page, page_offset, size,
- truesize);
- if (skb_copy_bits(skb, off,
- page_address(page) + page_offset,
- size)) {
- consume_skb(nskb);
- goto drop;
- }
-
- len -= size;
- off += size;
- }
-
- consume_skb(skb);
- skb = nskb;
+ skb = *pskb;
}
/* SKB "head" area always have tailroom for skb_shared_info */
@@ -1208,14 +1138,6 @@ static int veth_enable_xdp(struct net_device *dev)
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
return err;
}
-
- if (!veth_gro_requested(dev)) {
- /* user-space did not require GRO, but adding XDP
- * is supposed to get GRO working
- */
- dev->features |= NETIF_F_GRO;
- netdev_features_change(dev);
- }
}
}
@@ -1235,18 +1157,9 @@ static void veth_disable_xdp(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++)
rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
- if (!netif_running(dev) || !veth_gro_requested(dev)) {
+ if (!netif_running(dev) || !veth_gro_requested(dev))
veth_napi_del(dev);
- /* if user-space did not require GRO, since adding XDP
- * enabled it, clear it now
- */
- if (!veth_gro_requested(dev) && netif_running(dev)) {
- dev->features &= ~NETIF_F_GRO;
- netdev_features_change(dev);
- }
- }
-
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}
@@ -1478,7 +1391,8 @@ static int veth_alloc_queues(struct net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
int i;
- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
+ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
+ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!priv->rq)
return -ENOMEM;
@@ -1494,11 +1408,12 @@ static void veth_free_queues(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
- kfree(priv->rq);
+ kvfree(priv->rq);
}
static int veth_dev_init(struct net_device *dev)
{
+ netdev_lockdep_set_classes(dev);
return veth_alloc_queues(dev);
}
@@ -1530,7 +1445,7 @@ static int veth_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
- iflink = peer ? peer->ifindex : 0;
+ iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;
@@ -1654,6 +1569,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
if (!old_prog) {
+ if (!veth_gro_requested(dev)) {
+ /* user-space did not require GRO, but adding
+ * XDP is supposed to get GRO working
+ */
+ dev->features |= NETIF_F_GRO;
+ netdev_features_change(dev);
+ }
+
peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
peer->max_mtu = max_mtu;
}
@@ -1669,6 +1592,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (dev->flags & IFF_UP)
veth_disable_xdp(dev);
+ /* if user-space did not require GRO, since adding XDP
+ * enabled it, clear it now
+ */
+ if (!veth_gro_requested(dev)) {
+ dev->features &= ~NETIF_F_GRO;
+ netdev_features_change(dev);
+ }
+
if (peer) {
peer->hw_features |= NETIF_F_GSO_SOFTWARE;
peer->max_mtu = ETH_MAX_MTU;
diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c
index b1bb1b04b664..a1ba5169ed5d 100644
--- a/drivers/net/vsockmon.c
+++ b/drivers/net/vsockmon.c
@@ -13,19 +13,6 @@
#define DEFAULT_MTU (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + \
sizeof(struct af_vsockmon_hdr))
-static int vsockmon_dev_init(struct net_device *dev)
-{
- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
- if (!dev->lstats)
- return -ENOMEM;
- return 0;
-}
-
-static void vsockmon_dev_uninit(struct net_device *dev)
-{
- free_percpu(dev->lstats);
-}
-
struct vsockmon {
struct vsock_tap vt;
};
@@ -59,9 +46,6 @@ static void
vsockmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
dev_lstats_read(dev, &stats->rx_packets, &stats->rx_bytes);
-
- stats->tx_packets = 0;
- stats->tx_bytes = 0;
}
static int vsockmon_is_valid_mtu(int new_mtu)
@@ -79,8 +63,6 @@ static int vsockmon_change_mtu(struct net_device *dev, int new_mtu)
}
static const struct net_device_ops vsockmon_ops = {
- .ndo_init = vsockmon_dev_init,
- .ndo_uninit = vsockmon_dev_uninit,
.ndo_open = vsockmon_open,
.ndo_stop = vsockmon_close,
.ndo_start_xmit = vsockmon_xmit,
@@ -112,6 +94,7 @@ static void vsockmon_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->mtu = DEFAULT_MTU;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
}
static struct rtnl_link_ops vsockmon_link_ops __read_mostly = {
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 16106e088c63..3495591a5c29 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2841,26 +2841,19 @@ static int vxlan_init(struct net_device *dev)
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
vxlan_vnigroup_init(vxlan);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats) {
- err = -ENOMEM;
- goto err_vnigroup_uninit;
- }
-
err = gro_cells_init(&vxlan->gro_cells, dev);
if (err)
- goto err_free_percpu;
+ goto err_vnigroup_uninit;
err = vxlan_mdb_init(vxlan);
if (err)
goto err_gro_cells_destroy;
+ netdev_lockdep_set_classes(dev);
return 0;
err_gro_cells_destroy:
gro_cells_destroy(&vxlan->gro_cells);
-err_free_percpu:
- free_percpu(dev->tstats);
err_vnigroup_uninit:
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
vxlan_vnigroup_uninit(vxlan);
@@ -2891,8 +2884,6 @@ static void vxlan_uninit(struct net_device *dev)
gro_cells_destroy(&vxlan->gro_cells);
vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
-
- free_percpu(dev->tstats);
}
/* Start ageing timer and join group when device is brought up */
@@ -3223,7 +3214,6 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_open = vxlan_open,
.ndo_stop = vxlan_stop,
.ndo_start_xmit = vxlan_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_set_rx_mode = vxlan_set_multicast_list,
.ndo_change_mtu = vxlan_change_mtu,
.ndo_validate_addr = eth_validate_addr,
@@ -3247,13 +3237,12 @@ static const struct net_device_ops vxlan_netdev_raw_ops = {
.ndo_open = vxlan_open,
.ndo_stop = vxlan_stop,
.ndo_start_xmit = vxlan_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_change_mtu = vxlan_change_mtu,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type vxlan_type = {
+static const struct device_type vxlan_type = {
.name = "vxlan",
};
@@ -3315,6 +3304,7 @@ static void vxlan_setup(struct net_device *dev)
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = ETH_MAX_MTU;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
INIT_LIST_HEAD(&vxlan->next);
timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
@@ -4826,55 +4816,43 @@ static __net_init int vxlan_init_net(struct net *net)
NULL);
}
-static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit vxlan_destroy_tunnels(struct vxlan_net *vn,
+ struct list_head *dev_to_kill)
{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan, *next;
- struct net_device *dev, *aux;
-
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &vxlan_link_ops)
- unregister_netdevice_queue(dev, head);
-
- list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
- /* If vxlan->dev is in the same netns, it has already been added
- * to the list by the previous loop.
- */
- if (!net_eq(dev_net(vxlan->dev), net))
- unregister_netdevice_queue(vxlan->dev, head);
- }
+ list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next)
+ vxlan_dellink(vxlan->dev, dev_to_kill);
}
-static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
+static void __net_exit vxlan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- unsigned int h;
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
- }
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
- vxlan_destroy_tunnels(net, &list);
+ __unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ vxlan_destroy_tunnels(vn, dev_to_kill);
+ }
+}
- list_for_each_entry(net, net_list, exit_list) {
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+static void __net_exit vxlan_exit_net(struct net *net)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ unsigned int h;
- for (h = 0; h < PORT_HASH_SIZE; ++h)
- WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
- }
+ for (h = 0; h < PORT_HASH_SIZE; ++h)
+ WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
}
static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net,
- .exit_batch = vxlan_exit_batch_net,
+ .exit_batch_rtnl = vxlan_exit_batch_rtnl,
+ .exit = vxlan_exit_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
};
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 7dda87756d3f..31ab2136cdf1 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -197,6 +197,18 @@ config FARSYNC
To compile this driver as a module, choose M here: the
module will be called farsync.
+config FSL_QMC_HDLC
+ tristate "Freescale QMC HDLC support"
+ depends on HDLC
+ depends on CPM_QMC
+ help
+ HDLC support using the Freescale QUICC Multichannel Controller (QMC).
+
+ To compile this driver as a module, choose M here: the
+ module will be called fsl_qmc_hdlc.
+
+ If unsure, say N.
+
config FSL_UCC_HDLC
tristate "Freescale QUICC Engine HDLC support"
depends on HDLC
diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile
index 8119b49d1da9..00e9b7ee1e01 100644
--- a/drivers/net/wan/Makefile
+++ b/drivers/net/wan/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_WANXL) += wanxl.o
obj-$(CONFIG_PCI200SYN) += pci200syn.o
obj-$(CONFIG_PC300TOO) += pc300too.o
obj-$(CONFIG_IXP4XX_HSS) += ixp4xx_hss.o
+obj-$(CONFIG_FSL_QMC_HDLC) += fsl_qmc_hdlc.o
obj-$(CONFIG_FSL_UCC_HDLC) += fsl_ucc_hdlc.o
obj-$(CONFIG_SLIC_DS26522) += slic_ds26522.o
diff --git a/drivers/net/wan/framer/framer-core.c b/drivers/net/wan/framer/framer-core.c
index c04dc88bda6c..f547c22e26ac 100644
--- a/drivers/net/wan/framer/framer-core.c
+++ b/drivers/net/wan/framer/framer-core.c
@@ -18,7 +18,12 @@
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
-static struct class *framer_class;
+static void framer_release(struct device *dev);
+static const struct class framer_class = {
+ .name = "framer",
+ .dev_release = framer_release,
+};
+
static DEFINE_MUTEX(framer_provider_mutex);
static LIST_HEAD(framer_provider_list);
static DEFINE_IDA(framer_ida);
@@ -384,7 +389,7 @@ static struct framer_provider *framer_provider_of_lookup(const struct device_nod
return ERR_PTR(-EPROBE_DEFER);
}
-static struct framer *framer_of_get_from_provider(struct of_phandle_args *args)
+static struct framer *framer_of_get_from_provider(const struct of_phandle_args *args)
{
struct framer_provider *framer_provider;
struct framer *framer;
@@ -627,7 +632,7 @@ struct framer *framer_create(struct device *dev, struct device_node *node,
INIT_DELAYED_WORK(&framer->polling_work, framer_polling_work);
BLOCKING_INIT_NOTIFIER_HEAD(&framer->notifier_list);
- framer->dev.class = framer_class;
+ framer->dev.class = &framer_class;
framer->dev.parent = dev;
framer->dev.of_node = node ? node : dev->of_node;
framer->id = id;
@@ -735,12 +740,13 @@ EXPORT_SYMBOL_GPL(devm_framer_create);
* should provide a custom of_xlate function that reads the *args* and returns
* the appropriate framer.
*/
-struct framer *framer_provider_simple_of_xlate(struct device *dev, struct of_phandle_args *args)
+struct framer *framer_provider_simple_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
{
struct class_dev_iter iter;
struct framer *framer;
- class_dev_iter_init(&iter, framer_class, NULL, NULL);
+ class_dev_iter_init(&iter, &framer_class, NULL, NULL);
while ((dev = class_dev_iter_next(&iter))) {
framer = dev_to_framer(dev);
if (args->np != framer->dev.of_node)
@@ -768,7 +774,7 @@ EXPORT_SYMBOL_GPL(framer_provider_simple_of_xlate);
struct framer_provider *
__framer_provider_of_register(struct device *dev, struct module *owner,
struct framer *(*of_xlate)(struct device *dev,
- struct of_phandle_args *args))
+ const struct of_phandle_args *args))
{
struct framer_provider *framer_provider;
@@ -830,7 +836,7 @@ static void devm_framer_provider_of_unregister(struct device *dev, void *res)
struct framer_provider *
__devm_framer_provider_of_register(struct device *dev, struct module *owner,
struct framer *(*of_xlate)(struct device *dev,
- struct of_phandle_args *args))
+ const struct of_phandle_args *args))
{
struct framer_provider **ptr, *framer_provider;
@@ -869,14 +875,6 @@ static void framer_release(struct device *dev)
static int __init framer_core_init(void)
{
- framer_class = class_create("framer");
- if (IS_ERR(framer_class)) {
- pr_err("failed to create framer class (%pe)\n", framer_class);
- return PTR_ERR(framer_class);
- }
-
- framer_class->dev_release = framer_release;
-
- return 0;
+ return class_register(&framer_class);
}
device_initcall(framer_core_init);
diff --git a/drivers/net/wan/framer/pef2256/pef2256.c b/drivers/net/wan/framer/pef2256/pef2256.c
index 4f81053ee4f0..413a3c1d15bb 100644
--- a/drivers/net/wan/framer/pef2256/pef2256.c
+++ b/drivers/net/wan/framer/pef2256/pef2256.c
@@ -838,7 +838,7 @@ static int pef2256_probe(struct platform_device *pdev)
return 0;
}
-static int pef2256_remove(struct platform_device *pdev)
+static void pef2256_remove(struct platform_device *pdev)
{
struct pef2256 *pef2256 = platform_get_drvdata(pdev);
@@ -849,8 +849,6 @@ static int pef2256_remove(struct platform_device *pdev)
pef2256_write8(pef2256, PEF2256_IMR3, 0xff);
pef2256_write8(pef2256, PEF2256_IMR4, 0xff);
pef2256_write8(pef2256, PEF2256_IMR5, 0xff);
-
- return 0;
}
static const struct of_device_id pef2256_id_table[] = {
@@ -865,7 +863,7 @@ static struct platform_driver pef2256_driver = {
.of_match_table = pef2256_id_table,
},
.probe = pef2256_probe,
- .remove = pef2256_remove,
+ .remove_new = pef2256_remove,
};
module_platform_driver(pef2256_driver);
diff --git a/drivers/net/wan/fsl_qmc_hdlc.c b/drivers/net/wan/fsl_qmc_hdlc.c
new file mode 100644
index 000000000000..960371df470a
--- /dev/null
+++ b/drivers/net/wan/fsl_qmc_hdlc.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale QMC HDLC Device Driver
+ *
+ * Copyright 2023 CS GROUP France
+ *
+ * Author: Herve Codina <herve.codina@bootlin.com>
+ */
+
+#include <linux/array_size.h>
+#include <linux/bug.h>
+#include <linux/cleanup.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/framer/framer.h>
+#include <linux/hdlc.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <soc/fsl/qe/qmc.h>
+
+struct qmc_hdlc_desc {
+ struct net_device *netdev;
+ struct sk_buff *skb; /* NULL if the descriptor is not in use */
+ dma_addr_t dma_addr;
+ size_t dma_size;
+};
+
+struct qmc_hdlc {
+ struct device *dev;
+ struct qmc_chan *qmc_chan;
+ struct net_device *netdev;
+ struct framer *framer;
+ spinlock_t carrier_lock; /* Protect carrier detection */
+ struct notifier_block nb;
+ bool is_crc32;
+ spinlock_t tx_lock; /* Protect tx descriptors */
+ struct qmc_hdlc_desc tx_descs[8];
+ unsigned int tx_out;
+ struct qmc_hdlc_desc rx_descs[4];
+ u32 slot_map;
+};
+
+static struct qmc_hdlc *netdev_to_qmc_hdlc(struct net_device *netdev)
+{
+ return dev_to_hdlc(netdev)->priv;
+}
+
+static int qmc_hdlc_framer_set_carrier(struct qmc_hdlc *qmc_hdlc)
+{
+ struct framer_status framer_status;
+ int ret;
+
+ if (!qmc_hdlc->framer)
+ return 0;
+
+ guard(spinlock_irqsave)(&qmc_hdlc->carrier_lock);
+
+ ret = framer_get_status(qmc_hdlc->framer, &framer_status);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "get framer status failed (%d)\n", ret);
+ return ret;
+ }
+ if (framer_status.link_is_on)
+ netif_carrier_on(qmc_hdlc->netdev);
+ else
+ netif_carrier_off(qmc_hdlc->netdev);
+
+ return 0;
+}
+
+static int qmc_hdlc_framer_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct qmc_hdlc *qmc_hdlc = container_of(nb, struct qmc_hdlc, nb);
+ int ret;
+
+ if (action != FRAMER_EVENT_STATUS)
+ return NOTIFY_DONE;
+
+ ret = qmc_hdlc_framer_set_carrier(qmc_hdlc);
+ return ret ? NOTIFY_DONE : NOTIFY_OK;
+}
+
+static int qmc_hdlc_framer_start(struct qmc_hdlc *qmc_hdlc)
+{
+ struct framer_status framer_status;
+ int ret;
+
+ if (!qmc_hdlc->framer)
+ return 0;
+
+ ret = framer_power_on(qmc_hdlc->framer);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "framer power-on failed (%d)\n", ret);
+ return ret;
+ }
+
+ /* Be sure that get_status is supported */
+ ret = framer_get_status(qmc_hdlc->framer, &framer_status);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "get framer status failed (%d)\n", ret);
+ goto framer_power_off;
+ }
+
+ qmc_hdlc->nb.notifier_call = qmc_hdlc_framer_notifier;
+ ret = framer_notifier_register(qmc_hdlc->framer, &qmc_hdlc->nb);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "framer notifier register failed (%d)\n", ret);
+ goto framer_power_off;
+ }
+
+ return 0;
+
+framer_power_off:
+ framer_power_off(qmc_hdlc->framer);
+ return ret;
+}
+
+static void qmc_hdlc_framer_stop(struct qmc_hdlc *qmc_hdlc)
+{
+ if (!qmc_hdlc->framer)
+ return;
+
+ framer_notifier_unregister(qmc_hdlc->framer, &qmc_hdlc->nb);
+ framer_power_off(qmc_hdlc->framer);
+}
+
+static int qmc_hdlc_framer_set_iface(struct qmc_hdlc *qmc_hdlc, int if_iface,
+ const te1_settings *te1)
+{
+ struct framer_config config;
+ int ret;
+
+ if (!qmc_hdlc->framer)
+ return 0;
+
+ ret = framer_get_config(qmc_hdlc->framer, &config);
+ if (ret)
+ return ret;
+
+ switch (if_iface) {
+ case IF_IFACE_E1:
+ config.iface = FRAMER_IFACE_E1;
+ break;
+ case IF_IFACE_T1:
+ config.iface = FRAMER_IFACE_T1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (te1->clock_type) {
+ case CLOCK_DEFAULT:
+ /* Keep current value */
+ break;
+ case CLOCK_EXT:
+ config.clock_type = FRAMER_CLOCK_EXT;
+ break;
+ case CLOCK_INT:
+ config.clock_type = FRAMER_CLOCK_INT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ config.line_clock_rate = te1->clock_rate;
+
+ return framer_set_config(qmc_hdlc->framer, &config);
+}
+
+static int qmc_hdlc_framer_get_iface(struct qmc_hdlc *qmc_hdlc, int *if_iface, te1_settings *te1)
+{
+ struct framer_config config;
+ int ret;
+
+ if (!qmc_hdlc->framer) {
+ *if_iface = IF_IFACE_E1;
+ return 0;
+ }
+
+ ret = framer_get_config(qmc_hdlc->framer, &config);
+ if (ret)
+ return ret;
+
+ switch (config.iface) {
+ case FRAMER_IFACE_E1:
+ *if_iface = IF_IFACE_E1;
+ break;
+ case FRAMER_IFACE_T1:
+ *if_iface = IF_IFACE_T1;
+ break;
+ }
+
+ if (!te1)
+ return 0; /* Only iface type requested */
+
+ switch (config.clock_type) {
+ case FRAMER_CLOCK_EXT:
+ te1->clock_type = CLOCK_EXT;
+ break;
+ case FRAMER_CLOCK_INT:
+ te1->clock_type = CLOCK_INT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ te1->clock_rate = config.line_clock_rate;
+ return 0;
+}
+
+static int qmc_hdlc_framer_init(struct qmc_hdlc *qmc_hdlc)
+{
+ int ret;
+
+ if (!qmc_hdlc->framer)
+ return 0;
+
+ ret = framer_init(qmc_hdlc->framer);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "framer init failed (%d)\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void qmc_hdlc_framer_exit(struct qmc_hdlc *qmc_hdlc)
+{
+ if (!qmc_hdlc->framer)
+ return;
+
+ framer_exit(qmc_hdlc->framer);
+}
+
+static int qmc_hdlc_recv_queue(struct qmc_hdlc *qmc_hdlc, struct qmc_hdlc_desc *desc, size_t size);
+
+#define QMC_HDLC_RX_ERROR_FLAGS \
+ (QMC_RX_FLAG_HDLC_OVF | QMC_RX_FLAG_HDLC_UNA | \
+ QMC_RX_FLAG_HDLC_CRC | QMC_RX_FLAG_HDLC_ABORT)
+
+static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int flags)
+{
+ struct qmc_hdlc_desc *desc = context;
+ struct net_device *netdev;
+ struct qmc_hdlc *qmc_hdlc;
+ int ret;
+
+ netdev = desc->netdev;
+ qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size, DMA_FROM_DEVICE);
+
+ if (flags & QMC_HDLC_RX_ERROR_FLAGS) {
+ netdev->stats.rx_errors++;
+ if (flags & QMC_RX_FLAG_HDLC_OVF) /* Data overflow */
+ netdev->stats.rx_over_errors++;
+ if (flags & QMC_RX_FLAG_HDLC_UNA) /* bits received not multiple of 8 */
+ netdev->stats.rx_frame_errors++;
+ if (flags & QMC_RX_FLAG_HDLC_ABORT) /* Received an abort sequence */
+ netdev->stats.rx_frame_errors++;
+ if (flags & QMC_RX_FLAG_HDLC_CRC) /* CRC error */
+ netdev->stats.rx_crc_errors++;
+ kfree_skb(desc->skb);
+ } else {
+ netdev->stats.rx_packets++;
+ netdev->stats.rx_bytes += length;
+
+ skb_put(desc->skb, length);
+ desc->skb->protocol = hdlc_type_trans(desc->skb, netdev);
+ netif_rx(desc->skb);
+ }
+
+ /* Re-queue a transfer using the same descriptor */
+ ret = qmc_hdlc_recv_queue(qmc_hdlc, desc, desc->dma_size);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "queue recv desc failed (%d)\n", ret);
+ netdev->stats.rx_errors++;
+ }
+}
+
+static int qmc_hdlc_recv_queue(struct qmc_hdlc *qmc_hdlc, struct qmc_hdlc_desc *desc, size_t size)
+{
+ int ret;
+
+ desc->skb = dev_alloc_skb(size);
+ if (!desc->skb)
+ return -ENOMEM;
+
+ desc->dma_size = size;
+ desc->dma_addr = dma_map_single(qmc_hdlc->dev, desc->skb->data,
+ desc->dma_size, DMA_FROM_DEVICE);
+ ret = dma_mapping_error(qmc_hdlc->dev, desc->dma_addr);
+ if (ret)
+ goto free_skb;
+
+ ret = qmc_chan_read_submit(qmc_hdlc->qmc_chan, desc->dma_addr, desc->dma_size,
+ qmc_hcld_recv_complete, desc);
+ if (ret)
+ goto dma_unmap;
+
+ return 0;
+
+dma_unmap:
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size, DMA_FROM_DEVICE);
+free_skb:
+ kfree_skb(desc->skb);
+ desc->skb = NULL;
+ return ret;
+}
+
+static void qmc_hdlc_xmit_complete(void *context)
+{
+ struct qmc_hdlc_desc *desc = context;
+ struct net_device *netdev;
+ struct qmc_hdlc *qmc_hdlc;
+ struct sk_buff *skb;
+
+ netdev = desc->netdev;
+ qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+
+ scoped_guard(spinlock_irqsave, &qmc_hdlc->tx_lock) {
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size, DMA_TO_DEVICE);
+ skb = desc->skb;
+ desc->skb = NULL; /* Release the descriptor */
+ if (netif_queue_stopped(netdev))
+ netif_wake_queue(netdev);
+ }
+
+ netdev->stats.tx_packets++;
+ netdev->stats.tx_bytes += skb->len;
+
+ dev_consume_skb_any(skb);
+}
+
+static int qmc_hdlc_xmit_queue(struct qmc_hdlc *qmc_hdlc, struct qmc_hdlc_desc *desc)
+{
+ int ret;
+
+ desc->dma_addr = dma_map_single(qmc_hdlc->dev, desc->skb->data,
+ desc->dma_size, DMA_TO_DEVICE);
+ ret = dma_mapping_error(qmc_hdlc->dev, desc->dma_addr);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "failed to map skb\n");
+ return ret;
+ }
+
+ ret = qmc_chan_write_submit(qmc_hdlc->qmc_chan, desc->dma_addr, desc->dma_size,
+ qmc_hdlc_xmit_complete, desc);
+ if (ret) {
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size, DMA_TO_DEVICE);
+ dev_err(qmc_hdlc->dev, "qmc chan write returns %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static netdev_tx_t qmc_hdlc_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct qmc_hdlc *qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+ struct qmc_hdlc_desc *desc;
+ int err;
+
+ guard(spinlock_irqsave)(&qmc_hdlc->tx_lock);
+
+ desc = &qmc_hdlc->tx_descs[qmc_hdlc->tx_out];
+ if (WARN_ONCE(desc->skb, "No tx descriptors available\n")) {
+ /* Should never happen.
+ * Previous xmit should have already stopped the queue.
+ */
+ netif_stop_queue(netdev);
+ return NETDEV_TX_BUSY;
+ }
+
+ desc->netdev = netdev;
+ desc->dma_size = skb->len;
+ desc->skb = skb;
+ err = qmc_hdlc_xmit_queue(qmc_hdlc, desc);
+ if (err) {
+ desc->skb = NULL; /* Release the descriptor */
+ if (err == -EBUSY) {
+ netif_stop_queue(netdev);
+ return NETDEV_TX_BUSY;
+ }
+ dev_kfree_skb(skb);
+ netdev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+
+ qmc_hdlc->tx_out = (qmc_hdlc->tx_out + 1) % ARRAY_SIZE(qmc_hdlc->tx_descs);
+
+ if (qmc_hdlc->tx_descs[qmc_hdlc->tx_out].skb)
+ netif_stop_queue(netdev);
+
+ return NETDEV_TX_OK;
+}
+
+static int qmc_hdlc_xlate_slot_map(struct qmc_hdlc *qmc_hdlc,
+ u32 slot_map, struct qmc_chan_ts_info *ts_info)
+{
+ DECLARE_BITMAP(ts_mask_avail, 64);
+ DECLARE_BITMAP(ts_mask, 64);
+ DECLARE_BITMAP(map, 64);
+
+ /* Tx and Rx available masks must be identical */
+ if (ts_info->rx_ts_mask_avail != ts_info->tx_ts_mask_avail) {
+ dev_err(qmc_hdlc->dev, "tx and rx available timeslots mismatch (0x%llx, 0x%llx)\n",
+ ts_info->rx_ts_mask_avail, ts_info->tx_ts_mask_avail);
+ return -EINVAL;
+ }
+
+ bitmap_from_u64(ts_mask_avail, ts_info->rx_ts_mask_avail);
+ bitmap_from_u64(map, slot_map);
+ bitmap_scatter(ts_mask, map, ts_mask_avail, 64);
+
+ if (bitmap_weight(ts_mask, 64) != bitmap_weight(map, 64)) {
+ dev_err(qmc_hdlc->dev, "Cannot translate timeslots %64pb -> (%64pb, %64pb)\n",
+ map, ts_mask_avail, ts_mask);
+ return -EINVAL;
+ }
+
+ bitmap_to_arr64(&ts_info->tx_ts_mask, ts_mask, 64);
+ ts_info->rx_ts_mask = ts_info->tx_ts_mask;
+ return 0;
+}
+
+static int qmc_hdlc_xlate_ts_info(struct qmc_hdlc *qmc_hdlc,
+ const struct qmc_chan_ts_info *ts_info, u32 *slot_map)
+{
+ DECLARE_BITMAP(ts_mask_avail, 64);
+ DECLARE_BITMAP(ts_mask, 64);
+ DECLARE_BITMAP(map, 64);
+ u32 slot_array[2];
+
+ /* Tx and Rx masks and available masks must be identical */
+ if (ts_info->rx_ts_mask_avail != ts_info->tx_ts_mask_avail) {
+ dev_err(qmc_hdlc->dev, "tx and rx available timeslots mismatch (0x%llx, 0x%llx)\n",
+ ts_info->rx_ts_mask_avail, ts_info->tx_ts_mask_avail);
+ return -EINVAL;
+ }
+ if (ts_info->rx_ts_mask != ts_info->tx_ts_mask) {
+ dev_err(qmc_hdlc->dev, "tx and rx timeslots mismatch (0x%llx, 0x%llx)\n",
+ ts_info->rx_ts_mask, ts_info->tx_ts_mask);
+ return -EINVAL;
+ }
+
+ bitmap_from_u64(ts_mask_avail, ts_info->rx_ts_mask_avail);
+ bitmap_from_u64(ts_mask, ts_info->rx_ts_mask);
+ bitmap_gather(map, ts_mask, ts_mask_avail, 64);
+
+ if (bitmap_weight(ts_mask, 64) != bitmap_weight(map, 64)) {
+ dev_err(qmc_hdlc->dev, "Cannot translate timeslots (%64pb, %64pb) -> %64pb\n",
+ ts_mask_avail, ts_mask, map);
+ return -EINVAL;
+ }
+
+ bitmap_to_arr32(slot_array, map, 64);
+ if (slot_array[1]) {
+ dev_err(qmc_hdlc->dev, "Slot map out of 32bit (%64pb, %64pb) -> %64pb\n",
+ ts_mask_avail, ts_mask, map);
+ return -EINVAL;
+ }
+
+ *slot_map = slot_array[0];
+ return 0;
+}
+
+static int qmc_hdlc_set_iface(struct qmc_hdlc *qmc_hdlc, int if_iface, const te1_settings *te1)
+{
+ struct qmc_chan_ts_info ts_info;
+ int ret;
+
+ ret = qmc_chan_get_ts_info(qmc_hdlc->qmc_chan, &ts_info);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "get QMC channel ts info failed %d\n", ret);
+ return ret;
+ }
+ ret = qmc_hdlc_xlate_slot_map(qmc_hdlc, te1->slot_map, &ts_info);
+ if (ret)
+ return ret;
+
+ ret = qmc_chan_set_ts_info(qmc_hdlc->qmc_chan, &ts_info);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "set QMC channel ts info failed %d\n", ret);
+ return ret;
+ }
+
+ qmc_hdlc->slot_map = te1->slot_map;
+
+ ret = qmc_hdlc_framer_set_iface(qmc_hdlc, if_iface, te1);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "framer set iface failed %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int qmc_hdlc_ioctl(struct net_device *netdev, struct if_settings *ifs)
+{
+ struct qmc_hdlc *qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+ te1_settings te1;
+ int ret;
+
+ switch (ifs->type) {
+ case IF_GET_IFACE:
+ if (ifs->size < sizeof(te1)) {
+ /* Retrieve type only */
+ ret = qmc_hdlc_framer_get_iface(qmc_hdlc, &ifs->type, NULL);
+ if (ret)
+ return ret;
+
+ if (!ifs->size)
+ return 0; /* only type requested */
+
+ ifs->size = sizeof(te1); /* data size wanted */
+ return -ENOBUFS;
+ }
+
+ memset(&te1, 0, sizeof(te1));
+
+ /* Retrieve info from framer */
+ ret = qmc_hdlc_framer_get_iface(qmc_hdlc, &ifs->type, &te1);
+ if (ret)
+ return ret;
+
+ /* Update slot_map */
+ te1.slot_map = qmc_hdlc->slot_map;
+
+ if (copy_to_user(ifs->ifs_ifsu.te1, &te1, sizeof(te1)))
+ return -EFAULT;
+ return 0;
+
+ case IF_IFACE_E1:
+ case IF_IFACE_T1:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (netdev->flags & IFF_UP)
+ return -EBUSY;
+
+ if (copy_from_user(&te1, ifs->ifs_ifsu.te1, sizeof(te1)))
+ return -EFAULT;
+
+ return qmc_hdlc_set_iface(qmc_hdlc, ifs->type, &te1);
+
+ default:
+ return hdlc_ioctl(netdev, ifs);
+ }
+}
+
+static int qmc_hdlc_open(struct net_device *netdev)
+{
+ struct qmc_hdlc *qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+ struct qmc_chan_param chan_param;
+ struct qmc_hdlc_desc *desc;
+ int ret;
+ int i;
+
+ ret = qmc_hdlc_framer_start(qmc_hdlc);
+ if (ret)
+ return ret;
+
+ ret = hdlc_open(netdev);
+ if (ret)
+ goto framer_stop;
+
+ /* Update carrier */
+ qmc_hdlc_framer_set_carrier(qmc_hdlc);
+
+ chan_param.mode = QMC_HDLC;
+ /* HDLC_MAX_MRU + 4 for the CRC
+ * HDLC_MAX_MRU + 4 + 8 for the CRC and some extraspace needed by the QMC
+ */
+ chan_param.hdlc.max_rx_buf_size = HDLC_MAX_MRU + 4 + 8;
+ chan_param.hdlc.max_rx_frame_size = HDLC_MAX_MRU + 4;
+ chan_param.hdlc.is_crc32 = qmc_hdlc->is_crc32;
+ ret = qmc_chan_set_param(qmc_hdlc->qmc_chan, &chan_param);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "failed to set param (%d)\n", ret);
+ goto hdlc_close;
+ }
+
+ /* Queue as many recv descriptors as possible */
+ for (i = 0; i < ARRAY_SIZE(qmc_hdlc->rx_descs); i++) {
+ desc = &qmc_hdlc->rx_descs[i];
+
+ desc->netdev = netdev;
+ ret = qmc_hdlc_recv_queue(qmc_hdlc, desc, chan_param.hdlc.max_rx_buf_size);
+ if (ret == -EBUSY && i != 0)
+ break; /* We use all the QMC chan capability */
+ if (ret)
+ goto free_desc;
+ }
+
+ ret = qmc_chan_start(qmc_hdlc->qmc_chan, QMC_CHAN_ALL);
+ if (ret) {
+ dev_err(qmc_hdlc->dev, "qmc chan start failed (%d)\n", ret);
+ goto free_desc;
+ }
+
+ netif_start_queue(netdev);
+
+ return 0;
+
+free_desc:
+ qmc_chan_reset(qmc_hdlc->qmc_chan, QMC_CHAN_ALL);
+ while (i--) {
+ desc = &qmc_hdlc->rx_descs[i];
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size,
+ DMA_FROM_DEVICE);
+ kfree_skb(desc->skb);
+ desc->skb = NULL;
+ }
+hdlc_close:
+ hdlc_close(netdev);
+framer_stop:
+ qmc_hdlc_framer_stop(qmc_hdlc);
+ return ret;
+}
+
+static int qmc_hdlc_close(struct net_device *netdev)
+{
+ struct qmc_hdlc *qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+ struct qmc_hdlc_desc *desc;
+ int i;
+
+ qmc_chan_stop(qmc_hdlc->qmc_chan, QMC_CHAN_ALL);
+ qmc_chan_reset(qmc_hdlc->qmc_chan, QMC_CHAN_ALL);
+
+ netif_stop_queue(netdev);
+
+ for (i = 0; i < ARRAY_SIZE(qmc_hdlc->tx_descs); i++) {
+ desc = &qmc_hdlc->tx_descs[i];
+ if (!desc->skb)
+ continue;
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size,
+ DMA_TO_DEVICE);
+ kfree_skb(desc->skb);
+ desc->skb = NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(qmc_hdlc->rx_descs); i++) {
+ desc = &qmc_hdlc->rx_descs[i];
+ if (!desc->skb)
+ continue;
+ dma_unmap_single(qmc_hdlc->dev, desc->dma_addr, desc->dma_size,
+ DMA_FROM_DEVICE);
+ kfree_skb(desc->skb);
+ desc->skb = NULL;
+ }
+
+ hdlc_close(netdev);
+ qmc_hdlc_framer_stop(qmc_hdlc);
+ return 0;
+}
+
+static int qmc_hdlc_attach(struct net_device *netdev, unsigned short encoding,
+ unsigned short parity)
+{
+ struct qmc_hdlc *qmc_hdlc = netdev_to_qmc_hdlc(netdev);
+
+ if (encoding != ENCODING_NRZ)
+ return -EINVAL;
+
+ switch (parity) {
+ case PARITY_CRC16_PR1_CCITT:
+ qmc_hdlc->is_crc32 = false;
+ break;
+ case PARITY_CRC32_PR1_CCITT:
+ qmc_hdlc->is_crc32 = true;
+ break;
+ default:
+ dev_err(qmc_hdlc->dev, "unsupported parity %u\n", parity);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct net_device_ops qmc_hdlc_netdev_ops = {
+ .ndo_open = qmc_hdlc_open,
+ .ndo_stop = qmc_hdlc_close,
+ .ndo_start_xmit = hdlc_start_xmit,
+ .ndo_siocwandev = qmc_hdlc_ioctl,
+};
+
+static int qmc_hdlc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct qmc_chan_ts_info ts_info;
+ struct qmc_hdlc *qmc_hdlc;
+ struct qmc_chan_info info;
+ hdlc_device *hdlc;
+ int ret;
+
+ qmc_hdlc = devm_kzalloc(dev, sizeof(*qmc_hdlc), GFP_KERNEL);
+ if (!qmc_hdlc)
+ return -ENOMEM;
+
+ qmc_hdlc->dev = dev;
+ spin_lock_init(&qmc_hdlc->tx_lock);
+ spin_lock_init(&qmc_hdlc->carrier_lock);
+
+ qmc_hdlc->qmc_chan = devm_qmc_chan_get_bychild(dev, dev->of_node);
+ if (IS_ERR(qmc_hdlc->qmc_chan))
+ return dev_err_probe(dev, PTR_ERR(qmc_hdlc->qmc_chan),
+ "get QMC channel failed\n");
+
+ ret = qmc_chan_get_info(qmc_hdlc->qmc_chan, &info);
+ if (ret)
+ return dev_err_probe(dev, ret, "get QMC channel info failed\n");
+
+ if (info.mode != QMC_HDLC)
+ return dev_err_probe(dev, -EINVAL, "QMC chan mode %d is not QMC_HDLC\n",
+ info.mode);
+
+ ret = qmc_chan_get_ts_info(qmc_hdlc->qmc_chan, &ts_info);
+ if (ret)
+ return dev_err_probe(dev, ret, "get QMC channel ts info failed\n");
+
+ ret = qmc_hdlc_xlate_ts_info(qmc_hdlc, &ts_info, &qmc_hdlc->slot_map);
+ if (ret)
+ return ret;
+
+ qmc_hdlc->framer = devm_framer_optional_get(dev, "fsl,framer");
+ if (IS_ERR(qmc_hdlc->framer))
+ return PTR_ERR(qmc_hdlc->framer);
+
+ ret = qmc_hdlc_framer_init(qmc_hdlc);
+ if (ret)
+ return ret;
+
+ qmc_hdlc->netdev = alloc_hdlcdev(qmc_hdlc);
+ if (!qmc_hdlc->netdev) {
+ ret = -ENOMEM;
+ goto framer_exit;
+ }
+
+ hdlc = dev_to_hdlc(qmc_hdlc->netdev);
+ hdlc->attach = qmc_hdlc_attach;
+ hdlc->xmit = qmc_hdlc_xmit;
+ SET_NETDEV_DEV(qmc_hdlc->netdev, dev);
+ qmc_hdlc->netdev->tx_queue_len = ARRAY_SIZE(qmc_hdlc->tx_descs);
+ qmc_hdlc->netdev->netdev_ops = &qmc_hdlc_netdev_ops;
+ ret = register_hdlc_device(qmc_hdlc->netdev);
+ if (ret) {
+ dev_err_probe(dev, ret, "failed to register hdlc device\n");
+ goto free_netdev;
+ }
+
+ platform_set_drvdata(pdev, qmc_hdlc);
+ return 0;
+
+free_netdev:
+ free_netdev(qmc_hdlc->netdev);
+framer_exit:
+ qmc_hdlc_framer_exit(qmc_hdlc);
+ return ret;
+}
+
+static int qmc_hdlc_remove(struct platform_device *pdev)
+{
+ struct qmc_hdlc *qmc_hdlc = platform_get_drvdata(pdev);
+
+ unregister_hdlc_device(qmc_hdlc->netdev);
+ free_netdev(qmc_hdlc->netdev);
+ qmc_hdlc_framer_exit(qmc_hdlc);
+
+ return 0;
+}
+
+static const struct of_device_id qmc_hdlc_id_table[] = {
+ { .compatible = "fsl,qmc-hdlc" },
+ {} /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, qmc_hdlc_driver);
+
+static struct platform_driver qmc_hdlc_driver = {
+ .driver = {
+ .name = "fsl-qmc-hdlc",
+ .of_match_table = qmc_hdlc_id_table,
+ },
+ .probe = qmc_hdlc_probe,
+ .remove = qmc_hdlc_remove,
+};
+module_platform_driver(qmc_hdlc_driver);
+
+MODULE_AUTHOR("Herve Codina <herve.codina@bootlin.com>");
+MODULE_DESCRIPTION("QMC HDLC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index a176653c8861..df275b4fccb6 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -263,7 +263,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
* call skb_cow_data, so that there's no chance that data is removed
* from the skb, so that later we can extract the original endpoint.
*/
- offset = skb->data - skb_network_header(skb);
+ offset = -skb_network_offset(skb);
skb_push(skb, offset);
num_frags = skb_cow_data(skb, 0, &trailer);
offset += sizeof(struct message_data);
diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c
index 2fceea9f6550..e3fd48dd3909 100644
--- a/drivers/net/wireless/admtek/adm8211.c
+++ b/drivers/net/wireless/admtek/adm8211.c
@@ -1759,6 +1759,10 @@ static int adm8211_alloc_rings(struct ieee80211_hw *dev)
}
static const struct ieee80211_ops adm8211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = adm8211_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = adm8211_start,
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 43e0db78d42b..815f8f599f5d 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -1358,6 +1358,10 @@ static void ar5523_configure_filter(struct ieee80211_hw *hw,
}
static const struct ieee80211_ops ar5523_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.start = ar5523_start,
.stop = ar5523_stop,
.tx = ar5523_tx,
@@ -1803,5 +1807,6 @@ static struct usb_driver ar5523_driver = {
module_usb_driver(ar5523_driver);
+MODULE_DESCRIPTION("Atheros AR5523 wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_FIRMWARE(AR5523_FIRMWARE_FILE);
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 0032f8aa892f..9ce6f49ab261 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -3,7 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
@@ -3613,7 +3613,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
default:
ath10k_err(ar, "unsupported core hardware revision %d\n",
hw_rev);
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
goto err_free_mac;
}
diff --git a/drivers/net/wireless/ath/ath10k/coredump.h b/drivers/net/wireless/ath/ath10k/coredump.h
index e5ef0352e319..8d274e0f374b 100644
--- a/drivers/net/wireless/ath/ath10k/coredump.h
+++ b/drivers/net/wireless/ath/ath10k/coredump.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: ISC */
/*
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _COREDUMP_H_
@@ -13,7 +13,11 @@
/**
* enum ath10k_fw_crash_dump_type - types of data in the dump file
- * @ATH10K_FW_CRASH_DUMP_REGDUMP: Register crash dump in binary format
+ * @ATH10K_FW_CRASH_DUMP_REGISTERS: Register crash dump in binary format
+ * @ATH10K_FW_CRASH_DUMP_CE_DATA: Copy Engine crash dump data
+ * @ATH10K_FW_CRASH_DUMP_RAM_DATA: RAM crash dump data, contains multiple
+ * struct ath10k_dump_ram_data_hdr
+ * @ATH10K_FW_CRASH_DUMP_MAX: Maximum enumeration
*/
enum ath10k_fw_crash_dump_type {
ATH10K_FW_CRASH_DUMP_REGISTERS = 0,
diff --git a/drivers/net/wireless/ath/ath10k/htt.c b/drivers/net/wireless/ath/ath10k/htt.c
index 907e1e13871a..dbaf262cd7c1 100644
--- a/drivers/net/wireless/ath/ath10k/htt.c
+++ b/drivers/net/wireless/ath/ath10k/htt.c
@@ -2,6 +2,7 @@
/*
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/slab.h>
@@ -381,7 +382,7 @@ static int ath10k_htt_verify_version(struct ath10k_htt *htt)
htt->target_version_major != 3) {
ath10k_err(ar, "unsupported htt major version %d. supported versions are 2 and 3\n",
htt->target_version_major);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
return 0;
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 4a9270e2a4c8..603f6de62b0a 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -3,7 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018, The Linux Foundation. All rights reserved.
- * Copyright (c) 2021, 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021, 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _HTT_H_
@@ -906,7 +906,7 @@ struct htt_data_tx_completion_ext {
__le16 msdus_rssi[];
} __packed;
-/**
+/*
* @brief target -> host TX completion indication message definition
*
* @details
@@ -1474,15 +1474,19 @@ enum htt_q_depth_type {
#define HTT_TX_Q_STATE_ENTRY_MULTIPLIER 0
/**
- * htt_q_state_conf - part of htt_frag_desc_bank_cfg for host q state config
+ * struct htt_q_state_conf - part of htt_frag_desc_bank_cfg for host q state config
*
* Defines host q state format and behavior. See htt_q_state.
*
+ * @paddr: Queue physical address
+ * @num_peers: Number of supported peers
+ * @num_tids: Number of supported TIDs
* @record_size: Defines the size of each host q entry in bytes. In practice
* however firmware (at least 10.4.3-00191) ignores this host
* configuration value and uses hardcoded value of 1.
* @record_multiplier: This is valid only when q depth type is MSDUs. It
* defines the exponent for the power of 2 multiplication.
+ * @pad: struct padding for 32-bit alignment
*/
struct htt_q_state_conf {
__le32 paddr;
@@ -1518,7 +1522,7 @@ struct htt_frag_desc_bank_cfg64 {
#define HTT_TX_Q_STATE_ENTRY_EXP_LSB 6
/**
- * htt_q_state - shared between host and firmware via DMA
+ * struct htt_q_state - shared between host and firmware via DMA
*
* This structure is used for the host to expose it's software queue state to
* firmware so that its rate control can schedule fetch requests for optimized
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 090bcf148d0c..e322b528baaf 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3,7 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "mac.h"
@@ -2034,8 +2034,8 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif)
if (!arvif->is_up)
return;
- if (!ieee80211_beacon_cntdwn_is_complete(vif)) {
- ieee80211_beacon_update_cntdwn(vif);
+ if (!ieee80211_beacon_cntdwn_is_complete(vif, 0)) {
+ ieee80211_beacon_update_cntdwn(vif, 0);
ret = ath10k_mac_setup_bcn_tmpl(arvif);
if (ret)
@@ -2047,7 +2047,7 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif)
ath10k_warn(ar, "failed to update prb tmpl during csa: %d\n",
ret);
} else {
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
}
}
@@ -4056,7 +4056,7 @@ static int ath10k_mac_tx(struct ath10k *ar,
!(skb_cb->flags & ATH10K_SKB_F_RAW_TX)) {
WARN_ON_ONCE(1);
ieee80211_free_txskb(hw, skb);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
}
@@ -7065,7 +7065,7 @@ static int ath10k_mac_set_tid_config(struct ath10k *ar, struct ieee80211_sta *st
if (sta) {
if (!sta->wme)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
arsta = (struct ath10k_sta *)sta->drv_priv;
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 3de2de6d44bc..5c34b156b4ff 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
- * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/pci.h>
@@ -889,7 +889,7 @@ static u32 ath10k_pci_targ_cpu_to_ce_addr(struct ath10k *ar, u32 addr)
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
if (WARN_ON_ONCE(!ar_pci->targ_cpu_to_ce_addr))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
return ar_pci->targ_cpu_to_ce_addr(ar, addr);
}
@@ -2668,7 +2668,7 @@ static int ath10k_pci_safe_chip_reset(struct ath10k *ar)
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
if (!ar_pci->pci_soft_reset)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
return ar_pci->pci_soft_reset(ar);
}
@@ -2808,7 +2808,7 @@ static int ath10k_pci_chip_reset(struct ath10k *ar)
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
if (WARN_ON(!ar_pci->pci_hard_reset))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
return ar_pci->pci_hard_reset(ar);
}
@@ -3594,7 +3594,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
break;
default:
WARN_ON(1);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
ar = ath10k_core_create(sizeof(*ar_pci), &pdev->dev, ATH10K_BUS_PCI,
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 6b6aa3c36744..aed97fd121ba 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -3,6 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "core.h"
#include "debug.h"
@@ -851,6 +852,10 @@ ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev(struct ath10k *ar, struct sk_buff *skb,
}
ev = tb[WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT];
+ if (!ev) {
+ kfree(tb);
+ return -EPROTO;
+ }
arg->desc_id = ev->desc_id;
arg->status = ev->status;
@@ -1347,7 +1352,7 @@ static int ath10k_wmi_tlv_op_pull_svc_rdy_ev(struct ath10k *ar,
__le32_to_cpu(ev->abi.abi_ver_ns1) != WMI_TLV_ABI_VER_NS1 ||
__le32_to_cpu(ev->abi.abi_ver_ns2) != WMI_TLV_ABI_VER_NS2 ||
__le32_to_cpu(ev->abi.abi_ver_ns3) != WMI_TLV_ABI_VER_NS3) {
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
arg->min_tx_power = ev->hw_min_tx_power;
@@ -2119,9 +2124,9 @@ static int ath10k_wmi_tlv_op_get_vdev_subtype(struct ath10k *ar,
case WMI_VDEV_SUBTYPE_MESH_11S:
return WMI_TLV_VDEV_SUBTYPE_MESH_11S;
case WMI_VDEV_SUBTYPE_MESH_NON_11S:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static struct sk_buff *
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index 83a8f07a687f..8a2f87d0a3a3 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -3,7 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _WMI_TLV_H
#define _WMI_TLV_H
@@ -2343,7 +2343,7 @@ struct wmi_tlv_adaptive_qcs {
} __packed;
/**
- * wmi_tlv_tx_pause_id - firmware tx queue pause reason types
+ * enum wmi_tlv_tx_pause_id - firmware tx queue pause reason types
*
* @WMI_TLV_TX_PAUSE_ID_MCC: used for by multi-channel firmware scheduler.
* Only vdev_map is valid.
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 88befe92f95d..2e9661f4bea8 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -3,7 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/skbuff.h>
@@ -3884,8 +3884,8 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb)
* actual channel switch is done
*/
if (arvif->vif->bss_conf.csa_active &&
- ieee80211_beacon_cntdwn_is_complete(arvif->vif)) {
- ieee80211_csa_finish(arvif->vif);
+ ieee80211_beacon_cntdwn_is_complete(arvif->vif, 0)) {
+ ieee80211_csa_finish(arvif->vif, 0);
continue;
}
@@ -6927,14 +6927,14 @@ void ath10k_wmi_put_start_scan_common(struct wmi_start_scan_common *cmn,
}
static void
-ath10k_wmi_put_start_scan_tlvs(struct wmi_start_scan_tlvs *tlvs,
+ath10k_wmi_put_start_scan_tlvs(u8 *tlvs,
const struct wmi_start_scan_arg *arg)
{
struct wmi_ie_data *ie;
struct wmi_chan_list *channels;
struct wmi_ssid_list *ssids;
struct wmi_bssid_list *bssids;
- void *ptr = tlvs->tlvs;
+ void *ptr = tlvs;
int i;
if (arg->n_channels) {
@@ -7012,7 +7012,7 @@ ath10k_wmi_op_gen_start_scan(struct ath10k *ar,
cmd = (struct wmi_start_scan_cmd *)skb->data;
ath10k_wmi_put_start_scan_common(&cmd->common, arg);
- ath10k_wmi_put_start_scan_tlvs(&cmd->tlvs, arg);
+ ath10k_wmi_put_start_scan_tlvs(cmd->tlvs, arg);
cmd->burst_duration_ms = __cpu_to_le32(0);
@@ -7041,7 +7041,7 @@ ath10k_wmi_10x_op_gen_start_scan(struct ath10k *ar,
cmd = (struct wmi_10x_start_scan_cmd *)skb->data;
ath10k_wmi_put_start_scan_common(&cmd->common, arg);
- ath10k_wmi_put_start_scan_tlvs(&cmd->tlvs, arg);
+ ath10k_wmi_put_start_scan_tlvs(cmd->tlvs, arg);
ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi 10x start scan\n");
return skb;
@@ -8733,9 +8733,9 @@ int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar,
return WMI_VDEV_SUBTYPE_LEGACY_PROXY_STA;
case WMI_VDEV_SUBTYPE_MESH_11S:
case WMI_VDEV_SUBTYPE_MESH_NON_11S:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static int ath10k_wmi_10_2_4_op_get_vdev_subtype(struct ath10k *ar,
@@ -8755,9 +8755,9 @@ static int ath10k_wmi_10_2_4_op_get_vdev_subtype(struct ath10k *ar,
case WMI_VDEV_SUBTYPE_MESH_11S:
return WMI_VDEV_SUBTYPE_10_2_4_MESH_11S;
case WMI_VDEV_SUBTYPE_MESH_NON_11S:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static int ath10k_wmi_10_4_op_get_vdev_subtype(struct ath10k *ar,
@@ -8779,7 +8779,7 @@ static int ath10k_wmi_10_4_op_get_vdev_subtype(struct ath10k *ar,
case WMI_VDEV_SUBTYPE_MESH_NON_11S:
return WMI_VDEV_SUBTYPE_10_4_MESH_NON_11S;
}
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static struct sk_buff *
@@ -8918,8 +8918,6 @@ ath10k_wmi_10_4_gen_tdls_peer_update(struct ath10k *ar,
if (!skb)
return ERR_PTR(-ENOMEM);
- memset(skb->data, 0, sizeof(*cmd));
-
cmd = (struct wmi_10_4_tdls_peer_update_cmd *)skb->data;
cmd->vdev_id = __cpu_to_le32(arg->vdev_id);
ether_addr_copy(cmd->peer_macaddr.addr, arg->addr);
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 9146df98fcee..2379501225a4 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -3,7 +3,7 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _WMI_H_
@@ -3008,8 +3008,11 @@ enum wmi_coex_version {
* @WMI_10_4_TDLS_UAPSD_SLEEP_STA: TDLS sleep sta support enable/disable
* @WMI_10_4_TDLS_CONN_TRACKER_IN_HOST_MODE: TDLS connection tracker in host
* enable/disable
- * @WMI_10_4_TDLS_EXPLICIT_MODE_ONLY:Explicit TDLS mode enable/disable
+ * @WMI_10_4_TDLS_EXPLICIT_MODE_ONLY: Explicit TDLS mode enable/disable
* @WMI_10_4_TX_DATA_ACK_RSSI: Enable DATA ACK RSSI if firmware is capable
+ * @WMI_10_4_EXT_PEER_TID_CONFIGS_SUPPORT: Firmware supports Extended Peer
+ * TID configuration for QoS related settings
+ * @WMI_10_4_REPORT_AIRTIME: Firmware supports transmit airtime reporting
*/
enum wmi_10_4_feature_mask {
WMI_10_4_LTEU_SUPPORT = BIT(0),
@@ -3069,7 +3072,10 @@ struct host_memory_chunk {
struct wmi_host_mem_chunks {
__le32 count;
/* some fw revisions require at least 1 chunk regardless of count */
- struct host_memory_chunk items[1];
+ union {
+ struct host_memory_chunk item;
+ DECLARE_FLEX_ARRAY(struct host_memory_chunk, items);
+ };
} __packed;
struct wmi_init_cmd {
@@ -3215,23 +3221,16 @@ struct wmi_start_scan_common {
__le32 scan_ctrl_flags;
} __packed;
-struct wmi_start_scan_tlvs {
- /* TLV parameters. These includes channel list, ssid list, bssid list,
- * extra ies.
- */
- u8 tlvs[0];
-} __packed;
-
struct wmi_start_scan_cmd {
struct wmi_start_scan_common common;
__le32 burst_duration_ms;
- struct wmi_start_scan_tlvs tlvs;
+ u8 tlvs[];
} __packed;
/* This is the definition from 10.X firmware branch */
struct wmi_10x_start_scan_cmd {
struct wmi_start_scan_common common;
- struct wmi_start_scan_tlvs tlvs;
+ u8 tlvs[];
} __packed;
struct wmi_ssid_arg {
@@ -4260,13 +4259,6 @@ struct wmi_peer_sta_ps_state_chg_event {
__le32 peer_ps_state;
} __packed;
-struct wmi_pdev_chanlist_update_event {
- /* number of channels */
- __le32 num_chan;
- /* array of channels */
- struct wmi_channel channel_list[1];
-} __packed;
-
#define WMI_MAX_DEBUG_MESG (sizeof(u32) * 32)
struct wmi_debug_mesg_event {
@@ -5793,30 +5785,6 @@ struct wmi_bcn_prb_info {
/* app IE */
} __packed;
-struct wmi_bcn_tmpl_cmd {
- /* unique id identifying the VDEV, generated by the caller */
- __le32 vdev_id;
- /* TIM IE offset from the beginning of the template. */
- __le32 tim_ie_offset;
- /* beacon probe capabilities and IEs */
- struct wmi_bcn_prb_info bcn_prb_info;
- /* beacon buffer length */
- __le32 buf_len;
- /* variable length data */
- u8 data[1];
-} __packed;
-
-struct wmi_prb_tmpl_cmd {
- /* unique id identifying the VDEV, generated by the caller */
- __le32 vdev_id;
- /* beacon probe capabilities and IEs */
- struct wmi_bcn_prb_info bcn_prb_info;
- /* beacon buffer length */
- __le32 buf_len;
- /* Variable length data */
- u8 data[1];
-} __packed;
-
enum wmi_sta_ps_mode {
/* enable power save for the given STA VDEV */
WMI_STA_PS_MODE_DISABLED = 0,
@@ -7197,7 +7165,13 @@ struct wmi_tdls_peer_capabilities {
__le32 is_peer_responder;
__le32 pref_offchan_num;
__le32 pref_offchan_bw;
- struct wmi_channel peer_chan_list[1];
+ union {
+ /* to match legacy implementation allocate room for
+ * at least one record even if peer_chan_len is 0
+ */
+ struct wmi_channel peer_chan_min_allocation;
+ DECLARE_FLEX_ARRAY(struct wmi_channel, peer_chan_list);
+ };
} __packed;
struct wmi_10_4_tdls_peer_update_cmd {
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 0c6ecbb9a066..c78bce19bd75 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
@@ -122,6 +122,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tcl_ring_retry = true,
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
+ .support_dual_stations = false,
},
{
.hw_rev = ATH11K_HW_IPQ6018_HW10,
@@ -205,6 +206,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = false,
+ .support_dual_stations = false,
},
{
.name = "qca6390 hw2.0",
@@ -255,7 +257,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.coldboot_cal_ftm = false,
.cbcal_restart_fw = false,
.fw_mem_mode = 0,
- .num_vdevs = 16 + 1,
+ .num_vdevs = 2 + 1,
.num_peers = 512,
.supports_suspend = true,
.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
@@ -290,6 +292,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = true,
+ .support_dual_stations = true,
},
{
.name = "qcn9074 hw1.0",
@@ -372,6 +375,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = false,
+ .support_dual_stations = false,
},
{
.name = "wcn6855 hw2.0",
@@ -422,7 +426,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.coldboot_cal_ftm = false,
.cbcal_restart_fw = false,
.fw_mem_mode = 0,
- .num_vdevs = 16 + 1,
+ .num_vdevs = 2 + 1,
.num_peers = 512,
.supports_suspend = true,
.hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855),
@@ -457,6 +461,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = true,
+ .support_dual_stations = true,
},
{
.name = "wcn6855 hw2.1",
@@ -505,7 +510,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.coldboot_cal_ftm = false,
.cbcal_restart_fw = false,
.fw_mem_mode = 0,
- .num_vdevs = 16 + 1,
+ .num_vdevs = 2 + 1,
.num_peers = 512,
.supports_suspend = true,
.hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855),
@@ -540,6 +545,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = true,
+ .support_dual_stations = true,
},
{
.name = "wcn6750 hw1.0",
@@ -621,6 +627,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE_WCN6750,
.smp2p_wow_exit = true,
.support_fw_mac_sequence = true,
+ .support_dual_stations = false,
},
{
.hw_rev = ATH11K_HW_IPQ5018_HW10,
@@ -702,6 +709,93 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = false,
+ .support_dual_stations = false,
+ },
+ {
+ .name = "qca2066 hw2.1",
+ .hw_rev = ATH11K_HW_QCA2066_HW21,
+ .fw = {
+ .dir = "QCA2066/hw2.1",
+ .board_size = 256 * 1024,
+ .cal_offset = 128 * 1024,
+ },
+ .max_radios = 3,
+ .bdf_addr = 0x4B0C0000,
+ .hw_ops = &wcn6855_ops,
+ .ring_mask = &ath11k_hw_ring_mask_qca6390,
+ .internal_sleep_clock = true,
+ .regs = &wcn6855_regs,
+ .qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390,
+ .host_ce_config = ath11k_host_ce_config_qca6390,
+ .ce_count = 9,
+ .target_ce_config = ath11k_target_ce_config_wlan_qca6390,
+ .target_ce_count = 9,
+ .svc_to_ce_map = ath11k_target_service_to_ce_map_wlan_qca6390,
+ .svc_to_ce_map_len = 14,
+ .ce_ie_addr = &ath11k_ce_ie_addr_ipq8074,
+ .single_pdev_only = true,
+ .rxdma1_enable = false,
+ .num_rxmda_per_pdev = 2,
+ .rx_mac_buf_ring = true,
+ .vdev_start_delay = true,
+ .htt_peer_map_v2 = false,
+
+ .spectral = {
+ .fft_sz = 0,
+ .fft_pad_sz = 0,
+ .summary_pad_sz = 0,
+ .fft_hdr_len = 0,
+ .max_fft_bins = 0,
+ .fragment_160mhz = false,
+ },
+
+ .interface_modes = BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_AP),
+ .supports_monitor = false,
+ .full_monitor_mode = false,
+ .supports_shadow_regs = true,
+ .idle_ps = true,
+ .supports_sta_ps = true,
+ .coldboot_cal_mm = false,
+ .coldboot_cal_ftm = false,
+ .cbcal_restart_fw = false,
+ .fw_mem_mode = 0,
+ .num_vdevs = 2 + 1,
+ .num_peers = 512,
+ .supports_suspend = true,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855),
+ .supports_regdb = true,
+ .fix_l1ss = false,
+ .credit_flow = true,
+ .max_tx_ring = DP_TCL_NUM_RING_MAX_QCA6390,
+ .hal_params = &ath11k_hw_hal_params_qca6390,
+ .supports_dynamic_smps_6ghz = false,
+ .alloc_cacheable_memory = false,
+ .supports_rssi_stats = true,
+ .fw_wmi_diag_event = true,
+ .current_cc_support = true,
+ .dbr_debug_support = false,
+ .global_reset = true,
+ .bios_sar_capa = &ath11k_hw_sar_capa_wcn6855,
+ .m3_fw_support = true,
+ .fixed_bdf_addr = false,
+ .fixed_mem_region = false,
+ .static_window_map = false,
+ .hybrid_bus_type = false,
+ .fixed_fw_mem = false,
+ .support_off_channel_tx = true,
+ .supports_multi_bssid = true,
+
+ .sram_dump = {
+ .start = 0x01400000,
+ .end = 0x0177ffff,
+ },
+
+ .tcl_ring_retry = true,
+ .tx_ring_size = DP_TCL_DATA_RING_SIZE,
+ .smp2p_wow_exit = false,
+ .support_fw_mac_sequence = true,
+ .support_dual_stations = true,
},
};
@@ -1775,10 +1869,9 @@ static int ath11k_core_reconfigure_on_crash(struct ath11k_base *ab)
mutex_lock(&ab->core_lock);
ath11k_thermal_unregister(ab);
- ath11k_hif_irq_disable(ab);
ath11k_dp_pdev_free(ab);
ath11k_spectral_deinit(ab);
- ath11k_hif_stop(ab);
+ ath11k_ce_cleanup_pipes(ab);
ath11k_wmi_detach(ab);
ath11k_dp_pdev_reo_cleanup(ab);
mutex_unlock(&ab->core_lock);
@@ -2033,6 +2126,9 @@ static void ath11k_core_reset(struct work_struct *work)
time_left = wait_for_completion_timeout(&ab->recovery_start,
ATH11K_RECOVER_START_TIMEOUT_HZ);
+ ath11k_hif_irq_disable(ab);
+ ath11k_hif_ce_irq_disable(ab);
+
ath11k_hif_power_down(ab);
ath11k_hif_power_up(ab);
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 7e3b6779f4e9..b3fb74a226fb 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -147,6 +147,7 @@ enum ath11k_hw_rev {
ATH11K_HW_WCN6855_HW21,
ATH11K_HW_WCN6750_HW10,
ATH11K_HW_IPQ5018_HW10,
+ ATH11K_HW_QCA2066_HW21,
};
enum ath11k_firmware_mode {
@@ -314,6 +315,43 @@ struct ath11k_rekey_data {
bool enable_offload;
};
+/**
+ * struct ath11k_chan_power_info - TPE containing power info per channel chunk
+ * @chan_cfreq: channel center freq (MHz)
+ * e.g.
+ * channel 37/20 MHz, it is 6135
+ * channel 37/40 MHz, it is 6125
+ * channel 37/80 MHz, it is 6145
+ * channel 37/160 MHz, it is 6185
+ * @tx_power: transmit power (dBm)
+ */
+struct ath11k_chan_power_info {
+ u16 chan_cfreq;
+ s8 tx_power;
+};
+
+/**
+ * struct ath11k_reg_tpc_power_info - regulatory TPC power info
+ * @is_psd_power: is PSD power or not
+ * @eirp_power: Maximum EIRP power (dBm), valid only if power is PSD
+ * @ap_power_type: type of power (SP/LPI/VLP)
+ * @num_pwr_levels: number of power levels
+ * @reg_max: Array of maximum TX power (dBm) per PSD value
+ * @ap_constraint_power: AP constraint power (dBm)
+ * @tpe: TPE values processed from TPE IE
+ * @chan_power_info: power info to send to firmware
+ */
+struct ath11k_reg_tpc_power_info {
+ bool is_psd_power;
+ u8 eirp_power;
+ enum wmi_reg_6ghz_ap_type ap_power_type;
+ u8 num_pwr_levels;
+ u8 reg_max[IEEE80211_MAX_NUM_PWR_LEVEL];
+ u8 ap_constraint_power;
+ s8 tpe[IEEE80211_MAX_NUM_PWR_LEVEL];
+ struct ath11k_chan_power_info chan_power_info[IEEE80211_MAX_NUM_PWR_LEVEL];
+};
+
struct ath11k_vif {
u32 vdev_id;
enum wmi_vdev_type vdev_type;
@@ -369,9 +407,7 @@ struct ath11k_vif {
struct ath11k_arp_ns_offload arp_ns_offload;
struct ath11k_rekey_data rekey_data;
-#ifdef CONFIG_ATH11K_DEBUGFS
- struct dentry *debugfs_twt;
-#endif /* CONFIG_ATH11K_DEBUGFS */
+ struct ath11k_reg_tpc_power_info reg_tpc_info;
};
struct ath11k_vif_iter {
@@ -739,6 +775,7 @@ struct ath11k {
/* protected by conf_mutex */
bool ps_state_enable;
bool ps_timekeeper_enable;
+ s8 max_allowed_tx_power;
};
struct ath11k_band_cap {
@@ -922,6 +959,7 @@ struct ath11k_base {
* This may or may not be used during the runtime
*/
struct ieee80211_regdomain *new_regd[MAX_RADIOS];
+ struct cur_regulatory_info *reg_info_store;
/* Current DFS Regulatory */
enum ath11k_dfs_region dfs_region;
diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c
index a847bc0d50c0..a48e737ef35d 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs.c
@@ -1894,35 +1894,30 @@ static const struct file_operations ath11k_fops_twt_resume_dialog = {
.open = simple_open
};
-void ath11k_debugfs_add_interface(struct ath11k_vif *arvif)
+void ath11k_debugfs_op_vif_add(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
{
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
struct ath11k_base *ab = arvif->ar->ab;
+ struct dentry *debugfs_twt;
if (arvif->vif->type != NL80211_IFTYPE_AP &&
!(arvif->vif->type == NL80211_IFTYPE_STATION &&
test_bit(WMI_TLV_SERVICE_STA_TWT, ab->wmi_ab.svc_map)))
return;
- arvif->debugfs_twt = debugfs_create_dir("twt",
- arvif->vif->debugfs_dir);
- debugfs_create_file("add_dialog", 0200, arvif->debugfs_twt,
+ debugfs_twt = debugfs_create_dir("twt",
+ arvif->vif->debugfs_dir);
+ debugfs_create_file("add_dialog", 0200, debugfs_twt,
arvif, &ath11k_fops_twt_add_dialog);
- debugfs_create_file("del_dialog", 0200, arvif->debugfs_twt,
+ debugfs_create_file("del_dialog", 0200, debugfs_twt,
arvif, &ath11k_fops_twt_del_dialog);
- debugfs_create_file("pause_dialog", 0200, arvif->debugfs_twt,
+ debugfs_create_file("pause_dialog", 0200, debugfs_twt,
arvif, &ath11k_fops_twt_pause_dialog);
- debugfs_create_file("resume_dialog", 0200, arvif->debugfs_twt,
+ debugfs_create_file("resume_dialog", 0200, debugfs_twt,
arvif, &ath11k_fops_twt_resume_dialog);
}
-void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif)
-{
- if (!arvif->debugfs_twt)
- return;
-
- debugfs_remove_recursive(arvif->debugfs_twt);
- arvif->debugfs_twt = NULL;
-}
diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h
index 44d15845f39a..a39e458637b0 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs.h
+++ b/drivers/net/wireless/ath/ath11k/debugfs.h
@@ -307,8 +307,8 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar)
return ar->debug.rx_filter;
}
-void ath11k_debugfs_add_interface(struct ath11k_vif *arvif);
-void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif);
+void ath11k_debugfs_op_vif_add(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif);
void ath11k_debugfs_add_dbring_entry(struct ath11k *ar,
enum wmi_direct_buffer_module id,
enum ath11k_dbg_dbr_event event,
@@ -387,14 +387,6 @@ static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar,
return 0;
}
-static inline void ath11k_debugfs_add_interface(struct ath11k_vif *arvif)
-{
-}
-
-static inline void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif)
-{
-}
-
static inline void
ath11k_debugfs_add_dbring_entry(struct ath11k *ar,
enum wmi_direct_buffer_module id,
diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 8975dc57ad77..1a62407e5a9f 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -104,11 +104,14 @@ void ath11k_dp_srng_cleanup(struct ath11k_base *ab, struct dp_srng *ring)
if (!ring->vaddr_unaligned)
return;
- if (ring->cached)
+ if (ring->cached) {
+ dma_unmap_single(ab->dev, ring->paddr_unaligned, ring->size,
+ DMA_FROM_DEVICE);
kfree(ring->vaddr_unaligned);
- else
+ } else {
dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
ring->paddr_unaligned);
+ }
ring->vaddr_unaligned = NULL;
}
@@ -249,7 +252,18 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
if (cached) {
ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
- ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
+ if (!ring->vaddr_unaligned)
+ return -ENOMEM;
+
+ ring->paddr_unaligned = dma_map_single(ab->dev,
+ ring->vaddr_unaligned,
+ ring->size,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(ab->dev, ring->paddr_unaligned)) {
+ kfree(ring->vaddr_unaligned);
+ ring->vaddr_unaligned = NULL;
+ return -ENOMEM;
+ }
}
}
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index c1072e66e3e8..272b1c35f98d 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "core.h"
@@ -103,7 +103,7 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif,
if (unlikely(!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) &&
!ieee80211_is_data(hdr->frame_control)))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
pool_id = skb_get_queue_mapping(skb) & (ATH11K_HW_MAX_QUEUES - 1);
@@ -1018,7 +1018,7 @@ int ath11k_dp_tx_htt_h2t_ver_req_msg(struct ath11k_base *ab)
if (dp->htt_tgt_ver_major != HTT_TARGET_VERSION_MAJOR) {
ath11k_err(ab, "unsupported htt major version %d supported version is %d\n",
dp->htt_tgt_ver_major, HTT_TARGET_VERSION_MAJOR);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
return 0;
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index c060c4b5c0cc..f3d04568c221 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -626,15 +626,30 @@ u32 *ath11k_hal_srng_dst_peek(struct ath11k_base *ab, struct hal_srng *srng)
return NULL;
}
+static u32 *ath11k_hal_srng_dst_peek_with_dma(struct ath11k_base *ab,
+ struct hal_srng *srng, dma_addr_t *paddr)
+{
+ lockdep_assert_held(&srng->lock);
+
+ if (srng->u.dst_ring.tp != srng->u.dst_ring.cached_hp) {
+ *paddr = srng->ring_base_paddr +
+ sizeof(*srng->ring_base_vaddr) * srng->u.dst_ring.tp;
+ return srng->ring_base_vaddr + srng->u.dst_ring.tp;
+ }
+
+ return NULL;
+}
+
static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
struct hal_srng *srng)
{
+ dma_addr_t desc_paddr;
u32 *desc;
/* prefetch only if desc is available */
- desc = ath11k_hal_srng_dst_peek(ab, srng);
+ desc = ath11k_hal_srng_dst_peek_with_dma(ab, srng, &desc_paddr);
if (likely(desc)) {
- dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
+ dma_sync_single_for_cpu(ab->dev, desc_paddr,
(srng->entry_size * sizeof(u32)),
DMA_FROM_DEVICE);
prefetch(desc);
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 80447f488954..65e8f244ebb9 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH11K_HAL_H
@@ -674,6 +674,7 @@ struct hal_srng_config {
* @HAL_RX_BUF_RBM_SW1_BM: For Tx completion -- returned to host
* @HAL_RX_BUF_RBM_SW2_BM: For Tx completion -- returned to host
* @HAL_RX_BUF_RBM_SW3_BM: For Rx release -- returned to host
+ * @HAL_RX_BUF_RBM_SW4_BM: For Tx completion -- returned to host
*/
enum hal_rx_buf_return_buf_manager {
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c
index e758ee8e17c9..8f7dd43dc1bd 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "debug.h"
@@ -246,7 +246,7 @@ int ath11k_hal_reo_cmd_send(struct ath11k_base *ab, struct hal_srng *srng,
case HAL_REO_CMD_UNBLOCK_CACHE:
case HAL_REO_CMD_FLUSH_TIMEOUT_LIST:
ath11k_warn(ab, "Unsupported reo command %d\n", type);
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
break;
default:
ath11k_warn(ab, "Unknown reo command %d\n", type);
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 77d8f9237680..caa6dc12a790 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -58,7 +58,7 @@ static void ath11k_hw_wcn6855_tx_mesh_enable(struct ath11k_base *ab,
static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab,
struct target_resource_config *config)
{
- config->num_vdevs = 4;
+ config->num_vdevs = ab->hw_params.num_vdevs;
config->num_peers = 16;
config->num_tids = 32;
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 1b070747a5db..14ef4eb48f80 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -226,6 +226,7 @@ struct ath11k_hw_params {
u32 tx_ring_size;
bool smp2p_wow_exit;
bool support_fw_mac_sequence;
+ bool support_dual_stations;
};
struct ath11k_hw_ops {
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index db241589424d..a6a37d67a50a 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <net/mac80211.h>
@@ -255,9 +255,6 @@ static const u32 ath11k_smps_map[] = {
[WLAN_HT_CAP_SM_PS_DISABLED] = WMI_PEER_SMPS_PS_NONE,
};
-static int ath11k_start_vdev_delay(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
-
enum nl80211_he_ru_alloc ath11k_mac_phy_he_ru_to_nl80211_he_ru_alloc(u16 ru_phy)
{
enum nl80211_he_ru_alloc ret;
@@ -1580,7 +1577,7 @@ void ath11k_mac_bcn_tx_event(struct ath11k_vif *arvif)
return;
if (vif->bss_conf.color_change_active &&
- ieee80211_beacon_cntdwn_is_complete(vif)) {
+ ieee80211_beacon_cntdwn_is_complete(vif, 0)) {
arvif->bcca_zero_sent = true;
ieee80211_color_change_finish(vif);
return;
@@ -1589,7 +1586,7 @@ void ath11k_mac_bcn_tx_event(struct ath11k_vif *arvif)
arvif->bcca_zero_sent = false;
if (vif->bss_conf.color_change_active)
- ieee80211_beacon_update_cntdwn(vif);
+ ieee80211_beacon_update_cntdwn(vif, 0);
ath11k_mac_setup_bcn_tmpl(arvif);
}
@@ -2297,6 +2294,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar,
mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160);
mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80);
+ /* Initialize rx_mcs_160 to 9 which is an invalid value */
+ rx_mcs_160 = 9;
if (support_160) {
for (i = 7; i >= 0; i--) {
u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3;
@@ -2308,6 +2307,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar,
}
}
+ /* Initialize rx_mcs_80 to 9 which is an invalid value */
+ rx_mcs_80 = 9;
for (i = 7; i >= 0; i--) {
u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3;
@@ -3026,7 +3027,14 @@ static void ath11k_bss_assoc(struct ieee80211_hw *hw,
rcu_read_unlock();
+ if (!ath11k_mac_vif_recalc_sta_he_txbf(ar, vif, &he_cap)) {
+ ath11k_warn(ar->ab, "failed to recalc he txbf for vdev %i on bss %pM\n",
+ arvif->vdev_id, bss_conf->bssid);
+ return;
+ }
+
peer_arg.is_assoc = true;
+
ret = ath11k_wmi_send_peer_assoc_cmd(ar, &peer_arg);
if (ret) {
ath11k_warn(ar->ab, "failed to run peer assoc for %pM vdev %i: %d\n",
@@ -3049,12 +3057,6 @@ static void ath11k_bss_assoc(struct ieee80211_hw *hw,
return;
}
- if (!ath11k_mac_vif_recalc_sta_he_txbf(ar, vif, &he_cap)) {
- ath11k_warn(ar->ab, "failed to recalc he txbf for vdev %i on bss %pM\n",
- arvif->vdev_id, bss_conf->bssid);
- return;
- }
-
WARN_ON(arvif->is_up);
arvif->aid = vif->cfg.aid;
@@ -3397,6 +3399,18 @@ static int ath11k_mac_config_obss_pd(struct ath11k *ar,
return 0;
}
+static bool ath11k_mac_supports_station_tpc(struct ath11k *ar,
+ struct ath11k_vif *arvif,
+ const struct cfg80211_chan_def *chandef)
+{
+ return ath11k_wmi_supports_6ghz_cc_ext(ar) &&
+ test_bit(WMI_TLV_SERVICE_EXT_TPC_REG_SUPPORT, ar->ab->wmi_ab.svc_map) &&
+ arvif->vdev_type == WMI_VDEV_TYPE_STA &&
+ arvif->vdev_subtype == WMI_VDEV_SUBTYPE_NONE &&
+ chandef->chan &&
+ chandef->chan->band == NL80211_BAND_6GHZ;
+}
+
static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_bss_conf *info,
@@ -3596,7 +3610,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
if (changed & BSS_CHANGED_TXPOWER) {
ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "vdev_id %i txpower %d\n",
arvif->vdev_id, info->txpower);
-
arvif->txpower = info->txpower;
ath11k_mac_txpower_recalc(ar);
}
@@ -4000,7 +4013,7 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw,
req->ssids[i].ssid_len);
}
} else {
- arg->scan_flags |= WMI_SCAN_FLAG_PASSIVE;
+ arg->scan_f_passive = 1;
}
if (req->n_channels) {
@@ -4906,100 +4919,6 @@ static void ath11k_mac_dec_num_stations(struct ath11k_vif *arvif,
ar->num_stations--;
}
-static int ath11k_mac_station_add(struct ath11k *ar,
- struct ieee80211_vif *vif,
- struct ieee80211_sta *sta)
-{
- struct ath11k_base *ab = ar->ab;
- struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
- struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
- struct peer_create_params peer_param;
- int ret;
-
- lockdep_assert_held(&ar->conf_mutex);
-
- ret = ath11k_mac_inc_num_stations(arvif, sta);
- if (ret) {
- ath11k_warn(ab, "refusing to associate station: too many connected already (%d)\n",
- ar->max_num_stations);
- goto exit;
- }
-
- arsta->rx_stats = kzalloc(sizeof(*arsta->rx_stats), GFP_KERNEL);
- if (!arsta->rx_stats) {
- ret = -ENOMEM;
- goto dec_num_station;
- }
-
- peer_param.vdev_id = arvif->vdev_id;
- peer_param.peer_addr = sta->addr;
- peer_param.peer_type = WMI_PEER_TYPE_DEFAULT;
-
- ret = ath11k_peer_create(ar, arvif, sta, &peer_param);
- if (ret) {
- ath11k_warn(ab, "Failed to add peer: %pM for VDEV: %d\n",
- sta->addr, arvif->vdev_id);
- goto free_rx_stats;
- }
-
- ath11k_dbg(ab, ATH11K_DBG_MAC, "Added peer: %pM for VDEV: %d\n",
- sta->addr, arvif->vdev_id);
-
- if (ath11k_debugfs_is_extd_tx_stats_enabled(ar)) {
- arsta->tx_stats = kzalloc(sizeof(*arsta->tx_stats), GFP_KERNEL);
- if (!arsta->tx_stats) {
- ret = -ENOMEM;
- goto free_peer;
- }
- }
-
- if (ieee80211_vif_is_mesh(vif)) {
- ath11k_dbg(ab, ATH11K_DBG_MAC,
- "setting USE_4ADDR for mesh STA %pM\n", sta->addr);
- ret = ath11k_wmi_set_peer_param(ar, sta->addr,
- arvif->vdev_id,
- WMI_PEER_USE_4ADDR, 1);
- if (ret) {
- ath11k_warn(ab, "failed to set mesh STA %pM 4addr capability: %d\n",
- sta->addr, ret);
- goto free_tx_stats;
- }
- }
-
- ret = ath11k_dp_peer_setup(ar, arvif->vdev_id, sta->addr);
- if (ret) {
- ath11k_warn(ab, "failed to setup dp for peer %pM on vdev %i (%d)\n",
- sta->addr, arvif->vdev_id, ret);
- goto free_tx_stats;
- }
-
- if (ab->hw_params.vdev_start_delay &&
- !arvif->is_started &&
- arvif->vdev_type != WMI_VDEV_TYPE_AP) {
- ret = ath11k_start_vdev_delay(ar->hw, vif);
- if (ret) {
- ath11k_warn(ab, "failed to delay vdev start: %d\n", ret);
- goto free_tx_stats;
- }
- }
-
- ewma_avg_rssi_init(&arsta->avg_rssi);
- return 0;
-
-free_tx_stats:
- kfree(arsta->tx_stats);
- arsta->tx_stats = NULL;
-free_peer:
- ath11k_peer_delete(ar, arvif->vdev_id, sta->addr);
-free_rx_stats:
- kfree(arsta->rx_stats);
- arsta->rx_stats = NULL;
-dec_num_station:
- ath11k_mac_dec_num_stations(arvif, sta);
-exit:
- return ret;
-}
-
static u32 ath11k_mac_ieee80211_sta_bw_to_wmi(struct ath11k *ar,
struct ieee80211_sta *sta)
{
@@ -5028,140 +4947,6 @@ static u32 ath11k_mac_ieee80211_sta_bw_to_wmi(struct ath11k *ar,
return bw;
}
-static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- struct ieee80211_sta *sta,
- enum ieee80211_sta_state old_state,
- enum ieee80211_sta_state new_state)
-{
- struct ath11k *ar = hw->priv;
- struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
- struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
- struct ath11k_peer *peer;
- int ret = 0;
-
- /* cancel must be done outside the mutex to avoid deadlock */
- if ((old_state == IEEE80211_STA_NONE &&
- new_state == IEEE80211_STA_NOTEXIST)) {
- cancel_work_sync(&arsta->update_wk);
- cancel_work_sync(&arsta->set_4addr_wk);
- }
-
- mutex_lock(&ar->conf_mutex);
-
- if (old_state == IEEE80211_STA_NOTEXIST &&
- new_state == IEEE80211_STA_NONE) {
- memset(arsta, 0, sizeof(*arsta));
- arsta->arvif = arvif;
- arsta->peer_ps_state = WMI_PEER_PS_STATE_DISABLED;
- INIT_WORK(&arsta->update_wk, ath11k_sta_rc_update_wk);
- INIT_WORK(&arsta->set_4addr_wk, ath11k_sta_set_4addr_wk);
-
- ret = ath11k_mac_station_add(ar, vif, sta);
- if (ret)
- ath11k_warn(ar->ab, "Failed to add station: %pM for VDEV: %d\n",
- sta->addr, arvif->vdev_id);
- } else if ((old_state == IEEE80211_STA_NONE &&
- new_state == IEEE80211_STA_NOTEXIST)) {
- bool skip_peer_delete = ar->ab->hw_params.vdev_start_delay &&
- vif->type == NL80211_IFTYPE_STATION;
-
- ath11k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr);
-
- if (!skip_peer_delete) {
- ret = ath11k_peer_delete(ar, arvif->vdev_id, sta->addr);
- if (ret)
- ath11k_warn(ar->ab,
- "Failed to delete peer: %pM for VDEV: %d\n",
- sta->addr, arvif->vdev_id);
- else
- ath11k_dbg(ar->ab,
- ATH11K_DBG_MAC,
- "Removed peer: %pM for VDEV: %d\n",
- sta->addr, arvif->vdev_id);
- }
-
- ath11k_mac_dec_num_stations(arvif, sta);
- mutex_lock(&ar->ab->tbl_mtx_lock);
- spin_lock_bh(&ar->ab->base_lock);
- peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
- if (skip_peer_delete && peer) {
- peer->sta = NULL;
- } else if (peer && peer->sta == sta) {
- ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n",
- vif->addr, arvif->vdev_id);
- ath11k_peer_rhash_delete(ar->ab, peer);
- peer->sta = NULL;
- list_del(&peer->list);
- kfree(peer);
- ar->num_peers--;
- }
- spin_unlock_bh(&ar->ab->base_lock);
- mutex_unlock(&ar->ab->tbl_mtx_lock);
-
- kfree(arsta->tx_stats);
- arsta->tx_stats = NULL;
-
- kfree(arsta->rx_stats);
- arsta->rx_stats = NULL;
- } else if (old_state == IEEE80211_STA_AUTH &&
- new_state == IEEE80211_STA_ASSOC &&
- (vif->type == NL80211_IFTYPE_AP ||
- vif->type == NL80211_IFTYPE_MESH_POINT ||
- vif->type == NL80211_IFTYPE_ADHOC)) {
- ret = ath11k_station_assoc(ar, vif, sta, false);
- if (ret)
- ath11k_warn(ar->ab, "Failed to associate station: %pM\n",
- sta->addr);
-
- spin_lock_bh(&ar->data_lock);
- /* Set arsta bw and prev bw */
- arsta->bw = ath11k_mac_ieee80211_sta_bw_to_wmi(ar, sta);
- arsta->bw_prev = arsta->bw;
- spin_unlock_bh(&ar->data_lock);
- } else if (old_state == IEEE80211_STA_ASSOC &&
- new_state == IEEE80211_STA_AUTHORIZED) {
- spin_lock_bh(&ar->ab->base_lock);
-
- peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
- if (peer)
- peer->is_authorized = true;
-
- spin_unlock_bh(&ar->ab->base_lock);
-
- if (vif->type == NL80211_IFTYPE_STATION && arvif->is_up) {
- ret = ath11k_wmi_set_peer_param(ar, sta->addr,
- arvif->vdev_id,
- WMI_PEER_AUTHORIZE,
- 1);
- if (ret)
- ath11k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n",
- sta->addr, arvif->vdev_id, ret);
- }
- } else if (old_state == IEEE80211_STA_AUTHORIZED &&
- new_state == IEEE80211_STA_ASSOC) {
- spin_lock_bh(&ar->ab->base_lock);
-
- peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
- if (peer)
- peer->is_authorized = false;
-
- spin_unlock_bh(&ar->ab->base_lock);
- } else if (old_state == IEEE80211_STA_ASSOC &&
- new_state == IEEE80211_STA_AUTH &&
- (vif->type == NL80211_IFTYPE_AP ||
- vif->type == NL80211_IFTYPE_MESH_POINT ||
- vif->type == NL80211_IFTYPE_ADHOC)) {
- ret = ath11k_station_disassoc(ar, vif, sta);
- if (ret)
- ath11k_warn(ar->ab, "Failed to disassociate station: %pM\n",
- sta->addr);
- }
-
- mutex_unlock(&ar->conf_mutex);
- return ret;
-}
-
static int ath11k_mac_op_sta_set_txpwr(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta)
@@ -6756,13 +6541,6 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
goto err;
}
- /* In the case of hardware recovery, debugfs files are
- * not deleted since ieee80211_ops.remove_interface() is
- * not invoked. In such cases, try to delete the files.
- * These will be re-created later.
- */
- ath11k_debugfs_remove_interface(arvif);
-
memset(arvif, 0, sizeof(*arvif));
arvif->ar = ar;
@@ -6939,8 +6717,6 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
ath11k_dp_vdev_tx_attach(ar, arvif);
- ath11k_debugfs_add_interface(arvif);
-
if (vif->type != NL80211_IFTYPE_MONITOR &&
test_bit(ATH11K_FLAG_MONITOR_CONF_ENABLED, &ar->monitor_flags)) {
ret = ath11k_mac_monitor_vdev_create(ar);
@@ -6949,6 +6725,14 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
ret);
}
+ if (ath11k_wmi_supports_6ghz_cc_ext(ar)) {
+ struct cur_regulatory_info *reg_info;
+
+ reg_info = &ab->reg_info_store[ar->pdev_idx];
+ ath11k_dbg(ab, ATH11K_DBG_MAC, "interface added to change reg rules\n");
+ ath11k_reg_handle_chan_list(ab, reg_info, IEEE80211_REG_LPI_AP);
+ }
+
mutex_unlock(&ar->conf_mutex);
return 0;
@@ -7056,8 +6840,6 @@ err_vdev_del:
/* Recalc txpower for remaining vdev */
ath11k_mac_txpower_recalc(ar);
- ath11k_debugfs_remove_interface(arvif);
-
/* TODO: recal traffic pause state based on the available vdevs */
mutex_unlock(&ar->conf_mutex);
@@ -7277,6 +7059,15 @@ ath11k_mac_vdev_start_restart(struct ath11k_vif *arvif,
return ret;
}
+ /* TODO: For now we only set TPC power here. However when
+ * channel changes, say CSA, it should be updated again.
+ */
+ if (ath11k_mac_supports_station_tpc(ar, arvif, chandef)) {
+ ath11k_mac_fill_reg_tpc_info(ar, arvif->vif, &arvif->chanctx);
+ ath11k_wmi_send_vdev_set_tpc_power(ar, arvif->vdev_id,
+ &arvif->reg_tpc_info);
+ }
+
if (!restart)
ar->num_started_vdevs++;
@@ -7553,8 +7344,8 @@ unlock:
mutex_unlock(&ar->conf_mutex);
}
-static int ath11k_start_vdev_delay(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+static int ath11k_mac_start_vdev_delay(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
{
struct ath11k *ar = hw->priv;
struct ath11k_base *ab = ar->ab;
@@ -7600,6 +7391,501 @@ static int ath11k_start_vdev_delay(struct ieee80211_hw *hw,
return 0;
}
+static int ath11k_mac_stop_vdev_early(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
+{
+ struct ath11k *ar = hw->priv;
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ int ret;
+
+ if (WARN_ON(!arvif->is_started))
+ return -EBUSY;
+
+ ret = ath11k_mac_vdev_stop(arvif);
+ if (ret) {
+ ath11k_warn(ab, "failed to stop vdev %i: %d\n",
+ arvif->vdev_id, ret);
+ return ret;
+ }
+
+ arvif->is_started = false;
+
+ /* TODO: Setup ps and cts/rts protection */
+ return 0;
+}
+
+static u8 ath11k_mac_get_tpe_count(u8 txpwr_intrprt, u8 txpwr_cnt)
+{
+ switch (txpwr_intrprt) {
+ /* Refer "Table 9-276-Meaning of Maximum Transmit Power Count subfield
+ * if the Maximum Transmit Power Interpretation subfield is 0 or 2" of
+ * "IEEE Std 802.11ax 2021".
+ */
+ case IEEE80211_TPE_LOCAL_EIRP:
+ case IEEE80211_TPE_REG_CLIENT_EIRP:
+ txpwr_cnt = txpwr_cnt <= 3 ? txpwr_cnt : 3;
+ txpwr_cnt = txpwr_cnt + 1;
+ break;
+ /* Refer "Table 9-277-Meaning of Maximum Transmit Power Count subfield
+ * if Maximum Transmit Power Interpretation subfield is 1 or 3" of
+ * "IEEE Std 802.11ax 2021".
+ */
+ case IEEE80211_TPE_LOCAL_EIRP_PSD:
+ case IEEE80211_TPE_REG_CLIENT_EIRP_PSD:
+ txpwr_cnt = txpwr_cnt <= 4 ? txpwr_cnt : 4;
+ txpwr_cnt = txpwr_cnt ? (BIT(txpwr_cnt - 1)) : 1;
+ break;
+ }
+
+ return txpwr_cnt;
+}
+
+static u8 ath11k_mac_get_num_pwr_levels(struct cfg80211_chan_def *chan_def)
+{
+ if (chan_def->chan->flags & IEEE80211_CHAN_PSD) {
+ switch (chan_def->width) {
+ case NL80211_CHAN_WIDTH_20:
+ return 1;
+ case NL80211_CHAN_WIDTH_40:
+ return 2;
+ case NL80211_CHAN_WIDTH_80:
+ return 4;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ return 8;
+ default:
+ return 1;
+ }
+ } else {
+ switch (chan_def->width) {
+ case NL80211_CHAN_WIDTH_20:
+ return 1;
+ case NL80211_CHAN_WIDTH_40:
+ return 2;
+ case NL80211_CHAN_WIDTH_80:
+ return 3;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ return 4;
+ default:
+ return 1;
+ }
+ }
+}
+
+static u16 ath11k_mac_get_6ghz_start_frequency(struct cfg80211_chan_def *chan_def)
+{
+ u16 diff_seq;
+
+ /* It is to get the lowest channel number's center frequency of the chan.
+ * For example,
+ * bandwidth=40 MHz, center frequency is 5965, lowest channel is 1
+ * with center frequency 5955, its diff is 5965 - 5955 = 10.
+ * bandwidth=80 MHz, center frequency is 5985, lowest channel is 1
+ * with center frequency 5955, its diff is 5985 - 5955 = 30.
+ * bandwidth=160 MHz, center frequency is 6025, lowest channel is 1
+ * with center frequency 5955, its diff is 6025 - 5955 = 70.
+ */
+ switch (chan_def->width) {
+ case NL80211_CHAN_WIDTH_160:
+ diff_seq = 70;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ diff_seq = 30;
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ diff_seq = 10;
+ break;
+ default:
+ diff_seq = 0;
+ }
+
+ return chan_def->center_freq1 - diff_seq;
+}
+
+static u16 ath11k_mac_get_seg_freq(struct cfg80211_chan_def *chan_def,
+ u16 start_seq, u8 seq)
+{
+ u16 seg_seq;
+
+ /* It is to get the center frequency of the specific bandwidth.
+ * start_seq means the lowest channel number's center frequency.
+ * seq 0/1/2/3 means 20 MHz/40 MHz/80 MHz/160 MHz&80P80.
+ * For example,
+ * lowest channel is 1, its center frequency 5955,
+ * center frequency is 5955 when bandwidth=20 MHz, its diff is 5955 - 5955 = 0.
+ * lowest channel is 1, its center frequency 5955,
+ * center frequency is 5965 when bandwidth=40 MHz, its diff is 5965 - 5955 = 10.
+ * lowest channel is 1, its center frequency 5955,
+ * center frequency is 5985 when bandwidth=80 MHz, its diff is 5985 - 5955 = 30.
+ * lowest channel is 1, its center frequency 5955,
+ * center frequency is 6025 when bandwidth=160 MHz, its diff is 6025 - 5955 = 70.
+ */
+ if (chan_def->width == NL80211_CHAN_WIDTH_80P80 && seq == 3)
+ return chan_def->center_freq2;
+
+ seg_seq = 10 * (BIT(seq) - 1);
+ return seg_seq + start_seq;
+}
+
+static void ath11k_mac_get_psd_channel(struct ath11k *ar,
+ u16 step_freq,
+ u16 *start_freq,
+ u16 *center_freq,
+ u8 i,
+ struct ieee80211_channel **temp_chan,
+ s8 *tx_power)
+{
+ /* It is to get the center frequency for each 20 MHz.
+ * For example, if the chan is 160 MHz and center frequency is 6025,
+ * then it include 8 channels, they are 1/5/9/13/17/21/25/29,
+ * channel number 1's center frequency is 5955, it is parameter start_freq.
+ * parameter i is the step of the 8 channels. i is 0~7 for the 8 channels.
+ * the channel 1/5/9/13/17/21/25/29 maps i=0/1/2/3/4/5/6/7,
+ * and maps its center frequency is 5955/5975/5995/6015/6035/6055/6075/6095,
+ * the gap is 20 for each channel, parameter step_freq means the gap.
+ * after get the center frequency of each channel, it is easy to find the
+ * struct ieee80211_channel of it and get the max_reg_power.
+ */
+ *center_freq = *start_freq + i * step_freq;
+ *temp_chan = ieee80211_get_channel(ar->hw->wiphy, *center_freq);
+ *tx_power = (*temp_chan)->max_reg_power;
+}
+
+static void ath11k_mac_get_eirp_power(struct ath11k *ar,
+ u16 *start_freq,
+ u16 *center_freq,
+ u8 i,
+ struct ieee80211_channel **temp_chan,
+ struct cfg80211_chan_def *def,
+ s8 *tx_power)
+{
+ /* It is to get the center frequency for 20 MHz/40 MHz/80 MHz/
+ * 160 MHz&80P80 bandwidth, and then plus 10 to the center frequency,
+ * it is the center frequency of a channel number.
+ * For example, when configured channel number is 1.
+ * center frequency is 5965 when bandwidth=40 MHz, after plus 10, it is 5975,
+ * then it is channel number 5.
+ * center frequency is 5985 when bandwidth=80 MHz, after plus 10, it is 5995,
+ * then it is channel number 9.
+ * center frequency is 6025 when bandwidth=160 MHz, after plus 10, it is 6035,
+ * then it is channel number 17.
+ * after get the center frequency of each channel, it is easy to find the
+ * struct ieee80211_channel of it and get the max_reg_power.
+ */
+ *center_freq = ath11k_mac_get_seg_freq(def, *start_freq, i);
+
+ /* For the 20 MHz, its center frequency is same with same channel */
+ if (i != 0)
+ *center_freq += 10;
+
+ *temp_chan = ieee80211_get_channel(ar->hw->wiphy, *center_freq);
+ *tx_power = (*temp_chan)->max_reg_power;
+}
+
+void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar,
+ struct ieee80211_vif *vif,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+ struct ath11k_reg_tpc_power_info *reg_tpc_info = &arvif->reg_tpc_info;
+ struct ieee80211_channel *chan, *temp_chan;
+ u8 pwr_lvl_idx, num_pwr_levels, pwr_reduction;
+ bool is_psd_power = false, is_tpe_present = false;
+ s8 max_tx_power[IEEE80211_MAX_NUM_PWR_LEVEL],
+ psd_power, tx_power;
+ s8 eirp_power = 0;
+ u16 start_freq, center_freq;
+
+ chan = ctx->def.chan;
+ start_freq = ath11k_mac_get_6ghz_start_frequency(&ctx->def);
+ pwr_reduction = bss_conf->pwr_reduction;
+
+ if (arvif->reg_tpc_info.num_pwr_levels) {
+ is_tpe_present = true;
+ num_pwr_levels = arvif->reg_tpc_info.num_pwr_levels;
+ } else {
+ num_pwr_levels = ath11k_mac_get_num_pwr_levels(&ctx->def);
+ }
+
+ for (pwr_lvl_idx = 0; pwr_lvl_idx < num_pwr_levels; pwr_lvl_idx++) {
+ /* STA received TPE IE*/
+ if (is_tpe_present) {
+ /* local power is PSD power*/
+ if (chan->flags & IEEE80211_CHAN_PSD) {
+ /* Connecting AP is psd power */
+ if (reg_tpc_info->is_psd_power) {
+ is_psd_power = true;
+ ath11k_mac_get_psd_channel(ar, 20,
+ &start_freq,
+ &center_freq,
+ pwr_lvl_idx,
+ &temp_chan,
+ &tx_power);
+ psd_power = temp_chan->psd;
+ eirp_power = tx_power;
+ max_tx_power[pwr_lvl_idx] =
+ min_t(s8,
+ psd_power,
+ reg_tpc_info->tpe[pwr_lvl_idx]);
+ /* Connecting AP is not psd power */
+ } else {
+ ath11k_mac_get_eirp_power(ar,
+ &start_freq,
+ &center_freq,
+ pwr_lvl_idx,
+ &temp_chan,
+ &ctx->def,
+ &tx_power);
+ psd_power = temp_chan->psd;
+ /* convert psd power to EIRP power based
+ * on channel width
+ */
+ tx_power =
+ min_t(s8, tx_power,
+ psd_power + 13 + pwr_lvl_idx * 3);
+ max_tx_power[pwr_lvl_idx] =
+ min_t(s8,
+ tx_power,
+ reg_tpc_info->tpe[pwr_lvl_idx]);
+ }
+ /* local power is not PSD power */
+ } else {
+ /* Connecting AP is psd power */
+ if (reg_tpc_info->is_psd_power) {
+ is_psd_power = true;
+ ath11k_mac_get_psd_channel(ar, 20,
+ &start_freq,
+ &center_freq,
+ pwr_lvl_idx,
+ &temp_chan,
+ &tx_power);
+ eirp_power = tx_power;
+ max_tx_power[pwr_lvl_idx] =
+ reg_tpc_info->tpe[pwr_lvl_idx];
+ /* Connecting AP is not psd power */
+ } else {
+ ath11k_mac_get_eirp_power(ar,
+ &start_freq,
+ &center_freq,
+ pwr_lvl_idx,
+ &temp_chan,
+ &ctx->def,
+ &tx_power);
+ max_tx_power[pwr_lvl_idx] =
+ min_t(s8,
+ tx_power,
+ reg_tpc_info->tpe[pwr_lvl_idx]);
+ }
+ }
+ /* STA not received TPE IE */
+ } else {
+ /* local power is PSD power*/
+ if (chan->flags & IEEE80211_CHAN_PSD) {
+ is_psd_power = true;
+ ath11k_mac_get_psd_channel(ar, 20,
+ &start_freq,
+ &center_freq,
+ pwr_lvl_idx,
+ &temp_chan,
+ &tx_power);
+ psd_power = temp_chan->psd;
+ eirp_power = tx_power;
+ max_tx_power[pwr_lvl_idx] = psd_power;
+ } else {
+ ath11k_mac_get_eirp_power(ar,
+ &start_freq,
+ &center_freq,
+ pwr_lvl_idx,
+ &temp_chan,
+ &ctx->def,
+ &tx_power);
+ max_tx_power[pwr_lvl_idx] = tx_power;
+ }
+ }
+
+ if (is_psd_power) {
+ /* If AP local power constraint is present */
+ if (pwr_reduction)
+ eirp_power = eirp_power - pwr_reduction;
+
+ /* If firmware updated max tx power is non zero, then take
+ * the min of firmware updated ap tx power
+ * and max power derived from above mentioned parameters.
+ */
+ ath11k_dbg(ab, ATH11K_DBG_MAC,
+ "eirp power : %d firmware report power : %d\n",
+ eirp_power, ar->max_allowed_tx_power);
+ /* Firmware reports lower max_allowed_tx_power during vdev
+ * start response. In case of 6 GHz, firmware is not aware
+ * of EIRP power unless driver sets EIRP power through WMI
+ * TPC command. So radio which does not support idle power
+ * save can set maximum calculated EIRP power directly to
+ * firmware through TPC command without min comparison with
+ * vdev start response's max_allowed_tx_power.
+ */
+ if (ar->max_allowed_tx_power && ab->hw_params.idle_ps)
+ eirp_power = min_t(s8,
+ eirp_power,
+ ar->max_allowed_tx_power);
+ } else {
+ /* If AP local power constraint is present */
+ if (pwr_reduction)
+ max_tx_power[pwr_lvl_idx] =
+ max_tx_power[pwr_lvl_idx] - pwr_reduction;
+ /* If firmware updated max tx power is non zero, then take
+ * the min of firmware updated ap tx power
+ * and max power derived from above mentioned parameters.
+ */
+ if (ar->max_allowed_tx_power && ab->hw_params.idle_ps)
+ max_tx_power[pwr_lvl_idx] =
+ min_t(s8,
+ max_tx_power[pwr_lvl_idx],
+ ar->max_allowed_tx_power);
+ }
+ reg_tpc_info->chan_power_info[pwr_lvl_idx].chan_cfreq = center_freq;
+ reg_tpc_info->chan_power_info[pwr_lvl_idx].tx_power =
+ max_tx_power[pwr_lvl_idx];
+ }
+
+ reg_tpc_info->num_pwr_levels = num_pwr_levels;
+ reg_tpc_info->is_psd_power = is_psd_power;
+ reg_tpc_info->eirp_power = eirp_power;
+ reg_tpc_info->ap_power_type =
+ ath11k_reg_ap_pwr_convert(vif->bss_conf.power_type);
+}
+
+static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar,
+ struct ieee80211_vif *vif,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+ struct ieee80211_tx_pwr_env *single_tpe;
+ enum wmi_reg_6ghz_client_type client_type;
+ struct cur_regulatory_info *reg_info;
+ int i;
+ u8 pwr_count, pwr_interpret, pwr_category;
+ u8 psd_index = 0, non_psd_index = 0, local_tpe_count = 0, reg_tpe_count = 0;
+ bool use_local_tpe, non_psd_set = false, psd_set = false;
+
+ reg_info = &ab->reg_info_store[ar->pdev_idx];
+ client_type = reg_info->client_type;
+
+ for (i = 0; i < bss_conf->tx_pwr_env_num; i++) {
+ single_tpe = &bss_conf->tx_pwr_env[i];
+ pwr_category = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_CATEGORY);
+ pwr_interpret = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_INTERPRET);
+
+ if (pwr_category == client_type) {
+ if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP ||
+ pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD)
+ local_tpe_count++;
+ else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP ||
+ pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD)
+ reg_tpe_count++;
+ }
+ }
+
+ if (!reg_tpe_count && !local_tpe_count) {
+ ath11k_warn(ab,
+ "no transmit power envelope match client power type %d\n",
+ client_type);
+ return;
+ } else if (!reg_tpe_count) {
+ use_local_tpe = true;
+ } else {
+ use_local_tpe = false;
+ }
+
+ for (i = 0; i < bss_conf->tx_pwr_env_num; i++) {
+ single_tpe = &bss_conf->tx_pwr_env[i];
+ pwr_category = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_CATEGORY);
+ pwr_interpret = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_INTERPRET);
+
+ if (pwr_category != client_type)
+ continue;
+
+ /* get local transmit power envelope */
+ if (use_local_tpe) {
+ if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP) {
+ non_psd_index = i;
+ non_psd_set = true;
+ } else if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) {
+ psd_index = i;
+ psd_set = true;
+ }
+ /* get regulatory transmit power envelope */
+ } else {
+ if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP) {
+ non_psd_index = i;
+ non_psd_set = true;
+ } else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) {
+ psd_index = i;
+ psd_set = true;
+ }
+ }
+ }
+
+ if (non_psd_set && !psd_set) {
+ single_tpe = &bss_conf->tx_pwr_env[non_psd_index];
+ pwr_count = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_COUNT);
+ pwr_interpret = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_INTERPRET);
+ arvif->reg_tpc_info.is_psd_power = false;
+ arvif->reg_tpc_info.eirp_power = 0;
+
+ arvif->reg_tpc_info.num_pwr_levels =
+ ath11k_mac_get_tpe_count(pwr_interpret, pwr_count);
+
+ for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) {
+ ath11k_dbg(ab, ATH11K_DBG_MAC,
+ "non PSD power[%d] : %d\n",
+ i, single_tpe->tx_power[i]);
+ arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2;
+ }
+ }
+
+ if (psd_set) {
+ single_tpe = &bss_conf->tx_pwr_env[psd_index];
+ pwr_count = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_COUNT);
+ pwr_interpret = u8_get_bits(single_tpe->tx_power_info,
+ IEEE80211_TX_PWR_ENV_INFO_INTERPRET);
+ arvif->reg_tpc_info.is_psd_power = true;
+
+ if (pwr_count == 0) {
+ ath11k_dbg(ab, ATH11K_DBG_MAC,
+ "TPE PSD power : %d\n", single_tpe->tx_power[0]);
+ arvif->reg_tpc_info.num_pwr_levels =
+ ath11k_mac_get_num_pwr_levels(&ctx->def);
+
+ for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++)
+ arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[0] / 2;
+ } else {
+ arvif->reg_tpc_info.num_pwr_levels =
+ ath11k_mac_get_tpe_count(pwr_interpret, pwr_count);
+
+ for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) {
+ ath11k_dbg(ab, ATH11K_DBG_MAC,
+ "TPE PSD power[%d] : %d\n",
+ i, single_tpe->tx_power[i]);
+ arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2;
+ }
+ }
+ }
+}
+
static int
ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -7610,7 +7896,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
struct ath11k_base *ab = ar->ab;
struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
int ret;
- struct peer_create_params param;
+ struct cur_regulatory_info *reg_info;
+ enum ieee80211_ap_reg_power power_type;
mutex_lock(&ar->conf_mutex);
@@ -7618,6 +7905,24 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
"chanctx assign ptr %p vdev_id %i\n",
ctx, arvif->vdev_id);
+ if (ath11k_wmi_supports_6ghz_cc_ext(ar) &&
+ ctx->def.chan->band == NL80211_BAND_6GHZ &&
+ arvif->vdev_type == WMI_VDEV_TYPE_STA) {
+ reg_info = &ab->reg_info_store[ar->pdev_idx];
+ power_type = vif->bss_conf.power_type;
+
+ ath11k_dbg(ab, ATH11K_DBG_MAC, "chanctx power type %d\n", power_type);
+
+ if (power_type == IEEE80211_REG_UNSET_AP) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ath11k_reg_handle_chan_list(ab, reg_info, power_type);
+ arvif->chanctx = *ctx;
+ ath11k_mac_parse_tx_pwr_env(ar, vif, ctx);
+ }
+
/* for QCA6390 bss peer must be created before vdev_start */
if (ab->hw_params.vdev_start_delay &&
arvif->vdev_type != WMI_VDEV_TYPE_AP &&
@@ -7633,21 +7938,6 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
goto out;
}
- if (ab->hw_params.vdev_start_delay &&
- arvif->vdev_type != WMI_VDEV_TYPE_AP &&
- arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) {
- param.vdev_id = arvif->vdev_id;
- param.peer_type = WMI_PEER_TYPE_DEFAULT;
- param.peer_addr = ar->mac_addr;
-
- ret = ath11k_peer_create(ar, arvif, NULL, &param);
- if (ret) {
- ath11k_warn(ab, "failed to create peer after vdev start delay: %d",
- ret);
- goto out;
- }
- }
-
if (arvif->vdev_type == WMI_VDEV_TYPE_MONITOR) {
ret = ath11k_mac_monitor_start(ar);
if (ret) {
@@ -7660,15 +7950,17 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
goto out;
}
- ret = ath11k_mac_vdev_start(arvif, ctx);
- if (ret) {
- ath11k_warn(ab, "failed to start vdev %i addr %pM on freq %d: %d\n",
- arvif->vdev_id, vif->addr,
- ctx->def.chan->center_freq, ret);
- goto out;
- }
+ if (!arvif->is_started) {
+ ret = ath11k_mac_vdev_start(arvif, ctx);
+ if (ret) {
+ ath11k_warn(ab, "failed to start vdev %i addr %pM on freq %d: %d\n",
+ arvif->vdev_id, vif->addr,
+ ctx->def.chan->center_freq, ret);
+ goto out;
+ }
- arvif->is_started = true;
+ arvif->is_started = true;
+ }
if (arvif->vdev_type != WMI_VDEV_TYPE_MONITOR &&
test_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags)) {
@@ -7708,8 +8000,6 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
"chanctx unassign ptr %p vdev_id %i\n",
ctx, arvif->vdev_id);
- WARN_ON(!arvif->is_started);
-
if (ab->hw_params.vdev_start_delay &&
arvif->vdev_type == WMI_VDEV_TYPE_MONITOR) {
spin_lock_bh(&ab->base_lock);
@@ -7733,24 +8023,13 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
return;
}
- ret = ath11k_mac_vdev_stop(arvif);
- if (ret)
- ath11k_warn(ab, "failed to stop vdev %i: %d\n",
- arvif->vdev_id, ret);
-
- arvif->is_started = false;
-
- if (ab->hw_params.vdev_start_delay &&
- arvif->vdev_type == WMI_VDEV_TYPE_STA) {
- ret = ath11k_peer_delete(ar, arvif->vdev_id, arvif->bssid);
+ if (arvif->is_started) {
+ ret = ath11k_mac_vdev_stop(arvif);
if (ret)
- ath11k_warn(ar->ab,
- "failed to delete peer %pM for vdev %d: %d\n",
- arvif->bssid, arvif->vdev_id, ret);
- else
- ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
- "removed peer %pM vdev %d after vdev stop\n",
- arvif->bssid, arvif->vdev_id);
+ ath11k_warn(ab, "failed to stop vdev %i: %d\n",
+ arvif->vdev_id, ret);
+
+ arvif->is_started = false;
}
if (ab->hw_params.vdev_start_delay &&
@@ -8973,8 +9252,8 @@ static int ath11k_mac_op_remain_on_channel(struct ieee80211_hw *hw,
arg->dwell_time_active = scan_time_msec;
arg->dwell_time_passive = scan_time_msec;
arg->max_scan_time = scan_time_msec;
- arg->scan_flags |= WMI_SCAN_FLAG_PASSIVE;
- arg->scan_flags |= WMI_SCAN_FILTER_PROBE_REQ;
+ arg->scan_f_passive = 1;
+ arg->scan_f_filter_prb_req = 1;
arg->burst_duration = duration;
ret = ath11k_start_scan(ar, arg);
@@ -9108,6 +9387,252 @@ err_fallback:
return 0;
}
+static int ath11k_mac_station_add(struct ath11k *ar,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
+ struct peer_create_params peer_param;
+ int ret;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ ret = ath11k_mac_inc_num_stations(arvif, sta);
+ if (ret) {
+ ath11k_warn(ab, "refusing to associate station: too many connected already (%d)\n",
+ ar->max_num_stations);
+ goto exit;
+ }
+
+ arsta->rx_stats = kzalloc(sizeof(*arsta->rx_stats), GFP_KERNEL);
+ if (!arsta->rx_stats) {
+ ret = -ENOMEM;
+ goto dec_num_station;
+ }
+
+ peer_param.vdev_id = arvif->vdev_id;
+ peer_param.peer_addr = sta->addr;
+ peer_param.peer_type = WMI_PEER_TYPE_DEFAULT;
+
+ ret = ath11k_peer_create(ar, arvif, sta, &peer_param);
+ if (ret) {
+ ath11k_warn(ab, "Failed to add peer: %pM for VDEV: %d\n",
+ sta->addr, arvif->vdev_id);
+ goto free_rx_stats;
+ }
+
+ ath11k_dbg(ab, ATH11K_DBG_MAC, "Added peer: %pM for VDEV: %d\n",
+ sta->addr, arvif->vdev_id);
+
+ if (ath11k_debugfs_is_extd_tx_stats_enabled(ar)) {
+ arsta->tx_stats = kzalloc(sizeof(*arsta->tx_stats), GFP_KERNEL);
+ if (!arsta->tx_stats) {
+ ret = -ENOMEM;
+ goto free_peer;
+ }
+ }
+
+ if (ieee80211_vif_is_mesh(vif)) {
+ ath11k_dbg(ab, ATH11K_DBG_MAC,
+ "setting USE_4ADDR for mesh STA %pM\n", sta->addr);
+ ret = ath11k_wmi_set_peer_param(ar, sta->addr,
+ arvif->vdev_id,
+ WMI_PEER_USE_4ADDR, 1);
+ if (ret) {
+ ath11k_warn(ab, "failed to set mesh STA %pM 4addr capability: %d\n",
+ sta->addr, ret);
+ goto free_tx_stats;
+ }
+ }
+
+ ret = ath11k_dp_peer_setup(ar, arvif->vdev_id, sta->addr);
+ if (ret) {
+ ath11k_warn(ab, "failed to setup dp for peer %pM on vdev %i (%d)\n",
+ sta->addr, arvif->vdev_id, ret);
+ goto free_tx_stats;
+ }
+
+ if (ab->hw_params.vdev_start_delay &&
+ !arvif->is_started &&
+ arvif->vdev_type != WMI_VDEV_TYPE_AP) {
+ ret = ath11k_mac_start_vdev_delay(ar->hw, vif);
+ if (ret) {
+ ath11k_warn(ab, "failed to delay vdev start: %d\n", ret);
+ goto free_tx_stats;
+ }
+ }
+
+ ewma_avg_rssi_init(&arsta->avg_rssi);
+ return 0;
+
+free_tx_stats:
+ kfree(arsta->tx_stats);
+ arsta->tx_stats = NULL;
+free_peer:
+ ath11k_peer_delete(ar, arvif->vdev_id, sta->addr);
+free_rx_stats:
+ kfree(arsta->rx_stats);
+ arsta->rx_stats = NULL;
+dec_num_station:
+ ath11k_mac_dec_num_stations(arvif, sta);
+exit:
+ return ret;
+}
+
+static int ath11k_mac_station_remove(struct ath11k *ar,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
+ int ret;
+
+ if (ab->hw_params.vdev_start_delay &&
+ arvif->is_started &&
+ arvif->vdev_type != WMI_VDEV_TYPE_AP) {
+ ret = ath11k_mac_stop_vdev_early(ar->hw, vif);
+ if (ret) {
+ ath11k_warn(ab, "failed to do early vdev stop: %d\n", ret);
+ return ret;
+ }
+ }
+
+ ath11k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr);
+
+ ret = ath11k_peer_delete(ar, arvif->vdev_id, sta->addr);
+ if (ret)
+ ath11k_warn(ab, "Failed to delete peer: %pM for VDEV: %d\n",
+ sta->addr, arvif->vdev_id);
+ else
+ ath11k_dbg(ab, ATH11K_DBG_MAC, "Removed peer: %pM for VDEV: %d\n",
+ sta->addr, arvif->vdev_id);
+
+ ath11k_mac_dec_num_stations(arvif, sta);
+
+ kfree(arsta->tx_stats);
+ arsta->tx_stats = NULL;
+
+ kfree(arsta->rx_stats);
+ arsta->rx_stats = NULL;
+
+ return ret;
+}
+
+static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ enum ieee80211_sta_state old_state,
+ enum ieee80211_sta_state new_state)
+{
+ struct ath11k *ar = hw->priv;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
+ struct ath11k_peer *peer;
+ int ret = 0;
+
+ /* cancel must be done outside the mutex to avoid deadlock */
+ if ((old_state == IEEE80211_STA_NONE &&
+ new_state == IEEE80211_STA_NOTEXIST)) {
+ cancel_work_sync(&arsta->update_wk);
+ cancel_work_sync(&arsta->set_4addr_wk);
+ }
+
+ mutex_lock(&ar->conf_mutex);
+
+ if (old_state == IEEE80211_STA_NOTEXIST &&
+ new_state == IEEE80211_STA_NONE) {
+ memset(arsta, 0, sizeof(*arsta));
+ arsta->arvif = arvif;
+ arsta->peer_ps_state = WMI_PEER_PS_STATE_DISABLED;
+ INIT_WORK(&arsta->update_wk, ath11k_sta_rc_update_wk);
+ INIT_WORK(&arsta->set_4addr_wk, ath11k_sta_set_4addr_wk);
+
+ ret = ath11k_mac_station_add(ar, vif, sta);
+ if (ret)
+ ath11k_warn(ar->ab, "Failed to add station: %pM for VDEV: %d\n",
+ sta->addr, arvif->vdev_id);
+ } else if ((old_state == IEEE80211_STA_NONE &&
+ new_state == IEEE80211_STA_NOTEXIST)) {
+ ret = ath11k_mac_station_remove(ar, vif, sta);
+ if (ret)
+ ath11k_warn(ar->ab, "Failed to remove station: %pM for VDEV: %d\n",
+ sta->addr, arvif->vdev_id);
+
+ mutex_lock(&ar->ab->tbl_mtx_lock);
+ spin_lock_bh(&ar->ab->base_lock);
+ peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
+ if (peer && peer->sta == sta) {
+ ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n",
+ vif->addr, arvif->vdev_id);
+ ath11k_peer_rhash_delete(ar->ab, peer);
+ peer->sta = NULL;
+ list_del(&peer->list);
+ kfree(peer);
+ ar->num_peers--;
+ }
+ spin_unlock_bh(&ar->ab->base_lock);
+ mutex_unlock(&ar->ab->tbl_mtx_lock);
+ } else if (old_state == IEEE80211_STA_AUTH &&
+ new_state == IEEE80211_STA_ASSOC &&
+ (vif->type == NL80211_IFTYPE_AP ||
+ vif->type == NL80211_IFTYPE_MESH_POINT ||
+ vif->type == NL80211_IFTYPE_ADHOC)) {
+ ret = ath11k_station_assoc(ar, vif, sta, false);
+ if (ret)
+ ath11k_warn(ar->ab, "Failed to associate station: %pM\n",
+ sta->addr);
+
+ spin_lock_bh(&ar->data_lock);
+ /* Set arsta bw and prev bw */
+ arsta->bw = ath11k_mac_ieee80211_sta_bw_to_wmi(ar, sta);
+ arsta->bw_prev = arsta->bw;
+ spin_unlock_bh(&ar->data_lock);
+ } else if (old_state == IEEE80211_STA_ASSOC &&
+ new_state == IEEE80211_STA_AUTHORIZED) {
+ spin_lock_bh(&ar->ab->base_lock);
+
+ peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
+ if (peer)
+ peer->is_authorized = true;
+
+ spin_unlock_bh(&ar->ab->base_lock);
+
+ if (vif->type == NL80211_IFTYPE_STATION && arvif->is_up) {
+ ret = ath11k_wmi_set_peer_param(ar, sta->addr,
+ arvif->vdev_id,
+ WMI_PEER_AUTHORIZE,
+ 1);
+ if (ret)
+ ath11k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n",
+ sta->addr, arvif->vdev_id, ret);
+ }
+ } else if (old_state == IEEE80211_STA_AUTHORIZED &&
+ new_state == IEEE80211_STA_ASSOC) {
+ spin_lock_bh(&ar->ab->base_lock);
+
+ peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
+ if (peer)
+ peer->is_authorized = false;
+
+ spin_unlock_bh(&ar->ab->base_lock);
+ } else if (old_state == IEEE80211_STA_ASSOC &&
+ new_state == IEEE80211_STA_AUTH &&
+ (vif->type == NL80211_IFTYPE_AP ||
+ vif->type == NL80211_IFTYPE_MESH_POINT ||
+ vif->type == NL80211_IFTYPE_ADHOC)) {
+ ret = ath11k_station_disassoc(ar, vif, sta);
+ if (ret)
+ ath11k_warn(ar->ab, "Failed to disassociate station: %pM\n",
+ sta->addr);
+ }
+
+ mutex_unlock(&ar->conf_mutex);
+ return ret;
+}
+
static const struct ieee80211_ops ath11k_ops = {
.tx = ath11k_mac_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
@@ -9153,6 +9678,7 @@ static const struct ieee80211_ops ath11k_ops = {
#endif
#ifdef CONFIG_ATH11K_DEBUGFS
+ .vif_add_debugfs = ath11k_debugfs_op_vif_add,
.sta_add_debugfs = ath11k_debugfs_sta_op_add,
#endif
@@ -9298,6 +9824,33 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar,
return 0;
}
+static void ath11k_mac_setup_mac_address_list(struct ath11k *ar)
+{
+ struct mac_address *addresses;
+ u16 n_addresses;
+ int i;
+
+ if (!ar->ab->hw_params.support_dual_stations)
+ return;
+
+ n_addresses = ar->ab->hw_params.num_vdevs;
+ addresses = kcalloc(n_addresses, sizeof(*addresses), GFP_KERNEL);
+ if (!addresses)
+ return;
+
+ memcpy(addresses[0].addr, ar->mac_addr, ETH_ALEN);
+ for (i = 1; i < n_addresses; i++) {
+ memcpy(addresses[i].addr, ar->mac_addr, ETH_ALEN);
+ /* set Local Administered Address bit */
+ addresses[i].addr[0] |= 0x2;
+
+ addresses[i].addr[0] += (i - 1) << 4;
+ }
+
+ ar->hw->wiphy->addresses = addresses;
+ ar->hw->wiphy->n_addresses = n_addresses;
+}
+
static int ath11k_mac_setup_iface_combinations(struct ath11k *ar)
{
struct ath11k_base *ab = ar->ab;
@@ -9317,28 +9870,46 @@ static int ath11k_mac_setup_iface_combinations(struct ath11k *ar)
return -ENOMEM;
}
- limits[0].max = 1;
- limits[0].types |= BIT(NL80211_IFTYPE_STATION);
-
- limits[1].max = 16;
- limits[1].types |= BIT(NL80211_IFTYPE_AP);
+ if (ab->hw_params.support_dual_stations) {
+ limits[0].max = 2;
+ limits[0].types |= BIT(NL80211_IFTYPE_STATION);
- if (IS_ENABLED(CONFIG_MAC80211_MESH) &&
- ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT))
- limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT);
+ limits[1].max = 1;
+ limits[1].types |= BIT(NL80211_IFTYPE_AP);
+ if (IS_ENABLED(CONFIG_MAC80211_MESH) &&
+ ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT))
+ limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT);
- combinations[0].limits = limits;
- combinations[0].n_limits = n_limits;
- combinations[0].max_interfaces = 16;
- combinations[0].num_different_channels = 1;
- combinations[0].beacon_int_infra_match = true;
- combinations[0].beacon_int_min_gcd = 100;
- combinations[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) |
- BIT(NL80211_CHAN_WIDTH_20) |
- BIT(NL80211_CHAN_WIDTH_40) |
- BIT(NL80211_CHAN_WIDTH_80) |
- BIT(NL80211_CHAN_WIDTH_80P80) |
- BIT(NL80211_CHAN_WIDTH_160);
+ combinations[0].limits = limits;
+ combinations[0].n_limits = 2;
+ combinations[0].max_interfaces = ab->hw_params.num_vdevs;
+ combinations[0].num_different_channels = 2;
+ combinations[0].beacon_int_infra_match = true;
+ combinations[0].beacon_int_min_gcd = 100;
+ } else {
+ limits[0].max = 1;
+ limits[0].types |= BIT(NL80211_IFTYPE_STATION);
+
+ limits[1].max = 16;
+ limits[1].types |= BIT(NL80211_IFTYPE_AP);
+
+ if (IS_ENABLED(CONFIG_MAC80211_MESH) &&
+ ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT))
+ limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT);
+
+ combinations[0].limits = limits;
+ combinations[0].n_limits = 2;
+ combinations[0].max_interfaces = 16;
+ combinations[0].num_different_channels = 1;
+ combinations[0].beacon_int_infra_match = true;
+ combinations[0].beacon_int_min_gcd = 100;
+ combinations[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) |
+ BIT(NL80211_CHAN_WIDTH_20) |
+ BIT(NL80211_CHAN_WIDTH_40) |
+ BIT(NL80211_CHAN_WIDTH_80) |
+ BIT(NL80211_CHAN_WIDTH_80P80) |
+ BIT(NL80211_CHAN_WIDTH_160);
+ }
ar->hw->wiphy->iface_combinations = combinations;
ar->hw->wiphy->n_iface_combinations = 1;
@@ -9403,6 +9974,8 @@ static void __ath11k_mac_unregister(struct ath11k *ar)
kfree(ar->hw->wiphy->iface_combinations[0].limits);
kfree(ar->hw->wiphy->iface_combinations);
+ kfree(ar->hw->wiphy->addresses);
+
SET_IEEE80211_DEV(ar->hw, NULL);
}
@@ -9445,6 +10018,7 @@ static int __ath11k_mac_register(struct ath11k *ar)
ath11k_pdev_caps_update(ar);
SET_IEEE80211_PERM_ADDR(ar->hw, ar->mac_addr);
+ ath11k_mac_setup_mac_address_list(ar);
SET_IEEE80211_DEV(ar->hw, ab->dev);
diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h
index 0dfdeed5177b..f5800fbecff8 100644
--- a/drivers/net/wireless/ath/ath11k/mac.h
+++ b/drivers/net/wireless/ath/ath11k/mac.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH11K_MAC_H
@@ -176,4 +176,7 @@ int ath11k_mac_wait_tx_complete(struct ath11k *ar);
int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif,
enum wmi_sta_keepalive_method method,
u32 interval);
+void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar,
+ struct ieee80211_vif *vif,
+ struct ieee80211_chanctx_conf *ctx);
#endif
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 6835c14b82cc..fb4ecf9a103e 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2020 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/msi.h>
@@ -20,35 +20,7 @@
#define MHI_TIMEOUT_DEFAULT_MS 20000
#define RDDM_DUMP_SIZE 0x420000
-static struct mhi_channel_config ath11k_mhi_channels_qca6390[] = {
- {
- .num = 0,
- .name = "LOOPBACK",
- .num_elements = 32,
- .event_ring = 0,
- .dir = DMA_TO_DEVICE,
- .ee_mask = 0x4,
- .pollcfg = 0,
- .doorbell = MHI_DB_BRST_DISABLE,
- .lpm_notify = false,
- .offload_channel = false,
- .doorbell_mode_switch = false,
- .auto_queue = false,
- },
- {
- .num = 1,
- .name = "LOOPBACK",
- .num_elements = 32,
- .event_ring = 0,
- .dir = DMA_FROM_DEVICE,
- .ee_mask = 0x4,
- .pollcfg = 0,
- .doorbell = MHI_DB_BRST_DISABLE,
- .lpm_notify = false,
- .offload_channel = false,
- .doorbell_mode_switch = false,
- .auto_queue = false,
- },
+static const struct mhi_channel_config ath11k_mhi_channels_qca6390[] = {
{
.num = 20,
.name = "IPCR",
@@ -102,46 +74,18 @@ static struct mhi_event_config ath11k_mhi_events_qca6390[] = {
},
};
-static struct mhi_controller_config ath11k_mhi_config_qca6390 = {
+static const struct mhi_controller_config ath11k_mhi_config_qca6390 = {
.max_channels = 128,
.timeout_ms = 2000,
.use_bounce_buf = false,
- .buf_len = 0,
+ .buf_len = 8192,
.num_channels = ARRAY_SIZE(ath11k_mhi_channels_qca6390),
.ch_cfg = ath11k_mhi_channels_qca6390,
.num_events = ARRAY_SIZE(ath11k_mhi_events_qca6390),
.event_cfg = ath11k_mhi_events_qca6390,
};
-static struct mhi_channel_config ath11k_mhi_channels_qcn9074[] = {
- {
- .num = 0,
- .name = "LOOPBACK",
- .num_elements = 32,
- .event_ring = 1,
- .dir = DMA_TO_DEVICE,
- .ee_mask = 0x14,
- .pollcfg = 0,
- .doorbell = MHI_DB_BRST_DISABLE,
- .lpm_notify = false,
- .offload_channel = false,
- .doorbell_mode_switch = false,
- .auto_queue = false,
- },
- {
- .num = 1,
- .name = "LOOPBACK",
- .num_elements = 32,
- .event_ring = 1,
- .dir = DMA_FROM_DEVICE,
- .ee_mask = 0x14,
- .pollcfg = 0,
- .doorbell = MHI_DB_BRST_DISABLE,
- .lpm_notify = false,
- .offload_channel = false,
- .doorbell_mode_switch = false,
- .auto_queue = false,
- },
+static const struct mhi_channel_config ath11k_mhi_channels_qcn9074[] = {
{
.num = 20,
.name = "IPCR",
@@ -195,7 +139,7 @@ static struct mhi_event_config ath11k_mhi_events_qcn9074[] = {
},
};
-static struct mhi_controller_config ath11k_mhi_config_qcn9074 = {
+static const struct mhi_controller_config ath11k_mhi_config_qcn9074 = {
.max_channels = 30,
.timeout_ms = 10000,
.use_bounce_buf = false,
@@ -384,7 +328,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
{
struct ath11k_base *ab = ab_pci->ab;
struct mhi_controller *mhi_ctrl;
- struct mhi_controller_config *ath11k_mhi_config;
+ const struct mhi_controller_config *ath11k_mhi_config;
int ret;
mhi_ctrl = mhi_alloc_controller();
@@ -423,7 +367,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
goto free_controller;
} else {
mhi_ctrl->iova_start = 0;
- mhi_ctrl->iova_stop = 0xFFFFFFFF;
+ mhi_ctrl->iova_stop = ab_pci->dma_mask;
}
mhi_ctrl->rddm_size = RDDM_DUMP_SIZE;
@@ -443,6 +387,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
case ATH11K_HW_QCA6390_HW20:
case ATH11K_HW_WCN6855_HW20:
case ATH11K_HW_WCN6855_HW21:
+ case ATH11K_HW_QCA2066_HW21:
ath11k_mhi_config = &ath11k_mhi_config_qca6390;
break;
default:
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 09e65c5e55c4..be9d2c69cc41 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2019-2020 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
@@ -18,7 +18,8 @@
#include "qmi.h"
#define ATH11K_PCI_BAR_NUM 0
-#define ATH11K_PCI_DMA_MASK 32
+#define ATH11K_PCI_DMA_MASK 36
+#define ATH11K_PCI_COHERENT_DMA_MASK 32
#define TCSR_SOC_HW_VERSION 0x0224
#define TCSR_SOC_HW_VERSION_MAJOR_MASK GENMASK(11, 8)
@@ -28,6 +29,8 @@
#define QCN9074_DEVICE_ID 0x1104
#define WCN6855_DEVICE_ID 0x1103
+#define TCSR_SOC_HW_SUB_VER 0x1910010
+
static const struct pci_device_id ath11k_pci_id_table[] = {
{ PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) },
{ PCI_VDEVICE(QCOM, WCN6855_DEVICE_ID) },
@@ -526,14 +529,24 @@ static int ath11k_pci_claim(struct ath11k_pci *ab_pci, struct pci_dev *pdev)
goto disable_device;
}
- ret = dma_set_mask_and_coherent(&pdev->dev,
- DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
+ ret = dma_set_mask(&pdev->dev,
+ DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
if (ret) {
ath11k_err(ab, "failed to set pci dma mask to %d: %d\n",
ATH11K_PCI_DMA_MASK, ret);
goto release_region;
}
+ ab_pci->dma_mask = DMA_BIT_MASK(ATH11K_PCI_DMA_MASK);
+
+ ret = dma_set_coherent_mask(&pdev->dev,
+ DMA_BIT_MASK(ATH11K_PCI_COHERENT_DMA_MASK));
+ if (ret) {
+ ath11k_err(ab, "failed to set pci coherent dma mask to %d: %d\n",
+ ATH11K_PCI_COHERENT_DMA_MASK, ret);
+ goto release_region;
+ }
+
pci_set_master(pdev);
ab->mem_len = pci_resource_len(pdev, ATH11K_PCI_BAR_NUM);
@@ -731,8 +744,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
struct ath11k_base *ab;
struct ath11k_pci *ab_pci;
u32 soc_hw_version_major, soc_hw_version_minor, addr;
- const struct ath11k_pci_ops *pci_ops;
int ret;
+ u32 sub_version;
ab = ath11k_core_alloc(&pdev->dev, sizeof(*ab_pci), ATH11K_BUS_PCI);
@@ -777,6 +790,12 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
switch (pci_dev->device) {
case QCA6390_DEVICE_ID:
+ ret = ath11k_pcic_register_pci_ops(ab, &ath11k_pci_ops_qca6390);
+ if (ret) {
+ ath11k_err(ab, "failed to register PCI ops: %d\n", ret);
+ goto err_pci_free_region;
+ }
+
ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
&soc_hw_version_minor);
switch (soc_hw_version_major) {
@@ -790,13 +809,21 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
goto err_pci_free_region;
}
- pci_ops = &ath11k_pci_ops_qca6390;
break;
case QCN9074_DEVICE_ID:
- pci_ops = &ath11k_pci_ops_qcn9074;
+ ret = ath11k_pcic_register_pci_ops(ab, &ath11k_pci_ops_qcn9074);
+ if (ret) {
+ ath11k_err(ab, "failed to register PCI ops: %d\n", ret);
+ goto err_pci_free_region;
+ }
ab->hw_rev = ATH11K_HW_QCN9074_HW10;
break;
case WCN6855_DEVICE_ID:
+ ret = ath11k_pcic_register_pci_ops(ab, &ath11k_pci_ops_qca6390);
+ if (ret) {
+ ath11k_err(ab, "failed to register PCI ops: %d\n", ret);
+ goto err_pci_free_region;
+ }
ab->id.bdf_search = ATH11K_BDF_SEARCH_BUS_AND_BOARD;
ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
&soc_hw_version_minor);
@@ -809,7 +836,19 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
break;
case 0x10:
case 0x11:
- ab->hw_rev = ATH11K_HW_WCN6855_HW21;
+ sub_version = ath11k_pcic_read32(ab, TCSR_SOC_HW_SUB_VER);
+ ath11k_dbg(ab, ATH11K_DBG_PCI, "sub_version 0x%x\n",
+ sub_version);
+ switch (sub_version) {
+ case 0x1019A0E1:
+ case 0x1019B0E1:
+ case 0x1019C0E1:
+ case 0x1019D0E1:
+ ab->hw_rev = ATH11K_HW_QCA2066_HW21;
+ break;
+ default:
+ ab->hw_rev = ATH11K_HW_WCN6855_HW21;
+ }
break;
default:
goto unsupported_wcn6855_soc;
@@ -823,7 +862,6 @@ unsupported_wcn6855_soc:
goto err_pci_free_region;
}
- pci_ops = &ath11k_pci_ops_qca6390;
break;
default:
dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n",
@@ -832,12 +870,6 @@ unsupported_wcn6855_soc:
goto err_pci_free_region;
}
- ret = ath11k_pcic_register_pci_ops(ab, pci_ops);
- if (ret) {
- ath11k_err(ab, "failed to register PCI ops: %d\n", ret);
- goto err_pci_free_region;
- }
-
ret = ath11k_pcic_init_msi_config(ab);
if (ret) {
ath11k_err(ab, "failed to init msi config: %d\n", ret);
diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h
index e9a01f344ec6..6be73333d90b 100644
--- a/drivers/net/wireless/ath/ath11k/pci.h
+++ b/drivers/net/wireless/ath/ath11k/pci.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2019-2020 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2022,2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ATH11K_PCI_H
#define _ATH11K_PCI_H
@@ -72,6 +72,7 @@ struct ath11k_pci {
/* enum ath11k_pci_flags */
unsigned long flags;
u16 link_ctl;
+ u64 dma_mask;
};
static inline struct ath11k_pci *ath11k_pci_priv(struct ath11k_base *ab)
diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c
index 15e2ceb22a44..add4db4c50bc 100644
--- a/drivers/net/wireless/ath/ath11k/pcic.c
+++ b/drivers/net/wireless/ath/ath11k/pcic.c
@@ -115,6 +115,17 @@ static const struct ath11k_msi_config ath11k_msi_config[] = {
},
.hw_rev = ATH11K_HW_WCN6750_HW10,
},
+ {
+ .total_vectors = 32,
+ .total_users = 4,
+ .users = (struct ath11k_msi_user[]) {
+ { .name = "MHI", .num_vectors = 3, .base_vector = 0 },
+ { .name = "CE", .num_vectors = 10, .base_vector = 3 },
+ { .name = "WAKE", .num_vectors = 1, .base_vector = 13 },
+ { .name = "DP", .num_vectors = 18, .base_vector = 14 },
+ },
+ .hw_rev = ATH11K_HW_QCA2066_HW21,
+ },
};
int ath11k_pcic_init_msi_config(struct ath11k_base *ab)
diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index 2c7cab62b9bb..5006f81f779b 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/elf.h>
@@ -3249,7 +3249,8 @@ static void ath11k_qmi_driver_event_work(struct work_struct *work)
case ATH11K_QMI_EVENT_FW_INIT_DONE:
clear_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags);
if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags)) {
- ath11k_hal_dump_srng_stats(ab);
+ if (ab->is_reset)
+ ath11k_hal_dump_srng_stats(ab);
queue_work(ab->workqueue, &ab->restart_work);
break;
}
diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c
index b4fd4d2107c7..737fcd450d4b 100644
--- a/drivers/net/wireless/ath/ath11k/reg.c
+++ b/drivers/net/wireless/ath/ath11k/reg.c
@@ -425,6 +425,11 @@ static void ath11k_reg_intersect_rules(struct ieee80211_reg_rule *rule1,
/* Use the flags of both the rules */
new_rule->flags = rule1->flags | rule2->flags;
+ if ((rule1->flags & NL80211_RRF_PSD) && (rule2->flags & NL80211_RRF_PSD))
+ new_rule->psd = min_t(s8, rule1->psd, rule2->psd);
+ else
+ new_rule->flags &= ~NL80211_RRF_PSD;
+
/* To be safe, lts use the max cac timeout of both rules */
new_rule->dfs_cac_ms = max_t(u32, rule1->dfs_cac_ms,
rule2->dfs_cac_ms);
@@ -527,13 +532,14 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw)
static void
ath11k_reg_update_rule(struct ieee80211_reg_rule *reg_rule, u32 start_freq,
u32 end_freq, u32 bw, u32 ant_gain, u32 reg_pwr,
- u32 reg_flags)
+ s8 psd, u32 reg_flags)
{
reg_rule->freq_range.start_freq_khz = MHZ_TO_KHZ(start_freq);
reg_rule->freq_range.end_freq_khz = MHZ_TO_KHZ(end_freq);
reg_rule->freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw);
reg_rule->power_rule.max_antenna_gain = DBI_TO_MBI(ant_gain);
reg_rule->power_rule.max_eirp = DBM_TO_MBM(reg_pwr);
+ reg_rule->psd = psd;
reg_rule->flags = reg_flags;
}
@@ -563,7 +569,7 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab,
reg_rule->start_freq,
ETSI_WEATHER_RADAR_BAND_LOW, bw,
reg_rule->ant_gain, reg_rule->reg_power,
- flags);
+ reg_rule->psd_eirp, flags);
ath11k_dbg(ab, ATH11K_DBG_REG,
"\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
@@ -584,7 +590,7 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab,
ath11k_reg_update_rule(regd->reg_rules + i, start_freq,
end_freq, bw, reg_rule->ant_gain,
- reg_rule->reg_power, flags);
+ reg_rule->reg_power, reg_rule->psd_eirp, flags);
regd->reg_rules[i].dfs_cac_ms = ETSI_WEATHER_RADAR_BAND_CAC_TIMEOUT;
@@ -605,7 +611,7 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab,
ETSI_WEATHER_RADAR_BAND_HIGH,
reg_rule->end_freq, bw,
reg_rule->ant_gain, reg_rule->reg_power,
- flags);
+ reg_rule->psd_eirp, flags);
ath11k_dbg(ab, ATH11K_DBG_REG,
"\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
@@ -618,25 +624,68 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab,
*rule_idx = i;
}
+enum wmi_reg_6ghz_ap_type
+ath11k_reg_ap_pwr_convert(enum ieee80211_ap_reg_power power_type)
+{
+ switch (power_type) {
+ case IEEE80211_REG_LPI_AP:
+ return WMI_REG_INDOOR_AP;
+ case IEEE80211_REG_SP_AP:
+ return WMI_REG_STANDARD_POWER_AP;
+ case IEEE80211_REG_VLP_AP:
+ return WMI_REG_VERY_LOW_POWER_AP;
+ default:
+ return WMI_REG_MAX_AP_TYPE;
+ }
+}
+
struct ieee80211_regdomain *
ath11k_reg_build_regd(struct ath11k_base *ab,
- struct cur_regulatory_info *reg_info, bool intersect)
+ struct cur_regulatory_info *reg_info, bool intersect,
+ enum wmi_vdev_type vdev_type,
+ enum ieee80211_ap_reg_power power_type)
{
struct ieee80211_regdomain *tmp_regd, *default_regd, *new_regd = NULL;
- struct cur_reg_rule *reg_rule;
+ struct cur_reg_rule *reg_rule, *reg_rule_6ghz;
u8 i = 0, j = 0, k = 0;
u8 num_rules;
u16 max_bw;
- u32 flags;
+ u32 flags, reg_6ghz_number, max_bw_6ghz;
char alpha2[3];
num_rules = reg_info->num_5ghz_reg_rules + reg_info->num_2ghz_reg_rules;
- /* FIXME: Currently taking reg rules for 6 GHz only from Indoor AP mode list.
- * This can be updated after complete 6 GHz regulatory support is added.
- */
- if (reg_info->is_ext_reg_event)
- num_rules += reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP];
+ if (reg_info->is_ext_reg_event) {
+ if (vdev_type == WMI_VDEV_TYPE_STA) {
+ enum wmi_reg_6ghz_ap_type ap_type;
+
+ ap_type = ath11k_reg_ap_pwr_convert(power_type);
+
+ if (ap_type == WMI_REG_MAX_AP_TYPE)
+ ap_type = WMI_REG_INDOOR_AP;
+
+ reg_6ghz_number = reg_info->num_6ghz_rules_client
+ [ap_type][WMI_REG_DEFAULT_CLIENT];
+
+ if (reg_6ghz_number == 0) {
+ ap_type = WMI_REG_INDOOR_AP;
+ reg_6ghz_number = reg_info->num_6ghz_rules_client
+ [ap_type][WMI_REG_DEFAULT_CLIENT];
+ }
+
+ reg_rule_6ghz = reg_info->reg_rules_6ghz_client_ptr
+ [ap_type][WMI_REG_DEFAULT_CLIENT];
+ max_bw_6ghz = reg_info->max_bw_6ghz_client
+ [ap_type][WMI_REG_DEFAULT_CLIENT];
+ } else {
+ reg_6ghz_number = reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP];
+ reg_rule_6ghz =
+ reg_info->reg_rules_6ghz_ap_ptr[WMI_REG_INDOOR_AP];
+ max_bw_6ghz = reg_info->max_bw_6ghz_ap[WMI_REG_INDOOR_AP];
+ }
+
+ num_rules += reg_6ghz_number;
+ }
if (!num_rules)
goto ret;
@@ -683,14 +732,13 @@ ath11k_reg_build_regd(struct ath11k_base *ab,
* per other BW rule flags we pass from here
*/
flags = NL80211_RRF_AUTO_BW;
- } else if (reg_info->is_ext_reg_event &&
- reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP] &&
- (k < reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP])) {
- reg_rule = reg_info->reg_rules_6ghz_ap_ptr[WMI_REG_INDOOR_AP] +
- k++;
- max_bw = min_t(u16, reg_rule->max_bw,
- reg_info->max_bw_6ghz_ap[WMI_REG_INDOOR_AP]);
+ } else if (reg_info->is_ext_reg_event && reg_6ghz_number &&
+ k < reg_6ghz_number) {
+ reg_rule = reg_rule_6ghz + k++;
+ max_bw = min_t(u16, reg_rule->max_bw, max_bw_6ghz);
flags = NL80211_RRF_AUTO_BW;
+ if (reg_rule->psd_flag)
+ flags |= NL80211_RRF_PSD;
} else {
break;
}
@@ -702,7 +750,7 @@ ath11k_reg_build_regd(struct ath11k_base *ab,
reg_rule->start_freq,
reg_rule->end_freq, max_bw,
reg_rule->ant_gain, reg_rule->reg_power,
- flags);
+ reg_rule->psd_eirp, flags);
/* Update dfs cac timeout if the dfs domain is ETSI and the
* new rule covers weather radar band.
@@ -758,6 +806,159 @@ ret:
return new_regd;
}
+static bool ath11k_reg_is_world_alpha(char *alpha)
+{
+ if (alpha[0] == '0' && alpha[1] == '0')
+ return true;
+
+ if (alpha[0] == 'n' && alpha[1] == 'a')
+ return true;
+
+ return false;
+}
+
+static enum wmi_vdev_type ath11k_reg_get_ar_vdev_type(struct ath11k *ar)
+{
+ struct ath11k_vif *arvif;
+
+ /* Currently each struct ath11k maps to one struct ieee80211_hw/wiphy
+ * and one struct ieee80211_regdomain, so it could only store one group
+ * reg rules. It means multi-interface concurrency in the same ath11k is
+ * not support for the regdomain. So get the vdev type of the first entry
+ * now. After concurrency support for the regdomain, this should change.
+ */
+ arvif = list_first_entry_or_null(&ar->arvifs, struct ath11k_vif, list);
+ if (arvif)
+ return arvif->vdev_type;
+
+ return WMI_VDEV_TYPE_UNSPEC;
+}
+
+int ath11k_reg_handle_chan_list(struct ath11k_base *ab,
+ struct cur_regulatory_info *reg_info,
+ enum ieee80211_ap_reg_power power_type)
+{
+ struct ieee80211_regdomain *regd;
+ bool intersect = false;
+ int pdev_idx;
+ struct ath11k *ar;
+ enum wmi_vdev_type vdev_type;
+
+ ath11k_dbg(ab, ATH11K_DBG_WMI, "event reg handle chan list");
+
+ if (reg_info->status_code != REG_SET_CC_STATUS_PASS) {
+ /* In case of failure to set the requested ctry,
+ * fw retains the current regd. We print a failure info
+ * and return from here.
+ */
+ ath11k_warn(ab, "Failed to set the requested Country regulatory setting\n");
+ return -EINVAL;
+ }
+
+ pdev_idx = reg_info->phy_id;
+
+ /* Avoid default reg rule updates sent during FW recovery if
+ * it is already available
+ */
+ spin_lock_bh(&ab->base_lock);
+ if (test_bit(ATH11K_FLAG_RECOVERY, &ab->dev_flags) &&
+ ab->default_regd[pdev_idx]) {
+ spin_unlock_bh(&ab->base_lock);
+ goto retfail;
+ }
+ spin_unlock_bh(&ab->base_lock);
+
+ if (pdev_idx >= ab->num_radios) {
+ /* Process the event for phy0 only if single_pdev_only
+ * is true. If pdev_idx is valid but not 0, discard the
+ * event. Otherwise, it goes to fallback. In either case
+ * ath11k_reg_reset_info() needs to be called to avoid
+ * memory leak issue.
+ */
+ ath11k_reg_reset_info(reg_info);
+
+ if (ab->hw_params.single_pdev_only &&
+ pdev_idx < ab->hw_params.num_rxmda_per_pdev)
+ return 0;
+ goto fallback;
+ }
+
+ /* Avoid multiple overwrites to default regd, during core
+ * stop-start after mac registration.
+ */
+ if (ab->default_regd[pdev_idx] && !ab->new_regd[pdev_idx] &&
+ !memcmp((char *)ab->default_regd[pdev_idx]->alpha2,
+ (char *)reg_info->alpha2, 2))
+ goto retfail;
+
+ /* Intersect new rules with default regd if a new country setting was
+ * requested, i.e a default regd was already set during initialization
+ * and the regd coming from this event has a valid country info.
+ */
+ if (ab->default_regd[pdev_idx] &&
+ !ath11k_reg_is_world_alpha((char *)
+ ab->default_regd[pdev_idx]->alpha2) &&
+ !ath11k_reg_is_world_alpha((char *)reg_info->alpha2))
+ intersect = true;
+
+ ar = ab->pdevs[pdev_idx].ar;
+ vdev_type = ath11k_reg_get_ar_vdev_type(ar);
+
+ ath11k_dbg(ab, ATH11K_DBG_WMI,
+ "wmi handle chan list power type %d vdev type %d intersect %d\n",
+ power_type, vdev_type, intersect);
+
+ regd = ath11k_reg_build_regd(ab, reg_info, intersect, vdev_type, power_type);
+ if (!regd) {
+ ath11k_warn(ab, "failed to build regd from reg_info\n");
+ goto fallback;
+ }
+
+ if (power_type == IEEE80211_REG_UNSET_AP) {
+ ath11k_reg_reset_info(&ab->reg_info_store[pdev_idx]);
+ ab->reg_info_store[pdev_idx] = *reg_info;
+ }
+
+ spin_lock_bh(&ab->base_lock);
+ if (ab->default_regd[pdev_idx]) {
+ /* The initial rules from FW after WMI Init is to build
+ * the default regd. From then on, any rules updated for
+ * the pdev could be due to user reg changes.
+ * Free previously built regd before assigning the newly
+ * generated regd to ar. NULL pointer handling will be
+ * taken care by kfree itself.
+ */
+ ar = ab->pdevs[pdev_idx].ar;
+ kfree(ab->new_regd[pdev_idx]);
+ ab->new_regd[pdev_idx] = regd;
+ queue_work(ab->workqueue, &ar->regd_update_work);
+ } else {
+ /* This regd would be applied during mac registration and is
+ * held constant throughout for regd intersection purpose
+ */
+ ab->default_regd[pdev_idx] = regd;
+ }
+ ab->dfs_region = reg_info->dfs_region;
+ spin_unlock_bh(&ab->base_lock);
+
+ return 0;
+
+fallback:
+ /* Fallback to older reg (by sending previous country setting
+ * again if fw has succeeded and we failed to process here.
+ * The Regdomain should be uniform across driver and fw. Since the
+ * FW has processed the command and sent a success status, we expect
+ * this function to succeed as well. If it doesn't, CTRY needs to be
+ * reverted at the fw and the old SCAN_CHAN_LIST cmd needs to be sent.
+ */
+ /* TODO: This is rare, but still should also be handled */
+ WARN_ON(1);
+
+retfail:
+
+ return -EINVAL;
+}
+
void ath11k_regd_update_work(struct work_struct *work)
{
struct ath11k *ar = container_of(work, struct ath11k,
@@ -781,10 +982,36 @@ void ath11k_reg_init(struct ath11k *ar)
ar->hw->wiphy->reg_notifier = ath11k_reg_notifier;
}
+void ath11k_reg_reset_info(struct cur_regulatory_info *reg_info)
+{
+ int i, j;
+
+ if (!reg_info)
+ return;
+
+ kfree(reg_info->reg_rules_2ghz_ptr);
+ kfree(reg_info->reg_rules_5ghz_ptr);
+
+ for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) {
+ kfree(reg_info->reg_rules_6ghz_ap_ptr[i]);
+
+ for (j = 0; j < WMI_REG_MAX_CLIENT_TYPE; j++)
+ kfree(reg_info->reg_rules_6ghz_client_ptr[i][j]);
+ }
+
+ memset(reg_info, 0, sizeof(*reg_info));
+}
+
void ath11k_reg_free(struct ath11k_base *ab)
{
int i;
+ for (i = 0; i < ab->num_radios; i++)
+ ath11k_reg_reset_info(&ab->reg_info_store[i]);
+
+ kfree(ab->reg_info_store);
+ ab->reg_info_store = NULL;
+
for (i = 0; i < ab->hw_params.max_radios; i++) {
kfree(ab->default_regd[i]);
kfree(ab->new_regd[i]);
diff --git a/drivers/net/wireless/ath/ath11k/reg.h b/drivers/net/wireless/ath/ath11k/reg.h
index f28902f85e41..64edb794260a 100644
--- a/drivers/net/wireless/ath/ath11k/reg.h
+++ b/drivers/net/wireless/ath/ath11k/reg.h
@@ -30,11 +30,20 @@ enum ath11k_dfs_region {
/* ATH11K Regulatory API's */
void ath11k_reg_init(struct ath11k *ar);
+void ath11k_reg_reset_info(struct cur_regulatory_info *reg_info);
void ath11k_reg_free(struct ath11k_base *ab);
void ath11k_regd_update_work(struct work_struct *work);
struct ieee80211_regdomain *
ath11k_reg_build_regd(struct ath11k_base *ab,
- struct cur_regulatory_info *reg_info, bool intersect);
+ struct cur_regulatory_info *reg_info, bool intersect,
+ enum wmi_vdev_type vdev_type,
+ enum ieee80211_ap_reg_power power_type);
int ath11k_regd_update(struct ath11k *ar);
int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait);
+enum wmi_reg_6ghz_ap_type
+ath11k_reg_ap_pwr_convert(enum ieee80211_ap_reg_power power_type);
+int ath11k_reg_handle_chan_list(struct ath11k_base *ab,
+ struct cur_regulatory_info *reg_info,
+ enum ieee80211_ap_reg_power power_type);
+
#endif
diff --git a/drivers/net/wireless/ath/ath11k/testmode.c b/drivers/net/wireless/ath/ath11k/testmode.c
index 43bb23265d34..302d66092b97 100644
--- a/drivers/net/wireless/ath/ath11k/testmode.c
+++ b/drivers/net/wireless/ath/ath11k/testmode.c
@@ -198,7 +198,7 @@ static void ath11k_tm_wmi_event_segmented(struct ath11k_base *ab, u32 cmd_id,
u16 length;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse ftm event tlv: %d\n", ret);
diff --git a/drivers/net/wireless/ath/ath11k/thermal.c b/drivers/net/wireless/ath/ath11k/thermal.c
index c29b11ab5bfa..41e7499f075f 100644
--- a/drivers/net/wireless/ath/ath11k/thermal.c
+++ b/drivers/net/wireless/ath/ath11k/thermal.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2020 The Linux Foundation. All rights reserved.
- * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/device.h>
@@ -163,6 +163,9 @@ int ath11k_thermal_register(struct ath11k_base *ab)
struct ath11k_pdev *pdev;
int i, ret;
+ if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags))
+ return 0;
+
for (i = 0; i < ab->num_radios; i++) {
pdev = &ab->pdevs[i];
ar = pdev->ar;
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 8a65fa04b48d..34ab9631ff36 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/skbuff.h>
#include <linux/ctype.h>
@@ -238,8 +238,8 @@ static int ath11k_wmi_tlv_parse(struct ath11k_base *ar, const void **tb,
(void *)tb);
}
-const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab, const void *ptr,
- size_t len, gfp_t gfp)
+const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab,
+ struct sk_buff *skb, gfp_t gfp)
{
const void **tb;
int ret;
@@ -248,7 +248,7 @@ const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab, const void *ptr,
if (!tb)
return ERR_PTR(-ENOMEM);
- ret = ath11k_wmi_tlv_parse(ab, tb, ptr, len);
+ ret = ath11k_wmi_tlv_parse(ab, tb, skb->data, skb->len);
if (ret) {
kfree(tb);
return ERR_PTR(ret);
@@ -2098,7 +2098,7 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar,
WMI_SCAN_EVENT_BSS_CHANNEL |
WMI_SCAN_EVENT_FOREIGN_CHAN |
WMI_SCAN_EVENT_DEQUEUED;
- arg->scan_flags |= WMI_SCAN_CHAN_STAT_EVENT;
+ arg->scan_f_chan_stat_evnt = 1;
if (test_bit(WMI_TLV_SERVICE_PASSIVE_SCAN_START_TIME_ENHANCE,
ar->ab->wmi_ab.svc_map))
@@ -2379,6 +2379,70 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar,
return ret;
}
+int ath11k_wmi_send_vdev_set_tpc_power(struct ath11k *ar,
+ u32 vdev_id,
+ struct ath11k_reg_tpc_power_info *param)
+{
+ struct ath11k_pdev_wmi *wmi = ar->wmi;
+ struct wmi_vdev_set_tpc_power_cmd *cmd;
+ struct wmi_vdev_ch_power_info *ch;
+ struct sk_buff *skb;
+ struct wmi_tlv *tlv;
+ u8 *ptr;
+ int i, ret, len, array_len;
+
+ array_len = sizeof(*ch) * param->num_pwr_levels;
+ len = sizeof(*cmd) + TLV_HDR_SIZE + array_len;
+
+ skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, len);
+ if (!skb)
+ return -ENOMEM;
+
+ ptr = skb->data;
+
+ cmd = (struct wmi_vdev_set_tpc_power_cmd *)ptr;
+ cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_VDEV_SET_TPC_POWER_CMD) |
+ FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+ cmd->vdev_id = vdev_id;
+ cmd->psd_power = param->is_psd_power;
+ cmd->eirp_power = param->eirp_power;
+ cmd->power_type_6ghz = param->ap_power_type;
+
+ ath11k_dbg(ar->ab, ATH11K_DBG_WMI,
+ "tpc vdev id %d is psd power %d eirp power %d 6 ghz power type %d\n",
+ vdev_id, param->is_psd_power, param->eirp_power, param->ap_power_type);
+
+ ptr += sizeof(*cmd);
+ tlv = (struct wmi_tlv *)ptr;
+ tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_STRUCT) |
+ FIELD_PREP(WMI_TLV_LEN, array_len);
+
+ ptr += TLV_HDR_SIZE;
+ ch = (struct wmi_vdev_ch_power_info *)ptr;
+
+ for (i = 0; i < param->num_pwr_levels; i++, ch++) {
+ ch->tlv_header = FIELD_PREP(WMI_TLV_TAG,
+ WMI_TAG_VDEV_CH_POWER_INFO) |
+ FIELD_PREP(WMI_TLV_LEN,
+ sizeof(*ch) - TLV_HDR_SIZE);
+
+ ch->chan_cfreq = param->chan_power_info[i].chan_cfreq;
+ ch->tx_power = param->chan_power_info[i].tx_power;
+
+ ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tpc chan freq %d TX power %d\n",
+ ch->chan_cfreq, ch->tx_power);
+ }
+
+ ret = ath11k_wmi_cmd_send(wmi, skb, WMI_VDEV_SET_TPC_POWER_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_VDEV_SET_TPC_POWER_CMDID\n");
+ dev_kfree_skb(skb);
+ return ret;
+ }
+
+ return 0;
+}
+
int ath11k_wmi_send_scan_stop_cmd(struct ath11k *ar,
struct scan_cancel_param *param)
{
@@ -3930,7 +3994,7 @@ ath11k_wmi_obss_color_collision_event(struct ath11k_base *ab, struct sk_buff *sk
struct ath11k_vif *arvif;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -3956,8 +4020,7 @@ ath11k_wmi_obss_color_collision_event(struct ath11k_base *ab, struct sk_buff *sk
switch (ev->evt_type) {
case WMI_BSS_COLOR_COLLISION_DETECTION:
- ieee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap,
- GFP_KERNEL);
+ ieee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap);
ath11k_dbg(ab, ATH11K_DBG_WMI,
"OBSS color collision detected vdev:%d, event:%d, bitmap:%08llx\n",
ev->vdev_id, ev->evt_type, ev->obss_color_bitmap);
@@ -4749,6 +4812,14 @@ static int ath11k_wmi_tlv_ext_soc_hal_reg_caps_parse(struct ath11k_base *soc,
soc->pdevs[0].pdev_id = 0;
}
+ if (!soc->reg_info_store) {
+ soc->reg_info_store = kcalloc(soc->num_radios,
+ sizeof(*soc->reg_info_store),
+ GFP_ATOMIC);
+ if (!soc->reg_info_store)
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -4786,6 +4857,7 @@ static void ath11k_wmi_free_dbring_caps(struct ath11k_base *ab)
{
kfree(ab->db_caps);
ab->db_caps = NULL;
+ ab->num_db_cap = 0;
}
static int ath11k_wmi_tlv_dma_ring_caps(struct ath11k_base *ab,
@@ -5003,7 +5075,7 @@ static int ath11k_pull_vdev_start_resp_tlv(struct ath11k_base *ab, struct sk_buf
const struct wmi_vdev_start_resp_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5028,6 +5100,7 @@ static int ath11k_pull_vdev_start_resp_tlv(struct ath11k_base *ab, struct sk_buf
vdev_rsp->mac_id = ev->mac_id;
vdev_rsp->cfgd_tx_streams = ev->cfgd_tx_streams;
vdev_rsp->cfgd_rx_streams = ev->cfgd_rx_streams;
+ vdev_rsp->max_allowed_tx_power = ev->max_allowed_tx_power;
kfree(tb);
return 0;
@@ -5102,7 +5175,7 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab,
ath11k_dbg(ab, ATH11K_DBG_WMI, "processing regulatory channel list\n");
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5278,7 +5351,7 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab,
ath11k_dbg(ab, ATH11K_DBG_WMI, "processing regulatory ext channel list\n");
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5634,7 +5707,7 @@ static int ath11k_pull_peer_del_resp_ev(struct ath11k_base *ab, struct sk_buff *
const struct wmi_peer_delete_resp_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5666,7 +5739,7 @@ static int ath11k_pull_vdev_del_resp_ev(struct ath11k_base *ab,
const struct wmi_vdev_delete_resp_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5686,15 +5759,15 @@ static int ath11k_pull_vdev_del_resp_ev(struct ath11k_base *ab,
return 0;
}
-static int ath11k_pull_bcn_tx_status_ev(struct ath11k_base *ab, void *evt_buf,
- u32 len, u32 *vdev_id,
- u32 *tx_status)
+static int ath11k_pull_bcn_tx_status_ev(struct ath11k_base *ab,
+ struct sk_buff *skb,
+ u32 *vdev_id, u32 *tx_status)
{
const void **tb;
const struct wmi_bcn_tx_status_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, evt_buf, len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5722,7 +5795,7 @@ static int ath11k_pull_vdev_stopped_param_tlv(struct ath11k_base *ab, struct sk_
const struct wmi_vdev_stopped_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5876,7 +5949,7 @@ static int ath11k_pull_mgmt_tx_compl_param_tlv(struct ath11k_base *ab,
const struct wmi_mgmt_tx_compl_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6052,7 +6125,7 @@ static int ath11k_pull_scan_ev(struct ath11k_base *ab, struct sk_buff *skb,
const struct wmi_scan_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6085,7 +6158,7 @@ static int ath11k_pull_peer_sta_kickout_ev(struct ath11k_base *ab, struct sk_buf
const struct wmi_peer_sta_kickout_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6112,7 +6185,7 @@ static int ath11k_pull_roam_ev(struct ath11k_base *ab, struct sk_buff *skb,
const struct wmi_roam_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6153,14 +6226,14 @@ exit:
return idx;
}
-static int ath11k_pull_chan_info_ev(struct ath11k_base *ab, u8 *evt_buf,
- u32 len, struct wmi_chan_info_event *ch_info_ev)
+static int ath11k_pull_chan_info_ev(struct ath11k_base *ab, struct sk_buff *skb,
+ struct wmi_chan_info_event *ch_info_ev)
{
const void **tb;
const struct wmi_chan_info_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, evt_buf, len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6199,7 +6272,7 @@ ath11k_pull_pdev_bss_chan_info_ev(struct ath11k_base *ab, struct sk_buff *skb,
const struct wmi_pdev_bss_chan_info_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6239,7 +6312,7 @@ ath11k_pull_vdev_install_key_compl_ev(struct ath11k_base *ab, struct sk_buff *sk
const struct wmi_vdev_install_key_compl_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6270,7 +6343,7 @@ static int ath11k_pull_peer_assoc_conf_ev(struct ath11k_base *ab, struct sk_buff
const struct wmi_peer_assoc_conf_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6995,7 +7068,7 @@ static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *s
const void **tb;
int ret, i;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -7060,32 +7133,15 @@ static void ath11k_wmi_htc_tx_complete(struct ath11k_base *ab,
wake_up(&wmi->tx_ce_desc_wq);
}
-static bool ath11k_reg_is_world_alpha(char *alpha)
-{
- if (alpha[0] == '0' && alpha[1] == '0')
- return true;
-
- if (alpha[0] == 'n' && alpha[1] == 'a')
- return true;
-
- return false;
-}
-
-static int ath11k_reg_chan_list_event(struct ath11k_base *ab,
- struct sk_buff *skb,
+static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *skb,
enum wmi_reg_chan_list_cmd_type id)
{
- struct cur_regulatory_info *reg_info = NULL;
- struct ieee80211_regdomain *regd = NULL;
- bool intersect = false;
- int ret = 0, pdev_idx, i, j;
- struct ath11k *ar;
+ struct cur_regulatory_info *reg_info;
+ int ret;
reg_info = kzalloc(sizeof(*reg_info), GFP_ATOMIC);
- if (!reg_info) {
- ret = -ENOMEM;
- goto fallback;
- }
+ if (!reg_info)
+ return -ENOMEM;
if (id == WMI_REG_CHAN_LIST_CC_ID)
ret = ath11k_pull_reg_chan_list_update_ev(ab, skb, reg_info);
@@ -7093,118 +7149,22 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab,
ret = ath11k_pull_reg_chan_list_ext_update_ev(ab, skb, reg_info);
if (ret) {
- ath11k_warn(ab, "failed to extract regulatory info from received event\n");
- goto fallback;
- }
-
- ath11k_dbg(ab, ATH11K_DBG_WMI, "event reg chan list id %d", id);
-
- if (reg_info->status_code != REG_SET_CC_STATUS_PASS) {
- /* In case of failure to set the requested ctry,
- * fw retains the current regd. We print a failure info
- * and return from here.
- */
- ath11k_warn(ab, "Failed to set the requested Country regulatory setting\n");
- goto mem_free;
- }
-
- pdev_idx = reg_info->phy_id;
-
- /* Avoid default reg rule updates sent during FW recovery if
- * it is already available
- */
- spin_lock(&ab->base_lock);
- if (test_bit(ATH11K_FLAG_RECOVERY, &ab->dev_flags) &&
- ab->default_regd[pdev_idx]) {
- spin_unlock(&ab->base_lock);
+ ath11k_warn(ab, "failed to extract regulatory info\n");
goto mem_free;
}
- spin_unlock(&ab->base_lock);
- if (pdev_idx >= ab->num_radios) {
- /* Process the event for phy0 only if single_pdev_only
- * is true. If pdev_idx is valid but not 0, discard the
- * event. Otherwise, it goes to fallback.
- */
- if (ab->hw_params.single_pdev_only &&
- pdev_idx < ab->hw_params.num_rxmda_per_pdev)
- goto mem_free;
- else
- goto fallback;
- }
-
- /* Avoid multiple overwrites to default regd, during core
- * stop-start after mac registration.
- */
- if (ab->default_regd[pdev_idx] && !ab->new_regd[pdev_idx] &&
- !memcmp((char *)ab->default_regd[pdev_idx]->alpha2,
- (char *)reg_info->alpha2, 2))
+ ret = ath11k_reg_handle_chan_list(ab, reg_info, IEEE80211_REG_UNSET_AP);
+ if (ret) {
+ ath11k_warn(ab, "failed to process regulatory info %d\n", ret);
goto mem_free;
-
- /* Intersect new rules with default regd if a new country setting was
- * requested, i.e a default regd was already set during initialization
- * and the regd coming from this event has a valid country info.
- */
- if (ab->default_regd[pdev_idx] &&
- !ath11k_reg_is_world_alpha((char *)
- ab->default_regd[pdev_idx]->alpha2) &&
- !ath11k_reg_is_world_alpha((char *)reg_info->alpha2))
- intersect = true;
-
- regd = ath11k_reg_build_regd(ab, reg_info, intersect);
- if (!regd) {
- ath11k_warn(ab, "failed to build regd from reg_info\n");
- goto fallback;
- }
-
- spin_lock(&ab->base_lock);
- if (ab->default_regd[pdev_idx]) {
- /* The initial rules from FW after WMI Init is to build
- * the default regd. From then on, any rules updated for
- * the pdev could be due to user reg changes.
- * Free previously built regd before assigning the newly
- * generated regd to ar. NULL pointer handling will be
- * taken care by kfree itself.
- */
- ar = ab->pdevs[pdev_idx].ar;
- kfree(ab->new_regd[pdev_idx]);
- ab->new_regd[pdev_idx] = regd;
- queue_work(ab->workqueue, &ar->regd_update_work);
- } else {
- /* This regd would be applied during mac registration and is
- * held constant throughout for regd intersection purpose
- */
- ab->default_regd[pdev_idx] = regd;
}
- ab->dfs_region = reg_info->dfs_region;
- spin_unlock(&ab->base_lock);
- goto mem_free;
+ kfree(reg_info);
+ return 0;
-fallback:
- /* Fallback to older reg (by sending previous country setting
- * again if fw has succeeded and we failed to process here.
- * The Regdomain should be uniform across driver and fw. Since the
- * FW has processed the command and sent a success status, we expect
- * this function to succeed as well. If it doesn't, CTRY needs to be
- * reverted at the fw and the old SCAN_CHAN_LIST cmd needs to be sent.
- */
- /* TODO: This is rare, but still should also be handled */
- WARN_ON(1);
mem_free:
- if (reg_info) {
- kfree(reg_info->reg_rules_2ghz_ptr);
- kfree(reg_info->reg_rules_5ghz_ptr);
- if (reg_info->is_ext_reg_event) {
- for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++)
- kfree(reg_info->reg_rules_6ghz_ap_ptr[i]);
-
- for (j = 0; j < WMI_REG_CURRENT_MAX_AP_TYPE; j++)
- for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++)
- kfree(reg_info->reg_rules_6ghz_client_ptr[j][i]);
- }
- kfree(reg_info);
- }
+ ath11k_reg_reset_info(reg_info);
+ kfree(reg_info);
return ret;
}
@@ -7362,7 +7322,7 @@ static void ath11k_vdev_start_resp_event(struct ath11k_base *ab, struct sk_buff
}
ar->last_wmi_vdev_start_status = 0;
-
+ ar->max_allowed_tx_power = vdev_start_resp.max_allowed_tx_power;
status = vdev_start_resp.status;
if (WARN_ON_ONCE(status)) {
@@ -7384,8 +7344,7 @@ static void ath11k_bcn_tx_status_event(struct ath11k_base *ab, struct sk_buff *s
struct ath11k_vif *arvif;
u32 vdev_id, tx_status;
- if (ath11k_pull_bcn_tx_status_ev(ab, skb->data, skb->len,
- &vdev_id, &tx_status) != 0) {
+ if (ath11k_pull_bcn_tx_status_ev(ab, skb, &vdev_id, &tx_status) != 0) {
ath11k_warn(ab, "failed to extract bcn tx status");
return;
}
@@ -7416,7 +7375,7 @@ static void ath11k_wmi_event_peer_sta_ps_state_chg(struct ath11k_base *ab,
enum ath11k_wmi_peer_ps_state peer_previous_ps_state;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -7884,7 +7843,7 @@ static void ath11k_chan_info_event(struct ath11k_base *ab, struct sk_buff *skb)
/* HW channel counters frequency value in hertz */
u32 cc_freq_hz = ab->cc_freq_hz;
- if (ath11k_pull_chan_info_ev(ab, skb->data, skb->len, &ch_info_ev) != 0) {
+ if (ath11k_pull_chan_info_ev(ab, skb, &ch_info_ev) != 0) {
ath11k_warn(ab, "failed to extract chan info event");
return;
}
@@ -8216,7 +8175,7 @@ static void ath11k_pdev_ctl_failsafe_check_event(struct ath11k_base *ab,
const struct wmi_pdev_ctl_failsafe_chk_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -8267,7 +8226,7 @@ ath11k_wmi_process_csa_switch_count_event(struct ath11k_base *ab,
}
if (arvif->is_up && arvif->vif->bss_conf.csa_active)
- ieee80211_csa_finish(arvif->vif);
+ ieee80211_csa_finish(arvif->vif, 0);
}
rcu_read_unlock();
}
@@ -8281,7 +8240,7 @@ ath11k_wmi_pdev_csa_switch_count_status_event(struct ath11k_base *ab,
const u32 *vdev_ids;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -8315,7 +8274,7 @@ ath11k_wmi_pdev_dfs_radar_detected_event(struct ath11k_base *ab, struct sk_buff
struct ath11k *ar;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -8369,7 +8328,7 @@ ath11k_wmi_pdev_temperature_event(struct ath11k_base *ab,
const struct wmi_pdev_temperature_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -8409,7 +8368,7 @@ static void ath11k_fils_discovery_event(struct ath11k_base *ab,
const struct wmi_fils_discovery_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab,
@@ -8441,7 +8400,7 @@ static void ath11k_probe_resp_tx_status_event(struct ath11k_base *ab,
const struct wmi_probe_resp_tx_status_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab,
@@ -8567,7 +8526,7 @@ static void ath11k_wmi_twt_add_dialog_event(struct ath11k_base *ab,
const struct wmi_twt_add_dialog_event *ev;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab,
@@ -8604,7 +8563,7 @@ static void ath11k_wmi_gtk_offload_status_event(struct ath11k_base *ab,
u64 replay_ctr;
int ret;
- tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath11k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -9793,3 +9752,9 @@ int ath11k_wmi_sta_keepalive(struct ath11k *ar,
return ath11k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID);
}
+
+bool ath11k_wmi_supports_6ghz_cc_ext(struct ath11k *ar)
+{
+ return test_bit(WMI_TLV_SERVICE_REG_CC_EXT_EVENT_SUPPORT,
+ ar->ab->wmi_ab.svc_map) && ar->supports_6ghz;
+}
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index ff0a9a92beeb..bb419e3abb00 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -15,6 +15,7 @@ struct ath11k;
struct ath11k_fw_stats;
struct ath11k_fw_dbglog;
struct ath11k_vif;
+struct ath11k_reg_tpc_power_info;
#define PSOC_HOST_MAX_NUM_SS (8)
@@ -327,6 +328,22 @@ enum wmi_tlv_cmd_id {
WMI_VDEV_SET_CUSTOM_AGGR_SIZE_CMDID,
WMI_VDEV_ENCRYPT_DECRYPT_DATA_REQ_CMDID,
WMI_VDEV_ADD_MAC_ADDR_TO_RX_FILTER_CMDID,
+ WMI_VDEV_SET_ARP_STAT_CMDID,
+ WMI_VDEV_GET_ARP_STAT_CMDID,
+ WMI_VDEV_GET_TX_POWER_CMDID,
+ WMI_VDEV_LIMIT_OFFCHAN_CMDID,
+ WMI_VDEV_SET_CUSTOM_SW_RETRY_TH_CMDID,
+ WMI_VDEV_CHAINMASK_CONFIG_CMDID,
+ WMI_VDEV_GET_BCN_RECEPTION_STATS_CMDID,
+ WMI_VDEV_GET_MWS_COEX_INFO_CMDID,
+ WMI_VDEV_DELETE_ALL_PEER_CMDID,
+ WMI_VDEV_BSS_MAX_IDLE_TIME_CMDID,
+ WMI_VDEV_AUDIO_SYNC_TRIGGER_CMDID,
+ WMI_VDEV_AUDIO_SYNC_QTIMER_CMDID,
+ WMI_VDEV_SET_PCL_CMDID,
+ WMI_VDEV_GET_BIG_DATA_CMDID,
+ WMI_VDEV_GET_BIG_DATA_P2_CMDID,
+ WMI_VDEV_SET_TPC_POWER_CMDID,
WMI_PEER_CREATE_CMDID = WMI_TLV_CMD(WMI_GRP_PEER),
WMI_PEER_DELETE_CMDID,
WMI_PEER_FLUSH_TIDS_CMDID,
@@ -1880,6 +1897,8 @@ enum wmi_tlv_tag {
WMI_TAG_PDEV_NON_SRG_OBSS_BSSID_ENABLE_BITMAP_CMD,
WMI_TAG_REGULATORY_RULE_EXT_STRUCT = 0x3A9,
WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT,
+ WMI_TAG_VDEV_SET_TPC_POWER_CMD = 0x3B5,
+ WMI_TAG_VDEV_CH_POWER_INFO,
WMI_TAG_PDEV_SET_BIOS_SAR_TABLE_CMD = 0x3D8,
WMI_TAG_PDEV_SET_BIOS_GEO_TABLE_CMD,
WMI_TAG_MAX
@@ -2114,6 +2133,7 @@ enum wmi_tlv_service {
/* The second 128 bits */
WMI_MAX_EXT_SERVICE = 256,
WMI_TLV_SERVICE_SCAN_CONFIG_PER_CHANNEL = 265,
+ WMI_TLV_SERVICE_EXT_TPC_REG_SUPPORT = 280,
WMI_TLV_SERVICE_REG_CC_EXT_EVENT_SUPPORT = 281,
WMI_TLV_SERVICE_BIOS_SAR_SUPPORT = 326,
WMI_TLV_SERVICE_SUPPORT_11D_FOR_HOST_SCAN = 357,
@@ -3168,6 +3188,41 @@ struct wlan_ssid {
u8 ssid[WLAN_SSID_MAX_LEN];
};
+struct wmi_vdev_ch_power_info {
+ u32 tlv_header;
+
+ /* Channel center frequency (MHz) */
+ u32 chan_cfreq;
+
+ /* Unit: dBm, either PSD/EIRP power for this frequency or
+ * incremental for non-PSD BW
+ */
+ u32 tx_power;
+} __packed;
+
+struct wmi_vdev_set_tpc_power_cmd {
+ u32 tlv_header;
+ u32 vdev_id;
+
+ /* Value: 0 or 1, is PSD power or not */
+ u32 psd_power;
+
+ /* Maximum EIRP power (dBm units), valid only if power is PSD */
+ u32 eirp_power;
+
+ /* Type: WMI_6GHZ_REG_TYPE, used for halphy CTL lookup */
+ u32 power_type_6ghz;
+
+ /* This fixed_param TLV is followed by the below TLVs:
+ * num_pwr_levels of wmi_vdev_ch_power_info
+ * For PSD power, it is the PSD/EIRP power of the frequency (20 MHz chunks).
+ * For non-PSD power, the power values are for 20, 40, and till
+ * BSS BW power levels.
+ * The num_pwr_levels will be checked by sw how many elements present
+ * in the variable-length array.
+ */
+} __packed;
+
#define WMI_IE_BITMAP_SIZE 8
/* prefix used by scan requestor ids on the host */
@@ -3308,24 +3363,19 @@ struct scan_req_params {
u32 vdev_id;
u32 pdev_id;
enum wmi_scan_priority scan_priority;
- union {
- struct {
- u32 scan_ev_started:1,
- scan_ev_completed:1,
- scan_ev_bss_chan:1,
- scan_ev_foreign_chan:1,
- scan_ev_dequeued:1,
- scan_ev_preempted:1,
- scan_ev_start_failed:1,
- scan_ev_restarted:1,
- scan_ev_foreign_chn_exit:1,
- scan_ev_invalid:1,
- scan_ev_gpio_timeout:1,
- scan_ev_suspended:1,
- scan_ev_resumed:1;
- };
- u32 scan_events;
- };
+ u32 scan_ev_started:1,
+ scan_ev_completed:1,
+ scan_ev_bss_chan:1,
+ scan_ev_foreign_chan:1,
+ scan_ev_dequeued:1,
+ scan_ev_preempted:1,
+ scan_ev_start_failed:1,
+ scan_ev_restarted:1,
+ scan_ev_foreign_chn_exit:1,
+ scan_ev_invalid:1,
+ scan_ev_gpio_timeout:1,
+ scan_ev_suspended:1,
+ scan_ev_resumed:1;
u32 scan_ctrl_flags_ext;
u32 dwell_time_active;
u32 dwell_time_active_2g;
@@ -3339,36 +3389,31 @@ struct scan_req_params {
u32 idle_time;
u32 max_scan_time;
u32 probe_delay;
- union {
- struct {
- u32 scan_f_passive:1,
- scan_f_bcast_probe:1,
- scan_f_cck_rates:1,
- scan_f_ofdm_rates:1,
- scan_f_chan_stat_evnt:1,
- scan_f_filter_prb_req:1,
- scan_f_bypass_dfs_chn:1,
- scan_f_continue_on_err:1,
- scan_f_offchan_mgmt_tx:1,
- scan_f_offchan_data_tx:1,
- scan_f_promisc_mode:1,
- scan_f_capture_phy_err:1,
- scan_f_strict_passive_pch:1,
- scan_f_half_rate:1,
- scan_f_quarter_rate:1,
- scan_f_force_active_dfs_chn:1,
- scan_f_add_tpc_ie_in_probe:1,
- scan_f_add_ds_ie_in_probe:1,
- scan_f_add_spoofed_mac_in_probe:1,
- scan_f_add_rand_seq_in_probe:1,
- scan_f_en_ie_whitelist_in_probe:1,
- scan_f_forced:1,
- scan_f_2ghz:1,
- scan_f_5ghz:1,
- scan_f_80mhz:1;
- };
- u32 scan_flags;
- };
+ u32 scan_f_passive:1,
+ scan_f_bcast_probe:1,
+ scan_f_cck_rates:1,
+ scan_f_ofdm_rates:1,
+ scan_f_chan_stat_evnt:1,
+ scan_f_filter_prb_req:1,
+ scan_f_bypass_dfs_chn:1,
+ scan_f_continue_on_err:1,
+ scan_f_offchan_mgmt_tx:1,
+ scan_f_offchan_data_tx:1,
+ scan_f_promisc_mode:1,
+ scan_f_capture_phy_err:1,
+ scan_f_strict_passive_pch:1,
+ scan_f_half_rate:1,
+ scan_f_quarter_rate:1,
+ scan_f_force_active_dfs_chn:1,
+ scan_f_add_tpc_ie_in_probe:1,
+ scan_f_add_ds_ie_in_probe:1,
+ scan_f_add_spoofed_mac_in_probe:1,
+ scan_f_add_rand_seq_in_probe:1,
+ scan_f_en_ie_whitelist_in_probe:1,
+ scan_f_forced:1,
+ scan_f_2ghz:1,
+ scan_f_5ghz:1,
+ scan_f_80mhz:1;
enum scan_dwelltime_adaptive_mode adaptive_dwell_time_mode;
u32 burst_duration;
u32 num_chan;
@@ -4119,6 +4164,7 @@ struct wmi_vdev_start_resp_event {
};
u32 cfgd_tx_streams;
u32 cfgd_rx_streams;
+ s32 max_allowed_tx_power;
} __packed;
/* VDEV start response status codes */
@@ -4951,6 +4997,7 @@ struct ath11k_targ_cap {
};
enum wmi_vdev_type {
+ WMI_VDEV_TYPE_UNSPEC = 0,
WMI_VDEV_TYPE_AP = 1,
WMI_VDEV_TYPE_STA = 2,
WMI_VDEV_TYPE_IBSS = 3,
@@ -6295,8 +6342,8 @@ enum wmi_sta_keepalive_method {
#define WMI_STA_KEEPALIVE_INTERVAL_DEFAULT 30
#define WMI_STA_KEEPALIVE_INTERVAL_DISABLE 0
-const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab, const void *ptr,
- size_t len, gfp_t gfp);
+const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab,
+ struct sk_buff *skb, gfp_t gfp);
int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb,
u32 cmd_id);
struct sk_buff *ath11k_wmi_alloc_skb(struct ath11k_wmi_base *wmi_sc, u32 len);
@@ -6479,5 +6526,9 @@ int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_va
int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar);
int ath11k_wmi_sta_keepalive(struct ath11k *ar,
const struct wmi_sta_keepalive_arg *arg);
+bool ath11k_wmi_supports_6ghz_cc_ext(struct ath11k *ar);
+int ath11k_wmi_send_vdev_set_tpc_power(struct ath11k *ar,
+ u32 vdev_id,
+ struct ath11k_reg_tpc_power_info *param);
#endif
diff --git a/drivers/net/wireless/ath/ath12k/Makefile b/drivers/net/wireless/ath/ath12k/Makefile
index 62c52e733b5e..71669f94ff75 100644
--- a/drivers/net/wireless/ath/ath12k/Makefile
+++ b/drivers/net/wireless/ath/ath12k/Makefile
@@ -19,7 +19,9 @@ ath12k-y += core.o \
hw.o \
mhi.o \
pci.o \
- dp_mon.o
+ dp_mon.o \
+ fw.o \
+ p2p.o
ath12k-$(CONFIG_ATH12K_TRACING) += trace.o
diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c
index 6c01b282fcd3..391b6fb2bd42 100644
--- a/drivers/net/wireless/ath/ath12k/core.c
+++ b/drivers/net/wireless/ath/ath12k/core.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
@@ -14,6 +14,7 @@
#include "dp_rx.h"
#include "debug.h"
#include "hif.h"
+#include "fw.h"
unsigned int ath12k_debug_mask;
module_param_named(debug_mask, ath12k_debug_mask, uint, 0644);
@@ -104,27 +105,66 @@ int ath12k_core_resume(struct ath12k_base *ab)
return 0;
}
-static int ath12k_core_create_board_name(struct ath12k_base *ab, char *name,
- size_t name_len)
+static int __ath12k_core_create_board_name(struct ath12k_base *ab, char *name,
+ size_t name_len, bool with_variant,
+ bool bus_type_mode)
{
/* strlen(',variant=') + strlen(ab->qmi.target.bdf_ext) */
char variant[9 + ATH12K_QMI_BDF_EXT_STR_LENGTH] = { 0 };
- if (ab->qmi.target.bdf_ext[0] != '\0')
+ if (with_variant && ab->qmi.target.bdf_ext[0] != '\0')
scnprintf(variant, sizeof(variant), ",variant=%s",
ab->qmi.target.bdf_ext);
- scnprintf(name, name_len,
- "bus=%s,qmi-chip-id=%d,qmi-board-id=%d%s",
- ath12k_bus_str(ab->hif.bus),
- ab->qmi.target.chip_id,
- ab->qmi.target.board_id, variant);
+ switch (ab->id.bdf_search) {
+ case ATH12K_BDF_SEARCH_BUS_AND_BOARD:
+ if (bus_type_mode)
+ scnprintf(name, name_len,
+ "bus=%s",
+ ath12k_bus_str(ab->hif.bus));
+ else
+ scnprintf(name, name_len,
+ "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s",
+ ath12k_bus_str(ab->hif.bus),
+ ab->id.vendor, ab->id.device,
+ ab->id.subsystem_vendor,
+ ab->id.subsystem_device,
+ ab->qmi.target.chip_id,
+ ab->qmi.target.board_id,
+ variant);
+ break;
+ default:
+ scnprintf(name, name_len,
+ "bus=%s,qmi-chip-id=%d,qmi-board-id=%d%s",
+ ath12k_bus_str(ab->hif.bus),
+ ab->qmi.target.chip_id,
+ ab->qmi.target.board_id, variant);
+ break;
+ }
ath12k_dbg(ab, ATH12K_DBG_BOOT, "boot using board name '%s'\n", name);
return 0;
}
+static int ath12k_core_create_board_name(struct ath12k_base *ab, char *name,
+ size_t name_len)
+{
+ return __ath12k_core_create_board_name(ab, name, name_len, true, false);
+}
+
+static int ath12k_core_create_fallback_board_name(struct ath12k_base *ab, char *name,
+ size_t name_len)
+{
+ return __ath12k_core_create_board_name(ab, name, name_len, false, false);
+}
+
+static int ath12k_core_create_bus_type_board_name(struct ath12k_base *ab, char *name,
+ size_t name_len)
+{
+ return __ath12k_core_create_board_name(ab, name, name_len, false, true);
+}
+
const struct firmware *ath12k_core_firmware_request(struct ath12k_base *ab,
const char *file)
{
@@ -159,7 +199,9 @@ static int ath12k_core_parse_bd_ie_board(struct ath12k_base *ab,
struct ath12k_board_data *bd,
const void *buf, size_t buf_len,
const char *boardname,
- int bd_ie_type)
+ int ie_id,
+ int name_id,
+ int data_id)
{
const struct ath12k_fw_ie *hdr;
bool name_match_found;
@@ -169,7 +211,7 @@ static int ath12k_core_parse_bd_ie_board(struct ath12k_base *ab,
name_match_found = false;
- /* go through ATH12K_BD_IE_BOARD_ elements */
+ /* go through ATH12K_BD_IE_BOARD_/ATH12K_BD_IE_REGDB_ elements */
while (buf_len > sizeof(struct ath12k_fw_ie)) {
hdr = buf;
board_ie_id = le32_to_cpu(hdr->id);
@@ -180,48 +222,50 @@ static int ath12k_core_parse_bd_ie_board(struct ath12k_base *ab,
buf += sizeof(*hdr);
if (buf_len < ALIGN(board_ie_len, 4)) {
- ath12k_err(ab, "invalid ATH12K_BD_IE_BOARD length: %zu < %zu\n",
+ ath12k_err(ab, "invalid %s length: %zu < %zu\n",
+ ath12k_bd_ie_type_str(ie_id),
buf_len, ALIGN(board_ie_len, 4));
ret = -EINVAL;
goto out;
}
- switch (board_ie_id) {
- case ATH12K_BD_IE_BOARD_NAME:
+ if (board_ie_id == name_id) {
ath12k_dbg_dump(ab, ATH12K_DBG_BOOT, "board name", "",
board_ie_data, board_ie_len);
if (board_ie_len != strlen(boardname))
- break;
+ goto next;
ret = memcmp(board_ie_data, boardname, strlen(boardname));
if (ret)
- break;
+ goto next;
name_match_found = true;
ath12k_dbg(ab, ATH12K_DBG_BOOT,
- "boot found match for name '%s'",
+ "boot found match %s for name '%s'",
+ ath12k_bd_ie_type_str(ie_id),
boardname);
- break;
- case ATH12K_BD_IE_BOARD_DATA:
+ } else if (board_ie_id == data_id) {
if (!name_match_found)
/* no match found */
- break;
+ goto next;
ath12k_dbg(ab, ATH12K_DBG_BOOT,
- "boot found board data for '%s'", boardname);
+ "boot found %s for '%s'",
+ ath12k_bd_ie_type_str(ie_id),
+ boardname);
bd->data = board_ie_data;
bd->len = board_ie_len;
ret = 0;
goto out;
- default:
- ath12k_warn(ab, "unknown ATH12K_BD_IE_BOARD found: %d\n",
+ } else {
+ ath12k_warn(ab, "unknown %s id found: %d\n",
+ ath12k_bd_ie_type_str(ie_id),
board_ie_id);
- break;
}
-
+next:
/* jump over the padding */
board_ie_len = ALIGN(board_ie_len, 4);
@@ -238,7 +282,10 @@ out:
static int ath12k_core_fetch_board_data_api_n(struct ath12k_base *ab,
struct ath12k_board_data *bd,
- const char *boardname)
+ const char *boardname,
+ int ie_id_match,
+ int name_id,
+ int data_id)
{
size_t len, magic_len;
const u8 *data;
@@ -303,22 +350,23 @@ static int ath12k_core_fetch_board_data_api_n(struct ath12k_base *ab,
goto err;
}
- switch (ie_id) {
- case ATH12K_BD_IE_BOARD:
+ if (ie_id == ie_id_match) {
ret = ath12k_core_parse_bd_ie_board(ab, bd, data,
ie_len,
boardname,
- ATH12K_BD_IE_BOARD);
+ ie_id_match,
+ name_id,
+ data_id);
if (ret == -ENOENT)
/* no match found, continue */
- break;
+ goto next;
else if (ret)
/* there was an error, bail out */
goto err;
/* either found or error, so stop searching */
goto out;
}
-
+next:
/* jump over the padding */
ie_len = ALIGN(ie_len, 4);
@@ -328,8 +376,9 @@ static int ath12k_core_fetch_board_data_api_n(struct ath12k_base *ab,
out:
if (!bd->data || !bd->len) {
- ath12k_err(ab,
- "failed to fetch board data for %s from %s\n",
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "failed to fetch %s for %s from %s\n",
+ ath12k_bd_ie_type_str(ie_id_match),
boardname, filepath);
ret = -ENODATA;
goto err;
@@ -356,28 +405,56 @@ int ath12k_core_fetch_board_data_api_1(struct ath12k_base *ab,
return 0;
}
-#define BOARD_NAME_SIZE 100
+#define BOARD_NAME_SIZE 200
int ath12k_core_fetch_bdf(struct ath12k_base *ab, struct ath12k_board_data *bd)
{
- char boardname[BOARD_NAME_SIZE];
+ char boardname[BOARD_NAME_SIZE], fallback_boardname[BOARD_NAME_SIZE];
+ char *filename, filepath[100];
int bd_api;
int ret;
- ret = ath12k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE);
+ filename = ATH12K_BOARD_API2_FILE;
+
+ ret = ath12k_core_create_board_name(ab, boardname, sizeof(boardname));
if (ret) {
ath12k_err(ab, "failed to create board name: %d", ret);
return ret;
}
bd_api = 2;
- ret = ath12k_core_fetch_board_data_api_n(ab, bd, boardname);
+ ret = ath12k_core_fetch_board_data_api_n(ab, bd, boardname,
+ ATH12K_BD_IE_BOARD,
+ ATH12K_BD_IE_BOARD_NAME,
+ ATH12K_BD_IE_BOARD_DATA);
+ if (!ret)
+ goto success;
+
+ ret = ath12k_core_create_fallback_board_name(ab, fallback_boardname,
+ sizeof(fallback_boardname));
+ if (ret) {
+ ath12k_err(ab, "failed to create fallback board name: %d", ret);
+ return ret;
+ }
+
+ ret = ath12k_core_fetch_board_data_api_n(ab, bd, fallback_boardname,
+ ATH12K_BD_IE_BOARD,
+ ATH12K_BD_IE_BOARD_NAME,
+ ATH12K_BD_IE_BOARD_DATA);
if (!ret)
goto success;
bd_api = 1;
ret = ath12k_core_fetch_board_data_api_1(ab, bd, ATH12K_DEFAULT_BOARD_FILE);
if (ret) {
- ath12k_err(ab, "failed to fetch board-2.bin or board.bin from %s\n",
+ ath12k_core_create_firmware_path(ab, filename,
+ filepath, sizeof(filepath));
+ ath12k_err(ab, "failed to fetch board data for %s from %s\n",
+ boardname, filepath);
+ if (memcmp(boardname, fallback_boardname, strlen(boardname)))
+ ath12k_err(ab, "failed to fetch board data for %s from %s\n",
+ fallback_boardname, filepath);
+
+ ath12k_err(ab, "failed to fetch board.bin from %s\n",
ab->hw_params->fw.dir);
return ret;
}
@@ -387,6 +464,79 @@ success:
return 0;
}
+int ath12k_core_fetch_regdb(struct ath12k_base *ab, struct ath12k_board_data *bd)
+{
+ char boardname[BOARD_NAME_SIZE], default_boardname[BOARD_NAME_SIZE];
+ int ret;
+
+ ret = ath12k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE);
+ if (ret) {
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "failed to create board name for regdb: %d", ret);
+ goto exit;
+ }
+
+ ret = ath12k_core_fetch_board_data_api_n(ab, bd, boardname,
+ ATH12K_BD_IE_REGDB,
+ ATH12K_BD_IE_REGDB_NAME,
+ ATH12K_BD_IE_REGDB_DATA);
+ if (!ret)
+ goto exit;
+
+ ret = ath12k_core_create_bus_type_board_name(ab, default_boardname,
+ BOARD_NAME_SIZE);
+ if (ret) {
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "failed to create default board name for regdb: %d", ret);
+ goto exit;
+ }
+
+ ret = ath12k_core_fetch_board_data_api_n(ab, bd, default_boardname,
+ ATH12K_BD_IE_REGDB,
+ ATH12K_BD_IE_REGDB_NAME,
+ ATH12K_BD_IE_REGDB_DATA);
+ if (!ret)
+ goto exit;
+
+ ret = ath12k_core_fetch_board_data_api_1(ab, bd, ATH12K_REGDB_FILE_NAME);
+ if (ret)
+ ath12k_dbg(ab, ATH12K_DBG_BOOT, "failed to fetch %s from %s\n",
+ ATH12K_REGDB_FILE_NAME, ab->hw_params->fw.dir);
+
+exit:
+ if (!ret)
+ ath12k_dbg(ab, ATH12K_DBG_BOOT, "fetched regdb\n");
+
+ return ret;
+}
+
+u32 ath12k_core_get_max_station_per_radio(struct ath12k_base *ab)
+{
+ if (ab->num_radios == 2)
+ return TARGET_NUM_STATIONS_DBS;
+ else if (ab->num_radios == 3)
+ return TARGET_NUM_PEERS_PDEV_DBS_SBS;
+ return TARGET_NUM_STATIONS_SINGLE;
+}
+
+u32 ath12k_core_get_max_peers_per_radio(struct ath12k_base *ab)
+{
+ if (ab->num_radios == 2)
+ return TARGET_NUM_PEERS_PDEV_DBS;
+ else if (ab->num_radios == 3)
+ return TARGET_NUM_PEERS_PDEV_DBS_SBS;
+ return TARGET_NUM_PEERS_PDEV_SINGLE;
+}
+
+u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab)
+{
+ if (ab->num_radios == 2)
+ return TARGET_NUM_TIDS(DBS);
+ else if (ab->num_radios == 3)
+ return TARGET_NUM_TIDS(DBS_SBS);
+ return TARGET_NUM_TIDS(SINGLE);
+}
+
static void ath12k_core_stop(struct ath12k_base *ab)
{
if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags))
@@ -592,14 +742,14 @@ static int ath12k_core_start(struct ath12k_base *ab,
ath12k_dp_cc_config(ab);
- ath12k_dp_pdev_pre_alloc(ab);
-
ret = ath12k_dp_rx_pdev_reo_setup(ab);
if (ret) {
ath12k_err(ab, "failed to initialize reo destination rings: %d\n", ret);
goto err_mac_destroy;
}
+ ath12k_dp_hal_rx_desc_init(ab);
+
ret = ath12k_wmi_cmd_init(ab);
if (ret) {
ath12k_err(ab, "failed to send wmi init cmd: %d\n", ret);
@@ -759,20 +909,30 @@ static void ath12k_rfkill_work(struct work_struct *work)
{
struct ath12k_base *ab = container_of(work, struct ath12k_base, rfkill_work);
struct ath12k *ar;
+ struct ath12k_hw *ah;
+ struct ieee80211_hw *hw;
bool rfkill_radio_on;
- int i;
+ int i, j;
spin_lock_bh(&ab->base_lock);
rfkill_radio_on = ab->rfkill_radio_on;
spin_unlock_bh(&ab->base_lock);
- for (i = 0; i < ab->num_radios; i++) {
- ar = ab->pdevs[i].ar;
- if (!ar)
+ for (i = 0; i < ab->num_hw; i++) {
+ ah = ab->ah[i];
+ if (!ah)
continue;
- ath12k_mac_rfkill_enable_radio(ar, rfkill_radio_on);
- wiphy_rfkill_set_hw_state(ar->hw->wiphy, !rfkill_radio_on);
+ for (j = 0; j < ah->num_radio; j++) {
+ ar = &ah->radio[j];
+ if (!ar)
+ continue;
+
+ ath12k_mac_rfkill_enable_radio(ar, rfkill_radio_on);
+ }
+
+ hw = ah->hw;
+ wiphy_rfkill_set_hw_state(hw->wiphy, !rfkill_radio_on);
}
}
@@ -801,6 +961,7 @@ static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab)
{
struct ath12k *ar;
struct ath12k_pdev *pdev;
+ struct ath12k_hw *ah;
int i;
spin_lock_bh(&ab->base_lock);
@@ -810,16 +971,24 @@ static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab)
if (ab->is_reset)
set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags);
+ for (i = 0; i < ab->num_hw; i++) {
+ if (!ab->ah[i])
+ continue;
+
+ ah = ab->ah[i];
+ ieee80211_stop_queues(ah->hw);
+ }
+
for (i = 0; i < ab->num_radios; i++) {
pdev = &ab->pdevs[i];
ar = pdev->ar;
if (!ar || ar->state == ATH12K_STATE_OFF)
continue;
- ieee80211_stop_queues(ar->hw);
ath12k_mac_drain_tx(ar);
complete(&ar->scan.started);
complete(&ar->scan.completed);
+ complete(&ar->scan.on_channel);
complete(&ar->peer_assoc_done);
complete(&ar->peer_delete_done);
complete(&ar->install_key_done);
@@ -856,7 +1025,7 @@ static void ath12k_core_post_reconfigure_recovery(struct ath12k_base *ab)
case ATH12K_STATE_ON:
ar->state = ATH12K_STATE_RESTARTING;
ath12k_core_halt(ar);
- ieee80211_restart_hw(ar->hw);
+ ieee80211_restart_hw(ath12k_ar_to_hw(ar));
break;
case ATH12K_STATE_OFF:
ath12k_warn(ab,
@@ -979,6 +1148,8 @@ int ath12k_core_pre_init(struct ath12k_base *ab)
return ret;
}
+ ath12k_fw_map(ab);
+
return 0;
}
@@ -1007,6 +1178,7 @@ void ath12k_core_deinit(struct ath12k_base *ab)
ath12k_hif_power_down(ab);
ath12k_mac_destroy(ab);
ath12k_core_soc_destroy(ab);
+ ath12k_fw_unmap(ab);
}
void ath12k_core_free(struct ath12k_base *ab)
@@ -1054,6 +1226,8 @@ struct ath12k_base *ath12k_core_alloc(struct device *dev, size_t priv_size,
ab->dev = dev;
ab->hif.bus = bus;
+ ab->qmi.num_radios = U8_MAX;
+ ab->slo_capable = true;
return ab;
diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h
index 8458dc292821..97e5a0ccd233 100644
--- a/drivers/net/wireless/ath/ath12k/core.h
+++ b/drivers/net/wireless/ath/ath12k/core.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_CORE_H
@@ -13,6 +13,7 @@
#include <linux/bitfield.h>
#include <linux/dmi.h>
#include <linux/ctype.h>
+#include <linux/firmware.h>
#include "qmi.h"
#include "htc.h"
#include "wmi.h"
@@ -24,6 +25,7 @@
#include "hal_rx.h"
#include "reg.h"
#include "dbring.h"
+#include "fw.h"
#define SM(_v, _f) (((_v) << _f##_LSB) & _f##_MASK)
@@ -55,6 +57,11 @@
#define ATH12K_RECONFIGURE_TIMEOUT_HZ (10 * HZ)
#define ATH12K_RECOVER_START_TIMEOUT_HZ (20 * HZ)
+enum ath12k_bdf_search {
+ ATH12K_BDF_SEARCH_DEFAULT,
+ ATH12K_BDF_SEARCH_BUS_AND_BOARD,
+};
+
enum wme_ac {
WME_AC_BE,
WME_AC_BK,
@@ -259,6 +266,7 @@ struct ath12k_vif {
u8 tx_encap_type;
u8 vdev_stats_id;
u32 punct_bitmap;
+ bool ps;
};
struct ath12k_vif_iter {
@@ -420,7 +428,7 @@ struct ath12k_sta {
};
#define ATH12K_MIN_5G_FREQ 4150
-#define ATH12K_MIN_6G_FREQ 5945
+#define ATH12K_MIN_6G_FREQ 5925
#define ATH12K_MAX_6G_FREQ 7115
#define ATH12K_NUM_CHANS 100
#define ATH12K_MAX_5G_CHAN 173
@@ -468,7 +476,7 @@ struct ath12k_per_peer_tx_stats {
struct ath12k {
struct ath12k_base *ab;
struct ath12k_pdev *pdev;
- struct ieee80211_hw *hw;
+ struct ath12k_hw *ah;
struct ath12k_wmi_pdev *wmi;
struct ath12k_pdev_dp dp;
u8 mac_addr[ETH_ALEN];
@@ -532,6 +540,7 @@ struct ath12k {
/* pdev_idx starts from 0 whereas pdev->pdev_id starts with 1 */
u8 pdev_idx;
u8 lmac_id;
+ u8 hw_link_id;
struct completion peer_assoc_done;
struct completion peer_delete_done;
@@ -591,6 +600,13 @@ struct ath12k {
int monitor_vdev_id;
};
+struct ath12k_hw {
+ struct ieee80211_hw *hw;
+
+ u8 num_radio;
+ struct ath12k radio[] __aligned(sizeof(void *));
+};
+
struct ath12k_band_cap {
u32 phy_id;
u32 max_bw_supported;
@@ -724,6 +740,16 @@ struct ath12k_base {
u8 fw_pdev_count;
struct ath12k_pdev __rcu *pdevs_active[MAX_RADIOS];
+
+ /* Holds information of wiphy (hw) registration.
+ *
+ * In Multi/Single Link Operation case, all pdevs are registered as
+ * a single wiphy. In other (legacy/Non-MLO) cases, each pdev is
+ * registered as separate wiphys.
+ */
+ struct ath12k_hw *ah[MAX_RADIOS];
+ u8 num_hw;
+
struct ath12k_wmi_hal_reg_capabilities_ext_arg hal_reg_cap[MAX_RADIOS];
unsigned long long free_vdev_map;
unsigned long long free_vdev_stats_id_map;
@@ -793,10 +819,44 @@ struct ath12k_base {
/* true means radio is on */
bool rfkill_radio_on;
+ struct {
+ enum ath12k_bdf_search bdf_search;
+ u32 vendor;
+ u32 device;
+ u32 subsystem_vendor;
+ u32 subsystem_device;
+ } id;
+
+ struct {
+ u32 api_version;
+
+ const struct firmware *fw;
+ const u8 *amss_data;
+ size_t amss_len;
+ const u8 *amss_dualmac_data;
+ size_t amss_dualmac_len;
+ const u8 *m3_data;
+ size_t m3_len;
+
+ DECLARE_BITMAP(fw_features, ATH12K_FW_FEATURE_COUNT);
+ } fw;
+
+ const struct hal_rx_ops *hal_rx_ops;
+
+ /* slo_capable denotes if the single/multi link operation
+ * is supported within the same chip (SoC).
+ */
+ bool slo_capable;
+
/* must be last */
u8 drv_priv[] __aligned(sizeof(void *));
};
+struct ath12k_pdev_map {
+ struct ath12k_base *ab;
+ u8 pdev_idx;
+};
+
int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab);
int ath12k_core_pre_init(struct ath12k_base *ab);
int ath12k_core_init(struct ath12k_base *ath12k);
@@ -810,6 +870,7 @@ int ath12k_core_fetch_board_data_api_1(struct ath12k_base *ab,
int ath12k_core_fetch_bdf(struct ath12k_base *ath12k,
struct ath12k_board_data *bd);
void ath12k_core_free_bdf(struct ath12k_base *ab, struct ath12k_board_data *bd);
+int ath12k_core_fetch_regdb(struct ath12k_base *ab, struct ath12k_board_data *bd);
int ath12k_core_check_dt(struct ath12k_base *ath12k);
int ath12k_core_check_smbios(struct ath12k_base *ab);
void ath12k_core_halt(struct ath12k *ar);
@@ -818,6 +879,9 @@ int ath12k_core_suspend(struct ath12k_base *ab);
const struct firmware *ath12k_core_firmware_request(struct ath12k_base *ab,
const char *filename);
+u32 ath12k_core_get_max_station_per_radio(struct ath12k_base *ab);
+u32 ath12k_core_get_max_peers_per_radio(struct ath12k_base *ab);
+u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab);
static inline const char *ath12k_scan_state_str(enum ath12k_scan_state state)
{
@@ -882,4 +946,18 @@ static inline const char *ath12k_bus_str(enum ath12k_bus bus)
return "unknown";
}
+static inline struct ath12k_hw *ath12k_hw_to_ah(struct ieee80211_hw *hw)
+{
+ return hw->priv;
+}
+
+static inline struct ath12k *ath12k_ah_to_ar(struct ath12k_hw *ah)
+{
+ return ah->radio;
+}
+
+static inline struct ieee80211_hw *ath12k_ar_to_hw(struct ath12k *ar)
+{
+ return ar->ah->hw;
+}
#endif /* _CORE_H_ */
diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c
index a6f81f2f97ef..c8e1b244b69e 100644
--- a/drivers/net/wireless/ath/ath12k/dp.c
+++ b/drivers/net/wireless/ath/ath12k/dp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <crypto/hash.h>
@@ -997,6 +997,29 @@ void ath12k_dp_pdev_pre_alloc(struct ath12k_base *ab)
}
}
+bool ath12k_dp_wmask_compaction_rx_tlv_supported(struct ath12k_base *ab)
+{
+ if (test_bit(WMI_TLV_SERVICE_WMSK_COMPACTION_RX_TLVS, ab->wmi_ab.svc_map) &&
+ ab->hw_params->hal_ops->rxdma_ring_wmask_rx_mpdu_start &&
+ ab->hw_params->hal_ops->rxdma_ring_wmask_rx_msdu_end &&
+ ab->hw_params->hal_ops->get_hal_rx_compact_ops) {
+ return true;
+ }
+ return false;
+}
+
+void ath12k_dp_hal_rx_desc_init(struct ath12k_base *ab)
+{
+ if (ath12k_dp_wmask_compaction_rx_tlv_supported(ab)) {
+ /* RX TLVS compaction is supported, hence change the hal_rx_ops
+ * to compact hal_rx_ops.
+ */
+ ab->hal_rx_ops = ab->hw_params->hal_ops->get_hal_rx_compact_ops();
+ }
+ ab->hal.hal_desc_sz =
+ ab->hal_rx_ops->rx_desc_get_desc_size();
+}
+
static void ath12k_dp_service_mon_ring(struct timer_list *t)
{
struct ath12k_base *ab = from_timer(ab, t, mon_reap_timer);
diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h
index 1df3cdd46140..eb2dd408e081 100644
--- a/drivers/net/wireless/ath/ath12k/dp.h
+++ b/drivers/net/wireless/ath/ath12k/dp.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_DP_H
@@ -150,7 +150,7 @@ struct ath12k_pdev_dp {
#define DP_RX_HASH_ENABLE 1 /* Enable hash based Rx steering */
-#define DP_BA_WIN_SZ_MAX 256
+#define DP_BA_WIN_SZ_MAX 1024
#define DP_TCL_NUM_RING_MAX 4
@@ -170,6 +170,7 @@ struct ath12k_pdev_dp {
#define DP_REO_CMD_RING_SIZE 128
#define DP_REO_STATUS_RING_SIZE 2048
#define DP_RXDMA_BUF_RING_SIZE 4096
+#define DP_RX_MAC_BUF_RING_SIZE 2048
#define DP_RXDMA_REFILL_RING_SIZE 2048
#define DP_RXDMA_ERR_DST_RING_SIZE 1024
#define DP_RXDMA_MON_STATUS_RING_SIZE 1024
@@ -765,6 +766,11 @@ enum htt_stats_internal_ppdu_frametype {
#define HTT_RX_RING_SELECTION_CFG_RX_MSDU_START_OFFSET GENMASK(31, 16)
#define HTT_RX_RING_SELECTION_CFG_RX_ATTENTION_OFFSET GENMASK(15, 0)
+#define HTT_RX_RING_SELECTION_CFG_WORD_MASK_COMPACT_SET BIT(23)
+#define HTT_RX_RING_SELECTION_CFG_RX_MPDU_START_MASK GENMASK(15, 0)
+#define HTT_RX_RING_SELECTION_CFG_RX_MPDU_END_MASK GENMASK(18, 16)
+#define HTT_RX_RING_SELECTION_CFG_RX_MSDU_END_MASK GENMASK(16, 0)
+
enum htt_rx_filter_tlv_flags {
HTT_RX_FILTER_TLV_FLAGS_MPDU_START = BIT(0),
HTT_RX_FILTER_TLV_FLAGS_MSDU_START = BIT(1),
@@ -1088,6 +1094,11 @@ struct htt_rx_ring_selection_cfg_cmd {
__le32 rx_mpdu_offset;
__le32 rx_msdu_offset;
__le32 rx_attn_offset;
+ __le32 info2;
+ __le32 reserved[2];
+ __le32 rx_mpdu_start_end_mask;
+ __le32 rx_msdu_end_word_mask;
+ __le32 info3;
} __packed;
struct htt_rx_ring_tlv_filter {
@@ -1104,6 +1115,9 @@ struct htt_rx_ring_tlv_filter {
u16 rx_msdu_end_offset;
u16 rx_msdu_start_offset;
u16 rx_attn_offset;
+ u16 rx_mpdu_start_wmask;
+ u16 rx_mpdu_end_wmask;
+ u32 rx_msdu_end_wmask;
};
#define HTT_STATS_FRAME_CTRL_TYPE_MGMT 0x0
@@ -1820,4 +1834,6 @@ struct ath12k_rx_desc_info *ath12k_dp_get_rx_desc(struct ath12k_base *ab,
u32 cookie);
struct ath12k_tx_desc_info *ath12k_dp_get_tx_desc(struct ath12k_base *ab,
u32 desc_id);
+bool ath12k_dp_wmask_compaction_rx_tlv_supported(struct ath12k_base *ab);
+void ath12k_dp_hal_rx_desc_init(struct ath12k_base *ab);
#endif
diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c
index be4b39f5fa80..2d56913a75d0 100644
--- a/drivers/net/wireless/ath/ath12k/dp_mon.c
+++ b/drivers/net/wireless/ath/ath12k/dp_mon.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2019-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "dp_mon.h"
@@ -864,7 +864,7 @@ static void ath12k_dp_mon_rx_msdus_set_payload(struct ath12k *ar, struct sk_buff
{
u32 rx_pkt_offset, l2_hdr_offset;
- rx_pkt_offset = ar->ab->hw_params->hal_desc_sz;
+ rx_pkt_offset = ar->ab->hal.hal_desc_sz;
l2_hdr_offset = ath12k_dp_rx_h_l3pad(ar->ab,
(struct hal_rx_desc *)msdu->data);
skb_pull(msdu, rx_pkt_offset + l2_hdr_offset);
@@ -917,7 +917,8 @@ ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar,
u8 qos_pkt = 0;
rx_desc = (struct hal_rx_desc *)head_msdu->data;
- hdr_desc = ab->hw_params->hal_ops->rx_desc_get_msdu_payload(rx_desc);
+ hdr_desc =
+ ab->hal_rx_ops->rx_desc_get_msdu_payload(rx_desc);
/* Base size */
wh = (struct ieee80211_hdr_3addr *)hdr_desc;
@@ -1130,7 +1131,7 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct
!(is_mcbc && rx_status->flag & RX_FLAG_DECRYPTED))
rx_status->flag |= RX_FLAG_8023;
- ieee80211_rx_napi(ar->hw, pubsta, msdu, napi);
+ ieee80211_rx_napi(ath12k_ar_to_hw(ar), pubsta, msdu, napi);
}
static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, u32 mac_id,
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c
index 1ee83f765929..ca76c018dd0c 100644
--- a/drivers/net/wireless/ath/ath12k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/ieee80211.h>
@@ -23,34 +23,34 @@
static enum hal_encrypt_type ath12k_dp_rx_h_enctype(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- if (!ab->hw_params->hal_ops->rx_desc_encrypt_valid(desc))
+ if (!ab->hal_rx_ops->rx_desc_encrypt_valid(desc))
return HAL_ENCRYPT_TYPE_OPEN;
- return ab->hw_params->hal_ops->rx_desc_get_encrypt_type(desc);
+ return ab->hal_rx_ops->rx_desc_get_encrypt_type(desc);
}
u8 ath12k_dp_rx_h_decap_type(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_decap_type(desc);
+ return ab->hal_rx_ops->rx_desc_get_decap_type(desc);
}
static u8 ath12k_dp_rx_h_mesh_ctl_present(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mesh_ctl(desc);
+ return ab->hal_rx_ops->rx_desc_get_mesh_ctl(desc);
}
static bool ath12k_dp_rx_h_seq_ctrl_valid(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mpdu_seq_ctl_vld(desc);
+ return ab->hal_rx_ops->rx_desc_get_mpdu_seq_ctl_vld(desc);
}
static bool ath12k_dp_rx_h_fc_valid(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mpdu_fc_valid(desc);
+ return ab->hal_rx_ops->rx_desc_get_mpdu_fc_valid(desc);
}
static bool ath12k_dp_rx_h_more_frags(struct ath12k_base *ab,
@@ -58,7 +58,7 @@ static bool ath12k_dp_rx_h_more_frags(struct ath12k_base *ab,
{
struct ieee80211_hdr *hdr;
- hdr = (struct ieee80211_hdr *)(skb->data + ab->hw_params->hal_desc_sz);
+ hdr = (struct ieee80211_hdr *)(skb->data + ab->hal.hal_desc_sz);
return ieee80211_has_morefrags(hdr->frame_control);
}
@@ -67,156 +67,156 @@ static u16 ath12k_dp_rx_h_frag_no(struct ath12k_base *ab,
{
struct ieee80211_hdr *hdr;
- hdr = (struct ieee80211_hdr *)(skb->data + ab->hw_params->hal_desc_sz);
+ hdr = (struct ieee80211_hdr *)(skb->data + ab->hal.hal_desc_sz);
return le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
}
static u16 ath12k_dp_rx_h_seq_no(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mpdu_start_seq_no(desc);
+ return ab->hal_rx_ops->rx_desc_get_mpdu_start_seq_no(desc);
}
static bool ath12k_dp_rx_h_msdu_done(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->dp_rx_h_msdu_done(desc);
+ return ab->hal_rx_ops->dp_rx_h_msdu_done(desc);
}
static bool ath12k_dp_rx_h_l4_cksum_fail(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->dp_rx_h_l4_cksum_fail(desc);
+ return ab->hal_rx_ops->dp_rx_h_l4_cksum_fail(desc);
}
static bool ath12k_dp_rx_h_ip_cksum_fail(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->dp_rx_h_ip_cksum_fail(desc);
+ return ab->hal_rx_ops->dp_rx_h_ip_cksum_fail(desc);
}
static bool ath12k_dp_rx_h_is_decrypted(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->dp_rx_h_is_decrypted(desc);
+ return ab->hal_rx_ops->dp_rx_h_is_decrypted(desc);
}
u32 ath12k_dp_rx_h_mpdu_err(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->dp_rx_h_mpdu_err(desc);
+ return ab->hal_rx_ops->dp_rx_h_mpdu_err(desc);
}
static u16 ath12k_dp_rx_h_msdu_len(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_msdu_len(desc);
+ return ab->hal_rx_ops->rx_desc_get_msdu_len(desc);
}
static u8 ath12k_dp_rx_h_sgi(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_msdu_sgi(desc);
+ return ab->hal_rx_ops->rx_desc_get_msdu_sgi(desc);
}
static u8 ath12k_dp_rx_h_rate_mcs(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_msdu_rate_mcs(desc);
+ return ab->hal_rx_ops->rx_desc_get_msdu_rate_mcs(desc);
}
static u8 ath12k_dp_rx_h_rx_bw(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_msdu_rx_bw(desc);
+ return ab->hal_rx_ops->rx_desc_get_msdu_rx_bw(desc);
}
static u32 ath12k_dp_rx_h_freq(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_msdu_freq(desc);
+ return ab->hal_rx_ops->rx_desc_get_msdu_freq(desc);
}
static u8 ath12k_dp_rx_h_pkt_type(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_msdu_pkt_type(desc);
+ return ab->hal_rx_ops->rx_desc_get_msdu_pkt_type(desc);
}
static u8 ath12k_dp_rx_h_nss(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return hweight8(ab->hw_params->hal_ops->rx_desc_get_msdu_nss(desc));
+ return hweight8(ab->hal_rx_ops->rx_desc_get_msdu_nss(desc));
}
static u8 ath12k_dp_rx_h_tid(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mpdu_tid(desc);
+ return ab->hal_rx_ops->rx_desc_get_mpdu_tid(desc);
}
static u16 ath12k_dp_rx_h_peer_id(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mpdu_peer_id(desc);
+ return ab->hal_rx_ops->rx_desc_get_mpdu_peer_id(desc);
}
u8 ath12k_dp_rx_h_l3pad(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_l3_pad_bytes(desc);
+ return ab->hal_rx_ops->rx_desc_get_l3_pad_bytes(desc);
}
static bool ath12k_dp_rx_h_first_msdu(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_first_msdu(desc);
+ return ab->hal_rx_ops->rx_desc_get_first_msdu(desc);
}
static bool ath12k_dp_rx_h_last_msdu(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_last_msdu(desc);
+ return ab->hal_rx_ops->rx_desc_get_last_msdu(desc);
}
static void ath12k_dp_rx_desc_end_tlv_copy(struct ath12k_base *ab,
struct hal_rx_desc *fdesc,
struct hal_rx_desc *ldesc)
{
- ab->hw_params->hal_ops->rx_desc_copy_end_tlv(fdesc, ldesc);
+ ab->hal_rx_ops->rx_desc_copy_end_tlv(fdesc, ldesc);
}
static void ath12k_dp_rxdesc_set_msdu_len(struct ath12k_base *ab,
struct hal_rx_desc *desc,
u16 len)
{
- ab->hw_params->hal_ops->rx_desc_set_msdu_len(desc, len);
+ ab->hal_rx_ops->rx_desc_set_msdu_len(desc, len);
}
static bool ath12k_dp_rx_h_is_da_mcbc(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
return (ath12k_dp_rx_h_first_msdu(ab, desc) &&
- ab->hw_params->hal_ops->rx_desc_is_da_mcbc(desc));
+ ab->hal_rx_ops->rx_desc_is_da_mcbc(desc));
}
static bool ath12k_dp_rxdesc_mac_addr2_valid(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_mac_addr2_valid(desc);
+ return ab->hal_rx_ops->rx_desc_mac_addr2_valid(desc);
}
static u8 *ath12k_dp_rxdesc_get_mpdu_start_addr2(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_mpdu_start_addr2(desc);
+ return ab->hal_rx_ops->rx_desc_mpdu_start_addr2(desc);
}
static void ath12k_dp_rx_desc_get_dot11_hdr(struct ath12k_base *ab,
struct hal_rx_desc *desc,
struct ieee80211_hdr *hdr)
{
- ab->hw_params->hal_ops->rx_desc_get_dot11_hdr(desc, hdr);
+ ab->hal_rx_ops->rx_desc_get_dot11_hdr(desc, hdr);
}
static void ath12k_dp_rx_desc_get_crypto_header(struct ath12k_base *ab,
@@ -224,13 +224,19 @@ static void ath12k_dp_rx_desc_get_crypto_header(struct ath12k_base *ab,
u8 *crypto_hdr,
enum hal_encrypt_type enctype)
{
- ab->hw_params->hal_ops->rx_desc_get_crypto_header(desc, crypto_hdr, enctype);
+ ab->hal_rx_ops->rx_desc_get_crypto_header(desc, crypto_hdr, enctype);
}
static u16 ath12k_dp_rxdesc_get_mpdu_frame_ctrl(struct ath12k_base *ab,
struct hal_rx_desc *desc)
{
- return ab->hw_params->hal_ops->rx_desc_get_mpdu_frame_ctl(desc);
+ return ab->hal_rx_ops->rx_desc_get_mpdu_frame_ctl(desc);
+}
+
+static inline u8 ath12k_dp_rx_get_msdu_src_link(struct ath12k_base *ab,
+ struct hal_rx_desc *desc)
+{
+ return ab->hal_rx_ops->rx_desc_get_msdu_src_link_id(desc);
}
static int ath12k_dp_purge_mon_ring(struct ath12k_base *ab)
@@ -1761,7 +1767,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar,
int buf_first_hdr_len, buf_first_len;
struct hal_rx_desc *ldesc;
int space_extra, rem_len, buf_len;
- u32 hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
/* As the msdu is spread across multiple rx buffers,
* find the offset to the start of msdu for computing
@@ -2458,7 +2464,7 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap
!(is_mcbc && rx_status->flag & RX_FLAG_DECRYPTED))
rx_status->flag |= RX_FLAG_8023;
- ieee80211_rx_napi(ar->hw, pubsta, msdu, napi);
+ ieee80211_rx_napi(ath12k_ar_to_hw(ar), pubsta, msdu, napi);
}
static int ath12k_dp_rx_process_msdu(struct ath12k *ar,
@@ -2473,7 +2479,7 @@ static int ath12k_dp_rx_process_msdu(struct ath12k *ar,
u8 l3_pad_bytes;
u16 msdu_len;
int ret;
- u32 hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
last_buf = ath12k_dp_rx_get_msdu_last_buf(msdu_list, msdu);
if (!last_buf) {
@@ -2804,7 +2810,7 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer
u8 mic[IEEE80211_CCMP_MIC_LEN];
int head_len, tail_len, ret;
size_t data_len;
- u32 hdr_len, hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hdr_len, hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
u8 *key, *data;
u8 key_idx;
@@ -2844,7 +2850,7 @@ mic_fail:
ath12k_dp_rx_h_ppdu(ar, rx_desc, rxs);
ath12k_dp_rx_h_undecap(ar, msdu, rx_desc,
HAL_ENCRYPT_TYPE_TKIP_MIC, rxs, true);
- ieee80211_rx(ar->hw, msdu);
+ ieee80211_rx(ath12k_ar_to_hw(ar), msdu);
return -EINVAL;
}
@@ -2854,7 +2860,7 @@ static void ath12k_dp_rx_h_undecap_frag(struct ath12k *ar, struct sk_buff *msdu,
struct ieee80211_hdr *hdr;
size_t hdr_len;
size_t crypto_len;
- u32 hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
if (!flags)
return;
@@ -2892,7 +2898,7 @@ static int ath12k_dp_rx_h_defrag(struct ath12k *ar,
bool is_decrypted = false;
int msdu_len = 0;
int extra_space;
- u32 flags, hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 flags, hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
first_frag = skb_peek(&rx_tid->rx_frags);
last_frag = skb_peek_tail(&rx_tid->rx_frags);
@@ -2968,7 +2974,7 @@ static int ath12k_dp_rx_h_defrag_reo_reinject(struct ath12k *ar,
struct ath12k_rx_desc_info *desc_info;
u8 dst_ind;
- hal_rx_desc_sz = ab->hw_params->hal_desc_sz;
+ hal_rx_desc_sz = ab->hal.hal_desc_sz;
link_desc_banks = dp->link_desc_banks;
reo_dest_ring = rx_tid->dst_ring_desc;
@@ -3122,7 +3128,7 @@ static u64 ath12k_dp_rx_h_get_pn(struct ath12k *ar, struct sk_buff *skb)
struct ieee80211_hdr *hdr;
u64 pn = 0;
u8 *ehdr;
- u32 hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
hdr = (struct ieee80211_hdr *)(skb->data + hal_rx_desc_sz);
ehdr = skb->data + hal_rx_desc_sz + ieee80211_hdrlen(hdr->frame_control);
@@ -3305,7 +3311,7 @@ ath12k_dp_process_rx_err_buf(struct ath12k *ar, struct hal_reo_dest_ring *desc,
struct ath12k_skb_rxcb *rxcb;
struct hal_rx_desc *rx_desc;
u16 msdu_len;
- u32 hal_rx_desc_sz = ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ab->hal.hal_desc_sz;
struct ath12k_rx_desc_info *desc_info;
u64 desc_va;
@@ -3486,7 +3492,7 @@ static void ath12k_dp_rx_null_q_desc_sg_drop(struct ath12k *ar,
int n_buffs;
n_buffs = DIV_ROUND_UP(msdu_len,
- (DP_RX_BUFFER_SIZE - ar->ab->hw_params->hal_desc_sz));
+ (DP_RX_BUFFER_SIZE - ar->ab->hal.hal_desc_sz));
skb_queue_walk_safe(msdu_list, skb, tmp) {
rxcb = ATH12K_SKB_RXCB(skb);
@@ -3510,7 +3516,7 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu,
struct hal_rx_desc *desc = (struct hal_rx_desc *)msdu->data;
u8 l3pad_bytes;
struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu);
- u32 hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
msdu_len = ath12k_dp_rx_h_msdu_len(ab, desc);
@@ -3607,7 +3613,7 @@ static void ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu,
struct hal_rx_desc *desc = (struct hal_rx_desc *)msdu->data;
u8 l3pad_bytes;
struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu);
- u32 hal_rx_desc_sz = ar->ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
rxcb->is_first_msdu = ath12k_dp_rx_h_first_msdu(ab, desc);
rxcb->is_last_msdu = ath12k_dp_rx_h_last_msdu(ab, desc);
@@ -3695,16 +3701,15 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab,
struct hal_rx_wbm_rel_info err_info;
struct hal_srng *srng;
struct sk_buff *msdu;
- struct sk_buff_head msdu_list[MAX_RADIOS];
+ struct sk_buff_head msdu_list;
struct ath12k_skb_rxcb *rxcb;
void *rx_desc;
- int mac_id;
+ u8 mac_id;
int num_buffs_reaped = 0;
struct ath12k_rx_desc_info *desc_info;
- int ret, i;
+ int ret, pdev_id;
- for (i = 0; i < ab->num_radios; i++)
- __skb_queue_head_init(&msdu_list[i]);
+ __skb_queue_head_init(&msdu_list);
srng = &ab->hal.srng_list[dp->rx_rel_ring.ring_id];
rx_ring = &dp->rx_refill_buf_ring;
@@ -3737,11 +3742,6 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab,
}
}
- /* FIXME: Extract mac id correctly. Since descs are not tied
- * to mac, we can extract from vdev id in ring desc.
- */
- mac_id = 0;
-
if (desc_info->magic != ATH12K_DP_RX_DESC_MAGIC)
ath12k_warn(ab, "WBM RX err, Check HW CC implementation");
@@ -3771,7 +3771,8 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab,
rxcb->err_rel_src = err_info.err_rel_src;
rxcb->err_code = err_info.err_code;
rxcb->rx_desc = (struct hal_rx_desc *)msdu->data;
- __skb_queue_tail(&msdu_list[mac_id], msdu);
+
+ __skb_queue_tail(&msdu_list, msdu);
rxcb->is_first_msdu = err_info.first_msdu;
rxcb->is_last_msdu = err_info.last_msdu;
@@ -3788,21 +3789,22 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab,
ath12k_dp_rx_bufs_replenish(ab, rx_ring, num_buffs_reaped);
rcu_read_lock();
- for (i = 0; i < ab->num_radios; i++) {
- if (!rcu_dereference(ab->pdevs_active[i])) {
- __skb_queue_purge(&msdu_list[i]);
+ while ((msdu = __skb_dequeue(&msdu_list))) {
+ mac_id = ath12k_dp_rx_get_msdu_src_link(ab,
+ (struct hal_rx_desc *)msdu->data);
+ pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id);
+ ar = ab->pdevs[pdev_id].ar;
+
+ if (!ar || !rcu_dereference(ar->ab->pdevs_active[mac_id])) {
+ dev_kfree_skb_any(msdu);
continue;
}
- ar = ab->pdevs[i].ar;
-
if (test_bit(ATH12K_CAC_RUNNING, &ar->dev_flags)) {
- __skb_queue_purge(&msdu_list[i]);
+ dev_kfree_skb_any(msdu);
continue;
}
-
- while ((msdu = __skb_dequeue(&msdu_list[i])) != NULL)
- ath12k_dp_rx_wbm_err(ar, napi, msdu, &msdu_list[i]);
+ ath12k_dp_rx_wbm_err(ar, napi, msdu, &msdu_list);
}
rcu_read_unlock();
done:
@@ -3922,7 +3924,7 @@ int ath12k_dp_rxdma_ring_sel_config_qcn9274(struct ath12k_base *ab)
struct htt_rx_ring_tlv_filter tlv_filter = {0};
u32 ring_id;
int ret;
- u32 hal_rx_desc_sz = ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ab->hal.hal_desc_sz;
ring_id = dp->rx_refill_buf_ring.refill_buf_ring.ring_id;
@@ -3935,14 +3937,20 @@ int ath12k_dp_rxdma_ring_sel_config_qcn9274(struct ath12k_base *ab)
tlv_filter.rx_packet_offset = hal_rx_desc_sz;
tlv_filter.rx_mpdu_start_offset =
- ab->hw_params->hal_ops->rx_desc_get_mpdu_start_offset();
+ ab->hal_rx_ops->rx_desc_get_mpdu_start_offset();
tlv_filter.rx_msdu_end_offset =
- ab->hw_params->hal_ops->rx_desc_get_msdu_end_offset();
+ ab->hal_rx_ops->rx_desc_get_msdu_end_offset();
+
+ if (ath12k_dp_wmask_compaction_rx_tlv_supported(ab)) {
+ tlv_filter.rx_mpdu_start_wmask =
+ ab->hw_params->hal_ops->rxdma_ring_wmask_rx_mpdu_start();
+ tlv_filter.rx_msdu_end_wmask =
+ ab->hw_params->hal_ops->rxdma_ring_wmask_rx_msdu_end();
+ ath12k_dbg(ab, ATH12K_DBG_DATA,
+ "Configuring compact tlv masks rx_mpdu_start_wmask 0x%x rx_msdu_end_wmask 0x%x\n",
+ tlv_filter.rx_mpdu_start_wmask, tlv_filter.rx_msdu_end_wmask);
+ }
- /* TODO: Selectively subscribe to required qwords within msdu_end
- * and mpdu_start and setup the mask in below msg
- * and modify the rx_desc struct
- */
ret = ath12k_dp_tx_htt_rx_filter_setup(ab, ring_id, 0,
HAL_RXDMA_BUF,
DP_RXDMA_REFILL_RING_SIZE,
@@ -3957,7 +3965,7 @@ int ath12k_dp_rxdma_ring_sel_config_wcn7850(struct ath12k_base *ab)
struct htt_rx_ring_tlv_filter tlv_filter = {0};
u32 ring_id;
int ret;
- u32 hal_rx_desc_sz = ab->hw_params->hal_desc_sz;
+ u32 hal_rx_desc_sz = ab->hal.hal_desc_sz;
int i;
ring_id = dp->rx_refill_buf_ring.refill_buf_ring.ring_id;
@@ -3973,9 +3981,9 @@ int ath12k_dp_rxdma_ring_sel_config_wcn7850(struct ath12k_base *ab)
tlv_filter.rx_header_offset = offsetof(struct hal_rx_desc_wcn7850, pkt_hdr_tlv);
tlv_filter.rx_mpdu_start_offset =
- ab->hw_params->hal_ops->rx_desc_get_mpdu_start_offset();
+ ab->hal_rx_ops->rx_desc_get_mpdu_start_offset();
tlv_filter.rx_msdu_end_offset =
- ab->hw_params->hal_ops->rx_desc_get_msdu_end_offset();
+ ab->hal_rx_ops->rx_desc_get_msdu_end_offset();
/* TODO: Selectively subscribe to required qwords within msdu_end
* and mpdu_start and setup the mask in below msg
@@ -4086,7 +4094,7 @@ int ath12k_dp_rx_alloc(struct ath12k_base *ab)
ret = ath12k_dp_srng_setup(ab,
&dp->rx_mac_buf_ring[i],
HAL_RXDMA_BUF, 1,
- i, 1024);
+ i, DP_RX_MAC_BUF_RING_SIZE);
if (ret) {
ath12k_warn(ab, "failed to setup rx_mac_buf_ring %d\n",
i);
diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c
index 62f9cdbb811c..572b87153647 100644
--- a/drivers/net/wireless/ath/ath12k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_tx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "core.h"
@@ -151,7 +151,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif,
if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) &&
!ieee80211_is_data(hdr->frame_control))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
pool_id = skb_get_queue_mapping(skb) & (ATH12K_HW_MAX_QUEUES - 1);
@@ -401,7 +401,7 @@ ath12k_dp_tx_htt_tx_complete_buf(struct ath12k_base *ab,
}
}
- ieee80211_tx_status_skb(ar->hw, msdu);
+ ieee80211_tx_status_skb(ath12k_ar_to_hw(ar), msdu);
}
static void
@@ -498,7 +498,7 @@ static void ath12k_dp_tx_complete_msdu(struct ath12k *ar,
* Might end up reporting it out-of-band from HTT stats.
*/
- ieee80211_tx_status_skb(ar->hw, msdu);
+ ieee80211_tx_status_skb(ath12k_ar_to_hw(ar), msdu);
exit:
rcu_read_unlock();
@@ -837,7 +837,7 @@ int ath12k_dp_tx_htt_h2t_ver_req_msg(struct ath12k_base *ab)
if (dp->htt_tgt_ver_major != HTT_TARGET_VERSION_MAJOR) {
ath12k_err(ab, "unsupported htt major version %d supported version is %d\n",
dp->htt_tgt_ver_major, HTT_TARGET_VERSION_MAJOR);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
return 0;
@@ -964,6 +964,26 @@ int ath12k_dp_tx_htt_rx_filter_setup(struct ath12k_base *ab, u32 ring_id,
HTT_RX_RING_SELECTION_CFG_RX_ATTENTION_OFFSET);
}
+ if (tlv_filter->rx_mpdu_start_wmask > 0 &&
+ tlv_filter->rx_msdu_end_wmask > 0) {
+ cmd->info2 |=
+ le32_encode_bits(true,
+ HTT_RX_RING_SELECTION_CFG_WORD_MASK_COMPACT_SET);
+ cmd->rx_mpdu_start_end_mask =
+ le32_encode_bits(tlv_filter->rx_mpdu_start_wmask,
+ HTT_RX_RING_SELECTION_CFG_RX_MPDU_START_MASK);
+ /* mpdu_end is not used for any hardwares so far
+ * please assign it in future if any chip is
+ * using through hal ops
+ */
+ cmd->rx_mpdu_start_end_mask |=
+ le32_encode_bits(tlv_filter->rx_mpdu_end_wmask,
+ HTT_RX_RING_SELECTION_CFG_RX_MPDU_END_MASK);
+ cmd->rx_msdu_end_word_mask =
+ le32_encode_bits(tlv_filter->rx_msdu_end_wmask,
+ HTT_RX_RING_SELECTION_CFG_RX_MSDU_END_MASK);
+ }
+
ret = ath12k_htc_send(&ab->htc, ab->dp.eid, skb);
if (ret)
goto err_free;
diff --git a/drivers/net/wireless/ath/ath12k/fw.c b/drivers/net/wireless/ath/ath12k/fw.c
new file mode 100644
index 000000000000..5be4b2d4a19d
--- /dev/null
+++ b/drivers/net/wireless/ath/ath12k/fw.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: BSD-3-Clause-Clear
+/*
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "core.h"
+
+#include "debug.h"
+
+static int ath12k_fw_request_firmware_api_n(struct ath12k_base *ab,
+ const char *name)
+{
+ size_t magic_len, len, ie_len;
+ int ie_id, i, index, bit, ret;
+ struct ath12k_fw_ie *hdr;
+ const u8 *data;
+ __le32 *timestamp;
+
+ ab->fw.fw = ath12k_core_firmware_request(ab, name);
+ if (IS_ERR(ab->fw.fw)) {
+ ret = PTR_ERR(ab->fw.fw);
+ ath12k_dbg(ab, ATH12K_DBG_BOOT, "failed to load %s: %d\n", name, ret);
+ ab->fw.fw = NULL;
+ return ret;
+ }
+
+ data = ab->fw.fw->data;
+ len = ab->fw.fw->size;
+
+ /* magic also includes the null byte, check that as well */
+ magic_len = strlen(ATH12K_FIRMWARE_MAGIC) + 1;
+
+ if (len < magic_len) {
+ ath12k_err(ab, "firmware image too small to contain magic: %zu\n",
+ len);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (memcmp(data, ATH12K_FIRMWARE_MAGIC, magic_len) != 0) {
+ ath12k_err(ab, "Invalid firmware magic\n");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* jump over the padding */
+ magic_len = ALIGN(magic_len, 4);
+
+ /* make sure there's space for padding */
+ if (magic_len > len) {
+ ath12k_err(ab, "No space for padding after magic\n");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ len -= magic_len;
+ data += magic_len;
+
+ /* loop elements */
+ while (len > sizeof(struct ath12k_fw_ie)) {
+ hdr = (struct ath12k_fw_ie *)data;
+
+ ie_id = le32_to_cpu(hdr->id);
+ ie_len = le32_to_cpu(hdr->len);
+
+ len -= sizeof(*hdr);
+ data += sizeof(*hdr);
+
+ if (len < ie_len) {
+ ath12k_err(ab, "Invalid length for FW IE %d (%zu < %zu)\n",
+ ie_id, len, ie_len);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ switch (ie_id) {
+ case ATH12K_FW_IE_TIMESTAMP:
+ if (ie_len != sizeof(u32))
+ break;
+
+ timestamp = (__le32 *)data;
+
+ ath12k_dbg(ab, ATH12K_DBG_BOOT, "found fw timestamp %d\n",
+ le32_to_cpup(timestamp));
+ break;
+ case ATH12K_FW_IE_FEATURES:
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "found firmware features ie (%zd B)\n",
+ ie_len);
+
+ for (i = 0; i < ATH12K_FW_FEATURE_COUNT; i++) {
+ index = i / 8;
+ bit = i % 8;
+
+ if (index == ie_len)
+ break;
+
+ if (data[index] & (1 << bit))
+ __set_bit(i, ab->fw.fw_features);
+ }
+
+ ath12k_dbg_dump(ab, ATH12K_DBG_BOOT, "features", "",
+ ab->fw.fw_features,
+ sizeof(ab->fw.fw_features));
+ break;
+ case ATH12K_FW_IE_AMSS_IMAGE:
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "found fw image ie (%zd B)\n",
+ ie_len);
+
+ ab->fw.amss_data = data;
+ ab->fw.amss_len = ie_len;
+ break;
+ case ATH12K_FW_IE_M3_IMAGE:
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "found m3 image ie (%zd B)\n",
+ ie_len);
+
+ ab->fw.m3_data = data;
+ ab->fw.m3_len = ie_len;
+ break;
+ case ATH12K_FW_IE_AMSS_DUALMAC_IMAGE:
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "found dualmac fw image ie (%zd B)\n",
+ ie_len);
+ ab->fw.amss_dualmac_data = data;
+ ab->fw.amss_dualmac_len = ie_len;
+ break;
+ default:
+ ath12k_warn(ab, "Unknown FW IE: %u\n", ie_id);
+ break;
+ }
+
+ /* jump over the padding */
+ ie_len = ALIGN(ie_len, 4);
+
+ /* make sure there's space for padding */
+ if (ie_len > len)
+ break;
+
+ len -= ie_len;
+ data += ie_len;
+ }
+
+ return 0;
+
+err:
+ release_firmware(ab->fw.fw);
+ ab->fw.fw = NULL;
+ return ret;
+}
+
+void ath12k_fw_map(struct ath12k_base *ab)
+{
+ int ret;
+
+ ret = ath12k_fw_request_firmware_api_n(ab, ATH12K_FW_API2_FILE);
+ if (ret == 0)
+ ab->fw.api_version = 2;
+ else
+ ab->fw.api_version = 1;
+
+ ath12k_dbg(ab, ATH12K_DBG_BOOT, "using fw api %d\n",
+ ab->fw.api_version);
+}
+
+void ath12k_fw_unmap(struct ath12k_base *ab)
+{
+ release_firmware(ab->fw.fw);
+ memset(&ab->fw, 0, sizeof(ab->fw));
+}
diff --git a/drivers/net/wireless/ath/ath12k/fw.h b/drivers/net/wireless/ath/ath12k/fw.h
new file mode 100644
index 000000000000..3ff041f15fa0
--- /dev/null
+++ b/drivers/net/wireless/ath/ath12k/fw.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: BSD-3-Clause-Clear */
+/*
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef ATH12K_FW_H
+#define ATH12K_FW_H
+
+#define ATH12K_FW_API2_FILE "firmware-2.bin"
+#define ATH12K_FIRMWARE_MAGIC "QCOM-ATH12K-FW"
+
+enum ath12k_fw_ie_type {
+ ATH12K_FW_IE_TIMESTAMP = 0,
+ ATH12K_FW_IE_FEATURES = 1,
+ ATH12K_FW_IE_AMSS_IMAGE = 2,
+ ATH12K_FW_IE_M3_IMAGE = 3,
+ ATH12K_FW_IE_AMSS_DUALMAC_IMAGE = 4,
+};
+
+enum ath12k_fw_features {
+ /* The firmware supports setting the QRTR id via register
+ * PCIE_LOCAL_REG_QRTR_NODE_ID
+ */
+ ATH12K_FW_FEATURE_MULTI_QRTR_ID = 0,
+
+ /* keep last */
+ ATH12K_FW_FEATURE_COUNT,
+};
+
+void ath12k_fw_map(struct ath12k_base *ab);
+void ath12k_fw_unmap(struct ath12k_base *ab);
+
+#endif /* ATH12K_FW_H */
diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c
index a489369d8068..78310da8cfe8 100644
--- a/drivers/net/wireless/ath/ath12k/hal.c
+++ b/drivers/net/wireless/ath/ath12k/hal.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/dma-mapping.h>
#include "hal_tx.h"
@@ -449,8 +449,8 @@ static u8 *ath12k_hw_qcn9274_rx_desc_mpdu_start_addr2(struct hal_rx_desc *desc)
static bool ath12k_hw_qcn9274_rx_desc_is_da_mcbc(struct hal_rx_desc *desc)
{
- return __le16_to_cpu(desc->u.qcn9274.msdu_end.info5) &
- RX_MSDU_END_INFO5_DA_IS_MCBC;
+ return __le32_to_cpu(desc->u.qcn9274.mpdu_start.info6) &
+ RX_MPDU_START_INFO6_MCAST_BCAST;
}
static void ath12k_hw_qcn9274_rx_desc_get_dot11_hdr(struct hal_rx_desc *desc,
@@ -626,6 +626,21 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab)
return 0;
}
+static u16 ath12k_hal_qcn9274_rx_mpdu_start_wmask_get(void)
+{
+ return QCN9274_MPDU_START_WMASK;
+}
+
+static u32 ath12k_hal_qcn9274_rx_msdu_end_wmask_get(void)
+{
+ return QCN9274_MSDU_END_WMASK;
+}
+
+static const struct hal_rx_ops *ath12k_hal_qcn9274_get_hal_rx_compact_ops(void)
+{
+ return &hal_rx_qcn9274_compact_ops;
+}
+
static bool ath12k_hw_qcn9274_dp_rx_h_msdu_done(struct hal_rx_desc *desc)
{
return !!le32_get_bits(desc->u.qcn9274.msdu_end.info14,
@@ -680,7 +695,17 @@ static u32 ath12k_hw_qcn9274_dp_rx_h_mpdu_err(struct hal_rx_desc *desc)
return errmap;
}
-const struct hal_ops hal_qcn9274_ops = {
+static u32 ath12k_hw_qcn9274_get_rx_desc_size(void)
+{
+ return sizeof(struct hal_rx_desc_qcn9274);
+}
+
+static u8 ath12k_hw_qcn9274_rx_desc_get_msdu_src_link(struct hal_rx_desc *desc)
+{
+ return 0;
+}
+
+const struct hal_rx_ops hal_rx_qcn9274_ops = {
.rx_desc_get_first_msdu = ath12k_hw_qcn9274_rx_desc_get_first_msdu,
.rx_desc_get_last_msdu = ath12k_hw_qcn9274_rx_desc_get_last_msdu,
.rx_desc_get_l3_pad_bytes = ath12k_hw_qcn9274_rx_desc_get_l3_pad_bytes,
@@ -712,13 +737,367 @@ const struct hal_ops hal_qcn9274_ops = {
.rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_rx_desc_get_dot11_hdr,
.rx_desc_get_crypto_header = ath12k_hw_qcn9274_rx_desc_get_crypto_hdr,
.rx_desc_get_mpdu_frame_ctl = ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl,
- .create_srng_config = ath12k_hal_srng_create_config_qcn9274,
- .tcl_to_wbm_rbm_map = ath12k_hal_qcn9274_tcl_to_wbm_rbm_map,
.dp_rx_h_msdu_done = ath12k_hw_qcn9274_dp_rx_h_msdu_done,
.dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_l4_cksum_fail,
.dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_ip_cksum_fail,
.dp_rx_h_is_decrypted = ath12k_hw_qcn9274_dp_rx_h_is_decrypted,
.dp_rx_h_mpdu_err = ath12k_hw_qcn9274_dp_rx_h_mpdu_err,
+ .rx_desc_get_desc_size = ath12k_hw_qcn9274_get_rx_desc_size,
+ .rx_desc_get_msdu_src_link_id = ath12k_hw_qcn9274_rx_desc_get_msdu_src_link,
+};
+
+static bool ath12k_hw_qcn9274_compact_rx_desc_get_first_msdu(struct hal_rx_desc *desc)
+{
+ return !!le16_get_bits(desc->u.qcn9274_compact.msdu_end.info5,
+ RX_MSDU_END_INFO5_FIRST_MSDU);
+}
+
+static bool ath12k_hw_qcn9274_compact_rx_desc_get_last_msdu(struct hal_rx_desc *desc)
+{
+ return !!le16_get_bits(desc->u.qcn9274_compact.msdu_end.info5,
+ RX_MSDU_END_INFO5_LAST_MSDU);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_l3_pad_bytes(struct hal_rx_desc *desc)
+{
+ return le16_get_bits(desc->u.qcn9274_compact.msdu_end.info5,
+ RX_MSDU_END_INFO5_L3_HDR_PADDING);
+}
+
+static bool ath12k_hw_qcn9274_compact_rx_desc_encrypt_valid(struct hal_rx_desc *desc)
+{
+ return !!le32_get_bits(desc->u.qcn9274_compact.mpdu_start.info4,
+ RX_MPDU_START_INFO4_ENCRYPT_INFO_VALID);
+}
+
+static u32 ath12k_hw_qcn9274_compact_rx_desc_get_encrypt_type(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.mpdu_start.info2,
+ RX_MPDU_START_INFO2_ENC_TYPE);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_decap_type(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info11,
+ RX_MSDU_END_INFO11_DECAP_FORMAT);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_mesh_ctl(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274.msdu_end.info11,
+ RX_MSDU_END_INFO11_MESH_CTRL_PRESENT);
+}
+
+static bool
+ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_seq_ctl_vld(struct hal_rx_desc *desc)
+{
+ return !!le32_get_bits(desc->u.qcn9274_compact.mpdu_start.info4,
+ RX_MPDU_START_INFO4_MPDU_SEQ_CTRL_VALID);
+}
+
+static bool ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_fc_valid(struct hal_rx_desc *desc)
+{
+ return !!le32_get_bits(desc->u.qcn9274_compact.mpdu_start.info4,
+ RX_MPDU_START_INFO4_MPDU_FCTRL_VALID);
+}
+
+static u16
+ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_start_seq_no(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.mpdu_start.info4,
+ RX_MPDU_START_INFO4_MPDU_SEQ_NUM);
+}
+
+static u16 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_len(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info10,
+ RX_MSDU_END_INFO10_MSDU_LENGTH);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_sgi(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info12,
+ RX_MSDU_END_INFO12_SGI);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_rate_mcs(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info12,
+ RX_MSDU_END_INFO12_RATE_MCS);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_rx_bw(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info12,
+ RX_MSDU_END_INFO12_RECV_BW);
+}
+
+static u32 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_freq(struct hal_rx_desc *desc)
+{
+ return __le32_to_cpu(desc->u.qcn9274_compact.msdu_end.phy_meta_data);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_pkt_type(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info12,
+ RX_MSDU_END_INFO12_PKT_TYPE);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_nss(struct hal_rx_desc *desc)
+{
+ return le32_get_bits(desc->u.qcn9274_compact.msdu_end.info12,
+ RX_MSDU_END_QCN9274_INFO12_MIMO_SS_BITMAP);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_tid(struct hal_rx_desc *desc)
+{
+ return le16_get_bits(desc->u.qcn9274_compact.msdu_end.info5,
+ RX_MSDU_END_QCN9274_INFO5_TID);
+}
+
+static u16 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_peer_id(struct hal_rx_desc *desc)
+{
+ return __le16_to_cpu(desc->u.qcn9274_compact.mpdu_start.sw_peer_id);
+}
+
+static void ath12k_hw_qcn9274_compact_rx_desc_copy_end_tlv(struct hal_rx_desc *fdesc,
+ struct hal_rx_desc *ldesc)
+{
+ fdesc->u.qcn9274_compact.msdu_end = ldesc->u.qcn9274_compact.msdu_end;
+}
+
+static u32 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_ppdu_id(struct hal_rx_desc *desc)
+{
+ return __le16_to_cpu(desc->u.qcn9274_compact.mpdu_start.phy_ppdu_id);
+}
+
+static void
+ath12k_hw_qcn9274_compact_rx_desc_set_msdu_len(struct hal_rx_desc *desc, u16 len)
+{
+ u32 info = __le32_to_cpu(desc->u.qcn9274_compact.msdu_end.info10);
+
+ info = u32_replace_bits(info, len, RX_MSDU_END_INFO10_MSDU_LENGTH);
+ desc->u.qcn9274_compact.msdu_end.info10 = __cpu_to_le32(info);
+}
+
+static u8 *ath12k_hw_qcn9274_compact_rx_desc_get_msdu_payload(struct hal_rx_desc *desc)
+{
+ return &desc->u.qcn9274_compact.msdu_payload[0];
+}
+
+static u32 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_start_offset(void)
+{
+ return offsetof(struct hal_rx_desc_qcn9274_compact, mpdu_start);
+}
+
+static u32 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_end_offset(void)
+{
+ return offsetof(struct hal_rx_desc_qcn9274_compact, msdu_end);
+}
+
+static bool ath12k_hw_qcn9274_compact_rx_desc_mac_addr2_valid(struct hal_rx_desc *desc)
+{
+ return __le32_to_cpu(desc->u.qcn9274_compact.mpdu_start.info4) &
+ RX_MPDU_START_INFO4_MAC_ADDR2_VALID;
+}
+
+static u8 *ath12k_hw_qcn9274_compact_rx_desc_mpdu_start_addr2(struct hal_rx_desc *desc)
+{
+ return desc->u.qcn9274_compact.mpdu_start.addr2;
+}
+
+static bool ath12k_hw_qcn9274_compact_rx_desc_is_da_mcbc(struct hal_rx_desc *desc)
+{
+ return __le32_to_cpu(desc->u.qcn9274_compact.mpdu_start.info6) &
+ RX_MPDU_START_INFO6_MCAST_BCAST;
+}
+
+static void ath12k_hw_qcn9274_compact_rx_desc_get_dot11_hdr(struct hal_rx_desc *desc,
+ struct ieee80211_hdr *hdr)
+{
+ hdr->frame_control = desc->u.qcn9274_compact.mpdu_start.frame_ctrl;
+ hdr->duration_id = desc->u.qcn9274_compact.mpdu_start.duration;
+ ether_addr_copy(hdr->addr1, desc->u.qcn9274_compact.mpdu_start.addr1);
+ ether_addr_copy(hdr->addr2, desc->u.qcn9274_compact.mpdu_start.addr2);
+ ether_addr_copy(hdr->addr3, desc->u.qcn9274_compact.mpdu_start.addr3);
+ if (__le32_to_cpu(desc->u.qcn9274_compact.mpdu_start.info4) &
+ RX_MPDU_START_INFO4_MAC_ADDR4_VALID) {
+ ether_addr_copy(hdr->addr4, desc->u.qcn9274_compact.mpdu_start.addr4);
+ }
+ hdr->seq_ctrl = desc->u.qcn9274_compact.mpdu_start.seq_ctrl;
+}
+
+static void
+ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc,
+ u8 *crypto_hdr,
+ enum hal_encrypt_type enctype)
+{
+ unsigned int key_id;
+
+ switch (enctype) {
+ case HAL_ENCRYPT_TYPE_OPEN:
+ return;
+ case HAL_ENCRYPT_TYPE_TKIP_NO_MIC:
+ case HAL_ENCRYPT_TYPE_TKIP_MIC:
+ crypto_hdr[0] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274_compact.mpdu_start.pn[0]);
+ crypto_hdr[1] = 0;
+ crypto_hdr[2] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE1(desc->u.qcn9274_compact.mpdu_start.pn[0]);
+ break;
+ case HAL_ENCRYPT_TYPE_CCMP_128:
+ case HAL_ENCRYPT_TYPE_CCMP_256:
+ case HAL_ENCRYPT_TYPE_GCMP_128:
+ case HAL_ENCRYPT_TYPE_AES_GCMP_256:
+ crypto_hdr[0] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE1(desc->u.qcn9274_compact.mpdu_start.pn[0]);
+ crypto_hdr[1] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274_compact.mpdu_start.pn[0]);
+ crypto_hdr[2] = 0;
+ break;
+ case HAL_ENCRYPT_TYPE_WEP_40:
+ case HAL_ENCRYPT_TYPE_WEP_104:
+ case HAL_ENCRYPT_TYPE_WEP_128:
+ case HAL_ENCRYPT_TYPE_WAPI_GCM_SM4:
+ case HAL_ENCRYPT_TYPE_WAPI:
+ return;
+ }
+ key_id = le32_get_bits(desc->u.qcn9274_compact.mpdu_start.info5,
+ RX_MPDU_START_INFO5_KEY_ID);
+ crypto_hdr[3] = 0x20 | (key_id << 6);
+ crypto_hdr[4] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE3(desc->u.qcn9274_compact.mpdu_start.pn[0]);
+ crypto_hdr[5] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE4(desc->u.qcn9274_compact.mpdu_start.pn[0]);
+ crypto_hdr[6] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE1(desc->u.qcn9274_compact.mpdu_start.pn[1]);
+ crypto_hdr[7] =
+ HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274_compact.mpdu_start.pn[1]);
+}
+
+static u16 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc)
+{
+ return __le16_to_cpu(desc->u.qcn9274_compact.mpdu_start.frame_ctrl);
+}
+
+static bool ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done(struct hal_rx_desc *desc)
+{
+ return !!le32_get_bits(desc->u.qcn9274_compact.msdu_end.info14,
+ RX_MSDU_END_INFO14_MSDU_DONE);
+}
+
+static bool ath12k_hw_qcn9274_compact_dp_rx_h_l4_cksum_fail(struct hal_rx_desc *desc)
+{
+ return !!le32_get_bits(desc->u.qcn9274_compact.msdu_end.info13,
+ RX_MSDU_END_INFO13_TCP_UDP_CKSUM_FAIL);
+}
+
+static bool ath12k_hw_qcn9274_compact_dp_rx_h_ip_cksum_fail(struct hal_rx_desc *desc)
+{
+ return !!le32_get_bits(desc->u.qcn9274_compact.msdu_end.info13,
+ RX_MSDU_END_INFO13_IP_CKSUM_FAIL);
+}
+
+static bool ath12k_hw_qcn9274_compact_dp_rx_h_is_decrypted(struct hal_rx_desc *desc)
+{
+ return (le32_get_bits(desc->u.qcn9274_compact.msdu_end.info14,
+ RX_MSDU_END_INFO14_DECRYPT_STATUS_CODE) ==
+ RX_DESC_DECRYPT_STATUS_CODE_OK);
+}
+
+static u32 ath12k_hw_qcn9274_compact_dp_rx_h_mpdu_err(struct hal_rx_desc *desc)
+{
+ u32 info = __le32_to_cpu(desc->u.qcn9274_compact.msdu_end.info13);
+ u32 errmap = 0;
+
+ if (info & RX_MSDU_END_INFO13_FCS_ERR)
+ errmap |= HAL_RX_MPDU_ERR_FCS;
+
+ if (info & RX_MSDU_END_INFO13_DECRYPT_ERR)
+ errmap |= HAL_RX_MPDU_ERR_DECRYPT;
+
+ if (info & RX_MSDU_END_INFO13_TKIP_MIC_ERR)
+ errmap |= HAL_RX_MPDU_ERR_TKIP_MIC;
+
+ if (info & RX_MSDU_END_INFO13_A_MSDU_ERROR)
+ errmap |= HAL_RX_MPDU_ERR_AMSDU_ERR;
+
+ if (info & RX_MSDU_END_INFO13_OVERFLOW_ERR)
+ errmap |= HAL_RX_MPDU_ERR_OVERFLOW;
+
+ if (info & RX_MSDU_END_INFO13_MSDU_LEN_ERR)
+ errmap |= HAL_RX_MPDU_ERR_MSDU_LEN;
+
+ if (info & RX_MSDU_END_INFO13_MPDU_LEN_ERR)
+ errmap |= HAL_RX_MPDU_ERR_MPDU_LEN;
+
+ return errmap;
+}
+
+static u32 ath12k_hw_qcn9274_compact_get_rx_desc_size(void)
+{
+ return sizeof(struct hal_rx_desc_qcn9274_compact);
+}
+
+static u8 ath12k_hw_qcn9274_compact_rx_desc_get_msdu_src_link(struct hal_rx_desc *desc)
+{
+ return le64_get_bits(desc->u.qcn9274_compact.msdu_end.msdu_end_tag,
+ RX_MSDU_END_64_TLV_SRC_LINK_ID);
+}
+
+const struct hal_rx_ops hal_rx_qcn9274_compact_ops = {
+ .rx_desc_get_first_msdu = ath12k_hw_qcn9274_compact_rx_desc_get_first_msdu,
+ .rx_desc_get_last_msdu = ath12k_hw_qcn9274_compact_rx_desc_get_last_msdu,
+ .rx_desc_get_l3_pad_bytes = ath12k_hw_qcn9274_compact_rx_desc_get_l3_pad_bytes,
+ .rx_desc_encrypt_valid = ath12k_hw_qcn9274_compact_rx_desc_encrypt_valid,
+ .rx_desc_get_encrypt_type = ath12k_hw_qcn9274_compact_rx_desc_get_encrypt_type,
+ .rx_desc_get_decap_type = ath12k_hw_qcn9274_compact_rx_desc_get_decap_type,
+ .rx_desc_get_mesh_ctl = ath12k_hw_qcn9274_compact_rx_desc_get_mesh_ctl,
+ .rx_desc_get_mpdu_seq_ctl_vld =
+ ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_seq_ctl_vld,
+ .rx_desc_get_mpdu_fc_valid = ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_fc_valid,
+ .rx_desc_get_mpdu_start_seq_no =
+ ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_start_seq_no,
+ .rx_desc_get_msdu_len = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_len,
+ .rx_desc_get_msdu_sgi = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_sgi,
+ .rx_desc_get_msdu_rate_mcs = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_rate_mcs,
+ .rx_desc_get_msdu_rx_bw = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_rx_bw,
+ .rx_desc_get_msdu_freq = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_freq,
+ .rx_desc_get_msdu_pkt_type = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_pkt_type,
+ .rx_desc_get_msdu_nss = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_nss,
+ .rx_desc_get_mpdu_tid = ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_tid,
+ .rx_desc_get_mpdu_peer_id = ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_peer_id,
+ .rx_desc_copy_end_tlv = ath12k_hw_qcn9274_compact_rx_desc_copy_end_tlv,
+ .rx_desc_get_mpdu_ppdu_id = ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_ppdu_id,
+ .rx_desc_set_msdu_len = ath12k_hw_qcn9274_compact_rx_desc_set_msdu_len,
+ .rx_desc_get_msdu_payload = ath12k_hw_qcn9274_compact_rx_desc_get_msdu_payload,
+ .rx_desc_get_mpdu_start_offset =
+ ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_start_offset,
+ .rx_desc_get_msdu_end_offset =
+ ath12k_hw_qcn9274_compact_rx_desc_get_msdu_end_offset,
+ .rx_desc_mac_addr2_valid = ath12k_hw_qcn9274_compact_rx_desc_mac_addr2_valid,
+ .rx_desc_mpdu_start_addr2 = ath12k_hw_qcn9274_compact_rx_desc_mpdu_start_addr2,
+ .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_compact_rx_desc_is_da_mcbc,
+ .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_compact_rx_desc_get_dot11_hdr,
+ .rx_desc_get_crypto_header = ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr,
+ .rx_desc_get_mpdu_frame_ctl =
+ ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl,
+ .dp_rx_h_msdu_done = ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done,
+ .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_l4_cksum_fail,
+ .dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_ip_cksum_fail,
+ .dp_rx_h_is_decrypted = ath12k_hw_qcn9274_compact_dp_rx_h_is_decrypted,
+ .dp_rx_h_mpdu_err = ath12k_hw_qcn9274_compact_dp_rx_h_mpdu_err,
+ .rx_desc_get_desc_size = ath12k_hw_qcn9274_compact_get_rx_desc_size,
+ .rx_desc_get_msdu_src_link_id =
+ ath12k_hw_qcn9274_compact_rx_desc_get_msdu_src_link,
+};
+
+const struct hal_ops hal_qcn9274_ops = {
+ .create_srng_config = ath12k_hal_srng_create_config_qcn9274,
+ .tcl_to_wbm_rbm_map = ath12k_hal_qcn9274_tcl_to_wbm_rbm_map,
+ .rxdma_ring_wmask_rx_mpdu_start = ath12k_hal_qcn9274_rx_mpdu_start_wmask_get,
+ .rxdma_ring_wmask_rx_msdu_end = ath12k_hal_qcn9274_rx_msdu_end_wmask_get,
+ .get_hal_rx_compact_ops = ath12k_hal_qcn9274_get_hal_rx_compact_ops,
};
static bool ath12k_hw_wcn7850_rx_desc_get_first_msdu(struct hal_rx_desc *desc)
@@ -1134,7 +1513,17 @@ static u32 ath12k_hw_wcn7850_dp_rx_h_mpdu_err(struct hal_rx_desc *desc)
return errmap;
}
-const struct hal_ops hal_wcn7850_ops = {
+static u32 ath12k_hw_wcn7850_get_rx_desc_size(void)
+{
+ return sizeof(struct hal_rx_desc_wcn7850);
+}
+
+static u8 ath12k_hw_wcn7850_rx_desc_get_msdu_src_link(struct hal_rx_desc *desc)
+{
+ return 0;
+}
+
+const struct hal_rx_ops hal_rx_wcn7850_ops = {
.rx_desc_get_first_msdu = ath12k_hw_wcn7850_rx_desc_get_first_msdu,
.rx_desc_get_last_msdu = ath12k_hw_wcn7850_rx_desc_get_last_msdu,
.rx_desc_get_l3_pad_bytes = ath12k_hw_wcn7850_rx_desc_get_l3_pad_bytes,
@@ -1167,13 +1556,21 @@ const struct hal_ops hal_wcn7850_ops = {
.rx_desc_get_dot11_hdr = ath12k_hw_wcn7850_rx_desc_get_dot11_hdr,
.rx_desc_get_crypto_header = ath12k_hw_wcn7850_rx_desc_get_crypto_hdr,
.rx_desc_get_mpdu_frame_ctl = ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl,
- .create_srng_config = ath12k_hal_srng_create_config_wcn7850,
- .tcl_to_wbm_rbm_map = ath12k_hal_wcn7850_tcl_to_wbm_rbm_map,
.dp_rx_h_msdu_done = ath12k_hw_wcn7850_dp_rx_h_msdu_done,
.dp_rx_h_l4_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_l4_cksum_fail,
.dp_rx_h_ip_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_ip_cksum_fail,
.dp_rx_h_is_decrypted = ath12k_hw_wcn7850_dp_rx_h_is_decrypted,
.dp_rx_h_mpdu_err = ath12k_hw_wcn7850_dp_rx_h_mpdu_err,
+ .rx_desc_get_desc_size = ath12k_hw_wcn7850_get_rx_desc_size,
+ .rx_desc_get_msdu_src_link_id = ath12k_hw_wcn7850_rx_desc_get_msdu_src_link,
+};
+
+const struct hal_ops hal_wcn7850_ops = {
+ .create_srng_config = ath12k_hal_srng_create_config_wcn7850,
+ .tcl_to_wbm_rbm_map = ath12k_hal_wcn7850_tcl_to_wbm_rbm_map,
+ .rxdma_ring_wmask_rx_mpdu_start = NULL,
+ .rxdma_ring_wmask_rx_msdu_end = NULL,
+ .get_hal_rx_compact_ops = NULL,
};
static int ath12k_hal_alloc_cont_rdp(struct ath12k_base *ab)
diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h
index fc47e7e6b498..107927d64bbb 100644
--- a/drivers/net/wireless/ath/ath12k/hal.h
+++ b/drivers/net/wireless/ath/ath12k/hal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_HAL_H
@@ -1023,6 +1023,8 @@ struct ath12k_hal {
/* shadow register configuration */
u32 shadow_reg_addr[HAL_SHADOW_NUM_REGS];
int num_shadow_reg_configured;
+
+ u32 hal_desc_sz;
};
/* Maps WBM ring number and Return Buffer Manager Id per TCL ring */
@@ -1031,7 +1033,7 @@ struct ath12k_hal_tcl_to_wbm_rbm_map {
u8 rbm_id;
};
-struct hal_ops {
+struct hal_rx_ops {
bool (*rx_desc_get_first_msdu)(struct hal_rx_desc *desc);
bool (*rx_desc_get_last_msdu)(struct hal_rx_desc *desc);
u8 (*rx_desc_get_l3_pad_bytes)(struct hal_rx_desc *desc);
@@ -1070,18 +1072,30 @@ struct hal_ops {
void (*rx_desc_get_crypto_header)(struct hal_rx_desc *desc,
u8 *crypto_hdr,
enum hal_encrypt_type enctype);
- int (*create_srng_config)(struct ath12k_base *ab);
bool (*dp_rx_h_msdu_done)(struct hal_rx_desc *desc);
bool (*dp_rx_h_l4_cksum_fail)(struct hal_rx_desc *desc);
bool (*dp_rx_h_ip_cksum_fail)(struct hal_rx_desc *desc);
bool (*dp_rx_h_is_decrypted)(struct hal_rx_desc *desc);
u32 (*dp_rx_h_mpdu_err)(struct hal_rx_desc *desc);
+ u32 (*rx_desc_get_desc_size)(void);
+ u8 (*rx_desc_get_msdu_src_link_id)(struct hal_rx_desc *desc);
+};
+
+struct hal_ops {
+ int (*create_srng_config)(struct ath12k_base *ab);
+ u16 (*rxdma_ring_wmask_rx_mpdu_start)(void);
+ u32 (*rxdma_ring_wmask_rx_msdu_end)(void);
+ const struct hal_rx_ops *(*get_hal_rx_compact_ops)(void);
const struct ath12k_hal_tcl_to_wbm_rbm_map *tcl_to_wbm_rbm_map;
};
extern const struct hal_ops hal_qcn9274_ops;
extern const struct hal_ops hal_wcn7850_ops;
+extern const struct hal_rx_ops hal_rx_qcn9274_ops;
+extern const struct hal_rx_ops hal_rx_qcn9274_compact_ops;
+extern const struct hal_rx_ops hal_rx_wcn7850_ops;
+
u32 ath12k_hal_reo_qdesc_size(u32 ba_window_size, u8 tid);
void ath12k_hal_reo_qdesc_setup(struct hal_rx_reo_queue *qdesc,
int tid, u32 ba_window_size,
diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h
index 6c17adc6d60b..63340256d3f6 100644
--- a/drivers/net/wireless/ath/ath12k/hal_desc.h
+++ b/drivers/net/wireless/ath/ath12k/hal_desc.h
@@ -2500,13 +2500,13 @@ struct hal_rx_reo_queue {
#define HAL_REO_UPD_RX_QUEUE_INFO1_PN_HANDLE_ENABLE BIT(30)
#define HAL_REO_UPD_RX_QUEUE_INFO1_IGNORE_AMPDU_FLG BIT(31)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_BA_WINDOW_SIZE GENMASK(7, 0)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_SIZE GENMASK(9, 8)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_SVLD BIT(10)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_SSN GENMASK(22, 11)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_SEQ_2K_ERR BIT(23)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_ERR BIT(24)
-#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_VALID BIT(25)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_BA_WINDOW_SIZE GENMASK(9, 0)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_SIZE GENMASK(11, 10)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_SVLD BIT(12)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_SSN GENMASK(24, 13)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_SEQ_2K_ERR BIT(25)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_ERR BIT(26)
+#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_VALID BIT(27)
struct hal_reo_update_rx_queue {
struct hal_reo_cmd_hdr cmd;
@@ -2517,6 +2517,12 @@ struct hal_reo_update_rx_queue {
__le32 pn[4];
} __packed;
+struct hal_rx_reo_queue_1k {
+ struct hal_desc_header desc_hdr;
+ __le32 rx_bitmap_1023_288[23];
+ __le32 reserved[8];
+} __packed;
+
#define HAL_REO_UNBLOCK_CACHE_INFO0_UNBLK_CACHE BIT(0)
#define HAL_REO_UNBLOCK_CACHE_INFO0_RESOURCE_IDX GENMASK(2, 1)
diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.c b/drivers/net/wireless/ath/ath12k/hal_rx.c
index 4f25eb9f7745..f7c1aaa3b5d4 100644
--- a/drivers/net/wireless/ath/ath12k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath12k/hal_rx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "debug.h"
@@ -247,7 +247,7 @@ int ath12k_hal_reo_cmd_send(struct ath12k_base *ab, struct hal_srng *srng,
case HAL_REO_CMD_UNBLOCK_CACHE:
case HAL_REO_CMD_FLUSH_TIMEOUT_LIST:
ath12k_warn(ab, "Unsupported reo command %d\n", type);
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
break;
default:
ath12k_warn(ab, "Unknown reo command %d\n", type);
@@ -688,23 +688,28 @@ void ath12k_hal_reo_update_rx_reo_queue_status(struct ath12k_base *ab,
u32 ath12k_hal_reo_qdesc_size(u32 ba_window_size, u8 tid)
{
- u32 num_ext_desc;
+ u32 num_ext_desc, num_1k_desc = 0;
if (ba_window_size <= 1) {
if (tid != HAL_DESC_REO_NON_QOS_TID)
num_ext_desc = 1;
else
num_ext_desc = 0;
+
} else if (ba_window_size <= 105) {
num_ext_desc = 1;
} else if (ba_window_size <= 210) {
num_ext_desc = 2;
- } else {
+ } else if (ba_window_size <= 256) {
num_ext_desc = 3;
+ } else {
+ num_ext_desc = 10;
+ num_1k_desc = 1;
}
return sizeof(struct hal_rx_reo_queue) +
- (num_ext_desc * sizeof(struct hal_rx_reo_queue_ext));
+ (num_ext_desc * sizeof(struct hal_rx_reo_queue_ext)) +
+ (num_1k_desc * sizeof(struct hal_rx_reo_queue_1k));
}
void ath12k_hal_reo_qdesc_setup(struct hal_rx_reo_queue *qdesc,
diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c
index de60d988d860..0b17dfd47856 100644
--- a/drivers/net/wireless/ath/ath12k/hw.c
+++ b/drivers/net/wireless/ath/ath12k/hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/types.h>
@@ -897,7 +897,6 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.reoq_lut_support = false,
.supports_shadow_regs = false,
- .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9274),
.num_tcl_banks = 48,
.max_tx_ring = 4,
@@ -914,6 +913,13 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.rfkill_on_level = 0,
.rddm_size = 0,
+
+ .def_num_link = 0,
+ .max_mlo_peer = 256,
+
+ .otp_board_id_register = QCN9274_QFPROM_RAW_RFA_PDET_ROW13_LSB,
+
+ .supports_sta_ps = false,
},
{
.name = "wcn7850 hw2.0",
@@ -950,7 +956,10 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.vdev_start_delay = true,
.interface_modes = BIT(NL80211_IFTYPE_STATION) |
- BIT(NL80211_IFTYPE_AP),
+ BIT(NL80211_IFTYPE_AP) |
+ BIT(NL80211_IFTYPE_P2P_DEVICE) |
+ BIT(NL80211_IFTYPE_P2P_CLIENT) |
+ BIT(NL80211_IFTYPE_P2P_GO),
.supports_monitor = false,
.idle_ps = true,
@@ -960,7 +969,6 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.reoq_lut_support = false,
.supports_shadow_regs = true,
- .hal_desc_sz = sizeof(struct hal_rx_desc_wcn7850),
.num_tcl_banks = 7,
.max_tx_ring = 3,
@@ -978,6 +986,13 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.rfkill_on_level = 1,
.rddm_size = 0x780000,
+
+ .def_num_link = 2,
+ .max_mlo_peer = 32,
+
+ .otp_board_id_register = 0,
+
+ .supports_sta_ps = true,
},
{
.name = "qcn9274 hw2.0",
@@ -987,7 +1002,7 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.board_size = 256 * 1024,
.cal_offset = 128 * 1024,
},
- .max_radios = 1,
+ .max_radios = 2,
.single_pdev_only = false,
.qmi_service_ins_id = ATH12K_QMI_WLFW_SERVICE_INS_ID_V01_QCN9274,
.internal_sleep_clock = false,
@@ -1023,7 +1038,6 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.reoq_lut_support = false,
.supports_shadow_regs = false,
- .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9274),
.num_tcl_banks = 48,
.max_tx_ring = 4,
@@ -1040,6 +1054,13 @@ static const struct ath12k_hw_params ath12k_hw_params[] = {
.rfkill_on_level = 0,
.rddm_size = 0,
+
+ .def_num_link = 0,
+ .max_mlo_peer = 256,
+
+ .otp_board_id_register = QCN9274_QFPROM_RAW_RFA_PDET_ROW13_LSB,
+
+ .supports_sta_ps = false,
},
};
diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h
index d2622bfef942..87965980b938 100644
--- a/drivers/net/wireless/ath/ath12k/hw.h
+++ b/drivers/net/wireless/ath/ath12k/hw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_HW_H
@@ -17,19 +17,30 @@
/* Num VDEVS per radio */
#define TARGET_NUM_VDEVS (16 + 1)
-#define TARGET_NUM_PEERS_PDEV (512 + TARGET_NUM_VDEVS)
+#define TARGET_NUM_PEERS_PDEV_SINGLE (TARGET_NUM_STATIONS_SINGLE + \
+ TARGET_NUM_VDEVS)
+#define TARGET_NUM_PEERS_PDEV_DBS (TARGET_NUM_STATIONS_DBS + \
+ TARGET_NUM_VDEVS)
+#define TARGET_NUM_PEERS_PDEV_DBS_SBS (TARGET_NUM_STATIONS_DBS_SBS + \
+ TARGET_NUM_VDEVS)
/* Num of peers for Single Radio mode */
-#define TARGET_NUM_PEERS_SINGLE (TARGET_NUM_PEERS_PDEV)
+#define TARGET_NUM_PEERS_SINGLE (TARGET_NUM_PEERS_PDEV_SINGLE)
/* Num of peers for DBS */
-#define TARGET_NUM_PEERS_DBS (2 * TARGET_NUM_PEERS_PDEV)
+#define TARGET_NUM_PEERS_DBS (2 * TARGET_NUM_PEERS_PDEV_DBS)
/* Num of peers for DBS_SBS */
-#define TARGET_NUM_PEERS_DBS_SBS (3 * TARGET_NUM_PEERS_PDEV)
+#define TARGET_NUM_PEERS_DBS_SBS (3 * TARGET_NUM_PEERS_PDEV_DBS_SBS)
-/* Max num of stations (per radio) */
-#define TARGET_NUM_STATIONS 512
+/* Max num of stations for Single Radio mode */
+#define TARGET_NUM_STATIONS_SINGLE 512
+
+/* Max num of stations for DBS */
+#define TARGET_NUM_STATIONS_DBS 128
+
+/* Max num of stations for DBS_SBS */
+#define TARGET_NUM_STATIONS_DBS_SBS 128
#define TARGET_NUM_PEERS(x) TARGET_NUM_PEERS_##x
#define TARGET_NUM_PEER_KEYS 2
@@ -66,6 +77,8 @@
#define TARGET_NUM_WDS_ENTRIES 32
#define TARGET_DMA_BURST_SIZE 1
#define TARGET_RX_BATCHMODE 1
+#define TARGET_RX_PEER_METADATA_VER_V1A 2
+#define TARGET_RX_PEER_METADATA_VER_V1B 3
#define ATH12K_HW_MAX_QUEUES 4
#define ATH12K_QUEUE_LEN 4096
@@ -174,7 +187,6 @@ struct ath12k_hw_params {
bool reoq_lut_support:1;
bool supports_shadow_regs:1;
- u32 hal_desc_sz;
u32 num_tcl_banks;
u32 max_tx_ring;
@@ -192,6 +204,13 @@ struct ath12k_hw_params {
u32 rfkill_on_level;
u32 rddm_size;
+
+ u8 def_num_link;
+ u16 max_mlo_peer;
+
+ u32 otp_board_id_register;
+
+ bool supports_sta_ps;
};
struct ath12k_hw_ops {
@@ -242,10 +261,16 @@ enum ath12k_bd_ie_board_type {
ATH12K_BD_IE_BOARD_DATA = 1,
};
+enum ath12k_bd_ie_regdb_type {
+ ATH12K_BD_IE_REGDB_NAME = 0,
+ ATH12K_BD_IE_REGDB_DATA = 1,
+};
+
enum ath12k_bd_ie_type {
/* contains sub IEs of enum ath12k_bd_ie_board_type */
ATH12K_BD_IE_BOARD = 0,
- ATH12K_BD_IE_BOARD_EXT = 1,
+ /* contains sub IEs of enum ath12k_bd_ie_regdb_type */
+ ATH12K_BD_IE_REGDB = 1,
};
struct ath12k_hw_regs {
@@ -315,6 +340,18 @@ struct ath12k_hw_regs {
u32 hal_reo_status_ring_base;
};
+static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type)
+{
+ switch (type) {
+ case ATH12K_BD_IE_BOARD:
+ return "board data";
+ case ATH12K_BD_IE_REGDB:
+ return "regdb data";
+ }
+
+ return "unknown";
+}
+
int ath12k_hw_init(struct ath12k_base *ab);
#endif
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index 88cec54c6c2e..52a5fb8b03e9 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <net/mac80211.h>
@@ -241,8 +241,8 @@ static const u32 ath12k_smps_map[] = {
[WLAN_HT_CAP_SM_PS_DISABLED] = WMI_PEER_SMPS_PS_NONE,
};
-static int ath12k_start_vdev_delay(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+static int ath12k_start_vdev_delay(struct ath12k *ar,
+ struct ath12k_vif *arvif);
static const char *ath12k_mac_phymode_str(enum wmi_phy_mode mode)
{
@@ -542,7 +542,7 @@ struct ath12k_vif *ath12k_mac_get_arvif(struct ath12k *ar, u32 vdev_id)
arvif_iter.vdev_id = vdev_id;
flags = IEEE80211_IFACE_ITER_RESUME_ALL;
- ieee80211_iterate_active_interfaces_atomic(ar->hw,
+ ieee80211_iterate_active_interfaces_atomic(ath12k_ar_to_hw(ar),
flags,
ath12k_get_arvif_iter,
&arvif_iter);
@@ -563,7 +563,8 @@ struct ath12k_vif *ath12k_mac_get_arvif_by_vdev_id(struct ath12k_base *ab,
for (i = 0; i < ab->num_radios; i++) {
pdev = rcu_dereference(ab->pdevs_active[i]);
- if (pdev && pdev->ar) {
+ if (pdev && pdev->ar &&
+ (pdev->ar->allocated_vdev_map & (1LL << vdev_id))) {
arvif = ath12k_mac_get_arvif(pdev->ar, vdev_id);
if (arvif)
return arvif;
@@ -1040,7 +1041,7 @@ static int ath12k_mac_monitor_start(struct ath12k *ar)
if (ar->monitor_started)
return 0;
- ieee80211_iter_chan_contexts_atomic(ar->hw,
+ ieee80211_iter_chan_contexts_atomic(ath12k_ar_to_hw(ar),
ath12k_mac_get_any_chandef_iter,
&chandef);
if (!chandef)
@@ -1083,9 +1084,49 @@ static int ath12k_mac_monitor_stop(struct ath12k *ar)
return ret;
}
-static int ath12k_mac_op_config(struct ieee80211_hw *hw, u32 changed)
+static int ath12k_mac_vdev_stop(struct ath12k_vif *arvif)
+{
+ struct ath12k *ar = arvif->ar;
+ int ret;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ reinit_completion(&ar->vdev_setup_done);
+
+ ret = ath12k_wmi_vdev_stop(ar, arvif->vdev_id);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to stop WMI vdev %i: %d\n",
+ arvif->vdev_id, ret);
+ goto err;
+ }
+
+ ret = ath12k_mac_vdev_setup_sync(ar);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to synchronize setup for vdev %i: %d\n",
+ arvif->vdev_id, ret);
+ goto err;
+ }
+
+ WARN_ON(ar->num_started_vdevs == 0);
+
+ ar->num_started_vdevs--;
+ ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "vdev %pM stopped, vdev_id %d\n",
+ arvif->vif->addr, arvif->vdev_id);
+
+ if (test_bit(ATH12K_CAC_RUNNING, &ar->dev_flags)) {
+ clear_bit(ATH12K_CAC_RUNNING, &ar->dev_flags);
+ ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "CAC Stopped for vdev %d\n",
+ arvif->vdev_id);
+ }
+
+ return 0;
+err:
+ return ret;
+}
+
+static int ath12k_mac_config(struct ath12k *ar, u32 changed)
{
- struct ath12k *ar = hw->priv;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
struct ieee80211_conf *conf = &hw->conf;
int ret = 0;
@@ -1122,11 +1163,84 @@ err_mon_del:
return ret;
}
+static int ath12k_mac_op_config(struct ieee80211_hw *hw, u32 changed)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ int ret;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ ret = ath12k_mac_config(ar, changed);
+ if (ret)
+ ath12k_warn(ar->ab, "failed to update config pdev idx %d: %d\n",
+ ar->pdev_idx, ret);
+
+ return ret;
+}
+
+static int ath12k_mac_setup_bcn_p2p_ie(struct ath12k_vif *arvif,
+ struct sk_buff *bcn)
+{
+ struct ath12k *ar = arvif->ar;
+ struct ieee80211_mgmt *mgmt;
+ const u8 *p2p_ie;
+ int ret;
+
+ mgmt = (void *)bcn->data;
+ p2p_ie = cfg80211_find_vendor_ie(WLAN_OUI_WFA, WLAN_OUI_TYPE_WFA_P2P,
+ mgmt->u.beacon.variable,
+ bcn->len - (mgmt->u.beacon.variable -
+ bcn->data));
+ if (!p2p_ie) {
+ ath12k_warn(ar->ab, "no P2P ie found in beacon\n");
+ return -ENOENT;
+ }
+
+ ret = ath12k_wmi_p2p_go_bcn_ie(ar, arvif->vdev_id, p2p_ie);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to submit P2P GO bcn ie for vdev %i: %d\n",
+ arvif->vdev_id, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int ath12k_mac_remove_vendor_ie(struct sk_buff *skb, unsigned int oui,
+ u8 oui_type, size_t ie_offset)
+{
+ const u8 *next, *end;
+ size_t len;
+ u8 *ie;
+
+ if (WARN_ON(skb->len < ie_offset))
+ return -EINVAL;
+
+ ie = (u8 *)cfg80211_find_vendor_ie(oui, oui_type,
+ skb->data + ie_offset,
+ skb->len - ie_offset);
+ if (!ie)
+ return -ENOENT;
+
+ len = ie[1] + 2;
+ end = skb->data + skb->len;
+ next = ie + len;
+
+ if (WARN_ON(next > end))
+ return -EINVAL;
+
+ memmove(ie, next, end - next);
+ skb_trim(skb, skb->len - len);
+
+ return 0;
+}
+
static int ath12k_mac_setup_bcn_tmpl(struct ath12k_vif *arvif)
{
struct ath12k *ar = arvif->ar;
struct ath12k_base *ab = ar->ab;
- struct ieee80211_hw *hw = ar->hw;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
struct ieee80211_vif *vif = arvif->vif;
struct ieee80211_mutable_offsets offs = {};
struct sk_buff *bcn;
@@ -1154,14 +1268,37 @@ static int ath12k_mac_setup_bcn_tmpl(struct ath12k_vif *arvif)
ies, (skb_tail_pointer(bcn) - ies)))
arvif->wpaie_present = true;
- ret = ath12k_wmi_bcn_tmpl(ar, arvif->vdev_id, &offs, bcn);
+ if (arvif->vif->type == NL80211_IFTYPE_AP && arvif->vif->p2p) {
+ ret = ath12k_mac_setup_bcn_p2p_ie(arvif, bcn);
+ if (ret) {
+ ath12k_warn(ab, "failed to setup P2P GO bcn ie: %d\n",
+ ret);
+ goto free_bcn_skb;
+ }
- kfree_skb(bcn);
+ /* P2P IE is inserted by firmware automatically (as
+ * configured above) so remove it from the base beacon
+ * template to avoid duplicate P2P IEs in beacon frames.
+ */
+ ret = ath12k_mac_remove_vendor_ie(bcn, WLAN_OUI_WFA,
+ WLAN_OUI_TYPE_WFA_P2P,
+ offsetof(struct ieee80211_mgmt,
+ u.beacon.variable));
+ if (ret) {
+ ath12k_warn(ab, "failed to remove P2P vendor ie: %d\n",
+ ret);
+ goto free_bcn_skb;
+ }
+ }
+
+ ret = ath12k_wmi_bcn_tmpl(ar, arvif->vdev_id, &offs, bcn);
if (ret)
ath12k_warn(ab, "failed to submit beacon template command: %d\n",
ret);
+free_bcn_skb:
+ kfree_skb(bcn);
return ret;
}
@@ -1214,6 +1351,7 @@ static void ath12k_peer_assoc_h_basic(struct ath12k *ar,
struct ath12k_wmi_peer_assoc_arg *arg)
{
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
u32 aid;
lockdep_assert_held(&ar->conf_mutex);
@@ -1228,7 +1366,7 @@ static void ath12k_peer_assoc_h_basic(struct ath12k *ar,
arg->peer_associd = aid;
arg->auth_flag = true;
/* TODO: STA WAR in ath10k for listen interval required? */
- arg->peer_listen_intval = ar->hw->conf.listen_interval;
+ arg->peer_listen_intval = hw->conf.listen_interval;
arg->peer_nss = 1;
arg->peer_caps = vif->bss_conf.assoc_capability;
}
@@ -1242,6 +1380,7 @@ static void ath12k_peer_assoc_h_crypto(struct ath12k *ar,
struct cfg80211_chan_def def;
struct cfg80211_bss *bss;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
const u8 *rsnie = NULL;
const u8 *wpaie = NULL;
@@ -1250,7 +1389,7 @@ static void ath12k_peer_assoc_h_crypto(struct ath12k *ar,
if (WARN_ON(ath12k_mac_vif_chan(vif, &def)))
return;
- bss = cfg80211_get_bss(ar->hw->wiphy, def.chan, info->bssid, NULL, 0,
+ bss = cfg80211_get_bss(hw->wiphy, def.chan, info->bssid, NULL, 0,
IEEE80211_BSS_TYPE_ANY, IEEE80211_PRIVACY_ANY);
if (arvif->rsnie_present || arvif->wpaie_present) {
@@ -1270,7 +1409,7 @@ static void ath12k_peer_assoc_h_crypto(struct ath12k *ar,
ies->data,
ies->len);
rcu_read_unlock();
- cfg80211_put_bss(ar->hw->wiphy, bss);
+ cfg80211_put_bss(hw->wiphy, bss);
}
/* FIXME: base on RSN IE/WPA IE is a correct idea? */
@@ -1304,6 +1443,7 @@ static void ath12k_peer_assoc_h_rates(struct ath12k *ar,
struct cfg80211_chan_def def;
const struct ieee80211_supported_band *sband;
const struct ieee80211_rate *rates;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
enum nl80211_band band;
u32 ratemask;
u8 rate;
@@ -1315,7 +1455,7 @@ static void ath12k_peer_assoc_h_rates(struct ath12k *ar,
return;
band = def.chan->band;
- sband = ar->hw->wiphy->bands[band];
+ sband = hw->wiphy->bands[band];
ratemask = sta->deflink.supp_rates[band];
ratemask &= arvif->bitrate_mask.control[band].legacy;
rates = sband->bitrates;
@@ -2266,12 +2406,11 @@ static int ath12k_setup_peer_smps(struct ath12k *ar, struct ath12k_vif *arvif,
ath12k_smps_map[smps]);
}
-static void ath12k_bss_assoc(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
+static void ath12k_bss_assoc(struct ath12k *ar,
+ struct ath12k_vif *arvif,
struct ieee80211_bss_conf *bss_conf)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ieee80211_vif *vif = arvif->vif;
struct ath12k_wmi_peer_assoc_arg peer_arg;
struct ieee80211_sta *ap_sta;
struct ath12k_peer *peer;
@@ -2361,11 +2500,9 @@ static void ath12k_bss_assoc(struct ieee80211_hw *hw,
arvif->vdev_id, ret);
}
-static void ath12k_bss_disassoc(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+static void ath12k_bss_disassoc(struct ath12k *ar,
+ struct ath12k_vif *arvif)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
int ret;
lockdep_assert_held(&ar->conf_mutex);
@@ -2413,6 +2550,7 @@ static void ath12k_recalculate_mgmt_rate(struct ath12k *ar,
struct cfg80211_chan_def *def)
{
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
const struct ieee80211_supported_band *sband;
u8 basic_rate_idx;
int hw_rate_code;
@@ -2422,7 +2560,7 @@ static void ath12k_recalculate_mgmt_rate(struct ath12k *ar,
lockdep_assert_held(&ar->conf_mutex);
- sband = ar->hw->wiphy->bands[def->chan->band];
+ sband = hw->wiphy->bands[def->chan->band];
basic_rate_idx = ffs(vif->bss_conf.basic_rates) - 1;
bitrate = sband->bitrates[basic_rate_idx].bitrate;
@@ -2449,6 +2587,7 @@ static int ath12k_mac_fils_discovery(struct ath12k_vif *arvif,
struct ieee80211_bss_conf *info)
{
struct ath12k *ar = arvif->ar;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
struct sk_buff *tmpl;
int ret;
u32 interval;
@@ -2457,7 +2596,7 @@ static int ath12k_mac_fils_discovery(struct ath12k_vif *arvif,
if (info->fils_discovery.max_interval) {
interval = info->fils_discovery.max_interval;
- tmpl = ieee80211_get_fils_discovery_tmpl(ar->hw, arvif->vif);
+ tmpl = ieee80211_get_fils_discovery_tmpl(hw, arvif->vif);
if (tmpl)
ret = ath12k_wmi_fils_discovery_tmpl(ar, arvif->vdev_id,
tmpl);
@@ -2465,7 +2604,7 @@ static int ath12k_mac_fils_discovery(struct ath12k_vif *arvif,
unsol_bcast_probe_resp_enabled = 1;
interval = info->unsol_bcast_probe_resp_interval;
- tmpl = ieee80211_get_unsol_bcast_probe_resp_tmpl(ar->hw,
+ tmpl = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw,
arvif->vif);
if (tmpl)
ret = ath12k_wmi_probe_resp_tmpl(ar, arvif->vdev_id,
@@ -2491,13 +2630,60 @@ static int ath12k_mac_fils_discovery(struct ath12k_vif *arvif,
return ret;
}
-static void ath12k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- struct ieee80211_bss_conf *info,
- u64 changed)
+static void ath12k_mac_vif_setup_ps(struct ath12k_vif *arvif)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ath12k *ar = arvif->ar;
+ struct ieee80211_vif *vif = arvif->vif;
+ struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf;
+ enum wmi_sta_powersave_param param;
+ enum wmi_sta_ps_mode psmode;
+ int ret;
+ int timeout;
+ bool enable_ps;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ if (vif->type != NL80211_IFTYPE_STATION)
+ return;
+
+ enable_ps = arvif->ps;
+ if (enable_ps) {
+ psmode = WMI_STA_PS_MODE_ENABLED;
+ param = WMI_STA_PS_PARAM_INACTIVITY_TIME;
+
+ timeout = conf->dynamic_ps_timeout;
+ if (timeout == 0) {
+ /* firmware doesn't like 0 */
+ timeout = ieee80211_tu_to_usec(vif->bss_conf.beacon_int) / 1000;
+ }
+
+ ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param,
+ timeout);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n",
+ arvif->vdev_id, ret);
+ return;
+ }
+ } else {
+ psmode = WMI_STA_PS_MODE_DISABLED;
+ }
+
+ ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n",
+ arvif->vdev_id, psmode ? "enable" : "disable");
+
+ ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode);
+ if (ret)
+ ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n",
+ psmode, arvif->vdev_id, ret);
+}
+
+static void ath12k_mac_bss_info_changed(struct ath12k *ar,
+ struct ath12k_vif *arvif,
+ struct ieee80211_bss_conf *info,
+ u64 changed)
+{
+ struct ieee80211_vif *vif = arvif->vif;
+ struct ieee80211_vif_cfg *vif_cfg = &vif->cfg;
struct cfg80211_chan_def def;
u32 param_id, param_value;
enum nl80211_band band;
@@ -2510,7 +2696,7 @@ static void ath12k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
u8 rateidx;
u32 rate;
- mutex_lock(&ar->conf_mutex);
+ lockdep_assert_held(&ar->conf_mutex);
if (changed & BSS_CHANGED_BEACON_INT) {
arvif->beacon_interval = info->beacon_int;
@@ -2666,9 +2852,9 @@ static void ath12k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
if (changed & BSS_CHANGED_ASSOC) {
if (vif->cfg.assoc)
- ath12k_bss_assoc(hw, vif, info);
+ ath12k_bss_assoc(ar, arvif, info);
else
- ath12k_bss_disassoc(hw, vif);
+ ath12k_bss_disassoc(ar, arvif);
}
if (changed & BSS_CHANGED_TXPOWER) {
@@ -2768,14 +2954,35 @@ static void ath12k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
ath12k_mac_fils_discovery(arvif, info);
- if (changed & BSS_CHANGED_EHT_PUNCTURING)
- arvif->punct_bitmap = info->eht_puncturing;
+ if (changed & BSS_CHANGED_PS &&
+ ar->ab->hw_params->supports_sta_ps) {
+ arvif->ps = vif_cfg->ps;
+ ath12k_mac_vif_setup_ps(arvif);
+ }
+}
+
+static void ath12k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *info,
+ u64 changed)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+
+ ar = ath12k_ah_to_ar(ah);
+
+ mutex_lock(&ar->conf_mutex);
+
+ ath12k_mac_bss_info_changed(ar, arvif, info, changed);
mutex_unlock(&ar->conf_mutex);
}
void __ath12k_mac_scan_finish(struct ath12k *ar)
{
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
+
lockdep_assert_held(&ar->data_lock);
switch (ar->scan.state) {
@@ -2784,7 +2991,7 @@ void __ath12k_mac_scan_finish(struct ath12k *ar)
case ATH12K_SCAN_RUNNING:
case ATH12K_SCAN_ABORTING:
if (ar->scan.is_roc && ar->scan.roc_notify)
- ieee80211_remain_on_channel_expired(ar->hw);
+ ieee80211_remain_on_channel_expired(hw);
fallthrough;
case ATH12K_SCAN_STARTING:
if (!ar->scan.is_roc) {
@@ -2795,7 +3002,7 @@ void __ath12k_mac_scan_finish(struct ath12k *ar)
ATH12K_SCAN_STARTING)),
};
- ieee80211_scan_completed(ar->hw, &info);
+ ieee80211_scan_completed(hw, &info);
}
ar->scan.state = ATH12K_SCAN_IDLE;
@@ -2940,13 +3147,16 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_scan_request *hw_req)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
struct cfg80211_scan_request *req = &hw_req->req;
struct ath12k_wmi_scan_req_arg arg = {};
int ret;
int i;
+ ar = ath12k_ah_to_ar(ah);
+
mutex_lock(&ar->conf_mutex);
spin_lock_bh(&ar->data_lock);
@@ -2988,7 +3198,7 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw,
for (i = 0; i < arg.num_ssids; i++)
arg.ssid[i] = req->ssids[i];
} else {
- arg.scan_flags |= WMI_SCAN_FLAG_PASSIVE;
+ arg.scan_f_passive = 1;
}
if (req->n_channels) {
@@ -3014,7 +3224,7 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw,
}
/* Add a margin to account for event/command processing */
- ieee80211_queue_delayed_work(ar->hw, &ar->scan.timeout,
+ ieee80211_queue_delayed_work(ath12k_ar_to_hw(ar), &ar->scan.timeout,
msecs_to_jiffies(arg.max_scan_time +
ATH12K_MAC_SCAN_TIMEOUT_MSECS));
@@ -3025,13 +3235,17 @@ exit:
kfree(arg.extraie.ptr);
mutex_unlock(&ar->conf_mutex);
+
return ret;
}
static void ath12k_mac_op_cancel_hw_scan(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+
+ ar = ath12k_ah_to_ar(ah);
mutex_lock(&ar->conf_mutex);
ath12k_scan_abort(ar);
@@ -3159,8 +3373,9 @@ static int ath12k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
struct ieee80211_vif *vif, struct ieee80211_sta *sta,
struct ieee80211_key_conf *key)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
struct ath12k_peer *peer;
struct ath12k_sta *arsta;
@@ -3175,6 +3390,9 @@ static int ath12k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
key->cipher == WLAN_CIPHER_SUITE_BIP_CMAC_256)
return 1;
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
+
if (test_bit(ATH12K_FLAG_HW_CRYPTO_DISABLED, &ar->ab->dev_flags))
return 1;
@@ -3696,7 +3914,7 @@ static int ath12k_mac_station_add(struct ath12k *ar,
if (ab->hw_params->vdev_start_delay &&
!arvif->is_started &&
arvif->vdev_type != WMI_VDEV_TYPE_AP) {
- ret = ath12k_start_vdev_delay(ar->hw, vif);
+ ret = ath12k_start_vdev_delay(ar, arvif);
if (ret) {
ath12k_warn(ab, "failed to delay vdev start: %d\n", ret);
goto free_peer;
@@ -3750,7 +3968,8 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
enum ieee80211_sta_state old_state,
enum ieee80211_sta_state new_state)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
struct ath12k_sta *arsta = ath12k_sta_to_arsta(sta);
struct ath12k_peer *peer;
@@ -3761,6 +3980,8 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
new_state == IEEE80211_STA_NOTEXIST))
cancel_work_sync(&arsta->update_wk);
+ ar = ath12k_ah_to_ar(ah);
+
mutex_lock(&ar->conf_mutex);
if (old_state == IEEE80211_STA_NOTEXIST &&
@@ -3775,6 +3996,13 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
sta->addr, arvif->vdev_id);
} else if ((old_state == IEEE80211_STA_NONE &&
new_state == IEEE80211_STA_NOTEXIST)) {
+ if (arvif->vdev_type == WMI_VDEV_TYPE_STA) {
+ ath12k_bss_disassoc(ar, arvif);
+ ret = ath12k_mac_vdev_stop(arvif);
+ if (ret)
+ ath12k_warn(ar->ab, "failed to stop vdev %i: %d\n",
+ arvif->vdev_id, ret);
+ }
ath12k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr);
ret = ath12k_peer_delete(ar, arvif->vdev_id, sta->addr);
@@ -3856,6 +4084,7 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
}
mutex_unlock(&ar->conf_mutex);
+
return ret;
}
@@ -3863,7 +4092,8 @@ static int ath12k_mac_op_sta_set_txpwr(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
int ret;
s16 txpwr;
@@ -3879,6 +4109,8 @@ static int ath12k_mac_op_sta_set_txpwr(struct ieee80211_hw *hw,
if (txpwr > ATH12K_TX_POWER_MAX_VAL || txpwr < ATH12K_TX_POWER_MIN_VAL)
return -EINVAL;
+ ar = ath12k_ah_to_ar(ah);
+
mutex_lock(&ar->conf_mutex);
ret = ath12k_wmi_set_peer_param(ar, sta->addr, arvif->vdev_id,
@@ -3899,12 +4131,15 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
u32 changed)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
struct ath12k_sta *arsta = ath12k_sta_to_arsta(sta);
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
struct ath12k_peer *peer;
u32 bw, smps;
+ ar = ath12k_ah_to_ar(ah);
+
spin_lock_bh(&ar->ab->base_lock);
peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
@@ -3964,10 +4199,10 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw,
ieee80211_queue_work(hw, &arsta->update_wk);
}
-static int ath12k_conf_tx_uapsd(struct ath12k *ar, struct ieee80211_vif *vif,
+static int ath12k_conf_tx_uapsd(struct ath12k_vif *arvif,
u16 ac, bool enable)
{
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ath12k *ar = arvif->ar;
u32 value;
int ret;
@@ -4021,17 +4256,16 @@ exit:
return ret;
}
-static int ath12k_mac_op_conf_tx(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- unsigned int link_id, u16 ac,
- const struct ieee80211_tx_queue_params *params)
+static int ath12k_mac_conf_tx(struct ath12k_vif *arvif,
+ unsigned int link_id, u16 ac,
+ const struct ieee80211_tx_queue_params *params)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
struct wmi_wmm_params_arg *p = NULL;
+ struct ath12k *ar = arvif->ar;
+ struct ath12k_base *ab = ar->ab;
int ret;
- mutex_lock(&ar->conf_mutex);
+ lockdep_assert_held(&ar->conf_mutex);
switch (ac) {
case IEEE80211_AC_VO:
@@ -4061,17 +4295,36 @@ static int ath12k_mac_op_conf_tx(struct ieee80211_hw *hw,
ret = ath12k_wmi_send_wmm_update_cmd(ar, arvif->vdev_id,
&arvif->wmm_params);
if (ret) {
- ath12k_warn(ar->ab, "failed to set wmm params: %d\n", ret);
+ ath12k_warn(ab, "pdev idx %d failed to set wmm params: %d\n",
+ ar->pdev_idx, ret);
goto exit;
}
- ret = ath12k_conf_tx_uapsd(ar, vif, ac, params->uapsd);
-
+ ret = ath12k_conf_tx_uapsd(arvif, ac, params->uapsd);
if (ret)
- ath12k_warn(ar->ab, "failed to set sta uapsd: %d\n", ret);
+ ath12k_warn(ab, "pdev idx %d failed to set sta uapsd: %d\n",
+ ar->pdev_idx, ret);
exit:
+ return ret;
+}
+
+static int ath12k_mac_op_conf_tx(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ unsigned int link_id, u16 ac,
+ const struct ieee80211_tx_queue_params *params)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ int ret;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ mutex_lock(&ar->conf_mutex);
+ ret = ath12k_mac_conf_tx(arvif, link_id, ac, params);
mutex_unlock(&ar->conf_mutex);
+
return ret;
}
@@ -4782,7 +5035,7 @@ static void ath12k_mgmt_over_wmi_tx_drop(struct ath12k *ar, struct sk_buff *skb)
{
int num_mgmt;
- ieee80211_free_txskb(ar->hw, skb);
+ ieee80211_free_txskb(ath12k_ar_to_hw(ar), skb);
num_mgmt = atomic_dec_if_positive(&ar->num_pending_mgmt_tx);
@@ -4914,8 +5167,8 @@ static void ath12k_mgmt_over_wmi_tx_work(struct work_struct *work)
}
arvif = ath12k_vif_to_arvif(skb_cb->vif);
- if (ar->allocated_vdev_map & (1LL << arvif->vdev_id) &&
- arvif->is_started) {
+
+ if (ar->allocated_vdev_map & (1LL << arvif->vdev_id)) {
ret = ath12k_mac_mgmt_tx_wmi(ar, arvif, skb);
if (ret) {
ath12k_warn(ar->ab, "failed to tx mgmt frame, vdev_id %d :%d\n",
@@ -4959,20 +5212,41 @@ static int ath12k_mac_mgmt_tx(struct ath12k *ar, struct sk_buff *skb,
skb_queue_tail(q, skb);
atomic_inc(&ar->num_pending_mgmt_tx);
- ieee80211_queue_work(ar->hw, &ar->wmi_mgmt_tx_work);
+ ieee80211_queue_work(ath12k_ar_to_hw(ar), &ar->wmi_mgmt_tx_work);
return 0;
}
+static void ath12k_mac_add_p2p_noa_ie(struct ath12k *ar,
+ struct ieee80211_vif *vif,
+ struct sk_buff *skb,
+ bool is_prb_rsp)
+{
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+
+ if (likely(!is_prb_rsp))
+ return;
+
+ spin_lock_bh(&ar->data_lock);
+
+ if (arvif->u.ap.noa_data &&
+ !pskb_expand_head(skb, 0, arvif->u.ap.noa_len,
+ GFP_ATOMIC))
+ skb_put_data(skb, arvif->u.ap.noa_data,
+ arvif->u.ap.noa_len);
+
+ spin_unlock_bh(&ar->data_lock);
+}
+
static void ath12k_mac_op_tx(struct ieee80211_hw *hw,
struct ieee80211_tx_control *control,
struct sk_buff *skb)
{
struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb);
- struct ath12k *ar = hw->priv;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_vif *vif = info->control.vif;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ath12k *ar = arvif->ar;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
struct ieee80211_key_conf *key = info->control.hw_key;
u32 info_flags = info->flags;
@@ -4987,10 +5261,11 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw,
skb_cb->flags |= ATH12K_SKB_CIPHER_SET;
}
+ is_prb_rsp = ieee80211_is_probe_resp(hdr->frame_control);
+
if (info_flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
skb_cb->flags |= ATH12K_SKB_HW_80211_ENCAP;
} else if (ieee80211_is_mgmt(hdr->frame_control)) {
- is_prb_rsp = ieee80211_is_probe_resp(hdr->frame_control);
ret = ath12k_mac_mgmt_tx(ar, skb, is_prb_rsp);
if (ret) {
ath12k_warn(ar->ab, "failed to queue management frame %d\n",
@@ -5000,6 +5275,10 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw,
return;
}
+ /* This is case only for P2P_GO */
+ if (vif->type == NL80211_IFTYPE_AP && vif->p2p)
+ ath12k_mac_add_p2p_noa_ie(ar, vif, skb, is_prb_rsp);
+
ret = ath12k_dp_tx(ar, arvif, skb);
if (ret) {
ath12k_warn(ar->ab, "failed to transmit frame %d\n", ret);
@@ -5018,7 +5297,7 @@ void ath12k_mac_drain_tx(struct ath12k *ar)
static int ath12k_mac_config_mon_status_default(struct ath12k *ar, bool enable)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
/* TODO: Need to support new monitor mode */
}
@@ -5044,14 +5323,12 @@ static void ath12k_mac_wait_reconfigure(struct ath12k_base *ab)
ATH12K_RECONFIGURE_TIMEOUT_HZ);
}
-static int ath12k_mac_op_start(struct ieee80211_hw *hw)
+static int ath12k_mac_start(struct ath12k *ar)
{
- struct ath12k *ar = hw->priv;
struct ath12k_base *ab = ar->ab;
struct ath12k_pdev *pdev = ar->pdev;
int ret;
- ath12k_mac_drain_tx(ar);
mutex_lock(&ar->conf_mutex);
switch (ar->state) {
@@ -5074,14 +5351,14 @@ static int ath12k_mac_op_start(struct ieee80211_hw *hw)
1, pdev->pdev_id);
if (ret) {
- ath12k_err(ar->ab, "failed to enable PMF QOS: (%d\n", ret);
+ ath12k_err(ab, "failed to enable PMF QOS: (%d\n", ret);
goto err;
}
ret = ath12k_wmi_pdev_set_param(ar, WMI_PDEV_PARAM_DYNAMIC_BW, 1,
pdev->pdev_id);
if (ret) {
- ath12k_err(ar->ab, "failed to enable dynamic bw: %d\n", ret);
+ ath12k_err(ab, "failed to enable dynamic bw: %d\n", ret);
goto err;
}
@@ -5111,7 +5388,7 @@ static int ath12k_mac_op_start(struct ieee80211_hw *hw)
1, pdev->pdev_id);
if (ret) {
- ath12k_err(ar->ab, "failed to enable MESH MCAST ENABLE: (%d\n", ret);
+ ath12k_err(ab, "failed to enable MESH MCAST ENABLE: (%d\n", ret);
goto err;
}
@@ -5130,14 +5407,14 @@ static int ath12k_mac_op_start(struct ieee80211_hw *hw)
* such as rssi, rx_duration.
*/
ret = ath12k_mac_config_mon_status_default(ar, true);
- if (ret && (ret != -ENOTSUPP)) {
+ if (ret && (ret != -EOPNOTSUPP)) {
ath12k_err(ab, "failed to configure monitor status ring with default rx_filter: (%d)\n",
ret);
goto err;
}
- if (ret == -ENOTSUPP)
- ath12k_dbg(ar->ab, ATH12K_DBG_MAC,
+ if (ret == -EOPNOTSUPP)
+ ath12k_dbg(ab, ATH12K_DBG_MAC,
"monitor status config is not yet supported");
/* Configure the hash seed for hash based reo dest ring selection */
@@ -5159,7 +5436,6 @@ static int ath12k_mac_op_start(struct ieee80211_hw *hw)
&ab->pdevs[ar->pdev_idx]);
return 0;
-
err:
ar->state = ATH12K_STATE_OFF;
mutex_unlock(&ar->conf_mutex);
@@ -5167,6 +5443,25 @@ err:
return ret;
}
+static int ath12k_mac_op_start(struct ieee80211_hw *hw)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+ struct ath12k_base *ab = ar->ab;
+ int ret;
+
+ ath12k_mac_drain_tx(ar);
+
+ ret = ath12k_mac_start(ar);
+ if (ret) {
+ ath12k_err(ab, "fail to start mac operations in pdev idx %d ret %d\n",
+ ar->pdev_idx, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
int ath12k_mac_rfkill_config(struct ath12k *ar)
{
struct ath12k_base *ab = ar->ab;
@@ -5224,17 +5519,14 @@ int ath12k_mac_rfkill_enable_radio(struct ath12k *ar, bool enable)
return 0;
}
-static void ath12k_mac_op_stop(struct ieee80211_hw *hw)
+static void ath12k_mac_stop(struct ath12k *ar)
{
- struct ath12k *ar = hw->priv;
struct htt_ppdu_stats_info *ppdu_stats, *tmp;
int ret;
- ath12k_mac_drain_tx(ar);
-
mutex_lock(&ar->conf_mutex);
ret = ath12k_mac_config_mon_status_default(ar, false);
- if (ret && (ret != -ENOTSUPP))
+ if (ret && (ret != -EOPNOTSUPP))
ath12k_err(ar->ab, "failed to clear rx_filter for monitor status ring: (%d)\n",
ret);
@@ -5260,6 +5552,16 @@ static void ath12k_mac_op_stop(struct ieee80211_hw *hw)
atomic_set(&ar->num_pending_mgmt_tx, 0);
}
+static void ath12k_mac_op_stop(struct ieee80211_hw *hw)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+
+ ath12k_mac_drain_tx(ar);
+
+ ath12k_mac_stop(ar);
+}
+
static u8
ath12k_mac_get_vdev_stats_id(struct ath12k_vif *arvif)
{
@@ -5269,7 +5571,7 @@ ath12k_mac_get_vdev_stats_id(struct ath12k_vif *arvif)
do {
if (ab->free_vdev_stats_id_map & (1LL << vdev_stats_id)) {
vdev_stats_id++;
- if (vdev_stats_id <= ATH12K_INVAL_VDEV_STATS_ID) {
+ if (vdev_stats_id >= ATH12K_MAX_VDEV_STATS_ID) {
vdev_stats_id = ATH12K_INVAL_VDEV_STATS_ID;
break;
}
@@ -5376,12 +5678,11 @@ static int ath12k_set_he_mu_sounding_mode(struct ath12k *ar,
return ret;
}
-static void ath12k_mac_op_update_vif_offload(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+static void ath12k_mac_update_vif_offload(struct ath12k_vif *arvif)
{
- struct ath12k *ar = hw->priv;
+ struct ieee80211_vif *vif = arvif->vif;
+ struct ath12k *ar = arvif->ar;
struct ath12k_base *ab = ar->ab;
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
u32 param_id, param_value;
int ret;
@@ -5423,11 +5724,20 @@ static void ath12k_mac_op_update_vif_offload(struct ieee80211_hw *hw,
}
}
+static void ath12k_mac_op_update_vif_offload(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
+{
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+
+ ath12k_mac_update_vif_offload(arvif);
+}
+
static int ath12k_mac_op_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
struct ath12k_wmi_vdev_create_arg vdev_arg = {0};
struct ath12k_wmi_peer_create_arg peer_param;
@@ -5439,6 +5749,9 @@ static int ath12k_mac_op_add_interface(struct ieee80211_hw *hw,
vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD;
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
+
mutex_lock(&ar->conf_mutex);
if (vif->type == NL80211_IFTYPE_AP &&
@@ -5483,17 +5796,29 @@ static int ath12k_mac_op_add_interface(struct ieee80211_hw *hw,
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_STATION:
arvif->vdev_type = WMI_VDEV_TYPE_STA;
+
+ if (vif->p2p)
+ arvif->vdev_subtype = WMI_VDEV_SUBTYPE_P2P_CLIENT;
+
break;
case NL80211_IFTYPE_MESH_POINT:
arvif->vdev_subtype = WMI_VDEV_SUBTYPE_MESH_11S;
fallthrough;
case NL80211_IFTYPE_AP:
arvif->vdev_type = WMI_VDEV_TYPE_AP;
+
+ if (vif->p2p)
+ arvif->vdev_subtype = WMI_VDEV_SUBTYPE_P2P_GO;
+
break;
case NL80211_IFTYPE_MONITOR:
arvif->vdev_type = WMI_VDEV_TYPE_MONITOR;
ar->monitor_vdev_id = bit;
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ arvif->vdev_type = WMI_VDEV_TYPE_STA;
+ arvif->vdev_subtype = WMI_VDEV_SUBTYPE_P2P_DEVICE;
+ break;
default:
WARN_ON(1);
break;
@@ -5526,7 +5851,7 @@ static int ath12k_mac_op_add_interface(struct ieee80211_hw *hw,
list_add(&arvif->list, &ar->arvifs);
spin_unlock_bh(&ar->data_lock);
- ath12k_mac_op_update_vif_offload(hw, vif);
+ ath12k_mac_update_vif_offload(arvif);
nss = hweight32(ar->cfg_tx_chainmask) ? : 1;
ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
@@ -5685,12 +6010,16 @@ static void ath12k_mac_vif_unref(struct ath12k_dp *dp, struct ieee80211_vif *vif
static void ath12k_mac_op_remove_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_base *ab;
unsigned long time_left;
int ret;
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
+
mutex_lock(&ar->conf_mutex);
ath12k_dbg(ab, ATH12K_DBG_MAC, "mac remove interface (vdev %d)\n",
@@ -5766,19 +6095,15 @@ err_vdev_del:
FIF_PROBE_REQ | \
FIF_FCSFAIL)
-static void ath12k_mac_op_configure_filter(struct ieee80211_hw *hw,
- unsigned int changed_flags,
- unsigned int *total_flags,
- u64 multicast)
+static void ath12k_mac_configure_filter(struct ath12k *ar,
+ unsigned int total_flags)
{
- struct ath12k *ar = hw->priv;
bool reset_flag;
int ret;
- mutex_lock(&ar->conf_mutex);
+ lockdep_assert_held(&ar->conf_mutex);
- *total_flags &= SUPPORTED_FILTERS;
- ar->filter_flags = *total_flags;
+ ar->filter_flags = total_flags;
/* For monitor mode */
reset_flag = !(ar->filter_flags & FIF_BCN_PRBRESP_PROMISC);
@@ -5793,16 +6118,36 @@ static void ath12k_mac_op_configure_filter(struct ieee80211_hw *hw,
ath12k_warn(ar->ab,
"fail to set monitor filter: %d\n", ret);
}
+
ath12k_dbg(ar->ab, ATH12K_DBG_MAC,
"total_flags:0x%x, reset_flag:%d\n",
- *total_flags, reset_flag);
+ total_flags, reset_flag);
+}
+
+static void ath12k_mac_op_configure_filter(struct ieee80211_hw *hw,
+ unsigned int changed_flags,
+ unsigned int *total_flags,
+ u64 multicast)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ mutex_lock(&ar->conf_mutex);
+
+ *total_flags &= SUPPORTED_FILTERS;
+ ath12k_mac_configure_filter(ar, *total_flags);
mutex_unlock(&ar->conf_mutex);
}
static int ath12k_mac_op_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+
+ ar = ath12k_ah_to_ar(ah);
mutex_lock(&ar->conf_mutex);
@@ -5816,9 +6161,12 @@ static int ath12k_mac_op_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *
static int ath12k_mac_op_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
int ret;
+ ar = ath12k_ah_to_ar(ah);
+
mutex_lock(&ar->conf_mutex);
ret = __ath12k_set_antenna(ar, tx_ant, rx_ant);
mutex_unlock(&ar->conf_mutex);
@@ -5826,14 +6174,13 @@ static int ath12k_mac_op_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx
return ret;
}
-static int ath12k_mac_op_ampdu_action(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- struct ieee80211_ampdu_params *params)
+static int ath12k_mac_ampdu_action(struct ath12k_vif *arvif,
+ struct ieee80211_ampdu_params *params)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k *ar = arvif->ar;
int ret = -EINVAL;
- mutex_lock(&ar->conf_mutex);
+ lockdep_assert_held(&ar->conf_mutex);
switch (params->action) {
case IEEE80211_AMPDU_RX_START:
@@ -5854,16 +6201,40 @@ static int ath12k_mac_op_ampdu_action(struct ieee80211_hw *hw,
break;
}
+ return ret;
+}
+
+static int ath12k_mac_op_ampdu_action(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_ampdu_params *params)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ int ret = -EINVAL;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ mutex_lock(&ar->conf_mutex);
+ ret = ath12k_mac_ampdu_action(arvif, params);
mutex_unlock(&ar->conf_mutex);
+ if (ret)
+ ath12k_warn(ar->ab, "pdev idx %d unable to perform ampdu action %d ret %d\n",
+ ar->pdev_idx, params->action, ret);
+
return ret;
}
static int ath12k_mac_op_add_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
+
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
ath12k_dbg(ab, ATH12K_DBG_MAC,
"mac chanctx add freq %u width %d ptr %pK\n",
@@ -5886,8 +6257,12 @@ static int ath12k_mac_op_add_chanctx(struct ieee80211_hw *hw,
static void ath12k_mac_op_remove_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
+
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
ath12k_dbg(ab, ATH12K_DBG_MAC,
"mac chanctx remove freq %u width %d ptr %pK\n",
@@ -5995,6 +6370,11 @@ ath12k_mac_vdev_start_restart(struct ath12k_vif *arvif,
arg.pref_tx_streams = ar->num_tx_chains;
arg.pref_rx_streams = ar->num_rx_chains;
+ /* Fill the MBSSID flags to indicate AP is non MBSSID by default
+ * Corresponding flags would be updated with MBSSID support.
+ */
+ arg.mbssid_flags = WMI_VDEV_MBSSID_FLAGS_NON_MBSSID_AP;
+
if (arvif->vdev_type == WMI_VDEV_TYPE_AP) {
arg.ssid = arvif->u.ap.ssid;
arg.ssid_len = arvif->u.ap.ssid_len;
@@ -6071,46 +6451,6 @@ ath12k_mac_vdev_start_restart(struct ath12k_vif *arvif,
return 0;
}
-static int ath12k_mac_vdev_stop(struct ath12k_vif *arvif)
-{
- struct ath12k *ar = arvif->ar;
- int ret;
-
- lockdep_assert_held(&ar->conf_mutex);
-
- reinit_completion(&ar->vdev_setup_done);
-
- ret = ath12k_wmi_vdev_stop(ar, arvif->vdev_id);
- if (ret) {
- ath12k_warn(ar->ab, "failed to stop WMI vdev %i: %d\n",
- arvif->vdev_id, ret);
- goto err;
- }
-
- ret = ath12k_mac_vdev_setup_sync(ar);
- if (ret) {
- ath12k_warn(ar->ab, "failed to synchronize setup for vdev %i: %d\n",
- arvif->vdev_id, ret);
- goto err;
- }
-
- WARN_ON(ar->num_started_vdevs == 0);
-
- ar->num_started_vdevs--;
- ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "vdev %pM stopped, vdev_id %d\n",
- arvif->vif->addr, arvif->vdev_id);
-
- if (test_bit(ATH12K_CAC_RUNNING, &ar->dev_flags)) {
- clear_bit(ATH12K_CAC_RUNNING, &ar->dev_flags);
- ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "CAC Stopped for vdev %d\n",
- arvif->vdev_id);
- }
-
- return 0;
-err:
- return ret;
-}
-
static int ath12k_mac_vdev_start(struct ath12k_vif *arvif,
struct ieee80211_chanctx_conf *ctx)
{
@@ -6215,6 +6555,8 @@ ath12k_mac_update_vif_chan(struct ath12k *ar,
if (WARN_ON(!arvif->is_started))
continue;
+ arvif->punct_bitmap = vifs[i].new_ctx->def.punctured;
+
/* Firmware expect vdev_restart only if vdev is up.
* If vdev is down then it expect vdev_stop->vdev_start.
*/
@@ -6266,7 +6608,7 @@ ath12k_mac_update_active_vif_chan(struct ath12k *ar,
struct ieee80211_chanctx_conf *ctx)
{
struct ath12k_mac_change_chanctx_arg arg = { .ctx = ctx };
- struct ieee80211_hw *hw = ar->hw;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
lockdep_assert_held(&ar->conf_mutex);
@@ -6295,8 +6637,12 @@ static void ath12k_mac_op_change_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx,
u32 changed)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
+
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
mutex_lock(&ar->conf_mutex);
@@ -6311,7 +6657,8 @@ static void ath12k_mac_op_change_chanctx(struct ieee80211_hw *hw,
goto unlock;
if (changed & IEEE80211_CHANCTX_CHANGE_WIDTH ||
- changed & IEEE80211_CHANCTX_CHANGE_RADAR)
+ changed & IEEE80211_CHANCTX_CHANGE_RADAR ||
+ changed & IEEE80211_CHANCTX_CHANGE_PUNCTURING)
ath12k_mac_update_active_vif_chan(ar, ctx);
/* TODO: Recalc radar detection */
@@ -6320,12 +6667,11 @@ unlock:
mutex_unlock(&ar->conf_mutex);
}
-static int ath12k_start_vdev_delay(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+static int ath12k_start_vdev_delay(struct ath12k *ar,
+ struct ath12k_vif *arvif)
{
- struct ath12k *ar = hw->priv;
struct ath12k_base *ab = ar->ab;
- struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ieee80211_vif *vif = arvif->vif;
int ret;
if (WARN_ON(arvif->is_started))
@@ -6359,19 +6705,23 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx_conf *ctx)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
int ret;
struct ath12k_wmi_peer_create_arg param;
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
+
mutex_lock(&ar->conf_mutex);
ath12k_dbg(ab, ATH12K_DBG_MAC,
"mac chanctx assign ptr %pK vdev_id %i\n",
ctx, arvif->vdev_id);
- arvif->punct_bitmap = link_conf->eht_puncturing;
+ arvif->punct_bitmap = ctx->def.punctured;
/* for some targets bss peer must be created before vdev_start */
if (ab->hw_params->vdev_start_delay &&
@@ -6438,11 +6788,15 @@ ath12k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx_conf *ctx)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
int ret;
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
+
mutex_lock(&ar->conf_mutex);
ath12k_dbg(ab, ATH12K_DBG_MAC,
@@ -6466,11 +6820,13 @@ ath12k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
arvif->is_started = false;
}
- ret = ath12k_mac_vdev_stop(arvif);
- if (ret)
- ath12k_warn(ab, "failed to stop vdev %i: %d\n",
- arvif->vdev_id, ret);
-
+ if (arvif->vdev_type != WMI_VDEV_TYPE_STA) {
+ ath12k_bss_disassoc(ar, arvif);
+ ret = ath12k_mac_vdev_stop(arvif);
+ if (ret)
+ ath12k_warn(ab, "failed to stop vdev %i: %d\n",
+ arvif->vdev_id, ret);
+ }
arvif->is_started = false;
if (ab->hw_params->vdev_start_delay &&
@@ -6490,7 +6846,10 @@ ath12k_mac_op_switch_vif_chanctx(struct ieee80211_hw *hw,
int n_vifs,
enum ieee80211_chanctx_switch_mode mode)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+
+ ar = ath12k_ah_to_ar(ah);
mutex_lock(&ar->conf_mutex);
@@ -6532,10 +6891,15 @@ ath12k_set_vdev_param_to_all_vifs(struct ath12k *ar, int param, u32 value)
*/
static int ath12k_mac_op_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
{
- struct ath12k *ar = hw->priv;
- int param_id = WMI_VDEV_PARAM_RTS_THRESHOLD;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ int param_id = WMI_VDEV_PARAM_RTS_THRESHOLD, ret;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ ret = ath12k_set_vdev_param_to_all_vifs(ar, param_id, value);
- return ath12k_set_vdev_param_to_all_vifs(ar, param_id, value);
+ return ret;
}
static int ath12k_mac_op_set_frag_threshold(struct ieee80211_hw *hw, u32 value)
@@ -6553,15 +6917,10 @@ static int ath12k_mac_op_set_frag_threshold(struct ieee80211_hw *hw, u32 value)
return -EOPNOTSUPP;
}
-static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
- u32 queues, bool drop)
+static void ath12k_mac_flush(struct ath12k *ar)
{
- struct ath12k *ar = hw->priv;
long time_left;
- if (drop)
- return;
-
time_left = wait_event_timeout(ar->dp.tx_empty_waitq,
(atomic_read(&ar->dp.num_tx_pending) == 0),
ATH12K_FLUSH_TIMEOUT);
@@ -6576,6 +6935,18 @@ static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *v
time_left);
}
+static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+
+ if (drop)
+ return;
+
+ ath12k_mac_flush(ar);
+}
+
static int
ath12k_mac_bitrate_mask_num_ht_rates(struct ath12k *ar,
enum nl80211_band band,
@@ -6778,7 +7149,7 @@ static void ath12k_mac_set_bitrate_mask_iter(void *data,
arsta->changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
spin_unlock_bh(&ar->data_lock);
- ieee80211_queue_work(ar->hw, &arsta->update_wk);
+ ieee80211_queue_work(ath12k_ar_to_hw(ar), &arsta->update_wk);
}
static void ath12k_mac_disable_peer_fixed_rate(void *data,
@@ -6826,8 +7197,10 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw,
ldpc = !!(ar->ht_cap_info & WMI_HT_CAP_LDPC);
sgi = mask->control[band].gi;
- if (sgi == NL80211_TXRATE_FORCE_LGI)
- return -EINVAL;
+ if (sgi == NL80211_TXRATE_FORCE_LGI) {
+ ret = -EINVAL;
+ goto out;
+ }
/* mac80211 doesn't support sending a fixed HT/VHT MCS alone, rather it
* requires passing at least one of used basic rates along with them.
@@ -6843,7 +7216,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw,
if (ret) {
ath12k_warn(ar->ab, "failed to get single legacy rate for vdev %i: %d\n",
arvif->vdev_id, ret);
- return ret;
+ goto out;
}
ieee80211_iterate_stations_atomic(hw,
ath12k_mac_disable_peer_fixed_rate,
@@ -6888,7 +7261,8 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw,
*/
ath12k_warn(ar->ab,
"Setting more than one MCS Value in bitrate mask not supported\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
ieee80211_iterate_stations_atomic(hw,
@@ -6915,6 +7289,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw,
mutex_unlock(&ar->conf_mutex);
+out:
return ret;
}
@@ -6922,14 +7297,18 @@ static void
ath12k_mac_op_reconfig_complete(struct ieee80211_hw *hw,
enum ieee80211_reconfig_type reconfig_type)
{
- struct ath12k *ar = hw->priv;
- struct ath12k_base *ab = ar->ab;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+ struct ath12k_base *ab;
struct ath12k_vif *arvif;
int recovery_count;
if (reconfig_type != IEEE80211_RECONFIG_TYPE_RESTART)
return;
+ ar = ath12k_ah_to_ar(ah);
+ ab = ar->ab;
+
mutex_lock(&ar->conf_mutex);
if (ar->state == ATH12K_STATE_RESTARTED) {
@@ -7013,7 +7392,8 @@ ath12k_mac_update_bss_chan_survey(struct ath12k *ar,
static int ath12k_mac_op_get_survey(struct ieee80211_hw *hw, int idx,
struct survey_info *survey)
{
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
struct ieee80211_supported_band *sband;
struct survey_info *ar_survey;
int ret = 0;
@@ -7021,6 +7401,8 @@ static int ath12k_mac_op_get_survey(struct ieee80211_hw *hw, int idx,
if (idx >= ATH12K_NUM_CHANS)
return -ENOENT;
+ ar = ath12k_ah_to_ar(ah);
+
ar_survey = &ar->survey[idx];
mutex_lock(&ar->conf_mutex);
@@ -7052,6 +7434,7 @@ static int ath12k_mac_op_get_survey(struct ieee80211_hw *hw, int idx,
exit:
mutex_unlock(&ar->conf_mutex);
+
return ret;
}
@@ -7089,6 +7472,125 @@ static void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw,
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
}
+static int ath12k_mac_op_cancel_remain_on_channel(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
+{
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ mutex_lock(&ar->conf_mutex);
+
+ spin_lock_bh(&ar->data_lock);
+ ar->scan.roc_notify = false;
+ spin_unlock_bh(&ar->data_lock);
+
+ ath12k_scan_abort(ar);
+
+ mutex_unlock(&ar->conf_mutex);
+
+ cancel_delayed_work_sync(&ar->scan.timeout);
+
+ return 0;
+}
+
+static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_channel *chan,
+ int duration,
+ enum ieee80211_roc_type type)
+{
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k_wmi_scan_req_arg arg;
+ struct ath12k *ar;
+ u32 scan_time_msec;
+ int ret;
+
+ ar = ath12k_ah_to_ar(ah);
+
+ mutex_lock(&ar->conf_mutex);
+ spin_lock_bh(&ar->data_lock);
+
+ switch (ar->scan.state) {
+ case ATH12K_SCAN_IDLE:
+ reinit_completion(&ar->scan.started);
+ reinit_completion(&ar->scan.completed);
+ reinit_completion(&ar->scan.on_channel);
+ ar->scan.state = ATH12K_SCAN_STARTING;
+ ar->scan.is_roc = true;
+ ar->scan.vdev_id = arvif->vdev_id;
+ ar->scan.roc_freq = chan->center_freq;
+ ar->scan.roc_notify = true;
+ ret = 0;
+ break;
+ case ATH12K_SCAN_STARTING:
+ case ATH12K_SCAN_RUNNING:
+ case ATH12K_SCAN_ABORTING:
+ ret = -EBUSY;
+ break;
+ }
+
+ spin_unlock_bh(&ar->data_lock);
+
+ if (ret)
+ goto exit;
+
+ scan_time_msec = hw->wiphy->max_remain_on_channel_duration * 2;
+
+ memset(&arg, 0, sizeof(arg));
+ ath12k_wmi_start_scan_init(ar, &arg);
+ arg.num_chan = 1;
+ arg.chan_list = kcalloc(arg.num_chan, sizeof(*arg.chan_list),
+ GFP_KERNEL);
+ if (!arg.chan_list) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ arg.vdev_id = arvif->vdev_id;
+ arg.scan_id = ATH12K_SCAN_ID;
+ arg.chan_list[0] = chan->center_freq;
+ arg.dwell_time_active = scan_time_msec;
+ arg.dwell_time_passive = scan_time_msec;
+ arg.max_scan_time = scan_time_msec;
+ arg.scan_f_passive = 1;
+ arg.burst_duration = duration;
+
+ ret = ath12k_start_scan(ar, &arg);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to start roc scan: %d\n", ret);
+
+ spin_lock_bh(&ar->data_lock);
+ ar->scan.state = ATH12K_SCAN_IDLE;
+ spin_unlock_bh(&ar->data_lock);
+ goto free_chan_list;
+ }
+
+ ret = wait_for_completion_timeout(&ar->scan.on_channel, 3 * HZ);
+ if (ret == 0) {
+ ath12k_warn(ar->ab, "failed to switch to channel for roc scan\n");
+ ret = ath12k_scan_stop(ar);
+ if (ret)
+ ath12k_warn(ar->ab, "failed to stop scan: %d\n", ret);
+ ret = -ETIMEDOUT;
+ goto free_chan_list;
+ }
+
+ ieee80211_queue_delayed_work(hw, &ar->scan.timeout,
+ msecs_to_jiffies(duration));
+
+ ret = 0;
+
+free_chan_list:
+ kfree(arg.chan_list);
+exit:
+ mutex_unlock(&ar->conf_mutex);
+
+ return ret;
+}
+
static const struct ieee80211_ops ath12k_ops = {
.tx = ath12k_mac_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
@@ -7123,6 +7625,8 @@ static const struct ieee80211_ops ath12k_ops = {
.get_survey = ath12k_mac_op_get_survey,
.flush = ath12k_mac_op_flush,
.sta_statistics = ath12k_mac_op_sta_statistics,
+ .remain_on_channel = ath12k_mac_op_remain_on_channel,
+ .cancel_remain_on_channel = ath12k_mac_op_cancel_remain_on_channel,
};
static void ath12k_mac_update_ch_list(struct ath12k *ar,
@@ -7158,9 +7662,9 @@ static u32 ath12k_get_phy_id(struct ath12k *ar, u32 band)
}
static int ath12k_mac_setup_channels_rates(struct ath12k *ar,
- u32 supported_bands)
+ u32 supported_bands,
+ struct ieee80211_supported_band *bands[])
{
- struct ieee80211_hw *hw = ar->hw;
struct ieee80211_supported_band *band;
struct ath12k_wmi_hal_reg_capabilities_ext_arg *reg_cap;
void *channels;
@@ -7186,7 +7690,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar,
band->channels = channels;
band->n_bitrates = ath12k_g_rates_size;
band->bitrates = ath12k_g_rates;
- hw->wiphy->bands[NL80211_BAND_2GHZ] = band;
+ bands[NL80211_BAND_2GHZ] = band;
if (ar->ab->hw_params->single_pdev_only) {
phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2G_CAP);
@@ -7198,7 +7702,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar,
}
if (supported_bands & WMI_HOST_WLAN_5G_CAP) {
- if (reg_cap->high_5ghz_chan >= ATH12K_MAX_6G_FREQ) {
+ if (reg_cap->high_5ghz_chan >= ATH12K_MIN_6G_FREQ) {
channels = kmemdup(ath12k_6ghz_channels,
sizeof(ath12k_6ghz_channels), GFP_KERNEL);
if (!channels) {
@@ -7213,7 +7717,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar,
band->channels = channels;
band->n_bitrates = ath12k_a_rates_size;
band->bitrates = ath12k_a_rates;
- hw->wiphy->bands[NL80211_BAND_6GHZ] = band;
+ bands[NL80211_BAND_6GHZ] = band;
ath12k_mac_update_ch_list(ar, band,
reg_cap->low_5ghz_chan,
reg_cap->high_5ghz_chan);
@@ -7235,7 +7739,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar,
band->channels = channels;
band->n_bitrates = ath12k_a_rates_size;
band->bitrates = ath12k_a_rates;
- hw->wiphy->bands[NL80211_BAND_5GHZ] = band;
+ bands[NL80211_BAND_5GHZ] = band;
if (ar->ab->hw_params->single_pdev_only) {
phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5G_CAP);
@@ -7251,28 +7755,59 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar,
return 0;
}
-static int ath12k_mac_setup_iface_combinations(struct ath12k *ar)
+static u16 ath12k_mac_get_ifmodes(struct ath12k_hw *ah)
{
- struct ath12k_base *ab = ar->ab;
- struct ieee80211_hw *hw = ar->hw;
- struct wiphy *wiphy = hw->wiphy;
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+ u16 interface_modes = U16_MAX;
+
+ interface_modes &= ar->ab->hw_params->interface_modes;
+
+ return interface_modes == U16_MAX ? 0 : interface_modes;
+}
+
+static bool ath12k_mac_is_iface_mode_enable(struct ath12k_hw *ah,
+ enum nl80211_iftype type)
+{
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+ u16 interface_modes, mode;
+ bool is_enable = true;
+
+ mode = BIT(type);
+
+ interface_modes = ar->ab->hw_params->interface_modes;
+ if (!(interface_modes & mode))
+ is_enable = false;
+
+ return is_enable;
+}
+
+static int ath12k_mac_setup_iface_combinations(struct ath12k_hw *ah)
+{
+ struct wiphy *wiphy = ah->hw->wiphy;
struct ieee80211_iface_combination *combinations;
struct ieee80211_iface_limit *limits;
int n_limits, max_interfaces;
- bool ap, mesh;
+ bool ap, mesh, p2p;
- ap = ab->hw_params->interface_modes & BIT(NL80211_IFTYPE_AP);
+ ap = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_AP);
+ p2p = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_P2P_DEVICE);
mesh = IS_ENABLED(CONFIG_MAC80211_MESH) &&
- ab->hw_params->interface_modes & BIT(NL80211_IFTYPE_MESH_POINT);
+ ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_MESH_POINT);
combinations = kzalloc(sizeof(*combinations), GFP_KERNEL);
if (!combinations)
return -ENOMEM;
- if (ap || mesh) {
+ if ((ap || mesh) && !p2p) {
n_limits = 2;
max_interfaces = 16;
+ } else if (p2p) {
+ n_limits = 3;
+ if (ap || mesh)
+ max_interfaces = 16;
+ else
+ max_interfaces = 3;
} else {
n_limits = 1;
max_interfaces = 1;
@@ -7287,14 +7822,22 @@ static int ath12k_mac_setup_iface_combinations(struct ath12k *ar)
limits[0].max = 1;
limits[0].types |= BIT(NL80211_IFTYPE_STATION);
- if (ap) {
+ if (ap || mesh || p2p)
limits[1].max = max_interfaces;
+
+ if (ap)
limits[1].types |= BIT(NL80211_IFTYPE_AP);
- }
if (mesh)
limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT);
+ if (p2p) {
+ limits[1].types |= BIT(NL80211_IFTYPE_P2P_CLIENT) |
+ BIT(NL80211_IFTYPE_P2P_GO);
+ limits[2].max = 1;
+ limits[2].types |= BIT(NL80211_IFTYPE_P2P_DEVICE);
+ }
+
combinations[0].limits = limits;
combinations[0].n_limits = n_limits;
combinations[0].max_interfaces = max_interfaces;
@@ -7349,21 +7892,27 @@ static const struct wiphy_iftype_ext_capab ath12k_iftypes_ext_capa[] = {
},
};
-static void __ath12k_mac_unregister(struct ath12k *ar)
+static void ath12k_mac_cleanup_unregister(struct ath12k *ar)
{
- struct ieee80211_hw *hw = ar->hw;
- struct wiphy *wiphy = hw->wiphy;
-
- cancel_work_sync(&ar->regd_update_work);
-
- ieee80211_unregister_hw(hw);
-
idr_for_each(&ar->txmgmt_idr, ath12k_mac_tx_mgmt_pending_free, ar);
idr_destroy(&ar->txmgmt_idr);
kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels);
kfree(ar->mac.sbands[NL80211_BAND_5GHZ].channels);
kfree(ar->mac.sbands[NL80211_BAND_6GHZ].channels);
+}
+
+static void ath12k_mac_hw_unregister(struct ath12k_hw *ah)
+{
+ struct ieee80211_hw *hw = ah->hw;
+ struct wiphy *wiphy = hw->wiphy;
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+
+ cancel_work_sync(&ar->regd_update_work);
+
+ ieee80211_unregister_hw(hw);
+
+ ath12k_mac_cleanup_unregister(ar);
kfree(wiphy->iface_combinations[0].limits);
kfree(wiphy->iface_combinations);
@@ -7371,28 +7920,42 @@ static void __ath12k_mac_unregister(struct ath12k *ar)
SET_IEEE80211_DEV(hw, NULL);
}
-void ath12k_mac_unregister(struct ath12k_base *ab)
+static int ath12k_mac_setup_register(struct ath12k *ar,
+ u32 *ht_cap,
+ struct ieee80211_supported_band *bands[])
{
- struct ath12k *ar;
- struct ath12k_pdev *pdev;
- int i;
+ struct ath12k_pdev_cap *cap = &ar->pdev->cap;
+ int ret;
- for (i = 0; i < ab->num_radios; i++) {
- pdev = &ab->pdevs[i];
- ar = pdev->ar;
- if (!ar)
- continue;
+ init_waitqueue_head(&ar->txmgmt_empty_waitq);
+ idr_init(&ar->txmgmt_idr);
+ spin_lock_init(&ar->txmgmt_idr_lock);
- __ath12k_mac_unregister(ar);
- }
+ ath12k_pdev_caps_update(ar);
+
+ ret = ath12k_mac_setup_channels_rates(ar,
+ cap->supported_bands,
+ bands);
+ if (ret)
+ return ret;
+
+ ath12k_mac_setup_ht_vht_cap(ar, cap, ht_cap);
+ ath12k_mac_setup_sband_iftype_data(ar, cap);
+
+ ar->max_num_stations = ath12k_core_get_max_station_per_radio(ar->ab);
+ ar->max_num_peers = ath12k_core_get_max_peers_per_radio(ar->ab);
+
+ return 0;
}
-static int __ath12k_mac_register(struct ath12k *ar)
+static int ath12k_mac_hw_register(struct ath12k_hw *ah)
{
- struct ath12k_base *ab = ar->ab;
- struct ieee80211_hw *hw = ar->hw;
+ struct ieee80211_hw *hw = ah->hw;
struct wiphy *wiphy = hw->wiphy;
- struct ath12k_pdev_cap *cap = &ar->pdev->cap;
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
+ struct ath12k_base *ab = ar->ab;
+ struct ath12k_pdev *pdev;
+ struct ath12k_pdev_cap *cap;
static const u32 cipher_suites[] = {
WLAN_CIPHER_SUITE_TKIP,
WLAN_CIPHER_SUITE_CCMP,
@@ -7407,30 +7970,34 @@ static int __ath12k_mac_register(struct ath12k *ar)
int ret;
u32 ht_cap = 0;
- ath12k_pdev_caps_update(ar);
+ pdev = ar->pdev;
- SET_IEEE80211_PERM_ADDR(hw, ar->mac_addr);
-
- SET_IEEE80211_DEV(hw, ab->dev);
+ if (ab->pdevs_macaddr_valid)
+ ether_addr_copy(ar->mac_addr, pdev->mac_addr);
+ else
+ ether_addr_copy(ar->mac_addr, ab->mac_addr);
- ret = ath12k_mac_setup_channels_rates(ar,
- cap->supported_bands);
+ ret = ath12k_mac_setup_register(ar, &ht_cap, hw->wiphy->bands);
if (ret)
- goto err;
+ goto out;
- ath12k_mac_setup_ht_vht_cap(ar, cap, &ht_cap);
- ath12k_mac_setup_sband_iftype_data(ar, cap);
+ wiphy->max_ap_assoc_sta = ar->max_num_stations;
- ret = ath12k_mac_setup_iface_combinations(ar);
- if (ret) {
- ath12k_err(ar->ab, "failed to setup interface combinations: %d\n", ret);
- goto err_free_channels;
- }
+ cap = &pdev->cap;
wiphy->available_antennas_rx = cap->rx_chain_mask;
wiphy->available_antennas_tx = cap->tx_chain_mask;
- wiphy->interface_modes = ab->hw_params->interface_modes;
+ SET_IEEE80211_PERM_ADDR(hw, ar->mac_addr);
+ SET_IEEE80211_DEV(hw, ab->dev);
+
+ ret = ath12k_mac_setup_iface_combinations(ah);
+ if (ret) {
+ ath12k_err(ab, "failed to setup interface combinations: %d\n", ret);
+ goto err_cleanup_unregister;
+ }
+
+ wiphy->interface_modes = ath12k_mac_get_ifmodes(ah);
if (wiphy->bands[NL80211_BAND_2GHZ] &&
wiphy->bands[NL80211_BAND_5GHZ] &&
@@ -7483,15 +8050,10 @@ static int __ath12k_mac_register(struct ath12k *ar)
wiphy->features |= NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE |
NL80211_FEATURE_AP_SCAN;
- ar->max_num_stations = TARGET_NUM_STATIONS;
- ar->max_num_peers = TARGET_NUM_PEERS_PDEV;
-
- wiphy->max_ap_assoc_sta = ar->max_num_stations;
-
hw->queues = ATH12K_HW_MAX_QUEUES;
wiphy->tx_queue_len = ATH12K_QUEUE_LEN;
hw->offchannel_tx_hw_queue = ATH12K_HW_MAX_QUEUES - 1;
- hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
+ hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_EHT;
hw->vif_data_size = sizeof(struct ath12k_vif);
hw->sta_data_size = sizeof(struct ath12k_sta);
@@ -7524,7 +8086,7 @@ static int __ath12k_mac_register(struct ath12k *ar)
ret = ieee80211_register_hw(hw);
if (ret) {
- ath12k_err(ar->ab, "ieee80211 registration failed: %d\n", ret);
+ ath12k_err(ab, "ieee80211 registration failed: %d\n", ret);
goto err_free_if_combs;
}
@@ -7552,142 +8114,213 @@ err_free_if_combs:
kfree(wiphy->iface_combinations[0].limits);
kfree(wiphy->iface_combinations);
-err_free_channels:
- kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels);
- kfree(ar->mac.sbands[NL80211_BAND_5GHZ].channels);
- kfree(ar->mac.sbands[NL80211_BAND_6GHZ].channels);
+err_cleanup_unregister:
+ ath12k_mac_cleanup_unregister(ar);
-err:
+out:
SET_IEEE80211_DEV(hw, NULL);
+
return ret;
}
+static void ath12k_mac_setup(struct ath12k *ar)
+{
+ struct ath12k_base *ab = ar->ab;
+ struct ath12k_pdev *pdev = ar->pdev;
+ u8 pdev_idx = ar->pdev_idx;
+
+ ar->lmac_id = ath12k_hw_get_mac_from_pdev_id(ab->hw_params, pdev_idx);
+
+ ar->wmi = &ab->wmi_ab.wmi[pdev_idx];
+ /* FIXME: wmi[0] is already initialized during attach,
+ * Should we do this again?
+ */
+ ath12k_wmi_pdev_attach(ab, pdev_idx);
+
+ ar->cfg_tx_chainmask = pdev->cap.tx_chain_mask;
+ ar->cfg_rx_chainmask = pdev->cap.rx_chain_mask;
+ ar->num_tx_chains = hweight32(pdev->cap.tx_chain_mask);
+ ar->num_rx_chains = hweight32(pdev->cap.rx_chain_mask);
+
+ spin_lock_init(&ar->data_lock);
+ INIT_LIST_HEAD(&ar->arvifs);
+ INIT_LIST_HEAD(&ar->ppdu_stats_info);
+ mutex_init(&ar->conf_mutex);
+ init_completion(&ar->vdev_setup_done);
+ init_completion(&ar->vdev_delete_done);
+ init_completion(&ar->peer_assoc_done);
+ init_completion(&ar->peer_delete_done);
+ init_completion(&ar->install_key_done);
+ init_completion(&ar->bss_survey_done);
+ init_completion(&ar->scan.started);
+ init_completion(&ar->scan.completed);
+ init_completion(&ar->scan.on_channel);
+
+ INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work);
+ INIT_WORK(&ar->regd_update_work, ath12k_regd_update_work);
+
+ INIT_WORK(&ar->wmi_mgmt_tx_work, ath12k_mgmt_over_wmi_tx_work);
+ skb_queue_head_init(&ar->wmi_mgmt_tx_queue);
+ clear_bit(ATH12K_FLAG_MONITOR_ENABLED, &ar->monitor_flags);
+}
+
int ath12k_mac_register(struct ath12k_base *ab)
{
- struct ath12k *ar;
- struct ath12k_pdev *pdev;
+ struct ath12k_hw *ah;
int i;
int ret;
if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags))
return 0;
- for (i = 0; i < ab->num_radios; i++) {
- pdev = &ab->pdevs[i];
- ar = pdev->ar;
- if (ab->pdevs_macaddr_valid) {
- ether_addr_copy(ar->mac_addr, pdev->mac_addr);
- } else {
- ether_addr_copy(ar->mac_addr, ab->mac_addr);
- ar->mac_addr[4] += i;
- }
-
- ret = __ath12k_mac_register(ar);
- if (ret)
- goto err_cleanup;
-
- init_waitqueue_head(&ar->txmgmt_empty_waitq);
- idr_init(&ar->txmgmt_idr);
- spin_lock_init(&ar->txmgmt_idr_lock);
- }
-
/* Initialize channel counters frequency value in hertz */
ab->cc_freq_hz = 320000;
ab->free_vdev_map = (1LL << (ab->num_radios * TARGET_NUM_VDEVS)) - 1;
+ for (i = 0; i < ab->num_hw; i++) {
+ ah = ab->ah[i];
+
+ ret = ath12k_mac_hw_register(ah);
+ if (ret)
+ goto err;
+ }
+
return 0;
-err_cleanup:
+err:
for (i = i - 1; i >= 0; i--) {
- pdev = &ab->pdevs[i];
- ar = pdev->ar;
- __ath12k_mac_unregister(ar);
+ ah = ab->ah[i];
+ if (!ah)
+ continue;
+
+ ath12k_mac_hw_unregister(ah);
}
return ret;
}
-int ath12k_mac_allocate(struct ath12k_base *ab)
+void ath12k_mac_unregister(struct ath12k_base *ab)
+{
+ struct ath12k_hw *ah;
+ int i;
+
+ for (i = ab->num_hw - 1; i >= 0; i--) {
+ ah = ab->ah[i];
+ if (!ah)
+ continue;
+
+ ath12k_mac_hw_unregister(ah);
+ }
+}
+
+static void ath12k_mac_hw_destroy(struct ath12k_hw *ah)
+{
+ ieee80211_free_hw(ah->hw);
+}
+
+static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_base *ab,
+ struct ath12k_pdev_map *pdev_map,
+ u8 num_pdev_map)
{
struct ieee80211_hw *hw;
struct ath12k *ar;
struct ath12k_pdev *pdev;
- int ret;
+ struct ath12k_hw *ah;
int i;
+ u8 pdev_idx;
- if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags))
- return 0;
+ hw = ieee80211_alloc_hw(struct_size(ah, radio, num_pdev_map),
+ &ath12k_ops);
+ if (!hw)
+ return NULL;
- for (i = 0; i < ab->num_radios; i++) {
- pdev = &ab->pdevs[i];
- hw = ieee80211_alloc_hw(sizeof(struct ath12k), &ath12k_ops);
- if (!hw) {
- ath12k_warn(ab, "failed to allocate mac80211 hw device\n");
- ret = -ENOMEM;
- goto err_free_mac;
- }
+ ah = ath12k_hw_to_ah(hw);
+ ah->hw = hw;
+ ah->num_radio = num_pdev_map;
+
+ for (i = 0; i < num_pdev_map; i++) {
+ ab = pdev_map[i].ab;
+ pdev_idx = pdev_map[i].pdev_idx;
+ pdev = &ab->pdevs[pdev_idx];
- ar = hw->priv;
- ar->hw = hw;
+ ar = ath12k_ah_to_ar(ah);
+ ar->ah = ah;
ar->ab = ab;
+ ar->hw_link_id = i;
ar->pdev = pdev;
- ar->pdev_idx = i;
- ar->lmac_id = ath12k_hw_get_mac_from_pdev_id(ab->hw_params, i);
-
- ar->wmi = &ab->wmi_ab.wmi[i];
- /* FIXME: wmi[0] is already initialized during attach,
- * Should we do this again?
- */
- ath12k_wmi_pdev_attach(ab, i);
-
- ar->cfg_tx_chainmask = pdev->cap.tx_chain_mask;
- ar->cfg_rx_chainmask = pdev->cap.rx_chain_mask;
- ar->num_tx_chains = hweight32(pdev->cap.tx_chain_mask);
- ar->num_rx_chains = hweight32(pdev->cap.rx_chain_mask);
-
+ ar->pdev_idx = pdev_idx;
pdev->ar = ar;
- spin_lock_init(&ar->data_lock);
- INIT_LIST_HEAD(&ar->arvifs);
- INIT_LIST_HEAD(&ar->ppdu_stats_info);
- mutex_init(&ar->conf_mutex);
- init_completion(&ar->vdev_setup_done);
- init_completion(&ar->vdev_delete_done);
- init_completion(&ar->peer_assoc_done);
- init_completion(&ar->peer_delete_done);
- init_completion(&ar->install_key_done);
- init_completion(&ar->bss_survey_done);
- init_completion(&ar->scan.started);
- init_completion(&ar->scan.completed);
-
- INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work);
- INIT_WORK(&ar->regd_update_work, ath12k_regd_update_work);
-
- INIT_WORK(&ar->wmi_mgmt_tx_work, ath12k_mgmt_over_wmi_tx_work);
- skb_queue_head_init(&ar->wmi_mgmt_tx_queue);
- clear_bit(ATH12K_FLAG_MONITOR_ENABLED, &ar->monitor_flags);
- }
- return 0;
-
-err_free_mac:
- ath12k_mac_destroy(ab);
+ ath12k_mac_setup(ar);
+ }
- return ret;
+ return ah;
}
void ath12k_mac_destroy(struct ath12k_base *ab)
{
- struct ath12k *ar;
struct ath12k_pdev *pdev;
int i;
for (i = 0; i < ab->num_radios; i++) {
pdev = &ab->pdevs[i];
- ar = pdev->ar;
- if (!ar)
+ if (!pdev->ar)
continue;
- ieee80211_free_hw(ar->hw);
pdev->ar = NULL;
}
+
+ for (i = 0; i < ab->num_hw; i++) {
+ if (!ab->ah[i])
+ continue;
+
+ ath12k_mac_hw_destroy(ab->ah[i]);
+ ab->ah[i] = NULL;
+ }
+}
+
+int ath12k_mac_allocate(struct ath12k_base *ab)
+{
+ struct ath12k_hw *ah;
+ struct ath12k_pdev_map pdev_map[MAX_RADIOS];
+ int ret, i, j;
+ u8 radio_per_hw;
+
+ if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags))
+ return 0;
+
+ ab->num_hw = ab->num_radios;
+ radio_per_hw = 1;
+
+ for (i = 0; i < ab->num_hw; i++) {
+ for (j = 0; j < radio_per_hw; j++) {
+ pdev_map[j].ab = ab;
+ pdev_map[j].pdev_idx = (i * radio_per_hw) + j;
+ }
+
+ ah = ath12k_mac_hw_allocate(ab, pdev_map, radio_per_hw);
+ if (!ah) {
+ ath12k_warn(ab, "failed to allocate mac80211 hw device for hw_idx %d\n",
+ i);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ab->ah[i] = ah;
+ }
+
+ ath12k_dp_pdev_pre_alloc(ab);
+
+ return 0;
+
+err:
+ for (i = i - 1; i >= 0; i--) {
+ if (!ab->ah[i])
+ continue;
+
+ ath12k_mac_hw_destroy(ab->ah[i]);
+ ab->ah[i] = NULL;
+ }
+
+ return ret;
}
diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h
index 7c63bb628adc..3f5e1be0dff9 100644
--- a/drivers/net/wireless/ath/ath12k/mac.h
+++ b/drivers/net/wireless/ath/ath12k/mac.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_MAC_H
@@ -12,6 +12,8 @@
struct ath12k;
struct ath12k_base;
+struct ath12k_hw;
+struct ath12k_pdev_map;
struct ath12k_generic_iter {
struct ath12k *ar;
diff --git a/drivers/net/wireless/ath/ath12k/mhi.c b/drivers/net/wireless/ath/ath12k/mhi.c
index d5441ddb374b..adb8c3ec1950 100644
--- a/drivers/net/wireless/ath/ath12k/mhi.c
+++ b/drivers/net/wireless/ath/ath12k/mhi.c
@@ -1,11 +1,12 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2020-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/msi.h>
#include <linux/pci.h>
+#include <linux/firmware.h>
#include "core.h"
#include "debug.h"
@@ -13,6 +14,8 @@
#include "pci.h"
#define MHI_TIMEOUT_DEFAULT_MS 90000
+#define OTP_INVALID_BOARD_ID 0xFFFF
+#define OTP_VALID_DUALMAC_BOARD_ID_MASK 0x1000
static const struct mhi_channel_config ath12k_mhi_channels_qcn9274[] = {
{
@@ -358,23 +361,60 @@ int ath12k_mhi_register(struct ath12k_pci *ab_pci)
{
struct ath12k_base *ab = ab_pci->ab;
struct mhi_controller *mhi_ctrl;
+ unsigned int board_id;
int ret;
+ bool dualmac = false;
mhi_ctrl = mhi_alloc_controller();
if (!mhi_ctrl)
return -ENOMEM;
- ath12k_core_create_firmware_path(ab, ATH12K_AMSS_FILE,
- ab_pci->amss_path,
- sizeof(ab_pci->amss_path));
-
ab_pci->mhi_ctrl = mhi_ctrl;
mhi_ctrl->cntrl_dev = ab->dev;
- mhi_ctrl->fw_image = ab_pci->amss_path;
mhi_ctrl->regs = ab->mem;
mhi_ctrl->reg_len = ab->mem_len;
mhi_ctrl->rddm_size = ab->hw_params->rddm_size;
+ if (ab->hw_params->otp_board_id_register) {
+ board_id =
+ ath12k_pci_read32(ab, ab->hw_params->otp_board_id_register);
+ board_id = u32_get_bits(board_id, OTP_BOARD_ID_MASK);
+
+ if (!board_id || (board_id == OTP_INVALID_BOARD_ID)) {
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "failed to read board id\n");
+ } else if (board_id & OTP_VALID_DUALMAC_BOARD_ID_MASK) {
+ dualmac = true;
+ ab->slo_capable = false;
+ ath12k_dbg(ab, ATH12K_DBG_BOOT,
+ "dualmac fw selected for board id: %x\n", board_id);
+ }
+ }
+
+ if (dualmac) {
+ if (ab->fw.amss_dualmac_data && ab->fw.amss_dualmac_len > 0) {
+ /* use MHI firmware file from firmware-N.bin */
+ mhi_ctrl->fw_data = ab->fw.amss_dualmac_data;
+ mhi_ctrl->fw_sz = ab->fw.amss_dualmac_len;
+ } else {
+ ath12k_warn(ab, "dualmac firmware IE not present in firmware-N.bin\n");
+ ret = -ENOENT;
+ goto free_controller;
+ }
+ } else {
+ if (ab->fw.amss_data && ab->fw.amss_len > 0) {
+ /* use MHI firmware file from firmware-N.bin */
+ mhi_ctrl->fw_data = ab->fw.amss_data;
+ mhi_ctrl->fw_sz = ab->fw.amss_len;
+ } else {
+ /* use the old separate mhi.bin MHI firmware file */
+ ath12k_core_create_firmware_path(ab, ATH12K_AMSS_FILE,
+ ab_pci->amss_path,
+ sizeof(ab_pci->amss_path));
+ mhi_ctrl->fw_image = ab_pci->amss_path;
+ }
+ }
+
ret = ath12k_mhi_get_msi(ab_pci);
if (ret) {
ath12k_err(ab, "failed to get msi for mhi\n");
diff --git a/drivers/net/wireless/ath/ath12k/p2p.c b/drivers/net/wireless/ath/ath12k/p2p.c
new file mode 100644
index 000000000000..d334df720032
--- /dev/null
+++ b/drivers/net/wireless/ath/ath12k/p2p.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: BSD-3-Clause-Clear
+/*
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <net/mac80211.h>
+#include "core.h"
+#include "mac.h"
+#include "p2p.h"
+
+static void ath12k_p2p_noa_ie_fill(u8 *data, size_t len,
+ const struct ath12k_wmi_p2p_noa_info *noa)
+{
+ struct ieee80211_p2p_noa_attr *noa_attr;
+ u8 ctwindow = le32_get_bits(noa->noa_attr, WMI_P2P_NOA_INFO_CTWIN_TU);
+ bool oppps = le32_get_bits(noa->noa_attr, WMI_P2P_NOA_INFO_OPP_PS);
+ __le16 *noa_attr_len;
+ u16 attr_len;
+ u8 noa_descriptors = le32_get_bits(noa->noa_attr,
+ WMI_P2P_NOA_INFO_DESC_NUM);
+ int i;
+
+ /* P2P IE */
+ data[0] = WLAN_EID_VENDOR_SPECIFIC;
+ data[1] = len - 2;
+ data[2] = (WLAN_OUI_WFA >> 16) & 0xff;
+ data[3] = (WLAN_OUI_WFA >> 8) & 0xff;
+ data[4] = (WLAN_OUI_WFA >> 0) & 0xff;
+ data[5] = WLAN_OUI_TYPE_WFA_P2P;
+
+ /* NOA ATTR */
+ data[6] = IEEE80211_P2P_ATTR_ABSENCE_NOTICE;
+ noa_attr_len = (__le16 *)&data[7]; /* 2 bytes */
+ noa_attr = (struct ieee80211_p2p_noa_attr *)&data[9];
+
+ noa_attr->index = le32_get_bits(noa->noa_attr,
+ WMI_P2P_NOA_INFO_INDEX);
+ noa_attr->oppps_ctwindow = ctwindow;
+ if (oppps)
+ noa_attr->oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT;
+
+ for (i = 0; i < noa_descriptors; i++) {
+ noa_attr->desc[i].count =
+ __le32_to_cpu(noa->descriptors[i].type_count);
+ noa_attr->desc[i].duration = noa->descriptors[i].duration;
+ noa_attr->desc[i].interval = noa->descriptors[i].interval;
+ noa_attr->desc[i].start_time = noa->descriptors[i].start_time;
+ }
+
+ attr_len = 2; /* index + oppps_ctwindow */
+ attr_len += noa_descriptors * sizeof(struct ieee80211_p2p_noa_desc);
+ *noa_attr_len = __cpu_to_le16(attr_len);
+}
+
+static size_t ath12k_p2p_noa_ie_len_compute(const struct ath12k_wmi_p2p_noa_info *noa)
+{
+ size_t len = 0;
+
+ if (!(le32_get_bits(noa->noa_attr, WMI_P2P_NOA_INFO_DESC_NUM)) &&
+ !(le32_get_bits(noa->noa_attr, WMI_P2P_NOA_INFO_OPP_PS)))
+ return 0;
+
+ len += 1 + 1 + 4; /* EID + len + OUI */
+ len += 1 + 2; /* noa attr + attr len */
+ len += 1 + 1; /* index + oppps_ctwindow */
+ len += le32_get_bits(noa->noa_attr, WMI_P2P_NOA_INFO_DESC_NUM) *
+ sizeof(struct ieee80211_p2p_noa_desc);
+
+ return len;
+}
+
+static void ath12k_p2p_noa_ie_assign(struct ath12k_vif *arvif, void *ie,
+ size_t len)
+{
+ struct ath12k *ar = arvif->ar;
+
+ lockdep_assert_held(&ar->data_lock);
+
+ kfree(arvif->u.ap.noa_data);
+
+ arvif->u.ap.noa_data = ie;
+ arvif->u.ap.noa_len = len;
+}
+
+static void __ath12k_p2p_noa_update(struct ath12k_vif *arvif,
+ const struct ath12k_wmi_p2p_noa_info *noa)
+{
+ struct ath12k *ar = arvif->ar;
+ void *ie;
+ size_t len;
+
+ lockdep_assert_held(&ar->data_lock);
+
+ ath12k_p2p_noa_ie_assign(arvif, NULL, 0);
+
+ len = ath12k_p2p_noa_ie_len_compute(noa);
+ if (!len)
+ return;
+
+ ie = kmalloc(len, GFP_ATOMIC);
+ if (!ie)
+ return;
+
+ ath12k_p2p_noa_ie_fill(ie, len, noa);
+ ath12k_p2p_noa_ie_assign(arvif, ie, len);
+}
+
+void ath12k_p2p_noa_update(struct ath12k_vif *arvif,
+ const struct ath12k_wmi_p2p_noa_info *noa)
+{
+ struct ath12k *ar = arvif->ar;
+
+ spin_lock_bh(&ar->data_lock);
+ __ath12k_p2p_noa_update(arvif, noa);
+ spin_unlock_bh(&ar->data_lock);
+}
+
+static void ath12k_p2p_noa_update_vdev_iter(void *data, u8 *mac,
+ struct ieee80211_vif *vif)
+{
+ struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif);
+ struct ath12k_p2p_noa_arg *arg = data;
+
+ if (arvif->vdev_id != arg->vdev_id)
+ return;
+
+ ath12k_p2p_noa_update(arvif, arg->noa);
+}
+
+void ath12k_p2p_noa_update_by_vdev_id(struct ath12k *ar, u32 vdev_id,
+ const struct ath12k_wmi_p2p_noa_info *noa)
+{
+ struct ath12k_p2p_noa_arg arg = {
+ .vdev_id = vdev_id,
+ .noa = noa,
+ };
+
+ ieee80211_iterate_active_interfaces_atomic(ath12k_ar_to_hw(ar),
+ IEEE80211_IFACE_ITER_NORMAL,
+ ath12k_p2p_noa_update_vdev_iter,
+ &arg);
+}
diff --git a/drivers/net/wireless/ath/ath12k/p2p.h b/drivers/net/wireless/ath/ath12k/p2p.h
new file mode 100644
index 000000000000..5768139a7844
--- /dev/null
+++ b/drivers/net/wireless/ath/ath12k/p2p.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause-Clear */
+/*
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved..
+ */
+
+#ifndef ATH12K_P2P_H
+#define ATH12K_P2P_H
+
+#include "wmi.h"
+
+struct ath12k_wmi_p2p_noa_info;
+
+struct ath12k_p2p_noa_arg {
+ u32 vdev_id;
+ const struct ath12k_wmi_p2p_noa_info *noa;
+};
+
+void ath12k_p2p_noa_update(struct ath12k_vif *arvif,
+ const struct ath12k_wmi_p2p_noa_info *noa);
+void ath12k_p2p_noa_update_by_vdev_id(struct ath12k *ar, u32 vdev_id,
+ const struct ath12k_wmi_p2p_noa_info *noa);
+
+#endif
diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c
index f0d2e2d8719c..14954bc05144 100644
--- a/drivers/net/wireless/ath/ath12k/pci.c
+++ b/drivers/net/wireless/ath/ath12k/pci.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2019-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
@@ -39,6 +39,10 @@
#define QCN9274_DEVICE_ID 0x1109
#define WCN7850_DEVICE_ID 0x1107
+#define PCIE_LOCAL_REG_QRTR_NODE_ID 0x1E03164
+#define DOMAIN_NUMBER_MASK GENMASK(7, 4)
+#define BUS_NUMBER_MASK GENMASK(3, 0)
+
static const struct pci_device_id ath12k_pci_id_table[] = {
{ PCI_VDEVICE(QCOM, QCN9274_DEVICE_ID) },
{ PCI_VDEVICE(QCOM, WCN7850_DEVICE_ID) },
@@ -201,18 +205,17 @@ static u32 ath12k_pci_get_window_start(struct ath12k_base *ab,
/* If offset lies within CE register range, use 2nd window */
else if ((offset ^ HAL_CE_WFSS_CE_REG_BASE) < WINDOW_RANGE_MASK)
window_start = 2 * WINDOW_START;
- /* If offset lies within PCI_BAR_WINDOW0_BASE and within PCI_SOC_PCI_REG_BASE
- * use 0th window
- */
- else if (((offset ^ PCI_BAR_WINDOW0_BASE) < WINDOW_RANGE_MASK) &&
- !((offset ^ PCI_SOC_PCI_REG_BASE) < PCI_SOC_RANGE_MASK))
- window_start = 0;
else
window_start = WINDOW_START;
return window_start;
}
+static inline bool ath12k_pci_is_offset_within_mhi_region(u32 offset)
+{
+ return (offset >= PCI_MHIREGLEN_REG && offset <= PCI_MHI_REGION_END);
+}
+
static void ath12k_pci_soc_global_reset(struct ath12k_base *ab)
{
u32 val, delay;
@@ -682,12 +685,22 @@ static void ath12k_pci_init_qmi_ce_config(struct ath12k_base *ab)
{
struct ath12k_qmi_ce_cfg *cfg = &ab->qmi.ce_cfg;
+ struct ath12k_pci *ab_pci = ath12k_pci_priv(ab);
+ struct pci_bus *bus = ab_pci->pdev->bus;
+
cfg->tgt_ce = ab->hw_params->target_ce_config;
cfg->tgt_ce_len = ab->hw_params->target_ce_count;
cfg->svc_to_ce_map = ab->hw_params->svc_to_ce_map;
cfg->svc_to_ce_map_len = ab->hw_params->svc_to_ce_map_len;
ab->qmi.service_ins_id = ab->hw_params->qmi_service_ins_id;
+
+ if (test_bit(ATH12K_FW_FEATURE_MULTI_QRTR_ID, ab->fw.fw_features)) {
+ ab_pci->qmi_instance =
+ u32_encode_bits(pci_domain_nr(bus), DOMAIN_NUMBER_MASK) |
+ u32_encode_bits(bus->number, BUS_NUMBER_MASK);
+ ab->qmi.service_ins_id += ab_pci->qmi_instance;
+ }
}
static void ath12k_pci_ce_irqs_enable(struct ath12k_base *ab)
@@ -901,6 +914,26 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci)
set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags);
}
+static void ath12k_pci_update_qrtr_node_id(struct ath12k_base *ab)
+{
+ struct ath12k_pci *ab_pci = ath12k_pci_priv(ab);
+ u32 reg;
+
+ /* On platforms with two or more identical mhi devices, qmi service run
+ * with identical qrtr-node-id. Because of this identical ID qrtr-lookup
+ * cannot register more than one qmi service with identical node ID.
+ *
+ * This generates a unique instance ID from PCIe domain number and bus number,
+ * writes to the given register, it is available for firmware when the QMI service
+ * is spawned.
+ */
+ reg = PCIE_LOCAL_REG_QRTR_NODE_ID & WINDOW_RANGE_MASK;
+ ath12k_pci_write32(ab, reg, ab_pci->qmi_instance);
+
+ ath12k_dbg(ab, ATH12K_DBG_PCI, "pci reg 0x%x instance 0x%x read val 0x%x\n",
+ reg, ab_pci->qmi_instance, ath12k_pci_read32(ab, reg));
+}
+
static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci)
{
if (test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags))
@@ -1138,15 +1171,17 @@ u32 ath12k_pci_read32(struct ath12k_base *ab, u32 offset)
if (window_start == WINDOW_START) {
spin_lock_bh(&ab_pci->window_lock);
ath12k_pci_select_window(ab_pci, offset);
- val = ioread32(ab->mem + window_start +
- (offset & WINDOW_RANGE_MASK));
+
+ if (ath12k_pci_is_offset_within_mhi_region(offset)) {
+ offset = offset - PCI_MHIREGLEN_REG;
+ val = ioread32(ab->mem +
+ (offset & WINDOW_RANGE_MASK));
+ } else {
+ val = ioread32(ab->mem + window_start +
+ (offset & WINDOW_RANGE_MASK));
+ }
spin_unlock_bh(&ab_pci->window_lock);
} else {
- if ((!window_start) &&
- (offset >= PCI_MHIREGLEN_REG &&
- offset <= PCI_MHI_REGION_END))
- offset = offset - PCI_MHIREGLEN_REG;
-
val = ioread32(ab->mem + window_start +
(offset & WINDOW_RANGE_MASK));
}
@@ -1183,15 +1218,17 @@ void ath12k_pci_write32(struct ath12k_base *ab, u32 offset, u32 value)
if (window_start == WINDOW_START) {
spin_lock_bh(&ab_pci->window_lock);
ath12k_pci_select_window(ab_pci, offset);
- iowrite32(value, ab->mem + window_start +
- (offset & WINDOW_RANGE_MASK));
+
+ if (ath12k_pci_is_offset_within_mhi_region(offset)) {
+ offset = offset - PCI_MHIREGLEN_REG;
+ iowrite32(value, ab->mem +
+ (offset & WINDOW_RANGE_MASK));
+ } else {
+ iowrite32(value, ab->mem + window_start +
+ (offset & WINDOW_RANGE_MASK));
+ }
spin_unlock_bh(&ab_pci->window_lock);
} else {
- if ((!window_start) &&
- (offset >= PCI_MHIREGLEN_REG &&
- offset <= PCI_MHI_REGION_END))
- offset = offset - PCI_MHIREGLEN_REG;
-
iowrite32(value, ab->mem + window_start +
(offset & WINDOW_RANGE_MASK));
}
@@ -1219,6 +1256,9 @@ int ath12k_pci_power_up(struct ath12k_base *ab)
ath12k_pci_msi_enable(ab_pci);
+ if (test_bit(ATH12K_FW_FEATURE_MULTI_QRTR_ID, ab->fw.fw_features))
+ ath12k_pci_update_qrtr_node_id(ab);
+
ret = ath12k_mhi_start(ab_pci);
if (ret) {
ath12k_err(ab, "failed to start mhi: %d\n", ret);
@@ -1310,11 +1350,21 @@ static int ath12k_pci_probe(struct pci_dev *pdev,
goto err_free_core;
}
+ ath12k_dbg(ab, ATH12K_DBG_BOOT, "pci probe %04x:%04x %04x:%04x\n",
+ pdev->vendor, pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device);
+
+ ab->id.vendor = pdev->vendor;
+ ab->id.device = pdev->device;
+ ab->id.subsystem_vendor = pdev->subsystem_vendor;
+ ab->id.subsystem_device = pdev->subsystem_device;
+
switch (pci_dev->device) {
case QCN9274_DEVICE_ID:
ab_pci->msi_config = &ath12k_msi_config[0];
ab->static_window_map = true;
ab_pci->pci_ops = &ath12k_pci_ops_qcn9274;
+ ab->hal_rx_ops = &hal_rx_qcn9274_ops;
ath12k_pci_read_hw_version(ab, &soc_hw_version_major,
&soc_hw_version_minor);
switch (soc_hw_version_major) {
@@ -1333,9 +1383,11 @@ static int ath12k_pci_probe(struct pci_dev *pdev,
}
break;
case WCN7850_DEVICE_ID:
+ ab->id.bdf_search = ATH12K_BDF_SEARCH_BUS_AND_BOARD;
ab_pci->msi_config = &ath12k_msi_config[0];
ab->static_window_map = false;
ab_pci->pci_ops = &ath12k_pci_ops_wcn7850;
+ ab->hal_rx_ops = &hal_rx_wcn7850_ops;
ath12k_pci_read_hw_version(ab, &soc_hw_version_major,
&soc_hw_version_minor);
switch (soc_hw_version_major) {
diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h
index b2edf32ada20..ca93693ba4e9 100644
--- a/drivers/net/wireless/ath/ath12k/pci.h
+++ b/drivers/net/wireless/ath/ath12k/pci.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2019-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_PCI_H
#define ATH12K_PCI_H
@@ -53,6 +53,9 @@
#define WLAON_QFPROM_PWR_CTRL_REG 0x01f8031c
#define QFPROM_PWR_CTRL_VDD4BLOW_MASK 0x4
+#define QCN9274_QFPROM_RAW_RFA_PDET_ROW13_LSB 0x1E20338
+#define OTP_BOARD_ID_MASK GENMASK(15, 0)
+
#define PCI_BAR_WINDOW0_BASE 0x1E00000
#define PCI_BAR_WINDOW0_END 0x1E7FFFC
#define PCI_SOC_RANGE_MASK 0x3FFF
@@ -111,6 +114,7 @@ struct ath12k_pci {
u16 link_ctl;
unsigned long irq_flags;
const struct ath12k_pci_ops *pci_ops;
+ u32 qmi_instance;
};
static inline struct ath12k_pci *ath12k_pci_priv(struct ath12k_base *ab)
diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c
index 77a132f6bbd1..92845ffff44a 100644
--- a/drivers/net/wireless/ath/ath12k/qmi.c
+++ b/drivers/net/wireless/ath/ath12k/qmi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/elf.h>
@@ -17,7 +17,7 @@
#define PLATFORM_CAP_PCIE_GLOBAL_RESET 0x08
#define ATH12K_QMI_MAX_CHUNK_SIZE 2097152
-static struct qmi_elem_info wlfw_host_mlo_chip_info_s_v01_ei[] = {
+static const struct qmi_elem_info wlfw_host_mlo_chip_info_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_1_BYTE,
.elem_len = 1,
@@ -61,7 +61,7 @@ static struct qmi_elem_info wlfw_host_mlo_chip_info_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_host_cap_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_host_cap_req_msg_v01_ei[] = {
{
.data_type = QMI_OPT_FLAG,
.elem_len = 1,
@@ -511,7 +511,7 @@ static struct qmi_elem_info qmi_wlanfw_host_cap_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_host_cap_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_host_cap_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -528,7 +528,68 @@ static struct qmi_elem_info qmi_wlanfw_host_cap_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_ind_register_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_phy_cap_req_msg_v01_ei[] = {
+ {
+ .data_type = QMI_EOTI,
+ .array_type = NO_ARRAY,
+ .tlv_type = QMI_COMMON_TLV_TYPE,
+ },
+};
+
+static const struct qmi_elem_info qmi_wlanfw_phy_cap_resp_msg_v01_ei[] = {
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct qmi_response_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x02,
+ .offset = offsetof(struct qmi_wlanfw_phy_cap_resp_msg_v01, resp),
+ .ei_array = qmi_response_type_v01_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x10,
+ .offset = offsetof(struct qmi_wlanfw_phy_cap_resp_msg_v01,
+ num_phy_valid),
+ },
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x10,
+ .offset = offsetof(struct qmi_wlanfw_phy_cap_resp_msg_v01,
+ num_phy),
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x11,
+ .offset = offsetof(struct qmi_wlanfw_phy_cap_resp_msg_v01,
+ board_id_valid),
+ },
+ {
+ .data_type = QMI_UNSIGNED_4_BYTE,
+ .elem_len = 1,
+ .elem_size = sizeof(u32),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x11,
+ .offset = offsetof(struct qmi_wlanfw_phy_cap_resp_msg_v01,
+ board_id),
+ },
+ {
+ .data_type = QMI_EOTI,
+ .array_type = NO_ARRAY,
+ .tlv_type = QMI_COMMON_TLV_TYPE,
+ },
+};
+
+static const struct qmi_elem_info qmi_wlanfw_ind_register_req_msg_v01_ei[] = {
{
.data_type = QMI_OPT_FLAG,
.elem_len = 1,
@@ -753,7 +814,7 @@ static struct qmi_elem_info qmi_wlanfw_ind_register_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_ind_register_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_ind_register_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -789,7 +850,7 @@ static struct qmi_elem_info qmi_wlanfw_ind_register_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_mem_cfg_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_mem_cfg_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_8_BYTE,
.elem_len = 1,
@@ -821,7 +882,7 @@ static struct qmi_elem_info qmi_wlanfw_mem_cfg_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_mem_seg_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_mem_seg_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -863,7 +924,7 @@ static struct qmi_elem_info qmi_wlanfw_mem_seg_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_request_mem_ind_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_request_mem_ind_msg_v01_ei[] = {
{
.data_type = QMI_DATA_LEN,
.elem_len = 1,
@@ -890,7 +951,7 @@ static struct qmi_elem_info qmi_wlanfw_request_mem_ind_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_mem_seg_resp_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_mem_seg_resp_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_8_BYTE,
.elem_len = 1,
@@ -930,7 +991,7 @@ static struct qmi_elem_info qmi_wlanfw_mem_seg_resp_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_respond_mem_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_respond_mem_req_msg_v01_ei[] = {
{
.data_type = QMI_DATA_LEN,
.elem_len = 1,
@@ -957,7 +1018,7 @@ static struct qmi_elem_info qmi_wlanfw_respond_mem_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_respond_mem_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_respond_mem_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -975,7 +1036,7 @@ static struct qmi_elem_info qmi_wlanfw_respond_mem_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_cap_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_cap_req_msg_v01_ei[] = {
{
.data_type = QMI_EOTI,
.array_type = NO_ARRAY,
@@ -983,7 +1044,7 @@ static struct qmi_elem_info qmi_wlanfw_cap_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_rf_chip_info_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_rf_chip_info_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1009,7 +1070,7 @@ static struct qmi_elem_info qmi_wlanfw_rf_chip_info_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_rf_board_info_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_rf_board_info_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1026,7 +1087,7 @@ static struct qmi_elem_info qmi_wlanfw_rf_board_info_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_soc_info_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_soc_info_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1042,7 +1103,7 @@ static struct qmi_elem_info qmi_wlanfw_soc_info_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_dev_mem_info_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_dev_mem_info_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_8_BYTE,
.elem_len = 1,
@@ -1068,7 +1129,7 @@ static struct qmi_elem_info qmi_wlanfw_dev_mem_info_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_fw_version_info_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_fw_version_info_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1094,7 +1155,7 @@ static struct qmi_elem_info qmi_wlanfw_fw_version_info_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_cap_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_cap_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -1348,7 +1409,7 @@ static struct qmi_elem_info qmi_wlanfw_cap_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_bdf_download_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_bdf_download_req_msg_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_1_BYTE,
.elem_len = 1,
@@ -1483,7 +1544,7 @@ static struct qmi_elem_info qmi_wlanfw_bdf_download_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_bdf_download_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_bdf_download_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -1501,7 +1562,7 @@ static struct qmi_elem_info qmi_wlanfw_bdf_download_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_m3_info_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_m3_info_req_msg_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_8_BYTE,
.elem_len = 1,
@@ -1525,7 +1586,7 @@ static struct qmi_elem_info qmi_wlanfw_m3_info_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_m3_info_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_m3_info_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -1542,7 +1603,7 @@ static struct qmi_elem_info qmi_wlanfw_m3_info_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_ce_tgt_pipe_cfg_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_ce_tgt_pipe_cfg_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1595,7 +1656,7 @@ static struct qmi_elem_info qmi_wlanfw_ce_tgt_pipe_cfg_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_ce_svc_pipe_cfg_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_ce_svc_pipe_cfg_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1630,7 +1691,7 @@ static struct qmi_elem_info qmi_wlanfw_ce_svc_pipe_cfg_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_shadow_reg_cfg_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_shadow_reg_cfg_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_2_BYTE,
.elem_len = 1,
@@ -1654,7 +1715,7 @@ static struct qmi_elem_info qmi_wlanfw_shadow_reg_cfg_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_shadow_reg_v3_cfg_s_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_shadow_reg_v3_cfg_s_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1671,7 +1732,7 @@ static struct qmi_elem_info qmi_wlanfw_shadow_reg_v3_cfg_s_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_wlan_mode_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_wlan_mode_req_msg_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_4_BYTE,
.elem_len = 1,
@@ -1706,7 +1767,7 @@ static struct qmi_elem_info qmi_wlanfw_wlan_mode_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_wlan_mode_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_wlan_mode_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -1724,7 +1785,7 @@ static struct qmi_elem_info qmi_wlanfw_wlan_mode_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_wlan_cfg_req_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_wlan_cfg_req_msg_v01_ei[] = {
{
.data_type = QMI_OPT_FLAG,
.elem_len = 1,
@@ -1862,7 +1923,7 @@ static struct qmi_elem_info qmi_wlanfw_wlan_cfg_req_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_wlan_cfg_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_wlan_cfg_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -1879,22 +1940,78 @@ static struct qmi_elem_info qmi_wlanfw_wlan_cfg_resp_msg_v01_ei[] = {
},
};
-static struct qmi_elem_info qmi_wlanfw_mem_ready_ind_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_mem_ready_ind_msg_v01_ei[] = {
{
.data_type = QMI_EOTI,
.array_type = NO_ARRAY,
},
};
-static struct qmi_elem_info qmi_wlanfw_fw_ready_ind_msg_v01_ei[] = {
+static const struct qmi_elem_info qmi_wlanfw_fw_ready_ind_msg_v01_ei[] = {
{
.data_type = QMI_EOTI,
.array_type = NO_ARRAY,
},
};
-static void ath12k_host_cap_parse_mlo(struct qmi_wlanfw_host_cap_req_msg_v01 *req)
+static const struct qmi_elem_info qmi_wlanfw_wlan_ini_req_msg_v01_ei[] = {
+ {
+ .data_type = QMI_OPT_FLAG,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x10,
+ .offset = offsetof(struct qmi_wlanfw_wlan_ini_req_msg_v01,
+ enable_fwlog_valid),
+ },
+ {
+ .data_type = QMI_UNSIGNED_1_BYTE,
+ .elem_len = 1,
+ .elem_size = sizeof(u8),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x10,
+ .offset = offsetof(struct qmi_wlanfw_wlan_ini_req_msg_v01,
+ enable_fwlog),
+ },
+ {
+ .data_type = QMI_EOTI,
+ .array_type = NO_ARRAY,
+ .tlv_type = QMI_COMMON_TLV_TYPE,
+ },
+};
+
+static const struct qmi_elem_info qmi_wlanfw_wlan_ini_resp_msg_v01_ei[] = {
+ {
+ .data_type = QMI_STRUCT,
+ .elem_len = 1,
+ .elem_size = sizeof(struct qmi_response_type_v01),
+ .array_type = NO_ARRAY,
+ .tlv_type = 0x02,
+ .offset = offsetof(struct qmi_wlanfw_wlan_ini_resp_msg_v01,
+ resp),
+ .ei_array = qmi_response_type_v01_ei,
+ },
+ {
+ .data_type = QMI_EOTI,
+ .array_type = NO_ARRAY,
+ .tlv_type = QMI_COMMON_TLV_TYPE,
+ },
+};
+
+static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab,
+ struct qmi_wlanfw_host_cap_req_msg_v01 *req)
{
+ struct wlfw_host_mlo_chip_info_s_v01 *info;
+ u8 hw_link_id = 0;
+ int i;
+
+ if (!ab->qmi.num_radios || ab->qmi.num_radios == U8_MAX) {
+ ath12k_dbg(ab, ATH12K_DBG_QMI,
+ "skip QMI MLO cap due to invalid num_radio %d\n",
+ ab->qmi.num_radios);
+ return;
+ }
+
req->mlo_capable_valid = 1;
req->mlo_capable = 1;
req->mlo_chip_id_valid = 1;
@@ -1905,28 +2022,31 @@ static void ath12k_host_cap_parse_mlo(struct qmi_wlanfw_host_cap_req_msg_v01 *re
/* Max peer number generally won't change for the same device
* but needs to be synced with host driver.
*/
- req->max_mlo_peer = 32;
+ req->max_mlo_peer = ab->hw_params->max_mlo_peer;
req->mlo_num_chips_valid = 1;
req->mlo_num_chips = 1;
+
+ info = &req->mlo_chip_info[0];
+ info->chip_id = 0;
+ info->num_local_links = ab->qmi.num_radios;
+
+ for (i = 0; i < info->num_local_links; i++) {
+ info->hw_link_id[i] = hw_link_id;
+ info->valid_mlo_link_id[i] = 1;
+
+ hw_link_id++;
+ }
+
req->mlo_chip_info_valid = 1;
- req->mlo_chip_info[0].chip_id = 0;
- req->mlo_chip_info[0].num_local_links = 2;
- req->mlo_chip_info[0].hw_link_id[0] = 0;
- req->mlo_chip_info[0].hw_link_id[1] = 1;
- req->mlo_chip_info[0].valid_mlo_link_id[0] = 1;
- req->mlo_chip_info[0].valid_mlo_link_id[1] = 1;
}
static int ath12k_qmi_host_cap_send(struct ath12k_base *ab)
{
- struct qmi_wlanfw_host_cap_req_msg_v01 req;
- struct qmi_wlanfw_host_cap_resp_msg_v01 resp;
- struct qmi_txn txn = {};
+ struct qmi_wlanfw_host_cap_req_msg_v01 req = {};
+ struct qmi_wlanfw_host_cap_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
int ret = 0;
- memset(&req, 0, sizeof(req));
- memset(&resp, 0, sizeof(resp));
-
req.num_clients_valid = 1;
req.num_clients = 1;
req.mem_cfg_mode = ab->qmi.target_mem_mode;
@@ -1963,10 +2083,10 @@ static int ath12k_qmi_host_cap_send(struct ath12k_base *ab)
*/
req.nm_modem |= SLEEP_CLOCK_SELECT_INTERNAL_BIT;
req.nm_modem |= PLATFORM_CAP_PCIE_GLOBAL_RESET;
-
- ath12k_host_cap_parse_mlo(&req);
}
+ ath12k_host_cap_parse_mlo(ab, &req);
+
ret = qmi_txn_init(&ab->qmi.handle, &txn,
qmi_wlanfw_host_cap_resp_msg_v01_ei, &resp);
if (ret < 0)
@@ -1977,6 +2097,7 @@ static int ath12k_qmi_host_cap_send(struct ath12k_base *ab)
QMI_WLANFW_HOST_CAP_REQ_MSG_V01_MAX_LEN,
qmi_wlanfw_host_cap_req_msg_v01_ei, &req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "Failed to send host capability request,err = %d\n", ret);
goto out;
}
@@ -1996,6 +2117,62 @@ out:
return ret;
}
+static void ath12k_qmi_phy_cap_send(struct ath12k_base *ab)
+{
+ struct qmi_wlanfw_phy_cap_req_msg_v01 req = {};
+ struct qmi_wlanfw_phy_cap_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
+ int ret;
+
+ if (!ab->slo_capable)
+ goto out;
+
+ ret = qmi_txn_init(&ab->qmi.handle, &txn,
+ qmi_wlanfw_phy_cap_resp_msg_v01_ei, &resp);
+ if (ret < 0)
+ goto out;
+
+ ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
+ QMI_WLANFW_PHY_CAP_REQ_V01,
+ QMI_WLANFW_PHY_CAP_REQ_MSG_V01_MAX_LEN,
+ qmi_wlanfw_phy_cap_req_msg_v01_ei, &req);
+ if (ret < 0) {
+ qmi_txn_cancel(&txn);
+ ath12k_warn(ab, "failed to send phy capability request: %d\n", ret);
+ goto out;
+ }
+
+ ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH12K_QMI_WLANFW_TIMEOUT_MS));
+ if (ret < 0)
+ goto out;
+
+ if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (!resp.num_phy_valid) {
+ ret = -ENODATA;
+ goto out;
+ }
+
+ ab->qmi.num_radios = resp.num_phy;
+
+ ath12k_dbg(ab, ATH12K_DBG_QMI, "phy capability resp valid %d num_phy %d valid %d board_id %d\n",
+ resp.num_phy_valid, resp.num_phy,
+ resp.board_id_valid, resp.board_id);
+
+ return;
+
+out:
+ /* If PHY capability not advertised then rely on default num link */
+ ab->qmi.num_radios = ab->hw_params->def_num_link;
+
+ ath12k_dbg(ab, ATH12K_DBG_QMI,
+ "no valid response from PHY capability, choose default num_phy %d\n",
+ ab->qmi.num_radios);
+}
+
static int ath12k_qmi_fw_ind_register_send(struct ath12k_base *ab)
{
struct qmi_wlanfw_ind_register_req_msg_v01 *req;
@@ -2040,6 +2217,7 @@ static int ath12k_qmi_fw_ind_register_send(struct ath12k_base *ab)
QMI_WLANFW_IND_REGISTER_REQ_MSG_V01_MAX_LEN,
qmi_wlanfw_ind_register_req_msg_v01_ei, req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "Failed to send indication register request, err = %d\n",
ret);
goto out;
@@ -2068,8 +2246,8 @@ resp_out:
static int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab)
{
struct qmi_wlanfw_respond_mem_req_msg_v01 *req;
- struct qmi_wlanfw_respond_mem_resp_msg_v01 resp;
- struct qmi_txn txn = {};
+ struct qmi_wlanfw_respond_mem_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
int ret = 0, i;
bool delayed;
@@ -2077,8 +2255,6 @@ static int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab)
if (!req)
return -ENOMEM;
- memset(&resp, 0, sizeof(resp));
-
/* Some targets by default request a block of big contiguous
* DMA memory, it's hard to allocate from kernel. So host returns
* failure to firmware and firmware then request multiple blocks of
@@ -2088,7 +2264,6 @@ static int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab)
delayed = true;
ath12k_dbg(ab, ATH12K_DBG_QMI, "qmi delays mem_request %d\n",
ab->qmi.mem_seg_count);
- memset(req, 0, sizeof(*req));
} else {
delayed = false;
req->mem_seg_len = ab->qmi.mem_seg_count;
@@ -2114,6 +2289,7 @@ static int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab)
QMI_WLANFW_RESPOND_MEM_REQ_MSG_V01_MAX_LEN,
qmi_wlanfw_respond_mem_req_msg_v01_ei, req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "qmi failed to respond memory request, err = %d\n",
ret);
goto out;
@@ -2208,17 +2384,14 @@ static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab)
static int ath12k_qmi_request_target_cap(struct ath12k_base *ab)
{
- struct qmi_wlanfw_cap_req_msg_v01 req;
- struct qmi_wlanfw_cap_resp_msg_v01 resp;
- struct qmi_txn txn = {};
+ struct qmi_wlanfw_cap_req_msg_v01 req = {};
+ struct qmi_wlanfw_cap_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
unsigned int board_id = ATH12K_BOARD_ID_DEFAULT;
int ret = 0;
int r;
int i;
- memset(&req, 0, sizeof(req));
- memset(&resp, 0, sizeof(resp));
-
ret = qmi_txn_init(&ab->qmi.handle, &txn,
qmi_wlanfw_cap_resp_msg_v01_ei, &resp);
if (ret < 0)
@@ -2229,6 +2402,7 @@ static int ath12k_qmi_request_target_cap(struct ath12k_base *ab)
QMI_WLANFW_CAP_REQ_MSG_V01_MAX_LEN,
qmi_wlanfw_cap_req_msg_v01_ei, &req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "qmi failed to send target cap request, err = %d\n",
ret);
goto out;
@@ -2310,8 +2484,8 @@ static int ath12k_qmi_load_file_target_mem(struct ath12k_base *ab,
const u8 *data, u32 len, u8 type)
{
struct qmi_wlanfw_bdf_download_req_msg_v01 *req;
- struct qmi_wlanfw_bdf_download_resp_msg_v01 resp;
- struct qmi_txn txn = {};
+ struct qmi_wlanfw_bdf_download_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
const u8 *temp = data;
int ret;
u32 remaining = len;
@@ -2319,7 +2493,6 @@ static int ath12k_qmi_load_file_target_mem(struct ath12k_base *ab,
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
- memset(&resp, 0, sizeof(resp));
while (remaining) {
req->valid = 1;
@@ -2423,8 +2596,7 @@ static int ath12k_qmi_load_bdf_qmi(struct ath12k_base *ab,
break;
case ATH12K_QMI_BDF_TYPE_REGDB:
- ret = ath12k_core_fetch_board_data_api_1(ab, &bd,
- ATH12K_REGDB_FILE_NAME);
+ ret = ath12k_core_fetch_regdb(ab, &bd);
if (ret) {
ath12k_warn(ab, "qmi failed to load regdb bin:\n");
goto out;
@@ -2497,37 +2669,56 @@ out:
static int ath12k_qmi_m3_load(struct ath12k_base *ab)
{
struct m3_mem_region *m3_mem = &ab->qmi.m3_mem;
- const struct firmware *fw;
+ const struct firmware *fw = NULL;
+ const void *m3_data;
char path[100];
+ size_t m3_len;
int ret;
- if (m3_mem->vaddr || m3_mem->size)
+ if (m3_mem->vaddr)
+ /* m3 firmware buffer is already available in the DMA buffer */
return 0;
- fw = ath12k_core_firmware_request(ab, ATH12K_M3_FILE);
- if (IS_ERR(fw)) {
- ret = PTR_ERR(fw);
- ath12k_core_create_firmware_path(ab, ATH12K_M3_FILE,
- path, sizeof(path));
- ath12k_err(ab, "failed to load %s: %d\n", path, ret);
- return ret;
+ if (ab->fw.m3_data && ab->fw.m3_len > 0) {
+ /* firmware-N.bin had a m3 firmware file so use that */
+ m3_data = ab->fw.m3_data;
+ m3_len = ab->fw.m3_len;
+ } else {
+ /* No m3 file in firmware-N.bin so try to request old
+ * separate m3.bin.
+ */
+ fw = ath12k_core_firmware_request(ab, ATH12K_M3_FILE);
+ if (IS_ERR(fw)) {
+ ret = PTR_ERR(fw);
+ ath12k_core_create_firmware_path(ab, ATH12K_M3_FILE,
+ path, sizeof(path));
+ ath12k_err(ab, "failed to load %s: %d\n", path, ret);
+ return ret;
+ }
+
+ m3_data = fw->data;
+ m3_len = fw->size;
}
m3_mem->vaddr = dma_alloc_coherent(ab->dev,
- fw->size, &m3_mem->paddr,
+ m3_len, &m3_mem->paddr,
GFP_KERNEL);
if (!m3_mem->vaddr) {
ath12k_err(ab, "failed to allocate memory for M3 with size %zu\n",
fw->size);
- release_firmware(fw);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
- memcpy(m3_mem->vaddr, fw->data, fw->size);
- m3_mem->size = fw->size;
+ memcpy(m3_mem->vaddr, m3_data, m3_len);
+ m3_mem->size = m3_len;
+
+ ret = 0;
+
+out:
release_firmware(fw);
- return 0;
+ return ret;
}
static void ath12k_qmi_m3_free(struct ath12k_base *ab)
@@ -2546,14 +2737,11 @@ static void ath12k_qmi_m3_free(struct ath12k_base *ab)
static int ath12k_qmi_wlanfw_m3_info_send(struct ath12k_base *ab)
{
struct m3_mem_region *m3_mem = &ab->qmi.m3_mem;
- struct qmi_wlanfw_m3_info_req_msg_v01 req;
- struct qmi_wlanfw_m3_info_resp_msg_v01 resp;
- struct qmi_txn txn = {};
+ struct qmi_wlanfw_m3_info_req_msg_v01 req = {};
+ struct qmi_wlanfw_m3_info_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
int ret = 0;
- memset(&req, 0, sizeof(req));
- memset(&resp, 0, sizeof(resp));
-
ret = ath12k_qmi_m3_load(ab);
if (ret) {
ath12k_err(ab, "failed to load m3 firmware: %d", ret);
@@ -2573,6 +2761,7 @@ static int ath12k_qmi_wlanfw_m3_info_send(struct ath12k_base *ab)
QMI_WLANFW_M3_INFO_REQ_MSG_V01_MAX_MSG_LEN,
qmi_wlanfw_m3_info_req_msg_v01_ei, &req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "qmi failed to send M3 information request, err = %d\n",
ret);
goto out;
@@ -2597,14 +2786,11 @@ out:
static int ath12k_qmi_wlanfw_mode_send(struct ath12k_base *ab,
u32 mode)
{
- struct qmi_wlanfw_wlan_mode_req_msg_v01 req;
- struct qmi_wlanfw_wlan_mode_resp_msg_v01 resp;
- struct qmi_txn txn = {};
+ struct qmi_wlanfw_wlan_mode_req_msg_v01 req = {};
+ struct qmi_wlanfw_wlan_mode_resp_msg_v01 resp = {};
+ struct qmi_txn txn;
int ret = 0;
- memset(&req, 0, sizeof(req));
- memset(&resp, 0, sizeof(resp));
-
req.mode = mode;
req.hw_debug_valid = 1;
req.hw_debug = 0;
@@ -2619,6 +2805,7 @@ static int ath12k_qmi_wlanfw_mode_send(struct ath12k_base *ab,
QMI_WLANFW_WLAN_MODE_REQ_MSG_V01_MAX_LEN,
qmi_wlanfw_wlan_mode_req_msg_v01_ei, &req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "qmi failed to send mode request, mode: %d, err = %d\n",
mode, ret);
goto out;
@@ -2649,10 +2836,10 @@ out:
static int ath12k_qmi_wlanfw_wlan_cfg_send(struct ath12k_base *ab)
{
struct qmi_wlanfw_wlan_cfg_req_msg_v01 *req;
- struct qmi_wlanfw_wlan_cfg_resp_msg_v01 resp;
+ struct qmi_wlanfw_wlan_cfg_resp_msg_v01 resp = {};
struct ce_pipe_config *ce_cfg;
struct service_to_pipe *svc_cfg;
- struct qmi_txn txn = {};
+ struct qmi_txn txn;
int ret = 0, pipe_num;
ce_cfg = (struct ce_pipe_config *)ab->qmi.ce_cfg.tgt_ce;
@@ -2662,8 +2849,6 @@ static int ath12k_qmi_wlanfw_wlan_cfg_send(struct ath12k_base *ab)
if (!req)
return -ENOMEM;
- memset(&resp, 0, sizeof(resp));
-
req->host_version_valid = 1;
strscpy(req->host_version, ATH12K_HOST_VERSION_STRING,
sizeof(req->host_version));
@@ -2710,6 +2895,7 @@ static int ath12k_qmi_wlanfw_wlan_cfg_send(struct ath12k_base *ab)
QMI_WLANFW_WLAN_CFG_REQ_MSG_V01_MAX_LEN,
qmi_wlanfw_wlan_cfg_req_msg_v01_ei, req);
if (ret < 0) {
+ qmi_txn_cancel(&txn);
ath12k_warn(ab, "qmi failed to send wlan config request, err = %d\n",
ret);
goto out;
@@ -2733,6 +2919,49 @@ out:
return ret;
}
+static int ath12k_qmi_wlanfw_wlan_ini_send(struct ath12k_base *ab)
+{
+ struct qmi_wlanfw_wlan_ini_resp_msg_v01 resp = {};
+ struct qmi_wlanfw_wlan_ini_req_msg_v01 req = {};
+ struct qmi_txn txn;
+ int ret;
+
+ req.enable_fwlog_valid = true;
+ req.enable_fwlog = 1;
+
+ ret = qmi_txn_init(&ab->qmi.handle, &txn,
+ qmi_wlanfw_wlan_ini_resp_msg_v01_ei, &resp);
+ if (ret < 0)
+ goto out;
+
+ ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
+ ATH12K_QMI_WLANFW_WLAN_INI_REQ_V01,
+ QMI_WLANFW_WLAN_INI_REQ_MSG_V01_MAX_LEN,
+ qmi_wlanfw_wlan_ini_req_msg_v01_ei, &req);
+ if (ret < 0) {
+ qmi_txn_cancel(&txn);
+ ath12k_warn(ab, "failed to send QMI wlan ini request: %d\n",
+ ret);
+ goto out;
+ }
+
+ ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH12K_QMI_WLANFW_TIMEOUT_MS));
+ if (ret < 0) {
+ ath12k_warn(ab, "failed to receive QMI wlan ini request: %d\n", ret);
+ goto out;
+ }
+
+ if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
+ ath12k_warn(ab, "QMI wlan ini response failure: %d %d\n",
+ resp.resp.result, resp.resp.error);
+ ret = -EINVAL;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
void ath12k_qmi_firmware_stop(struct ath12k_base *ab)
{
int ret;
@@ -2749,6 +2978,12 @@ int ath12k_qmi_firmware_start(struct ath12k_base *ab,
{
int ret;
+ ret = ath12k_qmi_wlanfw_wlan_ini_send(ab);
+ if (ret < 0) {
+ ath12k_warn(ab, "qmi failed to send wlan fw ini: %d\n", ret);
+ return ret;
+ }
+
ret = ath12k_qmi_wlanfw_wlan_cfg_send(ab);
if (ret < 0) {
ath12k_warn(ab, "qmi failed to send wlan cfg:%d\n", ret);
@@ -2792,6 +3027,8 @@ static int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi)
struct ath12k_base *ab = qmi->ab;
int ret;
+ ath12k_qmi_phy_cap_send(ab);
+
ret = ath12k_qmi_fw_ind_register_send(ab);
if (ret < 0) {
ath12k_warn(ab, "qmi failed to send FW indication QMI:%d\n", ret);
diff --git a/drivers/net/wireless/ath/ath12k/qmi.h b/drivers/net/wireless/ath/ath12k/qmi.h
index e25bbaa125e8..6ee33c9851c6 100644
--- a/drivers/net/wireless/ath/ath12k/qmi.h
+++ b/drivers/net/wireless/ath/ath12k/qmi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_QMI_H
@@ -15,7 +15,6 @@
#define ATH12K_QMI_MAX_BDF_FILE_NAME_SIZE 64
#define ATH12K_QMI_CALDB_ADDRESS 0x4BA00000
#define ATH12K_QMI_WLANFW_MAX_BUILD_ID_LEN_V01 128
-#define ATH12K_QMI_WLFW_NODE_ID_BASE 0x07
#define ATH12K_QMI_WLFW_SERVICE_ID_V01 0x45
#define ATH12K_QMI_WLFW_SERVICE_VERS_V01 0x01
#define ATH12K_QMI_WLFW_SERVICE_INS_ID_V01 0x02
@@ -141,6 +140,7 @@ struct ath12k_qmi {
u32 target_mem_mode;
bool target_mem_delayed;
u8 cal_done;
+ u8 num_radios;
struct target_info target;
struct m3_mem_region m3_mem;
unsigned int service_ins_id;
@@ -251,6 +251,22 @@ struct qmi_wlanfw_host_cap_resp_msg_v01 {
struct qmi_response_type_v01 resp;
};
+#define QMI_WLANFW_PHY_CAP_REQ_MSG_V01_MAX_LEN 0
+#define QMI_WLANFW_PHY_CAP_REQ_V01 0x0057
+#define QMI_WLANFW_PHY_CAP_RESP_MSG_V01_MAX_LEN 18
+#define QMI_WLANFW_PHY_CAP_RESP_V01 0x0057
+
+struct qmi_wlanfw_phy_cap_req_msg_v01 {
+};
+
+struct qmi_wlanfw_phy_cap_resp_msg_v01 {
+ struct qmi_response_type_v01 resp;
+ u8 num_phy_valid;
+ u8 num_phy;
+ u8 board_id_valid;
+ u32 board_id;
+};
+
#define QMI_WLANFW_IND_REGISTER_REQ_MSG_V01_MAX_LEN 54
#define QMI_WLANFW_IND_REGISTER_REQ_V01 0x0020
#define QMI_WLANFW_IND_REGISTER_RESP_MSG_V01_MAX_LEN 18
@@ -559,6 +575,21 @@ struct qmi_wlanfw_wlan_cfg_resp_msg_v01 {
struct qmi_response_type_v01 resp;
};
+#define ATH12K_QMI_WLANFW_WLAN_INI_REQ_V01 0x002F
+#define ATH12K_QMI_WLANFW_WLAN_INI_RESP_V01 0x002F
+#define QMI_WLANFW_WLAN_INI_REQ_MSG_V01_MAX_LEN 7
+#define QMI_WLANFW_WLAN_INI_RESP_MSG_V01_MAX_LEN 7
+
+struct qmi_wlanfw_wlan_ini_req_msg_v01 {
+ /* Must be set to true if enable_fwlog is being passed */
+ u8 enable_fwlog_valid;
+ u8 enable_fwlog;
+};
+
+struct qmi_wlanfw_wlan_ini_resp_msg_v01 {
+ struct qmi_response_type_v01 resp;
+};
+
int ath12k_qmi_firmware_start(struct ath12k_base *ab,
u32 mode);
void ath12k_qmi_firmware_stop(struct ath12k_base *ab);
diff --git a/drivers/net/wireless/ath/ath12k/reg.c b/drivers/net/wireless/ath/ath12k/reg.c
index f924bc13ccff..f308e9a6ed55 100644
--- a/drivers/net/wireless/ath/ath12k/reg.c
+++ b/drivers/net/wireless/ath/ath12k/reg.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/rtnetlink.h>
#include "core.h"
@@ -48,7 +48,8 @@ ath12k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request)
{
struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
struct ath12k_wmi_init_country_arg arg;
- struct ath12k *ar = hw->priv;
+ struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k *ar = ath12k_ah_to_ar(ah);
int ret;
ath12k_dbg(ar->ab, ATH12K_DBG_REG,
@@ -95,7 +96,7 @@ int ath12k_reg_update_chan_list(struct ath12k *ar)
struct ieee80211_supported_band **bands;
struct ath12k_wmi_scan_chan_list_arg *arg;
struct ieee80211_channel *channel;
- struct ieee80211_hw *hw = ar->hw;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
struct ath12k_wmi_channel_arg *ch;
enum nl80211_band band;
int num_channels = 0;
@@ -103,7 +104,7 @@ int ath12k_reg_update_chan_list(struct ath12k *ar)
bands = hw->wiphy->bands;
for (band = 0; band < NUM_NL80211_BANDS; band++) {
- if (!bands[band])
+ if (!(ar->mac.sbands[band].channels && bands[band]))
continue;
for (i = 0; i < bands[band]->n_channels; i++) {
@@ -129,7 +130,7 @@ int ath12k_reg_update_chan_list(struct ath12k *ar)
ch = arg->channel;
for (band = 0; band < NUM_NL80211_BANDS; band++) {
- if (!bands[band])
+ if (!(ar->mac.sbands[band].channels && bands[band]))
continue;
for (i = 0; i < bands[band]->n_channels; i++) {
@@ -199,7 +200,7 @@ static void ath12k_copy_regd(struct ieee80211_regdomain *regd_orig,
int ath12k_regd_update(struct ath12k *ar, bool init)
{
- struct ieee80211_hw *hw = ar->hw;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
struct ieee80211_regdomain *regd, *regd_copy = NULL;
int ret, regd_len, pdev_id;
struct ath12k_base *ab;
diff --git a/drivers/net/wireless/ath/ath12k/rx_desc.h b/drivers/net/wireless/ath/ath12k/rx_desc.h
index 55f20c446ca9..a0db6702a189 100644
--- a/drivers/net/wireless/ath/ath12k/rx_desc.h
+++ b/drivers/net/wireless/ath/ath12k/rx_desc.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_RX_DESC_H
#define ATH12K_RX_DESC_H
@@ -147,6 +147,61 @@ struct rx_mpdu_start_qcn9274 {
__le32 res1;
} __packed;
+#define QCN9274_MPDU_START_SELECT_MPDU_START_TAG BIT(0)
+#define QCN9274_MPDU_START_SELECT_INFO0_REO_QUEUE_DESC_LO BIT(1)
+#define QCN9274_MPDU_START_SELECT_INFO1_PN_31_0 BIT(2)
+#define QCN9274_MPDU_START_SELECT_PN_95_32 BIT(3)
+#define QCN9274_MPDU_START_SELECT_PN_127_96_INFO2 BIT(4)
+#define QCN9274_MPDU_START_SELECT_PEER_MDATA_INFO3_PHY_PPDU_ID BIT(5)
+#define QCN9274_MPDU_START_SELECT_AST_IDX_SW_PEER_ID_INFO4 BIT(6)
+#define QCN9274_MPDU_START_SELECT_INFO5_INFO6 BIT(7)
+#define QCN9274_MPDU_START_SELECT_FRAME_CTRL_DURATION_ADDR1_31_0 BIT(8)
+#define QCN9274_MPDU_START_SELECT_ADDR2_47_0_ADDR1_47_32 BIT(9)
+#define QCN9274_MPDU_START_SELECT_ADDR3_47_0_SEQ_CTRL BIT(10)
+#define QCN9274_MPDU_START_SELECT_ADDR4_47_0_QOS_CTRL BIT(11)
+#define QCN9274_MPDU_START_SELECT_HT_CTRL_INFO7 BIT(12)
+#define QCN9274_MPDU_START_SELECT_ML_ADDR1_47_0_ML_ADDR2_15_0 BIT(13)
+#define QCN9274_MPDU_START_SELECT_ML_ADDR2_47_16_INFO8 BIT(14)
+#define QCN9274_MPDU_START_SELECT_RES_0_RES_1 BIT(15)
+
+#define QCN9274_MPDU_START_WMASK (QCN9274_MPDU_START_SELECT_INFO1_PN_31_0 | \
+ QCN9274_MPDU_START_SELECT_PN_95_32 | \
+ QCN9274_MPDU_START_SELECT_PN_127_96_INFO2 | \
+ QCN9274_MPDU_START_SELECT_PEER_MDATA_INFO3_PHY_PPDU_ID | \
+ QCN9274_MPDU_START_SELECT_AST_IDX_SW_PEER_ID_INFO4 | \
+ QCN9274_MPDU_START_SELECT_INFO5_INFO6 | \
+ QCN9274_MPDU_START_SELECT_FRAME_CTRL_DURATION_ADDR1_31_0 | \
+ QCN9274_MPDU_START_SELECT_ADDR2_47_0_ADDR1_47_32 | \
+ QCN9274_MPDU_START_SELECT_ADDR3_47_0_SEQ_CTRL | \
+ QCN9274_MPDU_START_SELECT_ADDR4_47_0_QOS_CTRL)
+
+/* The below rx_mpdu_start_qcn9274_compact structure is tied with the mask
+ * value QCN9274_MPDU_START_WMASK. If the mask value changes the structure
+ * will also change.
+ */
+
+struct rx_mpdu_start_qcn9274_compact {
+ __le32 info1;
+ __le32 pn[4];
+ __le32 info2;
+ __le32 peer_meta_data;
+ __le16 info3;
+ __le16 phy_ppdu_id;
+ __le16 ast_index;
+ __le16 sw_peer_id;
+ __le32 info4;
+ __le32 info5;
+ __le32 info6;
+ __le16 frame_ctrl;
+ __le16 duration;
+ u8 addr1[ETH_ALEN];
+ u8 addr2[ETH_ALEN];
+ u8 addr3[ETH_ALEN];
+ __le16 seq_ctrl;
+ u8 addr4[ETH_ALEN];
+ __le16 qos_ctrl;
+} __packed;
+
/* rx_mpdu_start
*
* reo_destination_indication
@@ -608,6 +663,8 @@ enum rx_msdu_start_reception_type {
RX_MSDU_START_RECEPTION_TYPE_UL_MU_OFDMA_MIMO,
};
+#define RX_MSDU_END_64_TLV_SRC_LINK_ID GENMASK(24, 22)
+
#define RX_MSDU_END_INFO0_RXPCU_MPDU_FITLER GENMASK(1, 0)
#define RX_MSDU_END_INFO0_SW_FRAME_GRP_ID GENMASK(8, 2)
@@ -786,6 +843,52 @@ struct rx_msdu_end_qcn9274 {
__le32 info14;
} __packed;
+#define QCN9274_MSDU_END_SELECT_MSDU_END_TAG BIT(0)
+#define QCN9274_MSDU_END_SELECT_INFO0_PHY_PPDUID_IP_HDR_CSUM_INFO1 BIT(1)
+#define QCN9274_MSDU_END_SELECT_INFO2_CUMULATIVE_CSUM_RULE_IND_0 BIT(2)
+#define QCN9274_MSDU_END_SELECT_IPV6_OP_CRC_INFO3_TYPE13 BIT(3)
+#define QCN9274_MSDU_END_SELECT_RULE_IND_1_TCP_SEQ_NUM BIT(4)
+#define QCN9274_MSDU_END_SELECT_TCP_ACK_NUM_INFO4_WINDOW_SIZE BIT(5)
+#define QCN9274_MSDU_END_SELECT_SA_SW_PER_ID_INFO5_SA_DA_ID BIT(6)
+#define QCN9274_MSDU_END_SELECT_INFO6_FSE_METADATA BIT(7)
+#define QCN9274_MSDU_END_SELECT_CCE_MDATA_TCP_UDP_CSUM_INFO7_IP_LEN BIT(8)
+#define QCN9274_MSDU_END_SELECT_INFO8_INFO9 BIT(9)
+#define QCN9274_MSDU_END_SELECT_INFO10_INFO11 BIT(10)
+#define QCN9274_MSDU_END_SELECT_VLAN_CTAG_STAG_CI_PEER_MDATA BIT(11)
+#define QCN9274_MSDU_END_SELECT_INFO12_AND_FLOW_ID_TOEPLITZ BIT(12)
+#define QCN9274_MSDU_END_SELECT_PPDU_START_TS_63_32_PHY_MDATA BIT(13)
+#define QCN9274_MSDU_END_SELECT_PPDU_START_TS_31_0_TOEPLITZ_HASH_2_4 BIT(14)
+#define QCN9274_MSDU_END_SELECT_RES0_SA_47_0 BIT(15)
+#define QCN9274_MSDU_END_SELECT_INFO13_INFO14 BIT(16)
+
+#define QCN9274_MSDU_END_WMASK (QCN9274_MSDU_END_SELECT_MSDU_END_TAG | \
+ QCN9274_MSDU_END_SELECT_SA_SW_PER_ID_INFO5_SA_DA_ID | \
+ QCN9274_MSDU_END_SELECT_INFO10_INFO11 | \
+ QCN9274_MSDU_END_SELECT_INFO12_AND_FLOW_ID_TOEPLITZ | \
+ QCN9274_MSDU_END_SELECT_PPDU_START_TS_63_32_PHY_MDATA | \
+ QCN9274_MSDU_END_SELECT_INFO13_INFO14)
+
+/* The below rx_msdu_end_qcn9274_compact structure is tied with the mask value
+ * QCN9274_MSDU_END_WMASK. If the mask value changes the structure will also
+ * change.
+ */
+
+struct rx_msdu_end_qcn9274_compact {
+ __le64 msdu_end_tag;
+ __le16 sa_sw_peer_id;
+ __le16 info5;
+ __le16 sa_idx;
+ __le16 da_idx_or_sw_peer_id;
+ __le32 info10;
+ __le32 info11;
+ __le32 info12;
+ __le32 flow_id_toeplitz;
+ __le32 ppdu_start_timestamp_63_32;
+ __le32 phy_meta_data;
+ __le32 info13;
+ __le32 info14;
+} __packed;
+
/* These macro definitions are only used for WCN7850 */
#define RX_MSDU_END_WCN7850_INFO2_KEY_ID BIT(7, 0)
@@ -1450,16 +1553,18 @@ struct rx_msdu_end_wcn7850 {
*
*/
-/* TODO: Move to compact TLV approach
- * By default these tlv's are not aligned to 128b boundary
- * Need to remove unused qwords and make them compact/aligned
- */
struct hal_rx_desc_qcn9274 {
struct rx_msdu_end_qcn9274 msdu_end;
struct rx_mpdu_start_qcn9274 mpdu_start;
u8 msdu_payload[];
} __packed;
+struct hal_rx_desc_qcn9274_compact {
+ struct rx_msdu_end_qcn9274_compact msdu_end;
+ struct rx_mpdu_start_qcn9274_compact mpdu_start;
+ u8 msdu_payload[];
+} __packed;
+
#define RX_BE_PADDING0_BYTES 8
#define RX_BE_PADDING1_BYTES 8
@@ -1484,6 +1589,7 @@ struct hal_rx_desc_wcn7850 {
struct hal_rx_desc {
union {
struct hal_rx_desc_qcn9274 qcn9274;
+ struct hal_rx_desc_qcn9274_compact qcn9274_compact;
struct hal_rx_desc_wcn7850 wcn7850;
} u;
} __packed;
diff --git a/drivers/net/wireless/ath/ath12k/trace.h b/drivers/net/wireless/ath/ath12k/trace.h
index f72096684b74..240737e1542d 100644
--- a/drivers/net/wireless/ath/ath12k/trace.h
+++ b/drivers/net/wireless/ath/ath12k/trace.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2019-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#if !defined(_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
@@ -140,6 +140,33 @@ TRACE_EVENT(ath12k_htt_rxdesc,
)
);
+TRACE_EVENT(ath12k_wmi_diag,
+ TP_PROTO(struct ath12k_base *ab, const void *data, size_t len),
+
+ TP_ARGS(ab, data, len),
+
+ TP_STRUCT__entry(
+ __string(device, dev_name(ab->dev))
+ __string(driver, dev_driver_string(ab->dev))
+ __field(u16, len)
+ __dynamic_array(u8, data, len)
+ ),
+
+ TP_fast_assign(
+ __assign_str(device, dev_name(ab->dev));
+ __assign_str(driver, dev_driver_string(ab->dev));
+ __entry->len = len;
+ memcpy(__get_dynamic_array(data), data, len);
+ ),
+
+ TP_printk(
+ "%s %s tlv diag len %d",
+ __get_str(driver),
+ __get_str(device),
+ __entry->len
+ )
+);
+
#endif /* _TRACE_H_ || TRACE_HEADER_MULTI_READ*/
/* we don't want to use include/trace/events */
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 11cc3005c0f9..9d69a1769926 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/skbuff.h>
#include <linux/ctype.h>
@@ -19,6 +19,7 @@
#include "mac.h"
#include "hw.h"
#include "peer.h"
+#include "p2p.h"
struct ath12k_wmi_svc_ready_parse {
bool wmi_svc_bitmap_done;
@@ -162,6 +163,14 @@ static const struct ath12k_wmi_tlv_policy ath12k_wmi_tlv_policies[] = {
.min_len = sizeof(struct wmi_probe_resp_tx_status_event) },
[WMI_TAG_VDEV_DELETE_RESP_EVENT] = {
.min_len = sizeof(struct wmi_vdev_delete_resp_event) },
+ [WMI_TAG_TWT_ENABLE_COMPLETE_EVENT] = {
+ .min_len = sizeof(struct wmi_twt_enable_event) },
+ [WMI_TAG_TWT_DISABLE_COMPLETE_EVENT] = {
+ .min_len = sizeof(struct wmi_twt_disable_event) },
+ [WMI_TAG_P2P_NOA_INFO] = {
+ .min_len = sizeof(struct ath12k_wmi_p2p_noa_info) },
+ [WMI_TAG_P2P_NOA_EVENT] = {
+ .min_len = sizeof(struct wmi_p2p_noa_event) },
};
static __le32 ath12k_wmi_tlv_hdr(u32 cmd, u32 len)
@@ -179,18 +188,9 @@ void ath12k_wmi_init_qcn9274(struct ath12k_base *ab,
struct ath12k_wmi_resource_config_arg *config)
{
config->num_vdevs = ab->num_radios * TARGET_NUM_VDEVS;
-
- if (ab->num_radios == 2) {
- config->num_peers = TARGET_NUM_PEERS(DBS);
- config->num_tids = TARGET_NUM_TIDS(DBS);
- } else if (ab->num_radios == 3) {
- config->num_peers = TARGET_NUM_PEERS(DBS_SBS);
- config->num_tids = TARGET_NUM_TIDS(DBS_SBS);
- } else {
- /* Control should not reach here */
- config->num_peers = TARGET_NUM_PEERS(SINGLE);
- config->num_tids = TARGET_NUM_TIDS(SINGLE);
- }
+ config->num_peers = ab->num_radios *
+ ath12k_core_get_max_peers_per_radio(ab);
+ config->num_tids = ath12k_core_get_max_num_tids(ab);
config->num_offload_peers = TARGET_NUM_OFFLD_PEERS;
config->num_offload_reorder_buffs = TARGET_NUM_OFFLD_REORDER_BUFFS;
config->num_peer_keys = TARGET_NUM_PEER_KEYS;
@@ -228,6 +228,9 @@ void ath12k_wmi_init_qcn9274(struct ath12k_base *ab,
config->peer_map_unmap_version = 0x32;
config->twt_ap_pdev_count = ab->num_radios;
config->twt_ap_sta_count = 1000;
+
+ if (test_bit(WMI_TLV_SERVICE_PEER_METADATA_V1A_V1B_SUPPORT, ab->wmi_ab.svc_map))
+ config->dp_peer_meta_data_ver = TARGET_RX_PEER_METADATA_VER_V1B;
}
void ath12k_wmi_init_wcn7850(struct ath12k_base *ab,
@@ -359,8 +362,8 @@ static int ath12k_wmi_tlv_parse(struct ath12k_base *ar, const void **tb,
}
static const void **
-ath12k_wmi_tlv_parse_alloc(struct ath12k_base *ab, const void *ptr,
- size_t len, gfp_t gfp)
+ath12k_wmi_tlv_parse_alloc(struct ath12k_base *ab,
+ struct sk_buff *skb, gfp_t gfp)
{
const void **tb;
int ret;
@@ -369,7 +372,7 @@ ath12k_wmi_tlv_parse_alloc(struct ath12k_base *ab, const void *ptr,
if (!tb)
return ERR_PTR(-ENOMEM);
- ret = ath12k_wmi_tlv_parse(ab, tb, ptr, len);
+ ret = ath12k_wmi_tlv_parse(ab, tb, skb->data, skb->len);
if (ret) {
kfree(tb);
return ERR_PTR(ret);
@@ -493,13 +496,13 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle,
mac_caps = wmi_mac_phy_caps + phy_idx;
- pdev->pdev_id = le32_to_cpu(mac_caps->pdev_id);
+ pdev->pdev_id = ath12k_wmi_mac_phy_get_pdev_id(mac_caps);
pdev_cap->supported_bands |= le32_to_cpu(mac_caps->supported_bands);
pdev_cap->ampdu_density = le32_to_cpu(mac_caps->ampdu_density);
fw_pdev = &ab->fw_pdev[ab->fw_pdev_count];
fw_pdev->supported_bands = le32_to_cpu(mac_caps->supported_bands);
- fw_pdev->pdev_id = le32_to_cpu(mac_caps->pdev_id);
+ fw_pdev->pdev_id = ath12k_wmi_mac_phy_get_pdev_id(mac_caps);
fw_pdev->phy_id = le32_to_cpu(mac_caps->phy_id);
ab->fw_pdev_count++;
@@ -727,6 +730,20 @@ static int ath12k_service_ready_event(struct ath12k_base *ab, struct sk_buff *sk
return 0;
}
+static u32 ath12k_wmi_mgmt_get_freq(struct ath12k *ar,
+ struct ieee80211_tx_info *info)
+{
+ struct ath12k_base *ab = ar->ab;
+ u32 freq = 0;
+
+ if (ab->hw_params->single_pdev_only &&
+ ar->scan.is_roc &&
+ (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN))
+ freq = ar->scan.roc_freq;
+
+ return freq;
+}
+
struct sk_buff *ath12k_wmi_alloc_skb(struct ath12k_wmi_base *wmi_ab, u32 len)
{
struct sk_buff *skb;
@@ -752,6 +769,7 @@ int ath12k_wmi_mgmt_send(struct ath12k *ar, u32 vdev_id, u32 buf_id,
{
struct ath12k_wmi_pdev *wmi = ar->wmi;
struct wmi_mgmt_send_cmd *cmd;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(frame);
struct wmi_tlv *frame_tlv;
struct sk_buff *skb;
u32 buf_len;
@@ -770,7 +788,7 @@ int ath12k_wmi_mgmt_send(struct ath12k *ar, u32 vdev_id, u32 buf_id,
sizeof(*cmd));
cmd->vdev_id = cpu_to_le32(vdev_id);
cmd->desc_id = cpu_to_le32(buf_id);
- cmd->chanfreq = 0;
+ cmd->chanfreq = cpu_to_le32(ath12k_wmi_mgmt_get_freq(ar, info));
cmd->paddr_lo = cpu_to_le32(lower_32_bits(ATH12K_SKB_CB(frame)->paddr));
cmd->paddr_hi = cpu_to_le32(upper_32_bits(ATH12K_SKB_CB(frame)->paddr));
cmd->frame_len = cpu_to_le32(frame->len);
@@ -826,6 +844,9 @@ int ath12k_wmi_vdev_create(struct ath12k *ar, u8 *macaddr,
cmd->vdev_stats_id = cpu_to_le32(args->if_stats_id);
ether_addr_copy(cmd->vdev_macaddr.addr, macaddr);
+ if (args->if_stats_id != ATH12K_INVAL_VDEV_STATS_ID)
+ cmd->vdev_stats_id_valid = cpu_to_le32(BIT(0));
+
ptr = skb->data + sizeof(*cmd);
len = WMI_NUM_SUPPORTED_BAND_MAX * sizeof(*txrx_streams);
@@ -1024,6 +1045,7 @@ int ath12k_wmi_vdev_start(struct ath12k *ar, struct wmi_vdev_start_req_arg *arg,
cmd->regdomain = cpu_to_le32(arg->regdomain);
cmd->he_ops = cpu_to_le32(arg->he_ops);
cmd->punct_bitmap = cpu_to_le32(arg->punct_bitmap);
+ cmd->mbssid_flags = cpu_to_le32(arg->mbssid_flags);
if (!restart) {
if (arg->ssid) {
@@ -1051,7 +1073,7 @@ int ath12k_wmi_vdev_start(struct ath12k *ar, struct wmi_vdev_start_req_arg *arg,
tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, 0);
/* Note: This is a nested TLV containing:
- * [wmi_tlv][wmi_p2p_noa_descriptor][wmi_tlv]..
+ * [wmi_tlv][ath12k_wmi_p2p_noa_descriptor][wmi_tlv]..
*/
ptr += sizeof(*tlv);
@@ -1710,6 +1732,48 @@ int ath12k_wmi_send_bcn_offload_control_cmd(struct ath12k *ar,
return ret;
}
+int ath12k_wmi_p2p_go_bcn_ie(struct ath12k *ar, u32 vdev_id,
+ const u8 *p2p_ie)
+{
+ struct ath12k_wmi_pdev *wmi = ar->wmi;
+ struct wmi_p2p_go_set_beacon_ie_cmd *cmd;
+ size_t p2p_ie_len, aligned_len;
+ struct wmi_tlv *tlv;
+ struct sk_buff *skb;
+ void *ptr;
+ int ret, len;
+
+ p2p_ie_len = p2p_ie[1] + 2;
+ aligned_len = roundup(p2p_ie_len, sizeof(u32));
+
+ len = sizeof(*cmd) + TLV_HDR_SIZE + aligned_len;
+
+ skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len);
+ if (!skb)
+ return -ENOMEM;
+
+ ptr = skb->data;
+ cmd = ptr;
+ cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_P2P_GO_SET_BEACON_IE,
+ sizeof(*cmd));
+ cmd->vdev_id = cpu_to_le32(vdev_id);
+ cmd->ie_buf_len = cpu_to_le32(p2p_ie_len);
+
+ ptr += sizeof(*cmd);
+ tlv = ptr;
+ tlv->header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_ARRAY_BYTE,
+ aligned_len);
+ memcpy(tlv->value, p2p_ie, p2p_ie_len);
+
+ ret = ath12k_wmi_cmd_send(wmi, skb, WMI_P2P_GO_SET_BEACON_IE);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_P2P_GO_SET_BEACON_IE\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
+}
+
int ath12k_wmi_bcn_tmpl(struct ath12k *ar, u32 vdev_id,
struct ieee80211_mutable_offsets *offs,
struct sk_buff *bcn)
@@ -2130,7 +2194,7 @@ void ath12k_wmi_start_scan_init(struct ath12k *ar,
WMI_SCAN_EVENT_BSS_CHANNEL |
WMI_SCAN_EVENT_FOREIGN_CHAN |
WMI_SCAN_EVENT_DEQUEUED;
- arg->scan_flags |= WMI_SCAN_CHAN_STAT_EVENT;
+ arg->scan_f_chan_stat_evnt = 1;
arg->num_bssid = 1;
/* fill bssid_list[0] with 0xff, otherwise bssid and RA will be
@@ -3265,6 +3329,9 @@ ath12k_wmi_copy_resource_config(struct ath12k_wmi_resource_config_params *wmi_cf
wmi_cfg->sched_params = cpu_to_le32(tg_cfg->sched_params);
wmi_cfg->twt_ap_pdev_count = cpu_to_le32(tg_cfg->twt_ap_pdev_count);
wmi_cfg->twt_ap_sta_count = cpu_to_le32(tg_cfg->twt_ap_sta_count);
+ wmi_cfg->flags2 = le32_encode_bits(tg_cfg->dp_peer_meta_data_ver,
+ WMI_RSRC_CFG_FLAGS2_RX_PEER_METADATA_VERSION);
+
wmi_cfg->host_service_flags = cpu_to_le32(tg_cfg->is_reg_cc_ext_event_supported <<
WMI_RSRC_CFG_HOST_SVC_FLAG_REG_CC_EXT_SUPPORT_BIT);
}
@@ -4214,7 +4281,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab,
for (i = 0; i < ab->fw_pdev_count; i++) {
struct ath12k_fw_pdev *fw_pdev = &ab->fw_pdev[i];
- if (fw_pdev->pdev_id == le32_to_cpu(caps->pdev_id) &&
+ if (fw_pdev->pdev_id == ath12k_wmi_caps_ext_get_pdev_id(caps) &&
fw_pdev->phy_id == le32_to_cpu(caps->phy_id)) {
bands = fw_pdev->supported_bands;
break;
@@ -4271,7 +4338,8 @@ static int ath12k_wmi_tlv_mac_phy_caps_ext(struct ath12k_base *ab, u16 tag,
return 0;
} else {
for (i = 0; i < ab->num_radios; i++) {
- if (ab->pdevs[i].pdev_id == le32_to_cpu(caps->pdev_id))
+ if (ab->pdevs[i].pdev_id ==
+ ath12k_wmi_caps_ext_get_pdev_id(caps))
break;
}
@@ -4374,7 +4442,7 @@ static int ath12k_pull_vdev_start_resp_tlv(struct ath12k_base *ab, struct sk_buf
const struct wmi_vdev_start_resp_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -4452,7 +4520,7 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab,
ath12k_dbg(ab, ATH12K_DBG_WMI, "processing regulatory ext channel list\n");
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -4738,7 +4806,7 @@ static int ath12k_pull_peer_del_resp_ev(struct ath12k_base *ab, struct sk_buff *
const struct wmi_peer_delete_resp_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -4770,7 +4838,7 @@ static int ath12k_pull_vdev_del_resp_ev(struct ath12k_base *ab,
const struct wmi_vdev_delete_resp_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -4790,15 +4858,15 @@ static int ath12k_pull_vdev_del_resp_ev(struct ath12k_base *ab,
return 0;
}
-static int ath12k_pull_bcn_tx_status_ev(struct ath12k_base *ab, void *evt_buf,
- u32 len, u32 *vdev_id,
- u32 *tx_status)
+static int ath12k_pull_bcn_tx_status_ev(struct ath12k_base *ab,
+ struct sk_buff *skb,
+ u32 *vdev_id, u32 *tx_status)
{
const void **tb;
const struct wmi_bcn_tx_status_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, evt_buf, len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -4826,7 +4894,7 @@ static int ath12k_pull_vdev_stopped_param_tlv(struct ath12k_base *ab, struct sk_
const struct wmi_vdev_stopped_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -4948,7 +5016,7 @@ static int wmi_process_mgmt_tx_comp(struct ath12k *ar, u32 desc_id,
if ((!(info->flags & IEEE80211_TX_CTL_NO_ACK)) && !status)
info->flags |= IEEE80211_TX_STAT_ACK;
- ieee80211_tx_status_irqsafe(ar->hw, msdu);
+ ieee80211_tx_status_irqsafe(ath12k_ar_to_hw(ar), msdu);
num_mgmt = atomic_dec_if_positive(&ar->num_pending_mgmt_tx);
@@ -4970,7 +5038,7 @@ static int ath12k_pull_mgmt_tx_compl_param_tlv(struct ath12k_base *ab,
const struct wmi_mgmt_tx_compl_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5006,6 +5074,10 @@ static void ath12k_wmi_event_scan_started(struct ath12k *ar)
break;
case ATH12K_SCAN_STARTING:
ar->scan.state = ATH12K_SCAN_RUNNING;
+
+ if (ar->scan.is_roc)
+ ieee80211_ready_on_channel(ath12k_ar_to_hw(ar));
+
complete(&ar->scan.started);
break;
}
@@ -5076,6 +5148,8 @@ static void ath12k_wmi_event_scan_bss_chan(struct ath12k *ar)
static void ath12k_wmi_event_scan_foreign_chan(struct ath12k *ar, u32 freq)
{
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
+
lockdep_assert_held(&ar->data_lock);
switch (ar->scan.state) {
@@ -5087,7 +5161,11 @@ static void ath12k_wmi_event_scan_foreign_chan(struct ath12k *ar, u32 freq)
break;
case ATH12K_SCAN_RUNNING:
case ATH12K_SCAN_ABORTING:
- ar->scan_channel = ieee80211_get_channel(ar->hw->wiphy, freq);
+ ar->scan_channel = ieee80211_get_channel(hw->wiphy, freq);
+
+ if (ar->scan.is_roc && ar->scan.roc_freq == freq)
+ complete(&ar->scan.on_channel);
+
break;
}
}
@@ -5141,7 +5219,7 @@ static int ath12k_pull_scan_ev(struct ath12k_base *ab, struct sk_buff *skb,
const struct wmi_scan_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5174,7 +5252,7 @@ static int ath12k_pull_peer_sta_kickout_ev(struct ath12k_base *ab, struct sk_buf
const struct wmi_peer_sta_kickout_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5201,7 +5279,7 @@ static int ath12k_pull_roam_ev(struct ath12k_base *ab, struct sk_buff *skb,
const struct wmi_roam_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5226,13 +5304,14 @@ static int ath12k_pull_roam_ev(struct ath12k_base *ab, struct sk_buff *skb,
static int freq_to_idx(struct ath12k *ar, int freq)
{
struct ieee80211_supported_band *sband;
+ struct ieee80211_hw *hw = ath12k_ar_to_hw(ar);
int band, ch, idx = 0;
for (band = NL80211_BAND_2GHZ; band < NUM_NL80211_BANDS; band++) {
if (!ar->mac.sbands[band].channels)
continue;
- sband = ar->hw->wiphy->bands[band];
+ sband = hw->wiphy->bands[band];
if (!sband)
continue;
@@ -5245,14 +5324,14 @@ exit:
return idx;
}
-static int ath12k_pull_chan_info_ev(struct ath12k_base *ab, u8 *evt_buf,
- u32 len, struct wmi_chan_info_event *ch_info_ev)
+static int ath12k_pull_chan_info_ev(struct ath12k_base *ab, struct sk_buff *skb,
+ struct wmi_chan_info_event *ch_info_ev)
{
const void **tb;
const struct wmi_chan_info_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, evt_buf, len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5291,7 +5370,7 @@ ath12k_pull_pdev_bss_chan_info_ev(struct ath12k_base *ab, struct sk_buff *skb,
const struct wmi_pdev_bss_chan_info_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5331,7 +5410,7 @@ ath12k_pull_vdev_install_key_compl_ev(struct ath12k_base *ab, struct sk_buff *sk
const struct wmi_vdev_install_key_compl_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5362,7 +5441,7 @@ static int ath12k_pull_peer_assoc_conf_ev(struct ath12k_base *ab, struct sk_buff
const struct wmi_peer_assoc_conf_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5384,13 +5463,13 @@ static int ath12k_pull_peer_assoc_conf_ev(struct ath12k_base *ab, struct sk_buff
}
static int
-ath12k_pull_pdev_temp_ev(struct ath12k_base *ab, u8 *evt_buf,
- u32 len, const struct wmi_pdev_temperature_event *ev)
+ath12k_pull_pdev_temp_ev(struct ath12k_base *ab, struct sk_buff *skb,
+ const struct wmi_pdev_temperature_event *ev)
{
const void **tb;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, evt_buf, len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -5725,8 +5804,7 @@ static void ath12k_bcn_tx_status_event(struct ath12k_base *ab, struct sk_buff *s
{
u32 vdev_id, tx_status;
- if (ath12k_pull_bcn_tx_status_ev(ab, skb->data, skb->len,
- &vdev_id, &tx_status) != 0) {
+ if (ath12k_pull_bcn_tx_status_ev(ab, skb, &vdev_id, &tx_status) != 0) {
ath12k_warn(ab, "failed to extract bcn tx status");
return;
}
@@ -5864,7 +5942,7 @@ static void ath12k_mgmt_rx_event(struct ath12k_base *ab, struct sk_buff *skb)
status->freq, status->band, status->signal,
status->rate_idx);
- ieee80211_rx_ni(ar->hw, skb);
+ ieee80211_rx_ni(ath12k_ar_to_hw(ar), skb);
exit:
rcu_read_unlock();
@@ -6037,7 +6115,7 @@ static void ath12k_peer_sta_kickout_event(struct ath12k_base *ab, struct sk_buff
goto exit;
}
- sta = ieee80211_find_sta_by_ifaddr(ar->hw,
+ sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar),
arg.mac_addr, NULL);
if (!sta) {
ath12k_warn(ab, "Spurious quick kickout for STA %pM\n",
@@ -6110,7 +6188,7 @@ static void ath12k_chan_info_event(struct ath12k_base *ab, struct sk_buff *skb)
/* HW channel counters frequency value in hertz */
u32 cc_freq_hz = ab->cc_freq_hz;
- if (ath12k_pull_chan_info_ev(ab, skb->data, skb->len, &ch_info_ev) != 0) {
+ if (ath12k_pull_chan_info_ev(ab, skb, &ch_info_ev) != 0) {
ath12k_warn(ab, "failed to extract chan info event");
return;
}
@@ -6395,7 +6473,7 @@ static void ath12k_pdev_ctl_failsafe_check_event(struct ath12k_base *ab,
const struct wmi_pdev_ctl_failsafe_chk_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6446,7 +6524,7 @@ ath12k_wmi_process_csa_switch_count_event(struct ath12k_base *ab,
}
if (arvif->is_up && arvif->vif->bss_conf.csa_active)
- ieee80211_csa_finish(arvif->vif);
+ ieee80211_csa_finish(arvif->vif, 0);
}
rcu_read_unlock();
}
@@ -6460,7 +6538,7 @@ ath12k_wmi_pdev_csa_switch_count_status_event(struct ath12k_base *ab,
const u32 *vdev_ids;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6494,7 +6572,7 @@ ath12k_wmi_pdev_dfs_radar_detected_event(struct ath12k_base *ab, struct sk_buff
struct ath12k *ar;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6531,7 +6609,7 @@ ath12k_wmi_pdev_dfs_radar_detected_event(struct ath12k_base *ab, struct sk_buff
if (ar->dfs_block_radar_events)
ath12k_info(ab, "DFS Radar detected, but ignored as requested\n");
else
- ieee80211_radar_detected(ar->hw);
+ ieee80211_radar_detected(ath12k_ar_to_hw(ar));
exit:
rcu_read_unlock();
@@ -6546,7 +6624,7 @@ ath12k_wmi_pdev_temperature_event(struct ath12k_base *ab,
struct ath12k *ar;
struct wmi_pdev_temperature_event ev = {0};
- if (ath12k_pull_pdev_temp_ev(ab, skb->data, skb->len, &ev) != 0) {
+ if (ath12k_pull_pdev_temp_ev(ab, skb, &ev) != 0) {
ath12k_warn(ab, "failed to extract pdev temperature event");
return;
}
@@ -6573,7 +6651,7 @@ static void ath12k_fils_discovery_event(struct ath12k_base *ab,
const struct wmi_fils_discovery_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab,
@@ -6603,7 +6681,7 @@ static void ath12k_probe_resp_tx_status_event(struct ath12k_base *ab,
const struct wmi_probe_resp_tx_status_event *ev;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab,
@@ -6628,6 +6706,56 @@ static void ath12k_probe_resp_tx_status_event(struct ath12k_base *ab,
kfree(tb);
}
+static int ath12k_wmi_p2p_noa_event(struct ath12k_base *ab,
+ struct sk_buff *skb)
+{
+ const void **tb;
+ const struct wmi_p2p_noa_event *ev;
+ const struct ath12k_wmi_p2p_noa_info *noa;
+ struct ath12k *ar;
+ int ret, vdev_id;
+
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
+ if (IS_ERR(tb)) {
+ ret = PTR_ERR(tb);
+ ath12k_warn(ab, "failed to parse P2P NoA TLV: %d\n", ret);
+ return ret;
+ }
+
+ ev = tb[WMI_TAG_P2P_NOA_EVENT];
+ noa = tb[WMI_TAG_P2P_NOA_INFO];
+
+ if (!ev || !noa) {
+ ret = -EPROTO;
+ goto out;
+ }
+
+ vdev_id = __le32_to_cpu(ev->vdev_id);
+
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "wmi tlv p2p noa vdev_id %i descriptors %u\n",
+ vdev_id, le32_get_bits(noa->noa_attr, WMI_P2P_NOA_INFO_DESC_NUM));
+
+ rcu_read_lock();
+ ar = ath12k_mac_get_ar_by_vdev_id(ab, vdev_id);
+ if (!ar) {
+ ath12k_warn(ab, "invalid vdev id %d in P2P NoA event\n",
+ vdev_id);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ ath12k_p2p_noa_update_by_vdev_id(ar, vdev_id, noa);
+
+ ret = 0;
+
+unlock:
+ rcu_read_unlock();
+out:
+ kfree(tb);
+ return ret;
+}
+
static void ath12k_rfkill_state_change_event(struct ath12k_base *ab,
struct sk_buff *skb)
{
@@ -6635,7 +6763,7 @@ static void ath12k_rfkill_state_change_event(struct ath12k_base *ab,
const void **tb;
int ret;
- tb = ath12k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
if (IS_ERR(tb)) {
ret = PTR_ERR(tb);
ath12k_warn(ab, "failed to parse tlv: %d\n", ret);
@@ -6662,6 +6790,70 @@ static void ath12k_rfkill_state_change_event(struct ath12k_base *ab,
kfree(tb);
}
+static void
+ath12k_wmi_diag_event(struct ath12k_base *ab, struct sk_buff *skb)
+{
+ trace_ath12k_wmi_diag(ab, skb->data, skb->len);
+}
+
+static void ath12k_wmi_twt_enable_event(struct ath12k_base *ab,
+ struct sk_buff *skb)
+{
+ const void **tb;
+ const struct wmi_twt_enable_event *ev;
+ int ret;
+
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
+ if (IS_ERR(tb)) {
+ ret = PTR_ERR(tb);
+ ath12k_warn(ab, "failed to parse wmi twt enable status event tlv: %d\n",
+ ret);
+ return;
+ }
+
+ ev = tb[WMI_TAG_TWT_ENABLE_COMPLETE_EVENT];
+ if (!ev) {
+ ath12k_warn(ab, "failed to fetch twt enable wmi event\n");
+ goto exit;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "wmi twt enable event pdev id %u status %u\n",
+ le32_to_cpu(ev->pdev_id),
+ le32_to_cpu(ev->status));
+
+exit:
+ kfree(tb);
+}
+
+static void ath12k_wmi_twt_disable_event(struct ath12k_base *ab,
+ struct sk_buff *skb)
+{
+ const void **tb;
+ const struct wmi_twt_disable_event *ev;
+ int ret;
+
+ tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC);
+ if (IS_ERR(tb)) {
+ ret = PTR_ERR(tb);
+ ath12k_warn(ab, "failed to parse wmi twt disable status event tlv: %d\n",
+ ret);
+ return;
+ }
+
+ ev = tb[WMI_TAG_TWT_DISABLE_COMPLETE_EVENT];
+ if (!ev) {
+ ath12k_warn(ab, "failed to fetch twt disable wmi event\n");
+ goto exit;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "wmi twt disable event pdev id %d status %u\n",
+ le32_to_cpu(ev->pdev_id),
+ le32_to_cpu(ev->status));
+
+exit:
+ kfree(tb);
+}
+
static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb)
{
struct wmi_cmd_hdr *cmd_hdr;
@@ -6757,11 +6949,18 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb)
case WMI_RFKILL_STATE_CHANGE_EVENTID:
ath12k_rfkill_state_change_event(ab, skb);
break;
+ case WMI_TWT_ENABLE_EVENTID:
+ ath12k_wmi_twt_enable_event(ab, skb);
+ break;
+ case WMI_TWT_DISABLE_EVENTID:
+ ath12k_wmi_twt_disable_event(ab, skb);
+ break;
+ case WMI_P2P_NOA_EVENTID:
+ ath12k_wmi_p2p_noa_event(ab, skb);
+ break;
/* add Unsupported events here */
case WMI_TBTTOFFSET_EXT_UPDATE_EVENTID:
case WMI_PEER_OPER_MODE_CHANGE_EVENTID:
- case WMI_TWT_ENABLE_EVENTID:
- case WMI_TWT_DISABLE_EVENTID:
case WMI_PDEV_DMA_RING_CFG_RSP_EVENTID:
ath12k_dbg(ab, ATH12K_DBG_WMI,
"ignoring unsupported event 0x%x\n", id);
@@ -6772,6 +6971,9 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb)
case WMI_VDEV_DELETE_RESP_EVENTID:
ath12k_vdev_delete_resp_event(ab, skb);
break;
+ case WMI_DIAG_EVENTID:
+ ath12k_wmi_diag_event(ab, skb);
+ break;
/* TODO: Add remaining events */
default:
ath12k_dbg(ab, ATH12K_DBG_WMI, "Unknown eventid: 0x%x\n", id);
diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h
index 06e5b9b4049b..103462feb935 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.h
+++ b/drivers/net/wireless/ath/ath12k/wmi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH12K_WMI_H
@@ -168,10 +168,6 @@ struct wmi_tlv {
#define WLAN_SCAN_MAX_HINT_BSSID 10
#define MAX_RNR_BSS 5
-#define WLAN_SCAN_PARAMS_MAX_SSID 16
-#define WLAN_SCAN_PARAMS_MAX_BSSID 4
-#define WLAN_SCAN_PARAMS_MAX_IE_LEN 256
-
#define WMI_APPEND_TO_EXISTING_CHAN_LIST_FLAG 1
#define WMI_BA_MODE_BUFFER_SIZE_256 3
@@ -2163,6 +2159,10 @@ enum wmi_tlv_service {
WMI_TLV_SERVICE_11BE = 289,
+ WMI_TLV_SERVICE_WMSK_COMPACTION_RX_TLVS = 361,
+
+ WMI_TLV_SERVICE_PEER_METADATA_V1A_V1B_SUPPORT = 365,
+
WMI_MAX_EXT2_SERVICE,
};
@@ -2350,6 +2350,7 @@ struct ath12k_wmi_resource_config_arg {
u32 twt_ap_pdev_count;
u32 twt_ap_sta_count;
bool is_reg_cc_ext_event_supported;
+ u8 dp_peer_meta_data_ver;
};
struct ath12k_wmi_init_cmd_arg {
@@ -2402,6 +2403,7 @@ struct wmi_init_cmd {
} __packed;
#define WMI_RSRC_CFG_HOST_SVC_FLAG_REG_CC_EXT_SUPPORT_BIT 4
+#define WMI_RSRC_CFG_FLAGS2_RX_PEER_METADATA_VERSION GENMASK(5, 4)
struct ath12k_wmi_resource_config_params {
__le32 tlv_header;
@@ -2542,9 +2544,17 @@ struct ath12k_wmi_hw_mode_cap_params {
#define WMI_MAX_HECAP_PHY_SIZE (3)
+/* pdev_id is present in lower 16 bits of pdev_and_hw_link_ids in
+ * ath12k_wmi_mac_phy_caps_params & ath12k_wmi_caps_ext_params.
+ *
+ * hw_link_id is present in higher 16 bits of pdev_and_hw_link_ids.
+ */
+#define WMI_CAPS_PARAMS_PDEV_ID GENMASK(15, 0)
+#define WMI_CAPS_PARAMS_HW_LINK_ID GENMASK(31, 16)
+
struct ath12k_wmi_mac_phy_caps_params {
__le32 hw_mode_id;
- __le32 pdev_id;
+ __le32 pdev_and_hw_link_ids;
__le32 phy_id;
__le32 supported_flags;
__le32 supported_bands;
@@ -2636,13 +2646,7 @@ struct wmi_service_ready_ext2_event {
struct ath12k_wmi_caps_ext_params {
__le32 hw_mode_id;
- union {
- struct {
- __le16 pdev_id;
- __le16 hw_link_id;
- } __packed ath12k_wmi_pdev_to_link_map;
- __le32 pdev_id;
- };
+ __le32 pdev_and_hw_link_ids;
__le32 phy_id;
__le32 wireless_modes_ext;
__le32 eht_cap_mac_info_2ghz[WMI_MAX_EHTCAP_MAC_SIZE];
@@ -2716,6 +2720,9 @@ struct wmi_vdev_create_cmd {
struct ath12k_wmi_mac_addr_params vdev_macaddr;
__le32 num_cfg_txrx_streams;
__le32 pdev_id;
+ __le32 mbssid_flags;
+ __le32 mbssid_tx_vdev_id;
+ __le32 vdev_stats_id_valid;
__le32 vdev_stats_id;
} __packed;
@@ -2764,6 +2771,10 @@ struct ath12k_wmi_ssid_params {
#define ATH12K_VDEV_SETUP_TIMEOUT_HZ (5 * HZ)
+enum wmi_vdev_mbssid_flags {
+ WMI_VDEV_MBSSID_FLAGS_NON_MBSSID_AP = BIT(0),
+};
+
struct wmi_vdev_start_request_cmd {
__le32 tlv_header;
__le32 vdev_id;
@@ -2782,7 +2793,7 @@ struct wmi_vdev_start_request_cmd {
__le32 cac_duration_ms;
__le32 regdomain;
__le32 min_data_rate;
- __le32 mbssid_flags;
+ __le32 mbssid_flags; /* uses enum wmi_vdev_mbssid_flags */
__le32 mbssid_tx_vdev_id;
__le32 eht_ops;
__le32 punct_bitmap;
@@ -3146,7 +3157,7 @@ struct ath12k_wmi_element_info_arg {
#define WLAN_SCAN_PARAMS_MAX_SSID 16
#define WLAN_SCAN_PARAMS_MAX_BSSID 4
-#define WLAN_SCAN_PARAMS_MAX_IE_LEN 256
+#define WLAN_SCAN_PARAMS_MAX_IE_LEN 512
/* Values lower than this may be refused by some firmware revisions with a scan
* completion with a timedout reason.
@@ -3270,24 +3281,19 @@ struct ath12k_wmi_scan_req_arg {
u32 vdev_id;
u32 pdev_id;
enum wmi_scan_priority scan_priority;
- union {
- struct {
- u32 scan_ev_started:1,
- scan_ev_completed:1,
- scan_ev_bss_chan:1,
- scan_ev_foreign_chan:1,
- scan_ev_dequeued:1,
- scan_ev_preempted:1,
- scan_ev_start_failed:1,
- scan_ev_restarted:1,
- scan_ev_foreign_chn_exit:1,
- scan_ev_invalid:1,
- scan_ev_gpio_timeout:1,
- scan_ev_suspended:1,
- scan_ev_resumed:1;
- };
- u32 scan_events;
- };
+ u32 scan_ev_started:1,
+ scan_ev_completed:1,
+ scan_ev_bss_chan:1,
+ scan_ev_foreign_chan:1,
+ scan_ev_dequeued:1,
+ scan_ev_preempted:1,
+ scan_ev_start_failed:1,
+ scan_ev_restarted:1,
+ scan_ev_foreign_chn_exit:1,
+ scan_ev_invalid:1,
+ scan_ev_gpio_timeout:1,
+ scan_ev_suspended:1,
+ scan_ev_resumed:1;
u32 dwell_time_active;
u32 dwell_time_active_2g;
u32 dwell_time_passive;
@@ -3300,36 +3306,31 @@ struct ath12k_wmi_scan_req_arg {
u32 idle_time;
u32 max_scan_time;
u32 probe_delay;
- union {
- struct {
- u32 scan_f_passive:1,
- scan_f_bcast_probe:1,
- scan_f_cck_rates:1,
- scan_f_ofdm_rates:1,
- scan_f_chan_stat_evnt:1,
- scan_f_filter_prb_req:1,
- scan_f_bypass_dfs_chn:1,
- scan_f_continue_on_err:1,
- scan_f_offchan_mgmt_tx:1,
- scan_f_offchan_data_tx:1,
- scan_f_promisc_mode:1,
- scan_f_capture_phy_err:1,
- scan_f_strict_passive_pch:1,
- scan_f_half_rate:1,
- scan_f_quarter_rate:1,
- scan_f_force_active_dfs_chn:1,
- scan_f_add_tpc_ie_in_probe:1,
- scan_f_add_ds_ie_in_probe:1,
- scan_f_add_spoofed_mac_in_probe:1,
- scan_f_add_rand_seq_in_probe:1,
- scan_f_en_ie_whitelist_in_probe:1,
- scan_f_forced:1,
- scan_f_2ghz:1,
- scan_f_5ghz:1,
- scan_f_80mhz:1;
- };
- u32 scan_flags;
- };
+ u32 scan_f_passive:1,
+ scan_f_bcast_probe:1,
+ scan_f_cck_rates:1,
+ scan_f_ofdm_rates:1,
+ scan_f_chan_stat_evnt:1,
+ scan_f_filter_prb_req:1,
+ scan_f_bypass_dfs_chn:1,
+ scan_f_continue_on_err:1,
+ scan_f_offchan_mgmt_tx:1,
+ scan_f_offchan_data_tx:1,
+ scan_f_promisc_mode:1,
+ scan_f_capture_phy_err:1,
+ scan_f_strict_passive_pch:1,
+ scan_f_half_rate:1,
+ scan_f_quarter_rate:1,
+ scan_f_force_active_dfs_chn:1,
+ scan_f_add_tpc_ie_in_probe:1,
+ scan_f_add_ds_ie_in_probe:1,
+ scan_f_add_spoofed_mac_in_probe:1,
+ scan_f_add_rand_seq_in_probe:1,
+ scan_f_en_ie_whitelist_in_probe:1,
+ scan_f_forced:1,
+ scan_f_2ghz:1,
+ scan_f_5ghz:1,
+ scan_f_80mhz:1;
enum scan_dwelltime_adaptive_mode adaptive_dwell_time_mode;
u32 burst_duration;
u32 num_chan;
@@ -3489,6 +3490,37 @@ struct wmi_get_pdev_temperature_cmd {
__le32 pdev_id;
} __packed;
+#define WMI_P2P_MAX_NOA_DESCRIPTORS 4
+
+struct wmi_p2p_noa_event {
+ __le32 vdev_id;
+} __packed;
+
+struct ath12k_wmi_p2p_noa_descriptor {
+ __le32 type_count; /* 255: continuous schedule, 0: reserved */
+ __le32 duration; /* Absent period duration in micro seconds */
+ __le32 interval; /* Absent period interval in micro seconds */
+ __le32 start_time; /* 32 bit tsf time when in starts */
+} __packed;
+
+#define WMI_P2P_NOA_INFO_CHANGED_FLAG BIT(0)
+#define WMI_P2P_NOA_INFO_INDEX GENMASK(15, 8)
+#define WMI_P2P_NOA_INFO_OPP_PS BIT(16)
+#define WMI_P2P_NOA_INFO_CTWIN_TU GENMASK(23, 17)
+#define WMI_P2P_NOA_INFO_DESC_NUM GENMASK(31, 24)
+
+struct ath12k_wmi_p2p_noa_info {
+ /* Bit 0 - Flag to indicate an update in NOA schedule
+ * Bits 7-1 - Reserved
+ * Bits 15-8 - Index (identifies the instance of NOA sub element)
+ * Bit 16 - Opp PS state of the AP
+ * Bits 23-17 - Ctwindow in TUs
+ * Bits 31-24 - Number of NOA descriptors
+ */
+ __le32 noa_attr;
+ struct ath12k_wmi_p2p_noa_descriptor descriptors[WMI_P2P_MAX_NOA_DESCRIPTORS];
+} __packed;
+
#define WMI_BEACON_TX_BUFFER_SIZE 512
struct wmi_bcn_tmpl_cmd {
@@ -3503,6 +3535,12 @@ struct wmi_bcn_tmpl_cmd {
__le32 esp_ie_offset;
} __packed;
+struct wmi_p2p_go_set_beacon_ie_cmd {
+ __le32 tlv_header;
+ __le32 vdev_id;
+ __le32 ie_buf_len;
+} __packed;
+
struct wmi_vdev_install_key_cmd {
__le32 tlv_header;
__le32 vdev_id;
@@ -4797,6 +4835,16 @@ struct wmi_rfkill_state_change_event {
__le32 radio_state;
} __packed;
+struct wmi_twt_enable_event {
+ __le32 pdev_id;
+ __le32 status;
+} __packed;
+
+struct wmi_twt_disable_event {
+ __le32 pdev_id;
+ __le32 status;
+} __packed;
+
void ath12k_wmi_init_qcn9274(struct ath12k_base *ab,
struct ath12k_wmi_resource_config_arg *config);
void ath12k_wmi_init_wcn7850(struct ath12k_base *ab,
@@ -4806,6 +4854,8 @@ int ath12k_wmi_cmd_send(struct ath12k_wmi_pdev *wmi, struct sk_buff *skb,
struct sk_buff *ath12k_wmi_alloc_skb(struct ath12k_wmi_base *wmi_sc, u32 len);
int ath12k_wmi_mgmt_send(struct ath12k *ar, u32 vdev_id, u32 buf_id,
struct sk_buff *frame);
+int ath12k_wmi_p2p_go_bcn_ie(struct ath12k *ar, u32 vdev_id,
+ const u8 *p2p_ie);
int ath12k_wmi_bcn_tmpl(struct ath12k *ar, u32 vdev_id,
struct ieee80211_mutable_offsets *offs,
struct sk_buff *bcn);
@@ -4917,4 +4967,30 @@ int ath12k_wmi_probe_resp_tmpl(struct ath12k *ar, u32 vdev_id,
int ath12k_wmi_set_hw_mode(struct ath12k_base *ab,
enum wmi_host_hw_mode_config_type mode);
+static inline u32
+ath12k_wmi_caps_ext_get_pdev_id(const struct ath12k_wmi_caps_ext_params *param)
+{
+ return le32_get_bits(param->pdev_and_hw_link_ids, WMI_CAPS_PARAMS_PDEV_ID);
+}
+
+static inline u32
+ath12k_wmi_caps_ext_get_hw_link_id(const struct ath12k_wmi_caps_ext_params *param)
+{
+ return le32_get_bits(param->pdev_and_hw_link_ids, WMI_CAPS_PARAMS_HW_LINK_ID);
+}
+
+static inline u32
+ath12k_wmi_mac_phy_get_pdev_id(const struct ath12k_wmi_mac_phy_caps_params *param)
+{
+ return le32_get_bits(param->pdev_and_hw_link_ids,
+ WMI_CAPS_PARAMS_PDEV_ID);
+}
+
+static inline u32
+ath12k_wmi_mac_phy_get_hw_link_id(const struct ath12k_wmi_mac_phy_caps_params *param)
+{
+ return le32_get_bits(param->pdev_and_hw_link_ids,
+ WMI_CAPS_PARAMS_HW_LINK_ID);
+}
+
#endif
diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
index c630343ca4f9..eea4bda77608 100644
--- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c
+++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
@@ -779,6 +779,10 @@ static int ath5k_set_ringparam(struct ieee80211_hw *hw, u32 tx, u32 rx)
const struct ieee80211_ops ath5k_hw_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = ath5k_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = ath5k_start,
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index e37db4af33de..61b2e3f15f0e 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1119,7 +1119,7 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq,
NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT);
wiphy_lock(vif->ar->wiphy);
- cfg80211_ch_switch_notify(vif->ndev, &chandef, 0, 0);
+ cfg80211_ch_switch_notify(vif->ndev, &chandef, 0);
wiphy_unlock(vif->ar->wiphy);
}
diff --git a/drivers/net/wireless/ath/ath9k/ahb.c b/drivers/net/wireless/ath/ath9k/ahb.c
index 9bfaadfa6c00..1a6697b6e3b4 100644
--- a/drivers/net/wireless/ath/ath9k/ahb.c
+++ b/drivers/net/wireless/ath/ath9k/ahb.c
@@ -144,7 +144,7 @@ static int ath_ahb_probe(struct platform_device *pdev)
return ret;
}
-static int ath_ahb_remove(struct platform_device *pdev)
+static void ath_ahb_remove(struct platform_device *pdev)
{
struct ieee80211_hw *hw = platform_get_drvdata(pdev);
@@ -155,13 +155,11 @@ static int ath_ahb_remove(struct platform_device *pdev)
free_irq(sc->irq, sc);
ieee80211_free_hw(sc->hw);
}
-
- return 0;
}
static struct platform_driver ath_ahb_driver = {
.probe = ath_ahb_probe,
- .remove = ath_ahb_remove,
+ .remove_new = ath_ahb_remove,
.driver = {
.name = "ath9k",
},
diff --git a/drivers/net/wireless/ath/ath9k/antenna.c b/drivers/net/wireless/ath/ath9k/antenna.c
index 988222cea9df..acc84e6711b0 100644
--- a/drivers/net/wireless/ath/ath9k/antenna.c
+++ b/drivers/net/wireless/ath/ath9k/antenna.c
@@ -643,7 +643,7 @@ static void ath_ant_try_scan(struct ath_ant_comb *antcomb,
conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1;
conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_PLUS_LNA2;
} else if (antcomb->rssi_sub >
- antcomb->rssi_lna1) {
+ antcomb->rssi_lna2) {
/* set to A-B */
conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1;
conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_MINUS_LNA2;
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h
index 57e2b4c89125..ad72a30b67c3 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h
@@ -851,8 +851,6 @@
#define AR_PHY_TXGAIN_FORCED_TXBB1DBGAIN 0x0000000e
#define AR_PHY_TXGAIN_FORCED_TXBB1DBGAIN_S 1
-#define AR_PHY_POWER_TX_RATE1 0x9934
-#define AR_PHY_POWER_TX_RATE2 0x9938
#define AR_PHY_POWER_TX_RATE_MAX 0x993c
#define AR_PHY_POWER_TX_RATE_MAX_TPC_ENABLE 0x00000040
#define PHY_AGC_CLR 0x10000000
@@ -1041,13 +1039,6 @@
#define AR_PHY_TX_IQCAL_STATUS_B2_FAILED 0x00000001
-/*
- * AGC 3 Register Map
- */
-#define AR_AGC3_BASE 0xce00
-
-#define AR_PHY_RSSI_3 (AR_AGC3_BASE + 0x180)
-
/* GLB Registers */
#define AR_GLB_BASE 0x20000
#define AR_GLB_GPIO_CONTROL (AR_GLB_BASE)
diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c
index ee72faac2f1d..b399a7926ef5 100644
--- a/drivers/net/wireless/ath/ath9k/beacon.c
+++ b/drivers/net/wireless/ath/ath9k/beacon.c
@@ -365,10 +365,10 @@ bool ath9k_csa_is_finished(struct ath_softc *sc, struct ieee80211_vif *vif)
if (!vif || !vif->bss_conf.csa_active)
return false;
- if (!ieee80211_beacon_cntdwn_is_complete(vif))
+ if (!ieee80211_beacon_cntdwn_is_complete(vif, 0))
return false;
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
return true;
}
diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
index 237f4ec2cffd..6c33e898b300 100644
--- a/drivers/net/wireless/ath/ath9k/htc.h
+++ b/drivers/net/wireless/ath/ath9k/htc.h
@@ -306,7 +306,6 @@ struct ath9k_htc_tx {
DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM);
struct timer_list cleanup_timer;
spinlock_t tx_lock;
- bool initialized;
};
struct ath9k_htc_tx_ctl {
@@ -515,6 +514,7 @@ struct ath9k_htc_priv {
unsigned long ps_usecount;
bool ps_enabled;
bool ps_idle;
+ bool initialized;
#ifdef CONFIG_MAC80211_LEDS
enum led_brightness brightness;
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
index 533471e69400..547634f82183 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
@@ -514,10 +514,10 @@ bool ath9k_htc_csa_is_finished(struct ath9k_htc_priv *priv)
if (!vif || !vif->bss_conf.csa_active)
return false;
- if (!ieee80211_beacon_cntdwn_is_complete(vif))
+ if (!ieee80211_beacon_cntdwn_is_complete(vif, 0))
return false;
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
priv->csa_vif = NULL;
return true;
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index 0aa5bdeb44a1..3633f9eb2c55 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -966,6 +966,10 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev,
htc_handle->drv_priv = priv;
+ /* Allow ath9k_wmi_event_tasklet() to operate. */
+ smp_wmb();
+ priv->initialized = true;
+
return 0;
err_init:
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index 9a9b5212051a..b389e19381c4 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -1868,6 +1868,10 @@ static void ath9k_htc_channel_switch_beacon(struct ieee80211_hw *hw,
}
struct ieee80211_ops ath9k_htc_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = ath9k_htc_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = ath9k_htc_start,
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index efcaeccb055a..ce9c04e418b8 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -815,10 +815,6 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv)
skb_queue_head_init(&priv->tx.data_vo_queue);
skb_queue_head_init(&priv->tx.tx_failed);
- /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */
- smp_wmb();
- priv->tx.initialized = true;
-
return 0;
}
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index c48ff0ffbfef..a2943aaecb20 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2786,6 +2786,10 @@ static int ath9k_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
}
struct ieee80211_ops ath9k_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = ath9k_tx,
.start = ath9k_start,
.stop = ath9k_stop,
diff --git a/drivers/net/wireless/ath/ath9k/reg_aic.h b/drivers/net/wireless/ath/ath9k/reg_aic.h
index 955147ab48a2..f50994910eae 100644
--- a/drivers/net/wireless/ath/ath9k/reg_aic.h
+++ b/drivers/net/wireless/ath/ath9k/reg_aic.h
@@ -17,10 +17,6 @@
#ifndef REG_AIC_H
#define REG_AIC_H
-#define AR_SM_BASE 0xa200
-#define AR_SM1_BASE 0xb200
-#define AR_AGC_BASE 0x9e00
-
#define AR_PHY_AIC_CTRL_0_B0 (AR_SM_BASE + 0x4b0)
#define AR_PHY_AIC_CTRL_1_B0 (AR_SM_BASE + 0x4b4)
#define AR_PHY_AIC_CTRL_2_B0 (AR_SM_BASE + 0x4b8)
diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
index 1476b42b52a9..805ad31edba2 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.c
+++ b/drivers/net/wireless/ath/ath9k/wmi.c
@@ -155,6 +155,12 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t)
}
spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+ /* Check if ath9k_htc_probe_device() completed. */
+ if (!data_race(priv->initialized)) {
+ kfree_skb(skb);
+ continue;
+ }
+
hdr = (struct wmi_cmd_hdr *) skb->data;
cmd_id = be16_to_cpu(hdr->command_id);
wmi_event = skb_pull(skb, sizeof(struct wmi_cmd_hdr));
@@ -169,10 +175,6 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t)
&wmi->drv_priv->fatal_work);
break;
case WMI_TXSTATUS_EVENTID:
- /* Check if ath9k_tx_init() completed. */
- if (!data_race(priv->tx.initialized))
- break;
-
spin_lock_bh(&priv->tx.tx_lock);
if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) {
spin_unlock_bh(&priv->tx.tx_lock);
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index f15684379b03..d519b676a109 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -369,12 +369,11 @@ static void ath_tid_drain(struct ath_softc *sc, struct ath_txq *txq,
struct list_head bf_head;
struct ath_tx_status ts;
struct ath_frame_info *fi;
- int ret;
memset(&ts, 0, sizeof(ts));
INIT_LIST_HEAD(&bf_head);
- while ((ret = ath_tid_dequeue(tid, &skb)) == 0) {
+ while (ath_tid_dequeue(tid, &skb) == 0) {
fi = get_frame_info(skb);
bf = fi->bf;
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index 524327d24964..7e7797bf44b7 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -1712,6 +1712,10 @@ static bool carl9170_tx_frames_pending(struct ieee80211_hw *hw)
}
static const struct ieee80211_ops carl9170_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.start = carl9170_op_start,
.stop = carl9170_op_stop,
.tx = carl9170_op_tx,
diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c
index 6bb9aa2bfe65..e902ca80eba7 100644
--- a/drivers/net/wireless/ath/carl9170/tx.c
+++ b/drivers/net/wireless/ath/carl9170/tx.c
@@ -189,7 +189,7 @@ static void carl9170_tx_accounting_free(struct ar9170 *ar, struct sk_buff *skb)
static int carl9170_alloc_dev_space(struct ar9170 *ar, struct sk_buff *skb)
{
- struct _carl9170_tx_superframe *super = (void *) skb->data;
+ struct _carl9170_tx_superframe *super;
unsigned int chunks;
int cookie = -1;
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 41119fb177e3..bfbd3c7a70b3 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1347,6 +1347,10 @@ static void wcn36xx_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif
}
static const struct ieee80211_ops wcn36xx_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.start = wcn36xx_start,
.stop = wcn36xx_stop,
.add_interface = wcn36xx_add_interface,
@@ -1685,6 +1689,7 @@ static struct platform_driver wcn36xx_driver = {
module_platform_driver(wcn36xx_driver);
+MODULE_DESCRIPTION("Qualcomm Atheros WCN3660/3680 wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Eugene Krasnikov k.eugene.e@gmail.com");
MODULE_FIRMWARE(WLAN_NV_FILE);
diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c
index 447b51cff8f9..0b55a272bfd6 100644
--- a/drivers/net/wireless/atmel/at76c50x-usb.c
+++ b/drivers/net/wireless/atmel/at76c50x-usb.c
@@ -2178,6 +2178,10 @@ static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
}
static const struct ieee80211_ops at76_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = at76_mac80211_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.add_interface = at76_add_interface,
diff --git a/drivers/net/wireless/broadcom/b43/b43.h b/drivers/net/wireless/broadcom/b43/b43.h
index 67b4bac048e5..c0d8fc0b22fb 100644
--- a/drivers/net/wireless/broadcom/b43/b43.h
+++ b/drivers/net/wireless/broadcom/b43/b43.h
@@ -1082,6 +1082,22 @@ static inline bool b43_using_pio_transfers(struct b43_wldev *dev)
return dev->__using_pio_transfers;
}
+static inline void b43_wake_queue(struct b43_wldev *dev, int queue_prio)
+{
+ if (dev->qos_enabled)
+ ieee80211_wake_queue(dev->wl->hw, queue_prio);
+ else
+ ieee80211_wake_queue(dev->wl->hw, 0);
+}
+
+static inline void b43_stop_queue(struct b43_wldev *dev, int queue_prio)
+{
+ if (dev->qos_enabled)
+ ieee80211_stop_queue(dev->wl->hw, queue_prio);
+ else
+ ieee80211_stop_queue(dev->wl->hw, 0);
+}
+
/* Message printing */
__printf(2, 3) void b43info(struct b43_wl *wl, const char *fmt, ...);
__printf(2, 3) void b43err(struct b43_wl *wl, const char *fmt, ...);
diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c
index 760d1a28edc6..6ac7dcebfff9 100644
--- a/drivers/net/wireless/broadcom/b43/dma.c
+++ b/drivers/net/wireless/broadcom/b43/dma.c
@@ -1399,7 +1399,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
should_inject_overflow(ring)) {
/* This TX ring is full. */
unsigned int skb_mapping = skb_get_queue_mapping(skb);
- ieee80211_stop_queue(dev->wl->hw, skb_mapping);
+ b43_stop_queue(dev, skb_mapping);
dev->wl->tx_queue_stopped[skb_mapping] = true;
ring->stopped = true;
if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
@@ -1570,7 +1570,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
} else {
/* If the driver queue is running wake the corresponding
* mac80211 queue. */
- ieee80211_wake_queue(dev->wl->hw, ring->queue_prio);
+ b43_wake_queue(dev, ring->queue_prio);
if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
b43dbg(dev->wl, "Woke up TX ring %d\n", ring->index);
}
diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c
index 92ca0b2ca286..badb2f494035 100644
--- a/drivers/net/wireless/broadcom/b43/main.c
+++ b/drivers/net/wireless/broadcom/b43/main.c
@@ -2587,7 +2587,8 @@ static void b43_request_firmware(struct work_struct *work)
start_ieee80211:
wl->hw->queues = B43_QOS_QUEUE_NUM;
- if (!modparam_qos || dev->fw.opensource)
+ if (!modparam_qos || dev->fw.opensource ||
+ dev->dev->chip_id == BCMA_CHIP_ID_BCM4331)
wl->hw->queues = 1;
err = ieee80211_register_hw(wl->hw);
@@ -3603,7 +3604,7 @@ static void b43_tx_work(struct work_struct *work)
err = b43_dma_tx(dev, skb);
if (err == -ENOSPC) {
wl->tx_queue_stopped[queue_num] = true;
- ieee80211_stop_queue(wl->hw, queue_num);
+ b43_stop_queue(dev, queue_num);
skb_queue_head(&wl->tx_queue[queue_num], skb);
break;
}
@@ -3627,6 +3628,7 @@ static void b43_op_tx(struct ieee80211_hw *hw,
struct sk_buff *skb)
{
struct b43_wl *wl = hw_to_b43_wl(hw);
+ u16 skb_queue_mapping;
if (unlikely(skb->len < 2 + 2 + 6)) {
/* Too short, this can't be a valid frame. */
@@ -3635,12 +3637,12 @@ static void b43_op_tx(struct ieee80211_hw *hw,
}
B43_WARN_ON(skb_shinfo(skb)->nr_frags);
- skb_queue_tail(&wl->tx_queue[skb->queue_mapping], skb);
- if (!wl->tx_queue_stopped[skb->queue_mapping]) {
+ skb_queue_mapping = skb_get_queue_mapping(skb);
+ skb_queue_tail(&wl->tx_queue[skb_queue_mapping], skb);
+ if (!wl->tx_queue_stopped[skb_queue_mapping])
ieee80211_queue_work(wl->hw, &wl->tx_work);
- } else {
- ieee80211_stop_queue(wl->hw, skb->queue_mapping);
- }
+ else
+ b43_stop_queue(wl->current_dev, skb_queue_mapping);
}
static void b43_qos_params_upload(struct b43_wldev *dev,
@@ -5170,6 +5172,10 @@ static int b43_op_get_survey(struct ieee80211_hw *hw, int idx,
}
static const struct ieee80211_ops b43_hw_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = b43_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.conf_tx = b43_op_conf_tx,
diff --git a/drivers/net/wireless/broadcom/b43/phy_ht.c b/drivers/net/wireless/broadcom/b43/phy_ht.c
index d050971d150a..26a226126bc4 100644
--- a/drivers/net/wireless/broadcom/b43/phy_ht.c
+++ b/drivers/net/wireless/broadcom/b43/phy_ht.c
@@ -322,8 +322,8 @@ static void b43_phy_ht_bphy_reset(struct b43_wldev *dev, bool reset)
B43_PHY_B_BBCFG_RSTCCA | B43_PHY_B_BBCFG_RSTRX);
else
b43_phy_mask(dev, B43_PHY_B_BBCFG,
- (u16)~(B43_PHY_B_BBCFG_RSTCCA |
- B43_PHY_B_BBCFG_RSTRX));
+ 0xffff & ~(B43_PHY_B_BBCFG_RSTCCA |
+ B43_PHY_B_BBCFG_RSTRX));
b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp);
}
@@ -551,7 +551,7 @@ static void b43_phy_ht_tx_power_ctl(struct b43_wldev *dev, bool enable)
phy_ht->tx_pwr_idx[i] =
b43_phy_read(dev, status_regs[i]);
}
- b43_phy_mask(dev, B43_PHY_HT_TXPCTL_CMD_C1, ~en_bits);
+ b43_phy_mask(dev, B43_PHY_HT_TXPCTL_CMD_C1, 0xffff & ~en_bits);
} else {
b43_phy_set(dev, B43_PHY_HT_TXPCTL_CMD_C1, en_bits);
diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c
index 2c0c019a815d..4bb005b93f2c 100644
--- a/drivers/net/wireless/broadcom/b43/phy_n.c
+++ b/drivers/net/wireless/broadcom/b43/phy_n.c
@@ -6246,7 +6246,7 @@ static void b43_nphy_channel_setup(struct b43_wldev *dev,
b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
/* Take BPHY out of the reset */
b43_phy_mask(dev, B43_PHY_B_BBCFG,
- (u16)~(B43_PHY_B_BBCFG_RSTCCA | B43_PHY_B_BBCFG_RSTRX));
+ ~(B43_PHY_B_BBCFG_RSTCCA | B43_PHY_B_BBCFG_RSTRX) & 0xffff);
b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
}
@@ -6377,7 +6377,7 @@ static int b43_nphy_set_channel(struct b43_wldev *dev,
} else if (channel_type == NL80211_CHAN_HT40MINUS) {
b43_phy_mask(dev, B43_NPHY_RXCTL, ~B43_NPHY_RXCTL_BSELU20);
if (phy->rev >= 7)
- b43_phy_mask(dev, 0x310, (u16)~0x8000);
+ b43_phy_mask(dev, 0x310, 0x7fff);
}
if (phy->rev >= 19) {
diff --git a/drivers/net/wireless/broadcom/b43/pio.c b/drivers/net/wireless/broadcom/b43/pio.c
index 0cf70fdb60a6..e41f2f5b4c26 100644
--- a/drivers/net/wireless/broadcom/b43/pio.c
+++ b/drivers/net/wireless/broadcom/b43/pio.c
@@ -525,7 +525,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
if (total_len > (q->buffer_size - q->buffer_used)) {
/* Not enough memory on the queue. */
err = -EBUSY;
- ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
+ b43_stop_queue(dev, skb_get_queue_mapping(skb));
q->stopped = true;
goto out;
}
@@ -552,7 +552,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
if (((q->buffer_size - q->buffer_used) < roundup(2 + 2 + 6, 4)) ||
(q->free_packet_slots == 0)) {
/* The queue is full. */
- ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
+ b43_stop_queue(dev, skb_get_queue_mapping(skb));
q->stopped = true;
}
@@ -587,7 +587,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev,
list_add(&pack->list, &q->packets_list);
if (q->stopped) {
- ieee80211_wake_queue(dev->wl->hw, q->queue_prio);
+ b43_wake_queue(dev, q->queue_prio);
q->stopped = false;
}
}
diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
index 760136638a95..18eb610f600a 100644
--- a/drivers/net/wireless/broadcom/b43legacy/main.c
+++ b/drivers/net/wireless/broadcom/b43legacy/main.c
@@ -3531,6 +3531,10 @@ static int b43legacy_op_get_survey(struct ieee80211_hw *hw, int idx,
}
static const struct ieee80211_ops b43legacy_hw_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = b43legacy_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.conf_tx = b43legacy_op_conf_tx,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/core.c
index ac3a36fa3640..f471c962104a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/core.c
@@ -7,21 +7,33 @@
#include <core.h>
#include <bus.h>
#include <fwvid.h>
+#include <feature.h>
#include "vops.h"
-static int brcmf_bca_attach(struct brcmf_pub *drvr)
+#define BRCMF_BCA_E_LAST 212
+
+static void brcmf_bca_feat_attach(struct brcmf_if *ifp)
{
- pr_err("%s: executing\n", __func__);
- return 0;
+ /* SAE support not confirmed so disabling for now */
+ ifp->drvr->feat_flags &= ~BIT(BRCMF_FEAT_SAE);
}
-static void brcmf_bca_detach(struct brcmf_pub *drvr)
+static int brcmf_bca_alloc_fweh_info(struct brcmf_pub *drvr)
{
- pr_err("%s: executing\n", __func__);
+ struct brcmf_fweh_info *fweh;
+
+ fweh = kzalloc(struct_size(fweh, evt_handler, BRCMF_BCA_E_LAST),
+ GFP_KERNEL);
+ if (!fweh)
+ return -ENOMEM;
+
+ fweh->num_event_codes = BRCMF_BCA_E_LAST;
+ drvr->fweh = fweh;
+ return 0;
}
const struct brcmf_fwvid_ops brcmf_bca_ops = {
- .attach = brcmf_bca_attach,
- .detach = brcmf_bca_detach,
+ .feat_attach = brcmf_bca_feat_attach,
+ .alloc_fweh_info = brcmf_bca_alloc_fweh_info,
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c
index d55f3271d619..4f0c1e1a8e60 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c
@@ -20,6 +20,7 @@ static void __exit brcmf_bca_exit(void)
brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_BCA, THIS_MODULE);
}
+MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Broadcom AP chipsets");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(BRCMFMAC);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 133c5ea6429c..b99aa66dc5a9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -32,6 +32,7 @@
#include "vendor.h"
#include "bus.h"
#include "common.h"
+#include "fwvid.h"
#define BRCMF_SCAN_IE_LEN_MAX 2048
@@ -1179,8 +1180,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg,
scan_request = cfg->scan_request;
cfg->scan_request = NULL;
- if (timer_pending(&cfg->escan_timeout))
- del_timer_sync(&cfg->escan_timeout);
+ timer_delete_sync(&cfg->escan_timeout);
if (fw_abort) {
/* Do a scan abort to stop the driver's scan engine */
@@ -1687,52 +1687,39 @@ static u16 brcmf_map_fw_linkdown_reason(const struct brcmf_event_msg *e)
return reason;
}
-static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len)
+int brcmf_set_wsec(struct brcmf_if *ifp, const u8 *key, u16 key_len, u16 flags)
{
struct brcmf_pub *drvr = ifp->drvr;
struct brcmf_wsec_pmk_le pmk;
int err;
+ if (key_len > sizeof(pmk.key)) {
+ bphy_err(drvr, "key must be less than %zu bytes\n",
+ sizeof(pmk.key));
+ return -EINVAL;
+ }
+
memset(&pmk, 0, sizeof(pmk));
- /* pass pmk directly */
- pmk.key_len = cpu_to_le16(pmk_len);
- pmk.flags = cpu_to_le16(0);
- memcpy(pmk.key, pmk_data, pmk_len);
+ /* pass key material directly */
+ pmk.key_len = cpu_to_le16(key_len);
+ pmk.flags = cpu_to_le16(flags);
+ memcpy(pmk.key, key, key_len);
- /* store psk in firmware */
+ /* store key material in firmware */
err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK,
&pmk, sizeof(pmk));
if (err < 0)
bphy_err(drvr, "failed to change PSK in firmware (len=%u)\n",
- pmk_len);
+ key_len);
return err;
}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_set_wsec);
-static int brcmf_set_sae_password(struct brcmf_if *ifp, const u8 *pwd_data,
- u16 pwd_len)
+static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len)
{
- struct brcmf_pub *drvr = ifp->drvr;
- struct brcmf_wsec_sae_pwd_le sae_pwd;
- int err;
-
- if (pwd_len > BRCMF_WSEC_MAX_SAE_PASSWORD_LEN) {
- bphy_err(drvr, "sae_password must be less than %d\n",
- BRCMF_WSEC_MAX_SAE_PASSWORD_LEN);
- return -EINVAL;
- }
-
- sae_pwd.key_len = cpu_to_le16(pwd_len);
- memcpy(sae_pwd.key, pwd_data, pwd_len);
-
- err = brcmf_fil_iovar_data_set(ifp, "sae_password", &sae_pwd,
- sizeof(sae_pwd));
- if (err < 0)
- bphy_err(drvr, "failed to set SAE password in firmware (len=%u)\n",
- pwd_len);
-
- return err;
+ return brcmf_set_wsec(ifp, pmk_data, pmk_len, 0);
}
static void brcmf_link_down(struct brcmf_cfg80211_vif *vif, u16 reason,
@@ -2503,8 +2490,7 @@ brcmf_cfg80211_connect(struct wiphy *wiphy, struct net_device *ndev,
bphy_err(drvr, "failed to clean up user-space RSNE\n");
goto done;
}
- err = brcmf_set_sae_password(ifp, sme->crypto.sae_pwd,
- sme->crypto.sae_pwd_len);
+ err = brcmf_fwvid_set_sae_password(ifp, &sme->crypto);
if (!err && sme->crypto.psk)
err = brcmf_set_pmk(ifp, sme->crypto.psk,
BRCMF_WSEC_MAX_PSK_LEN);
@@ -3081,7 +3067,7 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp,
struct brcmf_scb_val_le scbval;
struct brcmf_pktcnt_le pktcnt;
s32 err;
- u32 rate;
+ u32 rate = 0;
u32 rssi;
/* Get the current tx rate */
@@ -3779,8 +3765,10 @@ static int brcmf_internal_escan_add_info(struct cfg80211_scan_request *req,
if (req->channels[i] == chan)
break;
}
- if (i == req->n_channels)
- req->channels[req->n_channels++] = chan;
+ if (i == req->n_channels) {
+ req->n_channels++;
+ req->channels[i] = chan;
+ }
for (i = 0; i < req->n_ssids; i++) {
if (req->ssids[i].ssid_len == ssid_len &&
@@ -4320,6 +4308,9 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa,
int ret;
pmk_op = kzalloc(sizeof(*pmk_op), GFP_KERNEL);
+ if (!pmk_op)
+ return -ENOMEM;
+
pmk_op->version = cpu_to_le16(BRCMF_PMKSA_VER_3);
if (!pmksa) {
@@ -5113,6 +5104,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
bool mbss;
int is_11d;
bool supports_11d;
+ bool closednet;
brcmf_dbg(TRACE, "ctrlchn=%d, center=%d, bw=%d, beacon_interval=%d, dtim_period=%d,\n",
settings->chandef.chan->hw_value,
@@ -5252,8 +5244,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
if (crypto->sae_pwd) {
brcmf_dbg(INFO, "using SAE offload\n");
profile->use_fwauth |= BIT(BRCMF_PROFILE_FWAUTH_SAE);
- err = brcmf_set_sae_password(ifp, crypto->sae_pwd,
- crypto->sae_pwd_len);
+ err = brcmf_fwvid_set_sae_password(ifp, crypto);
if (err < 0)
goto exit;
}
@@ -5283,12 +5274,12 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
goto exit;
}
- err = brcmf_fil_iovar_int_set(ifp, "closednet",
- settings->hidden_ssid);
+ closednet =
+ (settings->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE);
+ err = brcmf_fil_iovar_int_set(ifp, "closednet", closednet);
if (err) {
bphy_err(drvr, "%s closednet error (%d)\n",
- settings->hidden_ssid ?
- "enabled" : "disabled",
+ (closednet ? "enabled" : "disabled"),
err);
goto exit;
}
@@ -5360,10 +5351,12 @@ static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev,
msleep(400);
if (profile->use_fwauth != BIT(BRCMF_PROFILE_FWAUTH_NONE)) {
+ struct cfg80211_crypto_settings crypto = {};
+
if (profile->use_fwauth & BIT(BRCMF_PROFILE_FWAUTH_PSK))
brcmf_set_pmk(ifp, NULL, 0);
if (profile->use_fwauth & BIT(BRCMF_PROFILE_FWAUTH_SAE))
- brcmf_set_sae_password(ifp, NULL, 0);
+ brcmf_fwvid_set_sae_password(ifp, &crypto);
profile->use_fwauth = BIT(BRCMF_PROFILE_FWAUTH_NONE);
}
@@ -7269,7 +7262,7 @@ static int brcmf_setup_wiphybands(struct brcmf_cfg80211_info *cfg)
u32 nmode = 0;
u32 vhtmode = 0;
u32 bw_cap[2] = { WLC_BW_20MHZ_BIT, WLC_BW_20MHZ_BIT };
- u32 rxchain;
+ u32 rxchain = 0;
u32 nchain;
int err;
s32 i;
@@ -8435,6 +8428,7 @@ void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg)
brcmf_btcoex_detach(cfg);
wiphy_unregister(cfg->wiphy);
wl_deinit_priv(cfg);
+ cancel_work_sync(&cfg->escan_timeout_work);
brcmf_free_wiphy(cfg->wiphy);
kfree(cfg);
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
index 0e1fa3f0dea2..dc3a6a537507 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
@@ -468,4 +468,6 @@ void brcmf_set_mpc(struct brcmf_if *ndev, int mpc);
void brcmf_abort_scanning(struct brcmf_cfg80211_info *cfg);
void brcmf_cfg80211_free_netdev(struct net_device *ndev);
+int brcmf_set_wsec(struct brcmf_if *ifp, const u8 *key, u16 key_len, u16 flags);
+
#endif /* BRCMFMAC_CFG80211_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index b6d458e022fa..b24faae35873 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -266,7 +266,7 @@ static int brcmf_c_process_cal_blob(struct brcmf_if *ifp)
int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
{
struct brcmf_pub *drvr = ifp->drvr;
- s8 eventmask[BRCMF_EVENTING_MASK_LEN];
+ struct brcmf_fweh_info *fweh = drvr->fweh;
u8 buf[BRCMF_DCMD_SMLEN];
struct brcmf_bus *bus;
struct brcmf_rev_info_le revinfo;
@@ -413,15 +413,21 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
brcmf_c_set_joinpref_default(ifp);
/* Setup event_msgs, enable E_IF */
- err = brcmf_fil_iovar_data_get(ifp, "event_msgs", eventmask,
- BRCMF_EVENTING_MASK_LEN);
+ err = brcmf_fil_iovar_data_get(ifp, "event_msgs", fweh->event_mask,
+ fweh->event_mask_len);
if (err) {
bphy_err(drvr, "Get event_msgs error (%d)\n", err);
goto done;
}
- setbit(eventmask, BRCMF_E_IF);
- err = brcmf_fil_iovar_data_set(ifp, "event_msgs", eventmask,
- BRCMF_EVENTING_MASK_LEN);
+ /*
+ * BRCMF_E_IF can safely be used to set the appropriate bit
+ * in the event_mask as the firmware event code is guaranteed
+ * to match the value of BRCMF_E_IF because it is old cruft
+ * that all vendors have.
+ */
+ setbit(fweh->event_mask, BRCMF_E_IF);
+ err = brcmf_fil_iovar_data_set(ifp, "event_msgs", fweh->event_mask,
+ fweh->event_mask_len);
if (err) {
bphy_err(drvr, "Set event_msgs error (%d)\n", err);
goto done;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index f599d5f896e8..bf91b1e1368f 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -691,7 +691,7 @@ static int brcmf_net_mon_open(struct net_device *ndev)
{
struct brcmf_if *ifp = netdev_priv(ndev);
struct brcmf_pub *drvr = ifp->drvr;
- u32 monitor;
+ u32 monitor = 0;
int err;
brcmf_dbg(TRACE, "Enter\n");
@@ -1348,13 +1348,17 @@ int brcmf_attach(struct device *dev)
goto fail;
}
+ /* attach firmware event handler */
+ ret = brcmf_fweh_attach(drvr);
+ if (ret != 0) {
+ bphy_err(drvr, "brcmf_fweh_attach failed\n");
+ goto fail;
+ }
+
/* Attach to events important for core code */
brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
brcmf_psm_watchdog_notify);
- /* attach firmware event handler */
- brcmf_fweh_attach(drvr);
-
ret = brcmf_bus_started(drvr, drvr->ops);
if (ret != 0) {
bphy_err(drvr, "dongle is not responding: err=%d\n", ret);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
index e4f911dd414b..ea76b8d33401 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
@@ -122,7 +122,7 @@ struct brcmf_pub {
struct mutex proto_block;
unsigned char proto_buf[BRCMF_DCMD_MAXLEN];
- struct brcmf_fweh_info fweh;
+ struct brcmf_fweh_info *fweh;
struct brcmf_ampdu_rx_reorder
*reorder_flows[BRCMF_AMPDU_RX_REORDER_MAXFLOWS];
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/core.c
index b75652ba9359..9a4837881486 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/core.c
@@ -7,21 +7,53 @@
#include <core.h>
#include <bus.h>
#include <fwvid.h>
+#include <fwil.h>
#include "vops.h"
-static int brcmf_cyw_attach(struct brcmf_pub *drvr)
+#define BRCMF_CYW_E_LAST 197
+
+static int brcmf_cyw_set_sae_pwd(struct brcmf_if *ifp,
+ struct cfg80211_crypto_settings *crypto)
{
- pr_err("%s: executing\n", __func__);
- return 0;
+ struct brcmf_pub *drvr = ifp->drvr;
+ struct brcmf_wsec_sae_pwd_le sae_pwd;
+ u16 pwd_len = crypto->sae_pwd_len;
+ int err;
+
+ if (pwd_len > BRCMF_WSEC_MAX_SAE_PASSWORD_LEN) {
+ bphy_err(drvr, "sae_password must be less than %d\n",
+ BRCMF_WSEC_MAX_SAE_PASSWORD_LEN);
+ return -EINVAL;
+ }
+
+ sae_pwd.key_len = cpu_to_le16(pwd_len);
+ memcpy(sae_pwd.key, crypto->sae_pwd, pwd_len);
+
+ err = brcmf_fil_iovar_data_set(ifp, "sae_password", &sae_pwd,
+ sizeof(sae_pwd));
+ if (err < 0)
+ bphy_err(drvr, "failed to set SAE password in firmware (len=%u)\n",
+ pwd_len);
+
+ return err;
}
-static void brcmf_cyw_detach(struct brcmf_pub *drvr)
+static int brcmf_cyw_alloc_fweh_info(struct brcmf_pub *drvr)
{
- pr_err("%s: executing\n", __func__);
+ struct brcmf_fweh_info *fweh;
+
+ fweh = kzalloc(struct_size(fweh, evt_handler, BRCMF_CYW_E_LAST),
+ GFP_KERNEL);
+ if (!fweh)
+ return -ENOMEM;
+
+ fweh->num_event_codes = BRCMF_CYW_E_LAST;
+ drvr->fweh = fweh;
+ return 0;
}
const struct brcmf_fwvid_ops brcmf_cyw_ops = {
- .attach = brcmf_cyw_attach,
- .detach = brcmf_cyw_detach,
+ .set_sae_password = brcmf_cyw_set_sae_pwd,
+ .alloc_fweh_info = brcmf_cyw_alloc_fweh_info,
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c
index f82fbbe3ecef..90d06cda03a2 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c
@@ -20,6 +20,7 @@ static void __exit brcmf_cyw_exit(void)
brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_CYW, THIS_MODULE);
}
+MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Cypress/Infineon chipsets");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(BRCMFMAC);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c
index 86ff174936a9..c3a602197662 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c
@@ -83,6 +83,15 @@ static const struct dmi_system_id dmi_platform_data[] = {
.driver_data = (void *)&acepc_t8_data,
},
{
+ /* ACEPC W5 Pro Cherry Trail Z8350 HDMI stick, same wifi as the T8 */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"),
+ DMI_MATCH(DMI_CHASSIS_TYPE, "3"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+ },
+ .driver_data = (void *)&acepc_t8_data,
+ },
+ {
/* Chuwi Hi8 Pro with D2D3_Hi8Pro.233 BIOS */
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Hampoo"),
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
index 6d10c9efbe93..f23310a77a5d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
@@ -13,6 +13,7 @@
#include "debug.h"
#include "fwil.h"
#include "fwil_types.h"
+#include "fwvid.h"
#include "feature.h"
#include "common.h"
@@ -183,7 +184,7 @@ static void brcmf_feat_wlc_version_overrides(struct brcmf_pub *drv)
static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp,
enum brcmf_feat_id id, char *name)
{
- u32 data;
+ u32 data = 0;
int err;
/* we need to know firmware error */
@@ -339,6 +340,11 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_FWSUP, "sup_wpa");
brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_SCAN_V2, "scan_ver");
+ brcmf_feat_wlc_version_overrides(drvr);
+ brcmf_feat_firmware_overrides(drvr);
+
+ brcmf_fwvid_feat_attach(ifp);
+
if (drvr->settings->feature_disable) {
brcmf_dbg(INFO, "Features: 0x%02x, disable: 0x%02x\n",
ifp->drvr->feat_flags,
@@ -346,9 +352,6 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
ifp->drvr->feat_flags &= ~drvr->settings->feature_disable;
}
- brcmf_feat_wlc_version_overrides(drvr);
- brcmf_feat_firmware_overrides(drvr);
-
/* set chip related quirks */
switch (drvr->bus_if->chip) {
case BRCM_CC_43236_CHIP_ID:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
index 68960ae98987..f0b6a7607f16 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
@@ -14,7 +14,8 @@
#include "fweh.h"
#include "fwil.h"
#include "proto.h"
-
+#include "bus.h"
+#include "fwvid.h"
/**
* struct brcmf_fweh_queue_item - event item on event queue.
*
@@ -28,7 +29,7 @@
*/
struct brcmf_fweh_queue_item {
struct list_head q;
- enum brcmf_fweh_event_code code;
+ u32 code;
u8 ifidx;
u8 ifaddr[ETH_ALEN];
struct brcmf_event_msg_be emsg;
@@ -94,7 +95,7 @@ static void brcmf_fweh_queue_event(struct brcmf_fweh_info *fweh,
static int brcmf_fweh_call_event_handler(struct brcmf_pub *drvr,
struct brcmf_if *ifp,
- enum brcmf_fweh_event_code code,
+ u32 fwcode,
struct brcmf_event_msg *emsg,
void *data)
{
@@ -102,13 +103,13 @@ static int brcmf_fweh_call_event_handler(struct brcmf_pub *drvr,
int err = -EINVAL;
if (ifp) {
- fweh = &ifp->drvr->fweh;
+ fweh = ifp->drvr->fweh;
/* handle the event if valid interface and handler */
- if (fweh->evt_handler[code])
- err = fweh->evt_handler[code](ifp, emsg, data);
+ if (fweh->evt_handler[fwcode])
+ err = fweh->evt_handler[fwcode](ifp, emsg, data);
else
- bphy_err(drvr, "unhandled event %d ignored\n", code);
+ bphy_err(drvr, "unhandled fwevt %d ignored\n", fwcode);
} else {
bphy_err(drvr, "no interface object\n");
}
@@ -142,7 +143,7 @@ static void brcmf_fweh_handle_if_event(struct brcmf_pub *drvr,
is_p2pdev = ((ifevent->flags & BRCMF_E_IF_FLAG_NOIF) &&
(ifevent->role == BRCMF_E_IF_ROLE_P2P_CLIENT ||
((ifevent->role == BRCMF_E_IF_ROLE_STA) &&
- (drvr->fweh.p2pdev_setup_ongoing))));
+ (drvr->fweh->p2pdev_setup_ongoing))));
if (!is_p2pdev && (ifevent->flags & BRCMF_E_IF_FLAG_NOIF)) {
brcmf_dbg(EVENT, "event can be ignored\n");
return;
@@ -163,7 +164,7 @@ static void brcmf_fweh_handle_if_event(struct brcmf_pub *drvr,
return;
if (!is_p2pdev)
brcmf_proto_add_if(drvr, ifp);
- if (!drvr->fweh.evt_handler[BRCMF_E_IF])
+ if (!drvr->fweh->evt_handler[BRCMF_E_IF])
if (brcmf_net_attach(ifp, false) < 0)
return;
}
@@ -183,6 +184,45 @@ static void brcmf_fweh_handle_if_event(struct brcmf_pub *drvr,
}
}
+static void brcmf_fweh_map_event_code(struct brcmf_fweh_info *fweh,
+ enum brcmf_fweh_event_code code,
+ u32 *fw_code)
+{
+ int i;
+
+ if (WARN_ON(!fw_code))
+ return;
+
+ *fw_code = code;
+ if (fweh->event_map) {
+ for (i = 0; i < fweh->event_map->n_items; i++) {
+ if (fweh->event_map->items[i].code == code) {
+ *fw_code = fweh->event_map->items[i].fwevt_code;
+ break;
+ }
+ }
+ }
+}
+
+static void brcmf_fweh_map_fwevt_code(struct brcmf_fweh_info *fweh, u32 fw_code,
+ enum brcmf_fweh_event_code *code)
+{
+ int i;
+
+ if (WARN_ON(!code))
+ return;
+
+ *code = fw_code;
+ if (fweh->event_map) {
+ for (i = 0; i < fweh->event_map->n_items; i++) {
+ if (fweh->event_map->items[i].fwevt_code == fw_code) {
+ *code = fweh->event_map->items[i].code;
+ break;
+ }
+ }
+ }
+}
+
/**
* brcmf_fweh_dequeue_event() - get event from the queue.
*
@@ -221,15 +261,19 @@ static void brcmf_fweh_event_worker(struct work_struct *work)
struct brcmf_event_msg emsg;
fweh = container_of(work, struct brcmf_fweh_info, event_work);
- drvr = container_of(fweh, struct brcmf_pub, fweh);
+ drvr = fweh->drvr;
while ((event = brcmf_fweh_dequeue_event(fweh))) {
- brcmf_dbg(EVENT, "event %s (%u) ifidx %u bsscfg %u addr %pM\n",
- brcmf_fweh_event_name(event->code), event->code,
+ enum brcmf_fweh_event_code code;
+
+ brcmf_fweh_map_fwevt_code(fweh, event->code, &code);
+ brcmf_dbg(EVENT, "event %s (%u:%u) ifidx %u bsscfg %u addr %pM\n",
+ brcmf_fweh_event_name(code), code, event->code,
event->emsg.ifidx, event->emsg.bsscfgidx,
event->emsg.addr);
if (event->emsg.bsscfgidx >= BRCMF_MAX_IFS) {
- bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx);
+ bphy_err(drvr, "invalid bsscfg index: %u\n",
+ event->emsg.bsscfgidx);
goto event_free;
}
@@ -237,7 +281,7 @@ static void brcmf_fweh_event_worker(struct work_struct *work)
emsg_be = &event->emsg;
emsg.version = be16_to_cpu(emsg_be->version);
emsg.flags = be16_to_cpu(emsg_be->flags);
- emsg.event_code = event->code;
+ emsg.event_code = code;
emsg.status = be32_to_cpu(emsg_be->status);
emsg.reason = be32_to_cpu(emsg_be->reason);
emsg.auth_type = be32_to_cpu(emsg_be->auth_type);
@@ -283,7 +327,7 @@ event_free:
*/
void brcmf_fweh_p2pdev_setup(struct brcmf_if *ifp, bool ongoing)
{
- ifp->drvr->fweh.p2pdev_setup_ongoing = ongoing;
+ ifp->drvr->fweh->p2pdev_setup_ongoing = ongoing;
}
/**
@@ -291,12 +335,27 @@ void brcmf_fweh_p2pdev_setup(struct brcmf_if *ifp, bool ongoing)
*
* @drvr: driver information object.
*/
-void brcmf_fweh_attach(struct brcmf_pub *drvr)
+int brcmf_fweh_attach(struct brcmf_pub *drvr)
{
- struct brcmf_fweh_info *fweh = &drvr->fweh;
+ struct brcmf_fweh_info *fweh;
+ int err;
+
+ err = brcmf_fwvid_alloc_fweh_info(drvr);
+ if (err < 0)
+ return err;
+
+ fweh = drvr->fweh;
+ fweh->drvr = drvr;
+
+ fweh->event_mask_len = DIV_ROUND_UP(fweh->num_event_codes, 8);
+ fweh->event_mask = kzalloc(fweh->event_mask_len, GFP_KERNEL);
+ if (!fweh->event_mask)
+ return -ENOMEM;
+
INIT_WORK(&fweh->event_work, brcmf_fweh_event_worker);
spin_lock_init(&fweh->evt_q_lock);
INIT_LIST_HEAD(&fweh->event_q);
+ return 0;
}
/**
@@ -306,14 +365,19 @@ void brcmf_fweh_attach(struct brcmf_pub *drvr)
*/
void brcmf_fweh_detach(struct brcmf_pub *drvr)
{
- struct brcmf_fweh_info *fweh = &drvr->fweh;
+ struct brcmf_fweh_info *fweh = drvr->fweh;
+
+ if (!fweh)
+ return;
/* cancel the worker if initialized */
if (fweh->event_work.func) {
cancel_work_sync(&fweh->event_work);
WARN_ON(!list_empty(&fweh->event_q));
- memset(fweh->evt_handler, 0, sizeof(fweh->evt_handler));
}
+ drvr->fweh = NULL;
+ kfree(fweh->event_mask);
+ kfree(fweh);
}
/**
@@ -326,11 +390,17 @@ void brcmf_fweh_detach(struct brcmf_pub *drvr)
int brcmf_fweh_register(struct brcmf_pub *drvr, enum brcmf_fweh_event_code code,
brcmf_fweh_handler_t handler)
{
- if (drvr->fweh.evt_handler[code]) {
+ struct brcmf_fweh_info *fweh = drvr->fweh;
+ u32 evt_handler_idx;
+
+ brcmf_fweh_map_event_code(fweh, code, &evt_handler_idx);
+
+ if (fweh->evt_handler[evt_handler_idx]) {
bphy_err(drvr, "event code %d already registered\n", code);
return -ENOSPC;
}
- drvr->fweh.evt_handler[code] = handler;
+
+ fweh->evt_handler[evt_handler_idx] = handler;
brcmf_dbg(TRACE, "event handler registered for %s\n",
brcmf_fweh_event_name(code));
return 0;
@@ -345,9 +415,12 @@ int brcmf_fweh_register(struct brcmf_pub *drvr, enum brcmf_fweh_event_code code,
void brcmf_fweh_unregister(struct brcmf_pub *drvr,
enum brcmf_fweh_event_code code)
{
+ u32 evt_handler_idx;
+
brcmf_dbg(TRACE, "event handler cleared for %s\n",
brcmf_fweh_event_name(code));
- drvr->fweh.evt_handler[code] = NULL;
+ brcmf_fweh_map_event_code(drvr->fweh, code, &evt_handler_idx);
+ drvr->fweh->evt_handler[evt_handler_idx] = NULL;
}
/**
@@ -357,27 +430,28 @@ void brcmf_fweh_unregister(struct brcmf_pub *drvr,
*/
int brcmf_fweh_activate_events(struct brcmf_if *ifp)
{
- struct brcmf_pub *drvr = ifp->drvr;
+ struct brcmf_fweh_info *fweh = ifp->drvr->fweh;
+ enum brcmf_fweh_event_code code;
int i, err;
- s8 eventmask[BRCMF_EVENTING_MASK_LEN];
- memset(eventmask, 0, sizeof(eventmask));
- for (i = 0; i < BRCMF_E_LAST; i++) {
- if (ifp->drvr->fweh.evt_handler[i]) {
+ memset(fweh->event_mask, 0, fweh->event_mask_len);
+ for (i = 0; i < fweh->num_event_codes; i++) {
+ if (fweh->evt_handler[i]) {
+ brcmf_fweh_map_fwevt_code(fweh, i, &code);
brcmf_dbg(EVENT, "enable event %s\n",
- brcmf_fweh_event_name(i));
- setbit(eventmask, i);
+ brcmf_fweh_event_name(code));
+ setbit(fweh->event_mask, i);
}
}
/* want to handle IF event as well */
brcmf_dbg(EVENT, "enable event IF\n");
- setbit(eventmask, BRCMF_E_IF);
+ setbit(fweh->event_mask, BRCMF_E_IF);
- err = brcmf_fil_iovar_data_set(ifp, "event_msgs",
- eventmask, BRCMF_EVENTING_MASK_LEN);
+ err = brcmf_fil_iovar_data_set(ifp, "event_msgs", fweh->event_mask,
+ fweh->event_mask_len);
if (err)
- bphy_err(drvr, "Set event_msgs error (%d)\n", err);
+ bphy_err(fweh->drvr, "Set event_msgs error (%d)\n", err);
return err;
}
@@ -397,21 +471,21 @@ void brcmf_fweh_process_event(struct brcmf_pub *drvr,
struct brcmf_event *event_packet,
u32 packet_len, gfp_t gfp)
{
- enum brcmf_fweh_event_code code;
- struct brcmf_fweh_info *fweh = &drvr->fweh;
+ u32 fwevt_idx;
+ struct brcmf_fweh_info *fweh = drvr->fweh;
struct brcmf_fweh_queue_item *event;
void *data;
u32 datalen;
/* get event info */
- code = get_unaligned_be32(&event_packet->msg.event_type);
+ fwevt_idx = get_unaligned_be32(&event_packet->msg.event_type);
datalen = get_unaligned_be32(&event_packet->msg.datalen);
data = &event_packet[1];
- if (code >= BRCMF_E_LAST)
+ if (fwevt_idx >= fweh->num_event_codes)
return;
- if (code != BRCMF_E_IF && !fweh->evt_handler[code])
+ if (fwevt_idx != BRCMF_E_IF && !fweh->evt_handler[fwevt_idx])
return;
if (datalen > BRCMF_DCMD_MAXLEN ||
@@ -422,8 +496,8 @@ void brcmf_fweh_process_event(struct brcmf_pub *drvr,
if (!event)
return;
+ event->code = fwevt_idx;
event->datalen = datalen;
- event->code = code;
event->ifidx = event_packet->msg.ifidx;
/* use memcpy to get aligned event message */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h
index 48414e8b9389..9ca1b2aadcb5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h
@@ -17,6 +17,10 @@ struct brcmf_pub;
struct brcmf_if;
struct brcmf_cfg80211_info;
+#define BRCMF_ABSTRACT_EVENT_BIT BIT(31)
+#define BRCMF_ABSTRACT_ENUM_DEF(_id, _val) \
+ BRCMF_ENUM_DEF(_id, (BRCMF_ABSTRACT_EVENT_BIT | (_val)))
+
/* list of firmware events */
#define BRCMF_FWEH_EVENT_ENUM_DEFLIST \
BRCMF_ENUM_DEF(SET_SSID, 0) \
@@ -98,16 +102,9 @@ struct brcmf_cfg80211_info;
/* firmware event codes sent by the dongle */
enum brcmf_fweh_event_code {
BRCMF_FWEH_EVENT_ENUM_DEFLIST
- /* this determines event mask length which must match
- * minimum length check in device firmware so it is
- * hard-coded here.
- */
- BRCMF_E_LAST = 139
};
#undef BRCMF_ENUM_DEF
-#define BRCMF_EVENTING_MASK_LEN DIV_ROUND_UP(BRCMF_E_LAST, 8)
-
/* flags field values in struct brcmf_event_msg */
#define BRCMF_EVENT_MSG_LINK 0x01
#define BRCMF_EVENT_MSG_FLUSHTXQ 0x02
@@ -288,27 +285,66 @@ typedef int (*brcmf_fweh_handler_t)(struct brcmf_if *ifp,
void *data);
/**
+ * struct brcmf_fweh_event_map_item - fweh event and firmware event pair.
+ *
+ * @code: fweh event code as used by higher layers.
+ * @fwevt_code: firmware event code as used by firmware.
+ *
+ * This mapping is needed when a functionally identical event has a
+ * different numerical definition between vendors. When such mapping
+ * is needed the higher layer event code should not collide with the
+ * firmware event.
+ */
+struct brcmf_fweh_event_map_item {
+ enum brcmf_fweh_event_code code;
+ u32 fwevt_code;
+};
+
+/**
+ * struct brcmf_fweh_event_map - mapping between firmware event and fweh event.
+ *
+ * @n_items: number of mapping items.
+ * @items: array of fweh event and firmware event pairs.
+ */
+struct brcmf_fweh_event_map {
+ u32 n_items;
+ const struct brcmf_fweh_event_map_item items[] __counted_by(n_items);
+};
+
+/**
* struct brcmf_fweh_info - firmware event handling information.
*
* @p2pdev_setup_ongoing: P2P device creation in progress.
* @event_work: event worker.
* @evt_q_lock: lock for event queue protection.
* @event_q: event queue.
- * @evt_handler: registered event handlers.
+ * @event_mask_len: length of @event_mask used to enable firmware events.
+ * @event_mask: byte array used in 'event_msgs' iovar command.
+ * @event_map: mapping between fweh event and firmware event which
+ * may be provided by vendor-specific module for events that need
+ * mapping.
+ * @num_event_codes: number of firmware events supported by firmware which
+ * does a minimum length check for the @event_mask. This value is to
+ * be provided by vendor-specific module determining @event_mask_len
+ * and consequently the allocation size for @event_mask.
+ * @evt_handler: event handler registry indexed by firmware event code.
*/
struct brcmf_fweh_info {
+ struct brcmf_pub *drvr;
bool p2pdev_setup_ongoing;
struct work_struct event_work;
spinlock_t evt_q_lock;
struct list_head event_q;
- int (*evt_handler[BRCMF_E_LAST])(struct brcmf_if *ifp,
- const struct brcmf_event_msg *evtmsg,
- void *data);
+ uint event_mask_len;
+ u8 *event_mask;
+ struct brcmf_fweh_event_map *event_map;
+ uint num_event_codes;
+ brcmf_fweh_handler_t evt_handler[] __counted_by(num_event_codes);
};
const char *brcmf_fweh_event_name(enum brcmf_fweh_event_code code);
-void brcmf_fweh_attach(struct brcmf_pub *drvr);
+int brcmf_fweh_attach(struct brcmf_pub *drvr);
void brcmf_fweh_detach(struct brcmf_pub *drvr);
int brcmf_fweh_register(struct brcmf_pub *drvr, enum brcmf_fweh_event_code code,
int (*handler)(struct brcmf_if *ifp,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
index 72fe8bce6eaf..6385a7db7f7d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
@@ -142,6 +142,7 @@ brcmf_fil_cmd_data_set(struct brcmf_if *ifp, u32 cmd, void *data, u32 len)
return err;
}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_cmd_data_set);
s32
brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len)
@@ -160,36 +161,7 @@ brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len)
return err;
}
-
-
-s32
-brcmf_fil_cmd_int_set(struct brcmf_if *ifp, u32 cmd, u32 data)
-{
- s32 err;
- __le32 data_le = cpu_to_le32(data);
-
- mutex_lock(&ifp->drvr->proto_block);
- brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, data);
- err = brcmf_fil_cmd_data(ifp, cmd, &data_le, sizeof(data_le), true);
- mutex_unlock(&ifp->drvr->proto_block);
-
- return err;
-}
-
-s32
-brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data)
-{
- s32 err;
- __le32 data_le = cpu_to_le32(*data);
-
- mutex_lock(&ifp->drvr->proto_block);
- err = brcmf_fil_cmd_data(ifp, cmd, &data_le, sizeof(data_le), false);
- mutex_unlock(&ifp->drvr->proto_block);
- *data = le32_to_cpu(data_le);
- brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, *data);
-
- return err;
-}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_cmd_data_get);
static u32
brcmf_create_iovar(const char *name, const char *data, u32 datalen,
@@ -239,6 +211,7 @@ brcmf_fil_iovar_data_set(struct brcmf_if *ifp, const char *name, const void *dat
mutex_unlock(&drvr->proto_block);
return err;
}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_iovar_data_set);
s32
brcmf_fil_iovar_data_get(struct brcmf_if *ifp, const char *name, void *data,
@@ -270,26 +243,7 @@ brcmf_fil_iovar_data_get(struct brcmf_if *ifp, const char *name, void *data,
mutex_unlock(&drvr->proto_block);
return err;
}
-
-s32
-brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data)
-{
- __le32 data_le = cpu_to_le32(data);
-
- return brcmf_fil_iovar_data_set(ifp, name, &data_le, sizeof(data_le));
-}
-
-s32
-brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data)
-{
- __le32 data_le = cpu_to_le32(*data);
- s32 err;
-
- err = brcmf_fil_iovar_data_get(ifp, name, &data_le, sizeof(data_le));
- if (err == 0)
- *data = le32_to_cpu(data_le);
- return err;
-}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_iovar_data_get);
static u32
brcmf_create_bsscfg(s32 bsscfgidx, const char *name, char *data, u32 datalen,
@@ -364,6 +318,7 @@ brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name,
mutex_unlock(&drvr->proto_block);
return err;
}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_bsscfg_data_set);
s32
brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name,
@@ -394,28 +349,7 @@ brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name,
mutex_unlock(&drvr->proto_block);
return err;
}
-
-s32
-brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data)
-{
- __le32 data_le = cpu_to_le32(data);
-
- return brcmf_fil_bsscfg_data_set(ifp, name, &data_le,
- sizeof(data_le));
-}
-
-s32
-brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data)
-{
- __le32 data_le = cpu_to_le32(*data);
- s32 err;
-
- err = brcmf_fil_bsscfg_data_get(ifp, name, &data_le,
- sizeof(data_le));
- if (err == 0)
- *data = le32_to_cpu(data_le);
- return err;
-}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_bsscfg_data_get);
static u32 brcmf_create_xtlv(const char *name, u16 id, char *data, u32 len,
char *buf, u32 buflen)
@@ -465,6 +399,7 @@ s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, const char *name, u16 id,
mutex_unlock(&drvr->proto_block);
return err;
}
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_xtlv_data_set);
s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id,
void *data, u32 len)
@@ -494,39 +429,4 @@ s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id,
mutex_unlock(&drvr->proto_block);
return err;
}
-
-s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, const char *name, u16 id, u32 data)
-{
- __le32 data_le = cpu_to_le32(data);
-
- return brcmf_fil_xtlv_data_set(ifp, name, id, &data_le,
- sizeof(data_le));
-}
-
-s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, const char *name, u16 id, u32 *data)
-{
- __le32 data_le = cpu_to_le32(*data);
- s32 err;
-
- err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
- if (err == 0)
- *data = le32_to_cpu(data_le);
- return err;
-}
-
-s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, const char *name, u16 id, u8 *data)
-{
- return brcmf_fil_xtlv_data_get(ifp, name, id, data, sizeof(*data));
-}
-
-s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, const char *name, u16 id, u16 *data)
-{
- __le16 data_le = cpu_to_le16(*data);
- s32 err;
-
- err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
- if (err == 0)
- *data = le16_to_cpu(data_le);
- return err;
-}
-
+BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_xtlv_data_get);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
index bc693157c4b1..a315a7fac6a0 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
@@ -81,29 +81,122 @@
s32 brcmf_fil_cmd_data_set(struct brcmf_if *ifp, u32 cmd, void *data, u32 len);
s32 brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len);
-s32 brcmf_fil_cmd_int_set(struct brcmf_if *ifp, u32 cmd, u32 data);
-s32 brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data);
+static inline
+s32 brcmf_fil_cmd_int_set(struct brcmf_if *ifp, u32 cmd, u32 data)
+{
+ s32 err;
+ __le32 data_le = cpu_to_le32(data);
-s32 brcmf_fil_iovar_data_set(struct brcmf_if *ifp, const char *name, const void *data,
- u32 len);
+ brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, data);
+ err = brcmf_fil_cmd_data_set(ifp, cmd, &data_le, sizeof(data_le));
+
+ return err;
+}
+static inline
+s32 brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data)
+{
+ s32 err;
+ __le32 data_le = cpu_to_le32(*data);
+
+ err = brcmf_fil_cmd_data_get(ifp, cmd, &data_le, sizeof(data_le));
+ if (err == 0)
+ *data = le32_to_cpu(data_le);
+ brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, *data);
+
+ return err;
+}
+
+s32 brcmf_fil_iovar_data_set(struct brcmf_if *ifp, const char *name,
+ const void *data, u32 len);
s32 brcmf_fil_iovar_data_get(struct brcmf_if *ifp, const char *name, void *data,
u32 len);
-s32 brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data);
-s32 brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data);
-
-s32 brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name, void *data,
- u32 len);
-s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name, void *data,
- u32 len);
-s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data);
-s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data);
+static inline
+s32 brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data)
+{
+ __le32 data_le = cpu_to_le32(data);
+
+ return brcmf_fil_iovar_data_set(ifp, name, &data_le, sizeof(data_le));
+}
+static inline
+s32 brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data)
+{
+ __le32 data_le = cpu_to_le32(*data);
+ s32 err;
+
+ err = brcmf_fil_iovar_data_get(ifp, name, &data_le, sizeof(data_le));
+ if (err == 0)
+ *data = le32_to_cpu(data_le);
+ return err;
+}
+
+
+s32 brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name,
+ void *data, u32 len);
+s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name,
+ void *data, u32 len);
+static inline
+s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data)
+{
+ __le32 data_le = cpu_to_le32(data);
+
+ return brcmf_fil_bsscfg_data_set(ifp, name, &data_le,
+ sizeof(data_le));
+}
+static inline
+s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data)
+{
+ __le32 data_le = cpu_to_le32(*data);
+ s32 err;
+
+ err = brcmf_fil_bsscfg_data_get(ifp, name, &data_le,
+ sizeof(data_le));
+ if (err == 0)
+ *data = le32_to_cpu(data_le);
+ return err;
+}
+
s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, const char *name, u16 id,
void *data, u32 len);
s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id,
void *data, u32 len);
-s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, const char *name, u16 id, u32 data);
-s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, const char *name, u16 id, u32 *data);
-s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, const char *name, u16 id, u8 *data);
-s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, const char *name, u16 id, u16 *data);
+static inline
+s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, const char *name, u16 id,
+ u32 data)
+{
+ __le32 data_le = cpu_to_le32(data);
+
+ return brcmf_fil_xtlv_data_set(ifp, name, id, &data_le,
+ sizeof(data_le));
+}
+static inline
+s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, const char *name, u16 id,
+ u32 *data)
+{
+ __le32 data_le = cpu_to_le32(*data);
+ s32 err;
+
+ err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
+ if (err == 0)
+ *data = le32_to_cpu(data_le);
+ return err;
+}
+static inline
+s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, const char *name, u16 id,
+ u8 *data)
+{
+ return brcmf_fil_xtlv_data_get(ifp, name, id, data, sizeof(*data));
+}
+static inline
+s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, const char *name, u16 id,
+ u16 *data)
+{
+ __le16 data_le = cpu_to_le16(*data);
+ s32 err;
+
+ err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
+ if (err == 0)
+ *data = le16_to_cpu(data_le);
+ return err;
+}
#endif /* _fwil_h_ */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
index 9d248ba1c0b2..e74a23e11830 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
@@ -584,7 +584,7 @@ struct brcmf_wsec_key_le {
struct brcmf_wsec_pmk_le {
__le16 key_len;
__le16 flags;
- u8 key[2 * BRCMF_WSEC_MAX_PSK_LEN + 1];
+ u8 key[BRCMF_WSEC_MAX_SAE_PASSWORD_LEN];
};
/**
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.c
index 86eafdb40541..41eafcda77f7 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.c
@@ -90,7 +90,7 @@ int brcmf_fwvid_register_vendor(enum brcmf_fwvendor fwvid, struct module *vmod,
return -ERANGE;
if (WARN_ON(!vmod) || WARN_ON(!vops) ||
- WARN_ON(!vops->attach) || WARN_ON(!vops->detach))
+ WARN_ON(!vops->alloc_fweh_info))
return -EINVAL;
if (WARN_ON(fwvid_list[fwvid].vmod))
@@ -150,7 +150,7 @@ static inline int brcmf_fwvid_request_module(enum brcmf_fwvendor fwvid)
}
#endif
-int brcmf_fwvid_attach_ops(struct brcmf_pub *drvr)
+int brcmf_fwvid_attach(struct brcmf_pub *drvr)
{
enum brcmf_fwvendor fwvid = drvr->bus_if->fwvid;
int ret;
@@ -175,7 +175,7 @@ int brcmf_fwvid_attach_ops(struct brcmf_pub *drvr)
return ret;
}
-void brcmf_fwvid_detach_ops(struct brcmf_pub *drvr)
+void brcmf_fwvid_detach(struct brcmf_pub *drvr)
{
enum brcmf_fwvendor fwvid = drvr->bus_if->fwvid;
@@ -187,9 +187,10 @@ void brcmf_fwvid_detach_ops(struct brcmf_pub *drvr)
mutex_lock(&fwvid_list_lock);
- drvr->vops = NULL;
- list_del(&drvr->bus_if->list);
-
+ if (drvr->vops) {
+ drvr->vops = NULL;
+ list_del(&drvr->bus_if->list);
+ }
mutex_unlock(&fwvid_list_lock);
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.h
index 43df58bb70ad..e6ac9fc341bc 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwvid.h
@@ -6,12 +6,15 @@
#define FWVID_H_
#include "firmware.h"
+#include "cfg80211.h"
struct brcmf_pub;
+struct brcmf_if;
struct brcmf_fwvid_ops {
- int (*attach)(struct brcmf_pub *drvr);
- void (*detach)(struct brcmf_pub *drvr);
+ void (*feat_attach)(struct brcmf_if *ifp);
+ int (*set_sae_password)(struct brcmf_if *ifp, struct cfg80211_crypto_settings *crypto);
+ int (*alloc_fweh_info)(struct brcmf_pub *drvr);
};
/* exported functions */
@@ -20,28 +23,37 @@ int brcmf_fwvid_register_vendor(enum brcmf_fwvendor fwvid, struct module *mod,
int brcmf_fwvid_unregister_vendor(enum brcmf_fwvendor fwvid, struct module *mod);
/* core driver functions */
-int brcmf_fwvid_attach_ops(struct brcmf_pub *drvr);
-void brcmf_fwvid_detach_ops(struct brcmf_pub *drvr);
+int brcmf_fwvid_attach(struct brcmf_pub *drvr);
+void brcmf_fwvid_detach(struct brcmf_pub *drvr);
const char *brcmf_fwvid_vendor_name(struct brcmf_pub *drvr);
-static inline int brcmf_fwvid_attach(struct brcmf_pub *drvr)
+static inline void brcmf_fwvid_feat_attach(struct brcmf_if *ifp)
{
- int ret;
+ const struct brcmf_fwvid_ops *vops = ifp->drvr->vops;
- ret = brcmf_fwvid_attach_ops(drvr);
- if (ret)
- return ret;
+ if (!vops->feat_attach)
+ return;
- return drvr->vops->attach(drvr);
+ vops->feat_attach(ifp);
}
-static inline void brcmf_fwvid_detach(struct brcmf_pub *drvr)
+static inline int brcmf_fwvid_set_sae_password(struct brcmf_if *ifp,
+ struct cfg80211_crypto_settings *crypto)
+{
+ const struct brcmf_fwvid_ops *vops = ifp->drvr->vops;
+
+ if (!vops || !vops->set_sae_password)
+ return -EOPNOTSUPP;
+
+ return vops->set_sae_password(ifp, crypto);
+}
+
+static inline int brcmf_fwvid_alloc_fweh_info(struct brcmf_pub *drvr)
{
if (!drvr->vops)
- return;
+ return -EIO;
- drvr->vops->detach(drvr);
- brcmf_fwvid_detach_ops(drvr);
+ return drvr->vops->alloc_fweh_info(drvr);
}
#endif /* FWVID_H_ */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c
index 5573a47766ad..05d7c2a4fba5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c
@@ -7,21 +7,34 @@
#include <core.h>
#include <bus.h>
#include <fwvid.h>
+#include <cfg80211.h>
#include "vops.h"
-static int brcmf_wcc_attach(struct brcmf_pub *drvr)
+#define BRCMF_WCC_E_LAST 213
+
+static int brcmf_wcc_set_sae_pwd(struct brcmf_if *ifp,
+ struct cfg80211_crypto_settings *crypto)
{
- pr_debug("%s: executing\n", __func__);
- return 0;
+ return brcmf_set_wsec(ifp, crypto->sae_pwd, crypto->sae_pwd_len,
+ BRCMF_WSEC_PASSPHRASE);
}
-static void brcmf_wcc_detach(struct brcmf_pub *drvr)
+static int brcmf_wcc_alloc_fweh_info(struct brcmf_pub *drvr)
{
- pr_debug("%s: executing\n", __func__);
+ struct brcmf_fweh_info *fweh;
+
+ fweh = kzalloc(struct_size(fweh, evt_handler, BRCMF_WCC_E_LAST),
+ GFP_KERNEL);
+ if (!fweh)
+ return -ENOMEM;
+
+ fweh->num_event_codes = BRCMF_WCC_E_LAST;
+ drvr->fweh = fweh;
+ return 0;
}
const struct brcmf_fwvid_ops brcmf_wcc_ops = {
- .attach = brcmf_wcc_attach,
- .detach = brcmf_wcc_detach,
+ .set_sae_password = brcmf_wcc_set_sae_pwd,
+ .alloc_fweh_info = brcmf_wcc_alloc_fweh_info,
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c
index 02918d434556..b66135e3cff4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c
@@ -20,6 +20,7 @@ static void __exit brcmf_wcc_exit(void)
brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_WCC, THIS_MODULE);
}
+MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Broadcom mobility chipsets");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(BRCMFMAC);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.c
index 89c8829528c2..9540a05247c2 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <net/mac80211.h>
#include <linux/bcma/bcma_driver_chipcommon.h>
-#include <linux/gpio.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>
#include <linux/gpio/consumer.h>
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
index 543e93ec49d2..92860dc0a92e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
@@ -959,6 +959,10 @@ static int brcms_ops_beacon_set_tim(struct ieee80211_hw *hw,
}
static const struct ieee80211_ops brcms_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = brcms_ops_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = brcms_ops_start,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
index ccc621b8ed9f..a27d6f0b8819 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
@@ -383,8 +383,9 @@ struct shared_phy *wlc_phy_shared_attach(struct shared_phy_params *shp)
return sh;
}
-static void wlc_phy_timercb_phycal(struct brcms_phy *pi)
+static void wlc_phy_timercb_phycal(void *ptr)
{
+ struct brcms_phy *pi = ptr;
uint delay = 5;
if (PHY_PERICAL_MPHASE_PENDING(pi)) {
@@ -551,8 +552,7 @@ wlc_phy_attach(struct shared_phy *sh, struct bcma_device *d11core,
if (!pi->phycal_timer)
goto err;
- if (!wlc_phy_attach_nphy(pi))
- goto err;
+ wlc_phy_attach_nphy(pi);
} else if (ISLCNPHY(pi)) {
if (!wlc_phy_attach_lcnphy(pi))
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_int.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_int.h
index 8668fa5558a2..70a9ec050717 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_int.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_int.h
@@ -941,7 +941,7 @@ void wlc_phy_papd_decode_epsilon(u32 epsilon, s32 *eps_real, s32 *eps_imag);
void wlc_phy_cal_perical_mphase_reset(struct brcms_phy *pi);
void wlc_phy_cal_perical_mphase_restart(struct brcms_phy *pi);
-bool wlc_phy_attach_nphy(struct brcms_phy *pi);
+void wlc_phy_attach_nphy(struct brcms_phy *pi);
bool wlc_phy_attach_lcnphy(struct brcms_phy *pi);
void wlc_phy_detach_lcnphy(struct brcms_phy *pi);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c
index 7717eb85a1db..aae2cf95fe95 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c
@@ -3299,7 +3299,7 @@ wlc_lcnphy_run_samples(struct brcms_phy *pi,
if (iqcalmode) {
- and_phy_reg(pi, 0x453, (u16) ~(0x1 << 15));
+ and_phy_reg(pi, 0x453, 0xffff & ~(0x1 << 15));
or_phy_reg(pi, 0x453, (0x1 << 15));
} else {
write_phy_reg(pi, 0x63f, 1);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
index 8580a2754789..d69879e1bd87 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
@@ -14546,7 +14546,7 @@ static void wlc_phy_txpwr_srom_read_ppr_nphy(struct brcms_phy *pi)
wlc_phy_txpwr_apply_nphy(pi);
}
-static bool wlc_phy_txpwr_srom_read_nphy(struct brcms_phy *pi)
+static void wlc_phy_txpwr_srom_read_nphy(struct brcms_phy *pi)
{
struct ssb_sprom *sprom = &pi->d11core->bus->sprom;
@@ -14595,11 +14595,9 @@ static bool wlc_phy_txpwr_srom_read_nphy(struct brcms_phy *pi)
pi->phycal_tempdelta = 0;
wlc_phy_txpwr_srom_read_ppr_nphy(pi);
-
- return true;
}
-bool wlc_phy_attach_nphy(struct brcms_phy *pi)
+void wlc_phy_attach_nphy(struct brcms_phy *pi)
{
uint i;
@@ -14645,10 +14643,7 @@ bool wlc_phy_attach_nphy(struct brcms_phy *pi)
pi->pi_fptr.chanset = wlc_phy_chanspec_set_nphy;
pi->pi_fptr.txpwrrecalc = wlc_phy_txpower_recalc_target_nphy;
- if (!wlc_phy_txpwr_srom_read_nphy(pi))
- return false;
-
- return true;
+ wlc_phy_txpwr_srom_read_nphy(pi);
}
static s32 get_rf_pwr_offset(struct brcms_phy *pi, s16 pga_gn, s16 pad_gn)
@@ -17587,7 +17582,7 @@ static void wlc_phy_txpwrctrl_pwr_setup_nphy(struct brcms_phy *pi)
or_phy_reg(pi, 0x122, (0x1 << 0));
if (NREV_GE(pi->pubpi.phy_rev, 3))
- and_phy_reg(pi, 0x1e7, (u16) (~(0x1 << 15)));
+ and_phy_reg(pi, 0x1e7, 0x7fff);
else
or_phy_reg(pi, 0x1e7, (0x1 << 15));
@@ -18086,7 +18081,7 @@ wlc_phy_rfctrlintc_override_nphy(struct brcms_phy *pi, u8 field, u16 value,
(0x1 << 10));
and_phy_reg(pi, 0x2ff, (u16)
- ~(0x3 << 14));
+ 0xffff & ~(0x3 << 14));
or_phy_reg(pi, 0x2ff, (0x1 << 13));
or_phy_reg(pi, 0x2ff, (0x1 << 0));
} else {
@@ -21053,7 +21048,7 @@ wlc_phy_chanspec_nphy_setup(struct brcms_phy *pi, u16 chanspec,
(val | MAC_PHY_FORCE_CLK));
and_phy_reg(pi, (NPHY_TO_BPHY_OFF + BPHY_BB_CONFIG),
- (u16) (~(BBCFG_RESETCCA | BBCFG_RESETRX)));
+ 0xffff & ~(BBCFG_RESETCCA | BBCFG_RESETRX));
bcma_write16(pi->d11core, D11REGOFFS(psm_phy_hdr_param), val);
}
@@ -21287,7 +21282,8 @@ void wlc_phy_antsel_init(struct brcms_phy_pub *ppi, bool lut_init)
bcma_set16(pi->d11core, D11REGOFFS(psm_gpio_oe), mask);
- bcma_mask16(pi->d11core, D11REGOFFS(psm_gpio_out), ~mask);
+ bcma_mask16(pi->d11core, D11REGOFFS(psm_gpio_out),
+ 0xffff & ~mask);
if (lut_init) {
write_phy_reg(pi, 0xf8, 0x02d8);
@@ -23197,7 +23193,7 @@ void wlc_phy_stopplayback_nphy(struct brcms_phy *pi)
or_phy_reg(pi, 0xc3, NPHY_sampleCmd_STOP);
else if (playback_status & 0x2)
and_phy_reg(pi, 0xc2,
- (u16) ~NPHY_iqloCalCmdGctl_IQLO_CAL_EN);
+ 0xffff & ~NPHY_iqloCalCmdGctl_IQLO_CAL_EN);
and_phy_reg(pi, 0xc3, (u16) ~(0x1 << 2));
@@ -28202,8 +28198,9 @@ void wlc_phy_txpwrctrl_enable_nphy(struct brcms_phy *pi, u8 ctrl_type)
if (NREV_GE(pi->pubpi.phy_rev, 3))
and_phy_reg(pi, 0x1e7,
- (u16) (~((0x1 << 15) |
- (0x1 << 14) | (0x1 << 13))));
+ 0xffff & ~((0x1 << 15) |
+ (0x1 << 14) |
+ (0x1 << 13)));
else
and_phy_reg(pi, 0x1e7,
(u16) (~((0x1 << 14) | (0x1 << 13))));
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c
index a0de5db0cd64..b72381791536 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c
@@ -57,12 +57,11 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim)
}
struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim,
- void (*fn)(struct brcms_phy *pi),
+ void (*fn)(void *pi),
void *arg, const char *name)
{
return (struct wlapi_timer *)
- brcms_init_timer(physhim->wl, (void (*)(void *))fn,
- arg, name);
+ brcms_init_timer(physhim->wl, fn, arg, name);
}
void wlapi_free_timer(struct wlapi_timer *t)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h
index dd8774717ade..27d0934e600e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h
@@ -131,7 +131,7 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim);
/* PHY to WL utility functions */
struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim,
- void (*fn)(struct brcms_phy *pi),
+ void (*fn)(void *pi),
void *arg, const char *name);
void wlapi_free_timer(struct wlapi_timer *t);
void wlapi_add_timer(struct wlapi_timer *t, uint ms, int periodic);
diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index 9eaf5ec133f9..075b705a8d7b 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -3432,6 +3432,10 @@ static const struct attribute_group il3945_attribute_group = {
};
static struct ieee80211_ops il3945_mac_ops __ro_after_init = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = il3945_mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = il3945_mac_start,
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index 70e420df1643..4beb7be6d51d 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -6301,6 +6301,10 @@ il4965_tx_queue_set_status(struct il_priv *il, struct il_tx_queue *txq,
}
static const struct ieee80211_ops il4965_mac_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = il4965_mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = il4965_mac_start,
diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
index 17570d62c896..9d33a66a49b5 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.c
+++ b/drivers/net/wireless/intel/iwlegacy/common.c
@@ -3438,9 +3438,7 @@ il_init_geos(struct il_priv *il)
if (!channels)
return -ENOMEM;
- rates =
- kzalloc((sizeof(struct ieee80211_rate) * RATE_COUNT_LEGACY),
- GFP_KERNEL);
+ rates = kcalloc(RATE_COUNT_LEGACY, sizeof(*rates), GFP_KERNEL);
if (!rates) {
kfree(channels);
return -ENOMEM;
diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index 20971304fdef..4b04865fc2c9 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@ -46,6 +46,15 @@ config IWLWIFI
if IWLWIFI
+config IWLWIFI_KUNIT_TESTS
+ tristate
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option for iwlwifi kunit tests.
+
+ If unsure, say N.
+
config IWLWIFI_LEDS
bool
depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211
diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile
index b983982aee45..8bb94a4c12cd 100644
--- a/drivers/net/wireless/intel/iwlwifi/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/Makefile
@@ -18,6 +18,7 @@ iwlwifi-objs += queue/tx.o
iwlwifi-objs += fw/img.o fw/notif-wait.o fw/rs.o
iwlwifi-objs += fw/dbg.o fw/pnvm.o fw/dump.o
+iwlwifi-objs += fw/regulatory.o
iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o
iwlwifi-$(CONFIG_ACPI) += fw/acpi.o
iwlwifi-$(CONFIG_EFI) += fw/uefi.o
@@ -33,4 +34,6 @@ obj-$(CONFIG_IWLDVM) += dvm/
obj-$(CONFIG_IWLMVM) += mvm/
obj-$(CONFIG_IWLMEI) += mei/
+obj-$(CONFIG_IWLWIFI_KUNIT_TESTS) += tests/
+
CFLAGS_iwl-devtrace.o := -I$(src)
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c b/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c
index 134635c70ce8..25952d0bea99 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/module.h>
#include <linux/stringify.h>
@@ -10,7 +10,7 @@
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_AX210_UCODE_API_MAX 86
+#define IWL_AX210_UCODE_API_MAX 89
/* Lowest firmware API version supported */
#define IWL_AX210_UCODE_API_MIN 59
@@ -299,3 +299,9 @@ MODULE_FIRMWARE(IWL_MA_B_HR_B_FW_MODULE_FIRMWARE(IWL_AX210_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_MA_B_GF_A_FW_MODULE_FIRMWARE(IWL_AX210_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_MA_B_GF4_A_FW_MODULE_FIRMWARE(IWL_AX210_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_MA_B_MR_A_FW_MODULE_FIRMWARE(IWL_AX210_UCODE_API_MAX));
+
+MODULE_FIRMWARE("iwlwifi-so-a0-gf-a0.pnvm");
+MODULE_FIRMWARE("iwlwifi-so-a0-gf4-a0.pnvm");
+MODULE_FIRMWARE("iwlwifi-ty-a0-gf-a0.pnvm");
+MODULE_FIRMWARE("iwlwifi-ma-b0-gf-a0.pnvm");
+MODULE_FIRMWARE("iwlwifi-ma-b0-gf4-a0.pnvm");
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c
index 82da957adcf6..072b0a5827d1 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/module.h>
#include <linux/stringify.h>
@@ -10,7 +10,7 @@
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_BZ_UCODE_API_MAX 86
+#define IWL_BZ_UCODE_API_MAX 90
/* Lowest firmware API version supported */
#define IWL_BZ_UCODE_API_MIN 80
@@ -129,10 +129,6 @@ static const struct iwl_base_params iwl_bz_base_params = {
IWL_DEVICE_BZ_COMMON, \
.ht_params = &iwl_22000_ht_params
-#define IWL_DEVICE_GL_A \
- IWL_DEVICE_BZ_COMMON, \
- .ht_params = &iwl_gl_a_ht_params
-
/*
* This size was picked according to 8 MSDUs inside 512 A-MSDUs in an
* A-MPDU, with additional overhead to account for processing time.
@@ -153,6 +149,7 @@ const struct iwl_cfg_trans_params iwl_bz_trans_cfg = {
};
const char iwl_bz_name[] = "Intel(R) TBD Bz device";
+const char iwl_mtp_name[] = "Intel(R) Wi-Fi 7 BE202 160MHz";
const struct iwl_cfg iwl_cfg_bz = {
.fw_name_mac = "bz",
@@ -179,3 +176,5 @@ MODULE_FIRMWARE(IWL_BZ_A_FM_C_MODULE_FIRMWARE(IWL_BZ_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_BZ_A_FM4_B_MODULE_FIRMWARE(IWL_BZ_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_GL_B_FM_B_MODULE_FIRMWARE(IWL_BZ_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_GL_C_FM_C_MODULE_FIRMWARE(IWL_BZ_UCODE_API_MAX));
+
+MODULE_FIRMWARE("iwlwifi-gl-c0-fm-c0.pnvm");
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c
index 80eb9b499538..9b79279fd76c 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/module.h>
#include <linux/stringify.h>
@@ -10,7 +10,7 @@
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_SC_UCODE_API_MAX 86
+#define IWL_SC_UCODE_API_MAX 90
/* Lowest firmware API version supported */
#define IWL_SC_UCODE_API_MIN 82
@@ -33,6 +33,10 @@
#define IWL_SC_A_GF_A_FW_PRE "iwlwifi-sc-a0-gf-a0"
#define IWL_SC_A_GF4_A_FW_PRE "iwlwifi-sc-a0-gf4-a0"
#define IWL_SC_A_WH_A_FW_PRE "iwlwifi-sc-a0-wh-a0"
+#define IWL_SC2_A_FM_C_FW_PRE "iwlwifi-sc2-a0-fm-c0"
+#define IWL_SC2_A_WH_A_FW_PRE "iwlwifi-sc2-a0-wh-a0"
+#define IWL_SC2F_A_FM_C_FW_PRE "iwlwifi-sc2f-a0-fm-c0"
+#define IWL_SC2F_A_WH_A_FW_PRE "iwlwifi-sc2f-a0-wh-a0"
#define IWL_SC_A_FM_B_FW_MODULE_FIRMWARE(api) \
IWL_SC_A_FM_B_FW_PRE "-" __stringify(api) ".ucode"
@@ -48,6 +52,14 @@
IWL_SC_A_GF4_A_FW_PRE "-" __stringify(api) ".ucode"
#define IWL_SC_A_WH_A_FW_MODULE_FIRMWARE(api) \
IWL_SC_A_WH_A_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_SC2_A_FM_C_FW_MODULE_FIRMWARE(api) \
+ IWL_SC2_A_FM_C_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_SC2_A_WH_A_FW_MODULE_FIRMWARE(api) \
+ IWL_SC2_A_WH_A_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_SC2F_A_FM_C_FW_MODULE_FIRMWARE(api) \
+ IWL_SC2F_A_FM_C_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_SC2F_A_WH_A_FW_MODULE_FIRMWARE(api) \
+ IWL_SC2F_A_WH_A_FW_PRE "-" __stringify(api) ".ucode"
static const struct iwl_base_params iwl_sc_base_params = {
.eeprom_size = OTP_LOW_IMAGE_SIZE_32K,
@@ -124,6 +136,9 @@ static const struct iwl_base_params iwl_sc_base_params = {
#define IWL_DEVICE_SC \
IWL_DEVICE_BZ_COMMON, \
+ .uhb_supported = true, \
+ .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, \
+ .num_rbds = IWL_NUM_RBDS_SC_EHT, \
.ht_params = &iwl_22000_ht_params
/*
@@ -149,10 +164,21 @@ const char iwl_sc_name[] = "Intel(R) TBD Sc device";
const struct iwl_cfg iwl_cfg_sc = {
.fw_name_mac = "sc",
- .uhb_supported = true,
IWL_DEVICE_SC,
- .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM,
- .num_rbds = IWL_NUM_RBDS_SC_EHT,
+};
+
+const char iwl_sc2_name[] = "Intel(R) TBD Sc2 device";
+
+const struct iwl_cfg iwl_cfg_sc2 = {
+ .fw_name_mac = "sc2",
+ IWL_DEVICE_SC,
+};
+
+const char iwl_sc2f_name[] = "Intel(R) TBD Sc2f device";
+
+const struct iwl_cfg iwl_cfg_sc2f = {
+ .fw_name_mac = "sc2f",
+ IWL_DEVICE_SC,
};
MODULE_FIRMWARE(IWL_SC_A_FM_B_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
@@ -162,3 +188,7 @@ MODULE_FIRMWARE(IWL_SC_A_HR_B_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_SC_A_GF_A_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_SC_A_GF4_A_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_SC_A_WH_A_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SC2_A_FM_C_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SC2_A_WH_A_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SC2F_A_FM_C_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SC2F_A_WH_A_FW_MODULE_FIRMWARE(IWL_SC_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c
index 5f3d5b15f727..52b008ce53bd 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c
@@ -1570,6 +1570,10 @@ static void iwlagn_mac_sta_notify(struct ieee80211_hw *hw,
}
const struct ieee80211_ops iwlagn_hw_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = iwlagn_mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = iwlagn_mac_start,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index b96f30d11644..4caf2e25a297 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -4,7 +4,6 @@
* Copyright (C) 2019-2023 Intel Corporation
*/
#include <linux/uuid.h>
-#include <linux/dmi.h>
#include "iwl-drv.h"
#include "iwl-debug.h"
#include "acpi.h"
@@ -13,68 +12,21 @@
const guid_t iwl_guid = GUID_INIT(0xF21202BF, 0x8F78, 0x4DC6,
0xA5, 0xB3, 0x1F, 0x73,
0x8E, 0x28, 0x5A, 0xDE);
-IWL_EXPORT_SYMBOL(iwl_guid);
-const guid_t iwl_rfi_guid = GUID_INIT(0x7266172C, 0x220B, 0x4B29,
- 0x81, 0x4F, 0x75, 0xE4,
- 0xDD, 0x26, 0xB5, 0xFD);
-IWL_EXPORT_SYMBOL(iwl_rfi_guid);
-
-static const struct dmi_system_id dmi_ppag_approved_list[] = {
- { .ident = "HP",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "HP"),
- },
- },
- { .ident = "SAMSUNG",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD"),
- },
- },
- { .ident = "MSFT",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- },
- },
- { .ident = "ASUS",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- },
- },
- { .ident = "GOOGLE-HP",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Google"),
- DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
- },
- },
- { .ident = "GOOGLE-ASUS",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Google"),
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTek COMPUTER INC."),
- },
- },
- { .ident = "GOOGLE-SAMSUNG",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Google"),
- DMI_MATCH(DMI_BOARD_VENDOR, "SAMSUNG ELECTRONICS CO., LTD"),
- },
- },
- { .ident = "DELL",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- },
- },
- { .ident = "DELL",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
- },
- },
- { .ident = "RAZER",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Razer"),
- },
- },
- {}
+static const size_t acpi_dsm_size[DSM_FUNC_NUM_FUNCS] = {
+ [DSM_FUNC_QUERY] = sizeof(u32),
+ [DSM_FUNC_DISABLE_SRD] = sizeof(u8),
+ [DSM_FUNC_ENABLE_INDONESIA_5G2] = sizeof(u8),
+ [DSM_FUNC_ENABLE_6E] = sizeof(u32),
+ [DSM_FUNC_REGULATORY_CONFIG] = sizeof(u32),
+ /* Not supported in driver */
+ [5] = (size_t)0,
+ [DSM_FUNC_11AX_ENABLEMENT] = sizeof(u32),
+ [DSM_FUNC_ENABLE_UNII4_CHAN] = sizeof(u32),
+ [DSM_FUNC_ACTIVATE_CHANNEL] = sizeof(u32),
+ [DSM_FUNC_FORCE_DISABLE_CHANNELS] = sizeof(u32),
+ [DSM_FUNC_ENERGY_DETECTION_THRESHOLD] = sizeof(u32),
+ [DSM_FUNC_RFI_CONFIG] = sizeof(u32),
};
static int iwl_acpi_get_handle(struct device *dev, acpi_string method,
@@ -200,46 +152,41 @@ out:
}
/*
- * Evaluate a DSM with no arguments and a u8 return value,
+ * This function receives a DSM function number, calculates its expected size
+ * according to Intel BIOS spec, and fills in the value in a 32-bit field.
+ * In case the expected size is smaller than 32-bit, padding will be added.
*/
-int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func,
- const guid_t *guid, u8 *value)
+int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt,
+ enum iwl_dsm_funcs func, u32 *value)
{
+ size_t expected_size;
+ u64 tmp;
int ret;
- u64 val;
- ret = iwl_acpi_get_dsm_integer(dev, rev, func,
- guid, &val, sizeof(u8));
+ BUILD_BUG_ON(ARRAY_SIZE(acpi_dsm_size) != DSM_FUNC_NUM_FUNCS);
- if (ret < 0)
- return ret;
+ if (WARN_ON(func >= ARRAY_SIZE(acpi_dsm_size)))
+ return -EINVAL;
- /* cast val (u64) to be u8 */
- *value = (u8)val;
- return 0;
-}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_dsm_u8);
+ expected_size = acpi_dsm_size[func];
-/*
- * Evaluate a DSM with no arguments and a u32 return value,
- */
-int iwl_acpi_get_dsm_u32(struct device *dev, int rev, int func,
- const guid_t *guid, u32 *value)
-{
- int ret;
- u64 val;
-
- ret = iwl_acpi_get_dsm_integer(dev, rev, func,
- guid, &val, sizeof(u32));
+ /* Currently all ACPI DSMs are either 8-bit or 32-bit */
+ if (expected_size != sizeof(u8) && expected_size != sizeof(u32))
+ return -EOPNOTSUPP;
- if (ret < 0)
+ ret = iwl_acpi_get_dsm_integer(fwrt->dev, ACPI_DSM_REV, func,
+ &iwl_guid, &tmp, expected_size);
+ if (ret)
return ret;
- /* cast val (u64) to be u32 */
- *value = (u32)val;
+ if ((expected_size == sizeof(u8) && tmp != (u8)tmp) ||
+ (expected_size == sizeof(u32) && tmp != (u32)tmp))
+ IWL_DEBUG_RADIO(fwrt,
+ "DSM value overflows the expected size, truncating\n");
+ *value = (u32)tmp;
+
return 0;
}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_dsm_u32);
static union acpi_object *
iwl_acpi_get_wifi_pkg_range(struct device *dev,
@@ -307,9 +254,8 @@ iwl_acpi_get_wifi_pkg(struct device *dev,
tbl_rev);
}
-
-int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
- union iwl_tas_config_cmd *cmd, int fw_ver)
+int iwl_acpi_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *tas_data)
{
union acpi_object *wifi_pkg, *data;
int ret, tbl_rev, i, block_list_size, enabled;
@@ -331,22 +277,9 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
ACPI_TYPE_INTEGER) {
u32 tas_selection =
(u32)wifi_pkg->package.elements[1].integer.value;
- u16 override_iec =
- (tas_selection & ACPI_WTAS_OVERRIDE_IEC_MSK) >> ACPI_WTAS_OVERRIDE_IEC_POS;
- u16 enabled_iec = (tas_selection & ACPI_WTAS_ENABLE_IEC_MSK) >>
- ACPI_WTAS_ENABLE_IEC_POS;
- u8 usa_tas_uhb = (tas_selection & ACPI_WTAS_USA_UHB_MSK) >> ACPI_WTAS_USA_UHB_POS;
-
- enabled = tas_selection & ACPI_WTAS_ENABLED_MSK;
- if (fw_ver <= 3) {
- cmd->v3.override_tas_iec = cpu_to_le16(override_iec);
- cmd->v3.enable_tas_iec = cpu_to_le16(enabled_iec);
- } else {
- cmd->v4.usa_tas_uhb_allowed = usa_tas_uhb;
- cmd->v4.override_tas_iec = (u8)override_iec;
- cmd->v4.enable_tas_iec = (u8)enabled_iec;
- }
+ enabled = iwl_parse_tas_selection(fwrt, tas_data,
+ tas_selection);
} else if (tbl_rev == 0 &&
wifi_pkg->package.elements[1].type == ACPI_TYPE_INTEGER) {
@@ -365,22 +298,16 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
IWL_DEBUG_RADIO(fwrt, "Reading TAS table revision %d\n", tbl_rev);
if (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER ||
wifi_pkg->package.elements[2].integer.value >
- APCI_WTAS_BLACK_LIST_MAX) {
+ IWL_WTAS_BLACK_LIST_MAX) {
IWL_DEBUG_RADIO(fwrt, "TAS invalid array size %llu\n",
wifi_pkg->package.elements[2].integer.value);
ret = -EINVAL;
goto out_free;
}
block_list_size = wifi_pkg->package.elements[2].integer.value;
- cmd->v4.block_list_size = cpu_to_le32(block_list_size);
+ tas_data->block_list_size = cpu_to_le32(block_list_size);
IWL_DEBUG_RADIO(fwrt, "TAS array size %u\n", block_list_size);
- if (block_list_size > APCI_WTAS_BLACK_LIST_MAX) {
- IWL_DEBUG_RADIO(fwrt, "TAS invalid array size value %u\n",
- block_list_size);
- ret = -EINVAL;
- goto out_free;
- }
for (i = 0; i < block_list_size; i++) {
u32 country;
@@ -394,7 +321,7 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
}
country = wifi_pkg->package.elements[3 + i].integer.value;
- cmd->v4.block_list_array[i] = cpu_to_le32(country);
+ tas_data->block_list_array[i] = cpu_to_le32(country);
IWL_DEBUG_RADIO(fwrt, "TAS block list country %d\n", country);
}
@@ -403,19 +330,19 @@ out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_tas);
-int iwl_acpi_get_mcc(struct device *dev, char *mcc)
+int iwl_acpi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc)
{
union acpi_object *wifi_pkg, *data;
u32 mcc_val;
int ret, tbl_rev;
- data = iwl_acpi_get_object(dev, ACPI_WRDD_METHOD);
+ data = iwl_acpi_get_object(fwrt->dev, ACPI_WRDD_METHOD);
if (IS_ERR(data))
return PTR_ERR(data);
- wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE,
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_WRDD_WIFI_DATA_SIZE,
&tbl_rev);
if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
@@ -439,46 +366,42 @@ out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_mcc);
-u64 iwl_acpi_get_pwr_limit(struct device *dev)
+int iwl_acpi_get_pwr_limit(struct iwl_fw_runtime *fwrt, u64 *dflt_pwr_limit)
{
union acpi_object *data, *wifi_pkg;
- u64 dflt_pwr_limit;
- int tbl_rev;
+ int tbl_rev, ret = -EINVAL;
- data = iwl_acpi_get_object(dev, ACPI_SPLC_METHOD);
- if (IS_ERR(data)) {
- dflt_pwr_limit = 0;
+ *dflt_pwr_limit = 0;
+ data = iwl_acpi_get_object(fwrt->dev, ACPI_SPLC_METHOD);
+ if (IS_ERR(data))
goto out;
- }
- wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data,
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
ACPI_SPLC_WIFI_DATA_SIZE, &tbl_rev);
if (IS_ERR(wifi_pkg) || tbl_rev != 0 ||
- wifi_pkg->package.elements[1].integer.value != ACPI_TYPE_INTEGER) {
- dflt_pwr_limit = 0;
+ wifi_pkg->package.elements[1].integer.value != ACPI_TYPE_INTEGER)
goto out_free;
- }
- dflt_pwr_limit = wifi_pkg->package.elements[1].integer.value;
+ *dflt_pwr_limit = wifi_pkg->package.elements[1].integer.value;
+ ret = 0;
out_free:
kfree(data);
out:
- return dflt_pwr_limit;
+ return ret;
}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_pwr_limit);
-int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk)
+int iwl_acpi_get_eckv(struct iwl_fw_runtime *fwrt, u32 *extl_clk)
{
union acpi_object *wifi_pkg, *data;
int ret, tbl_rev;
- data = iwl_acpi_get_object(dev, ACPI_ECKV_METHOD);
+ data = iwl_acpi_get_object(fwrt->dev, ACPI_ECKV_METHOD);
if (IS_ERR(data))
return PTR_ERR(data);
- wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_ECKV_WIFI_DATA_SIZE,
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_ECKV_WIFI_DATA_SIZE,
&tbl_rev);
if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
@@ -499,11 +422,11 @@ out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_eckv);
-static int iwl_sar_set_profile(union acpi_object *table,
- struct iwl_sar_profile *profile,
- bool enabled, u8 num_chains, u8 num_sub_bands)
+static int iwl_acpi_sar_set_profile(union acpi_object *table,
+ struct iwl_sar_profile *profile,
+ bool enabled, u8 num_chains,
+ u8 num_sub_bands)
{
int i, j, idx = 0;
@@ -511,8 +434,8 @@ static int iwl_sar_set_profile(union acpi_object *table,
* The table from ACPI is flat, but we store it in a
* structured array.
*/
- for (i = 0; i < ACPI_SAR_NUM_CHAINS_REV2; i++) {
- for (j = 0; j < ACPI_SAR_NUM_SUB_BANDS_REV2; j++) {
+ for (i = 0; i < BIOS_SAR_MAX_CHAINS_PER_PROFILE; i++) {
+ for (j = 0; j < BIOS_SAR_MAX_SUB_BANDS_NUM; j++) {
/* if we don't have the values, use the default */
if (i >= num_chains || j >= num_sub_bands) {
profile->chains[i].subbands[j] = 0;
@@ -535,73 +458,7 @@ static int iwl_sar_set_profile(union acpi_object *table,
return 0;
}
-static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt,
- __le16 *per_chain, u32 n_subbands,
- int prof_a, int prof_b)
-{
- int profs[ACPI_SAR_NUM_CHAINS_REV0] = { prof_a, prof_b };
- int i, j;
-
- for (i = 0; i < ACPI_SAR_NUM_CHAINS_REV0; i++) {
- struct iwl_sar_profile *prof;
-
- /* don't allow SAR to be disabled (profile 0 means disable) */
- if (profs[i] == 0)
- return -EPERM;
-
- /* we are off by one, so allow up to ACPI_SAR_PROFILE_NUM */
- if (profs[i] > ACPI_SAR_PROFILE_NUM)
- return -EINVAL;
-
- /* profiles go from 1 to 4, so decrement to access the array */
- prof = &fwrt->sar_profiles[profs[i] - 1];
-
- /* if the profile is disabled, do nothing */
- if (!prof->enabled) {
- IWL_DEBUG_RADIO(fwrt, "SAR profile %d is disabled.\n",
- profs[i]);
- /*
- * if one of the profiles is disabled, we
- * ignore all of them and return 1 to
- * differentiate disabled from other failures.
- */
- return 1;
- }
-
- IWL_DEBUG_INFO(fwrt,
- "SAR EWRD: chain %d profile index %d\n",
- i, profs[i]);
- IWL_DEBUG_RADIO(fwrt, " Chain[%d]:\n", i);
- for (j = 0; j < n_subbands; j++) {
- per_chain[i * n_subbands + j] =
- cpu_to_le16(prof->chains[i].subbands[j]);
- IWL_DEBUG_RADIO(fwrt, " Band[%d] = %d * .125dBm\n",
- j, prof->chains[i].subbands[j]);
- }
- }
-
- return 0;
-}
-
-int iwl_sar_select_profile(struct iwl_fw_runtime *fwrt,
- __le16 *per_chain, u32 n_tables, u32 n_subbands,
- int prof_a, int prof_b)
-{
- int i, ret = 0;
-
- for (i = 0; i < n_tables; i++) {
- ret = iwl_sar_fill_table(fwrt,
- &per_chain[i * n_subbands * ACPI_SAR_NUM_CHAINS_REV0],
- n_subbands, prof_a, prof_b);
- if (ret)
- break;
- }
-
- return ret;
-}
-IWL_EXPORT_SYMBOL(iwl_sar_select_profile);
-
-int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
+int iwl_acpi_get_wrds_table(struct iwl_fw_runtime *fwrt)
{
union acpi_object *wifi_pkg, *table, *data;
int ret, tbl_rev;
@@ -618,7 +475,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 2) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}
@@ -634,7 +491,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 1) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}
@@ -650,7 +507,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 0) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}
@@ -680,16 +537,15 @@ read_table:
/* The profile from WRDS is officially profile 1, but goes
* into sar_profiles[0] (because we don't have a profile 0).
*/
- ret = iwl_sar_set_profile(table, &fwrt->sar_profiles[0],
- flags & IWL_SAR_ENABLE_MSK,
- num_chains, num_sub_bands);
+ ret = iwl_acpi_sar_set_profile(table, &fwrt->sar_profiles[0],
+ flags & IWL_SAR_ENABLE_MSK,
+ num_chains, num_sub_bands);
out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_sar_get_wrds_table);
-int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
+int iwl_acpi_get_ewrd_table(struct iwl_fw_runtime *fwrt)
{
union acpi_object *wifi_pkg, *data;
bool enabled;
@@ -707,7 +563,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 2) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}
@@ -723,7 +579,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 1) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}
@@ -739,7 +595,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
&tbl_rev);
if (!IS_ERR(wifi_pkg)) {
if (tbl_rev != 0) {
- ret = PTR_ERR(wifi_pkg);
+ ret = -EINVAL;
goto out_free;
}
@@ -767,7 +623,7 @@ read_table:
* from index 1, so the maximum value allowed here is
* ACPI_SAR_PROFILES_NUM - 1.
*/
- if (n_profiles <= 0 || n_profiles >= ACPI_SAR_PROFILE_NUM) {
+ if (n_profiles >= BIOS_SAR_MAX_PROFILE_NUM) {
ret = -EINVAL;
goto out_free;
}
@@ -776,13 +632,15 @@ read_table:
pos = 3;
for (i = 0; i < n_profiles; i++) {
+ union acpi_object *table = &wifi_pkg->package.elements[pos];
/* The EWRD profiles officially go from 2 to 4, but we
* save them in sar_profiles[1-3] (because we don't
* have profile 0). So in the array we start from 1.
*/
- ret = iwl_sar_set_profile(&wifi_pkg->package.elements[pos],
- &fwrt->sar_profiles[i + 1], enabled,
- num_chains, num_sub_bands);
+ ret = iwl_acpi_sar_set_profile(table,
+ &fwrt->sar_profiles[i + 1],
+ enabled, num_chains,
+ num_sub_bands);
if (ret < 0)
break;
@@ -794,9 +652,8 @@ out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_sar_get_ewrd_table);
-int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
+int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt)
{
union acpi_object *wifi_pkg, *data;
int i, j, k, ret, tbl_rev;
@@ -811,7 +668,7 @@ int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
.revisions = BIT(3),
.bands = ACPI_GEO_NUM_BANDS_REV2,
.profiles = ACPI_NUM_GEO_PROFILES_REV3,
- .min_profiles = 3,
+ .min_profiles = BIOS_GEO_MIN_PROFILE_NUM,
},
{
.revisions = BIT(2),
@@ -897,7 +754,7 @@ int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
read_table:
fwrt->geo_rev = tbl_rev;
for (i = 0; i < num_profiles; i++) {
- for (j = 0; j < ACPI_GEO_NUM_BANDS_REV2; j++) {
+ for (j = 0; j < BIOS_GEO_MAX_NUM_BANDS; j++) {
union acpi_object *entry;
/*
@@ -921,7 +778,7 @@ read_table:
entry->integer.value;
}
- for (k = 0; k < ACPI_GEO_NUM_CHAINS; k++) {
+ for (k = 0; k < BIOS_GEO_NUM_CHAINS; k++) {
/* same here as above */
if (j >= num_bands) {
fwrt->geo_profiles[i].bands[j].chains[k] =
@@ -949,151 +806,26 @@ out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_sar_get_wgds_table);
-
-bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
-{
- /*
- * The PER_CHAIN_LIMIT_OFFSET_CMD command is not supported on
- * earlier firmware versions. Unfortunately, we don't have a
- * TLV API flag to rely on, so rely on the major version which
- * is in the first byte of ucode_ver. This was implemented
- * initially on version 38 and then backported to 17. It was
- * also backported to 29, but only for 7265D devices. The
- * intention was to have it in 36 as well, but not all 8000
- * family got this feature enabled. The 8000 family is the
- * only one using version 36, so skip this version entirely.
- */
- return IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) >= 38 ||
- (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 17 &&
- fwrt->trans->hw_rev != CSR_HW_REV_TYPE_3160) ||
- (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 29 &&
- ((fwrt->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
- CSR_HW_REV_TYPE_7265D));
-}
-IWL_EXPORT_SYMBOL(iwl_sar_geo_support);
-
-int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset *table,
- u32 n_bands, u32 n_profiles)
-{
- int i, j;
-
- if (!fwrt->geo_enabled)
- return -ENODATA;
-
- if (!iwl_sar_geo_support(fwrt))
- return -EOPNOTSUPP;
-
- for (i = 0; i < n_profiles; i++) {
- for (j = 0; j < n_bands; j++) {
- struct iwl_per_chain_offset *chain =
- &table[i * n_bands + j];
-
- chain->max_tx_power =
- cpu_to_le16(fwrt->geo_profiles[i].bands[j].max);
- chain->chain_a = fwrt->geo_profiles[i].bands[j].chains[0];
- chain->chain_b = fwrt->geo_profiles[i].bands[j].chains[1];
- IWL_DEBUG_RADIO(fwrt,
- "SAR geographic profile[%d] Band[%d]: chain A = %d chain B = %d max_tx_power = %d\n",
- i, j,
- fwrt->geo_profiles[i].bands[j].chains[0],
- fwrt->geo_profiles[i].bands[j].chains[1],
- fwrt->geo_profiles[i].bands[j].max);
- }
- }
-
- return 0;
-}
-IWL_EXPORT_SYMBOL(iwl_sar_geo_init);
-
-__le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
-{
- int ret;
- u8 value;
- u32 val;
- __le32 config_bitmap = 0;
-
- /*
- * Evaluate func 'DSM_FUNC_ENABLE_INDONESIA_5G2'.
- * Setting config_bitmap Indonesia bit is valid only for HR/JF.
- */
- switch (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id)) {
- case IWL_CFG_RF_TYPE_HR1:
- case IWL_CFG_RF_TYPE_HR2:
- case IWL_CFG_RF_TYPE_JF1:
- case IWL_CFG_RF_TYPE_JF2:
- ret = iwl_acpi_get_dsm_u8(fwrt->dev, 0,
- DSM_FUNC_ENABLE_INDONESIA_5G2,
- &iwl_guid, &value);
-
- if (!ret && value == DSM_VALUE_INDONESIA_ENABLE)
- config_bitmap |=
- cpu_to_le32(LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK);
- break;
- default:
- break;
- }
-
- /*
- ** Evaluate func 'DSM_FUNC_DISABLE_SRD'
- */
- ret = iwl_acpi_get_dsm_u8(fwrt->dev, 0,
- DSM_FUNC_DISABLE_SRD,
- &iwl_guid, &value);
- if (!ret) {
- if (value == DSM_VALUE_SRD_PASSIVE)
- config_bitmap |=
- cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK);
- else if (value == DSM_VALUE_SRD_DISABLE)
- config_bitmap |=
- cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
- }
-
- if (fw_has_capa(&fwrt->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_CHINA_22_REG_SUPPORT)) {
- /*
- ** Evaluate func 'DSM_FUNC_REGULATORY_CONFIG'
- */
- ret = iwl_acpi_get_dsm_u32(fwrt->dev, 0,
- DSM_FUNC_REGULATORY_CONFIG,
- &iwl_guid, &val);
- /*
- * China 2022 enable if the BIOS object does not exist or
- * if it is enabled in BIOS.
- */
- if (ret < 0 || val & DSM_MASK_CHINA_22_REG)
- config_bitmap |=
- cpu_to_le32(LARI_CONFIG_ENABLE_CHINA_22_REG_SUPPORT_MSK);
- }
-
- return config_bitmap;
-}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_lari_config_bitmap);
int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt)
{
union acpi_object *wifi_pkg, *data, *flags;
int i, j, ret, tbl_rev, num_sub_bands = 0;
int idx = 2;
- u8 cmd_ver;
-
- fwrt->ppag_flags = 0;
- fwrt->ppag_table_valid = false;
data = iwl_acpi_get_object(fwrt->dev, ACPI_PPAG_METHOD);
if (IS_ERR(data))
return PTR_ERR(data);
- /* try to read ppag table rev 2 or 1 (both have the same data size) */
+ /* try to read ppag table rev 3, 2 or 1 (all have the same data size) */
wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
ACPI_PPAG_WIFI_DATA_SIZE_V2, &tbl_rev);
if (!IS_ERR(wifi_pkg)) {
- if (tbl_rev == 1 || tbl_rev == 2) {
+ if (tbl_rev >= 1 && tbl_rev <= 3) {
num_sub_bands = IWL_NUM_SUB_BANDS_V2;
IWL_DEBUG_RADIO(fwrt,
- "Reading PPAG table v2 (tbl_rev=%d)\n",
+ "Reading PPAG table (tbl_rev=%d)\n",
tbl_rev);
goto read_table;
} else {
@@ -1116,6 +848,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt)
goto read_table;
}
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+
read_table:
fwrt->ppag_ver = tbl_rev;
flags = &wifi_pkg->package.elements[1];
@@ -1125,19 +860,8 @@ read_table:
goto out_free;
}
- fwrt->ppag_flags = flags->integer.value & ACPI_PPAG_MASK;
- cmd_ver = iwl_fw_lookup_cmd_ver(fwrt->fw,
- WIDE_ID(PHY_OPS_GROUP,
- PER_PLATFORM_ANT_GAIN_CMD),
- IWL_FW_CMD_VER_UNKNOWN);
- if (cmd_ver == IWL_FW_CMD_VER_UNKNOWN) {
- ret = -EINVAL;
- goto out_free;
- }
- if (!fwrt->ppag_flags && cmd_ver <= 3) {
- ret = 0;
- goto out_free;
- }
+ fwrt->ppag_flags = iwl_bios_get_ppag_flags(flags->integer.value,
+ fwrt->ppag_ver);
/*
* read, verify gain values and save them into the PPAG table.
@@ -1155,132 +879,15 @@ read_table:
}
fwrt->ppag_chains[i].subbands[j] = ent->integer.value;
- /* from ver 4 the fw deals with out of range values */
- if (cmd_ver >= 4)
- continue;
- if ((j == 0 &&
- (fwrt->ppag_chains[i].subbands[j] > ACPI_PPAG_MAX_LB ||
- fwrt->ppag_chains[i].subbands[j] < ACPI_PPAG_MIN_LB)) ||
- (j != 0 &&
- (fwrt->ppag_chains[i].subbands[j] > ACPI_PPAG_MAX_HB ||
- fwrt->ppag_chains[i].subbands[j] < ACPI_PPAG_MIN_HB))) {
- ret = -EINVAL;
- goto out_free;
- }
}
}
- fwrt->ppag_table_valid = true;
ret = 0;
out_free:
kfree(data);
return ret;
}
-IWL_EXPORT_SYMBOL(iwl_acpi_get_ppag_table);
-
-int iwl_read_ppag_table(struct iwl_fw_runtime *fwrt, union iwl_ppag_table_cmd *cmd,
- int *cmd_size)
-{
- u8 cmd_ver;
- int i, j, num_sub_bands;
- s8 *gain;
-
- /* many firmware images for JF lie about this */
- if (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id) ==
- CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF))
- return -EOPNOTSUPP;
-
- if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) {
- IWL_DEBUG_RADIO(fwrt,
- "PPAG capability not supported by FW, command not sent.\n");
- return -EINVAL;
- }
-
- cmd_ver = iwl_fw_lookup_cmd_ver(fwrt->fw,
- WIDE_ID(PHY_OPS_GROUP,
- PER_PLATFORM_ANT_GAIN_CMD),
- IWL_FW_CMD_VER_UNKNOWN);
- if (!fwrt->ppag_table_valid || (cmd_ver <= 3 && !fwrt->ppag_flags)) {
- IWL_DEBUG_RADIO(fwrt, "PPAG not enabled, command not sent.\n");
- return -EINVAL;
- }
-
- /* The 'flags' field is the same in v1 and in v2 so we can just
- * use v1 to access it.
- */
- cmd->v1.flags = cpu_to_le32(fwrt->ppag_flags);
-
- IWL_DEBUG_RADIO(fwrt, "PPAG cmd ver is %d\n", cmd_ver);
- if (cmd_ver == 1) {
- num_sub_bands = IWL_NUM_SUB_BANDS_V1;
- gain = cmd->v1.gain[0];
- *cmd_size = sizeof(cmd->v1);
- if (fwrt->ppag_ver == 1 || fwrt->ppag_ver == 2) {
- /* in this case FW supports revision 0 */
- IWL_DEBUG_RADIO(fwrt,
- "PPAG table rev is %d, send truncated table\n",
- fwrt->ppag_ver);
- }
- } else if (cmd_ver >= 2 && cmd_ver <= 4) {
- num_sub_bands = IWL_NUM_SUB_BANDS_V2;
- gain = cmd->v2.gain[0];
- *cmd_size = sizeof(cmd->v2);
- if (fwrt->ppag_ver == 0) {
- /* in this case FW supports revisions 1 or 2 */
- IWL_DEBUG_RADIO(fwrt,
- "PPAG table rev is 0, send padded table\n");
- }
- } else {
- IWL_DEBUG_RADIO(fwrt, "Unsupported PPAG command version\n");
- return -EINVAL;
- }
-
- /* ppag mode */
- IWL_DEBUG_RADIO(fwrt,
- "PPAG MODE bits were read from bios: %d\n",
- cmd->v1.flags & cpu_to_le32(ACPI_PPAG_MASK));
- if ((cmd_ver == 1 && !fw_has_capa(&fwrt->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_PPAG_CHINA_BIOS_SUPPORT)) ||
- (cmd_ver == 2 && fwrt->ppag_ver == 2)) {
- cmd->v1.flags &= cpu_to_le32(IWL_PPAG_ETSI_MASK);
- IWL_DEBUG_RADIO(fwrt, "masking ppag China bit\n");
- } else {
- IWL_DEBUG_RADIO(fwrt, "isn't masking ppag China bit\n");
- }
-
- IWL_DEBUG_RADIO(fwrt,
- "PPAG MODE bits going to be sent: %d\n",
- cmd->v1.flags & cpu_to_le32(ACPI_PPAG_MASK));
-
- for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) {
- for (j = 0; j < num_sub_bands; j++) {
- gain[i * num_sub_bands + j] =
- fwrt->ppag_chains[i].subbands[j];
- IWL_DEBUG_RADIO(fwrt,
- "PPAG table: chain[%d] band[%d]: gain = %d\n",
- i, j, gain[i * num_sub_bands + j]);
- }
- }
-
- return 0;
-}
-IWL_EXPORT_SYMBOL(iwl_read_ppag_table);
-
-bool iwl_acpi_is_ppag_approved(struct iwl_fw_runtime *fwrt)
-{
-
- if (!dmi_check_system(dmi_ppag_approved_list)) {
- IWL_DEBUG_RADIO(fwrt,
- "System vendor '%s' is not in the approved list, disabling PPAG.\n",
- dmi_get_system_info(DMI_SYS_VENDOR));
- fwrt->ppag_flags = 0;
- return false;
- }
-
- return true;
-}
-IWL_EXPORT_SYMBOL(iwl_acpi_is_ppag_approved);
void iwl_acpi_get_phy_filters(struct iwl_fw_runtime *fwrt,
struct iwl_phy_specific_cfg *filters)
@@ -1293,7 +900,6 @@ void iwl_acpi_get_phy_filters(struct iwl_fw_runtime *fwrt,
if (IS_ERR(data))
return;
- /* try to read wtas table revision 1 or revision 0*/
wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
ACPI_WPFC_WIFI_DATA_SIZE,
&tbl_rev);
@@ -1303,13 +909,14 @@ void iwl_acpi_get_phy_filters(struct iwl_fw_runtime *fwrt,
if (tbl_rev != 0)
goto out_free;
- BUILD_BUG_ON(ARRAY_SIZE(filters->filter_cfg_chains) != ACPI_WPFC_WIFI_DATA_SIZE);
+ BUILD_BUG_ON(ARRAY_SIZE(filters->filter_cfg_chains) !=
+ ACPI_WPFC_WIFI_DATA_SIZE - 1);
for (i = 0; i < ARRAY_SIZE(filters->filter_cfg_chains); i++) {
- if (wifi_pkg->package.elements[i].type != ACPI_TYPE_INTEGER)
- return;
+ if (wifi_pkg->package.elements[i + 1].type != ACPI_TYPE_INTEGER)
+ goto out_free;
tmp.filter_cfg_chains[i] =
- cpu_to_le32(wifi_pkg->package.elements[i].integer.value);
+ cpu_to_le32(wifi_pkg->package.elements[i + 1].integer.value);
}
IWL_DEBUG_RADIO(fwrt, "Loaded WPFC filter config from ACPI\n");
@@ -1318,3 +925,38 @@ out_free:
kfree(data);
}
IWL_EXPORT_SYMBOL(iwl_acpi_get_phy_filters);
+
+void iwl_acpi_get_guid_lock_status(struct iwl_fw_runtime *fwrt)
+{
+ union acpi_object *wifi_pkg, *data;
+ int tbl_rev;
+
+ data = iwl_acpi_get_object(fwrt->dev, ACPI_GLAI_METHOD);
+ if (IS_ERR(data))
+ return;
+
+ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+ ACPI_GLAI_WIFI_DATA_SIZE,
+ &tbl_rev);
+ if (IS_ERR(wifi_pkg))
+ goto out_free;
+
+ if (tbl_rev != 0) {
+ IWL_DEBUG_RADIO(fwrt, "Invalid GLAI revision: %d\n", tbl_rev);
+ goto out_free;
+ }
+
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ wifi_pkg->package.elements[1].integer.value > ACPI_GLAI_MAX_STATUS)
+ goto out_free;
+
+ fwrt->uefi_tables_lock_status =
+ wifi_pkg->package.elements[1].integer.value;
+
+ IWL_DEBUG_RADIO(fwrt,
+ "Loaded UEFI WIFI GUID lock status: %d from ACPI\n",
+ fwrt->uefi_tables_lock_status);
+out_free:
+ kfree(data);
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_guid_lock_status);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index e9277f6f3582..1d32b82f73db 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -7,6 +7,7 @@
#define __iwl_fw_acpi__
#include <linux/acpi.h>
+#include "fw/regulatory.h"
#include "fw/api/commands.h"
#include "fw/api/power.h"
#include "fw/api/phy.h"
@@ -25,6 +26,7 @@
#define ACPI_PPAG_METHOD "PPAG"
#define ACPI_WTAS_METHOD "WTAS"
#define ACPI_WPFC_METHOD "WPFC"
+#define ACPI_GLAI_METHOD "GLAI"
#define ACPI_WIFI_DOMAIN (0x07)
@@ -56,187 +58,90 @@
#define ACPI_EWRD_WIFI_DATA_SIZE_REV2 ((ACPI_SAR_PROFILE_NUM - 1) * \
ACPI_SAR_NUM_CHAINS_REV2 * \
ACPI_SAR_NUM_SUB_BANDS_REV2 + 3)
-#define ACPI_WPFC_WIFI_DATA_SIZE 4 /* 4 filter config words */
+#define ACPI_WPFC_WIFI_DATA_SIZE 5 /* domain and 4 filter config words */
/* revision 0 and 1 are identical, except for the semantics in the FW */
#define ACPI_GEO_NUM_BANDS_REV0 2
#define ACPI_GEO_NUM_BANDS_REV2 3
-#define ACPI_GEO_NUM_CHAINS 2
#define ACPI_WRDD_WIFI_DATA_SIZE 2
#define ACPI_SPLC_WIFI_DATA_SIZE 2
#define ACPI_ECKV_WIFI_DATA_SIZE 2
-
+/*
+ * One element for domain type,
+ * and one for the status
+ */
+#define ACPI_GLAI_WIFI_DATA_SIZE 2
+#define ACPI_GLAI_MAX_STATUS 2
/*
* TAS size: 1 elelment for type,
* 1 element for enabled field,
* 1 element for block list size,
* 16 elements for block list array
*/
-#define APCI_WTAS_BLACK_LIST_MAX 16
-#define ACPI_WTAS_WIFI_DATA_SIZE (3 + APCI_WTAS_BLACK_LIST_MAX)
-#define ACPI_WTAS_ENABLED_MSK 0x1
-#define ACPI_WTAS_OVERRIDE_IEC_MSK 0x2
-#define ACPI_WTAS_ENABLE_IEC_MSK 0x4
-#define ACPI_WTAS_OVERRIDE_IEC_POS 0x1
-#define ACPI_WTAS_ENABLE_IEC_POS 0x2
-#define ACPI_WTAS_USA_UHB_MSK BIT(16)
-#define ACPI_WTAS_USA_UHB_POS 16
-
+#define ACPI_WTAS_WIFI_DATA_SIZE (3 + IWL_WTAS_BLACK_LIST_MAX)
#define ACPI_PPAG_WIFI_DATA_SIZE_V1 ((IWL_NUM_CHAIN_LIMITS * \
IWL_NUM_SUB_BANDS_V1) + 2)
#define ACPI_PPAG_WIFI_DATA_SIZE_V2 ((IWL_NUM_CHAIN_LIMITS * \
IWL_NUM_SUB_BANDS_V2) + 2)
-/* PPAG gain value bounds in 1/8 dBm */
-#define ACPI_PPAG_MIN_LB -16
-#define ACPI_PPAG_MAX_LB 24
-#define ACPI_PPAG_MIN_HB -16
-#define ACPI_PPAG_MAX_HB 40
-#define ACPI_PPAG_MASK 3
-#define IWL_PPAG_ETSI_MASK BIT(0)
-
#define IWL_SAR_ENABLE_MSK BIT(0)
#define IWL_REDUCE_POWER_FLAGS_POS 1
-/*
- * The profile for revision 2 is a superset of revision 1, which is in
- * turn a superset of revision 0. So we can store all revisions
- * inside revision 2, which is what we represent here.
- */
-struct iwl_sar_profile_chain {
- u8 subbands[ACPI_SAR_NUM_SUB_BANDS_REV2];
-};
-
-struct iwl_sar_profile {
- bool enabled;
- struct iwl_sar_profile_chain chains[ACPI_SAR_NUM_CHAINS_REV2];
-};
-
-/* Same thing as with SAR, all revisions fit in revision 2 */
-struct iwl_geo_profile_band {
- u8 max;
- u8 chains[ACPI_GEO_NUM_CHAINS];
-};
-
-struct iwl_geo_profile {
- struct iwl_geo_profile_band bands[ACPI_GEO_NUM_BANDS_REV2];
-};
-
-/* Same thing as with SAR, all revisions fit in revision 2 */
-struct iwl_ppag_chain {
- s8 subbands[ACPI_SAR_NUM_SUB_BANDS_REV2];
-};
-
-enum iwl_dsm_funcs_rev_0 {
- DSM_FUNC_QUERY = 0,
- DSM_FUNC_DISABLE_SRD = 1,
- DSM_FUNC_ENABLE_INDONESIA_5G2 = 2,
- DSM_FUNC_ENABLE_6E = 3,
- DSM_FUNC_REGULATORY_CONFIG = 4,
- DSM_FUNC_11AX_ENABLEMENT = 6,
- DSM_FUNC_ENABLE_UNII4_CHAN = 7,
- DSM_FUNC_ACTIVATE_CHANNEL = 8,
- DSM_FUNC_FORCE_DISABLE_CHANNELS = 9,
- DSM_FUNC_ENERGY_DETECTION_THRESHOLD = 10,
-};
-
-enum iwl_dsm_values_srd {
- DSM_VALUE_SRD_ACTIVE,
- DSM_VALUE_SRD_PASSIVE,
- DSM_VALUE_SRD_DISABLE,
- DSM_VALUE_SRD_MAX
-};
-
-enum iwl_dsm_values_indonesia {
- DSM_VALUE_INDONESIA_DISABLE,
- DSM_VALUE_INDONESIA_ENABLE,
- DSM_VALUE_INDONESIA_RESERVED,
- DSM_VALUE_INDONESIA_MAX
-};
-
-/* DSM RFI uses a different GUID, so need separate definitions */
-
-#define DSM_RFI_FUNC_ENABLE 3
-
-enum iwl_dsm_values_rfi {
- DSM_VALUE_RFI_ENABLE,
- DSM_VALUE_RFI_DISABLE,
- DSM_VALUE_RFI_MAX
-};
-
-enum iwl_dsm_masks_reg {
- DSM_MASK_CHINA_22_REG = BIT(2)
-};
+/* The Inidcator whether UEFI WIFI GUID tables are locked is read from ACPI */
+#define UEFI_WIFI_GUID_UNLOCKED 0
+
+#define ACPI_DSM_REV 0
#ifdef CONFIG_ACPI
struct iwl_fw_runtime;
extern const guid_t iwl_guid;
-extern const guid_t iwl_rfi_guid;
-
-int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func,
- const guid_t *guid, u8 *value);
-
-int iwl_acpi_get_dsm_u32(struct device *dev, int rev, int func,
- const guid_t *guid, u32 *value);
/**
* iwl_acpi_get_mcc - read MCC from ACPI, if available
*
- * @dev: the struct device
+ * @fwrt: the fw runtime struct
* @mcc: output buffer (3 bytes) that will get the MCC
*
* This function tries to read the current MCC from ACPI if available.
*/
-int iwl_acpi_get_mcc(struct device *dev, char *mcc);
+int iwl_acpi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc);
-u64 iwl_acpi_get_pwr_limit(struct device *dev);
+int iwl_acpi_get_pwr_limit(struct iwl_fw_runtime *fwrt, u64 *dflt_pwr_limit);
/*
* iwl_acpi_get_eckv - read external clock validation from ACPI, if available
*
- * @dev: the struct device
+ * @fwrt: the fw runtime struct
* @extl_clk: output var (2 bytes) that will get the clk indication.
*
* This function tries to read the external clock indication
* from ACPI if available.
*/
-int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk);
-
-int iwl_sar_select_profile(struct iwl_fw_runtime *fwrt,
- __le16 *per_chain, u32 n_tables, u32 n_subbands,
- int prof_a, int prof_b);
+int iwl_acpi_get_eckv(struct iwl_fw_runtime *fwrt, u32 *extl_clk);
-int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt);
+int iwl_acpi_get_wrds_table(struct iwl_fw_runtime *fwrt);
-int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt);
+int iwl_acpi_get_ewrd_table(struct iwl_fw_runtime *fwrt);
-int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt);
+int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt);
-bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt);
-
-int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset *table,
- u32 n_bands, u32 n_profiles);
-
-int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
- union iwl_tas_config_cmd *cmd, int fw_ver);
-
-__le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt);
+int iwl_acpi_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *data);
int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt);
-int iwl_read_ppag_table(struct iwl_fw_runtime *fwrt, union iwl_ppag_table_cmd *cmd,
- int *cmd_size);
-
-bool iwl_acpi_is_ppag_approved(struct iwl_fw_runtime *fwrt);
-
void iwl_acpi_get_phy_filters(struct iwl_fw_runtime *fwrt,
struct iwl_phy_specific_cfg *filters);
+void iwl_acpi_get_guid_lock_status(struct iwl_fw_runtime *fwrt);
+
+int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt,
+ enum iwl_dsm_funcs func, u32 *value);
+
#else /* CONFIG_ACPI */
static inline void *iwl_acpi_get_dsm_object(struct device *dev, int rev,
@@ -245,92 +150,61 @@ static inline void *iwl_acpi_get_dsm_object(struct device *dev, int rev,
return ERR_PTR(-ENOENT);
}
-static inline int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func,
- const guid_t *guid, u8 *value)
-{
- return -ENOENT;
-}
-
-static inline int iwl_acpi_get_dsm_u32(struct device *dev, int rev, int func,
- const guid_t *guid, u32 *value)
-{
- return -ENOENT;
-}
-
-static inline int iwl_acpi_get_mcc(struct device *dev, char *mcc)
+static inline int iwl_acpi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc)
{
return -ENOENT;
}
-static inline u64 iwl_acpi_get_pwr_limit(struct device *dev)
+static inline int iwl_acpi_get_pwr_limit(struct iwl_fw_runtime *fwrt,
+ u64 *dflt_pwr_limit)
{
+ *dflt_pwr_limit = 0;
return 0;
}
-static inline int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk)
+static inline int iwl_acpi_get_eckv(struct iwl_fw_runtime *fwrt, u32 *extl_clk)
{
return -ENOENT;
}
-static inline int iwl_sar_select_profile(struct iwl_fw_runtime *fwrt,
- __le16 *per_chain, u32 n_tables, u32 n_subbands,
- int prof_a, int prof_b)
+static inline int iwl_acpi_get_wrds_table(struct iwl_fw_runtime *fwrt)
{
return -ENOENT;
}
-static inline int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
+static inline int iwl_acpi_get_ewrd_table(struct iwl_fw_runtime *fwrt)
{
return -ENOENT;
}
-static inline int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
-{
- return -ENOENT;
-}
-
-static inline int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
+static inline int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt)
{
return 1;
}
-static inline bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
-{
- return false;
-}
-
-static inline int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
- union iwl_tas_config_cmd *cmd, int fw_ver)
+static inline int iwl_acpi_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *data)
{
return -ENOENT;
}
-static inline __le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
-{
- return 0;
-}
-
static inline int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt)
{
return -ENOENT;
}
-static inline int iwl_read_ppag_table(struct iwl_fw_runtime *fwrt,
- union iwl_ppag_table_cmd *cmd, int *cmd_size)
-{
- return -ENOENT;
-}
+/* macro since the second argument doesn't always exist */
+#define iwl_acpi_get_phy_filters(fwrt, filters) do { } while (0)
-static inline bool iwl_acpi_is_ppag_approved(struct iwl_fw_runtime *fwrt)
+static inline void iwl_acpi_get_guid_lock_status(struct iwl_fw_runtime *fwrt)
{
- return false;
}
-static inline void iwl_acpi_get_phy_filters(struct iwl_fw_runtime *fwrt,
- struct iwl_phy_specific_cfg *filters)
+static inline int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt,
+ enum iwl_dsm_funcs func, u32 *value)
{
+ return -ENOENT;
}
-
#endif /* CONFIG_ACPI */
#endif /* __iwl_fw_acpi__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
index 3e81e9369224..bc27e15488f5 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
+ * Copyright (C) 2023 Intel Corporation
* Copyright (C) 2013-2014, 2018-2019 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
@@ -170,7 +171,11 @@ enum iwl_bt_ci_compliance {
* @bt_activity_grading: the activity of BT &enum iwl_bt_activity_grading
* @ttc_status: is TTC enabled - one bit per PHY
* @rrc_status: is RRC enabled - one bit per PHY
- * @reserved: reserved
+ * The following fields are only for version 5, and are reserved in version 4:
+ * @wifi_loss_low_rssi: The predicted lost WiFi rate (% of air time that BT is
+ * utilizing) when the RSSI is low (<= -65 dBm)
+ * @wifi_loss_mid_high_rssi: The predicted lost WiFi rate (% of air time that
+ * BT is utilizing) when the RSSI is mid/high (>= -65 dBm)
*/
struct iwl_bt_coex_profile_notif {
__le32 mbox_msg[4];
@@ -182,7 +187,10 @@ struct iwl_bt_coex_profile_notif {
__le32 bt_activity_grading;
u8 ttc_status;
u8 rrc_status;
- __le16 reserved;
-} __packed; /* BT_COEX_PROFILE_NTFY_API_S_VER_4 */
+ u8 wifi_loss_low_rssi;
+ u8 wifi_loss_mid_high_rssi;
+} __packed; /* BT_COEX_PROFILE_NTFY_API_S_VER_4
+ * BT_COEX_PROFILE_NTFY_API_S_VER_5
+ */
#endif /* __iwl_fw_api_coex_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index ea99d41040d2..d2a74beed3a1 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -324,7 +324,7 @@ struct iwl_wowlan_patterns_cmd {
u8 n_patterns;
/**
- * @n_patterns: sta_id
+ * @sta_id: sta_id
*/
u8 sta_id;
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
index 751b596ea1a5..0f7903c5a4df 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
@@ -101,7 +101,7 @@ enum iwl_data_path_subcmd_ids {
RX_NO_DATA_NOTIF = 0xF5,
/**
- * @THERMAL_DUAL_CHAIN_DISABLE_REQ: firmware request for SMPS mode,
+ * @THERMAL_DUAL_CHAIN_REQUEST: firmware request for SMPS mode,
* &struct iwl_thermal_dual_chain_request
*/
THERMAL_DUAL_CHAIN_REQUEST = 0xF6,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
index 394747deb269..47c914de2992 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef __iwl_fw_dbg_tlv_h__
#define __iwl_fw_dbg_tlv_h__
@@ -319,7 +319,7 @@ struct iwl_fw_ini_conf_set_tlv {
* @IWL_FW_INI_CONFIG_SET_TYPE_CSR: for CSR configuration
* @IWL_FW_INI_CONFIG_SET_TYPE_DBGC_DRAM_ADDR: for DBGC_DRAM_ADDR configuration
* @IWL_FW_INI_CONFIG_SET_TYPE_PERIPH_SCRATCH_HWM: for PERIPH SCRATCH HWM configuration
- * @IWL_FW_INI_ALLOCATION_NUM: max number of configuration supported
+ * @IWL_FW_INI_CONFIG_SET_TYPE_MAX_NUM: max number of configuration supported
*/
enum iwl_fw_ini_config_set_type {
@@ -360,6 +360,7 @@ enum iwl_fw_ini_allocation_id {
* @IWL_FW_INI_LOCATION_SRAM_PATH: SRAM location
* @IWL_FW_INI_LOCATION_DRAM_PATH: DRAM location
* @IWL_FW_INI_LOCATION_NPK_PATH: NPK location
+ * @IWL_FW_INI_LOCATION_NUM: number of valid locations
*/
enum iwl_fw_ini_buffer_location {
IWL_FW_INI_LOCATION_INVALID,
@@ -439,6 +440,7 @@ enum iwl_fw_ini_region_device_memory_subtype {
* Hard coded time points in which the driver can send hcmd or perform dump
* collection
*
+ * @IWL_FW_INI_TIME_POINT_INVALID: invalid timepoint
* @IWL_FW_INI_TIME_POINT_EARLY: pre loading the FW
* @IWL_FW_INI_TIME_POINT_AFTER_ALIVE: first cmd from host after alive notif
* @IWL_FW_INI_TIME_POINT_POST_INIT: last cmd in series of init sequence
@@ -553,7 +555,7 @@ enum iwl_fw_ini_dump_policy {
* enum iwl_fw_ini_dump_type - Determines dump type based on size defined by FW.
*
* @IWL_FW_INI_DUMP_BRIEF : only dump the most important regions
- * @IWL_FW_INI_DEBUG_MEDIUM: dump more regions than "brief", but not all regions
+ * @IWL_FW_INI_DUMP_MEDIUM: dump more regions than "brief", but not all regions
* @IWL_FW_INI_DUMP_VERBOSE : dump all regions
*/
enum iwl_fw_ini_dump_type {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
index 798731ecbefd..b31ae6889bd0 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
@@ -394,7 +394,7 @@ struct iwl_buf_alloc_cmd {
*
* @first_word: magic word value
* @second_word: magic word value
- * @framfrags: DRAM fragmentaion detail
+ * @dram_frags: DRAM fragmentaion detail
*/
struct iwl_dram_info {
__le32 first_word;
@@ -537,7 +537,7 @@ enum iwl_fw_dbg_config_cmd_type {
}; /* LDBG_CFG_CMD_TYPE_API_E_VER_1 */
/* this token disables debug asserts in the firmware */
-#define IWL_FW_DBG_CONFIG_TOKEN 0x00011301
+#define IWL_FW_DBG_CONFIG_TOKEN 0x00010001
/**
* struct iwl_fw_dbg_config_cmd - configure FW debug
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index b044990c7b87..25530a29317e 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -630,6 +630,7 @@ enum iwl_location_frame_format {
* @IWL_LOCATION_BW_20MHZ: 20MHz
* @IWL_LOCATION_BW_40MHZ: 40MHz
* @IWL_LOCATION_BW_80MHZ: 80MHz
+ * @IWL_LOCATION_BW_160MHZ: 160MHz
*/
enum iwl_location_bw {
IWL_LOCATION_BW_20MHZ,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
index f15e6d64c298..c6d1f5644638 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
@@ -242,9 +242,9 @@ struct iwl_mac_low_latency_cmd {
* @esr_transition_timeout: the timeout required by the AP for the
* eSR transition.
* Available only from version 2 of the command.
- * This values comes from the EMLSR transition delay in the EML
+ * This value comes from the EMLSR transition delay in the EML
* Capabilities subfield.
- * @medium_sync_delay: the value as it appeasr in P802.11be_D2.2 Figure 9-1002j.
+ * @medium_sync_delay: the value as it appears in P802.11be_D2.2 Figure 9-1002j.
* @assoc_id: unique ID assigned by the AP during association
* @reserved1: alignment
* @data_policy: see &enum iwl_mac_data_policy
@@ -317,7 +317,6 @@ enum iwl_mac_config_filter_flags {
* If the NIC is not ACK_ENABLED it may use the EOF-bit in first non-0
* len delim to determine if AGG or single.
* @client: client mac data
- * @go_ibss: mac data for go or ibss
* @p2p_dev: mac data for p2p device
*/
struct iwl_mac_config_cmd {
@@ -374,7 +373,7 @@ struct iwl_mac_config_cmd {
* iwl_link_ctx_cfg_cmd::bss_color_disable
* @LINK_CONTEXT_MODIFY_EHT_PARAMS: covers iwl_link_ctx_cfg_cmd::puncture_mask.
* This flag can be set only if the MAC that this link relates to has
- * eht_support set to true.
+ * eht_support set to true. No longer used since _VER_3 of this command.
* @LINK_CONTEXT_MODIFY_ALL: set all above flags
*/
enum iwl_link_ctx_modify_flags {
@@ -447,6 +446,7 @@ enum iwl_link_ctx_flags {
* @listen_lmac: indicates whether the link should be allocated on the Listen
* Lmac or on the Main Lmac. Cannot be changed on an active Link.
* Relevant only for eSR.
+ * @reserved1: in version 2, listen_lmac became reserved
* @cck_rates: basic rates available for CCK
* @ofdm_rates: basic rates available for OFDM
* @cck_short_preamble: 1 for enabling short preamble, 0 otherwise
@@ -462,7 +462,7 @@ enum iwl_link_ctx_flags {
* @bi: beacon interval in TU, applicable only when associated
* @dtim_interval: DTIM interval in TU.
* Relevant only for GO, otherwise this is offloaded.
- * @puncture_mask: puncture mask for EHT
+ * @puncture_mask: puncture mask for EHT (removed in VER_3)
* @frame_time_rts_th: HE duration RTS threshold, in units of 32us
* @flags: a combination from &enum iwl_link_ctx_flags
* @flags_mask: what of %flags have changed. Also &enum iwl_link_ctx_flags
@@ -472,10 +472,10 @@ enum iwl_link_ctx_flags {
* @bssid_index: index of the associated VAP
* @bss_color: 11ax AP ID that is used in the HE SIG-A to mark inter BSS frame
* @spec_link_id: link_id as the AP knows it
- * @reserved: alignment
+ * @reserved2: alignment
* @ibss_bssid_addr: bssid for ibss
* @reserved_for_ibss_bssid_addr: reserved
- * @reserved1: reserved for future use
+ * @reserved3: reserved for future use
*/
struct iwl_link_config_cmd {
__le32 action;
@@ -486,7 +486,10 @@ struct iwl_link_config_cmd {
__le16 reserved_for_local_link_addr;
__le32 modify_mask;
__le32 active;
- __le32 listen_lmac;
+ union {
+ __le32 listen_lmac;
+ __le32 reserved1;
+ };
__le32 cck_rates;
__le32 ofdm_rates;
__le32 cck_short_preamble;
@@ -502,7 +505,7 @@ struct iwl_link_config_cmd {
struct iwl_he_backoff_conf trig_based_txf[AC_NUM];
__le32 bi;
__le32 dtim_interval;
- __le16 puncture_mask;
+ __le16 puncture_mask; /* removed in _VER_3 */
__le16 frame_time_rts_th;
__le32 flags;
__le32 flags_mask;
@@ -512,11 +515,11 @@ struct iwl_link_config_cmd {
u8 bssid_index;
u8 bss_color;
u8 spec_link_id;
- u8 reserved;
+ u8 reserved2;
u8 ibss_bssid_addr[6];
__le16 reserved_for_ibss_bssid_addr;
- __le32 reserved1[8];
-} __packed; /* LINK_CONTEXT_CONFIG_CMD_API_S_VER_1 */
+ __le32 reserved3[8];
+} __packed; /* LINK_CONTEXT_CONFIG_CMD_API_S_VER_1, _VER_2, _VER_3 */
/* Currently FW supports link ids in the range 0-3 and can have
* at most two active links for each vif.
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
index 55882190251c..545826973a80 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2022 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2022, 2024 Intel Corporation
* Copyright (C) 2017 Intel Deutschland GmbH
*/
#ifndef __iwl_fw_api_mac_h__
@@ -431,8 +431,8 @@ enum iwl_he_pkt_ext_constellations {
};
#define MAX_HE_SUPP_NSS 2
-#define MAX_CHANNEL_BW_INDX_API_D_VER_2 4
-#define MAX_CHANNEL_BW_INDX_API_D_VER_3 5
+#define MAX_CHANNEL_BW_INDX_API_D_VER_1 4
+#define MAX_CHANNEL_BW_INDX_API_D_VER_2 5
/**
* struct iwl_he_pkt_ext_v1 - QAM thresholds
@@ -455,7 +455,7 @@ enum iwl_he_pkt_ext_constellations {
* (0-low_th, 1-high_th)
*/
struct iwl_he_pkt_ext_v1 {
- u8 pkt_ext_qam_th[MAX_HE_SUPP_NSS][MAX_CHANNEL_BW_INDX_API_D_VER_2][2];
+ u8 pkt_ext_qam_th[MAX_HE_SUPP_NSS][MAX_CHANNEL_BW_INDX_API_D_VER_1][2];
} __packed; /* PKT_EXT_DOT11AX_API_S_VER_1 */
/**
@@ -480,7 +480,7 @@ struct iwl_he_pkt_ext_v1 {
* (0-low_th, 1-high_th)
*/
struct iwl_he_pkt_ext_v2 {
- u8 pkt_ext_qam_th[MAX_HE_SUPP_NSS][MAX_CHANNEL_BW_INDX_API_D_VER_3][2];
+ u8 pkt_ext_qam_th[MAX_HE_SUPP_NSS][MAX_CHANNEL_BW_INDX_API_D_VER_2][2];
} __packed; /* PKT_EXT_DOT11AX_API_S_VER_2 */
/**
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index 7ec959244ffc..58034dfa7e70 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -1,12 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
#ifndef __iwl_fw_api_nvm_reg_h__
#define __iwl_fw_api_nvm_reg_h__
+#include "fw/regulatory.h"
/**
* enum iwl_regulatory_and_nvm_subcmd_ids - regulatory/NVM commands
*/
@@ -438,36 +439,30 @@ enum iwl_mcc_source {
MCC_SOURCE_GETTING_MCC_TEST_MODE = 0x11,
};
-#define IWL_TAS_BLOCK_LIST_MAX 16
/**
- * struct iwl_tas_config_cmd_v2 - configures the TAS
+ * struct iwl_tas_config_cmd_common - configures the TAS.
+ * This is also the v2 structure.
* @block_list_size: size of relevant field in block_list_array
* @block_list_array: list of countries where TAS must be disabled
*/
-struct iwl_tas_config_cmd_v2 {
+struct iwl_tas_config_cmd_common {
__le32 block_list_size;
- __le32 block_list_array[IWL_TAS_BLOCK_LIST_MAX];
+ __le32 block_list_array[IWL_WTAS_BLACK_LIST_MAX];
} __packed; /* TAS_CONFIG_CMD_API_S_VER_2 */
/**
* struct iwl_tas_config_cmd_v3 - configures the TAS
- * @block_list_size: size of relevant field in block_list_array
- * @block_list_array: list of countries where TAS must be disabled
* @override_tas_iec: indicates whether to override default value of IEC regulatory
* @enable_tas_iec: in case override_tas_iec is set -
* indicates whether IEC regulatory is enabled or disabled
*/
struct iwl_tas_config_cmd_v3 {
- __le32 block_list_size;
- __le32 block_list_array[IWL_TAS_BLOCK_LIST_MAX];
__le16 override_tas_iec;
__le16 enable_tas_iec;
} __packed; /* TAS_CONFIG_CMD_API_S_VER_3 */
/**
- * struct iwl_tas_config_cmd_v3 - configures the TAS
- * @block_list_size: size of relevant field in block_list_array
- * @block_list_array: list of countries where TAS must be disabled
+ * struct iwl_tas_config_cmd_v4 - configures the TAS
* @override_tas_iec: indicates whether to override default value of IEC regulatory
* @enable_tas_iec: in case override_tas_iec is set -
* indicates whether IEC regulatory is enabled or disabled
@@ -475,19 +470,20 @@ struct iwl_tas_config_cmd_v3 {
* @reserved: reserved
*/
struct iwl_tas_config_cmd_v4 {
- __le32 block_list_size;
- __le32 block_list_array[IWL_TAS_BLOCK_LIST_MAX];
u8 override_tas_iec;
u8 enable_tas_iec;
u8 usa_tas_uhb_allowed;
u8 reserved;
} __packed; /* TAS_CONFIG_CMD_API_S_VER_4 */
-union iwl_tas_config_cmd {
- struct iwl_tas_config_cmd_v2 v2;
- struct iwl_tas_config_cmd_v3 v3;
- struct iwl_tas_config_cmd_v4 v4;
+struct iwl_tas_config_cmd {
+ struct iwl_tas_config_cmd_common common;
+ union {
+ struct iwl_tas_config_cmd_v3 v3;
+ struct iwl_tas_config_cmd_v4 v4;
+ };
};
+
/**
* enum iwl_lari_config_masks - bit masks for the various LARI config operations
* @LARI_CONFIG_DISABLE_11AC_UKRAINE_MSK: disable 11ac in ukraine
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h
index 306ed88de463..08a2c416ce60 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h
@@ -142,6 +142,8 @@ struct iwl_phy_context_cmd_v1 {
* @lmac_id: the lmac id the phy context belongs to
* @ci: channel info
* @rxchain_info: ???
+ * @sbb_bandwidth: 0 disabled, 1 - 40Mhz ... 4 - 320MHz
+ * @sbb_ctrl_channel_loc: location of the control channel
* @dsp_cfg_flags: set to 0
* @reserved: reserved to align to 64 bit
*/
@@ -152,9 +154,20 @@ struct iwl_phy_context_cmd {
/* PHY_CONTEXT_DATA_API_S_VER_3, PHY_CONTEXT_DATA_API_S_VER_4 */
struct iwl_fw_channel_info ci;
__le32 lmac_id;
- __le32 rxchain_info; /* reserved in _VER_4 */
+ union {
+ __le32 rxchain_info; /* reserved in _VER_4 */
+ struct { /* used for _VER_5/_VER_6 */
+ u8 sbb_bandwidth;
+ u8 sbb_ctrl_channel_loc;
+ __le16 puncture_mask; /* added in VER_6 */
+ };
+ };
__le32 dsp_cfg_flags;
__le32 reserved;
-} __packed; /* PHY_CONTEXT_CMD_API_VER_3, PHY_CONTEXT_CMD_API_VER_4 */
+} __packed; /* PHY_CONTEXT_CMD_API_VER_3,
+ * PHY_CONTEXT_CMD_API_VER_4,
+ * PHY_CONTEXT_CMD_API_VER_5,
+ * PHY_CONTEXT_CMD_API_VER_6
+ */
#endif /* __iwl_fw_api_phy_ctxt_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
index 040d83fa5424..0bf38243f88a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
@@ -506,13 +506,40 @@ struct iwl_geo_tx_power_profiles_resp {
} __packed; /* PER_CHAIN_LIMIT_OFFSET_RSP */
/**
+ * enum iwl_ppag_flags - PPAG enable masks
+ * @IWL_PPAG_ETSI_MASK: enable PPAG in ETSI
+ * @IWL_PPAG_CHINA_MASK: enable PPAG in China
+ * @IWL_PPAG_ETSI_LPI_UHB_MASK: enable LPI in ETSI for UHB
+ * @IWL_PPAG_ETSI_VLP_UHB_MASK: enable VLP in ETSI for UHB
+ * @IWL_PPAG_ETSI_SP_UHB_MASK: enable SP in ETSI for UHB
+ * @IWL_PPAG_USA_LPI_UHB_MASK: enable LPI in USA for UHB
+ * @IWL_PPAG_USA_VLP_UHB_MASK: enable VLP in USA for UHB
+ * @IWL_PPAG_USA_SP_UHB_MASK: enable SP in USA for UHB
+ * @IWL_PPAG_CANADA_LPI_UHB_MASK: enable LPI in CANADA for UHB
+ * @IWL_PPAG_CANADA_VLP_UHB_MASK: enable VLP in CANADA for UHB
+ * @IWL_PPAG_CANADA_SP_UHB_MASK: enable SP in CANADA for UHB
+ */
+enum iwl_ppag_flags {
+ IWL_PPAG_ETSI_MASK = BIT(0),
+ IWL_PPAG_CHINA_MASK = BIT(1),
+ IWL_PPAG_ETSI_LPI_UHB_MASK = BIT(2),
+ IWL_PPAG_ETSI_VLP_UHB_MASK = BIT(3),
+ IWL_PPAG_ETSI_SP_UHB_MASK = BIT(4),
+ IWL_PPAG_USA_LPI_UHB_MASK = BIT(5),
+ IWL_PPAG_USA_VLP_UHB_MASK = BIT(6),
+ IWL_PPAG_USA_SP_UHB_MASK = BIT(7),
+ IWL_PPAG_CANADA_LPI_UHB_MASK = BIT(8),
+ IWL_PPAG_CANADA_VLP_UHB_MASK = BIT(9),
+ IWL_PPAG_CANADA_SP_UHB_MASK = BIT(10),
+};
+
+/**
* union iwl_ppag_table_cmd - union for all versions of PPAG command
* @v1: version 1
* @v2: version 2
- *
- * @flags: bit 0 - indicates enablement of PPAG for ETSI
- * bit 1 - indicates enablement of PPAG for CHINA BIOS
- * bit 1 can be used only in v3 (identical to v2)
+ * version 3, 4 and 5 are the same structure as v2,
+ * but has a different format of the flags bitmap
+ * @flags: values from &enum iwl_ppag_flags
* @gain: table of antenna gain values per chain and sub-band
* @reserved: reserved
*/
@@ -529,6 +556,11 @@ union iwl_ppag_table_cmd {
} v2;
} __packed;
+#define IWL_PPAG_CMD_V4_MASK (IWL_PPAG_ETSI_MASK | IWL_PPAG_CHINA_MASK)
+#define IWL_PPAG_CMD_V5_MASK (IWL_PPAG_CMD_V4_MASK | \
+ IWL_PPAG_ETSI_LPI_UHB_MASK | \
+ IWL_PPAG_USA_LPI_UHB_MASK)
+
#define MCC_TO_SAR_OFFSET_TABLE_ROW_SIZE 26
#define MCC_TO_SAR_OFFSET_TABLE_COL_SIZE 13
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
index d62fed543276..d7f8a276b683 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021, 2023 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -109,6 +109,7 @@ enum iwl_sta_flags {
* @STA_KEY_FLG_EN_MSK: mask for encryption algorithmi value
* @STA_KEY_FLG_WEP_KEY_MAP: wep is either a group key (0 - legacy WEP) or from
* station info array (1 - n 1X mode)
+ * @STA_KEY_FLG_AMSDU_SPP: SPP (signaling and payload protected) A-MSDU
* @STA_KEY_FLG_KEYID_MSK: the index of the key
* @STA_KEY_FLG_KEYID_POS: key index bit position
* @STA_KEY_NOT_VALID: key is invalid
@@ -129,6 +130,7 @@ enum iwl_sta_key_flag {
STA_KEY_FLG_EN_MSK = (7 << 0),
STA_KEY_FLG_WEP_KEY_MAP = BIT(3),
+ STA_KEY_FLG_AMSDU_SPP = BIT(7),
STA_KEY_FLG_KEYID_POS = 8,
STA_KEY_FLG_KEYID_MSK = (3 << STA_KEY_FLG_KEYID_POS),
STA_KEY_NOT_VALID = BIT(11),
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
index 842360b1e995..d9e4c75403b8 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
@@ -76,6 +76,8 @@ enum iwl_tx_flags {
* to a secured STA
* @IWL_TX_FLAGS_HIGH_PRI: high priority frame (like EAPOL) - can affect rate
* selection, retry limits and BT kill
+ * @IWL_TX_FLAGS_RTS: firmware used an RTS
+ * @IWL_TX_FLAGS_CTS: firmware used CTS-to-self
*/
enum iwl_tx_cmd_flags {
IWL_TX_FLAGS_CMD_RATE = BIT(0),
@@ -884,6 +886,7 @@ struct iwl_tx_path_flush_cmd {
/**
* struct iwl_flush_queue_info - virtual flush queue info
+ * @tid: the tid to flush
* @queue_num: virtual queue id
* @read_before_flush: read pointer before flush
* @read_after_flush: read pointer after flush
@@ -897,6 +900,7 @@ struct iwl_flush_queue_info {
/**
* struct iwl_tx_path_flush_cmd_rsp -- queue/FIFO flush command response
+ * @sta_id: the station for which the queue was flushed
* @num_flushed_queues: number of queues in queues array
* @queues: all flushed queues
*/
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
index 9c69d3674384..e6c0f928a6bb 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2005-2014, 2019-2021, 2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2019-2021, 2023-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -66,6 +66,16 @@ enum iwl_gen2_tx_fifo {
IWL_GEN2_TRIG_TX_FIFO_VO,
};
+enum iwl_bz_tx_fifo {
+ IWL_BZ_EDCA_TX_FIFO_BK,
+ IWL_BZ_EDCA_TX_FIFO_BE,
+ IWL_BZ_EDCA_TX_FIFO_VI,
+ IWL_BZ_EDCA_TX_FIFO_VO,
+ IWL_BZ_TRIG_TX_FIFO_BK,
+ IWL_BZ_TRIG_TX_FIFO_BE,
+ IWL_BZ_TRIG_TX_FIFO_VI,
+ IWL_BZ_TRIG_TX_FIFO_VO,
+};
/**
* enum iwl_tx_queue_cfg_actions - TXQ config options
* @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index e27774e7ed74..db6d7013df66 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2005-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -19,7 +19,6 @@
* @fwrt_ptr: pointer to the buffer coming from fwrt
* @trans_ptr: pointer to struct %iwl_trans_dump_data which contains the
* transport's data.
- * @trans_len: length of the valid data in trans_ptr
* @fwrt_len: length of the valid data in fwrt_ptr
*/
struct iwl_fw_dump_ptrs {
@@ -1728,10 +1727,12 @@ iwl_dump_ini_mem_fill_header(struct iwl_fw_runtime *fwrt,
/**
* mask_apply_and_normalize - applies mask on val and normalize the result
*
- * The normalization is based on the first set bit in the mask
- *
* @val: value
* @mask: mask to apply and to normalize with
+ *
+ * The normalization is based on the first set bit in the mask
+ *
+ * Returns: the extracted value
*/
static u32 mask_apply_and_normalize(u32 val, u32 mask)
{
@@ -2200,15 +2201,16 @@ struct iwl_dump_ini_mem_ops {
};
/**
- * iwl_dump_ini_mem
- *
- * Creates a dump tlv and copy a memory region into it.
- * Returns the size of the current dump tlv or 0 if failed
+ * iwl_dump_ini_mem - dump memory region
*
* @fwrt: fw runtime struct
* @list: list to add the dump tlv to
* @reg_data: memory region
* @ops: memory dump operations
+ *
+ * Creates a dump tlv and copy a memory region into it.
+ *
+ * Returns: the size of the current dump tlv or 0 if failed
*/
static u32 iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, struct list_head *list,
struct iwl_dump_ini_region_data *reg_data,
@@ -2427,9 +2429,12 @@ static u32 iwl_dump_ini_info(struct iwl_fw_runtime *fwrt,
struct iwl_fw_ini_debug_info_tlv *debug_info =
(void *)node->tlv.data;
+ BUILD_BUG_ON(sizeof(cfg_name->cfg_name) !=
+ sizeof(debug_info->debug_cfg_name));
+
cfg_name->image_type = debug_info->image_type;
cfg_name->cfg_name_len =
- cpu_to_le32(IWL_FW_INI_MAX_CFG_NAME);
+ cpu_to_le32(sizeof(cfg_name->cfg_name));
memcpy(cfg_name->cfg_name, debug_info->debug_cfg_name,
sizeof(cfg_name->cfg_name));
cfg_name++;
@@ -2873,7 +2878,8 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
IWL_WARN(fwrt, "Collecting data: trigger %d fired.\n",
le32_to_cpu(desc->trig_desc.type));
- schedule_delayed_work(&wk_data->wk, usecs_to_jiffies(delay));
+ queue_delayed_work(system_unbound_wq, &wk_data->wk,
+ usecs_to_jiffies(delay));
return 0;
}
@@ -3175,7 +3181,9 @@ int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
if (sync)
iwl_fw_dbg_collect_sync(fwrt, idx);
else
- schedule_delayed_work(&fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay));
+ queue_delayed_work(system_unbound_wq,
+ &fwrt->dump.wks[idx].wk,
+ usecs_to_jiffies(delay));
return 0;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index eb38c686b5cb..98d56e778d99 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -306,8 +306,6 @@ static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt, bool sync)
_iwl_dbg_tlv_time_point(fwrt, tp_id, NULL, sync);
}
-void iwl_fw_error_print_fseq_regs(struct iwl_fw_runtime *fwrt);
-
static inline void iwl_fwrt_update_fw_versions(struct iwl_fw_runtime *fwrt,
struct iwl_lmac_alive *lmac,
struct iwl_umac_alive *umac)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
index 06d6f7f66430..5c76e3b94968 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2014, 2018-2022 Intel Corporation
+ * Copyright (C) 2014, 2018-2024 Intel Corporation
* Copyright (C) 2014-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -16,7 +16,7 @@
/**
* enum iwl_fw_error_dump_type - types of data in the dump file
* @IWL_FW_ERROR_DUMP_CSR: Control Status Registers - from offset 0
- * @IWL_FW_ERROR_DUMP_RXF:
+ * @IWL_FW_ERROR_DUMP_RXF: RX FIFO contents
* @IWL_FW_ERROR_DUMP_TXCMD: last TX command data, structured as
* &struct iwl_fw_error_dump_txcmd packets
* @IWL_FW_ERROR_DUMP_DEV_FW_INFO: struct %iwl_fw_error_dump_info
@@ -24,21 +24,24 @@
* @IWL_FW_ERROR_DUMP_FW_MONITOR: firmware monitor
* @IWL_FW_ERROR_DUMP_PRPH: range of periphery registers - there can be several
* sections like this in a single file.
+ * @IWL_FW_ERROR_DUMP_TXF: TX FIFO contents
* @IWL_FW_ERROR_DUMP_FH_REGS: range of FH registers
* @IWL_FW_ERROR_DUMP_MEM: chunk of memory
* @IWL_FW_ERROR_DUMP_ERROR_INFO: description of what triggered this dump.
* Structured as &struct iwl_fw_error_dump_trigger_desc.
* @IWL_FW_ERROR_DUMP_RB: the content of an RB structured as
* &struct iwl_fw_error_dump_rb
- * @IWL_FW_ERROR_PAGING: UMAC's image memory segments which were
+ * @IWL_FW_ERROR_DUMP_PAGING: UMAC's image memory segments which were
* paged to the DRAM.
* @IWL_FW_ERROR_DUMP_RADIO_REG: Dump the radio registers.
+ * @IWL_FW_ERROR_DUMP_INTERNAL_TXF: internal TX FIFO data
* @IWL_FW_ERROR_DUMP_EXTERNAL: used only by external code utilities, and
* for that reason is not in use in any other place in the Linux Wi-Fi
* stack.
* @IWL_FW_ERROR_DUMP_MEM_CFG: the addresses and sizes of fifos in the smem,
* which we get from the fw after ALIVE. The content is structured as
* &struct iwl_fw_error_dump_smem_cfg.
+ * @IWL_FW_ERROR_DUMP_D3_DEBUG_DATA: D3 debug data
*/
enum iwl_fw_error_dump_type {
/* 0 is deprecated */
@@ -59,8 +62,6 @@ enum iwl_fw_error_dump_type {
IWL_FW_ERROR_DUMP_EXTERNAL = 15, /* Do not move */
IWL_FW_ERROR_DUMP_MEM_CFG = 16,
IWL_FW_ERROR_DUMP_D3_DEBUG_DATA = 17,
-
- IWL_FW_ERROR_DUMP_MAX,
};
/**
@@ -442,7 +443,7 @@ struct iwl_fw_ini_err_table_dump {
* struct iwl_fw_error_dump_rb - content of an Receive Buffer
* @index: the index of the Receive Buffer in the Rx queue
* @rxq: the RB's Rx queue
- * @reserved:
+ * @reserved: reserved
* @data: the content of the Receive Buffer
*/
struct iwl_fw_error_dump_rb {
@@ -488,7 +489,7 @@ struct iwl_fw_ini_special_device_memory {
* struct iwl_fw_error_dump_paging - content of the UMAC's image page
* block on DRAM
* @index: the index of the page block
- * @reserved:
+ * @reserved: reserved
* @data: the content of the page block
*/
struct iwl_fw_error_dump_paging {
@@ -511,6 +512,7 @@ iwl_fw_error_next_data(struct iwl_fw_error_dump_data *data)
/**
* enum iwl_fw_dbg_trigger - triggers available
*
+ * @FW_DBG_TRIGGER_INVALID: invalid trigger value
* @FW_DBG_TRIGGER_USER: trigger log collection by user
* This should not be defined as a trigger to the driver, but a value the
* driver should set to indicate that the trigger was initiated by the
@@ -530,14 +532,15 @@ iwl_fw_error_next_data(struct iwl_fw_error_dump_data *data)
* @FW_DBG_TRIGGER_TIME_EVENT: trigger log collection upon time events related
* events.
* @FW_DBG_TRIGGER_BA: trigger log collection upon BlockAck related events.
- * @FW_DBG_TX_LATENCY: trigger log collection when the tx latency goes above a
- * threshold.
- * @FW_DBG_TDLS: trigger log collection upon TDLS related events.
+ * @FW_DBG_TRIGGER_TX_LATENCY: trigger log collection when the tx latency
+ * goes above a threshold.
+ * @FW_DBG_TRIGGER_TDLS: trigger log collection upon TDLS related events.
* @FW_DBG_TRIGGER_TX_STATUS: trigger log collection upon tx status when
* the firmware sends a tx reply.
* @FW_DBG_TRIGGER_ALIVE_TIMEOUT: trigger log collection if alive flow timeouts
* @FW_DBG_TRIGGER_DRIVER: trigger log collection upon a flow failure
* in the driver.
+ * @FW_DBG_TRIGGER_MAX: beyond triggers, number for sizing arrays etc.
*/
enum iwl_fw_dbg_trigger {
FW_DBG_TRIGGER_INVALID = 0,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index bfc39bd5bbc6..f69d29e531c8 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2008-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2008-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -216,6 +216,7 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
* ADD_MODIFY_STA_KEY_API_S_VER_2.
* @IWL_UCODE_TLV_API_STA_TYPE: This ucode supports station type assignement.
* @IWL_UCODE_TLV_API_NAN2_VER2: This ucode supports NAN API version 2
+ * @IWL_UCODE_TLV_API_ADAPTIVE_DWELL: support for adaptive dwell in scanning
* @IWL_UCODE_TLV_API_NEW_RX_STATS: should new RX STATISTICS API be used
* @IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY: Quota command includes a field
* indicating low latency direction.
@@ -239,14 +240,21 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
* SCAN_OFFLOAD_PROFILES_QUERY_RSP_S.
* @IWL_UCODE_TLV_API_MBSSID_HE: This ucode supports v2 of
* STA_CONTEXT_DOT11AX_API_S
+ * @IWL_UCODE_TLV_API_FTM_RTT_ACCURACY: version 7 of the range response API
+ * is supported by FW, this indicates the RTT confidence value
* @IWL_UCODE_TLV_API_SAR_TABLE_VER: This ucode supports different sar
* version tables.
* @IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG: This ucode supports v3 of
- * SCAN_CONFIG_DB_CMD_API_S.
+ * SCAN_CONFIG_DB_CMD_API_S.
+ * @IWL_UCODE_TLV_API_ADWELL_HB_DEF_N_AP: support for setting adaptive dwell
+ * number of APs in the 5 GHz band
+ * @IWL_UCODE_TLV_API_BAND_IN_RX_DATA: FW reports band number in RX notification
* @IWL_UCODE_TLV_API_NO_HOST_DISABLE_TX: Firmware offloaded the station disable tx
* logic.
* @IWL_UCODE_TLV_API_INT_DBG_BUF_CLEAR: Firmware supports clearing the debug
* internal buffer
+ * @IWL_UCODE_TLV_API_SMART_FIFO_OFFLOAD: Firmware doesn't need the host to
+ * configure the smart fifo
*
* @NUM_IWL_UCODE_TLV_API: number of bits used
*/
@@ -287,6 +295,7 @@ enum iwl_ucode_tlv_api {
/* API Set 2 */
IWL_UCODE_TLV_API_NO_HOST_DISABLE_TX = (__force iwl_ucode_tlv_api_t)66,
IWL_UCODE_TLV_API_INT_DBG_BUF_CLEAR = (__force iwl_ucode_tlv_api_t)67,
+ IWL_UCODE_TLV_API_SMART_FIFO_OFFLOAD = (__force iwl_ucode_tlv_api_t)68,
NUM_IWL_UCODE_TLV_API
/*
@@ -383,6 +392,9 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t;
* channels even when these are not enabled.
* @IWL_UCODE_TLV_CAPA_DUMP_COMPLETE_SUPPORT: Support for indicating dump collection
* complete to FW.
+ * @IWL_UCODE_TLV_CAPA_SPP_AMSDU_SUPPORT: Support SPP (signaling and payload
+ * protected) A-MSDU.
+ * @IWL_UCODE_TLV_CAPA_SECURE_LTF_SUPPORT: Support secure LTF measurement.
*
* @NUM_IWL_UCODE_TLV_CAPA: number of bits used
*/
@@ -468,6 +480,7 @@ enum iwl_ucode_tlv_capa {
IWL_UCODE_TLV_CAPA_PSC_CHAN_SUPPORT = (__force iwl_ucode_tlv_capa_t)98,
IWL_UCODE_TLV_CAPA_BIGTK_SUPPORT = (__force iwl_ucode_tlv_capa_t)100,
+ IWL_UCODE_TLV_CAPA_SPP_AMSDU_SUPPORT = (__force iwl_ucode_tlv_capa_t)103,
IWL_UCODE_TLV_CAPA_DRAM_FRAG_SUPPORT = (__force iwl_ucode_tlv_capa_t)104,
IWL_UCODE_TLV_CAPA_DUMP_COMPLETE_SUPPORT = (__force iwl_ucode_tlv_capa_t)105,
IWL_UCODE_TLV_CAPA_SYNCED_TIME = (__force iwl_ucode_tlv_capa_t)106,
@@ -480,7 +493,7 @@ enum iwl_ucode_tlv_capa {
IWL_UCODE_TLV_CAPA_STA_EXP_MFP_SUPPORT = (__force iwl_ucode_tlv_capa_t)114,
IWL_UCODE_TLV_CAPA_SNIFF_VALIDATE_SUPPORT = (__force iwl_ucode_tlv_capa_t)116,
IWL_UCODE_TLV_CAPA_CHINA_22_REG_SUPPORT = (__force iwl_ucode_tlv_capa_t)117,
-
+ IWL_UCODE_TLV_CAPA_SECURE_LTF_SUPPORT = (__force iwl_ucode_tlv_capa_t)121,
NUM_IWL_UCODE_TLV_CAPA
/*
* This construction make both sparse (which cannot increment the previous
@@ -566,6 +579,7 @@ enum iwl_fw_dbg_reg_operator {
* struct iwl_fw_dbg_reg_op - an operation on a register
*
* @op: &enum iwl_fw_dbg_reg_operator
+ * @reserved: reserved
* @addr: offset of the register
* @val: value
*/
@@ -612,6 +626,7 @@ struct iwl_fw_dbg_mem_seg_tlv {
* @version: version of the TLV - currently 0
* @monitor_mode: &enum iwl_fw_dbg_monitor_mode
* @size_power: buffer size will be 2^(size_power + 11)
+ * @reserved: reserved
* @base_reg: addr of the base addr register (PRPH)
* @end_reg: addr of the end addr register (PRPH)
* @write_ptr_reg: the addr of the reg of the write pointer
@@ -722,6 +737,8 @@ enum iwl_fw_dbg_trigger_vif_type {
* @trig_dis_ms: the time, in milliseconds, after an occurrence of this
* trigger in which another occurrence should be ignored.
* @flags: &enum iwl_fw_dbg_trigger_flags
+ * @reserved: reserved (for alignment)
+ * @data: trigger data
*/
struct iwl_fw_dbg_trigger_tlv {
__le32 id;
@@ -762,7 +779,7 @@ struct iwl_fw_dbg_trigger_missed_bcon {
/**
* struct iwl_fw_dbg_trigger_cmd - configures trigger for messages from FW.
- * cmds: the list of commands to trigger the collection on
+ * @cmds: the list of commands to trigger the collection on
*/
struct iwl_fw_dbg_trigger_cmd {
struct cmd {
@@ -772,7 +789,7 @@ struct iwl_fw_dbg_trigger_cmd {
} __packed;
/**
- * iwl_fw_dbg_trigger_stats - configures trigger for statistics
+ * struct iwl_fw_dbg_trigger_stats - configures trigger for statistics
* @stop_offset: the offset of the value to be monitored
* @stop_threshold: the threshold above which to collect
* @start_offset: the offset of the value to be monitored
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
index 650e4bde9c17..1195e708caa9 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2020-2023 Intel Corporation
+ * Copyright(c) 2020-2024 Intel Corporation
*/
#include "iwl-drv.h"
@@ -12,6 +12,8 @@
#include "fw/api/alive.h"
#include "fw/uefi.h"
+#define IWL_PNVM_REDUCED_CAP_BIT BIT(25)
+
struct iwl_pnvm_section {
__le32 offset;
const u8 data[];
@@ -173,6 +175,7 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data,
while (len >= sizeof(*tlv)) {
u32 tlv_len, tlv_type;
+ u32 rf_type;
len -= sizeof(*tlv);
tlv = (const void *)data;
@@ -201,6 +204,16 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data,
data += sizeof(*tlv) + ALIGN(tlv_len, 4);
len -= ALIGN(tlv_len, 4);
+ trans->reduced_cap_sku = false;
+ rf_type = CSR_HW_RFID_TYPE(trans->hw_rf_id);
+ if ((trans->sku_id[0] & IWL_PNVM_REDUCED_CAP_BIT) &&
+ rf_type == IWL_CFG_RF_TYPE_FM)
+ trans->reduced_cap_sku = true;
+
+ IWL_DEBUG_FW(trans,
+ "Reduced SKU device %d\n",
+ trans->reduced_cap_sku);
+
if (trans->sku_id[0] == le32_to_cpu(sku_id->data[0]) &&
trans->sku_id[1] == le32_to_cpu(sku_id->data[1]) &&
trans->sku_id[2] == le32_to_cpu(sku_id->data[2])) {
@@ -239,7 +252,7 @@ static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len)
}
new_len = pnvm->size;
- *data = kmemdup(pnvm->data, pnvm->size, GFP_KERNEL);
+ *data = kvmemdup(pnvm->data, pnvm->size, GFP_KERNEL);
release_firmware(pnvm);
if (!*data)
@@ -255,21 +268,27 @@ static u8 *iwl_get_pnvm_image(struct iwl_trans *trans_p, size_t *len)
struct pnvm_sku_package *package;
u8 *image = NULL;
- /* First attempt to get the PNVM from BIOS */
- package = iwl_uefi_get_pnvm(trans_p, len);
- if (!IS_ERR_OR_NULL(package)) {
- if (*len >= sizeof(*package)) {
- /* we need only the data */
- *len -= sizeof(*package);
- image = kmemdup(package->data, *len, GFP_KERNEL);
+ /* Get PNVM from BIOS for non-Intel SKU */
+ if (trans_p->sku_id[2]) {
+ package = iwl_uefi_get_pnvm(trans_p, len);
+ if (!IS_ERR_OR_NULL(package)) {
+ if (*len >= sizeof(*package)) {
+ /* we need only the data */
+ *len -= sizeof(*package);
+ image = kvmemdup(package->data,
+ *len, GFP_KERNEL);
+ }
+ /*
+ * free package regardless of whether kmemdup
+ * succeeded
+ */
+ kfree(package);
+ if (image)
+ return image;
}
- /* free package regardless of whether kmemdup succeeded */
- kfree(package);
- if (image)
- return image;
}
- /* If it's not available, try from the filesystem */
+ /* If it's not available, or for Intel SKU, try from the filesystem */
if (iwl_pnvm_get_from_fs(trans_p, &image, len))
return NULL;
return image;
@@ -314,7 +333,7 @@ static void iwl_pnvm_load_pnvm_to_trans(struct iwl_trans *trans,
set:
iwl_trans_set_pnvm(trans, capa);
free:
- kfree(data);
+ kvfree(data);
kfree(pnvm_data);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c
new file mode 100644
index 000000000000..36d506463e0e
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/dmi.h>
+#include "iwl-drv.h"
+#include "iwl-debug.h"
+#include "regulatory.h"
+#include "fw/runtime.h"
+#include "fw/uefi.h"
+
+#define GET_BIOS_TABLE(__name, ...) \
+do { \
+ int ret = -ENOENT; \
+ if (fwrt->uefi_tables_lock_status > UEFI_WIFI_GUID_UNLOCKED) \
+ ret = iwl_uefi_get_ ## __name(__VA_ARGS__); \
+ if (ret < 0) \
+ ret = iwl_acpi_get_ ## __name(__VA_ARGS__); \
+ return ret; \
+} while (0)
+
+#define IWL_BIOS_TABLE_LOADER(__name) \
+int iwl_bios_get_ ## __name(struct iwl_fw_runtime *fwrt) \
+{GET_BIOS_TABLE(__name, fwrt); } \
+IWL_EXPORT_SYMBOL(iwl_bios_get_ ## __name)
+
+#define IWL_BIOS_TABLE_LOADER_DATA(__name, data_type) \
+int iwl_bios_get_ ## __name(struct iwl_fw_runtime *fwrt, \
+ data_type * data) \
+{GET_BIOS_TABLE(__name, fwrt, data); } \
+IWL_EXPORT_SYMBOL(iwl_bios_get_ ## __name)
+
+IWL_BIOS_TABLE_LOADER(wrds_table);
+IWL_BIOS_TABLE_LOADER(ewrd_table);
+IWL_BIOS_TABLE_LOADER(wgds_table);
+IWL_BIOS_TABLE_LOADER(ppag_table);
+IWL_BIOS_TABLE_LOADER_DATA(tas_table, struct iwl_tas_data);
+IWL_BIOS_TABLE_LOADER_DATA(pwr_limit, u64);
+IWL_BIOS_TABLE_LOADER_DATA(mcc, char);
+IWL_BIOS_TABLE_LOADER_DATA(eckv, u32);
+
+
+static const struct dmi_system_id dmi_ppag_approved_list[] = {
+ { .ident = "HP",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ },
+ },
+ { .ident = "SAMSUNG",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD"),
+ },
+ },
+ { .ident = "MSFT",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ },
+ },
+ { .ident = "ASUS",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ },
+ },
+ { .ident = "GOOGLE-HP",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ },
+ },
+ { .ident = "GOOGLE-ASUS",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTek COMPUTER INC."),
+ },
+ },
+ { .ident = "GOOGLE-SAMSUNG",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_BOARD_VENDOR, "SAMSUNG ELECTRONICS CO., LTD"),
+ },
+ },
+ { .ident = "DELL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ },
+ },
+ { .ident = "DELL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+ },
+ },
+ { .ident = "RAZER",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Razer"),
+ },
+ },
+ { .ident = "Honor",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HONOR"),
+ },
+ },
+ {}
+};
+
+static const struct dmi_system_id dmi_tas_approved_list[] = {
+ { .ident = "HP",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ },
+ },
+ { .ident = "SAMSUNG",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD"),
+ },
+ },
+ { .ident = "LENOVO",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ },
+ },
+ { .ident = "DELL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ },
+ },
+ { .ident = "MSFT",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ },
+ },
+ { .ident = "Acer",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ },
+ },
+ { .ident = "ASUS",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ },
+ },
+ { .ident = "GOOGLE-HP",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ },
+ },
+ { .ident = "MSI",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International Co., Ltd."),
+ },
+ },
+ { .ident = "Honor",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HONOR"),
+ },
+ },
+ /* keep last */
+ {}
+};
+
+bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
+{
+ /*
+ * The PER_CHAIN_LIMIT_OFFSET_CMD command is not supported on
+ * earlier firmware versions. Unfortunately, we don't have a
+ * TLV API flag to rely on, so rely on the major version which
+ * is in the first byte of ucode_ver. This was implemented
+ * initially on version 38 and then backported to 17. It was
+ * also backported to 29, but only for 7265D devices. The
+ * intention was to have it in 36 as well, but not all 8000
+ * family got this feature enabled. The 8000 family is the
+ * only one using version 36, so skip this version entirely.
+ */
+ return IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) >= 38 ||
+ (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 17 &&
+ fwrt->trans->hw_rev != CSR_HW_REV_TYPE_3160) ||
+ (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 29 &&
+ ((fwrt->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
+ CSR_HW_REV_TYPE_7265D));
+}
+IWL_EXPORT_SYMBOL(iwl_sar_geo_support);
+
+int iwl_sar_geo_fill_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset *table,
+ u32 n_bands, u32 n_profiles)
+{
+ int i, j;
+
+ if (!fwrt->geo_enabled)
+ return -ENODATA;
+
+ if (!iwl_sar_geo_support(fwrt))
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < n_profiles; i++) {
+ for (j = 0; j < n_bands; j++) {
+ struct iwl_per_chain_offset *chain =
+ &table[i * n_bands + j];
+
+ chain->max_tx_power =
+ cpu_to_le16(fwrt->geo_profiles[i].bands[j].max);
+ chain->chain_a =
+ fwrt->geo_profiles[i].bands[j].chains[0];
+ chain->chain_b =
+ fwrt->geo_profiles[i].bands[j].chains[1];
+ IWL_DEBUG_RADIO(fwrt,
+ "SAR geographic profile[%d] Band[%d]: chain A = %d chain B = %d max_tx_power = %d\n",
+ i, j,
+ fwrt->geo_profiles[i].bands[j].chains[0],
+ fwrt->geo_profiles[i].bands[j].chains[1],
+ fwrt->geo_profiles[i].bands[j].max);
+ }
+ }
+
+ return 0;
+}
+IWL_EXPORT_SYMBOL(iwl_sar_geo_fill_table);
+
+static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt,
+ __le16 *per_chain, u32 n_subbands,
+ int prof_a, int prof_b)
+{
+ int profs[BIOS_SAR_NUM_CHAINS] = { prof_a, prof_b };
+ int i, j;
+
+ for (i = 0; i < BIOS_SAR_NUM_CHAINS; i++) {
+ struct iwl_sar_profile *prof;
+
+ /* don't allow SAR to be disabled (profile 0 means disable) */
+ if (profs[i] == 0)
+ return -EPERM;
+
+ /* we are off by one, so allow up to BIOS_SAR_MAX_PROFILE_NUM */
+ if (profs[i] > BIOS_SAR_MAX_PROFILE_NUM)
+ return -EINVAL;
+
+ /* profiles go from 1 to 4, so decrement to access the array */
+ prof = &fwrt->sar_profiles[profs[i] - 1];
+
+ /* if the profile is disabled, do nothing */
+ if (!prof->enabled) {
+ IWL_DEBUG_RADIO(fwrt, "SAR profile %d is disabled.\n",
+ profs[i]);
+ /*
+ * if one of the profiles is disabled, we
+ * ignore all of them and return 1 to
+ * differentiate disabled from other failures.
+ */
+ return 1;
+ }
+
+ IWL_DEBUG_INFO(fwrt,
+ "SAR EWRD: chain %d profile index %d\n",
+ i, profs[i]);
+ IWL_DEBUG_RADIO(fwrt, " Chain[%d]:\n", i);
+ for (j = 0; j < n_subbands; j++) {
+ per_chain[i * n_subbands + j] =
+ cpu_to_le16(prof->chains[i].subbands[j]);
+ IWL_DEBUG_RADIO(fwrt, " Band[%d] = %d * .125dBm\n",
+ j, prof->chains[i].subbands[j]);
+ }
+ }
+
+ return 0;
+}
+
+int iwl_sar_fill_profile(struct iwl_fw_runtime *fwrt,
+ __le16 *per_chain, u32 n_tables, u32 n_subbands,
+ int prof_a, int prof_b)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < n_tables; i++) {
+ ret = iwl_sar_fill_table(fwrt,
+ &per_chain[i * n_subbands * BIOS_SAR_NUM_CHAINS],
+ n_subbands, prof_a, prof_b);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_sar_fill_profile);
+
+static bool iwl_ppag_value_valid(struct iwl_fw_runtime *fwrt, int chain,
+ int subband)
+{
+ s8 ppag_val = fwrt->ppag_chains[chain].subbands[subband];
+
+ if ((subband == 0 &&
+ (ppag_val > IWL_PPAG_MAX_LB || ppag_val < IWL_PPAG_MIN_LB)) ||
+ (subband != 0 &&
+ (ppag_val > IWL_PPAG_MAX_HB || ppag_val < IWL_PPAG_MIN_HB))) {
+ IWL_DEBUG_RADIO(fwrt, "Invalid PPAG value: %d\n", ppag_val);
+ return false;
+ }
+ return true;
+}
+
+int iwl_fill_ppag_table(struct iwl_fw_runtime *fwrt,
+ union iwl_ppag_table_cmd *cmd, int *cmd_size)
+{
+ u8 cmd_ver;
+ int i, j, num_sub_bands;
+ s8 *gain;
+ bool send_ppag_always;
+
+ /* many firmware images for JF lie about this */
+ if (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id) ==
+ CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF))
+ return -EOPNOTSUPP;
+
+ if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) {
+ IWL_DEBUG_RADIO(fwrt,
+ "PPAG capability not supported by FW, command not sent.\n");
+ return -EINVAL;
+ }
+
+ cmd_ver = iwl_fw_lookup_cmd_ver(fwrt->fw,
+ WIDE_ID(PHY_OPS_GROUP,
+ PER_PLATFORM_ANT_GAIN_CMD), 1);
+ /*
+ * Starting from ver 4, driver needs to send the PPAG CMD regardless
+ * if PPAG is enabled/disabled or valid/invalid.
+ */
+ send_ppag_always = cmd_ver > 3;
+
+ /* Don't send PPAG if it is disabled */
+ if (!send_ppag_always && !fwrt->ppag_flags) {
+ IWL_DEBUG_RADIO(fwrt, "PPAG not enabled, command not sent.\n");
+ return -EINVAL;
+ }
+
+ /* The 'flags' field is the same in v1 and in v2 so we can just
+ * use v1 to access it.
+ */
+ cmd->v1.flags = cpu_to_le32(fwrt->ppag_flags);
+
+ IWL_DEBUG_RADIO(fwrt, "PPAG cmd ver is %d\n", cmd_ver);
+ if (cmd_ver == 1) {
+ num_sub_bands = IWL_NUM_SUB_BANDS_V1;
+ gain = cmd->v1.gain[0];
+ *cmd_size = sizeof(cmd->v1);
+ if (fwrt->ppag_ver >= 1) {
+ /* in this case FW supports revision 0 */
+ IWL_DEBUG_RADIO(fwrt,
+ "PPAG table rev is %d, send truncated table\n",
+ fwrt->ppag_ver);
+ }
+ } else if (cmd_ver >= 2 && cmd_ver <= 5) {
+ num_sub_bands = IWL_NUM_SUB_BANDS_V2;
+ gain = cmd->v2.gain[0];
+ *cmd_size = sizeof(cmd->v2);
+ if (fwrt->ppag_ver == 0) {
+ /* in this case FW supports revisions 1,2 or 3 */
+ IWL_DEBUG_RADIO(fwrt,
+ "PPAG table rev is 0, send padded table\n");
+ }
+ } else {
+ IWL_DEBUG_RADIO(fwrt, "Unsupported PPAG command version\n");
+ return -EINVAL;
+ }
+
+ /* ppag mode */
+ IWL_DEBUG_RADIO(fwrt,
+ "PPAG MODE bits were read from bios: %d\n",
+ le32_to_cpu(cmd->v1.flags));
+
+ if (cmd_ver == 5)
+ cmd->v1.flags &= cpu_to_le32(IWL_PPAG_CMD_V5_MASK);
+ else if (cmd_ver < 5)
+ cmd->v1.flags &= cpu_to_le32(IWL_PPAG_CMD_V4_MASK);
+
+ if ((cmd_ver == 1 &&
+ !fw_has_capa(&fwrt->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_PPAG_CHINA_BIOS_SUPPORT)) ||
+ (cmd_ver == 2 && fwrt->ppag_ver >= 2)) {
+ cmd->v1.flags &= cpu_to_le32(IWL_PPAG_ETSI_MASK);
+ IWL_DEBUG_RADIO(fwrt, "masking ppag China bit\n");
+ } else {
+ IWL_DEBUG_RADIO(fwrt, "isn't masking ppag China bit\n");
+ }
+
+ IWL_DEBUG_RADIO(fwrt,
+ "PPAG MODE bits going to be sent: %d\n",
+ le32_to_cpu(cmd->v1.flags));
+
+ for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) {
+ for (j = 0; j < num_sub_bands; j++) {
+ if (!send_ppag_always &&
+ !iwl_ppag_value_valid(fwrt, i, j))
+ return -EINVAL;
+
+ gain[i * num_sub_bands + j] =
+ fwrt->ppag_chains[i].subbands[j];
+ IWL_DEBUG_RADIO(fwrt,
+ "PPAG table: chain[%d] band[%d]: gain = %d\n",
+ i, j, gain[i * num_sub_bands + j]);
+ }
+ }
+
+ return 0;
+}
+IWL_EXPORT_SYMBOL(iwl_fill_ppag_table);
+
+bool iwl_is_ppag_approved(struct iwl_fw_runtime *fwrt)
+{
+ if (!dmi_check_system(dmi_ppag_approved_list)) {
+ IWL_DEBUG_RADIO(fwrt,
+ "System vendor '%s' is not in the approved list, disabling PPAG.\n",
+ dmi_get_system_info(DMI_SYS_VENDOR) ?: "<unknown>");
+ fwrt->ppag_flags = 0;
+ return false;
+ }
+
+ return true;
+}
+IWL_EXPORT_SYMBOL(iwl_is_ppag_approved);
+
+bool iwl_is_tas_approved(void)
+{
+ return dmi_check_system(dmi_tas_approved_list);
+}
+IWL_EXPORT_SYMBOL(iwl_is_tas_approved);
+
+int iwl_parse_tas_selection(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *tas_data,
+ const u32 tas_selection)
+{
+ u8 override_iec = u32_get_bits(tas_selection,
+ IWL_WTAS_OVERRIDE_IEC_MSK);
+ u8 enabled_iec = u32_get_bits(tas_selection, IWL_WTAS_ENABLE_IEC_MSK);
+ u8 usa_tas_uhb = u32_get_bits(tas_selection, IWL_WTAS_USA_UHB_MSK);
+ int enabled = tas_selection & IWL_WTAS_ENABLED_MSK;
+
+ IWL_DEBUG_RADIO(fwrt, "TAS selection as read from BIOS: 0x%x\n",
+ tas_selection);
+
+ tas_data->usa_tas_uhb_allowed = usa_tas_uhb;
+ tas_data->override_tas_iec = override_iec;
+ tas_data->enable_tas_iec = enabled_iec;
+
+ return enabled;
+}
+
+__le32 iwl_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
+{
+ int ret;
+ u32 val;
+ __le32 config_bitmap = 0;
+
+ switch (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id)) {
+ case IWL_CFG_RF_TYPE_HR1:
+ case IWL_CFG_RF_TYPE_HR2:
+ case IWL_CFG_RF_TYPE_JF1:
+ case IWL_CFG_RF_TYPE_JF2:
+ ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ENABLE_INDONESIA_5G2,
+ &val);
+
+ if (!ret && val == DSM_VALUE_INDONESIA_ENABLE)
+ config_bitmap |=
+ cpu_to_le32(LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK);
+ break;
+ default:
+ break;
+ }
+
+ ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_DISABLE_SRD, &val);
+ if (!ret) {
+ if (val == DSM_VALUE_SRD_PASSIVE)
+ config_bitmap |=
+ cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK);
+ else if (val == DSM_VALUE_SRD_DISABLE)
+ config_bitmap |=
+ cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
+ }
+
+ if (fw_has_capa(&fwrt->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_CHINA_22_REG_SUPPORT)) {
+ ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_REGULATORY_CONFIG,
+ &val);
+ /*
+ * China 2022 enable if the BIOS object does not exist or
+ * if it is enabled in BIOS.
+ */
+ if (ret < 0 || val & DSM_MASK_CHINA_22_REG)
+ config_bitmap |=
+ cpu_to_le32(LARI_CONFIG_ENABLE_CHINA_22_REG_SUPPORT_MSK);
+ }
+
+ return config_bitmap;
+}
+IWL_EXPORT_SYMBOL(iwl_get_lari_config_bitmap);
+
+int iwl_bios_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func,
+ u32 *value)
+{
+ GET_BIOS_TABLE(dsm, fwrt, func, value);
+}
+IWL_EXPORT_SYMBOL(iwl_bios_get_dsm);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h
new file mode 100644
index 000000000000..28e774766847
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (C) 2023 Intel Corporation
+ */
+
+#ifndef __fw_regulatory_h__
+#define __fw_regulatory_h__
+
+#include "fw/img.h"
+#include "fw/api/commands.h"
+#include "fw/api/power.h"
+#include "fw/api/phy.h"
+#include "fw/api/config.h"
+#include "fw/img.h"
+#include "iwl-trans.h"
+
+#define BIOS_SAR_MAX_PROFILE_NUM 4
+/*
+ * Each SAR profile has (up to, depends on the table revision) 4 chains:
+ * chain A, chain B, chain A when in CDB, chain B when in CDB
+ */
+#define BIOS_SAR_MAX_CHAINS_PER_PROFILE 4
+#define BIOS_SAR_NUM_CHAINS 2
+#define BIOS_SAR_MAX_SUB_BANDS_NUM 11
+
+#define BIOS_GEO_NUM_CHAINS 2
+#define BIOS_GEO_MAX_NUM_BANDS 3
+#define BIOS_GEO_MAX_PROFILE_NUM 8
+#define BIOS_GEO_MIN_PROFILE_NUM 3
+
+#define IWL_SAR_ENABLE_MSK BIT(0)
+
+/* PPAG gain value bounds in 1/8 dBm */
+#define IWL_PPAG_MIN_LB -16
+#define IWL_PPAG_MAX_LB 24
+#define IWL_PPAG_MIN_HB -16
+#define IWL_PPAG_MAX_HB 40
+
+#define IWL_PPAG_ETSI_CHINA_MASK 3
+#define IWL_PPAG_REV3_MASK 0x7FF
+
+#define IWL_WTAS_BLACK_LIST_MAX 16
+#define IWL_WTAS_ENABLED_MSK 0x1
+#define IWL_WTAS_OVERRIDE_IEC_MSK 0x2
+#define IWL_WTAS_ENABLE_IEC_MSK 0x4
+#define IWL_WTAS_USA_UHB_MSK BIT(16)
+
+/*
+ * The profile for revision 2 is a superset of revision 1, which is in
+ * turn a superset of revision 0. So we can store all revisions
+ * inside revision 2, which is what we represent here.
+ */
+
+/*
+ * struct iwl_sar_profile_chain - per-chain values of a SAR profile
+ * @subbands: the SAR value for each subband
+ */
+struct iwl_sar_profile_chain {
+ u8 subbands[BIOS_SAR_MAX_SUB_BANDS_NUM];
+};
+
+/*
+ * struct iwl_sar_profile - SAR profile from SAR tables
+ * @enabled: whether the profile is enabled or not
+ * @chains: per-chain SAR values
+ */
+struct iwl_sar_profile {
+ bool enabled;
+ struct iwl_sar_profile_chain chains[BIOS_SAR_MAX_CHAINS_PER_PROFILE];
+};
+
+/* Same thing as with SAR, all revisions fit in revision 2 */
+
+/*
+ * struct iwl_geo_profile_band - per-band geo SAR offsets
+ * @max: the max tx power allowed for the band
+ * @chains: SAR offsets values for each chain
+ */
+struct iwl_geo_profile_band {
+ u8 max;
+ u8 chains[BIOS_GEO_NUM_CHAINS];
+};
+
+/*
+ * struct iwl_geo_profile - geo profile
+ * @bands: per-band table of the SAR offsets
+ */
+struct iwl_geo_profile {
+ struct iwl_geo_profile_band bands[BIOS_GEO_MAX_NUM_BANDS];
+};
+
+/* Same thing as with SAR, all revisions fit in revision 2 */
+struct iwl_ppag_chain {
+ s8 subbands[BIOS_SAR_MAX_SUB_BANDS_NUM];
+};
+
+struct iwl_tas_data {
+ __le32 block_list_size;
+ __le32 block_list_array[IWL_WTAS_BLACK_LIST_MAX];
+ u8 override_tas_iec;
+ u8 enable_tas_iec;
+ u8 usa_tas_uhb_allowed;
+};
+
+/* For DSM revision 0 and 4 */
+enum iwl_dsm_funcs {
+ DSM_FUNC_QUERY = 0,
+ DSM_FUNC_DISABLE_SRD = 1,
+ DSM_FUNC_ENABLE_INDONESIA_5G2 = 2,
+ DSM_FUNC_ENABLE_6E = 3,
+ DSM_FUNC_REGULATORY_CONFIG = 4,
+ DSM_FUNC_11AX_ENABLEMENT = 6,
+ DSM_FUNC_ENABLE_UNII4_CHAN = 7,
+ DSM_FUNC_ACTIVATE_CHANNEL = 8,
+ DSM_FUNC_FORCE_DISABLE_CHANNELS = 9,
+ DSM_FUNC_ENERGY_DETECTION_THRESHOLD = 10,
+ DSM_FUNC_RFI_CONFIG = 11,
+ DSM_FUNC_NUM_FUNCS = 12,
+};
+
+enum iwl_dsm_values_srd {
+ DSM_VALUE_SRD_ACTIVE,
+ DSM_VALUE_SRD_PASSIVE,
+ DSM_VALUE_SRD_DISABLE,
+ DSM_VALUE_SRD_MAX
+};
+
+enum iwl_dsm_values_indonesia {
+ DSM_VALUE_INDONESIA_DISABLE,
+ DSM_VALUE_INDONESIA_ENABLE,
+ DSM_VALUE_INDONESIA_RESERVED,
+ DSM_VALUE_INDONESIA_MAX
+};
+
+enum iwl_dsm_values_rfi {
+ DSM_VALUE_RFI_DLVR_DISABLE = BIT(0),
+ DSM_VALUE_RFI_DDR_DISABLE = BIT(1),
+};
+
+#define DSM_VALUE_RFI_DISABLE (DSM_VALUE_RFI_DLVR_DISABLE |\
+ DSM_VALUE_RFI_DDR_DISABLE)
+
+enum iwl_dsm_masks_reg {
+ DSM_MASK_CHINA_22_REG = BIT(2)
+};
+
+struct iwl_fw_runtime;
+
+bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt);
+
+int iwl_sar_geo_fill_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset *table,
+ u32 n_bands, u32 n_profiles);
+
+int iwl_sar_fill_profile(struct iwl_fw_runtime *fwrt,
+ __le16 *per_chain, u32 n_tables, u32 n_subbands,
+ int prof_a, int prof_b);
+
+int iwl_fill_ppag_table(struct iwl_fw_runtime *fwrt,
+ union iwl_ppag_table_cmd *cmd,
+ int *cmd_size);
+
+bool iwl_is_ppag_approved(struct iwl_fw_runtime *fwrt);
+
+bool iwl_is_tas_approved(void);
+
+int iwl_parse_tas_selection(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *tas_data,
+ const u32 tas_selection);
+
+int iwl_bios_get_wrds_table(struct iwl_fw_runtime *fwrt);
+
+int iwl_bios_get_ewrd_table(struct iwl_fw_runtime *fwrt);
+
+int iwl_bios_get_wgds_table(struct iwl_fw_runtime *fwrt);
+
+int iwl_bios_get_ppag_table(struct iwl_fw_runtime *fwrt);
+
+int iwl_bios_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *data);
+
+int iwl_bios_get_pwr_limit(struct iwl_fw_runtime *fwrt,
+ u64 *dflt_pwr_limit);
+
+int iwl_bios_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc);
+int iwl_bios_get_eckv(struct iwl_fw_runtime *fwrt, u32 *ext_clk);
+
+__le32 iwl_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt);
+
+int iwl_bios_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func,
+ u32 *value);
+
+static inline u32 iwl_bios_get_ppag_flags(const u32 ppag_modes,
+ const u8 ppag_ver)
+{
+ return ppag_modes & (ppag_ver < 3 ? IWL_PPAG_ETSI_CHINA_MASK :
+ IWL_PPAG_REV3_MASK);
+}
+#endif /* __fw_regulatory_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index 357727774db9..b2bc4fd37abf 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef __iwl_fw_runtime_h__
#define __iwl_fw_runtime_h__
@@ -14,6 +14,7 @@
#include "fw/api/power.h"
#include "iwl-eeprom-parse.h"
#include "fw/acpi.h"
+#include "fw/regulatory.h"
struct iwl_fw_runtime_ops {
void (*dump_start)(void *ctx);
@@ -100,6 +101,11 @@ struct iwl_txf_iter_data {
* @dump: debug dump data
* @uats_enabled: VLP or AFC AP is enabled
* @uats_table: AP type table
+ * @uefi_tables_lock_status: The status of the WIFI GUID UEFI variables lock:
+ * 0: Unlocked, 1 and 2: Locked.
+ * Only read the UEFI variables if locked.
+ * @sar_profiles: sar profiles as read from WRDS/EWRD BIOS tables
+ * @geo_profiles: geographic profiles as read from WGDS BIOS table
*/
struct iwl_fw_runtime {
struct iwl_trans *trans;
@@ -158,24 +164,22 @@ struct iwl_fw_runtime {
#ifdef CONFIG_IWLWIFI_DEBUGFS
bool tpc_enabled;
#endif /* CONFIG_IWLWIFI_DEBUGFS */
-#ifdef CONFIG_ACPI
- struct iwl_sar_profile sar_profiles[ACPI_SAR_PROFILE_NUM];
+ struct iwl_sar_profile sar_profiles[BIOS_SAR_MAX_PROFILE_NUM];
u8 sar_chain_a_profile;
u8 sar_chain_b_profile;
- struct iwl_geo_profile geo_profiles[ACPI_NUM_GEO_PROFILES_REV3];
+ u8 reduced_power_flags;
+ struct iwl_geo_profile geo_profiles[BIOS_GEO_MAX_PROFILE_NUM];
u32 geo_rev;
u32 geo_num_profiles;
bool geo_enabled;
struct iwl_ppag_chain ppag_chains[IWL_NUM_CHAIN_LIMITS];
u32 ppag_flags;
- u32 ppag_ver;
- bool ppag_table_valid;
+ u8 ppag_ver;
struct iwl_sar_offset_mapping_cmd sgom_table;
bool sgom_enabled;
- u8 reduced_power_flags;
- bool uats_enabled;
struct iwl_uats_table_cmd uats_table;
-#endif
+ u8 uefi_tables_lock_status;
+ bool uats_enabled;
};
void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
index 2964c5fb11e9..e81fc0129b9d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2021-2023 Intel Corporation
+ * Copyright(c) 2021-2024 Intel Corporation
*/
#include "iwl-drv.h"
@@ -76,6 +76,42 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
return data;
}
+static
+void *iwl_uefi_get_verified_variable(struct iwl_trans *trans,
+ efi_char16_t *uefi_var_name,
+ char *var_name,
+ unsigned int expected_size,
+ unsigned long *size)
+{
+ void *var;
+ unsigned long var_size;
+
+ var = iwl_uefi_get_variable(uefi_var_name, &IWL_EFI_VAR_GUID,
+ &var_size);
+
+ if (IS_ERR(var)) {
+ IWL_DEBUG_RADIO(trans,
+ "%s UEFI variable not found 0x%lx\n", var_name,
+ PTR_ERR(var));
+ return var;
+ }
+
+ if (var_size < expected_size) {
+ IWL_DEBUG_RADIO(trans,
+ "Invalid %s UEFI variable len (%lu)\n",
+ var_name, var_size);
+ kfree(var);
+ return ERR_PTR(-EINVAL);
+ }
+
+ IWL_DEBUG_RADIO(trans, "%s from UEFI with size %lu\n", var_name,
+ var_size);
+
+ if (size)
+ *size = var_size;
+ return var;
+}
+
int iwl_uefi_handle_tlv_mem_desc(struct iwl_trans *trans, const u8 *data,
u32 tlv_len, struct iwl_pnvm_image *pnvm_data)
{
@@ -230,26 +266,13 @@ u8 *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
unsigned long package_size;
u8 *data;
- package = iwl_uefi_get_variable(IWL_UEFI_REDUCED_POWER_NAME,
- &IWL_EFI_VAR_GUID, &package_size);
-
- if (IS_ERR(package)) {
- IWL_DEBUG_FW(trans,
- "Reduced Power UEFI variable not found 0x%lx (len %lu)\n",
- PTR_ERR(package), package_size);
+ package = iwl_uefi_get_verified_variable(trans,
+ IWL_UEFI_REDUCED_POWER_NAME,
+ "Reduced Power",
+ sizeof(*package),
+ &package_size);
+ if (IS_ERR(package))
return ERR_CAST(package);
- }
-
- if (package_size < sizeof(*package)) {
- IWL_DEBUG_FW(trans,
- "Invalid Reduced Power UEFI variable len (%lu)\n",
- package_size);
- kfree(package);
- return ERR_PTR(-EINVAL);
- }
-
- IWL_DEBUG_FW(trans, "Read reduced power from UEFI with size %lu\n",
- package_size);
IWL_DEBUG_FW(trans, "rev %d, total_size %d, n_skus %d\n",
package->rev, package->total_size, package->n_skus);
@@ -283,32 +306,15 @@ static int iwl_uefi_step_parse(struct uefi_cnv_common_step_data *common_step_dat
void iwl_uefi_get_step_table(struct iwl_trans *trans)
{
struct uefi_cnv_common_step_data *data;
- unsigned long package_size;
int ret;
if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
return;
- data = iwl_uefi_get_variable(IWL_UEFI_STEP_NAME, &IWL_EFI_VAR_GUID,
- &package_size);
-
- if (IS_ERR(data)) {
- IWL_DEBUG_FW(trans,
- "STEP UEFI variable not found 0x%lx\n",
- PTR_ERR(data));
+ data = iwl_uefi_get_verified_variable(trans, IWL_UEFI_STEP_NAME,
+ "STEP", sizeof(*data), NULL);
+ if (IS_ERR(data))
return;
- }
-
- if (package_size < sizeof(*data)) {
- IWL_DEBUG_FW(trans,
- "Invalid STEP table UEFI variable len (%lu)\n",
- package_size);
- kfree(data);
- return;
- }
-
- IWL_DEBUG_FW(trans, "Read STEP from UEFI with size %lu\n",
- package_size);
ret = iwl_uefi_step_parse(data, trans);
if (ret < 0)
@@ -318,7 +324,6 @@ void iwl_uefi_get_step_table(struct iwl_trans *trans)
}
IWL_EXPORT_SYMBOL(iwl_uefi_get_step_table);
-#ifdef CONFIG_ACPI
static int iwl_uefi_sgom_parse(struct uefi_cnv_wlan_sgom_data *sgom_data,
struct iwl_fw_runtime *fwrt)
{
@@ -355,31 +360,15 @@ void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
struct iwl_fw_runtime *fwrt)
{
struct uefi_cnv_wlan_sgom_data *data;
- unsigned long package_size;
int ret;
if (!fwrt->geo_enabled)
return;
- data = iwl_uefi_get_variable(IWL_UEFI_SGOM_NAME, &IWL_EFI_VAR_GUID,
- &package_size);
- if (IS_ERR(data)) {
- IWL_DEBUG_FW(trans,
- "SGOM UEFI variable not found 0x%lx\n",
- PTR_ERR(data));
- return;
- }
-
- if (package_size < sizeof(*data)) {
- IWL_DEBUG_FW(trans,
- "Invalid SGOM table UEFI variable len (%lu)\n",
- package_size);
- kfree(data);
+ data = iwl_uefi_get_verified_variable(trans, IWL_UEFI_SGOM_NAME,
+ "SGOM", sizeof(*data), NULL);
+ if (IS_ERR(data))
return;
- }
-
- IWL_DEBUG_FW(trans, "Read SGOM from UEFI with size %lu\n",
- package_size);
ret = iwl_uefi_sgom_parse(data, fwrt);
if (ret < 0)
@@ -404,28 +393,12 @@ int iwl_uefi_get_uats_table(struct iwl_trans *trans,
struct iwl_fw_runtime *fwrt)
{
struct uefi_cnv_wlan_uats_data *data;
- unsigned long package_size;
int ret;
- data = iwl_uefi_get_variable(IWL_UEFI_UATS_NAME, &IWL_EFI_VAR_GUID,
- &package_size);
- if (IS_ERR(data)) {
- IWL_DEBUG_FW(trans,
- "UATS UEFI variable not found 0x%lx\n",
- PTR_ERR(data));
+ data = iwl_uefi_get_verified_variable(trans, IWL_UEFI_UATS_NAME,
+ "UATS", sizeof(*data), NULL);
+ if (IS_ERR(data))
return -EINVAL;
- }
-
- if (package_size < sizeof(*data)) {
- IWL_DEBUG_FW(trans,
- "Invalid UATS table UEFI variable len (%lu)\n",
- package_size);
- kfree(data);
- return -EINVAL;
- }
-
- IWL_DEBUG_FW(trans, "Read UATS from UEFI with size %lu\n",
- package_size);
ret = iwl_uefi_uats_parse(data, fwrt);
if (ret < 0) {
@@ -438,4 +411,298 @@ int iwl_uefi_get_uats_table(struct iwl_trans *trans,
return 0;
}
IWL_EXPORT_SYMBOL(iwl_uefi_get_uats_table);
-#endif /* CONFIG_ACPI */
+
+static void iwl_uefi_set_sar_profile(struct iwl_fw_runtime *fwrt,
+ struct uefi_sar_profile *uefi_sar_prof,
+ u8 prof_index, bool enabled)
+{
+ memcpy(&fwrt->sar_profiles[prof_index].chains, uefi_sar_prof,
+ sizeof(struct uefi_sar_profile));
+
+ fwrt->sar_profiles[prof_index].enabled = enabled & IWL_SAR_ENABLE_MSK;
+}
+
+int iwl_uefi_get_wrds_table(struct iwl_fw_runtime *fwrt)
+{
+ struct uefi_cnv_var_wrds *data;
+ int ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WRDS_NAME,
+ "WRDS", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_WRDS_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WRDS revision:%d\n",
+ data->revision);
+ goto out;
+ }
+
+ /* The profile from WRDS is officially profile 1, but goes
+ * into sar_profiles[0] (because we don't have a profile 0).
+ */
+ iwl_uefi_set_sar_profile(fwrt, &data->sar_profile, 0, data->mode);
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_ewrd_table(struct iwl_fw_runtime *fwrt)
+{
+ struct uefi_cnv_var_ewrd *data;
+ int i, ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_EWRD_NAME,
+ "EWRD", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_EWRD_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI EWRD revision:%d\n",
+ data->revision);
+ goto out;
+ }
+
+ if (data->num_profiles >= BIOS_SAR_MAX_PROFILE_NUM) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < data->num_profiles; i++)
+ /* The EWRD profiles officially go from 2 to 4, but we
+ * save them in sar_profiles[1-3] (because we don't
+ * have profile 0). So in the array we start from 1.
+ */
+ iwl_uefi_set_sar_profile(fwrt, &data->sar_profiles[i], i + 1,
+ data->mode);
+
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt)
+{
+ struct uefi_cnv_var_wgds *data;
+ int i, ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WGDS_NAME,
+ "WGDS", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_WGDS_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WGDS revision:%d\n",
+ data->revision);
+ goto out;
+ }
+
+ if (data->num_profiles < BIOS_GEO_MIN_PROFILE_NUM ||
+ data->num_profiles > BIOS_GEO_MAX_PROFILE_NUM) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Invalid number of profiles in WGDS: %d\n",
+ data->num_profiles);
+ goto out;
+ }
+
+ fwrt->geo_rev = data->revision;
+ for (i = 0; i < data->num_profiles; i++)
+ memcpy(&fwrt->geo_profiles[i], &data->geo_profiles[i],
+ sizeof(struct iwl_geo_profile));
+
+ fwrt->geo_num_profiles = data->num_profiles;
+ fwrt->geo_enabled = true;
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt)
+{
+ struct uefi_cnv_var_ppag *data;
+ int ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_PPAG_NAME,
+ "PPAG", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision < IWL_UEFI_MIN_PPAG_REV ||
+ data->revision > IWL_UEFI_MAX_PPAG_REV) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI PPAG revision:%d\n",
+ data->revision);
+ goto out;
+ }
+
+ fwrt->ppag_ver = data->revision;
+ fwrt->ppag_flags = iwl_bios_get_ppag_flags(data->ppag_modes,
+ fwrt->ppag_ver);
+
+ BUILD_BUG_ON(sizeof(fwrt->ppag_chains) != sizeof(data->ppag_chains));
+ memcpy(&fwrt->ppag_chains, &data->ppag_chains,
+ sizeof(data->ppag_chains));
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *tas_data)
+{
+ struct uefi_cnv_var_wtas *uefi_tas;
+ int ret = 0, enabled, i;
+
+ uefi_tas = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WTAS_NAME,
+ "WTAS", sizeof(*uefi_tas), NULL);
+ if (IS_ERR(uefi_tas))
+ return -EINVAL;
+
+ if (uefi_tas->revision != IWL_UEFI_WTAS_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WTAS revision:%d\n",
+ uefi_tas->revision);
+ goto out;
+ }
+
+ enabled = iwl_parse_tas_selection(fwrt, tas_data,
+ uefi_tas->tas_selection);
+ if (!enabled) {
+ IWL_DEBUG_RADIO(fwrt, "TAS not enabled\n");
+ ret = 0;
+ goto out;
+ }
+
+ IWL_DEBUG_RADIO(fwrt, "Reading TAS table revision %d\n",
+ uefi_tas->revision);
+ if (uefi_tas->black_list_size > IWL_WTAS_BLACK_LIST_MAX) {
+ IWL_DEBUG_RADIO(fwrt, "TAS invalid array size %d\n",
+ uefi_tas->black_list_size);
+ ret = -EINVAL;
+ goto out;
+ }
+ tas_data->block_list_size = cpu_to_le32(uefi_tas->black_list_size);
+ IWL_DEBUG_RADIO(fwrt, "TAS array size %u\n", uefi_tas->black_list_size);
+
+ for (i = 0; i < uefi_tas->black_list_size; i++) {
+ tas_data->block_list_array[i] =
+ cpu_to_le32(uefi_tas->black_list[i]);
+ IWL_DEBUG_RADIO(fwrt, "TAS block list country %d\n",
+ uefi_tas->black_list[i]);
+ }
+out:
+ kfree(uefi_tas);
+ return ret;
+}
+
+int iwl_uefi_get_pwr_limit(struct iwl_fw_runtime *fwrt,
+ u64 *dflt_pwr_limit)
+{
+ struct uefi_cnv_var_splc *data;
+ int ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_SPLC_NAME,
+ "SPLC", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_SPLC_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI SPLC revision:%d\n",
+ data->revision);
+ goto out;
+ }
+ *dflt_pwr_limit = data->default_pwr_limit;
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc)
+{
+ struct uefi_cnv_var_wrdd *data;
+ int ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WRDD_NAME,
+ "WRDD", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_WRDD_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WRDD revision:%d\n",
+ data->revision);
+ goto out;
+ }
+
+ if (data->mcc != UEFI_MCC_CHINA) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "UEFI WRDD is supported only for CN\n");
+ goto out;
+ }
+
+ mcc[0] = (data->mcc >> 8) & 0xff;
+ mcc[1] = data->mcc & 0xff;
+ mcc[2] = '\0';
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_eckv(struct iwl_fw_runtime *fwrt, u32 *extl_clk)
+{
+ struct uefi_cnv_var_eckv *data;
+ int ret = 0;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_ECKV_NAME,
+ "ECKV", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_ECKV_REVISION) {
+ ret = -EINVAL;
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WRDD revision:%d\n",
+ data->revision);
+ goto out;
+ }
+ *extl_clk = data->ext_clock_valid;
+out:
+ kfree(data);
+ return ret;
+}
+
+int iwl_uefi_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func,
+ u32 *value)
+{
+ struct uefi_cnv_var_general_cfg *data;
+ int ret = -EINVAL;
+
+ /* Not supported function index */
+ if (func >= DSM_FUNC_NUM_FUNCS || func == 5)
+ return -EOPNOTSUPP;
+
+ data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_DSM_NAME,
+ "DSM", sizeof(*data), NULL);
+ if (IS_ERR(data))
+ return -EINVAL;
+
+ if (data->revision != IWL_UEFI_DSM_REVISION) {
+ IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI DSM revision:%d\n",
+ data->revision);
+ goto out;
+ }
+
+ if (ARRAY_SIZE(data->functions) != UEFI_MAX_DSM_FUNCS) {
+ IWL_DEBUG_RADIO(fwrt, "Invalid size of DSM functions array\n");
+ goto out;
+ }
+
+ *value = data->functions[func];
+ ret = 0;
+out:
+ kfree(data);
+ return ret;
+}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
index bf61a8df1225..303cc299d1bc 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
@@ -5,15 +5,38 @@
#ifndef __iwl_fw_uefi__
#define __iwl_fw_uefi__
+#include "fw/regulatory.h"
+
#define IWL_UEFI_OEM_PNVM_NAME L"UefiCnvWlanOemSignedPnvm"
#define IWL_UEFI_REDUCED_POWER_NAME L"UefiCnvWlanReducedPower"
#define IWL_UEFI_SGOM_NAME L"UefiCnvWlanSarGeoOffsetMapping"
#define IWL_UEFI_STEP_NAME L"UefiCnvCommonSTEP"
#define IWL_UEFI_UATS_NAME L"CnvUefiWlanUATS"
+#define IWL_UEFI_WRDS_NAME L"UefiCnvWlanWRDS"
+#define IWL_UEFI_EWRD_NAME L"UefiCnvWlanEWRD"
+#define IWL_UEFI_WGDS_NAME L"UefiCnvWlanWGDS"
+#define IWL_UEFI_PPAG_NAME L"UefiCnvWlanPPAG"
+#define IWL_UEFI_WTAS_NAME L"UefiCnvWlanWTAS"
+#define IWL_UEFI_SPLC_NAME L"UefiCnvWlanSPLC"
+#define IWL_UEFI_WRDD_NAME L"UefiCnvWlanWRDD"
+#define IWL_UEFI_ECKV_NAME L"UefiCnvWlanECKV"
+#define IWL_UEFI_DSM_NAME L"UefiCnvWlanGeneralCfg"
+
#define IWL_SGOM_MAP_SIZE 339
#define IWL_UATS_MAP_SIZE 339
+#define IWL_UEFI_WRDS_REVISION 2
+#define IWL_UEFI_EWRD_REVISION 2
+#define IWL_UEFI_WGDS_REVISION 3
+#define IWL_UEFI_MIN_PPAG_REV 1
+#define IWL_UEFI_MAX_PPAG_REV 3
+#define IWL_UEFI_WTAS_REVISION 1
+#define IWL_UEFI_SPLC_REVISION 0
+#define IWL_UEFI_WRDD_REVISION 0
+#define IWL_UEFI_ECKV_REVISION 0
+#define IWL_UEFI_DSM_REVISION 4
+
struct pnvm_sku_package {
u8 rev;
u32 total_size;
@@ -42,6 +65,120 @@ struct uefi_cnv_common_step_data {
} __packed;
/*
+ * struct uefi_sar_profile - a SAR profile as defined in UEFI
+ *
+ * @chains: a per-chain table of SAR values
+ */
+struct uefi_sar_profile {
+ struct iwl_sar_profile_chain chains[BIOS_SAR_MAX_CHAINS_PER_PROFILE];
+} __packed;
+
+/*
+ * struct uefi_cnv_var_wrds - WRDS table as defined in UEFI
+ *
+ * @revision: the revision of the table
+ * @mode: is WRDS enbaled/disabled
+ * @sar_profile: sar profile #1
+ */
+struct uefi_cnv_var_wrds {
+ u8 revision;
+ u32 mode;
+ struct uefi_sar_profile sar_profile;
+} __packed;
+
+/*
+ * struct uefi_cnv_var_ewrd - EWRD table as defined in UEFI
+ * @revision: the revision of the table
+ * @mode: is WRDS enbaled/disabled
+ * @num_profiles: how many additional profiles we have in this table (0-3)
+ * @sar_profiles: the additional SAR profiles (#2-#4)
+ */
+struct uefi_cnv_var_ewrd {
+ u8 revision;
+ u32 mode;
+ u32 num_profiles;
+ struct uefi_sar_profile sar_profiles[BIOS_SAR_MAX_PROFILE_NUM - 1];
+} __packed;
+
+/*
+ * struct uefi_cnv_var_wgds - WGDS table as defined in UEFI
+ * @revision: the revision of the table
+ * @num_profiles: the number of geo profiles we have in the table.
+ * The first 3 are mandatory, and can have up to 8.
+ * @geo_profiles: a per-profile table of the offsets to add to SAR values.
+ */
+struct uefi_cnv_var_wgds {
+ u8 revision;
+ u8 num_profiles;
+ struct iwl_geo_profile geo_profiles[BIOS_GEO_MAX_PROFILE_NUM];
+} __packed;
+
+/*
+ * struct uefi_cnv_var_ppag - PPAG table as defined in UEFI
+ * @revision: the revision of the table
+ * @ppag_modes: values from &enum iwl_ppag_flags
+ * @ppag_chains: the PPAG values per chain and band
+ */
+struct uefi_cnv_var_ppag {
+ u8 revision;
+ u32 ppag_modes;
+ struct iwl_ppag_chain ppag_chains[IWL_NUM_CHAIN_LIMITS];
+} __packed;
+
+/* struct uefi_cnv_var_wtas - WTAS tabled as defined in UEFI
+ * @revision: the revision of the table
+ * @tas_selection: different options of TAS enablement.
+ * @black_list_size: the number of defined entried in the black list
+ * @black_list: a list of countries that are not allowed to use the TAS feature
+ */
+struct uefi_cnv_var_wtas {
+ u8 revision;
+ u32 tas_selection;
+ u8 black_list_size;
+ u16 black_list[IWL_WTAS_BLACK_LIST_MAX];
+} __packed;
+
+/* struct uefi_cnv_var_splc - SPLC tabled as defined in UEFI
+ * @revision: the revision of the table
+ * @default_pwr_limit: The default maximum power per device
+ */
+struct uefi_cnv_var_splc {
+ u8 revision;
+ u32 default_pwr_limit;
+} __packed;
+
+#define UEFI_MCC_CHINA 0x434e
+
+/* struct uefi_cnv_var_wrdd - WRDD table as defined in UEFI
+ * @revision: the revision of the table
+ * @mcc: country identifier as defined in ISO/IEC 3166-1 Alpha 2 code
+ */
+struct uefi_cnv_var_wrdd {
+ u8 revision;
+ u32 mcc;
+} __packed;
+
+/* struct uefi_cnv_var_eckv - ECKV table as defined in UEFI
+ * @revision: the revision of the table
+ * @ext_clock_valid: indicates if external 32KHz clock is valid
+ */
+struct uefi_cnv_var_eckv {
+ u8 revision;
+ u32 ext_clock_valid;
+} __packed;
+
+#define UEFI_MAX_DSM_FUNCS 32
+
+/* struct uefi_cnv_var_general_cfg - DSM-like table as defined in UEFI
+ * @revision: the revision of the table
+ * @functions: payload of the different DSM functions
+ */
+struct uefi_cnv_var_general_cfg {
+ u8 revision;
+ u32 functions[UEFI_MAX_DSM_FUNCS];
+} __packed;
+
+/*
* This is known to be broken on v4.19 and to work on v5.4. Until we
* figure out why this is the case and how to make it work, simply
* disable the feature in old kernels.
@@ -55,6 +192,21 @@ int iwl_uefi_reduce_power_parse(struct iwl_trans *trans,
void iwl_uefi_get_step_table(struct iwl_trans *trans);
int iwl_uefi_handle_tlv_mem_desc(struct iwl_trans *trans, const u8 *data,
u32 tlv_len, struct iwl_pnvm_image *pnvm_data);
+int iwl_uefi_get_wrds_table(struct iwl_fw_runtime *fwrt);
+int iwl_uefi_get_ewrd_table(struct iwl_fw_runtime *fwrt);
+int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt);
+int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt);
+int iwl_uefi_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *data);
+int iwl_uefi_get_pwr_limit(struct iwl_fw_runtime *fwrt,
+ u64 *dflt_pwr_limit);
+int iwl_uefi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc);
+int iwl_uefi_get_eckv(struct iwl_fw_runtime *fwrt, u32 *extl_clk);
+int iwl_uefi_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func,
+ u32 *value);
+void iwl_uefi_get_sgom_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt);
+int iwl_uefi_get_uats_table(struct iwl_trans *trans,
+ struct iwl_fw_runtime *fwrt);
#else /* CONFIG_EFI */
static inline void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
{
@@ -85,13 +237,56 @@ iwl_uefi_handle_tlv_mem_desc(struct iwl_trans *trans, const u8 *data,
{
return 0;
}
-#endif /* CONFIG_EFI */
-#if defined(CONFIG_EFI) && defined(CONFIG_ACPI)
-void iwl_uefi_get_sgom_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt);
-int iwl_uefi_get_uats_table(struct iwl_trans *trans,
- struct iwl_fw_runtime *fwrt);
-#else
+static inline int iwl_uefi_get_wrds_table(struct iwl_fw_runtime *fwrt)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_ewrd_table(struct iwl_fw_runtime *fwrt)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_tas_table(struct iwl_fw_runtime *fwrt,
+ struct iwl_tas_data *data)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_pwr_limit(struct iwl_fw_runtime *fwrt,
+ u64 *dflt_pwr_limit)
+{
+ *dflt_pwr_limit = 0;
+ return 0;
+}
+
+static inline int iwl_uefi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_eckv(struct iwl_fw_runtime *fwrt, u32 *extl_clk)
+{
+ return -ENOENT;
+}
+
+static inline int iwl_uefi_get_dsm(struct iwl_fw_runtime *fwrt,
+ enum iwl_dsm_funcs func, u32 *value)
+{
+ return -ENOENT;
+}
+
static inline
void iwl_uefi_get_sgom_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt)
{
@@ -103,6 +298,5 @@ int iwl_uefi_get_uats_table(struct iwl_trans *trans,
{
return 0;
}
-
-#endif
+#endif /* CONFIG_EFI */
#endif /* __iwl_fw_uefi__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index ae6f1cd4d660..6aa4f7f9c708 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -2,7 +2,7 @@
/*
* Copyright (C) 2005-2014, 2018-2021 Intel Corporation
* Copyright (C) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef __IWL_CONFIG_H__
#define __IWL_CONFIG_H__
@@ -12,6 +12,7 @@
#include <linux/ieee80211.h>
#include <linux/nl80211.h>
#include "iwl-csr.h"
+#include "iwl-drv.h"
enum iwl_device_family {
IWL_DEVICE_FAMILY_UNDEFINED,
@@ -418,6 +419,8 @@ struct iwl_cfg {
#define IWL_CFG_MAC_TYPE_BZ 0x46
#define IWL_CFG_MAC_TYPE_GL 0x47
#define IWL_CFG_MAC_TYPE_SC 0x48
+#define IWL_CFG_MAC_TYPE_SC2 0x49
+#define IWL_CFG_MAC_TYPE_SC2F 0x4A
#define IWL_CFG_RF_TYPE_TH 0x105
#define IWL_CFG_RF_TYPE_TH1 0x108
@@ -442,6 +445,9 @@ struct iwl_cfg {
#define IWL_CFG_NO_160 0x1
#define IWL_CFG_160 0x0
+#define IWL_CFG_NO_320 0x1
+#define IWL_CFG_320 0x0
+
#define IWL_CFG_CORES_BT 0x0
#define IWL_CFG_CORES_BT_GNSS 0x5
@@ -471,6 +477,15 @@ struct iwl_dev_info {
const char *name;
};
+#if IS_ENABLED(CONFIG_IWLWIFI_KUNIT_TESTS)
+extern const struct iwl_dev_info iwl_dev_info_table[];
+extern const unsigned int iwl_dev_info_table_size;
+const struct iwl_dev_info *
+iwl_pci_find_dev_info(u16 device, u16 subsystem_device,
+ u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb,
+ u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step);
+#endif
+
/*
* This list declares the config structures for all devices.
*/
@@ -526,7 +541,10 @@ extern const char iwl_ax221_name[];
extern const char iwl_ax231_name[];
extern const char iwl_ax411_name[];
extern const char iwl_bz_name[];
+extern const char iwl_mtp_name[];
extern const char iwl_sc_name[];
+extern const char iwl_sc2_name[];
+extern const char iwl_sc2f_name[];
#if IS_ENABLED(CONFIG_IWLDVM)
extern const struct iwl_cfg iwl5300_agn_cfg;
extern const struct iwl_cfg iwl5100_agn_cfg;
@@ -632,6 +650,8 @@ extern const struct iwl_cfg iwl_cfg_bz;
extern const struct iwl_cfg iwl_cfg_gl;
extern const struct iwl_cfg iwl_cfg_sc;
+extern const struct iwl_cfg iwl_cfg_sc2;
+extern const struct iwl_cfg iwl_cfg_sc2f;
#endif /* CONFIG_IWLMVM */
#endif /* __IWL_CONFIG_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 3b14f6476743..561d0c261123 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/firmware.h>
#include "iwl-drv.h"
@@ -64,21 +64,22 @@ dbg_ver_table[IWL_DBG_TLV_TYPE_NUM] = {
[IWL_DBG_TLV_TYPE_CONF_SET] = {.min_ver = 1, .max_ver = 1,},
};
-static int iwl_dbg_tlv_add(const struct iwl_ucode_tlv *tlv,
- struct list_head *list)
+/* add a new TLV node, returning it so it can be modified */
+static struct iwl_ucode_tlv *iwl_dbg_tlv_add(const struct iwl_ucode_tlv *tlv,
+ struct list_head *list)
{
u32 len = le32_to_cpu(tlv->length);
struct iwl_dbg_tlv_node *node;
- node = kzalloc(sizeof(*node) + len, GFP_KERNEL);
+ node = kzalloc(struct_size(node, tlv.data, len), GFP_KERNEL);
if (!node)
- return -ENOMEM;
+ return NULL;
memcpy(&node->tlv, tlv, sizeof(node->tlv));
memcpy(node->tlv.data, tlv->data, len);
list_add_tail(&node->list, list);
- return 0;
+ return &node->tlv;
}
static bool iwl_dbg_tlv_ver_support(const struct iwl_ucode_tlv *tlv)
@@ -103,10 +104,18 @@ static int iwl_dbg_tlv_alloc_debug_info(struct iwl_trans *trans,
if (le32_to_cpu(tlv->length) != sizeof(*debug_info))
return -EINVAL;
+ /* we use this as a string, ensure input was NUL terminated */
+ if (strnlen(debug_info->debug_cfg_name,
+ sizeof(debug_info->debug_cfg_name)) ==
+ sizeof(debug_info->debug_cfg_name))
+ return -EINVAL;
+
IWL_DEBUG_FW(trans, "WRT: Loading debug cfg: %s\n",
debug_info->debug_cfg_name);
- return iwl_dbg_tlv_add(tlv, &trans->dbg.debug_info_tlv_list);
+ if (!iwl_dbg_tlv_add(tlv, &trans->dbg.debug_info_tlv_list))
+ return -ENOMEM;
+ return 0;
}
static int iwl_dbg_tlv_alloc_buf_alloc(struct iwl_trans *trans,
@@ -175,7 +184,9 @@ static int iwl_dbg_tlv_alloc_hcmd(struct iwl_trans *trans,
return -EINVAL;
}
- return iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].hcmd_list);
+ if (!iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].hcmd_list))
+ return -ENOMEM;
+ return 0;
}
static int iwl_dbg_tlv_alloc_region(struct iwl_trans *trans,
@@ -246,11 +257,9 @@ static int iwl_dbg_tlv_alloc_trigger(struct iwl_trans *trans,
const struct iwl_ucode_tlv *tlv)
{
const struct iwl_fw_ini_trigger_tlv *trig = (const void *)tlv->data;
- struct iwl_fw_ini_trigger_tlv *dup_trig;
u32 tp = le32_to_cpu(trig->time_point);
u32 rf = le32_to_cpu(trig->reset_fw);
- struct iwl_ucode_tlv *dup = NULL;
- int ret;
+ struct iwl_ucode_tlv *new_tlv;
if (le32_to_cpu(tlv->length) < sizeof(*trig))
return -EINVAL;
@@ -267,20 +276,18 @@ static int iwl_dbg_tlv_alloc_trigger(struct iwl_trans *trans,
"WRT: time point %u for trigger TLV with reset_fw %u\n",
tp, rf);
trans->dbg.last_tp_resetfw = 0xFF;
+
+ new_tlv = iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].trig_list);
+ if (!new_tlv)
+ return -ENOMEM;
+
if (!le32_to_cpu(trig->occurrences)) {
- dup = kmemdup(tlv, sizeof(*tlv) + le32_to_cpu(tlv->length),
- GFP_KERNEL);
- if (!dup)
- return -ENOMEM;
- dup_trig = (void *)dup->data;
- dup_trig->occurrences = cpu_to_le32(-1);
- tlv = dup;
- }
+ struct iwl_fw_ini_trigger_tlv *new_trig = (void *)new_tlv->data;
- ret = iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].trig_list);
- kfree(dup);
+ new_trig->occurrences = cpu_to_le32(-1);
+ }
- return ret;
+ return 0;
}
static int iwl_dbg_tlv_config_set(struct iwl_trans *trans,
@@ -304,7 +311,9 @@ static int iwl_dbg_tlv_config_set(struct iwl_trans *trans,
return -EINVAL;
}
- return iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].config_list);
+ if (!iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].config_list))
+ return -ENOMEM;
+ return 0;
}
static int (*dbg_tlv_alloc[])(struct iwl_trans *trans,
@@ -1096,7 +1105,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt,
node_trig = (void *)node_tlv->data;
}
- memcpy(node_trig->data + offset, trig->data, trig_data_len);
+ memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len);
node_tlv->length = cpu_to_le32(size);
if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) {
@@ -1148,7 +1157,9 @@ iwl_dbg_tlv_add_active_trigger(struct iwl_fw_runtime *fwrt,
if (!match) {
IWL_DEBUG_FW(fwrt, "WRT: Enabling trigger (time point %u)\n",
le32_to_cpu(trig->time_point));
- return iwl_dbg_tlv_add(trig_tlv, trig_list);
+ if (!iwl_dbg_tlv_add(trig_tlv, trig_list))
+ return -ENOMEM;
+ return 0;
}
return iwl_dbg_tlv_override_trig_node(fwrt, trig_tlv, match);
@@ -1234,7 +1245,7 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync,
}
}
- fwrt->trans->dbg.restart_required = FALSE;
+ fwrt->trans->dbg.restart_required = false;
IWL_DEBUG_FW(fwrt, "WRT: tp %d, reset_fw %d\n",
tp, dump_data.trig->reset_fw);
IWL_DEBUG_FW(fwrt,
@@ -1244,22 +1255,22 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync,
if (fwrt->trans->trans_cfg->device_family ==
IWL_DEVICE_FAMILY_9000) {
- fwrt->trans->dbg.restart_required = TRUE;
+ fwrt->trans->dbg.restart_required = true;
} else if (tp == IWL_FW_INI_TIME_POINT_FW_ASSERT &&
fwrt->trans->dbg.last_tp_resetfw ==
IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) {
- fwrt->trans->dbg.restart_required = FALSE;
+ fwrt->trans->dbg.restart_required = false;
fwrt->trans->dbg.last_tp_resetfw = 0xFF;
IWL_DEBUG_FW(fwrt, "WRT: FW_ASSERT due to reset_fw_mode-no restart\n");
} else if (le32_to_cpu(dump_data.trig->reset_fw) ==
IWL_FW_INI_RESET_FW_MODE_STOP_AND_RELOAD_FW) {
IWL_DEBUG_FW(fwrt, "WRT: stop and reload firmware\n");
- fwrt->trans->dbg.restart_required = TRUE;
+ fwrt->trans->dbg.restart_required = true;
} else if (le32_to_cpu(dump_data.trig->reset_fw) ==
IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) {
IWL_DEBUG_FW(fwrt,
"WRT: stop only and no reload firmware\n");
- fwrt->trans->dbg.restart_required = FALSE;
+ fwrt->trans->dbg.restart_required = false;
fwrt->trans->dbg.last_tp_resetfw =
le32_to_cpu(dump_data.trig->reset_fw);
} else if (le32_to_cpu(dump_data.trig->reset_fw) ==
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index ffe2670720c9..4696d73c8971 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2005-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -128,6 +128,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv)
kfree(drv->fw.ucode_capa.cmd_versions);
kfree(drv->fw.phy_integration_ver);
kfree(drv->trans->dbg.pc_data);
+ drv->trans->dbg.pc_data = NULL;
for (i = 0; i < IWL_UCODE_TYPE_MAX; i++)
iwl_free_fw_img(drv, drv->fw.img + i);
@@ -186,6 +187,7 @@ const char *iwl_drv_get_fwname_pre(struct iwl_trans *trans, char *buf)
case IWL_CFG_RF_TYPE_HR1:
case IWL_CFG_RF_TYPE_HR2:
rf = "hr";
+ rf_step = 'b';
break;
case IWL_CFG_RF_TYPE_GF:
rf = "gf";
@@ -1423,35 +1425,25 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op)
const struct iwl_op_mode_ops *ops = op->ops;
struct dentry *dbgfs_dir = NULL;
struct iwl_op_mode *op_mode = NULL;
- int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY;
/* also protects start/stop from racing against each other */
lockdep_assert_held(&iwlwifi_opmode_table_mtx);
- for (retry = 0; retry <= max_retry; retry++) {
-
#ifdef CONFIG_IWLWIFI_DEBUGFS
- drv->dbgfs_op_mode = debugfs_create_dir(op->name,
- drv->dbgfs_drv);
- dbgfs_dir = drv->dbgfs_op_mode;
+ drv->dbgfs_op_mode = debugfs_create_dir(op->name,
+ drv->dbgfs_drv);
+ dbgfs_dir = drv->dbgfs_op_mode;
#endif
- op_mode = ops->start(drv->trans, drv->trans->cfg,
- &drv->fw, dbgfs_dir);
-
- if (op_mode)
- return op_mode;
-
- if (test_bit(STATUS_TRANS_DEAD, &drv->trans->status))
- break;
-
- IWL_ERR(drv, "retry init count %d\n", retry);
+ op_mode = ops->start(drv->trans, drv->trans->cfg,
+ &drv->fw, dbgfs_dir);
+ if (op_mode)
+ return op_mode;
#ifdef CONFIG_IWLWIFI_DEBUGFS
- debugfs_remove_recursive(drv->dbgfs_op_mode);
- drv->dbgfs_op_mode = NULL;
+ debugfs_remove_recursive(drv->dbgfs_op_mode);
+ drv->dbgfs_op_mode = NULL;
#endif
- }
return NULL;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
index 3d1a27ba35c6..1549ff429549 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
@@ -6,6 +6,7 @@
#ifndef __iwl_drv_h__
#define __iwl_drv_h__
#include <linux/export.h>
+#include <kunit/visibility.h>
/* for all modules */
#define DRV_NAME "iwlwifi"
@@ -89,8 +90,13 @@ void iwl_drv_stop(struct iwl_drv *drv);
#define IWL_EXPORT_SYMBOL(sym)
#endif
-/* max retry for init flow */
-#define IWL_MAX_INIT_RETRY 2
+#if IS_ENABLED(CONFIG_IWLWIFI_KUNIT_TESTS)
+#define EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
+#define VISIBLE_IF_IWLWIFI_KUNIT
+#else
+#define EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(sym)
+#define VISIBLE_IF_IWLWIFI_KUNIT static
+#endif
#define FW_NAME_PRE_BUFSIZE 64
struct iwl_trans;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c
index 5aab64c63a13..2b290fab1ef2 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c
@@ -270,7 +270,7 @@ enum iwl_eeprom_enhanced_txpwr_flags {
};
/**
- * struct iwl_eeprom_enhanced_txpwr
+ * struct iwl_eeprom_enhanced_txpwr - enhanced regulatory TX power limits
* @flags: entry flags
* @channel: channel number
* @chain_a_max: chain a max power in 1/2 dBm
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
index e0400ba2ab74..6ba374efaacb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2005-2014, 2018-2021, 2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021, 2023-2024 Intel Corporation
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
#ifndef __iwl_fh_h__
@@ -570,18 +570,19 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans,
/**
* struct iwl_rb_status - reserve buffer status
* host memory mapped FH registers
- * @closed_rb_num [0:11] - Indicates the index of the RB which was closed
- * @closed_fr_num [0:11] - Indicates the index of the RX Frame which was closed
- * @finished_rb_num [0:11] - Indicates the index of the current RB
+ * @closed_rb_num: [0:11] Indicates the index of the RB which was closed
+ * @closed_fr_num: [0:11] Indicates the index of the RX Frame which was closed
+ * @finished_rb_num: [0:11] Indicates the index of the current RB
* in which the last frame was written to
- * @finished_fr_num [0:11] - Indicates the index of the RX Frame
+ * @finished_fr_num: [0:11] Indicates the index of the RX Frame
* which was transferred
+ * @__spare: reserved
*/
struct iwl_rb_status {
__le16 closed_rb_num;
__le16 closed_fr_num;
__le16 finished_rb_num;
- __le16 finished_fr_nam;
+ __le16 finished_fr_num;
__le32 __spare;
} __packed;
@@ -651,15 +652,15 @@ struct iwl_tfd_tb {
*
* This structure contains dma address and length of transmission address
*
- * @tb_len length of the tx buffer
- * @addr 64 bits dma address
+ * @tb_len: length of the tx buffer
+ * @addr: 64 bits dma address
*/
struct iwl_tfh_tb {
__le16 tb_len;
__le64 addr;
} __packed;
-/**
+/*
* Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM.
* Both driver and device share these circular buffers, each of which must be
* contiguous 256 TFDs.
@@ -698,10 +699,11 @@ struct iwl_tfd {
/**
* struct iwl_tfh_tfd - Transmit Frame Descriptor (TFD)
- * @ num_tbs 0-4 number of active tbs
- * 5 -15 reserved
- * @ tbs[25] transmit frame buffer descriptors
- * @ __pad padding
+ * @num_tbs:
+ * 0-4 number of active tbs
+ * 5-15 reserved
+ * @tbs: transmit frame buffer descriptors
+ * @__pad: padding
*/
struct iwl_tfh_tfd {
__le16 num_tbs;
@@ -718,10 +720,12 @@ struct iwl_tfh_tfd {
* struct iwlagn_schedq_bc_tbl scheduler byte count table
* base physical address provided by SCD_DRAM_BASE_ADDR
* For devices up to 22000:
- * @tfd_offset 0-12 - tx command byte count
+ * @tfd_offset:
+ * For devices up to 22000:
+ * 0-12 - tx command byte count
* 12-16 - station index
- * For 22000:
- * @tfd_offset 0-12 - tx command byte count
+ * For 22000:
+ * 0-12 - tx command byte count
* 12-13 - number of 64 byte chunks
* 14-16 - reserved
*/
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 402896988686..baa39a18087a 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -156,6 +156,8 @@ static struct ieee80211_rate iwl_cfg80211_rates[] = {
* @NVM_CHANNEL_80MHZ: 80 MHz channel okay
* @NVM_CHANNEL_160MHZ: 160 MHz channel okay
* @NVM_CHANNEL_DC_HIGH: DC HIGH required/allowed (?)
+ * @NVM_CHANNEL_VLP: client support connection to UHB VLP AP
+ * @NVM_CHANNEL_AFC: client support connection to UHB AFC AP
*/
enum iwl_nvm_channel_flags {
NVM_CHANNEL_VALID = BIT(0),
@@ -170,6 +172,8 @@ enum iwl_nvm_channel_flags {
NVM_CHANNEL_80MHZ = BIT(10),
NVM_CHANNEL_160MHZ = BIT(11),
NVM_CHANNEL_DC_HIGH = BIT(12),
+ NVM_CHANNEL_VLP = BIT(13),
+ NVM_CHANNEL_AFC = BIT(14),
};
/**
@@ -309,7 +313,7 @@ static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level,
/* Note: already can print up to 101 characters, 110 is the limit! */
IWL_DEBUG_DEV(dev, level,
- "Ch. %d: 0x%x:%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ "Ch. %d: 0x%x:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
chan, flags,
CHECK_AND_PRINT_I(VALID),
CHECK_AND_PRINT_I(IBSS),
@@ -322,7 +326,9 @@ static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level,
CHECK_AND_PRINT_I(40MHZ),
CHECK_AND_PRINT_I(80MHZ),
CHECK_AND_PRINT_I(160MHZ),
- CHECK_AND_PRINT_I(DC_HIGH));
+ CHECK_AND_PRINT_I(DC_HIGH),
+ CHECK_AND_PRINT_I(VLP),
+ CHECK_AND_PRINT_I(AFC));
#undef CHECK_AND_PRINT_I
}
@@ -366,6 +372,12 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, enum nl80211_band band,
(flags & IEEE80211_CHAN_NO_IR))
flags |= IEEE80211_CHAN_IR_CONCURRENT;
+ /* Set the AP type for the UHB case. */
+ if (!(nvm_flags & NVM_CHANNEL_VLP))
+ flags |= IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT;
+ if (!(nvm_flags & NVM_CHANNEL_AFC))
+ flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT;
+
return flags;
}
@@ -668,7 +680,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
.has_eht = true,
.eht_cap_elem = {
.mac_cap_info[0] =
- IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS |
IEEE80211_EHT_MAC_CAP0_OM_CONTROL |
IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 |
@@ -696,10 +707,11 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
IEEE80211_EHT_PHY_CAP4_POWER_BOOST_FACT_SUPP |
IEEE80211_EHT_PHY_CAP4_EHT_MU_PPDU_4_EHT_LTF_08_GI,
.phy_cap_info[5] =
+ FIELD_PREP_CONST(IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK,
+ IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US) |
IEEE80211_EHT_PHY_CAP5_NON_TRIG_CQI_FEEDBACK |
IEEE80211_EHT_PHY_CAP5_TX_LESS_242_TONE_RU_SUPP |
- IEEE80211_EHT_PHY_CAP5_RX_LESS_242_TONE_RU_SUPP |
- IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT,
+ IEEE80211_EHT_PHY_CAP5_RX_LESS_242_TONE_RU_SUPP,
.phy_cap_info[6] =
IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK |
IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP,
@@ -733,6 +745,9 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
/*
* PPE thresholds for NSS = 2, and RU index bitmap set
* to 0xc.
+ * Note: just for stating what we want, not present in
+ * the transmitted data due to not including
+ * IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT.
*/
.eht_ppe_thres = {0xc1, 0x0e, 0xe0 }
},
@@ -745,7 +760,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
.mac_cap_info[0] =
IEEE80211_HE_MAC_CAP0_HTC_HE,
.mac_cap_info[1] =
- IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
.mac_cap_info[3] =
IEEE80211_HE_MAC_CAP3_OMI_CONTROL,
@@ -793,7 +807,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
.has_eht = true,
.eht_cap_elem = {
.mac_cap_info[0] =
- IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS |
IEEE80211_EHT_MAC_CAP0_OM_CONTROL |
IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2,
@@ -801,7 +814,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ |
IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI,
.phy_cap_info[5] =
- IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT,
+ FIELD_PREP_CONST(IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK,
+ IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US),
},
/* For all MCS and bandwidth, set 2 NSS for both Tx and
@@ -829,6 +843,9 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
/*
* PPE thresholds for NSS = 2, and RU index bitmap set
* to 0xc.
+ * Note: just for stating what we want, not present in
+ * the transmitted data due to not including
+ * IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT.
*/
.eht_ppe_thres = {0xc1, 0x0e, 0xe0 }
},
@@ -892,8 +909,9 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
bool is_ap = iftype_data->types_mask & BIT(NL80211_IFTYPE_AP);
bool no_320;
- no_320 = !trans->trans_cfg->integrated &&
- trans->pcie_link_speed < PCI_EXP_LNKSTA_CLS_8_0GB;
+ no_320 = (!trans->trans_cfg->integrated &&
+ trans->pcie_link_speed < PCI_EXP_LNKSTA_CLS_8_0GB) ||
+ trans->reduced_cap_sku;
if (!data->sku_cap_11be_enable || iwlwifi_mod_params.disable_11be)
iftype_data->eht_cap.has_eht = false;
@@ -1020,8 +1038,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
if (CSR_HW_REV_TYPE(trans->hw_rev) == IWL_CFG_MAC_TYPE_GL &&
iftype_data->eht_cap.has_eht) {
iftype_data->eht_cap.eht_cap_elem.mac_cap_info[0] &=
- ~(IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS |
- IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
+ ~(IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2);
iftype_data->eht_cap.eht_cap_elem.phy_cap_info[3] &=
~(IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO |
@@ -1059,6 +1076,26 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
iftype_data->he_cap.he_cap_elem.phy_cap_info[7] &=
~IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ;
}
+
+ if (trans->step_urm) {
+ iftype_data->eht_cap.eht_mcs_nss_supp.bw._320.rx_tx_mcs11_max_nss = 0;
+ iftype_data->eht_cap.eht_mcs_nss_supp.bw._320.rx_tx_mcs13_max_nss = 0;
+ }
+
+ if (trans->no_160)
+ iftype_data->he_cap.he_cap_elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+ if (trans->reduced_cap_sku) {
+ memset(&iftype_data->eht_cap.eht_mcs_nss_supp.bw._320, 0,
+ sizeof(iftype_data->eht_cap.eht_mcs_nss_supp.bw._320));
+ iftype_data->eht_cap.eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss = 0;
+ iftype_data->eht_cap.eht_mcs_nss_supp.bw._160.rx_tx_mcs13_max_nss = 0;
+ iftype_data->eht_cap.eht_cap_elem.phy_cap_info[8] &=
+ ~IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA;
+ iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &=
+ ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK;
+ }
}
static void iwl_init_he_hw_capab(struct iwl_trans *trans,
@@ -1575,7 +1612,8 @@ IWL_EXPORT_SYMBOL(iwl_parse_nvm_data);
static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan,
int ch_idx, u16 nvm_flags,
struct iwl_reg_capa reg_capa,
- const struct iwl_cfg *cfg)
+ const struct iwl_cfg *cfg,
+ bool uats_enabled)
{
u32 flags = NL80211_RRF_NO_HT40;
@@ -1620,6 +1658,16 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan,
flags &= ~NL80211_RRF_NO_IR;
}
}
+
+ /* Set the AP type for the UHB case. */
+ if (uats_enabled) {
+ if (!(nvm_flags & NVM_CHANNEL_VLP))
+ flags |= NL80211_RRF_NO_6GHZ_VLP_CLIENT;
+
+ if (!(nvm_flags & NVM_CHANNEL_AFC))
+ flags |= NL80211_RRF_NO_6GHZ_AFC_CLIENT;
+ }
+
/*
* reg_capa is per regulatory domain so apply it for every channel
*/
@@ -1674,7 +1722,7 @@ static struct iwl_reg_capa iwl_get_reg_capa(u32 flags, u8 resp_ver)
struct ieee80211_regdomain *
iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
int num_of_ch, __le32 *channels, u16 fw_mcc,
- u16 geo_info, u32 cap, u8 resp_ver)
+ u16 geo_info, u32 cap, u8 resp_ver, bool uats_enabled)
{
int ch_idx;
u16 ch_flags;
@@ -1740,7 +1788,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
reg_rule_flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx,
ch_flags, reg_capa,
- cfg);
+ cfg, uats_enabled);
/* we can't continue the same rule */
if (ch_idx == 0 || prev_reg_rule_flags != reg_rule_flags ||
@@ -2100,7 +2148,7 @@ struct iwl_nvm_data *iwl_get_nvm(struct iwl_trans *trans,
!!(mac_flags & NVM_MAC_SKU_FLAGS_BAND_5_2_ENABLED);
nvm->sku_cap_mimo_disabled =
!!(mac_flags & NVM_MAC_SKU_FLAGS_MIMO_DISABLED);
- if (CSR_HW_RFID_TYPE(trans->hw_rf_id) == IWL_CFG_RF_TYPE_FM)
+ if (CSR_HW_RFID_TYPE(trans->hw_rf_id) >= IWL_CFG_RF_TYPE_FM)
nvm->sku_cap_11be_enable = true;
/* Initialize PHY sku data */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
index 651ed25b683b..fd9c3bed9407 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
@@ -50,7 +50,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
struct ieee80211_regdomain *
iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
int num_of_ch, __le32 *channels, u16 fw_mcc,
- u16 geo_info, u32 cap, u8 resp_ver);
+ u16 geo_info, u32 cap, u8 resp_ver, bool uats_enabled);
/**
* struct iwl_nvm_section - describes an NVM section in memory.
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
index 3dc618a7c70f..1ca82f3e4ebf 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
@@ -68,9 +68,11 @@ struct iwl_cfg;
* Must be atomic and called with BH disabled.
* @queue_not_full: notifies that a HW queue is not full any more.
* Must be atomic and called with BH disabled.
- * @hw_rf_kill:notifies of a change in the HW rf kill switch. True means that
+ * @hw_rf_kill: notifies of a change in the HW rf kill switch. True means that
* the radio is killed. Return %true if the device should be stopped by
* the transport immediately after the call. May sleep.
+ * Note that this must not return %true for newer devices using gen2 PCIe
+ * transport.
* @free_skb: allows the transport layer to free skbs that haven't been
* reclaimed by the op_mode. This can happen when the driver is freed and
* there are Tx packets pending in the transport layer.
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index dd32c287b983..a7d44df06eab 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -368,12 +368,19 @@ enum {
WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK = 0x80000000,
};
-#define CNVI_AUX_MISC_CHIP 0xA200B0
+#define CNVI_AUX_MISC_CHIP 0xA200B0
+#define CNVI_AUX_MISC_CHIP_MAC_STEP(_val) (((_val) & 0xf000000) >> 24)
+#define CNVI_AUX_MISC_CHIP_PROD_TYPE(_val) ((_val) & 0xfff)
+#define CNVI_AUX_MISC_CHIP_PROD_TYPE_BZ_U 0x930
+
#define CNVR_AUX_MISC_CHIP 0xA2B800
#define CNVR_SCU_SD_REGS_SD_REG_DIG_DCDC_VTRIM 0xA29890
#define CNVR_SCU_SD_REGS_SD_REG_ACTIVE_VDIG_MIRROR 0xA29938
#define CNVI_SCU_SEQ_DATA_DW9 0xA27488
+#define CNVI_PMU_STEP_FLOW 0xA2D588
+#define CNVI_PMU_STEP_FLOW_FORCE_URM BIT(2)
+
#define PREG_AUX_BUS_WPROT_0 0xA04CC0
/* device family 9000 WPROT register */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 5789a8735976..b93cef7b2330 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -519,6 +519,7 @@ struct iwl_pnvm_image {
* Must be atomic
* @reclaim: free packet until ssn. Returns a list of freed packets.
* Must be atomic
+ * @set_q_ptrs: set queue pointers internally, after D3 when HW state changed
* @txq_enable: setup a queue. To setup an AC queue, use the
* iwl_trans_ac_txq_enable wrapper. fw_alive must have been called before
* this one. The op_mode must not configure the HCMD queue. The scheduler
@@ -528,6 +529,8 @@ struct iwl_pnvm_image {
* hardware scheduler bug. May sleep.
* @txq_disable: de-configure a Tx queue to send AMPDUs
* Must be atomic
+ * @txq_alloc: Allocate a new TX queue, may sleep.
+ * @txq_free: Free a previously allocated TX queue.
* @txq_set_shared_mode: change Tx queue shared/unshared marking
* @wait_tx_queues_empty: wait until tx queues are empty. May sleep.
* @wait_txq_empty: wait until specific tx queue is empty. May sleep.
@@ -547,23 +550,27 @@ struct iwl_pnvm_image {
* the op_mode. May be called several times before start_fw, can't be
* called after that.
* @set_pmi: set the power pmi state
+ * @sw_reset: trigger software reset of the NIC
* @grab_nic_access: wake the NIC to be able to access non-HBUS regs.
* Sleeping is not allowed between grab_nic_access and
* release_nic_access.
* @release_nic_access: let the NIC go to sleep. The "flags" parameter
* must be the same one that was sent before to the grab_nic_access.
- * @set_bits_mask - set SRAM register according to value and mask.
+ * @set_bits_mask: set SRAM register according to value and mask.
* @dump_data: return a vmalloc'ed buffer with debug data, maybe containing last
* TX'ed commands and similar. The buffer will be vfree'd by the caller.
* Note that the transport must fill in the proper file headers.
* @debugfs_cleanup: used in the driver unload flow to make a proper cleanup
* of the trans debugfs
+ * @sync_nmi: trigger a firmware NMI and wait for it to complete
* @load_pnvm: save the pnvm data in DRAM
* @set_pnvm: set the pnvm data in the prph scratch buffer, inside the
* context info.
* @load_reduce_power: copy reduce power table to the corresponding DRAM memory
* @set_reduce_power: set reduce power table addresses in the sratch buffer
* @interrupts: disable/enable interrupts to transport
+ * @imr_dma_data: set up IMR DMA
+ * @rxq_dma_data: retrieve RX queue DMA data, see @struct iwl_trans_rxq_dma_data
*/
struct iwl_trans_ops {
@@ -775,7 +782,7 @@ struct iwl_self_init_dram {
* @imr_size: imr dram size received from fw
* @sram_addr: sram address from debug tlv
* @sram_size: sram size from debug tlv
- * @imr2sram_remainbyte`: size remained after each dma transfer
+ * @imr2sram_remainbyte: size remained after each dma transfer
* @imr_curr_addr: current dst address used during dma transfer
* @imr_base_addr: imr address received from fw
*/
@@ -822,12 +829,16 @@ struct iwl_pc_data {
* @fw_mon: DRAM buffer for firmware monitor
* @hw_error: equals true if hw error interrupt was received from the FW
* @ini_dest: debug monitor destination uses &enum iwl_fw_ini_buffer_location
+ * @unsupported_region_msk: unsupported regions out of active_regions
* @active_regions: active regions
* @debug_info_tlv_list: list of debug info TLVs
* @time_point: array of debug time points
* @periodic_trig_list: periodic triggers list
* @domains_bitmap: bitmap of active domains other than &IWL_FW_INI_DOMAIN_ALWAYS_ON
* @ucode_preset: preset based on ucode
+ * @restart_required: indicates debug restart is required
+ * @last_tp_resetfw: last handling of reset during debug timepoint
+ * @imr_data: IMR debug data allocation
* @dump_file_name_ext: dump file name extension
* @dump_file_name_ext_valid: dump file name extension if valid or not
* @num_pc: number of program counter for cpu
@@ -930,6 +941,7 @@ struct iwl_pcie_first_tb_buf {
* @wd_timeout: queue watchdog timeout (jiffies) - per queue
* @frozen: tx stuck queue timer is frozen
* @frozen_expiry_remainder: remember how long until the timer fires
+ * @block: queue is blocked
* @bc_tbl: byte count table of the queue (relevant only for gen2 transport)
* @write_ptr: 1-st empty entry (index) host_w
* @read_ptr: last used entry (index) host_r
@@ -938,6 +950,8 @@ struct iwl_pcie_first_tb_buf {
* @id: queue id
* @low_mark: low watermark, resume queue if free space more than this
* @high_mark: high watermark, stop queue if free space less than this
+ * @overflow_q: overflow queue for handling frames that didn't fit on HW queue
+ * @overflow_tx: need to transmit from overflow
*
* A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame
* descriptors) and required locking structures.
@@ -990,10 +1004,19 @@ struct iwl_txq {
* @bc_table_dword: true if the BC table expects DWORD (as opposed to bytes)
* @page_offs: offset from skb->cb to mac header page pointer
* @dev_cmd_offs: offset from skb->cb to iwl_device_tx_cmd pointer
- * @queue_used - bit mask of used queues
- * @queue_stopped - bit mask of stopped queues
+ * @queue_used: bit mask of used queues
+ * @queue_stopped: bit mask of stopped queues
+ * @txq: array of TXQ data structures representing the TXQs
* @scd_bc_tbls: gen1 pointer to the byte count table of the scheduler
* @queue_alloc_cmd_ver: queue allocation command version
+ * @bc_pool: bytecount DMA allocations pool
+ * @bc_tbl_size: bytecount table size
+ * @tso_hdr_page: page allocated (per CPU) for A-MSDU headers when doing TSO
+ * (and similar usage)
+ * @tfd: TFD data
+ * @tfd.max_tbs: max number of buffers per TFD
+ * @tfd.size: TFD size
+ * @tfd.addr_size: TFD/TB address size
*/
struct iwl_trans_txqs {
unsigned long queue_used[BITS_TO_LONGS(IWL_MAX_TVQM_QUEUES)];
@@ -1026,27 +1049,35 @@ struct iwl_trans_txqs {
/**
* struct iwl_trans - transport common data
*
- * @csme_own - true if we couldn't get ownership on the device
- * @ops - pointer to iwl_trans_ops
- * @op_mode - pointer to the op_mode
+ * @csme_own: true if we couldn't get ownership on the device
+ * @ops: pointer to iwl_trans_ops
+ * @op_mode: pointer to the op_mode
* @trans_cfg: the trans-specific configuration part
- * @cfg - pointer to the configuration
- * @drv - pointer to iwl_drv
+ * @cfg: pointer to the configuration
+ * @drv: pointer to iwl_drv
+ * @state: current device state
* @status: a bit-mask of transport status flags
- * @dev - pointer to struct device * that represents the device
+ * @dev: pointer to struct device * that represents the device
* @max_skb_frags: maximum number of fragments an SKB can have when transmitted.
* 0 indicates that frag SKBs (NETIF_F_SG) aren't supported.
- * @hw_rf_id a u32 with the device RF ID
- * @hw_crf_id a u32 with the device CRF ID
- * @hw_wfpm_id a u32 with the device wfpm ID
+ * @hw_rf_id: a u32 with the device RF ID
+ * @hw_cnv_id: a u32 with the device CNV ID
+ * @hw_crf_id: a u32 with the device CRF ID
+ * @hw_wfpm_id: a u32 with the device wfpm ID
* @hw_id: a u32 with the ID of the device / sub-device.
* Set during transport allocation.
* @hw_id_str: a string with info about HW ID. Set during transport allocation.
+ * @sku_id: the SKU identifier (for PNVM matching)
+ * @pnvm_loaded: indicates PNVM was loaded
+ * @hw_rev: the revision data of the HW
* @hw_rev_step: The mac step of the HW
* @pm_support: set to true in start_hw if link pm is supported
* @ltr_enabled: set to true if the LTR is enabled
* @fail_to_parse_pnvm_image: set to true if pnvm parsing failed
+ * @reduce_power_loaded: indicates reduced power section was loaded
* @failed_to_load_reduce_power_image: set to true if pnvm loading failed
+ * @command_groups: pointer to command group name list array
+ * @command_groups_size: array size of @command_groups
* @wide_cmd_header: true when ucode supports wide command header format
* @wait_command_queue: wait queue for sync commands
* @num_rx_queues: number of RX queues allocated by the transport;
@@ -1055,19 +1086,29 @@ struct iwl_trans_txqs {
* @iml: a pointer to the image loader itself
* @dev_cmd_pool: pool for Tx cmd allocation - for internal use only.
* The user should use iwl_trans_{alloc,free}_tx_cmd.
+ * @dev_cmd_pool_name: name for the TX command allocation pool
+ * @dbgfs_dir: iwlwifi debugfs base dir for this device
+ * @sync_cmd_lockdep_map: lockdep map for checking sync commands
* @rx_mpdu_cmd: MPDU RX command ID, must be assigned by opmode before
* starting the firmware, used for tracing
* @rx_mpdu_cmd_hdr_size: used for tracing, amount of data before the
* start of the 802.11 header in the @rx_mpdu_cmd
+ * @dbg: additional debug data, see &struct iwl_trans_debug
+ * @init_dram: FW initialization DMA data
* @system_pm_mode: the system-wide power management mode in use.
* This mode is set dynamically, depending on the WoWLAN values
* configured from the userspace at runtime.
+ * @name: the device name
* @txqs: transport tx queues data.
* @mbx_addr_0_step: step address data 0
* @mbx_addr_1_step: step address data 1
* @pcie_link_speed: current PCIe link speed (%PCI_EXP_LNKSTA_CLS_*),
* only valid for discrete (not integrated) NICs
* @invalid_tx_cmd: invalid TX command buffer
+ * @reduced_cap_sku: reduced capability supported SKU
+ * @no_160: device not supporting 160 MHz
+ * @step_urm: STEP is in URM, no support for MCS>9 in 320 MHz
+ * @trans_specific: data for the specific transport this is allocated for/with
*/
struct iwl_trans {
bool csme_own;
@@ -1090,6 +1131,8 @@ struct iwl_trans {
u32 hw_id;
char hw_id_str[52];
u32 sku_id[3];
+ bool reduced_cap_sku;
+ u8 no_160:1, step_urm:1;
u8 rx_mpdu_cmd, rx_mpdu_cmd_hdr_size;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
index 9fe1761691ec..535edb51d1c0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
@@ -181,6 +181,9 @@ static int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id,
struct iwl_mvm_sta *mvmsta;
u32 value;
+ if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+ return 0;
+
mvmsta = iwl_mvm_sta_from_staid_protected(mvm, sta_id);
if (!mvmsta)
return 0;
@@ -252,6 +255,124 @@ static void iwl_mvm_bt_coex_tcm_based_ci(struct iwl_mvm *mvm,
swap(data->primary, data->secondary);
}
+static void iwl_mvm_bt_coex_enable_esr(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif, bool enable)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ int link_id;
+
+ lockdep_assert_held(&mvm->mutex);
+
+ if (!vif->cfg.assoc || !ieee80211_vif_is_mld(vif))
+ return;
+
+ /* Done already */
+ if (mvmvif->bt_coex_esr_disabled == !enable)
+ return;
+
+ mvmvif->bt_coex_esr_disabled = !enable;
+
+ /* Nothing to do */
+ if (mvmvif->esr_active == enable)
+ return;
+
+ if (enable) {
+ /* Try to re-enable eSR*/
+ iwl_mvm_mld_select_links(mvm, vif, false);
+ return;
+ }
+
+ /*
+ * Find the primary link, as we want to switch to it and drop the
+ * secondary one.
+ */
+ link_id = iwl_mvm_mld_get_primary_link(mvm, vif, vif->active_links);
+ WARN_ON(link_id < 0);
+
+ ieee80211_set_active_links_async(vif,
+ vif->active_links & BIT(link_id));
+}
+
+/*
+ * This function receives the LB link id and checks if eSR should be
+ * enabled or disabled (due to BT coex)
+ */
+bool
+iwl_mvm_bt_coex_calculate_esr_mode(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ int link_id, int primary_link)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info = mvmvif->link[link_id];
+ bool have_wifi_loss_rate =
+ iwl_fw_lookup_notif_ver(mvm->fw, LEGACY_GROUP,
+ BT_PROFILE_NOTIFICATION, 0) > 4;
+ s8 link_rssi = 0;
+ u8 wifi_loss_rate;
+
+ lockdep_assert_held(&mvm->mutex);
+
+ if (mvm->last_bt_notif.wifi_loss_low_rssi == BT_OFF)
+ return true;
+
+ /* If LB link is the primary one we should always disable eSR */
+ if (link_id == primary_link)
+ return false;
+
+ /* The feature is not supported */
+ if (!have_wifi_loss_rate)
+ return true;
+
+ /*
+ * We might not have a link_info when checking whether we can
+ * (re)enable eSR - the LB link might not exist yet
+ */
+ if (link_info)
+ link_rssi = (s8)link_info->beacon_stats.avg_signal;
+
+ /*
+ * In case we don't know the RSSI - take the lower wifi loss,
+ * so we will more likely enter eSR, and if RSSI is low -
+ * we will get an update on this and exit eSR.
+ */
+ if (!link_rssi)
+ wifi_loss_rate = mvm->last_bt_notif.wifi_loss_mid_high_rssi;
+
+ else if (!mvmvif->bt_coex_esr_disabled)
+ /* RSSI needs to get really low to disable eSR... */
+ wifi_loss_rate =
+ link_rssi <= -IWL_MVM_BT_COEX_DISABLE_ESR_THRESH ?
+ mvm->last_bt_notif.wifi_loss_low_rssi :
+ mvm->last_bt_notif.wifi_loss_mid_high_rssi;
+ else
+ /* ...And really high before we enable it back */
+ wifi_loss_rate =
+ link_rssi <= -IWL_MVM_BT_COEX_ENABLE_ESR_THRESH ?
+ mvm->last_bt_notif.wifi_loss_low_rssi :
+ mvm->last_bt_notif.wifi_loss_mid_high_rssi;
+
+ return wifi_loss_rate <= IWL_MVM_BT_COEX_WIFI_LOSS_THRESH;
+}
+
+void iwl_mvm_bt_coex_update_vif_esr(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ int link_id)
+{
+ unsigned long usable_links = ieee80211_vif_usable_links(vif);
+ int primary_link = iwl_mvm_mld_get_primary_link(mvm, vif,
+ usable_links);
+ bool enable;
+
+ /* Not assoc, not MLD vif or only one usable link */
+ if (primary_link < 0)
+ return;
+
+ enable = iwl_mvm_bt_coex_calculate_esr_mode(mvm, vif, link_id,
+ primary_link);
+
+ iwl_mvm_bt_coex_enable_esr(mvm, vif, enable);
+}
+
static void iwl_mvm_bt_notif_per_link(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct iwl_bt_iterator_data *data,
@@ -297,6 +418,8 @@ static void iwl_mvm_bt_notif_per_link(struct iwl_mvm *mvm,
return;
}
+ iwl_mvm_bt_coex_update_vif_esr(mvm, vif, link_id);
+
if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_COEX_SCHEMA_2))
min_ag_for_static_smps = BT_VERY_HIGH_TRAFFIC;
else
@@ -432,6 +555,10 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac,
return;
}
+ /* When BT is off this will be 0 */
+ if (data->notif->wifi_loss_low_rssi == BT_OFF)
+ iwl_mvm_bt_coex_enable_esr(mvm, vif, true);
+
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++)
iwl_mvm_bt_notif_per_link(mvm, vif, data, link_id);
}
@@ -454,6 +581,11 @@ static void iwl_mvm_bt_coex_notif_handle(struct iwl_mvm *mvm)
mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
iwl_mvm_bt_notif_iterator, &data);
+ if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
+ rcu_read_unlock();
+ return;
+ }
+
iwl_mvm_bt_coex_tcm_based_ci(mvm, &data);
if (data.primary) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index c832068b5718..f5122c4678a1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -11,6 +11,9 @@
#include "fw-api.h"
#define IWL_MVM_UAPSD_NOAGG_BSSIDS_NUM 20
+#define IWL_MVM_BT_COEX_DISABLE_ESR_THRESH 69
+#define IWL_MVM_BT_COEX_ENABLE_ESR_THRESH 63
+#define IWL_MVM_BT_COEX_WIFI_LOSS_THRESH 0
#define IWL_MVM_DEFAULT_PS_TX_DATA_TIMEOUT (100 * USEC_PER_MSEC)
#define IWL_MVM_DEFAULT_PS_RX_DATA_TIMEOUT (100 * USEC_PER_MSEC)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 4582afb149d7..553c6fffc7c6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -450,9 +450,9 @@ static void iwl_mvm_wowlan_get_rsc_v5_data(struct ieee80211_hw *hw,
}
static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif)
+ struct ieee80211_vif *vif,
+ struct iwl_mvm_vif_link_info *mvm_link)
{
- struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
int ver = iwl_fw_lookup_cmd_ver(mvm->fw, WOWLAN_TSC_RSC_PARAM,
IWL_FW_CMD_VER_UNKNOWN);
int ret;
@@ -461,16 +461,14 @@ static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm,
struct wowlan_key_rsc_v5_data data = {};
int i;
- data.rsc = kmalloc(sizeof(*data.rsc), GFP_KERNEL);
+ data.rsc = kzalloc(sizeof(*data.rsc), GFP_KERNEL);
if (!data.rsc)
return -ENOMEM;
- memset(data.rsc, 0xff, sizeof(*data.rsc));
-
for (i = 0; i < ARRAY_SIZE(data.rsc->mcast_key_id_map); i++)
data.rsc->mcast_key_id_map[i] =
IWL_MCAST_KEY_MAP_INVALID;
- data.rsc->sta_id = cpu_to_le32(mvmvif->deflink.ap_sta_id);
+ data.rsc->sta_id = cpu_to_le32(mvm_link->ap_sta_id);
ieee80211_iter_keys(mvm->hw, vif,
iwl_mvm_wowlan_get_rsc_v5_data,
@@ -494,7 +492,7 @@ static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm,
if (ver == 4) {
size = sizeof(*data.rsc_tsc);
data.rsc_tsc->sta_id =
- cpu_to_le32(mvmvif->deflink.ap_sta_id);
+ cpu_to_le32(mvm_link->ap_sta_id);
} else {
/* ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN */
size = sizeof(data.rsc_tsc->params);
@@ -668,10 +666,9 @@ static int iwl_mvm_send_patterns_v1(struct iwl_mvm *mvm,
}
static int iwl_mvm_send_patterns(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
+ struct iwl_mvm_vif_link_info *mvm_link,
struct cfg80211_wowlan *wowlan)
{
- struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_wowlan_patterns_cmd *pattern_cmd;
struct iwl_host_cmd cmd = {
.id = WOWLAN_PATTERNS,
@@ -693,7 +690,7 @@ static int iwl_mvm_send_patterns(struct iwl_mvm *mvm,
pattern_cmd->n_patterns = wowlan->n_patterns;
if (ver >= 3)
- pattern_cmd->sta_id = mvmvif->deflink.ap_sta_id;
+ pattern_cmd->sta_id = mvm_link->ap_sta_id;
for (i = 0; i < wowlan->n_patterns; i++) {
int mask_len = DIV_ROUND_UP(wowlan->patterns[i].pattern_len, 8);
@@ -723,14 +720,15 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct ieee80211_chanctx_conf *ctx;
u8 chains_static, chains_dynamic;
- struct cfg80211_chan_def chandef;
+ struct cfg80211_chan_def chandef, ap_def;
int ret, i;
struct iwl_binding_cmd_v1 binding_cmd = {};
struct iwl_time_quota_cmd quota_cmd = {};
struct iwl_time_quota_data *quota;
u32 status;
- if (WARN_ON_ONCE(iwl_mvm_is_cdb_supported(mvm)))
+ if (WARN_ON_ONCE(iwl_mvm_is_cdb_supported(mvm) ||
+ ieee80211_vif_is_mld(vif)))
return -EINVAL;
/* add back the PHY */
@@ -744,12 +742,13 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
return -EINVAL;
}
chandef = ctx->def;
+ ap_def = ctx->ap;
chains_static = ctx->rx_chains_static;
chains_dynamic = ctx->rx_chains_dynamic;
rcu_read_unlock();
ret = iwl_mvm_phy_ctxt_add(mvm, mvmvif->deflink.phy_ctxt, &chandef,
- chains_static, chains_dynamic);
+ &ap_def, chains_static, chains_dynamic);
if (ret)
return ret;
@@ -927,6 +926,9 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
wowlan_config_cmd->flags = ENABLE_L3_FILTERING |
ENABLE_NBNS_FILTERING | ENABLE_DHCP_FILTERING;
+ if (ap_sta->mfp)
+ wowlan_config_cmd->flags |= IS_11W_ASSOC;
+
if (iwl_fw_lookup_cmd_ver(mvm->fw, WOWLAN_CONFIGURATION, 0) < 6) {
/* Query the last used seqno and set it */
int ret = iwl_mvm_get_last_nonqos_seq(mvm, vif);
@@ -987,7 +989,8 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
}
static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif)
+ struct ieee80211_vif *vif,
+ struct iwl_mvm_vif_link_info *mvm_link)
{
bool unified = fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
@@ -1016,7 +1019,7 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
return -EIO;
}
- ret = iwl_mvm_wowlan_config_rsc_tsc(mvm, vif);
+ ret = iwl_mvm_wowlan_config_rsc_tsc(mvm, vif, mvm_link);
if (ret)
return ret;
@@ -1030,7 +1033,7 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
if (ver == 2) {
size = sizeof(tkip_data.tkip);
tkip_data.tkip.sta_id =
- cpu_to_le32(mvmvif->deflink.ap_sta_id);
+ cpu_to_le32(mvm_link->ap_sta_id);
} else if (ver == 1 || ver == IWL_FW_CMD_VER_UNKNOWN) {
size = sizeof(struct iwl_wowlan_tkip_params_cmd_ver_1);
} else {
@@ -1079,7 +1082,7 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
kek_kck_cmd.kek_len = cpu_to_le16(mvmvif->rekey_data.kek_len);
kek_kck_cmd.replay_ctr = mvmvif->rekey_data.replay_ctr;
kek_kck_cmd.akm = cpu_to_le32(mvmvif->rekey_data.akm);
- kek_kck_cmd.sta_id = cpu_to_le32(mvmvif->deflink.ap_sta_id);
+ kek_kck_cmd.sta_id = cpu_to_le32(mvm_link->ap_sta_id);
if (cmd_ver == 4) {
cmd_size = sizeof(struct iwl_wowlan_kek_kck_material_cmd_v4);
@@ -1112,6 +1115,7 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
struct cfg80211_wowlan *wowlan,
struct iwl_wowlan_config_cmd *wowlan_config_cmd,
struct ieee80211_vif *vif, struct iwl_mvm_vif *mvmvif,
+ struct iwl_mvm_vif_link_info *mvm_link,
struct ieee80211_sta *ap_sta)
{
int ret;
@@ -1130,7 +1134,7 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
return ret;
}
- ret = iwl_mvm_wowlan_config_key_params(mvm, vif);
+ ret = iwl_mvm_wowlan_config_key_params(mvm, vif, mvm_link);
if (ret)
return ret;
@@ -1142,7 +1146,7 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
if (fw_has_api(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_API_WOWLAN_TCP_SYN_WAKE))
- ret = iwl_mvm_send_patterns(mvm, vif, wowlan);
+ ret = iwl_mvm_send_patterns(mvm, mvm_link, wowlan);
else
ret = iwl_mvm_send_patterns_v1(mvm, wowlan);
if (ret)
@@ -1223,6 +1227,7 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
struct ieee80211_vif *vif = NULL;
struct iwl_mvm_vif *mvmvif = NULL;
struct ieee80211_sta *ap_sta = NULL;
+ struct iwl_mvm_vif_link_info *mvm_link;
struct iwl_d3_manager_config d3_cfg_cmd_data = {
/*
* Program the minimum sleep time to 10 seconds, as many
@@ -1237,7 +1242,7 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
.data[0] = &d3_cfg_cmd_data,
.len[0] = sizeof(d3_cfg_cmd_data),
};
- int ret;
+ int ret, primary_link;
int len __maybe_unused;
bool unified_image = fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
@@ -1251,21 +1256,46 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
return -EINVAL;
}
+ vif = iwl_mvm_get_bss_vif(mvm);
+ if (IS_ERR_OR_NULL(vif))
+ return 1;
+
+ if (ieee80211_vif_is_mld(vif) && vif->cfg.assoc) {
+ /*
+ * Select the 'best' link. May need to revisit, it seems
+ * better to not optimize for throughput but rather range,
+ * reliability and power here - and select 2.4 GHz ...
+ */
+ primary_link =
+ iwl_mvm_mld_get_primary_link(mvm, vif,
+ vif->active_links);
+
+ if (WARN_ONCE(primary_link < 0, "no primary link in 0x%x\n",
+ vif->active_links))
+ primary_link = __ffs(vif->active_links);
+
+ ret = ieee80211_set_active_links(vif, BIT(primary_link));
+ if (ret)
+ return ret;
+ } else {
+ primary_link = 0;
+ }
+
mutex_lock(&mvm->mutex);
set_bit(IWL_MVM_STATUS_IN_D3, &mvm->status);
synchronize_net();
- vif = iwl_mvm_get_bss_vif(mvm);
- if (IS_ERR_OR_NULL(vif)) {
- ret = 1;
+ mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+ mvm_link = mvmvif->link[primary_link];
+ if (WARN_ON_ONCE(!mvm_link)) {
+ ret = -EINVAL;
goto out_noreset;
}
- mvmvif = iwl_mvm_vif_from_mac80211(vif);
-
- if (mvmvif->deflink.ap_sta_id == IWL_MVM_INVALID_STA) {
+ if (mvm_link->ap_sta_id == IWL_MVM_INVALID_STA) {
/* if we're not associated, this must be netdetect */
if (!wowlan->nd_config) {
ret = 1;
@@ -1279,24 +1309,31 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
mvm->net_detect = true;
} else {
- struct iwl_wowlan_config_cmd wowlan_config_cmd = {};
+ struct iwl_wowlan_config_cmd wowlan_config_cmd = {
+ .offloading_tid = 0,
+ };
- wowlan_config_cmd.sta_id = mvmvif->deflink.ap_sta_id;
+ wowlan_config_cmd.sta_id = mvm_link->ap_sta_id;
ap_sta = rcu_dereference_protected(
- mvm->fw_id_to_mac_id[mvmvif->deflink.ap_sta_id],
+ mvm->fw_id_to_mac_id[mvm_link->ap_sta_id],
lockdep_is_held(&mvm->mutex));
if (IS_ERR_OR_NULL(ap_sta)) {
ret = -EINVAL;
goto out_noreset;
}
+ ret = iwl_mvm_sta_ensure_queue(
+ mvm, ap_sta->txq[wowlan_config_cmd.offloading_tid]);
+ if (ret)
+ goto out_noreset;
+
ret = iwl_mvm_get_wowlan_config(mvm, wowlan, &wowlan_config_cmd,
vif, mvmvif, ap_sta);
if (ret)
goto out_noreset;
ret = iwl_mvm_wowlan_config(mvm, wowlan, &wowlan_config_cmd,
- vif, mvmvif, ap_sta);
+ vif, mvmvif, mvm_link, ap_sta);
if (ret)
goto out;
@@ -1462,7 +1499,8 @@ static void iwl_mvm_report_wakeup_reasons(struct iwl_mvm *mvm,
status->pattern_number;
if (reasons & (IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_MISSED_BEACON |
- IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_DEAUTH))
+ IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_DEAUTH |
+ IWL_WOWLAN_WAKEUP_BY_GTK_REKEY_FAILURE))
wakeup.disconnect = true;
if (reasons & IWL_WOWLAN_WAKEUP_BY_GTK_REKEY_FAILURE)
@@ -1486,6 +1524,9 @@ static void iwl_mvm_report_wakeup_reasons(struct iwl_mvm *mvm,
if (reasons & IWL_WOWLAN_WAKEUP_BY_REM_WAKE_WAKEUP_PACKET)
wakeup.tcp_match = true;
+ if (reasons & IWL_WAKEUP_BY_11W_UNPROTECTED_DEAUTH_OR_DISASSOC)
+ wakeup.unprot_deauth_disassoc = true;
+
if (status->wake_packet) {
int pktsize = status->wake_packet_bufsize;
int pktlen = status->wake_packet_length;
@@ -1839,9 +1880,12 @@ iwl_mvm_d3_set_igtk_bigtk_ipn(const struct iwl_multicast_key_data *key,
memcpy(seq->aes_gmac.pn, key->ipn, sizeof(seq->aes_gmac.pn));
break;
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+ case WLAN_CIPHER_SUITE_AES_CMAC:
BUILD_BUG_ON(sizeof(seq->aes_cmac.pn) != sizeof(key->ipn));
memcpy(seq->aes_cmac.pn, key->ipn, sizeof(seq->aes_cmac.pn));
break;
+ default:
+ WARN_ON(1);
}
}
@@ -1931,7 +1975,7 @@ static bool iwl_mvm_gtk_rekey(struct iwl_wowlan_status_data *status,
struct ieee80211_vif *vif,
struct iwl_mvm *mvm, u32 gtk_cipher)
{
- int i;
+ int i, j;
struct ieee80211_key_conf *key;
struct {
struct ieee80211_key_conf conf;
@@ -1939,6 +1983,7 @@ static bool iwl_mvm_gtk_rekey(struct iwl_wowlan_status_data *status,
} conf = {
.conf.cipher = gtk_cipher,
};
+ int link_id = vif->active_links ? __ffs(vif->active_links) : -1;
BUILD_BUG_ON(WLAN_KEY_LEN_CCMP != WLAN_KEY_LEN_GCMP);
BUILD_BUG_ON(sizeof(conf.key) < WLAN_KEY_LEN_CCMP);
@@ -1972,10 +2017,18 @@ static bool iwl_mvm_gtk_rekey(struct iwl_wowlan_status_data *status,
memcpy(conf.conf.key, status->gtk[i].key,
sizeof(status->gtk[i].key));
- key = ieee80211_gtk_rekey_add(vif, &conf.conf);
+ key = ieee80211_gtk_rekey_add(vif, &conf.conf, link_id);
if (IS_ERR(key))
return false;
- iwl_mvm_set_key_rx_seq_idx(key, status, i);
+
+ for (j = 0; j < ARRAY_SIZE(status->gtk_seq); j++) {
+ if (!status->gtk_seq[j].valid ||
+ status->gtk_seq[j].key_id != key->keyidx)
+ continue;
+ iwl_mvm_set_key_rx_seq_idx(key, status, j);
+ break;
+ }
+ WARN_ON(j == ARRAY_SIZE(status->gtk_seq));
}
return true;
@@ -1995,6 +2048,7 @@ iwl_mvm_d3_igtk_bigtk_rekey_add(struct iwl_wowlan_status_data *status,
.conf.keyidx = key_data->id,
};
struct ieee80211_key_seq seq;
+ int link_id = vif->active_links ? __ffs(vif->active_links) : -1;
if (!key_data->len)
return true;
@@ -2020,17 +2074,17 @@ iwl_mvm_d3_igtk_bigtk_rekey_add(struct iwl_wowlan_status_data *status,
BUILD_BUG_ON(sizeof(conf.key) < sizeof(key_data->key));
memcpy(conf.conf.key, key_data->key, conf.conf.keylen);
- key_config = ieee80211_gtk_rekey_add(vif, &conf.conf);
+ key_config = ieee80211_gtk_rekey_add(vif, &conf.conf, link_id);
if (IS_ERR(key_config))
return false;
ieee80211_set_key_rx_seq(key_config, 0, &seq);
if (key_config->keyidx == 4 || key_config->keyidx == 5) {
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- int link_id = vif->active_links ? __ffs(vif->active_links) : 0;
- struct iwl_mvm_vif_link_info *mvm_link =
- mvmvif->link[link_id];
+ struct iwl_mvm_vif_link_info *mvm_link;
+ link_id = link_id < 0 ? 0 : link_id;
+ mvm_link = mvmvif->link[link_id];
mvm_link->igtk = key_config;
}
@@ -2065,7 +2119,6 @@ static bool iwl_mvm_setup_connection_keep(struct iwl_mvm *mvm,
.status = status,
};
int i;
-
u32 disconnection_reasons =
IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_MISSED_BEACON |
IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_DEAUTH;
@@ -2073,9 +2126,6 @@ static bool iwl_mvm_setup_connection_keep(struct iwl_mvm *mvm,
if (!status || !vif->bss_conf.bssid)
return false;
- if (status->wakeup_reasons & disconnection_reasons)
- return false;
-
if (iwl_mvm_lookup_wowlan_status_ver(mvm) > 6 ||
iwl_fw_lookup_notif_ver(mvm->fw, PROT_OFFLOAD_GROUP,
WOWLAN_INFO_NOTIFICATION,
@@ -2136,6 +2186,9 @@ out:
mvmvif->seqno = status->non_qos_seq_ctr + 0x10;
}
+ if (status->wakeup_reasons & disconnection_reasons)
+ return false;
+
return true;
}
@@ -2193,7 +2246,10 @@ static void iwl_mvm_convert_gtk_v3(struct iwl_wowlan_status_data *status,
static void iwl_mvm_convert_igtk(struct iwl_wowlan_status_data *status,
struct iwl_wowlan_igtk_status *data)
{
+ int i;
+
BUILD_BUG_ON(sizeof(status->igtk.key) < sizeof(data->key));
+ BUILD_BUG_ON(sizeof(status->igtk.ipn) != sizeof(data->ipn));
if (!data->key_len)
return;
@@ -2205,7 +2261,10 @@ static void iwl_mvm_convert_igtk(struct iwl_wowlan_status_data *status,
+ WOWLAN_IGTK_MIN_INDEX;
memcpy(status->igtk.key, data->key, sizeof(data->key));
- memcpy(status->igtk.ipn, data->ipn, sizeof(data->ipn));
+
+ /* mac80211 expects big endian for memcmp() to work, convert */
+ for (i = 0; i < sizeof(data->ipn); i++)
+ status->igtk.ipn[i] = data->ipn[sizeof(data->ipn) - i - 1];
}
static void iwl_mvm_convert_bigtk(struct iwl_wowlan_status_data *status,
@@ -2839,6 +2898,9 @@ iwl_mvm_choose_query_wakeup_reasons(struct iwl_mvm *mvm,
u8 sta_id = mvm->net_detect ? IWL_MVM_INVALID_STA :
mvmvif->deflink.ap_sta_id;
+ /* bug - FW with MLO has status notification */
+ WARN_ON(ieee80211_vif_is_mld(vif));
+
d3_data->status = iwl_mvm_send_wowlan_get_status(mvm, sta_id);
}
@@ -2947,7 +3009,7 @@ static void iwl_mvm_nd_match_info_handler(struct iwl_mvm *mvm,
if (results->matched_profiles) {
memcpy(results->matches, notif->matches, matches_len);
- d3_data->nd_results_valid = TRUE;
+ d3_data->nd_results_valid = true;
}
/* no scan should be active at this point */
@@ -3345,6 +3407,7 @@ static ssize_t iwl_mvm_d3_test_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct iwl_mvm *mvm = file->private_data;
+ unsigned long end = jiffies + 60 * HZ;
u32 pme_asserted;
while (true) {
@@ -3358,6 +3421,12 @@ static ssize_t iwl_mvm_d3_test_read(struct file *file, char __user *user_buf,
if (msleep_interruptible(100))
break;
+
+ if (time_is_before_jiffies(end)) {
+ IWL_ERR(mvm,
+ "ending pseudo-D3 with timeout after ~60 seconds\n");
+ return -ETIMEDOUT;
+ }
}
return 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index e8b881596baf..51b01f7528be 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -381,9 +381,9 @@ static ssize_t iwl_dbgfs_bf_params_write(struct ieee80211_vif *vif, char *buf,
mutex_lock(&mvm->mutex);
iwl_dbgfs_update_bf(vif, param, value);
if (param == MVM_DEBUGFS_BF_ENABLE_BEACON_FILTER && !value)
- ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
+ ret = iwl_mvm_disable_beacon_filter(mvm, vif);
else
- ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
+ ret = iwl_mvm_enable_beacon_filter(mvm, vif);
mutex_unlock(&mvm->mutex);
return ret ?: count;
@@ -578,34 +578,47 @@ static ssize_t iwl_dbgfs_rx_phyinfo_write(struct ieee80211_vif *vif, char *buf,
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm *mvm = mvmvif->mvm;
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct iwl_mvm_phy_ctxt *phy_ctxt;
+ struct ieee80211_bss_conf *link_conf;
u16 value;
- int ret;
+ int link_id, ret = -EINVAL;
ret = kstrtou16(buf, 0, &value);
if (ret)
return ret;
mutex_lock(&mvm->mutex);
- rcu_read_lock();
- chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf);
- /* make sure the channel context is assigned */
- if (!chanctx_conf) {
+ mvm->dbgfs_rx_phyinfo = value;
+
+ for_each_vif_active_link(vif, link_conf, link_id) {
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ struct cfg80211_chan_def min_def, ap_def;
+ struct iwl_mvm_phy_ctxt *phy_ctxt;
+ u8 chains_static, chains_dynamic;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(link_conf->chanctx_conf);
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ continue;
+ }
+ /* A command can't be sent with RCU lock held, so copy
+ * everything here and use it after unlocking
+ */
+ min_def = chanctx_conf->min_def;
+ ap_def = chanctx_conf->ap;
+ chains_static = chanctx_conf->rx_chains_static;
+ chains_dynamic = chanctx_conf->rx_chains_dynamic;
rcu_read_unlock();
- mutex_unlock(&mvm->mutex);
- return -EINVAL;
- }
- phy_ctxt = &mvm->phy_ctxts[*(u16 *)chanctx_conf->drv_priv];
- rcu_read_unlock();
+ phy_ctxt = mvmvif->link[link_id]->phy_ctxt;
+ if (!phy_ctxt)
+ continue;
- mvm->dbgfs_rx_phyinfo = value;
+ ret = iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &min_def, &ap_def,
+ chains_static, chains_dynamic);
+ }
- ret = iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &chanctx_conf->min_def,
- chanctx_conf->rx_chains_static,
- chanctx_conf->rx_chains_dynamic);
mutex_unlock(&mvm->mutex);
return ret ?: count;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index edc8204f7c0e..79f4ac8cbc72 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -391,9 +391,7 @@ static ssize_t iwl_dbgfs_wifi_6e_enable_read(struct file *file,
char buf[12];
u32 value;
- err = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0,
- DSM_FUNC_ENABLE_6E,
- &iwl_guid, &value);
+ err = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_ENABLE_6E, &value);
if (err)
return err;
@@ -877,14 +875,14 @@ static ssize_t iwl_dbgfs_tas_get_status_read(struct file *file,
le16_to_cpu(rsp->curr_mcc));
pos += scnprintf(pos, endpos - pos, "Block list entries:");
- for (i = 0; i < APCI_WTAS_BLACK_LIST_MAX; i++)
+ for (i = 0; i < IWL_WTAS_BLACK_LIST_MAX; i++)
pos += scnprintf(pos, endpos - pos, " 0x%x",
le16_to_cpu(rsp->block_list[i]));
pos += scnprintf(pos, endpos - pos, "\nOEM name: %s\n",
- dmi_get_system_info(DMI_SYS_VENDOR));
+ dmi_get_system_info(DMI_SYS_VENDOR) ?: "<unknown>");
pos += scnprintf(pos, endpos - pos, "\tVendor In Approved List: %s\n",
- iwl_mvm_is_vendor_in_approved_list() ? "YES" : "NO");
+ iwl_is_tas_approved() ? "YES" : "NO");
pos += scnprintf(pos, endpos - pos,
"\tDo TAS Support Dual Radio?: %s\n",
rsp->in_dual_radio ? "TRUE" : "FALSE");
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index 233ae81884a0..4863a3c74640 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/etherdevice.h>
#include <linux/math64.h>
@@ -821,9 +821,10 @@ iwl_mvm_ftm_put_target_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
* If secure LTF is turned off, replace the flag with PMF only
*/
flags = le32_to_cpu(target->initiator_ap_flags);
- if ((flags & IWL_INITIATOR_AP_FLAGS_SECURED) &&
- !IWL_MVM_FTM_INITIATOR_SECURE_LTF) {
- flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
+ if (flags & IWL_INITIATOR_AP_FLAGS_SECURED) {
+ if (!IWL_MVM_FTM_INITIATOR_SECURE_LTF)
+ flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
+
flags |= IWL_INITIATOR_AP_FLAGS_PMF;
target->initiator_ap_flags = cpu_to_le32(flags);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
index 8f10590f9cdd..8e760300a1ab 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
@@ -12,6 +12,9 @@ struct iwl_mvm_pasn_sta {
struct list_head list;
struct iwl_mvm_int_sta int_sta;
u8 addr[ETH_ALEN];
+
+ /* must be last as it followed by buffer holding the key */
+ struct ieee80211_key_conf keyconf;
};
struct iwl_mvm_pasn_hltk_data {
@@ -303,6 +306,10 @@ static void iwl_mvm_resp_del_pasn_sta(struct iwl_mvm *mvm,
{
list_del(&sta->list);
+ if (sta->keyconf.keylen)
+ iwl_mvm_sec_key_del_pasn(mvm, vif, BIT(sta->int_sta.sta_id),
+ &sta->keyconf);
+
if (iwl_mvm_has_mld_api(mvm->fw))
iwl_mvm_mld_rm_sta_id(mvm, sta->int_sta.sta_id);
else
@@ -342,6 +349,12 @@ int iwl_mvm_ftm_respoder_add_pasn_sta(struct iwl_mvm *mvm,
}
if (hltk && hltk_len) {
+ if (!fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_SECURE_LTF_SUPPORT)) {
+ IWL_ERR(mvm, "No support for secure LTF measurement\n");
+ return -EINVAL;
+ }
+
hltk_data.cipher = iwl_mvm_cipher_to_location_cipher(cipher);
if (hltk_data.cipher == IWL_LOCATION_CIPHER_INVALID) {
IWL_ERR(mvm, "invalid cipher: %u\n", cipher);
@@ -352,12 +365,12 @@ int iwl_mvm_ftm_respoder_add_pasn_sta(struct iwl_mvm *mvm,
}
if (tk && tk_len) {
- sta = kzalloc(sizeof(*sta), GFP_KERNEL);
+ sta = kzalloc(sizeof(*sta) + tk_len, GFP_KERNEL);
if (!sta)
return -ENOBUFS;
ret = iwl_mvm_add_pasn_sta(mvm, vif, &sta->int_sta, addr,
- cipher, tk, tk_len);
+ cipher, tk, tk_len, &sta->keyconf);
if (ret) {
kfree(sta);
return ret;
@@ -425,7 +438,7 @@ int iwl_mvm_ftm_start_responder(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
rcu_read_unlock();
phy_ctxt = &mvm->phy_ctxts[*phy_ctxt_id];
- ret = iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &ctx.def,
+ ret = iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &ctx.def, &ctx.ap,
ctx.rx_chains_static,
ctx.rx_chains_dynamic);
if (ret)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 1252084662c6..e1c2b7fc92ab 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -16,6 +16,7 @@
#include "fw/acpi.h"
#include "fw/pnvm.h"
#include "fw/uefi.h"
+#include "fw/regulatory.h"
#include "mvm.h"
#include "fw/dbg.h"
@@ -487,7 +488,6 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm,
#endif /* CONFIG_ACPI */
}
-#if defined(CONFIG_ACPI) && defined(CONFIG_EFI)
static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
{
u8 cmd_ver;
@@ -567,17 +567,6 @@ static int iwl_mvm_sgom_init(struct iwl_mvm *mvm)
return ret;
}
-#else
-
-static int iwl_mvm_sgom_init(struct iwl_mvm *mvm)
-{
- return 0;
-}
-
-static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
-{
-}
-#endif
static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
{
@@ -677,6 +666,11 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm)
iwl_dbg_tlv_time_point(&mvm->fwrt, IWL_FW_INI_TIME_POINT_AFTER_ALIVE,
NULL);
+ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_BZ)
+ mvm->trans->step_urm = !!(iwl_read_umac_prph(mvm->trans,
+ CNVI_PMU_STEP_FLOW) &
+ CNVI_PMU_STEP_FLOW_FORCE_URM);
+
/* Send init config command to mark that we are sending NVM access
* commands
*/
@@ -890,7 +884,6 @@ static int iwl_mvm_config_ltr(struct iwl_mvm *mvm)
sizeof(cmd), &cmd);
}
-#ifdef CONFIG_ACPI
int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
{
u32 cmd_id = REDUCE_TX_POWER_CMD;
@@ -931,9 +924,9 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
/* all structs have the same common part, add it */
len += sizeof(cmd.common);
- ret = iwl_sar_select_profile(&mvm->fwrt, per_chain,
- IWL_NUM_CHAIN_TABLES,
- n_subbands, prof_a, prof_b);
+ ret = iwl_sar_fill_profile(&mvm->fwrt, per_chain,
+ IWL_NUM_CHAIN_TABLES,
+ n_subbands, prof_a, prof_b);
/* return on error or if the profile is disabled (positive number) */
if (ret)
@@ -989,7 +982,7 @@ int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
resp = (void *)cmd.resp_pkt->data;
ret = le32_to_cpu(resp->profile_idx);
- if (WARN_ON(ret > ACPI_NUM_GEO_PROFILES_REV3))
+ if (WARN_ON(ret > BIOS_GEO_MAX_PROFILE_NUM))
ret = -EIO;
iwl_free_resp(&cmd);
@@ -1003,7 +996,7 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
u16 len;
u32 n_bands;
u32 n_profiles;
- u32 sk = 0;
+ __le32 sk = cpu_to_le32(0);
int ret;
u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id,
IWL_FW_CMD_VER_UNKNOWN);
@@ -1020,27 +1013,35 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
/* the ops field is at the same spot for all versions, so set in v1 */
cmd.v1.ops = cpu_to_le32(IWL_PER_CHAIN_OFFSET_SET_TABLES);
+ /* Only set to South Korea if the table revision is 1 */
+ if (mvm->fwrt.geo_rev == 1)
+ sk = cpu_to_le32(1);
+
if (cmd_ver == 5) {
len = sizeof(cmd.v5);
n_bands = ARRAY_SIZE(cmd.v5.table[0]);
- n_profiles = ACPI_NUM_GEO_PROFILES_REV3;
+ n_profiles = BIOS_GEO_MAX_PROFILE_NUM;
+ cmd.v5.table_revision = sk;
} else if (cmd_ver == 4) {
len = sizeof(cmd.v4);
n_bands = ARRAY_SIZE(cmd.v4.table[0]);
- n_profiles = ACPI_NUM_GEO_PROFILES_REV3;
+ n_profiles = BIOS_GEO_MAX_PROFILE_NUM;
+ cmd.v4.table_revision = sk;
} else if (cmd_ver == 3) {
len = sizeof(cmd.v3);
n_bands = ARRAY_SIZE(cmd.v3.table[0]);
- n_profiles = ACPI_NUM_GEO_PROFILES;
+ n_profiles = BIOS_GEO_MIN_PROFILE_NUM;
+ cmd.v3.table_revision = sk;
} else if (fw_has_api(&mvm->fwrt.fw->ucode_capa,
IWL_UCODE_TLV_API_SAR_TABLE_VER)) {
len = sizeof(cmd.v2);
n_bands = ARRAY_SIZE(cmd.v2.table[0]);
- n_profiles = ACPI_NUM_GEO_PROFILES;
+ n_profiles = BIOS_GEO_MIN_PROFILE_NUM;
+ cmd.v2.table_revision = sk;
} else {
len = sizeof(cmd.v1);
n_bands = ARRAY_SIZE(cmd.v1.table[0]);
- n_profiles = ACPI_NUM_GEO_PROFILES;
+ n_profiles = BIOS_GEO_MIN_PROFILE_NUM;
}
BUILD_BUG_ON(offsetof(struct iwl_geo_tx_power_profiles_cmd_v1, table) !=
@@ -1052,8 +1053,8 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
offsetof(struct iwl_geo_tx_power_profiles_cmd_v4, table) !=
offsetof(struct iwl_geo_tx_power_profiles_cmd_v5, table));
/* the table is at the same position for all versions, so set use v1 */
- ret = iwl_sar_geo_init(&mvm->fwrt, &cmd.v1.table[0][0],
- n_bands, n_profiles);
+ ret = iwl_sar_geo_fill_table(&mvm->fwrt, &cmd.v1.table[0][0],
+ n_bands, n_profiles);
/*
* It is a valid scenario to not support SAR, or miss wgds table,
@@ -1062,27 +1063,6 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
if (ret)
return 0;
- /* Only set to South Korea if the table revision is 1 */
- if (mvm->fwrt.geo_rev == 1)
- sk = 1;
-
- /*
- * Set the table_revision to South Korea (1) or not (0). The
- * element name is misleading, as it doesn't contain the table
- * revision number, but whether the South Korea variation
- * should be used.
- * This must be done after calling iwl_sar_geo_init().
- */
- if (cmd_ver == 5)
- cmd.v5.table_revision = cpu_to_le32(sk);
- else if (cmd_ver == 4)
- cmd.v4.table_revision = cpu_to_le32(sk);
- else if (cmd_ver == 3)
- cmd.v3.table_revision = cpu_to_le32(sk);
- else if (fw_has_api(&mvm->fwrt.fw->ucode_capa,
- IWL_UCODE_TLV_API_SAR_TABLE_VER))
- cmd.v2.table_revision = cpu_to_le32(sk);
-
return iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, len, &cmd);
}
@@ -1091,7 +1071,7 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm)
union iwl_ppag_table_cmd cmd;
int ret, cmd_size;
- ret = iwl_read_ppag_table(&mvm->fwrt, &cmd, &cmd_size);
+ ret = iwl_fill_ppag_table(&mvm->fwrt, &cmd, &cmd_size);
/* Not supporting PPAG table is a valid scenario */
if (ret < 0)
return 0;
@@ -1110,80 +1090,19 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm)
static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
{
/* no need to read the table, done in INIT stage */
- if (!(iwl_acpi_is_ppag_approved(&mvm->fwrt)))
+ if (!(iwl_is_ppag_approved(&mvm->fwrt)))
return 0;
return iwl_mvm_ppag_send_cmd(mvm);
}
-static const struct dmi_system_id dmi_tas_approved_list[] = {
- { .ident = "HP",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "HP"),
- },
- },
- { .ident = "SAMSUNG",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD"),
- },
- },
- { .ident = "LENOVO",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
- },
- },
- { .ident = "DELL",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- },
- },
- { .ident = "MSFT",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- },
- },
- { .ident = "Acer",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
- },
- },
- { .ident = "ASUS",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- },
- },
- { .ident = "GOOGLE-HP",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Google"),
- DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
- },
- },
- { .ident = "MSI",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International Co., Ltd."),
- },
- },
- { .ident = "Honor",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "HONOR"),
- },
- },
- /* keep last */
- {}
-};
-
-bool iwl_mvm_is_vendor_in_approved_list(void)
-{
- return dmi_check_system(dmi_tas_approved_list);
-}
-
static bool iwl_mvm_add_to_tas_block_list(__le32 *list, __le32 *le_size, unsigned int mcc)
{
int i;
u32 size = le32_to_cpu(*le_size);
/* Verify that there is room for another country */
- if (size >= IWL_TAS_BLOCK_LIST_MAX)
+ if (size >= IWL_WTAS_BLACK_LIST_MAX)
return false;
for (i = 0; i < size; i++) {
@@ -1200,21 +1119,21 @@ static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
{
u32 cmd_id = WIDE_ID(REGULATORY_AND_NVM_GROUP, TAS_CONFIG);
int ret;
- union iwl_tas_config_cmd cmd = {};
+ struct iwl_tas_data data = {};
+ struct iwl_tas_config_cmd cmd = {};
int cmd_size, fw_ver;
- BUILD_BUG_ON(ARRAY_SIZE(cmd.v3.block_list_array) <
- APCI_WTAS_BLACK_LIST_MAX);
+ BUILD_BUG_ON(ARRAY_SIZE(data.block_list_array) !=
+ IWL_WTAS_BLACK_LIST_MAX);
+ BUILD_BUG_ON(ARRAY_SIZE(cmd.common.block_list_array) !=
+ IWL_WTAS_BLACK_LIST_MAX);
if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TAS_CFG)) {
IWL_DEBUG_RADIO(mvm, "TAS not enabled in FW\n");
return;
}
- fw_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id,
- IWL_FW_CMD_VER_UNKNOWN);
-
- ret = iwl_acpi_get_tas(&mvm->fwrt, &cmd, fw_ver);
+ ret = iwl_bios_get_tas_table(&mvm->fwrt, &data);
if (ret < 0) {
IWL_DEBUG_RADIO(mvm,
"TAS table invalid or unavailable. (%d)\n",
@@ -1225,16 +1144,16 @@ static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
if (ret == 0)
return;
- if (!iwl_mvm_is_vendor_in_approved_list()) {
+ if (!iwl_is_tas_approved()) {
IWL_DEBUG_RADIO(mvm,
"System vendor '%s' is not in the approved list, disabling TAS in US and Canada.\n",
- dmi_get_system_info(DMI_SYS_VENDOR));
- if ((!iwl_mvm_add_to_tas_block_list(cmd.v4.block_list_array,
- &cmd.v4.block_list_size,
- IWL_MCC_US)) ||
- (!iwl_mvm_add_to_tas_block_list(cmd.v4.block_list_array,
- &cmd.v4.block_list_size,
- IWL_MCC_CANADA))) {
+ dmi_get_system_info(DMI_SYS_VENDOR) ?: "<unknown>");
+ if ((!iwl_mvm_add_to_tas_block_list(data.block_list_array,
+ &data.block_list_size,
+ IWL_MCC_US)) ||
+ (!iwl_mvm_add_to_tas_block_list(data.block_list_array,
+ &data.block_list_size,
+ IWL_MCC_CANADA))) {
IWL_DEBUG_RADIO(mvm,
"Unable to add US/Canada to TAS block list, disabling TAS\n");
return;
@@ -1242,41 +1161,64 @@ static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
} else {
IWL_DEBUG_RADIO(mvm,
"System vendor '%s' is in the approved list.\n",
- dmi_get_system_info(DMI_SYS_VENDOR));
+ dmi_get_system_info(DMI_SYS_VENDOR) ?: "<unknown>");
}
- /* v4 is the same size as v3, so no need to differentiate here */
- cmd_size = fw_ver < 3 ?
- sizeof(struct iwl_tas_config_cmd_v2) :
- sizeof(struct iwl_tas_config_cmd_v3);
+ fw_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id,
+ IWL_FW_CMD_VER_UNKNOWN);
+
+ memcpy(&cmd.common, &data, sizeof(struct iwl_tas_config_cmd_common));
+
+ /* Set v3 or v4 specific parts. will be trunctated for fw_ver < 3 */
+ if (fw_ver == 4) {
+ cmd.v4.override_tas_iec = data.override_tas_iec;
+ cmd.v4.enable_tas_iec = data.enable_tas_iec;
+ cmd.v4.usa_tas_uhb_allowed = data.usa_tas_uhb_allowed;
+ } else {
+ cmd.v3.override_tas_iec = cpu_to_le16(data.override_tas_iec);
+ cmd.v3.enable_tas_iec = cpu_to_le16(data.enable_tas_iec);
+ }
+
+ cmd_size = sizeof(struct iwl_tas_config_cmd_common);
+ if (fw_ver >= 3)
+ /* v4 is the same size as v3 */
+ cmd_size += sizeof(struct iwl_tas_config_cmd_v3);
ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, cmd_size, &cmd);
if (ret < 0)
IWL_DEBUG_RADIO(mvm, "failed to send TAS_CONFIG (%d)\n", ret);
}
-static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
+static bool iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
{
- u8 value;
- int ret = iwl_acpi_get_dsm_u8(mvm->fwrt.dev, 0, DSM_RFI_FUNC_ENABLE,
- &iwl_rfi_guid, &value);
+ u32 value = 0;
+ /* default behaviour is disabled */
+ bool bios_enable_rfi = false;
+ int ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_RFI_CONFIG, &value);
+
if (ret < 0) {
IWL_DEBUG_RADIO(mvm, "Failed to get DSM RFI, ret=%d\n", ret);
+ return bios_enable_rfi;
+ }
- } else if (value >= DSM_VALUE_RFI_MAX) {
- IWL_DEBUG_RADIO(mvm, "DSM RFI got invalid value, ret=%d\n",
- value);
-
- } else if (value == DSM_VALUE_RFI_ENABLE) {
+ value &= DSM_VALUE_RFI_DISABLE;
+ /* RFI BIOS CONFIG value can be 0 or 3 only.
+ * i.e 0 means DDR and DLVR enabled. 3 means DDR and DLVR disabled.
+ * 1 and 2 are invalid BIOS configurations, So, it's not possible to
+ * disable ddr/dlvr separately.
+ */
+ if (!value) {
IWL_DEBUG_RADIO(mvm, "DSM RFI is evaluated to enable\n");
- return DSM_VALUE_RFI_ENABLE;
+ bios_enable_rfi = true;
+ } else if (value == DSM_VALUE_RFI_DISABLE) {
+ IWL_DEBUG_RADIO(mvm, "DSM RFI is evaluated to disable\n");
+ } else {
+ IWL_DEBUG_RADIO(mvm,
+ "DSM RFI got invalid value, value=%d\n", value);
}
- IWL_DEBUG_RADIO(mvm, "DSM RFI is disabled\n");
-
- /* default behaviour is disabled */
- return DSM_VALUE_RFI_DISABLE;
+ return bios_enable_rfi;
}
static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
@@ -1288,43 +1230,34 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
WIDE_ID(REGULATORY_AND_NVM_GROUP,
LARI_CONFIG_CHANGE), 1);
- cmd.config_bitmap = iwl_acpi_get_lari_config_bitmap(&mvm->fwrt);
+ cmd.config_bitmap = iwl_get_lari_config_bitmap(&mvm->fwrt);
- ret = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0, DSM_FUNC_11AX_ENABLEMENT,
- &iwl_guid, &value);
+ ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_11AX_ENABLEMENT, &value);
if (!ret)
cmd.oem_11ax_allow_bitmap = cpu_to_le32(value);
- ret = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0,
- DSM_FUNC_ENABLE_UNII4_CHAN,
- &iwl_guid, &value);
+ ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_ENABLE_UNII4_CHAN, &value);
if (!ret)
cmd.oem_unii4_allow_bitmap = cpu_to_le32(value);
- ret = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0,
- DSM_FUNC_ACTIVATE_CHANNEL,
- &iwl_guid, &value);
+ ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_ACTIVATE_CHANNEL, &value);
if (!ret) {
if (cmd_ver < 8)
value &= ~ACTIVATE_5G2_IN_WW_MASK;
cmd.chan_state_active_bitmap = cpu_to_le32(value);
}
- ret = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0,
- DSM_FUNC_ENABLE_6E,
- &iwl_guid, &value);
+ ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_ENABLE_6E, &value);
if (!ret)
cmd.oem_uhb_allow_bitmap = cpu_to_le32(value);
- ret = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0,
- DSM_FUNC_FORCE_DISABLE_CHANNELS,
- &iwl_guid, &value);
+ ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_FORCE_DISABLE_CHANNELS,
+ &value);
if (!ret)
cmd.force_disable_channels_bitmap = cpu_to_le32(value);
- ret = iwl_acpi_get_dsm_u32(mvm->fwrt.dev, 0,
- DSM_FUNC_ENERGY_DETECTION_THRESHOLD,
- &iwl_guid, &value);
+ ret = iwl_bios_get_dsm(&mvm->fwrt, DSM_FUNC_ENERGY_DETECTION_THRESHOLD,
+ &value);
if (!ret)
cmd.edt_bitmap = cpu_to_le32(value);
@@ -1390,15 +1323,17 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
if (le32_to_cpu(cmd.oem_uhb_allow_bitmap) & IWL_UATS_VLP_AP_SUPPORTED ||
le32_to_cpu(cmd.oem_uhb_allow_bitmap) & IWL_UATS_AFC_AP_SUPPORTED)
- mvm->fwrt.uats_enabled = TRUE;
+ mvm->fwrt.uats_enabled = true;
}
-void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
+void iwl_mvm_get_bios_tables(struct iwl_mvm *mvm)
{
int ret;
+ iwl_acpi_get_guid_lock_status(&mvm->fwrt);
+
/* read PPAG table */
- ret = iwl_acpi_get_ppag_table(&mvm->fwrt);
+ ret = iwl_bios_get_ppag_table(&mvm->fwrt);
if (ret < 0) {
IWL_DEBUG_RADIO(mvm,
"PPAG BIOS table invalid or unavailable. (%d)\n",
@@ -1406,7 +1341,7 @@ void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
}
/* read SAR tables */
- ret = iwl_sar_get_wrds_table(&mvm->fwrt);
+ ret = iwl_bios_get_wrds_table(&mvm->fwrt);
if (ret < 0) {
IWL_DEBUG_RADIO(mvm,
"WRDS SAR BIOS table invalid or unavailable. (%d)\n",
@@ -1415,7 +1350,7 @@ void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
* If not available, don't fail and don't bother with EWRD and
* WGDS */
- if (!iwl_sar_get_wgds_table(&mvm->fwrt)) {
+ if (!iwl_bios_get_wgds_table(&mvm->fwrt)) {
/*
* If basic SAR is not available, we check for WGDS,
* which should *not* be available either. If it is
@@ -1426,7 +1361,7 @@ void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
}
} else {
- ret = iwl_sar_get_ewrd_table(&mvm->fwrt);
+ ret = iwl_bios_get_ewrd_table(&mvm->fwrt);
/* if EWRD is not available, we can still use
* WRDS, so don't fail */
if (ret < 0)
@@ -1436,7 +1371,7 @@ void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
/* read geo SAR table */
if (iwl_sar_geo_support(&mvm->fwrt)) {
- ret = iwl_sar_get_wgds_table(&mvm->fwrt);
+ ret = iwl_bios_get_wgds_table(&mvm->fwrt);
if (ret < 0)
IWL_DEBUG_RADIO(mvm,
"Geo SAR BIOS table invalid or unavailable. (%d)\n",
@@ -1446,59 +1381,18 @@ void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
}
iwl_acpi_get_phy_filters(&mvm->fwrt, &mvm->phy_filters);
-}
-#else /* CONFIG_ACPI */
-inline int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm,
- int prof_a, int prof_b)
-{
- return 1;
+ if (iwl_bios_get_eckv(&mvm->fwrt, &mvm->ext_clock_valid))
+ IWL_DEBUG_RADIO(mvm, "ECKV table doesn't exist in BIOS\n");
}
-inline int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
+static void iwl_mvm_disconnect_iterator(void *data, u8 *mac,
+ struct ieee80211_vif *vif)
{
- return -ENOENT;
+ if (vif->type == NL80211_IFTYPE_STATION)
+ ieee80211_hw_restart_disconnect(vif);
}
-static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
-{
- return 0;
-}
-
-int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm)
-{
- return -ENOENT;
-}
-
-static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
-{
- return 0;
-}
-
-static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
-{
-}
-
-static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
-{
-}
-
-bool iwl_mvm_is_vendor_in_approved_list(void)
-{
- return false;
-}
-
-static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
-{
- return DSM_VALUE_RFI_DISABLE;
-}
-
-void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
-{
-}
-
-#endif /* CONFIG_ACPI */
-
void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
{
u32 error_log_size = mvm->fw->ucode_capa.error_log_size;
@@ -1543,10 +1437,15 @@ void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
/* skb respond is only relevant in ERROR_RECOVERY_UPDATE_DB */
if (flags & ERROR_RECOVERY_UPDATE_DB) {
resp = le32_to_cpu(*(__le32 *)host_cmd.resp_pkt->data);
- if (resp)
+ if (resp) {
IWL_ERR(mvm,
"Failed to send recovery cmd blob was invalid %d\n",
resp);
+
+ ieee80211_iterate_interfaces(mvm->hw, 0,
+ iwl_mvm_disconnect_iterator,
+ mvm);
+ }
}
}
@@ -1781,9 +1680,6 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
if (!mvm->ptp_data.ptp_clock)
iwl_mvm_ptp_init(mvm);
- if (iwl_acpi_get_eckv(mvm->dev, &mvm->ext_clock_valid))
- IWL_DEBUG_INFO(mvm, "ECKV table doesn't exist in BIOS\n");
-
ret = iwl_mvm_ppag_init(mvm);
if (ret)
goto error;
@@ -1803,7 +1699,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
iwl_mvm_uats_init(mvm);
if (iwl_rfi_supported(mvm)) {
- if (iwl_mvm_eval_dsm_rfi(mvm) == DSM_VALUE_RFI_ENABLE)
+ if (iwl_mvm_eval_dsm_rfi(mvm))
iwl_rfi_send_config_cmd(mvm, NULL);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
index be48b0fc9cb6..f13f13e6b71a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2022 - 2023 Intel Corporation
+ * Copyright (C) 2022 - 2024 Intel Corporation
*/
#include "mvm.h"
#include "time-event.h"
@@ -53,6 +53,8 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
unsigned int link_id = link_conf->link_id;
struct iwl_mvm_vif_link_info *link_info = mvmvif->link[link_id];
struct iwl_link_config_cmd cmd = {};
+ unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD);
+ u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1);
if (WARN_ON_ONCE(!link_info))
return -EINVAL;
@@ -84,7 +86,8 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (vif->type == NL80211_IFTYPE_ADHOC && link_conf->bssid)
memcpy(cmd.ibss_bssid_addr, link_conf->bssid, ETH_ALEN);
- cmd.listen_lmac = cpu_to_le32(link_info->listen_lmac);
+ if (cmd_ver < 2)
+ cmd.listen_lmac = cpu_to_le32(link_info->listen_lmac);
return iwl_mvm_link_cmd_send(mvm, &cmd, FW_CTXT_ACTION_ADD);
}
@@ -100,6 +103,8 @@ int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_link_config_cmd cmd = {};
u32 ht_flag, flags = 0, flags_mask = 0;
int ret;
+ unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD);
+ u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1);
if (WARN_ON_ONCE(!link_info ||
link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID))
@@ -190,12 +195,21 @@ int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
}
if (changes & LINK_CONTEXT_MODIFY_EHT_PARAMS) {
+ struct ieee80211_chanctx_conf *ctx;
+ struct cfg80211_chan_def *def = NULL;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(link_conf->chanctx_conf);
+ if (ctx)
+ def = iwl_mvm_chanctx_def(mvm, ctx);
+
if (iwlwifi_mod_params.disable_11be ||
- !link_conf->eht_support)
+ !link_conf->eht_support || !def ||
+ iwl_fw_lookup_cmd_ver(mvm->fw, PHY_CONTEXT_CMD, 1) >= 6)
changes &= ~LINK_CONTEXT_MODIFY_EHT_PARAMS;
else
- cmd.puncture_mask =
- cpu_to_le16(link_conf->eht_puncturing);
+ cmd.puncture_mask = cpu_to_le16(def->punctured);
+ rcu_read_unlock();
}
cmd.bss_color = link_conf->he_bss_color.color;
@@ -224,7 +238,8 @@ send_cmd:
cmd.flags = cpu_to_le32(flags);
cmd.flags_mask = cpu_to_le32(flags_mask);
cmd.spec_link_id = link_conf->link_id;
- cmd.listen_lmac = cpu_to_le32(link_info->listen_lmac);
+ if (cmd_ver < 2)
+ cmd.listen_lmac = cpu_to_le32(link_info->listen_lmac);
ret = iwl_mvm_link_cmd_send(mvm, &cmd, FW_CTXT_ACTION_MODIFY);
if (!ret && (changes & LINK_CONTEXT_MODIFY_ACTIVE))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index c4f96125cf33..228ede7b8957 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -31,6 +31,17 @@ const u8 iwl_mvm_ac_to_gen2_tx_fifo[] = {
IWL_GEN2_TRIG_TX_FIFO_BK,
};
+const u8 iwl_mvm_ac_to_bz_tx_fifo[] = {
+ IWL_BZ_EDCA_TX_FIFO_VO,
+ IWL_BZ_EDCA_TX_FIFO_VI,
+ IWL_BZ_EDCA_TX_FIFO_BE,
+ IWL_BZ_EDCA_TX_FIFO_BK,
+ IWL_BZ_TRIG_TX_FIFO_VO,
+ IWL_BZ_TRIG_TX_FIFO_VI,
+ IWL_BZ_TRIG_TX_FIFO_BE,
+ IWL_BZ_TRIG_TX_FIFO_BK,
+};
+
struct iwl_mvm_mac_iface_iterator_data {
struct iwl_mvm *mvm;
struct ieee80211_vif *vif;
@@ -455,7 +466,7 @@ void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm,
break;
case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
/* Protect when channel wider than 20MHz */
- if (link_conf->chandef.width > NL80211_CHAN_WIDTH_20)
+ if (link_conf->chanreq.oper.width > NL80211_CHAN_WIDTH_20)
*protection_flags |= cpu_to_le32(ht_flag);
break;
default:
@@ -494,7 +505,7 @@ void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (link_conf->qos)
*qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA);
- if (link_conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT)
+ if (link_conf->chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT)
*qos_flags |= cpu_to_le32(MAC_QOS_FLG_TGN);
}
@@ -910,8 +921,8 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm,
link_conf = rcu_dereference(vif->link_conf[link_id]);
if (link_conf) {
basic = link_conf->basic_rates;
- if (link_conf->chandef.chan)
- band = link_conf->chandef.chan->band;
+ if (link_conf->chanreq.oper.chan)
+ band = link_conf->chanreq.oper.chan->band;
}
rcu_read_unlock();
}
@@ -1466,8 +1477,8 @@ static void iwl_mvm_csa_count_down(struct iwl_mvm *mvm,
mvmvif->csa_countdown = true;
- if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) {
- int c = ieee80211_beacon_update_cntdwn(csa_vif);
+ if (!ieee80211_beacon_cntdwn_is_complete(csa_vif, 0)) {
+ int c = ieee80211_beacon_update_cntdwn(csa_vif, 0);
iwl_mvm_mac_ctxt_beacon_changed(mvm, csa_vif,
&csa_vif->bss_conf);
@@ -1486,7 +1497,7 @@ static void iwl_mvm_csa_count_down(struct iwl_mvm *mvm,
}
} else if (!iwl_mvm_te_scheduled(&mvmvif->time_event_data)) {
/* we don't have CSA NoA scheduled yet, switch now */
- ieee80211_csa_finish(csa_vif);
+ ieee80211_csa_finish(csa_vif, 0);
RCU_INIT_POINTER(mvm->csa_vif, NULL);
}
}
@@ -1626,10 +1637,22 @@ void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm,
* TODO: the threshold should be adjusted based on latency conditions,
* and/or in case of a CS flow on one of the other AP vifs.
*/
- if (rx_missed_bcon > IWL_MVM_MISSED_BEACONS_THRESHOLD_LONG)
- iwl_mvm_connection_loss(mvm, vif, "missed beacons");
- else if (rx_missed_bcon_since_rx > IWL_MVM_MISSED_BEACONS_THRESHOLD)
- ieee80211_beacon_loss(vif);
+ if (rx_missed_bcon >= IWL_MVM_MISSED_BEACONS_THRESHOLD_LONG) {
+ if (rx_missed_bcon_since_rx >= IWL_MVM_MISSED_BEACONS_SINCE_RX_THOLD) {
+ iwl_mvm_connection_loss(mvm, vif, "missed beacons");
+ } else {
+ IWL_WARN(mvm,
+ "missed beacons exceeds threshold, but receiving data. Stay connected, Expect bugs.\n");
+ IWL_WARN(mvm,
+ "missed_beacons:%d, missed_beacons_since_rx:%d\n",
+ rx_missed_bcon, rx_missed_bcon_since_rx);
+ }
+ } else if (rx_missed_bcon_since_rx > IWL_MVM_MISSED_BEACONS_THRESHOLD) {
+ if (!iwl_mvm_has_new_tx_api(mvm))
+ ieee80211_beacon_loss(vif);
+ else
+ ieee80211_cqm_beacon_loss_notify(vif, GFP_ATOMIC);
+ }
iwl_dbg_tlv_time_point(&mvm->fwrt,
IWL_FW_INI_TIME_POINT_MISSED_BEACONS, &tp_data);
@@ -1832,7 +1855,7 @@ void iwl_mvm_channel_switch_start_notif(struct iwl_mvm *mvm,
msecs_to_jiffies(IWL_MVM_CS_UNBLOCK_TX_TIMEOUT *
csa_vif->bss_conf.beacon_int));
- ieee80211_csa_finish(csa_vif);
+ ieee80211_csa_finish(csa_vif, 0);
rcu_read_unlock();
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 7f13dff04b26..1935630d3def 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -138,7 +138,8 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy,
resp->channels,
__le16_to_cpu(resp->mcc),
__le16_to_cpu(resp->geo_info),
- le32_to_cpu(resp->cap), resp_ver);
+ le32_to_cpu(resp->cap), resp_ver,
+ mvm->fwrt.uats_enabled);
/* Store the return source id */
src_id = resp->source_id;
if (IS_ERR_OR_NULL(regd)) {
@@ -263,6 +264,9 @@ static const u8 tm_if_types_ext_capa_sta[] = {
__bf_shf(IEEE80211_EML_CAP_EMLSR_PADDING_DELAY) | \
IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US << \
__bf_shf(IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY))
+#define IWL_MVM_MLD_CAPA_OPS FIELD_PREP_CONST( \
+ IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP, \
+ IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME)
static const struct wiphy_iftype_ext_capab add_iftypes_ext_capa[] = {
{
@@ -272,6 +276,7 @@ static const struct wiphy_iftype_ext_capab add_iftypes_ext_capa[] = {
.extended_capabilities_len = sizeof(he_if_types_ext_capa_sta),
/* relevant only if EHT is supported */
.eml_capabilities = IWL_MVM_EMLSR_CAPA,
+ .mld_capa_and_ops = IWL_MVM_MLD_CAPA_OPS,
},
{
.iftype = NL80211_IFTYPE_STATION,
@@ -280,6 +285,7 @@ static const struct wiphy_iftype_ext_capab add_iftypes_ext_capa[] = {
.extended_capabilities_len = sizeof(tm_if_types_ext_capa_sta),
/* relevant only if EHT is supported */
.eml_capabilities = IWL_MVM_EMLSR_CAPA,
+ .mld_capa_and_ops = IWL_MVM_MLD_CAPA_OPS,
},
};
@@ -490,6 +496,11 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
IWL_UCODE_TLV_CAPA_TIME_SYNC_BOTH_FTM_TM))
hw->wiphy->hw_timestamp_max_peers = 1;
+ if (fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_SPP_AMSDU_SUPPORT))
+ wiphy_ext_feature_set(hw->wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT);
+
ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);
hw->wiphy->features |=
NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
@@ -695,6 +706,18 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
}
}
+ if (iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(LOCATION_GROUP,
+ TOF_RANGE_REQ_CMD),
+ IWL_FW_CMD_VER_UNKNOWN) >= 11) {
+ wiphy_ext_feature_set(hw->wiphy,
+ NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE);
+
+ if (fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_SECURE_LTF_SUPPORT))
+ wiphy_ext_feature_set(hw->wiphy,
+ NL80211_EXT_FEATURE_SECURE_LTF);
+ }
+
mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
#ifdef CONFIG_PM_SLEEP
@@ -1195,14 +1218,12 @@ int iwl_mvm_mac_start(struct ieee80211_hw *hw)
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
int ret;
- int retry, max_retry = 0;
mutex_lock(&mvm->mutex);
/* we are starting the mac not in error flow, and restart is enabled */
if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) &&
iwlwifi_mod_params.fw_restart) {
- max_retry = IWL_MAX_INIT_RETRY;
/*
* This will prevent mac80211 recovery flows to trigger during
* init failures
@@ -1210,13 +1231,7 @@ int iwl_mvm_mac_start(struct ieee80211_hw *hw)
set_bit(IWL_MVM_STATUS_STARTING, &mvm->status);
}
- for (retry = 0; retry <= max_retry; retry++) {
- ret = __iwl_mvm_mac_start(mvm);
- if (!ret || mvm->pldr_sync)
- break;
-
- IWL_ERR(mvm, "mac start retry %d\n", retry);
- }
+ ret = __iwl_mvm_mac_start(mvm);
clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status);
mutex_unlock(&mvm->mutex);
@@ -1350,6 +1365,7 @@ void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
* discover that its list is now empty.
*/
cancel_work_sync(&mvm->async_handlers_wk);
+ wiphy_work_cancel(hw->wiphy, &mvm->async_handlers_wiphy_wk);
}
struct iwl_mvm_phy_ctxt *iwl_mvm_get_free_phy_ctxt(struct iwl_mvm *mvm)
@@ -1433,7 +1449,7 @@ int iwl_mvm_post_channel_switch(struct ieee80211_hw *hw,
if (!fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD)) {
- ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
+ ret = iwl_mvm_enable_beacon_filter(mvm, vif);
if (ret)
goto out_unlock;
@@ -1454,7 +1470,8 @@ out_unlock:
}
void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
@@ -1600,7 +1617,8 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
*/
if (vif->type == NL80211_IFTYPE_AP ||
vif->type == NL80211_IFTYPE_ADHOC) {
- iwl_mvm_vif_dbgfs_add_link(mvm, vif);
+ if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+ iwl_mvm_vif_dbgfs_add_link(mvm, vif);
ret = 0;
goto out;
}
@@ -1616,7 +1634,7 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
goto out_remove_mac;
/* beacon filtering */
- ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
+ ret = iwl_mvm_disable_beacon_filter(mvm, vif);
if (ret)
goto out_remove_mac;
@@ -1627,6 +1645,9 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
IEEE80211_VIF_SUPPORTS_CQM_RSSI;
}
+ if (vif->p2p || iwl_fw_lookup_cmd_ver(mvm->fw, PHY_CONTEXT_CMD, 1) < 5)
+ vif->driver_flags |= IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW;
+
if (vif->type == NL80211_IFTYPE_P2P_DEVICE)
mvm->p2p_device_vif = vif;
@@ -1637,10 +1658,11 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
if (vif->type == NL80211_IFTYPE_MONITOR) {
mvm->monitor_on = true;
mvm->monitor_p80 =
- iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chandef);
+ iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chanreq.oper);
}
- iwl_mvm_vif_dbgfs_add_link(mvm, vif);
+ if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+ iwl_mvm_vif_dbgfs_add_link(mvm, vif);
if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
@@ -2558,7 +2580,7 @@ iwl_mvm_bss_info_changed_station_common(struct iwl_mvm *mvm,
iwl_mvm_stop_session_protection(mvm, vif);
iwl_mvm_sf_update(mvm, vif, false);
- WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
+ WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif));
}
if (changes & (BSS_CHANGED_PS | BSS_CHANGED_P2P_PS | BSS_CHANGED_QOS |
@@ -2579,7 +2601,7 @@ iwl_mvm_bss_info_changed_station_common(struct iwl_mvm *mvm,
/* FIXME: need to update per link when FW API will
* support it
*/
- ret = iwl_mvm_enable_beacon_filter(mvm, vif, 0);
+ ret = iwl_mvm_enable_beacon_filter(mvm, vif);
if (ret)
IWL_ERR(mvm,
"failed to update CQM thresholds\n");
@@ -2606,9 +2628,7 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
*/
if (changes & BSS_CHANGED_ASSOC && vif->cfg.assoc) {
if ((vif->bss_conf.he_support &&
- !iwlwifi_mod_params.disable_11ax) ||
- (vif->bss_conf.eht_support &&
- !iwlwifi_mod_params.disable_11be))
+ !iwlwifi_mod_params.disable_11ax))
iwl_mvm_cfg_he_sta(mvm, vif, mvmvif->deflink.ap_sta_id);
iwl_mvm_mac_ctxt_recalc_tsf_id(mvm, vif);
@@ -2617,10 +2637,7 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
/* Update MU EDCA params */
if (changes & BSS_CHANGED_QOS && mvmvif->associated &&
vif->cfg.assoc &&
- ((vif->bss_conf.he_support &&
- !iwlwifi_mod_params.disable_11ax) ||
- (vif->bss_conf.eht_support &&
- !iwlwifi_mod_params.disable_11be)))
+ (vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax))
iwl_mvm_cfg_he_sta(mvm, vif, mvmvif->deflink.ap_sta_id);
/*
@@ -3416,16 +3433,16 @@ iwl_mvm_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw,
.tolerated = true,
};
- if (WARN_ON_ONCE(!link_conf->chandef.chan ||
+ if (WARN_ON_ONCE(!link_conf->chanreq.oper.chan ||
!mvmvif->link[link_id]))
return;
- if (!(link_conf->chandef.chan->flags & IEEE80211_CHAN_RADAR)) {
+ if (!(link_conf->chanreq.oper.chan->flags & IEEE80211_CHAN_RADAR)) {
mvmvif->link[link_id]->he_ru_2mhz_block = false;
return;
}
- cfg80211_bss_iter(hw->wiphy, &link_conf->chandef,
+ cfg80211_bss_iter(hw->wiphy, &link_conf->chanreq.oper,
iwl_mvm_check_he_obss_narrow_bw_ru_iter,
&iter_data);
@@ -3485,10 +3502,10 @@ static void iwl_mvm_mei_host_associated(struct iwl_mvm *mvm,
return;
/* FIXME: MEI needs to be updated for MLO */
- if (!vif->bss_conf.chandef.chan)
+ if (!vif->bss_conf.chanreq.oper.chan)
return;
- conn_info.channel = vif->bss_conf.chandef.chan->hw_value;
+ conn_info.channel = vif->bss_conf.chanreq.oper.chan->hw_value;
switch (mvm_sta->pairwise_cipher) {
case WLAN_CIPHER_SUITE_TKIP:
@@ -3685,6 +3702,9 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm,
NL80211_TDLS_SETUP);
}
+ if (ret)
+ return ret;
+
for_each_sta_active_link(vif, sta, link_sta, i)
link_sta->agg.max_rc_amsdu_len = 1;
@@ -3693,6 +3713,19 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm,
if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls)
mvmvif->ap_sta = sta;
+ /*
+ * Initialize the rates here already - this really tells
+ * the firmware only what the supported legacy rates are
+ * (may be) since it's initialized already from what the
+ * AP advertised in the beacon/probe response. This will
+ * allow the firmware to send auth/assoc frames with one
+ * of the supported rates already, rather than having to
+ * use a mandatory rate.
+ * If we're the AP, we'll just assume mandatory rates at
+ * this point, but we know nothing about the STA anyway.
+ */
+ iwl_mvm_rs_rate_init_all_links(mvm, vif, sta);
+
return 0;
}
@@ -3719,10 +3752,8 @@ iwl_mvm_sta_state_auth_to_assoc(struct ieee80211_hw *hw,
* the default bss_conf
*/
if (!mvm->mld_api_is_used &&
- ((vif->bss_conf.he_support &&
- !iwlwifi_mod_params.disable_11ax) ||
- (vif->bss_conf.eht_support &&
- !iwlwifi_mod_params.disable_11be)))
+ (vif->bss_conf.he_support &&
+ !iwlwifi_mod_params.disable_11ax))
iwl_mvm_cfg_he_sta(mvm, vif, mvm_sta->deflink.sta_id);
} else if (vif->type == NL80211_IFTYPE_STATION) {
iwl_mvm_vif_set_he_support(hw, vif, sta, true);
@@ -3774,7 +3805,7 @@ iwl_mvm_sta_state_assoc_to_authorized(struct iwl_mvm *mvm,
NL80211_TDLS_ENABLE_LINK);
} else {
/* enable beacon filtering */
- WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
+ WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif));
mvmvif->authorized = 1;
@@ -3791,13 +3822,17 @@ iwl_mvm_sta_state_assoc_to_authorized(struct iwl_mvm *mvm,
mvm_sta->authorized = true;
- iwl_mvm_rs_rate_init_all_links(mvm, vif, sta);
-
/* MFP is set by default before the station is authorized.
* Clear it here in case it's not used.
*/
- if (!sta->mfp)
- return callbacks->update_sta(mvm, vif, sta);
+ if (!sta->mfp) {
+ int ret = callbacks->update_sta(mvm, vif, sta);
+
+ if (ret)
+ return ret;
+ }
+
+ iwl_mvm_rs_rate_init_all_links(mvm, vif, sta);
return 0;
}
@@ -3828,7 +3863,7 @@ iwl_mvm_sta_state_authorized_to_assoc(struct iwl_mvm *mvm,
mvmvif->authorized = 0;
/* disable beacon filtering */
- iwl_mvm_disable_beacon_filter(mvm, vif, 0);
+ iwl_mvm_disable_beacon_filter(mvm, vif);
}
return 0;
@@ -4407,44 +4442,6 @@ static bool iwl_mvm_rx_aux_roc(struct iwl_notif_wait_data *notif_wait,
return true;
}
-#define AUX_ROC_MIN_DURATION MSEC_TO_TU(100)
-#define AUX_ROC_MIN_DELAY MSEC_TO_TU(200)
-#define AUX_ROC_MAX_DELAY MSEC_TO_TU(600)
-#define AUX_ROC_SAFETY_BUFFER MSEC_TO_TU(20)
-#define AUX_ROC_MIN_SAFETY_BUFFER MSEC_TO_TU(10)
-
-static void iwl_mvm_roc_duration_and_delay(struct ieee80211_vif *vif,
- u32 duration_ms,
- u32 *duration_tu,
- u32 *delay)
-{
- u32 dtim_interval = vif->bss_conf.dtim_period *
- vif->bss_conf.beacon_int;
-
- *delay = AUX_ROC_MIN_DELAY;
- *duration_tu = MSEC_TO_TU(duration_ms);
-
- /*
- * If we are associated we want the delay time to be at least one
- * dtim interval so that the FW can wait until after the DTIM and
- * then start the time event, this will potentially allow us to
- * remain off-channel for the max duration.
- * Since we want to use almost a whole dtim interval we would also
- * like the delay to be for 2-3 dtim intervals, in case there are
- * other time events with higher priority.
- */
- if (vif->cfg.assoc) {
- *delay = min_t(u32, dtim_interval * 3, AUX_ROC_MAX_DELAY);
- /* We cannot remain off-channel longer than the DTIM interval */
- if (dtim_interval <= *duration_tu) {
- *duration_tu = dtim_interval - AUX_ROC_SAFETY_BUFFER;
- if (*duration_tu <= AUX_ROC_MIN_DURATION)
- *duration_tu = dtim_interval -
- AUX_ROC_MIN_SAFETY_BUFFER;
- }
- }
-}
-
static int iwl_mvm_send_aux_roc_cmd(struct iwl_mvm *mvm,
struct ieee80211_channel *channel,
struct ieee80211_vif *vif,
@@ -4542,48 +4539,6 @@ static int iwl_mvm_send_aux_roc_cmd(struct iwl_mvm *mvm,
return res;
}
-static int iwl_mvm_roc_add_cmd(struct iwl_mvm *mvm,
- struct ieee80211_channel *channel,
- struct ieee80211_vif *vif,
- int duration, u32 activity)
-{
- int res;
- u32 duration_tu, delay;
- struct iwl_roc_req roc_req = {
- .action = cpu_to_le32(FW_CTXT_ACTION_ADD),
- .activity = cpu_to_le32(activity),
- .sta_id = cpu_to_le32(mvm->aux_sta.sta_id),
- };
-
- lockdep_assert_held(&mvm->mutex);
-
- /* Set the channel info data */
- iwl_mvm_set_chan_info(mvm, &roc_req.channel_info,
- channel->hw_value,
- iwl_mvm_phy_band_from_nl80211(channel->band),
- IWL_PHY_CHANNEL_MODE20, 0);
-
- iwl_mvm_roc_duration_and_delay(vif, duration, &duration_tu,
- &delay);
- roc_req.duration = cpu_to_le32(duration_tu);
- roc_req.max_delay = cpu_to_le32(delay);
-
- IWL_DEBUG_TE(mvm,
- "\t(requested = %ums, max_delay = %ums)\n",
- duration, delay);
- IWL_DEBUG_TE(mvm,
- "Requesting to remain on channel %u for %utu\n",
- channel->hw_value, duration_tu);
-
- /* Set the node address */
- memcpy(roc_req.node_addr, vif->addr, ETH_ALEN);
-
- res = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(MAC_CONF_GROUP, ROC_CMD),
- 0, sizeof(roc_req), &roc_req);
-
- return res;
-}
-
static int iwl_mvm_add_aux_sta_for_hs20(struct iwl_mvm *mvm, u32 lmac_id)
{
int ret = 0;
@@ -4700,7 +4655,7 @@ static int iwl_mvm_p2p_find_phy_ctxt(struct iwl_mvm *mvm,
cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT);
return iwl_mvm_phy_ctxt_add(mvm, mvmvif->deflink.phy_ctxt,
- &chandef, 1, 1);
+ &chandef, NULL, 1, 1);
}
/* Execute the common part for MLD and non-MLD modes */
@@ -4788,8 +4743,8 @@ static void iwl_mvm_ftm_responder_chanctx_iter(void *_data, u8 *mac,
data->responder = true;
}
-static bool iwl_mvm_is_ftm_responder_chanctx(struct iwl_mvm *mvm,
- struct ieee80211_chanctx_conf *ctx)
+bool iwl_mvm_is_ftm_responder_chanctx(struct iwl_mvm *mvm,
+ struct ieee80211_chanctx_conf *ctx)
{
struct iwl_mvm_ftm_responder_iter_data data = {
.responder = false,
@@ -4808,9 +4763,7 @@ static int __iwl_mvm_add_chanctx(struct iwl_mvm *mvm,
{
u16 *phy_ctxt_id = (u16 *)ctx->drv_priv;
struct iwl_mvm_phy_ctxt *phy_ctxt;
- bool use_def = iwl_mvm_is_ftm_responder_chanctx(mvm, ctx) ||
- iwl_mvm_enable_fils(mvm, ctx);
- struct cfg80211_chan_def *def = use_def ? &ctx->def : &ctx->min_def;
+ struct cfg80211_chan_def *def = iwl_mvm_chanctx_def(mvm, ctx);
int ret;
lockdep_assert_held(&mvm->mutex);
@@ -4823,7 +4776,7 @@ static int __iwl_mvm_add_chanctx(struct iwl_mvm *mvm,
goto out;
}
- ret = iwl_mvm_phy_ctxt_add(mvm, phy_ctxt, def,
+ ret = iwl_mvm_phy_ctxt_add(mvm, phy_ctxt, def, &ctx->ap,
ctx->rx_chains_static,
ctx->rx_chains_dynamic);
if (ret) {
@@ -4876,9 +4829,7 @@ void iwl_mvm_change_chanctx(struct ieee80211_hw *hw,
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
u16 *phy_ctxt_id = (u16 *)ctx->drv_priv;
struct iwl_mvm_phy_ctxt *phy_ctxt = &mvm->phy_ctxts[*phy_ctxt_id];
- bool use_def = iwl_mvm_is_ftm_responder_chanctx(mvm, ctx) ||
- iwl_mvm_enable_fils(mvm, ctx);
- struct cfg80211_chan_def *def = use_def ? &ctx->def : &ctx->min_def;
+ struct cfg80211_chan_def *def = iwl_mvm_chanctx_def(mvm, ctx);
if (WARN_ONCE((phy_ctxt->ref > 1) &&
(changed & ~(IEEE80211_CHANCTX_CHANGE_WIDTH |
@@ -4903,7 +4854,7 @@ void iwl_mvm_change_chanctx(struct ieee80211_hw *hw,
}
iwl_mvm_bt_coex_vif_change(mvm);
- iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, def,
+ iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, def, &ctx->ap,
ctx->rx_chains_static,
ctx->rx_chains_dynamic);
@@ -5356,8 +5307,8 @@ static int __iwl_mvm_mac_testmode_cmd(struct iwl_mvm *mvm,
return -EINVAL;
if (nla_get_u32(tb[IWL_MVM_TM_ATTR_BEACON_FILTER_STATE]))
- return iwl_mvm_enable_beacon_filter(mvm, vif, 0);
- return iwl_mvm_disable_beacon_filter(mvm, vif, 0);
+ return iwl_mvm_enable_beacon_filter(mvm, vif);
+ return iwl_mvm_disable_beacon_filter(mvm, vif);
}
return -EOPNOTSUPP;
@@ -5441,7 +5392,7 @@ static int iwl_mvm_old_pre_chan_sw_sta(struct iwl_mvm *mvm,
iwl_mvm_csa_client_absent(mvm, vif);
if (mvmvif->bf_data.bf_enabled) {
- int ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
+ int ret = iwl_mvm_disable_beacon_filter(mvm, vif);
if (ret)
return ret;
@@ -5601,8 +5552,16 @@ void iwl_mvm_channel_switch_rx_beacon(struct ieee80211_hw *hw,
if (chsw->count >= mvmvif->csa_count && chsw->block_tx) {
if (mvmvif->csa_misbehave) {
+ struct ieee80211_bss_conf *link_conf;
+
/* Second time, give up on this AP*/
- iwl_mvm_abort_channel_switch(hw, vif);
+
+ link_conf = wiphy_dereference(hw->wiphy,
+ vif->link_conf[chsw->link_id]);
+ if (WARN_ON(!link_conf))
+ return;
+
+ iwl_mvm_abort_channel_switch(hw, vif, link_conf);
ieee80211_chswitch_done(vif, false, 0);
mvmvif->csa_misbehave = false;
return;
@@ -6103,6 +6062,7 @@ void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
}
}
+#define SYNC_RX_QUEUE_TIMEOUT (HZ)
void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
enum iwl_mvm_rxq_notif_type type,
bool sync,
@@ -6151,11 +6111,12 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
lockdep_assert_held(&mvm->mutex);
ret = wait_event_timeout(mvm->rx_sync_waitq,
READ_ONCE(mvm->queue_sync_state) == 0 ||
- iwl_mvm_is_radio_killed(mvm),
- HZ);
- WARN_ONCE(!ret && !iwl_mvm_is_radio_killed(mvm),
- "queue sync: failed to sync, state is 0x%lx\n",
- mvm->queue_sync_state);
+ iwl_mvm_is_radio_hw_killed(mvm),
+ SYNC_RX_QUEUE_TIMEOUT);
+ WARN_ONCE(!ret && !iwl_mvm_is_radio_hw_killed(mvm),
+ "queue sync: failed to sync, state is 0x%lx, cookie %d\n",
+ mvm->queue_sync_state,
+ mvm->queue_sync_cookie);
}
out:
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c
index ea3e9e9c6e26..8a38fc4b0b0f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2022 - 2023 Intel Corporation
+ * Copyright (C) 2022 - 2024 Intel Corporation
*/
#include <linux/kernel.h>
#include <net/mac80211.h>
@@ -62,11 +62,13 @@ u32 iwl_mvm_get_sec_flags(struct iwl_mvm *mvm,
struct ieee80211_key_conf *keyconf)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ bool pairwise = keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE;
+ bool igtk = keyconf->keyidx == 4 || keyconf->keyidx == 5;
u32 flags = 0;
lockdep_assert_held(&mvm->mutex);
- if (!(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE))
+ if (!pairwise)
flags |= IWL_SEC_KEY_FLAG_MCAST_KEY;
switch (keyconf->cipher) {
@@ -96,14 +98,19 @@ u32 iwl_mvm_get_sec_flags(struct iwl_mvm *mvm,
if (!sta && vif->type == NL80211_IFTYPE_STATION)
sta = mvmvif->ap_sta;
- /* Set the MFP flag also for an AP interface where the key is an IGTK
- * key as in such a case the station would always be NULL
+ /*
+ * If we are installing an iGTK (in AP or STA mode), we need to tell
+ * the firmware this key will en/decrypt MGMT frames.
+ * Same goes if we are installing a pairwise key for an MFP station.
+ * In case we're installing a groupwise key (which is not an iGTK),
+ * then, we will not use this key for MGMT frames.
*/
- if ((!IS_ERR_OR_NULL(sta) && sta->mfp) ||
- (vif->type == NL80211_IFTYPE_AP &&
- (keyconf->keyidx == 4 || keyconf->keyidx == 5)))
+ if ((!IS_ERR_OR_NULL(sta) && sta->mfp && pairwise) || igtk)
flags |= IWL_SEC_KEY_FLAG_MFP;
+ if (keyconf->flags & IEEE80211_KEY_FLAG_SPP_AMSDU)
+ flags |= IWL_SEC_KEY_FLAG_SPP_AMSDU;
+
return flags;
}
@@ -335,6 +342,21 @@ static int _iwl_mvm_sec_key_del(struct iwl_mvm *mvm,
return ret;
}
+int iwl_mvm_sec_key_del_pasn(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ u32 sta_mask,
+ struct ieee80211_key_conf *keyconf)
+{
+ u32 key_flags = iwl_mvm_get_sec_flags(mvm, vif, NULL, keyconf) |
+ IWL_SEC_KEY_FLAG_MFP;
+
+ if (WARN_ON(!sta_mask))
+ return -EINVAL;
+
+ return __iwl_mvm_sec_key_del(mvm, sta_mask, key_flags, keyconf->keyidx,
+ 0);
+}
+
int iwl_mvm_sec_key_del(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c
index f313a8d771e4..bb7851042177 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2022 - 2023 Intel Corporation
+ * Copyright (C) 2022 - 2024 Intel Corporation
*/
#include "mvm.h"
@@ -167,7 +167,7 @@ static int iwl_mvm_mld_mac_ctxt_cmd_listener(struct iwl_mvm *mvm,
iwl_mvm_mld_mac_ctxt_cmd_common(mvm, vif, &cmd, action);
cmd.filter_flags = cpu_to_le32(MAC_CFG_FILTER_PROMISC |
- MAC_FILTER_IN_CONTROL_AND_MGMT |
+ MAC_CFG_FILTER_ACCEPT_CONTROL_AND_MGMT |
MAC_CFG_FILTER_ACCEPT_BEACON |
MAC_CFG_FILTER_ACCEPT_PROBE_REQ |
MAC_CFG_FILTER_ACCEPT_GRP);
@@ -205,8 +205,11 @@ static int iwl_mvm_mld_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm,
cmd.p2p_dev.is_disc_extended =
iwl_mac_ctxt_p2p_dev_has_extended_disc(mvm, vif);
- /* Override the filter flags to accept only probe requests */
- cmd.filter_flags = cpu_to_le32(MAC_CFG_FILTER_ACCEPT_PROBE_REQ);
+ /* Override the filter flags to accept all management frames. This is
+ * needed to support both P2P device discovery using probe requests and
+ * P2P service discovery using action frames
+ */
+ cmd.filter_flags = cpu_to_le32(MAC_CFG_FILTER_ACCEPT_CONTROL_AND_MGMT);
return iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
index 61170173f917..084314bf6f36 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2022-2023 Intel Corporation
+ * Copyright (C) 2022-2024 Intel Corporation
*/
#include "mvm.h"
@@ -47,7 +47,7 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw,
goto out_unlock;
/* beacon filtering */
- ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0);
+ ret = iwl_mvm_disable_beacon_filter(mvm, vif);
if (ret)
goto out_remove_mac;
@@ -81,7 +81,8 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw,
ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS);
}
- iwl_mvm_vif_dbgfs_add_link(mvm, vif);
+ if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+ iwl_mvm_vif_dbgfs_add_link(mvm, vif);
if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
@@ -253,9 +254,6 @@ __iwl_mvm_mld_assign_vif_chanctx(struct iwl_mvm *mvm,
if (!rcu_access_pointer(link_conf->chanctx_conf))
n_active++;
- if (n_active > iwl_mvm_max_active_links(mvm, vif))
- return -EOPNOTSUPP;
-
if (WARN_ON_ONCE(!mvmvif->link[link_id]))
return -EINVAL;
@@ -437,6 +435,9 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm,
mvmvif->ap_ibss_active = false;
}
+ iwl_mvm_link_changed(mvm, vif, link_conf,
+ LINK_CONTEXT_MODIFY_ACTIVE, false);
+
if (iwl_mvm_is_esr_supported(mvm->fwrt.trans) && n_active > 1) {
int ret = iwl_mvm_esr_mode_inactive(mvm, vif);
@@ -448,9 +449,6 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm,
if (vif->type == NL80211_IFTYPE_MONITOR)
iwl_mvm_mld_rm_snif_sta(mvm, vif);
- iwl_mvm_link_changed(mvm, vif, link_conf,
- LINK_CONTEXT_MODIFY_ACTIVE, false);
-
if (switching_chanctx)
return;
mvmvif->link[link_id]->phy_ctxt = NULL;
@@ -606,6 +604,7 @@ static int iwl_mvm_mld_mac_sta_state(struct ieee80211_hw *hw,
struct iwl_mvm_link_sel_data {
u8 link_id;
enum nl80211_band band;
+ enum nl80211_chan_width width;
bool active;
};
@@ -657,7 +656,8 @@ void iwl_mvm_mld_select_links(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
continue;
data[n_data].link_id = link_id;
- data[n_data].band = link_conf->chandef.chan->band;
+ data[n_data].band = link_conf->chanreq.oper.chan->band;
+ data[n_data].width = link_conf->chanreq.oper.width;
data[n_data].active = vif->active_links & BIT(link_id);
n_data++;
}
@@ -752,8 +752,8 @@ iwl_mvm_mld_link_info_changed_station(struct iwl_mvm *mvm,
link_changes |= LINK_CONTEXT_MODIFY_HE_PARAMS;
}
- /* Update EHT Puncturing info */
- if (changes & BSS_CHANGED_EHT_PUNCTURING && vif->cfg.assoc)
+ /* if associated, maybe puncturing changed - we'll check later */
+ if (vif->cfg.assoc)
link_changes |= LINK_CONTEXT_MODIFY_EHT_PARAMS;
if (link_changes) {
@@ -1121,17 +1121,12 @@ iwl_mvm_mld_change_vif_links(struct ieee80211_hw *hw,
struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS])
{
struct iwl_mvm_vif_link_info *new_link[IEEE80211_MLD_MAX_NUM_LINKS] = {};
- unsigned int n_active = iwl_mvm_mld_count_active_links(vif);
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
u16 removed = old_links & ~new_links;
u16 added = new_links & ~old_links;
int err, i;
- if (hweight16(new_links) > 1 &&
- n_active > iwl_mvm_max_active_links(mvm, vif))
- return -EOPNOTSUPP;
-
for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
int r;
@@ -1223,6 +1218,146 @@ iwl_mvm_mld_change_sta_links(struct ieee80211_hw *hw,
return ret;
}
+/*
+ * This function receives a subset of the usable links bitmap and
+ * returns the primary link id, and -1 if such link doesn't exist
+ * (e.g. non-MLO connection) or wasn't found.
+ */
+int iwl_mvm_mld_get_primary_link(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ unsigned long usable_links)
+{
+ struct iwl_mvm_link_sel_data data[IEEE80211_MLD_MAX_NUM_LINKS];
+ u8 link_id, n_data = 0;
+
+ if (!ieee80211_vif_is_mld(vif) || !vif->cfg.assoc)
+ return -1;
+
+ for_each_set_bit(link_id, &usable_links, IEEE80211_MLD_MAX_NUM_LINKS) {
+ struct ieee80211_bss_conf *link_conf =
+ link_conf_dereference_protected(vif, link_id);
+
+ if (WARN_ON_ONCE(!link_conf))
+ continue;
+
+ data[n_data].link_id = link_id;
+ data[n_data].band = link_conf->chanreq.oper.chan->band;
+ data[n_data].width = link_conf->chanreq.oper.width;
+ data[n_data].active = true;
+ n_data++;
+ }
+
+ if (n_data <= 1)
+ return -1;
+
+ /* The logic should be modified to handle more than 2 links */
+ WARN_ON_ONCE(n_data > 2);
+
+ /* Primary link is the link with the wider bandwidth or higher band */
+ if (data[0].width > data[1].width)
+ return data[0].link_id;
+ if (data[0].width < data[1].width)
+ return data[1].link_id;
+ if (data[0].band >= data[1].band)
+ return data[0].link_id;
+
+ return data[1].link_id;
+}
+
+/*
+ * This function receives a bitmap of usable links and check if we can enter
+ * eSR on those links.
+ */
+static bool iwl_mvm_can_enter_esr(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ unsigned long desired_links)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ int primary_link = iwl_mvm_mld_get_primary_link(mvm, vif,
+ desired_links);
+ const struct wiphy_iftype_ext_capab *ext_capa;
+ bool ret = true;
+ int link_id;
+
+ if (primary_link < 0)
+ return false;
+
+ if (!(vif->cfg.eml_cap & IEEE80211_EML_CAP_EMLSR_SUPP))
+ return false;
+
+ ext_capa = cfg80211_get_iftype_ext_capa(mvm->hw->wiphy,
+ ieee80211_vif_type_p2p(vif));
+ if (!ext_capa ||
+ !(ext_capa->eml_capabilities & IEEE80211_EML_CAP_EMLSR_SUPP))
+ return false;
+
+ for_each_set_bit(link_id, &desired_links, IEEE80211_MLD_MAX_NUM_LINKS) {
+ struct ieee80211_bss_conf *link_conf =
+ link_conf_dereference_protected(vif, link_id);
+
+ if (WARN_ON_ONCE(!link_conf))
+ continue;
+
+ /* BT Coex effects eSR mode only if one of the link is on LB */
+ if (link_conf->chanreq.oper.chan->band != NL80211_BAND_2GHZ)
+ continue;
+
+ ret = iwl_mvm_bt_coex_calculate_esr_mode(mvm, vif, link_id,
+ primary_link);
+ // Mark eSR as disabled for the next time
+ if (!ret)
+ mvmvif->bt_coex_esr_disabled = true;
+ break;
+ }
+
+ return ret;
+}
+
+static bool iwl_mvm_mld_can_activate_links(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u16 desired_links)
+{
+ struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ int n_links = hweight16(desired_links);
+ bool ret = true;
+
+ if (n_links <= 1)
+ return true;
+
+ mutex_lock(&mvm->mutex);
+
+ /* Check if HW supports the wanted number of links */
+ if (n_links > iwl_mvm_max_active_links(mvm, vif)) {
+ ret = false;
+ goto unlock;
+ }
+
+ /* If it is an eSR device, check that we can enter eSR */
+ if (iwl_mvm_is_esr_supported(mvm->fwrt.trans))
+ ret = iwl_mvm_can_enter_esr(mvm, vif, desired_links);
+unlock:
+ mutex_unlock(&mvm->mutex);
+ return ret;
+}
+
+static enum ieee80211_neg_ttlm_res
+iwl_mvm_mld_can_neg_ttlm(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ u16 map;
+ u8 i;
+
+ /* Verify all TIDs are mapped to the same links set */
+ map = neg_ttlm->downlink[0];
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) {
+ if (neg_ttlm->downlink[i] != neg_ttlm->uplink[i] ||
+ neg_ttlm->uplink[i] != map)
+ return NEG_TTLM_RES_REJECT;
+ }
+
+ return NEG_TTLM_RES_ACCEPT;
+}
+
const struct ieee80211_ops iwl_mvm_mld_hw_ops = {
.tx = iwl_mvm_mac_tx,
.wake_tx_queue = iwl_mvm_mac_wake_tx_queue,
@@ -1317,4 +1452,6 @@ const struct ieee80211_ops iwl_mvm_mld_hw_ops = {
.change_vif_links = iwl_mvm_mld_change_vif_links,
.change_sta_links = iwl_mvm_mld_change_sta_links,
+ .can_activate_links = iwl_mvm_mld_can_activate_links,
+ .can_neg_ttlm = iwl_mvm_mld_can_neg_ttlm,
};
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 40627961b834..a10b48947bca 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -40,8 +40,9 @@
#define IWL_MVM_MAX_ADDRESSES 5
/* RSSI offset for WkP */
#define IWL_RSSI_OFFSET 50
+#define IWL_MVM_MISSED_BEACONS_SINCE_RX_THOLD 4
#define IWL_MVM_MISSED_BEACONS_THRESHOLD 8
-#define IWL_MVM_MISSED_BEACONS_THRESHOLD_LONG 16
+#define IWL_MVM_MISSED_BEACONS_THRESHOLD_LONG 19
/* A TimeUnit is 1024 microsecond */
#define MSEC_TO_TU(_msec) (_msec*1000/1024)
@@ -105,6 +106,7 @@ struct iwl_mvm_phy_ctxt {
/* track for RLC config command */
u32 center_freq1;
bool rlc_disabled;
+ u32 channel_load_by_us;
};
struct iwl_mvm_time_event_data {
@@ -121,7 +123,7 @@ struct iwl_mvm_time_event_data {
* if the te is in the time event list or not (when id == TE_MAX)
*/
u32 id;
- u8 link_id;
+ s8 link_id;
};
/* Power management */
@@ -359,6 +361,7 @@ struct iwl_mvm_vif_link_info {
* @pm_enabled - indicate if MAC power management is allowed
* @monitor_active: indicates that monitor context is configured, and that the
* interface should get quota etc.
+ * @bt_coex_esr_disabled: indicates if esr is disabled due to bt coex
* @low_latency: bit flags for low latency
* see enum &iwl_mvm_low_latency_cause for causes.
* @low_latency_actual: boolean, indicates low latency is set,
@@ -389,6 +392,7 @@ struct iwl_mvm_vif {
bool pm_enabled;
bool monitor_active;
bool esr_active;
+ bool bt_coex_esr_disabled;
u8 low_latency: 6;
u8 low_latency_actual: 1;
@@ -537,8 +541,8 @@ struct iwl_mvm_tt_mgmt {
#ifdef CONFIG_THERMAL
/**
- *struct iwl_mvm_thermal_device - thermal zone related data
- * @temp_trips: temperature thresholds for report
+ * struct iwl_mvm_thermal_device - thermal zone related data
+ * @trips: temperature thresholds for report
* @fw_trips_index: keep indexes to original array - temp_trips
* @tzone: thermal zone device data
*/
@@ -848,6 +852,9 @@ struct iwl_mvm {
spinlock_t async_handlers_lock;
struct work_struct async_handlers_wk;
+ /* For async rx handlers that require the wiphy lock */
+ struct wiphy_work async_handlers_wiphy_wk;
+
struct work_struct roc_done_wk;
unsigned long init_status;
@@ -1215,7 +1222,6 @@ struct iwl_mvm {
* @IWL_MVM_STATUS_IN_HW_RESTART: HW restart is active
* @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running
* @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running
- * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA
* @IWL_MVM_STATUS_IN_D3: in D3 (or at least about to go into it)
* @IWL_MVM_STATUS_SUPPRESS_ERROR_LOG_ONCE: suppress one error log
* if this is set, when intentionally triggered
@@ -1230,7 +1236,6 @@ enum iwl_mvm_status {
IWL_MVM_STATUS_IN_HW_RESTART,
IWL_MVM_STATUS_ROC_AUX_RUNNING,
IWL_MVM_STATUS_FIRMWARE_RUNNING,
- IWL_MVM_STATUS_NEED_FLUSH_P2P,
IWL_MVM_STATUS_IN_D3,
IWL_MVM_STATUS_SUPPRESS_ERROR_LOG_ONCE,
IWL_MVM_STATUS_STARTING,
@@ -1567,13 +1572,17 @@ static inline int iwl_mvm_max_active_links(struct iwl_mvm *mvm,
struct ieee80211_vif *vif)
{
struct iwl_trans *trans = mvm->fwrt.trans;
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+ lockdep_assert_held(&mvm->mutex);
if (vif->type == NL80211_IFTYPE_AP)
return mvm->fw->ucode_capa.num_beacons;
- if (iwl_mvm_is_esr_supported(trans) ||
- (CSR_HW_RFID_TYPE(trans->hw_rf_id) == IWL_CFG_RF_TYPE_FM &&
- CSR_HW_RFID_IS_CDB(trans->hw_rf_id)))
+ if ((iwl_mvm_is_esr_supported(trans) &&
+ !mvmvif->bt_coex_esr_disabled) ||
+ ((CSR_HW_RFID_TYPE(trans->hw_rf_id) == IWL_CFG_RF_TYPE_FM &&
+ CSR_HW_RFID_IS_CDB(trans->hw_rf_id))))
return IWL_MVM_FW_MAX_ACTIVE_LINKS_NUM;
return 1;
@@ -1581,12 +1590,16 @@ static inline int iwl_mvm_max_active_links(struct iwl_mvm *mvm,
extern const u8 iwl_mvm_ac_to_tx_fifo[];
extern const u8 iwl_mvm_ac_to_gen2_tx_fifo[];
+extern const u8 iwl_mvm_ac_to_bz_tx_fifo[];
static inline u8 iwl_mvm_mac_ac_to_tx_fifo(struct iwl_mvm *mvm,
enum ieee80211_ac_numbers ac)
{
- return iwl_mvm_has_new_tx_api(mvm) ?
- iwl_mvm_ac_to_gen2_tx_fifo[ac] : iwl_mvm_ac_to_tx_fifo[ac];
+ if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+ return iwl_mvm_ac_to_bz_tx_fifo[ac];
+ if (iwl_mvm_has_new_tx_api(mvm))
+ return iwl_mvm_ac_to_gen2_tx_fifo[ac];
+ return iwl_mvm_ac_to_tx_fifo[ac];
}
struct iwl_rate_info {
@@ -1801,18 +1814,20 @@ void iwl_mvm_rx_shared_mem_cfg_notif(struct iwl_mvm *mvm,
/* MVM PHY */
struct iwl_mvm_phy_ctxt *iwl_mvm_get_free_phy_ctxt(struct iwl_mvm *mvm);
int iwl_mvm_phy_ctxt_add(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *ap,
u8 chains_static, u8 chains_dynamic);
int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *ap,
u8 chains_static, u8 chains_dynamic);
void iwl_mvm_phy_ctxt_ref(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *ctxt);
void iwl_mvm_phy_ctxt_unref(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *ctxt);
int iwl_mvm_phy_ctx_count(struct iwl_mvm *mvm);
-u8 iwl_mvm_get_channel_width(struct cfg80211_chan_def *chandef);
-u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef);
+u8 iwl_mvm_get_channel_width(const struct cfg80211_chan_def *chandef);
+u8 iwl_mvm_get_ctrl_pos(const struct cfg80211_chan_def *chandef);
int iwl_mvm_phy_send_rlc(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
u8 chains_static, u8 chains_dynamic);
@@ -2116,6 +2131,12 @@ bool iwl_mvm_bt_coex_is_tpc_allowed(struct iwl_mvm *mvm,
u8 iwl_mvm_bt_coex_get_single_ant_msk(struct iwl_mvm *mvm, u8 enabled_ants);
u8 iwl_mvm_bt_coex_tx_prio(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr,
struct ieee80211_tx_info *info, u8 ac);
+bool iwl_mvm_bt_coex_calculate_esr_mode(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ int link_id, int primary_link);
+void iwl_mvm_bt_coex_update_vif_esr(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ int link_id);
/* beacon filtering */
#ifdef CONFIG_IWLWIFI_DEBUGFS
@@ -2129,11 +2150,9 @@ iwl_mvm_beacon_filter_debugfs_parameters(struct ieee80211_vif *vif,
{}
#endif
int iwl_mvm_enable_beacon_filter(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- u32 flags);
+ struct ieee80211_vif *vif);
int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- u32 flags);
+ struct ieee80211_vif *vif);
/* SMPS */
void iwl_mvm_update_smps(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
enum iwl_mvm_smps_type_request req_type,
@@ -2366,7 +2385,7 @@ u64 iwl_mvm_ptp_get_adj_time(struct iwl_mvm *mvm, u64 base_time);
int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b);
int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm);
int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm);
-void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm);
+void iwl_mvm_get_bios_tables(struct iwl_mvm *mvm);
#ifdef CONFIG_IWLWIFI_DEBUGFS
void iwl_mvm_link_sta_add_debugfs(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -2387,6 +2406,10 @@ int iwl_mvm_sec_key_del(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
struct ieee80211_key_conf *keyconf);
+int iwl_mvm_sec_key_del_pasn(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ u32 sta_mask,
+ struct ieee80211_key_conf *keyconf);
void iwl_mvm_sec_key_remove_ap(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct iwl_mvm_vif_link_info *link,
@@ -2511,7 +2534,7 @@ static inline void iwl_mvm_set_chan_info(struct iwl_mvm *mvm,
static inline void
iwl_mvm_set_chan_info_chandef(struct iwl_mvm *mvm,
struct iwl_fw_channel_info *ci,
- struct cfg80211_chan_def *chandef)
+ const struct cfg80211_chan_def *chandef)
{
enum nl80211_band band = chandef->chan->band;
@@ -2601,7 +2624,6 @@ static inline bool iwl_mvm_mei_filter_scan(struct iwl_mvm *mvm,
void iwl_mvm_send_roaming_forbidden_event(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
bool forbidden);
-bool iwl_mvm_is_vendor_in_approved_list(void);
/* Callbacks for ieee80211_ops */
void iwl_mvm_mac_tx(struct ieee80211_hw *hw,
@@ -2691,7 +2713,8 @@ int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *chsw);
void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
void iwl_mvm_channel_switch_rx_beacon(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *chsw);
@@ -2730,4 +2753,28 @@ bool iwl_mvm_enable_fils(struct iwl_mvm *mvm,
struct ieee80211_chanctx_conf *ctx);
void iwl_mvm_mld_select_links(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
bool valid_links_changed);
+int iwl_mvm_mld_get_primary_link(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ unsigned long usable_links);
+
+bool iwl_mvm_is_ftm_responder_chanctx(struct iwl_mvm *mvm,
+ struct ieee80211_chanctx_conf *ctx);
+
+static inline struct cfg80211_chan_def *
+iwl_mvm_chanctx_def(struct iwl_mvm *mvm, struct ieee80211_chanctx_conf *ctx)
+{
+ bool use_def = iwl_mvm_is_ftm_responder_chanctx(mvm, ctx) ||
+ iwl_mvm_enable_fils(mvm, ctx);
+
+ return use_def ? &ctx->def : &ctx->min_def;
+}
+
+void iwl_mvm_roc_duration_and_delay(struct ieee80211_vif *vif,
+ u32 duration_ms,
+ u32 *duration_tu,
+ u32 *delay);
+int iwl_mvm_roc_add_cmd(struct iwl_mvm *mvm,
+ struct ieee80211_channel *channel,
+ struct ieee80211_vif *vif,
+ int duration, u32 activity);
#endif /* __IWL_MVM_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index c0dd441e800e..ae8177222881 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -590,7 +590,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm)
return -EIO;
if (iwl_mvm_is_wifi_mcc_supported(mvm) &&
- !iwl_acpi_get_mcc(mvm->dev, mcc)) {
+ !iwl_bios_get_mcc(&mvm->fwrt, mcc)) {
kfree(regd);
regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc,
MCC_SOURCE_BIOS, NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index adbbe19aeae5..a93981cb9714 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -161,9 +161,9 @@ static void iwl_mvm_rx_monitor_notif(struct iwl_mvm *mvm,
if (!vif || vif->type != NL80211_IFTYPE_STATION)
return;
- if (!vif->bss_conf.chandef.chan ||
- vif->bss_conf.chandef.chan->band != NL80211_BAND_2GHZ ||
- vif->bss_conf.chandef.width < NL80211_CHAN_WIDTH_40)
+ if (!vif->bss_conf.chanreq.oper.chan ||
+ vif->bss_conf.chanreq.oper.chan->band != NL80211_BAND_2GHZ ||
+ vif->bss_conf.chanreq.oper.width < NL80211_CHAN_WIDTH_40)
return;
if (!vif->cfg.assoc)
@@ -219,7 +219,7 @@ void iwl_mvm_update_link_smps(struct ieee80211_vif *vif,
return;
if (mvm->fw_static_smps_request &&
- link_conf->chandef.width == NL80211_CHAN_WIDTH_160 &&
+ link_conf->chanreq.oper.width == NL80211_CHAN_WIDTH_160 &&
link_conf->he_support)
mode = IEEE80211_SMPS_STATIC;
@@ -259,7 +259,7 @@ static void iwl_mvm_rx_thermal_dual_chain_req(struct iwl_mvm *mvm,
}
/**
- * enum iwl_rx_handler_context context for Rx handler
+ * enum iwl_rx_handler_context: context for Rx handler
* @RX_HANDLER_SYNC : this means that it will be called in the Rx path
* which can't acquire mvm->mutex.
* @RX_HANDLER_ASYNC_LOCKED : If the handler needs to hold mvm->mutex
@@ -267,15 +267,19 @@ static void iwl_mvm_rx_thermal_dual_chain_req(struct iwl_mvm *mvm,
* it will be called from a worker with mvm->mutex held.
* @RX_HANDLER_ASYNC_UNLOCKED : in case the handler needs to lock the
* mutex itself, it will be called from a worker without mvm->mutex held.
+ * @RX_HANDLER_ASYNC_LOCKED_WIPHY: If the handler needs to hold the wiphy lock
+ * and mvm->mutex. Will be handled with the wiphy_work queue infra
+ * instead of regular work queue.
*/
enum iwl_rx_handler_context {
RX_HANDLER_SYNC,
RX_HANDLER_ASYNC_LOCKED,
RX_HANDLER_ASYNC_UNLOCKED,
+ RX_HANDLER_ASYNC_LOCKED_WIPHY,
};
/**
- * struct iwl_rx_handlers handler for FW notification
+ * struct iwl_rx_handlers: handler for FW notification
* @cmd_id: command id
* @min_size: minimum size to expect for the notification
* @context: see &iwl_rx_handler_context
@@ -316,7 +320,8 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
struct iwl_tlc_update_notif),
RX_HANDLER(BT_PROFILE_NOTIFICATION, iwl_mvm_rx_bt_coex_notif,
- RX_HANDLER_ASYNC_LOCKED, struct iwl_bt_coex_profile_notif),
+ RX_HANDLER_ASYNC_LOCKED_WIPHY,
+ struct iwl_bt_coex_profile_notif),
RX_HANDLER_NO_SIZE(BEACON_NOTIFICATION, iwl_mvm_rx_beacon_notif,
RX_HANDLER_ASYNC_LOCKED),
RX_HANDLER_NO_SIZE(STATISTICS_NOTIFICATION, iwl_mvm_rx_statistics,
@@ -324,7 +329,7 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
RX_HANDLER_GRP(STATISTICS_GROUP, STATISTICS_OPER_NOTIF,
iwl_mvm_handle_rx_system_oper_stats,
- RX_HANDLER_ASYNC_LOCKED,
+ RX_HANDLER_ASYNC_LOCKED_WIPHY,
struct iwl_system_statistics_notif_oper),
RX_HANDLER_GRP(STATISTICS_GROUP, STATISTICS_OPER_PART1_NOTIF,
iwl_mvm_handle_rx_system_oper_part1_stats,
@@ -673,6 +678,8 @@ static const struct iwl_hcmd_arr iwl_mvm_groups[] = {
/* this forward declaration can avoid to export the function */
static void iwl_mvm_async_handlers_wk(struct work_struct *wk);
+static void iwl_mvm_async_handlers_wiphy_wk(struct wiphy *wiphy,
+ struct wiphy_work *work);
static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
{
@@ -682,7 +689,7 @@ static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
if (!backoff)
return 0;
- dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev);
+ iwl_bios_get_pwr_limit(&mvm->fwrt, &dflt_pwr_limit);
while (backoff->pwr) {
if (dflt_pwr_limit >= backoff->pwr)
@@ -1194,7 +1201,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
iwl_fw_runtime_init(&mvm->fwrt, trans, fw, &iwl_mvm_fwrt_ops, mvm,
&iwl_mvm_sanitize_ops, mvm, dbgfs_dir);
- iwl_mvm_get_acpi_tables(mvm);
+ iwl_mvm_get_bios_tables(mvm);
iwl_uefi_get_sgom_table(trans, &mvm->fwrt);
iwl_uefi_get_step_table(trans);
@@ -1265,6 +1272,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
INIT_LIST_HEAD(&mvm->add_stream_txqs);
spin_lock_init(&mvm->add_stream_lock);
+ wiphy_work_init(&mvm->async_handlers_wiphy_wk,
+ iwl_mvm_async_handlers_wiphy_wk);
init_waitqueue_head(&mvm->rx_sync_waitq);
mvm->queue_sync_state = 0;
@@ -1551,35 +1560,62 @@ void iwl_mvm_async_handlers_purge(struct iwl_mvm *mvm)
spin_unlock_bh(&mvm->async_handlers_lock);
}
-static void iwl_mvm_async_handlers_wk(struct work_struct *wk)
+/*
+ * This function receives a bitmap of rx async handler contexts
+ * (&iwl_rx_handler_context) to handle, and runs only them
+ */
+static void iwl_mvm_async_handlers_by_context(struct iwl_mvm *mvm,
+ u8 contexts)
{
- struct iwl_mvm *mvm =
- container_of(wk, struct iwl_mvm, async_handlers_wk);
struct iwl_async_handler_entry *entry, *tmp;
LIST_HEAD(local_list);
- /* Ensure that we are not in stop flow (check iwl_mvm_mac_stop) */
-
/*
- * Sync with Rx path with a lock. Remove all the entries from this list,
- * add them to a local one (lock free), and then handle them.
+ * Sync with Rx path with a lock. Remove all the entries of the
+ * wanted contexts from this list, add them to a local one (lock free),
+ * and then handle them.
*/
spin_lock_bh(&mvm->async_handlers_lock);
- list_splice_init(&mvm->async_handlers_list, &local_list);
+ list_for_each_entry_safe(entry, tmp, &mvm->async_handlers_list, list) {
+ if (!(BIT(entry->context) & contexts))
+ continue;
+ list_del(&entry->list);
+ list_add_tail(&entry->list, &local_list);
+ }
spin_unlock_bh(&mvm->async_handlers_lock);
list_for_each_entry_safe(entry, tmp, &local_list, list) {
- if (entry->context == RX_HANDLER_ASYNC_LOCKED)
+ if (entry->context != RX_HANDLER_ASYNC_UNLOCKED)
mutex_lock(&mvm->mutex);
entry->fn(mvm, &entry->rxb);
iwl_free_rxb(&entry->rxb);
list_del(&entry->list);
- if (entry->context == RX_HANDLER_ASYNC_LOCKED)
+ if (entry->context != RX_HANDLER_ASYNC_UNLOCKED)
mutex_unlock(&mvm->mutex);
kfree(entry);
}
}
+static void iwl_mvm_async_handlers_wiphy_wk(struct wiphy *wiphy,
+ struct wiphy_work *wk)
+{
+ struct iwl_mvm *mvm =
+ container_of(wk, struct iwl_mvm, async_handlers_wiphy_wk);
+ u8 contexts = BIT(RX_HANDLER_ASYNC_LOCKED_WIPHY);
+
+ iwl_mvm_async_handlers_by_context(mvm, contexts);
+}
+
+static void iwl_mvm_async_handlers_wk(struct work_struct *wk)
+{
+ struct iwl_mvm *mvm =
+ container_of(wk, struct iwl_mvm, async_handlers_wk);
+ u8 contexts = BIT(RX_HANDLER_ASYNC_LOCKED) |
+ BIT(RX_HANDLER_ASYNC_UNLOCKED);
+
+ iwl_mvm_async_handlers_by_context(mvm, contexts);
+}
+
static inline void iwl_mvm_rx_check_trigger(struct iwl_mvm *mvm,
struct iwl_rx_packet *pkt)
{
@@ -1659,7 +1695,11 @@ static void iwl_mvm_rx_common(struct iwl_mvm *mvm,
spin_lock(&mvm->async_handlers_lock);
list_add_tail(&entry->list, &mvm->async_handlers_list);
spin_unlock(&mvm->async_handlers_lock);
- schedule_work(&mvm->async_handlers_wk);
+ if (rx_h->context == RX_HANDLER_ASYNC_LOCKED_WIPHY)
+ wiphy_work_queue(mvm->hw->wiphy,
+ &mvm->async_handlers_wiphy_wk);
+ else
+ schedule_work(&mvm->async_handlers_wk);
break;
}
}
@@ -1788,12 +1828,8 @@ static void iwl_mvm_wake_sw_queue(struct iwl_op_mode *op_mode, int hw_queue)
static void iwl_mvm_set_rfkill_state(struct iwl_mvm *mvm)
{
- bool state = iwl_mvm_is_radio_killed(mvm);
-
- if (state)
- wake_up(&mvm->rx_sync_waitq);
-
- wiphy_rfkill_set_hw_state(mvm->hw->wiphy, state);
+ wiphy_rfkill_set_hw_state(mvm->hw->wiphy,
+ iwl_mvm_is_radio_killed(mvm));
}
void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state)
@@ -1818,10 +1854,12 @@ static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
bool rfkill_safe_init_done = READ_ONCE(mvm->rfkill_safe_init_done);
bool unified = iwl_mvm_has_unified_ucode(mvm);
- if (state)
+ if (state) {
set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
- else
+ wake_up(&mvm->rx_sync_waitq);
+ } else {
clear_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
+ }
iwl_mvm_set_rfkill_state(mvm);
@@ -1955,7 +1993,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
ieee80211_restart_hw(mvm->hw);
} else if (mvm->fwrt.trans->dbg.restart_required) {
IWL_DEBUG_INFO(mvm, "FW restart requested after debug collection\n");
- mvm->fwrt.trans->dbg.restart_required = FALSE;
+ mvm->fwrt.trans->dbg.restart_required = false;
ieee80211_restart_hw(mvm->hw);
} else if (mvm->trans->trans_cfg->device_family <= IWL_DEVICE_FAMILY_8000) {
ieee80211_restart_hw(mvm->hw);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
index 334d1f59f6e4..ce264b386029 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
*/
@@ -9,7 +9,7 @@
#include "mvm.h"
/* Maps the driver specific channel width definition to the fw values */
-u8 iwl_mvm_get_channel_width(struct cfg80211_chan_def *chandef)
+u8 iwl_mvm_get_channel_width(const struct cfg80211_chan_def *chandef)
{
switch (chandef->width) {
case NL80211_CHAN_WIDTH_20_NOHT:
@@ -33,7 +33,7 @@ u8 iwl_mvm_get_channel_width(struct cfg80211_chan_def *chandef)
* Maps the driver specific control channel position (relative to the center
* freq) definitions to the the fw values
*/
-u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef)
+u8 iwl_mvm_get_ctrl_pos(const struct cfg80211_chan_def *chandef)
{
int offs = chandef->chan->center_freq - chandef->center_freq1;
int abs_offs = abs(offs);
@@ -116,7 +116,7 @@ static void iwl_mvm_phy_ctxt_set_rxchain(struct iwl_mvm *mvm,
static void iwl_mvm_phy_ctxt_cmd_data_v1(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *ctxt,
struct iwl_phy_context_cmd_v1 *cmd,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
u8 chains_static, u8 chains_dynamic)
{
struct iwl_phy_context_cmd_tail *tail =
@@ -137,7 +137,7 @@ static void iwl_mvm_phy_ctxt_cmd_data_v1(struct iwl_mvm *mvm,
static void iwl_mvm_phy_ctxt_cmd_data(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *ctxt,
struct iwl_phy_context_cmd *cmd,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
u8 chains_static, u8 chains_dynamic)
{
cmd->lmac_id = cpu_to_le32(iwl_mvm_get_lmac_id(mvm,
@@ -197,14 +197,18 @@ int iwl_mvm_phy_send_rlc(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
*/
static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *ctxt,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *ap,
u8 chains_static, u8 chains_dynamic,
u32 action)
{
int ret;
int ver = iwl_fw_lookup_cmd_ver(mvm->fw, PHY_CONTEXT_CMD, 1);
- if (ver == 3 || ver == 4) {
+ if (ver < 5 || !ap || !ap->chan)
+ ap = NULL;
+
+ if (ver >= 3 && ver <= 6) {
struct iwl_phy_context_cmd cmd = {};
/* Set the command header fields */
@@ -215,6 +219,14 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm,
chains_static,
chains_dynamic);
+ if (ap) {
+ cmd.sbb_bandwidth = iwl_mvm_get_channel_width(ap);
+ cmd.sbb_ctrl_channel_loc = iwl_mvm_get_ctrl_pos(ap);
+ }
+
+ if (ver == 6)
+ cmd.puncture_mask = cpu_to_le16(chandef->punctured);
+
ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD,
0, sizeof(cmd), &cmd);
} else if (ver < 3) {
@@ -254,7 +266,8 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm,
* Send a command to add a PHY context based on the current HW configuration.
*/
int iwl_mvm_phy_ctxt_add(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *ap,
u8 chains_static, u8 chains_dynamic)
{
int ret;
@@ -267,7 +280,7 @@ int iwl_mvm_phy_ctxt_add(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
ctxt->width = chandef->width;
ctxt->center_freq1 = chandef->center_freq1;
- ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef,
+ ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, ap,
chains_static, chains_dynamic,
FW_CTXT_ACTION_ADD);
@@ -300,7 +313,8 @@ void iwl_mvm_phy_ctxt_ref(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt)
* changed.
*/
int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
- struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *chandef,
+ const struct cfg80211_chan_def *ap,
u8 chains_static, u8 chains_dynamic)
{
enum iwl_ctxt_action action = FW_CTXT_ACTION_MODIFY;
@@ -324,7 +338,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
int ret;
/* ... remove it here ...*/
- ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef,
+ ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, NULL,
chains_static, chains_dynamic,
FW_CTXT_ACTION_REMOVE);
if (ret)
@@ -338,7 +352,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
ctxt->width = chandef->width;
ctxt->center_freq1 = chandef->center_freq1;
- return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef,
+ return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, ap,
chains_static, chains_dynamic,
action);
}
@@ -358,7 +372,7 @@ void iwl_mvm_phy_ctxt_unref(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt)
cfg80211_chandef_create(&chandef, ctxt->channel, NL80211_CHAN_NO_HT);
- iwl_mvm_phy_ctxt_apply(mvm, ctxt, &chandef, 1, 1,
+ iwl_mvm_phy_ctxt_apply(mvm, ctxt, &chandef, NULL, 1, 1,
FW_CTXT_ACTION_REMOVE);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
index 1b9b06e0443f..41e68aa6bec8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2019, 2021-2024 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -20,8 +20,7 @@
static
int iwl_mvm_beacon_filter_send_cmd(struct iwl_mvm *mvm,
- struct iwl_beacon_filter_cmd *cmd,
- u32 flags)
+ struct iwl_beacon_filter_cmd *cmd)
{
u16 len;
@@ -62,7 +61,7 @@ int iwl_mvm_beacon_filter_send_cmd(struct iwl_mvm *mvm,
len = offsetof(struct iwl_beacon_filter_cmd,
bf_threshold_absolute_low);
- return iwl_mvm_send_cmd_pdu(mvm, REPLY_BEACON_FILTERING_CMD, flags,
+ return iwl_mvm_send_cmd_pdu(mvm, REPLY_BEACON_FILTERING_CMD, 0,
len, cmd);
}
@@ -813,8 +812,7 @@ iwl_mvm_beacon_filter_debugfs_parameters(struct ieee80211_vif *vif,
static int _iwl_mvm_enable_beacon_filter(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
- struct iwl_beacon_filter_cmd *cmd,
- u32 cmd_flags)
+ struct iwl_beacon_filter_cmd *cmd)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
int ret;
@@ -825,7 +823,7 @@ static int _iwl_mvm_enable_beacon_filter(struct iwl_mvm *mvm,
iwl_mvm_beacon_filter_set_cqm_params(mvm, vif, cmd);
iwl_mvm_beacon_filter_debugfs_parameters(vif, cmd);
- ret = iwl_mvm_beacon_filter_send_cmd(mvm, cmd, cmd_flags);
+ ret = iwl_mvm_beacon_filter_send_cmd(mvm, cmd);
if (!ret)
mvmvif->bf_data.bf_enabled = true;
@@ -834,20 +832,18 @@ static int _iwl_mvm_enable_beacon_filter(struct iwl_mvm *mvm,
}
int iwl_mvm_enable_beacon_filter(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- u32 flags)
+ struct ieee80211_vif *vif)
{
struct iwl_beacon_filter_cmd cmd = {
IWL_BF_CMD_CONFIG_DEFAULTS,
.bf_enable_beacon_filter = cpu_to_le32(1),
};
- return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, flags);
+ return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd);
}
static int _iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- u32 flags)
+ struct ieee80211_vif *vif)
{
struct iwl_beacon_filter_cmd cmd = {};
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
@@ -856,7 +852,7 @@ static int _iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
if (vif->type != NL80211_IFTYPE_STATION || vif->p2p)
return 0;
- ret = iwl_mvm_beacon_filter_send_cmd(mvm, &cmd, flags);
+ ret = iwl_mvm_beacon_filter_send_cmd(mvm, &cmd);
if (!ret)
mvmvif->bf_data.bf_enabled = false;
@@ -865,10 +861,9 @@ static int _iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
}
int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- u32 flags)
+ struct ieee80211_vif *vif)
{
- return _iwl_mvm_disable_beacon_filter(mvm, vif, flags);
+ return _iwl_mvm_disable_beacon_filter(mvm, vif);
}
static int iwl_mvm_power_set_ps(struct iwl_mvm *mvm)
@@ -919,7 +914,7 @@ static int iwl_mvm_power_set_ba(struct iwl_mvm *mvm,
!vif->cfg.ps ||
iwl_mvm_vif_low_latency(mvmvif));
- return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd, 0);
+ return _iwl_mvm_enable_beacon_filter(mvm, vif, &cmd);
}
int iwl_mvm_power_update_ps(struct iwl_mvm *mvm)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index 6cba8a353b53..00860feefa7a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include "rs.h"
#include "fw-api.h"
@@ -479,9 +479,15 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm,
}
if (flags & IWL_TLC_NOTIF_FLAG_AMSDU && !mvm_link_sta->orig_amsdu_len) {
+ u32 enabled = le32_to_cpu(notif->amsdu_enabled);
u16 size = le32_to_cpu(notif->amsdu_size);
int i;
+ if (size < 2000) {
+ size = 0;
+ enabled = 0;
+ }
+
if (link_sta->agg.max_amsdu_len < size) {
/*
* In debug link_sta->agg.max_amsdu_len < size
@@ -492,7 +498,7 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm,
goto out;
}
- mvmsta->amsdu_enabled = le32_to_cpu(notif->amsdu_enabled);
+ mvmsta->amsdu_enabled = enabled;
mvmsta->max_amsdu_len = size;
link_sta->agg.max_rc_amsdu_len = mvmsta->max_amsdu_len;
@@ -525,10 +531,10 @@ u16 rs_fw_get_max_amsdu_len(struct ieee80211_sta *sta,
const struct ieee80211_sta_ht_cap *ht_cap = &link_sta->ht_cap;
const struct ieee80211_sta_eht_cap *eht_cap = &link_sta->eht_cap;
- if (WARN_ON_ONCE(!link_conf->chandef.chan))
+ if (WARN_ON_ONCE(!link_conf->chanreq.oper.chan))
return IEEE80211_MAX_MPDU_LEN_VHT_3895;
- if (link_conf->chandef.chan->band == NL80211_BAND_6GHZ) {
+ if (link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) {
switch (le16_get_bits(link_sta->he_6ghz_capa.capa,
IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
@@ -538,7 +544,7 @@ u16 rs_fw_get_max_amsdu_len(struct ieee80211_sta *sta,
default:
return IEEE80211_MAX_MPDU_LEN_VHT_3895;
}
- } else if (link_conf->chandef.chan->band == NL80211_BAND_2GHZ &&
+ } else if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ &&
eht_cap->has_eht) {
switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0],
IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 481d68cbbbd8..a8c4e354e2ce 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -4161,6 +4161,8 @@ static int rs_drv_tx_protection(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
* @mvm: The mvm component
* @mvmsta: The station
* @enable: Enable Tx protection?
+ *
+ * Returns: an error code
*/
int iwl_mvm_tx_protection(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
bool enable)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index 8caa971770c6..b1add7942c5b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -752,6 +752,19 @@ iwl_mvm_update_tcm_from_stats(struct iwl_mvm *mvm, __le32 *air_time_le,
spin_unlock(&mvm->tcm.lock);
}
+static void iwl_mvm_handle_per_phy_stats(struct iwl_mvm *mvm,
+ struct iwl_stats_ntfy_per_phy *per_phy)
+{
+ int i;
+
+ for (i = 0; i < NUM_PHY_CTX; i++) {
+ if (!mvm->phy_ctxts[i].ref)
+ continue;
+ mvm->phy_ctxts[i].channel_load_by_us =
+ le32_to_cpu(per_phy[i].channel_load_by_us);
+ }
+}
+
static void
iwl_mvm_stats_ver_15(struct iwl_mvm *mvm,
struct iwl_statistics_operational_ntfy *stats)
@@ -766,6 +779,7 @@ iwl_mvm_stats_ver_15(struct iwl_mvm *mvm,
IEEE80211_IFACE_ITER_NORMAL,
iwl_mvm_stat_iterator_all_macs,
&data);
+ iwl_mvm_handle_per_phy_stats(mvm, stats->per_phy);
}
static void
@@ -841,6 +855,7 @@ iwl_mvm_stat_iterator_all_links(struct iwl_mvm *mvm,
struct iwl_stats_ntfy_per_link *link_stats;
struct ieee80211_bss_conf *bss_conf;
struct iwl_mvm_vif *mvmvif;
+ struct iwl_mvm_vif_link_info *link_info;
int link_id;
int sig;
@@ -857,20 +872,26 @@ iwl_mvm_stat_iterator_all_links(struct iwl_mvm *mvm,
continue;
mvmvif = iwl_mvm_vif_from_mac80211(bss_conf->vif);
- if (!mvmvif || !mvmvif->link[link_id])
+ link_info = mvmvif->link[link_id];
+ if (!link_info)
continue;
link_stats = &per_link[fw_link_id];
- mvmvif->link[link_id]->beacon_stats.num_beacons =
+ link_info->beacon_stats.num_beacons =
le32_to_cpu(link_stats->beacon_counter);
/* we basically just use the u8 to store 8 bits and then treat
* it as a s8 whenever we take it out to a different type.
*/
- mvmvif->link[link_id]->beacon_stats.avg_signal =
+ link_info->beacon_stats.avg_signal =
-le32_to_cpu(link_stats->beacon_average_energy);
+ if (link_info->phy_ctxt &&
+ link_info->phy_ctxt->channel->band == NL80211_BAND_2GHZ)
+ iwl_mvm_bt_coex_update_vif_esr(mvm, bss_conf->vif,
+ link_id);
+
/* make sure that beacon statistics don't go backwards with TCM
* request to clear statistics
*/
@@ -935,6 +956,7 @@ void iwl_mvm_handle_rx_system_oper_stats(struct iwl_mvm *mvm,
ieee80211_iterate_stations_atomic(mvm->hw, iwl_mvm_stats_energy_iter,
average_energy);
+ iwl_mvm_handle_per_phy_stats(mvm, stats->per_phy);
}
void iwl_mvm_handle_rx_system_oper_part1_stats(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 886d00098528..1484eaedf452 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -282,6 +282,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
u32 status,
struct ieee80211_rx_status *stats)
{
+ struct wireless_dev *wdev;
struct iwl_mvm_sta *mvmsta;
struct iwl_mvm_vif *mvmvif;
u8 keyid;
@@ -303,9 +304,15 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
if (!ieee80211_is_beacon(hdr->frame_control))
return 0;
+ if (!sta)
+ return -1;
+
+ mvmsta = iwl_mvm_sta_from_mac80211(sta);
+ mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif);
+
/* key mismatch - will also report !MIC_OK but we shouldn't count it */
if (!(status & IWL_RX_MPDU_STATUS_KEY_VALID))
- return -1;
+ goto report;
/* good cases */
if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK &&
@@ -314,13 +321,6 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
return 0;
}
- if (!sta)
- return -1;
-
- mvmsta = iwl_mvm_sta_from_mac80211(sta);
-
- mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif);
-
/*
* both keys will have the same cipher and MIC length, use
* whichever one is available
@@ -329,11 +329,11 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
if (!key) {
key = rcu_dereference(mvmvif->bcn_prot.keys[1]);
if (!key)
- return -1;
+ goto report;
}
if (len < key->icv_len + IEEE80211_GMAC_PN_LEN + 2)
- return -1;
+ goto report;
/* get the real key ID */
keyid = frame[len - key->icv_len - IEEE80211_GMAC_PN_LEN - 2];
@@ -347,7 +347,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
return -1;
key = rcu_dereference(mvmvif->bcn_prot.keys[keyid - 6]);
if (!key)
- return -1;
+ goto report;
}
/* Report status to mac80211 */
@@ -355,6 +355,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
ieee80211_key_mic_failure(key);
else if (status & IWL_RX_MPDU_STATUS_REPLAY_ERROR)
ieee80211_key_replay(key);
+report:
+ wdev = ieee80211_vif_to_wdev(mvmsta->vif);
+ if (wdev->netdev)
+ cfg80211_rx_unprot_mlme_mgmt(wdev->netdev, (void *)hdr, len);
return -1;
}
@@ -397,8 +401,11 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
case IWL_RX_MPDU_STATUS_SEC_GCM:
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN != IEEE80211_GCMP_PN_LEN);
/* alg is CCM: check MIC only */
- if (!(status & IWL_RX_MPDU_STATUS_MIC_OK))
+ if (!(status & IWL_RX_MPDU_STATUS_MIC_OK)) {
+ IWL_DEBUG_DROP(mvm,
+ "Dropping packet, bad MIC (CCM/GCM)\n");
return -1;
+ }
stats->flag |= RX_FLAG_DECRYPTED | RX_FLAG_MIC_STRIPPED;
*crypt_len = IEEE80211_CCMP_HDR_LEN;
@@ -505,6 +512,10 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
return false;
mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+
+ if (WARN_ON_ONCE(!mvm_sta->dup_data))
+ return false;
+
dup_data = &mvm_sta->dup_data[queue];
/*
@@ -512,11 +523,9 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
* (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery")
*/
if (ieee80211_is_ctl(hdr->frame_control) ||
- ieee80211_is_qos_nullfunc(hdr->frame_control) ||
- is_multicast_ether_addr(hdr->addr1)) {
- rx_status->flag |= RX_FLAG_DUP_VALIDATED;
+ ieee80211_is_any_nullfunc(hdr->frame_control) ||
+ is_multicast_ether_addr(hdr->addr1))
return false;
- }
if (ieee80211_is_data_qos(hdr->frame_control)) {
/* frame has qos control */
@@ -642,10 +651,8 @@ static void iwl_mvm_release_frames_from_notif(struct iwl_mvm *mvm,
rcu_read_lock();
ba_data = rcu_dereference(mvm->baid_map[baid]);
- if (!ba_data) {
- WARN(true, "BAID %d not found in map\n", baid);
+ if (WARN(!ba_data, "BAID %d not found in map\n", baid))
goto out;
- }
/* pick any STA ID to find the pointer */
sta_id = ffs(ba_data->sta_mask) - 1;
@@ -681,11 +688,11 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct napi_struct *napi,
return;
len -= sizeof(*notif) + sizeof(*internal_notif);
- if (internal_notif->sync &&
- mvm->queue_sync_cookie != internal_notif->cookie) {
- WARN_ONCE(1, "Received expired RX queue sync message\n");
+ if (WARN_ONCE(internal_notif->sync &&
+ mvm->queue_sync_cookie != internal_notif->cookie,
+ "Received expired RX queue sync message (cookie %d but wanted %d, queue %d)\n",
+ internal_notif->cookie, mvm->queue_sync_cookie, queue))
return;
- }
switch (internal_notif->type) {
case IWL_MVM_RXQ_EMPTY:
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 7b6f1cdca067..f3e3986b4c72 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -241,13 +241,11 @@ iwl_mvm_scan_type _iwl_mvm_get_scan_type(struct iwl_mvm *mvm,
return IWL_SCAN_TYPE_FRAGMENTED;
/*
- * in case of DCM with GO where BSS DTIM interval < 220msec
- * set all scan requests as fast-balance scan
+ * in case of DCM with P2P GO set all scan requests as
+ * fast-balance scan
*/
if (vif && vif->type == NL80211_IFTYPE_STATION &&
- data.is_dcm_with_p2p_go &&
- ((vif->bss_conf.beacon_int *
- vif->bss_conf.dtim_period) < 220))
+ data.is_dcm_with_p2p_go)
return IWL_SCAN_TYPE_FAST_BALANCE;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sf.c b/drivers/net/wireless/intel/iwlwifi/mvm/sf.c
index 30d4233595e8..16285ae7cae9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sf.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sf.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2013-2014, 2018-2019, 2022-2023 Intel Corporation
+ * Copyright (C) 2013-2014, 2018-2019, 2022-2024 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
*/
#include "mvm.h"
@@ -232,6 +232,9 @@ int iwl_mvm_sf_update(struct iwl_mvm *mvm, struct ieee80211_vif *changed_vif,
};
struct ieee80211_sta *sta = NULL;
+ if (fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_SMART_FIFO_OFFLOAD))
+ return 0;
/*
* Ignore the call if we are in HW Restart flow, or if the handled
* vif is a p2p device.
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 2a3ca9785974..491c449fd431 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2015, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2015, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -71,7 +71,7 @@ u32 iwl_mvm_get_sta_ampdu_dens(struct ieee80211_link_sta *link_sta,
mpdu_dens = link_sta->ht_cap.ampdu_density;
}
- if (link_conf->chandef.chan->band == NL80211_BAND_6GHZ) {
+ if (link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) {
/* overwrite HT values on 6 GHz */
mpdu_dens = le16_get_bits(link_sta->he_6ghz_capa.capa,
IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START);
@@ -208,7 +208,7 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
}
if (sta->deflink.ht_cap.ht_supported ||
- mvm_sta->vif->bss_conf.chandef.chan->band == NL80211_BAND_6GHZ)
+ mvm_sta->vif->bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ)
add_sta_cmd.station_flags_msk |=
cpu_to_le32(STA_FLG_MAX_AGG_SIZE_MSK |
STA_FLG_AGG_MPDU_DENS_MSK);
@@ -1502,6 +1502,34 @@ out_err:
return ret;
}
+int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm,
+ struct ieee80211_txq *txq)
+{
+ struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq);
+ int ret = -EINVAL;
+
+ lockdep_assert_held(&mvm->mutex);
+
+ if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) ||
+ !txq->sta) {
+ return 0;
+ }
+
+ if (!iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, txq->tid)) {
+ set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state);
+ ret = 0;
+ }
+
+ local_bh_disable();
+ spin_lock(&mvm->add_stream_lock);
+ if (!list_empty(&mvmtxq->list))
+ list_del_init(&mvmtxq->list);
+ spin_unlock(&mvm->add_stream_lock);
+ local_bh_enable();
+
+ return ret;
+}
+
void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk)
{
struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm,
@@ -2989,16 +3017,6 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
RCU_INIT_POINTER(mvm->baid_map[baid], NULL);
kfree_rcu(baid_data, rcu_head);
IWL_DEBUG_HT(mvm, "BAID %d is free\n", baid);
-
- /*
- * After we've deleted it, do another queue sync
- * so if an IWL_MVM_RXQ_NSSN_SYNC was concurrently
- * running it won't find a new session in the old
- * BAID. It can find the NULL pointer for the BAID,
- * but we must not have it find a different session.
- */
- iwl_mvm_sync_rx_queues_internal(mvm, IWL_MVM_RXQ_EMPTY,
- true, NULL, 0);
}
return 0;
@@ -3559,6 +3577,9 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
key_flags = cpu_to_le16(keyidx);
key_flags |= cpu_to_le16(STA_KEY_FLG_WEP_KEY_MAP);
+ if (key->flags & IEEE80211_KEY_FLAG_SPP_AMSDU)
+ key_flags |= cpu_to_le16(STA_KEY_FLG_AMSDU_SPP);
+
switch (key->cipher) {
case WLAN_CIPHER_SUITE_TKIP:
key_flags |= cpu_to_le16(STA_KEY_FLG_TKIP);
@@ -4298,12 +4319,12 @@ u16 iwl_mvm_tid_queued(struct iwl_mvm *mvm, struct iwl_mvm_tid_data *tid_data)
int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher,
- u8 *key, u32 key_len)
+ u8 *key, u32 key_len,
+ struct ieee80211_key_conf *keyconf)
{
int ret;
u16 queue;
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- struct ieee80211_key_conf *keyconf;
unsigned int wdg_timeout =
iwl_mvm_get_wd_timeout(mvm, vif, false, false);
bool mld = iwl_mvm_has_mld_api(mvm->fw);
@@ -4328,12 +4349,6 @@ int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (ret)
goto out;
- keyconf = kzalloc(sizeof(*keyconf) + key_len, GFP_KERNEL);
- if (!keyconf) {
- ret = -ENOBUFS;
- goto out;
- }
-
keyconf->cipher = cipher;
memcpy(keyconf->key, key, key_len);
keyconf->keylen = key_len;
@@ -4354,10 +4369,9 @@ int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
0, NULL, 0, 0, true);
}
- kfree(keyconf);
- return 0;
out:
- iwl_mvm_dealloc_int_sta(mvm, sta);
+ if (ret)
+ iwl_mvm_dealloc_int_sta(mvm, sta);
return ret;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index b33a0ce096d4..b3450569864e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2016 Intel Deutschland GmbH
*/
@@ -571,10 +571,12 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm,
bool disable);
void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
+int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, struct ieee80211_txq *txq);
void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk);
int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher,
- u8 *key, u32 key_len);
+ u8 *key, u32 key_len,
+ struct ieee80211_key_conf *key_conf_out);
void iwl_mvm_cancel_channel_switch(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
u32 id);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 218fdf1ed530..a59d264a11c5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
*/
@@ -45,32 +45,24 @@ void iwl_mvm_te_clear_data(struct iwl_mvm *mvm,
te_data->link_id = -1;
}
-void iwl_mvm_roc_done_wk(struct work_struct *wk)
+static void iwl_mvm_cleanup_roc(struct iwl_mvm *mvm)
{
- struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, roc_done_wk);
-
/*
* Clear the ROC_RUNNING status bit.
* This will cause the TX path to drop offchannel transmissions.
* That would also be done by mac80211, but it is racy, in particular
- * in the case that the time event actually completed in the firmware
- * (which is handled in iwl_mvm_te_handle_notif).
- */
- clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status);
-
- synchronize_net();
-
- /*
- * Flush the offchannel queue -- this is called when the time
+ * in the case that the time event actually completed in the firmware.
+ *
+ * Also flush the offchannel queue -- this is called when the time
* event finishes or is canceled, so that frames queued for it
* won't get stuck on the queue and be transmitted in the next
* time event.
*/
-
- mutex_lock(&mvm->mutex);
- if (test_and_clear_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status)) {
+ if (test_and_clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status)) {
struct iwl_mvm_vif *mvmvif;
+ synchronize_net();
+
/*
* NB: access to this pointer would be racy, but the flush bit
* can only be set when we had a P2P-Device VIF, and we have a
@@ -105,21 +97,16 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk)
}
}
- /*
- * Clear the ROC_AUX_RUNNING status bit.
- * This will cause the TX path to drop offchannel transmissions.
- * That would also be done by mac80211, but it is racy, in particular
- * in the case that the time event actually completed in the firmware
- * (which is handled in iwl_mvm_te_handle_notif).
- */
+ /* Do the same for AUX ROC */
if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) {
- /* do the same in case of hot spot 2.0 */
+ synchronize_net();
+
iwl_mvm_flush_sta(mvm, mvm->aux_sta.sta_id,
mvm->aux_sta.tfd_queue_msk);
if (mvm->mld_api_is_used) {
iwl_mvm_mld_rm_aux_sta(mvm);
- goto out_unlock;
+ return;
}
/* In newer version of this command an aux station is added only
@@ -128,8 +115,14 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk)
if (iwl_mvm_has_new_station_api(mvm->fw))
iwl_mvm_rm_aux_sta(mvm);
}
+}
-out_unlock:
+void iwl_mvm_roc_done_wk(struct work_struct *wk)
+{
+ struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, roc_done_wk);
+
+ mutex_lock(&mvm->mutex);
+ iwl_mvm_cleanup_roc(mvm);
mutex_unlock(&mvm->mutex);
}
@@ -163,12 +156,12 @@ static void iwl_mvm_csa_noa_start(struct iwl_mvm *mvm)
* So we just do nothing here and the switch
* will be performed on the last TBTT.
*/
- if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) {
+ if (!ieee80211_beacon_cntdwn_is_complete(csa_vif, 0)) {
IWL_WARN(mvm, "CSA NOA started too early\n");
goto out_unlock;
}
- ieee80211_csa_finish(csa_vif);
+ ieee80211_csa_finish(csa_vif, 0);
rcu_read_unlock();
@@ -294,18 +287,6 @@ static void iwl_mvm_te_check_trigger(struct iwl_mvm *mvm,
}
}
-static void iwl_mvm_p2p_roc_finished(struct iwl_mvm *mvm)
-{
- /*
- * If the IWL_MVM_STATUS_NEED_FLUSH_P2P is already set, then the
- * roc_done_wk is already scheduled or running, so don't schedule it
- * again to avoid a race where the roc_done_wk clears this bit after
- * it is set here, affecting the next run of the roc_done_wk.
- */
- if (!test_and_set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status))
- iwl_mvm_roc_finished(mvm);
-}
-
/*
* Handles a FW notification for an event that is known to the driver.
*
@@ -357,7 +338,7 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm,
switch (te_data->vif->type) {
case NL80211_IFTYPE_P2P_DEVICE:
ieee80211_remain_on_channel_expired(mvm->hw);
- iwl_mvm_p2p_roc_finished(mvm);
+ iwl_mvm_roc_finished(mvm);
break;
case NL80211_IFTYPE_STATION:
/*
@@ -692,7 +673,7 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm,
/* Determine whether mac or link id should be used, and validate the link id */
static int iwl_mvm_get_session_prot_id(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
- u32 link_id)
+ s8 link_id)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
int ver = iwl_fw_lookup_cmd_ver(mvm->fw,
@@ -706,8 +687,7 @@ static int iwl_mvm_get_session_prot_id(struct iwl_mvm *mvm,
"Invalid link ID for session protection: %u\n", link_id))
return -EINVAL;
- if (WARN(ieee80211_vif_is_mld(vif) &&
- !(vif->active_links & BIT(link_id)),
+ if (WARN(!mvmvif->link[link_id]->active,
"Session Protection on an inactive link: %u\n", link_id))
return -EINVAL;
@@ -716,7 +696,7 @@ static int iwl_mvm_get_session_prot_id(struct iwl_mvm *mvm,
static void iwl_mvm_cancel_session_protection(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
- u32 id, u32 link_id)
+ u32 id, s8 link_id)
{
int mac_link_id = iwl_mvm_get_session_prot_id(mvm, vif, link_id);
struct iwl_mvm_session_prot_cmd cmd = {
@@ -745,7 +725,7 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
struct ieee80211_vif *vif = te_data->vif;
struct iwl_mvm_vif *mvmvif;
enum nl80211_iftype iftype;
- unsigned int link_id;
+ s8 link_id;
if (!vif)
return false;
@@ -783,7 +763,7 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
iwl_mvm_cancel_session_protection(mvm, vif, id,
link_id);
if (iftype == NL80211_IFTYPE_P2P_DEVICE) {
- iwl_mvm_p2p_roc_finished(mvm);
+ iwl_mvm_roc_finished(mvm);
}
}
return false;
@@ -929,7 +909,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
if (WARN(ver > 2 && mvmvif->time_event_data.link_id >= 0 &&
mvmvif->time_event_data.link_id != notif_link_id,
- "SESION_PROTECTION_NOTIF was received for link %u, while the current time event is on link %u\n",
+ "SESSION_PROTECTION_NOTIF was received for link %u, while the current time event is on link %u\n",
notif_link_id, mvmvif->time_event_data.link_id))
goto out_unlock;
@@ -972,7 +952,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
if (!le32_to_cpu(notif->status) || !le32_to_cpu(notif->start)) {
/* End TE, notify mac80211 */
mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID;
- iwl_mvm_p2p_roc_finished(mvm);
+ mvmvif->time_event_data.link_id = -1;
+ iwl_mvm_roc_finished(mvm);
ieee80211_remain_on_channel_expired(mvm->hw);
} else if (le32_to_cpu(notif->start)) {
if (WARN_ON(mvmvif->time_event_data.id !=
@@ -986,6 +967,86 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
rcu_read_unlock();
}
+#define AUX_ROC_MIN_DURATION MSEC_TO_TU(100)
+#define AUX_ROC_MIN_DELAY MSEC_TO_TU(200)
+#define AUX_ROC_MAX_DELAY MSEC_TO_TU(600)
+#define AUX_ROC_SAFETY_BUFFER MSEC_TO_TU(20)
+#define AUX_ROC_MIN_SAFETY_BUFFER MSEC_TO_TU(10)
+
+void iwl_mvm_roc_duration_and_delay(struct ieee80211_vif *vif,
+ u32 duration_ms,
+ u32 *duration_tu,
+ u32 *delay)
+{
+ u32 dtim_interval = vif->bss_conf.dtim_period *
+ vif->bss_conf.beacon_int;
+
+ *delay = AUX_ROC_MIN_DELAY;
+ *duration_tu = MSEC_TO_TU(duration_ms);
+
+ /*
+ * If we are associated we want the delay time to be at least one
+ * dtim interval so that the FW can wait until after the DTIM and
+ * then start the time event, this will potentially allow us to
+ * remain off-channel for the max duration.
+ * Since we want to use almost a whole dtim interval we would also
+ * like the delay to be for 2-3 dtim intervals, in case there are
+ * other time events with higher priority.
+ */
+ if (vif->cfg.assoc) {
+ *delay = min_t(u32, dtim_interval * 3, AUX_ROC_MAX_DELAY);
+ /* We cannot remain off-channel longer than the DTIM interval */
+ if (dtim_interval <= *duration_tu) {
+ *duration_tu = dtim_interval - AUX_ROC_SAFETY_BUFFER;
+ if (*duration_tu <= AUX_ROC_MIN_DURATION)
+ *duration_tu = dtim_interval -
+ AUX_ROC_MIN_SAFETY_BUFFER;
+ }
+ }
+}
+
+int iwl_mvm_roc_add_cmd(struct iwl_mvm *mvm,
+ struct ieee80211_channel *channel,
+ struct ieee80211_vif *vif,
+ int duration, u32 activity)
+{
+ int res;
+ u32 duration_tu, delay;
+ struct iwl_roc_req roc_req = {
+ .action = cpu_to_le32(FW_CTXT_ACTION_ADD),
+ .activity = cpu_to_le32(activity),
+ .sta_id = cpu_to_le32(mvm->aux_sta.sta_id),
+ };
+
+ lockdep_assert_held(&mvm->mutex);
+
+ /* Set the channel info data */
+ iwl_mvm_set_chan_info(mvm, &roc_req.channel_info,
+ channel->hw_value,
+ iwl_mvm_phy_band_from_nl80211(channel->band),
+ IWL_PHY_CHANNEL_MODE20, 0);
+
+ iwl_mvm_roc_duration_and_delay(vif, duration, &duration_tu,
+ &delay);
+ roc_req.duration = cpu_to_le32(duration_tu);
+ roc_req.max_delay = cpu_to_le32(delay);
+
+ IWL_DEBUG_TE(mvm,
+ "\t(requested = %ums, max_delay = %ums)\n",
+ duration, delay);
+ IWL_DEBUG_TE(mvm,
+ "Requesting to remain on channel %u for %utu\n",
+ channel->hw_value, duration_tu);
+
+ /* Set the node address */
+ memcpy(roc_req.node_addr, vif->addr, ETH_ALEN);
+
+ res = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(MAC_CONF_GROUP, ROC_CMD),
+ 0, sizeof(roc_req), &roc_req);
+
+ return res;
+}
+
static int
iwl_mvm_start_p2p_roc_session_protection(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
@@ -1163,18 +1224,22 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) {
mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ te_data = &mvmvif->time_event_data;
if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
+ if (te_data->id >= SESSION_PROTECT_CONF_MAX_ID) {
+ IWL_DEBUG_TE(mvm,
+ "No remain on channel event\n");
+ return;
+ }
+
iwl_mvm_cancel_session_protection(mvm, vif,
- mvmvif->time_event_data.id,
- mvmvif->time_event_data.link_id);
- iwl_mvm_p2p_roc_finished(mvm);
+ te_data->id,
+ te_data->link_id);
} else {
iwl_mvm_roc_station_remove(mvm, mvmvif);
- iwl_mvm_roc_finished(mvm);
}
-
- return;
+ goto cleanup_roc;
}
te_data = iwl_mvm_get_roc_te(mvm);
@@ -1185,13 +1250,21 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
- if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
+ if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE)
iwl_mvm_remove_time_event(mvm, mvmvif, te_data);
- iwl_mvm_p2p_roc_finished(mvm);
- } else {
+ else
iwl_mvm_remove_aux_roc_te(mvm, mvmvif, te_data);
- iwl_mvm_roc_finished(mvm);
- }
+
+cleanup_roc:
+ /*
+ * In case we get here before the ROC event started,
+ * (so the status bit isn't set) set it here so iwl_mvm_cleanup_roc will
+ * cleanup things properly
+ */
+ set_bit(vif->type == NL80211_IFTYPE_P2P_DEVICE ?
+ IWL_MVM_STATUS_ROC_RUNNING : IWL_MVM_STATUS_ROC_AUX_RUNNING,
+ &mvm->status);
+ iwl_mvm_cleanup_roc(mvm);
}
void iwl_mvm_remove_csa_period(struct iwl_mvm *mvm,
@@ -1296,7 +1369,7 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm,
struct iwl_mvm_time_event_data *te_data = &mvmvif->time_event_data;
const u16 notif[] = { WIDE_ID(MAC_CONF_GROUP, SESSION_PROTECTION_NOTIF) };
struct iwl_notification_wait wait_notif;
- int mac_link_id = iwl_mvm_get_session_prot_id(mvm, vif, link_id);
+ int mac_link_id = iwl_mvm_get_session_prot_id(mvm, vif, (s8)link_id);
struct iwl_mvm_session_prot_cmd cmd = {
.id_and_color = cpu_to_le32(mac_link_id),
.action = cpu_to_le32(FW_CTXT_ACTION_ADD),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index db986bfc4dc3..782ddc8c296b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -520,13 +520,49 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
}
}
+static bool iwl_mvm_use_host_rate(struct iwl_mvm *mvm,
+ struct iwl_mvm_sta *mvmsta,
+ struct ieee80211_hdr *hdr,
+ struct ieee80211_tx_info *info)
+{
+ if (unlikely(!mvmsta))
+ return true;
+
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_RATE_INJECT))
+ return true;
+
+ if (likely(ieee80211_is_data(hdr->frame_control) &&
+ mvmsta->sta_state >= IEEE80211_STA_AUTHORIZED))
+ return false;
+
+ /*
+ * Not a data frame, use host rate if on an old device that
+ * can't possibly be doing MLO (firmware may be selecting a
+ * bad rate), if we might be doing MLO we need to let FW pick
+ * (since we don't necesarily know the link), but FW rate
+ * selection was fixed.
+ */
+ return mvm->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_BZ;
+}
+
+static void iwl_mvm_copy_hdr(void *cmd, const void *hdr, int hdrlen,
+ const u8 *addr3_override)
+{
+ struct ieee80211_hdr *out_hdr = cmd;
+
+ memcpy(cmd, hdr, hdrlen);
+ if (addr3_override)
+ memcpy(out_hdr->addr3, addr3_override, ETH_ALEN);
+}
+
/*
* Allocates and sets the Tx cmd the driver data pointers in the skb
*/
static struct iwl_device_tx_cmd *
iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
struct ieee80211_tx_info *info, int hdrlen,
- struct ieee80211_sta *sta, u8 sta_id)
+ struct ieee80211_sta *sta, u8 sta_id,
+ const u8 *addr3_override)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
struct iwl_device_tx_cmd *dev_cmd;
@@ -556,12 +592,12 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
flags |= IWL_TX_FLAGS_ENCRYPT_DIS;
/*
- * For data and mgmt packets rate info comes from the fw. Only
+ * For data and mgmt packets rate info comes from the fw (for
+ * new devices, older FW is somewhat broken for this). Only
* set rate/antenna for injected frames with fixed rate, or
- * when no sta is given.
+ * when no sta is given, or with older firmware.
*/
- if (unlikely(!sta ||
- info->control.flags & IEEE80211_TX_CTRL_RATE_INJECT)) {
+ if (unlikely(iwl_mvm_use_host_rate(mvm, mvmsta, hdr, info))) {
flags |= IWL_TX_FLAGS_CMD_RATE;
rate_n_flags =
iwl_mvm_get_tx_rate_n_flags(mvm, info, sta,
@@ -584,7 +620,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
cmd->len = cpu_to_le16((u16)skb->len);
/* Copy MAC header from skb into command buffer */
- memcpy(cmd->hdr, hdr, hdrlen);
+ iwl_mvm_copy_hdr(cmd->hdr, hdr, hdrlen, addr3_override);
cmd->flags = cpu_to_le16(flags);
cmd->rate_n_flags = cpu_to_le32(rate_n_flags);
@@ -599,7 +635,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
cmd->len = cpu_to_le16((u16)skb->len);
/* Copy MAC header from skb into command buffer */
- memcpy(cmd->hdr, hdr, hdrlen);
+ iwl_mvm_copy_hdr(cmd->hdr, hdr, hdrlen, addr3_override);
cmd->flags = cpu_to_le32(flags);
cmd->rate_n_flags = cpu_to_le32(rate_n_flags);
@@ -617,7 +653,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
iwl_mvm_set_tx_cmd_rate(mvm, tx_cmd, info, sta, hdr->frame_control);
/* Copy MAC header from skb into command buffer */
- memcpy(tx_cmd->hdr, hdr, hdrlen);
+ iwl_mvm_copy_hdr(tx_cmd->hdr, hdr, hdrlen, addr3_override);
out:
return dev_cmd;
@@ -820,7 +856,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, queue);
- dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id);
+ dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id,
+ NULL);
if (!dev_cmd)
return -1;
@@ -869,10 +906,10 @@ unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm,
if (WARN_ON(!link_conf))
band = NL80211_BAND_2GHZ;
else
- band = link_conf->chandef.chan->band;
+ band = link_conf->chanreq.oper.chan->band;
rcu_read_unlock();
} else {
- band = mvmsta->vif->bss_conf.chandef.chan->band;
+ band = mvmsta->vif->bss_conf.chanreq.oper.chan->band;
}
lmac = iwl_mvm_get_lmac_id(mvm, band);
@@ -914,9 +951,15 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes,
next = skb_gso_segment(skb, netdev_flags);
skb_shinfo(skb)->gso_size = mss;
skb_shinfo(skb)->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
- if (WARN_ON_ONCE(IS_ERR(next)))
- return -EINVAL;
- else if (next)
+
+ if (IS_ERR(next) && PTR_ERR(next) == -ENOMEM)
+ return -ENOMEM;
+
+ if (WARN_ONCE(IS_ERR(next),
+ "skb_gso_segment error: %d\n", (int)PTR_ERR(next)))
+ return PTR_ERR(next);
+
+ if (next)
consume_skb(skb);
skb_list_walk_safe(next, tmp, next) {
@@ -972,8 +1015,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
netdev_features_t netdev_flags = NETIF_F_CSUM_MASK | NETIF_F_SG;
u8 tid;
- snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) +
- tcp_hdrlen(skb);
+ snap_ip_tcp = 8 + skb_network_header_len(skb) + tcp_hdrlen(skb);
if (!mvmsta->max_amsdu_len ||
!ieee80211_is_data_qos(hdr->frame_control) ||
@@ -1140,7 +1182,8 @@ static int iwl_mvm_tx_pkt_queued(struct iwl_mvm *mvm,
*/
static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
struct ieee80211_tx_info *info,
- struct ieee80211_sta *sta)
+ struct ieee80211_sta *sta,
+ const u8 *addr3_override)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
struct iwl_mvm_sta *mvmsta;
@@ -1172,7 +1215,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
iwl_mvm_probe_resp_set_noa(mvm, skb);
dev_cmd = iwl_mvm_set_tx_params(mvm, skb, info, hdrlen,
- sta, mvmsta->deflink.sta_id);
+ sta, mvmsta->deflink.sta_id,
+ addr3_override);
if (!dev_cmd)
goto drop;
@@ -1294,9 +1338,11 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb,
struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
struct ieee80211_tx_info info;
struct sk_buff_head mpdus_skbs;
+ struct ieee80211_vif *vif;
unsigned int payload_len;
int ret;
struct sk_buff *orig_skb = skb;
+ const u8 *addr3;
if (WARN_ON_ONCE(!mvmsta))
return -1;
@@ -1307,26 +1353,59 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb,
memcpy(&info, skb->cb, sizeof(info));
if (!skb_is_gso(skb))
- return iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+ return iwl_mvm_tx_mpdu(mvm, skb, &info, sta, NULL);
payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) -
tcp_hdrlen(skb) + skb->data_len;
if (payload_len <= skb_shinfo(skb)->gso_size)
- return iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+ return iwl_mvm_tx_mpdu(mvm, skb, &info, sta, NULL);
__skb_queue_head_init(&mpdus_skbs);
+ vif = info.control.vif;
+ if (!vif)
+ return -1;
+
ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs);
if (ret)
return ret;
WARN_ON(skb_queue_empty(&mpdus_skbs));
+ /*
+ * As described in IEEE sta 802.11-2020, table 9-30 (Address
+ * field contents), A-MSDU address 3 should contain the BSSID
+ * address.
+ * Pass address 3 down to iwl_mvm_tx_mpdu() and further to set it
+ * in the command header. We need to preserve the original
+ * address 3 in the skb header to correctly create all the
+ * A-MSDU subframe headers from it.
+ */
+ switch (vif->type) {
+ case NL80211_IFTYPE_STATION:
+ addr3 = vif->cfg.ap_addr;
+ break;
+ case NL80211_IFTYPE_AP:
+ addr3 = vif->addr;
+ break;
+ default:
+ addr3 = NULL;
+ break;
+ }
+
while (!skb_queue_empty(&mpdus_skbs)) {
+ struct ieee80211_hdr *hdr;
+ bool amsdu;
+
skb = __skb_dequeue(&mpdus_skbs);
+ hdr = (void *)skb->data;
+ amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
+ (*ieee80211_get_qos_ctl(hdr) &
+ IEEE80211_QOS_CTL_A_MSDU_PRESENT);
- ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+ ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta,
+ amsdu ? addr3 : NULL);
if (ret) {
/* Free skbs created as part of TSO logic that have not yet been dequeued */
__skb_queue_purge(&mpdus_skbs);
@@ -1587,12 +1666,18 @@ static void iwl_mvm_tx_status_check_trigger(struct iwl_mvm *mvm,
* of the batch. This is why the SSN of the SCD is written at the end of the
* whole struct at a variable offset. This function knows how to cope with the
* variable offset and returns the SSN of the SCD.
+ *
+ * For 22000-series and lower, this is just 12 bits. For later, 16 bits.
*/
static inline u32 iwl_mvm_get_scd_ssn(struct iwl_mvm *mvm,
struct iwl_mvm_tx_resp *tx_resp)
{
- return le32_to_cpup((__le32 *)iwl_mvm_get_agg_status(mvm, tx_resp) +
- tx_resp->frame_count) & 0xfff;
+ u32 val = le32_to_cpup((__le32 *)iwl_mvm_get_agg_status(mvm, tx_resp) +
+ tx_resp->frame_count);
+
+ if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+ return val & 0xFFFF;
+ return val & 0xFFF;
}
static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
@@ -2125,6 +2210,12 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
tfd_cnt, pkt_len))
return;
+ IWL_DEBUG_TX_REPLY(mvm,
+ "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
+ sta_id, le32_to_cpu(ba_res->flags),
+ le16_to_cpu(ba_res->txed),
+ le16_to_cpu(ba_res->done));
+
rcu_read_lock();
mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
@@ -2160,12 +2251,6 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
iwl_mvm_tx_airtime(mvm, mvmsta,
le32_to_cpu(ba_res->wireless_time));
rcu_read_unlock();
-
- IWL_DEBUG_TX_REPLY(mvm,
- "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
- sta_id, le32_to_cpu(ba_res->flags),
- le16_to_cpu(ba_res->txed),
- le16_to_cpu(ba_res->done));
return;
}
@@ -2197,9 +2282,6 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
rcu_read_unlock();
- iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info,
- tid_data->rate_n_flags, false);
-
IWL_DEBUG_TX_REPLY(mvm,
"BA_NOTIFICATION Received from %pM, sta_id = %d\n",
ba_notif->sta_addr, ba_notif->sta_id);
@@ -2212,6 +2294,9 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
ba_notif->reduced_txp);
+
+ iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info,
+ tid_data->rate_n_flags, false);
}
/*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 91286018a69d..ab56ff87c6f9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -249,6 +249,8 @@ u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx)
* This is the special case in which init is set and we call a callback in
* this case to clear the state indicating that station creation is in
* progress.
+ *
+ * Returns: an error code indicating success or failure
*/
int iwl_mvm_send_lq_cmd(struct iwl_mvm *mvm, struct iwl_lq_cmd *lq)
{
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index fa4a14546860..c8fc8b4fd85c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -119,7 +119,7 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
prph_sc_ctrl->version.version = 0;
prph_sc_ctrl->version.mac_id =
- cpu_to_le16((u16)iwl_read32(trans, CSR_HW_REV));
+ cpu_to_le16((u16)trans->hw_rev);
prph_sc_ctrl->version.size = cpu_to_le16(sizeof(*prph_scratch) / 4);
control_flags |= IWL_PRPH_SCRATCH_MTR_MODE;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
index 5f55efe64bf5..0fa92704cd14 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include "iwl-trans.h"
#include "iwl-fh.h"
@@ -180,7 +180,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
ctxt_info->version.version = 0;
ctxt_info->version.mac_id =
- cpu_to_le16((u16)iwl_read32(trans, CSR_HW_REV));
+ cpu_to_le16((u16)trans->hw_rev);
/* size is in DWs */
ctxt_info->version.size = cpu_to_le16(sizeof(*ctxt_info) / 4);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 2c9b98c8184b..4a657036b9d6 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2005-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2024 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -502,12 +502,16 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
/* Bz devices */
{IWL_PCI_DEVICE(0x2727, PCI_ANY_ID, iwl_bz_trans_cfg)},
+ {IWL_PCI_DEVICE(0x272D, PCI_ANY_ID, iwl_bz_trans_cfg)},
{IWL_PCI_DEVICE(0x272b, PCI_ANY_ID, iwl_bz_trans_cfg)},
{IWL_PCI_DEVICE(0xA840, PCI_ANY_ID, iwl_bz_trans_cfg)},
{IWL_PCI_DEVICE(0x7740, PCI_ANY_ID, iwl_bz_trans_cfg)},
/* Sc devices */
{IWL_PCI_DEVICE(0xE440, PCI_ANY_ID, iwl_sc_trans_cfg)},
+ {IWL_PCI_DEVICE(0xE340, PCI_ANY_ID, iwl_sc_trans_cfg)},
+ {IWL_PCI_DEVICE(0xD340, PCI_ANY_ID, iwl_sc_trans_cfg)},
+ {IWL_PCI_DEVICE(0x6E70, PCI_ANY_ID, iwl_sc_trans_cfg)},
#endif /* CONFIG_IWLMVM */
{0}
@@ -526,7 +530,7 @@ MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids);
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \
IWL_CFG_ANY, _cfg, _name)
-static const struct iwl_dev_info iwl_dev_info_table[] = {
+VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = {
#if IS_ENABLED(CONFIG_IWLMVM)
/* 9000 */
IWL_DEV_INFO(0x2526, 0x1550, iwl9260_2ac_cfg, iwl9260_killer_1550_name),
@@ -1008,8 +1012,13 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY,
- IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+ IWL_CFG_320, IWL_CFG_ANY, IWL_CFG_NO_CDB,
iwl_cfg_gl, iwl_bz_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY,
+ IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_NO_320, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+ iwl_cfg_gl, iwl_mtp_name),
/* SoF with JF2 */
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
@@ -1115,8 +1124,24 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY,
iwl_cfg_sc, iwl_sc_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY,
+ IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY,
+ iwl_cfg_sc2, iwl_sc2_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY,
+ IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY,
+ iwl_cfg_sc2f, iwl_sc2f_name),
#endif /* CONFIG_IWLMVM */
};
+EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_dev_info_table);
+
+#if IS_ENABLED(CONFIG_IWLWIFI_KUNIT_TESTS)
+const unsigned int iwl_dev_info_table_size = ARRAY_SIZE(iwl_dev_info_table);
+EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_dev_info_table_size);
+#endif
/*
* Read rf id and cdb info from prph register and store it
@@ -1143,6 +1168,20 @@ static void get_crf_id(struct iwl_trans *iwl_trans)
iwl_trans->hw_cnv_id =
iwl_read_prph_no_grab(iwl_trans, CNVI_AUX_MISC_CHIP);
+ /* In BZ, the MAC step must be read from the CNVI aux register */
+ if (CSR_HW_REV_TYPE(iwl_trans->hw_rev) == IWL_CFG_MAC_TYPE_BZ) {
+ u8 step = CNVI_AUX_MISC_CHIP_MAC_STEP(iwl_trans->hw_cnv_id);
+
+ /* For BZ-U, take B step also when A step is indicated */
+ if ((CNVI_AUX_MISC_CHIP_PROD_TYPE(iwl_trans->hw_cnv_id) ==
+ CNVI_AUX_MISC_CHIP_PROD_TYPE_BZ_U) &&
+ step == SILICON_A_STEP)
+ step = SILICON_B_STEP;
+
+ iwl_trans->hw_rev_step = step;
+ iwl_trans->hw_rev |= step;
+ }
+
/* Read cdb info (also contains the jacket info if needed in the future */
iwl_trans->hw_wfpm_id =
iwl_read_umac_prph_no_grab(iwl_trans, WFPM_OTP_CFG1_ADDR);
@@ -1236,7 +1275,7 @@ out:
/* PCI registers */
#define PCI_CFG_RETRY_TIMEOUT 0x041
-static const struct iwl_dev_info *
+VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info *
iwl_pci_find_dev_info(u16 device, u16 subsystem_device,
u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb,
u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step)
@@ -1299,6 +1338,7 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device,
return NULL;
}
+EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_pci_find_dev_info);
static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
@@ -1382,6 +1422,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (dev_info) {
iwl_trans->cfg = dev_info->cfg;
iwl_trans->name = dev_info->name;
+ iwl_trans->no_160 = dev_info->no_160 == IWL_CFG_NO_160;
}
#if IS_ENABLED(CONFIG_IWLMVM)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 63e13577aff8..6c76b2dd6878 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1484,12 +1484,9 @@ void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state, bool from_irq)
IWL_WARN(trans, "reporting RF_KILL (radio %s)\n",
state ? "disabled" : "enabled");
- if (iwl_op_mode_hw_rf_kill(trans->op_mode, state)) {
- if (trans->trans_cfg->gen2)
- _iwl_trans_pcie_gen2_stop_device(trans);
- else
- _iwl_trans_pcie_stop_device(trans, from_irq);
- }
+ if (iwl_op_mode_hw_rf_kill(trans->op_mode, state) &&
+ !WARN_ON(trans->trans_cfg->gen2))
+ _iwl_trans_pcie_stop_device(trans, from_irq);
}
void iwl_pcie_d3_complete_suspend(struct iwl_trans *trans,
@@ -1718,6 +1715,7 @@ enable_msi:
static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans)
{
+#if defined(CONFIG_SMP)
int iter_rx_q, i, ret, cpu, offset;
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -1738,6 +1736,7 @@ static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans)
"Failed to set affinity mask for IRQ %d\n",
trans_pcie->msix_entries[i].vector);
}
+#endif
}
static int iwl_pcie_init_msix_handler(struct pci_dev *pdev,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 6c2b37e56c78..fa8eba47dc4c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -1331,7 +1331,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
trans->txqs.tfd.size,
&dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
- ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
+ ip_hdrlen = skb_network_header_len(skb);
snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
amsdu_pad = 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
index ca74b1b63cac..33973a60d0bf 100644
--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <net/tso.h>
#include <linux/tcp.h>
@@ -271,9 +271,10 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
meta = NULL;
goto unmap;
}
- IWL_WARN(trans,
- "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
- len, (unsigned long long)oldphys, (unsigned long long)phys);
+ IWL_DEBUG_TX(trans,
+ "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
+ len, (unsigned long long)oldphys,
+ (unsigned long long)phys);
ret = 0;
unmap:
@@ -352,7 +353,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
&dev_cmd->hdr, start_len, 0);
- ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
+ ip_hdrlen = skb_network_header_len(skb);
snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
amsdu_pad = 0;
@@ -1601,8 +1602,8 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
if (read_ptr == tfd_num)
goto out;
- IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
- txq_id, txq->read_ptr, tfd_num, ssn);
+ IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d (%d) -> %d (%d)\n",
+ txq_id, read_ptr, txq->read_ptr, tfd_num, ssn);
/*Since we free until index _not_ inclusive, the one before index is
* the last we will free. This one must be used */
@@ -1630,7 +1631,8 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr)) {
struct sk_buff *skb = txq->entries[read_ptr].skb;
- if (WARN_ON_ONCE(!skb))
+ if (WARN_ONCE(!skb, "no SKB at %d (%d) on queue %d\n",
+ read_ptr, txq->read_ptr, txq_id))
continue;
iwl_txq_free_tso_page(trans, skb);
diff --git a/drivers/net/wireless/intel/iwlwifi/tests/Makefile b/drivers/net/wireless/intel/iwlwifi/tests/Makefile
new file mode 100644
index 000000000000..5658471bdf0a
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/tests/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+iwlwifi-tests-y += module.o devinfo.o
+
+ccflags-y += -I$(srctree)/$(src)/../
+
+obj-$(CONFIG_IWLWIFI_KUNIT_TESTS) += iwlwifi-tests.o
diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c
new file mode 100644
index 000000000000..7aa47fce6e2d
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * KUnit tests for the iwlwifi device info table
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <kunit/test.h>
+#include "iwl-drv.h"
+#include "iwl-config.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static void iwl_pci_print_dev_info(const char *pfx, const struct iwl_dev_info *di)
+{
+ printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,no_160=%d,cores=%.2x\n",
+ pfx, di->device, di->subdevice, di->mac_type, di->mac_step,
+ di->rf_type, di->cdb, di->jacket, di->rf_id, di->no_160,
+ di->cores);
+}
+
+static void devinfo_table_order(struct kunit *test)
+{
+ int idx;
+
+ for (idx = 0; idx < iwl_dev_info_table_size; idx++) {
+ const struct iwl_dev_info *di = &iwl_dev_info_table[idx];
+ const struct iwl_dev_info *ret;
+
+ ret = iwl_pci_find_dev_info(di->device, di->subdevice,
+ di->mac_type, di->mac_step,
+ di->rf_type, di->cdb,
+ di->jacket, di->rf_id,
+ di->no_160, di->cores, di->rf_step);
+ if (ret != di) {
+ iwl_pci_print_dev_info("searched: ", di);
+ iwl_pci_print_dev_info("found: ", ret);
+ KUNIT_FAIL(test,
+ "unusable entry at index %d (found index %d instead)\n",
+ idx, (int)(ret - iwl_dev_info_table));
+ }
+ }
+}
+
+static struct kunit_case devinfo_test_cases[] = {
+ KUNIT_CASE(devinfo_table_order),
+ {}
+};
+
+static struct kunit_suite iwlwifi_devinfo = {
+ .name = "iwlwifi-devinfo",
+ .test_cases = devinfo_test_cases,
+};
+
+kunit_test_suite(iwlwifi_devinfo);
diff --git a/drivers/net/wireless/intel/iwlwifi/tests/module.c b/drivers/net/wireless/intel/iwlwifi/tests/module.c
new file mode 100644
index 000000000000..0c54f818e5a7
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/tests/module.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Module boilerplate for the iwlwifi kunit module.
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("kunit tests for iwlwifi");
diff --git a/drivers/net/wireless/intersil/p54/fwio.c b/drivers/net/wireless/intersil/p54/fwio.c
index b52cce38115d..c4fe70e05b9b 100644
--- a/drivers/net/wireless/intersil/p54/fwio.c
+++ b/drivers/net/wireless/intersil/p54/fwio.c
@@ -125,7 +125,7 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw)
"FW rev %s - Softmac protocol %x.%x\n",
fw_version, priv->fw_var >> 8, priv->fw_var & 0xff);
snprintf(dev->wiphy->fw_version, sizeof(dev->wiphy->fw_version),
- "%s - %x.%x", fw_version,
+ "%.19s - %x.%x", fw_version,
priv->fw_var >> 8, priv->fw_var & 0xff);
}
diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c
index c6084683aedd..687841b2fa2a 100644
--- a/drivers/net/wireless/intersil/p54/main.c
+++ b/drivers/net/wireless/intersil/p54/main.c
@@ -704,6 +704,10 @@ static void p54_set_coverage_class(struct ieee80211_hw *dev,
}
static const struct ieee80211_ops p54_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = p54_tx_80211,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = p54_start,
diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c
index ce0179b8ab36..0073b5e0f9c9 100644
--- a/drivers/net/wireless/intersil/p54/p54spi.c
+++ b/drivers/net/wireless/intersil/p54/p54spi.c
@@ -700,6 +700,7 @@ static struct spi_driver p54spi_driver = {
module_spi_driver(p54spi_driver);
+MODULE_DESCRIPTION("Prism54 SPI wireless driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Christian Lamparter <chunkeey@web.de>");
MODULE_ALIAS("spi:cx3110x");
diff --git a/drivers/net/wireless/marvell/libertas/cmd.c b/drivers/net/wireless/marvell/libertas/cmd.c
index 104d2b6dc9af..5a525da434c2 100644
--- a/drivers/net/wireless/marvell/libertas/cmd.c
+++ b/drivers/net/wireless/marvell/libertas/cmd.c
@@ -1132,7 +1132,7 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv)
if (!cmdarray[i].cmdbuf) {
lbs_deb_host("ALLOC_CMD_BUF: ptempvirtualaddr is NULL\n");
ret = -1;
- goto done;
+ goto free_cmd_array;
}
}
@@ -1140,8 +1140,17 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv)
init_waitqueue_head(&cmdarray[i].cmdwait_q);
lbs_cleanup_and_insert_cmd(priv, &cmdarray[i]);
}
- ret = 0;
+ return 0;
+free_cmd_array:
+ for (i = 0; i < LBS_NUM_CMD_BUFFERS; i++) {
+ if (cmdarray[i].cmdbuf) {
+ kfree(cmdarray[i].cmdbuf);
+ cmdarray[i].cmdbuf = NULL;
+ }
+ }
+ kfree(priv->cmd_array);
+ priv->cmd_array = NULL;
done:
return ret;
}
diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c
index 199d33ed3bb9..9cca69fe04d7 100644
--- a/drivers/net/wireless/marvell/libertas_tf/main.c
+++ b/drivers/net/wireless/marvell/libertas_tf/main.c
@@ -473,6 +473,10 @@ static int lbtf_op_get_survey(struct ieee80211_hw *hw, int idx,
}
static const struct ieee80211_ops lbtf_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = lbtf_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = lbtf_op_start,
diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c
index da211372a481..b90f922f1cdc 100644
--- a/drivers/net/wireless/marvell/mwifiex/11h.c
+++ b/drivers/net/wireless/marvell/mwifiex/11h.c
@@ -288,6 +288,6 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work)
mwifiex_dbg(priv->adapter, MSG,
"indicating channel switch completion to kernel\n");
wiphy_lock(priv->wdev.wiphy);
- cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0, 0);
+ cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0);
wiphy_unlock(priv->wdev.wiphy);
}
diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index 90e401100898..c0c635e74bc5 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -392,12 +392,10 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
chan_list =
(struct mwifiex_ie_types_chan_list_param_set *) *buffer;
- memset(chan_list, 0,
- sizeof(struct mwifiex_ie_types_chan_list_param_set));
+ memset(chan_list, 0, struct_size(chan_list, chan_scan_param, 1));
chan_list->header.type = cpu_to_le16(TLV_TYPE_CHANLIST);
- chan_list->header.len = cpu_to_le16(
- sizeof(struct mwifiex_ie_types_chan_list_param_set) -
- sizeof(struct mwifiex_ie_types_header));
+ chan_list->header.len =
+ cpu_to_le16(sizeof(struct mwifiex_chan_scan_param_set));
chan_list->chan_scan_param[0].chan_number =
bss_desc->bcn_ht_oper->primary_chan;
chan_list->chan_scan_param[0].radio_type =
@@ -411,8 +409,8 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
(bss_desc->bcn_ht_oper->ht_param &
IEEE80211_HT_PARAM_CHA_SEC_OFFSET));
- *buffer += sizeof(struct mwifiex_ie_types_chan_list_param_set);
- ret_len += sizeof(struct mwifiex_ie_types_chan_list_param_set);
+ *buffer += struct_size(chan_list, chan_scan_param, 1);
+ ret_len += struct_size(chan_list, chan_scan_param, 1);
}
if (bss_desc->bcn_bss_co_2040) {
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 3604abcbcff9..b909a7665e9c 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -3359,7 +3359,7 @@ static int mwifiex_set_wowlan_mef_entry(struct mwifiex_private *priv,
}
if (!wowlan->patterns[i].pkt_offset) {
- if (!(byte_seq[0] & 0x01) &&
+ if (is_unicast_ether_addr(byte_seq) &&
(byte_seq[MWIFIEX_MEF_MAX_BYTESEQ] == 1)) {
mef_cfg->criteria |= MWIFIEX_CRITERIA_UNICAST;
continue;
diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c
index f9c9fec7c792..9deaf59dcb62 100644
--- a/drivers/net/wireless/marvell/mwifiex/debugfs.c
+++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c
@@ -566,14 +566,8 @@ mwifiex_verext_write(struct file *file, const char __user *ubuf,
int ret;
u32 versionstrsel;
struct mwifiex_private *priv = (void *)file->private_data;
- char buf[16];
- memset(buf, 0, sizeof(buf));
-
- if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
- return -EFAULT;
-
- ret = kstrtou32(buf, 10, &versionstrsel);
+ ret = kstrtou32_from_user(ubuf, count, 10, &versionstrsel);
if (ret)
return ret;
@@ -874,19 +868,14 @@ mwifiex_timeshare_coex_write(struct file *file, const char __user *ubuf,
{
bool timeshare_coex;
struct mwifiex_private *priv = file->private_data;
- char kbuf[16];
int ret;
if (priv->adapter->fw_api_ver != MWIFIEX_FW_V15)
return -EOPNOTSUPP;
- memset(kbuf, 0, sizeof(kbuf));
-
- if (copy_from_user(&kbuf, ubuf, min_t(size_t, sizeof(kbuf) - 1, count)))
- return -EFAULT;
-
- if (kstrtobool(kbuf, &timeshare_coex))
- return -EINVAL;
+ ret = kstrtobool_from_user(ubuf, count, &timeshare_coex);
+ if (ret)
+ return ret;
ret = mwifiex_send_cmd(priv, HostCmd_CMD_ROBUST_COEX,
HostCmd_ACT_GEN_SET, 0, &timeshare_coex, true);
@@ -970,9 +959,6 @@ mwifiex_dev_debugfs_init(struct mwifiex_private *priv)
priv->dfs_dev_dir = debugfs_create_dir(priv->netdev->name,
mwifiex_dfs_dir);
- if (!priv->dfs_dev_dir)
- return;
-
MWIFIEX_DFS_ADD_FILE(info);
MWIFIEX_DFS_ADD_FILE(debug);
MWIFIEX_DFS_ADD_FILE(getlog);
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 62f3c9a52a1d..3adc447b715f 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -770,7 +770,7 @@ struct mwifiex_chan_scan_param_set {
struct mwifiex_ie_types_chan_list_param_set {
struct mwifiex_ie_types_header header;
- struct mwifiex_chan_scan_param_set chan_scan_param[1];
+ struct mwifiex_chan_scan_param_set chan_scan_param[];
} __packed;
struct mwifiex_ie_types_rxba_sync {
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 318b42b1896f..175882485a19 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -28,11 +28,9 @@
#include <linux/inetdevice.h>
#include <linux/devcoredump.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of_gpio.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
index a2ddac363b10..0326b121747c 100644
--- a/drivers/net/wireless/marvell/mwifiex/scan.c
+++ b/drivers/net/wireless/marvell/mwifiex/scan.c
@@ -664,15 +664,14 @@ mwifiex_scan_channel_list(struct mwifiex_private *priv,
/* Copy the current channel TLV to the command being
prepared */
- memcpy(chan_tlv_out->chan_scan_param + tlv_idx,
+ memcpy(&chan_tlv_out->chan_scan_param[tlv_idx],
tmp_chan_list,
- sizeof(chan_tlv_out->chan_scan_param));
+ sizeof(*chan_tlv_out->chan_scan_param));
/* Increment the TLV header length by the size
appended */
le16_unaligned_add_cpu(&chan_tlv_out->header.len,
- sizeof(
- chan_tlv_out->chan_scan_param));
+ sizeof(*chan_tlv_out->chan_scan_param));
/*
* The tlv buffer length is set to the number of bytes
@@ -2369,12 +2368,11 @@ int mwifiex_cmd_802_11_bg_scan_config(struct mwifiex_private *priv,
chan_idx < MWIFIEX_BG_SCAN_CHAN_MAX &&
bgscan_cfg_in->chan_list[chan_idx].chan_number;
chan_idx++) {
- temp_chan = chan_list_tlv->chan_scan_param + chan_idx;
+ temp_chan = &chan_list_tlv->chan_scan_param[chan_idx];
/* Increment the TLV header length by size appended */
le16_unaligned_add_cpu(&chan_list_tlv->header.len,
- sizeof(
- chan_list_tlv->chan_scan_param));
+ sizeof(*chan_list_tlv->chan_scan_param));
temp_chan->chan_number =
bgscan_cfg_in->chan_list[chan_idx].chan_number;
@@ -2413,7 +2411,7 @@ int mwifiex_cmd_802_11_bg_scan_config(struct mwifiex_private *priv,
chan_scan_param);
le16_unaligned_add_cpu(&chan_list_tlv->header.len,
chan_num *
- sizeof(chan_list_tlv->chan_scan_param[0]));
+ sizeof(*chan_list_tlv->chan_scan_param));
}
tlv_pos += (sizeof(chan_list_tlv->header)
diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c
index 00a5679b5c51..8558995e8fc7 100644
--- a/drivers/net/wireless/marvell/mwifiex/wmm.c
+++ b/drivers/net/wireless/marvell/mwifiex/wmm.c
@@ -871,7 +871,7 @@ mwifiex_wmm_add_buf_txqueue(struct mwifiex_private *priv,
}
} else {
memcpy(ra, skb->data, ETH_ALEN);
- if (ra[0] & 0x01 || mwifiex_is_skb_mgmt_frame(skb))
+ if (is_multicast_ether_addr(ra) || mwifiex_is_skb_mgmt_frame(skb))
eth_broadcast_addr(ra);
ra_list = mwifiex_wmm_get_queue_raptr(priv, tid_down, ra);
}
diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c
index 13bcb123d122..ce8fea76dbb2 100644
--- a/drivers/net/wireless/marvell/mwl8k.c
+++ b/drivers/net/wireless/marvell/mwl8k.c
@@ -5610,6 +5610,10 @@ static void mwl8k_sw_scan_complete(struct ieee80211_hw *hw,
}
static const struct ieee80211_ops mwl8k_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mwl8k_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = mwl8k_start,
diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile
index d6575fe18c6b..f7f2d9a8ab0f 100644
--- a/drivers/net/wireless/mediatek/mt76/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_MT792x_USB) += mt792x-usb.o
mt76-y := \
mmio.o util.o trace.o dma.o mac80211.o debugfs.o eeprom.o \
- tx.o agg-rx.o mcu.o
+ tx.o agg-rx.o mcu.o wed.o
mt76-$(CONFIG_PCI) += pci.o
mt76-$(CONFIG_NL80211_TESTMODE) += testmode.o
diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index 10cbd9e560e7..07c386c7b4d0 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -122,7 +122,7 @@ mt76_rx_aggr_check_ctl(struct sk_buff *skb, struct sk_buff_head *frames)
struct ieee80211_bar *bar = mt76_skb_get_hdr(skb);
struct mt76_wcid *wcid = status->wcid;
struct mt76_rx_tid *tid;
- u8 tidno = status->qos_ctl & IEEE80211_QOS_CTL_TID_MASK;
+ u8 tidno;
u16 seqno;
if (!ieee80211_is_ctl(bar->frame_control))
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 00230f106294..72a7bd5a8576 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -197,9 +197,8 @@ mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
q->tail = q->head;
}
-static void
-__mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q,
- bool reset_idx)
+void __mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q,
+ bool reset_idx)
{
if (!q || !q->ndesc)
return;
@@ -219,8 +218,7 @@ __mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q,
mt76_dma_sync_idx(dev, q);
}
-static void
-mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
+void mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
{
__mt76_dma_queue_reset(dev, q, true);
}
@@ -632,9 +630,8 @@ free_skb:
return ret;
}
-static int
-mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q,
- bool allow_direct)
+int mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q,
+ bool allow_direct)
{
int len = SKB_WITH_OVERHEAD(q->buf_size);
int frames = 0;
@@ -681,81 +678,6 @@ done:
return frames;
}
-int mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset)
-{
-#ifdef CONFIG_NET_MEDIATEK_SOC_WED
- int ret = 0, type, ring;
- u16 flags;
-
- if (!q || !q->ndesc)
- return -EINVAL;
-
- flags = q->flags;
- if (!q->wed || !mtk_wed_device_active(q->wed))
- q->flags &= ~MT_QFLAG_WED;
-
- if (!(q->flags & MT_QFLAG_WED))
- return 0;
-
- type = FIELD_GET(MT_QFLAG_WED_TYPE, q->flags);
- ring = FIELD_GET(MT_QFLAG_WED_RING, q->flags);
-
- switch (type) {
- case MT76_WED_Q_TX:
- ret = mtk_wed_device_tx_ring_setup(q->wed, ring, q->regs,
- reset);
- if (!ret)
- q->wed_regs = q->wed->tx_ring[ring].reg_base;
- break;
- case MT76_WED_Q_TXFREE:
- /* WED txfree queue needs ring to be initialized before setup */
- q->flags = 0;
- mt76_dma_queue_reset(dev, q);
- mt76_dma_rx_fill(dev, q, false);
-
- ret = mtk_wed_device_txfree_ring_setup(q->wed, q->regs);
- if (!ret)
- q->wed_regs = q->wed->txfree_ring.reg_base;
- break;
- case MT76_WED_Q_RX:
- ret = mtk_wed_device_rx_ring_setup(q->wed, ring, q->regs,
- reset);
- if (!ret)
- q->wed_regs = q->wed->rx_ring[ring].reg_base;
- break;
- case MT76_WED_RRO_Q_DATA:
- q->flags &= ~MT_QFLAG_WED;
- __mt76_dma_queue_reset(dev, q, false);
- mtk_wed_device_rro_rx_ring_setup(q->wed, ring, q->regs);
- q->head = q->ndesc - 1;
- q->queued = q->head;
- break;
- case MT76_WED_RRO_Q_MSDU_PG:
- q->flags &= ~MT_QFLAG_WED;
- __mt76_dma_queue_reset(dev, q, false);
- mtk_wed_device_msdu_pg_rx_ring_setup(q->wed, ring, q->regs);
- q->head = q->ndesc - 1;
- q->queued = q->head;
- break;
- case MT76_WED_RRO_Q_IND:
- q->flags &= ~MT_QFLAG_WED;
- mt76_dma_queue_reset(dev, q);
- mt76_dma_rx_fill(dev, q, false);
- mtk_wed_device_ind_rx_ring_setup(q->wed, q->regs);
- break;
- default:
- ret = -EINVAL;
- break;
- }
- q->flags = flags;
-
- return ret;
-#else
- return 0;
-#endif
-}
-EXPORT_SYMBOL_GPL(mt76_dma_wed_setup);
-
static int
mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
int idx, int n_desc, int bufsize,
@@ -800,7 +722,7 @@ mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
if (ret)
return ret;
- ret = mt76_dma_wed_setup(dev, q, false);
+ ret = mt76_wed_dma_setup(dev, q, false);
if (ret)
return ret;
@@ -863,7 +785,7 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
mt76_dma_rx_cleanup(dev, q);
/* reset WED rx queues */
- mt76_dma_wed_setup(dev, q, true);
+ mt76_wed_dma_setup(dev, q, true);
if (mt76_queue_is_wed_tx_free(q))
return;
@@ -1054,20 +976,6 @@ void mt76_dma_attach(struct mt76_dev *dev)
}
EXPORT_SYMBOL_GPL(mt76_dma_attach);
-void mt76_dma_wed_reset(struct mt76_dev *dev)
-{
- struct mt76_mmio *mmio = &dev->mmio;
-
- if (!test_bit(MT76_STATE_WED_RESET, &dev->phy.state))
- return;
-
- complete(&mmio->wed_reset);
-
- if (!wait_for_completion_timeout(&mmio->wed_reset_complete, 3 * HZ))
- dev_err(dev->dev, "wed reset complete timeout\n");
-}
-EXPORT_SYMBOL_GPL(mt76_dma_wed_reset);
-
void mt76_dma_cleanup(struct mt76_dev *dev)
{
int i;
diff --git a/drivers/net/wireless/mediatek/mt76/dma.h b/drivers/net/wireless/mediatek/mt76/dma.h
index c479cc6388ef..1de5a2b20f74 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.h
+++ b/drivers/net/wireless/mediatek/mt76/dma.h
@@ -79,15 +79,18 @@ enum mt76_dma_wed_ind_reason {
int mt76_dma_rx_poll(struct napi_struct *napi, int budget);
void mt76_dma_attach(struct mt76_dev *dev);
void mt76_dma_cleanup(struct mt76_dev *dev);
-int mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset);
-void mt76_dma_wed_reset(struct mt76_dev *dev);
+int mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q,
+ bool allow_direct);
+void __mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q,
+ bool reset_idx);
+void mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q);
static inline void
mt76_dma_reset_tx_queue(struct mt76_dev *dev, struct mt76_queue *q)
{
dev->queue_ops->reset_q(dev, q);
if (mtk_wed_device_active(&dev->mmio.wed))
- mt76_dma_wed_setup(dev, q, true);
+ mt76_wed_dma_setup(dev, q, true);
}
static inline void
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 8a3a90d1bfac..068206e48aec 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -579,13 +579,18 @@ EXPORT_SYMBOL_GPL(mt76_unregister_phy);
int mt76_create_page_pool(struct mt76_dev *dev, struct mt76_queue *q)
{
+ bool is_qrx = mt76_queue_is_rx(dev, q);
struct page_pool_params pp_params = {
.order = 0,
.flags = 0,
.nid = NUMA_NO_NODE,
.dev = dev->dma_dev,
};
- int idx = q - dev->q_rx;
+ int idx = is_qrx ? q - dev->q_rx : -1;
+
+ /* Allocate page_pools just for rx/wed_tx_free queues */
+ if (!is_qrx && !mt76_queue_is_wed_tx_free(q))
+ return 0;
switch (idx) {
case MT_RXQ_MAIN:
@@ -604,6 +609,9 @@ int mt76_create_page_pool(struct mt76_dev *dev, struct mt76_queue *q)
pp_params.dma_dir = DMA_FROM_DEVICE;
pp_params.max_len = PAGE_SIZE;
pp_params.offset = 0;
+ /* NAPI is available just for rx queues */
+ if (idx >= 0 && idx < ARRAY_SIZE(dev->napi))
+ pp_params.napi = &dev->napi[idx];
}
q->page_pool = page_pool_create(&pp_params);
@@ -1613,8 +1621,8 @@ EXPORT_SYMBOL_GPL(mt76_get_sar_power);
static void
__mt76_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
{
- if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif))
- ieee80211_csa_finish(vif);
+ if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif, 0))
+ ieee80211_csa_finish(vif, 0);
}
void mt76_csa_finish(struct mt76_dev *dev)
@@ -1638,7 +1646,7 @@ __mt76_csa_check(void *priv, u8 *mac, struct ieee80211_vif *vif)
if (!vif->bss_conf.csa_active)
return;
- dev->csa_complete |= ieee80211_beacon_cntdwn_is_complete(vif);
+ dev->csa_complete |= ieee80211_beacon_cntdwn_is_complete(vif, 0);
}
void mt76_csa_check(struct mt76_dev *dev)
@@ -1854,19 +1862,3 @@ enum mt76_dfs_state mt76_phy_dfs_state(struct mt76_phy *phy)
return MT_DFS_STATE_ACTIVE;
}
EXPORT_SYMBOL_GPL(mt76_phy_dfs_state);
-
-#ifdef CONFIG_NET_MEDIATEK_SOC_WED
-int mt76_net_setup_tc(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
- struct net_device *netdev, enum tc_setup_type type,
- void *type_data)
-{
- struct mt76_phy *phy = hw->priv;
- struct mtk_wed_device *wed = &phy->dev->mmio.wed;
-
- if (!mtk_wed_device_active(wed))
- return -EOPNOTSUPP;
-
- return mtk_wed_device_setup_tc(wed, netdev, type, type_data);
-}
-EXPORT_SYMBOL_GPL(mt76_net_setup_tc);
-#endif /* CONFIG_NET_MEDIATEK_SOC_WED */
diff --git a/drivers/net/wireless/mediatek/mt76/mmio.c b/drivers/net/wireless/mediatek/mt76/mmio.c
index c3e0e23e0161..cd2e9737c3bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mmio.c
@@ -85,113 +85,6 @@ void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr,
}
EXPORT_SYMBOL_GPL(mt76_set_irq_mask);
-#ifdef CONFIG_NET_MEDIATEK_SOC_WED
-void mt76_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
-{
- struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
- int i;
-
- for (i = 0; i < dev->rx_token_size; i++) {
- struct mt76_txwi_cache *t;
-
- t = mt76_rx_token_release(dev, i);
- if (!t || !t->ptr)
- continue;
-
- mt76_put_page_pool_buf(t->ptr, false);
- t->ptr = NULL;
-
- mt76_put_rxwi(dev, t);
- }
-
- mt76_free_pending_rxwi(dev);
-}
-EXPORT_SYMBOL_GPL(mt76_mmio_wed_release_rx_buf);
-
-u32 mt76_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
-{
- struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
- struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc;
- struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
- int i, len = SKB_WITH_OVERHEAD(q->buf_size);
- struct mt76_txwi_cache *t = NULL;
-
- for (i = 0; i < size; i++) {
- enum dma_data_direction dir;
- dma_addr_t addr;
- u32 offset;
- int token;
- void *buf;
-
- t = mt76_get_rxwi(dev);
- if (!t)
- goto unmap;
-
- buf = mt76_get_page_pool_buf(q, &offset, q->buf_size);
- if (!buf)
- goto unmap;
-
- addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + offset;
- dir = page_pool_get_dma_dir(q->page_pool);
- dma_sync_single_for_device(dev->dma_dev, addr, len, dir);
-
- desc->buf0 = cpu_to_le32(addr);
- token = mt76_rx_token_consume(dev, buf, t, addr);
- if (token < 0) {
- mt76_put_page_pool_buf(buf, false);
- goto unmap;
- }
-
- token = FIELD_PREP(MT_DMA_CTL_TOKEN, token);
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- token |= FIELD_PREP(MT_DMA_CTL_SDP0_H, addr >> 32);
-#endif
- desc->token |= cpu_to_le32(token);
- desc++;
- }
-
- return 0;
-
-unmap:
- if (t)
- mt76_put_rxwi(dev, t);
- mt76_mmio_wed_release_rx_buf(wed);
-
- return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(mt76_mmio_wed_init_rx_buf);
-
-int mt76_mmio_wed_offload_enable(struct mtk_wed_device *wed)
-{
- struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
-
- spin_lock_bh(&dev->token_lock);
- dev->token_size = wed->wlan.token_start;
- spin_unlock_bh(&dev->token_lock);
-
- return !wait_event_timeout(dev->tx_wait, !dev->wed_token_count, HZ);
-}
-EXPORT_SYMBOL_GPL(mt76_mmio_wed_offload_enable);
-
-void mt76_mmio_wed_offload_disable(struct mtk_wed_device *wed)
-{
- struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
-
- spin_lock_bh(&dev->token_lock);
- dev->token_size = dev->drv->token_size;
- spin_unlock_bh(&dev->token_lock);
-}
-EXPORT_SYMBOL_GPL(mt76_mmio_wed_offload_disable);
-
-void mt76_mmio_wed_reset_complete(struct mtk_wed_device *wed)
-{
- struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
-
- complete(&dev->mmio.wed_reset_complete);
-}
-EXPORT_SYMBOL_GPL(mt76_mmio_wed_reset_complete);
-#endif /*CONFIG_NET_MEDIATEK_SOC_WED */
-
void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
{
static const struct mt76_bus_ops mt76_mmio_ops = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index b20c34d5a0f7..a91f6ddacbd9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -210,6 +210,8 @@ struct mt76_queue {
u16 first;
u16 head;
u16 tail;
+ u8 hw_idx;
+ u8 ep;
int ndesc;
int queued;
int buf_size;
@@ -217,7 +219,6 @@ struct mt76_queue {
bool blocked;
u8 buf_offset;
- u8 hw_idx;
u16 flags;
struct mtk_wed_device *wed;
@@ -1081,12 +1082,6 @@ bool ____mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
void mt76_pci_disable_aspm(struct pci_dev *pdev);
-#ifdef CONFIG_NET_MEDIATEK_SOC_WED
-int mt76_net_setup_tc(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
- struct net_device *netdev, enum tc_setup_type type,
- void *type_data);
-#endif /*CONFIG_NET_MEDIATEK_SOC_WED */
-
static inline u16 mt76_chip(struct mt76_dev *dev)
{
return dev->rev >> 16;
@@ -1097,13 +1092,34 @@ static inline u16 mt76_rev(struct mt76_dev *dev)
return dev->rev & 0xffff;
}
+void mt76_wed_release_rx_buf(struct mtk_wed_device *wed);
+void mt76_wed_offload_disable(struct mtk_wed_device *wed);
+void mt76_wed_reset_complete(struct mtk_wed_device *wed);
+void mt76_wed_dma_reset(struct mt76_dev *dev);
+int mt76_wed_net_setup_tc(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct net_device *netdev, enum tc_setup_type type,
+ void *type_data);
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
-u32 mt76_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size);
-void mt76_mmio_wed_release_rx_buf(struct mtk_wed_device *wed);
-int mt76_mmio_wed_offload_enable(struct mtk_wed_device *wed);
-void mt76_mmio_wed_offload_disable(struct mtk_wed_device *wed);
-void mt76_mmio_wed_reset_complete(struct mtk_wed_device *wed);
-#endif /*CONFIG_NET_MEDIATEK_SOC_WED */
+u32 mt76_wed_init_rx_buf(struct mtk_wed_device *wed, int size);
+int mt76_wed_offload_enable(struct mtk_wed_device *wed);
+int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset);
+#else
+static inline u32 mt76_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
+{
+ return 0;
+}
+
+static inline int mt76_wed_offload_enable(struct mtk_wed_device *wed)
+{
+ return 0;
+}
+
+static inline int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q,
+ bool reset)
+{
+ return 0;
+}
+#endif /* CONFIG_NET_MEDIATEK_SOC_WED */
#define mt76xx_chip(dev) mt76_chip(&((dev)->mt76))
#define mt76xx_rev(dev) mt76_rev(&((dev)->mt76))
@@ -1470,13 +1486,6 @@ static inline bool mt76u_urb_error(struct urb *urb)
urb->status != -ENOENT;
}
-/* Map hardware queues to usb endpoints */
-static inline u8 q2ep(u8 qid)
-{
- /* TODO: take management packets to queue 5 */
- return qid + 1;
-}
-
static inline int
mt76u_bulk_msg(struct mt76_dev *dev, void *data, int len, int *actual_len,
int timeout, int ep)
@@ -1598,6 +1607,18 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
struct mt76_power_limits *dest,
s8 target_power);
+static inline bool mt76_queue_is_rx(struct mt76_dev *dev, struct mt76_queue *q)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
+ if (q == &dev->q_rx[i])
+ return true;
+ }
+
+ return false;
+}
+
static inline bool mt76_queue_is_wed_tx_free(struct mt76_queue *q)
{
return (q->flags & MT_QFLAG_WED) &&
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
index 89d738deea62..9b49267b1eab 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -701,6 +701,10 @@ static void mt7603_tx(struct ieee80211_hw *hw,
}
const struct ieee80211_ops mt7603_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt7603_tx,
.start = mt7603_start,
.stop = mt7603_stop,
@@ -728,6 +732,7 @@ const struct ieee80211_ops mt7603_ops = {
.set_sar_specs = mt7603_set_sar_specs,
};
+MODULE_DESCRIPTION("MediaTek MT7603E and MT76x8 wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
static int __init mt7603_init(void)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index dab16b5fc386..0971c164b57e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -1375,4 +1375,5 @@ const struct ieee80211_ops mt7615_ops = {
};
EXPORT_SYMBOL_GPL(mt7615_ops);
+MODULE_DESCRIPTION("MediaTek MT7615E and MT7663E wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index ae34d019e588..c807bd8d928d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -353,7 +353,7 @@ static void
mt7615_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
{
if (vif->bss_conf.csa_active)
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
}
static void
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index ac036a072439..87a956ea3ad7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -270,4 +270,5 @@ static void __exit mt7615_exit(void)
module_init(mt7615_init);
module_exit(mt7615_exit);
+MODULE_DESCRIPTION("MediaTek MT7615E MMIO helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c
index 67cedd2555f9..9692890ba51b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c
@@ -253,4 +253,5 @@ module_sdio_driver(mt7663s_driver);
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7663S (SDIO) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index 04963b9f7498..df737e1ff27b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -281,4 +281,5 @@ module_usb_driver(mt7663u_driver);
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7663U (USB) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
index 0052d103e276..820b39590027 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -349,4 +349,5 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_register_device);
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek MT7663 SDIO/USB helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index fdde3d70b300..98d64d3d2993 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -227,6 +227,11 @@ static inline bool is_mt7992(struct mt76_dev *dev)
return mt76_chip(dev) == 0x7992;
}
+static inline bool is_mt799x(struct mt76_dev *dev)
+{
+ return is_mt7996(dev) || is_mt7992(dev);
+}
+
static inline bool is_mt7622(struct mt76_dev *dev)
{
if (!IS_ENABLED(CONFIG_MT7622_WMAC))
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h
index bd2a92467a97..5f132115ebfc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h
@@ -32,6 +32,11 @@ enum {
MT_LMAC_PSMP0,
};
+enum {
+ MT_TXS_MPDU_FMT = 0,
+ MT_TXS_PPDU_FMT = 2,
+};
+
#define MT_TX_FREE_MSDU_CNT GENMASK(9, 0)
#define MT_TX_FREE_WLAN_ID GENMASK(23, 14)
#define MT_TX_FREE_COUNT GENMASK(12, 0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index c7914643e9c0..b841bf628d02 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -544,7 +544,7 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi,
val = FIELD_PREP(MT_TXD5_PID, pid);
if (pid >= MT_PACKET_ID_FIRST) {
val |= MT_TXD5_TX_STATUS_HOST;
- amsdu_en = amsdu_en && !is_mt7921(dev);
+ amsdu_en = 0;
}
txwi[5] = cpu_to_le32(val);
@@ -579,6 +579,8 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi,
spe_idx = 24 + phy_idx;
txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX, spe_idx));
}
+
+ txwi[7] &= ~cpu_to_le32(MT_TXD7_HW_AMSDU);
}
}
EXPORT_SYMBOL_GPL(mt76_connac2_mac_write_txwi);
@@ -714,6 +716,9 @@ bool mt76_connac2_mac_add_txs_skb(struct mt76_dev *dev, struct mt76_wcid *wcid,
struct sk_buff_head list;
struct sk_buff *skb;
+ if (le32_get_bits(txs_data[0], MT_TXS0_TXS_FORMAT) == MT_TXS_PPDU_FMT)
+ return false;
+
mt76_tx_status_lock(dev, &list);
skb = mt76_tx_status_skb_get(dev, wcid, pid, &list);
if (skb) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 96494ba2fdf7..af0c2b2aacb0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -66,7 +66,7 @@ int mt76_connac_mcu_init_download(struct mt76_dev *dev, u32 addr, u32 len,
if ((!is_connac_v1(dev) && addr == MCU_PATCH_ADDRESS) ||
(is_mt7921(dev) && addr == 0x900000) ||
- (is_mt7925(dev) && addr == 0x900000) ||
+ (is_mt7925(dev) && (addr == 0x900000 || addr == 0xe0002800)) ||
(is_mt7996(dev) && addr == 0x900000) ||
(is_mt7992(dev) && addr == 0x900000))
cmd = MCU_CMD(PATCH_START_REQ);
@@ -283,6 +283,9 @@ __mt76_connac_mcu_alloc_sta_req(struct mt76_dev *dev, struct mt76_vif *mvif,
};
struct sk_buff *skb;
+ if (is_mt799x(dev) && !wcid->sta)
+ hdr.muar_idx = 0xe;
+
mt76_connac_mcu_get_wlan_idx(dev, wcid, &hdr.wlan_idx_lo,
&hdr.wlan_idx_hi);
skb = mt76_mcu_msg_alloc(dev, NULL, len);
@@ -2101,7 +2104,7 @@ mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy,
int j, msg_len, num_ch;
struct sk_buff *skb;
- num_ch = i == batch_size - 1 ? n_chan % batch_len : batch_len;
+ num_ch = i == batch_size - 1 ? n_chan - i * batch_len : batch_len;
msg_len = sizeof(tx_power_tlv) + num_ch * sizeof(sku_tlbv);
skb = mt76_mcu_msg_alloc(dev, NULL, msg_len);
if (!skb) {
@@ -3160,4 +3163,5 @@ exit:
EXPORT_SYMBOL_GPL(mt76_connac2_mcu_fill_message);
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT76x connac layer helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index ae6d0179727d..657a4d1f856b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -808,6 +808,7 @@ enum {
STA_REC_MLD = 0x20,
STA_REC_EHT = 0x22,
STA_REC_PN_INFO = 0x26,
+ STA_REC_KEY_V3 = 0x27,
STA_REC_HDRT = 0x28,
STA_REC_HDR_TRANS = 0x2B,
STA_REC_MAX_NUM
@@ -935,6 +936,9 @@ enum {
PHY_TYPE_INDEX_NUM
};
+#define HR_DSSS_ERP_BASIC_RATE GENMASK(3, 0)
+#define OFDM_BASIC_RATE (BIT(6) | BIT(8) | BIT(10))
+
#define PHY_TYPE_BIT_HR_DSSS BIT(PHY_TYPE_HR_DSSS_INDEX)
#define PHY_TYPE_BIT_ERP BIT(PHY_TYPE_ERP_INDEX)
#define PHY_TYPE_BIT_OFDM BIT(PHY_TYPE_OFDM_INDEX)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
index c3a392a1a659..bcd24c9072ec 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
@@ -342,4 +342,5 @@ int mt76x0_eeprom_init(struct mt76x02_dev *dev)
return 0;
}
+MODULE_DESCRIPTION("MediaTek MT76x EEPROM helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
index 9277ff38b7a2..79b7996ad1a8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -59,6 +59,10 @@ mt76x0e_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
}
static const struct ieee80211_ops mt76x0e_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt76x02_tx,
.start = mt76x0e_start,
.stop = mt76x0e_stop,
@@ -302,6 +306,7 @@ static const struct pci_device_id mt76x0e_device_table[] = {
MODULE_DEVICE_TABLE(pci, mt76x0e_device_table);
MODULE_FIRMWARE(MT7610E_FIRMWARE);
MODULE_FIRMWARE(MT7650E_FIRMWARE);
+MODULE_DESCRIPTION("MediaTek MT76x0E (PCIe) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
static struct pci_driver mt76x0e_driver = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
index 0422c332354a..bba44f289b4e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
@@ -118,6 +118,10 @@ static int mt76x0u_start(struct ieee80211_hw *hw)
}
static const struct ieee80211_ops mt76x0u_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt76x02_tx,
.start = mt76x0u_start,
.stop = mt76x0u_stop,
@@ -336,6 +340,7 @@ err:
MODULE_DEVICE_TABLE(usb, mt76x0_device_table);
MODULE_FIRMWARE(MT7610E_FIRMWARE);
MODULE_FIRMWARE(MT7610U_FIRMWARE);
+MODULE_DESCRIPTION("MediaTek MT76x0U (USB) wireless driver");
MODULE_LICENSE("GPL");
static struct usb_driver mt76x0_driver = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
index 85a78dea4085..29b9a15f8dbe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
@@ -67,7 +67,7 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data,
struct mt76_tx_info *tx_info)
{
struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76);
- int pid, len = tx_info->skb->len, ep = q2ep(dev->mphy.q_tx[qid]->hw_idx);
+ int pid, len = tx_info->skb->len, ep = dev->mphy.q_tx[qid]->ep;
struct mt76x02_txwi *txwi;
bool ampdu = IEEE80211_SKB_CB(tx_info->skb)->flags & IEEE80211_TX_CTL_AMPDU;
enum mt76_qsel qsel;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
index 02da543dfc5c..b2cc44914294 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
@@ -293,4 +293,5 @@ void mt76x02u_init_mcu(struct mt76_dev *dev)
EXPORT_SYMBOL_GPL(mt76x02u_init_mcu);
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>");
+MODULE_DESCRIPTION("MediaTek MT76x02 MCU helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index 8a0e8124b894..8020446be37b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -696,4 +696,5 @@ void mt76x02_config_mac_addr_list(struct mt76x02_dev *dev)
}
EXPORT_SYMBOL_GPL(mt76x02_config_mac_addr_list);
+MODULE_DESCRIPTION("MediaTek MT76x02 helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
index 8c01855885ce..1fe5f5a02f93 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
@@ -506,4 +506,5 @@ int mt76x2_eeprom_init(struct mt76x02_dev *dev)
}
EXPORT_SYMBOL_GPL(mt76x2_eeprom_init);
+MODULE_DESCRIPTION("MediaTek MT76x2 EEPROM helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index df85ebc6e1df..30959746e924 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -165,6 +165,7 @@ mt76x2e_resume(struct pci_dev *pdev)
MODULE_DEVICE_TABLE(pci, mt76x2e_device_table);
MODULE_FIRMWARE(MT7662_FIRMWARE);
MODULE_FIRMWARE(MT7662_ROM_PATCH);
+MODULE_DESCRIPTION("MediaTek MT76x2E (PCIe) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
static struct pci_driver mt76pci_driver = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
index b38bb7a2362b..bfc8c69f43fa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
@@ -132,6 +132,10 @@ static int mt76x2_set_antenna(struct ieee80211_hw *hw, u32 tx_ant,
}
const struct ieee80211_ops mt76x2_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt76x02_tx,
.start = mt76x2_start,
.stop = mt76x2_stop,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
index 55068f3252ef..e92bb871f231 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -18,6 +18,7 @@ static const struct usb_device_id mt76x2u_device_table[] = {
{ USB_DEVICE(0x7392, 0xb711) }, /* Edimax EW 7722 UAC */
{ USB_DEVICE(0x0e8d, 0x7632) }, /* HC-M7662BU1 */
{ USB_DEVICE(0x2c4e, 0x0103) }, /* Mercury UD13 */
+ { USB_DEVICE(0x0846, 0x9014) }, /* Netgear WNDA3100v3 */
{ USB_DEVICE(0x0846, 0x9053) }, /* Netgear A6210 */
{ USB_DEVICE(0x045e, 0x02e6) }, /* XBox One Wireless Adapter */
{ USB_DEVICE(0x045e, 0x02fe) }, /* XBox One Wireless Adapter */
@@ -147,4 +148,5 @@ static struct usb_driver mt76x2u_driver = {
module_usb_driver(mt76x2u_driver);
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>");
+MODULE_DESCRIPTION("MediaTek MT76x2U (USB) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c
index ac07ed1f63a3..9fe390fdd730 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c
@@ -103,6 +103,10 @@ mt76x2u_config(struct ieee80211_hw *hw, u32 changed)
}
const struct ieee80211_ops mt76x2u_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt76x02_tx,
.start = mt76x2u_start,
.stop = mt76x2u_stop,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
index c91a1c54027f..0baa82c8df5a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -614,7 +614,7 @@ int mt7915_dma_reset(struct mt7915_dev *dev, bool force)
mtk_wed_device_dma_reset(wed);
mt7915_dma_disable(dev, force);
- mt76_dma_wed_reset(&dev->mt76);
+ mt76_wed_dma_reset(&dev->mt76);
/* reset hw queues */
for (i = 0; i < __MT_TXQ_MAX; i++) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index b01edbed969c..e45361111f9b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1520,12 +1520,6 @@ void mt7915_mac_reset_work(struct work_struct *work)
if (!(READ_ONCE(dev->recovery.state) & MT_MCU_CMD_STOP_DMA))
return;
- if (mtk_wed_device_active(&dev->mt76.mmio.wed)) {
- mtk_wed_device_stop(&dev->mt76.mmio.wed);
- if (!is_mt798x(&dev->mt76))
- mt76_wr(dev, MT_INT_WED_MASK_CSR, 0);
- }
-
ieee80211_stop_queues(mt76_hw(dev));
if (ext_phy)
ieee80211_stop_queues(ext_phy->hw);
@@ -1545,6 +1539,9 @@ void mt7915_mac_reset_work(struct work_struct *work)
mutex_lock(&dev->mt76.mutex);
+ if (mtk_wed_device_active(&dev->mt76.mmio.wed))
+ mtk_wed_device_stop(&dev->mt76.mmio.wed);
+
mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_STOPPED);
if (mt7915_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index df2d4279790d..3709d18da0e6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -1708,6 +1708,6 @@ const struct ieee80211_ops mt7915_ops = {
.set_radar_background = mt7915_set_radar_background,
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
.net_fill_forward_path = mt7915_net_fill_forward_path,
- .net_setup_tc = mt76_net_setup_tc,
+ .net_setup_tc = mt76_wed_net_setup_tc,
#endif
};
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index c67c4f6ca2aa..d90f98c50039 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -228,7 +228,7 @@ mt7915_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
if (!vif->bss_conf.csa_active || vif->type == NL80211_IFTYPE_STATION)
return;
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
}
static void
@@ -463,10 +463,10 @@ static bool mt7915_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw,
.tolerated = true,
};
- if (!(vif->bss_conf.chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ if (!(vif->bss_conf.chanreq.oper.chan->flags & IEEE80211_CHAN_RADAR))
return false;
- cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chandef,
+ cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chanreq.oper,
mt7915_check_he_obss_narrow_bw_ru_iter,
&iter_data);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index aff4f21e843d..d6ecd698cdcd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -490,6 +490,11 @@ static u32 __mt7915_reg_addr(struct mt7915_dev *dev, u32 addr)
return dev->reg.map[i].maps + ofs;
}
+ return 0;
+}
+
+static u32 __mt7915_reg_remap_addr(struct mt7915_dev *dev, u32 addr)
+{
if ((addr >= MT_INFRA_BASE && addr < MT_WFSYS0_PHY_START) ||
(addr >= MT_WFSYS0_PHY_START && addr < MT_WFSYS1_PHY_START) ||
(addr >= MT_WFSYS1_PHY_START && addr <= MT_WFSYS1_PHY_END))
@@ -514,15 +519,30 @@ void mt7915_memcpy_fromio(struct mt7915_dev *dev, void *buf, u32 offset,
{
u32 addr = __mt7915_reg_addr(dev, offset);
- memcpy_fromio(buf, dev->mt76.mmio.regs + addr, len);
+ if (addr) {
+ memcpy_fromio(buf, dev->mt76.mmio.regs + addr, len);
+ return;
+ }
+
+ spin_lock_bh(&dev->reg_lock);
+ memcpy_fromio(buf, dev->mt76.mmio.regs +
+ __mt7915_reg_remap_addr(dev, offset), len);
+ spin_unlock_bh(&dev->reg_lock);
}
static u32 mt7915_rr(struct mt76_dev *mdev, u32 offset)
{
struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
- u32 addr = __mt7915_reg_addr(dev, offset);
+ u32 addr = __mt7915_reg_addr(dev, offset), val;
- return dev->bus_ops->rr(mdev, addr);
+ if (addr)
+ return dev->bus_ops->rr(mdev, addr);
+
+ spin_lock_bh(&dev->reg_lock);
+ val = dev->bus_ops->rr(mdev, __mt7915_reg_remap_addr(dev, offset));
+ spin_unlock_bh(&dev->reg_lock);
+
+ return val;
}
static void mt7915_wr(struct mt76_dev *mdev, u32 offset, u32 val)
@@ -530,7 +550,14 @@ static void mt7915_wr(struct mt76_dev *mdev, u32 offset, u32 val)
struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
u32 addr = __mt7915_reg_addr(dev, offset);
- dev->bus_ops->wr(mdev, addr, val);
+ if (addr) {
+ dev->bus_ops->wr(mdev, addr, val);
+ return;
+ }
+
+ spin_lock_bh(&dev->reg_lock);
+ dev->bus_ops->wr(mdev, __mt7915_reg_remap_addr(dev, offset), val);
+ spin_unlock_bh(&dev->reg_lock);
}
static u32 mt7915_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
@@ -538,7 +565,14 @@ static u32 mt7915_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
u32 addr = __mt7915_reg_addr(dev, offset);
- return dev->bus_ops->rmw(mdev, addr, mask, val);
+ if (addr)
+ return dev->bus_ops->rmw(mdev, addr, mask, val);
+
+ spin_lock_bh(&dev->reg_lock);
+ val = dev->bus_ops->rmw(mdev, __mt7915_reg_remap_addr(dev, offset), mask, val);
+ spin_unlock_bh(&dev->reg_lock);
+
+ return val;
}
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
@@ -672,13 +706,13 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr,
}
wed->wlan.init_buf = mt7915_wed_init_buf;
- wed->wlan.offload_enable = mt76_mmio_wed_offload_enable;
- wed->wlan.offload_disable = mt76_mmio_wed_offload_disable;
- wed->wlan.init_rx_buf = mt76_mmio_wed_init_rx_buf;
- wed->wlan.release_rx_buf = mt76_mmio_wed_release_rx_buf;
+ wed->wlan.offload_enable = mt76_wed_offload_enable;
+ wed->wlan.offload_disable = mt76_wed_offload_disable;
+ wed->wlan.init_rx_buf = mt76_wed_init_rx_buf;
+ wed->wlan.release_rx_buf = mt76_wed_release_rx_buf;
wed->wlan.update_wo_rx_stats = mt7915_mmio_wed_update_rx_stats;
wed->wlan.reset = mt7915_mmio_wed_reset;
- wed->wlan.reset_complete = mt76_mmio_wed_reset_complete;
+ wed->wlan.reset_complete = mt76_wed_reset_complete;
dev->mt76.rx_token_size = wed->wlan.rx_npkt;
@@ -707,6 +741,7 @@ static int mt7915_mmio_init(struct mt76_dev *mdev,
dev = container_of(mdev, struct mt7915_dev, mt76);
mt76_mmio_init(&dev->mt76, mem_base);
+ spin_lock_init(&dev->reg_lock);
switch (device_id) {
case 0x7915:
@@ -958,4 +993,5 @@ static void __exit mt7915_exit(void)
module_init(mt7915_init);
module_exit(mt7915_exit);
+MODULE_DESCRIPTION("MediaTek MT7915E MMIO helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 4727d9c7b11d..6e79bc65f5a5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -287,6 +287,7 @@ struct mt7915_dev {
struct list_head sta_rc_list;
struct list_head twt_list;
+ spinlock_t reg_lock;
u32 hw_pattern;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/soc.c b/drivers/net/wireless/mediatek/mt76/mt7915/soc.c
index 8b4809703efc..f5b99917c08e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/soc.c
@@ -516,7 +516,8 @@ static int mt798x_wmac_adie_patch_7976(struct mt7915_dev *dev, u8 adie)
if (ret)
return ret;
- if (version == 0x8a00 || version == 0x8a10 || version == 0x8b00) {
+ if (version == 0x8a00 || version == 0x8a10 ||
+ version == 0x8b00 || version == 0x8c10) {
rg_xo_01 = 0x1d59080f;
rg_xo_03 = 0x34c00fe0;
} else {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 48433c6d5e7d..ef0c721d26e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -138,9 +138,14 @@ mt7921_regd_notifier(struct wiphy *wiphy,
if (pm->suspended)
return;
+ dev->regd_in_progress = true;
+
mt792x_mutex_acquire(dev);
mt7921_regd_update(dev);
mt792x_mutex_release(dev);
+
+ dev->regd_in_progress = false;
+ wake_up(&dev->wait);
}
int mt7921_mac_init(struct mt792x_dev *dev)
@@ -261,6 +266,7 @@ int mt7921_register_device(struct mt792x_dev *dev)
spin_lock_init(&dev->pm.wake.lock);
mutex_init(&dev->pm.mutex);
init_waitqueue_head(&dev->pm.wait);
+ init_waitqueue_head(&dev->wait);
if (mt76_is_sdio(&dev->mt76))
init_waitqueue_head(&dev->mt76.sdio.wait);
spin_lock_init(&dev->pm.txq_lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 0645417e0582..ca36de34171b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -325,6 +325,19 @@ static void mt7921_roc_iter(void *priv, u8 *mac,
mt7921_mcu_abort_roc(phy, mvif, phy->roc_token_id);
}
+void mt7921_roc_abort_sync(struct mt792x_dev *dev)
+{
+ struct mt792x_phy *phy = &dev->phy;
+
+ del_timer_sync(&phy->roc_timer);
+ cancel_work_sync(&phy->roc_work);
+ if (test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state))
+ ieee80211_iterate_active_interfaces(mt76_hw(dev),
+ IEEE80211_IFACE_ITER_RESUME_ALL,
+ mt7921_roc_iter, (void *)phy);
+}
+EXPORT_SYMBOL_GPL(mt7921_roc_abort_sync);
+
void mt7921_roc_work(struct work_struct *work)
{
struct mt792x_phy *phy;
@@ -1418,5 +1431,6 @@ const struct ieee80211_ops mt7921_ops = {
};
EXPORT_SYMBOL_GPL(mt7921_ops);
+MODULE_DESCRIPTION("MediaTek MT7921 core driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index f5582477c7e4..8b4ce32a2cd1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1272,7 +1272,7 @@ int __mt7921_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2,
.mtcl_conf = mt792x_acpi_get_mtcl_conf(&dev->phy, alpha2),
};
int ret, valid_cnt = 0;
- u16 buf_len = 0;
+ u32 buf_len = 0;
u8 *pos;
if (!clc)
@@ -1283,7 +1283,7 @@ int __mt7921_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2,
if (mt76_find_power_limits_node(&dev->mt76))
req.cap |= CLC_CAP_DTS_EN;
- buf_len = le16_to_cpu(clc->len) - sizeof(*clc);
+ buf_len = le32_to_cpu(clc->len) - sizeof(*clc);
pos = clc->data;
while (buf_len > 16) {
struct mt7921_clc_rule *rule = (struct mt7921_clc_rule *)pos;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 1cb21133992b..3016636d18c6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -322,4 +322,5 @@ int mt7921_mcu_set_roc(struct mt792x_phy *phy, struct mt792x_vif *vif,
enum mt7921_roc_req type, u8 token_id);
int mt7921_mcu_abort_roc(struct mt792x_phy *phy, struct mt792x_vif *vif,
u8 token_id);
+void mt7921_roc_abort_sync(struct mt792x_dev *dev);
#endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 57903c6e4f11..cda853e86676 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include "mt7921.h"
#include "../mt76_connac2_mac.h"
@@ -369,6 +370,9 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
if (ret)
goto err_free_irq;
+ if (of_property_read_bool(dev->mt76.dev->of_node, "wakeup-source"))
+ device_init_wakeup(dev->mt76.dev, true);
+
return 0;
err_free_irq:
@@ -386,7 +390,11 @@ static void mt7921_pci_remove(struct pci_dev *pdev)
struct mt76_dev *mdev = pci_get_drvdata(pdev);
struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76);
+ if (of_property_read_bool(dev->mt76.dev->of_node, "wakeup-source"))
+ device_init_wakeup(dev->mt76.dev, false);
+
mt7921e_unregister_device(dev);
+ set_bit(MT76_REMOVED, &mdev->phy.state);
devm_free_irq(&pdev->dev, pdev->irq, dev);
mt76_free_device(&dev->mt76);
pci_free_irq_vectors(pdev);
@@ -405,10 +413,15 @@ static int mt7921_pci_suspend(struct device *device)
cancel_delayed_work_sync(&pm->ps_work);
cancel_work_sync(&pm->wake_work);
+ mt7921_roc_abort_sync(dev);
+
err = mt792x_mcu_drv_pmctrl(dev);
if (err < 0)
goto restore_suspend;
+ wait_event_timeout(dev->wait,
+ !dev->regd_in_progress, 5 * HZ);
+
err = mt76_connac_mcu_set_hif_suspend(mdev, true);
if (err)
goto restore_suspend;
@@ -544,4 +557,5 @@ MODULE_FIRMWARE(MT7922_FIRMWARE_WM);
MODULE_FIRMWARE(MT7922_ROM_PATCH);
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7921E (PCIe) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c
index 7591e54d2897..004d942ee11a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c
@@ -216,6 +216,8 @@ static int mt7921s_suspend(struct device *__dev)
cancel_delayed_work_sync(&pm->ps_work);
cancel_work_sync(&pm->wake_work);
+ mt7921_roc_abort_sync(dev);
+
err = mt792x_mcu_drv_pmctrl(dev);
if (err < 0)
goto restore_suspend;
@@ -323,5 +325,6 @@ static struct sdio_driver mt7921s_driver = {
.drv.pm = pm_sleep_ptr(&mt7921s_pm_ops),
};
module_sdio_driver(mt7921s_driver);
+MODULE_DESCRIPTION("MediaTek MT7921S (SDIO) wireless driver");
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
index e5258c74fc07..8b7c03c47598 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
@@ -336,5 +336,6 @@ static struct usb_driver mt7921u_driver = {
};
module_usb_driver(mt7921u_driver);
+MODULE_DESCRIPTION("MediaTek MT7921U (USB) wireless driver");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c
index 8f9b7a2f376c..c4cbc8976046 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c
@@ -2,11 +2,61 @@
/* Copyright (C) 2023 MediaTek Inc. */
#include <linux/etherdevice.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/thermal.h>
#include <linux/firmware.h>
#include "mt7925.h"
#include "mac.h"
#include "mcu.h"
+static ssize_t mt7925_thermal_temp_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ switch (to_sensor_dev_attr(attr)->index) {
+ case 0: {
+ struct mt792x_phy *phy = dev_get_drvdata(dev);
+ struct mt792x_dev *mdev = phy->dev;
+ int temperature;
+
+ mt792x_mutex_acquire(mdev);
+ temperature = mt7925_mcu_get_temperature(phy);
+ mt792x_mutex_release(mdev);
+
+ if (temperature < 0)
+ return temperature;
+ /* display in millidegree Celsius */
+ return sprintf(buf, "%u\n", temperature * 1000);
+ }
+ default:
+ return -EINVAL;
+ }
+}
+static SENSOR_DEVICE_ATTR_RO(temp1_input, mt7925_thermal_temp, 0);
+
+static struct attribute *mt7925_hwmon_attrs[] = {
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(mt7925_hwmon);
+
+static int mt7925_thermal_init(struct mt792x_phy *phy)
+{
+ struct wiphy *wiphy = phy->mt76->hw->wiphy;
+ struct device *hwmon;
+ const char *name;
+
+ if (!IS_REACHABLE(CONFIG_HWMON))
+ return 0;
+
+ name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7925_%s",
+ wiphy_name(wiphy));
+
+ hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy,
+ mt7925_hwmon_groups);
+ return PTR_ERR_OR_ZERO(hwmon);
+}
static void
mt7925_regd_notifier(struct wiphy *wiphy,
struct regulatory_request *req)
@@ -142,6 +192,12 @@ static void mt7925_init_work(struct work_struct *work)
return;
}
+ ret = mt7925_thermal_init(&dev->phy);
+ if (ret) {
+ dev_err(dev->mt76.dev, "thermal init failed\n");
+ return;
+ }
+
/* we support chip reset now */
dev->hw_init_done = true;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
index 8f1075da4903..6179798a8845 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
@@ -359,6 +359,7 @@ mt7925_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
mvif->sta.wcid.phy_idx = mvif->mt76.band_idx;
mvif->sta.wcid.hw_key_idx = -1;
mvif->sta.wcid.tx_info |= MT_WCID_TX_INFO_SET;
+ mvif->sta.vif = mvif;
mt76_wcid_init(&mvif->sta.wcid);
mt7925_mac_wtbl_update(dev, idx,
@@ -526,7 +527,7 @@ static int mt7925_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
if (cmd == SET_KEY && !mvif->mt76.cipher) {
struct mt792x_phy *phy = mt792x_hw_phy(hw);
- mvif->mt76.cipher = mt76_connac_mcu_get_cipher(key->cipher);
+ mvif->mt76.cipher = mt7925_mcu_get_cipher(key->cipher);
mt7925_mcu_add_bss_info(phy, mvif->mt76.ctx, vif, sta, true);
}
@@ -710,7 +711,7 @@ static void mt7925_bss_info_changed(struct ieee80211_hw *hw,
if (slottime != phy->slottime) {
phy->slottime = slottime;
- mt792x_mac_set_timeing(phy);
+ mt7925_mcu_set_timing(phy, vif);
}
}
@@ -1274,6 +1275,25 @@ mt7925_channel_switch_beacon(struct ieee80211_hw *hw,
}
static int
+mt7925_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ unsigned int link_id, u16 queue,
+ const struct ieee80211_tx_queue_params *params)
+{
+ struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv;
+ static const u8 mq_to_aci[] = {
+ [IEEE80211_AC_VO] = 3,
+ [IEEE80211_AC_VI] = 2,
+ [IEEE80211_AC_BE] = 0,
+ [IEEE80211_AC_BK] = 1,
+ };
+
+ /* firmware uses access class index */
+ mvif->queue_params[mq_to_aci[queue]] = *params;
+
+ return 0;
+}
+
+static int
mt7925_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
@@ -1396,7 +1416,7 @@ const struct ieee80211_ops mt7925_ops = {
.add_interface = mt7925_add_interface,
.remove_interface = mt792x_remove_interface,
.config = mt7925_config,
- .conf_tx = mt792x_conf_tx,
+ .conf_tx = mt7925_conf_tx,
.configure_filter = mt7925_configure_filter,
.bss_info_changed = mt7925_bss_info_changed,
.start_ap = mt7925_start_ap,
@@ -1450,4 +1470,5 @@ const struct ieee80211_ops mt7925_ops = {
EXPORT_SYMBOL_GPL(mt7925_ops);
MODULE_AUTHOR("Deren Wu <deren.wu@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek MT7925 core driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
index c5fd7116929b..bd37cb8d734b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
@@ -656,6 +656,42 @@ int mt7925_mcu_fw_log_2_host(struct mt792x_dev *dev, u8 ctrl)
return ret;
}
+int mt7925_mcu_get_temperature(struct mt792x_phy *phy)
+{
+ struct {
+ u8 _rsv[4];
+
+ __le16 tag;
+ __le16 len;
+ u8 _rsv2[4];
+ } __packed req = {
+ .tag = cpu_to_le16(0x0),
+ .len = cpu_to_le16(sizeof(req) - 4),
+ };
+ struct mt7925_thermal_evt {
+ u8 rsv[4];
+ __le32 temperature;
+ } __packed * evt;
+ struct mt792x_dev *dev = phy->dev;
+ int temperature, ret;
+ struct sk_buff *skb;
+
+ ret = mt76_mcu_send_and_get_msg(&dev->mt76,
+ MCU_WM_UNI_CMD_QUERY(THERMAL),
+ &req, sizeof(req), true, &skb);
+ if (ret)
+ return ret;
+
+ skb_pull(skb, 4 + sizeof(struct tlv));
+ evt = (struct mt7925_thermal_evt *)skb->data;
+
+ temperature = le32_to_cpu(evt->temperature);
+
+ dev_kfree_skb(skb);
+
+ return temperature;
+}
+
static void
mt7925_mcu_parse_phy_cap(struct mt792x_dev *dev, char *data)
{
@@ -814,6 +850,7 @@ mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta)
{
+ struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv;
struct sta_rec_hdr_trans *hdr_trans;
struct mt76_wcid *wcid;
struct tlv *tlv;
@@ -827,7 +864,11 @@ mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb,
else
hdr_trans->from_ds = true;
- wcid = (struct mt76_wcid *)sta->drv_priv;
+ if (sta)
+ wcid = (struct mt76_wcid *)sta->drv_priv;
+ else
+ wcid = &mvif->sta.wcid;
+
if (!wcid)
return;
@@ -895,7 +936,7 @@ int mt7925_mcu_set_tx(struct mt792x_dev *dev, struct ieee80211_vif *vif)
e = (struct edca *)tlv;
e->set = WMM_PARAM_SET;
- e->queue = ac + mvif->mt76.wmm_idx * MT76_CONNAC_MAX_WMM_SETS;
+ e->queue = ac;
e->aifs = q->aifs;
e->txop = cpu_to_le16(q->txop);
@@ -921,61 +962,67 @@ mt7925_mcu_sta_key_tlv(struct mt76_wcid *wcid,
struct ieee80211_key_conf *key,
enum set_key_cmd cmd)
{
+ struct mt792x_sta *msta = container_of(wcid, struct mt792x_sta, wcid);
struct sta_rec_sec_uni *sec;
+ struct mt792x_vif *mvif = msta->vif;
+ struct ieee80211_sta *sta;
+ struct ieee80211_vif *vif;
struct tlv *tlv;
- tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_KEY_V2, sizeof(*sec));
+ sta = msta == &mvif->sta ?
+ NULL :
+ container_of((void *)msta, struct ieee80211_sta, drv_priv);
+ vif = container_of((void *)mvif, struct ieee80211_vif, drv_priv);
+
+ tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_KEY_V3, sizeof(*sec));
sec = (struct sta_rec_sec_uni *)tlv;
- sec->add = cmd;
+ sec->bss_idx = mvif->mt76.idx;
+ sec->is_authenticator = 0;
+ sec->mgmt_prot = 0;
+ sec->wlan_idx = (u8)wcid->idx;
+
+ if (sta) {
+ sec->tx_key = 1;
+ sec->key_type = 1;
+ memcpy(sec->peer_addr, sta->addr, ETH_ALEN);
+ } else {
+ memcpy(sec->peer_addr, vif->bss_conf.bssid, ETH_ALEN);
+ }
if (cmd == SET_KEY) {
- struct sec_key_uni *sec_key;
u8 cipher;
- cipher = mt76_connac_mcu_get_cipher(key->cipher);
- if (cipher == MCU_CIPHER_NONE)
+ sec->add = 1;
+ cipher = mt7925_mcu_get_cipher(key->cipher);
+ if (cipher == CONNAC3_CIPHER_NONE)
return -EOPNOTSUPP;
- sec_key = &sec->key[0];
- sec_key->cipher_len = sizeof(*sec_key);
-
- if (cipher == MCU_CIPHER_BIP_CMAC_128) {
- sec_key->wlan_idx = cpu_to_le16(wcid->idx);
- sec_key->cipher_id = MCU_CIPHER_AES_CCMP;
- sec_key->key_id = sta_key_conf->keyidx;
- sec_key->key_len = 16;
- memcpy(sec_key->key, sta_key_conf->key, 16);
-
- sec_key = &sec->key[1];
- sec_key->wlan_idx = cpu_to_le16(wcid->idx);
- sec_key->cipher_id = MCU_CIPHER_BIP_CMAC_128;
- sec_key->cipher_len = sizeof(*sec_key);
- sec_key->key_len = 16;
- memcpy(sec_key->key, key->key, 16);
- sec->n_cipher = 2;
+ if (cipher == CONNAC3_CIPHER_BIP_CMAC_128) {
+ sec->cipher_id = CONNAC3_CIPHER_BIP_CMAC_128;
+ sec->key_id = sta_key_conf->keyidx;
+ sec->key_len = 32;
+ memcpy(sec->key, sta_key_conf->key, 16);
+ memcpy(sec->key + 16, key->key, 16);
} else {
- sec_key->wlan_idx = cpu_to_le16(wcid->idx);
- sec_key->cipher_id = cipher;
- sec_key->key_id = key->keyidx;
- sec_key->key_len = key->keylen;
- memcpy(sec_key->key, key->key, key->keylen);
+ sec->cipher_id = cipher;
+ sec->key_id = key->keyidx;
+ sec->key_len = key->keylen;
+ memcpy(sec->key, key->key, key->keylen);
- if (cipher == MCU_CIPHER_TKIP) {
+ if (cipher == CONNAC3_CIPHER_TKIP) {
/* Rx/Tx MIC keys are swapped */
- memcpy(sec_key->key + 16, key->key + 24, 8);
- memcpy(sec_key->key + 24, key->key + 16, 8);
+ memcpy(sec->key + 16, key->key + 24, 8);
+ memcpy(sec->key + 24, key->key + 16, 8);
}
/* store key_conf for BIP batch update */
- if (cipher == MCU_CIPHER_AES_CCMP) {
+ if (cipher == CONNAC3_CIPHER_AES_CCMP) {
memcpy(sta_key_conf->key, key->key, key->keylen);
sta_key_conf->keyidx = key->keyidx;
}
-
- sec->n_cipher = 1;
}
} else {
- sec->n_cipher = 0;
+ sec->add = 0;
}
return 0;
@@ -1460,12 +1507,10 @@ mt7925_mcu_sta_phy_tlv(struct sk_buff *skb,
struct tlv *tlv;
u8 af = 0, mm = 0;
- if (!sta->deflink.ht_cap.ht_supported && !sta->deflink.he_6ghz_capa.capa)
- return;
-
tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_PHY, sizeof(*phy));
phy = (struct sta_rec_phy *)tlv;
phy->phy_type = mt76_connac_get_phy_mode_v2(mvif->phy->mt76, vif, chandef->chan->band, sta);
+ phy->basic_rate = cpu_to_le16((u16)vif->bss_conf.basic_rates);
if (sta->deflink.ht_cap.ht_supported) {
af = sta->deflink.ht_cap.ampdu_factor;
mm = sta->deflink.ht_cap.ampdu_density;
@@ -1573,8 +1618,6 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy,
{
struct mt76_vif *mvif = (struct mt76_vif *)info->vif->drv_priv;
struct mt76_dev *dev = phy->dev;
- struct wtbl_req_hdr *wtbl_hdr;
- struct tlv *sta_wtbl;
struct sk_buff *skb;
skb = __mt76_connac_mcu_alloc_sta_req(dev, mvif, info->wcid,
@@ -1598,30 +1641,11 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy,
mt7925_mcu_sta_state_v2_tlv(phy, skb, info->sta,
info->vif, info->rcpi,
info->state);
- mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->sta);
mt7925_mcu_sta_mld_tlv(skb, info->vif, info->sta);
}
- sta_wtbl = mt76_connac_mcu_add_tlv(skb, STA_REC_WTBL,
- sizeof(struct tlv));
-
- wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(dev, info->wcid,
- WTBL_RESET_AND_SET,
- sta_wtbl, &skb);
- if (IS_ERR(wtbl_hdr))
- return PTR_ERR(wtbl_hdr);
-
- if (info->enable) {
- mt76_connac_mcu_wtbl_generic_tlv(dev, skb, info->vif,
- info->sta, sta_wtbl,
- wtbl_hdr);
- mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, info->vif, info->wcid,
- sta_wtbl, wtbl_hdr);
- if (info->sta)
- mt76_connac_mcu_wtbl_ht_tlv(dev, skb, info->sta,
- sta_wtbl, wtbl_hdr,
- true, true);
- }
+ if (info->enable)
+ mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->sta);
return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true);
}
@@ -2049,9 +2073,9 @@ mt7925_mcu_bss_basic_tlv(struct sk_buff *skb,
struct cfg80211_chan_def *chandef = ctx ? &ctx->def : &phy->chandef;
enum nl80211_band band = chandef->chan->band;
struct mt76_connac_bss_basic_tlv *basic_req;
- u8 idx, basic_phy;
struct tlv *tlv;
int conn_type;
+ u8 idx;
tlv = mt76_connac_mcu_add_tlv(skb, UNI_BSS_INFO_BASIC, sizeof(*basic_req));
basic_req = (struct mt76_connac_bss_basic_tlv *)tlv;
@@ -2062,8 +2086,10 @@ mt7925_mcu_bss_basic_tlv(struct sk_buff *skb,
basic_req->phymode_ext = mt7925_get_phy_mode_ext(phy, vif, band, sta);
- basic_phy = mt76_connac_get_phy_mode_v2(phy, vif, band, sta);
- basic_req->nonht_basic_phy = cpu_to_le16(basic_phy);
+ if (band == NL80211_BAND_2GHZ)
+ basic_req->nonht_basic_phy = cpu_to_le16(PHY_TYPE_ERP_INDEX);
+ else
+ basic_req->nonht_basic_phy = cpu_to_le16(PHY_TYPE_OFDM_INDEX);
memcpy(basic_req->bssid, vif->bss_conf.bssid, ETH_ALEN);
basic_req->phymode = mt76_connac_get_phy_mode(phy, vif, band, sta);
@@ -2122,21 +2148,21 @@ mt7925_mcu_bss_sec_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
sec = (struct bss_sec_tlv *)tlv;
switch (mvif->cipher) {
- case MCU_CIPHER_GCMP_256:
- case MCU_CIPHER_GCMP:
+ case CONNAC3_CIPHER_GCMP_256:
+ case CONNAC3_CIPHER_GCMP:
sec->mode = MODE_WPA3_SAE;
sec->status = 8;
break;
- case MCU_CIPHER_AES_CCMP:
+ case CONNAC3_CIPHER_AES_CCMP:
sec->mode = MODE_WPA2_PSK;
sec->status = 6;
break;
- case MCU_CIPHER_TKIP:
+ case CONNAC3_CIPHER_TKIP:
sec->mode = MODE_WPA2_PSK;
sec->status = 4;
break;
- case MCU_CIPHER_WEP104:
- case MCU_CIPHER_WEP40:
+ case CONNAC3_CIPHER_WEP104:
+ case CONNAC3_CIPHER_WEP40:
sec->mode = MODE_SHARED;
sec->status = 0;
break;
@@ -2167,6 +2193,11 @@ mt7925_mcu_bss_bmc_tlv(struct sk_buff *skb, struct mt792x_phy *phy,
bmc = (struct bss_rate_tlv *)tlv;
+ if (band == NL80211_BAND_2GHZ)
+ bmc->basic_rate = cpu_to_le16(HR_DSSS_ERP_BASIC_RATE);
+ else
+ bmc->basic_rate = cpu_to_le16(OFDM_BASIC_RATE);
+
bmc->short_preamble = (band == NL80211_BAND_2GHZ);
bmc->bc_fixed_rate = idx;
bmc->mc_fixed_rate = idx;
@@ -2249,6 +2280,38 @@ mt7925_mcu_bss_color_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
vif->bss_conf.he_bss_color.color : 0;
}
+static void
+mt7925_mcu_bss_ifs_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
+{
+ struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv;
+ struct mt792x_phy *phy = mvif->phy;
+ struct bss_ifs_time_tlv *ifs_time;
+ struct tlv *tlv;
+
+ tlv = mt76_connac_mcu_add_tlv(skb, UNI_BSS_INFO_IFS_TIME, sizeof(*ifs_time));
+ ifs_time = (struct bss_ifs_time_tlv *)tlv;
+ ifs_time->slot_valid = true;
+ ifs_time->slot_time = cpu_to_le16(phy->slottime);
+}
+
+int mt7925_mcu_set_timing(struct mt792x_phy *phy,
+ struct ieee80211_vif *vif)
+{
+ struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv;
+ struct mt792x_dev *dev = phy->dev;
+ struct sk_buff *skb;
+
+ skb = __mt7925_mcu_alloc_bss_req(&dev->mt76, &mvif->mt76,
+ MT7925_BSS_UPDATE_MAX_SIZE);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ mt7925_mcu_bss_ifs_tlv(skb, vif);
+
+ return mt76_mcu_skb_send_msg(&dev->mt76, skb,
+ MCU_UNI_CMD(BSS_INFO_UPDATE), true);
+}
+
int mt7925_mcu_add_bss_info(struct mt792x_phy *phy,
struct ieee80211_chanctx_conf *ctx,
struct ieee80211_vif *vif,
@@ -2273,6 +2336,7 @@ int mt7925_mcu_add_bss_info(struct mt792x_phy *phy,
mt7925_mcu_bss_bmc_tlv(skb, phy, ctx, vif, sta);
mt7925_mcu_bss_qos_tlv(skb, vif);
mt7925_mcu_bss_mld_tlv(skb, vif, sta);
+ mt7925_mcu_bss_ifs_tlv(skb, vif);
if (vif->bss_conf.he_support) {
mt7925_mcu_bss_he_tlv(skb, vif, phy);
@@ -2845,12 +2909,16 @@ int mt7925_mcu_fill_message(struct mt76_dev *mdev, struct sk_buff *skb,
if (cmd & __MCU_CMD_FIELD_UNI) {
uni_txd = (struct mt76_connac2_mcu_uni_txd *)txd;
uni_txd->len = cpu_to_le16(skb->len - sizeof(uni_txd->txd));
- uni_txd->option = MCU_CMD_UNI_EXT_ACK;
uni_txd->cid = cpu_to_le16(mcu_cmd);
uni_txd->s2d_index = MCU_S2D_H2N;
uni_txd->pkt_type = MCU_PKT_ID;
uni_txd->seq = seq;
+ if (cmd & __MCU_CMD_FIELD_QUERY)
+ uni_txd->option = MCU_CMD_UNI_QUERY_ACK;
+ else
+ uni_txd->option = MCU_CMD_UNI_EXT_ACK;
+
goto exit;
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h
index 3c41e21303b1..2a0bbfe7bfa5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h
@@ -159,6 +159,20 @@ enum {
UNI_EVENT_SCAN_DONE_NLO = 3,
};
+enum connac3_mcu_cipher_type {
+ CONNAC3_CIPHER_NONE = 0,
+ CONNAC3_CIPHER_WEP40 = 1,
+ CONNAC3_CIPHER_TKIP = 2,
+ CONNAC3_CIPHER_AES_CCMP = 4,
+ CONNAC3_CIPHER_WEP104 = 5,
+ CONNAC3_CIPHER_BIP_CMAC_128 = 6,
+ CONNAC3_CIPHER_WEP128 = 7,
+ CONNAC3_CIPHER_WAPI = 8,
+ CONNAC3_CIPHER_CCMP_256 = 10,
+ CONNAC3_CIPHER_GCMP = 11,
+ CONNAC3_CIPHER_GCMP_256 = 12,
+};
+
struct mt7925_mcu_scan_chinfo_event {
u8 nr_chan;
u8 alpha2[3];
@@ -208,7 +222,7 @@ struct scan_req_tlv {
__le16 channel_dwell_time; /* channel Dwell interval */
__le16 timeout_value;
__le16 probe_delay_time;
- u8 func_mask_ext;
+ __le32 func_mask_ext;
};
struct scan_ssid_tlv {
@@ -334,7 +348,8 @@ struct bss_req_hdr {
struct bss_rate_tlv {
__le16 tag;
__le16 len;
- u8 __rsv1[4];
+ u8 __rsv1[2];
+ __le16 basic_rate;
__le16 bc_trans;
__le16 mc_trans;
u8 short_preamble;
@@ -382,25 +397,22 @@ struct sta_rec_eht {
u8 _rsv2[3];
} __packed;
-struct sec_key_uni {
- __le16 wlan_idx;
- u8 mgmt_prot;
- u8 cipher_id;
- u8 cipher_len;
- u8 key_id;
- u8 key_len;
- u8 need_resp;
- u8 key[32];
-} __packed;
-
struct sta_rec_sec_uni {
__le16 tag;
__le16 len;
u8 add;
- u8 n_cipher;
- u8 rsv[2];
-
- struct sec_key_uni key[2];
+ u8 tx_key;
+ u8 key_type;
+ u8 is_authenticator;
+ u8 peer_addr[6];
+ u8 bss_idx;
+ u8 cipher_id;
+ u8 key_id;
+ u8 key_len;
+ u8 wlan_idx;
+ u8 mgmt_prot;
+ u8 key[32];
+ u8 key_rsc[16];
} __packed;
struct sta_rec_hdr_trans {
@@ -428,6 +440,22 @@ struct sta_rec_mld {
} __packed link[2];
} __packed;
+struct bss_ifs_time_tlv {
+ __le16 tag;
+ __le16 len;
+ u8 slot_valid;
+ u8 sifs_valid;
+ u8 rifs_valid;
+ u8 eifs_valid;
+ __le16 slot_time;
+ __le16 sifs_time;
+ __le16 rifs_time;
+ __le16 eifs_time;
+ u8 eifs_cck_valid;
+ u8 rsv;
+ __le16 eifs_cck_time;
+} __packed;
+
#define MT7925_STA_UPDATE_MAX_SIZE (sizeof(struct sta_req_hdr) + \
sizeof(struct sta_rec_basic) + \
sizeof(struct sta_rec_bf) + \
@@ -440,7 +468,7 @@ struct sta_rec_mld {
sizeof(struct sta_rec_bfee) + \
sizeof(struct sta_rec_phy) + \
sizeof(struct sta_rec_ra) + \
- sizeof(struct sta_rec_sec) + \
+ sizeof(struct sta_rec_sec_uni) + \
sizeof(struct sta_rec_ra_fixed) + \
sizeof(struct sta_rec_he_6g_capa) + \
sizeof(struct sta_rec_eht) + \
@@ -455,6 +483,7 @@ struct sta_rec_mld {
sizeof(struct bss_mld_tlv) + \
sizeof(struct bss_info_uni_he) + \
sizeof(struct bss_info_uni_bss_color) + \
+ sizeof(struct bss_ifs_time_tlv) + \
sizeof(struct tlv))
#define MT_CONNAC3_SKU_POWER_LIMIT 449
@@ -509,6 +538,33 @@ struct mt7925_wow_pattern_tlv {
u8 rsv[4];
} __packed;
+static inline enum connac3_mcu_cipher_type
+mt7925_mcu_get_cipher(int cipher)
+{
+ switch (cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ return CONNAC3_CIPHER_WEP40;
+ case WLAN_CIPHER_SUITE_WEP104:
+ return CONNAC3_CIPHER_WEP104;
+ case WLAN_CIPHER_SUITE_TKIP:
+ return CONNAC3_CIPHER_TKIP;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ return CONNAC3_CIPHER_BIP_CMAC_128;
+ case WLAN_CIPHER_SUITE_CCMP:
+ return CONNAC3_CIPHER_AES_CCMP;
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ return CONNAC3_CIPHER_CCMP_256;
+ case WLAN_CIPHER_SUITE_GCMP:
+ return CONNAC3_CIPHER_GCMP;
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ return CONNAC3_CIPHER_GCMP_256;
+ case WLAN_CIPHER_SUITE_SMS4:
+ return CONNAC3_CIPHER_WAPI;
+ default:
+ return CONNAC3_CIPHER_NONE;
+ }
+}
+
int mt7925_mcu_set_dbdc(struct mt76_phy *phy);
int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
struct ieee80211_scan_request *scan_req);
@@ -525,6 +581,8 @@ int mt7925_mcu_add_bss_info(struct mt792x_phy *phy,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
int enable);
+int mt7925_mcu_set_timing(struct mt792x_phy *phy,
+ struct ieee80211_vif *vif);
int mt7925_mcu_set_deep_sleep(struct mt792x_dev *dev, bool enable);
int mt7925_mcu_set_channel_domain(struct mt76_phy *phy);
int mt7925_mcu_set_radio_en(struct mt792x_phy *phy, bool enable);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h
index 33785f526acf..8a4a71f6bcb6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h
@@ -271,6 +271,7 @@ int mt7925_mcu_set_sniffer(struct mt792x_dev *dev, struct ieee80211_vif *vif,
bool enable);
int mt7925_mcu_config_sniffer(struct mt792x_vif *vif,
struct ieee80211_chanctx_conf *ctx);
+int mt7925_mcu_get_temperature(struct mt792x_phy *phy);
int mt7925_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
enum mt76_txq_id qid, struct mt76_wcid *wcid,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
index 734f31ee40d3..07b74d492ce1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
@@ -386,6 +386,8 @@ static int mt7925_pci_probe(struct pci_dev *pdev,
dev_info(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
+ mt76_rmw_field(dev, MT_HW_EMI_CTL, MT_HW_EMI_CTL_SLPPROT_EN, 1);
+
ret = mt792x_wfsys_reset(dev);
if (ret)
goto err_free_dev;
@@ -425,6 +427,7 @@ static void mt7925_pci_remove(struct pci_dev *pdev)
struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76);
mt7925e_unregister_device(dev);
+ set_bit(MT76_REMOVED, &mdev->phy.state);
devm_free_irq(&pdev->dev, pdev->irq, dev);
mt76_free_device(&dev->mt76);
pci_free_irq_vectors(pdev);
@@ -583,4 +586,5 @@ MODULE_FIRMWARE(MT7925_FIRMWARE_WM);
MODULE_FIRMWARE(MT7925_ROM_PATCH);
MODULE_AUTHOR("Deren Wu <deren.wu@mediatek.com>");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7925E (PCIe) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
index 9b885c5b3ed5..1e0f094fc905 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
@@ -329,4 +329,5 @@ static struct usb_driver mt7925u_driver = {
module_usb_driver(mt7925u_driver);
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7925U (USB) wireless driver");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x.h b/drivers/net/wireless/mediatek/mt76/mt792x.h
index 3c897b34aaa7..a8556de3d480 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x.h
+++ b/drivers/net/wireless/mediatek/mt76/mt792x.h
@@ -186,6 +186,8 @@ struct mt792x_dev {
bool hw_init_done:1;
bool fw_assert:1;
bool has_eht:1;
+ bool regd_in_progress:1;
+ wait_queue_head_t wait;
struct work_struct init_work;
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_acpi_sar.c b/drivers/net/wireless/mediatek/mt76/mt792x_acpi_sar.c
index e7afea87e82e..9317f8ff2070 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_acpi_sar.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_acpi_sar.c
@@ -66,13 +66,15 @@ free:
}
/* MTCL : Country List Table for 6G band */
-static void
+static int
mt792x_asar_acpi_read_mtcl(struct mt792x_dev *dev, u8 **table, u8 *version)
{
- if (mt792x_acpi_read(dev, MT792x_ACPI_MTCL, table, NULL) < 0)
- *version = 1;
- else
- *version = 2;
+ int ret;
+
+ *version = ((ret = mt792x_acpi_read(dev, MT792x_ACPI_MTCL, table, NULL)) < 0)
+ ? 1 : 2;
+
+ return ret;
}
/* MTDS : Dynamic SAR Power Table */
@@ -166,16 +168,16 @@ int mt792x_init_acpi_sar(struct mt792x_dev *dev)
if (!asar)
return -ENOMEM;
- mt792x_asar_acpi_read_mtcl(dev, (u8 **)&asar->countrylist, &asar->ver);
+ ret = mt792x_asar_acpi_read_mtcl(dev, (u8 **)&asar->countrylist, &asar->ver);
+ if (ret) {
+ devm_kfree(dev->mt76.dev, asar->countrylist);
+ asar->countrylist = NULL;
+ }
- /* MTDS is mandatory. Return error if table is invalid */
ret = mt792x_asar_acpi_read_mtds(dev, (u8 **)&asar->dyn, asar->ver);
if (ret) {
devm_kfree(dev->mt76.dev, asar->dyn);
- devm_kfree(dev->mt76.dev, asar->countrylist);
- devm_kfree(dev->mt76.dev, asar);
-
- return ret;
+ asar->dyn = NULL;
}
/* MTGS is optional */
@@ -290,7 +292,7 @@ int mt792x_init_acpi_sar_power(struct mt792x_phy *phy, bool set_default)
const struct cfg80211_sar_capa *capa = phy->mt76->hw->wiphy->sar_capa;
int i;
- if (!phy->acpisar)
+ if (!phy->acpisar || !((struct mt792x_acpi_sar *)phy->acpisar)->dyn)
return 0;
/* When ACPI SAR enabled in HW, we should apply rules for .frp
@@ -353,11 +355,15 @@ static u8
mt792x_acpi_get_mtcl_map(int row, int column, struct mt792x_asar_cl *cl)
{
u8 config = 0;
+ u8 mode_6g, mode_5g9;
+
+ mode_6g = (cl->mode_6g > 0x02) ? 0 : cl->mode_6g;
+ mode_5g9 = (cl->mode_5g9 > 0x01) ? 0 : cl->mode_5g9;
- if (cl->cl6g[row] & BIT(column))
- config |= (cl->mode_6g & 0x3) << 2;
+ if ((cl->cl6g[row] & BIT(column)) || cl->mode_6g == 0x02)
+ config |= (mode_6g & 0x3) << 2;
if (cl->version > 1 && cl->cl5g9[row] & BIT(column))
- config |= (cl->mode_5g9 & 0x3);
+ config |= (mode_5g9 & 0x3);
return config;
}
@@ -374,7 +380,7 @@ u8 mt792x_acpi_get_mtcl_conf(struct mt792x_phy *phy, char *alpha2)
"AT", "BE", "BG", "CY", "CZ", "HR", "DK", "EE",
"FI", "FR", "DE", "GR", "HU", "IS", "IE", "IT",
"LV", "LI", "LT", "LU", "MT", "NL", "NO", "PL",
- "PT", "RO", "MT", "SK", "SI", "ES", "CH",
+ "PT", "RO", "SK", "SI", "ES", "SE", "CH",
};
struct mt792x_acpi_sar *sar = phy->acpisar;
struct mt792x_asar_cl *cl;
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
index 502be22dbe36..a405af8d9052 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
@@ -354,6 +354,7 @@ static const char mt792x_gstrings_stats[][ETH_GSTRING_LEN] = {
"v_tx_bw_40",
"v_tx_bw_80",
"v_tx_bw_160",
+ "v_tx_bw_320",
"v_tx_mcs_0",
"v_tx_mcs_1",
"v_tx_mcs_2",
@@ -684,9 +685,10 @@ mt792x_get_mac80211_ops(struct device *dev,
if (!(*fw_features & MT792x_FW_CAP_CNM)) {
ops->remain_on_channel = NULL;
ops->cancel_remain_on_channel = NULL;
- ops->add_chanctx = NULL;
- ops->remove_chanctx = NULL;
- ops->change_chanctx = NULL;
+ ops->add_chanctx = ieee80211_emulate_add_chanctx;
+ ops->remove_chanctx = ieee80211_emulate_remove_chanctx;
+ ops->change_chanctx = ieee80211_emulate_change_chanctx;
+ ops->switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx;
ops->assign_vif_chanctx = NULL;
ops->unassign_vif_chanctx = NULL;
ops->mgd_prepare_tx = NULL;
@@ -862,5 +864,6 @@ int mt792x_load_firmware(struct mt792x_dev *dev)
}
EXPORT_SYMBOL_GPL(mt792x_load_firmware);
+MODULE_DESCRIPTION("MediaTek MT792x core driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_dma.c b/drivers/net/wireless/mediatek/mt76/mt792x_dma.c
index 488326ce5ed4..5cc2d59b774a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_dma.c
@@ -12,6 +12,8 @@ irqreturn_t mt792x_irq_handler(int irq, void *dev_instance)
{
struct mt792x_dev *dev = dev_instance;
+ if (test_bit(MT76_REMOVED, &dev->mt76.phy.state))
+ return IRQ_NONE;
mt76_wr(dev, dev->irq_map->host_irq_enable, 0);
if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state))
@@ -123,14 +125,13 @@ static void mt792x_dma_prefetch(struct mt792x_dev *dev)
int mt792x_dma_enable(struct mt792x_dev *dev)
{
- if (is_mt7925(&dev->mt76))
- mt76_rmw(dev, MT_UWFDMA0_GLO_CFG_EXT1, BIT(28), BIT(28));
-
/* configure perfetch settings */
mt792x_dma_prefetch(dev);
/* reset dma idx */
mt76_wr(dev, MT_WFDMA0_RST_DTX_PTR, ~0);
+ if (is_mt7925(&dev->mt76))
+ mt76_wr(dev, MT_WFDMA0_RST_DRX_PTR, ~0);
/* configure delay interrupt */
mt76_wr(dev, MT_WFDMA0_PRI_DLY_INT_CFG0, 0);
@@ -140,12 +141,20 @@ int mt792x_dma_enable(struct mt792x_dev *dev)
MT_WFDMA0_GLO_CFG_FIFO_LITTLE_ENDIAN |
MT_WFDMA0_GLO_CFG_CLK_GAT_DIS |
MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+ FIELD_PREP(MT_WFDMA0_GLO_CFG_DMA_SIZE, 3) |
+ MT_WFDMA0_GLO_CFG_FIFO_DIS_CHECK |
+ MT_WFDMA0_GLO_CFG_RX_WB_DDONE |
MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
mt76_set(dev, MT_WFDMA0_GLO_CFG,
MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+ if (is_mt7925(&dev->mt76)) {
+ mt76_rmw(dev, MT_UWFDMA0_GLO_CFG_EXT1, BIT(28), BIT(28));
+ mt76_set(dev, MT_WFDMA0_INT_RX_PRI, 0x0F00);
+ mt76_set(dev, MT_WFDMA0_INT_TX_PRI, 0x7F00);
+ }
mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
/* enable interrupts for TX/RX rings */
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h
index a99af23e4b56..458cfd0260b1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h
@@ -292,9 +292,12 @@
#define MT_WFDMA0_GLO_CFG_TX_DMA_BUSY BIT(1)
#define MT_WFDMA0_GLO_CFG_RX_DMA_EN BIT(2)
#define MT_WFDMA0_GLO_CFG_RX_DMA_BUSY BIT(3)
+#define MT_WFDMA0_GLO_CFG_DMA_SIZE GENMASK(5, 4)
#define MT_WFDMA0_GLO_CFG_TX_WB_DDONE BIT(6)
#define MT_WFDMA0_GLO_CFG_FW_DWLD_BYPASS_DMASHDL BIT(9)
+#define MT_WFDMA0_GLO_CFG_FIFO_DIS_CHECK BIT(11)
#define MT_WFDMA0_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12)
+#define MT_WFDMA0_GLO_CFG_RX_WB_DDONE BIT(13)
#define MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN BIT(15)
#define MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2 BIT(21)
#define MT_WFDMA0_GLO_CFG_OMIT_RX_INFO BIT(27)
@@ -322,6 +325,8 @@
#define MT_WFDMA0_RST_DTX_PTR MT_WFDMA0(0x20c)
#define MT_WFDMA0_RST_DRX_PTR MT_WFDMA0(0x280)
+#define MT_WFDMA0_INT_RX_PRI MT_WFDMA0(0x298)
+#define MT_WFDMA0_INT_TX_PRI MT_WFDMA0(0x29c)
#define MT_WFDMA0_GLO_CFG_EXT0 MT_WFDMA0(0x2b0)
#define MT_WFDMA0_CSR_TX_DMASHDL_ENABLE BIT(6)
#define MT_WFDMA0_PRI_DLY_INT_CFG0 MT_WFDMA0(0x2f0)
@@ -389,6 +394,9 @@
#define MT_HW_CHIPID 0x70010200
#define MT_HW_REV 0x70010204
+#define MT_HW_EMI_CTL 0x18011100
+#define MT_HW_EMI_CTL_SLPPROT_EN BIT(1)
+
#define MT_PCIE_MAC_BASE 0x10000
#define MT_PCIE_MAC(ofs) (MT_PCIE_MAC_BASE + (ofs))
#define MT_PCIE_MAC_INT_ENABLE MT_PCIE_MAC(0x188)
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
index 2dd283caed36..b49668a4b784 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
@@ -121,44 +121,25 @@ static void mt792xu_uhw_wr(struct mt76_dev *dev, u32 addr, u32 val)
static void mt792xu_dma_prefetch(struct mt792x_dev *dev)
{
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(0),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(0),
- MT_WPDMA0_BASE_PTR_MASK, 0x80);
-
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(1),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(1),
- MT_WPDMA0_BASE_PTR_MASK, 0xc0);
-
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(2),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(2),
- MT_WPDMA0_BASE_PTR_MASK, 0x100);
-
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(3),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(3),
- MT_WPDMA0_BASE_PTR_MASK, 0x140);
-
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(4),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(4),
- MT_WPDMA0_BASE_PTR_MASK, 0x180);
-
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(16),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(16),
- MT_WPDMA0_BASE_PTR_MASK, 0x280);
-
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(17),
- MT_WPDMA0_MAX_CNT_MASK, 4);
- mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL(17),
- MT_WPDMA0_BASE_PTR_MASK, 0x2c0);
+#define DMA_PREFETCH_CONF(_idx_, _cnt_, _base_) \
+ mt76_rmw(dev, MT_UWFDMA0_TX_RING_EXT_CTRL((_idx_)), \
+ MT_WPDMA0_MAX_CNT_MASK | MT_WPDMA0_BASE_PTR_MASK, \
+ FIELD_PREP(MT_WPDMA0_MAX_CNT_MASK, (_cnt_)) | \
+ FIELD_PREP(MT_WPDMA0_BASE_PTR_MASK, (_base_)))
+
+ DMA_PREFETCH_CONF(0, 4, 0x080);
+ DMA_PREFETCH_CONF(1, 4, 0x0c0);
+ DMA_PREFETCH_CONF(2, 4, 0x100);
+ DMA_PREFETCH_CONF(3, 4, 0x140);
+ DMA_PREFETCH_CONF(4, 4, 0x180);
+ DMA_PREFETCH_CONF(16, 4, 0x280);
+ DMA_PREFETCH_CONF(17, 4, 0x2c0);
}
static void mt792xu_wfdma_init(struct mt792x_dev *dev)
{
+ int i;
+
mt792xu_dma_prefetch(dev);
mt76_clear(dev, MT_UWFDMA0_GLO_CFG, MT_WFDMA0_GLO_CFG_OMIT_RX_INFO);
@@ -169,10 +150,27 @@ static void mt792xu_wfdma_init(struct mt792x_dev *dev)
MT_WFDMA0_GLO_CFG_TX_DMA_EN |
MT_WFDMA0_GLO_CFG_RX_DMA_EN);
- /* disable dmashdl */
- mt76_clear(dev, MT_UWFDMA0_GLO_CFG_EXT0,
- MT_WFDMA0_CSR_TX_DMASHDL_ENABLE);
- mt76_set(dev, MT_DMASHDL_SW_CONTROL, MT_DMASHDL_DMASHDL_BYPASS);
+ mt76_rmw(dev, MT_DMASHDL_REFILL, MT_DMASHDL_REFILL_MASK, 0xffe00000);
+ mt76_clear(dev, MT_DMASHDL_PAGE, MT_DMASHDL_GROUP_SEQ_ORDER);
+ mt76_rmw(dev, MT_DMASHDL_PKT_MAX_SIZE,
+ MT_DMASHDL_PKT_MAX_SIZE_PLE | MT_DMASHDL_PKT_MAX_SIZE_PSE,
+ FIELD_PREP(MT_DMASHDL_PKT_MAX_SIZE_PLE, 1) |
+ FIELD_PREP(MT_DMASHDL_PKT_MAX_SIZE_PSE, 0));
+ for (i = 0; i < 5; i++)
+ mt76_wr(dev, MT_DMASHDL_GROUP_QUOTA(i),
+ FIELD_PREP(MT_DMASHDL_GROUP_QUOTA_MIN, 0x3) |
+ FIELD_PREP(MT_DMASHDL_GROUP_QUOTA_MAX, 0xfff));
+ for (i = 5; i < 16; i++)
+ mt76_wr(dev, MT_DMASHDL_GROUP_QUOTA(i),
+ FIELD_PREP(MT_DMASHDL_GROUP_QUOTA_MIN, 0x0) |
+ FIELD_PREP(MT_DMASHDL_GROUP_QUOTA_MAX, 0x0));
+ mt76_wr(dev, MT_DMASHDL_Q_MAP(0), 0x32013201);
+ mt76_wr(dev, MT_DMASHDL_Q_MAP(1), 0x32013201);
+ mt76_wr(dev, MT_DMASHDL_Q_MAP(2), 0x55555444);
+ mt76_wr(dev, MT_DMASHDL_Q_MAP(3), 0x55555444);
+
+ mt76_wr(dev, MT_DMASHDL_SCHED_SET(0), 0x76540132);
+ mt76_wr(dev, MT_DMASHDL_SCHED_SET(1), 0xFEDCBA98);
mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
}
@@ -314,5 +312,6 @@ void mt792xu_disconnect(struct usb_interface *usb_intf)
}
EXPORT_SYMBOL_GPL(mt792xu_disconnect);
+MODULE_DESCRIPTION("MediaTek MT792x USB helpers");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c
index 483ad81b6eec..73e633d0d700 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c
@@ -237,7 +237,8 @@ void mt7996_dma_start(struct mt7996_dev *dev, bool reset, bool wed_reset)
MT_WFDMA0_GLO_CFG_TX_DMA_EN |
MT_WFDMA0_GLO_CFG_RX_DMA_EN |
MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
- MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+ MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2 |
+ MT_WFDMA0_GLO_CFG_EXT_EN);
if (dev->hif2)
mt76_set(dev, MT_WFDMA0_GLO_CFG + hif1_ofs,
@@ -694,7 +695,7 @@ void mt7996_dma_reset(struct mt7996_dev *dev, bool force)
mtk_wed_device_dma_reset(&dev->mt76.mmio.wed);
mt7996_dma_disable(dev, force);
- mt76_dma_wed_reset(&dev->mt76);
+ mt76_wed_dma_reset(&dev->mt76);
/* reset hw queues */
for (i = 0; i < __MT_TXQ_MAX; i++) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c
index 0cf0d1fe420a..283df84f1b43 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c
@@ -493,7 +493,7 @@ static void mt7996_mac_init_basic_rates(struct mt7996_dev *dev)
void mt7996_mac_init(struct mt7996_dev *dev)
{
-#define HIF_TXD_V2_1 4
+#define HIF_TXD_V2_1 0x21
int i;
mt76_clear(dev, MT_MDP_DCR2, MT_MDP_DCR2_RX_TRANS_SHORT);
@@ -507,11 +507,6 @@ void mt7996_mac_init(struct mt7996_dev *dev)
mt76_rmw_field(dev, i, MT_LED_GPIO_SEL_MASK, 4);
}
- /* txs report queue */
- mt76_rmw_field(dev, MT_DMA_TCRF1(0), MT_DMA_TCRF1_QIDX, 0);
- mt76_rmw_field(dev, MT_DMA_TCRF1(1), MT_DMA_TCRF1_QIDX, 6);
- mt76_rmw_field(dev, MT_DMA_TCRF1(2), MT_DMA_TCRF1_QIDX, 0);
-
/* rro module init */
if (is_mt7996(&dev->mt76))
mt7996_mcu_set_rro(dev, UNI_RRO_SET_PLATFORM_TYPE, 2);
@@ -1012,11 +1007,12 @@ mt7996_set_stream_he_txbf_caps(struct mt7996_phy *phy,
/* the maximum cap is 4 x 3, (Nr, Nc) = (3, 2) */
elem->phy_cap_info[7] |= min_t(int, sts - 1, 2) << 3;
- if (vif != NL80211_IFTYPE_AP)
+ if (!(vif == NL80211_IFTYPE_AP || vif == NL80211_IFTYPE_STATION))
return;
elem->phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER;
- elem->phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER;
+ if (vif == NL80211_IFTYPE_AP)
+ elem->phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER;
c = FIELD_PREP(IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK,
sts - 1) |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index 53258488d49f..0384fb059ddf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -732,6 +732,9 @@ mt7996_mac_write_txwi_8023(struct mt7996_dev *dev, __le32 *txwi,
FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype);
txwi[2] |= cpu_to_le32(val);
+
+ if (wcid->amsdu)
+ txwi[3] |= cpu_to_le32(MT_TXD3_HW_AMSDU);
}
static void
@@ -862,8 +865,6 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi,
val |= MT_TXD3_PROTECT_FRAME;
if (info->flags & IEEE80211_TX_CTL_NO_ACK)
val |= MT_TXD3_NO_ACK;
- if (wcid->amsdu)
- val |= MT_TXD3_HW_AMSDU;
txwi[3] = cpu_to_le32(val);
txwi[4] = 0;
@@ -1188,25 +1189,28 @@ mt7996_mac_add_txs_skb(struct mt7996_dev *dev, struct mt76_wcid *wcid,
struct ieee80211_tx_info *info;
struct sk_buff_head list;
struct rate_info rate = {};
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
bool cck = false;
u32 txrate, txs, mode, stbc;
txs = le32_to_cpu(txs_data[0]);
mt76_tx_status_lock(mdev, &list);
- skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
- if (skb) {
- info = IEEE80211_SKB_CB(skb);
- if (!(txs & MT_TXS0_ACK_ERROR_MASK))
- info->flags |= IEEE80211_TX_STAT_ACK;
+ /* only report MPDU TXS */
+ if (le32_get_bits(txs_data[0], MT_TXS0_TXS_FORMAT) == 0) {
+ skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
+ if (skb) {
+ info = IEEE80211_SKB_CB(skb);
+ if (!(txs & MT_TXS0_ACK_ERROR_MASK))
+ info->flags |= IEEE80211_TX_STAT_ACK;
- info->status.ampdu_len = 1;
- info->status.ampdu_ack_len =
- !!(info->flags & IEEE80211_TX_STAT_ACK);
+ info->status.ampdu_len = 1;
+ info->status.ampdu_ack_len =
+ !!(info->flags & IEEE80211_TX_STAT_ACK);
- info->status.rates[0].idx = -1;
+ info->status.rates[0].idx = -1;
+ }
}
if (mtk_wed_device_active(&dev->mt76.mmio.wed) && wcid->sta) {
@@ -2527,6 +2531,34 @@ static int mt7996_mac_check_twt_req(struct ieee80211_twt_setup *twt)
return 0;
}
+static bool
+mt7996_mac_twt_param_equal(struct mt7996_sta *msta,
+ struct ieee80211_twt_params *twt_agrt)
+{
+ u16 type = le16_to_cpu(twt_agrt->req_type);
+ u8 exp;
+ int i;
+
+ exp = FIELD_GET(IEEE80211_TWT_REQTYPE_WAKE_INT_EXP, type);
+ for (i = 0; i < MT7996_MAX_STA_TWT_AGRT; i++) {
+ struct mt7996_twt_flow *f;
+
+ if (!(msta->twt.flowid_mask & BIT(i)))
+ continue;
+
+ f = &msta->twt.flow[i];
+ if (f->duration == twt_agrt->min_twt_dur &&
+ f->mantissa == twt_agrt->mantissa &&
+ f->exp == exp &&
+ f->protection == !!(type & IEEE80211_TWT_REQTYPE_PROTECTION) &&
+ f->flowtype == !!(type & IEEE80211_TWT_REQTYPE_FLOWTYPE) &&
+ f->trigger == !!(type & IEEE80211_TWT_REQTYPE_TRIGGER))
+ return true;
+ }
+
+ return false;
+}
+
void mt7996_mac_add_twt_setup(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
struct ieee80211_twt_setup *twt)
@@ -2538,8 +2570,7 @@ void mt7996_mac_add_twt_setup(struct ieee80211_hw *hw,
enum ieee80211_twt_setup_cmd sta_setup_cmd;
struct mt7996_dev *dev = mt7996_hw_dev(hw);
struct mt7996_twt_flow *flow;
- int flowid, table_id;
- u8 exp;
+ u8 flowid, table_id, exp;
if (mt7996_mac_check_twt_req(twt))
goto out;
@@ -2552,9 +2583,19 @@ void mt7996_mac_add_twt_setup(struct ieee80211_hw *hw,
if (hweight8(msta->twt.flowid_mask) == ARRAY_SIZE(msta->twt.flow))
goto unlock;
+ if (twt_agrt->min_twt_dur < MT7996_MIN_TWT_DUR) {
+ setup_cmd = TWT_SETUP_CMD_DICTATE;
+ twt_agrt->min_twt_dur = MT7996_MIN_TWT_DUR;
+ goto unlock;
+ }
+
+ if (mt7996_mac_twt_param_equal(msta, twt_agrt))
+ goto unlock;
+
flowid = ffs(~msta->twt.flowid_mask) - 1;
- le16p_replace_bits(&twt_agrt->req_type, flowid,
- IEEE80211_TWT_REQTYPE_FLOWID);
+ twt_agrt->req_type &= ~cpu_to_le16(IEEE80211_TWT_REQTYPE_FLOWID);
+ twt_agrt->req_type |= le16_encode_bits(flowid,
+ IEEE80211_TWT_REQTYPE_FLOWID);
table_id = ffs(~dev->twt.table_mask) - 1;
exp = FIELD_GET(IEEE80211_TWT_REQTYPE_WAKE_INT_EXP, req_type);
@@ -2601,10 +2642,10 @@ void mt7996_mac_add_twt_setup(struct ieee80211_hw *hw,
unlock:
mutex_unlock(&dev->mt76.mutex);
out:
- le16p_replace_bits(&twt_agrt->req_type, setup_cmd,
- IEEE80211_TWT_REQTYPE_SETUP_CMD);
- twt->control = (twt->control & IEEE80211_TWT_CONTROL_WAKE_DUR_UNIT) |
- (twt->control & IEEE80211_TWT_CONTROL_RX_DISABLED);
+ twt_agrt->req_type &= ~cpu_to_le16(IEEE80211_TWT_REQTYPE_SETUP_CMD);
+ twt_agrt->req_type |=
+ le16_encode_bits(setup_cmd, IEEE80211_TWT_REQTYPE_SETUP_CMD);
+ twt->control = twt->control & IEEE80211_TWT_CONTROL_RX_DISABLED;
}
void mt7996_mac_twt_teardown_flow(struct mt7996_dev *dev,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
index 51deea84b642..f7da8d6dd903 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
@@ -350,9 +350,12 @@ static int mt7996_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
case WLAN_CIPHER_SUITE_SMS4:
+ break;
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- break;
+ if (key->keyidx == 6 || key->keyidx == 7)
+ break;
+ fallthrough;
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_WEP104:
default:
@@ -1450,6 +1453,10 @@ mt7996_net_fill_forward_path(struct ieee80211_hw *hw,
#endif
const struct ieee80211_ops mt7996_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt7996_tx,
.start = mt7996_start,
.stop = mt7996_stop,
@@ -1495,6 +1502,6 @@ const struct ieee80211_ops mt7996_ops = {
.set_radar_background = mt7996_set_radar_background,
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
.net_fill_forward_path = mt7996_net_fill_forward_path,
- .net_setup_tc = mt76_net_setup_tc,
+ .net_setup_tc = mt76_wed_net_setup_tc,
#endif
};
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
index 3c729b563edc..b44abe2acc81 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
@@ -341,7 +341,7 @@ mt7996_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
if (!vif->bss_conf.csa_active || vif->type == NL80211_IFTYPE_STATION)
return;
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
}
static void
@@ -732,13 +732,10 @@ void mt7996_mcu_rx_event(struct mt7996_dev *dev, struct sk_buff *skb)
static struct tlv *
mt7996_mcu_add_uni_tlv(struct sk_buff *skb, u16 tag, u16 len)
{
- struct tlv *ptlv, tlv = {
- .tag = cpu_to_le16(tag),
- .len = cpu_to_le16(len),
- };
+ struct tlv *ptlv = skb_put(skb, len);
- ptlv = skb_put(skb, len);
- memcpy(ptlv, &tlv, sizeof(tlv));
+ ptlv->tag = cpu_to_le16(tag);
+ ptlv->len = cpu_to_le16(len);
return ptlv;
}
@@ -1240,6 +1237,9 @@ mt7996_mcu_sta_he_6g_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
static void
mt7996_mcu_sta_eht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
{
+ struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv;
+ struct ieee80211_vif *vif = container_of((void *)msta->vif,
+ struct ieee80211_vif, drv_priv);
struct ieee80211_eht_mcs_nss_supp *mcs_map;
struct ieee80211_eht_cap_elem_fixed *elem;
struct sta_rec_eht *eht;
@@ -1259,8 +1259,17 @@ mt7996_mcu_sta_eht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
eht->phy_cap = cpu_to_le64(*(u64 *)elem->phy_cap_info);
eht->phy_cap_ext = cpu_to_le64(elem->phy_cap_info[8]);
- if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_20)
- memcpy(eht->mcs_map_bw20, &mcs_map->only_20mhz, sizeof(eht->mcs_map_bw20));
+ if (vif->type != NL80211_IFTYPE_STATION &&
+ (sta->deflink.he_cap.he_cap_elem.phy_cap_info[0] &
+ (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)) == 0) {
+ memcpy(eht->mcs_map_bw20, &mcs_map->only_20mhz,
+ sizeof(eht->mcs_map_bw20));
+ return;
+ }
+
memcpy(eht->mcs_map_bw80, &mcs_map->bw._80, sizeof(eht->mcs_map_bw80));
memcpy(eht->mcs_map_bw160, &mcs_map->bw._160, sizeof(eht->mcs_map_bw160));
memcpy(eht->mcs_map_bw320, &mcs_map->bw._320, sizeof(eht->mcs_map_bw320));
@@ -2510,7 +2519,7 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw,
info = IEEE80211_SKB_CB(skb);
info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->mt76->band_idx);
- len = sizeof(*bcn) + MT_TXD_SIZE + skb->len;
+ len = ALIGN(sizeof(*bcn) + MT_TXD_SIZE + skb->len, 4);
tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_BCN_CONTENT, len);
bcn = (struct bss_bcn_content_tlv *)tlv;
bcn->enable = en;
@@ -2579,8 +2588,7 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev,
info->band = band;
info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->mt76->band_idx);
- len = sizeof(*discov) + MT_TXD_SIZE + skb->len;
-
+ len = ALIGN(sizeof(*discov) + MT_TXD_SIZE + skb->len, 4);
tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_OFFLOAD, len);
discov = (struct bss_inband_discovery_tlv *)tlv;
@@ -3539,7 +3547,7 @@ int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset)
u32 addr = le32_to_cpu(*(__le32 *)(skb->data + 12));
u8 *buf = (u8 *)dev->mt76.eeprom.data + addr;
- skb_pull(skb, 64);
+ skb_pull(skb, 48);
memcpy(buf, skb->data, MT7996_EEPROM_BLOCK_SIZE);
}
@@ -4477,7 +4485,8 @@ int mt7996_mcu_set_txpower_sku(struct mt7996_phy *phy)
skb_put_data(skb, &req, sizeof(req));
/* cck and ofdm */
- skb_put_data(skb, &la.cck, sizeof(la.cck) + sizeof(la.ofdm));
+ skb_put_data(skb, &la.cck, sizeof(la.cck));
+ skb_put_data(skb, &la.ofdm, sizeof(la.ofdm));
/* ht20 */
skb_put_data(skb, &la.mcs[0], 8);
/* ht40 */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h
index 36cacc495c75..43468bcaffc6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h
@@ -800,10 +800,10 @@ enum {
sizeof(struct sta_rec_hdr_trans) + \
sizeof(struct tlv))
-#define MT7996_MAX_BEACON_SIZE 1342
+#define MT7996_MAX_BEACON_SIZE 1338
#define MT7996_BEACON_UPDATE_SIZE (sizeof(struct bss_req_hdr) + \
sizeof(struct bss_bcn_content_tlv) + \
- MT_TXD_SIZE + \
+ 4 + MT_TXD_SIZE + \
sizeof(struct bss_bcn_cntdwn_tlv) + \
sizeof(struct bss_bcn_mbss_tlv))
#define MT7996_MAX_BSS_OFFLOAD_SIZE (MT7996_MAX_BEACON_SIZE + \
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
index c50d89a445e9..304e5fd14803 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
@@ -140,7 +140,6 @@ static u32 mt7996_reg_map_l1(struct mt7996_dev *dev, u32 addr)
u32 offset = FIELD_GET(MT_HIF_REMAP_L1_OFFSET, addr);
u32 base = FIELD_GET(MT_HIF_REMAP_L1_BASE, addr);
- dev->reg_l1_backup = dev->bus_ops->rr(&dev->mt76, MT_HIF_REMAP_L1);
dev->bus_ops->rmw(&dev->mt76, MT_HIF_REMAP_L1,
MT_HIF_REMAP_L1_MASK,
FIELD_PREP(MT_HIF_REMAP_L1_MASK, base));
@@ -155,7 +154,6 @@ static u32 mt7996_reg_map_l2(struct mt7996_dev *dev, u32 addr)
u32 offset = FIELD_GET(MT_HIF_REMAP_L2_OFFSET, addr);
u32 base = FIELD_GET(MT_HIF_REMAP_L2_BASE, addr);
- dev->reg_l2_backup = dev->bus_ops->rr(&dev->mt76, MT_HIF_REMAP_L2);
dev->bus_ops->rmw(&dev->mt76, MT_HIF_REMAP_L2,
MT_HIF_REMAP_L2_MASK,
FIELD_PREP(MT_HIF_REMAP_L2_MASK, base));
@@ -165,26 +163,10 @@ static u32 mt7996_reg_map_l2(struct mt7996_dev *dev, u32 addr)
return MT_HIF_REMAP_BASE_L2 + offset;
}
-static void mt7996_reg_remap_restore(struct mt7996_dev *dev)
-{
- /* remap to ori status */
- if (unlikely(dev->reg_l1_backup)) {
- dev->bus_ops->wr(&dev->mt76, MT_HIF_REMAP_L1, dev->reg_l1_backup);
- dev->reg_l1_backup = 0;
- }
-
- if (dev->reg_l2_backup) {
- dev->bus_ops->wr(&dev->mt76, MT_HIF_REMAP_L2, dev->reg_l2_backup);
- dev->reg_l2_backup = 0;
- }
-}
-
static u32 __mt7996_reg_addr(struct mt7996_dev *dev, u32 addr)
{
int i;
- mt7996_reg_remap_restore(dev);
-
if (addr < 0x100000)
return addr;
@@ -201,6 +183,11 @@ static u32 __mt7996_reg_addr(struct mt7996_dev *dev, u32 addr)
return dev->reg.map[i].mapped + ofs;
}
+ return 0;
+}
+
+static u32 __mt7996_reg_remap_addr(struct mt7996_dev *dev, u32 addr)
+{
if ((addr >= MT_INFRA_BASE && addr < MT_WFSYS0_PHY_START) ||
(addr >= MT_WFSYS0_PHY_START && addr < MT_WFSYS1_PHY_START) ||
(addr >= MT_WFSYS1_PHY_START && addr <= MT_WFSYS1_PHY_END))
@@ -225,28 +212,60 @@ void mt7996_memcpy_fromio(struct mt7996_dev *dev, void *buf, u32 offset,
{
u32 addr = __mt7996_reg_addr(dev, offset);
- memcpy_fromio(buf, dev->mt76.mmio.regs + addr, len);
+ if (addr) {
+ memcpy_fromio(buf, dev->mt76.mmio.regs + addr, len);
+ return;
+ }
+
+ spin_lock_bh(&dev->reg_lock);
+ memcpy_fromio(buf, dev->mt76.mmio.regs +
+ __mt7996_reg_remap_addr(dev, offset), len);
+ spin_unlock_bh(&dev->reg_lock);
}
static u32 mt7996_rr(struct mt76_dev *mdev, u32 offset)
{
struct mt7996_dev *dev = container_of(mdev, struct mt7996_dev, mt76);
+ u32 addr = __mt7996_reg_addr(dev, offset), val;
+
+ if (addr)
+ return dev->bus_ops->rr(mdev, addr);
- return dev->bus_ops->rr(mdev, __mt7996_reg_addr(dev, offset));
+ spin_lock_bh(&dev->reg_lock);
+ val = dev->bus_ops->rr(mdev, __mt7996_reg_remap_addr(dev, offset));
+ spin_unlock_bh(&dev->reg_lock);
+
+ return val;
}
static void mt7996_wr(struct mt76_dev *mdev, u32 offset, u32 val)
{
struct mt7996_dev *dev = container_of(mdev, struct mt7996_dev, mt76);
+ u32 addr = __mt7996_reg_addr(dev, offset);
- dev->bus_ops->wr(mdev, __mt7996_reg_addr(dev, offset), val);
+ if (addr) {
+ dev->bus_ops->wr(mdev, addr, val);
+ return;
+ }
+
+ spin_lock_bh(&dev->reg_lock);
+ dev->bus_ops->wr(mdev, __mt7996_reg_remap_addr(dev, offset), val);
+ spin_unlock_bh(&dev->reg_lock);
}
static u32 mt7996_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
{
struct mt7996_dev *dev = container_of(mdev, struct mt7996_dev, mt76);
+ u32 addr = __mt7996_reg_addr(dev, offset);
+
+ if (addr)
+ return dev->bus_ops->rmw(mdev, addr, mask, val);
+
+ spin_lock_bh(&dev->reg_lock);
+ val = dev->bus_ops->rmw(mdev, __mt7996_reg_remap_addr(dev, offset), mask, val);
+ spin_unlock_bh(&dev->reg_lock);
- return dev->bus_ops->rmw(mdev, __mt7996_reg_addr(dev, offset), mask, val);
+ return val;
}
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
@@ -391,13 +410,13 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
wed->wlan.amsdu_max_len = 1536;
wed->wlan.init_buf = mt7996_wed_init_buf;
- wed->wlan.init_rx_buf = mt76_mmio_wed_init_rx_buf;
- wed->wlan.release_rx_buf = mt76_mmio_wed_release_rx_buf;
- wed->wlan.offload_enable = mt76_mmio_wed_offload_enable;
- wed->wlan.offload_disable = mt76_mmio_wed_offload_disable;
+ wed->wlan.init_rx_buf = mt76_wed_init_rx_buf;
+ wed->wlan.release_rx_buf = mt76_wed_release_rx_buf;
+ wed->wlan.offload_enable = mt76_wed_offload_enable;
+ wed->wlan.offload_disable = mt76_wed_offload_disable;
if (!hif2) {
wed->wlan.reset = mt7996_mmio_wed_reset;
- wed->wlan.reset_complete = mt76_mmio_wed_reset_complete;
+ wed->wlan.reset_complete = mt76_wed_reset_complete;
}
if (mtk_wed_device_attach(wed))
@@ -421,6 +440,7 @@ static int mt7996_mmio_init(struct mt76_dev *mdev,
dev = container_of(mdev, struct mt7996_dev, mt76);
mt76_mmio_init(&dev->mt76, mem_base);
+ spin_lock_init(&dev->reg_lock);
switch (device_id) {
case 0x7990:
@@ -650,4 +670,5 @@ static void __exit mt7996_exit(void)
module_init(mt7996_init);
module_exit(mt7996_exit);
+MODULE_DESCRIPTION("MediaTek MT7996 MMIO helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h
index bc73bcb47bf0..36d1f247d55a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h
@@ -53,6 +53,7 @@
#define MT7996_MAX_TWT_AGRT 16
#define MT7996_MAX_STA_TWT_AGRT 8
+#define MT7996_MIN_TWT_DUR 64
#define MT7996_MAX_QUEUE (__MT_RXQ_MAX + __MT_MCUQ_MAX + 3)
/* NOTE: used to map mt76_rates. idx may change if firmware expands table */
@@ -320,12 +321,11 @@ struct mt7996_dev {
struct rchan *relay_fwlog;
struct {
- u8 table_mask;
+ u16 table_mask;
u8 n_agrt;
} twt;
- u32 reg_l1_backup;
- u32 reg_l2_backup;
+ spinlock_t reg_lock;
u8 wtbl_size_group;
};
diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
index c52d550f0c32..3e88798df017 100644
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -672,4 +672,5 @@ EXPORT_SYMBOL_GPL(mt76s_init);
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT76x SDIO helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 1584665fe3cb..342c3aea549d 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -767,7 +767,7 @@ static void mt76u_status_worker(struct mt76_worker *w)
if (!test_bit(MT76_STATE_RUNNING, &dev->phy.state))
return;
- for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ for (i = 0; i <= MT_TXQ_PSD; i++) {
q = dev->phy.q_tx[i];
if (!q)
continue;
@@ -872,9 +872,8 @@ mt76u_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
if (err < 0)
return err;
- mt76u_fill_bulk_urb(dev, USB_DIR_OUT, q2ep(q->hw_idx),
- q->entry[idx].urb, mt76u_complete_tx,
- &q->entry[idx]);
+ mt76u_fill_bulk_urb(dev, USB_DIR_OUT, q->ep, q->entry[idx].urb,
+ mt76u_complete_tx, &q->entry[idx]);
q->head = (q->head + 1) % q->ndesc;
q->entry[idx].skb = tx_info.skb;
@@ -906,9 +905,13 @@ static void mt76u_tx_kick(struct mt76_dev *dev, struct mt76_queue *q)
}
}
-static u8 mt76u_ac_to_hwq(struct mt76_dev *dev, u8 ac)
+static void
+mt76u_ac_to_hwq(struct mt76_dev *dev, struct mt76_queue *q, u8 qid)
{
- if (mt76_chip(dev) == 0x7663) {
+ u8 ac = qid < IEEE80211_NUM_ACS ? qid : IEEE80211_AC_BE;
+
+ switch (mt76_chip(dev)) {
+ case 0x7663: {
static const u8 lmac_queue_map[] = {
/* ac to lmac mapping */
[IEEE80211_AC_BK] = 0,
@@ -917,33 +920,36 @@ static u8 mt76u_ac_to_hwq(struct mt76_dev *dev, u8 ac)
[IEEE80211_AC_VO] = 4,
};
- if (WARN_ON(ac >= ARRAY_SIZE(lmac_queue_map)))
- return 1; /* BE */
-
- return lmac_queue_map[ac];
+ q->hw_idx = lmac_queue_map[ac];
+ q->ep = q->hw_idx + 1;
+ break;
+ }
+ case 0x7961:
+ case 0x7925:
+ q->hw_idx = mt76_ac_to_hwq(ac);
+ q->ep = qid == MT_TXQ_PSD ? MT_EP_OUT_HCCA : q->hw_idx + 1;
+ break;
+ default:
+ q->hw_idx = mt76_ac_to_hwq(ac);
+ q->ep = q->hw_idx + 1;
+ break;
}
-
- return mt76_ac_to_hwq(ac);
}
static int mt76u_alloc_tx(struct mt76_dev *dev)
{
- struct mt76_queue *q;
- int i, j, err;
+ int i;
for (i = 0; i <= MT_TXQ_PSD; i++) {
- if (i >= IEEE80211_NUM_ACS) {
- dev->phy.q_tx[i] = dev->phy.q_tx[0];
- continue;
- }
+ struct mt76_queue *q;
+ int j, err;
q = devm_kzalloc(dev->dev, sizeof(*q), GFP_KERNEL);
if (!q)
return -ENOMEM;
spin_lock_init(&q->lock);
- q->hw_idx = mt76u_ac_to_hwq(dev, i);
-
+ mt76u_ac_to_hwq(dev, q, i);
dev->phy.q_tx[i] = q;
q->entry = devm_kcalloc(dev->dev,
@@ -969,7 +975,7 @@ static void mt76u_free_tx(struct mt76_dev *dev)
mt76_worker_teardown(&dev->usb.status_worker);
- for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ for (i = 0; i <= MT_TXQ_PSD; i++) {
struct mt76_queue *q;
int j;
@@ -999,7 +1005,7 @@ void mt76u_stop_tx(struct mt76_dev *dev)
dev_err(dev->dev, "timed out waiting for pending tx\n");
- for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ for (i = 0; i <= MT_TXQ_PSD; i++) {
q = dev->phy.q_tx[i];
if (!q)
continue;
@@ -1013,7 +1019,7 @@ void mt76u_stop_tx(struct mt76_dev *dev)
/* On device removal we maight queue skb's, but mt76u_tx_kick()
* will fail to submit urb, cleanup those skb's manually.
*/
- for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ for (i = 0; i <= MT_TXQ_PSD; i++) {
q = dev->phy.q_tx[i];
if (!q)
continue;
@@ -1128,4 +1134,5 @@ int mt76u_init(struct mt76_dev *dev, struct usb_interface *intf)
EXPORT_SYMBOL_GPL(mt76u_init);
MODULE_AUTHOR("Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>");
+MODULE_DESCRIPTION("MediaTek MT76x USB helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c
index fc76c66ff1a5..d6c01a2dd198 100644
--- a/drivers/net/wireless/mediatek/mt76/util.c
+++ b/drivers/net/wireless/mediatek/mt76/util.c
@@ -138,4 +138,5 @@ int __mt76_worker_fn(void *ptr)
}
EXPORT_SYMBOL_GPL(__mt76_worker_fn);
+MODULE_DESCRIPTION("MediaTek MT76x helpers");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/wed.c b/drivers/net/wireless/mediatek/mt76/wed.c
new file mode 100644
index 000000000000..f89e4537555c
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/wed.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (C) 2023 Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#include "mt76.h"
+#include "dma.h"
+
+void mt76_wed_release_rx_buf(struct mtk_wed_device *wed)
+{
+ struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
+ int i;
+
+ for (i = 0; i < dev->rx_token_size; i++) {
+ struct mt76_txwi_cache *t;
+
+ t = mt76_rx_token_release(dev, i);
+ if (!t || !t->ptr)
+ continue;
+
+ mt76_put_page_pool_buf(t->ptr, false);
+ t->ptr = NULL;
+
+ mt76_put_rxwi(dev, t);
+ }
+
+ mt76_free_pending_rxwi(dev);
+}
+EXPORT_SYMBOL_GPL(mt76_wed_release_rx_buf);
+
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+u32 mt76_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
+{
+ struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
+ struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc;
+ struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
+ int i, len = SKB_WITH_OVERHEAD(q->buf_size);
+ struct mt76_txwi_cache *t = NULL;
+
+ for (i = 0; i < size; i++) {
+ enum dma_data_direction dir;
+ dma_addr_t addr;
+ u32 offset;
+ int token;
+ void *buf;
+
+ t = mt76_get_rxwi(dev);
+ if (!t)
+ goto unmap;
+
+ buf = mt76_get_page_pool_buf(q, &offset, q->buf_size);
+ if (!buf)
+ goto unmap;
+
+ addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + offset;
+ dir = page_pool_get_dma_dir(q->page_pool);
+ dma_sync_single_for_device(dev->dma_dev, addr, len, dir);
+
+ desc->buf0 = cpu_to_le32(addr);
+ token = mt76_rx_token_consume(dev, buf, t, addr);
+ if (token < 0) {
+ mt76_put_page_pool_buf(buf, false);
+ goto unmap;
+ }
+
+ token = FIELD_PREP(MT_DMA_CTL_TOKEN, token);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ token |= FIELD_PREP(MT_DMA_CTL_SDP0_H, addr >> 32);
+#endif
+ desc->token |= cpu_to_le32(token);
+ desc++;
+ }
+
+ return 0;
+
+unmap:
+ if (t)
+ mt76_put_rxwi(dev, t);
+ mt76_wed_release_rx_buf(wed);
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mt76_wed_init_rx_buf);
+
+int mt76_wed_offload_enable(struct mtk_wed_device *wed)
+{
+ struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
+
+ spin_lock_bh(&dev->token_lock);
+ dev->token_size = wed->wlan.token_start;
+ spin_unlock_bh(&dev->token_lock);
+
+ return !wait_event_timeout(dev->tx_wait, !dev->wed_token_count, HZ);
+}
+EXPORT_SYMBOL_GPL(mt76_wed_offload_enable);
+
+int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset)
+{
+ int ret = 0, type, ring;
+ u16 flags;
+
+ if (!q || !q->ndesc)
+ return -EINVAL;
+
+ flags = q->flags;
+ if (!q->wed || !mtk_wed_device_active(q->wed))
+ q->flags &= ~MT_QFLAG_WED;
+
+ if (!(q->flags & MT_QFLAG_WED))
+ return 0;
+
+ type = FIELD_GET(MT_QFLAG_WED_TYPE, q->flags);
+ ring = FIELD_GET(MT_QFLAG_WED_RING, q->flags);
+
+ switch (type) {
+ case MT76_WED_Q_TX:
+ ret = mtk_wed_device_tx_ring_setup(q->wed, ring, q->regs,
+ reset);
+ if (!ret)
+ q->wed_regs = q->wed->tx_ring[ring].reg_base;
+ break;
+ case MT76_WED_Q_TXFREE:
+ /* WED txfree queue needs ring to be initialized before setup */
+ q->flags = 0;
+ mt76_dma_queue_reset(dev, q);
+ mt76_dma_rx_fill(dev, q, false);
+
+ ret = mtk_wed_device_txfree_ring_setup(q->wed, q->regs);
+ if (!ret)
+ q->wed_regs = q->wed->txfree_ring.reg_base;
+ break;
+ case MT76_WED_Q_RX:
+ ret = mtk_wed_device_rx_ring_setup(q->wed, ring, q->regs,
+ reset);
+ if (!ret)
+ q->wed_regs = q->wed->rx_ring[ring].reg_base;
+ break;
+ case MT76_WED_RRO_Q_DATA:
+ q->flags &= ~MT_QFLAG_WED;
+ __mt76_dma_queue_reset(dev, q, false);
+ mtk_wed_device_rro_rx_ring_setup(q->wed, ring, q->regs);
+ q->head = q->ndesc - 1;
+ q->queued = q->head;
+ break;
+ case MT76_WED_RRO_Q_MSDU_PG:
+ q->flags &= ~MT_QFLAG_WED;
+ __mt76_dma_queue_reset(dev, q, false);
+ mtk_wed_device_msdu_pg_rx_ring_setup(q->wed, ring, q->regs);
+ q->head = q->ndesc - 1;
+ q->queued = q->head;
+ break;
+ case MT76_WED_RRO_Q_IND:
+ q->flags &= ~MT_QFLAG_WED;
+ mt76_dma_queue_reset(dev, q);
+ mt76_dma_rx_fill(dev, q, false);
+ mtk_wed_device_ind_rx_ring_setup(q->wed, q->regs);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ q->flags = flags;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mt76_wed_dma_setup);
+#endif /*CONFIG_NET_MEDIATEK_SOC_WED */
+
+void mt76_wed_offload_disable(struct mtk_wed_device *wed)
+{
+ struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
+
+ spin_lock_bh(&dev->token_lock);
+ dev->token_size = dev->drv->token_size;
+ spin_unlock_bh(&dev->token_lock);
+}
+EXPORT_SYMBOL_GPL(mt76_wed_offload_disable);
+
+void mt76_wed_reset_complete(struct mtk_wed_device *wed)
+{
+ struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed);
+
+ complete(&dev->mmio.wed_reset_complete);
+}
+EXPORT_SYMBOL_GPL(mt76_wed_reset_complete);
+
+int mt76_wed_net_setup_tc(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mt76_phy *phy = hw->priv;
+ struct mtk_wed_device *wed = &phy->dev->mmio.wed;
+
+ if (!mtk_wed_device_active(wed))
+ return -EOPNOTSUPP;
+
+ return mtk_wed_device_setup_tc(wed, netdev, type, type_data);
+}
+EXPORT_SYMBOL_GPL(mt76_wed_net_setup_tc);
+
+void mt76_wed_dma_reset(struct mt76_dev *dev)
+{
+ struct mt76_mmio *mmio = &dev->mmio;
+
+ if (!test_bit(MT76_STATE_WED_RESET, &dev->phy.state))
+ return;
+
+ complete(&mmio->wed_reset);
+
+ if (!wait_for_completion_timeout(&mmio->wed_reset_complete, 3 * HZ))
+ dev_err(dev->dev, "wed reset complete timeout\n");
+}
+EXPORT_SYMBOL_GPL(mt76_wed_dma_reset);
diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c
index c8d332456a6b..a7330576486b 100644
--- a/drivers/net/wireless/mediatek/mt7601u/main.c
+++ b/drivers/net/wireless/mediatek/mt7601u/main.c
@@ -405,6 +405,10 @@ out:
}
const struct ieee80211_ops mt7601u_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = mt7601u_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = mt7601u_start,
diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
index ad2509d8c99a..089102ed9ae5 100644
--- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
+++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
@@ -356,7 +356,7 @@ static int connect(struct wiphy *wiphy, struct net_device *dev,
memcpy(vif->auth.ssid.ssid, sme->ssid, sme->ssid_len);
vif->auth.ssid.ssid_len = sme->ssid_len;
}
- vif->auth.key_mgmt_suite = cpu_to_be32(sme->crypto.akm_suites[0]);
+ vif->auth.key_mgmt_suite = sme->crypto.akm_suites[0];
ether_addr_copy(vif->auth.bssid, sme->bssid);
break;
@@ -1518,7 +1518,7 @@ static struct wilc_vif *wilc_get_vif_from_type(struct wilc *wl, int type)
{
struct wilc_vif *vif;
- list_for_each_entry_rcu(vif, &wl->vif_list, list) {
+ wilc_for_each_vif(wl, vif) {
if (vif->iftype == type)
return vif;
}
@@ -1609,7 +1609,6 @@ static int del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev)
cfg80211_unregister_netdevice(vif->ndev);
vif->monitor_flag = 0;
- wilc_set_operation_mode(vif, 0, 0, 0);
mutex_lock(&wl->vif_mutex);
list_del_rcu(&vif->list);
wl->vif_num--;
@@ -1804,15 +1803,24 @@ int wilc_cfg80211_init(struct wilc **wilc, struct device *dev, int io_type,
INIT_LIST_HEAD(&wl->rxq_head.list);
INIT_LIST_HEAD(&wl->vif_list);
+ wl->hif_workqueue = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM,
+ wiphy_name(wl->wiphy));
+ if (!wl->hif_workqueue) {
+ ret = -ENOMEM;
+ goto free_cfg;
+ }
vif = wilc_netdev_ifc_init(wl, "wlan%d", WILC_STATION_MODE,
NL80211_IFTYPE_STATION, false);
if (IS_ERR(vif)) {
ret = PTR_ERR(vif);
- goto free_cfg;
+ goto free_hq;
}
return 0;
+free_hq:
+ destroy_workqueue(wl->hif_workqueue);
+
free_cfg:
wilc_wlan_cfg_deinit(wl);
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index 839f142663e8..f1085ccb7eed 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -107,7 +107,7 @@ static struct wilc_vif *wilc_get_vif_from_idx(struct wilc *wilc, int idx)
if (index < 0 || index >= WILC_NUM_CONCURRENT_IFC)
return NULL;
- list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
+ wilc_for_each_vif(wilc, vif) {
if (vif->idx == index)
return vif;
}
@@ -377,38 +377,49 @@ struct wilc_join_bss_param *
wilc_parse_join_bss_param(struct cfg80211_bss *bss,
struct cfg80211_crypto_settings *crypto)
{
- struct wilc_join_bss_param *param;
- struct ieee80211_p2p_noa_attr noa_attr;
- u8 rates_len = 0;
- const u8 *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie;
+ const u8 *ies_data, *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie;
const u8 *ht_ie, *wpa_ie, *wmm_ie, *rsn_ie;
+ struct ieee80211_p2p_noa_attr noa_attr;
+ const struct cfg80211_bss_ies *ies;
+ struct wilc_join_bss_param *param;
+ u8 rates_len = 0, ies_len;
int ret;
- const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies);
param = kzalloc(sizeof(*param), GFP_KERNEL);
if (!param)
return NULL;
+ rcu_read_lock();
+ ies = rcu_dereference(bss->ies);
+ ies_data = kmemdup(ies->data, ies->len, GFP_ATOMIC);
+ if (!ies_data) {
+ rcu_read_unlock();
+ kfree(param);
+ return NULL;
+ }
+ ies_len = ies->len;
+ rcu_read_unlock();
+
param->beacon_period = cpu_to_le16(bss->beacon_interval);
param->cap_info = cpu_to_le16(bss->capability);
param->bss_type = WILC_FW_BSS_TYPE_INFRA;
param->ch = ieee80211_frequency_to_channel(bss->channel->center_freq);
ether_addr_copy(param->bssid, bss->bssid);
- ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len);
+ ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies_data, ies_len);
if (ssid_elm) {
if (ssid_elm[1] <= IEEE80211_MAX_SSID_LEN)
memcpy(param->ssid, ssid_elm + 2, ssid_elm[1]);
}
- tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len);
+ tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies_data, ies_len);
if (tim_elm && tim_elm[1] >= 2)
param->dtim_period = tim_elm[3];
memset(param->p_suites, 0xFF, 3);
memset(param->akm_suites, 0xFF, 3);
- rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies->data, ies->len);
+ rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies_data, ies_len);
if (rates_ie) {
rates_len = rates_ie[1];
if (rates_len > WILC_MAX_RATES_SUPPORTED)
@@ -419,7 +430,7 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
if (rates_len < WILC_MAX_RATES_SUPPORTED) {
supp_rates_ie = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES,
- ies->data, ies->len);
+ ies_data, ies_len);
if (supp_rates_ie) {
u8 ext_rates = supp_rates_ie[1];
@@ -434,11 +445,11 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
}
}
- ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies->data, ies->len);
+ ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies_data, ies_len);
if (ht_ie)
param->ht_capable = true;
- ret = cfg80211_get_p2p_attr(ies->data, ies->len,
+ ret = cfg80211_get_p2p_attr(ies_data, ies_len,
IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
(u8 *)&noa_attr, sizeof(noa_attr));
if (ret > 0) {
@@ -462,7 +473,7 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
}
wmm_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT,
WLAN_OUI_TYPE_MICROSOFT_WMM,
- ies->data, ies->len);
+ ies_data, ies_len);
if (wmm_ie) {
struct ieee80211_wmm_param_ie *ie;
@@ -477,13 +488,13 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
wpa_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT,
WLAN_OUI_TYPE_MICROSOFT_WPA,
- ies->data, ies->len);
+ ies_data, ies_len);
if (wpa_ie) {
param->mode_802_11i = 1;
param->rsn_found = true;
}
- rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len);
+ rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies_data, ies_len);
if (rsn_ie) {
int rsn_ie_len = sizeof(struct element) + rsn_ie[1];
int offset = 8;
@@ -517,6 +528,7 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
param->akm_suites[i] = crypto->akm_suites[i] & 0xFF;
}
+ kfree(ies_data);
return (void *)param;
}
@@ -1555,26 +1567,28 @@ int wilc_deinit(struct wilc_vif *vif)
void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length)
{
- int result;
- struct host_if_msg *msg;
- int id;
struct host_if_drv *hif_drv;
+ struct host_if_msg *msg;
struct wilc_vif *vif;
+ int srcu_idx;
+ int result;
+ int id;
id = get_unaligned_le32(&buffer[length - 4]);
+ srcu_idx = srcu_read_lock(&wilc->srcu);
vif = wilc_get_vif_from_idx(wilc, id);
if (!vif)
- return;
- hif_drv = vif->hif_drv;
+ goto out;
+ hif_drv = vif->hif_drv;
if (!hif_drv) {
netdev_err(vif->ndev, "driver not init[%p]\n", hif_drv);
- return;
+ goto out;
}
msg = wilc_alloc_work(vif, handle_rcvd_ntwrk_info, false);
if (IS_ERR(msg))
- return;
+ goto out;
msg->body.net_info.frame_len = get_unaligned_le16(&buffer[6]) - 1;
msg->body.net_info.rssi = buffer[8];
@@ -1583,7 +1597,7 @@ void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length)
GFP_KERNEL);
if (!msg->body.net_info.mgmt) {
kfree(msg);
- return;
+ goto out;
}
result = wilc_enqueue_work(msg);
@@ -1592,43 +1606,41 @@ void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length)
kfree(msg->body.net_info.mgmt);
kfree(msg);
}
+out:
+ srcu_read_unlock(&wilc->srcu, srcu_idx);
}
void wilc_gnrl_async_info_received(struct wilc *wilc, u8 *buffer, u32 length)
{
- int result;
- struct host_if_msg *msg;
- int id;
struct host_if_drv *hif_drv;
+ struct host_if_msg *msg;
struct wilc_vif *vif;
+ int srcu_idx;
+ int result;
+ int id;
mutex_lock(&wilc->deinit_lock);
id = get_unaligned_le32(&buffer[length - 4]);
+ srcu_idx = srcu_read_lock(&wilc->srcu);
vif = wilc_get_vif_from_idx(wilc, id);
- if (!vif) {
- mutex_unlock(&wilc->deinit_lock);
- return;
- }
+ if (!vif)
+ goto out;
hif_drv = vif->hif_drv;
if (!hif_drv) {
- mutex_unlock(&wilc->deinit_lock);
- return;
+ goto out;
}
if (!hif_drv->conn_info.conn_result) {
netdev_err(vif->ndev, "%s: conn_result is NULL\n", __func__);
- mutex_unlock(&wilc->deinit_lock);
- return;
+ goto out;
}
msg = wilc_alloc_work(vif, handle_rcvd_gnrl_async_info, false);
- if (IS_ERR(msg)) {
- mutex_unlock(&wilc->deinit_lock);
- return;
- }
+ if (IS_ERR(msg))
+ goto out;
msg->body.mac_info.status = buffer[7];
result = wilc_enqueue_work(msg);
@@ -1636,32 +1648,36 @@ void wilc_gnrl_async_info_received(struct wilc *wilc, u8 *buffer, u32 length)
netdev_err(vif->ndev, "%s: enqueue work failed\n", __func__);
kfree(msg);
}
-
+out:
+ srcu_read_unlock(&wilc->srcu, srcu_idx);
mutex_unlock(&wilc->deinit_lock);
}
void wilc_scan_complete_received(struct wilc *wilc, u8 *buffer, u32 length)
{
- int result;
- int id;
struct host_if_drv *hif_drv;
struct wilc_vif *vif;
+ int srcu_idx;
+ int result;
+ int id;
id = get_unaligned_le32(&buffer[length - 4]);
+ srcu_idx = srcu_read_lock(&wilc->srcu);
vif = wilc_get_vif_from_idx(wilc, id);
if (!vif)
- return;
- hif_drv = vif->hif_drv;
+ goto out;
- if (!hif_drv)
- return;
+ hif_drv = vif->hif_drv;
+ if (!hif_drv) {
+ goto out;
+ }
if (hif_drv->usr_scan_req.scan_result) {
struct host_if_msg *msg;
msg = wilc_alloc_work(vif, handle_scan_complete, false);
if (IS_ERR(msg))
- return;
+ goto out;
result = wilc_enqueue_work(msg);
if (result) {
@@ -1670,6 +1686,8 @@ void wilc_scan_complete_received(struct wilc *wilc, u8 *buffer, u32 length)
kfree(msg);
}
}
+out:
+ srcu_read_unlock(&wilc->srcu, srcu_idx);
}
int wilc_remain_on_channel(struct wilc_vif *vif, u64 cookie, u16 chan,
diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c
index 91d71e0f7ef2..710e29bea560 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
@@ -96,7 +96,7 @@ static struct net_device *get_if_handler(struct wilc *wilc, u8 *mac_header)
struct wilc_vif *vif;
struct ieee80211_hdr *h = (struct ieee80211_hdr *)mac_header;
- list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
+ wilc_for_each_vif(wilc, vif) {
if (vif->iftype == WILC_STATION_MODE)
if (ether_addr_equal_unaligned(h->addr2, vif->bssid)) {
ndev = vif->ndev;
@@ -132,7 +132,7 @@ int wilc_wlan_get_num_conn_ifcs(struct wilc *wilc)
struct wilc_vif *vif;
srcu_idx = srcu_read_lock(&wilc->srcu);
- list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
+ wilc_for_each_vif(wilc, vif) {
if (!is_zero_ether_addr(vif->bssid))
ret_val++;
}
@@ -140,6 +140,19 @@ int wilc_wlan_get_num_conn_ifcs(struct wilc *wilc)
return ret_val;
}
+static void wilc_wake_tx_queues(struct wilc *wl)
+{
+ int srcu_idx;
+ struct wilc_vif *ifc;
+
+ srcu_idx = srcu_read_lock(&wl->srcu);
+ wilc_for_each_vif(wl, ifc) {
+ if (ifc->mac_opened && netif_queue_stopped(ifc->ndev))
+ netif_wake_queue(ifc->ndev);
+ }
+ srcu_read_unlock(&wl->srcu, srcu_idx);
+}
+
static int wilc_txq_task(void *vp)
{
int ret;
@@ -160,17 +173,7 @@ static int wilc_txq_task(void *vp)
do {
ret = wilc_wlan_handle_txq(wl, &txq_count);
if (txq_count < FLOW_CONTROL_LOWER_THRESHOLD) {
- int srcu_idx;
- struct wilc_vif *ifc;
-
- srcu_idx = srcu_read_lock(&wl->srcu);
- list_for_each_entry_rcu(ifc, &wl->vif_list,
- list) {
- if (ifc->mac_opened &&
- netif_queue_stopped(ifc->ndev))
- netif_wake_queue(ifc->ndev);
- }
- srcu_read_unlock(&wl->srcu, srcu_idx);
+ wilc_wake_tx_queues(wl);
}
if (ret != WILC_VMM_ENTRY_FULL_RETRY)
break;
@@ -284,7 +287,7 @@ static int wilc_init_fw_config(struct net_device *dev, struct wilc_vif *vif)
if (!wilc_wlan_cfg_set(vif, 0, WID_11G_OPERATING_MODE, &b, 1, 0, 0))
goto fail;
- b = WILC_FW_PREAMBLE_SHORT;
+ b = WILC_FW_PREAMBLE_AUTO;
if (!wilc_wlan_cfg_set(vif, 0, WID_PREAMBLE, &b, 1, 0, 0))
goto fail;
@@ -416,7 +419,7 @@ static int wilc_init_fw_config(struct net_device *dev, struct wilc_vif *vif)
b = 1;
if (!wilc_wlan_cfg_set(vif, 0, WID_11N_IMMEDIATE_BA_ENABLED, &b, 1,
- 1, 1))
+ 1, 0))
goto fail;
return 0;
@@ -665,7 +668,7 @@ static int wilc_set_mac_addr(struct net_device *dev, void *p)
/* Verify MAC Address is not already in use: */
srcu_idx = srcu_read_lock(&wilc->srcu);
- list_for_each_entry_rcu(tmp_vif, &wilc->vif_list, list) {
+ wilc_for_each_vif(wilc, tmp_vif) {
wilc_get_mac_address(tmp_vif, mac_addr);
if (ether_addr_equal(addr->sa_data, mac_addr)) {
if (vif != tmp_vif) {
@@ -768,7 +771,7 @@ netdev_tx_t wilc_mac_xmit(struct sk_buff *skb, struct net_device *ndev)
struct wilc_vif *vif;
srcu_idx = srcu_read_lock(&wilc->srcu);
- list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
+ wilc_for_each_vif(wilc, vif) {
if (vif->mac_opened)
netif_stop_queue(vif->ndev);
}
@@ -811,19 +814,21 @@ static int wilc_mac_close(struct net_device *ndev)
void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size,
u32 pkt_offset)
{
- unsigned int frame_len = 0;
- int stats;
unsigned char *buff_to_send = NULL;
- struct sk_buff *skb;
struct net_device *wilc_netdev;
+ unsigned int frame_len = 0;
struct wilc_vif *vif;
+ struct sk_buff *skb;
+ int srcu_idx;
+ int stats;
if (!wilc)
return;
+ srcu_idx = srcu_read_lock(&wilc->srcu);
wilc_netdev = get_if_handler(wilc, buff);
if (!wilc_netdev)
- return;
+ goto out;
buff += pkt_offset;
vif = netdev_priv(wilc_netdev);
@@ -834,7 +839,7 @@ void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size,
skb = dev_alloc_skb(frame_len);
if (!skb)
- return;
+ goto out;
skb->dev = wilc_netdev;
@@ -847,6 +852,8 @@ void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size,
stats = netif_rx(skb);
netdev_dbg(wilc_netdev, "netif_rx ret value is: %d\n", stats);
}
+out:
+ srcu_read_unlock(&wilc->srcu, srcu_idx);
}
void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size, bool is_auth)
@@ -855,7 +862,7 @@ void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size, bool is_auth)
struct wilc_vif *vif;
srcu_idx = srcu_read_lock(&wilc->srcu);
- list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
+ wilc_for_each_vif(wilc, vif) {
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buff;
u16 type = le16_to_cpup((__le16 *)buff);
u32 type_bit = BIT(type >> 4);
@@ -890,8 +897,7 @@ static const struct net_device_ops wilc_netdev_ops = {
void wilc_netdev_cleanup(struct wilc *wilc)
{
- struct wilc_vif *vif;
- int srcu_idx, ifc_cnt = 0;
+ struct wilc_vif *vif, *vif_tmp;
if (!wilc)
return;
@@ -901,32 +907,19 @@ void wilc_netdev_cleanup(struct wilc *wilc)
wilc->firmware = NULL;
}
- srcu_idx = srcu_read_lock(&wilc->srcu);
- list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
+ list_for_each_entry_safe(vif, vif_tmp, &wilc->vif_list, list) {
+ mutex_lock(&wilc->vif_mutex);
+ list_del_rcu(&vif->list);
+ wilc->vif_num--;
+ mutex_unlock(&wilc->vif_mutex);
+ synchronize_srcu(&wilc->srcu);
if (vif->ndev)
unregister_netdev(vif->ndev);
}
- srcu_read_unlock(&wilc->srcu, srcu_idx);
wilc_wfi_deinit_mon_interface(wilc, false);
destroy_workqueue(wilc->hif_workqueue);
- while (ifc_cnt < WILC_NUM_CONCURRENT_IFC) {
- mutex_lock(&wilc->vif_mutex);
- if (wilc->vif_num <= 0) {
- mutex_unlock(&wilc->vif_mutex);
- break;
- }
- vif = wilc_get_wl_to_vif(wilc);
- if (!IS_ERR(vif))
- list_del_rcu(&vif->list);
-
- wilc->vif_num--;
- mutex_unlock(&wilc->vif_mutex);
- synchronize_srcu(&wilc->srcu);
- ifc_cnt++;
- }
-
wilc_wlan_cfg_deinit(wilc);
wlan_deinit_locks(wilc);
wiphy_unregister(wilc->wiphy);
@@ -941,7 +934,7 @@ static u8 wilc_get_available_idx(struct wilc *wl)
int srcu_idx;
srcu_idx = srcu_read_lock(&wl->srcu);
- list_for_each_entry_rcu(vif, &wl->vif_list, list) {
+ wilc_for_each_vif(wl, vif) {
if (vif->idx == 0)
idx = 1;
else
@@ -989,13 +982,6 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name,
goto error;
}
- wl->hif_workqueue = alloc_ordered_workqueue("%s-wq", WQ_MEM_RECLAIM,
- ndev->name);
- if (!wl->hif_workqueue) {
- ret = -ENOMEM;
- goto unregister_netdev;
- }
-
ndev->needs_free_netdev = true;
vif->iftype = vif_type;
vif->idx = wilc_get_available_idx(wl);
@@ -1008,15 +994,15 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name,
return vif;
-unregister_netdev:
+error:
if (rtnl_locked)
cfg80211_unregister_netdevice(ndev);
else
unregister_netdev(ndev);
- error:
free_netdev(ndev);
return ERR_PTR(ret);
}
+MODULE_DESCRIPTION("Atmel WILC1000 core wireless driver");
MODULE_LICENSE("GPL");
MODULE_FIRMWARE(WILC1000_FW(WILC1000_API_VER));
diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h
index aafe3dc44ac6..5937d6d45695 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.h
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.h
@@ -13,6 +13,7 @@
#include <net/ieee80211_radiotap.h>
#include <linux/if_arp.h>
#include <linux/gpio/consumer.h>
+#include <linux/rculist.h>
#include "hif.h"
#include "wlan.h"
@@ -29,6 +30,11 @@
#define TX_BACKOFF_WEIGHT_MS 1
+#define wilc_for_each_vif(w, v) \
+ struct wilc *_w = w; \
+ list_for_each_entry_srcu(v, &_w->vif_list, list, \
+ srcu_read_lock_held(&_w->srcu))
+
struct wilc_wfi_stats {
unsigned long rx_packets;
unsigned long tx_packets;
diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c
index 0d13e3e46e98..d6d394693090 100644
--- a/drivers/net/wireless/microchip/wilc1000/sdio.c
+++ b/drivers/net/wireless/microchip/wilc1000/sdio.c
@@ -984,4 +984,5 @@ static struct sdio_driver wilc_sdio_driver = {
module_driver(wilc_sdio_driver,
sdio_register_driver,
sdio_unregister_driver);
+MODULE_DESCRIPTION("Atmel WILC1000 SDIO wireless driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index 77b4cdff73c3..61c3572ce321 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -42,7 +42,7 @@ MODULE_PARM_DESC(enable_crc16,
#define WILC_SPI_RSP_HDR_EXTRA_DATA 8
struct wilc_spi {
- bool isinit; /* true if SPI protocol has been configured */
+ bool isinit; /* true if wilc_spi_init was successful */
bool probing_crc; /* true if we're probing chip's CRC config */
bool crc7_enabled; /* true if crc7 is currently enabled */
bool crc16_enabled; /* true if crc16 is currently enabled */
@@ -55,6 +55,8 @@ struct wilc_spi {
static const struct wilc_hif_func wilc_hif_spi;
static int wilc_spi_reset(struct wilc *wilc);
+static int wilc_spi_configure_bus_protocol(struct wilc *wilc);
+static int wilc_validate_chipid(struct wilc *wilc);
/********************************************
*
@@ -192,11 +194,11 @@ static void wilc_wlan_power(struct wilc *wilc, bool on)
/* assert ENABLE: */
gpiod_set_value(gpios->enable, 1);
mdelay(5);
- /* assert RESET: */
- gpiod_set_value(gpios->reset, 1);
- } else {
/* deassert RESET: */
gpiod_set_value(gpios->reset, 0);
+ } else {
+ /* assert RESET: */
+ gpiod_set_value(gpios->reset, 1);
/* deassert ENABLE: */
gpiod_set_value(gpios->enable, 0);
}
@@ -232,8 +234,27 @@ static int wilc_bus_probe(struct spi_device *spi)
}
clk_prepare_enable(wilc->rtc_clk);
+ dev_info(&spi->dev, "Selected CRC config: crc7=%s, crc16=%s\n",
+ enable_crc7 ? "on" : "off", enable_crc16 ? "on" : "off");
+
+ /* we need power to configure the bus protocol and to read the chip id: */
+
+ wilc_wlan_power(wilc, true);
+
+ ret = wilc_spi_configure_bus_protocol(wilc);
+ if (ret)
+ goto power_down;
+
+ ret = wilc_validate_chipid(wilc);
+ if (ret)
+ goto power_down;
+
+ wilc_wlan_power(wilc, false);
return 0;
+power_down:
+ clk_disable_unprepare(wilc->rtc_clk);
+ wilc_wlan_power(wilc, false);
netdev_cleanup:
wilc_netdev_cleanup(wilc);
free:
@@ -273,6 +294,7 @@ static struct spi_driver wilc_spi_driver = {
.remove = wilc_bus_remove,
};
module_spi_driver(wilc_spi_driver);
+MODULE_DESCRIPTION("Atmel WILC1000 SPI wireless driver");
MODULE_LICENSE("GPL");
static int wilc_spi_tx(struct wilc *wilc, u8 *b, u32 len)
@@ -300,7 +322,6 @@ static int wilc_spi_tx(struct wilc *wilc, u8 *b, u32 len)
memset(&msg, 0, sizeof(msg));
spi_message_init(&msg);
- msg.spi = spi;
spi_message_add_tail(&tr, &msg);
ret = spi_sync(spi, &msg);
@@ -343,7 +364,6 @@ static int wilc_spi_rx(struct wilc *wilc, u8 *rb, u32 rlen)
memset(&msg, 0, sizeof(msg));
spi_message_init(&msg);
- msg.spi = spi;
spi_message_add_tail(&tr, &msg);
ret = spi_sync(spi, &msg);
@@ -381,8 +401,6 @@ static int wilc_spi_tx_rx(struct wilc *wilc, u8 *wb, u8 *rb, u32 rlen)
memset(&msg, 0, sizeof(msg));
spi_message_init(&msg);
- msg.spi = spi;
-
spi_message_add_tail(&tr, &msg);
ret = spi_sync(spi, &msg);
if (ret < 0)
@@ -476,7 +494,7 @@ static int spi_data_write(struct wilc *wilc, u8 *b, u32 sz)
********************************************/
static u8 wilc_get_crc7(u8 *buffer, u32 len)
{
- return crc7_be(0xfe, buffer, len);
+ return crc7_be(0xfe, buffer, len) | 0x01;
}
static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
@@ -1105,26 +1123,34 @@ static int wilc_spi_deinit(struct wilc *wilc)
static int wilc_spi_init(struct wilc *wilc, bool resume)
{
- struct spi_device *spi = to_spi_device(wilc->dev);
struct wilc_spi *spi_priv = wilc->bus_data;
- u32 reg;
- u32 chipid;
- int ret, i;
+ int ret;
if (spi_priv->isinit) {
/* Confirm we can read chipid register without error: */
- ret = wilc_spi_read_reg(wilc, WILC_CHIPID, &chipid);
- if (ret == 0)
+ if (wilc_validate_chipid(wilc) == 0)
return 0;
-
- dev_err(&spi->dev, "Fail cmd read chip id...\n");
}
wilc_wlan_power(wilc, true);
- /*
- * configure protocol
- */
+ ret = wilc_spi_configure_bus_protocol(wilc);
+ if (ret) {
+ wilc_wlan_power(wilc, false);
+ return ret;
+ }
+
+ spi_priv->isinit = true;
+
+ return 0;
+}
+
+static int wilc_spi_configure_bus_protocol(struct wilc *wilc)
+{
+ struct spi_device *spi = to_spi_device(wilc->dev);
+ struct wilc_spi *spi_priv = wilc->bus_data;
+ u32 reg;
+ int ret, i;
/*
* Infer the CRC settings that are currently in effect. This
@@ -1176,6 +1202,15 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
spi_priv->probing_crc = false;
+ return 0;
+}
+
+static int wilc_validate_chipid(struct wilc *wilc)
+{
+ struct spi_device *spi = to_spi_device(wilc->dev);
+ u32 chipid;
+ int ret;
+
/*
* make sure can read chip id without protocol error
*/
@@ -1184,9 +1219,10 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
dev_err(&spi->dev, "Fail cmd read chip id...\n");
return ret;
}
-
- spi_priv->isinit = true;
-
+ if (!is_wilc1000(chipid)) {
+ dev_err(&spi->dev, "Unknown chip id 0x%x\n", chipid);
+ return -ENODEV;
+ }
return 0;
}
diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index 9eb115c79c90..a9e872a7b2c3 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -12,11 +12,6 @@
#define WAKE_UP_TRIAL_RETRY 10000
-static inline bool is_wilc1000(u32 id)
-{
- return (id & (~WILC_CHIP_REV_FIELD)) == WILC_1000_BASE_ID;
-}
-
static inline void acquire_bus(struct wilc *wilc, enum bus_acquire acquire)
{
mutex_lock(&wilc->hif_cs);
@@ -730,7 +725,7 @@ int wilc_wlan_handle_txq(struct wilc *wilc, u32 *txq_count)
mutex_lock(&wilc->txq_add_to_head_cs);
srcu_idx = srcu_read_lock(&wilc->srcu);
- list_for_each_entry_rcu(vif, &wilc->vif_list, list)
+ wilc_for_each_vif(wilc, vif)
wilc_wlan_txq_filter_dup_tcp_ack(vif->ndev);
srcu_read_unlock(&wilc->srcu, srcu_idx);
@@ -1198,27 +1193,32 @@ int wilc_wlan_stop(struct wilc *wilc, struct wilc_vif *vif)
acquire_bus(wilc, WILC_BUS_ACQUIRE_AND_WAKEUP);
- ret = wilc->hif_func->hif_read_reg(wilc, WILC_GP_REG_0, &reg);
- if (ret) {
- netdev_err(vif->ndev, "Error while reading reg\n");
+ ret = wilc->hif_func->hif_read_reg(wilc, GLOBAL_MODE_CONTROL, &reg);
+ if (ret)
goto release;
- }
- ret = wilc->hif_func->hif_write_reg(wilc, WILC_GP_REG_0,
- (reg | WILC_ABORT_REQ_BIT));
- if (ret) {
- netdev_err(vif->ndev, "Error while writing reg\n");
+ reg &= ~WILC_GLOBAL_MODE_ENABLE_WIFI;
+ ret = wilc->hif_func->hif_write_reg(wilc, GLOBAL_MODE_CONTROL, reg);
+ if (ret)
goto release;
- }
- ret = wilc->hif_func->hif_read_reg(wilc, WILC_FW_HOST_COMM, &reg);
+ ret = wilc->hif_func->hif_read_reg(wilc, PWR_SEQ_MISC_CTRL, &reg);
+ if (ret)
+ goto release;
+
+ reg &= ~WILC_PWR_SEQ_ENABLE_WIFI_SLEEP;
+ ret = wilc->hif_func->hif_write_reg(wilc, PWR_SEQ_MISC_CTRL, reg);
+ if (ret)
+ goto release;
+
+ ret = wilc->hif_func->hif_read_reg(wilc, WILC_GP_REG_0, &reg);
if (ret) {
netdev_err(vif->ndev, "Error while reading reg\n");
goto release;
}
- reg = BIT(0);
- ret = wilc->hif_func->hif_write_reg(wilc, WILC_FW_HOST_COMM, reg);
+ ret = wilc->hif_func->hif_write_reg(wilc, WILC_GP_REG_0,
+ (reg | WILC_ABORT_REQ_BIT));
if (ret) {
netdev_err(vif->ndev, "Error while writing reg\n");
goto release;
@@ -1410,7 +1410,7 @@ static int init_chip(struct net_device *dev)
struct wilc_vif *vif = netdev_priv(dev);
struct wilc *wilc = vif->wilc;
- acquire_bus(wilc, WILC_BUS_ACQUIRE_ONLY);
+ acquire_bus(wilc, WILC_BUS_ACQUIRE_AND_WAKEUP);
chipid = wilc_get_chipid(wilc, true);
@@ -1440,7 +1440,7 @@ static int init_chip(struct net_device *dev)
}
release:
- release_bus(wilc, WILC_BUS_RELEASE_ONLY);
+ release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
return ret;
}
diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.h b/drivers/net/wireless/microchip/wilc1000/wlan.h
index a72cd5cac81d..54643d8fef04 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.h
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.h
@@ -156,6 +156,12 @@
#define WILC_GP_REG_0 0x149c
#define WILC_GP_REG_1 0x14a0
+#define GLOBAL_MODE_CONTROL 0x1614
+#define PWR_SEQ_MISC_CTRL 0x3008
+
+#define WILC_GLOBAL_MODE_ENABLE_WIFI BIT(0)
+#define WILC_PWR_SEQ_ENABLE_WIFI_SLEEP BIT(28)
+
#define WILC_HAVE_SDIO_IRQ_GPIO BIT(0)
#define WILC_HAVE_USE_PMU BIT(1)
#define WILC_HAVE_SLEEP_CLK_SRC_RTC BIT(2)
@@ -403,6 +409,11 @@ struct wilc_cfg_rsp {
struct wilc_vif;
+static inline bool is_wilc1000(u32 id)
+{
+ return (id & (~WILC_CHIP_REV_FIELD)) == WILC_1000_BASE_ID;
+}
+
int wilc_wlan_firmware_download(struct wilc *wilc, const u8 *buffer,
u32 buffer_size);
int wilc_wlan_start(struct wilc *wilc);
diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c
index 506d2f31efb5..641f847d47ab 100644
--- a/drivers/net/wireless/purelifi/plfxlc/mac.c
+++ b/drivers/net/wireless/purelifi/plfxlc/mac.c
@@ -7,7 +7,6 @@
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/usb.h>
-#include <linux/gpio.h>
#include <linux/jiffies.h>
#include <net/ieee80211_radiotap.h>
@@ -685,6 +684,10 @@ static int plfxlc_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
}
static const struct ieee80211_ops plfxlc_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = plfxlc_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = plfxlc_op_start,
diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c
index 3b283e93a13e..76b07db284f8 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/event.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/event.c
@@ -478,7 +478,7 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac,
continue;
wiphy_lock(priv_to_wiphy(vif->mac));
- cfg80211_ch_switch_notify(vif->netdev, &chandef, 0, 0);
+ cfg80211_ch_switch_notify(vif->netdev, &chandef, 0);
wiphy_unlock(priv_to_wiphy(vif->mac));
}
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c
index 13dd672b825e..42e21e9f303b 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c
@@ -1705,6 +1705,10 @@ static int rt2400pci_tx_last_beacon(struct ieee80211_hw *hw)
}
static const struct ieee80211_ops rt2400pci_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c
index ecddda4c471e..36ddc5a69fa4 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c
@@ -2003,6 +2003,10 @@ static int rt2500pci_tx_last_beacon(struct ieee80211_hw *hw)
}
static const struct ieee80211_ops rt2500pci_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
index 13fdcff0ad66..09923765e2db 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
@@ -1794,6 +1794,10 @@ static int rt2500usb_probe_hw(struct rt2x00_dev *rt2x00dev)
}
static const struct ieee80211_ops rt2500usb_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index aaf31857ae1e..3bb81bcff0ac 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -10946,13 +10946,13 @@ static void rt2800_efuse_read(struct rt2x00_dev *rt2x00dev, unsigned int i)
/* Apparently the data is read from end to start */
reg = rt2800_register_read_lock(rt2x00dev, efuse_data3_reg);
/* The returned value is in CPU order, but eeprom is le */
- *(u32 *)&rt2x00dev->eeprom[i] = cpu_to_le32(reg);
+ *(__le32 *)&rt2x00dev->eeprom[i] = cpu_to_le32(reg);
reg = rt2800_register_read_lock(rt2x00dev, efuse_data2_reg);
- *(u32 *)&rt2x00dev->eeprom[i + 2] = cpu_to_le32(reg);
+ *(__le32 *)&rt2x00dev->eeprom[i + 2] = cpu_to_le32(reg);
reg = rt2800_register_read_lock(rt2x00dev, efuse_data1_reg);
- *(u32 *)&rt2x00dev->eeprom[i + 4] = cpu_to_le32(reg);
+ *(__le32 *)&rt2x00dev->eeprom[i + 4] = cpu_to_le32(reg);
reg = rt2800_register_read_lock(rt2x00dev, efuse_data0_reg);
- *(u32 *)&rt2x00dev->eeprom[i + 6] = cpu_to_le32(reg);
+ *(__le32 *)&rt2x00dev->eeprom[i + 6] = cpu_to_le32(reg);
mutex_unlock(&rt2x00dev->csr_mutex);
}
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c
index dcb56f708a5f..14c45aba836f 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c
@@ -287,6 +287,10 @@ static int rt2800pci_read_eeprom(struct rt2x00_dev *rt2x00dev)
}
static const struct ieee80211_ops rt2800pci_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c
index 7118d4f9038d..701ba54bf3e5 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c
@@ -132,6 +132,10 @@ static int rt2800soc_write_firmware(struct rt2x00_dev *rt2x00dev,
}
static const struct ieee80211_ops rt2800soc_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
index b2a8e75a901b..160bef79acdb 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
@@ -629,6 +629,10 @@ static int rt2800usb_probe_hw(struct rt2x00_dev *rt2x00dev)
}
static const struct ieee80211_ops rt2800usb_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c b/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c
index ad95f9eba301..1000fbfb94b8 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c
@@ -197,10 +197,7 @@ void rt2x00crypto_rx_insert_iv(struct sk_buff *skb,
transfer += header_length;
} else {
skb_push(skb, iv_len + align);
- if (align < icv_len)
- skb_put(skb, icv_len - align);
- else if (align > icv_len)
- skb_trim(skb, rxdesc->size + iv_len + icv_len);
+ skb_put(skb, icv_len - align);
/* Move ieee80211 header */
memmove(skb->data + transfer,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c
index 483723bf514b..d1cd5694e3c7 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c
@@ -2872,6 +2872,10 @@ static u64 rt61pci_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
}
static const struct ieee80211_ops rt61pci_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
index dfa9d5213898..b79dda952a33 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
@@ -2291,6 +2291,10 @@ static u64 rt73usb_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
}
static const struct ieee80211_ops rt73usb_mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rt2x00mac_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rt2x00mac_start,
diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c
index f6c25a52b69a..77b6cb7e1f6b 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c
@@ -1607,6 +1607,10 @@ static void rtl8180_configure_filter(struct ieee80211_hw *dev,
}
static const struct ieee80211_ops rtl8180_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rtl8180_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rtl8180_start,
diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
index 04945f905d6d..78d99afa373d 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
@@ -1377,6 +1377,10 @@ static int rtl8187_conf_tx(struct ieee80211_hw *dev,
static const struct ieee80211_ops rtl8187_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rtl8187_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rtl8187_start,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
index 4695fb4e2d2d..fd92d23c43d9 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
@@ -6,6 +6,7 @@
*/
#include <asm/byteorder.h>
+#include <linux/average.h>
#define RTL8XXXU_DEBUG_REG_WRITE 0x01
#define RTL8XXXU_DEBUG_REG_READ 0x02
@@ -498,6 +499,7 @@ struct rtl8xxxu_txdesc40 {
#define DESC_RATE_ID_SHIFT 16
#define DESC_RATE_ID_MASK 0xf
#define TXDESC_NAVUSEHDR BIT(20)
+#define TXDESC_EN_DESC_ID BIT(21)
#define TXDESC_SEC_RC4 0x00400000
#define TXDESC_SEC_AES 0x00c00000
#define TXDESC_PKT_OFFSET_SHIFT 26
@@ -1774,6 +1776,8 @@ struct rtl8xxxu_cfo_tracking {
#define RTL8XXXU_HW_LED_CONTROL 2
#define RTL8XXXU_MAX_MAC_ID_NUM 128
#define RTL8XXXU_BC_MC_MACID 0
+#define RTL8XXXU_BC_MC_MACID1 1
+#define RTL8XXXU_MAX_SEC_CAM_NUM 64
struct rtl8xxxu_priv {
struct ieee80211_hw *hw;
@@ -1855,6 +1859,8 @@ struct rtl8xxxu_priv {
int next_mbox;
int nr_out_eps;
+ /* Ensure no added or deleted stas while iterating */
+ struct mutex sta_mutex;
struct mutex h2c_mutex;
/* Protect the indirect register accesses of RTL8710BU. */
struct mutex syson_indirect_access_mutex;
@@ -1889,18 +1895,14 @@ struct rtl8xxxu_priv {
u8 pi_enabled:1;
u8 no_pape:1;
u8 int_buf[USB_INTR_CONTENT_LENGTH];
- u8 rssi_level;
DECLARE_BITMAP(tx_aggr_started, IEEE80211_NUM_TIDS);
DECLARE_BITMAP(tid_tx_operational, IEEE80211_NUM_TIDS);
- /*
- * Only one virtual interface permitted because only STA mode
- * is supported and no iface_combinations are provided.
- */
- struct ieee80211_vif *vif;
+
+ struct ieee80211_vif *vifs[2];
struct delayed_work ra_watchdog;
struct work_struct c2hcmd_work;
struct sk_buff_head c2hcmd_queue;
- struct work_struct update_beacon_work;
+ struct delayed_work update_beacon_work;
struct rtl8xxxu_btcoex bt_coex;
struct rtl8xxxu_ra_report ra_report;
struct rtl8xxxu_cfo_tracking cfo_tracking;
@@ -1910,13 +1912,23 @@ struct rtl8xxxu_priv {
char led_name[32];
struct led_classdev led_cdev;
DECLARE_BITMAP(mac_id_map, RTL8XXXU_MAX_MAC_ID_NUM);
+ DECLARE_BITMAP(cam_map, RTL8XXXU_MAX_SEC_CAM_NUM);
};
+DECLARE_EWMA(rssi, 10, 16);
+
struct rtl8xxxu_sta_info {
struct ieee80211_sta *sta;
struct ieee80211_vif *vif;
u8 macid;
+ struct ewma_rssi avg_rssi;
+ u8 rssi_level;
+};
+
+struct rtl8xxxu_vif {
+ int port_num;
+ u8 hw_key_idx;
};
struct rtl8xxxu_rx_urb {
@@ -1986,11 +1998,13 @@ struct rtl8xxxu_fileops {
u8 init_reg_rxfltmap:1;
u8 init_reg_pkt_life_time:1;
u8 init_reg_hmtfr:1;
+ u8 supports_concurrent:1;
u8 ampdu_max_time;
u8 ustime_tsf_edca;
u16 max_aggr_num;
u8 supports_ap:1;
u16 max_macid_num;
+ u16 max_sec_cam_num;
u32 adda_1t_init;
u32 adda_1t_path_on;
u32 adda_2t_path_on_a;
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
index 6d0f975f891b..afe9cc1b49dc 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
@@ -1699,7 +1699,7 @@ void rtl8188e_handle_ra_tx_report2(struct rtl8xxxu_priv *priv, struct sk_buff *s
/* We only use macid 0, so only the first item is relevant.
* AP mode will use more of them if it's ever implemented.
*/
- if (!priv->vif || priv->vif->type == NL80211_IFTYPE_STATION)
+ if (!priv->vifs[0] || priv->vifs[0]->type == NL80211_IFTYPE_STATION)
items = 1;
for (macid = 0; macid < items; macid++) {
@@ -1882,6 +1882,7 @@ struct rtl8xxxu_fileops rtl8188eu_fops = {
.has_tx_report = 1,
.init_reg_pkt_life_time = 1,
.gen2_thermal_meter = 1,
+ .max_sec_cam_num = 32,
.adda_1t_init = 0x0b1b25a0,
.adda_1t_path_on = 0x0bdb25a0,
/*
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c
index 1e1c8fa194cb..464216d007ce 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c
@@ -1751,6 +1751,8 @@ struct rtl8xxxu_fileops rtl8188fu_fops = {
.max_aggr_num = 0x0c14,
.supports_ap = 1,
.max_macid_num = 16,
+ .max_sec_cam_num = 16,
+ .supports_concurrent = 1,
.adda_1t_init = 0x03c00014,
.adda_1t_path_on = 0x03c00014,
.trxff_boundary = 0x3f7f,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c
index b30a9a513cb8..3ee7d8f87da6 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c
@@ -613,6 +613,7 @@ struct rtl8xxxu_fileops rtl8192cu_fops = {
.rx_agg_buf_size = 16000,
.tx_desc_size = sizeof(struct rtl8xxxu_txdesc32),
.rx_desc_size = sizeof(struct rtl8xxxu_rxdesc16),
+ .max_sec_cam_num = 32,
.adda_1t_init = 0x0b1b25a0,
.adda_1t_path_on = 0x0bdb25a0,
.adda_2t_path_on_a = 0x04db25a4,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
index 47bcaec6f2db..63b73ace27ec 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
@@ -1769,6 +1769,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = {
.needs_full_init = 1,
.supports_ap = 1,
.max_macid_num = 128,
+ .max_sec_cam_num = 64,
.adda_1t_init = 0x0fc01616,
.adda_1t_path_on = 0x0fc01616,
.adda_2t_path_on_a = 0x0fc01616,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192f.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192f.c
index 28e93835e05a..21e4204769d0 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192f.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192f.c
@@ -2014,26 +2014,40 @@ static int rtl8192fu_led_brightness_set(struct led_classdev *led_cdev,
struct rtl8xxxu_priv *priv = container_of(led_cdev,
struct rtl8xxxu_priv,
led_cdev);
- u16 ledcfg;
+ u32 ledcfg;
/* Values obtained by observing the USB traffic from the Windows driver. */
rtl8xxxu_write32(priv, REG_SW_GPIO_SHARE_CTRL_0, 0x20080);
rtl8xxxu_write32(priv, REG_SW_GPIO_SHARE_CTRL_1, 0x1b0000);
- ledcfg = rtl8xxxu_read16(priv, REG_LEDCFG0);
+ ledcfg = rtl8xxxu_read32(priv, REG_LEDCFG0);
+
+ /* Comfast CF-826F uses LED1. Asus USB-N13 C1 uses LED0. Set both. */
+
+ u32p_replace_bits(&ledcfg, LED_GPIO_ENABLE, LEDCFG0_LED2EN);
+ u32p_replace_bits(&ledcfg, LED_IO_MODE_OUTPUT, LEDCFG0_LED0_IO_MODE);
+ u32p_replace_bits(&ledcfg, LED_IO_MODE_OUTPUT, LEDCFG0_LED1_IO_MODE);
if (brightness == LED_OFF) {
- /* Value obtained like above. */
- ledcfg = BIT(1) | BIT(7);
+ u32p_replace_bits(&ledcfg, LED_MODE_SW_CTRL, LEDCFG0_LED0CM);
+ u32p_replace_bits(&ledcfg, LED_SW_OFF, LEDCFG0_LED0SV);
+ u32p_replace_bits(&ledcfg, LED_MODE_SW_CTRL, LEDCFG0_LED1CM);
+ u32p_replace_bits(&ledcfg, LED_SW_OFF, LEDCFG0_LED1SV);
} else if (brightness == LED_ON) {
- /* Value obtained like above. */
- ledcfg = BIT(1) | BIT(7) | BIT(11);
+ u32p_replace_bits(&ledcfg, LED_MODE_SW_CTRL, LEDCFG0_LED0CM);
+ u32p_replace_bits(&ledcfg, LED_SW_ON, LEDCFG0_LED0SV);
+ u32p_replace_bits(&ledcfg, LED_MODE_SW_CTRL, LEDCFG0_LED1CM);
+ u32p_replace_bits(&ledcfg, LED_SW_ON, LEDCFG0_LED1SV);
} else if (brightness == RTL8XXXU_HW_LED_CONTROL) {
- /* Value obtained by brute force. */
- ledcfg = BIT(8) | BIT(9);
+ u32p_replace_bits(&ledcfg, LED_MODE_TX_OR_RX_EVENTS,
+ LEDCFG0_LED0CM);
+ u32p_replace_bits(&ledcfg, LED_SW_OFF, LEDCFG0_LED0SV);
+ u32p_replace_bits(&ledcfg, LED_MODE_TX_OR_RX_EVENTS,
+ LEDCFG0_LED1CM);
+ u32p_replace_bits(&ledcfg, LED_SW_OFF, LEDCFG0_LED1SV);
}
- rtl8xxxu_write16(priv, REG_LEDCFG0, ledcfg);
+ rtl8xxxu_write32(priv, REG_LEDCFG0, ledcfg);
return 0;
}
@@ -2081,6 +2095,7 @@ struct rtl8xxxu_fileops rtl8192fu_fops = {
.max_aggr_num = 0x1f1f,
.supports_ap = 1,
.max_macid_num = 128,
+ .max_sec_cam_num = 64,
.trxff_boundary = 0x3f3f,
.pbp_rx = PBP_PAGE_SIZE_256,
.pbp_tx = PBP_PAGE_SIZE_256,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8710b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8710b.c
index 871b8cca8a18..46d57510e9fc 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8710b.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8710b.c
@@ -1877,6 +1877,7 @@ struct rtl8xxxu_fileops rtl8710bu_fops = {
.max_aggr_num = 0x0c14,
.supports_ap = 1,
.max_macid_num = 16,
+ .max_sec_cam_num = 32,
.adda_1t_init = 0x03c00016,
.adda_1t_path_on = 0x03c00016,
.trxff_boundary = 0x3f7f,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c
index 15a30e496221..ad1bb9377ca2 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c
@@ -510,6 +510,7 @@ struct rtl8xxxu_fileops rtl8723au_fops = {
.rx_agg_buf_size = 16000,
.tx_desc_size = sizeof(struct rtl8xxxu_txdesc32),
.rx_desc_size = sizeof(struct rtl8xxxu_rxdesc16),
+ .max_sec_cam_num = 32,
.adda_1t_init = 0x0b1b25a0,
.adda_1t_path_on = 0x0bdb25a0,
.adda_2t_path_on_a = 0x04db25a4,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c
index 954369ed6226..9640c841d20a 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c
@@ -1744,6 +1744,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = {
.max_aggr_num = 0x0c14,
.supports_ap = 1,
.max_macid_num = 128,
+ .max_sec_cam_num = 64,
.adda_1t_init = 0x01c00014,
.adda_1t_path_on = 0x01c00014,
.adda_2t_path_on_a = 0x01c00014,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 180907319e8c..4a49f8f9d80f 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -1633,33 +1633,41 @@ rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, int channel, bool ht40)
}
static void rtl8xxxu_set_linktype(struct rtl8xxxu_priv *priv,
- enum nl80211_iftype linktype)
+ enum nl80211_iftype linktype, int port_num)
{
- u8 val8;
-
- val8 = rtl8xxxu_read8(priv, REG_MSR);
- val8 &= ~MSR_LINKTYPE_MASK;
+ u8 val8, type;
switch (linktype) {
case NL80211_IFTYPE_UNSPECIFIED:
- val8 |= MSR_LINKTYPE_NONE;
+ type = MSR_LINKTYPE_NONE;
break;
case NL80211_IFTYPE_ADHOC:
- val8 |= MSR_LINKTYPE_ADHOC;
+ type = MSR_LINKTYPE_ADHOC;
break;
case NL80211_IFTYPE_STATION:
- val8 |= MSR_LINKTYPE_STATION;
+ type = MSR_LINKTYPE_STATION;
break;
case NL80211_IFTYPE_AP:
- val8 |= MSR_LINKTYPE_AP;
+ type = MSR_LINKTYPE_AP;
break;
default:
- goto out;
+ return;
+ }
+
+ switch (port_num) {
+ case 0:
+ val8 = rtl8xxxu_read8(priv, REG_MSR) & 0x0c;
+ val8 |= type;
+ break;
+ case 1:
+ val8 = rtl8xxxu_read8(priv, REG_MSR) & 0x03;
+ val8 |= type << 2;
+ break;
+ default:
+ return;
}
rtl8xxxu_write8(priv, REG_MSR, val8);
-out:
- return;
}
static void
@@ -3572,27 +3580,47 @@ void rtl8723a_phy_lc_calibrate(struct rtl8xxxu_priv *priv)
rtl8xxxu_write8(priv, REG_TXPAUSE, 0x00);
}
-static int rtl8xxxu_set_mac(struct rtl8xxxu_priv *priv)
+static int rtl8xxxu_set_mac(struct rtl8xxxu_priv *priv, int port_num)
{
int i;
u16 reg;
- reg = REG_MACID;
+ switch (port_num) {
+ case 0:
+ reg = REG_MACID;
+ break;
+ case 1:
+ reg = REG_MACID1;
+ break;
+ default:
+ WARN_ONCE(1, "%s: invalid port_num\n", __func__);
+ return -EINVAL;
+ }
for (i = 0; i < ETH_ALEN; i++)
- rtl8xxxu_write8(priv, reg + i, priv->mac_addr[i]);
+ rtl8xxxu_write8(priv, reg + i, priv->vifs[port_num]->addr[i]);
return 0;
}
-static int rtl8xxxu_set_bssid(struct rtl8xxxu_priv *priv, const u8 *bssid)
+static int rtl8xxxu_set_bssid(struct rtl8xxxu_priv *priv, const u8 *bssid, int port_num)
{
int i;
u16 reg;
dev_dbg(&priv->udev->dev, "%s: (%pM)\n", __func__, bssid);
- reg = REG_BSSID;
+ switch (port_num) {
+ case 0:
+ reg = REG_BSSID;
+ break;
+ case 1:
+ reg = REG_BSSID1;
+ break;
+ default:
+ WARN_ONCE(1, "%s: invalid port_num\n", __func__);
+ return -EINVAL;
+ }
for (i = 0; i < ETH_ALEN; i++)
rtl8xxxu_write8(priv, reg + i, bssid[i]);
@@ -4025,10 +4053,13 @@ static inline u8 rtl8xxxu_get_macid(struct rtl8xxxu_priv *priv,
{
struct rtl8xxxu_sta_info *sta_info;
- if (!priv->vif || priv->vif->type == NL80211_IFTYPE_STATION || !sta)
+ if (!sta)
return 0;
sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+ if (!sta_info)
+ return 0;
+
return sta_info->macid;
}
@@ -4235,9 +4266,6 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw)
rtl8xxxu_write32(priv, REG_HIMR, 0xffffffff);
}
- rtl8xxxu_set_mac(priv);
- rtl8xxxu_set_linktype(priv, NL80211_IFTYPE_STATION);
-
/*
* Configure initial WMAC settings
*/
@@ -4511,6 +4539,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw)
rtl8188e_ra_info_init_all(&priv->ra_info);
set_bit(RTL8XXXU_BC_MC_MACID, priv->mac_id_map);
+ set_bit(RTL8XXXU_BC_MC_MACID1, priv->mac_id_map);
exit:
return ret;
@@ -4530,8 +4559,10 @@ static void rtl8xxxu_cam_write(struct rtl8xxxu_priv *priv,
* This is a bit of a hack - the lower bits of the cipher
* suite selector happens to match the cipher index in the CAM
*/
- addr = key->keyidx << CAM_CMD_KEY_SHIFT;
+ addr = key->hw_key_idx << CAM_CMD_KEY_SHIFT;
ctrl = (key->cipher & 0x0f) << 2 | key->keyidx | CAM_WRITE_VALID;
+ if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
+ ctrl |= BIT(6);
for (j = 5; j >= 0; j--) {
switch (j) {
@@ -4574,7 +4605,7 @@ static int rtl8xxxu_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
{
struct rtl8xxxu_priv *priv = hw->priv;
- schedule_work(&priv->update_beacon_work);
+ schedule_delayed_work(&priv->update_beacon_work, 0);
return 0;
}
@@ -4839,10 +4870,9 @@ static void rtl8xxxu_set_basic_rates(struct rtl8xxxu_priv *priv, u32 rate_cfg)
dev_dbg(&priv->udev->dev, "%s: rates %08x\n", __func__, rate_cfg);
- while (rate_cfg) {
- rate_cfg = (rate_cfg >> 1);
- rate_idx++;
- }
+ if (rate_cfg)
+ rate_idx = __fls(rate_cfg);
+
rtl8xxxu_write8(priv, REG_INIRTS_RATE_SEL, rate_idx);
}
@@ -4888,14 +4918,20 @@ static void rtl8xxxu_set_aifs(struct rtl8xxxu_priv *priv, u8 slot_time)
u8 aifs, aifsn, sifs;
int i;
- if (priv->vif) {
+ for (i = 0; i < ARRAY_SIZE(priv->vifs); i++) {
struct ieee80211_sta *sta;
+ if (!priv->vifs[i])
+ continue;
+
rcu_read_lock();
- sta = ieee80211_find_sta(priv->vif, priv->vif->bss_conf.bssid);
+ sta = ieee80211_find_sta(priv->vifs[i], priv->vifs[i]->bss_conf.bssid);
if (sta)
wireless_mode = rtl8xxxu_wireless_mode(priv->hw, sta);
rcu_read_unlock();
+
+ if (wireless_mode)
+ break;
}
if (priv->hw->conf.chandef.chan->band == NL80211_BAND_5GHZ ||
@@ -4952,19 +4988,21 @@ static void
rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *bss_conf, u64 changed)
{
+ struct rtl8xxxu_vif *rtlvif = (struct rtl8xxxu_vif *)vif->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
struct device *dev = &priv->udev->dev;
+ struct rtl8xxxu_sta_info *sta_info;
struct ieee80211_sta *sta;
struct rtl8xxxu_ra_report *rarpt;
+ u8 val8, macid;
u32 val32;
- u8 val8;
rarpt = &priv->ra_report;
if (changed & BSS_CHANGED_ASSOC) {
dev_dbg(dev, "Changed ASSOC: %i!\n", vif->cfg.assoc);
- rtl8xxxu_set_linktype(priv, vif->type);
+ rtl8xxxu_set_linktype(priv, vif->type, rtlvif->port_num);
if (vif->cfg.assoc) {
u32 ramask;
@@ -4980,6 +5018,7 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
rcu_read_unlock();
goto error;
}
+ macid = rtl8xxxu_get_macid(priv, sta);
if (sta->deflink.ht_cap.ht_supported)
dev_info(dev, "%s: HT supported\n", __func__);
@@ -5000,19 +5039,20 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
bw = RATE_INFO_BW_40;
else
bw = RATE_INFO_BW_20;
+
+ sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+ sta_info->rssi_level = RTL8XXXU_RATR_STA_INIT;
rcu_read_unlock();
rtl8xxxu_update_ra_report(rarpt, highest_rate, sgi, bw);
- priv->vif = vif;
- priv->rssi_level = RTL8XXXU_RATR_STA_INIT;
-
priv->fops->update_rate_mask(priv, ramask, 0, sgi,
- bw == RATE_INFO_BW_40, 0);
+ bw == RATE_INFO_BW_40, macid);
rtl8xxxu_write8(priv, REG_BCN_MAX_ERR, 0xff);
- rtl8xxxu_stop_tx_beacon(priv);
+ if (rtlvif->port_num == 0)
+ rtl8xxxu_stop_tx_beacon(priv);
/* joinbss sequence */
rtl8xxxu_write16(priv, REG_BCN_PSR_RPT,
@@ -5054,7 +5094,7 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
if (changed & BSS_CHANGED_BSSID) {
dev_dbg(dev, "Changed BSSID!\n");
- rtl8xxxu_set_bssid(priv, bss_conf->bssid);
+ rtl8xxxu_set_bssid(priv, bss_conf->bssid, rtlvif->port_num);
}
if (changed & BSS_CHANGED_BASIC_RATES) {
@@ -5070,7 +5110,7 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
}
if (changed & BSS_CHANGED_BEACON)
- schedule_work(&priv->update_beacon_work);
+ schedule_delayed_work(&priv->update_beacon_work, 0);
error:
return;
@@ -5079,11 +5119,12 @@ error:
static int rtl8xxxu_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
+ struct rtl8xxxu_vif *rtlvif = (struct rtl8xxxu_vif *)vif->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
struct device *dev = &priv->udev->dev;
dev_dbg(dev, "Start AP mode\n");
- rtl8xxxu_set_bssid(priv, vif->bss_conf.bssid);
+ rtl8xxxu_set_bssid(priv, vif->bss_conf.bssid, rtlvif->port_num);
rtl8xxxu_write16(priv, REG_BCN_INTERVAL, vif->bss_conf.beacon_int);
priv->fops->report_connect(priv, RTL8XXXU_BC_MC_MACID, 0, true);
@@ -5509,13 +5550,14 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
struct rtl8xxxu_tx_urb *tx_urb;
struct ieee80211_sta *sta = NULL;
struct ieee80211_vif *vif = tx_info->control.vif;
+ struct rtl8xxxu_vif *rtlvif = vif ? (struct rtl8xxxu_vif *)vif->drv_priv : NULL;
struct device *dev = &priv->udev->dev;
u32 queue, rts_rate;
u16 pktlen = skb->len;
int tx_desc_size = priv->fops->tx_desc_size;
u8 macid;
int ret;
- bool ampdu_enable, sgi = false, short_preamble = false;
+ bool ampdu_enable, sgi = false, short_preamble = false, bmc = false;
if (skb_headroom(skb) < tx_desc_size) {
dev_warn(dev,
@@ -5557,10 +5599,14 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
tx_desc->txdw0 =
TXDESC_OWN | TXDESC_FIRST_SEGMENT | TXDESC_LAST_SEGMENT;
if (is_multicast_ether_addr(ieee80211_get_DA(hdr)) ||
- is_broadcast_ether_addr(ieee80211_get_DA(hdr)))
+ is_broadcast_ether_addr(ieee80211_get_DA(hdr))) {
tx_desc->txdw0 |= TXDESC_BROADMULTICAST;
+ bmc = true;
+ }
+
tx_desc->txdw1 = cpu_to_le32(queue << TXDESC_QUEUE_SHIFT);
+ macid = rtl8xxxu_get_macid(priv, sta);
if (tx_info->control.hw_key) {
switch (tx_info->control.hw_key->cipher) {
@@ -5575,6 +5621,10 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
default:
break;
}
+ if (bmc && rtlvif && rtlvif->hw_key_idx != 0xff) {
+ tx_desc->txdw1 |= cpu_to_le32(TXDESC_EN_DESC_ID);
+ macid = rtlvif->hw_key_idx;
+ }
}
/* (tx_info->flags & IEEE80211_TX_CTL_AMPDU) && */
@@ -5618,7 +5668,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
else
rts_rate = 0;
- macid = rtl8xxxu_get_macid(priv, sta);
priv->fops->fill_txdesc(hw, hdr, tx_info, tx_desc, sgi, short_preamble,
ampdu_enable, rts_rate, macid);
@@ -5680,18 +5729,44 @@ static void rtl8xxxu_send_beacon_frame(struct ieee80211_hw *hw,
static void rtl8xxxu_update_beacon_work_callback(struct work_struct *work)
{
struct rtl8xxxu_priv *priv =
- container_of(work, struct rtl8xxxu_priv, update_beacon_work);
+ container_of(work, struct rtl8xxxu_priv, update_beacon_work.work);
struct ieee80211_hw *hw = priv->hw;
- struct ieee80211_vif *vif = priv->vif;
+ struct ieee80211_vif *vif = priv->vifs[0];
if (!vif) {
WARN_ONCE(true, "no vif to update beacon\n");
return;
}
+ if (vif->bss_conf.csa_active) {
+ if (ieee80211_beacon_cntdwn_is_complete(vif, 0)) {
+ ieee80211_csa_finish(vif, 0);
+ return;
+ }
+ schedule_delayed_work(&priv->update_beacon_work,
+ msecs_to_jiffies(vif->bss_conf.beacon_int));
+ }
rtl8xxxu_send_beacon_frame(hw, vif);
}
+static inline bool rtl8xxxu_is_packet_match_bssid(struct rtl8xxxu_priv *priv,
+ struct ieee80211_hdr *hdr,
+ int port_num)
+{
+ return priv->vifs[port_num] &&
+ priv->vifs[port_num]->type == NL80211_IFTYPE_STATION &&
+ priv->vifs[port_num]->cfg.assoc &&
+ ether_addr_equal(priv->vifs[port_num]->bss_conf.bssid, hdr->addr2);
+}
+
+static inline bool rtl8xxxu_is_sta_sta(struct rtl8xxxu_priv *priv)
+{
+ return (priv->vifs[0] && priv->vifs[0]->cfg.assoc &&
+ priv->vifs[0]->type == NL80211_IFTYPE_STATION) &&
+ (priv->vifs[1] && priv->vifs[1]->cfg.assoc &&
+ priv->vifs[1]->type == NL80211_IFTYPE_STATION);
+}
+
void rtl8723au_rx_parse_phystats(struct rtl8xxxu_priv *priv,
struct ieee80211_rx_status *rx_status,
struct rtl8723au_phy_stats *phy_stats,
@@ -5708,12 +5783,11 @@ void rtl8723au_rx_parse_phystats(struct rtl8xxxu_priv *priv,
rx_status->signal = priv->fops->cck_rssi(priv, phy_stats);
} else {
bool parse_cfo = priv->fops->set_crystal_cap &&
- priv->vif &&
- priv->vif->type == NL80211_IFTYPE_STATION &&
- priv->vif->cfg.assoc &&
!crc_icv_err &&
!ieee80211_is_ctl(hdr->frame_control) &&
- ether_addr_equal(priv->vif->bss_conf.bssid, hdr->addr2);
+ !rtl8xxxu_is_sta_sta(priv) &&
+ (rtl8xxxu_is_packet_match_bssid(priv, hdr, 0) ||
+ rtl8xxxu_is_packet_match_bssid(priv, hdr, 1));
if (parse_cfo) {
priv->cfo_tracking.cfo_tail[0] = phy_stats->path_cfotail[0];
@@ -5748,12 +5822,11 @@ static void jaguar2_rx_parse_phystats_type1(struct rtl8xxxu_priv *priv,
bool crc_icv_err)
{
bool parse_cfo = priv->fops->set_crystal_cap &&
- priv->vif &&
- priv->vif->type == NL80211_IFTYPE_STATION &&
- priv->vif->cfg.assoc &&
!crc_icv_err &&
!ieee80211_is_ctl(hdr->frame_control) &&
- ether_addr_equal(priv->vif->bss_conf.bssid, hdr->addr2);
+ !rtl8xxxu_is_sta_sta(priv) &&
+ (rtl8xxxu_is_packet_match_bssid(priv, hdr, 0) ||
+ rtl8xxxu_is_packet_match_bssid(priv, hdr, 1));
u8 pwdb_max = 0;
int rx_path;
@@ -6029,18 +6102,20 @@ void rtl8723bu_update_bt_link_info(struct rtl8xxxu_priv *priv, u8 bt_info)
btcoex->bt_busy = false;
}
+static inline bool rtl8xxxu_is_assoc(struct rtl8xxxu_priv *priv)
+{
+ return (priv->vifs[0] && priv->vifs[0]->cfg.assoc) ||
+ (priv->vifs[1] && priv->vifs[1]->cfg.assoc);
+}
+
static
void rtl8723bu_handle_bt_inquiry(struct rtl8xxxu_priv *priv)
{
- struct ieee80211_vif *vif;
struct rtl8xxxu_btcoex *btcoex;
- bool wifi_connected;
- vif = priv->vif;
btcoex = &priv->bt_coex;
- wifi_connected = (vif && vif->cfg.assoc);
- if (!wifi_connected) {
+ if (!rtl8xxxu_is_assoc(priv)) {
rtl8723bu_set_ps_tdma(priv, 0x8, 0x0, 0x0, 0x0, 0x0);
rtl8723bu_set_coex_with_type(priv, 0);
} else if (btcoex->has_sco || btcoex->has_hid || btcoex->has_a2dp) {
@@ -6058,15 +6133,11 @@ void rtl8723bu_handle_bt_inquiry(struct rtl8xxxu_priv *priv)
static
void rtl8723bu_handle_bt_info(struct rtl8xxxu_priv *priv)
{
- struct ieee80211_vif *vif;
struct rtl8xxxu_btcoex *btcoex;
- bool wifi_connected;
- vif = priv->vif;
btcoex = &priv->bt_coex;
- wifi_connected = (vif && vif->cfg.assoc);
- if (wifi_connected) {
+ if (rtl8xxxu_is_assoc(priv)) {
u32 val32 = 0;
u32 high_prio_tx = 0, high_prio_rx = 0;
@@ -6249,6 +6320,76 @@ static void rtl8188e_c2hcmd_callback(struct work_struct *work)
}
}
+#define rtl8xxxu_iterate_vifs_atomic(priv, iterator, data) \
+ ieee80211_iterate_active_interfaces_atomic((priv)->hw, \
+ IEEE80211_IFACE_ITER_NORMAL, iterator, data)
+
+struct rtl8xxxu_rx_update_rssi_data {
+ struct rtl8xxxu_priv *priv;
+ struct ieee80211_hdr *hdr;
+ struct ieee80211_rx_status *rx_status;
+ u8 *bssid;
+};
+
+static void rtl8xxxu_rx_update_rssi_iter(void *data, u8 *mac,
+ struct ieee80211_vif *vif)
+{
+ struct rtl8xxxu_rx_update_rssi_data *iter_data = data;
+ struct ieee80211_sta *sta;
+ struct ieee80211_hdr *hdr = iter_data->hdr;
+ struct rtl8xxxu_priv *priv = iter_data->priv;
+ struct rtl8xxxu_sta_info *sta_info;
+ struct ieee80211_rx_status *rx_status = iter_data->rx_status;
+ u8 *bssid = iter_data->bssid;
+
+ if (!ether_addr_equal(vif->bss_conf.bssid, bssid))
+ return;
+
+ if (!(ether_addr_equal(vif->addr, hdr->addr1) ||
+ ieee80211_is_beacon(hdr->frame_control)))
+ return;
+
+ sta = ieee80211_find_sta_by_ifaddr(priv->hw, hdr->addr2,
+ vif->addr);
+ if (!sta)
+ return;
+
+ sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+ ewma_rssi_add(&sta_info->avg_rssi, -rx_status->signal);
+}
+
+static inline u8 *get_hdr_bssid(struct ieee80211_hdr *hdr)
+{
+ __le16 fc = hdr->frame_control;
+ u8 *bssid;
+
+ if (ieee80211_has_tods(fc))
+ bssid = hdr->addr1;
+ else if (ieee80211_has_fromds(fc))
+ bssid = hdr->addr2;
+ else
+ bssid = hdr->addr3;
+
+ return bssid;
+}
+
+static void rtl8xxxu_rx_update_rssi(struct rtl8xxxu_priv *priv,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee80211_hdr *hdr)
+{
+ struct rtl8xxxu_rx_update_rssi_data data = {};
+
+ if (ieee80211_is_ctl(hdr->frame_control))
+ return;
+
+ data.priv = priv;
+ data.hdr = hdr;
+ data.rx_status = rx_status;
+ data.bssid = get_hdr_bssid(hdr);
+
+ rtl8xxxu_iterate_vifs_atomic(priv, rtl8xxxu_rx_update_rssi_iter, &data);
+}
+
int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
{
struct ieee80211_hw *hw = priv->hw;
@@ -6308,18 +6449,26 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
skb_queue_tail(&priv->c2hcmd_queue, skb);
schedule_work(&priv->c2hcmd_work);
} else {
+ struct ieee80211_hdr *hdr;
+
phy_stats = (struct rtl8723au_phy_stats *)skb->data;
skb_pull(skb, drvinfo_sz + desc_shift);
skb_trim(skb, pkt_len);
- if (rx_desc->phy_stats)
+ hdr = (struct ieee80211_hdr *)skb->data;
+ if (rx_desc->phy_stats) {
priv->fops->parse_phystats(
priv, rx_status, phy_stats,
rx_desc->rxmcs,
- (struct ieee80211_hdr *)skb->data,
+ hdr,
rx_desc->crc32 || rx_desc->icverr);
+ if (!rx_desc->crc32 && !rx_desc->icverr)
+ rtl8xxxu_rx_update_rssi(priv,
+ rx_status,
+ hdr);
+ }
rx_status->mactime = rx_desc->tsfl;
rx_status->flag |= RX_FLAG_MACTIME_START;
@@ -6416,10 +6565,15 @@ int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
} else {
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- if (rx_desc->phy_stats)
+ if (rx_desc->phy_stats) {
priv->fops->parse_phystats(priv, rx_status, phy_stats,
rx_desc->rxmcs, hdr,
rx_desc->crc32 || rx_desc->icverr);
+ if (!rx_desc->crc32 && !rx_desc->icverr)
+ rtl8xxxu_rx_update_rssi(priv,
+ rx_status,
+ hdr);
+ }
rx_status->mactime = rx_desc->tsfl;
rx_status->flag |= RX_FLAG_MACTIME_START;
@@ -6563,29 +6717,123 @@ error:
return ret;
}
+static void rtl8xxxu_switch_ports(struct rtl8xxxu_priv *priv)
+{
+ u8 macid[ETH_ALEN], bssid[ETH_ALEN], macid_1[ETH_ALEN], bssid_1[ETH_ALEN];
+ u8 msr, bcn_ctrl, bcn_ctrl_1, atimwnd[2], atimwnd_1[2];
+ struct rtl8xxxu_vif *rtlvif;
+ struct ieee80211_vif *vif;
+ u8 tsftr[8], tsftr_1[8];
+ int i;
+
+ msr = rtl8xxxu_read8(priv, REG_MSR);
+ bcn_ctrl = rtl8xxxu_read8(priv, REG_BEACON_CTRL);
+ bcn_ctrl_1 = rtl8xxxu_read8(priv, REG_BEACON_CTRL_1);
+
+ for (i = 0; i < ARRAY_SIZE(atimwnd); i++)
+ atimwnd[i] = rtl8xxxu_read8(priv, REG_ATIMWND + i);
+ for (i = 0; i < ARRAY_SIZE(atimwnd_1); i++)
+ atimwnd_1[i] = rtl8xxxu_read8(priv, REG_ATIMWND_1 + i);
+
+ for (i = 0; i < ARRAY_SIZE(tsftr); i++)
+ tsftr[i] = rtl8xxxu_read8(priv, REG_TSFTR + i);
+ for (i = 0; i < ARRAY_SIZE(tsftr); i++)
+ tsftr_1[i] = rtl8xxxu_read8(priv, REG_TSFTR1 + i);
+
+ for (i = 0; i < ARRAY_SIZE(macid); i++)
+ macid[i] = rtl8xxxu_read8(priv, REG_MACID + i);
+
+ for (i = 0; i < ARRAY_SIZE(bssid); i++)
+ bssid[i] = rtl8xxxu_read8(priv, REG_BSSID + i);
+
+ for (i = 0; i < ARRAY_SIZE(macid_1); i++)
+ macid_1[i] = rtl8xxxu_read8(priv, REG_MACID1 + i);
+
+ for (i = 0; i < ARRAY_SIZE(bssid_1); i++)
+ bssid_1[i] = rtl8xxxu_read8(priv, REG_BSSID1 + i);
+
+ /* disable bcn function, disable update TSF */
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL, (bcn_ctrl &
+ (~BEACON_FUNCTION_ENABLE)) | BEACON_DISABLE_TSF_UPDATE);
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL_1, (bcn_ctrl_1 &
+ (~BEACON_FUNCTION_ENABLE)) | BEACON_DISABLE_TSF_UPDATE);
+
+ /* switch msr */
+ msr = (msr & 0xf0) | ((msr & 0x03) << 2) | ((msr & 0x0c) >> 2);
+ rtl8xxxu_write8(priv, REG_MSR, msr);
+
+ /* write port0 */
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL, bcn_ctrl_1 & ~BEACON_FUNCTION_ENABLE);
+ for (i = 0; i < ARRAY_SIZE(atimwnd_1); i++)
+ rtl8xxxu_write8(priv, REG_ATIMWND + i, atimwnd_1[i]);
+ for (i = 0; i < ARRAY_SIZE(tsftr_1); i++)
+ rtl8xxxu_write8(priv, REG_TSFTR + i, tsftr_1[i]);
+ for (i = 0; i < ARRAY_SIZE(macid_1); i++)
+ rtl8xxxu_write8(priv, REG_MACID + i, macid_1[i]);
+ for (i = 0; i < ARRAY_SIZE(bssid_1); i++)
+ rtl8xxxu_write8(priv, REG_BSSID + i, bssid_1[i]);
+
+ /* write port1 */
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL_1, bcn_ctrl & ~BEACON_FUNCTION_ENABLE);
+ for (i = 0; i < ARRAY_SIZE(atimwnd); i++)
+ rtl8xxxu_write8(priv, REG_ATIMWND_1 + i, atimwnd[i]);
+ for (i = 0; i < ARRAY_SIZE(tsftr); i++)
+ rtl8xxxu_write8(priv, REG_TSFTR1 + i, tsftr[i]);
+ for (i = 0; i < ARRAY_SIZE(macid); i++)
+ rtl8xxxu_write8(priv, REG_MACID1 + i, macid[i]);
+ for (i = 0; i < ARRAY_SIZE(bssid); i++)
+ rtl8xxxu_write8(priv, REG_BSSID1 + i, bssid[i]);
+
+ /* write bcn ctl */
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL, bcn_ctrl_1);
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL_1, bcn_ctrl);
+
+ vif = priv->vifs[0];
+ priv->vifs[0] = priv->vifs[1];
+ priv->vifs[1] = vif;
+
+ /* priv->vifs[0] is NULL here, based on how this function is currently
+ * called from rtl8xxxu_add_interface().
+ * When this function will be used in the future for a different
+ * scenario, please check whether vifs[0] or vifs[1] can be NULL and if
+ * necessary add code to set port_num = 1.
+ */
+ rtlvif = (struct rtl8xxxu_vif *)priv->vifs[1]->drv_priv;
+ rtlvif->port_num = 1;
+}
+
static int rtl8xxxu_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
+ struct rtl8xxxu_vif *rtlvif = (struct rtl8xxxu_vif *)vif->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
- int ret;
+ int port_num;
u8 val8;
- if (!priv->vif)
- priv->vif = vif;
+ if (!priv->vifs[0])
+ port_num = 0;
+ else if (!priv->vifs[1])
+ port_num = 1;
else
return -EOPNOTSUPP;
switch (vif->type) {
case NL80211_IFTYPE_STATION:
- rtl8xxxu_stop_tx_beacon(priv);
+ if (port_num == 0) {
+ rtl8xxxu_stop_tx_beacon(priv);
- val8 = rtl8xxxu_read8(priv, REG_BEACON_CTRL);
- val8 |= BEACON_ATIM | BEACON_FUNCTION_ENABLE |
- BEACON_DISABLE_TSF_UPDATE;
- rtl8xxxu_write8(priv, REG_BEACON_CTRL, val8);
- ret = 0;
+ val8 = rtl8xxxu_read8(priv, REG_BEACON_CTRL);
+ val8 |= BEACON_ATIM | BEACON_FUNCTION_ENABLE |
+ BEACON_DISABLE_TSF_UPDATE;
+ rtl8xxxu_write8(priv, REG_BEACON_CTRL, val8);
+ }
break;
case NL80211_IFTYPE_AP:
+ if (port_num == 1) {
+ rtl8xxxu_switch_ports(priv);
+ port_num = 0;
+ }
+
rtl8xxxu_write8(priv, REG_BEACON_CTRL,
BEACON_DISABLE_TSF_UPDATE | BEACON_CTRL_MBSSID);
rtl8xxxu_write8(priv, REG_ATIMWND, 0x0c); /* 12ms */
@@ -6602,29 +6850,31 @@ static int rtl8xxxu_add_interface(struct ieee80211_hw *hw,
val8 = rtl8xxxu_read8(priv, REG_CCK_CHECK);
val8 &= ~BIT_BCN_PORT_SEL;
rtl8xxxu_write8(priv, REG_CCK_CHECK, val8);
-
- ret = 0;
break;
default:
- ret = -EOPNOTSUPP;
+ return -EOPNOTSUPP;
}
- rtl8xxxu_set_linktype(priv, vif->type);
+ priv->vifs[port_num] = vif;
+ rtlvif->port_num = port_num;
+ rtlvif->hw_key_idx = 0xff;
+
+ rtl8xxxu_set_linktype(priv, vif->type, port_num);
ether_addr_copy(priv->mac_addr, vif->addr);
- rtl8xxxu_set_mac(priv);
+ rtl8xxxu_set_mac(priv, port_num);
- return ret;
+ return 0;
}
static void rtl8xxxu_remove_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
+ struct rtl8xxxu_vif *rtlvif = (struct rtl8xxxu_vif *)vif->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
dev_dbg(&priv->udev->dev, "%s\n", __func__);
- if (priv->vif)
- priv->vif = NULL;
+ priv->vifs[rtlvif->port_num] = NULL;
}
static int rtl8xxxu_config(struct ieee80211_hw *hw, u32 changed)
@@ -6746,8 +6996,8 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
else
rcr |= RCR_CHECK_BSSID_BEACON | RCR_CHECK_BSSID_MATCH;
- if (priv->vif && priv->vif->type == NL80211_IFTYPE_AP)
- rcr &= ~RCR_CHECK_BSSID_MATCH;
+ if (priv->vifs[0] && priv->vifs[0]->type == NL80211_IFTYPE_AP)
+ rcr &= ~(RCR_CHECK_BSSID_MATCH | RCR_CHECK_BSSID_BEACON);
if (*total_flags & FIF_CONTROL)
rcr |= RCR_ACCEPT_CTRL_FRAME;
@@ -6784,11 +7034,19 @@ static int rtl8xxxu_set_rts_threshold(struct ieee80211_hw *hw, u32 rts)
return 0;
}
+static int rtl8xxxu_get_free_sec_cam(struct ieee80211_hw *hw)
+{
+ struct rtl8xxxu_priv *priv = hw->priv;
+
+ return find_first_zero_bit(priv->cam_map, priv->fops->max_sec_cam_num);
+}
+
static int rtl8xxxu_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
struct ieee80211_key_conf *key)
{
+ struct rtl8xxxu_vif *rtlvif = (struct rtl8xxxu_vif *)vif->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
struct device *dev = &priv->udev->dev;
u8 mac_addr[ETH_ALEN];
@@ -6800,9 +7058,6 @@ static int rtl8xxxu_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
dev_dbg(dev, "%s: cmd %02x, cipher %08x, index %i\n",
__func__, cmd, key->cipher, key->keyidx);
- if (vif->type != NL80211_IFTYPE_STATION)
- return -EOPNOTSUPP;
-
if (key->keyidx > 3)
return -EOPNOTSUPP;
@@ -6826,7 +7081,7 @@ static int rtl8xxxu_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
ether_addr_copy(mac_addr, sta->addr);
} else {
dev_dbg(dev, "%s: group key\n", __func__);
- eth_broadcast_addr(mac_addr);
+ ether_addr_copy(mac_addr, vif->bss_conf.bssid);
}
val16 = rtl8xxxu_read16(priv, REG_CR);
@@ -6840,16 +7095,28 @@ static int rtl8xxxu_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
switch (cmd) {
case SET_KEY:
- key->hw_key_idx = key->keyidx;
+
+ retval = rtl8xxxu_get_free_sec_cam(hw);
+ if (retval < 0)
+ return -EOPNOTSUPP;
+
+ key->hw_key_idx = retval;
+
+ if (vif->type == NL80211_IFTYPE_AP && !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
+ rtlvif->hw_key_idx = key->hw_key_idx;
+
key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV;
rtl8xxxu_cam_write(priv, key, mac_addr);
+ set_bit(key->hw_key_idx, priv->cam_map);
retval = 0;
break;
case DISABLE_KEY:
rtl8xxxu_write32(priv, REG_CAM_WRITE, 0x00000000);
val32 = CAM_CMD_POLLING | CAM_CMD_WRITE |
- key->keyidx << CAM_CMD_KEY_SHIFT;
+ key->hw_key_idx << CAM_CMD_KEY_SHIFT;
rtl8xxxu_write32(priv, REG_CAM_CMD, val32);
+ rtlvif->hw_key_idx = 0xff;
+ clear_bit(key->hw_key_idx, priv->cam_map);
retval = 0;
break;
default:
@@ -6930,6 +7197,7 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
int signal, struct ieee80211_sta *sta,
bool force)
{
+ struct rtl8xxxu_sta_info *sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
struct ieee80211_hw *hw = priv->hw;
u16 wireless_mode;
u8 rssi_level, ratr_idx;
@@ -6938,7 +7206,7 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
u8 go_up_gap = 5;
u8 macid = rtl8xxxu_get_macid(priv, sta);
- rssi_level = priv->rssi_level;
+ rssi_level = sta_info->rssi_level;
snr = rtl8xxxu_signal_to_snr(signal);
snr_thresh_high = RTL8XXXU_SNR_THRESH_HIGH;
snr_thresh_low = RTL8XXXU_SNR_THRESH_LOW;
@@ -6963,18 +7231,16 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
else
rssi_level = RTL8XXXU_RATR_STA_LOW;
- if (rssi_level != priv->rssi_level || force) {
+ if (rssi_level != sta_info->rssi_level || force) {
int sgi = 0;
u32 rate_bitmap = 0;
- rcu_read_lock();
rate_bitmap = (sta->deflink.supp_rates[0] & 0xfff) |
(sta->deflink.ht_cap.mcs.rx_mask[0] << 12) |
(sta->deflink.ht_cap.mcs.rx_mask[1] << 20);
if (sta->deflink.ht_cap.cap &
(IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))
sgi = 1;
- rcu_read_unlock();
wireless_mode = rtl8xxxu_wireless_mode(hw, sta);
switch (wireless_mode) {
@@ -7055,7 +7321,7 @@ static void rtl8xxxu_refresh_rate_mask(struct rtl8xxxu_priv *priv,
break;
}
- priv->rssi_level = rssi_level;
+ sta_info->rssi_level = rssi_level;
priv->fops->update_rate_mask(priv, rate_bitmap, ratr_idx, sgi, txbw_40mhz, macid);
}
}
@@ -7085,7 +7351,7 @@ static void rtl8xxxu_track_cfo(struct rtl8xxxu_priv *priv)
int cfo_khz_a, cfo_khz_b, cfo_average;
int crystal_cap;
- if (!priv->vif || !priv->vif->cfg.assoc) {
+ if (!rtl8xxxu_is_assoc(priv)) {
/* Reset */
cfo->adjust = true;
@@ -7148,41 +7414,64 @@ static void rtl8xxxu_track_cfo(struct rtl8xxxu_priv *priv)
rtl8xxxu_set_atc_status(priv, abs(cfo_average) >= CFO_TH_ATC);
}
-static void rtl8xxxu_watchdog_callback(struct work_struct *work)
+static void rtl8xxxu_ra_iter(void *data, struct ieee80211_sta *sta)
{
- struct ieee80211_vif *vif;
- struct rtl8xxxu_priv *priv;
+ struct rtl8xxxu_sta_info *sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+ struct rtl8xxxu_priv *priv = data;
+ int signal = -ewma_rssi_read(&sta_info->avg_rssi);
- priv = container_of(work, struct rtl8xxxu_priv, ra_watchdog.work);
- vif = priv->vif;
+ priv->fops->report_rssi(priv, rtl8xxxu_get_macid(priv, sta),
+ rtl8xxxu_signal_to_snr(signal));
+ rtl8xxxu_refresh_rate_mask(priv, signal, sta, false);
+}
- if (vif && vif->type == NL80211_IFTYPE_STATION) {
- int signal;
- struct ieee80211_sta *sta;
+struct rtl8xxxu_stas_entry {
+ struct list_head list;
+ struct ieee80211_sta *sta;
+};
- rcu_read_lock();
- sta = ieee80211_find_sta(vif, vif->bss_conf.bssid);
- if (!sta) {
- struct device *dev = &priv->udev->dev;
+struct rtl8xxxu_iter_stas_data {
+ struct rtl8xxxu_priv *priv;
+ struct list_head list;
+};
- dev_dbg(dev, "%s: no sta found\n", __func__);
- rcu_read_unlock();
- goto out;
- }
- rcu_read_unlock();
+static void rtl8xxxu_collect_sta_iter(void *data, struct ieee80211_sta *sta)
+{
+ struct rtl8xxxu_iter_stas_data *iter_stas = data;
+ struct rtl8xxxu_stas_entry *stas_entry;
- signal = ieee80211_ave_rssi(vif);
+ stas_entry = kmalloc(sizeof(*stas_entry), GFP_ATOMIC);
+ if (!stas_entry)
+ return;
- priv->fops->report_rssi(priv, 0,
- rtl8xxxu_signal_to_snr(signal));
+ stas_entry->sta = sta;
+ list_add_tail(&stas_entry->list, &iter_stas->list);
+}
- if (priv->fops->set_crystal_cap)
- rtl8xxxu_track_cfo(priv);
+static void rtl8xxxu_watchdog_callback(struct work_struct *work)
+{
- rtl8xxxu_refresh_rate_mask(priv, signal, sta, false);
+ struct rtl8xxxu_iter_stas_data iter_data;
+ struct rtl8xxxu_stas_entry *sta_entry, *tmp;
+ struct rtl8xxxu_priv *priv;
+
+ priv = container_of(work, struct rtl8xxxu_priv, ra_watchdog.work);
+ iter_data.priv = priv;
+ INIT_LIST_HEAD(&iter_data.list);
+
+ mutex_lock(&priv->sta_mutex);
+ ieee80211_iterate_stations_atomic(priv->hw, rtl8xxxu_collect_sta_iter,
+ &iter_data);
+ list_for_each_entry_safe(sta_entry, tmp, &iter_data.list, list) {
+ list_del_init(&sta_entry->list);
+ rtl8xxxu_ra_iter(priv, sta_entry->sta);
+ kfree(sta_entry);
}
+ mutex_unlock(&priv->sta_mutex);
+
+ if (priv->fops->set_crystal_cap)
+ rtl8xxxu_track_cfo(priv);
-out:
schedule_delayed_work(&priv->ra_watchdog, 2 * HZ);
}
@@ -7304,7 +7593,9 @@ static void rtl8xxxu_stop(struct ieee80211_hw *hw)
if (priv->usb_interrupts)
rtl8xxxu_write32(priv, REG_USB_HIMR, 0);
+ cancel_work_sync(&priv->c2hcmd_work);
cancel_delayed_work_sync(&priv->ra_watchdog);
+ cancel_delayed_work_sync(&priv->update_beacon_work);
rtl8xxxu_free_rx_resources(priv);
rtl8xxxu_free_tx_resources(priv);
@@ -7315,16 +7606,34 @@ static int rtl8xxxu_sta_add(struct ieee80211_hw *hw,
struct ieee80211_sta *sta)
{
struct rtl8xxxu_sta_info *sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
+ struct rtl8xxxu_vif *rtlvif = (struct rtl8xxxu_vif *)vif->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
+ mutex_lock(&priv->sta_mutex);
+ ewma_rssi_init(&sta_info->avg_rssi);
if (vif->type == NL80211_IFTYPE_AP) {
+ sta_info->rssi_level = RTL8XXXU_RATR_STA_INIT;
sta_info->macid = rtl8xxxu_acquire_macid(priv);
- if (sta_info->macid >= RTL8XXXU_MAX_MAC_ID_NUM)
+ if (sta_info->macid >= RTL8XXXU_MAX_MAC_ID_NUM) {
+ mutex_unlock(&priv->sta_mutex);
return -ENOSPC;
+ }
rtl8xxxu_refresh_rate_mask(priv, 0, sta, true);
priv->fops->report_connect(priv, sta_info->macid, H2C_MACID_ROLE_STA, true);
+ } else {
+ switch (rtlvif->port_num) {
+ case 0:
+ sta_info->macid = RTL8XXXU_BC_MC_MACID;
+ break;
+ case 1:
+ sta_info->macid = RTL8XXXU_BC_MC_MACID1;
+ break;
+ default:
+ break;
+ }
}
+ mutex_unlock(&priv->sta_mutex);
return 0;
}
@@ -7336,13 +7645,19 @@ static int rtl8xxxu_sta_remove(struct ieee80211_hw *hw,
struct rtl8xxxu_sta_info *sta_info = (struct rtl8xxxu_sta_info *)sta->drv_priv;
struct rtl8xxxu_priv *priv = hw->priv;
+ mutex_lock(&priv->sta_mutex);
if (vif->type == NL80211_IFTYPE_AP)
rtl8xxxu_release_macid(priv, sta_info->macid);
+ mutex_unlock(&priv->sta_mutex);
return 0;
}
static const struct ieee80211_ops rtl8xxxu_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rtl8xxxu_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.add_interface = rtl8xxxu_add_interface,
@@ -7476,6 +7791,20 @@ static void rtl8xxxu_deinit_led(struct rtl8xxxu_priv *priv)
led_classdev_unregister(led);
}
+static const struct ieee80211_iface_limit rtl8xxxu_limits[] = {
+ { .max = 2, .types = BIT(NL80211_IFTYPE_STATION), },
+ { .max = 1, .types = BIT(NL80211_IFTYPE_AP), },
+};
+
+static const struct ieee80211_iface_combination rtl8xxxu_combinations[] = {
+ {
+ .limits = rtl8xxxu_limits,
+ .n_limits = ARRAY_SIZE(rtl8xxxu_limits),
+ .max_interfaces = 2,
+ .num_different_channels = 1,
+ },
+};
+
static int rtl8xxxu_probe(struct usb_interface *interface,
const struct usb_device_id *id)
{
@@ -7522,7 +7851,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
untested = 0;
break;
case 0x2357:
- if (id->idProduct == 0x0109)
+ if (id->idProduct == 0x0109 || id->idProduct == 0x0135)
untested = 0;
break;
case 0x0b05:
@@ -7555,13 +7884,14 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
mutex_init(&priv->usb_buf_mutex);
mutex_init(&priv->syson_indirect_access_mutex);
mutex_init(&priv->h2c_mutex);
+ mutex_init(&priv->sta_mutex);
INIT_LIST_HEAD(&priv->tx_urb_free_list);
spin_lock_init(&priv->tx_urb_lock);
INIT_LIST_HEAD(&priv->rx_urb_pending_list);
spin_lock_init(&priv->rx_urb_lock);
INIT_WORK(&priv->rx_urb_wq, rtl8xxxu_rx_urb_work);
INIT_DELAYED_WORK(&priv->ra_watchdog, rtl8xxxu_watchdog_callback);
- INIT_WORK(&priv->update_beacon_work, rtl8xxxu_update_beacon_work_callback);
+ INIT_DELAYED_WORK(&priv->update_beacon_work, rtl8xxxu_update_beacon_work_callback);
skb_queue_head_init(&priv->c2hcmd_queue);
usb_set_intfdata(interface, hw);
@@ -7611,6 +7941,8 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
if (ret)
goto err_set_intfdata;
+ hw->vif_data_size = sizeof(struct rtl8xxxu_vif);
+
hw->wiphy->max_scan_ssids = 1;
hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
if (priv->fops->max_macid_num)
@@ -7620,6 +7952,13 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP);
hw->queues = 4;
+ hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
+
+ if (priv->fops->supports_concurrent) {
+ hw->wiphy->iface_combinations = rtl8xxxu_combinations;
+ hw->wiphy->n_iface_combinations = ARRAY_SIZE(rtl8xxxu_combinations);
+ }
+
sband = &rtl8xxxu_supported_band;
sband->ht_cap.ht_supported = true;
sband->ht_cap.ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
@@ -7806,6 +8145,9 @@ static const struct usb_device_id dev_table[] = {
.driver_info = (unsigned long)&rtl8192fu_fops},
{USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x318b, 0xff, 0xff, 0xff),
.driver_info = (unsigned long)&rtl8192fu_fops},
+/* TP-Link TL-WN823N V2 */
+{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0135, 0xff, 0xff, 0xff),
+ .driver_info = (unsigned long)&rtl8192fu_fops},
#ifdef CONFIG_RTL8XXXU_UNTESTED
/* Still supported by rtlwifi */
{USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8176, 0xff, 0xff, 0xff),
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
index 920ee50e2115..61c0c0ec07b3 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
@@ -146,6 +146,21 @@
#define GPIO_INTM_EDGE_TRIG_IRQ BIT(9)
#define REG_LEDCFG0 0x004c
+#define LEDCFG0_LED0CM GENMASK(2, 0)
+#define LEDCFG0_LED1CM GENMASK(10, 8)
+#define LED_MODE_SW_CTRL 0x0
+#define LED_MODE_TX_OR_RX_EVENTS 0x3
+#define LEDCFG0_LED0SV BIT(3)
+#define LEDCFG0_LED1SV BIT(11)
+#define LED_SW_OFF 0x0
+#define LED_SW_ON 0x1
+#define LEDCFG0_LED0_IO_MODE BIT(7)
+#define LEDCFG0_LED1_IO_MODE BIT(15)
+#define LED_IO_MODE_OUTPUT 0x0
+#define LED_IO_MODE_INPUT 0x1
+#define LEDCFG0_LED2EN BIT(21)
+#define LED_GPIO_DISABLE 0x0
+#define LED_GPIO_ENABLE 0x1
#define LEDCFG0_DPDT_SELECT BIT(23)
#define REG_LEDCFG1 0x004d
#define LEDCFG1_HW_LED_CONTROL BIT(1)
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 69e97647e3d6..2e60a6991ca1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1903,6 +1903,10 @@ void rtl_init_sw_leds(struct ieee80211_hw *hw)
EXPORT_SYMBOL(rtl_init_sw_leds);
const struct ieee80211_ops rtl_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.start = rtl_op_start,
.stop = rtl_op_stop,
.tx = rtl_op_tx,
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c b/drivers/net/wireless/realtek/rtlwifi/efuse.c
index 2e945554ed6d..c1fbc29d5ca1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
@@ -1287,18 +1287,44 @@ int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
}
EXPORT_SYMBOL_GPL(rtl_get_hwinfo);
-void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size)
+static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
+{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ u32 start = START_ADDRESS;
+ u32 n;
+
+ while (size > 0) {
+ if (size >= 64)
+ n = 64;
+ else if (size >= 8)
+ n = 8;
+ else
+ n = 1;
+
+ rtl_write_chunk(rtlpriv, start, n, buffer);
+
+ start += n;
+ buffer += n;
+ size -= n;
+ }
+}
+
+void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
- u8 *pu4byteptr = (u8 *)buffer;
u32 i;
- for (i = 0; i < size; i++)
- rtl_write_byte(rtlpriv, (START_ADDRESS + i), *(pu4byteptr + i));
+ if (rtlpriv->rtlhal.interface == INTF_PCI) {
+ for (i = 0; i < size; i++)
+ rtl_write_byte(rtlpriv, (START_ADDRESS + i),
+ *(buffer + i));
+ } else if (rtlpriv->rtlhal.interface == INTF_USB) {
+ _rtl_fw_block_write_usb(hw, buffer, size);
+ }
}
EXPORT_SYMBOL_GPL(rtl_fw_block_write);
-void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
+void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
u32 size)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h b/drivers/net/wireless/realtek/rtlwifi/efuse.h
index 1ec59f439382..4821625ad1e5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.h
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h
@@ -91,8 +91,8 @@ void efuse_power_switch(struct ieee80211_hw *hw, u8 write, u8 pwrstate);
int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
int max_size, u8 *hwinfo, int *params);
void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
-void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
+void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
u32 size);
-void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
+void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size);
void rtl_efuse_ops_init(struct ieee80211_hw *hw);
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 96ce05bcf0b3..11709b6c83f1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -378,13 +378,13 @@ static void _rtl_pci_io_handler_init(struct device *dev,
rtlpriv->io.dev = dev;
- rtlpriv->io.write8_async = pci_write8_async;
- rtlpriv->io.write16_async = pci_write16_async;
- rtlpriv->io.write32_async = pci_write32_async;
+ rtlpriv->io.write8 = pci_write8_async;
+ rtlpriv->io.write16 = pci_write16_async;
+ rtlpriv->io.write32 = pci_write32_async;
- rtlpriv->io.read8_sync = pci_read8_sync;
- rtlpriv->io.read16_sync = pci_read16_sync;
- rtlpriv->io.read32_sync = pci_read32_sync;
+ rtlpriv->io.read8 = pci_read8_sync;
+ rtlpriv->io.read16 = pci_read16_sync;
+ rtlpriv->io.read32 = pci_read32_sync;
}
static bool _rtl_update_earlymode_info(struct ieee80211_hw *hw,
@@ -2374,7 +2374,6 @@ EXPORT_SYMBOL(rtl_pci_resume);
#endif /* CONFIG_PM_SLEEP */
const struct rtl_intf_ops rtl_pci_ops = {
- .read_efuse_byte = read_efuse_byte,
.adapter_start = rtl_pci_start,
.adapter_stop = rtl_pci_stop,
.check_buddy_priv = rtl_pci_check_buddy_priv,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c
index 50e139186a93..ed151754fc6e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c
@@ -350,7 +350,6 @@ void rtl92ce_tx_fill_desc(struct ieee80211_hw *hw,
struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
- bool defaultadapter = true;
__le32 *pdesc = (__le32 *)pdesc8;
u16 seq_number;
__le16 fc = hdr->frame_control;
@@ -503,9 +502,6 @@ void rtl92ce_tx_fill_desc(struct ieee80211_hw *hw,
if ((!ieee80211_is_data_qos(fc)) && ppsc->fwctrl_lps) {
set_tx_desc_hwseq_en(pdesc, 1);
set_tx_desc_pkt_id(pdesc, 8);
-
- if (!defaultadapter)
- set_tx_desc_qos(pdesc, 1);
}
set_tx_desc_more_frag(pdesc, (lastseg ? 0 : 1));
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/def.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/def.h
index 91e4427ab022..4757f93b84e4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/def.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/def.h
@@ -11,7 +11,7 @@
#define CHIP_VENDOR_UMC_B_CUT BIT(6)
#define IS_92C_1T2R(version) \
- (((version) & CHIP_92C) && ((version) & CHIP_92C_1T2R))
+ (((version) & CHIP_92C_1T2R) == CHIP_92C_1T2R)
#define IS_VENDOR_UMC(version) \
(((version) & CHIP_VENDOR_UMC) ? true : false)
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
index 5ec0eb8773a5..4217c9a08d01 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
@@ -622,6 +622,9 @@ static void _rtl92cu_init_chipn_two_out_ep_priority(struct ieee80211_hw *hw,
u16 valuelow;
switch (queue_sel) {
+ default:
+ WARN_ON(1);
+ fallthrough;
case (TX_SELE_HQ | TX_SELE_LQ):
valuehi = QUEUE_HIGH;
valuelow = QUEUE_LOW;
@@ -634,9 +637,6 @@ static void _rtl92cu_init_chipn_two_out_ep_priority(struct ieee80211_hw *hw,
valuehi = QUEUE_HIGH;
valuelow = QUEUE_NORMAL;
break;
- default:
- WARN_ON(1);
- break;
}
if (!wmm_enable) {
beq = valuelow;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
index 4ff0d4118193..a76f2dc8a977 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
@@ -101,7 +101,8 @@ void rtl92c_read_chip_version(struct ieee80211_hw *hw)
rtlphy->rf_type = RF_1T1R;
rtl_dbg(rtlpriv, COMP_INIT, DBG_LOUD,
"Chip RF Type: %s\n",
- rtlphy->rf_type == RF_2T2R ? "RF_2T2R" : "RF_1T1R");
+ rtlphy->rf_type == RF_2T2R ? "RF_2T2R" :
+ rtlphy->rf_type == RF_1T2R ? "RF_1T2R" : "RF_1T1R");
if (get_rf_type(rtlphy) == RF_1T1R)
rtlpriv->dm.rfpath_rxenable[0] = true;
else
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
index 20b4aac69642..48be7e346efc 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
@@ -40,7 +40,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
rtlpriv->dm.thermalvalue = 0;
/* for firmware buf */
- rtlpriv->rtlhal.pfirmware = vzalloc(0x4000);
+ rtlpriv->rtlhal.pfirmware = kmalloc(0x4000, GFP_KERNEL);
if (!rtlpriv->rtlhal.pfirmware) {
pr_err("Can't alloc buffer for fw\n");
return 1;
@@ -61,7 +61,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
fw_name, rtlpriv->io.dev,
GFP_KERNEL, hw, rtl_fw_cb);
if (err) {
- vfree(rtlpriv->rtlhal.pfirmware);
+ kfree(rtlpriv->rtlhal.pfirmware);
rtlpriv->rtlhal.pfirmware = NULL;
}
return err;
@@ -72,7 +72,7 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw)
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->rtlhal.pfirmware) {
- vfree(rtlpriv->rtlhal.pfirmware);
+ kfree(rtlpriv->rtlhal.pfirmware);
rtlpriv->rtlhal.pfirmware = NULL;
}
}
@@ -145,7 +145,6 @@ MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
static struct rtl_hal_usbint_cfg rtl92cu_interface_cfg = {
/* rx */
- .in_ep_num = RTL92C_USB_BULK_IN_NUM,
.rx_urb_num = RTL92C_NUM_RX_URBS,
.rx_max_size = RTL92C_SIZE_MAX_RX_BUFFER,
.usb_rx_hdl = rtl8192cu_rx_hdl,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
index 2f44c8aa6066..aa702ba7c9f5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
@@ -79,68 +79,75 @@ static int configvernoutep(struct ieee80211_hw *hw)
static void twooutepmapping(struct ieee80211_hw *hw, bool is_chip8,
bool bwificfg, struct rtl_ep_map *ep_map)
{
+ struct rtl_usb_priv *usb_priv = rtl_usbpriv(hw);
+ struct rtl_usb *rtlusb = rtl_usbdev(usb_priv);
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (bwificfg) { /* for WMM */
rtl_dbg(rtlpriv, COMP_INIT, DBG_DMESG,
"USB Chip-B & WMM Setting.....\n");
- ep_map->ep_mapping[RTL_TXQ_BE] = 2;
- ep_map->ep_mapping[RTL_TXQ_BK] = 3;
- ep_map->ep_mapping[RTL_TXQ_VI] = 3;
- ep_map->ep_mapping[RTL_TXQ_VO] = 2;
- ep_map->ep_mapping[RTL_TXQ_MGT] = 2;
- ep_map->ep_mapping[RTL_TXQ_BCN] = 2;
- ep_map->ep_mapping[RTL_TXQ_HI] = 2;
+ ep_map->ep_mapping[RTL_TXQ_BE] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BK] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_VI] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_VO] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_MGT] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BCN] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_HI] = rtlusb->out_eps[0];
} else { /* typical setting */
rtl_dbg(rtlpriv, COMP_INIT, DBG_DMESG,
"USB typical Setting.....\n");
- ep_map->ep_mapping[RTL_TXQ_BE] = 3;
- ep_map->ep_mapping[RTL_TXQ_BK] = 3;
- ep_map->ep_mapping[RTL_TXQ_VI] = 2;
- ep_map->ep_mapping[RTL_TXQ_VO] = 2;
- ep_map->ep_mapping[RTL_TXQ_MGT] = 2;
- ep_map->ep_mapping[RTL_TXQ_BCN] = 2;
- ep_map->ep_mapping[RTL_TXQ_HI] = 2;
+ ep_map->ep_mapping[RTL_TXQ_BE] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_BK] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_VI] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_VO] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_MGT] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BCN] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_HI] = rtlusb->out_eps[0];
}
}
static void threeoutepmapping(struct ieee80211_hw *hw, bool bwificfg,
struct rtl_ep_map *ep_map)
{
+ struct rtl_usb_priv *usb_priv = rtl_usbpriv(hw);
+ struct rtl_usb *rtlusb = rtl_usbdev(usb_priv);
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (bwificfg) { /* for WMM */
rtl_dbg(rtlpriv, COMP_INIT, DBG_DMESG,
"USB 3EP Setting for WMM.....\n");
- ep_map->ep_mapping[RTL_TXQ_BE] = 5;
- ep_map->ep_mapping[RTL_TXQ_BK] = 3;
- ep_map->ep_mapping[RTL_TXQ_VI] = 3;
- ep_map->ep_mapping[RTL_TXQ_VO] = 2;
- ep_map->ep_mapping[RTL_TXQ_MGT] = 2;
- ep_map->ep_mapping[RTL_TXQ_BCN] = 2;
- ep_map->ep_mapping[RTL_TXQ_HI] = 2;
+ ep_map->ep_mapping[RTL_TXQ_BE] = rtlusb->out_eps[2];
+ ep_map->ep_mapping[RTL_TXQ_BK] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_VI] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_VO] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_MGT] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BCN] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_HI] = rtlusb->out_eps[0];
} else { /* typical setting */
rtl_dbg(rtlpriv, COMP_INIT, DBG_DMESG,
"USB 3EP Setting for typical.....\n");
- ep_map->ep_mapping[RTL_TXQ_BE] = 5;
- ep_map->ep_mapping[RTL_TXQ_BK] = 5;
- ep_map->ep_mapping[RTL_TXQ_VI] = 3;
- ep_map->ep_mapping[RTL_TXQ_VO] = 2;
- ep_map->ep_mapping[RTL_TXQ_MGT] = 2;
- ep_map->ep_mapping[RTL_TXQ_BCN] = 2;
- ep_map->ep_mapping[RTL_TXQ_HI] = 2;
+ ep_map->ep_mapping[RTL_TXQ_BE] = rtlusb->out_eps[2];
+ ep_map->ep_mapping[RTL_TXQ_BK] = rtlusb->out_eps[2];
+ ep_map->ep_mapping[RTL_TXQ_VI] = rtlusb->out_eps[1];
+ ep_map->ep_mapping[RTL_TXQ_VO] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_MGT] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BCN] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_HI] = rtlusb->out_eps[0];
}
}
static void oneoutepmapping(struct ieee80211_hw *hw, struct rtl_ep_map *ep_map)
{
- ep_map->ep_mapping[RTL_TXQ_BE] = 2;
- ep_map->ep_mapping[RTL_TXQ_BK] = 2;
- ep_map->ep_mapping[RTL_TXQ_VI] = 2;
- ep_map->ep_mapping[RTL_TXQ_VO] = 2;
- ep_map->ep_mapping[RTL_TXQ_MGT] = 2;
- ep_map->ep_mapping[RTL_TXQ_BCN] = 2;
- ep_map->ep_mapping[RTL_TXQ_HI] = 2;
+ struct rtl_usb_priv *usb_priv = rtl_usbpriv(hw);
+ struct rtl_usb *rtlusb = rtl_usbdev(usb_priv);
+
+ ep_map->ep_mapping[RTL_TXQ_BE] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BK] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_VI] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_VO] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_MGT] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_BCN] = rtlusb->out_eps[0];
+ ep_map->ep_mapping[RTL_TXQ_HI] = rtlusb->out_eps[0];
}
static int _out_ep_mapping(struct ieee80211_hw *hw)
@@ -475,9 +482,9 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
- bool defaultadapter = true;
- u8 *qc = ieee80211_get_qos_ctl(hdr);
- u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
+ struct rtl_sta_info *sta_entry;
+ u8 agg_state = RTL_AGG_STOP;
+ u8 ampdu_density = 0;
u16 seq_number;
__le16 fc = hdr->frame_control;
u8 rate_flag = info->control.rates[0].flags;
@@ -486,6 +493,7 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
skb_get_queue_mapping(skb));
u8 *txdesc8;
__le32 *txdesc;
+ u8 tid;
seq_number = (le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4;
rtl_get_tcb_desc(hw, info, sta, skb, tcb_desc);
@@ -499,10 +507,21 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
set_tx_desc_tx_rate(txdesc, tcb_desc->hw_rate);
if (tcb_desc->use_shortgi || tcb_desc->use_shortpreamble)
set_tx_desc_data_shortgi(txdesc, 1);
- if (mac->tids[tid].agg.agg_state == RTL_AGG_ON &&
- info->flags & IEEE80211_TX_CTL_AMPDU) {
+
+ if (sta) {
+ sta_entry = (struct rtl_sta_info *)sta->drv_priv;
+ tid = ieee80211_get_tid(hdr);
+ agg_state = sta_entry->tids[tid].agg.agg_state;
+ ampdu_density = sta->deflink.ht_cap.ampdu_density;
+ }
+
+ if (agg_state == RTL_AGG_OPERATIONAL &&
+ info->flags & IEEE80211_TX_CTL_AMPDU) {
set_tx_desc_agg_enable(txdesc, 1);
set_tx_desc_max_agg_num(txdesc, 0x14);
+ set_tx_desc_ampdu_density(txdesc, ampdu_density);
+ tcb_desc->rts_enable = 1;
+ tcb_desc->rts_rate = DESC_RATE24M;
} else {
set_tx_desc_agg_break(txdesc, 1);
}
@@ -537,14 +556,6 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
set_tx_desc_data_bw(txdesc, 0);
set_tx_desc_data_sc(txdesc, 0);
}
- rcu_read_lock();
- sta = ieee80211_find_sta(mac->vif, mac->bssid);
- if (sta) {
- u8 ampdu_density = sta->deflink.ht_cap.ampdu_density;
-
- set_tx_desc_ampdu_density(txdesc, ampdu_density);
- }
- rcu_read_unlock();
if (info->control.hw_key) {
struct ieee80211_key_conf *keyconf = info->control.hw_key;
@@ -587,8 +598,6 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
ppsc->fwctrl_lps) {
set_tx_desc_hwseq_en(txdesc, 1);
set_tx_desc_pkt_id(txdesc, 8);
- if (!defaultadapter)
- set_tx_desc_qos(txdesc, 1);
}
if (ieee80211_has_morefrags(fc))
set_tx_desc_more_frag(txdesc, 1);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h
index 5f81cab205cc..09e61dc0f317 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h
@@ -4,15 +4,12 @@
#ifndef __RTL92CU_TRX_H__
#define __RTL92CU_TRX_H__
-#define RTL92C_USB_BULK_IN_NUM 1
#define RTL92C_NUM_RX_URBS 8
#define RTL92C_NUM_TX_URBS 32
#define RTL92C_SIZE_MAX_RX_BUFFER 15360 /* 8192 */
#define RX_DRV_INFO_SIZE_UNIT 8
-#define RTL_AGG_ON 1
-
enum usb_rx_agg_mode {
USB_RX_AGG_DISABLE,
USB_RX_AGG_DMA,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
index 743ac6871bf4..4ba42f6be3f2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
@@ -1669,10 +1669,8 @@ static void _rtl92de_efuse_update_chip_version(struct ieee80211_hw *hw)
u8 cutvalue[2];
u16 chipvalue;
- rtlpriv->intf_ops->read_efuse_byte(hw, EEPROME_CHIP_VERSION_H,
- &cutvalue[1]);
- rtlpriv->intf_ops->read_efuse_byte(hw, EEPROME_CHIP_VERSION_L,
- &cutvalue[0]);
+ read_efuse_byte(hw, EEPROME_CHIP_VERSION_H, &cutvalue[1]);
+ read_efuse_byte(hw, EEPROME_CHIP_VERSION_L, &cutvalue[0]);
chipvalue = (cutvalue[1] << 8) | cutvalue[0];
switch (chipvalue) {
case 0xAA55:
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c
index 02ac69c08ed3..192982ec8152 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c
@@ -42,6 +42,7 @@ static void _rtl92de_query_rxphystatus(struct ieee80211_hw *hw,
bool packet_beacon)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_phy *rtlphy = &(rtlpriv->phy);
struct rtl_ps_ctl *ppsc = rtl_psc(rtlpriv);
struct phy_sts_cck_8192d *cck_buf;
s8 rx_pwr_all, rx_pwr[4];
@@ -62,9 +63,7 @@ static void _rtl92de_query_rxphystatus(struct ieee80211_hw *hw,
u8 report, cck_highpwr;
cck_buf = (struct phy_sts_cck_8192d *)p_drvinfo;
if (ppsc->rfpwr_state == ERFON)
- cck_highpwr = (u8) rtl_get_bbreg(hw,
- RFPGA0_XA_HSSIPARAMETER2,
- BIT(9));
+ cck_highpwr = rtlphy->cck_high_power;
else
cck_highpwr = false;
if (!cck_highpwr) {
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c
index d9823ddab7be..65bfc14702f4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c
@@ -349,7 +349,6 @@ void rtl8723e_tx_fill_desc(struct ieee80211_hw *hw,
struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
- bool b_defaultadapter = true;
/* bool b_trigger_ac = false; */
u8 *pdesc8 = (u8 *)pdesc_tx;
__le32 *pdesc = (__le32 *)pdesc8;
@@ -503,10 +502,7 @@ void rtl8723e_tx_fill_desc(struct ieee80211_hw *hw,
set_tx_desc_hwseq_en_8723(pdesc, 1);
/* set_tx_desc_hwseq_en(pdesc, 1); */
/* set_tx_desc_pkt_id(pdesc, 8); */
-
- if (!b_defaultadapter)
- set_tx_desc_hwseq_sel_8723(pdesc, 1);
- /* set_tx_desc_qos(pdesc, 1); */
+ /* set_tx_desc_qos(pdesc, 1); */
}
set_tx_desc_more_frag(pdesc, (lastseg ? 0 : 1));
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 30bf2775a335..6e8c87a2fae4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -23,86 +23,23 @@ MODULE_DESCRIPTION("USB basic driver for rtlwifi");
#define MAX_USBCTRL_VENDORREQ_TIMES 10
-static void usbctrl_async_callback(struct urb *urb)
-{
- if (urb) {
- /* free dr */
- kfree(urb->setup_packet);
- /* free databuf */
- kfree(urb->transfer_buffer);
- }
-}
-
-static int _usbctrl_vendorreq_async_write(struct usb_device *udev, u8 request,
- u16 value, u16 index, void *pdata,
- u16 len)
-{
- int rc;
- unsigned int pipe;
- u8 reqtype;
- struct usb_ctrlrequest *dr;
- struct urb *urb;
- const u16 databuf_maxlen = REALTEK_USB_VENQT_MAX_BUF_SIZE;
- u8 *databuf;
-
- if (WARN_ON_ONCE(len > databuf_maxlen))
- len = databuf_maxlen;
-
- pipe = usb_sndctrlpipe(udev, 0); /* write_out */
- reqtype = REALTEK_USB_VENQT_WRITE;
-
- dr = kzalloc(sizeof(*dr), GFP_ATOMIC);
- if (!dr)
- return -ENOMEM;
-
- databuf = kzalloc(databuf_maxlen, GFP_ATOMIC);
- if (!databuf) {
- kfree(dr);
- return -ENOMEM;
- }
-
- urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (!urb) {
- kfree(databuf);
- kfree(dr);
- return -ENOMEM;
- }
-
- dr->bRequestType = reqtype;
- dr->bRequest = request;
- dr->wValue = cpu_to_le16(value);
- dr->wIndex = cpu_to_le16(index);
- dr->wLength = cpu_to_le16(len);
- /* data are already in little-endian order */
- memcpy(databuf, pdata, len);
- usb_fill_control_urb(urb, udev, pipe,
- (unsigned char *)dr, databuf, len,
- usbctrl_async_callback, NULL);
- rc = usb_submit_urb(urb, GFP_ATOMIC);
- if (rc < 0) {
- kfree(databuf);
- kfree(dr);
- }
- usb_free_urb(urb);
- return rc;
-}
-
-static int _usbctrl_vendorreq_sync_read(struct usb_device *udev, u8 request,
- u16 value, u16 index, void *pdata,
- u16 len)
+static void _usbctrl_vendorreq_sync(struct usb_device *udev, u8 reqtype,
+ u16 value, void *pdata, u16 len)
{
unsigned int pipe;
int status;
- u8 reqtype;
int vendorreq_times = 0;
static int count;
- pipe = usb_rcvctrlpipe(udev, 0); /* read_in */
- reqtype = REALTEK_USB_VENQT_READ;
+ if (reqtype == REALTEK_USB_VENQT_READ)
+ pipe = usb_rcvctrlpipe(udev, 0); /* read_in */
+ else
+ pipe = usb_sndctrlpipe(udev, 0); /* write_out */
do {
- status = usb_control_msg(udev, pipe, request, reqtype, value,
- index, pdata, len, 1000);
+ status = usb_control_msg(udev, pipe, REALTEK_USB_VENQT_CMD_REQ,
+ reqtype, value, REALTEK_USB_VENQT_CMD_IDX,
+ pdata, len, 1000);
if (status < 0) {
/* firmware download is checksumed, don't retry */
if ((value >= FW_8192C_START_ADDRESS &&
@@ -114,18 +51,15 @@ static int _usbctrl_vendorreq_sync_read(struct usb_device *udev, u8 request,
} while (++vendorreq_times < MAX_USBCTRL_VENDORREQ_TIMES);
if (status < 0 && count++ < 4)
- pr_err("reg 0x%x, usbctrl_vendorreq TimeOut! status:0x%x value=0x%x\n",
- value, status, *(u32 *)pdata);
- return status;
+ dev_err(&udev->dev, "reg 0x%x, usbctrl_vendorreq TimeOut! status:0x%x value=0x%x reqtype=0x%x\n",
+ value, status, *(u32 *)pdata, reqtype);
}
static u32 _usb_read_sync(struct rtl_priv *rtlpriv, u32 addr, u16 len)
{
struct device *dev = rtlpriv->io.dev;
struct usb_device *udev = to_usb_device(dev);
- u8 request;
u16 wvalue;
- u16 index;
__le32 *data;
unsigned long flags;
@@ -134,14 +68,33 @@ static u32 _usb_read_sync(struct rtl_priv *rtlpriv, u32 addr, u16 len)
rtlpriv->usb_data_index = 0;
data = &rtlpriv->usb_data[rtlpriv->usb_data_index];
spin_unlock_irqrestore(&rtlpriv->locks.usb_lock, flags);
- request = REALTEK_USB_VENQT_CMD_REQ;
- index = REALTEK_USB_VENQT_CMD_IDX; /* n/a */
wvalue = (u16)addr;
- _usbctrl_vendorreq_sync_read(udev, request, wvalue, index, data, len);
+ _usbctrl_vendorreq_sync(udev, REALTEK_USB_VENQT_READ, wvalue, data, len);
return le32_to_cpu(*data);
}
+
+static void _usb_write_sync(struct rtl_priv *rtlpriv, u32 addr, u32 val, u16 len)
+{
+ struct device *dev = rtlpriv->io.dev;
+ struct usb_device *udev = to_usb_device(dev);
+ unsigned long flags;
+ __le32 *data;
+ u16 wvalue;
+
+ spin_lock_irqsave(&rtlpriv->locks.usb_lock, flags);
+ if (++rtlpriv->usb_data_index >= RTL_USB_MAX_RX_COUNT)
+ rtlpriv->usb_data_index = 0;
+ data = &rtlpriv->usb_data[rtlpriv->usb_data_index];
+ spin_unlock_irqrestore(&rtlpriv->locks.usb_lock, flags);
+
+ wvalue = (u16)(addr & 0x0000ffff);
+ *data = cpu_to_le32(val);
+
+ _usbctrl_vendorreq_sync(udev, REALTEK_USB_VENQT_WRITE, wvalue, data, len);
+}
+
static u8 _usb_read8_sync(struct rtl_priv *rtlpriv, u32 addr)
{
return (u8)_usb_read_sync(rtlpriv, addr, 1);
@@ -157,45 +110,27 @@ static u32 _usb_read32_sync(struct rtl_priv *rtlpriv, u32 addr)
return _usb_read_sync(rtlpriv, addr, 4);
}
-static void _usb_write_async(struct usb_device *udev, u32 addr, u32 val,
- u16 len)
+static void _usb_write8_sync(struct rtl_priv *rtlpriv, u32 addr, u8 val)
{
- u8 request;
- u16 wvalue;
- u16 index;
- __le32 data;
- int ret;
-
- request = REALTEK_USB_VENQT_CMD_REQ;
- index = REALTEK_USB_VENQT_CMD_IDX; /* n/a */
- wvalue = (u16)(addr&0x0000ffff);
- data = cpu_to_le32(val);
-
- ret = _usbctrl_vendorreq_async_write(udev, request, wvalue,
- index, &data, len);
- if (ret < 0)
- dev_err(&udev->dev, "error %d writing at 0x%x\n", ret, addr);
+ _usb_write_sync(rtlpriv, addr, val, 1);
}
-static void _usb_write8_async(struct rtl_priv *rtlpriv, u32 addr, u8 val)
+static void _usb_write16_sync(struct rtl_priv *rtlpriv, u32 addr, u16 val)
{
- struct device *dev = rtlpriv->io.dev;
-
- _usb_write_async(to_usb_device(dev), addr, val, 1);
+ _usb_write_sync(rtlpriv, addr, val, 2);
}
-static void _usb_write16_async(struct rtl_priv *rtlpriv, u32 addr, u16 val)
+static void _usb_write32_sync(struct rtl_priv *rtlpriv, u32 addr, u32 val)
{
- struct device *dev = rtlpriv->io.dev;
-
- _usb_write_async(to_usb_device(dev), addr, val, 2);
+ _usb_write_sync(rtlpriv, addr, val, 4);
}
-static void _usb_write32_async(struct rtl_priv *rtlpriv, u32 addr, u32 val)
+static void _usb_write_chunk_sync(struct rtl_priv *rtlpriv, u32 addr,
+ u32 length, u8 *data)
{
- struct device *dev = rtlpriv->io.dev;
+ struct usb_device *udev = to_usb_device(rtlpriv->io.dev);
- _usb_write_async(to_usb_device(dev), addr, val, 4);
+ _usbctrl_vendorreq_sync(udev, REALTEK_USB_VENQT_WRITE, addr, data, length);
}
static void _rtl_usb_io_handler_init(struct device *dev,
@@ -205,12 +140,13 @@ static void _rtl_usb_io_handler_init(struct device *dev,
rtlpriv->io.dev = dev;
mutex_init(&rtlpriv->io.bb_mutex);
- rtlpriv->io.write8_async = _usb_write8_async;
- rtlpriv->io.write16_async = _usb_write16_async;
- rtlpriv->io.write32_async = _usb_write32_async;
- rtlpriv->io.read8_sync = _usb_read8_sync;
- rtlpriv->io.read16_sync = _usb_read16_sync;
- rtlpriv->io.read32_sync = _usb_read32_sync;
+ rtlpriv->io.write8 = _usb_write8_sync;
+ rtlpriv->io.write16 = _usb_write16_sync;
+ rtlpriv->io.write32 = _usb_write32_sync;
+ rtlpriv->io.write_chunk = _usb_write_chunk_sync;
+ rtlpriv->io.read8 = _usb_read8_sync;
+ rtlpriv->io.read16 = _usb_read16_sync;
+ rtlpriv->io.read32 = _usb_read32_sync;
}
static void _rtl_usb_io_handler_release(struct ieee80211_hw *hw)
@@ -280,7 +216,6 @@ static int _rtl_usb_init_rx(struct ieee80211_hw *hw)
rtlusb->rx_max_size = rtlpriv->cfg->usb_interface_cfg->rx_max_size;
rtlusb->rx_urb_num = rtlpriv->cfg->usb_interface_cfg->rx_urb_num;
- rtlusb->in_ep = rtlpriv->cfg->usb_interface_cfg->in_ep_num;
rtlusb->usb_rx_hdl = rtlpriv->cfg->usb_interface_cfg->usb_rx_hdl;
rtlusb->usb_rx_segregate_hdl =
rtlpriv->cfg->usb_interface_cfg->usb_rx_segregate_hdl;
@@ -312,20 +247,38 @@ static int _rtl_usb_init(struct ieee80211_hw *hw)
pep_desc = &usb_intf->cur_altsetting->endpoint[epidx].desc;
- if (usb_endpoint_dir_in(pep_desc))
+ if (usb_endpoint_dir_in(pep_desc)) {
+ if (usb_endpoint_xfer_bulk(pep_desc)) {
+ /* The vendor drivers assume there is only one
+ * bulk in ep and that it's the first in ep.
+ */
+ if (rtlusb->in_ep_nums == 0)
+ rtlusb->in_ep = usb_endpoint_num(pep_desc);
+ else
+ pr_warn("%s: bulk in endpoint is not the first in endpoint\n",
+ __func__);
+ }
+
rtlusb->in_ep_nums++;
- else if (usb_endpoint_dir_out(pep_desc))
+ } else if (usb_endpoint_dir_out(pep_desc)) {
+ if (rtlusb->out_ep_nums < RTL_USB_MAX_BULKOUT_NUM) {
+ if (usb_endpoint_xfer_bulk(pep_desc))
+ rtlusb->out_eps[rtlusb->out_ep_nums] =
+ usb_endpoint_num(pep_desc);
+ } else {
+ pr_warn("%s: found more bulk out endpoints than the expected %d\n",
+ __func__, RTL_USB_MAX_BULKOUT_NUM);
+ }
+
rtlusb->out_ep_nums++;
+ }
rtl_dbg(rtlpriv, COMP_INIT, DBG_DMESG,
"USB EP(0x%02x), MaxPacketSize=%d, Interval=%d\n",
pep_desc->bEndpointAddress, pep_desc->wMaxPacketSize,
pep_desc->bInterval);
}
- if (rtlusb->in_ep_nums < rtlpriv->cfg->usb_interface_cfg->in_ep_num) {
- pr_err("Too few input end points found\n");
- return -EINVAL;
- }
+
if (rtlusb->out_ep_nums == 0) {
pr_err("No output end points found\n");
return -EINVAL;
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.h b/drivers/net/wireless/realtek/rtlwifi/usb.h
index 3bf85b23eec1..12529afc0510 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.h
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.h
@@ -19,6 +19,7 @@
#define RTL_USB_MAX_TXQ_NUM 4 /* max tx queue */
#define RTL_USB_MAX_EP_NUM 6 /* max ep number */
+#define RTL_USB_MAX_BULKOUT_NUM 4
#define RTL_USB_MAX_TX_URBS_NUM 8
enum rtl_txq {
@@ -94,6 +95,7 @@ struct rtl_usb {
/* Tx */
u8 out_ep_nums ;
+ u8 out_eps[RTL_USB_MAX_BULKOUT_NUM];
u8 out_queue_sel;
struct rtl_ep_map ep_map;
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index d87cd2252eac..9fabf597cfd6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -1397,8 +1397,6 @@ struct rtl_phy {
#define RTL_AGG_PROGRESS 1
#define RTL_AGG_START 2
#define RTL_AGG_OPERATIONAL 3
-#define RTL_AGG_OFF 0
-#define RTL_AGG_ON 1
#define RTL_RX_AGG_START 1
#define RTL_RX_AGG_STOP 0
#define RTL_AGG_EMPTYING_HW_QUEUE_ADDBA 2
@@ -1447,13 +1445,15 @@ struct rtl_io {
/*PCI IO map */
unsigned long pci_base_addr; /*device I/O address */
- void (*write8_async)(struct rtl_priv *rtlpriv, u32 addr, u8 val);
- void (*write16_async)(struct rtl_priv *rtlpriv, u32 addr, u16 val);
- void (*write32_async)(struct rtl_priv *rtlpriv, u32 addr, u32 val);
+ void (*write8)(struct rtl_priv *rtlpriv, u32 addr, u8 val);
+ void (*write16)(struct rtl_priv *rtlpriv, u32 addr, u16 val);
+ void (*write32)(struct rtl_priv *rtlpriv, u32 addr, u32 val);
+ void (*write_chunk)(struct rtl_priv *rtlpriv, u32 addr, u32 length,
+ u8 *data);
- u8 (*read8_sync)(struct rtl_priv *rtlpriv, u32 addr);
- u16 (*read16_sync)(struct rtl_priv *rtlpriv, u32 addr);
- u32 (*read32_sync)(struct rtl_priv *rtlpriv, u32 addr);
+ u8 (*read8)(struct rtl_priv *rtlpriv, u32 addr);
+ u16 (*read16)(struct rtl_priv *rtlpriv, u32 addr);
+ u32 (*read32)(struct rtl_priv *rtlpriv, u32 addr);
};
@@ -1471,7 +1471,6 @@ struct rtl_mac {
enum nl80211_iftype opmode;
/*Probe Beacon management */
- struct rtl_tid_data tids[MAX_TID_COUNT];
enum rtl_link_state link_state;
int n_channels;
@@ -2290,7 +2289,6 @@ struct rtl_hal_ops {
struct rtl_intf_ops {
/*com */
- void (*read_efuse_byte)(struct ieee80211_hw *hw, u16 _offset, u8 *pbuf);
int (*adapter_start)(struct ieee80211_hw *hw);
void (*adapter_stop)(struct ieee80211_hw *hw);
bool (*check_buddy_priv)(struct ieee80211_hw *hw,
@@ -2354,7 +2352,6 @@ struct rtl_mod_params {
struct rtl_hal_usbint_cfg {
/* data - rx */
- u32 in_ep_num;
u32 rx_urb_num;
u32 rx_max_size;
@@ -2916,25 +2913,25 @@ extern u8 channel5g_80m[CHANNEL_MAX_NUMBER_5G_80M];
static inline u8 rtl_read_byte(struct rtl_priv *rtlpriv, u32 addr)
{
- return rtlpriv->io.read8_sync(rtlpriv, addr);
+ return rtlpriv->io.read8(rtlpriv, addr);
}
static inline u16 rtl_read_word(struct rtl_priv *rtlpriv, u32 addr)
{
- return rtlpriv->io.read16_sync(rtlpriv, addr);
+ return rtlpriv->io.read16(rtlpriv, addr);
}
static inline u32 rtl_read_dword(struct rtl_priv *rtlpriv, u32 addr)
{
- return rtlpriv->io.read32_sync(rtlpriv, addr);
+ return rtlpriv->io.read32(rtlpriv, addr);
}
static inline void rtl_write_byte(struct rtl_priv *rtlpriv, u32 addr, u8 val8)
{
- rtlpriv->io.write8_async(rtlpriv, addr, val8);
+ rtlpriv->io.write8(rtlpriv, addr, val8);
if (rtlpriv->cfg->write_readback)
- rtlpriv->io.read8_sync(rtlpriv, addr);
+ rtlpriv->io.read8(rtlpriv, addr);
}
static inline void rtl_write_byte_with_val32(struct ieee80211_hw *hw,
@@ -2947,19 +2944,25 @@ static inline void rtl_write_byte_with_val32(struct ieee80211_hw *hw,
static inline void rtl_write_word(struct rtl_priv *rtlpriv, u32 addr, u16 val16)
{
- rtlpriv->io.write16_async(rtlpriv, addr, val16);
+ rtlpriv->io.write16(rtlpriv, addr, val16);
if (rtlpriv->cfg->write_readback)
- rtlpriv->io.read16_sync(rtlpriv, addr);
+ rtlpriv->io.read16(rtlpriv, addr);
}
static inline void rtl_write_dword(struct rtl_priv *rtlpriv,
u32 addr, u32 val32)
{
- rtlpriv->io.write32_async(rtlpriv, addr, val32);
+ rtlpriv->io.write32(rtlpriv, addr, val32);
if (rtlpriv->cfg->write_readback)
- rtlpriv->io.read32_sync(rtlpriv, addr);
+ rtlpriv->io.read32(rtlpriv, addr);
+}
+
+static inline void rtl_write_chunk(struct rtl_priv *rtlpriv,
+ u32 addr, u32 length, u8 *data)
+{
+ rtlpriv->io.write_chunk(rtlpriv, addr, length, data);
}
static inline u32 rtl_get_bbreg(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c
index 1b2ad81838be..5b2036798159 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.c
+++ b/drivers/net/wireless/realtek/rtw88/debug.c
@@ -316,23 +316,13 @@ static ssize_t rtw_debugfs_set_single_input(struct file *filp,
{
struct seq_file *seqpriv = (struct seq_file *)filp->private_data;
struct rtw_debugfs_priv *debugfs_priv = seqpriv->private;
- struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
- char tmp[32 + 1];
u32 input;
- int num;
int ret;
- ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+ ret = kstrtou32_from_user(buffer, count, 0, &input);
if (ret)
return ret;
- num = kstrtoint(tmp, 0, &input);
-
- if (num) {
- rtw_warn(rtwdev, "kstrtoint failed\n");
- return num;
- }
-
debugfs_priv->cb_data = input;
return count;
@@ -485,19 +475,12 @@ static ssize_t rtw_debugfs_set_fix_rate(struct file *filp,
struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
struct rtw_dm_info *dm_info = &rtwdev->dm_info;
u8 fix_rate;
- char tmp[32 + 1];
int ret;
- ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+ ret = kstrtou8_from_user(buffer, count, 0, &fix_rate);
if (ret)
return ret;
- ret = kstrtou8(tmp, 0, &fix_rate);
- if (ret) {
- rtw_warn(rtwdev, "invalid args, [rate]\n");
- return ret;
- }
-
dm_info->fix_rate = fix_rate;
return count;
@@ -879,20 +862,13 @@ static ssize_t rtw_debugfs_set_coex_enable(struct file *filp,
struct rtw_debugfs_priv *debugfs_priv = seqpriv->private;
struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
struct rtw_coex *coex = &rtwdev->coex;
- char tmp[32 + 1];
bool enable;
int ret;
- ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+ ret = kstrtobool_from_user(buffer, count, &enable);
if (ret)
return ret;
- ret = kstrtobool(tmp, &enable);
- if (ret) {
- rtw_warn(rtwdev, "invalid arguments\n");
- return ret;
- }
-
mutex_lock(&rtwdev->mutex);
coex->manual_control = !enable;
mutex_unlock(&rtwdev->mutex);
@@ -951,18 +927,13 @@ static ssize_t rtw_debugfs_set_fw_crash(struct file *filp,
struct seq_file *seqpriv = (struct seq_file *)filp->private_data;
struct rtw_debugfs_priv *debugfs_priv = seqpriv->private;
struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
- char tmp[32 + 1];
bool input;
int ret;
- ret = rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+ ret = kstrtobool_from_user(buffer, count, &input);
if (ret)
return ret;
- ret = kstrtobool(tmp, &input);
- if (ret)
- return -EINVAL;
-
if (!input)
return -EINVAL;
@@ -1030,11 +1001,12 @@ static ssize_t rtw_debugfs_set_dm_cap(struct file *filp,
struct rtw_debugfs_priv *debugfs_priv = seqpriv->private;
struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
struct rtw_dm_info *dm_info = &rtwdev->dm_info;
- int bit;
+ int ret, bit;
bool en;
- if (kstrtoint_from_user(buffer, count, 10, &bit))
- return -EINVAL;
+ ret = kstrtoint_from_user(buffer, count, 10, &bit);
+ if (ret)
+ return ret;
en = bit > 0;
bit = abs(bit);
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 298663b03580..0c1c1ff31085 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -309,6 +309,13 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
pwr_seq = pwr_on ? chip->pwr_on_seq : chip->pwr_off_seq;
ret = rtw_pwr_seq_parser(rtwdev, pwr_seq);
+ if (pwr_on && rtw_hci_type(rtwdev) == RTW_HCI_TYPE_USB) {
+ if (chip->id == RTW_CHIP_TYPE_8822C ||
+ chip->id == RTW_CHIP_TYPE_8822B ||
+ chip->id == RTW_CHIP_TYPE_8821C)
+ rtw_write8_clr(rtwdev, REG_SYS_STATUS1 + 1, BIT(0));
+ }
+
if (rtw_hci_type(rtwdev) == RTW_HCI_TYPE_SDIO)
rtw_write32(rtwdev, REG_SDIO_HIMR, imr);
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index d8d68f16014e..7af5bf7fe5b6 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -927,6 +927,10 @@ static void rtw_ops_sta_rc_update(struct ieee80211_hw *hw,
}
const struct ieee80211_ops rtw_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rtw_ops_tx,
.wake_tx_queue = rtw_ops_wake_tx_queue,
.start = rtw_ops_start,
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 6d22628129d0..ffba6b88f392 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -2032,8 +2032,6 @@ static int rtw_chip_board_info_setup(struct rtw_dev *rtwdev)
rtw_phy_setup_phy_cond(rtwdev, hal->pkg_type);
rtw_phy_init_tx_power(rtwdev);
- if (rfe_def->agc_btg_tbl)
- rtw_load_table(rtwdev, rfe_def->agc_btg_tbl);
rtw_load_table(rtwdev, rfe_def->phy_pg_tbl);
rtw_load_table(rtwdev, rfe_def->txpwr_lmt_tbl);
rtw_phy_tx_power_by_rate_config(hal);
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 2bfc0e822b8d..9986a4cb37eb 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1450,6 +1450,7 @@ static void rtw_pci_phy_cfg(struct rtw_dev *rtwdev)
{
struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
const struct rtw_chip_info *chip = rtwdev->chip;
+ struct rtw_efuse *efuse = &rtwdev->efuse;
struct pci_dev *pdev = rtwpci->pdev;
const struct rtw_intf_phy_para *para;
u16 cut;
@@ -1498,6 +1499,9 @@ static void rtw_pci_phy_cfg(struct rtw_dev *rtwdev)
rtw_err(rtwdev, "failed to set PCI cap, ret = %d\n",
ret);
}
+
+ if (chip->id == RTW_CHIP_TYPE_8822C && efuse->rfe_option == 5)
+ rtw_write32_mask(rtwdev, REG_ANAPARSW_MAC_0, BIT_CF_L_V2, 0x1);
}
static int __maybe_unused rtw_pci_suspend(struct device *dev)
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 128e75a81bf3..37ef80c9091d 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -1761,12 +1761,15 @@ static void rtw_load_rfk_table(struct rtw_dev *rtwdev)
void rtw_phy_load_tables(struct rtw_dev *rtwdev)
{
+ const struct rtw_rfe_def *rfe_def = rtw_get_rfe_def(rtwdev);
const struct rtw_chip_info *chip = rtwdev->chip;
u8 rf_path;
rtw_load_table(rtwdev, chip->mac_tbl);
rtw_load_table(rtwdev, chip->bb_tbl);
rtw_load_table(rtwdev, chip->agc_tbl);
+ if (rfe_def->agc_btg_tbl)
+ rtw_load_table(rtwdev, rfe_def->agc_btg_tbl);
rtw_load_rfk_table(rtwdev);
for (rf_path = 0; rf_path < rtwdev->hal.rf_path_num; rf_path++) {
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 1634f03784f1..b122f226924b 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -557,6 +557,9 @@
#define REG_RFE_INV16 0x0cbe
#define BIT_RFE_BUF_EN BIT(3)
+#define REG_ANAPARSW_MAC_0 0x1010
+#define BIT_CF_L_V2 GENMASK(29, 28)
+
#define REG_ANAPAR_XTAL_0 0x1040
#define BIT_XCAP_0 GENMASK(23, 10)
#define REG_CPU_DMEM_CON 0x1080
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
index 429bb420b056..fe5d8e188350 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
@@ -773,9 +773,9 @@ static void rtw8821c_false_alarm_statistics(struct rtw_dev *rtwdev)
dm_info->cck_fa_cnt = cck_fa_cnt;
dm_info->ofdm_fa_cnt = ofdm_fa_cnt;
+ dm_info->total_fa_cnt = ofdm_fa_cnt;
if (cck_enable)
dm_info->total_fa_cnt += cck_fa_cnt;
- dm_info->total_fa_cnt = ofdm_fa_cnt;
crc32_cnt = rtw_read32(rtwdev, REG_CRC_CCK);
dm_info->cck_ok_cnt = FIELD_GET(GENMASK(15, 0), crc32_cnt);
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821cu.c b/drivers/net/wireless/realtek/rtw88/rtw8821cu.c
index 7a5cbdc31ef7..e2c7d9f87683 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821cu.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821cu.c
@@ -9,24 +9,36 @@
#include "usb.h"
static const struct usb_device_id rtw_8821cu_id_table[] = {
- { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xb82b, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8821CU */
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x2006, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x8731, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x8811, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
{ USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xb820, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8821CU */
- { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc821, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8821CU */
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xb82b, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc80c, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc811, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
{ USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc820, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8821CU */
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc821, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
{ USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc82a, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8821CU */
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
{ USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc82b, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8821CU */
- { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc811, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8811CU */
- { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x8811, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* 8811CU */
- { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x2006, 0xff, 0xff, 0xff),
- .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* TOTOLINK A650UA v3 */
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc82c, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x331d, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* D-Link */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xc811, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* Edimax */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xd811, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* Edimax */
{},
};
MODULE_DEVICE_TABLE(usb, rtw_8821cu_id_table);
diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
index e6ab1ac6d709..a0188511099a 100644
--- a/drivers/net/wireless/realtek/rtw88/usb.c
+++ b/drivers/net/wireless/realtek/rtw88/usb.c
@@ -33,6 +33,36 @@ static void rtw_usb_fill_tx_checksum(struct rtw_usb *rtwusb,
rtw_tx_fill_txdesc_checksum(rtwdev, &pkt_info, skb->data);
}
+static void rtw_usb_reg_sec(struct rtw_dev *rtwdev, u32 addr, __le32 *data)
+{
+ struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev);
+ struct usb_device *udev = rtwusb->udev;
+ bool reg_on_section = false;
+ u16 t_reg = 0x4e0;
+ u8 t_len = 1;
+ int status;
+
+ /* There are three sections:
+ * 1. on (0x00~0xFF; 0x1000~0x10FF): this section is always powered on
+ * 2. off (< 0xFE00, excluding "on" section): this section could be
+ * powered off
+ * 3. local (>= 0xFE00): usb specific registers section
+ */
+ if (addr <= 0xff || (addr >= 0x1000 && addr <= 0x10ff))
+ reg_on_section = true;
+
+ if (!reg_on_section)
+ return;
+
+ status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ RTW_USB_CMD_REQ, RTW_USB_CMD_WRITE,
+ t_reg, 0, data, t_len, 500);
+
+ if (status != t_len && status != -ENODEV)
+ rtw_err(rtwdev, "%s: reg 0x%x, usb write %u fail, status: %d\n",
+ __func__, t_reg, t_len, status);
+}
+
static u32 rtw_usb_read(struct rtw_dev *rtwdev, u32 addr, u16 len)
{
struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev);
@@ -58,6 +88,11 @@ static u32 rtw_usb_read(struct rtw_dev *rtwdev, u32 addr, u16 len)
rtw_err(rtwdev, "read register 0x%x failed with %d\n",
addr, ret);
+ if (rtwdev->chip->id == RTW_CHIP_TYPE_8822C ||
+ rtwdev->chip->id == RTW_CHIP_TYPE_8822B ||
+ rtwdev->chip->id == RTW_CHIP_TYPE_8821C)
+ rtw_usb_reg_sec(rtwdev, addr, data);
+
return le32_to_cpu(*data);
}
@@ -102,6 +137,11 @@ static void rtw_usb_write(struct rtw_dev *rtwdev, u32 addr, u32 val, int len)
if (ret < 0 && ret != -ENODEV && count++ < 4)
rtw_err(rtwdev, "write register 0x%x failed with %d\n",
addr, ret);
+
+ if (rtwdev->chip->id == RTW_CHIP_TYPE_8822C ||
+ rtwdev->chip->id == RTW_CHIP_TYPE_8822B ||
+ rtwdev->chip->id == RTW_CHIP_TYPE_8821C)
+ rtw_usb_reg_sec(rtwdev, addr, data);
}
static void rtw_usb_write8(struct rtw_dev *rtwdev, u32 addr, u8 val)
diff --git a/drivers/net/wireless/realtek/rtw89/cam.c b/drivers/net/wireless/realtek/rtw89/cam.c
index 914c94988b2f..11fbdd142162 100644
--- a/drivers/net/wireless/realtek/rtw89/cam.c
+++ b/drivers/net/wireless/realtek/rtw89/cam.c
@@ -777,3 +777,64 @@ void rtw89_cam_fill_dctl_sec_cam_info_v1(struct rtw89_dev *rtwdev,
SET_DCTL_SEC_ENT5_V1(cmd, addr_cam->sec_ent[5]);
SET_DCTL_SEC_ENT6_V1(cmd, addr_cam->sec_ent[6]);
}
+
+void rtw89_cam_fill_dctl_sec_cam_info_v2(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta,
+ struct rtw89_h2c_dctlinfo_ud_v2 *h2c)
+{
+ struct rtw89_addr_cam_entry *addr_cam = rtw89_get_addr_cam_of(rtwvif, rtwsta);
+
+ h2c->c0 = le32_encode_bits(rtwsta ? rtwsta->mac_id : rtwvif->mac_id,
+ DCTLINFO_V2_C0_MACID) |
+ le32_encode_bits(1, DCTLINFO_V2_C0_OP);
+
+ h2c->w4 = le32_encode_bits(addr_cam->sec_ent_keyid[0],
+ DCTLINFO_V2_W4_SEC_ENT0_KEYID) |
+ le32_encode_bits(addr_cam->sec_ent_keyid[1],
+ DCTLINFO_V2_W4_SEC_ENT1_KEYID) |
+ le32_encode_bits(addr_cam->sec_ent_keyid[2],
+ DCTLINFO_V2_W4_SEC_ENT2_KEYID) |
+ le32_encode_bits(addr_cam->sec_ent_keyid[3],
+ DCTLINFO_V2_W4_SEC_ENT3_KEYID) |
+ le32_encode_bits(addr_cam->sec_ent_keyid[4],
+ DCTLINFO_V2_W4_SEC_ENT4_KEYID) |
+ le32_encode_bits(addr_cam->sec_ent_keyid[5],
+ DCTLINFO_V2_W4_SEC_ENT5_KEYID) |
+ le32_encode_bits(addr_cam->sec_ent_keyid[6],
+ DCTLINFO_V2_W4_SEC_ENT6_KEYID);
+ h2c->m4 = cpu_to_le32(DCTLINFO_V2_W4_SEC_ENT0_KEYID |
+ DCTLINFO_V2_W4_SEC_ENT1_KEYID |
+ DCTLINFO_V2_W4_SEC_ENT2_KEYID |
+ DCTLINFO_V2_W4_SEC_ENT3_KEYID |
+ DCTLINFO_V2_W4_SEC_ENT4_KEYID |
+ DCTLINFO_V2_W4_SEC_ENT5_KEYID |
+ DCTLINFO_V2_W4_SEC_ENT6_KEYID);
+
+ h2c->w5 = le32_encode_bits(addr_cam->sec_cam_map[0],
+ DCTLINFO_V2_W5_SEC_ENT_VALID_V1) |
+ le32_encode_bits(addr_cam->sec_ent[0],
+ DCTLINFO_V2_W5_SEC_ENT0_V1);
+ h2c->m5 = cpu_to_le32(DCTLINFO_V2_W5_SEC_ENT_VALID_V1 |
+ DCTLINFO_V2_W5_SEC_ENT0_V1);
+
+ h2c->w6 = le32_encode_bits(addr_cam->sec_ent[1],
+ DCTLINFO_V2_W6_SEC_ENT1_V1) |
+ le32_encode_bits(addr_cam->sec_ent[2],
+ DCTLINFO_V2_W6_SEC_ENT2_V1) |
+ le32_encode_bits(addr_cam->sec_ent[3],
+ DCTLINFO_V2_W6_SEC_ENT3_V1) |
+ le32_encode_bits(addr_cam->sec_ent[4],
+ DCTLINFO_V2_W6_SEC_ENT4_V1);
+ h2c->m6 = cpu_to_le32(DCTLINFO_V2_W6_SEC_ENT1_V1 |
+ DCTLINFO_V2_W6_SEC_ENT2_V1 |
+ DCTLINFO_V2_W6_SEC_ENT3_V1 |
+ DCTLINFO_V2_W6_SEC_ENT4_V1);
+
+ h2c->w7 = le32_encode_bits(addr_cam->sec_ent[5],
+ DCTLINFO_V2_W7_SEC_ENT5_V1) |
+ le32_encode_bits(addr_cam->sec_ent[6],
+ DCTLINFO_V2_W7_SEC_ENT6_V1);
+ h2c->m7 = cpu_to_le32(DCTLINFO_V2_W7_SEC_ENT5_V1 |
+ DCTLINFO_V2_W7_SEC_ENT6_V1);
+}
diff --git a/drivers/net/wireless/realtek/rtw89/cam.h b/drivers/net/wireless/realtek/rtw89/cam.h
index 83c160a614e6..fa09d11c345c 100644
--- a/drivers/net/wireless/realtek/rtw89/cam.h
+++ b/drivers/net/wireless/realtek/rtw89/cam.h
@@ -352,6 +352,111 @@ static inline void FWCMD_SET_ADDR_BSSID_BSSID5(void *cmd, u32 value)
le32p_replace_bits((__le32 *)(cmd) + 14, value, GENMASK(31, 24));
}
+struct rtw89_h2c_dctlinfo_ud_v2 {
+ __le32 c0;
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+ __le32 w7;
+ __le32 w8;
+ __le32 w9;
+ __le32 w10;
+ __le32 w11;
+ __le32 w12;
+ __le32 w13;
+ __le32 w14;
+ __le32 w15;
+ __le32 m0;
+ __le32 m1;
+ __le32 m2;
+ __le32 m3;
+ __le32 m4;
+ __le32 m5;
+ __le32 m6;
+ __le32 m7;
+ __le32 m8;
+ __le32 m9;
+ __le32 m10;
+ __le32 m11;
+ __le32 m12;
+ __le32 m13;
+ __le32 m14;
+ __le32 m15;
+} __packed;
+
+#define DCTLINFO_V2_C0_MACID GENMASK(6, 0)
+#define DCTLINFO_V2_C0_OP BIT(7)
+
+#define DCTLINFO_V2_W0_QOS_FIELD_H GENMASK(7, 0)
+#define DCTLINFO_V2_W0_HW_EXSEQ_MACID GENMASK(14, 8)
+#define DCTLINFO_V2_W0_QOS_DATA BIT(15)
+#define DCTLINFO_V2_W0_AES_IV_L GENMASK(31, 16)
+#define DCTLINFO_V2_W0_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W1_AES_IV_H GENMASK(31, 0)
+#define DCTLINFO_V2_W1_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W2_SEQ0 GENMASK(11, 0)
+#define DCTLINFO_V2_W2_SEQ1 GENMASK(23, 12)
+#define DCTLINFO_V2_W2_AMSDU_MAX_LEN GENMASK(26, 24)
+#define DCTLINFO_V2_W2_STA_AMSDU_EN BIT(27)
+#define DCTLINFO_V2_W2_CHKSUM_OFLD_EN BIT(28)
+#define DCTLINFO_V2_W2_WITH_LLC BIT(29)
+#define DCTLINFO_V2_W2_NAT25_EN BIT(30)
+#define DCTLINFO_V2_W2_IS_MLD BIT(31)
+#define DCTLINFO_V2_W2_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W3_SEQ2 GENMASK(11, 0)
+#define DCTLINFO_V2_W3_SEQ3 GENMASK(23, 12)
+#define DCTLINFO_V2_W3_TGT_IND GENMASK(27, 24)
+#define DCTLINFO_V2_W3_TGT_IND_EN BIT(28)
+#define DCTLINFO_V2_W3_HTC_LB GENMASK(31, 29)
+#define DCTLINFO_V2_W3_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W4_VLAN_TAG_SEL GENMASK(7, 5)
+#define DCTLINFO_V2_W4_HTC_ORDER BIT(8)
+#define DCTLINFO_V2_W4_SEC_KEY_ID GENMASK(10, 9)
+#define DCTLINFO_V2_W4_VLAN_RX_DYNAMIC_PCP_EN BIT(11)
+#define DCTLINFO_V2_W4_VLAN_RX_PKT_DROP BIT(12)
+#define DCTLINFO_V2_W4_VLAN_RX_VALID BIT(13)
+#define DCTLINFO_V2_W4_VLAN_TX_VALID BIT(14)
+#define DCTLINFO_V2_W4_WAPI BIT(15)
+#define DCTLINFO_V2_W4_SEC_ENT_MODE GENMASK(17, 16)
+#define DCTLINFO_V2_W4_SEC_ENT0_KEYID GENMASK(19, 18)
+#define DCTLINFO_V2_W4_SEC_ENT1_KEYID GENMASK(21, 20)
+#define DCTLINFO_V2_W4_SEC_ENT2_KEYID GENMASK(23, 22)
+#define DCTLINFO_V2_W4_SEC_ENT3_KEYID GENMASK(25, 24)
+#define DCTLINFO_V2_W4_SEC_ENT4_KEYID GENMASK(27, 26)
+#define DCTLINFO_V2_W4_SEC_ENT5_KEYID GENMASK(29, 28)
+#define DCTLINFO_V2_W4_SEC_ENT6_KEYID GENMASK(31, 30)
+#define DCTLINFO_V2_W4_ALL GENMASK(31, 5)
+#define DCTLINFO_V2_W5_SEC_ENT7_KEYID GENMASK(1, 0)
+#define DCTLINFO_V2_W5_SEC_ENT8_KEYID GENMASK(3, 2)
+#define DCTLINFO_V2_W5_SEC_ENT_VALID_V1 GENMASK(23, 8)
+#define DCTLINFO_V2_W5_SEC_ENT0_V1 GENMASK(31, 24)
+#define DCTLINFO_V2_W5_ALL (GENMASK(31, 8) | GENMASK(3, 0))
+#define DCTLINFO_V2_W6_SEC_ENT1_V1 GENMASK(7, 0)
+#define DCTLINFO_V2_W6_SEC_ENT2_V1 GENMASK(15, 8)
+#define DCTLINFO_V2_W6_SEC_ENT3_V1 GENMASK(23, 16)
+#define DCTLINFO_V2_W6_SEC_ENT4_V1 GENMASK(31, 24)
+#define DCTLINFO_V2_W6_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W7_SEC_ENT5_V1 GENMASK(7, 0)
+#define DCTLINFO_V2_W7_SEC_ENT6_V1 GENMASK(15, 8)
+#define DCTLINFO_V2_W7_SEC_ENT7 GENMASK(23, 16)
+#define DCTLINFO_V2_W7_SEC_ENT8 GENMASK(31, 24)
+#define DCTLINFO_V2_W7_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W8_MLD_SMA_L_V1 GENMASK(31, 0)
+#define DCTLINFO_V2_W8_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W9_MLD_SMA_H_V1 GENMASK(15, 0)
+#define DCTLINFO_V2_W9_MLD_TMA_L_V1 GENMASK(31, 16)
+#define DCTLINFO_V2_W9_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W10_MLD_TMA_H_V1 GENMASK(31, 0)
+#define DCTLINFO_V2_W10_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W11_MLD_TA_BSSID_L_V1 GENMASK(31, 0)
+#define DCTLINFO_V2_W11_ALL GENMASK(31, 0)
+#define DCTLINFO_V2_W12_MLD_TA_BSSID_H_V1 GENMASK(15, 0)
+#define DCTLINFO_V2_W12_ALL GENMASK(15, 0)
+
int rtw89_cam_init(struct rtw89_dev *rtwdev, struct rtw89_vif *vif);
void rtw89_cam_deinit(struct rtw89_dev *rtwdev, struct rtw89_vif *vif);
int rtw89_cam_init_addr_cam(struct rtw89_dev *rtwdev,
@@ -373,6 +478,10 @@ void rtw89_cam_fill_dctl_sec_cam_info_v1(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif,
struct rtw89_sta *rtwsta,
u8 *cmd);
+void rtw89_cam_fill_dctl_sec_cam_info_v2(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta,
+ struct rtw89_h2c_dctlinfo_ud_v2 *h2c);
int rtw89_cam_fill_bssid_cam_info(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif,
struct rtw89_sta *rtwsta, u8 *cmd);
diff --git a/drivers/net/wireless/realtek/rtw89/chan.c b/drivers/net/wireless/realtek/rtw89/chan.c
index cbf6821af6b8..051a3cad6101 100644
--- a/drivers/net/wireless/realtek/rtw89/chan.c
+++ b/drivers/net/wireless/realtek/rtw89/chan.c
@@ -212,33 +212,68 @@ void rtw89_entity_init(struct rtw89_dev *rtwdev)
rtw89_config_default_chandef(rtwdev);
}
+static void rtw89_entity_calculate_weight(struct rtw89_dev *rtwdev,
+ struct rtw89_entity_weight *w)
+{
+ struct rtw89_hal *hal = &rtwdev->hal;
+ const struct rtw89_chanctx_cfg *cfg;
+ struct rtw89_vif *rtwvif;
+ int idx;
+
+ for_each_set_bit(idx, hal->entity_map, NUM_OF_RTW89_SUB_ENTITY) {
+ cfg = hal->sub[idx].cfg;
+ if (!cfg) {
+ /* doesn't run with chanctx ops; one channel at most */
+ w->active_chanctxs = 1;
+ break;
+ }
+
+ if (cfg->ref_count > 0)
+ w->active_chanctxs++;
+ }
+
+ rtw89_for_each_rtwvif(rtwdev, rtwvif) {
+ if (rtwvif->chanctx_assigned)
+ w->active_roles++;
+ }
+}
+
enum rtw89_entity_mode rtw89_entity_recalc(struct rtw89_dev *rtwdev)
{
+ DECLARE_BITMAP(recalc_map, NUM_OF_RTW89_SUB_ENTITY) = {};
struct rtw89_hal *hal = &rtwdev->hal;
const struct cfg80211_chan_def *chandef;
+ struct rtw89_entity_weight w = {};
enum rtw89_entity_mode mode;
struct rtw89_chan chan;
- u8 weight;
- u8 last;
u8 idx;
lockdep_assert_held(&rtwdev->mutex);
- weight = bitmap_weight(hal->entity_map, NUM_OF_RTW89_SUB_ENTITY);
- switch (weight) {
+ bitmap_copy(recalc_map, hal->entity_map, NUM_OF_RTW89_SUB_ENTITY);
+
+ rtw89_entity_calculate_weight(rtwdev, &w);
+ switch (w.active_chanctxs) {
default:
- rtw89_warn(rtwdev, "unknown ent chan weight: %d\n", weight);
- bitmap_zero(hal->entity_map, NUM_OF_RTW89_SUB_ENTITY);
+ rtw89_warn(rtwdev, "unknown ent chanctxs weight: %d\n",
+ w.active_chanctxs);
+ bitmap_zero(recalc_map, NUM_OF_RTW89_SUB_ENTITY);
fallthrough;
case 0:
rtw89_config_default_chandef(rtwdev);
+ set_bit(RTW89_SUB_ENTITY_0, recalc_map);
fallthrough;
case 1:
- last = RTW89_SUB_ENTITY_0;
mode = RTW89_ENTITY_MODE_SCC;
break;
- case 2:
- last = RTW89_SUB_ENTITY_1;
+ case 2 ... NUM_OF_RTW89_SUB_ENTITY:
+ if (w.active_roles != NUM_OF_RTW89_MCC_ROLES) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "unhandled ent: %d chanctxs %d roles\n",
+ w.active_chanctxs, w.active_roles);
+ return RTW89_ENTITY_MODE_UNHANDLED;
+ }
+
mode = rtw89_get_entity_mode(rtwdev);
if (mode == RTW89_ENTITY_MODE_MCC)
break;
@@ -247,7 +282,7 @@ enum rtw89_entity_mode rtw89_entity_recalc(struct rtw89_dev *rtwdev)
break;
}
- for (idx = 0; idx <= last; idx++) {
+ for_each_set_bit(idx, recalc_map, NUM_OF_RTW89_SUB_ENTITY) {
chandef = rtw89_chandef_get(rtwdev, idx);
rtw89_get_channel_params(chandef, &chan);
if (chan.channel == 0) {
@@ -287,6 +322,13 @@ static void rtw89_chanctx_notify(struct rtw89_dev *rtwdev,
}
}
+static bool rtw89_concurrent_via_mrc(struct rtw89_dev *rtwdev)
+{
+ enum rtw89_chip_gen chip_gen = rtwdev->chip->chip_gen;
+
+ return chip_gen == RTW89_CHIP_BE;
+}
+
/* This function centrally manages how MCC roles are sorted and iterated.
* And, it guarantees that ordered_idx is less than NUM_OF_RTW89_MCC_ROLES.
* So, if data needs to pass an array for ordered_idx, the array can declare
@@ -320,19 +362,12 @@ int rtw89_iterate_mcc_roles(struct rtw89_dev *rtwdev,
return 0;
}
-/* For now, IEEE80211_HW_TIMING_BEACON_ONLY can make things simple to ensure
- * correctness of MCC calculation logic below. We have noticed that once driver
- * declares WIPHY_FLAG_SUPPORTS_MLO, the use of IEEE80211_HW_TIMING_BEACON_ONLY
- * will be restricted. We will make an alternative in driver when it is ready
- * for MLO.
- */
static u32 rtw89_mcc_get_tbtt_ofst(struct rtw89_dev *rtwdev,
struct rtw89_mcc_role *role, u64 tsf)
{
struct rtw89_vif *rtwvif = role->rtwvif;
- struct ieee80211_vif *vif = rtwvif_to_vif(rtwvif);
u32 bcn_intvl_us = ieee80211_tu_to_usec(role->beacon_interval);
- u64 sync_tsf = vif->bss_conf.sync_tsf;
+ u64 sync_tsf = READ_ONCE(rtwvif->sync_bcn_tsf);
u32 remainder;
if (tsf < sync_tsf) {
@@ -346,16 +381,13 @@ static u32 rtw89_mcc_get_tbtt_ofst(struct rtw89_dev *rtwdev,
return remainder;
}
-static u16 rtw89_mcc_get_bcn_ofst(struct rtw89_dev *rtwdev)
+static int __mcc_fw_req_tsf(struct rtw89_dev *rtwdev, u64 *tsf_ref, u64 *tsf_aux)
{
struct rtw89_mcc_info *mcc = &rtwdev->mcc;
struct rtw89_mcc_role *ref = &mcc->role_ref;
struct rtw89_mcc_role *aux = &mcc->role_aux;
struct rtw89_mac_mcc_tsf_rpt rpt = {};
struct rtw89_fw_mcc_tsf_req req = {};
- u32 bcn_intvl_ref_us = ieee80211_tu_to_usec(ref->beacon_interval);
- u32 tbtt_ofst_ref, tbtt_ofst_aux;
- u64 tsf_ref, tsf_aux;
int ret;
req.group = mcc->group;
@@ -365,11 +397,63 @@ static u16 rtw89_mcc_get_bcn_ofst(struct rtw89_dev *rtwdev)
if (ret) {
rtw89_debug(rtwdev, RTW89_DBG_CHAN,
"MCC h2c failed to request tsf: %d\n", ret);
- return RTW89_MCC_DFLT_BCN_OFST_TIME;
+ return ret;
+ }
+
+ *tsf_ref = (u64)rpt.tsf_x_high << 32 | rpt.tsf_x_low;
+ *tsf_aux = (u64)rpt.tsf_y_high << 32 | rpt.tsf_y_low;
+
+ return 0;
+}
+
+static int __mrc_fw_req_tsf(struct rtw89_dev *rtwdev, u64 *tsf_ref, u64 *tsf_aux)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_role *ref = &mcc->role_ref;
+ struct rtw89_mcc_role *aux = &mcc->role_aux;
+ struct rtw89_fw_mrc_req_tsf_arg arg = {};
+ struct rtw89_mac_mrc_tsf_rpt rpt = {};
+ int ret;
+
+ BUILD_BUG_ON(RTW89_MAC_MRC_MAX_REQ_TSF_NUM < NUM_OF_RTW89_MCC_ROLES);
+
+ arg.num = 2;
+ arg.infos[0].band = ref->rtwvif->mac_idx;
+ arg.infos[0].port = ref->rtwvif->port;
+ arg.infos[1].band = aux->rtwvif->mac_idx;
+ arg.infos[1].port = aux->rtwvif->port;
+
+ ret = rtw89_fw_h2c_mrc_req_tsf(rtwdev, &arg, &rpt);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to request tsf: %d\n", ret);
+ return ret;
}
- tsf_ref = (u64)rpt.tsf_x_high << 32 | rpt.tsf_x_low;
- tsf_aux = (u64)rpt.tsf_y_high << 32 | rpt.tsf_y_low;
+ *tsf_ref = rpt.tsfs[0];
+ *tsf_aux = rpt.tsfs[1];
+
+ return 0;
+}
+
+static u16 rtw89_mcc_get_bcn_ofst(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_role *ref = &mcc->role_ref;
+ struct rtw89_mcc_role *aux = &mcc->role_aux;
+ u32 bcn_intvl_ref_us = ieee80211_tu_to_usec(ref->beacon_interval);
+ u32 tbtt_ofst_ref, tbtt_ofst_aux;
+ u64 tsf_ref, tsf_aux;
+ int ret;
+
+ if (rtw89_concurrent_via_mrc(rtwdev))
+ ret = __mrc_fw_req_tsf(rtwdev, &tsf_ref, &tsf_aux);
+ else
+ ret = __mcc_fw_req_tsf(rtwdev, &tsf_ref, &tsf_aux);
+
+ if (ret)
+ return RTW89_MCC_DFLT_BCN_OFST_TIME;
+
tbtt_ofst_ref = rtw89_mcc_get_tbtt_ofst(rtwdev, ref, tsf_ref);
tbtt_ofst_aux = rtw89_mcc_get_tbtt_ofst(rtwdev, aux, tsf_aux);
@@ -392,6 +476,28 @@ void rtw89_mcc_role_fw_macid_bitmap_set_bit(struct rtw89_mcc_role *mcc_role,
mcc_role->macid_bitmap[idx] |= BIT(pos);
}
+static
+u32 rtw89_mcc_role_fw_macid_bitmap_to_u32(struct rtw89_mcc_role *mcc_role)
+{
+ unsigned int macid;
+ unsigned int i, j;
+ u32 bitmap = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mcc_role->macid_bitmap); i++) {
+ for (j = 0; j < 8; j++) {
+ macid = i * 8 + j;
+ if (macid >= 32)
+ goto out;
+
+ if (mcc_role->macid_bitmap[i] & BIT(j))
+ bitmap |= BIT(macid);
+ }
+ }
+
+out:
+ return bitmap;
+}
+
static void rtw89_mcc_role_macid_sta_iter(void *data, struct ieee80211_sta *sta)
{
struct rtw89_sta *rtwsta = (struct rtw89_sta *)sta->drv_priv;
@@ -588,6 +694,9 @@ static int rtw89_mcc_fill_all_roles(struct rtw89_dev *rtwdev)
int ret;
rtw89_for_each_rtwvif(rtwdev, rtwvif) {
+ if (!rtwvif->chanctx_assigned)
+ continue;
+
if (sel.bind_vif[rtwvif->sub_entity_idx]) {
rtw89_warn(rtwdev,
"MCC skip extra vif <macid %d> on chanctx[%d]\n",
@@ -1150,7 +1259,11 @@ static void rtw89_mcc_sync_tbtt(struct rtw89_dev *rtwdev,
tsf_ofst_tgt = bcn_intvl_src_us - remainder;
config->sync.macid_tgt = tgt->rtwvif->mac_id;
+ config->sync.band_tgt = tgt->rtwvif->mac_idx;
+ config->sync.port_tgt = tgt->rtwvif->port;
config->sync.macid_src = src->rtwvif->mac_id;
+ config->sync.band_src = src->rtwvif->mac_idx;
+ config->sync.port_src = src->rtwvif->port;
config->sync.offset = tsf_ofst_tgt / 1024;
config->sync.enable = true;
@@ -1297,6 +1410,37 @@ static int __mcc_fw_add_role(struct rtw89_dev *rtwdev, struct rtw89_mcc_role *ro
return 0;
}
+static
+void __mrc_fw_add_role(struct rtw89_dev *rtwdev, struct rtw89_mcc_role *role,
+ struct rtw89_fw_mrc_add_arg *arg, u8 slot_idx)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_role *ref = &mcc->role_ref;
+ struct rtw89_mcc_policy *policy = &role->policy;
+ struct rtw89_fw_mrc_add_slot_arg *slot_arg;
+ const struct rtw89_chan *chan;
+
+ slot_arg = &arg->slots[slot_idx];
+ role->slot_idx = slot_idx;
+
+ slot_arg->duration = role->duration;
+ slot_arg->role_num = 1;
+
+ chan = rtw89_chan_get(rtwdev, role->rtwvif->sub_entity_idx);
+
+ slot_arg->roles[0].role_type = RTW89_H2C_MRC_ROLE_WIFI;
+ slot_arg->roles[0].is_master = role == ref;
+ slot_arg->roles[0].band = chan->band_type;
+ slot_arg->roles[0].bw = chan->band_width;
+ slot_arg->roles[0].central_ch = chan->channel;
+ slot_arg->roles[0].primary_ch = chan->primary_channel;
+ slot_arg->roles[0].en_tx_null = !policy->dis_tx_null;
+ slot_arg->roles[0].null_early = policy->tx_null_early;
+ slot_arg->roles[0].macid = role->rtwvif->mac_id;
+ slot_arg->roles[0].macid_main_bitmap =
+ rtw89_mcc_role_fw_macid_bitmap_to_u32(role);
+}
+
static int __mcc_fw_add_bt_role(struct rtw89_dev *rtwdev)
{
struct rtw89_mcc_info *mcc = &rtwdev->mcc;
@@ -1318,6 +1462,20 @@ static int __mcc_fw_add_bt_role(struct rtw89_dev *rtwdev)
return 0;
}
+static
+void __mrc_fw_add_bt_role(struct rtw89_dev *rtwdev,
+ struct rtw89_fw_mrc_add_arg *arg, u8 slot_idx)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_bt_role *bt_role = &mcc->bt_role;
+ struct rtw89_fw_mrc_add_slot_arg *slot_arg = &arg->slots[slot_idx];
+
+ slot_arg->duration = bt_role->duration;
+ slot_arg->role_num = 1;
+
+ slot_arg->roles[0].role_type = RTW89_H2C_MRC_ROLE_BT;
+}
+
static int __mcc_fw_start(struct rtw89_dev *rtwdev, bool replace)
{
struct rtw89_mcc_info *mcc = &rtwdev->mcc;
@@ -1403,6 +1561,130 @@ static int __mcc_fw_start(struct rtw89_dev *rtwdev, bool replace)
return 0;
}
+static void __mrc_fw_add_courtesy(struct rtw89_dev *rtwdev,
+ struct rtw89_fw_mrc_add_arg *arg)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_role *ref = &mcc->role_ref;
+ struct rtw89_mcc_role *aux = &mcc->role_aux;
+ struct rtw89_mcc_config *config = &mcc->config;
+ struct rtw89_mcc_pattern *pattern = &config->pattern;
+ struct rtw89_mcc_courtesy *courtesy = &pattern->courtesy;
+ struct rtw89_fw_mrc_add_slot_arg *slot_arg_src;
+ u8 slot_idx_tgt;
+
+ if (!courtesy->enable)
+ return;
+
+ if (courtesy->macid_src == ref->rtwvif->mac_id) {
+ slot_arg_src = &arg->slots[ref->slot_idx];
+ slot_idx_tgt = aux->slot_idx;
+ } else {
+ slot_arg_src = &arg->slots[aux->slot_idx];
+ slot_idx_tgt = ref->slot_idx;
+ }
+
+ slot_arg_src->courtesy_target = slot_idx_tgt;
+ slot_arg_src->courtesy_period = courtesy->slot_num;
+ slot_arg_src->courtesy_en = true;
+}
+
+static int __mrc_fw_start(struct rtw89_dev *rtwdev, bool replace)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_role *ref = &mcc->role_ref;
+ struct rtw89_mcc_role *aux = &mcc->role_aux;
+ struct rtw89_mcc_config *config = &mcc->config;
+ struct rtw89_mcc_pattern *pattern = &config->pattern;
+ struct rtw89_mcc_sync *sync = &config->sync;
+ struct rtw89_fw_mrc_start_arg start_arg = {};
+ struct rtw89_fw_mrc_add_arg add_arg = {};
+ int ret;
+
+ BUILD_BUG_ON(RTW89_MAC_MRC_MAX_ADD_SLOT_NUM <
+ NUM_OF_RTW89_MCC_ROLES + 1 /* bt role */);
+
+ if (replace) {
+ start_arg.old_sch_idx = mcc->group;
+ start_arg.action = RTW89_H2C_MRC_START_ACTION_REPLACE_OLD;
+ mcc->group = RTW89_MCC_NEXT_GROUP(mcc->group);
+ }
+
+ add_arg.sch_idx = mcc->group;
+ add_arg.sch_type = RTW89_H2C_MRC_SCH_BAND0_ONLY;
+
+ switch (pattern->plan) {
+ case RTW89_MCC_PLAN_TAIL_BT:
+ __mrc_fw_add_role(rtwdev, ref, &add_arg, 0);
+ __mrc_fw_add_role(rtwdev, aux, &add_arg, 1);
+ __mrc_fw_add_bt_role(rtwdev, &add_arg, 2);
+
+ add_arg.slot_num = 3;
+ add_arg.btc_in_sch = true;
+ break;
+ case RTW89_MCC_PLAN_MID_BT:
+ __mrc_fw_add_role(rtwdev, ref, &add_arg, 0);
+ __mrc_fw_add_bt_role(rtwdev, &add_arg, 1);
+ __mrc_fw_add_role(rtwdev, aux, &add_arg, 2);
+
+ add_arg.slot_num = 3;
+ add_arg.btc_in_sch = true;
+ break;
+ case RTW89_MCC_PLAN_NO_BT:
+ __mrc_fw_add_role(rtwdev, ref, &add_arg, 0);
+ __mrc_fw_add_role(rtwdev, aux, &add_arg, 1);
+
+ add_arg.slot_num = 2;
+ add_arg.btc_in_sch = false;
+ break;
+ default:
+ rtw89_warn(rtwdev, "MCC unknown plan: %d\n", pattern->plan);
+ return -EFAULT;
+ }
+
+ __mrc_fw_add_courtesy(rtwdev, &add_arg);
+
+ ret = rtw89_fw_h2c_mrc_add(rtwdev, &add_arg);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to trigger add: %d\n", ret);
+ return ret;
+ }
+
+ if (sync->enable) {
+ struct rtw89_fw_mrc_sync_arg sync_arg = {
+ .offset = sync->offset,
+ .src = {
+ .band = sync->band_src,
+ .port = sync->port_src,
+ },
+ .dest = {
+ .band = sync->band_tgt,
+ .port = sync->port_tgt,
+ },
+ };
+
+ ret = rtw89_fw_h2c_mrc_sync(rtwdev, &sync_arg);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to trigger sync: %d\n", ret);
+ return ret;
+ }
+ }
+
+ start_arg.sch_idx = mcc->group;
+ start_arg.start_tsf = config->start_tsf;
+
+ ret = rtw89_fw_h2c_mrc_start(rtwdev, &start_arg);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to trigger start: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int __mcc_fw_set_duration_no_bt(struct rtw89_dev *rtwdev, bool sync_changed)
{
struct rtw89_mcc_info *mcc = &rtwdev->mcc;
@@ -1444,6 +1726,60 @@ static int __mcc_fw_set_duration_no_bt(struct rtw89_dev *rtwdev, bool sync_chang
return 0;
}
+static int __mrc_fw_set_duration_no_bt(struct rtw89_dev *rtwdev, bool sync_changed)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_mcc_config *config = &mcc->config;
+ struct rtw89_mcc_sync *sync = &config->sync;
+ struct rtw89_mcc_role *ref = &mcc->role_ref;
+ struct rtw89_mcc_role *aux = &mcc->role_aux;
+ struct rtw89_fw_mrc_upd_duration_arg dur_arg = {
+ .sch_idx = mcc->group,
+ .start_tsf = config->start_tsf,
+ .slot_num = 2,
+ .slots[0] = {
+ .slot_idx = ref->slot_idx,
+ .duration = ref->duration,
+ },
+ .slots[1] = {
+ .slot_idx = aux->slot_idx,
+ .duration = aux->duration,
+ },
+ };
+ struct rtw89_fw_mrc_sync_arg sync_arg = {
+ .offset = sync->offset,
+ .src = {
+ .band = sync->band_src,
+ .port = sync->port_src,
+ },
+ .dest = {
+ .band = sync->band_tgt,
+ .port = sync->port_tgt,
+ },
+
+ };
+ int ret;
+
+ ret = rtw89_fw_h2c_mrc_upd_duration(rtwdev, &dur_arg);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to update duration: %d\n", ret);
+ return ret;
+ }
+
+ if (!sync->enable || !sync_changed)
+ return 0;
+
+ ret = rtw89_fw_h2c_mrc_sync(rtwdev, &sync_arg);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to trigger sync: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static void rtw89_mcc_handle_beacon_noa(struct rtw89_dev *rtwdev, bool enable)
{
struct rtw89_mcc_info *mcc = &rtwdev->mcc;
@@ -1494,7 +1830,7 @@ static void rtw89_mcc_handle_beacon_noa(struct rtw89_dev *rtwdev, bool enable)
if (!rtwvif_go->chanctx_assigned)
return;
- rtw89_fw_h2c_update_beacon(rtwdev, rtwvif_go);
+ rtw89_chip_h2c_update_beacon(rtwdev, rtwvif_go);
}
static void rtw89_mcc_start_beacon_noa(struct rtw89_dev *rtwdev)
@@ -1562,7 +1898,11 @@ static int rtw89_mcc_start(struct rtw89_dev *rtwdev)
if (ret)
return ret;
- ret = __mcc_fw_start(rtwdev, false);
+ if (rtw89_concurrent_via_mrc(rtwdev))
+ ret = __mrc_fw_start(rtwdev, false);
+ else
+ ret = __mcc_fw_start(rtwdev, false);
+
if (ret)
return ret;
@@ -1580,16 +1920,23 @@ static void rtw89_mcc_stop(struct rtw89_dev *rtwdev)
rtw89_debug(rtwdev, RTW89_DBG_CHAN, "MCC stop\n");
- ret = rtw89_fw_h2c_stop_mcc(rtwdev, mcc->group,
- ref->rtwvif->mac_id, true);
- if (ret)
- rtw89_debug(rtwdev, RTW89_DBG_CHAN,
- "MCC h2c failed to trigger stop: %d\n", ret);
+ if (rtw89_concurrent_via_mrc(rtwdev)) {
+ ret = rtw89_fw_h2c_mrc_del(rtwdev, mcc->group);
+ if (ret)
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to trigger del: %d\n", ret);
+ } else {
+ ret = rtw89_fw_h2c_stop_mcc(rtwdev, mcc->group,
+ ref->rtwvif->mac_id, true);
+ if (ret)
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MCC h2c failed to trigger stop: %d\n", ret);
- ret = rtw89_fw_h2c_del_mcc_group(rtwdev, mcc->group, true);
- if (ret)
- rtw89_debug(rtwdev, RTW89_DBG_CHAN,
- "MCC h2c failed to delete group: %d\n", ret);
+ ret = rtw89_fw_h2c_del_mcc_group(rtwdev, mcc->group, true);
+ if (ret)
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MCC h2c failed to delete group: %d\n", ret);
+ }
rtw89_chanctx_notify(rtwdev, RTW89_CHANCTX_STATE_MCC_STOP);
@@ -1615,7 +1962,11 @@ static int rtw89_mcc_update(struct rtw89_dev *rtwdev)
if (old_cfg.pattern.plan != RTW89_MCC_PLAN_NO_BT ||
config->pattern.plan != RTW89_MCC_PLAN_NO_BT) {
- ret = __mcc_fw_start(rtwdev, true);
+ if (rtw89_concurrent_via_mrc(rtwdev))
+ ret = __mrc_fw_start(rtwdev, true);
+ else
+ ret = __mcc_fw_start(rtwdev, true);
+
if (ret)
return ret;
} else {
@@ -1624,7 +1975,11 @@ static int rtw89_mcc_update(struct rtw89_dev *rtwdev)
else
sync_changed = true;
- ret = __mcc_fw_set_duration_no_bt(rtwdev, sync_changed);
+ if (rtw89_concurrent_via_mrc(rtwdev))
+ ret = __mrc_fw_set_duration_no_bt(rtwdev, sync_changed);
+ else
+ ret = __mcc_fw_set_duration_no_bt(rtwdev, sync_changed);
+
if (ret)
return ret;
}
@@ -1666,12 +2021,75 @@ static void rtw89_mcc_track(struct rtw89_dev *rtwdev)
rtw89_queue_chanctx_change(rtwdev, RTW89_CHANCTX_BCN_OFFSET_CHANGE);
}
+static int __mcc_fw_upd_macid_bitmap(struct rtw89_dev *rtwdev,
+ struct rtw89_mcc_role *upd)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ int ret;
+
+ ret = rtw89_fw_h2c_mcc_macid_bitmap(rtwdev, mcc->group,
+ upd->rtwvif->mac_id,
+ upd->macid_bitmap);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MCC h2c failed to update macid bitmap: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __mrc_fw_upd_macid_bitmap(struct rtw89_dev *rtwdev,
+ struct rtw89_mcc_role *cur,
+ struct rtw89_mcc_role *upd)
+{
+ struct rtw89_mcc_info *mcc = &rtwdev->mcc;
+ struct rtw89_fw_mrc_upd_bitmap_arg arg = {};
+ u32 old = rtw89_mcc_role_fw_macid_bitmap_to_u32(cur);
+ u32 new = rtw89_mcc_role_fw_macid_bitmap_to_u32(upd);
+ u32 add = new & ~old;
+ u32 del = old & ~new;
+ int ret;
+ int i;
+
+ arg.sch_idx = mcc->group;
+ arg.macid = upd->rtwvif->mac_id;
+
+ for (i = 0; i < 32; i++) {
+ if (add & BIT(i)) {
+ arg.client_macid = i;
+ arg.action = RTW89_H2C_MRC_UPD_BITMAP_ACTION_ADD;
+
+ ret = rtw89_fw_h2c_mrc_upd_bitmap(rtwdev, &arg);
+ if (ret)
+ goto err;
+ }
+ }
+
+ for (i = 0; i < 32; i++) {
+ if (del & BIT(i)) {
+ arg.client_macid = i;
+ arg.action = RTW89_H2C_MRC_UPD_BITMAP_ACTION_DEL;
+
+ ret = rtw89_fw_h2c_mrc_upd_bitmap(rtwdev, &arg);
+ if (ret)
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC h2c failed to update bitmap: %d\n", ret);
+ return ret;
+}
+
static int rtw89_mcc_upd_map_iterator(struct rtw89_dev *rtwdev,
struct rtw89_mcc_role *mcc_role,
unsigned int ordered_idx,
void *data)
{
- struct rtw89_mcc_info *mcc = &rtwdev->mcc;
struct rtw89_mcc_role upd = {
.rtwvif = mcc_role->rtwvif,
};
@@ -1685,14 +2103,13 @@ static int rtw89_mcc_upd_map_iterator(struct rtw89_dev *rtwdev,
sizeof(mcc_role->macid_bitmap)) == 0)
return 0;
- ret = rtw89_fw_h2c_mcc_macid_bitmap(rtwdev, mcc->group,
- upd.rtwvif->mac_id,
- upd.macid_bitmap);
- if (ret) {
- rtw89_debug(rtwdev, RTW89_DBG_CHAN,
- "MCC h2c failed to update macid bitmap: %d\n", ret);
+ if (rtw89_concurrent_via_mrc(rtwdev))
+ ret = __mrc_fw_upd_macid_bitmap(rtwdev, mcc_role, &upd);
+ else
+ ret = __mcc_fw_upd_macid_bitmap(rtwdev, &upd);
+
+ if (ret)
return ret;
- }
memcpy(mcc_role->macid_bitmap, upd.macid_bitmap,
sizeof(mcc_role->macid_bitmap));
@@ -1900,6 +2317,41 @@ void rtw89_chanctx_proceed(struct rtw89_dev *rtwdev)
rtw89_queue_chanctx_work(rtwdev);
}
+static void rtw89_swap_sub_entity(struct rtw89_dev *rtwdev,
+ enum rtw89_sub_entity_idx idx1,
+ enum rtw89_sub_entity_idx idx2)
+{
+ struct rtw89_hal *hal = &rtwdev->hal;
+ struct rtw89_sub_entity tmp;
+ struct rtw89_vif *rtwvif;
+ u8 cur;
+
+ if (idx1 == idx2)
+ return;
+
+ hal->sub[idx1].cfg->idx = idx2;
+ hal->sub[idx2].cfg->idx = idx1;
+
+ tmp = hal->sub[idx1];
+ hal->sub[idx1] = hal->sub[idx2];
+ hal->sub[idx2] = tmp;
+
+ rtw89_for_each_rtwvif(rtwdev, rtwvif) {
+ if (!rtwvif->chanctx_assigned)
+ continue;
+ if (rtwvif->sub_entity_idx == idx1)
+ rtwvif->sub_entity_idx = idx2;
+ else if (rtwvif->sub_entity_idx == idx2)
+ rtwvif->sub_entity_idx = idx1;
+ }
+
+ cur = atomic_read(&hal->roc_entity_idx);
+ if (cur == idx1)
+ atomic_set(&hal->roc_entity_idx, idx2);
+ else if (cur == idx2)
+ atomic_set(&hal->roc_entity_idx, idx1);
+}
+
int rtw89_chanctx_ops_add(struct rtw89_dev *rtwdev,
struct ieee80211_chanctx_conf *ctx)
{
@@ -1913,8 +2365,8 @@ int rtw89_chanctx_ops_add(struct rtw89_dev *rtwdev,
return -ENOENT;
rtw89_config_entity_chandef(rtwdev, idx, &ctx->def);
- rtw89_set_channel(rtwdev);
cfg->idx = idx;
+ cfg->ref_count = 0;
hal->sub[idx].cfg = cfg;
return 0;
}
@@ -1924,47 +2376,8 @@ void rtw89_chanctx_ops_remove(struct rtw89_dev *rtwdev,
{
struct rtw89_hal *hal = &rtwdev->hal;
struct rtw89_chanctx_cfg *cfg = (struct rtw89_chanctx_cfg *)ctx->drv_priv;
- enum rtw89_entity_mode mode;
- struct rtw89_vif *rtwvif;
- u8 drop, roll;
-
- drop = cfg->idx;
- if (drop != RTW89_SUB_ENTITY_0)
- goto out;
- roll = find_next_bit(hal->entity_map, NUM_OF_RTW89_SUB_ENTITY, drop + 1);
-
- /* Follow rtw89_config_default_chandef() when rtw89_entity_recalc(). */
- if (roll == NUM_OF_RTW89_SUB_ENTITY)
- goto out;
-
- /* RTW89_SUB_ENTITY_0 is going to release, and another exists.
- * Make another roll down to RTW89_SUB_ENTITY_0 to replace.
- */
- hal->sub[roll].cfg->idx = RTW89_SUB_ENTITY_0;
- hal->sub[RTW89_SUB_ENTITY_0] = hal->sub[roll];
-
- rtw89_for_each_rtwvif(rtwdev, rtwvif) {
- if (rtwvif->sub_entity_idx == roll)
- rtwvif->sub_entity_idx = RTW89_SUB_ENTITY_0;
- }
-
- atomic_cmpxchg(&hal->roc_entity_idx, roll, RTW89_SUB_ENTITY_0);
-
- drop = roll;
-
-out:
- mode = rtw89_get_entity_mode(rtwdev);
- switch (mode) {
- case RTW89_ENTITY_MODE_MCC:
- rtw89_mcc_stop(rtwdev);
- break;
- default:
- break;
- }
-
- clear_bit(drop, hal->entity_map);
- rtw89_set_channel(rtwdev);
+ clear_bit(cfg->idx, hal->entity_map);
}
void rtw89_chanctx_ops_change(struct rtw89_dev *rtwdev,
@@ -1985,16 +2398,73 @@ int rtw89_chanctx_ops_assign_vif(struct rtw89_dev *rtwdev,
struct ieee80211_chanctx_conf *ctx)
{
struct rtw89_chanctx_cfg *cfg = (struct rtw89_chanctx_cfg *)ctx->drv_priv;
+ struct rtw89_entity_weight w = {};
rtwvif->sub_entity_idx = cfg->idx;
rtwvif->chanctx_assigned = true;
- return 0;
+ cfg->ref_count++;
+
+ if (cfg->idx == RTW89_SUB_ENTITY_0)
+ goto out;
+
+ rtw89_entity_calculate_weight(rtwdev, &w);
+ if (w.active_chanctxs != 1)
+ goto out;
+
+ /* put the first active chanctx at RTW89_SUB_ENTITY_0 */
+ rtw89_swap_sub_entity(rtwdev, cfg->idx, RTW89_SUB_ENTITY_0);
+
+out:
+ return rtw89_set_channel(rtwdev);
}
void rtw89_chanctx_ops_unassign_vif(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif,
struct ieee80211_chanctx_conf *ctx)
{
+ struct rtw89_chanctx_cfg *cfg = (struct rtw89_chanctx_cfg *)ctx->drv_priv;
+ struct rtw89_hal *hal = &rtwdev->hal;
+ struct rtw89_entity_weight w = {};
+ enum rtw89_sub_entity_idx roll;
+ enum rtw89_entity_mode cur;
+
rtwvif->sub_entity_idx = RTW89_SUB_ENTITY_0;
rtwvif->chanctx_assigned = false;
+ cfg->ref_count--;
+
+ if (cfg->ref_count != 0)
+ goto out;
+
+ if (cfg->idx != RTW89_SUB_ENTITY_0)
+ goto out;
+
+ roll = find_next_bit(hal->entity_map, NUM_OF_RTW89_SUB_ENTITY,
+ cfg->idx + 1);
+ /* Follow rtw89_config_default_chandef() when rtw89_entity_recalc(). */
+ if (roll == NUM_OF_RTW89_SUB_ENTITY)
+ goto out;
+
+ /* RTW89_SUB_ENTITY_0 is going to release, and another exists.
+ * Make another roll down to RTW89_SUB_ENTITY_0 to replace.
+ */
+ rtw89_swap_sub_entity(rtwdev, cfg->idx, roll);
+
+out:
+ rtw89_entity_calculate_weight(rtwdev, &w);
+
+ cur = rtw89_get_entity_mode(rtwdev);
+ switch (cur) {
+ case RTW89_ENTITY_MODE_MCC:
+ /* If still multi-roles, re-plan MCC for chanctx changes.
+ * Otherwise, just stop MCC.
+ */
+ rtw89_mcc_stop(rtwdev);
+ if (w.active_roles == NUM_OF_RTW89_MCC_ROLES)
+ rtw89_mcc_start(rtwdev);
+ break;
+ default:
+ break;
+ }
+
+ rtw89_set_channel(rtwdev);
}
diff --git a/drivers/net/wireless/realtek/rtw89/chan.h b/drivers/net/wireless/realtek/rtw89/chan.h
index 9b98d8f4ee9d..ffa412f281f3 100644
--- a/drivers/net/wireless/realtek/rtw89/chan.h
+++ b/drivers/net/wireless/realtek/rtw89/chan.h
@@ -38,6 +38,11 @@ enum rtw89_chanctx_pause_reasons {
RTW89_CHANCTX_PAUSE_REASON_ROC,
};
+struct rtw89_entity_weight {
+ unsigned int active_chanctxs;
+ unsigned int active_roles;
+};
+
static inline bool rtw89_get_entity_state(struct rtw89_dev *rtwdev)
{
struct rtw89_hal *hal = &rtwdev->hal;
diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c
index f37afb4cbb63..d9b66d43f32e 100644
--- a/drivers/net/wireless/realtek/rtw89/coex.c
+++ b/drivers/net/wireless/realtek/rtw89/coex.c
@@ -129,68 +129,75 @@ static const u32 cxtbl[] = {
static const struct rtw89_btc_ver rtw89_btc_ver_defs[] = {
/* firmware version must be in decreasing order for each chip */
+ {RTL8922A, RTW89_FW_VER_CODE(0, 35, 8, 0),
+ .fcxbtcrpt = 8, .fcxtdma = 7, .fcxslots = 7, .fcxcysta = 7,
+ .fcxstep = 7, .fcxnullsta = 7, .fcxmreg = 7, .fcxgpiodbg = 7,
+ .fcxbtver = 7, .fcxbtscan = 7, .fcxbtafh = 7, .fcxbtdevinfo = 7,
+ .fwlrole = 2, .frptmap = 7, .fcxctrl = 7, .fcxinit = 7,
+ .drvinfo_type = 1, .info_buf = 1800, .max_role_num = 6,
+ },
{RTL8851B, RTW89_FW_VER_CODE(0, 29, 29, 0),
.fcxbtcrpt = 105, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 5,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 2, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 2, .fcxbtafh = 2, .fcxbtdevinfo = 1,
- .fwlrole = 2, .frptmap = 3, .fcxctrl = 1,
- .info_buf = 1800, .max_role_num = 6,
+ .fwlrole = 2, .frptmap = 3, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1800, .max_role_num = 6,
},
{RTL8852C, RTW89_FW_VER_CODE(0, 27, 57, 0),
.fcxbtcrpt = 4, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 3,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 2, .fcxbtdevinfo = 1,
- .fwlrole = 1, .frptmap = 3, .fcxctrl = 1,
- .info_buf = 1280, .max_role_num = 5,
+ .fwlrole = 1, .frptmap = 3, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1280, .max_role_num = 5,
},
{RTL8852C, RTW89_FW_VER_CODE(0, 27, 42, 0),
.fcxbtcrpt = 4, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 3,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 2, .fcxbtdevinfo = 1,
- .fwlrole = 1, .frptmap = 2, .fcxctrl = 1,
- .info_buf = 1280, .max_role_num = 5,
+ .fwlrole = 1, .frptmap = 2, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1280, .max_role_num = 5,
},
{RTL8852C, RTW89_FW_VER_CODE(0, 27, 0, 0),
.fcxbtcrpt = 4, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 3,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 1, .fcxbtdevinfo = 1,
- .fwlrole = 1, .frptmap = 2, .fcxctrl = 1,
- .info_buf = 1280, .max_role_num = 5,
+ .fwlrole = 1, .frptmap = 2, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1280, .max_role_num = 5,
},
{RTL8852B, RTW89_FW_VER_CODE(0, 29, 29, 0),
.fcxbtcrpt = 105, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 5,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 2, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 2, .fcxbtafh = 2, .fcxbtdevinfo = 1,
- .fwlrole = 2, .frptmap = 3, .fcxctrl = 1,
- .info_buf = 1800, .max_role_num = 6,
+ .fwlrole = 2, .frptmap = 3, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1800, .max_role_num = 6,
},
{RTL8852B, RTW89_FW_VER_CODE(0, 29, 14, 0),
.fcxbtcrpt = 5, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 4,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 2, .fcxbtdevinfo = 1,
- .fwlrole = 1, .frptmap = 3, .fcxctrl = 1,
- .info_buf = 1800, .max_role_num = 6,
+ .fwlrole = 1, .frptmap = 3, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1800, .max_role_num = 6,
},
{RTL8852B, RTW89_FW_VER_CODE(0, 27, 0, 0),
.fcxbtcrpt = 4, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 3,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 1, .fcxbtdevinfo = 1,
- .fwlrole = 1, .frptmap = 1, .fcxctrl = 1,
- .info_buf = 1280, .max_role_num = 5,
+ .fwlrole = 1, .frptmap = 1, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1280, .max_role_num = 5,
},
{RTL8852A, RTW89_FW_VER_CODE(0, 13, 37, 0),
.fcxbtcrpt = 4, .fcxtdma = 3, .fcxslots = 1, .fcxcysta = 3,
.fcxstep = 3, .fcxnullsta = 2, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 2, .fcxbtdevinfo = 1,
- .fwlrole = 1, .frptmap = 3, .fcxctrl = 1,
- .info_buf = 1280, .max_role_num = 5,
+ .fwlrole = 1, .frptmap = 3, .fcxctrl = 1, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1280, .max_role_num = 5,
},
{RTL8852A, RTW89_FW_VER_CODE(0, 13, 0, 0),
.fcxbtcrpt = 1, .fcxtdma = 1, .fcxslots = 1, .fcxcysta = 2,
.fcxstep = 2, .fcxnullsta = 1, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 1, .fcxbtdevinfo = 1,
- .fwlrole = 0, .frptmap = 0, .fcxctrl = 0,
- .info_buf = 1024, .max_role_num = 5,
+ .fwlrole = 0, .frptmap = 0, .fcxctrl = 0, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1024, .max_role_num = 5,
},
/* keep it to be the last as default entry */
@@ -198,8 +205,8 @@ static const struct rtw89_btc_ver rtw89_btc_ver_defs[] = {
.fcxbtcrpt = 1, .fcxtdma = 1, .fcxslots = 1, .fcxcysta = 2,
.fcxstep = 2, .fcxnullsta = 1, .fcxmreg = 1, .fcxgpiodbg = 1,
.fcxbtver = 1, .fcxbtscan = 1, .fcxbtafh = 1, .fcxbtdevinfo = 1,
- .fwlrole = 0, .frptmap = 0, .fcxctrl = 0,
- .info_buf = 1024, .max_role_num = 5,
+ .fwlrole = 0, .frptmap = 0, .fcxctrl = 0, .fcxinit = 0,
+ .drvinfo_type = 0, .info_buf = 1024, .max_role_num = 5,
},
};
@@ -351,17 +358,26 @@ enum btc_cx_poicy_type {
/* TDMA off + pri: WL_Rx = BT, BT_HI > WL_Tx > BT_Lo */
BTC_CXP_OFF_EQ3 = (BTC_CXP_OFF << 8) | 5,
+ /* TDMA off + pri: WL_Rx = BT, BT_HI > WL_Tx > BT_Lo */
+ BTC_CXP_OFF_EQ4 = (BTC_CXP_OFF << 8) | 6,
+
+ /* TDMA off + pri: WL_Rx = BT, BT_HI > WL_Tx > BT_Lo */
+ BTC_CXP_OFF_EQ5 = (BTC_CXP_OFF << 8) | 7,
+
/* TDMA off + pri: BT_Hi > WL > BT_Lo */
- BTC_CXP_OFF_BWB0 = (BTC_CXP_OFF << 8) | 6,
+ BTC_CXP_OFF_BWB0 = (BTC_CXP_OFF << 8) | 8,
/* TDMA off + pri: WL_Hi-Tx > BT_Hi_Rx, BT_Hi > WL > BT_Lo */
- BTC_CXP_OFF_BWB1 = (BTC_CXP_OFF << 8) | 7,
+ BTC_CXP_OFF_BWB1 = (BTC_CXP_OFF << 8) | 9,
/* TDMA off + pri: WL_Hi-Tx > BT, BT_Hi > other-WL > BT_Lo */
- BTC_CXP_OFF_BWB2 = (BTC_CXP_OFF << 8) | 8,
+ BTC_CXP_OFF_BWB2 = (BTC_CXP_OFF << 8) | 10,
/* TDMA off + pri: WL_Hi-Tx = BT */
- BTC_CXP_OFF_BWB3 = (BTC_CXP_OFF << 8) | 9,
+ BTC_CXP_OFF_BWB3 = (BTC_CXP_OFF << 8) | 11,
+
+ /* TDMA off + pri: WL > BT, Block-BT*/
+ BTC_CXP_OFF_WL2 = (BTC_CXP_OFF << 8) | 12,
/* TDMA off+Bcn-Protect + pri: WL_Hi-Tx > BT_Hi_Rx, BT_Hi > WL > BT_Lo*/
BTC_CXP_OFFB_BWB0 = (BTC_CXP_OFFB << 8) | 0,
@@ -676,20 +692,25 @@ static void _run_coex(struct rtw89_dev *rtwdev,
static void _write_scbd(struct rtw89_dev *rtwdev, u32 val, bool state);
static void _update_bt_scbd(struct rtw89_dev *rtwdev, bool only_update);
-static void _send_fw_cmd(struct rtw89_dev *rtwdev, u8 h2c_class, u8 h2c_func,
- void *param, u16 len)
+static int _send_fw_cmd(struct rtw89_dev *rtwdev, u8 h2c_class, u8 h2c_func,
+ void *param, u16 len)
{
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_btf_fwinfo *pfwinfo = &btc->fwinfo;
struct rtw89_btc_cx *cx = &btc->cx;
struct rtw89_btc_wl_info *wl = &cx->wl;
+ struct rtw89_btc_dm *dm = &btc->dm;
int ret;
- if (!wl->status.map.init_ok) {
+ if (len > BTC_H2C_MAXLEN || len == 0) {
+ btc->fwinfo.cnt_h2c_fail++;
+ dm->error.map.h2c_buffer_over = true;
+ return -EINVAL;
+ } else if (!wl->status.map.init_ok) {
rtw89_debug(rtwdev, RTW89_DBG_BTC,
"[BTC], %s(): return by btc not init!!\n", __func__);
pfwinfo->cnt_h2c_fail++;
- return;
+ return -EINVAL;
} else if ((wl->status.map.rf_off_pre == BTC_LPS_RF_OFF &&
wl->status.map.rf_off == BTC_LPS_RF_OFF) ||
(wl->status.map.lps_pre == BTC_LPS_RF_OFF &&
@@ -697,20 +718,23 @@ static void _send_fw_cmd(struct rtw89_dev *rtwdev, u8 h2c_class, u8 h2c_func,
rtw89_debug(rtwdev, RTW89_DBG_BTC,
"[BTC], %s(): return by wl off!!\n", __func__);
pfwinfo->cnt_h2c_fail++;
- return;
+ return -EINVAL;
}
- pfwinfo->cnt_h2c++;
-
ret = rtw89_fw_h2c_raw_with_hdr(rtwdev, h2c_class, h2c_func, param, len,
false, true);
- if (ret != 0)
+ if (ret)
pfwinfo->cnt_h2c_fail++;
+ else
+ pfwinfo->cnt_h2c++;
+
+ return ret;
}
static void _reset_btc_var(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
+ const struct rtw89_btc_ver *ver = btc->ver;
struct rtw89_btc_cx *cx = &btc->cx;
struct rtw89_btc_wl_info *wl = &btc->cx.wl;
struct rtw89_btc_bt_info *bt = &btc->cx.bt;
@@ -728,7 +752,9 @@ static void _reset_btc_var(struct rtw89_dev *rtwdev, u8 type)
if (type & BTC_RESET_CTRL) {
memset(&btc->ctrl, 0, sizeof(btc->ctrl));
- btc->ctrl.trace_step = FCXDEF_STEP;
+ btc->manual_ctrl = false;
+ if (ver->fcxctrl != 7)
+ btc->ctrl.ctrl.trace_step = FCXDEF_STEP;
}
/* Init Coex variables that are not zero */
@@ -777,22 +803,27 @@ static void _get_reg_status(struct rtw89_dev *rtwdev, u8 type, u8 *val)
{
struct rtw89_btc *btc = &rtwdev->btc;
const struct rtw89_btc_ver *ver = btc->ver;
- struct rtw89_btc_module *md = &btc->mdinfo;
+ union rtw89_btc_module_info *md = &btc->mdinfo;
union rtw89_btc_fbtc_mreg_val *pmreg;
u32 pre_agc_addr = R_BTC_BB_PRE_AGC_S1;
u32 reg_val;
- u8 idx;
+ u8 idx, switch_type;
- if (md->ant.btg_pos == RF_PATH_A)
+ if (ver->fcxinit == 7)
+ switch_type = md->md_v7.switch_type;
+ else
+ switch_type = md->md.switch_type;
+
+ if (btc->btg_pos == RF_PATH_A)
pre_agc_addr = R_BTC_BB_PRE_AGC_S0;
switch (type) {
case BTC_CSTATUS_TXDIV_POS:
- if (md->switch_type == BTC_SWITCH_INTERNAL)
+ if (switch_type == BTC_SWITCH_INTERNAL)
*val = BTC_ANT_DIV_MAIN;
break;
case BTC_CSTATUS_RXDIV_POS:
- if (md->switch_type == BTC_SWITCH_INTERNAL)
+ if (switch_type == BTC_SWITCH_INTERNAL)
*val = BTC_ANT_DIV_MAIN;
break;
case BTC_CSTATUS_BB_GNT_MUX:
@@ -1117,7 +1148,7 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev,
void *rpt_content = NULL, *pfinfo = NULL;
u8 rpt_type = 0;
u16 wl_slot_set = 0, wl_slot_real = 0;
- u32 trace_step = btc->ctrl.trace_step, rpt_len = 0, diff_t = 0;
+ u32 trace_step = 0, rpt_len = 0, diff_t = 0;
u32 cnt_leak_slot, bt_slot_real, bt_slot_set, cnt_rx_imr;
u8 i, val = 0;
@@ -1207,6 +1238,9 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev,
break;
case BTC_RPT_TYPE_STEP:
pcinfo = &pfwinfo->rpt_fbtc_step.cinfo;
+ if (ver->fcxctrl != 7)
+ trace_step = btc->ctrl.ctrl.trace_step;
+
if (ver->fcxstep == 2) {
pfinfo = &pfwinfo->rpt_fbtc_step.finfo.v2;
pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_step.finfo.v2.step[0]) *
@@ -1920,6 +1954,7 @@ static void rtw89_btc_fw_en_rpt(struct rtw89_dev *rtwdev,
struct rtw89_btc_btf_fwinfo *fwinfo = &btc->fwinfo;
struct rtw89_btc_btf_set_report r = {0};
u32 val, bit_map;
+ int ret;
if ((wl_smap->rf_off || wl_smap->lps != BTC_LPS_OFF) && rpt_state != 0)
return;
@@ -1938,13 +1973,13 @@ static void rtw89_btc_fw_en_rpt(struct rtw89_dev *rtwdev,
if (val == fwinfo->rpt_en_map)
return;
- fwinfo->rpt_en_map = val;
-
r.fver = BTF_SET_REPORT_VER;
r.enable = cpu_to_le32(val);
r.para = cpu_to_le32(rpt_state);
- _send_fw_cmd(rtwdev, BTFC_SET, SET_REPORT_EN, &r, sizeof(r));
+ ret = _send_fw_cmd(rtwdev, BTFC_SET, SET_REPORT_EN, &r, sizeof(r));
+ if (!ret)
+ fwinfo->rpt_en_map = val;
}
static void rtw89_btc_fw_set_slots(struct rtw89_dev *rtwdev, u8 num,
@@ -2032,6 +2067,7 @@ static void _fw_set_policy(struct rtw89_dev *rtwdev, u16 policy_type,
{
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_dm *dm = &btc->dm;
+ int ret;
dm->run_action = action;
@@ -2060,11 +2096,12 @@ static void _fw_set_policy(struct rtw89_dev *rtwdev, u16 policy_type,
if (btc->lps == 1)
rtw89_set_coex_ctrl_lps(rtwdev, btc->lps);
- _send_fw_cmd(rtwdev, BTFC_SET, SET_CX_POLICY,
- btc->policy, btc->policy_len);
-
- memcpy(&dm->tdma_now, &dm->tdma, sizeof(dm->tdma_now));
- memcpy(&dm->slot_now, &dm->slot, sizeof(dm->slot_now));
+ ret = _send_fw_cmd(rtwdev, BTFC_SET, SET_CX_POLICY,
+ btc->policy, btc->policy_len);
+ if (!ret) {
+ memcpy(&dm->tdma_now, &dm->tdma, sizeof(dm->tdma_now));
+ memcpy(&dm->slot_now, &dm->slot, sizeof(dm->slot_now));
+ }
if (btc->update_policy_force)
btc->update_policy_force = false;
@@ -2083,20 +2120,32 @@ static void _fw_set_drv_info(struct rtw89_dev *rtwdev, u8 type)
switch (type) {
case CXDRVINFO_INIT:
- rtw89_fw_h2c_cxdrv_init(rtwdev);
+ if (ver->fcxinit == 7)
+ rtw89_fw_h2c_cxdrv_init_v7(rtwdev, type);
+ else
+ rtw89_fw_h2c_cxdrv_init(rtwdev, type);
break;
case CXDRVINFO_ROLE:
if (ver->fwlrole == 0)
- rtw89_fw_h2c_cxdrv_role(rtwdev);
+ rtw89_fw_h2c_cxdrv_role(rtwdev, type);
else if (ver->fwlrole == 1)
- rtw89_fw_h2c_cxdrv_role_v1(rtwdev);
+ rtw89_fw_h2c_cxdrv_role_v1(rtwdev, type);
else if (ver->fwlrole == 2)
- rtw89_fw_h2c_cxdrv_role_v2(rtwdev);
+ rtw89_fw_h2c_cxdrv_role_v2(rtwdev, type);
break;
case CXDRVINFO_CTRL:
- rtw89_fw_h2c_cxdrv_ctrl(rtwdev);
+ if (ver->drvinfo_type == 1)
+ type = 2;
+
+ if (ver->fcxctrl == 7)
+ rtw89_fw_h2c_cxdrv_ctrl_v7(rtwdev, type);
+ else
+ rtw89_fw_h2c_cxdrv_ctrl(rtwdev, type);
break;
case CXDRVINFO_TRX:
+ if (ver->drvinfo_type == 1)
+ type = 3;
+
dm->trx_info.tx_power = u32_get_bits(rf_para.wl_tx_power,
RTW89_BTC_WL_DEF_TX_PWR);
dm->trx_info.rx_gain = u32_get_bits(rf_para.wl_rx_gain,
@@ -2107,11 +2156,18 @@ static void _fw_set_drv_info(struct rtw89_dev *rtwdev, u8 type)
RTW89_BTC_WL_DEF_TX_PWR);
dm->trx_info.cn = wl->cn_report;
dm->trx_info.nhm = wl->nhm.pwr;
- rtw89_fw_h2c_cxdrv_trx(rtwdev);
+ rtw89_fw_h2c_cxdrv_trx(rtwdev, type);
break;
case CXDRVINFO_RFK:
- rtw89_fw_h2c_cxdrv_rfk(rtwdev);
+ if (ver->drvinfo_type == 1)
+ return;
+
+ rtw89_fw_h2c_cxdrv_rfk(rtwdev, type);
break;
+ case CXDRVINFO_TXPWR:
+ case CXDRVINFO_FDDT:
+ case CXDRVINFO_MLO:
+ case CXDRVINFO_OSI:
default:
break;
}
@@ -2261,20 +2317,25 @@ static void _set_bt_tx_power(struct rtw89_dev *rtwdev, u8 level)
{
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_bt_info *bt = &btc->cx.bt;
+ int ret;
u8 buf;
- if (bt->rf_para.tx_pwr_freerun == level)
+ if (btc->cx.cnt_bt[BTC_BCNT_INFOUPDATE] == 0)
return;
- bt->rf_para.tx_pwr_freerun = level;
- btc->dm.rf_trx_para.bt_tx_power = level;
+ if (bt->rf_para.tx_pwr_freerun == level)
+ return;
rtw89_debug(rtwdev, RTW89_DBG_BTC,
"[BTC], %s(): level = %d\n",
__func__, level);
buf = (s8)(-level);
- _send_fw_cmd(rtwdev, BTFC_SET, SET_BT_TX_PWR, &buf, 1);
+ ret = _send_fw_cmd(rtwdev, BTFC_SET, SET_BT_TX_PWR, &buf, 1);
+ if (!ret) {
+ bt->rf_para.tx_pwr_freerun = level;
+ btc->dm.rf_trx_para.bt_tx_power = level;
+ }
}
#define BTC_BT_RX_NORMAL_LVL 7
@@ -2284,6 +2345,9 @@ static void _set_bt_rx_gain(struct rtw89_dev *rtwdev, u8 level)
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_bt_info *bt = &btc->cx.bt;
+ if (btc->cx.cnt_bt[BTC_BCNT_INFOUPDATE] == 0)
+ return;
+
if ((bt->rf_para.rx_gain_freerun == level ||
level > BTC_BT_RX_NORMAL_LVL) &&
(!rtwdev->chip->scbd || bt->lna_constrain == level))
@@ -2333,7 +2397,7 @@ static void _set_rf_trx_para(struct rtw89_dev *rtwdev)
}
/* decide trx_para_level */
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
/* fix LNA2 + TIA gain not change by GNT_BT */
if ((btc->dm.wl_btg_rx && b->profile_cnt.now != 0) ||
dm->bt_only == 1)
@@ -2435,7 +2499,7 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev)
u8 en = 0, i, ch = 0, bw = 0;
u8 mode, connect_cnt;
- if (btc->ctrl.manual || wl->status.map.scan)
+ if (btc->manual_ctrl || wl->status.map.scan)
return;
if (ver->fwlrole == 0) {
@@ -2560,8 +2624,16 @@ static bool _check_freerun(struct rtw89_dev *rtwdev)
struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1;
struct rtw89_btc_bt_link_info *bt_linfo = &bt->link_info;
struct rtw89_btc_bt_hid_desc *hid = &bt_linfo->hid_desc;
+ union rtw89_btc_module_info *md = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = btc->ver;
+ u8 isolation;
+
+ if (ver->fcxinit == 7)
+ isolation = md->md_v7.ant.isolation;
+ else
+ isolation = md->md.ant.isolation;
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
btc->dm.trx_para_level = 0;
return false;
}
@@ -2584,7 +2656,7 @@ static bool _check_freerun(struct rtw89_dev *rtwdev)
}
/* TODO get isolation by BT psd */
- if (btc->mdinfo.ant.isolation >= BTC_FREERUN_ANTISO_MIN) {
+ if (isolation >= BTC_FREERUN_ANTISO_MIN) {
btc->dm.trx_para_level = 5;
return true;
}
@@ -2712,7 +2784,7 @@ void rtw89_btc_set_policy(struct rtw89_dev *rtwdev, u16 policy_type)
u8 type;
u32 tbl_w1, tbl_b1, tbl_b4;
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
if (btc->cx.wl.status.map._4way)
tbl_w1 = cxtbl[1];
else
@@ -3023,12 +3095,13 @@ void rtw89_btc_set_policy_v1(struct rtw89_dev *rtwdev, u16 policy_type)
struct rtw89_btc_wl_role_info_v1 *wl_rinfo = &btc->cx.wl.role_info_v1;
struct rtw89_btc_bt_hid_desc *hid = &btc->cx.bt.link_info.hid_desc;
struct rtw89_btc_bt_hfp_desc *hfp = &btc->cx.bt.link_info.hfp_desc;
+ struct rtw89_btc_wl_info *wl = &btc->cx.wl;
u8 type, null_role;
u32 tbl_w1, tbl_b1, tbl_b4;
type = FIELD_GET(BTC_CXP_MASK, policy_type);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
if (btc->cx.wl.status.map._4way)
tbl_w1 = cxtbl[1];
else if (hid->exist && hid->type == BTC_HID_218)
@@ -3048,9 +3121,16 @@ void rtw89_btc_set_policy_v1(struct rtw89_dev *rtwdev, u16 policy_type)
tbl_b4 = cxtbl[2];
}
} else {
- tbl_w1 = cxtbl[16];
tbl_b1 = cxtbl[17];
tbl_b4 = cxtbl[17];
+
+ if (wl->bg_mode)
+ tbl_w1 = cxtbl[8];
+ else if ((wl->status.map.traffic_dir & BIT(RTW89_TFC_UL)) &&
+ hid->exist)
+ tbl_w1 = cxtbl[19];
+ else
+ tbl_w1 = cxtbl[16];
}
btc->bt_req_en = false;
@@ -3615,7 +3695,7 @@ static void _action_bt_idle(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
switch (btc->cx.state_map) {
case BTC_WBUSY_BNOSCAN: /*wl-busy + bt idle*/
case BTC_WSCAN_BNOSCAN: /* wl-scan + bt-idle */
@@ -3654,7 +3734,7 @@ static void _action_bt_hfp(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
if (btc->cx.wl.status.map._4way) {
_set_policy(rtwdev, BTC_CXP_OFF_WL, BTC_ACT_BT_HFP);
} else if (wl->status.map.traffic_dir & BIT(RTW89_TFC_UL)) {
@@ -3664,7 +3744,12 @@ static void _action_bt_hfp(struct rtw89_dev *rtwdev)
_set_policy(rtwdev, BTC_CXP_OFF_BWB1, BTC_ACT_BT_HFP);
}
} else {
- _set_policy(rtwdev, BTC_CXP_OFF_EQ2, BTC_ACT_BT_HFP);
+ if (wl->bg_mode)
+ _set_policy(rtwdev, BTC_CXP_OFF_BWB1, BTC_ACT_BT_HFP);
+ else if (wl->status.map.traffic_dir & BIT(RTW89_TFC_UL))
+ _set_policy(rtwdev, BTC_CXP_OFF_EQ5, BTC_ACT_BT_HFP);
+ else
+ _set_policy(rtwdev, BTC_CXP_OFF_EQ2, BTC_ACT_BT_HFP);
}
}
@@ -3679,7 +3764,7 @@ static void _action_bt_hid(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
if (wl->status.map._4way) {
policy_type = BTC_CXP_OFF_WL;
} else if (wl->status.map.traffic_dir & BIT(RTW89_TFC_UL)) {
@@ -3697,7 +3782,12 @@ static void _action_bt_hid(struct rtw89_dev *rtwdev)
policy_type = BTC_CXP_OFF_BWB1;
}
} else { /* dedicated-antenna */
- policy_type = BTC_CXP_OFF_EQ3;
+ if (wl->bg_mode)
+ policy_type = BTC_CXP_OFF_BWB1;
+ else if (wl->status.map.traffic_dir & BIT(RTW89_TFC_UL))
+ policy_type = BTC_CXP_OFF_EQ4;
+ else
+ policy_type = BTC_CXP_OFF_EQ3;
}
_set_policy(rtwdev, policy_type, BTC_ACT_BT_HID);
@@ -3947,7 +4037,7 @@ static void _action_wl_other(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED)
+ if (btc->ant_type == BTC_ANT_SHARED)
_set_policy(rtwdev, BTC_CXP_OFFB_BWB0, BTC_ACT_WL_OTHER);
else
_set_policy(rtwdev, BTC_CXP_OFF_EQ0, BTC_ACT_WL_OTHER);
@@ -3991,7 +4081,7 @@ static void _set_btg_ctrl(struct rtw89_dev *rtwdev)
u32 is_btg;
u8 i, val;
- if (btc->ctrl.manual)
+ if (btc->manual_ctrl)
return;
if (ver->fwlrole == 0)
@@ -4063,7 +4153,7 @@ static void _set_wl_preagc_ctrl(struct rtw89_dev *rtwdev)
struct rtw89_btc_dm *dm = &btc->dm;
u8 is_preagc, val;
- if (btc->ctrl.manual)
+ if (btc->manual_ctrl)
return;
if (wl_rinfo->link_mode == BTC_WLINK_25G_MCC)
@@ -4083,7 +4173,7 @@ static void _set_wl_preagc_ctrl(struct rtw89_dev *rtwdev)
else if (ver->fwlrole == 2 && wl_rinfo->dbcc_en &&
wl_rinfo->dbcc_2g_phy != RTW89_PHY_1)
is_preagc = BTC_PREAGC_DISABLE;
- else if (btc->mdinfo.ant.type == BTC_ANT_SHARED)
+ else if (btc->ant_type == BTC_ANT_SHARED)
is_preagc = BTC_PREAGC_DISABLE;
else
is_preagc = BTC_PREAGC_ENABLE;
@@ -4187,13 +4277,12 @@ static void _set_wl_tx_limit(struct rtw89_dev *rtwdev)
struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1;
struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2;
struct rtw89_txtime_data data = {.rtwdev = rtwdev};
- u8 mode;
- u8 tx_retry;
+ u8 mode, igno_bt, tx_retry;
u32 tx_time;
u16 enable;
bool reenable = false;
- if (btc->ctrl.manual)
+ if (btc->manual_ctrl)
return;
if (ver->fwlrole == 0)
@@ -4205,7 +4294,12 @@ static void _set_wl_tx_limit(struct rtw89_dev *rtwdev)
else
return;
- if (btc->dm.freerun || btc->ctrl.igno_bt || b->profile_cnt.now == 0 ||
+ if (ver->fcxctrl == 7)
+ igno_bt = btc->ctrl.ctrl_v7.igno_bt;
+ else
+ igno_bt = btc->ctrl.ctrl.igno_bt;
+
+ if (btc->dm.freerun || igno_bt || b->profile_cnt.now == 0 ||
mode == BTC_WLINK_5G || mode == BTC_WLINK_NOLINK) {
enable = 0;
tx_time = BTC_MAX_TX_TIME_DEF;
@@ -4402,7 +4496,7 @@ static void _action_wl_scan(struct rtw89_dev *rtwdev)
if (RTW89_CHK_FW_FEATURE(SCAN_OFFLOAD, &rtwdev->fw)) {
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W25G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED)
+ if (btc->ant_type == BTC_ANT_SHARED)
_set_policy(rtwdev, BTC_CXP_OFFE_DEF,
BTC_RSN_NTFY_SCAN_START);
else
@@ -4430,7 +4524,7 @@ static void _action_wl_25g_mcc(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W25G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
if (btc->cx.bt.link_info.profile_cnt.now == 0)
_set_policy(rtwdev, BTC_CXP_OFFE_DEF2,
BTC_ACT_WL_25G_MCC);
@@ -4447,7 +4541,7 @@ static void _action_wl_2g_mcc(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
if (btc->cx.bt.link_info.profile_cnt.now == 0)
_set_policy(rtwdev, BTC_CXP_OFFE_DEF2,
BTC_ACT_WL_2G_MCC);
@@ -4465,7 +4559,7 @@ static void _action_wl_2g_scc(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
if (btc->cx.bt.link_info.profile_cnt.now == 0)
_set_policy(rtwdev,
BTC_CXP_OFFE_DEF2, BTC_ACT_WL_2G_SCC);
@@ -4487,7 +4581,7 @@ static void _action_wl_2g_scc_v1(struct rtw89_dev *rtwdev)
u16 policy_type = BTC_CXP_OFF_BT;
u32 dur;
- if (btc->mdinfo.ant.type == BTC_ANT_DEDICATED) {
+ if (btc->ant_type == BTC_ANT_DEDICATED) {
policy_type = BTC_CXP_OFF_EQ0;
} else {
/* shared-antenna */
@@ -4549,7 +4643,7 @@ static void _action_wl_2g_scc_v2(struct rtw89_dev *rtwdev)
u16 policy_type = BTC_CXP_OFF_BT;
u32 dur;
- if (btc->mdinfo.ant.type == BTC_ANT_DEDICATED) {
+ if (btc->ant_type == BTC_ANT_DEDICATED) {
policy_type = BTC_CXP_OFF_EQ0;
} else {
/* shared-antenna */
@@ -4607,7 +4701,7 @@ static void _action_wl_2g_ap(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
if (btc->cx.bt.link_info.profile_cnt.now == 0)
_set_policy(rtwdev, BTC_CXP_OFFE_DEF2,
BTC_ACT_WL_2G_AP);
@@ -4624,7 +4718,7 @@ static void _action_wl_2g_go(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
if (btc->cx.bt.link_info.profile_cnt.now == 0)
_set_policy(rtwdev,
BTC_CXP_OFFE_DEF2, BTC_ACT_WL_2G_GO);
@@ -4642,7 +4736,7 @@ static void _action_wl_2g_gc(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
_action_by_bt(rtwdev);
} else {/* dedicated-antenna */
_set_policy(rtwdev, BTC_CXP_OFF_EQ0, BTC_ACT_WL_2G_GC);
@@ -4655,7 +4749,7 @@ static void _action_wl_2g_nan(struct rtw89_dev *rtwdev)
_set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G);
- if (btc->mdinfo.ant.type == BTC_ANT_SHARED) { /* shared-antenna */
+ if (btc->ant_type == BTC_ANT_SHARED) { /* shared-antenna */
if (btc->cx.bt.link_info.profile_cnt.now == 0)
_set_policy(rtwdev,
BTC_CXP_OFFE_DEF2, BTC_ACT_WL_2G_NAN);
@@ -5351,7 +5445,7 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason)
struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info;
struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1;
struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2;
- u8 mode;
+ u8 mode, igno_bt, always_freerun;
lockdep_assert_held(&rtwdev->mutex);
@@ -5368,20 +5462,28 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason)
else
return;
+ if (ver->fcxctrl == 7) {
+ igno_bt = btc->ctrl.ctrl_v7.igno_bt;
+ always_freerun = btc->ctrl.ctrl_v7.always_freerun;
+ } else {
+ igno_bt = btc->ctrl.ctrl.igno_bt;
+ always_freerun = btc->ctrl.ctrl.always_freerun;
+ }
+
rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): reason=%d, mode=%d\n",
__func__, reason, mode);
rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): wl_only=%d, bt_only=%d\n",
__func__, dm->wl_only, dm->bt_only);
/* Be careful to change the following function sequence!! */
- if (btc->ctrl.manual) {
+ if (btc->manual_ctrl) {
rtw89_debug(rtwdev, RTW89_DBG_BTC,
"[BTC], %s(): return for Manual CTRL!!\n",
__func__);
return;
}
- if (btc->ctrl.igno_bt &&
+ if (igno_bt &&
(reason == BTC_RSN_UPDATE_BT_INFO ||
reason == BTC_RSN_UPDATE_BT_SCBD)) {
rtw89_debug(rtwdev, RTW89_DBG_BTC,
@@ -5418,24 +5520,24 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason)
dm->freerun = false;
dm->cnt_dm[BTC_DCNT_RUN]++;
dm->fddt_train = BTC_FDDT_DISABLE;
- btc->ctrl.igno_bt = false;
bt->scan_rx_low_pri = false;
+ igno_bt = false;
- if (btc->ctrl.always_freerun) {
+ if (always_freerun) {
_action_freerun(rtwdev);
- btc->ctrl.igno_bt = true;
+ igno_bt = true;
goto exit;
}
if (dm->wl_only) {
_action_wl_only(rtwdev);
- btc->ctrl.igno_bt = true;
+ igno_bt = true;
goto exit;
}
if (wl->status.map.rf_off || wl->status.map.lps || dm->bt_only) {
_action_wl_off(rtwdev, mode);
- btc->ctrl.igno_bt = true;
+ igno_bt = true;
goto exit;
}
@@ -5525,6 +5627,10 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason)
exit:
rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): exit\n", __func__);
+ if (ver->fcxctrl == 7)
+ btc->ctrl.ctrl_v7.igno_bt = igno_bt;
+ else
+ btc->ctrl.ctrl.igno_bt = igno_bt;
_action_common(rtwdev);
}
@@ -5560,16 +5666,26 @@ static void _set_init_info(struct rtw89_dev *rtwdev)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
struct rtw89_btc *btc = &rtwdev->btc;
+ const struct rtw89_btc_ver *ver = btc->ver;
struct rtw89_btc_dm *dm = &btc->dm;
struct rtw89_btc_wl_info *wl = &btc->cx.wl;
- dm->init_info.wl_only = (u8)dm->wl_only;
- dm->init_info.bt_only = (u8)dm->bt_only;
- dm->init_info.wl_init_ok = (u8)wl->status.map.init_ok;
- dm->init_info.dbcc_en = rtwdev->dbcc_en;
- dm->init_info.cx_other = btc->cx.other.type;
- dm->init_info.wl_guard_ch = chip->afh_guard_ch;
- dm->init_info.module = btc->mdinfo;
+ if (ver->fcxinit == 7) {
+ dm->init_info.init_v7.wl_only = (u8)dm->wl_only;
+ dm->init_info.init_v7.bt_only = (u8)dm->bt_only;
+ dm->init_info.init_v7.wl_init_ok = (u8)wl->status.map.init_ok;
+ dm->init_info.init_v7.cx_other = btc->cx.other.type;
+ dm->init_info.init_v7.wl_guard_ch = chip->afh_guard_ch;
+ dm->init_info.init_v7.module = btc->mdinfo.md_v7;
+ } else {
+ dm->init_info.init.wl_only = (u8)dm->wl_only;
+ dm->init_info.init.bt_only = (u8)dm->bt_only;
+ dm->init_info.init.wl_init_ok = (u8)wl->status.map.init_ok;
+ dm->init_info.init.dbcc_en = rtwdev->dbcc_en;
+ dm->init_info.init.cx_other = btc->cx.other.type;
+ dm->init_info.init.wl_guard_ch = chip->afh_guard_ch;
+ dm->init_info.init.module = btc->mdinfo.md;
+ }
}
void rtw89_btc_ntfy_init(struct rtw89_dev *rtwdev, u8 mode)
@@ -5578,11 +5694,15 @@ void rtw89_btc_ntfy_init(struct rtw89_dev *rtwdev, u8 mode)
struct rtw89_btc_dm *dm = &rtwdev->btc.dm;
struct rtw89_btc_wl_info *wl = &btc->cx.wl;
const struct rtw89_chip_info *chip = rtwdev->chip;
+ const struct rtw89_btc_ver *ver = btc->ver;
_reset_btc_var(rtwdev, BTC_RESET_ALL);
btc->dm.run_reason = BTC_RSN_NONE;
btc->dm.run_action = BTC_ACT_NONE;
- btc->ctrl.igno_bt = true;
+ if (ver->fcxctrl == 7)
+ btc->ctrl.ctrl_v7.igno_bt = true;
+ else
+ btc->ctrl.ctrl.igno_bt = true;
rtw89_debug(rtwdev, RTW89_DBG_BTC,
"[BTC], %s(): mode=%d\n", __func__, mode);
@@ -6298,7 +6418,7 @@ static void rtw89_btc_ntfy_wl_sta_iter(void *data, struct ieee80211_sta *sta)
if (BTC_RSSI_LOW(link_info->rssi_state[i]))
rssi_map |= BIT(i);
- if (btc->mdinfo.ant.type == BTC_ANT_DEDICATED &&
+ if (btc->ant_type == BTC_ANT_DEDICATED &&
BTC_RSSI_CHANGE(link_info->rssi_state[i]))
is_sta_change = true;
}
@@ -6489,13 +6609,16 @@ void rtw89_btc_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb,
static void _show_cx_info(struct rtw89_dev *rtwdev, struct seq_file *m)
{
+ union rtw89_btc_module_info *md = &rtwdev->btc.mdinfo;
const struct rtw89_chip_info *chip = rtwdev->chip;
+ const struct rtw89_btc_ver *ver = rtwdev->btc.ver;
struct rtw89_hal *hal = &rtwdev->hal;
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_dm *dm = &btc->dm;
struct rtw89_btc_bt_info *bt = &btc->cx.bt;
struct rtw89_btc_wl_info *wl = &btc->cx.wl;
u32 ver_main = 0, ver_sub = 0, ver_hotfix = 0, id_branch = 0;
+ u8 cv, rfe, iso, ant_num, ant_single_pos;
if (!(dm->coex_info_map & BTC_COEX_INFO_CX))
return;
@@ -6545,11 +6668,24 @@ static void _show_cx_info(struct rtw89_dev *rtwdev, struct seq_file *m)
ver_main, ver_sub, ver_hotfix, id_branch,
bt->ver_info.fw, bt->run_patch_code ? "patch" : "ROM");
+ if (ver->fcxinit == 7) {
+ cv = md->md_v7.kt_ver;
+ rfe = md->md_v7.rfe_type;
+ iso = md->md_v7.ant.isolation;
+ ant_num = md->md_v7.ant.num;
+ ant_single_pos = md->md_v7.ant.single_pos;
+ } else {
+ cv = md->md.cv;
+ rfe = md->md.rfe_type;
+ iso = md->md.ant.isolation;
+ ant_num = md->md.ant.num;
+ ant_single_pos = md->md.ant.single_pos;
+ }
+
seq_printf(m, " %-15s : cv:%x, rfe_type:0x%x, ant_iso:%d, ant_pg:%d, %s",
- "[hw_info]", btc->mdinfo.cv, btc->mdinfo.rfe_type,
- btc->mdinfo.ant.isolation, btc->mdinfo.ant.num,
- (btc->mdinfo.ant.num > 1 ? "" : (btc->mdinfo.ant.single_pos ?
- "1Ant_Pos:S1, " : "1Ant_Pos:S0, ")));
+ "[hw_info]", cv, rfe, iso, ant_num,
+ ant_num > 1 ? "" :
+ ant_single_pos ? "1Ant_Pos:S1, " : "1Ant_Pos:S0, ");
seq_printf(m, "3rd_coex:%d, dbcc:%d, tx_num:%d, rx_num:%d\n",
btc->cx.other.type, rtwdev->dbcc_en, hal->tx_nss,
@@ -6722,20 +6858,26 @@ static void _show_bt_info(struct rtw89_dev *rtwdev, struct seq_file *m)
struct rtw89_btc_cx *cx = &btc->cx;
struct rtw89_btc_bt_info *bt = &cx->bt;
struct rtw89_btc_wl_info *wl = &cx->wl;
- struct rtw89_btc_module *module = &btc->mdinfo;
struct rtw89_btc_bt_link_info *bt_linfo = &bt->link_info;
+ union rtw89_btc_module_info *md = &btc->mdinfo;
u8 *afh = bt_linfo->afh_map;
u8 *afh_le = bt_linfo->afh_map_le;
+ u8 bt_pos;
if (!(btc->dm.coex_info_map & BTC_COEX_INFO_BT))
return;
+ if (ver->fcxinit == 7)
+ bt_pos = md->md_v7.bt_pos;
+ else
+ bt_pos = md->md.bt_pos;
+
seq_puts(m, "========== [BT Status] ==========\n");
seq_printf(m, " %-15s : enable:%s, btg:%s%s, connect:%s, ",
"[status]", bt->enable.now ? "Y" : "N",
bt->btg_type ? "Y" : "N",
- (bt->enable.now && (bt->btg_type != module->bt_pos) ?
+ (bt->enable.now && (bt->btg_type != bt_pos) ?
"(efuse-mismatch!!)" : ""),
(bt_linfo->status.map.connect ? "Y" : "N"));
@@ -6934,10 +7076,13 @@ static const char *steps_to_str(u16 step)
CASE_BTC_POLICY_STR(OFF_EQ1);
CASE_BTC_POLICY_STR(OFF_EQ2);
CASE_BTC_POLICY_STR(OFF_EQ3);
+ CASE_BTC_POLICY_STR(OFF_EQ4);
+ CASE_BTC_POLICY_STR(OFF_EQ5);
CASE_BTC_POLICY_STR(OFF_BWB0);
CASE_BTC_POLICY_STR(OFF_BWB1);
CASE_BTC_POLICY_STR(OFF_BWB2);
CASE_BTC_POLICY_STR(OFF_BWB3);
+ CASE_BTC_POLICY_STR(OFF_WL2);
CASE_BTC_POLICY_STR(OFFB_BWB0);
CASE_BTC_POLICY_STR(OFFE_DEF);
CASE_BTC_POLICY_STR(OFFE_DEF2);
@@ -7123,21 +7268,22 @@ static void _show_dm_step(struct rtw89_dev *rtwdev, struct seq_file *m)
static void _show_dm_info(struct rtw89_dev *rtwdev, struct seq_file *m)
{
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = btc->ver;
struct rtw89_btc_dm *dm = &btc->dm;
struct rtw89_btc_wl_info *wl = &btc->cx.wl;
struct rtw89_btc_bt_info *bt = &btc->cx.bt;
+ u8 igno_bt;
if (!(dm->coex_info_map & BTC_COEX_INFO_DM))
return;
seq_printf(m, "========== [Mechanism Status %s] ==========\n",
- (btc->ctrl.manual ? "(Manual)" : "(Auto)"));
+ (btc->manual_ctrl ? "(Manual)" : "(Auto)"));
seq_printf(m,
" %-15s : type:%s, reason:%s(), action:%s(), ant_path:%s, init_mode:%s, run_cnt:%d\n",
"[status]",
- module->ant.type == BTC_ANT_SHARED ? "shared" : "dedicated",
+ btc->ant_type == BTC_ANT_SHARED ? "shared" : "dedicated",
steps_to_str(dm->run_reason),
steps_to_str(dm->run_action | BTC_ACT_EXT_BIT),
id_to_ant(FIELD_GET(GENMASK(7, 0), dm->set_ant_path)),
@@ -7146,8 +7292,13 @@ static void _show_dm_info(struct rtw89_dev *rtwdev, struct seq_file *m)
_show_dm_step(rtwdev, m);
+ if (ver->fcxctrl == 7)
+ igno_bt = btc->ctrl.ctrl_v7.igno_bt;
+ else
+ igno_bt = btc->ctrl.ctrl.igno_bt;
+
seq_printf(m, " %-15s : wl_only:%d, bt_only:%d, igno_bt:%d, free_run:%d, wl_ps_ctrl:%d, wl_mimo_ps:%d, ",
- "[dm_flag]", dm->wl_only, dm->bt_only, btc->ctrl.igno_bt,
+ "[dm_flag]", dm->wl_only, dm->bt_only, igno_bt,
dm->freerun, btc->lps, dm->wl_mimo_ps);
seq_printf(m, "leak_ap:%d, fw_offload:%s%s\n", dm->leak_ap,
@@ -7888,10 +8039,11 @@ static void _show_fbtc_step_v2(struct rtw89_dev *rtwdev, struct seq_file *m)
struct rtw89_btc_btf_fwinfo *pfwinfo = &btc->fwinfo;
struct rtw89_btc_rpt_cmn_info *pcinfo = NULL;
struct rtw89_btc_fbtc_steps_v2 *pstep = NULL;
+ const struct rtw89_btc_ver *ver = btc->ver;
u8 type, val, cnt = 0, state = 0;
bool outloop = false;
u16 i, diff_t, n_start = 0, n_stop = 0;
- u16 pos_old, pos_new;
+ u16 pos_old, pos_new, trace_step;
pcinfo = &pfwinfo->rpt_fbtc_step.cinfo;
if (!pcinfo->valid)
@@ -7908,11 +8060,16 @@ static void _show_fbtc_step_v2(struct rtw89_dev *rtwdev, struct seq_file *m)
do {
switch (state) {
case 0:
+ if (ver->fcxctrl == 7 || ver->fcxctrl == 1)
+ trace_step = 50;
+ else
+ trace_step = btc->ctrl.ctrl.trace_step;
+
n_start = pos_old;
if (pos_new >= pos_old)
n_stop = pos_new;
else
- n_stop = btc->ctrl.trace_step - 1;
+ n_stop = trace_step - 1;
state = 1;
break;
@@ -8742,7 +8899,7 @@ void rtw89_btc_dump_info(struct rtw89_dev *rtwdev, struct seq_file *m)
seq_printf(m, "WL FW / BT FW %d.%d.%d.%d / NA\n",
fw_suit->major_ver, fw_suit->minor_ver,
fw_suit->sub_ver, fw_suit->sub_idex);
- seq_printf(m, "manual %d\n", btc->ctrl.manual);
+ seq_printf(m, "manual %d\n", btc->manual_ctrl);
seq_puts(m, "=========================================\n");
diff --git a/drivers/net/wireless/realtek/rtw89/coex.h b/drivers/net/wireless/realtek/rtw89/coex.h
index 46e25c6f88a6..13303830684e 100644
--- a/drivers/net/wireless/realtek/rtw89/coex.h
+++ b/drivers/net/wireless/realtek/rtw89/coex.h
@@ -7,6 +7,8 @@
#include "core.h"
+#define BTC_H2C_MAXLEN 2020
+
enum btc_mode {
BTC_MODE_NORMAL,
BTC_MODE_WL,
@@ -23,6 +25,7 @@ enum btc_wl_rfk_type {
BTC_WRFKT_DACK = 4,
BTC_WRFKT_RXDCK = 5,
BTC_WRFKT_TSSI = 6,
+ BTC_WRFKT_CHLK = 7,
};
#define NM_EXEC false
@@ -152,6 +155,10 @@ enum btc_lps_state {
#define BTC_REG_NOTFOUND 0xff
+#define R_BTC_ZB_COEX_TBL_0 0xE328
+#define R_BTC_ZB_COEX_TBL_1 0xE32c
+#define R_BTC_ZB_BREAK_TBL 0xE350
+
enum btc_ant_div_pos {
BTC_ANT_DIV_MAIN = 0,
BTC_ANT_DIV_AUX = 1,
@@ -180,6 +187,20 @@ enum btc_btgctrl_type {
BTC_BTGCTRL_BB_GNT_NOTFOUND,
};
+enum btc_wa_type {
+ BTC_WA_5G_HI_CH_RX = BIT(0),
+ BTC_WA_NULL_AP = BIT(1),
+ BTC_WA_HFP_ZB = BIT(2), /* HFP PTA req bit4 define issue */
+};
+
+enum btc_3cx_type {
+ BTC_3CX_NONE = 0,
+ BTC_3CX_BT2 = BIT(0),
+ BTC_3CX_ZB = BIT(1),
+ BTC_3CX_LTE = BIT(2),
+ BTC_3CX_MAX,
+};
+
void rtw89_btc_ntfy_poweron(struct rtw89_dev *rtwdev);
void rtw89_btc_ntfy_poweroff(struct rtw89_dev *rtwdev);
void rtw89_btc_ntfy_init(struct rtw89_dev *rtwdev, u8 mode);
diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c
index fd527a249996..d474b8d5df3d 100644
--- a/drivers/net/wireless/realtek/rtw89/core.c
+++ b/drivers/net/wireless/realtek/rtw89/core.c
@@ -372,7 +372,7 @@ void rtw89_core_set_chip_txpwr(struct rtw89_dev *rtwdev)
chip->ops->set_txpwr(rtwdev, chan, phy_idx);
}
-void rtw89_set_channel(struct rtw89_dev *rtwdev)
+int rtw89_set_channel(struct rtw89_dev *rtwdev)
{
struct rtw89_hal *hal = &rtwdev->hal;
const struct rtw89_chip_info *chip = rtwdev->chip;
@@ -399,7 +399,7 @@ void rtw89_set_channel(struct rtw89_dev *rtwdev)
break;
default:
WARN(1, "Invalid ent mode: %d\n", mode);
- return;
+ return -EINVAL;
}
roc_idx = atomic_read(&hal->roc_entity_idx);
@@ -426,6 +426,7 @@ void rtw89_set_channel(struct rtw89_dev *rtwdev)
}
rtw89_set_entity_state(rtwdev, true);
+ return 0;
}
void rtw89_get_channel(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif,
@@ -1176,7 +1177,8 @@ static __le32 rtw89_build_txwd_info2_v1(struct rtw89_tx_desc_info *desc_info)
static __le32 rtw89_build_txwd_info4(struct rtw89_tx_desc_info *desc_info)
{
- u32 dword = FIELD_PREP(RTW89_TXWD_INFO4_RTS_EN, 1) |
+ bool rts_en = !desc_info->is_bmc;
+ u32 dword = FIELD_PREP(RTW89_TXWD_INFO4_RTS_EN, rts_en) |
FIELD_PREP(RTW89_TXWD_INFO4_HW_RTS_EN, 1);
return cpu_to_le32(dword);
@@ -1329,7 +1331,8 @@ static __le32 rtw89_build_txwd_info2_v2(struct rtw89_tx_desc_info *desc_info)
static __le32 rtw89_build_txwd_info4_v2(struct rtw89_tx_desc_info *desc_info)
{
- u32 dword = FIELD_PREP(BE_TXD_INFO4_RTS_EN, 1) |
+ bool rts_en = !desc_info->is_bmc;
+ u32 dword = FIELD_PREP(BE_TXD_INFO4_RTS_EN, rts_en) |
FIELD_PREP(BE_TXD_INFO4_HW_RTS_EN, 1);
return cpu_to_le32(dword);
@@ -1866,6 +1869,17 @@ static void rtw89_core_cancel_6ghz_probe_tx(struct rtw89_dev *rtwdev,
ieee80211_queue_work(rtwdev->hw, &rtwdev->cancel_6ghz_probe_work);
}
+static void rtw89_vif_sync_bcn_tsf(struct rtw89_vif *rtwvif,
+ struct ieee80211_hdr *hdr, size_t len)
+{
+ struct ieee80211_mgmt *mgmt = (typeof(mgmt))hdr;
+
+ if (len < offsetof(typeof(*mgmt), u.beacon.variable))
+ return;
+
+ WRITE_ONCE(rtwvif->sync_bcn_tsf, le64_to_cpu(mgmt->u.beacon.timestamp));
+}
+
static void rtw89_vif_rx_stats_iter(void *data, u8 *mac,
struct ieee80211_vif *vif)
{
@@ -1896,8 +1910,10 @@ static void rtw89_vif_rx_stats_iter(void *data, u8 *mac,
return;
if (ieee80211_is_beacon(hdr->frame_control)) {
- if (vif->type == NL80211_IFTYPE_STATION)
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ rtw89_vif_sync_bcn_tsf(rtwvif, hdr, skb->len);
rtw89_fw_h2c_rssi_offload(rtwdev, phy_ppdu);
+ }
pkt_stat->beacon_nr++;
}
@@ -3345,6 +3361,14 @@ int rtw89_core_sta_add(struct rtw89_dev *rtwdev,
return ret;
}
+ ret = rtw89_chip_h2c_default_cmac_tbl(rtwdev, rtwvif, rtwsta);
+ if (ret)
+ return ret;
+
+ ret = rtw89_chip_h2c_default_dmac_tbl(rtwdev, rtwvif, rtwsta);
+ if (ret)
+ return ret;
+
rtw89_queue_chanctx_change(rtwdev, RTW89_CHANCTX_REMOTE_STA_CHANGE);
}
@@ -3393,7 +3417,7 @@ int rtw89_core_sta_disconnect(struct rtw89_dev *rtwdev,
rtw89_fw_release_general_pkt_list_vif(rtwdev, rtwvif, true);
}
- ret = rtw89_fw_h2c_assoc_cmac_tbl(rtwdev, vif, sta);
+ ret = rtw89_chip_h2c_assoc_cmac_tbl(rtwdev, vif, sta);
if (ret) {
rtw89_warn(rtwdev, "failed to send h2c cmac table\n");
return ret;
@@ -3442,7 +3466,7 @@ int rtw89_core_sta_assoc(struct rtw89_dev *rtwdev,
}
}
- ret = rtw89_fw_h2c_assoc_cmac_tbl(rtwdev, vif, sta);
+ ret = rtw89_chip_h2c_assoc_cmac_tbl(rtwdev, vif, sta);
if (ret) {
rtw89_warn(rtwdev, "failed to send h2c cmac table\n");
return ret;
@@ -3485,6 +3509,8 @@ int rtw89_core_sta_assoc(struct rtw89_dev *rtwdev,
rtw89_warn(rtwdev, "failed to send h2c general packet\n");
return ret;
}
+
+ rtw89_fw_h2c_set_bcn_fltr_cfg(rtwdev, vif, true);
}
return ret;
@@ -3611,7 +3637,8 @@ static void rtw89_init_vht_cap(struct rtw89_dev *rtwdev,
cpu_to_le16(867), cpu_to_le16(1733), cpu_to_le16(2600), cpu_to_le16(3467),
};
const struct rtw89_chip_info *chip = rtwdev->chip;
- const __le16 *highest = chip->support_bw160 ? highest_bw160 : highest_bw80;
+ const __le16 *highest = chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160) ?
+ highest_bw160 : highest_bw80;
struct rtw89_hal *hal = &rtwdev->hal;
u16 tx_mcs_map = 0, rx_mcs_map = 0;
u8 sts_cap = 3;
@@ -3640,34 +3667,34 @@ static void rtw89_init_vht_cap(struct rtw89_dev *rtwdev,
vht_cap->cap |= IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE |
IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
vht_cap->cap |= sts_cap << IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT;
- if (chip->support_bw160)
+ if (chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160))
vht_cap->cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ |
IEEE80211_VHT_CAP_SHORT_GI_160;
vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rx_mcs_map);
vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(tx_mcs_map);
vht_cap->vht_mcs.rx_highest = highest[hal->rx_nss - 1];
vht_cap->vht_mcs.tx_highest = highest[hal->tx_nss - 1];
-}
-#define RTW89_SBAND_IFTYPES_NR 2
+ if (ieee80211_hw_check(rtwdev->hw, SUPPORTS_VHT_EXT_NSS_BW))
+ vht_cap->vht_mcs.tx_highest |=
+ cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE);
+}
static void rtw89_init_he_cap(struct rtw89_dev *rtwdev,
enum nl80211_band band,
- struct ieee80211_supported_band *sband)
+ enum nl80211_iftype iftype,
+ struct ieee80211_sband_iftype_data *iftype_data)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
struct rtw89_hal *hal = &rtwdev->hal;
- struct ieee80211_sband_iftype_data *iftype_data;
bool no_ng16 = (chip->chip_id == RTL8852A && hal->cv == CHIP_CBV) ||
(chip->chip_id == RTL8852B && hal->cv == CHIP_CAV);
+ struct ieee80211_sta_he_cap *he_cap;
+ int nss = hal->rx_nss;
+ u8 *mac_cap_info;
+ u8 *phy_cap_info;
u16 mcs_map = 0;
int i;
- int nss = hal->rx_nss;
- int idx = 0;
-
- iftype_data = kcalloc(RTW89_SBAND_IFTYPES_NR, sizeof(*iftype_data), GFP_KERNEL);
- if (!iftype_data)
- return;
for (i = 0; i < 8; i++) {
if (i < nss)
@@ -3676,12 +3703,196 @@ static void rtw89_init_he_cap(struct rtw89_dev *rtwdev,
mcs_map |= IEEE80211_HE_MCS_NOT_SUPPORTED << (i * 2);
}
- for (i = 0; i < NUM_NL80211_IFTYPES; i++) {
- struct ieee80211_sta_he_cap *he_cap;
- u8 *mac_cap_info;
- u8 *phy_cap_info;
+ he_cap = &iftype_data->he_cap;
+ mac_cap_info = he_cap->he_cap_elem.mac_cap_info;
+ phy_cap_info = he_cap->he_cap_elem.phy_cap_info;
+
+ he_cap->has_he = true;
+ mac_cap_info[0] = IEEE80211_HE_MAC_CAP0_HTC_HE;
+ if (iftype == NL80211_IFTYPE_STATION)
+ mac_cap_info[1] = IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US;
+ mac_cap_info[2] = IEEE80211_HE_MAC_CAP2_ALL_ACK |
+ IEEE80211_HE_MAC_CAP2_BSR;
+ mac_cap_info[3] = IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2;
+ if (iftype == NL80211_IFTYPE_AP)
+ mac_cap_info[3] |= IEEE80211_HE_MAC_CAP3_OMI_CONTROL;
+ mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_OPS |
+ IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU;
+ if (iftype == NL80211_IFTYPE_STATION)
+ mac_cap_info[5] = IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX;
+ if (band == NL80211_BAND_2GHZ) {
+ phy_cap_info[0] =
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
+ } else {
+ phy_cap_info[0] =
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G;
+ if (chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160))
+ phy_cap_info[0] |= IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+ }
+ phy_cap_info[1] = IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
+ IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
+ IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US;
+ phy_cap_info[2] = IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+ IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
+ IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
+ IEEE80211_HE_PHY_CAP2_DOPPLER_TX;
+ phy_cap_info[3] = IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM;
+ if (iftype == NL80211_IFTYPE_STATION)
+ phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM |
+ IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2;
+ if (iftype == NL80211_IFTYPE_AP)
+ phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU;
+ phy_cap_info[4] = IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
+ IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4;
+ if (chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160))
+ phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4;
+ phy_cap_info[5] = no_ng16 ? 0 :
+ IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK |
+ IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK;
+ phy_cap_info[6] = IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
+ IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
+ IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
+ IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE;
+ phy_cap_info[7] = IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
+ IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
+ IEEE80211_HE_PHY_CAP7_MAX_NC_1;
+ phy_cap_info[8] = IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI |
+ IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI |
+ IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996;
+ if (chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160))
+ phy_cap_info[8] |= IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
+ IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU;
+ phy_cap_info[9] = IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM |
+ IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU |
+ IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
+ IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
+ u8_encode_bits(IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US,
+ IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK);
+ if (iftype == NL80211_IFTYPE_STATION)
+ phy_cap_info[9] |= IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU;
+ he_cap->he_mcs_nss_supp.rx_mcs_80 = cpu_to_le16(mcs_map);
+ he_cap->he_mcs_nss_supp.tx_mcs_80 = cpu_to_le16(mcs_map);
+ if (chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160)) {
+ he_cap->he_mcs_nss_supp.rx_mcs_160 = cpu_to_le16(mcs_map);
+ he_cap->he_mcs_nss_supp.tx_mcs_160 = cpu_to_le16(mcs_map);
+ }
+
+ if (band == NL80211_BAND_6GHZ) {
+ __le16 capa;
- switch (i) {
+ capa = le16_encode_bits(IEEE80211_HT_MPDU_DENSITY_NONE,
+ IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START) |
+ le16_encode_bits(IEEE80211_VHT_MAX_AMPDU_1024K,
+ IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP) |
+ le16_encode_bits(IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454,
+ IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN);
+ iftype_data->he_6ghz_capa.capa = capa;
+ }
+}
+
+static void rtw89_init_eht_cap(struct rtw89_dev *rtwdev,
+ enum nl80211_band band,
+ enum nl80211_iftype iftype,
+ struct ieee80211_sband_iftype_data *iftype_data)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+ struct ieee80211_eht_cap_elem_fixed *eht_cap_elem;
+ struct ieee80211_eht_mcs_nss_supp *eht_nss;
+ struct ieee80211_sta_eht_cap *eht_cap;
+ struct rtw89_hal *hal = &rtwdev->hal;
+ bool support_320mhz = false;
+ int sts = 8;
+ u8 val;
+
+ if (chip->chip_gen == RTW89_CHIP_AX)
+ return;
+
+ if (band == NL80211_BAND_6GHZ &&
+ chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_320))
+ support_320mhz = true;
+
+ eht_cap = &iftype_data->eht_cap;
+ eht_cap_elem = &eht_cap->eht_cap_elem;
+ eht_nss = &eht_cap->eht_mcs_nss_supp;
+
+ eht_cap->has_eht = true;
+
+ eht_cap_elem->mac_cap_info[0] =
+ u8_encode_bits(IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991,
+ IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK);
+ eht_cap_elem->mac_cap_info[1] = 0;
+
+ eht_cap_elem->phy_cap_info[0] =
+ IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI |
+ IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE;
+ if (support_320mhz)
+ eht_cap_elem->phy_cap_info[0] |=
+ IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
+
+ eht_cap_elem->phy_cap_info[0] |=
+ u8_encode_bits(u8_get_bits(sts - 1, BIT(0)),
+ IEEE80211_EHT_PHY_CAP0_BEAMFORMEE_SS_80MHZ_MASK);
+ eht_cap_elem->phy_cap_info[1] =
+ u8_encode_bits(u8_get_bits(sts - 1, GENMASK(2, 1)),
+ IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_80MHZ_MASK) |
+ u8_encode_bits(sts - 1,
+ IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK);
+ if (support_320mhz)
+ eht_cap_elem->phy_cap_info[1] |=
+ u8_encode_bits(sts - 1,
+ IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK);
+
+ eht_cap_elem->phy_cap_info[2] = 0;
+
+ eht_cap_elem->phy_cap_info[3] =
+ IEEE80211_EHT_PHY_CAP3_CODEBOOK_4_2_SU_FDBK |
+ IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK |
+ IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK |
+ IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK;
+
+ eht_cap_elem->phy_cap_info[4] =
+ IEEE80211_EHT_PHY_CAP4_POWER_BOOST_FACT_SUPP |
+ u8_encode_bits(1, IEEE80211_EHT_PHY_CAP4_MAX_NC_MASK);
+
+ eht_cap_elem->phy_cap_info[5] =
+ u8_encode_bits(IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_20US,
+ IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK);
+
+ eht_cap_elem->phy_cap_info[6] = 0;
+ eht_cap_elem->phy_cap_info[7] = 0;
+ eht_cap_elem->phy_cap_info[8] = 0;
+
+ val = u8_encode_bits(hal->rx_nss, IEEE80211_EHT_MCS_NSS_RX) |
+ u8_encode_bits(hal->tx_nss, IEEE80211_EHT_MCS_NSS_TX);
+ eht_nss->bw._80.rx_tx_mcs9_max_nss = val;
+ eht_nss->bw._80.rx_tx_mcs11_max_nss = val;
+ eht_nss->bw._80.rx_tx_mcs13_max_nss = val;
+ eht_nss->bw._160.rx_tx_mcs9_max_nss = val;
+ eht_nss->bw._160.rx_tx_mcs11_max_nss = val;
+ eht_nss->bw._160.rx_tx_mcs13_max_nss = val;
+ if (support_320mhz) {
+ eht_nss->bw._320.rx_tx_mcs9_max_nss = val;
+ eht_nss->bw._320.rx_tx_mcs11_max_nss = val;
+ eht_nss->bw._320.rx_tx_mcs13_max_nss = val;
+ }
+}
+
+#define RTW89_SBAND_IFTYPES_NR 2
+
+static void rtw89_init_he_eht_cap(struct rtw89_dev *rtwdev,
+ enum nl80211_band band,
+ struct ieee80211_supported_band *sband)
+{
+ struct ieee80211_sband_iftype_data *iftype_data;
+ enum nl80211_iftype iftype;
+ int idx = 0;
+
+ iftype_data = kcalloc(RTW89_SBAND_IFTYPES_NR, sizeof(*iftype_data), GFP_KERNEL);
+ if (!iftype_data)
+ return;
+
+ for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+ switch (iftype) {
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_AP:
break;
@@ -3694,92 +3905,10 @@ static void rtw89_init_he_cap(struct rtw89_dev *rtwdev,
break;
}
- iftype_data[idx].types_mask = BIT(i);
- he_cap = &iftype_data[idx].he_cap;
- mac_cap_info = he_cap->he_cap_elem.mac_cap_info;
- phy_cap_info = he_cap->he_cap_elem.phy_cap_info;
-
- he_cap->has_he = true;
- mac_cap_info[0] = IEEE80211_HE_MAC_CAP0_HTC_HE;
- if (i == NL80211_IFTYPE_STATION)
- mac_cap_info[1] = IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US;
- mac_cap_info[2] = IEEE80211_HE_MAC_CAP2_ALL_ACK |
- IEEE80211_HE_MAC_CAP2_BSR;
- mac_cap_info[3] = IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2;
- if (i == NL80211_IFTYPE_AP)
- mac_cap_info[3] |= IEEE80211_HE_MAC_CAP3_OMI_CONTROL;
- mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_OPS |
- IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU;
- if (i == NL80211_IFTYPE_STATION)
- mac_cap_info[5] = IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX;
- if (band == NL80211_BAND_2GHZ) {
- phy_cap_info[0] =
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
- } else {
- phy_cap_info[0] =
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G;
- if (chip->support_bw160)
- phy_cap_info[0] |= IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
- }
- phy_cap_info[1] = IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
- IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
- IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US;
- phy_cap_info[2] = IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
- IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
- IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
- IEEE80211_HE_PHY_CAP2_DOPPLER_TX;
- phy_cap_info[3] = IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM;
- if (i == NL80211_IFTYPE_STATION)
- phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM |
- IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2;
- if (i == NL80211_IFTYPE_AP)
- phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU;
- phy_cap_info[4] = IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
- IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4;
- if (chip->support_bw160)
- phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4;
- phy_cap_info[5] = no_ng16 ? 0 :
- IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK |
- IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK;
- phy_cap_info[6] = IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
- IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
- IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
- IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE;
- phy_cap_info[7] = IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
- IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
- IEEE80211_HE_PHY_CAP7_MAX_NC_1;
- phy_cap_info[8] = IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI |
- IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI |
- IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996;
- if (chip->support_bw160)
- phy_cap_info[8] |= IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
- IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU;
- phy_cap_info[9] = IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM |
- IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU |
- IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
- IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
- u8_encode_bits(IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US,
- IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK);
- if (i == NL80211_IFTYPE_STATION)
- phy_cap_info[9] |= IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU;
- he_cap->he_mcs_nss_supp.rx_mcs_80 = cpu_to_le16(mcs_map);
- he_cap->he_mcs_nss_supp.tx_mcs_80 = cpu_to_le16(mcs_map);
- if (chip->support_bw160) {
- he_cap->he_mcs_nss_supp.rx_mcs_160 = cpu_to_le16(mcs_map);
- he_cap->he_mcs_nss_supp.tx_mcs_160 = cpu_to_le16(mcs_map);
- }
-
- if (band == NL80211_BAND_6GHZ) {
- __le16 capa;
+ iftype_data[idx].types_mask = BIT(iftype);
- capa = le16_encode_bits(IEEE80211_HT_MPDU_DENSITY_NONE,
- IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START) |
- le16_encode_bits(IEEE80211_VHT_MAX_AMPDU_1024K,
- IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP) |
- le16_encode_bits(IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454,
- IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN);
- iftype_data[idx].he_6ghz_capa.capa = capa;
- }
+ rtw89_init_he_cap(rtwdev, band, iftype, &iftype_data[idx]);
+ rtw89_init_eht_cap(rtwdev, band, iftype, &iftype_data[idx]);
idx++;
}
@@ -3800,7 +3929,7 @@ static int rtw89_core_set_supported_band(struct rtw89_dev *rtwdev)
if (!sband_2ghz)
goto err;
rtw89_init_ht_cap(rtwdev, &sband_2ghz->ht_cap);
- rtw89_init_he_cap(rtwdev, NL80211_BAND_2GHZ, sband_2ghz);
+ rtw89_init_he_eht_cap(rtwdev, NL80211_BAND_2GHZ, sband_2ghz);
hw->wiphy->bands[NL80211_BAND_2GHZ] = sband_2ghz;
}
@@ -3810,7 +3939,7 @@ static int rtw89_core_set_supported_band(struct rtw89_dev *rtwdev)
goto err;
rtw89_init_ht_cap(rtwdev, &sband_5ghz->ht_cap);
rtw89_init_vht_cap(rtwdev, &sband_5ghz->vht_cap);
- rtw89_init_he_cap(rtwdev, NL80211_BAND_5GHZ, sband_5ghz);
+ rtw89_init_he_eht_cap(rtwdev, NL80211_BAND_5GHZ, sband_5ghz);
hw->wiphy->bands[NL80211_BAND_5GHZ] = sband_5ghz;
}
@@ -3818,7 +3947,7 @@ static int rtw89_core_set_supported_band(struct rtw89_dev *rtwdev)
sband_6ghz = kmemdup(&rtw89_sband_6ghz, size, GFP_KERNEL);
if (!sband_6ghz)
goto err;
- rtw89_init_he_cap(rtwdev, NL80211_BAND_6GHZ, sband_6ghz);
+ rtw89_init_he_eht_cap(rtwdev, NL80211_BAND_6GHZ, sband_6ghz);
hw->wiphy->bands[NL80211_BAND_6GHZ] = sband_6ghz;
}
@@ -3879,7 +4008,7 @@ void rtw89_core_update_beacon_work(struct work_struct *work)
rtwdev = rtwvif->rtwdev;
mutex_lock(&rtwdev->mutex);
- rtw89_fw_h2c_update_beacon(rtwdev, rtwvif);
+ rtw89_chip_h2c_update_beacon(rtwdev, rtwvif);
mutex_unlock(&rtwdev->mutex);
}
@@ -3944,7 +4073,6 @@ int rtw89_core_start(struct rtw89_dev *rtwdev)
{
int ret;
- rtwdev->mac.qta_mode = RTW89_QTA_SCC;
ret = rtw89_mac_init(rtwdev);
if (ret) {
rtw89_err(rtwdev, "mac init fail, ret:%d\n", ret);
@@ -3961,6 +4089,7 @@ int rtw89_core_start(struct rtw89_dev *rtwdev)
return ret;
rtw89_phy_init_bb_reg(rtwdev);
+ rtw89_chip_bb_postinit(rtwdev);
rtw89_phy_init_rf_reg(rtwdev, false);
rtw89_btc_ntfy_init(rtwdev, BTC_MODE_NORMAL);
@@ -3983,6 +4112,7 @@ int rtw89_core_start(struct rtw89_dev *rtwdev)
set_bit(RTW89_FLAG_RUNNING, rtwdev->flags);
+ rtw89_chip_rfk_init_late(rtwdev);
rtw89_btc_ntfy_radio_state(rtwdev, BTC_RFCTRL_WL_ON);
rtw89_fw_h2c_fw_log(rtwdev, rtwdev->fw.log.enable);
rtw89_fw_h2c_init_ba_cam(rtwdev);
@@ -4078,6 +4208,15 @@ int rtw89_core_init(struct rtw89_dev *rtwdev)
rtw89_traffic_stats_init(rtwdev, &rtwdev->stats);
rtwdev->hal.rx_fltr = DEFAULT_AX_RX_FLTR;
+ rtwdev->dbcc_en = false;
+ rtwdev->mlo_dbcc_mode = MLO_DBCC_NOT_SUPPORT;
+ rtwdev->mac.qta_mode = RTW89_QTA_SCC;
+
+ if (rtwdev->chip->chip_gen == RTW89_CHIP_BE) {
+ rtwdev->dbcc_en = true;
+ rtwdev->mac.qta_mode = RTW89_QTA_DBCC;
+ rtwdev->mlo_dbcc_mode = MLO_2_PLUS_0_1RF;
+ }
INIT_WORK(&btc->eapol_notify_work, rtw89_btc_ntfy_eapol_packet_work);
INIT_WORK(&btc->arp_notify_work, rtw89_btc_ntfy_arp_packet_work);
@@ -4085,6 +4224,7 @@ int rtw89_core_init(struct rtw89_dev *rtwdev)
INIT_WORK(&btc->icmp_notify_work, rtw89_btc_ntfy_icmp_packet_work);
init_completion(&rtwdev->fw.req.completion);
+ init_completion(&rtwdev->rfk_wait.completion);
schedule_work(&rtwdev->load_firmware_work);
@@ -4290,6 +4430,7 @@ EXPORT_SYMBOL(rtw89_chip_info_setup);
static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
struct ieee80211_hw *hw = rtwdev->hw;
struct rtw89_efuse *efuse = &rtwdev->efuse;
struct rtw89_hal *hal = &rtwdev->hal;
@@ -4324,8 +4465,8 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID);
ieee80211_hw_set(hw, WANT_MONITOR_VIF);
- /* ref: description of rtw89_mcc_get_tbtt_ofst() in chan.c */
- ieee80211_hw_set(hw, TIMING_BEACON_ONLY);
+ if (chip->support_bandwidths & BIT(NL80211_CHAN_WIDTH_160))
+ ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW);
if (RTW89_CHK_FW_FEATURE(BEACON_FILTER, &rtwdev->fw))
ieee80211_hw_set(hw, CONNECTION_MONITOR);
@@ -4362,6 +4503,8 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
hw->wiphy->max_remain_on_channel_duration = 1000;
wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0);
+ wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_SCAN_RANDOM_SN);
+ wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL);
ret = rtw89_core_set_supported_band(rtwdev);
if (ret) {
@@ -4453,9 +4596,10 @@ struct rtw89_dev *rtw89_alloc_ieee80211_hw(struct device *device,
!RTW89_CHK_FW_FEATURE(BEACON_FILTER, &early_fw);
if (no_chanctx) {
- ops->add_chanctx = NULL;
- ops->remove_chanctx = NULL;
- ops->change_chanctx = NULL;
+ ops->add_chanctx = ieee80211_emulate_add_chanctx;
+ ops->remove_chanctx = ieee80211_emulate_remove_chanctx;
+ ops->change_chanctx = ieee80211_emulate_change_chanctx;
+ ops->switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx;
ops->assign_vif_chanctx = NULL;
ops->unassign_vif_chanctx = NULL;
ops->remain_on_channel = NULL;
diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h
index ea6df859ba15..2e854c9af709 100644
--- a/drivers/net/wireless/realtek/rtw89/core.h
+++ b/drivers/net/wireless/realtek/rtw89/core.h
@@ -17,6 +17,7 @@ struct rtw89_pci_info;
struct rtw89_mac_gen_def;
struct rtw89_phy_gen_def;
struct rtw89_efuse_block_cfg;
+struct rtw89_h2c_rf_tssi;
struct rtw89_fw_txpwr_track_cfg;
struct rtw89_phy_rfk_log_fmt;
@@ -32,6 +33,7 @@ extern const struct ieee80211_ops rtw89_ops;
#define MASKDWORD 0xffffffff
#define RFREG_MASK 0xfffff
#define INV_RF_DATA 0xffffffff
+#define BYPASS_CR_DATA 0xbabecafe
#define RTW89_TRACK_WORK_PERIOD round_jiffies_relative(HZ * 2)
#define RTW89_FORBID_BA_TIMER round_jiffies_relative(HZ * 4)
@@ -878,7 +880,7 @@ enum rtw89_ps_mode {
#define RTW89_5G_BW_NUM (RTW89_CHANNEL_WIDTH_160 + 1)
#define RTW89_6G_BW_NUM (RTW89_CHANNEL_WIDTH_320 + 1)
#define RTW89_BYR_BW_NUM (RTW89_CHANNEL_WIDTH_320 + 1)
-#define RTW89_PPE_BW_NUM (RTW89_CHANNEL_WIDTH_160 + 1)
+#define RTW89_PPE_BW_NUM (RTW89_CHANNEL_WIDTH_320 + 1)
enum rtw89_ru_bandwidth {
RTW89_RU26 = 0,
@@ -956,6 +958,9 @@ struct rtw89_port_reg {
u32 mbssid;
u32 mbssid_drop;
u32 tsf_sync;
+ u32 ptcl_dbg;
+ u32 ptcl_dbg_info;
+ u32 bcn_drop_all;
u32 hiq_win[RTW89_PORT_NUM];
};
@@ -1146,9 +1151,15 @@ struct rtw89_mac_ax_gnt {
u8 gnt_wl;
} __packed;
+struct rtw89_mac_ax_wl_act {
+ u8 wlan_act_en;
+ u8 wlan_act;
+};
+
#define RTW89_MAC_AX_COEX_GNT_NR 2
struct rtw89_mac_ax_coex_gnt {
struct rtw89_mac_ax_gnt band[RTW89_MAC_AX_COEX_GNT_NR];
+ struct rtw89_mac_ax_wl_act bt[RTW89_MAC_AX_COEX_GNT_NR];
};
enum rtw89_btc_ncnt {
@@ -1266,6 +1277,18 @@ struct rtw89_btc_ant_info {
u8 stream_cnt: 4;
};
+struct rtw89_btc_ant_info_v7 {
+ u8 type; /* shared, dedicated(non-shared) */
+ u8 num; /* antenna count */
+ u8 isolation;
+ u8 single_pos;/* wifi 1ss-1ant at 0:S0 or 1:S1 */
+
+ u8 diversity; /* only for wifi use 1-antenna */
+ u8 btg_pos; /* btg-circuit at 0:S0/1:S1/others:all */
+ u8 stream_cnt; /* spatial_stream count */
+ u8 rsvd;
+} __packed;
+
enum rtw89_tfc_dir {
RTW89_TFC_UL,
RTW89_TFC_DL,
@@ -1660,6 +1683,16 @@ struct rtw89_btc_dm_emap {
u32 wl_e2g_hang: 1;
u32 wl_ver_mismatch: 1;
u32 bt_ver_mismatch: 1;
+ u32 rfe_type0: 1;
+ u32 h2c_buffer_over: 1;
+ u32 bt_tx_hang: 1; /* for SNR too low bug, BT has no Tx req*/
+ u32 wl_no_sta_ntfy: 1;
+
+ u32 h2c_bmap_mismatch: 1;
+ u32 c2h_bmap_mismatch: 1;
+ u32 h2c_struct_invalid: 1;
+ u32 c2h_struct_invalid: 1;
+ u32 h2c_c2h_buffer_mismatch: 1;
};
union rtw89_btc_dm_error_map {
@@ -1708,6 +1741,7 @@ struct rtw89_btc_wl_info {
u8 cn_report;
u8 coex_mode;
+ bool bg_mode;
bool scbd_change;
u32 scbd;
};
@@ -1725,6 +1759,25 @@ struct rtw89_btc_module {
u8 kt_ver_adie;
};
+struct rtw89_btc_module_v7 {
+ u8 rfe_type;
+ u8 kt_ver;
+ u8 bt_solo;
+ u8 bt_pos; /* wl-end view: get from efuse, must compare bt.btg_type*/
+
+ u8 switch_type; /* WL/BT switch type: 0: internal, 1: external */
+ u8 wa_type; /* WA type: 0:none, 1: 51B 5G_Hi-Ch_Rx */
+ u8 kt_ver_adie;
+ u8 rsvd;
+
+ struct rtw89_btc_ant_info_v7 ant;
+} __packed;
+
+union rtw89_btc_module_info {
+ struct rtw89_btc_module md;
+ struct rtw89_btc_module_v7 md_v7;
+};
+
#define RTW89_BTC_DM_MAXSTEP 30
#define RTW89_BTC_DM_CNT_MAX (RTW89_BTC_DM_MAXSTEP * 8)
@@ -1747,6 +1800,25 @@ struct rtw89_btc_init_info {
u16 rsvd;
};
+struct rtw89_btc_init_info_v7 {
+ u8 wl_guard_ch;
+ u8 wl_only;
+ u8 wl_init_ok;
+ u8 rsvd3;
+
+ u8 cx_other;
+ u8 bt_only;
+ u8 pta_mode;
+ u8 pta_direction;
+
+ struct rtw89_btc_module_v7 module;
+} __packed;
+
+union rtw89_btc_init_info_u {
+ struct rtw89_btc_init_info init;
+ struct rtw89_btc_init_info_v7 init_v7;
+};
+
struct rtw89_btc_wl_tx_limit_para {
u16 enable;
u32 tx_time; /* unit: us */
@@ -2485,7 +2557,7 @@ struct rtw89_btc_dm {
struct rtw89_btc_fbtc_tdma tdma;
struct rtw89_btc_fbtc_tdma tdma_now;
struct rtw89_mac_ax_coex_gnt gnt;
- struct rtw89_btc_init_info init_info; /* pass to wl_fw if offload */
+ union rtw89_btc_init_info_u init_info; /* pass to wl_fw if offload */
struct rtw89_btc_rf_trx_para rf_trx_para;
struct rtw89_btc_wl_tx_limit_para wl_tx_limit;
struct rtw89_btc_dm_step dm_step;
@@ -2534,6 +2606,18 @@ struct rtw89_btc_ctrl {
u32 rsvd: 12;
};
+struct rtw89_btc_ctrl_v7 {
+ u8 manual;
+ u8 igno_bt;
+ u8 always_freerun;
+ u8 rsvd;
+} __packed;
+
+union rtw89_btc_ctrl_list {
+ struct rtw89_btc_ctrl ctrl;
+ struct rtw89_btc_ctrl_v7 ctrl_v7;
+};
+
struct rtw89_btc_dbg {
/* cmd "rb" */
bool rb_done;
@@ -2706,7 +2790,9 @@ struct rtw89_btc_ver {
u8 fwlrole;
u8 frptmap;
u8 fcxctrl;
+ u8 fcxinit;
+ u8 drvinfo_type;
u16 info_buf;
u8 max_role_num;
};
@@ -2718,8 +2804,8 @@ struct rtw89_btc {
struct rtw89_btc_cx cx;
struct rtw89_btc_dm dm;
- struct rtw89_btc_ctrl ctrl;
- struct rtw89_btc_module mdinfo;
+ union rtw89_btc_ctrl_list ctrl;
+ union rtw89_btc_module_info mdinfo;
struct rtw89_btc_btf_fwinfo fwinfo;
struct rtw89_btc_dbg dbg;
@@ -2731,11 +2817,14 @@ struct rtw89_btc {
u32 bt_req_len;
u8 policy[RTW89_BTC_POLICY_MAXLEN];
+ u8 ant_type;
+ u8 btg_pos;
u16 policy_len;
u16 policy_type;
bool bt_req_en;
bool update_policy_force;
bool lps;
+ bool manual_ctrl;
};
enum rtw89_btc_hmsg {
@@ -2875,7 +2964,7 @@ struct rtw89_ba_cam_entry {
#define RTW89_MAX_ADDR_CAM_NUM 128
#define RTW89_MAX_BSSID_CAM_NUM 20
#define RTW89_MAX_SEC_CAM_NUM 128
-#define RTW89_MAX_BA_CAM_NUM 8
+#define RTW89_MAX_BA_CAM_NUM 24
#define RTW89_SEC_CAM_IN_ADDR_CAM 7
struct rtw89_addr_cam_entry {
@@ -2932,6 +3021,7 @@ struct rtw89_sta {
struct ewma_evm evm_min[RF_PATH_MAX];
struct ewma_evm evm_max[RF_PATH_MAX];
struct rtw89_ampdu_params ampdu_params[IEEE80211_NUM_TIDS];
+ DECLARE_BITMAP(ampdu_map, IEEE80211_NUM_TIDS);
struct ieee80211_rx_status rx_status;
u16 rx_hw_rate;
__le32 htc_template;
@@ -3041,6 +3131,7 @@ struct rtw89_vif {
u8 bcn_hit_cond;
u8 hit_rule;
u8 last_noa_nr;
+ u64 sync_bcn_tsf;
bool offchan;
bool trigger;
bool lsig_txop;
@@ -3111,7 +3202,7 @@ struct rtw89_hci_ops {
void (*ctrl_txdma_ch)(struct rtw89_dev *rtwdev, bool enable);
void (*ctrl_txdma_fw_ch)(struct rtw89_dev *rtwdev, bool enable);
void (*ctrl_trxhci)(struct rtw89_dev *rtwdev, bool enable);
- int (*poll_txdma_ch)(struct rtw89_dev *rtwdev);
+ int (*poll_txdma_ch_idle)(struct rtw89_dev *rtwdev);
void (*clr_idx_all)(struct rtw89_dev *rtwdev);
void (*clear)(struct rtw89_dev *rtwdev, struct pci_dev *pdev);
void (*disable_intr)(struct rtw89_dev *rtwdev);
@@ -3131,6 +3222,7 @@ struct rtw89_chip_ops {
int (*enable_bb_rf)(struct rtw89_dev *rtwdev);
int (*disable_bb_rf)(struct rtw89_dev *rtwdev);
void (*bb_preinit)(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+ void (*bb_postinit)(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
void (*bb_reset)(struct rtw89_dev *rtwdev,
enum rtw89_phy_idx phy_idx);
void (*bb_sethw)(struct rtw89_dev *rtwdev);
@@ -3152,7 +3244,9 @@ struct rtw89_chip_ops {
int (*read_phycap)(struct rtw89_dev *rtwdev, u8 *phycap_map);
void (*fem_setup)(struct rtw89_dev *rtwdev);
void (*rfe_gpio)(struct rtw89_dev *rtwdev);
+ void (*rfk_hw_init)(struct rtw89_dev *rtwdev);
void (*rfk_init)(struct rtw89_dev *rtwdev);
+ void (*rfk_init_late)(struct rtw89_dev *rtwdev);
void (*rfk_channel)(struct rtw89_dev *rtwdev);
void (*rfk_band_changed)(struct rtw89_dev *rtwdev,
enum rtw89_phy_idx phy_idx);
@@ -3196,6 +3290,22 @@ struct rtw89_chip_ops {
int (*h2c_dctl_sec_cam)(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif,
struct rtw89_sta *rtwsta);
+ int (*h2c_default_cmac_tbl)(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta);
+ int (*h2c_assoc_cmac_tbl)(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
+ int (*h2c_ampdu_cmac_tbl)(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
+ int (*h2c_default_dmac_tbl)(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta);
+ int (*h2c_update_beacon)(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif);
+ int (*h2c_ba_cam)(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
+ bool valid, struct ieee80211_ampdu_params *params);
void (*btc_set_rfe)(struct rtw89_dev *rtwdev);
void (*btc_init_cfg)(struct rtw89_dev *rtwdev);
@@ -3225,8 +3335,62 @@ enum rtw89_dma_ch {
RTW89_DMA_CH_NUM = 13
};
+#define MLO_MODE_FOR_BB0_BB1_RF(bb0, bb1, rf) ((rf) << 12 | (bb1) << 4 | (bb0))
+
+enum rtw89_mlo_dbcc_mode {
+ MLO_DBCC_NOT_SUPPORT = 1,
+ MLO_0_PLUS_2_1RF = MLO_MODE_FOR_BB0_BB1_RF(0, 2, 1),
+ MLO_0_PLUS_2_2RF = MLO_MODE_FOR_BB0_BB1_RF(0, 2, 2),
+ MLO_1_PLUS_1_1RF = MLO_MODE_FOR_BB0_BB1_RF(1, 1, 1),
+ MLO_1_PLUS_1_2RF = MLO_MODE_FOR_BB0_BB1_RF(1, 1, 2),
+ MLO_2_PLUS_0_1RF = MLO_MODE_FOR_BB0_BB1_RF(2, 0, 1),
+ MLO_2_PLUS_0_2RF = MLO_MODE_FOR_BB0_BB1_RF(2, 0, 2),
+ MLO_2_PLUS_2_2RF = MLO_MODE_FOR_BB0_BB1_RF(2, 2, 2),
+ DBCC_LEGACY = 0xffffffff,
+};
+
+enum rtw89_scan_be_operation {
+ RTW89_SCAN_OP_STOP,
+ RTW89_SCAN_OP_START,
+ RTW89_SCAN_OP_SETPARM,
+ RTW89_SCAN_OP_GETRPT,
+ RTW89_SCAN_OP_NUM
+};
+
+enum rtw89_scan_be_mode {
+ RTW89_SCAN_MODE_SA,
+ RTW89_SCAN_MODE_MACC,
+ RTW89_SCAN_MODE_NUM
+};
+
+enum rtw89_scan_be_opmode {
+ RTW89_SCAN_OPMODE_NONE,
+ RTW89_SCAN_OPMODE_TBTT,
+ RTW89_SCAN_OPMODE_INTV,
+ RTW89_SCAN_OPMODE_CNT,
+ RTW89_SCAN_OPMODE_NUM,
+};
+
+struct rtw89_scan_option {
+ bool enable;
+ bool target_ch_mode;
+ u8 num_macc_role;
+ u8 num_opch;
+ u8 repeat;
+ u16 norm_pd;
+ u16 slow_pd;
+ u16 norm_cy;
+ u8 opch_end;
+ u64 prohib_chan;
+ enum rtw89_phy_idx band;
+ enum rtw89_scan_be_operation operation;
+ enum rtw89_scan_be_mode scan_mode;
+ enum rtw89_mlo_dbcc_mode mlo_mode;
+};
+
enum rtw89_qta_mode {
RTW89_QTA_SCC,
+ RTW89_QTA_DBCC,
RTW89_QTA_DLFW,
RTW89_QTA_WOW,
@@ -3713,7 +3877,7 @@ struct rtw89_chip_info {
u32 rf_base_addr[2];
u8 support_chanctx_num;
u8 support_bands;
- bool support_bw160;
+ u16 support_bandwidths;
bool support_unii4;
bool ul_tb_waveform_ctrl;
bool ul_tb_pwr_diff;
@@ -3790,6 +3954,7 @@ struct rtw89_chip_info {
const u32 *c2h_regs;
struct rtw89_reg_def c2h_counter_reg;
const struct rtw89_page_regs *page_regs;
+ u32 wow_reason_reg;
bool cfo_src_fd;
bool cfo_hw_comp;
const struct rtw89_reg_def *dcfo_comp;
@@ -3838,7 +4003,7 @@ enum rtw89_host_rpr_mode {
RTW89_RPR_MODE_STF
};
-#define RTW89_COMPLETION_BUF_SIZE 24
+#define RTW89_COMPLETION_BUF_SIZE 40
#define RTW89_WAIT_COND_IDLE UINT_MAX
struct rtw89_completion_data {
@@ -3897,6 +4062,7 @@ enum rtw89_fw_feature {
RTW89_FW_FEATURE_NO_DEEP_PS,
RTW89_FW_FEATURE_NO_LPS_PG,
RTW89_FW_FEATURE_BEACON_FILTER,
+ RTW89_FW_FEATURE_MACID_PAUSE_SLEEP,
};
struct rtw89_fw_suit {
@@ -3957,6 +4123,19 @@ struct rtw89_fw_elm_info {
struct rtw89_phy_rfk_log_fmt *rfk_log_fmt;
};
+enum rtw89_fw_mss_dev_type {
+ RTW89_FW_MSS_DEV_TYPE_FWSEC_DEF = 0xF,
+ RTW89_FW_MSS_DEV_TYPE_FWSEC_INV = 0xFF,
+};
+
+struct rtw89_fw_secure {
+ bool secure_boot;
+ u32 sb_sel_mgn;
+ u8 mss_dev_type;
+ u8 mss_cust_idx;
+ u8 mss_key_num;
+};
+
struct rtw89_fw_info {
struct rtw89_fw_req_info req;
int fw_format;
@@ -3971,6 +4150,7 @@ struct rtw89_fw_info {
struct rtw89_fw_log log;
u32 feature_map;
struct rtw89_fw_elm_info elm_info;
+ struct rtw89_fw_secure sec;
};
#define RTW89_CHK_FW_FEATURE(_feat, _fw) \
@@ -4045,6 +4225,7 @@ struct rtw89_tas_info {
struct rtw89_chanctx_cfg {
enum rtw89_sub_entity_idx idx;
+ int ref_count;
};
enum rtw89_chanctx_changes {
@@ -4064,13 +4245,16 @@ enum rtw89_entity_mode {
RTW89_ENTITY_MODE_MCC,
NUM_OF_RTW89_ENTITY_MODE,
- RTW89_ENTITY_MODE_INVALID = NUM_OF_RTW89_ENTITY_MODE,
+ RTW89_ENTITY_MODE_INVALID = -EINVAL,
+ RTW89_ENTITY_MODE_UNHANDLED = -ESRCH,
};
struct rtw89_sub_entity {
struct cfg80211_chan_def chandef;
struct rtw89_chan chan;
struct rtw89_chan_rcd rcd;
+
+ /* only assigned when running with chanctx_ops */
struct rtw89_chanctx_cfg *cfg;
};
@@ -4123,6 +4307,7 @@ enum rtw89_flags {
RTW89_FLAG_CMAC1_FUNC,
RTW89_FLAG_FW_RDY,
RTW89_FLAG_RUNNING,
+ RTW89_FLAG_PROBE_DONE,
RTW89_FLAG_BFEE_MON,
RTW89_FLAG_BFEE_EN,
RTW89_FLAG_BFEE_TIMER_KEEP,
@@ -4179,6 +4364,21 @@ struct rtw89_phy_stat {
struct rtw89_pkt_stat last_pkt_stat;
};
+enum rtw89_rfk_report_state {
+ RTW89_RFK_STATE_START = 0x0,
+ RTW89_RFK_STATE_OK = 0x1,
+ RTW89_RFK_STATE_FAIL = 0x2,
+ RTW89_RFK_STATE_TIMEOUT = 0x3,
+ RTW89_RFK_STATE_H2C_CMD_ERR = 0x4,
+};
+
+struct rtw89_rfk_wait_info {
+ struct completion completion;
+ ktime_t start_time;
+ enum rtw89_rfk_report_state state;
+ u8 version;
+};
+
#define RTW89_DACK_PATH_NR 2
#define RTW89_DACK_IDX_NR 2
#define RTW89_DACK_MSBK_NR 16
@@ -4194,15 +4394,18 @@ struct rtw89_dack_info {
bool msbk_timeout[RTW89_DACK_PATH_NR];
};
-#define RTW89_IQK_CHS_NR 2
-#define RTW89_IQK_PATH_NR 4
+#define RTW89_RFK_CHS_NR 3
struct rtw89_rfk_mcc_info {
- u8 ch[RTW89_IQK_CHS_NR];
- u8 band[RTW89_IQK_CHS_NR];
+ u8 ch[RTW89_RFK_CHS_NR];
+ u8 band[RTW89_RFK_CHS_NR];
+ u8 bw[RTW89_RFK_CHS_NR];
u8 table_idx;
};
+#define RTW89_IQK_CHS_NR 2
+#define RTW89_IQK_PATH_NR 4
+
struct rtw89_lck_info {
u8 thermal[RF_PATH_MAX];
};
@@ -4380,6 +4583,11 @@ struct rtw89_cfo_tracking_info {
u8 lock_cnt;
};
+enum rtw89_tssi_mode {
+ RTW89_TSSI_NORMAL = 0,
+ RTW89_TSSI_SCAN = 1,
+};
+
enum rtw89_tssi_alimk_band {
TSSI_ALIMK_2G = 0,
TSSI_ALIMK_5GL,
@@ -4589,6 +4797,7 @@ struct rtw89_hw_scan_info {
struct ieee80211_vif *scanning_vif;
struct list_head pkt_list[NUM_NL80211_BANDS];
struct rtw89_chan op_chan;
+ bool abort;
u32 last_chan_idx;
};
@@ -4605,6 +4814,48 @@ enum rtw89_phy_bb_gain_band {
RTW89_BB_GAIN_BAND_NR,
};
+enum rtw89_phy_gain_band_be {
+ RTW89_BB_GAIN_BAND_2G_BE = 0,
+ RTW89_BB_GAIN_BAND_5G_L_BE = 1,
+ RTW89_BB_GAIN_BAND_5G_M_BE = 2,
+ RTW89_BB_GAIN_BAND_5G_H_BE = 3,
+ RTW89_BB_GAIN_BAND_6G_L0_BE = 4,
+ RTW89_BB_GAIN_BAND_6G_L1_BE = 5,
+ RTW89_BB_GAIN_BAND_6G_M0_BE = 6,
+ RTW89_BB_GAIN_BAND_6G_M1_BE = 7,
+ RTW89_BB_GAIN_BAND_6G_H0_BE = 8,
+ RTW89_BB_GAIN_BAND_6G_H1_BE = 9,
+ RTW89_BB_GAIN_BAND_6G_UH0_BE = 10,
+ RTW89_BB_GAIN_BAND_6G_UH1_BE = 11,
+
+ RTW89_BB_GAIN_BAND_NR_BE,
+};
+
+enum rtw89_phy_bb_bw_be {
+ RTW89_BB_BW_20_40 = 0,
+ RTW89_BB_BW_80_160_320 = 1,
+
+ RTW89_BB_BW_NR_BE,
+};
+
+enum rtw89_bw20_sc {
+ RTW89_BW20_SC_20M = 1,
+ RTW89_BW20_SC_40M = 2,
+ RTW89_BW20_SC_80M = 4,
+ RTW89_BW20_SC_160M = 8,
+ RTW89_BW20_SC_320M = 16,
+};
+
+enum rtw89_cmac_table_bw {
+ RTW89_CMAC_BW_20M = 0,
+ RTW89_CMAC_BW_40M = 1,
+ RTW89_CMAC_BW_80M = 2,
+ RTW89_CMAC_BW_160M = 3,
+ RTW89_CMAC_BW_320M = 4,
+
+ RTW89_CMAC_BW_NR,
+};
+
enum rtw89_phy_bb_rxsc_num {
RTW89_BB_RXSC_NUM_40 = 9, /* SC: 0, 1~8 */
RTW89_BB_RXSC_NUM_80 = 13, /* SC: 0, 1~8, 9~12 */
@@ -4627,6 +4878,27 @@ struct rtw89_phy_bb_gain_info {
[RTW89_BB_RXSC_NUM_160];
};
+struct rtw89_phy_bb_gain_info_be {
+ s8 lna_gain[RTW89_BB_GAIN_BAND_NR_BE][RTW89_BB_BW_NR_BE][RF_PATH_MAX]
+ [LNA_GAIN_NUM];
+ s8 tia_gain[RTW89_BB_GAIN_BAND_NR_BE][RTW89_BB_BW_NR_BE][RF_PATH_MAX]
+ [TIA_GAIN_NUM];
+ s8 lna_gain_bypass[RTW89_BB_GAIN_BAND_NR_BE][RTW89_BB_BW_NR_BE]
+ [RF_PATH_MAX][LNA_GAIN_NUM];
+ s8 lna_op1db[RTW89_BB_GAIN_BAND_NR_BE][RTW89_BB_BW_NR_BE]
+ [RF_PATH_MAX][LNA_GAIN_NUM];
+ s8 tia_lna_op1db[RTW89_BB_GAIN_BAND_NR_BE][RTW89_BB_BW_NR_BE]
+ [RF_PATH_MAX][LNA_GAIN_NUM + 1];
+ s8 rpl_ofst_20[RTW89_BB_GAIN_BAND_NR_BE][RF_PATH_MAX]
+ [RTW89_BW20_SC_20M];
+ s8 rpl_ofst_40[RTW89_BB_GAIN_BAND_NR_BE][RF_PATH_MAX]
+ [RTW89_BW20_SC_40M];
+ s8 rpl_ofst_80[RTW89_BB_GAIN_BAND_NR_BE][RF_PATH_MAX]
+ [RTW89_BW20_SC_80M];
+ s8 rpl_ofst_160[RTW89_BB_GAIN_BAND_NR_BE][RF_PATH_MAX]
+ [RTW89_BW20_SC_160M];
+};
+
struct rtw89_phy_efuse_gain {
bool offset_valid;
bool comp_valid;
@@ -4681,6 +4953,9 @@ struct rtw89_mcc_role {
struct rtw89_mcc_policy policy;
struct rtw89_mcc_limit limit;
+ /* only valid when running with FW MRC mechanism */
+ u8 slot_idx;
+
/* byte-array in LE order for FW */
u8 macid_bitmap[BITS_TO_BYTES(RTW89_MAX_MAC_ID_NUM)];
@@ -4724,7 +4999,11 @@ struct rtw89_mcc_sync {
bool enable;
u16 offset; /* TU */
u8 macid_src;
+ u8 band_src;
+ u8 port_src;
u8 macid_tgt;
+ u8 band_tgt;
+ u8 port_tgt;
};
struct rtw89_mcc_config {
@@ -4757,6 +5036,7 @@ struct rtw89_dev {
const struct ieee80211_ops *ops;
bool dbcc_en;
+ enum rtw89_mlo_dbcc_mode mlo_dbcc_mode;
struct rtw89_hw_scan_info scan_info;
const struct rtw89_chip_info *chip;
const struct rtw89_pci_info *pci_info;
@@ -4806,6 +5086,7 @@ struct rtw89_dev {
DECLARE_BITMAP(pkt_offload, RTW89_MAX_PKT_OFLD_NUM);
struct rtw89_phy_stat phystat;
+ struct rtw89_rfk_wait_info rfk_wait;
struct rtw89_dack_info dack;
struct rtw89_iqk_info iqk;
struct rtw89_dpk_info dpk;
@@ -4824,7 +5105,10 @@ struct rtw89_dev {
struct rtw89_env_monitor_info env_monitor;
struct rtw89_dig_info dig;
struct rtw89_phy_ch_info ch_info;
- struct rtw89_phy_bb_gain_info bb_gain;
+ union {
+ struct rtw89_phy_bb_gain_info ax;
+ struct rtw89_phy_bb_gain_info_be be;
+ } bb_gain;
struct rtw89_phy_efuse_gain efuse_gain;
struct rtw89_phy_ul_tb_info ul_tb_info;
struct rtw89_antdiv_info antdiv;
@@ -4969,12 +5253,12 @@ static inline void rtw89_hci_ctrl_trxhci(struct rtw89_dev *rtwdev, bool enable)
rtwdev->hci.ops->ctrl_trxhci(rtwdev, enable);
}
-static inline int rtw89_hci_poll_txdma_ch(struct rtw89_dev *rtwdev)
+static inline int rtw89_hci_poll_txdma_ch_idle(struct rtw89_dev *rtwdev)
{
int ret = 0;
- if (rtwdev->hci.ops->poll_txdma_ch)
- ret = rtwdev->hci.ops->poll_txdma_ch(rtwdev);
+ if (rtwdev->hci.ops->poll_txdma_ch_idle)
+ ret = rtwdev->hci.ops->poll_txdma_ch_idle(rtwdev);
return ret;
}
@@ -5437,6 +5721,14 @@ static inline void rtw89_chip_rfe_gpio(struct rtw89_dev *rtwdev)
chip->ops->rfe_gpio(rtwdev);
}
+static inline void rtw89_chip_rfk_hw_init(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ if (chip->ops->rfk_hw_init)
+ chip->ops->rfk_hw_init(rtwdev);
+}
+
static inline
void rtw89_chip_bb_preinit(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
{
@@ -5446,6 +5738,20 @@ void rtw89_chip_bb_preinit(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
chip->ops->bb_preinit(rtwdev, phy_idx);
}
+static inline
+void rtw89_chip_bb_postinit(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ if (!chip->ops->bb_postinit)
+ return;
+
+ chip->ops->bb_postinit(rtwdev, RTW89_PHY_0);
+
+ if (rtwdev->dbcc_en)
+ chip->ops->bb_postinit(rtwdev, RTW89_PHY_1);
+}
+
static inline void rtw89_chip_bb_sethw(struct rtw89_dev *rtwdev)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
@@ -5462,6 +5768,14 @@ static inline void rtw89_chip_rfk_init(struct rtw89_dev *rtwdev)
chip->ops->rfk_init(rtwdev);
}
+static inline void rtw89_chip_rfk_init_late(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ if (chip->ops->rfk_init_late)
+ chip->ops->rfk_init_late(rtwdev);
+}
+
static inline void rtw89_chip_rfk_channel(struct rtw89_dev *rtwdev)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
@@ -5750,6 +6064,18 @@ out:
rcu_read_unlock();
}
+static inline bool rtw89_is_mlo_1_1(struct rtw89_dev *rtwdev)
+{
+ switch (rtwdev->mlo_dbcc_mode) {
+ case MLO_1_PLUS_1_1RF:
+ case MLO_1_PLUS_1_2RF:
+ case DBCC_LEGACY:
+ return true;
+ default:
+ return false;
+ }
+}
+
int rtw89_core_tx_write(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel);
int rtw89_h2c_tx(struct rtw89_dev *rtwdev,
@@ -5815,7 +6141,7 @@ void rtw89_core_set_chip_txpwr(struct rtw89_dev *rtwdev);
void rtw89_get_default_chandef(struct cfg80211_chan_def *chandef);
void rtw89_get_channel_params(const struct cfg80211_chan_def *chandef,
struct rtw89_chan *chan);
-void rtw89_set_channel(struct rtw89_dev *rtwdev);
+int rtw89_set_channel(struct rtw89_dev *rtwdev);
void rtw89_get_channel(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif,
struct rtw89_chan *chan);
u8 rtw89_core_acquire_bit_map(unsigned long *addr, unsigned long size);
diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c
index 44829a148185..affffc4092ba 100644
--- a/drivers/net/wireless/realtek/rtw89/debug.c
+++ b/drivers/net/wireless/realtek/rtw89/debug.c
@@ -3427,14 +3427,17 @@ static ssize_t rtw89_debug_priv_btc_manual_set(struct file *filp,
struct rtw89_debugfs_priv *debugfs_priv = filp->private_data;
struct rtw89_dev *rtwdev = debugfs_priv->rtwdev;
struct rtw89_btc *btc = &rtwdev->btc;
- bool btc_manual;
+ const struct rtw89_btc_ver *ver = btc->ver;
int ret;
- ret = kstrtobool_from_user(user_buf, count, &btc_manual);
+ ret = kstrtobool_from_user(user_buf, count, &btc->manual_ctrl);
if (ret)
return ret;
- btc->ctrl.manual = btc_manual;
+ if (ver->fcxctrl == 7)
+ btc->ctrl.ctrl_v7.manual = btc->manual_ctrl;
+ else
+ btc->ctrl.ctrl.manual = btc->manual_ctrl;
return count;
}
diff --git a/drivers/net/wireless/realtek/rtw89/efuse.h b/drivers/net/wireless/realtek/rtw89/efuse.h
index 5c6787179bad..72416f56a071 100644
--- a/drivers/net/wireless/realtek/rtw89/efuse.h
+++ b/drivers/net/wireless/realtek/rtw89/efuse.h
@@ -23,5 +23,6 @@ int rtw89_parse_efuse_map_be(struct rtw89_dev *rtwdev);
int rtw89_parse_phycap_map_be(struct rtw89_dev *rtwdev);
int rtw89_cnv_efuse_state_be(struct rtw89_dev *rtwdev, bool idle);
int rtw89_read_efuse_ver(struct rtw89_dev *rtwdev, u8 *efv);
+int rtw89_efuse_read_fw_secure_be(struct rtw89_dev *rtwdev);
#endif
diff --git a/drivers/net/wireless/realtek/rtw89/efuse_be.c b/drivers/net/wireless/realtek/rtw89/efuse_be.c
index 8e8b7cd315f7..0be26d5fdf7c 100644
--- a/drivers/net/wireless/realtek/rtw89/efuse_be.c
+++ b/drivers/net/wireless/realtek/rtw89/efuse_be.c
@@ -7,6 +7,31 @@
#include "mac.h"
#include "reg.h"
+#define EFUSE_EXTERNALPN_ADDR_BE 0x1580
+#define EFUSE_B1_MSSDEVTYPE_MASK GENMASK(3, 0)
+#define EFUSE_B1_MSSCUSTIDX0_MASK GENMASK(7, 4)
+#define EFUSE_SERIALNUM_ADDR_BE 0x1581
+#define EFUSE_B2_MSSKEYNUM_MASK GENMASK(3, 0)
+#define EFUSE_B2_MSSCUSTIDX1_MASK BIT(6)
+#define EFUSE_SB_CRYP_SEL_ADDR 0x1582
+#define EFUSE_SB_CRYP_SEL_SIZE 2
+#define EFUSE_SB_CRYP_SEL_DEFAULT 0xFFFF
+#define SB_SEL_MGN_MAX_SIZE 2
+#define EFUSE_SEC_BE_START 0x1580
+#define EFUSE_SEC_BE_SIZE 4
+
+enum rtw89_efuse_mss_dev_type {
+ MSS_DEV_TYPE_FWSEC_DEF = 0xF,
+ MSS_DEV_TYPE_FWSEC_WINLIN_INBOX = 0xC,
+ MSS_DEV_TYPE_FWSEC_NONLIN_INBOX_NON_COB = 0xA,
+ MSS_DEV_TYPE_FWSEC_NONLIN_INBOX_COB = 0x9,
+ MSS_DEV_TYPE_FWSEC_NONWIN_INBOX = 0x6,
+};
+
+static const u32 sb_sel_mgn[SB_SEL_MGN_MAX_SIZE] = {
+ 0x8000100, 0xC000180
+};
+
static void rtw89_enable_efuse_pwr_cut_ddv_be(struct rtw89_dev *rtwdev)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
@@ -418,3 +443,120 @@ out_free:
return ret;
}
+
+static u16 get_sb_cryp_sel_idx(u16 sb_cryp_sel)
+{
+ u8 low_bit, high_bit, cnt_zero = 0;
+ u8 idx, sel_form_v, sel_idx_v;
+ u16 sb_cryp_sel_v = 0x0;
+
+ sel_form_v = u16_get_bits(sb_cryp_sel, MASKBYTE0);
+ sel_idx_v = u16_get_bits(sb_cryp_sel, MASKBYTE1);
+
+ for (idx = 0; idx < 4; idx++) {
+ low_bit = !!(sel_form_v & BIT(idx));
+ high_bit = !!(sel_form_v & BIT(7 - idx));
+ if (low_bit != high_bit)
+ return U16_MAX;
+ if (low_bit)
+ continue;
+
+ cnt_zero++;
+ if (cnt_zero == 1)
+ sb_cryp_sel_v = idx * 16;
+ else if (cnt_zero > 1)
+ return U16_MAX;
+ }
+
+ low_bit = u8_get_bits(sel_idx_v, 0x0F);
+ high_bit = u8_get_bits(sel_idx_v, 0xF0);
+
+ if ((low_bit ^ high_bit) != 0xF)
+ return U16_MAX;
+
+ return sb_cryp_sel_v + low_bit;
+}
+
+static u8 get_mss_dev_type_idx(struct rtw89_dev *rtwdev, u8 mss_dev_type)
+{
+ switch (mss_dev_type) {
+ case MSS_DEV_TYPE_FWSEC_WINLIN_INBOX:
+ mss_dev_type = 0x0;
+ break;
+ case MSS_DEV_TYPE_FWSEC_NONLIN_INBOX_NON_COB:
+ mss_dev_type = 0x1;
+ break;
+ case MSS_DEV_TYPE_FWSEC_NONLIN_INBOX_COB:
+ mss_dev_type = 0x2;
+ break;
+ case MSS_DEV_TYPE_FWSEC_NONWIN_INBOX:
+ mss_dev_type = 0x3;
+ break;
+ case MSS_DEV_TYPE_FWSEC_DEF:
+ mss_dev_type = RTW89_FW_MSS_DEV_TYPE_FWSEC_DEF;
+ break;
+ default:
+ rtw89_warn(rtwdev, "unknown mss_dev_type %d", mss_dev_type);
+ mss_dev_type = RTW89_FW_MSS_DEV_TYPE_FWSEC_INV;
+ break;
+ }
+
+ return mss_dev_type;
+}
+
+int rtw89_efuse_read_fw_secure_be(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_fw_secure *sec = &rtwdev->fw.sec;
+ u32 sec_addr = EFUSE_SEC_BE_START;
+ u32 sec_size = EFUSE_SEC_BE_SIZE;
+ u16 sb_cryp_sel, sb_cryp_sel_idx;
+ u8 sec_map[EFUSE_SEC_BE_SIZE];
+ u8 mss_dev_type;
+ u8 b1, b2;
+ int ret;
+
+ ret = rtw89_dump_physical_efuse_map_be(rtwdev, sec_map,
+ sec_addr, sec_size, false);
+ if (ret) {
+ rtw89_warn(rtwdev, "failed to dump secsel map\n");
+ return ret;
+ }
+
+ sb_cryp_sel = sec_map[EFUSE_SB_CRYP_SEL_ADDR - sec_addr] |
+ sec_map[EFUSE_SB_CRYP_SEL_ADDR - sec_addr + 1] << 8;
+ if (sb_cryp_sel == EFUSE_SB_CRYP_SEL_DEFAULT)
+ goto out;
+
+ sb_cryp_sel_idx = get_sb_cryp_sel_idx(sb_cryp_sel);
+ if (sb_cryp_sel_idx >= SB_SEL_MGN_MAX_SIZE) {
+ rtw89_warn(rtwdev, "invalid SB cryp sel idx %d\n", sb_cryp_sel_idx);
+ goto out;
+ }
+
+ sec->sb_sel_mgn = sb_sel_mgn[sb_cryp_sel_idx];
+
+ b1 = sec_map[EFUSE_EXTERNALPN_ADDR_BE - sec_addr];
+ b2 = sec_map[EFUSE_SERIALNUM_ADDR_BE - sec_addr];
+
+ mss_dev_type = u8_get_bits(b1, EFUSE_B1_MSSDEVTYPE_MASK);
+ sec->mss_cust_idx = 0x1F - (u8_get_bits(b1, EFUSE_B1_MSSCUSTIDX0_MASK) |
+ u8_get_bits(b2, EFUSE_B2_MSSCUSTIDX1_MASK) << 4);
+ sec->mss_key_num = 0xF - u8_get_bits(b2, EFUSE_B2_MSSKEYNUM_MASK);
+
+ sec->mss_dev_type = get_mss_dev_type_idx(rtwdev, mss_dev_type);
+ if (sec->mss_dev_type == RTW89_FW_MSS_DEV_TYPE_FWSEC_INV) {
+ rtw89_warn(rtwdev, "invalid mss_dev_type %d\n", mss_dev_type);
+ goto out;
+ }
+
+ sec->secure_boot = true;
+
+out:
+ rtw89_debug(rtwdev, RTW89_DBG_FW,
+ "MSS secure_boot=%d dev_type=%d cust_idx=%d key_num=%d\n",
+ sec->secure_boot, sec->mss_dev_type, sec->mss_cust_idx,
+ sec->mss_key_num);
+
+ return 0;
+}
+EXPORT_SYMBOL(rtw89_efuse_read_fw_secure_be);
diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c
index 09684cea9731..185cd339c085 100644
--- a/drivers/net/wireless/realtek/rtw89/fw.c
+++ b/drivers/net/wireless/realtek/rtw89/fw.c
@@ -13,6 +13,8 @@
#include "reg.h"
#include "util.h"
+static const u8 mss_signature[] = {0x4D, 0x53, 0x53, 0x4B, 0x50, 0x4F, 0x4F, 0x4C};
+
union rtw89_fw_element_arg {
size_t offset;
enum rtw89_rf_path rf_path;
@@ -163,6 +165,161 @@ static int rtw89_fw_hdr_parser_v0(struct rtw89_dev *rtwdev, const u8 *fw, u32 le
return 0;
}
+static int __get_mssc_key_idx(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mss_pool_hdr *mss_hdr,
+ u32 rmp_tbl_size, u32 *key_idx)
+{
+ struct rtw89_fw_secure *sec = &rtwdev->fw.sec;
+ u32 sel_byte_idx;
+ u32 mss_sel_idx;
+ u8 sel_bit_idx;
+ int i;
+
+ if (sec->mss_dev_type == RTW89_FW_MSS_DEV_TYPE_FWSEC_DEF) {
+ if (!mss_hdr->defen)
+ return -ENOENT;
+
+ mss_sel_idx = sec->mss_cust_idx * le16_to_cpu(mss_hdr->msskey_num_max) +
+ sec->mss_key_num;
+ } else {
+ if (mss_hdr->defen)
+ mss_sel_idx = FWDL_MSS_POOL_DEFKEYSETS_SIZE << 3;
+ else
+ mss_sel_idx = 0;
+ mss_sel_idx += sec->mss_dev_type * le16_to_cpu(mss_hdr->msskey_num_max) *
+ le16_to_cpu(mss_hdr->msscust_max) +
+ sec->mss_cust_idx * le16_to_cpu(mss_hdr->msskey_num_max) +
+ sec->mss_key_num;
+ }
+
+ sel_byte_idx = mss_sel_idx >> 3;
+ sel_bit_idx = mss_sel_idx & 0x7;
+
+ if (sel_byte_idx >= rmp_tbl_size)
+ return -EFAULT;
+
+ if (!(mss_hdr->rmp_tbl[sel_byte_idx] & BIT(sel_bit_idx)))
+ return -ENOENT;
+
+ *key_idx = hweight8(mss_hdr->rmp_tbl[sel_byte_idx] & (BIT(sel_bit_idx) - 1));
+
+ for (i = 0; i < sel_byte_idx; i++)
+ *key_idx += hweight8(mss_hdr->rmp_tbl[i]);
+
+ return 0;
+}
+
+static int __parse_formatted_mssc(struct rtw89_dev *rtwdev,
+ struct rtw89_fw_bin_info *info,
+ struct rtw89_fw_hdr_section_info *section_info,
+ const struct rtw89_fw_hdr_section_v1 *section,
+ const void *content,
+ u32 *mssc_len)
+{
+ const struct rtw89_fw_mss_pool_hdr *mss_hdr = content + section_info->len;
+ const union rtw89_fw_section_mssc_content *section_content = content;
+ struct rtw89_fw_secure *sec = &rtwdev->fw.sec;
+ u32 rmp_tbl_size;
+ u32 key_sign_len;
+ u32 real_key_idx;
+ u32 sb_sel_ver;
+ int ret;
+
+ if (memcmp(mss_signature, mss_hdr->signature, sizeof(mss_signature)) != 0) {
+ rtw89_err(rtwdev, "[ERR] wrong MSS signature\n");
+ return -ENOENT;
+ }
+
+ if (mss_hdr->rmpfmt == MSS_POOL_RMP_TBL_BITMASK) {
+ rmp_tbl_size = (le16_to_cpu(mss_hdr->msskey_num_max) *
+ le16_to_cpu(mss_hdr->msscust_max) *
+ mss_hdr->mssdev_max) >> 3;
+ if (mss_hdr->defen)
+ rmp_tbl_size += FWDL_MSS_POOL_DEFKEYSETS_SIZE;
+ } else {
+ rtw89_err(rtwdev, "[ERR] MSS Key Pool Remap Table Format Unsupport:%X\n",
+ mss_hdr->rmpfmt);
+ return -EINVAL;
+ }
+
+ if (rmp_tbl_size + sizeof(*mss_hdr) != le32_to_cpu(mss_hdr->key_raw_offset)) {
+ rtw89_err(rtwdev, "[ERR] MSS Key Pool Format Error:0x%X + 0x%X != 0x%X\n",
+ rmp_tbl_size, (int)sizeof(*mss_hdr),
+ le32_to_cpu(mss_hdr->key_raw_offset));
+ return -EINVAL;
+ }
+
+ key_sign_len = le16_to_cpu(section_content->key_sign_len.v) >> 2;
+ if (!key_sign_len)
+ key_sign_len = 512;
+
+ if (info->dsp_checksum)
+ key_sign_len += FWDL_SECURITY_CHKSUM_LEN;
+
+ *mssc_len = sizeof(*mss_hdr) + rmp_tbl_size +
+ le16_to_cpu(mss_hdr->keypair_num) * key_sign_len;
+
+ if (!sec->secure_boot)
+ goto out;
+
+ sb_sel_ver = le32_to_cpu(section_content->sb_sel_ver.v);
+ if (sb_sel_ver && sb_sel_ver != sec->sb_sel_mgn)
+ goto ignore;
+
+ ret = __get_mssc_key_idx(rtwdev, mss_hdr, rmp_tbl_size, &real_key_idx);
+ if (ret)
+ goto ignore;
+
+ section_info->key_addr = content + section_info->len +
+ le32_to_cpu(mss_hdr->key_raw_offset) +
+ key_sign_len * real_key_idx;
+ section_info->key_len = key_sign_len;
+ section_info->key_idx = real_key_idx;
+
+out:
+ if (info->secure_section_exist) {
+ section_info->ignore = true;
+ return 0;
+ }
+
+ info->secure_section_exist = true;
+
+ return 0;
+
+ignore:
+ section_info->ignore = true;
+
+ return 0;
+}
+
+static int __parse_security_section(struct rtw89_dev *rtwdev,
+ struct rtw89_fw_bin_info *info,
+ struct rtw89_fw_hdr_section_info *section_info,
+ const struct rtw89_fw_hdr_section_v1 *section,
+ const void *content,
+ u32 *mssc_len)
+{
+ int ret;
+
+ section_info->mssc =
+ le32_get_bits(section->w2, FWSECTION_HDR_V1_W2_MSSC);
+
+ if (section_info->mssc == FORMATTED_MSSC) {
+ ret = __parse_formatted_mssc(rtwdev, info, section_info,
+ section, content, mssc_len);
+ if (ret)
+ return -EINVAL;
+ } else {
+ *mssc_len = section_info->mssc * FWDL_SECURITY_SIGLEN;
+ if (info->dsp_checksum)
+ *mssc_len += section_info->mssc * FWDL_SECURITY_CHKSUM_LEN;
+
+ info->secure_section_exist = true;
+ }
+
+ return 0;
+}
+
static int rtw89_fw_hdr_parser_v1(struct rtw89_dev *rtwdev, const u8 *fw, u32 len,
struct rtw89_fw_bin_info *info)
{
@@ -173,10 +330,12 @@ static int rtw89_fw_hdr_parser_v1(struct rtw89_dev *rtwdev, const u8 *fw, u32 le
const u8 *fw_end = fw + len;
const u8 *bin;
u32 base_hdr_len;
- u32 mssc_len = 0;
+ u32 mssc_len;
+ int ret;
u32 i;
info->section_num = le32_get_bits(fw_hdr->w6, FW_HDR_V1_W6_SEC_NUM);
+ info->dsp_checksum = le32_get_bits(fw_hdr->w6, FW_HDR_V1_W6_DSP_CHKSUM);
base_hdr_len = struct_size(fw_hdr, sections, info->section_num);
info->dynamic_hdr_en = le32_get_bits(fw_hdr->w7, FW_HDR_V1_W7_DYN_HDR);
@@ -199,16 +358,9 @@ static int rtw89_fw_hdr_parser_v1(struct rtw89_dev *rtwdev, const u8 *fw, u32 le
section_info = info->section_info;
for (i = 0; i < info->section_num; i++) {
section = &fw_hdr->sections[i];
+
section_info->type =
le32_get_bits(section->w1, FWSECTION_HDR_V1_W1_SECTIONTYPE);
- if (section_info->type == FWDL_SECURITY_SECTION_TYPE) {
- section_info->mssc =
- le32_get_bits(section->w2, FWSECTION_HDR_V1_W2_MSSC);
- mssc_len += section_info->mssc * FWDL_SECURITY_SIGLEN;
- } else {
- section_info->mssc = 0;
- }
-
section_info->len =
le32_get_bits(section->w1, FWSECTION_HDR_V1_W1_SEC_SIZE);
if (le32_get_bits(section->w1, FWSECTION_HDR_V1_W1_CHECKSUM))
@@ -217,15 +369,40 @@ static int rtw89_fw_hdr_parser_v1(struct rtw89_dev *rtwdev, const u8 *fw, u32 le
section_info->dladdr =
le32_get_bits(section->w0, FWSECTION_HDR_V1_W0_DL_ADDR);
section_info->addr = bin;
- bin += section_info->len;
+
+ if (section_info->type == FWDL_SECURITY_SECTION_TYPE) {
+ ret = __parse_security_section(rtwdev, info, section_info,
+ section, bin, &mssc_len);
+ if (ret)
+ return ret;
+ } else {
+ section_info->mssc = 0;
+ mssc_len = 0;
+ }
+
+ rtw89_debug(rtwdev, RTW89_DBG_FW,
+ "section[%d] type=%d len=0x%-6x mssc=%d mssc_len=%d addr=%tx\n",
+ i, section_info->type, section_info->len,
+ section_info->mssc, mssc_len, bin - fw);
+ rtw89_debug(rtwdev, RTW89_DBG_FW,
+ " ignore=%d key_addr=%p (0x%tx) key_len=%d key_idx=%d\n",
+ section_info->ignore, section_info->key_addr,
+ section_info->key_addr ?
+ section_info->key_addr - section_info->addr : 0,
+ section_info->key_len, section_info->key_idx);
+
+ bin += section_info->len + mssc_len;
section_info++;
}
- if (fw_end != bin + mssc_len) {
+ if (fw_end != bin) {
rtw89_err(rtwdev, "[ERR]fw bin size\n");
return -EINVAL;
}
+ if (!info->secure_section_exist)
+ rtw89_warn(rtwdev, "no firmware secure section\n");
+
return 0;
}
@@ -458,6 +635,8 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = {
__CFG_FW_FEAT(RTL8852C, ge, 0, 27, 40, 0, CRASH_TRIGGER),
__CFG_FW_FEAT(RTL8852C, ge, 0, 27, 56, 10, BEACON_FILTER),
__CFG_FW_FEAT(RTL8922A, ge, 0, 34, 30, 0, CRASH_TRIGGER),
+ __CFG_FW_FEAT(RTL8922A, ge, 0, 34, 11, 0, MACID_PAUSE_SLEEP),
+ __CFG_FW_FEAT(RTL8922A, ge, 0, 34, 35, 0, SCAN_OFFLOAD),
};
static void rtw89_fw_iterate_feature_cfg(struct rtw89_fw_info *fw,
@@ -919,9 +1098,56 @@ static void rtw89_h2c_pkt_set_hdr_fwdl(struct rtw89_dev *rtwdev,
len + H2C_HEADER_LEN));
}
-static int __rtw89_fw_download_hdr(struct rtw89_dev *rtwdev, const u8 *fw, u32 len)
+static u32 __rtw89_fw_download_tweak_hdr_v0(struct rtw89_dev *rtwdev,
+ struct rtw89_fw_bin_info *info,
+ struct rtw89_fw_hdr *fw_hdr)
+{
+ le32p_replace_bits(&fw_hdr->w7, FWDL_SECTION_PER_PKT_LEN,
+ FW_HDR_W7_PART_SIZE);
+
+ return 0;
+}
+
+static u32 __rtw89_fw_download_tweak_hdr_v1(struct rtw89_dev *rtwdev,
+ struct rtw89_fw_bin_info *info,
+ struct rtw89_fw_hdr_v1 *fw_hdr)
{
+ struct rtw89_fw_hdr_section_info *section_info;
+ struct rtw89_fw_hdr_section_v1 *section;
+ u8 dst_sec_idx = 0;
+ u8 sec_idx;
+
+ le32p_replace_bits(&fw_hdr->w7, FWDL_SECTION_PER_PKT_LEN,
+ FW_HDR_V1_W7_PART_SIZE);
+
+ for (sec_idx = 0; sec_idx < info->section_num; sec_idx++) {
+ section_info = &info->section_info[sec_idx];
+ section = &fw_hdr->sections[sec_idx];
+
+ if (section_info->ignore)
+ continue;
+
+ if (dst_sec_idx != sec_idx)
+ fw_hdr->sections[dst_sec_idx] = *section;
+
+ dst_sec_idx++;
+ }
+
+ le32p_replace_bits(&fw_hdr->w6, dst_sec_idx, FW_HDR_V1_W6_SEC_NUM);
+
+ return (info->section_num - dst_sec_idx) * sizeof(*section);
+}
+
+static int __rtw89_fw_download_hdr(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_suit *fw_suit,
+ struct rtw89_fw_bin_info *info)
+{
+ u32 len = info->hdr_len - info->dynamic_hdr_len;
+ struct rtw89_fw_hdr_v1 *fw_hdr_v1;
+ const u8 *fw = fw_suit->data;
+ struct rtw89_fw_hdr *fw_hdr;
struct sk_buff *skb;
+ u32 truncated;
u32 ret = 0;
skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
@@ -931,7 +1157,26 @@ static int __rtw89_fw_download_hdr(struct rtw89_dev *rtwdev, const u8 *fw, u32 l
}
skb_put_data(skb, fw, len);
- SET_FW_HDR_PART_SIZE(skb->data, FWDL_SECTION_PER_PKT_LEN);
+
+ switch (fw_suit->hdr_ver) {
+ case 0:
+ fw_hdr = (struct rtw89_fw_hdr *)skb->data;
+ truncated = __rtw89_fw_download_tweak_hdr_v0(rtwdev, info, fw_hdr);
+ break;
+ case 1:
+ fw_hdr_v1 = (struct rtw89_fw_hdr_v1 *)skb->data;
+ truncated = __rtw89_fw_download_tweak_hdr_v1(rtwdev, info, fw_hdr_v1);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto fail;
+ }
+
+ if (truncated) {
+ len -= truncated;
+ skb_trim(skb, len);
+ }
+
rtw89_h2c_pkt_set_hdr_fwdl(rtwdev, skb, FWCMD_TYPE_H2C,
H2C_CAT_MAC, H2C_CL_MAC_FWDL,
H2C_FUNC_MAC_FWHDR_DL, len);
@@ -950,12 +1195,14 @@ fail:
return ret;
}
-static int rtw89_fw_download_hdr(struct rtw89_dev *rtwdev, const u8 *fw, u32 len)
+static int rtw89_fw_download_hdr(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_suit *fw_suit,
+ struct rtw89_fw_bin_info *info)
{
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
int ret;
- ret = __rtw89_fw_download_hdr(rtwdev, fw, len);
+ ret = __rtw89_fw_download_hdr(rtwdev, fw_suit, info);
if (ret) {
rtw89_err(rtwdev, "[ERR]FW header download\n");
return ret;
@@ -979,9 +1226,21 @@ static int __rtw89_fw_download_main(struct rtw89_dev *rtwdev,
struct sk_buff *skb;
const u8 *section = info->addr;
u32 residue_len = info->len;
+ bool copy_key = false;
u32 pkt_len;
int ret;
+ if (info->ignore)
+ return 0;
+
+ if (info->key_addr && info->key_len) {
+ if (info->len > FWDL_SECTION_PER_PKT_LEN || info->len < info->key_len)
+ rtw89_warn(rtwdev, "ignore to copy key data because of len %d, %d, %d\n",
+ info->len, FWDL_SECTION_PER_PKT_LEN, info->key_len);
+ else
+ copy_key = true;
+ }
+
while (residue_len) {
if (residue_len >= FWDL_SECTION_PER_PKT_LEN)
pkt_len = FWDL_SECTION_PER_PKT_LEN;
@@ -995,6 +1254,10 @@ static int __rtw89_fw_download_main(struct rtw89_dev *rtwdev,
}
skb_put_data(skb, section, pkt_len);
+ if (copy_key)
+ memcpy(skb->data + pkt_len - info->key_len,
+ info->key_addr, info->key_len);
+
ret = rtw89_h2c_tx(rtwdev, skb, true);
if (ret) {
rtw89_err(rtwdev, "failed to send h2c\n");
@@ -1101,7 +1364,7 @@ static int rtw89_fw_download_suit(struct rtw89_dev *rtwdev,
struct rtw89_fw_suit *fw_suit)
{
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
- struct rtw89_fw_bin_info info;
+ struct rtw89_fw_bin_info info = {};
int ret;
ret = rtw89_fw_hdr_parser(rtwdev, fw_suit, &info);
@@ -1120,8 +1383,7 @@ static int rtw89_fw_download_suit(struct rtw89_dev *rtwdev,
return ret;
}
- ret = rtw89_fw_download_hdr(rtwdev, fw_suit->data, info.hdr_len -
- info.dynamic_hdr_len);
+ ret = rtw89_fw_download_hdr(rtwdev, fw_suit, &info);
if (ret)
return ret;
@@ -1485,13 +1747,108 @@ fail:
}
EXPORT_SYMBOL(rtw89_fw_h2c_dctl_sec_cam_v1);
-#define H2C_BA_CAM_LEN 8
+int rtw89_fw_h2c_dctl_sec_cam_v2(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
+{
+ struct rtw89_h2c_dctlinfo_ud_v2 *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for dctl sec cam\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_dctlinfo_ud_v2 *)skb->data;
+
+ rtw89_cam_fill_dctl_sec_cam_info_v2(rtwdev, rtwvif, rtwsta, h2c);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MAC_FR_EXCHG,
+ H2C_FUNC_MAC_DCTLINFO_UD_V2, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_dctl_sec_cam_v2);
+
+int rtw89_fw_h2c_default_dmac_tbl_v2(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
+{
+ u8 mac_id = rtwsta ? rtwsta->mac_id : rtwvif->mac_id;
+ struct rtw89_h2c_dctlinfo_ud_v2 *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for dctl v2\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_dctlinfo_ud_v2 *)skb->data;
+
+ h2c->c0 = le32_encode_bits(mac_id, DCTLINFO_V2_C0_MACID) |
+ le32_encode_bits(1, DCTLINFO_V2_C0_OP);
+
+ h2c->m0 = cpu_to_le32(DCTLINFO_V2_W0_ALL);
+ h2c->m1 = cpu_to_le32(DCTLINFO_V2_W1_ALL);
+ h2c->m2 = cpu_to_le32(DCTLINFO_V2_W2_ALL);
+ h2c->m3 = cpu_to_le32(DCTLINFO_V2_W3_ALL);
+ h2c->m4 = cpu_to_le32(DCTLINFO_V2_W4_ALL);
+ h2c->m5 = cpu_to_le32(DCTLINFO_V2_W5_ALL);
+ h2c->m6 = cpu_to_le32(DCTLINFO_V2_W6_ALL);
+ h2c->m7 = cpu_to_le32(DCTLINFO_V2_W7_ALL);
+ h2c->m8 = cpu_to_le32(DCTLINFO_V2_W8_ALL);
+ h2c->m9 = cpu_to_le32(DCTLINFO_V2_W9_ALL);
+ h2c->m10 = cpu_to_le32(DCTLINFO_V2_W10_ALL);
+ h2c->m11 = cpu_to_le32(DCTLINFO_V2_W11_ALL);
+ h2c->m12 = cpu_to_le32(DCTLINFO_V2_W12_ALL);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MAC_FR_EXCHG,
+ H2C_FUNC_MAC_DCTLINFO_UD_V2, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_default_dmac_tbl_v2);
+
int rtw89_fw_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
bool valid, struct ieee80211_ampdu_params *params)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
struct rtw89_vif *rtwvif = rtwsta->rtwvif;
+ struct rtw89_h2c_ba_cam *h2c;
u8 macid = rtwsta->mac_id;
+ u32 len = sizeof(*h2c);
struct sk_buff *skb;
u8 entry_idx;
int ret;
@@ -1509,32 +1866,34 @@ int rtw89_fw_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
return 0;
}
- skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_BA_CAM_LEN);
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
if (!skb) {
rtw89_err(rtwdev, "failed to alloc skb for h2c ba cam\n");
return -ENOMEM;
}
- skb_put(skb, H2C_BA_CAM_LEN);
- SET_BA_CAM_MACID(skb->data, macid);
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_ba_cam *)skb->data;
+
+ h2c->w0 = le32_encode_bits(macid, RTW89_H2C_BA_CAM_W0_MACID);
if (chip->bacam_ver == RTW89_BACAM_V0_EXT)
- SET_BA_CAM_ENTRY_IDX_V1(skb->data, entry_idx);
+ h2c->w1 |= le32_encode_bits(entry_idx, RTW89_H2C_BA_CAM_W1_ENTRY_IDX_V1);
else
- SET_BA_CAM_ENTRY_IDX(skb->data, entry_idx);
+ h2c->w0 |= le32_encode_bits(entry_idx, RTW89_H2C_BA_CAM_W0_ENTRY_IDX);
if (!valid)
goto end;
- SET_BA_CAM_VALID(skb->data, valid);
- SET_BA_CAM_TID(skb->data, params->tid);
+ h2c->w0 |= le32_encode_bits(valid, RTW89_H2C_BA_CAM_W0_VALID) |
+ le32_encode_bits(params->tid, RTW89_H2C_BA_CAM_W0_TID);
if (params->buf_size > 64)
- SET_BA_CAM_BMAP_SIZE(skb->data, 4);
+ h2c->w0 |= le32_encode_bits(4, RTW89_H2C_BA_CAM_W0_BMAP_SIZE);
else
- SET_BA_CAM_BMAP_SIZE(skb->data, 0);
+ h2c->w0 |= le32_encode_bits(0, RTW89_H2C_BA_CAM_W0_BMAP_SIZE);
/* If init req is set, hw will set the ssn */
- SET_BA_CAM_INIT_REQ(skb->data, 1);
- SET_BA_CAM_SSN(skb->data, params->ssn);
+ h2c->w0 |= le32_encode_bits(1, RTW89_H2C_BA_CAM_W0_INIT_REQ) |
+ le32_encode_bits(params->ssn, RTW89_H2C_BA_CAM_W0_SSN);
if (chip->bacam_ver == RTW89_BACAM_V0_EXT) {
- SET_BA_CAM_STD_EN(skb->data, 1);
- SET_BA_CAM_BAND(skb->data, rtwvif->mac_idx);
+ h2c->w1 |= le32_encode_bits(1, RTW89_H2C_BA_CAM_W1_STD_EN) |
+ le32_encode_bits(rtwvif->mac_idx, RTW89_H2C_BA_CAM_W1_BAND);
}
end:
@@ -1542,7 +1901,7 @@ end:
H2C_CAT_MAC,
H2C_CL_BA_CAM,
H2C_FUNC_MAC_BA_CAM, 0, 1,
- H2C_BA_CAM_LEN);
+ len);
ret = rtw89_h2c_tx(rtwdev, skb, false);
if (ret) {
@@ -1556,31 +1915,35 @@ fail:
return ret;
}
+EXPORT_SYMBOL(rtw89_fw_h2c_ba_cam);
static int rtw89_fw_h2c_init_ba_cam_v0_ext(struct rtw89_dev *rtwdev,
u8 entry_idx, u8 uid)
{
+ struct rtw89_h2c_ba_cam *h2c;
+ u32 len = sizeof(*h2c);
struct sk_buff *skb;
int ret;
- skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_BA_CAM_LEN);
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
if (!skb) {
rtw89_err(rtwdev, "failed to alloc skb for dynamic h2c ba cam\n");
return -ENOMEM;
}
- skb_put(skb, H2C_BA_CAM_LEN);
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_ba_cam *)skb->data;
- SET_BA_CAM_VALID(skb->data, 1);
- SET_BA_CAM_ENTRY_IDX_V1(skb->data, entry_idx);
- SET_BA_CAM_UID(skb->data, uid);
- SET_BA_CAM_BAND(skb->data, 0);
- SET_BA_CAM_STD_EN(skb->data, 0);
+ h2c->w0 = le32_encode_bits(1, RTW89_H2C_BA_CAM_W0_VALID);
+ h2c->w1 = le32_encode_bits(entry_idx, RTW89_H2C_BA_CAM_W1_ENTRY_IDX_V1) |
+ le32_encode_bits(uid, RTW89_H2C_BA_CAM_W1_UID) |
+ le32_encode_bits(0, RTW89_H2C_BA_CAM_W1_BAND) |
+ le32_encode_bits(0, RTW89_H2C_BA_CAM_W1_STD_EN);
rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
H2C_CAT_MAC,
H2C_CL_BA_CAM,
H2C_FUNC_MAC_BA_CAM, 0, 1,
- H2C_BA_CAM_LEN);
+ len);
ret = rtw89_h2c_tx(rtwdev, skb, false);
if (ret) {
@@ -1609,14 +1972,132 @@ void rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(struct rtw89_dev *rtwdev)
}
}
+int rtw89_fw_h2c_ba_cam_v1(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
+ bool valid, struct ieee80211_ampdu_params *params)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+ struct rtw89_vif *rtwvif = rtwsta->rtwvif;
+ struct rtw89_h2c_ba_cam_v1 *h2c;
+ u8 macid = rtwsta->mac_id;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ u8 entry_idx;
+ u8 bmap_size;
+ int ret;
+
+ ret = valid ?
+ rtw89_core_acquire_sta_ba_entry(rtwdev, rtwsta, params->tid, &entry_idx) :
+ rtw89_core_release_sta_ba_entry(rtwdev, rtwsta, params->tid, &entry_idx);
+ if (ret) {
+ /* it still works even if we don't have static BA CAM, because
+ * hardware can create dynamic BA CAM automatically.
+ */
+ rtw89_debug(rtwdev, RTW89_DBG_TXRX,
+ "failed to %s entry tid=%d for h2c ba cam\n",
+ valid ? "alloc" : "free", params->tid);
+ return 0;
+ }
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c ba cam\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_ba_cam_v1 *)skb->data;
+
+ if (params->buf_size > 512)
+ bmap_size = 10;
+ else if (params->buf_size > 256)
+ bmap_size = 8;
+ else if (params->buf_size > 64)
+ bmap_size = 4;
+ else
+ bmap_size = 0;
+
+ h2c->w0 = le32_encode_bits(valid, RTW89_H2C_BA_CAM_V1_W0_VALID) |
+ le32_encode_bits(1, RTW89_H2C_BA_CAM_V1_W0_INIT_REQ) |
+ le32_encode_bits(macid, RTW89_H2C_BA_CAM_V1_W0_MACID_MASK) |
+ le32_encode_bits(params->tid, RTW89_H2C_BA_CAM_V1_W0_TID_MASK) |
+ le32_encode_bits(bmap_size, RTW89_H2C_BA_CAM_V1_W0_BMAP_SIZE_MASK) |
+ le32_encode_bits(params->ssn, RTW89_H2C_BA_CAM_V1_W0_SSN_MASK);
+
+ entry_idx += chip->bacam_dynamic_num; /* std entry right after dynamic ones */
+ h2c->w1 = le32_encode_bits(entry_idx, RTW89_H2C_BA_CAM_V1_W1_ENTRY_IDX_MASK) |
+ le32_encode_bits(1, RTW89_H2C_BA_CAM_V1_W1_STD_ENTRY_EN) |
+ le32_encode_bits(!!rtwvif->mac_idx, RTW89_H2C_BA_CAM_V1_W1_BAND_SEL);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_BA_CAM,
+ H2C_FUNC_MAC_BA_CAM_V1, 0, 1,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_ba_cam_v1);
+
+int rtw89_fw_h2c_init_ba_cam_users(struct rtw89_dev *rtwdev, u8 users,
+ u8 offset, u8 mac_idx)
+{
+ struct rtw89_h2c_ba_cam_init *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c ba cam init\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_ba_cam_init *)skb->data;
+
+ h2c->w0 = le32_encode_bits(users, RTW89_H2C_BA_CAM_INIT_USERS_MASK) |
+ le32_encode_bits(offset, RTW89_H2C_BA_CAM_INIT_OFFSET_MASK) |
+ le32_encode_bits(mac_idx, RTW89_H2C_BA_CAM_INIT_BAND_SEL);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_BA_CAM,
+ H2C_FUNC_MAC_BA_CAM_INIT, 0, 1,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
#define H2C_LOG_CFG_LEN 12
int rtw89_fw_h2c_fw_log(struct rtw89_dev *rtwdev, bool enable)
{
struct sk_buff *skb;
- u32 comp = enable ? BIT(RTW89_FW_LOG_COMP_INIT) | BIT(RTW89_FW_LOG_COMP_TASK) |
- BIT(RTW89_FW_LOG_COMP_PS) | BIT(RTW89_FW_LOG_COMP_ERROR) : 0;
+ u32 comp = 0;
int ret;
+ if (enable)
+ comp = BIT(RTW89_FW_LOG_COMP_INIT) | BIT(RTW89_FW_LOG_COMP_TASK) |
+ BIT(RTW89_FW_LOG_COMP_PS) | BIT(RTW89_FW_LOG_COMP_ERROR) |
+ BIT(RTW89_FW_LOG_COMP_SCAN);
+
skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_LOG_CFG_LEN);
if (!skb) {
rtw89_err(rtwdev, "failed to alloc skb for fw log cfg\n");
@@ -1815,6 +2296,50 @@ fail:
return ret;
}
+int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev,
+ rtwvif->sub_entity_idx);
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+ struct rtw89_h2c_lps_ch_info *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ if (chip->chip_gen != RTW89_CHIP_BE)
+ return 0;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c lps_ch_info\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_lps_ch_info *)skb->data;
+
+ h2c->info[0].central_ch = chan->channel;
+ h2c->info[0].pri_ch = chan->primary_channel;
+ h2c->info[0].band = chan->band_type;
+ h2c->info[0].bw = chan->band_width;
+ h2c->mlo_dbcc_mode_lps = cpu_to_le32(MLO_2_PLUS_0_1RF);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_DM,
+ H2C_FUNC_FW_LPS_CH_INFO, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
#define H2C_P2P_ACT_LEN 20
int rtw89_fw_h2c_p2p_act(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
struct ieee80211_p2p_noa_desc *desc,
@@ -1892,11 +2417,12 @@ static void __rtw89_fw_h2c_set_tx_path(struct rtw89_dev *rtwdev,
#define H2C_CMC_TBL_LEN 68
int rtw89_fw_h2c_default_cmac_tbl(struct rtw89_dev *rtwdev,
- struct rtw89_vif *rtwvif)
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
+ u8 macid = rtwsta ? rtwsta->mac_id : rtwvif->mac_id;
struct sk_buff *skb;
- u8 macid = rtwvif->mac_id;
int ret;
skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_CMC_TBL_LEN);
@@ -1937,6 +2463,91 @@ fail:
return ret;
}
+EXPORT_SYMBOL(rtw89_fw_h2c_default_cmac_tbl);
+
+int rtw89_fw_h2c_default_cmac_tbl_g7(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
+{
+ u8 mac_id = rtwsta ? rtwsta->mac_id : rtwvif->mac_id;
+ struct rtw89_h2c_cctlinfo_ud_g7 *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for cmac g7\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_cctlinfo_ud_g7 *)skb->data;
+
+ h2c->c0 = le32_encode_bits(mac_id, CCTLINFO_G7_C0_MACID) |
+ le32_encode_bits(1, CCTLINFO_G7_C0_OP);
+
+ h2c->w0 = le32_encode_bits(4, CCTLINFO_G7_W0_DATARATE);
+ h2c->m0 = cpu_to_le32(CCTLINFO_G7_W0_ALL);
+
+ h2c->w1 = le32_encode_bits(4, CCTLINFO_G7_W1_DATA_RTY_LOWEST_RATE) |
+ le32_encode_bits(0xa, CCTLINFO_G7_W1_RTSRATE) |
+ le32_encode_bits(4, CCTLINFO_G7_W1_RTS_RTY_LOWEST_RATE);
+ h2c->m1 = cpu_to_le32(CCTLINFO_G7_W1_ALL);
+
+ h2c->m2 = cpu_to_le32(CCTLINFO_G7_W2_ALL);
+
+ h2c->m3 = cpu_to_le32(CCTLINFO_G7_W3_ALL);
+
+ h2c->w4 = le32_encode_bits(0xFFFF, CCTLINFO_G7_W4_ACT_SUBCH_CBW);
+ h2c->m4 = cpu_to_le32(CCTLINFO_G7_W4_ALL);
+
+ h2c->w5 = le32_encode_bits(2, CCTLINFO_G7_W5_NOMINAL_PKT_PADDING0) |
+ le32_encode_bits(2, CCTLINFO_G7_W5_NOMINAL_PKT_PADDING1) |
+ le32_encode_bits(2, CCTLINFO_G7_W5_NOMINAL_PKT_PADDING2) |
+ le32_encode_bits(2, CCTLINFO_G7_W5_NOMINAL_PKT_PADDING3) |
+ le32_encode_bits(2, CCTLINFO_G7_W5_NOMINAL_PKT_PADDING4);
+ h2c->m5 = cpu_to_le32(CCTLINFO_G7_W5_ALL);
+
+ h2c->w6 = le32_encode_bits(0xb, CCTLINFO_G7_W6_RESP_REF_RATE);
+ h2c->m6 = cpu_to_le32(CCTLINFO_G7_W6_ALL);
+
+ h2c->w7 = le32_encode_bits(1, CCTLINFO_G7_W7_NC) |
+ le32_encode_bits(1, CCTLINFO_G7_W7_NR) |
+ le32_encode_bits(1, CCTLINFO_G7_W7_CB) |
+ le32_encode_bits(0x1, CCTLINFO_G7_W7_CSI_PARA_EN) |
+ le32_encode_bits(0xb, CCTLINFO_G7_W7_CSI_FIX_RATE);
+ h2c->m7 = cpu_to_le32(CCTLINFO_G7_W7_ALL);
+
+ h2c->m8 = cpu_to_le32(CCTLINFO_G7_W8_ALL);
+
+ h2c->w14 = le32_encode_bits(0, CCTLINFO_G7_W14_VO_CURR_RATE) |
+ le32_encode_bits(0, CCTLINFO_G7_W14_VI_CURR_RATE) |
+ le32_encode_bits(0, CCTLINFO_G7_W14_BE_CURR_RATE_L);
+ h2c->m14 = cpu_to_le32(CCTLINFO_G7_W14_ALL);
+
+ h2c->w15 = le32_encode_bits(0, CCTLINFO_G7_W15_BE_CURR_RATE_H) |
+ le32_encode_bits(0, CCTLINFO_G7_W15_BK_CURR_RATE) |
+ le32_encode_bits(0, CCTLINFO_G7_W15_MGNT_CURR_RATE);
+ h2c->m15 = cpu_to_le32(CCTLINFO_G7_W15_ALL);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC, H2C_CL_MAC_FR_EXCHG,
+ H2C_FUNC_MAC_CCTLINFO_UD_G7, 0, 1,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_default_cmac_tbl_g7);
static void __get_sta_he_pkt_padding(struct rtw89_dev *rtwdev,
struct ieee80211_sta *sta, u8 *pads)
@@ -1950,9 +2561,6 @@ static void __get_sta_he_pkt_padding(struct rtw89_dev *rtwdev,
u16 ppe;
int i;
- if (!sta->deflink.he_cap.has_he)
- return;
-
ppe_th = FIELD_GET(IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
sta->deflink.he_cap.he_cap_elem.phy_cap_info[6]);
if (!ppe_th) {
@@ -2011,7 +2619,7 @@ int rtw89_fw_h2c_assoc_cmac_tbl(struct rtw89_dev *rtwdev,
int ret;
memset(pads, 0, sizeof(pads));
- if (sta)
+ if (sta && sta->deflink.he_cap.has_he)
__get_sta_he_pkt_padding(rtwdev, sta, pads);
if (vif->p2p)
@@ -2073,6 +2681,246 @@ fail:
return ret;
}
+EXPORT_SYMBOL(rtw89_fw_h2c_assoc_cmac_tbl);
+
+static void __get_sta_eht_pkt_padding(struct rtw89_dev *rtwdev,
+ struct ieee80211_sta *sta, u8 *pads)
+{
+ u8 nss = min(sta->deflink.rx_nss, rtwdev->hal.tx_nss) - 1;
+ u16 ppe_thres_hdr;
+ u8 ppe16, ppe8;
+ u8 n, idx, sh;
+ u8 ru_bitmap;
+ bool ppe_th;
+ u16 ppe;
+ int i;
+
+ ppe_th = !!u8_get_bits(sta->deflink.eht_cap.eht_cap_elem.phy_cap_info[5],
+ IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT);
+ if (!ppe_th) {
+ u8 pad;
+
+ pad = u8_get_bits(sta->deflink.eht_cap.eht_cap_elem.phy_cap_info[5],
+ IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK);
+
+ for (i = 0; i < RTW89_PPE_BW_NUM; i++)
+ pads[i] = pad;
+
+ return;
+ }
+
+ ppe_thres_hdr = get_unaligned_le16(sta->deflink.eht_cap.eht_ppe_thres);
+ ru_bitmap = u16_get_bits(ppe_thres_hdr,
+ IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK);
+ n = hweight8(ru_bitmap);
+ n = IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE +
+ (n * IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE * 2) * nss;
+
+ for (i = 0; i < RTW89_PPE_BW_NUM; i++) {
+ if (!(ru_bitmap & BIT(i))) {
+ pads[i] = 1;
+ continue;
+ }
+
+ idx = n >> 3;
+ sh = n & 7;
+ n += IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE * 2;
+
+ ppe = get_unaligned_le16(sta->deflink.eht_cap.eht_ppe_thres + idx);
+ ppe16 = (ppe >> sh) & IEEE80211_PPE_THRES_NSS_MASK;
+ sh += IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE;
+ ppe8 = (ppe >> sh) & IEEE80211_PPE_THRES_NSS_MASK;
+
+ if (ppe16 != 7 && ppe8 == 7)
+ pads[i] = 2;
+ else if (ppe8 != 7)
+ pads[i] = 1;
+ else
+ pads[i] = 0;
+ }
+}
+
+int rtw89_fw_h2c_assoc_cmac_tbl_g7(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_SUB_ENTITY_0);
+ struct rtw89_vif *rtwvif = (struct rtw89_vif *)vif->drv_priv;
+ struct rtw89_sta *rtwsta = sta_to_rtwsta_safe(sta);
+ u8 mac_id = rtwsta ? rtwsta->mac_id : rtwvif->mac_id;
+ struct rtw89_h2c_cctlinfo_ud_g7 *h2c;
+ u8 pads[RTW89_PPE_BW_NUM];
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ u16 lowest_rate;
+ int ret;
+
+ memset(pads, 0, sizeof(pads));
+ if (sta) {
+ if (sta->deflink.eht_cap.has_eht)
+ __get_sta_eht_pkt_padding(rtwdev, sta, pads);
+ else if (sta->deflink.he_cap.has_he)
+ __get_sta_he_pkt_padding(rtwdev, sta, pads);
+ }
+
+ if (vif->p2p)
+ lowest_rate = RTW89_HW_RATE_OFDM6;
+ else if (chan->band_type == RTW89_BAND_2G)
+ lowest_rate = RTW89_HW_RATE_CCK1;
+ else
+ lowest_rate = RTW89_HW_RATE_OFDM6;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for cmac g7\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_cctlinfo_ud_g7 *)skb->data;
+
+ h2c->c0 = le32_encode_bits(mac_id, CCTLINFO_G7_C0_MACID) |
+ le32_encode_bits(1, CCTLINFO_G7_C0_OP);
+
+ h2c->w0 = le32_encode_bits(1, CCTLINFO_G7_W0_DISRTSFB) |
+ le32_encode_bits(1, CCTLINFO_G7_W0_DISDATAFB);
+ h2c->m0 = cpu_to_le32(CCTLINFO_G7_W0_DISRTSFB |
+ CCTLINFO_G7_W0_DISDATAFB);
+
+ h2c->w1 = le32_encode_bits(lowest_rate, CCTLINFO_G7_W1_RTS_RTY_LOWEST_RATE);
+ h2c->m1 = cpu_to_le32(CCTLINFO_G7_W1_RTS_RTY_LOWEST_RATE);
+
+ h2c->w2 = le32_encode_bits(0, CCTLINFO_G7_W2_DATA_TXCNT_LMT_SEL);
+ h2c->m2 = cpu_to_le32(CCTLINFO_G7_W2_DATA_TXCNT_LMT_SEL);
+
+ h2c->w3 = le32_encode_bits(0, CCTLINFO_G7_W3_RTS_TXCNT_LMT_SEL);
+ h2c->m3 = cpu_to_le32(CCTLINFO_G7_W3_RTS_TXCNT_LMT_SEL);
+
+ h2c->w4 = le32_encode_bits(rtwvif->port, CCTLINFO_G7_W4_MULTI_PORT_ID);
+ h2c->m4 = cpu_to_le32(CCTLINFO_G7_W4_MULTI_PORT_ID);
+
+ if (rtwvif->net_type == RTW89_NET_TYPE_AP_MODE) {
+ h2c->w4 |= le32_encode_bits(0, CCTLINFO_G7_W4_DATA_DCM);
+ h2c->m4 |= cpu_to_le32(CCTLINFO_G7_W4_DATA_DCM);
+ }
+
+ if (vif->bss_conf.eht_support) {
+ u16 punct = vif->bss_conf.chanreq.oper.punctured;
+
+ h2c->w4 |= le32_encode_bits(~punct,
+ CCTLINFO_G7_W4_ACT_SUBCH_CBW);
+ h2c->m4 |= cpu_to_le32(CCTLINFO_G7_W4_ACT_SUBCH_CBW);
+ }
+
+ h2c->w5 = le32_encode_bits(pads[RTW89_CHANNEL_WIDTH_20],
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING0) |
+ le32_encode_bits(pads[RTW89_CHANNEL_WIDTH_40],
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING1) |
+ le32_encode_bits(pads[RTW89_CHANNEL_WIDTH_80],
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING2) |
+ le32_encode_bits(pads[RTW89_CHANNEL_WIDTH_160],
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING3) |
+ le32_encode_bits(pads[RTW89_CHANNEL_WIDTH_320],
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING4);
+ h2c->m5 = cpu_to_le32(CCTLINFO_G7_W5_NOMINAL_PKT_PADDING0 |
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING1 |
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING2 |
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING3 |
+ CCTLINFO_G7_W5_NOMINAL_PKT_PADDING4);
+
+ h2c->w6 = le32_encode_bits(vif->type == NL80211_IFTYPE_STATION ? 1 : 0,
+ CCTLINFO_G7_W6_ULDL);
+ h2c->m6 = cpu_to_le32(CCTLINFO_G7_W6_ULDL);
+
+ if (sta) {
+ h2c->w8 = le32_encode_bits(sta->deflink.he_cap.has_he,
+ CCTLINFO_G7_W8_BSR_QUEUE_SIZE_FORMAT);
+ h2c->m8 = cpu_to_le32(CCTLINFO_G7_W8_BSR_QUEUE_SIZE_FORMAT);
+ }
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC, H2C_CL_MAC_FR_EXCHG,
+ H2C_FUNC_MAC_CCTLINFO_UD_G7, 0, 1,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_assoc_cmac_tbl_g7);
+
+int rtw89_fw_h2c_ampdu_cmac_tbl_g7(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ struct rtw89_sta *rtwsta = (struct rtw89_sta *)sta->drv_priv;
+ struct rtw89_h2c_cctlinfo_ud_g7 *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ u16 agg_num = 0;
+ u8 ba_bmap = 0;
+ int ret;
+ u8 tid;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for ampdu cmac g7\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_cctlinfo_ud_g7 *)skb->data;
+
+ for_each_set_bit(tid, rtwsta->ampdu_map, IEEE80211_NUM_TIDS) {
+ if (agg_num == 0)
+ agg_num = rtwsta->ampdu_params[tid].agg_num;
+ else
+ agg_num = min(agg_num, rtwsta->ampdu_params[tid].agg_num);
+ }
+
+ if (agg_num <= 0x20)
+ ba_bmap = 3;
+ else if (agg_num > 0x20 && agg_num <= 0x40)
+ ba_bmap = 0;
+ else if (agg_num > 0x40 && agg_num <= 0x80)
+ ba_bmap = 1;
+ else if (agg_num > 0x80 && agg_num <= 0x100)
+ ba_bmap = 2;
+ else if (agg_num > 0x100 && agg_num <= 0x200)
+ ba_bmap = 4;
+ else if (agg_num > 0x200 && agg_num <= 0x400)
+ ba_bmap = 5;
+
+ h2c->c0 = le32_encode_bits(rtwsta->mac_id, CCTLINFO_G7_C0_MACID) |
+ le32_encode_bits(1, CCTLINFO_G7_C0_OP);
+
+ h2c->w3 = le32_encode_bits(ba_bmap, CCTLINFO_G7_W3_BA_BMAP);
+ h2c->m3 = cpu_to_le32(CCTLINFO_G7_W3_BA_BMAP);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC, H2C_CL_MAC_FR_EXCHG,
+ H2C_FUNC_MAC_CCTLINFO_UD_G7, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_ampdu_cmac_tbl_g7);
int rtw89_fw_h2c_txtime_cmac_tbl(struct rtw89_dev *rtwdev,
struct rtw89_sta *rtwsta)
@@ -2155,18 +3003,20 @@ fail:
return ret;
}
-#define H2C_BCN_BASE_LEN 12
int rtw89_fw_h2c_update_beacon(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif)
{
- struct ieee80211_vif *vif = rtwvif_to_vif(rtwvif);
const struct rtw89_chan *chan = rtw89_chan_get(rtwdev,
rtwvif->sub_entity_idx);
- struct sk_buff *skb;
+ struct ieee80211_vif *vif = rtwvif_to_vif(rtwvif);
+ struct rtw89_h2c_bcn_upd *h2c;
struct sk_buff *skb_beacon;
- u16 tim_offset;
+ struct ieee80211_hdr *hdr;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
int bcn_total_len;
u16 beacon_rate;
+ u16 tim_offset;
void *noa_data;
u8 noa_len;
int ret;
@@ -2192,23 +3042,27 @@ int rtw89_fw_h2c_update_beacon(struct rtw89_dev *rtwdev,
skb_put_data(skb_beacon, noa_data, noa_len);
}
- bcn_total_len = H2C_BCN_BASE_LEN + skb_beacon->len;
+ hdr = (struct ieee80211_hdr *)skb_beacon;
+ tim_offset -= ieee80211_hdrlen(hdr->frame_control);
+
+ bcn_total_len = len + skb_beacon->len;
skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, bcn_total_len);
if (!skb) {
rtw89_err(rtwdev, "failed to alloc skb for fw dl\n");
dev_kfree_skb_any(skb_beacon);
return -ENOMEM;
}
- skb_put(skb, H2C_BCN_BASE_LEN);
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_bcn_upd *)skb->data;
- SET_BCN_UPD_PORT(skb->data, rtwvif->port);
- SET_BCN_UPD_MBSSID(skb->data, 0);
- SET_BCN_UPD_BAND(skb->data, rtwvif->mac_idx);
- SET_BCN_UPD_GRP_IE_OFST(skb->data, tim_offset);
- SET_BCN_UPD_MACID(skb->data, rtwvif->mac_id);
- SET_BCN_UPD_SSN_SEL(skb->data, RTW89_MGMT_HW_SSN_SEL);
- SET_BCN_UPD_SSN_MODE(skb->data, RTW89_MGMT_HW_SEQ_MODE);
- SET_BCN_UPD_RATE(skb->data, beacon_rate);
+ h2c->w0 = le32_encode_bits(rtwvif->port, RTW89_H2C_BCN_UPD_W0_PORT) |
+ le32_encode_bits(0, RTW89_H2C_BCN_UPD_W0_MBSSID) |
+ le32_encode_bits(rtwvif->mac_idx, RTW89_H2C_BCN_UPD_W0_BAND) |
+ le32_encode_bits(tim_offset | BIT(7), RTW89_H2C_BCN_UPD_W0_GRP_IE_OFST);
+ h2c->w1 = le32_encode_bits(rtwvif->mac_id, RTW89_H2C_BCN_UPD_W1_MACID) |
+ le32_encode_bits(RTW89_MGMT_HW_SSN_SEL, RTW89_H2C_BCN_UPD_W1_SSN_SEL) |
+ le32_encode_bits(RTW89_MGMT_HW_SEQ_MODE, RTW89_H2C_BCN_UPD_W1_SSN_MODE) |
+ le32_encode_bits(beacon_rate, RTW89_H2C_BCN_UPD_W1_RATE);
skb_put_data(skb, skb_beacon->data, skb_beacon->len);
dev_kfree_skb_any(skb_beacon);
@@ -2227,6 +3081,90 @@ int rtw89_fw_h2c_update_beacon(struct rtw89_dev *rtwdev,
return 0;
}
+EXPORT_SYMBOL(rtw89_fw_h2c_update_beacon);
+
+int rtw89_fw_h2c_update_beacon_be(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_SUB_ENTITY_0);
+ struct ieee80211_vif *vif = rtwvif_to_vif(rtwvif);
+ struct rtw89_h2c_bcn_upd_be *h2c;
+ struct sk_buff *skb_beacon;
+ struct ieee80211_hdr *hdr;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int bcn_total_len;
+ u16 beacon_rate;
+ u16 tim_offset;
+ void *noa_data;
+ u8 noa_len;
+ int ret;
+
+ if (vif->p2p)
+ beacon_rate = RTW89_HW_RATE_OFDM6;
+ else if (chan->band_type == RTW89_BAND_2G)
+ beacon_rate = RTW89_HW_RATE_CCK1;
+ else
+ beacon_rate = RTW89_HW_RATE_OFDM6;
+
+ skb_beacon = ieee80211_beacon_get_tim(rtwdev->hw, vif, &tim_offset,
+ NULL, 0);
+ if (!skb_beacon) {
+ rtw89_err(rtwdev, "failed to get beacon skb\n");
+ return -ENOMEM;
+ }
+
+ noa_len = rtw89_p2p_noa_fetch(rtwvif, &noa_data);
+ if (noa_len &&
+ (noa_len <= skb_tailroom(skb_beacon) ||
+ pskb_expand_head(skb_beacon, 0, noa_len, GFP_KERNEL) == 0)) {
+ skb_put_data(skb_beacon, noa_data, noa_len);
+ }
+
+ hdr = (struct ieee80211_hdr *)skb_beacon;
+ tim_offset -= ieee80211_hdrlen(hdr->frame_control);
+
+ bcn_total_len = len + skb_beacon->len;
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, bcn_total_len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for fw dl\n");
+ dev_kfree_skb_any(skb_beacon);
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_bcn_upd_be *)skb->data;
+
+ h2c->w0 = le32_encode_bits(rtwvif->port, RTW89_H2C_BCN_UPD_BE_W0_PORT) |
+ le32_encode_bits(0, RTW89_H2C_BCN_UPD_BE_W0_MBSSID) |
+ le32_encode_bits(rtwvif->mac_idx, RTW89_H2C_BCN_UPD_BE_W0_BAND) |
+ le32_encode_bits(tim_offset | BIT(7), RTW89_H2C_BCN_UPD_BE_W0_GRP_IE_OFST);
+ h2c->w1 = le32_encode_bits(rtwvif->mac_id, RTW89_H2C_BCN_UPD_BE_W1_MACID) |
+ le32_encode_bits(RTW89_MGMT_HW_SSN_SEL, RTW89_H2C_BCN_UPD_BE_W1_SSN_SEL) |
+ le32_encode_bits(RTW89_MGMT_HW_SEQ_MODE, RTW89_H2C_BCN_UPD_BE_W1_SSN_MODE) |
+ le32_encode_bits(beacon_rate, RTW89_H2C_BCN_UPD_BE_W1_RATE);
+
+ skb_put_data(skb, skb_beacon->data, skb_beacon->len);
+ dev_kfree_skb_any(skb_beacon);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC, H2C_CL_MAC_FR_EXCHG,
+ H2C_FUNC_MAC_BCN_UPD_BE, 0, 1,
+ bcn_total_len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtw89_fw_h2c_update_beacon_be);
#define H2C_ROLE_MAINTAIN_LEN 4
int rtw89_fw_h2c_role_maintain(struct rtw89_dev *rtwdev,
@@ -2277,45 +3215,93 @@ fail:
return ret;
}
-#define H2C_JOIN_INFO_LEN 4
+static enum rtw89_fw_sta_type
+rtw89_fw_get_sta_type(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
+{
+ struct ieee80211_sta *sta = rtwsta_to_sta_safe(rtwsta);
+ struct ieee80211_vif *vif = rtwvif_to_vif(rtwvif);
+
+ if (!sta)
+ goto by_vif;
+
+ if (sta->deflink.eht_cap.has_eht)
+ return RTW89_FW_BE_STA;
+ else if (sta->deflink.he_cap.has_he)
+ return RTW89_FW_AX_STA;
+ else
+ return RTW89_FW_N_AC_STA;
+
+by_vif:
+ if (vif->bss_conf.eht_support)
+ return RTW89_FW_BE_STA;
+ else if (vif->bss_conf.he_support)
+ return RTW89_FW_AX_STA;
+ else
+ return RTW89_FW_N_AC_STA;
+}
+
int rtw89_fw_h2c_join_info(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif,
struct rtw89_sta *rtwsta, bool dis_conn)
{
struct sk_buff *skb;
u8 mac_id = rtwsta ? rtwsta->mac_id : rtwvif->mac_id;
u8 self_role = rtwvif->self_role;
+ enum rtw89_fw_sta_type sta_type;
u8 net_type = rtwvif->net_type;
+ struct rtw89_h2c_join_v1 *h2c_v1;
+ struct rtw89_h2c_join *h2c;
+ u32 len = sizeof(*h2c);
+ bool format_v1 = false;
int ret;
+ if (rtwdev->chip->chip_gen == RTW89_CHIP_BE) {
+ len = sizeof(*h2c_v1);
+ format_v1 = true;
+ }
+
if (net_type == RTW89_NET_TYPE_AP_MODE && rtwsta) {
self_role = RTW89_SELF_ROLE_AP_CLIENT;
net_type = dis_conn ? RTW89_NET_TYPE_NO_LINK : net_type;
}
- skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_JOIN_INFO_LEN);
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
if (!skb) {
rtw89_err(rtwdev, "failed to alloc skb for h2c join\n");
return -ENOMEM;
}
- skb_put(skb, H2C_JOIN_INFO_LEN);
- SET_JOININFO_MACID(skb->data, mac_id);
- SET_JOININFO_OP(skb->data, dis_conn);
- SET_JOININFO_BAND(skb->data, rtwvif->mac_idx);
- SET_JOININFO_WMM(skb->data, rtwvif->wmm);
- SET_JOININFO_TGR(skb->data, rtwvif->trigger);
- SET_JOININFO_ISHESTA(skb->data, 0);
- SET_JOININFO_DLBW(skb->data, 0);
- SET_JOININFO_TF_MAC_PAD(skb->data, 0);
- SET_JOININFO_DL_T_PE(skb->data, 0);
- SET_JOININFO_PORT_ID(skb->data, rtwvif->port);
- SET_JOININFO_NET_TYPE(skb->data, net_type);
- SET_JOININFO_WIFI_ROLE(skb->data, rtwvif->wifi_role);
- SET_JOININFO_SELF_ROLE(skb->data, self_role);
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_join *)skb->data;
+
+ h2c->w0 = le32_encode_bits(mac_id, RTW89_H2C_JOININFO_W0_MACID) |
+ le32_encode_bits(dis_conn, RTW89_H2C_JOININFO_W0_OP) |
+ le32_encode_bits(rtwvif->mac_idx, RTW89_H2C_JOININFO_W0_BAND) |
+ le32_encode_bits(rtwvif->wmm, RTW89_H2C_JOININFO_W0_WMM) |
+ le32_encode_bits(rtwvif->trigger, RTW89_H2C_JOININFO_W0_TGR) |
+ le32_encode_bits(0, RTW89_H2C_JOININFO_W0_ISHESTA) |
+ le32_encode_bits(0, RTW89_H2C_JOININFO_W0_DLBW) |
+ le32_encode_bits(0, RTW89_H2C_JOININFO_W0_TF_MAC_PAD) |
+ le32_encode_bits(0, RTW89_H2C_JOININFO_W0_DL_T_PE) |
+ le32_encode_bits(rtwvif->port, RTW89_H2C_JOININFO_W0_PORT_ID) |
+ le32_encode_bits(net_type, RTW89_H2C_JOININFO_W0_NET_TYPE) |
+ le32_encode_bits(rtwvif->wifi_role, RTW89_H2C_JOININFO_W0_WIFI_ROLE) |
+ le32_encode_bits(self_role, RTW89_H2C_JOININFO_W0_SELF_ROLE);
+ if (!format_v1)
+ goto done;
+
+ h2c_v1 = (struct rtw89_h2c_join_v1 *)skb->data;
+
+ sta_type = rtw89_fw_get_sta_type(rtwdev, rtwvif, rtwsta);
+
+ h2c_v1->w1 = le32_encode_bits(sta_type, RTW89_H2C_JOININFO_W1_STA_TYPE);
+ h2c_v1->w2 = 0;
+
+done:
rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
H2C_CAT_MAC, H2C_CL_MAC_MEDIA_RPT,
H2C_FUNC_MAC_JOININFO, 0, 1,
- H2C_JOIN_INFO_LEN);
+ len);
ret = rtw89_h2c_tx(rtwdev, skb, false);
if (ret) {
@@ -2368,24 +3354,49 @@ fail:
int rtw89_fw_h2c_macid_pause(struct rtw89_dev *rtwdev, u8 sh, u8 grp,
bool pause)
{
- struct rtw89_fw_macid_pause_grp h2c = {{0}};
- u8 len = sizeof(struct rtw89_fw_macid_pause_grp);
+ struct rtw89_fw_macid_pause_sleep_grp *h2c_new;
+ struct rtw89_fw_macid_pause_grp *h2c;
+ __le32 set = cpu_to_le32(BIT(sh));
+ u8 h2c_macid_pause_id;
struct sk_buff *skb;
+ u32 len;
int ret;
- skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_JOIN_INFO_LEN);
+ if (RTW89_CHK_FW_FEATURE(MACID_PAUSE_SLEEP, &rtwdev->fw)) {
+ h2c_macid_pause_id = H2C_FUNC_MAC_MACID_PAUSE_SLEEP;
+ len = sizeof(*h2c_new);
+ } else {
+ h2c_macid_pause_id = H2C_FUNC_MAC_MACID_PAUSE;
+ len = sizeof(*h2c);
+ }
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
if (!skb) {
- rtw89_err(rtwdev, "failed to alloc skb for h2c join\n");
+ rtw89_err(rtwdev, "failed to alloc skb for h2c macid pause\n");
return -ENOMEM;
}
- h2c.mask_grp[grp] = cpu_to_le32(BIT(sh));
- if (pause)
- h2c.pause_grp[grp] = cpu_to_le32(BIT(sh));
- skb_put_data(skb, &h2c, len);
+ skb_put(skb, len);
+
+ if (h2c_macid_pause_id == H2C_FUNC_MAC_MACID_PAUSE_SLEEP) {
+ h2c_new = (struct rtw89_fw_macid_pause_sleep_grp *)skb->data;
+
+ h2c_new->n[0].pause_mask_grp[grp] = set;
+ h2c_new->n[0].sleep_mask_grp[grp] = set;
+ if (pause) {
+ h2c_new->n[0].pause_grp[grp] = set;
+ h2c_new->n[0].sleep_grp[grp] = set;
+ }
+ } else {
+ h2c = (struct rtw89_fw_macid_pause_grp *)skb->data;
+
+ h2c->mask_grp[grp] = set;
+ if (pause)
+ h2c->pause_grp[grp] = set;
+ }
rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
H2C_CAT_MAC, H2C_CL_MAC_FW_OFLD,
- H2C_FUNC_MAC_MACID_PAUSE, 1, 0,
+ h2c_macid_pause_id, 1, 0,
len);
ret = rtw89_h2c_tx(rtwdev, skb, false);
@@ -2516,6 +3527,8 @@ int rtw89_fw_h2c_set_bcn_fltr_cfg(struct rtw89_dev *rtwdev,
{
struct rtw89_vif *rtwvif = vif_to_rtwvif_safe(vif);
struct ieee80211_bss_conf *bss_conf = vif ? &vif->bss_conf : NULL;
+ s32 thold = RTW89_DEFAULT_CQM_THOLD;
+ u32 hyst = RTW89_DEFAULT_CQM_HYST;
struct rtw89_h2c_bcnfltr *h2c;
u32 len = sizeof(*h2c);
struct sk_buff *skb;
@@ -2536,14 +3549,19 @@ int rtw89_fw_h2c_set_bcn_fltr_cfg(struct rtw89_dev *rtwdev,
skb_put(skb, len);
h2c = (struct rtw89_h2c_bcnfltr *)skb->data;
+ if (bss_conf->cqm_rssi_hyst)
+ hyst = bss_conf->cqm_rssi_hyst;
+ if (bss_conf->cqm_rssi_thold)
+ thold = bss_conf->cqm_rssi_thold;
+
h2c->w0 = le32_encode_bits(connect, RTW89_H2C_BCNFLTR_W0_MON_RSSI) |
le32_encode_bits(connect, RTW89_H2C_BCNFLTR_W0_MON_BCN) |
le32_encode_bits(connect, RTW89_H2C_BCNFLTR_W0_MON_EN) |
le32_encode_bits(RTW89_BCN_FLTR_OFFLOAD_MODE_DEFAULT,
RTW89_H2C_BCNFLTR_W0_MODE) |
le32_encode_bits(RTW89_BCN_LOSS_CNT, RTW89_H2C_BCNFLTR_W0_BCN_LOSS_CNT) |
- le32_encode_bits(bss_conf->cqm_rssi_hyst, RTW89_H2C_BCNFLTR_W0_RSSI_HYST) |
- le32_encode_bits(bss_conf->cqm_rssi_thold + MAX_RSSI,
+ le32_encode_bits(hyst, RTW89_H2C_BCNFLTR_W0_RSSI_HYST) |
+ le32_encode_bits(thold + MAX_RSSI,
RTW89_H2C_BCNFLTR_W0_RSSI_THRESHOLD) |
le32_encode_bits(rtwvif->mac_id, RTW89_H2C_BCNFLTR_W0_MAC_ID);
@@ -2735,11 +3753,11 @@ fail:
return ret;
}
-int rtw89_fw_h2c_cxdrv_init(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_init(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_dm *dm = &btc->dm;
- struct rtw89_btc_init_info *init_info = &dm->init_info;
+ struct rtw89_btc_init_info *init_info = &dm->init_info.init;
struct rtw89_btc_module *module = &init_info->module;
struct rtw89_btc_ant_info *ant = &module->ant;
struct rtw89_h2c_cxinit *h2c;
@@ -2755,7 +3773,7 @@ int rtw89_fw_h2c_cxdrv_init(struct rtw89_dev *rtwdev)
skb_put(skb, len);
h2c = (struct rtw89_h2c_cxinit *)skb->data;
- h2c->hdr.type = CXDRVINFO_INIT;
+ h2c->hdr.type = type;
h2c->hdr.len = len - H2C_LEN_CXDRVHDR;
h2c->ant_type = ant->type;
@@ -2802,12 +3820,53 @@ fail:
return ret;
}
+int rtw89_fw_h2c_cxdrv_init_v7(struct rtw89_dev *rtwdev, u8 type)
+{
+ struct rtw89_btc *btc = &rtwdev->btc;
+ struct rtw89_btc_dm *dm = &btc->dm;
+ struct rtw89_btc_init_info_v7 *init_info = &dm->init_info.init_v7;
+ struct rtw89_h2c_cxinit_v7 *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c cxdrv_init_v7\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_cxinit_v7 *)skb->data;
+
+ h2c->hdr.type = type;
+ h2c->hdr.ver = btc->ver->fcxinit;
+ h2c->hdr.len = len - H2C_LEN_CXDRVHDR_V7;
+ h2c->init = *init_info;
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, BTFC_SET,
+ SET_DRV_INFO, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
#define PORT_DATA_OFFSET 4
#define H2C_LEN_CXDRVINFO_ROLE_DBCC_LEN 12
#define H2C_LEN_CXDRVINFO_ROLE_SIZE(max_role_num) \
(4 + 12 * (max_role_num) + H2C_LEN_CXDRVHDR)
-int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
const struct rtw89_btc_ver *ver = btc->ver;
@@ -2832,7 +3891,7 @@ int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev)
skb_put(skb, len);
cmd = skb->data;
- RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_ROLE);
+ RTW89_SET_FWCMD_CXHDR_TYPE(cmd, type);
RTW89_SET_FWCMD_CXHDR_LEN(cmd, len - H2C_LEN_CXDRVHDR);
RTW89_SET_FWCMD_CXROLE_CONNECT_CNT(cmd, role_info->connect_cnt);
@@ -2888,7 +3947,7 @@ fail:
#define H2C_LEN_CXDRVINFO_ROLE_SIZE_V1(max_role_num) \
(4 + 16 * (max_role_num) + H2C_LEN_CXDRVINFO_ROLE_DBCC_LEN + H2C_LEN_CXDRVHDR)
-int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
const struct rtw89_btc_ver *ver = btc->ver;
@@ -2912,7 +3971,7 @@ int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev)
skb_put(skb, len);
cmd = skb->data;
- RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_ROLE);
+ RTW89_SET_FWCMD_CXHDR_TYPE(cmd, type);
RTW89_SET_FWCMD_CXHDR_LEN(cmd, len - H2C_LEN_CXDRVHDR);
RTW89_SET_FWCMD_CXROLE_CONNECT_CNT(cmd, role_info->connect_cnt);
@@ -2978,7 +4037,7 @@ fail:
#define H2C_LEN_CXDRVINFO_ROLE_SIZE_V2(max_role_num) \
(4 + 8 * (max_role_num) + H2C_LEN_CXDRVINFO_ROLE_DBCC_LEN + H2C_LEN_CXDRVHDR)
-int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
const struct rtw89_btc_ver *ver = btc->ver;
@@ -3002,7 +4061,7 @@ int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev)
skb_put(skb, len);
cmd = skb->data;
- RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_ROLE);
+ RTW89_SET_FWCMD_CXHDR_TYPE(cmd, type);
RTW89_SET_FWCMD_CXHDR_LEN(cmd, len - H2C_LEN_CXDRVHDR);
RTW89_SET_FWCMD_CXROLE_CONNECT_CNT(cmd, role_info->connect_cnt);
@@ -3062,11 +4121,11 @@ fail:
}
#define H2C_LEN_CXDRVINFO_CTRL (4 + H2C_LEN_CXDRVHDR)
-int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
const struct rtw89_btc_ver *ver = btc->ver;
- struct rtw89_btc_ctrl *ctrl = &btc->ctrl;
+ struct rtw89_btc_ctrl *ctrl = &btc->ctrl.ctrl;
struct sk_buff *skb;
u8 *cmd;
int ret;
@@ -3079,7 +4138,7 @@ int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev)
skb_put(skb, H2C_LEN_CXDRVINFO_CTRL);
cmd = skb->data;
- RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_CTRL);
+ RTW89_SET_FWCMD_CXHDR_TYPE(cmd, type);
RTW89_SET_FWCMD_CXHDR_LEN(cmd, H2C_LEN_CXDRVINFO_CTRL - H2C_LEN_CXDRVHDR);
RTW89_SET_FWCMD_CXCTRL_MANUAL(cmd, ctrl->manual);
@@ -3106,8 +4165,47 @@ fail:
return ret;
}
+int rtw89_fw_h2c_cxdrv_ctrl_v7(struct rtw89_dev *rtwdev, u8 type)
+{
+ struct rtw89_btc *btc = &rtwdev->btc;
+ struct rtw89_btc_ctrl_v7 *ctrl = &btc->ctrl.ctrl_v7;
+ struct rtw89_h2c_cxctrl_v7 *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c cxdrv_ctrl\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_cxctrl_v7 *)skb->data;
+
+ h2c->hdr.type = type;
+ h2c->hdr.ver = btc->ver->fcxctrl;
+ h2c->hdr.len = sizeof(*h2c) - H2C_LEN_CXDRVHDR_V7;
+ h2c->ctrl = *ctrl;
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, BTFC_SET,
+ SET_DRV_INFO, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
#define H2C_LEN_CXDRVINFO_TRX (28 + H2C_LEN_CXDRVHDR)
-int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_trx_info *trx = &btc->dm.trx_info;
@@ -3123,7 +4221,7 @@ int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev)
skb_put(skb, H2C_LEN_CXDRVINFO_TRX);
cmd = skb->data;
- RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_TRX);
+ RTW89_SET_FWCMD_CXHDR_TYPE(cmd, type);
RTW89_SET_FWCMD_CXHDR_LEN(cmd, H2C_LEN_CXDRVINFO_TRX - H2C_LEN_CXDRVHDR);
RTW89_SET_FWCMD_CXTRX_TXLV(cmd, trx->tx_lvl);
@@ -3163,7 +4261,7 @@ fail:
}
#define H2C_LEN_CXDRVINFO_RFK (4 + H2C_LEN_CXDRVHDR)
-int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev)
+int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev, u8 type)
{
struct rtw89_btc *btc = &rtwdev->btc;
struct rtw89_btc_wl_info *wl = &btc->cx.wl;
@@ -3180,7 +4278,7 @@ int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev)
skb_put(skb, H2C_LEN_CXDRVINFO_RFK);
cmd = skb->data;
- RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_RFK);
+ RTW89_SET_FWCMD_CXHDR_TYPE(cmd, type);
RTW89_SET_FWCMD_CXHDR_LEN(cmd, H2C_LEN_CXDRVINFO_RFK - H2C_LEN_CXDRVHDR);
RTW89_SET_FWCMD_CXRFK_STATE(cmd, rfk_info->state);
@@ -3296,62 +4394,163 @@ int rtw89_fw_h2c_add_pkt_offload(struct rtw89_dev *rtwdev, u8 *id,
return 0;
}
-#define H2C_LEN_SCAN_LIST_OFFLOAD 4
-int rtw89_fw_h2c_scan_list_offload(struct rtw89_dev *rtwdev, int len,
+int rtw89_fw_h2c_scan_list_offload(struct rtw89_dev *rtwdev, int ch_num,
struct list_head *chan_list)
{
struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
+ struct rtw89_h2c_chinfo_elem *elem;
struct rtw89_mac_chinfo *ch_info;
+ struct rtw89_h2c_chinfo *h2c;
struct sk_buff *skb;
- int skb_len = H2C_LEN_SCAN_LIST_OFFLOAD + len * RTW89_MAC_CHINFO_SIZE;
unsigned int cond;
- u8 *cmd;
+ int skb_len;
int ret;
+ static_assert(sizeof(*elem) == RTW89_MAC_CHINFO_SIZE);
+
+ skb_len = struct_size(h2c, elem, ch_num);
skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, skb_len);
if (!skb) {
rtw89_err(rtwdev, "failed to alloc skb for h2c scan list\n");
return -ENOMEM;
}
- skb_put(skb, H2C_LEN_SCAN_LIST_OFFLOAD);
- cmd = skb->data;
+ skb_put(skb, sizeof(*h2c));
+ h2c = (struct rtw89_h2c_chinfo *)skb->data;
+
+ h2c->ch_num = ch_num;
+ h2c->elem_size = sizeof(*elem) / 4; /* in unit of 4 bytes */
+
+ list_for_each_entry(ch_info, chan_list, list) {
+ elem = (struct rtw89_h2c_chinfo_elem *)skb_put(skb, sizeof(*elem));
+
+ elem->w0 = le32_encode_bits(ch_info->period, RTW89_H2C_CHINFO_W0_PERIOD) |
+ le32_encode_bits(ch_info->dwell_time, RTW89_H2C_CHINFO_W0_DWELL) |
+ le32_encode_bits(ch_info->central_ch, RTW89_H2C_CHINFO_W0_CENTER_CH) |
+ le32_encode_bits(ch_info->pri_ch, RTW89_H2C_CHINFO_W0_PRI_CH);
+
+ elem->w1 = le32_encode_bits(ch_info->bw, RTW89_H2C_CHINFO_W1_BW) |
+ le32_encode_bits(ch_info->notify_action, RTW89_H2C_CHINFO_W1_ACTION) |
+ le32_encode_bits(ch_info->num_pkt, RTW89_H2C_CHINFO_W1_NUM_PKT) |
+ le32_encode_bits(ch_info->tx_pkt, RTW89_H2C_CHINFO_W1_TX) |
+ le32_encode_bits(ch_info->pause_data, RTW89_H2C_CHINFO_W1_PAUSE_DATA) |
+ le32_encode_bits(ch_info->ch_band, RTW89_H2C_CHINFO_W1_BAND) |
+ le32_encode_bits(ch_info->probe_id, RTW89_H2C_CHINFO_W1_PKT_ID) |
+ le32_encode_bits(ch_info->dfs_ch, RTW89_H2C_CHINFO_W1_DFS) |
+ le32_encode_bits(ch_info->tx_null, RTW89_H2C_CHINFO_W1_TX_NULL) |
+ le32_encode_bits(ch_info->rand_seq_num, RTW89_H2C_CHINFO_W1_RANDOM);
+
+ elem->w2 = le32_encode_bits(ch_info->pkt_id[0], RTW89_H2C_CHINFO_W2_PKT0) |
+ le32_encode_bits(ch_info->pkt_id[1], RTW89_H2C_CHINFO_W2_PKT1) |
+ le32_encode_bits(ch_info->pkt_id[2], RTW89_H2C_CHINFO_W2_PKT2) |
+ le32_encode_bits(ch_info->pkt_id[3], RTW89_H2C_CHINFO_W2_PKT3);
+
+ elem->w3 = le32_encode_bits(ch_info->pkt_id[4], RTW89_H2C_CHINFO_W3_PKT4) |
+ le32_encode_bits(ch_info->pkt_id[5], RTW89_H2C_CHINFO_W3_PKT5) |
+ le32_encode_bits(ch_info->pkt_id[6], RTW89_H2C_CHINFO_W3_PKT6) |
+ le32_encode_bits(ch_info->pkt_id[7], RTW89_H2C_CHINFO_W3_PKT7);
+ }
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC, H2C_CL_MAC_FW_OFLD,
+ H2C_FUNC_ADD_SCANOFLD_CH, 1, 1, skb_len);
+
+ cond = RTW89_SCANOFLD_WAIT_COND_ADD_CH;
+
+ ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_FW, "failed to add scan ofld ch\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num,
+ struct list_head *chan_list)
+{
+ struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
+ struct rtw89_h2c_chinfo_elem_be *elem;
+ struct rtw89_mac_chinfo_be *ch_info;
+ struct rtw89_h2c_chinfo *h2c;
+ struct sk_buff *skb;
+ unsigned int cond;
+ int skb_len;
+ int ret;
+
+ static_assert(sizeof(*elem) == RTW89_MAC_CHINFO_SIZE);
+
+ skb_len = struct_size(h2c, elem, ch_num);
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, skb_len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c scan list\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, sizeof(*h2c));
+ h2c = (struct rtw89_h2c_chinfo *)skb->data;
- RTW89_SET_FWCMD_SCANOFLD_CH_NUM(cmd, len);
- /* in unit of 4 bytes */
- RTW89_SET_FWCMD_SCANOFLD_CH_SIZE(cmd, RTW89_MAC_CHINFO_SIZE / 4);
+ h2c->ch_num = ch_num;
+ h2c->elem_size = sizeof(*elem) / 4; /* in unit of 4 bytes */
+ h2c->arg = u8_encode_bits(RTW89_PHY_0, RTW89_H2C_CHINFO_ARG_MAC_IDX_MASK);
list_for_each_entry(ch_info, chan_list, list) {
- cmd = skb_put(skb, RTW89_MAC_CHINFO_SIZE);
-
- RTW89_SET_FWCMD_CHINFO_PERIOD(cmd, ch_info->period);
- RTW89_SET_FWCMD_CHINFO_DWELL(cmd, ch_info->dwell_time);
- RTW89_SET_FWCMD_CHINFO_CENTER_CH(cmd, ch_info->central_ch);
- RTW89_SET_FWCMD_CHINFO_PRI_CH(cmd, ch_info->pri_ch);
- RTW89_SET_FWCMD_CHINFO_BW(cmd, ch_info->bw);
- RTW89_SET_FWCMD_CHINFO_ACTION(cmd, ch_info->notify_action);
- RTW89_SET_FWCMD_CHINFO_NUM_PKT(cmd, ch_info->num_pkt);
- RTW89_SET_FWCMD_CHINFO_TX(cmd, ch_info->tx_pkt);
- RTW89_SET_FWCMD_CHINFO_PAUSE_DATA(cmd, ch_info->pause_data);
- RTW89_SET_FWCMD_CHINFO_BAND(cmd, ch_info->ch_band);
- RTW89_SET_FWCMD_CHINFO_PKT_ID(cmd, ch_info->probe_id);
- RTW89_SET_FWCMD_CHINFO_DFS(cmd, ch_info->dfs_ch);
- RTW89_SET_FWCMD_CHINFO_TX_NULL(cmd, ch_info->tx_null);
- RTW89_SET_FWCMD_CHINFO_RANDOM(cmd, ch_info->rand_seq_num);
- RTW89_SET_FWCMD_CHINFO_PKT0(cmd, ch_info->pkt_id[0]);
- RTW89_SET_FWCMD_CHINFO_PKT1(cmd, ch_info->pkt_id[1]);
- RTW89_SET_FWCMD_CHINFO_PKT2(cmd, ch_info->pkt_id[2]);
- RTW89_SET_FWCMD_CHINFO_PKT3(cmd, ch_info->pkt_id[3]);
- RTW89_SET_FWCMD_CHINFO_PKT4(cmd, ch_info->pkt_id[4]);
- RTW89_SET_FWCMD_CHINFO_PKT5(cmd, ch_info->pkt_id[5]);
- RTW89_SET_FWCMD_CHINFO_PKT6(cmd, ch_info->pkt_id[6]);
- RTW89_SET_FWCMD_CHINFO_PKT7(cmd, ch_info->pkt_id[7]);
+ elem = (struct rtw89_h2c_chinfo_elem_be *)skb_put(skb, sizeof(*elem));
+
+ elem->w0 = le32_encode_bits(ch_info->period, RTW89_H2C_CHINFO_BE_W0_PERIOD) |
+ le32_encode_bits(ch_info->dwell_time, RTW89_H2C_CHINFO_BE_W0_DWELL) |
+ le32_encode_bits(ch_info->central_ch,
+ RTW89_H2C_CHINFO_BE_W0_CENTER_CH) |
+ le32_encode_bits(ch_info->pri_ch, RTW89_H2C_CHINFO_BE_W0_PRI_CH);
+
+ elem->w1 = le32_encode_bits(ch_info->bw, RTW89_H2C_CHINFO_BE_W1_BW) |
+ le32_encode_bits(ch_info->ch_band, RTW89_H2C_CHINFO_BE_W1_CH_BAND) |
+ le32_encode_bits(ch_info->dfs_ch, RTW89_H2C_CHINFO_BE_W1_DFS) |
+ le32_encode_bits(ch_info->pause_data,
+ RTW89_H2C_CHINFO_BE_W1_PAUSE_DATA) |
+ le32_encode_bits(ch_info->tx_null, RTW89_H2C_CHINFO_BE_W1_TX_NULL) |
+ le32_encode_bits(ch_info->rand_seq_num,
+ RTW89_H2C_CHINFO_BE_W1_RANDOM) |
+ le32_encode_bits(ch_info->notify_action,
+ RTW89_H2C_CHINFO_BE_W1_NOTIFY) |
+ le32_encode_bits(ch_info->probe_id != 0xff ? 1 : 0,
+ RTW89_H2C_CHINFO_BE_W1_PROBE) |
+ le32_encode_bits(ch_info->leave_crit,
+ RTW89_H2C_CHINFO_BE_W1_EARLY_LEAVE_CRIT) |
+ le32_encode_bits(ch_info->chkpt_timer,
+ RTW89_H2C_CHINFO_BE_W1_CHKPT_TIMER);
+
+ elem->w2 = le32_encode_bits(ch_info->leave_time,
+ RTW89_H2C_CHINFO_BE_W2_EARLY_LEAVE_TIME) |
+ le32_encode_bits(ch_info->leave_th,
+ RTW89_H2C_CHINFO_BE_W2_EARLY_LEAVE_TH) |
+ le32_encode_bits(ch_info->tx_pkt_ctrl,
+ RTW89_H2C_CHINFO_BE_W2_TX_PKT_CTRL);
+
+ elem->w3 = le32_encode_bits(ch_info->pkt_id[0], RTW89_H2C_CHINFO_BE_W3_PKT0) |
+ le32_encode_bits(ch_info->pkt_id[1], RTW89_H2C_CHINFO_BE_W3_PKT1) |
+ le32_encode_bits(ch_info->pkt_id[2], RTW89_H2C_CHINFO_BE_W3_PKT2) |
+ le32_encode_bits(ch_info->pkt_id[3], RTW89_H2C_CHINFO_BE_W3_PKT3);
+
+ elem->w4 = le32_encode_bits(ch_info->pkt_id[4], RTW89_H2C_CHINFO_BE_W4_PKT4) |
+ le32_encode_bits(ch_info->pkt_id[5], RTW89_H2C_CHINFO_BE_W4_PKT5) |
+ le32_encode_bits(ch_info->pkt_id[6], RTW89_H2C_CHINFO_BE_W4_PKT6) |
+ le32_encode_bits(ch_info->pkt_id[7], RTW89_H2C_CHINFO_BE_W4_PKT7);
+
+ elem->w5 = le32_encode_bits(ch_info->sw_def, RTW89_H2C_CHINFO_BE_W5_SW_DEF) |
+ le32_encode_bits(ch_info->fw_probe0_ssids,
+ RTW89_H2C_CHINFO_BE_W5_FW_PROBE0_SSIDS);
+
+ elem->w6 = le32_encode_bits(ch_info->fw_probe0_shortssids,
+ RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_SHORTSSIDS) |
+ le32_encode_bits(ch_info->fw_probe0_bssids,
+ RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_BSSIDS);
}
rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
H2C_CAT_MAC, H2C_CL_MAC_FW_OFLD,
H2C_FUNC_ADD_SCANOFLD_CH, 1, 1, skb_len);
- cond = RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_ADD_SCANOFLD_CH);
+ cond = RTW89_SCANOFLD_WAIT_COND_ADD_CH;
ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
if (ret) {
@@ -3410,7 +4609,10 @@ int rtw89_fw_h2c_scan_offload(struct rtw89_dev *rtwdev,
H2C_FUNC_SCANOFLD, 1, 1,
len);
- cond = RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_SCANOFLD);
+ if (option->enable)
+ cond = RTW89_SCANOFLD_WAIT_COND_START;
+ else
+ cond = RTW89_SCANOFLD_WAIT_COND_STOP;
ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
if (ret) {
@@ -3421,6 +4623,169 @@ int rtw89_fw_h2c_scan_offload(struct rtw89_dev *rtwdev,
return 0;
}
+static void rtw89_scan_get_6g_disabled_chan(struct rtw89_dev *rtwdev,
+ struct rtw89_scan_option *option)
+{
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_channel *chan;
+ u8 i, idx;
+
+ sband = rtwdev->hw->wiphy->bands[NL80211_BAND_6GHZ];
+
+ for (i = 0; i < sband->n_channels; i++) {
+ chan = &sband->channels[i];
+ if (chan->flags & IEEE80211_CHAN_DISABLED) {
+ idx = (chan->hw_value - 1) / 4;
+ option->prohib_chan |= BIT(idx);
+ }
+ }
+}
+
+int rtw89_fw_h2c_scan_offload_be(struct rtw89_dev *rtwdev,
+ struct rtw89_scan_option *option,
+ struct rtw89_vif *rtwvif)
+{
+ struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info;
+ struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait;
+ struct rtw89_h2c_scanofld_be_macc_role *macc_role;
+ struct rtw89_chan *op = &scan_info->op_chan;
+ struct rtw89_h2c_scanofld_be_opch *opch;
+ struct rtw89_h2c_scanofld_be *h2c;
+ struct sk_buff *skb;
+ u8 macc_role_size = sizeof(*macc_role) * option->num_macc_role;
+ u8 opch_size = sizeof(*opch) * option->num_opch;
+ u8 probe_id[NUM_NL80211_BANDS];
+ unsigned int cond;
+ void *ptr;
+ int ret;
+ u32 len;
+ u8 i;
+
+ rtw89_scan_get_6g_disabled_chan(rtwdev, option);
+
+ len = sizeof(*h2c) + macc_role_size + opch_size;
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c scan offload\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_scanofld_be *)skb->data;
+ ptr = skb->data;
+
+ h2c->w0 = le32_encode_bits(option->operation, RTW89_H2C_SCANOFLD_BE_W0_OP) |
+ le32_encode_bits(option->scan_mode,
+ RTW89_H2C_SCANOFLD_BE_W0_SCAN_MODE) |
+ le32_encode_bits(option->repeat, RTW89_H2C_SCANOFLD_BE_W0_REPEAT) |
+ le32_encode_bits(true, RTW89_H2C_SCANOFLD_BE_W0_NOTIFY_END) |
+ le32_encode_bits(true, RTW89_H2C_SCANOFLD_BE_W0_LEARN_CH) |
+ le32_encode_bits(rtwvif->mac_id, RTW89_H2C_SCANOFLD_BE_W0_MACID) |
+ le32_encode_bits(rtwvif->port, RTW89_H2C_SCANOFLD_BE_W0_PORT) |
+ le32_encode_bits(option->band, RTW89_H2C_SCANOFLD_BE_W0_BAND);
+
+ h2c->w1 = le32_encode_bits(option->num_macc_role, RTW89_H2C_SCANOFLD_BE_W1_NUM_MACC_ROLE) |
+ le32_encode_bits(option->num_opch, RTW89_H2C_SCANOFLD_BE_W1_NUM_OP) |
+ le32_encode_bits(option->norm_pd, RTW89_H2C_SCANOFLD_BE_W1_NORM_PD);
+
+ h2c->w2 = le32_encode_bits(option->slow_pd, RTW89_H2C_SCANOFLD_BE_W2_SLOW_PD) |
+ le32_encode_bits(option->norm_cy, RTW89_H2C_SCANOFLD_BE_W2_NORM_CY) |
+ le32_encode_bits(option->opch_end, RTW89_H2C_SCANOFLD_BE_W2_OPCH_END);
+
+ h2c->w3 = le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_W3_NUM_SSID) |
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_W3_NUM_SHORT_SSID) |
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_W3_NUM_BSSID) |
+ le32_encode_bits(probe_id[NL80211_BAND_2GHZ], RTW89_H2C_SCANOFLD_BE_W3_PROBEID);
+
+ h2c->w4 = le32_encode_bits(probe_id[NL80211_BAND_5GHZ],
+ RTW89_H2C_SCANOFLD_BE_W4_PROBE_5G) |
+ le32_encode_bits(probe_id[NL80211_BAND_6GHZ],
+ RTW89_H2C_SCANOFLD_BE_W4_PROBE_6G) |
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_W4_DELAY_START);
+
+ h2c->w5 = le32_encode_bits(option->mlo_mode, RTW89_H2C_SCANOFLD_BE_W5_MLO_MODE);
+
+ h2c->w6 = le32_encode_bits(option->prohib_chan,
+ RTW89_H2C_SCANOFLD_BE_W6_CHAN_PROHIB_LOW);
+ h2c->w7 = le32_encode_bits(option->prohib_chan >> 32,
+ RTW89_H2C_SCANOFLD_BE_W7_CHAN_PROHIB_HIGH);
+ ptr += sizeof(*h2c);
+
+ for (i = 0; i < option->num_macc_role; i++) {
+ macc_role = (struct rtw89_h2c_scanofld_be_macc_role *)&h2c->role[i];
+ macc_role->w0 =
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_BAND) |
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_PORT) |
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_MACID) |
+ le32_encode_bits(0, RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_OPCH_END);
+ ptr += sizeof(*macc_role);
+ }
+
+ for (i = 0; i < option->num_opch; i++) {
+ opch = ptr;
+ opch->w0 = le32_encode_bits(rtwvif->mac_id,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W0_MACID) |
+ le32_encode_bits(option->band,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W0_BAND) |
+ le32_encode_bits(rtwvif->port,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W0_PORT) |
+ le32_encode_bits(RTW89_SCAN_OPMODE_INTV,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W0_POLICY) |
+ le32_encode_bits(true,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W0_TXNULL) |
+ le32_encode_bits(RTW89_OFF_CHAN_TIME / 10,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W0_POLICY_VAL);
+
+ opch->w1 = le32_encode_bits(RTW89_CHANNEL_TIME,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W1_DURATION) |
+ le32_encode_bits(op->band_type,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W1_CH_BAND) |
+ le32_encode_bits(op->band_width,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W1_BW) |
+ le32_encode_bits(0x3,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W1_NOTIFY) |
+ le32_encode_bits(op->primary_channel,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W1_PRI_CH) |
+ le32_encode_bits(op->channel,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W1_CENTRAL_CH);
+
+ opch->w2 = le32_encode_bits(0,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W2_PKTS_CTRL) |
+ le32_encode_bits(0,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W2_SW_DEF) |
+ le32_encode_bits(2,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W2_SS);
+
+ opch->w3 = le32_encode_bits(RTW89_SCANOFLD_PKT_NONE,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT0) |
+ le32_encode_bits(RTW89_SCANOFLD_PKT_NONE,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT1) |
+ le32_encode_bits(RTW89_SCANOFLD_PKT_NONE,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT2) |
+ le32_encode_bits(RTW89_SCANOFLD_PKT_NONE,
+ RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT3);
+ ptr += sizeof(*opch);
+ }
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC, H2C_CL_MAC_FW_OFLD,
+ H2C_FUNC_SCANOFLD_BE, 1, 1,
+ len);
+
+ if (option->enable)
+ cond = RTW89_SCANOFLD_BE_WAIT_COND_START;
+ else
+ cond = RTW89_SCANOFLD_BE_WAIT_COND_STOP;
+
+ ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
+ if (ret) {
+ rtw89_debug(rtwdev, RTW89_DBG_FW, "failed to scan be ofld\n");
+ return ret;
+ }
+
+ return 0;
+}
+
int rtw89_fw_h2c_rf_reg(struct rtw89_dev *rtwdev,
struct rtw89_fw_h2c_rf_reg_info *info,
u16 len, u8 page)
@@ -3497,6 +4862,328 @@ fail:
}
EXPORT_SYMBOL(rtw89_fw_h2c_rf_ntfy_mcc);
+int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ struct rtw89_rfk_mcc_info *rfk_mcc = &rtwdev->rfk_mcc;
+ struct rtw89_fw_h2c_rfk_pre_info *h2c;
+ u8 tbl_sel = rfk_mcc->table_idx;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ u8 tbl, path;
+ u32 val32;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c rfk_pre_ntfy\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_fw_h2c_rfk_pre_info *)skb->data;
+
+ h2c->mlo_mode = cpu_to_le32(rtwdev->mlo_dbcc_mode);
+
+ BUILD_BUG_ON(NUM_OF_RTW89_FW_RFK_TBL > RTW89_RFK_CHS_NR);
+
+ for (tbl = 0; tbl < NUM_OF_RTW89_FW_RFK_TBL; tbl++) {
+ for (path = 0; path < NUM_OF_RTW89_FW_RFK_PATH; path++) {
+ h2c->dbcc.ch[path][tbl] = cpu_to_le32(rfk_mcc->ch[tbl]);
+ h2c->dbcc.band[path][tbl] = cpu_to_le32(rfk_mcc->band[tbl]);
+ }
+ }
+
+ for (path = 0; path < NUM_OF_RTW89_FW_RFK_PATH; path++) {
+ h2c->tbl.cur_ch[path] = cpu_to_le32(rfk_mcc->ch[tbl_sel]);
+ h2c->tbl.cur_band[path] = cpu_to_le32(rfk_mcc->band[tbl_sel]);
+ }
+
+ h2c->phy_idx = cpu_to_le32(phy_idx);
+ h2c->cur_band = cpu_to_le32(rfk_mcc->band[tbl_sel]);
+ h2c->cur_bw = cpu_to_le32(rfk_mcc->bw[tbl_sel]);
+ h2c->cur_center_ch = cpu_to_le32(rfk_mcc->ch[tbl_sel]);
+
+ val32 = rtw89_phy_read32_mask(rtwdev, R_COEF_SEL, B_COEF_SEL_IQC_V1);
+ h2c->ktbl_sel0 = cpu_to_le32(val32);
+ val32 = rtw89_phy_read32_mask(rtwdev, R_COEF_SEL_C1, B_COEF_SEL_IQC_V1);
+ h2c->ktbl_sel1 = cpu_to_le32(val32);
+ val32 = rtw89_read_rf(rtwdev, RF_PATH_A, RR_CFGCH, RFREG_MASK);
+ h2c->rfmod0 = cpu_to_le32(val32);
+ val32 = rtw89_read_rf(rtwdev, RF_PATH_B, RR_CFGCH, RFREG_MASK);
+ h2c->rfmod1 = cpu_to_le32(val32);
+
+ if (rtw89_is_mlo_1_1(rtwdev))
+ h2c->mlo_1_1 = cpu_to_le32(1);
+
+ h2c->rfe_type = cpu_to_le32(rtwdev->efuse.rfe_type);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_PRE_NOTIFY, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
+int rtw89_fw_h2c_rf_tssi(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+ enum rtw89_tssi_mode tssi_mode)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev,
+ RTW89_SUB_ENTITY_0);
+ struct rtw89_hal *hal = &rtwdev->hal;
+ struct rtw89_h2c_rf_tssi *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c RF TSSI\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_rf_tssi *)skb->data;
+
+ h2c->len = cpu_to_le16(len);
+ h2c->phy = phy_idx;
+ h2c->ch = chan->channel;
+ h2c->bw = chan->band_width;
+ h2c->band = chan->band_type;
+ h2c->hwtx_en = true;
+ h2c->cv = hal->cv;
+ h2c->tssi_mode = tssi_mode;
+
+ rtw89_phy_rfk_tssi_fill_fwcmd_efuse_to_de(rtwdev, phy_idx, chan, h2c);
+ rtw89_phy_rfk_tssi_fill_fwcmd_tmeter_tbl(rtwdev, phy_idx, chan, h2c);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_TSSI_OFFLOAD, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
+int rtw89_fw_h2c_rf_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ struct rtw89_h2c_rf_iqk *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c RF IQK\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_rf_iqk *)skb->data;
+
+ h2c->phy_idx = cpu_to_le32(phy_idx);
+ h2c->dbcc = cpu_to_le32(rtwdev->dbcc_en);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_IQK_OFFLOAD, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
+int rtw89_fw_h2c_rf_dpk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev,
+ RTW89_SUB_ENTITY_0);
+ struct rtw89_h2c_rf_dpk *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c RF DPK\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_rf_dpk *)skb->data;
+
+ h2c->len = len;
+ h2c->phy = phy_idx;
+ h2c->dpk_enable = true;
+ h2c->kpath = RF_AB;
+ h2c->cur_band = chan->band_type;
+ h2c->cur_bw = chan->band_width;
+ h2c->cur_ch = chan->channel;
+ h2c->dpk_dbg_en = rtw89_debug_is_enabled(rtwdev, RTW89_DBG_RFK);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_DPK_OFFLOAD, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
+int rtw89_fw_h2c_rf_txgapk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev,
+ RTW89_SUB_ENTITY_0);
+ struct rtw89_hal *hal = &rtwdev->hal;
+ struct rtw89_h2c_rf_txgapk *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c RF TXGAPK\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_rf_txgapk *)skb->data;
+
+ h2c->len = len;
+ h2c->ktype = 2;
+ h2c->phy = phy_idx;
+ h2c->kpath = RF_AB;
+ h2c->band = chan->band_type;
+ h2c->bw = chan->band_width;
+ h2c->ch = chan->channel;
+ h2c->cv = hal->cv;
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_TXGAPK_OFFLOAD, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
+int rtw89_fw_h2c_rf_dack(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ struct rtw89_h2c_rf_dack *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c RF DACK\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_rf_dack *)skb->data;
+
+ h2c->len = cpu_to_le32(len);
+ h2c->phy = cpu_to_le32(phy_idx);
+ h2c->type = cpu_to_le32(0);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_DACK_OFFLOAD, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
+int rtw89_fw_h2c_rf_rxdck(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev,
+ RTW89_SUB_ENTITY_0);
+ struct rtw89_h2c_rf_rxdck *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for h2c RF RXDCK\n");
+ return -ENOMEM;
+ }
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_rf_rxdck *)skb->data;
+
+ h2c->len = len;
+ h2c->phy = phy_idx;
+ h2c->is_afe = false;
+ h2c->kpath = RF_AB;
+ h2c->cur_band = chan->band_type;
+ h2c->cur_bw = chan->band_width;
+ h2c->cur_ch = chan->channel;
+ h2c->rxdck_dbg_en = rtw89_debug_is_enabled(rtwdev, RTW89_DBG_RFK);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK,
+ H2C_FUNC_RFK_RXDCK_OFFLOAD, 0, 0, len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ goto fail;
+ }
+
+ return 0;
+fail:
+ dev_kfree_skb_any(skb);
+
+ return ret;
+}
+
int rtw89_fw_h2c_raw_with_hdr(struct rtw89_dev *rtwdev,
u8 h2c_class, u8 h2c_func, u8 *buf, u16 len,
bool rack, bool dack)
@@ -3600,7 +5287,7 @@ static bool rtw89_fw_c2h_chk_atomic(struct rtw89_dev *rtwdev,
default:
return false;
case RTW89_C2H_CAT_MAC:
- return rtw89_mac_c2h_chk_atomic(rtwdev, class, func);
+ return rtw89_mac_c2h_chk_atomic(rtwdev, c2h, class, func);
case RTW89_C2H_CAT_OUTSRC:
return rtw89_phy_c2h_chk_atomic(rtwdev, class, func);
}
@@ -4050,8 +5737,66 @@ static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type,
}
}
-static int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev,
- struct rtw89_vif *rtwvif, bool connected)
+static void rtw89_hw_scan_add_chan_be(struct rtw89_dev *rtwdev, int chan_type,
+ int ssid_num,
+ struct rtw89_mac_chinfo_be *ch_info)
+{
+ struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info;
+ struct ieee80211_vif *vif = rtwdev->scan_info.scanning_vif;
+ struct rtw89_vif *rtwvif = (struct rtw89_vif *)vif->drv_priv;
+ struct cfg80211_scan_request *req = rtwvif->scan_req;
+ struct rtw89_pktofld_info *info;
+ u8 band, probe_count = 0, i;
+
+ ch_info->notify_action = RTW89_SCANOFLD_DEBUG_MASK;
+ ch_info->dfs_ch = chan_type == RTW89_CHAN_DFS;
+ ch_info->bw = RTW89_SCAN_WIDTH;
+ ch_info->tx_null = false;
+ ch_info->pause_data = false;
+ ch_info->probe_id = RTW89_SCANOFLD_PKT_NONE;
+
+ if (ssid_num) {
+ band = rtw89_hw_to_nl80211_band(ch_info->ch_band);
+
+ list_for_each_entry(info, &scan_info->pkt_list[band], list) {
+ if (info->channel_6ghz &&
+ ch_info->pri_ch != info->channel_6ghz)
+ continue;
+ ch_info->pkt_id[probe_count++] = info->id;
+ if (probe_count >= RTW89_SCANOFLD_MAX_SSID)
+ break;
+ }
+ }
+
+ if (ch_info->ch_band == RTW89_BAND_6G) {
+ if ((ssid_num == 1 && req->ssids[0].ssid_len == 0) ||
+ !ch_info->is_psc) {
+ ch_info->probe_id = RTW89_SCANOFLD_PKT_NONE;
+ if (!req->duration_mandatory)
+ ch_info->period -= RTW89_DWELL_TIME_6G;
+ }
+ }
+
+ for (i = probe_count; i < RTW89_SCANOFLD_MAX_SSID; i++)
+ ch_info->pkt_id[i] = RTW89_SCANOFLD_PKT_NONE;
+
+ switch (chan_type) {
+ case RTW89_CHAN_DFS:
+ if (ch_info->ch_band != RTW89_BAND_6G)
+ ch_info->period =
+ max_t(u8, ch_info->period, RTW89_DFS_CHAN_TIME);
+ ch_info->dwell_time = RTW89_DWELL_TIME;
+ break;
+ case RTW89_CHAN_ACTIVE:
+ break;
+ default:
+ rtw89_warn(rtwdev, "Channel type out of bound\n");
+ break;
+ }
+}
+
+int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool connected)
{
struct cfg80211_scan_request *req = rtwvif->scan_req;
struct rtw89_mac_chinfo *ch_info, *tmp;
@@ -4074,7 +5819,7 @@ static int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev,
goto out;
}
- if (req->duration_mandatory)
+ if (req->duration)
ch_info->period = req->duration;
else if (channel->band == NL80211_BAND_6GHZ)
ch_info->period = RTW89_CHANNEL_TIME_6G +
@@ -4127,9 +5872,69 @@ out:
return ret;
}
+int rtw89_hw_scan_add_chan_list_be(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool connected)
+{
+ struct cfg80211_scan_request *req = rtwvif->scan_req;
+ struct rtw89_mac_chinfo_be *ch_info, *tmp;
+ struct ieee80211_channel *channel;
+ struct list_head chan_list;
+ enum rtw89_chan_type type;
+ int list_len, ret;
+ bool random_seq;
+ u32 idx;
+
+ random_seq = !!(req->flags & NL80211_SCAN_FLAG_RANDOM_SN);
+ INIT_LIST_HEAD(&chan_list);
+
+ for (idx = rtwdev->scan_info.last_chan_idx, list_len = 0;
+ idx < req->n_channels && list_len < RTW89_SCAN_LIST_LIMIT;
+ idx++, list_len++) {
+ channel = req->channels[idx];
+ ch_info = kzalloc(sizeof(*ch_info), GFP_KERNEL);
+ if (!ch_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (req->duration)
+ ch_info->period = req->duration;
+ else if (channel->band == NL80211_BAND_6GHZ)
+ ch_info->period = RTW89_CHANNEL_TIME_6G + RTW89_DWELL_TIME_6G;
+ else
+ ch_info->period = RTW89_CHANNEL_TIME;
+
+ ch_info->ch_band = rtw89_nl80211_to_hw_band(channel->band);
+ ch_info->central_ch = channel->hw_value;
+ ch_info->pri_ch = channel->hw_value;
+ ch_info->rand_seq_num = random_seq;
+ ch_info->is_psc = cfg80211_channel_is_psc(channel);
+
+ if (channel->flags & (IEEE80211_CHAN_RADAR | IEEE80211_CHAN_NO_IR))
+ type = RTW89_CHAN_DFS;
+ else
+ type = RTW89_CHAN_ACTIVE;
+ rtw89_hw_scan_add_chan_be(rtwdev, type, req->n_ssids, ch_info);
+
+ list_add_tail(&ch_info->list, &chan_list);
+ }
+
+ rtwdev->scan_info.last_chan_idx = idx;
+ ret = rtw89_fw_h2c_scan_list_offload_be(rtwdev, list_len, &chan_list);
+
+out:
+ list_for_each_entry_safe(ch_info, tmp, &chan_list, list) {
+ list_del(&ch_info->list);
+ kfree(ch_info);
+ }
+
+ return ret;
+}
+
static int rtw89_hw_scan_prehandle(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif, bool connected)
{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
int ret;
ret = rtw89_hw_scan_update_probe_req(rtwdev, rtwvif);
@@ -4137,7 +5942,7 @@ static int rtw89_hw_scan_prehandle(struct rtw89_dev *rtwdev,
rtw89_err(rtwdev, "Update probe request failed\n");
goto out;
}
- ret = rtw89_hw_scan_add_chan_list(rtwdev, rtwvif, connected);
+ ret = mac->add_chan_list(rtwdev, rtwvif, connected);
out:
return ret;
}
@@ -4154,9 +5959,11 @@ void rtw89_hw_scan_start(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
rtw89_get_channel(rtwdev, rtwvif, &rtwdev->scan_info.op_chan);
rtwdev->scan_info.scanning_vif = vif;
rtwdev->scan_info.last_chan_idx = 0;
+ rtwdev->scan_info.abort = false;
rtwvif->scan_ies = &scan_req->ies;
rtwvif->scan_req = req;
ieee80211_stop_queues(rtwdev->hw);
+ rtw89_mac_port_cfg_rx_sync(rtwdev, rtwvif, false);
if (req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
get_random_mask_addr(mac_addr, req->mac_addr,
@@ -4181,10 +5988,10 @@ void rtw89_hw_scan_complete(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
{
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info;
+ struct rtw89_vif *rtwvif = vif_to_rtwvif_safe(vif);
struct cfg80211_scan_info info = {
.aborted = aborted,
};
- struct rtw89_vif *rtwvif;
if (!vif)
return;
@@ -4197,22 +6004,29 @@ void rtw89_hw_scan_complete(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
rtw89_core_scan_complete(rtwdev, vif, true);
ieee80211_scan_completed(rtwdev->hw, &info);
ieee80211_wake_queues(rtwdev->hw);
+ rtw89_mac_port_cfg_rx_sync(rtwdev, rtwvif, true);
rtw89_mac_enable_beacon_for_ap_vifs(rtwdev, true);
rtw89_release_pkt_list(rtwdev);
- rtwvif = (struct rtw89_vif *)vif->drv_priv;
rtwvif->scan_req = NULL;
rtwvif->scan_ies = NULL;
scan_info->last_chan_idx = 0;
scan_info->scanning_vif = NULL;
+ scan_info->abort = false;
rtw89_chanctx_proceed(rtwdev);
}
void rtw89_hw_scan_abort(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif)
{
- rtw89_hw_scan_offload(rtwdev, vif, false);
- rtw89_hw_scan_complete(rtwdev, vif, true);
+ struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info;
+ int ret;
+
+ scan_info->abort = true;
+
+ ret = rtw89_hw_scan_offload(rtwdev, vif, false);
+ if (ret)
+ rtw89_hw_scan_complete(rtwdev, vif, true);
}
static bool rtw89_is_any_vif_connected_or_connecting(struct rtw89_dev *rtwdev)
@@ -4231,6 +6045,7 @@ static bool rtw89_is_any_vif_connected_or_connecting(struct rtw89_dev *rtwdev)
int rtw89_hw_scan_offload(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
bool enable)
{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
struct rtw89_scan_option opt = {0};
struct rtw89_vif *rtwvif;
bool connected;
@@ -4248,7 +6063,18 @@ int rtw89_hw_scan_offload(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
if (ret)
goto out;
}
- ret = rtw89_fw_h2c_scan_offload(rtwdev, &opt, rtwvif);
+
+ if (rtwdev->chip->chip_gen == RTW89_CHIP_BE) {
+ opt.operation = enable ? RTW89_SCAN_OP_START : RTW89_SCAN_OP_STOP;
+ opt.scan_mode = RTW89_SCAN_MODE_SA;
+ opt.band = RTW89_PHY_0;
+ opt.num_macc_role = 0;
+ opt.mlo_mode = rtwdev->mlo_dbcc_mode;
+ opt.num_opch = connected ? 1 : 0;
+ opt.opch_end = connected ? 0 : RTW89_CHAN_INVALID;
+ }
+
+ ret = mac->scan_offload(rtwdev, &opt, rtwvif);
out:
return ret;
}
@@ -4922,6 +6748,372 @@ int rtw89_fw_h2c_mcc_set_duration(struct rtw89_dev *rtwdev,
return rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
}
+static
+u32 rtw89_fw_h2c_mrc_add_slot(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_add_slot_arg *slot_arg,
+ struct rtw89_h2c_mrc_add_slot *slot_h2c)
+{
+ bool fill_h2c = !!slot_h2c;
+ unsigned int i;
+
+ if (!fill_h2c)
+ goto calc_len;
+
+ slot_h2c->w0 = le32_encode_bits(slot_arg->duration,
+ RTW89_H2C_MRC_ADD_SLOT_W0_DURATION) |
+ le32_encode_bits(slot_arg->courtesy_en,
+ RTW89_H2C_MRC_ADD_SLOT_W0_COURTESY_EN) |
+ le32_encode_bits(slot_arg->role_num,
+ RTW89_H2C_MRC_ADD_SLOT_W0_ROLE_NUM);
+ slot_h2c->w1 = le32_encode_bits(slot_arg->courtesy_period,
+ RTW89_H2C_MRC_ADD_SLOT_W1_COURTESY_PERIOD) |
+ le32_encode_bits(slot_arg->courtesy_target,
+ RTW89_H2C_MRC_ADD_SLOT_W1_COURTESY_TARGET);
+
+ for (i = 0; i < slot_arg->role_num; i++) {
+ slot_h2c->roles[i].w0 =
+ le32_encode_bits(slot_arg->roles[i].macid,
+ RTW89_H2C_MRC_ADD_ROLE_W0_MACID) |
+ le32_encode_bits(slot_arg->roles[i].role_type,
+ RTW89_H2C_MRC_ADD_ROLE_W0_ROLE_TYPE) |
+ le32_encode_bits(slot_arg->roles[i].is_master,
+ RTW89_H2C_MRC_ADD_ROLE_W0_IS_MASTER) |
+ le32_encode_bits(slot_arg->roles[i].en_tx_null,
+ RTW89_H2C_MRC_ADD_ROLE_W0_TX_NULL_EN) |
+ le32_encode_bits(false,
+ RTW89_H2C_MRC_ADD_ROLE_W0_IS_ALT_ROLE) |
+ le32_encode_bits(false,
+ RTW89_H2C_MRC_ADD_ROLE_W0_ROLE_ALT_EN);
+ slot_h2c->roles[i].w1 =
+ le32_encode_bits(slot_arg->roles[i].central_ch,
+ RTW89_H2C_MRC_ADD_ROLE_W1_CENTRAL_CH_SEG) |
+ le32_encode_bits(slot_arg->roles[i].primary_ch,
+ RTW89_H2C_MRC_ADD_ROLE_W1_PRI_CH) |
+ le32_encode_bits(slot_arg->roles[i].bw,
+ RTW89_H2C_MRC_ADD_ROLE_W1_BW) |
+ le32_encode_bits(slot_arg->roles[i].band,
+ RTW89_H2C_MRC_ADD_ROLE_W1_CH_BAND_TYPE) |
+ le32_encode_bits(slot_arg->roles[i].null_early,
+ RTW89_H2C_MRC_ADD_ROLE_W1_NULL_EARLY) |
+ le32_encode_bits(false,
+ RTW89_H2C_MRC_ADD_ROLE_W1_RFK_BY_PASS) |
+ le32_encode_bits(true,
+ RTW89_H2C_MRC_ADD_ROLE_W1_CAN_BTC);
+ slot_h2c->roles[i].macid_main_bitmap =
+ cpu_to_le32(slot_arg->roles[i].macid_main_bitmap);
+ slot_h2c->roles[i].macid_paired_bitmap =
+ cpu_to_le32(slot_arg->roles[i].macid_paired_bitmap);
+ }
+
+calc_len:
+ return struct_size(slot_h2c, roles, slot_arg->role_num);
+}
+
+int rtw89_fw_h2c_mrc_add(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_add_arg *arg)
+{
+ struct rtw89_h2c_mrc_add *h2c_head;
+ struct sk_buff *skb;
+ unsigned int i;
+ void *tmp;
+ u32 len;
+ int ret;
+
+ len = sizeof(*h2c_head);
+ for (i = 0; i < arg->slot_num; i++)
+ len += rtw89_fw_h2c_mrc_add_slot(rtwdev, &arg->slots[i], NULL);
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc add\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ tmp = skb->data;
+
+ h2c_head = tmp;
+ h2c_head->w0 = le32_encode_bits(arg->sch_idx,
+ RTW89_H2C_MRC_ADD_W0_SCH_IDX) |
+ le32_encode_bits(arg->sch_type,
+ RTW89_H2C_MRC_ADD_W0_SCH_TYPE) |
+ le32_encode_bits(arg->slot_num,
+ RTW89_H2C_MRC_ADD_W0_SLOT_NUM) |
+ le32_encode_bits(arg->btc_in_sch,
+ RTW89_H2C_MRC_ADD_W0_BTC_IN_SCH);
+
+ tmp += sizeof(*h2c_head);
+ for (i = 0; i < arg->slot_num; i++)
+ tmp += rtw89_fw_h2c_mrc_add_slot(rtwdev, &arg->slots[i], tmp);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_ADD_MRC, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ dev_kfree_skb_any(skb);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+int rtw89_fw_h2c_mrc_start(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_start_arg *arg)
+{
+ struct rtw89_wait_info *wait = &rtwdev->mcc.wait;
+ struct rtw89_h2c_mrc_start *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ unsigned int cond;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc start\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_mrc_start *)skb->data;
+
+ h2c->w0 = le32_encode_bits(arg->sch_idx,
+ RTW89_H2C_MRC_START_W0_SCH_IDX) |
+ le32_encode_bits(arg->old_sch_idx,
+ RTW89_H2C_MRC_START_W0_OLD_SCH_IDX) |
+ le32_encode_bits(arg->action,
+ RTW89_H2C_MRC_START_W0_ACTION);
+
+ h2c->start_tsf_high = cpu_to_le32(arg->start_tsf >> 32);
+ h2c->start_tsf_low = cpu_to_le32(arg->start_tsf);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_START_MRC, 0, 0,
+ len);
+
+ cond = RTW89_MRC_WAIT_COND(arg->sch_idx, H2C_FUNC_START_MRC);
+ return rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
+}
+
+int rtw89_fw_h2c_mrc_del(struct rtw89_dev *rtwdev, u8 sch_idx)
+{
+ struct rtw89_wait_info *wait = &rtwdev->mcc.wait;
+ struct rtw89_h2c_mrc_del *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ unsigned int cond;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc del\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_mrc_del *)skb->data;
+
+ h2c->w0 = le32_encode_bits(sch_idx, RTW89_H2C_MRC_DEL_W0_SCH_IDX);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_DEL_MRC, 0, 0,
+ len);
+
+ cond = RTW89_MRC_WAIT_COND(sch_idx, H2C_FUNC_DEL_MRC);
+ return rtw89_h2c_tx_and_wait(rtwdev, skb, wait, cond);
+}
+
+int rtw89_fw_h2c_mrc_req_tsf(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_req_tsf_arg *arg,
+ struct rtw89_mac_mrc_tsf_rpt *rpt)
+{
+ struct rtw89_wait_info *wait = &rtwdev->mcc.wait;
+ struct rtw89_h2c_mrc_req_tsf *h2c;
+ struct rtw89_mac_mrc_tsf_rpt *tmp;
+ struct sk_buff *skb;
+ unsigned int i;
+ u32 len;
+ int ret;
+
+ len = struct_size(h2c, infos, arg->num);
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc req tsf\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_mrc_req_tsf *)skb->data;
+
+ h2c->req_tsf_num = arg->num;
+ for (i = 0; i < arg->num; i++)
+ h2c->infos[i] =
+ u8_encode_bits(arg->infos[i].band,
+ RTW89_H2C_MRC_REQ_TSF_INFO_BAND) |
+ u8_encode_bits(arg->infos[i].port,
+ RTW89_H2C_MRC_REQ_TSF_INFO_PORT);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_MRC_REQ_TSF, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, RTW89_MRC_WAIT_COND_REQ_TSF);
+ if (ret)
+ return ret;
+
+ tmp = (struct rtw89_mac_mrc_tsf_rpt *)wait->data.buf;
+ *rpt = *tmp;
+
+ return 0;
+}
+
+int rtw89_fw_h2c_mrc_upd_bitmap(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_upd_bitmap_arg *arg)
+{
+ struct rtw89_h2c_mrc_upd_bitmap *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc upd bitmap\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_mrc_upd_bitmap *)skb->data;
+
+ h2c->w0 = le32_encode_bits(arg->sch_idx,
+ RTW89_H2C_MRC_UPD_BITMAP_W0_SCH_IDX) |
+ le32_encode_bits(arg->action,
+ RTW89_H2C_MRC_UPD_BITMAP_W0_ACTION) |
+ le32_encode_bits(arg->macid,
+ RTW89_H2C_MRC_UPD_BITMAP_W0_MACID);
+ h2c->w1 = le32_encode_bits(arg->client_macid,
+ RTW89_H2C_MRC_UPD_BITMAP_W1_CLIENT_MACID);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_MRC_UPD_BITMAP, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ dev_kfree_skb_any(skb);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+int rtw89_fw_h2c_mrc_sync(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_sync_arg *arg)
+{
+ struct rtw89_h2c_mrc_sync *h2c;
+ u32 len = sizeof(*h2c);
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc sync\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_mrc_sync *)skb->data;
+
+ h2c->w0 = le32_encode_bits(true, RTW89_H2C_MRC_SYNC_W0_SYNC_EN) |
+ le32_encode_bits(arg->src.port,
+ RTW89_H2C_MRC_SYNC_W0_SRC_PORT) |
+ le32_encode_bits(arg->src.band,
+ RTW89_H2C_MRC_SYNC_W0_SRC_BAND) |
+ le32_encode_bits(arg->dest.port,
+ RTW89_H2C_MRC_SYNC_W0_DEST_PORT) |
+ le32_encode_bits(arg->dest.band,
+ RTW89_H2C_MRC_SYNC_W0_DEST_BAND);
+ h2c->w1 = le32_encode_bits(arg->offset, RTW89_H2C_MRC_SYNC_W1_OFFSET);
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_MRC_SYNC, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ dev_kfree_skb_any(skb);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+int rtw89_fw_h2c_mrc_upd_duration(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_upd_duration_arg *arg)
+{
+ struct rtw89_h2c_mrc_upd_duration *h2c;
+ struct sk_buff *skb;
+ unsigned int i;
+ u32 len;
+ int ret;
+
+ len = struct_size(h2c, slots, arg->slot_num);
+ skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len);
+ if (!skb) {
+ rtw89_err(rtwdev, "failed to alloc skb for mrc upd duration\n");
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len);
+ h2c = (struct rtw89_h2c_mrc_upd_duration *)skb->data;
+
+ h2c->w0 = le32_encode_bits(arg->sch_idx,
+ RTW89_H2C_MRC_UPD_DURATION_W0_SCH_IDX) |
+ le32_encode_bits(arg->slot_num,
+ RTW89_H2C_MRC_UPD_DURATION_W0_SLOT_NUM) |
+ le32_encode_bits(false,
+ RTW89_H2C_MRC_UPD_DURATION_W0_BTC_IN_SCH);
+
+ h2c->start_tsf_high = cpu_to_le32(arg->start_tsf >> 32);
+ h2c->start_tsf_low = cpu_to_le32(arg->start_tsf);
+
+ for (i = 0; i < arg->slot_num; i++) {
+ h2c->slots[i] =
+ le32_encode_bits(arg->slots[i].slot_idx,
+ RTW89_H2C_MRC_UPD_DURATION_SLOT_SLOT_IDX) |
+ le32_encode_bits(arg->slots[i].duration,
+ RTW89_H2C_MRC_UPD_DURATION_SLOT_DURATION);
+ }
+
+ rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C,
+ H2C_CAT_MAC,
+ H2C_CL_MRC,
+ H2C_FUNC_MRC_UPD_DURATION, 0, 0,
+ len);
+
+ ret = rtw89_h2c_tx(rtwdev, skb, false);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to send h2c\n");
+ dev_kfree_skb_any(skb);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
static bool __fw_txpwr_entry_zero_ext(const void *ext_ptr, u8 ext_len)
{
static const u8 zeros[U8_MAX] = {};
diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h
index 01016588b1fc..44311f65b4fa 100644
--- a/drivers/net/wireless/realtek/rtw89/fw.h
+++ b/drivers/net/wireless/realtek/rtw89/fw.h
@@ -64,6 +64,8 @@ struct rtw89_h2creg_sch_tx_en {
#define RTW89_H2CREG_SCH_TX_EN_W1_MASK GENMASK(15, 0)
#define RTW89_H2CREG_SCH_TX_EN_W1_BAND BIT(16)
+#define RTW89_H2CREG_WOW_CPUIO_RX_CTRL_EN GENMASK(23, 16)
+
#define RTW89_H2CREG_MAX 4
#define RTW89_C2HREG_MAX 4
#define RTW89_C2HREG_HDR_LEN 2
@@ -95,7 +97,9 @@ enum rtw89_mac_h2c_type {
RTW89_FWCMD_H2CREG_FUNC_FWERR,
RTW89_FWCMD_H2CREG_FUNC_GET_FEATURE,
RTW89_FWCMD_H2CREG_FUNC_GETPKT_INFORM,
- RTW89_FWCMD_H2CREG_FUNC_SCH_TX_EN
+ RTW89_FWCMD_H2CREG_FUNC_SCH_TX_EN,
+ RTW89_FWCMD_H2CREG_FUNC_WOW_TRX_STOP = 0x6,
+ RTW89_FWCMD_H2CREG_FUNC_WOW_CPUIO_RX_CTRL = 0xA,
};
enum rtw89_mac_c2h_type {
@@ -104,7 +108,8 @@ enum rtw89_mac_c2h_type {
RTW89_FWCMD_C2HREG_FUNC_ERR_MSG,
RTW89_FWCMD_C2HREG_FUNC_PHY_CAP,
RTW89_FWCMD_C2HREG_FUNC_TX_PAUSE_RPT,
- RTW89_FWCMD_C2HREG_FUNC_NULL = 0xFF
+ RTW89_FWCMD_C2HREG_FUNC_WOW_CPUIO_RX_ACK = 0xA,
+ RTW89_FWCMD_C2HREG_FUNC_NULL = 0xFF,
};
enum rtw89_fw_c2h_category {
@@ -149,6 +154,7 @@ enum rtw89_fw_log_comp {
RTW89_FW_LOG_COMP_TWT,
RTW89_FW_LOG_COMP_RF,
RTW89_FW_LOG_COMP_MCC = 20,
+ RTW89_FW_LOG_COMP_SCAN = 28,
};
enum rtw89_pkt_offload_op {
@@ -169,6 +175,16 @@ enum rtw89_scanofld_notify_reason {
RTW89_SCAN_ENTER_CH_NOTIFY,
RTW89_SCAN_LEAVE_CH_NOTIFY,
RTW89_SCAN_END_SCAN_NOTIFY,
+ RTW89_SCAN_REPORT_NOTIFY,
+ RTW89_SCAN_CHKPT_NOTIFY,
+ RTW89_SCAN_ENTER_OP_NOTIFY,
+ RTW89_SCAN_LEAVE_OP_NOTIFY,
+};
+
+enum rtw89_scanofld_status {
+ RTW89_SCAN_STATUS_NOTIFY,
+ RTW89_SCAN_STATUS_SUCCESS,
+ RTW89_SCAN_STATUS_FAIL,
};
enum rtw89_chan_type {
@@ -184,6 +200,9 @@ enum rtw89_p2pps_action {
RTW89_P2P_ACT_TERMINATE = 3,
};
+#define RTW89_DEFAULT_CQM_HYST 4
+#define RTW89_DEFAULT_CQM_THOLD -70
+
enum rtw89_bcn_fltr_offload_mode {
RTW89_BCN_FLTR_OFFLOAD_MODE_0 = 0,
RTW89_BCN_FLTR_OFFLOAD_MODE_1,
@@ -216,6 +235,10 @@ struct rtw89_fw_hdr_section_info {
u32 dladdr;
u32 mssc;
u8 type;
+ bool ignore;
+ const u8 *key_addr;
+ u32 key_len;
+ u32 key_idx;
};
struct rtw89_fw_bin_info {
@@ -223,6 +246,8 @@ struct rtw89_fw_bin_info {
u32 hdr_len;
bool dynamic_hdr_en;
u32 dynamic_hdr_len;
+ bool dsp_checksum;
+ bool secure_section_exist;
struct rtw89_fw_hdr_section_info section_info[FWDL_SECTION_MAX_NUM];
};
@@ -231,6 +256,15 @@ struct rtw89_fw_macid_pause_grp {
__le32 mask_grp[4];
} __packed;
+struct rtw89_fw_macid_pause_sleep_grp {
+ struct {
+ __le32 pause_grp[4];
+ __le32 pause_mask_grp[4];
+ __le32 sleep_grp[4];
+ __le32 sleep_mask_grp[4];
+ } __packed n[4];
+} __packed;
+
#define RTW89_H2C_MAX_SIZE 2048
#define RTW89_CHANNEL_TIME 45
#define RTW89_CHANNEL_TIME_6G 20
@@ -243,6 +277,7 @@ struct rtw89_fw_macid_pause_grp {
#define RTW89_SCANOFLD_MAX_IE_LEN 512
#define RTW89_SCANOFLD_PKT_NONE 0xFF
#define RTW89_SCANOFLD_DEBUG_MASK 0x1F
+#define RTW89_CHAN_INVALID 0xFF
#define RTW89_MAC_CHINFO_SIZE 28
#define RTW89_SCAN_LIST_GUARD 4
#define RTW89_SCAN_LIST_LIMIT \
@@ -274,9 +309,32 @@ struct rtw89_mac_chinfo {
bool is_psc;
};
-struct rtw89_scan_option {
- bool enable;
- bool target_ch_mode;
+struct rtw89_mac_chinfo_be {
+ u8 period;
+ u8 dwell_time;
+ u8 central_ch;
+ u8 pri_ch;
+ u8 bw:3;
+ u8 ch_band:2;
+ u8 dfs_ch:1;
+ u8 pause_data:1;
+ u8 tx_null:1;
+ u8 rand_seq_num:1;
+ u8 notify_action:5;
+ u8 probe_id;
+ u8 leave_crit;
+ u8 chkpt_timer;
+ u8 leave_time;
+ u8 leave_th;
+ u16 tx_pkt_ctrl;
+ u8 pkt_id[RTW89_SCANOFLD_MAX_SSID];
+ u8 sw_def;
+ u16 fw_probe0_ssids;
+ u16 fw_probe0_shortssids;
+ u16 fw_probe0_bssids;
+
+ struct list_head list;
+ bool is_psc;
};
struct rtw89_pktofld_info {
@@ -419,6 +477,7 @@ static inline void RTW89_SET_EDCA_PARAM(void *cmd, u32 val)
#define FWDL_SECURITY_SECTION_TYPE 9
#define FWDL_SECURITY_SIGLEN 512
+#define FWDL_SECURITY_CHKSUM_LEN 8
struct rtw89_fw_dynhdr_sec {
__le32 w0;
@@ -472,6 +531,7 @@ struct rtw89_fw_hdr {
#define FW_HDR_W4_MIN GENMASK(31, 24)
#define FW_HDR_W5_YEAR GENMASK(31, 0)
#define FW_HDR_W6_SEC_NUM GENMASK(15, 8)
+#define FW_HDR_W7_PART_SIZE GENMASK(15, 0)
#define FW_HDR_W7_DYN_HDR BIT(16)
#define FW_HDR_W7_CMD_VERSERION GENMASK(31, 24)
@@ -489,6 +549,7 @@ struct rtw89_fw_hdr_section_v1 {
#define FWSECTION_HDR_V1_W1_CHECKSUM BIT(28)
#define FWSECTION_HDR_V1_W1_REDL BIT(29)
#define FWSECTION_HDR_V1_W2_MSSC GENMASK(7, 0)
+#define FORMATTED_MSSC 0xFF
#define FWSECTION_HDR_V1_W2_BBMCU_IDX GENMASK(27, 24)
struct rtw89_fw_hdr_v1 {
@@ -521,12 +582,42 @@ struct rtw89_fw_hdr_v1 {
#define FW_HDR_V1_W5_YEAR GENMASK(15, 0)
#define FW_HDR_V1_W5_HDR_SIZE GENMASK(31, 16)
#define FW_HDR_V1_W6_SEC_NUM GENMASK(15, 8)
+#define FW_HDR_V1_W6_DSP_CHKSUM BIT(24)
+#define FW_HDR_V1_W7_PART_SIZE GENMASK(15, 0)
#define FW_HDR_V1_W7_DYN_HDR BIT(16)
-static inline void SET_FW_HDR_PART_SIZE(void *fwhdr, u32 val)
-{
- le32p_replace_bits((__le32 *)fwhdr + 7, val, GENMASK(15, 0));
-}
+enum rtw89_fw_mss_pool_rmp_tbl_type {
+ MSS_POOL_RMP_TBL_BITMASK = 0x0,
+ MSS_POOL_RMP_TBL_RECORD = 0x1,
+};
+
+#define FWDL_MSS_POOL_DEFKEYSETS_SIZE 8
+
+struct rtw89_fw_mss_pool_hdr {
+ u8 signature[8]; /* equal to mss_signature[] */
+ __le32 rmp_tbl_offset;
+ __le32 key_raw_offset;
+ u8 defen;
+ u8 rsvd[3];
+ u8 rmpfmt; /* enum rtw89_fw_mss_pool_rmp_tbl_type */
+ u8 mssdev_max;
+ __le16 keypair_num;
+ __le16 msscust_max;
+ __le16 msskey_num_max;
+ __le32 rsvd3;
+ u8 rmp_tbl[];
+} __packed;
+
+union rtw89_fw_section_mssc_content {
+ struct {
+ u8 pad[58];
+ __le32 v;
+ } __packed sb_sel_ver;
+ struct {
+ u8 pad[60];
+ __le16 v;
+ } __packed key_sign_len;
+} __packed;
static inline void SET_CTRL_INFO_MACID(void *table, u32 val)
{
@@ -1198,6 +1289,149 @@ static inline void SET_CMC_TBL_CSI_BW(void *table, u32 val)
GENMASK(31, 30));
}
+struct rtw89_h2c_cctlinfo_ud_g7 {
+ __le32 c0;
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+ __le32 w7;
+ __le32 w8;
+ __le32 w9;
+ __le32 w10;
+ __le32 w11;
+ __le32 w12;
+ __le32 w13;
+ __le32 w14;
+ __le32 w15;
+ __le32 m0;
+ __le32 m1;
+ __le32 m2;
+ __le32 m3;
+ __le32 m4;
+ __le32 m5;
+ __le32 m6;
+ __le32 m7;
+ __le32 m8;
+ __le32 m9;
+ __le32 m10;
+ __le32 m11;
+ __le32 m12;
+ __le32 m13;
+ __le32 m14;
+ __le32 m15;
+} __packed;
+
+#define CCTLINFO_G7_C0_MACID GENMASK(6, 0)
+#define CCTLINFO_G7_C0_OP BIT(7)
+
+#define CCTLINFO_G7_W0_DATARATE GENMASK(11, 0)
+#define CCTLINFO_G7_W0_DATA_GI_LTF GENMASK(14, 12)
+#define CCTLINFO_G7_W0_TRYRATE BIT(15)
+#define CCTLINFO_G7_W0_ARFR_CTRL GENMASK(17, 16)
+#define CCTLINFO_G7_W0_DIS_HE1SS_STBC BIT(18)
+#define CCTLINFO_G7_W0_ACQ_RPT_EN BIT(20)
+#define CCTLINFO_G7_W0_MGQ_RPT_EN BIT(21)
+#define CCTLINFO_G7_W0_ULQ_RPT_EN BIT(22)
+#define CCTLINFO_G7_W0_TWTQ_RPT_EN BIT(23)
+#define CCTLINFO_G7_W0_FORCE_TXOP BIT(24)
+#define CCTLINFO_G7_W0_DISRTSFB BIT(25)
+#define CCTLINFO_G7_W0_DISDATAFB BIT(26)
+#define CCTLINFO_G7_W0_NSTR_EN BIT(27)
+#define CCTLINFO_G7_W0_AMPDU_DENSITY GENMASK(31, 28)
+#define CCTLINFO_G7_W0_ALL (GENMASK(31, 20) | GENMASK(18, 0))
+#define CCTLINFO_G7_W1_DATA_RTY_LOWEST_RATE GENMASK(11, 0)
+#define CCTLINFO_G7_W1_RTS_TXCNT_LMT GENMASK(15, 12)
+#define CCTLINFO_G7_W1_RTSRATE GENMASK(27, 16)
+#define CCTLINFO_G7_W1_RTS_RTY_LOWEST_RATE GENMASK(31, 28)
+#define CCTLINFO_G7_W1_ALL GENMASK(31, 0)
+#define CCTLINFO_G7_W2_DATA_TX_CNT_LMT GENMASK(5, 0)
+#define CCTLINFO_G7_W2_DATA_TXCNT_LMT_SEL BIT(6)
+#define CCTLINFO_G7_W2_MAX_AGG_NUM_SEL BIT(7)
+#define CCTLINFO_G7_W2_RTS_EN BIT(8)
+#define CCTLINFO_G7_W2_CTS2SELF_EN BIT(9)
+#define CCTLINFO_G7_W2_CCA_RTS GENMASK(11, 10)
+#define CCTLINFO_G7_W2_HW_RTS_EN BIT(12)
+#define CCTLINFO_G7_W2_RTS_DROP_DATA_MODE GENMASK(14, 13)
+#define CCTLINFO_G7_W2_PRELD_EN BIT(15)
+#define CCTLINFO_G7_W2_AMPDU_MAX_LEN GENMASK(26, 16)
+#define CCTLINFO_G7_W2_UL_MU_DIS BIT(27)
+#define CCTLINFO_G7_W2_AMPDU_MAX_TIME GENMASK(31, 28)
+#define CCTLINFO_G7_W2_ALL GENMASK(31, 0)
+#define CCTLINFO_G7_W3_MAX_AGG_NUM GENMASK(7, 0)
+#define CCTLINFO_G7_W3_DATA_BW GENMASK(10, 8)
+#define CCTLINFO_G7_W3_DATA_BW_ER BIT(11)
+#define CCTLINFO_G7_W3_BA_BMAP GENMASK(14, 12)
+#define CCTLINFO_G7_W3_VCS_STBC BIT(15)
+#define CCTLINFO_G7_W3_VO_LFTIME_SEL GENMASK(18, 16)
+#define CCTLINFO_G7_W3_VI_LFTIME_SEL GENMASK(21, 19)
+#define CCTLINFO_G7_W3_BE_LFTIME_SEL GENMASK(24, 22)
+#define CCTLINFO_G7_W3_BK_LFTIME_SEL GENMASK(27, 25)
+#define CCTLINFO_G7_W3_AMPDU_TIME_SEL BIT(28)
+#define CCTLINFO_G7_W3_AMPDU_LEN_SEL BIT(29)
+#define CCTLINFO_G7_W3_RTS_TXCNT_LMT_SEL BIT(30)
+#define CCTLINFO_G7_W3_LSIG_TXOP_EN BIT(31)
+#define CCTLINFO_G7_W3_ALL GENMASK(31, 0)
+#define CCTLINFO_G7_W4_MULTI_PORT_ID GENMASK(2, 0)
+#define CCTLINFO_G7_W4_BYPASS_PUNC BIT(3)
+#define CCTLINFO_G7_W4_MBSSID GENMASK(7, 4)
+#define CCTLINFO_G7_W4_DATA_DCM BIT(8)
+#define CCTLINFO_G7_W4_DATA_ER BIT(9)
+#define CCTLINFO_G7_W4_DATA_LDPC BIT(10)
+#define CCTLINFO_G7_W4_DATA_STBC BIT(11)
+#define CCTLINFO_G7_W4_A_CTRL_BQR BIT(12)
+#define CCTLINFO_G7_W4_A_CTRL_BSR BIT(14)
+#define CCTLINFO_G7_W4_A_CTRL_CAS BIT(15)
+#define CCTLINFO_G7_W4_ACT_SUBCH_CBW GENMASK(31, 16)
+#define CCTLINFO_G7_W4_ALL (GENMASK(31, 14) | GENMASK(12, 0))
+#define CCTLINFO_G7_W5_NOMINAL_PKT_PADDING0 GENMASK(1, 0)
+#define CCTLINFO_G7_W5_NOMINAL_PKT_PADDING1 GENMASK(3, 2)
+#define CCTLINFO_G7_W5_NOMINAL_PKT_PADDING2 GENMASK(5, 4)
+#define CCTLINFO_G7_W5_NOMINAL_PKT_PADDING3 GENMASK(7, 6)
+#define CCTLINFO_G7_W5_NOMINAL_PKT_PADDING4 GENMASK(9, 8)
+#define CCTLINFO_G7_W5_SR_RATE GENMASK(14, 10)
+#define CCTLINFO_G7_W5_TID_DISABLE GENMASK(23, 16)
+#define CCTLINFO_G7_W5_ADDR_CAM_INDEX GENMASK(31, 24)
+#define CCTLINFO_G7_W5_ALL (GENMASK(31, 16) | GENMASK(14, 0))
+#define CCTLINFO_G7_W6_AID12_PAID GENMASK(11, 0)
+#define CCTLINFO_G7_W6_RESP_REF_RATE GENMASK(23, 12)
+#define CCTLINFO_G7_W6_ULDL BIT(31)
+#define CCTLINFO_G7_W6_ALL (BIT(31) | GENMASK(23, 0))
+#define CCTLINFO_G7_W7_NC GENMASK(2, 0)
+#define CCTLINFO_G7_W7_NR GENMASK(5, 3)
+#define CCTLINFO_G7_W7_NG GENMASK(7, 6)
+#define CCTLINFO_G7_W7_CB GENMASK(9, 8)
+#define CCTLINFO_G7_W7_CS GENMASK(11, 10)
+#define CCTLINFO_G7_W7_CSI_STBC_EN BIT(13)
+#define CCTLINFO_G7_W7_CSI_LDPC_EN BIT(14)
+#define CCTLINFO_G7_W7_CSI_PARA_EN BIT(15)
+#define CCTLINFO_G7_W7_CSI_FIX_RATE GENMASK(27, 16)
+#define CCTLINFO_G7_W7_CSI_BW GENMASK(31, 29)
+#define CCTLINFO_G7_W7_ALL (GENMASK(31, 29) | GENMASK(27, 13) | GENMASK(11, 0))
+#define CCTLINFO_G7_W8_ALL_ACK_SUPPORT BIT(0)
+#define CCTLINFO_G7_W8_BSR_QUEUE_SIZE_FORMAT BIT(1)
+#define CCTLINFO_G7_W8_BSR_OM_UPD_EN BIT(2)
+#define CCTLINFO_G7_W8_MACID_FWD_IDC BIT(3)
+#define CCTLINFO_G7_W8_AZ_SEC_EN BIT(4)
+#define CCTLINFO_G7_W8_CSI_SEC_EN BIT(5)
+#define CCTLINFO_G7_W8_FIX_UL_ADDRCAM_IDX BIT(6)
+#define CCTLINFO_G7_W8_CTRL_CNT_VLD BIT(7)
+#define CCTLINFO_G7_W8_CTRL_CNT GENMASK(11, 8)
+#define CCTLINFO_G7_W8_RESP_SEC_TYPE GENMASK(15, 12)
+#define CCTLINFO_G7_W8_ALL GENMASK(15, 0)
+/* W9~13 are reserved */
+#define CCTLINFO_G7_W14_VO_CURR_RATE GENMASK(11, 0)
+#define CCTLINFO_G7_W14_VI_CURR_RATE GENMASK(23, 12)
+#define CCTLINFO_G7_W14_BE_CURR_RATE_L GENMASK(31, 24)
+#define CCTLINFO_G7_W14_ALL GENMASK(31, 0)
+#define CCTLINFO_G7_W15_BE_CURR_RATE_H GENMASK(3, 0)
+#define CCTLINFO_G7_W15_BK_CURR_RATE GENMASK(15, 4)
+#define CCTLINFO_G7_W15_MGNT_CURR_RATE GENMASK(27, 16)
+#define CCTLINFO_G7_W15_ALL GENMASK(27, 0)
+
static inline void SET_DCTL_MACID_V1(void *table, u32 val)
{
le32p_replace_bits((__le32 *)(table) + 0, val, GENMASK(6, 0));
@@ -1500,105 +1734,98 @@ static inline void SET_DCTL_SEC_ENT6_V1(void *table, u32 val)
GENMASK(31, 24));
}
-static inline void SET_BCN_UPD_PORT(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(7, 0));
-}
-
-static inline void SET_BCN_UPD_MBSSID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(15, 8));
-}
-
-static inline void SET_BCN_UPD_BAND(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(23, 16));
-}
-
-static inline void SET_BCN_UPD_GRP_IE_OFST(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, (val - 24) | BIT(7), GENMASK(31, 24));
-}
-
-static inline void SET_BCN_UPD_MACID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 1, val, GENMASK(7, 0));
-}
-
-static inline void SET_BCN_UPD_SSN_SEL(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 1, val, GENMASK(9, 8));
-}
-
-static inline void SET_BCN_UPD_SSN_MODE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 1, val, GENMASK(11, 10));
-}
-
-static inline void SET_BCN_UPD_RATE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 1, val, GENMASK(20, 12));
-}
-
-static inline void SET_BCN_UPD_TXPWR(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 1, val, GENMASK(23, 21));
-}
-
-static inline void SET_BCN_UPD_TXINFO_CTRL_EN(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, BIT(0));
-}
-
-static inline void SET_BCN_UPD_NTX_PATH_EN(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(4, 1));
-}
-
-static inline void SET_BCN_UPD_PATH_MAP_A(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(6, 5));
-}
-
-static inline void SET_BCN_UPD_PATH_MAP_B(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(8, 7));
-}
-
-static inline void SET_BCN_UPD_PATH_MAP_C(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(10, 9));
-}
-
-static inline void SET_BCN_UPD_PATH_MAP_D(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(12, 11));
-}
-
-static inline void SET_BCN_UPD_PATH_ANTSEL_A(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, BIT(13));
-}
-
-static inline void SET_BCN_UPD_PATH_ANTSEL_B(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, BIT(14));
-}
-
-static inline void SET_BCN_UPD_PATH_ANTSEL_C(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, BIT(15));
-}
+struct rtw89_h2c_bcn_upd {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+} __packed;
-static inline void SET_BCN_UPD_PATH_ANTSEL_D(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, BIT(16));
-}
+#define RTW89_H2C_BCN_UPD_W0_PORT GENMASK(7, 0)
+#define RTW89_H2C_BCN_UPD_W0_MBSSID GENMASK(15, 8)
+#define RTW89_H2C_BCN_UPD_W0_BAND GENMASK(23, 16)
+#define RTW89_H2C_BCN_UPD_W0_GRP_IE_OFST GENMASK(31, 24)
+#define RTW89_H2C_BCN_UPD_W1_MACID GENMASK(7, 0)
+#define RTW89_H2C_BCN_UPD_W1_SSN_SEL GENMASK(9, 8)
+#define RTW89_H2C_BCN_UPD_W1_SSN_MODE GENMASK(11, 10)
+#define RTW89_H2C_BCN_UPD_W1_RATE GENMASK(20, 12)
+#define RTW89_H2C_BCN_UPD_W1_TXPWR GENMASK(23, 21)
+#define RTW89_H2C_BCN_UPD_W2_TXINFO_CTRL_EN BIT(0)
+#define RTW89_H2C_BCN_UPD_W2_NTX_PATH_EN GENMASK(4, 1)
+#define RTW89_H2C_BCN_UPD_W2_PATH_MAP_A GENMASK(6, 5)
+#define RTW89_H2C_BCN_UPD_W2_PATH_MAP_B GENMASK(8, 7)
+#define RTW89_H2C_BCN_UPD_W2_PATH_MAP_C GENMASK(10, 9)
+#define RTW89_H2C_BCN_UPD_W2_PATH_MAP_D GENMASK(12, 11)
+#define RTW89_H2C_BCN_UPD_W2_PATH_ANTSEL_A BIT(13)
+#define RTW89_H2C_BCN_UPD_W2_PATH_ANTSEL_B BIT(14)
+#define RTW89_H2C_BCN_UPD_W2_PATH_ANTSEL_C BIT(15)
+#define RTW89_H2C_BCN_UPD_W2_PATH_ANTSEL_D BIT(16)
+#define RTW89_H2C_BCN_UPD_W2_CSA_OFST GENMASK(31, 17)
+
+struct rtw89_h2c_bcn_upd_be {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+ __le32 w7;
+ __le32 w8;
+ __le32 w9;
+ __le32 w10;
+ __le32 w11;
+ __le32 w12;
+ __le32 w13;
+ __le32 w14;
+ __le32 w15;
+ __le32 w16;
+ __le32 w17;
+ __le32 w18;
+ __le32 w19;
+ __le32 w20;
+ __le32 w21;
+ __le32 w22;
+ __le32 w23;
+ __le32 w24;
+ __le32 w25;
+ __le32 w26;
+ __le32 w27;
+ __le32 w28;
+ __le32 w29;
+} __packed;
-static inline void SET_BCN_UPD_CSA_OFST(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(31, 17));
-}
+#define RTW89_H2C_BCN_UPD_BE_W0_PORT GENMASK(7, 0)
+#define RTW89_H2C_BCN_UPD_BE_W0_MBSSID GENMASK(15, 8)
+#define RTW89_H2C_BCN_UPD_BE_W0_BAND GENMASK(23, 16)
+#define RTW89_H2C_BCN_UPD_BE_W0_GRP_IE_OFST GENMASK(31, 24)
+#define RTW89_H2C_BCN_UPD_BE_W1_MACID GENMASK(7, 0)
+#define RTW89_H2C_BCN_UPD_BE_W1_SSN_SEL GENMASK(9, 8)
+#define RTW89_H2C_BCN_UPD_BE_W1_SSN_MODE GENMASK(11, 10)
+#define RTW89_H2C_BCN_UPD_BE_W1_RATE GENMASK(20, 12)
+#define RTW89_H2C_BCN_UPD_BE_W1_TXPWR GENMASK(23, 21)
+#define RTW89_H2C_BCN_UPD_BE_W1_MACID_EXT GENMASK(31, 24)
+#define RTW89_H2C_BCN_UPD_BE_W2_TXINFO_CTRL_EN BIT(0)
+#define RTW89_H2C_BCN_UPD_BE_W2_NTX_PATH_EN GENMASK(4, 1)
+#define RTW89_H2C_BCN_UPD_BE_W2_PATH_MAP_A GENMASK(6, 5)
+#define RTW89_H2C_BCN_UPD_BE_W2_PATH_MAP_B GENMASK(8, 7)
+#define RTW89_H2C_BCN_UPD_BE_W2_PATH_MAP_C GENMASK(10, 9)
+#define RTW89_H2C_BCN_UPD_BE_W2_PATH_MAP_D GENMASK(12, 11)
+#define RTW89_H2C_BCN_UPD_BE_W2_ANTSEL_A BIT(13)
+#define RTW89_H2C_BCN_UPD_BE_W2_ANTSEL_B BIT(14)
+#define RTW89_H2C_BCN_UPD_BE_W2_ANTSEL_C BIT(15)
+#define RTW89_H2C_BCN_UPD_BE_W2_ANTSEL_D BIT(16)
+#define RTW89_H2C_BCN_UPD_BE_W2_CSA_OFST GENMASK(31, 17)
+#define RTW89_H2C_BCN_UPD_BE_W3_MLIE_CSA_OFST GENMASK(15, 0)
+#define RTW89_H2C_BCN_UPD_BE_W3_CRITICAL_UPD_FLAG_OFST GENMASK(31, 16)
+#define RTW89_H2C_BCN_UPD_BE_W4_VAP1_DTIM_CNT_OFST GENMASK(15, 0)
+#define RTW89_H2C_BCN_UPD_BE_W4_VAP2_DTIM_CNT_OFST GENMASK(31, 16)
+#define RTW89_H2C_BCN_UPD_BE_W5_VAP3_DTIM_CNT_OFST GENMASK(15, 0)
+#define RTW89_H2C_BCN_UPD_BE_W5_VAP4_DTIM_CNT_OFST GENMASK(31, 16)
+#define RTW89_H2C_BCN_UPD_BE_W6_VAP5_DTIM_CNT_OFST GENMASK(15, 0)
+#define RTW89_H2C_BCN_UPD_BE_W6_VAP6_DTIM_CNT_OFST GENMASK(31, 16)
+#define RTW89_H2C_BCN_UPD_BE_W7_VAP7_DTIM_CNT_OFST GENMASK(15, 0)
+#define RTW89_H2C_BCN_UPD_BE_W7_ECSA_OFST GENMASK(30, 16)
+#define RTW89_H2C_BCN_UPD_BE_W7_PROTECTION_KEY_ID BIT(31)
static inline void SET_FWROLE_MAINTAIN_MACID(void *h2c, u32 val)
{
@@ -1620,70 +1847,46 @@ static inline void SET_FWROLE_MAINTAIN_WIFI_ROLE(void *h2c, u32 val)
le32p_replace_bits((__le32 *)h2c, val, GENMASK(16, 13));
}
-static inline void SET_JOININFO_MACID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(7, 0));
-}
-
-static inline void SET_JOININFO_OP(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, BIT(8));
-}
-
-static inline void SET_JOININFO_BAND(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, BIT(9));
-}
-
-static inline void SET_JOININFO_WMM(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(11, 10));
-}
-
-static inline void SET_JOININFO_TGR(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, BIT(12));
-}
-
-static inline void SET_JOININFO_ISHESTA(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, BIT(13));
-}
-
-static inline void SET_JOININFO_DLBW(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(15, 14));
-}
-
-static inline void SET_JOININFO_TF_MAC_PAD(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(17, 16));
-}
-
-static inline void SET_JOININFO_DL_T_PE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(20, 18));
-}
-
-static inline void SET_JOININFO_PORT_ID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(23, 21));
-}
+enum rtw89_fw_sta_type { /* value of RTW89_H2C_JOININFO_W1_STA_TYPE */
+ RTW89_FW_N_AC_STA = 0,
+ RTW89_FW_AX_STA = 1,
+ RTW89_FW_BE_STA = 2,
+};
-static inline void SET_JOININFO_NET_TYPE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(25, 24));
-}
+struct rtw89_h2c_join {
+ __le32 w0;
+} __packed;
-static inline void SET_JOININFO_WIFI_ROLE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(29, 26));
-}
+struct rtw89_h2c_join_v1 {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+} __packed;
-static inline void SET_JOININFO_SELF_ROLE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(31, 30));
-}
+#define RTW89_H2C_JOININFO_W0_MACID GENMASK(7, 0)
+#define RTW89_H2C_JOININFO_W0_OP BIT(8)
+#define RTW89_H2C_JOININFO_W0_BAND BIT(9)
+#define RTW89_H2C_JOININFO_W0_WMM GENMASK(11, 10)
+#define RTW89_H2C_JOININFO_W0_TGR BIT(12)
+#define RTW89_H2C_JOININFO_W0_ISHESTA BIT(13)
+#define RTW89_H2C_JOININFO_W0_DLBW GENMASK(15, 14)
+#define RTW89_H2C_JOININFO_W0_TF_MAC_PAD GENMASK(17, 16)
+#define RTW89_H2C_JOININFO_W0_DL_T_PE GENMASK(20, 18)
+#define RTW89_H2C_JOININFO_W0_PORT_ID GENMASK(23, 21)
+#define RTW89_H2C_JOININFO_W0_NET_TYPE GENMASK(25, 24)
+#define RTW89_H2C_JOININFO_W0_WIFI_ROLE GENMASK(29, 26)
+#define RTW89_H2C_JOININFO_W0_SELF_ROLE GENMASK(31, 30)
+#define RTW89_H2C_JOININFO_W1_STA_TYPE GENMASK(2, 0)
+#define RTW89_H2C_JOININFO_W1_IS_MLD BIT(3)
+#define RTW89_H2C_JOININFO_W1_MAIN_MACID GENMASK(11, 4)
+#define RTW89_H2C_JOININFO_W1_MLO_MODE BIT(12)
+#define RTW89_H2C_JOININFO_W1_EMLSR_CAB BIT(13)
+#define RTW89_H2C_JOININFO_W1_NSTR_EN BIT(14)
+#define RTW89_H2C_JOININFO_W1_INIT_PWR_STATE BIT(15)
+#define RTW89_H2C_JOININFO_W1_EMLSR_PADDING GENMASK(18, 16)
+#define RTW89_H2C_JOININFO_W1_EMLSR_TRANS_DELAY GENMASK(21, 19)
+#define RTW89_H2C_JOININFO_W2_MACID_EXT GENMASK(7, 0)
+#define RTW89_H2C_JOININFO_W2_MAIN_MACID_EXT GENMASK(15, 8)
struct rtw89_h2c_notify_dbcc {
__le32 w0;
@@ -1741,60 +1944,47 @@ static inline void SET_LOG_CFG_COMP_EXT(void *h2c, u32 val)
le32p_replace_bits((__le32 *)(h2c) + 2, val, GENMASK(31, 0));
}
-static inline void SET_BA_CAM_VALID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, BIT(0));
-}
-
-static inline void SET_BA_CAM_INIT_REQ(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, BIT(1));
-}
-
-static inline void SET_BA_CAM_ENTRY_IDX(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(3, 2));
-}
-
-static inline void SET_BA_CAM_TID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(7, 4));
-}
-
-static inline void SET_BA_CAM_MACID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(15, 8));
-}
-
-static inline void SET_BA_CAM_BMAP_SIZE(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(19, 16));
-}
-
-static inline void SET_BA_CAM_SSN(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c, val, GENMASK(31, 20));
-}
-
-static inline void SET_BA_CAM_UID(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c + 1, val, GENMASK(7, 0));
-}
+struct rtw89_h2c_ba_cam {
+ __le32 w0;
+ __le32 w1;
+} __packed;
-static inline void SET_BA_CAM_STD_EN(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c + 1, val, BIT(8));
-}
+#define RTW89_H2C_BA_CAM_W0_VALID BIT(0)
+#define RTW89_H2C_BA_CAM_W0_INIT_REQ BIT(1)
+#define RTW89_H2C_BA_CAM_W0_ENTRY_IDX GENMASK(3, 2)
+#define RTW89_H2C_BA_CAM_W0_TID GENMASK(7, 4)
+#define RTW89_H2C_BA_CAM_W0_MACID GENMASK(15, 8)
+#define RTW89_H2C_BA_CAM_W0_BMAP_SIZE GENMASK(19, 16)
+#define RTW89_H2C_BA_CAM_W0_SSN GENMASK(31, 20)
+#define RTW89_H2C_BA_CAM_W1_UID GENMASK(7, 0)
+#define RTW89_H2C_BA_CAM_W1_STD_EN BIT(8)
+#define RTW89_H2C_BA_CAM_W1_BAND BIT(9)
+#define RTW89_H2C_BA_CAM_W1_ENTRY_IDX_V1 GENMASK(31, 28)
+
+struct rtw89_h2c_ba_cam_v1 {
+ __le32 w0;
+ __le32 w1;
+} __packed;
-static inline void SET_BA_CAM_BAND(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c + 1, val, BIT(9));
-}
+#define RTW89_H2C_BA_CAM_V1_W0_VALID BIT(0)
+#define RTW89_H2C_BA_CAM_V1_W0_INIT_REQ BIT(1)
+#define RTW89_H2C_BA_CAM_V1_W0_TID_MASK GENMASK(7, 4)
+#define RTW89_H2C_BA_CAM_V1_W0_MACID_MASK GENMASK(15, 8)
+#define RTW89_H2C_BA_CAM_V1_W0_BMAP_SIZE_MASK GENMASK(19, 16)
+#define RTW89_H2C_BA_CAM_V1_W0_SSN_MASK GENMASK(31, 20)
+#define RTW89_H2C_BA_CAM_V1_W1_UID_VALUE_MASK GENMASK(7, 0)
+#define RTW89_H2C_BA_CAM_V1_W1_STD_ENTRY_EN BIT(8)
+#define RTW89_H2C_BA_CAM_V1_W1_BAND_SEL BIT(9)
+#define RTW89_H2C_BA_CAM_V1_W1_MLD_EN BIT(10)
+#define RTW89_H2C_BA_CAM_V1_W1_ENTRY_IDX_MASK GENMASK(31, 24)
+
+struct rtw89_h2c_ba_cam_init {
+ __le32 w0;
+} __packed;
-static inline void SET_BA_CAM_ENTRY_IDX_V1(void *h2c, u32 val)
-{
- le32p_replace_bits((__le32 *)h2c + 1, val, GENMASK(31, 28));
-}
+#define RTW89_H2C_BA_CAM_INIT_USERS_MASK GENMASK(7, 0)
+#define RTW89_H2C_BA_CAM_INIT_OFFSET_MASK GENMASK(19, 12)
+#define RTW89_H2C_BA_CAM_INIT_BAND_SEL BIT(24)
static inline void SET_LPS_PARM_MACID(void *h2c, u32 val)
{
@@ -1846,6 +2036,17 @@ static inline void SET_LPS_PARM_LASTRPWM(void *h2c, u32 val)
le32p_replace_bits((__le32 *)(h2c) + 1, val, GENMASK(15, 8));
}
+struct rtw89_h2c_lps_ch_info {
+ struct {
+ u8 pri_ch;
+ u8 central_ch;
+ u8 bw;
+ u8 band;
+ } __packed info[2];
+
+ __le32 mlo_dbcc_mode_lps;
+} __packed;
+
static inline void RTW89_SET_FWCMD_CPU_EXCEPTION_TYPE(void *cmd, u32 val)
{
le32p_replace_bits((__le32 *)cmd, val, GENMASK(31, 0));
@@ -2128,9 +2329,15 @@ enum rtw89_btc_btf_set {
SET_BT_IGNORE_WLAN_ACT,
SET_BT_TX_PWR,
SET_BT_LNA_CONSTRAIN,
- SET_BT_GOLDEN_RX_RANGE,
+ SET_BT_QUERY_DEV_LIST,
+ SET_BT_QUERY_DEV_INFO,
SET_BT_PSD_REPORT,
SET_H2C_TEST,
+ SET_IOFLD_RF,
+ SET_IOFLD_BB,
+ SET_IOFLD_MAC,
+ SET_IOFLD_SCBD,
+ SET_H2C_MACRO,
SET_MAX1,
};
@@ -2144,6 +2351,10 @@ enum rtw89_btc_cxdrvinfo {
CXDRVINFO_CTRL,
CXDRVINFO_SCAN,
CXDRVINFO_TRX, /* WL traffic to WL fw */
+ CXDRVINFO_TXPWR,
+ CXDRVINFO_FDDT,
+ CXDRVINFO_MLO,
+ CXDRVINFO_OSI,
CXDRVINFO_MAX,
};
@@ -2170,7 +2381,19 @@ struct rtw89_h2c_cxhdr {
u8 len;
} __packed;
+struct rtw89_h2c_cxhdr_v7 {
+ u8 type;
+ u8 ver;
+ u8 len;
+} __packed;
+
+struct rtw89_h2c_cxctrl_v7 {
+ struct rtw89_h2c_cxhdr_v7 hdr;
+ struct rtw89_btc_ctrl_v7 ctrl;
+} __packed;
+
#define H2C_LEN_CXDRVHDR sizeof(struct rtw89_h2c_cxhdr)
+#define H2C_LEN_CXDRVHDR_V7 sizeof(struct rtw89_h2c_cxhdr_v7)
struct rtw89_h2c_cxinit {
struct rtw89_h2c_cxhdr hdr;
@@ -2204,6 +2427,11 @@ struct rtw89_h2c_cxinit {
#define RTW89_H2C_CXINIT_INFO_CX_OTHER BIT(3)
#define RTW89_H2C_CXINIT_INFO_BT_ONLY BIT(4)
+struct rtw89_h2c_cxinit_v7 {
+ struct rtw89_h2c_cxhdr_v7 hdr;
+ struct rtw89_btc_init_info_v7 init;
+} __packed;
+
static inline void RTW89_SET_FWCMD_CXROLE_CONNECT_CNT(void *cmd, u8 val)
{
u8p_replace_bits((u8 *)(cmd) + 2, val, GENMASK(7, 0));
@@ -2569,135 +2797,91 @@ static inline void RTW89_SET_FWCMD_PACKET_OFLD_PKT_LENGTH(void *cmd, u32 val)
le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(31, 16));
}
-static inline void RTW89_SET_FWCMD_SCANOFLD_CH_NUM(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(7, 0));
-}
-
-static inline void RTW89_SET_FWCMD_SCANOFLD_CH_SIZE(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(15, 8));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PERIOD(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(7, 0));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_DWELL(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(15, 8));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_CENTER_CH(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(23, 16));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PRI_CH(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd)), val, GENMASK(31, 24));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_BW(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, GENMASK(2, 0));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_ACTION(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, GENMASK(7, 3));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_NUM_PKT(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, GENMASK(11, 8));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_TX(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, BIT(12));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PAUSE_DATA(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, BIT(13));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_BAND(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, GENMASK(15, 14));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT_ID(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, GENMASK(23, 16));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_DFS(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, BIT(24));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_TX_NULL(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, BIT(25));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_RANDOM(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, BIT(26));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_CFG_TX(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 4), val, BIT(27));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT0(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 8), val, GENMASK(7, 0));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT1(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 8), val, GENMASK(15, 8));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT2(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 8), val, GENMASK(23, 16));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT3(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 8), val, GENMASK(31, 24));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT4(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 12), val, GENMASK(7, 0));
-}
-
-static inline void RTW89_SET_FWCMD_CHINFO_PKT5(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 12), val, GENMASK(15, 8));
-}
+struct rtw89_h2c_chinfo_elem {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+} __packed;
-static inline void RTW89_SET_FWCMD_CHINFO_PKT6(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 12), val, GENMASK(23, 16));
-}
+#define RTW89_H2C_CHINFO_W0_PERIOD GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_W0_DWELL GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_W0_CENTER_CH GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_W0_PRI_CH GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_W1_BW GENMASK(2, 0)
+#define RTW89_H2C_CHINFO_W1_ACTION GENMASK(7, 3)
+#define RTW89_H2C_CHINFO_W1_NUM_PKT GENMASK(11, 8)
+#define RTW89_H2C_CHINFO_W1_TX BIT(12)
+#define RTW89_H2C_CHINFO_W1_PAUSE_DATA BIT(13)
+#define RTW89_H2C_CHINFO_W1_BAND GENMASK(15, 14)
+#define RTW89_H2C_CHINFO_W1_PKT_ID GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_W1_DFS BIT(24)
+#define RTW89_H2C_CHINFO_W1_TX_NULL BIT(25)
+#define RTW89_H2C_CHINFO_W1_RANDOM BIT(26)
+#define RTW89_H2C_CHINFO_W1_CFG_TX BIT(27)
+#define RTW89_H2C_CHINFO_W2_PKT0 GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_W2_PKT1 GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_W2_PKT2 GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_W2_PKT3 GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_W3_PKT4 GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_W3_PKT5 GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_W3_PKT6 GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_W3_PKT7 GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_W4_POWER_IDX GENMASK(15, 0)
+
+struct rtw89_h2c_chinfo_elem_be {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+} __packed;
-static inline void RTW89_SET_FWCMD_CHINFO_PKT7(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 12), val, GENMASK(31, 24));
-}
+#define RTW89_H2C_CHINFO_BE_W0_PERIOD GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_BE_W0_DWELL GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_BE_W0_CENTER_CH GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_BE_W0_PRI_CH GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_BE_W1_BW GENMASK(2, 0)
+#define RTW89_H2C_CHINFO_BE_W1_CH_BAND GENMASK(4, 3)
+#define RTW89_H2C_CHINFO_BE_W1_DFS BIT(5)
+#define RTW89_H2C_CHINFO_BE_W1_PAUSE_DATA BIT(6)
+#define RTW89_H2C_CHINFO_BE_W1_TX_NULL BIT(7)
+#define RTW89_H2C_CHINFO_BE_W1_RANDOM BIT(8)
+#define RTW89_H2C_CHINFO_BE_W1_NOTIFY GENMASK(13, 9)
+#define RTW89_H2C_CHINFO_BE_W1_PROBE BIT(14)
+#define RTW89_H2C_CHINFO_BE_W1_EARLY_LEAVE_CRIT GENMASK(17, 15)
+#define RTW89_H2C_CHINFO_BE_W1_CHKPT_TIMER GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_BE_W2_EARLY_LEAVE_TIME GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_BE_W2_EARLY_LEAVE_TH GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_BE_W2_TX_PKT_CTRL GENMASK(31, 16)
+#define RTW89_H2C_CHINFO_BE_W3_PKT0 GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_BE_W3_PKT1 GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_BE_W3_PKT2 GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_BE_W3_PKT3 GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_BE_W4_PKT4 GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_BE_W4_PKT5 GENMASK(15, 8)
+#define RTW89_H2C_CHINFO_BE_W4_PKT6 GENMASK(23, 16)
+#define RTW89_H2C_CHINFO_BE_W4_PKT7 GENMASK(31, 24)
+#define RTW89_H2C_CHINFO_BE_W5_SW_DEF GENMASK(7, 0)
+#define RTW89_H2C_CHINFO_BE_W5_FW_PROBE0_SSIDS GENMASK(31, 16)
+#define RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_SHORTSSIDS GENMASK(15, 0)
+#define RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_BSSIDS GENMASK(31, 16)
+
+struct rtw89_h2c_chinfo {
+ u8 ch_num;
+ u8 elem_size;
+ u8 arg;
+ u8 rsvd0;
+ struct rtw89_h2c_chinfo_elem elem[] __counted_by(ch_num);
+} __packed;
-static inline void RTW89_SET_FWCMD_CHINFO_POWER_IDX(void *cmd, u32 val)
-{
- le32p_replace_bits((__le32 *)((u8 *)(cmd) + 16), val, GENMASK(15, 0));
-}
+#define RTW89_H2C_CHINFO_ARG_MAC_IDX_MASK BIT(0)
+#define RTW89_H2C_CHINFO_ARG_APPEND_MASK BIT(1)
struct rtw89_h2c_scanofld {
__le32 w0;
@@ -2726,6 +2910,79 @@ struct rtw89_h2c_scanofld {
#define RTW89_H2C_SCANOFLD_W2_NORM_PD GENMASK(15, 0)
#define RTW89_H2C_SCANOFLD_W2_SLOW_PD GENMASK(23, 16)
+struct rtw89_h2c_scanofld_be_macc_role {
+ __le32 w0;
+} __packed;
+
+#define RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_BAND GENMASK(1, 0)
+#define RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_PORT GENMASK(4, 2)
+#define RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_MACID GENMASK(23, 8)
+#define RTW89_H2C_SCANOFLD_BE_MACC_ROLE_W0_OPCH_END GENMASK(31, 24)
+
+struct rtw89_h2c_scanofld_be_opch {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+} __packed;
+
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W0_MACID GENMASK(15, 0)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W0_BAND GENMASK(17, 16)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W0_PORT GENMASK(20, 18)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W0_POLICY GENMASK(22, 21)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W0_TXNULL BIT(23)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W0_POLICY_VAL GENMASK(31, 24)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W1_DURATION GENMASK(7, 0)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W1_CH_BAND GENMASK(9, 8)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W1_BW GENMASK(12, 10)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W1_NOTIFY GENMASK(14, 13)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W1_PRI_CH GENMASK(23, 16)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W1_CENTRAL_CH GENMASK(31, 24)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W2_PKTS_CTRL GENMASK(7, 0)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W2_SW_DEF GENMASK(15, 8)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W2_SS GENMASK(18, 16)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT0 GENMASK(7, 0)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT1 GENMASK(15, 8)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT2 GENMASK(23, 16)
+#define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT3 GENMASK(31, 24)
+
+struct rtw89_h2c_scanofld_be {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+ __le32 w7;
+ struct rtw89_h2c_scanofld_be_macc_role role[];
+} __packed;
+
+#define RTW89_H2C_SCANOFLD_BE_W0_OP GENMASK(1, 0)
+#define RTW89_H2C_SCANOFLD_BE_W0_SCAN_MODE GENMASK(3, 2)
+#define RTW89_H2C_SCANOFLD_BE_W0_REPEAT GENMASK(5, 4)
+#define RTW89_H2C_SCANOFLD_BE_W0_NOTIFY_END BIT(6)
+#define RTW89_H2C_SCANOFLD_BE_W0_LEARN_CH BIT(7)
+#define RTW89_H2C_SCANOFLD_BE_W0_MACID GENMASK(23, 8)
+#define RTW89_H2C_SCANOFLD_BE_W0_PORT GENMASK(26, 24)
+#define RTW89_H2C_SCANOFLD_BE_W0_BAND GENMASK(28, 27)
+#define RTW89_H2C_SCANOFLD_BE_W1_NUM_MACC_ROLE GENMASK(7, 0)
+#define RTW89_H2C_SCANOFLD_BE_W1_NUM_OP GENMASK(15, 8)
+#define RTW89_H2C_SCANOFLD_BE_W1_NORM_PD GENMASK(31, 16)
+#define RTW89_H2C_SCANOFLD_BE_W2_SLOW_PD GENMASK(15, 0)
+#define RTW89_H2C_SCANOFLD_BE_W2_NORM_CY GENMASK(23, 16)
+#define RTW89_H2C_SCANOFLD_BE_W2_OPCH_END GENMASK(31, 24)
+#define RTW89_H2C_SCANOFLD_BE_W3_NUM_SSID GENMASK(7, 0)
+#define RTW89_H2C_SCANOFLD_BE_W3_NUM_SHORT_SSID GENMASK(15, 8)
+#define RTW89_H2C_SCANOFLD_BE_W3_NUM_BSSID GENMASK(23, 16)
+#define RTW89_H2C_SCANOFLD_BE_W3_PROBEID GENMASK(31, 24)
+#define RTW89_H2C_SCANOFLD_BE_W4_PROBE_5G GENMASK(7, 0)
+#define RTW89_H2C_SCANOFLD_BE_W4_PROBE_6G GENMASK(15, 8)
+#define RTW89_H2C_SCANOFLD_BE_W4_DELAY_START GENMASK(31, 16)
+#define RTW89_H2C_SCANOFLD_BE_W5_MLO_MODE GENMASK(31, 0)
+#define RTW89_H2C_SCANOFLD_BE_W6_CHAN_PROHIB_LOW GENMASK(31, 0)
+#define RTW89_H2C_SCANOFLD_BE_W7_CHAN_PROHIB_HIGH GENMASK(31, 0)
+
static inline void RTW89_SET_FWCMD_P2P_MACID(void *cmd, u32 val)
{
le32p_replace_bits((__le32 *)cmd, val, GENMASK(7, 0));
@@ -3160,6 +3417,225 @@ inline void RTW89_SET_FWCMD_MCC_SET_DURATION_DURATION_Y(void *cmd, u32 val)
le32p_replace_bits((__le32 *)cmd + 4, val, GENMASK(31, 0));
}
+enum rtw89_h2c_mrc_sch_types {
+ RTW89_H2C_MRC_SCH_BAND0_ONLY = 0,
+ RTW89_H2C_MRC_SCH_BAND1_ONLY = 1,
+ RTW89_H2C_MRC_SCH_DUAL_BAND = 2,
+};
+
+enum rtw89_h2c_mrc_role_types {
+ RTW89_H2C_MRC_ROLE_WIFI = 0,
+ RTW89_H2C_MRC_ROLE_BT = 1,
+ RTW89_H2C_MRC_ROLE_EMPTY = 2,
+};
+
+#define RTW89_MAC_MRC_MAX_ADD_SLOT_NUM 3
+#define RTW89_MAC_MRC_MAX_ADD_ROLE_NUM_PER_SLOT 1 /* before MLO */
+
+struct rtw89_fw_mrc_add_slot_arg {
+ u16 duration; /* unit: TU */
+ bool courtesy_en;
+ u8 courtesy_period;
+ u8 courtesy_target; /* slot idx */
+
+ unsigned int role_num;
+ struct {
+ enum rtw89_h2c_mrc_role_types role_type;
+ bool is_master;
+ bool en_tx_null;
+ enum rtw89_band band;
+ enum rtw89_bandwidth bw;
+ u8 macid;
+ u8 central_ch;
+ u8 primary_ch;
+ u8 null_early; /* unit: TU */
+
+ /* if MLD, for macid: [0, chip::support_mld_num)
+ * otherwise, for macid: [0, 32)
+ */
+ u32 macid_main_bitmap;
+ /* for MLD, bit X maps to macid: X + chip::support_mld_num */
+ u32 macid_paired_bitmap;
+ } roles[RTW89_MAC_MRC_MAX_ADD_ROLE_NUM_PER_SLOT];
+};
+
+struct rtw89_fw_mrc_add_arg {
+ u8 sch_idx;
+ enum rtw89_h2c_mrc_sch_types sch_type;
+ bool btc_in_sch;
+
+ unsigned int slot_num;
+ struct rtw89_fw_mrc_add_slot_arg slots[RTW89_MAC_MRC_MAX_ADD_SLOT_NUM];
+};
+
+struct rtw89_h2c_mrc_add_role {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 macid_main_bitmap;
+ __le32 macid_paired_bitmap;
+} __packed;
+
+#define RTW89_H2C_MRC_ADD_ROLE_W0_MACID GENMASK(15, 0)
+#define RTW89_H2C_MRC_ADD_ROLE_W0_ROLE_TYPE GENMASK(23, 16)
+#define RTW89_H2C_MRC_ADD_ROLE_W0_IS_MASTER BIT(24)
+#define RTW89_H2C_MRC_ADD_ROLE_W0_IS_ALT_ROLE BIT(25)
+#define RTW89_H2C_MRC_ADD_ROLE_W0_TX_NULL_EN BIT(26)
+#define RTW89_H2C_MRC_ADD_ROLE_W0_ROLE_ALT_EN BIT(27)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_CENTRAL_CH_SEG GENMASK(7, 0)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_PRI_CH GENMASK(15, 8)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_BW GENMASK(19, 16)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_CH_BAND_TYPE GENMASK(21, 20)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_RFK_BY_PASS BIT(22)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_CAN_BTC BIT(23)
+#define RTW89_H2C_MRC_ADD_ROLE_W1_NULL_EARLY GENMASK(31, 24)
+#define RTW89_H2C_MRC_ADD_ROLE_W2_ALT_PERIOD GENMASK(7, 0)
+#define RTW89_H2C_MRC_ADD_ROLE_W2_ALT_ROLE_TYPE GENMASK(15, 8)
+#define RTW89_H2C_MRC_ADD_ROLE_W2_ALT_ROLE_MACID GENMASK(23, 16)
+
+struct rtw89_h2c_mrc_add_slot {
+ __le32 w0;
+ __le32 w1;
+ struct rtw89_h2c_mrc_add_role roles[];
+} __packed;
+
+#define RTW89_H2C_MRC_ADD_SLOT_W0_DURATION GENMASK(15, 0)
+#define RTW89_H2C_MRC_ADD_SLOT_W0_COURTESY_EN BIT(17)
+#define RTW89_H2C_MRC_ADD_SLOT_W0_ROLE_NUM GENMASK(31, 24)
+#define RTW89_H2C_MRC_ADD_SLOT_W1_COURTESY_PERIOD GENMASK(7, 0)
+#define RTW89_H2C_MRC_ADD_SLOT_W1_COURTESY_TARGET GENMASK(15, 8)
+
+struct rtw89_h2c_mrc_add {
+ __le32 w0;
+ /* Logically append flexible struct rtw89_h2c_mrc_add_slot, but there
+ * are other flexible array inside it. We cannot access them correctly
+ * through this struct. So, in case misusing, we don't really declare
+ * it here.
+ */
+} __packed;
+
+#define RTW89_H2C_MRC_ADD_W0_SCH_IDX GENMASK(3, 0)
+#define RTW89_H2C_MRC_ADD_W0_SCH_TYPE GENMASK(7, 4)
+#define RTW89_H2C_MRC_ADD_W0_SLOT_NUM GENMASK(15, 8)
+#define RTW89_H2C_MRC_ADD_W0_BTC_IN_SCH BIT(16)
+
+enum rtw89_h2c_mrc_start_actions {
+ RTW89_H2C_MRC_START_ACTION_START_NEW = 0,
+ RTW89_H2C_MRC_START_ACTION_REPLACE_OLD = 1,
+};
+
+struct rtw89_fw_mrc_start_arg {
+ u8 sch_idx;
+ u8 old_sch_idx;
+ u64 start_tsf;
+ enum rtw89_h2c_mrc_start_actions action;
+};
+
+struct rtw89_h2c_mrc_start {
+ __le32 w0;
+ __le32 start_tsf_low;
+ __le32 start_tsf_high;
+} __packed;
+
+#define RTW89_H2C_MRC_START_W0_SCH_IDX GENMASK(3, 0)
+#define RTW89_H2C_MRC_START_W0_OLD_SCH_IDX GENMASK(7, 4)
+#define RTW89_H2C_MRC_START_W0_ACTION GENMASK(15, 8)
+
+struct rtw89_h2c_mrc_del {
+ __le32 w0;
+} __packed;
+
+#define RTW89_H2C_MRC_DEL_W0_SCH_IDX GENMASK(3, 0)
+#define RTW89_H2C_MRC_DEL_W0_DEL_ALL BIT(4)
+#define RTW89_H2C_MRC_DEL_W0_STOP_ONLY BIT(5)
+#define RTW89_H2C_MRC_DEL_W0_SPECIFIC_ROLE_EN BIT(6)
+#define RTW89_H2C_MRC_DEL_W0_STOP_SLOT_IDX GENMASK(15, 8)
+#define RTW89_H2C_MRC_DEL_W0_SPECIFIC_ROLE_MACID GENMASK(31, 16)
+
+#define RTW89_MAC_MRC_MAX_REQ_TSF_NUM 2
+
+struct rtw89_fw_mrc_req_tsf_arg {
+ unsigned int num;
+ struct {
+ u8 band;
+ u8 port;
+ } infos[RTW89_MAC_MRC_MAX_REQ_TSF_NUM];
+};
+
+struct rtw89_h2c_mrc_req_tsf {
+ u8 req_tsf_num;
+ u8 infos[] __counted_by(req_tsf_num);
+} __packed;
+
+#define RTW89_H2C_MRC_REQ_TSF_INFO_BAND GENMASK(3, 0)
+#define RTW89_H2C_MRC_REQ_TSF_INFO_PORT GENMASK(7, 4)
+
+enum rtw89_h2c_mrc_upd_bitmap_actions {
+ RTW89_H2C_MRC_UPD_BITMAP_ACTION_DEL = 0,
+ RTW89_H2C_MRC_UPD_BITMAP_ACTION_ADD = 1,
+};
+
+struct rtw89_fw_mrc_upd_bitmap_arg {
+ u8 sch_idx;
+ u8 macid;
+ u8 client_macid;
+ enum rtw89_h2c_mrc_upd_bitmap_actions action;
+};
+
+struct rtw89_h2c_mrc_upd_bitmap {
+ __le32 w0;
+ __le32 w1;
+} __packed;
+
+#define RTW89_H2C_MRC_UPD_BITMAP_W0_SCH_IDX GENMASK(3, 0)
+#define RTW89_H2C_MRC_UPD_BITMAP_W0_ACTION BIT(4)
+#define RTW89_H2C_MRC_UPD_BITMAP_W0_MACID GENMASK(31, 16)
+#define RTW89_H2C_MRC_UPD_BITMAP_W1_CLIENT_MACID GENMASK(15, 0)
+
+struct rtw89_fw_mrc_sync_arg {
+ u8 offset; /* unit: TU */
+ struct {
+ u8 band;
+ u8 port;
+ } src, dest;
+};
+
+struct rtw89_h2c_mrc_sync {
+ __le32 w0;
+ __le32 w1;
+} __packed;
+
+#define RTW89_H2C_MRC_SYNC_W0_SYNC_EN BIT(0)
+#define RTW89_H2C_MRC_SYNC_W0_SRC_PORT GENMASK(11, 8)
+#define RTW89_H2C_MRC_SYNC_W0_SRC_BAND GENMASK(15, 12)
+#define RTW89_H2C_MRC_SYNC_W0_DEST_PORT GENMASK(19, 16)
+#define RTW89_H2C_MRC_SYNC_W0_DEST_BAND GENMASK(23, 20)
+#define RTW89_H2C_MRC_SYNC_W1_OFFSET GENMASK(15, 0)
+
+struct rtw89_fw_mrc_upd_duration_arg {
+ u8 sch_idx;
+ u64 start_tsf;
+
+ unsigned int slot_num;
+ struct {
+ u8 slot_idx;
+ u16 duration; /* unit: TU */
+ } slots[RTW89_MAC_MRC_MAX_ADD_SLOT_NUM];
+};
+
+struct rtw89_h2c_mrc_upd_duration {
+ __le32 w0;
+ __le32 start_tsf_low;
+ __le32 start_tsf_high;
+ __le32 slots[];
+} __packed;
+
+#define RTW89_H2C_MRC_UPD_DURATION_W0_SCH_IDX GENMASK(3, 0)
+#define RTW89_H2C_MRC_UPD_DURATION_W0_SLOT_NUM GENMASK(15, 8)
+#define RTW89_H2C_MRC_UPD_DURATION_W0_BTC_IN_SCH BIT(16)
+#define RTW89_H2C_MRC_UPD_DURATION_SLOT_SLOT_IDX GENMASK(7, 0)
+#define RTW89_H2C_MRC_UPD_DURATION_SLOT_DURATION GENMASK(31, 16)
+
#define RTW89_C2H_HEADER_LEN 8
struct rtw89_c2h_hdr {
@@ -3275,20 +3751,29 @@ struct rtw89_c2h_ra_rpt {
#define RTW89_GET_MAC_C2H_PKTOFLD_LEN(c2h) \
le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(31, 16))
-#define RTW89_GET_MAC_C2H_SCANOFLD_PRI_CH(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(7, 0))
-#define RTW89_GET_MAC_C2H_SCANOFLD_RSP(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(19, 16))
-#define RTW89_GET_MAC_C2H_SCANOFLD_STATUS(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(23, 20))
-#define RTW89_GET_MAC_C2H_ACTUAL_PERIOD(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(31, 24))
-#define RTW89_GET_MAC_C2H_SCANOFLD_TX_FAIL(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 5), GENMASK(3, 0))
-#define RTW89_GET_MAC_C2H_SCANOFLD_AIR_DENSITY(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 5), GENMASK(7, 4))
-#define RTW89_GET_MAC_C2H_SCANOFLD_BAND(c2h) \
- le32_get_bits(*((const __le32 *)(c2h) + 5), GENMASK(25, 24))
+struct rtw89_c2h_scanofld {
+ __le32 w0;
+ __le32 w1;
+ __le32 w2;
+ __le32 w3;
+ __le32 w4;
+ __le32 w5;
+ __le32 w6;
+ __le32 w7;
+} __packed;
+
+#define RTW89_C2H_SCANOFLD_W2_PRI_CH GENMASK(7, 0)
+#define RTW89_C2H_SCANOFLD_W2_RSN GENMASK(19, 16)
+#define RTW89_C2H_SCANOFLD_W2_STATUS GENMASK(23, 20)
+#define RTW89_C2H_SCANOFLD_W2_PERIOD GENMASK(31, 24)
+#define RTW89_C2H_SCANOFLD_W5_TX_FAIL GENMASK(3, 0)
+#define RTW89_C2H_SCANOFLD_W5_AIR_DENSITY GENMASK(7, 4)
+#define RTW89_C2H_SCANOFLD_W5_BAND GENMASK(25, 24)
+#define RTW89_C2H_SCANOFLD_W5_MAC_IDX BIT(26)
+#define RTW89_C2H_SCANOFLD_W6_SW_DEF GENMASK(7, 0)
+#define RTW89_C2H_SCANOFLD_W6_EXPECT_PERIOD GENMASK(15, 8)
+#define RTW89_C2H_SCANOFLD_W6_FW_DEF GENMASK(23, 16)
+#define RTW89_C2H_SCANOFLD_W7_REPORT_TSF GENMASK(31, 0)
#define RTW89_GET_MAC_C2H_MCC_RCV_ACK_GROUP(c2h) \
le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(1, 0))
@@ -3339,6 +3824,36 @@ static_assert(sizeof(struct rtw89_mac_mcc_tsf_rpt) <= RTW89_COMPLETION_BUF_SIZE)
#define RTW89_GET_MAC_C2H_MCC_STATUS_RPT_TSF_HIGH(c2h) \
le32_get_bits(*((const __le32 *)(c2h) + 4), GENMASK(31, 0))
+struct rtw89_mac_mrc_tsf_rpt {
+ unsigned int num;
+ u64 tsfs[RTW89_MAC_MRC_MAX_REQ_TSF_NUM];
+};
+
+static_assert(sizeof(struct rtw89_mac_mrc_tsf_rpt) <= RTW89_COMPLETION_BUF_SIZE);
+
+struct rtw89_c2h_mrc_tsf_rpt_info {
+ __le32 tsf_low;
+ __le32 tsf_high;
+} __packed;
+
+struct rtw89_c2h_mrc_tsf_rpt {
+ struct rtw89_c2h_hdr hdr;
+ __le32 w2;
+ struct rtw89_c2h_mrc_tsf_rpt_info infos[];
+} __packed;
+
+#define RTW89_C2H_MRC_TSF_RPT_W2_REQ_TSF_NUM GENMASK(7, 0)
+
+struct rtw89_c2h_mrc_status_rpt {
+ struct rtw89_c2h_hdr hdr;
+ __le32 w2;
+ __le32 tsf_low;
+ __le32 tsf_high;
+} __packed;
+
+#define RTW89_C2H_MRC_STATUS_RPT_W2_STATUS GENMASK(5, 0)
+#define RTW89_C2H_MRC_STATUS_RPT_W2_SCH_IDX GENMASK(7, 6)
+
struct rtw89_c2h_pkt_ofld_rsp {
__le32 w0;
__le32 w1;
@@ -3647,6 +4162,9 @@ struct rtw89_fw_h2c_rf_reg_info {
#define H2C_FUNC_MAC_BCN_UPD 0x5
#define H2C_FUNC_MAC_DCTLINFO_UD_V1 0x9
#define H2C_FUNC_MAC_CCTLINFO_UD_V1 0xa
+#define H2C_FUNC_MAC_DCTLINFO_UD_V2 0xc
+#define H2C_FUNC_MAC_BCN_UPD_BE 0xd
+#define H2C_FUNC_MAC_CCTLINFO_UD_G7 0x11
/* CLASS 6 - Address CAM */
#define H2C_CL_MAC_ADDR_CAM_UPDATE 0x6
@@ -3672,6 +4190,8 @@ enum rtw89_fw_ofld_h2c_func {
H2C_FUNC_CFG_BCNFLTR = 0x1e,
H2C_FUNC_OFLD_RSSI = 0x1f,
H2C_FUNC_OFLD_TP = 0x20,
+ H2C_FUNC_MAC_MACID_PAUSE_SLEEP = 0x28,
+ H2C_FUNC_SCANOFLD_BE = 0x2c,
NUM_OF_RTW89_FW_OFLD_H2C_FUNC,
};
@@ -3683,6 +4203,14 @@ enum rtw89_fw_ofld_h2c_func {
RTW89_FW_OFLD_WAIT_COND(RTW89_PKT_OFLD_WAIT_TAG(pkt_id, pkt_op), \
H2C_FUNC_PACKET_OFLD)
+#define RTW89_SCANOFLD_WAIT_COND_ADD_CH RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_ADD_SCANOFLD_CH)
+
+#define RTW89_SCANOFLD_WAIT_COND_START RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_SCANOFLD)
+#define RTW89_SCANOFLD_WAIT_COND_STOP RTW89_FW_OFLD_WAIT_COND(1, H2C_FUNC_SCANOFLD)
+#define RTW89_SCANOFLD_BE_WAIT_COND_START RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_SCANOFLD_BE)
+#define RTW89_SCANOFLD_BE_WAIT_COND_STOP RTW89_FW_OFLD_WAIT_COND(1, H2C_FUNC_SCANOFLD_BE)
+
+
/* CLASS 10 - Security CAM */
#define H2C_CL_MAC_SEC_CAM 0xa
#define H2C_FUNC_MAC_SEC_UPD 0x1
@@ -3690,6 +4218,8 @@ enum rtw89_fw_ofld_h2c_func {
/* CLASS 12 - BA CAM */
#define H2C_CL_BA_CAM 0xc
#define H2C_FUNC_MAC_BA_CAM 0x0
+#define H2C_FUNC_MAC_BA_CAM_V1 0x1
+#define H2C_FUNC_MAC_BA_CAM_INIT 0x2
/* CLASS 14 - MCC */
#define H2C_CL_MCC 0xe
@@ -3710,15 +4240,50 @@ enum rtw89_mcc_h2c_func {
#define RTW89_MCC_WAIT_COND(group, func) \
((group) * NUM_OF_RTW89_MCC_H2C_FUNC + (func))
+/* CLASS 24 - MRC */
+#define H2C_CL_MRC 0x18
+enum rtw89_mrc_h2c_func {
+ H2C_FUNC_MRC_REQ_TSF = 0x0,
+ H2C_FUNC_ADD_MRC = 0x1,
+ H2C_FUNC_START_MRC = 0x2,
+ H2C_FUNC_DEL_MRC = 0x3,
+ H2C_FUNC_MRC_SYNC = 0x4,
+ H2C_FUNC_MRC_UPD_DURATION = 0x5,
+ H2C_FUNC_MRC_UPD_BITMAP = 0x6,
+
+ NUM_OF_RTW89_MRC_H2C_FUNC,
+};
+
+/* can consider MRC's sch_idx as MCC's group */
+#define RTW89_MRC_WAIT_COND(sch_idx, func) \
+ ((sch_idx) * NUM_OF_RTW89_MRC_H2C_FUNC + (func))
+
+#define RTW89_MRC_WAIT_COND_REQ_TSF \
+ RTW89_MRC_WAIT_COND(0 /* don't care */, H2C_FUNC_MRC_REQ_TSF)
+
#define H2C_CAT_OUTSRC 0x2
#define H2C_CL_OUTSRC_RA 0x1
#define H2C_FUNC_OUTSRC_RA_MACIDCFG 0x0
+#define H2C_CL_OUTSRC_DM 0x2
+#define H2C_FUNC_FW_LPS_CH_INFO 0xb
+
#define H2C_CL_OUTSRC_RF_REG_A 0x8
#define H2C_CL_OUTSRC_RF_REG_B 0x9
#define H2C_CL_OUTSRC_RF_FW_NOTIFY 0xa
#define H2C_FUNC_OUTSRC_RF_GET_MCCCH 0x2
+#define H2C_CL_OUTSRC_RF_FW_RFK 0xb
+
+enum rtw89_rfk_offload_h2c_func {
+ H2C_FUNC_RFK_TSSI_OFFLOAD = 0x0,
+ H2C_FUNC_RFK_IQK_OFFLOAD = 0x1,
+ H2C_FUNC_RFK_DPK_OFFLOAD = 0x3,
+ H2C_FUNC_RFK_TXGAPK_OFFLOAD = 0x4,
+ H2C_FUNC_RFK_DACK_OFFLOAD = 0x5,
+ H2C_FUNC_RFK_RXDCK_OFFLOAD = 0x6,
+ H2C_FUNC_RFK_PRE_NOTIFY = 0x8,
+};
struct rtw89_fw_h2c_rf_get_mccch {
__le32 ch_0;
@@ -3729,6 +4294,114 @@ struct rtw89_fw_h2c_rf_get_mccch {
__le32 current_band_type;
} __packed;
+#define NUM_OF_RTW89_FW_RFK_PATH 2
+#define NUM_OF_RTW89_FW_RFK_TBL 3
+
+struct rtw89_fw_h2c_rfk_pre_info {
+ struct {
+ __le32 ch[NUM_OF_RTW89_FW_RFK_PATH][NUM_OF_RTW89_FW_RFK_TBL];
+ __le32 band[NUM_OF_RTW89_FW_RFK_PATH][NUM_OF_RTW89_FW_RFK_TBL];
+ } __packed dbcc;
+
+ __le32 mlo_mode;
+ struct {
+ __le32 cur_ch[NUM_OF_RTW89_FW_RFK_PATH];
+ __le32 cur_band[NUM_OF_RTW89_FW_RFK_PATH];
+ } __packed tbl;
+
+ __le32 phy_idx;
+ __le32 cur_band;
+ __le32 cur_bw;
+ __le32 cur_center_ch;
+
+ __le32 ktbl_sel0;
+ __le32 ktbl_sel1;
+ __le32 rfmod0;
+ __le32 rfmod1;
+
+ __le32 mlo_1_1;
+ __le32 rfe_type;
+ __le32 drv_mode;
+
+ struct {
+ __le32 ch[NUM_OF_RTW89_FW_RFK_PATH];
+ __le32 band[NUM_OF_RTW89_FW_RFK_PATH];
+ } __packed mlo;
+} __packed;
+
+struct rtw89_h2c_rf_tssi {
+ __le16 len;
+ u8 phy;
+ u8 ch;
+ u8 bw;
+ u8 band;
+ u8 hwtx_en;
+ u8 cv;
+ s8 curr_tssi_cck_de[2];
+ s8 curr_tssi_cck_de_20m[2];
+ s8 curr_tssi_cck_de_40m[2];
+ s8 curr_tssi_efuse_cck_de[2];
+ s8 curr_tssi_ofdm_de[2];
+ s8 curr_tssi_ofdm_de_20m[2];
+ s8 curr_tssi_ofdm_de_40m[2];
+ s8 curr_tssi_ofdm_de_80m[2];
+ s8 curr_tssi_ofdm_de_160m[2];
+ s8 curr_tssi_ofdm_de_320m[2];
+ s8 curr_tssi_efuse_ofdm_de[2];
+ s8 curr_tssi_ofdm_de_diff_20m[2];
+ s8 curr_tssi_ofdm_de_diff_80m[2];
+ s8 curr_tssi_ofdm_de_diff_160m[2];
+ s8 curr_tssi_ofdm_de_diff_320m[2];
+ s8 curr_tssi_trim_de[2];
+ u8 pg_thermal[2];
+ u8 ftable[2][128];
+ u8 tssi_mode;
+} __packed;
+
+struct rtw89_h2c_rf_iqk {
+ __le32 phy_idx;
+ __le32 dbcc;
+} __packed;
+
+struct rtw89_h2c_rf_dpk {
+ u8 len;
+ u8 phy;
+ u8 dpk_enable;
+ u8 kpath;
+ u8 cur_band;
+ u8 cur_bw;
+ u8 cur_ch;
+ u8 dpk_dbg_en;
+} __packed;
+
+struct rtw89_h2c_rf_txgapk {
+ u8 len;
+ u8 ktype;
+ u8 phy;
+ u8 kpath;
+ u8 band;
+ u8 bw;
+ u8 ch;
+ u8 cv;
+} __packed;
+
+struct rtw89_h2c_rf_dack {
+ __le32 len;
+ __le32 phy;
+ __le32 type;
+} __packed;
+
+struct rtw89_h2c_rf_rxdck {
+ u8 len;
+ u8 phy;
+ u8 is_afe;
+ u8 kpath;
+ u8 cur_band;
+ u8 cur_bw;
+ u8 cur_ch;
+ u8 rxdck_dbg_en;
+} __packed;
+
enum rtw89_rf_log_type {
RTW89_RF_RUN_LOG = 0,
RTW89_RF_RPT_LOG = 1,
@@ -3800,6 +4473,12 @@ struct rtw89_c2h_rf_txgapk_rpt_log {
u8 rsv1;
} __packed;
+struct rtw89_c2h_rfk_report {
+ struct rtw89_c2h_hdr hdr;
+ u8 state; /* enum rtw89_rfk_report_state */
+ u8 version;
+} __packed;
+
#define RTW89_FW_RSVD_PLE_SIZE 0x800
#define RTW89_FW_BACKTRACE_INFO_SIZE 8
@@ -3830,21 +4509,39 @@ void rtw89_h2c_pkt_set_hdr(struct rtw89_dev *rtwdev, struct sk_buff *skb,
u8 type, u8 cat, u8 class, u8 func,
bool rack, bool dack, u32 len);
int rtw89_fw_h2c_default_cmac_tbl(struct rtw89_dev *rtwdev,
- struct rtw89_vif *rtwvif);
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta);
+int rtw89_fw_h2c_default_cmac_tbl_g7(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta);
+int rtw89_fw_h2c_default_dmac_tbl_v2(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta);
int rtw89_fw_h2c_assoc_cmac_tbl(struct rtw89_dev *rtwdev,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta);
+int rtw89_fw_h2c_assoc_cmac_tbl_g7(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
+int rtw89_fw_h2c_ampdu_cmac_tbl_g7(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
int rtw89_fw_h2c_txtime_cmac_tbl(struct rtw89_dev *rtwdev,
struct rtw89_sta *rtwsta);
int rtw89_fw_h2c_txpath_cmac_tbl(struct rtw89_dev *rtwdev,
struct rtw89_sta *rtwsta);
int rtw89_fw_h2c_update_beacon(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif);
+int rtw89_fw_h2c_update_beacon_be(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif);
int rtw89_fw_h2c_cam(struct rtw89_dev *rtwdev, struct rtw89_vif *vif,
struct rtw89_sta *rtwsta, const u8 *scan_mac_addr);
int rtw89_fw_h2c_dctl_sec_cam_v1(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif,
struct rtw89_sta *rtwsta);
+int rtw89_fw_h2c_dctl_sec_cam_v2(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta);
void rtw89_fw_c2h_irqsafe(struct rtw89_dev *rtwdev, struct sk_buff *c2h);
void rtw89_fw_c2h_work(struct work_struct *work);
int rtw89_fw_h2c_role_maintain(struct rtw89_dev *rtwdev,
@@ -3866,25 +4563,41 @@ int rtw89_fw_h2c_rssi_offload(struct rtw89_dev *rtwdev,
struct rtw89_rx_phy_ppdu *phy_ppdu);
int rtw89_fw_h2c_tp_offload(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif);
int rtw89_fw_h2c_ra(struct rtw89_dev *rtwdev, struct rtw89_ra_info *ra, bool csi);
-int rtw89_fw_h2c_cxdrv_init(struct rtw89_dev *rtwdev);
-int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev);
-int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev);
-int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev);
-int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev);
-int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev);
-int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev);
+int rtw89_fw_h2c_cxdrv_init(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_init_v7(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_ctrl_v7(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev, u8 type);
+int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev, u8 type);
int rtw89_fw_h2c_del_pkt_offload(struct rtw89_dev *rtwdev, u8 id);
int rtw89_fw_h2c_add_pkt_offload(struct rtw89_dev *rtwdev, u8 *id,
struct sk_buff *skb_ofld);
-int rtw89_fw_h2c_scan_list_offload(struct rtw89_dev *rtwdev, int len,
+int rtw89_fw_h2c_scan_list_offload(struct rtw89_dev *rtwdev, int ch_num,
struct list_head *chan_list);
+int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num,
+ struct list_head *chan_list);
int rtw89_fw_h2c_scan_offload(struct rtw89_dev *rtwdev,
struct rtw89_scan_option *opt,
struct rtw89_vif *vif);
+int rtw89_fw_h2c_scan_offload_be(struct rtw89_dev *rtwdev,
+ struct rtw89_scan_option *opt,
+ struct rtw89_vif *vif);
int rtw89_fw_h2c_rf_reg(struct rtw89_dev *rtwdev,
struct rtw89_fw_h2c_rf_reg_info *info,
u16 len, u8 page);
int rtw89_fw_h2c_rf_ntfy_mcc(struct rtw89_dev *rtwdev);
+int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx);
+int rtw89_fw_h2c_rf_tssi(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx,
+ enum rtw89_tssi_mode tssi_mode);
+int rtw89_fw_h2c_rf_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+int rtw89_fw_h2c_rf_dpk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+int rtw89_fw_h2c_rf_txgapk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+int rtw89_fw_h2c_rf_dack(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+int rtw89_fw_h2c_rf_rxdck(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
int rtw89_fw_h2c_raw_with_hdr(struct rtw89_dev *rtwdev,
u8 h2c_class, u8 h2c_func, u8 *buf, u16 len,
bool rack, bool dack);
@@ -3898,10 +4611,16 @@ void rtw89_fw_release_general_pkt_list_vif(struct rtw89_dev *rtwdev,
void rtw89_fw_release_general_pkt_list(struct rtw89_dev *rtwdev, bool notify_fw);
int rtw89_fw_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
bool valid, struct ieee80211_ampdu_params *params);
+int rtw89_fw_h2c_ba_cam_v1(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
+ bool valid, struct ieee80211_ampdu_params *params);
void rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(struct rtw89_dev *rtwdev);
+int rtw89_fw_h2c_init_ba_cam_users(struct rtw89_dev *rtwdev, u8 users,
+ u8 offset, u8 mac_idx);
int rtw89_fw_h2c_lps_parm(struct rtw89_dev *rtwdev,
struct rtw89_lps_parm *lps_param);
+int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif);
struct sk_buff *rtw89_fw_h2c_alloc_skb_with_hdr(struct rtw89_dev *rtwdev, u32 len);
struct sk_buff *rtw89_fw_h2c_alloc_skb_no_hdr(struct rtw89_dev *rtwdev, u32 len);
int rtw89_fw_msg_reg(struct rtw89_dev *rtwdev,
@@ -3916,6 +4635,10 @@ void rtw89_hw_scan_complete(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
int rtw89_hw_scan_offload(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
bool enable);
void rtw89_hw_scan_abort(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif);
+int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool connected);
+int rtw89_hw_scan_add_chan_list_be(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool connected);
int rtw89_fw_h2c_trigger_cpu_exception(struct rtw89_dev *rtwdev);
int rtw89_fw_h2c_pkt_drop(struct rtw89_dev *rtwdev,
const struct rtw89_pkt_drop_params *params);
@@ -3956,6 +4679,20 @@ int rtw89_fw_h2c_mcc_sync(struct rtw89_dev *rtwdev, u8 group, u8 source,
u8 target, u8 offset);
int rtw89_fw_h2c_mcc_set_duration(struct rtw89_dev *rtwdev,
const struct rtw89_fw_mcc_duration *p);
+int rtw89_fw_h2c_mrc_add(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_add_arg *arg);
+int rtw89_fw_h2c_mrc_start(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_start_arg *arg);
+int rtw89_fw_h2c_mrc_del(struct rtw89_dev *rtwdev, u8 sch_idx);
+int rtw89_fw_h2c_mrc_req_tsf(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_req_tsf_arg *arg,
+ struct rtw89_mac_mrc_tsf_rpt *rpt);
+int rtw89_fw_h2c_mrc_upd_bitmap(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_upd_bitmap_arg *arg);
+int rtw89_fw_h2c_mrc_sync(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_sync_arg *arg);
+int rtw89_fw_h2c_mrc_upd_duration(struct rtw89_dev *rtwdev,
+ const struct rtw89_fw_mrc_upd_duration_arg *arg);
static inline void rtw89_fw_h2c_init_ba_cam(struct rtw89_dev *rtwdev)
{
@@ -3965,6 +4702,65 @@ static inline void rtw89_fw_h2c_init_ba_cam(struct rtw89_dev *rtwdev)
rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(rtwdev);
}
+static inline int rtw89_chip_h2c_default_cmac_tbl(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ return chip->ops->h2c_default_cmac_tbl(rtwdev, rtwvif, rtwsta);
+}
+
+static inline int rtw89_chip_h2c_default_dmac_tbl(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif,
+ struct rtw89_sta *rtwsta)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ if (chip->ops->h2c_default_dmac_tbl)
+ return chip->ops->h2c_default_dmac_tbl(rtwdev, rtwvif, rtwsta);
+
+ return 0;
+}
+
+static inline int rtw89_chip_h2c_update_beacon(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ return chip->ops->h2c_update_beacon(rtwdev, rtwvif);
+}
+
+static inline int rtw89_chip_h2c_assoc_cmac_tbl(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ return chip->ops->h2c_assoc_cmac_tbl(rtwdev, vif, sta);
+}
+
+static inline int rtw89_chip_h2c_ampdu_cmac_tbl(struct rtw89_dev *rtwdev,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ if (chip->ops->h2c_ampdu_cmac_tbl)
+ return chip->ops->h2c_ampdu_cmac_tbl(rtwdev, vif, sta);
+
+ return 0;
+}
+
+static inline
+int rtw89_chip_h2c_ba_cam(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta,
+ bool valid, struct ieee80211_ampdu_params *params)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+
+ return chip->ops->h2c_ba_cam(rtwdev, rtwsta, valid, params);
+}
+
/* must consider compatibility; don't insert new in the mid */
struct rtw89_fw_txpwr_byrate_entry {
u8 band;
diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c
index c485ef2cc3d3..aa5b396b5d2b 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.c
+++ b/drivers/net/wireless/realtek/rtw89/mac.c
@@ -1625,7 +1625,7 @@ const struct rtw89_mac_size_set rtw89_mac_size = {
.wde_size19 = {RTW89_WDE_PG_64, 3328, 0,},
/* PCIE */
.ple_size0 = {RTW89_PLE_PG_128, 1520, 16,},
- .ple_size0_v1 = {RTW89_PLE_PG_128, 2672, 256, 212992,},
+ .ple_size0_v1 = {RTW89_PLE_PG_128, 2688, 240, 212992,},
.ple_size3_v1 = {RTW89_PLE_PG_128, 2928, 0, 212992,},
/* DLFW */
.ple_size4 = {RTW89_PLE_PG_128, 64, 1472,},
@@ -1650,8 +1650,8 @@ const struct rtw89_mac_size_set rtw89_mac_size = {
.wde_qt17 = {0, 0, 0, 0,},
/* 8852C PCIE SCC */
.wde_qt18 = {3228, 60, 0, 40,},
- .ple_qt0 = {320, 0, 32, 16, 13, 13, 292, 0, 32, 18, 1, 4, 0,},
- .ple_qt1 = {320, 0, 32, 16, 1944, 1944, 2223, 0, 1963, 1949, 1, 1935, 0,},
+ .ple_qt0 = {320, 320, 32, 16, 13, 13, 292, 292, 64, 18, 1, 4, 0,},
+ .ple_qt1 = {320, 320, 32, 16, 1316, 1316, 1595, 1595, 1367, 1321, 1, 1307, 0,},
/* PCIE SCC */
.ple_qt4 = {264, 0, 16, 20, 26, 13, 356, 0, 32, 40, 8,},
/* PCIE SCC */
@@ -1677,7 +1677,7 @@ const struct rtw89_mac_size_set rtw89_mac_size = {
.ple_qt_52b_wow = {147, 0, 16, 20, 157, 13, 133, 0, 172, 14, 24, 0,},
/* 8851B PCIE WOW */
.ple_qt_51b_wow = {147, 0, 16, 20, 157, 13, 133, 0, 172, 14, 24, 0,},
- .ple_rsvd_qt0 = {2, 112, 56, 6, 6, 6, 6, 0, 0, 62,},
+ .ple_rsvd_qt0 = {2, 107, 107, 6, 6, 6, 6, 0, 0, 0,},
.ple_rsvd_qt1 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
.rsvd0_size0 = {212992, 0,},
.rsvd1_size0 = {587776, 2048,},
@@ -2025,6 +2025,9 @@ void rtw89_mac_hw_mgnt_sec(struct rtw89_dev *rtwdev, bool enable)
{
u32 msk32 = B_AX_UC_MGNT_DEC | B_AX_BMC_MGNT_DEC;
+ if (rtwdev->chip->chip_gen != RTW89_CHIP_AX)
+ return;
+
if (enable)
rtw89_write32_set(rtwdev, R_AX_SEC_ENG_CTRL, msk32);
else
@@ -2537,6 +2540,9 @@ static int spatial_reuse_init_ax(struct rtw89_dev *rtwdev, u8 mac_idx)
reg = rtw89_mac_reg_by_idx(rtwdev, R_AX_RX_SR_CTRL, mac_idx);
rtw89_write8_clr(rtwdev, reg, B_AX_SR_EN);
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_AX_BSSID_SRC_CTRL, mac_idx);
+ rtw89_write8_set(rtwdev, reg, B_AX_PLCP_SRC_EN);
+
return 0;
}
@@ -3192,13 +3198,11 @@ static int set_cpuio_ax(struct rtw89_dev *rtwdev,
return 0;
}
-int rtw89_mac_dle_quota_change(struct rtw89_dev *rtwdev, enum rtw89_qta_mode mode)
+int rtw89_mac_dle_quota_change(struct rtw89_dev *rtwdev, enum rtw89_qta_mode mode,
+ bool band1_en)
{
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
const struct rtw89_dle_mem *cfg;
- struct rtw89_cpuio_ctrl ctrl_para = {0};
- u16 pkt_id;
- int ret;
cfg = get_dle_mem_cfg(rtwdev, mode);
if (!cfg) {
@@ -3213,6 +3217,16 @@ int rtw89_mac_dle_quota_change(struct rtw89_dev *rtwdev, enum rtw89_qta_mode mod
dle_quota_cfg(rtwdev, cfg, INVALID_QT_WCPU);
+ return mac->dle_quota_change(rtwdev, band1_en);
+}
+
+static int dle_quota_change_ax(struct rtw89_dev *rtwdev, bool band1_en)
+{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
+ struct rtw89_cpuio_ctrl ctrl_para = {0};
+ u16 pkt_id;
+ int ret;
+
ret = mac->dle_buf_req(rtwdev, 0x20, true, &pkt_id);
if (ret) {
rtw89_err(rtwdev, "[ERR]WDE DLE buf req\n");
@@ -3301,7 +3315,7 @@ static int band1_enable_ax(struct rtw89_dev *rtwdev)
return ret;
}
- ret = rtw89_mac_dle_quota_change(rtwdev, rtwdev->mac.qta_mode);
+ ret = rtw89_mac_dle_quota_change(rtwdev, rtwdev->mac.qta_mode, true);
if (ret) {
rtw89_err(rtwdev, "[ERR]DLE quota change %d\n", ret);
return ret;
@@ -3676,6 +3690,28 @@ static int trx_init_ax(struct rtw89_dev *rtwdev)
return 0;
}
+static int rtw89_mac_feat_init(struct rtw89_dev *rtwdev)
+{
+#define BACAM_1024BMP_OCC_ENTRY 4
+#define BACAM_MAX_RU_SUPPORT_B0_STA 1
+#define BACAM_MAX_RU_SUPPORT_B1_STA 1
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+ u8 users, offset;
+
+ if (chip->bacam_ver != RTW89_BACAM_V1)
+ return 0;
+
+ offset = 0;
+ users = BACAM_MAX_RU_SUPPORT_B0_STA;
+ rtw89_fw_h2c_init_ba_cam_users(rtwdev, users, offset, RTW89_MAC_0);
+
+ offset += users * BACAM_1024BMP_OCC_ENTRY;
+ users = BACAM_MAX_RU_SUPPORT_B1_STA;
+ rtw89_fw_h2c_init_ba_cam_users(rtwdev, users, offset, RTW89_MAC_1);
+
+ return 0;
+}
+
static void rtw89_disable_fw_watchdog(struct rtw89_dev *rtwdev)
{
enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
@@ -3910,6 +3946,10 @@ int rtw89_mac_init(struct rtw89_dev *rtwdev)
if (ret)
goto fail;
+ ret = rtw89_mac_feat_init(rtwdev);
+ if (ret)
+ goto fail;
+
if (rtwdev->hci.ops->mac_post_init) {
ret = rtwdev->hci.ops->mac_post_init(rtwdev);
if (ret)
@@ -4000,6 +4040,9 @@ static const struct rtw89_port_reg rtw89_port_base_ax = {
.mbssid = R_AX_MBSSID_CTRL,
.mbssid_drop = R_AX_MBSSID_DROP_0,
.tsf_sync = R_AX_PORT0_TSF_SYNC,
+ .ptcl_dbg = R_AX_PTCL_DBG,
+ .ptcl_dbg_info = R_AX_PTCL_DBG_INFO,
+ .bcn_drop_all = R_AX_BCN_DROP_ALL0,
.hiq_win = {R_AX_P0MB_HGQ_WINDOW_CFG_0, R_AX_PORT_HGQ_WINDOW_CFG,
R_AX_PORT_HGQ_WINDOW_CFG + 1, R_AX_PORT_HGQ_WINDOW_CFG + 2,
R_AX_PORT_HGQ_WINDOW_CFG + 3},
@@ -4008,13 +4051,15 @@ static const struct rtw89_port_reg rtw89_port_base_ax = {
static void rtw89_mac_check_packet_ctrl(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif, u8 type)
{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
+ const struct rtw89_port_reg *p = mac->port_base;
u8 mask = B_AX_PTCL_DBG_INFO_MASK_BY_PORT(rtwvif->port);
u32 reg_info, reg_ctrl;
u32 val;
int ret;
- reg_info = rtw89_mac_reg_by_idx(rtwdev, R_AX_PTCL_DBG_INFO, rtwvif->mac_idx);
- reg_ctrl = rtw89_mac_reg_by_idx(rtwdev, R_AX_PTCL_DBG, rtwvif->mac_idx);
+ reg_info = rtw89_mac_reg_by_idx(rtwdev, p->ptcl_dbg_info, rtwvif->mac_idx);
+ reg_ctrl = rtw89_mac_reg_by_idx(rtwdev, p->ptcl_dbg, rtwvif->mac_idx);
rtw89_write32_mask(rtwdev, reg_ctrl, B_AX_PTCL_DBG_SEL_MASK, type);
rtw89_write32_set(rtwdev, reg_ctrl, B_AX_PTCL_DBG_EN);
@@ -4031,7 +4076,7 @@ static void rtw89_mac_bcn_drop(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvi
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
const struct rtw89_port_reg *p = mac->port_base;
- rtw89_write32_set(rtwdev, R_AX_BCN_DROP_ALL0, BIT(rtwvif->port));
+ rtw89_write32_set(rtwdev, p->bcn_drop_all, BIT(rtwvif->port));
rtw89_write32_port_mask(rtwdev, rtwvif, p->tbtt_prohib, B_AX_TBTT_SETUP_MASK, 1);
rtw89_write32_port_mask(rtwdev, rtwvif, p->bcn_area, B_AX_BCN_MSK_AREA_MASK, 0);
rtw89_write32_port_mask(rtwdev, rtwvif, p->tbtt_prohib, B_AX_TBTT_HOLD_MASK, 0);
@@ -4044,9 +4089,9 @@ static void rtw89_mac_bcn_drop(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvi
if (rtwvif->port == RTW89_PORT_0)
rtw89_mac_check_packet_ctrl(rtwdev, rtwvif, AX_PTCL_DBG_BCNQ_NUM1);
- rtw89_write32_clr(rtwdev, R_AX_BCN_DROP_ALL0, BIT(rtwvif->port));
+ rtw89_write32_clr(rtwdev, p->bcn_drop_all, BIT(rtwvif->port));
rtw89_write32_port_clr(rtwdev, rtwvif, p->port_cfg, B_AX_TBTT_PROHIB_EN);
- fsleep(2);
+ fsleep(2000);
}
#define BCN_INTERVAL 100
@@ -4159,13 +4204,11 @@ static void rtw89_mac_port_cfg_rx_sw(struct rtw89_dev *rtwdev,
rtw89_write32_port_clr(rtwdev, rtwvif, p->port_cfg, bit);
}
-static void rtw89_mac_port_cfg_rx_sync(struct rtw89_dev *rtwdev,
- struct rtw89_vif *rtwvif)
+void rtw89_mac_port_cfg_rx_sync(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool en)
{
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
const struct rtw89_port_reg *p = mac->port_base;
- bool en = rtwvif->net_type == RTW89_NET_TYPE_INFRA ||
- rtwvif->net_type == RTW89_NET_TYPE_AD_HOC;
if (en)
rtw89_write32_port_set(rtwdev, rtwvif, p->port_cfg, B_AX_TSF_UDT_EN);
@@ -4173,6 +4216,15 @@ static void rtw89_mac_port_cfg_rx_sync(struct rtw89_dev *rtwdev,
rtw89_write32_port_clr(rtwdev, rtwvif, p->port_cfg, B_AX_TSF_UDT_EN);
}
+static void rtw89_mac_port_cfg_rx_sync_by_nettype(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif)
+{
+ bool en = rtwvif->net_type == RTW89_NET_TYPE_INFRA ||
+ rtwvif->net_type == RTW89_NET_TYPE_AD_HOC;
+
+ rtw89_mac_port_cfg_rx_sync(rtwdev, rtwvif, en);
+}
+
static void rtw89_mac_port_cfg_tx_sw(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif, bool en)
{
@@ -4471,7 +4523,11 @@ int rtw89_mac_vif_init(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif)
if (ret)
return ret;
- ret = rtw89_fw_h2c_default_cmac_tbl(rtwdev, rtwvif);
+ ret = rtw89_chip_h2c_default_cmac_tbl(rtwdev, rtwvif, NULL);
+ if (ret)
+ return ret;
+
+ ret = rtw89_chip_h2c_default_dmac_tbl(rtwdev, rtwvif, NULL);
if (ret)
return ret;
@@ -4508,7 +4564,7 @@ int rtw89_mac_port_update(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif)
rtw89_mac_port_cfg_net_type(rtwdev, rtwvif);
rtw89_mac_port_cfg_bcn_prct(rtwdev, rtwvif);
rtw89_mac_port_cfg_rx_sw(rtwdev, rtwvif);
- rtw89_mac_port_cfg_rx_sync(rtwdev, rtwvif);
+ rtw89_mac_port_cfg_rx_sync_by_nettype(rtwdev, rtwvif);
rtw89_mac_port_cfg_tx_sw_by_nettype(rtwdev, rtwvif);
rtw89_mac_port_cfg_bcn_intv(rtwdev, rtwvif);
rtw89_mac_port_cfg_hiq_win(rtwdev, rtwvif);
@@ -4571,6 +4627,7 @@ void rtw89_mac_set_he_obss_narrow_bw_ru(struct rtw89_dev *rtwdev,
struct ieee80211_vif *vif)
{
struct rtw89_vif *rtwvif = (struct rtw89_vif *)vif->drv_priv;
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
struct ieee80211_hw *hw = rtwdev->hw;
bool tolerated = true;
u32 reg;
@@ -4578,18 +4635,19 @@ void rtw89_mac_set_he_obss_narrow_bw_ru(struct rtw89_dev *rtwdev,
if (!vif->bss_conf.he_support || vif->type != NL80211_IFTYPE_STATION)
return;
- if (!(vif->bss_conf.chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ if (!(vif->bss_conf.chanreq.oper.chan->flags & IEEE80211_CHAN_RADAR))
return;
- cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chandef,
+ cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chanreq.oper,
rtw89_mac_check_he_obss_narrow_bw_ru_iter,
&tolerated);
- reg = rtw89_mac_reg_by_idx(rtwdev, R_AX_RXTRIG_TEST_USER_2, rtwvif->mac_idx);
+ reg = rtw89_mac_reg_by_idx(rtwdev, mac->narrow_bw_ru_dis.addr,
+ rtwvif->mac_idx);
if (tolerated)
- rtw89_write32_clr(rtwdev, reg, B_AX_RXTRIG_RU26_DIS);
+ rtw89_write32_clr(rtwdev, reg, mac->narrow_bw_ru_dis.mask);
else
- rtw89_write32_set(rtwdev, reg, B_AX_RXTRIG_RU26_DIS);
+ rtw89_write32_set(rtwdev, reg, mac->narrow_bw_ru_dis.mask);
}
void rtw89_mac_stop_ap(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif)
@@ -4641,35 +4699,52 @@ static bool rtw89_is_op_chan(struct rtw89_dev *rtwdev, u8 band, u8 channel)
}
static void
-rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *c2h,
+rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *skb,
u32 len)
{
+ const struct rtw89_c2h_scanofld *c2h =
+ (const struct rtw89_c2h_scanofld *)skb->data;
struct ieee80211_vif *vif = rtwdev->scan_info.scanning_vif;
struct rtw89_vif *rtwvif = vif_to_rtwvif_safe(vif);
struct rtw89_chan new;
- u8 reason, status, tx_fail, band, actual_period;
- u32 last_chan = rtwdev->scan_info.last_chan_idx;
+ u8 reason, status, tx_fail, band, actual_period, expect_period;
+ u32 last_chan = rtwdev->scan_info.last_chan_idx, report_tsf;
+ u8 mac_idx, sw_def, fw_def;
u16 chan;
int ret;
if (!rtwvif)
return;
- tx_fail = RTW89_GET_MAC_C2H_SCANOFLD_TX_FAIL(c2h->data);
- status = RTW89_GET_MAC_C2H_SCANOFLD_STATUS(c2h->data);
- chan = RTW89_GET_MAC_C2H_SCANOFLD_PRI_CH(c2h->data);
- reason = RTW89_GET_MAC_C2H_SCANOFLD_RSP(c2h->data);
- band = RTW89_GET_MAC_C2H_SCANOFLD_BAND(c2h->data);
- actual_period = RTW89_GET_MAC_C2H_ACTUAL_PERIOD(c2h->data);
+ tx_fail = le32_get_bits(c2h->w5, RTW89_C2H_SCANOFLD_W5_TX_FAIL);
+ status = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_STATUS);
+ chan = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_PRI_CH);
+ reason = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_RSN);
+ band = le32_get_bits(c2h->w5, RTW89_C2H_SCANOFLD_W5_BAND);
+ actual_period = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_PERIOD);
+ mac_idx = le32_get_bits(c2h->w5, RTW89_C2H_SCANOFLD_W5_MAC_IDX);
+
if (!(rtwdev->chip->support_bands & BIT(NL80211_BAND_6GHZ)))
band = chan > 14 ? RTW89_BAND_5G : RTW89_BAND_2G;
rtw89_debug(rtwdev, RTW89_DBG_HW_SCAN,
- "band: %d, chan: %d, reason: %d, status: %d, tx_fail: %d, actual: %d\n",
- band, chan, reason, status, tx_fail, actual_period);
+ "mac_idx[%d] band: %d, chan: %d, reason: %d, status: %d, tx_fail: %d, actual: %d\n",
+ mac_idx, band, chan, reason, status, tx_fail, actual_period);
+
+ if (rtwdev->chip->chip_gen == RTW89_CHIP_BE) {
+ sw_def = le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_SW_DEF);
+ expect_period = le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_EXPECT_PERIOD);
+ fw_def = le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_FW_DEF);
+ report_tsf = le32_get_bits(c2h->w7, RTW89_C2H_SCANOFLD_W7_REPORT_TSF);
+
+ rtw89_debug(rtwdev, RTW89_DBG_HW_SCAN,
+ "sw_def: %d, fw_def: %d, tsf: %x, expect: %d\n",
+ sw_def, fw_def, report_tsf, expect_period);
+ }
switch (reason) {
+ case RTW89_SCAN_LEAVE_OP_NOTIFY:
case RTW89_SCAN_LEAVE_CH_NOTIFY:
if (rtw89_is_op_chan(rtwdev, band, chan)) {
rtw89_mac_enable_beacon_for_ap_vifs(rtwdev, false);
@@ -4685,9 +4760,10 @@ rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *c2h,
rtw89_warn(rtwdev, "HW scan failed: %d\n", ret);
}
} else {
- rtw89_hw_scan_complete(rtwdev, vif, false);
+ rtw89_hw_scan_complete(rtwdev, vif, rtwdev->scan_info.abort);
}
break;
+ case RTW89_SCAN_ENTER_OP_NOTIFY:
case RTW89_SCAN_ENTER_CH_NOTIFY:
if (rtw89_is_op_chan(rtwdev, band, chan)) {
rtw89_assign_entity_chan(rtwdev, rtwvif->sub_entity_idx,
@@ -4807,8 +4883,13 @@ rtw89_mac_c2h_done_ack(struct rtw89_dev *rtwdev, struct sk_buff *skb_c2h, u32 le
default:
return;
case H2C_FUNC_ADD_SCANOFLD_CH:
+ cond = RTW89_SCANOFLD_WAIT_COND_ADD_CH;
+ break;
case H2C_FUNC_SCANOFLD:
- cond = RTW89_FW_OFLD_WAIT_COND(0, h2c_func);
+ cond = RTW89_SCANOFLD_WAIT_COND_START;
+ break;
+ case H2C_FUNC_SCANOFLD_BE:
+ cond = RTW89_SCANOFLD_BE_WAIT_COND_START;
break;
}
@@ -5021,6 +5102,84 @@ rtw89_mac_c2h_mcc_status_rpt(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32
rtw89_complete_cond(&rtwdev->mcc.wait, cond, &data);
}
+static void
+rtw89_mac_c2h_mrc_tsf_rpt(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
+{
+ struct rtw89_wait_info *wait = &rtwdev->mcc.wait;
+ const struct rtw89_c2h_mrc_tsf_rpt *c2h_rpt;
+ struct rtw89_completion_data data = {};
+ struct rtw89_mac_mrc_tsf_rpt *rpt;
+ unsigned int i;
+
+ c2h_rpt = (const struct rtw89_c2h_mrc_tsf_rpt *)c2h->data;
+ rpt = (struct rtw89_mac_mrc_tsf_rpt *)data.buf;
+ rpt->num = min_t(u8, RTW89_MAC_MRC_MAX_REQ_TSF_NUM,
+ le32_get_bits(c2h_rpt->w2,
+ RTW89_C2H_MRC_TSF_RPT_W2_REQ_TSF_NUM));
+
+ for (i = 0; i < rpt->num; i++) {
+ u32 tsf_high = le32_to_cpu(c2h_rpt->infos[i].tsf_high);
+ u32 tsf_low = le32_to_cpu(c2h_rpt->infos[i].tsf_low);
+
+ rpt->tsfs[i] = (u64)tsf_high << 32 | tsf_low;
+
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC C2H TSF RPT: index %u> %llu\n",
+ i, rpt->tsfs[i]);
+ }
+
+ rtw89_complete_cond(wait, RTW89_MRC_WAIT_COND_REQ_TSF, &data);
+}
+
+static void
+rtw89_mac_c2h_mrc_status_rpt(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
+{
+ struct rtw89_wait_info *wait = &rtwdev->mcc.wait;
+ const struct rtw89_c2h_mrc_status_rpt *c2h_rpt;
+ struct rtw89_completion_data data = {};
+ enum rtw89_mac_mrc_status status;
+ unsigned int cond;
+ bool next = false;
+ u32 tsf_high;
+ u32 tsf_low;
+ u8 sch_idx;
+ u8 func;
+
+ c2h_rpt = (const struct rtw89_c2h_mrc_status_rpt *)c2h->data;
+ sch_idx = le32_get_bits(c2h_rpt->w2, RTW89_C2H_MRC_STATUS_RPT_W2_SCH_IDX);
+ status = le32_get_bits(c2h_rpt->w2, RTW89_C2H_MRC_STATUS_RPT_W2_STATUS);
+ tsf_high = le32_to_cpu(c2h_rpt->tsf_high);
+ tsf_low = le32_to_cpu(c2h_rpt->tsf_low);
+
+ switch (status) {
+ case RTW89_MAC_MRC_START_SCH_OK:
+ func = H2C_FUNC_START_MRC;
+ break;
+ case RTW89_MAC_MRC_STOP_SCH_OK:
+ /* H2C_FUNC_DEL_MRC without STOP_ONLY, so wait for DEL_SCH_OK */
+ func = H2C_FUNC_DEL_MRC;
+ next = true;
+ break;
+ case RTW89_MAC_MRC_DEL_SCH_OK:
+ func = H2C_FUNC_DEL_MRC;
+ break;
+ default:
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "invalid MRC C2H STS RPT: status %d\n", status);
+ return;
+ }
+
+ rtw89_debug(rtwdev, RTW89_DBG_CHAN,
+ "MRC C2H STS RPT: sch_idx %d, status %d, tsf %llu\n",
+ sch_idx, status, (u64)tsf_high << 32 | tsf_low);
+
+ if (next)
+ return;
+
+ cond = RTW89_MRC_WAIT_COND(sch_idx, func);
+ rtw89_complete_cond(wait, cond, &data);
+}
+
static
void (* const rtw89_mac_c2h_ofld_handler[])(struct rtw89_dev *rtwdev,
struct sk_buff *c2h, u32 len) = {
@@ -5052,7 +5211,39 @@ void (* const rtw89_mac_c2h_mcc_handler[])(struct rtw89_dev *rtwdev,
[RTW89_MAC_C2H_FUNC_MCC_STATUS_RPT] = rtw89_mac_c2h_mcc_status_rpt,
};
-bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, u8 class, u8 func)
+static
+void (* const rtw89_mac_c2h_mrc_handler[])(struct rtw89_dev *rtwdev,
+ struct sk_buff *c2h, u32 len) = {
+ [RTW89_MAC_C2H_FUNC_MRC_TSF_RPT] = rtw89_mac_c2h_mrc_tsf_rpt,
+ [RTW89_MAC_C2H_FUNC_MRC_STATUS_RPT] = rtw89_mac_c2h_mrc_status_rpt,
+};
+
+static void rtw89_mac_c2h_scanofld_rsp_atomic(struct rtw89_dev *rtwdev,
+ struct sk_buff *skb)
+{
+ const struct rtw89_c2h_scanofld *c2h =
+ (const struct rtw89_c2h_scanofld *)skb->data;
+ struct rtw89_wait_info *fw_ofld_wait = &rtwdev->mac.fw_ofld_wait;
+ struct rtw89_completion_data data = {};
+ unsigned int cond;
+ u8 status, reason;
+
+ status = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_STATUS);
+ reason = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_RSN);
+ data.err = status != RTW89_SCAN_STATUS_SUCCESS;
+
+ if (reason == RTW89_SCAN_END_SCAN_NOTIFY) {
+ if (rtwdev->chip->chip_gen == RTW89_CHIP_BE)
+ cond = RTW89_SCANOFLD_BE_WAIT_COND_STOP;
+ else
+ cond = RTW89_SCANOFLD_WAIT_COND_STOP;
+
+ rtw89_complete_cond(fw_ofld_wait, cond, &data);
+ }
+}
+
+bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, struct sk_buff *c2h,
+ u8 class, u8 func)
{
switch (class) {
default:
@@ -5069,11 +5260,16 @@ bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, u8 class, u8 func)
switch (func) {
default:
return false;
+ case RTW89_MAC_C2H_FUNC_SCANOFLD_RSP:
+ rtw89_mac_c2h_scanofld_rsp_atomic(rtwdev, c2h);
+ return false;
case RTW89_MAC_C2H_FUNC_PKT_OFLD_RSP:
return true;
}
case RTW89_MAC_C2H_CLASS_MCC:
return true;
+ case RTW89_MAC_C2H_CLASS_MRC:
+ return true;
}
}
@@ -5096,6 +5292,10 @@ void rtw89_mac_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb,
if (func < NUM_OF_RTW89_MAC_C2H_FUNC_MCC)
handler = rtw89_mac_c2h_mcc_handler[func];
break;
+ case RTW89_MAC_C2H_CLASS_MRC:
+ if (func < NUM_OF_RTW89_MAC_C2H_FUNC_MRC)
+ handler = rtw89_mac_c2h_mrc_handler[func];
+ break;
case RTW89_MAC_C2H_CLASS_FWDBG:
return;
default:
@@ -5115,8 +5315,7 @@ bool rtw89_mac_get_txpwr_cr_ax(struct rtw89_dev *rtwdev,
enum rtw89_phy_idx phy_idx,
u32 reg_base, u32 *cr)
{
- const struct rtw89_dle_mem *dle_mem = rtwdev->chip->dle_mem;
- enum rtw89_qta_mode mode = dle_mem->mode;
+ enum rtw89_qta_mode mode = rtwdev->mac.qta_mode;
u32 addr = rtw89_mac_reg_by_idx(rtwdev, reg_base, phy_idx);
if (addr < R_AX_PWR_RATE_CTRL || addr > CMAC1_END_ADDR_AX) {
@@ -5143,7 +5342,8 @@ error:
return false;
}
-int rtw89_mac_cfg_ppdu_status(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable)
+static
+int rtw89_mac_cfg_ppdu_status_ax(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable)
{
u32 reg = rtw89_mac_reg_by_idx(rtwdev, R_AX_PPDU_STAT, mac_idx);
int ret;
@@ -5166,7 +5366,6 @@ int rtw89_mac_cfg_ppdu_status(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable)
return 0;
}
-EXPORT_SYMBOL(rtw89_mac_cfg_ppdu_status);
void rtw89_mac_update_rts_threshold(struct rtw89_dev *rtwdev, u8 mac_idx)
{
@@ -5419,7 +5618,8 @@ int rtw89_mac_cfg_gnt_v1(struct rtw89_dev *rtwdev,
}
EXPORT_SYMBOL(rtw89_mac_cfg_gnt_v1);
-int rtw89_mac_cfg_plt(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_plt *plt)
+static
+int rtw89_mac_cfg_plt_ax(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_plt *plt)
{
u32 reg;
u16 val;
@@ -5515,7 +5715,7 @@ bool rtw89_mac_get_ctrl_path(struct rtw89_dev *rtwdev)
return !!val;
}
-u16 rtw89_mac_get_plt_cnt(struct rtw89_dev *rtwdev, u8 band)
+static u16 rtw89_mac_get_plt_cnt_ax(struct rtw89_dev *rtwdev, u8 band)
{
u32 reg;
u16 cnt;
@@ -6069,6 +6269,41 @@ int rtw89_mac_ptk_drop_by_band_and_wait(struct rtw89_dev *rtwdev,
return ret;
}
+static int rtw89_wow_config_mac_ax(struct rtw89_dev *rtwdev, bool enable_wow)
+{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
+ int ret;
+
+ if (enable_wow) {
+ ret = rtw89_mac_resize_ple_rx_quota(rtwdev, true);
+ if (ret) {
+ rtw89_err(rtwdev, "[ERR]patch rx qta %d\n", ret);
+ return ret;
+ }
+
+ rtw89_write32_set(rtwdev, R_AX_RX_FUNCTION_STOP, B_AX_HDR_RX_STOP);
+ rtw89_write32_clr(rtwdev, mac->rx_fltr, B_AX_SNIFFER_MODE);
+ rtw89_mac_cfg_ppdu_status(rtwdev, RTW89_MAC_0, false);
+ rtw89_write32(rtwdev, R_AX_ACTION_FWD0, 0);
+ rtw89_write32(rtwdev, R_AX_ACTION_FWD1, 0);
+ rtw89_write32(rtwdev, R_AX_TF_FWD, 0);
+ rtw89_write32(rtwdev, R_AX_HW_RPT_FWD, 0);
+ } else {
+ ret = rtw89_mac_resize_ple_rx_quota(rtwdev, false);
+ if (ret) {
+ rtw89_err(rtwdev, "[ERR]patch rx qta %d\n", ret);
+ return ret;
+ }
+
+ rtw89_write32_clr(rtwdev, R_AX_RX_FUNCTION_STOP, B_AX_HDR_RX_STOP);
+ rtw89_mac_cfg_ppdu_status(rtwdev, RTW89_MAC_0, true);
+ rtw89_write32(rtwdev, R_AX_ACTION_FWD0, TRXCFG_MPDU_PROC_ACT_FRWD);
+ rtw89_write32(rtwdev, R_AX_TF_FWD, TRXCFG_MPDU_PROC_TF_FRWD);
+ }
+
+ return 0;
+}
+
static u8 rtw89_fw_get_rdy_ax(struct rtw89_dev *rtwdev, enum rtw89_fwdl_check_type type)
{
u8 val = rtw89_read8(rtwdev, R_AX_WCPU_FW_CTRL);
@@ -6096,6 +6331,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = {
.rx_fltr = R_AX_RX_FLTR_OPT,
.port_base = &rtw89_port_base_ax,
.agg_len_ht = R_AX_AGG_LEN_HT_0,
+ .ps_status = R_AX_PPWRBIT_SETTING,
.muedca_ctrl = {
.addr = R_AX_MUEDCA_EN,
@@ -6106,6 +6342,11 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = {
.mask = B_AX_BFMEE_HT_NDPA_EN | B_AX_BFMEE_VHT_NDPA_EN |
B_AX_BFMEE_HE_NDPA_EN,
},
+ .narrow_bw_ru_dis = {
+ .addr = R_AX_RXTRIG_TEST_USER_2,
+ .mask = B_AX_RXTRIG_RU26_DIS,
+ },
+ .wow_ctrl = {.addr = R_AX_WOW_CTRL, .mask = B_AX_WOW_WOWEN,},
.check_mac_en = rtw89_mac_check_mac_en_ax,
.sys_init = sys_init_ax,
@@ -6117,6 +6358,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = {
.bf_assoc = rtw89_mac_bf_assoc_ax,
.typ_fltr_opt = rtw89_mac_typ_fltr_opt_ax,
+ .cfg_ppdu_status = rtw89_mac_cfg_ppdu_status_ax,
.dle_mix_cfg = dle_mix_cfg_ax,
.chk_dle_rdy = chk_dle_rdy_ax,
@@ -6128,6 +6370,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = {
.wde_quota_cfg = wde_quota_cfg_ax,
.ple_quota_cfg = ple_quota_cfg_ax,
.set_cpuio = set_cpuio_ax,
+ .dle_quota_change = dle_quota_change_ax,
.disable_cpu = rtw89_mac_disable_cpu_ax,
.fwdl_enable_wcpu = rtw89_mac_enable_cpu_ax,
@@ -6137,6 +6380,9 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = {
.parse_phycap_map = rtw89_parse_phycap_map_ax,
.cnv_efuse_state = rtw89_cnv_efuse_state_ax,
+ .cfg_plt = rtw89_mac_cfg_plt_ax,
+ .get_plt_cnt = rtw89_mac_get_plt_cnt_ax,
+
.get_txpwr_cr = rtw89_mac_get_txpwr_cr_ax,
.write_xtal_si = rtw89_mac_write_xtal_si_ax,
@@ -6146,5 +6392,10 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = {
.dump_err_status = rtw89_mac_dump_err_status_ax,
.is_txq_empty = mac_is_txq_empty_ax,
+
+ .add_chan_list = rtw89_hw_scan_add_chan_list,
+ .scan_offload = rtw89_fw_h2c_scan_offload,
+
+ .wow_config_mac = rtw89_wow_config_mac_ax,
};
EXPORT_SYMBOL(rtw89_mac_gen_ax);
diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h
index ed98b49809a4..6fb457153a11 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.h
+++ b/drivers/net/wireless/realtek/rtw89/mac.h
@@ -169,6 +169,12 @@ enum rtw89_mac_ax_l0_to_l1_event {
MAC_AX_L0_TO_L1_EVENT_MAX = 15,
};
+enum rtw89_mac_wow_fw_status {
+ WOWLAN_NOT_READY = 0x00,
+ WOWLAN_SLEEP_READY = 0x01,
+ WOWLAN_RESUME_READY = 0x02,
+};
+
#define RTW89_PORT_OFFSET_TU_TO_32US(shift_tu) ((shift_tu) * 1024 / 32)
enum rtw89_mac_dbg_port_sel {
@@ -406,13 +412,21 @@ enum rtw89_mac_c2h_mcc_func {
NUM_OF_RTW89_MAC_C2H_FUNC_MCC,
};
+enum rtw89_mac_c2h_mrc_func {
+ RTW89_MAC_C2H_FUNC_MRC_TSF_RPT = 0,
+ RTW89_MAC_C2H_FUNC_MRC_STATUS_RPT = 1,
+
+ NUM_OF_RTW89_MAC_C2H_FUNC_MRC,
+};
+
enum rtw89_mac_c2h_class {
- RTW89_MAC_C2H_CLASS_INFO,
- RTW89_MAC_C2H_CLASS_OFLD,
- RTW89_MAC_C2H_CLASS_TWT,
- RTW89_MAC_C2H_CLASS_WOW,
- RTW89_MAC_C2H_CLASS_MCC,
- RTW89_MAC_C2H_CLASS_FWDBG,
+ RTW89_MAC_C2H_CLASS_INFO = 0x0,
+ RTW89_MAC_C2H_CLASS_OFLD = 0x1,
+ RTW89_MAC_C2H_CLASS_TWT = 0x2,
+ RTW89_MAC_C2H_CLASS_WOW = 0x3,
+ RTW89_MAC_C2H_CLASS_MCC = 0x4,
+ RTW89_MAC_C2H_CLASS_FWDBG = 0x5,
+ RTW89_MAC_C2H_CLASS_MRC = 0xe,
RTW89_MAC_C2H_CLASS_MAX,
};
@@ -441,6 +455,12 @@ enum rtw89_mac_mcc_status {
RTW89_MAC_MCC_TXNULL1_FAIL = 27,
};
+enum rtw89_mac_mrc_status {
+ RTW89_MAC_MRC_START_SCH_OK = 0,
+ RTW89_MAC_MRC_STOP_SCH_OK = 1,
+ RTW89_MAC_MRC_DEL_SCH_OK = 2,
+};
+
struct rtw89_mac_ax_coex {
#define RTW89_MAC_AX_COEX_RTK_MODE 0
#define RTW89_MAC_AX_COEX_CSR_MODE 1
@@ -894,9 +914,12 @@ struct rtw89_mac_gen_def {
u32 rx_fltr;
const struct rtw89_port_reg *port_base;
u32 agg_len_ht;
+ u32 ps_status;
struct rtw89_reg_def muedca_ctrl;
struct rtw89_reg_def bfee_ctrl;
+ struct rtw89_reg_def narrow_bw_ru_dis;
+ struct rtw89_reg_def wow_ctrl;
int (*check_mac_en)(struct rtw89_dev *rtwdev, u8 band,
enum rtw89_mac_hwmod_sel sel);
@@ -913,6 +936,7 @@ struct rtw89_mac_gen_def {
enum rtw89_machdr_frame_type type,
enum rtw89_mac_fwd_target fwd_target,
u8 mac_idx);
+ int (*cfg_ppdu_status)(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable);
int (*dle_mix_cfg)(struct rtw89_dev *rtwdev, const struct rtw89_dle_mem *cfg);
int (*chk_dle_rdy)(struct rtw89_dev *rtwdev, bool wde_or_ple);
@@ -930,6 +954,7 @@ struct rtw89_mac_gen_def {
const struct rtw89_ple_quota *max_cfg);
int (*set_cpuio)(struct rtw89_dev *rtwdev,
struct rtw89_cpuio_ctrl *ctrl_para, bool wd);
+ int (*dle_quota_change)(struct rtw89_dev *rtwdev, bool band1_en);
void (*disable_cpu)(struct rtw89_dev *rtwdev);
int (*fwdl_enable_wcpu)(struct rtw89_dev *rtwdev, u8 boot_reason,
@@ -940,6 +965,9 @@ struct rtw89_mac_gen_def {
int (*parse_phycap_map)(struct rtw89_dev *rtwdev);
int (*cnv_efuse_state)(struct rtw89_dev *rtwdev, bool idle);
+ int (*cfg_plt)(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_plt *plt);
+ u16 (*get_plt_cnt)(struct rtw89_dev *rtwdev, u8 band);
+
bool (*get_txpwr_cr)(struct rtw89_dev *rtwdev,
enum rtw89_phy_idx phy_idx,
u32 reg_base, u32 *cr);
@@ -952,6 +980,14 @@ struct rtw89_mac_gen_def {
enum mac_ax_err_info err);
bool (*is_txq_empty)(struct rtw89_dev *rtwdev);
+
+ int (*add_chan_list)(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool connected);
+ int (*scan_offload)(struct rtw89_dev *rtwdev,
+ struct rtw89_scan_option *option,
+ struct rtw89_vif *rtwvif);
+
+ int (*wow_config_mac)(struct rtw89_dev *rtwdev, bool enable_wow);
};
extern const struct rtw89_mac_gen_def rtw89_mac_gen_ax;
@@ -1086,6 +1122,8 @@ void rtw89_mac_port_tsf_sync(struct rtw89_dev *rtwdev,
u16 offset_tu);
int rtw89_mac_port_get_tsf(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif,
u64 *tsf);
+void rtw89_mac_port_cfg_rx_sync(struct rtw89_dev *rtwdev,
+ struct rtw89_vif *rtwvif, bool en);
void rtw89_mac_set_he_obss_narrow_bw_ru(struct rtw89_dev *rtwdev,
struct ieee80211_vif *vif);
void rtw89_mac_stop_ap(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif);
@@ -1127,7 +1165,8 @@ static inline int rtw89_chip_reset_bb_rf(struct rtw89_dev *rtwdev)
u32 rtw89_mac_get_err_status(struct rtw89_dev *rtwdev);
int rtw89_mac_set_err_status(struct rtw89_dev *rtwdev, u32 err);
-bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, u8 class, u8 func);
+bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, struct sk_buff *c2h,
+ u8 class, u8 func);
void rtw89_mac_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb,
u32 len, u8 class, u8 func);
int rtw89_mac_setup_phycap(struct rtw89_dev *rtwdev);
@@ -1135,9 +1174,20 @@ int rtw89_mac_stop_sch_tx(struct rtw89_dev *rtwdev, u8 mac_idx,
u32 *tx_en, enum rtw89_sch_tx_sel sel);
int rtw89_mac_stop_sch_tx_v1(struct rtw89_dev *rtwdev, u8 mac_idx,
u32 *tx_en, enum rtw89_sch_tx_sel sel);
+int rtw89_mac_stop_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx,
+ u32 *tx_en, enum rtw89_sch_tx_sel sel);
int rtw89_mac_resume_sch_tx(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en);
int rtw89_mac_resume_sch_tx_v1(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en);
-int rtw89_mac_cfg_ppdu_status(struct rtw89_dev *rtwdev, u8 mac_ids, bool enable);
+int rtw89_mac_resume_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en);
+
+static inline
+int rtw89_mac_cfg_ppdu_status(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable)
+{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
+
+ return mac->cfg_ppdu_status(rtwdev, mac_idx, enable);
+}
+
void rtw89_mac_update_rts_threshold(struct rtw89_dev *rtwdev, u8 mac_idx);
void rtw89_mac_flush_txq(struct rtw89_dev *rtwdev, u32 queues, bool drop);
int rtw89_mac_coex_init(struct rtw89_dev *rtwdev, const struct rtw89_mac_ax_coex *coex);
@@ -1147,13 +1197,31 @@ int rtw89_mac_cfg_gnt(struct rtw89_dev *rtwdev,
const struct rtw89_mac_ax_coex_gnt *gnt_cfg);
int rtw89_mac_cfg_gnt_v1(struct rtw89_dev *rtwdev,
const struct rtw89_mac_ax_coex_gnt *gnt_cfg);
-int rtw89_mac_cfg_plt(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_plt *plt);
-u16 rtw89_mac_get_plt_cnt(struct rtw89_dev *rtwdev, u8 band);
+int rtw89_mac_cfg_gnt_v2(struct rtw89_dev *rtwdev,
+ const struct rtw89_mac_ax_coex_gnt *gnt_cfg);
+
+static inline
+int rtw89_mac_cfg_plt(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_plt *plt)
+{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
+
+ return mac->cfg_plt(rtwdev, plt);
+}
+
+static inline
+u16 rtw89_mac_get_plt_cnt(struct rtw89_dev *rtwdev, u8 band)
+{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
+
+ return mac->get_plt_cnt(rtwdev, band);
+}
+
void rtw89_mac_cfg_sb(struct rtw89_dev *rtwdev, u32 val);
u32 rtw89_mac_get_sb(struct rtw89_dev *rtwdev);
bool rtw89_mac_get_ctrl_path(struct rtw89_dev *rtwdev);
int rtw89_mac_cfg_ctrl_path(struct rtw89_dev *rtwdev, bool wl);
int rtw89_mac_cfg_ctrl_path_v1(struct rtw89_dev *rtwdev, bool wl);
+int rtw89_mac_cfg_ctrl_path_v2(struct rtw89_dev *rtwdev, bool wl);
void rtw89_mac_power_mode_change(struct rtw89_dev *rtwdev, bool enter);
void rtw89_mac_notify_wake(struct rtw89_dev *rtwdev);
@@ -1306,6 +1374,7 @@ enum rtw89_mac_xtal_si_offset {
#define XTAL_SI_BIG_PWR_CUT BIT(1)
XTAL_SI_XTAL_DRV = 0x15,
#define XTAL_SI_DRV_LATCH BIT(4)
+ XTAL_SI_XTAL_PLL = 0x16,
XTAL_SI_XTAL_XMD_2 = 0x24,
#define XTAL_SI_LDO_LPS GENMASK(6, 4)
XTAL_SI_XTAL_XMD_4 = 0x26,
@@ -1339,6 +1408,7 @@ enum rtw89_mac_xtal_si_offset {
XTAL_SI_SRAM_CTRL = 0xA1,
#define XTAL_SI_SRAM_DIS BIT(1)
#define FULL_BIT_MASK GENMASK(7, 0)
+ XTAL_SI_APBT = 0xD1,
XTAL_SI_PLL = 0xE0,
XTAL_SI_PLL_1 = 0xE1,
};
@@ -1364,7 +1434,8 @@ int rtw89_mac_resize_ple_rx_quota(struct rtw89_dev *rtwdev, bool wow);
int rtw89_mac_ptk_drop_by_band_and_wait(struct rtw89_dev *rtwdev,
enum rtw89_mac_idx band);
void rtw89_mac_hw_mgnt_sec(struct rtw89_dev *rtwdev, bool wow);
-int rtw89_mac_dle_quota_change(struct rtw89_dev *rtwdev, enum rtw89_qta_mode mode);
+int rtw89_mac_dle_quota_change(struct rtw89_dev *rtwdev, enum rtw89_qta_mode mode,
+ bool band1_en);
int rtw89_mac_get_dle_rsvd_qt_cfg(struct rtw89_dev *rtwdev,
enum rtw89_mac_dle_rsvd_qt_type type,
struct rtw89_mac_dle_rsvd_qt_cfg *cfg);
diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c
index 93889d2fface..31d1ffb16e83 100644
--- a/drivers/net/wireless/realtek/rtw89/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw89/mac80211.c
@@ -441,7 +441,7 @@ static void rtw89_ops_bss_info_changed(struct ieee80211_hw *hw,
* when disconnected by peer
*/
if (rtwdev->scanning)
- rtw89_hw_scan_abort(rtwdev, vif);
+ rtw89_hw_scan_abort(rtwdev, rtwdev->scan_info.scanning_vif);
}
}
@@ -449,10 +449,11 @@ static void rtw89_ops_bss_info_changed(struct ieee80211_hw *hw,
ether_addr_copy(rtwvif->bssid, conf->bssid);
rtw89_cam_bssid_changed(rtwdev, rtwvif);
rtw89_fw_h2c_cam(rtwdev, rtwvif, NULL, NULL);
+ WRITE_ONCE(rtwvif->sync_bcn_tsf, 0);
}
if (changed & BSS_CHANGED_BEACON)
- rtw89_fw_h2c_update_beacon(rtwdev, rtwvif);
+ rtw89_chip_h2c_update_beacon(rtwdev, rtwvif);
if (changed & BSS_CHANGED_ERP_SLOT)
rtw89_conf_tx(rtwdev, rtwvif);
@@ -497,7 +498,7 @@ static int rtw89_ops_start_ap(struct ieee80211_hw *hw,
ether_addr_copy(rtwvif->bssid, vif->bss_conf.bssid);
rtw89_cam_bssid_changed(rtwdev, rtwvif);
rtw89_mac_port_update(rtwdev, rtwvif);
- rtw89_fw_h2c_assoc_cmac_tbl(rtwdev, vif, NULL);
+ rtw89_chip_h2c_assoc_cmac_tbl(rtwdev, vif, NULL);
rtw89_fw_h2c_role_maintain(rtwdev, rtwvif, NULL, RTW89_ROLE_TYPE_CHANGE);
rtw89_fw_h2c_join_info(rtwdev, rtwvif, NULL, true);
rtw89_fw_h2c_cam(rtwdev, rtwvif, NULL, NULL);
@@ -518,7 +519,7 @@ void rtw89_ops_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
mutex_lock(&rtwdev->mutex);
rtw89_mac_stop_ap(rtwdev, rtwvif);
- rtw89_fw_h2c_assoc_cmac_tbl(rtwdev, vif, NULL);
+ rtw89_chip_h2c_assoc_cmac_tbl(rtwdev, vif, NULL);
rtw89_fw_h2c_join_info(rtwdev, rtwvif, NULL, true);
mutex_unlock(&rtwdev->mutex);
}
@@ -660,6 +661,8 @@ static int rtw89_ops_ampdu_action(struct ieee80211_hw *hw,
case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
mutex_lock(&rtwdev->mutex);
clear_bit(RTW89_TXQ_F_AMPDU, &rtwtxq->flags);
+ clear_bit(tid, rtwsta->ampdu_map);
+ rtw89_chip_h2c_ampdu_cmac_tbl(rtwdev, vif, sta);
mutex_unlock(&rtwdev->mutex);
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
break;
@@ -668,17 +671,19 @@ static int rtw89_ops_ampdu_action(struct ieee80211_hw *hw,
set_bit(RTW89_TXQ_F_AMPDU, &rtwtxq->flags);
rtwsta->ampdu_params[tid].agg_num = params->buf_size;
rtwsta->ampdu_params[tid].amsdu = params->amsdu;
+ set_bit(tid, rtwsta->ampdu_map);
rtw89_leave_ps_mode(rtwdev);
+ rtw89_chip_h2c_ampdu_cmac_tbl(rtwdev, vif, sta);
mutex_unlock(&rtwdev->mutex);
break;
case IEEE80211_AMPDU_RX_START:
mutex_lock(&rtwdev->mutex);
- rtw89_fw_h2c_ba_cam(rtwdev, rtwsta, true, params);
+ rtw89_chip_h2c_ba_cam(rtwdev, rtwsta, true, params);
mutex_unlock(&rtwdev->mutex);
break;
case IEEE80211_AMPDU_RX_STOP:
mutex_lock(&rtwdev->mutex);
- rtw89_fw_h2c_ba_cam(rtwdev, rtwsta, false, params);
+ rtw89_chip_h2c_ba_cam(rtwdev, rtwsta, false, params);
mutex_unlock(&rtwdev->mutex);
break;
default:
@@ -990,7 +995,7 @@ static int rtw89_ops_remain_on_channel(struct ieee80211_hw *hw,
}
if (rtwdev->scanning)
- rtw89_hw_scan_abort(rtwdev, vif);
+ rtw89_hw_scan_abort(rtwdev, rtwdev->scan_info.scanning_vif);
if (type == IEEE80211_ROC_TYPE_MGMT_TX)
roc->state = RTW89_ROC_MGMT;
diff --git a/drivers/net/wireless/realtek/rtw89/mac_be.c b/drivers/net/wireless/realtek/rtw89/mac_be.c
index be30c9346293..f16467377eab 100644
--- a/drivers/net/wireless/realtek/rtw89/mac_be.c
+++ b/drivers/net/wireless/realtek/rtw89/mac_be.c
@@ -52,6 +52,9 @@ static const struct rtw89_port_reg rtw89_port_base_be = {
.mbssid = R_BE_MBSSID_CTRL,
.mbssid_drop = R_BE_MBSSID_DROP_0,
.tsf_sync = R_BE_PORT_0_TSF_SYNC,
+ .ptcl_dbg = R_BE_PTCL_DBG,
+ .ptcl_dbg_info = R_BE_PTCL_DBG_INFO,
+ .bcn_drop_all = R_BE_BCN_DROP_ALL0,
.hiq_win = {R_BE_P0MB_HGQ_WINDOW_CFG_0, R_BE_PORT_HGQ_WINDOW_CFG,
R_BE_PORT_HGQ_WINDOW_CFG + 1, R_BE_PORT_HGQ_WINDOW_CFG + 2,
R_BE_PORT_HGQ_WINDOW_CFG + 3},
@@ -988,6 +991,9 @@ static int spatial_reuse_init_be(struct rtw89_dev *rtwdev, u8 mac_idx)
reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_RX_SR_CTRL, mac_idx);
rtw89_write8_clr(rtwdev, reg, B_BE_SR_EN | B_BE_SR_CTRL_PLCP_EN);
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_BSSID_SRC_CTRL, mac_idx);
+ rtw89_write8_set(rtwdev, reg, B_BE_PLCP_SRC_EN);
+
return 0;
}
@@ -995,7 +1001,8 @@ static int tmac_init_be(struct rtw89_dev *rtwdev, u8 mac_idx)
{
u32 reg;
- rtw89_write32_clr(rtwdev, R_BE_TB_PPDU_CTRL, B_BE_QOSNULL_UPD_MUEDCA_EN);
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_TB_PPDU_CTRL, mac_idx);
+ rtw89_write32_clr(rtwdev, reg, B_BE_QOSNULL_UPD_MUEDCA_EN);
reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_WMTX_TCR_BE_4, mac_idx);
rtw89_write32_mask(rtwdev, reg, B_BE_EHT_HE_PPDU_4XLTF_ZLD_USTIMER_MASK, 0x12);
@@ -1449,6 +1456,71 @@ static int set_cpuio_be(struct rtw89_dev *rtwdev,
return 0;
}
+static int dle_upd_qta_aval_page_be(struct rtw89_dev *rtwdev,
+ enum rtw89_mac_dle_ctrl_type type,
+ enum rtw89_mac_dle_ple_quota_id quota_id)
+{
+ u32 val;
+
+ if (type == DLE_CTRL_TYPE_WDE) {
+ rtw89_write32_mask(rtwdev, R_BE_WDE_BUFMGN_CTL,
+ B_BE_WDE_AVAL_UPD_QTAID_MASK, quota_id);
+ rtw89_write32_set(rtwdev, R_BE_WDE_BUFMGN_CTL, B_BE_WDE_AVAL_UPD_REQ);
+
+ return read_poll_timeout(rtw89_read32, val,
+ !(val & B_BE_WDE_AVAL_UPD_REQ),
+ 1, 2000, false, rtwdev, R_BE_WDE_BUFMGN_CTL);
+ } else if (type == DLE_CTRL_TYPE_PLE) {
+ rtw89_write32_mask(rtwdev, R_BE_PLE_BUFMGN_CTL,
+ B_BE_PLE_AVAL_UPD_QTAID_MASK, quota_id);
+ rtw89_write32_set(rtwdev, R_BE_PLE_BUFMGN_CTL, B_BE_PLE_AVAL_UPD_REQ);
+
+ return read_poll_timeout(rtw89_read32, val,
+ !(val & B_BE_PLE_AVAL_UPD_REQ),
+ 1, 2000, false, rtwdev, R_BE_PLE_BUFMGN_CTL);
+ }
+
+ rtw89_warn(rtwdev, "%s wrong type %d\n", __func__, type);
+ return -EINVAL;
+}
+
+static int dle_quota_change_be(struct rtw89_dev *rtwdev, bool band1_en)
+{
+ int ret;
+
+ if (band1_en) {
+ ret = dle_upd_qta_aval_page_be(rtwdev, DLE_CTRL_TYPE_PLE,
+ PLE_QTAID_B0_TXPL);
+ if (ret) {
+ rtw89_err(rtwdev, "update PLE B0 TX avail page fail %d\n", ret);
+ return ret;
+ }
+
+ ret = dle_upd_qta_aval_page_be(rtwdev, DLE_CTRL_TYPE_PLE,
+ PLE_QTAID_CMAC0_RX);
+ if (ret) {
+ rtw89_err(rtwdev, "update PLE CMAC0 RX avail page fail %d\n", ret);
+ return ret;
+ }
+ } else {
+ ret = dle_upd_qta_aval_page_be(rtwdev, DLE_CTRL_TYPE_PLE,
+ PLE_QTAID_B1_TXPL);
+ if (ret) {
+ rtw89_err(rtwdev, "update PLE B1 TX avail page fail %d\n", ret);
+ return ret;
+ }
+
+ ret = dle_upd_qta_aval_page_be(rtwdev, DLE_CTRL_TYPE_PLE,
+ PLE_QTAID_CMAC1_RX);
+ if (ret) {
+ rtw89_err(rtwdev, "update PLE CMAC1 RX avail page fail %d\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static int preload_init_be(struct rtw89_dev *rtwdev, u8 mac_idx,
enum rtw89_qta_mode mode)
{
@@ -1480,6 +1552,13 @@ static int preload_init_be(struct rtw89_dev *rtwdev, u8 mac_idx,
static int dbcc_bb_ctrl_be(struct rtw89_dev *rtwdev, bool bb1_en)
{
+ u32 set = B_BE_FEN_BB1PLAT_RSTB | B_BE_FEN_BB1_IP_RSTN;
+
+ if (bb1_en)
+ rtw89_write32_set(rtwdev, R_BE_FEN_RST_ENABLE, set);
+ else
+ rtw89_write32_clr(rtwdev, R_BE_FEN_RST_ENABLE, set);
+
return 0;
}
@@ -1538,7 +1617,7 @@ static int band1_enable_be(struct rtw89_dev *rtwdev)
return ret;
}
- ret = rtw89_mac_dle_quota_change(rtwdev, rtwdev->mac.qta_mode);
+ ret = rtw89_mac_dle_quota_change(rtwdev, rtwdev->mac.qta_mode, true);
if (ret) {
rtw89_err(rtwdev, "[ERR]DLE quota change %d\n", ret);
return ret;
@@ -1593,7 +1672,7 @@ static int band1_disable_be(struct rtw89_dev *rtwdev)
return ret;
}
- ret = rtw89_mac_dle_quota_change(rtwdev, rtwdev->mac.qta_mode);
+ ret = rtw89_mac_dle_quota_change(rtwdev, rtwdev->mac.qta_mode, false);
if (ret) {
rtw89_err(rtwdev, "[ERR]DLE quota change %d\n", ret);
return ret;
@@ -1616,7 +1695,7 @@ static int dbcc_enable_be(struct rtw89_dev *rtwdev, bool enable)
if (test_bit(RTW89_FLAG_FW_RDY, rtwdev->flags)) {
ret = rtw89_fw_h2c_notify_dbcc(rtwdev, true);
if (ret) {
- rtw89_err(rtwdev, "%s:[ERR]notfify dbcc1 fail %d\n",
+ rtw89_err(rtwdev, "%s:[ERR] notify dbcc1 fail %d\n",
__func__, ret);
return ret;
}
@@ -1625,7 +1704,7 @@ static int dbcc_enable_be(struct rtw89_dev *rtwdev, bool enable)
if (test_bit(RTW89_FLAG_FW_RDY, rtwdev->flags)) {
ret = rtw89_fw_h2c_notify_dbcc(rtwdev, false);
if (ret) {
- rtw89_err(rtwdev, "%s:[ERR]notfify dbcc1 fail %d\n",
+ rtw89_err(rtwdev, "%s:[ERR] notify dbcc1 fail %d\n",
__func__, ret);
return ret;
}
@@ -1718,12 +1797,220 @@ static int trx_init_be(struct rtw89_dev *rtwdev)
return 0;
}
+int rtw89_mac_cfg_gnt_v2(struct rtw89_dev *rtwdev,
+ const struct rtw89_mac_ax_coex_gnt *gnt_cfg)
+{
+ u32 val = 0;
+
+ if (gnt_cfg->band[0].gnt_bt)
+ val |= B_BE_GNT_BT_BB0_VAL | B_BE_GNT_BT_RX_BB0_VAL |
+ B_BE_GNT_BT_TX_BB0_VAL;
+
+ if (gnt_cfg->band[0].gnt_bt_sw_en)
+ val |= B_BE_GNT_BT_BB0_SWCTRL | B_BE_GNT_BT_RX_BB0_SWCTRL |
+ B_BE_GNT_BT_TX_BB0_SWCTRL;
+
+ if (gnt_cfg->band[0].gnt_wl)
+ val |= B_BE_GNT_WL_BB0_VAL | B_BE_GNT_WL_RX_VAL |
+ B_BE_GNT_WL_TX_VAL | B_BE_GNT_WL_BB_PWR_VAL;
+
+ if (gnt_cfg->band[0].gnt_wl_sw_en)
+ val |= B_BE_GNT_WL_BB0_SWCTRL | B_BE_GNT_WL_RX_SWCTRL |
+ B_BE_GNT_WL_TX_SWCTRL | B_BE_GNT_WL_BB_PWR_SWCTRL;
+
+ if (gnt_cfg->band[1].gnt_bt)
+ val |= B_BE_GNT_BT_BB1_VAL | B_BE_GNT_BT_RX_BB1_VAL |
+ B_BE_GNT_BT_TX_BB1_VAL;
+
+ if (gnt_cfg->band[1].gnt_bt_sw_en)
+ val |= B_BE_GNT_BT_BB1_SWCTRL | B_BE_GNT_BT_RX_BB1_SWCTRL |
+ B_BE_GNT_BT_TX_BB1_SWCTRL;
+
+ if (gnt_cfg->band[1].gnt_wl)
+ val |= B_BE_GNT_WL_BB1_VAL | B_BE_GNT_WL_RX_VAL |
+ B_BE_GNT_WL_TX_VAL | B_BE_GNT_WL_BB_PWR_VAL;
+
+ if (gnt_cfg->band[1].gnt_wl_sw_en)
+ val |= B_BE_GNT_WL_BB1_SWCTRL | B_BE_GNT_WL_RX_SWCTRL |
+ B_BE_GNT_WL_TX_SWCTRL | B_BE_GNT_WL_BB_PWR_SWCTRL;
+
+ if (gnt_cfg->bt[0].wlan_act_en)
+ val |= B_BE_WL_ACT_SWCTRL;
+ if (gnt_cfg->bt[0].wlan_act)
+ val |= B_BE_WL_ACT_VAL;
+ if (gnt_cfg->bt[1].wlan_act_en)
+ val |= B_BE_WL_ACT2_SWCTRL;
+ if (gnt_cfg->bt[1].wlan_act)
+ val |= B_BE_WL_ACT2_VAL;
+
+ rtw89_write32(rtwdev, R_BE_GNT_SW_CTRL, val);
+
+ return 0;
+}
+EXPORT_SYMBOL(rtw89_mac_cfg_gnt_v2);
+
+int rtw89_mac_cfg_ctrl_path_v2(struct rtw89_dev *rtwdev, bool wl)
+{
+ struct rtw89_btc *btc = &rtwdev->btc;
+ struct rtw89_btc_dm *dm = &btc->dm;
+ struct rtw89_mac_ax_gnt *g = dm->gnt.band;
+ struct rtw89_mac_ax_wl_act *gbt = dm->gnt.bt;
+ int i;
+
+ if (wl)
+ return 0;
+
+ for (i = 0; i < RTW89_PHY_MAX; i++) {
+ g[i].gnt_bt_sw_en = 1;
+ g[i].gnt_bt = 1;
+ g[i].gnt_wl_sw_en = 1;
+ g[i].gnt_wl = 0;
+ gbt[i].wlan_act = 1;
+ gbt[i].wlan_act_en = 0;
+ }
+
+ return rtw89_mac_cfg_gnt_v2(rtwdev, &dm->gnt);
+}
+EXPORT_SYMBOL(rtw89_mac_cfg_ctrl_path_v2);
+
+static
+int rtw89_mac_cfg_plt_be(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_plt *plt)
+{
+ u32 reg;
+ u16 val;
+ int ret;
+
+ ret = rtw89_mac_check_mac_en(rtwdev, plt->band, RTW89_CMAC_SEL);
+ if (ret)
+ return ret;
+
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_BT_PLT, plt->band);
+ val = (plt->tx & RTW89_MAC_AX_PLT_LTE_RX ? B_BE_TX_PLT_GNT_LTE_RX : 0) |
+ (plt->tx & RTW89_MAC_AX_PLT_GNT_BT_TX ? B_BE_TX_PLT_GNT_BT_TX : 0) |
+ (plt->tx & RTW89_MAC_AX_PLT_GNT_BT_RX ? B_BE_TX_PLT_GNT_BT_RX : 0) |
+ (plt->tx & RTW89_MAC_AX_PLT_GNT_WL ? B_BE_TX_PLT_GNT_WL : 0) |
+ (plt->rx & RTW89_MAC_AX_PLT_LTE_RX ? B_BE_RX_PLT_GNT_LTE_RX : 0) |
+ (plt->rx & RTW89_MAC_AX_PLT_GNT_BT_TX ? B_BE_RX_PLT_GNT_BT_TX : 0) |
+ (plt->rx & RTW89_MAC_AX_PLT_GNT_BT_RX ? B_BE_RX_PLT_GNT_BT_RX : 0) |
+ (plt->rx & RTW89_MAC_AX_PLT_GNT_WL ? B_BE_RX_PLT_GNT_WL : 0) |
+ B_BE_PLT_EN;
+ rtw89_write16(rtwdev, reg, val);
+
+ return 0;
+}
+
+static u16 rtw89_mac_get_plt_cnt_be(struct rtw89_dev *rtwdev, u8 band)
+{
+ u32 reg;
+ u16 cnt;
+
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_BT_PLT, band);
+ cnt = rtw89_read32_mask(rtwdev, reg, B_BE_BT_PLT_PKT_CNT_MASK);
+ rtw89_write16_set(rtwdev, reg, B_BE_BT_PLT_RST);
+
+ return cnt;
+}
+
+static int rtw89_set_hw_sch_tx_en_v2(struct rtw89_dev *rtwdev, u8 mac_idx,
+ u32 tx_en, u32 tx_en_mask)
+{
+ u32 reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_CTN_DRV_TXEN, mac_idx);
+ u32 val;
+ int ret;
+
+ ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
+ if (ret)
+ return ret;
+
+ val = rtw89_read32(rtwdev, reg);
+ val = (val & ~tx_en_mask) | (tx_en & tx_en_mask);
+ rtw89_write32(rtwdev, reg, val);
+
+ return 0;
+}
+
+int rtw89_mac_stop_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx,
+ u32 *tx_en, enum rtw89_sch_tx_sel sel)
+{
+ int ret;
+
+ *tx_en = rtw89_read32(rtwdev,
+ rtw89_mac_reg_by_idx(rtwdev, R_BE_CTN_DRV_TXEN, mac_idx));
+
+ switch (sel) {
+ case RTW89_SCH_TX_SEL_ALL:
+ ret = rtw89_set_hw_sch_tx_en_v2(rtwdev, mac_idx, 0,
+ B_BE_CTN_TXEN_ALL_MASK);
+ if (ret)
+ return ret;
+ break;
+ case RTW89_SCH_TX_SEL_HIQ:
+ ret = rtw89_set_hw_sch_tx_en_v2(rtwdev, mac_idx,
+ 0, B_BE_CTN_TXEN_HGQ);
+ if (ret)
+ return ret;
+ break;
+ case RTW89_SCH_TX_SEL_MG0:
+ ret = rtw89_set_hw_sch_tx_en_v2(rtwdev, mac_idx,
+ 0, B_BE_CTN_TXEN_MGQ);
+ if (ret)
+ return ret;
+ break;
+ case RTW89_SCH_TX_SEL_MACID:
+ ret = rtw89_set_hw_sch_tx_en_v2(rtwdev, mac_idx, 0,
+ B_BE_CTN_TXEN_ALL_MASK);
+ if (ret)
+ return ret;
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(rtw89_mac_stop_sch_tx_v2);
+
+int rtw89_mac_resume_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en)
+{
+ int ret;
+
+ ret = rtw89_set_hw_sch_tx_en_v2(rtwdev, mac_idx, tx_en,
+ B_BE_CTN_TXEN_ALL_MASK);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(rtw89_mac_resume_sch_tx_v2);
+
+static
+int rtw89_mac_cfg_ppdu_status_be(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable)
+{
+ u32 reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_PPDU_STAT, mac_idx);
+ int ret;
+
+ ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
+ if (ret)
+ return ret;
+
+ if (!enable) {
+ rtw89_write32_clr(rtwdev, reg, B_BE_PPDU_STAT_RPT_EN);
+ return 0;
+ }
+
+ rtw89_write32_mask(rtwdev, R_BE_HW_PPDU_STATUS, B_BE_FWD_PPDU_STAT_MASK, 3);
+ rtw89_write32(rtwdev, reg, B_BE_PPDU_STAT_RPT_EN | B_BE_PPDU_MAC_INFO |
+ B_BE_APP_RX_CNT_RPT | B_BE_APP_PLCP_HDR_RPT |
+ B_BE_PPDU_STAT_RPT_CRC32 | B_BE_PPDU_STAT_RPT_DMA);
+
+ return 0;
+}
+
static bool rtw89_mac_get_txpwr_cr_be(struct rtw89_dev *rtwdev,
enum rtw89_phy_idx phy_idx,
u32 reg_base, u32 *cr)
{
- const struct rtw89_dle_mem *dle_mem = rtwdev->chip->dle_mem;
- enum rtw89_qta_mode mode = dle_mem->mode;
+ enum rtw89_qta_mode mode = rtwdev->mac.qta_mode;
int ret;
ret = rtw89_mac_check_mac_en(rtwdev, (enum rtw89_mac_idx)phy_idx,
@@ -2020,6 +2307,52 @@ static void rtw89_mac_dump_qta_lost_be(struct rtw89_dev *rtwdev)
dump_err_status_dispatcher_be(rtwdev);
}
+static int rtw89_mac_cpu_io_rx(struct rtw89_dev *rtwdev, bool wow_enable)
+{
+ struct rtw89_mac_h2c_info h2c_info = {};
+ struct rtw89_mac_c2h_info c2h_info = {};
+ u32 ret;
+
+ h2c_info.id = RTW89_FWCMD_H2CREG_FUNC_WOW_CPUIO_RX_CTRL;
+ h2c_info.content_len = sizeof(h2c_info.u.hdr);
+ h2c_info.u.hdr.w0 = u32_encode_bits(wow_enable, RTW89_H2CREG_WOW_CPUIO_RX_CTRL_EN);
+
+ ret = rtw89_fw_msg_reg(rtwdev, &h2c_info, &c2h_info);
+ if (ret)
+ return ret;
+
+ if (c2h_info.id != RTW89_FWCMD_C2HREG_FUNC_WOW_CPUIO_RX_ACK)
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static int rtw89_wow_config_mac_be(struct rtw89_dev *rtwdev, bool enable_wow)
+{
+ if (enable_wow) {
+ rtw89_write32_set(rtwdev, R_BE_RX_STOP, B_BE_HOST_RX_STOP);
+ rtw89_write32_clr(rtwdev, R_BE_RX_FLTR_OPT, B_BE_SNIFFER_MODE);
+ rtw89_mac_cpu_io_rx(rtwdev, enable_wow);
+ rtw89_mac_cfg_ppdu_status(rtwdev, RTW89_MAC_0, false);
+ rtw89_write32(rtwdev, R_BE_FWD_ERR, 0);
+ rtw89_write32(rtwdev, R_BE_FWD_ACTN0, 0);
+ rtw89_write32(rtwdev, R_BE_FWD_ACTN1, 0);
+ rtw89_write32(rtwdev, R_BE_FWD_ACTN2, 0);
+ rtw89_write32(rtwdev, R_BE_FWD_TF0, 0);
+ rtw89_write32(rtwdev, R_BE_FWD_TF1, 0);
+ rtw89_write32(rtwdev, R_BE_FWD_ERR, 0);
+ rtw89_write32(rtwdev, R_BE_HW_PPDU_STATUS, 0);
+ rtw89_write8(rtwdev, R_BE_DBG_WOW_READY, WOWLAN_NOT_READY);
+ } else {
+ rtw89_mac_cpu_io_rx(rtwdev, enable_wow);
+ rtw89_write32_clr(rtwdev, R_BE_RX_STOP, B_BE_HOST_RX_STOP);
+ rtw89_write32_set(rtwdev, R_BE_RX_FLTR_OPT, R_BE_RX_FLTR_OPT);
+ rtw89_mac_cfg_ppdu_status(rtwdev, RTW89_MAC_0, true);
+ }
+
+ return 0;
+}
+
static void rtw89_mac_dump_cmac_err_status_be(struct rtw89_dev *rtwdev,
u8 band)
{
@@ -2218,6 +2551,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = {
.rx_fltr = R_BE_RX_FLTR_OPT,
.port_base = &rtw89_port_base_be,
.agg_len_ht = R_BE_AGG_LEN_HT_0,
+ .ps_status = R_BE_WMTX_POWER_BE_BIT_CTL,
.muedca_ctrl = {
.addr = R_BE_MUEDCA_EN,
@@ -2228,6 +2562,11 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = {
.mask = B_BE_BFMEE_HT_NDPA_EN | B_BE_BFMEE_VHT_NDPA_EN |
B_BE_BFMEE_HE_NDPA_EN | B_BE_BFMEE_EHT_NDPA_EN,
},
+ .narrow_bw_ru_dis = {
+ .addr = R_BE_RXTRIG_TEST_USER_2,
+ .mask = B_BE_RXTRIG_RU26_DIS,
+ },
+ .wow_ctrl = {.addr = R_BE_WOW_CTRL, .mask = B_BE_WOW_WOWEN,},
.check_mac_en = rtw89_mac_check_mac_en_be,
.sys_init = sys_init_be,
@@ -2239,6 +2578,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = {
.bf_assoc = rtw89_mac_bf_assoc_be,
.typ_fltr_opt = rtw89_mac_typ_fltr_opt_be,
+ .cfg_ppdu_status = rtw89_mac_cfg_ppdu_status_be,
.dle_mix_cfg = dle_mix_cfg_be,
.chk_dle_rdy = chk_dle_rdy_be,
@@ -2250,6 +2590,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = {
.wde_quota_cfg = wde_quota_cfg_be,
.ple_quota_cfg = ple_quota_cfg_be,
.set_cpuio = set_cpuio_be,
+ .dle_quota_change = dle_quota_change_be,
.disable_cpu = rtw89_mac_disable_cpu_be,
.fwdl_enable_wcpu = rtw89_mac_fwdl_enable_wcpu_be,
@@ -2259,6 +2600,9 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = {
.parse_phycap_map = rtw89_parse_phycap_map_be,
.cnv_efuse_state = rtw89_cnv_efuse_state_be,
+ .cfg_plt = rtw89_mac_cfg_plt_be,
+ .get_plt_cnt = rtw89_mac_get_plt_cnt_be,
+
.get_txpwr_cr = rtw89_mac_get_txpwr_cr_be,
.write_xtal_si = rtw89_mac_write_xtal_si_be,
@@ -2268,5 +2612,10 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = {
.dump_err_status = rtw89_mac_dump_err_status_be,
.is_txq_empty = mac_is_txq_empty_be,
+
+ .add_chan_list = rtw89_hw_scan_add_chan_list_be,
+ .scan_offload = rtw89_fw_h2c_scan_offload_be,
+
+ .wow_config_mac = rtw89_wow_config_mac_be,
};
EXPORT_SYMBOL(rtw89_mac_gen_be);
diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c
index 769f1ce62ebc..19001130ad94 100644
--- a/drivers/net/wireless/realtek/rtw89/pci.c
+++ b/drivers/net/wireless/realtek/rtw89/pci.c
@@ -155,8 +155,8 @@ static void rtw89_pci_sync_skb_for_device(struct rtw89_dev *rtwdev,
DMA_FROM_DEVICE);
}
-static int rtw89_pci_rxbd_info_update(struct rtw89_dev *rtwdev,
- struct sk_buff *skb)
+static void rtw89_pci_rxbd_info_update(struct rtw89_dev *rtwdev,
+ struct sk_buff *skb)
{
struct rtw89_pci_rxbd_info *rxbd_info;
struct rtw89_pci_rx_info *rx_info = RTW89_PCI_RX_SKB_CB(skb);
@@ -166,11 +166,59 @@ static int rtw89_pci_rxbd_info_update(struct rtw89_dev *rtwdev,
rx_info->ls = le32_get_bits(rxbd_info->dword, RTW89_PCI_RXBD_LS);
rx_info->len = le32_get_bits(rxbd_info->dword, RTW89_PCI_RXBD_WRITE_SIZE);
rx_info->tag = le32_get_bits(rxbd_info->dword, RTW89_PCI_RXBD_TAG);
+}
+
+static int rtw89_pci_validate_rx_tag(struct rtw89_dev *rtwdev,
+ struct rtw89_pci_rx_ring *rx_ring,
+ struct sk_buff *skb)
+{
+ struct rtw89_pci_rx_info *rx_info = RTW89_PCI_RX_SKB_CB(skb);
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ u32 target_rx_tag;
+
+ if (!info->check_rx_tag)
+ return 0;
+
+ /* valid range is 1 ~ 0x1FFF */
+ if (rx_ring->target_rx_tag == 0)
+ target_rx_tag = 1;
+ else
+ target_rx_tag = rx_ring->target_rx_tag;
+
+ if (rx_info->tag != target_rx_tag) {
+ rtw89_debug(rtwdev, RTW89_DBG_UNEXP, "mismatch RX tag 0x%x 0x%x\n",
+ rx_info->tag, target_rx_tag);
+ return -EAGAIN;
+ }
return 0;
}
-static void rtw89_pci_ctrl_txdma_ch_pcie(struct rtw89_dev *rtwdev, bool enable)
+static
+int rtw89_pci_sync_skb_for_device_and_validate_rx_info(struct rtw89_dev *rtwdev,
+ struct rtw89_pci_rx_ring *rx_ring,
+ struct sk_buff *skb)
+{
+ struct rtw89_pci_rx_info *rx_info = RTW89_PCI_RX_SKB_CB(skb);
+ int rx_tag_retry = 100;
+ int ret;
+
+ do {
+ rtw89_pci_sync_skb_for_cpu(rtwdev, skb);
+ rtw89_pci_rxbd_info_update(rtwdev, skb);
+
+ ret = rtw89_pci_validate_rx_tag(rtwdev, rx_ring, skb);
+ if (ret != -EAGAIN)
+ break;
+ } while (rx_tag_retry--);
+
+ /* update target rx_tag for next RX */
+ rx_ring->target_rx_tag = rx_info->tag + 1;
+
+ return ret;
+}
+
+static void rtw89_pci_ctrl_txdma_ch_ax(struct rtw89_dev *rtwdev, bool enable)
{
const struct rtw89_pci_info *info = rtwdev->pci_info;
const struct rtw89_reg_def *dma_stop1 = &info->dma_stop1;
@@ -187,7 +235,7 @@ static void rtw89_pci_ctrl_txdma_ch_pcie(struct rtw89_dev *rtwdev, bool enable)
}
}
-static void rtw89_pci_ctrl_txdma_fw_ch_pcie(struct rtw89_dev *rtwdev, bool enable)
+static void rtw89_pci_ctrl_txdma_fw_ch_ax(struct rtw89_dev *rtwdev, bool enable)
{
const struct rtw89_pci_info *info = rtwdev->pci_info;
const struct rtw89_reg_def *dma_stop1 = &info->dma_stop1;
@@ -259,9 +307,8 @@ static u32 rtw89_pci_rxbd_deliver_skbs(struct rtw89_dev *rtwdev,
skb_idx = rtw89_pci_get_rx_skb_idx(rtwdev, bd_ring);
skb = rx_ring->buf[skb_idx];
- rtw89_pci_sync_skb_for_cpu(rtwdev, skb);
- ret = rtw89_pci_rxbd_info_update(rtwdev, skb);
+ ret = rtw89_pci_sync_skb_for_device_and_validate_rx_info(rtwdev, rx_ring, skb);
if (ret) {
rtw89_err(rtwdev, "failed to update %d RXBD info: %d\n",
bd_ring->wp, ret);
@@ -549,9 +596,8 @@ static u32 rtw89_pci_release_tx_skbs(struct rtw89_dev *rtwdev,
skb_idx = rtw89_pci_get_rx_skb_idx(rtwdev, bd_ring);
skb = rx_ring->buf[skb_idx];
- rtw89_pci_sync_skb_for_cpu(rtwdev, skb);
- ret = rtw89_pci_rxbd_info_update(rtwdev, skb);
+ ret = rtw89_pci_sync_skb_for_device_and_validate_rx_info(rtwdev, rx_ring, skb);
if (ret) {
rtw89_err(rtwdev, "failed to update %d RXBD info: %d\n",
bd_ring->wp, ret);
@@ -705,7 +751,7 @@ void rtw89_pci_recognize_intrs_v2(struct rtw89_dev *rtwdev,
rtw89_read32(rtwdev, R_BE_HISR0) & rtwpci->halt_c2h_intrs : 0;
isrs->isrs[0] = isrs->ind_isrs & B_BE_HCI_AXIDMA_INT ?
rtw89_read32(rtwdev, R_BE_HAXI_HISR00) & rtwpci->intrs[0] : 0;
- isrs->isrs[1] = rtw89_read32(rtwdev, R_BE_PCIE_DMA_ISR);
+ isrs->isrs[1] = rtw89_read32(rtwdev, R_BE_PCIE_DMA_ISR) & rtwpci->intrs[1];
if (isrs->halt_c2h_isrs)
rtw89_write32(rtwdev, R_BE_HISR0, isrs->halt_c2h_isrs);
@@ -1550,6 +1596,7 @@ static void rtw89_pci_reset_trx_rings(struct rtw89_dev *rtwdev)
bd_ring->rp = 0;
rx_ring->diliver_skb = NULL;
rx_ring->diliver_desc.ready = false;
+ rx_ring->target_rx_tag = 0;
rtw89_write16(rtwdev, addr_num, bd_ring->len);
rtw89_write32(rtwdev, addr_desa_l, bd_ring->dma);
@@ -1907,22 +1954,87 @@ static int rtw89_write16_mdio_clr(struct rtw89_dev *rtwdev, u8 addr, u16 mask, u
return 0;
}
+static int rtw89_dbi_write8(struct rtw89_dev *rtwdev, u16 addr, u8 data)
+{
+ u16 addr_2lsb = addr & B_AX_DBI_2LSB;
+ u16 write_addr;
+ u8 flag;
+ int ret;
+
+ write_addr = addr & B_AX_DBI_ADDR_MSK;
+ write_addr |= u16_encode_bits(BIT(addr_2lsb), B_AX_DBI_WREN_MSK);
+ rtw89_write8(rtwdev, R_AX_DBI_WDATA + addr_2lsb, data);
+ rtw89_write16(rtwdev, R_AX_DBI_FLAG, write_addr);
+ rtw89_write8(rtwdev, R_AX_DBI_FLAG + 2, B_AX_DBI_WFLAG >> 16);
+
+ ret = read_poll_timeout_atomic(rtw89_read8, flag, !flag, 10,
+ 10 * RTW89_PCI_WR_RETRY_CNT, false,
+ rtwdev, R_AX_DBI_FLAG + 2);
+ if (ret)
+ rtw89_err(rtwdev, "failed to write DBI register, addr=0x%X\n",
+ addr);
+
+ return ret;
+}
+
+static int rtw89_dbi_read8(struct rtw89_dev *rtwdev, u16 addr, u8 *value)
+{
+ u16 read_addr = addr & B_AX_DBI_ADDR_MSK;
+ u8 flag;
+ int ret;
+
+ rtw89_write16(rtwdev, R_AX_DBI_FLAG, read_addr);
+ rtw89_write8(rtwdev, R_AX_DBI_FLAG + 2, B_AX_DBI_RFLAG >> 16);
+
+ ret = read_poll_timeout_atomic(rtw89_read8, flag, !flag, 10,
+ 10 * RTW89_PCI_WR_RETRY_CNT, false,
+ rtwdev, R_AX_DBI_FLAG + 2);
+ if (ret) {
+ rtw89_err(rtwdev, "failed to read DBI register, addr=0x%X\n",
+ addr);
+ return ret;
+ }
+
+ read_addr = R_AX_DBI_RDATA + (addr & 3);
+ *value = rtw89_read8(rtwdev, read_addr);
+
+ return 0;
+}
+
static int rtw89_pci_write_config_byte(struct rtw89_dev *rtwdev, u16 addr,
u8 data)
{
struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
+ enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
struct pci_dev *pdev = rtwpci->pdev;
+ int ret;
- return pci_write_config_byte(pdev, addr, data);
+ ret = pci_write_config_byte(pdev, addr, data);
+ if (!ret)
+ return 0;
+
+ if (chip_id == RTL8852A || chip_id == RTL8852B || chip_id == RTL8851B)
+ ret = rtw89_dbi_write8(rtwdev, addr, data);
+
+ return ret;
}
static int rtw89_pci_read_config_byte(struct rtw89_dev *rtwdev, u16 addr,
u8 *value)
{
struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
+ enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
struct pci_dev *pdev = rtwpci->pdev;
+ int ret;
- return pci_read_config_byte(pdev, addr, value);
+ ret = pci_read_config_byte(pdev, addr, value);
+ if (!ret)
+ return 0;
+
+ if (chip_id == RTL8852A || chip_id == RTL8852B || chip_id == RTL8851B)
+ ret = rtw89_dbi_read8(rtwdev, addr, value);
+
+ return ret;
}
static int rtw89_pci_config_byte_set(struct rtw89_dev *rtwdev, u16 addr,
@@ -2412,7 +2524,7 @@ static void rtw89_pci_clr_idx_all_ax(struct rtw89_dev *rtwdev)
B_AX_CLR_RXQ_IDX | B_AX_CLR_RPQ_IDX);
}
-static int rtw89_poll_txdma_ch_idle_pcie(struct rtw89_dev *rtwdev)
+static int rtw89_pci_poll_txdma_ch_idle_ax(struct rtw89_dev *rtwdev)
{
const struct rtw89_pci_info *info = rtwdev->pci_info;
u32 ret, check, dma_busy;
@@ -2439,7 +2551,7 @@ static int rtw89_poll_txdma_ch_idle_pcie(struct rtw89_dev *rtwdev)
return 0;
}
-static int rtw89_poll_rxdma_ch_idle_pcie(struct rtw89_dev *rtwdev)
+static int rtw89_pci_poll_rxdma_ch_idle_ax(struct rtw89_dev *rtwdev)
{
const struct rtw89_pci_info *info = rtwdev->pci_info;
u32 ret, check, dma_busy;
@@ -2459,13 +2571,13 @@ static int rtw89_pci_poll_dma_all_idle(struct rtw89_dev *rtwdev)
{
u32 ret;
- ret = rtw89_poll_txdma_ch_idle_pcie(rtwdev);
+ ret = rtw89_pci_poll_txdma_ch_idle_ax(rtwdev);
if (ret) {
rtw89_err(rtwdev, "txdma ch busy\n");
return ret;
}
- ret = rtw89_poll_rxdma_ch_idle_pcie(rtwdev);
+ ret = rtw89_pci_poll_rxdma_ch_idle_ax(rtwdev);
if (ret) {
rtw89_err(rtwdev, "rxdma ch busy\n");
return ret;
@@ -2644,8 +2756,8 @@ static int rtw89_pci_ops_mac_pre_init_ax(struct rtw89_dev *rtwdev)
}
/* disable all channels except to FW CMD channel to download firmware */
- rtw89_pci_ctrl_txdma_ch_pcie(rtwdev, false);
- rtw89_pci_ctrl_txdma_fw_ch_pcie(rtwdev, true);
+ rtw89_pci_ctrl_txdma_ch_ax(rtwdev, false);
+ rtw89_pci_ctrl_txdma_fw_ch_ax(rtwdev, true);
/* start DMA activities */
rtw89_pci_ctrl_dma_all(rtwdev, true);
@@ -2758,7 +2870,7 @@ static int rtw89_pci_ops_mac_post_init_ax(struct rtw89_dev *rtwdev)
}
/* enable DMA for all queues */
- rtw89_pci_ctrl_txdma_ch_pcie(rtwdev, true);
+ rtw89_pci_ctrl_txdma_ch_ax(rtwdev, true);
/* Release PCI IO */
rtw89_write32_clr(rtwdev, info->dma_stop1.addr,
@@ -3148,6 +3260,7 @@ static int rtw89_pci_alloc_rx_ring(struct rtw89_dev *rtwdev,
rx_ring->buf_sz = buf_sz;
rx_ring->diliver_skb = NULL;
rx_ring->diliver_desc.ready = false;
+ rx_ring->target_rx_tag = 0;
for (i = 0; i < len; i++) {
skb = dev_alloc_skb(buf_sz);
@@ -3387,8 +3500,7 @@ static void rtw89_pci_recovery_intr_mask_v2(struct rtw89_dev *rtwdev)
rtwpci->ind_intrs = B_BE_HS0_IND_INT_EN0;
rtwpci->halt_c2h_intrs = B_BE_HALT_C2H_INT_EN | B_BE_WDT_TIMEOUT_INT_EN;
rtwpci->intrs[0] = 0;
- rtwpci->intrs[1] = B_BE_PCIE_RX_RX0P2_IMR0_V1 |
- B_BE_PCIE_RX_RPQ0_IMR0_V1;
+ rtwpci->intrs[1] = 0;
}
static void rtw89_pci_default_intr_mask_v2(struct rtw89_dev *rtwdev)
@@ -3540,12 +3652,20 @@ static int rtw89_pci_filter_out(struct rtw89_dev *rtwdev)
static void rtw89_pci_clkreq_set(struct rtw89_dev *rtwdev, bool enable)
{
- enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
- int ret;
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ const struct rtw89_pci_gen_def *gen_def = info->gen_def;
if (rtw89_pci_disable_clkreq)
return;
+ gen_def->clkreq_set(rtwdev, enable);
+}
+
+static void rtw89_pci_clkreq_set_ax(struct rtw89_dev *rtwdev, bool enable)
+{
+ enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
+ int ret;
+
ret = rtw89_pci_write_config_byte(rtwdev, RTW89_PCIE_CLK_CTRL,
PCIE_CLKDLY_HW_30US);
if (ret)
@@ -3577,24 +3697,31 @@ static void rtw89_pci_clkreq_set(struct rtw89_dev *rtwdev, bool enable)
static void rtw89_pci_aspm_set(struct rtw89_dev *rtwdev, bool enable)
{
- enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
- u8 value = 0;
- int ret;
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ const struct rtw89_pci_gen_def *gen_def = info->gen_def;
if (rtw89_pci_disable_aspm_l1)
return;
+ gen_def->aspm_set(rtwdev, enable);
+}
+
+static void rtw89_pci_aspm_set_ax(struct rtw89_dev *rtwdev, bool enable)
+{
+ enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
+ u8 value = 0;
+ int ret;
+
ret = rtw89_pci_read_config_byte(rtwdev, RTW89_PCIE_ASPM_CTRL, &value);
if (ret)
- rtw89_err(rtwdev, "failed to read ASPM Delay\n");
+ rtw89_warn(rtwdev, "failed to read ASPM Delay\n");
- value &= ~(RTW89_L1DLY_MASK | RTW89_L0DLY_MASK);
- value |= FIELD_PREP(RTW89_L1DLY_MASK, PCIE_L1DLY_16US) |
- FIELD_PREP(RTW89_L0DLY_MASK, PCIE_L0SDLY_4US);
+ u8p_replace_bits(&value, PCIE_L1DLY_16US, RTW89_L1DLY_MASK);
+ u8p_replace_bits(&value, PCIE_L0SDLY_4US, RTW89_L0DLY_MASK);
ret = rtw89_pci_write_config_byte(rtwdev, RTW89_PCIE_ASPM_CTRL, value);
if (ret)
- rtw89_err(rtwdev, "failed to read ASPM Delay\n");
+ rtw89_warn(rtwdev, "failed to read ASPM Delay\n");
if (chip_id == RTL8852A || chip_id == RTL8852B || chip_id == RTL8851B) {
if (enable)
@@ -3681,6 +3808,17 @@ static void rtw89_pci_link_cfg(struct rtw89_dev *rtwdev)
static void rtw89_pci_l1ss_set(struct rtw89_dev *rtwdev, bool enable)
{
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ const struct rtw89_pci_gen_def *gen_def = info->gen_def;
+
+ if (rtw89_pci_disable_l1ss)
+ return;
+
+ gen_def->l1ss_set(rtwdev, enable);
+}
+
+static void rtw89_pci_l1ss_set_ax(struct rtw89_dev *rtwdev, bool enable)
+{
enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
int ret;
@@ -3954,6 +4092,14 @@ const struct rtw89_pci_gen_def rtw89_pci_gen_ax = {
.lv1rst_stop_dma = rtw89_pci_lv1rst_stop_dma_ax,
.lv1rst_start_dma = rtw89_pci_lv1rst_start_dma_ax,
+
+ .ctrl_txdma_ch = rtw89_pci_ctrl_txdma_ch_ax,
+ .ctrl_txdma_fw_ch = rtw89_pci_ctrl_txdma_fw_ch_ax,
+ .poll_txdma_ch_idle = rtw89_pci_poll_txdma_ch_idle_ax,
+
+ .aspm_set = rtw89_pci_aspm_set_ax,
+ .clkreq_set = rtw89_pci_clkreq_set_ax,
+ .l1ss_set = rtw89_pci_l1ss_set_ax,
};
EXPORT_SYMBOL(rtw89_pci_gen_ax);
@@ -3988,10 +4134,11 @@ static const struct rtw89_hci_ops rtw89_pci_ops = {
.recovery_start = rtw89_pci_ops_recovery_start,
.recovery_complete = rtw89_pci_ops_recovery_complete,
- .ctrl_txdma_ch = rtw89_pci_ctrl_txdma_ch_pcie,
- .ctrl_txdma_fw_ch = rtw89_pci_ctrl_txdma_fw_ch_pcie,
+ .ctrl_txdma_ch = rtw89_pci_ctrl_txdma_ch,
+ .ctrl_txdma_fw_ch = rtw89_pci_ctrl_txdma_fw_ch,
.ctrl_trxhci = rtw89_pci_ctrl_dma_trx,
- .poll_txdma_ch = rtw89_poll_txdma_ch_idle_pcie,
+ .poll_txdma_ch_idle = rtw89_pci_poll_txdma_ch_idle,
+
.clr_idx_all = rtw89_pci_clr_idx_all,
.clear = rtw89_pci_clear_resource,
.disable_intr = rtw89_pci_disable_intr_lock,
@@ -4068,6 +4215,8 @@ int rtw89_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_free_irq;
}
+ set_bit(RTW89_FLAG_PROBE_DONE, rtwdev->flags);
+
return 0;
err_free_irq:
diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h
index ca5de77fee90..a63b6b7c9bfa 100644
--- a/drivers/net/wireless/realtek/rtw89/pci.h
+++ b/drivers/net/wireless/realtek/rtw89/pci.h
@@ -42,6 +42,7 @@
#define B_AX_DBI_WFLAG BIT(16)
#define B_AX_DBI_WREN_MSK GENMASK(15, 12)
#define B_AX_DBI_ADDR_MSK GENMASK(11, 2)
+#define B_AX_DBI_2LSB GENMASK(1, 0)
#define R_AX_DBI_WDATA 0x1094
#define R_AX_DBI_RDATA 0x1098
@@ -281,6 +282,21 @@
#define B_BE_PCIE_EN_SWENT_L23 BIT(1)
#define B_BE_SEL_REQ_EXIT_L1 BIT(0)
+#define R_BE_PCIE_MIX_CFG 0x300C
+#define B_BE_L1SS_TIMEOUT_CTRL BIT(18)
+#define B_BE_ASPM_CTRL_L1 BIT(17)
+#define B_BE_ASPM_CTRL_L0 BIT(16)
+#define B_BE_XFER_PENDING_FW BIT(11)
+#define B_BE_XFER_PENDING BIT(10)
+#define B_BE_REQ_EXIT_L1 BIT(9)
+#define B_BE_REQ_ENTR_L1 BIT(8)
+#define B_BE_L1SUB_ENABLE BIT(0)
+
+#define R_BE_L1_CLK_CTRL 0x3010
+#define B_BE_RAS_SD_HOLD_LTSSM BIT(12)
+#define B_BE_CLK_REQ_N BIT(1)
+#define B_BE_CLK_PM_EN BIT(0)
+
#define R_BE_PCIE_LAT_CTRL 0x3044
#define B_BE_ELBI_PHY_REMAP_MASK GENMASK(29, 24)
#define B_BE_SYS_SUS_L12_EN BIT(17)
@@ -289,6 +305,8 @@
#define B_BE_RTK_LDO_POWER_LATENCY_MASK GENMASK(11, 10)
#define B_BE_RTK_LDO_BIAS_LATENCY_MASK GENMASK(9, 8)
#define B_BE_CLK_REQ_LAT_MASK GENMASK(7, 4)
+#define B_BE_RTK_PM_SEL_OPT BIT(1)
+#define B_BE_CLK_REQ_SEL BIT(0)
#define R_BE_PCIE_HIMR0 0x30B0
#define B_BE_PCIE_HB1_IND_INTA_IMR BIT(31)
@@ -924,6 +942,8 @@
#define B_BE_SER_L1SUB_IMR BIT(1)
#define B_BE_SER_PMU_IMR BIT(0)
+#define R_BE_REG_PL1_ISR 0x34B4
+
#define R_BE_RX_APPEND_MODE 0x8920
#define B_BE_APPEND_OFFSET_MASK GENMASK(23, 16)
#define B_BE_APPEND_LEN_MASK GENMASK(15, 0)
@@ -996,7 +1016,7 @@
#define RTW89_PCI_TXWD_NUM_MAX 512
#define RTW89_PCI_TXWD_PAGE_SIZE 128
#define RTW89_PCI_ADDRINFO_MAX 4
-#define RTW89_PCI_RX_BUF_SIZE 11460
+#define RTW89_PCI_RX_BUF_SIZE (11454 + 40) /* +40 for rtw89_rxdesc_long_v2 */
#define RTW89_PCI_POLL_BDRAM_RST_CNT 100
#define RTW89_PCI_MULTITAG 8
@@ -1065,6 +1085,15 @@ enum rtw89_pcie_clkdly_hw {
PCIE_CLKDLY_HW_200US = 0x5,
};
+enum rtw89_pcie_clkdly_hw_v1 {
+ PCIE_CLKDLY_HW_V1_0 = 0,
+ PCIE_CLKDLY_HW_V1_16US = 0x1,
+ PCIE_CLKDLY_HW_V1_32US = 0x2,
+ PCIE_CLKDLY_HW_V1_64US = 0x3,
+ PCIE_CLKDLY_HW_V1_80US = 0x4,
+ PCIE_CLKDLY_HW_V1_96US = 0x5,
+};
+
enum mac_ax_bd_trunc_mode {
MAC_AX_BD_NORM,
MAC_AX_BD_TRUNC,
@@ -1215,6 +1244,14 @@ struct rtw89_pci_gen_def {
int (*lv1rst_stop_dma)(struct rtw89_dev *rtwdev);
int (*lv1rst_start_dma)(struct rtw89_dev *rtwdev);
+
+ void (*ctrl_txdma_ch)(struct rtw89_dev *rtwdev, bool enable);
+ void (*ctrl_txdma_fw_ch)(struct rtw89_dev *rtwdev, bool enable);
+ int (*poll_txdma_ch_idle)(struct rtw89_dev *rtwdev);
+
+ void (*aspm_set)(struct rtw89_dev *rtwdev, bool enable);
+ void (*clkreq_set)(struct rtw89_dev *rtwdev, bool enable);
+ void (*l1ss_set)(struct rtw89_dev *rtwdev, bool enable);
};
struct rtw89_pci_info {
@@ -1234,6 +1271,7 @@ struct rtw89_pci_info {
enum mac_ax_pcie_func_ctrl io_rcy_en;
enum mac_ax_io_rcy_tmr io_rcy_tmr;
bool rx_ring_eq_is_full;
+ bool check_rx_tag;
u32 init_cfg_reg;
u32 txhci_en_bit;
@@ -1276,7 +1314,7 @@ struct rtw89_pci_tx_data {
struct rtw89_pci_rx_info {
dma_addr_t dma;
- u32 fs:1, ls:1, tag:11, len:14;
+ u32 fs:1, ls:1, tag:13, len:14;
};
#define RTW89_PCI_TXBD_OPTION_LS BIT(14)
@@ -1405,6 +1443,7 @@ struct rtw89_pci_rx_ring {
u32 buf_sz;
struct sk_buff *diliver_skb;
struct rtw89_rx_desc_info diliver_desc;
+ u32 target_rx_tag:13;
};
struct rtw89_pci_isrs {
@@ -1521,6 +1560,7 @@ static inline bool rtw89_pci_ltr_is_err_reg_val(u32 val)
}
extern const struct dev_pm_ops rtw89_pm_ops;
+extern const struct dev_pm_ops rtw89_pm_ops_be;
extern const struct rtw89_pci_ch_dma_addr_set rtw89_pci_ch_dma_addr_set;
extern const struct rtw89_pci_ch_dma_addr_set rtw89_pci_ch_dma_addr_set_v1;
extern const struct rtw89_pci_ch_dma_addr_set rtw89_pci_ch_dma_addr_set_be;
@@ -1676,4 +1716,27 @@ static inline int rtw89_pci_reset_bdram(struct rtw89_dev *rtwdev)
return gen_def->rst_bdram(rtwdev);
}
+static inline void rtw89_pci_ctrl_txdma_ch(struct rtw89_dev *rtwdev, bool enable)
+{
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ const struct rtw89_pci_gen_def *gen_def = info->gen_def;
+
+ return gen_def->ctrl_txdma_ch(rtwdev, enable);
+}
+
+static inline void rtw89_pci_ctrl_txdma_fw_ch(struct rtw89_dev *rtwdev, bool enable)
+{
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ const struct rtw89_pci_gen_def *gen_def = info->gen_def;
+
+ return gen_def->ctrl_txdma_fw_ch(rtwdev, enable);
+}
+
+static inline int rtw89_pci_poll_txdma_ch_idle(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_pci_info *info = rtwdev->pci_info;
+ const struct rtw89_pci_gen_def *gen_def = info->gen_def;
+
+ return gen_def->poll_txdma_ch_idle(rtwdev);
+}
#endif
diff --git a/drivers/net/wireless/realtek/rtw89/pci_be.c b/drivers/net/wireless/realtek/rtw89/pci_be.c
index 629ffa4bee91..7cc328222965 100644
--- a/drivers/net/wireless/realtek/rtw89/pci_be.c
+++ b/drivers/net/wireless/realtek/rtw89/pci_be.c
@@ -19,6 +19,54 @@ enum pcie_rxbd_mode {
#define PL0_TMR_MAC_1MS 0x27100
#define PL0_TMR_AUX_1MS 0x1E848
+static void rtw89_pci_aspm_set_be(struct rtw89_dev *rtwdev, bool enable)
+{
+ struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
+ struct pci_dev *pdev = rtwpci->pdev;
+ u8 value = 0;
+ int ret;
+
+ ret = pci_read_config_byte(pdev, RTW89_PCIE_ASPM_CTRL, &value);
+ if (ret)
+ rtw89_warn(rtwdev, "failed to read ASPM Delay\n");
+
+ u8p_replace_bits(&value, PCIE_L1DLY_16US, RTW89_L1DLY_MASK);
+
+ ret = pci_write_config_byte(pdev, RTW89_PCIE_ASPM_CTRL, value);
+ if (ret)
+ rtw89_warn(rtwdev, "failed to write ASPM Delay\n");
+
+ if (enable)
+ rtw89_write32_set(rtwdev, R_AX_PCIE_MIX_CFG_V1,
+ B_BE_ASPM_CTRL_L1);
+ else
+ rtw89_write32_clr(rtwdev, R_AX_PCIE_MIX_CFG_V1,
+ B_BE_ASPM_CTRL_L1);
+}
+
+static void rtw89_pci_l1ss_set_be(struct rtw89_dev *rtwdev, bool enable)
+{
+ if (enable)
+ rtw89_write32_set(rtwdev, R_BE_PCIE_MIX_CFG,
+ B_BE_L1SUB_ENABLE);
+ else
+ rtw89_write32_clr(rtwdev, R_BE_PCIE_MIX_CFG,
+ B_BE_L1SUB_ENABLE);
+}
+
+static void rtw89_pci_clkreq_set_be(struct rtw89_dev *rtwdev, bool enable)
+{
+ rtw89_write32_mask(rtwdev, R_BE_PCIE_LAT_CTRL, B_BE_CLK_REQ_LAT_MASK,
+ PCIE_CLKDLY_HW_V1_0);
+
+ if (enable)
+ rtw89_write32_set(rtwdev, R_BE_L1_CLK_CTRL,
+ B_BE_CLK_PM_EN);
+ else
+ rtw89_write32_clr(rtwdev, R_AX_L1_CLK_CTRL,
+ B_BE_CLK_PM_EN);
+}
+
static void _patch_pcie_power_wake_be(struct rtw89_dev *rtwdev, bool power_up)
{
if (power_up)
@@ -105,6 +153,10 @@ static void rtw89_pci_ctrl_trxdma_pcie_be(struct rtw89_dev *rtwdev,
val |= B_BE_STOP_AXI_MST;
rtw89_write32(rtwdev, R_BE_HAXI_INIT_CFG1, val);
+
+ if (io_en == MAC_AX_PCIE_ENABLE)
+ rtw89_write32_mask(rtwdev, R_BE_HAXI_MST_WDT_TIMEOUT_SEL_V1,
+ B_BE_HAXI_MST_WDT_TIMEOUT_SEL_MASK, 4);
}
static void rtw89_pci_clr_idx_all_be(struct rtw89_dev *rtwdev)
@@ -257,6 +309,7 @@ static void rtw89_pci_ser_setting_be(struct rtw89_dev *rtwdev)
rtw89_write32(rtwdev, R_BE_PL1_DBG_INFO, 0x0);
rtw89_write32_set(rtwdev, R_BE_FWS1IMR, B_BE_PCIE_SER_TIMEOUT_INDIC_EN);
rtw89_write32_set(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN);
+ rtw89_write32_mask(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_TIMER_UNIT_MASK, 1);
val32 = rtw89_read32(rtwdev, R_BE_REG_PL1_MASK);
val32 |= B_BE_SER_PMU_IMR | B_BE_SER_L1SUB_IMR | B_BE_SER_PM_MASTER_IMR |
@@ -264,8 +317,7 @@ static void rtw89_pci_ser_setting_be(struct rtw89_dev *rtwdev)
rtw89_write32(rtwdev, R_BE_REG_PL1_MASK, val32);
}
-static void rtw89_pci_ctrl_txdma_ch_be(struct rtw89_dev *rtwdev, bool all_en,
- bool h2c_en)
+static void rtw89_pci_ctrl_txdma_ch_be(struct rtw89_dev *rtwdev, bool enable)
{
u32 mask_all;
u32 val;
@@ -278,12 +330,19 @@ static void rtw89_pci_ctrl_txdma_ch_be(struct rtw89_dev *rtwdev, bool all_en,
val = rtw89_read32(rtwdev, R_BE_HAXI_DMA_STOP1);
val |= B_BE_STOP_CH13 | B_BE_STOP_CH14;
- if (all_en)
+ if (enable)
val &= ~mask_all;
else
val |= mask_all;
- if (h2c_en)
+ rtw89_write32(rtwdev, R_BE_HAXI_DMA_STOP1, val);
+}
+
+static void rtw89_pci_ctrl_txdma_fw_ch_be(struct rtw89_dev *rtwdev, bool enable)
+{
+ u32 val = rtw89_read32(rtwdev, R_BE_HAXI_DMA_STOP1);
+
+ if (enable)
val &= ~B_BE_STOP_CH12;
else
val |= B_BE_STOP_CH12;
@@ -322,7 +381,8 @@ static int rtw89_pci_ops_mac_pre_init_be(struct rtw89_dev *rtwdev)
rtw89_pci_pcie_setting_be(rtwdev);
rtw89_pci_ser_setting_be(rtwdev);
- rtw89_pci_ctrl_txdma_ch_be(rtwdev, false, true);
+ rtw89_pci_ctrl_txdma_ch_be(rtwdev, false);
+ rtw89_pci_ctrl_txdma_fw_ch_be(rtwdev, true);
rtw89_pci_ctrl_trxdma_pcie_be(rtwdev, MAC_AX_PCIE_ENABLE,
MAC_AX_PCIE_ENABLE, MAC_AX_PCIE_ENABLE);
@@ -432,7 +492,8 @@ static int rtw89_pci_ops_mac_post_init_be(struct rtw89_dev *rtwdev)
rtw89_pci_ctrl_trxdma_pcie_be(rtwdev, MAC_AX_PCIE_IGNORE,
MAC_AX_PCIE_IGNORE, MAC_AX_PCIE_ENABLE);
rtw89_pci_ctrl_wpdma_pcie_be(rtwdev, true);
- rtw89_pci_ctrl_txdma_ch_be(rtwdev, true, true);
+ rtw89_pci_ctrl_txdma_ch_be(rtwdev, true);
+ rtw89_pci_ctrl_txdma_fw_ch_be(rtwdev, true);
rtw89_pci_configure_mit_be(rtwdev);
return 0;
@@ -489,6 +550,46 @@ static int rtw89_pci_lv1rst_start_dma_be(struct rtw89_dev *rtwdev)
return 0;
}
+static int __maybe_unused rtw89_pci_suspend_be(struct device *dev)
+{
+ struct ieee80211_hw *hw = dev_get_drvdata(dev);
+ struct rtw89_dev *rtwdev = hw->priv;
+
+ rtw89_write32_set(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6);
+ rtw89_write32_set(rtwdev, R_BE_RSV_CTRL, B_BE_R_DIS_PRST);
+ rtw89_write32_clr(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6);
+ rtw89_write32_set(rtwdev, R_BE_PCIE_FRZ_CLK, B_BE_PCIE_FRZ_REG_RST);
+ rtw89_write32_clr(rtwdev, R_BE_REG_PL1_MASK, B_BE_SER_PM_MASTER_IMR);
+ return 0;
+}
+
+static int __maybe_unused rtw89_pci_resume_be(struct device *dev)
+{
+ struct ieee80211_hw *hw = dev_get_drvdata(dev);
+ struct rtw89_dev *rtwdev = hw->priv;
+ u32 polling;
+ int ret;
+
+ rtw89_write32_set(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6);
+ rtw89_write32_clr(rtwdev, R_BE_RSV_CTRL, B_BE_R_DIS_PRST);
+ rtw89_write32_clr(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6);
+ rtw89_write32_clr(rtwdev, R_BE_PCIE_FRZ_CLK, B_BE_PCIE_FRZ_REG_RST);
+ rtw89_write32_clr(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN);
+
+ ret = read_poll_timeout_atomic(rtw89_read32, polling, !polling, 1, 1000,
+ false, rtwdev, R_BE_REG_PL1_ISR);
+ if (ret)
+ rtw89_warn(rtwdev, "[ERR] PCIE SER clear polling fail\n");
+
+ rtw89_write32_set(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN);
+ rtw89_write32_set(rtwdev, R_BE_REG_PL1_MASK, B_BE_SER_PM_MASTER_IMR);
+
+ return 0;
+}
+
+SIMPLE_DEV_PM_OPS(rtw89_pm_ops_be, rtw89_pci_suspend_be, rtw89_pci_resume_be);
+EXPORT_SYMBOL(rtw89_pm_ops_be);
+
const struct rtw89_pci_gen_def rtw89_pci_gen_be = {
.isr_rdu = B_BE_RDU_CH1_INT | B_BE_RDU_CH0_INT,
.isr_halt_c2h = B_BE_HALT_C2H_INT,
@@ -505,5 +606,13 @@ const struct rtw89_pci_gen_def rtw89_pci_gen_be = {
.lv1rst_stop_dma = rtw89_pci_lv1rst_stop_dma_be,
.lv1rst_start_dma = rtw89_pci_lv1rst_start_dma_be,
+
+ .ctrl_txdma_ch = rtw89_pci_ctrl_txdma_ch_be,
+ .ctrl_txdma_fw_ch = rtw89_pci_ctrl_txdma_fw_ch_be,
+ .poll_txdma_ch_idle = rtw89_pci_poll_txdma_ch_idle_be,
+
+ .aspm_set = rtw89_pci_aspm_set_be,
+ .clkreq_set = rtw89_pci_clkreq_set_be,
+ .l1ss_set = rtw89_pci_l1ss_set_be,
};
EXPORT_SYMBOL(rtw89_pci_gen_be);
diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c
index bafc7b1cc104..12da63d64307 100644
--- a/drivers/net/wireless/realtek/rtw89/phy.c
+++ b/drivers/net/wireless/realtek/rtw89/phy.c
@@ -13,6 +13,13 @@
#include "txrx.h"
#include "util.h"
+static u32 rtw89_phy0_phy1_offset(struct rtw89_dev *rtwdev, u32 addr)
+{
+ const struct rtw89_phy_gen_def *phy = rtwdev->chip->phy_def;
+
+ return phy->phy0_phy1_offset(rtwdev, addr);
+}
+
static u16 get_max_amsdu_len(struct rtw89_dev *rtwdev,
const struct rtw89_ra_report *report)
{
@@ -718,6 +725,53 @@ u8 rtw89_phy_get_txsc(struct rtw89_dev *rtwdev,
}
EXPORT_SYMBOL(rtw89_phy_get_txsc);
+u8 rtw89_phy_get_txsb(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan,
+ enum rtw89_bandwidth dbw)
+{
+ enum rtw89_bandwidth cbw = chan->band_width;
+ u8 pri_ch = chan->primary_channel;
+ u8 central_ch = chan->channel;
+ u8 txsb_idx = 0;
+
+ if (cbw == dbw || cbw == RTW89_CHANNEL_WIDTH_20)
+ return txsb_idx;
+
+ switch (cbw) {
+ case RTW89_CHANNEL_WIDTH_40:
+ txsb_idx = pri_ch > central_ch ? 1 : 0;
+ break;
+ case RTW89_CHANNEL_WIDTH_80:
+ if (dbw == RTW89_CHANNEL_WIDTH_20)
+ txsb_idx = (pri_ch - central_ch + 6) / 4;
+ else
+ txsb_idx = pri_ch > central_ch ? 1 : 0;
+ break;
+ case RTW89_CHANNEL_WIDTH_160:
+ if (dbw == RTW89_CHANNEL_WIDTH_20)
+ txsb_idx = (pri_ch - central_ch + 14) / 4;
+ else if (dbw == RTW89_CHANNEL_WIDTH_40)
+ txsb_idx = (pri_ch - central_ch + 12) / 8;
+ else
+ txsb_idx = pri_ch > central_ch ? 1 : 0;
+ break;
+ case RTW89_CHANNEL_WIDTH_320:
+ if (dbw == RTW89_CHANNEL_WIDTH_20)
+ txsb_idx = (pri_ch - central_ch + 30) / 4;
+ else if (dbw == RTW89_CHANNEL_WIDTH_40)
+ txsb_idx = (pri_ch - central_ch + 28) / 8;
+ else if (dbw == RTW89_CHANNEL_WIDTH_80)
+ txsb_idx = (pri_ch - central_ch + 24) / 16;
+ else
+ txsb_idx = pri_ch > central_ch ? 1 : 0;
+ break;
+ default:
+ break;
+ }
+
+ return txsb_idx;
+}
+EXPORT_SYMBOL(rtw89_phy_get_txsb);
+
static bool rtw89_phy_check_swsi_busy(struct rtw89_dev *rtwdev)
{
return !!rtw89_phy_read32_mask(rtwdev, R_SWSI_V1, B_SWSI_W_BUSY_V1) ||
@@ -796,6 +850,71 @@ u32 rtw89_phy_read_rf_v1(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
}
EXPORT_SYMBOL(rtw89_phy_read_rf_v1);
+static u32 rtw89_phy_read_full_rf_v2_a(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path rf_path, u32 addr)
+{
+ static const u16 r_addr_ofst[2] = {0x2C24, 0x2D24};
+ static const u16 addr_ofst[2] = {0x2ADC, 0x2BDC};
+ bool busy, done;
+ int ret;
+ u32 val;
+
+ rtw89_phy_write32_mask(rtwdev, addr_ofst[rf_path], B_HWSI_ADD_CTL_MASK, 0x1);
+ ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, busy, !busy,
+ 1, 3800, false,
+ rtwdev, r_addr_ofst[rf_path], B_HWSI_VAL_BUSY);
+ if (ret) {
+ rtw89_warn(rtwdev, "poll HWSI is busy\n");
+ return INV_RF_DATA;
+ }
+
+ rtw89_phy_write32_mask(rtwdev, addr_ofst[rf_path], B_HWSI_ADD_MASK, addr);
+ rtw89_phy_write32_mask(rtwdev, addr_ofst[rf_path], B_HWSI_ADD_RD, 0x1);
+ udelay(2);
+
+ ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, done, done,
+ 1, 3800, false,
+ rtwdev, r_addr_ofst[rf_path], B_HWSI_VAL_RDONE);
+ if (ret) {
+ rtw89_warn(rtwdev, "read HWSI is busy\n");
+ val = INV_RF_DATA;
+ goto out;
+ }
+
+ val = rtw89_phy_read32_mask(rtwdev, r_addr_ofst[rf_path], RFREG_MASK);
+out:
+ rtw89_phy_write32_mask(rtwdev, addr_ofst[rf_path], B_HWSI_ADD_POLL_MASK, 0);
+
+ return val;
+}
+
+static u32 rtw89_phy_read_rf_v2_a(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path rf_path, u32 addr, u32 mask)
+{
+ u32 val;
+
+ val = rtw89_phy_read_full_rf_v2_a(rtwdev, rf_path, addr);
+
+ return (val & mask) >> __ffs(mask);
+}
+
+u32 rtw89_phy_read_rf_v2(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
+ u32 addr, u32 mask)
+{
+ bool ad_sel = u32_get_bits(addr, RTW89_RF_ADDR_ADSEL_MASK);
+
+ if (rf_path >= rtwdev->chip->rf_path_num) {
+ rtw89_err(rtwdev, "unsupported rf path (%d)\n", rf_path);
+ return INV_RF_DATA;
+ }
+
+ if (ad_sel)
+ return rtw89_phy_read_rf(rtwdev, rf_path, addr, mask);
+ else
+ return rtw89_phy_read_rf_v2_a(rtwdev, rf_path, addr, mask);
+}
+EXPORT_SYMBOL(rtw89_phy_read_rf_v2);
+
bool rtw89_phy_write_rf(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
u32 addr, u32 mask, u32 data)
{
@@ -875,6 +994,66 @@ bool rtw89_phy_write_rf_v1(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
}
EXPORT_SYMBOL(rtw89_phy_write_rf_v1);
+static
+bool rtw89_phy_write_full_rf_v2_a(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
+ u32 addr, u32 data)
+{
+ static const u32 addr_is_idle[2] = {0x2C24, 0x2D24};
+ static const u32 addr_ofst[2] = {0x2AE0, 0x2BE0};
+ bool busy;
+ u32 val;
+ int ret;
+
+ ret = read_poll_timeout_atomic(rtw89_phy_read32_mask, busy, !busy,
+ 1, 3800, false,
+ rtwdev, addr_is_idle[rf_path], BIT(29));
+ if (ret) {
+ rtw89_warn(rtwdev, "[%s] HWSI is busy\n", __func__);
+ return false;
+ }
+
+ val = u32_encode_bits(addr, B_HWSI_DATA_ADDR) |
+ u32_encode_bits(data, B_HWSI_DATA_VAL);
+
+ rtw89_phy_write32(rtwdev, addr_ofst[rf_path], val);
+
+ return true;
+}
+
+static
+bool rtw89_phy_write_rf_a_v2(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
+ u32 addr, u32 mask, u32 data)
+{
+ u32 val;
+
+ if (mask == RFREG_MASK) {
+ val = data;
+ } else {
+ val = rtw89_phy_read_full_rf_v2_a(rtwdev, rf_path, addr);
+ val &= ~mask;
+ val |= (data << __ffs(mask)) & mask;
+ }
+
+ return rtw89_phy_write_full_rf_v2_a(rtwdev, rf_path, addr, val);
+}
+
+bool rtw89_phy_write_rf_v2(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
+ u32 addr, u32 mask, u32 data)
+{
+ bool ad_sel = u32_get_bits(addr, RTW89_RF_ADDR_ADSEL_MASK);
+
+ if (rf_path >= rtwdev->chip->rf_path_num) {
+ rtw89_err(rtwdev, "unsupported rf path (%d)\n", rf_path);
+ return INV_RF_DATA;
+ }
+
+ if (ad_sel)
+ return rtw89_phy_write_rf(rtwdev, rf_path, addr, mask, data);
+ else
+ return rtw89_phy_write_rf_a_v2(rtwdev, rf_path, addr, mask, data);
+}
+EXPORT_SYMBOL(rtw89_phy_write_rf_v2);
+
static bool rtw89_chip_rf_v1(struct rtw89_dev *rtwdev)
{
return rtwdev->chip->ops->write_rf == rtw89_phy_write_rf_v1;
@@ -893,20 +1072,30 @@ static void rtw89_phy_config_bb_reg(struct rtw89_dev *rtwdev,
enum rtw89_rf_path rf_path,
void *extra_data)
{
- if (reg->addr == 0xfe)
+ u32 addr;
+
+ if (reg->addr == 0xfe) {
mdelay(50);
- else if (reg->addr == 0xfd)
+ } else if (reg->addr == 0xfd) {
mdelay(5);
- else if (reg->addr == 0xfc)
+ } else if (reg->addr == 0xfc) {
mdelay(1);
- else if (reg->addr == 0xfb)
+ } else if (reg->addr == 0xfb) {
udelay(50);
- else if (reg->addr == 0xfa)
+ } else if (reg->addr == 0xfa) {
udelay(5);
- else if (reg->addr == 0xf9)
+ } else if (reg->addr == 0xf9) {
udelay(1);
- else
- rtw89_phy_write32(rtwdev, reg->addr, reg->data);
+ } else if (reg->data == BYPASS_CR_DATA) {
+ rtw89_debug(rtwdev, RTW89_DBG_PHY_TRACK, "Bypass CR 0x%x\n", reg->addr);
+ } else {
+ addr = reg->addr;
+
+ if ((uintptr_t)extra_data == RTW89_PHY_1)
+ addr += rtw89_phy0_phy1_offset(rtwdev, reg->addr);
+
+ rtw89_phy_write32(rtwdev, addr, reg->data);
+ }
}
union rtw89_phy_bb_gain_arg {
@@ -929,7 +1118,7 @@ static void
rtw89_phy_cfg_bb_gain_error(struct rtw89_dev *rtwdev,
union rtw89_phy_bb_gain_arg arg, u32 data)
{
- struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 type = arg.type;
u8 path = arg.path;
u8 gband = arg.gain_band;
@@ -968,7 +1157,7 @@ static void
rtw89_phy_cfg_bb_rpl_ofst(struct rtw89_dev *rtwdev,
union rtw89_phy_bb_gain_arg arg, u32 data)
{
- struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 rxsc_start = arg.rxsc_start;
u8 bw = arg.bw;
u8 path = arg.path;
@@ -1050,7 +1239,7 @@ static void
rtw89_phy_cfg_bb_gain_bypass(struct rtw89_dev *rtwdev,
union rtw89_phy_bb_gain_arg arg, u32 data)
{
- struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 type = arg.type;
u8 path = arg.path;
u8 gband = arg.gain_band;
@@ -1077,7 +1266,7 @@ static void
rtw89_phy_cfg_bb_gain_op1db(struct rtw89_dev *rtwdev,
union rtw89_phy_bb_gain_arg arg, u32 data)
{
- struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 type = arg.type;
u8 path = arg.path;
u8 gband = arg.gain_band;
@@ -1108,10 +1297,10 @@ rtw89_phy_cfg_bb_gain_op1db(struct rtw89_dev *rtwdev,
}
}
-static void rtw89_phy_config_bb_gain(struct rtw89_dev *rtwdev,
- const struct rtw89_reg2_def *reg,
- enum rtw89_rf_path rf_path,
- void *extra_data)
+static void rtw89_phy_config_bb_gain_ax(struct rtw89_dev *rtwdev,
+ const struct rtw89_reg2_def *reg,
+ enum rtw89_rf_path rf_path,
+ void *extra_data)
{
const struct rtw89_chip_info *chip = rtwdev->chip;
union rtw89_phy_bb_gain_arg arg = { .addr = reg->addr };
@@ -1420,12 +1609,15 @@ void rtw89_phy_init_bb_reg(struct rtw89_dev *rtwdev)
bb_table = elm_info->bb_tbl ? elm_info->bb_tbl : chip->bb_table;
rtw89_phy_init_reg(rtwdev, bb_table, rtw89_phy_config_bb_reg, NULL);
+ if (rtwdev->dbcc_en)
+ rtw89_phy_init_reg(rtwdev, bb_table, rtw89_phy_config_bb_reg,
+ (void *)RTW89_PHY_1);
rtw89_chip_init_txpwr_unit(rtwdev, RTW89_PHY_0);
bb_gain_table = elm_info->bb_gain ? elm_info->bb_gain : chip->bb_gain_table;
if (bb_gain_table)
rtw89_phy_init_reg(rtwdev, bb_gain_table,
- rtw89_phy_config_bb_gain, NULL);
+ chip->phy_def->config_bb_gain, NULL);
rtw89_phy_bb_reset(rtwdev, RTW89_PHY_0);
}
@@ -1467,11 +1659,9 @@ void rtw89_phy_init_rf_reg(struct rtw89_dev *rtwdev, bool noio)
kfree(rf_reg_info);
}
-static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev)
+static void rtw89_phy_preinit_rf_nctl_ax(struct rtw89_dev *rtwdev)
{
- struct rtw89_fw_elm_info *elm_info = &rtwdev->fw.elm_info;
const struct rtw89_chip_info *chip = rtwdev->chip;
- const struct rtw89_phy_table *nctl_table;
u32 val;
int ret;
@@ -1491,6 +1681,15 @@ static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev)
1000, false, rtwdev);
if (ret)
rtw89_err(rtwdev, "failed to poll nctl block\n");
+}
+
+static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_fw_elm_info *elm_info = &rtwdev->fw.elm_info;
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+ const struct rtw89_phy_table *nctl_table;
+
+ rtw89_phy_preinit_rf_nctl(rtwdev);
nctl_table = elm_info->rf_nctl ? elm_info->rf_nctl : chip->nctl_table;
rtw89_phy_init_reg(rtwdev, nctl_table, rtw89_phy_config_bb_reg, NULL);
@@ -1499,14 +1698,11 @@ static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev)
rtw89_rfk_parser(rtwdev, chip->nctl_post_table);
}
-static u32 rtw89_phy0_phy1_offset(struct rtw89_dev *rtwdev, u32 addr)
+static u32 rtw89_phy0_phy1_offset_ax(struct rtw89_dev *rtwdev, u32 addr)
{
u32 phy_page = addr >> 8;
u32 ofst = 0;
- if (rtwdev->chip->chip_gen == RTW89_CHIP_BE)
- return addr < 0x10000 ? 0x20000 : 0;
-
switch (phy_page) {
case 0x6:
case 0x7:
@@ -1561,6 +1757,7 @@ void rtw89_phy_set_phy_regs(struct rtw89_dev *rtwdev, u32 addr, u32 mask,
rtw89_phy_write32_idx(rtwdev, addr, mask, val, RTW89_PHY_1);
}
+EXPORT_SYMBOL(rtw89_phy_set_phy_regs);
void rtw89_phy_write_reg3_tbl(struct rtw89_dev *rtwdev,
const struct rtw89_phy_reg3_tbl *tbl)
@@ -2699,9 +2896,63 @@ void (* const rtw89_phy_c2h_rfk_log_handler[])(struct rtw89_dev *rtwdev,
[RTW89_PHY_C2H_RFK_LOG_FUNC_TXGAPK] = rtw89_phy_c2h_rfk_log_txgapk,
};
+static
+void rtw89_phy_rfk_report_prep(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_rfk_wait_info *wait = &rtwdev->rfk_wait;
+
+ wait->state = RTW89_RFK_STATE_START;
+ wait->start_time = ktime_get();
+ reinit_completion(&wait->completion);
+}
+
+static
+int rtw89_phy_rfk_report_wait(struct rtw89_dev *rtwdev, const char *rfk_name,
+ unsigned int ms)
+{
+ struct rtw89_rfk_wait_info *wait = &rtwdev->rfk_wait;
+ unsigned long time_left;
+
+ /* Since we can't receive C2H event during SER, use a fixed delay. */
+ if (test_bit(RTW89_FLAG_SER_HANDLING, rtwdev->flags)) {
+ fsleep(1000 * ms / 2);
+ goto out;
+ }
+
+ time_left = wait_for_completion_timeout(&wait->completion,
+ msecs_to_jiffies(ms));
+ if (time_left == 0) {
+ rtw89_warn(rtwdev, "failed to wait RF %s\n", rfk_name);
+ return -ETIMEDOUT;
+ } else if (wait->state != RTW89_RFK_STATE_OK) {
+ rtw89_warn(rtwdev, "failed to do RF %s result from state %d\n",
+ rfk_name, wait->state);
+ return -EFAULT;
+ }
+
+out:
+ rtw89_debug(rtwdev, RTW89_DBG_RFK, "RF %s takes %lld ms to complete\n",
+ rfk_name, ktime_ms_delta(ktime_get(), wait->start_time));
+
+ return 0;
+}
+
static void
rtw89_phy_c2h_rfk_report_state(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len)
{
+ const struct rtw89_c2h_rfk_report *report =
+ (const struct rtw89_c2h_rfk_report *)c2h->data;
+ struct rtw89_rfk_wait_info *wait = &rtwdev->rfk_wait;
+
+ wait->state = report->state;
+ wait->version = report->version;
+
+ complete(&wait->completion);
+
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "RFK report state %d with version %d (%*ph)\n",
+ wait->state, wait->version,
+ (int)(len - sizeof(report->hdr)), &report->state);
}
static
@@ -2772,6 +3023,726 @@ void rtw89_phy_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb,
handler(rtwdev, skb, len);
}
+int rtw89_phy_rfk_pre_ntfy_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_pre_ntfy(rtwdev, phy_idx);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "PRE_NTFY", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_pre_ntfy_and_wait);
+
+int rtw89_phy_rfk_tssi_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ enum rtw89_tssi_mode tssi_mode,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_tssi(rtwdev, phy_idx, tssi_mode);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "TSSI", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_tssi_and_wait);
+
+int rtw89_phy_rfk_iqk_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_iqk(rtwdev, phy_idx);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "IQK", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_iqk_and_wait);
+
+int rtw89_phy_rfk_dpk_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_dpk(rtwdev, phy_idx);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "DPK", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_dpk_and_wait);
+
+int rtw89_phy_rfk_txgapk_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_txgapk(rtwdev, phy_idx);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "TXGAPK", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_txgapk_and_wait);
+
+int rtw89_phy_rfk_dack_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_dack(rtwdev, phy_idx);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "DACK", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_dack_and_wait);
+
+int rtw89_phy_rfk_rxdck_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms)
+{
+ int ret;
+
+ rtw89_phy_rfk_report_prep(rtwdev);
+
+ ret = rtw89_fw_h2c_rf_rxdck(rtwdev, phy_idx);
+ if (ret)
+ return ret;
+
+ return rtw89_phy_rfk_report_wait(rtwdev, "RX_DCK", ms);
+}
+EXPORT_SYMBOL(rtw89_phy_rfk_rxdck_and_wait);
+
+static u32 phy_tssi_get_cck_group(u8 ch)
+{
+ switch (ch) {
+ case 1 ... 2:
+ return 0;
+ case 3 ... 5:
+ return 1;
+ case 6 ... 8:
+ return 2;
+ case 9 ... 11:
+ return 3;
+ case 12 ... 13:
+ return 4;
+ case 14:
+ return 5;
+ }
+
+ return 0;
+}
+
+#define PHY_TSSI_EXTRA_GROUP_BIT BIT(31)
+#define PHY_TSSI_EXTRA_GROUP(idx) (PHY_TSSI_EXTRA_GROUP_BIT | (idx))
+#define PHY_IS_TSSI_EXTRA_GROUP(group) ((group) & PHY_TSSI_EXTRA_GROUP_BIT)
+#define PHY_TSSI_EXTRA_GET_GROUP_IDX1(group) \
+ ((group) & ~PHY_TSSI_EXTRA_GROUP_BIT)
+#define PHY_TSSI_EXTRA_GET_GROUP_IDX2(group) \
+ (PHY_TSSI_EXTRA_GET_GROUP_IDX1(group) + 1)
+
+static u32 phy_tssi_get_ofdm_group(u8 ch)
+{
+ switch (ch) {
+ case 1 ... 2:
+ return 0;
+ case 3 ... 5:
+ return 1;
+ case 6 ... 8:
+ return 2;
+ case 9 ... 11:
+ return 3;
+ case 12 ... 14:
+ return 4;
+ case 36 ... 40:
+ return 5;
+ case 41 ... 43:
+ return PHY_TSSI_EXTRA_GROUP(5);
+ case 44 ... 48:
+ return 6;
+ case 49 ... 51:
+ return PHY_TSSI_EXTRA_GROUP(6);
+ case 52 ... 56:
+ return 7;
+ case 57 ... 59:
+ return PHY_TSSI_EXTRA_GROUP(7);
+ case 60 ... 64:
+ return 8;
+ case 100 ... 104:
+ return 9;
+ case 105 ... 107:
+ return PHY_TSSI_EXTRA_GROUP(9);
+ case 108 ... 112:
+ return 10;
+ case 113 ... 115:
+ return PHY_TSSI_EXTRA_GROUP(10);
+ case 116 ... 120:
+ return 11;
+ case 121 ... 123:
+ return PHY_TSSI_EXTRA_GROUP(11);
+ case 124 ... 128:
+ return 12;
+ case 129 ... 131:
+ return PHY_TSSI_EXTRA_GROUP(12);
+ case 132 ... 136:
+ return 13;
+ case 137 ... 139:
+ return PHY_TSSI_EXTRA_GROUP(13);
+ case 140 ... 144:
+ return 14;
+ case 149 ... 153:
+ return 15;
+ case 154 ... 156:
+ return PHY_TSSI_EXTRA_GROUP(15);
+ case 157 ... 161:
+ return 16;
+ case 162 ... 164:
+ return PHY_TSSI_EXTRA_GROUP(16);
+ case 165 ... 169:
+ return 17;
+ case 170 ... 172:
+ return PHY_TSSI_EXTRA_GROUP(17);
+ case 173 ... 177:
+ return 18;
+ }
+
+ return 0;
+}
+
+static u32 phy_tssi_get_6g_ofdm_group(u8 ch)
+{
+ switch (ch) {
+ case 1 ... 5:
+ return 0;
+ case 6 ... 8:
+ return PHY_TSSI_EXTRA_GROUP(0);
+ case 9 ... 13:
+ return 1;
+ case 14 ... 16:
+ return PHY_TSSI_EXTRA_GROUP(1);
+ case 17 ... 21:
+ return 2;
+ case 22 ... 24:
+ return PHY_TSSI_EXTRA_GROUP(2);
+ case 25 ... 29:
+ return 3;
+ case 33 ... 37:
+ return 4;
+ case 38 ... 40:
+ return PHY_TSSI_EXTRA_GROUP(4);
+ case 41 ... 45:
+ return 5;
+ case 46 ... 48:
+ return PHY_TSSI_EXTRA_GROUP(5);
+ case 49 ... 53:
+ return 6;
+ case 54 ... 56:
+ return PHY_TSSI_EXTRA_GROUP(6);
+ case 57 ... 61:
+ return 7;
+ case 65 ... 69:
+ return 8;
+ case 70 ... 72:
+ return PHY_TSSI_EXTRA_GROUP(8);
+ case 73 ... 77:
+ return 9;
+ case 78 ... 80:
+ return PHY_TSSI_EXTRA_GROUP(9);
+ case 81 ... 85:
+ return 10;
+ case 86 ... 88:
+ return PHY_TSSI_EXTRA_GROUP(10);
+ case 89 ... 93:
+ return 11;
+ case 97 ... 101:
+ return 12;
+ case 102 ... 104:
+ return PHY_TSSI_EXTRA_GROUP(12);
+ case 105 ... 109:
+ return 13;
+ case 110 ... 112:
+ return PHY_TSSI_EXTRA_GROUP(13);
+ case 113 ... 117:
+ return 14;
+ case 118 ... 120:
+ return PHY_TSSI_EXTRA_GROUP(14);
+ case 121 ... 125:
+ return 15;
+ case 129 ... 133:
+ return 16;
+ case 134 ... 136:
+ return PHY_TSSI_EXTRA_GROUP(16);
+ case 137 ... 141:
+ return 17;
+ case 142 ... 144:
+ return PHY_TSSI_EXTRA_GROUP(17);
+ case 145 ... 149:
+ return 18;
+ case 150 ... 152:
+ return PHY_TSSI_EXTRA_GROUP(18);
+ case 153 ... 157:
+ return 19;
+ case 161 ... 165:
+ return 20;
+ case 166 ... 168:
+ return PHY_TSSI_EXTRA_GROUP(20);
+ case 169 ... 173:
+ return 21;
+ case 174 ... 176:
+ return PHY_TSSI_EXTRA_GROUP(21);
+ case 177 ... 181:
+ return 22;
+ case 182 ... 184:
+ return PHY_TSSI_EXTRA_GROUP(22);
+ case 185 ... 189:
+ return 23;
+ case 193 ... 197:
+ return 24;
+ case 198 ... 200:
+ return PHY_TSSI_EXTRA_GROUP(24);
+ case 201 ... 205:
+ return 25;
+ case 206 ... 208:
+ return PHY_TSSI_EXTRA_GROUP(25);
+ case 209 ... 213:
+ return 26;
+ case 214 ... 216:
+ return PHY_TSSI_EXTRA_GROUP(26);
+ case 217 ... 221:
+ return 27;
+ case 225 ... 229:
+ return 28;
+ case 230 ... 232:
+ return PHY_TSSI_EXTRA_GROUP(28);
+ case 233 ... 237:
+ return 29;
+ case 238 ... 240:
+ return PHY_TSSI_EXTRA_GROUP(29);
+ case 241 ... 245:
+ return 30;
+ case 246 ... 248:
+ return PHY_TSSI_EXTRA_GROUP(30);
+ case 249 ... 253:
+ return 31;
+ }
+
+ return 0;
+}
+
+static u32 phy_tssi_get_trim_group(u8 ch)
+{
+ switch (ch) {
+ case 1 ... 8:
+ return 0;
+ case 9 ... 14:
+ return 1;
+ case 36 ... 48:
+ return 2;
+ case 49 ... 51:
+ return PHY_TSSI_EXTRA_GROUP(2);
+ case 52 ... 64:
+ return 3;
+ case 100 ... 112:
+ return 4;
+ case 113 ... 115:
+ return PHY_TSSI_EXTRA_GROUP(4);
+ case 116 ... 128:
+ return 5;
+ case 132 ... 144:
+ return 6;
+ case 149 ... 177:
+ return 7;
+ }
+
+ return 0;
+}
+
+static u32 phy_tssi_get_6g_trim_group(u8 ch)
+{
+ switch (ch) {
+ case 1 ... 13:
+ return 0;
+ case 14 ... 16:
+ return PHY_TSSI_EXTRA_GROUP(0);
+ case 17 ... 29:
+ return 1;
+ case 33 ... 45:
+ return 2;
+ case 46 ... 48:
+ return PHY_TSSI_EXTRA_GROUP(2);
+ case 49 ... 61:
+ return 3;
+ case 65 ... 77:
+ return 4;
+ case 78 ... 80:
+ return PHY_TSSI_EXTRA_GROUP(4);
+ case 81 ... 93:
+ return 5;
+ case 97 ... 109:
+ return 6;
+ case 110 ... 112:
+ return PHY_TSSI_EXTRA_GROUP(6);
+ case 113 ... 125:
+ return 7;
+ case 129 ... 141:
+ return 8;
+ case 142 ... 144:
+ return PHY_TSSI_EXTRA_GROUP(8);
+ case 145 ... 157:
+ return 9;
+ case 161 ... 173:
+ return 10;
+ case 174 ... 176:
+ return PHY_TSSI_EXTRA_GROUP(10);
+ case 177 ... 189:
+ return 11;
+ case 193 ... 205:
+ return 12;
+ case 206 ... 208:
+ return PHY_TSSI_EXTRA_GROUP(12);
+ case 209 ... 221:
+ return 13;
+ case 225 ... 237:
+ return 14;
+ case 238 ... 240:
+ return PHY_TSSI_EXTRA_GROUP(14);
+ case 241 ... 253:
+ return 15;
+ }
+
+ return 0;
+}
+
+static s8 phy_tssi_get_ofdm_de(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path)
+{
+ struct rtw89_tssi_info *tssi_info = &rtwdev->tssi;
+ enum rtw89_band band = chan->band_type;
+ u8 ch = chan->channel;
+ u32 gidx_1st;
+ u32 gidx_2nd;
+ s8 de_1st;
+ s8 de_2nd;
+ u32 gidx;
+ s8 val;
+
+ if (band == RTW89_BAND_6G)
+ goto calc_6g;
+
+ gidx = phy_tssi_get_ofdm_group(ch);
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs group_idx=0x%x\n",
+ path, gidx);
+
+ if (PHY_IS_TSSI_EXTRA_GROUP(gidx)) {
+ gidx_1st = PHY_TSSI_EXTRA_GET_GROUP_IDX1(gidx);
+ gidx_2nd = PHY_TSSI_EXTRA_GET_GROUP_IDX2(gidx);
+ de_1st = tssi_info->tssi_mcs[path][gidx_1st];
+ de_2nd = tssi_info->tssi_mcs[path][gidx_2nd];
+ val = (de_1st + de_2nd) / 2;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs de=%d 1st=%d 2nd=%d\n",
+ path, val, de_1st, de_2nd);
+ } else {
+ val = tssi_info->tssi_mcs[path][gidx];
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs de=%d\n", path, val);
+ }
+
+ return val;
+
+calc_6g:
+ gidx = phy_tssi_get_6g_ofdm_group(ch);
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs group_idx=0x%x\n",
+ path, gidx);
+
+ if (PHY_IS_TSSI_EXTRA_GROUP(gidx)) {
+ gidx_1st = PHY_TSSI_EXTRA_GET_GROUP_IDX1(gidx);
+ gidx_2nd = PHY_TSSI_EXTRA_GET_GROUP_IDX2(gidx);
+ de_1st = tssi_info->tssi_6g_mcs[path][gidx_1st];
+ de_2nd = tssi_info->tssi_6g_mcs[path][gidx_2nd];
+ val = (de_1st + de_2nd) / 2;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs de=%d 1st=%d 2nd=%d\n",
+ path, val, de_1st, de_2nd);
+ } else {
+ val = tssi_info->tssi_6g_mcs[path][gidx];
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs de=%d\n", path, val);
+ }
+
+ return val;
+}
+
+static s8 phy_tssi_get_ofdm_trim_de(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path)
+{
+ struct rtw89_tssi_info *tssi_info = &rtwdev->tssi;
+ enum rtw89_band band = chan->band_type;
+ u8 ch = chan->channel;
+ u32 tgidx_1st;
+ u32 tgidx_2nd;
+ s8 tde_1st;
+ s8 tde_2nd;
+ u32 tgidx;
+ s8 val;
+
+ if (band == RTW89_BAND_6G)
+ goto calc_6g;
+
+ tgidx = phy_tssi_get_trim_group(ch);
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs trim_group_idx=0x%x\n",
+ path, tgidx);
+
+ if (PHY_IS_TSSI_EXTRA_GROUP(tgidx)) {
+ tgidx_1st = PHY_TSSI_EXTRA_GET_GROUP_IDX1(tgidx);
+ tgidx_2nd = PHY_TSSI_EXTRA_GET_GROUP_IDX2(tgidx);
+ tde_1st = tssi_info->tssi_trim[path][tgidx_1st];
+ tde_2nd = tssi_info->tssi_trim[path][tgidx_2nd];
+ val = (tde_1st + tde_2nd) / 2;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs trim_de=%d 1st=%d 2nd=%d\n",
+ path, val, tde_1st, tde_2nd);
+ } else {
+ val = tssi_info->tssi_trim[path][tgidx];
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs trim_de=%d\n",
+ path, val);
+ }
+
+ return val;
+
+calc_6g:
+ tgidx = phy_tssi_get_6g_trim_group(ch);
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs trim_group_idx=0x%x\n",
+ path, tgidx);
+
+ if (PHY_IS_TSSI_EXTRA_GROUP(tgidx)) {
+ tgidx_1st = PHY_TSSI_EXTRA_GET_GROUP_IDX1(tgidx);
+ tgidx_2nd = PHY_TSSI_EXTRA_GET_GROUP_IDX2(tgidx);
+ tde_1st = tssi_info->tssi_trim_6g[path][tgidx_1st];
+ tde_2nd = tssi_info->tssi_trim_6g[path][tgidx_2nd];
+ val = (tde_1st + tde_2nd) / 2;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs trim_de=%d 1st=%d 2nd=%d\n",
+ path, val, tde_1st, tde_2nd);
+ } else {
+ val = tssi_info->tssi_trim_6g[path][tgidx];
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d mcs trim_de=%d\n",
+ path, val);
+ }
+
+ return val;
+}
+
+void rtw89_phy_rfk_tssi_fill_fwcmd_efuse_to_de(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy,
+ const struct rtw89_chan *chan,
+ struct rtw89_h2c_rf_tssi *h2c)
+{
+ struct rtw89_tssi_info *tssi_info = &rtwdev->tssi;
+ u8 ch = chan->channel;
+ s8 trim_de;
+ s8 ofdm_de;
+ s8 cck_de;
+ u8 gidx;
+ s8 val;
+ int i;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI, "[TSSI][TRIM]: phy=%d ch=%d\n",
+ phy, ch);
+
+ for (i = RF_PATH_A; i <= RF_PATH_B; i++) {
+ trim_de = phy_tssi_get_ofdm_trim_de(rtwdev, phy, chan, i);
+ h2c->curr_tssi_trim_de[i] = trim_de;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d trim_de=0x%x\n", i, trim_de);
+
+ gidx = phy_tssi_get_cck_group(ch);
+ cck_de = tssi_info->tssi_cck[i][gidx];
+ val = u32_get_bits(cck_de + trim_de, 0xff);
+
+ h2c->curr_tssi_cck_de[i] = 0x0;
+ h2c->curr_tssi_cck_de_20m[i] = val;
+ h2c->curr_tssi_cck_de_40m[i] = val;
+ h2c->curr_tssi_efuse_cck_de[i] = cck_de;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d cck_de=0x%x\n", i, cck_de);
+
+ ofdm_de = phy_tssi_get_ofdm_de(rtwdev, phy, chan, i);
+ val = u32_get_bits(ofdm_de + trim_de, 0xff);
+
+ h2c->curr_tssi_ofdm_de[i] = 0x0;
+ h2c->curr_tssi_ofdm_de_20m[i] = val;
+ h2c->curr_tssi_ofdm_de_40m[i] = val;
+ h2c->curr_tssi_ofdm_de_80m[i] = val;
+ h2c->curr_tssi_ofdm_de_160m[i] = val;
+ h2c->curr_tssi_ofdm_de_320m[i] = val;
+ h2c->curr_tssi_efuse_ofdm_de[i] = ofdm_de;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI][TRIM]: path=%d ofdm_de=0x%x\n", i, ofdm_de);
+ }
+}
+
+void rtw89_phy_rfk_tssi_fill_fwcmd_tmeter_tbl(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy,
+ const struct rtw89_chan *chan,
+ struct rtw89_h2c_rf_tssi *h2c)
+{
+ struct rtw89_fw_txpwr_track_cfg *trk = rtwdev->fw.elm_info.txpwr_trk;
+ struct rtw89_tssi_info *tssi_info = &rtwdev->tssi;
+ const s8 *thm_up[RF_PATH_B + 1] = {};
+ const s8 *thm_down[RF_PATH_B + 1] = {};
+ u8 subband = chan->subband_type;
+ s8 thm_ofst[128] = {0};
+ u8 thermal;
+ u8 path;
+ u8 i, j;
+
+ switch (subband) {
+ default:
+ case RTW89_CH_2G:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_2GA_P][0];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_2GA_N][0];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_2GB_P][0];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_2GB_N][0];
+ break;
+ case RTW89_CH_5G_BAND_1:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GA_P][0];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GA_N][0];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GB_P][0];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GB_N][0];
+ break;
+ case RTW89_CH_5G_BAND_3:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GA_P][1];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GA_N][1];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GB_P][1];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GB_N][1];
+ break;
+ case RTW89_CH_5G_BAND_4:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GA_P][2];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GA_N][2];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GB_P][2];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_5GB_N][2];
+ break;
+ case RTW89_CH_6G_BAND_IDX0:
+ case RTW89_CH_6G_BAND_IDX1:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_P][0];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_N][0];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_P][0];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_N][0];
+ break;
+ case RTW89_CH_6G_BAND_IDX2:
+ case RTW89_CH_6G_BAND_IDX3:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_P][1];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_N][1];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_P][1];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_N][1];
+ break;
+ case RTW89_CH_6G_BAND_IDX4:
+ case RTW89_CH_6G_BAND_IDX5:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_P][2];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_N][2];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_P][2];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_N][2];
+ break;
+ case RTW89_CH_6G_BAND_IDX6:
+ case RTW89_CH_6G_BAND_IDX7:
+ thm_up[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_P][3];
+ thm_down[RF_PATH_A] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GA_N][3];
+ thm_up[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_P][3];
+ thm_down[RF_PATH_B] = trk->delta[RTW89_FW_TXPWR_TRK_TYPE_6GB_N][3];
+ break;
+ }
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "[TSSI] tmeter tbl on subband: %u\n", subband);
+
+ for (path = RF_PATH_A; path <= RF_PATH_B; path++) {
+ thermal = tssi_info->thermal[path];
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "path: %u, pg thermal: 0x%x\n", path, thermal);
+
+ if (thermal == 0xff) {
+ h2c->pg_thermal[path] = 0x38;
+ memset(h2c->ftable[path], 0, sizeof(h2c->ftable[path]));
+ continue;
+ }
+
+ h2c->pg_thermal[path] = thermal;
+
+ i = 0;
+ for (j = 0; j < 64; j++)
+ thm_ofst[j] = i < DELTA_SWINGIDX_SIZE ?
+ thm_up[path][i++] :
+ thm_up[path][DELTA_SWINGIDX_SIZE - 1];
+
+ i = 1;
+ for (j = 127; j >= 64; j--)
+ thm_ofst[j] = i < DELTA_SWINGIDX_SIZE ?
+ -thm_down[path][i++] :
+ -thm_down[path][DELTA_SWINGIDX_SIZE - 1];
+
+ for (i = 0; i < 128; i += 4) {
+ h2c->ftable[path][i + 0] = thm_ofst[i + 3];
+ h2c->ftable[path][i + 1] = thm_ofst[i + 2];
+ h2c->ftable[path][i + 2] = thm_ofst[i + 1];
+ h2c->ftable[path][i + 3] = thm_ofst[i + 0];
+
+ rtw89_debug(rtwdev, RTW89_DBG_TSSI,
+ "thm ofst [%x]: %02x %02x %02x %02x\n",
+ i, thm_ofst[i], thm_ofst[i + 1],
+ thm_ofst[i + 2], thm_ofst[i + 3]);
+ }
+ }
+}
+
static u8 rtw89_phy_cfo_get_xcap_reg(struct rtw89_dev *rtwdev, bool sc_xo)
{
const struct rtw89_xtal_info *xtal = rtwdev->chip->xtal_info;
@@ -4551,6 +5522,9 @@ static void rtw89_phy_dig_set_rxb_idx(struct rtw89_dev *rtwdev, u8 rxb_idx)
static void rtw89_phy_dig_set_igi_cr(struct rtw89_dev *rtwdev,
const struct rtw89_agc_gaincode_set set)
{
+ if (!rtwdev->hal.support_igi)
+ return;
+
rtw89_phy_dig_set_lna_idx(rtwdev, set.lna_idx);
rtw89_phy_dig_set_tia_idx(rtwdev, set.tia_idx);
rtw89_phy_dig_set_rxb_idx(rtwdev, set.rxb_idx);
@@ -4606,7 +5580,8 @@ static void rtw89_phy_dig_dyn_pd_th(struct rtw89_dev *rtwdev, u8 rssi,
s8 cck_cca_th;
u32 pd_val = 0;
- under_region += PD_TH_SB_FLTR_CMP_VAL;
+ if (rtwdev->chip->chip_gen == RTW89_CHIP_AX)
+ under_region += PD_TH_SB_FLTR_CMP_VAL;
switch (cbw) {
case RTW89_CHANNEL_WIDTH_40:
@@ -4953,12 +5928,15 @@ void rtw89_phy_dm_init(struct rtw89_dev *rtwdev)
rtw89_physts_parsing_init(rtwdev);
rtw89_phy_dig_init(rtwdev);
rtw89_phy_cfo_init(rtwdev);
+ rtw89_phy_bb_wrap_init(rtwdev);
rtw89_phy_edcca_init(rtwdev);
+ rtw89_phy_ch_info_init(rtwdev);
rtw89_phy_ul_tb_info_init(rtwdev);
rtw89_phy_antdiv_init(rtwdev);
rtw89_chip_rfe_gpio(rtwdev);
rtw89_phy_antdiv_set_ant(rtwdev);
+ rtw89_chip_rfk_hw_init(rtwdev);
rtw89_phy_init_rf_nctl(rtwdev);
rtw89_chip_rfk_init(rtwdev);
rtw89_chip_set_txpwr_ctrl(rtwdev);
@@ -5400,6 +6378,78 @@ void rtw89_phy_edcca_track(struct rtw89_dev *rtwdev)
rtw89_phy_edcca_log(rtwdev);
}
+enum rtw89_rf_path_bit rtw89_phy_get_kpath(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[RFK] kpath dbcc_en: 0x%x, mode=0x%x, PHY%d\n",
+ rtwdev->dbcc_en, rtwdev->mlo_dbcc_mode, phy_idx);
+
+ switch (rtwdev->mlo_dbcc_mode) {
+ case MLO_1_PLUS_1_1RF:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_A;
+ else
+ return RF_B;
+ case MLO_1_PLUS_1_2RF:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_A;
+ else
+ return RF_D;
+ case MLO_0_PLUS_2_1RF:
+ case MLO_2_PLUS_0_1RF:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_AB;
+ else
+ return RF_AB;
+ case MLO_0_PLUS_2_2RF:
+ case MLO_2_PLUS_0_2RF:
+ case MLO_2_PLUS_2_2RF:
+ default:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_AB;
+ else
+ return RF_CD;
+ }
+}
+EXPORT_SYMBOL(rtw89_phy_get_kpath);
+
+enum rtw89_rf_path rtw89_phy_get_syn_sel(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[RFK] kpath dbcc_en: 0x%x, mode=0x%x, PHY%d\n",
+ rtwdev->dbcc_en, rtwdev->mlo_dbcc_mode, phy_idx);
+
+ switch (rtwdev->mlo_dbcc_mode) {
+ case MLO_1_PLUS_1_1RF:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_PATH_A;
+ else
+ return RF_PATH_B;
+ case MLO_1_PLUS_1_2RF:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_PATH_A;
+ else
+ return RF_PATH_D;
+ case MLO_0_PLUS_2_1RF:
+ case MLO_2_PLUS_0_1RF:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_PATH_A;
+ else
+ return RF_PATH_B;
+ case MLO_0_PLUS_2_2RF:
+ case MLO_2_PLUS_0_2RF:
+ case MLO_2_PLUS_2_2RF:
+ default:
+ if (phy_idx == RTW89_PHY_0)
+ return RF_PATH_A;
+ else
+ return RF_PATH_C;
+ }
+}
+EXPORT_SYMBOL(rtw89_phy_get_syn_sel);
+
static const struct rtw89_ccx_regs rtw89_ccx_regs_ax = {
.setting_addr = R_CCX,
.edcca_opt_mask = B_CCX_EDCCA_OPT_MSK,
@@ -5476,6 +6526,11 @@ const struct rtw89_phy_gen_def rtw89_phy_gen_ax = {
.ccx = &rtw89_ccx_regs_ax,
.physts = &rtw89_physts_regs_ax,
.cfo = &rtw89_cfo_regs_ax,
+ .phy0_phy1_offset = rtw89_phy0_phy1_offset_ax,
+ .config_bb_gain = rtw89_phy_config_bb_gain_ax,
+ .preinit_rf_nctl = rtw89_phy_preinit_rf_nctl_ax,
+ .bb_wrap_init = NULL,
+ .ch_info_init = NULL,
.set_txpwr_byrate = rtw89_phy_set_txpwr_byrate_ax,
.set_txpwr_offset = rtw89_phy_set_txpwr_offset_ax,
diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h
index 3e379077c6ca..082231ebbee5 100644
--- a/drivers/net/wireless/realtek/rtw89/phy.h
+++ b/drivers/net/wireless/realtek/rtw89/phy.h
@@ -7,6 +7,7 @@
#include "core.h"
+#define RTW89_BBMCU_ADDR_OFFSET 0x30000
#define RTW89_RF_ADDR_ADSEL_MASK BIT(16)
#define get_phy_headline(addr) FIELD_GET(GENMASK(31, 28), addr)
@@ -509,6 +510,14 @@ struct rtw89_phy_gen_def {
const struct rtw89_ccx_regs *ccx;
const struct rtw89_physts_regs *physts;
const struct rtw89_cfo_regs *cfo;
+ u32 (*phy0_phy1_offset)(struct rtw89_dev *rtwdev, u32 addr);
+ void (*config_bb_gain)(struct rtw89_dev *rtwdev,
+ const struct rtw89_reg2_def *reg,
+ enum rtw89_rf_path rf_path,
+ void *extra_data);
+ void (*preinit_rf_nctl)(struct rtw89_dev *rtwdev);
+ void (*bb_wrap_init)(struct rtw89_dev *rtwdev);
+ void (*ch_info_init)(struct rtw89_dev *rtwdev);
void (*set_txpwr_byrate)(struct rtw89_dev *rtwdev,
const struct rtw89_chan *chan,
@@ -604,6 +613,15 @@ static inline u32 rtw89_phy_read32_mask(struct rtw89_dev *rtwdev,
return rtw89_read32_mask(rtwdev, addr + phy->cr_base, mask);
}
+static inline void rtw89_bbmcu_write32(struct rtw89_dev *rtwdev,
+ u32 addr, u32 data, enum rtw89_phy_idx phy_idx)
+{
+ if (phy_idx && addr < 0x10000)
+ addr += 0x20000;
+
+ rtw89_write32(rtwdev, addr + RTW89_BBMCU_ADDR_OFFSET, data);
+}
+
static inline
enum rtw89_gain_offset rtw89_subband_to_gain_offset_band_of_ofdm(enum rtw89_subband subband)
{
@@ -664,6 +682,38 @@ enum rtw89_phy_bb_gain_band rtw89_subband_to_bb_gain_band(enum rtw89_subband sub
}
}
+static inline
+enum rtw89_phy_gain_band_be rtw89_subband_to_gain_band_be(enum rtw89_subband subband)
+{
+ switch (subband) {
+ default:
+ case RTW89_CH_2G:
+ return RTW89_BB_GAIN_BAND_2G_BE;
+ case RTW89_CH_5G_BAND_1:
+ return RTW89_BB_GAIN_BAND_5G_L_BE;
+ case RTW89_CH_5G_BAND_3:
+ return RTW89_BB_GAIN_BAND_5G_M_BE;
+ case RTW89_CH_5G_BAND_4:
+ return RTW89_BB_GAIN_BAND_5G_H_BE;
+ case RTW89_CH_6G_BAND_IDX0:
+ return RTW89_BB_GAIN_BAND_6G_L0_BE;
+ case RTW89_CH_6G_BAND_IDX1:
+ return RTW89_BB_GAIN_BAND_6G_L1_BE;
+ case RTW89_CH_6G_BAND_IDX2:
+ return RTW89_BB_GAIN_BAND_6G_M0_BE;
+ case RTW89_CH_6G_BAND_IDX3:
+ return RTW89_BB_GAIN_BAND_6G_M1_BE;
+ case RTW89_CH_6G_BAND_IDX4:
+ return RTW89_BB_GAIN_BAND_6G_H0_BE;
+ case RTW89_CH_6G_BAND_IDX5:
+ return RTW89_BB_GAIN_BAND_6G_H1_BE;
+ case RTW89_CH_6G_BAND_IDX6:
+ return RTW89_BB_GAIN_BAND_6G_UH0_BE;
+ case RTW89_CH_6G_BAND_IDX7:
+ return RTW89_BB_GAIN_BAND_6G_UH1_BE;
+ }
+}
+
enum rtw89_rfk_flag {
RTW89_RFK_F_WRF = 0,
RTW89_RFK_F_WM = 1,
@@ -728,14 +778,20 @@ void rtw89_phy_write_reg3_tbl(struct rtw89_dev *rtwdev,
u8 rtw89_phy_get_txsc(struct rtw89_dev *rtwdev,
const struct rtw89_chan *chan,
enum rtw89_bandwidth dbw);
+u8 rtw89_phy_get_txsb(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan,
+ enum rtw89_bandwidth dbw);
u32 rtw89_phy_read_rf(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
u32 addr, u32 mask);
u32 rtw89_phy_read_rf_v1(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
u32 addr, u32 mask);
+u32 rtw89_phy_read_rf_v2(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
+ u32 addr, u32 mask);
bool rtw89_phy_write_rf(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
u32 addr, u32 mask, u32 data);
bool rtw89_phy_write_rf_v1(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
u32 addr, u32 mask, u32 data);
+bool rtw89_phy_write_rf_v2(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path,
+ u32 addr, u32 mask, u32 data);
void rtw89_phy_init_bb_reg(struct rtw89_dev *rtwdev);
void rtw89_phy_init_rf_reg(struct rtw89_dev *rtwdev, bool noio);
void rtw89_phy_config_rf_reg_v1(struct rtw89_dev *rtwdev,
@@ -759,6 +815,29 @@ s8 rtw89_phy_read_txpwr_limit(struct rtw89_dev *rtwdev, u8 band,
s8 rtw89_phy_read_txpwr_limit_ru(struct rtw89_dev *rtwdev, u8 band,
u8 ru, u8 ntx, u8 ch);
+static inline void rtw89_phy_preinit_rf_nctl(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_phy_gen_def *phy = rtwdev->chip->phy_def;
+
+ phy->preinit_rf_nctl(rtwdev);
+}
+
+static inline void rtw89_phy_bb_wrap_init(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_phy_gen_def *phy = rtwdev->chip->phy_def;
+
+ if (phy->bb_wrap_init)
+ phy->bb_wrap_init(rtwdev);
+}
+
+static inline void rtw89_phy_ch_info_init(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_phy_gen_def *phy = rtwdev->chip->phy_def;
+
+ if (phy->ch_info_init)
+ phy->ch_info_init(rtwdev);
+}
+
static inline
void rtw89_phy_set_txpwr_byrate(struct rtw89_dev *rtwdev,
const struct rtw89_chan *chan,
@@ -809,6 +888,36 @@ void rtw89_phy_rate_pattern_vif(struct rtw89_dev *rtwdev,
bool rtw89_phy_c2h_chk_atomic(struct rtw89_dev *rtwdev, u8 class, u8 func);
void rtw89_phy_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb,
u32 len, u8 class, u8 func);
+int rtw89_phy_rfk_pre_ntfy_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms);
+int rtw89_phy_rfk_tssi_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ enum rtw89_tssi_mode tssi_mode,
+ unsigned int ms);
+int rtw89_phy_rfk_iqk_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms);
+int rtw89_phy_rfk_dpk_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms);
+int rtw89_phy_rfk_txgapk_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms);
+int rtw89_phy_rfk_dack_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms);
+int rtw89_phy_rfk_rxdck_and_wait(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx,
+ unsigned int ms);
+void rtw89_phy_rfk_tssi_fill_fwcmd_efuse_to_de(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy,
+ const struct rtw89_chan *chan,
+ struct rtw89_h2c_rf_tssi *h2c);
+void rtw89_phy_rfk_tssi_fill_fwcmd_tmeter_tbl(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy,
+ const struct rtw89_chan *chan,
+ struct rtw89_h2c_rf_tssi *h2c);
void rtw89_phy_cfo_track(struct rtw89_dev *rtwdev);
void rtw89_phy_cfo_track_work(struct work_struct *work);
void rtw89_phy_cfo_parse(struct rtw89_dev *rtwdev, s16 cfo_val,
@@ -836,5 +945,9 @@ void rtw89_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx,
void rtw89_phy_config_edcca(struct rtw89_dev *rtwdev, bool scan);
void rtw89_phy_edcca_track(struct rtw89_dev *rtwdev);
void rtw89_phy_edcca_thre_calc(struct rtw89_dev *rtwdev);
+enum rtw89_rf_path_bit rtw89_phy_get_kpath(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx);
+enum rtw89_rf_path rtw89_phy_get_syn_sel(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx);
#endif
diff --git a/drivers/net/wireless/realtek/rtw89/phy_be.c b/drivers/net/wireless/realtek/rtw89/phy_be.c
index 63eeeea72b68..be0148f2b96f 100644
--- a/drivers/net/wireless/realtek/rtw89/phy_be.c
+++ b/drivers/net/wireless/realtek/rtw89/phy_be.c
@@ -78,6 +78,332 @@ static const struct rtw89_cfo_regs rtw89_cfo_regs_be = {
.valid_0_mask = B_DCFO_OPT_EN_V1,
};
+static u32 rtw89_phy0_phy1_offset_be(struct rtw89_dev *rtwdev, u32 addr)
+{
+ u32 phy_page = addr >> 8;
+ u32 ofst = 0;
+
+ if ((phy_page >= 0x4 && phy_page <= 0xF) ||
+ (phy_page >= 0x20 && phy_page <= 0x2B) ||
+ (phy_page >= 0x40 && phy_page <= 0x4f) ||
+ (phy_page >= 0x60 && phy_page <= 0x6f) ||
+ (phy_page >= 0xE4 && phy_page <= 0xE5) ||
+ (phy_page >= 0xE8 && phy_page <= 0xED))
+ ofst = 0x1000;
+ else
+ ofst = 0x0;
+
+ return ofst;
+}
+
+union rtw89_phy_bb_gain_arg_be {
+ u32 addr;
+ struct {
+ u8 type;
+#define BB_GAIN_TYPE_SUB0_BE GENMASK(3, 0)
+#define BB_GAIN_TYPE_SUB1_BE GENMASK(7, 4)
+ u8 path_bw;
+#define BB_GAIN_PATH_BE GENMASK(3, 0)
+#define BB_GAIN_BW_BE GENMASK(7, 4)
+ u8 gain_band;
+ u8 cfg_type;
+ } __packed;
+} __packed;
+
+static void
+rtw89_phy_cfg_bb_gain_error_be(struct rtw89_dev *rtwdev,
+ union rtw89_phy_bb_gain_arg_be arg, u32 data)
+{
+ struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be;
+ u8 bw_type = u8_get_bits(arg.path_bw, BB_GAIN_BW_BE);
+ u8 path = u8_get_bits(arg.path_bw, BB_GAIN_PATH_BE);
+ u8 gband = arg.gain_band;
+ u8 type = arg.type;
+ int i;
+
+ switch (type) {
+ case 0:
+ for (i = 0; i < 4; i++, data >>= 8)
+ gain->lna_gain[gband][bw_type][path][i] = data & 0xff;
+ break;
+ case 1:
+ for (i = 4; i < 7; i++, data >>= 8)
+ gain->lna_gain[gband][bw_type][path][i] = data & 0xff;
+ break;
+ case 2:
+ for (i = 0; i < 2; i++, data >>= 8)
+ gain->tia_gain[gband][bw_type][path][i] = data & 0xff;
+ break;
+ default:
+ rtw89_warn(rtwdev,
+ "bb gain error {0x%x:0x%x} with unknown type: %d\n",
+ arg.addr, data, type);
+ break;
+ }
+}
+
+static void
+rtw89_phy_cfg_bb_rpl_ofst_be(struct rtw89_dev *rtwdev,
+ union rtw89_phy_bb_gain_arg_be arg, u32 data)
+{
+ struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be;
+ u8 type_sub0 = u8_get_bits(arg.type, BB_GAIN_TYPE_SUB0_BE);
+ u8 type_sub1 = u8_get_bits(arg.type, BB_GAIN_TYPE_SUB1_BE);
+ u8 path = u8_get_bits(arg.path_bw, BB_GAIN_PATH_BE);
+ u8 gband = arg.gain_band;
+ u8 ofst = 0;
+ int i;
+
+ switch (type_sub1) {
+ case RTW89_CMAC_BW_20M:
+ gain->rpl_ofst_20[gband][path][0] = (s8)data;
+ break;
+ case RTW89_CMAC_BW_40M:
+ for (i = 0; i < RTW89_BW20_SC_40M; i++, data >>= 8)
+ gain->rpl_ofst_40[gband][path][i] = data & 0xff;
+ break;
+ case RTW89_CMAC_BW_80M:
+ for (i = 0; i < RTW89_BW20_SC_80M; i++, data >>= 8)
+ gain->rpl_ofst_80[gband][path][i] = data & 0xff;
+ break;
+ case RTW89_CMAC_BW_160M:
+ if (type_sub0 == 0)
+ ofst = 0;
+ else
+ ofst = RTW89_BW20_SC_80M;
+
+ for (i = 0; i < RTW89_BW20_SC_80M; i++, data >>= 8)
+ gain->rpl_ofst_160[gband][path][i + ofst] = data & 0xff;
+ break;
+ default:
+ rtw89_warn(rtwdev,
+ "bb rpl ofst {0x%x:0x%x} with unknown type_sub1: %d\n",
+ arg.addr, data, type_sub1);
+ break;
+ }
+}
+
+static void
+rtw89_phy_cfg_bb_gain_op1db_be(struct rtw89_dev *rtwdev,
+ union rtw89_phy_bb_gain_arg_be arg, u32 data)
+{
+ struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be;
+ u8 bw_type = u8_get_bits(arg.path_bw, BB_GAIN_BW_BE);
+ u8 path = u8_get_bits(arg.path_bw, BB_GAIN_PATH_BE);
+ u8 gband = arg.gain_band;
+ u8 type = arg.type;
+ int i;
+
+ switch (type) {
+ case 0:
+ for (i = 0; i < 4; i++, data >>= 8)
+ gain->lna_op1db[gband][bw_type][path][i] = data & 0xff;
+ break;
+ case 1:
+ for (i = 4; i < 7; i++, data >>= 8)
+ gain->lna_op1db[gband][bw_type][path][i] = data & 0xff;
+ break;
+ case 2:
+ for (i = 0; i < 4; i++, data >>= 8)
+ gain->tia_lna_op1db[gband][bw_type][path][i] = data & 0xff;
+ break;
+ case 3:
+ for (i = 4; i < 8; i++, data >>= 8)
+ gain->tia_lna_op1db[gband][bw_type][path][i] = data & 0xff;
+ break;
+ default:
+ rtw89_warn(rtwdev,
+ "bb gain op1db {0x%x:0x%x} with unknown type: %d\n",
+ arg.addr, data, type);
+ break;
+ }
+}
+
+static void rtw89_phy_config_bb_gain_be(struct rtw89_dev *rtwdev,
+ const struct rtw89_reg2_def *reg,
+ enum rtw89_rf_path rf_path,
+ void *extra_data)
+{
+ const struct rtw89_chip_info *chip = rtwdev->chip;
+ union rtw89_phy_bb_gain_arg_be arg = { .addr = reg->addr };
+ struct rtw89_efuse *efuse = &rtwdev->efuse;
+ u8 bw_type = u8_get_bits(arg.path_bw, BB_GAIN_BW_BE);
+ u8 path = u8_get_bits(arg.path_bw, BB_GAIN_PATH_BE);
+
+ if (bw_type >= RTW89_BB_BW_NR_BE)
+ return;
+
+ if (arg.gain_band >= RTW89_BB_GAIN_BAND_NR_BE)
+ return;
+
+ if (path >= chip->rf_path_num)
+ return;
+
+ if (arg.addr >= 0xf9 && arg.addr <= 0xfe) {
+ rtw89_warn(rtwdev, "bb gain table with flow ctrl\n");
+ return;
+ }
+
+ switch (arg.cfg_type) {
+ case 0:
+ rtw89_phy_cfg_bb_gain_error_be(rtwdev, arg, reg->data);
+ break;
+ case 1:
+ rtw89_phy_cfg_bb_rpl_ofst_be(rtwdev, arg, reg->data);
+ break;
+ case 2:
+ /* ignore BB gain bypass */
+ break;
+ case 3:
+ rtw89_phy_cfg_bb_gain_op1db_be(rtwdev, arg, reg->data);
+ break;
+ case 4:
+ /* This cfg_type is only used by rfe_type >= 50 with eFEM */
+ if (efuse->rfe_type < 50)
+ break;
+ fallthrough;
+ default:
+ rtw89_warn(rtwdev,
+ "bb gain {0x%x:0x%x} with unknown cfg type: %d\n",
+ arg.addr, reg->data, arg.cfg_type);
+ break;
+ }
+}
+
+static void rtw89_phy_preinit_rf_nctl_be(struct rtw89_dev *rtwdev)
+{
+ rtw89_phy_write32_mask(rtwdev, R_GOTX_IQKDPK_C0, B_GOTX_IQKDPK, 0x3);
+ rtw89_phy_write32_mask(rtwdev, R_GOTX_IQKDPK_C1, B_GOTX_IQKDPK, 0x3);
+ rtw89_phy_write32_mask(rtwdev, R_IQKDPK_HC, B_IQKDPK_HC, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_CLK_GCK, B_CLK_GCK, 0x00fffff);
+ rtw89_phy_write32_mask(rtwdev, R_IOQ_IQK_DPK, B_IOQ_IQK_DPK_CLKEN, 0x3);
+ rtw89_phy_write32_mask(rtwdev, R_IQK_DPK_RST, B_IQK_DPK_RST, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_IQK_DPK_PRST, B_IQK_DPK_PRST, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_IQK_DPK_PRST_C1, B_IQK_DPK_PRST, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_TXRFC, B_TXRFC_RST, 0x1);
+
+ if (rtwdev->dbcc_en) {
+ rtw89_phy_write32_mask(rtwdev, R_IQK_DPK_RST_C1, B_IQK_DPK_RST, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_TXRFC_C1, B_TXRFC_RST, 0x1);
+ }
+}
+
+static
+void rtw89_phy_bb_wrap_pwr_by_macid_init(struct rtw89_dev *rtwdev)
+{
+ u32 macid_idx, cr, base_macid_lmt, max_macid = 32;
+
+ base_macid_lmt = R_BE_PWR_MACID_LMT_BASE;
+
+ for (macid_idx = 0; macid_idx < 4 * max_macid; macid_idx += 4) {
+ cr = base_macid_lmt + macid_idx;
+ rtw89_write32(rtwdev, cr, 0x03007F7F);
+ }
+}
+
+static
+void rtw89_phy_bb_wrap_tx_path_by_macid_init(struct rtw89_dev *rtwdev)
+{
+ int i, max_macid = 32;
+ u32 cr = R_BE_PWR_MACID_PATH_BASE;
+
+ for (i = 0; i < max_macid; i++, cr += 4)
+ rtw89_write32(rtwdev, cr, 0x03C86000);
+}
+
+static void rtw89_phy_bb_wrap_tpu_set_all(struct rtw89_dev *rtwdev,
+ enum rtw89_mac_idx mac_idx)
+{
+ u32 addr;
+
+ for (addr = R_BE_PWR_BY_RATE; addr <= R_BE_PWR_BY_RATE_END; addr += 4)
+ rtw89_write32(rtwdev, addr, 0);
+ for (addr = R_BE_PWR_RULMT_START; addr <= R_BE_PWR_RULMT_END; addr += 4)
+ rtw89_write32(rtwdev, addr, 0);
+ for (addr = R_BE_PWR_RATE_OFST_CTRL; addr <= R_BE_PWR_RATE_OFST_END; addr += 4)
+ rtw89_write32(rtwdev, addr, 0);
+
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_REF_CTRL, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_OFST_LMT_DB, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_OFST_LMTBF, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_OFST_LMTBF_DB, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_RATE_CTRL, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_OFST_BYRATE_DB, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_OFST_RULMT, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_OFST_RULMT_DB, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_OFST_SW, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_OFST_SW_DB, 0);
+}
+
+static
+void rtw89_phy_bb_wrap_listen_path_en_init(struct rtw89_dev *rtwdev)
+{
+ u32 addr;
+ int ret;
+
+ ret = rtw89_mac_check_mac_en(rtwdev, RTW89_MAC_1, RTW89_CMAC_SEL);
+ if (ret)
+ return;
+
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_LISTEN_PATH, RTW89_MAC_1);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_LISTEN_PATH_EN, 0x2);
+}
+
+static void rtw89_phy_bb_wrap_force_cr_init(struct rtw89_dev *rtwdev,
+ enum rtw89_mac_idx mac_idx)
+{
+ u32 addr;
+
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_FORCE_LMT, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_FORCE_LMT_ON, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_BOOST, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_FORCE_RATE_ON, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_OFST_RULMT, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_FORCE_RU_ENON, 0);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_FORCE_RU_ON, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_FORCE_MACID, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_FORCE_MACID_ON, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_COEX_CTRL, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_PWR_FORCE_COEX_ON, 0);
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_RATE_CTRL, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, B_BE_FORCE_PWR_BY_RATE_EN, 0);
+}
+
+static void rtw89_phy_bb_wrap_ftm_init(struct rtw89_dev *rtwdev,
+ enum rtw89_mac_idx mac_idx)
+{
+ u32 addr;
+
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_FTM, mac_idx);
+ rtw89_write32(rtwdev, addr, 0xE4E431);
+
+ addr = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_FTM_SS, mac_idx);
+ rtw89_write32_mask(rtwdev, addr, 0x7, 0);
+}
+
+static void rtw89_phy_bb_wrap_init_be(struct rtw89_dev *rtwdev)
+{
+ enum rtw89_mac_idx mac_idx = RTW89_MAC_0;
+
+ rtw89_phy_bb_wrap_pwr_by_macid_init(rtwdev);
+ rtw89_phy_bb_wrap_tx_path_by_macid_init(rtwdev);
+ rtw89_phy_bb_wrap_listen_path_en_init(rtwdev);
+ rtw89_phy_bb_wrap_force_cr_init(rtwdev, mac_idx);
+ rtw89_phy_bb_wrap_ftm_init(rtwdev, mac_idx);
+ rtw89_phy_bb_wrap_tpu_set_all(rtwdev, mac_idx);
+}
+
+static void rtw89_phy_ch_info_init_be(struct rtw89_dev *rtwdev)
+{
+ rtw89_phy_write32_mask(rtwdev, R_CHINFO_SEG, B_CHINFO_SEG_LEN, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_CHINFO_SEG, B_CHINFO_SEG, 0xf);
+ rtw89_phy_write32_mask(rtwdev, R_CHINFO_DATA, B_CHINFO_DATA_BITMAP, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_CHINFO_ELM_SRC, B_CHINFO_ELM_BITMAP, 0x40303);
+ rtw89_phy_set_phy_regs(rtwdev, R_CHINFO_ELM_SRC, B_CHINFO_SRC, 0x0);
+ rtw89_phy_set_phy_regs(rtwdev, R_CHINFO_TYPE_SCAL, B_CHINFO_TYPE, 0x3);
+ rtw89_phy_set_phy_regs(rtwdev, R_CHINFO_TYPE_SCAL, B_CHINFO_SCAL, 0x0);
+}
+
struct rtw89_byr_spec_ent_be {
struct rtw89_rate_desc init;
u8 num_of_idx;
@@ -644,6 +970,11 @@ const struct rtw89_phy_gen_def rtw89_phy_gen_be = {
.ccx = &rtw89_ccx_regs_be,
.physts = &rtw89_physts_regs_be,
.cfo = &rtw89_cfo_regs_be,
+ .phy0_phy1_offset = rtw89_phy0_phy1_offset_be,
+ .config_bb_gain = rtw89_phy_config_bb_gain_be,
+ .preinit_rf_nctl = rtw89_phy_preinit_rf_nctl_be,
+ .bb_wrap_init = rtw89_phy_bb_wrap_init_be,
+ .ch_info_init = rtw89_phy_ch_info_init_be,
.set_txpwr_byrate = rtw89_phy_set_txpwr_byrate_be,
.set_txpwr_offset = rtw89_phy_set_txpwr_offset_be,
diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c
index 917c01e5e9ed..31290d8cb7f7 100644
--- a/drivers/net/wireless/realtek/rtw89/ps.c
+++ b/drivers/net/wireless/realtek/rtw89/ps.c
@@ -14,6 +14,7 @@
static int rtw89_fw_leave_lps_check(struct rtw89_dev *rtwdev, u8 macid)
{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
u32 pwr_en_bit = 0xE;
u32 chk_msk = pwr_en_bit << (4 * macid);
u32 polling;
@@ -21,7 +22,7 @@ static int rtw89_fw_leave_lps_check(struct rtw89_dev *rtwdev, u8 macid)
ret = read_poll_timeout_atomic(rtw89_read32_mask, polling, !polling,
1000, 50000, false, rtwdev,
- R_AX_PPWRBIT_SETTING, chk_msk);
+ mac->ps_status, chk_msk);
if (ret) {
rtw89_info(rtwdev, "rtw89: failed to leave lps state\n");
return -EBUSY;
@@ -83,16 +84,17 @@ void __rtw89_leave_ps_mode(struct rtw89_dev *rtwdev)
rtw89_ps_power_mode_change(rtwdev, false);
}
-static void __rtw89_enter_lps(struct rtw89_dev *rtwdev, u8 mac_id)
+static void __rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif)
{
struct rtw89_lps_parm lps_param = {
- .macid = mac_id,
+ .macid = rtwvif->mac_id,
.psmode = RTW89_MAC_AX_PS_MODE_LEGACY,
.lastrpwm = RTW89_LAST_RPWM_PS,
};
rtw89_btc_ntfy_radio_state(rtwdev, BTC_RFCTRL_FW_CTRL);
rtw89_fw_h2c_lps_parm(rtwdev, &lps_param);
+ rtw89_fw_h2c_lps_ch_info(rtwdev, rtwvif);
}
static void __rtw89_leave_lps(struct rtw89_dev *rtwdev, u8 mac_id)
@@ -123,7 +125,7 @@ void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif,
if (test_and_set_bit(RTW89_FLAG_LEISURE_PS, rtwdev->flags))
return;
- __rtw89_enter_lps(rtwdev, rtwvif->mac_id);
+ __rtw89_enter_lps(rtwdev, rtwvif);
if (ps_mode)
__rtw89_enter_ps_mode(rtwdev, rtwvif);
}
diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h
index 8456e2b0c14f..72e448e91b6f 100644
--- a/drivers/net/wireless/realtek/rtw89/reg.h
+++ b/drivers/net/wireless/realtek/rtw89/reg.h
@@ -3246,6 +3246,13 @@
#define R_AX_RX_SR_CTRL_C1 0xEE4A
#define B_AX_SR_EN BIT(0)
+#define R_AX_BSSID_SRC_CTRL 0xCE4B
+#define R_AX_BSSID_SRC_CTRL_C1 0xEE4B
+#define B_AX_BSSID_MATCH BIT(3)
+#define B_AX_PARTIAL_AID_MATCH BIT(2)
+#define B_AX_BSSCOLOR_MATCH BIT(1)
+#define B_AX_PLCP_SRC_EN BIT(0)
+
#define R_AX_CSIRPT_OPTION 0xCE64
#define R_AX_CSIRPT_OPTION_C1 0xEE64
#define B_AX_CSIPRT_HESU_AID_EN BIT(25)
@@ -3503,8 +3510,13 @@
#define B_AX_PTA_EDCCA_EN BIT(0)
#define R_BTC_COEX_WL_REQ 0xDA24
+#define R_BTC_COEX_WL_REQ_BE 0xE324
+#define B_BTC_TX_NULL_HI BIT(23)
#define B_BTC_TX_BCN_HI BIT(22)
+#define B_BTC_TX_TRI_HI BIT(17)
#define B_BTC_RSP_ACK_HI BIT(10)
+#define B_BTC_PRI_MASK_TX_TIME GENMASK(4, 3)
+#define B_BTC_PRI_MASK_RX_TIME_V1 GENMASK(2, 1)
#define R_BTC_BREAK_TABLE 0xDA2C
#define BTC_BREAK_PARAM 0xf0ffffff
@@ -3752,6 +3764,19 @@
#define B_BE_SYM_PADPDN_WL_RFC1_1P3 BIT(6)
#define B_BE_SYM_PADPDN_WL_RFC0_1P3 BIT(5)
+#define R_BE_RSV_CTRL 0x001C
+#define B_BE_HR_BE_DBG GENMASK(23, 12)
+#define B_BE_R_SYM_DIS_PCIE_FLR BIT(9)
+#define B_BE_R_EN_HRST_PWRON BIT(8)
+#define B_BE_LOCK_ALL_EN BIT(7)
+#define B_BE_R_DIS_PRST BIT(6)
+#define B_BE_WLOCK_1C_BIT6 BIT(5)
+#define B_BE_WLOCK_40 BIT(4)
+#define B_BE_WLOCK_08 BIT(3)
+#define B_BE_WLOCK_04 BIT(2)
+#define B_BE_WLOCK_00 BIT(1)
+#define B_BE_WLOCK_ALL BIT(0)
+
#define R_BE_AFE_LDO_CTRL 0x0020
#define B_BE_FORCE_MACBBBT_PWR_ON BIT(31)
#define B_BE_R_SYM_WLPOFF_P4_PC_EN BIT(28)
@@ -4033,6 +4058,30 @@
#define B_BE_SYSON_DIS_PMCR_BE_WRMSK BIT(2)
#define B_BE_SYSON_R_BE_ARB_MASK GENMASK(1, 0)
+#define R_BE_MEM_PWR_CTRL 0x00D0
+#define B_BE_DMEM5_WLMCU_DS BIT(31)
+#define B_BE_DMEM4_WLMCU_DS BIT(30)
+#define B_BE_DMEM3_WLMCU_DS BIT(29)
+#define B_BE_DMEM2_WLMCU_DS BIT(28)
+#define B_BE_DMEM1_WLMCU_DS BIT(27)
+#define B_BE_DMEM0_WLMCU_DS BIT(26)
+#define B_BE_IMEM5_WLMCU_DS BIT(25)
+#define B_BE_IMEM4_WLMCU_DS BIT(24)
+#define B_BE_IMEM3_WLMCU_DS BIT(23)
+#define B_BE_IMEM2_WLMCU_DS BIT(22)
+#define B_BE_IMEM1_WLMCU_DS BIT(21)
+#define B_BE_IMEM0_WLMCU_DS BIT(20)
+#define B_BE_MEM_BBMCU1_DS BIT(19)
+#define B_BE_MEM_BBMCU0_DS_V1 BIT(17)
+#define B_BE_MEM_BT_DS BIT(10)
+#define B_BE_MEM_SDIO_LS BIT(9)
+#define B_BE_MEM_SDIO_DS BIT(8)
+#define B_BE_MEM_USB_LS BIT(7)
+#define B_BE_MEM_USB_DS BIT(6)
+#define B_BE_MEM_PCI_LS BIT(5)
+#define B_BE_MEM_PCI_DS BIT(4)
+#define B_BE_MEM_WLMAC_LS BIT(3)
+
#define R_BE_PCIE_MIO_INTF 0x00E4
#define B_BE_AON_MIO_EPHY_1K_SEL_MASK GENMASK(29, 24)
#define B_BE_PCIE_MIO_ADDR_PAGE_V1_MASK GENMASK(20, 16)
@@ -4401,12 +4450,28 @@
#define R_BE_LTR_LATENCY_IDX2_V1 0x361C
#define R_BE_LTR_LATENCY_IDX3_V1 0x3620
+#define R_BE_H2CREG_DATA0 0x7140
+#define R_BE_H2CREG_DATA1 0x7144
+#define R_BE_H2CREG_DATA2 0x7148
+#define R_BE_H2CREG_DATA3 0x714C
+#define R_BE_C2HREG_DATA0 0x7150
+#define R_BE_C2HREG_DATA1 0x7154
+#define R_BE_C2HREG_DATA2 0x7158
+#define R_BE_C2HREG_DATA3 0x715C
+#define R_BE_H2CREG_CTRL 0x7160
+#define B_BE_H2CREG_TRIGGER BIT(0)
+#define R_BE_C2HREG_CTRL 0x7164
+#define B_BE_C2HREG_TRIGGER BIT(0)
+
#define R_BE_HCI_FUNC_EN 0x7880
#define B_BE_HCI_CR_PROTECT BIT(31)
#define B_BE_HCI_TRXBUF_EN BIT(2)
#define B_BE_HCI_RXDMA_EN BIT(1)
#define B_BE_HCI_TXDMA_EN BIT(0)
+#define R_BE_DBG_WOW_READY 0x815E
+#define B_BE_DBG_WOW_READY GENMASK(7, 0)
+
#define R_BE_DMAC_FUNC_EN 0x8400
#define B_BE_DMAC_CRPRT BIT(31)
#define B_BE_MAC_FUNC_EN BIT(30)
@@ -4488,6 +4553,42 @@
#define B_BE_RMAC_PPDU_HANG_CNT_MASK GENMASK(23, 16)
#define B_BE_SER_L0_COUNTER_MASK GENMASK(8, 0)
+#define R_BE_DMAC_SYS_CR32B 0x842C
+#define B_BE_DMAC_BB_PHY1_MASK GENMASK(31, 16)
+#define B_BE_DMAC_BB_PHY0_MASK GENMASK(15, 0)
+#define B_BE_DMAC_BB_CTRL_39 BIT(31)
+#define B_BE_DMAC_BB_CTRL_38 BIT(30)
+#define B_BE_DMAC_BB_CTRL_37 BIT(29)
+#define B_BE_DMAC_BB_CTRL_36 BIT(28)
+#define B_BE_DMAC_BB_CTRL_35 BIT(27)
+#define B_BE_DMAC_BB_CTRL_34 BIT(26)
+#define B_BE_DMAC_BB_CTRL_33 BIT(25)
+#define B_BE_DMAC_BB_CTRL_32 BIT(24)
+#define B_BE_DMAC_BB_CTRL_31 BIT(23)
+#define B_BE_DMAC_BB_CTRL_30 BIT(22)
+#define B_BE_DMAC_BB_CTRL_29 BIT(21)
+#define B_BE_DMAC_BB_CTRL_28 BIT(20)
+#define B_BE_DMAC_BB_CTRL_27 BIT(19)
+#define B_BE_DMAC_BB_CTRL_26 BIT(18)
+#define B_BE_DMAC_BB_CTRL_25 BIT(17)
+#define B_BE_DMAC_BB_CTRL_24 BIT(16)
+#define B_BE_DMAC_BB_CTRL_23 BIT(15)
+#define B_BE_DMAC_BB_CTRL_22 BIT(14)
+#define B_BE_DMAC_BB_CTRL_21 BIT(13)
+#define B_BE_DMAC_BB_CTRL_20 BIT(12)
+#define B_BE_DMAC_BB_CTRL_19 BIT(11)
+#define B_BE_DMAC_BB_CTRL_18 BIT(10)
+#define B_BE_DMAC_BB_CTRL_17 BIT(9)
+#define B_BE_DMAC_BB_CTRL_16 BIT(8)
+#define B_BE_DMAC_BB_CTRL_15 BIT(7)
+#define B_BE_DMAC_BB_CTRL_14 BIT(6)
+#define B_BE_DMAC_BB_CTRL_13 BIT(5)
+#define B_BE_DMAC_BB_CTRL_12 BIT(4)
+#define B_BE_DMAC_BB_CTRL_11 BIT(3)
+#define B_BE_DMAC_BB_CTRL_10 BIT(2)
+#define B_BE_DMAC_BB_CTRL_9 BIT(1)
+#define B_BE_DMAC_BB_CTRL_8 BIT(0)
+
#define R_BE_DLE_EMPTY0 0x8430
#define B_BE_PLE_EMPTY_QTA_DMAC_H2D BIT(27)
#define B_BE_PLE_EMPTY_QTA_DMAC_CPUIO BIT(26)
@@ -4924,6 +5025,12 @@
B_BE_CR_WRFF_OVERFLOW_ERR_INT_EN | \
B_BE_CR_WRFF_UNDERFLOW_ERR_INT_EN)
+#define R_BE_RX_STOP 0x8914
+#define B_BE_CPU_RX_STOP BIT(17)
+#define B_BE_HOST_RX_STOP BIT(16)
+#define B_BE_CPU_RX_CH_STOP_MSK GENMASK(15, 8)
+#define B_BE_HOST_RX_CH_STOP_MSK GENMASK(5, 0)
+
#define R_BE_DISP_FWD_WLAN_0 0x8938
#define B_BE_FWD_WLAN_CPU_TYPE_13_MASK GENMASK(31, 30)
#define B_BE_FWD_WLAN_CPU_TYPE_12_MASK GENMASK(29, 28)
@@ -4947,6 +5054,11 @@
#define B_BE_WDE_START_BOUND_MASK GENMASK(14, 8)
#define B_BE_WDE_PAGE_SEL_MASK GENMASK(1, 0)
+#define R_BE_WDE_BUFMGN_CTL 0x8C10
+#define B_BE_WDE_AVAL_UPD_REQ BIT(29)
+#define B_BE_WDE_AVAL_UPD_QTAID_MASK GENMASK(27, 24)
+#define B_BE_WDE_BUFMGN_FRZTMR_MODE BIT(0)
+
#define R_BE_WDE_ERR_IMR 0x8C38
#define B_BE_WDE_DATCHN_CAMREQ_ERR_INT_EN BIT(29)
#define B_BE_WDE_DATCHN_ADRERR_ERR_INT_EN BIT(28)
@@ -5063,6 +5175,11 @@
#define B_BE_PLE_START_BOUND_MASK GENMASK(14, 8)
#define B_BE_PLE_PAGE_SEL_MASK GENMASK(1, 0)
+#define R_BE_PLE_BUFMGN_CTL 0x9010
+#define B_BE_PLE_AVAL_UPD_REQ BIT(29)
+#define B_BE_PLE_AVAL_UPD_QTAID_MASK GENMASK(27, 24)
+#define B_BE_PLE_BUFMGN_FRZTMR_MODE BIT(0)
+
#define R_BE_PLE_ERR_IMR 0x9038
#define B_BE_PLE_DATCHN_CAMREQ_ERR_INT_EN BIT(29)
#define B_BE_PLE_DATCHN_ADRERR_ERR_INT_EN BIT(28)
@@ -5429,6 +5546,21 @@
#define B_BE_DROP_NONDMA_PPDU BIT(2)
#define B_BE_APPEND_FCS BIT(0)
+#define R_BE_FWD_ERR 0x9C10
+#define R_BE_FWD_ACTN0 0x9C14
+#define R_BE_FWD_ACTN1 0x9C18
+#define R_BE_FWD_ACTN2 0x9C1C
+#define R_BE_FWD_TF0 0x9C20
+#define R_BE_FWD_TF1 0x9C24
+
+#define R_BE_HW_PPDU_STATUS 0x9C30
+#define B_BE_FWD_RPKTTYPE_MASK GENMASK(31, 26)
+#define B_BE_FWD_PPDU_PRTID_MASK GENMASK(25, 23)
+#define B_BE_FWD_PPDU_FW_RLS BIT(22)
+#define B_BE_FWD_PPDU_QUEID_MASK GENMASK(21, 16)
+#define B_BE_FWD_OTHER_RPKT_MASK GENMASK(15, 8)
+#define B_BE_FWD_PPDU_STAT_MASK GENMASK(7, 0)
+
#define R_BE_CUT_AMSDU_CTRL 0x9C94
#define B_BE_EN_CUT_AMSDU BIT(31)
#define B_BE_CUT_AMSDU_CHKLEN_EN BIT(30)
@@ -5437,6 +5569,12 @@
#define B_BE_CUT_AMSDU_CHKLEN_L_TH_MASK GENMASK(23, 16)
#define B_BE_CUT_AMSDU_CHKLEN_H_TH_MASK GENMASK(15, 0)
+#define R_BE_WOW_CTRL 0x9CB8
+#define B_BE_WOW_HCI BIT(5)
+#define B_BE_WOW_DROP BIT(2)
+#define B_BE_WOW_WOWEN BIT(1)
+#define B_BE_WOW_FORCE_WAKEUP BIT(0)
+
#define R_BE_RX_HDRTRNS 0x9CC0
#define B_BE_RX_MGN_MLD_ADDR_EN BIT(6)
#define B_BE_HDR_INFO_MASK GENMASK(5, 4)
@@ -5727,6 +5865,9 @@
#define B_BE_STOP_CH1 BIT(1)
#define B_BE_STOP_CH0 BIT(0)
+#define R_BE_HAXI_MST_WDT_TIMEOUT_SEL_V1 0xB02C
+#define B_BE_HAXI_MST_WDT_TIMEOUT_SEL_MASK GENMASK(4, 0)
+
#define R_BE_HAXI_IDCT_MSK 0xB0B8
#define B_BE_HAXI_RRESP_ERR_IDCT_MSK BIT(7)
#define B_BE_HAXI_BRESP_ERR_IDCT_MSK BIT(6)
@@ -5777,6 +5918,15 @@
#define B_BE_PREC_PAGE_CH12_V1_MASK GENMASK(21, 16)
#define B_BE_PREC_PAGE_CH011_V1_MASK GENMASK(5, 0)
+#define R_BE_CH0_PAGE_CTRL 0xB718
+#define B_BE_CH0_GRP BIT(31)
+#define B_BE_CH0_MAX_PG_MASK GENMASK(28, 16)
+#define B_BE_CH0_MIN_PG_MASK GENMASK(12, 0)
+
+#define R_BE_CH0_PAGE_INFO 0xB750
+#define B_BE_CH0_AVAL_PG_MASK GENMASK(28, 16)
+#define B_BE_CH0_USE_PG_MASK GENMASK(12, 0)
+
#define R_BE_PUB_PAGE_INFO3 0xB78C
#define B_BE_G1_AVAL_PG_MASK GENMASK(28, 16)
#define B_BE_G0_AVAL_PG_MASK GENMASK(12, 0)
@@ -5822,6 +5972,39 @@
#define B_BE_MACID_ACQ_GRP0_CLR_P BIT(2)
#define B_BE_R_MACID_ACQ_CHK_EN BIT(0)
+#define R_BE_BT_BREAK_TABLE 0x0E344
+
+#define R_BE_GNT_SW_CTRL 0x0E348
+#define B_BE_WL_ACT2_VAL BIT(25)
+#define B_BE_WL_ACT2_SWCTRL BIT(24)
+#define B_BE_WL_ACT_VAL BIT(23)
+#define B_BE_WL_ACT_SWCTRL BIT(22)
+#define B_BE_GNT_BT_RX_BB1_VAL BIT(21)
+#define B_BE_GNT_BT_RX_BB1_SWCTRL BIT(20)
+#define B_BE_GNT_BT_TX_BB1_VAL BIT(19)
+#define B_BE_GNT_BT_TX_BB1_SWCTRL BIT(18)
+#define B_BE_GNT_BT_RX_BB0_VAL BIT(17)
+#define B_BE_GNT_BT_RX_BB0_SWCTRL BIT(16)
+#define B_BE_GNT_BT_TX_BB0_VAL BIT(15)
+#define B_BE_GNT_BT_TX_BB0_SWCTRL BIT(14)
+#define B_BE_GNT_WL_RX_VAL BIT(13)
+#define B_BE_GNT_WL_RX_SWCTRL BIT(12)
+#define B_BE_GNT_WL_TX_VAL BIT(11)
+#define B_BE_GNT_WL_TX_SWCTRL BIT(10)
+#define B_BE_GNT_BT_BB1_VAL BIT(9)
+#define B_BE_GNT_BT_BB1_SWCTRL BIT(8)
+#define B_BE_GNT_WL_BB1_VAL BIT(7)
+#define B_BE_GNT_WL_BB1_SWCTRL BIT(6)
+#define B_BE_GNT_BT_BB0_VAL BIT(5)
+#define B_BE_GNT_BT_BB0_SWCTRL BIT(4)
+#define B_BE_GNT_WL_BB0_VAL BIT(3)
+#define B_BE_GNT_WL_BB0_SWCTRL BIT(2)
+#define B_BE_GNT_WL_BB_PWR_VAL BIT(1)
+#define B_BE_GNT_WL_BB_PWR_SWCTRL BIT(0)
+
+#define R_BE_PWR_MACID_PATH_BASE 0x0E500
+#define R_BE_PWR_MACID_LMT_BASE 0x0ED00
+
#define R_BE_CMAC_FUNC_EN 0x10000
#define R_BE_CMAC_FUNC_EN_C1 0x14000
#define B_BE_CMAC_CRPRT BIT(31)
@@ -5873,6 +6056,16 @@
B_BE_RMAC_CKEN | B_BE_TXTIME_CKEN | B_BE_RESP_PKTCTL_CKEN | \
B_BE_SIGB_CKEN)
+#define R_BE_WMAC_RFMOD 0x10010
+#define R_BE_WMAC_RFMOD_C1 0x14010
+#define B_BE_CMAC_ASSERTION BIT(31)
+#define B_BE_WMAC_RFMOD_MASK GENMASK(2, 0)
+#define BE_WMAC_RFMOD_20M 0
+#define BE_WMAC_RFMOD_40M 1
+#define BE_WMAC_RFMOD_80M 2
+#define BE_WMAC_RFMOD_160M 3
+#define BE_WMAC_RFMOD_320M 4
+
#define R_BE_TX_SUB_BAND_VALUE 0x10088
#define R_BE_TX_SUB_BAND_VALUE_C1 0x14088
#define B_BE_PRI20_BITMAP_MASK GENMASK(31, 16)
@@ -6009,6 +6202,13 @@
#define B_BE_MACTX_LATENCY_MASK GENMASK(10, 8)
#define B_BE_PREBKF_TIME_MASK GENMASK(4, 0)
+#define R_BE_PREBKF_CFG_1 0x1033C
+#define R_BE_PREBKF_CFG_1_C1 0x1433C
+#define B_BE_SIFS_TIMEOUT_TB_AGGR_MASK GENMASK(31, 24)
+#define B_BE_SIFS_PREBKF_MASK GENMASK(23, 16)
+#define B_BE_SIFS_TIMEOUT_T2_MASK GENMASK(14, 8)
+#define B_BE_SIFS_MACTXEN_T1_MASK GENMASK(6, 0)
+
#define R_BE_CCA_CFG_0 0x10340
#define R_BE_CCA_CFG_0_C1 0x14340
#define B_BE_R_SIFS_AGGR_TIME_V1_MASK GENMASK(31, 24)
@@ -6050,11 +6250,36 @@
#define R_BE_MUEDCA_EN 0x10370
#define R_BE_MUEDCA_EN_C1 0x14370
+#define B_BE_SIFS_TIMEOUT_TB_T2_MASK GENMASK(30, 24)
+#define B_BE_SIFS_MACTXEN_TB_T1_MASK GENMASK(22, 16)
#define B_BE_MUEDCA_WMM_SEL BIT(8)
-#define B_BE_SET_MUEDCATIMER_TF_1 BIT(5)
+#define B_BE_SET_MUEDCATIMER_TF_MASK GENMASK(5, 4)
#define B_BE_SET_MUEDCATIMER_TF_0 BIT(4)
+#define B_BE_MUEDCA_EN_MASK GENMASK(1, 0)
#define B_BE_MUEDCA_EN_0 BIT(0)
+#define R_BE_CTN_DRV_TXEN 0x10398
+#define R_BE_CTN_DRV_TXEN_C1 0x14398
+#define B_BE_CTN_TXEN_TWT_3 BIT(17)
+#define B_BE_CTN_TXEN_TWT_2 BIT(16)
+#define B_BE_CTN_TXEN_TWT_1 BIT(15)
+#define B_BE_CTN_TXEN_TWT_0 BIT(14)
+#define B_BE_CTN_TXEN_ULQ BIT(13)
+#define B_BE_CTN_TXEN_BCNQ BIT(12)
+#define B_BE_CTN_TXEN_HGQ BIT(11)
+#define B_BE_CTN_TXEN_CPUMGQ BIT(10)
+#define B_BE_CTN_TXEN_MGQ1 BIT(9)
+#define B_BE_CTN_TXEN_MGQ BIT(8)
+#define B_BE_CTN_TXEN_VO_1 BIT(7)
+#define B_BE_CTN_TXEN_VI_1 BIT(6)
+#define B_BE_CTN_TXEN_BK_1 BIT(5)
+#define B_BE_CTN_TXEN_BE_1 BIT(4)
+#define B_BE_CTN_TXEN_VO_0 BIT(3)
+#define B_BE_CTN_TXEN_VI_0 BIT(2)
+#define B_BE_CTN_TXEN_BK_0 BIT(1)
+#define B_BE_CTN_TXEN_BE_0 BIT(0)
+#define B_BE_CTN_TXEN_ALL_MASK GENMASK(17, 0)
+
#define R_BE_TB_CHK_CCA_NAV 0x103AC
#define R_BE_TB_CHK_CCA_NAV_C1 0x143AC
#define B_BE_TB_CHK_TX_NAV BIT(15)
@@ -6212,6 +6437,8 @@
#define R_BE_TSFTR_HIGH_P0_C1 0x1443C
#define B_BE_TSFTR_HIGH_P0_MASK GENMASK(31, 0)
+#define R_BE_BCN_DROP_ALL0 0x10560
+
#define R_BE_MBSSID_CTRL 0x10568
#define R_BE_MBSSID_CTRL_C1 0x14568
#define B_BE_MBSSID_MODE_SEL BIT(20)
@@ -6282,6 +6509,17 @@
#define B_BE_SPEC_SIFS_OFDM_PTCL_MASK GENMASK(15, 8)
#define B_BE_SPEC_SIFS_CCK_PTCL_MASK GENMASK(7, 0)
+#define R_BE_TXRATE_CHK 0x10828
+#define R_BE_TXRATE_CHK_C1 0x14828
+#define B_BE_LATENCY_PADDING_PKT_TH_MASK GENMASK(31, 24)
+#define B_BE_PLCP_FETCH_BUFF_MASK GENMASK(23, 16)
+#define B_BE_OFDM_CCK_ERR_PROC BIT(6)
+#define B_BE_PKT_LAST_TX BIT(5)
+#define B_BE_BAND_MODE BIT(4)
+#define B_BE_MAX_TXNSS_MASK GENMASK(3, 2)
+#define B_BE_RTS_LIMIT_IN_OFDM6 BIT(1)
+#define B_BE_CHECK_CCK_EN BIT(0)
+
#define R_BE_MBSSID_DROP_0 0x1083C
#define R_BE_MBSSID_DROP_0_C1 0x1483C
#define B_BE_GI_LTF_FB_SEL BIT(30)
@@ -6289,6 +6527,20 @@
#define B_BE_PORT_DROP_4_0_MASK GENMASK(20, 16)
#define B_BE_MBSSID_DROP_15_0_MASK GENMASK(15, 0)
+#define R_BE_BT_PLT 0x1087C
+#define R_BE_BT_PLT_C1 0x1487C
+#define B_BE_BT_PLT_PKT_CNT_MASK GENMASK(31, 16)
+#define B_BE_BT_PLT_RST BIT(9)
+#define B_BE_PLT_EN BIT(8)
+#define B_BE_RX_PLT_GNT_LTE_RX BIT(7)
+#define B_BE_RX_PLT_GNT_BT_RX BIT(6)
+#define B_BE_RX_PLT_GNT_BT_TX BIT(5)
+#define B_BE_RX_PLT_GNT_WL BIT(4)
+#define B_BE_TX_PLT_GNT_LTE_RX BIT(3)
+#define B_BE_TX_PLT_GNT_BT_RX BIT(2)
+#define B_BE_TX_PLT_GNT_BT_TX BIT(1)
+#define B_BE_TX_PLT_GNT_WL BIT(0)
+
#define R_BE_PTCL_BSS_COLOR_0 0x108A0
#define R_BE_PTCL_BSS_COLOR_0_C1 0x148A0
#define B_BE_BSS_COLOB_BE_PORT_3_MASK GENMASK(29, 24)
@@ -6398,6 +6650,10 @@
#define B_BE_PTCL_DROP BIT(5)
#define B_BE_PTCL_TX_QUEUE_IDX_MASK GENMASK(4, 0)
+#define R_BE_PTCL_DBG_INFO 0x108F0
+
+#define R_BE_PTCL_DBG 0x108F4
+
#define R_BE_RX_ERROR_FLAG 0x10C00
#define R_BE_RX_ERROR_FLAG_C1 0x14C00
#define B_BE_RX_CSI_NOT_RELEASE_ERROR BIT(31)
@@ -6676,6 +6932,9 @@
#define B_BE_UPD_HGQMD BIT(1)
#define B_BE_UPD_TIMIE BIT(0)
+#define R_BE_WMTX_POWER_BE_BIT_CTL 0x10E0C
+#define R_BE_WMTX_POWER_BE_BIT_CTL_C1 0x14E0C
+
#define R_BE_WMTX_TCR_BE_4 0x10E2C
#define R_BE_WMTX_TCR_BE_4_C1 0x14E2C
#define B_BE_UL_EHT_MUMIMO_LTF_MODE BIT(30)
@@ -7056,6 +7315,20 @@
#define S_BE_BACAM_RST_ENT 1
#define S_BE_BACAM_RST_ALL 2
+#define R_BE_PPDU_STAT 0x11440
+#define R_BE_PPDU_STAT_C1 0x15440
+#define B_BE_STAT_IORST BIT(13)
+#define B_BE_STAT_GCKDIS BIT(12)
+#define B_BE_PPDU_STAT_WR_BW_MASK GENMASK(11, 10)
+#define B_BE_PPDU_STAT_RPT_TRIG BIT(8)
+#define B_BE_PPDU_STAT_RPT_DMA BIT(6)
+#define B_BE_PPDU_STAT_RPT_CRC32 BIT(5)
+#define B_BE_PPDU_STAT_RPT_ADDR BIT(4)
+#define B_BE_APP_PLCP_HDR_RPT BIT(3)
+#define B_BE_APP_RX_CNT_RPT BIT(2)
+#define B_BE_PPDU_MAC_INFO BIT(1)
+#define B_BE_PPDU_STAT_RPT_EN BIT(0)
+
#define R_BE_RX_SR_CTRL 0x1144A
#define R_BE_RX_SR_CTRL_C1 0x1544A
#define B_BE_SR_OP_MODE_MASK GENMASK(5, 4)
@@ -7063,6 +7336,13 @@
#define B_BE_SR_CTRL_PLCP_EN BIT(1)
#define B_BE_SR_EN BIT(0)
+#define R_BE_BSSID_SRC_CTRL 0x1144B
+#define R_BE_BSSID_SRC_CTRL_C1 0x1544B
+#define B_BE_BSSID_MATCH BIT(3)
+#define B_BE_PARTIAL_AID_MATCH BIT(2)
+#define B_BE_BSSCOLOR_MATCH BIT(1)
+#define B_BE_PLCP_SRC_EN BIT(0)
+
#define R_BE_CSIRPT_OPTION 0x11464
#define R_BE_CSIRPT_OPTION_C1 0x15464
#define B_BE_CSIPRT_EHTSU_AID_EN BIT(26)
@@ -7178,12 +7458,56 @@
#define R_BE_PWR_MODULE 0x11900
#define R_BE_PWR_MODULE_C1 0x15900
+#define R_BE_PWR_LISTEN_PATH 0x11988
+#define B_BE_PWR_LISTEN_PATH_EN GENMASK(31, 28)
+
+#define R_BE_PWR_REF_CTRL 0x11A20
+#define B_BE_PWR_REF_CTRL_OFDM GENMASK(9, 1)
+#define B_BE_PWR_REF_CTRL_CCK GENMASK(18, 10)
+#define B_BE_PWR_OFST_LMT_DB GENMASK(27, 19)
+#define R_BE_PWR_OFST_LMTBF 0x11A24
+#define B_BE_PWR_OFST_LMTBF_DB GENMASK(8, 0)
+#define R_BE_PWR_FORCE_LMT 0x11A28
+#define B_BE_PWR_FORCE_LMT_ON BIT(6)
+
+#define R_BE_PWR_RATE_CTRL 0x11A2C
+#define B_BE_PWR_OFST_BYRATE_DB GENMASK(8, 0)
+#define B_BE_FORCE_PWR_BY_RATE_EN BIT(19)
+#define B_BE_FORCE_PWR_BY_RATE_VAL GENMASK(28, 20)
#define R_BE_PWR_RATE_OFST_CTRL 0x11A30
+#define R_BE_PWR_RATE_OFST_END 0x11A38
+#define R_BE_PWR_RULMT_START 0x12048
+#define R_BE_PWR_RULMT_END 0x120e4
+
+#define R_BE_PWR_BOOST 0x11A40
+#define B_BE_PWR_CTRL_SEL BIT(16)
+#define B_BE_PWR_FORCE_RATE_ON BIT(29)
+#define R_BE_PWR_OFST_RULMT 0x11A44
+#define B_BE_PWR_OFST_RULMT_DB GENMASK(17, 9)
+#define B_BE_PWR_FORCE_RU_ON BIT(18)
+#define B_BE_PWR_FORCE_RU_ENON BIT(28)
+#define R_BE_PWR_FORCE_MACID 0x11A48
+#define B_BE_PWR_FORCE_MACID_ON BIT(9)
+
+#define R_BE_PWR_REG_CTRL 0x11A50
+#define B_BE_PWR_BT_EN BIT(23)
+
+#define R_BE_PWR_COEX_CTRL 0x11A54
+#define B_BE_PWR_BT_VAL GENMASK(8, 0)
+#define B_BE_PWR_FORCE_COEX_ON GENMASK(29, 27)
+
+#define R_BE_PWR_OFST_SW 0x11AE8
+#define B_BE_PWR_OFST_SW_DB GENMASK(27, 24)
+
+#define R_BE_PWR_FTM 0x11B00
+#define R_BE_PWR_FTM_SS 0x11B04
+
#define R_BE_PWR_BY_RATE 0x11E00
#define R_BE_PWR_BY_RATE_MAX 0x11FA8
#define R_BE_PWR_LMT 0x11FAC
#define R_BE_PWR_LMT_MAX 0x12040
+#define R_BE_PWR_BY_RATE_END 0x12044
#define R_BE_PWR_RU_LMT 0x12048
#define R_BE_PWR_RU_LMT_MAX 0x120E4
@@ -7223,6 +7547,7 @@
#define RR_MOD_M_RXBB GENMASK(9, 5)
#define RR_MOD_LO_SEL BIT(1)
#define RR_MODOPT 0x01
+#define RR_TXG_SEL GENMASK(19, 17)
#define RR_MODOPT_M_TXPWR GENMASK(5, 0)
#define RR_WLSEL 0x02
#define RR_WLSEL_AG GENMASK(18, 16)
@@ -7256,6 +7581,12 @@
#define CFGCH_BAND0_2G 0
#define CFGCH_BAND0_5G 1
#define CFGCH_BAND0_6G 0
+#define RR_CFGCH_BW_V2 GENMASK(12, 10)
+#define CFGCH_BW_V2_20M 0
+#define CFGCH_BW_V2_40M 1
+#define CFGCH_BW_V2_80M 2
+#define CFGCH_BW_V2_160M 3
+#define CFGCH_BW_V2_320M 4
#define RR_CFGCH_BW GENMASK(11, 10)
#define RR_CFGCH_CH GENMASK(7, 0)
#define CFGCH_BW_20M 3
@@ -7292,6 +7623,7 @@
#define RR_LUTWD0_LB GENMASK(5, 0)
#define RR_TM 0x42
#define RR_TM_TRI BIT(19)
+#define RR_TM_VAL_V1 GENMASK(7, 0)
#define RR_TM_VAL GENMASK(6, 1)
#define RR_TM2 0x43
#define RR_TM2_OFF GENMASK(19, 16)
@@ -7325,8 +7657,12 @@
#define RR_TXAC 0x5f
#define RR_TXAC_IQG GENMASK(3, 0)
#define RR_BIASA 0x60
-#define RR_BIASA_TXG GENMASK(15, 12)
#define RR_BIASA_TXA GENMASK(19, 16)
+#define RR_BIASA_TXG GENMASK(15, 12)
+#define RR_BIASD_TXA_V1 GENMASK(15, 12)
+#define RR_BIASA_TXA_V1 GENMASK(11, 8)
+#define RR_BIASD_TXG_V1 GENMASK(7, 4)
+#define RR_BIASA_TXG_V1 GENMASK(3, 0)
#define RR_BIASA_A GENMASK(2, 0)
#define RR_BIASA2 0x63
#define RR_BIASA2_LB GENMASK(4, 2)
@@ -7410,6 +7746,7 @@
#define RR_MIXER_GN GENMASK(4, 3)
#define RR_POW 0xa0
#define RR_POW_SYN GENMASK(3, 2)
+#define RR_POW_SYN_V1 GENMASK(3, 0)
#define RR_LOGEN 0xa3
#define RR_LOGEN_RPT GENMASK(19, 16)
#define RR_SX 0xaf
@@ -7436,6 +7773,8 @@
#define RR_MMD 0xd5
#define RR_MMD_RST_EN BIT(8)
#define RR_MMD_RST_SYN BIT(6)
+#define RR_SMD 0xd6
+#define RR_VCO2 BIT(19)
#define RR_IQKPLL 0xdc
#define RR_IQKPLL_MOD GENMASK(9, 8)
#define RR_SYNLUT 0xdd
@@ -7459,15 +7798,24 @@
#define RR_RFC_CKEN BIT(1)
#define R_UPD_P0 0x0000
+#define R_BBCLK 0x0000
+#define B_CLK_640M BIT(2)
#define R_RSTB_WATCH_DOG 0x000C
#define B_P0_RSTB_WATCH_DOG BIT(0)
#define B_P1_RSTB_WATCH_DOG BIT(1)
#define B_UPD_P0_EN BIT(31)
+#define R_EMLSR 0x0044
+#define B_EMLSR_PARM GENMASK(27, 12)
#define R_SPOOF_CG 0x00B4
#define B_SPOOF_CG_EN BIT(17)
+#define R_CHINFO_SEG 0x00B4
+#define B_CHINFO_SEG_LEN GENMASK(2, 0)
+#define B_CHINFO_SEG GENMASK(16, 7)
#define R_DFS_FFT_CG 0x00B8
#define B_DFS_CG_EN BIT(1)
#define B_DFS_FFT_EN BIT(0)
+#define R_CHINFO_DATA 0x00C0
+#define B_CHINFO_DATA_BITMAP GENMASK(22, 0)
#define R_ANAPAR_PW15 0x030C
#define B_ANAPAR_PW15 GENMASK(31, 24)
#define B_ANAPAR_PW15_H GENMASK(27, 24)
@@ -7497,6 +7845,23 @@
#define B_SWSI_READ_ADDR_ADDR_V1 GENMASK(7, 0)
#define B_SWSI_READ_ADDR_PATH_V1 GENMASK(10, 8)
#define B_SWSI_READ_ADDR_V1 GENMASK(10, 0)
+#define R_BRK_R 0x0418
+#define B_VHTMCS_LMT GENMASK(22, 21)
+#define B_HTMCS_LMT GENMASK(9, 8)
+#define R_BRK_EHT 0x0474
+#define B_RXEHT_NSS_MAX GENMASK(4, 2)
+#define R_BRK_RXEHT 0x0478
+#define B_RXEHT_N_USER_MAX GENMASK(31, 24)
+#define B_RXEHTTB_NSS_MAX GENMASK(16, 14)
+#define R_EN_SND_WO_NDP 0x047c
+#define R_EN_SND_WO_NDP_C1 0x147c
+#define B_EN_SND_WO_NDP BIT(1)
+#define R_BRK_HE 0x0480
+#define B_TB_NSS_MAX GENMASK(25, 23)
+#define B_NSS_MAX GENMASK(16, 14)
+#define B_N_USR_MAX GENMASK(13, 6)
+#define R_RXCCA_BE1 0x0520
+#define B_RXCCA_BE1_DIS BIT(0)
#define R_UPD_CLK_ADC 0x0700
#define B_UPD_CLK_ADC_VAL GENMASK(26, 25)
#define B_UPD_CLK_ADC_ON BIT(24)
@@ -7543,6 +7908,7 @@
#define B_PMAC_RXMOD_MSK GENMASK(7, 4)
#define R_MAC_SEL 0x09A4
#define B_MAC_SEL_OFDM_TRI_FILTER BIT(31)
+#define B_MAC_SEL GENMASK(19, 17)
#define B_MAC_SEL_PWR_EN BIT(16)
#define B_MAC_SEL_DPD_EN BIT(10)
#define B_MAC_SEL_MOD GENMASK(4, 2)
@@ -7588,19 +7954,28 @@
#define R_PD_CTRL 0x0C3C
#define B_PD_HIT_DIS BIT(9)
#define R_IOQ_IQK_DPK 0x0C60
+#define B_IOQ_IQK_DPK_CLKEN GENMASK(1, 0)
#define B_IOQ_IQK_DPK_EN BIT(1)
#define R_GNT_BT_WGT_EN 0x0C6C
#define B_GNT_BT_WGT_EN BIT(21)
+#define R_IQK_DPK_RST 0x0C6C
+#define R_IQK_DPK_RST_C1 0x1C6C
+#define B_IQK_DPK_RST BIT(0)
#define R_TX_COLLISION_T2R_ST 0x0C70
#define B_TX_COLLISION_T2R_ST_M GENMASK(25, 20)
#define R_TXGATING 0x0C74
#define B_TXGATING_EN BIT(4)
+#define R_TXRFC 0x0C7C
+#define R_TXRFC_C1 0x1C7C
+#define B_TXRFC_RST GENMASK(23, 21)
#define R_PD_ARBITER_OFF 0x0C80
#define B_PD_ARBITER_OFF BIT(31)
#define R_SNDCCA_A1 0x0C9C
#define B_SNDCCA_A1_EN GENMASK(19, 12)
#define R_SNDCCA_A2 0x0CA0
#define B_SNDCCA_A2_VAL GENMASK(19, 12)
+#define R_UDP_COEEF 0x0CBC
+#define B_UDP_COEEF BIT(19)
#define R_TX_COLLISION_T2R_ST_BE 0x0CC8
#define B_TX_COLLISION_T2R_ST_BE_M GENMASK(13, 8)
#define R_RXHT_MCS_LIMIT 0x0D18
@@ -7624,7 +7999,11 @@
#define R_CTLTOP 0x1008
#define B_CTLTOP_ON BIT(23)
#define B_CTLTOP_VAL GENMASK(15, 12)
+#define R_CLK_GCK 0x1008
+#define B_CLK_GCK GENMASK(24, 0)
#define R_EDCCA_RPT_SEL_BE 0x10CC
+#define R_ADC_FIFO_V1 0x10FC
+#define B_ADC_FIFO_EN_V1 GENMASK(31, 24)
#define R_S0_HW_SI_DIS 0x1200
#define B_S0_HW_SI_DIS_W_R_TRIG GENMASK(30, 28)
#define R_P0_RXCK 0x12A0
@@ -7771,6 +8150,27 @@
#define B_P80_AT_HIGH_FREQ_RU_ALLOC_PHY0 BIT(13)
#define R_DBCC_80P80_SEL_EVM_RPT2 0x2A10
#define B_DBCC_80P80_SEL_EVM_RPT2_EN BIT(0)
+#define R_AFEDAC0 0x2A5C
+#define B_AFEDAC0 GENMASK(31, 27)
+#define R_AFEDAC1 0x2A60
+#define B_AFEDAC1 GENMASK(2, 0)
+#define R_IQKDPK_HC 0x2AB8
+#define B_IQKDPK_HC BIT(28)
+#define R_HWSI_ADD0 0x2ADC
+#define R_HWSI_ADD1 0x2BDC
+#define B_HWSI_ADD_MASK GENMASK(11, 4)
+#define B_HWSI_ADD_CTL_MASK GENMASK(2, 0)
+#define B_HWSI_ADD_RD BIT(2)
+#define B_HWSI_ADD_POLL_MASK GENMASK(1, 0)
+#define B_HWSI_ADD_RUN BIT(1)
+#define B_HWSI_ADD_BUSY BIT(0)
+#define R_HWSI_DATA 0x2AE0
+#define B_HWSI_DATA_VAL GENMASK(27, 8)
+#define B_HWSI_DATA_ADDR GENMASK(7, 0)
+#define R_HWSI_VAL0 0x2C24
+#define R_HWSI_VAL1 0x2D24
+#define B_HWSI_VAL_RDONE BIT(31)
+#define B_HWSI_VAL_BUSY BIT(29)
#define R_P1_EN_SOUND_WO_NDP 0x2D7C
#define B_P1_EN_SOUND_WO_NDP BIT(1)
#define R_EDCCA_RPT_A_BE 0x2E38
@@ -7806,8 +8206,30 @@
#define R_S1_ADDCK 0x3E00
#define B_S1_ADDCK_I GENMASK(9, 0)
#define B_S1_ADDCK_Q GENMASK(19, 10)
+#define R_OP1DB_A 0x40B0
+#define B_OP1DB_A GENMASK(31, 24)
+#define R_OP1DB1_A 0x40BC
+#define B_TIA10_A GENMASK(15, 0)
+#define B_TIA1_A GENMASK(15, 8)
+#define B_TIA0_A GENMASK(7, 0)
+#define R_BKOFF_A 0x40E0
+#define B_BKOFF_IBADC_A GENMASK(23, 18)
+#define R_BACKOFF_A 0x40E4
+#define B_LNA_IBADC_A GENMASK(29, 18)
+#define B_BACKOFF_LNA_A GENMASK(29, 24)
+#define B_BACKOFF_IBADC_A GENMASK(23, 18)
+#define R_RXBY_WBADC_A 0x40F4
+#define B_RXBY_WBADC_A GENMASK(14, 10)
#define R_MUIC 0x40F8
#define B_MUIC_EN BIT(0)
+#define R_BT_RXBY_WBADC_A 0x4160
+#define B_BT_RXBY_WBADC_A BIT(31)
+#define R_BT_SHARE_A 0x4164
+#define B_BT_SHARE_A BIT(0)
+#define B_BT_TRK_OFF_A BIT(1)
+#define B_BTG_PATH_A BIT(4)
+#define R_FORCE_FIR_A 0x418C
+#define B_FORCE_FIR_A GENMASK(1, 0)
#define R_DCFO 0x4264
#define B_DCFO GENMASK(7, 0)
#define R_SEG0CSI 0x42AC
@@ -7846,8 +8268,30 @@
#define R_DPD_BF 0x44a0
#define B_DPD_BF_OFDM GENMASK(16, 12)
#define B_DPD_BF_SCA GENMASK(6, 0)
+#define R_LNA_OP 0x44B0
+#define B_LNA6 GENMASK(31, 24)
+#define R_LNA_TIA 0x44BC
+#define B_TIA10_B GENMASK(15, 0)
+#define B_TIA1_B GENMASK(15, 8)
+#define B_TIA0_B GENMASK(7, 0)
+#define R_BKOFF_B 0x44E0
+#define B_BKOFF_IBADC_B GENMASK(23, 18)
+#define R_BACKOFF_B 0x44E4
+#define B_LNA_IBADC_B GENMASK(29, 18)
+#define B_BACKOFF_LNA_B GENMASK(29, 24)
+#define B_BACKOFF_IBADC_B GENMASK(23, 18)
+#define R_RXBY_WBADC_B 0x44F4
+#define B_RXBY_WBADC_B GENMASK(14, 10)
+#define R_BT_RXBY_WBADC_B 0x4560
+#define B_BT_RXBY_WBADC_B BIT(31)
+#define R_BT_SHARE_B 0x4564
+#define B_BT_SHARE_B BIT(0)
+#define B_BT_TRK_OFF_B BIT(1)
+#define B_BTG_PATH_B BIT(4)
#define R_TXPATH_SEL 0x458C
#define B_TXPATH_SEL_MSK GENMASK(31, 28)
+#define R_FORCE_FIR_B 0x458C
+#define B_FORCE_FIR_B GENMASK(1, 0)
#define R_TXPWR 0x4594
#define B_TXPWR_MSK GENMASK(30, 22)
#define R_TXNSS_MAP 0x45B4
@@ -7910,10 +8354,12 @@
#define R_PATH0_P20_FOLLOW_BY_PAGCUGC 0x46A0
#define R_PATH0_P20_FOLLOW_BY_PAGCUGC_V1 0x4C24
#define R_PATH0_P20_FOLLOW_BY_PAGCUGC_V2 0x46E8
+#define R_PATH0_P20_FOLLOW_BY_PAGCUGC_V3 0x41C8
#define B_PATH0_P20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5)
#define R_PATH0_S20_FOLLOW_BY_PAGCUGC 0x46A4
#define R_PATH0_S20_FOLLOW_BY_PAGCUGC_V1 0x4C28
#define R_PATH0_S20_FOLLOW_BY_PAGCUGC_V2 0x46EC
+#define R_PATH0_S20_FOLLOW_BY_PAGCUGC_V3 0x41CC
#define B_PATH0_S20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5)
#define R_PATH0_RXB_INIT_V1 0x46A8
#define B_PATH0_RXB_INIT_IDX_MSK_V1 GENMASK(14, 10)
@@ -7958,10 +8404,12 @@
#define R_PATH1_P20_FOLLOW_BY_PAGCUGC 0x4774
#define R_PATH1_P20_FOLLOW_BY_PAGCUGC_V1 0x4CE8
#define R_PATH1_P20_FOLLOW_BY_PAGCUGC_V2 0x47A8
+#define R_PATH1_P20_FOLLOW_BY_PAGCUGC_V3 0x45C8
#define B_PATH1_P20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5)
#define R_PATH1_S20_FOLLOW_BY_PAGCUGC 0x4778
#define R_PATH1_S20_FOLLOW_BY_PAGCUGC_V1 0x4CEC
#define R_PATH1_S20_FOLLOW_BY_PAGCUGC_V2 0x47AC
+#define R_PATH1_S20_FOLLOW_BY_PAGCUGC_V3 0x45CC
#define B_PATH1_S20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5)
#define R_PATH1_G_TIA0_LNA6_OP1DB_V1 0x4778
#define B_PATH1_G_TIA0_LNA6_OP1DB_V1 GENMASK(7, 0)
@@ -8092,6 +8540,15 @@
#define B_PATH1_5MDET_SB2 BIT(8)
#define B_PATH1_5MDET_SB0 BIT(6)
#define B_PATH1_5MDET_TH GENMASK(5, 0)
+#define R_S0S1_CSI_WGT 0x4D34
+#define B_S0S1_CSI_WGT_EN BIT(0)
+#define B_S0S1_CSI_WGT_TONE_IDX GENMASK(31, 20)
+#define R_CHINFO_ELM_SRC 0x4D84
+#define B_CHINFO_ELM_BITMAP GENMASK(22, 0)
+#define B_CHINFO_SRC GENMASK(31, 30)
+#define R_CHINFO_TYPE_SCAL 0x4D88
+#define B_CHINFO_TYPE GENMASK(2, 1)
+#define B_CHINFO_SCAL BIT(8)
#define R_RPL_BIAS_COMP 0x4DF0
#define B_RPL_BIAS_COMP_MASK GENMASK(7, 0)
#define R_RPL_PATHAB 0x4E0C
@@ -8239,14 +8696,90 @@
#define B_S0_DACKQ8_K GENMASK(15, 8)
#define R_DCFO_WEIGHT_V1 0x6244
#define B_DCFO_WEIGHT_MSK_V1 GENMASK(31, 28)
+#define R_DAC_CLK 0x625C
+#define B_DAC_CLK GENMASK(31, 30)
#define R_DCFO_OPT_V1 0x6260
#define B_DCFO_OPT_EN_V1 BIT(17)
+#define R_TXFCTR 0x627C
+#define B_TXFCTR_THD GENMASK(19, 10)
+#define R_TXSCALE 0x6284
+#define B_TXFCTR_EN BIT(19)
+#define R_PCOEFF01 0x6684
+#define B_PCOEFF01 GENMASK(23, 0)
+#define R_PCOEFF23 0x6688
+#define B_PCOEFF23 GENMASK(23, 0)
+#define R_PCOEFF45 0x668c
+#define B_PCOEFF45 GENMASK(23, 0)
+#define R_PCOEFF67 0x6690
+#define B_PCOEFF67 GENMASK(23, 0)
+#define R_PCOEFF89 0x6694
+#define B_PCOEFF89 GENMASK(23, 0)
+#define R_PCOEFFAB 0x6698
+#define B_PCOEFFAB GENMASK(23, 0)
+#define R_PCOEFFCD 0x669c
+#define B_PCOEFFCD GENMASK(23, 0)
+#define R_PCOEFFEF 0x66a0
+#define B_PCOEFFEF GENMASK(23, 0)
+#define R_MGAIN_BIAS 0x672c
+#define B_MGAIN_BIAS_BW20 GENMASK(3, 0)
+#define B_MGAIN_BIAS_BW40 GENMASK(7, 4)
+#define R_CCK_RPL_OFST 0x6750
+#define B_CCK_RPL_OFST GENMASK(7, 0)
+#define R_BK_FC0INV 0x6758
+#define B_BK_FC0INV GENMASK(18, 0)
+#define R_CCK_FC0INV 0x675c
+#define B_CCK_FC0INV GENMASK(18, 0)
#define R_SEG0R_EDCCA_LVL_BE 0x69EC
#define R_SEG0R_PPDU_LVL_BE 0x69F0
#define R_SEGSND 0x6A14
#define B_SEGSND_EN BIT(31)
+#define R_DBCC 0x6B48
+#define B_DBCC_EN BIT(0)
+#define R_FC0 0x6B4C
+#define B_BW40_2XFFT BIT(31)
+#define B_FC0 GENMASK(12, 0)
+#define R_FC0INV_SBW 0x6B50
+#define B_SMALLBW GENMASK(31, 30)
+#define B_RX_BT_SG0 GENMASK(25, 22)
+#define B_RX_1RCCA GENMASK(17, 14)
+#define B_FC0_INV GENMASK(6, 0)
+#define R_ANT_CHBW 0x6B54
+#define B_ANT_BT_SHARE BIT(16)
+#define B_CHBW_BW GENMASK(14, 12)
+#define B_CHBW_PRICH GENMASK(11, 8)
+#define B_ANT_RX_SG0 GENMASK(3, 0)
+#define R_SLOPE 0x6B6C
+#define B_EHT_RATE_TH GENMASK(31, 28)
+#define B_SLOPE_B GENMASK(27, 14)
+#define B_SLOPE_A GENMASK(13, 0)
+#define R_SC_CORNER 0x6B70
+#define B_SC_CORNER GENMASK(10, 0)
+#define R_MAG_A 0x6BF4
+#define B_MGA_AEND GENMASK(31, 24)
+#define R_MAG_AB 0x6BF8
+#define B_BY_SLOPE GENMASK(31, 24)
+#define B_MAG_AB GENMASK(23, 0)
+#define R_BEDGE 0x6BFC
+#define B_EHT_MCS14 BIT(31)
+#define B_HE_RATE_TH GENMASK(30, 27)
+#define R_BEDGE2 0x6C00
+#define B_EHT_MCS15 BIT(31)
+#define B_HT_VHT_TH GENMASK(11, 0)
+#define R_BEDGE3 0x6C04
+#define B_TB_EN BIT(23)
+#define B_HEMU_EN BIT(21)
+#define B_HEERSU_EN BIT(19)
+#define B_EHTTB_EN BIT(15)
+#define B_BEDGE_CFG GENMASK(1, 0)
+#define R_SU_PUNC 0x6C08
+#define B_SU_PUNC_EN BIT(1)
+#define R_BEDGE5 0x6C10
+#define B_HWGEN_EN BIT(25)
+#define B_PWROFST_COMP BIT(20)
#define R_RPL_BIAS_COMP1 0x6DF0
#define B_RPL_BIAS_COMP1_MASK GENMASK(7, 0)
+#define R_DBCC_FA 0x703C
+#define B_DBCC_FA BIT(12)
#define R_P1_TSSI_ALIM1 0x7630
#define B_P1_TSSI_ALIM1 GENMASK(29, 0)
#define B_P1_TSSI_ALIM11 GENMASK(29, 20)
@@ -8389,8 +8922,12 @@
#define B_PRT_COM_RXBB_V1 GENMASK(4, 0)
#define B_PRT_COM_DONE BIT(0)
#define R_COEF_SEL 0x8104
+#define R_COEF_SEL_C1 0x8204
#define B_COEF_SEL_IQC BIT(0)
+#define B_COEF_SEL_IQC_V1 GENMASK(1, 0)
#define B_COEF_SEL_MDPD BIT(8)
+#define B_COEF_SEL_MDPD_V1 GENMASK(9, 8)
+#define B_COEF_SEL_EN BIT(31)
#define R_CFIR_SYS 0x8120
#define R_IQK_RES 0x8124
#define B_IQK_RES_K BIT(28)
@@ -8412,8 +8949,10 @@
#define B_RFGAIN_BND GENMASK(4, 0)
#define R_CFIR_MAP 0x8150
#define R_CFIR_LUT 0x8154
+#define R_CFIR_LUT_C1 0x8254
#define B_CFIR_LUT_SEL BIT(8)
#define B_CFIR_LUT_SET BIT(4)
+#define B_CFIR_LUT_G5 BIT(5)
#define B_CFIR_LUT_G3 BIT(3)
#define B_CFIR_LUT_G2 BIT(2)
#define B_CFIR_LUT_GP_V1 GENMASK(2, 0)
@@ -8626,6 +9165,35 @@
#define B_DACKN0_V GENMASK(21, 14)
#define R_DACKN1_CTL 0xC224
#define B_DACKN1_V GENMASK(21, 14)
+#define R_GAIN_MAP0 0xE44C
+#define B_GAIN_MAP0_EN BIT(0)
+#define R_GAIN_MAP1 0xE54C
+#define B_GAIN_MAP1_EN BIT(0)
+#define R_GOTX_IQKDPK_C0 0xE464
+#define R_GOTX_IQKDPK_C1 0xE564
+#define B_GOTX_IQKDPK GENMASK(28, 27)
+#define R_IQK_DPK_PRST 0xE4AC
+#define R_IQK_DPK_PRST_C1 0xE5AC
+#define B_IQK_DPK_PRST BIT(27)
+#define R_TXPWR_RSTA 0xE60C
+#define B_TXPWR_RSTA BIT(16)
+#define R_TSSI_PWR_P0 0xE610
+#define R_TSSI_PWR_P1 0xE710
+#define B_TSSI_CONT_EN BIT(3)
+#define R_TSSI_MAP_OFST_P0 0xE620
+#define R_TSSI_MAP_OFST_P1 0xE720
+#define B_TSSI_MAP_OFST_OFDM GENMASK(17, 9)
+#define B_TSSI_MAP_OFST_CCK GENMASK(26, 18)
+#define R_TXAGC_REF0_P0 0xE628
+#define R_TXAGC_REF0_P1 0xE728
+#define B_TXAGC_REF0_OFDM_DBM GENMASK(8, 0)
+#define B_TXAGC_REF0_CCK_DBM GENMASK(17, 9)
+#define B_TXAGC_REF0_OFDM_CW GENMASK(26, 18)
+#define R_TXAGC_REF1_P0 0xE62C
+#define R_TXAGC_REF1_P1 0xE72C
+#define B_TXAGC_REF1_CCK_CW GENMASK(8, 0)
+#define R_TXPWR_RSTB 0xE70C
+#define B_TXPWR_RSTB BIT(16)
/* WiFi CPU local domain */
#define R_AX_WDT_CTRL 0x0040
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c
index 5c167a9278ce..51d3e61eaa1d 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c
@@ -901,7 +901,7 @@ static void rtw8851b_set_gain_error(struct rtw89_dev *rtwdev,
enum rtw89_subband subband,
enum rtw89_rf_path path)
{
- const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 gain_band = rtw89_subband_to_bb_gain_band(subband);
s32 val;
u32 reg;
@@ -987,7 +987,7 @@ next:
static
void rtw8851b_set_rxsc_rpl_comp(struct rtw89_dev *rtwdev, enum rtw89_subband subband)
{
- const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 band = rtw89_subband_to_bb_gain_band(subband);
u32 val;
@@ -1921,41 +1921,81 @@ static u8 rtw8851b_get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_p
static void rtw8851b_btc_set_rfe(struct rtw89_dev *rtwdev)
{
- struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = rtwdev->btc.ver;
+ union rtw89_btc_module_info *md = &rtwdev->btc.mdinfo;
- module->rfe_type = rtwdev->efuse.rfe_type;
- module->cv = rtwdev->hal.cv;
- module->bt_solo = 0;
- module->switch_type = BTC_SWITCH_INTERNAL;
- module->ant.isolation = 10;
- module->kt_ver_adie = rtwdev->hal.acv;
+ if (ver->fcxinit == 7) {
+ md->md_v7.rfe_type = rtwdev->efuse.rfe_type;
+ md->md_v7.kt_ver = rtwdev->hal.cv;
+ md->md_v7.bt_solo = 0;
+ md->md_v7.switch_type = BTC_SWITCH_INTERNAL;
+ md->md_v7.ant.isolation = 10;
+ md->md_v7.kt_ver_adie = rtwdev->hal.acv;
- if (module->rfe_type == 0)
- return;
+ if (md->md_v7.rfe_type == 0)
+ return;
- /* rfe_type 3*n+1: 1-Ant(shared),
- * 3*n+2: 2-Ant+Div(non-shared),
- * 3*n+3: 2-Ant+no-Div(non-shared)
- */
- module->ant.num = (module->rfe_type % 3 == 1) ? 1 : 2;
- /* WL-1ss at S0, btg at s0 (On 1 WL RF) */
- module->ant.single_pos = RF_PATH_A;
- module->ant.btg_pos = RF_PATH_A;
- module->ant.stream_cnt = 1;
-
- if (module->ant.num == 1) {
- module->ant.type = BTC_ANT_SHARED;
- module->bt_pos = BTC_BT_BTG;
- module->wa_type = 1;
- module->ant.diversity = 0;
- } else { /* ant.num == 2 */
- module->ant.type = BTC_ANT_DEDICATED;
- module->bt_pos = BTC_BT_ALONE;
- module->switch_type = BTC_SWITCH_EXTERNAL;
- module->wa_type = 0;
- if (module->rfe_type % 3 == 2)
- module->ant.diversity = 1;
+ /* rfe_type 3*n+1: 1-Ant(shared),
+ * 3*n+2: 2-Ant+Div(non-shared),
+ * 3*n+3: 2-Ant+no-Div(non-shared)
+ */
+ md->md_v7.ant.num = (md->md_v7.rfe_type % 3 == 1) ? 1 : 2;
+ /* WL-1ss at S0, btg at s0 (On 1 WL RF) */
+ md->md_v7.ant.single_pos = RF_PATH_A;
+ md->md_v7.ant.btg_pos = RF_PATH_A;
+ md->md_v7.ant.stream_cnt = 1;
+
+ if (md->md_v7.ant.num == 1) {
+ md->md_v7.ant.type = BTC_ANT_SHARED;
+ md->md_v7.bt_pos = BTC_BT_BTG;
+ md->md_v7.wa_type = 1;
+ md->md_v7.ant.diversity = 0;
+ } else { /* ant.num == 2 */
+ md->md_v7.ant.type = BTC_ANT_DEDICATED;
+ md->md_v7.bt_pos = BTC_BT_ALONE;
+ md->md_v7.switch_type = BTC_SWITCH_EXTERNAL;
+ md->md_v7.wa_type = 0;
+ if (md->md_v7.rfe_type % 3 == 2)
+ md->md_v7.ant.diversity = 1;
+ }
+ rtwdev->btc.btg_pos = md->md_v7.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md_v7.ant.type;
+ } else {
+ md->md.rfe_type = rtwdev->efuse.rfe_type;
+ md->md.cv = rtwdev->hal.cv;
+ md->md.bt_solo = 0;
+ md->md.switch_type = BTC_SWITCH_INTERNAL;
+ md->md.ant.isolation = 10;
+ md->md.kt_ver_adie = rtwdev->hal.acv;
+
+ if (md->md.rfe_type == 0)
+ return;
+
+ /* rfe_type 3*n+1: 1-Ant(shared),
+ * 3*n+2: 2-Ant+Div(non-shared),
+ * 3*n+3: 2-Ant+no-Div(non-shared)
+ */
+ md->md.ant.num = (md->md.rfe_type % 3 == 1) ? 1 : 2;
+ /* WL-1ss at S0, btg at s0 (On 1 WL RF) */
+ md->md.ant.single_pos = RF_PATH_A;
+ md->md.ant.btg_pos = RF_PATH_A;
+ md->md.ant.stream_cnt = 1;
+
+ if (md->md.ant.num == 1) {
+ md->md.ant.type = BTC_ANT_SHARED;
+ md->md.bt_pos = BTC_BT_BTG;
+ md->md.wa_type = 1;
+ md->md.ant.diversity = 0;
+ } else { /* ant.num == 2 */
+ md->md.ant.type = BTC_ANT_DEDICATED;
+ md->md.bt_pos = BTC_BT_ALONE;
+ md->md.switch_type = BTC_SWITCH_EXTERNAL;
+ md->md.wa_type = 0;
+ if (md->md.rfe_type % 3 == 2)
+ md->md.ant.diversity = 1;
+ }
+ rtwdev->btc.btg_pos = md->md.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md.ant.type;
}
}
@@ -1965,7 +2005,7 @@ void rtw8851b_set_trx_mask(struct rtw89_dev *rtwdev, u8 path, u8 group, u32 val)
if (group > BTC_BT_SS_GROUP)
group--; /* Tx-group=1, Rx-group=2 */
- if (rtwdev->btc.mdinfo.ant.type == BTC_ANT_SHARED) /* 1-Ant */
+ if (rtwdev->btc.ant_type == BTC_ANT_SHARED) /* 1-Ant */
group += 3;
rtw89_write_rf(rtwdev, path, RR_LUTWA, RFREG_MASK, group);
@@ -1980,9 +2020,9 @@ static void rtw8851b_btc_init_cfg(struct rtw89_dev *rtwdev)
};
const struct rtw89_chip_info *chip = rtwdev->chip;
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
- struct rtw89_btc_ant_info *ant = &module->ant;
- u8 path, path_min, path_max;
+ union rtw89_btc_module_info *md = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = btc->ver;
+ u8 path, path_min, path_max, str_cnt, ant_sing_pos;
/* PTA init */
rtw89_mac_coex_init(rtwdev, &coex_params);
@@ -1991,9 +2031,17 @@ static void rtw8851b_btc_init_cfg(struct rtw89_dev *rtwdev)
chip->ops->btc_set_wl_pri(rtwdev, BTC_PRI_MASK_TX_RESP, true);
chip->ops->btc_set_wl_pri(rtwdev, BTC_PRI_MASK_BEACON, true);
+ if (ver->fcxinit == 7) {
+ str_cnt = md->md_v7.ant.stream_cnt;
+ ant_sing_pos = md->md_v7.ant.single_pos;
+ } else {
+ str_cnt = md->md.ant.stream_cnt;
+ ant_sing_pos = md->md.ant.single_pos;
+ }
+
/* for 1-Ant && 1-ss case: only 1-path */
- if (ant->stream_cnt == 1) {
- path_min = ant->single_pos;
+ if (str_cnt == 1) {
+ path_min = ant_sing_pos;
path_max = path_min;
} else {
path_min = RF_PATH_A;
@@ -2016,7 +2064,7 @@ static void rtw8851b_btc_init_cfg(struct rtw89_dev *rtwdev)
/* if GNT_WL = 0 && BT = Tx_group -->
* Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff)
*/
- if (ant->type == BTC_ANT_SHARED && ant->btg_pos == path)
+ if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path)
rtw8851b_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x55f);
else
rtw8851b_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
@@ -2148,19 +2196,18 @@ void rtw8851b_btc_update_bt_cnt(struct rtw89_dev *rtwdev)
static void rtw8851b_btc_wl_s1_standby(struct rtw89_dev *rtwdev, bool state)
{
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_ant_info *ant = &btc->mdinfo.ant;
- rtw89_write_rf(rtwdev, ant->btg_pos, RR_LUTWE, RFREG_MASK, 0x80000);
- rtw89_write_rf(rtwdev, ant->btg_pos, RR_LUTWA, RFREG_MASK, 0x1);
- rtw89_write_rf(rtwdev, ant->btg_pos, RR_LUTWD1, RFREG_MASK, 0x110);
+ rtw89_write_rf(rtwdev, btc->btg_pos, RR_LUTWE, RFREG_MASK, 0x80000);
+ rtw89_write_rf(rtwdev, btc->btg_pos, RR_LUTWA, RFREG_MASK, 0x1);
+ rtw89_write_rf(rtwdev, btc->btg_pos, RR_LUTWD1, RFREG_MASK, 0x110);
/* set WL standby = Rx for GNT_BT_Tx = 1->0 settle issue */
if (state)
- rtw89_write_rf(rtwdev, ant->btg_pos, RR_LUTWD0, RFREG_MASK, 0x179c);
+ rtw89_write_rf(rtwdev, btc->btg_pos, RR_LUTWD0, RFREG_MASK, 0x179c);
else
- rtw89_write_rf(rtwdev, ant->btg_pos, RR_LUTWD0, RFREG_MASK, 0x208);
+ rtw89_write_rf(rtwdev, btc->btg_pos, RR_LUTWD0, RFREG_MASK, 0x208);
- rtw89_write_rf(rtwdev, ant->btg_pos, RR_LUTWE, RFREG_MASK, 0x0);
+ rtw89_write_rf(rtwdev, btc->btg_pos, RR_LUTWE, RFREG_MASK, 0x0);
}
#define LNA2_51B_MA 0x700
@@ -2175,7 +2222,6 @@ static void rtw8851b_btc_set_wl_rx_gain(struct rtw89_dev *rtwdev, u32 level)
* level=1 Fix LNA2=5: TIA 1/0= (LNA2,TIAN6) = (5,0)/(5,1) = 18dB/12dB
*/
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_ant_info *ant = &btc->mdinfo.ant;
const struct rtw89_reg2_def *rf;
u32 n, i, val;
@@ -2203,10 +2249,10 @@ static void rtw8851b_btc_set_wl_rx_gain(struct rtw89_dev *rtwdev, u32 level)
for (i = 0; i < n; i++, rf++) {
val = rf->data;
/* bit[10] = 1 if non-shared-ant for 8851b */
- if (btc->mdinfo.ant.type == BTC_ANT_DEDICATED)
+ if (btc->ant_type == BTC_ANT_DEDICATED)
val |= 0x4;
- rtw89_write_rf(rtwdev, ant->btg_pos, rf->addr, LNA2_51B_MA, val);
+ rtw89_write_rf(rtwdev, btc->btg_pos, rf->addr, LNA2_51B_MA, val);
}
}
@@ -2299,6 +2345,7 @@ static const struct rtw89_chip_ops rtw8851b_chip_ops = {
.enable_bb_rf = rtw8851b_mac_enable_bb_rf,
.disable_bb_rf = rtw8851b_mac_disable_bb_rf,
.bb_preinit = NULL,
+ .bb_postinit = NULL,
.bb_reset = rtw8851b_bb_reset,
.bb_sethw = rtw8851b_bb_sethw,
.read_rf = rtw89_phy_read_rf_v1,
@@ -2309,7 +2356,9 @@ static const struct rtw89_chip_ops rtw8851b_chip_ops = {
.read_phycap = rtw8851b_read_phycap,
.fem_setup = NULL,
.rfe_gpio = rtw8851b_rfe_gpio,
+ .rfk_hw_init = NULL,
.rfk_init = rtw8851b_rfk_init,
+ .rfk_init_late = NULL,
.rfk_channel = rtw8851b_rfk_channel,
.rfk_band_changed = rtw8851b_rfk_band_changed,
.rfk_scan = rtw8851b_rfk_scan,
@@ -2334,6 +2383,12 @@ static const struct rtw89_chip_ops rtw8851b_chip_ops = {
.stop_sch_tx = rtw89_mac_stop_sch_tx,
.resume_sch_tx = rtw89_mac_resume_sch_tx,
.h2c_dctl_sec_cam = NULL,
+ .h2c_default_cmac_tbl = rtw89_fw_h2c_default_cmac_tbl,
+ .h2c_assoc_cmac_tbl = rtw89_fw_h2c_assoc_cmac_tbl,
+ .h2c_ampdu_cmac_tbl = NULL,
+ .h2c_default_dmac_tbl = NULL,
+ .h2c_update_beacon = rtw89_fw_h2c_update_beacon,
+ .h2c_ba_cam = rtw89_fw_h2c_ba_cam,
.btc_set_rfe = rtw8851b_btc_set_rfe,
.btc_init_cfg = rtw8851b_btc_init_cfg,
@@ -2394,7 +2449,9 @@ const struct rtw89_chip_info rtw8851b_chip_info = {
.support_chanctx_num = 0,
.support_bands = BIT(NL80211_BAND_2GHZ) |
BIT(NL80211_BAND_5GHZ),
- .support_bw160 = false,
+ .support_bandwidths = BIT(NL80211_CHAN_WIDTH_20) |
+ BIT(NL80211_CHAN_WIDTH_40) |
+ BIT(NL80211_CHAN_WIDTH_80),
.support_unii4 = true,
.ul_tb_waveform_ctrl = true,
.ul_tb_pwr_diff = false,
@@ -2449,6 +2506,7 @@ const struct rtw89_chip_info rtw8851b_chip_info = {
.c2h_counter_reg = {R_AX_UDM1 + 1, B_AX_UDM1_HALMAC_C2H_ENQ_CNT_MASK >> 8},
.c2h_regs = rtw8851b_c2h_regs,
.page_regs = &rtw8851b_page_regs,
+ .wow_reason_reg = R_AX_C2HREG_DATA3 + 3,
.cfo_src_fd = true,
.cfo_hw_comp = true,
.dcfo_comp = &rtw8851b_dcfo_comp,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b_table.c b/drivers/net/wireless/realtek/rtw89/rtw8851b_table.c
index 8cb5bde8f625..522883c8dfb9 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8851b_table.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8851b_table.c
@@ -5345,7 +5345,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][48] = 72,
[0][0][1][0][RTW89_ETSI][48] = 127,
[0][0][1][0][RTW89_MKK][48] = 127,
- [0][0][1][0][RTW89_IC][48] = 127,
+ [0][0][1][0][RTW89_IC][48] = 72,
[0][0][1][0][RTW89_KCC][48] = 127,
[0][0][1][0][RTW89_ACMA][48] = 127,
[0][0][1][0][RTW89_CN][48] = 127,
@@ -5353,7 +5353,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][50] = 72,
[0][0][1][0][RTW89_ETSI][50] = 127,
[0][0][1][0][RTW89_MKK][50] = 127,
- [0][0][1][0][RTW89_IC][50] = 127,
+ [0][0][1][0][RTW89_IC][50] = 72,
[0][0][1][0][RTW89_KCC][50] = 127,
[0][0][1][0][RTW89_ACMA][50] = 127,
[0][0][1][0][RTW89_CN][50] = 127,
@@ -5361,7 +5361,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][52] = 72,
[0][0][1][0][RTW89_ETSI][52] = 127,
[0][0][1][0][RTW89_MKK][52] = 127,
- [0][0][1][0][RTW89_IC][52] = 127,
+ [0][0][1][0][RTW89_IC][52] = 72,
[0][0][1][0][RTW89_KCC][52] = 127,
[0][0][1][0][RTW89_ACMA][52] = 127,
[0][0][1][0][RTW89_CN][52] = 127,
@@ -5793,7 +5793,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][48] = 74,
[0][0][2][0][RTW89_ETSI][48] = 127,
[0][0][2][0][RTW89_MKK][48] = 127,
- [0][0][2][0][RTW89_IC][48] = 127,
+ [0][0][2][0][RTW89_IC][48] = 74,
[0][0][2][0][RTW89_KCC][48] = 127,
[0][0][2][0][RTW89_ACMA][48] = 127,
[0][0][2][0][RTW89_CN][48] = 127,
@@ -5801,7 +5801,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][50] = 76,
[0][0][2][0][RTW89_ETSI][50] = 127,
[0][0][2][0][RTW89_MKK][50] = 127,
- [0][0][2][0][RTW89_IC][50] = 127,
+ [0][0][2][0][RTW89_IC][50] = 76,
[0][0][2][0][RTW89_KCC][50] = 127,
[0][0][2][0][RTW89_ACMA][50] = 127,
[0][0][2][0][RTW89_CN][50] = 127,
@@ -5809,7 +5809,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][52] = 76,
[0][0][2][0][RTW89_ETSI][52] = 127,
[0][0][2][0][RTW89_MKK][52] = 127,
- [0][0][2][0][RTW89_IC][52] = 127,
+ [0][0][2][0][RTW89_IC][52] = 76,
[0][0][2][0][RTW89_KCC][52] = 127,
[0][0][2][0][RTW89_ACMA][52] = 127,
[0][0][2][0][RTW89_CN][52] = 127,
@@ -6361,7 +6361,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][0][2][0][RTW89_FCC][47] = 84,
[1][0][2][0][RTW89_ETSI][47] = 127,
[1][0][2][0][RTW89_MKK][47] = 127,
- [1][0][2][0][RTW89_IC][47] = 127,
+ [1][0][2][0][RTW89_IC][47] = 84,
[1][0][2][0][RTW89_KCC][47] = 127,
[1][0][2][0][RTW89_ACMA][47] = 127,
[1][0][2][0][RTW89_CN][47] = 127,
@@ -6369,7 +6369,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][0][2][0][RTW89_FCC][51] = 84,
[1][0][2][0][RTW89_ETSI][51] = 127,
[1][0][2][0][RTW89_MKK][51] = 127,
- [1][0][2][0][RTW89_IC][51] = 127,
+ [1][0][2][0][RTW89_IC][51] = 84,
[1][0][2][0][RTW89_KCC][51] = 127,
[1][0][2][0][RTW89_ACMA][51] = 127,
[1][0][2][0][RTW89_CN][51] = 127,
@@ -6649,7 +6649,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[2][0][2][0][RTW89_FCC][49] = 74,
[2][0][2][0][RTW89_ETSI][49] = 127,
[2][0][2][0][RTW89_MKK][49] = 127,
- [2][0][2][0][RTW89_IC][49] = 127,
+ [2][0][2][0][RTW89_IC][49] = 74,
[2][0][2][0][RTW89_KCC][49] = 127,
[2][0][2][0][RTW89_ACMA][49] = 127,
[2][0][2][0][RTW89_CN][49] = 127,
@@ -7975,7 +7975,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][48] = 42,
[0][0][RTW89_ETSI][48] = 127,
[0][0][RTW89_MKK][48] = 127,
- [0][0][RTW89_IC][48] = 127,
+ [0][0][RTW89_IC][48] = 42,
[0][0][RTW89_KCC][48] = 127,
[0][0][RTW89_ACMA][48] = 127,
[0][0][RTW89_CN][48] = 127,
@@ -7983,7 +7983,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][50] = 42,
[0][0][RTW89_ETSI][50] = 127,
[0][0][RTW89_MKK][50] = 127,
- [0][0][RTW89_IC][50] = 127,
+ [0][0][RTW89_IC][50] = 42,
[0][0][RTW89_KCC][50] = 127,
[0][0][RTW89_ACMA][50] = 127,
[0][0][RTW89_CN][50] = 127,
@@ -7991,7 +7991,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][52] = 40,
[0][0][RTW89_ETSI][52] = 127,
[0][0][RTW89_MKK][52] = 127,
- [0][0][RTW89_IC][52] = 127,
+ [0][0][RTW89_IC][52] = 40,
[0][0][RTW89_KCC][52] = 127,
[0][0][RTW89_ACMA][52] = 127,
[0][0][RTW89_CN][52] = 127,
@@ -8423,7 +8423,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][48] = 52,
[1][0][RTW89_ETSI][48] = 127,
[1][0][RTW89_MKK][48] = 127,
- [1][0][RTW89_IC][48] = 127,
+ [1][0][RTW89_IC][48] = 52,
[1][0][RTW89_KCC][48] = 127,
[1][0][RTW89_ACMA][48] = 127,
[1][0][RTW89_CN][48] = 127,
@@ -8431,7 +8431,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][50] = 52,
[1][0][RTW89_ETSI][50] = 127,
[1][0][RTW89_MKK][50] = 127,
- [1][0][RTW89_IC][50] = 127,
+ [1][0][RTW89_IC][50] = 52,
[1][0][RTW89_KCC][50] = 127,
[1][0][RTW89_ACMA][50] = 127,
[1][0][RTW89_CN][50] = 127,
@@ -8439,7 +8439,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][52] = 52,
[1][0][RTW89_ETSI][52] = 127,
[1][0][RTW89_MKK][52] = 127,
- [1][0][RTW89_IC][52] = 127,
+ [1][0][RTW89_IC][52] = 52,
[1][0][RTW89_KCC][52] = 127,
[1][0][RTW89_ACMA][52] = 127,
[1][0][RTW89_CN][52] = 127,
@@ -8871,7 +8871,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_FCC][48] = 64,
[2][0][RTW89_ETSI][48] = 127,
[2][0][RTW89_MKK][48] = 127,
- [2][0][RTW89_IC][48] = 127,
+ [2][0][RTW89_IC][48] = 64,
[2][0][RTW89_KCC][48] = 127,
[2][0][RTW89_ACMA][48] = 127,
[2][0][RTW89_CN][48] = 127,
@@ -8879,7 +8879,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_FCC][50] = 64,
[2][0][RTW89_ETSI][50] = 127,
[2][0][RTW89_MKK][50] = 127,
- [2][0][RTW89_IC][50] = 127,
+ [2][0][RTW89_IC][50] = 64,
[2][0][RTW89_KCC][50] = 127,
[2][0][RTW89_ACMA][50] = 127,
[2][0][RTW89_CN][50] = 127,
@@ -8887,7 +8887,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_FCC][52] = 60,
[2][0][RTW89_ETSI][52] = 127,
[2][0][RTW89_MKK][52] = 127,
- [2][0][RTW89_IC][52] = 127,
+ [2][0][RTW89_IC][52] = 60,
[2][0][RTW89_KCC][52] = 127,
[2][0][RTW89_ACMA][52] = 127,
[2][0][RTW89_CN][52] = 127,
@@ -11055,7 +11055,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][48] = 72,
[0][0][1][0][RTW89_ETSI][48] = 127,
[0][0][1][0][RTW89_MKK][48] = 127,
- [0][0][1][0][RTW89_IC][48] = 127,
+ [0][0][1][0][RTW89_IC][48] = 72,
[0][0][1][0][RTW89_KCC][48] = 127,
[0][0][1][0][RTW89_ACMA][48] = 127,
[0][0][1][0][RTW89_CN][48] = 127,
@@ -11063,7 +11063,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][50] = 72,
[0][0][1][0][RTW89_ETSI][50] = 127,
[0][0][1][0][RTW89_MKK][50] = 127,
- [0][0][1][0][RTW89_IC][50] = 127,
+ [0][0][1][0][RTW89_IC][50] = 72,
[0][0][1][0][RTW89_KCC][50] = 127,
[0][0][1][0][RTW89_ACMA][50] = 127,
[0][0][1][0][RTW89_CN][50] = 127,
@@ -11071,7 +11071,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][52] = 72,
[0][0][1][0][RTW89_ETSI][52] = 127,
[0][0][1][0][RTW89_MKK][52] = 127,
- [0][0][1][0][RTW89_IC][52] = 127,
+ [0][0][1][0][RTW89_IC][52] = 72,
[0][0][1][0][RTW89_KCC][52] = 127,
[0][0][1][0][RTW89_ACMA][52] = 127,
[0][0][1][0][RTW89_CN][52] = 127,
@@ -11503,7 +11503,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][48] = 74,
[0][0][2][0][RTW89_ETSI][48] = 127,
[0][0][2][0][RTW89_MKK][48] = 127,
- [0][0][2][0][RTW89_IC][48] = 127,
+ [0][0][2][0][RTW89_IC][48] = 74,
[0][0][2][0][RTW89_KCC][48] = 127,
[0][0][2][0][RTW89_ACMA][48] = 127,
[0][0][2][0][RTW89_CN][48] = 127,
@@ -11511,7 +11511,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][50] = 74,
[0][0][2][0][RTW89_ETSI][50] = 127,
[0][0][2][0][RTW89_MKK][50] = 127,
- [0][0][2][0][RTW89_IC][50] = 127,
+ [0][0][2][0][RTW89_IC][50] = 74,
[0][0][2][0][RTW89_KCC][50] = 127,
[0][0][2][0][RTW89_ACMA][50] = 127,
[0][0][2][0][RTW89_CN][50] = 127,
@@ -11519,7 +11519,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][52] = 74,
[0][0][2][0][RTW89_ETSI][52] = 127,
[0][0][2][0][RTW89_MKK][52] = 127,
- [0][0][2][0][RTW89_IC][52] = 127,
+ [0][0][2][0][RTW89_IC][52] = 74,
[0][0][2][0][RTW89_KCC][52] = 127,
[0][0][2][0][RTW89_ACMA][52] = 127,
[0][0][2][0][RTW89_CN][52] = 127,
@@ -12071,7 +12071,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][0][2][0][RTW89_FCC][47] = 80,
[1][0][2][0][RTW89_ETSI][47] = 127,
[1][0][2][0][RTW89_MKK][47] = 127,
- [1][0][2][0][RTW89_IC][47] = 127,
+ [1][0][2][0][RTW89_IC][47] = 80,
[1][0][2][0][RTW89_KCC][47] = 127,
[1][0][2][0][RTW89_ACMA][47] = 127,
[1][0][2][0][RTW89_CN][47] = 127,
@@ -12079,7 +12079,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][0][2][0][RTW89_FCC][51] = 80,
[1][0][2][0][RTW89_ETSI][51] = 127,
[1][0][2][0][RTW89_MKK][51] = 127,
- [1][0][2][0][RTW89_IC][51] = 127,
+ [1][0][2][0][RTW89_IC][51] = 80,
[1][0][2][0][RTW89_KCC][51] = 127,
[1][0][2][0][RTW89_ACMA][51] = 127,
[1][0][2][0][RTW89_CN][51] = 127,
@@ -12359,7 +12359,7 @@ const s8 rtw89_8851b_txpwr_lmt_5g_type2[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[2][0][2][0][RTW89_FCC][49] = 72,
[2][0][2][0][RTW89_ETSI][49] = 127,
[2][0][2][0][RTW89_MKK][49] = 127,
- [2][0][2][0][RTW89_IC][49] = 127,
+ [2][0][2][0][RTW89_IC][49] = 72,
[2][0][2][0][RTW89_KCC][49] = 127,
[2][0][2][0][RTW89_ACMA][49] = 127,
[2][0][2][0][RTW89_CN][49] = 127,
@@ -13685,7 +13685,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][48] = 40,
[0][0][RTW89_ETSI][48] = 127,
[0][0][RTW89_MKK][48] = 127,
- [0][0][RTW89_IC][48] = 127,
+ [0][0][RTW89_IC][48] = 40,
[0][0][RTW89_KCC][48] = 127,
[0][0][RTW89_ACMA][48] = 127,
[0][0][RTW89_CN][48] = 127,
@@ -13693,7 +13693,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][50] = 42,
[0][0][RTW89_ETSI][50] = 127,
[0][0][RTW89_MKK][50] = 127,
- [0][0][RTW89_IC][50] = 127,
+ [0][0][RTW89_IC][50] = 42,
[0][0][RTW89_KCC][50] = 127,
[0][0][RTW89_ACMA][50] = 127,
[0][0][RTW89_CN][50] = 127,
@@ -13701,7 +13701,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][52] = 38,
[0][0][RTW89_ETSI][52] = 127,
[0][0][RTW89_MKK][52] = 127,
- [0][0][RTW89_IC][52] = 127,
+ [0][0][RTW89_IC][52] = 38,
[0][0][RTW89_KCC][52] = 127,
[0][0][RTW89_ACMA][52] = 127,
[0][0][RTW89_CN][52] = 127,
@@ -14133,7 +14133,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][48] = 52,
[1][0][RTW89_ETSI][48] = 127,
[1][0][RTW89_MKK][48] = 127,
- [1][0][RTW89_IC][48] = 127,
+ [1][0][RTW89_IC][48] = 52,
[1][0][RTW89_KCC][48] = 127,
[1][0][RTW89_ACMA][48] = 127,
[1][0][RTW89_CN][48] = 127,
@@ -14141,7 +14141,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][50] = 52,
[1][0][RTW89_ETSI][50] = 127,
[1][0][RTW89_MKK][50] = 127,
- [1][0][RTW89_IC][50] = 127,
+ [1][0][RTW89_IC][50] = 52,
[1][0][RTW89_KCC][50] = 127,
[1][0][RTW89_ACMA][50] = 127,
[1][0][RTW89_CN][50] = 127,
@@ -14149,7 +14149,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][52] = 50,
[1][0][RTW89_ETSI][52] = 127,
[1][0][RTW89_MKK][52] = 127,
- [1][0][RTW89_IC][52] = 127,
+ [1][0][RTW89_IC][52] = 50,
[1][0][RTW89_KCC][52] = 127,
[1][0][RTW89_ACMA][52] = 127,
[1][0][RTW89_CN][52] = 127,
@@ -14581,7 +14581,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_FCC][48] = 62,
[2][0][RTW89_ETSI][48] = 127,
[2][0][RTW89_MKK][48] = 127,
- [2][0][RTW89_IC][48] = 127,
+ [2][0][RTW89_IC][48] = 62,
[2][0][RTW89_KCC][48] = 127,
[2][0][RTW89_ACMA][48] = 127,
[2][0][RTW89_CN][48] = 127,
@@ -14589,7 +14589,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_FCC][50] = 62,
[2][0][RTW89_ETSI][50] = 127,
[2][0][RTW89_MKK][50] = 127,
- [2][0][RTW89_IC][50] = 127,
+ [2][0][RTW89_IC][50] = 62,
[2][0][RTW89_KCC][50] = 127,
[2][0][RTW89_ACMA][50] = 127,
[2][0][RTW89_CN][50] = 127,
@@ -14597,7 +14597,7 @@ const s8 rtw89_8851b_txpwr_lmt_ru_5g_type2[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_FCC][52] = 60,
[2][0][RTW89_ETSI][52] = 127,
[2][0][RTW89_MKK][52] = 127,
- [2][0][RTW89_IC][52] = 127,
+ [2][0][RTW89_IC][52] = 60,
[2][0][RTW89_KCC][52] = 127,
[2][0][RTW89_ACMA][52] = 127,
[2][0][RTW89_CN][52] = 127,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851be.c b/drivers/net/wireless/realtek/rtw89/rtw8851be.c
index ade69bd30fc8..ca1374a71727 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8851be.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8851be.c
@@ -25,6 +25,8 @@ static const struct rtw89_pci_info rtw8851b_pci_info = {
.autok_en = MAC_AX_PCIE_DISABLE,
.io_rcy_en = MAC_AX_PCIE_DISABLE,
.io_rcy_tmr = MAC_AX_IO_RCY_ANA_TMR_6MS,
+ .rx_ring_eq_is_full = false,
+ .check_rx_tag = false,
.init_cfg_reg = R_AX_PCIE_INIT_CFG1,
.txhci_en_bit = B_AX_TXHCI_EN,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c
index 0c76c52ce22c..2deadec715cf 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c
@@ -1665,28 +1665,55 @@ static u8 rtw8852a_get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_p
static void rtw8852a_btc_set_rfe(struct rtw89_dev *rtwdev)
{
- struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = rtwdev->btc.ver;
+ union rtw89_btc_module_info *md = &rtwdev->btc.mdinfo;
- module->rfe_type = rtwdev->efuse.rfe_type;
- module->cv = rtwdev->hal.cv;
- module->bt_solo = 0;
- module->switch_type = BTC_SWITCH_INTERNAL;
+ if (ver->fcxinit == 7) {
+ md->md_v7.rfe_type = rtwdev->efuse.rfe_type;
+ md->md_v7.kt_ver = rtwdev->hal.cv;
+ md->md_v7.bt_solo = 0;
+ md->md_v7.switch_type = BTC_SWITCH_INTERNAL;
- if (module->rfe_type > 0)
- module->ant.num = (module->rfe_type % 2 ? 2 : 3);
- else
- module->ant.num = 2;
+ if (md->md_v7.rfe_type > 0)
+ md->md_v7.ant.num = (md->md_v7.rfe_type % 2 ? 2 : 3);
+ else
+ md->md_v7.ant.num = 2;
- module->ant.diversity = 0;
- module->ant.isolation = 10;
+ md->md_v7.ant.diversity = 0;
+ md->md_v7.ant.isolation = 10;
- if (module->ant.num == 3) {
- module->ant.type = BTC_ANT_DEDICATED;
- module->bt_pos = BTC_BT_ALONE;
+ if (md->md_v7.ant.num == 3) {
+ md->md_v7.ant.type = BTC_ANT_DEDICATED;
+ md->md_v7.bt_pos = BTC_BT_ALONE;
+ } else {
+ md->md_v7.ant.type = BTC_ANT_SHARED;
+ md->md_v7.bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = md->md_v7.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md_v7.ant.type;
} else {
- module->ant.type = BTC_ANT_SHARED;
- module->bt_pos = BTC_BT_BTG;
+ md->md.rfe_type = rtwdev->efuse.rfe_type;
+ md->md.cv = rtwdev->hal.cv;
+ md->md.bt_solo = 0;
+ md->md.switch_type = BTC_SWITCH_INTERNAL;
+
+ if (md->md.rfe_type > 0)
+ md->md.ant.num = (md->md.rfe_type % 2 ? 2 : 3);
+ else
+ md->md.ant.num = 2;
+
+ md->md.ant.diversity = 0;
+ md->md.ant.isolation = 10;
+
+ if (md->md.ant.num == 3) {
+ md->md.ant.type = BTC_ANT_DEDICATED;
+ md->md.bt_pos = BTC_BT_ALONE;
+ } else {
+ md->md.ant.type = BTC_ANT_SHARED;
+ md->md.bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = md->md.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md.ant.type;
}
}
@@ -1717,7 +1744,6 @@ static void rtw8852a_ctrl_btg_bt_rx(struct rtw89_dev *rtwdev, bool en,
static void rtw8852a_btc_init_cfg(struct rtw89_dev *rtwdev)
{
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
const struct rtw89_chip_info *chip = rtwdev->chip;
const struct rtw89_mac_ax_coex coex_params = {
.pta_mode = RTW89_MAC_AX_COEX_RTK_MODE,
@@ -1736,7 +1762,7 @@ static void rtw8852a_btc_init_cfg(struct rtw89_dev *rtwdev)
rtw89_write_rf(rtwdev, RF_PATH_B, RR_WLSEL, 0xfffff, 0x0);
/* set WL Tx thru in TRX mask table if GNT_WL = 0 && BT_S1 = ss group */
- if (module->ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
rtw8852a_set_trx_mask(rtwdev,
RF_PATH_A, BTC_BT_SS_GROUP, 0x5ff);
rtw8852a_set_trx_mask(rtwdev,
@@ -2043,6 +2069,7 @@ static const struct rtw89_chip_ops rtw8852a_chip_ops = {
.enable_bb_rf = rtw89_mac_enable_bb_rf,
.disable_bb_rf = rtw89_mac_disable_bb_rf,
.bb_preinit = NULL,
+ .bb_postinit = NULL,
.bb_reset = rtw8852a_bb_reset,
.bb_sethw = rtw8852a_bb_sethw,
.read_rf = rtw89_phy_read_rf,
@@ -2053,7 +2080,9 @@ static const struct rtw89_chip_ops rtw8852a_chip_ops = {
.read_phycap = rtw8852a_read_phycap,
.fem_setup = rtw8852a_fem_setup,
.rfe_gpio = NULL,
+ .rfk_hw_init = NULL,
.rfk_init = rtw8852a_rfk_init,
+ .rfk_init_late = NULL,
.rfk_channel = rtw8852a_rfk_channel,
.rfk_band_changed = rtw8852a_rfk_band_changed,
.rfk_scan = rtw8852a_rfk_scan,
@@ -2078,6 +2107,12 @@ static const struct rtw89_chip_ops rtw8852a_chip_ops = {
.stop_sch_tx = rtw89_mac_stop_sch_tx,
.resume_sch_tx = rtw89_mac_resume_sch_tx,
.h2c_dctl_sec_cam = NULL,
+ .h2c_default_cmac_tbl = rtw89_fw_h2c_default_cmac_tbl,
+ .h2c_assoc_cmac_tbl = rtw89_fw_h2c_assoc_cmac_tbl,
+ .h2c_ampdu_cmac_tbl = NULL,
+ .h2c_default_dmac_tbl = NULL,
+ .h2c_update_beacon = rtw89_fw_h2c_update_beacon,
+ .h2c_ba_cam = rtw89_fw_h2c_ba_cam,
.btc_set_rfe = rtw8852a_btc_set_rfe,
.btc_init_cfg = rtw8852a_btc_init_cfg,
@@ -2130,7 +2165,9 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
.support_chanctx_num = 1,
.support_bands = BIT(NL80211_BAND_2GHZ) |
BIT(NL80211_BAND_5GHZ),
- .support_bw160 = false,
+ .support_bandwidths = BIT(NL80211_CHAN_WIDTH_20) |
+ BIT(NL80211_CHAN_WIDTH_40) |
+ BIT(NL80211_CHAN_WIDTH_80),
.support_unii4 = false,
.ul_tb_waveform_ctrl = false,
.ul_tb_pwr_diff = false,
@@ -2186,6 +2223,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = {
.c2h_regs = rtw8852a_c2h_regs,
.c2h_counter_reg = {R_AX_UDM1 + 1, B_AX_UDM1_HALMAC_C2H_ENQ_CNT_MASK >> 8},
.page_regs = &rtw8852a_page_regs,
+ .wow_reason_reg = R_AX_C2HREG_DATA3 + 3,
.cfo_src_fd = false,
.cfo_hw_comp = false,
.dcfo_comp = &rtw8852a_dcfo_comp,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852ae.c b/drivers/net/wireless/realtek/rtw89/rtw8852ae.c
index f1e890bde049..7c6ffedb77e2 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852ae.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852ae.c
@@ -26,6 +26,7 @@ static const struct rtw89_pci_info rtw8852a_pci_info = {
.io_rcy_en = MAC_AX_PCIE_DISABLE,
.io_rcy_tmr = MAC_AX_IO_RCY_ANA_TMR_6MS,
.rx_ring_eq_is_full = false,
+ .check_rx_tag = false,
.init_cfg_reg = R_AX_PCIE_INIT_CFG1,
.txhci_en_bit = B_AX_TXHCI_EN,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c
index de887a35f3fb..d025c4135e1c 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c
@@ -988,7 +988,7 @@ static void rtw8852b_set_gain_error(struct rtw89_dev *rtwdev,
enum rtw89_subband subband,
enum rtw89_rf_path path)
{
- const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 gain_band = rtw89_subband_to_bb_gain_band(subband);
s32 val;
u32 reg;
@@ -1086,7 +1086,7 @@ next:
static
void rtw8852b_set_rxsc_rpl_comp(struct rtw89_dev *rtwdev, enum rtw89_subband subband)
{
- const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 band = rtw89_subband_to_bb_gain_band(subband);
u32 val;
@@ -2125,28 +2125,55 @@ static u8 rtw8852b_get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_p
static void rtw8852b_btc_set_rfe(struct rtw89_dev *rtwdev)
{
- struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = rtwdev->btc.ver;
+ union rtw89_btc_module_info *md = &rtwdev->btc.mdinfo;
- module->rfe_type = rtwdev->efuse.rfe_type;
- module->cv = rtwdev->hal.cv;
- module->bt_solo = 0;
- module->switch_type = BTC_SWITCH_INTERNAL;
+ if (ver->fcxinit == 7) {
+ md->md_v7.rfe_type = rtwdev->efuse.rfe_type;
+ md->md_v7.kt_ver = rtwdev->hal.cv;
+ md->md_v7.bt_solo = 0;
+ md->md_v7.switch_type = BTC_SWITCH_INTERNAL;
- if (module->rfe_type > 0)
- module->ant.num = module->rfe_type % 2 ? 2 : 3;
- else
- module->ant.num = 2;
+ if (md->md_v7.rfe_type > 0)
+ md->md_v7.ant.num = (md->md_v7.rfe_type % 2 ? 2 : 3);
+ else
+ md->md_v7.ant.num = 2;
- module->ant.diversity = 0;
- module->ant.isolation = 10;
+ md->md_v7.ant.diversity = 0;
+ md->md_v7.ant.isolation = 10;
- if (module->ant.num == 3) {
- module->ant.type = BTC_ANT_DEDICATED;
- module->bt_pos = BTC_BT_ALONE;
+ if (md->md_v7.ant.num == 3) {
+ md->md_v7.ant.type = BTC_ANT_DEDICATED;
+ md->md_v7.bt_pos = BTC_BT_ALONE;
+ } else {
+ md->md_v7.ant.type = BTC_ANT_SHARED;
+ md->md_v7.bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = md->md_v7.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md_v7.ant.type;
} else {
- module->ant.type = BTC_ANT_SHARED;
- module->bt_pos = BTC_BT_BTG;
+ md->md.rfe_type = rtwdev->efuse.rfe_type;
+ md->md.cv = rtwdev->hal.cv;
+ md->md.bt_solo = 0;
+ md->md.switch_type = BTC_SWITCH_INTERNAL;
+
+ if (md->md.rfe_type > 0)
+ md->md.ant.num = (md->md.rfe_type % 2 ? 2 : 3);
+ else
+ md->md.ant.num = 2;
+
+ md->md.ant.diversity = 0;
+ md->md.ant.isolation = 10;
+
+ if (md->md.ant.num == 3) {
+ md->md.ant.type = BTC_ANT_DEDICATED;
+ md->md.bt_pos = BTC_BT_ALONE;
+ } else {
+ md->md.ant.type = BTC_ANT_SHARED;
+ md->md.bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = md->md.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md.ant.type;
}
}
@@ -2162,7 +2189,6 @@ void rtw8852b_set_trx_mask(struct rtw89_dev *rtwdev, u8 path, u8 group, u32 val)
static void rtw8852b_btc_init_cfg(struct rtw89_dev *rtwdev)
{
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
const struct rtw89_chip_info *chip = rtwdev->chip;
const struct rtw89_mac_ax_coex coex_params = {
.pta_mode = RTW89_MAC_AX_COEX_RTK_MODE,
@@ -2181,7 +2207,7 @@ static void rtw8852b_btc_init_cfg(struct rtw89_dev *rtwdev)
rtw89_write_rf(rtwdev, RF_PATH_B, RR_WLSEL, RFREG_MASK, 0x0);
/* set WL Tx thru in TRX mask table if GNT_WL = 0 && BT_S1 = ss group */
- if (module->ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
rtw8852b_set_trx_mask(rtwdev, RF_PATH_A, BTC_BT_SS_GROUP, 0x5ff);
rtw8852b_set_trx_mask(rtwdev, RF_PATH_B, BTC_BT_SS_GROUP, 0x5ff);
/* set path-A(S0) Tx/Rx no-mask if GNT_WL=0 && BT_S1=tx group */
@@ -2468,6 +2494,7 @@ static const struct rtw89_chip_ops rtw8852b_chip_ops = {
.enable_bb_rf = rtw8852b_mac_enable_bb_rf,
.disable_bb_rf = rtw8852b_mac_disable_bb_rf,
.bb_preinit = NULL,
+ .bb_postinit = NULL,
.bb_reset = rtw8852b_bb_reset,
.bb_sethw = rtw8852b_bb_sethw,
.read_rf = rtw89_phy_read_rf_v1,
@@ -2478,7 +2505,9 @@ static const struct rtw89_chip_ops rtw8852b_chip_ops = {
.read_phycap = rtw8852b_read_phycap,
.fem_setup = NULL,
.rfe_gpio = NULL,
+ .rfk_hw_init = NULL,
.rfk_init = rtw8852b_rfk_init,
+ .rfk_init_late = NULL,
.rfk_channel = rtw8852b_rfk_channel,
.rfk_band_changed = rtw8852b_rfk_band_changed,
.rfk_scan = rtw8852b_rfk_scan,
@@ -2503,6 +2532,12 @@ static const struct rtw89_chip_ops rtw8852b_chip_ops = {
.stop_sch_tx = rtw89_mac_stop_sch_tx,
.resume_sch_tx = rtw89_mac_resume_sch_tx,
.h2c_dctl_sec_cam = NULL,
+ .h2c_default_cmac_tbl = rtw89_fw_h2c_default_cmac_tbl,
+ .h2c_assoc_cmac_tbl = rtw89_fw_h2c_assoc_cmac_tbl,
+ .h2c_ampdu_cmac_tbl = NULL,
+ .h2c_default_dmac_tbl = NULL,
+ .h2c_update_beacon = rtw89_fw_h2c_update_beacon,
+ .h2c_ba_cam = rtw89_fw_h2c_ba_cam,
.btc_set_rfe = rtw8852b_btc_set_rfe,
.btc_init_cfg = rtw8852b_btc_init_cfg,
@@ -2564,7 +2599,9 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
.support_chanctx_num = 0,
.support_bands = BIT(NL80211_BAND_2GHZ) |
BIT(NL80211_BAND_5GHZ),
- .support_bw160 = false,
+ .support_bandwidths = BIT(NL80211_CHAN_WIDTH_20) |
+ BIT(NL80211_CHAN_WIDTH_40) |
+ BIT(NL80211_CHAN_WIDTH_80),
.support_unii4 = true,
.ul_tb_waveform_ctrl = true,
.ul_tb_pwr_diff = false,
@@ -2620,6 +2657,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = {
.c2h_counter_reg = {R_AX_UDM1 + 1, B_AX_UDM1_HALMAC_C2H_ENQ_CNT_MASK >> 8},
.c2h_regs = rtw8852b_c2h_regs,
.page_regs = &rtw8852b_page_regs,
+ .wow_reason_reg = R_AX_C2HREG_DATA3 + 3,
.cfo_src_fd = true,
.cfo_hw_comp = true,
.dcfo_comp = &rtw8852b_dcfo_comp,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b_table.c b/drivers/net/wireless/realtek/rtw89/rtw8852b_table.c
index d2ce16e98bac..07945d06dc59 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852b_table.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852b_table.c
@@ -16936,7 +16936,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_WW][8] = 52,
[0][0][1][0][RTW89_WW][10] = 52,
[0][0][1][0][RTW89_WW][12] = 52,
- [0][0][1][0][RTW89_WW][14] = 1,
+ [0][0][1][0][RTW89_WW][14] = 52,
[0][0][1][0][RTW89_WW][15] = 52,
[0][0][1][0][RTW89_WW][17] = 52,
[0][0][1][0][RTW89_WW][19] = 52,
@@ -16954,10 +16954,10 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_WW][42] = 28,
[0][0][1][0][RTW89_WW][44] = 28,
[0][0][1][0][RTW89_WW][46] = 28,
- [0][0][1][0][RTW89_WW][48] = 78,
- [0][0][1][0][RTW89_WW][50] = 78,
- [0][0][1][0][RTW89_WW][52] = 78,
- [0][1][1][0][RTW89_WW][0] = 1,
+ [0][0][1][0][RTW89_WW][48] = 76,
+ [0][0][1][0][RTW89_WW][50] = 76,
+ [0][0][1][0][RTW89_WW][52] = 76,
+ [0][1][1][0][RTW89_WW][0] = 30,
[0][1][1][0][RTW89_WW][2] = 32,
[0][1][1][0][RTW89_WW][4] = 30,
[0][1][1][0][RTW89_WW][6] = 30,
@@ -16982,9 +16982,9 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][1][0][RTW89_WW][42] = 16,
[0][1][1][0][RTW89_WW][44] = 16,
[0][1][1][0][RTW89_WW][46] = 16,
- [0][1][1][0][RTW89_WW][48] = 56,
- [0][1][1][0][RTW89_WW][50] = 56,
- [0][1][1][0][RTW89_WW][52] = 56,
+ [0][1][1][0][RTW89_WW][48] = 50,
+ [0][1][1][0][RTW89_WW][50] = 50,
+ [0][1][1][0][RTW89_WW][52] = 50,
[0][0][2][0][RTW89_WW][0] = 42,
[0][0][2][0][RTW89_WW][2] = 42,
[0][0][2][0][RTW89_WW][4] = 42,
@@ -17038,9 +17038,9 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][0][RTW89_WW][42] = 16,
[0][1][2][0][RTW89_WW][44] = 16,
[0][1][2][0][RTW89_WW][46] = 16,
- [0][1][2][0][RTW89_WW][48] = 58,
- [0][1][2][0][RTW89_WW][50] = 58,
- [0][1][2][0][RTW89_WW][52] = 58,
+ [0][1][2][0][RTW89_WW][48] = 50,
+ [0][1][2][0][RTW89_WW][50] = 52,
+ [0][1][2][0][RTW89_WW][52] = 52,
[0][1][2][1][RTW89_WW][0] = 14,
[0][1][2][1][RTW89_WW][2] = 14,
[0][1][2][1][RTW89_WW][4] = 14,
@@ -17066,9 +17066,9 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][1][RTW89_WW][42] = 4,
[0][1][2][1][RTW89_WW][44] = 4,
[0][1][2][1][RTW89_WW][46] = 4,
- [0][1][2][1][RTW89_WW][48] = 58,
- [0][1][2][1][RTW89_WW][50] = 58,
- [0][1][2][1][RTW89_WW][52] = 58,
+ [0][1][2][1][RTW89_WW][48] = 50,
+ [0][1][2][1][RTW89_WW][50] = 52,
+ [0][1][2][1][RTW89_WW][52] = 52,
[1][0][2][0][RTW89_WW][1] = 42,
[1][0][2][0][RTW89_WW][5] = 42,
[1][0][2][0][RTW89_WW][9] = 52,
@@ -17095,8 +17095,8 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][1][2][0][RTW89_WW][36] = 50,
[1][1][2][0][RTW89_WW][39] = 16,
[1][1][2][0][RTW89_WW][43] = 16,
- [1][1][2][0][RTW89_WW][47] = 68,
- [1][1][2][0][RTW89_WW][51] = 66,
+ [1][1][2][0][RTW89_WW][47] = 62,
+ [1][1][2][0][RTW89_WW][51] = 62,
[1][1][2][1][RTW89_WW][1] = 16,
[1][1][2][1][RTW89_WW][5] = 16,
[1][1][2][1][RTW89_WW][9] = 28,
@@ -17109,8 +17109,8 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][1][2][1][RTW89_WW][36] = 36,
[1][1][2][1][RTW89_WW][39] = 4,
[1][1][2][1][RTW89_WW][43] = 4,
- [1][1][2][1][RTW89_WW][47] = 68,
- [1][1][2][1][RTW89_WW][51] = 66,
+ [1][1][2][1][RTW89_WW][47] = 62,
+ [1][1][2][1][RTW89_WW][51] = 62,
[2][0][2][0][RTW89_WW][3] = 42,
[2][0][2][0][RTW89_WW][11] = 52,
[2][0][2][0][RTW89_WW][18] = 52,
@@ -17227,7 +17227,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_MEXICO][14] = 78,
[0][0][1][0][RTW89_CN][14] = 58,
[0][0][1][0][RTW89_QATAR][14] = 58,
- [0][0][1][0][RTW89_UK][14] = 1,
+ [0][0][1][0][RTW89_UK][14] = 58,
[0][0][1][0][RTW89_FCC][15] = 76,
[0][0][1][0][RTW89_ETSI][15] = 58,
[0][0][1][0][RTW89_MKK][15] = 76,
@@ -17435,7 +17435,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][48] = 78,
[0][0][1][0][RTW89_ETSI][48] = 127,
[0][0][1][0][RTW89_MKK][48] = 127,
- [0][0][1][0][RTW89_IC][48] = 127,
+ [0][0][1][0][RTW89_IC][48] = 76,
[0][0][1][0][RTW89_KCC][48] = 127,
[0][0][1][0][RTW89_ACMA][48] = 127,
[0][0][1][0][RTW89_CHILE][48] = 127,
@@ -17447,7 +17447,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][50] = 78,
[0][0][1][0][RTW89_ETSI][50] = 127,
[0][0][1][0][RTW89_MKK][50] = 127,
- [0][0][1][0][RTW89_IC][50] = 127,
+ [0][0][1][0][RTW89_IC][50] = 76,
[0][0][1][0][RTW89_KCC][50] = 127,
[0][0][1][0][RTW89_ACMA][50] = 127,
[0][0][1][0][RTW89_CHILE][50] = 127,
@@ -17459,7 +17459,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][1][0][RTW89_FCC][52] = 78,
[0][0][1][0][RTW89_ETSI][52] = 127,
[0][0][1][0][RTW89_MKK][52] = 127,
- [0][0][1][0][RTW89_IC][52] = 127,
+ [0][0][1][0][RTW89_IC][52] = 76,
[0][0][1][0][RTW89_KCC][52] = 127,
[0][0][1][0][RTW89_ACMA][52] = 127,
[0][0][1][0][RTW89_CHILE][52] = 127,
@@ -17479,7 +17479,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][1][0][RTW89_MEXICO][0] = 50,
[0][1][1][0][RTW89_CN][0] = 46,
[0][1][1][0][RTW89_QATAR][0] = 46,
- [0][1][1][0][RTW89_UK][0] = 1,
+ [0][1][1][0][RTW89_UK][0] = 46,
[0][1][1][0][RTW89_FCC][2] = 68,
[0][1][1][0][RTW89_ETSI][2] = 46,
[0][1][1][0][RTW89_MKK][2] = 48,
@@ -17771,7 +17771,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][1][0][RTW89_FCC][48] = 56,
[0][1][1][0][RTW89_ETSI][48] = 127,
[0][1][1][0][RTW89_MKK][48] = 127,
- [0][1][1][0][RTW89_IC][48] = 127,
+ [0][1][1][0][RTW89_IC][48] = 50,
[0][1][1][0][RTW89_KCC][48] = 127,
[0][1][1][0][RTW89_ACMA][48] = 127,
[0][1][1][0][RTW89_CHILE][48] = 127,
@@ -17783,7 +17783,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][1][0][RTW89_FCC][50] = 56,
[0][1][1][0][RTW89_ETSI][50] = 127,
[0][1][1][0][RTW89_MKK][50] = 127,
- [0][1][1][0][RTW89_IC][50] = 127,
+ [0][1][1][0][RTW89_IC][50] = 50,
[0][1][1][0][RTW89_KCC][50] = 127,
[0][1][1][0][RTW89_ACMA][50] = 127,
[0][1][1][0][RTW89_CHILE][50] = 127,
@@ -17795,7 +17795,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][1][0][RTW89_FCC][52] = 56,
[0][1][1][0][RTW89_ETSI][52] = 127,
[0][1][1][0][RTW89_MKK][52] = 127,
- [0][1][1][0][RTW89_IC][52] = 127,
+ [0][1][1][0][RTW89_IC][52] = 50,
[0][1][1][0][RTW89_KCC][52] = 127,
[0][1][1][0][RTW89_ACMA][52] = 127,
[0][1][1][0][RTW89_CHILE][52] = 127,
@@ -18107,7 +18107,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][48] = 78,
[0][0][2][0][RTW89_ETSI][48] = 127,
[0][0][2][0][RTW89_MKK][48] = 127,
- [0][0][2][0][RTW89_IC][48] = 127,
+ [0][0][2][0][RTW89_IC][48] = 78,
[0][0][2][0][RTW89_KCC][48] = 127,
[0][0][2][0][RTW89_ACMA][48] = 127,
[0][0][2][0][RTW89_CHILE][48] = 127,
@@ -18119,7 +18119,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][50] = 78,
[0][0][2][0][RTW89_ETSI][50] = 127,
[0][0][2][0][RTW89_MKK][50] = 127,
- [0][0][2][0][RTW89_IC][50] = 127,
+ [0][0][2][0][RTW89_IC][50] = 78,
[0][0][2][0][RTW89_KCC][50] = 127,
[0][0][2][0][RTW89_ACMA][50] = 127,
[0][0][2][0][RTW89_CHILE][50] = 127,
@@ -18131,7 +18131,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][0][2][0][RTW89_FCC][52] = 78,
[0][0][2][0][RTW89_ETSI][52] = 127,
[0][0][2][0][RTW89_MKK][52] = 127,
- [0][0][2][0][RTW89_IC][52] = 127,
+ [0][0][2][0][RTW89_IC][52] = 78,
[0][0][2][0][RTW89_KCC][52] = 127,
[0][0][2][0][RTW89_ACMA][52] = 127,
[0][0][2][0][RTW89_CHILE][52] = 127,
@@ -18443,7 +18443,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][0][RTW89_FCC][48] = 58,
[0][1][2][0][RTW89_ETSI][48] = 127,
[0][1][2][0][RTW89_MKK][48] = 127,
- [0][1][2][0][RTW89_IC][48] = 127,
+ [0][1][2][0][RTW89_IC][48] = 50,
[0][1][2][0][RTW89_KCC][48] = 127,
[0][1][2][0][RTW89_ACMA][48] = 127,
[0][1][2][0][RTW89_CHILE][48] = 127,
@@ -18455,7 +18455,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][0][RTW89_FCC][50] = 58,
[0][1][2][0][RTW89_ETSI][50] = 127,
[0][1][2][0][RTW89_MKK][50] = 127,
- [0][1][2][0][RTW89_IC][50] = 127,
+ [0][1][2][0][RTW89_IC][50] = 52,
[0][1][2][0][RTW89_KCC][50] = 127,
[0][1][2][0][RTW89_ACMA][50] = 127,
[0][1][2][0][RTW89_CHILE][50] = 127,
@@ -18467,7 +18467,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][0][RTW89_FCC][52] = 58,
[0][1][2][0][RTW89_ETSI][52] = 127,
[0][1][2][0][RTW89_MKK][52] = 127,
- [0][1][2][0][RTW89_IC][52] = 127,
+ [0][1][2][0][RTW89_IC][52] = 52,
[0][1][2][0][RTW89_KCC][52] = 127,
[0][1][2][0][RTW89_ACMA][52] = 127,
[0][1][2][0][RTW89_CHILE][52] = 127,
@@ -18779,7 +18779,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][1][RTW89_FCC][48] = 58,
[0][1][2][1][RTW89_ETSI][48] = 127,
[0][1][2][1][RTW89_MKK][48] = 127,
- [0][1][2][1][RTW89_IC][48] = 127,
+ [0][1][2][1][RTW89_IC][48] = 50,
[0][1][2][1][RTW89_KCC][48] = 127,
[0][1][2][1][RTW89_ACMA][48] = 127,
[0][1][2][1][RTW89_CHILE][48] = 127,
@@ -18791,7 +18791,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][1][RTW89_FCC][50] = 58,
[0][1][2][1][RTW89_ETSI][50] = 127,
[0][1][2][1][RTW89_MKK][50] = 127,
- [0][1][2][1][RTW89_IC][50] = 127,
+ [0][1][2][1][RTW89_IC][50] = 52,
[0][1][2][1][RTW89_KCC][50] = 127,
[0][1][2][1][RTW89_ACMA][50] = 127,
[0][1][2][1][RTW89_CHILE][50] = 127,
@@ -18803,7 +18803,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[0][1][2][1][RTW89_FCC][52] = 58,
[0][1][2][1][RTW89_ETSI][52] = 127,
[0][1][2][1][RTW89_MKK][52] = 127,
- [0][1][2][1][RTW89_IC][52] = 127,
+ [0][1][2][1][RTW89_IC][52] = 52,
[0][1][2][1][RTW89_KCC][52] = 127,
[0][1][2][1][RTW89_ACMA][52] = 127,
[0][1][2][1][RTW89_CHILE][52] = 127,
@@ -18959,7 +18959,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][0][2][0][RTW89_FCC][47] = 78,
[1][0][2][0][RTW89_ETSI][47] = 127,
[1][0][2][0][RTW89_MKK][47] = 127,
- [1][0][2][0][RTW89_IC][47] = 127,
+ [1][0][2][0][RTW89_IC][47] = 78,
[1][0][2][0][RTW89_KCC][47] = 127,
[1][0][2][0][RTW89_ACMA][47] = 127,
[1][0][2][0][RTW89_CHILE][47] = 127,
@@ -18971,7 +18971,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][0][2][0][RTW89_FCC][51] = 70,
[1][0][2][0][RTW89_ETSI][51] = 127,
[1][0][2][0][RTW89_MKK][51] = 127,
- [1][0][2][0][RTW89_IC][51] = 127,
+ [1][0][2][0][RTW89_IC][51] = 78,
[1][0][2][0][RTW89_KCC][51] = 127,
[1][0][2][0][RTW89_ACMA][51] = 127,
[1][0][2][0][RTW89_CHILE][51] = 127,
@@ -19127,7 +19127,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][1][2][0][RTW89_FCC][47] = 68,
[1][1][2][0][RTW89_ETSI][47] = 127,
[1][1][2][0][RTW89_MKK][47] = 127,
- [1][1][2][0][RTW89_IC][47] = 127,
+ [1][1][2][0][RTW89_IC][47] = 62,
[1][1][2][0][RTW89_KCC][47] = 127,
[1][1][2][0][RTW89_ACMA][47] = 127,
[1][1][2][0][RTW89_CHILE][47] = 127,
@@ -19139,7 +19139,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][1][2][0][RTW89_FCC][51] = 66,
[1][1][2][0][RTW89_ETSI][51] = 127,
[1][1][2][0][RTW89_MKK][51] = 127,
- [1][1][2][0][RTW89_IC][51] = 127,
+ [1][1][2][0][RTW89_IC][51] = 62,
[1][1][2][0][RTW89_KCC][51] = 127,
[1][1][2][0][RTW89_ACMA][51] = 127,
[1][1][2][0][RTW89_CHILE][51] = 127,
@@ -19295,7 +19295,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][1][2][1][RTW89_FCC][47] = 68,
[1][1][2][1][RTW89_ETSI][47] = 127,
[1][1][2][1][RTW89_MKK][47] = 127,
- [1][1][2][1][RTW89_IC][47] = 127,
+ [1][1][2][1][RTW89_IC][47] = 62,
[1][1][2][1][RTW89_KCC][47] = 127,
[1][1][2][1][RTW89_ACMA][47] = 127,
[1][1][2][1][RTW89_CHILE][47] = 127,
@@ -19307,7 +19307,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[1][1][2][1][RTW89_FCC][51] = 66,
[1][1][2][1][RTW89_ETSI][51] = 127,
[1][1][2][1][RTW89_MKK][51] = 127,
- [1][1][2][1][RTW89_IC][51] = 127,
+ [1][1][2][1][RTW89_IC][51] = 62,
[1][1][2][1][RTW89_KCC][51] = 127,
[1][1][2][1][RTW89_ACMA][51] = 127,
[1][1][2][1][RTW89_CHILE][51] = 127,
@@ -19391,7 +19391,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[2][0][2][0][RTW89_FCC][49] = 64,
[2][0][2][0][RTW89_ETSI][49] = 127,
[2][0][2][0][RTW89_MKK][49] = 127,
- [2][0][2][0][RTW89_IC][49] = 127,
+ [2][0][2][0][RTW89_IC][49] = 74,
[2][0][2][0][RTW89_KCC][49] = 127,
[2][0][2][0][RTW89_ACMA][49] = 127,
[2][0][2][0][RTW89_CHILE][49] = 127,
@@ -19475,7 +19475,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[2][1][2][0][RTW89_FCC][49] = 58,
[2][1][2][0][RTW89_ETSI][49] = 127,
[2][1][2][0][RTW89_MKK][49] = 127,
- [2][1][2][0][RTW89_IC][49] = 127,
+ [2][1][2][0][RTW89_IC][49] = 66,
[2][1][2][0][RTW89_KCC][49] = 127,
[2][1][2][0][RTW89_ACMA][49] = 127,
[2][1][2][0][RTW89_CHILE][49] = 127,
@@ -19559,7 +19559,7 @@ const s8 rtw89_8852b_txpwr_lmt_5g[RTW89_5G_BW_NUM][RTW89_NTX_NUM]
[2][1][2][1][RTW89_FCC][49] = 58,
[2][1][2][1][RTW89_ETSI][49] = 127,
[2][1][2][1][RTW89_MKK][49] = 127,
- [2][1][2][1][RTW89_IC][49] = 127,
+ [2][1][2][1][RTW89_IC][49] = 66,
[2][1][2][1][RTW89_KCC][49] = 127,
[2][1][2][1][RTW89_ACMA][49] = 127,
[2][1][2][1][RTW89_CHILE][49] = 127,
@@ -20723,9 +20723,9 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][1][RTW89_WW][42] = 14,
[0][1][RTW89_WW][44] = 14,
[0][1][RTW89_WW][46] = 14,
- [0][1][RTW89_WW][48] = 20,
- [0][1][RTW89_WW][50] = 20,
- [0][1][RTW89_WW][52] = 20,
+ [0][1][RTW89_WW][48] = 16,
+ [0][1][RTW89_WW][50] = 16,
+ [0][1][RTW89_WW][52] = 16,
[1][0][RTW89_WW][0] = 34,
[1][0][RTW89_WW][2] = 34,
[1][0][RTW89_WW][4] = 34,
@@ -20779,9 +20779,9 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][1][RTW89_WW][42] = 16,
[1][1][RTW89_WW][44] = 16,
[1][1][RTW89_WW][46] = 16,
- [1][1][RTW89_WW][48] = 32,
- [1][1][RTW89_WW][50] = 32,
- [1][1][RTW89_WW][52] = 32,
+ [1][1][RTW89_WW][48] = 28,
+ [1][1][RTW89_WW][50] = 30,
+ [1][1][RTW89_WW][52] = 30,
[2][0][RTW89_WW][0] = 44,
[2][0][RTW89_WW][2] = 44,
[2][0][RTW89_WW][4] = 44,
@@ -20835,9 +20835,9 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][1][RTW89_WW][42] = 16,
[2][1][RTW89_WW][44] = 16,
[2][1][RTW89_WW][46] = 16,
- [2][1][RTW89_WW][48] = 44,
- [2][1][RTW89_WW][50] = 44,
- [2][1][RTW89_WW][52] = 44,
+ [2][1][RTW89_WW][48] = 40,
+ [2][1][RTW89_WW][50] = 40,
+ [2][1][RTW89_WW][52] = 40,
[0][0][RTW89_FCC][0] = 52,
[0][0][RTW89_ETSI][0] = 24,
[0][0][RTW89_MKK][0] = 26,
@@ -21141,7 +21141,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][48] = 32,
[0][0][RTW89_ETSI][48] = 127,
[0][0][RTW89_MKK][48] = 127,
- [0][0][RTW89_IC][48] = 127,
+ [0][0][RTW89_IC][48] = 42,
[0][0][RTW89_KCC][48] = 127,
[0][0][RTW89_ACMA][48] = 127,
[0][0][RTW89_CHILE][48] = 127,
@@ -21153,7 +21153,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][50] = 32,
[0][0][RTW89_ETSI][50] = 127,
[0][0][RTW89_MKK][50] = 127,
- [0][0][RTW89_IC][50] = 127,
+ [0][0][RTW89_IC][50] = 42,
[0][0][RTW89_KCC][50] = 127,
[0][0][RTW89_ACMA][50] = 127,
[0][0][RTW89_CHILE][50] = 127,
@@ -21165,7 +21165,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][0][RTW89_FCC][52] = 32,
[0][0][RTW89_ETSI][52] = 127,
[0][0][RTW89_MKK][52] = 127,
- [0][0][RTW89_IC][52] = 127,
+ [0][0][RTW89_IC][52] = 40,
[0][0][RTW89_KCC][52] = 127,
[0][0][RTW89_ACMA][52] = 127,
[0][0][RTW89_CHILE][52] = 127,
@@ -21477,7 +21477,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][1][RTW89_FCC][48] = 20,
[0][1][RTW89_ETSI][48] = 127,
[0][1][RTW89_MKK][48] = 127,
- [0][1][RTW89_IC][48] = 127,
+ [0][1][RTW89_IC][48] = 16,
[0][1][RTW89_KCC][48] = 127,
[0][1][RTW89_ACMA][48] = 127,
[0][1][RTW89_CHILE][48] = 127,
@@ -21489,7 +21489,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][1][RTW89_FCC][50] = 20,
[0][1][RTW89_ETSI][50] = 127,
[0][1][RTW89_MKK][50] = 127,
- [0][1][RTW89_IC][50] = 127,
+ [0][1][RTW89_IC][50] = 16,
[0][1][RTW89_KCC][50] = 127,
[0][1][RTW89_ACMA][50] = 127,
[0][1][RTW89_CHILE][50] = 127,
@@ -21501,7 +21501,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[0][1][RTW89_FCC][52] = 20,
[0][1][RTW89_ETSI][52] = 127,
[0][1][RTW89_MKK][52] = 127,
- [0][1][RTW89_IC][52] = 127,
+ [0][1][RTW89_IC][52] = 16,
[0][1][RTW89_KCC][52] = 127,
[0][1][RTW89_ACMA][52] = 127,
[0][1][RTW89_CHILE][52] = 127,
@@ -21813,7 +21813,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][48] = 44,
[1][0][RTW89_ETSI][48] = 127,
[1][0][RTW89_MKK][48] = 127,
- [1][0][RTW89_IC][48] = 127,
+ [1][0][RTW89_IC][48] = 54,
[1][0][RTW89_KCC][48] = 127,
[1][0][RTW89_ACMA][48] = 127,
[1][0][RTW89_CHILE][48] = 127,
@@ -21825,7 +21825,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][50] = 44,
[1][0][RTW89_ETSI][50] = 127,
[1][0][RTW89_MKK][50] = 127,
- [1][0][RTW89_IC][50] = 127,
+ [1][0][RTW89_IC][50] = 54,
[1][0][RTW89_KCC][50] = 127,
[1][0][RTW89_ACMA][50] = 127,
[1][0][RTW89_CHILE][50] = 127,
@@ -21837,7 +21837,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][0][RTW89_FCC][52] = 44,
[1][0][RTW89_ETSI][52] = 127,
[1][0][RTW89_MKK][52] = 127,
- [1][0][RTW89_IC][52] = 127,
+ [1][0][RTW89_IC][52] = 52,
[1][0][RTW89_KCC][52] = 127,
[1][0][RTW89_ACMA][52] = 127,
[1][0][RTW89_CHILE][52] = 127,
@@ -22149,7 +22149,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][1][RTW89_FCC][48] = 32,
[1][1][RTW89_ETSI][48] = 127,
[1][1][RTW89_MKK][48] = 127,
- [1][1][RTW89_IC][48] = 127,
+ [1][1][RTW89_IC][48] = 28,
[1][1][RTW89_KCC][48] = 127,
[1][1][RTW89_ACMA][48] = 127,
[1][1][RTW89_CHILE][48] = 127,
@@ -22161,7 +22161,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][1][RTW89_FCC][50] = 32,
[1][1][RTW89_ETSI][50] = 127,
[1][1][RTW89_MKK][50] = 127,
- [1][1][RTW89_IC][50] = 127,
+ [1][1][RTW89_IC][50] = 30,
[1][1][RTW89_KCC][50] = 127,
[1][1][RTW89_ACMA][50] = 127,
[1][1][RTW89_CHILE][50] = 127,
@@ -22173,7 +22173,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[1][1][RTW89_FCC][52] = 32,
[1][1][RTW89_ETSI][52] = 127,
[1][1][RTW89_MKK][52] = 127,
- [1][1][RTW89_IC][52] = 127,
+ [1][1][RTW89_IC][52] = 30,
[1][1][RTW89_KCC][52] = 127,
[1][1][RTW89_ACMA][52] = 127,
[1][1][RTW89_CHILE][52] = 127,
@@ -22486,7 +22486,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_ETSI][48] = 127,
[2][0][RTW89_MKK][48] = 127,
[2][0][RTW89_IC][48] = 127,
- [2][0][RTW89_KCC][48] = 127,
+ [2][0][RTW89_KCC][48] = 66,
[2][0][RTW89_ACMA][48] = 127,
[2][0][RTW89_CHILE][48] = 127,
[2][0][RTW89_UKRAINE][48] = 127,
@@ -22498,7 +22498,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_ETSI][50] = 127,
[2][0][RTW89_MKK][50] = 127,
[2][0][RTW89_IC][50] = 127,
- [2][0][RTW89_KCC][50] = 127,
+ [2][0][RTW89_KCC][50] = 66,
[2][0][RTW89_ACMA][50] = 127,
[2][0][RTW89_CHILE][50] = 127,
[2][0][RTW89_UKRAINE][50] = 127,
@@ -22510,7 +22510,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][0][RTW89_ETSI][52] = 127,
[2][0][RTW89_MKK][52] = 127,
[2][0][RTW89_IC][52] = 127,
- [2][0][RTW89_KCC][52] = 127,
+ [2][0][RTW89_KCC][52] = 66,
[2][0][RTW89_ACMA][52] = 127,
[2][0][RTW89_CHILE][52] = 127,
[2][0][RTW89_UKRAINE][52] = 127,
@@ -22821,7 +22821,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][1][RTW89_FCC][48] = 44,
[2][1][RTW89_ETSI][48] = 127,
[2][1][RTW89_MKK][48] = 127,
- [2][1][RTW89_IC][48] = 127,
+ [2][1][RTW89_IC][48] = 40,
[2][1][RTW89_KCC][48] = 127,
[2][1][RTW89_ACMA][48] = 127,
[2][1][RTW89_CHILE][48] = 127,
@@ -22833,7 +22833,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][1][RTW89_FCC][50] = 44,
[2][1][RTW89_ETSI][50] = 127,
[2][1][RTW89_MKK][50] = 127,
- [2][1][RTW89_IC][50] = 127,
+ [2][1][RTW89_IC][50] = 40,
[2][1][RTW89_KCC][50] = 127,
[2][1][RTW89_ACMA][50] = 127,
[2][1][RTW89_CHILE][50] = 127,
@@ -22845,7 +22845,7 @@ const s8 rtw89_8852b_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM]
[2][1][RTW89_FCC][52] = 44,
[2][1][RTW89_ETSI][52] = 127,
[2][1][RTW89_MKK][52] = 127,
- [2][1][RTW89_IC][52] = 127,
+ [2][1][RTW89_IC][52] = 40,
[2][1][RTW89_KCC][52] = 127,
[2][1][RTW89_ACMA][52] = 127,
[2][1][RTW89_CHILE][52] = 127,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852be.c b/drivers/net/wireless/realtek/rtw89/rtw8852be.c
index 920b20bbcfb7..ed71364e6437 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852be.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852be.c
@@ -26,6 +26,7 @@ static const struct rtw89_pci_info rtw8852b_pci_info = {
.io_rcy_en = MAC_AX_PCIE_DISABLE,
.io_rcy_tmr = MAC_AX_IO_RCY_ANA_TMR_6MS,
.rx_ring_eq_is_full = false,
+ .check_rx_tag = false,
.init_cfg_reg = R_AX_PCIE_INIT_CFG1,
.txhci_en_bit = B_AX_TXHCI_EN,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c
index 8618d0204f66..17e6164855fa 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c
@@ -842,7 +842,7 @@ static void rtw8852c_set_gain_error(struct rtw89_dev *rtwdev,
enum rtw89_subband subband,
enum rtw89_rf_path path)
{
- const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain;
+ const struct rtw89_phy_bb_gain_info *gain = &rtwdev->bb_gain.ax;
u8 gain_band = rtw89_subband_to_bb_gain_band(subband);
s32 val;
u32 reg;
@@ -2365,28 +2365,55 @@ static u8 rtw8852c_get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_p
static void rtw8852c_btc_set_rfe(struct rtw89_dev *rtwdev)
{
- struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
+ const struct rtw89_btc_ver *ver = rtwdev->btc.ver;
+ union rtw89_btc_module_info *md = &rtwdev->btc.mdinfo;
- module->rfe_type = rtwdev->efuse.rfe_type;
- module->cv = rtwdev->hal.cv;
- module->bt_solo = 0;
- module->switch_type = BTC_SWITCH_INTERNAL;
+ if (ver->fcxinit == 7) {
+ md->md_v7.rfe_type = rtwdev->efuse.rfe_type;
+ md->md_v7.kt_ver = rtwdev->hal.cv;
+ md->md_v7.bt_solo = 0;
+ md->md_v7.switch_type = BTC_SWITCH_INTERNAL;
- if (module->rfe_type > 0)
- module->ant.num = (module->rfe_type % 2 ? 2 : 3);
- else
- module->ant.num = 2;
+ if (md->md_v7.rfe_type > 0)
+ md->md_v7.ant.num = (md->md_v7.rfe_type % 2 ? 2 : 3);
+ else
+ md->md_v7.ant.num = 2;
- module->ant.diversity = 0;
- module->ant.isolation = 10;
+ md->md_v7.ant.diversity = 0;
+ md->md_v7.ant.isolation = 10;
- if (module->ant.num == 3) {
- module->ant.type = BTC_ANT_DEDICATED;
- module->bt_pos = BTC_BT_ALONE;
+ if (md->md_v7.ant.num == 3) {
+ md->md_v7.ant.type = BTC_ANT_DEDICATED;
+ md->md_v7.bt_pos = BTC_BT_ALONE;
+ } else {
+ md->md_v7.ant.type = BTC_ANT_SHARED;
+ md->md_v7.bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = md->md_v7.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md_v7.ant.type;
} else {
- module->ant.type = BTC_ANT_SHARED;
- module->bt_pos = BTC_BT_BTG;
+ md->md.rfe_type = rtwdev->efuse.rfe_type;
+ md->md.cv = rtwdev->hal.cv;
+ md->md.bt_solo = 0;
+ md->md.switch_type = BTC_SWITCH_INTERNAL;
+
+ if (md->md.rfe_type > 0)
+ md->md.ant.num = (md->md.rfe_type % 2 ? 2 : 3);
+ else
+ md->md.ant.num = 2;
+
+ md->md.ant.diversity = 0;
+ md->md.ant.isolation = 10;
+
+ if (md->md.ant.num == 3) {
+ md->md.ant.type = BTC_ANT_DEDICATED;
+ md->md.bt_pos = BTC_BT_ALONE;
+ } else {
+ md->md.ant.type = BTC_ANT_SHARED;
+ md->md.bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = md->md.ant.btg_pos;
+ rtwdev->btc.ant_type = md->md.ant.type;
}
}
@@ -2449,7 +2476,6 @@ void rtw8852c_set_trx_mask(struct rtw89_dev *rtwdev, u8 path, u8 group, u32 val)
static void rtw8852c_btc_init_cfg(struct rtw89_dev *rtwdev)
{
struct rtw89_btc *btc = &rtwdev->btc;
- struct rtw89_btc_module *module = &btc->mdinfo;
const struct rtw89_chip_info *chip = rtwdev->chip;
const struct rtw89_mac_ax_coex coex_params = {
.pta_mode = RTW89_MAC_AX_COEX_RTK_MODE,
@@ -2468,7 +2494,7 @@ static void rtw8852c_btc_init_cfg(struct rtw89_dev *rtwdev)
rtw89_write_rf(rtwdev, RF_PATH_B, RR_WLSEL, RFREG_MASK, 0x0);
/* set WL Tx thru in TRX mask table if GNT_WL = 0 && BT_S1 = ss group */
- if (module->ant.type == BTC_ANT_SHARED) {
+ if (btc->ant_type == BTC_ANT_SHARED) {
rtw8852c_set_trx_mask(rtwdev,
RF_PATH_A, BTC_BT_SS_GROUP, 0x5ff);
rtw8852c_set_trx_mask(rtwdev,
@@ -2813,6 +2839,7 @@ static const struct rtw89_chip_ops rtw8852c_chip_ops = {
.enable_bb_rf = rtw8852c_mac_enable_bb_rf,
.disable_bb_rf = rtw8852c_mac_disable_bb_rf,
.bb_preinit = NULL,
+ .bb_postinit = NULL,
.bb_reset = rtw8852c_bb_reset,
.bb_sethw = rtw8852c_bb_sethw,
.read_rf = rtw89_phy_read_rf_v1,
@@ -2823,7 +2850,9 @@ static const struct rtw89_chip_ops rtw8852c_chip_ops = {
.read_phycap = rtw8852c_read_phycap,
.fem_setup = NULL,
.rfe_gpio = NULL,
+ .rfk_hw_init = NULL,
.rfk_init = rtw8852c_rfk_init,
+ .rfk_init_late = NULL,
.rfk_channel = rtw8852c_rfk_channel,
.rfk_band_changed = rtw8852c_rfk_band_changed,
.rfk_scan = rtw8852c_rfk_scan,
@@ -2848,6 +2877,12 @@ static const struct rtw89_chip_ops rtw8852c_chip_ops = {
.stop_sch_tx = rtw89_mac_stop_sch_tx_v1,
.resume_sch_tx = rtw89_mac_resume_sch_tx_v1,
.h2c_dctl_sec_cam = rtw89_fw_h2c_dctl_sec_cam_v1,
+ .h2c_default_cmac_tbl = rtw89_fw_h2c_default_cmac_tbl,
+ .h2c_assoc_cmac_tbl = rtw89_fw_h2c_assoc_cmac_tbl,
+ .h2c_ampdu_cmac_tbl = NULL,
+ .h2c_default_dmac_tbl = NULL,
+ .h2c_update_beacon = rtw89_fw_h2c_update_beacon,
+ .h2c_ba_cam = rtw89_fw_h2c_ba_cam,
.btc_set_rfe = rtw8852c_btc_set_rfe,
.btc_init_cfg = rtw8852c_btc_init_cfg,
@@ -2902,7 +2937,10 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
.support_bands = BIT(NL80211_BAND_2GHZ) |
BIT(NL80211_BAND_5GHZ) |
BIT(NL80211_BAND_6GHZ),
- .support_bw160 = true,
+ .support_bandwidths = BIT(NL80211_CHAN_WIDTH_20) |
+ BIT(NL80211_CHAN_WIDTH_40) |
+ BIT(NL80211_CHAN_WIDTH_80) |
+ BIT(NL80211_CHAN_WIDTH_160),
.support_unii4 = true,
.ul_tb_waveform_ctrl = false,
.ul_tb_pwr_diff = true,
@@ -2959,6 +2997,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = {
.c2h_counter_reg = {R_AX_UDM1 + 1, B_AX_UDM1_HALMAC_C2H_ENQ_CNT_MASK >> 8},
.c2h_regs = rtw8852c_c2h_regs,
.page_regs = &rtw8852c_page_regs,
+ .wow_reason_reg = R_AX_C2HREG_DATA3_V1 + 3,
.cfo_src_fd = false,
.cfo_hw_comp = false,
.dcfo_comp = &rtw8852c_dcfo_comp,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852ce.c b/drivers/net/wireless/realtek/rtw89/rtw8852ce.c
index 4592de3dbd94..583ea673a4f5 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852ce.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852ce.c
@@ -35,6 +35,7 @@ static const struct rtw89_pci_info rtw8852c_pci_info = {
.io_rcy_en = MAC_AX_PCIE_ENABLE,
.io_rcy_tmr = MAC_AX_IO_RCY_ANA_TMR_6MS,
.rx_ring_eq_is_full = false,
+ .check_rx_tag = false,
.init_cfg_reg = R_AX_HAXI_INIT_CFG1,
.txhci_en_bit = B_AX_TXHCI_EN_V1,
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c
index 0e7300cc6d9e..367459bd1345 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c
@@ -2,6 +2,7 @@
/* Copyright(c) 2023 Realtek Corporation
*/
+#include "coex.h"
#include "debug.h"
#include "efuse.h"
#include "fw.h"
@@ -9,12 +10,16 @@
#include "phy.h"
#include "reg.h"
#include "rtw8922a.h"
+#include "rtw8922a_rfk.h"
+#include "util.h"
#define RTW8922A_FW_FORMAT_MAX 0
#define RTW8922A_FW_BASENAME "rtw89/rtw8922a_fw"
#define RTW8922A_MODULE_FIRMWARE \
RTW8922A_FW_BASENAME ".bin"
+#define HE_N_USER_MAX_8922A 4
+
static const struct rtw89_hfc_ch_cfg rtw8922a_hfc_chcfg_pcie[] = {
{2, 1641, grp_0}, /* ACH 0 */
{2, 1641, grp_0}, /* ACH 1 */
@@ -43,6 +48,8 @@ static const struct rtw89_hfc_pub_cfg rtw8922a_hfc_pubcfg_pcie = {
static const struct rtw89_hfc_param_ini rtw8922a_hfc_param_ini_pcie[] = {
[RTW89_QTA_SCC] = {rtw8922a_hfc_chcfg_pcie, &rtw8922a_hfc_pubcfg_pcie,
&rtw89_mac_size.hfc_prec_cfg_c0, RTW89_HCIFC_POH},
+ [RTW89_QTA_DBCC] = {rtw8922a_hfc_chcfg_pcie, &rtw8922a_hfc_pubcfg_pcie,
+ &rtw89_mac_size.hfc_prec_cfg_c0, RTW89_HCIFC_POH},
[RTW89_QTA_DLFW] = {NULL, NULL, &rtw89_mac_size.hfc_prec_cfg_c2,
RTW89_HCIFC_POH},
[RTW89_QTA_INVALID] = {NULL},
@@ -54,6 +61,11 @@ static const struct rtw89_dle_mem rtw8922a_dle_mem_pcie[] = {
&rtw89_mac_size.wde_qt0_v1, &rtw89_mac_size.ple_qt0,
&rtw89_mac_size.ple_qt1, &rtw89_mac_size.ple_rsvd_qt0,
&rtw89_mac_size.rsvd0_size0, &rtw89_mac_size.rsvd1_size0},
+ [RTW89_QTA_DBCC] = {RTW89_QTA_DBCC, &rtw89_mac_size.wde_size0_v1,
+ &rtw89_mac_size.ple_size0_v1, &rtw89_mac_size.wde_qt0_v1,
+ &rtw89_mac_size.wde_qt0_v1, &rtw89_mac_size.ple_qt0,
+ &rtw89_mac_size.ple_qt1, &rtw89_mac_size.ple_rsvd_qt0,
+ &rtw89_mac_size.rsvd0_size0, &rtw89_mac_size.rsvd1_size0},
[RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size4_v1,
&rtw89_mac_size.ple_size3_v1, &rtw89_mac_size.wde_qt4,
&rtw89_mac_size.wde_qt4, &rtw89_mac_size.ple_qt9,
@@ -63,6 +75,31 @@ static const struct rtw89_dle_mem rtw8922a_dle_mem_pcie[] = {
NULL},
};
+static const u32 rtw8922a_h2c_regs[RTW89_H2CREG_MAX] = {
+ R_BE_H2CREG_DATA0, R_BE_H2CREG_DATA1, R_BE_H2CREG_DATA2,
+ R_BE_H2CREG_DATA3
+};
+
+static const u32 rtw8922a_c2h_regs[RTW89_H2CREG_MAX] = {
+ R_BE_C2HREG_DATA0, R_BE_C2HREG_DATA1, R_BE_C2HREG_DATA2,
+ R_BE_C2HREG_DATA3
+};
+
+static const struct rtw89_page_regs rtw8922a_page_regs = {
+ .hci_fc_ctrl = R_BE_HCI_FC_CTRL,
+ .ch_page_ctrl = R_BE_CH_PAGE_CTRL,
+ .ach_page_ctrl = R_BE_CH0_PAGE_CTRL,
+ .ach_page_info = R_BE_CH0_PAGE_INFO,
+ .pub_page_info3 = R_BE_PUB_PAGE_INFO3,
+ .pub_page_ctrl1 = R_BE_PUB_PAGE_CTRL1,
+ .pub_page_ctrl2 = R_BE_PUB_PAGE_CTRL2,
+ .pub_page_info1 = R_BE_PUB_PAGE_INFO1,
+ .pub_page_info2 = R_BE_PUB_PAGE_INFO2,
+ .wp_page_ctrl1 = R_BE_WP_PAGE_CTRL1,
+ .wp_page_ctrl2 = R_BE_WP_PAGE_CTRL2,
+ .wp_page_info1 = R_BE_WP_PAGE_INFO1,
+};
+
static const struct rtw89_reg_imr rtw8922a_imr_dmac_regs[] = {
{R_BE_DISP_HOST_IMR, B_BE_DISP_HOST_IMR_CLR, B_BE_DISP_HOST_IMR_SET},
{R_BE_DISP_CPU_IMR, B_BE_DISP_CPU_IMR_CLR, B_BE_DISP_CPU_IMR_SET},
@@ -119,6 +156,51 @@ static const struct rtw89_imr_table rtw8922a_imr_cmac_table = {
.n_regs = ARRAY_SIZE(rtw8922a_imr_cmac_regs),
};
+static const struct rtw89_rrsr_cfgs rtw8922a_rrsr_cfgs = {
+ .ref_rate = {R_BE_TRXPTCL_RESP_1, B_BE_WMAC_RESP_REF_RATE_SEL, 0},
+ .rsc = {R_BE_PTCL_RRSR1, B_BE_RSC_MASK, 2},
+};
+
+static const struct rtw89_dig_regs rtw8922a_dig_regs = {
+ .seg0_pd_reg = R_SEG0R_PD_V2,
+ .pd_lower_bound_mask = B_SEG0R_PD_LOWER_BOUND_MSK,
+ .pd_spatial_reuse_en = B_SEG0R_PD_SPATIAL_REUSE_EN_MSK_V1,
+ .bmode_pd_reg = R_BMODE_PDTH_EN_V2,
+ .bmode_cca_rssi_limit_en = B_BMODE_PDTH_LIMIT_EN_MSK_V1,
+ .bmode_pd_lower_bound_reg = R_BMODE_PDTH_V2,
+ .bmode_rssi_nocca_low_th_mask = B_BMODE_PDTH_LOWER_BOUND_MSK_V1,
+ .p0_lna_init = {R_PATH0_LNA_INIT_V1, B_PATH0_LNA_INIT_IDX_MSK},
+ .p1_lna_init = {R_PATH1_LNA_INIT_V1, B_PATH1_LNA_INIT_IDX_MSK},
+ .p0_tia_init = {R_PATH0_TIA_INIT_V1, B_PATH0_TIA_INIT_IDX_MSK_V1},
+ .p1_tia_init = {R_PATH1_TIA_INIT_V1, B_PATH1_TIA_INIT_IDX_MSK_V1},
+ .p0_rxb_init = {R_PATH0_RXB_INIT_V1, B_PATH0_RXB_INIT_IDX_MSK_V1},
+ .p1_rxb_init = {R_PATH1_RXB_INIT_V1, B_PATH1_RXB_INIT_IDX_MSK_V1},
+ .p0_p20_pagcugc_en = {R_PATH0_P20_FOLLOW_BY_PAGCUGC_V3,
+ B_PATH0_P20_FOLLOW_BY_PAGCUGC_EN_MSK},
+ .p0_s20_pagcugc_en = {R_PATH0_S20_FOLLOW_BY_PAGCUGC_V3,
+ B_PATH0_S20_FOLLOW_BY_PAGCUGC_EN_MSK},
+ .p1_p20_pagcugc_en = {R_PATH1_P20_FOLLOW_BY_PAGCUGC_V3,
+ B_PATH1_P20_FOLLOW_BY_PAGCUGC_EN_MSK},
+ .p1_s20_pagcugc_en = {R_PATH1_S20_FOLLOW_BY_PAGCUGC_V3,
+ B_PATH1_S20_FOLLOW_BY_PAGCUGC_EN_MSK},
+};
+
+static const struct rtw89_edcca_regs rtw8922a_edcca_regs = {
+ .edcca_level = R_SEG0R_EDCCA_LVL_BE,
+ .edcca_mask = B_EDCCA_LVL_MSK0,
+ .edcca_p_mask = B_EDCCA_LVL_MSK1,
+ .ppdu_level = R_SEG0R_PPDU_LVL_BE,
+ .ppdu_mask = B_EDCCA_LVL_MSK1,
+ .rpt_a = R_EDCCA_RPT_A_BE,
+ .rpt_b = R_EDCCA_RPT_B_BE,
+ .rpt_sel = R_EDCCA_RPT_SEL_BE,
+ .rpt_sel_mask = B_EDCCA_RPT_SEL_MSK,
+ .rpt_sel_be = R_EDCCA_RPTREG_SEL_BE,
+ .rpt_sel_be_mask = B_EDCCA_RPTREG_SEL_BE_MSK,
+ .tx_collision_t2r_st = R_TX_COLLISION_T2R_ST_BE,
+ .tx_collision_t2r_st_mask = B_TX_COLLISION_T2R_ST_BE_M,
+};
+
static const struct rtw89_efuse_block_cfg rtw8922a_efuse_blocks[] = {
[RTW89_EFUSE_BLOCK_SYS] = {.offset = 0x00000, .size = 0x310},
[RTW89_EFUSE_BLOCK_RF] = {.offset = 0x10000, .size = 0x240},
@@ -130,6 +212,36 @@ static const struct rtw89_efuse_block_cfg rtw8922a_efuse_blocks[] = {
[RTW89_EFUSE_BLOCK_ADIE] = {.offset = 0x70000, .size = 0x10},
};
+static void rtw8922a_ctrl_btg_bt_rx(struct rtw89_dev *rtwdev, bool en,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (en) {
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BT_SHARE_A, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BTG_PATH_A, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BT_SHARE_B, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BTG_PATH_B, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_OP, B_LNA6, 0x20, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_TIA, B_TIA0_B, 0x30, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PMAC_GNT, B_PMAC_GNT_P1, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_ANT_BT_SHARE, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_RX_BT_SG0, 0x2, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GNT_BT_WGT_EN, B_GNT_BT_WGT_EN,
+ 0x1, phy_idx);
+ } else {
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BT_SHARE_A, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BTG_PATH_A, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BT_SHARE_B, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BTG_PATH_B, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_OP, B_LNA6, 0x1a, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_TIA, B_TIA0_B, 0x2a, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PMAC_GNT, B_PMAC_GNT_P1, 0xc, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_ANT_BT_SHARE, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_RX_BT_SG0, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GNT_BT_WGT_EN, B_GNT_BT_WGT_EN,
+ 0x0, phy_idx);
+ }
+}
+
static int rtw8922a_pwr_on_func(struct rtw89_dev *rtwdev)
{
struct rtw89_hal *hal = &rtwdev->hal;
@@ -273,6 +385,9 @@ static int rtw8922a_pwr_on_func(struct rtw89_dev *rtwdev)
rtw89_write32_set(rtwdev, R_BE_FEN_RST_ENABLE, B_BE_FEN_BB_IP_RSTN |
B_BE_FEN_BBPLAT_RSTB);
+ if (!test_bit(RTW89_FLAG_PROBE_DONE, rtwdev->flags))
+ rtw89_efuse_read_fw_secure_be(rtwdev);
+
return 0;
}
@@ -574,6 +689,32 @@ static void rtw8922a_phycap_parsing_pa_bias_trim(struct rtw89_dev *rtwdev,
}
}
+static void rtw8922a_pa_bias_trim(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_power_trim_info *info = &rtwdev->pwr_trim;
+ u8 pabias_2g, pabias_5g;
+ u8 i;
+
+ if (!info->pg_pa_bias_trim) {
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[PA_BIAS][TRIM] no PG, do nothing\n");
+
+ return;
+ }
+
+ for (i = 0; i < RF_PATH_NUM_8922A; i++) {
+ pabias_2g = FIELD_GET(GENMASK(3, 0), info->pa_bias_trim[i]);
+ pabias_5g = FIELD_GET(GENMASK(7, 4), info->pa_bias_trim[i]);
+
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[PA_BIAS][TRIM] path=%d 2G=0x%x 5G=0x%x\n",
+ i, pabias_2g, pabias_5g);
+
+ rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASA_TXG_V1, pabias_2g);
+ rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASA_TXA_V1, pabias_5g);
+ }
+}
+
static void rtw8922a_phycap_parsing_pad_bias_trim(struct rtw89_dev *rtwdev,
u8 *phycap_map)
{
@@ -591,6 +732,31 @@ static void rtw8922a_phycap_parsing_pad_bias_trim(struct rtw89_dev *rtwdev,
}
}
+static void rtw8922a_pad_bias_trim(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_power_trim_info *info = &rtwdev->pwr_trim;
+ u8 pad_bias_2g, pad_bias_5g;
+ u8 i;
+
+ if (!info->pg_pa_bias_trim) {
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[PAD_BIAS][TRIM] no PG, do nothing\n");
+ return;
+ }
+
+ for (i = 0; i < RF_PATH_NUM_8922A; i++) {
+ pad_bias_2g = u8_get_bits(info->pad_bias_trim[i], GENMASK(3, 0));
+ pad_bias_5g = u8_get_bits(info->pad_bias_trim[i], GENMASK(7, 4));
+
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[PAD_BIAS][TRIM] path=%d 2G=0x%x 5G=0x%x\n",
+ i, pad_bias_2g, pad_bias_5g);
+
+ rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASD_TXG_V1, pad_bias_2g);
+ rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASD_TXA_V1, pad_bias_5g);
+ }
+}
+
static int rtw8922a_read_phycap(struct rtw89_dev *rtwdev, u8 *phycap_map)
{
rtw8922a_phycap_parsing_thermal_trim(rtwdev, phycap_map);
@@ -600,6 +766,1547 @@ static int rtw8922a_read_phycap(struct rtw89_dev *rtwdev, u8 *phycap_map)
return 0;
}
+static void rtw8922a_power_trim(struct rtw89_dev *rtwdev)
+{
+ rtw8922a_pa_bias_trim(rtwdev);
+ rtw8922a_pad_bias_trim(rtwdev);
+}
+
+static void rtw8922a_set_channel_mac(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ u8 mac_idx)
+{
+ u32 sub_carr = rtw89_mac_reg_by_idx(rtwdev, R_BE_TX_SUB_BAND_VALUE, mac_idx);
+ u32 chk_rate = rtw89_mac_reg_by_idx(rtwdev, R_BE_TXRATE_CHK, mac_idx);
+ u32 rf_mod = rtw89_mac_reg_by_idx(rtwdev, R_BE_WMAC_RFMOD, mac_idx);
+ u8 txsb20 = 0, txsb40 = 0, txsb80 = 0;
+ u8 rf_mod_val, chk_rate_mask;
+ u32 txsb;
+ u32 reg;
+
+ switch (chan->band_width) {
+ case RTW89_CHANNEL_WIDTH_160:
+ txsb80 = rtw89_phy_get_txsb(rtwdev, chan, RTW89_CHANNEL_WIDTH_80);
+ fallthrough;
+ case RTW89_CHANNEL_WIDTH_80:
+ txsb40 = rtw89_phy_get_txsb(rtwdev, chan, RTW89_CHANNEL_WIDTH_40);
+ fallthrough;
+ case RTW89_CHANNEL_WIDTH_40:
+ txsb20 = rtw89_phy_get_txsb(rtwdev, chan, RTW89_CHANNEL_WIDTH_20);
+ break;
+ default:
+ break;
+ }
+
+ switch (chan->band_width) {
+ case RTW89_CHANNEL_WIDTH_160:
+ rf_mod_val = BE_WMAC_RFMOD_160M;
+ txsb = u32_encode_bits(txsb20, B_BE_TXSB_20M_MASK) |
+ u32_encode_bits(txsb40, B_BE_TXSB_40M_MASK) |
+ u32_encode_bits(txsb80, B_BE_TXSB_80M_MASK);
+ break;
+ case RTW89_CHANNEL_WIDTH_80:
+ rf_mod_val = BE_WMAC_RFMOD_80M;
+ txsb = u32_encode_bits(txsb20, B_BE_TXSB_20M_MASK) |
+ u32_encode_bits(txsb40, B_BE_TXSB_40M_MASK);
+ break;
+ case RTW89_CHANNEL_WIDTH_40:
+ rf_mod_val = BE_WMAC_RFMOD_40M;
+ txsb = u32_encode_bits(txsb20, B_BE_TXSB_20M_MASK);
+ break;
+ case RTW89_CHANNEL_WIDTH_20:
+ default:
+ rf_mod_val = BE_WMAC_RFMOD_20M;
+ txsb = 0;
+ break;
+ }
+
+ if (txsb20 <= BE_PRI20_BITMAP_MAX)
+ txsb |= u32_encode_bits(BIT(txsb20), B_BE_PRI20_BITMAP_MASK);
+
+ rtw89_write8_mask(rtwdev, rf_mod, B_BE_WMAC_RFMOD_MASK, rf_mod_val);
+ rtw89_write32(rtwdev, sub_carr, txsb);
+
+ switch (chan->band_type) {
+ case RTW89_BAND_2G:
+ chk_rate_mask = B_BE_BAND_MODE;
+ break;
+ case RTW89_BAND_5G:
+ case RTW89_BAND_6G:
+ chk_rate_mask = B_BE_CHECK_CCK_EN | B_BE_RTS_LIMIT_IN_OFDM6;
+ break;
+ default:
+ rtw89_warn(rtwdev, "Invalid band_type:%d\n", chan->band_type);
+ return;
+ }
+
+ rtw89_write8_clr(rtwdev, chk_rate, B_BE_BAND_MODE | B_BE_CHECK_CCK_EN |
+ B_BE_RTS_LIMIT_IN_OFDM6);
+ rtw89_write8_set(rtwdev, chk_rate, chk_rate_mask);
+
+ switch (chan->band_width) {
+ case RTW89_CHANNEL_WIDTH_320:
+ case RTW89_CHANNEL_WIDTH_160:
+ case RTW89_CHANNEL_WIDTH_80:
+ case RTW89_CHANNEL_WIDTH_40:
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_PREBKF_CFG_1, mac_idx);
+ rtw89_write32_mask(rtwdev, reg, B_BE_SIFS_MACTXEN_T1_MASK, 0x41);
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_MUEDCA_EN, mac_idx);
+ rtw89_write32_mask(rtwdev, reg, B_BE_SIFS_MACTXEN_TB_T1_MASK, 0x41);
+ break;
+ default:
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_PREBKF_CFG_1, mac_idx);
+ rtw89_write32_mask(rtwdev, reg, B_BE_SIFS_MACTXEN_T1_MASK, 0x3f);
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_MUEDCA_EN, mac_idx);
+ rtw89_write32_mask(rtwdev, reg, B_BE_SIFS_MACTXEN_TB_T1_MASK, 0x3e);
+ break;
+ }
+}
+
+static const u32 rtw8922a_sco_barker_threshold[14] = {
+ 0x1fe4f, 0x1ff5e, 0x2006c, 0x2017b, 0x2028a, 0x20399, 0x204a8, 0x205b6,
+ 0x206c5, 0x207d4, 0x208e3, 0x209f2, 0x20b00, 0x20d8a
+};
+
+static const u32 rtw8922a_sco_cck_threshold[14] = {
+ 0x2bdac, 0x2bf21, 0x2c095, 0x2c209, 0x2c37e, 0x2c4f2, 0x2c666, 0x2c7db,
+ 0x2c94f, 0x2cac3, 0x2cc38, 0x2cdac, 0x2cf21, 0x2d29e
+};
+
+static int rtw8922a_ctrl_sco_cck(struct rtw89_dev *rtwdev,
+ u8 primary_ch, enum rtw89_bandwidth bw,
+ enum rtw89_phy_idx phy_idx)
+{
+ u8 ch_element;
+
+ if (primary_ch >= 14)
+ return -EINVAL;
+
+ ch_element = primary_ch - 1;
+
+ rtw89_phy_write32_idx(rtwdev, R_BK_FC0INV, B_BK_FC0INV,
+ rtw8922a_sco_barker_threshold[ch_element],
+ phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_CCK_FC0INV, B_CCK_FC0INV,
+ rtw8922a_sco_cck_threshold[ch_element],
+ phy_idx);
+
+ return 0;
+}
+
+struct rtw8922a_bb_gain {
+ u32 gain_g[BB_PATH_NUM_8922A];
+ u32 gain_a[BB_PATH_NUM_8922A];
+ u32 gain_g_mask;
+ u32 gain_a_mask;
+};
+
+static const struct rtw89_reg_def rpl_comp_bw160[RTW89_BW20_SC_160M] = {
+ { .addr = 0x41E8, .mask = 0xFF00},
+ { .addr = 0x41E8, .mask = 0xFF0000},
+ { .addr = 0x41E8, .mask = 0xFF000000},
+ { .addr = 0x41EC, .mask = 0xFF},
+ { .addr = 0x41EC, .mask = 0xFF00},
+ { .addr = 0x41EC, .mask = 0xFF0000},
+ { .addr = 0x41EC, .mask = 0xFF000000},
+ { .addr = 0x41F0, .mask = 0xFF}
+};
+
+static const struct rtw89_reg_def rpl_comp_bw80[RTW89_BW20_SC_80M] = {
+ { .addr = 0x41F4, .mask = 0xFF},
+ { .addr = 0x41F4, .mask = 0xFF00},
+ { .addr = 0x41F4, .mask = 0xFF0000},
+ { .addr = 0x41F4, .mask = 0xFF000000}
+};
+
+static const struct rtw89_reg_def rpl_comp_bw40[RTW89_BW20_SC_40M] = {
+ { .addr = 0x41F0, .mask = 0xFF0000},
+ { .addr = 0x41F0, .mask = 0xFF000000}
+};
+
+static const struct rtw89_reg_def rpl_comp_bw20[RTW89_BW20_SC_20M] = {
+ { .addr = 0x41F0, .mask = 0xFF00}
+};
+
+static const struct rtw8922a_bb_gain bb_gain_lna[LNA_GAIN_NUM] = {
+ { .gain_g = {0x409c, 0x449c}, .gain_a = {0x406C, 0x446C},
+ .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF},
+ { .gain_g = {0x409c, 0x449c}, .gain_a = {0x406C, 0x446C},
+ .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF0000},
+ { .gain_g = {0x40a0, 0x44a0}, .gain_a = {0x4070, 0x4470},
+ .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF},
+ { .gain_g = {0x40a0, 0x44a0}, .gain_a = {0x4070, 0x4470},
+ .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF0000},
+ { .gain_g = {0x40a4, 0x44a4}, .gain_a = {0x4074, 0x4474},
+ .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF},
+ { .gain_g = {0x40a4, 0x44a4}, .gain_a = {0x4074, 0x4474},
+ .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF0000},
+ { .gain_g = {0x40a8, 0x44a8}, .gain_a = {0x4078, 0x4478},
+ .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF},
+};
+
+static const struct rtw8922a_bb_gain bb_gain_tia[TIA_GAIN_NUM] = {
+ { .gain_g = {0x4054, 0x4454}, .gain_a = {0x4054, 0x4454},
+ .gain_g_mask = 0x7FC0000, .gain_a_mask = 0x1FF},
+ { .gain_g = {0x4058, 0x4458}, .gain_a = {0x4054, 0x4454},
+ .gain_g_mask = 0x1FF, .gain_a_mask = 0x3FE00 },
+};
+
+struct rtw8922a_bb_gain_bypass {
+ u32 gain_g[BB_PATH_NUM_8922A];
+ u32 gain_a[BB_PATH_NUM_8922A];
+ u32 gain_mask_g;
+ u32 gain_mask_a;
+};
+
+static void rtw8922a_set_rpl_gain(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path,
+ enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be;
+ u8 gain_band = rtw89_subband_to_gain_band_be(chan->subband_type);
+ u32 reg_path_ofst = 0;
+ u32 mask;
+ s32 val;
+ u32 reg;
+ int i;
+
+ if (path == RF_PATH_B)
+ reg_path_ofst = 0x400;
+
+ for (i = 0; i < RTW89_BW20_SC_160M; i++) {
+ reg = rpl_comp_bw160[i].addr | reg_path_ofst;
+ mask = rpl_comp_bw160[i].mask;
+ val = gain->rpl_ofst_160[gain_band][path][i];
+ rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx);
+ }
+
+ for (i = 0; i < RTW89_BW20_SC_80M; i++) {
+ reg = rpl_comp_bw80[i].addr | reg_path_ofst;
+ mask = rpl_comp_bw80[i].mask;
+ val = gain->rpl_ofst_80[gain_band][path][i];
+ rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx);
+ }
+
+ for (i = 0; i < RTW89_BW20_SC_40M; i++) {
+ reg = rpl_comp_bw40[i].addr | reg_path_ofst;
+ mask = rpl_comp_bw40[i].mask;
+ val = gain->rpl_ofst_40[gain_band][path][i];
+ rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx);
+ }
+
+ for (i = 0; i < RTW89_BW20_SC_20M; i++) {
+ reg = rpl_comp_bw20[i].addr | reg_path_ofst;
+ mask = rpl_comp_bw20[i].mask;
+ val = gain->rpl_ofst_20[gain_band][path][i];
+ rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx);
+ }
+}
+
+static void rtw8922a_set_lna_tia_gain(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path,
+ enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be;
+ u8 gain_band = rtw89_subband_to_gain_band_be(chan->subband_type);
+ enum rtw89_phy_bb_bw_be bw_type;
+ s32 val;
+ u32 reg;
+ u32 mask;
+ int i;
+
+ bw_type = chan->band_width <= RTW89_CHANNEL_WIDTH_40 ?
+ RTW89_BB_BW_20_40 : RTW89_BB_BW_80_160_320;
+
+ for (i = 0; i < LNA_GAIN_NUM; i++) {
+ if (chan->band_type == RTW89_BAND_2G) {
+ reg = bb_gain_lna[i].gain_g[path];
+ mask = bb_gain_lna[i].gain_g_mask;
+ } else {
+ reg = bb_gain_lna[i].gain_a[path];
+ mask = bb_gain_lna[i].gain_a_mask;
+ }
+ val = gain->lna_gain[gain_band][bw_type][path][i];
+ rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx);
+ }
+
+ for (i = 0; i < TIA_GAIN_NUM; i++) {
+ if (chan->band_type == RTW89_BAND_2G) {
+ reg = bb_gain_tia[i].gain_g[path];
+ mask = bb_gain_tia[i].gain_g_mask;
+ } else {
+ reg = bb_gain_tia[i].gain_a[path];
+ mask = bb_gain_tia[i].gain_a_mask;
+ }
+ val = gain->tia_gain[gain_band][bw_type][path][i];
+ rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx);
+ }
+}
+
+static void rtw8922a_set_gain(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_set_lna_tia_gain(rtwdev, chan, path, phy_idx);
+ rtw8922a_set_rpl_gain(rtwdev, chan, path, phy_idx);
+}
+
+static void rtw8922a_set_rx_gain_normal_cck(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path)
+{
+ struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain;
+ s8 value = -gain->offset[path][RTW89_GAIN_OFFSET_2G_CCK]; /* S(8,2) */
+ u8 fraction = value & 0x3;
+
+ if (fraction) {
+ rtw89_phy_write32_mask(rtwdev, R_MGAIN_BIAS, B_MGAIN_BIAS_BW20,
+ (0x4 - fraction) << 1);
+ rtw89_phy_write32_mask(rtwdev, R_MGAIN_BIAS, B_MGAIN_BIAS_BW40,
+ (0x4 - fraction) << 1);
+
+ value >>= 2;
+ rtw89_phy_write32_mask(rtwdev, R_CCK_RPL_OFST, B_CCK_RPL_OFST,
+ value + 1 + 0xdc);
+ } else {
+ rtw89_phy_write32_mask(rtwdev, R_MGAIN_BIAS, B_MGAIN_BIAS_BW20, 0);
+ rtw89_phy_write32_mask(rtwdev, R_MGAIN_BIAS, B_MGAIN_BIAS_BW40, 0);
+
+ value >>= 2;
+ rtw89_phy_write32_mask(rtwdev, R_CCK_RPL_OFST, B_CCK_RPL_OFST,
+ value + 0xdc);
+ }
+}
+
+static void rtw8922a_set_rx_gain_normal_ofdm(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path)
+{
+ static const u32 rssi_tb_bias_comp[2] = {0x41f8, 0x45f8};
+ static const u32 rssi_tb_ext_comp[2] = {0x4208, 0x4608};
+ static const u32 rssi_ofst_addr[2] = {0x40c8, 0x44c8};
+ static const u32 rpl_bias_comp[2] = {0x41e8, 0x45e8};
+ static const u32 rpl_ext_comp[2] = {0x41f8, 0x45f8};
+ struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain;
+ enum rtw89_gain_offset gain_band;
+ s8 v1, v2, v3;
+ s32 value;
+
+ gain_band = rtw89_subband_to_gain_offset_band_of_ofdm(chan->subband_type);
+ value = gain->offset[path][gain_band];
+ rtw89_phy_write32_mask(rtwdev, rssi_ofst_addr[path], 0xff000000, value + 0xF8);
+
+ value *= -4;
+ v1 = clamp_t(s32, value, S8_MIN, S8_MAX);
+ value -= v1;
+ v2 = clamp_t(s32, value, S8_MIN, S8_MAX);
+ value -= v2;
+ v3 = clamp_t(s32, value, S8_MIN, S8_MAX);
+
+ rtw89_phy_write32_mask(rtwdev, rpl_bias_comp[path], 0xff, v1);
+ rtw89_phy_write32_mask(rtwdev, rpl_ext_comp[path], 0xff, v2);
+ rtw89_phy_write32_mask(rtwdev, rpl_ext_comp[path], 0xff00, v3);
+
+ rtw89_phy_write32_mask(rtwdev, rssi_tb_bias_comp[path], 0xff0000, v1);
+ rtw89_phy_write32_mask(rtwdev, rssi_tb_ext_comp[path], 0xff0000, v2);
+ rtw89_phy_write32_mask(rtwdev, rssi_tb_ext_comp[path], 0xff000000, v3);
+}
+
+static void rtw8922a_set_rx_gain_normal(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path)
+{
+ struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain;
+
+ if (!gain->offset_valid)
+ return;
+
+ if (chan->band_type == RTW89_BAND_2G)
+ rtw8922a_set_rx_gain_normal_cck(rtwdev, chan, path);
+
+ rtw8922a_set_rx_gain_normal_ofdm(rtwdev, chan, path);
+}
+
+static void rtw8922a_set_cck_parameters(struct rtw89_dev *rtwdev, u8 central_ch,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (central_ch == 14) {
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF01, B_PCOEFF01, 0x3b13ff, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF23, B_PCOEFF23, 0x1c42de, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF45, B_PCOEFF45, 0xfdb0ad, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF67, B_PCOEFF67, 0xf60f6e, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF89, B_PCOEFF89, 0xfd8f92, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFFAB, B_PCOEFFAB, 0x02d011, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFFCD, B_PCOEFFCD, 0x01c02c, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFFEF, B_PCOEFFEF, 0xfff00a, phy_idx);
+ } else {
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF01, B_PCOEFF01, 0x3a63ca, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF23, B_PCOEFF23, 0x2a833f, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF45, B_PCOEFF45, 0x1491f8, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF67, B_PCOEFF67, 0x03c0b0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFF89, B_PCOEFF89, 0xfccff1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFFAB, B_PCOEFFAB, 0xfccfc3, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFFCD, B_PCOEFFCD, 0xfebfdc, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PCOEFFEF, B_PCOEFFEF, 0xffdff7, phy_idx);
+ }
+}
+
+static void rtw8922a_ctrl_ch(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ static const u32 band_sel[2] = {0x4160, 0x4560};
+ u16 central_freq = chan->freq;
+ u8 central_ch = chan->channel;
+ u8 band = chan->band_type;
+ bool is_2g = band == RTW89_BAND_2G;
+ u8 chan_idx;
+ u8 path;
+ u8 sco;
+
+ if (!central_freq) {
+ rtw89_warn(rtwdev, "Invalid central_freq\n");
+ return;
+ }
+
+ rtw8922a_set_gain(rtwdev, chan, RF_PATH_A, phy_idx);
+ rtw8922a_set_gain(rtwdev, chan, RF_PATH_B, phy_idx);
+
+ for (path = RF_PATH_A; path < BB_PATH_NUM_8922A; path++)
+ rtw89_phy_write32_idx(rtwdev, band_sel[path], BIT((26)), is_2g, phy_idx);
+
+ rtw8922a_set_rx_gain_normal(rtwdev, chan, RF_PATH_A);
+ rtw8922a_set_rx_gain_normal(rtwdev, chan, RF_PATH_B);
+
+ rtw89_phy_write32_idx(rtwdev, R_FC0, B_FC0, central_freq, phy_idx);
+ sco = DIV_ROUND_CLOSEST(1 << 18, central_freq);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_FC0_INV, sco, phy_idx);
+
+ if (band == RTW89_BAND_2G)
+ rtw8922a_set_cck_parameters(rtwdev, central_ch, phy_idx);
+
+ chan_idx = rtw89_encode_chan_idx(rtwdev, chan->primary_channel, band);
+ rtw89_phy_write32_idx(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0, chan_idx, phy_idx);
+}
+
+static void
+rtw8922a_ctrl_bw(struct rtw89_dev *rtwdev, u8 pri_sb, u8 bw,
+ enum rtw89_phy_idx phy_idx)
+{
+ switch (bw) {
+ case RTW89_CHANNEL_WIDTH_5:
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_BW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_SMALLBW, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_PRICH, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_DAC_CLK, B_DAC_CLK, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP0, B_GAIN_MAP0_EN, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP1, B_GAIN_MAP1_EN, 0x0, phy_idx);
+ break;
+ case RTW89_CHANNEL_WIDTH_10:
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_BW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_SMALLBW, 0x2, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_PRICH, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_DAC_CLK, B_DAC_CLK, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP0, B_GAIN_MAP0_EN, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP1, B_GAIN_MAP1_EN, 0x0, phy_idx);
+ break;
+ case RTW89_CHANNEL_WIDTH_20:
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_BW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_SMALLBW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_PRICH, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_DAC_CLK, B_DAC_CLK, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP0, B_GAIN_MAP0_EN, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP1, B_GAIN_MAP1_EN, 0x0, phy_idx);
+ break;
+ case RTW89_CHANNEL_WIDTH_40:
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_BW, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_SMALLBW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_PRICH, pri_sb, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_DAC_CLK, B_DAC_CLK, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP0, B_GAIN_MAP0_EN, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP1, B_GAIN_MAP1_EN, 0x0, phy_idx);
+ break;
+ case RTW89_CHANNEL_WIDTH_80:
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_BW, 0x2, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_SMALLBW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_PRICH, pri_sb, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_DAC_CLK, B_DAC_CLK, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP0, B_GAIN_MAP0_EN, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP1, B_GAIN_MAP1_EN, 0x1, phy_idx);
+ break;
+ case RTW89_CHANNEL_WIDTH_160:
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_BW, 0x3, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_SMALLBW, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_CHBW_PRICH, pri_sb, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_DAC_CLK, B_DAC_CLK, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP0, B_GAIN_MAP0_EN, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_GAIN_MAP1, B_GAIN_MAP1_EN, 0x1, phy_idx);
+ break;
+ default:
+ rtw89_warn(rtwdev, "Fail to switch bw (bw:%d, pri_sb:%d)\n", bw,
+ pri_sb);
+ break;
+ }
+
+ if (bw == RTW89_CHANNEL_WIDTH_40)
+ rtw89_phy_write32_idx(rtwdev, R_FC0, B_BW40_2XFFT, 1, phy_idx);
+ else
+ rtw89_phy_write32_idx(rtwdev, R_FC0, B_BW40_2XFFT, 0, phy_idx);
+}
+
+static u32 rtw8922a_spur_freq(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan)
+{
+ return 0;
+}
+
+#define CARRIER_SPACING_312_5 312500 /* 312.5 kHz */
+#define CARRIER_SPACING_78_125 78125 /* 78.125 kHz */
+#define MAX_TONE_NUM 2048
+
+static void rtw8922a_set_csi_tone_idx(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ s32 freq_diff, csi_idx, csi_tone_idx;
+ u32 spur_freq;
+
+ spur_freq = rtw8922a_spur_freq(rtwdev, chan);
+ if (spur_freq == 0) {
+ rtw89_phy_write32_idx(rtwdev, R_S0S1_CSI_WGT, B_S0S1_CSI_WGT_EN,
+ 0, phy_idx);
+ return;
+ }
+
+ freq_diff = (spur_freq - chan->freq) * 1000000;
+ csi_idx = s32_div_u32_round_closest(freq_diff, CARRIER_SPACING_78_125);
+ s32_div_u32_round_down(csi_idx, MAX_TONE_NUM, &csi_tone_idx);
+
+ rtw89_phy_write32_idx(rtwdev, R_S0S1_CSI_WGT, B_S0S1_CSI_WGT_TONE_IDX,
+ csi_tone_idx, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_S0S1_CSI_WGT, B_S0S1_CSI_WGT_EN, 1, phy_idx);
+}
+
+static const struct rtw89_nbi_reg_def rtw8922a_nbi_reg_def[] = {
+ [RF_PATH_A] = {
+ .notch1_idx = {0x41a0, 0xFF},
+ .notch1_frac_idx = {0x41a0, 0xC00},
+ .notch1_en = {0x41a0, 0x1000},
+ .notch2_idx = {0x41ac, 0xFF},
+ .notch2_frac_idx = {0x41ac, 0xC00},
+ .notch2_en = {0x41ac, 0x1000},
+ },
+ [RF_PATH_B] = {
+ .notch1_idx = {0x45a0, 0xFF},
+ .notch1_frac_idx = {0x45a0, 0xC00},
+ .notch1_en = {0x45a0, 0x1000},
+ .notch2_idx = {0x45ac, 0xFF},
+ .notch2_frac_idx = {0x45ac, 0xC00},
+ .notch2_en = {0x45ac, 0x1000},
+ },
+};
+
+static void rtw8922a_set_nbi_tone_idx(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_rf_path path,
+ enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_nbi_reg_def *nbi = &rtw8922a_nbi_reg_def[path];
+ s32 nbi_frac_idx, nbi_frac_tone_idx;
+ s32 nbi_idx, nbi_tone_idx;
+ bool notch2_chk = false;
+ u32 spur_freq, fc;
+ s32 freq_diff;
+
+ spur_freq = rtw8922a_spur_freq(rtwdev, chan);
+ if (spur_freq == 0) {
+ rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr,
+ nbi->notch1_en.mask, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr,
+ nbi->notch2_en.mask, 0, phy_idx);
+ return;
+ }
+
+ fc = chan->freq;
+ if (chan->band_width == RTW89_CHANNEL_WIDTH_160) {
+ fc = (spur_freq > fc) ? fc + 40 : fc - 40;
+ if ((fc > spur_freq &&
+ chan->channel < chan->primary_channel) ||
+ (fc < spur_freq &&
+ chan->channel > chan->primary_channel))
+ notch2_chk = true;
+ }
+
+ freq_diff = (spur_freq - fc) * 1000000;
+ nbi_idx = s32_div_u32_round_down(freq_diff, CARRIER_SPACING_312_5,
+ &nbi_frac_idx);
+
+ if (chan->band_width == RTW89_CHANNEL_WIDTH_20) {
+ s32_div_u32_round_down(nbi_idx + 32, 64, &nbi_tone_idx);
+ } else {
+ u16 tone_para = (chan->band_width == RTW89_CHANNEL_WIDTH_40) ?
+ 128 : 256;
+
+ s32_div_u32_round_down(nbi_idx, tone_para, &nbi_tone_idx);
+ }
+ nbi_frac_tone_idx =
+ s32_div_u32_round_closest(nbi_frac_idx, CARRIER_SPACING_78_125);
+
+ if (chan->band_width == RTW89_CHANNEL_WIDTH_160 && notch2_chk) {
+ rtw89_phy_write32_idx(rtwdev, nbi->notch2_idx.addr,
+ nbi->notch2_idx.mask, nbi_tone_idx, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch2_frac_idx.addr,
+ nbi->notch2_frac_idx.mask, nbi_frac_tone_idx,
+ phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr,
+ nbi->notch2_en.mask, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr,
+ nbi->notch2_en.mask, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr,
+ nbi->notch1_en.mask, 0, phy_idx);
+ } else {
+ rtw89_phy_write32_idx(rtwdev, nbi->notch1_idx.addr,
+ nbi->notch1_idx.mask, nbi_tone_idx, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch1_frac_idx.addr,
+ nbi->notch1_frac_idx.mask, nbi_frac_tone_idx,
+ phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr,
+ nbi->notch1_en.mask, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr,
+ nbi->notch1_en.mask, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr,
+ nbi->notch2_en.mask, 0, phy_idx);
+ }
+}
+
+static void rtw8922a_spur_elimination(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_set_csi_tone_idx(rtwdev, chan, phy_idx);
+ rtw8922a_set_nbi_tone_idx(rtwdev, chan, RF_PATH_A, phy_idx);
+ rtw8922a_set_nbi_tone_idx(rtwdev, chan, RF_PATH_B, phy_idx);
+}
+
+static void rtw8922a_ctrl_afe_dac(struct rtw89_dev *rtwdev, enum rtw89_bandwidth bw,
+ enum rtw89_rf_path path)
+{
+ u32 cr_ofst = 0x0;
+
+ if (path == RF_PATH_B)
+ cr_ofst = 0x100;
+
+ switch (bw) {
+ case RTW89_CHANNEL_WIDTH_5:
+ case RTW89_CHANNEL_WIDTH_10:
+ case RTW89_CHANNEL_WIDTH_20:
+ case RTW89_CHANNEL_WIDTH_40:
+ case RTW89_CHANNEL_WIDTH_80:
+ rtw89_phy_write32_mask(rtwdev, R_AFEDAC0 + cr_ofst, B_AFEDAC0, 0xE);
+ rtw89_phy_write32_mask(rtwdev, R_AFEDAC1 + cr_ofst, B_AFEDAC1, 0x7);
+ break;
+ case RTW89_CHANNEL_WIDTH_160:
+ rtw89_phy_write32_mask(rtwdev, R_AFEDAC0 + cr_ofst, B_AFEDAC0, 0xD);
+ rtw89_phy_write32_mask(rtwdev, R_AFEDAC1 + cr_ofst, B_AFEDAC1, 0x6);
+ break;
+ default:
+ break;
+ }
+}
+
+static const struct rtw89_reg2_def bb_mcu0_init_reg[] = {
+ {0x6990, 0x00000000},
+ {0x6994, 0x00000000},
+ {0x6998, 0x00000000},
+ {0x6820, 0xFFFFFFFE},
+ {0x6800, 0xC0000FFE},
+ {0x6808, 0x76543210},
+ {0x6814, 0xBFBFB000},
+ {0x6818, 0x0478C009},
+ {0x6800, 0xC0000FFF},
+ {0x6820, 0xFFFFFFFF},
+};
+
+static const struct rtw89_reg2_def bb_mcu1_init_reg[] = {
+ {0x6990, 0x00000000},
+ {0x6994, 0x00000000},
+ {0x6998, 0x00000000},
+ {0x6820, 0xFFFFFFFE},
+ {0x6800, 0xC0000FFE},
+ {0x6808, 0x76543210},
+ {0x6814, 0xBFBFB000},
+ {0x6818, 0x0478C009},
+ {0x6800, 0xC0000FFF},
+ {0x6820, 0xFFFFFFFF},
+};
+
+static void rtw8922a_bbmcu_cr_init(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_reg2_def *reg;
+ int size;
+ int i;
+
+ if (phy_idx == RTW89_PHY_0) {
+ reg = bb_mcu0_init_reg;
+ size = ARRAY_SIZE(bb_mcu0_init_reg);
+ } else {
+ reg = bb_mcu1_init_reg;
+ size = ARRAY_SIZE(bb_mcu1_init_reg);
+ }
+
+ for (i = 0; i < size; i++, reg++)
+ rtw89_bbmcu_write32(rtwdev, reg->addr, reg->data, phy_idx);
+}
+
+static const u32 dmac_sys_mask[2] = {B_BE_DMAC_BB_PHY0_MASK, B_BE_DMAC_BB_PHY1_MASK};
+static const u32 bbrst_mask[2] = {B_BE_FEN_BBPLAT_RSTB, B_BE_FEN_BB1PLAT_RSTB};
+static const u32 glbrst_mask[2] = {B_BE_FEN_BB_IP_RSTN, B_BE_FEN_BB1_IP_RSTN};
+static const u32 mcu_bootrdy_mask[2] = {B_BE_BOOT_RDY0, B_BE_BOOT_RDY1};
+
+static void rtw8922a_bb_preinit(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ u32 rdy = 0;
+
+ if (phy_idx == RTW89_PHY_1)
+ rdy = 1;
+
+ rtw89_write32_mask(rtwdev, R_BE_DMAC_SYS_CR32B, dmac_sys_mask[phy_idx], 0x7FF9);
+ rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, glbrst_mask[phy_idx], 0x0);
+ rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, bbrst_mask[phy_idx], 0x0);
+ rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, glbrst_mask[phy_idx], 0x1);
+ rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, mcu_bootrdy_mask[phy_idx], rdy);
+ rtw89_write32_mask(rtwdev, R_BE_MEM_PWR_CTRL, B_BE_MEM_BBMCU0_DS_V1, 0);
+
+ fsleep(1);
+ rtw8922a_bbmcu_cr_init(rtwdev, phy_idx);
+}
+
+static void rtw8922a_bb_postinit(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ if (phy_idx == RTW89_PHY_0)
+ rtw89_write32_set(rtwdev, R_BE_FEN_RST_ENABLE, mcu_bootrdy_mask[phy_idx]);
+ rtw89_write32_set(rtwdev, R_BE_FEN_RST_ENABLE, bbrst_mask[phy_idx]);
+
+ rtw89_phy_write32_set(rtwdev, R_BBCLK, B_CLK_640M);
+ rtw89_phy_write32_clr(rtwdev, R_TXSCALE, B_TXFCTR_EN);
+ rtw89_phy_set_phy_regs(rtwdev, R_TXFCTR, B_TXFCTR_THD, 0x200);
+ rtw89_phy_set_phy_regs(rtwdev, R_SLOPE, B_EHT_RATE_TH, 0xA);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE, B_HE_RATE_TH, 0xA);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE2, B_HT_VHT_TH, 0xAAA);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE, B_EHT_MCS14, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE2, B_EHT_MCS15, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE3, B_EHTTB_EN, 0x0);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE3, B_HEERSU_EN, 0x0);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE3, B_HEMU_EN, 0x0);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE3, B_TB_EN, 0x0);
+ rtw89_phy_set_phy_regs(rtwdev, R_SU_PUNC, B_SU_PUNC_EN, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE5, B_HWGEN_EN, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_BEDGE5, B_PWROFST_COMP, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_MAG_AB, B_BY_SLOPE, 0x1);
+ rtw89_phy_set_phy_regs(rtwdev, R_MAG_A, B_MGA_AEND, 0xe0);
+ rtw89_phy_set_phy_regs(rtwdev, R_MAG_AB, B_MAG_AB, 0xe0c000);
+ rtw89_phy_set_phy_regs(rtwdev, R_SLOPE, B_SLOPE_A, 0x3FE0);
+ rtw89_phy_set_phy_regs(rtwdev, R_SLOPE, B_SLOPE_B, 0x3FE0);
+ rtw89_phy_set_phy_regs(rtwdev, R_SC_CORNER, B_SC_CORNER, 0x200);
+ rtw89_phy_write32_idx(rtwdev, R_UDP_COEEF, B_UDP_COEEF, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_UDP_COEEF, B_UDP_COEEF, 0x1, phy_idx);
+}
+
+static void rtw8922a_bb_reset_en(struct rtw89_dev *rtwdev, enum rtw89_band band,
+ bool en, enum rtw89_phy_idx phy_idx)
+{
+ if (en) {
+ rtw89_phy_write32_idx(rtwdev, R_RSTB_ASYNC, B_RSTB_ASYNC_ALL, 1, phy_idx);
+ if (band == RTW89_BAND_2G)
+ rtw89_phy_write32_idx(rtwdev, R_RXCCA_BE1,
+ B_RXCCA_BE1_DIS, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PD_CTRL, B_PD_HIT_DIS, 0x0, phy_idx);
+ } else {
+ rtw89_phy_write32_idx(rtwdev, R_RXCCA_BE1, B_RXCCA_BE1_DIS, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PD_CTRL, B_PD_HIT_DIS, 0x1, phy_idx);
+ fsleep(1);
+ rtw89_phy_write32_idx(rtwdev, R_RSTB_ASYNC, B_RSTB_ASYNC_ALL, 0, phy_idx);
+ }
+}
+
+static int rtw8922a_ctrl_tx_path_tmac(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path tx_path,
+ enum rtw89_phy_idx phy_idx)
+{
+ struct rtw89_reg2_def path_com_cr[] = {
+ {0x11A00, 0x21C86900},
+ {0x11A04, 0x00E4E433},
+ {0x11A08, 0x39390CC9},
+ {0x11A0C, 0x4E433240},
+ {0x11A10, 0x90CC900E},
+ {0x11A14, 0x00240393},
+ {0x11A18, 0x201C8600},
+ };
+ int ret = 0;
+ u32 reg;
+ int i;
+
+ rtw89_phy_write32_idx(rtwdev, R_MAC_SEL, B_MAC_SEL, 0x0, phy_idx);
+
+ if (phy_idx == RTW89_PHY_1 && !rtwdev->dbcc_en)
+ return 0;
+
+ if (tx_path == RF_PATH_A) {
+ path_com_cr[0].data = 0x21C82900;
+ path_com_cr[1].data = 0x00E4E431;
+ path_com_cr[2].data = 0x39390C49;
+ path_com_cr[3].data = 0x4E431240;
+ path_com_cr[4].data = 0x90C4900E;
+ path_com_cr[6].data = 0x201C8200;
+ } else if (tx_path == RF_PATH_B) {
+ path_com_cr[0].data = 0x21C04900;
+ path_com_cr[1].data = 0x00E4E032;
+ path_com_cr[2].data = 0x39380C89;
+ path_com_cr[3].data = 0x4E032240;
+ path_com_cr[4].data = 0x80C8900E;
+ path_com_cr[6].data = 0x201C0400;
+ } else if (tx_path == RF_PATH_AB) {
+ path_com_cr[0].data = 0x21C86900;
+ path_com_cr[1].data = 0x00E4E433;
+ path_com_cr[2].data = 0x39390CC9;
+ path_com_cr[3].data = 0x4E433240;
+ path_com_cr[4].data = 0x90CC900E;
+ path_com_cr[6].data = 0x201C8600;
+ } else {
+ ret = -EINVAL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(path_com_cr); i++) {
+ reg = rtw89_mac_reg_by_idx(rtwdev, path_com_cr[i].addr, phy_idx);
+ rtw89_write32(rtwdev, reg, path_com_cr[i].data);
+ }
+
+ return ret;
+}
+
+static void rtw8922a_bb_reset(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+}
+
+static int rtw8922a_cfg_rx_nss_limit(struct rtw89_dev *rtwdev, u8 rx_nss,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (rx_nss == 1) {
+ rtw89_phy_write32_idx(rtwdev, R_BRK_R, B_HTMCS_LMT, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_R, B_VHTMCS_LMT, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_HE, B_N_USR_MAX,
+ HE_N_USER_MAX_8922A, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_HE, B_NSS_MAX, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_HE, B_TB_NSS_MAX, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_EHT, B_RXEHT_NSS_MAX, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_RXEHT, B_RXEHTTB_NSS_MAX, 0,
+ phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_RXEHT, B_RXEHT_N_USER_MAX,
+ HE_N_USER_MAX_8922A, phy_idx);
+ } else if (rx_nss == 2) {
+ rtw89_phy_write32_idx(rtwdev, R_BRK_R, B_HTMCS_LMT, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_R, B_VHTMCS_LMT, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_HE, B_N_USR_MAX,
+ HE_N_USER_MAX_8922A, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_HE, B_NSS_MAX, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_HE, B_TB_NSS_MAX, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_EHT, B_RXEHT_NSS_MAX, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_RXEHT, B_RXEHTTB_NSS_MAX, 1,
+ phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BRK_RXEHT, B_RXEHT_N_USER_MAX,
+ HE_N_USER_MAX_8922A, phy_idx);
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void rtw8922a_tssi_reset(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path path,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (rtwdev->mlo_dbcc_mode == MLO_1_PLUS_1_1RF) {
+ if (phy_idx == RTW89_PHY_0) {
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTA, B_TXPWR_RSTA, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTA, B_TXPWR_RSTA, 0x1);
+ } else {
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB, B_TXPWR_RSTB, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB, B_TXPWR_RSTB, 0x1);
+ }
+ } else {
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTA, B_TXPWR_RSTA, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTA, B_TXPWR_RSTA, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB, B_TXPWR_RSTB, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB, B_TXPWR_RSTB, 0x1);
+ }
+}
+
+static int rtw8922a_ctrl_rx_path_tmac(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path rx_path,
+ enum rtw89_phy_idx phy_idx)
+{
+ u8 rx_nss = (rx_path == RF_PATH_AB) ? 2 : 1;
+
+ /* Set to 0 first to avoid abnormal EDCCA report */
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_ANT_RX_SG0, 0x0, phy_idx);
+
+ if (rx_path == RF_PATH_A) {
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_ANT_RX_SG0, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_RX_1RCCA, 1, phy_idx);
+ rtw8922a_cfg_rx_nss_limit(rtwdev, rx_nss, phy_idx);
+ rtw8922a_tssi_reset(rtwdev, rx_path, phy_idx);
+ } else if (rx_path == RF_PATH_B) {
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_ANT_RX_SG0, 0x2, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_RX_1RCCA, 2, phy_idx);
+ rtw8922a_cfg_rx_nss_limit(rtwdev, rx_nss, phy_idx);
+ rtw8922a_tssi_reset(rtwdev, rx_path, phy_idx);
+ } else if (rx_path == RF_PATH_AB) {
+ rtw89_phy_write32_idx(rtwdev, R_ANT_CHBW, B_ANT_RX_SG0, 0x3, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FC0INV_SBW, B_RX_1RCCA, 3, phy_idx);
+ rtw8922a_cfg_rx_nss_limit(rtwdev, rx_nss, phy_idx);
+ rtw8922a_tssi_reset(rtwdev, rx_path, phy_idx);
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int rtw8922a_ctrl_mlo(struct rtw89_dev *rtwdev, enum rtw89_mlo_dbcc_mode mode)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_SUB_ENTITY_0);
+
+ if (mode == MLO_1_PLUS_1_1RF || mode == DBCC_LEGACY) {
+ rtw89_phy_write32_mask(rtwdev, R_DBCC, B_DBCC_EN, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_DBCC_FA, B_DBCC_FA, 0x0);
+ } else if (mode == MLO_2_PLUS_0_1RF || mode == MLO_0_PLUS_2_1RF ||
+ mode == MLO_DBCC_NOT_SUPPORT) {
+ rtw89_phy_write32_mask(rtwdev, R_DBCC, B_DBCC_EN, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_DBCC_FA, B_DBCC_FA, 0x1);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (mode == MLO_2_PLUS_0_1RF) {
+ rtw8922a_ctrl_afe_dac(rtwdev, chan->band_width, RF_PATH_A);
+ rtw8922a_ctrl_afe_dac(rtwdev, chan->band_width, RF_PATH_B);
+ } else {
+ rtw89_warn(rtwdev, "unsupported MLO mode %d\n", mode);
+ }
+
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x6180);
+
+ if (mode == MLO_2_PLUS_0_1RF) {
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xBBAB);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xABA9);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEBA9);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEAA9);
+ } else if (mode == MLO_0_PLUS_2_1RF) {
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xBBAB);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xAFFF);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEFFF);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEEFF);
+ } else if ((mode == MLO_1_PLUS_1_1RF) || (mode == DBCC_LEGACY)) {
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x7BAB);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x3BAB);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x3AAB);
+ } else {
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x180);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x0);
+ }
+
+ return 0;
+}
+
+static void rtw8922a_bb_sethw(struct rtw89_dev *rtwdev)
+{
+ u32 reg;
+
+ rtw89_phy_write32_clr(rtwdev, R_EN_SND_WO_NDP, B_EN_SND_WO_NDP);
+ rtw89_phy_write32_clr(rtwdev, R_EN_SND_WO_NDP_C1, B_EN_SND_WO_NDP);
+
+ rtw89_write32_mask(rtwdev, R_BE_PWR_BOOST, B_BE_PWR_CTRL_SEL, 0);
+ if (rtwdev->dbcc_en) {
+ reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_BOOST, RTW89_MAC_1);
+ rtw89_write32_mask(rtwdev, reg, B_BE_PWR_CTRL_SEL, 0);
+ }
+
+ rtw8922a_ctrl_mlo(rtwdev, rtwdev->mlo_dbcc_mode);
+}
+
+static void rtw8922a_ctrl_cck_en(struct rtw89_dev *rtwdev, bool cck_en,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (cck_en) {
+ rtw89_phy_write32_idx(rtwdev, R_RXCCA_BE1, B_RXCCA_BE1_DIS, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_UPD_CLK_ADC, B_ENABLE_CCK, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PD_ARBITER_OFF, B_PD_ARBITER_OFF,
+ 0, phy_idx);
+ } else {
+ rtw89_phy_write32_idx(rtwdev, R_RXCCA_BE1, B_RXCCA_BE1_DIS, 1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_UPD_CLK_ADC, B_ENABLE_CCK, 0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_PD_ARBITER_OFF, B_PD_ARBITER_OFF,
+ 1, phy_idx);
+ }
+}
+
+static void rtw8922a_set_channel_bb(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ bool cck_en = chan->band_type == RTW89_BAND_2G;
+ u8 pri_sb = chan->pri_sb_idx;
+
+ if (cck_en)
+ rtw8922a_ctrl_sco_cck(rtwdev, chan->primary_channel,
+ chan->band_width, phy_idx);
+
+ rtw8922a_ctrl_ch(rtwdev, chan, phy_idx);
+ rtw8922a_ctrl_bw(rtwdev, pri_sb, chan->band_width, phy_idx);
+ rtw8922a_ctrl_cck_en(rtwdev, cck_en, phy_idx);
+ rtw8922a_spur_elimination(rtwdev, chan, phy_idx);
+
+ rtw89_phy_write32_idx(rtwdev, R_RSTB_ASYNC, B_RSTB_ASYNC_ALL, 1, phy_idx);
+ rtw8922a_tssi_reset(rtwdev, RF_PATH_AB, phy_idx);
+}
+
+static void rtw8922a_pre_set_channel_bb(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (!rtwdev->dbcc_en)
+ return;
+
+ if (phy_idx == RTW89_PHY_0) {
+ rtw89_phy_write32_mask(rtwdev, R_DBCC, B_DBCC_EN, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0x6180);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xBBAB);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xABA9);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEBA9);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEAA9);
+ } else {
+ rtw89_phy_write32_mask(rtwdev, R_DBCC, B_DBCC_EN, 0x0);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xBBAB);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xAFFF);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEFFF);
+ rtw89_phy_write32_mask(rtwdev, R_EMLSR, B_EMLSR_PARM, 0xEEFF);
+ }
+}
+
+static void rtw8922a_post_set_channel_bb(struct rtw89_dev *rtwdev,
+ enum rtw89_mlo_dbcc_mode mode)
+{
+ if (!rtwdev->dbcc_en)
+ return;
+
+ rtw8922a_ctrl_mlo(rtwdev, mode);
+}
+
+static void rtw8922a_set_channel(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_mac_idx mac_idx,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_set_channel_mac(rtwdev, chan, mac_idx);
+ rtw8922a_set_channel_bb(rtwdev, chan, phy_idx);
+ rtw8922a_set_channel_rf(rtwdev, chan, phy_idx);
+}
+
+static void rtw8922a_dfs_en_idx(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx, enum rtw89_rf_path path,
+ bool en)
+{
+ u32 path_ofst = (path == RF_PATH_B) ? 0x100 : 0x0;
+
+ if (en)
+ rtw89_phy_write32_idx(rtwdev, 0x2800 + path_ofst, BIT(1), 1,
+ phy_idx);
+ else
+ rtw89_phy_write32_idx(rtwdev, 0x2800 + path_ofst, BIT(1), 0,
+ phy_idx);
+}
+
+static void rtw8922a_dfs_en(struct rtw89_dev *rtwdev, bool en,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_dfs_en_idx(rtwdev, phy_idx, RF_PATH_A, en);
+ rtw8922a_dfs_en_idx(rtwdev, phy_idx, RF_PATH_B, en);
+}
+
+static void rtw8922a_adc_en_path(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path path, bool en)
+{
+ u32 val;
+
+ val = rtw89_phy_read32_mask(rtwdev, R_ADC_FIFO_V1, B_ADC_FIFO_EN_V1);
+
+ if (en) {
+ if (path == RF_PATH_A)
+ val &= ~0x1;
+ else
+ val &= ~0x2;
+ } else {
+ if (path == RF_PATH_A)
+ val |= 0x1;
+ else
+ val |= 0x2;
+ }
+
+ rtw89_phy_write32_mask(rtwdev, R_ADC_FIFO_V1, B_ADC_FIFO_EN_V1, val);
+}
+
+static void rtw8922a_adc_en(struct rtw89_dev *rtwdev, bool en, u8 phy_idx)
+{
+ if (rtwdev->mlo_dbcc_mode == MLO_1_PLUS_1_1RF) {
+ if (phy_idx == RTW89_PHY_0)
+ rtw8922a_adc_en_path(rtwdev, RF_PATH_A, en);
+ else
+ rtw8922a_adc_en_path(rtwdev, RF_PATH_B, en);
+ } else {
+ rtw8922a_adc_en_path(rtwdev, RF_PATH_A, en);
+ rtw8922a_adc_en_path(rtwdev, RF_PATH_B, en);
+ }
+}
+
+static
+void rtw8922a_hal_reset(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx, enum rtw89_mac_idx mac_idx,
+ enum rtw89_band band, u32 *tx_en, bool enter)
+{
+ if (enter) {
+ rtw89_chip_stop_sch_tx(rtwdev, mac_idx, tx_en, RTW89_SCH_TX_SEL_ALL);
+ rtw89_mac_cfg_ppdu_status(rtwdev, mac_idx, false);
+ rtw8922a_dfs_en(rtwdev, false, phy_idx);
+ rtw8922a_tssi_cont_en_phyidx(rtwdev, false, phy_idx);
+ rtw8922a_adc_en(rtwdev, false, phy_idx);
+ fsleep(40);
+ rtw8922a_bb_reset_en(rtwdev, band, false, phy_idx);
+ } else {
+ rtw89_mac_cfg_ppdu_status(rtwdev, mac_idx, true);
+ rtw8922a_adc_en(rtwdev, true, phy_idx);
+ rtw8922a_dfs_en(rtwdev, true, phy_idx);
+ rtw8922a_tssi_cont_en_phyidx(rtwdev, true, phy_idx);
+ rtw8922a_bb_reset_en(rtwdev, band, true, phy_idx);
+ rtw89_chip_resume_sch_tx(rtwdev, mac_idx, *tx_en);
+ }
+}
+
+static void rtw8922a_set_channel_help(struct rtw89_dev *rtwdev, bool enter,
+ struct rtw89_channel_help_params *p,
+ const struct rtw89_chan *chan,
+ enum rtw89_mac_idx mac_idx,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (enter) {
+ rtw8922a_pre_set_channel_bb(rtwdev, phy_idx);
+ rtw8922a_pre_set_channel_rf(rtwdev, phy_idx);
+ }
+
+ rtw8922a_hal_reset(rtwdev, phy_idx, mac_idx, chan->band_type, &p->tx_en, enter);
+
+ if (!enter) {
+ rtw8922a_post_set_channel_bb(rtwdev, rtwdev->mlo_dbcc_mode);
+ rtw8922a_post_set_channel_rf(rtwdev, phy_idx);
+ }
+}
+
+static void rtw8922a_rfk_init(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_rfk_mcc_info *rfk_mcc = &rtwdev->rfk_mcc;
+
+ rtwdev->is_tssi_mode[RF_PATH_A] = false;
+ rtwdev->is_tssi_mode[RF_PATH_B] = false;
+ memset(rfk_mcc, 0, sizeof(*rfk_mcc));
+}
+
+static void rtw8922a_rfk_init_late(struct rtw89_dev *rtwdev)
+{
+ rtw89_phy_rfk_pre_ntfy_and_wait(rtwdev, RTW89_PHY_0, 5);
+
+ rtw89_phy_rfk_dack_and_wait(rtwdev, RTW89_PHY_0, 58);
+ rtw89_phy_rfk_rxdck_and_wait(rtwdev, RTW89_PHY_0, 32);
+}
+
+static void _wait_rx_mode(struct rtw89_dev *rtwdev, u8 kpath)
+{
+ u32 rf_mode;
+ u8 path;
+ int ret;
+
+ for (path = 0; path < RF_PATH_NUM_8922A; path++) {
+ if (!(kpath & BIT(path)))
+ continue;
+
+ ret = read_poll_timeout_atomic(rtw89_read_rf, rf_mode, rf_mode != 2,
+ 2, 5000, false, rtwdev, path, 0x00,
+ RR_MOD_MASK);
+ rtw89_debug(rtwdev, RTW89_DBG_RFK,
+ "[RFK] Wait S%d to Rx mode!! (ret = %d)\n",
+ path, ret);
+ }
+}
+
+static void rtw8922a_rfk_channel(struct rtw89_dev *rtwdev)
+{
+ enum rtw89_phy_idx phy_idx = RTW89_PHY_0;
+ u8 phy_map = rtw89_btc_phymap(rtwdev, phy_idx, RF_AB);
+ u32 tx_en;
+
+ rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_CHLK, BTC_WRFK_START);
+ rtw89_chip_stop_sch_tx(rtwdev, phy_idx, &tx_en, RTW89_SCH_TX_SEL_ALL);
+ _wait_rx_mode(rtwdev, RF_AB);
+
+ rtw89_phy_rfk_pre_ntfy_and_wait(rtwdev, phy_idx, 5);
+ rtw89_phy_rfk_txgapk_and_wait(rtwdev, phy_idx, 54);
+ rtw89_phy_rfk_iqk_and_wait(rtwdev, phy_idx, 84);
+ rtw89_phy_rfk_tssi_and_wait(rtwdev, phy_idx, RTW89_TSSI_NORMAL, 6);
+ rtw89_phy_rfk_dpk_and_wait(rtwdev, phy_idx, 34);
+ rtw89_phy_rfk_rxdck_and_wait(rtwdev, RTW89_PHY_0, 32);
+
+ rtw89_chip_resume_sch_tx(rtwdev, phy_idx, tx_en);
+ rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_CHLK, BTC_WRFK_STOP);
+}
+
+static void rtw8922a_rfk_band_changed(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw89_phy_rfk_tssi_and_wait(rtwdev, phy_idx, RTW89_TSSI_SCAN, 6);
+}
+
+static void rtw8922a_rfk_scan(struct rtw89_dev *rtwdev, bool start)
+{
+}
+
+static void rtw8922a_rfk_track(struct rtw89_dev *rtwdev)
+{
+}
+
+static void rtw8922a_set_txpwr_ref(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ s16 ref_ofdm = 0;
+ s16 ref_cck = 0;
+
+ rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set txpwr reference\n");
+
+ rtw89_mac_txpwr_write32_mask(rtwdev, phy_idx, R_BE_PWR_REF_CTRL,
+ B_BE_PWR_REF_CTRL_OFDM, ref_ofdm);
+ rtw89_mac_txpwr_write32_mask(rtwdev, phy_idx, R_BE_PWR_REF_CTRL,
+ B_BE_PWR_REF_CTRL_CCK, ref_cck);
+}
+
+static void rtw8922a_bb_tx_triangular(struct rtw89_dev *rtwdev, bool en,
+ enum rtw89_phy_idx phy_idx)
+{
+ u8 ctrl = en ? 0x1 : 0x0;
+
+ rtw89_phy_write32_idx(rtwdev, R_BEDGE3, B_BEDGE_CFG, ctrl, phy_idx);
+}
+
+static void rtw8922a_set_tx_shape(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ const struct rtw89_rfe_parms *rfe_parms = rtwdev->rfe_parms;
+ const struct rtw89_tx_shape *tx_shape = &rfe_parms->tx_shape;
+ u8 tx_shape_idx;
+ u8 band, regd;
+
+ band = chan->band_type;
+ regd = rtw89_regd_get(rtwdev, band);
+ tx_shape_idx = (*tx_shape->lmt)[band][RTW89_RS_OFDM][regd];
+
+ if (tx_shape_idx == 0)
+ rtw8922a_bb_tx_triangular(rtwdev, false, phy_idx);
+ else
+ rtw8922a_bb_tx_triangular(rtwdev, true, phy_idx);
+}
+
+static void rtw8922a_set_txpwr(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw89_phy_set_txpwr_byrate(rtwdev, chan, phy_idx);
+ rtw89_phy_set_txpwr_offset(rtwdev, chan, phy_idx);
+ rtw8922a_set_tx_shape(rtwdev, chan, phy_idx);
+ rtw89_phy_set_txpwr_limit(rtwdev, chan, phy_idx);
+ rtw89_phy_set_txpwr_limit_ru(rtwdev, chan, phy_idx);
+}
+
+static void rtw8922a_set_txpwr_ctrl(struct rtw89_dev *rtwdev,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_set_txpwr_ref(rtwdev, phy_idx);
+}
+
+static void rtw8922a_ctrl_trx_path(struct rtw89_dev *rtwdev,
+ enum rtw89_rf_path tx_path, u8 tx_nss,
+ enum rtw89_rf_path rx_path, u8 rx_nss)
+{
+ enum rtw89_phy_idx phy_idx;
+
+ for (phy_idx = RTW89_PHY_0; phy_idx <= RTW89_PHY_1; phy_idx++) {
+ rtw8922a_ctrl_tx_path_tmac(rtwdev, tx_path, phy_idx);
+ rtw8922a_ctrl_rx_path_tmac(rtwdev, rx_path, phy_idx);
+ rtw8922a_cfg_rx_nss_limit(rtwdev, rx_nss, phy_idx);
+ }
+}
+
+static void rtw8922a_ctrl_nbtg_bt_tx(struct rtw89_dev *rtwdev, bool en,
+ enum rtw89_phy_idx phy_idx)
+{
+ if (en) {
+ rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_A, B_FORCE_FIR_A, 0x3, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_A, B_RXBY_WBADC_A,
+ 0xf, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_A, B_BT_RXBY_WBADC_A,
+ 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BT_TRK_OFF_A, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_OP1DB_A, B_OP1DB_A, 0x80, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_OP1DB1_A, B_TIA10_A, 0x8080, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BACKOFF_A, B_LNA_IBADC_A, 0x34, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BKOFF_A, B_BKOFF_IBADC_A, 0x34, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_B, B_FORCE_FIR_B, 0x3, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_B, B_RXBY_WBADC_B,
+ 0xf, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_B, B_BT_RXBY_WBADC_B,
+ 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BT_TRK_OFF_B, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_OP, B_LNA6, 0x80, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_TIA, B_TIA10_B, 0x8080, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BACKOFF_B, B_LNA_IBADC_B, 0x34, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BKOFF_B, B_BKOFF_IBADC_B, 0x34, phy_idx);
+ } else {
+ rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_A, B_FORCE_FIR_A, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_A, B_RXBY_WBADC_A,
+ 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_A, B_BT_RXBY_WBADC_A,
+ 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BT_TRK_OFF_A, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_OP1DB_A, B_OP1DB_A, 0x1a, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_OP1DB1_A, B_TIA10_A, 0x2a2a, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BACKOFF_A, B_LNA_IBADC_A, 0x7a6, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BKOFF_A, B_BKOFF_IBADC_A, 0x26, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_B, B_FORCE_FIR_B, 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_B, B_RXBY_WBADC_B,
+ 0x0, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_B, B_BT_RXBY_WBADC_B,
+ 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BT_TRK_OFF_B, 0x1, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_OP, B_LNA6, 0x20, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_LNA_TIA, B_TIA10_B, 0x2a30, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BACKOFF_B, B_LNA_IBADC_B, 0x7a6, phy_idx);
+ rtw89_phy_write32_idx(rtwdev, R_BKOFF_B, B_BKOFF_IBADC_B, 0x26, phy_idx);
+ }
+}
+
+static void rtw8922a_bb_cfg_txrx_path(struct rtw89_dev *rtwdev)
+{
+ const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_SUB_ENTITY_0);
+ enum rtw89_band band = chan->band_type;
+ struct rtw89_hal *hal = &rtwdev->hal;
+ u8 ntx_path = RF_PATH_AB;
+ u32 tx_en0, tx_en1;
+
+ if (hal->antenna_tx == RF_A)
+ ntx_path = RF_PATH_A;
+ else if (hal->antenna_tx == RF_B)
+ ntx_path = RF_PATH_B;
+
+ rtw8922a_hal_reset(rtwdev, RTW89_PHY_0, RTW89_MAC_0, band, &tx_en0, true);
+ if (rtwdev->dbcc_en)
+ rtw8922a_hal_reset(rtwdev, RTW89_PHY_1, RTW89_MAC_1, band,
+ &tx_en1, true);
+
+ rtw8922a_ctrl_trx_path(rtwdev, ntx_path, 2, RF_PATH_AB, 2);
+
+ rtw8922a_hal_reset(rtwdev, RTW89_PHY_0, RTW89_MAC_0, band, &tx_en0, false);
+ if (rtwdev->dbcc_en)
+ rtw8922a_hal_reset(rtwdev, RTW89_PHY_1, RTW89_MAC_1, band,
+ &tx_en0, false);
+}
+
+static u8 rtw8922a_get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path)
+{
+ struct rtw89_power_trim_info *info = &rtwdev->pwr_trim;
+ int th;
+
+ /* read thermal only if debugging */
+ if (!rtw89_debug_is_enabled(rtwdev, RTW89_DBG_CFO | RTW89_DBG_RFK_TRACK))
+ return 80;
+
+ rtw89_write_rf(rtwdev, rf_path, RR_TM, RR_TM_TRI, 0x1);
+ rtw89_write_rf(rtwdev, rf_path, RR_TM, RR_TM_TRI, 0x0);
+ rtw89_write_rf(rtwdev, rf_path, RR_TM, RR_TM_TRI, 0x1);
+
+ fsleep(200);
+
+ th = rtw89_read_rf(rtwdev, rf_path, RR_TM, RR_TM_VAL_V1);
+ th += (s8)info->thermal_trim[rf_path];
+
+ return clamp_t(int, th, 0, U8_MAX);
+}
+
+static void rtw8922a_btc_set_rfe(struct rtw89_dev *rtwdev)
+{
+ union rtw89_btc_module_info *md = &rtwdev->btc.mdinfo;
+ struct rtw89_btc_module_v7 *module = &md->md_v7;
+
+ module->rfe_type = rtwdev->efuse.rfe_type;
+ module->kt_ver = rtwdev->hal.cv;
+ module->bt_solo = 0;
+ module->switch_type = BTC_SWITCH_INTERNAL;
+ module->wa_type = 0;
+
+ module->ant.type = BTC_ANT_SHARED;
+ module->ant.num = 2;
+ module->ant.isolation = 10;
+ module->ant.diversity = 0;
+ module->ant.single_pos = RF_PATH_A;
+ module->ant.btg_pos = RF_PATH_B;
+
+ if (module->kt_ver <= 1)
+ module->wa_type |= BTC_WA_HFP_ZB;
+
+ rtwdev->btc.cx.other.type = BTC_3CX_NONE;
+
+ if (module->rfe_type == 0) {
+ rtwdev->btc.dm.error.map.rfe_type0 = true;
+ return;
+ }
+
+ module->ant.num = (module->rfe_type % 2) ? 2 : 3;
+
+ if (module->kt_ver == 0)
+ module->ant.num = 2;
+
+ if (module->ant.num == 3) {
+ module->ant.type = BTC_ANT_DEDICATED;
+ module->bt_pos = BTC_BT_ALONE;
+ } else {
+ module->ant.type = BTC_ANT_SHARED;
+ module->bt_pos = BTC_BT_BTG;
+ }
+ rtwdev->btc.btg_pos = module->ant.btg_pos;
+ rtwdev->btc.ant_type = module->ant.type;
+}
+
+static
+void rtw8922a_set_trx_mask(struct rtw89_dev *rtwdev, u8 path, u8 group, u32 val)
+{
+ rtw89_write_rf(rtwdev, path, RR_LUTWA, RFREG_MASK, group);
+ rtw89_write_rf(rtwdev, path, RR_LUTWD0, RFREG_MASK, val);
+}
+
+static void rtw8922a_btc_init_cfg(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_btc *btc = &rtwdev->btc;
+ struct rtw89_btc_ant_info_v7 *ant = &btc->mdinfo.md_v7.ant;
+ u32 wl_pri, path_min, path_max;
+ u8 path;
+
+ /* for 1-Ant && 1-ss case: only 1-path */
+ if (ant->num == 1) {
+ path_min = ant->single_pos;
+ path_max = path_min;
+ } else {
+ path_min = RF_PATH_A;
+ path_max = RF_PATH_B;
+ }
+
+ path = path_min;
+
+ for (path = path_min; path <= path_max; path++) {
+ /* set DEBUG_LUT_RFMODE_MASK = 1 to start trx-mask-setup */
+ rtw89_write_rf(rtwdev, path, RR_LUTWE, RFREG_MASK, BIT(17));
+
+ /* if GNT_WL=0 && BT=SS_group --> WL Tx/Rx = THRU */
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_SS_GROUP, 0x5ff);
+
+ /* if GNT_WL=0 && BT=Rx_group --> WL-Rx = THRU + WL-Tx = MASK */
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_RX_GROUP, 0x5df);
+
+ /* if GNT_WL = 0 && BT = Tx_group -->
+ * Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff)
+ */
+ if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path)
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
+ else
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
+
+ rtw89_write_rf(rtwdev, path, RR_LUTWE, RFREG_MASK, 0);
+ }
+
+ /* set WL PTA Hi-Pri: Ack-Tx, beacon-tx, Trig-frame-Tx, Null-Tx*/
+ wl_pri = B_BTC_RSP_ACK_HI | B_BTC_TX_BCN_HI | B_BTC_TX_TRI_HI |
+ B_BTC_TX_NULL_HI;
+ rtw89_write32(rtwdev, R_BTC_COEX_WL_REQ_BE, wl_pri);
+
+ /* set PTA break table */
+ rtw89_write32(rtwdev, R_BE_BT_BREAK_TABLE, BTC_BREAK_PARAM);
+
+ /* ZB coex table init for HFP PTA req-cmd bit-4 define issue COEX-900*/
+ rtw89_write32(rtwdev, R_BTC_ZB_COEX_TBL_0, 0xda5a5a5a);
+
+ rtw89_write32(rtwdev, R_BTC_ZB_COEX_TBL_1, 0xda5a5a5a);
+
+ rtw89_write32(rtwdev, R_BTC_ZB_BREAK_TBL, 0xf0ffffff);
+ btc->cx.wl.status.map.init_ok = true;
+}
+
+static void rtw8922a_fill_freq_with_ppdu(struct rtw89_dev *rtwdev,
+ struct rtw89_rx_phy_ppdu *phy_ppdu,
+ struct ieee80211_rx_status *status)
+{
+ u8 chan_idx = phy_ppdu->chan_idx;
+ enum nl80211_band band;
+ u8 ch;
+
+ if (chan_idx == 0)
+ return;
+
+ rtw89_decode_chan_idx(rtwdev, chan_idx, &ch, &band);
+ status->freq = ieee80211_channel_to_frequency(ch, band);
+ status->band = band;
+}
+
+static void rtw8922a_query_ppdu(struct rtw89_dev *rtwdev,
+ struct rtw89_rx_phy_ppdu *phy_ppdu,
+ struct ieee80211_rx_status *status)
+{
+ u8 path;
+ u8 *rx_power = phy_ppdu->rssi;
+
+ status->signal =
+ RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], rx_power[RF_PATH_B]));
+ for (path = 0; path < rtwdev->chip->rf_path_num; path++) {
+ status->chains |= BIT(path);
+ status->chain_signal[path] = RTW89_RSSI_RAW_TO_DBM(rx_power[path]);
+ }
+ if (phy_ppdu->valid)
+ rtw8922a_fill_freq_with_ppdu(rtwdev, phy_ppdu, status);
+}
+
+static int rtw8922a_mac_enable_bb_rf(struct rtw89_dev *rtwdev)
+{
+ rtw89_write8_set(rtwdev, R_BE_FEN_RST_ENABLE,
+ B_BE_FEN_BBPLAT_RSTB | B_BE_FEN_BB_IP_RSTN);
+ rtw89_write32(rtwdev, R_BE_DMAC_SYS_CR32B, 0x7FF97FF9);
+
+ return 0;
+}
+
+static int rtw8922a_mac_disable_bb_rf(struct rtw89_dev *rtwdev)
+{
+ rtw89_write8_clr(rtwdev, R_BE_FEN_RST_ENABLE,
+ B_BE_FEN_BBPLAT_RSTB | B_BE_FEN_BB_IP_RSTN);
+
+ return 0;
+}
+
#ifdef CONFIG_PM
static const struct wiphy_wowlan_support rtw_wowlan_stub_8922a = {
.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT,
@@ -610,10 +2317,56 @@ static const struct wiphy_wowlan_support rtw_wowlan_stub_8922a = {
#endif
static const struct rtw89_chip_ops rtw8922a_chip_ops = {
+ .enable_bb_rf = rtw8922a_mac_enable_bb_rf,
+ .disable_bb_rf = rtw8922a_mac_disable_bb_rf,
+ .bb_preinit = rtw8922a_bb_preinit,
+ .bb_postinit = rtw8922a_bb_postinit,
+ .bb_reset = rtw8922a_bb_reset,
+ .bb_sethw = rtw8922a_bb_sethw,
+ .read_rf = rtw89_phy_read_rf_v2,
+ .write_rf = rtw89_phy_write_rf_v2,
+ .set_channel = rtw8922a_set_channel,
+ .set_channel_help = rtw8922a_set_channel_help,
.read_efuse = rtw8922a_read_efuse,
.read_phycap = rtw8922a_read_phycap,
+ .fem_setup = NULL,
+ .rfe_gpio = NULL,
+ .rfk_hw_init = rtw8922a_rfk_hw_init,
+ .rfk_init = rtw8922a_rfk_init,
+ .rfk_init_late = rtw8922a_rfk_init_late,
+ .rfk_channel = rtw8922a_rfk_channel,
+ .rfk_band_changed = rtw8922a_rfk_band_changed,
+ .rfk_scan = rtw8922a_rfk_scan,
+ .rfk_track = rtw8922a_rfk_track,
+ .power_trim = rtw8922a_power_trim,
+ .set_txpwr = rtw8922a_set_txpwr,
+ .set_txpwr_ctrl = rtw8922a_set_txpwr_ctrl,
+ .init_txpwr_unit = NULL,
+ .get_thermal = rtw8922a_get_thermal,
+ .ctrl_btg_bt_rx = rtw8922a_ctrl_btg_bt_rx,
+ .query_ppdu = rtw8922a_query_ppdu,
+ .ctrl_nbtg_bt_tx = rtw8922a_ctrl_nbtg_bt_tx,
+ .cfg_txrx_path = rtw8922a_bb_cfg_txrx_path,
+ .set_txpwr_ul_tb_offset = NULL,
.pwr_on_func = rtw8922a_pwr_on_func,
.pwr_off_func = rtw8922a_pwr_off_func,
+ .query_rxdesc = rtw89_core_query_rxdesc_v2,
+ .fill_txdesc = rtw89_core_fill_txdesc_v2,
+ .fill_txdesc_fwcmd = rtw89_core_fill_txdesc_fwcmd_v2,
+ .cfg_ctrl_path = rtw89_mac_cfg_ctrl_path_v2,
+ .mac_cfg_gnt = rtw89_mac_cfg_gnt_v2,
+ .stop_sch_tx = rtw89_mac_stop_sch_tx_v2,
+ .resume_sch_tx = rtw89_mac_resume_sch_tx_v2,
+ .h2c_dctl_sec_cam = rtw89_fw_h2c_dctl_sec_cam_v2,
+ .h2c_default_cmac_tbl = rtw89_fw_h2c_default_cmac_tbl_g7,
+ .h2c_assoc_cmac_tbl = rtw89_fw_h2c_assoc_cmac_tbl_g7,
+ .h2c_ampdu_cmac_tbl = rtw89_fw_h2c_ampdu_cmac_tbl_g7,
+ .h2c_default_dmac_tbl = rtw89_fw_h2c_default_dmac_tbl_v2,
+ .h2c_update_beacon = rtw89_fw_h2c_update_beacon_be,
+ .h2c_ba_cam = rtw89_fw_h2c_ba_cam_v1,
+
+ .btc_set_rfe = rtw8922a_btc_set_rfe,
+ .btc_init_cfg = rtw8922a_btc_init_cfg,
};
const struct rtw89_chip_info rtw8922a_chip_info = {
@@ -650,11 +2403,16 @@ const struct rtw89_chip_info rtw8922a_chip_info = {
.txpwr_factor_rf = 2,
.txpwr_factor_mac = 1,
.dig_table = NULL,
+ .dig_regs = &rtw8922a_dig_regs,
.tssi_dbw_table = NULL,
- .support_chanctx_num = 1,
+ .support_chanctx_num = 2,
.support_bands = BIT(NL80211_BAND_2GHZ) |
BIT(NL80211_BAND_5GHZ) |
BIT(NL80211_BAND_6GHZ),
+ .support_bandwidths = BIT(NL80211_CHAN_WIDTH_20) |
+ BIT(NL80211_CHAN_WIDTH_40) |
+ BIT(NL80211_CHAN_WIDTH_80) |
+ BIT(NL80211_CHAN_WIDTH_160),
.support_unii4 = true,
.ul_tb_waveform_ctrl = false,
.ul_tb_pwr_diff = false,
@@ -665,7 +2423,7 @@ const struct rtw89_chip_info rtw8922a_chip_info = {
.acam_num = 128,
.bcam_num = 20,
.scam_num = 32,
- .bacam_num = 8,
+ .bacam_num = 24,
.bacam_dynamic_num = 8,
.bacam_ver = RTW89_BACAM_V1,
.ppdu_max_usr = 16,
@@ -683,10 +2441,19 @@ const struct rtw89_chip_info rtw8922a_chip_info = {
BIT(RTW89_PS_MODE_CLK_GATED) |
BIT(RTW89_PS_MODE_PWR_GATED),
.low_power_hci_modes = 0,
+ .h2c_cctl_func_id = H2C_FUNC_MAC_CCTLINFO_UD_G7,
.hci_func_en_addr = R_BE_HCI_FUNC_EN,
.h2c_desc_size = sizeof(struct rtw89_rxdesc_short_v2),
.txwd_body_size = sizeof(struct rtw89_txwd_body_v2),
.txwd_info_size = sizeof(struct rtw89_txwd_info_v2),
+ .h2c_ctrl_reg = R_BE_H2CREG_CTRL,
+ .h2c_counter_reg = {R_BE_UDM1 + 1, B_BE_UDM1_HALMAC_H2C_DEQ_CNT_MASK >> 8},
+ .h2c_regs = rtw8922a_h2c_regs,
+ .c2h_ctrl_reg = R_BE_C2HREG_CTRL,
+ .c2h_counter_reg = {R_BE_UDM1 + 1, B_BE_UDM1_HALMAC_C2H_ENQ_CNT_MASK >> 8},
+ .c2h_regs = rtw8922a_c2h_regs,
+ .page_regs = &rtw8922a_page_regs,
+ .wow_reason_reg = R_AX_C2HREG_DATA3_V1 + 3,
.cfo_src_fd = true,
.cfo_hw_comp = true,
.dcfo_comp = NULL,
@@ -694,9 +2461,11 @@ const struct rtw89_chip_info rtw8922a_chip_info = {
.imr_info = NULL,
.imr_dmac_table = &rtw8922a_imr_dmac_table,
.imr_cmac_table = &rtw8922a_imr_cmac_table,
+ .rrsr_cfgs = &rtw8922a_rrsr_cfgs,
.bss_clr_vld = {R_BSS_CLR_VLD_V2, B_BSS_CLR_VLD0_V2},
.bss_clr_map_reg = R_BSS_CLR_MAP_V2,
.dma_ch_mask = 0,
+ .edcca_regs = &rtw8922a_edcca_regs,
#ifdef CONFIG_PM
.wowlan_stub = &rtw_wowlan_stub_8922a,
#endif
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.c
new file mode 100644
index 000000000000..2a371829268c
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2023 Realtek Corporation
+ */
+
+#include "chan.h"
+#include "debug.h"
+#include "mac.h"
+#include "phy.h"
+#include "reg.h"
+#include "rtw8922a.h"
+#include "rtw8922a_rfk.h"
+
+static void rtw8922a_tssi_cont_en(struct rtw89_dev *rtwdev, bool en,
+ enum rtw89_rf_path path)
+{
+ static const u32 tssi_trk_man[2] = {R_TSSI_PWR_P0, R_TSSI_PWR_P1};
+
+ if (en)
+ rtw89_phy_write32_mask(rtwdev, tssi_trk_man[path], B_TSSI_CONT_EN, 0);
+ else
+ rtw89_phy_write32_mask(rtwdev, tssi_trk_man[path], B_TSSI_CONT_EN, 1);
+}
+
+void rtw8922a_tssi_cont_en_phyidx(struct rtw89_dev *rtwdev, bool en, u8 phy_idx)
+{
+ if (rtwdev->mlo_dbcc_mode == MLO_1_PLUS_1_1RF) {
+ if (phy_idx == RTW89_PHY_0)
+ rtw8922a_tssi_cont_en(rtwdev, en, RF_PATH_A);
+ else
+ rtw8922a_tssi_cont_en(rtwdev, en, RF_PATH_B);
+ } else {
+ rtw8922a_tssi_cont_en(rtwdev, en, RF_PATH_A);
+ rtw8922a_tssi_cont_en(rtwdev, en, RF_PATH_B);
+ }
+}
+
+static
+void rtw8922a_ctl_band_ch_bw(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy,
+ u8 central_ch, enum rtw89_band band,
+ enum rtw89_bandwidth bw)
+{
+ const u32 rf_addr[2] = {RR_CFGCH, RR_CFGCH_V1};
+ struct rtw89_hal *hal = &rtwdev->hal;
+ u32 rf_reg[RF_PATH_NUM_8922A][2];
+ u8 synpath;
+ u32 rf18;
+ u8 kpath;
+ u8 path;
+ u8 i;
+
+ rf_reg[RF_PATH_A][0] = rtw89_read_rf(rtwdev, RF_PATH_A, rf_addr[0], RFREG_MASK);
+ rf_reg[RF_PATH_A][1] = rtw89_read_rf(rtwdev, RF_PATH_A, rf_addr[1], RFREG_MASK);
+ rf_reg[RF_PATH_B][0] = rtw89_read_rf(rtwdev, RF_PATH_B, rf_addr[0], RFREG_MASK);
+ rf_reg[RF_PATH_B][1] = rtw89_read_rf(rtwdev, RF_PATH_B, rf_addr[1], RFREG_MASK);
+
+ kpath = rtw89_phy_get_kpath(rtwdev, phy);
+ synpath = rtw89_phy_get_syn_sel(rtwdev, phy);
+
+ rf18 = rtw89_read_rf(rtwdev, synpath, RR_CFGCH, RFREG_MASK);
+ if (rf18 == INV_RF_DATA) {
+ rtw89_warn(rtwdev, "[RFK] Invalid RF18 value\n");
+ return;
+ }
+
+ for (path = 0; path < RF_PATH_NUM_8922A; path++) {
+ if (!(kpath & BIT(path)))
+ continue;
+
+ for (i = 0; i < 2; i++) {
+ if (rf_reg[path][i] == INV_RF_DATA) {
+ rtw89_warn(rtwdev,
+ "[RFK] Invalid RF_0x18 for Path-%d\n", path);
+ return;
+ }
+
+ rf_reg[path][i] &= ~(RR_CFGCH_BAND1 | RR_CFGCH_BW |
+ RR_CFGCH_BAND0 | RR_CFGCH_CH);
+ rf_reg[path][i] |= u32_encode_bits(central_ch, RR_CFGCH_CH);
+
+ if (band == RTW89_BAND_2G)
+ rtw89_write_rf(rtwdev, path, RR_SMD, RR_VCO2, 0x0);
+ else
+ rtw89_write_rf(rtwdev, path, RR_SMD, RR_VCO2, 0x1);
+
+ switch (band) {
+ case RTW89_BAND_2G:
+ default:
+ break;
+ case RTW89_BAND_5G:
+ rf_reg[path][i] |=
+ u32_encode_bits(CFGCH_BAND1_5G, RR_CFGCH_BAND1) |
+ u32_encode_bits(CFGCH_BAND0_5G, RR_CFGCH_BAND0);
+ break;
+ case RTW89_BAND_6G:
+ rf_reg[path][i] |=
+ u32_encode_bits(CFGCH_BAND1_6G, RR_CFGCH_BAND1) |
+ u32_encode_bits(CFGCH_BAND0_6G, RR_CFGCH_BAND0);
+ break;
+ }
+
+ switch (bw) {
+ case RTW89_CHANNEL_WIDTH_5:
+ case RTW89_CHANNEL_WIDTH_10:
+ case RTW89_CHANNEL_WIDTH_20:
+ default:
+ break;
+ case RTW89_CHANNEL_WIDTH_40:
+ rf_reg[path][i] |=
+ u32_encode_bits(CFGCH_BW_V2_40M, RR_CFGCH_BW_V2);
+ break;
+ case RTW89_CHANNEL_WIDTH_80:
+ rf_reg[path][i] |=
+ u32_encode_bits(CFGCH_BW_V2_80M, RR_CFGCH_BW_V2);
+ break;
+ case RTW89_CHANNEL_WIDTH_160:
+ rf_reg[path][i] |=
+ u32_encode_bits(CFGCH_BW_V2_160M, RR_CFGCH_BW_V2);
+ break;
+ case RTW89_CHANNEL_WIDTH_320:
+ rf_reg[path][i] |=
+ u32_encode_bits(CFGCH_BW_V2_320M, RR_CFGCH_BW_V2);
+ break;
+ }
+
+ rtw89_write_rf(rtwdev, path, rf_addr[i],
+ RFREG_MASK, rf_reg[path][i]);
+ fsleep(100);
+ }
+ }
+
+ if (hal->cv != CHIP_CAV)
+ return;
+
+ if (band == RTW89_BAND_2G) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWE, RFREG_MASK, 0x80000);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWA, RFREG_MASK, 0x00003);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWD1, RFREG_MASK, 0x0c990);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWD0, RFREG_MASK, 0xebe38);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWE, RFREG_MASK, 0x00000);
+ } else {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWE, RFREG_MASK, 0x80000);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWA, RFREG_MASK, 0x00003);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWD1, RFREG_MASK, 0x0c190);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWD0, RFREG_MASK, 0xebe38);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_LUTWE, RFREG_MASK, 0x00000);
+ }
+}
+
+void rtw8922a_set_channel_rf(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_ctl_band_ch_bw(rtwdev, phy_idx, chan->channel, chan->band_type,
+ chan->band_width);
+}
+
+enum _rf_syn_pow {
+ RF_SYN_ON_OFF,
+ RF_SYN_OFF_ON,
+ RF_SYN_ALLON,
+ RF_SYN_ALLOFF,
+};
+
+static void rtw8922a_set_syn01_cav(struct rtw89_dev *rtwdev, enum _rf_syn_pow syn)
+{
+ if (syn == RF_SYN_ALLON) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x3);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x2);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x3);
+
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x3);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x2);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x3);
+ } else if (syn == RF_SYN_ON_OFF) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x3);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x2);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x3);
+
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x0);
+ } else if (syn == RF_SYN_OFF_ON) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x0);
+
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x3);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x2);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x3);
+ } else if (syn == RF_SYN_ALLOFF) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN, 0x0);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN, 0x0);
+ }
+}
+
+static void rtw8922a_set_syn01_cbv(struct rtw89_dev *rtwdev, enum _rf_syn_pow syn)
+{
+ if (syn == RF_SYN_ALLON) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0xf);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0xf);
+ } else if (syn == RF_SYN_ON_OFF) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0xf);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0x0);
+ } else if (syn == RF_SYN_OFF_ON) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0x0);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0xf);
+ } else if (syn == RF_SYN_ALLOFF) {
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0x0);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0x0);
+ }
+}
+
+static void rtw8922a_set_syn01(struct rtw89_dev *rtwdev, enum _rf_syn_pow syn)
+{
+ struct rtw89_hal *hal = &rtwdev->hal;
+
+ rtw89_debug(rtwdev, RTW89_DBG_RFK, "SYN config=%d\n", syn);
+
+ if (hal->cv == CHIP_CAV)
+ rtw8922a_set_syn01_cav(rtwdev, syn);
+ else
+ rtw8922a_set_syn01_cbv(rtwdev, syn);
+}
+
+static void rtw8922a_chlk_ktbl_sel(struct rtw89_dev *rtwdev, u8 kpath, u8 idx)
+{
+ u32 tmp;
+
+ if (idx > 2) {
+ rtw89_warn(rtwdev, "[DBCC][ERROR]indx is out of limit!! index(%d)", idx);
+ return;
+ }
+
+ if (kpath & RF_A) {
+ rtw89_phy_write32_mask(rtwdev, R_COEF_SEL, B_COEF_SEL_EN, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_COEF_SEL, B_COEF_SEL_IQC_V1, idx);
+ rtw89_phy_write32_mask(rtwdev, R_COEF_SEL, B_COEF_SEL_MDPD_V1, idx);
+ rtw89_write_rf(rtwdev, RF_PATH_A, RR_MODOPT, RR_TXG_SEL, 0x4 | idx);
+
+ tmp = rtw89_phy_read32_mask(rtwdev, R_COEF_SEL, BIT(0));
+ rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G3, tmp);
+ tmp = rtw89_phy_read32_mask(rtwdev, R_COEF_SEL, BIT(1));
+ rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT, B_CFIR_LUT_G5, tmp);
+ }
+
+ if (kpath & RF_B) {
+ rtw89_phy_write32_mask(rtwdev, R_COEF_SEL_C1, B_COEF_SEL_EN, 0x1);
+ rtw89_phy_write32_mask(rtwdev, R_COEF_SEL_C1, B_COEF_SEL_IQC_V1, idx);
+ rtw89_phy_write32_mask(rtwdev, R_COEF_SEL_C1, B_COEF_SEL_MDPD_V1, idx);
+ rtw89_write_rf(rtwdev, RF_PATH_B, RR_MODOPT, RR_TXG_SEL, 0x4 | idx);
+
+ tmp = rtw89_phy_read32_mask(rtwdev, R_COEF_SEL_C1, BIT(0));
+ rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT_C1, B_CFIR_LUT_G3, tmp);
+ tmp = rtw89_phy_read32_mask(rtwdev, R_COEF_SEL_C1, BIT(1));
+ rtw89_phy_write32_mask(rtwdev, R_CFIR_LUT_C1, B_CFIR_LUT_G5, tmp);
+ }
+}
+
+static void rtw8922a_chlk_reload(struct rtw89_dev *rtwdev)
+{
+ struct rtw89_rfk_mcc_info *rfk_mcc = &rtwdev->rfk_mcc;
+ enum rtw89_sub_entity_idx sub_entity_idx;
+ const struct rtw89_chan *chan;
+ enum rtw89_entity_mode mode;
+ u8 s0_tbl, s1_tbl;
+ u8 tbl_sel;
+
+ mode = rtw89_get_entity_mode(rtwdev);
+ switch (mode) {
+ case RTW89_ENTITY_MODE_MCC_PREPARE:
+ sub_entity_idx = RTW89_SUB_ENTITY_1;
+ tbl_sel = 1;
+ break;
+ default:
+ sub_entity_idx = RTW89_SUB_ENTITY_0;
+ tbl_sel = 0;
+ break;
+ }
+
+ chan = rtw89_chan_get(rtwdev, sub_entity_idx);
+
+ rfk_mcc->ch[tbl_sel] = chan->channel;
+ rfk_mcc->band[tbl_sel] = chan->band_type;
+ rfk_mcc->bw[tbl_sel] = chan->band_width;
+ rfk_mcc->table_idx = tbl_sel;
+
+ s0_tbl = tbl_sel;
+ s1_tbl = tbl_sel;
+
+ rtw8922a_chlk_ktbl_sel(rtwdev, RF_A, s0_tbl);
+ rtw8922a_chlk_ktbl_sel(rtwdev, RF_B, s1_tbl);
+}
+
+static void rtw8922a_rfk_mlo_ctrl(struct rtw89_dev *rtwdev)
+{
+ enum _rf_syn_pow syn_pow;
+
+ if (!rtwdev->dbcc_en)
+ goto set_rfk_reload;
+
+ switch (rtwdev->mlo_dbcc_mode) {
+ case MLO_0_PLUS_2_1RF:
+ syn_pow = RF_SYN_OFF_ON;
+ break;
+ case MLO_0_PLUS_2_2RF:
+ case MLO_1_PLUS_1_2RF:
+ case MLO_2_PLUS_0_1RF:
+ case MLO_2_PLUS_0_2RF:
+ case MLO_2_PLUS_2_2RF:
+ case MLO_DBCC_NOT_SUPPORT:
+ default:
+ syn_pow = RF_SYN_ON_OFF;
+ break;
+ case MLO_1_PLUS_1_1RF:
+ case DBCC_LEGACY:
+ syn_pow = RF_SYN_ALLON;
+ break;
+ }
+
+ rtw8922a_set_syn01(rtwdev, syn_pow);
+
+set_rfk_reload:
+ rtw8922a_chlk_reload(rtwdev);
+}
+
+static void rtw8922a_rfk_pll_init(struct rtw89_dev *rtwdev)
+{
+ int ret;
+ u8 tmp;
+
+ ret = rtw89_mac_read_xtal_si(rtwdev, XTAL_SI_PLL_1, &tmp);
+ if (ret)
+ return;
+ ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_PLL_1, tmp | 0xf8, 0xFF);
+ if (ret)
+ return;
+
+ ret = rtw89_mac_read_xtal_si(rtwdev, XTAL_SI_APBT, &tmp);
+ if (ret)
+ return;
+ ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_APBT, tmp & ~0x60, 0xFF);
+ if (ret)
+ return;
+
+ ret = rtw89_mac_read_xtal_si(rtwdev, XTAL_SI_XTAL_PLL, &tmp);
+ if (ret)
+ return;
+ ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_XTAL_PLL, tmp | 0x38, 0xFF);
+ if (ret)
+ return;
+}
+
+void rtw8922a_rfk_hw_init(struct rtw89_dev *rtwdev)
+{
+ if (rtwdev->dbcc_en)
+ rtw8922a_rfk_mlo_ctrl(rtwdev);
+
+ rtw8922a_rfk_pll_init(rtwdev);
+}
+
+void rtw8922a_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ bool mlo_1_1;
+
+ if (!rtwdev->dbcc_en)
+ return;
+
+ mlo_1_1 = rtw89_is_mlo_1_1(rtwdev);
+ if (mlo_1_1)
+ rtw8922a_set_syn01(rtwdev, RF_SYN_ALLON);
+ else if (phy_idx == RTW89_PHY_0)
+ rtw8922a_set_syn01(rtwdev, RF_SYN_ON_OFF);
+ else
+ rtw8922a_set_syn01(rtwdev, RF_SYN_OFF_ON);
+
+ fsleep(1000);
+}
+
+void rtw8922a_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx)
+{
+ rtw8922a_rfk_mlo_ctrl(rtwdev);
+}
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.h
new file mode 100644
index 000000000000..66bdd57c1eea
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw89/rtw8922a_rfk.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2023 Realtek Corporation
+ */
+
+#ifndef __RTW89_8922A_RFK_H__
+#define __RTW89_8922A_RFK_H__
+
+#include "core.h"
+
+void rtw8922a_tssi_cont_en_phyidx(struct rtw89_dev *rtwdev, bool en, u8 phy_idx);
+void rtw8922a_set_channel_rf(struct rtw89_dev *rtwdev,
+ const struct rtw89_chan *chan,
+ enum rtw89_phy_idx phy_idx);
+void rtw8922a_rfk_hw_init(struct rtw89_dev *rtwdev);
+void rtw8922a_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+void rtw8922a_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx);
+
+#endif
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922ae.c b/drivers/net/wireless/realtek/rtw89/rtw8922ae.c
index 7b3d98d2c402..4981b657bd7b 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8922ae.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8922ae.c
@@ -26,6 +26,7 @@ static const struct rtw89_pci_info rtw8922a_pci_info = {
.io_rcy_en = MAC_AX_PCIE_ENABLE,
.io_rcy_tmr = MAC_AX_IO_RCY_ANA_TMR_DEF,
.rx_ring_eq_is_full = true,
+ .check_rx_tag = true,
.init_cfg_reg = R_BE_HAXI_INIT_CFG1,
.txhci_en_bit = B_BE_TXDMA_EN,
@@ -79,7 +80,7 @@ static struct pci_driver rtw89_8922ae_driver = {
.id_table = rtw89_8922ae_id_table,
.probe = rtw89_pci_probe,
.remove = rtw89_pci_remove,
- .driver.pm = &rtw89_pm_ops,
+ .driver.pm = &rtw89_pm_ops_be,
};
module_pci_driver(rtw89_8922ae_driver);
diff --git a/drivers/net/wireless/realtek/rtw89/wow.c b/drivers/net/wireless/realtek/rtw89/wow.c
index 5c7ca36c09b6..ccad026defb5 100644
--- a/drivers/net/wireless/realtek/rtw89/wow.c
+++ b/drivers/net/wireless/realtek/rtw89/wow.c
@@ -41,34 +41,8 @@ static void rtw89_wow_leave_lps(struct rtw89_dev *rtwdev)
static int rtw89_wow_config_mac(struct rtw89_dev *rtwdev, bool enable_wow)
{
const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
- int ret;
-
- if (enable_wow) {
- ret = rtw89_mac_resize_ple_rx_quota(rtwdev, true);
- if (ret) {
- rtw89_err(rtwdev, "[ERR]patch rx qta %d\n", ret);
- return ret;
- }
- rtw89_write32_set(rtwdev, R_AX_RX_FUNCTION_STOP, B_AX_HDR_RX_STOP);
- rtw89_write32_clr(rtwdev, mac->rx_fltr, B_AX_SNIFFER_MODE);
- rtw89_mac_cfg_ppdu_status(rtwdev, RTW89_MAC_0, false);
- rtw89_write32(rtwdev, R_AX_ACTION_FWD0, 0);
- rtw89_write32(rtwdev, R_AX_ACTION_FWD1, 0);
- rtw89_write32(rtwdev, R_AX_TF_FWD, 0);
- rtw89_write32(rtwdev, R_AX_HW_RPT_FWD, 0);
- } else {
- ret = rtw89_mac_resize_ple_rx_quota(rtwdev, false);
- if (ret) {
- rtw89_err(rtwdev, "[ERR]patch rx qta %d\n", ret);
- return ret;
- }
- rtw89_write32_clr(rtwdev, R_AX_RX_FUNCTION_STOP, B_AX_HDR_RX_STOP);
- rtw89_mac_cfg_ppdu_status(rtwdev, RTW89_MAC_0, true);
- rtw89_write32(rtwdev, R_AX_ACTION_FWD0, TRXCFG_MPDU_PROC_ACT_FRWD);
- rtw89_write32(rtwdev, R_AX_TF_FWD, TRXCFG_MPDU_PROC_TF_FRWD);
- }
- return 0;
+ return mac->wow_config_mac(rtwdev, enable_wow);
}
static void rtw89_wow_set_rx_filter(struct rtw89_dev *rtwdev, bool enable)
@@ -85,21 +59,14 @@ static void rtw89_wow_set_rx_filter(struct rtw89_dev *rtwdev, bool enable)
static void rtw89_wow_show_wakeup_reason(struct rtw89_dev *rtwdev)
{
- enum rtw89_core_chip_id chip_id = rtwdev->chip->chip_id;
+ u32 wow_reason_reg = rtwdev->chip->wow_reason_reg;
struct cfg80211_wowlan_nd_info nd_info;
struct cfg80211_wowlan_wakeup wakeup = {
.pattern_idx = -1,
};
- u32 wow_reason_reg;
u8 reason;
- if (chip_id == RTL8852A || chip_id == RTL8852B || chip_id == RTL8851B)
- wow_reason_reg = R_AX_C2HREG_DATA3 + 3;
- else
- wow_reason_reg = R_AX_C2HREG_DATA3_V1 + 3;
-
reason = rtw89_read8(rtwdev, wow_reason_reg);
-
switch (reason) {
case RTW89_WOW_RSN_RX_DEAUTH:
wakeup.disconnect = true;
@@ -470,13 +437,14 @@ static int rtw89_wow_cfg_wake(struct rtw89_dev *rtwdev, bool wow)
static int rtw89_wow_check_fw_status(struct rtw89_dev *rtwdev, bool wow_enable)
{
+ const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def;
u8 polling;
int ret;
ret = read_poll_timeout_atomic(rtw89_read8_mask, polling,
wow_enable == !!polling,
50, 50000, false, rtwdev,
- R_AX_WOW_CTRL, B_AX_WOW_WOWEN);
+ mac->wow_ctrl.addr, mac->wow_ctrl.mask);
if (ret)
rtw89_err(rtwdev, "failed to check wow status %s\n",
wow_enable ? "enabled" : "disabled");
@@ -519,7 +487,7 @@ static int rtw89_wow_swap_fw(struct rtw89_dev *rtwdev, bool wow)
return ret;
}
- ret = rtw89_fw_h2c_assoc_cmac_tbl(rtwdev, wow_vif, wow_sta);
+ ret = rtw89_chip_h2c_assoc_cmac_tbl(rtwdev, wow_vif, wow_sta);
if (ret) {
rtw89_warn(rtwdev, "failed to send h2c assoc cmac tbl\n");
return ret;
@@ -566,7 +534,7 @@ static int rtw89_wow_enable_trx_pre(struct rtw89_dev *rtwdev)
rtw89_mac_ptk_drop_by_band_and_wait(rtwdev, RTW89_MAC_0);
- ret = rtw89_hci_poll_txdma_ch(rtwdev);
+ ret = rtw89_hci_poll_txdma_ch_idle(rtwdev);
if (ret) {
rtw89_err(rtwdev, "txdma ch busy\n");
return ret;
@@ -589,7 +557,7 @@ static int rtw89_wow_enable_trx_post(struct rtw89_dev *rtwdev)
rtw89_hci_disable_intr(rtwdev);
rtw89_hci_ctrl_trxhci(rtwdev, false);
- ret = rtw89_hci_poll_txdma_ch(rtwdev);
+ ret = rtw89_hci_poll_txdma_ch_idle(rtwdev);
if (ret) {
rtw89_err(rtwdev, "failed to poll txdma ch idle pcie\n");
return ret;
@@ -699,14 +667,14 @@ static int rtw89_wow_fw_stop(struct rtw89_dev *rtwdev)
goto out;
}
+ rtw89_fw_release_general_pkt_list(rtwdev, true);
+
ret = rtw89_wow_cfg_wake(rtwdev, false);
if (ret) {
rtw89_err(rtwdev, "wow: failed to disable config wake\n");
goto out;
}
- rtw89_fw_release_general_pkt_list(rtwdev, true);
-
ret = rtw89_wow_check_fw_status(rtwdev, false);
if (ret) {
rtw89_err(rtwdev, "wow: failed to check disable fw ready\n");
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 05890536e353..211fa25b9a78 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -740,7 +740,7 @@ u16 rsi_get_connected_channel(struct ieee80211_vif *vif)
return 0;
bss = &vif->bss_conf;
- channel = bss->chandef.chan;
+ channel = bss->chanreq.oper.chan;
if (!channel)
return 0;
@@ -759,7 +759,7 @@ static void rsi_switch_channel(struct rsi_hw *adapter,
if (!vif)
return;
- channel = vif->bss_conf.chandef.chan;
+ channel = vif->bss_conf.chanreq.oper.chan;
if (!channel)
return;
@@ -1957,6 +1957,10 @@ static int rsi_mac80211_resume(struct ieee80211_hw *hw)
#endif
static const struct ieee80211_ops mac80211_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = rsi_mac80211_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = rsi_mac80211_start,
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 10a465686439..dccc139cabb2 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -232,17 +232,17 @@ static int rsi_usb_reg_write(struct usb_device *usbdev,
if (!usb_reg_buf)
return status;
- usb_reg_buf[0] = (cpu_to_le32(value) & 0x00ff);
- usb_reg_buf[1] = (cpu_to_le32(value) & 0xff00) >> 8;
- usb_reg_buf[2] = (cpu_to_le32(value) & 0x00ff0000) >> 16;
- usb_reg_buf[3] = (cpu_to_le32(value) & 0xff000000) >> 24;
+ usb_reg_buf[0] = value & 0x00ff;
+ usb_reg_buf[1] = (value & 0xff00) >> 8;
+ usb_reg_buf[2] = (value & 0x00ff0000) >> 16;
+ usb_reg_buf[3] = (value & 0xff000000) >> 24;
status = usb_control_msg(usbdev,
usb_sndctrlpipe(usbdev, 0),
USB_VENDOR_REGISTER_WRITE,
RSI_USB_REQ_OUT,
- ((cpu_to_le32(reg) & 0xffff0000) >> 16),
- (cpu_to_le32(reg) & 0xffff),
+ (reg & 0xffff0000) >> 16,
+ reg & 0xffff,
(void *)usb_reg_buf,
len,
USB_CTRL_SET_TIMEOUT);
diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c
index 537caf9d914a..a904602f02ce 100644
--- a/drivers/net/wireless/silabs/wfx/sta.c
+++ b/drivers/net/wireless/silabs/wfx/sta.c
@@ -144,13 +144,13 @@ static int wfx_get_ps_timeout(struct wfx_vif *wvif, bool *enable_ps)
struct wfx_vif *wvif_ch0 = wdev_to_wvif(wvif->wdev, 0);
struct ieee80211_vif *vif_ch0 = wvif_to_vif(wvif_ch0);
- chan0 = vif_ch0->bss_conf.chandef.chan;
+ chan0 = vif_ch0->bss_conf.chanreq.oper.chan;
}
if (wdev_to_wvif(wvif->wdev, 1)) {
struct wfx_vif *wvif_ch1 = wdev_to_wvif(wvif->wdev, 1);
struct ieee80211_vif *vif_ch1 = wvif_to_vif(wvif_ch1);
- chan1 = vif_ch1->bss_conf.chandef.chan;
+ chan1 = vif_ch1->bss_conf.chanreq.oper.chan;
}
if (chan0 && chan1 && vif->type != NL80211_IFTYPE_AP) {
if (chan0->hw_value == chan1->hw_value) {
@@ -344,6 +344,7 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif)
const int pairwise_cipher_suite_count_offset = 8 / sizeof(u16);
const int pairwise_cipher_suite_size = 4 / sizeof(u16);
const int akm_suite_size = 4 / sizeof(u16);
+ int ret = -EINVAL;
const u16 *ptr;
if (unlikely(!skb))
@@ -352,22 +353,26 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif)
ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset,
skb->len - ieoffset);
if (unlikely(!ptr))
- return -EINVAL;
+ goto free_skb;
ptr += pairwise_cipher_suite_count_offset;
if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb)))
- return -EINVAL;
+ goto free_skb;
ptr += 1 + pairwise_cipher_suite_size * *ptr;
if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb)))
- return -EINVAL;
+ goto free_skb;
ptr += 1 + akm_suite_size * *ptr;
if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb)))
- return -EINVAL;
+ goto free_skb;
wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6));
- return 0;
+ ret = 0;
+
+free_skb:
+ dev_kfree_skb(skb);
+ return ret;
}
int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/st/cw1200/cw1200_sdio.c b/drivers/net/wireless/st/cw1200/cw1200_sdio.c
index 4c30b5772ce0..00c4731d8f8e 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_sdio.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_sdio.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/delay.h>
#include <linux/mmc/host.h>
#include <linux/mmc/sdio_func.h>
@@ -178,12 +178,15 @@ static int cw1200_sdio_irq_unsubscribe(struct hwbus_priv *self)
return ret;
}
+/* Like the rest of the driver, this only supports one device per system */
+static struct gpio_desc *cw1200_reset;
+static struct gpio_desc *cw1200_powerup;
+
static int cw1200_sdio_off(const struct cw1200_platform_data_sdio *pdata)
{
- if (pdata->reset) {
- gpio_set_value(pdata->reset, 0);
+ if (cw1200_reset) {
+ gpiod_set_value(cw1200_reset, 0);
msleep(30); /* Min is 2 * CLK32K cycles */
- gpio_free(pdata->reset);
}
if (pdata->power_ctrl)
@@ -196,16 +199,21 @@ static int cw1200_sdio_off(const struct cw1200_platform_data_sdio *pdata)
static int cw1200_sdio_on(const struct cw1200_platform_data_sdio *pdata)
{
- /* Ensure I/Os are pulled low */
- if (pdata->reset) {
- gpio_request(pdata->reset, "cw1200_wlan_reset");
- gpio_direction_output(pdata->reset, 0);
+ /* Ensure I/Os are pulled low (reset is active low) */
+ cw1200_reset = devm_gpiod_get_optional(NULL, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(cw1200_reset)) {
+ pr_err("could not get CW1200 SDIO reset GPIO\n");
+ return PTR_ERR(cw1200_reset);
}
- if (pdata->powerup) {
- gpio_request(pdata->powerup, "cw1200_wlan_powerup");
- gpio_direction_output(pdata->powerup, 0);
+ gpiod_set_consumer_name(cw1200_reset, "cw1200_wlan_reset");
+ cw1200_powerup = devm_gpiod_get_optional(NULL, "powerup", GPIOD_OUT_LOW);
+ if (IS_ERR(cw1200_powerup)) {
+ pr_err("could not get CW1200 SDIO powerup GPIO\n");
+ return PTR_ERR(cw1200_powerup);
}
- if (pdata->reset || pdata->powerup)
+ gpiod_set_consumer_name(cw1200_powerup, "cw1200_wlan_powerup");
+
+ if (cw1200_reset || cw1200_powerup)
msleep(10); /* Settle time? */
/* Enable 3v3 and 1v8 to hardware */
@@ -226,13 +234,13 @@ static int cw1200_sdio_on(const struct cw1200_platform_data_sdio *pdata)
}
/* Enable POWERUP signal */
- if (pdata->powerup) {
- gpio_set_value(pdata->powerup, 1);
+ if (cw1200_powerup) {
+ gpiod_set_value(cw1200_powerup, 1);
msleep(250); /* or more..? */
}
- /* Enable RSTn signal */
- if (pdata->reset) {
- gpio_set_value(pdata->reset, 1);
+ /* Deassert RSTn signal, note active low */
+ if (cw1200_reset) {
+ gpiod_set_value(cw1200_reset, 0);
msleep(50); /* Or more..? */
}
return 0;
diff --git a/drivers/net/wireless/st/cw1200/cw1200_spi.c b/drivers/net/wireless/st/cw1200/cw1200_spi.c
index c82c0688b549..4f346fb977a9 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_spi.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_spi.c
@@ -11,7 +11,7 @@
*/
#include <linux/module.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
@@ -38,6 +38,8 @@ struct hwbus_priv {
const struct cw1200_platform_data_spi *pdata;
spinlock_t lock; /* Serialize all bus operations */
wait_queue_head_t wq;
+ struct gpio_desc *reset;
+ struct gpio_desc *powerup;
int claimed;
};
@@ -80,7 +82,7 @@ static int cw1200_spi_memcpy_fromio(struct hwbus_priv *self,
#endif
/* Header is LE16 */
- regaddr = cpu_to_le16(regaddr);
+ regaddr = (__force u16)cpu_to_le16(regaddr);
/* We have to byteswap if the SPI bus is limited to 8b operation
or we are running on a Big Endian system
@@ -145,7 +147,7 @@ static int cw1200_spi_memcpy_toio(struct hwbus_priv *self,
#endif
/* Header is LE16 */
- regaddr = cpu_to_le16(regaddr);
+ regaddr = (__force u16)cpu_to_le16(regaddr);
/* We have to byteswap if the SPI bus is limited to 8b operation
or we are running on a Big Endian system
@@ -275,12 +277,12 @@ static void cw1200_spi_irq_unsubscribe(struct hwbus_priv *self)
free_irq(self->func->irq, self);
}
-static int cw1200_spi_off(const struct cw1200_platform_data_spi *pdata)
+static int cw1200_spi_off(struct hwbus_priv *self, const struct cw1200_platform_data_spi *pdata)
{
- if (pdata->reset) {
- gpio_set_value(pdata->reset, 0);
+ if (self->reset) {
+ /* Assert RESET, note active low */
+ gpiod_set_value(self->reset, 1);
msleep(30); /* Min is 2 * CLK32K cycles */
- gpio_free(pdata->reset);
}
if (pdata->power_ctrl)
@@ -291,18 +293,12 @@ static int cw1200_spi_off(const struct cw1200_platform_data_spi *pdata)
return 0;
}
-static int cw1200_spi_on(const struct cw1200_platform_data_spi *pdata)
+static int cw1200_spi_on(struct hwbus_priv *self, const struct cw1200_platform_data_spi *pdata)
{
/* Ensure I/Os are pulled low */
- if (pdata->reset) {
- gpio_request(pdata->reset, "cw1200_wlan_reset");
- gpio_direction_output(pdata->reset, 0);
- }
- if (pdata->powerup) {
- gpio_request(pdata->powerup, "cw1200_wlan_powerup");
- gpio_direction_output(pdata->powerup, 0);
- }
- if (pdata->reset || pdata->powerup)
+ gpiod_direction_output(self->reset, 1); /* Active low */
+ gpiod_direction_output(self->powerup, 0);
+ if (self->reset || self->powerup)
msleep(10); /* Settle time? */
/* Enable 3v3 and 1v8 to hardware */
@@ -323,13 +319,13 @@ static int cw1200_spi_on(const struct cw1200_platform_data_spi *pdata)
}
/* Enable POWERUP signal */
- if (pdata->powerup) {
- gpio_set_value(pdata->powerup, 1);
+ if (self->powerup) {
+ gpiod_set_value(self->powerup, 1);
msleep(250); /* or more..? */
}
- /* Enable RSTn signal */
- if (pdata->reset) {
- gpio_set_value(pdata->reset, 1);
+ /* Assert RSTn signal, note active low */
+ if (self->reset) {
+ gpiod_set_value(self->reset, 0);
msleep(50); /* Or more..? */
}
return 0;
@@ -381,20 +377,33 @@ static int cw1200_spi_probe(struct spi_device *func)
spi_get_chipselect(func, 0), func->mode, func->bits_per_word,
func->max_speed_hz);
- if (cw1200_spi_on(plat_data)) {
+ self = devm_kzalloc(&func->dev, sizeof(*self), GFP_KERNEL);
+ if (!self) {
+ pr_err("Can't allocate SPI hwbus_priv.");
+ return -ENOMEM;
+ }
+
+ /* Request reset asserted */
+ self->reset = devm_gpiod_get_optional(&func->dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(self->reset))
+ return dev_err_probe(&func->dev, PTR_ERR(self->reset),
+ "could not get reset GPIO\n");
+ gpiod_set_consumer_name(self->reset, "cw1200_wlan_reset");
+
+ self->powerup = devm_gpiod_get_optional(&func->dev, "powerup", GPIOD_OUT_LOW);
+ if (IS_ERR(self->powerup))
+ return dev_err_probe(&func->dev, PTR_ERR(self->powerup),
+ "could not get powerup GPIO\n");
+ gpiod_set_consumer_name(self->reset, "cw1200_wlan_powerup");
+
+ if (cw1200_spi_on(self, plat_data)) {
pr_err("spi_on() failed!\n");
- return -1;
+ return -ENODEV;
}
if (spi_setup(func)) {
pr_err("spi_setup() failed!\n");
- return -1;
- }
-
- self = devm_kzalloc(&func->dev, sizeof(*self), GFP_KERNEL);
- if (!self) {
- pr_err("Can't allocate SPI hwbus_priv.");
- return -ENOMEM;
+ return -ENODEV;
}
self->pdata = plat_data;
@@ -416,7 +425,7 @@ static int cw1200_spi_probe(struct spi_device *func)
if (status) {
cw1200_spi_irq_unsubscribe(self);
- cw1200_spi_off(plat_data);
+ cw1200_spi_off(self, plat_data);
}
return status;
@@ -434,7 +443,7 @@ static void cw1200_spi_disconnect(struct spi_device *func)
self->core = NULL;
}
}
- cw1200_spi_off(dev_get_platdata(&func->dev));
+ cw1200_spi_off(self, dev_get_platdata(&func->dev));
}
static int __maybe_unused cw1200_spi_suspend(struct device *dev)
diff --git a/drivers/net/wireless/st/cw1200/main.c b/drivers/net/wireless/st/cw1200/main.c
index 381013e0db63..a54a7b86864f 100644
--- a/drivers/net/wireless/st/cw1200/main.c
+++ b/drivers/net/wireless/st/cw1200/main.c
@@ -203,6 +203,10 @@ static const unsigned long cw1200_ttl[] = {
};
static const struct ieee80211_ops cw1200_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.start = cw1200_start,
.stop = cw1200_stop,
.add_interface = cw1200_add_interface,
diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index cd9a41f59f32..0da2d29dd7bd 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -1351,6 +1351,10 @@ static struct ieee80211_supported_band wl1251_band_2ghz = {
};
static const struct ieee80211_ops wl1251_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.start = wl1251_op_start,
.stop = wl1251_op_stop,
.add_interface = wl1251_op_add_interface,
diff --git a/drivers/net/wireless/ti/wl1251/sdio.c b/drivers/net/wireless/ti/wl1251/sdio.c
index 301bd0043a43..4e5b351f80f0 100644
--- a/drivers/net/wireless/ti/wl1251/sdio.c
+++ b/drivers/net/wireless/ti/wl1251/sdio.c
@@ -343,5 +343,6 @@ static void __exit wl1251_sdio_exit(void)
module_init(wl1251_sdio_init);
module_exit(wl1251_sdio_exit);
+MODULE_DESCRIPTION("TI WL1251 SDIO helpers");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kalle Valo <kvalo@adurom.com>");
diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c
index 29292f06bd3d..1936bb3af54a 100644
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c
@@ -342,6 +342,7 @@ static struct spi_driver wl1251_spi_driver = {
module_spi_driver(wl1251_spi_driver);
+MODULE_DESCRIPTION("TI WL1251 SPI helpers");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kalle Valo <kvalo@adurom.com>");
MODULE_ALIAS("spi:wl1251");
diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c
index de045fe4ca1e..b26d42b4e3cc 100644
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -1955,6 +1955,7 @@ module_param_named(tcxo, tcxo_param, charp, 0);
MODULE_PARM_DESC(tcxo,
"TCXO clock: 19.2, 26, 38.4, 52, 16.368, 32.736, 16.8, 33.6");
+MODULE_DESCRIPTION("TI WL12xx wireless driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
MODULE_FIRMWARE(WL127X_FW_NAME_SINGLE);
diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c
index 20d9181b3410..2ccac1cdec01 100644
--- a/drivers/net/wireless/ti/wl18xx/main.c
+++ b/drivers/net/wireless/ti/wl18xx/main.c
@@ -2086,6 +2086,7 @@ module_param_named(num_rx_desc, num_rx_desc_param, int, 0400);
MODULE_PARM_DESC(num_rx_desc_param,
"Number of Rx descriptors: u8 (default is 32)");
+MODULE_DESCRIPTION("TI WiLink 8 wireless driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
MODULE_FIRMWARE(WL18XX_FW_NAME);
diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c
index 1e082d039b82..2499dc908305 100644
--- a/drivers/net/wireless/ti/wlcore/event.c
+++ b/drivers/net/wireless/ti/wlcore/event.c
@@ -233,7 +233,7 @@ void wlcore_event_channel_switch(struct wl1271 *wl,
cancel_delayed_work(&wlvif->channel_switch_work);
} else {
set_bit(WLVIF_FLAG_BEACON_DISABLED, &wlvif->flags);
- ieee80211_csa_finish(vif);
+ ieee80211_csa_finish(vif, 0);
}
}
}
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index fb9ed97774c7..ef12169f8044 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -2910,7 +2910,7 @@ static int wlcore_set_assoc(struct wl1271 *wl, struct wl12xx_vif *wlvif,
int ret;
wlvif->aid = vif->cfg.aid;
- wlvif->channel_type = cfg80211_get_chandef_type(&bss_conf->chandef);
+ wlvif->channel_type = cfg80211_get_chandef_type(&bss_conf->chanreq.oper);
wlvif->beacon_int = bss_conf->beacon_int;
wlvif->wmm_enabled = bss_conf->qos;
@@ -4242,7 +4242,7 @@ static void wl1271_bss_info_changed_ap(struct wl1271 *wl,
/* Handle HT information change */
if ((changed & BSS_CHANGED_HT) &&
- (bss_conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT)) {
+ (bss_conf->chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT)) {
ret = wl1271_acx_set_ht_information(wl, wlvif,
bss_conf->ht_operation_mode);
if (ret < 0) {
@@ -4515,7 +4515,7 @@ static void wl1271_bss_info_changed_sta(struct wl1271 *wl,
/* Handle new association with HT. Do this after join. */
if (sta_exists) {
bool enabled =
- bss_conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT;
+ bss_conf->chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT;
ret = wlcore_hw_set_peer_cap(wl,
&sta_ht_cap,
@@ -6793,6 +6793,7 @@ MODULE_PARM_DESC(bug_on_recovery, "BUG() on fw recovery");
module_param(no_recovery, int, 0600);
MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck.");
+MODULE_DESCRIPTION("TI WLAN core driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index f0686635db46..92fb5b8dcdae 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -16,7 +16,6 @@
#include <linux/mmc/sdio_ids.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
-#include <linux/gpio.h>
#include <linux/pm_runtime.h>
#include <linux/printk.h>
#include <linux/of.h>
@@ -75,8 +74,8 @@ static int __must_check wl12xx_sdio_raw_read(struct device *child, int addr,
sdio_release_host(func);
- if (WARN_ON(ret))
- dev_err(child->parent, "sdio read failed (%d)\n", ret);
+ if (ret)
+ dev_err_ratelimited(child->parent, "sdio read failed (%d)\n", ret);
if (unlikely(dump)) {
printk(KERN_DEBUG "wlcore_sdio: READ from 0x%04x\n", addr);
@@ -120,8 +119,8 @@ static int __must_check wl12xx_sdio_raw_write(struct device *child, int addr,
sdio_release_host(func);
- if (WARN_ON(ret))
- dev_err(child->parent, "sdio write failed (%d)\n", ret);
+ if (ret)
+ dev_err_ratelimited(child->parent, "sdio write failed (%d)\n", ret);
return ret;
}
@@ -447,6 +446,7 @@ module_sdio_driver(wl1271_sdio_driver);
module_param(dump, bool, 0600);
MODULE_PARM_DESC(dump, "Enable sdio read/write dumps.");
+MODULE_DESCRIPTION("TI WLAN SDIO helpers");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 7d9a139db59e..0aa2b2f3c5c9 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -562,6 +562,7 @@ static struct spi_driver wl1271_spi_driver = {
};
module_spi_driver(wl1271_spi_driver);
+MODULE_DESCRIPTION("TI WLAN SPI helpers");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c
index a84340c2075f..b55fe320633c 100644
--- a/drivers/net/wireless/virtual/mac80211_hwsim.c
+++ b/drivers/net/wireless/virtual/mac80211_hwsim.c
@@ -4,7 +4,7 @@
* Copyright (c) 2008, Jouni Malinen <j@w1.fi>
* Copyright (c) 2011, Javier Lopez <jlopex@gmail.com>
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
/*
@@ -196,8 +196,11 @@ static const struct ieee80211_regdomain hwsim_world_regdom_custom_04 = {
.reg_rules = {
REG_RULE(2412 - 10, 2462 + 10, 40, 0, 20, 0),
REG_RULE(2484 - 10, 2484 + 10, 40, 0, 20, 0),
- REG_RULE(5150 - 10, 5240 + 10, 80, 0, 30, 0),
+ REG_RULE(5150 - 10, 5240 + 10, 80, 0, 30, NL80211_RRF_AUTO_BW),
REG_RULE(5260 - 10, 5320 + 10, 80, 0, 30,
+ NL80211_RRF_DFS_CONCURRENT | NL80211_RRF_DFS |
+ NL80211_RRF_AUTO_BW),
+ REG_RULE(5500 - 10, 5720 + 10, 160, 0, 30,
NL80211_RRF_DFS_CONCURRENT | NL80211_RRF_DFS),
REG_RULE(5745 - 10, 5825 + 10, 80, 0, 30, 0),
REG_RULE(5855 - 10, 5925 + 10, 80, 0, 33, 0),
@@ -213,6 +216,7 @@ static const struct ieee80211_regdomain *hwsim_world_regdom_custom[] = {
struct hwsim_vif_priv {
u32 magic;
+ u32 skip_beacons;
u8 bssid[ETH_ALEN];
bool assoc;
bool bcn_en;
@@ -2128,6 +2132,16 @@ static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw,
return 0;
}
+#ifdef CONFIG_MAC80211_DEBUGFS
+static void mac80211_hwsim_vif_add_debugfs(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif)
+{
+ struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+
+ debugfs_create_u32("skip_beacons", 0600, vif->debugfs_dir,
+ &vp->skip_beacons);
+}
+#endif
static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -2193,12 +2207,19 @@ static void __mac80211_hwsim_beacon_tx(struct ieee80211_bss_conf *link_conf,
struct ieee80211_vif *vif,
struct sk_buff *skb)
{
+ struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
struct ieee80211_tx_info *info;
struct ieee80211_rate *txrate;
struct ieee80211_mgmt *mgmt;
/* TODO: get MCS */
int bitrate = 100;
+ if (vp->skip_beacons) {
+ vp->skip_beacons--;
+ dev_kfree_skb(skb);
+ return;
+ }
+
info = IEEE80211_SKB_CB(skb);
if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE))
ieee80211_get_tx_rates(vif, NULL, skb,
@@ -2284,8 +2305,8 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac,
rcu_dereference(link_conf->chanctx_conf)->def.chan);
}
- if (link_conf->csa_active && ieee80211_beacon_cntdwn_is_complete(vif))
- ieee80211_csa_finish(vif);
+ if (link_conf->csa_active && ieee80211_beacon_cntdwn_is_complete(vif, link_id))
+ ieee80211_csa_finish(vif, link_id);
}
static enum hrtimer_restart
@@ -2462,7 +2483,7 @@ static void mac80211_hwsim_vif_info_changed(struct ieee80211_hw *hw,
}
if (vif->type == NL80211_IFTYPE_STATION &&
- changed & BSS_CHANGED_MLD_VALID_LINKS) {
+ changed & (BSS_CHANGED_MLD_VALID_LINKS | BSS_CHANGED_MLD_TTLM)) {
u16 usable_links = ieee80211_vif_usable_links(vif);
if (vif->active_links != usable_links)
@@ -2653,10 +2674,11 @@ static int mac80211_hwsim_sta_state(struct ieee80211_hw *hw,
return mac80211_hwsim_sta_add(hw, vif, sta);
/*
- * when client is authorized (AP station marked as such),
- * enable all links
+ * in an MLO connection, when client is authorized
+ * (AP station marked as such), enable all links
*/
- if (vif->type == NL80211_IFTYPE_STATION &&
+ if (ieee80211_vif_is_mld(vif) &&
+ vif->type == NL80211_IFTYPE_STATION &&
new_state == IEEE80211_STA_AUTHORIZED && !sta->tdls)
ieee80211_set_active_links_async(vif,
ieee80211_vif_usable_links(vif));
@@ -2738,6 +2760,24 @@ static int mac80211_hwsim_get_survey(struct ieee80211_hw *hw, int idx,
return 0;
}
+static enum ieee80211_neg_ttlm_res
+mac80211_hwsim_can_neg_ttlm(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ u32 i;
+
+ /* For testing purposes, accept if all TIDs are mapped to the same links
+ * set, otherwise reject.
+ */
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) {
+ if (neg_ttlm->downlink[i] != neg_ttlm->uplink[i] ||
+ neg_ttlm->downlink[i] != neg_ttlm->downlink[0])
+ return NEG_TTLM_RES_REJECT;
+ }
+
+ return NEG_TTLM_RES_ACCEPT;
+}
+
#ifdef CONFIG_NL80211_TESTMODE
/*
* This section contains example code for using netlink
@@ -3175,6 +3215,47 @@ static void mac80211_hwsim_unassign_vif_chanctx(struct ieee80211_hw *hw,
}
}
+static int mac80211_hwsim_switch_vif_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode)
+{
+ int i;
+
+ if (n_vifs <= 0)
+ return -EINVAL;
+
+ wiphy_dbg(hw->wiphy,
+ "switch vif channel context mode: %u\n", mode);
+
+ for (i = 0; i < n_vifs; i++) {
+ hwsim_check_chanctx_magic(vifs[i].old_ctx);
+ wiphy_dbg(hw->wiphy,
+ "switch vif channel context: %d MHz/width: %d/cfreqs:%d/%d MHz -> %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ vifs[i].old_ctx->def.chan->center_freq,
+ vifs[i].old_ctx->def.width,
+ vifs[i].old_ctx->def.center_freq1,
+ vifs[i].old_ctx->def.center_freq2,
+ vifs[i].new_ctx->def.chan->center_freq,
+ vifs[i].new_ctx->def.width,
+ vifs[i].new_ctx->def.center_freq1,
+ vifs[i].new_ctx->def.center_freq2);
+
+ switch (mode) {
+ case CHANCTX_SWMODE_REASSIGN_VIF:
+ hwsim_check_chanctx_magic(vifs[i].new_ctx);
+ break;
+ case CHANCTX_SWMODE_SWAP_CONTEXTS:
+ hwsim_set_chanctx_magic(vifs[i].new_ctx);
+ hwsim_clear_chanctx_magic(vifs[i].old_ctx);
+ break;
+ default:
+ WARN_ON("Invalid mode");
+ }
+ }
+ return 0;
+}
+
static const char mac80211_hwsim_gstrings_stats[][ETH_GSTRING_LEN] = {
"tx_pkts_nic",
"tx_bytes_nic",
@@ -3839,6 +3920,13 @@ out:
return err;
}
+#ifdef CONFIG_MAC80211_DEBUGFS
+#define HWSIM_DEBUGFS_OPS \
+ .vif_add_debugfs = mac80211_hwsim_vif_add_debugfs,
+#else
+#define HWSIM_DEBUGFS_OPS
+#endif
+
#define HWSIM_COMMON_OPS \
.tx = mac80211_hwsim_tx, \
.wake_tx_queue = ieee80211_handle_wake_tx_queue, \
@@ -3863,7 +3951,8 @@ out:
.get_et_stats = mac80211_hwsim_get_et_stats, \
.get_et_strings = mac80211_hwsim_get_et_strings, \
.start_pmsr = mac80211_hwsim_start_pmsr, \
- .abort_pmsr = mac80211_hwsim_abort_pmsr,
+ .abort_pmsr = mac80211_hwsim_abort_pmsr, \
+ HWSIM_DEBUGFS_OPS
#define HWSIM_NON_MLO_OPS \
.sta_add = mac80211_hwsim_sta_add, \
@@ -3877,6 +3966,10 @@ static const struct ieee80211_ops mac80211_hwsim_ops = {
HWSIM_NON_MLO_OPS
.sw_scan_start = mac80211_hwsim_sw_scan,
.sw_scan_complete = mac80211_hwsim_sw_scan_complete,
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
};
#define HWSIM_CHANCTX_OPS \
@@ -3888,7 +3981,8 @@ static const struct ieee80211_ops mac80211_hwsim_ops = {
.remove_chanctx = mac80211_hwsim_remove_chanctx, \
.change_chanctx = mac80211_hwsim_change_chanctx, \
.assign_vif_chanctx = mac80211_hwsim_assign_vif_chanctx,\
- .unassign_vif_chanctx = mac80211_hwsim_unassign_vif_chanctx,
+ .unassign_vif_chanctx = mac80211_hwsim_unassign_vif_chanctx, \
+ .switch_vif_chanctx = mac80211_hwsim_switch_vif_chanctx,
static const struct ieee80211_ops mac80211_hwsim_mchan_ops = {
HWSIM_COMMON_OPS
@@ -3903,6 +3997,7 @@ static const struct ieee80211_ops mac80211_hwsim_mlo_ops = {
.change_vif_links = mac80211_hwsim_change_vif_links,
.change_sta_links = mac80211_hwsim_change_sta_links,
.sta_state = mac80211_hwsim_sta_state,
+ .can_neg_ttlm = mac80211_hwsim_can_neg_ttlm,
};
struct hwsim_new_radio_params {
@@ -4965,6 +5060,33 @@ static void mac80211_hwsim_sband_capab(struct ieee80211_supported_band *sband)
BIT(NL80211_IFTYPE_MESH_POINT) | \
BIT(NL80211_IFTYPE_OCB))
+static const u8 iftypes_ext_capa_ap[] = {
+ [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING,
+ [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT,
+ [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF |
+ WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB,
+ [8] = WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB,
+ [9] = WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT,
+};
+
+#define MAC80211_HWSIM_MLD_CAPA_OPS \
+ FIELD_PREP_CONST(IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP, \
+ IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME) | \
+ FIELD_PREP_CONST(IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS, \
+ IEEE80211_MLD_MAX_NUM_LINKS - 1)
+
+static const struct wiphy_iftype_ext_capab mac80211_hwsim_iftypes_ext_capa[] = {
+ {
+ .iftype = NL80211_IFTYPE_AP,
+ .extended_capabilities = iftypes_ext_capa_ap,
+ .extended_capabilities_mask = iftypes_ext_capa_ap,
+ .extended_capabilities_len = sizeof(iftypes_ext_capa_ap),
+ .eml_capabilities = IEEE80211_EML_CAP_EMLSR_SUPP |
+ IEEE80211_EML_CAP_EMLMR_SUPPORT,
+ .mld_capa_and_ops = MAC80211_HWSIM_MLD_CAPA_OPS,
+ },
+};
+
static int mac80211_hwsim_new_radio(struct genl_info *info,
struct hwsim_new_radio_params *param)
{
@@ -5159,6 +5281,10 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
ieee80211_hw_set(hw, CONNECTION_MONITOR);
ieee80211_hw_set(hw, AP_LINK_PS);
+
+ hw->wiphy->iftype_ext_capab = mac80211_hwsim_iftypes_ext_capa;
+ hw->wiphy->num_iftype_ext_capab =
+ ARRAY_SIZE(mac80211_hwsim_iftypes_ext_capa);
} else {
ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING);
ieee80211_hw_set(hw, PS_NULLFUNC_STACK);
@@ -5309,7 +5435,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
schedule_timeout_interruptible(1);
}
- /* TODO: Add param */
wiphy_ext_feature_set(hw->wiphy,
NL80211_EXT_FEATURE_DFS_CONCURRENT);
diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.h b/drivers/net/wireless/virtual/mac80211_hwsim.h
index 4676cdaf4cfd..21b1afd83dc1 100644
--- a/drivers/net/wireless/virtual/mac80211_hwsim.h
+++ b/drivers/net/wireless/virtual/mac80211_hwsim.h
@@ -3,7 +3,7 @@
* mac80211_hwsim - software simulator of 802.11 radio(s) for mac80211
* Copyright (c) 2008, Jouni Malinen <j@w1.fi>
* Copyright (c) 2011, Javier Lopez <jlopex@gmail.com>
- * Copyright (C) 2020, 2022-2023 Intel Corporation
+ * Copyright (C) 2020, 2022-2024 Intel Corporation
*/
#ifndef __MAC80211_HWSIM_H
@@ -84,6 +84,8 @@ enum hwsim_tx_control_flags {
* @HWSIM_CMD_START_PMSR: request to start peer measurement with the
* %HWSIM_ATTR_PMSR_REQUEST. Result will be sent back asynchronously
* with %HWSIM_CMD_REPORT_PMSR.
+ * @HWSIM_CMD_ABORT_PMSR: Abort previously started peer measurement.
+ * @HWSIM_CMD_REPORT_PMSR: Report peer measurement data.
* @__HWSIM_CMD_MAX: enum limit
*/
enum hwsim_commands {
@@ -298,6 +300,7 @@ enum hwsim_vqs {
* Information about a receiving or transmitting bitrate
* that can be mapped to struct rate_info
*
+ * @__HWSIM_RATE_INFO_ATTR_INVALID: reserved, netlink attribute 0 is invalid
* @HWSIM_RATE_INFO_ATTR_FLAGS: bitflag of flags from &enum rate_info_flags
* @HWSIM_RATE_INFO_ATTR_MCS: mcs index if struct describes an HT/VHT/HE rate
* @HWSIM_RATE_INFO_ATTR_LEGACY: bitrate in 100kbit/s for 802.11abg
diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c
index ba14d83353a4..6a84ec58d618 100644
--- a/drivers/net/wireless/virtual/virt_wifi.c
+++ b/drivers/net/wireless/virtual/virt_wifi.c
@@ -453,7 +453,7 @@ static int virt_wifi_net_device_get_iflink(const struct net_device *dev)
{
struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
- return priv->lowerdev->ifindex;
+ return READ_ONCE(priv->lowerdev->ifindex);
}
static const struct net_device_ops virt_wifi_ops = {
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_def.h b/drivers/net/wireless/zydas/zd1211rw/zd_def.h
index 8ca2d0aab170..2f55e8deee82 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_def.h
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_def.h
@@ -12,7 +12,7 @@
#include <linux/stringify.h>
#include <linux/device.h>
-typedef u16 __nocast zd_addr_t;
+typedef u16 zd_addr_t;
#define dev_printk_f(level, dev, fmt, args...) \
dev_printk(level, dev, "%s() " fmt, __func__, ##args)
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
index 5d534e15a844..900c063bd724 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
@@ -1343,6 +1343,10 @@ static u64 zd_op_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
}
static const struct ieee80211_ops zd_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = zd_op_tx,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = zd_op_start,
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
index 8505d84eeed6..f3b567a13ded 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
@@ -380,7 +380,7 @@ static inline void handle_regs_int(struct urb *urb)
spin_lock_irqsave(&intr->lock, flags);
int_num = le16_to_cpu(*(__le16 *)(urb->transfer_buffer+2));
- if (int_num == CR_INTERRUPT) {
+ if (int_num == (u16)CR_INTERRUPT) {
struct zd_mac *mac = zd_hw_mac(zd_usb_to_hw(urb->context));
spin_lock(&mac->lock);
memcpy(&mac->intr_buffer, urb->transfer_buffer,
@@ -416,7 +416,8 @@ out:
spin_unlock_irqrestore(&intr->lock, flags);
/* CR_INTERRUPT might override read_reg too. */
- if (int_num == CR_INTERRUPT && atomic_read(&intr->read_regs_enabled))
+ if (int_num == (u16)CR_INTERRUPT &&
+ atomic_read(&intr->read_regs_enabled))
handle_regs_int_override(urb);
}
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
index cc70360364b7..abc41a7089fa 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
@@ -57,8 +57,6 @@
#define CHECK_Q_STOP_TIMEOUT_US 1000000
#define CHECK_Q_STOP_STEP_US 10000
-#define CLDMA_JUMBO_BUFF_SZ (63 * 1024 + sizeof(struct ccci_header))
-
static void md_cd_queue_struct_reset(struct cldma_queue *queue, struct cldma_ctrl *md_ctrl,
enum mtk_txrx tx_rx, unsigned int index)
{
@@ -161,7 +159,7 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool
skb_reset_tail_pointer(skb);
skb_put(skb, le16_to_cpu(gpd->data_buff_len));
- ret = md_ctrl->recv_skb(queue, skb);
+ ret = queue->recv_skb(queue, skb);
/* Break processing, will try again later */
if (ret < 0)
return ret;
@@ -897,13 +895,13 @@ static void t7xx_cldma_hw_start_send(struct cldma_ctrl *md_ctrl, int qno,
/**
* t7xx_cldma_set_recv_skb() - Set the callback to handle RX packets.
- * @md_ctrl: CLDMA context structure.
+ * @queue: CLDMA queue.
* @recv_skb: Receiving skb callback.
*/
-void t7xx_cldma_set_recv_skb(struct cldma_ctrl *md_ctrl,
+void t7xx_cldma_set_recv_skb(struct cldma_queue *queue,
int (*recv_skb)(struct cldma_queue *queue, struct sk_buff *skb))
{
- md_ctrl->recv_skb = recv_skb;
+ queue->recv_skb = recv_skb;
}
/**
@@ -993,6 +991,28 @@ allow_sleep:
return ret;
}
+static void t7xx_cldma_adjust_config(struct cldma_ctrl *md_ctrl, enum cldma_cfg cfg_id)
+{
+ int qno;
+
+ for (qno = 0; qno < CLDMA_RXQ_NUM; qno++) {
+ md_ctrl->rx_ring[qno].pkt_size = CLDMA_SHARED_Q_BUFF_SZ;
+ t7xx_cldma_set_recv_skb(&md_ctrl->rxq[qno], t7xx_port_proxy_recv_skb);
+ }
+
+ md_ctrl->rx_ring[CLDMA_RXQ_NUM - 1].pkt_size = CLDMA_JUMBO_BUFF_SZ;
+
+ for (qno = 0; qno < CLDMA_TXQ_NUM; qno++)
+ md_ctrl->tx_ring[qno].pkt_size = CLDMA_SHARED_Q_BUFF_SZ;
+
+ if (cfg_id == CLDMA_DEDICATED_Q_CFG) {
+ md_ctrl->tx_ring[CLDMA_Q_IDX_DUMP].pkt_size = CLDMA_DEDICATED_Q_BUFF_SZ;
+ md_ctrl->rx_ring[CLDMA_Q_IDX_DUMP].pkt_size = CLDMA_DEDICATED_Q_BUFF_SZ;
+ t7xx_cldma_set_recv_skb(&md_ctrl->rxq[CLDMA_Q_IDX_DUMP],
+ t7xx_port_proxy_recv_skb_from_dedicated_queue);
+ }
+}
+
static int t7xx_cldma_late_init(struct cldma_ctrl *md_ctrl)
{
char dma_pool_name[32];
@@ -1018,16 +1038,9 @@ static int t7xx_cldma_late_init(struct cldma_ctrl *md_ctrl)
dev_err(md_ctrl->dev, "control TX ring init fail\n");
goto err_free_tx_ring;
}
-
- md_ctrl->tx_ring[i].pkt_size = CLDMA_MTU;
}
for (j = 0; j < CLDMA_RXQ_NUM; j++) {
- md_ctrl->rx_ring[j].pkt_size = CLDMA_MTU;
-
- if (j == CLDMA_RXQ_NUM - 1)
- md_ctrl->rx_ring[j].pkt_size = CLDMA_JUMBO_BUFF_SZ;
-
ret = t7xx_cldma_rx_ring_init(md_ctrl, &md_ctrl->rx_ring[j]);
if (ret) {
dev_err(md_ctrl->dev, "Control RX ring init fail\n");
@@ -1094,6 +1107,7 @@ int t7xx_cldma_alloc(enum cldma_id hif_id, struct t7xx_pci_dev *t7xx_dev)
{
struct device *dev = &t7xx_dev->pdev->dev;
struct cldma_ctrl *md_ctrl;
+ int qno;
md_ctrl = devm_kzalloc(dev, sizeof(*md_ctrl), GFP_KERNEL);
if (!md_ctrl)
@@ -1102,7 +1116,9 @@ int t7xx_cldma_alloc(enum cldma_id hif_id, struct t7xx_pci_dev *t7xx_dev)
md_ctrl->t7xx_dev = t7xx_dev;
md_ctrl->dev = dev;
md_ctrl->hif_id = hif_id;
- md_ctrl->recv_skb = t7xx_cldma_default_recv_skb;
+ for (qno = 0; qno < CLDMA_RXQ_NUM; qno++)
+ md_ctrl->rxq[qno].recv_skb = t7xx_cldma_default_recv_skb;
+
t7xx_hw_info_init(md_ctrl);
t7xx_dev->md->md_ctrl[hif_id] = md_ctrl;
return 0;
@@ -1332,9 +1348,10 @@ err_workqueue:
return -ENOMEM;
}
-void t7xx_cldma_switch_cfg(struct cldma_ctrl *md_ctrl)
+void t7xx_cldma_switch_cfg(struct cldma_ctrl *md_ctrl, enum cldma_cfg cfg_id)
{
t7xx_cldma_late_release(md_ctrl);
+ t7xx_cldma_adjust_config(md_ctrl, cfg_id);
t7xx_cldma_late_init(md_ctrl);
}
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.h b/drivers/net/wwan/t7xx/t7xx_hif_cldma.h
index 4410bac6993a..f2d9941be9c8 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.h
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.h
@@ -31,6 +31,10 @@
#include "t7xx_cldma.h"
#include "t7xx_pci.h"
+#define CLDMA_JUMBO_BUFF_SZ (63 * 1024 + sizeof(struct ccci_header))
+#define CLDMA_SHARED_Q_BUFF_SZ 3584
+#define CLDMA_DEDICATED_Q_BUFF_SZ 2048
+
/**
* enum cldma_id - Identifiers for CLDMA HW units.
* @CLDMA_ID_MD: Modem control channel.
@@ -55,6 +59,11 @@ struct cldma_gpd {
__le16 not_used2;
};
+enum cldma_cfg {
+ CLDMA_SHARED_Q_CFG,
+ CLDMA_DEDICATED_Q_CFG,
+};
+
struct cldma_request {
struct cldma_gpd *gpd; /* Virtual address for CPU */
dma_addr_t gpd_addr; /* Physical address for DMA */
@@ -82,6 +91,7 @@ struct cldma_queue {
wait_queue_head_t req_wq; /* Only for TX */
struct workqueue_struct *worker;
struct work_struct cldma_work;
+ int (*recv_skb)(struct cldma_queue *queue, struct sk_buff *skb);
};
struct cldma_ctrl {
@@ -101,24 +111,22 @@ struct cldma_ctrl {
struct md_pm_entity *pm_entity;
struct t7xx_cldma_hw hw_info;
bool is_late_init;
- int (*recv_skb)(struct cldma_queue *queue, struct sk_buff *skb);
};
+#define CLDMA_Q_IDX_DUMP 1
#define GPD_FLAGS_HWO BIT(0)
#define GPD_FLAGS_IOC BIT(7)
#define GPD_DMAPOOL_ALIGN 16
-#define CLDMA_MTU 3584 /* 3.5kB */
-
int t7xx_cldma_alloc(enum cldma_id hif_id, struct t7xx_pci_dev *t7xx_dev);
void t7xx_cldma_hif_hw_init(struct cldma_ctrl *md_ctrl);
int t7xx_cldma_init(struct cldma_ctrl *md_ctrl);
void t7xx_cldma_exit(struct cldma_ctrl *md_ctrl);
-void t7xx_cldma_switch_cfg(struct cldma_ctrl *md_ctrl);
+void t7xx_cldma_switch_cfg(struct cldma_ctrl *md_ctrl, enum cldma_cfg cfg_id);
void t7xx_cldma_start(struct cldma_ctrl *md_ctrl);
int t7xx_cldma_stop(struct cldma_ctrl *md_ctrl);
void t7xx_cldma_reset(struct cldma_ctrl *md_ctrl);
-void t7xx_cldma_set_recv_skb(struct cldma_ctrl *md_ctrl,
+void t7xx_cldma_set_recv_skb(struct cldma_queue *queue,
int (*recv_skb)(struct cldma_queue *queue, struct sk_buff *skb));
int t7xx_cldma_send_skb(struct cldma_ctrl *md_ctrl, int qno, struct sk_buff *skb);
void t7xx_cldma_stop_all_qs(struct cldma_ctrl *md_ctrl, enum mtk_txrx tx_rx);
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
index 24e7d491468e..8d864d4ed77f 100644
--- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c
+++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
@@ -177,6 +177,11 @@ int t7xx_acpi_fldr_func(struct t7xx_pci_dev *t7xx_dev)
return t7xx_acpi_reset(t7xx_dev, "_RST");
}
+int t7xx_acpi_pldr_func(struct t7xx_pci_dev *t7xx_dev)
+{
+ return t7xx_acpi_reset(t7xx_dev, "MRST._RST");
+}
+
static void t7xx_reset_device_via_pmic(struct t7xx_pci_dev *t7xx_dev)
{
u32 val;
@@ -192,6 +197,7 @@ static irqreturn_t t7xx_rgu_isr_thread(int irq, void *data)
{
struct t7xx_pci_dev *t7xx_dev = data;
+ t7xx_mode_update(t7xx_dev, T7XX_RESET);
msleep(RGU_RESET_DELAY_MS);
t7xx_reset_device_via_pmic(t7xx_dev);
return IRQ_HANDLED;
@@ -529,7 +535,7 @@ static void t7xx_md_hk_wq(struct work_struct *work)
/* Clear the HS2 EXIT event appended in core_reset() */
t7xx_fsm_clr_event(ctl, FSM_EVENT_MD_HS2_EXIT);
- t7xx_cldma_switch_cfg(md->md_ctrl[CLDMA_ID_MD]);
+ t7xx_cldma_switch_cfg(md->md_ctrl[CLDMA_ID_MD], CLDMA_SHARED_Q_CFG);
t7xx_cldma_start(md->md_ctrl[CLDMA_ID_MD]);
t7xx_fsm_broadcast_state(ctl, MD_STATE_WAITING_FOR_HS2);
md->core_md.handshake_ongoing = true;
@@ -544,7 +550,7 @@ static void t7xx_ap_hk_wq(struct work_struct *work)
/* Clear the HS2 EXIT event appended in t7xx_core_reset(). */
t7xx_fsm_clr_event(ctl, FSM_EVENT_AP_HS2_EXIT);
t7xx_cldma_stop(md->md_ctrl[CLDMA_ID_AP]);
- t7xx_cldma_switch_cfg(md->md_ctrl[CLDMA_ID_AP]);
+ t7xx_cldma_switch_cfg(md->md_ctrl[CLDMA_ID_AP], CLDMA_SHARED_Q_CFG);
t7xx_cldma_start(md->md_ctrl[CLDMA_ID_AP]);
md->core_ap.handshake_ongoing = true;
t7xx_core_hk_handler(md, &md->core_ap, ctl, FSM_EVENT_AP_HS2, FSM_EVENT_AP_HS2_EXIT);
@@ -758,6 +764,7 @@ err_destroy_hswq:
void t7xx_md_exit(struct t7xx_pci_dev *t7xx_dev)
{
+ enum t7xx_mode mode = READ_ONCE(t7xx_dev->mode);
struct t7xx_modem *md = t7xx_dev->md;
t7xx_pcie_mac_clear_int(t7xx_dev, SAP_RGU_INT);
@@ -765,7 +772,8 @@ void t7xx_md_exit(struct t7xx_pci_dev *t7xx_dev)
if (!md->md_init_finish)
return;
- t7xx_fsm_append_cmd(md->fsm_ctl, FSM_CMD_PRE_STOP, FSM_CMD_FLAG_WAIT_FOR_COMPLETION);
+ if (mode != T7XX_RESET && mode != T7XX_UNKNOWN)
+ t7xx_fsm_append_cmd(md->fsm_ctl, FSM_CMD_PRE_STOP, FSM_CMD_FLAG_WAIT_FOR_COMPLETION);
t7xx_port_proxy_uninit(md->port_prox);
t7xx_cldma_exit(md->md_ctrl[CLDMA_ID_AP]);
t7xx_cldma_exit(md->md_ctrl[CLDMA_ID_MD]);
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.h b/drivers/net/wwan/t7xx/t7xx_modem_ops.h
index abe633cf7adc..b39e945a92e0 100644
--- a/drivers/net/wwan/t7xx/t7xx_modem_ops.h
+++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.h
@@ -85,6 +85,7 @@ int t7xx_md_init(struct t7xx_pci_dev *t7xx_dev);
void t7xx_md_exit(struct t7xx_pci_dev *t7xx_dev);
void t7xx_clear_rgu_irq(struct t7xx_pci_dev *t7xx_dev);
int t7xx_acpi_fldr_func(struct t7xx_pci_dev *t7xx_dev);
+int t7xx_acpi_pldr_func(struct t7xx_pci_dev *t7xx_dev);
int t7xx_pci_mhccif_isr(struct t7xx_pci_dev *t7xx_dev);
#endif /* __T7XX_MODEM_OPS_H__ */
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
index 91256e005b84..e0b1e7a616ca 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.c
+++ b/drivers/net/wwan/t7xx/t7xx_pci.c
@@ -52,6 +52,81 @@
#define PM_RESOURCE_POLL_TIMEOUT_US 10000
#define PM_RESOURCE_POLL_STEP_US 100
+static const char * const t7xx_mode_names[] = {
+ [T7XX_UNKNOWN] = "unknown",
+ [T7XX_READY] = "ready",
+ [T7XX_RESET] = "reset",
+ [T7XX_FASTBOOT_SWITCHING] = "fastboot_switching",
+ [T7XX_FASTBOOT_DOWNLOAD] = "fastboot_download",
+ [T7XX_FASTBOOT_DUMP] = "fastboot_dump",
+};
+
+static_assert(ARRAY_SIZE(t7xx_mode_names) == T7XX_MODE_LAST);
+
+static ssize_t t7xx_mode_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct t7xx_pci_dev *t7xx_dev;
+ struct pci_dev *pdev;
+ int index = 0;
+
+ pdev = to_pci_dev(dev);
+ t7xx_dev = pci_get_drvdata(pdev);
+ if (!t7xx_dev)
+ return -ENODEV;
+
+ index = sysfs_match_string(t7xx_mode_names, buf);
+ if (index == T7XX_FASTBOOT_SWITCHING) {
+ WRITE_ONCE(t7xx_dev->mode, T7XX_FASTBOOT_SWITCHING);
+ } else if (index == T7XX_RESET) {
+ WRITE_ONCE(t7xx_dev->mode, T7XX_RESET);
+ t7xx_acpi_pldr_func(t7xx_dev);
+ }
+
+ return count;
+};
+
+static ssize_t t7xx_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ enum t7xx_mode mode = T7XX_UNKNOWN;
+ struct t7xx_pci_dev *t7xx_dev;
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(dev);
+ t7xx_dev = pci_get_drvdata(pdev);
+ if (!t7xx_dev)
+ return -ENODEV;
+
+ mode = READ_ONCE(t7xx_dev->mode);
+ if (mode < T7XX_MODE_LAST)
+ return sysfs_emit(buf, "%s\n", t7xx_mode_names[mode]);
+
+ return sysfs_emit(buf, "%s\n", t7xx_mode_names[T7XX_UNKNOWN]);
+}
+
+static DEVICE_ATTR_RW(t7xx_mode);
+
+static struct attribute *t7xx_mode_attr[] = {
+ &dev_attr_t7xx_mode.attr,
+ NULL
+};
+
+static const struct attribute_group t7xx_mode_attribute_group = {
+ .attrs = t7xx_mode_attr,
+};
+
+void t7xx_mode_update(struct t7xx_pci_dev *t7xx_dev, enum t7xx_mode mode)
+{
+ if (!t7xx_dev)
+ return;
+
+ WRITE_ONCE(t7xx_dev->mode, mode);
+ sysfs_notify(&t7xx_dev->pdev->dev.kobj, NULL, "t7xx_mode");
+}
+
enum t7xx_pm_state {
MTK_PM_EXCEPTION,
MTK_PM_INIT, /* Device initialized, but handshake not completed */
@@ -108,7 +183,7 @@ static int t7xx_pci_pm_init(struct t7xx_pci_dev *t7xx_dev)
pm_runtime_set_autosuspend_delay(&pdev->dev, PM_AUTOSUSPEND_MS);
pm_runtime_use_autosuspend(&pdev->dev);
- return t7xx_wait_pm_config(t7xx_dev);
+ return 0;
}
void t7xx_pci_pm_init_late(struct t7xx_pci_dev *t7xx_dev)
@@ -279,7 +354,8 @@ static int __t7xx_pci_pm_suspend(struct pci_dev *pdev)
int ret;
t7xx_dev = pci_get_drvdata(pdev);
- if (atomic_read(&t7xx_dev->md_pm_state) <= MTK_PM_INIT) {
+ if (atomic_read(&t7xx_dev->md_pm_state) <= MTK_PM_INIT ||
+ READ_ONCE(t7xx_dev->mode) != T7XX_READY) {
dev_err(&pdev->dev, "[PM] Exiting suspend, modem in invalid state\n");
return -EFAULT;
}
@@ -729,16 +805,28 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
t7xx_pcie_mac_interrupts_dis(t7xx_dev);
+ ret = sysfs_create_group(&t7xx_dev->pdev->dev.kobj,
+ &t7xx_mode_attribute_group);
+ if (ret)
+ goto err_md_exit;
+
ret = t7xx_interrupt_init(t7xx_dev);
- if (ret) {
- t7xx_md_exit(t7xx_dev);
- return ret;
- }
+ if (ret)
+ goto err_remove_group;
+
t7xx_pcie_mac_set_int(t7xx_dev, MHCCIF_INT);
t7xx_pcie_mac_interrupts_en(t7xx_dev);
return 0;
+
+err_remove_group:
+ sysfs_remove_group(&t7xx_dev->pdev->dev.kobj,
+ &t7xx_mode_attribute_group);
+
+err_md_exit:
+ t7xx_md_exit(t7xx_dev);
+ return ret;
}
static void t7xx_pci_remove(struct pci_dev *pdev)
@@ -747,6 +835,9 @@ static void t7xx_pci_remove(struct pci_dev *pdev)
int i;
t7xx_dev = pci_get_drvdata(pdev);
+
+ sysfs_remove_group(&t7xx_dev->pdev->dev.kobj,
+ &t7xx_mode_attribute_group);
t7xx_md_exit(t7xx_dev);
for (i = 0; i < EXT_INT_NUM; i++) {
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.h b/drivers/net/wwan/t7xx/t7xx_pci.h
index f08f1ab74469..49a11586d8d8 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.h
+++ b/drivers/net/wwan/t7xx/t7xx_pci.h
@@ -43,6 +43,16 @@ struct t7xx_addr_base {
typedef irqreturn_t (*t7xx_intr_callback)(int irq, void *param);
+enum t7xx_mode {
+ T7XX_UNKNOWN,
+ T7XX_READY,
+ T7XX_RESET,
+ T7XX_FASTBOOT_SWITCHING,
+ T7XX_FASTBOOT_DOWNLOAD,
+ T7XX_FASTBOOT_DUMP,
+ T7XX_MODE_LAST, /* must always be last */
+};
+
/* struct t7xx_pci_dev - MTK device context structure
* @intr_handler: array of handler function for request_threaded_irq
* @intr_thread: array of thread_fn for request_threaded_irq
@@ -59,6 +69,7 @@ typedef irqreturn_t (*t7xx_intr_callback)(int irq, void *param);
* @md_pm_lock: protects PCIe sleep lock
* @sleep_disable_count: PCIe L1.2 lock counter
* @sleep_lock_acquire: indicates that sleep has been disabled
+ * @mode: indicates the device mode
*/
struct t7xx_pci_dev {
t7xx_intr_callback intr_handler[EXT_INT_NUM];
@@ -82,6 +93,7 @@ struct t7xx_pci_dev {
#ifdef CONFIG_WWAN_DEBUGFS
struct dentry *debugfs_dir;
#endif
+ u32 mode;
};
enum t7xx_pm_id {
@@ -120,5 +132,5 @@ int t7xx_pci_pm_entity_register(struct t7xx_pci_dev *t7xx_dev, struct md_pm_enti
int t7xx_pci_pm_entity_unregister(struct t7xx_pci_dev *t7xx_dev, struct md_pm_entity *pm_entity);
void t7xx_pci_pm_init_late(struct t7xx_pci_dev *t7xx_dev);
void t7xx_pci_pm_exp_detected(struct t7xx_pci_dev *t7xx_dev);
-
+void t7xx_mode_update(struct t7xx_pci_dev *t7xx_dev, enum t7xx_mode mode);
#endif /* __T7XX_PCI_H__ */
diff --git a/drivers/net/wwan/t7xx/t7xx_port.h b/drivers/net/wwan/t7xx/t7xx_port.h
index 4ae8a00a8532..f74d3bab810d 100644
--- a/drivers/net/wwan/t7xx/t7xx_port.h
+++ b/drivers/net/wwan/t7xx/t7xx_port.h
@@ -75,6 +75,8 @@ enum port_ch {
PORT_CH_DSS6_TX = 0x20df,
PORT_CH_DSS7_RX = 0x20e0,
PORT_CH_DSS7_TX = 0x20e1,
+
+ PORT_CH_UNIMPORTANT = 0xffff,
};
struct t7xx_port;
@@ -135,11 +137,13 @@ struct t7xx_port {
};
};
+int t7xx_get_port_mtu(struct t7xx_port *port);
struct sk_buff *t7xx_port_alloc_skb(int payload);
struct sk_buff *t7xx_ctrl_alloc_skb(int payload);
int t7xx_port_enqueue_skb(struct t7xx_port *port, struct sk_buff *skb);
int t7xx_port_send_skb(struct t7xx_port *port, struct sk_buff *skb, unsigned int pkt_header,
unsigned int ex_msg);
+int t7xx_port_send_raw_skb(struct t7xx_port *port, struct sk_buff *skb);
int t7xx_port_send_ctl_skb(struct t7xx_port *port, struct sk_buff *skb, unsigned int msg,
unsigned int ex_msg);
diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.c b/drivers/net/wwan/t7xx/t7xx_port_proxy.c
index 274846d39fbf..7d6388bf1d7c 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_proxy.c
+++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.c
@@ -48,6 +48,9 @@
i < (proxy)->port_count; \
i++, (p) = &(proxy)->ports[i])
+#define T7XX_MAX_POSSIBLE_PORTS_NUM \
+ (max(ARRAY_SIZE(t7xx_port_conf), ARRAY_SIZE(t7xx_early_port_conf)))
+
static const struct t7xx_port_conf t7xx_port_conf[] = {
{
.tx_ch = PORT_CH_UART2_TX,
@@ -100,6 +103,21 @@ static const struct t7xx_port_conf t7xx_port_conf[] = {
},
};
+static const struct t7xx_port_conf t7xx_early_port_conf[] = {
+ {
+ .tx_ch = PORT_CH_UNIMPORTANT,
+ .rx_ch = PORT_CH_UNIMPORTANT,
+ .txq_index = CLDMA_Q_IDX_DUMP,
+ .rxq_index = CLDMA_Q_IDX_DUMP,
+ .txq_exp_index = CLDMA_Q_IDX_DUMP,
+ .rxq_exp_index = CLDMA_Q_IDX_DUMP,
+ .path_id = CLDMA_ID_AP,
+ .ops = &wwan_sub_port_ops,
+ .name = "fastboot",
+ .port_type = WWAN_PORT_FASTBOOT,
+ },
+};
+
static struct t7xx_port *t7xx_proxy_get_port_by_ch(struct port_proxy *port_prox, enum port_ch ch)
{
const struct t7xx_port_conf *port_conf;
@@ -214,7 +232,17 @@ int t7xx_port_enqueue_skb(struct t7xx_port *port, struct sk_buff *skb)
return 0;
}
-static int t7xx_port_send_raw_skb(struct t7xx_port *port, struct sk_buff *skb)
+int t7xx_get_port_mtu(struct t7xx_port *port)
+{
+ enum cldma_id path_id = port->port_conf->path_id;
+ int tx_qno = t7xx_port_get_queue_no(port);
+ struct cldma_ctrl *md_ctrl;
+
+ md_ctrl = port->t7xx_dev->md->md_ctrl[path_id];
+ return md_ctrl->tx_ring[tx_qno].pkt_size;
+}
+
+int t7xx_port_send_raw_skb(struct t7xx_port *port, struct sk_buff *skb)
{
enum cldma_id path_id = port->port_conf->path_id;
struct cldma_ctrl *md_ctrl;
@@ -329,6 +357,39 @@ static void t7xx_proxy_setup_ch_mapping(struct port_proxy *port_prox)
}
}
+/**
+ * t7xx_port_proxy_recv_skb_from_dedicated_queue() - Dispatch early port received skb.
+ * @queue: CLDMA queue.
+ * @skb: Socket buffer.
+ *
+ * Return:
+ ** 0 - Packet consumed.
+ ** -ERROR - Failed to process skb.
+ */
+int t7xx_port_proxy_recv_skb_from_dedicated_queue(struct cldma_queue *queue, struct sk_buff *skb)
+{
+ struct t7xx_pci_dev *t7xx_dev = queue->md_ctrl->t7xx_dev;
+ struct port_proxy *port_prox = t7xx_dev->md->port_prox;
+ const struct t7xx_port_conf *port_conf;
+ struct t7xx_port *port;
+ int ret;
+
+ port = &port_prox->ports[0];
+ if (WARN_ON_ONCE(port->port_conf->rxq_index != queue->index)) {
+ dev_kfree_skb_any(skb);
+ return -EINVAL;
+ }
+
+ port_conf = port->port_conf;
+ ret = port_conf->ops->recv_skb(port, skb);
+ if (ret < 0 && ret != -ENOBUFS) {
+ dev_err(port->dev, "drop on RX ch %d, %d\n", port_conf->rx_ch, ret);
+ dev_kfree_skb_any(skb);
+ }
+
+ return ret;
+}
+
static struct t7xx_port *t7xx_port_proxy_find_port(struct t7xx_pci_dev *t7xx_dev,
struct cldma_queue *queue, u16 channel)
{
@@ -359,7 +420,7 @@ static struct t7xx_port *t7xx_port_proxy_find_port(struct t7xx_pci_dev *t7xx_dev
** 0 - Packet consumed.
** -ERROR - Failed to process skb.
*/
-static int t7xx_port_proxy_recv_skb(struct cldma_queue *queue, struct sk_buff *skb)
+int t7xx_port_proxy_recv_skb(struct cldma_queue *queue, struct sk_buff *skb)
{
struct ccci_header *ccci_h = (struct ccci_header *)skb->data;
struct t7xx_pci_dev *t7xx_dev = queue->md_ctrl->t7xx_dev;
@@ -444,33 +505,56 @@ static void t7xx_proxy_init_all_ports(struct t7xx_modem *md)
spin_lock_init(&port->port_update_lock);
port->chan_enable = false;
- if (port_conf->ops->init)
+ if (port_conf->ops && port_conf->ops->init)
port_conf->ops->init(port);
}
t7xx_proxy_setup_ch_mapping(port_prox);
}
+void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id)
+{
+ struct port_proxy *port_prox = md->port_prox;
+ const struct t7xx_port_conf *port_conf;
+ u32 port_count;
+ int i;
+
+ t7xx_port_proxy_uninit(port_prox);
+
+ if (cfg_id == PORT_CFG_ID_EARLY) {
+ port_conf = t7xx_early_port_conf;
+ port_count = ARRAY_SIZE(t7xx_early_port_conf);
+ } else {
+ port_conf = t7xx_port_conf;
+ port_count = ARRAY_SIZE(t7xx_port_conf);
+ }
+
+ for (i = 0; i < port_count; i++)
+ port_prox->ports[i].port_conf = &port_conf[i];
+
+ port_prox->cfg_id = cfg_id;
+ port_prox->port_count = port_count;
+
+ t7xx_proxy_init_all_ports(md);
+}
+
static int t7xx_proxy_alloc(struct t7xx_modem *md)
{
- unsigned int port_count = ARRAY_SIZE(t7xx_port_conf);
struct device *dev = &md->t7xx_dev->pdev->dev;
struct port_proxy *port_prox;
- int i;
- port_prox = devm_kzalloc(dev, sizeof(*port_prox) + sizeof(struct t7xx_port) * port_count,
+ port_prox = devm_kzalloc(dev,
+ struct_size(port_prox,
+ ports,
+ T7XX_MAX_POSSIBLE_PORTS_NUM),
GFP_KERNEL);
if (!port_prox)
return -ENOMEM;
md->port_prox = port_prox;
port_prox->dev = dev;
+ t7xx_port_proxy_set_cfg(md, PORT_CFG_ID_EARLY);
- for (i = 0; i < port_count; i++)
- port_prox->ports[i].port_conf = &t7xx_port_conf[i];
-
- port_prox->port_count = port_count;
- t7xx_proxy_init_all_ports(md);
return 0;
}
@@ -492,8 +576,6 @@ int t7xx_port_proxy_init(struct t7xx_modem *md)
if (ret)
return ret;
- t7xx_cldma_set_recv_skb(md->md_ctrl[CLDMA_ID_AP], t7xx_port_proxy_recv_skb);
- t7xx_cldma_set_recv_skb(md->md_ctrl[CLDMA_ID_MD], t7xx_port_proxy_recv_skb);
return 0;
}
@@ -505,7 +587,7 @@ void t7xx_port_proxy_uninit(struct port_proxy *port_prox)
for_each_proxy_port(i, port, port_prox) {
const struct t7xx_port_conf *port_conf = port->port_conf;
- if (port_conf->ops->uninit)
+ if (port_conf->ops && port_conf->ops->uninit)
port_conf->ops->uninit(port);
}
}
diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.h b/drivers/net/wwan/t7xx/t7xx_port_proxy.h
index 81d059fbc0fb..7f5706811445 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_proxy.h
+++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.h
@@ -31,11 +31,18 @@
#define RX_QUEUE_MAXLEN 32
#define CTRL_QUEUE_MAXLEN 16
+enum port_cfg_id {
+ PORT_CFG_ID_INVALID,
+ PORT_CFG_ID_NORMAL,
+ PORT_CFG_ID_EARLY,
+};
+
struct port_proxy {
int port_count;
struct list_head rx_ch_ports[PORT_CH_ID_MASK + 1];
struct list_head queue_ports[CLDMA_NUM][MTK_QUEUES];
struct device *dev;
+ enum port_cfg_id cfg_id;
struct t7xx_port ports[];
};
@@ -98,5 +105,8 @@ void t7xx_port_proxy_md_status_notify(struct port_proxy *port_prox, unsigned int
int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg);
int t7xx_port_proxy_chl_enable_disable(struct port_proxy *port_prox, unsigned int ch_id,
bool en_flag);
+void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id);
+int t7xx_port_proxy_recv_skb(struct cldma_queue *queue, struct sk_buff *skb);
+int t7xx_port_proxy_recv_skb_from_dedicated_queue(struct cldma_queue *queue, struct sk_buff *skb);
#endif /* __T7XX_PORT_PROXY_H__ */
diff --git a/drivers/net/wwan/t7xx/t7xx_port_wwan.c b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
index 17389c8f6600..4b23ba693f3f 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_wwan.c
+++ b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
@@ -2,6 +2,7 @@
/*
* Copyright (c) 2021, MediaTek Inc.
* Copyright (c) 2021-2022, Intel Corporation.
+ * Copyright (c) 2024, Fibocom Wireless Inc.
*
* Authors:
* Amir Hanania <amir.hanania@intel.com>
@@ -15,6 +16,7 @@
* Chiranjeevi Rapolu <chiranjeevi.rapolu@intel.com>
* Eliot Lee <eliot.lee@intel.com>
* Sreehari Kancharla <sreehari.kancharla@intel.com>
+ * Jinjian Song <jinjian.song@fibocom.com>
*/
#include <linux/atomic.h>
@@ -33,7 +35,7 @@
#include "t7xx_port_proxy.h"
#include "t7xx_state_monitor.h"
-static int t7xx_port_ctrl_start(struct wwan_port *port)
+static int t7xx_port_wwan_start(struct wwan_port *port)
{
struct t7xx_port *port_mtk = wwan_port_get_drvdata(port);
@@ -44,30 +46,60 @@ static int t7xx_port_ctrl_start(struct wwan_port *port)
return 0;
}
-static void t7xx_port_ctrl_stop(struct wwan_port *port)
+static void t7xx_port_wwan_stop(struct wwan_port *port)
{
struct t7xx_port *port_mtk = wwan_port_get_drvdata(port);
atomic_dec(&port_mtk->usage_cnt);
}
-static int t7xx_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
+static int t7xx_port_fastboot_tx(struct t7xx_port *port, struct sk_buff *skb)
+{
+ struct sk_buff *cur = skb, *tx_skb;
+ size_t actual, len, offset = 0;
+ int txq_mtu;
+ int ret;
+
+ txq_mtu = t7xx_get_port_mtu(port);
+ if (txq_mtu < 0)
+ return -EINVAL;
+
+ actual = cur->len;
+ while (actual) {
+ len = min_t(size_t, actual, txq_mtu);
+ tx_skb = __dev_alloc_skb(len, GFP_KERNEL);
+ if (!tx_skb)
+ return -ENOMEM;
+
+ skb_put_data(tx_skb, cur->data + offset, len);
+
+ ret = t7xx_port_send_raw_skb(port, tx_skb);
+ if (ret) {
+ dev_kfree_skb(tx_skb);
+ dev_err(port->dev, "Write error on fastboot port, %d\n", ret);
+ break;
+ }
+ offset += len;
+ actual -= len;
+ }
+
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+static int t7xx_port_ctrl_tx(struct t7xx_port *port, struct sk_buff *skb)
{
- struct t7xx_port *port_private = wwan_port_get_drvdata(port);
const struct t7xx_port_conf *port_conf;
struct sk_buff *cur = skb, *cloned;
struct t7xx_fsm_ctl *ctl;
enum md_state md_state;
int cnt = 0, ret;
- if (!port_private->chan_enable)
- return -EINVAL;
-
- port_conf = port_private->port_conf;
- ctl = port_private->t7xx_dev->md->fsm_ctl;
+ port_conf = port->port_conf;
+ ctl = port->t7xx_dev->md->fsm_ctl;
md_state = t7xx_fsm_get_md_state(ctl);
if (md_state == MD_STATE_WAITING_FOR_HS1 || md_state == MD_STATE_WAITING_FOR_HS2) {
- dev_warn(port_private->dev, "Cannot write to %s port when md_state=%d\n",
+ dev_warn(port->dev, "Cannot write to %s port when md_state=%d\n",
port_conf->name, md_state);
return -ENODEV;
}
@@ -75,10 +107,10 @@ static int t7xx_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
while (cur) {
cloned = skb_clone(cur, GFP_KERNEL);
cloned->len = skb_headlen(cur);
- ret = t7xx_port_send_skb(port_private, cloned, 0, 0);
+ ret = t7xx_port_send_skb(port, cloned, 0, 0);
if (ret) {
dev_kfree_skb(cloned);
- dev_err(port_private->dev, "Write error on %s port, %d\n",
+ dev_err(port->dev, "Write error on %s port, %d\n",
port_conf->name, ret);
return cnt ? cnt + ret : ret;
}
@@ -93,14 +125,53 @@ static int t7xx_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
return 0;
}
+static int t7xx_port_wwan_tx(struct wwan_port *port, struct sk_buff *skb)
+{
+ struct t7xx_port *port_private = wwan_port_get_drvdata(port);
+ const struct t7xx_port_conf *port_conf = port_private->port_conf;
+ int ret;
+
+ if (!port_private->chan_enable)
+ return -EINVAL;
+
+ if (port_conf->port_type != WWAN_PORT_FASTBOOT)
+ ret = t7xx_port_ctrl_tx(port_private, skb);
+ else
+ ret = t7xx_port_fastboot_tx(port_private, skb);
+
+ return ret;
+}
+
static const struct wwan_port_ops wwan_ops = {
- .start = t7xx_port_ctrl_start,
- .stop = t7xx_port_ctrl_stop,
- .tx = t7xx_port_ctrl_tx,
+ .start = t7xx_port_wwan_start,
+ .stop = t7xx_port_wwan_stop,
+ .tx = t7xx_port_wwan_tx,
};
+static void t7xx_port_wwan_create(struct t7xx_port *port)
+{
+ const struct t7xx_port_conf *port_conf = port->port_conf;
+ unsigned int header_len = sizeof(struct ccci_header), mtu;
+ struct wwan_port_caps caps;
+
+ if (!port->wwan.wwan_port) {
+ mtu = t7xx_get_port_mtu(port);
+ caps.frag_len = mtu - header_len;
+ caps.headroom_len = header_len;
+ port->wwan.wwan_port = wwan_create_port(port->dev, port_conf->port_type,
+ &wwan_ops, &caps, port);
+ if (IS_ERR(port->wwan.wwan_port))
+ dev_err(port->dev, "Unable to create WWAN port %s", port_conf->name);
+ }
+}
+
static int t7xx_port_wwan_init(struct t7xx_port *port)
{
+ const struct t7xx_port_conf *port_conf = port->port_conf;
+
+ if (port_conf->port_type == WWAN_PORT_FASTBOOT)
+ t7xx_port_wwan_create(port);
+
port->rx_length_th = RX_QUEUE_MAXLEN;
return 0;
}
@@ -152,20 +223,14 @@ static int t7xx_port_wwan_disable_chl(struct t7xx_port *port)
static void t7xx_port_wwan_md_state_notify(struct t7xx_port *port, unsigned int state)
{
const struct t7xx_port_conf *port_conf = port->port_conf;
- unsigned int header_len = sizeof(struct ccci_header);
- struct wwan_port_caps caps;
+
+ if (port_conf->port_type == WWAN_PORT_FASTBOOT)
+ return;
if (state != MD_STATE_READY)
return;
- if (!port->wwan.wwan_port) {
- caps.frag_len = CLDMA_MTU - header_len;
- caps.headroom_len = header_len;
- port->wwan.wwan_port = wwan_create_port(port->dev, port_conf->port_type,
- &wwan_ops, &caps, port);
- if (IS_ERR(port->wwan.wwan_port))
- dev_err(port->dev, "Unable to create WWWAN port %s", port_conf->name);
- }
+ t7xx_port_wwan_create(port);
}
struct port_ops wwan_sub_port_ops = {
diff --git a/drivers/net/wwan/t7xx/t7xx_reg.h b/drivers/net/wwan/t7xx/t7xx_reg.h
index c41d7d094c08..9c7dc72ac6f6 100644
--- a/drivers/net/wwan/t7xx/t7xx_reg.h
+++ b/drivers/net/wwan/t7xx/t7xx_reg.h
@@ -101,11 +101,33 @@ enum t7xx_pm_resume_state {
PM_RESUME_REG_STATE_L2_EXP,
};
+enum host_event_e {
+ HOST_EVENT_INIT = 0,
+ FASTBOOT_DL_NOTIFY = 0x3,
+};
+
#define T7XX_PCIE_MISC_DEV_STATUS 0x0d1c
#define MISC_STAGE_MASK GENMASK(2, 0)
#define MISC_RESET_TYPE_PLDR BIT(26)
#define MISC_RESET_TYPE_FLDR BIT(27)
-#define LINUX_STAGE 4
+#define MISC_RESET_TYPE_PLDR BIT(26)
+#define MISC_LK_EVENT_MASK GENMASK(11, 8)
+#define HOST_EVENT_MASK GENMASK(31, 28)
+
+enum lk_event_id {
+ LK_EVENT_NORMAL = 0,
+ LK_EVENT_CREATE_PD_PORT = 1,
+ LK_EVENT_CREATE_POST_DL_PORT = 2,
+ LK_EVENT_RESET = 7,
+};
+
+enum t7xx_device_stage {
+ T7XX_DEV_STAGE_INIT = 0,
+ T7XX_DEV_STAGE_BROM_PRE = 1,
+ T7XX_DEV_STAGE_BROM_POST = 2,
+ T7XX_DEV_STAGE_LK = 3,
+ T7XX_DEV_STAGE_LINUX = 4,
+};
#define T7XX_PCIE_RESOURCE_STATUS 0x0d28
#define T7XX_PCIE_RESOURCE_STS_MSK GENMASK(4, 0)
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
index 0bc97430211b..9889ca4621cf 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
@@ -47,6 +47,13 @@
#define FSM_MD_EX_PASS_TIMEOUT_MS 45000
#define FSM_CMD_TIMEOUT_MS 2000
+#define wait_for_expected_dev_stage(status) \
+ read_poll_timeout(ioread32, status, \
+ ((status & MISC_STAGE_MASK) == T7XX_DEV_STAGE_LINUX) || \
+ ((status & MISC_STAGE_MASK) == T7XX_DEV_STAGE_LK), 100000, \
+ 20000000, false, IREG_BASE(md->t7xx_dev) + \
+ T7XX_PCIE_MISC_DEV_STATUS)
+
void t7xx_fsm_notifier_register(struct t7xx_modem *md, struct t7xx_fsm_notifier *notifier)
{
struct t7xx_fsm_ctl *ctl = md->fsm_ctl;
@@ -206,6 +213,55 @@ static void fsm_routine_exception(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_comm
fsm_finish_command(ctl, cmd, 0);
}
+static void t7xx_host_event_notify(struct t7xx_modem *md, unsigned int event_id)
+{
+ u32 value;
+
+ value = ioread32(IREG_BASE(md->t7xx_dev) + T7XX_PCIE_MISC_DEV_STATUS);
+ value &= ~HOST_EVENT_MASK;
+ value |= FIELD_PREP(HOST_EVENT_MASK, event_id);
+ iowrite32(value, IREG_BASE(md->t7xx_dev) + T7XX_PCIE_MISC_DEV_STATUS);
+}
+
+static void t7xx_lk_stage_event_handling(struct t7xx_fsm_ctl *ctl, unsigned int status)
+{
+ struct t7xx_modem *md = ctl->md;
+ struct cldma_ctrl *md_ctrl;
+ enum lk_event_id lk_event;
+ struct device *dev;
+ struct t7xx_port *port;
+
+ dev = &md->t7xx_dev->pdev->dev;
+ lk_event = FIELD_GET(MISC_LK_EVENT_MASK, status);
+ switch (lk_event) {
+ case LK_EVENT_NORMAL:
+ case LK_EVENT_RESET:
+ break;
+
+ case LK_EVENT_CREATE_PD_PORT:
+ case LK_EVENT_CREATE_POST_DL_PORT:
+ md_ctrl = md->md_ctrl[CLDMA_ID_AP];
+ t7xx_cldma_hif_hw_init(md_ctrl);
+ t7xx_cldma_stop(md_ctrl);
+ t7xx_cldma_switch_cfg(md_ctrl, CLDMA_DEDICATED_Q_CFG);
+
+ port = &ctl->md->port_prox->ports[0];
+ port->port_conf->ops->enable_chl(port);
+
+ t7xx_cldma_start(md_ctrl);
+
+ if (lk_event == LK_EVENT_CREATE_POST_DL_PORT)
+ t7xx_mode_update(md->t7xx_dev, T7XX_FASTBOOT_DOWNLOAD);
+ else
+ t7xx_mode_update(md->t7xx_dev, T7XX_FASTBOOT_DUMP);
+ break;
+
+ default:
+ dev_err(dev, "Invalid LK event %d\n", lk_event);
+ break;
+ }
+}
+
static int fsm_stopped_handler(struct t7xx_fsm_ctl *ctl)
{
ctl->curr_state = FSM_STATE_STOPPED;
@@ -226,8 +282,9 @@ static void fsm_routine_stopped(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_comman
static void fsm_routine_stopping(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command *cmd)
{
- struct t7xx_pci_dev *t7xx_dev;
- struct cldma_ctrl *md_ctrl;
+ struct cldma_ctrl *md_ctrl = ctl->md->md_ctrl[CLDMA_ID_MD];
+ struct t7xx_pci_dev *t7xx_dev = ctl->md->t7xx_dev;
+ enum t7xx_mode mode = READ_ONCE(t7xx_dev->mode);
int err;
if (ctl->curr_state == FSM_STATE_STOPPED || ctl->curr_state == FSM_STATE_STOPPING) {
@@ -235,18 +292,20 @@ static void fsm_routine_stopping(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_comma
return;
}
- md_ctrl = ctl->md->md_ctrl[CLDMA_ID_MD];
- t7xx_dev = ctl->md->t7xx_dev;
-
ctl->curr_state = FSM_STATE_STOPPING;
t7xx_fsm_broadcast_state(ctl, MD_STATE_WAITING_TO_STOP);
t7xx_cldma_stop(md_ctrl);
- if (!ctl->md->rgu_irq_asserted) {
- t7xx_mhccif_h2d_swint_trigger(t7xx_dev, H2D_CH_DRM_DISABLE_AP);
- /* Wait for the DRM disable to take effect */
- msleep(FSM_DRM_DISABLE_DELAY_MS);
+ if (mode == T7XX_FASTBOOT_SWITCHING)
+ t7xx_host_event_notify(ctl->md, FASTBOOT_DL_NOTIFY);
+
+ t7xx_mhccif_h2d_swint_trigger(t7xx_dev, H2D_CH_DRM_DISABLE_AP);
+ /* Wait for the DRM disable to take effect */
+ msleep(FSM_DRM_DISABLE_DELAY_MS);
+ if (mode == T7XX_FASTBOOT_SWITCHING) {
+ t7xx_mhccif_h2d_swint_trigger(t7xx_dev, H2D_CH_DEVICE_RESET);
+ } else {
err = t7xx_acpi_fldr_func(t7xx_dev);
if (err)
t7xx_mhccif_h2d_swint_trigger(t7xx_dev, H2D_CH_DEVICE_RESET);
@@ -272,6 +331,7 @@ static void fsm_routine_ready(struct t7xx_fsm_ctl *ctl)
ctl->curr_state = FSM_STATE_READY;
t7xx_fsm_broadcast_ready_state(ctl);
+ t7xx_mode_update(md->t7xx_dev, T7XX_READY);
t7xx_md_event_notify(md, FSM_READY);
}
@@ -317,7 +377,8 @@ static int fsm_routine_starting(struct t7xx_fsm_ctl *ctl)
static void fsm_routine_start(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command *cmd)
{
struct t7xx_modem *md = ctl->md;
- u32 dev_status;
+ struct device *dev;
+ u32 status;
int ret;
if (!md)
@@ -329,23 +390,53 @@ static void fsm_routine_start(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command
return;
}
+ dev = &md->t7xx_dev->pdev->dev;
ctl->curr_state = FSM_STATE_PRE_START;
t7xx_md_event_notify(md, FSM_PRE_START);
- ret = read_poll_timeout(ioread32, dev_status,
- (dev_status & MISC_STAGE_MASK) == LINUX_STAGE, 20000, 2000000,
- false, IREG_BASE(md->t7xx_dev) + T7XX_PCIE_MISC_DEV_STATUS);
+ ret = wait_for_expected_dev_stage(status);
+
if (ret) {
- struct device *dev = &md->t7xx_dev->pdev->dev;
+ dev_err(dev, "read poll timeout %d\n", ret);
+ goto finish_command;
+ }
- fsm_finish_command(ctl, cmd, -ETIMEDOUT);
- dev_err(dev, "Invalid device status 0x%lx\n", dev_status & MISC_STAGE_MASK);
- return;
+ if (status != ctl->status || cmd->flag != 0) {
+ u32 stage = FIELD_GET(MISC_STAGE_MASK, status);
+
+ switch (stage) {
+ case T7XX_DEV_STAGE_INIT:
+ case T7XX_DEV_STAGE_BROM_PRE:
+ case T7XX_DEV_STAGE_BROM_POST:
+ dev_dbg(dev, "BROM_STAGE Entered\n");
+ ret = t7xx_fsm_append_cmd(ctl, FSM_CMD_START, 0);
+ break;
+
+ case T7XX_DEV_STAGE_LK:
+ dev_dbg(dev, "LK_STAGE Entered\n");
+ t7xx_lk_stage_event_handling(ctl, status);
+ break;
+
+ case T7XX_DEV_STAGE_LINUX:
+ dev_dbg(dev, "LINUX_STAGE Entered\n");
+ t7xx_mhccif_mask_clr(md->t7xx_dev, D2H_INT_PORT_ENUM |
+ D2H_INT_ASYNC_MD_HK | D2H_INT_ASYNC_AP_HK);
+ if (cmd->flag == 0)
+ break;
+ t7xx_cldma_hif_hw_init(md->md_ctrl[CLDMA_ID_AP]);
+ t7xx_cldma_hif_hw_init(md->md_ctrl[CLDMA_ID_MD]);
+ t7xx_port_proxy_set_cfg(md, PORT_CFG_ID_NORMAL);
+ ret = fsm_routine_starting(ctl);
+ break;
+
+ default:
+ break;
+ }
+ ctl->status = status;
}
- t7xx_cldma_hif_hw_init(md->md_ctrl[CLDMA_ID_AP]);
- t7xx_cldma_hif_hw_init(md->md_ctrl[CLDMA_ID_MD]);
- fsm_finish_command(ctl, cmd, fsm_routine_starting(ctl));
+finish_command:
+ fsm_finish_command(ctl, cmd, ret);
}
static int fsm_main_thread(void *data)
@@ -517,6 +608,7 @@ void t7xx_fsm_reset(struct t7xx_modem *md)
fsm_flush_event_cmd_qs(ctl);
ctl->curr_state = FSM_STATE_STOPPED;
ctl->exp_flg = false;
+ ctl->status = T7XX_DEV_STAGE_INIT;
}
int t7xx_fsm_init(struct t7xx_modem *md)
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.h b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
index b0b3662ae6d7..7b0a9baf488c 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.h
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
@@ -96,6 +96,7 @@ struct t7xx_fsm_ctl {
bool exp_flg;
spinlock_t notifier_lock; /* Protects notifier list */
struct list_head notifier_list;
+ u32 status; /* Device boot stage */
};
struct t7xx_fsm_event {
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 72e01e550a16..17431f1b1a0c 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -26,7 +26,9 @@
static DEFINE_MUTEX(wwan_register_lock); /* WWAN device create|remove lock */
static DEFINE_IDA(minors); /* minors for WWAN port chardevs */
static DEFINE_IDA(wwan_dev_ids); /* for unique WWAN device IDs */
-static struct class *wwan_class;
+static const struct class wwan_class = {
+ .name = "wwan",
+};
static int wwan_major;
static struct dentry *wwan_debugfs_dir;
@@ -130,7 +132,7 @@ static struct wwan_device *wwan_dev_get_by_parent(struct device *parent)
{
struct device *dev;
- dev = class_find_device(wwan_class, NULL, parent, wwan_dev_parent_match);
+ dev = class_find_device(&wwan_class, NULL, parent, wwan_dev_parent_match);
if (!dev)
return ERR_PTR(-ENODEV);
@@ -147,7 +149,7 @@ static struct wwan_device *wwan_dev_get_by_name(const char *name)
{
struct device *dev;
- dev = class_find_device(wwan_class, NULL, name, wwan_dev_name_match);
+ dev = class_find_device(&wwan_class, NULL, name, wwan_dev_name_match);
if (!dev)
return ERR_PTR(-ENODEV);
@@ -183,7 +185,7 @@ static struct wwan_device *wwan_dev_get_by_debugfs(struct dentry *dir)
{
struct device *dev;
- dev = class_find_device(wwan_class, NULL, dir, wwan_dev_debugfs_match);
+ dev = class_find_device(&wwan_class, NULL, dir, wwan_dev_debugfs_match);
if (!dev)
return ERR_PTR(-ENODEV);
@@ -239,7 +241,7 @@ static struct wwan_device *wwan_create_dev(struct device *parent)
}
wwandev->dev.parent = parent;
- wwandev->dev.class = wwan_class;
+ wwandev->dev.class = &wwan_class;
wwandev->dev.type = &wwan_dev_type;
wwandev->id = id;
dev_set_name(&wwandev->dev, "wwan%d", wwandev->id);
@@ -265,7 +267,7 @@ done_unlock:
static int is_wwan_child(struct device *dev, void *data)
{
- return dev->class == wwan_class;
+ return dev->class == &wwan_class;
}
static void wwan_remove_dev(struct wwan_device *wwandev)
@@ -328,6 +330,10 @@ static const struct {
.name = "XMMRPC",
.devsuf = "xmmrpc",
},
+ [WWAN_PORT_FASTBOOT] = {
+ .name = "FASTBOOT",
+ .devsuf = "fastboot",
+ },
};
static ssize_t type_show(struct device *dev, struct device_attribute *attr,
@@ -371,7 +377,7 @@ static struct wwan_port *wwan_port_get_by_minor(unsigned int minor)
{
struct device *dev;
- dev = class_find_device(wwan_class, NULL, &minor, wwan_port_minor_match);
+ dev = class_find_device(&wwan_class, NULL, &minor, wwan_port_minor_match);
if (!dev)
return ERR_PTR(-ENODEV);
@@ -401,7 +407,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt)
return -ENOMEM;
/* Collect ids of same name format ports */
- class_dev_iter_init(&iter, wwan_class, NULL, &wwan_port_dev_type);
+ class_dev_iter_init(&iter, &wwan_class, NULL, &wwan_port_dev_type);
while ((dev = class_dev_iter_next(&iter))) {
if (dev->parent != &wwandev->dev)
continue;
@@ -473,7 +479,7 @@ struct wwan_port *wwan_create_port(struct device *parent,
mutex_init(&port->data_lock);
port->dev.parent = &wwandev->dev;
- port->dev.class = wwan_class;
+ port->dev.class = &wwan_class;
port->dev.type = &wwan_port_dev_type;
port->dev.devt = MKDEV(wwan_major, minor);
dev_set_drvdata(&port->dev, drvdata);
@@ -916,7 +922,7 @@ static int wwan_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
-static struct device_type wwan_type = { .name = "wwan" };
+static const struct device_type wwan_type = { .name = "wwan" };
static struct net_device *wwan_rtnl_alloc(struct nlattr *tb[],
const char *ifname,
@@ -1208,11 +1214,9 @@ static int __init wwan_init(void)
if (err)
return err;
- wwan_class = class_create("wwan");
- if (IS_ERR(wwan_class)) {
- err = PTR_ERR(wwan_class);
+ err = class_register(&wwan_class);
+ if (err)
goto unregister;
- }
/* chrdev used for wwan ports */
wwan_major = __register_chrdev(0, 0, WWAN_MAX_MINORS, "wwan_port",
@@ -1229,7 +1233,7 @@ static int __init wwan_init(void)
return 0;
destroy:
- class_destroy(wwan_class);
+ class_unregister(&wwan_class);
unregister:
rtnl_link_unregister(&wwan_rtnl_link_ops);
return err;
@@ -1240,7 +1244,7 @@ static void __exit wwan_exit(void)
debugfs_remove_recursive(wwan_debugfs_dir);
__unregister_chrdev(wwan_major, 0, WWAN_MAX_MINORS, "wwan_port");
rtnl_link_unregister(&wwan_rtnl_link_ops);
- class_destroy(wwan_class);
+ class_unregister(&wwan_class);
}
module_init(wwan_init);
diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
index ff3dd24ddb33..b02befd1b6fb 100644
--- a/drivers/net/wwan/wwan_hwsim.c
+++ b/drivers/net/wwan/wwan_hwsim.c
@@ -25,7 +25,9 @@ static int wwan_hwsim_devsnum = 2;
module_param_named(devices, wwan_hwsim_devsnum, int, 0444);
MODULE_PARM_DESC(devices, "Number of simulated devices");
-static struct class *wwan_hwsim_class;
+static const struct class wwan_hwsim_class = {
+ .name = "wwan_hwsim",
+};
static struct dentry *wwan_hwsim_debugfs_topdir;
static struct dentry *wwan_hwsim_debugfs_devcreate;
@@ -277,7 +279,7 @@ static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void)
spin_unlock(&wwan_hwsim_devs_lock);
dev->dev.release = wwan_hwsim_dev_release;
- dev->dev.class = wwan_hwsim_class;
+ dev->dev.class = &wwan_hwsim_class;
dev_set_name(&dev->dev, "hwsim%u", dev->id);
spin_lock_init(&dev->ports_lock);
@@ -511,11 +513,9 @@ static int __init wwan_hwsim_init(void)
if (!wwan_wq)
return -ENOMEM;
- wwan_hwsim_class = class_create("wwan_hwsim");
- if (IS_ERR(wwan_hwsim_class)) {
- err = PTR_ERR(wwan_hwsim_class);
+ err = class_register(&wwan_hwsim_class);
+ if (err)
goto err_wq_destroy;
- }
wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL);
wwan_hwsim_debugfs_devcreate =
@@ -534,7 +534,7 @@ err_clean_devs:
wwan_hwsim_free_devs();
flush_workqueue(wwan_wq); /* Wait deletion works completion */
debugfs_remove(wwan_hwsim_debugfs_topdir);
- class_destroy(wwan_hwsim_class);
+ class_unregister(&wwan_hwsim_class);
err_wq_destroy:
destroy_workqueue(wwan_wq);
@@ -547,7 +547,7 @@ static void __exit wwan_hwsim_exit(void)
wwan_hwsim_free_devs();
flush_workqueue(wwan_wq); /* Wait deletion works completion */
debugfs_remove(wwan_hwsim_debugfs_topdir);
- class_destroy(wwan_hwsim_class);
+ class_unregister(&wwan_hwsim_class);
destroy_workqueue(wwan_wq);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 88f760a7cbc3..ef76850d9bcd 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -104,13 +104,12 @@ bool provides_xdp_headroom = true;
module_param(provides_xdp_headroom, bool, 0644);
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
- u8 status);
+ s8 status);
static void make_tx_response(struct xenvif_queue *queue,
- struct xen_netif_tx_request *txp,
+ const struct xen_netif_tx_request *txp,
unsigned int extra_count,
- s8 st);
-static void push_tx_responses(struct xenvif_queue *queue);
+ s8 status);
static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
@@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue,
unsigned int extra_count, RING_IDX end)
{
RING_IDX cons = queue->tx.req_cons;
- unsigned long flags;
do {
- spin_lock_irqsave(&queue->response_lock, flags);
make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
- push_tx_responses(queue);
- spin_unlock_irqrestore(&queue->response_lock, flags);
if (cons == end)
break;
RING_COPY_REQUEST(&queue->tx, cons++, txp);
@@ -463,12 +458,20 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
}
for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
- shinfo->nr_frags++, gop++, nr_slots--) {
+ nr_slots--) {
+ if (unlikely(!txp->size)) {
+ make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY);
+ ++txp;
+ continue;
+ }
+
index = pending_index(queue->pending_cons++);
pending_idx = queue->pending_ring[index];
xenvif_tx_create_map_op(queue, pending_idx, txp,
txp == first ? extra_count : 0, gop);
frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+ ++shinfo->nr_frags;
+ ++gop;
if (txp == first)
txp = txfrags;
@@ -481,20 +484,33 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
shinfo = skb_shinfo(nskb);
frags = shinfo->frags;
- for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
- shinfo->nr_frags++, txp++, gop++) {
+ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) {
+ if (unlikely(!txp->size)) {
+ make_tx_response(queue, txp, 0,
+ XEN_NETIF_RSP_OKAY);
+ continue;
+ }
+
index = pending_index(queue->pending_cons++);
pending_idx = queue->pending_ring[index];
xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
gop);
frag_set_pending_idx(&frags[shinfo->nr_frags],
pending_idx);
+ ++shinfo->nr_frags;
+ ++gop;
+ }
+
+ if (shinfo->nr_frags) {
+ skb_shinfo(skb)->frag_list = nskb;
+ nskb = NULL;
}
+ }
- skb_shinfo(skb)->frag_list = nskb;
- } else if (nskb) {
+ if (nskb) {
/* A frag_list skb was allocated but it is no longer needed
- * because enough slots were converted to copy ops above.
+ * because enough slots were converted to copy ops above or some
+ * were empty.
*/
kfree_skb(nskb);
}
@@ -963,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
(ret == 0) ?
XEN_NETIF_RSP_OKAY :
XEN_NETIF_RSP_ERROR);
- push_tx_responses(queue);
continue;
}
@@ -975,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
make_tx_response(queue, &txreq, extra_count,
XEN_NETIF_RSP_OKAY);
- push_tx_responses(queue);
continue;
}
@@ -1401,8 +1415,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
return work_done;
}
+static void _make_tx_response(struct xenvif_queue *queue,
+ const struct xen_netif_tx_request *txp,
+ unsigned int extra_count,
+ s8 status)
+{
+ RING_IDX i = queue->tx.rsp_prod_pvt;
+ struct xen_netif_tx_response *resp;
+
+ resp = RING_GET_RESPONSE(&queue->tx, i);
+ resp->id = txp->id;
+ resp->status = status;
+
+ while (extra_count-- != 0)
+ RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+
+ queue->tx.rsp_prod_pvt = ++i;
+}
+
+static void push_tx_responses(struct xenvif_queue *queue)
+{
+ int notify;
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
+ if (notify)
+ notify_remote_via_irq(queue->tx_irq);
+}
+
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
- u8 status)
+ s8 status)
{
struct pending_tx_info *pending_tx_info;
pending_ring_idx_t index;
@@ -1412,8 +1453,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
spin_lock_irqsave(&queue->response_lock, flags);
- make_tx_response(queue, &pending_tx_info->req,
- pending_tx_info->extra_count, status);
+ _make_tx_response(queue, &pending_tx_info->req,
+ pending_tx_info->extra_count, status);
/* Release the pending index before pusing the Tx response so
* its available before a new Tx request is pushed by the
@@ -1427,32 +1468,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
spin_unlock_irqrestore(&queue->response_lock, flags);
}
-
static void make_tx_response(struct xenvif_queue *queue,
- struct xen_netif_tx_request *txp,
+ const struct xen_netif_tx_request *txp,
unsigned int extra_count,
- s8 st)
+ s8 status)
{
- RING_IDX i = queue->tx.rsp_prod_pvt;
- struct xen_netif_tx_response *resp;
-
- resp = RING_GET_RESPONSE(&queue->tx, i);
- resp->id = txp->id;
- resp->status = st;
-
- while (extra_count-- != 0)
- RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+ unsigned long flags;
- queue->tx.rsp_prod_pvt = ++i;
-}
+ spin_lock_irqsave(&queue->response_lock, flags);
-static void push_tx_responses(struct xenvif_queue *queue)
-{
- int notify;
+ _make_tx_response(queue, txp, extra_count, status);
+ push_tx_responses(queue);
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
- if (notify)
- notify_remote_via_irq(queue->tx_irq);
+ spin_unlock_irqrestore(&queue->response_lock, flags);
}
static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
@@ -1750,5 +1778,6 @@ static void __exit netback_fini(void)
}
module_exit(netback_fini);
+MODULE_DESCRIPTION("Xen backend network device module");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index bb3726b622ad..4d0c527e8576 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1496,19 +1496,21 @@ static int btt_blk_init(struct btt *btt)
{
struct nd_btt *nd_btt = btt->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns;
- int rc = -ENOMEM;
+ struct queue_limits lim = {
+ .logical_block_size = btt->sector_size,
+ .max_hw_sectors = UINT_MAX,
+ };
+ int rc;
- btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!btt->btt_disk)
- return -ENOMEM;
+ btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(btt->btt_disk))
+ return PTR_ERR(btt->btt_disk);
nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
btt->btt_disk->first_minor = 0;
btt->btt_disk->fops = &btt_fops;
btt->btt_disk->private_data = btt;
- blk_queue_logical_block_size(btt->btt_disk->queue, btt->sector_size);
- blk_queue_max_hw_sectors(btt->btt_disk->queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4e8fdcb3f1c8..8dcc10b6db5b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -451,6 +451,11 @@ static int pmem_attach_disk(struct device *dev,
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
+ struct queue_limits lim = {
+ .logical_block_size = pmem_sector_size(ndns),
+ .physical_block_size = PAGE_SIZE,
+ .max_hw_sectors = UINT_MAX,
+ };
int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
struct range bb_range;
@@ -497,9 +502,9 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- disk = blk_alloc_disk(nid);
- if (!disk)
- return -ENOMEM;
+ disk = blk_alloc_disk(&lim, nid);
+ if (IS_ERR(disk))
+ return PTR_ERR(disk);
q = disk->queue;
pmem->disk = disk;
@@ -539,9 +544,6 @@ static int pmem_attach_disk(struct device *dev,
pmem->virt_addr = addr;
blk_queue_write_cache(q, true, fua);
- blk_queue_physical_block_size(q, PAGE_SIZE);
- blk_queue_logical_block_size(q, pmem_sector_size(ndns));
- blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q);
if (pmem->pfn_flags & PFN_MAP)
diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index a23ab5c968b9..a3455f1d67fa 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -471,4 +471,5 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
}
EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
+MODULE_DESCRIPTION("NVMe Authentication framework");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index a5c0431c101c..6f7e7a8fa5ae 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -181,5 +181,6 @@ static void __exit nvme_keyring_exit(void)
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
+MODULE_DESCRIPTION("NVMe Keyring implementation");
module_init(nvme_keyring_init);
module_exit(nvme_keyring_exit);
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 596bb11eeba5..a480cdeac288 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -797,6 +797,7 @@ static int apple_nvme_init_request(struct blk_mq_tag_set *set,
static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
{
+ enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl);
u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS);
bool dead = false, freeze = false;
unsigned long flags;
@@ -808,8 +809,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
if (csts & NVME_CSTS_CFS)
dead = true;
- if (anv->ctrl.state == NVME_CTRL_LIVE ||
- anv->ctrl.state == NVME_CTRL_RESETTING) {
+ if (state == NVME_CTRL_LIVE ||
+ state == NVME_CTRL_RESETTING) {
freeze = true;
nvme_start_freeze(&anv->ctrl);
}
@@ -881,7 +882,7 @@ static enum blk_eh_timer_return apple_nvme_timeout(struct request *req)
unsigned long flags;
u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS);
- if (anv->ctrl.state != NVME_CTRL_LIVE) {
+ if (nvme_ctrl_state(&anv->ctrl) != NVME_CTRL_LIVE) {
/*
* From rdma.c:
* If we are resetting, connecting or deleting we should
@@ -985,10 +986,10 @@ static void apple_nvme_reset_work(struct work_struct *work)
u32 boot_status, aqa;
struct apple_nvme *anv =
container_of(work, struct apple_nvme, ctrl.reset_work);
+ enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl);
- if (anv->ctrl.state != NVME_CTRL_RESETTING) {
- dev_warn(anv->dev, "ctrl state %d is not RESETTING\n",
- anv->ctrl.state);
+ if (state != NVME_CTRL_RESETTING) {
+ dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", state);
ret = -ENODEV;
goto out;
}
@@ -1515,7 +1516,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
goto put_dev;
}
- anv->ctrl.admin_q = blk_mq_init_queue(&anv->admin_tagset);
+ anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
if (IS_ERR(anv->ctrl.admin_q)) {
ret = -ENOMEM;
goto put_dev;
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 72c0525c75f5..a264b3ae078b 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -48,11 +48,6 @@ struct nvme_dhchap_queue_context {
static struct workqueue_struct *nvme_auth_wq;
-#define nvme_auth_flags_from_qid(qid) \
- (qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED
-#define nvme_auth_queue_from_qid(ctrl, qid) \
- (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q
-
static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl)
{
return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues +
@@ -63,10 +58,15 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
void *data, size_t data_len, bool auth_send)
{
struct nvme_command cmd = {};
- blk_mq_req_flags_t flags = nvme_auth_flags_from_qid(qid);
- struct request_queue *q = nvme_auth_queue_from_qid(ctrl, qid);
+ nvme_submit_flags_t flags = NVME_SUBMIT_RETRY;
+ struct request_queue *q = ctrl->fabrics_q;
int ret;
+ if (qid != 0) {
+ flags |= NVME_SUBMIT_NOWAIT | NVME_SUBMIT_RESERVED;
+ q = ctrl->connect_q;
+ }
+
cmd.auth_common.opcode = nvme_fabrics_command;
cmd.auth_common.secp = NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER;
cmd.auth_common.spsp0 = 0x01;
@@ -80,8 +80,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
}
ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len,
- qid == 0 ? NVME_QID_ANY : qid,
- 0, flags);
+ qid == 0 ? NVME_QID_ANY : qid, flags);
if (ret > 0)
dev_warn(ctrl->device,
"qid %d auth_send failed with status %d\n", qid, ret);
@@ -897,7 +896,7 @@ static void nvme_ctrl_auth_work(struct work_struct *work)
* If the ctrl is no connected, bail as reconnect will handle
* authentication.
*/
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
return;
/* Authenticate admin queue first */
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index 20f46c230885..6f2ebb5fcdb0 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -171,15 +171,15 @@ static const char * const nvme_statuses[] = {
[NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
};
-const unsigned char *nvme_get_error_status_str(u16 status)
+const char *nvme_get_error_status_str(u16 status)
{
status &= 0x7ff;
if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status])
- return nvme_statuses[status & 0x7ff];
+ return nvme_statuses[status];
return "Unknown";
}
-const unsigned char *nvme_get_opcode_str(u8 opcode)
+const char *nvme_get_opcode_str(u8 opcode)
{
if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode])
return nvme_ops[opcode];
@@ -187,7 +187,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode)
}
EXPORT_SYMBOL_GPL(nvme_get_opcode_str);
-const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+const char *nvme_get_admin_opcode_str(u8 opcode)
{
if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode])
return nvme_admin_ops[opcode];
@@ -195,7 +195,7 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
}
EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str);
-const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) {
+const char *nvme_get_fabrics_opcode_str(u8 opcode) {
if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode])
return nvme_fabrics_ops[opcode];
return "Unknown";
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 85ab0fcf9e88..00864a634470 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock);
static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_ctrl_base_chr_devt;
-static struct class *nvme_class;
-static struct class *nvme_subsys_class;
+static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env);
+static const struct class nvme_class = {
+ .name = "nvme",
+ .dev_uevent = nvme_class_uevent,
+};
+
+static const struct class nvme_subsys_class = {
+ .name = "nvme-subsystem",
+};
static DEFINE_IDA(nvme_ns_chr_minor_ida);
static dev_t nvme_ns_chr_devt;
-static struct class *nvme_ns_chr_class;
+static const struct class nvme_ns_chr_class = {
+ .name = "nvme-generic",
+};
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
@@ -338,6 +347,30 @@ static void nvme_log_error(struct request *req)
nr->status & NVME_SC_DNR ? "DNR " : "");
}
+static void nvme_log_err_passthru(struct request *req)
+{
+ struct nvme_ns *ns = req->q->queuedata;
+ struct nvme_request *nr = nvme_req(req);
+
+ pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s"
+ "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n",
+ ns ? ns->disk->disk_name : dev_name(nr->ctrl->device),
+ ns ? nvme_get_opcode_str(nr->cmd->common.opcode) :
+ nvme_get_admin_opcode_str(nr->cmd->common.opcode),
+ nr->cmd->common.opcode,
+ nvme_get_error_status_str(nr->status),
+ nr->status >> 8 & 7, /* Status Code Type */
+ nr->status & 0xff, /* Status Code */
+ nr->status & NVME_SC_MORE ? "MORE " : "",
+ nr->status & NVME_SC_DNR ? "DNR " : "",
+ nr->cmd->common.cdw10,
+ nr->cmd->common.cdw11,
+ nr->cmd->common.cdw12,
+ nr->cmd->common.cdw13,
+ nr->cmd->common.cdw14,
+ nr->cmd->common.cdw14);
+}
+
enum nvme_disposition {
COMPLETE,
RETRY,
@@ -385,8 +418,12 @@ static inline void nvme_end_req(struct request *req)
{
blk_status_t status = nvme_error_status(nvme_req(req)->status);
- if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
- nvme_log_error(req);
+ if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
+ if (blk_rq_is_passthrough(req))
+ nvme_log_err_passthru(req);
+ else
+ nvme_log_error(req);
+ }
nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
if (req->cmd_flags & REQ_NVME_MPATH)
@@ -679,10 +716,21 @@ static inline void nvme_clear_nvme_request(struct request *req)
/* initialize a passthrough request */
void nvme_init_request(struct request *req, struct nvme_command *cmd)
{
- if (req->q->queuedata)
+ struct nvme_request *nr = nvme_req(req);
+ bool logging_enabled;
+
+ if (req->q->queuedata) {
+ struct nvme_ns *ns = req->q->disk->private_data;
+
+ logging_enabled = ns->head->passthru_err_log_enabled;
req->timeout = NVME_IO_TIMEOUT;
- else /* no queuedata implies admin queue */
+ } else { /* no queuedata implies admin queue */
+ logging_enabled = nr->ctrl->passthru_err_log_enabled;
req->timeout = NVME_ADMIN_TIMEOUT;
+ }
+
+ if (!logging_enabled)
+ req->rq_flags |= RQF_QUIET;
/* passthru commands should let the driver set the SGL flags */
cmd->common.flags &= ~NVME_CMD_SGL_ALL;
@@ -691,8 +739,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
if (req->mq_hctx->type == HCTX_TYPE_POLL)
req->cmd_flags |= REQ_POLLED;
nvme_clear_nvme_request(req);
- req->rq_flags |= RQF_QUIET;
- memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
+ memcpy(nr->cmd, cmd, sizeof(*cmd));
}
EXPORT_SYMBOL_GPL(nvme_init_request);
@@ -721,7 +768,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live)
+ bool queue_live, enum nvme_ctrl_state state)
{
struct nvme_request *req = nvme_req(rq);
@@ -742,7 +789,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
* command, which is require to set the queue live in the
* appropinquate states.
*/
- switch (nvme_ctrl_state(ctrl)) {
+ switch (state) {
case NVME_CTRL_CONNECTING:
if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -1051,20 +1098,27 @@ EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU);
*/
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
- int qid, int at_head, blk_mq_req_flags_t flags)
+ int qid, nvme_submit_flags_t flags)
{
struct request *req;
int ret;
+ blk_mq_req_flags_t blk_flags = 0;
+ if (flags & NVME_SUBMIT_NOWAIT)
+ blk_flags |= BLK_MQ_REQ_NOWAIT;
+ if (flags & NVME_SUBMIT_RESERVED)
+ blk_flags |= BLK_MQ_REQ_RESERVED;
if (qid == NVME_QID_ANY)
- req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
+ req = blk_mq_alloc_request(q, nvme_req_op(cmd), blk_flags);
else
- req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+ req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), blk_flags,
qid - 1);
if (IS_ERR(req))
return PTR_ERR(req);
nvme_init_request(req, cmd);
+ if (flags & NVME_SUBMIT_RETRY)
+ req->cmd_flags &= ~REQ_FAILFAST_DRIVER;
if (buffer && bufflen) {
ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -1072,7 +1126,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
goto out;
}
- ret = nvme_execute_rq(req, at_head);
+ ret = nvme_execute_rq(req, flags & NVME_SUBMIT_AT_HEAD);
if (result && ret >= 0)
*result = nvme_req(req)->result;
out:
@@ -1085,7 +1139,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, unsigned bufflen)
{
return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0);
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
@@ -1108,6 +1162,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
} else {
effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+
+ /* Ignore execution restrictions if any relaxation bits are set */
+ if (effects & NVME_CMD_EFFECTS_CSER_MASK)
+ effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
}
return effects;
@@ -1349,8 +1407,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
sizeof(struct nvme_id_ctrl));
- if (error)
+ if (error) {
kfree(*id);
+ *id = NULL;
+ }
return error;
}
@@ -1479,6 +1539,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (error) {
dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
kfree(*id);
+ *id = NULL;
}
return error;
}
@@ -1560,7 +1621,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
c.features.dword11 = cpu_to_le32(dword11);
ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
- buffer, buflen, NVME_QID_ANY, 0, 0);
+ buffer, buflen, NVME_QID_ANY, 0);
if (ret >= 0 && result)
*result = le32_to_cpu(res.u32);
return ret;
@@ -1678,12 +1739,23 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk,
- struct nvme_ns_head *head, u32 max_integrity_segments)
+static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
{
struct blk_integrity integrity = { };
+ blk_integrity_unregister(disk);
+
+ if (!head->ms)
+ return true;
+
+ /*
+ * PI can always be supported as we can ask the controller to simply
+ * insert/strip it, which is not possible for other kinds of metadata.
+ */
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) ||
+ !(head->features & NVME_NS_METADATA_SUPPORTED))
+ return nvme_ns_has_pi(head);
+
switch (head->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
switch (head->guard_type) {
@@ -1726,53 +1798,32 @@ static void nvme_init_integrity(struct gendisk *disk,
}
integrity.tuple_size = head->ms;
+ integrity.pi_offset = head->pi_offset;
blk_integrity_register(disk, &integrity);
- blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
-}
-#else
-static void nvme_init_integrity(struct gendisk *disk,
- struct nvme_ns_head *head, u32 max_integrity_segments)
-{
+ return true;
}
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
- struct nvme_ns_head *head)
+static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
{
- struct request_queue *queue = disk->queue;
- u32 max_discard_sectors;
-
- if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
- max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
- } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
- max_discard_sectors = UINT_MAX;
- } else {
- blk_queue_max_discard_sectors(queue, 0);
- return;
- }
+ struct nvme_ctrl *ctrl = ns->ctrl;
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- /*
- * If discard is already enabled, don't reset queue limits.
- *
- * This works around the fact that the block layer can't cope well with
- * updating the hardware limits when overridden through sysfs. This is
- * harmless because discard limits in NVMe are purely advisory.
- */
- if (queue->limits.max_discard_sectors)
- return;
+ if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
+ lim->max_hw_discard_sectors =
+ nvme_lba_to_sect(ns->head, ctrl->dmrsl);
+ else if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ lim->max_hw_discard_sectors = UINT_MAX;
+ else
+ lim->max_hw_discard_sectors = 0;
+
+ lim->discard_granularity = lim->logical_block_size;
- blk_queue_max_discard_sectors(queue, max_discard_sectors);
if (ctrl->dmrl)
- blk_queue_max_discard_segments(queue, ctrl->dmrl);
+ lim->max_discard_segments = ctrl->dmrl;
else
- blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
- queue->limits.discard_granularity = queue_logical_block_size(queue);
-
- if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
- blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
+ lim->max_discard_segments = NVME_DSM_MAX_RANGES;
}
static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
@@ -1783,42 +1834,38 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
a->csi == b->csi;
}
-static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
- struct nvme_id_ns *id)
+static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid,
+ struct nvme_id_ns_nvm **nvmp)
{
- bool first = id->dps & NVME_NS_DPS_PI_FIRST;
- unsigned lbaf = nvme_lbaf_index(id->flbas);
- struct nvme_command c = { };
+ struct nvme_command c = {
+ .identify.opcode = nvme_admin_identify,
+ .identify.nsid = cpu_to_le32(nsid),
+ .identify.cns = NVME_ID_CNS_CS_NS,
+ .identify.csi = NVME_CSI_NVM,
+ };
struct nvme_id_ns_nvm *nvm;
- int ret = 0;
- u32 elbaf;
-
- head->pi_size = 0;
- head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
- if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
- head->pi_size = sizeof(struct t10_pi_tuple);
- head->guard_type = NVME_NVM_NS_16B_GUARD;
- goto set_pi;
- }
+ int ret;
nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
if (!nvm)
return -ENOMEM;
- c.identify.opcode = nvme_admin_identify;
- c.identify.nsid = cpu_to_le32(head->ns_id);
- c.identify.cns = NVME_ID_CNS_CS_NS;
- c.identify.csi = NVME_CSI_NVM;
-
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm));
if (ret)
- goto free_data;
+ kfree(nvm);
+ else
+ *nvmp = nvm;
+ return ret;
+}
- elbaf = le32_to_cpu(nvm->elbaf[lbaf]);
+static void nvme_configure_pi_elbas(struct nvme_ns_head *head,
+ struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm)
+{
+ u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]);
/* no support for storage tag formats right now */
if (nvme_elbaf_sts(elbaf))
- goto free_data;
+ return;
head->guard_type = nvme_elbaf_guard_type(elbaf);
switch (head->guard_type) {
@@ -1831,30 +1878,31 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
default:
break;
}
-
-free_data:
- kfree(nvm);
-set_pi:
- if (head->pi_size && (first || head->ms == head->pi_size))
- head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
- else
- head->pi_type = 0;
-
- return ret;
}
-static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
- struct nvme_ns_head *head, struct nvme_id_ns *id)
+static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
+ struct nvme_ns_head *head, struct nvme_id_ns *id,
+ struct nvme_id_ns_nvm *nvm)
{
- int ret;
-
- ret = nvme_init_ms(ctrl, head, id);
- if (ret)
- return ret;
-
head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+ head->pi_type = 0;
+ head->pi_size = 0;
+ head->pi_offset = 0;
+ head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms);
if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
- return 0;
+ return;
+
+ if (nvm && (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
+ nvme_configure_pi_elbas(head, id, nvm);
+ } else {
+ head->pi_size = sizeof(struct t10_pi_tuple);
+ head->guard_type = NVME_NVM_NS_16B_GUARD;
+ }
+
+ if (head->pi_size && head->ms >= head->pi_size)
+ head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+ if (!(id->dps & NVME_NS_DPS_PI_FIRST))
+ head->pi_offset = head->ms - head->pi_size;
if (ctrl->ops->flags & NVME_F_FABRICS) {
/*
@@ -1863,7 +1911,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
* remap the separate metadata buffer from the block layer.
*/
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
- return 0;
+ return;
head->features |= NVME_NS_EXT_LBAS;
@@ -1890,33 +1938,32 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
else
head->features |= NVME_NS_METADATA_SUPPORTED;
}
- return 0;
}
-static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
- struct request_queue *q)
+static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
{
- bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT;
-
- if (ctrl->max_hw_sectors) {
- u32 max_segments =
- (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1;
+ return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1;
+}
- max_segments = min_not_zero(max_segments, ctrl->max_segments);
- blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
- blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
- }
- blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
- blk_queue_dma_alignment(q, 3);
- blk_queue_write_cache(q, vwc, vwc);
+static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
+ struct queue_limits *lim)
+{
+ lim->max_hw_sectors = ctrl->max_hw_sectors;
+ lim->max_segments = min_t(u32, USHRT_MAX,
+ min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments));
+ lim->max_integrity_segments = ctrl->max_integrity_segments;
+ lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1;
+ lim->max_segment_size = UINT_MAX;
+ lim->dma_alignment = 3;
}
-static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
- struct nvme_ns_head *head, struct nvme_id_ns *id)
+static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
+ struct queue_limits *lim)
{
- sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze));
+ struct nvme_ns_head *head = ns->head;
u32 bs = 1U << head->lba_shift;
u32 atomic_bs, phys_bs, io_opt = 0;
+ bool valid = true;
/*
* The block layer can't support LBA sizes larger than the page size
@@ -1924,12 +1971,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
* allow block I/O.
*/
if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
- capacity = 0;
bs = (1 << 9);
+ valid = false;
}
- blk_integrity_unregister(disk);
-
atomic_bs = phys_bs = bs;
if (id->nabo == 0) {
/*
@@ -1940,7 +1985,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
- atomic_bs = (1 + ctrl->subsys->awupf) * bs;
+ atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
}
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
@@ -1950,36 +1995,20 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
io_opt = bs * (1 + le16_to_cpu(id->nows));
}
- blk_queue_logical_block_size(disk->queue, bs);
/*
* Linux filesystems assume writing a single physical block is
* an atomic operation. Hence limit the physical block size to the
* value of the Atomic Write Unit Power Fail parameter.
*/
- blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
- blk_queue_io_min(disk->queue, phys_bs);
- blk_queue_io_opt(disk->queue, io_opt);
-
- /*
- * Register a metadata profile for PI, or the plain non-integrity NVMe
- * metadata masquerading as Type 0 if supported, otherwise reject block
- * I/O to namespaces with metadata except when the namespace supports
- * PI, as it can strip/insert in that case.
- */
- if (head->ms) {
- if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
- (head->features & NVME_NS_METADATA_SUPPORTED))
- nvme_init_integrity(disk, head,
- ctrl->max_integrity_segments);
- else if (!nvme_ns_has_pi(head))
- capacity = 0;
- }
-
- set_capacity_and_notify(disk, capacity);
-
- nvme_config_discard(ctrl, disk, head);
- blk_queue_max_write_zeroes_sectors(disk->queue,
- ctrl->max_zeroes_sectors);
+ lim->logical_block_size = bs;
+ lim->physical_block_size = min(phys_bs, atomic_bs);
+ lim->io_min = phys_bs;
+ lim->io_opt = io_opt;
+ if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+ lim->max_write_zeroes_sectors = UINT_MAX;
+ else
+ lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
+ return valid;
}
static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
@@ -1993,7 +2022,8 @@ static inline bool nvme_first_scan(struct gendisk *disk)
return !disk_live(disk);
}
-static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
+static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id,
+ struct queue_limits *lim)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob;
@@ -2021,38 +2051,36 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
return;
}
- blk_queue_chunk_sectors(ns->queue, iob);
+ lim->chunk_sectors = iob;
}
static int nvme_update_ns_info_generic(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
+ struct queue_limits lim;
+ int ret;
+
blk_mq_freeze_queue(ns->disk->queue);
- nvme_set_queue_limits(ns->ctrl, ns->queue);
+ lim = queue_limits_start_update(ns->disk->queue);
+ nvme_set_ctrl_limits(ns->ctrl, &lim);
+ ret = queue_limits_commit_update(ns->disk->queue, &lim);
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
blk_mq_unfreeze_queue(ns->disk->queue);
- if (nvme_ns_head_multipath(ns->head)) {
- blk_mq_freeze_queue(ns->head->disk->queue);
- set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
- nvme_mpath_revalidate_paths(ns);
- blk_stack_limits(&ns->head->disk->queue->limits,
- &ns->queue->limits, 0);
- ns->head->disk->flags |= GENHD_FL_HIDDEN;
- blk_mq_unfreeze_queue(ns->head->disk->queue);
- }
-
/* Hide the block-interface for these devices */
- ns->disk->flags |= GENHD_FL_HIDDEN;
- set_bit(NVME_NS_READY, &ns->flags);
-
- return 0;
+ if (!ret)
+ ret = -ENODEV;
+ return ret;
}
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
+ bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
+ struct queue_limits lim;
+ struct nvme_id_ns_nvm *nvm = NULL;
struct nvme_id_ns *id;
+ sector_t capacity;
unsigned lbaf;
int ret;
@@ -2064,30 +2092,52 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
/* namespace not allocated or attached */
info->is_removed = true;
ret = -ENODEV;
- goto error;
+ goto out;
+ }
+
+ if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
+ ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
+ if (ret < 0)
+ goto out;
}
blk_mq_freeze_queue(ns->disk->queue);
lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
- nvme_set_queue_limits(ns->ctrl, ns->queue);
+ capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
- ret = nvme_configure_metadata(ns->ctrl, ns->head, id);
- if (ret < 0) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
- nvme_set_chunk_sectors(ns, id);
- nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id);
-
- if (ns->head->ids.csi == NVME_CSI_ZNS) {
- ret = nvme_update_zone_info(ns, lbaf);
+ lim = queue_limits_start_update(ns->disk->queue);
+ nvme_set_ctrl_limits(ns->ctrl, &lim);
+ nvme_configure_metadata(ns->ctrl, ns->head, id, nvm);
+ nvme_set_chunk_sectors(ns, id, &lim);
+ if (!nvme_update_disk_info(ns, id, &lim))
+ capacity = 0;
+ nvme_config_discard(ns, &lim);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ ns->head->ids.csi == NVME_CSI_ZNS) {
+ ret = nvme_update_zone_info(ns, lbaf, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue);
goto out;
}
}
+ ret = queue_limits_commit_update(ns->disk->queue, &lim);
+ if (ret) {
+ blk_mq_unfreeze_queue(ns->disk->queue);
+ goto out;
+ }
+
+ /*
+ * Register a metadata profile for PI, or the plain non-integrity NVMe
+ * metadata masquerading as Type 0 if supported, otherwise reject block
+ * I/O to namespaces with metadata except when the namespace supports
+ * PI, as it can strip/insert in that case.
+ */
+ if (!nvme_init_integrity(ns->disk, ns->head))
+ capacity = 0;
+
+ set_capacity_and_notify(ns->disk, capacity);
/*
* Only set the DEAC bit if the device guarantees that reads from
@@ -2098,62 +2148,81 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
ns->head->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
+ blk_queue_write_cache(ns->disk->queue, vwc, vwc);
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) {
- ret = nvme_revalidate_zones(ns);
+ ret = blk_revalidate_disk_zones(ns->disk, NULL);
if (ret && !nvme_first_scan(ns->disk))
goto out;
}
- if (nvme_ns_head_multipath(ns->head)) {
- blk_mq_freeze_queue(ns->head->disk->queue);
- nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id);
- set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
- nvme_mpath_revalidate_paths(ns);
- blk_stack_limits(&ns->head->disk->queue->limits,
- &ns->queue->limits, 0);
- disk_update_readahead(ns->head->disk);
- blk_mq_unfreeze_queue(ns->head->disk->queue);
- }
-
ret = 0;
out:
- /*
- * If probing fails due an unsupported feature, hide the block device,
- * but still allow other access.
- */
- if (ret == -ENODEV) {
- ns->disk->flags |= GENHD_FL_HIDDEN;
- set_bit(NVME_NS_READY, &ns->flags);
- ret = 0;
- }
-
-error:
+ kfree(nvm);
kfree(id);
return ret;
}
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
{
+ bool unsupported = false;
+ int ret;
+
switch (info->ids.csi) {
case NVME_CSI_ZNS:
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
dev_info(ns->ctrl->device,
"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
info->nsid);
- return nvme_update_ns_info_generic(ns, info);
+ ret = nvme_update_ns_info_generic(ns, info);
+ break;
}
- return nvme_update_ns_info_block(ns, info);
+ ret = nvme_update_ns_info_block(ns, info);
+ break;
case NVME_CSI_NVM:
- return nvme_update_ns_info_block(ns, info);
+ ret = nvme_update_ns_info_block(ns, info);
+ break;
default:
dev_info(ns->ctrl->device,
"block device for nsid %u not supported (csi %u)\n",
info->nsid, info->ids.csi);
- return nvme_update_ns_info_generic(ns, info);
+ ret = nvme_update_ns_info_generic(ns, info);
+ break;
+ }
+
+ /*
+ * If probing fails due an unsupported feature, hide the block device,
+ * but still allow other access.
+ */
+ if (ret == -ENODEV) {
+ ns->disk->flags |= GENHD_FL_HIDDEN;
+ set_bit(NVME_NS_READY, &ns->flags);
+ unsupported = true;
+ ret = 0;
+ }
+
+ if (!ret && nvme_ns_head_multipath(ns->head)) {
+ struct queue_limits lim;
+
+ blk_mq_freeze_queue(ns->head->disk->queue);
+ if (unsupported)
+ ns->head->disk->flags |= GENHD_FL_HIDDEN;
+ else
+ nvme_init_integrity(ns->head->disk, ns->head);
+ set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
+ set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
+ nvme_mpath_revalidate_paths(ns);
+
+ lim = queue_limits_start_update(ns->head->disk->queue);
+ queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
+ ns->head->disk->disk_name);
+ ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
+ blk_mq_unfreeze_queue(ns->head->disk->queue);
}
+
+ return ret;
}
#ifdef CONFIG_BLK_SED_OPAL
@@ -2172,7 +2241,7 @@ static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t l
cmd.common.cdw11 = cpu_to_le32(len);
return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
- NVME_QID_ANY, 1, 0);
+ NVME_QID_ANY, NVME_SUBMIT_AT_HEAD);
}
static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
@@ -2828,7 +2897,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
subsys->awupf = le16_to_cpu(id->awupf);
nvme_mpath_default_iopolicy(subsys);
- subsys->dev.class = nvme_subsys_class;
+ subsys->dev.class = &nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem;
subsys->dev.groups = nvme_subsys_attrs_groups;
dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
@@ -3068,11 +3137,17 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct
return -EINVAL;
}
+ if (!ctrl->maxcmd) {
+ dev_err(ctrl->device, "Maximum outstanding commands is 0\n");
+ return -EINVAL;
+ }
+
return 0;
}
static int nvme_init_identify(struct nvme_ctrl *ctrl)
{
+ struct queue_limits lim;
struct nvme_id_ctrl *id;
u32 max_hw_sectors;
bool prev_apst_enabled;
@@ -3139,7 +3214,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->max_hw_sectors =
min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
- nvme_set_queue_limits(ctrl, ctrl->admin_q);
+ lim = queue_limits_start_update(ctrl->admin_q);
+ nvme_set_ctrl_limits(ctrl, &lim);
+ ret = queue_limits_commit_update(ctrl->admin_q, &lim);
+ if (ret)
+ goto out_free;
+
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan);
@@ -3371,7 +3451,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
if (minor < 0)
return minor;
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
- cdev_device->class = nvme_ns_chr_class;
+ cdev_device->class = &nvme_ns_chr_class;
cdev_device->release = nvme_cdev_rel;
device_initialize(cdev_device);
cdev_init(cdev, fops);
@@ -3643,7 +3723,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (!ns)
return;
- disk = blk_mq_alloc_disk(ctrl->tagset, ns);
+ disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns);
if (IS_ERR(disk))
goto out_free_ns;
disk->fops = &nvme_bdev_ops;
@@ -3714,6 +3794,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
nvme_mpath_add_disk(ns, info->anagrpid);
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
+ /*
+ * Set ns->disk->device->driver_data to ns so we can access
+ * ns->head->passthru_err_log_enabled in
+ * nvme_io_passthru_err_log_enabled_[store | show]().
+ */
+ dev_set_drvdata(disk_to_dev(ns->disk), ns);
+
return;
out_cleanup_ns_from_list:
@@ -4138,6 +4225,7 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
{
struct nvme_fw_slot_info_log *log;
+ u8 next_fw_slot, cur_fw_slot;
log = kmalloc(sizeof(*log), GFP_KERNEL);
if (!log)
@@ -4149,13 +4237,15 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
goto out_free_log;
}
- if (log->afi & 0x70 || !(log->afi & 0x7)) {
+ cur_fw_slot = log->afi & 0x7;
+ next_fw_slot = (log->afi & 0x70) >> 4;
+ if (!cur_fw_slot || (next_fw_slot && (cur_fw_slot != next_fw_slot))) {
dev_info(ctrl->device,
"Firmware is activated after next Controller Level Reset\n");
goto out_free_log;
}
- memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1],
+ memcpy(ctrl->subsys->firmware_rev, &log->frs[cur_fw_slot - 1],
sizeof(ctrl->subsys->firmware_rev));
out_free_log:
@@ -4294,6 +4384,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
const struct blk_mq_ops *ops, unsigned int cmd_size)
{
+ struct queue_limits lim = {};
int ret;
memset(set, 0, sizeof(*set));
@@ -4313,14 +4404,14 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ret)
return ret;
- ctrl->admin_q = blk_mq_init_queue(set);
+ ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->admin_q)) {
ret = PTR_ERR(ctrl->admin_q);
goto out_free_tagset;
}
if (ctrl->ops->flags & NVME_F_FABRICS) {
- ctrl->fabrics_q = blk_mq_init_queue(set);
+ ctrl->fabrics_q = blk_mq_alloc_queue(set, NULL, NULL);
if (IS_ERR(ctrl->fabrics_q)) {
ret = PTR_ERR(ctrl->fabrics_q);
goto out_cleanup_admin_q;
@@ -4384,7 +4475,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
return ret;
if (ctrl->ops->flags & NVME_F_FABRICS) {
- ctrl->connect_q = blk_mq_init_queue(set);
+ ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL);
if (IS_ERR(ctrl->connect_q)) {
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
@@ -4514,6 +4605,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
int ret;
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
+ ctrl->passthru_err_log_enabled = false;
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
@@ -4553,7 +4645,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->device = &ctrl->ctrl_device;
ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
ctrl->instance);
- ctrl->device->class = nvme_class;
+ ctrl->device->class = &nvme_class;
ctrl->device->parent = ctrl->dev;
if (ops->dev_attr_groups)
ctrl->device->groups = ops->dev_attr_groups;
@@ -4786,42 +4878,36 @@ static int __init nvme_core_init(void)
if (result < 0)
goto destroy_delete_wq;
- nvme_class = class_create("nvme");
- if (IS_ERR(nvme_class)) {
- result = PTR_ERR(nvme_class);
+ result = class_register(&nvme_class);
+ if (result)
goto unregister_chrdev;
- }
- nvme_class->dev_uevent = nvme_class_uevent;
- nvme_subsys_class = class_create("nvme-subsystem");
- if (IS_ERR(nvme_subsys_class)) {
- result = PTR_ERR(nvme_subsys_class);
+ result = class_register(&nvme_subsys_class);
+ if (result)
goto destroy_class;
- }
result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
"nvme-generic");
if (result < 0)
goto destroy_subsys_class;
- nvme_ns_chr_class = class_create("nvme-generic");
- if (IS_ERR(nvme_ns_chr_class)) {
- result = PTR_ERR(nvme_ns_chr_class);
+ result = class_register(&nvme_ns_chr_class);
+ if (result)
goto unregister_generic_ns;
- }
+
result = nvme_init_auth();
if (result)
goto destroy_ns_chr;
return 0;
destroy_ns_chr:
- class_destroy(nvme_ns_chr_class);
+ class_unregister(&nvme_ns_chr_class);
unregister_generic_ns:
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
destroy_subsys_class:
- class_destroy(nvme_subsys_class);
+ class_unregister(&nvme_subsys_class);
destroy_class:
- class_destroy(nvme_class);
+ class_unregister(&nvme_class);
unregister_chrdev:
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_delete_wq:
@@ -4837,9 +4923,9 @@ out:
static void __exit nvme_core_exit(void)
{
nvme_exit_auth();
- class_destroy(nvme_ns_chr_class);
- class_destroy(nvme_subsys_class);
- class_destroy(nvme_class);
+ class_unregister(&nvme_ns_chr_class);
+ class_unregister(&nvme_subsys_class);
+ class_unregister(&nvme_class);
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_delete_wq);
@@ -4851,5 +4937,6 @@ static void __exit nvme_core_exit(void)
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("NVMe host core framework");
module_init(nvme_core_init);
module_exit(nvme_core_exit);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index b5752a77ad98..1f0ea1f32d22 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -180,7 +180,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
cmd.prop_get.offset = cpu_to_le32(off);
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0);
if (ret >= 0)
*val = le64_to_cpu(res.u64);
@@ -226,7 +226,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
cmd.prop_get.offset = cpu_to_le32(off);
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0);
if (ret >= 0)
*val = le64_to_cpu(res.u64);
@@ -271,7 +271,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
cmd.prop_set.value = cpu_to_le64(val);
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0);
if (unlikely(ret))
dev_err(ctrl->device,
"Property Set error: %d, offset %#x\n",
@@ -450,8 +450,10 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
return -ENOMEM;
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
- data, sizeof(*data), NVME_QID_ANY, 1,
- BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+ data, sizeof(*data), NVME_QID_ANY,
+ NVME_SUBMIT_AT_HEAD |
+ NVME_SUBMIT_NOWAIT |
+ NVME_SUBMIT_RESERVED);
if (ret) {
nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
&cmd, data);
@@ -525,11 +527,14 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
return -ENOMEM;
ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
- data, sizeof(*data), qid, 1,
- BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+ data, sizeof(*data), qid,
+ NVME_SUBMIT_AT_HEAD |
+ NVME_SUBMIT_RESERVED |
+ NVME_SUBMIT_NOWAIT);
if (ret) {
nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
&cmd, data);
+ goto out_free_data;
}
result = le32_to_cpu(res.u32);
if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
@@ -633,7 +638,7 @@ static struct key *nvmf_parse_key(int key_id)
}
key = key_lookup(key_id);
- if (!IS_ERR(key))
+ if (IS_ERR(key))
pr_err("key id %08x not found\n", key_id);
else
pr_debug("Using key id %08x\n", key_id);
@@ -1314,7 +1319,10 @@ out_free_opts:
return ERR_PTR(ret);
}
-static struct class *nvmf_class;
+static const struct class nvmf_class = {
+ .name = "nvme-fabrics",
+};
+
static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex);
@@ -1434,15 +1442,14 @@ static int __init nvmf_init(void)
if (!nvmf_default_host)
return -ENOMEM;
- nvmf_class = class_create("nvme-fabrics");
- if (IS_ERR(nvmf_class)) {
+ ret = class_register(&nvmf_class);
+ if (ret) {
pr_err("couldn't register class nvme-fabrics\n");
- ret = PTR_ERR(nvmf_class);
goto out_free_host;
}
nvmf_device =
- device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
+ device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
if (IS_ERR(nvmf_device)) {
pr_err("couldn't create nvme-fabrics device!\n");
ret = PTR_ERR(nvmf_device);
@@ -1458,9 +1465,9 @@ static int __init nvmf_init(void)
return 0;
out_destroy_device:
- device_destroy(nvmf_class, MKDEV(0, 0));
+ device_destroy(&nvmf_class, MKDEV(0, 0));
out_destroy_class:
- class_destroy(nvmf_class);
+ class_unregister(&nvmf_class);
out_free_host:
nvmf_host_put(nvmf_default_host);
return ret;
@@ -1469,8 +1476,8 @@ out_free_host:
static void __exit nvmf_exit(void)
{
misc_deregister(&nvmf_misc);
- device_destroy(nvmf_class, MKDEV(0, 0));
- class_destroy(nvmf_class);
+ device_destroy(&nvmf_class, MKDEV(0, 0));
+ class_unregister(&nvmf_class);
nvmf_host_put(nvmf_default_host);
BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
@@ -1488,6 +1495,7 @@ static void __exit nvmf_exit(void)
}
MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NVMe host fabrics library");
module_init(nvmf_init);
module_exit(nvmf_exit);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index fbaee5a7be19..06cc54851b1b 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -185,9 +185,11 @@ static inline bool
nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
- if (ctrl->state == NVME_CTRL_DELETING ||
- ctrl->state == NVME_CTRL_DELETING_NOIO ||
- ctrl->state == NVME_CTRL_DEAD ||
+ enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+ if (state == NVME_CTRL_DELETING ||
+ state == NVME_CTRL_DELETING_NOIO ||
+ state == NVME_CTRL_DEAD ||
strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
!uuid_equal(&opts->host->id, &ctrl->opts->host->id))
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 16847a316421..68a5d971657b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);
-static struct workqueue_struct *nvme_fc_wq;
-
-static bool nvme_fc_waiting_to_unload;
-static DECLARE_COMPLETION(nvme_fc_unload_proceed);
-
/*
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
@@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref)
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
- if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
- complete(&nvme_fc_unload_proceed);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -2574,6 +2567,7 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_ctrl *ctrl = op->ctrl;
+ u16 qnum = op->queue->qnum;
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
struct nvme_command *sqe = &cmdiu->sqe;
@@ -2582,10 +2576,11 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
* will detect the aborted io and will fail the connection.
*/
dev_info(ctrl->ctrl.device,
- "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
+ "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: "
"x%08x/x%08x\n",
- ctrl->cnum, op->queue->qnum, sqe->common.opcode,
- sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
+ ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype,
+ nvme_fabrics_opcode_str(qnum, sqe),
+ sqe->common.cdw10, sqe->common.cdw11);
if (__nvme_fc_abort_op(ctrl, op))
nvme_fc_error_recovery(ctrl, "io timeout abort failed");
@@ -3575,8 +3570,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
flush_delayed_work(&ctrl->connect_work);
dev_info(ctrl->ctrl.device,
- "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
- ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl));
+ "NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n",
+ ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn);
return &ctrl->ctrl;
@@ -3894,10 +3889,6 @@ static int __init nvme_fc_init_module(void)
{
int ret;
- nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
- if (!nvme_fc_wq)
- return -ENOMEM;
-
/*
* NOTE:
* It is expected that in the future the kernel will combine
@@ -3915,7 +3906,7 @@ static int __init nvme_fc_init_module(void)
ret = class_register(&fc_class);
if (ret) {
pr_err("couldn't register class fc\n");
- goto out_destroy_wq;
+ return ret;
}
/*
@@ -3939,8 +3930,6 @@ out_destroy_device:
device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class:
class_unregister(&fc_class);
-out_destroy_wq:
- destroy_workqueue(nvme_fc_wq);
return ret;
}
@@ -3960,48 +3949,27 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
spin_unlock(&rport->lock);
}
-static void
-nvme_fc_cleanup_for_unload(void)
+static void __exit nvme_fc_exit_module(void)
{
struct nvme_fc_lport *lport;
struct nvme_fc_rport *rport;
-
- list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
- list_for_each_entry(rport, &lport->endp_list, endp_list) {
- nvme_fc_delete_controllers(rport);
- }
- }
-}
-
-static void __exit nvme_fc_exit_module(void)
-{
unsigned long flags;
- bool need_cleanup = false;
spin_lock_irqsave(&nvme_fc_lock, flags);
- nvme_fc_waiting_to_unload = true;
- if (!list_empty(&nvme_fc_lport_list)) {
- need_cleanup = true;
- nvme_fc_cleanup_for_unload();
- }
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list)
+ list_for_each_entry(rport, &lport->endp_list, endp_list)
+ nvme_fc_delete_controllers(rport);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
- if (need_cleanup) {
- pr_info("%s: waiting for ctlr deletes\n", __func__);
- wait_for_completion(&nvme_fc_unload_proceed);
- pr_info("%s: ctrl deletes complete\n", __func__);
- }
+ flush_workqueue(nvme_delete_wq);
nvmf_unregister_transport(&nvme_fc_transport);
- ida_destroy(&nvme_fc_local_port_cnt);
- ida_destroy(&nvme_fc_ctrl_cnt);
-
device_destroy(&fc_class, MKDEV(0, 0));
class_unregister(&fc_class);
- destroy_workqueue(nvme_fc_wq);
}
module_init(nvme_fc_init_module);
module_exit(nvme_fc_exit_module);
+MODULE_DESCRIPTION("NVMe host FC transport driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 18f5c1be5d67..3dfd5ae99ae0 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -228,7 +228,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
length = (io.nblocks + 1) << ns->head->lba_shift;
if ((io.control & NVME_RW_PRINFO_PRACT) &&
- ns->head->ms == sizeof(struct t10_pi_tuple)) {
+ (ns->head->ms == ns->head->pi_size)) {
/*
* Protection information is stripped/inserted by the
* controller.
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 2dd4137a08b2..5397fb428b24 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -156,7 +156,7 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
if (!ns->head->disk)
continue;
kblockd_schedule_work(&ns->head->requeue_work);
- if (ctrl->state == NVME_CTRL_LIVE)
+ if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
disk_uevent(ns->head->disk, KOBJ_CHANGE);
}
up_read(&ctrl->namespaces_rwsem);
@@ -223,13 +223,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
+ enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
+
/*
* We don't treat NVME_CTRL_DELETING as a disabled path as I/O should
* still be able to complete assuming that the controller is connected.
* Otherwise it will fail immediately and return to the requeue list.
*/
- if (ns->ctrl->state != NVME_CTRL_LIVE &&
- ns->ctrl->state != NVME_CTRL_DELETING)
+ if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
return true;
if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
!test_bit(NVME_NS_READY, &ns->flags))
@@ -331,7 +332,7 @@ out:
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
- return ns->ctrl->state == NVME_CTRL_LIVE &&
+ return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
ns->ana_state == NVME_ANA_OPTIMIZED;
}
@@ -358,7 +359,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
list_for_each_entry_rcu(ns, &head->list, siblings) {
if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
continue;
- switch (ns->ctrl->state) {
+ switch (nvme_ctrl_state(ns->ctrl)) {
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
@@ -515,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work)
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
+ struct queue_limits lim;
bool vwc = false;
mutex_init(&head->lock);
@@ -531,9 +533,14 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
!nvme_is_unique_nsid(ctrl, head) || !multipath)
return 0;
- head->disk = blk_alloc_disk(ctrl->numa_node);
- if (!head->disk)
- return -ENOMEM;
+ blk_set_stacking_limits(&lim);
+ lim.dma_alignment = 3;
+ if (head->ids.csi != NVME_CSI_ZNS)
+ lim.max_zone_append_sectors = 0;
+
+ head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
+ if (IS_ERR(head->disk))
+ return PTR_ERR(head->disk);
head->disk->fops = &nvme_ns_head_ops;
head->disk->private_data = head;
sprintf(head->disk->disk_name, "nvme%dn%d",
@@ -552,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
- /* set to a default value of 512 until the disk is validated */
- blk_queue_logical_block_size(head->disk->queue, 512);
- blk_set_stacking_limits(&head->disk->queue->limits);
- blk_queue_dma_alignment(head->disk->queue, 3);
-
/* we need to propagate up the VMC settings */
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
@@ -667,7 +669,7 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
* controller is ready.
*/
if (nvme_state_is_live(ns->ana_state) &&
- ns->ctrl->state == NVME_CTRL_LIVE)
+ nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
nvme_mpath_set_live(ns);
}
@@ -748,7 +750,7 @@ static void nvme_ana_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
return;
nvme_read_ana_log(ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 030c80818240..24193fcb8bd5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -263,6 +263,7 @@ enum nvme_ctrl_flags {
struct nvme_ctrl {
bool comp_seen;
bool identified;
+ bool passthru_err_log_enabled;
enum nvme_ctrl_state state;
spinlock_t lock;
struct mutex scan_lock;
@@ -454,6 +455,7 @@ struct nvme_ns_head {
struct list_head entry;
struct kref ref;
bool shared;
+ bool passthru_err_log_enabled;
int instance;
struct nvme_effects_log *effects;
u64 nuse;
@@ -462,6 +464,7 @@ struct nvme_ns_head {
u16 ms;
u16 pi_size;
u8 pi_type;
+ u8 pi_offset;
u8 guard_type;
u16 sgs;
u32 sws;
@@ -522,7 +525,6 @@ struct nvme_ns {
struct device cdev_device;
struct nvme_fault_inject fault_inject;
-
};
/* NVMe ns supports metadata actions by the controller (generate/strip) */
@@ -805,17 +807,18 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *req);
bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live);
+ bool queue_live, enum nvme_ctrl_state state);
static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
{
- if (likely(ctrl->state == NVME_CTRL_LIVE))
+ enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+ if (likely(state == NVME_CTRL_LIVE))
return true;
- if (ctrl->ops->flags & NVME_F_FABRICS &&
- ctrl->state == NVME_CTRL_DELETING)
+ if (ctrl->ops->flags & NVME_F_FABRICS && state == NVME_CTRL_DELETING)
return queue_live;
- return __nvme_check_ready(ctrl, rq, queue_live);
+ return __nvme_check_ready(ctrl, rq, queue_live, state);
}
/*
@@ -836,12 +839,27 @@ static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl,
(ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS);
}
+/*
+ * Flags for __nvme_submit_sync_cmd()
+ */
+typedef __u32 __bitwise nvme_submit_flags_t;
+
+enum {
+ /* Insert request at the head of the queue */
+ NVME_SUBMIT_AT_HEAD = (__force nvme_submit_flags_t)(1 << 0),
+ /* Set BLK_MQ_REQ_NOWAIT when allocating request */
+ NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1),
+ /* Set BLK_MQ_REQ_RESERVED when allocating request */
+ NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2),
+ /* Retry command when NVME_SC_DNR is not set in the result */
+ NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3),
+};
+
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
- int qid, int at_head,
- blk_mq_req_flags_t flags);
+ int qid, nvme_submit_flags_t flags);
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
u32 *result);
@@ -1018,11 +1036,11 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */
-int nvme_revalidate_zones(struct nvme_ns *ns);
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct queue_limits *lim);
#ifdef CONFIG_BLK_DEV_ZONED
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action);
@@ -1033,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
{
return BLK_STS_NOTSUPP;
}
-
-static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
-{
- dev_warn(ns->ctrl->device,
- "Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
- return -EPROTONOSUPPORT;
-}
#endif
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
@@ -1124,35 +1135,42 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
}
#ifdef CONFIG_NVME_VERBOSE_ERRORS
-const unsigned char *nvme_get_error_status_str(u16 status);
-const unsigned char *nvme_get_opcode_str(u8 opcode);
-const unsigned char *nvme_get_admin_opcode_str(u8 opcode);
-const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode);
+const char *nvme_get_error_status_str(u16 status);
+const char *nvme_get_opcode_str(u8 opcode);
+const char *nvme_get_admin_opcode_str(u8 opcode);
+const char *nvme_get_fabrics_opcode_str(u8 opcode);
#else /* CONFIG_NVME_VERBOSE_ERRORS */
-static inline const unsigned char *nvme_get_error_status_str(u16 status)
+static inline const char *nvme_get_error_status_str(u16 status)
{
return "I/O Error";
}
-static inline const unsigned char *nvme_get_opcode_str(u8 opcode)
+static inline const char *nvme_get_opcode_str(u8 opcode)
{
return "I/O Cmd";
}
-static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+static inline const char *nvme_get_admin_opcode_str(u8 opcode)
{
return "Admin Cmd";
}
-static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode)
+static inline const char *nvme_get_fabrics_opcode_str(u8 opcode)
{
return "Fabrics Cmd";
}
#endif /* CONFIG_NVME_VERBOSE_ERRORS */
-static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype)
+static inline const char *nvme_opcode_str(int qid, u8 opcode)
{
- if (opcode == nvme_fabrics_command)
- return nvme_get_fabrics_opcode_str(fctype);
return qid ? nvme_get_opcode_str(opcode) :
nvme_get_admin_opcode_str(opcode);
}
+
+static inline const char *nvme_fabrics_opcode_str(
+ int qid, const struct nvme_command *cmd)
+{
+ if (nvme_is_fabrics(cmd))
+ return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype);
+
+ return nvme_opcode_str(qid, cmd->common.opcode);
+}
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c1d6357ec98a..e6267a6aa380 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1349,7 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
dev_warn(dev->ctrl.device,
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
req->tag, nvme_cid(req), opcode,
- nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
+ nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
goto disable;
}
@@ -3543,5 +3543,6 @@ static void __exit nvme_exit(void)
MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("NVMe host PCIe transport driver");
module_init(nvme_init);
module_exit(nvme_exit);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 11dde0d83044..366f0bb4ebfc 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1006,6 +1006,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
{
int ret;
bool changed;
+ u16 max_queue_size;
ret = nvme_rdma_configure_admin_queue(ctrl, new);
if (ret)
@@ -1030,11 +1031,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
}
- if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
+ if (ctrl->ctrl.max_integrity_segments)
+ max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
+ else
+ max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
+
+ if (ctrl->ctrl.sqsize + 1 > max_queue_size) {
dev_warn(ctrl->ctrl.device,
- "ctrl sqsize %u > max queue size %u, clamping down\n",
- ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
- ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
+ "ctrl sqsize %u > max queue size %u, clamping down\n",
+ ctrl->ctrl.sqsize + 1, max_queue_size);
+ ctrl->ctrl.sqsize = max_queue_size - 1;
}
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
@@ -1410,6 +1416,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
struct nvme_ns *ns = rq->q->queuedata;
struct bio *bio = rq->bio;
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ u32 xfer_len;
int nr;
req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
@@ -1422,8 +1430,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
if (unlikely(nr))
goto mr_put;
- nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
- req->mr->sig_attrs, ns->head->pi_type);
+ nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type);
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
@@ -1441,7 +1448,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
IB_ACCESS_REMOTE_WRITE;
sg->addr = cpu_to_le64(req->mr->iova);
- put_unaligned_le24(req->mr->length, sg->length);
+ xfer_len = req->mr->length;
+ /* Check if PI is added by the HW */
+ if (!pi_count)
+ xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size;
+ put_unaligned_le24(xfer_len, sg->length);
put_unaligned_le32(req->mr->rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
@@ -1946,14 +1957,13 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- u8 opcode = req->req.cmd->common.opcode;
- u8 fctype = req->req.cmd->fabrics.fctype;
+ struct nvme_command *cmd = req->req.cmd;
int qid = nvme_rdma_queue_idx(queue);
dev_warn(ctrl->ctrl.device,
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
- rq->tag, nvme_cid(rq), opcode,
- nvme_opcode_str(qid, opcode, fctype), qid);
+ rq->tag, nvme_cid(rq), cmd->common.opcode,
+ nvme_fabrics_opcode_str(qid, cmd), qid);
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*
@@ -2296,8 +2306,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (ret)
goto out_uninit_ctrl;
- dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
- nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
+ dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n",
+ nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn);
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
@@ -2400,4 +2410,5 @@ static void __exit nvme_rdma_cleanup_module(void)
module_init(nvme_rdma_init_module);
module_exit(nvme_rdma_cleanup_module);
+MODULE_DESCRIPTION("NVMe host RDMA transport driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 754e91111042..09fcaa519e5b 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -35,6 +35,31 @@ static ssize_t nvme_sysfs_rescan(struct device *dev,
}
static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
+static ssize_t nvme_adm_passthru_err_log_enabled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf,
+ ctrl->passthru_err_log_enabled ? "on\n" : "off\n");
+}
+
+static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ bool passthru_err_log_enabled;
+ int err;
+
+ err = kstrtobool(buf, &passthru_err_log_enabled);
+ if (err)
+ return -EINVAL;
+
+ ctrl->passthru_err_log_enabled = passthru_err_log_enabled;
+
+ return count;
+}
+
static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
@@ -44,6 +69,37 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
return nvme_get_ns_from_dev(dev)->head;
}
+static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ns_head *head = dev_to_ns_head(dev);
+
+ return sysfs_emit(buf, head->passthru_err_log_enabled ? "on\n" : "off\n");
+}
+
+static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ns_head *head = dev_to_ns_head(dev);
+ bool passthru_err_log_enabled;
+ int err;
+
+ err = kstrtobool(buf, &passthru_err_log_enabled);
+ if (err)
+ return -EINVAL;
+ head->passthru_err_log_enabled = passthru_err_log_enabled;
+
+ return count;
+}
+
+static struct device_attribute dev_attr_adm_passthru_err_log_enabled = \
+ __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
+ nvme_adm_passthru_err_log_enabled_show, nvme_adm_passthru_err_log_enabled_store);
+
+static struct device_attribute dev_attr_io_passthru_err_log_enabled = \
+ __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
+ nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store);
+
static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -165,14 +221,11 @@ static int ns_update_nuse(struct nvme_ns *ns)
ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
if (ret)
- goto out_free_id;
+ return ret;
ns->head->nuse = le64_to_cpu(id->nuse);
-
-out_free_id:
kfree(id);
-
- return ret;
+ return 0;
}
static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
@@ -208,6 +261,7 @@ static struct attribute *nvme_ns_attrs[] = {
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
#endif
+ &dev_attr_io_passthru_err_log_enabled.attr,
NULL,
};
@@ -311,6 +365,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
char *buf)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ unsigned state = (unsigned)nvme_ctrl_state(ctrl);
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
@@ -321,9 +376,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
[NVME_CTRL_DEAD] = "dead",
};
- if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
- state_name[ctrl->state])
- return sysfs_emit(buf, "%s\n", state_name[ctrl->state]);
+ if (state < ARRAY_SIZE(state_name) && state_name[state])
+ return sysfs_emit(buf, "%s\n", state_name[state]);
return sysfs_emit(buf, "unknown state\n");
}
@@ -655,6 +709,7 @@ static struct attribute *nvme_dev_attrs[] = {
#ifdef CONFIG_NVME_TCP_TLS
&dev_attr_tls_key.attr,
#endif
+ &dev_attr_adm_passthru_err_log_enabled.attr,
NULL
};
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index d058d990532b..3692b56cb58d 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1344,7 +1344,6 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
{
- struct page *page;
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
unsigned int noreclaim_flag;
@@ -1355,11 +1354,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
if (queue->hdr_digest || queue->data_digest)
nvme_tcp_free_crypto(queue);
- if (queue->pf_cache.va) {
- page = virt_to_head_page(queue->pf_cache.va);
- __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
- queue->pf_cache.va = NULL;
- }
+ page_frag_cache_drain(&queue->pf_cache);
noreclaim_flag = memalloc_noreclaim_save();
/* ->sock will be released by fput() */
@@ -2428,13 +2423,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
- u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
+ struct nvme_command *cmd = &pdu->cmd;
int qid = nvme_tcp_queue_id(req->queue);
dev_warn(ctrl->device,
"I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n",
- rq->tag, nvme_cid(rq), pdu->hdr.type, opc,
- nvme_opcode_str(qid, opc, fctype), qid);
+ rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode,
+ nvme_fabrics_opcode_str(qid, cmd), qid);
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
/*
@@ -2753,8 +2748,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
if (ret)
goto out_uninit_ctrl;
- dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
- nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
+ dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n",
+ nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn);
mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
@@ -2826,4 +2821,5 @@ static void __exit nvme_tcp_cleanup_module(void)
module_init(nvme_tcp_init_module);
module_exit(nvme_tcp_cleanup_module);
+MODULE_DESCRIPTION("NVMe host TCP transport driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 499bbb0eee8d..722384bcc765 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -7,16 +7,6 @@
#include <linux/vmalloc.h>
#include "nvme.h"
-int nvme_revalidate_zones(struct nvme_ns *ns)
-{
- struct request_queue *q = ns->queue;
-
- blk_queue_chunk_sectors(q, ns->head->zsze);
- blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
-
- return blk_revalidate_disk_zones(ns->disk, NULL);
-}
-
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{
struct nvme_command c = { };
@@ -45,10 +35,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
return 0;
}
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
+int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct queue_limits *lim)
{
struct nvme_effects_log *log = ns->head->effects;
- struct request_queue *q = ns->queue;
struct nvme_command c = { };
struct nvme_id_ns_zns *id;
int status;
@@ -109,10 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
goto free_data;
}
- disk_set_zoned(ns->disk);
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
- disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
+ lim->zoned = 1;
+ lim->max_open_zones = le32_to_cpu(id->mor) + 1;
+ lim->max_active_zones = le32_to_cpu(id->mar) + 1;
+ lim->chunk_sectors = ns->head->zsze;
+ lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
free_data:
kfree(id);
return status;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 39cb570f833d..f5b7054a4a05 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -428,7 +428,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->cqes = (0x4 << 4) | 0x4;
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
- id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
+ id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 2482a0db2504..77a6e817b315 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -273,6 +273,32 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+static ssize_t nvmet_param_max_queue_size_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+
+ return snprintf(page, PAGE_SIZE, "%d\n", port->max_queue_size);
+}
+
+static ssize_t nvmet_param_max_queue_size_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ int ret;
+
+ if (nvmet_is_port_enabled(port, __func__))
+ return -EACCES;
+ ret = kstrtoint(page, 0, &port->max_queue_size);
+ if (ret) {
+ pr_err("Invalid value '%s' for max_queue_size\n", page);
+ return -EINVAL;
+ }
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_max_queue_size);
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
char *page)
@@ -1859,6 +1885,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trtype,
&nvmet_attr_addr_tsas,
&nvmet_attr_param_inline_data_size,
+ &nvmet_attr_param_max_queue_size,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_attr_param_pi_enable,
#endif
@@ -1917,6 +1944,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals);
port->inline_data_size = -1; /* < 0 == let the transport choose */
+ port->max_queue_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index d26aa30f8702..6bbe4df0166c 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -248,7 +248,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
continue;
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
NVME_AER_NOTICE_NS_CHANGED,
NVME_LOG_CHANGED_NS);
}
@@ -265,7 +265,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys,
continue;
if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
continue;
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
}
mutex_unlock(&subsys->lock);
@@ -358,6 +358,18 @@ int nvmet_enable_port(struct nvmet_port *port)
if (port->inline_data_size < 0)
port->inline_data_size = 0;
+ /*
+ * If the transport didn't set the max_queue_size properly, then clamp
+ * it to the target limits. Also set default values in case the
+ * transport didn't set it at all.
+ */
+ if (port->max_queue_size < 0)
+ port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
+ else
+ port->max_queue_size = clamp_t(int, port->max_queue_size,
+ NVMET_MIN_QUEUE_SIZE,
+ NVMET_MAX_QUEUE_SIZE);
+
port->enabled = true;
port->tr_ops = ops;
return 0;
@@ -1223,9 +1235,10 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
ctrl->cap |= (15ULL << 24);
/* maximum queue entries supported: */
if (ctrl->ops->get_max_queue_size)
- ctrl->cap |= ctrl->ops->get_max_queue_size(ctrl) - 1;
+ ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
+ ctrl->port->max_queue_size) - 1;
else
- ctrl->cap |= NVMET_QUEUE_SIZE - 1;
+ ctrl->cap |= ctrl->port->max_queue_size - 1;
if (nvmet_is_passthru_subsys(ctrl->subsys))
nvmet_passthrough_override_cap(ctrl);
@@ -1411,6 +1424,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
+ ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
nvmet_init_cap(ctrl);
WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
@@ -1705,4 +1719,5 @@ static void __exit nvmet_exit(void)
module_init(nvmet_init);
module_exit(nvmet_exit);
+MODULE_DESCRIPTION("NVMe target core framework");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 668d257fa986..ce54da8c6b36 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -21,7 +21,7 @@ static void __nvmet_disc_changed(struct nvmet_port *port,
if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE))
return;
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC);
}
@@ -282,7 +282,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
id->lpa = (1 << 2);
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
- id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
+ id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->flags & NVMF_KEYED_SGLS)
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index d8da840a1c0e..b23f4cf840bd 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -157,7 +157,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
- if (sqsize > mqes) {
+ /* for fabrics, this value applies to only the I/O Submission Queues */
+ if (qid && sqsize > mqes) {
pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n",
sqsize, mqes, ctrl->cntlid);
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
@@ -209,7 +210,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmf_connect_command *c = &req->cmd->connect;
struct nvmf_connect_data *d;
struct nvmet_ctrl *ctrl = NULL;
- u16 status = 0;
+ u16 status;
int ret;
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
@@ -251,8 +252,6 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
if (status)
goto out;
- ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
-
uuid_copy(&ctrl->hostid, &d->hostid);
ret = nvmet_setup_auth(ctrl);
@@ -290,7 +289,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
struct nvmf_connect_data *d;
struct nvmet_ctrl *ctrl;
u16 qid = le16_to_cpu(c->qid);
- u16 status = 0;
+ u16 status;
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index bda7a3009e85..fd229f310c93 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -111,6 +111,8 @@ struct nvmet_fc_tgtport {
struct nvmet_fc_port_entry *pe;
struct kref ref;
u32 max_sg_cnt;
+
+ struct work_struct put_work;
};
struct nvmet_fc_port_entry {
@@ -145,7 +147,6 @@ struct nvmet_fc_tgt_queue {
struct list_head avail_defer_list;
struct workqueue_struct *work_q;
struct kref ref;
- struct rcu_head rcu;
/* array of fcp_iods */
struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize);
} __aligned(sizeof(unsigned long long));
@@ -166,10 +167,9 @@ struct nvmet_fc_tgt_assoc {
struct nvmet_fc_hostport *hostport;
struct nvmet_fc_ls_iod *rcv_disconn;
struct list_head a_list;
- struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1];
+ struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
struct work_struct del_work;
- struct rcu_head rcu;
};
@@ -249,6 +249,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_put_tgtport_work(struct work_struct *work)
+{
+ struct nvmet_fc_tgtport *tgtport =
+ container_of(work, struct nvmet_fc_tgtport, put_work);
+
+ nvmet_fc_tgtport_put(tgtport);
+}
static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod);
@@ -360,7 +367,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
if (!lsop->req_queued) {
spin_unlock_irqrestore(&tgtport->lock, flags);
- return;
+ goto out_putwork;
}
list_del(&lsop->lsreq_list);
@@ -373,7 +380,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
(lsreq->rqstlen + lsreq->rsplen),
DMA_BIDIRECTIONAL);
- nvmet_fc_tgtport_put(tgtport);
+out_putwork:
+ queue_work(nvmet_wq, &tgtport->put_work);
}
static int
@@ -489,8 +497,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
* message is normal. Otherwise, send unless the hostport has
* already been invalidated by the lldd.
*/
- if (!tgtport->ops->ls_req || !assoc->hostport ||
- assoc->hostport->invalid)
+ if (!tgtport->ops->ls_req || assoc->hostport->invalid)
return;
lsop = kzalloc((sizeof(*lsop) +
@@ -802,14 +809,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
if (!queue)
return NULL;
- if (!nvmet_fc_tgt_a_get(assoc))
- goto out_free_queue;
-
queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
assoc->tgtport->fc_target_port.port_num,
assoc->a_id, qid);
if (!queue->work_q)
- goto out_a_put;
+ goto out_free_queue;
queue->qid = qid;
queue->sqsize = sqsize;
@@ -831,15 +835,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
goto out_fail_iodlist;
WARN_ON(assoc->queues[qid]);
- rcu_assign_pointer(assoc->queues[qid], queue);
+ assoc->queues[qid] = queue;
return queue;
out_fail_iodlist:
nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
destroy_workqueue(queue->work_q);
-out_a_put:
- nvmet_fc_tgt_a_put(assoc);
out_free_queue:
kfree(queue);
return NULL;
@@ -852,15 +854,11 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
struct nvmet_fc_tgt_queue *queue =
container_of(ref, struct nvmet_fc_tgt_queue, ref);
- rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
-
nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
- nvmet_fc_tgt_a_put(queue->assoc);
-
destroy_workqueue(queue->work_q);
- kfree_rcu(queue, rcu);
+ kfree(queue);
}
static void
@@ -969,7 +967,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
rcu_read_lock();
list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
if (association_id == assoc->association_id) {
- queue = rcu_dereference(assoc->queues[qid]);
+ queue = assoc->queues[qid];
if (queue &&
(!atomic_read(&queue->connected) ||
!nvmet_fc_tgt_q_get(queue)))
@@ -1078,8 +1076,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
/* new allocation not needed */
kfree(newhost);
newhost = match;
- /* no new allocation - release reference */
- nvmet_fc_tgtport_put(tgtport);
} else {
newhost->tgtport = tgtport;
newhost->hosthandle = hosthandle;
@@ -1094,23 +1090,54 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
}
static void
-nvmet_fc_delete_assoc(struct work_struct *work)
+nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+ nvmet_fc_delete_target_assoc(assoc);
+ nvmet_fc_tgt_a_put(assoc);
+}
+
+static void
+nvmet_fc_delete_assoc_work(struct work_struct *work)
{
struct nvmet_fc_tgt_assoc *assoc =
container_of(work, struct nvmet_fc_tgt_assoc, del_work);
+ struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
- nvmet_fc_delete_target_assoc(assoc);
- nvmet_fc_tgt_a_put(assoc);
+ nvmet_fc_delete_assoc(assoc);
+ nvmet_fc_tgtport_put(tgtport);
+}
+
+static void
+nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+ nvmet_fc_tgtport_get(assoc->tgtport);
+ queue_work(nvmet_wq, &assoc->del_work);
+}
+
+static bool
+nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id)
+{
+ struct nvmet_fc_tgt_assoc *a;
+
+ list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) {
+ if (association_id == a->association_id)
+ return true;
+ }
+
+ return false;
}
static struct nvmet_fc_tgt_assoc *
nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
{
- struct nvmet_fc_tgt_assoc *assoc, *tmpassoc;
+ struct nvmet_fc_tgt_assoc *assoc;
unsigned long flags;
+ bool done;
u64 ran;
int idx;
- bool needrandom = true;
+
+ if (!tgtport->pe)
+ return NULL;
assoc = kzalloc(sizeof(*assoc), GFP_KERNEL);
if (!assoc)
@@ -1120,43 +1147,35 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
if (idx < 0)
goto out_free_assoc;
- if (!nvmet_fc_tgtport_get(tgtport))
- goto out_ida;
-
assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle);
if (IS_ERR(assoc->hostport))
- goto out_put;
+ goto out_ida;
assoc->tgtport = tgtport;
assoc->a_id = idx;
INIT_LIST_HEAD(&assoc->a_list);
kref_init(&assoc->ref);
- INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
+ INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work);
atomic_set(&assoc->terminating, 0);
- while (needrandom) {
+ done = false;
+ do {
get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID);
ran = ran << BYTES_FOR_QID_SHIFT;
spin_lock_irqsave(&tgtport->lock, flags);
- needrandom = false;
- list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) {
- if (ran == tmpassoc->association_id) {
- needrandom = true;
- break;
- }
- }
- if (!needrandom) {
+ rcu_read_lock();
+ if (!nvmet_fc_assoc_exits(tgtport, ran)) {
assoc->association_id = ran;
list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list);
+ done = true;
}
+ rcu_read_unlock();
spin_unlock_irqrestore(&tgtport->lock, flags);
- }
+ } while (!done);
return assoc;
-out_put:
- nvmet_fc_tgtport_put(tgtport);
out_ida:
ida_free(&tgtport->assoc_cnt, idx);
out_free_assoc:
@@ -1172,13 +1191,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
struct nvmet_fc_ls_iod *oldls;
unsigned long flags;
+ int i;
+
+ for (i = NVMET_NR_QUEUES; i >= 0; i--) {
+ if (assoc->queues[i])
+ nvmet_fc_delete_target_queue(assoc->queues[i]);
+ }
/* Send Disconnect now that all i/o has completed */
nvmet_fc_xmt_disconnect_assoc(assoc);
nvmet_fc_free_hostport(assoc->hostport);
spin_lock_irqsave(&tgtport->lock, flags);
- list_del_rcu(&assoc->a_list);
oldls = assoc->rcv_disconn;
spin_unlock_irqrestore(&tgtport->lock, flags);
/* if pending Rcv Disconnect Association LS, send rsp now */
@@ -1188,8 +1212,7 @@ nvmet_fc_target_assoc_free(struct kref *ref)
dev_info(tgtport->dev,
"{%d:%d} Association freed\n",
tgtport->fc_target_port.port_num, assoc->a_id);
- kfree_rcu(assoc, rcu);
- nvmet_fc_tgtport_put(tgtport);
+ kfree(assoc);
}
static void
@@ -1208,7 +1231,7 @@ static void
nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
{
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
- struct nvmet_fc_tgt_queue *queue;
+ unsigned long flags;
int i, terminating;
terminating = atomic_xchg(&assoc->terminating, 1);
@@ -1217,29 +1240,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
if (terminating)
return;
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_del_rcu(&assoc->a_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
- for (i = NVMET_NR_QUEUES; i >= 0; i--) {
- rcu_read_lock();
- queue = rcu_dereference(assoc->queues[i]);
- if (!queue) {
- rcu_read_unlock();
- continue;
- }
+ synchronize_rcu();
- if (!nvmet_fc_tgt_q_get(queue)) {
- rcu_read_unlock();
- continue;
- }
- rcu_read_unlock();
- nvmet_fc_delete_target_queue(queue);
- nvmet_fc_tgt_q_put(queue);
+ /* ensure all in-flight I/Os have been processed */
+ for (i = NVMET_NR_QUEUES; i >= 0; i--) {
+ if (assoc->queues[i])
+ flush_workqueue(assoc->queues[i]->work_q);
}
dev_info(tgtport->dev,
"{%d:%d} Association deleted\n",
tgtport->fc_target_port.port_num, assoc->a_id);
-
- nvmet_fc_tgt_a_put(assoc);
}
static struct nvmet_fc_tgt_assoc *
@@ -1415,6 +1430,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
kref_init(&newrec->ref);
ida_init(&newrec->assoc_cnt);
newrec->max_sg_cnt = template->max_sgl_segments;
+ INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work);
ret = nvmet_fc_alloc_ls_iodlist(newrec);
if (ret) {
@@ -1492,9 +1508,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
if (!nvmet_fc_tgt_a_get(assoc))
continue;
- if (!queue_work(nvmet_wq, &assoc->del_work))
- /* already deleting - release local reference */
- nvmet_fc_tgt_a_put(assoc);
+ nvmet_fc_schedule_delete_assoc(assoc);
+ nvmet_fc_tgt_a_put(assoc);
}
rcu_read_unlock();
}
@@ -1540,16 +1555,14 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
spin_lock_irqsave(&tgtport->lock, flags);
list_for_each_entry_safe(assoc, next,
&tgtport->assoc_list, a_list) {
- if (!assoc->hostport ||
- assoc->hostport->hosthandle != hosthandle)
+ if (assoc->hostport->hosthandle != hosthandle)
continue;
if (!nvmet_fc_tgt_a_get(assoc))
continue;
assoc->hostport->invalid = 1;
noassoc = false;
- if (!queue_work(nvmet_wq, &assoc->del_work))
- /* already deleting - release local reference */
- nvmet_fc_tgt_a_put(assoc);
+ nvmet_fc_schedule_delete_assoc(assoc);
+ nvmet_fc_tgt_a_put(assoc);
}
spin_unlock_irqrestore(&tgtport->lock, flags);
@@ -1581,7 +1594,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
rcu_read_lock();
list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
- queue = rcu_dereference(assoc->queues[0]);
+ queue = assoc->queues[0];
if (queue && queue->nvme_sq.ctrl == ctrl) {
if (nvmet_fc_tgt_a_get(assoc))
found_ctrl = true;
@@ -1593,9 +1606,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
nvmet_fc_tgtport_put(tgtport);
if (found_ctrl) {
- if (!queue_work(nvmet_wq, &assoc->del_work))
- /* already deleting - release local reference */
- nvmet_fc_tgt_a_put(assoc);
+ nvmet_fc_schedule_delete_assoc(assoc);
+ nvmet_fc_tgt_a_put(assoc);
return;
}
@@ -1625,6 +1637,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
/* terminate any outstanding associations */
__nvmet_fc_free_assocs(tgtport);
+ flush_workqueue(nvmet_wq);
+
/*
* should terminate LS's as well. However, LS's will be generated
* at the tail end of association termination, so they likely don't
@@ -1870,9 +1884,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
FCNVME_LS_DISCONNECT_ASSOC);
- /* release get taken in nvmet_fc_find_target_assoc */
- nvmet_fc_tgt_a_put(assoc);
-
/*
* The rules for LS response says the response cannot
* go back until ABTS's have been sent for all outstanding
@@ -1887,8 +1898,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
assoc->rcv_disconn = iod;
spin_unlock_irqrestore(&tgtport->lock, flags);
- nvmet_fc_delete_target_assoc(assoc);
-
if (oldls) {
dev_info(tgtport->dev,
"{%d:%d} Multiple Disconnect Association LS's "
@@ -1904,6 +1913,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
nvmet_fc_xmt_ls_rsp(tgtport, oldls);
}
+ nvmet_fc_schedule_delete_assoc(assoc);
+ nvmet_fc_tgt_a_put(assoc);
+
return false;
}
@@ -2540,8 +2552,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
fod->req.cmd = &fod->cmdiubuf.sqe;
fod->req.cqe = &fod->rspiubuf.cqe;
- if (tgtport->pe)
- fod->req.port = tgtport->pe->port;
+ if (!tgtport->pe)
+ goto transport_error;
+ fod->req.port = tgtport->pe->port;
/* clear any response payload */
memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
@@ -2902,6 +2915,9 @@ nvmet_fc_remove_port(struct nvmet_port *port)
nvmet_fc_portentry_unbind(pe);
+ /* terminate any outstanding associations */
+ __nvmet_fc_free_assocs(pe->tgtport);
+
kfree(pe);
}
@@ -2933,6 +2949,9 @@ static int __init nvmet_fc_init_module(void)
static void __exit nvmet_fc_exit_module(void)
{
+ /* ensure any shutdown operation, e.g. delete ctrls have finished */
+ flush_workqueue(nvmet_wq);
+
/* sanity check - all lports should be removed */
if (!list_empty(&nvmet_fc_target_list))
pr_warn("%s: targetport list not empty\n", __func__);
@@ -2945,4 +2964,5 @@ static void __exit nvmet_fc_exit_module(void)
module_init(nvmet_fc_init_module);
module_exit(nvmet_fc_exit_module);
+MODULE_DESCRIPTION("NVMe target FC transport driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index ead349af30f1..913cd2ec7a6f 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
if (!rport->targetport) {
tls_req->status = -ECONNREFUSED;
spin_lock(&rport->lock);
- list_add_tail(&rport->ls_list, &tls_req->ls_list);
+ list_add_tail(&tls_req->ls_list, &rport->ls_list);
spin_unlock(&rport->lock);
queue_work(nvmet_wq, &rport->ls_work);
return ret;
@@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
if (remoteport) {
rport = remoteport->private;
spin_lock(&rport->lock);
- list_add_tail(&rport->ls_list, &tls_req->ls_list);
+ list_add_tail(&tls_req->ls_list, &rport->ls_list);
spin_unlock(&rport->lock);
queue_work(nvmet_wq, &rport->ls_work);
}
@@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
if (!tport->remoteport) {
tls_req->status = -ECONNREFUSED;
spin_lock(&tport->lock);
- list_add_tail(&tport->ls_list, &tls_req->ls_list);
+ list_add_tail(&tls_req->ls_list, &tport->ls_list);
spin_unlock(&tport->lock);
queue_work(nvmet_wq, &tport->ls_work);
return ret;
@@ -1556,7 +1556,9 @@ static const struct attribute_group *fcloop_dev_attr_groups[] = {
NULL,
};
-static struct class *fcloop_class;
+static const struct class fcloop_class = {
+ .name = "fcloop",
+};
static struct device *fcloop_device;
@@ -1564,15 +1566,14 @@ static int __init fcloop_init(void)
{
int ret;
- fcloop_class = class_create("fcloop");
- if (IS_ERR(fcloop_class)) {
+ ret = class_register(&fcloop_class);
+ if (ret) {
pr_err("couldn't register class fcloop\n");
- ret = PTR_ERR(fcloop_class);
return ret;
}
fcloop_device = device_create_with_groups(
- fcloop_class, NULL, MKDEV(0, 0), NULL,
+ &fcloop_class, NULL, MKDEV(0, 0), NULL,
fcloop_dev_attr_groups, "ctl");
if (IS_ERR(fcloop_device)) {
pr_err("couldn't create ctl device!\n");
@@ -1585,7 +1586,7 @@ static int __init fcloop_init(void)
return 0;
out_destroy_class:
- class_destroy(fcloop_class);
+ class_unregister(&fcloop_class);
return ret;
}
@@ -1643,11 +1644,12 @@ static void __exit fcloop_exit(void)
put_device(fcloop_device);
- device_destroy(fcloop_class, MKDEV(0, 0));
- class_destroy(fcloop_class);
+ device_destroy(&fcloop_class, MKDEV(0, 0));
+ class_unregister(&fcloop_class);
}
module_init(fcloop_init);
module_exit(fcloop_exit);
+MODULE_DESCRIPTION("NVMe target FC loop transport driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index f11400a908f2..6426aac2634a 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -50,10 +50,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
- if (ns->bdev_handle) {
- bdev_release(ns->bdev_handle);
+ if (ns->bdev_file) {
+ fput(ns->bdev_file);
ns->bdev = NULL;
- ns->bdev_handle = NULL;
+ ns->bdev_file = NULL;
}
}
@@ -85,18 +85,18 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
if (ns->buffered_io)
return -ENOTBLK;
- ns->bdev_handle = bdev_open_by_path(ns->device_path,
+ ns->bdev_file = bdev_file_open_by_path(ns->device_path,
BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL);
- if (IS_ERR(ns->bdev_handle)) {
- ret = PTR_ERR(ns->bdev_handle);
+ if (IS_ERR(ns->bdev_file)) {
+ ret = PTR_ERR(ns->bdev_file);
if (ret != -ENOTBLK) {
pr_err("failed to open block device %s: (%d)\n",
ns->device_path, ret);
}
- ns->bdev_handle = NULL;
+ ns->bdev_file = NULL;
return ret;
}
- ns->bdev = ns->bdev_handle->bdev;
+ ns->bdev = file_bdev(ns->bdev_file);
ns->size = bdev_nr_bytes(ns->bdev);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9cb434c58075..e589915ddef8 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -400,7 +400,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
}
nvme_quiesce_admin_queue(&ctrl->ctrl);
- if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+ if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
nvme_disable_ctrl(&ctrl->ctrl, true);
nvme_cancel_admin_tagset(&ctrl->ctrl);
@@ -434,8 +434,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
nvme_loop_shutdown_ctrl(ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
- if (ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO)
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+ if (state != NVME_CTRL_DELETING &&
+ state != NVME_CTRL_DELETING_NOIO)
/* state change failure for non-deleted ctrl? */
WARN_ON_ONCE(1);
return;
@@ -688,5 +690,6 @@ static void __exit nvme_loop_cleanup_module(void)
module_init(nvme_loop_init_module);
module_exit(nvme_loop_cleanup_module);
+MODULE_DESCRIPTION("NVMe target loop transport driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 6c8acebe1a1a..f460728e1df1 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -58,7 +58,7 @@
struct nvmet_ns {
struct percpu_ref ref;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bdev;
struct file *file;
bool readonly;
@@ -163,6 +163,7 @@ struct nvmet_port {
void *priv;
bool enabled;
int inline_data_size;
+ int max_queue_size;
const struct nvmet_fabrics_ops *tr_ops;
bool pi_enable;
};
@@ -543,9 +544,10 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
u8 event_info, u8 log_page);
-#define NVMET_QUEUE_SIZE 1024
+#define NVMET_MIN_QUEUE_SIZE 16
+#define NVMET_MAX_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128
-#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
+#define NVMET_MAX_CMD(ctrl) (NVME_CAP_MQES(ctrl->cap) + 1)
/*
* Nice round number that makes a list of nsids fit into a page.
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index f2d963e1fe94..bb4a69d538fd 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -132,7 +132,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
- id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
+ id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
/* don't support fuse commands */
id->fuses = 0;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 667f9c04f35d..f2bb9d95ecf4 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1956,6 +1956,14 @@ static int nvmet_rdma_add_port(struct nvmet_port *nport)
nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
}
+ if (nport->max_queue_size < 0) {
+ nport->max_queue_size = NVME_RDMA_DEFAULT_QUEUE_SIZE;
+ } else if (nport->max_queue_size > NVME_RDMA_MAX_QUEUE_SIZE) {
+ pr_warn("max_queue_size %u is too large, reducing to %u\n",
+ nport->max_queue_size, NVME_RDMA_MAX_QUEUE_SIZE);
+ nport->max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
+ }
+
ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
nport->disc_addr.trsvcid, &port->addr);
if (ret) {
@@ -2015,6 +2023,8 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl)
{
+ if (ctrl->pi_support)
+ return NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
return NVME_RDMA_MAX_QUEUE_SIZE;
}
@@ -2104,5 +2114,6 @@ static void __exit nvmet_rdma_exit(void)
module_init(nvmet_rdma_init);
module_exit(nvmet_rdma_exit);
+MODULE_DESCRIPTION("NVMe target RDMA transport driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 6a1e6bb80062..2aa5762e9f50 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1591,7 +1591,6 @@ static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_release_queue_work(struct work_struct *w)
{
- struct page *page;
struct nvmet_tcp_queue *queue =
container_of(w, struct nvmet_tcp_queue, release_work);
@@ -1615,8 +1614,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
if (queue->hdr_digest || queue->data_digest)
nvmet_tcp_free_crypto(queue);
ida_free(&nvmet_tcp_queue_ida, queue->idx);
- page = virt_to_head_page(queue->pf_cache.va);
- __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
+ page_frag_cache_drain(&queue->pf_cache);
kfree(queue);
}
@@ -2216,10 +2214,12 @@ static void __exit nvmet_tcp_exit(void)
flush_workqueue(nvmet_wq);
destroy_workqueue(nvmet_tcp_wq);
+ ida_destroy(&nvmet_tcp_queue_ida);
}
module_init(nvmet_tcp_init);
module_exit(nvmet_tcp_exit);
+MODULE_DESCRIPTION("NVMe target TCP transport driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 5b5c1e481722..3148d9f1bde6 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -456,8 +456,7 @@ static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req)
switch (zsa_req_op(req->cmd->zms.zsa)) {
case REQ_OP_ZONE_RESET:
ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0,
- get_capacity(req->ns->bdev->bd_disk),
- GFP_KERNEL);
+ get_capacity(req->ns->bdev->bd_disk));
if (ret < 0)
return blkdev_zone_mgmt_errno_to_nvme_status(ret);
break;
@@ -508,7 +507,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
goto out;
}
- ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL);
+ ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors);
if (ret < 0)
status = blkdev_zone_mgmt_errno_to_nvme_status(ret);
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 980123fb4dde..eb357ac2e54a 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -460,8 +460,9 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem)
list_for_each_entry(entry, &nvmem->cells, node) {
sysfs_bin_attr_init(&attrs[i]);
attrs[i].attr.name = devm_kasprintf(&nvmem->dev, GFP_KERNEL,
- "%s@%x", entry->name,
- entry->offset);
+ "%s@%x,%x", entry->name,
+ entry->offset,
+ entry->bit_offset);
attrs[i].attr.mode = 0444;
attrs[i].size = entry->bytes;
attrs[i].read = &nvmem_cell_attr_read;
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 641a40cf5cf3..fa8cd33be131 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -763,7 +763,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node)
/* Walk 3 levels up only if there is 'ports' node. */
for (depth = 3; depth && node; depth--) {
node = of_get_next_parent(node);
- if (depth == 2 && !of_node_name_eq(node, "ports"))
+ if (depth == 2 && !of_node_name_eq(node, "ports") &&
+ !of_node_name_eq(node, "in-ports") &&
+ !of_node_name_eq(node, "out-ports"))
break;
}
return node;
@@ -1063,36 +1065,6 @@ of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
return of_device_get_match_data(dev);
}
-static struct device_node *of_get_compat_node(struct device_node *np)
-{
- of_node_get(np);
-
- while (np) {
- if (!of_device_is_available(np)) {
- of_node_put(np);
- np = NULL;
- }
-
- if (of_property_present(np, "compatible"))
- break;
-
- np = of_get_next_parent(np);
- }
-
- return np;
-}
-
-static struct device_node *of_get_compat_node_parent(struct device_node *np)
-{
- struct device_node *parent, *node;
-
- parent = of_get_parent(np);
- node = of_get_compat_node(parent);
- of_node_put(parent);
-
- return node;
-}
-
static void of_link_to_phandle(struct device_node *con_np,
struct device_node *sup_np)
{
@@ -1222,10 +1194,10 @@ static struct device_node *parse_##fname(struct device_node *np, \
* parse_prop.prop_name: Name of property holding a phandle value
* parse_prop.index: For properties holding a list of phandles, this is the
* index into the list
+ * @get_con_dev: If the consumer node containing the property is never converted
+ * to a struct device, implement this ops so fw_devlink can use it
+ * to find the true consumer.
* @optional: Describes whether a supplier is mandatory or not
- * @node_not_dev: The consumer node containing the property is never converted
- * to a struct device. Instead, parse ancestor nodes for the
- * compatible property to find a node corresponding to a device.
*
* Returns:
* parse_prop() return values are
@@ -1236,15 +1208,15 @@ static struct device_node *parse_##fname(struct device_node *np, \
struct supplier_bindings {
struct device_node *(*parse_prop)(struct device_node *np,
const char *prop_name, int index);
+ struct device_node *(*get_con_dev)(struct device_node *np);
bool optional;
- bool node_not_dev;
};
DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells")
DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells")
DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
-DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells")
+DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
@@ -1262,7 +1234,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL)
DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL)
DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL)
DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL)
-DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL)
DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells")
DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells")
DEFINE_SIMPLE_PROP(leds, "leds", NULL)
@@ -1328,6 +1299,17 @@ static struct device_node *parse_interrupts(struct device_node *np,
return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np;
}
+static struct device_node *parse_remote_endpoint(struct device_node *np,
+ const char *prop_name,
+ int index)
+{
+ /* Return NULL for index > 0 to signify end of remote-endpoints. */
+ if (index > 0 || strcmp(prop_name, "remote-endpoint"))
+ return NULL;
+
+ return of_graph_get_remote_port_parent(np);
+}
+
static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_clocks, },
{ .parse_prop = parse_interconnects, },
@@ -1352,7 +1334,10 @@ static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_pinctrl6, },
{ .parse_prop = parse_pinctrl7, },
{ .parse_prop = parse_pinctrl8, },
- { .parse_prop = parse_remote_endpoint, .node_not_dev = true, },
+ {
+ .parse_prop = parse_remote_endpoint,
+ .get_con_dev = of_graph_get_port_parent,
+ },
{ .parse_prop = parse_pwms, },
{ .parse_prop = parse_resets, },
{ .parse_prop = parse_leds, },
@@ -1403,8 +1388,8 @@ static int of_link_property(struct device_node *con_np, const char *prop_name)
while ((phandle = s->parse_prop(con_np, prop_name, i))) {
struct device_node *con_dev_np;
- con_dev_np = s->node_not_dev
- ? of_get_compat_node_parent(con_np)
+ con_dev_np = s->get_con_dev
+ ? s->get_con_dev(con_np)
: of_node_get(con_np);
matched = true;
i++;
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index cfd60e35a899..d7593bde2d02 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -50,6 +50,12 @@ static struct unittest_results {
failed; \
})
+#ifdef CONFIG_OF_KOBJ
+#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref)
+#else
+#define OF_KREF_READ(NODE) 1
+#endif
+
/*
* Expected message may have a message level other than KERN_INFO.
* Print the expected message only if the current loglevel will allow
@@ -570,7 +576,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
pr_err("missing testcase data\n");
return;
}
- prefs[i] = kref_read(&p[i]->kobj.kref);
+ prefs[i] = OF_KREF_READ(p[i]);
}
rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells");
@@ -693,9 +699,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
for (i = 0; i < ARRAY_SIZE(p); ++i) {
- unittest(prefs[i] == kref_read(&p[i]->kobj.kref),
+ unittest(prefs[i] == OF_KREF_READ(p[i]),
"provider%d: expected:%d got:%d\n",
- i, prefs[i], kref_read(&p[i]->kobj.kref));
+ i, prefs[i], OF_KREF_READ(p[i]));
of_node_put(p[i]);
}
}
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 9c2137dae429..826b5016a101 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus)
}
EXPORT_SYMBOL(pci_bus_add_devices);
-/** pci_walk_bus - walk devices on/under bus, calling callback.
- * @top bus whose devices should be walked
- * @cb callback to be called for each device found
- * @userdata arbitrary pointer to be passed to callback.
- *
- * Walk the given bus, including any bridged devices
- * on buses under this bus. Call the provided callback
- * on each device found.
- *
- * We check the return of @cb each time. If it returns anything
- * other than 0, we break out.
- *
- */
-void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
- void *userdata)
+static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+ void *userdata, bool locked)
{
struct pci_dev *dev;
struct pci_bus *bus;
@@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
int retval;
bus = top;
- down_read(&pci_bus_sem);
+ if (!locked)
+ down_read(&pci_bus_sem);
next = top->devices.next;
for (;;) {
if (next == &bus->devices) {
@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
if (retval)
break;
}
- up_read(&pci_bus_sem);
+ if (!locked)
+ up_read(&pci_bus_sem);
+}
+
+/**
+ * pci_walk_bus - walk devices on/under bus, calling callback.
+ * @top: bus whose devices should be walked
+ * @cb: callback to be called for each device found
+ * @userdata: arbitrary pointer to be passed to callback
+ *
+ * Walk the given bus, including any bridged devices
+ * on buses under this bus. Call the provided callback
+ * on each device found.
+ *
+ * We check the return of @cb each time. If it returns anything
+ * other than 0, we break out.
+ */
+void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
+{
+ __pci_walk_bus(top, cb, userdata, false);
}
EXPORT_SYMBOL_GPL(pci_walk_bus);
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
+{
+ lockdep_assert_held(&pci_bus_sem);
+
+ __pci_walk_bus(top, cb, userdata, true);
+}
+EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
+
struct pci_bus *pci_bus_get(struct pci_bus *bus)
{
if (bus)
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 5befed2dc02b..9a437cfce073 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -6,6 +6,7 @@
* Author: Kishon Vijay Abraham I <kishon@ti.com>
*/
+#include <linux/align.h>
#include <linux/bitfield.h>
#include <linux/of.h>
#include <linux/platform_device.h>
@@ -482,9 +483,10 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
reg = ep_func->msi_cap + PCI_MSI_DATA_32;
msg_data = dw_pcie_ep_readw_dbi(ep, func_no, reg);
}
- aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1);
- msg_addr = ((u64)msg_addr_upper) << 32 |
- (msg_addr_lower & ~aligned_offset);
+ msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower;
+
+ aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
+ msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size);
ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
epc->mem->window.page_size);
if (ret)
@@ -551,7 +553,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
}
aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
- msg_addr &= ~aligned_offset;
+ msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size);
ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
epc->mem->window.page_size);
if (ret)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 10f2d0bb86be..2ce2a3bd932b 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata)
* Downstream devices need to be in D0 state before enabling PCI PM
* substates.
*/
- pci_set_power_state(pdev, PCI_D0);
+ pci_set_power_state_locked(pdev, PCI_D0);
pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
return 0;
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index a89b7de72dcf..7333b305f2a5 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -26,58 +26,79 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
hotplug_slot);
int rc;
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
- return -EIO;
+ mutex_lock(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_STANDBY) {
+ rc = -EIO;
+ goto out;
+ }
rc = sclp_pci_configure(zdev->fid);
zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc);
if (rc)
- return rc;
+ goto out;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
- return zpci_scan_configured_device(zdev, zdev->fh);
+ rc = zpci_scan_configured_device(zdev, zdev->fh);
+out:
+ mutex_unlock(&zdev->state_lock);
+ return rc;
}
static int disable_slot(struct hotplug_slot *hotplug_slot)
{
struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
hotplug_slot);
- struct pci_dev *pdev;
+ struct pci_dev *pdev = NULL;
+ int rc;
- if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
- return -EIO;
+ mutex_lock(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED) {
+ rc = -EIO;
+ goto out;
+ }
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
if (pdev && pci_num_vf(pdev)) {
pci_dev_put(pdev);
- return -EBUSY;
+ rc = -EBUSY;
+ goto out;
}
- pci_dev_put(pdev);
- return zpci_deconfigure_device(zdev);
+ rc = zpci_deconfigure_device(zdev);
+out:
+ mutex_unlock(&zdev->state_lock);
+ if (pdev)
+ pci_dev_put(pdev);
+ return rc;
}
static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
{
struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
hotplug_slot);
+ int rc = -EIO;
- if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
- return -EIO;
/*
- * We can't take the zdev->lock as reset_slot may be called during
- * probing and/or device removal which already happens under the
- * zdev->lock. Instead the user should use the higher level
- * pci_reset_function() or pci_bus_reset() which hold the PCI device
- * lock preventing concurrent removal. If not using these functions
- * holding the PCI device lock is required.
+ * If we can't get the zdev->state_lock the device state is
+ * currently undergoing a transition and we bail out - just
+ * the same as if the device's state is not configured at all.
*/
+ if (!mutex_trylock(&zdev->state_lock))
+ return rc;
- /* As long as the function is configured we can reset */
- if (probe)
- return 0;
+ /* We can reset only if the function is configured */
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ goto out;
+
+ if (probe) {
+ rc = 0;
+ goto out;
+ }
- return zpci_hot_reset_device(zdev);
+ rc = zpci_hot_reset_device(zdev);
+out:
+ mutex_unlock(&zdev->state_lock);
+ return rc;
}
static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
index c8be056c248d..cfd84a899c82 100644
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -61,7 +61,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc)
return (irq_hw_number_t)desc->msi_index |
pci_dev_id(dev) << 11 |
- (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
+ ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27;
}
static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d8f11a078924..ccee56615f78 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1354,6 +1354,7 @@ end:
/**
* pci_set_full_power_state - Put a PCI device into D0 and update its state
* @dev: PCI device to power up
+ * @locked: whether pci_bus_sem is held
*
* Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
* to confirm the state change, restore its BARs if they might be lost and
@@ -1363,7 +1364,7 @@ end:
* to D0, it is more efficient to use pci_power_up() directly instead of this
* function.
*/
-static int pci_set_full_power_state(struct pci_dev *dev)
+static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
{
u16 pmcsr;
int ret;
@@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
}
if (dev->bus->self)
- pcie_aspm_pm_state_change(dev->bus->self);
+ pcie_aspm_pm_state_change(dev->bus->self, locked);
return 0;
}
@@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
}
+static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
+{
+ if (!bus)
+ return;
+
+ if (locked)
+ pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
+ else
+ pci_walk_bus(bus, __pci_dev_set_current_state, &state);
+}
+
/**
* pci_set_low_power_state - Put a PCI device into a low-power state.
* @dev: PCI device to handle.
* @state: PCI power state (D1, D2, D3hot) to put the device into.
+ * @locked: whether pci_bus_sem is held
*
* Use the device's PCI_PM_CTRL register to put it into a low-power state.
*
@@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
* 0 if device already is in the requested state.
* 0 if device's power state has been successfully changed.
*/
-static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
+static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
{
u16 pmcsr;
@@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
pci_power_name(state));
if (dev->bus->self)
- pcie_aspm_pm_state_change(dev->bus->self);
+ pcie_aspm_pm_state_change(dev->bus->self, locked);
return 0;
}
-/**
- * pci_set_power_state - Set the power state of a PCI device
- * @dev: PCI device to handle.
- * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
- *
- * Transition a device to a new power state, using the platform firmware and/or
- * the device's PCI PM registers.
- *
- * RETURN VALUE:
- * -EINVAL if the requested state is invalid.
- * -EIO if device does not support PCI PM or its PM capabilities register has a
- * wrong version, or device doesn't support the requested state.
- * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
- * 0 if device already is in the requested state.
- * 0 if the transition is to D3 but D3 is not supported.
- * 0 if device's power state has been successfully changed.
- */
-int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
{
int error;
@@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
return 0;
if (state == PCI_D0)
- return pci_set_full_power_state(dev);
+ return pci_set_full_power_state(dev, locked);
/*
* This device is quirked not to be put into D3, so don't put it in
@@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
* To put the device in D3cold, put it into D3hot in the native
* way, then put it into D3cold using platform ops.
*/
- error = pci_set_low_power_state(dev, PCI_D3hot);
+ error = pci_set_low_power_state(dev, PCI_D3hot, locked);
if (pci_platform_power_transition(dev, PCI_D3cold))
return error;
/* Powering off a bridge may power off the whole hierarchy */
if (dev->current_state == PCI_D3cold)
- pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
+ __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
} else {
- error = pci_set_low_power_state(dev, state);
+ error = pci_set_low_power_state(dev, state, locked);
if (pci_platform_power_transition(dev, state))
return error;
@@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
return 0;
}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
+ *
+ * Transition a device to a new power state, using the platform firmware and/or
+ * the device's PCI PM registers.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
+ * 0 if device already is in the requested state.
+ * 0 if the transition is to D3 but D3 is not supported.
+ * 0 if device's power state has been successfully changed.
+ */
+int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+ return __pci_set_power_state(dev, state, false);
+}
EXPORT_SYMBOL(pci_set_power_state);
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{
+ lockdep_assert_held(&pci_bus_sem);
+
+ return __pci_set_power_state(dev, state, true);
+}
+EXPORT_SYMBOL(pci_set_power_state_locked);
+
#define PCI_EXP_SAVE_REGS 7
static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
@@ -2496,29 +2522,36 @@ static void pci_pme_list_scan(struct work_struct *work)
if (pdev->pme_poll) {
struct pci_dev *bridge = pdev->bus->self;
struct device *dev = &pdev->dev;
- int pm_status;
+ struct device *bdev = bridge ? &bridge->dev : NULL;
+ int bref = 0;
/*
- * If bridge is in low power state, the
- * configuration space of subordinate devices
- * may be not accessible
+ * If we have a bridge, it should be in an active/D0
+ * state or the configuration space of subordinate
+ * devices may not be accessible or stable over the
+ * course of the call.
*/
- if (bridge && bridge->current_state != PCI_D0)
- continue;
+ if (bdev) {
+ bref = pm_runtime_get_if_active(bdev, true);
+ if (!bref)
+ continue;
+
+ if (bridge->current_state != PCI_D0)
+ goto put_bridge;
+ }
/*
- * If the device is in a low power state it
- * should not be polled either.
+ * The device itself should be suspended but config
+ * space must be accessible, therefore it cannot be in
+ * D3cold.
*/
- pm_status = pm_runtime_get_if_active(dev, true);
- if (!pm_status)
- continue;
-
- if (pdev->current_state != PCI_D3cold)
+ if (pm_runtime_suspended(dev) &&
+ pdev->current_state != PCI_D3cold)
pci_pme_wakeup(pdev, NULL);
- if (pm_status > 0)
- pm_runtime_put(dev);
+put_bridge:
+ if (bref > 0)
+ pm_runtime_put(bdev);
} else {
list_del(&pme_dev->list);
kfree(pme_dev);
@@ -4320,8 +4353,8 @@ int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
if (res->end > IO_SPACE_LIMIT)
return -EINVAL;
- return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
- pgprot_device(PAGE_KERNEL));
+ return vmap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
+ pgprot_device(PAGE_KERNEL));
#else
/*
* This architecture does not have memory mapped I/O space,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 2336a8d1edab..bfc56f7bee1c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -368,11 +368,6 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
return 0;
}
-static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
-{
- return dev->error_state == pci_channel_io_perm_failure;
-}
-
/* pci_dev priv_flags */
#define PCI_DEV_ADDED 0
#define PCI_DPC_RECOVERED 1
@@ -571,12 +566,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
#ifdef CONFIG_PCIEASPM
void pcie_aspm_init_link_state(struct pci_dev *pdev);
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
-void pcie_aspm_pm_state_change(struct pci_dev *pdev);
+void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
#else
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
-static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
+static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
#endif
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 5a0066ecc3c5..bc0bd86695ec 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
up_read(&pci_bus_sem);
}
-/* @pdev: the root port or switch downstream port */
-void pcie_aspm_pm_state_change(struct pci_dev *pdev)
+/*
+ * @pdev: the root port or switch downstream port
+ * @locked: whether pci_bus_sem is held
+ */
+void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
{
struct pcie_link_state *link = pdev->link_state;
@@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
* Devices changed PM state, we should recheck if latency
* meets all functions' requirement
*/
- down_read(&pci_bus_sem);
+ if (!locked)
+ down_read(&pci_bus_sem);
mutex_lock(&aspm_lock);
pcie_update_aspm_capable(link->root);
pcie_config_aspm_path(link);
mutex_unlock(&aspm_lock);
- up_read(&pci_bus_sem);
+ if (!locked)
+ up_read(&pci_bus_sem);
}
void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index c584165b13ba..7e3aa7e2345f 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -2305,6 +2305,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
continue;
}
+ /*
+ * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus
+ * child count larger than the number of valid child pointers.
+ * A child offset of 0 can only occur on CMN-600; otherwise it
+ * would imply the root node being its own grandchild, which
+ * we can safely dismiss in general.
+ */
+ if (reg == 0 && cmn->part != PART_CMN600) {
+ dev_dbg(cmn->dev, "bogus child pointer?\n");
+ continue;
+ }
arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn);
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 6303b82566f9..9e5d7fa647b6 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -716,7 +716,7 @@ static void smmu_pmu_free_msis(void *data)
{
struct device *dev = data;
- platform_msi_domain_free_irqs(dev);
+ platform_device_msi_free_irqs_all(dev);
}
static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
@@ -746,7 +746,7 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
return;
- ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
+ ret = platform_device_msi_init_and_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs\n");
return;
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
index 365d964b0f6a..308c9969642e 100644
--- a/drivers/perf/cxl_pmu.c
+++ b/drivers/perf/cxl_pmu.c
@@ -59,7 +59,7 @@
#define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59)
#define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8))
-#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(15, 0)
+#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(31, 0)
#define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n))
@@ -314,9 +314,9 @@ static bool cxl_pmu_config1_get_edge(struct perf_event *event)
}
/*
- * CPMU specification allows for 8 filters, each with a 16 bit value...
- * So we need to find 8x16bits to store it in.
- * As the value used for disable is 0xffff, a separate enable switch
+ * CPMU specification allows for 8 filters, each with a 32 bit value...
+ * So we need to find 8x32bits to store it in.
+ * As the value used for disable is 0xffff_ffff, a separate enable switch
* is needed.
*/
@@ -419,7 +419,7 @@ static struct attribute *cxl_pmu_event_attrs[] = {
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)),
- CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)),
/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm, CXL_PMU_GID_S2M_DRS, BIT(1)),
@@ -642,7 +642,7 @@ static void cxl_pmu_event_start(struct perf_event *event, int flags)
if (cxl_pmu_config1_hdm_filter_en(event))
cfg = cxl_pmu_config2_get_hdm_decoder(event);
else
- cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */
+ cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */
writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
}
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index 0dda70e1ef90..c78a6fd6c57f 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -150,19 +150,11 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- if (!rvpmu->ctr_get_width)
- /**
- * If the pmu driver doesn't support counter width, set it to default
- * maximum allowed by the specification.
- */
- cwidth = 63;
- else {
- if (hwc->idx == -1)
- /* Handle init case where idx is not initialized yet */
- cwidth = rvpmu->ctr_get_width(0);
- else
- cwidth = rvpmu->ctr_get_width(hwc->idx);
- }
+ if (hwc->idx == -1)
+ /* Handle init case where idx is not initialized yet */
+ cwidth = rvpmu->ctr_get_width(0);
+ else
+ cwidth = rvpmu->ctr_get_width(hwc->idx);
return GENMASK_ULL(cwidth, 0);
}
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index 79fdd667922e..fa0bccf4edf2 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -37,6 +37,12 @@ static int pmu_legacy_event_map(struct perf_event *event, u64 *config)
return pmu_legacy_ctr_get_idx(event);
}
+/* cycle & instret are always 64 bit, one bit less according to SBI spec */
+static int pmu_legacy_ctr_get_width(int idx)
+{
+ return 63;
+}
+
static u64 pmu_legacy_read_ctr(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -111,12 +117,14 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
pmu->ctr_stop = NULL;
pmu->event_map = pmu_legacy_event_map;
pmu->ctr_get_idx = pmu_legacy_ctr_get_idx;
- pmu->ctr_get_width = NULL;
+ pmu->ctr_get_width = pmu_legacy_ctr_get_width;
pmu->ctr_clear_idx = NULL;
pmu->ctr_read = pmu_legacy_read_ctr;
pmu->event_mapped = pmu_legacy_event_mapped;
pmu->event_unmapped = pmu_legacy_event_unmapped;
pmu->csr_index = pmu_legacy_csr_index;
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
}
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 16acd4dcdb96..452aab49db1e 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -512,7 +512,7 @@ static void pmu_sbi_set_scounteren(void *arg)
if (event->hw.idx != -1)
csr_write(CSR_SCOUNTEREN,
- csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+ csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event)));
}
static void pmu_sbi_reset_scounteren(void *arg)
@@ -521,7 +521,7 @@ static void pmu_sbi_reset_scounteren(void *arg)
if (event->hw.idx != -1)
csr_write(CSR_SCOUNTEREN,
- csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+ csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event)));
}
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
@@ -731,14 +731,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
/* compute hardware counter index */
hidx = info->csr - CSR_CYCLE;
/* check if the corresponding bit is set in sscountovf */
- if (!(overflow & (1 << hidx)))
+ if (!(overflow & BIT(hidx)))
continue;
/*
* Keep a track of overflowed counters so that they can be started
* with updated initial value.
*/
- overflowed_ctrs |= 1 << lidx;
+ overflowed_ctrs |= BIT(lidx);
hw_evt = &event->hw;
riscv_pmu_event_update(event);
perf_sample_data_init(&data, 0, hw_evt->last_period);
diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
index e625b32889bf..0928a526e2ab 100644
--- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
+++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
@@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev)
return ret;
}
- priv->id = of_alias_get_id(np, "mipi_dphy");
+ priv->id = of_alias_get_id(np, "mipi-dphy");
if (priv->id < 0) {
dev_err(dev, "Failed to get phy node alias id: %d\n",
priv->id);
diff --git a/drivers/phy/microchip/lan966x_serdes.c b/drivers/phy/microchip/lan966x_serdes.c
index c1a41b6cd29b..b5ac2b7995e7 100644
--- a/drivers/phy/microchip/lan966x_serdes.c
+++ b/drivers/phy/microchip/lan966x_serdes.c
@@ -96,6 +96,8 @@ static const struct serdes_mux lan966x_serdes_muxes[] = {
SERDES_MUX_SGMII(SERDES6G(1), 3, HSIO_HW_CFG_SD6G_1_CFG,
HSIO_HW_CFG_SD6G_1_CFG_SET(1)),
+ SERDES_MUX_SGMII(SERDES6G(2), 4, 0, 0),
+
SERDES_MUX_RGMII(RGMII(0), 2, HSIO_HW_CFG_RGMII_0_CFG |
HSIO_HW_CFG_RGMII_ENA |
HSIO_HW_CFG_GMII_ENA,
diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c
index a623f092b11f..a43e20abb10d 100644
--- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c
+++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c
@@ -37,56 +37,28 @@
#define EUSB2_TUNE_EUSB_EQU 0x5A
#define EUSB2_TUNE_EUSB_HS_COMP_CUR 0x5B
-#define QCOM_EUSB2_REPEATER_INIT_CFG(r, v) \
- { \
- .reg = r, \
- .val = v, \
- }
-
-enum reg_fields {
- F_TUNE_EUSB_HS_COMP_CUR,
- F_TUNE_EUSB_EQU,
- F_TUNE_EUSB_SLEW,
- F_TUNE_USB2_HS_COMP_CUR,
- F_TUNE_USB2_PREEM,
- F_TUNE_USB2_EQU,
- F_TUNE_USB2_SLEW,
- F_TUNE_SQUELCH_U,
- F_TUNE_HSDISC,
- F_TUNE_RES_FSDIF,
- F_TUNE_IUSB2,
- F_TUNE_USB2_CROSSOVER,
- F_NUM_TUNE_FIELDS,
-
- F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS,
- F_FORCE_EN_5,
-
- F_EN_CTL1,
-
- F_RPTR_STATUS,
- F_NUM_FIELDS,
-};
-
-static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = {
- [F_TUNE_EUSB_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_EUSB_HS_COMP_CUR, 0, 1),
- [F_TUNE_EUSB_EQU] = REG_FIELD(EUSB2_TUNE_EUSB_EQU, 0, 1),
- [F_TUNE_EUSB_SLEW] = REG_FIELD(EUSB2_TUNE_EUSB_SLEW, 0, 1),
- [F_TUNE_USB2_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_USB2_HS_COMP_CUR, 0, 1),
- [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2),
- [F_TUNE_USB2_EQU] = REG_FIELD(EUSB2_TUNE_USB2_EQU, 0, 1),
- [F_TUNE_USB2_SLEW] = REG_FIELD(EUSB2_TUNE_USB2_SLEW, 0, 1),
- [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2),
- [F_TUNE_HSDISC] = REG_FIELD(EUSB2_TUNE_HSDISC, 0, 2),
- [F_TUNE_RES_FSDIF] = REG_FIELD(EUSB2_TUNE_RES_FSDIF, 0, 2),
- [F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3),
- [F_TUNE_USB2_CROSSOVER] = REG_FIELD(EUSB2_TUNE_USB2_CROSSOVER, 0, 2),
-
- [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7),
- [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7),
-
- [F_EN_CTL1] = REG_FIELD(EUSB2_EN_CTL1, 0, 7),
-
- [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7),
+enum eusb2_reg_layout {
+ TUNE_EUSB_HS_COMP_CUR,
+ TUNE_EUSB_EQU,
+ TUNE_EUSB_SLEW,
+ TUNE_USB2_HS_COMP_CUR,
+ TUNE_USB2_PREEM,
+ TUNE_USB2_EQU,
+ TUNE_USB2_SLEW,
+ TUNE_SQUELCH_U,
+ TUNE_HSDISC,
+ TUNE_RES_FSDIF,
+ TUNE_IUSB2,
+ TUNE_USB2_CROSSOVER,
+ NUM_TUNE_FIELDS,
+
+ FORCE_VAL_5 = NUM_TUNE_FIELDS,
+ FORCE_EN_5,
+
+ EN_CTL1,
+
+ RPTR_STATUS,
+ LAYOUT_SIZE,
};
struct eusb2_repeater_cfg {
@@ -98,10 +70,11 @@ struct eusb2_repeater_cfg {
struct eusb2_repeater {
struct device *dev;
- struct regmap_field *regs[F_NUM_FIELDS];
+ struct regmap *regmap;
struct phy *phy;
struct regulator_bulk_data *vregs;
const struct eusb2_repeater_cfg *cfg;
+ u32 base;
enum phy_mode mode;
};
@@ -109,10 +82,10 @@ static const char * const pm8550b_vreg_l[] = {
"vdd18", "vdd3",
};
-static const u32 pm8550b_init_tbl[F_NUM_TUNE_FIELDS] = {
- [F_TUNE_IUSB2] = 0x8,
- [F_TUNE_SQUELCH_U] = 0x3,
- [F_TUNE_USB2_PREEM] = 0x5,
+static const u32 pm8550b_init_tbl[NUM_TUNE_FIELDS] = {
+ [TUNE_IUSB2] = 0x8,
+ [TUNE_SQUELCH_U] = 0x3,
+ [TUNE_USB2_PREEM] = 0x5,
};
static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = {
@@ -140,47 +113,42 @@ static int eusb2_repeater_init_vregs(struct eusb2_repeater *rptr)
static int eusb2_repeater_init(struct phy *phy)
{
- struct reg_field *regfields = eusb2_repeater_tune_reg_fields;
struct eusb2_repeater *rptr = phy_get_drvdata(phy);
struct device_node *np = rptr->dev->of_node;
- u32 init_tbl[F_NUM_TUNE_FIELDS] = { 0 };
- u8 override;
+ struct regmap *regmap = rptr->regmap;
+ const u32 *init_tbl = rptr->cfg->init_tbl;
+ u8 tune_usb2_preem = init_tbl[TUNE_USB2_PREEM];
+ u8 tune_hsdisc = init_tbl[TUNE_HSDISC];
+ u8 tune_iusb2 = init_tbl[TUNE_IUSB2];
+ u32 base = rptr->base;
u32 val;
int ret;
- int i;
+
+ of_property_read_u8(np, "qcom,tune-usb2-amplitude", &tune_iusb2);
+ of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &tune_hsdisc);
+ of_property_read_u8(np, "qcom,tune-usb2-preem", &tune_usb2_preem);
ret = regulator_bulk_enable(rptr->cfg->num_vregs, rptr->vregs);
if (ret)
return ret;
- regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN);
+ regmap_write(regmap, base + EUSB2_EN_CTL1, EUSB2_RPTR_EN);
- for (i = 0; i < F_NUM_TUNE_FIELDS; i++) {
- if (init_tbl[i]) {
- regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]);
- } else {
- /* Write 0 if there's no value set */
- u32 mask = GENMASK(regfields[i].msb, regfields[i].lsb);
-
- regmap_field_update_bits(rptr->regs[i], mask, 0);
- }
- }
- memcpy(init_tbl, rptr->cfg->init_tbl, sizeof(init_tbl));
+ regmap_write(regmap, base + EUSB2_TUNE_EUSB_HS_COMP_CUR, init_tbl[TUNE_EUSB_HS_COMP_CUR]);
+ regmap_write(regmap, base + EUSB2_TUNE_EUSB_EQU, init_tbl[TUNE_EUSB_EQU]);
+ regmap_write(regmap, base + EUSB2_TUNE_EUSB_SLEW, init_tbl[TUNE_EUSB_SLEW]);
+ regmap_write(regmap, base + EUSB2_TUNE_USB2_HS_COMP_CUR, init_tbl[TUNE_USB2_HS_COMP_CUR]);
+ regmap_write(regmap, base + EUSB2_TUNE_USB2_EQU, init_tbl[TUNE_USB2_EQU]);
+ regmap_write(regmap, base + EUSB2_TUNE_USB2_SLEW, init_tbl[TUNE_USB2_SLEW]);
+ regmap_write(regmap, base + EUSB2_TUNE_SQUELCH_U, init_tbl[TUNE_SQUELCH_U]);
+ regmap_write(regmap, base + EUSB2_TUNE_RES_FSDIF, init_tbl[TUNE_RES_FSDIF]);
+ regmap_write(regmap, base + EUSB2_TUNE_USB2_CROSSOVER, init_tbl[TUNE_USB2_CROSSOVER]);
- if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &override))
- init_tbl[F_TUNE_IUSB2] = override;
+ regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, tune_usb2_preem);
+ regmap_write(regmap, base + EUSB2_TUNE_HSDISC, tune_hsdisc);
+ regmap_write(regmap, base + EUSB2_TUNE_IUSB2, tune_iusb2);
- if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &override))
- init_tbl[F_TUNE_HSDISC] = override;
-
- if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &override))
- init_tbl[F_TUNE_USB2_PREEM] = override;
-
- for (i = 0; i < F_NUM_TUNE_FIELDS; i++)
- regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]);
-
- ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS],
- val, val & RPTR_OK, 10, 5);
+ ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, val, val & RPTR_OK, 10, 5);
if (ret)
dev_err(rptr->dev, "initialization timed-out\n");
@@ -191,6 +159,8 @@ static int eusb2_repeater_set_mode(struct phy *phy,
enum phy_mode mode, int submode)
{
struct eusb2_repeater *rptr = phy_get_drvdata(phy);
+ struct regmap *regmap = rptr->regmap;
+ u32 base = rptr->base;
switch (mode) {
case PHY_MODE_USB_HOST:
@@ -199,10 +169,8 @@ static int eusb2_repeater_set_mode(struct phy *phy,
* per eUSB 1.2 Spec. Below implement software workaround until
* PHY and controller is fixing seen observation.
*/
- regmap_field_update_bits(rptr->regs[F_FORCE_EN_5],
- F_CLK_19P2M_EN, F_CLK_19P2M_EN);
- regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5],
- V_CLK_19P2M_EN, V_CLK_19P2M_EN);
+ regmap_write(regmap, base + EUSB2_FORCE_EN_5, F_CLK_19P2M_EN);
+ regmap_write(regmap, base + EUSB2_FORCE_VAL_5, V_CLK_19P2M_EN);
break;
case PHY_MODE_USB_DEVICE:
/*
@@ -211,10 +179,8 @@ static int eusb2_repeater_set_mode(struct phy *phy,
* repeater doesn't clear previous value due to shared
* regulators (say host <-> device mode switch).
*/
- regmap_field_update_bits(rptr->regs[F_FORCE_EN_5],
- F_CLK_19P2M_EN, 0);
- regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5],
- V_CLK_19P2M_EN, 0);
+ regmap_write(regmap, base + EUSB2_FORCE_EN_5, 0);
+ regmap_write(regmap, base + EUSB2_FORCE_VAL_5, 0);
break;
default:
return -EINVAL;
@@ -243,9 +209,8 @@ static int eusb2_repeater_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct phy_provider *phy_provider;
struct device_node *np = dev->of_node;
- struct regmap *regmap;
- int i, ret;
u32 res;
+ int ret;
rptr = devm_kzalloc(dev, sizeof(*rptr), GFP_KERNEL);
if (!rptr)
@@ -258,22 +223,15 @@ static int eusb2_repeater_probe(struct platform_device *pdev)
if (!rptr->cfg)
return -EINVAL;
- regmap = dev_get_regmap(dev->parent, NULL);
- if (!regmap)
+ rptr->regmap = dev_get_regmap(dev->parent, NULL);
+ if (!rptr->regmap)
return -ENODEV;
ret = of_property_read_u32(np, "reg", &res);
if (ret < 0)
return ret;
- for (i = 0; i < F_NUM_FIELDS; i++)
- eusb2_repeater_tune_reg_fields[i].reg += res;
-
- ret = devm_regmap_field_bulk_alloc(dev, regmap, rptr->regs,
- eusb2_repeater_tune_reg_fields,
- F_NUM_FIELDS);
- if (ret)
- return ret;
+ rptr->base = res;
ret = eusb2_repeater_init_vregs(rptr);
if (ret < 0) {
diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c
index c2590579190a..03fb0d4b75d7 100644
--- a/drivers/phy/qualcomm/phy-qcom-m31.c
+++ b/drivers/phy/qualcomm/phy-qcom-m31.c
@@ -299,7 +299,7 @@ static int m31usb_phy_probe(struct platform_device *pdev)
qphy->vreg = devm_regulator_get(dev, "vdda-phy");
if (IS_ERR(qphy->vreg))
- return dev_err_probe(dev, PTR_ERR(qphy->phy),
+ return dev_err_probe(dev, PTR_ERR(qphy->vreg),
"failed to get vreg\n");
phy_set_drvdata(qphy->phy, qphy);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 1ad10110dd25..17c4ad7553a5 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -3562,14 +3562,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = qmp_combo_typec_switch_register(qmp);
- if (ret)
- return ret;
-
- ret = drm_aux_bridge_register(dev);
- if (ret)
- return ret;
-
/* Check for legacy binding with child nodes. */
usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
if (usb_np) {
@@ -3589,6 +3581,14 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
goto err_node_put;
+ ret = qmp_combo_typec_switch_register(qmp);
+ if (ret)
+ goto err_node_put;
+
+ ret = drm_aux_bridge_register(dev);
+ if (ret)
+ goto err_node_put;
+
pm_runtime_set_active(dev);
ret = devm_pm_runtime_enable(dev);
if (ret)
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 243cc2b9a0fb..5c003988c35d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1556,6 +1556,14 @@ static const char * const qmp_phy_vreg_l[] = {
"vdda-phy", "vdda-pll",
};
+static const struct qmp_usb_offsets qmp_usb_offsets_v3 = {
+ .serdes = 0,
+ .pcs = 0x800,
+ .pcs_misc = 0x600,
+ .tx = 0x200,
+ .rx = 0x400,
+};
+
static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = {
.serdes = 0,
.pcs = 0x800,
@@ -1564,7 +1572,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = {
.rx = 0x400,
};
-static const struct qmp_usb_offsets qmp_usb_offsets_v3 = {
+static const struct qmp_usb_offsets qmp_usb_offsets_v3_msm8996 = {
.serdes = 0,
.pcs = 0x600,
.tx = 0x200,
@@ -1613,6 +1621,24 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = {
.rx = 0x1000,
};
+static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = {
+ .lanes = 1,
+
+ .offsets = &qmp_usb_offsets_v3,
+
+ .serdes_tbl = ipq9574_usb3_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(ipq9574_usb3_serdes_tbl),
+ .tx_tbl = msm8996_usb3_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(msm8996_usb3_tx_tbl),
+ .rx_tbl = ipq8074_usb3_rx_tbl,
+ .rx_tbl_num = ARRAY_SIZE(ipq8074_usb3_rx_tbl),
+ .pcs_tbl = ipq8074_usb3_pcs_tbl,
+ .pcs_tbl_num = ARRAY_SIZE(ipq8074_usb3_pcs_tbl),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = qmp_v3_usb3phy_regs_layout,
+};
+
static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
.lanes = 1,
@@ -1652,7 +1678,7 @@ static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = {
static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
.lanes = 1,
- .offsets = &qmp_usb_offsets_v3,
+ .offsets = &qmp_usb_offsets_v3_msm8996,
.serdes_tbl = msm8996_usb3_serdes_tbl,
.serdes_tbl_num = ARRAY_SIZE(msm8996_usb3_serdes_tbl),
@@ -2563,7 +2589,7 @@ err_node_put:
static const struct of_device_id qmp_usb_of_match_table[] = {
{
.compatible = "qcom,ipq6018-qmp-usb3-phy",
- .data = &ipq8074_usb3phy_cfg,
+ .data = &ipq6018_usb3phy_cfg,
}, {
.compatible = "qcom,ipq8074-qmp-usb3-phy",
.data = &ipq8074_usb3phy_cfg,
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index e53eace7c91e..6387c0d34c55 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -673,8 +673,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
channel->irq = platform_get_irq_optional(pdev, 0);
channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node);
if (channel->dr_mode != USB_DR_MODE_UNKNOWN) {
- int ret;
-
channel->is_otg_channel = true;
channel->uses_otg_pins = !of_property_read_bool(dev->of_node,
"renesas,no-otg-pins");
@@ -738,8 +736,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
ret = PTR_ERR(provider);
goto error;
} else if (channel->is_otg_channel) {
- int ret;
-
ret = device_create_file(dev, &dev_attr_role);
if (ret < 0)
goto error;
diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c
index dd2913ac0fa2..78e19b128962 100644
--- a/drivers/phy/ti/phy-omap-usb2.c
+++ b/drivers/phy/ti/phy-omap-usb2.c
@@ -117,7 +117,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled)
{
struct omap_usb *phy = phy_to_omapusb(otg->usb_phy);
- if (!phy->comparator)
+ if (!phy->comparator || !phy->comparator->set_vbus)
return -ENODEV;
return phy->comparator->set_vbus(phy->comparator, enabled);
@@ -127,7 +127,7 @@ static int omap_usb_start_srp(struct usb_otg *otg)
{
struct omap_usb *phy = phy_to_omapusb(otg->usb_phy);
- if (!phy->comparator)
+ if (!phy->comparator || !phy->comparator->start_srp)
return -ENODEV;
return phy->comparator->start_srp(phy->comparator);
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index ee56856cb80c..bbcdece83bf4 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1644,7 +1644,7 @@ static int pinctrl_pins_show(struct seq_file *s, void *what)
const struct pinctrl_ops *ops = pctldev->desc->pctlops;
unsigned int i, pin;
#ifdef CONFIG_GPIOLIB
- struct gpio_device *gdev __free(gpio_device_put) = NULL;
+ struct gpio_device *gdev = NULL;
struct pinctrl_gpio_range *range;
int gpio_num;
#endif
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 03ecb3d1aaf6..49f89b70dcec 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
}
ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler,
- IRQF_SHARED, KBUILD_MODNAME, gpio_dev);
+ IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev);
if (ret)
goto out2;
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c
index 73f091cd827e..23aebd4695e9 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c
@@ -2562,7 +2562,7 @@ static const struct of_device_id stm32mp257_pctrl_match[] = {
};
static const struct dev_pm_ops stm32_pinctrl_dev_pm_ops = {
- SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, stm32_pinctrl_resume)
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(stm32_pinctrl_suspend, stm32_pinctrl_resume)
};
static struct platform_driver stm32mp257_pinctrl_driver = {
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index 1dd84c7a79de..b1995ac268d7 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -1170,7 +1170,7 @@ static int mlxbf_pmc_program_crspace_counter(int blk_num, uint32_t cnt_num,
int ret;
addr = pmc->block[blk_num].mmio_base +
- (rounddown(cnt_num, 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ);
+ ((cnt_num / 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ);
ret = mlxbf_pmc_readl(addr, &word);
if (ret)
return ret;
@@ -1413,7 +1413,7 @@ static int mlxbf_pmc_read_crspace_event(int blk_num, uint32_t cnt_num,
int ret;
addr = pmc->block[blk_num].mmio_base +
- (rounddown(cnt_num, 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ);
+ ((cnt_num / 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ);
ret = mlxbf_pmc_readl(addr, &word);
if (ret)
return ret;
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index ed16ec422a7b..b8d1e32e97eb 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -47,6 +47,9 @@
/* Message with data needs at least two words (for header & data). */
#define MLXBF_TMFIFO_DATA_MIN_WORDS 2
+/* Tx timeout in milliseconds. */
+#define TMFIFO_TX_TIMEOUT 2000
+
/* ACPI UID for BlueField-3. */
#define TMFIFO_BF3_UID 1
@@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
* @drop_desc: dummy desc for packet dropping
* @cur_len: processed length of the current descriptor
* @rem_len: remaining length of the pending packet
+ * @rem_padding: remaining bytes to send as paddings
* @pkt_len: total length of the pending packet
* @next_avail: next avail descriptor id
* @num: vring size (number of descriptors)
* @align: vring alignment size
* @index: vring index
* @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @tx_timeout: expire time of last tx packet
* @fifo: pointer to the tmfifo structure
*/
struct mlxbf_tmfifo_vring {
@@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
struct vring_desc drop_desc;
int cur_len;
int rem_len;
+ int rem_padding;
u32 pkt_len;
u16 next_avail;
int num;
int align;
int index;
int vdev_id;
+ unsigned long tx_timeout;
struct mlxbf_tmfifo *fifo;
};
@@ -819,6 +826,50 @@ mlxbf_tmfifo_desc_done:
return true;
}
+static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
+{
+ unsigned long flags;
+
+ /* Only handle Tx timeout for network vdev. */
+ if (vring->vdev_id != VIRTIO_ID_NET)
+ return;
+
+ /* Initialize the timeout or return if not expired. */
+ if (!vring->tx_timeout) {
+ /* Initialize the timeout. */
+ vring->tx_timeout = jiffies +
+ msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
+ return;
+ } else if (time_before(jiffies, vring->tx_timeout)) {
+ /* Return if not timeout yet. */
+ return;
+ }
+
+ /*
+ * Drop the packet after timeout. The outstanding packet is
+ * released and the remaining bytes will be sent with padding byte 0x00
+ * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
+ * either dropped directly, or appended into existing outstanding packet
+ * thus dropped as corrupted network packet.
+ */
+ vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
+ mlxbf_tmfifo_release_pkt(vring);
+ vring->cur_len = 0;
+ vring->rem_len = 0;
+ vring->fifo->vring[0] = NULL;
+
+ /*
+ * Make sure the load/store are in order before
+ * returning back to virtio.
+ */
+ virtio_mb(false);
+
+ /* Notify upper layer. */
+ spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
+ vring_interrupt(0, vring->vq);
+ spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
+}
+
/* Rx & Tx processing of a queue. */
static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
{
@@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
return;
do {
+retry:
/* Get available FIFO space. */
if (avail == 0) {
if (is_rx)
@@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
break;
}
+ /* Insert paddings for discarded Tx packet. */
+ if (!is_rx) {
+ vring->tx_timeout = 0;
+ while (vring->rem_padding >= sizeof(u64)) {
+ writeq(0, vring->fifo->tx.data);
+ vring->rem_padding -= sizeof(u64);
+ if (--avail == 0)
+ goto retry;
+ }
+ }
+
/* Console output always comes from the Tx buffer. */
if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
mlxbf_tmfifo_console_tx(fifo, avail);
@@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
/* Handle one descriptor. */
more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
} while (more);
+
+ /* Check Tx timeout. */
+ if (avail <= 0 && !is_rx)
+ mlxbf_tmfifo_check_tx_timeout(vring);
}
/* Handle Rx or Tx queues. */
diff --git a/drivers/platform/x86/amd/pmf/Kconfig b/drivers/platform/x86/amd/pmf/Kconfig
index f246252bddd8..f4fa8bd8bda8 100644
--- a/drivers/platform/x86/amd/pmf/Kconfig
+++ b/drivers/platform/x86/amd/pmf/Kconfig
@@ -10,6 +10,7 @@ config AMD_PMF
depends on AMD_NB
select ACPI_PLATFORM_PROFILE
depends on TEE && AMDTEE
+ depends on AMD_SFH_HID
help
This driver provides support for the AMD Platform Management Framework.
The goal is to enhance end user experience by making AMD PCs smarter,
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index feaa09f5b35a..4f734e049f4a 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -296,7 +296,8 @@ static int amd_pmf_suspend_handler(struct device *dev)
{
struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
- kfree(pdev->buf);
+ if (pdev->smart_pc_enabled)
+ cancel_delayed_work_sync(&pdev->pb_work);
return 0;
}
@@ -312,6 +313,9 @@ static int amd_pmf_resume_handler(struct device *dev)
return ret;
}
+ if (pdev->smart_pc_enabled)
+ schedule_delayed_work(&pdev->pb_work, msecs_to_jiffies(2000));
+
return 0;
}
@@ -330,9 +334,14 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
dev_dbg(dev->dev, "SPS enabled and Platform Profiles registered\n");
}
- if (!amd_pmf_init_smart_pc(dev)) {
+ amd_pmf_init_smart_pc(dev);
+ if (dev->smart_pc_enabled) {
dev_dbg(dev->dev, "Smart PC Solution Enabled\n");
- } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
+ /* If Smart PC is enabled, no need to check for other features */
+ return;
+ }
+
+ if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
amd_pmf_init_auto_mode(dev);
dev_dbg(dev->dev, "Auto Mode Init done\n");
} else if (is_apmf_func_supported(dev, APMF_FUNC_DYN_SLIDER_AC) ||
@@ -351,7 +360,7 @@ static void amd_pmf_deinit_features(struct amd_pmf_dev *dev)
amd_pmf_deinit_sps(dev);
}
- if (!dev->smart_pc_enabled) {
+ if (dev->smart_pc_enabled) {
amd_pmf_deinit_smart_pc(dev);
} else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
amd_pmf_deinit_auto_mode(dev);
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index 16999c5b334f..66cae1cca73c 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -441,11 +441,6 @@ struct apmf_dyn_slider_output {
struct apmf_cnqf_power_set ps[APMF_CNQF_MAX];
} __packed;
-enum smart_pc_status {
- PMF_SMART_PC_ENABLED,
- PMF_SMART_PC_DISABLED,
-};
-
/* Smart PC - TA internals */
enum system_state {
SYSTEM_STATE_S0i3,
diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c
index a0423942f771..a3dec14c3004 100644
--- a/drivers/platform/x86/amd/pmf/spc.c
+++ b/drivers/platform/x86/amd/pmf/spc.c
@@ -10,6 +10,7 @@
*/
#include <acpi/button.h>
+#include <linux/amd-pmf-io.h>
#include <linux/power_supply.h>
#include <linux/units.h>
#include "pmf.h"
@@ -44,6 +45,8 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *
dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency);
dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy);
dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open");
+ dev_dbg(dev->dev, "User Presence: %s\n", in->ev_info.user_present ? "Present" : "Away");
+ dev_dbg(dev->dev, "Ambient Light: %d\n", in->ev_info.ambient_light);
dev_dbg(dev->dev, "==== TA inputs END ====\n");
}
#else
@@ -147,6 +150,38 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_
return 0;
}
+static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
+{
+ struct amd_sfh_info sfh_info;
+ int ret;
+
+ /* Get ALS data */
+ ret = amd_get_sfh_info(&sfh_info, MT_ALS);
+ if (!ret)
+ in->ev_info.ambient_light = sfh_info.ambient_light;
+ else
+ return ret;
+
+ /* get HPD data */
+ ret = amd_get_sfh_info(&sfh_info, MT_HPD);
+ if (ret)
+ return ret;
+
+ switch (sfh_info.user_present) {
+ case SFH_NOT_DETECTED:
+ in->ev_info.user_present = 0xff; /* assume no sensors connected */
+ break;
+ case SFH_USER_PRESENT:
+ in->ev_info.user_present = 1;
+ break;
+ case SFH_USER_AWAY:
+ in->ev_info.user_present = 0;
+ break;
+ }
+
+ return 0;
+}
+
void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
{
/* TA side lid open is 1 and close is 0, hence the ! here */
@@ -155,4 +190,5 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab
amd_pmf_get_smu_info(dev, in);
amd_pmf_get_battery_info(dev, in);
amd_pmf_get_slider_info(dev, in);
+ amd_pmf_get_sensor_info(dev, in);
}
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index 502ce93d5cdd..dcbe8f85e122 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -252,15 +252,17 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
cookie = readl(dev->policy_buf + POLICY_COOKIE_OFFSET);
length = readl(dev->policy_buf + POLICY_COOKIE_LEN);
- if (cookie != POLICY_SIGN_COOKIE || !length)
+ if (cookie != POLICY_SIGN_COOKIE || !length) {
+ dev_dbg(dev->dev, "cookie doesn't match\n");
return -EINVAL;
+ }
/* Update the actual length */
dev->policy_sz = length + 512;
res = amd_pmf_invoke_cmd_init(dev);
if (res == TA_PMF_TYPE_SUCCESS) {
/* Now its safe to announce that smart pc is enabled */
- dev->smart_pc_enabled = PMF_SMART_PC_ENABLED;
+ dev->smart_pc_enabled = true;
/*
* Start collecting the data from TA FW after a small delay
* or else, we might end up getting stale values.
@@ -268,7 +270,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3));
} else {
dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res);
- dev->smart_pc_enabled = PMF_SMART_PC_DISABLED;
+ dev->smart_pc_enabled = false;
return res;
}
@@ -298,8 +300,10 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf,
if (!new_policy_buf)
return -ENOMEM;
- if (copy_from_user(new_policy_buf, buf, length))
+ if (copy_from_user(new_policy_buf, buf, length)) {
+ kfree(new_policy_buf);
return -EFAULT;
+ }
kfree(dev->policy_buf);
dev->policy_buf = new_policy_buf;
@@ -334,25 +338,6 @@ static void amd_pmf_remove_pb(struct amd_pmf_dev *dev) {}
static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev) {}
#endif
-static int amd_pmf_get_bios_buffer(struct amd_pmf_dev *dev)
-{
- dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL);
- if (!dev->policy_buf)
- return -ENOMEM;
-
- dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz);
- if (!dev->policy_base)
- return -ENOMEM;
-
- memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz);
-
- amd_pmf_hex_dump_pb(dev);
- if (pb_side_load)
- amd_pmf_open_pb(dev, dev->dbgfs_dir);
-
- return amd_pmf_start_policy_engine(dev);
-}
-
static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const void *data)
{
return ver->impl_id == TEE_IMPL_ID_AMDTEE;
@@ -451,22 +436,59 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
return ret;
INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
- amd_pmf_set_dram_addr(dev, true);
- amd_pmf_get_bios_buffer(dev);
+
+ ret = amd_pmf_set_dram_addr(dev, true);
+ if (ret)
+ goto error;
+
+ dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz);
+ if (!dev->policy_base) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL);
+ if (!dev->policy_buf) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz);
+
+ amd_pmf_hex_dump_pb(dev);
+
dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
- if (!dev->prev_data)
- return -ENOMEM;
+ if (!dev->prev_data) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = amd_pmf_start_policy_engine(dev);
+ if (ret)
+ goto error;
+
+ if (pb_side_load)
+ amd_pmf_open_pb(dev, dev->dbgfs_dir);
+
+ return 0;
- return dev->smart_pc_enabled;
+error:
+ amd_pmf_deinit_smart_pc(dev);
+
+ return ret;
}
void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
{
- if (pb_side_load)
+ if (pb_side_load && dev->esbin)
amd_pmf_remove_pb(dev);
+ cancel_delayed_work_sync(&dev->pb_work);
kfree(dev->prev_data);
+ dev->prev_data = NULL;
kfree(dev->policy_buf);
- cancel_delayed_work_sync(&dev->pb_work);
+ dev->policy_buf = NULL;
+ kfree(dev->buf);
+ dev->buf = NULL;
amd_pmf_tee_deinit(dev);
}
diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c
index a1ee1a74fc3c..2cf3b4a8813f 100644
--- a/drivers/platform/x86/intel/ifs/load.c
+++ b/drivers/platform/x86/intel/ifs/load.c
@@ -399,7 +399,8 @@ int ifs_load_firmware(struct device *dev)
if (fw->size != expected_size) {
dev_err(dev, "File size mismatch (expected %u, actual %zu). Corrupted IFS image.\n",
expected_size, fw->size);
- return -EINVAL;
+ ret = -EINVAL;
+ goto release;
}
ret = image_sanity_check(dev, (struct microcode_header_intel *)fw->data);
diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c
index b6708bab7c53..527d8fbc7cc1 100644
--- a/drivers/platform/x86/intel/int0002_vgpio.c
+++ b/drivers/platform/x86/intel/int0002_vgpio.c
@@ -196,7 +196,7 @@ static int int0002_probe(struct platform_device *pdev)
* IRQs into gpiolib.
*/
ret = devm_request_irq(dev, irq, int0002_irq,
- IRQF_SHARED, "INT0002", chip);
+ IRQF_ONESHOT | IRQF_SHARED, "INT0002", chip);
if (ret) {
dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret);
return ret;
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
index 33ab207493e3..33bb58dc3f78 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
@@ -23,23 +23,23 @@ static int (*uncore_read)(struct uncore_data *data, unsigned int *min, unsigned
static int (*uncore_write)(struct uncore_data *data, unsigned int input, unsigned int min_max);
static int (*uncore_read_freq)(struct uncore_data *data, unsigned int *freq);
-static ssize_t show_domain_id(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_dev_attr);
+ struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_kobj_attr);
return sprintf(buf, "%u\n", data->domain_id);
}
-static ssize_t show_fabric_cluster_id(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_dev_attr);
+ struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr);
return sprintf(buf, "%u\n", data->cluster_id);
}
-static ssize_t show_package_id(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t show_package_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- struct uncore_data *data = container_of(attr, struct uncore_data, package_id_dev_attr);
+ struct uncore_data *data = container_of(attr, struct uncore_data, package_id_kobj_attr);
return sprintf(buf, "%u\n", data->package_id);
}
@@ -97,30 +97,30 @@ static ssize_t show_perf_status_freq_khz(struct uncore_data *data, char *buf)
}
#define store_uncore_min_max(name, min_max) \
- static ssize_t store_##name(struct device *dev, \
- struct device_attribute *attr, \
+ static ssize_t store_##name(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
const char *buf, size_t count) \
{ \
- struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\
+ struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\
\
return store_min_max_freq_khz(data, buf, count, \
min_max); \
}
#define show_uncore_min_max(name, min_max) \
- static ssize_t show_##name(struct device *dev, \
- struct device_attribute *attr, char *buf)\
+ static ssize_t show_##name(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf)\
{ \
- struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\
+ struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\
\
return show_min_max_freq_khz(data, buf, min_max); \
}
#define show_uncore_perf_status(name) \
- static ssize_t show_##name(struct device *dev, \
- struct device_attribute *attr, char *buf)\
+ static ssize_t show_##name(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf)\
{ \
- struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\
+ struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\
\
return show_perf_status_freq_khz(data, buf); \
}
@@ -134,11 +134,11 @@ show_uncore_min_max(max_freq_khz, 1);
show_uncore_perf_status(current_freq_khz);
#define show_uncore_data(member_name) \
- static ssize_t show_##member_name(struct device *dev, \
- struct device_attribute *attr, char *buf)\
+ static ssize_t show_##member_name(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf)\
{ \
struct uncore_data *data = container_of(attr, struct uncore_data,\
- member_name##_dev_attr);\
+ member_name##_kobj_attr);\
\
return sysfs_emit(buf, "%u\n", \
data->member_name); \
@@ -149,29 +149,29 @@ show_uncore_data(initial_max_freq_khz);
#define init_attribute_rw(_name) \
do { \
- sysfs_attr_init(&data->_name##_dev_attr.attr); \
- data->_name##_dev_attr.show = show_##_name; \
- data->_name##_dev_attr.store = store_##_name; \
- data->_name##_dev_attr.attr.name = #_name; \
- data->_name##_dev_attr.attr.mode = 0644; \
+ sysfs_attr_init(&data->_name##_kobj_attr.attr); \
+ data->_name##_kobj_attr.show = show_##_name; \
+ data->_name##_kobj_attr.store = store_##_name; \
+ data->_name##_kobj_attr.attr.name = #_name; \
+ data->_name##_kobj_attr.attr.mode = 0644; \
} while (0)
#define init_attribute_ro(_name) \
do { \
- sysfs_attr_init(&data->_name##_dev_attr.attr); \
- data->_name##_dev_attr.show = show_##_name; \
- data->_name##_dev_attr.store = NULL; \
- data->_name##_dev_attr.attr.name = #_name; \
- data->_name##_dev_attr.attr.mode = 0444; \
+ sysfs_attr_init(&data->_name##_kobj_attr.attr); \
+ data->_name##_kobj_attr.show = show_##_name; \
+ data->_name##_kobj_attr.store = NULL; \
+ data->_name##_kobj_attr.attr.name = #_name; \
+ data->_name##_kobj_attr.attr.mode = 0444; \
} while (0)
#define init_attribute_root_ro(_name) \
do { \
- sysfs_attr_init(&data->_name##_dev_attr.attr); \
- data->_name##_dev_attr.show = show_##_name; \
- data->_name##_dev_attr.store = NULL; \
- data->_name##_dev_attr.attr.name = #_name; \
- data->_name##_dev_attr.attr.mode = 0400; \
+ sysfs_attr_init(&data->_name##_kobj_attr.attr); \
+ data->_name##_kobj_attr.show = show_##_name; \
+ data->_name##_kobj_attr.store = NULL; \
+ data->_name##_kobj_attr.attr.name = #_name; \
+ data->_name##_kobj_attr.attr.mode = 0400; \
} while (0)
static int create_attr_group(struct uncore_data *data, char *name)
@@ -186,21 +186,21 @@ static int create_attr_group(struct uncore_data *data, char *name)
if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) {
init_attribute_root_ro(domain_id);
- data->uncore_attrs[index++] = &data->domain_id_dev_attr.attr;
+ data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr;
init_attribute_root_ro(fabric_cluster_id);
- data->uncore_attrs[index++] = &data->fabric_cluster_id_dev_attr.attr;
+ data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr;
init_attribute_root_ro(package_id);
- data->uncore_attrs[index++] = &data->package_id_dev_attr.attr;
+ data->uncore_attrs[index++] = &data->package_id_kobj_attr.attr;
}
- data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr;
- data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr;
- data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr;
- data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr;
+ data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr;
+ data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr;
+ data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr;
+ data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr;
ret = uncore_read_freq(data, &freq);
if (!ret)
- data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr;
+ data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr;
data->uncore_attrs[index] = NULL;
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
index 7afb69977c7e..0e5bf507e555 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
@@ -26,14 +26,14 @@
* @instance_id: Unique instance id to append to directory name
* @name: Sysfs entry name for this instance
* @uncore_attr_group: Attribute group storage
- * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz
- * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz
- * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz
- * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz
- * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz
- * @domain_id_dev_attr: Storage for device attribute domain_id
- * @fabric_cluster_id_dev_attr: Storage for device attribute fabric_cluster_id
- * @package_id_dev_attr: Storage for device attribute package_id
+ * @max_freq_khz_kobj_attr: Storage for kobject attribute max_freq_khz
+ * @mix_freq_khz_kobj_attr: Storage for kobject attribute min_freq_khz
+ * @initial_max_freq_khz_kobj_attr: Storage for kobject attribute initial_max_freq_khz
+ * @initial_min_freq_khz_kobj_attr: Storage for kobject attribute initial_min_freq_khz
+ * @current_freq_khz_kobj_attr: Storage for kobject attribute current_freq_khz
+ * @domain_id_kobj_attr: Storage for kobject attribute domain_id
+ * @fabric_cluster_id_kobj_attr: Storage for kobject attribute fabric_cluster_id
+ * @package_id_kobj_attr: Storage for kobject attribute package_id
* @uncore_attrs: Attribute storage for group creation
*
* This structure is used to encapsulate all data related to uncore sysfs
@@ -53,14 +53,14 @@ struct uncore_data {
char name[32];
struct attribute_group uncore_attr_group;
- struct device_attribute max_freq_khz_dev_attr;
- struct device_attribute min_freq_khz_dev_attr;
- struct device_attribute initial_max_freq_khz_dev_attr;
- struct device_attribute initial_min_freq_khz_dev_attr;
- struct device_attribute current_freq_khz_dev_attr;
- struct device_attribute domain_id_dev_attr;
- struct device_attribute fabric_cluster_id_dev_attr;
- struct device_attribute package_id_dev_attr;
+ struct kobj_attribute max_freq_khz_kobj_attr;
+ struct kobj_attribute min_freq_khz_kobj_attr;
+ struct kobj_attribute initial_max_freq_khz_kobj_attr;
+ struct kobj_attribute initial_min_freq_khz_kobj_attr;
+ struct kobj_attribute current_freq_khz_kobj_attr;
+ struct kobj_attribute domain_id_kobj_attr;
+ struct kobj_attribute fabric_cluster_id_kobj_attr;
+ struct kobj_attribute package_id_kobj_attr;
struct attribute *uncore_attrs[9];
};
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
index a5e0f5c22179..b89c0dda9e5d 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
@@ -242,7 +242,7 @@ static int __init intel_uncore_init(void)
return -ENODEV;
uncore_max_entries = topology_max_packages() *
- topology_max_die_per_package();
+ topology_max_dies_per_package();
uncore_instances = kcalloc(uncore_max_entries,
sizeof(*uncore_instances), GFP_KERNEL);
if (!uncore_instances)
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 210b0a81b7ec..084c355c86f5 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE);
sparse_keymap_report_event(input_dev, event, val, autorelease);
-
- /* Some devices need this to report further events */
- acpi_evaluate_object(handle, "VBDL", NULL, NULL);
}
/*
diff --git a/drivers/platform/x86/intel/wmi/sbl-fw-update.c b/drivers/platform/x86/intel/wmi/sbl-fw-update.c
index 9cf5ed0f8dc2..040153ad67c1 100644
--- a/drivers/platform/x86/intel/wmi/sbl-fw-update.c
+++ b/drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -32,7 +32,7 @@ static int get_fwu_request(struct device *dev, u32 *out)
return -ENODEV;
if (obj->type != ACPI_TYPE_INTEGER) {
- dev_warn(dev, "wmi_query_block returned invalid value\n");
+ dev_warn(dev, "wmidev_block_query returned invalid value\n");
kfree(obj);
return -EINVAL;
}
@@ -55,7 +55,7 @@ static int set_fwu_request(struct device *dev, u32 in)
status = wmidev_block_set(to_wmi_device(dev), 0, &input);
if (ACPI_FAILURE(status)) {
- dev_err(dev, "wmi_set_block failed\n");
+ dev_err(dev, "wmidev_block_set failed\n");
return -ENODEV;
}
diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
index 1cf2471d54dd..3d66e1d4eb1f 100644
--- a/drivers/platform/x86/p2sb.c
+++ b/drivers/platform/x86/p2sb.c
@@ -20,12 +20,29 @@
#define P2SBC_HIDE BIT(8)
#define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1)
+#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0)
+#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2)
static const struct x86_cpu_id p2sb_cpu_ids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT),
{}
};
+/*
+ * Cache BAR0 of P2SB device functions 0 to 7.
+ * TODO: The constant 8 is the number of functions that PCI specification
+ * defines. Same definitions exist tree-wide. Unify this definition and
+ * the other definitions then move to include/uapi/linux/pci.h.
+ */
+#define NR_P2SB_RES_CACHE 8
+
+struct p2sb_res_cache {
+ u32 bus_dev_id;
+ struct resource res;
+};
+
+static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE];
+
static int p2sb_get_devfn(unsigned int *devfn)
{
unsigned int fn = P2SB_DEVFN_DEFAULT;
@@ -39,10 +56,18 @@ static int p2sb_get_devfn(unsigned int *devfn)
return 0;
}
+static bool p2sb_valid_resource(struct resource *res)
+{
+ if (res->flags)
+ return true;
+
+ return false;
+}
+
/* Copy resource from the first BAR of the device in question */
-static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
+static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
{
- struct resource *bar0 = &pdev->resource[0];
+ struct resource *bar0 = pci_resource_n(pdev, 0);
/* Make sure we have no dangling pointers in the output */
memset(mem, 0, sizeof(*mem));
@@ -56,49 +81,57 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
mem->end = bar0->end;
mem->flags = bar0->flags;
mem->desc = bar0->desc;
-
- return 0;
}
-static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn)
{
+ struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)];
struct pci_dev *pdev;
- int ret;
pdev = pci_scan_single_device(bus, devfn);
if (!pdev)
- return -ENODEV;
+ return;
- ret = p2sb_read_bar0(pdev, mem);
+ p2sb_read_bar0(pdev, &cache->res);
+ cache->bus_dev_id = bus->dev.id;
pci_stop_and_remove_bus_device(pdev);
- return ret;
}
-/**
- * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
- * @bus: PCI bus to communicate with
- * @devfn: PCI slot and function to communicate with
- * @mem: memory resource to be filled in
- *
- * The BIOS prevents the P2SB device from being enumerated by the PCI
- * subsystem, so we need to unhide and hide it back to lookup the BAR.
- *
- * if @bus is NULL, the bus 0 in domain 0 will be used.
- * If @devfn is 0, it will be replaced by devfn of the P2SB device.
- *
- * Caller must provide a valid pointer to @mem.
- *
- * Locking is handled by pci_rescan_remove_lock mutex.
- *
- * Return:
- * 0 on success or appropriate errno value on error.
- */
-int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+{
+ /* Scan the P2SB device and cache its BAR0 */
+ p2sb_scan_and_cache_devfn(bus, devfn);
+
+ /* On Goldmont p2sb_bar() also gets called for the SPI controller */
+ if (devfn == P2SB_DEVFN_GOLDMONT)
+ p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT);
+
+ if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
+ return -ENOENT;
+
+ return 0;
+}
+
+static struct pci_bus *p2sb_get_bus(struct pci_bus *bus)
+{
+ static struct pci_bus *p2sb_bus;
+
+ bus = bus ?: p2sb_bus;
+ if (bus)
+ return bus;
+
+ /* Assume P2SB is on the bus 0 in domain 0 */
+ p2sb_bus = pci_find_bus(0, 0);
+ return p2sb_bus;
+}
+
+static int p2sb_cache_resources(void)
{
- struct pci_dev *pdev_p2sb;
unsigned int devfn_p2sb;
u32 value = P2SBC_HIDE;
+ struct pci_bus *bus;
+ u16 class;
int ret;
/* Get devfn for P2SB device itself */
@@ -106,8 +139,17 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
if (ret)
return ret;
- /* if @bus is NULL, use bus 0 in domain 0 */
- bus = bus ?: pci_find_bus(0, 0);
+ bus = p2sb_get_bus(NULL);
+ if (!bus)
+ return -ENODEV;
+
+ /*
+ * When a device with same devfn exists and its device class is not
+ * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it.
+ */
+ pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class);
+ if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER)
+ return -ENODEV;
/*
* Prevent concurrent PCI bus scan from seeing the P2SB device and
@@ -115,17 +157,16 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
*/
pci_lock_rescan_remove();
- /* Unhide the P2SB device, if needed */
+ /*
+ * The BIOS prevents the P2SB device from being enumerated by the PCI
+ * subsystem, so we need to unhide and hide it back to lookup the BAR.
+ * Unhide the P2SB device here, if needed.
+ */
pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
if (value & P2SBC_HIDE)
pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
- pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb);
- if (devfn)
- ret = p2sb_scan_and_read(bus, devfn, mem);
- else
- ret = p2sb_read_bar0(pdev_p2sb, mem);
- pci_stop_and_remove_bus_device(pdev_p2sb);
+ ret = p2sb_scan_and_cache(bus, devfn_p2sb);
/* Hide the P2SB device, if it was hidden */
if (value & P2SBC_HIDE)
@@ -133,12 +174,62 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
pci_unlock_rescan_remove();
- if (ret)
- return ret;
+ return ret;
+}
- if (mem->flags == 0)
+/**
+ * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+ * @bus: PCI bus to communicate with
+ * @devfn: PCI slot and function to communicate with
+ * @mem: memory resource to be filled in
+ *
+ * If @bus is NULL, the bus 0 in domain 0 will be used.
+ * If @devfn is 0, it will be replaced by devfn of the P2SB device.
+ *
+ * Caller must provide a valid pointer to @mem.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+{
+ struct p2sb_res_cache *cache;
+ int ret;
+
+ bus = p2sb_get_bus(bus);
+ if (!bus)
+ return -ENODEV;
+
+ if (!devfn) {
+ ret = p2sb_get_devfn(&devfn);
+ if (ret)
+ return ret;
+ }
+
+ cache = &p2sb_resources[PCI_FUNC(devfn)];
+ if (cache->bus_dev_id != bus->dev.id)
return -ENODEV;
+ if (!p2sb_valid_resource(&cache->res))
+ return -ENOENT;
+
+ memcpy(mem, &cache->res, sizeof(*mem));
return 0;
}
EXPORT_SYMBOL_GPL(p2sb_bar);
+
+static int __init p2sb_fs_init(void)
+{
+ p2sb_cache_resources();
+ return 0;
+}
+
+/*
+ * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can
+ * not be locked in sysfs pci bus rescan path because of deadlock. To
+ * avoid the deadlock, access to P2SB devices with the lock at an early
+ * step in kernel initialization and cache required resources. This
+ * should happen after subsys_initcall which initializes PCI subsystem
+ * and before device_initcall which requires P2SB resources.
+ */
+fs_initcall(p2sb_fs_init);
diff --git a/drivers/platform/x86/serdev_helpers.h b/drivers/platform/x86/serdev_helpers.h
new file mode 100644
index 000000000000..bcf3a0c356ea
--- /dev/null
+++ b/drivers/platform/x86/serdev_helpers.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * In some cases UART attached devices which require an in kernel driver,
+ * e.g. UART attached Bluetooth HCIs are described in the ACPI tables
+ * by an ACPI device with a broken or missing UartSerialBusV2() resource.
+ *
+ * This causes the kernel to create a /dev/ttyS# char-device for the UART
+ * instead of creating an in kernel serdev-controller + serdev-device pair
+ * for the in kernel driver.
+ *
+ * The quirk handling in acpi_quirk_skip_serdev_enumeration() makes the kernel
+ * create a serdev-controller device for these UARTs instead of a /dev/ttyS#.
+ *
+ * Instantiating the actual serdev-device to bind to is up to pdx86 code,
+ * this header provides a helper for getting the serdev-controller device.
+ */
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/printk.h>
+#include <linux/sprintf.h>
+#include <linux/string.h>
+
+static inline struct device *
+get_serdev_controller(const char *serial_ctrl_hid,
+ const char *serial_ctrl_uid,
+ int serial_ctrl_port,
+ const char *serdev_ctrl_name)
+{
+ struct device *ctrl_dev, *child;
+ struct acpi_device *ctrl_adev;
+ char name[32];
+ int i;
+
+ ctrl_adev = acpi_dev_get_first_match_dev(serial_ctrl_hid, serial_ctrl_uid, -1);
+ if (!ctrl_adev) {
+ pr_err("error could not get %s/%s serial-ctrl adev\n",
+ serial_ctrl_hid, serial_ctrl_uid);
+ return ERR_PTR(-ENODEV);
+ }
+
+ /* get_first_physical_node() returns a weak ref */
+ ctrl_dev = get_device(acpi_get_first_physical_node(ctrl_adev));
+ if (!ctrl_dev) {
+ pr_err("error could not get %s/%s serial-ctrl physical node\n",
+ serial_ctrl_hid, serial_ctrl_uid);
+ ctrl_dev = ERR_PTR(-ENODEV);
+ goto put_ctrl_adev;
+ }
+
+ /* Walk host -> uart-ctrl -> port -> serdev-ctrl */
+ for (i = 0; i < 3; i++) {
+ switch (i) {
+ case 0:
+ snprintf(name, sizeof(name), "%s:0", dev_name(ctrl_dev));
+ break;
+ case 1:
+ snprintf(name, sizeof(name), "%s.%d",
+ dev_name(ctrl_dev), serial_ctrl_port);
+ break;
+ case 2:
+ strscpy(name, serdev_ctrl_name, sizeof(name));
+ break;
+ }
+
+ child = device_find_child_by_name(ctrl_dev, name);
+ put_device(ctrl_dev);
+ if (!child) {
+ pr_err("error could not find '%s' device\n", name);
+ ctrl_dev = ERR_PTR(-ENODEV);
+ goto put_ctrl_adev;
+ }
+
+ ctrl_dev = child;
+ }
+
+put_ctrl_adev:
+ acpi_dev_put(ctrl_adev);
+ return ctrl_dev;
+}
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 3a396b763c49..ce3e08815a8e 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -1009,7 +1009,16 @@ static ssize_t current_value_store(struct kobject *kobj,
* Note - this sets the variable and then the password as separate
* WMI calls. Function tlmi_save_bios_settings will error if the
* password is incorrect.
+ * Workstation's require the opcode to be set before changing the
+ * attribute.
*/
+ if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
+ ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin",
+ tlmi_priv.pwd_admin->password);
+ if (ret)
+ goto out;
+ }
+
set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name,
new_setting);
if (!set_str) {
@@ -1021,17 +1030,10 @@ static ssize_t current_value_store(struct kobject *kobj,
if (ret)
goto out;
- if (tlmi_priv.save_mode == TLMI_SAVE_BULK) {
+ if (tlmi_priv.save_mode == TLMI_SAVE_BULK)
tlmi_priv.save_required = true;
- } else {
- if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
- ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin",
- tlmi_priv.pwd_admin->password);
- if (ret)
- goto out;
- }
+ else
ret = tlmi_save_bios_settings("");
- }
} else { /* old non-opcode based authentication method (deprecated) */
if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;",
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index c4895e9bc714..5ecd9d33250d 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -10308,6 +10308,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode,
return 0;
default:
/* Unknown function */
+ pr_debug("unknown function 0x%x\n", funcmode);
return -EOPNOTSUPP;
}
return 0;
@@ -10493,8 +10494,8 @@ static void dytc_profile_refresh(void)
return;
perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF;
- convert_dytc_to_profile(funcmode, perfmode, &profile);
- if (profile != dytc_current_profile) {
+ err = convert_dytc_to_profile(funcmode, perfmode, &profile);
+ if (!err && profile != dytc_current_profile) {
dytc_current_profile = profile;
platform_profile_notify();
}
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 0c6733772698..975cf24ae359 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -81,7 +81,7 @@ static const struct property_entry chuwi_hi8_air_props[] = {
};
static const struct ts_dmi_data chuwi_hi8_air_data = {
- .acpi_name = "MSSL1680:00",
+ .acpi_name = "MSSL1680",
.properties = chuwi_hi8_air_props,
};
@@ -415,18 +415,13 @@ static const struct property_entry gdix1001_upside_down_props[] = {
{ }
};
-static const struct ts_dmi_data gdix1001_00_upside_down_data = {
- .acpi_name = "GDIX1001:00",
- .properties = gdix1001_upside_down_props,
-};
-
-static const struct ts_dmi_data gdix1001_01_upside_down_data = {
- .acpi_name = "GDIX1001:01",
+static const struct ts_dmi_data gdix1001_upside_down_data = {
+ .acpi_name = "GDIX1001",
.properties = gdix1001_upside_down_props,
};
-static const struct ts_dmi_data gdix1002_00_upside_down_data = {
- .acpi_name = "GDIX1002:00",
+static const struct ts_dmi_data gdix1002_upside_down_data = {
+ .acpi_name = "GDIX1002",
.properties = gdix1001_upside_down_props,
};
@@ -944,6 +939,32 @@ static const struct ts_dmi_data teclast_tbook11_data = {
.properties = teclast_tbook11_props,
};
+static const struct property_entry teclast_x16_plus_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 8),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 14),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1916),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1264),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data teclast_x16_plus_data = {
+ .embedded_fw = {
+ .name = "silead/gsl3692-teclast-x16-plus.fw",
+ .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 },
+ .length = 43560,
+ .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25,
+ 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75,
+ 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f,
+ 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 },
+ },
+ .acpi_name = "MSSL1680:00",
+ .properties = teclast_x16_plus_props,
+};
+
static const struct property_entry teclast_x3_plus_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
@@ -1386,7 +1407,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
{
/* Juno Tablet */
- .driver_data = (void *)&gdix1002_00_upside_down_data,
+ .driver_data = (void *)&gdix1002_upside_down_data,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Default string"),
/* Both product- and board-name being "Default string" is somewhat rare */
@@ -1613,6 +1634,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Teclast X16 Plus */
+ .driver_data = (void *)&teclast_x16_plus_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+ DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"),
+ },
+ },
+ {
/* Teclast X3 Plus */
.driver_data = (void *)&teclast_x3_plus_data,
.matches = {
@@ -1623,7 +1653,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
{
/* Teclast X89 (Android version / BIOS) */
- .driver_data = (void *)&gdix1001_00_upside_down_data,
+ .driver_data = (void *)&gdix1001_upside_down_data,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "WISKY"),
DMI_MATCH(DMI_BOARD_NAME, "3G062i"),
@@ -1631,7 +1661,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
{
/* Teclast X89 (Windows version / BIOS) */
- .driver_data = (void *)&gdix1001_01_upside_down_data,
+ .driver_data = (void *)&gdix1001_upside_down_data,
.matches = {
/* tPAD is too generic, also match on bios date */
DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
@@ -1649,7 +1679,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
{
/* Teclast X98 Pro */
- .driver_data = (void *)&gdix1001_00_upside_down_data,
+ .driver_data = (void *)&gdix1001_upside_down_data,
.matches = {
/*
* Only match BIOS date, because the manufacturers
@@ -1753,7 +1783,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
{
/* "WinBook TW100" */
- .driver_data = (void *)&gdix1001_00_upside_down_data,
+ .driver_data = (void *)&gdix1001_upside_down_data,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
DMI_MATCH(DMI_PRODUCT_NAME, "TW100")
@@ -1761,7 +1791,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
{
/* WinBook TW700 */
- .driver_data = (void *)&gdix1001_00_upside_down_data,
+ .driver_data = (void *)&gdix1001_upside_down_data,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
DMI_MATCH(DMI_PRODUCT_NAME, "TW700")
@@ -1786,7 +1816,7 @@ static void ts_dmi_add_props(struct i2c_client *client)
int error;
if (has_acpi_companion(dev) &&
- !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) {
+ strstarts(client->name, ts_data->acpi_name)) {
error = device_create_managed_software_node(dev, ts_data->properties, NULL);
if (error)
dev_err(dev, "failed to add properties: %d\n", error);
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index bd271a5730aa..3c288e8f404b 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -25,6 +25,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/types.h>
@@ -56,7 +57,6 @@ static_assert(__alignof__(struct guid_block) == 1);
enum { /* wmi_block flags */
WMI_READ_TAKES_NO_ARGS,
- WMI_PROBED,
};
struct wmi_block {
@@ -64,8 +64,10 @@ struct wmi_block {
struct list_head list;
struct guid_block gblock;
struct acpi_device *acpi_device;
+ struct rw_semaphore notify_lock; /* Protects notify callback add/remove */
wmi_notify_handler handler;
void *handler_data;
+ bool driver_ready;
unsigned long flags;
};
@@ -219,6 +221,17 @@ static int wmidev_match_guid(struct device *dev, const void *data)
return 0;
}
+static int wmidev_match_notify_id(struct device *dev, const void *data)
+{
+ struct wmi_block *wblock = dev_to_wblock(dev);
+ const u32 *notify_id = data;
+
+ if (wblock->gblock.flags & ACPI_WMI_EVENT && wblock->gblock.notify_id == *notify_id)
+ return 1;
+
+ return 0;
+}
+
static struct bus_type wmi_bus_type;
static struct wmi_device *wmi_find_device_by_guid(const char *guid_string)
@@ -238,6 +251,17 @@ static struct wmi_device *wmi_find_device_by_guid(const char *guid_string)
return dev_to_wdev(dev);
}
+static struct wmi_device *wmi_find_event_by_notify_id(const u32 notify_id)
+{
+ struct device *dev;
+
+ dev = bus_find_device(&wmi_bus_type, NULL, &notify_id, wmidev_match_notify_id);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ return to_wmi_device(dev);
+}
+
static void wmi_device_put(struct wmi_device *wdev)
{
put_device(&wdev->dev);
@@ -572,32 +596,31 @@ acpi_status wmi_install_notify_handler(const char *guid,
wmi_notify_handler handler,
void *data)
{
- struct wmi_block *block;
- acpi_status status = AE_NOT_EXIST;
- guid_t guid_input;
-
- if (!guid || !handler)
- return AE_BAD_PARAMETER;
+ struct wmi_block *wblock;
+ struct wmi_device *wdev;
+ acpi_status status;
- if (guid_parse(guid, &guid_input))
- return AE_BAD_PARAMETER;
+ wdev = wmi_find_device_by_guid(guid);
+ if (IS_ERR(wdev))
+ return AE_ERROR;
- list_for_each_entry(block, &wmi_block_list, list) {
- acpi_status wmi_status;
+ wblock = container_of(wdev, struct wmi_block, dev);
- if (guid_equal(&block->gblock.guid, &guid_input)) {
- if (block->handler)
- return AE_ALREADY_ACQUIRED;
+ down_write(&wblock->notify_lock);
+ if (wblock->handler) {
+ status = AE_ALREADY_ACQUIRED;
+ } else {
+ wblock->handler = handler;
+ wblock->handler_data = data;
- block->handler = handler;
- block->handler_data = data;
+ if (ACPI_FAILURE(wmi_method_enable(wblock, true)))
+ dev_warn(&wblock->dev.dev, "Failed to enable device\n");
- wmi_status = wmi_method_enable(block, true);
- if ((wmi_status != AE_OK) ||
- ((wmi_status == AE_OK) && (status == AE_NOT_EXIST)))
- status = wmi_status;
- }
+ status = AE_OK;
}
+ up_write(&wblock->notify_lock);
+
+ wmi_device_put(wdev);
return status;
}
@@ -613,30 +636,31 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler);
*/
acpi_status wmi_remove_notify_handler(const char *guid)
{
- struct wmi_block *block;
- acpi_status status = AE_NOT_EXIST;
- guid_t guid_input;
+ struct wmi_block *wblock;
+ struct wmi_device *wdev;
+ acpi_status status;
- if (!guid)
- return AE_BAD_PARAMETER;
+ wdev = wmi_find_device_by_guid(guid);
+ if (IS_ERR(wdev))
+ return AE_ERROR;
- if (guid_parse(guid, &guid_input))
- return AE_BAD_PARAMETER;
+ wblock = container_of(wdev, struct wmi_block, dev);
- list_for_each_entry(block, &wmi_block_list, list) {
- acpi_status wmi_status;
+ down_write(&wblock->notify_lock);
+ if (!wblock->handler) {
+ status = AE_NULL_ENTRY;
+ } else {
+ if (ACPI_FAILURE(wmi_method_enable(wblock, false)))
+ dev_warn(&wblock->dev.dev, "Failed to disable device\n");
- if (guid_equal(&block->gblock.guid, &guid_input)) {
- if (!block->handler)
- return AE_NULL_ENTRY;
+ wblock->handler = NULL;
+ wblock->handler_data = NULL;
- wmi_status = wmi_method_enable(block, false);
- block->handler = NULL;
- block->handler_data = NULL;
- if (wmi_status != AE_OK || (wmi_status == AE_OK && status == AE_NOT_EXIST))
- status = wmi_status;
- }
+ status = AE_OK;
}
+ up_write(&wblock->notify_lock);
+
+ wmi_device_put(wdev);
return status;
}
@@ -655,15 +679,19 @@ EXPORT_SYMBOL_GPL(wmi_remove_notify_handler);
acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
{
struct wmi_block *wblock;
+ struct wmi_device *wdev;
+ acpi_status status;
- list_for_each_entry(wblock, &wmi_block_list, list) {
- struct guid_block *gblock = &wblock->gblock;
+ wdev = wmi_find_event_by_notify_id(event);
+ if (IS_ERR(wdev))
+ return AE_NOT_FOUND;
- if ((gblock->flags & ACPI_WMI_EVENT) && gblock->notify_id == event)
- return get_event_data(wblock, out);
- }
+ wblock = container_of(wdev, struct wmi_block, dev);
+ status = get_event_data(wblock, out);
- return AE_NOT_FOUND;
+ wmi_device_put(wdev);
+
+ return status;
}
EXPORT_SYMBOL_GPL(wmi_get_event_data);
@@ -868,7 +896,7 @@ static int wmi_dev_probe(struct device *dev)
if (wdriver->probe) {
ret = wdriver->probe(dev_to_wdev(dev),
find_guid_context(wblock, wdriver));
- if (!ret) {
+ if (ret) {
if (ACPI_FAILURE(wmi_method_enable(wblock, false)))
dev_warn(dev, "Failed to disable device\n");
@@ -876,7 +904,9 @@ static int wmi_dev_probe(struct device *dev)
}
}
- set_bit(WMI_PROBED, &wblock->flags);
+ down_write(&wblock->notify_lock);
+ wblock->driver_ready = true;
+ up_write(&wblock->notify_lock);
return 0;
}
@@ -886,7 +916,9 @@ static void wmi_dev_remove(struct device *dev)
struct wmi_block *wblock = dev_to_wblock(dev);
struct wmi_driver *wdriver = drv_to_wdrv(dev->driver);
- clear_bit(WMI_PROBED, &wblock->flags);
+ down_write(&wblock->notify_lock);
+ wblock->driver_ready = false;
+ up_write(&wblock->notify_lock);
if (wdriver->remove)
wdriver->remove(dev_to_wdev(dev));
@@ -999,6 +1031,8 @@ static int wmi_create_device(struct device *wmi_bus_dev,
wblock->dev.setable = true;
out_init:
+ init_rwsem(&wblock->notify_lock);
+ wblock->driver_ready = false;
wblock->dev.dev.bus = &wmi_bus_type;
wblock->dev.dev.parent = wmi_bus_dev;
@@ -1171,6 +1205,26 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address,
}
}
+static void wmi_notify_driver(struct wmi_block *wblock)
+{
+ struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver);
+ struct acpi_buffer data = { ACPI_ALLOCATE_BUFFER, NULL };
+ acpi_status status;
+
+ if (!driver->no_notify_data) {
+ status = get_event_data(wblock, &data);
+ if (ACPI_FAILURE(status)) {
+ dev_warn(&wblock->dev.dev, "Failed to get event data\n");
+ return;
+ }
+ }
+
+ if (driver->notify)
+ driver->notify(&wblock->dev, data.pointer);
+
+ kfree(data.pointer);
+}
+
static int wmi_notify_device(struct device *dev, void *data)
{
struct wmi_block *wblock = dev_to_wblock(dev);
@@ -1179,28 +1233,17 @@ static int wmi_notify_device(struct device *dev, void *data)
if (!(wblock->gblock.flags & ACPI_WMI_EVENT && wblock->gblock.notify_id == *event))
return 0;
- /* If a driver is bound, then notify the driver. */
- if (test_bit(WMI_PROBED, &wblock->flags) && wblock->dev.dev.driver) {
- struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver);
- struct acpi_buffer evdata = { ACPI_ALLOCATE_BUFFER, NULL };
- acpi_status status;
-
- if (!driver->no_notify_data) {
- status = get_event_data(wblock, &evdata);
- if (ACPI_FAILURE(status)) {
- dev_warn(&wblock->dev.dev, "failed to get event data\n");
- return -EIO;
- }
- }
-
- if (driver->notify)
- driver->notify(&wblock->dev, evdata.pointer);
-
- kfree(evdata.pointer);
- } else if (wblock->handler) {
- /* Legacy handler */
- wblock->handler(*event, wblock->handler_data);
+ down_read(&wblock->notify_lock);
+ /* The WMI driver notify handler conflicts with the legacy WMI handler.
+ * Because of this the WMI driver notify handler takes precedence.
+ */
+ if (wblock->dev.dev.driver && wblock->driver_ready) {
+ wmi_notify_driver(wblock);
+ } else {
+ if (wblock->handler)
+ wblock->handler(*event, wblock->handler_data);
}
+ up_read(&wblock->notify_lock);
acpi_bus_generate_netlink_event(wblock->acpi_device->pnp.device_class,
dev_name(&wblock->dev.dev), *event, 0);
diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c
index f8221a15575b..a3415f1c0b5f 100644
--- a/drivers/platform/x86/x86-android-tablets/core.c
+++ b/drivers/platform/x86/x86-android-tablets/core.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include "x86-android-tablets.h"
+#include "../serdev_helpers.h"
static struct platform_device *x86_android_tablet_device;
@@ -113,6 +114,9 @@ int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data)
if (irq_type != IRQ_TYPE_NONE && irq_type != irq_get_trigger_type(irq))
irq_set_irq_type(irq, irq_type);
+ if (data->free_gpio)
+ devm_gpiod_put(&x86_android_tablet_device->dev, gpiod);
+
return irq;
case X86_ACPI_IRQ_TYPE_PMIC:
status = acpi_get_handle(NULL, data->chip, &handle);
@@ -229,38 +233,20 @@ static __init int x86_instantiate_spi_dev(const struct x86_dev_info *dev_info, i
static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int idx)
{
- struct acpi_device *ctrl_adev, *serdev_adev;
+ struct acpi_device *serdev_adev;
struct serdev_device *serdev;
struct device *ctrl_dev;
int ret = -ENODEV;
- ctrl_adev = acpi_dev_get_first_match_dev(info->ctrl_hid, info->ctrl_uid, -1);
- if (!ctrl_adev) {
- pr_err("error could not get %s/%s ctrl adev\n",
- info->ctrl_hid, info->ctrl_uid);
- return -ENODEV;
- }
+ ctrl_dev = get_serdev_controller(info->ctrl_hid, info->ctrl_uid, 0,
+ info->ctrl_devname);
+ if (IS_ERR(ctrl_dev))
+ return PTR_ERR(ctrl_dev);
serdev_adev = acpi_dev_get_first_match_dev(info->serdev_hid, NULL, -1);
if (!serdev_adev) {
pr_err("error could not get %s serdev adev\n", info->serdev_hid);
- goto put_ctrl_adev;
- }
-
- /* get_first_physical_node() returns a weak ref, no need to put() it */
- ctrl_dev = acpi_get_first_physical_node(ctrl_adev);
- if (!ctrl_dev) {
- pr_err("error could not get %s/%s ctrl physical dev\n",
- info->ctrl_hid, info->ctrl_uid);
- goto put_serdev_adev;
- }
-
- /* ctrl_dev now points to the controller's parent, get the controller */
- ctrl_dev = device_find_child_by_name(ctrl_dev, info->ctrl_devname);
- if (!ctrl_dev) {
- pr_err("error could not get %s/%s %s ctrl dev\n",
- info->ctrl_hid, info->ctrl_uid, info->ctrl_devname);
- goto put_serdev_adev;
+ goto put_ctrl_dev;
}
serdev = serdev_device_alloc(to_serdev_controller(ctrl_dev));
@@ -283,8 +269,8 @@ static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int
put_serdev_adev:
acpi_dev_put(serdev_adev);
-put_ctrl_adev:
- acpi_dev_put(ctrl_adev);
+put_ctrl_dev:
+ put_device(ctrl_dev);
return ret;
}
diff --git a/drivers/platform/x86/x86-android-tablets/lenovo.c b/drivers/platform/x86/x86-android-tablets/lenovo.c
index f1c66a61bfc5..c297391955ad 100644
--- a/drivers/platform/x86/x86-android-tablets/lenovo.c
+++ b/drivers/platform/x86/x86-android-tablets/lenovo.c
@@ -116,6 +116,7 @@ static const struct x86_i2c_client_info lenovo_yb1_x90_i2c_clients[] __initconst
.trigger = ACPI_EDGE_SENSITIVE,
.polarity = ACPI_ACTIVE_LOW,
.con_id = "goodix_ts_irq",
+ .free_gpio = true,
},
}, {
/* Wacom Digitizer in keyboard half */
diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c
index bc6bbf7ec6ea..278402dcb808 100644
--- a/drivers/platform/x86/x86-android-tablets/other.c
+++ b/drivers/platform/x86/x86-android-tablets/other.c
@@ -68,7 +68,7 @@ static const struct x86_i2c_client_info acer_b1_750_i2c_clients[] __initconst =
},
};
-static struct gpiod_lookup_table acer_b1_750_goodix_gpios = {
+static struct gpiod_lookup_table acer_b1_750_nvt_ts_gpios = {
.dev_id = "i2c-NVT-ts",
.table = {
GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_LOW),
@@ -77,7 +77,7 @@ static struct gpiod_lookup_table acer_b1_750_goodix_gpios = {
};
static struct gpiod_lookup_table * const acer_b1_750_gpios[] = {
- &acer_b1_750_goodix_gpios,
+ &acer_b1_750_nvt_ts_gpios,
&int3496_reference_gpios,
NULL
};
diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
index 49fed9410adb..468993edfeee 100644
--- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
+++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
@@ -39,6 +39,7 @@ struct x86_acpi_irq_data {
int index;
int trigger; /* ACPI_EDGE_SENSITIVE / ACPI_LEVEL_SENSITIVE */
int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */
+ bool free_gpio; /* Release GPIO after getting IRQ (for TYPE_GPIOINT) */
const char *con_id;
};
diff --git a/drivers/pmdomain/arm/scmi_perf_domain.c b/drivers/pmdomain/arm/scmi_perf_domain.c
index 709bbc448fad..d7ef46ccd9b8 100644
--- a/drivers/pmdomain/arm/scmi_perf_domain.c
+++ b/drivers/pmdomain/arm/scmi_perf_domain.c
@@ -159,6 +159,9 @@ static void scmi_perf_domain_remove(struct scmi_device *sdev)
struct genpd_onecell_data *scmi_pd_data = dev_get_drvdata(dev);
int i;
+ if (!scmi_pd_data)
+ return;
+
of_genpd_del_provider(dev->of_node);
for (i = 0; i < scmi_pd_data->num_domains; i++)
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c
index a1f6cba3ae6c..18e232b5ed53 100644
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -1109,7 +1109,7 @@ static int __init genpd_power_off_unused(void)
return 0;
}
-late_initcall(genpd_power_off_unused);
+late_initcall_sync(genpd_power_off_unused);
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c
index e26dc17d07ad..e274e3315fe7 100644
--- a/drivers/pmdomain/mediatek/mtk-pm-domains.c
+++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c
@@ -561,6 +561,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren
goto err_put_node;
}
+ /* recursive call to add all subdomains */
+ ret = scpsys_add_subdomain(scpsys, child);
+ if (ret)
+ goto err_put_node;
+
ret = pm_genpd_add_subdomain(parent_pd, child_pd);
if (ret) {
dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n",
@@ -570,11 +575,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren
dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name,
child_pd->name);
}
-
- /* recursive call to add all subdomains */
- ret = scpsys_add_subdomain(scpsys, child);
- if (ret)
- goto err_put_node;
}
return 0;
@@ -588,9 +588,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd)
{
int ret;
- if (scpsys_domain_is_on(pd))
- scpsys_power_off(&pd->genpd);
-
/*
* We're in the error cleanup already, so we only complain,
* but won't emit another error on top of the original one.
@@ -600,6 +597,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd)
dev_err(pd->scpsys->dev,
"failed to remove domain '%s' : %d - state may be inconsistent\n",
pd->genpd.name, ret);
+ if (scpsys_domain_is_on(pd))
+ scpsys_power_off(&pd->genpd);
clk_bulk_put(pd->num_clks, pd->clks);
clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks);
diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c
index 3078896b1300..de9121ef4216 100644
--- a/drivers/pmdomain/qcom/rpmhpd.c
+++ b/drivers/pmdomain/qcom/rpmhpd.c
@@ -217,7 +217,6 @@ static struct rpmhpd *sa8540p_rpmhpds[] = {
[SC8280XP_CX] = &cx,
[SC8280XP_CX_AO] = &cx_ao,
[SC8280XP_EBI] = &ebi,
- [SC8280XP_GFX] = &gfx,
[SC8280XP_LCX] = &lcx,
[SC8280XP_LMX] = &lmx,
[SC8280XP_MMCX] = &mmcx,
@@ -692,6 +691,7 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
unsigned int active_corner, sleep_corner;
unsigned int this_active_corner = 0, this_sleep_corner = 0;
unsigned int peer_active_corner = 0, peer_sleep_corner = 0;
+ unsigned int peer_enabled_corner;
if (pd->state_synced) {
to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner);
@@ -701,9 +701,11 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
this_sleep_corner = pd->level_count - 1;
}
- if (peer && peer->enabled)
- to_active_sleep(peer, peer->corner, &peer_active_corner,
+ if (peer && peer->enabled) {
+ peer_enabled_corner = max(peer->corner, peer->enable_corner);
+ to_active_sleep(peer, peer_enabled_corner, &peer_active_corner,
&peer_sleep_corner);
+ }
active_corner = max(this_active_corner, peer_active_corner);
diff --git a/drivers/pmdomain/renesas/r8a77980-sysc.c b/drivers/pmdomain/renesas/r8a77980-sysc.c
index 39ca84a67daa..621e411fc999 100644
--- a/drivers/pmdomain/renesas/r8a77980-sysc.c
+++ b/drivers/pmdomain/renesas/r8a77980-sysc.c
@@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = {
PD_CPU_NOCR },
{ "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU,
PD_CPU_NOCR },
- { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON },
+ { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON,
+ PD_CPU_NOCR },
{ "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON },
{ "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR },
{ "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR },
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index f21cb05815ec..3e31375491d5 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -978,6 +978,7 @@ config CHARGER_QCOM_SMB2
config FUEL_GAUGE_MM8013
tristate "Mitsumi MM8013 fuel gauge driver"
depends on I2C
+ select REGMAP_I2C
help
Say Y here to enable the Mitsumi MM8013 fuel gauge driver.
It enables the monitoring of many battery parameters, including
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index 3a1798b0c1a7..9910c600743e 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client)
{
struct bq27xxx_device_info *di = i2c_get_clientdata(client);
- free_irq(client->irq, di);
+ if (client->irq)
+ free_irq(client->irq, di);
+
bq27xxx_battery_teardown(di);
mutex_lock(&battery_mutex);
diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c
index a12e2a66d516..ec163d1bcd18 100644
--- a/drivers/power/supply/qcom_battmgr.c
+++ b/drivers/power/supply/qcom_battmgr.c
@@ -282,7 +282,6 @@ struct qcom_battmgr_wireless {
struct qcom_battmgr {
struct device *dev;
- struct auxiliary_device *adev;
struct pmic_glink_client *client;
enum qcom_battmgr_variant variant;
@@ -1294,69 +1293,11 @@ static void qcom_battmgr_enable_worker(struct work_struct *work)
dev_err(battmgr->dev, "failed to request power notifications\n");
}
-static char *qcom_battmgr_battery[] = { "battery" };
-
-static void qcom_battmgr_register_psy(struct qcom_battmgr *battmgr)
-{
- struct power_supply_config psy_cfg_supply = {};
- struct auxiliary_device *adev = battmgr->adev;
- struct power_supply_config psy_cfg = {};
- struct device *dev = &adev->dev;
-
- psy_cfg.drv_data = battmgr;
- psy_cfg.of_node = adev->dev.of_node;
-
- psy_cfg_supply.drv_data = battmgr;
- psy_cfg_supply.of_node = adev->dev.of_node;
- psy_cfg_supply.supplied_to = qcom_battmgr_battery;
- psy_cfg_supply.num_supplicants = 1;
-
- if (battmgr->variant == QCOM_BATTMGR_SC8280XP) {
- battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg);
- if (IS_ERR(battmgr->bat_psy))
- dev_err(dev, "failed to register battery power supply (%ld)\n",
- PTR_ERR(battmgr->bat_psy));
-
- battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply);
- if (IS_ERR(battmgr->ac_psy))
- dev_err(dev, "failed to register AC power supply (%ld)\n",
- PTR_ERR(battmgr->ac_psy));
-
- battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply);
- if (IS_ERR(battmgr->usb_psy))
- dev_err(dev, "failed to register USB power supply (%ld)\n",
- PTR_ERR(battmgr->usb_psy));
-
- battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply);
- if (IS_ERR(battmgr->wls_psy))
- dev_err(dev, "failed to register wireless charing power supply (%ld)\n",
- PTR_ERR(battmgr->wls_psy));
- } else {
- battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg);
- if (IS_ERR(battmgr->bat_psy))
- dev_err(dev, "failed to register battery power supply (%ld)\n",
- PTR_ERR(battmgr->bat_psy));
-
- battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply);
- if (IS_ERR(battmgr->usb_psy))
- dev_err(dev, "failed to register USB power supply (%ld)\n",
- PTR_ERR(battmgr->usb_psy));
-
- battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply);
- if (IS_ERR(battmgr->wls_psy))
- dev_err(dev, "failed to register wireless charing power supply (%ld)\n",
- PTR_ERR(battmgr->wls_psy));
- }
-}
-
static void qcom_battmgr_pdr_notify(void *priv, int state)
{
struct qcom_battmgr *battmgr = priv;
if (state == SERVREG_SERVICE_STATE_UP) {
- if (!battmgr->bat_psy)
- qcom_battmgr_register_psy(battmgr);
-
battmgr->service_up = true;
schedule_work(&battmgr->enable_work);
} else {
@@ -1371,9 +1312,13 @@ static const struct of_device_id qcom_battmgr_of_variants[] = {
{}
};
+static char *qcom_battmgr_battery[] = { "battery" };
+
static int qcom_battmgr_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
+ struct power_supply_config psy_cfg_supply = {};
+ struct power_supply_config psy_cfg = {};
const struct of_device_id *match;
struct qcom_battmgr *battmgr;
struct device *dev = &adev->dev;
@@ -1383,7 +1328,14 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev,
return -ENOMEM;
battmgr->dev = dev;
- battmgr->adev = adev;
+
+ psy_cfg.drv_data = battmgr;
+ psy_cfg.of_node = adev->dev.of_node;
+
+ psy_cfg_supply.drv_data = battmgr;
+ psy_cfg_supply.of_node = adev->dev.of_node;
+ psy_cfg_supply.supplied_to = qcom_battmgr_battery;
+ psy_cfg_supply.num_supplicants = 1;
INIT_WORK(&battmgr->enable_work, qcom_battmgr_enable_worker);
mutex_init(&battmgr->lock);
@@ -1395,6 +1347,43 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev,
else
battmgr->variant = QCOM_BATTMGR_SM8350;
+ if (battmgr->variant == QCOM_BATTMGR_SC8280XP) {
+ battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg);
+ if (IS_ERR(battmgr->bat_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy),
+ "failed to register battery power supply\n");
+
+ battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply);
+ if (IS_ERR(battmgr->ac_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->ac_psy),
+ "failed to register AC power supply\n");
+
+ battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply);
+ if (IS_ERR(battmgr->usb_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy),
+ "failed to register USB power supply\n");
+
+ battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply);
+ if (IS_ERR(battmgr->wls_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy),
+ "failed to register wireless charing power supply\n");
+ } else {
+ battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg);
+ if (IS_ERR(battmgr->bat_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy),
+ "failed to register battery power supply\n");
+
+ battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply);
+ if (IS_ERR(battmgr->usb_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy),
+ "failed to register USB power supply\n");
+
+ battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply);
+ if (IS_ERR(battmgr->wls_psy))
+ return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy),
+ "failed to register wireless charing power supply\n");
+ }
+
battmgr->client = devm_pmic_glink_register_client(dev,
PMIC_GLINK_OWNER_BATTMGR,
qcom_battmgr_callback,
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 2feed036c1cd..00c861899a47 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1564,7 +1564,7 @@ struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id
if (id_is_cpu) {
rp->id = topology_logical_die_id(id);
rp->lead_cpu = id;
- if (topology_max_die_per_package() > 1)
+ if (topology_max_dies_per_package() > 1)
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
topology_physical_package_id(id), topology_die_id(id));
else
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 5dd5f188e14f..604541dcb320 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -155,6 +155,18 @@ config PTP_1588_CLOCK_IDTCM
To compile this driver as a module, choose M here: the module
will be called ptp_clockmatrix.
+config PTP_1588_CLOCK_FC3W
+ tristate "RENESAS FemtoClock3 Wireless as PTP clock"
+ depends on PTP_1588_CLOCK && I2C
+ default n
+ help
+ This driver adds support for using Renesas FemtoClock3 Wireless
+ as a PTP clock. This clock is only useful if your time stamping
+ MAC is connected to the RENESAS chip.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ptp_fc3.
+
config PTP_1588_CLOCK_MOCK
tristate "Mock-up PTP clock"
depends on PTP_1588_CLOCK
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
index dea0cebd2303..68bf02078053 100644
--- a/drivers/ptp/Makefile
+++ b/drivers/ptp/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp-qoriq.o
ptp-qoriq-y += ptp_qoriq.o
ptp-qoriq-$(CONFIG_DEBUG_FS) += ptp_qoriq_debugfs.o
obj-$(CONFIG_PTP_1588_CLOCK_IDTCM) += ptp_clockmatrix.o
+obj-$(CONFIG_PTP_1588_CLOCK_FC3W) += ptp_fc3.o
obj-$(CONFIG_PTP_1588_CLOCK_IDT82P33) += ptp_idt82p33.o
obj-$(CONFIG_PTP_1588_CLOCK_MOCK) += ptp_mock.o
obj-$(CONFIG_PTP_1588_CLOCK_VMW) += ptp_vmw.o
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 15b804ba4868..c56cd0f63909 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -4,7 +4,6 @@
*
* Copyright (C) 2010 OMICRON electronics GmbH
*/
-#include <linux/idr.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -16,6 +15,7 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
+#include <linux/xarray.h>
#include <uapi/linux/sched/types.h>
#include "ptp_private.h"
@@ -25,13 +25,16 @@
#define PTP_PPS_EVENT PPS_CAPTUREASSERT
#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC)
-struct class *ptp_class;
+const struct class ptp_class = {
+ .name = "ptp",
+ .dev_groups = ptp_groups
+};
/* private globals */
static dev_t ptp_devt;
-static DEFINE_IDA(ptp_clocks_map);
+static DEFINE_XARRAY_ALLOC(ptp_clocks_map);
/* time stamp event queue operations */
@@ -44,18 +47,31 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
struct ptp_clock_event *src)
{
struct ptp_extts_event *dst;
+ struct timespec64 offset_ts;
unsigned long flags;
s64 seconds;
u32 remainder;
- seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
+ if (src->type == PTP_CLOCK_EXTTS) {
+ seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
+ } else if (src->type == PTP_CLOCK_EXTOFF) {
+ offset_ts = ns_to_timespec64(src->offset);
+ seconds = offset_ts.tv_sec;
+ remainder = offset_ts.tv_nsec;
+ } else {
+ WARN(1, "%s: unknown type %d\n", __func__, src->type);
+ return;
+ }
spin_lock_irqsave(&queue->lock, flags);
dst = &queue->buf[queue->tail];
dst->index = src->index;
+ dst->flags = PTP_EXTTS_EVENT_VALID;
dst->t.sec = seconds;
dst->t.nsec = remainder;
+ if (src->type == PTP_CLOCK_EXTOFF)
+ dst->flags |= PTP_EXT_OFFSET;
/* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */
if (!queue_free(queue))
@@ -188,7 +204,7 @@ static void ptp_clock_release(struct device *dev)
bitmap_free(tsevq->mask);
kfree(tsevq);
debugfs_remove(ptp->debugfs_root);
- ida_free(&ptp_clocks_map, ptp->index);
+ xa_erase(&ptp_clocks_map, ptp->index);
kfree(ptp);
}
@@ -220,7 +236,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
{
struct ptp_clock *ptp;
struct timestamp_event_queue *queue = NULL;
- int err = 0, index, major = MAJOR(ptp_devt);
+ int err, index, major = MAJOR(ptp_devt);
char debugfsname[16];
size_t size;
@@ -228,16 +244,16 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
return ERR_PTR(-EINVAL);
/* Initialize a clock structure. */
- err = -ENOMEM;
ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
- if (ptp == NULL)
+ if (!ptp) {
+ err = -ENOMEM;
goto no_memory;
+ }
- index = ida_alloc_max(&ptp_clocks_map, MINORMASK, GFP_KERNEL);
- if (index < 0) {
- err = index;
+ err = xa_alloc(&ptp_clocks_map, &index, ptp, xa_limit_31b,
+ GFP_KERNEL);
+ if (err)
goto no_slot;
- }
ptp->clock.ops = ptp_clock_ops;
ptp->info = info;
@@ -245,13 +261,17 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
ptp->index = index;
INIT_LIST_HEAD(&ptp->tsevqs);
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
- if (!queue)
+ if (!queue) {
+ err = -ENOMEM;
goto no_memory_queue;
+ }
list_add_tail(&queue->qlist, &ptp->tsevqs);
spin_lock_init(&ptp->tsevqs_lock);
queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL);
- if (!queue->mask)
+ if (!queue->mask) {
+ err = -ENOMEM;
goto no_memory_bitmap;
+ }
bitmap_set(queue->mask, 0, PTP_MAX_CHANNELS);
spin_lock_init(&queue->lock);
mutex_init(&ptp->pincfg_mux);
@@ -322,7 +342,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
/* Initialize a new device of our class in our clock structure. */
device_initialize(&ptp->dev);
ptp->dev.devt = ptp->devid;
- ptp->dev.class = ptp_class;
+ ptp->dev.class = &ptp_class;
ptp->dev.parent = parent;
ptp->dev.groups = ptp->pin_attr_groups;
ptp->dev.release = ptp_clock_release;
@@ -365,7 +385,7 @@ no_memory_bitmap:
list_del(&queue->qlist);
kfree(queue);
no_memory_queue:
- ida_free(&ptp_clocks_map, index);
+ xa_erase(&ptp_clocks_map, index);
no_slot:
kfree(ptp);
no_memory:
@@ -417,6 +437,7 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
break;
case PTP_CLOCK_EXTTS:
+ case PTP_CLOCK_EXTOFF:
/* Enqueue timestamp on selected queues */
spin_lock_irqsave(&ptp->tsevqs_lock, flags);
list_for_each_entry(tsevq, &ptp->tsevqs, qlist) {
@@ -495,19 +516,19 @@ EXPORT_SYMBOL(ptp_cancel_worker_sync);
static void __exit ptp_exit(void)
{
- class_destroy(ptp_class);
+ class_unregister(&ptp_class);
unregister_chrdev_region(ptp_devt, MINORMASK + 1);
- ida_destroy(&ptp_clocks_map);
+ xa_destroy(&ptp_clocks_map);
}
static int __init ptp_init(void)
{
int err;
- ptp_class = class_create("ptp");
- if (IS_ERR(ptp_class)) {
+ err = class_register(&ptp_class);
+ if (err) {
pr_err("ptp: failed to allocate class\n");
- return PTR_ERR(ptp_class);
+ return err;
}
err = alloc_chrdev_region(&ptp_devt, 0, MINORMASK + 1, "ptp");
@@ -516,12 +537,11 @@ static int __init ptp_init(void)
goto no_region;
}
- ptp_class->dev_groups = ptp_groups;
pr_info("PTP clock support registered\n");
return 0;
no_region:
- class_destroy(ptp_class);
+ class_unregister(&ptp_class);
return err;
}
diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c
new file mode 100644
index 000000000000..6ef982862e27
--- /dev/null
+++ b/drivers/ptp/ptp_fc3.c
@@ -0,0 +1,1014 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PTP hardware clock driver for the FemtoClock3 family of timing and
+ * synchronization devices.
+ *
+ * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/timekeeping.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <linux/bitfield.h>
+#include <linux/mfd/rsmu.h>
+#include <linux/mfd/idtRC38xxx_reg.h>
+#include <asm/unaligned.h>
+
+#include "ptp_private.h"
+#include "ptp_fc3.h"
+
+MODULE_DESCRIPTION("Driver for IDT FemtoClock3(TM) family");
+MODULE_AUTHOR("IDT support-1588 <IDT-support-1588@lm.renesas.com>");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL");
+
+/*
+ * The name of the firmware file to be loaded
+ * over-rides any automatic selection
+ */
+static char *firmware;
+module_param(firmware, charp, 0);
+
+static s64 ns2counters(struct idtfc3 *idtfc3, s64 nsec, u32 *sub_ns)
+{
+ s64 sync;
+ s32 rem;
+
+ if (likely(nsec >= 0)) {
+ sync = div_u64_rem(nsec, idtfc3->ns_per_sync, &rem);
+ *sub_ns = rem;
+ } else {
+ sync = -div_u64_rem(-nsec - 1, idtfc3->ns_per_sync, &rem) - 1;
+ *sub_ns = idtfc3->ns_per_sync - rem - 1;
+ }
+
+ return sync * idtfc3->ns_per_sync;
+}
+
+static s64 tdc_meas2offset(struct idtfc3 *idtfc3, u64 meas_read)
+{
+ s64 coarse, fine;
+
+ fine = sign_extend64(FIELD_GET(FINE_MEAS_MASK, meas_read), 12);
+ coarse = sign_extend64(FIELD_GET(COARSE_MEAS_MASK, meas_read), (39 - 13));
+
+ fine = div64_s64(fine * NSEC_PER_SEC, idtfc3->tdc_apll_freq * 62LL);
+ coarse = div64_s64(coarse * NSEC_PER_SEC, idtfc3->time_ref_freq);
+
+ return coarse + fine;
+}
+
+static s64 tdc_offset2phase(struct idtfc3 *idtfc3, s64 offset_ns)
+{
+ if (offset_ns > idtfc3->ns_per_sync / 2)
+ offset_ns -= idtfc3->ns_per_sync;
+
+ return offset_ns * idtfc3->tdc_offset_sign;
+}
+
+static int idtfc3_set_lpf_mode(struct idtfc3 *idtfc3, u8 mode)
+{
+ int err;
+
+ if (mode >= LPF_INVALID)
+ return -EINVAL;
+
+ if (idtfc3->lpf_mode == mode)
+ return 0;
+
+ err = regmap_bulk_write(idtfc3->regmap, LPF_MODE_CNFG, &mode, sizeof(mode));
+ if (err)
+ return err;
+
+ idtfc3->lpf_mode = mode;
+
+ return 0;
+}
+
+static int idtfc3_enable_lpf(struct idtfc3 *idtfc3, bool enable)
+{
+ u8 val;
+ int err;
+
+ err = regmap_bulk_read(idtfc3->regmap, LPF_CTRL, &val, sizeof(val));
+ if (err)
+ return err;
+
+ if (enable == true)
+ val |= LPF_EN;
+ else
+ val &= ~LPF_EN;
+
+ return regmap_bulk_write(idtfc3->regmap, LPF_CTRL, &val, sizeof(val));
+}
+
+static int idtfc3_get_time_ref_freq(struct idtfc3 *idtfc3)
+{
+ int err;
+ u8 buf[4];
+ u8 time_ref_div;
+ u8 time_clk_div;
+
+ err = regmap_bulk_read(idtfc3->regmap, TIME_CLOCK_MEAS_DIV_CNFG, buf, sizeof(buf));
+ if (err)
+ return err;
+ time_ref_div = FIELD_GET(TIME_REF_DIV_MASK, get_unaligned_le32(buf)) + 1;
+
+ err = regmap_bulk_read(idtfc3->regmap, TIME_CLOCK_COUNT, buf, 1);
+ if (err)
+ return err;
+ time_clk_div = (buf[0] & TIME_CLOCK_COUNT_MASK) + 1;
+ idtfc3->time_ref_freq = idtfc3->hw_param.time_clk_freq *
+ time_clk_div / time_ref_div;
+
+ return 0;
+}
+
+static int idtfc3_get_tdc_offset_sign(struct idtfc3 *idtfc3)
+{
+ int err;
+ u8 buf[4];
+ u32 val;
+ u8 sig1, sig2;
+
+ err = regmap_bulk_read(idtfc3->regmap, TIME_CLOCK_TDC_FANOUT_CNFG, buf, sizeof(buf));
+ if (err)
+ return err;
+
+ val = get_unaligned_le32(buf);
+ if ((val & TIME_SYNC_TO_TDC_EN) != TIME_SYNC_TO_TDC_EN) {
+ dev_err(idtfc3->dev, "TIME_SYNC_TO_TDC_EN is off !!!");
+ return -EINVAL;
+ }
+
+ sig1 = FIELD_GET(SIG1_MUX_SEL_MASK, val);
+ sig2 = FIELD_GET(SIG2_MUX_SEL_MASK, val);
+
+ if ((sig1 == sig2) || ((sig1 != TIME_SYNC) && (sig2 != TIME_SYNC))) {
+ dev_err(idtfc3->dev, "Invalid tdc_mux_sel sig1=%d sig2=%d", sig1, sig2);
+ return -EINVAL;
+ } else if (sig1 == TIME_SYNC) {
+ idtfc3->tdc_offset_sign = 1;
+ } else if (sig2 == TIME_SYNC) {
+ idtfc3->tdc_offset_sign = -1;
+ }
+
+ return 0;
+}
+
+static int idtfc3_lpf_bw(struct idtfc3 *idtfc3, u8 shift, u8 mult)
+{
+ u8 val = FIELD_PREP(LPF_BW_SHIFT, shift) | FIELD_PREP(LPF_BW_MULT, mult);
+
+ return regmap_bulk_write(idtfc3->regmap, LPF_BW_CNFG, &val, sizeof(val));
+}
+
+static int idtfc3_enable_tdc(struct idtfc3 *idtfc3, bool enable, u8 meas_mode)
+{
+ int err;
+ u8 val = 0;
+
+ /* Disable TDC first */
+ err = regmap_bulk_write(idtfc3->regmap, TIME_CLOCK_MEAS_CTRL, &val, sizeof(val));
+ if (err)
+ return err;
+
+ if (enable == false)
+ return idtfc3_lpf_bw(idtfc3, LPF_BW_SHIFT_DEFAULT, LPF_BW_MULT_DEFAULT);
+
+ if (meas_mode >= MEAS_MODE_INVALID)
+ return -EINVAL;
+
+ /* Change TDC meas mode */
+ err = regmap_bulk_write(idtfc3->regmap, TIME_CLOCK_MEAS_CNFG,
+ &meas_mode, sizeof(meas_mode));
+ if (err)
+ return err;
+
+ /* Enable TDC */
+ val = TDC_MEAS_EN;
+ if (meas_mode == CONTINUOUS)
+ val |= TDC_MEAS_START;
+ err = regmap_bulk_write(idtfc3->regmap, TIME_CLOCK_MEAS_CTRL, &val, sizeof(val));
+ if (err)
+ return err;
+
+ return idtfc3_lpf_bw(idtfc3, LPF_BW_SHIFT_1PPS, LPF_BW_MULT_DEFAULT);
+}
+
+static bool get_tdc_meas(struct idtfc3 *idtfc3, s64 *offset_ns)
+{
+ bool valid = false;
+ u8 buf[9];
+ u8 val;
+ int err;
+
+ while (true) {
+ err = regmap_bulk_read(idtfc3->regmap, TDC_FIFO_STS,
+ &val, sizeof(val));
+ if (err)
+ return false;
+
+ if (val & FIFO_EMPTY)
+ break;
+
+ err = regmap_bulk_read(idtfc3->regmap, TDC_FIFO_READ_REQ,
+ &buf, sizeof(buf));
+ if (err)
+ return false;
+
+ valid = true;
+ }
+
+ if (valid)
+ *offset_ns = tdc_meas2offset(idtfc3, get_unaligned_le64(&buf[1]));
+
+ return valid;
+}
+
+static int check_tdc_fifo_overrun(struct idtfc3 *idtfc3)
+{
+ u8 val;
+ int err;
+
+ /* Check if FIFO is overrun */
+ err = regmap_bulk_read(idtfc3->regmap, TDC_FIFO_STS, &val, sizeof(val));
+ if (err)
+ return err;
+
+ if (!(val & FIFO_FULL))
+ return 0;
+
+ dev_warn(idtfc3->dev, "TDC FIFO overrun !!!");
+
+ err = idtfc3_enable_tdc(idtfc3, true, CONTINUOUS);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int get_tdc_meas_continuous(struct idtfc3 *idtfc3)
+{
+ int err;
+ s64 offset_ns;
+ struct ptp_clock_event event;
+
+ err = check_tdc_fifo_overrun(idtfc3);
+ if (err)
+ return err;
+
+ if (get_tdc_meas(idtfc3, &offset_ns) && offset_ns >= 0) {
+ event.index = 0;
+ event.offset = tdc_offset2phase(idtfc3, offset_ns);
+ event.type = PTP_CLOCK_EXTOFF;
+ ptp_clock_event(idtfc3->ptp_clock, &event);
+ }
+
+ return 0;
+}
+
+static int idtfc3_read_subcounter(struct idtfc3 *idtfc3)
+{
+ u8 buf[5] = {0};
+ int err;
+
+ err = regmap_bulk_read(idtfc3->regmap, TOD_COUNTER_READ_REQ,
+ &buf, sizeof(buf));
+ if (err)
+ return err;
+
+ /* sync_counter_value is [31:82] and sub_sync_counter_value is [0:30] */
+ return get_unaligned_le32(&buf[1]) & SUB_SYNC_COUNTER_MASK;
+}
+
+static int idtfc3_tod_update_is_done(struct idtfc3 *idtfc3)
+{
+ int err;
+ u8 req;
+
+ err = read_poll_timeout_atomic(regmap_bulk_read, err, !req, USEC_PER_MSEC,
+ idtfc3->tc_write_timeout, true, idtfc3->regmap,
+ TOD_SYNC_LOAD_REQ_CTRL, &req, 1);
+ if (err)
+ dev_err(idtfc3->dev, "TOD counter write timeout !!!");
+
+ return err;
+}
+
+static int idtfc3_write_subcounter(struct idtfc3 *idtfc3, u32 counter)
+{
+ u8 buf[18] = {0};
+ int err;
+
+ /* sync_counter_value is [31:82] and sub_sync_counter_value is [0:30] */
+ put_unaligned_le32(counter & SUB_SYNC_COUNTER_MASK, &buf[0]);
+
+ buf[16] = SUB_SYNC_LOAD_ENABLE | SYNC_LOAD_ENABLE;
+ buf[17] = SYNC_LOAD_REQ;
+
+ err = regmap_bulk_write(idtfc3->regmap, TOD_SYNC_LOAD_VAL_CTRL,
+ &buf, sizeof(buf));
+ if (err)
+ return err;
+
+ return idtfc3_tod_update_is_done(idtfc3);
+}
+
+static int idtfc3_timecounter_update(struct idtfc3 *idtfc3, u32 counter, s64 ns)
+{
+ int err;
+
+ err = idtfc3_write_subcounter(idtfc3, counter);
+ if (err)
+ return err;
+
+ /* Update time counter */
+ idtfc3->ns = ns;
+ idtfc3->last_counter = counter;
+
+ return 0;
+}
+
+static int idtfc3_timecounter_read(struct idtfc3 *idtfc3)
+{
+ int now, delta;
+
+ now = idtfc3_read_subcounter(idtfc3);
+ if (now < 0)
+ return now;
+
+ /* calculate the delta since the last idtfc3_timecounter_read(): */
+ if (now >= idtfc3->last_counter)
+ delta = now - idtfc3->last_counter;
+ else
+ delta = idtfc3->sub_sync_count - idtfc3->last_counter + now;
+
+ /* Update time counter */
+ idtfc3->ns += delta * idtfc3->ns_per_counter;
+ idtfc3->last_counter = now;
+
+ return 0;
+}
+
+static int _idtfc3_gettime(struct idtfc3 *idtfc3, struct timespec64 *ts)
+{
+ int err;
+
+ err = idtfc3_timecounter_read(idtfc3);
+ if (err)
+ return err;
+
+ *ts = ns_to_timespec64(idtfc3->ns);
+
+ return 0;
+}
+
+static int idtfc3_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ int err;
+
+ mutex_lock(idtfc3->lock);
+ err = _idtfc3_gettime(idtfc3, ts);
+ mutex_unlock(idtfc3->lock);
+
+ return err;
+}
+
+static int _idtfc3_settime(struct idtfc3 *idtfc3, const struct timespec64 *ts)
+{
+ s64 offset_ns, now_ns;
+ u32 counter, sub_ns;
+ int now;
+
+ if (timespec64_valid(ts) == false) {
+ dev_err(idtfc3->dev, "%s: invalid timespec", __func__);
+ return -EINVAL;
+ }
+
+ now = idtfc3_read_subcounter(idtfc3);
+ if (now < 0)
+ return now;
+
+ offset_ns = (idtfc3->sub_sync_count - now) * idtfc3->ns_per_counter;
+ now_ns = timespec64_to_ns(ts);
+ (void)ns2counters(idtfc3, offset_ns + now_ns, &sub_ns);
+
+ counter = sub_ns / idtfc3->ns_per_counter;
+ return idtfc3_timecounter_update(idtfc3, counter, now_ns);
+}
+
+static int idtfc3_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ int err;
+
+ mutex_lock(idtfc3->lock);
+ err = _idtfc3_settime(idtfc3, ts);
+ mutex_unlock(idtfc3->lock);
+
+ return err;
+}
+
+static int _idtfc3_adjtime(struct idtfc3 *idtfc3, s64 delta)
+{
+ /*
+ * The TOD counter can be synchronously loaded with any value,
+ * to be loaded on the next Time Sync pulse
+ */
+ s64 sync_ns;
+ u32 sub_ns;
+ u32 counter;
+
+ if (idtfc3->ns + delta < 0) {
+ dev_err(idtfc3->dev, "%lld ns adj is too large", delta);
+ return -EINVAL;
+ }
+
+ sync_ns = ns2counters(idtfc3, delta + idtfc3->ns_per_sync, &sub_ns);
+
+ counter = sub_ns / idtfc3->ns_per_counter;
+ return idtfc3_timecounter_update(idtfc3, counter, idtfc3->ns + sync_ns +
+ counter * idtfc3->ns_per_counter);
+}
+
+static int idtfc3_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ int err;
+
+ mutex_lock(idtfc3->lock);
+ err = _idtfc3_adjtime(idtfc3, delta);
+ mutex_unlock(idtfc3->lock);
+
+ return err;
+}
+
+static int _idtfc3_adjphase(struct idtfc3 *idtfc3, s32 delta)
+{
+ u8 buf[8] = {0};
+ int err;
+ s64 pcw;
+
+ err = idtfc3_set_lpf_mode(idtfc3, LPF_WP);
+ if (err)
+ return err;
+
+ /*
+ * Phase Control Word unit is: 10^9 / (TDC_APLL_FREQ * 124)
+ *
+ * delta * TDC_APLL_FREQ * 124
+ * PCW = ---------------------------
+ * 10^9
+ *
+ */
+ pcw = div_s64((s64)delta * idtfc3->tdc_apll_freq * 124, NSEC_PER_SEC);
+
+ put_unaligned_le64(pcw, buf);
+
+ return regmap_bulk_write(idtfc3->regmap, LPF_WR_PHASE_CTRL, buf, sizeof(buf));
+}
+
+static int idtfc3_adjphase(struct ptp_clock_info *ptp, s32 delta)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ int err;
+
+ mutex_lock(idtfc3->lock);
+ err = _idtfc3_adjphase(idtfc3, delta);
+ mutex_unlock(idtfc3->lock);
+
+ return err;
+}
+
+static int _idtfc3_adjfine(struct idtfc3 *idtfc3, long scaled_ppm)
+{
+ u8 buf[8] = {0};
+ int err;
+ s64 fcw;
+
+ err = idtfc3_set_lpf_mode(idtfc3, LPF_WF);
+ if (err)
+ return err;
+
+ /*
+ * Frequency Control Word unit is: 2^-44 * 10^6 ppm
+ *
+ * adjfreq:
+ * ppb * 2^44
+ * FCW = ----------
+ * 10^9
+ *
+ * adjfine:
+ * ppm_16 * 2^28
+ * FCW = -------------
+ * 10^6
+ */
+ fcw = scaled_ppm * BIT(28);
+ fcw = div_s64(fcw, 1000000);
+
+ put_unaligned_le64(fcw, buf);
+
+ return regmap_bulk_write(idtfc3->regmap, LPF_WR_FREQ_CTRL, buf, sizeof(buf));
+}
+
+static int idtfc3_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ int err;
+
+ mutex_lock(idtfc3->lock);
+ err = _idtfc3_adjfine(idtfc3, scaled_ppm);
+ mutex_unlock(idtfc3->lock);
+
+ return err;
+}
+
+static int idtfc3_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ int err = -EOPNOTSUPP;
+
+ mutex_lock(idtfc3->lock);
+ switch (rq->type) {
+ case PTP_CLK_REQ_PEROUT:
+ if (!on)
+ err = 0;
+ /* Only accept a 1-PPS aligned to the second. */
+ else if (rq->perout.start.nsec || rq->perout.period.sec != 1 ||
+ rq->perout.period.nsec)
+ err = -ERANGE;
+ else
+ err = 0;
+ break;
+ case PTP_CLK_REQ_EXTTS:
+ if (on) {
+ /* Only accept requests for external phase offset */
+ if ((rq->extts.flags & PTP_EXT_OFFSET) != (PTP_EXT_OFFSET))
+ err = -EOPNOTSUPP;
+ else
+ err = idtfc3_enable_tdc(idtfc3, true, CONTINUOUS);
+ } else {
+ err = idtfc3_enable_tdc(idtfc3, false, MEAS_MODE_INVALID);
+ }
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(idtfc3->lock);
+
+ if (err)
+ dev_err(idtfc3->dev, "Failed in %s with err %d!", __func__, err);
+
+ return err;
+}
+
+static long idtfc3_aux_work(struct ptp_clock_info *ptp)
+{
+ struct idtfc3 *idtfc3 = container_of(ptp, struct idtfc3, caps);
+ static int tdc_get;
+
+ mutex_lock(idtfc3->lock);
+ tdc_get %= TDC_GET_PERIOD;
+ if ((tdc_get == 0) || (tdc_get == TDC_GET_PERIOD / 2))
+ idtfc3_timecounter_read(idtfc3);
+ get_tdc_meas_continuous(idtfc3);
+ tdc_get++;
+ mutex_unlock(idtfc3->lock);
+
+ return idtfc3->tc_update_period;
+}
+
+static const struct ptp_clock_info idtfc3_caps = {
+ .owner = THIS_MODULE,
+ .max_adj = MAX_FFO_PPB,
+ .n_per_out = 1,
+ .n_ext_ts = 1,
+ .adjphase = &idtfc3_adjphase,
+ .adjfine = &idtfc3_adjfine,
+ .adjtime = &idtfc3_adjtime,
+ .gettime64 = &idtfc3_gettime,
+ .settime64 = &idtfc3_settime,
+ .enable = &idtfc3_enable,
+ .do_aux_work = &idtfc3_aux_work,
+};
+
+static int idtfc3_hw_calibrate(struct idtfc3 *idtfc3)
+{
+ int err = 0;
+ u8 val;
+
+ mdelay(10);
+ /*
+ * Toggle TDC_DAC_RECAL_REQ:
+ * (1) set tdc_en to 1
+ * (2) set tdc_dac_recal_req to 0
+ * (3) set tdc_dac_recal_req to 1
+ */
+ val = TDC_EN;
+ err = regmap_bulk_write(idtfc3->regmap, TDC_CTRL,
+ &val, sizeof(val));
+ if (err)
+ return err;
+ val = TDC_EN | TDC_DAC_RECAL_REQ;
+ err = regmap_bulk_write(idtfc3->regmap, TDC_CTRL,
+ &val, sizeof(val));
+ if (err)
+ return err;
+ mdelay(10);
+
+ /*
+ * Toggle APLL_REINIT:
+ * (1) set apll_reinit to 0
+ * (2) set apll_reinit to 1
+ */
+ val = 0;
+ err = regmap_bulk_write(idtfc3->regmap, SOFT_RESET_CTRL,
+ &val, sizeof(val));
+ if (err)
+ return err;
+ val = APLL_REINIT;
+ err = regmap_bulk_write(idtfc3->regmap, SOFT_RESET_CTRL,
+ &val, sizeof(val));
+ if (err)
+ return err;
+ mdelay(10);
+
+ return err;
+}
+
+static int idtfc3_init_timecounter(struct idtfc3 *idtfc3)
+{
+ int err;
+ u32 period_ms;
+
+ period_ms = idtfc3->sub_sync_count * MSEC_PER_SEC /
+ idtfc3->hw_param.time_clk_freq;
+
+ idtfc3->tc_update_period = msecs_to_jiffies(period_ms / TDC_GET_PERIOD);
+ idtfc3->tc_write_timeout = period_ms * USEC_PER_MSEC;
+
+ err = idtfc3_timecounter_update(idtfc3, 0, 0);
+ if (err)
+ return err;
+
+ err = idtfc3_timecounter_read(idtfc3);
+ if (err)
+ return err;
+
+ ptp_schedule_worker(idtfc3->ptp_clock, idtfc3->tc_update_period);
+
+ return 0;
+}
+
+static int idtfc3_get_tdc_apll_freq(struct idtfc3 *idtfc3)
+{
+ int err;
+ u8 tdc_fb_div_int;
+ u8 tdc_ref_div;
+ struct idtfc3_hw_param *param = &idtfc3->hw_param;
+
+ err = regmap_bulk_read(idtfc3->regmap, TDC_REF_DIV_CNFG,
+ &tdc_ref_div, sizeof(tdc_ref_div));
+ if (err)
+ return err;
+
+ err = regmap_bulk_read(idtfc3->regmap, TDC_FB_DIV_INT_CNFG,
+ &tdc_fb_div_int, sizeof(tdc_fb_div_int));
+ if (err)
+ return err;
+
+ tdc_fb_div_int &= TDC_FB_DIV_INT_MASK;
+ tdc_ref_div &= TDC_REF_DIV_CONFIG_MASK;
+
+ idtfc3->tdc_apll_freq = div_u64(param->xtal_freq * (u64)tdc_fb_div_int,
+ 1 << tdc_ref_div);
+
+ return 0;
+}
+
+static int idtfc3_get_fod(struct idtfc3 *idtfc3)
+{
+ int err;
+ u8 fod;
+
+ err = regmap_bulk_read(idtfc3->regmap, TIME_CLOCK_SRC, &fod, sizeof(fod));
+ if (err)
+ return err;
+
+ switch (fod) {
+ case 0:
+ idtfc3->fod_n = FOD_0;
+ break;
+ case 1:
+ idtfc3->fod_n = FOD_1;
+ break;
+ case 2:
+ idtfc3->fod_n = FOD_2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int idtfc3_get_sync_count(struct idtfc3 *idtfc3)
+{
+ int err;
+ u8 buf[4];
+
+ err = regmap_bulk_read(idtfc3->regmap, SUB_SYNC_GEN_CNFG, buf, sizeof(buf));
+ if (err)
+ return err;
+
+ idtfc3->sub_sync_count = (get_unaligned_le32(buf) & SUB_SYNC_COUNTER_MASK) + 1;
+ idtfc3->ns_per_counter = NSEC_PER_SEC / idtfc3->hw_param.time_clk_freq;
+ idtfc3->ns_per_sync = idtfc3->sub_sync_count * idtfc3->ns_per_counter;
+
+ return 0;
+}
+
+static int idtfc3_setup_hw_param(struct idtfc3 *idtfc3)
+{
+ int err;
+
+ err = idtfc3_get_fod(idtfc3);
+ if (err)
+ return err;
+
+ err = idtfc3_get_sync_count(idtfc3);
+ if (err)
+ return err;
+
+ err = idtfc3_get_time_ref_freq(idtfc3);
+ if (err)
+ return err;
+
+ return idtfc3_get_tdc_apll_freq(idtfc3);
+}
+
+static int idtfc3_configure_hw(struct idtfc3 *idtfc3)
+{
+ int err = 0;
+
+ err = idtfc3_hw_calibrate(idtfc3);
+ if (err)
+ return err;
+
+ err = idtfc3_enable_lpf(idtfc3, true);
+ if (err)
+ return err;
+
+ err = idtfc3_enable_tdc(idtfc3, false, MEAS_MODE_INVALID);
+ if (err)
+ return err;
+
+ err = idtfc3_get_tdc_offset_sign(idtfc3);
+ if (err)
+ return err;
+
+ return idtfc3_setup_hw_param(idtfc3);
+}
+
+static int idtfc3_set_overhead(struct idtfc3 *idtfc3)
+{
+ s64 current_ns = 0;
+ s64 lowest_ns = 0;
+ int err;
+ u8 i;
+ ktime_t start;
+ ktime_t stop;
+ ktime_t diff;
+
+ char buf[18] = {0};
+
+ for (i = 0; i < 5; i++) {
+ start = ktime_get_raw();
+
+ err = regmap_bulk_write(idtfc3->regmap, TOD_SYNC_LOAD_VAL_CTRL,
+ &buf, sizeof(buf));
+ if (err)
+ return err;
+
+ stop = ktime_get_raw();
+
+ diff = ktime_sub(stop, start);
+
+ current_ns = ktime_to_ns(diff);
+
+ if (i == 0) {
+ lowest_ns = current_ns;
+ } else {
+ if (current_ns < lowest_ns)
+ lowest_ns = current_ns;
+ }
+ }
+
+ idtfc3->tod_write_overhead = lowest_ns;
+
+ return err;
+}
+
+static int idtfc3_enable_ptp(struct idtfc3 *idtfc3)
+{
+ int err;
+
+ idtfc3->caps = idtfc3_caps;
+ snprintf(idtfc3->caps.name, sizeof(idtfc3->caps.name), "IDT FC3W");
+ idtfc3->ptp_clock = ptp_clock_register(&idtfc3->caps, NULL);
+
+ if (IS_ERR(idtfc3->ptp_clock)) {
+ err = PTR_ERR(idtfc3->ptp_clock);
+ idtfc3->ptp_clock = NULL;
+ return err;
+ }
+
+ err = idtfc3_set_overhead(idtfc3);
+ if (err)
+ return err;
+
+ err = idtfc3_init_timecounter(idtfc3);
+ if (err)
+ return err;
+
+ dev_info(idtfc3->dev, "TIME_SYNC_CHANNEL registered as ptp%d",
+ idtfc3->ptp_clock->index);
+
+ return 0;
+}
+
+static int idtfc3_load_firmware(struct idtfc3 *idtfc3)
+{
+ char fname[128] = FW_FILENAME;
+ const struct firmware *fw;
+ struct idtfc3_fwrc *rec;
+ u16 addr;
+ u8 val;
+ int err;
+ s32 len;
+
+ idtfc3_default_hw_param(&idtfc3->hw_param);
+
+ if (firmware) /* module parameter */
+ snprintf(fname, sizeof(fname), "%s", firmware);
+
+ dev_info(idtfc3->dev, "requesting firmware '%s'\n", fname);
+
+ err = request_firmware(&fw, fname, idtfc3->dev);
+
+ if (err) {
+ dev_err(idtfc3->dev,
+ "requesting firmware failed with err %d!\n", err);
+ return err;
+ }
+
+ dev_dbg(idtfc3->dev, "firmware size %zu bytes\n", fw->size);
+
+ rec = (struct idtfc3_fwrc *)fw->data;
+
+ for (len = fw->size; len > 0; len -= sizeof(*rec)) {
+ if (rec->reserved) {
+ dev_err(idtfc3->dev,
+ "bad firmware, reserved field non-zero\n");
+ err = -EINVAL;
+ } else {
+ val = rec->value;
+ addr = rec->hiaddr << 8 | rec->loaddr;
+
+ rec++;
+
+ err = idtfc3_set_hw_param(&idtfc3->hw_param, addr, val);
+ }
+
+ if (err != -EINVAL) {
+ err = 0;
+
+ /* Max register */
+ if (addr >= 0xE88)
+ continue;
+
+ err = regmap_bulk_write(idtfc3->regmap, addr,
+ &val, sizeof(val));
+ }
+
+ if (err)
+ goto out;
+ }
+
+ err = idtfc3_configure_hw(idtfc3);
+out:
+ release_firmware(fw);
+ return err;
+}
+
+static int idtfc3_read_device_id(struct idtfc3 *idtfc3, u16 *device_id)
+{
+ int err;
+ u8 buf[2] = {0};
+
+ err = regmap_bulk_read(idtfc3->regmap, DEVICE_ID,
+ &buf, sizeof(buf));
+ if (err) {
+ dev_err(idtfc3->dev, "%s failed with %d", __func__, err);
+ return err;
+ }
+
+ *device_id = get_unaligned_le16(buf);
+
+ return 0;
+}
+
+static int idtfc3_check_device_compatibility(struct idtfc3 *idtfc3)
+{
+ int err;
+ u16 device_id;
+
+ err = idtfc3_read_device_id(idtfc3, &device_id);
+ if (err)
+ return err;
+
+ if ((device_id & DEVICE_ID_MASK) == 0) {
+ dev_err(idtfc3->dev, "invalid device");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int idtfc3_probe(struct platform_device *pdev)
+{
+ struct rsmu_ddata *ddata = dev_get_drvdata(pdev->dev.parent);
+ struct idtfc3 *idtfc3;
+ int err;
+
+ idtfc3 = devm_kzalloc(&pdev->dev, sizeof(struct idtfc3), GFP_KERNEL);
+
+ if (!idtfc3)
+ return -ENOMEM;
+
+ idtfc3->dev = &pdev->dev;
+ idtfc3->mfd = pdev->dev.parent;
+ idtfc3->lock = &ddata->lock;
+ idtfc3->regmap = ddata->regmap;
+
+ mutex_lock(idtfc3->lock);
+
+ err = idtfc3_check_device_compatibility(idtfc3);
+ if (err) {
+ mutex_unlock(idtfc3->lock);
+ return err;
+ }
+
+ err = idtfc3_load_firmware(idtfc3);
+ if (err) {
+ if (err == -ENOENT) {
+ mutex_unlock(idtfc3->lock);
+ return -EPROBE_DEFER;
+ }
+ dev_warn(idtfc3->dev, "loading firmware failed with %d", err);
+ }
+
+ err = idtfc3_enable_ptp(idtfc3);
+ if (err) {
+ dev_err(idtfc3->dev, "idtfc3_enable_ptp failed with %d", err);
+ mutex_unlock(idtfc3->lock);
+ return err;
+ }
+
+ mutex_unlock(idtfc3->lock);
+
+ if (err) {
+ ptp_clock_unregister(idtfc3->ptp_clock);
+ return err;
+ }
+
+ platform_set_drvdata(pdev, idtfc3);
+
+ return 0;
+}
+
+static void idtfc3_remove(struct platform_device *pdev)
+{
+ struct idtfc3 *idtfc3 = platform_get_drvdata(pdev);
+
+ ptp_clock_unregister(idtfc3->ptp_clock);
+}
+
+static struct platform_driver idtfc3_driver = {
+ .driver = {
+ .name = "rc38xxx-phc",
+ },
+ .probe = idtfc3_probe,
+ .remove_new = idtfc3_remove,
+};
+
+module_platform_driver(idtfc3_driver);
diff --git a/drivers/ptp/ptp_fc3.h b/drivers/ptp/ptp_fc3.h
new file mode 100644
index 000000000000..897101579207
--- /dev/null
+++ b/drivers/ptp/ptp_fc3.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PTP hardware clock driver for the FemtoClock3 family of timing and
+ * synchronization devices.
+ *
+ * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#ifndef PTP_IDTFC3_H
+#define PTP_IDTFC3_H
+
+#include <linux/ktime.h>
+#include <linux/ptp_clock.h>
+#include <linux/regmap.h>
+
+#define FW_FILENAME "idtfc3.bin"
+
+#define MAX_FFO_PPB (244000)
+#define TDC_GET_PERIOD (10)
+
+struct idtfc3 {
+ struct ptp_clock_info caps;
+ struct ptp_clock *ptp_clock;
+ struct device *dev;
+ /* Mutex to protect operations from being interrupted */
+ struct mutex *lock;
+ struct device *mfd;
+ struct regmap *regmap;
+ struct idtfc3_hw_param hw_param;
+ u32 sub_sync_count;
+ u32 ns_per_sync;
+ int tdc_offset_sign;
+ u64 tdc_apll_freq;
+ u32 time_ref_freq;
+ u16 fod_n;
+ u8 lpf_mode;
+ /* Time counter */
+ u32 last_counter;
+ s64 ns;
+ u32 ns_per_counter;
+ u32 tc_update_period;
+ u32 tc_write_timeout;
+ s64 tod_write_overhead;
+};
+
+#endif /* PTP_IDTFC3_H */
diff --git a/drivers/ptp/ptp_kvm_common.c b/drivers/ptp/ptp_kvm_common.c
index 2418977989be..15ccb7dd2ed0 100644
--- a/drivers/ptp/ptp_kvm_common.c
+++ b/drivers/ptp/ptp_kvm_common.c
@@ -28,15 +28,15 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time,
struct system_counterval_t *system_counter,
void *ctx)
{
- long ret;
- u64 cycle;
+ enum clocksource_ids cs_id;
struct timespec64 tspec;
- struct clocksource *cs;
+ u64 cycle;
+ int ret;
spin_lock(&kvm_ptp_lock);
preempt_disable_notrace();
- ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs);
+ ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs_id);
if (ret) {
spin_unlock(&kvm_ptp_lock);
preempt_enable_notrace();
@@ -46,7 +46,7 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time,
preempt_enable_notrace();
system_counter->cycles = cycle;
- system_counter->cs = cs;
+ system_counter->cs_id = cs_id;
*device_time = timespec64_to_ktime(tspec);
diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c
index 902844cc1a17..617c8d6706d3 100644
--- a/drivers/ptp/ptp_kvm_x86.c
+++ b/drivers/ptp/ptp_kvm_x86.c
@@ -93,7 +93,7 @@ int kvm_arch_ptp_get_clock(struct timespec64 *ts)
}
int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
- struct clocksource **cs)
+ enum clocksource_ids *cs_id)
{
struct pvclock_vcpu_time_info *src;
unsigned int version;
@@ -123,7 +123,7 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
*cycle = __pvclock_read_cycles(src, clock_pair->tsc);
} while (pvclock_read_retry(src, version));
- *cs = &kvm_clock;
+ *cs_id = CSID_X86_KVM_CLK;
return 0;
}
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 5f858e426bbd..6506cfb89aa9 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -34,6 +34,9 @@
#define PCI_VENDOR_ID_OROLIA 0x1ad7
#define PCI_DEVICE_ID_OROLIA_ARTCARD 0xa000
+#define PCI_VENDOR_ID_ADVA 0xad5a
+#define PCI_DEVICE_ID_ADVA_TIMECARD 0x0400
+
static struct class timecard_class = {
.name = "timecard",
};
@@ -63,6 +66,13 @@ struct ocp_reg {
u32 status_drift;
};
+struct ptp_ocp_servo_conf {
+ u32 servo_offset_p;
+ u32 servo_offset_i;
+ u32 servo_drift_p;
+ u32 servo_drift_i;
+};
+
#define OCP_CTRL_ENABLE BIT(0)
#define OCP_CTRL_ADJUST_TIME BIT(1)
#define OCP_CTRL_ADJUST_OFFSET BIT(2)
@@ -397,10 +407,14 @@ static int ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr);
static int ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+
static const struct ocp_attr_group fb_timecard_groups[];
static const struct ocp_attr_group art_timecard_groups[];
+static const struct ocp_attr_group adva_timecard_groups[];
+
struct ptp_ocp_eeprom_map {
u16 off;
u16 len;
@@ -700,6 +714,12 @@ static struct ocp_resource ocp_fb_resource[] = {
},
{
.setup = ptp_ocp_fb_board_init,
+ .extra = &(struct ptp_ocp_servo_conf) {
+ .servo_offset_p = 0x2000,
+ .servo_offset_i = 0x1000,
+ .servo_drift_p = 0,
+ .servo_drift_i = 0,
+ },
},
{ }
};
@@ -831,6 +851,170 @@ static struct ocp_resource ocp_art_resource[] = {
},
{
.setup = ptp_ocp_art_board_init,
+ .extra = &(struct ptp_ocp_servo_conf) {
+ .servo_offset_p = 0x2000,
+ .servo_offset_i = 0x1000,
+ .servo_drift_p = 0,
+ .servo_drift_i = 0,
+ },
+ },
+ { }
+};
+
+static struct ocp_resource ocp_adva_resource[] = {
+ {
+ OCP_MEM_RESOURCE(reg),
+ .offset = 0x01000000, .size = 0x10000,
+ },
+ {
+ OCP_EXT_RESOURCE(ts0),
+ .offset = 0x01010000, .size = 0x10000, .irq_vec = 1,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .index = 0,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_EXT_RESOURCE(ts1),
+ .offset = 0x01020000, .size = 0x10000, .irq_vec = 2,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .index = 1,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_EXT_RESOURCE(ts2),
+ .offset = 0x01060000, .size = 0x10000, .irq_vec = 6,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .index = 2,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ /* Timestamp for PHC and/or PPS generator */
+ {
+ OCP_EXT_RESOURCE(pps),
+ .offset = 0x010C0000, .size = 0x10000, .irq_vec = 0,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .index = 5,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_EXT_RESOURCE(signal_out[0]),
+ .offset = 0x010D0000, .size = 0x10000, .irq_vec = 11,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .index = 1,
+ .irq_fcn = ptp_ocp_signal_irq,
+ .enable = ptp_ocp_signal_enable,
+ },
+ },
+ {
+ OCP_EXT_RESOURCE(signal_out[1]),
+ .offset = 0x010E0000, .size = 0x10000, .irq_vec = 12,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .index = 2,
+ .irq_fcn = ptp_ocp_signal_irq,
+ .enable = ptp_ocp_signal_enable,
+ },
+ },
+ {
+ OCP_MEM_RESOURCE(pps_to_ext),
+ .offset = 0x01030000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(pps_to_clk),
+ .offset = 0x01040000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(tod),
+ .offset = 0x01050000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(image),
+ .offset = 0x00020000, .size = 0x1000,
+ },
+ {
+ OCP_MEM_RESOURCE(pps_select),
+ .offset = 0x00130000, .size = 0x1000,
+ },
+ {
+ OCP_MEM_RESOURCE(sma_map1),
+ .offset = 0x00140000, .size = 0x1000,
+ },
+ {
+ OCP_MEM_RESOURCE(sma_map2),
+ .offset = 0x00220000, .size = 0x1000,
+ },
+ {
+ OCP_SERIAL_RESOURCE(gnss_port),
+ .offset = 0x00160000 + 0x1000, .irq_vec = 3,
+ .extra = &(struct ptp_ocp_serial_port) {
+ .baud = 9600,
+ },
+ },
+ {
+ OCP_SERIAL_RESOURCE(mac_port),
+ .offset = 0x00180000 + 0x1000, .irq_vec = 5,
+ .extra = &(struct ptp_ocp_serial_port) {
+ .baud = 115200,
+ },
+ },
+ {
+ OCP_MEM_RESOURCE(freq_in[0]),
+ .offset = 0x01200000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(freq_in[1]),
+ .offset = 0x01210000, .size = 0x10000,
+ },
+ {
+ OCP_SPI_RESOURCE(spi_flash),
+ .offset = 0x00310400, .size = 0x10000, .irq_vec = 9,
+ .extra = &(struct ptp_ocp_flash_info) {
+ .name = "spi_altera", .pci_offset = 0,
+ .data_size = sizeof(struct altera_spi_platform_data),
+ .data = &(struct altera_spi_platform_data) {
+ .num_chipselect = 1,
+ .num_devices = 1,
+ .devices = &(struct spi_board_info) {
+ .modalias = "spi-nor",
+ },
+ },
+ },
+ },
+ {
+ OCP_I2C_RESOURCE(i2c_ctrl),
+ .offset = 0x150000, .size = 0x100, .irq_vec = 7,
+ .extra = &(struct ptp_ocp_i2c_info) {
+ .name = "ocores-i2c",
+ .fixed_rate = 50000000,
+ .data_size = sizeof(struct ocores_i2c_platform_data),
+ .data = &(struct ocores_i2c_platform_data) {
+ .clock_khz = 50000,
+ .bus_khz = 100,
+ .reg_io_width = 4, // 32-bit/4-byte
+ .reg_shift = 2, // 32-bit addressing
+ .num_devices = 2,
+ .devices = (struct i2c_board_info[]) {
+ { I2C_BOARD_INFO("24c02", 0x50) },
+ { I2C_BOARD_INFO("24mac402", 0x58),
+ .platform_data = "mac" },
+ },
+ },
+ },
+ },
+ {
+ .setup = ptp_ocp_adva_board_init,
+ .extra = &(struct ptp_ocp_servo_conf) {
+ .servo_offset_p = 0xc000,
+ .servo_offset_i = 0x1000,
+ .servo_drift_p = 0,
+ .servo_drift_i = 0,
+ },
},
{ }
};
@@ -839,6 +1023,7 @@ static const struct pci_device_id ptp_ocp_pcidev_id[] = {
{ PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) },
{ PCI_DEVICE_DATA(CELESTICA, TIMECARD, &ocp_fb_resource) },
{ PCI_DEVICE_DATA(OROLIA, ARTCARD, &ocp_art_resource) },
+ { PCI_DEVICE_DATA(ADVA, TIMECARD, &ocp_adva_resource) },
{ }
};
MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
@@ -917,6 +1102,30 @@ static const struct ocp_selector ptp_ocp_art_sma_out[] = {
{ }
};
+static const struct ocp_selector ptp_ocp_adva_sma_in[] = {
+ { .name = "10Mhz", .value = 0x0000, .frequency = 10000000},
+ { .name = "PPS1", .value = 0x0001, .frequency = 1 },
+ { .name = "PPS2", .value = 0x0002, .frequency = 1 },
+ { .name = "TS1", .value = 0x0004, .frequency = 0 },
+ { .name = "TS2", .value = 0x0008, .frequency = 0 },
+ { .name = "FREQ1", .value = 0x0100, .frequency = 0 },
+ { .name = "FREQ2", .value = 0x0200, .frequency = 0 },
+ { .name = "None", .value = SMA_DISABLE, .frequency = 0 },
+ { }
+};
+
+static const struct ocp_selector ptp_ocp_adva_sma_out[] = {
+ { .name = "10Mhz", .value = 0x0000, .frequency = 10000000},
+ { .name = "PHC", .value = 0x0001, .frequency = 1 },
+ { .name = "MAC", .value = 0x0002, .frequency = 1 },
+ { .name = "GNSS1", .value = 0x0004, .frequency = 1 },
+ { .name = "GEN1", .value = 0x0040 },
+ { .name = "GEN2", .value = 0x0080 },
+ { .name = "GND", .value = 0x2000 },
+ { .name = "VCC", .value = 0x4000 },
+ { }
+};
+
struct ocp_sma_op {
const struct ocp_selector *tbl[2];
void (*init)(struct ptp_ocp *bp);
@@ -1363,7 +1572,7 @@ ptp_ocp_estimate_pci_timing(struct ptp_ocp *bp)
}
static int
-ptp_ocp_init_clock(struct ptp_ocp *bp)
+ptp_ocp_init_clock(struct ptp_ocp *bp, struct ptp_ocp_servo_conf *servo_conf)
{
struct timespec64 ts;
u32 ctrl;
@@ -1371,12 +1580,11 @@ ptp_ocp_init_clock(struct ptp_ocp *bp)
ctrl = OCP_CTRL_ENABLE;
iowrite32(ctrl, &bp->reg->ctrl);
- /* NO DRIFT Correction */
- /* offset_p:i 1/8, offset_i: 1/16, drift_p: 0, drift_i: 0 */
- iowrite32(0x2000, &bp->reg->servo_offset_p);
- iowrite32(0x1000, &bp->reg->servo_offset_i);
- iowrite32(0, &bp->reg->servo_drift_p);
- iowrite32(0, &bp->reg->servo_drift_i);
+ /* servo configuration */
+ iowrite32(servo_conf->servo_offset_p, &bp->reg->servo_offset_p);
+ iowrite32(servo_conf->servo_offset_i, &bp->reg->servo_offset_i);
+ iowrite32(servo_conf->servo_drift_p, &bp->reg->servo_drift_p);
+ iowrite32(servo_conf->servo_drift_p, &bp->reg->servo_drift_i);
/* latch servo values */
ctrl |= OCP_CTRL_ADJUST_SERVO;
@@ -2348,6 +2556,14 @@ static const struct ocp_sma_op ocp_fb_sma_op = {
.set_output = ptp_ocp_sma_fb_set_output,
};
+static const struct ocp_sma_op ocp_adva_sma_op = {
+ .tbl = { ptp_ocp_adva_sma_in, ptp_ocp_adva_sma_out },
+ .init = ptp_ocp_sma_fb_init,
+ .get = ptp_ocp_sma_fb_get,
+ .set_inputs = ptp_ocp_sma_fb_set_inputs,
+ .set_output = ptp_ocp_sma_fb_set_output,
+};
+
static int
ptp_ocp_set_pins(struct ptp_ocp *bp)
{
@@ -2427,7 +2643,7 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
return err;
ptp_ocp_sma_init(bp);
- return ptp_ocp_init_clock(bp);
+ return ptp_ocp_init_clock(bp, r->extra);
}
static bool
@@ -2589,7 +2805,44 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
if (err)
return err;
- return ptp_ocp_init_clock(bp);
+ return ptp_ocp_init_clock(bp, r->extra);
+}
+
+/* ADVA specific board initializers; last "resource" registered. */
+static int
+ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ int err;
+ u32 version;
+
+ bp->flash_start = 0xA00000;
+ bp->eeprom_map = fb_eeprom_map;
+ bp->sma_op = &ocp_adva_sma_op;
+
+ version = ioread32(&bp->image->version);
+ /* if lower 16 bits are empty, this is the fw loader. */
+ if ((version & 0xffff) == 0) {
+ version = version >> 16;
+ bp->fw_loader = true;
+ }
+ bp->fw_tag = 3;
+ bp->fw_version = version & 0xffff;
+ bp->fw_cap = OCP_CAP_BASIC | OCP_CAP_SIGNAL | OCP_CAP_FREQ;
+
+ ptp_ocp_tod_init(bp);
+ ptp_ocp_nmea_out_init(bp);
+ ptp_ocp_signal_init(bp);
+
+ err = ptp_ocp_attr_group_add(bp, adva_timecard_groups);
+ if (err)
+ return err;
+
+ err = ptp_ocp_set_pins(bp);
+ if (err)
+ return err;
+ ptp_ocp_sma_init(bp);
+
+ return ptp_ocp_init_clock(bp, r->extra);
}
static ssize_t
@@ -3564,6 +3817,37 @@ static const struct ocp_attr_group art_timecard_groups[] = {
{ },
};
+static struct attribute *adva_timecard_attrs[] = {
+ &dev_attr_serialnum.attr,
+ &dev_attr_gnss_sync.attr,
+ &dev_attr_clock_source.attr,
+ &dev_attr_available_clock_sources.attr,
+ &dev_attr_sma1.attr,
+ &dev_attr_sma2.attr,
+ &dev_attr_sma3.attr,
+ &dev_attr_sma4.attr,
+ &dev_attr_available_sma_inputs.attr,
+ &dev_attr_available_sma_outputs.attr,
+ &dev_attr_clock_status_drift.attr,
+ &dev_attr_clock_status_offset.attr,
+ &dev_attr_ts_window_adjust.attr,
+ &dev_attr_tod_correction.attr,
+ NULL,
+};
+
+static const struct attribute_group adva_timecard_group = {
+ .attrs = adva_timecard_attrs,
+};
+
+static const struct ocp_attr_group adva_timecard_groups[] = {
+ { .cap = OCP_CAP_BASIC, .group = &adva_timecard_group },
+ { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal0_group },
+ { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal1_group },
+ { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq0_group },
+ { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq1_group },
+ { },
+};
+
static void
gpio_input_map(char *buf, struct ptp_ocp *bp, u16 map[][2], u16 bit,
const char *def)
@@ -4209,10 +4493,11 @@ ptp_ocp_detach(struct ptp_ocp *bp)
device_unregister(&bp->dev);
}
-static int ptp_ocp_dpll_lock_status_get(const struct dpll_device *dpll,
- void *priv,
- enum dpll_lock_status *status,
- struct netlink_ext_ack *extack)
+static int
+ptp_ocp_dpll_lock_status_get(const struct dpll_device *dpll, void *priv,
+ enum dpll_lock_status *status,
+ enum dpll_lock_status_error *status_error,
+ struct netlink_ext_ack *extack)
{
struct ptp_ocp *bp = priv;
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 45f9002a5dca..18934e28469e 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -120,7 +120,7 @@ static inline bool ptp_clock_freerun(struct ptp_clock *ptp)
return ptp_vclock_in_use(ptp);
}
-extern struct class *ptp_class;
+extern const struct class ptp_class;
/*
* see ptp_chardev.c
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
index f7a499a1bd39..a15460aaa03b 100644
--- a/drivers/ptp/ptp_sysfs.c
+++ b/drivers/ptp/ptp_sysfs.c
@@ -24,8 +24,7 @@ static ssize_t max_phase_adjustment_show(struct device *dev,
{
struct ptp_clock *ptp = dev_get_drvdata(dev);
- return snprintf(page, PAGE_SIZE - 1, "%d\n",
- ptp->info->getmaxphase(ptp->info));
+ return sysfs_emit(page, "%d\n", ptp->info->getmaxphase(ptp->info));
}
static DEVICE_ATTR_RO(max_phase_adjustment);
@@ -34,7 +33,7 @@ static ssize_t var##_show(struct device *dev, \
struct device_attribute *attr, char *page) \
{ \
struct ptp_clock *ptp = dev_get_drvdata(dev); \
- return snprintf(page, PAGE_SIZE-1, "%d\n", ptp->info->var); \
+ return sysfs_emit(page, "%d\n", ptp->info->var); \
} \
static DEVICE_ATTR(name, 0444, var##_show, NULL);
@@ -102,8 +101,8 @@ static ssize_t extts_fifo_show(struct device *dev,
if (!qcnt)
goto out;
- cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n",
- event.index, event.t.sec, event.t.nsec);
+ cnt = sysfs_emit(page, "%u %lld %u\n",
+ event.index, event.t.sec, event.t.nsec);
out:
return cnt;
}
@@ -194,7 +193,7 @@ static ssize_t n_vclocks_show(struct device *dev,
if (mutex_lock_interruptible(&ptp->n_vclocks_mux))
return -ERESTARTSYS;
- size = snprintf(page, PAGE_SIZE - 1, "%u\n", ptp->n_vclocks);
+ size = sysfs_emit(page, "%u\n", ptp->n_vclocks);
mutex_unlock(&ptp->n_vclocks_mux);
@@ -270,7 +269,7 @@ static ssize_t max_vclocks_show(struct device *dev,
struct ptp_clock *ptp = dev_get_drvdata(dev);
ssize_t size;
- size = snprintf(page, PAGE_SIZE - 1, "%u\n", ptp->max_vclocks);
+ size = sysfs_emit(page, "%u\n", ptp->max_vclocks);
return size;
}
diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
index dcf752c9e045..7febfdcbde8b 100644
--- a/drivers/ptp/ptp_vclock.c
+++ b/drivers/ptp/ptp_vclock.c
@@ -241,7 +241,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
return num;
snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", pclock_index);
- dev = class_find_device_by_name(ptp_class, name);
+ dev = class_find_device_by_name(&ptp_class, name);
if (!dev)
return num;
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index c2a236f2e846..fc4f4bb94a4c 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -32,5 +32,18 @@ menuconfig RAS
if RAS
source "arch/x86/ras/Kconfig"
+source "drivers/ras/amd/atl/Kconfig"
+
+config RAS_FMPM
+ tristate "FRU Memory Poison Manager"
+ default m
+ depends on AMD_ATL && ACPI_APEI
+ help
+ Support saving and restoring memory error information across reboot
+ using ACPI ERST as persistent storage. Error information is saved with
+ the UEFI CPER "FRU Memory Poison" section format.
+
+ Memory will be retired during boot time and run time depending on
+ platform-specific policies.
endif
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 6f0404f50107..11f95d59d397 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -2,3 +2,6 @@
obj-$(CONFIG_RAS) += ras.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
obj-$(CONFIG_RAS_CEC) += cec.o
+
+obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o
+obj-y += amd/atl/
diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig
new file mode 100644
index 000000000000..df49c23e7f62
--- /dev/null
+++ b/drivers/ras/amd/atl/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# AMD Address Translation Library Kconfig
+#
+# Copyright (c) 2023, Advanced Micro Devices, Inc.
+# All Rights Reserved.
+#
+# Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+
+config AMD_ATL
+ tristate "AMD Address Translation Library"
+ depends on AMD_NB && X86_64 && RAS
+ depends on MEMORY_FAILURE
+ default N
+ help
+ This library includes support for implementation-specific
+ address translation procedures needed for various error
+ handling cases.
+
+ Enable this option if using DRAM ECC on Zen-based systems
+ and OS-based error handling.
diff --git a/drivers/ras/amd/atl/Makefile b/drivers/ras/amd/atl/Makefile
new file mode 100644
index 000000000000..4acd5f05bd9c
--- /dev/null
+++ b/drivers/ras/amd/atl/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# AMD Address Translation Library Makefile
+#
+# Copyright (c) 2023, Advanced Micro Devices, Inc.
+# All Rights Reserved.
+#
+# Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+
+amd_atl-y := access.o
+amd_atl-y += core.o
+amd_atl-y += dehash.o
+amd_atl-y += denormalize.o
+amd_atl-y += map.o
+amd_atl-y += system.o
+amd_atl-y += umc.o
+
+obj-$(CONFIG_AMD_ATL) += amd_atl.o
diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c
new file mode 100644
index 000000000000..ee4661ed28ba
--- /dev/null
+++ b/drivers/ras/amd/atl/access.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * access.c : DF Indirect Access functions
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include "internal.h"
+
+/* Protect the PCI config register pairs used for DF indirect access. */
+static DEFINE_MUTEX(df_indirect_mutex);
+
+/*
+ * Data Fabric Indirect Access uses FICAA/FICAD.
+ *
+ * Fabric Indirect Configuration Access Address (FICAA): constructed based
+ * on the device's Instance Id and the PCI function and register offset of
+ * the desired register.
+ *
+ * Fabric Indirect Configuration Access Data (FICAD): there are FICAD
+ * low and high registers but so far only the low register is needed.
+ *
+ * Use Instance Id 0xFF to indicate a broadcast read.
+ */
+#define DF_BROADCAST 0xFF
+
+#define DF_FICAA_INST_EN BIT(0)
+#define DF_FICAA_REG_NUM GENMASK(10, 1)
+#define DF_FICAA_FUNC_NUM GENMASK(13, 11)
+#define DF_FICAA_INST_ID GENMASK(23, 16)
+
+#define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2)
+
+static u16 get_accessible_node(u16 node)
+{
+ /*
+ * On heterogeneous systems, not all AMD Nodes are accessible
+ * through software-visible registers. The Node ID needs to be
+ * adjusted for register accesses. But its value should not be
+ * changed for the translation methods.
+ */
+ if (df_cfg.flags.heterogeneous) {
+ /* Only Node 0 is accessible on DF3.5 systems. */
+ if (df_cfg.rev == DF3p5)
+ node = 0;
+
+ /*
+ * Only the first Node in each Socket is accessible on
+ * DF4.5 systems, and this is visible to software as one
+ * Fabric per Socket. The Socket ID can be derived from
+ * the Node ID and global shift values.
+ */
+ if (df_cfg.rev == DF4p5)
+ node >>= df_cfg.socket_id_shift - df_cfg.node_id_shift;
+ }
+
+ return node;
+}
+
+static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
+{
+ u32 ficaa_addr = 0x8C, ficad_addr = 0xB8;
+ struct pci_dev *F4;
+ int err = -ENODEV;
+ u32 ficaa = 0;
+
+ node = get_accessible_node(node);
+ if (node >= amd_nb_num())
+ goto out;
+
+ F4 = node_to_amd_nb(node)->link;
+ if (!F4)
+ goto out;
+
+ /* Enable instance-specific access. */
+ if (instance_id != DF_BROADCAST) {
+ ficaa |= FIELD_PREP(DF_FICAA_INST_EN, 1);
+ ficaa |= FIELD_PREP(DF_FICAA_INST_ID, instance_id);
+ }
+
+ /*
+ * The two least-significant bits are masked when inputing the
+ * register offset to FICAA.
+ */
+ reg >>= 2;
+
+ if (df_cfg.flags.legacy_ficaa) {
+ ficaa_addr = 0x5C;
+ ficad_addr = 0x98;
+
+ ficaa |= FIELD_PREP(DF_FICAA_REG_NUM_LEGACY, reg);
+ } else {
+ ficaa |= FIELD_PREP(DF_FICAA_REG_NUM, reg);
+ }
+
+ ficaa |= FIELD_PREP(DF_FICAA_FUNC_NUM, func);
+
+ mutex_lock(&df_indirect_mutex);
+
+ err = pci_write_config_dword(F4, ficaa_addr, ficaa);
+ if (err) {
+ pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
+ goto out_unlock;
+ }
+
+ err = pci_read_config_dword(F4, ficad_addr, lo);
+ if (err)
+ pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
+
+ pr_debug("node=%u inst=0x%x func=0x%x reg=0x%x val=0x%x",
+ node, instance_id, func, reg << 2, *lo);
+
+out_unlock:
+ mutex_unlock(&df_indirect_mutex);
+
+out:
+ return err;
+}
+
+int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
+{
+ return __df_indirect_read(node, func, reg, instance_id, lo);
+}
+
+int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo)
+{
+ return __df_indirect_read(node, func, reg, DF_BROADCAST, lo);
+}
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
new file mode 100644
index 000000000000..6dc4e06305f7
--- /dev/null
+++ b/drivers/ras/amd/atl/core.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * core.c : Module init and base translation functions
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include <linux/module.h>
+#include <asm/cpu_device_id.h>
+
+#include "internal.h"
+
+struct df_config df_cfg __read_mostly;
+
+static int addr_over_limit(struct addr_ctx *ctx)
+{
+ u64 dram_limit_addr;
+
+ if (df_cfg.rev >= DF4)
+ dram_limit_addr = FIELD_GET(DF4_DRAM_LIMIT_ADDR, ctx->map.limit);
+ else
+ dram_limit_addr = FIELD_GET(DF2_DRAM_LIMIT_ADDR, ctx->map.limit);
+
+ dram_limit_addr <<= DF_DRAM_BASE_LIMIT_LSB;
+ dram_limit_addr |= GENMASK(DF_DRAM_BASE_LIMIT_LSB - 1, 0);
+
+ /* Is calculated system address above DRAM limit address? */
+ if (ctx->ret_addr > dram_limit_addr) {
+ atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)",
+ ctx->ret_addr, dram_limit_addr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool legacy_hole_en(struct addr_ctx *ctx)
+{
+ u32 reg = ctx->map.base;
+
+ if (df_cfg.rev >= DF4)
+ reg = ctx->map.ctl;
+
+ return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
+}
+
+static int add_legacy_hole(struct addr_ctx *ctx)
+{
+ u32 dram_hole_base;
+ u8 func = 0;
+
+ if (!legacy_hole_en(ctx))
+ return 0;
+
+ if (df_cfg.rev >= DF4)
+ func = 7;
+
+ if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base))
+ return -EINVAL;
+
+ dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
+
+ if (ctx->ret_addr >= dram_hole_base)
+ ctx->ret_addr += (BIT_ULL(32) - dram_hole_base);
+
+ return 0;
+}
+
+static u64 get_base_addr(struct addr_ctx *ctx)
+{
+ u64 base_addr;
+
+ if (df_cfg.rev >= DF4)
+ base_addr = FIELD_GET(DF4_BASE_ADDR, ctx->map.base);
+ else
+ base_addr = FIELD_GET(DF2_BASE_ADDR, ctx->map.base);
+
+ return base_addr << DF_DRAM_BASE_LIMIT_LSB;
+}
+
+static int add_base_and_hole(struct addr_ctx *ctx)
+{
+ ctx->ret_addr += get_base_addr(ctx);
+
+ if (add_legacy_hole(ctx))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool late_hole_remove(struct addr_ctx *ctx)
+{
+ if (df_cfg.rev == DF3p5)
+ return true;
+
+ if (df_cfg.rev == DF4)
+ return true;
+
+ if (ctx->map.intlv_mode == DF3_6CHAN)
+ return true;
+
+ return false;
+}
+
+unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr)
+{
+ struct addr_ctx ctx;
+
+ if (df_cfg.rev == UNKNOWN)
+ return -EINVAL;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ /* Start from the normalized address */
+ ctx.ret_addr = addr;
+ ctx.inst_id = coh_st_inst_id;
+
+ ctx.inputs.norm_addr = addr;
+ ctx.inputs.socket_id = socket_id;
+ ctx.inputs.die_id = die_id;
+ ctx.inputs.coh_st_inst_id = coh_st_inst_id;
+
+ if (determine_node_id(&ctx, socket_id, die_id))
+ return -EINVAL;
+
+ if (get_address_map(&ctx))
+ return -EINVAL;
+
+ if (denormalize_address(&ctx))
+ return -EINVAL;
+
+ if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
+ return -EINVAL;
+
+ if (dehash_address(&ctx))
+ return -EINVAL;
+
+ if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
+ return -EINVAL;
+
+ if (addr_over_limit(&ctx))
+ return -EINVAL;
+
+ return ctx.ret_addr;
+}
+
+static void check_for_legacy_df_access(void)
+{
+ /*
+ * All Zen-based systems before Family 19h use the legacy
+ * DF Indirect Access (FICAA/FICAD) offsets.
+ */
+ if (boot_cpu_data.x86 < 0x19) {
+ df_cfg.flags.legacy_ficaa = true;
+ return;
+ }
+
+ /* All systems after Family 19h use the current offsets. */
+ if (boot_cpu_data.x86 > 0x19)
+ return;
+
+ /* Some Family 19h systems use the legacy offsets. */
+ switch (boot_cpu_data.x86_model) {
+ case 0x00 ... 0x0f:
+ case 0x20 ... 0x5f:
+ df_cfg.flags.legacy_ficaa = true;
+ }
+}
+
+/*
+ * This library provides functionality for AMD-based systems with a Data Fabric.
+ * The set of systems with a Data Fabric is equivalent to the set of Zen-based systems
+ * and the set of systems with the Scalable MCA feature at this time. However, these
+ * are technically independent things.
+ *
+ * It's possible to match on the PCI IDs of the Data Fabric devices, but this will be
+ * an ever expanding list. Instead, match on the SMCA and Zen features to cover all
+ * relevant systems.
+ */
+static const struct x86_cpu_id amd_atl_cpuids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_SMCA, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_ZEN, NULL),
+ { }
+};
+MODULE_DEVICE_TABLE(x86cpu, amd_atl_cpuids);
+
+static int __init amd_atl_init(void)
+{
+ if (!x86_match_cpu(amd_atl_cpuids))
+ return -ENODEV;
+
+ if (!amd_nb_num())
+ return -ENODEV;
+
+ check_for_legacy_df_access();
+
+ if (get_df_system_info())
+ return -ENODEV;
+
+ /* Increment this module's recount so that it can't be easily unloaded. */
+ __module_get(THIS_MODULE);
+ amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
+
+ pr_info("AMD Address Translation Library initialized");
+ return 0;
+}
+
+/*
+ * Exit function is only needed for testing and debug. Module unload must be
+ * forced to override refcount check.
+ */
+static void __exit amd_atl_exit(void)
+{
+ amd_atl_unregister_decoder();
+}
+
+module_init(amd_atl_init);
+module_exit(amd_atl_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
new file mode 100644
index 000000000000..4ea46262c4f5
--- /dev/null
+++ b/drivers/ras/amd/atl/dehash.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * dehash.c : Functions to account for hashing bits
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include "internal.h"
+
+/*
+ * Verify the interleave bits are correct in the different interleaving
+ * settings.
+ *
+ * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
+ * respective interleaving is disabled.
+ */
+static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+ u8 num_intlv_dies, u8 num_intlv_sockets)
+{
+ if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
+ pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
+ return false;
+ }
+
+ if (ctx->map.num_intlv_dies > num_intlv_dies) {
+ pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies);
+ return false;
+ }
+
+ if (ctx->map.num_intlv_sockets > num_intlv_sockets) {
+ pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets);
+ return false;
+ }
+
+ return true;
+}
+
+static int df2_dehash_addr(struct addr_ctx *ctx)
+{
+ u8 hashed_bit, intlv_bit, intlv_bit_pos;
+
+ if (!map_bits_valid(ctx, 8, 9, 1, 1))
+ return -EINVAL;
+
+ intlv_bit_pos = ctx->map.intlv_bit_pos;
+ intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(12), ctx->ret_addr);
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr);
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr);
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr);
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(intlv_bit_pos);
+
+ return 0;
+}
+
+static int df3_dehash_addr(struct addr_ctx *ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
+ u8 hashed_bit, intlv_bit, intlv_bit_pos;
+
+ if (!map_bits_valid(ctx, 8, 9, 1, 1))
+ return -EINVAL;
+
+ hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl);
+
+ intlv_bit_pos = ctx->map.intlv_bit_pos;
+ intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(intlv_bit_pos);
+
+ /* Calculation complete for 2 channels. Continue for 4 and 8 channels. */
+ if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH)
+ return 0;
+
+ intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(12);
+
+ /* Calculation complete for 4 channels. Continue for 8 channels. */
+ if (ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH)
+ return 0;
+
+ intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(13);
+
+ return 0;
+}
+
+static int df3_6chan_dehash_addr(struct addr_ctx *ctx)
+{
+ u8 intlv_bit_pos = ctx->map.intlv_bit_pos;
+ u8 hashed_bit, intlv_bit, num_intlv_bits;
+ bool hash_ctl_2M, hash_ctl_1G;
+
+ if (ctx->map.intlv_mode != DF3_6CHAN) {
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ num_intlv_bits = ilog2(ctx->map.num_intlv_chan) + 1;
+
+ hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl);
+
+ intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= !!(BIT_ULL(intlv_bit_pos + num_intlv_bits) & ctx->ret_addr);
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(intlv_bit_pos);
+
+ intlv_bit_pos++;
+ intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(intlv_bit_pos);
+
+ intlv_bit_pos++;
+ intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(intlv_bit_pos);
+
+ return 0;
+}
+
+static int df4_dehash_addr(struct addr_ctx *ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
+ u8 hashed_bit, intlv_bit;
+
+ if (!map_bits_valid(ctx, 8, 8, 1, 2))
+ return -EINVAL;
+
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+
+ intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
+
+ if (ctx->map.num_intlv_sockets == 1)
+ hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(8);
+
+ /*
+ * Hashing is possible with socket interleaving, so check the total number
+ * of channels in the system rather than DRAM map interleaving mode.
+ *
+ * Calculation complete for 2 channels. Continue for 4, 8, and 16 channels.
+ */
+ if (ctx->map.total_intlv_chan <= 2)
+ return 0;
+
+ intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(12);
+
+ /* Calculation complete for 4 channels. Continue for 8 and 16 channels. */
+ if (ctx->map.total_intlv_chan <= 4)
+ return 0;
+
+ intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(13);
+
+ /* Calculation complete for 8 channels. Continue for 16 channels. */
+ if (ctx->map.total_intlv_chan <= 8)
+ return 0;
+
+ intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(14);
+
+ return 0;
+}
+
+static int df4p5_dehash_addr(struct addr_ctx *ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
+ u8 hashed_bit, intlv_bit;
+ u64 rehash_vector;
+
+ if (!map_bits_valid(ctx, 8, 8, 1, 2))
+ return -EINVAL;
+
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ /*
+ * Generate a unique address to determine which bits
+ * need to be dehashed.
+ *
+ * Start with a contiguous bitmask for the total
+ * number of channels starting at bit 8.
+ *
+ * Then make a gap in the proper place based on
+ * interleave mode.
+ */
+ rehash_vector = ctx->map.total_intlv_chan - 1;
+ rehash_vector <<= 8;
+
+ if (ctx->map.intlv_mode == DF4p5_NPS2_4CHAN_1K_HASH ||
+ ctx->map.intlv_mode == DF4p5_NPS1_8CHAN_1K_HASH ||
+ ctx->map.intlv_mode == DF4p5_NPS1_16CHAN_1K_HASH)
+ rehash_vector = expand_bits(10, 2, rehash_vector);
+ else
+ rehash_vector = expand_bits(9, 3, rehash_vector);
+
+ if (rehash_vector & BIT_ULL(8)) {
+ intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(40), ctx->ret_addr) & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(8);
+ }
+
+ if (rehash_vector & BIT_ULL(9)) {
+ intlv_bit = FIELD_GET(BIT_ULL(9), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(41), ctx->ret_addr) & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(9);
+ }
+
+ if (rehash_vector & BIT_ULL(12)) {
+ intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(42), ctx->ret_addr) & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(12);
+ }
+
+ if (rehash_vector & BIT_ULL(13)) {
+ intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(43), ctx->ret_addr) & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(13);
+ }
+
+ if (rehash_vector & BIT_ULL(14)) {
+ intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr);
+
+ hashed_bit = intlv_bit;
+ hashed_bit ^= FIELD_GET(BIT_ULL(20), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(25), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(34), ctx->ret_addr) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(44), ctx->ret_addr) & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(14);
+ }
+
+ return 0;
+}
+
+/*
+ * MI300 hash bits
+ * 4K 64K 2M 1G 1T 1T
+ * COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43}
+ * COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44}
+ * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45}
+ * COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46}
+ * COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack
+ * DieID[0] = XOR of addr{12, 20, 27, 34, 41 }
+ * DieID[1] = XOR of addr{13, 21, 28, 35, 42 }
+ */
+static int mi300_dehash_addr(struct addr_ctx *ctx)
+{
+ bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
+ bool hashed_bit, intlv_bit, test_bit;
+ u8 num_intlv_bits, base_bit, i;
+
+ if (!map_bits_valid(ctx, 8, 8, 4, 1))
+ return -EINVAL;
+
+ hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl);
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ /* Channel bits */
+ num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
+
+ for (i = 0; i < num_intlv_bits; i++) {
+ base_bit = 8 + i;
+
+ /* COH_ST_Select[4] jumps to a base bit of 14. */
+ if (i == 4)
+ base_bit = 14;
+
+ intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr;
+
+ hashed_bit = intlv_bit;
+
+ /* 4k hash bit only applies to the first 3 bits. */
+ if (i <= 2) {
+ test_bit = BIT_ULL(12 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_4k;
+ }
+
+ /* Use temporary 'test_bit' value to avoid Sparse warnings. */
+ test_bit = BIT_ULL(15 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_64k;
+ test_bit = BIT_ULL(22 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_2M;
+ test_bit = BIT_ULL(29 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1G;
+ test_bit = BIT_ULL(36 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1T;
+ test_bit = BIT_ULL(43 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(base_bit);
+ }
+
+ /* Die bits */
+ num_intlv_bits = ilog2(ctx->map.num_intlv_dies);
+
+ for (i = 0; i < num_intlv_bits; i++) {
+ base_bit = 12 + i;
+
+ intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr;
+
+ hashed_bit = intlv_bit;
+
+ test_bit = BIT_ULL(20 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_64k;
+ test_bit = BIT_ULL(27 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_2M;
+ test_bit = BIT_ULL(34 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1G;
+ test_bit = BIT_ULL(41 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(base_bit);
+ }
+
+ return 0;
+}
+
+int dehash_address(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ /* No hashing cases. */
+ case NONE:
+ case NOHASH_2CHAN:
+ case NOHASH_4CHAN:
+ case NOHASH_8CHAN:
+ case NOHASH_16CHAN:
+ case NOHASH_32CHAN:
+ /* Hashing bits handled earlier during CS ID calculation. */
+ case DF4_NPS4_3CHAN_HASH:
+ case DF4_NPS2_5CHAN_HASH:
+ case DF4_NPS2_6CHAN_HASH:
+ case DF4_NPS1_10CHAN_HASH:
+ case DF4_NPS1_12CHAN_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ /* No hash physical address bits, so nothing to do. */
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ return 0;
+
+ case DF2_2CHAN_HASH:
+ return df2_dehash_addr(ctx);
+
+ case DF3_COD4_2CHAN_HASH:
+ case DF3_COD2_4CHAN_HASH:
+ case DF3_COD1_8CHAN_HASH:
+ return df3_dehash_addr(ctx);
+
+ case DF3_6CHAN:
+ return df3_6chan_dehash_addr(ctx);
+
+ case DF4_NPS4_2CHAN_HASH:
+ case DF4_NPS2_4CHAN_HASH:
+ case DF4_NPS1_8CHAN_HASH:
+ return df4_dehash_addr(ctx);
+
+ case DF4p5_NPS4_2CHAN_1K_HASH:
+ case DF4p5_NPS4_2CHAN_2K_HASH:
+ case DF4p5_NPS2_4CHAN_2K_HASH:
+ case DF4p5_NPS2_4CHAN_1K_HASH:
+ case DF4p5_NPS1_8CHAN_1K_HASH:
+ case DF4p5_NPS1_8CHAN_2K_HASH:
+ case DF4p5_NPS1_16CHAN_1K_HASH:
+ case DF4p5_NPS1_16CHAN_2K_HASH:
+ return df4p5_dehash_addr(ctx);
+
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ return mi300_dehash_addr(ctx);
+
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+}
diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
new file mode 100644
index 000000000000..e279224288d6
--- /dev/null
+++ b/drivers/ras/amd/atl/denormalize.c
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * denormalize.c : Functions to account for interleaving bits
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include "internal.h"
+
+/*
+ * Returns the Destination Fabric ID. This is the first (lowest)
+ * COH_ST Fabric ID used within a DRAM Address map.
+ */
+static u16 get_dst_fabric_id(struct addr_ctx *ctx)
+{
+ switch (df_cfg.rev) {
+ case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit);
+ case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit);
+ case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit);
+ case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl);
+ case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl);
+ default:
+ atl_debug_on_bad_df_rev();
+ return 0;
+ }
+}
+
+/*
+ * Make a contiguous gap in address for N bits starting at bit P.
+ *
+ * Example:
+ * address bits: [20:0]
+ * # of interleave bits (n): 3
+ * starting interleave bit (p): 8
+ *
+ * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0]
+ * [23 : 11][10 : 8][7 : 0]
+ */
+static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx)
+{
+ return expand_bits(ctx->map.intlv_bit_pos,
+ ctx->map.total_intlv_bits,
+ ctx->ret_addr);
+}
+
+/*
+ * Make two gaps in address for N bits.
+ * First gap is a single bit at bit P.
+ * Second gap is the remaining N-1 bits at bit 12.
+ *
+ * Example:
+ * address bits: [20:0]
+ * # of interleave bits (n): 3
+ * starting interleave bit (p): 8
+ *
+ * First gap
+ * expanded address bits: [20+1 : p+1][p][p-1 : 0]
+ * [21 : 9][8][7 : 0]
+ *
+ * Second gap uses result from first.
+ * r = n - 1; remaining interleave bits
+ * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0]
+ * [23 : 14][13 : 12][11 : 0]
+ */
+static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
+{
+ /* Make a single space at the interleave bit. */
+ u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr);
+
+ /* Done if there's only a single interleave bit. */
+ if (ctx->map.total_intlv_bits <= 1)
+ return denorm_addr;
+
+ /* Make spaces for the remaining interleave bits starting at bit 12. */
+ return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
+}
+
+/*
+ * Make space for CS ID at bits [14:8] as follows:
+ *
+ * 8 channels -> bits [10:8]
+ * 16 channels -> bits [11:8]
+ * 32 channels -> bits [14,11:8]
+ *
+ * 1 die -> N/A
+ * 2 dies -> bit [12]
+ * 4 dies -> bits [13:12]
+ */
+static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
+{
+ u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
+ u64 denorm_addr;
+
+ if (ctx->map.intlv_bit_pos != 8) {
+ pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
+ return ~0ULL;
+ }
+
+ /* Channel bits. Covers up to 4 bits at [11:8]. */
+ denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
+
+ /* Die bits. Always starts at [12]. */
+ denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
+
+ /* Additional channel bit at [14]. */
+ if (num_intlv_bits > 4)
+ denorm_addr = expand_bits(14, 1, denorm_addr);
+
+ return denorm_addr;
+}
+
+/*
+ * Take the current calculated address and shift enough bits in the middle
+ * to make a gap where the interleave bits will be inserted.
+ */
+static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ case NOHASH_2CHAN:
+ case NOHASH_4CHAN:
+ case NOHASH_8CHAN:
+ case NOHASH_16CHAN:
+ case NOHASH_32CHAN:
+ case DF2_2CHAN_HASH:
+ return make_space_for_coh_st_id_at_intlv_bit(ctx);
+
+ case DF3_COD4_2CHAN_HASH:
+ case DF3_COD2_4CHAN_HASH:
+ case DF3_COD1_8CHAN_HASH:
+ case DF4_NPS4_2CHAN_HASH:
+ case DF4_NPS2_4CHAN_HASH:
+ case DF4_NPS1_8CHAN_HASH:
+ case DF4p5_NPS4_2CHAN_1K_HASH:
+ case DF4p5_NPS4_2CHAN_2K_HASH:
+ case DF4p5_NPS2_4CHAN_2K_HASH:
+ case DF4p5_NPS1_8CHAN_2K_HASH:
+ case DF4p5_NPS1_16CHAN_2K_HASH:
+ return make_space_for_coh_st_id_split_2_1(ctx);
+
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ return make_space_for_coh_st_id_mi300(ctx);
+
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return ~0ULL;
+ }
+}
+
+static u16 get_coh_st_id_df2(struct addr_ctx *ctx)
+{
+ u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
+ u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies);
+ u8 num_intlv_bits;
+ u16 coh_st_id, mask;
+
+ coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
+
+ /* Channel interleave bits */
+ num_intlv_bits = order_base_2(ctx->map.num_intlv_chan);
+ mask = GENMASK(num_intlv_bits - 1, 0);
+ coh_st_id &= mask;
+
+ /* Die interleave bits */
+ if (num_die_intlv_bits) {
+ u16 die_bits;
+
+ mask = GENMASK(num_die_intlv_bits - 1, 0);
+ die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask;
+ die_bits >>= df_cfg.die_id_shift;
+
+ coh_st_id |= (die_bits & mask) << num_intlv_bits;
+ num_intlv_bits += num_die_intlv_bits;
+ }
+
+ /* Socket interleave bits */
+ if (num_socket_intlv_bits) {
+ u16 socket_bits;
+
+ mask = GENMASK(num_socket_intlv_bits - 1, 0);
+ socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
+ socket_bits >>= df_cfg.socket_id_shift;
+
+ coh_st_id |= (socket_bits & mask) << num_intlv_bits;
+ }
+
+ return coh_st_id;
+}
+
+static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
+{
+ /*
+ * Start with the original component mask and the number of interleave
+ * bits for the channels in this map.
+ */
+ u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
+ u16 mask = df_cfg.component_id_mask;
+
+ u16 socket_bits;
+
+ /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */
+ u16 coh_st_id = ctx->coh_st_fabric_id & mask;
+
+ /*
+ * Subtract the "base" Destination Fabric ID.
+ * This accounts for systems with disabled Coherent Stations.
+ */
+ coh_st_id -= get_dst_fabric_id(ctx) & mask;
+
+ /*
+ * Generate and use a new mask based on the number of bits
+ * needed for channel interleaving in this map.
+ */
+ mask = GENMASK(num_intlv_bits - 1, 0);
+ coh_st_id &= mask;
+
+ /* Done if socket interleaving is not enabled. */
+ if (ctx->map.num_intlv_sockets <= 1)
+ return coh_st_id;
+
+ /*
+ * Figure out how many bits are needed for the number of
+ * interleaved sockets. And shift the derived Coherent Station ID to account
+ * for these.
+ */
+ num_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
+ coh_st_id <<= num_intlv_bits;
+
+ /* Generate a new mask for the socket interleaving bits. */
+ mask = GENMASK(num_intlv_bits - 1, 0);
+
+ /* Get the socket interleave bits from the original Coherent Station Fabric ID. */
+ socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift;
+
+ /* Apply the appropriate socket bits to the derived Coherent Station ID. */
+ coh_st_id |= socket_bits & mask;
+
+ return coh_st_id;
+}
+
+/*
+ * MI300 hash has:
+ * (C)hannel[3:0] = coh_st_id[3:0]
+ * (S)tack[0] = coh_st_id[4]
+ * (D)ie[1:0] = coh_st_id[6:5]
+ *
+ * Hashed coh_st_id is swizzled so that Stack bit is at the end.
+ * coh_st_id = SDDCCCC
+ */
+static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
+{
+ u8 channel_bits, die_bits, stack_bit;
+ u16 die_id;
+
+ /* Subtract the "base" Destination Fabric ID. */
+ ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
+
+ die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
+
+ channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
+ stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
+ die_bits = die_id << 4;
+
+ return stack_bit | die_bits | channel_bits;
+}
+
+/*
+ * Derive the correct Coherent Station ID that represents the interleave bits
+ * used within the system physical address. This accounts for the
+ * interleave mode, number of interleaved channels/dies/sockets, and
+ * other system/mode-specific bit swizzling.
+ *
+ * Returns: Coherent Station ID on success.
+ * All bits set on error.
+ */
+static u16 calculate_coh_st_id(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ case NOHASH_2CHAN:
+ case NOHASH_4CHAN:
+ case NOHASH_8CHAN:
+ case NOHASH_16CHAN:
+ case NOHASH_32CHAN:
+ case DF3_COD4_2CHAN_HASH:
+ case DF3_COD2_4CHAN_HASH:
+ case DF3_COD1_8CHAN_HASH:
+ case DF2_2CHAN_HASH:
+ return get_coh_st_id_df2(ctx);
+
+ case DF4_NPS4_2CHAN_HASH:
+ case DF4_NPS2_4CHAN_HASH:
+ case DF4_NPS1_8CHAN_HASH:
+ case DF4p5_NPS4_2CHAN_1K_HASH:
+ case DF4p5_NPS4_2CHAN_2K_HASH:
+ case DF4p5_NPS2_4CHAN_2K_HASH:
+ case DF4p5_NPS1_8CHAN_2K_HASH:
+ case DF4p5_NPS1_16CHAN_2K_HASH:
+ return get_coh_st_id_df4(ctx);
+
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ return get_coh_st_id_mi300(ctx);
+
+ /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
+ case DF4p5_NPS2_4CHAN_1K_HASH:
+ case DF4p5_NPS1_8CHAN_1K_HASH:
+ case DF4p5_NPS1_16CHAN_1K_HASH:
+ return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
+
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return ~0;
+ }
+}
+
+static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
+{
+ return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos);
+}
+
+static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
+{
+ /* Insert coh_st_id[0] at the interleave bit. */
+ denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos;
+
+ /* Insert coh_st_id[2:1] at bit 12. */
+ denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11;
+
+ return denorm_addr;
+}
+
+static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
+{
+ /* Insert coh_st_id[1:0] at bit 8. */
+ denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8;
+
+ /*
+ * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3.
+ * Grab both because bit 3 will be clear if unused.
+ */
+ denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10;
+
+ return denorm_addr;
+}
+
+static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
+{
+ switch (ctx->map.intlv_mode) {
+ case NOHASH_2CHAN:
+ case NOHASH_4CHAN:
+ case NOHASH_8CHAN:
+ case NOHASH_16CHAN:
+ case NOHASH_32CHAN:
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ case DF2_2CHAN_HASH:
+ return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
+
+ case DF3_COD4_2CHAN_HASH:
+ case DF3_COD2_4CHAN_HASH:
+ case DF3_COD1_8CHAN_HASH:
+ case DF4_NPS4_2CHAN_HASH:
+ case DF4_NPS2_4CHAN_HASH:
+ case DF4_NPS1_8CHAN_HASH:
+ case DF4p5_NPS4_2CHAN_1K_HASH:
+ case DF4p5_NPS4_2CHAN_2K_HASH:
+ case DF4p5_NPS2_4CHAN_2K_HASH:
+ case DF4p5_NPS1_8CHAN_2K_HASH:
+ case DF4p5_NPS1_16CHAN_2K_HASH:
+ return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id);
+
+ case DF4p5_NPS2_4CHAN_1K_HASH:
+ case DF4p5_NPS1_8CHAN_1K_HASH:
+ case DF4p5_NPS1_16CHAN_1K_HASH:
+ return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id);
+
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return ~0ULL;
+ }
+}
+
+/*
+ * MI300 systems have a fixed, hardware-defined physical-to-logical
+ * Coherent Station mapping. The Remap registers are not used.
+ */
+static const u16 phy_to_log_coh_st_map_mi300[] = {
+ 12, 13, 14, 15,
+ 8, 9, 10, 11,
+ 4, 5, 6, 7,
+ 0, 1, 2, 3,
+ 28, 29, 30, 31,
+ 24, 25, 26, 27,
+ 20, 21, 22, 23,
+ 16, 17, 18, 19,
+};
+
+static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
+{
+ if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) {
+ atl_debug(ctx, "Instance ID out of range");
+ return ~0;
+ }
+
+ return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
+}
+
+static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
+{
+ u16 component_id, log_fabric_id;
+
+ /* Start with the physical COH_ST Fabric ID. */
+ u16 phys_fabric_id = ctx->coh_st_fabric_id;
+
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return get_logical_coh_st_fabric_id_mi300(ctx);
+
+ /* Skip logical ID lookup if remapping is disabled. */
+ if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
+ ctx->map.intlv_mode != DF3_6CHAN)
+ return phys_fabric_id;
+
+ /* Mask off the Node ID bits to get the "local" Component ID. */
+ component_id = phys_fabric_id & df_cfg.component_id_mask;
+
+ /*
+ * Search the list of logical Component IDs for the one that
+ * matches this physical Component ID.
+ */
+ for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) {
+ if (ctx->map.remap_array[log_fabric_id] == component_id)
+ break;
+ }
+
+ if (log_fabric_id == MAX_COH_ST_CHANNELS)
+ atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
+ log_fabric_id);
+
+ /* Get the Node ID bits from the physical and apply to the logical. */
+ return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
+}
+
+static int denorm_addr_common(struct addr_ctx *ctx)
+{
+ u64 denorm_addr;
+ u16 coh_st_id;
+
+ /*
+ * Convert the original physical COH_ST Fabric ID to a logical value.
+ * This is required for non-power-of-two and other interleaving modes.
+ */
+ ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx);
+
+ denorm_addr = make_space_for_coh_st_id(ctx);
+ coh_st_id = calculate_coh_st_id(ctx);
+ ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id);
+ return 0;
+}
+
+static int denorm_addr_df3_6chan(struct addr_ctx *ctx)
+{
+ u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask;
+ u8 total_intlv_bits = ctx->map.total_intlv_bits;
+ u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos;
+ u64 msb_intlv_bits, temp_addr_a, temp_addr_b;
+ u8 np2_bits = ctx->map.np2_bits;
+
+ if (ctx->map.intlv_mode != DF3_6CHAN)
+ return -EINVAL;
+
+ /*
+ * 'np2_bits' holds the number of bits needed to cover the
+ * amount of memory (rounded up) in this map using 64K chunks.
+ *
+ * Example:
+ * Total memory in map: 6GB
+ * Rounded up to next power-of-2: 8GB
+ * Number of 64K chunks: 0x20000
+ * np2_bits = log2(# of chunks): 17
+ *
+ * Get the two most-significant interleave bits from the
+ * input address based on the following:
+ *
+ * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits]
+ */
+ low_bit = 14 + np2_bits - total_intlv_bits;
+ msb_intlv_bits = ctx->ret_addr >> low_bit;
+ msb_intlv_bits &= 0x3;
+
+ /*
+ * If MSB are 11b, then logical COH_ST ID is 6 or 7.
+ * Need to adjust based on the mod3 result.
+ */
+ if (msb_intlv_bits == 3) {
+ u8 addr_mod, phys_addr_msb, msb_coh_st_id;
+
+ /* Get the remaining interleave bits from the input address. */
+ temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr;
+ temp_addr_b >>= intlv_bit;
+
+ /* Calculate the logical COH_ST offset based on mod3. */
+ addr_mod = temp_addr_b % 3;
+
+ /* Get COH_ST ID bits [2:1]. */
+ msb_coh_st_id = (coh_st_id >> 1) & 0x3;
+
+ /* Get the bit that starts the physical address bits. */
+ phys_addr_msb = (intlv_bit + np2_bits + 1);
+ phys_addr_msb &= BIT(0);
+ phys_addr_msb++;
+ phys_addr_msb *= 3 - addr_mod + msb_coh_st_id;
+ phys_addr_msb %= 3;
+
+ /* Move the physical address MSB to the correct place. */
+ temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit);
+
+ /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */
+ coh_st_id &= BIT(0);
+ coh_st_id |= GENMASK(2, 1);
+ } else {
+ temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr;
+ temp_addr_b >>= intlv_bit;
+ }
+
+ temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr;
+ temp_addr_b <<= intlv_bit + total_intlv_bits;
+
+ ctx->ret_addr = temp_addr_a | temp_addr_b;
+ ctx->ret_addr |= coh_st_id << intlv_bit;
+ return 0;
+}
+
+static int denorm_addr_df4_np2(struct addr_ctx *ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
+ u16 group, group_offset, log_coh_st_offset;
+ unsigned int mod_value, shift_value;
+ u16 mask = df_cfg.component_id_mask;
+ u64 temp_addr_a, temp_addr_b;
+ bool hash_pa8, hashed_bit;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4_NPS4_3CHAN_HASH:
+ mod_value = 3;
+ shift_value = 13;
+ break;
+ case DF4_NPS2_6CHAN_HASH:
+ mod_value = 3;
+ shift_value = 12;
+ break;
+ case DF4_NPS1_12CHAN_HASH:
+ mod_value = 3;
+ shift_value = 11;
+ break;
+ case DF4_NPS2_5CHAN_HASH:
+ mod_value = 5;
+ shift_value = 13;
+ break;
+ case DF4_NPS1_10CHAN_HASH:
+ mod_value = 5;
+ shift_value = 12;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ };
+
+ if (ctx->map.num_intlv_sockets == 1) {
+ hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr;
+ temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr);
+ } else {
+ hash_pa8 = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
+ temp_addr_a = ctx->ret_addr;
+ }
+
+ /* Make a gap for the real bit [8]. */
+ temp_addr_a = expand_bits(8, 1, temp_addr_a);
+
+ /* Make an additional gap for bits [13:12], as appropriate.*/
+ if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH ||
+ ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) {
+ temp_addr_a = expand_bits(13, 1, temp_addr_a);
+ } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) {
+ temp_addr_a = expand_bits(12, 2, temp_addr_a);
+ }
+
+ /* Keep bits [13:0]. */
+ temp_addr_a &= GENMASK_ULL(13, 0);
+
+ /* Get the appropriate high bits. */
+ shift_value += 1 - ilog2(ctx->map.num_intlv_sockets);
+ temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr;
+ temp_addr_b >>= shift_value;
+ temp_addr_b *= mod_value;
+
+ /*
+ * Coherent Stations are divided into groups.
+ *
+ * Multiples of 3 (mod3) are divided into quadrants.
+ * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8]
+ * [3, 4, 5] [9, 10, 11]
+ *
+ * Multiples of 5 (mod5) are divided into sides.
+ * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
+ */
+
+ /*
+ * Calculate the logical offset for the COH_ST within its DRAM Address map.
+ * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then
+ * log_coh_st_offset = 8 - 5 = 3
+ */
+ log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask);
+
+ /*
+ * Figure out the group number.
+ *
+ * Following above example,
+ * log_coh_st_offset = 3
+ * mod_value = 5
+ * group = 3 / 5 = 0
+ */
+ group = log_coh_st_offset / mod_value;
+
+ /*
+ * Figure out the offset within the group.
+ *
+ * Following above example,
+ * log_coh_st_offset = 3
+ * mod_value = 5
+ * group_offset = 3 % 5 = 3
+ */
+ group_offset = log_coh_st_offset % mod_value;
+
+ /* Adjust group_offset if the hashed bit [8] is set. */
+ if (hash_pa8) {
+ if (!group_offset)
+ group_offset = mod_value - 1;
+ else
+ group_offset--;
+ }
+
+ /* Add in the group offset to the high bits. */
+ temp_addr_b += group_offset;
+
+ /* Shift the high bits to the proper starting position. */
+ temp_addr_b <<= 14;
+
+ /* Combine the high and low bits together. */
+ ctx->ret_addr = temp_addr_a | temp_addr_b;
+
+ /* Account for hashing here instead of in dehash_address(). */
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+
+ hashed_bit = !!hash_pa8;
+ hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
+ hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
+
+ ctx->ret_addr |= hashed_bit << 8;
+
+ /* Done for 3 and 5 channel. */
+ if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH ||
+ ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH)
+ return 0;
+
+ /* Select the proper 'group' bit to use for Bit 13. */
+ if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH)
+ hashed_bit = !!(group & BIT(1));
+ else
+ hashed_bit = group & BIT(0);
+
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
+
+ ctx->ret_addr |= hashed_bit << 13;
+
+ /* Done for 6 and 10 channel. */
+ if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH)
+ return 0;
+
+ hashed_bit = group & BIT(0);
+ hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
+
+ ctx->ret_addr |= hashed_bit << 12;
+ return 0;
+}
+
+int denormalize_address(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ case NONE:
+ return 0;
+ case DF4_NPS4_3CHAN_HASH:
+ case DF4_NPS2_6CHAN_HASH:
+ case DF4_NPS1_12CHAN_HASH:
+ case DF4_NPS2_5CHAN_HASH:
+ case DF4_NPS1_10CHAN_HASH:
+ return denorm_addr_df4_np2(ctx);
+ case DF3_6CHAN:
+ return denorm_addr_df3_6chan(ctx);
+ default:
+ return denorm_addr_common(ctx);
+ }
+}
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
new file mode 100644
index 000000000000..5de69e0bb0f9
--- /dev/null
+++ b/drivers/ras/amd/atl/internal.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Address Translation Library
+ *
+ * internal.h : Helper functions and common defines
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#ifndef __AMD_ATL_INTERNAL_H__
+#define __AMD_ATL_INTERNAL_H__
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/ras.h>
+
+#include <asm/amd_nb.h>
+
+#include "reg_fields.h"
+
+/* Maximum possible number of Coherent Stations within a single Data Fabric. */
+#define MAX_COH_ST_CHANNELS 32
+
+/* PCI ID for Zen4 Server DF Function 0. */
+#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022
+
+/* PCI IDs for MI300 DF Function 0. */
+#define DF_FUNC0_ID_MI300 0x15281022
+
+/* Shift needed for adjusting register values to true values. */
+#define DF_DRAM_BASE_LIMIT_LSB 28
+#define MI300_DRAM_LIMIT_LSB 20
+
+enum df_revisions {
+ UNKNOWN,
+ DF2,
+ DF3,
+ DF3p5,
+ DF4,
+ DF4p5,
+};
+
+/* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. */
+enum intlv_modes {
+ NONE = 0x00,
+ NOHASH_2CHAN = 0x01,
+ NOHASH_4CHAN = 0x03,
+ NOHASH_8CHAN = 0x05,
+ DF3_6CHAN = 0x06,
+ NOHASH_16CHAN = 0x07,
+ NOHASH_32CHAN = 0x08,
+ DF3_COD4_2CHAN_HASH = 0x0C,
+ DF3_COD2_4CHAN_HASH = 0x0D,
+ DF3_COD1_8CHAN_HASH = 0x0E,
+ DF4_NPS4_2CHAN_HASH = 0x10,
+ DF4_NPS2_4CHAN_HASH = 0x11,
+ DF4_NPS1_8CHAN_HASH = 0x12,
+ DF4_NPS4_3CHAN_HASH = 0x13,
+ DF4_NPS2_6CHAN_HASH = 0x14,
+ DF4_NPS1_12CHAN_HASH = 0x15,
+ DF4_NPS2_5CHAN_HASH = 0x16,
+ DF4_NPS1_10CHAN_HASH = 0x17,
+ MI3_HASH_8CHAN = 0x18,
+ MI3_HASH_16CHAN = 0x19,
+ MI3_HASH_32CHAN = 0x1A,
+ DF2_2CHAN_HASH = 0x21,
+ /* DF4.5 modes are all IntLvNumChan + 0x20 */
+ DF4p5_NPS1_16CHAN_1K_HASH = 0x2C,
+ DF4p5_NPS0_24CHAN_1K_HASH = 0x2E,
+ DF4p5_NPS4_2CHAN_1K_HASH = 0x30,
+ DF4p5_NPS2_4CHAN_1K_HASH = 0x31,
+ DF4p5_NPS1_8CHAN_1K_HASH = 0x32,
+ DF4p5_NPS4_3CHAN_1K_HASH = 0x33,
+ DF4p5_NPS2_6CHAN_1K_HASH = 0x34,
+ DF4p5_NPS1_12CHAN_1K_HASH = 0x35,
+ DF4p5_NPS2_5CHAN_1K_HASH = 0x36,
+ DF4p5_NPS1_10CHAN_1K_HASH = 0x37,
+ DF4p5_NPS4_2CHAN_2K_HASH = 0x40,
+ DF4p5_NPS2_4CHAN_2K_HASH = 0x41,
+ DF4p5_NPS1_8CHAN_2K_HASH = 0x42,
+ DF4p5_NPS1_16CHAN_2K_HASH = 0x43,
+ DF4p5_NPS4_3CHAN_2K_HASH = 0x44,
+ DF4p5_NPS2_6CHAN_2K_HASH = 0x45,
+ DF4p5_NPS1_12CHAN_2K_HASH = 0x46,
+ DF4p5_NPS0_24CHAN_2K_HASH = 0x47,
+ DF4p5_NPS2_5CHAN_2K_HASH = 0x48,
+ DF4p5_NPS1_10CHAN_2K_HASH = 0x49,
+};
+
+struct df_flags {
+ __u8 legacy_ficaa : 1,
+ socket_id_shift_quirk : 1,
+ heterogeneous : 1,
+ __reserved_0 : 5;
+};
+
+struct df_config {
+ enum df_revisions rev;
+
+ /*
+ * These masks operate on the 16-bit Coherent Station IDs,
+ * e.g. Instance, Fabric, Destination, etc.
+ */
+ u16 component_id_mask;
+ u16 die_id_mask;
+ u16 node_id_mask;
+ u16 socket_id_mask;
+
+ /*
+ * Least-significant bit of Node ID portion of the
+ * system-wide Coherent Station Fabric ID.
+ */
+ u8 node_id_shift;
+
+ /*
+ * Least-significant bit of Die portion of the Node ID.
+ * Adjusted to include the Node ID shift in order to apply
+ * to the Coherent Station Fabric ID.
+ */
+ u8 die_id_shift;
+
+ /*
+ * Least-significant bit of Socket portion of the Node ID.
+ * Adjusted to include the Node ID shift in order to apply
+ * to the Coherent Station Fabric ID.
+ */
+ u8 socket_id_shift;
+
+ /* Number of DRAM Address maps visible in a Coherent Station. */
+ u8 num_coh_st_maps;
+
+ /* Global flags to handle special cases. */
+ struct df_flags flags;
+};
+
+extern struct df_config df_cfg;
+
+struct dram_addr_map {
+ /*
+ * Each DRAM Address Map can operate independently
+ * in different interleaving modes.
+ */
+ enum intlv_modes intlv_mode;
+
+ /* System-wide number for this address map. */
+ u8 num;
+
+ /* Raw register values */
+ u32 base;
+ u32 limit;
+ u32 ctl;
+ u32 intlv;
+
+ /*
+ * Logical to Physical Coherent Station Remapping array
+ *
+ * Index: Logical Coherent Station Instance ID
+ * Value: Physical Coherent Station Instance ID
+ *
+ * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id]
+ */
+ u8 remap_array[MAX_COH_ST_CHANNELS];
+
+ /*
+ * Number of bits covering DRAM Address map 0
+ * when interleaving is non-power-of-2.
+ *
+ * Used only for DF3_6CHAN.
+ */
+ u8 np2_bits;
+
+ /* Position of the 'interleave bit'. */
+ u8 intlv_bit_pos;
+ /* Number of channels interleaved in this map. */
+ u8 num_intlv_chan;
+ /* Number of dies interleaved in this map. */
+ u8 num_intlv_dies;
+ /* Number of sockets interleaved in this map. */
+ u8 num_intlv_sockets;
+ /*
+ * Total number of channels interleaved accounting
+ * for die and socket interleaving.
+ */
+ u8 total_intlv_chan;
+ /* Total bits needed to cover 'total_intlv_chan'. */
+ u8 total_intlv_bits;
+};
+
+/* Original input values cached for debug printing. */
+struct addr_ctx_inputs {
+ u64 norm_addr;
+ u8 socket_id;
+ u8 die_id;
+ u8 coh_st_inst_id;
+};
+
+struct addr_ctx {
+ u64 ret_addr;
+
+ struct addr_ctx_inputs inputs;
+ struct dram_addr_map map;
+
+ /* AMD Node ID calculated from Socket and Die IDs. */
+ u8 node_id;
+
+ /*
+ * Coherent Station Instance ID
+ * Local ID used within a 'node'.
+ */
+ u16 inst_id;
+
+ /*
+ * Coherent Station Fabric ID
+ * System-wide ID that includes 'node' bits.
+ */
+ u16 coh_st_fabric_id;
+};
+
+int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
+int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo);
+
+int get_df_system_info(void);
+int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num);
+int get_addr_hash_mi300(void);
+
+int get_address_map(struct addr_ctx *ctx);
+
+int denormalize_address(struct addr_ctx *ctx);
+int dehash_address(struct addr_ctx *ctx);
+
+unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
+unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+
+/*
+ * Make a gap in @data that is @num_bits long starting at @bit_num.
+ * e.g. data = 11111111'b
+ * bit_num = 3
+ * num_bits = 2
+ * result = 1111100111'b
+ */
+static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data)
+{
+ u64 temp1, temp2;
+
+ if (!num_bits)
+ return data;
+
+ if (!bit_num) {
+ WARN_ON_ONCE(num_bits >= BITS_PER_LONG);
+ return data << num_bits;
+ }
+
+ WARN_ON_ONCE(bit_num >= BITS_PER_LONG);
+
+ temp1 = data & GENMASK_ULL(bit_num - 1, 0);
+
+ temp2 = data & GENMASK_ULL(63, bit_num);
+ temp2 <<= num_bits;
+
+ return temp1 | temp2;
+}
+
+/*
+ * Remove bits in @data between @low_bit and @high_bit inclusive.
+ * e.g. data = XXXYYZZZ'b
+ * low_bit = 3
+ * high_bit = 4
+ * result = XXXZZZ'b
+ */
+static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data)
+{
+ u64 temp1, temp2;
+
+ WARN_ON_ONCE(high_bit >= BITS_PER_LONG);
+ WARN_ON_ONCE(low_bit >= BITS_PER_LONG);
+ WARN_ON_ONCE(low_bit > high_bit);
+
+ if (!low_bit)
+ return data >> (high_bit++);
+
+ temp1 = GENMASK_ULL(low_bit - 1, 0) & data;
+ temp2 = GENMASK_ULL(63, high_bit + 1) & data;
+ temp2 >>= high_bit - low_bit + 1;
+
+ return temp1 | temp2;
+}
+
+#define atl_debug(ctx, fmt, arg...) \
+ pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\
+ (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\
+ (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg)
+
+static inline void atl_debug_on_bad_df_rev(void)
+{
+ pr_debug("Unrecognized DF rev: %u", df_cfg.rev);
+}
+
+static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
+{
+ atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
+}
+
+#endif /* __AMD_ATL_INTERNAL_H__ */
diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
new file mode 100644
index 000000000000..8b908e8d7495
--- /dev/null
+++ b/drivers/ras/amd/atl/map.c
@@ -0,0 +1,682 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * map.c : Functions to read and decode DRAM address maps
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include "internal.h"
+
+static int df2_get_intlv_mode(struct addr_ctx *ctx)
+{
+ ctx->map.intlv_mode = FIELD_GET(DF2_INTLV_NUM_CHAN, ctx->map.base);
+
+ if (ctx->map.intlv_mode == 8)
+ ctx->map.intlv_mode = DF2_2CHAN_HASH;
+
+ if (ctx->map.intlv_mode != NONE &&
+ ctx->map.intlv_mode != NOHASH_2CHAN &&
+ ctx->map.intlv_mode != DF2_2CHAN_HASH)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int df3_get_intlv_mode(struct addr_ctx *ctx)
+{
+ ctx->map.intlv_mode = FIELD_GET(DF3_INTLV_NUM_CHAN, ctx->map.base);
+ return 0;
+}
+
+static int df3p5_get_intlv_mode(struct addr_ctx *ctx)
+{
+ ctx->map.intlv_mode = FIELD_GET(DF3p5_INTLV_NUM_CHAN, ctx->map.base);
+
+ if (ctx->map.intlv_mode == DF3_6CHAN)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int df4_get_intlv_mode(struct addr_ctx *ctx)
+{
+ ctx->map.intlv_mode = FIELD_GET(DF4_INTLV_NUM_CHAN, ctx->map.intlv);
+
+ if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH ||
+ ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH ||
+ ctx->map.intlv_mode == DF3_COD1_8CHAN_HASH ||
+ ctx->map.intlv_mode == DF3_6CHAN)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int df4p5_get_intlv_mode(struct addr_ctx *ctx)
+{
+ ctx->map.intlv_mode = FIELD_GET(DF4p5_INTLV_NUM_CHAN, ctx->map.intlv);
+
+ if (ctx->map.intlv_mode <= NOHASH_32CHAN)
+ return 0;
+
+ if (ctx->map.intlv_mode >= MI3_HASH_8CHAN &&
+ ctx->map.intlv_mode <= MI3_HASH_32CHAN)
+ return 0;
+
+ /*
+ * Modes matching the ranges above are returned as-is.
+ *
+ * All other modes are "fixed up" by adding 20h to make a unique value.
+ */
+ ctx->map.intlv_mode += 0x20;
+
+ return 0;
+}
+
+static int get_intlv_mode(struct addr_ctx *ctx)
+{
+ int ret;
+
+ switch (df_cfg.rev) {
+ case DF2:
+ ret = df2_get_intlv_mode(ctx);
+ break;
+ case DF3:
+ ret = df3_get_intlv_mode(ctx);
+ break;
+ case DF3p5:
+ ret = df3p5_get_intlv_mode(ctx);
+ break;
+ case DF4:
+ ret = df4_get_intlv_mode(ctx);
+ break;
+ case DF4p5:
+ ret = df4p5_get_intlv_mode(ctx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ atl_debug_on_bad_df_rev();
+
+ return ret;
+}
+
+static u64 get_hi_addr_offset(u32 reg_dram_offset)
+{
+ u8 shift = DF_DRAM_BASE_LIMIT_LSB;
+ u64 hi_addr_offset;
+
+ switch (df_cfg.rev) {
+ case DF2:
+ hi_addr_offset = FIELD_GET(DF2_HI_ADDR_OFFSET, reg_dram_offset);
+ break;
+ case DF3:
+ case DF3p5:
+ hi_addr_offset = FIELD_GET(DF3_HI_ADDR_OFFSET, reg_dram_offset);
+ break;
+ case DF4:
+ case DF4p5:
+ hi_addr_offset = FIELD_GET(DF4_HI_ADDR_OFFSET, reg_dram_offset);
+ break;
+ default:
+ hi_addr_offset = 0;
+ atl_debug_on_bad_df_rev();
+ }
+
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ shift = MI300_DRAM_LIMIT_LSB;
+
+ return hi_addr_offset << shift;
+}
+
+/*
+ * Returns: 0 if offset is disabled.
+ * 1 if offset is enabled.
+ * -EINVAL on error.
+ */
+static int get_dram_offset(struct addr_ctx *ctx, u64 *norm_offset)
+{
+ u32 reg_dram_offset;
+ u8 map_num;
+
+ /* Should not be called for map 0. */
+ if (!ctx->map.num) {
+ atl_debug(ctx, "Trying to find DRAM offset for map 0");
+ return -EINVAL;
+ }
+
+ /*
+ * DramOffset registers don't exist for map 0, so the base register
+ * actually refers to map 1.
+ * Adjust the map_num for the register offsets.
+ */
+ map_num = ctx->map.num - 1;
+
+ if (df_cfg.rev >= DF4) {
+ /* Read D18F7x140 (DramOffset) */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x140 + (4 * map_num),
+ ctx->inst_id, &reg_dram_offset))
+ return -EINVAL;
+
+ } else {
+ /* Read D18F0x1B4 (DramOffset) */
+ if (df_indirect_read_instance(ctx->node_id, 0, 0x1B4 + (4 * map_num),
+ ctx->inst_id, &reg_dram_offset))
+ return -EINVAL;
+ }
+
+ if (!FIELD_GET(DF_HI_ADDR_OFFSET_EN, reg_dram_offset))
+ return 0;
+
+ *norm_offset = get_hi_addr_offset(reg_dram_offset);
+
+ return 1;
+}
+
+static int df3_6ch_get_dram_addr_map(struct addr_ctx *ctx)
+{
+ u16 dst_fabric_id = FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit);
+ u8 i, j, shift = 4, mask = 0xF;
+ u32 reg, offset = 0x60;
+ u16 dst_node_id;
+
+ /* Get Socket 1 register. */
+ if (dst_fabric_id & df_cfg.socket_id_mask)
+ offset = 0x68;
+
+ /* Read D18F0x06{0,8} (DF::Skt0CsTargetRemap0)/(DF::Skt0CsTargetRemap1) */
+ if (df_indirect_read_broadcast(ctx->node_id, 0, offset, &reg))
+ return -EINVAL;
+
+ /* Save 8 remap entries. */
+ for (i = 0, j = 0; i < 8; i++, j++)
+ ctx->map.remap_array[i] = (reg >> (j * shift)) & mask;
+
+ dst_node_id = dst_fabric_id & df_cfg.node_id_mask;
+ dst_node_id >>= df_cfg.node_id_shift;
+
+ /* Read D18F2x090 (DF::Np2ChannelConfig) */
+ if (df_indirect_read_broadcast(dst_node_id, 2, 0x90, &reg))
+ return -EINVAL;
+
+ ctx->map.np2_bits = FIELD_GET(DF_LOG2_ADDR_64K_SPACE0, reg);
+ return 0;
+}
+
+static int df2_get_dram_addr_map(struct addr_ctx *ctx)
+{
+ /* Read D18F0x110 (DramBaseAddress). */
+ if (df_indirect_read_instance(ctx->node_id, 0, 0x110 + (8 * ctx->map.num),
+ ctx->inst_id, &ctx->map.base))
+ return -EINVAL;
+
+ /* Read D18F0x114 (DramLimitAddress). */
+ if (df_indirect_read_instance(ctx->node_id, 0, 0x114 + (8 * ctx->map.num),
+ ctx->inst_id, &ctx->map.limit))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int df3_get_dram_addr_map(struct addr_ctx *ctx)
+{
+ if (df2_get_dram_addr_map(ctx))
+ return -EINVAL;
+
+ /* Read D18F0x3F8 (DfGlobalCtl). */
+ if (df_indirect_read_instance(ctx->node_id, 0, 0x3F8,
+ ctx->inst_id, &ctx->map.ctl))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int df4_get_dram_addr_map(struct addr_ctx *ctx)
+{
+ u8 remap_sel, i, j, shift = 4, mask = 0xF;
+ u32 remap_reg;
+
+ /* Read D18F7xE00 (DramBaseAddress). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0xE00 + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.base))
+ return -EINVAL;
+
+ /* Read D18F7xE04 (DramLimitAddress). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0xE04 + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.limit))
+ return -EINVAL;
+
+ /* Read D18F7xE08 (DramAddressCtl). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0xE08 + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.ctl))
+ return -EINVAL;
+
+ /* Read D18F7xE0C (DramAddressIntlv). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0xE0C + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.intlv))
+ return -EINVAL;
+
+ /* Check if Remap Enable bit is valid. */
+ if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl))
+ return 0;
+
+ /* Fill with bogus values, because '0' is a valid value. */
+ memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array));
+
+ /* Get Remap registers. */
+ remap_sel = FIELD_GET(DF4_REMAP_SEL, ctx->map.ctl);
+
+ /* Read D18F7x180 (CsTargetRemap0A). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (8 * remap_sel),
+ ctx->inst_id, &remap_reg))
+ return -EINVAL;
+
+ /* Save first 8 remap entries. */
+ for (i = 0, j = 0; i < 8; i++, j++)
+ ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask;
+
+ /* Read D18F7x184 (CsTargetRemap0B). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (8 * remap_sel),
+ ctx->inst_id, &remap_reg))
+ return -EINVAL;
+
+ /* Save next 8 remap entries. */
+ for (i = 8, j = 0; i < 16; i++, j++)
+ ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask;
+
+ return 0;
+}
+
+static int df4p5_get_dram_addr_map(struct addr_ctx *ctx)
+{
+ u8 remap_sel, i, j, shift = 5, mask = 0x1F;
+ u32 remap_reg;
+
+ /* Read D18F7x200 (DramBaseAddress). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x200 + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.base))
+ return -EINVAL;
+
+ /* Read D18F7x204 (DramLimitAddress). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x204 + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.limit))
+ return -EINVAL;
+
+ /* Read D18F7x208 (DramAddressCtl). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x208 + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.ctl))
+ return -EINVAL;
+
+ /* Read D18F7x20C (DramAddressIntlv). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x20C + (16 * ctx->map.num),
+ ctx->inst_id, &ctx->map.intlv))
+ return -EINVAL;
+
+ /* Check if Remap Enable bit is valid. */
+ if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl))
+ return 0;
+
+ /* Fill with bogus values, because '0' is a valid value. */
+ memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array));
+
+ /* Get Remap registers. */
+ remap_sel = FIELD_GET(DF4p5_REMAP_SEL, ctx->map.ctl);
+
+ /* Read D18F7x180 (CsTargetRemap0A). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (24 * remap_sel),
+ ctx->inst_id, &remap_reg))
+ return -EINVAL;
+
+ /* Save first 6 remap entries. */
+ for (i = 0, j = 0; i < 6; i++, j++)
+ ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask;
+
+ /* Read D18F7x184 (CsTargetRemap0B). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (24 * remap_sel),
+ ctx->inst_id, &remap_reg))
+ return -EINVAL;
+
+ /* Save next 6 remap entries. */
+ for (i = 6, j = 0; i < 12; i++, j++)
+ ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask;
+
+ /* Read D18F7x188 (CsTargetRemap0C). */
+ if (df_indirect_read_instance(ctx->node_id, 7, 0x188 + (24 * remap_sel),
+ ctx->inst_id, &remap_reg))
+ return -EINVAL;
+
+ /* Save next 6 remap entries. */
+ for (i = 12, j = 0; i < 18; i++, j++)
+ ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask;
+
+ return 0;
+}
+
+static int get_dram_addr_map(struct addr_ctx *ctx)
+{
+ switch (df_cfg.rev) {
+ case DF2: return df2_get_dram_addr_map(ctx);
+ case DF3:
+ case DF3p5: return df3_get_dram_addr_map(ctx);
+ case DF4: return df4_get_dram_addr_map(ctx);
+ case DF4p5: return df4p5_get_dram_addr_map(ctx);
+ default:
+ atl_debug_on_bad_df_rev();
+ return -EINVAL;
+ }
+}
+
+static int get_coh_st_fabric_id(struct addr_ctx *ctx)
+{
+ u32 reg;
+
+ /*
+ * On MI300 systems, the Coherent Station Fabric ID is derived
+ * later. And it does not depend on the register value.
+ */
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return 0;
+
+ /* Read D18F0x50 (FabricBlockInstanceInformation3). */
+ if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, &reg))
+ return -EINVAL;
+
+ if (df_cfg.rev < DF4p5)
+ ctx->coh_st_fabric_id = FIELD_GET(DF2_COH_ST_FABRIC_ID, reg);
+ else
+ ctx->coh_st_fabric_id = FIELD_GET(DF4p5_COH_ST_FABRIC_ID, reg);
+
+ return 0;
+}
+
+static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset)
+{
+ u64 last_offset = 0;
+ int ret;
+
+ for (ctx->map.num = 1; ctx->map.num < df_cfg.num_coh_st_maps; ctx->map.num++) {
+ ret = get_dram_offset(ctx, norm_offset);
+ if (ret < 0)
+ return ret;
+
+ /* Continue search if this map's offset is not enabled. */
+ if (!ret)
+ continue;
+
+ /* Enabled offsets should never be 0. */
+ if (*norm_offset == 0) {
+ atl_debug(ctx, "Enabled map %u offset is 0", ctx->map.num);
+ return -EINVAL;
+ }
+
+ /* Offsets should always increase from one map to the next. */
+ if (*norm_offset <= last_offset) {
+ atl_debug(ctx, "Map %u offset (0x%016llx) <= previous (0x%016llx)",
+ ctx->map.num, *norm_offset, last_offset);
+ return -EINVAL;
+ }
+
+ /* Match if this map's offset is less than the current calculated address. */
+ if (ctx->ret_addr >= *norm_offset)
+ break;
+
+ last_offset = *norm_offset;
+ }
+
+ /*
+ * Finished search without finding a match.
+ * Reset to map 0 and no offset.
+ */
+ if (ctx->map.num >= df_cfg.num_coh_st_maps) {
+ ctx->map.num = 0;
+ *norm_offset = 0;
+ }
+
+ return 0;
+}
+
+static bool valid_map(struct addr_ctx *ctx)
+{
+ if (df_cfg.rev >= DF4)
+ return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.ctl);
+ else
+ return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.base);
+}
+
+static int get_address_map_common(struct addr_ctx *ctx)
+{
+ u64 norm_offset = 0;
+
+ if (get_coh_st_fabric_id(ctx))
+ return -EINVAL;
+
+ if (find_normalized_offset(ctx, &norm_offset))
+ return -EINVAL;
+
+ if (get_dram_addr_map(ctx))
+ return -EINVAL;
+
+ if (!valid_map(ctx))
+ return -EINVAL;
+
+ ctx->ret_addr -= norm_offset;
+
+ return 0;
+}
+
+static u8 get_num_intlv_chan(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ case NONE:
+ return 1;
+ case NOHASH_2CHAN:
+ case DF2_2CHAN_HASH:
+ case DF3_COD4_2CHAN_HASH:
+ case DF4_NPS4_2CHAN_HASH:
+ case DF4p5_NPS4_2CHAN_1K_HASH:
+ case DF4p5_NPS4_2CHAN_2K_HASH:
+ return 2;
+ case DF4_NPS4_3CHAN_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ return 3;
+ case NOHASH_4CHAN:
+ case DF3_COD2_4CHAN_HASH:
+ case DF4_NPS2_4CHAN_HASH:
+ case DF4p5_NPS2_4CHAN_1K_HASH:
+ case DF4p5_NPS2_4CHAN_2K_HASH:
+ return 4;
+ case DF4_NPS2_5CHAN_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ return 5;
+ case DF3_6CHAN:
+ case DF4_NPS2_6CHAN_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ return 6;
+ case NOHASH_8CHAN:
+ case DF3_COD1_8CHAN_HASH:
+ case DF4_NPS1_8CHAN_HASH:
+ case MI3_HASH_8CHAN:
+ case DF4p5_NPS1_8CHAN_1K_HASH:
+ case DF4p5_NPS1_8CHAN_2K_HASH:
+ return 8;
+ case DF4_NPS1_10CHAN_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ return 10;
+ case DF4_NPS1_12CHAN_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ return 12;
+ case NOHASH_16CHAN:
+ case MI3_HASH_16CHAN:
+ case DF4p5_NPS1_16CHAN_1K_HASH:
+ case DF4p5_NPS1_16CHAN_2K_HASH:
+ return 16;
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ return 24;
+ case NOHASH_32CHAN:
+ case MI3_HASH_32CHAN:
+ return 32;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+}
+
+static void calculate_intlv_bits(struct addr_ctx *ctx)
+{
+ ctx->map.num_intlv_chan = get_num_intlv_chan(ctx);
+
+ ctx->map.total_intlv_chan = ctx->map.num_intlv_chan;
+ ctx->map.total_intlv_chan *= ctx->map.num_intlv_dies;
+ ctx->map.total_intlv_chan *= ctx->map.num_intlv_sockets;
+
+ /*
+ * Get the number of bits needed to cover this many channels.
+ * order_base_2() rounds up automatically.
+ */
+ ctx->map.total_intlv_bits = order_base_2(ctx->map.total_intlv_chan);
+}
+
+static u8 get_intlv_bit_pos(struct addr_ctx *ctx)
+{
+ u8 addr_sel = 0;
+
+ switch (df_cfg.rev) {
+ case DF2:
+ addr_sel = FIELD_GET(DF2_INTLV_ADDR_SEL, ctx->map.base);
+ break;
+ case DF3:
+ case DF3p5:
+ addr_sel = FIELD_GET(DF3_INTLV_ADDR_SEL, ctx->map.base);
+ break;
+ case DF4:
+ case DF4p5:
+ addr_sel = FIELD_GET(DF4_INTLV_ADDR_SEL, ctx->map.intlv);
+ break;
+ default:
+ atl_debug_on_bad_df_rev();
+ break;
+ }
+
+ /* Add '8' to get the 'interleave bit position'. */
+ return addr_sel + 8;
+}
+
+static u8 get_num_intlv_dies(struct addr_ctx *ctx)
+{
+ u8 dies = 0;
+
+ switch (df_cfg.rev) {
+ case DF2:
+ dies = FIELD_GET(DF2_INTLV_NUM_DIES, ctx->map.limit);
+ break;
+ case DF3:
+ dies = FIELD_GET(DF3_INTLV_NUM_DIES, ctx->map.base);
+ break;
+ case DF3p5:
+ dies = FIELD_GET(DF3p5_INTLV_NUM_DIES, ctx->map.base);
+ break;
+ case DF4:
+ case DF4p5:
+ dies = FIELD_GET(DF4_INTLV_NUM_DIES, ctx->map.intlv);
+ break;
+ default:
+ atl_debug_on_bad_df_rev();
+ break;
+ }
+
+ /* Register value is log2, e.g. 0 -> 1 die, 1 -> 2 dies, etc. */
+ return 1 << dies;
+}
+
+static u8 get_num_intlv_sockets(struct addr_ctx *ctx)
+{
+ u8 sockets = 0;
+
+ switch (df_cfg.rev) {
+ case DF2:
+ sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.limit);
+ break;
+ case DF3:
+ case DF3p5:
+ sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.base);
+ break;
+ case DF4:
+ case DF4p5:
+ sockets = FIELD_GET(DF4_INTLV_NUM_SOCKETS, ctx->map.intlv);
+ break;
+ default:
+ atl_debug_on_bad_df_rev();
+ break;
+ }
+
+ /* Register value is log2, e.g. 0 -> 1 sockets, 1 -> 2 sockets, etc. */
+ return 1 << sockets;
+}
+
+static int get_global_map_data(struct addr_ctx *ctx)
+{
+ if (get_intlv_mode(ctx))
+ return -EINVAL;
+
+ if (ctx->map.intlv_mode == DF3_6CHAN &&
+ df3_6ch_get_dram_addr_map(ctx))
+ return -EINVAL;
+
+ ctx->map.intlv_bit_pos = get_intlv_bit_pos(ctx);
+ ctx->map.num_intlv_dies = get_num_intlv_dies(ctx);
+ ctx->map.num_intlv_sockets = get_num_intlv_sockets(ctx);
+ calculate_intlv_bits(ctx);
+
+ return 0;
+}
+
+static void dump_address_map(struct dram_addr_map *map)
+{
+ u8 i;
+
+ pr_debug("intlv_mode=0x%x", map->intlv_mode);
+ pr_debug("num=0x%x", map->num);
+ pr_debug("base=0x%x", map->base);
+ pr_debug("limit=0x%x", map->limit);
+ pr_debug("ctl=0x%x", map->ctl);
+ pr_debug("intlv=0x%x", map->intlv);
+
+ for (i = 0; i < MAX_COH_ST_CHANNELS; i++)
+ pr_debug("remap_array[%u]=0x%x", i, map->remap_array[i]);
+
+ pr_debug("intlv_bit_pos=%u", map->intlv_bit_pos);
+ pr_debug("num_intlv_chan=%u", map->num_intlv_chan);
+ pr_debug("num_intlv_dies=%u", map->num_intlv_dies);
+ pr_debug("num_intlv_sockets=%u", map->num_intlv_sockets);
+ pr_debug("total_intlv_chan=%u", map->total_intlv_chan);
+ pr_debug("total_intlv_bits=%u", map->total_intlv_bits);
+}
+
+int get_address_map(struct addr_ctx *ctx)
+{
+ int ret;
+
+ ret = get_address_map_common(ctx);
+ if (ret)
+ return ret;
+
+ ret = get_global_map_data(ctx);
+ if (ret)
+ return ret;
+
+ dump_address_map(&ctx->map);
+
+ return ret;
+}
diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h
new file mode 100644
index 000000000000..9dcdf6e4a856
--- /dev/null
+++ b/drivers/ras/amd/atl/reg_fields.h
@@ -0,0 +1,606 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Address Translation Library
+ *
+ * reg_fields.h : Register field definitions
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+/*
+ * Notes on naming:
+ * 1) Use "DF_" prefix for fields that are the same for all revisions.
+ * 2) Use "DFx_" prefix for fields that differ between revisions.
+ * a) "x" is the first major revision where the new field appears.
+ * b) E.g., if DF2 and DF3 have the same field, then call it DF2.
+ * c) E.g., if DF3p5 and DF4 have the same field, then call it DF4.
+ */
+
+/*
+ * Coherent Station Fabric ID
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x50 [Fabric Block Instance Information 3]
+ * DF2 BlockFabricId [19:8]
+ * DF3 BlockFabricId [19:8]
+ * DF3p5 BlockFabricId [19:8]
+ * DF4 BlockFabricId [19:8]
+ * DF4p5 BlockFabricId [15:8]
+ */
+#define DF2_COH_ST_FABRIC_ID GENMASK(19, 8)
+#define DF4p5_COH_ST_FABRIC_ID GENMASK(15, 8)
+
+/*
+ * Component ID Mask
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ *
+ * D18F1x208 [System Fabric ID Mask 0]
+ * DF3 ComponentIdMask [9:0]
+ *
+ * D18F1x150 [System Fabric ID Mask 0]
+ * DF3p5 ComponentIdMask [15:0]
+ *
+ * D18F4x1B0 [System Fabric ID Mask 0]
+ * DF4 ComponentIdMask [15:0]
+ * DF4p5 ComponentIdMask [15:0]
+ */
+#define DF3_COMPONENT_ID_MASK GENMASK(9, 0)
+#define DF4_COMPONENT_ID_MASK GENMASK(15, 0)
+
+/*
+ * Destination Fabric ID
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x114 [DRAM Limit Address]
+ * DF2 DstFabricID [7:0]
+ * DF3 DstFabricID [9:0]
+ * DF3 DstFabricID [11:0]
+ *
+ * D18F7xE08 [DRAM Address Control]
+ * DF4 DstFabricID [27:16]
+ *
+ * D18F7x208 [DRAM Address Control]
+ * DF4p5 DstFabricID [23:16]
+ */
+#define DF2_DST_FABRIC_ID GENMASK(7, 0)
+#define DF3_DST_FABRIC_ID GENMASK(9, 0)
+#define DF3p5_DST_FABRIC_ID GENMASK(11, 0)
+#define DF4_DST_FABRIC_ID GENMASK(27, 16)
+#define DF4p5_DST_FABRIC_ID GENMASK(23, 16)
+
+/*
+ * Die ID Mask
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F1x208 [System Fabric ID Mask]
+ * DF2 DieIdMask [15:8]
+ *
+ * D18F1x20C [System Fabric ID Mask 1]
+ * DF3 DieIdMask [18:16]
+ *
+ * D18F1x158 [System Fabric ID Mask 2]
+ * DF3p5 DieIdMask [15:0]
+ *
+ * D18F4x1B8 [System Fabric ID Mask 2]
+ * DF4 DieIdMask [15:0]
+ * DF4p5 DieIdMask [15:0]
+ */
+#define DF2_DIE_ID_MASK GENMASK(15, 8)
+#define DF3_DIE_ID_MASK GENMASK(18, 16)
+#define DF4_DIE_ID_MASK GENMASK(15, 0)
+
+/*
+ * Die ID Shift
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F1x208 [System Fabric ID Mask]
+ * DF2 DieIdShift [27:24]
+ *
+ * DF3 N/A
+ * DF3p5 N/A
+ * DF4 N/A
+ * DF4p5 N/A
+ */
+#define DF2_DIE_ID_SHIFT GENMASK(27, 24)
+
+/*
+ * DRAM Address Range Valid
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF2 AddrRngVal [0]
+ * DF3 AddrRngVal [0]
+ * DF3p5 AddrRngVal [0]
+ *
+ * D18F7xE08 [DRAM Address Control]
+ * DF4 AddrRngVal [0]
+ *
+ * D18F7x208 [DRAM Address Control]
+ * DF4p5 AddrRngVal [0]
+ */
+#define DF_ADDR_RANGE_VAL BIT(0)
+
+/*
+ * DRAM Base Address
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF2 DramBaseAddr [31:12]
+ * DF3 DramBaseAddr [31:12]
+ * DF3p5 DramBaseAddr [31:12]
+ *
+ * D18F7xE00 [DRAM Base Address]
+ * DF4 DramBaseAddr [27:0]
+ *
+ * D18F7x200 [DRAM Base Address]
+ * DF4p5 DramBaseAddr [27:0]
+ */
+#define DF2_BASE_ADDR GENMASK(31, 12)
+#define DF4_BASE_ADDR GENMASK(27, 0)
+
+/*
+ * DRAM Hole Base
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x104 [DRAM Hole Control]
+ * DF2 DramHoleBase [31:24]
+ * DF3 DramHoleBase [31:24]
+ * DF3p5 DramHoleBase [31:24]
+ *
+ * D18F7x104 [DRAM Hole Control]
+ * DF4 DramHoleBase [31:24]
+ * DF4p5 DramHoleBase [31:24]
+ */
+#define DF_DRAM_HOLE_BASE_MASK GENMASK(31, 24)
+
+/*
+ * DRAM Limit Address
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x114 [DRAM Limit Address]
+ * DF2 DramLimitAddr [31:12]
+ * DF3 DramLimitAddr [31:12]
+ * DF3p5 DramLimitAddr [31:12]
+ *
+ * D18F7xE04 [DRAM Limit Address]
+ * DF4 DramLimitAddr [27:0]
+ *
+ * D18F7x204 [DRAM Limit Address]
+ * DF4p5 DramLimitAddr [27:0]
+ */
+#define DF2_DRAM_LIMIT_ADDR GENMASK(31, 12)
+#define DF4_DRAM_LIMIT_ADDR GENMASK(27, 0)
+
+/*
+ * Hash Interleave Controls
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ *
+ * D18F0x3F8 [DF Global Control]
+ * DF3 GlbHashIntlvCtl64K [20]
+ * GlbHashIntlvCtl2M [21]
+ * GlbHashIntlvCtl1G [22]
+ *
+ * DF3p5 GlbHashIntlvCtl64K [20]
+ * GlbHashIntlvCtl2M [21]
+ * GlbHashIntlvCtl1G [22]
+ *
+ * D18F7xE08 [DRAM Address Control]
+ * DF4 HashIntlvCtl64K [8]
+ * HashIntlvCtl2M [9]
+ * HashIntlvCtl1G [10]
+ *
+ * D18F7x208 [DRAM Address Control]
+ * DF4p5 HashIntlvCtl4K [7]
+ * HashIntlvCtl64K [8]
+ * HashIntlvCtl2M [9]
+ * HashIntlvCtl1G [10]
+ * HashIntlvCtl1T [15]
+ */
+#define DF3_HASH_CTL_64K BIT(20)
+#define DF3_HASH_CTL_2M BIT(21)
+#define DF3_HASH_CTL_1G BIT(22)
+#define DF4_HASH_CTL_64K BIT(8)
+#define DF4_HASH_CTL_2M BIT(9)
+#define DF4_HASH_CTL_1G BIT(10)
+#define DF4p5_HASH_CTL_4K BIT(7)
+#define DF4p5_HASH_CTL_1T BIT(15)
+
+/*
+ * High Address Offset
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x1B4 [DRAM Offset]
+ * DF2 HiAddrOffset [31:20]
+ * DF3 HiAddrOffset [31:12]
+ * DF3p5 HiAddrOffset [31:12]
+ *
+ * D18F7x140 [DRAM Offset]
+ * DF4 HiAddrOffset [24:1]
+ * DF4p5 HiAddrOffset [24:1]
+ * MI300 HiAddrOffset [31:1]
+ */
+#define DF2_HI_ADDR_OFFSET GENMASK(31, 20)
+#define DF3_HI_ADDR_OFFSET GENMASK(31, 12)
+
+/* Follow reference code by including reserved bits for simplicity. */
+#define DF4_HI_ADDR_OFFSET GENMASK(31, 1)
+
+/*
+ * High Address Offset Enable
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x1B4 [DRAM Offset]
+ * DF2 HiAddrOffsetEn [0]
+ * DF3 HiAddrOffsetEn [0]
+ * DF3p5 HiAddrOffsetEn [0]
+ *
+ * D18F7x140 [DRAM Offset]
+ * DF4 HiAddrOffsetEn [0]
+ * DF4p5 HiAddrOffsetEn [0]
+ */
+#define DF_HI_ADDR_OFFSET_EN BIT(0)
+
+/*
+ * Interleave Address Select
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF2 IntLvAddrSel [10:8]
+ * DF3 IntLvAddrSel [11:9]
+ * DF3p5 IntLvAddrSel [11:9]
+ *
+ * D18F7xE0C [DRAM Address Interleave]
+ * DF4 IntLvAddrSel [2:0]
+ *
+ * D18F7x20C [DRAM Address Interleave]
+ * DF4p5 IntLvAddrSel [2:0]
+ */
+#define DF2_INTLV_ADDR_SEL GENMASK(10, 8)
+#define DF3_INTLV_ADDR_SEL GENMASK(11, 9)
+#define DF4_INTLV_ADDR_SEL GENMASK(2, 0)
+
+/*
+ * Interleave Number of Channels
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF2 IntLvNumChan [7:4]
+ * DF3 IntLvNumChan [5:2]
+ * DF3p5 IntLvNumChan [6:2]
+ *
+ * D18F7xE0C [DRAM Address Interleave]
+ * DF4 IntLvNumChan [8:4]
+ *
+ * D18F7x20C [DRAM Address Interleave]
+ * DF4p5 IntLvNumChan [9:4]
+ */
+#define DF2_INTLV_NUM_CHAN GENMASK(7, 4)
+#define DF3_INTLV_NUM_CHAN GENMASK(5, 2)
+#define DF3p5_INTLV_NUM_CHAN GENMASK(6, 2)
+#define DF4_INTLV_NUM_CHAN GENMASK(8, 4)
+#define DF4p5_INTLV_NUM_CHAN GENMASK(9, 4)
+
+/*
+ * Interleave Number of Dies
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x114 [DRAM Limit Address]
+ * DF2 IntLvNumDies [11:10]
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF3 IntLvNumDies [7:6]
+ * DF3p5 IntLvNumDies [7]
+ *
+ * D18F7xE0C [DRAM Address Interleave]
+ * DF4 IntLvNumDies [13:12]
+ *
+ * D18F7x20C [DRAM Address Interleave]
+ * DF4p5 IntLvNumDies [13:12]
+ */
+#define DF2_INTLV_NUM_DIES GENMASK(11, 10)
+#define DF3_INTLV_NUM_DIES GENMASK(7, 6)
+#define DF3p5_INTLV_NUM_DIES BIT(7)
+#define DF4_INTLV_NUM_DIES GENMASK(13, 12)
+
+/*
+ * Interleave Number of Sockets
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x114 [DRAM Limit Address]
+ * DF2 IntLvNumSockets [8]
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF3 IntLvNumSockets [8]
+ * DF3p5 IntLvNumSockets [8]
+ *
+ * D18F7xE0C [DRAM Address Interleave]
+ * DF4 IntLvNumSockets [18]
+ *
+ * D18F7x20C [DRAM Address Interleave]
+ * DF4p5 IntLvNumSockets [18]
+ */
+#define DF2_INTLV_NUM_SOCKETS BIT(8)
+#define DF4_INTLV_NUM_SOCKETS BIT(18)
+
+/*
+ * Legacy MMIO Hole Enable
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x110 [DRAM Base Address]
+ * DF2 LgcyMmioHoleEn [1]
+ * DF3 LgcyMmioHoleEn [1]
+ * DF3p5 LgcyMmioHoleEn [1]
+ *
+ * D18F7xE08 [DRAM Address Control]
+ * DF4 LgcyMmioHoleEn [1]
+ *
+ * D18F7x208 [DRAM Address Control]
+ * DF4p5 LgcyMmioHoleEn [1]
+ */
+#define DF_LEGACY_MMIO_HOLE_EN BIT(1)
+
+/*
+ * Log2 Address 64K Space 0
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ *
+ * D18F2x90 [Non-power-of-2 channel Configuration Register for COH_ST DRAM Address Maps]
+ * DF3 Log2Addr64KSpace0 [5:0]
+ *
+ * DF3p5 N/A
+ * DF4 N/A
+ * DF4p5 N/A
+ */
+#define DF_LOG2_ADDR_64K_SPACE0 GENMASK(5, 0)
+
+/*
+ * Major Revision
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ * DF3 N/A
+ * DF3p5 N/A
+ *
+ * D18F0x040 [Fabric Block Instance Count]
+ * DF4 MajorRevision [27:24]
+ * DF4p5 MajorRevision [27:24]
+ */
+#define DF_MAJOR_REVISION GENMASK(27, 24)
+
+/*
+ * Minor Revision
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ * DF3 N/A
+ * DF3p5 N/A
+ *
+ * D18F0x040 [Fabric Block Instance Count]
+ * DF4 MinorRevision [23:16]
+ * DF4p5 MinorRevision [23:16]
+ */
+#define DF_MINOR_REVISION GENMASK(23, 16)
+
+/*
+ * Node ID Mask
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ *
+ * D18F1x208 [System Fabric ID Mask 0]
+ * DF3 NodeIdMask [25:16]
+ *
+ * D18F1x150 [System Fabric ID Mask 0]
+ * DF3p5 NodeIdMask [31:16]
+ *
+ * D18F4x1B0 [System Fabric ID Mask 0]
+ * DF4 NodeIdMask [31:16]
+ * DF4p5 NodeIdMask [31:16]
+ */
+#define DF3_NODE_ID_MASK GENMASK(25, 16)
+#define DF4_NODE_ID_MASK GENMASK(31, 16)
+
+/*
+ * Node ID Shift
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ *
+ * D18F1x20C [System Fabric ID Mask 1]
+ * DF3 NodeIdShift [3:0]
+ *
+ * D18F1x154 [System Fabric ID Mask 1]
+ * DF3p5 NodeIdShift [3:0]
+ *
+ * D18F4x1B4 [System Fabric ID Mask 1]
+ * DF4 NodeIdShift [3:0]
+ * DF4p5 NodeIdShift [3:0]
+ */
+#define DF3_NODE_ID_SHIFT GENMASK(3, 0)
+
+/*
+ * Remap Enable
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ * DF3 N/A
+ * DF3p5 N/A
+ *
+ * D18F7xE08 [DRAM Address Control]
+ * DF4 RemapEn [4]
+ *
+ * D18F7x208 [DRAM Address Control]
+ * DF4p5 RemapEn [4]
+ */
+#define DF4_REMAP_EN BIT(4)
+
+/*
+ * Remap Select
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * DF2 N/A
+ * DF3 N/A
+ * DF3p5 N/A
+ *
+ * D18F7xE08 [DRAM Address Control]
+ * DF4 RemapSel [7:5]
+ *
+ * D18F7x208 [DRAM Address Control]
+ * DF4p5 RemapSel [6:5]
+ */
+#define DF4_REMAP_SEL GENMASK(7, 5)
+#define DF4p5_REMAP_SEL GENMASK(6, 5)
+
+/*
+ * Socket ID Mask
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F1x208 [System Fabric ID Mask]
+ * DF2 SocketIdMask [23:16]
+ *
+ * D18F1x20C [System Fabric ID Mask 1]
+ * DF3 SocketIdMask [26:24]
+ *
+ * D18F1x158 [System Fabric ID Mask 2]
+ * DF3p5 SocketIdMask [31:16]
+ *
+ * D18F4x1B8 [System Fabric ID Mask 2]
+ * DF4 SocketIdMask [31:16]
+ * DF4p5 SocketIdMask [31:16]
+ */
+#define DF2_SOCKET_ID_MASK GENMASK(23, 16)
+#define DF3_SOCKET_ID_MASK GENMASK(26, 24)
+#define DF4_SOCKET_ID_MASK GENMASK(31, 16)
+
+/*
+ * Socket ID Shift
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F1x208 [System Fabric ID Mask]
+ * DF2 SocketIdShift [31:28]
+ *
+ * D18F1x20C [System Fabric ID Mask 1]
+ * DF3 SocketIdShift [9:8]
+ *
+ * D18F1x158 [System Fabric ID Mask 2]
+ * DF3p5 SocketIdShift [11:8]
+ *
+ * D18F4x1B4 [System Fabric ID Mask 1]
+ * DF4 SocketIdShift [11:8]
+ * DF4p5 SocketIdShift [11:8]
+ */
+#define DF2_SOCKET_ID_SHIFT GENMASK(31, 28)
+#define DF3_SOCKET_ID_SHIFT GENMASK(9, 8)
+#define DF4_SOCKET_ID_SHIFT GENMASK(11, 8)
diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
new file mode 100644
index 000000000000..701349e84942
--- /dev/null
+++ b/drivers/ras/amd/atl/system.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * system.c : Functions to read and save system-wide data
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include "internal.h"
+
+int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id)
+{
+ u16 socket_id_bits, die_id_bits;
+
+ if (socket_id > 0 && df_cfg.socket_id_mask == 0) {
+ atl_debug(ctx, "Invalid socket inputs: socket_id=%u socket_id_mask=0x%x",
+ socket_id, df_cfg.socket_id_mask);
+ return -EINVAL;
+ }
+
+ /* Do each step independently to avoid shift out-of-bounds issues. */
+ socket_id_bits = socket_id;
+ socket_id_bits <<= df_cfg.socket_id_shift;
+ socket_id_bits &= df_cfg.socket_id_mask;
+
+ if (die_id > 0 && df_cfg.die_id_mask == 0) {
+ atl_debug(ctx, "Invalid die inputs: die_id=%u die_id_mask=0x%x",
+ die_id, df_cfg.die_id_mask);
+ return -EINVAL;
+ }
+
+ /* Do each step independently to avoid shift out-of-bounds issues. */
+ die_id_bits = die_id;
+ die_id_bits <<= df_cfg.die_id_shift;
+ die_id_bits &= df_cfg.die_id_mask;
+
+ ctx->node_id = (socket_id_bits | die_id_bits) >> df_cfg.node_id_shift;
+ return 0;
+}
+
+static void df2_get_masks_shifts(u32 mask0)
+{
+ df_cfg.socket_id_shift = FIELD_GET(DF2_SOCKET_ID_SHIFT, mask0);
+ df_cfg.socket_id_mask = FIELD_GET(DF2_SOCKET_ID_MASK, mask0);
+ df_cfg.die_id_shift = FIELD_GET(DF2_DIE_ID_SHIFT, mask0);
+ df_cfg.die_id_mask = FIELD_GET(DF2_DIE_ID_MASK, mask0);
+ df_cfg.node_id_shift = df_cfg.die_id_shift;
+ df_cfg.node_id_mask = df_cfg.socket_id_mask | df_cfg.die_id_mask;
+ df_cfg.component_id_mask = ~df_cfg.node_id_mask;
+}
+
+static void df3_get_masks_shifts(u32 mask0, u32 mask1)
+{
+ df_cfg.component_id_mask = FIELD_GET(DF3_COMPONENT_ID_MASK, mask0);
+ df_cfg.node_id_mask = FIELD_GET(DF3_NODE_ID_MASK, mask0);
+
+ df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1);
+ df_cfg.socket_id_shift = FIELD_GET(DF3_SOCKET_ID_SHIFT, mask1);
+ df_cfg.socket_id_mask = FIELD_GET(DF3_SOCKET_ID_MASK, mask1);
+ df_cfg.die_id_mask = FIELD_GET(DF3_DIE_ID_MASK, mask1);
+}
+
+static void df3p5_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2)
+{
+ df_cfg.component_id_mask = FIELD_GET(DF4_COMPONENT_ID_MASK, mask0);
+ df_cfg.node_id_mask = FIELD_GET(DF4_NODE_ID_MASK, mask0);
+
+ df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1);
+ df_cfg.socket_id_shift = FIELD_GET(DF4_SOCKET_ID_SHIFT, mask1);
+
+ df_cfg.socket_id_mask = FIELD_GET(DF4_SOCKET_ID_MASK, mask2);
+ df_cfg.die_id_mask = FIELD_GET(DF4_DIE_ID_MASK, mask2);
+}
+
+static void df4_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2)
+{
+ df3p5_get_masks_shifts(mask0, mask1, mask2);
+
+ if (!(df_cfg.flags.socket_id_shift_quirk && df_cfg.socket_id_shift == 1))
+ return;
+
+ df_cfg.socket_id_shift = 0;
+ df_cfg.socket_id_mask = 1;
+ df_cfg.die_id_shift = 0;
+ df_cfg.die_id_mask = 0;
+ df_cfg.node_id_shift = 8;
+ df_cfg.node_id_mask = 0x100;
+}
+
+static int df4_get_fabric_id_mask_registers(void)
+{
+ u32 mask0, mask1, mask2;
+
+ /* Read D18F4x1B0 (SystemFabricIdMask0) */
+ if (df_indirect_read_broadcast(0, 4, 0x1B0, &mask0))
+ return -EINVAL;
+
+ /* Read D18F4x1B4 (SystemFabricIdMask1) */
+ if (df_indirect_read_broadcast(0, 4, 0x1B4, &mask1))
+ return -EINVAL;
+
+ /* Read D18F4x1B8 (SystemFabricIdMask2) */
+ if (df_indirect_read_broadcast(0, 4, 0x1B8, &mask2))
+ return -EINVAL;
+
+ df4_get_masks_shifts(mask0, mask1, mask2);
+ return 0;
+}
+
+static int df4_determine_df_rev(u32 reg)
+{
+ df_cfg.rev = FIELD_GET(DF_MINOR_REVISION, reg) < 5 ? DF4 : DF4p5;
+
+ /* Check for special cases or quirks based on Device/Vendor IDs.*/
+
+ /* Read D18F0x000 (DeviceVendorId0) */
+ if (df_indirect_read_broadcast(0, 0, 0, &reg))
+ return -EINVAL;
+
+ if (reg == DF_FUNC0_ID_ZEN4_SERVER)
+ df_cfg.flags.socket_id_shift_quirk = 1;
+
+ if (reg == DF_FUNC0_ID_MI300) {
+ df_cfg.flags.heterogeneous = 1;
+
+ if (get_addr_hash_mi300())
+ return -EINVAL;
+ }
+
+ return df4_get_fabric_id_mask_registers();
+}
+
+static int determine_df_rev_legacy(void)
+{
+ u32 fabric_id_mask0, fabric_id_mask1, fabric_id_mask2;
+
+ /*
+ * Check for DF3.5.
+ *
+ * Component ID Mask must be non-zero. Register D18F1x150 is
+ * reserved pre-DF3.5, so value will be Read-as-Zero.
+ */
+
+ /* Read D18F1x150 (SystemFabricIdMask0). */
+ if (df_indirect_read_broadcast(0, 1, 0x150, &fabric_id_mask0))
+ return -EINVAL;
+
+ if (FIELD_GET(DF4_COMPONENT_ID_MASK, fabric_id_mask0)) {
+ df_cfg.rev = DF3p5;
+
+ /* Read D18F1x154 (SystemFabricIdMask1) */
+ if (df_indirect_read_broadcast(0, 1, 0x154, &fabric_id_mask1))
+ return -EINVAL;
+
+ /* Read D18F1x158 (SystemFabricIdMask2) */
+ if (df_indirect_read_broadcast(0, 1, 0x158, &fabric_id_mask2))
+ return -EINVAL;
+
+ df3p5_get_masks_shifts(fabric_id_mask0, fabric_id_mask1, fabric_id_mask2);
+ return 0;
+ }
+
+ /*
+ * Check for DF3.
+ *
+ * Component ID Mask must be non-zero. Field is Read-as-Zero on DF2.
+ */
+
+ /* Read D18F1x208 (SystemFabricIdMask). */
+ if (df_indirect_read_broadcast(0, 1, 0x208, &fabric_id_mask0))
+ return -EINVAL;
+
+ if (FIELD_GET(DF3_COMPONENT_ID_MASK, fabric_id_mask0)) {
+ df_cfg.rev = DF3;
+
+ /* Read D18F1x20C (SystemFabricIdMask1) */
+ if (df_indirect_read_broadcast(0, 1, 0x20C, &fabric_id_mask1))
+ return -EINVAL;
+
+ df3_get_masks_shifts(fabric_id_mask0, fabric_id_mask1);
+ return 0;
+ }
+
+ /* Default to DF2. */
+ df_cfg.rev = DF2;
+ df2_get_masks_shifts(fabric_id_mask0);
+ return 0;
+}
+
+static int determine_df_rev(void)
+{
+ u32 reg;
+ u8 rev;
+
+ if (df_cfg.rev != UNKNOWN)
+ return 0;
+
+ /* Read D18F0x40 (FabricBlockInstanceCount). */
+ if (df_indirect_read_broadcast(0, 0, 0x40, &reg))
+ return -EINVAL;
+
+ /*
+ * Revision fields added for DF4 and later.
+ *
+ * Major revision of '0' is found pre-DF4. Field is Read-as-Zero.
+ */
+ rev = FIELD_GET(DF_MAJOR_REVISION, reg);
+ if (!rev)
+ return determine_df_rev_legacy();
+
+ /*
+ * Fail out for major revisions other than '4'.
+ *
+ * Explicit support should be added for newer systems to avoid issues.
+ */
+ if (rev == 4)
+ return df4_determine_df_rev(reg);
+
+ return -EINVAL;
+}
+
+static void get_num_maps(void)
+{
+ switch (df_cfg.rev) {
+ case DF2:
+ case DF3:
+ case DF3p5:
+ df_cfg.num_coh_st_maps = 2;
+ break;
+ case DF4:
+ case DF4p5:
+ df_cfg.num_coh_st_maps = 4;
+ break;
+ default:
+ atl_debug_on_bad_df_rev();
+ }
+}
+
+static void apply_node_id_shift(void)
+{
+ if (df_cfg.rev == DF2)
+ return;
+
+ df_cfg.die_id_shift = df_cfg.node_id_shift;
+ df_cfg.die_id_mask <<= df_cfg.node_id_shift;
+ df_cfg.socket_id_mask <<= df_cfg.node_id_shift;
+ df_cfg.socket_id_shift += df_cfg.node_id_shift;
+}
+
+static void dump_df_cfg(void)
+{
+ pr_debug("rev=0x%x", df_cfg.rev);
+
+ pr_debug("component_id_mask=0x%x", df_cfg.component_id_mask);
+ pr_debug("die_id_mask=0x%x", df_cfg.die_id_mask);
+ pr_debug("node_id_mask=0x%x", df_cfg.node_id_mask);
+ pr_debug("socket_id_mask=0x%x", df_cfg.socket_id_mask);
+
+ pr_debug("die_id_shift=0x%x", df_cfg.die_id_shift);
+ pr_debug("node_id_shift=0x%x", df_cfg.node_id_shift);
+ pr_debug("socket_id_shift=0x%x", df_cfg.socket_id_shift);
+
+ pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps);
+
+ pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa);
+ pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk);
+}
+
+int get_df_system_info(void)
+{
+ if (determine_df_rev()) {
+ pr_warn("amd_atl: Failed to determine DF Revision");
+ df_cfg.rev = UNKNOWN;
+ return -EINVAL;
+ }
+
+ apply_node_id_shift();
+
+ get_num_maps();
+
+ dump_df_cfg();
+
+ return 0;
+}
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
new file mode 100644
index 000000000000..59b6169093f7
--- /dev/null
+++ b/drivers/ras/amd/atl/umc.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Address Translation Library
+ *
+ * umc.c : Unified Memory Controller (UMC) topology helpers
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ */
+
+#include "internal.h"
+
+/*
+ * MI300 has a fixed, model-specific mapping between a UMC instance and
+ * its related Data Fabric Coherent Station instance.
+ *
+ * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the
+ * UMC instance within a Node. Use this to find the appropriate Coherent
+ * Station ID.
+ *
+ * Redundant bits were removed from the map below.
+ */
+static const u16 umc_coh_st_map[32] = {
+ 0x393, 0x293, 0x193, 0x093,
+ 0x392, 0x292, 0x192, 0x092,
+ 0x391, 0x291, 0x191, 0x091,
+ 0x390, 0x290, 0x190, 0x090,
+ 0x793, 0x693, 0x593, 0x493,
+ 0x792, 0x692, 0x592, 0x492,
+ 0x791, 0x691, 0x591, 0x491,
+ 0x790, 0x690, 0x590, 0x490,
+};
+
+#define UMC_ID_MI300 GENMASK(23, 12)
+static u8 get_coh_st_inst_id_mi300(struct atl_err *err)
+{
+ u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid);
+ u8 i;
+
+ for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) {
+ if (umc_id == umc_coh_st_map[i])
+ break;
+ }
+
+ WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map));
+
+ return i;
+}
+
+/* XOR the bits in @val. */
+static u16 bitwise_xor_bits(u16 val)
+{
+ u16 tmp = 0;
+ u8 i;
+
+ for (i = 0; i < 16; i++)
+ tmp ^= (val >> i) & 0x1;
+
+ return tmp;
+}
+
+struct xor_bits {
+ bool xor_enable;
+ u16 col_xor;
+ u32 row_xor;
+};
+
+#define NUM_BANK_BITS 4
+
+static struct {
+ /* UMC::CH::AddrHashBank */
+ struct xor_bits bank[NUM_BANK_BITS];
+
+ /* UMC::CH::AddrHashPC */
+ struct xor_bits pc;
+
+ /* UMC::CH::AddrHashPC2 */
+ u8 bank_xor;
+} addr_hash;
+
+#define MI300_UMC_CH_BASE 0x90000
+#define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8)
+#define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0)
+#define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4)
+
+#define ADDR_HASH_XOR_EN BIT(0)
+#define ADDR_HASH_COL_XOR GENMASK(13, 1)
+#define ADDR_HASH_ROW_XOR GENMASK(31, 14)
+#define ADDR_HASH_BANK_XOR GENMASK(5, 0)
+
+/*
+ * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used
+ * for hashing. Do this during module init, since the values will not
+ * change during run time.
+ *
+ * These registers are instantiated for each UMC across each AMD Node.
+ * However, they should be identically programmed due to the fixed hardware
+ * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a
+ * single global structure for simplicity.
+ */
+int get_addr_hash_mi300(void)
+{
+ u32 temp;
+ int ret;
+ u8 i;
+
+ for (i = 0; i < NUM_BANK_BITS; i++) {
+ ret = amd_smn_read(0, MI300_ADDR_HASH_BANK0 + (i * 4), &temp);
+ if (ret)
+ return ret;
+
+ addr_hash.bank[i].xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp);
+ addr_hash.bank[i].col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp);
+ addr_hash.bank[i].row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp);
+ }
+
+ ret = amd_smn_read(0, MI300_ADDR_HASH_PC, &temp);
+ if (ret)
+ return ret;
+
+ addr_hash.pc.xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp);
+ addr_hash.pc.col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp);
+ addr_hash.pc.row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp);
+
+ ret = amd_smn_read(0, MI300_ADDR_HASH_PC2, &temp);
+ if (ret)
+ return ret;
+
+ addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp);
+
+ return 0;
+}
+
+/*
+ * MI300 systems report a DRAM address in MCA_ADDR for DRAM ECC errors. This must
+ * be converted to the intermediate normalized address (NA) before translating to a
+ * system physical address.
+ *
+ * The DRAM address includes bank, row, and column. Also included are bits for
+ * pseudochannel (PC) and stack ID (SID).
+ *
+ * Abbreviations: (S)tack ID, (P)seudochannel, (R)ow, (B)ank, (C)olumn, (Z)ero
+ *
+ * The MCA address format is as follows:
+ * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]}
+ *
+ * The normalized address format is fixed in hardware and is as follows:
+ * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]}
+ *
+ * Additionally, the PC and Bank bits may be hashed. This must be accounted for before
+ * reconstructing the normalized address.
+ */
+#define MI300_UMC_MCA_COL GENMASK(5, 1)
+#define MI300_UMC_MCA_BANK GENMASK(9, 6)
+#define MI300_UMC_MCA_ROW GENMASK(24, 10)
+#define MI300_UMC_MCA_PC BIT(25)
+#define MI300_UMC_MCA_SID GENMASK(27, 26)
+
+#define MI300_NA_COL_1_0 GENMASK(6, 5)
+#define MI300_NA_PC BIT(7)
+#define MI300_NA_COL_3_2 GENMASK(9, 8)
+#define MI300_NA_BANK_3_2 GENMASK(11, 10)
+#define MI300_NA_BANK_1_0 GENMASK(13, 12)
+#define MI300_NA_COL_4 BIT(14)
+#define MI300_NA_ROW GENMASK(28, 15)
+#define MI300_NA_SID GENMASK(30, 29)
+
+static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
+{
+ u16 i, col, row, bank, pc, sid, temp;
+
+ col = FIELD_GET(MI300_UMC_MCA_COL, addr);
+ bank = FIELD_GET(MI300_UMC_MCA_BANK, addr);
+ row = FIELD_GET(MI300_UMC_MCA_ROW, addr);
+ pc = FIELD_GET(MI300_UMC_MCA_PC, addr);
+ sid = FIELD_GET(MI300_UMC_MCA_SID, addr);
+
+ /* Calculate hash for each Bank bit. */
+ for (i = 0; i < NUM_BANK_BITS; i++) {
+ if (!addr_hash.bank[i].xor_enable)
+ continue;
+
+ temp = bitwise_xor_bits(col & addr_hash.bank[i].col_xor);
+ temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor);
+ bank ^= temp << i;
+ }
+
+ /* Calculate hash for PC bit. */
+ if (addr_hash.pc.xor_enable) {
+ /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */
+ bank |= sid << 5;
+
+ temp = bitwise_xor_bits(col & addr_hash.pc.col_xor);
+ temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor);
+ temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor);
+ pc ^= temp;
+
+ /* Drop SID bits for the sake of debug printing later. */
+ bank &= 0x1F;
+ }
+
+ /* Reconstruct the normalized address starting with NA[4:0] = 0 */
+ addr = 0;
+
+ /* NA[6:5] = Column[1:0] */
+ temp = col & 0x3;
+ addr |= FIELD_PREP(MI300_NA_COL_1_0, temp);
+
+ /* NA[7] = PC */
+ addr |= FIELD_PREP(MI300_NA_PC, pc);
+
+ /* NA[9:8] = Column[3:2] */
+ temp = (col >> 2) & 0x3;
+ addr |= FIELD_PREP(MI300_NA_COL_3_2, temp);
+
+ /* NA[11:10] = Bank[3:2] */
+ temp = (bank >> 2) & 0x3;
+ addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp);
+
+ /* NA[13:12] = Bank[1:0] */
+ temp = bank & 0x3;
+ addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp);
+
+ /* NA[14] = Column[4] */
+ temp = (col >> 4) & 0x1;
+ addr |= FIELD_PREP(MI300_NA_COL_4, temp);
+
+ /* NA[28:15] = Row[13:0] */
+ addr |= FIELD_PREP(MI300_NA_ROW, row);
+
+ /* NA[30:29] = SID[1:0] */
+ addr |= FIELD_PREP(MI300_NA_SID, sid);
+
+ pr_debug("Addr=0x%016lx", addr);
+ pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid);
+
+ return addr;
+}
+
+/*
+ * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire
+ * all memory within that DRAM row. This applies to the memory with a DRAM
+ * bank.
+ *
+ * To find the memory addresses, loop through permutations of the DRAM column
+ * bits and find the System Physical address of each. The column bits are used
+ * to calculate the intermediate Normalized address, so all permutations should
+ * be checked.
+ *
+ * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
+ */
+#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL))
+static void retire_row_mi300(struct atl_err *a_err)
+{
+ unsigned long addr;
+ struct page *p;
+ u8 col;
+
+ for (col = 0; col < MI300_NUM_COL; col++) {
+ a_err->addr &= ~MI300_UMC_MCA_COL;
+ a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col);
+
+ addr = amd_convert_umc_mca_addr_to_sys_addr(a_err);
+ if (IS_ERR_VALUE(addr))
+ continue;
+
+ addr = PHYS_PFN(addr);
+
+ /*
+ * Skip invalid or already poisoned pages to avoid unnecessary
+ * error messages from memory_failure().
+ */
+ p = pfn_to_online_page(addr);
+ if (!p)
+ continue;
+
+ if (PageHWPoison(p))
+ continue;
+
+ memory_failure(addr, 0);
+ }
+}
+
+void amd_retire_dram_row(struct atl_err *a_err)
+{
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return retire_row_mi300(a_err);
+}
+EXPORT_SYMBOL_GPL(amd_retire_dram_row);
+
+static unsigned long get_addr(unsigned long addr)
+{
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return convert_dram_to_norm_addr_mi300(addr);
+
+ return addr;
+}
+
+#define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44)
+static u8 get_die_id(struct atl_err *err)
+{
+ /*
+ * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this
+ * needs to be divided by 4 to get the internal Die ID.
+ */
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) {
+ u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid);
+
+ return node_id >> 2;
+ }
+
+ /*
+ * For CPUs, this is the AMD Node ID modulo the number
+ * of AMD Nodes per socket.
+ */
+ return topology_amd_node_id(err->cpu) % topology_amd_nodes_per_pkg();
+}
+
+#define UMC_CHANNEL_NUM GENMASK(31, 20)
+static u8 get_coh_st_inst_id(struct atl_err *err)
+{
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return get_coh_st_inst_id_mi300(err);
+
+ return FIELD_GET(UMC_CHANNEL_NUM, err->ipid);
+}
+
+unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+{
+ u8 socket_id = topology_physical_package_id(err->cpu);
+ u8 coh_st_inst_id = get_coh_st_inst_id(err);
+ unsigned long addr = get_addr(err->addr);
+ u8 die_id = get_die_id(err);
+
+ pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx",
+ socket_id, die_id, coh_st_inst_id, addr);
+
+ return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr);
+}
diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c
new file mode 100644
index 000000000000..2f4ac9591c8f
--- /dev/null
+++ b/drivers/ras/amd/fmpm.c
@@ -0,0 +1,1013 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * FRU (Field-Replaceable Unit) Memory Poison Manager
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Authors:
+ * Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
+ * Muralidhara M K <muralidhara.mk@amd.com>
+ * Yazen Ghannam <Yazen.Ghannam@amd.com>
+ *
+ * Implementation notes, assumptions, and limitations:
+ *
+ * - FRU memory poison section and memory poison descriptor definitions are not yet
+ * included in the UEFI specification. So they are defined here. Afterwards, they
+ * may be moved to linux/cper.h, if appropriate.
+ *
+ * - Platforms based on AMD MI300 systems will be the first to use these structures.
+ * There are a number of assumptions made here that will need to be generalized
+ * to support other platforms.
+ *
+ * AMD MI300-based platform(s) assumptions:
+ * - Memory errors are reported through x86 MCA.
+ * - The entire DRAM row containing a memory error should be retired.
+ * - There will be (1) FRU memory poison section per CPER.
+ * - The FRU will be the CPU package (processor socket).
+ * - The default number of memory poison descriptor entries should be (8).
+ * - The platform will use ACPI ERST for persistent storage.
+ * - All FRU records should be saved to persistent storage. Module init will
+ * fail if any FRU record is not successfully written.
+ *
+ * - Boot time memory retirement may occur later than ideal due to dependencies
+ * on other libraries and drivers. This leaves a gap where bad memory may be
+ * accessed during early boot stages.
+ *
+ * - Enough memory should be pre-allocated for each FRU record to be able to hold
+ * the expected number of descriptor entries. This, mostly empty, record is
+ * written to storage during init time. Subsequent writes to the same record
+ * should allow the Platform to update the stored record in-place. Otherwise,
+ * if the record is extended, then the Platform may need to perform costly memory
+ * management operations on the storage. For example, the Platform may spend time
+ * in Firmware copying and invalidating memory on a relatively slow SPI ROM.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cper.h>
+#include <linux/ras.h>
+#include <linux/cpu.h>
+
+#include <acpi/apei.h>
+
+#include <asm/cpu_device_id.h>
+#include <asm/mce.h>
+
+#include "../debugfs.h"
+
+#define INVALID_CPU UINT_MAX
+
+/* Validation Bits */
+#define FMP_VALID_ARCH_TYPE BIT_ULL(0)
+#define FMP_VALID_ARCH BIT_ULL(1)
+#define FMP_VALID_ID_TYPE BIT_ULL(2)
+#define FMP_VALID_ID BIT_ULL(3)
+#define FMP_VALID_LIST_ENTRIES BIT_ULL(4)
+#define FMP_VALID_LIST BIT_ULL(5)
+
+/* FRU Architecture Types */
+#define FMP_ARCH_TYPE_X86_CPUID_1_EAX 0
+
+/* FRU ID Types */
+#define FMP_ID_TYPE_X86_PPIN 0
+
+/* FRU Memory Poison Section */
+struct cper_sec_fru_mem_poison {
+ u32 checksum;
+ u64 validation_bits;
+ u32 fru_arch_type;
+ u64 fru_arch;
+ u32 fru_id_type;
+ u64 fru_id;
+ u32 nr_entries;
+} __packed;
+
+/* FRU Descriptor ID Types */
+#define FPD_HW_ID_TYPE_MCA_IPID 0
+
+/* FRU Descriptor Address Types */
+#define FPD_ADDR_TYPE_MCA_ADDR 0
+
+/* Memory Poison Descriptor */
+struct cper_fru_poison_desc {
+ u64 timestamp;
+ u32 hw_id_type;
+ u64 hw_id;
+ u32 addr_type;
+ u64 addr;
+} __packed;
+
+/* Collection of headers and sections for easy pointer use. */
+struct fru_rec {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_desc;
+ struct cper_sec_fru_mem_poison fmp;
+ struct cper_fru_poison_desc entries[];
+} __packed;
+
+/*
+ * Pointers to the complete CPER record of each FRU.
+ *
+ * Memory allocation will include padded space for descriptor entries.
+ */
+static struct fru_rec **fru_records;
+
+/* system physical addresses array */
+static u64 *spa_entries;
+
+#define INVALID_SPA ~0ULL
+
+static struct dentry *fmpm_dfs_dir;
+static struct dentry *fmpm_dfs_entries;
+
+#define CPER_CREATOR_FMP \
+ GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \
+ 0xa0, 0x33, 0x08, 0x75)
+
+#define CPER_SECTION_TYPE_FMP \
+ GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2, \
+ 0x12, 0x0a, 0x44, 0x58)
+
+/**
+ * DOC: max_nr_entries (byte)
+ * Maximum number of descriptor entries possible for each FRU.
+ *
+ * Values between '1' and '255' are valid.
+ * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES.
+ */
+static u8 max_nr_entries;
+module_param(max_nr_entries, byte, 0644);
+MODULE_PARM_DESC(max_nr_entries,
+ "Maximum number of memory poison descriptor entries per FRU");
+
+#define FMPM_DEFAULT_MAX_NR_ENTRIES 8
+
+/* Maximum number of FRUs in the system. */
+#define FMPM_MAX_NR_FRU 256
+static unsigned int max_nr_fru;
+
+/* Total length of record including headers and list of descriptor entries. */
+static size_t max_rec_len;
+
+/* Total number of SPA entries across all FRUs. */
+static unsigned int spa_nr_entries;
+
+/*
+ * Protect the local records cache in fru_records and prevent concurrent
+ * writes to storage. This is only needed after init once notifier block
+ * registration is done.
+ *
+ * The majority of a record is fixed at module init and will not change
+ * during run time. The entries within a record will be updated as new
+ * errors are reported. The mutex should be held whenever the entries are
+ * accessed during run time.
+ */
+static DEFINE_MUTEX(fmpm_update_mutex);
+
+#define for_each_fru(i, rec) \
+ for (i = 0; rec = fru_records[i], i < max_nr_fru; i++)
+
+static inline u32 get_fmp_len(struct fru_rec *rec)
+{
+ return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor);
+}
+
+static struct fru_rec *get_fru_record(u64 fru_id)
+{
+ struct fru_rec *rec;
+ unsigned int i;
+
+ for_each_fru(i, rec) {
+ if (rec->fmp.fru_id == fru_id)
+ return rec;
+ }
+
+ pr_debug("Record not found for FRU 0x%016llx\n", fru_id);
+
+ return NULL;
+}
+
+/*
+ * Sum up all bytes within the FRU Memory Poison Section including the Memory
+ * Poison Descriptor entries.
+ *
+ * Don't include the old checksum here. It's a u32 value, so summing each of its
+ * bytes will give the wrong total.
+ */
+static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len)
+{
+ u32 checksum = 0;
+ u8 *buf, *end;
+
+ /* Skip old checksum. */
+ buf = (u8 *)fmp + sizeof(u32);
+ end = buf + len;
+
+ while (buf < end)
+ checksum += (u8)(*(buf++));
+
+ return checksum;
+}
+
+static int update_record_on_storage(struct fru_rec *rec)
+{
+ u32 len, checksum;
+ int ret;
+
+ /* Calculate a new checksum. */
+ len = get_fmp_len(rec);
+
+ /* Get the current total. */
+ checksum = do_fmp_checksum(&rec->fmp, len);
+
+ /* Use the complement value. */
+ rec->fmp.checksum = -checksum;
+
+ pr_debug("Writing to storage\n");
+
+ ret = erst_write(&rec->hdr);
+ if (ret) {
+ pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id);
+
+ if (ret == -ENOSPC)
+ pr_warn("Not enough space on storage\n");
+ }
+
+ return ret;
+}
+
+static bool rec_has_valid_entries(struct fru_rec *rec)
+{
+ if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES))
+ return false;
+
+ if (!(rec->fmp.validation_bits & FMP_VALID_LIST))
+ return false;
+
+ return true;
+}
+
+static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
+{
+ /*
+ * Ignore timestamp field.
+ * The same physical error may be reported multiple times due to stuck bits, etc.
+ *
+ * Also, order the checks from most->least likely to fail to shortcut the code.
+ */
+ if (old->addr != new->addr)
+ return false;
+
+ if (old->hw_id != new->hw_id)
+ return false;
+
+ if (old->addr_type != new->addr_type)
+ return false;
+
+ if (old->hw_id_type != new->hw_id_type)
+ return false;
+
+ return true;
+}
+
+static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd)
+{
+ unsigned int i;
+
+ for (i = 0; i < rec->fmp.nr_entries; i++) {
+ struct cper_fru_poison_desc *fpd_i = &rec->entries[i];
+
+ if (fpds_equal(fpd_i, fpd)) {
+ pr_debug("Found duplicate record\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void save_spa(struct fru_rec *rec, unsigned int entry,
+ u64 addr, u64 id, unsigned int cpu)
+{
+ unsigned int i, fru_idx, spa_entry;
+ struct atl_err a_err;
+ unsigned long spa;
+
+ if (entry >= max_nr_entries) {
+ pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n",
+ entry, max_nr_entries);
+ return;
+ }
+
+ /* spa_nr_entries is always multiple of max_nr_entries */
+ for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
+ fru_idx = i / max_nr_entries;
+ if (fru_records[fru_idx] == rec)
+ break;
+ }
+
+ if (i >= spa_nr_entries) {
+ pr_warn_once("FRU record %d not found\n", i);
+ return;
+ }
+
+ spa_entry = i + entry;
+ if (spa_entry >= spa_nr_entries) {
+ pr_warn_once("spa_entries[] index out-of-bounds\n");
+ return;
+ }
+
+ memset(&a_err, 0, sizeof(struct atl_err));
+
+ a_err.addr = addr;
+ a_err.ipid = id;
+ a_err.cpu = cpu;
+
+ spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
+ if (IS_ERR_VALUE(spa)) {
+ pr_debug("Failed to get system address\n");
+ return;
+ }
+
+ spa_entries[spa_entry] = spa;
+ pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
+ fru_idx, entry, spa_entry, spa_entries[spa_entry]);
+}
+
+static void update_fru_record(struct fru_rec *rec, struct mce *m)
+{
+ struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+ struct cper_fru_poison_desc fpd, *fpd_dest;
+ u32 entry = 0;
+
+ mutex_lock(&fmpm_update_mutex);
+
+ memset(&fpd, 0, sizeof(struct cper_fru_poison_desc));
+
+ fpd.timestamp = m->time;
+ fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID;
+ fpd.hw_id = m->ipid;
+ fpd.addr_type = FPD_ADDR_TYPE_MCA_ADDR;
+ fpd.addr = m->addr;
+
+ /* This is the first entry, so just save it. */
+ if (!rec_has_valid_entries(rec))
+ goto save_fpd;
+
+ /* Ignore already recorded errors. */
+ if (rec_has_fpd(rec, &fpd))
+ goto out_unlock;
+
+ if (rec->fmp.nr_entries >= max_nr_entries) {
+ pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id);
+ goto out_unlock;
+ }
+
+ entry = fmp->nr_entries;
+
+save_fpd:
+ save_spa(rec, entry, m->addr, m->ipid, m->extcpu);
+ fpd_dest = &rec->entries[entry];
+ memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc));
+
+ fmp->nr_entries = entry + 1;
+ fmp->validation_bits |= FMP_VALID_LIST_ENTRIES;
+ fmp->validation_bits |= FMP_VALID_LIST;
+
+ pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry);
+
+ update_record_on_storage(rec);
+
+out_unlock:
+ mutex_unlock(&fmpm_update_mutex);
+}
+
+static void retire_dram_row(u64 addr, u64 id, u32 cpu)
+{
+ struct atl_err a_err;
+
+ memset(&a_err, 0, sizeof(struct atl_err));
+
+ a_err.addr = addr;
+ a_err.ipid = id;
+ a_err.cpu = cpu;
+
+ amd_retire_dram_row(&a_err);
+}
+
+static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct mce *m = (struct mce *)data;
+ struct fru_rec *rec;
+
+ if (!mce_is_memory_error(m))
+ return NOTIFY_DONE;
+
+ retire_dram_row(m->addr, m->ipid, m->extcpu);
+
+ /*
+ * An invalid FRU ID should not happen on real errors. But it
+ * could happen from software error injection, etc.
+ */
+ rec = get_fru_record(m->ppin);
+ if (!rec)
+ return NOTIFY_DONE;
+
+ update_fru_record(rec, m);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block fru_mem_poison_nb = {
+ .notifier_call = fru_handle_mem_poison,
+ .priority = MCE_PRIO_LOWEST,
+};
+
+static void retire_mem_fmp(struct fru_rec *rec)
+{
+ struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+ unsigned int i, cpu;
+
+ for (i = 0; i < fmp->nr_entries; i++) {
+ struct cper_fru_poison_desc *fpd = &rec->entries[i];
+ unsigned int err_cpu = INVALID_CPU;
+
+ if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID)
+ continue;
+
+ if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR)
+ continue;
+
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ if (topology_ppin(cpu) == fmp->fru_id) {
+ err_cpu = cpu;
+ break;
+ }
+ }
+ cpus_read_unlock();
+
+ if (err_cpu == INVALID_CPU)
+ continue;
+
+ retire_dram_row(fpd->addr, fpd->hw_id, err_cpu);
+ save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu);
+ }
+}
+
+static void retire_mem_records(void)
+{
+ struct fru_rec *rec;
+ unsigned int i;
+
+ for_each_fru(i, rec) {
+ if (!rec_has_valid_entries(rec))
+ continue;
+
+ retire_mem_fmp(rec);
+ }
+}
+
+/* Set the CPER Record Header and CPER Section Descriptor fields. */
+static void set_rec_fields(struct fru_rec *rec)
+{
+ struct cper_section_descriptor *sec_desc = &rec->sec_desc;
+ struct cper_record_header *hdr = &rec->hdr;
+
+ memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ hdr->revision = CPER_RECORD_REV;
+ hdr->signature_end = CPER_SIG_END;
+
+ /*
+ * Currently, it is assumed that there is one FRU Memory Poison
+ * section per CPER. But this may change for other implementations.
+ */
+ hdr->section_count = 1;
+
+ /* The logged errors are recoverable. Otherwise, they'd never make it here. */
+ hdr->error_severity = CPER_SEV_RECOVERABLE;
+
+ hdr->validation_bits = 0;
+ hdr->record_length = max_rec_len;
+ hdr->creator_id = CPER_CREATOR_FMP;
+ hdr->notification_type = CPER_NOTIFY_MCE;
+ hdr->record_id = cper_next_record_id();
+ hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ sec_desc->section_offset = sizeof(struct cper_record_header);
+ sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
+ sec_desc->revision = CPER_SEC_REV;
+ sec_desc->validation_bits = 0;
+ sec_desc->flags = CPER_SEC_PRIMARY;
+ sec_desc->section_type = CPER_SECTION_TYPE_FMP;
+ sec_desc->section_severity = CPER_SEV_RECOVERABLE;
+}
+
+static int save_new_records(void)
+{
+ DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU);
+ struct fru_rec *rec;
+ unsigned int i;
+ int ret = 0;
+
+ for_each_fru(i, rec) {
+ if (rec->hdr.record_length)
+ continue;
+
+ set_rec_fields(rec);
+
+ ret = update_record_on_storage(rec);
+ if (ret)
+ goto out_clear;
+
+ set_bit(i, new_records);
+ }
+
+ return ret;
+
+out_clear:
+ for_each_fru(i, rec) {
+ if (!test_bit(i, new_records))
+ continue;
+
+ erst_clear(rec->hdr.record_id);
+ }
+
+ return ret;
+}
+
+/* Check that the record matches expected types for the current system.*/
+static bool fmp_is_usable(struct fru_rec *rec)
+{
+ struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+ u64 cpuid;
+
+ pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits);
+
+ if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) {
+ pr_debug("Arch type unknown\n");
+ return false;
+ }
+
+ if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) {
+ pr_debug("Arch type not 'x86 Family/Model/Stepping'\n");
+ return false;
+ }
+
+ if (!(fmp->validation_bits & FMP_VALID_ARCH)) {
+ pr_debug("Arch value unknown\n");
+ return false;
+ }
+
+ cpuid = cpuid_eax(1);
+ if (fmp->fru_arch != cpuid) {
+ pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n",
+ fmp->fru_arch, cpuid);
+ return false;
+ }
+
+ if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) {
+ pr_debug("FRU ID type unknown\n");
+ return false;
+ }
+
+ if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) {
+ pr_debug("FRU ID type is not 'x86 PPIN'\n");
+ return false;
+ }
+
+ if (!(fmp->validation_bits & FMP_VALID_ID)) {
+ pr_debug("FRU ID value unknown\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool fmp_is_valid(struct fru_rec *rec)
+{
+ struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+ u32 checksum, len;
+
+ len = get_fmp_len(rec);
+ if (len < sizeof(struct cper_sec_fru_mem_poison)) {
+ pr_debug("fmp length is too small\n");
+ return false;
+ }
+
+ /* Checksum must sum to zero for the entire section. */
+ checksum = do_fmp_checksum(fmp, len) + fmp->checksum;
+ if (checksum) {
+ pr_debug("fmp checksum failed: sum = 0x%x\n", checksum);
+ print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false);
+ return false;
+ }
+
+ if (!fmp_is_usable(rec))
+ return false;
+
+ return true;
+}
+
+static struct fru_rec *get_valid_record(struct fru_rec *old)
+{
+ struct fru_rec *new;
+
+ if (!fmp_is_valid(old)) {
+ pr_debug("Ignoring invalid record\n");
+ return NULL;
+ }
+
+ new = get_fru_record(old->fmp.fru_id);
+ if (!new)
+ pr_debug("Ignoring record for absent FRU\n");
+
+ return new;
+}
+
+/*
+ * Fetch saved records from persistent storage.
+ *
+ * For each found record:
+ * - If it was not created by this module, then ignore it.
+ * - If it is valid, then copy its data to the local cache.
+ * - If it is not valid, then erase it.
+ */
+static int get_saved_records(void)
+{
+ struct fru_rec *old, *new;
+ u64 record_id;
+ int ret, pos;
+ ssize_t len;
+
+ /*
+ * Assume saved records match current max size.
+ *
+ * However, this may not be true depending on module parameters.
+ */
+ old = kmalloc(max_rec_len, GFP_KERNEL);
+ if (!old) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = erst_get_record_id_begin(&pos);
+ if (ret < 0)
+ goto out_end;
+
+ while (!erst_get_record_id_next(&pos, &record_id)) {
+ if (record_id == APEI_ERST_INVALID_RECORD_ID)
+ goto out_end;
+ /*
+ * Make sure to clear temporary buffer between reads to avoid
+ * leftover data from records of various sizes.
+ */
+ memset(old, 0, max_rec_len);
+
+ len = erst_read_record(record_id, &old->hdr, max_rec_len,
+ sizeof(struct fru_rec), &CPER_CREATOR_FMP);
+ if (len < 0)
+ continue;
+
+ if (len > max_rec_len) {
+ pr_debug("Found record larger than max_rec_len\n");
+ continue;
+ }
+
+ new = get_valid_record(old);
+ if (!new)
+ erst_clear(record_id);
+
+ /* Restore the record */
+ memcpy(new, old, len);
+ }
+
+out_end:
+ erst_get_record_id_end();
+ kfree(old);
+out:
+ return ret;
+}
+
+static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu)
+{
+ struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
+
+ fmp->fru_arch_type = FMP_ARCH_TYPE_X86_CPUID_1_EAX;
+ fmp->validation_bits |= FMP_VALID_ARCH_TYPE;
+
+ /* Assume all CPUs in the system have the same value for now. */
+ fmp->fru_arch = cpuid_eax(1);
+ fmp->validation_bits |= FMP_VALID_ARCH;
+
+ fmp->fru_id_type = FMP_ID_TYPE_X86_PPIN;
+ fmp->validation_bits |= FMP_VALID_ID_TYPE;
+
+ fmp->fru_id = topology_ppin(cpu);
+ fmp->validation_bits |= FMP_VALID_ID;
+}
+
+static int init_fmps(void)
+{
+ struct fru_rec *rec;
+ unsigned int i, cpu;
+ int ret = 0;
+
+ for_each_fru(i, rec) {
+ unsigned int fru_cpu = INVALID_CPU;
+
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ if (topology_physical_package_id(cpu) == i) {
+ fru_cpu = cpu;
+ break;
+ }
+ }
+ cpus_read_unlock();
+
+ if (fru_cpu == INVALID_CPU) {
+ pr_debug("Failed to find matching CPU for FRU #%u\n", i);
+ ret = -ENODEV;
+ break;
+ }
+
+ set_fmp_fields(rec, fru_cpu);
+ }
+
+ return ret;
+}
+
+static int get_system_info(void)
+{
+ /* Only load on MI300A systems for now. */
+ if (!(boot_cpu_data.x86_model >= 0x90 &&
+ boot_cpu_data.x86_model <= 0x9f))
+ return -ENODEV;
+
+ if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) {
+ pr_debug("PPIN feature not available\n");
+ return -ENODEV;
+ }
+
+ /* Use CPU socket as FRU for MI300 systems. */
+ max_nr_fru = topology_max_packages();
+ if (!max_nr_fru)
+ return -ENODEV;
+
+ if (max_nr_fru > FMPM_MAX_NR_FRU) {
+ pr_warn("Too many FRUs to manage: found: %u, max: %u\n",
+ max_nr_fru, FMPM_MAX_NR_FRU);
+ return -ENODEV;
+ }
+
+ if (!max_nr_entries)
+ max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES;
+
+ spa_nr_entries = max_nr_fru * max_nr_entries;
+
+ max_rec_len = sizeof(struct fru_rec);
+ max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries;
+
+ pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n",
+ max_nr_fru, max_nr_entries, max_rec_len);
+
+ return 0;
+}
+
+static void free_records(void)
+{
+ struct fru_rec *rec;
+ int i;
+
+ for_each_fru(i, rec)
+ kfree(rec);
+
+ kfree(fru_records);
+ kfree(spa_entries);
+}
+
+static int allocate_records(void)
+{
+ int i, ret = 0;
+
+ fru_records = kcalloc(max_nr_fru, sizeof(struct fru_rec *), GFP_KERNEL);
+ if (!fru_records) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < max_nr_fru; i++) {
+ fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL);
+ if (!fru_records[i]) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+ }
+
+ spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL);
+ if (!spa_entries) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ for (i = 0; i < spa_nr_entries; i++)
+ spa_entries[i] = INVALID_SPA;
+
+ return ret;
+
+out_free:
+ while (--i >= 0)
+ kfree(fru_records[i]);
+
+ kfree(fru_records);
+out:
+ return ret;
+}
+
+static void *fmpm_start(struct seq_file *f, loff_t *pos)
+{
+ if (*pos >= (spa_nr_entries + 1))
+ return NULL;
+ return pos;
+}
+
+static void *fmpm_next(struct seq_file *f, void *data, loff_t *pos)
+{
+ if (++(*pos) >= (spa_nr_entries + 1))
+ return NULL;
+ return pos;
+}
+
+static void fmpm_stop(struct seq_file *f, void *data)
+{
+}
+
+#define SHORT_WIDTH 8
+#define U64_WIDTH 18
+#define TIMESTAMP_WIDTH 19
+#define LONG_WIDTH 24
+#define U64_PAD (LONG_WIDTH - U64_WIDTH)
+#define TS_PAD (LONG_WIDTH - TIMESTAMP_WIDTH)
+static int fmpm_show(struct seq_file *f, void *data)
+{
+ unsigned int fru_idx, entry, spa_entry, line;
+ struct cper_fru_poison_desc *fpd;
+ struct fru_rec *rec;
+
+ line = *(loff_t *)data;
+ if (line == 0) {
+ seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx");
+ seq_printf(f, "%-*s", LONG_WIDTH, "fru_id");
+ seq_printf(f, "%-*s", SHORT_WIDTH, "entry");
+ seq_printf(f, "%-*s", LONG_WIDTH, "timestamp");
+ seq_printf(f, "%-*s", LONG_WIDTH, "hw_id");
+ seq_printf(f, "%-*s", LONG_WIDTH, "addr");
+ seq_printf(f, "%-*s", LONG_WIDTH, "spa");
+ goto out_newline;
+ }
+
+ spa_entry = line - 1;
+ fru_idx = spa_entry / max_nr_entries;
+ entry = spa_entry % max_nr_entries;
+
+ rec = fru_records[fru_idx];
+ if (!rec)
+ goto out;
+
+ seq_printf(f, "%-*u", SHORT_WIDTH, fru_idx);
+ seq_printf(f, "0x%016llx%-*s", rec->fmp.fru_id, U64_PAD, "");
+ seq_printf(f, "%-*u", SHORT_WIDTH, entry);
+
+ mutex_lock(&fmpm_update_mutex);
+
+ if (entry >= rec->fmp.nr_entries) {
+ seq_printf(f, "%-*s", LONG_WIDTH, "*");
+ seq_printf(f, "%-*s", LONG_WIDTH, "*");
+ seq_printf(f, "%-*s", LONG_WIDTH, "*");
+ seq_printf(f, "%-*s", LONG_WIDTH, "*");
+ goto out_unlock;
+ }
+
+ fpd = &rec->entries[entry];
+
+ seq_printf(f, "%ptT%-*s", &fpd->timestamp, TS_PAD, "");
+ seq_printf(f, "0x%016llx%-*s", fpd->hw_id, U64_PAD, "");
+ seq_printf(f, "0x%016llx%-*s", fpd->addr, U64_PAD, "");
+
+ if (spa_entries[spa_entry] == INVALID_SPA)
+ seq_printf(f, "%-*s", LONG_WIDTH, "*");
+ else
+ seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, "");
+
+out_unlock:
+ mutex_unlock(&fmpm_update_mutex);
+out_newline:
+ seq_putc(f, '\n');
+out:
+ return 0;
+}
+
+static const struct seq_operations fmpm_seq_ops = {
+ .start = fmpm_start,
+ .next = fmpm_next,
+ .stop = fmpm_stop,
+ .show = fmpm_show,
+};
+
+static int fmpm_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &fmpm_seq_ops);
+}
+
+static const struct file_operations fmpm_fops = {
+ .open = fmpm_open,
+ .release = seq_release,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static void setup_debugfs(void)
+{
+ struct dentry *dfs = ras_get_debugfs_root();
+
+ if (!dfs)
+ return;
+
+ fmpm_dfs_dir = debugfs_create_dir("fmpm", dfs);
+ if (!fmpm_dfs_dir)
+ return;
+
+ fmpm_dfs_entries = debugfs_create_file("entries", 0400, fmpm_dfs_dir, NULL, &fmpm_fops);
+ if (!fmpm_dfs_entries)
+ debugfs_remove(fmpm_dfs_dir);
+}
+
+static const struct x86_cpu_id fmpm_cpuids[] = {
+ X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
+ { }
+};
+MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids);
+
+static int __init fru_mem_poison_init(void)
+{
+ int ret;
+
+ if (!x86_match_cpu(fmpm_cpuids)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (erst_disable) {
+ pr_debug("ERST not available\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ ret = get_system_info();
+ if (ret)
+ goto out;
+
+ ret = allocate_records();
+ if (ret)
+ goto out;
+
+ ret = init_fmps();
+ if (ret)
+ goto out_free;
+
+ ret = get_saved_records();
+ if (ret)
+ goto out_free;
+
+ ret = save_new_records();
+ if (ret)
+ goto out_free;
+
+ setup_debugfs();
+
+ retire_mem_records();
+
+ mce_register_decode_chain(&fru_mem_poison_nb);
+
+ pr_info("FRU Memory Poison Manager initialized\n");
+ return 0;
+
+out_free:
+ free_records();
+out:
+ return ret;
+}
+
+static void __exit fru_mem_poison_exit(void)
+{
+ mce_unregister_decode_chain(&fru_mem_poison_nb);
+ debugfs_remove(fmpm_dfs_dir);
+ free_records();
+}
+
+module_init(fru_mem_poison_init);
+module_exit(fru_mem_poison_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FRU Memory Poison Manager");
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 321af498ee11..e440b15fbabc 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -480,9 +480,15 @@ DEFINE_SHOW_ATTRIBUTE(array);
static int __init create_debugfs_nodes(void)
{
- struct dentry *d, *pfn, *decay, *count, *array;
+ struct dentry *d, *pfn, *decay, *count, *array, *dfs;
- d = debugfs_create_dir("cec", ras_debugfs_dir);
+ dfs = ras_get_debugfs_root();
+ if (!dfs) {
+ pr_warn("Error getting RAS debugfs root!\n");
+ return -1;
+ }
+
+ d = debugfs_create_dir("cec", dfs);
if (!d) {
pr_warn("Error creating cec debugfs node!\n");
return -1;
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
index ffb973c328e3..42afd3de68b2 100644
--- a/drivers/ras/debugfs.c
+++ b/drivers/ras/debugfs.c
@@ -3,10 +3,16 @@
#include <linux/ras.h>
#include "debugfs.h"
-struct dentry *ras_debugfs_dir;
+static struct dentry *ras_debugfs_dir;
static atomic_t trace_count = ATOMIC_INIT(0);
+struct dentry *ras_get_debugfs_root(void)
+{
+ return ras_debugfs_dir;
+}
+EXPORT_SYMBOL_GPL(ras_get_debugfs_root);
+
int ras_userspace_consumers(void)
{
return atomic_read(&trace_count);
diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h
index c07443b462ad..4749ccdeeba1 100644
--- a/drivers/ras/debugfs.h
+++ b/drivers/ras/debugfs.h
@@ -4,6 +4,6 @@
#include <linux/debugfs.h>
-extern struct dentry *ras_debugfs_dir;
+struct dentry *ras_get_debugfs_root(void);
#endif /* __RAS_DEBUGFS_H__ */
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 95540ea8dd9d..a6e4792a1b2e 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -10,6 +10,37 @@
#include <linux/ras.h>
#include <linux/uuid.h>
+#if IS_ENABLED(CONFIG_AMD_ATL)
+/*
+ * Once set, this function pointer should never be unset.
+ *
+ * The library module will set this pointer if it successfully loads. The module
+ * should not be unloaded except for testing and debug purposes.
+ */
+static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err);
+
+void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *))
+{
+ amd_atl_umc_na_to_spa = f;
+}
+EXPORT_SYMBOL_GPL(amd_atl_register_decoder);
+
+void amd_atl_unregister_decoder(void)
+{
+ amd_atl_umc_na_to_spa = NULL;
+}
+EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder);
+
+unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+{
+ if (!amd_atl_umc_na_to_spa)
+ return -EINVAL;
+
+ return amd_atl_umc_na_to_spa(err);
+}
+EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr);
+#endif /* CONFIG_AMD_ATL */
+
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c
index bc88a40a88d4..8bbcd983a74a 100644
--- a/drivers/regulator/max5970-regulator.c
+++ b/drivers/regulator/max5970-regulator.c
@@ -29,8 +29,8 @@ struct max5970_regulator {
};
enum max597x_regulator_id {
- MAX597X_SW0,
- MAX597X_SW1,
+ MAX597X_sw0,
+ MAX597X_sw1,
};
static int max5970_read_adc(struct regmap *regmap, int reg, long *val)
@@ -378,8 +378,8 @@ static int max597x_dt_parse(struct device_node *np,
}
static const struct regulator_desc regulators[] = {
- MAX597X_SWITCH(SW0, MAX5970_REG_CHXEN, 0, "vss1"),
- MAX597X_SWITCH(SW1, MAX5970_REG_CHXEN, 1, "vss2"),
+ MAX597X_SWITCH(sw0, MAX5970_REG_CHXEN, 0, "vss1"),
+ MAX597X_SWITCH(sw1, MAX5970_REG_CHXEN, 1, "vss2"),
};
static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg,
@@ -392,7 +392,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg,
return ret;
if (*val)
- return regmap_write(map, reg, *val);
+ return regmap_write(map, reg, 0);
return 0;
}
diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c
index 698c420e0869..60cfcd741c2a 100644
--- a/drivers/regulator/pwm-regulator.c
+++ b/drivers/regulator/pwm-regulator.c
@@ -157,7 +157,17 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev)
pwm_get_state(drvdata->pwm, &pstate);
+ if (!pstate.enabled) {
+ if (pstate.polarity == PWM_POLARITY_INVERSED)
+ pstate.duty_cycle = pstate.period;
+ else
+ pstate.duty_cycle = 0;
+ }
+
voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit);
+ if (voltage < min(max_uV_duty, min_uV_duty) ||
+ voltage > max(max_uV_duty, min_uV_duty))
+ return -ENOTRECOVERABLE;
/*
* The dutycycle for min_uV might be greater than the one for max_uV.
@@ -313,6 +323,32 @@ static int pwm_regulator_init_continuous(struct platform_device *pdev,
return 0;
}
+static int pwm_regulator_init_boot_on(struct platform_device *pdev,
+ struct pwm_regulator_data *drvdata,
+ const struct regulator_init_data *init_data)
+{
+ struct pwm_state pstate;
+
+ if (!init_data->constraints.boot_on || drvdata->enb_gpio)
+ return 0;
+
+ pwm_get_state(drvdata->pwm, &pstate);
+ if (pstate.enabled)
+ return 0;
+
+ /*
+ * Update the duty cycle so the output does not change
+ * when the regulator core enables the regulator (and
+ * thus the PWM channel).
+ */
+ if (pstate.polarity == PWM_POLARITY_INVERSED)
+ pstate.duty_cycle = pstate.period;
+ else
+ pstate.duty_cycle = 0;
+
+ return pwm_apply_might_sleep(drvdata->pwm, &pstate);
+}
+
static int pwm_regulator_probe(struct platform_device *pdev)
{
const struct regulator_init_data *init_data;
@@ -372,6 +408,13 @@ static int pwm_regulator_probe(struct platform_device *pdev)
if (ret)
return ret;
+ ret = pwm_regulator_init_boot_on(pdev, drvdata, init_data);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to apply boot_on settings: %d\n",
+ ret);
+ return ret;
+ }
+
regulator = devm_regulator_register(&pdev->dev,
&drvdata->desc, &config);
if (IS_ERR(regulator)) {
diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index e374fa6e5f28..d89ae7f16d7a 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -1017,14 +1017,14 @@ static const struct regulator_desc rk805_reg[] = {
};
static const struct linear_range rk806_buck_voltage_ranges[] = {
- REGULATOR_LINEAR_RANGE(500000, 0, 160, 6250), /* 500mV ~ 1500mV */
- REGULATOR_LINEAR_RANGE(1500000, 161, 237, 25000), /* 1500mV ~ 3400mV */
- REGULATOR_LINEAR_RANGE(3400000, 238, 255, 0),
+ REGULATOR_LINEAR_RANGE(500000, 0, 159, 6250), /* 500mV ~ 1500mV */
+ REGULATOR_LINEAR_RANGE(1500000, 160, 235, 25000), /* 1500mV ~ 3400mV */
+ REGULATOR_LINEAR_RANGE(3400000, 236, 255, 0),
};
static const struct linear_range rk806_ldo_voltage_ranges[] = {
- REGULATOR_LINEAR_RANGE(500000, 0, 232, 12500), /* 500mV ~ 3400mV */
- REGULATOR_LINEAR_RANGE(3400000, 233, 255, 0), /* 500mV ~ 3400mV */
+ REGULATOR_LINEAR_RANGE(500000, 0, 231, 12500), /* 500mV ~ 3400mV */
+ REGULATOR_LINEAR_RANGE(3400000, 232, 255, 0),
};
static const struct regulator_desc rk806_reg[] = {
diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c
index f48214e2c3b4..04133510e5af 100644
--- a/drivers/regulator/ti-abb-regulator.c
+++ b/drivers/regulator/ti-abb-regulator.c
@@ -726,9 +726,25 @@ static int ti_abb_probe(struct platform_device *pdev)
return PTR_ERR(abb->setup_reg);
}
- abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address");
- if (IS_ERR(abb->int_base))
- return PTR_ERR(abb->int_base);
+ pname = "int-address";
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname);
+ if (!res) {
+ dev_err(dev, "Missing '%s' IO resource\n", pname);
+ return -ENODEV;
+ }
+ /*
+ * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is
+ * shared between regulator-abb-{ivahd,dspeve,gpu} driver
+ * instances. Therefore use devm_ioremap() rather than
+ * devm_platform_ioremap_resource_byname() to avoid busy
+ * resource region conflicts.
+ */
+ abb->int_base = devm_ioremap(dev, res->start,
+ resource_size(res));
+ if (!abb->int_base) {
+ dev_err(dev, "Unable to map '%s'\n", pname);
+ return -ENOMEM;
+ }
/* Map Optional resources */
pname = "efuse-address";
diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c
index d5caf36c56cd..225c859d6da5 100644
--- a/drivers/rtc/lib_test.c
+++ b/drivers/rtc/lib_test.c
@@ -54,7 +54,7 @@ static void rtc_time64_to_tm_test_date_range(struct kunit *test)
days = div_s64(secs, 86400);
- #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \
+ #define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \
year, month, mday, yday, days
KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 7327e81352e9..cead018c3f06 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -8,9 +8,6 @@
* Copyright IBM Corp. 1999, 2009
*/
-#define KMSG_COMPONENT "dasd"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -30,9 +27,6 @@
#include <asm/itcw.h>
#include <asm/diag.h>
-/* This is ugly... */
-#define PRINTK_HEADER "dasd:"
-
#include "dasd_int.h"
/*
* SECTION: Constant definitions to be used within this file
@@ -313,39 +307,57 @@ static int dasd_state_basic_to_known(struct dasd_device *device)
*/
static int dasd_state_basic_to_ready(struct dasd_device *device)
{
- int rc;
- struct dasd_block *block;
- struct gendisk *disk;
+ struct dasd_block *block = device->block;
+ struct queue_limits lim;
+ int rc = 0;
- rc = 0;
- block = device->block;
/* make disk known with correct capacity */
- if (block) {
- if (block->base->discipline->do_analysis != NULL)
- rc = block->base->discipline->do_analysis(block);
- if (rc) {
- if (rc != -EAGAIN) {
- device->state = DASD_STATE_UNFMT;
- disk = device->block->gdp;
- kobject_uevent(&disk_to_dev(disk)->kobj,
- KOBJ_CHANGE);
- goto out;
- }
- return rc;
- }
- if (device->discipline->setup_blk_queue)
- device->discipline->setup_blk_queue(block);
- set_capacity(block->gdp,
- block->blocks << block->s2b_shift);
+ if (!block) {
device->state = DASD_STATE_READY;
- rc = dasd_scan_partitions(block);
- if (rc) {
- device->state = DASD_STATE_BASIC;
+ goto out;
+ }
+
+ if (block->base->discipline->do_analysis != NULL)
+ rc = block->base->discipline->do_analysis(block);
+ if (rc) {
+ if (rc == -EAGAIN)
return rc;
- }
- } else {
- device->state = DASD_STATE_READY;
+ device->state = DASD_STATE_UNFMT;
+ kobject_uevent(&disk_to_dev(device->block->gdp)->kobj,
+ KOBJ_CHANGE);
+ goto out;
+ }
+
+ lim = queue_limits_start_update(block->gdp->queue);
+ lim.max_dev_sectors = device->discipline->max_sectors(block);
+ lim.max_hw_sectors = lim.max_dev_sectors;
+ lim.logical_block_size = block->bp_block;
+
+ if (device->discipline->has_discard) {
+ unsigned int max_bytes;
+
+ lim.discard_granularity = block->bp_block;
+
+ /* Calculate max_discard_sectors and make it PAGE aligned */
+ max_bytes = USHRT_MAX * block->bp_block;
+ max_bytes = ALIGN_DOWN(max_bytes, PAGE_SIZE);
+
+ lim.max_hw_discard_sectors = max_bytes / block->bp_block;
+ lim.max_write_zeroes_sectors = lim.max_hw_discard_sectors;
}
+ rc = queue_limits_commit_update(block->gdp->queue, &lim);
+ if (rc)
+ return rc;
+
+ set_capacity(block->gdp, block->blocks << block->s2b_shift);
+ device->state = DASD_STATE_READY;
+
+ rc = dasd_scan_partitions(block);
+ if (rc) {
+ device->state = DASD_STATE_BASIC;
+ return rc;
+ }
+
out:
if (device->discipline->basic_to_ready)
rc = device->discipline->basic_to_ready(device);
@@ -412,7 +424,7 @@ dasd_state_ready_to_online(struct dasd_device * device)
KOBJ_CHANGE);
return 0;
}
- disk_uevent(device->block->bdev_handle->bdev->bd_disk,
+ disk_uevent(file_bdev(device->block->bdev_file)->bd_disk,
KOBJ_CHANGE);
}
return 0;
@@ -433,7 +445,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device)
device->state = DASD_STATE_READY;
if (device->block && !(device->features & DASD_FEATURE_USERAW))
- disk_uevent(device->block->bdev_handle->bdev->bd_disk,
+ disk_uevent(file_bdev(device->block->bdev_file)->bd_disk,
KOBJ_CHANGE);
return 0;
}
@@ -1301,7 +1313,6 @@ int dasd_term_IO(struct dasd_ccw_req *cqr)
{
struct dasd_device *device;
int retries, rc;
- char errorstring[ERRORLENGTH];
/* Check the cqr */
rc = dasd_check_cqr(cqr);
@@ -1340,10 +1351,8 @@ int dasd_term_IO(struct dasd_ccw_req *cqr)
rc = 0;
break;
default:
- /* internal error 10 - unknown rc*/
- snprintf(errorstring, ERRORLENGTH, "10 %d", rc);
- dev_err(&device->cdev->dev, "An error occurred in the "
- "DASD device driver, reason=%s\n", errorstring);
+ dev_err(&device->cdev->dev,
+ "Unexpected error during request termination %d\n", rc);
BUG();
break;
}
@@ -1362,7 +1371,6 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
{
struct dasd_device *device;
int rc;
- char errorstring[ERRORLENGTH];
/* Check the cqr */
rc = dasd_check_cqr(cqr);
@@ -1382,10 +1390,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
return -EPERM;
}
if (cqr->retries < 0) {
- /* internal error 14 - start_IO run out of retries */
- sprintf(errorstring, "14 %p", cqr);
- dev_err(&device->cdev->dev, "An error occurred in the DASD "
- "device driver, reason=%s\n", errorstring);
+ dev_err(&device->cdev->dev,
+ "Start I/O ran out of retries\n");
cqr->status = DASD_CQR_ERROR;
return -EIO;
}
@@ -1463,11 +1469,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
"not accessible");
break;
default:
- /* internal error 11 - unknown rc */
- snprintf(errorstring, ERRORLENGTH, "11 %d", rc);
dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", errorstring);
+ "Unexpected error during request start %d", rc);
BUG();
break;
}
@@ -1904,8 +1907,6 @@ static void __dasd_device_process_ccw_queue(struct dasd_device *device,
static void __dasd_process_cqr(struct dasd_device *device,
struct dasd_ccw_req *cqr)
{
- char errorstring[ERRORLENGTH];
-
switch (cqr->status) {
case DASD_CQR_SUCCESS:
cqr->status = DASD_CQR_DONE;
@@ -1917,11 +1918,8 @@ static void __dasd_process_cqr(struct dasd_device *device,
cqr->status = DASD_CQR_TERMINATED;
break;
default:
- /* internal error 12 - wrong cqr status*/
- snprintf(errorstring, ERRORLENGTH, "12 %p %x02", cqr, cqr->status);
dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", errorstring);
+ "Unexpected CQR status %02x", cqr->status);
BUG();
}
if (cqr->callback)
@@ -1986,16 +1984,14 @@ static void __dasd_device_check_expire(struct dasd_device *device)
if (device->discipline->term_IO(cqr) != 0) {
/* Hmpf, try again in 5 sec */
dev_err(&device->cdev->dev,
- "cqr %p timed out (%lus) but cannot be "
- "ended, retrying in 5 s\n",
- cqr, (cqr->expires/HZ));
+ "CQR timed out (%lus) but cannot be ended, retrying in 5s\n",
+ (cqr->expires / HZ));
cqr->expires += 5*HZ;
dasd_device_set_timer(device, 5*HZ);
} else {
dev_err(&device->cdev->dev,
- "cqr %p timed out (%lus), %i retries "
- "remaining\n", cqr, (cqr->expires/HZ),
- cqr->retries);
+ "CQR timed out (%lus), %i retries remaining\n",
+ (cqr->expires / HZ), cqr->retries);
}
__dasd_device_check_autoquiesce_timeout(device, cqr);
}
@@ -2116,8 +2112,7 @@ int dasd_flush_device_queue(struct dasd_device *device)
if (rc) {
/* unable to terminate requeust */
dev_err(&device->cdev->dev,
- "Flushing the DASD request queue "
- "failed for request %p\n", cqr);
+ "Flushing the DASD request queue failed\n");
/* stop flush processing */
goto finished;
}
@@ -2633,8 +2628,7 @@ static int __dasd_cancel_req(struct dasd_ccw_req *cqr)
rc = device->discipline->term_IO(cqr);
if (rc) {
dev_err(&device->cdev->dev,
- "Cancelling request %p failed with rc=%d\n",
- cqr, rc);
+ "Cancelling request failed with rc=%d\n", rc);
} else {
cqr->stopclk = get_tod_clock();
}
@@ -3402,8 +3396,7 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie)
ret = ccw_device_set_online(cdev);
if (ret)
- pr_warn("%s: Setting the DASD online failed with rc=%d\n",
- dev_name(&cdev->dev), ret);
+ dev_warn(&cdev->dev, "Setting the DASD online failed with rc=%d\n", ret);
}
/*
@@ -3490,8 +3483,11 @@ int dasd_generic_set_online(struct ccw_device *cdev,
{
struct dasd_discipline *discipline;
struct dasd_device *device;
+ struct device *dev;
int rc;
+ dev = &cdev->dev;
+
/* first online clears initial online feature flag */
dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0);
device = dasd_create_device(cdev);
@@ -3504,11 +3500,10 @@ int dasd_generic_set_online(struct ccw_device *cdev,
/* Try to load the required module. */
rc = request_module(DASD_DIAG_MOD);
if (rc) {
- pr_warn("%s Setting the DASD online failed "
- "because the required module %s "
- "could not be loaded (rc=%d)\n",
- dev_name(&cdev->dev), DASD_DIAG_MOD,
- rc);
+ dev_warn(dev, "Setting the DASD online failed "
+ "because the required module %s "
+ "could not be loaded (rc=%d)\n",
+ DASD_DIAG_MOD, rc);
dasd_delete_device(device);
return -ENODEV;
}
@@ -3516,8 +3511,7 @@ int dasd_generic_set_online(struct ccw_device *cdev,
/* Module init could have failed, so check again here after
* request_module(). */
if (!dasd_diag_discipline_pointer) {
- pr_warn("%s Setting the DASD online failed because of missing DIAG discipline\n",
- dev_name(&cdev->dev));
+ dev_warn(dev, "Setting the DASD online failed because of missing DIAG discipline\n");
dasd_delete_device(device);
return -ENODEV;
}
@@ -3527,37 +3521,33 @@ int dasd_generic_set_online(struct ccw_device *cdev,
dasd_delete_device(device);
return -EINVAL;
}
+ device->base_discipline = base_discipline;
if (!try_module_get(discipline->owner)) {
- module_put(base_discipline->owner);
dasd_delete_device(device);
return -EINVAL;
}
- device->base_discipline = base_discipline;
device->discipline = discipline;
/* check_device will allocate block device if necessary */
rc = discipline->check_device(device);
if (rc) {
- pr_warn("%s Setting the DASD online with discipline %s failed with rc=%i\n",
- dev_name(&cdev->dev), discipline->name, rc);
- module_put(discipline->owner);
- module_put(base_discipline->owner);
+ dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n",
+ discipline->name, rc);
dasd_delete_device(device);
return rc;
}
dasd_set_target_state(device, DASD_STATE_ONLINE);
if (device->state <= DASD_STATE_KNOWN) {
- pr_warn("%s Setting the DASD online failed because of a missing discipline\n",
- dev_name(&cdev->dev));
+ dev_warn(dev, "Setting the DASD online failed because of a missing discipline\n");
rc = -ENODEV;
dasd_set_target_state(device, DASD_STATE_NEW);
if (device->block)
dasd_free_block(device->block);
dasd_delete_device(device);
- } else
- pr_debug("dasd_generic device %s found\n",
- dev_name(&cdev->dev));
+ } else {
+ dev_dbg(dev, "dasd_generic device found\n");
+ }
wait_event(dasd_init_waitq, _wait_for_device(device));
@@ -3568,10 +3558,13 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online);
int dasd_generic_set_offline(struct ccw_device *cdev)
{
+ int max_count, open_count, rc;
struct dasd_device *device;
struct dasd_block *block;
- int max_count, open_count, rc;
unsigned long flags;
+ struct device *dev;
+
+ dev = &cdev->dev;
rc = 0;
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
@@ -3588,15 +3581,14 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
* in the other openers.
*/
if (device->block) {
- max_count = device->block->bdev_handle ? 0 : -1;
+ max_count = device->block->bdev_file ? 0 : -1;
open_count = atomic_read(&device->block->open_count);
if (open_count > max_count) {
if (open_count > 0)
- pr_warn("%s: The DASD cannot be set offline with open count %i\n",
- dev_name(&cdev->dev), open_count);
+ dev_warn(dev, "The DASD cannot be set offline with open count %i\n",
+ open_count);
else
- pr_warn("%s: The DASD cannot be set offline while it is in use\n",
- dev_name(&cdev->dev));
+ dev_warn(dev, "The DASD cannot be set offline while it is in use\n");
rc = -EBUSY;
goto out_err;
}
@@ -3634,8 +3626,8 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
* so sync bdev first and then wait for our queues to become
* empty
*/
- if (device->block && device->block->bdev_handle)
- bdev_mark_dead(device->block->bdev_handle->bdev, false);
+ if (device->block && device->block->bdev_file)
+ bdev_mark_dead(file_bdev(device->block->bdev_file), false);
dasd_schedule_device_bh(device);
rc = wait_event_interruptible(shutdown_waitq,
_wait_for_empty_queues(device));
@@ -3956,8 +3948,8 @@ static int dasd_handle_autoquiesce(struct dasd_device *device,
if (dasd_eer_enabled(device))
dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE);
- pr_info("%s: The DASD has been put in the quiesce state\n",
- dev_name(&device->cdev->dev));
+ dev_info(&device->cdev->dev,
+ "The DASD has been put in the quiesce state\n");
dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE);
if (device->features & DASD_FEATURE_REQUEUEQUIESCE)
@@ -3977,10 +3969,8 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
NULL);
if (IS_ERR(cqr)) {
- /* internal error 13 - Allocating the RDC request failed*/
- dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", "13");
+ DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s",
+ "Could not allocate RDC request");
return cqr;
}
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 89957bb7244d..459b7f8ac883 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -7,13 +7,9 @@
*
*/
-#define KMSG_COMPONENT "dasd-eckd"
-
#include <linux/timer.h>
#include <asm/idals.h>
-#define PRINTK_HEADER "dasd_erp(3990): "
-
#include "dasd_int.h"
#include "dasd_eckd.h"
@@ -398,7 +394,6 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense)
struct dasd_device *device = erp->startdev;
char msg_format = (sense[7] & 0xF0);
char msg_no = (sense[7] & 0x0F);
- char errorstring[ERRORLENGTH];
switch (msg_format) {
case 0x00: /* Format 0 - Program or System Checks */
@@ -1004,12 +999,9 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense)
}
break;
- default: /* unknown message format - should not happen
- internal error 03 - unknown message format */
- snprintf(errorstring, ERRORLENGTH, "03 %x02", msg_format);
+ default:
dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", errorstring);
+ "Unknown message format %02x", msg_format);
break;
} /* end switch message format */
@@ -1056,11 +1048,9 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
set_bit(DASD_CQR_SUPPRESS_CR, &erp->refers->flags);
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
} else {
- /* fatal error - set status to FAILED
- internal error 09 - Command Reject */
if (!test_bit(DASD_CQR_SUPPRESS_CR, &erp->flags))
dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, reason=09\n");
+ "An I/O command request was rejected\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
}
@@ -1128,13 +1118,7 @@ dasd_3990_erp_equip_check(struct dasd_ccw_req * erp, char *sense)
erp->function = dasd_3990_erp_equip_check;
if (sense[1] & SNS1_WRITE_INHIBITED) {
- dev_info(&device->cdev->dev,
- "Write inhibited path encountered\n");
-
- /* vary path offline
- internal error 04 - Path should be varied off-line.*/
- dev_err(&device->cdev->dev, "An error occurred in the DASD "
- "device driver, reason=%s\n", "04");
+ dev_err(&device->cdev->dev, "Write inhibited path encountered\n");
erp = dasd_3990_erp_action_1(erp);
@@ -1285,11 +1269,7 @@ dasd_3990_erp_inv_format(struct dasd_ccw_req * erp, char *sense)
erp = dasd_3990_erp_action_4(erp, sense);
} else {
- /* internal error 06 - The track format is not valid*/
- dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", "06");
-
+ dev_err(&device->cdev->dev, "Track format is not valid\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
}
@@ -1663,9 +1643,8 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
sizeof(struct LO_eckd_data), device);
if (IS_ERR(erp)) {
- /* internal error 01 - Unable to allocate ERP */
- dev_err(&device->cdev->dev, "An error occurred in the DASD "
- "device driver, reason=%s\n", "01");
+ DBF_DEV_EVENT(DBF_ERR, device, "%s",
+ "Unable to allocate ERP request (1B 32)");
return dasd_3990_erp_cleanup(default_erp, DASD_CQR_FAILED);
}
@@ -1807,10 +1786,8 @@ dasd_3990_update_1B(struct dasd_ccw_req * previous_erp, char *sense)
cpa = previous_erp->irb.scsw.cmd.cpa;
if (cpa == 0) {
- /* internal error 02 -
- Unable to determine address of the CCW to be restarted */
- dev_err(&device->cdev->dev, "An error occurred in the DASD "
- "device driver, reason=%s\n", "02");
+ dev_err(&device->cdev->dev,
+ "Unable to determine address of to be restarted CCW\n");
previous_erp->status = DASD_CQR_FAILED;
@@ -2009,15 +1986,9 @@ dasd_3990_erp_compound_config(struct dasd_ccw_req * erp, char *sense)
{
if ((sense[25] & DASD_SENSE_BIT_1) && (sense[26] & DASD_SENSE_BIT_2)) {
-
- /* set to suspended duplex state then restart
- internal error 05 - Set device to suspended duplex state
- should be done */
struct dasd_device *device = erp->startdev;
dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", "05");
-
+ "Compound configuration error occurred\n");
}
erp->function = dasd_3990_erp_compound_config;
@@ -2153,10 +2124,9 @@ dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense)
erp = dasd_3990_erp_int_req(erp);
break;
- case 0x0F: /* length mismatch during update write command
- internal error 08 - update write command error*/
- dev_err(&device->cdev->dev, "An error occurred in the "
- "DASD device driver, reason=%s\n", "08");
+ case 0x0F:
+ dev_err(&device->cdev->dev,
+ "Update write command error occurred\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
break;
@@ -2165,12 +2135,9 @@ dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense)
erp = dasd_3990_erp_action_10_32(erp, sense);
break;
- case 0x15: /* next track outside defined extend
- internal error 07 - The next track is not
- within the defined storage extent */
+ case 0x15:
dev_err(&device->cdev->dev,
- "An error occurred in the DASD device driver, "
- "reason=%s\n", "07");
+ "Track outside defined extent error occurred\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
break;
@@ -2663,7 +2630,7 @@ dasd_3990_erp_further_erp(struct dasd_ccw_req *erp)
* necessary
*/
dev_err(&device->cdev->dev,
- "ERP %p has run out of retries and failed\n", erp);
+ "ERP %px has run out of retries and failed\n", erp);
erp->status = DASD_CQR_FAILED;
}
@@ -2704,8 +2671,7 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head,
while (erp_done != erp) {
if (erp_done == NULL) /* end of chain reached */
- panic(PRINTK_HEADER "Programming error in ERP! The "
- "original request was lost\n");
+ panic("Programming error in ERP! The original request was lost\n");
/* remove the request from the device queue */
list_del(&erp_done->blocklist);
@@ -2786,11 +2752,9 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
"ERP chain at BEGINNING of ERP-ACTION\n");
for (temp_erp = cqr;
temp_erp != NULL; temp_erp = temp_erp->refers) {
-
dev_err(&device->cdev->dev,
- "ERP %p (%02x) refers to %p\n",
- temp_erp, temp_erp->status,
- temp_erp->refers);
+ "ERP %px (%02x) refers to %px\n",
+ temp_erp, temp_erp->status, temp_erp->refers);
}
}
@@ -2837,11 +2801,9 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
"ERP chain at END of ERP-ACTION\n");
for (temp_erp = erp;
temp_erp != NULL; temp_erp = temp_erp->refers) {
-
dev_err(&device->cdev->dev,
- "ERP %p (%02x) refers to %p\n",
- temp_erp, temp_erp->status,
- temp_erp->refers);
+ "ERP %px (%02x) refers to %px\n",
+ temp_erp, temp_erp->status, temp_erp->refers);
}
}
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index c9740ae88d1a..e84cd5436556 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -6,20 +6,12 @@
* Author(s): Stefan Weinhuber <wein@de.ibm.com>
*/
-#define KMSG_COMPONENT "dasd-eckd"
-
#include <linux/list.h>
#include <linux/slab.h>
#include <asm/ebcdic.h>
#include "dasd_int.h"
#include "dasd_eckd.h"
-#ifdef PRINTK_HEADER
-#undef PRINTK_HEADER
-#endif /* PRINTK_HEADER */
-#define PRINTK_HEADER "dasd(eckd):"
-
-
/*
* General concept of alias management:
* - PAV and DASD alias management is specific to the eckd discipline.
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index c4e36650c426..0316c20823ee 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -13,8 +13,6 @@
*
*/
-#define KMSG_COMPONENT "dasd"
-
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -24,8 +22,6 @@
#include <linux/uaccess.h>
#include <asm/ipl.h>
-/* This is ugly... */
-#define PRINTK_HEADER "dasd_devmap:"
#define DASD_MAX_PARAMS 256
#include "dasd_int.h"
@@ -1114,7 +1110,7 @@ dasd_use_diag_show(struct device *dev, struct device_attribute *attr, char *buf)
use_diag = (devmap->features & DASD_FEATURE_USEDIAG) != 0;
else
use_diag = (DASD_FEATURE_DEFAULT & DASD_FEATURE_USEDIAG) != 0;
- return sprintf(buf, use_diag ? "1\n" : "0\n");
+ return sysfs_emit(buf, use_diag ? "1\n" : "0\n");
}
static ssize_t
@@ -1163,7 +1159,7 @@ dasd_use_raw_show(struct device *dev, struct device_attribute *attr, char *buf)
use_raw = (devmap->features & DASD_FEATURE_USERAW) != 0;
else
use_raw = (DASD_FEATURE_DEFAULT & DASD_FEATURE_USERAW) != 0;
- return sprintf(buf, use_raw ? "1\n" : "0\n");
+ return sysfs_emit(buf, use_raw ? "1\n" : "0\n");
}
static ssize_t
@@ -1259,7 +1255,7 @@ dasd_access_show(struct device *dev, struct device_attribute *attr,
if (count < 0)
return count;
- return sprintf(buf, "%d\n", count);
+ return sysfs_emit(buf, "%d\n", count);
}
static DEVICE_ATTR(host_access_count, 0444, dasd_access_show, NULL);
@@ -1338,19 +1334,19 @@ static ssize_t dasd_alias_show(struct device *dev,
device = dasd_device_from_cdev(to_ccwdev(dev));
if (IS_ERR(device))
- return sprintf(buf, "0\n");
+ return sysfs_emit(buf, "0\n");
if (device->discipline && device->discipline->get_uid &&
!device->discipline->get_uid(device, &uid)) {
if (uid.type == UA_BASE_PAV_ALIAS ||
uid.type == UA_HYPER_PAV_ALIAS) {
dasd_put_device(device);
- return sprintf(buf, "1\n");
+ return sysfs_emit(buf, "1\n");
}
}
dasd_put_device(device);
- return sprintf(buf, "0\n");
+ return sysfs_emit(buf, "0\n");
}
static DEVICE_ATTR(alias, 0444, dasd_alias_show, NULL);
@@ -1412,15 +1408,9 @@ dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
break;
}
- if (strlen(uid.vduit) > 0)
- snprintf(uid_string, sizeof(uid_string),
- "%s.%s.%04x.%s.%s",
- uid.vendor, uid.serial, uid.ssid, ua_string,
- uid.vduit);
- else
- snprintf(uid_string, sizeof(uid_string),
- "%s.%s.%04x.%s",
- uid.vendor, uid.serial, uid.ssid, ua_string);
+ snprintf(uid_string, sizeof(uid_string), "%s.%s.%04x.%s%s%s",
+ uid.vendor, uid.serial, uid.ssid, ua_string,
+ uid.vduit[0] ? "." : "", uid.vduit);
}
dasd_put_device(device);
@@ -1862,7 +1852,7 @@ static ssize_t dasd_pm_show(struct device *dev,
device = dasd_device_from_cdev(to_ccwdev(dev));
if (IS_ERR(device))
- return sprintf(buf, "0\n");
+ return sysfs_emit(buf, "0\n");
opm = dasd_path_get_opm(device);
nppm = dasd_path_get_nppm(device);
@@ -1872,8 +1862,8 @@ static ssize_t dasd_pm_show(struct device *dev,
ifccpm = dasd_path_get_ifccpm(device);
dasd_put_device(device);
- return sprintf(buf, "%02x %02x %02x %02x %02x %02x\n", opm, nppm,
- cablepm, cuirpm, hpfpm, ifccpm);
+ return sysfs_emit(buf, "%02x %02x %02x %02x %02x %02x\n", opm, nppm,
+ cablepm, cuirpm, hpfpm, ifccpm);
}
static DEVICE_ATTR(path_masks, 0444, dasd_pm_show, NULL);
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 2e4e555b37c3..ea4b1d01bb76 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -8,8 +8,6 @@
*
*/
-#define KMSG_COMPONENT "dasd"
-
#include <linux/kernel_stat.h>
#include <linux/stddef.h>
#include <linux/kernel.h>
@@ -31,8 +29,6 @@
#include "dasd_int.h"
#include "dasd_diag.h"
-#define PRINTK_HEADER "dasd(diag):"
-
MODULE_LICENSE("GPL");
/* The maximum number of blocks per request (max_blocks) is dependent on the
@@ -621,25 +617,9 @@ dasd_diag_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req,
"dump sense not available for DIAG data");
}
-/*
- * Initialize block layer request queue.
- */
-static void dasd_diag_setup_blk_queue(struct dasd_block *block)
+static unsigned int dasd_diag_max_sectors(struct dasd_block *block)
{
- unsigned int logical_block_size = block->bp_block;
- struct request_queue *q = block->gdp->queue;
- int max;
-
- max = DIAG_MAX_BLOCKS << block->s2b_shift;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- q->limits.max_dev_sectors = max;
- blk_queue_logical_block_size(q, logical_block_size);
- blk_queue_max_hw_sectors(q, max);
- blk_queue_max_segments(q, USHRT_MAX);
- /* With page sized segments each segment can be translated into one idaw/tidaw */
- blk_queue_max_segment_size(q, PAGE_SIZE);
- blk_queue_segment_boundary(q, PAGE_SIZE - 1);
- blk_queue_dma_alignment(q, PAGE_SIZE - 1);
+ return DIAG_MAX_BLOCKS << block->s2b_shift;
}
static int dasd_diag_pe_handler(struct dasd_device *device,
@@ -652,10 +632,10 @@ static struct dasd_discipline dasd_diag_discipline = {
.owner = THIS_MODULE,
.name = "DIAG",
.ebcname = "DIAG",
+ .max_sectors = dasd_diag_max_sectors,
.check_device = dasd_diag_check_device,
.pe_handler = dasd_diag_pe_handler,
.fill_geometry = dasd_diag_fill_geometry,
- .setup_blk_queue = dasd_diag_setup_blk_queue,
.start_IO = dasd_start_diag,
.term_IO = dasd_diag_term_IO,
.handle_terminated_request = dasd_diag_handle_terminated_request,
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index bd89b032968a..373c1a86c33e 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -10,8 +10,6 @@
* Author.........: Nigel Hislop <hislop_nigel@emc.com>
*/
-#define KMSG_COMPONENT "dasd-eckd"
-
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -37,11 +35,6 @@
#include "dasd_int.h"
#include "dasd_eckd.h"
-#ifdef PRINTK_HEADER
-#undef PRINTK_HEADER
-#endif /* PRINTK_HEADER */
-#define PRINTK_HEADER "dasd(eckd):"
-
/*
* raw track access always map to 64k in memory
* so it maps to 16 blocks of 4k per track
@@ -1072,22 +1065,14 @@ static void dasd_eckd_read_fc_security(struct dasd_device *device)
}
}
-static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
- char *print_uid)
+static void dasd_eckd_get_uid_string(struct dasd_conf *conf, char *print_uid)
{
struct dasd_uid uid;
create_uid(conf, &uid);
- if (strlen(uid.vduit) > 0)
- snprintf(print_uid, DASD_UID_STRLEN,
- "%s.%s.%04x.%02x.%s",
- uid.vendor, uid.serial, uid.ssid,
- uid.real_unit_addr, uid.vduit);
- else
- snprintf(print_uid, DASD_UID_STRLEN,
- "%s.%s.%04x.%02x",
- uid.vendor, uid.serial, uid.ssid,
- uid.real_unit_addr);
+ snprintf(print_uid, DASD_UID_STRLEN, "%s.%s.%04x.%02x%s%s",
+ uid.vendor, uid.serial, uid.ssid, uid.real_unit_addr,
+ uid.vduit[0] ? "." : "", uid.vduit);
}
static int dasd_eckd_check_cabling(struct dasd_device *device,
@@ -5529,15 +5514,15 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp)
* and return number of printed chars.
*/
static void
-dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page)
+dasd_eckd_dump_ccw_range(struct dasd_device *device, struct ccw1 *from,
+ struct ccw1 *to, char *page)
{
int len, count;
char *datap;
len = 0;
while (from <= to) {
- len += sprintf(page + len, PRINTK_HEADER
- " CCW %p: %08X %08X DAT:",
+ len += sprintf(page + len, "CCW %px: %08X %08X DAT:",
from, ((int *) from)[0], ((int *) from)[1]);
/* get pointer to data (consider IDALs) */
@@ -5560,7 +5545,7 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page)
from++;
}
if (len > 0)
- printk(KERN_ERR "%s", page);
+ dev_err(&device->cdev->dev, "%s", page);
}
static void
@@ -5591,9 +5576,12 @@ dasd_eckd_dump_sense_dbf(struct dasd_device *device, struct irb *irb,
static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
struct dasd_ccw_req *req, struct irb *irb)
{
- char *page;
struct ccw1 *first, *last, *fail, *from, *to;
+ struct device *dev;
int len, sl, sct;
+ char *page;
+
+ dev = &device->cdev->dev;
page = (char *) get_zeroed_page(GFP_ATOMIC);
if (page == NULL) {
@@ -5602,24 +5590,18 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
return;
}
/* dump the sense data */
- len = sprintf(page, PRINTK_HEADER
- " I/O status report for device %s:\n",
- dev_name(&device->cdev->dev));
- len += sprintf(page + len, PRINTK_HEADER
- " in req: %p CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X "
- "CS:%02X RC:%d\n",
+ len = sprintf(page, "I/O status report:\n");
+ len += sprintf(page + len,
+ "in req: %px CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X CS:%02X RC:%d\n",
req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw),
scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw),
scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw),
req ? req->intrc : 0);
- len += sprintf(page + len, PRINTK_HEADER
- " device %s: Failing CCW: %p\n",
- dev_name(&device->cdev->dev),
+ len += sprintf(page + len, "Failing CCW: %px\n",
phys_to_virt(irb->scsw.cmd.cpa));
if (irb->esw.esw0.erw.cons) {
for (sl = 0; sl < 4; sl++) {
- len += sprintf(page + len, PRINTK_HEADER
- " Sense(hex) %2d-%2d:",
+ len += sprintf(page + len, "Sense(hex) %2d-%2d:",
(8 * sl), ((8 * sl) + 7));
for (sct = 0; sct < 8; sct++) {
@@ -5631,23 +5613,20 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
if (irb->ecw[27] & DASD_SENSE_BIT_0) {
/* 24 Byte Sense Data */
- sprintf(page + len, PRINTK_HEADER
- " 24 Byte: %x MSG %x, "
- "%s MSGb to SYSOP\n",
+ sprintf(page + len,
+ "24 Byte: %x MSG %x, %s MSGb to SYSOP\n",
irb->ecw[7] >> 4, irb->ecw[7] & 0x0f,
irb->ecw[1] & 0x10 ? "" : "no");
} else {
/* 32 Byte Sense Data */
- sprintf(page + len, PRINTK_HEADER
- " 32 Byte: Format: %x "
- "Exception class %x\n",
+ sprintf(page + len,
+ "32 Byte: Format: %x Exception class %x\n",
irb->ecw[6] & 0x0f, irb->ecw[22] >> 4);
}
} else {
- sprintf(page + len, PRINTK_HEADER
- " SORRY - NO VALID SENSE AVAILABLE\n");
+ sprintf(page + len, "SORRY - NO VALID SENSE AVAILABLE\n");
}
- printk(KERN_ERR "%s", page);
+ dev_err(dev, "%s", page);
if (req) {
/* req == NULL for unsolicited interrupts */
@@ -5656,8 +5635,8 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
first = req->cpaddr;
for (last = first; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++);
to = min(first + 6, last);
- printk(KERN_ERR PRINTK_HEADER " Related CP in req: %p\n", req);
- dasd_eckd_dump_ccw_range(first, to, page);
+ dev_err(dev, "Related CP in req: %px\n", req);
+ dasd_eckd_dump_ccw_range(device, first, to, page);
/* print failing CCW area (maximum 4) */
/* scsw->cda is either valid or zero */
@@ -5665,19 +5644,19 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
fail = phys_to_virt(irb->scsw.cmd.cpa); /* failing CCW */
if (from < fail - 2) {
from = fail - 2; /* there is a gap - print header */
- printk(KERN_ERR PRINTK_HEADER "......\n");
+ dev_err(dev, "......\n");
}
to = min(fail + 1, last);
- dasd_eckd_dump_ccw_range(from, to, page + len);
+ dasd_eckd_dump_ccw_range(device, from, to, page + len);
/* print last CCWs (maximum 2) */
len = 0;
from = max(from, ++to);
if (from < last - 1) {
from = last - 1; /* there is a gap - print header */
- printk(KERN_ERR PRINTK_HEADER "......\n");
+ dev_err(dev, "......\n");
}
- dasd_eckd_dump_ccw_range(from, last, page + len);
+ dasd_eckd_dump_ccw_range(device, from, last, page + len);
}
free_page((unsigned long) page);
}
@@ -5701,11 +5680,9 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
return;
}
/* dump the sense data */
- len = sprintf(page, PRINTK_HEADER
- " I/O status report for device %s:\n",
- dev_name(&device->cdev->dev));
- len += sprintf(page + len, PRINTK_HEADER
- " in req: %p CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X "
+ len = sprintf(page, "I/O status report:\n");
+ len += sprintf(page + len,
+ "in req: %px CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X "
"CS:%02X fcxs:%02X schxs:%02X RC:%d\n",
req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw),
scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw),
@@ -5713,9 +5690,7 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
irb->scsw.tm.fcxs,
(irb->scsw.tm.ifob << 7) | irb->scsw.tm.sesq,
req ? req->intrc : 0);
- len += sprintf(page + len, PRINTK_HEADER
- " device %s: Failing TCW: %p\n",
- dev_name(&device->cdev->dev),
+ len += sprintf(page + len, "Failing TCW: %px\n",
phys_to_virt(irb->scsw.tm.tcw));
tsb = NULL;
@@ -5724,47 +5699,37 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
tsb = tcw_get_tsb(phys_to_virt(irb->scsw.tm.tcw));
if (tsb) {
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->length %d\n", tsb->length);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->flags %x\n", tsb->flags);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->dcw_offset %d\n", tsb->dcw_offset);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->count %d\n", tsb->count);
+ len += sprintf(page + len, "tsb->length %d\n", tsb->length);
+ len += sprintf(page + len, "tsb->flags %x\n", tsb->flags);
+ len += sprintf(page + len, "tsb->dcw_offset %d\n", tsb->dcw_offset);
+ len += sprintf(page + len, "tsb->count %d\n", tsb->count);
residual = tsb->count - 28;
- len += sprintf(page + len, PRINTK_HEADER
- " residual %d\n", residual);
+ len += sprintf(page + len, "residual %d\n", residual);
switch (tsb->flags & 0x07) {
case 1: /* tsa_iostat */
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.iostat.dev_time %d\n",
+ len += sprintf(page + len, "tsb->tsa.iostat.dev_time %d\n",
tsb->tsa.iostat.dev_time);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.iostat.def_time %d\n",
+ len += sprintf(page + len, "tsb->tsa.iostat.def_time %d\n",
tsb->tsa.iostat.def_time);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.iostat.queue_time %d\n",
+ len += sprintf(page + len, "tsb->tsa.iostat.queue_time %d\n",
tsb->tsa.iostat.queue_time);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.iostat.dev_busy_time %d\n",
+ len += sprintf(page + len, "tsb->tsa.iostat.dev_busy_time %d\n",
tsb->tsa.iostat.dev_busy_time);
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.iostat.dev_act_time %d\n",
+ len += sprintf(page + len, "tsb->tsa.iostat.dev_act_time %d\n",
tsb->tsa.iostat.dev_act_time);
sense = tsb->tsa.iostat.sense;
break;
case 2: /* ts_ddpc */
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.ddpc.rc %d\n", tsb->tsa.ddpc.rc);
+ len += sprintf(page + len, "tsb->tsa.ddpc.rc %d\n",
+ tsb->tsa.ddpc.rc);
for (sl = 0; sl < 2; sl++) {
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.ddpc.rcq %2d-%2d: ",
+ len += sprintf(page + len,
+ "tsb->tsa.ddpc.rcq %2d-%2d: ",
(8 * sl), ((8 * sl) + 7));
rcq = tsb->tsa.ddpc.rcq;
for (sct = 0; sct < 8; sct++) {
- len += sprintf(page + len, " %02x",
+ len += sprintf(page + len, "%02x",
rcq[8 * sl + sct]);
}
len += sprintf(page + len, "\n");
@@ -5772,15 +5737,15 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
sense = tsb->tsa.ddpc.sense;
break;
case 3: /* tsa_intrg */
- len += sprintf(page + len, PRINTK_HEADER
- " tsb->tsa.intrg.: not supported yet\n");
+ len += sprintf(page + len,
+ "tsb->tsa.intrg.: not supported yet\n");
break;
}
if (sense) {
for (sl = 0; sl < 4; sl++) {
- len += sprintf(page + len, PRINTK_HEADER
- " Sense(hex) %2d-%2d:",
+ len += sprintf(page + len,
+ "Sense(hex) %2d-%2d:",
(8 * sl), ((8 * sl) + 7));
for (sct = 0; sct < 8; sct++) {
len += sprintf(page + len, " %02x",
@@ -5791,27 +5756,23 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
if (sense[27] & DASD_SENSE_BIT_0) {
/* 24 Byte Sense Data */
- sprintf(page + len, PRINTK_HEADER
- " 24 Byte: %x MSG %x, "
- "%s MSGb to SYSOP\n",
+ sprintf(page + len,
+ "24 Byte: %x MSG %x, %s MSGb to SYSOP\n",
sense[7] >> 4, sense[7] & 0x0f,
sense[1] & 0x10 ? "" : "no");
} else {
/* 32 Byte Sense Data */
- sprintf(page + len, PRINTK_HEADER
- " 32 Byte: Format: %x "
- "Exception class %x\n",
+ sprintf(page + len,
+ "32 Byte: Format: %x Exception class %x\n",
sense[6] & 0x0f, sense[22] >> 4);
}
} else {
- sprintf(page + len, PRINTK_HEADER
- " SORRY - NO VALID SENSE AVAILABLE\n");
+ sprintf(page + len, "SORRY - NO VALID SENSE AVAILABLE\n");
}
} else {
- sprintf(page + len, PRINTK_HEADER
- " SORRY - NO TSB DATA AVAILABLE\n");
+ sprintf(page + len, "SORRY - NO TSB DATA AVAILABLE\n");
}
- printk(KERN_ERR "%s", page);
+ dev_err(&device->cdev->dev, "%s", page);
free_page((unsigned long) page);
}
@@ -6865,17 +6826,9 @@ static void dasd_eckd_handle_hpf_error(struct dasd_device *device,
dasd_schedule_requeue(device);
}
-/*
- * Initialize block layer request queue.
- */
-static void dasd_eckd_setup_blk_queue(struct dasd_block *block)
+static unsigned int dasd_eckd_max_sectors(struct dasd_block *block)
{
- unsigned int logical_block_size = block->bp_block;
- struct request_queue *q = block->gdp->queue;
- struct dasd_device *device = block->base;
- int max;
-
- if (device->features & DASD_FEATURE_USERAW) {
+ if (block->base->features & DASD_FEATURE_USERAW) {
/*
* the max_blocks value for raw_track access is 256
* it is higher than the native ECKD value because we
@@ -6883,19 +6836,10 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block)
* so the max_hw_sectors are
* 2048 x 512B = 1024kB = 16 tracks
*/
- max = DASD_ECKD_MAX_BLOCKS_RAW << block->s2b_shift;
- } else {
- max = DASD_ECKD_MAX_BLOCKS << block->s2b_shift;
+ return DASD_ECKD_MAX_BLOCKS_RAW << block->s2b_shift;
}
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- q->limits.max_dev_sectors = max;
- blk_queue_logical_block_size(q, logical_block_size);
- blk_queue_max_hw_sectors(q, max);
- blk_queue_max_segments(q, USHRT_MAX);
- /* With page sized segments each segment can be translated into one idaw/tidaw */
- blk_queue_max_segment_size(q, PAGE_SIZE);
- blk_queue_segment_boundary(q, PAGE_SIZE - 1);
- blk_queue_dma_alignment(q, PAGE_SIZE - 1);
+
+ return DASD_ECKD_MAX_BLOCKS << block->s2b_shift;
}
static struct ccw_driver dasd_eckd_driver = {
@@ -6927,7 +6871,7 @@ static struct dasd_discipline dasd_eckd_discipline = {
.basic_to_ready = dasd_eckd_basic_to_ready,
.online_to_ready = dasd_eckd_online_to_ready,
.basic_to_known = dasd_eckd_basic_to_known,
- .setup_blk_queue = dasd_eckd_setup_blk_queue,
+ .max_sectors = dasd_eckd_max_sectors,
.fill_geometry = dasd_eckd_fill_geometry,
.start_IO = dasd_start_IO,
.term_IO = dasd_term_IO,
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index c956de711cf7..5064a616e041 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -7,8 +7,6 @@
* Author(s): Stefan Weinhuber <wein@de.ibm.com>
*/
-#define KMSG_COMPONENT "dasd-eckd"
-
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/kernel.h>
@@ -28,11 +26,6 @@
#include "dasd_int.h"
#include "dasd_eckd.h"
-#ifdef PRINTK_HEADER
-#undef PRINTK_HEADER
-#endif /* PRINTK_HEADER */
-#define PRINTK_HEADER "dasd(eer):"
-
/*
* SECTION: the internal buffer
*/
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index c07e6e713518..4c0d3a704513 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -9,8 +9,6 @@
*
*/
-#define KMSG_COMPONENT "dasd"
-
#include <linux/ctype.h>
#include <linux/init.h>
@@ -18,9 +16,6 @@
#include <asm/ebcdic.h>
#include <linux/uaccess.h>
-/* This is ugly... */
-#define PRINTK_HEADER "dasd_erp:"
-
#include "dasd_int.h"
struct dasd_ccw_req *
@@ -170,12 +165,12 @@ dasd_log_sense(struct dasd_ccw_req *cqr, struct irb *irb)
device = cqr->startdev;
if (cqr->intrc == -ETIMEDOUT) {
dev_err(&device->cdev->dev,
- "A timeout error occurred for cqr %p\n", cqr);
+ "A timeout error occurred for cqr %px\n", cqr);
return;
}
if (cqr->intrc == -ENOLINK) {
dev_err(&device->cdev->dev,
- "A transport error occurred for cqr %p\n", cqr);
+ "A transport error occurred for cqr %px\n", cqr);
return;
}
/* dump sense data */
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index c06fa2b27120..bcbb2f8e91fe 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -25,11 +25,6 @@
#include "dasd_int.h"
#include "dasd_fba.h"
-#ifdef PRINTK_HEADER
-#undef PRINTK_HEADER
-#endif /* PRINTK_HEADER */
-#define PRINTK_HEADER "dasd(fba):"
-
#define FBA_DEFAULT_RETRIES 32
#define DASD_FBA_CCW_WRITE 0x41
@@ -660,30 +655,27 @@ static void
dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req,
struct irb *irb)
{
- char *page;
struct ccw1 *act, *end, *last;
int len, sl, sct, count;
+ struct device *dev;
+ char *page;
+
+ dev = &device->cdev->dev;
page = (char *) get_zeroed_page(GFP_ATOMIC);
if (page == NULL) {
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
- "No memory to dump sense data");
+ "No memory to dump sense data");
return;
}
- len = sprintf(page, PRINTK_HEADER
- " I/O status report for device %s:\n",
- dev_name(&device->cdev->dev));
- len += sprintf(page + len, PRINTK_HEADER
- " in req: %p CS: 0x%02X DS: 0x%02X\n", req,
- irb->scsw.cmd.cstat, irb->scsw.cmd.dstat);
- len += sprintf(page + len, PRINTK_HEADER
- " device %s: Failing CCW: %p\n",
- dev_name(&device->cdev->dev),
+ len = sprintf(page, "I/O status report:\n");
+ len += sprintf(page + len, "in req: %px CS: 0x%02X DS: 0x%02X\n",
+ req, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat);
+ len += sprintf(page + len, "Failing CCW: %px\n",
(void *) (addr_t) irb->scsw.cmd.cpa);
if (irb->esw.esw0.erw.cons) {
for (sl = 0; sl < 4; sl++) {
- len += sprintf(page + len, PRINTK_HEADER
- " Sense(hex) %2d-%2d:",
+ len += sprintf(page + len, "Sense(hex) %2d-%2d:",
(8 * sl), ((8 * sl) + 7));
for (sct = 0; sct < 8; sct++) {
@@ -693,20 +685,18 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req,
len += sprintf(page + len, "\n");
}
} else {
- len += sprintf(page + len, PRINTK_HEADER
- " SORRY - NO VALID SENSE AVAILABLE\n");
+ len += sprintf(page + len, "SORRY - NO VALID SENSE AVAILABLE\n");
}
- printk(KERN_ERR "%s", page);
+ dev_err(dev, "%s", page);
/* dump the Channel Program */
/* print first CCWs (maximum 8) */
act = req->cpaddr;
- for (last = act; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++);
+ for (last = act; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++);
end = min(act + 8, last);
- len = sprintf(page, PRINTK_HEADER " Related CP in req: %p\n", req);
+ len = sprintf(page, "Related CP in req: %px\n", req);
while (act <= end) {
- len += sprintf(page + len, PRINTK_HEADER
- " CCW %p: %08X %08X DAT:",
+ len += sprintf(page + len, "CCW %px: %08X %08X DAT:",
act, ((int *) act)[0], ((int *) act)[1]);
for (count = 0; count < 32 && count < act->count;
count += sizeof(int))
@@ -716,19 +706,17 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req,
len += sprintf(page + len, "\n");
act++;
}
- printk(KERN_ERR "%s", page);
-
+ dev_err(dev, "%s", page);
/* print failing CCW area */
len = 0;
if (act < ((struct ccw1 *)(addr_t) irb->scsw.cmd.cpa) - 2) {
act = ((struct ccw1 *)(addr_t) irb->scsw.cmd.cpa) - 2;
- len += sprintf(page + len, PRINTK_HEADER "......\n");
+ len += sprintf(page + len, "......\n");
}
end = min((struct ccw1 *)(addr_t) irb->scsw.cmd.cpa + 2, last);
while (act <= end) {
- len += sprintf(page + len, PRINTK_HEADER
- " CCW %p: %08X %08X DAT:",
+ len += sprintf(page + len, "CCW %px: %08X %08X DAT:",
act, ((int *) act)[0], ((int *) act)[1]);
for (count = 0; count < 32 && count < act->count;
count += sizeof(int))
@@ -742,11 +730,10 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req,
/* print last CCWs */
if (act < last - 2) {
act = last - 2;
- len += sprintf(page + len, PRINTK_HEADER "......\n");
+ len += sprintf(page + len, "......\n");
}
while (act <= last) {
- len += sprintf(page + len, PRINTK_HEADER
- " CCW %p: %08X %08X DAT:",
+ len += sprintf(page + len, "CCW %px: %08X %08X DAT:",
act, ((int *) act)[0], ((int *) act)[1]);
for (count = 0; count < 32 && count < act->count;
count += sizeof(int))
@@ -757,39 +744,13 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req,
act++;
}
if (len > 0)
- printk(KERN_ERR "%s", page);
+ dev_err(dev, "%s", page);
free_page((unsigned long) page);
}
-/*
- * Initialize block layer request queue.
- */
-static void dasd_fba_setup_blk_queue(struct dasd_block *block)
+static unsigned int dasd_fba_max_sectors(struct dasd_block *block)
{
- unsigned int logical_block_size = block->bp_block;
- struct request_queue *q = block->gdp->queue;
- unsigned int max_bytes, max_discard_sectors;
- int max;
-
- max = DASD_FBA_MAX_BLOCKS << block->s2b_shift;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- q->limits.max_dev_sectors = max;
- blk_queue_logical_block_size(q, logical_block_size);
- blk_queue_max_hw_sectors(q, max);
- blk_queue_max_segments(q, USHRT_MAX);
- /* With page sized segments each segment can be translated into one idaw/tidaw */
- blk_queue_max_segment_size(q, PAGE_SIZE);
- blk_queue_segment_boundary(q, PAGE_SIZE - 1);
-
- q->limits.discard_granularity = logical_block_size;
-
- /* Calculate max_discard_sectors and make it PAGE aligned */
- max_bytes = USHRT_MAX * logical_block_size;
- max_bytes = ALIGN_DOWN(max_bytes, PAGE_SIZE);
- max_discard_sectors = max_bytes / logical_block_size;
-
- blk_queue_max_discard_sectors(q, max_discard_sectors);
- blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
+ return DASD_FBA_MAX_BLOCKS << block->s2b_shift;
}
static int dasd_fba_pe_handler(struct dasd_device *device,
@@ -802,10 +763,11 @@ static struct dasd_discipline dasd_fba_discipline = {
.owner = THIS_MODULE,
.name = "FBA ",
.ebcname = "FBA ",
+ .has_discard = true,
.check_device = dasd_fba_check_characteristics,
.do_analysis = dasd_fba_do_analysis,
.pe_handler = dasd_fba_pe_handler,
- .setup_blk_queue = dasd_fba_setup_blk_queue,
+ .max_sectors = dasd_fba_max_sectors,
.fill_geometry = dasd_fba_fill_geometry,
.start_IO = dasd_start_IO,
.term_IO = dasd_term_IO,
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 55e3abe94cde..4533dd055ca8 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -11,8 +11,6 @@
*
*/
-#define KMSG_COMPONENT "dasd"
-
#include <linux/interrupt.h>
#include <linux/major.h>
#include <linux/fs.h>
@@ -20,9 +18,6 @@
#include <linux/uaccess.h>
-/* This is ugly... */
-#define PRINTK_HEADER "dasd_gendisk:"
-
#include "dasd_int.h"
static unsigned int queue_depth = 32;
@@ -39,6 +34,16 @@ MODULE_PARM_DESC(nr_hw_queues, "Default number of hardware queues for new DASD d
*/
int dasd_gendisk_alloc(struct dasd_block *block)
{
+ struct queue_limits lim = {
+ /*
+ * With page sized segments, each segment can be translated into
+ * one idaw/tidaw.
+ */
+ .max_segment_size = PAGE_SIZE,
+ .seg_boundary_mask = PAGE_SIZE - 1,
+ .dma_alignment = PAGE_SIZE - 1,
+ .max_segments = USHRT_MAX,
+ };
struct gendisk *gdp;
struct dasd_device *base;
int len, rc;
@@ -58,11 +63,12 @@ int dasd_gendisk_alloc(struct dasd_block *block)
if (rc)
return rc;
- gdp = blk_mq_alloc_disk(&block->tag_set, block);
+ gdp = blk_mq_alloc_disk(&block->tag_set, &lim, block);
if (IS_ERR(gdp)) {
blk_mq_free_tag_set(&block->tag_set);
return PTR_ERR(gdp);
}
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, gdp->queue);
/* Initialize gendisk structure. */
gdp->major = DASD_MAJOR;
@@ -127,15 +133,15 @@ void dasd_gendisk_free(struct dasd_block *block)
*/
int dasd_scan_partitions(struct dasd_block *block)
{
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
int rc;
- bdev_handle = bdev_open_by_dev(disk_devt(block->gdp), BLK_OPEN_READ,
+ bdev_file = bdev_file_open_by_dev(disk_devt(block->gdp), BLK_OPEN_READ,
NULL, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, blkdev_get returned %ld",
- PTR_ERR(bdev_handle));
+ PTR_ERR(bdev_file));
return -ENODEV;
}
@@ -147,15 +153,15 @@ int dasd_scan_partitions(struct dasd_block *block)
"scan partitions error, rc %d", rc);
/*
- * Since the matching bdev_release() call to the
- * bdev_open_by_path() in this function is not called before
+ * Since the matching fput() call to the
+ * bdev_file_open_by_path() in this function is not called before
* dasd_destroy_partitions the offline open_count limit needs to be
- * increased from 0 to 1. This is done by setting device->bdev_handle
+ * increased from 0 to 1. This is done by setting device->bdev_file
* (see dasd_generic_set_offline). As long as the partition detection
* is running no offline should be allowed. That is why the assignment
- * to block->bdev_handle is done AFTER the BLKRRPART ioctl.
+ * to block->bdev_file is done AFTER the BLKRRPART ioctl.
*/
- block->bdev_handle = bdev_handle;
+ block->bdev_file = bdev_file;
return 0;
}
@@ -165,21 +171,21 @@ int dasd_scan_partitions(struct dasd_block *block)
*/
void dasd_destroy_partitions(struct dasd_block *block)
{
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
/*
- * Get the bdev_handle pointer from the device structure and clear
- * device->bdev_handle to lower the offline open_count limit again.
+ * Get the bdev_file pointer from the device structure and clear
+ * device->bdev_file to lower the offline open_count limit again.
*/
- bdev_handle = block->bdev_handle;
- block->bdev_handle = NULL;
+ bdev_file = block->bdev_file;
+ block->bdev_file = NULL;
- mutex_lock(&bdev_handle->bdev->bd_disk->open_mutex);
- bdev_disk_changed(bdev_handle->bdev->bd_disk, true);
- mutex_unlock(&bdev_handle->bdev->bd_disk->open_mutex);
+ mutex_lock(&file_bdev(bdev_file)->bd_disk->open_mutex);
+ bdev_disk_changed(file_bdev(bdev_file)->bd_disk, true);
+ mutex_unlock(&file_bdev(bdev_file)->bd_disk->open_mutex);
/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
- bdev_release(bdev_handle);
+ fput(bdev_file);
}
int dasd_gendisk_init(void)
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 1b1b8a41c4d4..e5f40536b425 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -113,9 +113,6 @@ do { \
__dev_id.ssid, __dev_id.devno, d_data); \
} while (0)
-/* limit size for an errorstring */
-#define ERRORLENGTH 30
-
/* definition of dbf debug levels */
#define DBF_EMERG 0 /* system is unusable */
#define DBF_ALERT 1 /* action must be taken immediately */
@@ -126,32 +123,6 @@ do { \
#define DBF_INFO 6 /* informational */
#define DBF_DEBUG 6 /* debug-level messages */
-/* messages to be written via klogd and dbf */
-#define DEV_MESSAGE(d_loglevel,d_device,d_string,d_args...)\
-do { \
- printk(d_loglevel PRINTK_HEADER " %s: " d_string "\n", \
- dev_name(&d_device->cdev->dev), d_args); \
- DBF_DEV_EVENT(DBF_ALERT, d_device, d_string, d_args); \
-} while(0)
-
-#define MESSAGE(d_loglevel,d_string,d_args...)\
-do { \
- printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \
- DBF_EVENT(DBF_ALERT, d_string, d_args); \
-} while(0)
-
-/* messages to be written via klogd only */
-#define DEV_MESSAGE_LOG(d_loglevel,d_device,d_string,d_args...)\
-do { \
- printk(d_loglevel PRINTK_HEADER " %s: " d_string "\n", \
- dev_name(&d_device->cdev->dev), d_args); \
-} while(0)
-
-#define MESSAGE_LOG(d_loglevel,d_string,d_args...)\
-do { \
- printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \
-} while(0)
-
/* Macro to calculate number of blocks per page */
#define BLOCKS_PER_PAGE(blksize) (PAGE_SIZE / blksize)
@@ -322,6 +293,7 @@ struct dasd_discipline {
struct module *owner;
char ebcname[8]; /* a name used for tagging and printks */
char name[8]; /* a name used for tagging and printks */
+ bool has_discard;
struct list_head list; /* used for list of disciplines */
@@ -360,10 +332,7 @@ struct dasd_discipline {
int (*online_to_ready) (struct dasd_device *);
int (*basic_to_known)(struct dasd_device *);
- /*
- * Initialize block layer request queue.
- */
- void (*setup_blk_queue)(struct dasd_block *);
+ unsigned int (*max_sectors)(struct dasd_block *);
/* (struct dasd_device *);
* Device operation functions. build_cp creates a ccw chain for
* a block device request, start_io starts the request and
@@ -650,7 +619,7 @@ struct dasd_block {
struct gendisk *gdp;
spinlock_t request_queue_lock;
struct blk_mq_tag_set tag_set;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
atomic_t open_count;
unsigned long blocks; /* size of volume in blocks */
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 61b9675e2a67..7e0ed7032f76 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -10,8 +10,6 @@
* i/o controls for the dasd driver.
*/
-#define KMSG_COMPONENT "dasd"
-
#include <linux/interrupt.h>
#include <linux/compat.h>
#include <linux/major.h>
@@ -24,12 +22,8 @@
#include <linux/uaccess.h>
#include <linux/dasd_mod.h>
-/* This is ugly... */
-#define PRINTK_HEADER "dasd_ioctl:"
-
#include "dasd_int.h"
-
static int
dasd_ioctl_api_version(void __user *argp)
{
@@ -537,7 +531,7 @@ static int __dasd_ioctl_information(struct dasd_block *block,
* This must be hidden from user-space.
*/
dasd_info->open_count = atomic_read(&block->open_count);
- if (!block->bdev_handle)
+ if (!block->bdev_file)
dasd_info->open_count++;
/*
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 62a859ea67f8..0faaa437d9be 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -11,8 +11,6 @@
*
*/
-#define KMSG_COMPONENT "dasd"
-
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -23,9 +21,6 @@
#include <asm/debug.h>
#include <linux/uaccess.h>
-/* This is ugly... */
-#define PRINTK_HEADER "dasd_proc:"
-
#include "dasd_int.h"
static struct proc_dir_entry *dasd_proc_root_entry = NULL;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 4b7ecd4fd431..9c8f529b827c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -546,6 +546,9 @@ static const struct attribute_group *dcssblk_dev_attr_groups[] = {
static ssize_t
dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
+ struct queue_limits lim = {
+ .logical_block_size = 4096,
+ };
int rc, i, j, num_of_segments;
struct dcssblk_dev_info *dev_info;
struct segment_info *seg_info, *temp;
@@ -629,9 +632,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->dev.release = dcssblk_release_segment;
dev_info->dev.groups = dcssblk_dev_attr_groups;
INIT_LIST_HEAD(&dev_info->lh);
- dev_info->gd = blk_alloc_disk(NUMA_NO_NODE);
- if (dev_info->gd == NULL) {
- rc = -ENOMEM;
+ dev_info->gd = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(dev_info->gd)) {
+ rc = PTR_ERR(dev_info->gd);
goto seg_list_del;
}
dev_info->gd->major = dcssblk_major;
@@ -639,7 +642,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->gd->fops = &dcssblk_devops;
dev_info->gd->private_data = dev_info;
dev_info->gd->flags |= GENHD_FL_NO_PART;
- blk_queue_logical_block_size(dev_info->gd->queue, 4096);
blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);
seg_byte_size = (dev_info->end - dev_info->start + 1);
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index ade95e91b3c8..9f6fdd0daa74 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -435,10 +435,17 @@ static const struct blk_mq_ops scm_mq_ops = {
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
- unsigned int devindex, nr_max_blk;
+ struct queue_limits lim = {
+ .logical_block_size = 1 << 12,
+ };
+ unsigned int devindex;
struct request_queue *rq;
int len, ret;
+ lim.max_segments = min(scmdev->nr_max_block,
+ (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));
+ lim.max_hw_sectors = lim.max_segments << 3; /* 8 * 512 = blk_size */
+
devindex = atomic_inc_return(&nr_devices) - 1;
/* scma..scmz + scmaa..scmzz */
if (devindex > 701) {
@@ -462,18 +469,12 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
if (ret)
goto out;
- bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, scmdev);
+ bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, &lim, scmdev);
if (IS_ERR(bdev->gendisk)) {
ret = PTR_ERR(bdev->gendisk);
goto out_tag;
}
rq = bdev->rq = bdev->gendisk->queue;
- nr_max_blk = min(scmdev->nr_max_block,
- (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));
-
- blk_queue_logical_block_size(rq, 1 << 12);
- blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
- blk_queue_max_segments(rq, nr_max_blk);
blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 82efdd20ad01..1d17a83569ce 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -195,7 +195,7 @@ static void free_chan_prog(struct ccw1 *cpa)
struct ccw1 *ptr = cpa;
while (ptr->cda) {
- kfree((void *)(addr_t) ptr->cda);
+ kfree(phys_to_virt(ptr->cda));
ptr++;
}
kfree(cpa);
@@ -237,7 +237,7 @@ static struct ccw1 *alloc_chan_prog(const char __user *ubuf, int rec_count,
free_chan_prog(cpa);
return ERR_PTR(-ENOMEM);
}
- cpa[i].cda = (u32)(addr_t) kbuf;
+ cpa[i].cda = (u32)virt_to_phys(kbuf);
if (copy_from_user(kbuf, ubuf, reclen)) {
free_chan_prog(cpa);
return ERR_PTR(-EFAULT);
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index bc3be0330f1d..0969fa01df58 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -29,7 +29,6 @@
#include <asm/irqflags.h>
#include <asm/checksum.h>
#include <asm/os_info.h>
-#include <asm/switch_to.h>
#include <asm/maccess.h>
#include "sclp.h"
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index aa3292e57e38..6eb8bcd948dc 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -31,7 +31,7 @@
* to devices that use multiple subchannels.
*/
-static struct bus_type ccwgroup_bus_type;
+static const struct bus_type ccwgroup_bus_type;
static void __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev)
{
@@ -465,7 +465,7 @@ static void ccwgroup_shutdown(struct device *dev)
gdrv->shutdown(gdev);
}
-static struct bus_type ccwgroup_bus_type = {
+static const struct bus_type ccwgroup_bus_type = {
.name = "ccwgroup",
.dev_groups = ccwgroup_dev_groups,
.remove = ccwgroup_remove,
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 64ed55c3aed6..3d88899dff7c 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1091,8 +1091,8 @@ int __init chsc_init(void)
{
int ret;
- sei_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
- chsc_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ sei_page = (void *)get_zeroed_page(GFP_KERNEL);
+ chsc_page = (void *)get_zeroed_page(GFP_KERNEL);
if (!sei_page || !chsc_page) {
ret = -ENOMEM;
goto out_err;
diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c
index 902237d0baef..e6c800653f98 100644
--- a/drivers/s390/cio/chsc_sch.c
+++ b/drivers/s390/cio/chsc_sch.c
@@ -293,7 +293,7 @@ static int chsc_ioctl_start(void __user *user_area)
if (!css_general_characteristics.dynio)
/* It makes no sense to try. */
return -EOPNOTSUPP;
- chsc_area = (void *)get_zeroed_page(GFP_DMA | GFP_KERNEL);
+ chsc_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!chsc_area)
return -ENOMEM;
request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -341,7 +341,7 @@ static int chsc_ioctl_on_close_set(void __user *user_area)
ret = -ENOMEM;
goto out_unlock;
}
- on_close_chsc_area = (void *)get_zeroed_page(GFP_DMA | GFP_KERNEL);
+ on_close_chsc_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!on_close_chsc_area) {
ret = -ENOMEM;
goto out_free_request;
@@ -393,7 +393,7 @@ static int chsc_ioctl_start_sync(void __user *user_area)
struct chsc_sync_area *chsc_area;
int ret, ccode;
- chsc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ chsc_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!chsc_area)
return -ENOMEM;
if (copy_from_user(chsc_area, user_area, PAGE_SIZE)) {
@@ -439,7 +439,7 @@ static int chsc_ioctl_info_channel_path(void __user *user_cd)
u8 data[PAGE_SIZE - 20];
} __attribute__ ((packed)) *scpcd_area;
- scpcd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ scpcd_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!scpcd_area)
return -ENOMEM;
cd = kzalloc(sizeof(*cd), GFP_KERNEL);
@@ -501,7 +501,7 @@ static int chsc_ioctl_info_cu(void __user *user_cd)
u8 data[PAGE_SIZE - 20];
} __attribute__ ((packed)) *scucd_area;
- scucd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ scucd_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!scucd_area)
return -ENOMEM;
cd = kzalloc(sizeof(*cd), GFP_KERNEL);
@@ -564,7 +564,7 @@ static int chsc_ioctl_info_sch_cu(void __user *user_cud)
u8 data[PAGE_SIZE - 20];
} __attribute__ ((packed)) *sscud_area;
- sscud_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ sscud_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!sscud_area)
return -ENOMEM;
cud = kzalloc(sizeof(*cud), GFP_KERNEL);
@@ -626,7 +626,7 @@ static int chsc_ioctl_conf_info(void __user *user_ci)
u8 data[PAGE_SIZE - 20];
} __attribute__ ((packed)) *sci_area;
- sci_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ sci_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!sci_area)
return -ENOMEM;
ci = kzalloc(sizeof(*ci), GFP_KERNEL);
@@ -697,7 +697,7 @@ static int chsc_ioctl_conf_comp_list(void __user *user_ccl)
u32 res;
} __attribute__ ((packed)) *cssids_parm;
- sccl_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ sccl_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!sccl_area)
return -ENOMEM;
ccl = kzalloc(sizeof(*ccl), GFP_KERNEL);
@@ -757,7 +757,7 @@ static int chsc_ioctl_chpd(void __user *user_chpd)
int ret;
chpd = kzalloc(sizeof(*chpd), GFP_KERNEL);
- scpd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ scpd_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!scpd_area || !chpd) {
ret = -ENOMEM;
goto out_free;
@@ -797,7 +797,7 @@ static int chsc_ioctl_dcal(void __user *user_dcal)
u8 data[PAGE_SIZE - 36];
} __attribute__ ((packed)) *sdcal_area;
- sdcal_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ sdcal_area = (void *)get_zeroed_page(GFP_KERNEL);
if (!sdcal_area)
return -ENOMEM;
dcal = kzalloc(sizeof(*dcal), GFP_KERNEL);
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 5584aa46c94e..f80dc18e2a76 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -169,7 +169,8 @@ static inline void cmf_activate(void *area, unsigned int onoff)
" lgr 2,%[mbo]\n"
" schm\n"
:
- : [r1] "d" ((unsigned long)onoff), [mbo] "d" (area)
+ : [r1] "d" ((unsigned long)onoff),
+ [mbo] "d" (virt_to_phys(area))
: "1", "2");
}
@@ -501,8 +502,7 @@ static int alloc_cmb(struct ccw_device *cdev)
WARN_ON(!list_empty(&cmb_area.list));
spin_unlock(&cmb_area.lock);
- mem = (void*)__get_free_pages(GFP_KERNEL | GFP_DMA,
- get_order(size));
+ mem = (void *)__get_free_pages(GFP_KERNEL, get_order(size));
spin_lock(&cmb_area.lock);
if (cmb_area.mem) {
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 28a88ed2c3aa..094431a62ad5 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -39,7 +39,7 @@ int max_ssid;
#define MAX_CSS_IDX 0
struct channel_subsystem *channel_subsystems[MAX_CSS_IDX + 1];
-static struct bus_type css_bus_type;
+static const struct bus_type css_bus_type;
int
for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *data)
@@ -1409,7 +1409,7 @@ static int css_uevent(const struct device *dev, struct kobj_uevent_env *env)
return ret;
}
-static struct bus_type css_bus_type = {
+static const struct bus_type css_bus_type = {
.name = "css",
.match = css_bus_match,
.probe = css_probe,
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 0cfb179e1bcb..f95d12345d98 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -49,7 +49,7 @@ static const unsigned long recovery_delay[] = { 3, 30, 300 };
static atomic_t ccw_device_init_count = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(ccw_device_init_wq);
-static struct bus_type ccw_bus_type;
+static const struct bus_type ccw_bus_type;
/******************* bus type handling ***********************/
@@ -1776,7 +1776,7 @@ static void ccw_device_shutdown(struct device *dev)
__disable_cmf(cdev);
}
-static struct bus_type ccw_bus_type = {
+static const struct bus_type ccw_bus_type = {
.name = "ccw",
.match = ccw_bus_match,
.uevent = ccw_uevent,
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index c533d1dadc6b..a5dba3829769 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
return -EINVAL;
if (cdev->private->state == DEV_STATE_NOT_OPER)
return -ENODEV;
- if (cdev->private->state == DEV_STATE_VERIFY) {
+ if (cdev->private->state == DEV_STATE_VERIFY ||
+ cdev->private->flags.doverify) {
/* Remember to fake irb when finished. */
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_CMD_IRB;
@@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
}
if (cdev->private->state != DEV_STATE_ONLINE ||
((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) &&
- !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) ||
- cdev->private->flags.doverify)
+ !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)))
return -EBUSY;
ret = cio_set_options (sch, flags);
if (ret)
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 6b21ba68c1fe..c7894d61306d 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -42,7 +42,7 @@ static int scmdev_uevent(const struct device *dev, struct kobj_uevent_env *env)
return add_uevent_var(env, "MODALIAS=scm:scmdev");
}
-static struct bus_type scm_bus_type = {
+static const struct bus_type scm_bus_type = {
.name = "scm",
.probe = scmdev_probe,
.remove = scmdev_remove,
@@ -228,7 +228,7 @@ int scm_update_information(void)
size_t num;
int ret;
- scm_info = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+ scm_info = (void *)__get_free_page(GFP_KERNEL);
if (!scm_info)
return -ENOMEM;
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index f46dd6abacd7..cce0bafd4c92 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -38,6 +38,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/module.h>
+#include <asm/uv.h>
#include "ap_bus.h"
#include "ap_debug.h"
@@ -83,14 +84,11 @@ EXPORT_SYMBOL(ap_perms);
DEFINE_MUTEX(ap_perms_mutex);
EXPORT_SYMBOL(ap_perms_mutex);
-/* # of bus scans since init */
-static atomic64_t ap_scan_bus_count;
-
/* # of bindings complete since init */
static atomic64_t ap_bindings_complete_count = ATOMIC64_INIT(0);
-/* completion for initial APQN bindings complete */
-static DECLARE_COMPLETION(ap_init_apqn_bindings_complete);
+/* completion for APQN bindings complete */
+static DECLARE_COMPLETION(ap_apqn_bindings_complete);
static struct ap_config_info *ap_qci_info;
static struct ap_config_info *ap_qci_info_old;
@@ -101,12 +99,16 @@ static struct ap_config_info *ap_qci_info_old;
debug_info_t *ap_dbf_info;
/*
- * Workqueue timer for bus rescan.
+ * AP bus rescan related things.
*/
-static struct timer_list ap_config_timer;
-static int ap_config_time = AP_CONFIG_TIME;
-static void ap_scan_bus(struct work_struct *);
-static DECLARE_WORK(ap_scan_work, ap_scan_bus);
+static bool ap_scan_bus(void);
+static bool ap_scan_bus_result; /* result of last ap_scan_bus() */
+static DEFINE_MUTEX(ap_scan_bus_mutex); /* mutex ap_scan_bus() invocations */
+static atomic64_t ap_scan_bus_count; /* counter ap_scan_bus() invocations */
+static int ap_scan_bus_time = AP_CONFIG_TIME;
+static struct timer_list ap_scan_bus_timer;
+static void ap_scan_bus_wq_callback(struct work_struct *);
+static DECLARE_WORK(ap_scan_bus_work, ap_scan_bus_wq_callback);
/*
* Tasklet & timer for AP request polling and interrupts
@@ -135,7 +137,7 @@ static int ap_max_domain_id = 15;
/* Maximum adapter id, if not given via qci */
static int ap_max_adapter_id = 63;
-static struct bus_type ap_bus_type;
+static const struct bus_type ap_bus_type;
/* Adapter interrupt definitions */
static void ap_interrupt_handler(struct airq_struct *airq,
@@ -753,7 +755,7 @@ static void ap_calc_bound_apqns(unsigned int *apqns, unsigned int *bound)
}
/*
- * After initial ap bus scan do check if all existing APQNs are
+ * After ap bus scan do check if all existing APQNs are
* bound to device drivers.
*/
static void ap_check_bindings_complete(void)
@@ -763,9 +765,9 @@ static void ap_check_bindings_complete(void)
if (atomic64_read(&ap_scan_bus_count) >= 1) {
ap_calc_bound_apqns(&apqns, &bound);
if (bound == apqns) {
- if (!completion_done(&ap_init_apqn_bindings_complete)) {
- complete_all(&ap_init_apqn_bindings_complete);
- AP_DBF_INFO("%s complete\n", __func__);
+ if (!completion_done(&ap_apqn_bindings_complete)) {
+ complete_all(&ap_apqn_bindings_complete);
+ pr_debug("%s all apqn bindings complete\n", __func__);
}
ap_send_bindings_complete_uevent();
}
@@ -782,27 +784,29 @@ static void ap_check_bindings_complete(void)
* -ETIME is returned. On failures negative return values are
* returned to the caller.
*/
-int ap_wait_init_apqn_bindings_complete(unsigned long timeout)
+int ap_wait_apqn_bindings_complete(unsigned long timeout)
{
+ int rc = 0;
long l;
- if (completion_done(&ap_init_apqn_bindings_complete))
+ if (completion_done(&ap_apqn_bindings_complete))
return 0;
if (timeout)
l = wait_for_completion_interruptible_timeout(
- &ap_init_apqn_bindings_complete, timeout);
+ &ap_apqn_bindings_complete, timeout);
else
l = wait_for_completion_interruptible(
- &ap_init_apqn_bindings_complete);
+ &ap_apqn_bindings_complete);
if (l < 0)
- return l == -ERESTARTSYS ? -EINTR : l;
+ rc = l == -ERESTARTSYS ? -EINTR : l;
else if (l == 0 && timeout)
- return -ETIME;
+ rc = -ETIME;
- return 0;
+ pr_debug("%s rc=%d\n", __func__, rc);
+ return rc;
}
-EXPORT_SYMBOL(ap_wait_init_apqn_bindings_complete);
+EXPORT_SYMBOL(ap_wait_apqn_bindings_complete);
static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data)
{
@@ -826,8 +830,8 @@ static int __ap_revise_reserved(struct device *dev, void *dummy)
drvres = to_ap_drv(dev->driver)->flags
& AP_DRIVER_FLAG_DEFAULT;
if (!!devres != !!drvres) {
- AP_DBF_DBG("%s reprobing queue=%02x.%04x\n",
- __func__, card, queue);
+ pr_debug("%s reprobing queue=%02x.%04x\n",
+ __func__, card, queue);
rc = device_reprobe(dev);
if (rc)
AP_DBF_WARN("%s reprobing queue=%02x.%04x failed\n",
@@ -939,8 +943,6 @@ static int ap_device_probe(struct device *dev)
if (is_queue_dev(dev))
hash_del(&to_ap_queue(dev)->hnode);
spin_unlock_bh(&ap_queues_lock);
- } else {
- ap_check_bindings_complete();
}
out:
@@ -1012,16 +1014,47 @@ void ap_driver_unregister(struct ap_driver *ap_drv)
}
EXPORT_SYMBOL(ap_driver_unregister);
-void ap_bus_force_rescan(void)
+/*
+ * Enforce a synchronous AP bus rescan.
+ * Returns true if the bus scan finds a change in the AP configuration
+ * and AP devices have been added or deleted when this function returns.
+ */
+bool ap_bus_force_rescan(void)
{
+ unsigned long scan_counter = atomic64_read(&ap_scan_bus_count);
+ bool rc = false;
+
+ pr_debug(">%s scan counter=%lu\n", __func__, scan_counter);
+
/* Only trigger AP bus scans after the initial scan is done */
- if (atomic64_read(&ap_scan_bus_count) <= 0)
- return;
+ if (scan_counter <= 0)
+ goto out;
+
+ /* Try to acquire the AP scan bus mutex */
+ if (mutex_trylock(&ap_scan_bus_mutex)) {
+ /* mutex acquired, run the AP bus scan */
+ ap_scan_bus_result = ap_scan_bus();
+ rc = ap_scan_bus_result;
+ mutex_unlock(&ap_scan_bus_mutex);
+ goto out;
+ }
+
+ /*
+ * Mutex acquire failed. So there is currently another task
+ * already running the AP bus scan. Then let's simple wait
+ * for the lock which means the other task has finished and
+ * stored the result in ap_scan_bus_result.
+ */
+ if (mutex_lock_interruptible(&ap_scan_bus_mutex)) {
+ /* some error occurred, ignore and go out */
+ goto out;
+ }
+ rc = ap_scan_bus_result;
+ mutex_unlock(&ap_scan_bus_mutex);
- /* processing a asynchronous bus rescan */
- del_timer(&ap_config_timer);
- queue_work(system_long_wq, &ap_scan_work);
- flush_work(&ap_scan_work);
+out:
+ pr_debug("%s rc=%d\n", __func__, rc);
+ return rc;
}
EXPORT_SYMBOL(ap_bus_force_rescan);
@@ -1030,7 +1063,7 @@ EXPORT_SYMBOL(ap_bus_force_rescan);
*/
void ap_bus_cfg_chg(void)
{
- AP_DBF_DBG("%s config change, forcing bus rescan\n", __func__);
+ pr_debug("%s config change, forcing bus rescan\n", __func__);
ap_bus_force_rescan();
}
@@ -1250,7 +1283,7 @@ static BUS_ATTR_RO(ap_interrupts);
static ssize_t config_time_show(const struct bus_type *bus, char *buf)
{
- return sysfs_emit(buf, "%d\n", ap_config_time);
+ return sysfs_emit(buf, "%d\n", ap_scan_bus_time);
}
static ssize_t config_time_store(const struct bus_type *bus,
@@ -1260,8 +1293,8 @@ static ssize_t config_time_store(const struct bus_type *bus,
if (sscanf(buf, "%d\n", &time) != 1 || time < 5 || time > 120)
return -EINVAL;
- ap_config_time = time;
- mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
+ ap_scan_bus_time = time;
+ mod_timer(&ap_scan_bus_timer, jiffies + ap_scan_bus_time * HZ);
return count;
}
@@ -1603,7 +1636,7 @@ static struct attribute *ap_bus_attrs[] = {
};
ATTRIBUTE_GROUPS(ap_bus);
-static struct bus_type ap_bus_type = {
+static const struct bus_type ap_bus_type = {
.name = "ap",
.bus_groups = ap_bus_groups,
.match = &ap_bus_match,
@@ -1888,8 +1921,8 @@ static inline void ap_scan_domains(struct ap_card *ac)
aq->last_err_rc = AP_RESPONSE_CHECKSTOPPED;
}
spin_unlock_bh(&aq->lock);
- AP_DBF_DBG("%s(%d,%d) queue dev checkstop on\n",
- __func__, ac->id, dom);
+ pr_debug("%s(%d,%d) queue dev checkstop on\n",
+ __func__, ac->id, dom);
/* 'receive' pending messages with -EAGAIN */
ap_flush_queue(aq);
goto put_dev_and_continue;
@@ -1899,8 +1932,8 @@ static inline void ap_scan_domains(struct ap_card *ac)
if (aq->dev_state > AP_DEV_STATE_UNINITIATED)
_ap_queue_init_state(aq);
spin_unlock_bh(&aq->lock);
- AP_DBF_DBG("%s(%d,%d) queue dev checkstop off\n",
- __func__, ac->id, dom);
+ pr_debug("%s(%d,%d) queue dev checkstop off\n",
+ __func__, ac->id, dom);
goto put_dev_and_continue;
}
/* config state change */
@@ -1912,8 +1945,8 @@ static inline void ap_scan_domains(struct ap_card *ac)
aq->last_err_rc = AP_RESPONSE_DECONFIGURED;
}
spin_unlock_bh(&aq->lock);
- AP_DBF_DBG("%s(%d,%d) queue dev config off\n",
- __func__, ac->id, dom);
+ pr_debug("%s(%d,%d) queue dev config off\n",
+ __func__, ac->id, dom);
ap_send_config_uevent(&aq->ap_dev, aq->config);
/* 'receive' pending messages with -EAGAIN */
ap_flush_queue(aq);
@@ -1924,8 +1957,8 @@ static inline void ap_scan_domains(struct ap_card *ac)
if (aq->dev_state > AP_DEV_STATE_UNINITIATED)
_ap_queue_init_state(aq);
spin_unlock_bh(&aq->lock);
- AP_DBF_DBG("%s(%d,%d) queue dev config on\n",
- __func__, ac->id, dom);
+ pr_debug("%s(%d,%d) queue dev config on\n",
+ __func__, ac->id, dom);
ap_send_config_uevent(&aq->ap_dev, aq->config);
goto put_dev_and_continue;
}
@@ -1997,8 +2030,8 @@ static inline void ap_scan_adapter(int ap)
ap_scan_rm_card_dev_and_queue_devs(ac);
put_device(dev);
} else {
- AP_DBF_DBG("%s(%d) no type info (no APQN found), ignored\n",
- __func__, ap);
+ pr_debug("%s(%d) no type info (no APQN found), ignored\n",
+ __func__, ap);
}
return;
}
@@ -2010,8 +2043,8 @@ static inline void ap_scan_adapter(int ap)
ap_scan_rm_card_dev_and_queue_devs(ac);
put_device(dev);
} else {
- AP_DBF_DBG("%s(%d) no valid type (0) info, ignored\n",
- __func__, ap);
+ pr_debug("%s(%d) no valid type (0) info, ignored\n",
+ __func__, ap);
}
return;
}
@@ -2135,23 +2168,80 @@ static bool ap_get_configuration(void)
sizeof(struct ap_config_info)) != 0;
}
+/*
+ * ap_config_has_new_aps - Check current against old qci info if
+ * new adapters have appeared. Returns true if at least one new
+ * adapter in the apm mask is showing up. Existing adapters or
+ * receding adapters are not counted.
+ */
+static bool ap_config_has_new_aps(void)
+{
+
+ unsigned long m[BITS_TO_LONGS(AP_DEVICES)];
+
+ if (!ap_qci_info)
+ return false;
+
+ bitmap_andnot(m, (unsigned long *)ap_qci_info->apm,
+ (unsigned long *)ap_qci_info_old->apm, AP_DEVICES);
+ if (!bitmap_empty(m, AP_DEVICES))
+ return true;
+
+ return false;
+}
+
+/*
+ * ap_config_has_new_doms - Check current against old qci info if
+ * new (usage) domains have appeared. Returns true if at least one
+ * new domain in the aqm mask is showing up. Existing domains or
+ * receding domains are not counted.
+ */
+static bool ap_config_has_new_doms(void)
+{
+ unsigned long m[BITS_TO_LONGS(AP_DOMAINS)];
+
+ if (!ap_qci_info)
+ return false;
+
+ bitmap_andnot(m, (unsigned long *)ap_qci_info->aqm,
+ (unsigned long *)ap_qci_info_old->aqm, AP_DOMAINS);
+ if (!bitmap_empty(m, AP_DOMAINS))
+ return true;
+
+ return false;
+}
+
/**
* ap_scan_bus(): Scan the AP bus for new devices
- * Runs periodically, workqueue timer (ap_config_time)
- * @unused: Unused pointer.
+ * Always run under mutex ap_scan_bus_mutex protection
+ * which needs to get locked/unlocked by the caller!
+ * Returns true if any config change has been detected
+ * during the scan, otherwise false.
*/
-static void ap_scan_bus(struct work_struct *unused)
+static bool ap_scan_bus(void)
{
- int ap, config_changed = 0;
+ bool config_changed;
+ int ap;
+
+ pr_debug(">%s\n", __func__);
- /* config change notify */
+ /* (re-)fetch configuration via QCI */
config_changed = ap_get_configuration();
- if (config_changed)
+ if (config_changed) {
+ if (ap_config_has_new_aps() || ap_config_has_new_doms()) {
+ /*
+ * Appearance of new adapters and/or domains need to
+ * build new ap devices which need to get bound to an
+ * device driver. Thus reset the APQN bindings complete
+ * completion.
+ */
+ reinit_completion(&ap_apqn_bindings_complete);
+ }
+ /* post a config change notify */
notify_config_changed();
+ }
ap_select_domain();
- AP_DBF_DBG("%s running\n", __func__);
-
/* loop over all possible adapters */
for (ap = 0; ap <= ap_max_adapter_id; ap++)
ap_scan_adapter(ap);
@@ -2174,23 +2264,56 @@ static void ap_scan_bus(struct work_struct *unused)
}
if (atomic64_inc_return(&ap_scan_bus_count) == 1) {
- AP_DBF_DBG("%s init scan complete\n", __func__);
+ pr_debug("%s init scan complete\n", __func__);
ap_send_init_scan_done_uevent();
- ap_check_bindings_complete();
}
- mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
+ ap_check_bindings_complete();
+
+ mod_timer(&ap_scan_bus_timer, jiffies + ap_scan_bus_time * HZ);
+
+ pr_debug("<%s config_changed=%d\n", __func__, config_changed);
+
+ return config_changed;
}
-static void ap_config_timeout(struct timer_list *unused)
+/*
+ * Callback for the ap_scan_bus_timer
+ * Runs periodically, workqueue timer (ap_scan_bus_time)
+ */
+static void ap_scan_bus_timer_callback(struct timer_list *unused)
{
- queue_work(system_long_wq, &ap_scan_work);
+ /*
+ * schedule work into the system long wq which when
+ * the work is finally executed, calls the AP bus scan.
+ */
+ queue_work(system_long_wq, &ap_scan_bus_work);
+}
+
+/*
+ * Callback for the ap_scan_bus_work
+ */
+static void ap_scan_bus_wq_callback(struct work_struct *unused)
+{
+ /*
+ * Try to invoke an ap_scan_bus(). If the mutex acquisition
+ * fails there is currently another task already running the
+ * AP scan bus and there is no need to wait and re-trigger the
+ * scan again. Please note at the end of the scan bus function
+ * the AP scan bus timer is re-armed which triggers then the
+ * ap_scan_bus_timer_callback which enqueues a work into the
+ * system_long_wq which invokes this function here again.
+ */
+ if (mutex_trylock(&ap_scan_bus_mutex)) {
+ ap_scan_bus_result = ap_scan_bus();
+ mutex_unlock(&ap_scan_bus_mutex);
+ }
}
static int __init ap_debug_init(void)
{
ap_dbf_info = debug_register("ap", 2, 1,
- DBF_MAX_SPRINTF_ARGS * sizeof(long));
+ AP_DBF_MAX_SPRINTF_ARGS * sizeof(long));
debug_register_view(ap_dbf_info, &debug_sprintf_view);
debug_set_level(ap_dbf_info, DBF_ERR);
@@ -2274,7 +2397,7 @@ static int __init ap_module_init(void)
ap_root_device->bus = &ap_bus_type;
/* Setup the AP bus rescan timer. */
- timer_setup(&ap_config_timer, ap_config_timeout, 0);
+ timer_setup(&ap_scan_bus_timer, ap_scan_bus_timer_callback, 0);
/*
* Setup the high resolution poll timer.
@@ -2292,7 +2415,7 @@ static int __init ap_module_init(void)
goto out_work;
}
- queue_work(system_long_wq, &ap_scan_work);
+ queue_work(system_long_wq, &ap_scan_bus_work);
return 0;
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 98814839ef30..59c7ed49aa02 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -266,7 +266,7 @@ int ap_sb_available(void);
bool ap_is_se_guest(void);
void ap_wait(enum ap_sm_wait wait);
void ap_request_timeout(struct timer_list *t);
-void ap_bus_force_rescan(void);
+bool ap_bus_force_rescan(void);
int ap_test_config_usage_domain(unsigned int domain);
int ap_test_config_ctrl_domain(unsigned int domain);
@@ -352,8 +352,12 @@ int ap_parse_mask_str(const char *str,
* the return value is 0. If the timeout (in jiffies) hits instead
* -ETIME is returned. On failures negative return values are
* returned to the caller.
+ * It may be that the AP bus scan finds new devices. Then the
+ * condition that all APQNs are bound to their device drivers
+ * is reset to false and this call again blocks until either all
+ * APQNs are bound to a device driver or the timeout hits again.
*/
-int ap_wait_init_apqn_bindings_complete(unsigned long timeout);
+int ap_wait_apqn_bindings_complete(unsigned long timeout);
void ap_send_config_uevent(struct ap_device *ap_dev, bool cfg);
void ap_send_online_uevent(struct ap_device *ap_dev, int online);
diff --git a/drivers/s390/crypto/ap_debug.h b/drivers/s390/crypto/ap_debug.h
index c083ce88a9a6..2f66271b8564 100644
--- a/drivers/s390/crypto/ap_debug.h
+++ b/drivers/s390/crypto/ap_debug.h
@@ -16,7 +16,7 @@
#define RC2ERR(rc) ((rc) ? DBF_ERR : DBF_INFO)
#define RC2WARN(rc) ((rc) ? DBF_WARN : DBF_INFO)
-#define DBF_MAX_SPRINTF_ARGS 6
+#define AP_DBF_MAX_SPRINTF_ARGS 6
#define AP_DBF(...) \
debug_sprintf_event(ap_dbf_info, ##__VA_ARGS__)
@@ -26,8 +26,6 @@
debug_sprintf_event(ap_dbf_info, DBF_WARN, ##__VA_ARGS__)
#define AP_DBF_INFO(...) \
debug_sprintf_event(ap_dbf_info, DBF_INFO, ##__VA_ARGS__)
-#define AP_DBF_DBG(...) \
- debug_sprintf_event(ap_dbf_info, DBF_DEBUG, ##__VA_ARGS__)
extern debug_info_t *ap_dbf_info;
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 682595443145..6e4e8d324a6d 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -136,6 +136,8 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
+ print_hex_dump_debug("aprpl: ", DUMP_PREFIX_ADDRESS, 16, 1,
+ aq->reply->msg, aq->reply->len, false);
aq->queue_count = max_t(int, 0, aq->queue_count - 1);
if (!status.queue_empty && !aq->queue_count)
aq->queue_count++;
@@ -169,6 +171,9 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
aq->queue_count = 0;
list_splice_init(&aq->pendingq, &aq->requestq);
aq->requestq_count += aq->pendingq_count;
+ pr_debug("%s queue 0x%02x.%04x rescheduled %d reqs (new req %d)\n",
+ __func__, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid),
+ aq->pendingq_count, aq->requestq_count);
aq->pendingq_count = 0;
break;
default:
@@ -243,6 +248,8 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
/* Start the next request on the queue. */
ap_msg = list_entry(aq->requestq.next, struct ap_message, list);
+ print_hex_dump_debug("apreq: ", DUMP_PREFIX_ADDRESS, 16, 1,
+ ap_msg->msg, ap_msg->len, false);
status = __ap_send(qid, ap_msg->psmid,
ap_msg->msg, ap_msg->len,
ap_msg->flags & AP_MSG_FLAG_SPECIAL);
@@ -446,9 +453,9 @@ static enum ap_sm_wait ap_sm_assoc_wait(struct ap_queue *aq)
case AP_BS_Q_USABLE:
/* association is through */
aq->sm_state = AP_SM_STATE_IDLE;
- AP_DBF_DBG("%s queue 0x%02x.%04x associated with %u\n",
- __func__, AP_QID_CARD(aq->qid),
- AP_QID_QUEUE(aq->qid), aq->assoc_idx);
+ pr_debug("%s queue 0x%02x.%04x associated with %u\n",
+ __func__, AP_QID_CARD(aq->qid),
+ AP_QID_QUEUE(aq->qid), aq->assoc_idx);
return AP_SM_WAIT_NONE;
case AP_BS_Q_USABLE_NO_SECURE_KEY:
/* association still pending */
@@ -690,9 +697,9 @@ static ssize_t ap_functions_show(struct device *dev,
status = ap_test_queue(aq->qid, 1, &hwinfo);
if (status.response_code > AP_RESPONSE_BUSY) {
- AP_DBF_DBG("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
- __func__, status.response_code,
- AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
+ pr_debug("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return -EIO;
}
@@ -846,9 +853,9 @@ static ssize_t se_bind_show(struct device *dev,
status = ap_test_queue(aq->qid, 1, &hwinfo);
if (status.response_code > AP_RESPONSE_BUSY) {
- AP_DBF_DBG("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
- __func__, status.response_code,
- AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
+ pr_debug("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return -EIO;
}
@@ -974,9 +981,9 @@ static ssize_t se_associate_show(struct device *dev,
status = ap_test_queue(aq->qid, 1, &hwinfo);
if (status.response_code > AP_RESPONSE_BUSY) {
- AP_DBF_DBG("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
- __func__, status.response_code,
- AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
+ pr_debug("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return -EIO;
}
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 6cfb6b2340c9..dccf664a3d95 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -42,24 +42,23 @@ MODULE_DESCRIPTION("s390 protected key interface");
* debug feature data and functions
*/
-static debug_info_t *debug_info;
+static debug_info_t *pkey_dbf_info;
-#define DEBUG_DBG(...) debug_sprintf_event(debug_info, 6, ##__VA_ARGS__)
-#define DEBUG_INFO(...) debug_sprintf_event(debug_info, 5, ##__VA_ARGS__)
-#define DEBUG_WARN(...) debug_sprintf_event(debug_info, 4, ##__VA_ARGS__)
-#define DEBUG_ERR(...) debug_sprintf_event(debug_info, 3, ##__VA_ARGS__)
+#define PKEY_DBF_INFO(...) debug_sprintf_event(pkey_dbf_info, 5, ##__VA_ARGS__)
+#define PKEY_DBF_WARN(...) debug_sprintf_event(pkey_dbf_info, 4, ##__VA_ARGS__)
+#define PKEY_DBF_ERR(...) debug_sprintf_event(pkey_dbf_info, 3, ##__VA_ARGS__)
static void __init pkey_debug_init(void)
{
/* 5 arguments per dbf entry (including the format string ptr) */
- debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long));
- debug_register_view(debug_info, &debug_sprintf_view);
- debug_set_level(debug_info, 3);
+ pkey_dbf_info = debug_register("pkey", 1, 1, 5 * sizeof(long));
+ debug_register_view(pkey_dbf_info, &debug_sprintf_view);
+ debug_set_level(pkey_dbf_info, 3);
}
static void __exit pkey_debug_exit(void)
{
- debug_unregister(debug_info);
+ debug_unregister(pkey_dbf_info);
}
/* inside view of a protected key token (only type 0x00 version 0x01) */
@@ -163,14 +162,14 @@ static int pkey_clr2protkey(u32 keytype, const u8 *clrkey,
fc = CPACF_PCKMO_ENC_ECC_ED448_KEY;
break;
default:
- DEBUG_ERR("%s unknown/unsupported keytype %u\n",
- __func__, keytype);
+ PKEY_DBF_ERR("%s unknown/unsupported keytype %u\n",
+ __func__, keytype);
return -EINVAL;
}
if (*protkeylen < keysize + AES_WK_VP_SIZE) {
- DEBUG_ERR("%s prot key buffer size too small: %u < %d\n",
- __func__, *protkeylen, keysize + AES_WK_VP_SIZE);
+ PKEY_DBF_ERR("%s prot key buffer size too small: %u < %d\n",
+ __func__, *protkeylen, keysize + AES_WK_VP_SIZE);
return -EINVAL;
}
@@ -182,7 +181,7 @@ static int pkey_clr2protkey(u32 keytype, const u8 *clrkey,
}
/* check for the pckmo subfunction we need now */
if (!cpacf_test_func(&pckmo_functions, fc)) {
- DEBUG_ERR("%s pckmo functions not available\n", __func__);
+ PKEY_DBF_ERR("%s pckmo functions not available\n", __func__);
return -ENODEV;
}
@@ -244,7 +243,7 @@ static int pkey_skey2pkey(const u8 *key, u8 *protkey,
}
if (rc)
- DEBUG_DBG("%s failed rc=%d\n", __func__, rc);
+ pr_debug("%s failed rc=%d\n", __func__, rc);
return rc;
}
@@ -283,7 +282,7 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen,
out:
kfree(apqns);
if (rc)
- DEBUG_DBG("%s failed rc=%d\n", __func__, rc);
+ pr_debug("%s failed rc=%d\n", __func__, rc);
return rc;
}
@@ -294,33 +293,36 @@ static int pkey_ep11key2pkey(const u8 *key, size_t keylen,
u8 *protkey, u32 *protkeylen, u32 *protkeytype)
{
u32 nr_apqns, *apqns = NULL;
+ int i, j, rc = -ENODEV;
u16 card, dom;
- int i, rc;
zcrypt_wait_api_operational();
- /* build a list of apqns suitable for this key */
- rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
- ZCRYPT_CEX7,
- ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
- ep11_kb_wkvp(key, keylen));
- if (rc)
- goto out;
+ /* try two times in case of failure */
+ for (i = 0; i < 2 && rc; i++) {
- /* go through the list of apqns and try to derive an pkey */
- for (rc = -ENODEV, i = 0; i < nr_apqns; i++) {
- card = apqns[i] >> 16;
- dom = apqns[i] & 0xFFFF;
- rc = ep11_kblob2protkey(card, dom, key, keylen,
- protkey, protkeylen, protkeytype);
- if (rc == 0)
- break;
+ /* build a list of apqns suitable for this key */
+ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
+ ZCRYPT_CEX7,
+ ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
+ ep11_kb_wkvp(key, keylen));
+ if (rc)
+ continue; /* retry findcard on failure */
+
+ /* go through the list of apqns and try to derive an pkey */
+ for (rc = -ENODEV, j = 0; j < nr_apqns && rc; j++) {
+ card = apqns[j] >> 16;
+ dom = apqns[j] & 0xFFFF;
+ rc = ep11_kblob2protkey(card, dom, key, keylen,
+ protkey, protkeylen, protkeytype);
+ }
+
+ kfree(apqns);
}
-out:
- kfree(apqns);
if (rc)
- DEBUG_DBG("%s failed rc=%d\n", __func__, rc);
+ pr_debug("%s failed rc=%d\n", __func__, rc);
+
return rc;
}
@@ -336,7 +338,7 @@ static int pkey_verifykey(const struct pkey_seckey *seckey,
int rc;
/* check the secure key for valid AES secure key */
- rc = cca_check_secaeskeytoken(debug_info, 3, (u8 *)seckey, 0);
+ rc = cca_check_secaeskeytoken(pkey_dbf_info, 3, (u8 *)seckey, 0);
if (rc)
goto out;
if (pattributes)
@@ -351,7 +353,7 @@ static int pkey_verifykey(const struct pkey_seckey *seckey,
if (rc > 0) {
/* key mkvp matches to old master key mkvp */
- DEBUG_DBG("%s secure key has old mkvp\n", __func__);
+ pr_debug("%s secure key has old mkvp\n", __func__);
if (pattributes)
*pattributes |= PKEY_VERIFY_ATTR_OLD_MKVP;
rc = 0;
@@ -363,7 +365,7 @@ static int pkey_verifykey(const struct pkey_seckey *seckey,
*pdomain = domain;
out:
- DEBUG_DBG("%s rc=%d\n", __func__, rc);
+ pr_debug("%s rc=%d\n", __func__, rc);
return rc;
}
@@ -379,8 +381,8 @@ static int pkey_genprotkey(u32 keytype, u8 *protkey,
keysize = pkey_keytype_aes_to_size(keytype);
if (!keysize) {
- DEBUG_ERR("%s unknown/unsupported keytype %d\n", __func__,
- keytype);
+ PKEY_DBF_ERR("%s unknown/unsupported keytype %d\n", __func__,
+ keytype);
return -EINVAL;
}
@@ -428,13 +430,13 @@ static int pkey_verifyprotkey(const u8 *protkey, u32 protkeylen,
fc = CPACF_KMC_PAES_256;
break;
default:
- DEBUG_ERR("%s unknown/unsupported keytype %u\n", __func__,
- protkeytype);
+ PKEY_DBF_ERR("%s unknown/unsupported keytype %u\n", __func__,
+ protkeytype);
return -EINVAL;
}
if (protkeylen != pkeylen) {
- DEBUG_ERR("%s invalid protected key size %u for keytype %u\n",
- __func__, protkeylen, protkeytype);
+ PKEY_DBF_ERR("%s invalid protected key size %u for keytype %u\n",
+ __func__, protkeylen, protkeytype);
return -EINVAL;
}
@@ -446,7 +448,7 @@ static int pkey_verifyprotkey(const u8 *protkey, u32 protkeylen,
k = cpacf_kmc(fc | CPACF_ENCRYPT, &param, null_msg, dest_buf,
sizeof(null_msg));
if (k != sizeof(null_msg)) {
- DEBUG_ERR("%s protected key is not valid\n", __func__);
+ PKEY_DBF_ERR("%s protected key is not valid\n", __func__);
return -EKEYREJECTED;
}
@@ -464,13 +466,13 @@ static int nonccatokaes2pkey(const struct clearkeytoken *t,
keysize = pkey_keytype_aes_to_size(t->keytype);
if (!keysize) {
- DEBUG_ERR("%s unknown/unsupported keytype %u\n",
- __func__, t->keytype);
+ PKEY_DBF_ERR("%s unknown/unsupported keytype %u\n",
+ __func__, t->keytype);
return -EINVAL;
}
if (t->len != keysize) {
- DEBUG_ERR("%s non clear key aes token: invalid key len %u\n",
- __func__, t->len);
+ PKEY_DBF_ERR("%s non clear key aes token: invalid key len %u\n",
+ __func__, t->len);
return -EINVAL;
}
@@ -505,7 +507,7 @@ try_via_ep11:
goto out;
failure:
- DEBUG_ERR("%s unable to build protected key from clear", __func__);
+ PKEY_DBF_ERR("%s unable to build protected key from clear", __func__);
out:
kfree(tmpbuf);
@@ -536,14 +538,14 @@ static int nonccatokecc2pkey(const struct clearkeytoken *t,
keylen = 64;
break;
default:
- DEBUG_ERR("%s unknown/unsupported keytype %u\n",
- __func__, t->keytype);
+ PKEY_DBF_ERR("%s unknown/unsupported keytype %u\n",
+ __func__, t->keytype);
return -EINVAL;
}
if (t->len != keylen) {
- DEBUG_ERR("%s non clear key ecc token: invalid key len %u\n",
- __func__, t->len);
+ PKEY_DBF_ERR("%s non clear key ecc token: invalid key len %u\n",
+ __func__, t->len);
return -EINVAL;
}
@@ -551,8 +553,8 @@ static int nonccatokecc2pkey(const struct clearkeytoken *t,
rc = pkey_clr2protkey(t->keytype, t->clearkey,
protkey, protkeylen, protkeytype);
if (rc) {
- DEBUG_ERR("%s unable to build protected key from clear",
- __func__);
+ PKEY_DBF_ERR("%s unable to build protected key from clear",
+ __func__);
}
return rc;
@@ -604,15 +606,15 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
protkeylen, protkeytype);
break;
default:
- DEBUG_ERR("%s unknown/unsupported non cca clear key type %u\n",
- __func__, t->keytype);
+ PKEY_DBF_ERR("%s unknown/unsupported non cca clear key type %u\n",
+ __func__, t->keytype);
return -EINVAL;
}
break;
}
case TOKVER_EP11_AES: {
/* check ep11 key for exportable as protected key */
- rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
+ rc = ep11_check_aes_key(pkey_dbf_info, 3, key, keylen, 1);
if (rc)
goto out;
rc = pkey_ep11key2pkey(key, keylen,
@@ -621,15 +623,16 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
}
case TOKVER_EP11_AES_WITH_HEADER:
/* check ep11 key with header for exportable as protected key */
- rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1);
+ rc = ep11_check_aes_key_with_hdr(pkey_dbf_info,
+ 3, key, keylen, 1);
if (rc)
goto out;
rc = pkey_ep11key2pkey(key, keylen,
protkey, protkeylen, protkeytype);
break;
default:
- DEBUG_ERR("%s unknown/unsupported non-CCA token version %d\n",
- __func__, hdr->version);
+ PKEY_DBF_ERR("%s unknown/unsupported non-CCA token version %d\n",
+ __func__, hdr->version);
}
out:
@@ -654,8 +657,8 @@ static int pkey_ccainttok2pkey(const u8 *key, u32 keylen,
return -EINVAL;
break;
default:
- DEBUG_ERR("%s unknown/unsupported CCA internal token version %d\n",
- __func__, hdr->version);
+ PKEY_DBF_ERR("%s unknown/unsupported CCA internal token version %d\n",
+ __func__, hdr->version);
return -EINVAL;
}
@@ -672,7 +675,7 @@ int pkey_keyblob2pkey(const u8 *key, u32 keylen,
int rc;
if (keylen < sizeof(struct keytoken_header)) {
- DEBUG_ERR("%s invalid keylen %d\n", __func__, keylen);
+ PKEY_DBF_ERR("%s invalid keylen %d\n", __func__, keylen);
return -EINVAL;
}
@@ -686,12 +689,12 @@ int pkey_keyblob2pkey(const u8 *key, u32 keylen,
protkey, protkeylen, protkeytype);
break;
default:
- DEBUG_ERR("%s unknown/unsupported blob type %d\n",
- __func__, hdr->type);
+ PKEY_DBF_ERR("%s unknown/unsupported blob type %d\n",
+ __func__, hdr->type);
return -EINVAL;
}
- DEBUG_DBG("%s rc=%d\n", __func__, rc);
+ pr_debug("%s rc=%d\n", __func__, rc);
return rc;
}
EXPORT_SYMBOL(pkey_keyblob2pkey);
@@ -839,7 +842,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
hdr->version == TOKVER_CCA_AES) {
struct secaeskeytoken *t = (struct secaeskeytoken *)key;
- rc = cca_check_secaeskeytoken(debug_info, 3, key, 0);
+ rc = cca_check_secaeskeytoken(pkey_dbf_info, 3, key, 0);
if (rc)
goto out;
if (ktype)
@@ -869,7 +872,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
hdr->version == TOKVER_CCA_VLSC) {
struct cipherkeytoken *t = (struct cipherkeytoken *)key;
- rc = cca_check_secaescipherkey(debug_info, 3, key, 0, 1);
+ rc = cca_check_secaescipherkey(pkey_dbf_info, 3, key, 0, 1);
if (rc)
goto out;
if (ktype)
@@ -907,7 +910,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
struct ep11keyblob *kb = (struct ep11keyblob *)key;
int api;
- rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
+ rc = ep11_check_aes_key(pkey_dbf_info, 3, key, keylen, 1);
if (rc)
goto out;
if (ktype)
@@ -933,8 +936,8 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
struct ep11kblob_header *kh = (struct ep11kblob_header *)key;
int api;
- rc = ep11_check_aes_key_with_hdr(debug_info, 3,
- key, keylen, 1);
+ rc = ep11_check_aes_key_with_hdr(pkey_dbf_info,
+ 3, key, keylen, 1);
if (rc)
goto out;
if (ktype)
@@ -981,25 +984,27 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
if (hdr->version == TOKVER_CCA_AES) {
if (keylen != sizeof(struct secaeskeytoken))
return -EINVAL;
- if (cca_check_secaeskeytoken(debug_info, 3, key, 0))
+ if (cca_check_secaeskeytoken(pkey_dbf_info, 3, key, 0))
return -EINVAL;
} else if (hdr->version == TOKVER_CCA_VLSC) {
if (keylen < hdr->len || keylen > MAXCCAVLSCTOKENSIZE)
return -EINVAL;
- if (cca_check_secaescipherkey(debug_info, 3, key, 0, 1))
+ if (cca_check_secaescipherkey(pkey_dbf_info,
+ 3, key, 0, 1))
return -EINVAL;
} else {
- DEBUG_ERR("%s unknown CCA internal token version %d\n",
- __func__, hdr->version);
+ PKEY_DBF_ERR("%s unknown CCA internal token version %d\n",
+ __func__, hdr->version);
return -EINVAL;
}
} else if (hdr->type == TOKTYPE_NON_CCA) {
if (hdr->version == TOKVER_EP11_AES) {
- if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
+ if (ep11_check_aes_key(pkey_dbf_info,
+ 3, key, keylen, 1))
return -EINVAL;
} else if (hdr->version == TOKVER_EP11_AES_WITH_HEADER) {
- if (ep11_check_aes_key_with_hdr(debug_info, 3,
- key, keylen, 1))
+ if (ep11_check_aes_key_with_hdr(pkey_dbf_info,
+ 3, key, keylen, 1))
return -EINVAL;
} else {
return pkey_nonccatok2pkey(key, keylen,
@@ -1007,8 +1012,8 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
protkeytype);
}
} else {
- DEBUG_ERR("%s unknown/unsupported blob type %d\n",
- __func__, hdr->type);
+ PKEY_DBF_ERR("%s unknown/unsupported blob type %d\n",
+ __func__, hdr->type);
return -EINVAL;
}
@@ -1234,50 +1239,53 @@ static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns,
hdr->version == TOKVER_EP11_AES_WITH_HEADER &&
is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
/* EP11 AES key blob with header */
- if (ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1))
+ if (ep11_check_aes_key_with_hdr(pkey_dbf_info,
+ 3, key, keylen, 1))
return -EINVAL;
} else if (hdr->type == TOKTYPE_NON_CCA &&
hdr->version == TOKVER_EP11_ECC_WITH_HEADER &&
is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
/* EP11 ECC key blob with header */
- if (ep11_check_ecc_key_with_hdr(debug_info, 3, key, keylen, 1))
+ if (ep11_check_ecc_key_with_hdr(pkey_dbf_info,
+ 3, key, keylen, 1))
return -EINVAL;
} else if (hdr->type == TOKTYPE_NON_CCA &&
hdr->version == TOKVER_EP11_AES &&
is_ep11_keyblob(key)) {
/* EP11 AES key blob with header in session field */
- if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
+ if (ep11_check_aes_key(pkey_dbf_info, 3, key, keylen, 1))
return -EINVAL;
} else if (hdr->type == TOKTYPE_CCA_INTERNAL) {
if (hdr->version == TOKVER_CCA_AES) {
/* CCA AES data key */
if (keylen != sizeof(struct secaeskeytoken))
return -EINVAL;
- if (cca_check_secaeskeytoken(debug_info, 3, key, 0))
+ if (cca_check_secaeskeytoken(pkey_dbf_info, 3, key, 0))
return -EINVAL;
} else if (hdr->version == TOKVER_CCA_VLSC) {
/* CCA AES cipher key */
if (keylen < hdr->len || keylen > MAXCCAVLSCTOKENSIZE)
return -EINVAL;
- if (cca_check_secaescipherkey(debug_info, 3, key, 0, 1))
+ if (cca_check_secaescipherkey(pkey_dbf_info,
+ 3, key, 0, 1))
return -EINVAL;
} else {
- DEBUG_ERR("%s unknown CCA internal token version %d\n",
- __func__, hdr->version);
+ PKEY_DBF_ERR("%s unknown CCA internal token version %d\n",
+ __func__, hdr->version);
return -EINVAL;
}
} else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) {
/* CCA ECC (private) key */
if (keylen < sizeof(struct eccprivkeytoken))
return -EINVAL;
- if (cca_check_sececckeytoken(debug_info, 3, key, keylen, 1))
+ if (cca_check_sececckeytoken(pkey_dbf_info, 3, key, keylen, 1))
return -EINVAL;
} else if (hdr->type == TOKTYPE_NON_CCA) {
return pkey_nonccatok2pkey(key, keylen,
protkey, protkeylen, protkeytype);
} else {
- DEBUG_ERR("%s unknown/unsupported blob type %d\n",
- __func__, hdr->type);
+ PKEY_DBF_ERR("%s unknown/unsupported blob type %d\n",
+ __func__, hdr->type);
return -EINVAL;
}
@@ -1350,7 +1358,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
rc = cca_genseckey(kgs.cardnr, kgs.domain,
kgs.keytype, kgs.seckey.seckey);
- DEBUG_DBG("%s cca_genseckey()=%d\n", __func__, rc);
+ pr_debug("%s cca_genseckey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(ugs, &kgs, sizeof(kgs)))
@@ -1365,7 +1373,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
rc = cca_clr2seckey(kcs.cardnr, kcs.domain, kcs.keytype,
kcs.clrkey.clrkey, kcs.seckey.seckey);
- DEBUG_DBG("%s cca_clr2seckey()=%d\n", __func__, rc);
+ pr_debug("%s cca_clr2seckey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(ucs, &kcs, sizeof(kcs)))
@@ -1383,7 +1391,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
rc = cca_sec2protkey(ksp.cardnr, ksp.domain,
ksp.seckey.seckey, ksp.protkey.protkey,
&ksp.protkey.len, &ksp.protkey.type);
- DEBUG_DBG("%s cca_sec2protkey()=%d\n", __func__, rc);
+ pr_debug("%s cca_sec2protkey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(usp, &ksp, sizeof(ksp)))
@@ -1400,7 +1408,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
rc = pkey_clr2protkey(kcp.keytype, kcp.clrkey.clrkey,
kcp.protkey.protkey,
&kcp.protkey.len, &kcp.protkey.type);
- DEBUG_DBG("%s pkey_clr2protkey()=%d\n", __func__, rc);
+ pr_debug("%s pkey_clr2protkey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(ucp, &kcp, sizeof(kcp)))
@@ -1416,7 +1424,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
rc = cca_findcard(kfc.seckey.seckey,
&kfc.cardnr, &kfc.domain, 1);
- DEBUG_DBG("%s cca_findcard()=%d\n", __func__, rc);
+ pr_debug("%s cca_findcard()=%d\n", __func__, rc);
if (rc < 0)
break;
if (copy_to_user(ufc, &kfc, sizeof(kfc)))
@@ -1432,7 +1440,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
ksp.protkey.len = sizeof(ksp.protkey.protkey);
rc = pkey_skey2pkey(ksp.seckey.seckey, ksp.protkey.protkey,
&ksp.protkey.len, &ksp.protkey.type);
- DEBUG_DBG("%s pkey_skey2pkey()=%d\n", __func__, rc);
+ pr_debug("%s pkey_skey2pkey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(usp, &ksp, sizeof(ksp)))
@@ -1447,7 +1455,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
rc = pkey_verifykey(&kvk.seckey, &kvk.cardnr, &kvk.domain,
&kvk.keysize, &kvk.attributes);
- DEBUG_DBG("%s pkey_verifykey()=%d\n", __func__, rc);
+ pr_debug("%s pkey_verifykey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(uvk, &kvk, sizeof(kvk)))
@@ -1463,7 +1471,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
kgp.protkey.len = sizeof(kgp.protkey.protkey);
rc = pkey_genprotkey(kgp.keytype, kgp.protkey.protkey,
&kgp.protkey.len, &kgp.protkey.type);
- DEBUG_DBG("%s pkey_genprotkey()=%d\n", __func__, rc);
+ pr_debug("%s pkey_genprotkey()=%d\n", __func__, rc);
if (rc)
break;
if (copy_to_user(ugp, &kgp, sizeof(kgp)))
@@ -1478,7 +1486,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
rc = pkey_verifyprotkey(kvp.protkey.protkey,
kvp.protkey.len, kvp.protkey.type);
- DEBUG_DBG("%s pkey_verifyprotkey()=%d\n", __func__, rc);
+ pr_debug("%s pkey_verifyprotkey()=%d\n", __func__, rc);
break;
}
case PKEY_KBLOB2PROTK: {
@@ -1494,7 +1502,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
ktp.protkey.len = sizeof(ktp.protkey.protkey);
rc = pkey_keyblob2pkey(kkey, ktp.keylen, ktp.protkey.protkey,
&ktp.protkey.len, &ktp.protkey.type);
- DEBUG_DBG("%s pkey_keyblob2pkey()=%d\n", __func__, rc);
+ pr_debug("%s pkey_keyblob2pkey()=%d\n", __func__, rc);
memzero_explicit(kkey, ktp.keylen);
kfree(kkey);
if (rc)
@@ -1523,7 +1531,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
rc = pkey_genseckey2(apqns, kgs.apqn_entries,
kgs.type, kgs.size, kgs.keygenflags,
kkey, &klen);
- DEBUG_DBG("%s pkey_genseckey2()=%d\n", __func__, rc);
+ pr_debug("%s pkey_genseckey2()=%d\n", __func__, rc);
kfree(apqns);
if (rc) {
kfree(kkey);
@@ -1565,7 +1573,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
rc = pkey_clr2seckey2(apqns, kcs.apqn_entries,
kcs.type, kcs.size, kcs.keygenflags,
kcs.clrkey.clrkey, kkey, &klen);
- DEBUG_DBG("%s pkey_clr2seckey2()=%d\n", __func__, rc);
+ pr_debug("%s pkey_clr2seckey2()=%d\n", __func__, rc);
kfree(apqns);
if (rc) {
kfree(kkey);
@@ -1601,7 +1609,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
rc = pkey_verifykey2(kkey, kvk.keylen,
&kvk.cardnr, &kvk.domain,
&kvk.type, &kvk.size, &kvk.flags);
- DEBUG_DBG("%s pkey_verifykey2()=%d\n", __func__, rc);
+ pr_debug("%s pkey_verifykey2()=%d\n", __func__, rc);
kfree(kkey);
if (rc)
break;
@@ -1630,7 +1638,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
kkey, ktp.keylen,
ktp.protkey.protkey, &ktp.protkey.len,
&ktp.protkey.type);
- DEBUG_DBG("%s pkey_keyblob2pkey2()=%d\n", __func__, rc);
+ pr_debug("%s pkey_keyblob2pkey2()=%d\n", __func__, rc);
kfree(apqns);
memzero_explicit(kkey, ktp.keylen);
kfree(kkey);
@@ -1664,7 +1672,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
}
rc = pkey_apqns4key(kkey, kak.keylen, kak.flags,
apqns, &nr_apqns);
- DEBUG_DBG("%s pkey_apqns4key()=%d\n", __func__, rc);
+ pr_debug("%s pkey_apqns4key()=%d\n", __func__, rc);
kfree(kkey);
if (rc && rc != -ENOSPC) {
kfree(apqns);
@@ -1707,7 +1715,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
}
rc = pkey_apqns4keytype(kat.type, kat.cur_mkvp, kat.alt_mkvp,
kat.flags, apqns, &nr_apqns);
- DEBUG_DBG("%s pkey_apqns4keytype()=%d\n", __func__, rc);
+ pr_debug("%s pkey_apqns4keytype()=%d\n", __func__, rc);
if (rc && rc != -ENOSPC) {
kfree(apqns);
break;
@@ -1757,7 +1765,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
rc = pkey_keyblob2pkey3(apqns, ktp.apqn_entries,
kkey, ktp.keylen,
protkey, &protkeylen, &ktp.pkeytype);
- DEBUG_DBG("%s pkey_keyblob2pkey3()=%d\n", __func__, rc);
+ pr_debug("%s pkey_keyblob2pkey3()=%d\n", __func__, rc);
kfree(apqns);
memzero_explicit(kkey, ktp.keylen);
kfree(kkey);
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index a5ab03e42ff1..4aeb3e1213c7 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -60,7 +60,7 @@ static void vfio_ap_matrix_dev_release(struct device *dev)
kfree(matrix_dev);
}
-static struct bus_type matrix_bus = {
+static const struct bus_type matrix_bus = {
.name = "matrix",
};
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 983b3b16196c..fc169bc61593 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -659,6 +659,21 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
AP_DOMAINS);
}
+static bool _queue_passable(struct vfio_ap_queue *q)
+{
+ if (!q)
+ return false;
+
+ switch (q->reset_status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* vfio_ap_mdev_filter_matrix - filter the APQNs assigned to the matrix mdev
* to ensure no queue devices are passed through to
@@ -687,7 +702,6 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid, apqi, apqn;
DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
DECLARE_BITMAP(prev_shadow_aqm, AP_DOMAINS);
- struct vfio_ap_queue *q;
bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
@@ -716,8 +730,7 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
* hardware device.
*/
apqn = AP_MKQID(apid, apqi);
- q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
- if (!q || q->reset_status.response_code) {
+ if (!_queue_passable(vfio_ap_mdev_get_queue(matrix_mdev, apqn))) {
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
/*
@@ -1691,6 +1704,7 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
switch (status->response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
@@ -1747,14 +1761,6 @@ static void apq_reset_check(struct work_struct *reset_work)
memcpy(&q->reset_status, &status, sizeof(status));
continue;
}
- /*
- * When an AP adapter is deconfigured, the
- * associated queues are reset, so let's set the
- * status response code to 0 so the queue may be
- * passed through (i.e., not filtered)
- */
- if (status.response_code == AP_RESPONSE_DECONFIGURED)
- q->reset_status.response_code = 0;
if (q->saved_isc != VFIO_AP_ISC_INVALID)
vfio_ap_free_aqic_resources(q);
break;
@@ -1781,12 +1787,7 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
queue_work(system_long_wq, &q->reset_work);
break;
case AP_RESPONSE_DECONFIGURED:
- /*
- * When an AP adapter is deconfigured, the associated
- * queues are reset, so let's set the status response code to 0
- * so the queue may be passed through (i.e., not filtered).
- */
- q->reset_status.response_code = 0;
+ case AP_RESPONSE_CHECKSTOPPED:
vfio_ap_free_aqic_resources(q);
break;
default:
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 74200f54dfff..02c503f16bc2 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -12,6 +12,9 @@
* Multiple device nodes: Harald Freudenberger <freude@linux.ibm.com>
*/
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -57,10 +60,6 @@ DEFINE_SPINLOCK(zcrypt_list_lock);
LIST_HEAD(zcrypt_card_list);
static atomic_t zcrypt_open_count = ATOMIC_INIT(0);
-static atomic_t zcrypt_rescan_count = ATOMIC_INIT(0);
-
-atomic_t zcrypt_rescan_req = ATOMIC_INIT(0);
-EXPORT_SYMBOL(zcrypt_rescan_req);
static LIST_HEAD(zcrypt_ops_list);
@@ -69,20 +68,15 @@ debug_info_t *zcrypt_dbf_info;
/*
* Process a rescan of the transport layer.
- *
- * Returns 1, if the rescan has been processed, otherwise 0.
+ * Runs a synchronous AP bus rescan.
+ * Returns true if something has changed (for example the
+ * bus scan has found and build up new devices) and it is
+ * worth to do a retry. Otherwise false is returned meaning
+ * no changes on the AP bus level.
*/
-static inline int zcrypt_process_rescan(void)
-{
- if (atomic_read(&zcrypt_rescan_req)) {
- atomic_set(&zcrypt_rescan_req, 0);
- atomic_inc(&zcrypt_rescan_count);
- ap_bus_force_rescan();
- ZCRYPT_DBF_INFO("%s rescan count=%07d\n", __func__,
- atomic_inc_return(&zcrypt_rescan_count));
- return 1;
- }
- return 0;
+static inline bool zcrypt_process_rescan(void)
+{
+ return ap_bus_force_rescan();
}
void zcrypt_msgtype_register(struct zcrypt_ops *zops)
@@ -715,8 +709,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
- ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n",
- __func__);
+ pr_debug("%s no matching queue found => ENODEV\n", __func__);
rc = -ENODEV;
goto out;
}
@@ -820,8 +813,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
- ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n",
- __func__);
+ pr_debug("%s no matching queue found => ENODEV\n", __func__);
rc = -ENODEV;
goto out;
}
@@ -865,6 +857,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
rc = prep_cca_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain);
if (rc)
goto out;
+ print_hex_dump_debug("ccareq: ", DUMP_PREFIX_ADDRESS, 16, 1,
+ ap_msg.msg, ap_msg.len, false);
tdom = *domain;
if (perms != &ap_perms && tdom < AP_DOMAINS) {
@@ -940,8 +934,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
- ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n",
- __func__, xcrb->user_defined, *domain);
+ pr_debug("%s no match for address %02x.%04x => ENODEV\n",
+ __func__, xcrb->user_defined, *domain);
rc = -ENODEV;
goto out;
}
@@ -952,6 +946,10 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
*domain = AP_QID_QUEUE(qid);
rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcrb, &ap_msg);
+ if (!rc) {
+ print_hex_dump_debug("ccarpl: ", DUMP_PREFIX_ADDRESS, 16, 1,
+ ap_msg.msg, ap_msg.len, false);
+ }
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
@@ -970,7 +968,26 @@ out:
long zcrypt_send_cprb(struct ica_xcRB *xcrb)
{
- return _zcrypt_send_cprb(false, &ap_perms, NULL, xcrb);
+ struct zcrypt_track tr;
+ int rc;
+
+ memset(&tr, 0, sizeof(tr));
+
+ do {
+ rc = _zcrypt_send_cprb(false, &ap_perms, &tr, xcrb);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
+ do {
+ rc = _zcrypt_send_cprb(false, &ap_perms, &tr, xcrb);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+ if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
+ rc = -EIO;
+ if (rc)
+ pr_debug("%s rc=%d\n", __func__, rc);
+
+ return rc;
}
EXPORT_SYMBOL(zcrypt_send_cprb);
@@ -1045,6 +1062,8 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain);
if (rc)
goto out_free;
+ print_hex_dump_debug("ep11req: ", DUMP_PREFIX_ADDRESS, 16, 1,
+ ap_msg.msg, ap_msg.len, false);
if (perms != &ap_perms && domain < AUTOSEL_DOM) {
if (ap_msg.flags & AP_MSG_FLAG_ADMIN) {
@@ -1113,15 +1132,15 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
if (!pref_zq) {
if (targets && target_num == 1) {
- ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n",
- __func__, (int)targets->ap_id,
- (int)targets->dom_id);
+ pr_debug("%s no match for address %02x.%04x => ENODEV\n",
+ __func__, (int)targets->ap_id,
+ (int)targets->dom_id);
} else if (targets) {
- ZCRYPT_DBF_DBG("%s no match for %d target addrs => ENODEV\n",
- __func__, (int)target_num);
+ pr_debug("%s no match for %d target addrs => ENODEV\n",
+ __func__, (int)target_num);
} else {
- ZCRYPT_DBF_DBG("%s no match for address ff.ffff => ENODEV\n",
- __func__);
+ pr_debug("%s no match for address ff.ffff => ENODEV\n",
+ __func__);
}
rc = -ENODEV;
goto out_free;
@@ -1129,6 +1148,10 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
qid = pref_zq->queue->qid;
rc = pref_zq->ops->send_ep11_cprb(userspace, pref_zq, xcrb, &ap_msg);
+ if (!rc) {
+ print_hex_dump_debug("ep11rpl: ", DUMP_PREFIX_ADDRESS, 16, 1,
+ ap_msg.msg, ap_msg.len, false);
+ }
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
@@ -1149,7 +1172,26 @@ out:
long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
{
- return _zcrypt_send_ep11_cprb(false, &ap_perms, NULL, xcrb);
+ struct zcrypt_track tr;
+ int rc;
+
+ memset(&tr, 0, sizeof(tr));
+
+ do {
+ rc = _zcrypt_send_ep11_cprb(false, &ap_perms, &tr, xcrb);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
+ do {
+ rc = _zcrypt_send_ep11_cprb(false, &ap_perms, &tr, xcrb);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+ if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
+ rc = -EIO;
+ if (rc)
+ pr_debug("%s rc=%d\n", __func__, rc);
+
+ return rc;
}
EXPORT_SYMBOL(zcrypt_send_ep11_cprb);
@@ -1199,8 +1241,7 @@ static long zcrypt_rng(char *buffer)
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
- ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n",
- __func__);
+ pr_debug("%s no matching queue found => ENODEV\n", __func__);
rc = -ENODEV;
goto out;
}
@@ -1431,20 +1472,17 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg)
do {
rc = zcrypt_rsa_modexpo(perms, &tr, &mex);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = zcrypt_rsa_modexpo(perms, &tr, &mex);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
if (rc) {
- ZCRYPT_DBF_DBG("ioctl ICARSAMODEXPO rc=%d\n", rc);
+ pr_debug("ioctl ICARSAMODEXPO rc=%d\n", rc);
return rc;
}
return put_user(mex.outputdatalength, &umex->outputdatalength);
@@ -1463,20 +1501,17 @@ static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg)
do {
rc = zcrypt_rsa_crt(perms, &tr, &crt);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = zcrypt_rsa_crt(perms, &tr, &crt);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
if (rc) {
- ZCRYPT_DBF_DBG("ioctl ICARSACRT rc=%d\n", rc);
+ pr_debug("ioctl ICARSACRT rc=%d\n", rc);
return rc;
}
return put_user(crt.outputdatalength, &ucrt->outputdatalength);
@@ -1495,21 +1530,18 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
do {
rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
if (rc)
- ZCRYPT_DBF_DBG("ioctl ZSENDCPRB rc=%d status=0x%x\n",
- rc, xcrb.status);
+ pr_debug("ioctl ZSENDCPRB rc=%d status=0x%x\n",
+ rc, xcrb.status);
if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
return -EFAULT;
return rc;
@@ -1528,20 +1560,17 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg)
do {
rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
if (rc)
- ZCRYPT_DBF_DBG("ioctl ZSENDEP11CPRB rc=%d\n", rc);
+ pr_debug("ioctl ZSENDEP11CPRB rc=%d\n", rc);
if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
return -EFAULT;
return rc;
@@ -1670,7 +1699,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
}
/* unknown ioctl number */
default:
- ZCRYPT_DBF_DBG("unknown ioctl 0x%08x\n", cmd);
+ pr_debug("unknown ioctl 0x%08x\n", cmd);
return -ENOIOCTLCMD;
}
}
@@ -1708,16 +1737,13 @@ static long trans_modexpo32(struct ap_perms *perms, struct file *filp,
mex64.n_modulus = compat_ptr(mex32.n_modulus);
do {
rc = zcrypt_rsa_modexpo(perms, &tr, &mex64);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = zcrypt_rsa_modexpo(perms, &tr, &mex64);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
if (rc)
@@ -1761,16 +1787,13 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp,
crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv);
do {
rc = zcrypt_rsa_crt(perms, &tr, &crt64);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = zcrypt_rsa_crt(perms, &tr, &crt64);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
if (rc)
@@ -1833,16 +1856,13 @@ static long trans_xcrb32(struct ap_perms *perms, struct file *filp,
xcrb64.status = xcrb32.status;
do {
rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb64);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
+
+ /* on ENODEV failure: retry once again after a requested rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
do {
rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb64);
- if (rc == -EAGAIN)
- tr.again_counter++;
- } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
+ } while (rc == -EAGAIN && ++tr.again_counter < TRACK_AGAIN_MAX);
if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
rc = -EIO;
xcrb32.reply_control_blk_length = xcrb64.reply_control_blk_length;
@@ -1914,8 +1934,8 @@ static int zcrypt_rng_data_read(struct hwrng *rng, u32 *data)
*/
if (zcrypt_rng_buffer_index == 0) {
rc = zcrypt_rng((char *)zcrypt_rng_buffer);
- /* on failure: retry once again after a requested rescan */
- if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+ /* on ENODEV failure: retry once again after an AP bus rescan */
+ if (rc == -ENODEV && zcrypt_process_rescan())
rc = zcrypt_rng((char *)zcrypt_rng_buffer);
if (rc < 0)
return -EIO;
@@ -1977,7 +1997,7 @@ void zcrypt_rng_device_remove(void)
* an asynchronous job. This function waits until these initial jobs
* are done and so the zcrypt api should be ready to serve crypto
* requests - if there are resources available. The function uses an
- * internal timeout of 60s. The very first caller will either wait for
+ * internal timeout of 30s. The very first caller will either wait for
* ap bus bindings complete or the timeout happens. This state will be
* remembered for further callers which will only be blocked until a
* decision is made (timeout or bindings complete).
@@ -1996,8 +2016,8 @@ int zcrypt_wait_api_operational(void)
switch (zcrypt_wait_api_state) {
case 0:
/* initial state, invoke wait for the ap bus complete */
- rc = ap_wait_init_apqn_bindings_complete(
- msecs_to_jiffies(60 * 1000));
+ rc = ap_wait_apqn_bindings_complete(
+ msecs_to_jiffies(ZCRYPT_WAIT_BINDINGS_COMPLETE_MS));
switch (rc) {
case 0:
/* ap bus bindings are complete */
@@ -2014,8 +2034,8 @@ int zcrypt_wait_api_operational(void)
break;
default:
/* other failure */
- ZCRYPT_DBF_DBG("%s ap_wait_init_apqn_bindings_complete()=%d\n",
- __func__, rc);
+ pr_debug("%s ap_wait_init_apqn_bindings_complete()=%d\n",
+ __func__, rc);
break;
}
break;
@@ -2038,7 +2058,7 @@ EXPORT_SYMBOL(zcrypt_wait_api_operational);
int __init zcrypt_debug_init(void)
{
zcrypt_dbf_info = debug_register("zcrypt", 2, 1,
- DBF_MAX_SPRINTF_ARGS * sizeof(long));
+ ZCRYPT_DBF_MAX_SPRINTF_ARGS * sizeof(long));
debug_register_view(zcrypt_dbf_info, &debug_sprintf_view);
debug_set_level(zcrypt_dbf_info, DBF_ERR);
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index de659954c8f7..4ed481df57ca 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -38,6 +38,15 @@
*/
#define ZCRYPT_RNG_BUFFER_SIZE 4096
+/**
+ * The zcrypt_wait_api_operational() function waits this
+ * amount in milliseconds for ap_wait_aqpn_bindings_complete().
+ * Also on a cprb send failure with ENODEV the send functions
+ * trigger an ap bus rescan and wait this time in milliseconds
+ * for ap_wait_aqpn_bindings_complete() before resending.
+ */
+#define ZCRYPT_WAIT_BINDINGS_COMPLETE_MS 30000
+
/*
* Identifier for Crypto Request Performance Index
*/
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c
index 263fe182648b..0a3a678ffc7e 100644
--- a/drivers/s390/crypto/zcrypt_ccamisc.c
+++ b/drivers/s390/crypto/zcrypt_ccamisc.c
@@ -23,11 +23,6 @@
#include "zcrypt_msgtype6.h"
#include "zcrypt_ccamisc.h"
-#define DEBUG_DBG(...) ZCRYPT_DBF(DBF_DEBUG, ##__VA_ARGS__)
-#define DEBUG_INFO(...) ZCRYPT_DBF(DBF_INFO, ##__VA_ARGS__)
-#define DEBUG_WARN(...) ZCRYPT_DBF(DBF_WARN, ##__VA_ARGS__)
-#define DEBUG_ERR(...) ZCRYPT_DBF(DBF_ERR, ##__VA_ARGS__)
-
/* Size of parameter block used for all cca requests/replies */
#define PARMBSIZE 512
@@ -367,8 +362,8 @@ int cca_genseckey(u16 cardnr, u16 domain,
memcpy(preqparm->lv1.key_length, "KEYLN32 ", 8);
break;
default:
- DEBUG_ERR("%s unknown/unsupported keybitsize %d\n",
- __func__, keybitsize);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keybitsize %d\n",
+ __func__, keybitsize);
rc = -EINVAL;
goto out;
}
@@ -386,15 +381,15 @@ int cca_genseckey(u16 cardnr, u16 domain,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, errno %d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, errno %d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR("%s secure key generate failure, card response %d/%d\n",
- __func__,
+ ZCRYPT_DBF_ERR("%s secure key generate failure, card response %d/%d\n",
+ __func__,
(int)prepcblk->ccp_rtcode,
(int)prepcblk->ccp_rscode);
rc = -EIO;
@@ -411,8 +406,8 @@ int cca_genseckey(u16 cardnr, u16 domain,
- sizeof(prepparm->lv3.keyblock.toklen)
- sizeof(prepparm->lv3.keyblock.tokattr);
if (seckeysize != SECKEYBLOBSIZE) {
- DEBUG_ERR("%s secure token size mismatch %d != %d bytes\n",
- __func__, seckeysize, SECKEYBLOBSIZE);
+ ZCRYPT_DBF_ERR("%s secure token size mismatch %d != %d bytes\n",
+ __func__, seckeysize, SECKEYBLOBSIZE);
rc = -EIO;
goto out;
}
@@ -505,8 +500,8 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
keysize = 32;
break;
default:
- DEBUG_ERR("%s unknown/unsupported keybitsize %d\n",
- __func__, keybitsize);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keybitsize %d\n",
+ __func__, keybitsize);
rc = -EINVAL;
goto out;
}
@@ -524,17 +519,17 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR("%s clear key import failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s clear key import failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
rc = -EIO;
goto out;
}
@@ -549,8 +544,8 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
- sizeof(prepparm->lv3.keyblock.toklen)
- sizeof(prepparm->lv3.keyblock.tokattr);
if (seckeysize != SECKEYBLOBSIZE) {
- DEBUG_ERR("%s secure token size mismatch %d != %d bytes\n",
- __func__, seckeysize, SECKEYBLOBSIZE);
+ ZCRYPT_DBF_ERR("%s secure token size mismatch %d != %d bytes\n",
+ __func__, seckeysize, SECKEYBLOBSIZE);
rc = -EIO;
goto out;
}
@@ -651,17 +646,17 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR("%s unwrap secure key failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s unwrap secure key failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290)
rc = -EAGAIN;
else
@@ -669,10 +664,10 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
goto out;
}
if (prepcblk->ccp_rscode != 0) {
- DEBUG_WARN("%s unwrap secure key warning, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_WARN("%s unwrap secure key warning, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
}
/* process response cprb param block */
@@ -683,8 +678,8 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
/* check the returned keyblock */
if (prepparm->lv3.ckb.version != 0x01 &&
prepparm->lv3.ckb.version != 0x02) {
- DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x\n",
- __func__, (int)prepparm->lv3.ckb.version);
+ ZCRYPT_DBF_ERR("%s reply param keyblock version mismatch 0x%02x\n",
+ __func__, (int)prepparm->lv3.ckb.version);
rc = -EIO;
goto out;
}
@@ -707,8 +702,8 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
*protkeytype = PKEY_KEYTYPE_AES_256;
break;
default:
- DEBUG_ERR("%s unknown/unsupported keylen %d\n",
- __func__, prepparm->lv3.ckb.len);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keylen %d\n",
+ __func__, prepparm->lv3.ckb.len);
rc = -EIO;
goto out;
}
@@ -840,9 +835,8 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
case 256:
break;
default:
- DEBUG_ERR(
- "%s unknown/unsupported keybitsize %d\n",
- __func__, keybitsize);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keybitsize %d\n",
+ __func__, keybitsize);
rc = -EINVAL;
goto out;
}
@@ -880,19 +874,17 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR(
- "%s cipher key generate failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s cipher key generate failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
rc = -EIO;
goto out;
}
@@ -905,8 +897,8 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
/* do some plausibility checks on the key block */
if (prepparm->kb.len < 120 + 5 * sizeof(uint16_t) ||
prepparm->kb.len > 136 + 5 * sizeof(uint16_t)) {
- DEBUG_ERR("%s reply with invalid or unknown key block\n",
- __func__);
+ ZCRYPT_DBF_ERR("%s reply with invalid or unknown key block\n",
+ __func__);
rc = -EIO;
goto out;
}
@@ -1048,19 +1040,17 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR(
- "%s CSNBKPI2 failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s CSNBKPI2 failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
rc = -EIO;
goto out;
}
@@ -1073,8 +1063,8 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
/* do some plausibility checks on the key block */
if (prepparm->kb.len < 120 + 3 * sizeof(uint16_t) ||
prepparm->kb.len > 136 + 3 * sizeof(uint16_t)) {
- DEBUG_ERR("%s reply with invalid or unknown key block\n",
- __func__);
+ ZCRYPT_DBF_ERR("%s reply with invalid or unknown key block\n",
+ __func__);
rc = -EIO;
goto out;
}
@@ -1132,33 +1122,29 @@ int cca_clr2cipherkey(u16 card, u16 dom, u32 keybitsize, u32 keygenflags,
rc = _ip_cprb_helper(card, dom, "AES ", "FIRST ", "MIN3PART",
exorbuf, keybitsize, token, &tokensize);
if (rc) {
- DEBUG_ERR(
- "%s clear key import 1/4 with CSNBKPI2 failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s clear key import 1/4 with CSNBKPI2 failed, rc=%d\n",
+ __func__, rc);
goto out;
}
rc = _ip_cprb_helper(card, dom, "AES ", "ADD-PART", NULL,
clrkey, keybitsize, token, &tokensize);
if (rc) {
- DEBUG_ERR(
- "%s clear key import 2/4 with CSNBKPI2 failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s clear key import 2/4 with CSNBKPI2 failed, rc=%d\n",
+ __func__, rc);
goto out;
}
rc = _ip_cprb_helper(card, dom, "AES ", "ADD-PART", NULL,
exorbuf, keybitsize, token, &tokensize);
if (rc) {
- DEBUG_ERR(
- "%s clear key import 3/4 with CSNBKPI2 failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s clear key import 3/4 with CSNBKPI2 failed, rc=%d\n",
+ __func__, rc);
goto out;
}
rc = _ip_cprb_helper(card, dom, "AES ", "COMPLETE", NULL,
NULL, keybitsize, token, &tokensize);
if (rc) {
- DEBUG_ERR(
- "%s clear key import 4/4 with CSNBKPI2 failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s clear key import 4/4 with CSNBKPI2 failed, rc=%d\n",
+ __func__, rc);
goto out;
}
@@ -1265,19 +1251,17 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR(
- "%s unwrap secure key failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s unwrap secure key failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290)
rc = -EAGAIN;
else
@@ -1285,11 +1269,10 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
goto out;
}
if (prepcblk->ccp_rscode != 0) {
- DEBUG_WARN(
- "%s unwrap secure key warning, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_WARN("%s unwrap secure key warning, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
}
/* process response cprb param block */
@@ -1300,15 +1283,14 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
/* check the returned keyblock */
if (prepparm->vud.ckb.version != 0x01 &&
prepparm->vud.ckb.version != 0x02) {
- DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x\n",
- __func__, (int)prepparm->vud.ckb.version);
+ ZCRYPT_DBF_ERR("%s reply param keyblock version mismatch 0x%02x\n",
+ __func__, (int)prepparm->vud.ckb.version);
rc = -EIO;
goto out;
}
if (prepparm->vud.ckb.algo != 0x02) {
- DEBUG_ERR(
- "%s reply param keyblock algo mismatch 0x%02x != 0x02\n",
- __func__, (int)prepparm->vud.ckb.algo);
+ ZCRYPT_DBF_ERR("%s reply param keyblock algo mismatch 0x%02x != 0x02\n",
+ __func__, (int)prepparm->vud.ckb.algo);
rc = -EIO;
goto out;
}
@@ -1331,8 +1313,8 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
*protkeytype = PKEY_KEYTYPE_AES_256;
break;
default:
- DEBUG_ERR("%s unknown/unsupported keylen %d\n",
- __func__, prepparm->vud.ckb.keylen);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keylen %d\n",
+ __func__, prepparm->vud.ckb.keylen);
rc = -EIO;
goto out;
}
@@ -1432,19 +1414,17 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR(
- "%s unwrap secure key failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s unwrap secure key failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290)
rc = -EAGAIN;
else
@@ -1452,11 +1432,10 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
goto out;
}
if (prepcblk->ccp_rscode != 0) {
- DEBUG_WARN(
- "%s unwrap secure key warning, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_WARN("%s unwrap secure key warning, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
}
/* process response cprb param block */
@@ -1466,23 +1445,22 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
/* check the returned keyblock */
if (prepparm->vud.ckb.version != 0x02) {
- DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x != 0x02\n",
- __func__, (int)prepparm->vud.ckb.version);
+ ZCRYPT_DBF_ERR("%s reply param keyblock version mismatch 0x%02x != 0x02\n",
+ __func__, (int)prepparm->vud.ckb.version);
rc = -EIO;
goto out;
}
if (prepparm->vud.ckb.algo != 0x81) {
- DEBUG_ERR(
- "%s reply param keyblock algo mismatch 0x%02x != 0x81\n",
- __func__, (int)prepparm->vud.ckb.algo);
+ ZCRYPT_DBF_ERR("%s reply param keyblock algo mismatch 0x%02x != 0x81\n",
+ __func__, (int)prepparm->vud.ckb.algo);
rc = -EIO;
goto out;
}
/* copy the translated protected key */
if (prepparm->vud.ckb.keylen > *protkeylen) {
- DEBUG_ERR("%s prot keylen mismatch %d > buffersize %u\n",
- __func__, prepparm->vud.ckb.keylen, *protkeylen);
+ ZCRYPT_DBF_ERR("%s prot keylen mismatch %d > buffersize %u\n",
+ __func__, prepparm->vud.ckb.keylen, *protkeylen);
rc = -EIO;
goto out;
}
@@ -1550,17 +1528,17 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
rc = zcrypt_send_cprb(&xcrb);
if (rc) {
- DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
/* check response returncode and reasoncode */
if (prepcblk->ccp_rtcode != 0) {
- DEBUG_ERR("%s unwrap secure key failure, card response %d/%d\n",
- __func__,
- (int)prepcblk->ccp_rtcode,
- (int)prepcblk->ccp_rscode);
+ ZCRYPT_DBF_ERR("%s unwrap secure key failure, card response %d/%d\n",
+ __func__,
+ (int)prepcblk->ccp_rtcode,
+ (int)prepcblk->ccp_rscode);
rc = -EIO;
goto out;
}
diff --git a/drivers/s390/crypto/zcrypt_debug.h b/drivers/s390/crypto/zcrypt_debug.h
index 5cf88aabd64b..9a208dc4c200 100644
--- a/drivers/s390/crypto/zcrypt_debug.h
+++ b/drivers/s390/crypto/zcrypt_debug.h
@@ -17,7 +17,7 @@
#define RC2ERR(rc) ((rc) ? DBF_ERR : DBF_INFO)
#define RC2WARN(rc) ((rc) ? DBF_WARN : DBF_INFO)
-#define DBF_MAX_SPRINTF_ARGS 6
+#define ZCRYPT_DBF_MAX_SPRINTF_ARGS 6
#define ZCRYPT_DBF(...) \
debug_sprintf_event(zcrypt_dbf_info, ##__VA_ARGS__)
@@ -27,8 +27,6 @@
debug_sprintf_event(zcrypt_dbf_info, DBF_WARN, ##__VA_ARGS__)
#define ZCRYPT_DBF_INFO(...) \
debug_sprintf_event(zcrypt_dbf_info, DBF_INFO, ##__VA_ARGS__)
-#define ZCRYPT_DBF_DBG(...) \
- debug_sprintf_event(zcrypt_dbf_info, DBF_DEBUG, ##__VA_ARGS__)
extern debug_info_t *zcrypt_dbf_info;
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index 0a877f9792c2..eb7f5489ccf9 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -24,11 +24,6 @@
#include "zcrypt_ep11misc.h"
#include "zcrypt_ccamisc.h"
-#define DEBUG_DBG(...) ZCRYPT_DBF(DBF_DEBUG, ##__VA_ARGS__)
-#define DEBUG_INFO(...) ZCRYPT_DBF(DBF_INFO, ##__VA_ARGS__)
-#define DEBUG_WARN(...) ZCRYPT_DBF(DBF_WARN, ##__VA_ARGS__)
-#define DEBUG_ERR(...) ZCRYPT_DBF(DBF_ERR, ##__VA_ARGS__)
-
#define EP11_PINBLOB_V1_BYTES 56
/* default iv used here */
@@ -510,7 +505,7 @@ static int check_reply_pl(const u8 *pl, const char *func)
/* start tag */
if (*pl++ != 0x30) {
- DEBUG_ERR("%s reply start tag mismatch\n", func);
+ ZCRYPT_DBF_ERR("%s reply start tag mismatch\n", func);
return -EIO;
}
@@ -527,40 +522,41 @@ static int check_reply_pl(const u8 *pl, const char *func)
len = *((u16 *)pl);
pl += 2;
} else {
- DEBUG_ERR("%s reply start tag lenfmt mismatch 0x%02hhx\n",
- func, *pl);
+ ZCRYPT_DBF_ERR("%s reply start tag lenfmt mismatch 0x%02hhx\n",
+ func, *pl);
return -EIO;
}
/* len should cover at least 3 fields with 32 bit value each */
if (len < 3 * 6) {
- DEBUG_ERR("%s reply length %d too small\n", func, len);
+ ZCRYPT_DBF_ERR("%s reply length %d too small\n", func, len);
return -EIO;
}
/* function tag, length and value */
if (pl[0] != 0x04 || pl[1] != 0x04) {
- DEBUG_ERR("%s function tag or length mismatch\n", func);
+ ZCRYPT_DBF_ERR("%s function tag or length mismatch\n", func);
return -EIO;
}
pl += 6;
/* dom tag, length and value */
if (pl[0] != 0x04 || pl[1] != 0x04) {
- DEBUG_ERR("%s dom tag or length mismatch\n", func);
+ ZCRYPT_DBF_ERR("%s dom tag or length mismatch\n", func);
return -EIO;
}
pl += 6;
/* return value tag, length and value */
if (pl[0] != 0x04 || pl[1] != 0x04) {
- DEBUG_ERR("%s return value tag or length mismatch\n", func);
+ ZCRYPT_DBF_ERR("%s return value tag or length mismatch\n",
+ func);
return -EIO;
}
pl += 2;
ret = *((u32 *)pl);
if (ret != 0) {
- DEBUG_ERR("%s return value 0x%04x != 0\n", func, ret);
+ ZCRYPT_DBF_ERR("%s return value 0x%04x != 0\n", func, ret);
return -EIO;
}
@@ -626,9 +622,8 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
- __func__, (int)cardnr, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
+ __func__, (int)cardnr, (int)domain, rc);
goto out;
}
@@ -636,13 +631,13 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
if (rc)
goto out;
if (rep_pl->data_tag != 0x04 || rep_pl->data_lenfmt != 0x82) {
- DEBUG_ERR("%s unknown reply data format\n", __func__);
+ ZCRYPT_DBF_ERR("%s unknown reply data format\n", __func__);
rc = -EIO;
goto out;
}
if (rep_pl->data_len > buflen) {
- DEBUG_ERR("%s mismatch between reply data len and buffer len\n",
- __func__);
+ ZCRYPT_DBF_ERR("%s mismatch between reply data len and buffer len\n",
+ __func__);
rc = -ENOSPC;
goto out;
}
@@ -816,9 +811,8 @@ static int _ep11_genaeskey(u16 card, u16 domain,
case 256:
break;
default:
- DEBUG_ERR(
- "%s unknown/unsupported keybitsize %d\n",
- __func__, keybitsize);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keybitsize %d\n",
+ __func__, keybitsize);
rc = -EINVAL;
goto out;
}
@@ -878,9 +872,8 @@ static int _ep11_genaeskey(u16 card, u16 domain,
rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
- __func__, (int)card, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
+ __func__, (int)card, (int)domain, rc);
goto out;
}
@@ -888,13 +881,13 @@ static int _ep11_genaeskey(u16 card, u16 domain,
if (rc)
goto out;
if (rep_pl->data_tag != 0x04 || rep_pl->data_lenfmt != 0x82) {
- DEBUG_ERR("%s unknown reply data format\n", __func__);
+ ZCRYPT_DBF_ERR("%s unknown reply data format\n", __func__);
rc = -EIO;
goto out;
}
if (rep_pl->data_len > *keybufsize) {
- DEBUG_ERR("%s mismatch reply data len / key buffer len\n",
- __func__);
+ ZCRYPT_DBF_ERR("%s mismatch reply data len / key buffer len\n",
+ __func__);
rc = -ENOSPC;
goto out;
}
@@ -1030,9 +1023,8 @@ static int ep11_cryptsingle(u16 card, u16 domain,
rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
- __func__, (int)card, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
+ __func__, (int)card, (int)domain, rc);
goto out;
}
@@ -1040,7 +1032,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
if (rc)
goto out;
if (rep_pl->data_tag != 0x04) {
- DEBUG_ERR("%s unknown reply data format\n", __func__);
+ ZCRYPT_DBF_ERR("%s unknown reply data format\n", __func__);
rc = -EIO;
goto out;
}
@@ -1053,14 +1045,14 @@ static int ep11_cryptsingle(u16 card, u16 domain,
n = *((u16 *)p);
p += 2;
} else {
- DEBUG_ERR("%s unknown reply data length format 0x%02hhx\n",
- __func__, rep_pl->data_lenfmt);
+ ZCRYPT_DBF_ERR("%s unknown reply data length format 0x%02hhx\n",
+ __func__, rep_pl->data_lenfmt);
rc = -EIO;
goto out;
}
if (n > *outbufsize) {
- DEBUG_ERR("%s mismatch reply data len %d / output buffer %zu\n",
- __func__, n, *outbufsize);
+ ZCRYPT_DBF_ERR("%s mismatch reply data len %d / output buffer %zu\n",
+ __func__, n, *outbufsize);
rc = -ENOSPC;
goto out;
}
@@ -1188,9 +1180,8 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
- __func__, (int)card, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
+ __func__, (int)card, (int)domain, rc);
goto out;
}
@@ -1198,13 +1189,13 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
if (rc)
goto out;
if (rep_pl->data_tag != 0x04 || rep_pl->data_lenfmt != 0x82) {
- DEBUG_ERR("%s unknown reply data format\n", __func__);
+ ZCRYPT_DBF_ERR("%s unknown reply data format\n", __func__);
rc = -EIO;
goto out;
}
if (rep_pl->data_len > *keybufsize) {
- DEBUG_ERR("%s mismatch reply data len / key buffer len\n",
- __func__);
+ ZCRYPT_DBF_ERR("%s mismatch reply data len / key buffer len\n",
+ __func__);
rc = -ENOSPC;
goto out;
}
@@ -1343,9 +1334,8 @@ static int _ep11_wrapkey(u16 card, u16 domain,
rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
- DEBUG_ERR(
- "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
- __func__, (int)card, (int)domain, rc);
+ ZCRYPT_DBF_ERR("%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
+ __func__, (int)card, (int)domain, rc);
goto out;
}
@@ -1353,13 +1343,13 @@ static int _ep11_wrapkey(u16 card, u16 domain,
if (rc)
goto out;
if (rep_pl->data_tag != 0x04 || rep_pl->data_lenfmt != 0x82) {
- DEBUG_ERR("%s unknown reply data format\n", __func__);
+ ZCRYPT_DBF_ERR("%s unknown reply data format\n", __func__);
rc = -EIO;
goto out;
}
if (rep_pl->data_len > *datasize) {
- DEBUG_ERR("%s mismatch reply data len / data buffer len\n",
- __func__);
+ ZCRYPT_DBF_ERR("%s mismatch reply data len / data buffer len\n",
+ __func__);
rc = -ENOSPC;
goto out;
}
@@ -1386,9 +1376,8 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
if (keybitsize == 128 || keybitsize == 192 || keybitsize == 256) {
clrkeylen = keybitsize / 8;
} else {
- DEBUG_ERR(
- "%s unknown/unsupported keybitsize %d\n",
- __func__, keybitsize);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported keybitsize %d\n",
+ __func__, keybitsize);
return -EINVAL;
}
@@ -1405,9 +1394,8 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
kek, &keklen);
if (rc) {
- DEBUG_ERR(
- "%s generate kek key failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s generate kek key failed, rc=%d\n",
+ __func__, rc);
goto out;
}
@@ -1415,9 +1403,8 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
rc = ep11_cryptsingle(card, domain, 0, 0, def_iv, kek, keklen,
clrkey, clrkeylen, encbuf, &encbuflen);
if (rc) {
- DEBUG_ERR(
- "%s encrypting key value with kek key failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s encrypting key value with kek key failed, rc=%d\n",
+ __func__, rc);
goto out;
}
@@ -1426,9 +1413,8 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
encbuf, encbuflen, 0, def_iv,
keybitsize, 0, keybuf, keybufsize, keytype);
if (rc) {
- DEBUG_ERR(
- "%s importing key value as new key failed,, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s importing key value as new key failed,, rc=%d\n",
+ __func__, rc);
goto out;
}
@@ -1476,17 +1462,16 @@ int ep11_kblob2protkey(u16 card, u16 dom,
rc = _ep11_wrapkey(card, dom, (u8 *)key, keylen,
0, def_iv, wkbuf, &wkbuflen);
if (rc) {
- DEBUG_ERR(
- "%s rewrapping ep11 key to pkey failed, rc=%d\n",
- __func__, rc);
+ ZCRYPT_DBF_ERR("%s rewrapping ep11 key to pkey failed, rc=%d\n",
+ __func__, rc);
goto out;
}
wki = (struct wk_info *)wkbuf;
/* check struct version and pkey type */
if (wki->version != 1 || wki->pkeytype < 1 || wki->pkeytype > 5) {
- DEBUG_ERR("%s wk info version %d or pkeytype %d mismatch.\n",
- __func__, (int)wki->version, (int)wki->pkeytype);
+ ZCRYPT_DBF_ERR("%s wk info version %d or pkeytype %d mismatch.\n",
+ __func__, (int)wki->version, (int)wki->pkeytype);
rc = -EIO;
goto out;
}
@@ -1511,8 +1496,8 @@ int ep11_kblob2protkey(u16 card, u16 dom,
*protkeytype = PKEY_KEYTYPE_AES_256;
break;
default:
- DEBUG_ERR("%s unknown/unsupported AES pkeysize %d\n",
- __func__, (int)wki->pkeysize);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported AES pkeysize %d\n",
+ __func__, (int)wki->pkeysize);
rc = -EIO;
goto out;
}
@@ -1525,16 +1510,16 @@ int ep11_kblob2protkey(u16 card, u16 dom,
break;
case 2: /* TDES */
default:
- DEBUG_ERR("%s unknown/unsupported key type %d\n",
- __func__, (int)wki->pkeytype);
+ ZCRYPT_DBF_ERR("%s unknown/unsupported key type %d\n",
+ __func__, (int)wki->pkeytype);
rc = -EIO;
goto out;
}
/* copy the translated protected key */
if (wki->pkeysize > *protkeylen) {
- DEBUG_ERR("%s wk info pkeysize %llu > protkeysize %u\n",
- __func__, wki->pkeysize, *protkeylen);
+ ZCRYPT_DBF_ERR("%s wk info pkeysize %llu > protkeysize %u\n",
+ __func__, wki->pkeysize, *protkeylen);
rc = -EINVAL;
goto out;
}
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index a44fcfcec938..46e27b43a8af 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -119,10 +119,9 @@ static inline int convert_error(struct zcrypt_queue *zq,
case REP82_ERROR_MESSAGE_TYPE: /* 0x20 */
case REP82_ERROR_TRANSPORT_FAIL: /* 0x90 */
/*
- * Msg to wrong type or card/infrastructure failure.
- * Trigger rescan of the ap bus, trigger retry request.
+ * Msg to wrong type or card/infrastructure failure. Return
+ * EAGAIN, the upper layer may do a retry on the request.
*/
- atomic_set(&zcrypt_rescan_req, 1);
/* For type 86 response show the apfs value (failure reason) */
if (ehdr->reply_code == REP82_ERROR_TRANSPORT_FAIL &&
ehdr->type == TYPE86_RSP_CODE) {
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 2e155de8abe5..3b39cb8f926d 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -427,7 +427,7 @@ static void zcrypt_msgtype50_receive(struct ap_queue *aq,
len = t80h->len;
if (len > reply->bufsize || len > msg->bufsize ||
len != reply->len) {
- ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__);
+ pr_debug("%s len mismatch => EMSGSIZE\n", __func__);
msg->rc = -EMSGSIZE;
goto out;
}
@@ -487,9 +487,9 @@ static long zcrypt_msgtype50_modexpo(struct zcrypt_queue *zq,
out:
ap_msg->private = NULL;
if (rc)
- ZCRYPT_DBF_DBG("%s send me cprb at dev=%02x.%04x rc=%d\n",
- __func__, AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid), rc);
+ pr_debug("%s send me cprb at dev=%02x.%04x rc=%d\n",
+ __func__, AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid), rc);
return rc;
}
@@ -537,9 +537,9 @@ static long zcrypt_msgtype50_modexpo_crt(struct zcrypt_queue *zq,
out:
ap_msg->private = NULL;
if (rc)
- ZCRYPT_DBF_DBG("%s send crt cprb at dev=%02x.%04x rc=%d\n",
- __func__, AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid), rc);
+ pr_debug("%s send crt cprb at dev=%02x.%04x rc=%d\n",
+ __func__, AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid), rc);
return rc;
}
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 3c53abbdc342..215f257d2360 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -437,9 +437,9 @@ static int xcrb_msg_to_type6cprb_msgx(bool userspace, struct ap_message *ap_msg,
ap_msg->flags |= AP_MSG_FLAG_ADMIN;
break;
default:
- ZCRYPT_DBF_DBG("%s unknown CPRB minor version '%c%c'\n",
- __func__, msg->cprbx.func_id[0],
- msg->cprbx.func_id[1]);
+ pr_debug("%s unknown CPRB minor version '%c%c'\n",
+ __func__, msg->cprbx.func_id[0],
+ msg->cprbx.func_id[1]);
}
/* copy data block */
@@ -629,9 +629,9 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
/* Copy CPRB to user */
if (xcrb->reply_control_blk_length < msg->fmt2.count1) {
- ZCRYPT_DBF_DBG("%s reply_control_blk_length %u < required %u => EMSGSIZE\n",
- __func__, xcrb->reply_control_blk_length,
- msg->fmt2.count1);
+ pr_debug("%s reply_control_blk_length %u < required %u => EMSGSIZE\n",
+ __func__, xcrb->reply_control_blk_length,
+ msg->fmt2.count1);
return -EMSGSIZE;
}
if (z_copy_to_user(userspace, xcrb->reply_control_blk_addr,
@@ -642,9 +642,9 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
/* Copy data buffer to user */
if (msg->fmt2.count2) {
if (xcrb->reply_data_length < msg->fmt2.count2) {
- ZCRYPT_DBF_DBG("%s reply_data_length %u < required %u => EMSGSIZE\n",
- __func__, xcrb->reply_data_length,
- msg->fmt2.count2);
+ pr_debug("%s reply_data_length %u < required %u => EMSGSIZE\n",
+ __func__, xcrb->reply_data_length,
+ msg->fmt2.count2);
return -EMSGSIZE;
}
if (z_copy_to_user(userspace, xcrb->reply_data_addr,
@@ -673,9 +673,9 @@ static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
char *data = reply->msg;
if (xcrb->resp_len < msg->fmt2.count1) {
- ZCRYPT_DBF_DBG("%s resp_len %u < required %u => EMSGSIZE\n",
- __func__, (unsigned int)xcrb->resp_len,
- msg->fmt2.count1);
+ pr_debug("%s resp_len %u < required %u => EMSGSIZE\n",
+ __func__, (unsigned int)xcrb->resp_len,
+ msg->fmt2.count1);
return -EMSGSIZE;
}
@@ -875,7 +875,8 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq,
len = sizeof(struct type86x_reply) + t86r->length;
if (len > reply->bufsize || len > msg->bufsize ||
len != reply->len) {
- ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__);
+ pr_debug("%s len mismatch => EMSGSIZE\n",
+ __func__);
msg->rc = -EMSGSIZE;
goto out;
}
@@ -889,7 +890,8 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq,
len = t86r->fmt2.offset1 + t86r->fmt2.count1;
if (len > reply->bufsize || len > msg->bufsize ||
len != reply->len) {
- ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__);
+ pr_debug("%s len mismatch => EMSGSIZE\n",
+ __func__);
msg->rc = -EMSGSIZE;
goto out;
}
@@ -939,7 +941,8 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq,
len = t86r->fmt2.offset1 + t86r->fmt2.count1;
if (len > reply->bufsize || len > msg->bufsize ||
len != reply->len) {
- ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__);
+ pr_debug("%s len mismatch => EMSGSIZE\n",
+ __func__);
msg->rc = -EMSGSIZE;
goto out;
}
@@ -1151,9 +1154,9 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
out:
if (rc)
- ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n",
- __func__, AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid), rc);
+ pr_debug("%s send cprb at dev=%02x.%04x rc=%d\n",
+ __func__, AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid), rc);
return rc;
}
@@ -1274,9 +1277,9 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
out:
if (rc)
- ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n",
- __func__, AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid), rc);
+ pr_debug("%s send cprb at dev=%02x.%04x rc=%d\n",
+ __func__, AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid), rc);
return rc;
}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b92a32b4b114..04c64ce0a1ca 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -255,9 +255,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
if (!recover) {
hash_del(&addr->hnode);
kfree(addr);
- continue;
+ } else {
+ /* prepare for recovery */
+ addr->disp_flag = QETH_DISP_ADDR_ADD;
}
- addr->disp_flag = QETH_DISP_ADDR_ADD;
}
mutex_unlock(&card->ip_lock);
@@ -278,9 +279,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
rc = qeth_l3_register_addr_entry(card, addr);
- if (!rc) {
+ if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) {
+ /* keep it in the records */
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
} else {
+ /* bad address */
hash_del(&addr->hnode);
kfree(addr);
}
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index addac7fbe37b..9ce27092729c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1270,7 +1270,7 @@ source "drivers/scsi/arm/Kconfig"
config JAZZ_ESP
bool "MIPS JAZZ FAS216 SCSI support"
- depends on MACH_JAZZ && SCSI
+ depends on MACH_JAZZ && SCSI=y
select SCSI_SPI_ATTRS
help
This is the driver for the onboard SCSI host adapter of MIPS Magnum
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 19eee108db02..5c8d1ba3f8f3 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip)
{
struct fcoe_fcf *sel;
struct fcoe_fcf *fcf;
- unsigned long flags;
mutex_lock(&fip->ctlr_mutex);
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
kfree_skb(fip->flogi_req);
fip->flogi_req = NULL;
list_for_each_entry(fcf, &fip->fcfs, list)
fcf->flogi_sent = 0;
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
sel = fip->sel_fcf;
if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr))
@@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
{
struct fc_frame *fp;
struct fc_frame_header *fh;
- unsigned long flags;
u16 old_xid;
u8 op;
u8 mac[ETH_ALEN];
@@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
op = FIP_DT_FLOGI;
if (fip->mode == FIP_MODE_VN2VN)
break;
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
kfree_skb(fip->flogi_req);
fip->flogi_req = skb;
fip->flogi_req_send = 1;
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
schedule_work(&fip->timer_work);
return -EINPROGRESS;
case ELS_FDISC:
@@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip)
static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
{
struct fcoe_fcf *fcf;
- unsigned long flags;
int error;
mutex_lock(&fip->ctlr_mutex);
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n");
fcf = fcoe_ctlr_select(fip);
if (!fcf || fcf->flogi_sent) {
@@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
fcoe_ctlr_solicit(fip, NULL);
error = fcoe_ctlr_flogi_send_locked(fip);
}
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
mutex_unlock(&fip->ctlr_mutex);
return error;
}
@@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
{
struct fcoe_fcf *fcf;
- unsigned long flags;
- spin_lock_irqsave(&fip->ctlr_lock, flags);
+ spin_lock_bh(&fip->ctlr_lock);
fcf = fip->sel_fcf;
if (!fcf || !fip->flogi_req_send)
goto unlock;
@@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
} else /* XXX */
LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n");
unlock:
- spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ spin_unlock_bh(&fip->ctlr_lock);
}
/**
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 2074937c05bc..ce73f08ee889 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -305,6 +305,7 @@ struct fnic {
unsigned int copy_wq_base;
struct work_struct link_work;
struct work_struct frame_work;
+ struct work_struct flush_work;
struct sk_buff_head frame_queue;
struct sk_buff_head tx_queue;
@@ -363,7 +364,7 @@ void fnic_handle_event(struct work_struct *work);
int fnic_rq_cmpl_handler(struct fnic *fnic, int);
int fnic_alloc_rq_frame(struct vnic_rq *rq);
void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf);
-void fnic_flush_tx(struct fnic *);
+void fnic_flush_tx(struct work_struct *work);
void fnic_eth_send(struct fcoe_ctlr *, struct sk_buff *skb);
void fnic_set_port_id(struct fc_lport *, u32, struct fc_frame *);
void fnic_update_mac(struct fc_lport *, u8 *new);
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
index 5e312a55cc7d..a08293b2ad9f 100644
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -1182,7 +1182,7 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp)
/**
* fnic_flush_tx() - send queued frames.
- * @fnic: fnic device
+ * @work: pointer to work element
*
* Send frames that were waiting to go out in FC or Ethernet mode.
* Whenever changing modes we purge queued frames, so these frames should
@@ -1190,8 +1190,9 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp)
*
* Called without fnic_lock held.
*/
-void fnic_flush_tx(struct fnic *fnic)
+void fnic_flush_tx(struct work_struct *work)
{
+ struct fnic *fnic = container_of(work, struct fnic, flush_work);
struct sk_buff *skb;
struct fc_frame *fp;
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 5ed1d897311a..29eead383eb9 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -830,6 +830,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&fnic->vlans_lock);
INIT_WORK(&fnic->fip_frame_work, fnic_handle_fip_frame);
INIT_WORK(&fnic->event_work, fnic_handle_event);
+ INIT_WORK(&fnic->flush_work, fnic_flush_tx);
skb_queue_head_init(&fnic->fip_frame_queue);
INIT_LIST_HEAD(&fnic->evlist);
INIT_LIST_HEAD(&fnic->vlans);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 8d7fc5284293..fc4cee91b175 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -680,7 +680,7 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic,
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
- fnic_flush_tx(fnic);
+ queue_work(fnic_event_queue, &fnic->flush_work);
reset_cmpl_handler_end:
fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET);
@@ -736,7 +736,7 @@ static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic,
}
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
- fnic_flush_tx(fnic);
+ queue_work(fnic_event_queue, &fnic->flush_work);
queue_work(fnic_event_queue, &fnic->frame_work);
} else {
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index 2a50fda3a628..625fd547ee60 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -371,7 +371,6 @@ static u16 initio_se2_rd(unsigned long base, u8 addr)
*/
static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
{
- u8 rb;
u8 instr;
int i;
@@ -400,7 +399,7 @@ static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
udelay(30);
outb(SE2CS, base + TUL_NVRAM); /* -CLK */
udelay(30);
- if ((rb = inb(base + TUL_NVRAM)) & SE2DI)
+ if (inb(base + TUL_NVRAM) & SE2DI)
break; /* write complete */
}
outb(0, base + TUL_NVRAM); /* -CS */
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index 71f711cb0628..355a0bc0828e 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -3387,7 +3387,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost,
return SCI_FAILURE;
}
- return SCI_SUCCESS;
+ return status;
}
static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag)
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index d26941b131fd..bf879d81846b 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1918,7 +1918,7 @@ out:
*
* Returns the number of SGEs added to the SGL.
**/
-static int
+static uint32_t
lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
struct sli4_sge *sgl, int datasegcnt,
struct lpfc_io_buf *lpfc_cmd)
@@ -1926,8 +1926,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
struct scatterlist *sgde = NULL; /* s/g data entry */
struct sli4_sge_diseed *diseed = NULL;
dma_addr_t physaddr;
- int i = 0, num_sge = 0, status;
- uint32_t reftag;
+ int i = 0, status;
+ uint32_t reftag, num_sge = 0;
uint8_t txop, rxop;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint32_t rc;
@@ -2099,7 +2099,7 @@ out:
*
* Returns the number of SGEs added to the SGL.
**/
-static int
+static uint32_t
lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
struct sli4_sge *sgl, int datacnt, int protcnt,
struct lpfc_io_buf *lpfc_cmd)
@@ -2123,8 +2123,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
uint32_t rc;
#endif
uint32_t checking = 1;
- uint32_t dma_offset = 0;
- int num_sge = 0, j = 2;
+ uint32_t dma_offset = 0, num_sge = 0;
+ int j = 2;
struct sli4_hybrid_sgl *sgl_xtra = NULL;
sgpe = scsi_prot_sglist(sc);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c
index c0c8ab586957..d32ad46318cb 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_transport.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c
@@ -1671,7 +1671,7 @@ mpi3mr_update_mr_sas_port(struct mpi3mr_ioc *mrioc, struct host_port *h_port,
void
mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc)
{
- struct host_port h_port[64];
+ struct host_port *h_port = NULL;
int i, j, found, host_port_count = 0, port_idx;
u16 sz, attached_handle, ioc_status;
struct mpi3_sas_io_unit_page0 *sas_io_unit_pg0 = NULL;
@@ -1685,6 +1685,10 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc)
sas_io_unit_pg0 = kzalloc(sz, GFP_KERNEL);
if (!sas_io_unit_pg0)
return;
+ h_port = kcalloc(64, sizeof(struct host_port), GFP_KERNEL);
+ if (!h_port)
+ goto out;
+
if (mpi3mr_cfg_get_sas_io_unit_pg0(mrioc, sas_io_unit_pg0, sz)) {
ioc_err(mrioc, "failure at %s:%d/%s()!\n",
__FILE__, __LINE__, __func__);
@@ -1814,6 +1818,7 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc)
}
}
out:
+ kfree(h_port);
kfree(sas_io_unit_pg0);
}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 8761bc58d965..b8120ca93c79 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -7378,7 +7378,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout)
return -EFAULT;
}
- issue_diag_reset:
+ return 0;
+
+issue_diag_reset:
rc = _base_diag_reset(ioc);
return rc;
}
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 76d369343c7a..8cad9792a562 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
return result + 4;
}
+enum scsi_vpd_parameters {
+ SCSI_VPD_HEADER_SIZE = 4,
+ SCSI_VPD_LIST_SIZE = 36,
+};
+
static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
{
- unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4);
+ unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4);
int result;
if (sdev->no_vpd_size)
return SCSI_DEFAULT_VPD_LEN;
/*
+ * Fetch the supported pages VPD and validate that the requested page
+ * number is present.
+ */
+ if (page != 0) {
+ result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd));
+ if (result < SCSI_VPD_HEADER_SIZE)
+ return 0;
+
+ result -= SCSI_VPD_HEADER_SIZE;
+ if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result))
+ return 0;
+ }
+ /*
* Fetch the VPD page header to find out how big the page
* is. This is done to prevent problems on legacy devices
* which can not handle allocation lengths as large as
* potentially requested by the caller.
*/
- result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header));
+ result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE);
if (result < 0)
return 0;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 79da4b1c1df0..612489afe8d2 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *,
struct scsi_cmnd *);
-void scsi_eh_wakeup(struct Scsi_Host *shost)
+void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy)
{
lockdep_assert_held(shost->host_lock);
- if (scsi_host_busy(shost) == shost->host_failed) {
+ if (busy == shost->host_failed) {
trace_scsi_eh_wakeup(shost);
wake_up_process(shost->ehandler);
SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
@@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost)
if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
shost->host_eh_scheduled++;
- scsi_eh_wakeup(shost);
+ scsi_eh_wakeup(shost, scsi_host_busy(shost));
}
spin_unlock_irqrestore(shost->host_lock, flags);
@@ -282,11 +282,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head)
{
struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
struct Scsi_Host *shost = scmd->device->host;
+ unsigned int busy = scsi_host_busy(shost);
unsigned long flags;
spin_lock_irqsave(shost->host_lock, flags);
shost->host_failed++;
- scsi_eh_wakeup(shost);
+ scsi_eh_wakeup(shost, busy);
spin_unlock_irqrestore(shost->host_lock, flags);
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index cf3864f72093..df5ac03d5d6c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -278,9 +278,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
rcu_read_lock();
__clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
if (unlikely(scsi_host_in_recovery(shost))) {
+ unsigned int busy = scsi_host_busy(shost);
+
spin_lock_irqsave(shost->host_lock, flags);
if (shost->host_failed || shost->host_eh_scheduled)
- scsi_eh_wakeup(shost);
+ scsi_eh_wakeup(shost, busy);
spin_unlock_irqrestore(shost->host_lock, flags);
}
rcu_read_unlock();
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 3f0dfb97db6b..1fbfe1b52c9f 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -92,7 +92,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work);
extern enum blk_eh_timer_return scsi_timeout(struct request *req);
extern int scsi_error_handler(void *host);
extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd);
-extern void scsi_eh_wakeup(struct Scsi_Host *shost);
+extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy);
extern void scsi_eh_scmd_add(struct scsi_cmnd *);
void scsi_eh_ready_devs(struct Scsi_Host *shost,
struct list_head *work_q,
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 44680f65ea14..9969f4e2f1c3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -332,7 +332,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
sdev->sg_reserved_size = INT_MAX;
- q = blk_mq_init_queue(&sdev->host->tag_set);
+ q = blk_mq_alloc_queue(&sdev->host->tag_set, NULL, NULL);
if (IS_ERR(q)) {
/* release fn is set up in scsi_sysfs_device_initialise, so
* have to free and put manually here */
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0833b3e6aa6e..bdd0acf7fa3c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3407,6 +3407,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp,
return true;
}
+static void sd_read_block_zero(struct scsi_disk *sdkp)
+{
+ unsigned int buf_len = sdkp->device->sector_size;
+ char *buffer, cmd[10] = { };
+
+ buffer = kmalloc(buf_len, GFP_KERNEL);
+ if (!buffer)
+ return;
+
+ cmd[0] = READ_10;
+ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */
+ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */
+
+ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len,
+ SD_TIMEOUT, sdkp->max_retries, NULL);
+ kfree(buffer);
+}
+
/**
* sd_revalidate_disk - called the first time a new disk is seen,
* performs disk spin up, read_capacity, etc.
@@ -3446,7 +3464,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
*/
if (sdkp->media_present) {
sd_read_capacity(sdkp, buffer);
-
+ /*
+ * Some USB/UAS devices return generic values for mode pages
+ * until the media has been accessed. Trigger a READ operation
+ * to force the device to populate mode pages.
+ */
+ if (sdp->read_before_ms)
+ sd_read_block_zero(sdkp);
/*
* set the default to rotational. All non-rotational devices
* support the block characteristics VPD page, which will
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index ceff1ec13f9e..385180c98be4 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -6533,8 +6533,11 @@ static void pqi_map_queues(struct Scsi_Host *shost)
{
struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
- blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+ if (!ctrl_info->disable_managed_interrupts)
+ return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
ctrl_info->pci_dev, 0);
+ else
+ return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]);
}
static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index a95936b18f69..7ceb982040a5 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -330,6 +330,7 @@ enum storvsc_request_type {
*/
static int storvsc_ringbuffer_size = (128 * 1024);
+static int aligned_ringbuffer_size;
static u32 max_outstanding_req_per_channel;
static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);
@@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
new_sc->next_request_id_callback = storvsc_next_request_id;
ret = vmbus_open(new_sc,
- storvsc_ringbuffer_size,
- storvsc_ringbuffer_size,
+ aligned_ringbuffer_size,
+ aligned_ringbuffer_size,
(void *)&props,
sizeof(struct vmstorage_channel_properties),
storvsc_on_channel_callback, new_sc);
@@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device,
dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1);
stor_device->port_number = host->host_no;
- ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
+ ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc);
if (ret)
goto err_out1;
@@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev)
{
int ret;
- ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
+ ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size,
hv_dev_is_fc(hv_dev));
return ret;
}
@@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void)
* the ring buffer indices) by the max request size (which is
* vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64)
*/
+ aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size);
max_outstanding_req_per_channel =
- ((storvsc_ringbuffer_size - PAGE_SIZE) /
+ ((aligned_ringbuffer_size - PAGE_SIZE) /
ALIGN(MAX_MULTIPAGE_BUFFER_PACKET +
sizeof(struct vstor_packet) + sizeof(u64),
sizeof(u64)));
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 4cf20be668a6..617eb892f4ad 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -188,8 +188,6 @@ static void virtscsi_vq_done(struct virtio_scsi *vscsi,
while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
fn(vscsi, buf);
- if (unlikely(virtqueue_is_broken(vq)))
- break;
} while (!virtqueue_enable_cb(vq));
spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags);
}
diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c
index 780199bf351e..49a0955e82d6 100644
--- a/drivers/soc/apple/mailbox.c
+++ b/drivers/soc/apple/mailbox.c
@@ -296,14 +296,14 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index)
of_node_put(args.np);
if (!pdev)
- return ERR_PTR(EPROBE_DEFER);
+ return ERR_PTR(-EPROBE_DEFER);
mbox = platform_get_drvdata(pdev);
if (!mbox)
- return ERR_PTR(EPROBE_DEFER);
+ return ERR_PTR(-EPROBE_DEFER);
if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER))
- return ERR_PTR(ENODEV);
+ return ERR_PTR(-ENODEV);
return mbox;
}
diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index 0810b5b0c688..50c664b65f4d 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -68,4 +68,13 @@ config MTK_SVS
chip process corner, temperatures and other factors. Then DVFS
driver could apply SVS bank voltage to PMIC/Buck.
+config MTK_SOCINFO
+ tristate "MediaTek SoC Information"
+ default y
+ depends on NVMEM_MTK_EFUSE
+ help
+ The MediaTek SoC Information (mtk-socinfo) driver provides
+ information about the SoC to the userspace including the
+ manufacturer name, marketing name and soc name.
+
endmenu
diff --git a/drivers/soc/mediatek/Makefile b/drivers/soc/mediatek/Makefile
index 9d3ce7878c5c..6830512848fd 100644
--- a/drivers/soc/mediatek/Makefile
+++ b/drivers/soc/mediatek/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_MTK_REGULATOR_COUPLER) += mtk-regulator-coupler.o
obj-$(CONFIG_MTK_MMSYS) += mtk-mmsys.o
obj-$(CONFIG_MTK_MMSYS) += mtk-mutex.o
obj-$(CONFIG_MTK_SVS) += mtk-svs.o
+obj-$(CONFIG_MTK_SOCINFO) += mtk-socinfo.o
diff --git a/drivers/soc/mediatek/mtk-socinfo.c b/drivers/soc/mediatek/mtk-socinfo.c
new file mode 100644
index 000000000000..42572e8c1520
--- /dev/null
+++ b/drivers/soc/mediatek/mtk-socinfo.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/device.h>
+#include <linux/device/bus.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/sys_soc.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#define MTK_SOCINFO_ENTRY(_soc_name, _segment_name, _marketing_name, _cell_data1, _cell_data2) {\
+ .soc_name = _soc_name, \
+ .segment_name = _segment_name, \
+ .marketing_name = _marketing_name, \
+ .cell_data = {_cell_data1, _cell_data2} \
+}
+#define CELL_NOT_USED (0xFFFFFFFF)
+#define MAX_CELLS (2)
+
+struct mtk_socinfo {
+ struct device *dev;
+ struct name_data *name_data;
+ struct socinfo_data *socinfo_data;
+ struct soc_device *soc_dev;
+};
+
+struct socinfo_data {
+ char *soc_name;
+ char *segment_name;
+ char *marketing_name;
+ u32 cell_data[MAX_CELLS];
+};
+
+static const char *cell_names[MAX_CELLS] = {"socinfo-data1", "socinfo-data2"};
+
+static struct socinfo_data socinfo_data_table[] = {
+ MTK_SOCINFO_ENTRY("MT8173", "MT8173V/AC", "MT8173", 0x6CA20004, 0x10000000),
+ MTK_SOCINFO_ENTRY("MT8183", "MT8183V/AZA", "Kompanio 500", 0x00010043, 0x00000840),
+ MTK_SOCINFO_ENTRY("MT8183", "MT8183V/AZA", "Kompanio 500", 0x00010043, 0x00000940),
+ MTK_SOCINFO_ENTRY("MT8186", "MT8186GV/AZA", "Kompanio 520", 0x81861001, CELL_NOT_USED),
+ MTK_SOCINFO_ENTRY("MT8186T", "MT8186TV/AZA", "Kompanio 528", 0x81862001, CELL_NOT_USED),
+ MTK_SOCINFO_ENTRY("MT8188", "MT8188GV/AZA", "Kompanio 830", 0x81880000, 0x00000010),
+ MTK_SOCINFO_ENTRY("MT8188", "MT8188GV/HZA", "Kompanio 830", 0x81880000, 0x00000011),
+ MTK_SOCINFO_ENTRY("MT8192", "MT8192V/AZA", "Kompanio 820", 0x00001100, 0x00040080),
+ MTK_SOCINFO_ENTRY("MT8192T", "MT8192V/ATZA", "Kompanio 828", 0x00000100, 0x000400C0),
+ MTK_SOCINFO_ENTRY("MT8195", "MT8195GV/EZA", "Kompanio 1200", 0x81950300, CELL_NOT_USED),
+ MTK_SOCINFO_ENTRY("MT8195", "MT8195GV/EHZA", "Kompanio 1200", 0x81950304, CELL_NOT_USED),
+ MTK_SOCINFO_ENTRY("MT8195", "MT8195TV/EZA", "Kompanio 1380", 0x81950400, CELL_NOT_USED),
+ MTK_SOCINFO_ENTRY("MT8195", "MT8195TV/EHZA", "Kompanio 1380", 0x81950404, CELL_NOT_USED),
+};
+
+static int mtk_socinfo_create_socinfo_node(struct mtk_socinfo *mtk_socinfop)
+{
+ struct soc_device_attribute *attrs;
+ static char machine[30] = {0};
+ static const char *soc_manufacturer = "MediaTek";
+
+ attrs = devm_kzalloc(mtk_socinfop->dev, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
+ snprintf(machine, sizeof(machine), "%s (%s)", mtk_socinfop->socinfo_data->marketing_name,
+ mtk_socinfop->socinfo_data->soc_name);
+ attrs->family = soc_manufacturer;
+ attrs->machine = machine;
+
+ mtk_socinfop->soc_dev = soc_device_register(attrs);
+ if (IS_ERR(mtk_socinfop->soc_dev))
+ return PTR_ERR(mtk_socinfop->soc_dev);
+
+ dev_info(mtk_socinfop->dev, "%s %s SoC detected.\n", soc_manufacturer, attrs->machine);
+ return 0;
+}
+
+static u32 mtk_socinfo_read_cell(struct device *dev, const char *name)
+{
+ struct nvmem_device *nvmemp;
+ struct device_node *np, *nvmem_node = dev->parent->of_node;
+ u32 offset;
+ u32 cell_val = CELL_NOT_USED;
+
+ /* should never fail since the nvmem driver registers this child */
+ nvmemp = nvmem_device_find(nvmem_node, device_match_of_node);
+ if (IS_ERR(nvmemp))
+ goto out;
+
+ np = of_get_child_by_name(nvmem_node, name);
+ if (!np)
+ goto put_device;
+
+ if (of_property_read_u32_index(np, "reg", 0, &offset))
+ goto put_node;
+
+ nvmem_device_read(nvmemp, offset, sizeof(cell_val), &cell_val);
+
+put_node:
+ of_node_put(np);
+put_device:
+ nvmem_device_put(nvmemp);
+out:
+ return cell_val;
+}
+
+static int mtk_socinfo_get_socinfo_data(struct mtk_socinfo *mtk_socinfop)
+{
+ unsigned int i, j;
+ unsigned int num_cell_data = 0;
+ u32 cell_data[MAX_CELLS] = {0};
+ bool match_socinfo;
+ int match_socinfo_index = -1;
+
+ for (i = 0; i < MAX_CELLS; i++) {
+ cell_data[i] = mtk_socinfo_read_cell(mtk_socinfop->dev, cell_names[i]);
+ if (cell_data[i] != CELL_NOT_USED)
+ num_cell_data++;
+ else
+ break;
+ }
+
+ if (!num_cell_data)
+ return -ENOENT;
+
+ for (i = 0; i < ARRAY_SIZE(socinfo_data_table); i++) {
+ match_socinfo = true;
+ for (j = 0; j < num_cell_data; j++) {
+ if (cell_data[j] != socinfo_data_table[i].cell_data[j]) {
+ match_socinfo = false;
+ break;
+ }
+ }
+ if (match_socinfo) {
+ mtk_socinfop->socinfo_data = &(socinfo_data_table[i]);
+ match_socinfo_index = i;
+ break;
+ }
+ }
+
+ return match_socinfo_index >= 0 ? match_socinfo_index : -ENOENT;
+}
+
+static int mtk_socinfo_probe(struct platform_device *pdev)
+{
+ struct mtk_socinfo *mtk_socinfop;
+ int ret;
+
+ mtk_socinfop = devm_kzalloc(&pdev->dev, sizeof(*mtk_socinfop), GFP_KERNEL);
+ if (!mtk_socinfop)
+ return -ENOMEM;
+
+ mtk_socinfop->dev = &pdev->dev;
+
+ ret = mtk_socinfo_get_socinfo_data(mtk_socinfop);
+ if (ret < 0)
+ return dev_err_probe(mtk_socinfop->dev, ret, "Failed to get socinfo data\n");
+
+ ret = mtk_socinfo_create_socinfo_node(mtk_socinfop);
+ if (ret)
+ return dev_err_probe(mtk_socinfop->dev, ret, "Cannot create node\n");
+
+ platform_set_drvdata(pdev, mtk_socinfop);
+ return 0;
+}
+
+static void mtk_socinfo_remove(struct platform_device *pdev)
+{
+ struct mtk_socinfo *mtk_socinfop = platform_get_drvdata(pdev);
+
+ soc_device_unregister(mtk_socinfop->soc_dev);
+}
+
+static struct platform_driver mtk_socinfo = {
+ .probe = mtk_socinfo_probe,
+ .remove_new = mtk_socinfo_remove,
+ .driver = {
+ .name = "mtk-socinfo",
+ },
+};
+module_platform_driver(mtk_socinfo);
+
+MODULE_AUTHOR("William-TW LIN <william-tw.lin@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek socinfo driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig
index 9b0fdd95276e..19f4b576f822 100644
--- a/drivers/soc/microchip/Kconfig
+++ b/drivers/soc/microchip/Kconfig
@@ -1,5 +1,5 @@
config POLARFIRE_SOC_SYS_CTRL
- tristate "POLARFIRE_SOC_SYS_CTRL"
+ tristate "Microchip PolarFire SoC (MPFS) system controller support"
depends on POLARFIRE_SOC_MAILBOX
depends on MTD
help
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index c6ca4de42586..5af33b0e3470 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -268,4 +268,13 @@ config QCOM_INLINE_CRYPTO_ENGINE
tristate
select QCOM_SCM
+config QCOM_PBS
+ tristate "PBS trigger support for Qualcomm Technologies, Inc. PMICS"
+ depends on SPMI
+ help
+ This driver supports configuring software programmable boot sequencer (PBS)
+ trigger event through PBS RAM on Qualcomm Technologies, Inc. PMICs.
+ This module provides the APIs to the client drivers that wants to send the
+ PBS trigger event to the PBS RAM.
+
endmenu
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 05b3d54e8dc9..ca0bece0dfff 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS_rpmh-rsc.o := -I$(src)
+CFLAGS_qcom_aoss.o := -I$(src)
obj-$(CONFIG_QCOM_AOSS_QMP) += qcom_aoss.o
obj-$(CONFIG_QCOM_GENI_SE) += qcom-geni-se.o
obj-$(CONFIG_QCOM_COMMAND_DB) += cmd-db.o
@@ -34,3 +35,4 @@ obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o
obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o
qcom_ice-objs += ice.o
obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o
+obj-$(CONFIG_QCOM_PBS) += qcom-pbs.o
diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
index 1f8b315576a4..50749e870efa 100644
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -399,7 +399,7 @@ static int apr_uevent(const struct device *dev, struct kobj_uevent_env *env)
return add_uevent_var(env, "MODALIAS=apr:%s", adev->name);
}
-struct bus_type aprbus = {
+const struct bus_type aprbus = {
.name = "aprbus",
.match = apr_device_match,
.probe = apr_device_probe,
diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index 4ca88eaebf06..cbef0dea1d5d 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -859,6 +859,8 @@ static int llcc_update_act_ctrl(u32 sid,
ret = regmap_read_poll_timeout(drv_data->bcast_regmap, status_reg,
slice_status, !(slice_status & status),
0, LLCC_STATUS_READ_DELAY);
+ if (ret)
+ return ret;
if (drv_data->version >= LLCC_VERSION_4_1_0_0)
ret = regmap_write(drv_data->bcast_regmap, act_clear_reg,
diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c
index f4bfd24386f1..f913e9bd57ed 100644
--- a/drivers/soc/qcom/pmic_glink.c
+++ b/drivers/soc/qcom/pmic_glink.c
@@ -265,10 +265,17 @@ static int pmic_glink_probe(struct platform_device *pdev)
pg->client_mask = *match_data;
+ pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg);
+ if (IS_ERR(pg->pdr)) {
+ ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr),
+ "failed to initialize pdr\n");
+ return ret;
+ }
+
if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) {
ret = pmic_glink_add_aux_device(pg, &pg->ucsi_aux, "ucsi");
if (ret)
- return ret;
+ goto out_release_pdr_handle;
}
if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_ALTMODE)) {
ret = pmic_glink_add_aux_device(pg, &pg->altmode_aux, "altmode");
@@ -281,17 +288,11 @@ static int pmic_glink_probe(struct platform_device *pdev)
goto out_release_altmode_aux;
}
- pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg);
- if (IS_ERR(pg->pdr)) {
- ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), "failed to initialize pdr\n");
- goto out_release_aux_devices;
- }
-
service = pdr_add_lookup(pg->pdr, "tms/servreg", "msm/adsp/charger_pd");
if (IS_ERR(service)) {
ret = dev_err_probe(&pdev->dev, PTR_ERR(service),
"failed adding pdr lookup for charger_pd\n");
- goto out_release_pdr_handle;
+ goto out_release_aux_devices;
}
mutex_lock(&__pmic_glink_lock);
@@ -300,8 +301,6 @@ static int pmic_glink_probe(struct platform_device *pdev)
return 0;
-out_release_pdr_handle:
- pdr_handle_release(pg->pdr);
out_release_aux_devices:
if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_BATT))
pmic_glink_del_aux_device(pg, &pg->ps_aux);
@@ -311,6 +310,8 @@ out_release_altmode_aux:
out_release_ucsi_aux:
if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI))
pmic_glink_del_aux_device(pg, &pg->ucsi_aux);
+out_release_pdr_handle:
+ pdr_handle_release(pg->pdr);
return ret;
}
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c
index 5fcd0fdd2faa..b3808fc24c69 100644
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
struct work_struct work;
- struct device *bridge;
+ struct auxiliary_device *bridge;
enum typec_orientation orientation;
u16 svid;
@@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
else
pmic_glink_altmode_enable_usb(altmode, alt_port);
- drm_aux_hpd_bridge_notify(alt_port->bridge,
+ drm_aux_hpd_bridge_notify(&alt_port->bridge->dev,
alt_port->hpd_state ?
connector_status_connected :
connector_status_disconnected);
@@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
alt_port->index = port;
INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
- alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+ alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode));
if (IS_ERR(alt_port->bridge)) {
fwnode_handle_put(fwnode);
return PTR_ERR(alt_port->bridge);
@@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
}
}
+ for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) {
+ alt_port = &altmode->ports[port];
+ if (!alt_port->bridge)
+ continue;
+
+ ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge);
+ if (ret)
+ return ret;
+ }
+
altmode->client = devm_pmic_glink_register_client(dev,
altmode->owner_id,
pmic_glink_altmode_callback,
diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c
index bdcf44b85b2f..2e8f24d5da80 100644
--- a/drivers/soc/qcom/qcom-geni-se.c
+++ b/drivers/soc/qcom/qcom-geni-se.c
@@ -89,7 +89,6 @@
* @base: Base address of this instance of QUP wrapper core
* @clks: Handle to the primary & optional secondary AHB clocks
* @num_clks: Count of clocks
- * @to_core: Core ICC path
*/
struct geni_wrapper {
struct device *dev;
diff --git a/drivers/soc/qcom/qcom-pbs.c b/drivers/soc/qcom/qcom-pbs.c
new file mode 100644
index 000000000000..6af49b5060e5
--- /dev/null
+++ b/drivers/soc/qcom/qcom-pbs.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/spmi.h>
+#include <linux/soc/qcom/qcom-pbs.h>
+
+#define PBS_CLIENT_TRIG_CTL 0x42
+#define PBS_CLIENT_SW_TRIG_BIT BIT(7)
+#define PBS_CLIENT_SCRATCH1 0x50
+#define PBS_CLIENT_SCRATCH2 0x51
+#define PBS_CLIENT_SCRATCH2_ERROR 0xFF
+
+#define RETRIES 2000
+#define DELAY 1100
+
+struct pbs_dev {
+ struct device *dev;
+ struct regmap *regmap;
+ struct mutex lock;
+ struct device_link *link;
+
+ u32 base;
+};
+
+static int qcom_pbs_wait_for_ack(struct pbs_dev *pbs, u8 bit_pos)
+{
+ unsigned int val;
+ int ret;
+
+ ret = regmap_read_poll_timeout(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH2,
+ val, val & BIT(bit_pos), DELAY, DELAY * RETRIES);
+
+ if (ret < 0) {
+ dev_err(pbs->dev, "Timeout for PBS ACK/NACK for bit %u\n", bit_pos);
+ return -ETIMEDOUT;
+ }
+
+ if (val == PBS_CLIENT_SCRATCH2_ERROR) {
+ ret = regmap_write(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH2, 0);
+ dev_err(pbs->dev, "NACK from PBS for bit %u\n", bit_pos);
+ return -EINVAL;
+ }
+
+ dev_dbg(pbs->dev, "PBS sequence for bit %u executed!\n", bit_pos);
+ return 0;
+}
+
+/**
+ * qcom_pbs_trigger_event() - Trigger the PBS RAM sequence
+ * @pbs: Pointer to PBS device
+ * @bitmap: bitmap
+ *
+ * This function is used to trigger the PBS RAM sequence to be
+ * executed by the client driver.
+ *
+ * The PBS trigger sequence involves
+ * 1. setting the PBS sequence bit in PBS_CLIENT_SCRATCH1
+ * 2. Initiating the SW PBS trigger
+ * 3. Checking the equivalent bit in PBS_CLIENT_SCRATCH2 for the
+ * completion of the sequence.
+ * 4. If PBS_CLIENT_SCRATCH2 == 0xFF, the PBS sequence failed to execute
+ *
+ * Return: 0 on success, < 0 on failure
+ */
+int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap)
+{
+ unsigned int val;
+ u16 bit_pos;
+ int ret;
+
+ if (WARN_ON(!bitmap))
+ return -EINVAL;
+
+ if (IS_ERR_OR_NULL(pbs))
+ return -EINVAL;
+
+ mutex_lock(&pbs->lock);
+ ret = regmap_read(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH2, &val);
+ if (ret < 0)
+ goto out;
+
+ if (val == PBS_CLIENT_SCRATCH2_ERROR) {
+ /* PBS error - clear SCRATCH2 register */
+ ret = regmap_write(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH2, 0);
+ if (ret < 0)
+ goto out;
+ }
+
+ for (bit_pos = 0; bit_pos < 8; bit_pos++) {
+ if (!(bitmap & BIT(bit_pos)))
+ continue;
+
+ /* Clear the PBS sequence bit position */
+ ret = regmap_update_bits(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH2,
+ BIT(bit_pos), 0);
+ if (ret < 0)
+ goto out_clear_scratch1;
+
+ /* Set the PBS sequence bit position */
+ ret = regmap_update_bits(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH1,
+ BIT(bit_pos), BIT(bit_pos));
+ if (ret < 0)
+ goto out_clear_scratch1;
+
+ /* Initiate the SW trigger */
+ ret = regmap_update_bits(pbs->regmap, pbs->base + PBS_CLIENT_TRIG_CTL,
+ PBS_CLIENT_SW_TRIG_BIT, PBS_CLIENT_SW_TRIG_BIT);
+ if (ret < 0)
+ goto out_clear_scratch1;
+
+ ret = qcom_pbs_wait_for_ack(pbs, bit_pos);
+ if (ret < 0)
+ goto out_clear_scratch1;
+
+ /* Clear the PBS sequence bit position */
+ regmap_update_bits(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH1, BIT(bit_pos), 0);
+ regmap_update_bits(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH2, BIT(bit_pos), 0);
+ }
+
+out_clear_scratch1:
+ /* Clear all the requested bitmap */
+ ret = regmap_update_bits(pbs->regmap, pbs->base + PBS_CLIENT_SCRATCH1, bitmap, 0);
+
+out:
+ mutex_unlock(&pbs->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(qcom_pbs_trigger_event);
+
+/**
+ * get_pbs_client_device() - Get the PBS device used by client
+ * @dev: Client device
+ *
+ * This function is used to get the PBS device that is being
+ * used by the client.
+ *
+ * Return: pbs_dev on success, ERR_PTR on failure
+ */
+struct pbs_dev *get_pbs_client_device(struct device *dev)
+{
+ struct device_node *pbs_dev_node;
+ struct platform_device *pdev;
+ struct pbs_dev *pbs;
+
+ pbs_dev_node = of_parse_phandle(dev->of_node, "qcom,pbs", 0);
+ if (!pbs_dev_node) {
+ dev_err(dev, "Missing qcom,pbs property\n");
+ return ERR_PTR(-ENODEV);
+ }
+
+ pdev = of_find_device_by_node(pbs_dev_node);
+ if (!pdev) {
+ dev_err(dev, "Unable to find PBS dev_node\n");
+ pbs = ERR_PTR(-EPROBE_DEFER);
+ goto out;
+ }
+
+ pbs = platform_get_drvdata(pdev);
+ if (!pbs) {
+ dev_err(dev, "Cannot get pbs instance from %s\n", dev_name(&pdev->dev));
+ platform_device_put(pdev);
+ pbs = ERR_PTR(-EPROBE_DEFER);
+ goto out;
+ }
+
+ pbs->link = device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER);
+ if (!pbs->link) {
+ dev_err(&pdev->dev, "Failed to create device link to consumer %s\n", dev_name(dev));
+ platform_device_put(pdev);
+ pbs = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+out:
+ of_node_put(pbs_dev_node);
+ return pbs;
+}
+EXPORT_SYMBOL_GPL(get_pbs_client_device);
+
+static int qcom_pbs_probe(struct platform_device *pdev)
+{
+ struct pbs_dev *pbs;
+ u32 val;
+ int ret;
+
+ pbs = devm_kzalloc(&pdev->dev, sizeof(*pbs), GFP_KERNEL);
+ if (!pbs)
+ return -ENOMEM;
+
+ pbs->dev = &pdev->dev;
+ pbs->regmap = dev_get_regmap(pbs->dev->parent, NULL);
+ if (!pbs->regmap) {
+ dev_err(pbs->dev, "Couldn't get parent's regmap\n");
+ return -EINVAL;
+ }
+
+ ret = device_property_read_u32(pbs->dev, "reg", &val);
+ if (ret < 0) {
+ dev_err(pbs->dev, "Couldn't find reg, ret = %d\n", ret);
+ return ret;
+ }
+ pbs->base = val;
+ mutex_init(&pbs->lock);
+
+ platform_set_drvdata(pdev, pbs);
+
+ return 0;
+}
+
+static const struct of_device_id qcom_pbs_match_table[] = {
+ { .compatible = "qcom,pbs" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, qcom_pbs_match_table);
+
+static struct platform_driver qcom_pbs_driver = {
+ .driver = {
+ .name = "qcom-pbs",
+ .of_match_table = qcom_pbs_match_table,
+ },
+ .probe = qcom_pbs_probe,
+};
+module_platform_driver(qcom_pbs_driver)
+
+MODULE_DESCRIPTION("QCOM PBS DRIVER");
+MODULE_LICENSE("GPL");
diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c
index aff0cfb71482..ca2f6b7629ce 100644
--- a/drivers/soc/qcom/qcom_aoss.c
+++ b/drivers/soc/qcom/qcom_aoss.c
@@ -3,6 +3,7 @@
* Copyright (c) 2019, Linaro Ltd
*/
#include <linux/clk-provider.h>
+#include <linux/debugfs.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mailbox_client.h>
@@ -13,6 +14,9 @@
#include <linux/slab.h>
#include <linux/soc/qcom/qcom_aoss.h>
+#define CREATE_TRACE_POINTS
+#include "trace-aoss.h"
+
#define QMP_DESC_MAGIC 0x0
#define QMP_DESC_VERSION 0x4
#define QMP_DESC_FEATURES 0x8
@@ -44,6 +48,8 @@
#define QMP_NUM_COOLING_RESOURCES 2
+#define QMP_DEBUGFS_FILES 4
+
static bool qmp_cdev_max_state = 1;
struct qmp_cooling_device {
@@ -65,6 +71,8 @@ struct qmp_cooling_device {
* @tx_lock: provides synchronization between multiple callers of qmp_send()
* @qdss_clk: QDSS clock hw struct
* @cooling_devs: thermal cooling devices
+ * @debugfs_root: directory for the developer/tester interface
+ * @debugfs_files: array of individual debugfs entries under debugfs_root
*/
struct qmp {
void __iomem *msgram;
@@ -82,6 +90,8 @@ struct qmp {
struct clk_hw qdss_clk;
struct qmp_cooling_device *cooling_devs;
+ struct dentry *debugfs_root;
+ struct dentry *debugfs_files[QMP_DEBUGFS_FILES];
};
static void qmp_kick(struct qmp *qmp)
@@ -214,7 +224,7 @@ static bool qmp_message_empty(struct qmp *qmp)
*
* Return: 0 on success, negative errno on failure
*/
-int qmp_send(struct qmp *qmp, const char *fmt, ...)
+int __printf(2, 3) qmp_send(struct qmp *qmp, const char *fmt, ...)
{
char buf[QMP_MSG_LEN];
long time_left;
@@ -235,6 +245,8 @@ int qmp_send(struct qmp *qmp, const char *fmt, ...)
mutex_lock(&qmp->tx_lock);
+ trace_aoss_send(buf);
+
/* The message RAM only implements 32-bit accesses */
__iowrite32_copy(qmp->msgram + qmp->offset + sizeof(u32),
buf, sizeof(buf) / sizeof(u32));
@@ -256,6 +268,8 @@ int qmp_send(struct qmp *qmp, const char *fmt, ...)
ret = 0;
}
+ trace_aoss_send_done(buf, ret);
+
mutex_unlock(&qmp->tx_lock);
return ret;
@@ -475,6 +489,91 @@ void qmp_put(struct qmp *qmp)
}
EXPORT_SYMBOL_GPL(qmp_put);
+struct qmp_debugfs_entry {
+ const char *name;
+ const char *fmt;
+ bool is_bool;
+ const char *true_val;
+ const char *false_val;
+};
+
+static const struct qmp_debugfs_entry qmp_debugfs_entries[QMP_DEBUGFS_FILES] = {
+ { "ddr_frequency_mhz", "{class: ddr, res: fixed, val: %u}", false },
+ { "prevent_aoss_sleep", "{class: aoss_slp, res: sleep: %s}", true, "enable", "disable" },
+ { "prevent_cx_collapse", "{class: cx_mol, res: cx, val: %s}", true, "mol", "off" },
+ { "prevent_ddr_collapse", "{class: ddr_mol, res: ddr, val: %s}", true, "mol", "off" },
+};
+
+static ssize_t qmp_debugfs_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *pos)
+{
+ const struct qmp_debugfs_entry *entry = NULL;
+ struct qmp *qmp = file->private_data;
+ char buf[QMP_MSG_LEN];
+ unsigned int uint_val;
+ const char *str_val;
+ bool bool_val;
+ int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qmp->debugfs_files); i++) {
+ if (qmp->debugfs_files[i] == file->f_path.dentry) {
+ entry = &qmp_debugfs_entries[i];
+ break;
+ }
+ }
+ if (WARN_ON(!entry))
+ return -EFAULT;
+
+ if (entry->is_bool) {
+ ret = kstrtobool_from_user(user_buf, count, &bool_val);
+ if (ret)
+ return ret;
+
+ str_val = bool_val ? entry->true_val : entry->false_val;
+
+ ret = snprintf(buf, sizeof(buf), entry->fmt, str_val);
+ if (ret >= sizeof(buf))
+ return -EINVAL;
+ } else {
+ ret = kstrtou32_from_user(user_buf, count, 0, &uint_val);
+ if (ret)
+ return ret;
+
+ ret = snprintf(buf, sizeof(buf), entry->fmt, uint_val);
+ if (ret >= sizeof(buf))
+ return -EINVAL;
+ }
+
+ ret = qmp_send(qmp, buf);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static const struct file_operations qmp_debugfs_fops = {
+ .open = simple_open,
+ .write = qmp_debugfs_write,
+};
+
+static void qmp_debugfs_create(struct qmp *qmp)
+{
+ const struct qmp_debugfs_entry *entry;
+ int i;
+
+ qmp->debugfs_root = debugfs_create_dir("qcom_aoss", NULL);
+
+ for (i = 0; i < ARRAY_SIZE(qmp->debugfs_files); i++) {
+ entry = &qmp_debugfs_entries[i];
+
+ qmp->debugfs_files[i] = debugfs_create_file(entry->name, 0200,
+ qmp->debugfs_root,
+ qmp,
+ &qmp_debugfs_fops);
+ }
+}
+
static int qmp_probe(struct platform_device *pdev)
{
struct qmp *qmp;
@@ -523,6 +622,8 @@ static int qmp_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, qmp);
+ qmp_debugfs_create(qmp);
+
return 0;
err_close_qmp:
@@ -537,6 +638,8 @@ static void qmp_remove(struct platform_device *pdev)
{
struct qmp *qmp = platform_get_drvdata(pdev);
+ debugfs_remove_recursive(qmp->debugfs_root);
+
qmp_qdss_clk_remove(qmp);
qmp_cooling_devices_remove(qmp);
diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c
index 690afc9a12f4..7191fa0c087f 100644
--- a/drivers/soc/qcom/smem.c
+++ b/drivers/soc/qcom/smem.c
@@ -655,8 +655,6 @@ invalid_canary:
void *qcom_smem_get(unsigned host, unsigned item, size_t *size)
{
struct smem_partition *part;
- unsigned long flags;
- int ret;
void *ptr = ERR_PTR(-EPROBE_DEFER);
if (!__smem)
@@ -665,12 +663,6 @@ void *qcom_smem_get(unsigned host, unsigned item, size_t *size)
if (WARN_ON(item >= __smem->item_count))
return ERR_PTR(-EINVAL);
- ret = hwspin_lock_timeout_irqsave(__smem->hwlock,
- HWSPINLOCK_TIMEOUT,
- &flags);
- if (ret)
- return ERR_PTR(ret);
-
if (host < SMEM_HOST_COUNT && __smem->partitions[host].virt_base) {
part = &__smem->partitions[host];
ptr = qcom_smem_get_private(__smem, part, item, size);
@@ -681,10 +673,7 @@ void *qcom_smem_get(unsigned host, unsigned item, size_t *size)
ptr = qcom_smem_get_global(__smem, item, size);
}
- hwspin_unlock_irqrestore(__smem->hwlock, &flags);
-
return ptr;
-
}
EXPORT_SYMBOL_GPL(qcom_smem_get);
diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c
index 914b2246148f..a21241cbeec7 100644
--- a/drivers/soc/qcom/smp2p.c
+++ b/drivers/soc/qcom/smp2p.c
@@ -58,8 +58,8 @@
* @valid_entries: number of allocated entries
* @flags:
* @entries: individual communication entries
- * @name: name of the entry
- * @value: content of the entry
+ * @entries.name: name of the entry
+ * @entries.value: content of the entry
*/
struct smp2p_smem_item {
u32 magic;
@@ -275,6 +275,8 @@ static void qcom_smp2p_notify_in(struct qcom_smp2p *smp2p)
*
* Handle notifications from the remote side to handle newly allocated entries
* or any changes to the state bits of existing entries.
+ *
+ * Return: %IRQ_HANDLED
*/
static irqreturn_t qcom_smp2p_intr(int irq, void *data)
{
diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c
index 6349a0debeb5..e8ff9819ac47 100644
--- a/drivers/soc/qcom/socinfo.c
+++ b/drivers/soc/qcom/socinfo.c
@@ -124,7 +124,7 @@ static const char *const pmic_models[] = {
[50] = "PM8350B",
[51] = "PMR735A",
[52] = "PMR735B",
- [55] = "PM2250",
+ [55] = "PM4125",
[58] = "PM8450",
[65] = "PM8010",
[69] = "PM8550VS",
@@ -424,8 +424,11 @@ static const struct soc_id soc_id[] = {
{ qcom_board_id(IPQ9510) },
{ qcom_board_id(QRB4210) },
{ qcom_board_id(QRB2210) },
+ { qcom_board_id(SM8475) },
+ { qcom_board_id(SM8475P) },
{ qcom_board_id(SA8775P) },
{ qcom_board_id(QRU1000) },
+ { qcom_board_id(SM8475_2) },
{ qcom_board_id(QDU1000) },
{ qcom_board_id(SM8650) },
{ qcom_board_id(SM4450) },
@@ -437,6 +440,8 @@ static const struct soc_id soc_id[] = {
{ qcom_board_id(IPQ5322) },
{ qcom_board_id(IPQ5312) },
{ qcom_board_id(IPQ5302) },
+ { qcom_board_id(QCS8550) },
+ { qcom_board_id(QCM8550) },
{ qcom_board_id(IPQ5300) },
};
diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c
index 2f0b1bfe7658..06e2c4c2a4a8 100644
--- a/drivers/soc/qcom/spm.c
+++ b/drivers/soc/qcom/spm.c
@@ -6,20 +6,40 @@
* SAW power controller driver
*/
-#include <linux/kernel.h>
+#include <linux/bitfield.h>
+#include <linux/err.h>
#include <linux/init.h>
#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/linear_range.h>
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/err.h>
#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+
+#include <linux/regulator/driver.h>
+
#include <soc/qcom/spm.h>
+#define FIELD_SET(current, mask, val) \
+ (((current) & ~(mask)) | FIELD_PREP((mask), (val)))
+
#define SPM_CTL_INDEX 0x7f
#define SPM_CTL_INDEX_SHIFT 4
#define SPM_CTL_EN BIT(0)
+/* These registers might be specific to SPM 1.1 */
+#define SPM_VCTL_VLVL GENMASK(7, 0)
+#define SPM_PMIC_DATA_0_VLVL GENMASK(7, 0)
+#define SPM_PMIC_DATA_1_MIN_VSEL GENMASK(5, 0)
+#define SPM_PMIC_DATA_1_MAX_VSEL GENMASK(21, 16)
+
+#define SPM_1_1_AVS_CTL_AVS_ENABLED BIT(27)
+#define SPM_AVS_CTL_MAX_VLVL GENMASK(22, 17)
+#define SPM_AVS_CTL_MIN_VLVL GENMASK(15, 10)
+
enum spm_reg {
SPM_REG_CFG,
SPM_REG_SPM_CTL,
@@ -29,13 +49,44 @@ enum spm_reg {
SPM_REG_PMIC_DATA_1,
SPM_REG_VCTL,
SPM_REG_SEQ_ENTRY,
- SPM_REG_SPM_STS,
+ SPM_REG_STS0,
+ SPM_REG_STS1,
SPM_REG_PMIC_STS,
SPM_REG_AVS_CTL,
SPM_REG_AVS_LIMIT,
+ SPM_REG_RST,
SPM_REG_NR,
};
+#define MAX_PMIC_DATA 2
+#define MAX_SEQ_DATA 64
+
+struct spm_reg_data {
+ const u16 *reg_offset;
+ u32 spm_cfg;
+ u32 spm_dly;
+ u32 pmic_dly;
+ u32 pmic_data[MAX_PMIC_DATA];
+ u32 avs_ctl;
+ u32 avs_limit;
+ u8 seq[MAX_SEQ_DATA];
+ u8 start_index[PM_SLEEP_MODE_NR];
+
+ smp_call_func_t set_vdd;
+ /* for now we support only a single range */
+ struct linear_range *range;
+ unsigned int ramp_delay;
+ unsigned int init_uV;
+};
+
+struct spm_driver_data {
+ void __iomem *reg_base;
+ const struct spm_reg_data *reg_data;
+ struct device *dev;
+ unsigned int volt_sel;
+ int reg_cpu;
+};
+
static const u16 spm_reg_offset_v4_1[SPM_REG_NR] = {
[SPM_REG_AVS_CTL] = 0x904,
[SPM_REG_AVS_LIMIT] = 0x908,
@@ -169,6 +220,10 @@ static const struct spm_reg_data spm_reg_8226_cpu = {
static const u16 spm_reg_offset_v1_1[SPM_REG_NR] = {
[SPM_REG_CFG] = 0x08,
+ [SPM_REG_STS0] = 0x0c,
+ [SPM_REG_STS1] = 0x10,
+ [SPM_REG_VCTL] = 0x14,
+ [SPM_REG_AVS_CTL] = 0x18,
[SPM_REG_SPM_CTL] = 0x20,
[SPM_REG_PMIC_DLY] = 0x24,
[SPM_REG_PMIC_DATA_0] = 0x28,
@@ -176,7 +231,12 @@ static const u16 spm_reg_offset_v1_1[SPM_REG_NR] = {
[SPM_REG_SEQ_ENTRY] = 0x80,
};
+static void smp_set_vdd_v1_1(void *data);
+
/* SPM register data for 8064 */
+static struct linear_range spm_v1_1_regulator_range =
+ REGULATOR_LINEAR_RANGE(700000, 0, 56, 12500);
+
static const struct spm_reg_data spm_reg_8064_cpu = {
.reg_offset = spm_reg_offset_v1_1,
.spm_cfg = 0x1F,
@@ -187,6 +247,10 @@ static const struct spm_reg_data spm_reg_8064_cpu = {
0x10, 0x54, 0x30, 0x0C, 0x24, 0x30, 0x0F },
.start_index[PM_SLEEP_MODE_STBY] = 0,
.start_index[PM_SLEEP_MODE_SPC] = 2,
+ .set_vdd = smp_set_vdd_v1_1,
+ .range = &spm_v1_1_regulator_range,
+ .init_uV = 1300000,
+ .ramp_delay = 1250,
};
static inline void spm_register_write(struct spm_driver_data *drv,
@@ -238,6 +302,178 @@ void spm_set_low_power_mode(struct spm_driver_data *drv,
spm_register_write_sync(drv, SPM_REG_SPM_CTL, ctl_val);
}
+static int spm_set_voltage_sel(struct regulator_dev *rdev, unsigned int selector)
+{
+ struct spm_driver_data *drv = rdev_get_drvdata(rdev);
+
+ drv->volt_sel = selector;
+
+ /* Always do the SAW register writes on the corresponding CPU */
+ return smp_call_function_single(drv->reg_cpu, drv->reg_data->set_vdd, drv, true);
+}
+
+static int spm_get_voltage_sel(struct regulator_dev *rdev)
+{
+ struct spm_driver_data *drv = rdev_get_drvdata(rdev);
+
+ return drv->volt_sel;
+}
+
+static const struct regulator_ops spm_reg_ops = {
+ .set_voltage_sel = spm_set_voltage_sel,
+ .get_voltage_sel = spm_get_voltage_sel,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+};
+
+static void smp_set_vdd_v1_1(void *data)
+{
+ struct spm_driver_data *drv = data;
+ unsigned int vctl, data0, data1, avs_ctl, sts;
+ unsigned int vlevel, volt_sel;
+ bool avs_enabled;
+
+ volt_sel = drv->volt_sel;
+ vlevel = volt_sel | 0x80; /* band */
+
+ avs_ctl = spm_register_read(drv, SPM_REG_AVS_CTL);
+ vctl = spm_register_read(drv, SPM_REG_VCTL);
+ data0 = spm_register_read(drv, SPM_REG_PMIC_DATA_0);
+ data1 = spm_register_read(drv, SPM_REG_PMIC_DATA_1);
+
+ avs_enabled = avs_ctl & SPM_1_1_AVS_CTL_AVS_ENABLED;
+
+ /* If AVS is enabled, switch it off during the voltage change */
+ if (avs_enabled) {
+ avs_ctl &= ~SPM_1_1_AVS_CTL_AVS_ENABLED;
+ spm_register_write(drv, SPM_REG_AVS_CTL, avs_ctl);
+ }
+
+ /* Kick the state machine back to idle */
+ spm_register_write(drv, SPM_REG_RST, 1);
+
+ vctl = FIELD_SET(vctl, SPM_VCTL_VLVL, vlevel);
+ data0 = FIELD_SET(data0, SPM_PMIC_DATA_0_VLVL, vlevel);
+ data1 = FIELD_SET(data1, SPM_PMIC_DATA_1_MIN_VSEL, volt_sel);
+ data1 = FIELD_SET(data1, SPM_PMIC_DATA_1_MAX_VSEL, volt_sel);
+
+ spm_register_write(drv, SPM_REG_VCTL, vctl);
+ spm_register_write(drv, SPM_REG_PMIC_DATA_0, data0);
+ spm_register_write(drv, SPM_REG_PMIC_DATA_1, data1);
+
+ if (read_poll_timeout_atomic(spm_register_read,
+ sts, sts == vlevel,
+ 1, 200, false,
+ drv, SPM_REG_STS1)) {
+ dev_err_ratelimited(drv->dev, "timeout setting the voltage (%x %x)!\n", sts, vlevel);
+ goto enable_avs;
+ }
+
+ if (avs_enabled) {
+ unsigned int max_avs = volt_sel;
+ unsigned int min_avs = max(max_avs, 4U) - 4;
+
+ avs_ctl = FIELD_SET(avs_ctl, SPM_AVS_CTL_MIN_VLVL, min_avs);
+ avs_ctl = FIELD_SET(avs_ctl, SPM_AVS_CTL_MAX_VLVL, max_avs);
+ spm_register_write(drv, SPM_REG_AVS_CTL, avs_ctl);
+ }
+
+enable_avs:
+ if (avs_enabled) {
+ avs_ctl |= SPM_1_1_AVS_CTL_AVS_ENABLED;
+ spm_register_write(drv, SPM_REG_AVS_CTL, avs_ctl);
+ }
+}
+
+static int spm_get_cpu(struct device *dev)
+{
+ int cpu;
+ bool found;
+
+ for_each_possible_cpu(cpu) {
+ struct device_node *cpu_node, *saw_node;
+
+ cpu_node = of_cpu_device_node_get(cpu);
+ if (!cpu_node)
+ continue;
+
+ saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0);
+ found = (saw_node == dev->of_node);
+ of_node_put(saw_node);
+ of_node_put(cpu_node);
+
+ if (found)
+ return cpu;
+ }
+
+ /* L2 SPM is not bound to any CPU, voltage setting is not supported */
+
+ return -EOPNOTSUPP;
+}
+
+static int spm_register_regulator(struct device *dev, struct spm_driver_data *drv)
+{
+ struct regulator_config config = {
+ .dev = dev,
+ .driver_data = drv,
+ };
+ struct regulator_desc *rdesc;
+ struct regulator_dev *rdev;
+ int ret;
+ bool found;
+
+ if (!drv->reg_data->set_vdd)
+ return 0;
+
+ rdesc = devm_kzalloc(dev, sizeof(*rdesc), GFP_KERNEL);
+ if (!rdesc)
+ return -ENOMEM;
+
+ rdesc->name = "spm";
+ rdesc->of_match = of_match_ptr("regulator");
+ rdesc->type = REGULATOR_VOLTAGE;
+ rdesc->owner = THIS_MODULE;
+ rdesc->ops = &spm_reg_ops;
+
+ rdesc->linear_ranges = drv->reg_data->range;
+ rdesc->n_linear_ranges = 1;
+ rdesc->n_voltages = rdesc->linear_ranges[rdesc->n_linear_ranges - 1].max_sel + 1;
+ rdesc->ramp_delay = drv->reg_data->ramp_delay;
+
+ ret = spm_get_cpu(dev);
+ if (ret < 0)
+ return ret;
+
+ drv->reg_cpu = ret;
+ dev_dbg(dev, "SAW2 bound to CPU %d\n", drv->reg_cpu);
+
+ /*
+ * Program initial voltage, otherwise registration will also try
+ * setting the voltage, which might result in undervolting the CPU.
+ */
+ drv->volt_sel = DIV_ROUND_UP(drv->reg_data->init_uV - rdesc->min_uV,
+ rdesc->uV_step);
+ ret = linear_range_get_selector_high(drv->reg_data->range,
+ drv->reg_data->init_uV,
+ &drv->volt_sel,
+ &found);
+ if (ret) {
+ dev_err(dev, "Initial uV value out of bounds\n");
+ return ret;
+ }
+
+ /* Always do the SAW register writes on the corresponding CPU */
+ smp_call_function_single(drv->reg_cpu, drv->reg_data->set_vdd, drv, true);
+
+ rdev = devm_regulator_register(dev, rdesc, &config);
+ if (IS_ERR(rdev)) {
+ dev_err(dev, "failed to register regulator\n");
+ return PTR_ERR(rdev);
+ }
+
+ return 0;
+}
+
static const struct of_device_id spm_match_table[] = {
{ .compatible = "qcom,sdm660-gold-saw2-v4.1-l2",
.data = &spm_reg_660_gold_l2 },
@@ -288,6 +524,7 @@ static int spm_dev_probe(struct platform_device *pdev)
return -ENODEV;
drv->reg_data = match_id->data;
+ drv->dev = &pdev->dev;
platform_set_drvdata(pdev, drv);
/* Write the SPM sequences first.. */
@@ -315,6 +552,9 @@ static int spm_dev_probe(struct platform_device *pdev)
if (drv->reg_data->reg_offset[SPM_REG_SPM_CTL])
spm_set_low_power_mode(drv, PM_SLEEP_MODE_STBY);
+ if (IS_ENABLED(CONFIG_REGULATOR))
+ return spm_register_regulator(&pdev->dev, drv);
+
return 0;
}
diff --git a/drivers/soc/qcom/trace-aoss.h b/drivers/soc/qcom/trace-aoss.h
new file mode 100644
index 000000000000..554029b33b44
--- /dev/null
+++ b/drivers/soc/qcom/trace-aoss.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM qcom_aoss
+
+#if !defined(_TRACE_QCOM_AOSS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_QCOM_AOSS_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(aoss_send,
+ TP_PROTO(const char *msg),
+ TP_ARGS(msg),
+ TP_STRUCT__entry(
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ __assign_str(msg, msg);
+ ),
+ TP_printk("%s", __get_str(msg))
+);
+
+TRACE_EVENT(aoss_send_done,
+ TP_PROTO(const char *msg, int ret),
+ TP_ARGS(msg, ret),
+ TP_STRUCT__entry(
+ __string(msg, msg)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __assign_str(msg, msg);
+ __entry->ret = ret;
+ ),
+ TP_printk("%s: %d", __get_str(msg), __entry->ret)
+);
+
+#endif /* _TRACE_QCOM_AOSS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-aoss
+
+#include <trace/define_trace.h>
diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig
index 0986672f6375..5deca747fb77 100644
--- a/drivers/soc/renesas/Kconfig
+++ b/drivers/soc/renesas/Kconfig
@@ -34,6 +34,10 @@ config ARCH_RCAR_GEN3
select SYS_SUPPORTS_SH_CMT
select SYS_SUPPORTS_SH_TMU
+config ARCH_RCAR_GEN4
+ bool
+ select ARCH_RCAR_GEN3
+
config ARCH_RMOBILE
bool
select PM
@@ -240,7 +244,7 @@ config ARCH_R8A77961
config ARCH_R8A779F0
bool "ARM64 Platform support for R-Car S4-8"
- select ARCH_RCAR_GEN3
+ select ARCH_RCAR_GEN4
select SYSC_R8A779F0
help
This enables support for the Renesas R-Car S4-8 SoC.
@@ -261,18 +265,25 @@ config ARCH_R8A77970
config ARCH_R8A779A0
bool "ARM64 Platform support for R-Car V3U"
- select ARCH_RCAR_GEN3
+ select ARCH_RCAR_GEN4
select SYSC_R8A779A0
help
This enables support for the Renesas R-Car V3U SoC.
config ARCH_R8A779G0
bool "ARM64 Platform support for R-Car V4H"
- select ARCH_RCAR_GEN3
+ select ARCH_RCAR_GEN4
select SYSC_R8A779G0
help
This enables support for the Renesas R-Car V4H SoC.
+config ARCH_R8A779H0
+ bool "ARM64 Platform support for R-Car V4M"
+ select ARCH_RCAR_GEN4
+ select SYSC_R8A779H0
+ help
+ This enables support for the Renesas R-Car V4M SoC.
+
config ARCH_R8A774C0
bool "ARM64 Platform support for RZ/G2E"
select ARCH_RCAR_GEN3
diff --git a/drivers/soc/renesas/rcar-rst.c b/drivers/soc/renesas/rcar-rst.c
index 98fd97da6cd4..7ba02f3a4a4f 100644
--- a/drivers/soc/renesas/rcar-rst.c
+++ b/drivers/soc/renesas/rcar-rst.c
@@ -117,6 +117,7 @@ static const struct of_device_id rcar_rst_matches[] __initconst = {
{ .compatible = "renesas,r8a779a0-rst", .data = &rcar_rst_v3u },
{ .compatible = "renesas,r8a779f0-rst", .data = &rcar_rst_gen4 },
{ .compatible = "renesas,r8a779g0-rst", .data = &rcar_rst_gen4 },
+ { .compatible = "renesas,r8a779h0-rst", .data = &rcar_rst_gen4 },
{ /* sentinel */ }
};
diff --git a/drivers/soc/renesas/renesas-soc.c b/drivers/soc/renesas/renesas-soc.c
index 27eae1a354ab..8f9b8d3736dc 100644
--- a/drivers/soc/renesas/renesas-soc.c
+++ b/drivers/soc/renesas/renesas-soc.c
@@ -270,6 +270,11 @@ static const struct renesas_soc soc_rcar_v4h __initconst __maybe_unused = {
.id = 0x5c,
};
+static const struct renesas_soc soc_rcar_v4m __initconst __maybe_unused = {
+ .family = &fam_rcar_gen4,
+ .id = 0x5d,
+};
+
static const struct renesas_soc soc_shmobile_ag5 __initconst __maybe_unused = {
.family = &fam_shmobile,
.id = 0x37,
@@ -380,6 +385,9 @@ static const struct of_device_id renesas_socs[] __initconst __maybe_unused = {
#ifdef CONFIG_ARCH_R8A779G0
{ .compatible = "renesas,r8a779g0", .data = &soc_rcar_v4h },
#endif
+#ifdef CONFIG_ARCH_R8A779H0
+ { .compatible = "renesas,r8a779h0", .data = &soc_rcar_v4m },
+#endif
#ifdef CONFIG_ARCH_R9A07G043
#ifdef CONFIG_RISCV
{ .compatible = "renesas,r9a07g043", .data = &soc_rz_five },
diff --git a/drivers/soc/samsung/Kconfig b/drivers/soc/samsung/Kconfig
index 27ec99af77e3..1a5dfdc978dc 100644
--- a/drivers/soc/samsung/Kconfig
+++ b/drivers/soc/samsung/Kconfig
@@ -42,6 +42,7 @@ config EXYNOS_PMU
depends on ARCH_EXYNOS || ((ARM || ARM64) && COMPILE_TEST)
select EXYNOS_PMU_ARM_DRIVERS if ARM && ARCH_EXYNOS
select MFD_CORE
+ select REGMAP_MMIO
# There is no need to enable these drivers for ARMv8
config EXYNOS_PMU_ARM_DRIVERS
diff --git a/drivers/soc/samsung/exynos-pmu.c b/drivers/soc/samsung/exynos-pmu.c
index 250537d7cfd6..fd8b6ac06656 100644
--- a/drivers/soc/samsung/exynos-pmu.c
+++ b/drivers/soc/samsung/exynos-pmu.c
@@ -5,6 +5,7 @@
//
// Exynos - CPU PMU(Power Management Unit) support
+#include <linux/arm-smccc.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/mfd/core.h>
@@ -12,19 +13,134 @@
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
+#include <linux/regmap.h>
#include <linux/soc/samsung/exynos-regs-pmu.h>
#include <linux/soc/samsung/exynos-pmu.h>
#include "exynos-pmu.h"
+#define PMUALIVE_MASK GENMASK(13, 0)
+#define TENSOR_SET_BITS (BIT(15) | BIT(14))
+#define TENSOR_CLR_BITS BIT(15)
+#define TENSOR_SMC_PMU_SEC_REG 0x82000504
+#define TENSOR_PMUREG_READ 0
+#define TENSOR_PMUREG_WRITE 1
+#define TENSOR_PMUREG_RMW 2
+
struct exynos_pmu_context {
struct device *dev;
const struct exynos_pmu_data *pmu_data;
+ struct regmap *pmureg;
};
void __iomem *pmu_base_addr;
static struct exynos_pmu_context *pmu_context;
+/* forward declaration */
+static struct platform_driver exynos_pmu_driver;
+
+/*
+ * Tensor SoCs are configured so that PMU_ALIVE registers can only be written
+ * from EL3, but are still read accessible. As Linux needs to write some of
+ * these registers, the following functions are provided and exposed via
+ * regmap.
+ *
+ * Note: This SMC interface is known to be implemented on gs101 and derivative
+ * SoCs.
+ */
+
+/* Write to a protected PMU register. */
+static int tensor_sec_reg_write(void *context, unsigned int reg,
+ unsigned int val)
+{
+ struct arm_smccc_res res;
+ unsigned long pmu_base = (unsigned long)context;
+
+ arm_smccc_smc(TENSOR_SMC_PMU_SEC_REG, pmu_base + reg,
+ TENSOR_PMUREG_WRITE, val, 0, 0, 0, 0, &res);
+
+ /* returns -EINVAL if access isn't allowed or 0 */
+ if (res.a0)
+ pr_warn("%s(): SMC failed: %d\n", __func__, (int)res.a0);
+
+ return (int)res.a0;
+}
+
+/* Read/Modify/Write a protected PMU register. */
+static int tensor_sec_reg_rmw(void *context, unsigned int reg,
+ unsigned int mask, unsigned int val)
+{
+ struct arm_smccc_res res;
+ unsigned long pmu_base = (unsigned long)context;
+
+ arm_smccc_smc(TENSOR_SMC_PMU_SEC_REG, pmu_base + reg,
+ TENSOR_PMUREG_RMW, mask, val, 0, 0, 0, &res);
+
+ /* returns -EINVAL if access isn't allowed or 0 */
+ if (res.a0)
+ pr_warn("%s(): SMC failed: %d\n", __func__, (int)res.a0);
+
+ return (int)res.a0;
+}
+
+/*
+ * Read a protected PMU register. All PMU registers can be read by Linux.
+ * Note: The SMC read register is not used, as only registers that can be
+ * written are readable via SMC.
+ */
+static int tensor_sec_reg_read(void *context, unsigned int reg,
+ unsigned int *val)
+{
+ *val = pmu_raw_readl(reg);
+ return 0;
+}
+
+/*
+ * For SoCs that have set/clear bit hardware this function can be used when
+ * the PMU register will be accessed by multiple masters.
+ *
+ * For example, to set bits 13:8 in PMU reg offset 0x3e80
+ * tensor_set_bits_atomic(ctx, 0x3e80, 0x3f00, 0x3f00);
+ *
+ * Set bit 8, and clear bits 13:9 PMU reg offset 0x3e80
+ * tensor_set_bits_atomic(0x3e80, 0x100, 0x3f00);
+ */
+static int tensor_set_bits_atomic(void *ctx, unsigned int offset, u32 val,
+ u32 mask)
+{
+ int ret;
+ unsigned int i;
+
+ for (i = 0; i < 32; i++) {
+ if (!(mask & BIT(i)))
+ continue;
+
+ offset &= ~TENSOR_SET_BITS;
+
+ if (val & BIT(i))
+ offset |= TENSOR_SET_BITS;
+ else
+ offset |= TENSOR_CLR_BITS;
+
+ ret = tensor_sec_reg_write(ctx, offset, i);
+ if (ret)
+ return ret;
+ }
+ return ret;
+}
+
+static int tensor_sec_update_bits(void *ctx, unsigned int reg,
+ unsigned int mask, unsigned int val)
+{
+ /*
+ * Use atomic operations for PMU_ALIVE registers (offset 0~0x3FFF)
+ * as the target registers can be accessed by multiple masters.
+ */
+ if (reg > PMUALIVE_MASK)
+ return tensor_sec_reg_rmw(ctx, reg, mask, val);
+
+ return tensor_set_bits_atomic(ctx, reg, val, mask);
+}
void pmu_raw_writel(u32 val, u32 offset)
{
@@ -75,11 +191,41 @@ void exynos_sys_powerdown_conf(enum sys_powerdown mode)
#define exynos_pmu_data_arm_ptr(data) NULL
#endif
+static const struct regmap_config regmap_smccfg = {
+ .name = "pmu_regs",
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .fast_io = true,
+ .use_single_read = true,
+ .use_single_write = true,
+ .reg_read = tensor_sec_reg_read,
+ .reg_write = tensor_sec_reg_write,
+ .reg_update_bits = tensor_sec_update_bits,
+};
+
+static const struct regmap_config regmap_mmiocfg = {
+ .name = "pmu_regs",
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .fast_io = true,
+ .use_single_read = true,
+ .use_single_write = true,
+};
+
+static const struct exynos_pmu_data gs101_pmu_data = {
+ .pmu_secure = true
+};
+
/*
* PMU platform driver and devicetree bindings.
*/
static const struct of_device_id exynos_pmu_of_device_ids[] = {
{
+ .compatible = "google,gs101-pmu",
+ .data = &gs101_pmu_data,
+ }, {
.compatible = "samsung,exynos3250-pmu",
.data = exynos_pmu_data_arm_ptr(exynos3250_pmu_data),
}, {
@@ -113,19 +259,75 @@ static const struct mfd_cell exynos_pmu_devs[] = {
{ .name = "exynos-clkout", },
};
+/**
+ * exynos_get_pmu_regmap() - Obtain pmureg regmap
+ *
+ * Find the pmureg regmap previously configured in probe() and return regmap
+ * pointer.
+ *
+ * Return: A pointer to regmap if found or ERR_PTR error value.
+ */
struct regmap *exynos_get_pmu_regmap(void)
{
struct device_node *np = of_find_matching_node(NULL,
exynos_pmu_of_device_ids);
if (np)
- return syscon_node_to_regmap(np);
+ return exynos_get_pmu_regmap_by_phandle(np, NULL);
return ERR_PTR(-ENODEV);
}
EXPORT_SYMBOL_GPL(exynos_get_pmu_regmap);
+/**
+ * exynos_get_pmu_regmap_by_phandle() - Obtain pmureg regmap via phandle
+ * @np: Device node holding PMU phandle property
+ * @propname: Name of property holding phandle value
+ *
+ * Find the pmureg regmap previously configured in probe() and return regmap
+ * pointer.
+ *
+ * Return: A pointer to regmap if found or ERR_PTR error value.
+ */
+struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
+ const char *propname)
+{
+ struct exynos_pmu_context *ctx;
+ struct device_node *pmu_np;
+ struct device *dev;
+
+ if (propname)
+ pmu_np = of_parse_phandle(np, propname, 0);
+ else
+ pmu_np = np;
+
+ if (!pmu_np)
+ return ERR_PTR(-ENODEV);
+
+ /*
+ * Determine if exynos-pmu device has probed and therefore regmap
+ * has been created and can be returned to the caller. Otherwise we
+ * return -EPROBE_DEFER.
+ */
+ dev = driver_find_device_by_of_node(&exynos_pmu_driver.driver,
+ (void *)pmu_np);
+
+ if (propname)
+ of_node_put(pmu_np);
+
+ if (!dev)
+ return ERR_PTR(-EPROBE_DEFER);
+
+ ctx = dev_get_drvdata(dev);
+
+ return ctx->pmureg;
+}
+EXPORT_SYMBOL_GPL(exynos_get_pmu_regmap_by_phandle);
+
static int exynos_pmu_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct regmap_config pmu_regmcfg;
+ struct regmap *regmap;
+ struct resource *res;
int ret;
pmu_base_addr = devm_platform_ioremap_resource(pdev, 0);
@@ -137,9 +339,38 @@ static int exynos_pmu_probe(struct platform_device *pdev)
GFP_KERNEL);
if (!pmu_context)
return -ENOMEM;
- pmu_context->dev = dev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
pmu_context->pmu_data = of_device_get_match_data(dev);
+ /* For SoCs that secure PMU register writes use custom regmap */
+ if (pmu_context->pmu_data && pmu_context->pmu_data->pmu_secure) {
+ pmu_regmcfg = regmap_smccfg;
+ pmu_regmcfg.max_register = resource_size(res) -
+ pmu_regmcfg.reg_stride;
+ /* Need physical address for SMC call */
+ regmap = devm_regmap_init(dev, NULL,
+ (void *)(uintptr_t)res->start,
+ &pmu_regmcfg);
+ } else {
+ /* All other SoCs use a MMIO regmap */
+ pmu_regmcfg = regmap_mmiocfg;
+ pmu_regmcfg.max_register = resource_size(res) -
+ pmu_regmcfg.reg_stride;
+ regmap = devm_regmap_init_mmio(dev, pmu_base_addr,
+ &pmu_regmcfg);
+ }
+
+ if (IS_ERR(regmap))
+ return dev_err_probe(&pdev->dev, PTR_ERR(regmap),
+ "regmap init failed\n");
+
+ pmu_context->pmureg = regmap;
+ pmu_context->dev = dev;
+
if (pmu_context->pmu_data && pmu_context->pmu_data->pmu_init)
pmu_context->pmu_data->pmu_init();
diff --git a/drivers/soc/samsung/exynos-pmu.h b/drivers/soc/samsung/exynos-pmu.h
index 1c652ffd79b4..0a49a2c9a08e 100644
--- a/drivers/soc/samsung/exynos-pmu.h
+++ b/drivers/soc/samsung/exynos-pmu.h
@@ -21,6 +21,7 @@ struct exynos_pmu_conf {
struct exynos_pmu_data {
const struct exynos_pmu_conf *pmu_config;
const struct exynos_pmu_conf *pmu_config_extra;
+ bool pmu_secure;
void (*pmu_init)(void);
void (*powerdown_conf)(enum sys_powerdown);
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
index f16beeabaa92..33512558af9f 100644
--- a/drivers/soc/tegra/Kconfig
+++ b/drivers/soc/tegra/Kconfig
@@ -133,6 +133,11 @@ config ARCH_TEGRA_234_SOC
help
Enable support for the NVIDIA Tegra234 SoC.
+config ARCH_TEGRA_241_SOC
+ bool "NVIDIA Tegra241 SoC"
+ help
+ Enable support for the NVIDIA Tegra241 SoC.
+
endif
endif
diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c
index a2c28f493a75..b6bfd6729df3 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra.c
@@ -3,11 +3,13 @@
* Copyright (c) 2013-2023, NVIDIA CORPORATION. All rights reserved.
*/
+#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/init.h>
#include <linux/io.h>
+#include <linux/mod_devicetable.h>
#include <linux/nvmem-consumer.h>
#include <linux/nvmem-provider.h>
#include <linux/of.h>
@@ -113,6 +115,28 @@ static void tegra_fuse_restore(void *base)
fuse->clk = NULL;
}
+static void tegra_fuse_print_sku_info(struct tegra_sku_info *tegra_sku_info)
+{
+ pr_info("Tegra Revision: %s SKU: %d CPU Process: %d SoC Process: %d\n",
+ tegra_revision_name[tegra_sku_info->revision],
+ tegra_sku_info->sku_id, tegra_sku_info->cpu_process_id,
+ tegra_sku_info->soc_process_id);
+ pr_debug("Tegra CPU Speedo ID %d, SoC Speedo ID %d\n",
+ tegra_sku_info->cpu_speedo_id, tegra_sku_info->soc_speedo_id);
+}
+
+static int tegra_fuse_add_lookups(struct tegra_fuse *fuse)
+{
+ fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups),
+ fuse->soc->num_lookups, GFP_KERNEL);
+ if (!fuse->lookups)
+ return -ENOMEM;
+
+ nvmem_add_cell_lookups(fuse->lookups, fuse->soc->num_lookups);
+
+ return 0;
+}
+
static int tegra_fuse_probe(struct platform_device *pdev)
{
void __iomem *base = fuse->base;
@@ -130,15 +154,46 @@ static int tegra_fuse_probe(struct platform_device *pdev)
return PTR_ERR(fuse->base);
fuse->phys = res->start;
- fuse->clk = devm_clk_get(&pdev->dev, "fuse");
- if (IS_ERR(fuse->clk)) {
- if (PTR_ERR(fuse->clk) != -EPROBE_DEFER)
- dev_err(&pdev->dev, "failed to get FUSE clock: %ld",
- PTR_ERR(fuse->clk));
+ /* Initialize the soc data and lookups if using ACPI boot. */
+ if (is_acpi_node(dev_fwnode(&pdev->dev)) && !fuse->soc) {
+ u8 chip;
- return PTR_ERR(fuse->clk);
+ tegra_acpi_init_apbmisc();
+
+ chip = tegra_get_chip_id();
+ switch (chip) {
+#if defined(CONFIG_ARCH_TEGRA_194_SOC)
+ case TEGRA194:
+ fuse->soc = &tegra194_fuse_soc;
+ break;
+#endif
+#if defined(CONFIG_ARCH_TEGRA_234_SOC)
+ case TEGRA234:
+ fuse->soc = &tegra234_fuse_soc;
+ break;
+#endif
+#if defined(CONFIG_ARCH_TEGRA_241_SOC)
+ case TEGRA241:
+ fuse->soc = &tegra241_fuse_soc;
+ break;
+#endif
+ default:
+ return dev_err_probe(&pdev->dev, -EINVAL, "Unsupported SoC: %02x\n", chip);
+ }
+
+ fuse->soc->init(fuse);
+ tegra_fuse_print_sku_info(&tegra_sku_info);
+ tegra_soc_device_register();
+
+ err = tegra_fuse_add_lookups(fuse);
+ if (err)
+ return dev_err_probe(&pdev->dev, err, "failed to add FUSE lookups\n");
}
+ fuse->clk = devm_clk_get_optional(&pdev->dev, "fuse");
+ if (IS_ERR(fuse->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(fuse->clk), "failed to get FUSE clock\n");
+
platform_set_drvdata(pdev, fuse);
fuse->dev = &pdev->dev;
@@ -179,12 +234,8 @@ static int tegra_fuse_probe(struct platform_device *pdev)
}
fuse->rst = devm_reset_control_get_optional(&pdev->dev, "fuse");
- if (IS_ERR(fuse->rst)) {
- err = PTR_ERR(fuse->rst);
- dev_err(&pdev->dev, "failed to get FUSE reset: %pe\n",
- fuse->rst);
- return err;
- }
+ if (IS_ERR(fuse->rst))
+ return dev_err_probe(&pdev->dev, PTR_ERR(fuse->rst), "failed to get FUSE reset\n");
/*
* FUSE clock is enabled at a boot time, hence this resume/suspend
@@ -262,10 +313,17 @@ static const struct dev_pm_ops tegra_fuse_pm = {
SET_SYSTEM_SLEEP_PM_OPS(tegra_fuse_suspend, tegra_fuse_resume)
};
+static const struct acpi_device_id tegra_fuse_acpi_match[] = {
+ { "NVDA200F" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, tegra_fuse_acpi_match);
+
static struct platform_driver tegra_fuse_driver = {
.driver = {
.name = "tegra-fuse",
.of_match_table = tegra_fuse_match,
+ .acpi_match_table = tegra_fuse_acpi_match,
.pm = &tegra_fuse_pm,
.suppress_bind_attrs = true,
},
@@ -287,7 +345,16 @@ u32 __init tegra_fuse_read_early(unsigned int offset)
int tegra_fuse_readl(unsigned long offset, u32 *value)
{
- if (!fuse->read || !fuse->clk)
+ if (!fuse->dev)
+ return -EPROBE_DEFER;
+
+ /*
+ * Wait for fuse->clk to be initialized if device-tree boot is used.
+ */
+ if (is_of_node(dev_fwnode(fuse->dev)) && !fuse->clk)
+ return -EPROBE_DEFER;
+
+ if (!fuse->read)
return -EPROBE_DEFER;
if (IS_ERR(fuse->clk))
@@ -343,7 +410,8 @@ const struct attribute_group tegra_soc_attr_group = {
};
#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC) || \
- IS_ENABLED(CONFIG_ARCH_TEGRA_234_SOC)
+ IS_ENABLED(CONFIG_ARCH_TEGRA_234_SOC) || \
+ IS_ENABLED(CONFIG_ARCH_TEGRA_241_SOC)
static ssize_t platform_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -370,7 +438,7 @@ const struct attribute_group tegra194_soc_attr_group = {
};
#endif
-struct device * __init tegra_soc_device_register(void)
+struct device *tegra_soc_device_register(void)
{
struct soc_device_attribute *attr;
struct soc_device *dev;
@@ -407,6 +475,7 @@ static int __init tegra_init_fuse(void)
const struct of_device_id *match;
struct device_node *np;
struct resource regs;
+ int err;
tegra_init_apbmisc();
@@ -497,22 +566,13 @@ static int __init tegra_init_fuse(void)
fuse->soc->init(fuse);
- pr_info("Tegra Revision: %s SKU: %d CPU Process: %d SoC Process: %d\n",
- tegra_revision_name[tegra_sku_info.revision],
- tegra_sku_info.sku_id, tegra_sku_info.cpu_process_id,
- tegra_sku_info.soc_process_id);
- pr_debug("Tegra CPU Speedo ID %d, SoC Speedo ID %d\n",
- tegra_sku_info.cpu_speedo_id, tegra_sku_info.soc_speedo_id);
+ tegra_fuse_print_sku_info(&tegra_sku_info);
- if (fuse->soc->lookups) {
- size_t size = sizeof(*fuse->lookups) * fuse->soc->num_lookups;
-
- fuse->lookups = kmemdup(fuse->soc->lookups, size, GFP_KERNEL);
- if (fuse->lookups)
- nvmem_add_cell_lookups(fuse->lookups, fuse->soc->num_lookups);
- }
+ err = tegra_fuse_add_lookups(fuse);
+ if (err)
+ pr_err("failed to add FUSE lookups\n");
- return 0;
+ return err;
}
early_initcall(tegra_init_fuse);
diff --git a/drivers/soc/tegra/fuse/fuse-tegra30.c b/drivers/soc/tegra/fuse/fuse-tegra30.c
index e94d46372a63..eb14e5ff5a0a 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra30.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra30.c
@@ -38,7 +38,8 @@
defined(CONFIG_ARCH_TEGRA_210_SOC) || \
defined(CONFIG_ARCH_TEGRA_186_SOC) || \
defined(CONFIG_ARCH_TEGRA_194_SOC) || \
- defined(CONFIG_ARCH_TEGRA_234_SOC)
+ defined(CONFIG_ARCH_TEGRA_234_SOC) || \
+ defined(CONFIG_ARCH_TEGRA_241_SOC)
static u32 tegra30_fuse_read_early(struct tegra_fuse *fuse, unsigned int offset)
{
if (WARN_ON(!fuse->base))
@@ -678,3 +679,23 @@ const struct tegra_fuse_soc tegra234_fuse_soc = {
.clk_suspend_on = false,
};
#endif
+
+#if defined(CONFIG_ARCH_TEGRA_241_SOC)
+static const struct tegra_fuse_info tegra241_fuse_info = {
+ .read = tegra30_fuse_read,
+ .size = 0x16008,
+ .spare = 0xcf0,
+};
+
+static const struct nvmem_keepout tegra241_fuse_keepouts[] = {
+ { .start = 0xc, .end = 0x1600c }
+};
+
+const struct tegra_fuse_soc tegra241_fuse_soc = {
+ .init = tegra30_fuse_init,
+ .info = &tegra241_fuse_info,
+ .keepouts = tegra241_fuse_keepouts,
+ .num_keepouts = ARRAY_SIZE(tegra241_fuse_keepouts),
+ .soc_attr_group = &tegra194_soc_attr_group,
+};
+#endif
diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h
index 90f23be73894..9fee6ad6ad9e 100644
--- a/drivers/soc/tegra/fuse/fuse.h
+++ b/drivers/soc/tegra/fuse/fuse.h
@@ -69,6 +69,7 @@ struct tegra_fuse {
void tegra_init_revision(void);
void tegra_init_apbmisc(void);
+void tegra_acpi_init_apbmisc(void);
u32 __init tegra_fuse_read_spare(unsigned int spare);
u32 __init tegra_fuse_read_early(unsigned int offset);
@@ -123,7 +124,8 @@ extern const struct tegra_fuse_soc tegra186_fuse_soc;
#endif
#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC) || \
- IS_ENABLED(CONFIG_ARCH_TEGRA_234_SOC)
+ IS_ENABLED(CONFIG_ARCH_TEGRA_234_SOC) || \
+ IS_ENABLED(CONFIG_ARCH_TEGRA_241_SOC)
extern const struct attribute_group tegra194_soc_attr_group;
#endif
@@ -135,4 +137,8 @@ extern const struct tegra_fuse_soc tegra194_fuse_soc;
extern const struct tegra_fuse_soc tegra234_fuse_soc;
#endif
+#ifdef CONFIG_ARCH_TEGRA_241_SOC
+extern const struct tegra_fuse_soc tegra241_fuse_soc;
+#endif
+
#endif
diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c
index da970f3dbf35..e2ca5d55fd31 100644
--- a/drivers/soc/tegra/fuse/tegra-apbmisc.c
+++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c
@@ -3,9 +3,11 @@
* Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
*/
+#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -62,6 +64,7 @@ bool tegra_is_silicon(void)
switch (tegra_get_chip_id()) {
case TEGRA194:
case TEGRA234:
+ case TEGRA241:
case TEGRA264:
if (tegra_get_platform() == 0)
return true;
@@ -160,9 +163,34 @@ void __init tegra_init_revision(void)
tegra_sku_info.platform = tegra_get_platform();
}
-void __init tegra_init_apbmisc(void)
+static void tegra_init_apbmisc_resources(struct resource *apbmisc,
+ struct resource *straps)
{
void __iomem *strapping_base;
+
+ apbmisc_base = ioremap(apbmisc->start, resource_size(apbmisc));
+ if (apbmisc_base)
+ chipid = readl_relaxed(apbmisc_base + 4);
+ else
+ pr_err("failed to map APBMISC registers\n");
+
+ strapping_base = ioremap(straps->start, resource_size(straps));
+ if (strapping_base) {
+ strapping = readl_relaxed(strapping_base);
+ iounmap(strapping_base);
+ } else {
+ pr_err("failed to map strapping options registers\n");
+ }
+}
+
+/**
+ * tegra_init_apbmisc - Initializes Tegra APBMISC and Strapping registers.
+ *
+ * This is called during early init as some of the old 32-bit ARM code needs
+ * information from the APBMISC registers very early during boot.
+ */
+void __init tegra_init_apbmisc(void)
+{
struct resource apbmisc, straps;
struct device_node *np;
@@ -219,23 +247,73 @@ void __init tegra_init_apbmisc(void)
}
}
- apbmisc_base = ioremap(apbmisc.start, resource_size(&apbmisc));
- if (!apbmisc_base) {
- pr_err("failed to map APBMISC registers\n");
- } else {
- chipid = readl_relaxed(apbmisc_base + 4);
+ tegra_init_apbmisc_resources(&apbmisc, &straps);
+ long_ram_code = of_property_read_bool(np, "nvidia,long-ram-code");
+
+put:
+ of_node_put(np);
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id apbmisc_acpi_match[] = {
+ { "NVDA2010" },
+ { /* sentinel */ }
+};
+
+void tegra_acpi_init_apbmisc(void)
+{
+ struct resource *resources[2] = { NULL };
+ struct resource_entry *rentry;
+ struct acpi_device *adev = NULL;
+ struct list_head resource_list;
+ int rcount = 0;
+ int ret;
+
+ adev = acpi_dev_get_first_match_dev(apbmisc_acpi_match[0].id, NULL, -1);
+ if (!adev)
+ return;
+
+ INIT_LIST_HEAD(&resource_list);
+
+ ret = acpi_dev_get_memory_resources(adev, &resource_list);
+ if (ret < 0) {
+ pr_err("failed to get APBMISC memory resources");
+ goto out_put_acpi_dev;
}
- strapping_base = ioremap(straps.start, resource_size(&straps));
- if (!strapping_base) {
- pr_err("failed to map strapping options registers\n");
- } else {
- strapping = readl_relaxed(strapping_base);
- iounmap(strapping_base);
+ /*
+ * Get required memory resources.
+ *
+ * resources[0]: apbmisc.
+ * resources[1]: straps.
+ */
+ resource_list_for_each_entry(rentry, &resource_list) {
+ if (rcount >= ARRAY_SIZE(resources))
+ break;
+
+ resources[rcount++] = rentry->res;
}
- long_ram_code = of_property_read_bool(np, "nvidia,long-ram-code");
+ if (!resources[0]) {
+ pr_err("failed to get APBMISC registers\n");
+ goto out_free_resource_list;
+ }
-put:
- of_node_put(np);
+ if (!resources[1]) {
+ pr_err("failed to get strapping options registers\n");
+ goto out_free_resource_list;
+ }
+
+ tegra_init_apbmisc_resources(resources[0], resources[1]);
+
+out_free_resource_list:
+ acpi_dev_free_resource_list(&resource_list);
+
+out_put_acpi_dev:
+ acpi_dev_put(adev);
+}
+#else
+void tegra_acpi_init_apbmisc(void)
+{
}
+#endif
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index f432aa022ace..d6bfcea5ee65 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -3,7 +3,7 @@
* drivers/soc/tegra/pmc.c
*
* Copyright (c) 2010 Google, Inc
- * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Author:
* Colin Cross <ccross@google.com>
@@ -384,6 +384,7 @@ struct tegra_pmc_soc {
bool has_blink_output;
bool has_usb_sleepwalk;
bool supports_core_domain;
+ bool has_single_mmio_aperture;
};
/**
@@ -1777,30 +1778,6 @@ static int tegra_io_pad_get_voltage(struct tegra_pmc *pmc, enum tegra_io_pad id)
return TEGRA_IO_PAD_VOLTAGE_3V3;
}
-/**
- * tegra_io_rail_power_on() - enable power to I/O rail
- * @id: Tegra I/O pad ID for which to enable power
- *
- * See also: tegra_io_pad_power_enable()
- */
-int tegra_io_rail_power_on(unsigned int id)
-{
- return tegra_io_pad_power_enable(id);
-}
-EXPORT_SYMBOL(tegra_io_rail_power_on);
-
-/**
- * tegra_io_rail_power_off() - disable power to I/O rail
- * @id: Tegra I/O pad ID for which to disable power
- *
- * See also: tegra_io_pad_power_disable()
- */
-int tegra_io_rail_power_off(unsigned int id)
-{
- return tegra_io_pad_power_disable(id);
-}
-EXPORT_SYMBOL(tegra_io_rail_power_off);
-
#ifdef CONFIG_PM_SLEEP
enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void)
{
@@ -2909,31 +2886,33 @@ static int tegra_pmc_probe(struct platform_device *pdev)
if (IS_ERR(base))
return PTR_ERR(base);
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "wake");
- if (res) {
+ if (pmc->soc->has_single_mmio_aperture) {
+ pmc->wake = base;
+ pmc->aotag = base;
+ pmc->scratch = base;
+ } else {
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "wake");
pmc->wake = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(pmc->wake))
return PTR_ERR(pmc->wake);
- } else {
- pmc->wake = base;
- }
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "aotag");
- if (res) {
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "aotag");
pmc->aotag = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(pmc->aotag))
return PTR_ERR(pmc->aotag);
- } else {
- pmc->aotag = base;
- }
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "scratch");
- if (res) {
- pmc->scratch = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(pmc->scratch))
- return PTR_ERR(pmc->scratch);
- } else {
- pmc->scratch = base;
+ /* "scratch" is an optional aperture */
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "scratch");
+ if (res) {
+ pmc->scratch = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(pmc->scratch))
+ return PTR_ERR(pmc->scratch);
+ } else {
+ pmc->scratch = NULL;
+ }
}
pmc->clk = devm_clk_get_optional(&pdev->dev, "pclk");
@@ -2945,12 +2924,15 @@ static int tegra_pmc_probe(struct platform_device *pdev)
* PMC should be last resort for restarting since it soft-resets
* CPU without resetting everything else.
*/
- err = devm_register_reboot_notifier(&pdev->dev,
- &tegra_pmc_reboot_notifier);
- if (err) {
- dev_err(&pdev->dev, "unable to register reboot notifier, %d\n",
- err);
- return err;
+ if (pmc->scratch) {
+ err = devm_register_reboot_notifier(&pdev->dev,
+ &tegra_pmc_reboot_notifier);
+ if (err) {
+ dev_err(&pdev->dev,
+ "unable to register reboot notifier, %d\n",
+ err);
+ return err;
+ }
}
err = devm_register_sys_off_handler(&pdev->dev,
@@ -3324,6 +3306,7 @@ static const struct tegra_pmc_soc tegra20_pmc_soc = {
.num_pmc_clks = 0,
.has_blink_output = true,
.has_usb_sleepwalk = true,
+ .has_single_mmio_aperture = true,
};
static const char * const tegra30_powergates[] = {
@@ -3385,6 +3368,7 @@ static const struct tegra_pmc_soc tegra30_pmc_soc = {
.num_pmc_clks = ARRAY_SIZE(tegra_pmc_clks_data),
.has_blink_output = true,
.has_usb_sleepwalk = true,
+ .has_single_mmio_aperture = true,
};
static const char * const tegra114_powergates[] = {
@@ -3442,6 +3426,7 @@ static const struct tegra_pmc_soc tegra114_pmc_soc = {
.num_pmc_clks = ARRAY_SIZE(tegra_pmc_clks_data),
.has_blink_output = true,
.has_usb_sleepwalk = true,
+ .has_single_mmio_aperture = true,
};
static const char * const tegra124_powergates[] = {
@@ -3586,6 +3571,7 @@ static const struct tegra_pmc_soc tegra124_pmc_soc = {
.num_pmc_clks = ARRAY_SIZE(tegra_pmc_clks_data),
.has_blink_output = true,
.has_usb_sleepwalk = true,
+ .has_single_mmio_aperture = true,
};
static const char * const tegra210_powergates[] = {
@@ -3749,6 +3735,7 @@ static const struct tegra_pmc_soc tegra210_pmc_soc = {
.num_pmc_clks = ARRAY_SIZE(tegra_pmc_clks_data),
.has_blink_output = true,
.has_usb_sleepwalk = true,
+ .has_single_mmio_aperture = true,
};
static const struct tegra_io_pad_soc tegra186_io_pads[] = {
@@ -3946,6 +3933,7 @@ static const struct tegra_pmc_soc tegra186_pmc_soc = {
.num_pmc_clks = 0,
.has_blink_output = false,
.has_usb_sleepwalk = false,
+ .has_single_mmio_aperture = false,
};
static const struct tegra_io_pad_soc tegra194_io_pads[] = {
@@ -4131,6 +4119,7 @@ static const struct tegra_pmc_soc tegra194_pmc_soc = {
.num_pmc_clks = 0,
.has_blink_output = false,
.has_usb_sleepwalk = false,
+ .has_single_mmio_aperture = false,
};
static const struct tegra_io_pad_soc tegra234_io_pads[] = {
@@ -4220,6 +4209,7 @@ static const char * const tegra234_reset_sources[] = {
};
static const struct tegra_wake_event tegra234_wake_events[] = {
+ TEGRA_WAKE_GPIO("sd-wake", 8, 0, TEGRA234_MAIN_GPIO(G, 7)),
TEGRA_WAKE_IRQ("pmu", 24, 209),
TEGRA_WAKE_GPIO("power", 29, 1, TEGRA234_AON_GPIO(EE, 4)),
TEGRA_WAKE_GPIO("mgbe", 56, 0, TEGRA234_MAIN_GPIO(Y, 3)),
@@ -4259,6 +4249,7 @@ static const struct tegra_pmc_soc tegra234_pmc_soc = {
.pmc_clks_data = NULL,
.num_pmc_clks = 0,
.has_blink_output = false,
+ .has_single_mmio_aperture = false,
};
static const struct of_device_id tegra_pmc_match[] = {
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index d96222e6d7d2..cfdaa5eaec76 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -19,7 +19,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spi/spi.h>
-#include <linux/spi/spi-mem.h>
+#include <linux/mtd/spi-nor.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include "spi-bcm-qspi.h"
@@ -1221,7 +1221,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
/* non-aligned and very short transfers are handled by MSPI */
if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) ||
- len < 4)
+ len < 4 || op->cmd.opcode == SPINOR_OP_RDSFDP)
mspi_read = true;
if (!has_bspi(qspi) || mspi_read)
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index f94e0d370d46..1a8d03958dff 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1927,24 +1927,18 @@ static void cqspi_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
}
-static int cqspi_suspend(struct device *dev)
+static int cqspi_runtime_suspend(struct device *dev)
{
struct cqspi_st *cqspi = dev_get_drvdata(dev);
- struct spi_controller *host = dev_get_drvdata(dev);
- int ret;
- ret = spi_controller_suspend(host);
cqspi_controller_enable(cqspi, 0);
-
clk_disable_unprepare(cqspi->clk);
-
- return ret;
+ return 0;
}
-static int cqspi_resume(struct device *dev)
+static int cqspi_runtime_resume(struct device *dev)
{
struct cqspi_st *cqspi = dev_get_drvdata(dev);
- struct spi_controller *host = dev_get_drvdata(dev);
clk_prepare_enable(cqspi->clk);
cqspi_wait_idle(cqspi);
@@ -1952,12 +1946,27 @@ static int cqspi_resume(struct device *dev)
cqspi->current_cs = -1;
cqspi->sclk = 0;
+ return 0;
+}
+
+static int cqspi_suspend(struct device *dev)
+{
+ struct cqspi_st *cqspi = dev_get_drvdata(dev);
+
+ return spi_controller_suspend(cqspi->host);
+}
- return spi_controller_resume(host);
+static int cqspi_resume(struct device *dev)
+{
+ struct cqspi_st *cqspi = dev_get_drvdata(dev);
+
+ return spi_controller_resume(cqspi->host);
}
-static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend,
- cqspi_resume, NULL);
+static const struct dev_pm_ops cqspi_dev_pm_ops = {
+ RUNTIME_PM_OPS(cqspi_runtime_suspend, cqspi_runtime_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(cqspi_suspend, cqspi_resume)
+};
static const struct cqspi_driver_platdata cdns_qspi = {
.quirks = CQSPI_DISABLE_DAC_MODE,
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index a50eb4db79de..e5140532071d 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -317,6 +317,15 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
xspi->rx_bytes -= nrx;
while (ntx || nrx) {
+ if (nrx) {
+ u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD);
+
+ if (xspi->rxbuf)
+ *xspi->rxbuf++ = data;
+
+ nrx--;
+ }
+
if (ntx) {
if (xspi->txbuf)
cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
@@ -326,14 +335,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
ntx--;
}
- if (nrx) {
- u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD);
-
- if (xspi->rxbuf)
- *xspi->rxbuf++ = data;
-
- nrx--;
- }
}
}
diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c
index f13073e12593..adf19e8c4c8a 100644
--- a/drivers/spi/spi-cs42l43.c
+++ b/drivers/spi/spi-cs42l43.c
@@ -148,8 +148,7 @@ static void cs42l43_set_cs(struct spi_device *spi, bool is_high)
{
struct cs42l43_spi *priv = spi_controller_get_devdata(spi->controller);
- if (spi_get_chipselect(spi, 0) == 0)
- regmap_write(priv->regmap, CS42L43_SPI_CONFIG2, !is_high);
+ regmap_write(priv->regmap, CS42L43_SPI_CONFIG2, !is_high);
}
static int cs42l43_prepare_message(struct spi_controller *ctlr, struct spi_message *msg)
@@ -244,7 +243,10 @@ static int cs42l43_spi_probe(struct platform_device *pdev)
priv->ctlr->use_gpio_descriptors = true;
priv->ctlr->auto_runtime_pm = true;
- devm_pm_runtime_enable(priv->dev);
+ ret = devm_pm_runtime_enable(priv->dev);
+ if (ret)
+ return ret;
+
pm_runtime_idle(priv->dev);
regmap_write(priv->regmap, CS42L43_TRAN_CONFIG6, CS42L43_FIFO_SIZE - 1);
diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c
index 9d22018f7985..1301d14483d4 100644
--- a/drivers/spi/spi-hisi-sfc-v3xx.c
+++ b/drivers/spi/spi-hisi-sfc-v3xx.c
@@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = {
static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data)
{
struct hisi_sfc_v3xx_host *host = data;
+ u32 reg;
+
+ reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT);
+ if (!reg)
+ return IRQ_NONE;
hisi_sfc_v3xx_disable_int(host);
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 272bc871a848..833a1bb7a914 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -2,6 +2,7 @@
// Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
// Copyright (C) 2008 Juergen Beisert
+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/completion.h>
#include <linux/delay.h>
@@ -660,15 +661,15 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
<< MX51_ECSPI_CTRL_BL_OFFSET;
else {
if (spi_imx->usedma) {
- ctrl |= (spi_imx->bits_per_word *
- spi_imx_bytes_per_word(spi_imx->bits_per_word) - 1)
+ ctrl |= (spi_imx->bits_per_word - 1)
<< MX51_ECSPI_CTRL_BL_OFFSET;
} else {
if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST)
- ctrl |= (MX51_ECSPI_CTRL_MAX_BURST - 1)
+ ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1)
<< MX51_ECSPI_CTRL_BL_OFFSET;
else
- ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1)
+ ctrl |= spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word,
+ BITS_PER_BYTE) * spi_imx->bits_per_word
<< MX51_ECSPI_CTRL_BL_OFFSET;
}
}
@@ -1344,7 +1345,7 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx,
controller->dma_tx = dma_request_chan(dev, "tx");
if (IS_ERR(controller->dma_tx)) {
ret = PTR_ERR(controller->dma_tx);
- dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret);
+ dev_err_probe(dev, ret, "can't get the TX DMA channel!\n");
controller->dma_tx = NULL;
goto err;
}
@@ -1353,7 +1354,7 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx,
controller->dma_rx = dma_request_chan(dev, "rx");
if (IS_ERR(controller->dma_rx)) {
ret = PTR_ERR(controller->dma_rx);
- dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret);
+ dev_err_probe(dev, ret, "can't get the RX DMA channel!\n");
controller->dma_rx = NULL;
goto err;
}
diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c
index 57d767a68e7b..4337ca51d7aa 100644
--- a/drivers/spi/spi-intel-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -76,6 +76,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info },
+ { PCI_VDEVICE(INTEL, 0x7f24), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x9d24), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x9da4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&cnl_info },
@@ -84,7 +85,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0xa2a4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info },
- { PCI_VDEVICE(INTEL, 0xae23), (unsigned long)&cnl_info },
+ { PCI_VDEVICE(INTEL, 0xa823), (unsigned long)&cnl_info },
{ },
};
MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids);
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 1bf080339b5a..88cbe4f00cc3 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -39,6 +39,7 @@
#include <linux/spi/spi.h>
#include <linux/spi/mxs-spi.h>
#include <trace/events/spi.h>
+#include <linux/dma/mxs-dma.h>
#define DRIVER_NAME "mxs-spi"
@@ -252,7 +253,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi,
desc = dmaengine_prep_slave_sg(ssp->dmach,
&dma_xfer[sg_count].sg, 1,
(flags & TXRX_WRITE) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END);
if (!desc) {
dev_err(ssp->dev,
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index a0c9fea908f5..ddf1c684bcc7 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -53,8 +53,6 @@
/* per-register bitmasks: */
#define OMAP2_MCSPI_IRQSTATUS_EOW BIT(17)
-#define OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY BIT(0)
-#define OMAP2_MCSPI_IRQSTATUS_RX0_FULL BIT(2)
#define OMAP2_MCSPI_MODULCTRL_SINGLE BIT(0)
#define OMAP2_MCSPI_MODULCTRL_MS BIT(2)
@@ -293,7 +291,7 @@ static void omap2_mcspi_set_mode(struct spi_controller *ctlr)
}
static void omap2_mcspi_set_fifo(const struct spi_device *spi,
- struct spi_transfer *t, int enable, int dma_enabled)
+ struct spi_transfer *t, int enable)
{
struct spi_controller *ctlr = spi->controller;
struct omap2_mcspi_cs *cs = spi->controller_state;
@@ -314,28 +312,20 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi,
max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH / 2;
else
max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH;
- if (dma_enabled)
- wcnt = t->len / bytes_per_word;
- else
- wcnt = 0;
+
+ wcnt = t->len / bytes_per_word;
if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT)
goto disable_fifo;
xferlevel = wcnt << 16;
if (t->rx_buf != NULL) {
chconf |= OMAP2_MCSPI_CHCONF_FFER;
- if (dma_enabled)
- xferlevel |= (bytes_per_word - 1) << 8;
- else
- xferlevel |= (max_fifo_depth - 1) << 8;
+ xferlevel |= (bytes_per_word - 1) << 8;
}
if (t->tx_buf != NULL) {
chconf |= OMAP2_MCSPI_CHCONF_FFET;
- if (dma_enabled)
- xferlevel |= bytes_per_word - 1;
- else
- xferlevel |= (max_fifo_depth - 1);
+ xferlevel |= bytes_per_word - 1;
}
mcspi_write_reg(ctlr, OMAP2_MCSPI_XFERLEVEL, xferlevel);
@@ -892,113 +882,6 @@ out:
return count - c;
}
-static unsigned
-omap2_mcspi_txrx_piofifo(struct spi_device *spi, struct spi_transfer *xfer)
-{
- struct omap2_mcspi_cs *cs = spi->controller_state;
- struct omap2_mcspi *mcspi;
- unsigned int count, c;
- unsigned int iter, cwc;
- int last_request;
- void __iomem *base = cs->base;
- void __iomem *tx_reg;
- void __iomem *rx_reg;
- void __iomem *chstat_reg;
- void __iomem *irqstat_reg;
- int word_len, bytes_per_word;
- u8 *rx;
- const u8 *tx;
-
- mcspi = spi_controller_get_devdata(spi->controller);
- count = xfer->len;
- c = count;
- word_len = cs->word_len;
- bytes_per_word = mcspi_bytes_per_word(word_len);
-
- /*
- * We store the pre-calculated register addresses on stack to speed
- * up the transfer loop.
- */
- tx_reg = base + OMAP2_MCSPI_TX0;
- rx_reg = base + OMAP2_MCSPI_RX0;
- chstat_reg = base + OMAP2_MCSPI_CHSTAT0;
- irqstat_reg = base + OMAP2_MCSPI_IRQSTATUS;
-
- if (c < (word_len >> 3))
- return 0;
-
- rx = xfer->rx_buf;
- tx = xfer->tx_buf;
-
- do {
- /* calculate number of words in current iteration */
- cwc = min((unsigned int)mcspi->fifo_depth / bytes_per_word,
- c / bytes_per_word);
- last_request = cwc != (mcspi->fifo_depth / bytes_per_word);
- if (tx) {
- if (mcspi_wait_for_reg_bit(irqstat_reg,
- OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY) < 0) {
- dev_err(&spi->dev, "TX Empty timed out\n");
- goto out;
- }
- writel_relaxed(OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY, irqstat_reg);
-
- for (iter = 0; iter < cwc; iter++, tx += bytes_per_word) {
- if (bytes_per_word == 1)
- writel_relaxed(*tx, tx_reg);
- else if (bytes_per_word == 2)
- writel_relaxed(*((u16 *)tx), tx_reg);
- else if (bytes_per_word == 4)
- writel_relaxed(*((u32 *)tx), tx_reg);
- }
- }
-
- if (rx) {
- if (!last_request &&
- mcspi_wait_for_reg_bit(irqstat_reg,
- OMAP2_MCSPI_IRQSTATUS_RX0_FULL) < 0) {
- dev_err(&spi->dev, "RX_FULL timed out\n");
- goto out;
- }
- writel_relaxed(OMAP2_MCSPI_IRQSTATUS_RX0_FULL, irqstat_reg);
-
- for (iter = 0; iter < cwc; iter++, rx += bytes_per_word) {
- if (last_request &&
- mcspi_wait_for_reg_bit(chstat_reg,
- OMAP2_MCSPI_CHSTAT_RXS) < 0) {
- dev_err(&spi->dev, "RXS timed out\n");
- goto out;
- }
- if (bytes_per_word == 1)
- *rx = readl_relaxed(rx_reg);
- else if (bytes_per_word == 2)
- *((u16 *)rx) = readl_relaxed(rx_reg);
- else if (bytes_per_word == 4)
- *((u32 *)rx) = readl_relaxed(rx_reg);
- }
- }
-
- if (last_request) {
- if (mcspi_wait_for_reg_bit(chstat_reg,
- OMAP2_MCSPI_CHSTAT_EOT) < 0) {
- dev_err(&spi->dev, "EOT timed out\n");
- goto out;
- }
- if (mcspi_wait_for_reg_bit(chstat_reg,
- OMAP2_MCSPI_CHSTAT_TXFFE) < 0) {
- dev_err(&spi->dev, "TXFFE timed out\n");
- goto out;
- }
- omap2_mcspi_set_enable(spi, 0);
- }
- c -= cwc * bytes_per_word;
- } while (c >= bytes_per_word);
-
-out:
- omap2_mcspi_set_enable(spi, 1);
- return count - c;
-}
-
static u32 omap2_mcspi_calc_divisor(u32 speed_hz, u32 ref_clk_hz)
{
u32 div;
@@ -1323,9 +1206,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr,
if ((mcspi_dma->dma_rx && mcspi_dma->dma_tx) &&
ctlr->cur_msg_mapped &&
ctlr->can_dma(ctlr, spi, t))
- omap2_mcspi_set_fifo(spi, t, 1, 1);
- else if (t->len > OMAP2_MCSPI_MAX_FIFODEPTH)
- omap2_mcspi_set_fifo(spi, t, 1, 0);
+ omap2_mcspi_set_fifo(spi, t, 1);
omap2_mcspi_set_enable(spi, 1);
@@ -1338,8 +1219,6 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr,
ctlr->cur_msg_mapped &&
ctlr->can_dma(ctlr, spi, t))
count = omap2_mcspi_txrx_dma(spi, t);
- else if (mcspi->fifo_depth > 0)
- count = omap2_mcspi_txrx_piofifo(spi, t);
else
count = omap2_mcspi_txrx_pio(spi, t);
@@ -1352,7 +1231,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr,
omap2_mcspi_set_enable(spi, 0);
if (mcspi->fifo_depth > 0)
- omap2_mcspi_set_fifo(spi, t, 0, 0);
+ omap2_mcspi_set_fifo(spi, t, 0);
out:
/* Restore defaults if they were overriden */
@@ -1375,7 +1254,7 @@ out:
omap2_mcspi_set_cs(spi, !(spi->mode & SPI_CS_HIGH));
if (mcspi->fifo_depth > 0 && t)
- omap2_mcspi_set_fifo(spi, t, 0, 0);
+ omap2_mcspi_set_fifo(spi, t, 0);
return status;
}
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 03aab661be9d..82d6264841fc 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -25,11 +25,13 @@
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/wait.h>
+#include <linux/platform_device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include <linux/platform_device.h>
#include <linux/spi/spi.h>
#include <linux/spi/spi_bitbang.h>
@@ -166,10 +168,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
int scr;
u8 cdm = 0;
u32 speed;
- u8 bits_per_word;
/* Start with the generic configuration for this device. */
- bits_per_word = spi->bits_per_word;
speed = spi->max_speed_hz;
/*
@@ -177,9 +177,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
* the transfer to overwrite the generic configuration with zeros.
*/
if (t) {
- if (t->bits_per_word)
- bits_per_word = t->bits_per_word;
-
if (t->speed_hz)
speed = min(t->speed_hz, spi->max_speed_hz);
}
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index cfc3b1ddbd22..6f12e4fb2e2e 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -136,14 +136,14 @@ struct sh_msiof_spi_priv {
/* SIFCTR */
#define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */
-#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */
-#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */
-#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */
-#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */
-#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */
-#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */
-#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */
-#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */
+#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */
+#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */
+#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */
+#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */
+#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */
+#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */
+#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */
+#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */
#define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */
#define SIFCTR_TFUA_SHIFT 20
#define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 7477a11e12be..f2170f4b5077 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1717,6 +1717,10 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr,
pm_runtime_put_noidle(ctlr->dev.parent);
dev_err(&ctlr->dev, "Failed to power device: %d\n",
ret);
+
+ msg->status = ret;
+ spi_finalize_current_message(ctlr);
+
return ret;
}
}
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index b9934b9c2d70..9f30e0edadfe 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -384,7 +384,7 @@ static struct attribute *ssb_device_attrs[] = {
};
ATTRIBUTE_GROUPS(ssb_device);
-static struct bus_type ssb_bustype = {
+static const struct bus_type ssb_bustype = {
.name = "ssb",
.match = ssb_bus_match,
.probe = ssb_device_probe,
diff --git a/drivers/staging/fieldbus/Documentation/devicetree/bindings/fieldbus/arcx,anybus-controller.txt b/drivers/staging/fieldbus/Documentation/devicetree/bindings/fieldbus/arcx,anybus-controller.txt
index b1f9474f36d5..f34a95611645 100644
--- a/drivers/staging/fieldbus/Documentation/devicetree/bindings/fieldbus/arcx,anybus-controller.txt
+++ b/drivers/staging/fieldbus/Documentation/devicetree/bindings/fieldbus/arcx,anybus-controller.txt
@@ -48,7 +48,7 @@ Example of usage:
-----------------
This example places the bridge on top of the i.MX WEIM parallel bus, see:
-Documentation/devicetree/bindings/bus/imx-weim.txt
+Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml
&weim {
controller@0,0 {
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index e748a5d04e97..9149d41fe65b 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work)
struct ad5933_state, work.work);
struct iio_dev *indio_dev = i2c_get_clientdata(st->client);
__be16 buf[2];
- int val[2];
+ u16 val[2];
unsigned char status;
int ret;
diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
index f44e6412f4e3..d0db2efe0045 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
@@ -3723,12 +3723,10 @@ apply_min_padding:
static int atomisp_set_crop(struct atomisp_device *isp,
const struct v4l2_mbus_framefmt *format,
+ struct v4l2_subdev_state *sd_state,
int which)
{
struct atomisp_input_subdev *input = &isp->inputs[isp->asd.input_curr];
- struct v4l2_subdev_state pad_state = {
- .pads = &input->pad_cfg,
- };
struct v4l2_subdev_selection sel = {
.which = which,
.target = V4L2_SEL_TGT_CROP,
@@ -3754,7 +3752,7 @@ static int atomisp_set_crop(struct atomisp_device *isp,
sel.r.left = ((input->native_rect.width - sel.r.width) / 2) & ~1;
sel.r.top = ((input->native_rect.height - sel.r.height) / 2) & ~1;
- ret = v4l2_subdev_call(input->camera, pad, set_selection, &pad_state, &sel);
+ ret = v4l2_subdev_call(input->camera, pad, set_selection, sd_state, &sel);
if (ret)
dev_err(isp->dev, "Error setting crop to %ux%u @%ux%u: %d\n",
sel.r.width, sel.r.height, sel.r.left, sel.r.top, ret);
@@ -3770,9 +3768,6 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f,
const struct atomisp_format_bridge *fmt, *snr_fmt;
struct atomisp_sub_device *asd = &isp->asd;
struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
- struct v4l2_subdev_state pad_state = {
- .pads = &input->pad_cfg,
- };
struct v4l2_subdev_format format = {
.which = V4L2_SUBDEV_FORMAT_TRY,
};
@@ -3809,11 +3804,16 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f,
dev_dbg(isp->dev, "try_mbus_fmt: asking for %ux%u\n",
format.format.width, format.format.height);
- ret = atomisp_set_crop(isp, &format.format, V4L2_SUBDEV_FORMAT_TRY);
- if (ret)
- return ret;
+ v4l2_subdev_lock_state(input->try_sd_state);
+
+ ret = atomisp_set_crop(isp, &format.format, input->try_sd_state,
+ V4L2_SUBDEV_FORMAT_TRY);
+ if (ret == 0)
+ ret = v4l2_subdev_call(input->camera, pad, set_fmt,
+ input->try_sd_state, &format);
+
+ v4l2_subdev_unlock_state(input->try_sd_state);
- ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &format);
if (ret)
return ret;
@@ -4238,9 +4238,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
struct atomisp_device *isp = asd->isp;
struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
const struct atomisp_format_bridge *format;
- struct v4l2_subdev_state pad_state = {
- .pads = &input->pad_cfg,
- };
+ struct v4l2_subdev_state *act_sd_state;
struct v4l2_subdev_format vformat = {
.which = V4L2_SUBDEV_FORMAT_TRY,
};
@@ -4268,12 +4266,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
/* Disable dvs if resolution can't be supported by sensor */
if (asd->params.video_dis_en && asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) {
- ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_TRY);
- if (ret)
- return ret;
+ v4l2_subdev_lock_state(input->try_sd_state);
+
+ ret = atomisp_set_crop(isp, &vformat.format, input->try_sd_state,
+ V4L2_SUBDEV_FORMAT_TRY);
+ if (ret == 0) {
+ vformat.which = V4L2_SUBDEV_FORMAT_TRY;
+ ret = v4l2_subdev_call(input->camera, pad, set_fmt,
+ input->try_sd_state, &vformat);
+ }
+
+ v4l2_subdev_unlock_state(input->try_sd_state);
- vformat.which = V4L2_SUBDEV_FORMAT_TRY;
- ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &vformat);
if (ret)
return ret;
@@ -4291,12 +4295,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
}
}
- ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_ACTIVE);
- if (ret)
- return ret;
+ act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+
+ ret = atomisp_set_crop(isp, &vformat.format, act_sd_state,
+ V4L2_SUBDEV_FORMAT_ACTIVE);
+ if (ret == 0) {
+ vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ ret = v4l2_subdev_call(input->camera, pad, set_fmt, act_sd_state, &vformat);
+ }
+
+ if (act_sd_state)
+ v4l2_subdev_unlock_state(act_sd_state);
- vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE;
- ret = v4l2_subdev_call(input->camera, pad, set_fmt, NULL, &vformat);
if (ret)
return ret;
diff --git a/drivers/staging/media/atomisp/pci/atomisp_internal.h b/drivers/staging/media/atomisp/pci/atomisp_internal.h
index f7b4bee9574b..d5b077e602ca 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_internal.h
+++ b/drivers/staging/media/atomisp/pci/atomisp_internal.h
@@ -132,8 +132,8 @@ struct atomisp_input_subdev {
/* Sensor rects for sensors which support crop */
struct v4l2_rect native_rect;
struct v4l2_rect active_rect;
- /* Sensor pad_cfg for which == V4L2_SUBDEV_FORMAT_TRY calls */
- struct v4l2_subdev_pad_config pad_cfg;
+ /* Sensor state for which == V4L2_SUBDEV_FORMAT_TRY calls */
+ struct v4l2_subdev_state *try_sd_state;
struct v4l2_subdev *motor;
diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
index 01b7fa9b56a2..5b2d88c02d36 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
@@ -781,12 +781,20 @@ static int atomisp_enum_framesizes(struct file *file, void *priv,
.which = V4L2_SUBDEV_FORMAT_ACTIVE,
.code = input->code,
};
+ struct v4l2_subdev_state *act_sd_state;
int ret;
+ if (!input->camera)
+ return -EINVAL;
+
if (input->crop_support)
return atomisp_enum_framesizes_crop(isp, fsize);
- ret = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse);
+ act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+ ret = v4l2_subdev_call(input->camera, pad, enum_frame_size,
+ act_sd_state, &fse);
+ if (act_sd_state)
+ v4l2_subdev_unlock_state(act_sd_state);
if (ret)
return ret;
@@ -803,18 +811,25 @@ static int atomisp_enum_frameintervals(struct file *file, void *priv,
struct video_device *vdev = video_devdata(file);
struct atomisp_device *isp = video_get_drvdata(vdev);
struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+ struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
struct v4l2_subdev_frame_interval_enum fie = {
- .code = atomisp_in_fmt_conv[0].code,
+ .code = atomisp_in_fmt_conv[0].code,
.index = fival->index,
.width = fival->width,
.height = fival->height,
.which = V4L2_SUBDEV_FORMAT_ACTIVE,
};
+ struct v4l2_subdev_state *act_sd_state;
int ret;
- ret = v4l2_subdev_call(isp->inputs[asd->input_curr].camera,
- pad, enum_frame_interval, NULL,
- &fie);
+ if (!input->camera)
+ return -EINVAL;
+
+ act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+ ret = v4l2_subdev_call(input->camera, pad, enum_frame_interval,
+ act_sd_state, &fie);
+ if (act_sd_state)
+ v4l2_subdev_unlock_state(act_sd_state);
if (ret)
return ret;
@@ -830,30 +845,25 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh,
struct video_device *vdev = video_devdata(file);
struct atomisp_device *isp = video_get_drvdata(vdev);
struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+ struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
struct v4l2_subdev_mbus_code_enum code = {
.which = V4L2_SUBDEV_FORMAT_ACTIVE,
};
const struct atomisp_format_bridge *format;
- struct v4l2_subdev *camera;
+ struct v4l2_subdev_state *act_sd_state;
unsigned int i, fi = 0;
- int rval;
+ int ret;
- camera = isp->inputs[asd->input_curr].camera;
- if(!camera) {
- dev_err(isp->dev, "%s(): camera is NULL, device is %s\n",
- __func__, vdev->name);
+ if (!input->camera)
return -EINVAL;
- }
- rval = v4l2_subdev_call(camera, pad, enum_mbus_code, NULL, &code);
- if (rval == -ENOIOCTLCMD) {
- dev_warn(isp->dev,
- "enum_mbus_code pad op not supported by %s. Please fix your sensor driver!\n",
- camera->name);
- }
-
- if (rval)
- return rval;
+ act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+ ret = v4l2_subdev_call(input->camera, pad, enum_mbus_code,
+ act_sd_state, &code);
+ if (act_sd_state)
+ v4l2_subdev_unlock_state(act_sd_state);
+ if (ret)
+ return ret;
for (i = 0; i < ARRAY_SIZE(atomisp_output_fmts); i++) {
format = &atomisp_output_fmts[i];
diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
index c1c8501ec61f..547e1444ad97 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
@@ -862,6 +862,9 @@ static void atomisp_unregister_entities(struct atomisp_device *isp)
v4l2_device_unregister(&isp->v4l2_dev);
media_device_unregister(&isp->media_dev);
media_device_cleanup(&isp->media_dev);
+
+ for (i = 0; i < isp->input_cnt; i++)
+ __v4l2_subdev_state_free(isp->inputs[i].try_sd_state);
}
static int atomisp_register_entities(struct atomisp_device *isp)
@@ -933,32 +936,49 @@ v4l2_device_failed:
static void atomisp_init_sensor(struct atomisp_input_subdev *input)
{
+ static struct lock_class_key try_sd_state_key;
struct v4l2_subdev_mbus_code_enum mbus_code_enum = { };
struct v4l2_subdev_frame_size_enum fse = { };
- struct v4l2_subdev_state sd_state = {
- .pads = &input->pad_cfg,
- };
struct v4l2_subdev_selection sel = { };
+ struct v4l2_subdev_state *try_sd_state, *act_sd_state;
int i, err;
+ /*
+ * FIXME: Drivers are not supposed to use __v4l2_subdev_state_alloc()
+ * but atomisp needs this for try_fmt on its /dev/video# node since
+ * it emulates a normal v4l2 device there, passing through try_fmt /
+ * set_fmt to the sensor.
+ */
+ try_sd_state = __v4l2_subdev_state_alloc(input->camera,
+ "atomisp:try_sd_state->lock", &try_sd_state_key);
+ if (IS_ERR(try_sd_state))
+ return;
+
+ input->try_sd_state = try_sd_state;
+
+ act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+
mbus_code_enum.which = V4L2_SUBDEV_FORMAT_ACTIVE;
- err = v4l2_subdev_call(input->camera, pad, enum_mbus_code, NULL, &mbus_code_enum);
+ err = v4l2_subdev_call(input->camera, pad, enum_mbus_code,
+ act_sd_state, &mbus_code_enum);
if (!err)
input->code = mbus_code_enum.code;
sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
sel.target = V4L2_SEL_TGT_NATIVE_SIZE;
- err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel);
+ err = v4l2_subdev_call(input->camera, pad, get_selection,
+ act_sd_state, &sel);
if (err)
- return;
+ goto unlock_act_sd_state;
input->native_rect = sel.r;
sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
sel.target = V4L2_SEL_TGT_CROP_DEFAULT;
- err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel);
+ err = v4l2_subdev_call(input->camera, pad, get_selection,
+ act_sd_state, &sel);
if (err)
- return;
+ goto unlock_act_sd_state;
input->active_rect = sel.r;
@@ -973,7 +993,8 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input)
fse.code = input->code;
fse.which = V4L2_SUBDEV_FORMAT_ACTIVE;
- err = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse);
+ err = v4l2_subdev_call(input->camera, pad, enum_frame_size,
+ act_sd_state, &fse);
if (err)
break;
@@ -989,22 +1010,26 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input)
* for padding, set the crop rect to cover the entire sensor instead
* of only the default active area.
*
- * Do this for both try and active formats since the try_crop rect in
- * pad_cfg may influence (clamp) future try_fmt calls with which == try.
+ * Do this for both try and active formats since the crop rect in
+ * try_sd_state may influence (clamp size) in calls with which == try.
*/
sel.which = V4L2_SUBDEV_FORMAT_TRY;
sel.target = V4L2_SEL_TGT_CROP;
sel.r = input->native_rect;
- err = v4l2_subdev_call(input->camera, pad, set_selection, &sd_state, &sel);
+ v4l2_subdev_lock_state(input->try_sd_state);
+ err = v4l2_subdev_call(input->camera, pad, set_selection,
+ input->try_sd_state, &sel);
+ v4l2_subdev_unlock_state(input->try_sd_state);
if (err)
- return;
+ goto unlock_act_sd_state;
sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
sel.target = V4L2_SEL_TGT_CROP;
sel.r = input->native_rect;
- err = v4l2_subdev_call(input->camera, pad, set_selection, NULL, &sel);
+ err = v4l2_subdev_call(input->camera, pad, set_selection,
+ act_sd_state, &sel);
if (err)
- return;
+ goto unlock_act_sd_state;
dev_info(input->camera->dev, "Supports crop native %dx%d active %dx%d binning %d\n",
input->native_rect.width, input->native_rect.height,
@@ -1012,6 +1037,10 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input)
input->binning_support);
input->crop_support = true;
+
+unlock_act_sd_state:
+ if (act_sd_state)
+ v4l2_subdev_unlock_state(act_sd_state);
}
int atomisp_register_device_nodes(struct atomisp_device *isp)
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index b0b262de6480..283804b49e91 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1515,7 +1515,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw,
if (changed & BSS_CHANGED_TXPOWER)
RFbSetPower(priv, priv->wCurrentRate,
- conf->chandef.chan->hw_value);
+ conf->chanreq.oper.chan->hw_value);
if (changed & BSS_CHANGED_BEACON_ENABLED) {
dev_dbg(&priv->pcid->dev,
@@ -1684,6 +1684,10 @@ static void vnt_reset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
}
static const struct ieee80211_ops vnt_mac_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = vnt_tx_80211,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = vnt_start,
diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c
index 2abae90f3f52..7bbed462f062 100644
--- a/drivers/staging/vt6656/main_usb.c
+++ b/drivers/staging/vt6656/main_usb.c
@@ -794,7 +794,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw,
vnt_set_bss_mode(priv);
if (changed & (BSS_CHANGED_TXPOWER | BSS_CHANGED_BANDWIDTH))
- vnt_rf_setpower(priv, conf->chandef.chan);
+ vnt_rf_setpower(priv, conf->chanreq.oper.chan);
if (changed & BSS_CHANGED_BEACON_ENABLED) {
dev_dbg(&priv->usb->dev,
@@ -956,6 +956,10 @@ static void vnt_reset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
}
static const struct ieee80211_ops vnt_mac_ops = {
+ .add_chanctx = ieee80211_emulate_add_chanctx,
+ .remove_chanctx = ieee80211_emulate_remove_chanctx,
+ .change_chanctx = ieee80211_emulate_change_chanctx,
+ .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
.tx = vnt_tx_80211,
.wake_tx_queue = ieee80211_handle_wake_tx_queue,
.start = vnt_start,
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index a5f58988130a..c1fbcdd16182 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -759,6 +759,29 @@ static ssize_t emulate_tas_store(struct config_item *item,
return count;
}
+static int target_try_configure_unmap(struct se_device *dev,
+ const char *config_opt)
+{
+ if (!dev->transport->configure_unmap) {
+ pr_err("Generic Block Discard not supported\n");
+ return -ENOSYS;
+ }
+
+ if (!target_dev_configured(dev)) {
+ pr_err("Generic Block Discard setup for %s requires device to be configured\n",
+ config_opt);
+ return -ENODEV;
+ }
+
+ if (!dev->transport->configure_unmap(dev)) {
+ pr_err("Generic Block Discard setup for %s failed\n",
+ config_opt);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
static ssize_t emulate_tpu_store(struct config_item *item,
const char *page, size_t count)
{
@@ -776,11 +799,9 @@ static ssize_t emulate_tpu_store(struct config_item *item,
* Discard supported is detected iblock_create_virtdevice().
*/
if (flag && !da->max_unmap_block_desc_count) {
- if (!dev->transport->configure_unmap ||
- !dev->transport->configure_unmap(dev)) {
- pr_err("Generic Block Discard not supported\n");
- return -ENOSYS;
- }
+ ret = target_try_configure_unmap(dev, "emulate_tpu");
+ if (ret)
+ return ret;
}
da->emulate_tpu = flag;
@@ -806,11 +827,9 @@ static ssize_t emulate_tpws_store(struct config_item *item,
* Discard supported is detected iblock_create_virtdevice().
*/
if (flag && !da->max_unmap_block_desc_count) {
- if (!dev->transport->configure_unmap ||
- !dev->transport->configure_unmap(dev)) {
- pr_err("Generic Block Discard not supported\n");
- return -ENOSYS;
- }
+ ret = target_try_configure_unmap(dev, "emulate_tpws");
+ if (ret)
+ return ret;
}
da->emulate_tpws = flag;
@@ -1022,12 +1041,9 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item,
* Discard supported is detected iblock_configure_device().
*/
if (flag && !da->max_unmap_block_desc_count) {
- if (!dev->transport->configure_unmap ||
- !dev->transport->configure_unmap(dev)) {
- pr_err("dev[%p]: Thin Provisioning LBPRZ will not be set because max_unmap_block_desc_count is zero\n",
- da->da_dev);
- return -ENOSYS;
- }
+ ret = target_try_configure_unmap(dev, "unmap_zeroes_data");
+ if (ret)
+ return ret;
}
da->unmap_zeroes_data = flag;
pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n",
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 8eb9eb7ce5df..7f6ca8177845 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -91,7 +91,7 @@ static int iblock_configure_device(struct se_device *dev)
{
struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
struct request_queue *q;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bd;
struct blk_integrity *bi;
blk_mode_t mode = BLK_OPEN_READ;
@@ -117,14 +117,14 @@ static int iblock_configure_device(struct se_device *dev)
else
dev->dev_flags |= DF_READ_ONLY;
- bdev_handle = bdev_open_by_path(ib_dev->ibd_udev_path, mode, ib_dev,
+ bdev_file = bdev_file_open_by_path(ib_dev->ibd_udev_path, mode, ib_dev,
NULL);
- if (IS_ERR(bdev_handle)) {
- ret = PTR_ERR(bdev_handle);
+ if (IS_ERR(bdev_file)) {
+ ret = PTR_ERR(bdev_file);
goto out_free_bioset;
}
- ib_dev->ibd_bdev_handle = bdev_handle;
- ib_dev->ibd_bd = bd = bdev_handle->bdev;
+ ib_dev->ibd_bdev_file = bdev_file;
+ ib_dev->ibd_bd = bd = file_bdev(bdev_file);
q = bdev_get_queue(bd);
@@ -180,7 +180,7 @@ static int iblock_configure_device(struct se_device *dev)
return 0;
out_blkdev_put:
- bdev_release(ib_dev->ibd_bdev_handle);
+ fput(ib_dev->ibd_bdev_file);
out_free_bioset:
bioset_exit(&ib_dev->ibd_bio_set);
out:
@@ -205,8 +205,8 @@ static void iblock_destroy_device(struct se_device *dev)
{
struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
- if (ib_dev->ibd_bdev_handle)
- bdev_release(ib_dev->ibd_bdev_handle);
+ if (ib_dev->ibd_bdev_file)
+ fput(ib_dev->ibd_bdev_file);
bioset_exit(&ib_dev->ibd_bio_set);
}
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index 683f9a55945b..91f6f4280666 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -32,7 +32,7 @@ struct iblock_dev {
u32 ibd_flags;
struct bio_set ibd_bio_set;
struct block_device *ibd_bd;
- struct bdev_handle *ibd_bdev_handle;
+ struct file *ibd_bdev_file;
bool ibd_readonly;
struct iblock_dev_plug *ibd_plug;
} ____cacheline_aligned;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 41b7489d37ce..f98ebb18666b 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -352,7 +352,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
struct pscsi_dev_virt *pdv = PSCSI_DEV(dev);
struct Scsi_Host *sh = sd->host;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
int ret;
if (scsi_device_get(sd)) {
@@ -366,18 +366,18 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
* Claim exclusive struct block_device access to struct scsi_device
* for TYPE_DISK and TYPE_ZBC using supplied udev_path
*/
- bdev_handle = bdev_open_by_path(dev->udev_path,
+ bdev_file = bdev_file_open_by_path(dev->udev_path,
BLK_OPEN_WRITE | BLK_OPEN_READ, pdv, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
pr_err("pSCSI: bdev_open_by_path() failed\n");
scsi_device_put(sd);
- return PTR_ERR(bdev_handle);
+ return PTR_ERR(bdev_file);
}
- pdv->pdv_bdev_handle = bdev_handle;
+ pdv->pdv_bdev_file = bdev_file;
ret = pscsi_add_device_to_list(dev, sd);
if (ret) {
- bdev_release(bdev_handle);
+ fput(bdev_file);
scsi_device_put(sd);
return ret;
}
@@ -564,9 +564,9 @@ static void pscsi_destroy_device(struct se_device *dev)
* from pscsi_create_type_disk()
*/
if ((sd->type == TYPE_DISK || sd->type == TYPE_ZBC) &&
- pdv->pdv_bdev_handle) {
- bdev_release(pdv->pdv_bdev_handle);
- pdv->pdv_bdev_handle = NULL;
+ pdv->pdv_bdev_file) {
+ fput(pdv->pdv_bdev_file);
+ pdv->pdv_bdev_file = NULL;
}
/*
* For HBA mode PHV_LLD_SCSI_HOST_NO, release the reference
@@ -907,12 +907,15 @@ new_bio:
return 0;
fail:
- if (bio)
- bio_put(bio);
+ if (bio) {
+ bio_uninit(bio);
+ kfree(bio);
+ }
while (req->bio) {
bio = req->bio;
req->bio = bio->bi_next;
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
}
req->biotail = NULL;
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -994,8 +997,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
{
struct pscsi_dev_virt *pdv = PSCSI_DEV(dev);
- if (pdv->pdv_bdev_handle)
- return bdev_nr_sectors(pdv->pdv_bdev_handle->bdev);
+ if (pdv->pdv_bdev_file)
+ return bdev_nr_sectors(file_bdev(pdv->pdv_bdev_file));
return 0;
}
diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h
index b0a3ef136592..9acaa21e4c78 100644
--- a/drivers/target/target_core_pscsi.h
+++ b/drivers/target/target_core_pscsi.h
@@ -37,7 +37,7 @@ struct pscsi_dev_virt {
int pdv_channel_id;
int pdv_target_id;
int pdv_lun_id;
- struct bdev_handle *pdv_bdev_handle;
+ struct file *pdv_bdev_file;
struct scsi_device *pdv_sd;
struct Scsi_Host *pdv_lld_host;
} ____cacheline_aligned;
diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c
index 4b1092127694..1892e49a8e6a 100644
--- a/drivers/tee/optee/device.c
+++ b/drivers/tee/optee/device.c
@@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func)
if (rc) {
pr_err("device registration failed, err: %d\n", rc);
put_device(&optee_device->dev);
+ return rc;
}
if (func == PTA_CMD_GET_DEVICES_SUPP)
device_create_file(&optee_device->dev,
&dev_attr_need_supplicant);
- return rc;
+ return 0;
}
static int __optee_enumerate_devices(u32 func)
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 792d6fae4354..e59c20d74b36 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -1226,7 +1226,7 @@ static int tee_client_device_uevent(const struct device *dev,
return add_uevent_var(env, "MODALIAS=tee:%pUb", dev_id);
}
-struct bus_type tee_bus_type = {
+const struct bus_type tee_bus_type = {
.name = "tee",
.match = tee_client_device_match,
.uevent = tee_client_device_uevent,
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 3b04c6ec4fca..40d664a66cdc 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -607,7 +607,7 @@ void __init intel_hfi_init(void)
/* There is one HFI instance per die/package. */
max_hfi_instances = topology_max_packages() *
- topology_max_die_per_package();
+ topology_max_dies_per_package();
/*
* This allocation may fail. CPU hotplug callbacks must check
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 5ac5cb60bae6..4ba649370aa1 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -49,7 +49,6 @@
*/
#define DEFAULT_DURATION_JIFFIES (6)
-static unsigned int target_mwait;
static struct dentry *debug_dir;
static bool poll_pkg_cstate_enable;
@@ -312,34 +311,6 @@ MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
"\twindow size results in slower response time but more smooth\n"
"\tclamping results. default to 2.");
-static void find_target_mwait(void)
-{
- unsigned int eax, ebx, ecx, edx;
- unsigned int highest_cstate = 0;
- unsigned int highest_subcstate = 0;
- int i;
-
- if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
- return;
-
- cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
-
- if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
- !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
- return;
-
- edx >>= MWAIT_SUBSTATE_SIZE;
- for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
- if (edx & MWAIT_SUBSTATE_MASK) {
- highest_cstate = i;
- highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
- }
- }
- target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
- (highest_subcstate - 1);
-
-}
-
struct pkg_cstate_info {
bool skip;
int msr_index;
@@ -616,7 +587,7 @@ static int powerclamp_idle_injection_register(void)
poll_pkg_cstate_enable = false;
if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) {
ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update);
- if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ if (topology_max_packages() == 1 && topology_max_dies_per_package() == 1)
poll_pkg_cstate_enable = true;
} else {
ii_dev = idle_inject_register(idle_injection_cpu_mask);
@@ -759,9 +730,6 @@ static int __init powerclamp_probe(void)
return -ENODEV;
}
- /* find the deepest mwait value */
- find_target_mwait();
-
return 0;
}
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 11a7f8108bbb..f6c2e5964b8f 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -494,7 +494,7 @@ static int __init pkg_temp_thermal_init(void)
if (!x86_match_cpu(pkg_temp_thermal_ids))
return -ENODEV;
- max_id = topology_max_packages() * topology_max_die_per_package();
+ max_id = topology_max_packages() * topology_max_dies_per_package();
zones = kcalloc(max_id, sizeof(struct zone_device *),
GFP_KERNEL);
if (!zones)
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 900114ba4371..fad40c4bc710 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -1249,6 +1249,9 @@ int tb_port_update_credits(struct tb_port *port)
ret = tb_port_do_update_credits(port);
if (ret)
return ret;
+
+ if (!port->dual_link_port)
+ return 0;
return tb_port_do_update_credits(port->dual_link_port);
}
diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h
index 87e4795275fe..6f798f6a2b84 100644
--- a/drivers/thunderbolt/tb_regs.h
+++ b/drivers/thunderbolt/tb_regs.h
@@ -203,7 +203,7 @@ struct tb_regs_switch_header {
#define ROUTER_CS_5_WOP BIT(1)
#define ROUTER_CS_5_WOU BIT(2)
#define ROUTER_CS_5_WOD BIT(3)
-#define ROUTER_CS_5_C3S BIT(23)
+#define ROUTER_CS_5_CNS BIT(23)
#define ROUTER_CS_5_PTO BIT(24)
#define ROUTER_CS_5_UTO BIT(25)
#define ROUTER_CS_5_HCO BIT(26)
diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
index f8f0d24ff6e4..1515eff8cc3e 100644
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -290,7 +290,7 @@ int usb4_switch_setup(struct tb_switch *sw)
}
/* TBT3 supported by the CM */
- val |= ROUTER_CS_5_C3S;
+ val &= ~ROUTER_CS_5_CNS;
return tb_sw_write(sw, &val, TB_CFG_SWITCH, ROUTER_CS_5, 1);
}
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index 6e05c5c7bca1..c2a4e88b328f 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -108,13 +108,15 @@ config HVC_DCC_SERIALIZE_SMP
config HVC_RISCV_SBI
bool "RISC-V SBI console support"
- depends on RISCV_SBI
+ depends on RISCV_SBI && NONPORTABLE
select HVC_DRIVER
help
This enables support for console output via RISC-V SBI calls, which
- is normally used only during boot to output printk.
+ is normally used only during boot to output printk. This driver
+ conflicts with real console drivers and should not be enabled on
+ systems that directly access the console.
- If you don't know what do to here, say Y.
+ If you don't know what do to here, say N.
config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c
index 558c4c7f3104..2dda737b1660 100644
--- a/drivers/tty/serial/8250/8250_pci1xxxx.c
+++ b/drivers/tty/serial/8250/8250_pci1xxxx.c
@@ -311,7 +311,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port,
}
while (*valid_byte_count) {
- if (*buff_index > RX_BUF_SIZE)
+ if (*buff_index >= RX_BUF_SIZE)
break;
rx_buff[*buff_index] = readb(port->membase +
UART_RX_BYTE_FIFO);
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index fccec1698a54..cf2c890a560f 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1339,11 +1339,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap)
}
}
+static void pl011_rs485_tx_start(struct uart_amba_port *uap)
+{
+ struct uart_port *port = &uap->port;
+ u32 cr;
+
+ /* Enable transmitter */
+ cr = pl011_read(uap, REG_CR);
+ cr |= UART011_CR_TXE;
+
+ /* Disable receiver if half-duplex */
+ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX))
+ cr &= ~UART011_CR_RXE;
+
+ if (port->rs485.flags & SER_RS485_RTS_ON_SEND)
+ cr &= ~UART011_CR_RTS;
+ else
+ cr |= UART011_CR_RTS;
+
+ pl011_write(cr, uap, REG_CR);
+
+ if (port->rs485.delay_rts_before_send)
+ mdelay(port->rs485.delay_rts_before_send);
+
+ uap->rs485_tx_started = true;
+}
+
static void pl011_start_tx(struct uart_port *port)
{
struct uart_amba_port *uap =
container_of(port, struct uart_amba_port, port);
+ if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
+ !uap->rs485_tx_started)
+ pl011_rs485_tx_start(uap);
+
if (!pl011_dma_tx_start(uap))
pl011_start_tx_pio(uap);
}
@@ -1424,42 +1454,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c,
return true;
}
-static void pl011_rs485_tx_start(struct uart_amba_port *uap)
-{
- struct uart_port *port = &uap->port;
- u32 cr;
-
- /* Enable transmitter */
- cr = pl011_read(uap, REG_CR);
- cr |= UART011_CR_TXE;
-
- /* Disable receiver if half-duplex */
- if (!(port->rs485.flags & SER_RS485_RX_DURING_TX))
- cr &= ~UART011_CR_RXE;
-
- if (port->rs485.flags & SER_RS485_RTS_ON_SEND)
- cr &= ~UART011_CR_RTS;
- else
- cr |= UART011_CR_RTS;
-
- pl011_write(cr, uap, REG_CR);
-
- if (port->rs485.delay_rts_before_send)
- mdelay(port->rs485.delay_rts_before_send);
-
- uap->rs485_tx_started = true;
-}
-
/* Returns true if tx interrupts have to be (kept) enabled */
static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq)
{
struct circ_buf *xmit = &uap->port.state->xmit;
int count = uap->fifosize >> 1;
- if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
- !uap->rs485_tx_started)
- pl011_rs485_tx_start(uap);
-
if (uap->port.x_char) {
if (!pl011_tx_char(uap, uap->port.x_char, from_irq))
return true;
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 5ddf110aedbe..bbcbc91482af 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -2345,9 +2345,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios,
lpuart32_write(&sport->port, bd, UARTBAUD);
lpuart32_serial_setbrg(sport, baud);
- lpuart32_write(&sport->port, modem, UARTMODIR);
- lpuart32_write(&sport->port, ctrl, UARTCTRL);
+ /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */
+ lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR);
/* restore control register */
+ lpuart32_write(&sport->port, ctrl, UARTCTRL);
+ /* re-enable the CTS if needed */
+ lpuart32_write(&sport->port, modem, UARTMODIR);
if ((ctrl & (UARTCTRL_PE | UARTCTRL_M)) == UARTCTRL_PE)
sport->is_cs7 = true;
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 4aa72d5aeafb..e14813250616 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -462,8 +462,7 @@ static void imx_uart_stop_tx(struct uart_port *port)
}
}
-/* called with port.lock taken and irqs off */
-static void imx_uart_stop_rx(struct uart_port *port)
+static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback)
{
struct imx_port *sport = (struct imx_port *)port;
u32 ucr1, ucr2, ucr4, uts;
@@ -485,7 +484,7 @@ static void imx_uart_stop_rx(struct uart_port *port)
/* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
if (port->rs485.flags & SER_RS485_ENABLED &&
port->rs485.flags & SER_RS485_RTS_ON_SEND &&
- sport->have_rtscts && !sport->have_rtsgpio) {
+ sport->have_rtscts && !sport->have_rtsgpio && loopback) {
uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
uts |= UTS_LOOP;
imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
@@ -498,6 +497,16 @@ static void imx_uart_stop_rx(struct uart_port *port)
}
/* called with port.lock taken and irqs off */
+static void imx_uart_stop_rx(struct uart_port *port)
+{
+ /*
+ * Stop RX and enable loopback in order to make sure RS485 bus
+ * is not blocked. Se comment in imx_uart_probe().
+ */
+ imx_uart_stop_rx_with_loopback_ctrl(port, true);
+}
+
+/* called with port.lock taken and irqs off */
static void imx_uart_enable_ms(struct uart_port *port)
{
struct imx_port *sport = (struct imx_port *)port;
@@ -682,9 +691,14 @@ static void imx_uart_start_tx(struct uart_port *port)
imx_uart_rts_inactive(sport, &ucr2);
imx_uart_writel(sport, ucr2, UCR2);
+ /*
+ * Since we are about to transmit we can not stop RX
+ * with loopback enabled because that will make our
+ * transmitted data being just looped to RX.
+ */
if (!(port->rs485.flags & SER_RS485_RX_DURING_TX) &&
!port->rs485_rx_during_tx_gpio)
- imx_uart_stop_rx(port);
+ imx_uart_stop_rx_with_loopback_ctrl(port, false);
sport->tx_state = WAIT_AFTER_RTS;
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index f3a99daebdaa..10bf6d75bf9e 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -237,6 +237,14 @@
#define MAX310x_REV_MASK (0xf8)
#define MAX310X_WRITE_BIT 0x80
+/* Port startup definitions */
+#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */
+#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */
+
+/* Crystal-related definitions */
+#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */
+#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */
+
/* MAX3107 specific */
#define MAX3107_REV_ID (0xa0)
@@ -583,7 +591,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
return 1;
}
-static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
unsigned long freq, bool xtal)
{
unsigned int div, clksrc, pllcfg = 0;
@@ -641,12 +649,20 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
/* Wait for crystal */
if (xtal) {
- unsigned int val;
- msleep(10);
- regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val);
- if (!(val & MAX310X_STS_CLKREADY_BIT)) {
- dev_warn(dev, "clock is not stable yet\n");
- }
+ bool stable = false;
+ unsigned int try = 0, val = 0;
+
+ do {
+ msleep(MAX310X_XTAL_WAIT_DELAY_MS);
+ regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val);
+
+ if (val & MAX310X_STS_CLKREADY_BIT)
+ stable = true;
+ } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES));
+
+ if (!stable)
+ return dev_err_probe(dev, -EAGAIN,
+ "clock is not stable\n");
}
return bestfreq;
@@ -1271,7 +1287,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
{
int i, ret, fmin, fmax, freq;
struct max310x_port *s;
- u32 uartclk = 0;
+ s32 uartclk = 0;
bool xtal;
for (i = 0; i < devtype->nr; i++)
@@ -1334,6 +1350,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
goto out_clk;
for (i = 0; i < devtype->nr; i++) {
+ bool started = false;
+ unsigned int try = 0, val = 0;
+
/* Reset port */
regmap_write(regmaps[i], MAX310X_MODE2_REG,
MAX310X_MODE2_RST_BIT);
@@ -1342,13 +1361,27 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
/* Wait for port startup */
do {
- regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
- } while (ret != 0x01);
+ msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS);
+ regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val);
+
+ if (val == 0x01)
+ started = true;
+ } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES));
+
+ if (!started) {
+ ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n");
+ goto out_uart;
+ }
regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
}
uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
+ if (uartclk < 0) {
+ ret = uartclk;
+ goto out_uart;
+ }
+
dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk);
for (i = 0; i < devtype->nr; i++) {
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 3ec725555bcc..4749331fe618 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -605,13 +605,16 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s)
return;
}
- pending = uart_port_tx(&s->port, ch,
+ pending = uart_port_tx_flags(&s->port, ch, UART_TX_NOSTOP,
!(mxs_read(s, REG_STAT) & AUART_STAT_TXFF),
mxs_write(ch, s, REG_DATA));
if (pending)
mxs_set(AUART_INTR_TXIEN, s, REG_INTR);
else
mxs_clr(AUART_INTR_TXIEN, s, REG_INTR);
+
+ if (uart_tx_stopped(&s->port))
+ mxs_auart_stop_tx(&s->port);
}
static void mxs_auart_rx_char(struct mxs_auart_port *s)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index e63a8fbe63bd..99e08737f293 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -851,19 +851,21 @@ static void qcom_geni_serial_stop_tx(struct uart_port *uport)
}
static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport,
- unsigned int remaining)
+ unsigned int chunk)
{
struct qcom_geni_serial_port *port = to_dev_port(uport);
struct circ_buf *xmit = &uport->state->xmit;
- unsigned int tx_bytes;
+ unsigned int tx_bytes, c, remaining = chunk;
u8 buf[BYTES_PER_FIFO_WORD];
while (remaining) {
memset(buf, 0, sizeof(buf));
tx_bytes = min(remaining, BYTES_PER_FIFO_WORD);
- memcpy(buf, &xmit->buf[xmit->tail], tx_bytes);
- uart_xmit_advance(uport, tx_bytes);
+ for (c = 0; c < tx_bytes ; c++) {
+ buf[c] = xmit->buf[xmit->tail];
+ uart_xmit_advance(uport, 1);
+ }
iowrite32_rep(uport->membase + SE_GENI_TX_FIFOn, buf, 1);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index b56ed8c376b2..d6a58a9e072a 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1084,8 +1084,8 @@ static int uart_tiocmget(struct tty_struct *tty)
goto out;
if (!tty_io_error(tty)) {
- result = uport->mctrl;
uart_port_lock_irq(uport);
+ result = uport->mctrl;
result |= uport->ops->get_mctrl(uport);
uart_port_unlock_irq(uport);
}
diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c
index 88975a4df306..72b6f4f326e2 100644
--- a/drivers/tty/serial/serial_port.c
+++ b/drivers/tty/serial/serial_port.c
@@ -46,8 +46,31 @@ out:
return 0;
}
+static int serial_port_runtime_suspend(struct device *dev)
+{
+ struct serial_port_device *port_dev = to_serial_base_port_device(dev);
+ struct uart_port *port = port_dev->port;
+ unsigned long flags;
+ bool busy;
+
+ if (port->flags & UPF_DEAD)
+ return 0;
+
+ uart_port_lock_irqsave(port, &flags);
+ busy = __serial_port_busy(port);
+ if (busy)
+ port->ops->start_tx(port);
+ uart_port_unlock_irqrestore(port, flags);
+
+ if (busy)
+ pm_runtime_mark_last_busy(dev);
+
+ return busy ? -EBUSY : 0;
+}
+
static DEFINE_RUNTIME_DEV_PM_OPS(serial_port_pm,
- NULL, serial_port_runtime_resume, NULL);
+ serial_port_runtime_suspend,
+ serial_port_runtime_resume, NULL);
static int serial_port_probe(struct device *dev)
{
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 794b77512740..693e932d6feb 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -251,7 +251,9 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter
writel_relaxed(cr3, port->membase + ofs->cr3);
writel_relaxed(cr1, port->membase + ofs->cr1);
- rs485conf->flags |= SER_RS485_RX_DURING_TX;
+ if (!port->rs485_rx_during_tx_gpio)
+ rs485conf->flags |= SER_RS485_RX_DURING_TX;
+
} else {
stm32_usart_clr_bits(port, ofs->cr3,
USART_CR3_DEM | USART_CR3_DEP);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 156efda7c80d..38a765eadbe2 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -381,7 +381,7 @@ static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr)
u32 *ln = vc->vc_uni_lines[vc->state.y];
unsigned int x = vc->state.x, cols = vc->vc_cols;
- memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln));
+ memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln));
memset32(&ln[cols - nr], ' ', nr);
}
}
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 029d017fc1b6..eac7fff6992d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1469,7 +1469,7 @@ static int ufshcd_devfreq_target(struct device *dev,
int ret = 0;
struct ufs_hba *hba = dev_get_drvdata(dev);
ktime_t start;
- bool scale_up, sched_clk_scaling_suspend_work = false;
+ bool scale_up = false, sched_clk_scaling_suspend_work = false;
struct list_head *clk_list = &hba->clk_list_head;
struct ufs_clk_info *clki;
unsigned long irq_flags;
@@ -3057,7 +3057,7 @@ bool ufshcd_cmd_inflight(struct scsi_cmnd *cmd)
*/
static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag)
{
- u32 mask = 1U << task_tag;
+ u32 mask;
unsigned long flags;
int err;
@@ -3075,6 +3075,8 @@ static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag)
return 0;
}
+ mask = 1U << task_tag;
+
/* clear outstanding transaction before retry */
spin_lock_irqsave(hba->host->host_lock, flags);
ufshcd_utrl_clear(hba, mask);
@@ -6352,7 +6354,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
ufshcd_hold(hba);
if (!ufshcd_is_clkgating_allowed(hba))
ufshcd_setup_clocks(hba, true);
- ufshcd_release(hba);
pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
ufshcd_vops_resume(hba, pm_op);
} else {
@@ -10592,7 +10593,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
err = blk_mq_alloc_tag_set(&hba->tmf_tag_set);
if (err < 0)
goto out_remove_scsi_host;
- hba->tmf_queue = blk_mq_init_queue(&hba->tmf_tag_set);
+ hba->tmf_queue = blk_mq_alloc_queue(&hba->tmf_tag_set, NULL, NULL);
if (IS_ERR(hba->tmf_queue)) {
err = PTR_ERR(hba->tmf_queue);
goto free_tmf_tag_set;
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 39eef470f8fa..8fde5204e88b 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -1712,8 +1712,8 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
* 2. Poll queues do not need ESI.
*/
nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL];
- ret = platform_msi_domain_alloc_irqs(hba->dev, nr_irqs,
- ufs_qcom_write_msi_msg);
+ ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs,
+ ufs_qcom_write_msi_msg);
if (ret) {
dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret);
return ret;
@@ -1742,7 +1742,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
devm_free_irq(hba->dev, desc->irq, hba);
}
msi_unlock_descs(hba->dev);
- platform_msi_domain_free_irqs(hba->dev);
+ platform_device_msi_free_irqs_all(hba->dev);
} else {
if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 &&
host->hw_ver.step == 0)
@@ -1818,7 +1818,7 @@ static void ufs_qcom_remove(struct platform_device *pdev)
pm_runtime_get_sync(&(pdev)->dev);
ufshcd_remove(hba);
- platform_msi_domain_free_irqs(hba->dev);
+ platform_device_msi_free_irqs_all(hba->dev);
}
static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = {
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index aeca902ab6cc..fd1beb10bba7 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -828,7 +828,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep,
return;
}
- if (request->complete) {
+ /*
+ * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify
+ * gadget composite driver.
+ */
+ if (request->complete && request->buf != priv_dev->zlp_buf) {
spin_unlock(&priv_dev->lock);
usb_gadget_giveback_request(&priv_ep->endpoint,
request);
@@ -2540,11 +2544,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep)
while (!list_empty(&priv_ep->wa2_descmiss_req_list)) {
priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list);
+ list_del_init(&priv_req->list);
kfree(priv_req->request.buf);
cdns3_gadget_ep_free_request(&priv_ep->endpoint,
&priv_req->request);
- list_del_init(&priv_req->list);
--priv_ep->wa2_counter;
}
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c
index 33548771a0d3..465e9267b49c 100644
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -395,7 +395,6 @@ pm_put:
return ret;
}
-
/**
* cdns_wakeup_irq - interrupt handler for wakeup events
* @irq: irq number for cdns3/cdnsp core device
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
index 04b6d12f2b9a..ee917f1b091c 100644
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns)
*/
static void cdns_otg_disable_irq(struct cdns *cdns)
{
- writel(0, &cdns->otg_irq_regs->ien);
+ if (cdns->version)
+ writel(0, &cdns->otg_irq_regs->ien);
}
/**
@@ -422,15 +423,20 @@ int cdns_drd_init(struct cdns *cdns)
cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd;
- if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) {
+ state = readl(&cdns->otg_cdnsp_regs->did);
+
+ if (OTG_CDNSP_CHECK_DID(state)) {
cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *)
&cdns->otg_cdnsp_regs->ien;
cdns->version = CDNSP_CONTROLLER_V2;
- } else {
+ } else if (OTG_CDNS3_CHECK_DID(state)) {
cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *)
&cdns->otg_v1_regs->ien;
writel(1, &cdns->otg_v1_regs->simulate);
cdns->version = CDNS3_CONTROLLER_V1;
+ } else {
+ dev_err(cdns->dev, "not supporte DID=0x%08x\n", state);
+ return -EINVAL;
}
dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n",
@@ -483,7 +489,6 @@ int cdns_drd_exit(struct cdns *cdns)
return 0;
}
-
/* Indicate the cdns3 core was power lost before */
bool cdns_power_is_lost(struct cdns *cdns)
{
diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h
index cbdf94f73ed9..d72370c321d3 100644
--- a/drivers/usb/cdns3/drd.h
+++ b/drivers/usb/cdns3/drd.h
@@ -79,7 +79,11 @@ struct cdnsp_otg_regs {
__le32 susp_timing_ctrl;
};
-#define OTG_CDNSP_DID 0x0004034E
+/* CDNSP driver supports 0x000403xx Cadence USB controller family. */
+#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300)
+
+/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */
+#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200)
/*
* Common registers interface for both CDNS3 and CDNSP version of DRD.
diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
index 6164fc4c96a4..ceca4d839dfd 100644
--- a/drivers/usb/cdns3/host.c
+++ b/drivers/usb/cdns3/host.c
@@ -18,6 +18,11 @@
#include "../host/xhci.h"
#include "../host/xhci-plat.h"
+/*
+ * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only
+ * in Cadence USB3 dual-role controller, so it can't be used
+ * with Cadence CDNSP dual-role controller.
+ */
#define XECP_PORT_CAP_REG 0x8000
#define XECP_AUX_CTRL_REG1 0x8120
@@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = {
.resume_quirk = xhci_cdns3_resume_quirk,
};
+static const struct xhci_plat_priv xhci_plat_cdnsp_xhci;
+
static int __cdns_host_init(struct cdns *cdns)
{
struct platform_device *xhci;
@@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns)
goto err1;
}
- cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci,
- sizeof(struct xhci_plat_priv), GFP_KERNEL);
+ if (cdns->version < CDNSP_CONTROLLER_V2)
+ cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci,
+ sizeof(struct xhci_plat_priv), GFP_KERNEL);
+ else
+ cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci,
+ sizeof(struct xhci_plat_priv), GFP_KERNEL);
+
if (!cdns->xhci_plat_data) {
ret = -ENOMEM;
goto err1;
diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h
index d9bb3d3f026e..2a38e1eb6546 100644
--- a/drivers/usb/chipidea/ci.h
+++ b/drivers/usb/chipidea/ci.h
@@ -176,6 +176,7 @@ struct hw_bank {
* @enabled_otg_timer_bits: bits of enabled otg timers
* @next_otg_timer: next nearest enabled timer to be expired
* @work: work for role changing
+ * @power_lost_work: work for power lost handling
* @wq: workqueue thread
* @qh_pool: allocation pool for queue heads
* @td_pool: allocation pool for transfer descriptors
@@ -226,6 +227,7 @@ struct ci_hdrc {
enum otg_fsm_timer next_otg_timer;
struct usb_role_switch *role_switch;
struct work_struct work;
+ struct work_struct power_lost_work;
struct workqueue_struct *wq;
struct dma_pool *qh_pool;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 41014f93cfdf..835bf2428dc6 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -856,6 +856,27 @@ static int ci_extcon_register(struct ci_hdrc *ci)
return 0;
}
+static void ci_power_lost_work(struct work_struct *work)
+{
+ struct ci_hdrc *ci = container_of(work, struct ci_hdrc, power_lost_work);
+ enum ci_role role;
+
+ disable_irq_nosync(ci->irq);
+ pm_runtime_get_sync(ci->dev);
+ if (!ci_otg_is_fsm_mode(ci)) {
+ role = ci_get_role(ci);
+
+ if (ci->role != role) {
+ ci_handle_id_switch(ci);
+ } else if (role == CI_ROLE_GADGET) {
+ if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
+ usb_gadget_vbus_connect(&ci->gadget);
+ }
+ }
+ pm_runtime_put_sync(ci->dev);
+ enable_irq(ci->irq);
+}
+
static DEFINE_IDA(ci_ida);
struct platform_device *ci_hdrc_add_device(struct device *dev,
@@ -1045,6 +1066,8 @@ static int ci_hdrc_probe(struct platform_device *pdev)
spin_lock_init(&ci->lock);
mutex_init(&ci->mutex);
+ INIT_WORK(&ci->power_lost_work, ci_power_lost_work);
+
ci->dev = dev;
ci->platdata = dev_get_platdata(dev);
ci->imx28_write_fix = !!(ci->platdata->flags &
@@ -1396,25 +1419,6 @@ static int ci_suspend(struct device *dev)
return 0;
}
-static void ci_handle_power_lost(struct ci_hdrc *ci)
-{
- enum ci_role role;
-
- disable_irq_nosync(ci->irq);
- if (!ci_otg_is_fsm_mode(ci)) {
- role = ci_get_role(ci);
-
- if (ci->role != role) {
- ci_handle_id_switch(ci);
- } else if (role == CI_ROLE_GADGET) {
- if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
- usb_gadget_vbus_connect(&ci->gadget);
- }
- }
-
- enable_irq(ci->irq);
-}
-
static int ci_resume(struct device *dev)
{
struct ci_hdrc *ci = dev_get_drvdata(dev);
@@ -1446,7 +1450,7 @@ static int ci_resume(struct device *dev)
ci_role(ci)->resume(ci, power_lost);
if (power_lost)
- ci_handle_power_lost(ci);
+ queue_work(system_freezable_wq, &ci->power_lost_work);
if (ci->supports_runtime_pm) {
pm_runtime_disable(dev);
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 84d91b1c1eed..0886b19d2e1c 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi)
return ret;
}
- root = debugfs_create_dir(dev_name(dev), ulpi_root);
+ root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root);
debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops);
dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n",
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 12b6dfeaf658..edf74458474a 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1664,9 +1664,10 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
usb_put_urb(urb);
}
-static void usb_giveback_urb_bh(struct tasklet_struct *t)
+static void usb_giveback_urb_bh(struct work_struct *work)
{
- struct giveback_urb_bh *bh = from_tasklet(bh, t, bh);
+ struct giveback_urb_bh *bh =
+ container_of(work, struct giveback_urb_bh, bh);
struct list_head local_list;
spin_lock_irq(&bh->lock);
@@ -1691,9 +1692,9 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
spin_lock_irq(&bh->lock);
if (!list_empty(&bh->head)) {
if (bh->high_prio)
- tasklet_hi_schedule(&bh->bh);
+ queue_work(system_bh_highpri_wq, &bh->bh);
else
- tasklet_schedule(&bh->bh);
+ queue_work(system_bh_wq, &bh->bh);
}
bh->running = false;
spin_unlock_irq(&bh->lock);
@@ -1706,7 +1707,7 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
* @status: completion status code for the URB.
*
* Context: atomic. The completion callback is invoked in caller's context.
- * For HCDs with HCD_BH flag set, the completion callback is invoked in tasklet
+ * For HCDs with HCD_BH flag set, the completion callback is invoked in BH
* context (except for URBs submitted to the root hub which always complete in
* caller's context).
*
@@ -1725,7 +1726,7 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
struct giveback_urb_bh *bh;
bool running;
- /* pass status to tasklet via unlinked */
+ /* pass status to BH via unlinked */
if (likely(!urb->unlinked))
urb->unlinked = status;
@@ -1747,9 +1748,9 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
if (running)
;
else if (bh->high_prio)
- tasklet_hi_schedule(&bh->bh);
+ queue_work(system_bh_highpri_wq, &bh->bh);
else
- tasklet_schedule(&bh->bh);
+ queue_work(system_bh_wq, &bh->bh);
}
EXPORT_SYMBOL_GPL(usb_hcd_giveback_urb);
@@ -2540,7 +2541,7 @@ static void init_giveback_urb_bh(struct giveback_urb_bh *bh)
spin_lock_init(&bh->lock);
INIT_LIST_HEAD(&bh->head);
- tasklet_setup(&bh->bh, usb_giveback_urb_bh);
+ INIT_WORK(&bh->bh, usb_giveback_urb_bh);
}
struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver,
@@ -2926,7 +2927,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
&& device_can_wakeup(&hcd->self.root_hub->dev))
dev_dbg(hcd->self.controller, "supports USB remote wakeup\n");
- /* initialize tasklets */
+ /* initialize BHs */
init_giveback_urb_bh(&hcd->high_prio_bh);
hcd->high_prio_bh.high_prio = true;
init_giveback_urb_bh(&hcd->low_prio_bh);
@@ -3036,7 +3037,7 @@ void usb_remove_hcd(struct usb_hcd *hcd)
mutex_unlock(&usb_bus_idr_lock);
/*
- * tasklet_kill() isn't needed here because:
+ * flush_work() isn't needed here because:
* - driver's disconnect() called from usb_disconnect() should
* make sure its URBs are completed during the disconnect()
* callback
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index ffd7c99e24a3..e38a4124f610 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2053,9 +2053,19 @@ static void update_port_device_state(struct usb_device *udev)
if (udev->parent) {
hub = usb_hub_to_struct_hub(udev->parent);
- port_dev = hub->ports[udev->portnum - 1];
- WRITE_ONCE(port_dev->state, udev->state);
- sysfs_notify_dirent(port_dev->state_kn);
+
+ /*
+ * The Link Layer Validation System Driver (lvstest)
+ * has a test step to unbind the hub before running the
+ * rest of the procedure. This triggers hub_disconnect
+ * which will set the hub's maxchild to 0, further
+ * resulting in usb_hub_to_struct_hub returning NULL.
+ */
+ if (hub) {
+ port_dev = hub->ports[udev->portnum - 1];
+ WRITE_ONCE(port_dev->state, udev->state);
+ sysfs_notify_dirent(port_dev->state_kn);
+ }
}
}
@@ -2388,17 +2398,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev)
}
} else if (desc->bLength == sizeof
(struct usb_otg_descriptor)) {
- /* Set a_alt_hnp_support for legacy otg device */
- err = usb_control_msg(udev,
- usb_sndctrlpipe(udev, 0),
- USB_REQ_SET_FEATURE, 0,
- USB_DEVICE_A_ALT_HNP_SUPPORT,
- 0, NULL, 0,
- USB_CTRL_SET_TIMEOUT);
- if (err < 0)
- dev_err(&udev->dev,
- "set a_alt_hnp_support failed: %d\n",
- err);
+ /*
+ * We are operating on a legacy OTP device
+ * These should be told that they are operating
+ * on the wrong port if we have another port that does
+ * support HNP
+ */
+ if (bus->otg_port != 0) {
+ /* Set a_alt_hnp_support for legacy otg device */
+ err = usb_control_msg(udev,
+ usb_sndctrlpipe(udev, 0),
+ USB_REQ_SET_FEATURE, 0,
+ USB_DEVICE_A_ALT_HNP_SUPPORT,
+ 0, NULL, 0,
+ USB_CTRL_SET_TIMEOUT);
+ if (err < 0)
+ dev_err(&udev->dev,
+ "set a_alt_hnp_support failed: %d\n",
+ err);
+ }
}
}
#endif
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index c628c1abc907..4d63496f98b6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -573,7 +573,7 @@ static int match_location(struct usb_device *peer_hdev, void *p)
struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev);
struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent);
- if (!peer_hub)
+ if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED)
return 0;
hcd = bus_to_hcd(hdev->bus);
@@ -584,7 +584,8 @@ static int match_location(struct usb_device *peer_hdev, void *p)
for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) {
peer = peer_hub->ports[port1 - 1];
- if (peer && peer->location == port_dev->location) {
+ if (peer && peer->connect_type != USB_PORT_NOT_USED &&
+ peer->location == port_dev->location) {
link_peers_report(port_dev, peer);
return 1; /* done */
}
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index e3eea965e57b..e120611a5174 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -376,7 +376,6 @@
/* Global HWPARAMS4 Register */
#define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n) (((n) & (0x0f << 13)) >> 13)
#define DWC3_MAX_HIBER_SCRATCHBUFS 15
-#define DWC3_EXT_BUFF_CONTROL BIT(21)
/* Global HWPARAMS6 Register */
#define DWC3_GHWPARAMS6_BCSUPPORT BIT(14)
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 6604845c397c..39564e17f3b0 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -51,6 +51,8 @@
#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f
#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
+#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1
+#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
@@ -421,6 +423,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) },
+ { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) },
+ { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 019368f8e9c4..28f49400f3e8 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -673,12 +673,6 @@ static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action)
params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(bInterval_m1);
}
- if (dep->endpoint.fifo_mode) {
- if (!(dwc->hwparams.hwparams4 & DWC3_EXT_BUFF_CONTROL))
- return -EINVAL;
- params.param1 |= DWC3_DEPCFG_EBC_HWO_NOWB | DWC3_DEPCFG_USE_EBC;
- }
-
return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, &params);
}
@@ -2656,6 +2650,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
int ret;
spin_lock_irqsave(&dwc->lock, flags);
+ if (!dwc->pullups_connected) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return 0;
+ }
+
dwc->connected = false;
/*
@@ -4709,15 +4708,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
unsigned long flags;
int ret;
- if (!dwc->gadget_driver)
- return 0;
-
ret = dwc3_gadget_soft_disconnect(dwc);
if (ret)
goto err;
spin_lock_irqsave(&dwc->lock, flags);
- dwc3_disconnect_gadget(dwc);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h
index fd7a4e94397e..55a56cf67d73 100644
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -26,8 +26,6 @@ struct dwc3;
#define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10)
#define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11)
#define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13)
-#define DWC3_DEPCFG_EBC_HWO_NOWB BIT(14)
-#define DWC3_DEPCFG_USE_EBC BIT(15)
#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16)
#define DWC3_DEPCFG_STREAM_CAPABLE BIT(24)
#define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25)
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 61f57fe5bb78..43230915323c 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -61,7 +61,7 @@ out:
int dwc3_host_init(struct dwc3 *dwc)
{
- struct property_entry props[4];
+ struct property_entry props[5];
struct platform_device *xhci;
int ret, irq;
int prop_idx = 0;
@@ -89,6 +89,8 @@ int dwc3_host_init(struct dwc3 *dwc)
memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props));
+ props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk");
+
if (dwc->usb3_lpm_capable)
props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable");
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 722a3ab2b337..c265a1f62fc1 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -545,21 +545,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,
static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh)
{
+ int rc;
+
if (!fsg_is_set(common))
return false;
bh->state = BUF_STATE_SENDING;
- if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq))
+ rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq);
+ if (rc) {
bh->state = BUF_STATE_EMPTY;
+ if (rc == -ESHUTDOWN) {
+ common->running = 0;
+ return false;
+ }
+ }
return true;
}
static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh)
{
+ int rc;
+
if (!fsg_is_set(common))
return false;
bh->state = BUF_STATE_RECEIVING;
- if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq))
+ rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq);
+ if (rc) {
bh->state = BUF_STATE_FULL;
+ if (rc == -ESHUTDOWN) {
+ common->running = 0;
+ return false;
+ }
+ }
return true;
}
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index a1575a0ca568..28f4e6552e84 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -105,8 +105,8 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f)
/*
* Although max mtu as dictated by u_ether is 15412 bytes, setting
- * max_segment_sizeto 15426 would not be efficient. If user chooses segment
- * size to be (>= 8192), then we can't aggregate more than one buffer in each
+ * max_segment_size to 15426 would not be efficient. If user chooses segment
+ * size to be (>= 8192), then we can't aggregate more than one buffer in each
* NTB (assuming each packet coming from network layer is >= 8192 bytes) as ep
* maxpacket limit is 16384. So let max_segment_size be limited to 8000 to allow
* at least 2 packets to be aggregated reducing wastage of NTB buffer space
@@ -1338,7 +1338,15 @@ parse_ntb:
"Parsed NTB with %d frames\n", dgram_counter);
to_process -= block_len;
- if (to_process != 0) {
+
+ /*
+ * Windows NCM driver avoids USB ZLPs by adding a 1-byte
+ * zero pad as needed.
+ */
+ if (to_process == 1 &&
+ (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) {
+ to_process--;
+ } else if ((to_process > 0) && (block_len != 0)) {
ntb_ptr = (unsigned char *)(ntb_ptr + block_len);
goto parse_ntb;
}
@@ -1489,7 +1497,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
ncm_data_intf.bInterfaceNumber = status;
ncm_union_desc.bSlaveInterface0 = status;
- ecm_desc.wMaxSegmentSize = ncm_opts->max_segment_size;
+ ecm_desc.wMaxSegmentSize = cpu_to_le16(ncm_opts->max_segment_size);
status = -ENODEV;
@@ -1685,7 +1693,7 @@ static struct usb_function_instance *ncm_alloc_inst(void)
kfree(opts);
return ERR_CAST(net);
}
- opts->max_segment_size = cpu_to_le16(ETH_FRAME_LEN);
+ opts->max_segment_size = ETH_FRAME_LEN;
INIT_LIST_HEAD(&opts->ncm_os_desc.ext_prop);
descs[0] = &opts->ncm_os_desc;
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 10c5d7f726a1..f90eeecf27de 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2036,7 +2036,8 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev)
static inline int machine_without_vbus_sense(void)
{
- return machine_is_omap_osk() || machine_is_sx1();
+ return machine_is_omap_osk() || machine_is_omap_palmte() ||
+ machine_is_sx1();
}
static int omap_udc_start(struct usb_gadget *g,
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
index 4f8617210d85..169f72665739 100644
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -274,7 +274,6 @@ struct pch_udc_cfg_data {
* @td_data: for data request
* @dev: reference to device struct
* @offset_addr: offset address of ep register
- * @desc: for this ep
* @queue: queue for requests
* @num: endpoint number
* @in: endpoint is IN
diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c
index ac3fc5970315..cfebb833668e 100644
--- a/drivers/usb/host/uhci-grlib.c
+++ b/drivers/usb/host/uhci-grlib.c
@@ -22,6 +22,7 @@
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
static int uhci_grlib_init(struct usb_hcd *hcd)
{
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 4460fa7e9fab..a7716202a8dd 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1861,14 +1861,14 @@ void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrup
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
unsigned int intr_num;
+ spin_lock_irq(&xhci->lock);
+
/* interrupter 0 is primary interrupter, don't touch it */
- if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters)
+ if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) {
xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n");
-
- /* fixme, should we check xhci->interrupter[intr_num] == ir */
- /* fixme locking */
-
- spin_lock_irq(&xhci->lock);
+ spin_unlock_irq(&xhci->lock);
+ return;
+ }
intr_num = ir->intr_num;
@@ -2322,7 +2322,7 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir,
u64 erst_base;
u32 erst_size;
- if (intr_num > xhci->max_interrupters) {
+ if (intr_num >= xhci->max_interrupters) {
xhci_warn(xhci, "Can't add interrupter %d, max interrupters %d\n",
intr_num, xhci->max_interrupters);
return -EINVAL;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index f04fde19f551..3d071b875308 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -253,6 +253,9 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
if (device_property_read_bool(tmpdev, "quirk-broken-port-ped"))
xhci->quirks |= XHCI_BROKEN_PORT_PED;
+ if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk"))
+ xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK;
+
device_property_read_u32(tmpdev, "imod-interval-ns",
&xhci->imod_interval);
}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 33806ae966f9..4f64b814d4aa 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -326,7 +326,13 @@ static unsigned int xhci_ring_expansion_needed(struct xhci_hcd *xhci, struct xhc
/* how many trbs will be queued past the enqueue segment? */
trbs_past_seg = enq_used + num_trbs - (TRBS_PER_SEGMENT - 1);
- if (trbs_past_seg <= 0)
+ /*
+ * Consider expanding the ring already if num_trbs fills the current
+ * segment (i.e. trbs_past_seg == 0), not only when num_trbs goes into
+ * the next segment. Avoids confusing full ring with special empty ring
+ * case below
+ */
+ if (trbs_past_seg < 0)
return 0;
/* Empty ring special case, enqueue stuck on link trb while dequeue advanced */
@@ -2376,6 +2382,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
/* handle completion code */
switch (trb_comp_code) {
case COMP_SUCCESS:
+ /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
+ if (td->error_mid_td)
+ break;
if (remaining) {
frame->status = short_framestatus;
if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
@@ -2391,9 +2400,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
case COMP_BANDWIDTH_OVERRUN_ERROR:
frame->status = -ECOMM;
break;
- case COMP_ISOCH_BUFFER_OVERRUN:
case COMP_BABBLE_DETECTED_ERROR:
+ sum_trbs_for_length = true;
+ fallthrough;
+ case COMP_ISOCH_BUFFER_OVERRUN:
frame->status = -EOVERFLOW;
+ if (ep_trb != td->last_trb)
+ td->error_mid_td = true;
break;
case COMP_INCOMPATIBLE_DEVICE_ERROR:
case COMP_STALL_ERROR:
@@ -2401,8 +2414,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
break;
case COMP_USB_TRANSACTION_ERROR:
frame->status = -EPROTO;
+ sum_trbs_for_length = true;
if (ep_trb != td->last_trb)
- return 0;
+ td->error_mid_td = true;
break;
case COMP_STOPPED:
sum_trbs_for_length = true;
@@ -2422,6 +2436,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
break;
}
+ if (td->urb_length_set)
+ goto finish_td;
+
if (sum_trbs_for_length)
frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
ep_trb_len - remaining;
@@ -2430,6 +2447,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
td->urb->actual_length += frame->actual_length;
+finish_td:
+ /* Don't give back TD yet if we encountered an error mid TD */
+ if (td->error_mid_td && ep_trb != td->last_trb) {
+ xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
+ td->urb_length_set = true;
+ return 0;
+ }
+
return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
}
@@ -2808,17 +2833,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
}
if (!ep_seg) {
- if (!ep->skip ||
- !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
- /* Some host controllers give a spurious
- * successful event after a short transfer.
- * Ignore it.
- */
- if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
- ep_ring->last_td_was_short) {
- ep_ring->last_td_was_short = false;
- goto cleanup;
+
+ if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+ skip_isoc_td(xhci, td, ep, status);
+ goto cleanup;
+ }
+
+ /*
+ * Some hosts give a spurious success event after a short
+ * transfer. Ignore it.
+ */
+ if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+ ep_ring->last_td_was_short) {
+ ep_ring->last_td_was_short = false;
+ goto cleanup;
+ }
+
+ /*
+ * xhci 4.10.2 states isoc endpoints should continue
+ * processing the next TD if there was an error mid TD.
+ * So host like NEC don't generate an event for the last
+ * isoc TRB even if the IOC flag is set.
+ * xhci 4.9.1 states that if there are errors in mult-TRB
+ * TDs xHC should generate an error for that TRB, and if xHC
+ * proceeds to the next TD it should genete an event for
+ * any TRB with IOC flag on the way. Other host follow this.
+ * So this event might be for the next TD.
+ */
+ if (td->error_mid_td &&
+ !list_is_last(&td->td_list, &ep_ring->td_list)) {
+ struct xhci_td *td_next = list_next_entry(td, td_list);
+
+ ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
+ td_next->last_trb, ep_trb_dma, false);
+ if (ep_seg) {
+ /* give back previous TD, start handling new */
+ xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
+ ep_ring->dequeue = td->last_trb;
+ ep_ring->deq_seg = td->last_trb_seg;
+ inc_deq(xhci, ep_ring);
+ xhci_td_cleanup(xhci, td, ep_ring, td->status);
+ td = td_next;
}
+ }
+
+ if (!ep_seg) {
/* HC is busted, give up! */
xhci_err(xhci,
"ERROR Transfer event TRB DMA ptr not "
@@ -2830,9 +2889,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
ep_trb_dma, true);
return -ESHUTDOWN;
}
-
- skip_isoc_td(xhci, td, ep, status);
- goto cleanup;
}
if (trb_comp_code == COMP_SHORT_PACKET)
ep_ring->last_td_was_short = true;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index a5c72a634e6a..6f82d404883f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1549,6 +1549,7 @@ struct xhci_td {
struct xhci_segment *bounce_seg;
/* actual_length of the URB has already been set */
bool urb_length_set;
+ bool error_mid_td;
unsigned int num_trbs;
};
diff --git a/drivers/usb/isp1760/isp1760-hcd.c b/drivers/usb/isp1760/isp1760-hcd.c
index 76862ba40f35..0e5e4cb74c87 100644
--- a/drivers/usb/isp1760/isp1760-hcd.c
+++ b/drivers/usb/isp1760/isp1760-hcd.c
@@ -2521,21 +2521,19 @@ static const struct hc_driver isp1760_hc_driver = {
int __init isp1760_init_kmem_once(void)
{
urb_listitem_cachep = kmem_cache_create("isp1760_urb_listitem",
- sizeof(struct urb_listitem), 0, SLAB_TEMPORARY |
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct urb_listitem), 0, SLAB_TEMPORARY, NULL);
if (!urb_listitem_cachep)
return -ENOMEM;
qtd_cachep = kmem_cache_create("isp1760_qtd",
- sizeof(struct isp1760_qtd), 0, SLAB_TEMPORARY |
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct isp1760_qtd), 0, SLAB_TEMPORARY, NULL);
if (!qtd_cachep)
goto destroy_urb_listitem;
qh_cachep = kmem_cache_create("isp1760_qh", sizeof(struct isp1760_qh),
- 0, SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
+ 0, SLAB_TEMPORARY, NULL);
if (!qh_cachep)
goto destroy_qtd;
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index ae41578bd014..70165dd86b5d 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -21,7 +21,9 @@ static const struct class role_class = {
struct usb_role_switch {
struct device dev;
struct mutex lock; /* device lock*/
+ struct module *module; /* the module this device depends on */
enum usb_role role;
+ bool registered;
/* From descriptor */
struct device *usb2_port;
@@ -48,6 +50,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role)
if (IS_ERR_OR_NULL(sw))
return 0;
+ if (!sw->registered)
+ return -EOPNOTSUPP;
+
mutex_lock(&sw->lock);
ret = sw->set(sw, role);
@@ -73,7 +78,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw)
{
enum usb_role role;
- if (IS_ERR_OR_NULL(sw))
+ if (IS_ERR_OR_NULL(sw) || !sw->registered)
return USB_ROLE_NONE;
mutex_lock(&sw->lock);
@@ -135,7 +140,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev)
usb_role_switch_match);
if (!IS_ERR_OR_NULL(sw))
- WARN_ON(!try_module_get(sw->dev.parent->driver->owner));
+ WARN_ON(!try_module_get(sw->module));
return sw;
}
@@ -157,7 +162,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode)
sw = fwnode_connection_find_match(fwnode, "usb-role-switch",
NULL, usb_role_switch_match);
if (!IS_ERR_OR_NULL(sw))
- WARN_ON(!try_module_get(sw->dev.parent->driver->owner));
+ WARN_ON(!try_module_get(sw->module));
return sw;
}
@@ -172,7 +177,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get);
void usb_role_switch_put(struct usb_role_switch *sw)
{
if (!IS_ERR_OR_NULL(sw)) {
- module_put(sw->dev.parent->driver->owner);
+ module_put(sw->module);
put_device(&sw->dev);
}
}
@@ -189,15 +194,18 @@ struct usb_role_switch *
usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode)
{
struct device *dev;
+ struct usb_role_switch *sw = NULL;
if (!fwnode)
return NULL;
dev = class_find_device_by_fwnode(&role_class, fwnode);
- if (dev)
- WARN_ON(!try_module_get(dev->parent->driver->owner));
+ if (dev) {
+ sw = to_role_switch(dev);
+ WARN_ON(!try_module_get(sw->module));
+ }
- return dev ? to_role_switch(dev) : NULL;
+ return sw;
}
EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode);
@@ -338,6 +346,7 @@ usb_role_switch_register(struct device *parent,
sw->set = desc->set;
sw->get = desc->get;
+ sw->module = parent->driver->owner;
sw->dev.parent = parent;
sw->dev.fwnode = desc->fwnode;
sw->dev.class = &role_class;
@@ -352,6 +361,8 @@ usb_role_switch_register(struct device *parent,
return ERR_PTR(ret);
}
+ sw->registered = true;
+
/* TODO: Symlinks for the host port and the device controller. */
return sw;
@@ -366,8 +377,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register);
*/
void usb_role_switch_unregister(struct usb_role_switch *sw)
{
- if (!IS_ERR_OR_NULL(sw))
+ if (!IS_ERR_OR_NULL(sw)) {
+ sw->registered = false;
device_unregister(&sw->dev);
+ }
}
EXPORT_SYMBOL_GPL(usb_role_switch_unregister);
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 1e61fe043171..923e0ed85444 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
{ USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */
{ USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */
+ { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */
{ USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */
{ USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */
{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 72390dbf0769..2ae124c49d44 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2269,6 +2269,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */
.driver_info = RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index b1e844bf31f8..703a9c563557 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = {
{DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */
{DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */
{DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */
+ {DEVICE_SWI(0x413c, 0x8217)}, /* Dell Wireless DW5826e */
+ {DEVICE_SWI(0x413c, 0x8218)}, /* Dell Wireless DW5826e QDL */
/* Huawei devices */
{DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index 4e0eef1440b7..300aeef160e7 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -1105,7 +1105,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id)
static int isd200_get_inquiry_data( struct us_data *us )
{
struct isd200_info *info = (struct isd200_info *)us->extra;
- int retStatus = ISD200_GOOD;
+ int retStatus;
u16 *id = info->id;
usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n");
@@ -1137,6 +1137,13 @@ static int isd200_get_inquiry_data( struct us_data *us )
isd200_fix_driveid(id);
isd200_dump_driveid(us, id);
+ /* Prevent division by 0 in isd200_scsi_to_ata() */
+ if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) {
+ usb_stor_dbg(us, " Invalid ATA Identify data\n");
+ retStatus = ISD200_ERROR;
+ goto Done;
+ }
+
memset(&info->InquiryData, 0, sizeof(info->InquiryData));
/* Standard IDE interface only supports disks */
@@ -1202,6 +1209,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
}
}
+ Done:
usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus);
return(retStatus);
@@ -1481,22 +1489,27 @@ static int isd200_init_info(struct us_data *us)
static int isd200_Initialization(struct us_data *us)
{
+ int rc = 0;
+
usb_stor_dbg(us, "ISD200 Initialization...\n");
/* Initialize ISD200 info struct */
- if (isd200_init_info(us) == ISD200_ERROR) {
+ if (isd200_init_info(us) < 0) {
usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n");
+ rc = -ENOMEM;
} else {
/* Get device specific data */
- if (isd200_get_inquiry_data(us) != ISD200_GOOD)
+ if (isd200_get_inquiry_data(us) != ISD200_GOOD) {
usb_stor_dbg(us, "ISD200 Initialization Failure\n");
- else
+ rc = -EINVAL;
+ } else {
usb_stor_dbg(us, "ISD200 Initialization complete\n");
+ }
}
- return 0;
+ return rc;
}
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index c54e9805da53..12cf9940e5b6 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -180,6 +180,13 @@ static int slave_configure(struct scsi_device *sdev)
sdev->use_192_bytes_for_3f = 1;
/*
+ * Some devices report generic values until the media has been
+ * accessed. Force a READ(10) prior to querying device
+ * characteristics.
+ */
+ sdev->read_before_ms = 1;
+
+ /*
* Some devices don't like MODE SENSE with page=0x3f,
* which is the command used for checking if a device
* is write-protected. Now that we tell the sd driver
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9707f53cfda9..71ace274761f 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -879,6 +879,13 @@ static int uas_slave_configure(struct scsi_device *sdev)
sdev->guess_capacity = 1;
/*
+ * Some devices report generic values until the media has been
+ * accessed. Force a READ(10) prior to querying device
+ * characteristics.
+ */
+ sdev->read_before_ms = 1;
+
+ /*
* Some devices don't like MODE SENSE with page=0x3f,
* which is the command used for checking if a device
* is write-protected. Now that we tell the sd driver
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index f81bec0c7b86..f8ea3054be54 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -559,16 +559,21 @@ static ssize_t hpd_show(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR_RO(hpd);
-static struct attribute *dp_altmode_attrs[] = {
+static struct attribute *displayport_attrs[] = {
&dev_attr_configuration.attr,
&dev_attr_pin_assignment.attr,
&dev_attr_hpd.attr,
NULL
};
-static const struct attribute_group dp_altmode_group = {
+static const struct attribute_group displayport_group = {
.name = "displayport",
- .attrs = dp_altmode_attrs,
+ .attrs = displayport_attrs,
+};
+
+static const struct attribute_group *displayport_groups[] = {
+ &displayport_group,
+ NULL,
};
int dp_altmode_probe(struct typec_altmode *alt)
@@ -576,7 +581,6 @@ int dp_altmode_probe(struct typec_altmode *alt)
const struct typec_altmode *port = typec_altmode_get_partner(alt);
struct fwnode_handle *fwnode;
struct dp_altmode *dp;
- int ret;
/* FIXME: Port can only be DFP_U. */
@@ -587,10 +591,6 @@ int dp_altmode_probe(struct typec_altmode *alt)
DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo)))
return -ENODEV;
- ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
- if (ret)
- return ret;
-
dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
if (!dp)
return -ENOMEM;
@@ -624,7 +624,6 @@ void dp_altmode_remove(struct typec_altmode *alt)
{
struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
- sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group);
cancel_work_sync(&dp->work);
if (dp->connector_fwnode) {
@@ -649,6 +648,7 @@ static struct typec_altmode_driver dp_altmode_driver = {
.driver = {
.name = "typec_displayport",
.owner = THIS_MODULE,
+ .dev_groups = displayport_groups,
},
};
module_typec_altmode_driver(dp_altmode_driver);
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 5945e3a2b0f7..096597231027 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -3743,9 +3743,6 @@ static void tcpm_detach(struct tcpm_port *port)
if (tcpm_port_is_disconnected(port))
port->hard_reset_count = 0;
- port->try_src_count = 0;
- port->try_snk_count = 0;
-
if (!port->attached)
return;
@@ -4876,8 +4873,11 @@ static void run_state_machine(struct tcpm_port *port)
break;
case PORT_RESET:
tcpm_reset_port(port);
- tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ?
- TYPEC_CC_RD : tcpm_rp_cc(port));
+ if (port->self_powered)
+ tcpm_set_cc(port, TYPEC_CC_OPEN);
+ else
+ tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ?
+ TYPEC_CC_RD : tcpm_rp_cc(port));
tcpm_set_state(port, PORT_RESET_WAIT_OFF,
PD_T_ERROR_RECOVERY);
break;
@@ -6848,7 +6848,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
if (err)
goto out_role_sw_put;
- port->typec_caps.pd = port->pds[0];
+ if (port->pds)
+ port->typec_caps.pd = port->pds[0];
port->typec_port = typec_register_port(port->dev, &port->typec_caps);
if (IS_ERR(port->typec_port)) {
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 5392ec698959..14f5a7bfae2e 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -938,7 +938,9 @@ static void ucsi_handle_connector_change(struct work_struct *work)
clear_bit(EVENT_PENDING, &con->ucsi->flags);
+ mutex_lock(&ucsi->ppm_lock);
ret = ucsi_acknowledge_connector_change(ucsi);
+ mutex_unlock(&ucsi->ppm_lock);
if (ret)
dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 6bbf490ac401..928eacbeb21a 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -25,6 +25,8 @@ struct ucsi_acpi {
unsigned long flags;
guid_t guid;
u64 cmd;
+ bool dell_quirk_probed;
+ bool dell_quirk_active;
};
static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
@@ -73,9 +75,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
const void *val, size_t val_len)
{
struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI;
int ret;
- set_bit(COMMAND_PENDING, &ua->flags);
+ if (ack)
+ set_bit(ACK_PENDING, &ua->flags);
+ else
+ set_bit(COMMAND_PENDING, &ua->flags);
ret = ucsi_acpi_async_write(ucsi, offset, val, val_len);
if (ret)
@@ -85,7 +91,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
ret = -ETIMEDOUT;
out_clear_bit:
- clear_bit(COMMAND_PENDING, &ua->flags);
+ if (ack)
+ clear_bit(ACK_PENDING, &ua->flags);
+ else
+ clear_bit(COMMAND_PENDING, &ua->flags);
return ret;
}
@@ -119,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = {
.async_write = ucsi_acpi_async_write
};
-static const struct dmi_system_id zenbook_dmi_id[] = {
+/*
+ * Some Dell laptops expect that an ACK command with the
+ * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate)
+ * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set.
+ * If this is not done events are not delivered to OSPM and
+ * subsequent commands will timeout.
+ */
+static int
+ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset,
+ const void *val, size_t val_len)
+{
+ struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ u64 cmd = *(u64 *)val, ack = 0;
+ int ret;
+
+ if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI &&
+ cmd & UCSI_ACK_CONNECTOR_CHANGE)
+ ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
+
+ ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
+ if (ret != 0)
+ return ret;
+ if (ack == 0)
+ return ret;
+
+ if (!ua->dell_quirk_probed) {
+ ua->dell_quirk_probed = true;
+
+ cmd = UCSI_GET_CAPABILITY;
+ ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd,
+ sizeof(cmd));
+ if (ret == 0)
+ return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL,
+ &ack, sizeof(ack));
+ if (ret != -ETIMEDOUT)
+ return ret;
+
+ ua->dell_quirk_active = true;
+ dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n");
+ dev_err(ua->dev, "Firmware bug: Enabling workaround\n");
+ }
+
+ if (!ua->dell_quirk_active)
+ return ret;
+
+ return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack));
+}
+
+static const struct ucsi_operations ucsi_dell_ops = {
+ .read = ucsi_acpi_read,
+ .sync_write = ucsi_dell_sync_write,
+ .async_write = ucsi_acpi_async_write
+};
+
+static const struct dmi_system_id ucsi_acpi_quirks[] = {
{
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"),
},
+ .driver_data = (void *)&ucsi_zenbook_ops,
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ },
+ .driver_data = (void *)&ucsi_dell_ops,
},
{ }
};
@@ -142,8 +212,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data)
if (UCSI_CCI_CONNECTOR(cci))
ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci));
- if (test_bit(COMMAND_PENDING, &ua->flags) &&
- cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE))
+ if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags))
+ complete(&ua->complete);
+ if (cci & UCSI_CCI_COMMAND_COMPLETE &&
+ test_bit(COMMAND_PENDING, &ua->flags))
complete(&ua->complete);
}
@@ -151,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev)
{
struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
const struct ucsi_operations *ops = &ucsi_acpi_ops;
+ const struct dmi_system_id *id;
struct ucsi_acpi *ua;
struct resource *res;
acpi_status status;
@@ -180,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev)
init_completion(&ua->complete);
ua->dev = &pdev->dev;
- if (dmi_check_system(zenbook_dmi_id))
- ops = &ucsi_zenbook_ops;
+ id = dmi_first_match(ucsi_acpi_quirks);
+ if (id)
+ ops = id->driver_data;
ua->ucsi = ucsi_create(&pdev->dev, ops);
if (IS_ERR(ua->ucsi))
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 53a7ede8556d..faccc942b381 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -301,6 +301,7 @@ static const struct of_device_id pmic_glink_ucsi_of_quirks[] = {
{ .compatible = "qcom,sc8180x-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sc8280xp-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sm8350-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
+ { .compatible = "qcom,sm8550-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{}
};
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f2ed7167c848..4b2fcb228a0a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -141,10 +141,8 @@ struct vhost_net {
unsigned tx_zcopy_err;
/* Flush in progress. Protected by tx vq lock. */
bool tx_flush;
- /* Private page frag */
- struct page_frag page_frag;
- /* Refcount bias of page frag */
- int refcnt_bias;
+ /* Private page frag cache */
+ struct page_frag_cache pf_cache;
};
static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
!vhost_vq_avail_empty(vq->dev, vq);
}
-static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
- struct page_frag *pfrag, gfp_t gfp)
-{
- if (pfrag->page) {
- if (pfrag->offset + sz <= pfrag->size)
- return true;
- __page_frag_cache_drain(pfrag->page, net->refcnt_bias);
- }
-
- pfrag->offset = 0;
- net->refcnt_bias = 0;
- if (SKB_FRAG_PAGE_ORDER) {
- /* Avoid direct reclaim but allow kswapd to wake */
- pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP | __GFP_NOWARN |
- __GFP_NORETRY,
- SKB_FRAG_PAGE_ORDER);
- if (likely(pfrag->page)) {
- pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
- goto done;
- }
- }
- pfrag->page = alloc_page(gfp);
- if (likely(pfrag->page)) {
- pfrag->size = PAGE_SIZE;
- goto done;
- }
- return false;
-
-done:
- net->refcnt_bias = USHRT_MAX;
- page_ref_add(pfrag->page, USHRT_MAX - 1);
- return true;
-}
-
#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
@@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct vhost_net *net = container_of(vq->dev, struct vhost_net,
dev);
struct socket *sock = vhost_vq_get_backend(vq);
- struct page_frag *alloc_frag = &net->page_frag;
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
struct tun_xdp_hdr *hdr;
@@ -710,6 +672,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
int sock_hlen = nvq->sock_hlen;
void *buf;
int copied;
+ int ret;
if (unlikely(len < nvq->sock_hlen))
return -EFAULT;
@@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
return -ENOSPC;
buflen += SKB_DATA_ALIGN(len + pad);
- alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
- if (unlikely(!vhost_net_page_frag_refill(net, buflen,
- alloc_frag, GFP_KERNEL)))
+ buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL,
+ SMP_CACHE_BYTES);
+ if (unlikely(!buf))
return -ENOMEM;
- buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
- copied = copy_page_from_iter(alloc_frag->page,
- alloc_frag->offset +
- offsetof(struct tun_xdp_hdr, gso),
- sock_hlen, from);
- if (copied != sock_hlen)
- return -EFAULT;
+ copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso),
+ sock_hlen, from);
+ if (copied != sock_hlen) {
+ ret = -EFAULT;
+ goto err;
+ }
hdr = buf;
gso = &hdr->gso;
@@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
vhost16_to_cpu(vq, gso->csum_start) +
vhost16_to_cpu(vq, gso->csum_offset) + 2);
- if (vhost16_to_cpu(vq, gso->hdr_len) > len)
- return -EINVAL;
+ if (vhost16_to_cpu(vq, gso->hdr_len) > len) {
+ ret = -EINVAL;
+ goto err;
+ }
}
len -= sock_hlen;
- copied = copy_page_from_iter(alloc_frag->page,
- alloc_frag->offset + pad,
- len, from);
- if (copied != len)
- return -EFAULT;
+ copied = copy_from_iter(buf + pad, len, from);
+ if (copied != len) {
+ ret = -EFAULT;
+ goto err;
+ }
xdp_init_buff(xdp, buflen, NULL);
xdp_prepare_buff(xdp, buf, pad, len, true);
hdr->buflen = buflen;
- --net->refcnt_bias;
- alloc_frag->offset += buflen;
-
++nvq->batched_xdp;
return 0;
+
+err:
+ page_frag_free(buf);
+ return ret;
}
static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
@@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
vqs[VHOST_NET_VQ_RX]);
f->private_data = n;
- n->page_frag.page = NULL;
- n->refcnt_bias = 0;
+ n->pf_cache.va = NULL;
return 0;
}
@@ -1422,8 +1386,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue);
kfree(n->vqs[VHOST_NET_VQ_TX].xdp);
kfree(n->dev.vqs);
- if (n->page_frag.page)
- __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias);
+ page_frag_cache_drain(&n->pf_cache);
kvfree(n);
return 0;
}
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 63af6ab034b5..46823c2e2ba1 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -631,8 +631,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
if (logo_lines > vc->vc_bottom) {
logo_shown = FBCON_LOGO_CANSHOW;
- printk(KERN_INFO
- "fbcon_init: disable boot-logo (boot-logo bigger than screen).\n");
+ pr_info("fbcon: disable boot-logo (boot-logo bigger than screen).\n");
} else {
logo_shown = FBCON_LOGO_DRAW;
vc->vc_top = logo_lines;
@@ -2400,11 +2399,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
struct fbcon_ops *ops = info->fbcon_par;
struct fbcon_display *p = &fb_display[vc->vc_num];
int resize, ret, old_userfont, old_width, old_height, old_charcount;
- char *old_data = NULL;
+ u8 *old_data = vc->vc_font.data;
resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
- if (p->userfont)
- old_data = vc->vc_font.data;
vc->vc_font.data = (void *)(p->fontdata = data);
old_userfont = p->userfont;
if ((p->userfont = userfont))
@@ -2438,13 +2435,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
update_screen(vc);
}
- if (old_data && (--REFCOUNT(old_data) == 0))
+ if (old_userfont && (--REFCOUNT(old_data) == 0))
kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
return 0;
err_out:
p->fontdata = old_data;
- vc->vc_font.data = (void *)old_data;
+ vc->vc_font.data = old_data;
if (userfont) {
p->userfont = old_userfont;
diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index c26ee6fd73c9..8fdccf033b2d 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -1010,8 +1010,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
goto getmem_done;
}
pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n");
- } else {
- goto err1;
}
/*
diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c
index dddd6afcb972..ebc9aeffdde7 100644
--- a/drivers/video/fbdev/savage/savagefb_driver.c
+++ b/drivers/video/fbdev/savage/savagefb_driver.c
@@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var,
DBG("savagefb_check_var");
+ if (!var->pixclock)
+ return -EINVAL;
+
var->transp.offset = 0;
var->transp.length = 0;
switch (var->bits_per_pixel) {
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c
index 803ccb6aa479..009bf1d92644 100644
--- a/drivers/video/fbdev/sis/sis_main.c
+++ b/drivers/video/fbdev/sis/sis_main.c
@@ -1444,6 +1444,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
vtotal = var->upper_margin + var->lower_margin + var->vsync_len;
+ if (!var->pixclock)
+ return -EINVAL;
pixclock = var->pixclock;
if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) {
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 2de0e675fd15..8e5bac27542d 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1158,7 +1158,7 @@ stifb_init_display(struct stifb_info *fb)
}
break;
}
- stifb_blank(0, (struct fb_info *)fb); /* 0=enable screen */
+ stifb_blank(0, fb->info); /* 0=enable screen */
SETUP_FB(fb);
}
diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c
index 42c25dc85197..ac73937073a7 100644
--- a/drivers/video/fbdev/vt8500lcdfb.c
+++ b/drivers/video/fbdev/vt8500lcdfb.c
@@ -374,7 +374,6 @@ static int vt8500lcd_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
- dev_err(&pdev->dev, "no IRQ defined\n");
ret = -ENODEV;
goto failed_free_palette;
}
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 7d22051b15a2..d78fe7137799 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -512,7 +512,6 @@ config S3C2410_WATCHDOG
tristate "S3C6410/S5Pv210/Exynos Watchdog"
depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
select WATCHDOG_CORE
- select MFD_SYSCON if ARCH_EXYNOS
help
Watchdog timer block in the Samsung S3C64xx, S5Pv210 and Exynos
SoCs. This will reboot the system when the timer expires with
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 349d30462c8c..686cf544d0ae 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -24,9 +24,9 @@
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/of.h>
-#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
#include <linux/delay.h>
+#include <linux/soc/samsung/exynos-pmu.h>
#define S3C2410_WTCON 0x00
#define S3C2410_WTDAT 0x04
@@ -699,11 +699,11 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
return ret;
if (wdt->drv_data->quirks & QUIRKS_HAVE_PMUREG) {
- wdt->pmureg = syscon_regmap_lookup_by_phandle(dev->of_node,
- "samsung,syscon-phandle");
+ wdt->pmureg = exynos_get_pmu_regmap_by_phandle(dev->of_node,
+ "samsung,syscon-phandle");
if (IS_ERR(wdt->pmureg))
return dev_err_probe(dev, PTR_ERR(wdt->pmureg),
- "syscon regmap lookup failed.\n");
+ "PMU regmap lookup failed.\n");
}
wdt_irq = platform_get_irq(pdev, 0);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b8cfea7812d6..2faa4bf78c7a 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -923,8 +923,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -956,6 +956,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
{
evtchn_port_t evtchn;
+ bool close_evtchn = false;
if (!info) {
xen_irq_free_desc(irq);
@@ -975,7 +976,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
struct xenbus_device *dev;
if (!info->is_static)
- xen_evtchn_close(evtchn);
+ close_evtchn = true;
switch (info->type) {
case IRQT_VIRQ:
@@ -995,6 +996,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
}
xen_irq_info_cleanup(info);
+
+ if (close_evtchn)
+ xen_evtchn_close(evtchn);
}
xen_free_irq(info);
@@ -2216,7 +2220,7 @@ static __init void xen_alloc_callback_vector(void)
return;
pr_info("Xen HVM callback vector for event delivery is enabled\n");
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
+ sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback);
}
#else
void xen_setup_callback_vector(void) {}
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 26ffb8755ffb..f93f73ecefee 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -317,7 +317,7 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
rc = -EFAULT;
goto out_free;
}
- if (copy_to_user(arg->gref_ids, gref_ids,
+ if (copy_to_user(arg->gref_ids_flex, gref_ids,
sizeof(gref_ids[0]) * op.count)) {
rc = -EFAULT;
goto out_free;
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
index 508655273145..c63f317e3df3 100644
--- a/drivers/xen/pcpu.c
+++ b/drivers/xen/pcpu.c
@@ -65,7 +65,7 @@ struct pcpu {
uint32_t flags;
};
-static struct bus_type xen_pcpu_subsys = {
+static const struct bus_type xen_pcpu_subsys = {
.name = "xen_cpu",
.dev_name = "xen_cpu",
};
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 35b6e306026a..67dfa4778864 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -1223,18 +1223,13 @@ struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd)
kioreq->ioreq = (struct ioreq *)(page_to_virt(pages[0]));
mmap_write_unlock(mm);
- size = sizeof(*ports) * kioreq->vcpus;
- ports = kzalloc(size, GFP_KERNEL);
- if (!ports) {
- ret = -ENOMEM;
+ ports = memdup_array_user(u64_to_user_ptr(ioeventfd->ports),
+ kioreq->vcpus, sizeof(*ports));
+ if (IS_ERR(ports)) {
+ ret = PTR_ERR(ports);
goto error_kfree;
}
- if (copy_from_user(ports, u64_to_user_ptr(ioeventfd->ports), size)) {
- ret = -EFAULT;
- goto error_kfree_ports;
- }
-
for (i = 0; i < kioreq->vcpus; i++) {
kioreq->ports[i].vcpu = i;
kioreq->ports[i].port = ports[i];
@@ -1256,7 +1251,7 @@ struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd)
error_unbind:
while (--i >= 0)
unbind_from_irqhandler(irq_from_evtchn(ports[i]), &kioreq->ports[i]);
-error_kfree_ports:
+
kfree(ports);
error_kfree:
kfree(kioreq);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 8cd583db20b1..b293d7652f15 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -237,7 +237,7 @@ static const struct attribute_group *balloon_groups[] = {
NULL
};
-static struct bus_type balloon_subsys = {
+static const struct bus_type balloon_subsys = {
.name = BALLOON_CLASS_NAME,
.dev_name = BALLOON_CLASS_NAME,
};
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 32835b4b9bc5..51b3124b0d56 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -116,14 +116,15 @@ EXPORT_SYMBOL_GPL(xenbus_strstate);
* @dev: xenbus device
* @path: path to watch
* @watch: watch to register
+ * @will_handle: events queuing determine callback
* @callback: callback to register
*
* Register a @watch on the given path, using the given xenbus_watch structure
- * for storage, and the given @callback function as the callback. On success,
- * the given @path will be saved as @watch->node, and remains the
- * caller's to free. On error, @watch->node will
- * be NULL, the device will switch to %XenbusStateClosing, and the error will
- * be saved in the store.
+ * for storage, @will_handle function as the callback to determine if each
+ * event need to be queued, and the given @callback function as the callback.
+ * On success, the given @path will be saved as @watch->node, and remains the
+ * caller's to free. On error, @watch->node will be NULL, the device will
+ * switch to %XenbusStateClosing, and the error will be saved in the store.
*
* Returns: %0 on success or -errno on error
*/
@@ -158,11 +159,13 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path);
* xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
* @dev: xenbus device
* @watch: watch to register
+ * @will_handle: events queuing determine callback
* @callback: callback to register
* @pathfmt: format of path to watch
*
* Register a watch on the given @path, using the given xenbus_watch
- * structure for storage, and the given @callback function as the
+ * structure for storage, @will_handle function as the callback to determine if
+ * each event need to be queued, and the given @callback function as the
* callback. On success, the watched path (@path/@path2) will be saved
* as @watch->node, and becomes the caller's to kfree().
* On error, watch->node will be NULL, so the caller has nothing to
diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c
index 025edfccedcf..f49d19977e82 100644
--- a/drivers/zorro/zorro-driver.c
+++ b/drivers/zorro/zorro-driver.c
@@ -150,7 +150,7 @@ static int zorro_uevent(const struct device *dev, struct kobj_uevent_env *env)
return 0;
}
-struct bus_type zorro_bus_type = {
+const struct bus_type zorro_bus_type = {
.name = "zorro",
.dev_name = "zorro",
.dev_groups = zorro_device_attribute_groups,
diff --git a/drivers/zorro/zorro.h b/drivers/zorro/zorro.h
index f84df9fb4c20..df44e35203fd 100644
--- a/drivers/zorro/zorro.h
+++ b/drivers/zorro/zorro.h
@@ -4,7 +4,7 @@
* Zorro bus
*/
-extern struct bus_type zorro_bus_type;
+extern const struct bus_type zorro_bus_type;
#ifdef CONFIG_ZORRO_NAMES
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 61dbe52bb3a3..281a1ed03a04 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -637,7 +637,7 @@ static int v9fs_init_inode_cache(void)
v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
sizeof(struct v9fs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
v9fs_inode_init_once);
if (!v9fs_inode_cache)
return -ENOMEM;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index bae330c2f0cf..abdbbaee5184 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -107,7 +107,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
- if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+ if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->c.flc_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
}
@@ -121,13 +121,12 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
struct p9_fid *fid;
uint8_t status = P9_LOCK_ERROR;
int res = 0;
- unsigned char fl_type;
struct v9fs_session_info *v9ses;
fid = filp->private_data;
BUG_ON(fid == NULL);
- BUG_ON((fl->fl_flags & FL_POSIX) != FL_POSIX);
+ BUG_ON((fl->c.flc_flags & FL_POSIX) != FL_POSIX);
res = locks_lock_file_wait(filp, fl);
if (res < 0)
@@ -136,7 +135,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
/* convert posix lock to p9 tlock args */
memset(&flock, 0, sizeof(flock));
/* map the lock type */
- switch (fl->fl_type) {
+ switch (fl->c.flc_type) {
case F_RDLCK:
flock.type = P9_LOCK_TYPE_RDLCK;
break;
@@ -152,7 +151,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
flock.length = 0;
else
flock.length = fl->fl_end - fl->fl_start + 1;
- flock.proc_id = fl->fl_pid;
+ flock.proc_id = fl->c.flc_pid;
flock.client_id = fid->clnt->name;
if (IS_SETLKW(cmd))
flock.flags = P9_LOCK_FLAGS_BLOCK;
@@ -207,12 +206,13 @@ out_unlock:
* incase server returned error for lock request, revert
* it locally
*/
- if (res < 0 && fl->fl_type != F_UNLCK) {
- fl_type = fl->fl_type;
- fl->fl_type = F_UNLCK;
+ if (res < 0 && fl->c.flc_type != F_UNLCK) {
+ unsigned char type = fl->c.flc_type;
+
+ fl->c.flc_type = F_UNLCK;
/* Even if this fails we want to return the remote error */
locks_lock_file_wait(filp, fl);
- fl->fl_type = fl_type;
+ fl->c.flc_type = type;
}
if (flock.client_id != fid->clnt->name)
kfree(flock.client_id);
@@ -234,7 +234,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
* if we have a conflicting lock locally, no need to validate
* with server
*/
- if (fl->fl_type != F_UNLCK)
+ if (fl->c.flc_type != F_UNLCK)
return res;
/* convert posix lock to p9 tgetlock args */
@@ -245,7 +245,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
glock.length = 0;
else
glock.length = fl->fl_end - fl->fl_start + 1;
- glock.proc_id = fl->fl_pid;
+ glock.proc_id = fl->c.flc_pid;
glock.client_id = fid->clnt->name;
res = p9_client_getlock_dotl(fid, &glock);
@@ -254,13 +254,13 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
/* map 9p lock type to os lock type */
switch (glock.type) {
case P9_LOCK_TYPE_RDLCK:
- fl->fl_type = F_RDLCK;
+ fl->c.flc_type = F_RDLCK;
break;
case P9_LOCK_TYPE_WRLCK:
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
break;
case P9_LOCK_TYPE_UNLCK:
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
break;
}
if (glock.type != P9_LOCK_TYPE_UNLCK) {
@@ -269,7 +269,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
fl->fl_end = OFFSET_MAX;
else
fl->fl_end = glock.start + glock.length - 1;
- fl->fl_pid = -glock.proc_id;
+ fl->c.flc_pid = -glock.proc_id;
}
out:
if (glock.client_id != fid->clnt->name)
@@ -293,7 +293,7 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
filp, cmd, fl, filp);
- if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+ if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->c.flc_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
}
@@ -324,16 +324,16 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
filp, cmd, fl, filp);
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
goto out_err;
- if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+ if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->c.flc_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
}
/* Convert flock to posix lock */
- fl->fl_flags |= FL_POSIX;
- fl->fl_flags ^= FL_FLOCK;
+ fl->c.flc_flags |= FL_POSIX;
+ fl->c.flc_flags ^= FL_FLOCK;
if (IS_SETLK(cmd) | IS_SETLKW(cmd))
ret = v9fs_file_do_lock(filp, cmd, fl);
diff --git a/fs/Kconfig b/fs/Kconfig
index 89fdbefd1075..4bc7dd420874 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -162,7 +162,6 @@ menu "DOS/FAT/EXFAT/NT Filesystems"
source "fs/fat/Kconfig"
source "fs/exfat/Kconfig"
-source "fs/ntfs/Kconfig"
source "fs/ntfs3/Kconfig"
endmenu
@@ -174,6 +173,13 @@ source "fs/proc/Kconfig"
source "fs/kernfs/Kconfig"
source "fs/sysfs/Kconfig"
+config FS_PID
+ bool "Pseudo filesystem for process file descriptors"
+ depends on 64BIT
+ default y
+ help
+ Pidfs implements advanced features for process file descriptors.
+
config TMPFS
bool "Tmpfs virtual memory file system support (former shm fs)"
depends on SHMEM
diff --git a/fs/Makefile b/fs/Makefile
index c09016257f05..6ecc9b0a53f2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -15,7 +15,7 @@ obj-y := open.o read_write.o file_table.o super.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o fs_context.o fs_parser.o fsopen.o init.o \
- kernel_read_file.o mnt_idmapping.o remap_range.o
+ kernel_read_file.o mnt_idmapping.o remap_range.o pidfs.o
obj-$(CONFIG_BUFFER_HEAD) += buffer.o mpage.o
obj-$(CONFIG_PROC_FS) += proc_namespace.o
@@ -91,7 +91,6 @@ obj-y += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
obj-$(CONFIG_SMBFS) += smb/
obj-$(CONFIG_HPFS_FS) += hpfs/
-obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_NTFS3_FS) += ntfs3/
obj-$(CONFIG_UFS_FS) += ufs/
obj-$(CONFIG_EFS_FS) += efs/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index e8bfc38239cd..9354b14bbfe3 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -249,7 +249,7 @@ static int __init init_inodecache(void)
adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
sizeof(struct adfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (adfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 60685ec76d98..2e612834329a 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -105,6 +105,7 @@ struct affs_sb_info {
int work_queued; /* non-zero delayed work is queued */
struct delayed_work sb_work; /* superblock flush delayed work */
spinlock_t work_lock; /* protects sb_work and work_queued */
+ struct rcu_head rcu;
};
#define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 58b391446ae1..3c5821339609 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -130,8 +130,7 @@ static int __init init_inodecache(void)
{
affs_inode_cachep = kmem_cache_create("affs_inode_cache",
sizeof(struct affs_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ 0, (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT),
init_once);
if (affs_inode_cachep == NULL)
return -ENOMEM;
@@ -640,7 +639,7 @@ static void affs_kill_sb(struct super_block *sb)
affs_brelse(sbi->s_root_bh);
kfree(sbi->s_prefix);
mutex_destroy(&sbi->s_bmlock);
- kfree(sbi);
+ kfree_rcu(sbi, rcu);
}
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index c14533ef108f..8a67fc427e74 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -124,7 +124,7 @@ static void afs_dir_read_cleanup(struct afs_read *req)
if (xas_retry(&xas, folio))
continue;
BUG_ON(xa_is_value(folio));
- ASSERTCMP(folio_file_mapping(folio), ==, mapping);
+ ASSERTCMP(folio->mapping, ==, mapping);
folio_put(folio);
}
@@ -202,12 +202,12 @@ static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
if (xas_retry(&xas, folio))
continue;
- BUG_ON(folio_file_mapping(folio) != mapping);
+ BUG_ON(folio->mapping != mapping);
size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
for (offset = 0; offset < size; offset += sizeof(*block)) {
block = kmap_local_folio(folio, offset);
- pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
+ pr_warn("[%02lx] %32phN\n", folio->index + offset, block);
kunmap_local(block);
}
}
@@ -233,7 +233,7 @@ static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
if (xas_retry(&xas, folio))
continue;
- BUG_ON(folio_file_mapping(folio) != mapping);
+ BUG_ON(folio->mapping != mapping);
if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
afs_dir_dump(dvnode, req);
@@ -474,6 +474,16 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
continue;
}
+ /* Don't expose silly rename entries to userspace. */
+ if (nlen > 6 &&
+ dire->u.name[0] == '.' &&
+ ctx->actor != afs_lookup_filldir &&
+ ctx->actor != afs_lookup_one_filldir &&
+ memcmp(dire->u.name, ".__afs", 6) == 0) {
+ ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ continue;
+ }
+
/* found the next entry */
if (!dir_emit(ctx, dire->u.name, nlen,
ntohl(dire->u.vnode),
@@ -708,6 +718,8 @@ static void afs_do_lookup_success(struct afs_operation *op)
break;
}
+ if (vp->scb.status.abort_code)
+ trace_afs_bulkstat_error(op, &vp->fid, i, vp->scb.status.abort_code);
if (!vp->scb.have_status && !vp->scb.have_error)
continue;
@@ -897,12 +909,16 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
- inode = ERR_PTR(afs_op_error(op));
out_op:
if (!afs_op_error(op)) {
- inode = &op->file[1].vnode->netfs.inode;
- op->file[1].vnode = NULL;
+ if (op->file[1].scb.status.abort_code) {
+ afs_op_accumulate_error(op, -ECONNABORTED,
+ op->file[1].scb.status.abort_code);
+ } else {
+ inode = &op->file[1].vnode->netfs.inode;
+ op->file[1].vnode = NULL;
+ }
}
if (op->file[0].scb.have_status)
@@ -2022,7 +2038,7 @@ static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
{
struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
- _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
+ _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index);
folio_detach_private(folio);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index d3bc4a2d7085..c4d2711e20ad 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -258,16 +258,7 @@ const struct inode_operations afs_dynroot_inode_operations = {
.lookup = afs_dynroot_lookup,
};
-/*
- * Dirs in the dynamic root don't need revalidation.
- */
-static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
-{
- return 1;
-}
-
const struct dentry_operations afs_dynroot_dentry_operations = {
- .d_revalidate = afs_dynroot_d_revalidate,
.d_delete = always_delete_dentry,
.d_release = afs_d_release,
.d_automount = afs_d_automount,
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 3d33b221d9ca..ef2cc8f565d2 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -417,13 +417,17 @@ static void afs_add_open_mmap(struct afs_vnode *vnode)
static void afs_drop_open_mmap(struct afs_vnode *vnode)
{
- if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
+ if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1))
return;
down_write(&vnode->volume->open_mmaps_lock);
- if (atomic_read(&vnode->cb_nr_mmap) == 0)
+ read_seqlock_excl(&vnode->cb_lock);
+ // the only place where ->cb_nr_mmap may hit 0
+ // see __afs_break_callback() for the other side...
+ if (atomic_dec_and_test(&vnode->cb_nr_mmap))
list_del_init(&vnode->cb_mmap_link);
+ read_sequnlock_excl(&vnode->cb_lock);
up_write(&vnode->volume->open_mmaps_lock);
flush_work(&vnode->cb_work);
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 9c6dea3139f5..f0e96a35093f 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -93,13 +93,13 @@ static void afs_grant_locks(struct afs_vnode *vnode)
bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
- if (!exclusive && p->fl_type == F_WRLCK)
+ if (!exclusive && lock_is_write(p))
continue;
list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
p->fl_u.afs.state = AFS_LOCK_GRANTED;
trace_afs_flock_op(vnode, p, afs_flock_op_grant);
- wake_up(&p->fl_wait);
+ locks_wake_up(p);
}
}
@@ -112,25 +112,24 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
{
struct file_lock *p, *_p, *next = NULL;
struct key *key = vnode->lock_key;
- unsigned int fl_type = F_RDLCK;
+ unsigned int type = F_RDLCK;
_enter("");
if (vnode->lock_type == AFS_LOCK_WRITE)
- fl_type = F_WRLCK;
+ type = F_WRLCK;
list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
if (error &&
- p->fl_type == fl_type &&
- afs_file_key(p->fl_file) == key) {
+ p->c.flc_type == type &&
+ afs_file_key(p->c.flc_file) == key) {
list_del_init(&p->fl_u.afs.link);
p->fl_u.afs.state = error;
- wake_up(&p->fl_wait);
+ locks_wake_up(p);
}
/* Select the next locker to hand off to. */
- if (next &&
- (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK))
+ if (next && (lock_is_write(next) || lock_is_read(p)))
continue;
next = p;
}
@@ -142,7 +141,7 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
trace_afs_flock_op(vnode, next, afs_flock_op_wake);
- wake_up(&next->fl_wait);
+ locks_wake_up(next);
} else {
afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
@@ -166,7 +165,7 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
struct file_lock, fl_u.afs.link);
list_del_init(&p->fl_u.afs.link);
p->fl_u.afs.state = -ENOENT;
- wake_up(&p->fl_wait);
+ locks_wake_up(p);
}
key_put(vnode->lock_key);
@@ -464,14 +463,14 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
_enter("{%llx:%llu},%llu-%llu,%u,%u",
vnode->fid.vid, vnode->fid.vnode,
- fl->fl_start, fl->fl_end, fl->fl_type, mode);
+ fl->fl_start, fl->fl_end, fl->c.flc_type, mode);
fl->fl_ops = &afs_lock_ops;
INIT_LIST_HEAD(&fl->fl_u.afs.link);
fl->fl_u.afs.state = AFS_LOCK_PENDING;
partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
- type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+ type = lock_is_read(fl) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
if (mode == afs_flock_mode_write && partial)
type = AFS_LOCK_WRITE;
@@ -524,7 +523,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
}
if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
- !(fl->fl_flags & FL_SLEEP)) {
+ !(fl->c.flc_flags & FL_SLEEP)) {
ret = -EAGAIN;
if (type == AFS_LOCK_READ) {
if (vnode->status.lock_count == -1)
@@ -621,7 +620,7 @@ skip_server_lock:
return 0;
lock_is_contended:
- if (!(fl->fl_flags & FL_SLEEP)) {
+ if (!(fl->c.flc_flags & FL_SLEEP)) {
list_del_init(&fl->fl_u.afs.link);
afs_next_locker(vnode, 0);
ret = -EAGAIN;
@@ -641,7 +640,7 @@ need_to_wait:
spin_unlock(&vnode->lock);
trace_afs_flock_ev(vnode, fl, afs_flock_waiting, 0);
- ret = wait_event_interruptible(fl->fl_wait,
+ ret = wait_event_interruptible(fl->c.flc_wait,
fl->fl_u.afs.state != AFS_LOCK_PENDING);
trace_afs_flock_ev(vnode, fl, afs_flock_waited, ret);
@@ -704,7 +703,8 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
int ret;
- _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+ _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode,
+ fl->c.flc_type);
trace_afs_flock_op(vnode, fl, afs_flock_op_unlock);
@@ -730,11 +730,11 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
return -ENOENT;
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
/* check local lock records first */
posix_test_lock(file, fl);
- if (fl->fl_type == F_UNLCK) {
+ if (lock_is_unlock(fl)) {
/* no local locks; consult the server */
ret = afs_fetch_status(vnode, key, false, NULL);
if (ret < 0)
@@ -743,18 +743,18 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
lock_count = READ_ONCE(vnode->status.lock_count);
if (lock_count != 0) {
if (lock_count > 0)
- fl->fl_type = F_RDLCK;
+ fl->c.flc_type = F_RDLCK;
else
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
fl->fl_start = 0;
fl->fl_end = OFFSET_MAX;
- fl->fl_pid = 0;
+ fl->c.flc_pid = 0;
}
}
ret = 0;
error:
- _leave(" = %d [%hd]", ret, fl->fl_type);
+ _leave(" = %d [%hd]", ret, fl->c.flc_type);
return ret;
}
@@ -769,7 +769,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
_enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
vnode->fid.vid, vnode->fid.vnode, cmd,
- fl->fl_type, fl->fl_flags,
+ fl->c.flc_type, fl->c.flc_flags,
(long long) fl->fl_start, (long long) fl->fl_end);
if (IS_GETLK(cmd))
@@ -778,7 +778,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
trace_afs_flock_op(vnode, fl, afs_flock_op_lock);
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
ret = afs_do_unlk(file, fl);
else
ret = afs_do_setlk(file, fl);
@@ -804,7 +804,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
_enter("{%llx:%llu},%d,{t=%x,fl=%x}",
vnode->fid.vid, vnode->fid.vnode, cmd,
- fl->fl_type, fl->fl_flags);
+ fl->c.flc_type, fl->c.flc_flags);
/*
* No BSD flocks over NFS allowed.
@@ -813,14 +813,14 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
* Not sure whether that would be unique, though, or whether
* that would break in other places.
*/
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
trace_afs_flock_op(vnode, fl, afs_flock_op_flock);
/* we're simulating flock() locks using posix locks on the server */
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
ret = afs_do_unlk(file, fl);
else
ret = afs_do_setlk(file, fl);
@@ -843,7 +843,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
*/
static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->fl_file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->c.flc_file));
_enter("");
@@ -861,7 +861,7 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
*/
static void afs_fl_release_private(struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->fl_file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->c.flc_file));
_enter("");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9c03fcf7ffaa..6ce5a612937c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -321,8 +321,7 @@ struct afs_net {
struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */
struct hlist_head fs_proc; /* procfs servers list */
- struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */
- struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */
+ struct hlist_head fs_addresses; /* afs_server (by lowest IPv6 addr) */
seqlock_t fs_addr_lock; /* For fs_addresses[46] */
struct work_struct fs_manager;
@@ -561,8 +560,7 @@ struct afs_server {
struct afs_server __rcu *uuid_next; /* Next server with same UUID */
struct afs_server *uuid_prev; /* Previous server with same UUID */
struct list_head probe_link; /* Link in net->fs_probe_list */
- struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
- struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
+ struct hlist_node addr_link; /* Link in net->fs_addresses6 */
struct hlist_node proc_link; /* Link in net->fs_proc */
struct list_head volumes; /* RCU list of afs_server_entry objects */
struct afs_server *gc_next; /* Next server in manager's list */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 1b3bd21c168a..a14f6013e316 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -90,8 +90,7 @@ static int __net_init afs_net_init(struct net *net_ns)
INIT_LIST_HEAD(&net->fs_probe_slow);
INIT_HLIST_HEAD(&net->fs_proc);
- INIT_HLIST_HEAD(&net->fs_addresses4);
- INIT_HLIST_HEAD(&net->fs_addresses6);
+ INIT_HLIST_HEAD(&net->fs_addresses);
seqlock_init(&net->fs_addr_lock);
INIT_WORK(&net->fs_manager, afs_manage_servers);
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 3bd02571f30d..15eab053af6d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -166,7 +166,7 @@ static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
if (!preflist) {
seq_puts(m, "NO PREFS\n");
- return 0;
+ goto out;
}
seq_printf(m, "PROT SUBNET PRIOR (v=%u n=%u/%u/%u)\n",
@@ -191,7 +191,8 @@ static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
}
}
- rcu_read_lock();
+out:
+ rcu_read_unlock();
return 0;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e169121f603e..038f9d0ae3af 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -38,7 +38,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer
seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
- hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+ hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) {
estate = rcu_dereference(server->endpoint_state);
alist = estate->addresses;
for (i = 0; i < alist->nr_addrs; i++)
@@ -177,10 +177,8 @@ added_dup:
* bit, but anything we might want to do gets messy and memory
* intensive.
*/
- if (alist->nr_ipv4 > 0)
- hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
- if (alist->nr_addrs > alist->nr_ipv4)
- hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
+ if (alist->nr_addrs > 0)
+ hlist_add_head_rcu(&server->addr_link, &net->fs_addresses);
write_sequnlock(&net->fs_addr_lock);
@@ -511,10 +509,8 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
list_del(&server->probe_link);
hlist_del_rcu(&server->proc_link);
- if (!hlist_unhashed(&server->addr4_link))
- hlist_del_rcu(&server->addr4_link);
- if (!hlist_unhashed(&server->addr6_link))
- hlist_del_rcu(&server->addr6_link);
+ if (!hlist_unhashed(&server->addr_link))
+ hlist_del_rcu(&server->addr_link);
}
write_sequnlock(&net->fs_lock);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 020ecd45e476..af3a3f57c1b3 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -353,7 +353,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
struct afs_server_list *new, *old, *discard;
struct afs_vldb_entry *vldb;
- char idbuf[16];
+ char idbuf[24];
int ret, idsz;
_enter("");
@@ -361,7 +361,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
/* We look up an ID by passing it as a decimal string in the
* operation's name parameter.
*/
- idsz = sprintf(idbuf, "%llu", volume->vid);
+ idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
if (IS_ERR(vldb)) {
diff --git a/fs/aio.c b/fs/aio.c
index bb2ff48991f3..9cdaa2faa536 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -589,13 +589,24 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
- struct kioctx *ctx = req->ki_ctx;
+ struct aio_kiocb *req;
+ struct kioctx *ctx;
unsigned long flags;
+ /*
+ * kiocb didn't come from aio or is neither a read nor a write, hence
+ * ignore it.
+ */
+ if (!(iocb->ki_flags & IOCB_AIO_RW))
+ return;
+
+ req = container_of(iocb, struct aio_kiocb, rw);
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
+ ctx = req->ki_ctx;
+
spin_lock_irqsave(&ctx->ctx_lock, flags);
list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
@@ -1509,7 +1520,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
- req->ki_flags = req->ki_filp->f_iocb_flags;
+ req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW;
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
diff --git a/fs/attr.c b/fs/attr.c
index 5a13f0c8495f..960a310581eb 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -16,8 +16,6 @@
#include <linux/fcntl.h>
#include <linux/filelock.h>
#include <linux/security.h>
-#include <linux/evm.h>
-#include <linux/ima.h>
#include "internal.h"
@@ -352,7 +350,7 @@ int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
EXPORT_SYMBOL(may_setattr);
/**
- * notify_change - modify attributes of a filesytem object
+ * notify_change - modify attributes of a filesystem object
* @idmap: idmap of the mount the inode was found from
* @dentry: object affected
* @attr: new attributes
@@ -502,8 +500,7 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
if (!error) {
fsnotify_change(dentry, ia_valid);
- ima_inode_post_setattr(idmap, dentry);
- evm_inode_post_setattr(dentry, ia_valid);
+ security_inode_post_setattr(idmap, dentry, ia_valid);
}
return error;
diff --git a/fs/backing-file.c b/fs/backing-file.c
index a681f38d84d8..740185198db3 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -325,9 +325,7 @@ EXPORT_SYMBOL_GPL(backing_file_mmap);
static int __init backing_aio_init(void)
{
- backing_aio_cachep = kmem_cache_create("backing_aio",
- sizeof(struct backing_aio),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
if (!backing_aio_cachep)
return -ENOMEM;
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 10704f2d3af5..fd3e175d8342 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1715,7 +1715,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
*/
- bch2_trans_unlock(trans);
+ bch2_trans_unlock_long(trans);
blkdev_issue_discard(ca->disk_sb.bdev,
k.k->p.offset * ca->mi.bucket_size,
ca->mi.bucket_size,
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index b4dc319bcb2b..569b97904da4 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -68,9 +68,11 @@ void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer
void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
- prt_str(out, "bucket=");
- bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
- prt_str(out, " ");
+ if (bch2_dev_exists2(c, k.k->p.inode)) {
+ prt_str(out, "bucket=");
+ bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
+ prt_str(out, " ");
+ }
bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index b80c6c9efd8c..69d0d60d50e3 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -1249,6 +1249,18 @@ static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
return stdio;
}
+static inline unsigned metadata_replicas_required(struct bch_fs *c)
+{
+ return min(c->opts.metadata_replicas,
+ c->opts.metadata_replicas_required);
+}
+
+static inline unsigned data_replicas_required(struct bch_fs *c)
+{
+ return min(c->opts.data_replicas,
+ c->opts.data_replicas_required);
+}
+
#define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 5467a8635be1..3ef338df82f5 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2156,7 +2156,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
* isn't monotonically increasing before FILTER_SNAPSHOTS, and
* that's what we check against in extents mode:
*/
- if (k.k->p.inode > end.inode)
+ if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? bkey_gt(k.k->p, end)
+ : k.k->p.inode > end.inode))
goto end;
if (iter->update_path &&
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index bed75c93c069..684397442338 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -92,7 +92,7 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
continue;
bch2_btree_trans_to_text(out, i->trans);
- bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1);
+ bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT);
}
}
@@ -227,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
prt_printf(&buf, "backtrace:");
prt_newline(&buf);
printbuf_indent_add(&buf, 2);
- bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2);
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
}
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 17a5938aa71a..4530b14ff2c3 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -280,7 +280,8 @@ retry:
writepoint_ptr(&c->btree_write_point),
&devs_have,
res->nr_replicas,
- c->opts.metadata_replicas_required,
+ min(res->nr_replicas,
+ c->opts.metadata_replicas_required),
watermark, 0, cl, &wp);
if (unlikely(ret))
return ERR_PTR(ret);
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index cadda9bbe4a4..7bdba8507fc9 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -627,7 +627,7 @@ restart:
prt_printf(&i->buf, "backtrace:");
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, task, 0);
+ bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 73c12e565af5..27710cdd5710 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -303,18 +303,6 @@ void bch2_readahead(struct readahead_control *ractl)
darray_exit(&readpages_iter.folios);
}
-static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
- subvol_inum inum, struct folio *folio)
-{
- bch2_folio_create(folio, __GFP_NOFAIL);
-
- rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
- rbio->bio.bi_iter.bi_sector = folio_sector(folio);
- BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
-
- bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
-}
-
static void bch2_read_single_folio_end_io(struct bio *bio)
{
complete(bio->bi_private);
@@ -329,6 +317,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
int ret;
DECLARE_COMPLETION_ONSTACK(done);
+ if (!bch2_folio_create(folio, GFP_KERNEL))
+ return -ENOMEM;
+
bch2_inode_opts_get(&opts, c, &inode->ei_inode);
rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
@@ -336,7 +327,11 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
rbio->bio.bi_private = &done;
rbio->bio.bi_end_io = bch2_read_single_folio_end_io;
- __bchfs_readfolio(c, rbio, inode_inum(inode), folio);
+ rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
+ rbio->bio.bi_iter.bi_sector = folio_sector(folio);
+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
+
+ bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0));
wait_for_completion(&done);
ret = blk_status_to_errno(rbio->bio.bi_status);
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index e3b219e19e10..33cb6da3a5ad 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -88,6 +88,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
return ret;
shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c));
+ if (shorten >= iter->count)
+ shorten = 0;
iter->count -= shorten;
bio = bio_alloc_bioset(NULL,
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index dc52918d06ef..8c70123b6a0c 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -79,7 +79,7 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
continue;
bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0,
- REQ_OP_FLUSH,
+ REQ_OP_WRITE|REQ_PREFLUSH,
GFP_KERNEL,
&c->nocow_flush_bioset),
struct nocow_flush, bio);
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 3a4c24c28e7f..3dc8630ff9fe 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -455,6 +455,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
if (IS_ERR(victim))
return PTR_ERR(victim);
+ dir = d_inode(path.dentry);
if (victim->d_sb->s_fs_info != c) {
ret = -EXDEV;
goto err;
@@ -463,14 +464,13 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -ENOENT;
goto err;
}
- dir = d_inode(path.dentry);
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
d_delete(victim);
}
- inode_unlock(dir);
err:
+ inode_unlock(dir);
dput(victim);
path_put(&path);
return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index ec419b8e2c43..77ae65542db9 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -435,7 +435,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
__bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
- return ret;
+ return bch2_err_class(ret);
ihold(&inode->v);
d_instantiate(dentry, &inode->v);
@@ -487,8 +487,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
struct bch_inode_info *dir= to_bch_ei(vdir);
struct bch_fs *c = dir->v.i_sb->s_fs_info;
- return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
__bch2_unlink(vdir, dentry, false);
+ return bch2_err_class(ret);
}
static int bch2_symlink(struct mnt_idmap *idmap,
@@ -523,7 +524,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
return 0;
err:
iput(&inode->v);
- return ret;
+ return bch2_err_class(ret);
}
static int bch2_mkdir(struct mnt_idmap *idmap,
@@ -641,7 +642,7 @@ err:
src_inode,
dst_inode);
- return ret;
+ return bch2_err_class(ret);
}
static void bch2_setattr_copy(struct mnt_idmap *idmap,
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 4f0ecd605675..6a760777bafb 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -119,22 +119,19 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
if (!ret)
*snapshot = iter.pos.snapshot;
err:
- bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
-static int __lookup_dirent(struct btree_trans *trans,
+static int lookup_dirent_in_snapshot(struct btree_trans *trans,
struct bch_hash_info hash_info,
subvol_inum dir, struct qstr *name,
- u64 *target, unsigned *type)
+ u64 *target, unsigned *type, u32 snapshot)
{
struct btree_iter iter;
struct bkey_s_c_dirent d;
- int ret;
-
- ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
- &hash_info, dir, name, 0);
+ int ret = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
+ &hash_info, dir, name, 0, snapshot);
if (ret)
return ret;
@@ -225,15 +222,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
- ret = lookup_inode(trans, root_inum.inum, &root_inode, &snapshot);
+ u32 root_inode_snapshot = snapshot;
+ ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
bch_err_msg(c, ret, "looking up root inode");
if (ret)
return ret;
root_hash_info = bch2_hash_info_init(c, &root_inode);
- ret = __lookup_dirent(trans, root_hash_info, root_inum,
- &lostfound_str, &inum, &d_type);
+ ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
+ &lostfound_str, &inum, &d_type, snapshot);
if (bch2_err_matches(ret, ENOENT))
goto create_lostfound;
@@ -250,7 +248,10 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
* The bch2_check_dirents pass has already run, dangling dirents
* shouldn't exist here:
*/
- return lookup_inode(trans, inum, lostfound, &snapshot);
+ ret = lookup_inode(trans, inum, lostfound, &snapshot);
+ bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
+ inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
+ return ret;
create_lostfound:
/*
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index ef3a53f9045a..2c098ac017b3 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1564,6 +1564,7 @@ CLOSURE_CALLBACK(bch2_write)
BUG_ON(!op->write_point.v);
BUG_ON(bkey_eq(op->pos, POS_MAX));
+ op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
op->start_time = local_clock();
bch2_keylist_init(&op->insert_keys, op->inline_keys);
wbio_init(bio)->put_bio = false;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index d71d26e39521..bc890776eb57 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -233,7 +233,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
prt_str(&pbuf, "entry size: ");
prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data));
prt_newline(&pbuf);
- bch2_prt_task_backtrace(&pbuf, current, 1);
+ bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT);
trace_journal_entry_close(c, pbuf.buf);
printbuf_exit(&pbuf);
}
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 04a1e79a5ed3..47805193f18c 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1478,6 +1478,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
c->opts.foreground_target;
unsigned i, replicas = 0, replicas_want =
READ_ONCE(c->opts.metadata_replicas);
+ unsigned replicas_need = min_t(unsigned, replicas_want,
+ READ_ONCE(c->opts.metadata_replicas_required));
rcu_read_lock();
retry:
@@ -1526,7 +1528,7 @@ done:
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
- return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
+ return replicas >= replicas_need ? 0 : -EROFS;
}
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
@@ -1988,7 +1990,8 @@ CLOSURE_CALLBACK(bch2_journal_write)
percpu_ref_get(&ca->io_ref);
bio = ca->journal.bio;
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
+ bio_reset(bio, ca->disk_sb.bdev,
+ REQ_OP_WRITE|REQ_PREFLUSH);
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
closure_bio_submit(bio, cl);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 820d25e19e5f..c33dca641575 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -205,7 +205,7 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard;
- if (nr_online < c->opts.metadata_replicas_required) {
+ if (nr_online < metadata_replicas_required(c)) {
ret = JOURNAL_ERR_insufficient_devices;
goto out;
}
@@ -892,9 +892,11 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
journal_seq_pin(j, seq)->devs);
seq++;
- spin_unlock(&j->lock);
- ret = bch2_mark_replicas(c, &replicas.e);
- spin_lock(&j->lock);
+ if (replicas.e.nr_devs) {
+ spin_unlock(&j->lock);
+ ret = bch2_mark_replicas(c, &replicas.e);
+ spin_lock(&j->lock);
+ }
}
spin_unlock(&j->lock);
err:
diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h
index b2be565bb8f2..64df11ab422b 100644
--- a/fs/bcachefs/mean_and_variance.h
+++ b/fs/bcachefs/mean_and_variance.h
@@ -17,7 +17,7 @@
* Rust and rustc has issues with u128.
*/
-#if defined(__SIZEOF_INT128__) && defined(__KERNEL__)
+#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC)
typedef struct {
unsigned __int128 v;
diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c
index accf246c3233..b27d22925929 100644
--- a/fs/bcachefs/printbuf.c
+++ b/fs/bcachefs/printbuf.c
@@ -56,6 +56,7 @@ void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
va_copy(args2, args);
len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
+ va_end(args2);
} while (len + 1 >= printbuf_remaining(out) &&
!bch2_printbuf_make_room(out, len + 1));
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 9127d0e3ca2f..21e13bb4335b 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -577,8 +577,9 @@ u64 bch2_recovery_passes_from_stable(u64 v)
static bool check_version_upgrade(struct bch_fs *c)
{
- unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
unsigned latest_version = bcachefs_metadata_version_current;
+ unsigned latest_compatible = min(latest_version,
+ bch2_latest_compatible_version(c->sb.version));
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
@@ -597,7 +598,7 @@ static bool check_version_upgrade(struct bch_fs *c)
new_version = latest_version;
break;
case BCH_VERSION_UPGRADE_none:
- new_version = old_version;
+ new_version = min(old_version, latest_version);
break;
}
}
@@ -774,7 +775,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (!(c->opts.nochanges && c->opts.norecovery)) {
+ if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
bool write_sb = false;
@@ -804,7 +805,7 @@ int bch2_fs_recovery(struct bch_fs *c)
if (bch2_check_version_downgrade(c)) {
struct printbuf buf = PRINTBUF;
- prt_str(&buf, "Version downgrade required:\n");
+ prt_str(&buf, "Version downgrade required:");
__le64 passes = ext->recovery_passes_required[0];
bch2_sb_set_downgrade(c,
@@ -812,7 +813,7 @@ int bch2_fs_recovery(struct bch_fs *c)
BCH_VERSION_MINOR(c->sb.version));
passes = ext->recovery_passes_required[0] & ~passes;
if (passes) {
- prt_str(&buf, " running recovery passes: ");
+ prt_str(&buf, "\n running recovery passes: ");
prt_bitflags(&buf, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index a45354d2acde..eff5ce18c69c 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -421,7 +421,7 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
- m->errors_reset_time = ktime_get_real_seconds();
+ m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 45f67e8b29eb..ac6ba04d5521 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -728,7 +728,7 @@ static int check_snapshot(struct btree_trans *trans,
return 0;
memset(&s, 0, sizeof(s));
- memcpy(&s, k.v, bkey_val_bytes(k.k));
+ memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
id = le32_to_cpu(s.parent);
if (id) {
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 89fdb7c21134..fcaa5a888744 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -160,21 +160,16 @@ static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, s
}
static __always_inline int
-bch2_hash_lookup(struct btree_trans *trans,
+bch2_hash_lookup_in_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
subvol_inum inum, const void *key,
- unsigned flags)
+ unsigned flags, u32 snapshot)
{
struct bkey_s_c k;
- u32 snapshot;
int ret;
- ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
- if (ret)
- return ret;
-
for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
POS(inum.inum, U64_MAX),
@@ -195,6 +190,19 @@ bch2_hash_lookup(struct btree_trans *trans,
}
static __always_inline int
+bch2_hash_lookup(struct btree_trans *trans,
+ struct btree_iter *iter,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ subvol_inum inum, const void *key,
+ unsigned flags)
+{
+ u32 snapshot;
+ return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
+ bch2_hash_lookup_in_snapshot(trans, iter, desc, info, inum, key, flags, snapshot);
+}
+
+static __always_inline int
bch2_hash_hole(struct btree_trans *trans,
struct btree_iter *iter,
const struct bch_hash_desc desc,
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index d60c7d27a047..bd64eb68e84a 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -142,8 +142,8 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
void bch2_free_super(struct bch_sb_handle *sb)
{
kfree(sb->bio);
- if (!IS_ERR_OR_NULL(sb->bdev_handle))
- bdev_release(sb->bdev_handle);
+ if (!IS_ERR_OR_NULL(sb->s_bdev_file))
+ fput(sb->s_bdev_file);
kfree(sb->holder);
kfree(sb->sb_name);
@@ -704,22 +704,22 @@ retry:
if (!opt_get(*opts, nochanges))
sb->mode |= BLK_OPEN_WRITE;
- sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
- if (IS_ERR(sb->bdev_handle) &&
- PTR_ERR(sb->bdev_handle) == -EACCES &&
+ sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
+ if (IS_ERR(sb->s_bdev_file) &&
+ PTR_ERR(sb->s_bdev_file) == -EACCES &&
opt_get(*opts, read_only)) {
sb->mode &= ~BLK_OPEN_WRITE;
- sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
- if (!IS_ERR(sb->bdev_handle))
+ sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
+ if (!IS_ERR(sb->s_bdev_file))
opt_set(*opts, nochanges, true);
}
- if (IS_ERR(sb->bdev_handle)) {
- ret = PTR_ERR(sb->bdev_handle);
- goto out;
+ if (IS_ERR(sb->s_bdev_file)) {
+ ret = PTR_ERR(sb->s_bdev_file);
+ goto err;
}
- sb->bdev = sb->bdev_handle->bdev;
+ sb->bdev = file_bdev(sb->s_bdev_file);
ret = bch2_sb_realloc(sb, 0);
if (ret) {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index b9911402b175..6b23e11825e6 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1428,10 +1428,10 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
? c->opts.metadata_replicas
- : c->opts.metadata_replicas_required,
+ : metadata_replicas_required(c),
!(flags & BCH_FORCE_IF_DATA_DEGRADED)
? c->opts.data_replicas
- : c->opts.data_replicas_required);
+ : data_replicas_required(c));
return nr_rw >= required;
case BCH_MEMBER_STATE_failed:
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index 0e5a14fc8e7f..ec784d975f66 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -4,7 +4,7 @@
struct bch_sb_handle {
struct bch_sb *sb;
- struct bdev_handle *bdev_handle;
+ struct file *s_bdev_file;
struct block_device *bdev;
char *sb_name;
struct bio *bio;
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c
index b1c867aa2b58..9220d7de10db 100644
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -53,9 +53,9 @@ int bch2_run_thread_with_file(struct thread_with_file *thr,
if (ret)
goto err;
- fd_install(fd, file);
get_task_struct(thr->task);
wake_up_process(thr->task);
+ fd_install(fd, file);
return fd;
err:
if (fd >= 0)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index a135136adeee..3a32faa86b5c 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -272,14 +272,14 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
console_unlock();
}
-int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr)
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr,
+ gfp_t gfp)
{
#ifdef CONFIG_STACKTRACE
unsigned nr_entries = 0;
- int ret = 0;
stack->nr = 0;
- ret = darray_make_room(stack, 32);
+ int ret = darray_make_room_gfp(stack, 32, gfp);
if (ret)
return ret;
@@ -289,7 +289,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne
do {
nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
} while (nr_entries == stack->size &&
- !(ret = darray_make_room(stack, stack->size * 2)));
+ !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp)));
stack->nr = nr_entries;
up_read(&task->signal->exec_update_lock);
@@ -308,10 +308,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
}
}
-int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr)
+int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp)
{
bch_stacktrace stack = { 0 };
- int ret = bch2_save_backtrace(&stack, task, skipnr + 1);
+ int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp);
bch2_prt_backtrace(out, &stack);
darray_exit(&stack);
@@ -418,14 +418,15 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
bch2_quantiles_update(&stats->quantiles, duration);
}
- if (time_after64(end, stats->last_event)) {
+ if (stats->last_event && time_after64(end, stats->last_event)) {
freq = end - stats->last_event;
mean_and_variance_update(&stats->freq_stats, freq);
mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
stats->max_freq = max(stats->max_freq, freq);
stats->min_freq = min(stats->min_freq, freq);
- stats->last_event = end;
}
+
+ stats->last_event = end;
}
static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index df67bf55fe2b..b414736d59a5 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -348,9 +348,9 @@ void bch2_prt_u64_base2(struct printbuf *, u64);
void bch2_print_string_as_lines(const char *prefix, const char *lines);
typedef DARRAY(unsigned long) bch_stacktrace;
-int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned);
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *);
-int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned);
+int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t);
static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
{
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 2b4dda047450..d76f406d3b2e 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -435,8 +435,7 @@ befs_init_inodecache(void)
{
befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache",
sizeof(struct befs_inode_info), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT),
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
offsetof(struct befs_inode_info,
i_data.symlink),
sizeof_field(struct befs_inode_info,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 355957dbce39..db81570c9637 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -259,7 +259,7 @@ static int __init init_inodecache(void)
bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
sizeof(struct bfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (bfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fefc642541cb..1920ed69279b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -320,7 +320,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
else
executable_stack = EXSTACK_DEFAULT;
- if (stack_size == 0) {
+ if (stack_size == 0 && interp_params.flags & ELF_FDPIC_FLAG_PRESENT) {
stack_size = interp_params.stack_size;
if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
executable_stack = EXSTACK_ENABLE_X;
diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c
index 1925a0919ca6..79026917db19 100644
--- a/fs/btrfs/accessors.c
+++ b/fs/btrfs/accessors.c
@@ -5,7 +5,8 @@
#include <asm/unaligned.h>
#include "messages.h"
-#include "ctree.h"
+#include "extent_io.h"
+#include "fs.h"
#include "accessors.h"
static bool check_setget_bounds(const struct extent_buffer *eb,
@@ -63,8 +64,8 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
const unsigned long oil = get_eb_offset_in_folio(token->eb, \
member_offset);\
- const int unit_size = folio_size(token->eb->folios[0]); \
- const int unit_shift = folio_shift(token->eb->folios[0]); \
+ const int unit_size = token->eb->folio_size; \
+ const int unit_shift = token->eb->folio_shift; \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = unit_size - oil; \
@@ -94,7 +95,7 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
const unsigned long oil = get_eb_offset_in_folio(eb, \
member_offset);\
- const int unit_size = folio_size(eb->folios[0]); \
+ const int unit_size = eb->folio_size; \
char *kaddr = folio_address(eb->folios[idx]); \
const int size = sizeof(u##bits); \
const int part = unit_size - oil; \
@@ -117,8 +118,8 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
const unsigned long oil = get_eb_offset_in_folio(token->eb, \
member_offset);\
- const int unit_size = folio_size(token->eb->folios[0]); \
- const int unit_shift = folio_shift(token->eb->folios[0]); \
+ const int unit_size = token->eb->folio_size; \
+ const int unit_shift = token->eb->folio_shift; \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = unit_size - oil; \
@@ -151,7 +152,7 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
const unsigned long oil = get_eb_offset_in_folio(eb, \
member_offset);\
- const int unit_size = folio_size(eb->folios[0]); \
+ const int unit_size = eb->folio_size; \
char *kaddr = folio_address(eb->folios[idx]); \
const int size = sizeof(u##bits); \
const int part = unit_size - oil; \
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index ed7aa32972ad..6fce3e8d3dac 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -3,8 +3,17 @@
#ifndef BTRFS_ACCESSORS_H
#define BTRFS_ACCESSORS_H
-#include <linux/stddef.h>
#include <asm/unaligned.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/align.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <uapi/linux/btrfs_tree.h>
+
+struct extent_buffer;
struct btrfs_map_token {
struct extent_buffer *eb;
@@ -844,45 +853,6 @@ static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
-static inline void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
- const struct btrfs_disk_balance_args *disk)
-{
- memset(cpu, 0, sizeof(*cpu));
-
- cpu->profiles = le64_to_cpu(disk->profiles);
- cpu->usage = le64_to_cpu(disk->usage);
- cpu->devid = le64_to_cpu(disk->devid);
- cpu->pstart = le64_to_cpu(disk->pstart);
- cpu->pend = le64_to_cpu(disk->pend);
- cpu->vstart = le64_to_cpu(disk->vstart);
- cpu->vend = le64_to_cpu(disk->vend);
- cpu->target = le64_to_cpu(disk->target);
- cpu->flags = le64_to_cpu(disk->flags);
- cpu->limit = le64_to_cpu(disk->limit);
- cpu->stripes_min = le32_to_cpu(disk->stripes_min);
- cpu->stripes_max = le32_to_cpu(disk->stripes_max);
-}
-
-static inline void btrfs_cpu_balance_args_to_disk(
- struct btrfs_disk_balance_args *disk,
- const struct btrfs_balance_args *cpu)
-{
- memset(disk, 0, sizeof(*disk));
-
- disk->profiles = cpu_to_le64(cpu->profiles);
- disk->usage = cpu_to_le64(cpu->usage);
- disk->devid = cpu_to_le64(cpu->devid);
- disk->pstart = cpu_to_le64(cpu->pstart);
- disk->pend = cpu_to_le64(cpu->pend);
- disk->vstart = cpu_to_le64(cpu->vstart);
- disk->vend = cpu_to_le64(cpu->vend);
- disk->target = cpu_to_le64(cpu->target);
- disk->flags = cpu_to_le64(cpu->flags);
- disk->limit = cpu_to_le64(cpu->limit);
- disk->stripes_min = cpu_to_le32(cpu->stripes_min);
- disk->stripes_max = cpu_to_le32(cpu->stripes_max);
-}
-
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7427449a04a3..e0ba00d64ea0 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -12,7 +12,6 @@
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include "ctree.h"
-#include "btrfs_inode.h"
#include "xattr.h"
#include "acl.h"
diff --git a/fs/btrfs/acl.h b/fs/btrfs/acl.h
index a270e71ec05f..48b9ddae4a46 100644
--- a/fs/btrfs/acl.h
+++ b/fs/btrfs/acl.h
@@ -3,8 +3,15 @@
#ifndef BTRFS_ACL_H
#define BTRFS_ACL_H
+struct posix_acl;
+struct inode;
+struct btrfs_trans_handle;
+
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
+struct mnt_idmap;
+struct dentry;
+
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
@@ -13,6 +20,10 @@ int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
#else
+#include <linux/errno.h>
+
+struct btrfs_trans_handle;
+
#define btrfs_get_acl NULL
#define btrfs_set_acl NULL
static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 9e261aac671e..361a866c1995 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -11,7 +11,6 @@
#include <linux/freezer.h>
#include <trace/events/btrfs.h>
#include "async-thread.h"
-#include "ctree.h"
enum {
WORK_DONE_BIT,
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 62b8a0d57898..04c2f3175828 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -7,11 +7,14 @@
#ifndef BTRFS_ASYNC_THREAD_H
#define BTRFS_ASYNC_THREAD_H
+#include <linux/compiler_types.h>
#include <linux/workqueue.h>
+#include <linux/list.h>
struct btrfs_fs_info;
struct btrfs_workqueue;
struct btrfs_work;
+
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
typedef void (*btrfs_ordered_func_t)(struct btrfs_work *arg, bool);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index beed7e459dab..c1e6a5bbeeaf 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -198,10 +198,7 @@ static struct kmem_cache *btrfs_prelim_ref_cache;
int __init btrfs_prelim_ref_init(void)
{
btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
- sizeof(struct prelim_ref),
- 0,
- SLAB_MEM_SPREAD,
- NULL);
+ sizeof(struct prelim_ref), 0, 0, NULL);
if (!btrfs_prelim_ref_cache)
return -ENOMEM;
return 0;
@@ -1036,8 +1033,6 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
slot = path->slots[0];
item_size = btrfs_item_size(leaf, slot);
- BUG_ON(item_size < sizeof(*ei));
-
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
if (ctx->check_extent_item) {
@@ -1435,8 +1430,10 @@ again:
if (ret < 0)
goto out;
if (ret == 0) {
- /* This shouldn't happen, indicates a bug or fs corruption. */
- ASSERT(ret != 0);
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
ret = -EUCLEAN;
goto out;
}
@@ -2225,6 +2222,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
+ return -EUCLEAN;
+ }
ret = btrfs_previous_extent_item(extent_root, path, 0);
if (ret) {
@@ -2247,7 +2251,6 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
eb = path->nodes[0];
item_size = btrfs_item_size(eb, path->slots[0]);
- BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
@@ -2850,6 +2853,16 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_inf
return ret;
}
+static void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
+{
+ iter->bytenr = 0;
+ iter->item_ptr = 0;
+ iter->cur_ptr = 0;
+ iter->end_ptr = 0;
+ btrfs_release_path(iter->path);
+ memset(&iter->cur_key, 0, sizeof(iter->cur_key));
+}
+
int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
{
struct btrfs_fs_info *fs_info = iter->fs_info;
@@ -2868,6 +2881,10 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
if (ret < 0)
return ret;
if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
ret = -EUCLEAN;
goto release;
}
@@ -2938,6 +2955,14 @@ release:
return ret;
}
+static bool btrfs_backref_iter_is_inline_ref(struct btrfs_backref_iter *iter)
+{
+ if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
+ iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
+ return true;
+ return false;
+}
+
/*
* Go to the next backref item of current bytenr, can be either inlined or
* keyed.
@@ -2950,7 +2975,7 @@ release:
*/
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
{
- struct extent_buffer *eb = btrfs_backref_get_eb(iter);
+ struct extent_buffer *eb = iter->path->nodes[0];
struct btrfs_root *extent_root;
struct btrfs_path *path = iter->path;
struct btrfs_extent_inline_ref *iref;
@@ -3038,6 +3063,19 @@ struct btrfs_backref_node *btrfs_backref_alloc_node(
return node;
}
+void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ if (node) {
+ ASSERT(list_empty(&node->list));
+ ASSERT(list_empty(&node->lower));
+ ASSERT(node->eb == NULL);
+ cache->nr_nodes--;
+ btrfs_put_root(node->root);
+ kfree(node);
+ }
+}
+
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache)
{
@@ -3049,6 +3087,52 @@ struct btrfs_backref_edge *btrfs_backref_alloc_edge(
return edge;
}
+void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_edge *edge)
+{
+ if (edge) {
+ cache->nr_edges--;
+ kfree(edge);
+ }
+}
+
+void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node)
+{
+ if (node->locked) {
+ btrfs_tree_unlock(node->eb);
+ node->locked = 0;
+ }
+}
+
+void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node)
+{
+ if (node->eb) {
+ btrfs_backref_unlock_node_buffer(node);
+ free_extent_buffer(node->eb);
+ node->eb = NULL;
+ }
+}
+
+/*
+ * Drop the backref node from cache without cleaning up its children
+ * edges.
+ *
+ * This can only be called on node without parent edges.
+ * The children edges are still kept as is.
+ */
+void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
+ struct btrfs_backref_node *node)
+{
+ ASSERT(list_empty(&node->upper));
+
+ btrfs_backref_drop_node_buffer(node);
+ list_del_init(&node->list);
+ list_del_init(&node->lower);
+ if (!RB_EMPTY_NODE(&node->rb_node))
+ rb_erase(&node->rb_node, &tree->rb_root);
+ btrfs_backref_free_node(tree, node);
+}
+
/*
* Drop the backref node from cache, also cleaning up all its
* upper edges and any uncached nodes in the path.
@@ -3120,6 +3204,19 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
ASSERT(!cache->nr_edges);
}
+void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+ struct btrfs_backref_node *lower,
+ struct btrfs_backref_node *upper,
+ int link_which)
+{
+ ASSERT(upper && lower && upper->level == lower->level + 1);
+ edge->node[LOWER] = lower;
+ edge->node[UPPER] = upper;
+ if (link_which & LINK_LOWER)
+ list_add_tail(&edge->list[LOWER], &lower->upper);
+ if (link_which & LINK_UPPER)
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+}
/*
* Handle direct tree backref
*
@@ -3428,7 +3525,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
int type;
cond_resched();
- eb = btrfs_backref_get_eb(iter);
+ eb = iter->path->nodes[0];
key.objectid = iter->bytenr;
if (btrfs_backref_iter_is_inline_ref(iter)) {
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ab4ca0eda605..e8c22cccb5c1 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -6,11 +6,23 @@
#ifndef BTRFS_BACKREF_H
#define BTRFS_BACKREF_H
-#include <linux/btrfs.h>
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <uapi/linux/btrfs.h>
+#include <uapi/linux/btrfs_tree.h>
#include "messages.h"
-#include "ulist.h"
+#include "locking.h"
#include "disk-io.h"
#include "extent_io.h"
+#include "ctree.h"
+
+struct extent_inode_elem;
+struct ulist;
+struct btrfs_extent_item;
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
/*
* Used by implementations of iterate_extent_inodes_t (see definition below) to
@@ -271,22 +283,6 @@ struct btrfs_backref_iter {
struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info);
-static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
-{
- if (!iter)
- return;
- btrfs_free_path(iter->path);
- kfree(iter);
-}
-
-static inline struct extent_buffer *btrfs_backref_get_eb(
- struct btrfs_backref_iter *iter)
-{
- if (!iter)
- return NULL;
- return iter->path->nodes[0];
-}
-
/*
* For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
* is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
@@ -306,25 +302,6 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
-static inline bool btrfs_backref_iter_is_inline_ref(
- struct btrfs_backref_iter *iter)
-{
- if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
- iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
- return true;
- return false;
-}
-
-static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
-{
- iter->bytenr = 0;
- iter->item_ptr = 0;
- iter->cur_ptr = 0;
- iter->end_ptr = 0;
- btrfs_release_path(iter->path);
- memset(&iter->cur_key, 0, sizeof(iter->cur_key));
-}
-
/*
* Backref cache related structures
*
@@ -452,83 +429,22 @@ struct btrfs_backref_edge *btrfs_backref_alloc_edge(
#define LINK_LOWER (1 << 0)
#define LINK_UPPER (1 << 1)
-static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
- struct btrfs_backref_node *lower,
- struct btrfs_backref_node *upper,
- int link_which)
-{
- ASSERT(upper && lower && upper->level == lower->level + 1);
- edge->node[LOWER] = lower;
- edge->node[UPPER] = upper;
- if (link_which & LINK_LOWER)
- list_add_tail(&edge->list[LOWER], &lower->upper);
- if (link_which & LINK_UPPER)
- list_add_tail(&edge->list[UPPER], &upper->lower);
-}
-
-static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
- struct btrfs_backref_node *node)
-{
- if (node) {
- ASSERT(list_empty(&node->list));
- ASSERT(list_empty(&node->lower));
- ASSERT(node->eb == NULL);
- cache->nr_nodes--;
- btrfs_put_root(node->root);
- kfree(node);
- }
-}
-
-static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
- struct btrfs_backref_edge *edge)
-{
- if (edge) {
- cache->nr_edges--;
- kfree(edge);
- }
-}
-
-static inline void btrfs_backref_unlock_node_buffer(
- struct btrfs_backref_node *node)
-{
- if (node->locked) {
- btrfs_tree_unlock(node->eb);
- node->locked = 0;
- }
-}
-static inline void btrfs_backref_drop_node_buffer(
- struct btrfs_backref_node *node)
-{
- if (node->eb) {
- btrfs_backref_unlock_node_buffer(node);
- free_extent_buffer(node->eb);
- node->eb = NULL;
- }
-}
-
-/*
- * Drop the backref node from cache without cleaning up its children
- * edges.
- *
- * This can only be called on node without parent edges.
- * The children edges are still kept as is.
- */
-static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
- struct btrfs_backref_node *node)
-{
- ASSERT(list_empty(&node->upper));
-
- btrfs_backref_drop_node_buffer(node);
- list_del_init(&node->list);
- list_del_init(&node->lower);
- if (!RB_EMPTY_NODE(&node->rb_node))
- rb_erase(&node->rb_node, &tree->rb_root);
- btrfs_backref_free_node(tree, node);
-}
+void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+ struct btrfs_backref_node *lower,
+ struct btrfs_backref_node *upper,
+ int link_which);
+void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node);
+void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_edge *edge);
+void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node);
+void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node);
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
+void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
+ struct btrfs_backref_node *node);
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 928f512cdb4a..477f350a8bd0 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -11,7 +11,6 @@
#include "raid56.h"
#include "async-thread.h"
#include "dev-replace.h"
-#include "rcu-string.h"
#include "zoned.h"
#include "file-item.h"
#include "raid-stripe-tree.h"
@@ -509,8 +508,6 @@ static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
- if (bio_op(bio) != REQ_OP_READ)
- btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
if (bio_op(bio) != REQ_OP_READ)
btrfs_bio(bio)->orig_physical = smap->physical;
@@ -611,8 +608,20 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
static bool should_async_write(struct btrfs_bio *bbio)
{
+ bool auto_csum_mode = true;
+
+#ifdef CONFIG_BTRFS_DEBUG
+ struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+ enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
+
+ if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
+ return false;
+
+ auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+#endif
+
/* Submit synchronously if the checksum implementation is fast. */
- if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index bbaed317161a..d9dd5276093d 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -7,12 +7,14 @@
#ifndef BTRFS_BIO_H
#define BTRFS_BIO_H
+#include <linux/types.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include "tree-checker.h"
struct btrfs_bio;
struct btrfs_fs_info;
+struct btrfs_inode;
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index a9be9ac99222..5f7587ca1ca7 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -418,7 +418,7 @@ struct btrfs_caching_control *btrfs_get_caching_control(
return ctl;
}
-void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
+static void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
{
if (refcount_dec_and_test(&ctl->count))
kfree(ctl);
@@ -1063,7 +1063,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
bool remove_rsv = false;
block_group = btrfs_lookup_block_group(fs_info, map->start);
- BUG_ON(!block_group);
+ if (!block_group)
+ return -ENOENT;
+
BUG_ON(!block_group->ro);
trace_btrfs_remove_block_group(block_group);
@@ -1429,7 +1431,7 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
* group in pinned_extents before we were able to clear the whole block
* group range from pinned_extents. This means that task can lookup for
* the block group after we unpinned it from pinned_extents and removed
- * it, leading to a BUG_ON() at unpin_extent_range().
+ * it, leading to an error at unpin_extent_range().
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans) {
@@ -1455,6 +1457,7 @@ out:
*/
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
{
+ LIST_HEAD(retry_list);
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
@@ -1476,6 +1479,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
+ u64 used;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
@@ -1511,22 +1515,68 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
goto next;
}
+ spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- if (block_group->reserved || block_group->pinned ||
- block_group->used || block_group->ro ||
+ if (btrfs_is_block_group_used(block_group) || block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
* outstanding allocations in this block group. We do
* the ro check in case balance is currently acting on
* this block group.
+ *
+ * Also bail out if this is the only block group for its
+ * type, because otherwise we would lose profile
+ * information from fs_info->avail_*_alloc_bits and the
+ * next block group of this type would be created with a
+ * "single" profile (even if we're in a raid fs) because
+ * fs_info->avail_*_alloc_bits would be 0.
+ */
+ trace_btrfs_skip_unused_block_group(block_group);
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+
+ /*
+ * The block group may be unused but there may be space reserved
+ * accounting with the existence of that block group, that is,
+ * space_info->bytes_may_use was incremented by a task but no
+ * space was yet allocated from the block group by the task.
+ * That space may or may not be allocated, as we are generally
+ * pessimistic about space reservation for metadata as well as
+ * for data when using compression (as we reserve space based on
+ * the worst case, when data can't be compressed, and before
+ * actually attempting compression, before starting writeback).
+ *
+ * So check if the total space of the space_info minus the size
+ * of this block group is less than the used space of the
+ * space_info - if that's the case, then it means we have tasks
+ * that might be relying on the block group in order to allocate
+ * extents, and add back the block group to the unused list when
+ * we finish, so that we retry later in case no tasks ended up
+ * needing to allocate extents from the block group.
+ */
+ used = btrfs_space_info_used(space_info, true);
+ if (space_info->total_bytes - block_group->length < used) {
+ /*
+ * Add a reference for the list, compensate for the ref
+ * drop under the "next" label for the
+ * fs_info->unused_bgs list.
*/
+ btrfs_get_block_group(block_group);
+ list_add_tail(&block_group->bg_list, &retry_list);
+
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
up_write(&space_info->groups_sem);
goto next;
}
+
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
/* We don't want to force the issue, only flip if it's ok. */
ret = inc_block_group_ro(block_group, 0);
@@ -1650,12 +1700,16 @@ next:
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
+ list_splice_tail(&retry_list, &fs_info->unused_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
return;
flip_async:
btrfs_end_transaction(trans);
+ spin_lock(&fs_info->unused_bgs_lock);
+ list_splice_tail(&retry_list, &fs_info->unused_bgs);
+ spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_put_block_group(block_group);
btrfs_discard_punt_unused_bgs_list(fs_info);
@@ -2684,6 +2738,37 @@ next:
btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
list_del_init(&block_group->bg_list);
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
+
+ /*
+ * If the block group is still unused, add it to the list of
+ * unused block groups. The block group may have been created in
+ * order to satisfy a space reservation, in which case the
+ * extent allocation only happens later. But often we don't
+ * actually need to allocate space that we previously reserved,
+ * so the block group may become unused for a long time. For
+ * example for metadata we generally reserve space for a worst
+ * possible scenario, but then don't end up allocating all that
+ * space or none at all (due to no need to COW, extent buffers
+ * were already COWed in the current transaction and still
+ * unwritten, tree heights lower than the maximum possible
+ * height, etc). For data we generally reserve the axact amount
+ * of space we are going to allocate later, the exception is
+ * when using compression, as we must reserve space based on the
+ * uncompressed data size, because the compression is only done
+ * when writeback triggered and we don't know how much space we
+ * are actually going to need, so we reserve the uncompressed
+ * size because the data may be uncompressible in the worst case.
+ */
+ if (ret == 0) {
+ bool used;
+
+ spin_lock(&block_group->lock);
+ used = btrfs_is_block_group_used(block_group);
+ spin_unlock(&block_group->lock);
+
+ if (!used)
+ btrfs_mark_bg_unused(block_group);
+ }
}
btrfs_trans_release_chunk_metadata(trans);
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index c4a1f01cc1c2..85e2d4cd12dc 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -3,9 +3,22 @@
#ifndef BTRFS_BLOCK_GROUP_H
#define BTRFS_BLOCK_GROUP_H
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/refcount.h>
+#include <linux/wait.h>
+#include <linux/sizes.h>
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs_tree.h>
#include "free-space-cache.h"
struct btrfs_chunk_map;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct btrfs_trans_handle;
enum btrfs_disk_cache_state {
BTRFS_DC_WRITTEN,
@@ -257,6 +270,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
return (block_group->start + block_group->length);
}
+static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
+{
+ lockdep_assert_held(&bg->lock);
+
+ return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+}
+
static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
{
@@ -290,7 +310,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
-void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index ceb5f586a2d5..95c174f9fd4f 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -6,7 +6,6 @@
#include "space-info.h"
#include "transaction.h"
#include "block-group.h"
-#include "disk-io.h"
#include "fs.h"
#include "accessors.h"
@@ -494,7 +493,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
- if (unlikely(block_rsv->size == 0))
+ if (unlikely(btrfs_block_rsv_size(block_rsv) == 0))
goto try_reserve;
again:
ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
index b0bd12b8652f..1f53b967d069 100644
--- a/fs/btrfs/block-rsv.h
+++ b/fs/btrfs/block-rsv.h
@@ -3,8 +3,15 @@
#ifndef BTRFS_BLOCK_RSV_H
#define BTRFS_BLOCK_RSV_H
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+
struct btrfs_trans_handle;
struct btrfs_root;
+struct btrfs_space_info;
+struct btrfs_block_rsv;
+struct btrfs_fs_info;
enum btrfs_reserve_flush_enum;
/*
@@ -101,4 +108,36 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv)
return data_race(rsv->full);
}
+/*
+ * Get the reserved mount of a block reserve in a context where getting a stale
+ * value is acceptable, instead of accessing it directly and trigger data race
+ * warning from KCSAN.
+ */
+static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv)
+{
+ u64 ret;
+
+ spin_lock(&rsv->lock);
+ ret = rsv->reserved;
+ spin_unlock(&rsv->lock);
+
+ return ret;
+}
+
+/*
+ * Get the size of a block reserve in a context where getting a stale value is
+ * acceptable, instead of accessing it directly and trigger data race warning
+ * from KCSAN.
+ */
+static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv)
+{
+ u64 ret;
+
+ spin_lock(&rsv->lock);
+ ret = rsv->size;
+ spin_unlock(&rsv->lock);
+
+ return ret;
+}
+
#endif /* BTRFS_BLOCK_RSV_H */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7f7c5a92d2b8..100020ca4658 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -8,13 +8,32 @@
#include <linux/hash.h>
#include <linux/refcount.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/compiler.h>
#include <linux/fscrypt.h>
+#include <linux/lockdep.h>
+#include <uapi/linux/btrfs_tree.h>
#include <trace/events/btrfs.h>
+#include "block-rsv.h"
+#include "btrfs_inode.h"
#include "extent_map.h"
#include "extent_io.h"
+#include "extent-io-tree.h"
#include "ordered-data.h"
#include "delayed-inode.h"
+struct extent_state;
+struct posix_acl;
+struct iov_iter;
+struct writeback_control;
+struct btrfs_root;
+struct btrfs_fs_info;
+struct btrfs_trans_handle;
+
/*
* Since we search a directory based on f_pos (struct dir_context::pos) we have
* to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
@@ -41,7 +60,6 @@ enum {
*/
BTRFS_INODE_NEEDS_FULL_SYNC,
BTRFS_INODE_COPY_EVERYTHING,
- BTRFS_INODE_IN_DELALLOC_LIST,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
/*
@@ -428,7 +446,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes, bool nowait, bool strict);
-void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode);
+void btrfs_del_delalloc_inode(struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -490,8 +508,7 @@ struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
struct btrfs_root *root, struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
- struct page *page, size_t pg_offset,
- u64 start, u64 len);
+ struct page *page, u64 start, u64 len);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 193168214eeb..b2b94009959d 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -25,8 +25,6 @@
#include "misc.h"
#include "ctree.h"
#include "fs.h"
-#include "disk-io.h"
-#include "transaction.h"
#include "btrfs_inode.h"
#include "bio.h"
#include "ordered-data.h"
@@ -34,8 +32,7 @@
#include "extent_io.h"
#include "extent_map.h"
#include "subpage.h"
-#include "zoned.h"
-#include "file-item.h"
+#include "messages.h"
#include "super.h"
static struct bio_set btrfs_compressed_bioset;
@@ -141,16 +138,16 @@ static int compression_decompress_bio(struct list_head *ws,
}
static int compression_decompress(int type, struct list_head *ws,
- const u8 *data_in, struct page *dest_page,
- unsigned long start_byte, size_t srclen, size_t destlen)
+ const u8 *data_in, struct page *dest_page,
+ unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
- start_byte, srclen, destlen);
+ dest_pgoff, srclen, destlen);
case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page,
- start_byte, srclen, destlen);
+ dest_pgoff, srclen, destlen);
case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
- start_byte, srclen, destlen);
+ dest_pgoff, srclen, destlen);
case BTRFS_COMPRESS_NONE:
default:
/*
@@ -284,7 +281,7 @@ static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
static noinline void end_compressed_writeback(const struct compressed_bio *cb)
{
struct inode *inode = &cb->bbio.inode->vfs_inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct folio_batch fbatch;
@@ -415,7 +412,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
struct compressed_bio *cb,
int *memstall, unsigned long *pflags)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
unsigned long end_index;
struct bio *orig_bio = &cb->orig_bbio->bio;
u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
@@ -441,7 +438,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* This makes readahead less effective, so here disable readahead for
* subpage for now, until full compressed write is supported.
*/
- if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE)
+ if (fs_info->sectorsize < PAGE_SIZE)
return 0;
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
@@ -1037,14 +1034,23 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
* start_byte tells us the offset into the compressed data we're interested in
*/
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
- unsigned long start_byte, size_t srclen, size_t destlen)
+ unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
+ struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
struct list_head *workspace;
+ const u32 sectorsize = fs_info->sectorsize;
int ret;
+ /*
+ * The full destination page range should not exceed the page size.
+ * And the @destlen should not exceed sectorsize, as this is only called for
+ * inline file extents, which should not exceed sectorsize.
+ */
+ ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
+
workspace = get_workspace(type, 0);
ret = compression_decompress(type, workspace, data_in, dest_page,
- start_byte, srclen, destlen);
+ dest_pgoff, srclen, destlen);
put_workspace(type, workspace);
return ret;
@@ -1470,11 +1476,6 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
/*
* Compression heuristic.
*
- * For now is's a naive and optimistic 'return true', we'll extend the logic to
- * quickly (compared to direct compression) detect data characteristics
- * (compressible/incompressible) to avoid wasting CPU time on incompressible
- * data.
- *
* The following types of analysis can be performed:
* - detect mostly zero data
* - detect data with low "byte set" size (text, etc)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 93cc92974dee..4691a84ca838 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -7,10 +7,18 @@
#define BTRFS_COMPRESSION_H
#include <linux/sizes.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
#include "bio.h"
+struct address_space;
+struct page;
+struct inode;
struct btrfs_inode;
struct btrfs_ordered_extent;
+struct btrfs_bio;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
@@ -32,8 +40,6 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
-struct page;
-
struct compressed_bio {
/* Number of compressed pages in the array */
unsigned int nr_pages;
@@ -148,7 +154,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *zlib_alloc_workspace(unsigned int level);
void zlib_free_workspace(struct list_head *ws);
@@ -159,7 +165,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *lzo_alloc_workspace(unsigned int level);
void lzo_free_workspace(struct list_head *ws);
@@ -169,7 +175,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
void zstd_init_workspace_manager(void);
void zstd_cleanup_workspace_manager(void);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e65e012bac55..aaf53fd84358 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -820,7 +820,7 @@ int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
}
while (low < high) {
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
unsigned long oil;
unsigned long offset;
struct btrfs_disk_key *tmp;
@@ -4280,6 +4280,10 @@ void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans,
/*
* Given a key and some data, insert items into the tree.
* This does all the path init required, making room in the tree if needed.
+ *
+ * Returns: 0 on success
+ * -EEXIST if the first key already exists
+ * < 0 on other errors
*/
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -5082,9 +5086,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
int __init btrfs_ctree_init(void)
{
- btrfs_path_cachep = kmem_cache_create("btrfs_path",
- sizeof(struct btrfs_path), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_path_cachep = KMEM_CACHE(btrfs_path, 0);
if (!btrfs_path_cachep)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 70e828d33177..c03c58246033 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -7,25 +7,24 @@
#define BTRFS_CTREE_H
#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/xarray.h>
+#include <linux/refcount.h>
+#include <uapi/linux/btrfs_tree.h>
#include "locking.h"
#include "fs.h"
#include "accessors.h"
+#include "extent-io-tree.h"
+struct extent_buffer;
+struct btrfs_block_rsv;
struct btrfs_trans_handle;
-struct btrfs_transaction;
-struct btrfs_pending_snapshot;
-struct btrfs_delayed_ref_root;
-struct btrfs_space_info;
struct btrfs_block_group;
-struct btrfs_ordered_sum;
-struct btrfs_ref;
-struct btrfs_bio;
-struct btrfs_ioctl_encoded_io_args;
-struct btrfs_device;
-struct btrfs_fs_devices;
-struct btrfs_balance_control;
-struct btrfs_delayed_root;
-struct reloc_control;
/* Read ahead values for struct btrfs_path.reada */
enum {
@@ -478,8 +477,7 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
return mapping_gfp_constraint(mapping, ~__GFP_FS);
}
-int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
- u64 start, u64 end);
+void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index c276b136ab63..f015fa1b6301 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -6,7 +6,6 @@
#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
-#include "print-tree.h"
#include "transaction.h"
#include "locking.h"
#include "accessors.h"
@@ -521,7 +520,7 @@ static int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
* keep_locks set and lowest_level is 1, regardless of the value of
* path->slots[1].
*/
- BUG_ON(path->locks[1] == 0);
+ ASSERT(path->locks[1] != 0);
ret = btrfs_realloc_node(trans, root,
path->nodes[1], 0,
&last_ret,
@@ -810,7 +809,7 @@ static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
u32 extent_thresh, u64 newer_than, bool locked)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *next;
bool ret = false;
@@ -861,20 +860,21 @@ out:
* NOTE: Caller should also wait for page writeback after the cluster is
* prepared, here we don't do writeback wait for each page.
*/
-static struct page *defrag_prepare_one_page(struct btrfs_inode *inode, pgoff_t index)
+static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t index)
{
struct address_space *mapping = inode->vfs_inode.i_mapping;
gfp_t mask = btrfs_alloc_write_mask(mapping);
u64 page_start = (u64)index << PAGE_SHIFT;
u64 page_end = page_start + PAGE_SIZE - 1;
struct extent_state *cached_state = NULL;
- struct page *page;
+ struct folio *folio;
int ret;
again:
- page = find_or_create_page(mapping, index, mask);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
+ if (IS_ERR(folio))
+ return folio;
/*
* Since we can defragment files opened read-only, we can encounter
@@ -884,16 +884,16 @@ again:
* executables that explicitly enable them, so this isn't very
* restrictive.
*/
- if (PageCompound(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio_test_large(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(-ETXTBSY);
}
- ret = set_page_extent_mapped(page);
+ ret = set_folio_extent_mapped(folio);
if (ret < 0) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(ret);
}
@@ -908,17 +908,17 @@ again:
if (!ordered)
break;
- unlock_page(page);
+ folio_unlock(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
- lock_page(page);
+ folio_lock(folio);
/*
- * We unlocked the page above, so we need check if it was
+ * We unlocked the folio above, so we need check if it was
* released or not.
*/
- if (page->mapping != mapping || !PagePrivate(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio->mapping != mapping || !folio->private) {
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
}
@@ -927,21 +927,21 @@ again:
* Now the page range has no ordered extent any more. Read the page to
* make it uptodate.
*/
- if (!PageUptodate(page)) {
- btrfs_read_folio(NULL, page_folio(page));
- lock_page(page);
- if (page->mapping != mapping || !PagePrivate(page)) {
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ btrfs_read_folio(NULL, folio);
+ folio_lock(folio);
+ if (folio->mapping != mapping || !folio->private) {
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
- if (!PageUptodate(page)) {
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(-EIO);
}
}
- return page;
+ return folio;
}
struct defrag_target_range {
@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto add;
/* Skip too large extent */
- if (range_len >= extent_thresh)
+ if (em->len >= extent_thresh)
goto next;
/*
@@ -1162,7 +1162,7 @@ static_assert(PAGE_ALIGNED(CLUSTER_SIZE));
*/
static int defrag_one_locked_target(struct btrfs_inode *inode,
struct defrag_target_range *target,
- struct page **pages, int nr_pages,
+ struct folio **folios, int nr_pages,
struct extent_state **cached_state)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -1171,7 +1171,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
const u64 len = target->len;
unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
unsigned long start_index = start >> PAGE_SHIFT;
- unsigned long first_index = page_index(pages[0]);
+ unsigned long first_index = folios[0]->index;
int ret = 0;
int i;
@@ -1188,8 +1188,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
/* Update the page status */
for (i = start_index - first_index; i <= last_index - first_index; i++) {
- ClearPageChecked(pages[i]);
- btrfs_folio_clamp_set_dirty(fs_info, page_folio(pages[i]), start, len);
+ folio_clear_checked(folios[i]);
+ btrfs_folio_clamp_set_dirty(fs_info, folios[i], start, len);
}
btrfs_delalloc_release_extents(inode, len);
extent_changeset_free(data_reserved);
@@ -1205,7 +1205,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
struct defrag_target_range *entry;
struct defrag_target_range *tmp;
LIST_HEAD(target_list);
- struct page **pages;
+ struct folio **folios;
const u32 sectorsize = inode->root->fs_info->sectorsize;
u64 last_index = (start + len - 1) >> PAGE_SHIFT;
u64 start_index = start >> PAGE_SHIFT;
@@ -1216,21 +1216,21 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
- pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (!pages)
+ folios = kcalloc(nr_pages, sizeof(struct folio *), GFP_NOFS);
+ if (!folios)
return -ENOMEM;
/* Prepare all pages */
for (i = 0; i < nr_pages; i++) {
- pages[i] = defrag_prepare_one_page(inode, start_index + i);
- if (IS_ERR(pages[i])) {
- ret = PTR_ERR(pages[i]);
- pages[i] = NULL;
- goto free_pages;
+ folios[i] = defrag_prepare_one_folio(inode, start_index + i);
+ if (IS_ERR(folios[i])) {
+ ret = PTR_ERR(folios[i]);
+ nr_pages = i;
+ goto free_folios;
}
}
for (i = 0; i < nr_pages; i++)
- wait_on_page_writeback(pages[i]);
+ folio_wait_writeback(folios[i]);
/* Lock the pages range */
lock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
@@ -1250,7 +1250,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
goto unlock_extent;
list_for_each_entry(entry, &target_list, list) {
- ret = defrag_one_locked_target(inode, entry, pages, nr_pages,
+ ret = defrag_one_locked_target(inode, entry, folios, nr_pages,
&cached_state);
if (ret < 0)
break;
@@ -1264,14 +1264,12 @@ unlock_extent:
unlock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
&cached_state);
-free_pages:
+free_folios:
for (i = 0; i < nr_pages; i++) {
- if (pages[i]) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ folio_unlock(folios[i]);
+ folio_put(folios[i]);
}
- kfree(pages);
+ kfree(folios);
return ret;
}
@@ -1366,7 +1364,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_to_defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
unsigned long sectors_defragged = 0;
u64 isize = i_size_read(inode);
u64 cur;
@@ -1512,9 +1510,7 @@ void __cold btrfs_auto_defrag_exit(void)
int __init btrfs_auto_defrag_init(void)
{
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
- sizeof(struct inode_defrag), 0,
- SLAB_MEM_SPREAD,
- NULL);
+ sizeof(struct inode_defrag), 0, 0, NULL);
if (!btrfs_inode_defrag_cachep)
return -ENOMEM;
diff --git a/fs/btrfs/defrag.h b/fs/btrfs/defrag.h
index 5a62763528d1..878528e086fb 100644
--- a/fs/btrfs/defrag.h
+++ b/fs/btrfs/defrag.h
@@ -3,6 +3,16 @@
#ifndef BTRFS_DEFRAG_H
#define BTRFS_DEFRAG_H
+#include <linux/types.h>
+#include <linux/compiler_types.h>
+
+struct inode;
+struct file_ra_state;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_trans_handle;
+struct btrfs_ioctl_defrag_range_args;
+
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_to_defrag);
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 2833e8ef4c09..b3527efd0b4b 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -6,9 +6,7 @@
#include "block-rsv.h"
#include "btrfs_inode.h"
#include "space-info.h"
-#include "transaction.h"
#include "qgroup.h"
-#include "block-group.h"
#include "fs.h"
/*
@@ -245,7 +243,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 reserve_size = 0;
u64 qgroup_rsv_size = 0;
- u64 csum_leaves;
unsigned outstanding_extents;
lockdep_assert_held(&inode->lock);
@@ -260,10 +257,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
outstanding_extents);
reserve_size += btrfs_calc_metadata_size(fs_info, 1);
}
- csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
- inode->csum_bytes);
- reserve_size += btrfs_calc_insert_metadata_size(fs_info,
- csum_leaves);
+ if (!(inode->flags & BTRFS_INODE_NODATASUM)) {
+ u64 csum_leaves;
+
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
+ reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves);
+ }
/*
* For qgroup rsv, the calculation is very simple:
* account one nodesize for each outstanding extent
@@ -278,14 +277,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
spin_unlock(&block_rsv->lock);
}
-static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+static void calc_inode_reservations(struct btrfs_inode *inode,
u64 num_bytes, u64 disk_num_bytes,
u64 *meta_reserve, u64 *qgroup_reserve)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 nr_extents = count_max_extents(fs_info, num_bytes);
- u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+ u64 csum_leaves;
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+ if (inode->flags & BTRFS_INODE_NODATASUM)
+ csum_leaves = 0;
+ else
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+
*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
nr_extents + csum_leaves);
@@ -337,7 +342,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
* everything out and try again, which is bad. This way we just
* over-reserve slightly, and clean up the mess when we are done.
*/
- calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+ calc_inode_reservations(inode, num_bytes, disk_num_bytes,
&meta_reserve, &qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true,
noflush);
@@ -359,7 +364,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
nr_extents = count_max_extents(fs_info, num_bytes);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, nr_extents);
- inode->csum_bytes += disk_num_bytes;
+ if (!(inode->flags & BTRFS_INODE_NODATASUM))
+ inode->csum_bytes += disk_num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
@@ -393,7 +399,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&inode->lock);
- inode->csum_bytes -= num_bytes;
+ if (!(inode->flags & BTRFS_INODE_NODATASUM))
+ inode->csum_bytes -= num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
diff --git a/fs/btrfs/delalloc-space.h b/fs/btrfs/delalloc-space.h
index c5d573f2366e..ce4f889e4f17 100644
--- a/fs/btrfs/delalloc-space.h
+++ b/fs/btrfs/delalloc-space.h
@@ -3,7 +3,11 @@
#ifndef BTRFS_DELALLOC_SPACE_H
#define BTRFS_DELALLOC_SPACE_H
+#include <linux/types.h>
+
struct extent_changeset;
+struct btrfs_inode;
+struct btrfs_fs_info;
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
int btrfs_check_data_free_space(struct btrfs_inode *inode,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 08102883f560..dd6f566a383f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -28,11 +28,7 @@ static struct kmem_cache *delayed_node_cache;
int __init btrfs_delayed_inode_init(void)
{
- delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
- sizeof(struct btrfs_delayed_node),
- 0,
- SLAB_MEM_SPREAD,
- NULL);
+ delayed_node_cache = KMEM_CACHE(btrfs_delayed_node, 0);
if (!delayed_node_cache)
return -ENOMEM;
return 0;
@@ -43,6 +39,17 @@ void __cold btrfs_delayed_inode_exit(void)
kmem_cache_destroy(delayed_node_cache);
}
+void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root)
+{
+ atomic_set(&delayed_root->items, 0);
+ atomic_set(&delayed_root->items_seq, 0);
+ delayed_root->nodes = 0;
+ spin_lock_init(&delayed_root->lock);
+ init_waitqueue_head(&delayed_root->wait);
+ INIT_LIST_HEAD(&delayed_root->node_list);
+ INIT_LIST_HEAD(&delayed_root->prepare_list);
+}
+
static inline void btrfs_init_delayed_node(
struct btrfs_delayed_node *delayed_node,
struct btrfs_root *root, u64 inode_id)
@@ -430,8 +437,6 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
delayed_root = delayed_node->root->fs_info->delayed_root;
- BUG_ON(!delayed_root);
-
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_node->ins_root;
else
@@ -980,7 +985,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
if (delayed_node &&
test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
- BUG_ON(!delayed_node->root);
+ ASSERT(delayed_node->root);
clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
delayed_node->count--;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 5cceb31bbd16..64e115d97499 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -7,15 +7,23 @@
#ifndef BTRFS_DELAYED_INODE_H
#define BTRFS_DELAYED_INODE_H
+#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/wait.h>
+#include <linux/fs.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
#include "ctree.h"
+struct btrfs_disk_key;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct btrfs_root;
+struct btrfs_trans_handle;
+
enum btrfs_delayed_item_type {
BTRFS_DELAYED_INSERTION_ITEM,
BTRFS_DELAYED_DELETION_ITEM
@@ -98,18 +106,7 @@ struct btrfs_delayed_item {
char data[] __counted_by(data_len);
};
-static inline void btrfs_init_delayed_root(
- struct btrfs_delayed_root *delayed_root)
-{
- atomic_set(&delayed_root->items, 0);
- atomic_set(&delayed_root->items_seq, 0);
- delayed_root->nodes = 0;
- spin_lock_init(&delayed_root->lock);
- init_waitqueue_head(&delayed_root->wait);
- INIT_LIST_HEAD(&delayed_root->node_list);
- INIT_LIST_HEAD(&delayed_root->prepare_list);
-}
-
+void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root);
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 891ea2fa263c..e44e62cf76bc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -1004,6 +1004,52 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&ref->add_list);
}
+void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, int action, u64 bytenr,
+ u64 len, u64 parent, u64 owning_root)
+{
+ generic_ref->action = action;
+ generic_ref->bytenr = bytenr;
+ generic_ref->len = len;
+ generic_ref->parent = parent;
+ generic_ref->owning_root = owning_root;
+}
+
+void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 root,
+ u64 mod_root, bool skip_qgroup)
+{
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+ /* If @real_root not set, use @root as fallback */
+ generic_ref->real_root = mod_root ?: root;
+#endif
+ generic_ref->tree_ref.level = level;
+ generic_ref->tree_ref.ref_root = root;
+ generic_ref->type = BTRFS_REF_METADATA;
+ if (skip_qgroup || !(is_fstree(root) &&
+ (!mod_root || is_fstree(mod_root))))
+ generic_ref->skip_qgroup = true;
+ else
+ generic_ref->skip_qgroup = false;
+
+}
+
+void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ref_root, u64 ino,
+ u64 offset, u64 mod_root, bool skip_qgroup)
+{
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+ /* If @real_root not set, use @root as fallback */
+ generic_ref->real_root = mod_root ?: ref_root;
+#endif
+ generic_ref->data_ref.ref_root = ref_root;
+ generic_ref->data_ref.ino = ino;
+ generic_ref->data_ref.offset = offset;
+ generic_ref->type = BTRFS_REF_DATA;
+ if (skip_qgroup || !(is_fstree(ref_root) &&
+ (!mod_root || is_fstree(mod_root))))
+ generic_ref->skip_qgroup = true;
+ else
+ generic_ref->skip_qgroup = false;
+}
+
/*
* add a delayed tree ref. This does all of the accounting required
* to make sure the delayed ref is eventually processed before this
@@ -1220,6 +1266,25 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
return 0;
}
+void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
+{
+ if (refcount_dec_and_test(&ref->refs)) {
+ WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
+ switch (ref->type) {
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY:
+ case BTRFS_SHARED_DATA_REF_KEY:
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+ break;
+ default:
+ BUG();
+ }
+ }
+}
+
/*
* This does a simple search for the head node for a given extent. Returns the
* head node if found, or NULL if not.
@@ -1242,31 +1307,19 @@ void __cold btrfs_delayed_ref_exit(void)
int __init btrfs_delayed_ref_init(void)
{
- btrfs_delayed_ref_head_cachep = kmem_cache_create(
- "btrfs_delayed_ref_head",
- sizeof(struct btrfs_delayed_ref_head), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_ref_head_cachep = KMEM_CACHE(btrfs_delayed_ref_head, 0);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
- btrfs_delayed_tree_ref_cachep = kmem_cache_create(
- "btrfs_delayed_tree_ref",
- sizeof(struct btrfs_delayed_tree_ref), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_tree_ref_cachep = KMEM_CACHE(btrfs_delayed_tree_ref, 0);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
- btrfs_delayed_data_ref_cachep = kmem_cache_create(
- "btrfs_delayed_data_ref",
- sizeof(struct btrfs_delayed_data_ref), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_data_ref_cachep = KMEM_CACHE(btrfs_delayed_data_ref, 0);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
- btrfs_delayed_extent_op_cachep = kmem_cache_create(
- "btrfs_delayed_extent_op",
- sizeof(struct btrfs_delayed_extent_op), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_extent_op_cachep = KMEM_CACHE(btrfs_delayed_extent_op, 0);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 62d679d40f4f..b291147cb8ab 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -6,7 +6,17 @@
#ifndef BTRFS_DELAYED_REF_H
#define BTRFS_DELAYED_REF_H
+#include <linux/types.h>
#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <uapi/linux/btrfs_tree.h>
+
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
/* these are the possible values of struct btrfs_delayed_ref_node->action */
enum btrfs_delayed_ref_action {
@@ -308,53 +318,12 @@ static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *
return btrfs_calc_metadata_size(fs_info, num_csum_items);
}
-static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
- int action, u64 bytenr, u64 len,
- u64 parent, u64 owning_root)
-{
- generic_ref->action = action;
- generic_ref->bytenr = bytenr;
- generic_ref->len = len;
- generic_ref->parent = parent;
- generic_ref->owning_root = owning_root;
-}
-
-static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level,
- u64 root, u64 mod_root, bool skip_qgroup)
-{
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
- /* If @real_root not set, use @root as fallback */
- generic_ref->real_root = mod_root ?: root;
-#endif
- generic_ref->tree_ref.level = level;
- generic_ref->tree_ref.ref_root = root;
- generic_ref->type = BTRFS_REF_METADATA;
- if (skip_qgroup || !(is_fstree(root) &&
- (!mod_root || is_fstree(mod_root))))
- generic_ref->skip_qgroup = true;
- else
- generic_ref->skip_qgroup = false;
-
-}
-
-static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
- u64 ref_root, u64 ino, u64 offset, u64 mod_root,
- bool skip_qgroup)
-{
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
- /* If @real_root not set, use @root as fallback */
- generic_ref->real_root = mod_root ?: ref_root;
-#endif
- generic_ref->data_ref.ref_root = ref_root;
- generic_ref->data_ref.ino = ino;
- generic_ref->data_ref.offset = offset;
- generic_ref->type = BTRFS_REF_DATA;
- if (skip_qgroup || !(is_fstree(ref_root) &&
- (!mod_root || is_fstree(mod_root))))
- generic_ref->skip_qgroup = true;
- else
- generic_ref->skip_qgroup = false;
-}
+void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, int action, u64 bytenr,
+ u64 len, u64 parent, u64 owning_root);
+void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 root,
+ u64 mod_root, bool skip_qgroup);
+void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ref_root, u64 ino,
+ u64 offset, u64 mod_root, bool skip_qgroup);
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
@@ -369,24 +338,7 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
}
-static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
-{
- if (refcount_dec_and_test(&ref->refs)) {
- WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
- switch (ref->type) {
- case BTRFS_TREE_BLOCK_REF_KEY:
- case BTRFS_SHARED_BLOCK_REF_KEY:
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
- break;
- case BTRFS_EXTENT_DATA_REF_KEY:
- case BTRFS_SHARED_DATA_REF_KEY:
- kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
- break;
- default:
- BUG();
- }
- }
-}
+void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref);
static inline u64 btrfs_ref_head_to_space_flags(
struct btrfs_delayed_ref_head *head_ref)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1502d664c892..7696beec4c21 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -11,10 +11,8 @@
#include <linux/math64.h>
#include "misc.h"
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "async-thread.h"
#include "dev-replace.h"
@@ -246,7 +244,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bdev;
u64 devid = BTRFS_DEV_REPLACE_DEVID;
int ret = 0;
@@ -257,13 +255,13 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
return -EINVAL;
}
- bdev_handle = bdev_open_by_path(device_path, BLK_OPEN_WRITE,
+ bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE,
fs_info->bdev_holder, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
btrfs_err(fs_info, "target device %s is invalid!", device_path);
- return PTR_ERR(bdev_handle);
+ return PTR_ERR(bdev_file);
}
- bdev = bdev_handle->bdev;
+ bdev = file_bdev(bdev_file);
if (!btrfs_check_device_zone_type(fs_info, bdev)) {
btrfs_err(fs_info,
@@ -314,7 +312,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
device->commit_bytes_used = device->bytes_used;
device->fs_info = fs_info;
device->bdev = bdev;
- device->bdev_handle = bdev_handle;
+ device->bdev_file = bdev_file;
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
device->dev_stats_valid = 1;
@@ -335,7 +333,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
return 0;
error:
- bdev_release(bdev_handle);
+ fput(bdev_file);
return ret;
}
@@ -725,6 +723,23 @@ leave:
return ret;
}
+static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
+{
+ if (args->start.srcdevid == 0) {
+ if (memchr(args->start.srcdev_name, 0,
+ sizeof(args->start.srcdev_name)) == NULL)
+ return -ENAMETOOLONG;
+ } else {
+ args->start.srcdev_name[0] = 0;
+ }
+
+ if (memchr(args->start.tgtdev_name, 0,
+ sizeof(args->start.tgtdev_name)) == NULL)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -737,10 +752,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
default:
return -EINVAL;
}
-
- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
- args->start.tgtdev_name[0] == '\0')
- return -EINVAL;
+ ret = btrfs_check_replace_dev_names(args);
+ if (ret < 0)
+ return ret;
ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
args->start.srcdevid,
@@ -984,8 +998,7 @@ error:
btrfs_sysfs_remove_device(src_device);
btrfs_sysfs_update_devid(tgt_device);
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
- btrfs_scratch_superblocks(fs_info, src_device->bdev,
- src_device->name->str);
+ btrfs_scratch_superblocks(fs_info, src_device);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 675082ccec89..23e480efe5e6 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -6,11 +6,15 @@
#ifndef BTRFS_DEV_REPLACE_H
#define BTRFS_DEV_REPLACE_H
+#include <linux/types.h>
+#include <linux/compiler_types.h>
+
struct btrfs_ioctl_dev_replace_args;
struct btrfs_fs_info;
struct btrfs_trans_handle;
struct btrfs_dev_replace;
struct btrfs_block_group;
+struct btrfs_device;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
diff --git a/fs/btrfs/dir-item.h b/fs/btrfs/dir-item.h
index e40a226373d7..00b3d83d7569 100644
--- a/fs/btrfs/dir-item.h
+++ b/fs/btrfs/dir-item.h
@@ -3,9 +3,15 @@
#ifndef BTRFS_DIR_ITEM_H
#define BTRFS_DIR_ITEM_H
+#include <linux/types.h>
#include <linux/crc32c.h>
struct fscrypt_str;
+struct btrfs_fs_info;
+struct btrfs_key;
+struct btrfs_path;
+struct btrfs_root;
+struct btrfs_trans_handle;
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const struct fscrypt_str *name);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c6907d533fe8..3df5477d48a8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,7 +29,6 @@
#include "tree-log.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "rcu-string.h"
#include "dev-replace.h"
#include "raid56.h"
#include "sysfs.h"
@@ -193,7 +192,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
struct folio *folio = eb->folios[i];
u64 start = max_t(u64, eb->start, folio_pos(folio));
u64 end = min_t(u64, eb->start + eb->len,
- folio_pos(folio) + folio_size(folio));
+ folio_pos(folio) + eb->folio_size);
u32 len = end - start;
ret = btrfs_repair_io_failure(fs_info, 0, start, len,
@@ -498,15 +497,15 @@ static int btree_migrate_folio(struct address_space *mapping,
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct btrfs_fs_info *fs_info;
int ret;
if (wbc->sync_mode == WB_SYNC_NONE) {
+ struct btrfs_fs_info *fs_info;
if (wbc->for_kupdate)
return 0;
- fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ fs_info = inode_to_fs_info(mapping->host);
/* this is a bit racy, but that's ok */
ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
BTRFS_DIRTY_METADATA_THRESH,
@@ -529,11 +528,12 @@ static void btree_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
struct extent_io_tree *tree;
- tree = &BTRFS_I(folio->mapping->host)->io_tree;
+
+ tree = &folio_to_inode(folio)->io_tree;
extent_invalidate_folio(tree, folio, offset);
btree_release_folio(folio, GFP_NOFS);
if (folio_get_private(folio)) {
- btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
+ btrfs_warn(folio_to_fs_info(folio),
"folio private not zero on folio %llu",
(unsigned long long)folio_pos(folio));
folio_detach_private(folio);
@@ -544,7 +544,7 @@ static void btree_invalidate_folio(struct folio *folio, size_t offset,
static bool btree_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
struct btrfs_subpage_info *spi = fs_info->subpage_info;
struct btrfs_subpage *subpage;
struct extent_buffer *eb;
@@ -1244,6 +1244,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "leaked root %s refcount %d",
btrfs_root_name(&root->root_key, buf),
refcount_read(&root->refs));
+ WARN_ON_ONCE(1);
while (refcount_read(&root->refs) > 1)
btrfs_put_root(root);
btrfs_put_root(root);
@@ -1307,12 +1308,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
- * pass 0 for new allocation.
+ * pass NULL for a new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev,
+ u64 objectid, dev_t *anon_dev,
bool check_ref)
{
struct btrfs_root *root;
@@ -1336,8 +1337,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
- /* Shouldn't get preallocated anon_dev for cached roots */
- ASSERT(!anon_dev);
+ /*
+ * Some other caller may have read out the newly inserted
+ * subvolume already (for things like backref walk etc). Not
+ * that common but still possible. In that case, we just need
+ * to free the anon_dev.
+ */
+ if (unlikely(anon_dev && *anon_dev)) {
+ free_anon_bdev(*anon_dev);
+ *anon_dev = 0;
+ }
+
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
@@ -1357,7 +1367,7 @@ again:
goto fail;
}
- ret = btrfs_init_fs_root(root, anon_dev);
+ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
if (ret)
goto fail;
@@ -1393,7 +1403,7 @@ fail:
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
- if (anon_dev)
+ if (anon_dev && *anon_dev)
root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
@@ -1409,7 +1419,7 @@ fail:
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}
/*
@@ -1417,11 +1427,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
* the anonymous block device id
*
* @objectid: tree objectid
- * @anon_dev: if zero, allocate a new anonymous block device or use the
- * parameter value
+ * @anon_dev: if NULL, allocate a new anonymous block device or use the
+ * parameter value if not NULL
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev)
+ u64 objectid, dev_t *anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
@@ -2230,7 +2240,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
struct btrfs_key location;
int ret;
- BUG_ON(!fs_info->tree_root);
+ ASSERT(fs_info->tree_root);
ret = load_global_roots(tree_root);
if (ret)
@@ -2830,6 +2840,7 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
int ret;
fs_info->sb = sb;
+ /* Temporary fixed values for block size until we read the superblock. */
sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
@@ -3347,6 +3358,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
+ /* Update the values for the current filesystem. */
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
@@ -4617,7 +4629,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
- __btrfs_del_delalloc_inode(root, btrfs_inode);
+ btrfs_del_delalloc_inode(btrfs_inode);
spin_unlock(&root->delalloc_lock);
/*
@@ -4916,7 +4928,14 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
goto error;
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of valid range.
+ */
+ ret = -EUCLEAN;
+ goto error;
+ }
if (path->slots[0] > 0) {
slot = path->slots[0] - 1;
l = path->nodes[0];
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 9413726b329b..76eb53fe7a11 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -6,6 +6,22 @@
#ifndef BTRFS_DISK_IO_H
#define BTRFS_DISK_IO_H
+#include <linux/sizes.h>
+#include <linux/compiler_types.h>
+#include "ctree.h"
+#include "fs.h"
+
+struct block_device;
+struct super_block;
+struct extent_buffer;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info;
+struct btrfs_super_block;
+struct btrfs_trans_handle;
+struct btrfs_tree_parent_check;
+struct btrfs_transaction;
+
#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12
@@ -25,10 +41,6 @@ static inline u64 btrfs_sb_offset(int mirror)
return BTRFS_SUPER_INFO_OFFSET;
}
-struct btrfs_device;
-struct btrfs_fs_devices;
-struct btrfs_tree_parent_check;
-
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -61,7 +73,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, dev_t anon_dev);
+ u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 744a02b7fd67..8398d345ec5b 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -5,7 +5,6 @@
#include "ctree.h"
#include "disk-io.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "export.h"
#include "accessors.h"
#include "super.h"
@@ -174,8 +173,15 @@ struct dentry *btrfs_get_parent(struct dentry *child)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
+ if (ret == 0) {
+ /*
+ * Key with offset of -1 found, there would have to exist an
+ * inode with such number or a root with such id.
+ */
+ ret = -EUCLEAN;
+ goto fail;
+ }
- BUG_ON(ret == 0); /* Key with offset of -1 found */
if (path->slots[0] == 0) {
ret = -ENOENT;
goto fail;
@@ -215,7 +221,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
{
struct inode *inode = d_inode(child);
struct inode *dir = d_inode(parent);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_inode_ref *iref;
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index eba6bc4f5a61..464582273af9 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -4,6 +4,10 @@
#define BTRFS_EXPORT_H
#include <linux/exportfs.h>
+#include <linux/types.h>
+
+struct dentry;
+struct super_block;
extern const struct export_operations btrfs_export_ops;
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index e3ee5449cc4a..c09b428823d7 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -6,7 +6,6 @@
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
-#include "misc.h"
static struct kmem_cache *extent_state_cache;
@@ -48,6 +47,7 @@ static inline void btrfs_extent_state_leak_debug_check(void)
extent_state_in_tree(state),
refcount_read(&state->refs));
list_del(&state->leak_list);
+ WARN_ON_ONCE(1);
kmem_cache_free(extent_state_cache, state);
}
}
@@ -1883,8 +1883,8 @@ void __cold extent_state_free_cachep(void)
int __init extent_state_init_cachep(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
- sizeof(struct extent_state), 0,
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_state), 0, 0,
+ NULL);
if (!extent_state_cache)
return -ENOMEM;
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index ebe6390d65e9..9d3a52d8f59a 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -3,9 +3,16 @@
#ifndef BTRFS_EXTENT_IO_TREE_H
#define BTRFS_EXTENT_IO_TREE_H
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/wait.h>
#include "misc.h"
struct extent_changeset;
+struct btrfs_fs_info;
+struct btrfs_inode;
/* Bits for the extent state */
enum {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f396aba92c57..beedd6ed64d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -18,7 +18,7 @@
#include <linux/crc32c.h>
#include "ctree.h"
#include "extent-tree.h"
-#include "tree-log.h"
+#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
@@ -26,14 +26,11 @@
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
#include "space-info.h"
#include "block-rsv.h"
-#include "delalloc-space.h"
#include "discard.h"
-#include "rcu-string.h"
#include "zoned.h"
#include "dev-replace.h"
#include "fs.h"
@@ -1260,7 +1257,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
- if (WARN_ON(start != aligned_start)) {
+ /* Adjust the range to be aligned to 512B sectors if necessary. */
+ if (start != aligned_start) {
len -= aligned_start - start;
len = round_down(len, 1 << SECTOR_SHIFT);
start = aligned_start;
@@ -2398,7 +2396,14 @@ static noinline int check_committed_ref(struct btrfs_root *root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = -ENOENT;
if (path->slots[0] == 0)
@@ -2779,6 +2784,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 total_unpinned = 0;
u64 empty_cluster = 0;
bool readonly;
+ int ret = 0;
while (start <= end) {
readonly = false;
@@ -2788,7 +2794,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(cache);
total_unpinned = 0;
cache = btrfs_lookup_block_group(fs_info, start);
- BUG_ON(!cache); /* Logic error */
+ if (cache == NULL) {
+ /* Logic error, something removed the block group. */
+ ret = -EUCLEAN;
+ goto out;
+ }
cluster = fetch_cluster_info(fs_info,
cache->space_info,
@@ -2857,7 +2867,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
if (cache)
btrfs_put_block_group(cache);
- return 0;
+out:
+ return ret;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
@@ -2887,7 +2898,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, &cached_state);
- unpin_extent_range(fs_info, start, end, true);
+ ret = unpin_extent_range(fs_info, start, end, true);
+ BUG_ON(ret);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
free_extent_state(cached_state);
cond_resched();
@@ -3446,16 +3458,17 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
u64 parent, int last_ref)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_ref generic_ref = { 0 };
struct btrfs_block_group *bg;
int ret;
- btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
- buf->start, buf->len, parent, btrfs_header_owner(buf));
- btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
- root_id, 0, false);
-
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
+ struct btrfs_ref generic_ref = { 0 };
+
+ btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
+ buf->start, buf->len, parent,
+ btrfs_header_owner(buf));
+ btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
+ root_id, 0, false);
btrfs_ref_tree_mod(fs_info, &generic_ref);
ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
BUG_ON(ret); /* -ENOMEM */
@@ -4298,6 +4311,42 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
return 0;
}
+static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+ struct find_free_extent_ctl *ffe_ctl)
+{
+ if (ffe_ctl->for_treelog) {
+ spin_lock(&fs_info->treelog_bg_lock);
+ if (fs_info->treelog_bg)
+ ffe_ctl->hint_byte = fs_info->treelog_bg;
+ spin_unlock(&fs_info->treelog_bg_lock);
+ } else if (ffe_ctl->for_data_reloc) {
+ spin_lock(&fs_info->relocation_bg_lock);
+ if (fs_info->data_reloc_bg)
+ ffe_ctl->hint_byte = fs_info->data_reloc_bg;
+ spin_unlock(&fs_info->relocation_bg_lock);
+ } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
+ struct btrfs_block_group *block_group;
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ /*
+ * No lock is OK here because avail is monotinically
+ * decreasing, and this is just a hint.
+ */
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ }
+
+ return 0;
+}
+
static int prepare_allocation(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_space_info *space_info,
@@ -4308,19 +4357,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
return prepare_allocation_clustered(fs_info, ffe_ctl,
space_info, ins);
case BTRFS_EXTENT_ALLOC_ZONED:
- if (ffe_ctl->for_treelog) {
- spin_lock(&fs_info->treelog_bg_lock);
- if (fs_info->treelog_bg)
- ffe_ctl->hint_byte = fs_info->treelog_bg;
- spin_unlock(&fs_info->treelog_bg_lock);
- }
- if (ffe_ctl->for_data_reloc) {
- spin_lock(&fs_info->relocation_bg_lock);
- if (fs_info->data_reloc_bg)
- ffe_ctl->hint_byte = fs_info->data_reloc_bg;
- spin_unlock(&fs_info->relocation_bg_lock);
- }
- return 0;
+ return prepare_allocation_zoned(fs_info, ffe_ctl);
default:
BUG();
}
@@ -4925,7 +4962,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 root_objectid = root->root_key.objectid;
u64 owning_root = root_objectid;
- BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(root_objectid != BTRFS_TREE_LOG_OBJECTID);
if (btrfs_is_data_reloc_root(root) && is_fstree(root->relocation_src_root))
owning_root = root->relocation_src_root;
@@ -6142,10 +6179,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
- u64 start, u64 end)
+/*
+ * Unpin the extent range in an error context and don't add the space back.
+ * Errors are not propagated further.
+ */
+void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end)
{
- return unpin_extent_range(fs_info, start, end, false);
+ unpin_extent_range(fs_info, start, end, false);
}
/*
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 2e066035ccee..af9f8800d5ac 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -3,11 +3,21 @@
#ifndef BTRFS_EXTENT_TREE_H
#define BTRFS_EXTENT_TREE_H
+#include <linux/types.h>
#include "misc.h"
#include "block-group.h"
+#include "locking.h"
+struct extent_buffer;
struct btrfs_free_cluster;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_path;
+struct btrfs_ref;
+struct btrfs_disk_key;
struct btrfs_delayed_ref_head;
+struct btrfs_delayed_ref_root;
+struct btrfs_extent_inline_ref;
enum btrfs_extent_allocation_policy {
BTRFS_EXTENT_ALLOC_CLUSTERED,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cfd2967f04a2..7441245b1ceb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -14,7 +14,6 @@
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/fsverity.h>
-#include "misc.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
@@ -22,7 +21,6 @@
#include "btrfs_inode.h"
#include "bio.h"
#include "locking.h"
-#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"
#include "subpage.h"
@@ -78,10 +76,11 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
eb = list_first_entry(&fs_info->allocated_ebs,
struct extent_buffer, leak_list);
pr_err(
- "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
+ "BTRFS: buffer leak start %llu len %u refs %d bflags %lu owner %llu\n",
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
btrfs_header_owner(eb));
list_del(&eb->leak_list);
+ WARN_ON_ONCE(1);
kmem_cache_free(extent_buffer_cache, eb);
}
spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
@@ -147,8 +146,8 @@ static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
int __init extent_buffer_init_cachep(void)
{
extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
- sizeof(struct extent_buffer), 0,
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_buffer), 0, 0,
+ NULL);
if (!extent_buffer_cache)
return -ENOMEM;
@@ -207,7 +206,7 @@ static void __process_pages_contig(struct address_space *mapping,
struct page *locked_page, u64 start, u64 end,
unsigned long page_ops)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
pgoff_t index = start_index;
@@ -251,7 +250,7 @@ static noinline int lock_delalloc_pages(struct inode *inode,
u64 start,
u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct address_space *mapping = inode->i_mapping;
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
@@ -323,7 +322,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
u64 *end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
const u64 orig_start = *start;
const u64 orig_end = *end;
@@ -433,7 +432,7 @@ static bool btrfs_verify_page(struct page *page, u64 start)
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
ASSERT(page_offset(page) <= start &&
@@ -462,16 +461,15 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
*/
static void end_bbio_data_write(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
struct bio *bio = &bbio->bio;
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
+ const u32 sectorsize = fs_info->sectorsize;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_folio_all(fi, bio) {
struct folio *folio = fi.folio;
- struct inode *inode = folio->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const u32 sectorsize = fs_info->sectorsize;
u64 start = folio_pos(folio) + fi.offset;
u32 len = fi.length;
@@ -593,22 +591,17 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
*/
static void end_bbio_data_read(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
struct bio *bio = &bbio->bio;
struct processed_extent processed = { 0 };
struct folio_iter fi;
- /*
- * The offset to the beginning of a bio, since one bio can never be
- * larger than UINT_MAX, u32 here is enough.
- */
- u32 bio_offset = 0;
+ const u32 sectorsize = fs_info->sectorsize;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_folio_all(fi, &bbio->bio) {
bool uptodate = !bio->bi_status;
struct folio *folio = fi.folio;
struct inode *inode = folio->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const u32 sectorsize = fs_info->sectorsize;
u64 start;
u64 end;
u32 len;
@@ -667,10 +660,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
end_page_read(folio_page(folio, 0), uptodate, start, len);
endio_readpage_release_extent(&processed, BTRFS_I(inode),
start, end, uptodate);
-
- ASSERT(bio_offset + len > bio_offset);
- bio_offset += len;
-
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
@@ -738,6 +727,8 @@ static int alloc_eb_folio_array(struct extent_buffer *eb, gfp_t extra_gfp)
for (int i = 0; i < num_pages; i++)
eb->folios[i] = page_folio(page_array[i]);
+ eb->folio_size = PAGE_SIZE;
+ eb->folio_shift = PAGE_SHIFT;
return 0;
}
@@ -827,7 +818,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
u64 disk_bytenr, struct page *page,
size_t size, unsigned long pg_offset)
{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *inode = page_to_inode(page);
ASSERT(pg_offset + size <= PAGE_SIZE);
ASSERT(bio_ctrl->end_io_func);
@@ -936,17 +927,21 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
int set_page_extent_mapped(struct page *page)
{
- struct folio *folio = page_folio(page);
+ return set_folio_extent_mapped(page_folio(page));
+}
+
+int set_folio_extent_mapped(struct folio *folio)
+{
struct btrfs_fs_info *fs_info;
- ASSERT(page->mapping);
+ ASSERT(folio->mapping);
if (folio_test_private(folio))
return 0;
- fs_info = btrfs_sb(page->mapping->host->i_sb);
+ fs_info = folio_to_fs_info(folio);
- if (btrfs_is_subpage(fs_info, page->mapping))
+ if (btrfs_is_subpage(fs_info, folio->mapping))
return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE);
@@ -963,20 +958,21 @@ void clear_page_extent_mapped(struct page *page)
if (!folio_test_private(folio))
return;
- fs_info = btrfs_sb(page->mapping->host->i_sb);
+ fs_info = page_to_fs_info(page);
if (btrfs_is_subpage(fs_info, page->mapping))
return btrfs_detach_subpage(fs_info, folio);
folio_detach_private(folio);
}
-static struct extent_map *
-__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
+static struct extent_map *__get_extent_map(struct inode *inode, struct page *page,
u64 start, u64 len, struct extent_map **em_cached)
{
struct extent_map *em;
- if (em_cached && *em_cached) {
+ ASSERT(em_cached);
+
+ if (*em_cached) {
em = *em_cached;
if (extent_map_in_tree(em) && start >= em->start &&
start < extent_map_end(em)) {
@@ -988,8 +984,8 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
*em_cached = NULL;
}
- em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
- if (em_cached && !IS_ERR(em)) {
+ em = btrfs_get_extent(BTRFS_I(inode), page, start, len);
+ if (!IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
@@ -1007,7 +1003,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 start = page_offset(page);
const u64 end = start + PAGE_SIZE - 1;
u64 cur = start;
@@ -1018,7 +1014,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
int ret = 0;
size_t pg_offset = 0;
size_t iosize;
- size_t blocksize = inode->i_sb->s_blocksize;
+ size_t blocksize = fs_info->sectorsize;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
ret = set_page_extent_mapped(page);
@@ -1051,8 +1047,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
end_page_read(page, true, cur, iosize);
break;
}
- em = __get_extent_map(inode, page, pg_offset, cur,
- end - cur + 1, em_cached);
+ em = __get_extent_map(inode, page, cur, end - cur + 1, em_cached);
if (IS_ERR(em)) {
unlock_extent(tree, cur, end, NULL);
end_page_read(page, false, cur, end + 1 - cur);
@@ -1157,15 +1152,18 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
int btrfs_read_folio(struct file *file, struct folio *folio)
{
struct page *page = &folio->page;
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *inode = page_to_inode(page);
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
+ struct extent_map *em_cached = NULL;
int ret;
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
- ret = btrfs_do_readpage(page, NULL, &bio_ctrl, NULL);
+ ret = btrfs_do_readpage(page, &em_cached, &bio_ctrl, NULL);
+ free_extent_map(em_cached);
+
/*
* If btrfs_do_readpage() failed we will want to submit the assembled
* bio to do the cleanup.
@@ -1180,9 +1178,11 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
struct btrfs_bio_ctrl *bio_ctrl,
u64 *prev_em_start)
{
- struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
+ struct btrfs_inode *inode = page_to_inode(pages[0]);
int index;
+ ASSERT(em_cached);
+
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
for (index = 0; index < nr_pages; index++) {
@@ -1371,7 +1371,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
continue;
}
- em = btrfs_get_extent(inode, NULL, 0, cur, len);
+ em = btrfs_get_extent(inode, NULL, cur, len);
if (IS_ERR(em)) {
ret = PTR_ERR_OR_ZERO(em);
goto out_error;
@@ -1739,10 +1739,10 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
folio_lock(folio);
folio_clear_dirty_for_io(folio);
folio_start_writeback(folio);
- ret = bio_add_folio(&bbio->bio, folio, folio_size(folio), 0);
+ ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
ASSERT(ret);
wbc_account_cgroup_owner(wbc, folio_page(folio, 0),
- folio_size(folio));
+ eb->folio_size);
wbc->nr_to_write -= folio_nr_pages(folio);
folio_unlock(folio);
}
@@ -1766,7 +1766,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
*/
static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
int submitted = 0;
u64 page_start = page_offset(page);
@@ -1857,7 +1857,7 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
if (!folio_test_private(folio))
return 0;
- if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
+ if (page_to_fs_info(page)->nodesize < PAGE_SIZE)
return submit_eb_subpage(page, wbc);
spin_lock(&mapping->i_private_lock);
@@ -1915,7 +1915,7 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct btrfs_eb_write_context ctx = { .wbc = wbc };
- struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
@@ -2203,7 +2203,7 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page,
bool found_error = false;
int ret = 0;
struct address_space *mapping = inode->i_mapping;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
const u32 sectorsize = fs_info->sectorsize;
loff_t i_size = i_size_read(inode);
u64 cur = start;
@@ -2309,7 +2309,7 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
struct extent_state *cached_state = NULL;
u64 start = folio_pos(folio);
u64 end = start + folio_size(folio) - 1;
- size_t blocksize = folio->mapping->host->i_sb->s_blocksize;
+ size_t blocksize = folio_to_fs_info(folio)->sectorsize;
/* This function is only called for the btree inode */
ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
@@ -2378,7 +2378,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
- struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *btrfs_inode = page_to_inode(page);
struct extent_io_tree *tree = &btrfs_inode->io_tree;
struct extent_map_tree *map = &btrfs_inode->extent_tree;
@@ -2453,12 +2453,65 @@ next:
return try_release_extent_state(tree, page, mask);
}
+struct btrfs_fiemap_entry {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+};
+
/*
- * To cache previous fiemap extent
+ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file
+ * range from the inode's io tree, unlock the subvolume tree search path, flush
+ * the fiemap cache and relock the file range and research the subvolume tree.
+ * The value here is something negative that can't be confused with a valid
+ * errno value and different from 1 because that's also a return value from
+ * fiemap_fill_next_extent() and also it's often used to mean some btree search
+ * did not find a key, so make it some distinct negative value.
+ */
+#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
+
+/*
+ * Used to:
+ *
+ * - Cache the next entry to be emitted to the fiemap buffer, so that we can
+ * merge extents that are contiguous and can be grouped as a single one;
*
- * Will be used for merging fiemap extent
+ * - Store extents ready to be written to the fiemap buffer in an intermediary
+ * buffer. This intermediary buffer is to ensure that in case the fiemap
+ * buffer is memory mapped to the fiemap target file, we don't deadlock
+ * during btrfs_page_mkwrite(). This is because during fiemap we are locking
+ * an extent range in order to prevent races with delalloc flushing and
+ * ordered extent completion, which is needed in order to reliably detect
+ * delalloc in holes and prealloc extents. And this can lead to a deadlock
+ * if the fiemap buffer is memory mapped to the file we are running fiemap
+ * against (a silly, useless in practice scenario, but possible) because
+ * btrfs_page_mkwrite() will try to lock the same extent range.
*/
struct fiemap_cache {
+ /* An array of ready fiemap entries. */
+ struct btrfs_fiemap_entry *entries;
+ /* Number of entries in the entries array. */
+ int entries_size;
+ /* Index of the next entry in the entries array to write to. */
+ int entries_pos;
+ /*
+ * Once the entries array is full, this indicates what's the offset for
+ * the next file extent item we must search for in the inode's subvolume
+ * tree after unlocking the extent range in the inode's io tree and
+ * releasing the search path.
+ */
+ u64 next_search_offset;
+ /*
+ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
+ * to count ourselves emitted extents and stop instead of relying on
+ * fiemap_fill_next_extent() because we buffer ready fiemap entries at
+ * the @entries array, and we want to stop as soon as we hit the max
+ * amount of extents to map, not just to save time but also to make the
+ * logic at extent_fiemap() simpler.
+ */
+ unsigned int extents_mapped;
+ /* Fields for the cached extent (unsubmitted, not ready, extent). */
u64 offset;
u64 phys;
u64 len;
@@ -2466,6 +2519,28 @@ struct fiemap_cache {
bool cached;
};
+static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ for (int i = 0; i < cache->entries_pos; i++) {
+ struct btrfs_fiemap_entry *entry = &cache->entries[i];
+ int ret;
+
+ ret = fiemap_fill_next_extent(fieinfo, entry->offset,
+ entry->phys, entry->len,
+ entry->flags);
+ /*
+ * Ignore 1 (reached max entries) because we keep track of that
+ * ourselves in emit_fiemap_extent().
+ */
+ if (ret < 0)
+ return ret;
+ }
+ cache->entries_pos = 0;
+
+ return 0;
+}
+
/*
* Helper to submit fiemap extent.
*
@@ -2480,7 +2555,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
- int ret = 0;
+ struct btrfs_fiemap_entry *entry;
+ u64 cache_end;
/* Set at the end of extent_fiemap(). */
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
@@ -2489,15 +2565,104 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
goto assign;
/*
- * Sanity check, extent_fiemap() should have ensured that new
- * fiemap extent won't overlap with cached one.
- * Not recoverable.
+ * When iterating the extents of the inode, at extent_fiemap(), we may
+ * find an extent that starts at an offset behind the end offset of the
+ * previous extent we processed. This happens if fiemap is called
+ * without FIEMAP_FLAG_SYNC and there are ordered extents completing
+ * after we had to unlock the file range, release the search path, emit
+ * the fiemap extents stored in the buffer (cache->entries array) and
+ * the lock the remainder of the range and re-search the btree.
+ *
+ * For example we are in leaf X processing its last item, which is the
+ * file extent item for file range [512K, 1M[, and after
+ * btrfs_next_leaf() releases the path, there's an ordered extent that
+ * completes for the file range [768K, 2M[, and that results in trimming
+ * the file extent item so that it now corresponds to the file range
+ * [512K, 768K[ and a new file extent item is inserted for the file
+ * range [768K, 2M[, which may end up as the last item of leaf X or as
+ * the first item of the next leaf - in either case btrfs_next_leaf()
+ * will leave us with a path pointing to the new extent item, for the
+ * file range [768K, 2M[, since that's the first key that follows the
+ * last one we processed. So in order not to report overlapping extents
+ * to user space, we trim the length of the previously cached extent and
+ * emit it.
*
- * NOTE: Physical address can overlap, due to compression
+ * Upon calling btrfs_next_leaf() we may also find an extent with an
+ * offset smaller than or equals to cache->offset, and this happens
+ * when we had a hole or prealloc extent with several delalloc ranges in
+ * it, but after btrfs_next_leaf() released the path, delalloc was
+ * flushed and the resulting ordered extents were completed, so we can
+ * now have found a file extent item for an offset that is smaller than
+ * or equals to what we have in cache->offset. We deal with this as
+ * described below.
*/
- if (cache->offset + cache->len > offset) {
- WARN_ON(1);
- return -EINVAL;
+ cache_end = cache->offset + cache->len;
+ if (cache_end > offset) {
+ if (offset == cache->offset) {
+ /*
+ * We cached a dealloc range (found in the io tree) for
+ * a hole or prealloc extent and we have now found a
+ * file extent item for the same offset. What we have
+ * now is more recent and up to date, so discard what
+ * we had in the cache and use what we have just found.
+ */
+ goto assign;
+ } else if (offset > cache->offset) {
+ /*
+ * The extent range we previously found ends after the
+ * offset of the file extent item we found and that
+ * offset falls somewhere in the middle of that previous
+ * extent range. So adjust the range we previously found
+ * to end at the offset of the file extent item we have
+ * just found, since this extent is more up to date.
+ * Emit that adjusted range and cache the file extent
+ * item we have just found. This corresponds to the case
+ * where a previously found file extent item was split
+ * due to an ordered extent completing.
+ */
+ cache->len = offset - cache->offset;
+ goto emit;
+ } else {
+ const u64 range_end = offset + len;
+
+ /*
+ * The offset of the file extent item we have just found
+ * is behind the cached offset. This means we were
+ * processing a hole or prealloc extent for which we
+ * have found delalloc ranges (in the io tree), so what
+ * we have in the cache is the last delalloc range we
+ * found while the file extent item we found can be
+ * either for a whole delalloc range we previously
+ * emmitted or only a part of that range.
+ *
+ * We have two cases here:
+ *
+ * 1) The file extent item's range ends at or behind the
+ * cached extent's end. In this case just ignore the
+ * current file extent item because we don't want to
+ * overlap with previous ranges that may have been
+ * emmitted already;
+ *
+ * 2) The file extent item starts behind the currently
+ * cached extent but its end offset goes beyond the
+ * end offset of the cached extent. We don't want to
+ * overlap with a previous range that may have been
+ * emmitted already, so we emit the currently cached
+ * extent and then partially store the current file
+ * extent item's range in the cache, for the subrange
+ * going the cached extent's end to the end of the
+ * file extent item.
+ */
+ if (range_end <= cache_end)
+ return 0;
+
+ if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
+ phys += cache_end - offset;
+
+ offset = cache_end;
+ len = range_end - cache_end;
+ goto emit;
+ }
}
/*
@@ -2517,12 +2682,37 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
return 0;
}
+emit:
/* Not mergeable, need to submit cached one */
- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
- cache->len, cache->flags);
- cache->cached = false;
- if (ret)
- return ret;
+
+ if (cache->entries_pos == cache->entries_size) {
+ /*
+ * We will need to research for the end offset of the last
+ * stored extent and not from the current offset, because after
+ * unlocking the range and releasing the path, if there's a hole
+ * between that end offset and this current offset, a new extent
+ * may have been inserted due to a new write, so we don't want
+ * to miss it.
+ */
+ entry = &cache->entries[cache->entries_size - 1];
+ cache->next_search_offset = entry->offset + entry->len;
+ cache->cached = false;
+
+ return BTRFS_FIEMAP_FLUSH_CACHE;
+ }
+
+ entry = &cache->entries[cache->entries_pos];
+ entry->offset = cache->offset;
+ entry->phys = cache->phys;
+ entry->len = cache->len;
+ entry->flags = cache->flags;
+ cache->entries_pos++;
+ cache->extents_mapped++;
+
+ if (cache->extents_mapped == fieinfo->fi_extents_max) {
+ cache->cached = false;
+ return 1;
+ }
assign:
cache->cached = true;
cache->offset = offset;
@@ -2562,7 +2752,7 @@ static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *path)
{
- struct extent_buffer *clone;
+ struct extent_buffer *clone = path->nodes[0];
struct btrfs_key key;
int slot;
int ret;
@@ -2571,29 +2761,45 @@ static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *p
if (path->slots[0] < btrfs_header_nritems(path->nodes[0]))
return 0;
+ /*
+ * Add a temporary extra ref to an already cloned extent buffer to
+ * prevent btrfs_next_leaf() freeing it, we want to reuse it to avoid
+ * the cost of allocating a new one.
+ */
+ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
+ atomic_inc(&clone->refs);
+
ret = btrfs_next_leaf(inode->root, path);
if (ret != 0)
- return ret;
+ goto out;
/*
* Don't bother with cloning if there are no more file extent items for
* our inode.
*/
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY)
- return 1;
+ if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) {
+ ret = 1;
+ goto out;
+ }
/* See the comment at fiemap_search_slot() about why we clone. */
- clone = btrfs_clone_extent_buffer(path->nodes[0]);
- if (!clone)
- return -ENOMEM;
+ copy_extent_buffer_full(clone, path->nodes[0]);
+ /*
+ * Important to preserve the start field, for the optimizations when
+ * checking if extents are shared (see extent_fiemap()).
+ */
+ clone->start = path->nodes[0]->start;
slot = path->slots[0];
btrfs_release_path(path);
path->nodes[0] = clone;
path->slots[0] = slot;
+out:
+ if (ret)
+ free_extent_buffer(clone);
- return 0;
+ return ret;
}
/*
@@ -2648,8 +2854,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
* neighbour leaf).
* We also need the private clone because holding a read lock on an
* extent buffer of the subvolume's b+tree will make lockdep unhappy
- * when we call fiemap_fill_next_extent(), because that may cause a page
- * fault when filling the user space buffer with fiemap data.
+ * when we check if extents are shared, as backref walking may need to
+ * lock the same leaf we are processing.
*/
clone = btrfs_clone_extent_buffer(path->nodes[0]);
if (!clone)
@@ -2873,24 +3079,29 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
struct btrfs_backref_share_check_ctx *backref_ctx;
u64 last_extent_end;
u64 prev_extent_end;
- u64 lockstart;
- u64 lockend;
+ u64 range_start;
+ u64 range_end;
+ const u64 sectorsize = inode->root->fs_info->sectorsize;
bool stopped = false;
int ret;
+ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
+ cache.entries = kmalloc_array(cache.entries_size,
+ sizeof(struct btrfs_fiemap_entry),
+ GFP_KERNEL);
backref_ctx = btrfs_alloc_backref_share_check_ctx();
path = btrfs_alloc_path();
- if (!backref_ctx || !path) {
+ if (!cache.entries || !backref_ctx || !path) {
ret = -ENOMEM;
goto out;
}
- lockstart = round_down(start, inode->root->fs_info->sectorsize);
- lockend = round_up(start + len, inode->root->fs_info->sectorsize);
- prev_extent_end = lockstart;
+restart:
+ range_start = round_down(start, sectorsize);
+ range_end = round_up(start + len, sectorsize);
+ prev_extent_end = range_start;
- btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
@@ -2898,7 +3109,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_release_path(path);
path->reada = READA_FORWARD;
- ret = fiemap_search_slot(inode, path, lockstart);
+ ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
@@ -2910,7 +3121,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto check_eof_delalloc;
}
- while (prev_extent_end < lockend) {
+ while (prev_extent_end < range_end) {
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *ei;
struct btrfs_key key;
@@ -2933,19 +3144,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
* The first iteration can leave us at an extent item that ends
* before our range's start. Move to the next item.
*/
- if (extent_end <= lockstart)
+ if (extent_end <= range_start)
goto next_item;
backref_ctx->curr_leaf_bytenr = leaf->start;
/* We have in implicit hole (NO_HOLES feature enabled). */
if (prev_extent_end < key.offset) {
- const u64 range_end = min(key.offset, lockend) - 1;
+ const u64 hole_end = min(key.offset, range_end) - 1;
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx, 0, 0, 0,
- prev_extent_end, range_end);
+ prev_extent_end, hole_end);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
@@ -2955,7 +3166,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
/* We've reached the end of the fiemap range, stop. */
- if (key.offset >= lockend) {
+ if (key.offset >= range_end) {
stopped = true;
break;
}
@@ -3016,7 +3227,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
- /* fiemap_fill_next_extent() told us to stop. */
+ /* emit_fiemap_extent() told us to stop. */
stopped = true;
break;
}
@@ -3039,23 +3250,13 @@ next_item:
}
check_eof_delalloc:
- /*
- * Release (and free) the path before emitting any final entries to
- * fiemap_fill_next_extent() to keep lockdep happy. This is because
- * once we find no more file extent items exist, we may have a
- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page
- * faults when copying data to the user space buffer.
- */
- btrfs_free_path(path);
- path = NULL;
-
- if (!stopped && prev_extent_end < lockend) {
+ if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
- 0, 0, 0, prev_extent_end, lockend - 1);
+ 0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
goto out_unlock;
- prev_extent_end = lockend;
+ prev_extent_end = range_end;
}
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
@@ -3079,13 +3280,39 @@ check_eof_delalloc:
}
}
- ret = emit_last_fiemap_cache(fieinfo, &cache);
-
out_unlock:
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
+ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
+ btrfs_release_path(path);
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+ len -= cache.next_search_offset - start;
+ start = cache.next_search_offset;
+ goto restart;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * Must free the path before emitting to the fiemap buffer because we
+ * may have a non-cloned leaf and if the fiemap buffer is memory mapped
+ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
+ * waiting for an ordered extent that in order to complete needs to
+ * modify that leaf, therefore leading to a deadlock.
+ */
+ btrfs_free_path(path);
+ path = NULL;
+
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+
+ ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
free_extent_state(delalloc_cached_state);
+ kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
btrfs_free_path(path);
return ret;
@@ -3534,7 +3761,7 @@ retry:
/* For now, we should only have single-page folios for btree inode. */
ASSERT(folio_nr_pages(existing_folio) == 1);
- if (folio_size(existing_folio) != folio_size(eb->folios[0])) {
+ if (folio_size(existing_folio) != eb->folio_size) {
folio_unlock(existing_folio);
folio_put(existing_folio);
return -EAGAIN;
@@ -3677,6 +3904,8 @@ reallocate:
* and free the allocated page.
*/
folio = eb->folios[i];
+ eb->folio_size = folio_size(folio);
+ eb->folio_shift = folio_shift(folio);
spin_lock(&mapping->i_private_lock);
/* Should not fail, as we have preallocated the memory */
ret = attach_extent_buffer_folio(eb, folio, prealloc);
@@ -4126,7 +4355,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
for (int i = 0; i < num_folios; i++) {
struct folio *folio = eb->folios[i];
- ret = bio_add_folio(&bbio->bio, folio, folio_size(folio), 0);
+ ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
ASSERT(ret);
}
}
@@ -4146,7 +4375,7 @@ static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
unsigned long len)
{
btrfs_warn(eb->fs_info,
- "access to eb bytenr %llu len %lu out of range start %lu len %lu",
+ "access to eb bytenr %llu len %u out of range start %lu len %lu",
eb->start, eb->len, start, len);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
@@ -4175,7 +4404,7 @@ static inline int check_eb_range(const struct extent_buffer *eb,
void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char *dst = (char *)dstv;
@@ -4215,7 +4444,7 @@ int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
void __user *dstv,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char __user *dst = (char __user *)dstv;
@@ -4255,7 +4484,7 @@ int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char *kaddr;
@@ -4326,7 +4555,7 @@ static void __write_extent_buffer(const struct extent_buffer *eb,
const void *srcv, unsigned long start,
unsigned long len, bool use_memmove)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char *kaddr;
@@ -4375,7 +4604,7 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
static void memset_extent_buffer(const struct extent_buffer *eb, int c,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
unsigned long cur = start;
if (eb->addr) {
@@ -4406,7 +4635,7 @@ void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src)
{
- const int unit_size = folio_size(src->folios[0]);
+ const int unit_size = src->folio_size;
unsigned long cur = 0;
ASSERT(dst->len == src->len);
@@ -4428,7 +4657,7 @@ void copy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- const int unit_size = folio_size(dst->folios[0]);
+ const int unit_size = dst->folio_size;
u64 dst_len = dst->len;
size_t cur;
size_t offset;
@@ -4484,10 +4713,10 @@ static inline void eb_bitmap_offset(const struct extent_buffer *eb,
* the bitmap item in the extent buffer + the offset of the byte in the
* bitmap item.
*/
- offset = start + offset_in_folio(eb->folios[0], eb->start) + byte_offset;
+ offset = start + offset_in_eb_folio(eb, eb->start) + byte_offset;
- *folio_index = offset >> folio_shift(eb->folios[0]);
- *folio_offset = offset_in_folio(eb->folios[0], offset);
+ *folio_index = offset >> eb->folio_shift;
+ *folio_offset = offset_in_eb_folio(eb, offset);
}
/*
@@ -4601,7 +4830,7 @@ void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- const int unit_size = folio_size(dst->folios[0]);
+ const int unit_size = dst->folio_size;
unsigned long cur_off = 0;
if (check_eb_range(dst, dst_offset, len) ||
@@ -4725,7 +4954,7 @@ out:
static int try_release_subpage_extent_buffer(struct page *page)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
u64 cur = page_offset(page);
const u64 end = page_offset(page) + PAGE_SIZE;
int ret;
@@ -4798,7 +5027,7 @@ int try_release_extent_buffer(struct page *page)
struct folio *folio = page_folio(page);
struct extent_buffer *eb;
- if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
+ if (page_to_fs_info(page)->nodesize < PAGE_SIZE)
return try_release_subpage_extent_buffer(page);
/*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 46050500529b..e3530d427e1f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -7,11 +7,32 @@
#include <linux/refcount.h>
#include <linux/fiemap.h>
#include <linux/btrfs_tree.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/slab.h>
#include "compression.h"
+#include "messages.h"
#include "ulist.h"
#include "misc.h"
+struct page;
+struct file;
+struct folio;
+struct inode;
+struct fiemap_extent_info;
+struct readahead_control;
+struct address_space;
+struct writeback_control;
+struct extent_io_tree;
+struct extent_map_tree;
+struct btrfs_block_group;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct btrfs_root;
struct btrfs_trans_handle;
+struct btrfs_tree_parent_check;
enum {
EXTENT_BUFFER_UPTODATE,
@@ -63,11 +84,6 @@ enum {
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
-struct btrfs_root;
-struct btrfs_inode;
-struct btrfs_fs_info;
-struct extent_io_tree;
-struct btrfs_tree_parent_check;
int __init extent_buffer_init_cachep(void);
void __cold extent_buffer_free_cachep(void);
@@ -75,7 +91,8 @@ void __cold extent_buffer_free_cachep(void);
#define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
struct extent_buffer {
u64 start;
- unsigned long len;
+ u32 len;
+ u32 folio_size;
unsigned long bflags;
struct btrfs_fs_info *fs_info;
@@ -90,6 +107,7 @@ struct extent_buffer {
int read_mirror;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
s8 log_index;
+ u8 folio_shift;
struct rcu_head rcu_head;
struct rw_semaphore lock;
@@ -113,6 +131,13 @@ struct btrfs_eb_write_context {
struct btrfs_block_group *zoned_bg;
};
+static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb,
+ u64 start)
+{
+ ASSERT(eb->folio_size);
+ return start & (eb->folio_size - 1);
+}
+
/*
* Get the correct offset inside the page of extent buffer.
*
@@ -151,13 +176,13 @@ static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb,
* the folio_shift would be large enough to always make us
* return 0 as index.
* 1.2) Several page sized folios
- * The folio_shift() would be PAGE_SHIFT, giving us the correct
+ * The folio_shift would be PAGE_SHIFT, giving us the correct
* index.
*
* 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
* The folio would only be page sized, and always give us 0 as index.
*/
- return offset >> folio_shift(eb->folios[0]);
+ return offset >> eb->folio_shift;
}
/*
@@ -205,8 +230,6 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
kfree(changeset);
}
-struct extent_map_tree;
-
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
@@ -221,6 +244,7 @@ int btree_write_cache_pages(struct address_space *mapping,
void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
+int set_folio_extent_mapped(struct folio *folio);
int set_page_extent_mapped(struct page *page);
void clear_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b61099bf97a8..347ca13d15a9 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -5,7 +5,6 @@
#include <linux/spinlock.h>
#include "messages.h"
#include "ctree.h"
-#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
#include "btrfs_inode.h"
@@ -16,8 +15,7 @@ static struct kmem_cache *extent_map_cache;
int __init extent_map_init(void)
{
extent_map_cache = kmem_cache_create("btrfs_extent_map",
- sizeof(struct extent_map), 0,
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_map), 0, 0, NULL);
if (!extent_map_cache)
return -ENOMEM;
return 0;
@@ -291,6 +289,10 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
* Called after an extent has been written to disk properly. Set the generation
* to the generation that actually added the file item to the inode so we know
* we need to sync this extent when we call fsync().
+ *
+ * Returns: 0 on success
+ * -ENOENT when the extent is not found in the tree
+ * -EUCLEAN if the found extent does not match the expected start
*/
int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
{
@@ -308,14 +310,18 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
"no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
start, len, gen);
+ ret = -ENOENT;
goto out;
}
- if (WARN_ON(em->start != start))
+ if (WARN_ON(em->start != start)) {
btrfs_warn(fs_info,
"found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
em->start, start, len, gen);
+ ret = -EUCLEAN;
+ goto out;
+ }
em->generation = gen;
em->flags &= ~EXTENT_FLAG_PINNED;
@@ -531,7 +537,8 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
u64 end;
u64 start_diff;
- BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
+ if (map_start < em->start || map_start >= extent_map_end(em))
+ return -EINVAL;
if (existing->start > map_start) {
next = existing;
@@ -626,9 +633,9 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
free_extent_map(em);
*em_in = NULL;
WARN_ONCE(ret,
-"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
- ret, existing->start, existing->len,
- orig_start, orig_len);
+"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n",
+ existing->start, existing->len,
+ orig_start, orig_len, start);
}
free_extent_map(existing);
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index e380fc08bbe4..c5a098c99cc6 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,10 +3,18 @@
#ifndef BTRFS_EXTENT_MAP_H
#define BTRFS_EXTENT_MAP_H
+#include <linux/compiler_types.h>
+#include <linux/rwlock_types.h>
#include <linux/rbtree.h>
+#include <linux/list.h>
#include <linux/refcount.h>
+#include "misc.h"
+#include "extent_map.h"
#include "compression.h"
+struct btrfs_inode;
+struct btrfs_fs_info;
+
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
#define EXTENT_MAP_HOLE ((u64)-3)
#define EXTENT_MAP_INLINE ((u64)-2)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 81ac1d474bf1..e58fb5347e65 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -10,17 +10,14 @@
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "messages.h"
-#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "bio.h"
-#include "print-tree.h"
#include "compression.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
-#include "super.h"
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
@@ -179,7 +176,6 @@ int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
sizeof(*item));
if (ret < 0)
goto out;
- BUG_ON(ret); /* Can't happen */
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -1229,8 +1225,6 @@ insert:
ins_size);
if (ret < 0)
goto out;
- if (WARN_ON(ret != 0))
- goto out;
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 04bd2d34efb1..15c05cc0fce6 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -3,8 +3,21 @@
#ifndef BTRFS_FILE_ITEM_H
#define BTRFS_FILE_ITEM_H
+#include <linux/list.h>
+#include <uapi/linux/btrfs_tree.h>
#include "accessors.h"
+struct extent_map;
+struct btrfs_file_extent_item;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_bio;
+struct btrfs_trans_handle;
+struct btrfs_root;
+struct btrfs_ordered_sum;
+struct btrfs_path;
+struct btrfs_inode;
+
#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
(offsetof(struct btrfs_file_extent_item, disk_bytenr))
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 38dfcac47609..f9d76072398d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -22,10 +22,8 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
-#include "volumes.h"
#include "qgroup.h"
#include "compression.h"
#include "delalloc-space.h"
@@ -1137,7 +1135,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
@@ -1185,7 +1183,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
loff_t pos;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
@@ -1461,7 +1459,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos;
ssize_t written = 0;
ssize_t written_buffered;
@@ -1787,7 +1785,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
@@ -1912,6 +1910,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out_release_extents;
}
+ btrfs_init_log_ctx_scratch_eb(&ctx);
+
/*
* We use start here because we will need to wait on the IO to complete
* in btrfs_sync_log, which could require joining a transaction (for
@@ -1931,6 +1931,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trans->in_fsync = true;
ret = btrfs_log_dentry_safe(trans, dentry, &ctx);
+ /*
+ * Scratch eb no longer needed, release before syncing log or commit
+ * transaction, to avoid holding unnecessary memory during such long
+ * operations.
+ */
+ if (ctx.scratch_eb) {
+ free_extent_buffer(ctx.scratch_eb);
+ ctx.scratch_eb = NULL;
+ }
btrfs_release_log_ctx_extents(&ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
@@ -2006,6 +2015,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_commit_transaction(trans);
out:
+ free_extent_buffer(ctx.scratch_eb);
ASSERT(list_empty(&ctx.list));
ASSERT(list_empty(&ctx.conflict_inodes));
err = file_check_and_advance_wb_err(file);
@@ -2176,7 +2186,7 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
struct extent_map *em;
int ret = 0;
- em = btrfs_get_extent(inode, NULL, 0,
+ em = btrfs_get_extent(inode, NULL,
round_down(*start, fs_info->sectorsize),
round_up(*len, fs_info->sectorsize));
if (IS_ERR(em))
@@ -2593,7 +2603,7 @@ out:
static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
@@ -2835,7 +2845,7 @@ static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
int ret;
offset = round_down(offset, sectorsize);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(inode, NULL, offset, sectorsize);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -2866,7 +2876,7 @@ static int btrfs_zero_range(struct inode *inode,
u64 bytes_to_reserve = 0;
bool space_reserved = false;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, alloc_start,
alloc_end - alloc_start);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -2909,8 +2919,7 @@ static int btrfs_zero_range(struct inode *inode,
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
- sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, alloc_start, sectorsize);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
@@ -3005,7 +3014,7 @@ reserve_space:
}
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
- i_blocksize(inode),
+ fs_info->sectorsize,
offset + len, &alloc_hint);
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
@@ -3049,7 +3058,7 @@ static long btrfs_fallocate(struct file *file, int mode,
int ret;
/* Do not allow fallocate in ZONED mode */
- if (btrfs_is_zoned(btrfs_sb(inode->i_sb)))
+ if (btrfs_is_zoned(inode_to_fs_info(inode)))
return -EOPNOTSUPP;
alloc_start = round_down(offset, blocksize);
@@ -3126,7 +3135,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
while (cur_offset < alloc_end) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, cur_offset,
alloc_end - cur_offset);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -3177,7 +3186,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (!ret) {
ret = btrfs_prealloc_file_range(inode, mode,
range->start,
- range->len, i_blocksize(inode),
+ range->len, blocksize,
offset + len, &alloc_hint);
/*
* btrfs_prealloc_file_range() releases space even
@@ -3754,7 +3763,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
if (fsverity_active(inode))
return 0;
- if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+ if (check_direct_read(inode_to_fs_info(inode), to, iocb->ki_pos))
return 0;
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
diff --git a/fs/btrfs/file.h b/fs/btrfs/file.h
index 82b34fbb295f..77aaca208c7b 100644
--- a/fs/btrfs/file.h
+++ b/fs/btrfs/file.h
@@ -3,6 +3,21 @@
#ifndef BTRFS_FILE_H
#define BTRFS_FILE_H
+#include <linux/types.h>
+
+struct file;
+struct extent_state;
+struct kiocb;
+struct iov_iter;
+struct page;
+struct btrfs_ioctl_encoded_io_args;
+struct btrfs_drop_extents_args;
+struct btrfs_inode;
+struct btrfs_root;
+struct btrfs_path;
+struct btrfs_replace_extent_info;
+struct btrfs_trans_handle;
+
extern const struct file_operations btrfs_file_operations;
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d372c7ce0e6b..c8a05d5eb9cb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -19,9 +19,7 @@
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
-#include "volumes.h"
#include "space-info.h"
-#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"
#include "subpage.h"
@@ -399,7 +397,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
return -ENOMEM;
io_ctl->num_pages = num_pages;
- io_ctl->fs_info = btrfs_sb(inode->i_sb);
+ io_ctl->fs_info = inode_to_fs_info(inode);
io_ctl->inode = inode;
return 0;
@@ -2621,7 +2619,7 @@ static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
}
}
-int __btrfs_add_free_space(struct btrfs_block_group *block_group,
+static int __btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 offset, u64 bytes,
enum btrfs_trim_state trim_state)
{
@@ -4156,15 +4154,13 @@ out:
int __init btrfs_free_space_init(void)
{
- btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
- sizeof(struct btrfs_free_space), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_free_space_cachep = KMEM_CACHE(btrfs_free_space, 0);
if (!btrfs_free_space_cachep)
return -ENOMEM;
btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
PAGE_SIZE, PAGE_SIZE,
- SLAB_MEM_SPREAD, NULL);
+ 0, NULL);
if (!btrfs_free_space_bitmap_cachep) {
kmem_cache_destroy(btrfs_free_space_cachep);
return -ENOMEM;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 33b4da3271b1..83774bfd7b3b 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -6,6 +6,19 @@
#ifndef BTRFS_FREE_SPACE_CACHE_H
#define BTRFS_FREE_SPACE_CACHE_H
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include "fs.h"
+
+struct inode;
+struct page;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_trans_handle;
+struct btrfs_trim_block_group;
+
/*
* This is the trim state of an extent or bitmap.
*
@@ -114,8 +127,6 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl);
-int __btrfs_add_free_space(struct btrfs_block_group *block_group, u64 bytenr,
- u64 size, enum btrfs_trim_state trim_state);
int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 7b598b070700..90f2938bd743 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1176,12 +1176,16 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
BTRFS_FREE_SPACE_TREE_OBJECTID);
if (IS_ERR(free_space_root)) {
ret = PTR_ERR(free_space_root);
- goto abort;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
}
ret = btrfs_global_root_insert(free_space_root);
if (ret) {
btrfs_put_root(free_space_root);
- goto abort;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
}
node = rb_first_cached(&fs_info->block_group_cache_tree);
@@ -1189,8 +1193,11 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
+ }
node = rb_next(node);
}
@@ -1206,11 +1213,9 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
-abort:
+out_clear:
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
return ret;
}
@@ -1273,12 +1278,18 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
ret = clear_free_space_tree(trans, free_space_root);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
ret = btrfs_del_root(trans, &free_space_root->root_key);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
btrfs_global_root_delete(free_space_root);
@@ -1295,11 +1306,6 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_put_root(free_space_root);
return btrfs_commit_transaction(trans);
-
-abort:
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- return ret;
}
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
@@ -1322,8 +1328,11 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
ret = clear_free_space_tree(trans, free_space_root);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
node = rb_first_cached(&fs_info->block_group_cache_tree);
while (node) {
@@ -1332,8 +1341,11 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
node = rb_next(node);
}
@@ -1344,10 +1356,6 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
ret = btrfs_commit_transaction(trans);
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
-abort:
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- return ret;
}
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 6d5551d0ced8..e6c6d6f4f221 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -6,7 +6,13 @@
#ifndef BTRFS_FREE_SPACE_TREE_H
#define BTRFS_FREE_SPACE_TREE_H
+#include <linux/bits.h>
+
struct btrfs_caching_control;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_block_group;
+struct btrfs_trans_handle;
/*
* The default size for new free space bitmap items. The last bitmap in a block
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index f8bb73d6ab68..93f5c57ea4e3 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -4,13 +4,50 @@
#define BTRFS_FS_H
#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/btrfs_tree.h>
#include <linux/sizes.h>
+#include <linux/time64.h>
+#include <linux/compiler.h>
+#include <linux/math.h>
+#include <linux/atomic.h>
+#include <linux/blkdev.h>
+#include <linux/percpu_counter.h>
+#include <linux/completion.h>
+#include <linux/lockdep.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rwlock_types.h>
+#include <linux/rwsem.h>
+#include <linux/semaphore.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/wait_bit.h>
+#include <linux/sched.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs.h>
+#include <uapi/linux/btrfs_tree.h>
#include "extent-io-tree.h"
-#include "extent_map.h"
#include "async-thread.h"
#include "block-rsv.h"
+#include "fs.h"
+
+struct inode;
+struct super_block;
+struct kobject;
+struct reloc_control;
+struct crypto_shash;
+struct ulist;
+struct btrfs_device;
+struct btrfs_block_group;
+struct btrfs_root;
+struct btrfs_fs_devices;
+struct btrfs_transaction;
+struct btrfs_delayed_root;
+struct btrfs_balance_control;
+struct btrfs_subpage_info;
+struct btrfs_stripe_hash_table;
+struct btrfs_space_info;
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
@@ -732,10 +769,13 @@ struct btrfs_fs_info {
/* Reclaim partially filled block groups in the background */
struct work_struct reclaim_bgs_work;
+ /* Protected by unused_bgs_lock. */
struct list_head reclaim_bgs;
int bg_reclaim_threshold;
+ /* Protects the lists unused_bgs and reclaim_bgs. */
spinlock_t unused_bgs_lock;
+ /* Protected by unused_bgs_lock. */
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
/* Protect block groups that are going to be deleted */
@@ -829,6 +869,17 @@ struct btrfs_fs_info {
#endif
};
+#define page_to_inode(_page) (BTRFS_I(_Generic((_page), \
+ struct page *: (_page))->mapping->host))
+#define folio_to_inode(_folio) (BTRFS_I(_Generic((_folio), \
+ struct folio *: (_folio))->mapping->host))
+
+#define page_to_fs_info(_page) (page_to_inode(_page)->root->fs_info)
+#define folio_to_fs_info(_folio) (folio_to_inode(_folio)->root->fs_info)
+
+#define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \
+ struct inode *: (_inode)))->root->fs_info)
+
static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
{
return READ_ONCE(fs_info->generation);
@@ -922,6 +973,8 @@ void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation op);
+int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args);
+
/* Compatibility and incompatibility defines */
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
const char *name);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 7d734830e514..9c1394c0a6d7 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -9,7 +9,6 @@
#include "inode-item.h"
#include "disk-io.h"
#include "transaction.h"
-#include "print-tree.h"
#include "space-info.h"
#include "accessors.h"
#include "extent-tree.h"
diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
index 4337bb26f419..c4aded82709b 100644
--- a/fs/btrfs/inode-item.h
+++ b/fs/btrfs/inode-item.h
@@ -6,14 +6,15 @@
#include <linux/types.h>
#include <linux/crc32c.h>
+struct fscrypt_str;
+struct extent_buffer;
struct btrfs_trans_handle;
struct btrfs_root;
struct btrfs_path;
struct btrfs_key;
struct btrfs_inode_extref;
struct btrfs_inode;
-struct extent_buffer;
-struct fscrypt_str;
+struct btrfs_truncate_control;
/*
* Return this if we need to call truncate_block for the last bit of the
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 809b11472a80..37701531eeb1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,14 +39,12 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "ordered-data.h"
#include "xattr.h"
#include "tree-log.h"
#include "bio.h"
#include "compression.h"
#include "locking.h"
-#include "free-space-cache.h"
#include "props.h"
#include "qgroup.h"
#include "delalloc-space.h"
@@ -740,7 +738,8 @@ static noinline int add_async_extent(struct async_chunk *cow,
struct async_extent *async_extent;
async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
- BUG_ON(!async_extent); /* -ENOMEM */
+ if (!async_extent)
+ return -ENOMEM;
async_extent->start = start;
async_extent->ram_size = ram_size;
async_extent->compressed_size = compressed_size;
@@ -1027,8 +1026,9 @@ again:
* The async work queues will take care of doing actual allocation on
* disk for these compressed pages, and will submit the bios.
*/
- add_async_extent(async_chunk, start, total_in, total_compressed, pages,
- nr_pages, compress_type);
+ ret = add_async_extent(async_chunk, start, total_in, total_compressed, pages,
+ nr_pages, compress_type);
+ BUG_ON(ret);
if (start + total_in < end) {
start += total_in;
cond_resched();
@@ -1040,8 +1040,9 @@ mark_incompressible:
if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
inode->flags |= BTRFS_INODE_NOCOMPRESS;
cleanup_and_bail_uncompressed:
- add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
- BTRFS_COMPRESS_NONE);
+ ret = add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
+ BTRFS_COMPRESS_NONE);
+ BUG_ON(ret);
free_pages:
if (pages) {
for (i = 0; i < nr_pages; i++) {
@@ -2302,6 +2303,8 @@ void btrfs_split_delalloc_extent(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 size;
+ lockdep_assert_held(&inode->io_tree.lock);
+
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return;
@@ -2340,6 +2343,8 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
u64 new_size, old_size;
u32 num_extents;
+ lockdep_assert_held(&inode->io_tree.lock);
+
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
return;
@@ -2387,55 +2392,50 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
spin_unlock(&inode->lock);
}
-static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
- struct btrfs_inode *inode)
+static void btrfs_add_delalloc_inode(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
spin_lock(&root->delalloc_lock);
- if (list_empty(&inode->delalloc_inodes)) {
- list_add_tail(&inode->delalloc_inodes, &root->delalloc_inodes);
- set_bit(BTRFS_INODE_IN_DELALLOC_LIST, &inode->runtime_flags);
- root->nr_delalloc_inodes++;
- if (root->nr_delalloc_inodes == 1) {
- spin_lock(&fs_info->delalloc_root_lock);
- BUG_ON(!list_empty(&root->delalloc_root));
- list_add_tail(&root->delalloc_root,
- &fs_info->delalloc_roots);
- spin_unlock(&fs_info->delalloc_root_lock);
- }
+ ASSERT(list_empty(&inode->delalloc_inodes));
+ list_add_tail(&inode->delalloc_inodes, &root->delalloc_inodes);
+ root->nr_delalloc_inodes++;
+ if (root->nr_delalloc_inodes == 1) {
+ spin_lock(&fs_info->delalloc_root_lock);
+ ASSERT(list_empty(&root->delalloc_root));
+ list_add_tail(&root->delalloc_root, &fs_info->delalloc_roots);
+ spin_unlock(&fs_info->delalloc_root_lock);
}
spin_unlock(&root->delalloc_lock);
}
-void __btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
+void btrfs_del_delalloc_inode(struct btrfs_inode *inode)
{
+ struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ lockdep_assert_held(&root->delalloc_lock);
+
+ /*
+ * We may be called after the inode was already deleted from the list,
+ * namely in the transaction abort path btrfs_destroy_delalloc_inodes(),
+ * and then later through btrfs_clear_delalloc_extent() while the inode
+ * still has ->delalloc_bytes > 0.
+ */
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &inode->runtime_flags);
root->nr_delalloc_inodes--;
if (!root->nr_delalloc_inodes) {
ASSERT(list_empty(&root->delalloc_inodes));
spin_lock(&fs_info->delalloc_root_lock);
- BUG_ON(list_empty(&root->delalloc_root));
+ ASSERT(!list_empty(&root->delalloc_root));
list_del_init(&root->delalloc_root);
spin_unlock(&fs_info->delalloc_root_lock);
}
}
}
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
-{
- spin_lock(&root->delalloc_lock);
- __btrfs_del_delalloc_inode(root, inode);
- spin_unlock(&root->delalloc_lock);
-}
-
/*
* Properly track delayed allocation bytes in the inode and to maintain the
* list of inodes that have pending delalloc work to be done.
@@ -2445,6 +2445,8 @@ void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *s
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ lockdep_assert_held(&inode->io_tree.lock);
+
if ((bits & EXTENT_DEFRAG) && !(bits & EXTENT_DELALLOC))
WARN_ON(1);
/*
@@ -2453,10 +2455,9 @@ void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *s
* bit, which is only set or cleared with irqs on
*/
if (!(state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
- struct btrfs_root *root = inode->root;
u64 len = state->end + 1 - state->start;
+ u64 prev_delalloc_bytes;
u32 num_extents = count_max_extents(fs_info, len);
- bool do_list = !btrfs_is_free_space_inode(inode);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, num_extents);
@@ -2469,13 +2470,20 @@ void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *s
percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
fs_info->delalloc_batch);
spin_lock(&inode->lock);
+ prev_delalloc_bytes = inode->delalloc_bytes;
inode->delalloc_bytes += len;
if (bits & EXTENT_DEFRAG)
inode->defrag_bytes += len;
- if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &inode->runtime_flags))
- btrfs_add_delalloc_inodes(root, inode);
spin_unlock(&inode->lock);
+
+ /*
+ * We don't need to be under the protection of the inode's lock,
+ * because we are called while holding the inode's io_tree lock
+ * and are therefore protected against concurrent calls of this
+ * function and btrfs_clear_delalloc_extent().
+ */
+ if (!btrfs_is_free_space_inode(inode) && prev_delalloc_bytes == 0)
+ btrfs_add_delalloc_inode(inode);
}
if (!(state->state & EXTENT_DELALLOC_NEW) &&
@@ -2497,6 +2505,8 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(fs_info, len);
+ lockdep_assert_held(&inode->io_tree.lock);
+
if ((state->state & EXTENT_DEFRAG) && (bits & EXTENT_DEFRAG)) {
spin_lock(&inode->lock);
inode->defrag_bytes -= len;
@@ -2510,7 +2520,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
*/
if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = inode->root;
- bool do_list = !btrfs_is_free_space_inode(inode);
+ u64 new_delalloc_bytes;
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, -num_extents);
@@ -2530,7 +2540,8 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
return;
if (!btrfs_is_data_reloc_root(root) &&
- do_list && !(state->state & EXTENT_NORESERVE) &&
+ !btrfs_is_free_space_inode(inode) &&
+ !(state->state & EXTENT_NORESERVE) &&
(bits & EXTENT_CLEAR_DATA_RESV))
btrfs_free_reserved_data_space_noquota(fs_info, len);
@@ -2538,11 +2549,20 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
fs_info->delalloc_batch);
spin_lock(&inode->lock);
inode->delalloc_bytes -= len;
- if (do_list && inode->delalloc_bytes == 0 &&
- test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &inode->runtime_flags))
- btrfs_del_delalloc_inode(root, inode);
+ new_delalloc_bytes = inode->delalloc_bytes;
spin_unlock(&inode->lock);
+
+ /*
+ * We don't need to be under the protection of the inode's lock,
+ * because we are called while holding the inode's io_tree lock
+ * and are therefore protected against concurrent calls of this
+ * function and btrfs_set_delalloc_extent().
+ */
+ if (!btrfs_is_free_space_inode(inode) && new_delalloc_bytes == 0) {
+ spin_lock(&root->delalloc_lock);
+ btrfs_del_delalloc_inode(inode);
+ spin_unlock(&root->delalloc_lock);
+ }
}
if ((state->state & EXTENT_DELALLOC_NEW) &&
@@ -2632,7 +2652,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
u64 em_len;
int ret = 0;
- em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
+ em = btrfs_get_extent(inode, NULL, search_start, search_len);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -2829,7 +2849,7 @@ out_page:
int btrfs_writepage_cow_fixup(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_writepage_fixup *fixup;
/* This page has ordered extent covering it already */
@@ -3127,8 +3147,13 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->disk_num_bytes);
}
}
- unpin_extent_cache(inode, ordered_extent->file_offset,
- ordered_extent->num_bytes, trans->transid);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = unpin_extent_cache(inode, ordered_extent->file_offset,
+ ordered_extent->num_bytes, trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3184,8 +3209,23 @@ out:
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
@@ -3239,7 +3279,7 @@ out:
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
{
- if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
+ if (btrfs_is_zoned(inode_to_fs_info(ordered->inode)) &&
!test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
list_empty(&ordered->bioc_list))
btrfs_finish_ordered_zoned(ordered);
@@ -3724,7 +3764,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
static int btrfs_read_locked_inode(struct inode *inode,
struct btrfs_path *in_path)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path = in_path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
@@ -4368,7 +4408,14 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0);
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of valid range.
+ */
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = 0;
if (path->slots[0] > 0) {
@@ -4449,8 +4496,8 @@ again:
int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
struct btrfs_root *root = dir->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct inode *inode = d_inode(dentry);
struct btrfs_root *dest = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
@@ -4458,6 +4505,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
u64 root_flags;
int ret;
+ down_write(&fs_info->subvol_sem);
+
/*
* Don't allow to delete a subvolume with send in progress. This is
* inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4518,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- down_write(&fs_info->subvol_sem);
-
ret = may_destroy_subvol(dest);
if (ret)
- goto out_up_write;
+ goto out_undead;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
@@ -4497,7 +4546,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
- goto out_up_write;
+ goto out_undead;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -4563,15 +4612,17 @@ out_end_trans:
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
+out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- } else {
+ }
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (!ret) {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
@@ -4676,7 +4727,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (blocksize - 1);
- struct page *page;
+ struct folio *folio;
gfp_t mask = btrfs_alloc_write_mask(mapping);
size_t write_bytes = blocksize;
int ret = 0;
@@ -4708,8 +4759,9 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
goto out;
}
again:
- page = find_or_create_page(mapping, index, mask);
- if (!page) {
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
+ if (IS_ERR(folio)) {
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize, true);
btrfs_delalloc_release_extents(inode, blocksize);
@@ -4717,15 +4769,15 @@ again:
goto out;
}
- if (!PageUptodate(page)) {
- ret = btrfs_read_folio(NULL, page_folio(page));
- lock_page(page);
- if (page->mapping != mapping) {
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ ret = btrfs_read_folio(NULL, folio);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto out_unlock;
}
@@ -4737,19 +4789,19 @@ again:
* folio private, but left the page in the mapping. Set the page mapped
* here to make sure it's properly set for the subpage stuff.
*/
- ret = set_page_extent_mapped(page);
+ ret = set_folio_extent_mapped(folio);
if (ret < 0)
goto out_unlock;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
lock_extent(io_tree, block_start, block_end, &cached_state);
ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
unlock_extent(io_tree, block_start, block_end, &cached_state);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
@@ -4770,15 +4822,16 @@ again:
if (!len)
len = blocksize - offset;
if (front)
- memzero_page(page, (block_start - page_offset(page)),
- offset);
+ folio_zero_range(folio, block_start - folio_pos(folio),
+ offset);
else
- memzero_page(page, (block_start - page_offset(page)) + offset,
- len);
+ folio_zero_range(folio,
+ (block_start - folio_pos(folio)) + offset,
+ len);
}
- btrfs_folio_clear_checked(fs_info, page_folio(page), block_start,
+ btrfs_folio_clear_checked(fs_info, folio, block_start,
block_end + 1 - block_start);
- btrfs_folio_set_dirty(fs_info, page_folio(page), block_start,
+ btrfs_folio_set_dirty(fs_info, folio, block_start,
block_end + 1 - block_start);
unlock_extent(io_tree, block_start, block_end, &cached_state);
@@ -4795,8 +4848,8 @@ out_unlock:
block_start, blocksize, true);
}
btrfs_delalloc_release_extents(inode, blocksize);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out:
if (only_release_metadata)
btrfs_check_nocow_unlock(inode);
@@ -4888,8 +4941,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
&cached_state);
cur_offset = hole_start;
while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
- block_end - cur_offset);
+ em = btrfs_get_extent(inode, NULL, cur_offset, block_end - cur_offset);
if (IS_ERR(em)) {
err = PTR_ERR(em);
em = NULL;
@@ -5000,7 +5052,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_end_transaction(trans);
} else {
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
if (btrfs_is_zoned(fs_info)) {
ret = btrfs_wait_ordered_range(inode,
@@ -5203,7 +5255,7 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
void btrfs_evict_inode(struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv = NULL;
@@ -5217,6 +5269,7 @@ void btrfs_evict_inode(struct inode *inode)
return;
}
+ fs_info = inode_to_fs_info(inode);
evict_inode_truncate_pages(inode);
if (inode->i_nlink &&
@@ -5514,7 +5567,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.offset = 0;
BTRFS_I(inode)->root = btrfs_grab_root(args->root);
- BUG_ON(args->root && !BTRFS_I(inode)->root);
if (args->root && args->root == args->root->fs_info->tree_root &&
args->ino != BTRFS_BTREE_INODE_OBJECTID)
@@ -5642,7 +5694,7 @@ static inline u8 btrfs_inode_type(struct inode *inode)
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
@@ -6181,7 +6233,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
struct inode *dir = args->dir;
struct inode *inode = args->inode;
const struct fscrypt_str *name = args->orphan ? NULL : &args->fname.disk_name;
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_root *root;
struct btrfs_inode_item *inode_item;
struct btrfs_key *location;
@@ -6503,7 +6555,7 @@ fail_dir_item:
static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_new_inode_args new_inode_args = {
.dir = dir,
@@ -6573,7 +6625,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = d_inode(old_dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct fscrypt_name fname;
u64 index;
int err;
@@ -6737,7 +6789,6 @@ static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path
*
* @inode: file to search in
* @page: page to read extent data into if the extent is inline
- * @pg_offset: offset into @page to copy to
* @start: file offset
* @len: length of range starting at @start
*
@@ -6751,8 +6802,7 @@ static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path
* Return: ERR_PTR on error, non-NULL extent_map on success.
*/
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
- struct page *page, size_t pg_offset,
- u64 start, u64 len)
+ struct page *page, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret = 0;
@@ -6895,7 +6945,6 @@ next:
* ensured by tree-checker and inline extent creation path.
* Thus all members representing file offsets should be zero.
*/
- ASSERT(pg_offset == 0);
ASSERT(extent_start == 0);
ASSERT(em->start == 0);
@@ -7059,7 +7108,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes, bool nowait, bool strict)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct can_nocow_file_extent_args nocow_args = { 0 };
struct btrfs_path *path;
int ret;
@@ -7298,7 +7347,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
unsigned int iomap_flags)
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *em = *map;
int type;
u64 block_start, orig_start, orig_block_len, ram_bytes;
@@ -7438,7 +7487,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
struct iomap *srcmap)
{
struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *em;
struct extent_state *cached_state = NULL;
struct btrfs_dio_data *dio_data = iter->private;
@@ -7536,7 +7585,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (ret < 0)
goto err;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
@@ -7816,6 +7865,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
+ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
int ret;
ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7841,7 +7891,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ret;
}
- return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
+ btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+ /*
+ * We did an initial flush to avoid holding the inode's lock while
+ * triggering writeback and waiting for the completion of IO and ordered
+ * extents. Now after we locked the inode we do it again, because it's
+ * possible a new write may have happened in between those two steps.
+ */
+ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
+ ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
+ if (ret) {
+ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+ return ret;
+ }
+ }
+
+ ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
+ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+ return ret;
}
static int btrfs_writepages(struct address_space *mapping,
@@ -7864,7 +7933,7 @@ static void btrfs_readahead(struct readahead_control *rac)
*/
static void wait_subpage_spinlock(struct page *page)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
struct btrfs_subpage *subpage;
@@ -7931,7 +8000,7 @@ static int btrfs_migrate_folio(struct address_space *mapping,
static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
- struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
+ struct btrfs_inode *inode = folio_to_inode(folio);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_io_tree *tree = &inode->io_tree;
struct extent_state *cached_state = NULL;
@@ -8115,7 +8184,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
struct page *page = vmf->page;
struct folio *folio = page_folio(page);
struct inode *inode = file_inode(vmf->vma->vm_file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
@@ -8661,7 +8730,7 @@ int __init btrfs_init_cachep(void)
{
btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
sizeof(struct btrfs_inode), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
init_once);
if (!btrfs_inode_cachep)
goto fail;
@@ -8684,7 +8753,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
u64 delalloc_bytes;
u64 inode_bytes;
struct inode *inode = d_inode(path->dentry);
- u32 blocksize = inode->i_sb->s_blocksize;
+ u32 blocksize = btrfs_sb(inode->i_sb)->sectorsize;
u32 bi_flags = BTRFS_I(inode)->flags;
u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
@@ -8724,7 +8793,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
struct inode *new_dir,
struct dentry *new_dentry)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(old_dir);
struct btrfs_trans_handle *trans;
unsigned int trans_num_items;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
@@ -8976,7 +9045,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(old_dir);
struct btrfs_new_inode_args whiteout_args = {
.dir = old_dir,
.dentry = old_dentry,
@@ -9418,7 +9487,7 @@ out:
static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
@@ -9599,7 +9668,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
loff_t actual_len, u64 *alloc_hint,
struct btrfs_trans_handle *trans)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
@@ -9751,7 +9820,7 @@ static int btrfs_permission(struct mnt_idmap *idmap,
static int btrfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode;
@@ -10125,7 +10194,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
cond_resched();
}
- em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
+ em = btrfs_get_extent(inode, NULL, start, lockend - start + 1);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_unlock_extent;
@@ -10269,6 +10338,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
return -EINVAL;
+ /*
+ * Compressed extents should always have checksums, so error out if we
+ * have a NOCOW file or inode was created while mounted with NODATASUM.
+ */
+ if (inode->flags & BTRFS_INODE_NODATASUM)
+ return -EINVAL;
+
orig_count = iov_iter_count(from);
/* The extent size must be sane. */
@@ -10698,7 +10774,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
struct btrfs_block_group *bg;
u64 len = isize - start;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 41b479861b3c..294e31edec9d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -34,11 +34,9 @@
#include "export.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
#include "backref.h"
-#include "rcu-string.h"
#include "send.h"
#include "dev-replace.h"
#include "props.h"
@@ -47,9 +45,7 @@
#include "tree-log.h"
#include "compression.h"
#include "space-info.h"
-#include "delalloc-space.h"
#include "block-group.h"
-#include "subpage.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
@@ -231,6 +227,20 @@ static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
return 0;
}
+int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args)
+{
+ if (memchr(vol_args->name, 0, sizeof(vol_args->name)) == NULL)
+ return -ENAMETOOLONG;
+ return 0;
+}
+
+static int btrfs_check_ioctl_vol_args2_subvol_name(const struct btrfs_ioctl_vol_args_v2 *vol_args2)
+{
+ if (memchr(vol_args2->name, 0, sizeof(vol_args2->name)) == NULL)
+ return -ENAMETOOLONG;
+ return 0;
+}
+
/*
* Set flags/xflags from the internal inode flags. The remaining items of
* fsxattr are zeroed.
@@ -247,7 +257,7 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
@@ -528,7 +538,7 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
* block group is in the logical address space, which can be any
* sectorsize aligned bytenr in the range [0, U64_MAX].
*/
- if (range.len < fs_info->sb->s_blocksize)
+ if (range.len < fs_info->sectorsize)
return -EINVAL;
range.minlen = max(range.minlen, minlen);
@@ -584,7 +594,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
struct btrfs_qgroup_inherit *inherit)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_root_item *root_item;
@@ -721,7 +731,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
free_extent_buffer(leaf);
leaf = NULL;
- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
@@ -776,7 +786,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
@@ -790,6 +800,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
return -EOPNOTSUPP;
}
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return -ENOENT;
+
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
@@ -907,7 +920,9 @@ static int btrfs_may_delete(struct mnt_idmap *idmap,
if (d_really_is_negative(victim))
return -ENOENT;
- BUG_ON(d_inode(victim->d_parent) != dir);
+ /* The @victim is not inside @dir. */
+ if (d_inode(victim->d_parent) != dir)
+ return -EINVAL;
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
@@ -959,7 +974,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
struct btrfs_qgroup_inherit *inherit)
{
struct inode *dir = d_inode(parent->dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct dentry *dentry;
struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
int error;
@@ -1094,7 +1109,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
{
BTRFS_DEV_LOOKUP_ARGS(args);
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 new_size;
u64 old_size;
u64 devid = 1;
@@ -1125,7 +1140,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = PTR_ERR(vol_args);
goto out_drop;
}
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out_free;
+
sizestr = vol_args->name;
cancel = (strcmp("cancel", sizestr) == 0);
ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_RESIZE, cancel);
@@ -1325,12 +1343,15 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out;
ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
vol_args->name, vol_args->fd, subvol,
false, NULL);
+out:
kfree(vol_args);
return ret;
}
@@ -1349,7 +1370,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args);
+ if (ret < 0)
+ goto free_args;
if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
ret = -EOPNOTSUPP;
@@ -1359,7 +1382,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
- u64 nums;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
if (vol_args->size < sizeof(*inherit) ||
vol_args->size > PAGE_SIZE) {
@@ -1372,19 +1395,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
goto free_args;
}
- if (inherit->num_qgroups > PAGE_SIZE ||
- inherit->num_ref_copies > PAGE_SIZE ||
- inherit->num_excl_copies > PAGE_SIZE) {
- ret = -EINVAL;
- goto free_inherit;
- }
-
- nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
- 2 * inherit->num_excl_copies;
- if (vol_args->size != struct_size(inherit, qgroups, nums)) {
- ret = -EINVAL;
+ ret = btrfs_qgroup_check_inherit(fs_info, inherit, vol_args->size);
+ if (ret < 0)
goto free_inherit;
- }
}
ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
@@ -1402,7 +1415,7 @@ free_args:
static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
u64 flags = 0;
@@ -1425,7 +1438,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 root_flags;
@@ -1672,7 +1685,7 @@ static noinline int search_ioctl(struct inode *inode,
u64 *buf_size,
char __user *ubuf)
{
- struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *info = inode_to_fs_info(inode);
struct btrfs_root *root;
struct btrfs_key key;
struct btrfs_path *path;
@@ -2343,9 +2356,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
bool destroy_v2)
{
struct dentry *parent = file->f_path.dentry;
- struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
struct dentry *dentry;
struct inode *dir = d_inode(parent);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *dest = NULL;
@@ -2379,7 +2392,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* name, same as v1 currently does.
*/
if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
- vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0;
+ err = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2);
+ if (err < 0)
+ goto out;
subvol_name = vol_args2->name;
err = mnt_want_write_file(file);
@@ -2463,7 +2478,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = 0;
+ err = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (err < 0)
+ goto out;
+
subvol_name = vol_args->name;
err = mnt_want_write_file(file);
@@ -2608,6 +2626,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EFAULT;
goto out;
}
+ if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
/* compression requires us to start the IO */
if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
@@ -2670,12 +2692,16 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
goto out;
}
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out_free;
+
ret = btrfs_init_new_device(fs_info, vol_args->name);
if (!ret)
btrfs_info(fs_info, "disk added %s", vol_args->name);
+out_free:
kfree(vol_args);
out:
if (restore_op)
@@ -2689,9 +2715,9 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
{
BTRFS_DEV_LOOKUP_ARGS(args);
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_vol_args_v2 *vol_args;
- struct bdev_handle *bdev_handle = NULL;
+ struct file *bdev_file = NULL;
int ret;
bool cancel = false;
@@ -2707,7 +2733,10 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto out;
}
- vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args);
+ if (ret < 0)
+ goto out;
+
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
args.devid = vol_args->devid;
} else if (!strcmp("cancel", vol_args->name)) {
@@ -2728,7 +2757,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto err_drop;
/* Exclusive operation is now claimed */
- ret = btrfs_rm_device(fs_info, &args, &bdev_handle);
+ ret = btrfs_rm_device(fs_info, &args, &bdev_file);
btrfs_exclop_finish(fs_info);
@@ -2742,8 +2771,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
}
err_drop:
mnt_drop_write_file(file);
- if (bdev_handle)
- bdev_release(bdev_handle);
+ if (bdev_file)
+ fput(bdev_file);
out:
btrfs_put_dev_args_from_path(&args);
kfree(vol_args);
@@ -2754,9 +2783,9 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
BTRFS_DEV_LOOKUP_ARGS(args);
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_vol_args *vol_args;
- struct bdev_handle *bdev_handle = NULL;
+ struct file *bdev_file = NULL;
int ret;
bool cancel = false;
@@ -2767,7 +2796,10 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out_free;
+
if (!strcmp("cancel", vol_args->name)) {
cancel = true;
} else {
@@ -2783,17 +2815,18 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
cancel);
if (ret == 0) {
- ret = btrfs_rm_device(fs_info, &args, &bdev_handle);
+ ret = btrfs_rm_device(fs_info, &args, &bdev_file);
if (!ret)
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
btrfs_exclop_finish(fs_info);
}
mnt_drop_write_file(file);
- if (bdev_handle)
- bdev_release(bdev_handle);
+ if (bdev_file)
+ fput(bdev_file);
out:
btrfs_put_dev_args_from_path(&args);
+out_free:
kfree(vol_args);
return ret;
}
@@ -2897,7 +2930,7 @@ out:
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root *new_root;
struct btrfs_dir_item *di;
@@ -3171,7 +3204,7 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
struct btrfs_ioctl_scrub_args *sa;
int ret;
@@ -3689,7 +3722,7 @@ out:
static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_quota_ctl_args *sa;
int ret;
@@ -3731,7 +3764,7 @@ drop_write:
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_assign_args *sa;
struct btrfs_trans_handle *trans;
@@ -3808,6 +3841,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}
+ if (sa->create && is_fstree(sa->qgroupid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3882,7 +3920,7 @@ drop_write:
static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret;
@@ -3946,7 +3984,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_ioctl_received_subvol_args *sa)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root_item *root_item = &root->root_item;
struct btrfs_trans_handle *trans;
@@ -4134,7 +4172,7 @@ static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_trans_handle *trans;
@@ -4277,7 +4315,7 @@ check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \
static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_ioctl_feature_flags flags[2];
@@ -4568,7 +4606,7 @@ long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
void __user *argp = (void __user *)arg;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index d51b9a2f2f6e..2c5dc25ec670 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -3,6 +3,15 @@
#ifndef BTRFS_IOCTL_H
#define BTRFS_IOCTL_H
+#include <linux/types.h>
+
+struct file;
+struct dentry;
+struct mnt_idmap;
+struct fileattr;
+struct btrfs_fs_info;
+struct btrfs_ioctl_balance_args;
+
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 74d8e2003f58..99ccab86bb86 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -13,7 +13,6 @@
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
-#include "accessors.h"
/*
* Lockdep class keys for extent_buffer->lock's in this root. For a given
@@ -85,7 +84,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int
{
struct btrfs_lockdep_keyset *ks;
- BUG_ON(level >= ARRAY_SIZE(ks->keys));
+ ASSERT(level < ARRAY_SIZE(ks->keys));
/* Find the matching keyset, id 0 is the default entry */
for (ks = btrfs_lockdep_keysets; ks->id; ks++)
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 7d6ee1e609bf..9576f485a300 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -8,8 +8,14 @@
#include <linux/atomic.h>
#include <linux/wait.h>
+#include <linux/lockdep.h>
#include <linux/percpu_counter.h>
#include "extent_io.h"
+#include "locking.h"
+
+struct extent_buffer;
+struct btrfs_path;
+struct btrfs_root;
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
@@ -157,8 +163,6 @@ enum btrfs_lockdep_trans_states {
static_assert(BTRFS_NESTING_MAX <= MAX_LOCKDEP_SUBCLASSES,
"too many lock subclasses defined");
-struct btrfs_path;
-
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest);
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
diff --git a/fs/btrfs/lru_cache.h b/fs/btrfs/lru_cache.h
index 00328c856be6..e32906ab6faa 100644
--- a/fs/btrfs/lru_cache.h
+++ b/fs/btrfs/lru_cache.h
@@ -3,8 +3,10 @@
#ifndef BTRFS_LRU_CACHE_H
#define BTRFS_LRU_CACHE_H
+#include <linux/types.h>
#include <linux/maple_tree.h>
#include <linux/list.h>
+#include "lru_cache.h"
/*
* A cache entry. This is meant to be embedded in a structure of a user of
@@ -50,11 +52,6 @@ struct btrfs_lru_cache {
#define btrfs_lru_cache_for_each_entry_safe(cache, entry, tmp) \
list_for_each_entry_safe_reverse((entry), (tmp), &(cache)->lru_list, lru_list)
-static inline unsigned int btrfs_lru_cache_size(const struct btrfs_lru_cache *cache)
-{
- return cache->size;
-}
-
static inline struct btrfs_lru_cache_entry *btrfs_lru_cache_lru_entry(
struct btrfs_lru_cache *cache)
{
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 1131d5a29d61..3e5d3b7028e8 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -214,7 +214,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
+ const u32 sectorsize = inode_to_fs_info(mapping->host)->sectorsize;
struct page *page_in = NULL;
char *sizes_ptr;
const unsigned long max_nr_page = *out_pages;
@@ -425,16 +425,16 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
}
int lzo_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
+ const u32 sectorsize = fs_info->sectorsize;
size_t in_len;
size_t out_len;
size_t max_segment_len = WORKSPACE_BUF_LENGTH;
int ret = 0;
- char *kaddr;
- unsigned long bytes;
if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
return -EUCLEAN;
@@ -451,7 +451,7 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
}
data_in += LZO_LEN;
- out_len = PAGE_SIZE;
+ out_len = sectorsize;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (ret != LZO_E_OK) {
pr_warn("BTRFS: decompress failed!\n");
@@ -459,29 +459,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
goto out;
}
- if (out_len < start_byte) {
+ ASSERT(out_len <= sectorsize);
+ memcpy_to_page(dest_page, dest_pgoff, workspace->buf, out_len);
+ /* Early end, considered as an error. */
+ if (unlikely(out_len < destlen)) {
ret = -EIO;
- goto out;
+ memzero_page(dest_page, dest_pgoff + out_len, destlen - out_len);
}
-
- /*
- * the caller is already checking against PAGE_SIZE, but lets
- * move this check closer to the memcpy/memset
- */
- destlen = min_t(unsigned long, destlen, PAGE_SIZE);
- bytes = min_t(unsigned long, destlen, out_len - start_byte);
-
- kaddr = kmap_local_page(dest_page);
- memcpy(kaddr, workspace->buf + start_byte, bytes);
-
- /*
- * btrfs_getblock is doing a zero on the tail of the page too,
- * but this will cover anything missing from the decompressed
- * data.
- */
- if (bytes < destlen)
- memset(kaddr+bytes, 0, destlen-bytes);
- kunmap_local(kaddr);
out:
return ret;
}
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index cdada4865837..c96dd66fd0f7 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -3,8 +3,6 @@
#include "fs.h"
#include "messages.h"
#include "discard.h"
-#include "transaction.h"
-#include "space-info.h"
#include "super.h"
#ifdef CONFIG_PRINTK
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 40f2d9f1a17a..dde4904aead9 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -3,6 +3,8 @@
#ifndef BTRFS_MISC_H
#define BTRFS_MISC_H
+#include <linux/types.h>
+#include <linux/bitmap.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/math64.h>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 59850dc17b22..b749ba45da2b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -19,7 +19,6 @@
#include "qgroup.h"
#include "subpage.h"
#include "file.h"
-#include "super.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
@@ -1236,10 +1235,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
int __init ordered_data_init(void)
{
- btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
- sizeof(struct btrfs_ordered_extent), 0,
- SLAB_MEM_SPREAD,
- NULL);
+ btrfs_ordered_extent_cache = KMEM_CACHE(btrfs_ordered_extent, 0);
if (!btrfs_ordered_extent_cache)
return -ENOMEM;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 127ef8bf0ffd..34413fc5b4bd 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -6,6 +6,21 @@
#ifndef BTRFS_ORDERED_DATA_H
#define BTRFS_ORDERED_DATA_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+#include <linux/rbtree.h>
+#include <linux/wait.h>
+#include "async-thread.h"
+
+struct inode;
+struct page;
+struct extent_state;
+struct btrfs_inode;
+struct btrfs_root;
+struct btrfs_fs_info;
+
struct btrfs_ordered_sum {
/*
* Logical start address and length for of the blocks covered by
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 7a1b021b5669..6195a2215b8f 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -4,7 +4,6 @@
*/
#include "ctree.h"
-#include "disk-io.h"
#include "orphan.h"
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/orphan.h b/fs/btrfs/orphan.h
index 3faab5cbb59a..aa54a88a60de 100644
--- a/fs/btrfs/orphan.h
+++ b/fs/btrfs/orphan.h
@@ -3,6 +3,11 @@
#ifndef BTRFS_ORPHAN_H
#define BTRFS_ORPHAN_H
+#include <linux/types.h>
+
+struct btrfs_trans_handle;
+struct btrfs_root;
+
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset);
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index c42bc666d5ee..8504bf1702c7 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -9,6 +9,9 @@
/* Buffer size to contain tree name and possibly additional data (offset) */
#define BTRFS_ROOT_NAME_BUF_LEN 48
+struct extent_buffer;
+struct btrfs_key;
+
void btrfs_print_leaf(const struct extent_buffer *l);
void btrfs_print_tree(const struct extent_buffer *c, bool follow);
const char *btrfs_root_name(const struct btrfs_key *key, char *buf);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f9bf591a0718..2a9b7b029eeb 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -4,6 +4,7 @@
*/
#include <linux/hashtable.h>
+#include <linux/xattr.h>
#include "messages.h"
#include "props.h"
#include "btrfs_inode.h"
@@ -302,7 +303,7 @@ static int prop_compression_validate(const struct btrfs_inode *inode,
static int prop_compression_apply(struct inode *inode, const char *value,
size_t len)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
int type;
/* Reset to defaults */
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
index 6e283196e38a..f60cd89feb29 100644
--- a/fs/btrfs/props.h
+++ b/fs/btrfs/props.h
@@ -6,7 +6,12 @@
#ifndef BTRFS_PROPS_H
#define BTRFS_PROPS_H
-#include "ctree.h"
+#include <linux/compiler_types.h>
+
+struct inode;
+struct btrfs_inode;
+struct btrfs_path;
+struct btrfs_trans_handle;
int __init btrfs_props_init(void);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 63b426cc7798..5f90f0605b12 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1324,7 +1324,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info)
trans = btrfs_join_transaction(fs_info->tree_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_commit_transaction(trans);
+ ret = btrfs_commit_transaction(trans);
return ret;
}
@@ -1736,6 +1736,15 @@ out:
return ret;
}
+static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+{
+ return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
+ qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+}
+
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1755,6 +1764,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out;
}
+ if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/* Check if there are no children of this qgroup */
if (!list_empty(&qgroup->members)) {
ret = -EBUSY;
@@ -2491,8 +2505,8 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
struct extent_buffer *eb = root_eb;
struct btrfs_path *path = NULL;
- BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
- BUG_ON(root_eb == NULL);
+ ASSERT(0 <= root_level && root_level < BTRFS_MAX_LEVEL);
+ ASSERT(root_eb != NULL);
if (!btrfs_qgroup_full_accounting(fs_info))
return 0;
@@ -2847,8 +2861,6 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
if (nr_old_roots == 0 && nr_new_roots == 0)
goto out_free;
- BUG_ON(!fs_info->quota_root);
-
trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
num_bytes, nr_old_roots, nr_new_roots);
@@ -2945,11 +2957,6 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
ctx.roots = NULL;
}
- /* Free the reserved data space */
- btrfs_qgroup_free_refroot(fs_info,
- record->data_rsv_refroot,
- record->data_rsv,
- BTRFS_QGROUP_RSV_DATA);
/*
* Use BTRFS_SEQ_LAST as time_seq to do special search,
* which doesn't lock tree or delayed_refs and search
@@ -2973,6 +2980,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
record->old_roots = NULL;
new_roots = NULL;
}
+ /* Free the reserved data space */
+ btrfs_qgroup_free_refroot(fs_info,
+ record->data_rsv_refroot,
+ record->data_rsv,
+ BTRFS_QGROUP_RSV_DATA);
cleanup:
ulist_free(record->old_roots);
ulist_free(new_roots);
@@ -3034,6 +3046,57 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
return ret;
}
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size)
+{
+ if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
+ return -EOPNOTSUPP;
+ if (size < sizeof(*inherit) || size > PAGE_SIZE)
+ return -EINVAL;
+
+ /*
+ * In the past we allowed btrfs_qgroup_inherit to specify to copy
+ * rfer/excl numbers directly from other qgroups. This behavior has
+ * been disabled in userspace for a very long time, but here we should
+ * also disable it in kernel, as this behavior is known to mark qgroup
+ * inconsistent, and a rescan would wipe out the changes anyway.
+ *
+ * Reject any btrfs_qgroup_inherit with num_ref_copies or num_excl_copies.
+ */
+ if (inherit->num_ref_copies > 0 || inherit->num_excl_copies > 0)
+ return -EINVAL;
+
+ if (inherit->num_qgroups > PAGE_SIZE)
+ return -EINVAL;
+
+ if (size != struct_size(inherit, qgroups, inherit->num_qgroups))
+ return -EINVAL;
+
+ /*
+ * Now check all the remaining qgroups, they should all:
+ *
+ * - Exist
+ * - Be higher level qgroups.
+ */
+ for (int i = 0; i < inherit->num_qgroups; i++) {
+ struct btrfs_qgroup *qgroup;
+ u64 qgroupid = inherit->qgroups[i];
+
+ if (btrfs_qgroup_level(qgroupid) == 0)
+ return -EINVAL;
+
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ spin_unlock(&fs_info->qgroup_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ }
+ return 0;
+}
+
static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
u64 inode_rootid,
struct btrfs_qgroup_inherit **inherit)
@@ -3076,6 +3139,62 @@ static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
}
/*
+ * Check if we can skip rescan when inheriting qgroups. If @src has a single
+ * @parent, and that @parent is owning all its bytes exclusively, we can skip
+ * the full rescan, by just adding nodesize to the @parent's excl/rfer.
+ *
+ * Return <0 for fatal errors (like srcid/parentid has no qgroup).
+ * Return 0 if a quick inherit is done.
+ * Return >0 if a quick inherit is not possible, and a full rescan is needed.
+ */
+static int qgroup_snapshot_quick_inherit(struct btrfs_fs_info *fs_info,
+ u64 srcid, u64 parentid)
+{
+ struct btrfs_qgroup *src;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup_list *list;
+ int nr_parents = 0;
+
+ src = find_qgroup_rb(fs_info, srcid);
+ if (!src)
+ return -ENOENT;
+ parent = find_qgroup_rb(fs_info, parentid);
+ if (!parent)
+ return -ENOENT;
+
+ /*
+ * Source has no parent qgroup, but our new qgroup would have one.
+ * Qgroup numbers would become inconsistent.
+ */
+ if (list_empty(&src->groups))
+ return 1;
+
+ list_for_each_entry(list, &src->groups, next_group) {
+ /* The parent is not the same, quick update is not possible. */
+ if (list->group->qgroupid != parentid)
+ return 1;
+ nr_parents++;
+ /*
+ * More than one parent qgroup, we can't be sure about accounting
+ * consistency.
+ */
+ if (nr_parents > 1)
+ return 1;
+ }
+
+ /*
+ * The parent is not exclusively owning all its bytes. We're not sure
+ * if the source has any bytes not fully owned by the parent.
+ */
+ if (parent->excl != parent->rfer)
+ return 1;
+
+ parent->excl += fs_info->nodesize;
+ parent->rfer += fs_info->nodesize;
+ return 0;
+}
+
+/*
* Copy the accounting information between qgroups. This is necessary
* when a snapshot or a subvolume is created. Throwing an error will
* cause a transaction abort so we take extra care here to only error
@@ -3243,6 +3362,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
qgroup_dirty(fs_info, dstgroup);
qgroup_dirty(fs_info, srcgroup);
+
+ /*
+ * If the source qgroup has parent but the new one doesn't,
+ * we need a full rescan.
+ */
+ if (!inherit && !list_empty(&srcgroup->groups))
+ need_rescan = true;
}
if (!inherit)
@@ -3257,14 +3383,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
if (ret)
goto unlock;
}
+ if (srcid) {
+ /* Check if we can do a quick inherit. */
+ ret = qgroup_snapshot_quick_inherit(fs_info, srcid, *i_qgroups);
+ if (ret < 0)
+ goto unlock;
+ if (ret > 0)
+ need_rescan = true;
+ ret = 0;
+ }
++i_qgroups;
-
- /*
- * If we're doing a snapshot, and adding the snapshot to a new
- * qgroup, the numbers are guaranteed to be incorrect.
- */
- if (srcid)
- need_rescan = true;
}
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index be18c862e64e..706640be0ec2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -6,12 +6,22 @@
#ifndef BTRFS_QGROUP_H
#define BTRFS_QGROUP_H
+#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/kobject.h>
-#include "ulist.h"
-#include "delayed-ref.h"
-#include "misc.h"
+#include <linux/list.h>
+#include <uapi/linux/btrfs_tree.h>
+
+struct extent_buffer;
+struct extent_changeset;
+struct btrfs_delayed_extent_op;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_ioctl_quota_ctl_args;
+struct btrfs_trans_handle;
+struct btrfs_delayed_ref_root;
+struct btrfs_inode;
/*
* Btrfs qgroup overview
@@ -321,7 +331,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-struct btrfs_delayed_extent_op;
int btrfs_qgroup_trace_extent_nolock(
struct btrfs_fs_info *fs_info,
@@ -341,6 +350,9 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
u64 objectid, u64 inode_rootid,
struct btrfs_qgroup_inherit *inherit);
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 9589362acfbf..6af6b4b9a32e 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -11,7 +11,6 @@
#include "disk-io.h"
#include "raid-stripe-tree.h"
#include "volumes.h"
-#include "misc.h"
#include "print-tree.h"
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
index cdb58b38fcb5..c9c258f84903 100644
--- a/fs/btrfs/raid-stripe-tree.h
+++ b/fs/btrfs/raid-stripe-tree.h
@@ -6,6 +6,10 @@
#ifndef BTRFS_RAID_STRIPE_TREE_H
#define BTRFS_RAID_STRIPE_TREE_H
+#include <linux/types.h>
+#include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
+
#define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID1_MASK | \
BTRFS_BLOCK_GROUP_RAID0 | \
@@ -13,6 +17,7 @@
struct btrfs_io_context;
struct btrfs_io_stripe;
+struct btrfs_fs_info;
struct btrfs_ordered_extent;
struct btrfs_trans_handle;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 792c8e17c31d..6f4a9cfeea44 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -14,7 +14,6 @@
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "messages.h"
-#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
@@ -918,6 +917,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
*/
ASSERT(stripe_nsectors <= BITS_PER_LONG);
+ /*
+ * Real stripes must be between 2 (2 disks RAID5, aka RAID1) and 256
+ * (limited by u8).
+ */
+ ASSERT(real_stripes >= 2);
+ ASSERT(real_stripes <= U8_MAX);
+
rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -955,6 +961,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);
+ ASSERT(rbio->nr_data > 0);
return rbio;
}
@@ -1181,6 +1188,26 @@ static inline void bio_list_put(struct bio_list *bio_list)
bio_put(bio);
}
+static void assert_rbio(struct btrfs_raid_bio *rbio)
+{
+ if (!IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
+ !IS_ENABLED(CONFIG_BTRFS_ASSERT))
+ return;
+
+ /*
+ * At least two stripes (2 disks RAID5), and since real_stripes is U8,
+ * we won't go beyond 256 disks anyway.
+ */
+ ASSERT(rbio->real_stripes >= 2);
+ ASSERT(rbio->nr_data > 0);
+
+ /*
+ * This is another check to make sure nr data stripes is smaller
+ * than total stripes.
+ */
+ ASSERT(rbio->nr_data < rbio->real_stripes);
+}
+
/* Generate PQ for one vertical stripe. */
static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
{
@@ -1212,6 +1239,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
pointers[stripe++] = kmap_local_page(sector->page) +
sector->pgoff;
+ assert_rbio(rbio);
raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
pointers);
} else {
@@ -2473,6 +2501,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
}
if (has_qstripe) {
+ assert_rbio(rbio);
/* RAID6, call the library function to fill in our P/Q */
raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
pointers);
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 470213688872..0d7b4c2fb6ae 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -7,9 +7,18 @@
#ifndef BTRFS_RAID56_H
#define BTRFS_RAID56_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/bio.h>
+#include <linux/refcount.h>
#include <linux/workqueue.h>
#include "volumes.h"
+struct page;
+struct sector_ptr;
+struct btrfs_fs_info;
+
enum btrfs_rbio_ops {
BTRFS_RBIO_WRITE,
BTRFS_RBIO_READ_REBUILD,
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
index 5c2b66d155ef..1c2d7cb1fe6f 100644
--- a/fs/btrfs/rcu-string.h
+++ b/fs/btrfs/rcu-string.h
@@ -6,6 +6,12 @@
#ifndef BTRFS_RCU_STRING_H
#define BTRFS_RCU_STRING_H
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+#include <linux/printk.h>
+
struct rcu_string {
struct rcu_head rcu;
char str[];
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 6486f0d7e993..8c4fc98ca9ce 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -889,8 +889,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
out_unlock:
spin_unlock(&fs_info->ref_verify_lock);
out:
- if (ret)
+ if (ret) {
+ btrfs_free_ref_cache(fs_info);
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+ }
return ret;
}
@@ -1021,8 +1023,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
}
}
if (ret) {
- btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
btrfs_free_ref_cache(fs_info);
+ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
}
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h
index 855de37719b5..3511e1a5c96b 100644
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -6,7 +6,16 @@
#ifndef BTRFS_REF_VERIFY_H
#define BTRFS_REF_VERIFY_H
+#include <linux/types.h>
+#include <linux/rbtree_types.h>
+
+struct btrfs_fs_info;
+struct btrfs_ref;
+
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+
+#include <linux/spinlock.h>
+
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index ae90894dc7dc..08d0fb46ceec 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -174,7 +174,7 @@ static int clone_copy_inline_extent(struct inode *dst,
char *inline_data,
struct btrfs_trans_handle **trans_out)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dst);
struct btrfs_root *root = BTRFS_I(dst)->root;
const u64 aligned_end = ALIGN(new_key->offset + datal,
fs_info->sectorsize);
@@ -337,7 +337,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
const u64 off, const u64 olen, const u64 olen_aligned,
const u64 destoff, int no_time_update)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path = NULL;
struct extent_buffer *leaf;
struct btrfs_trans_handle *trans;
@@ -663,7 +663,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
struct inode *dst, u64 dst_loff)
{
struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info;
- const u64 bs = fs_info->sb->s_blocksize;
+ const u64 bs = fs_info->sectorsize;
int ret;
/*
@@ -726,11 +726,11 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
{
struct inode *inode = file_inode(file);
struct inode *src = file_inode(file_src);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
int ret;
int wb_ret;
u64 len = olen;
- u64 bs = fs_info->sb->s_blocksize;
+ u64 bs = fs_info->sectorsize;
/*
* VFS's generic_remap_file_range_prep() protects us from cloning the
@@ -796,7 +796,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
- u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
+ u64 bs = BTRFS_I(inode_out)->root->fs_info->sectorsize;
u64 wb_len;
int ret;
diff --git a/fs/btrfs/reflink.h b/fs/btrfs/reflink.h
index ecb309b4dad0..1e291f7d85c4 100644
--- a/fs/btrfs/reflink.h
+++ b/fs/btrfs/reflink.h
@@ -3,7 +3,9 @@
#ifndef BTRFS_REFLINK_H
#define BTRFS_REFLINK_H
-#include <linux/fs.h>
+#include <linux/types.h>
+
+struct file;
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index abe594f77f99..f96f267fb4aa 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -523,7 +523,8 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
if (handle_useless_nodes(rc, node))
node = NULL;
out:
- btrfs_backref_iter_free(iter);
+ btrfs_free_path(iter->path);
+ kfree(iter);
btrfs_free_path(path);
if (err) {
btrfs_backref_error_cleanup(cache, node);
@@ -2987,7 +2988,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
const struct file_extent_cluster *cluster,
int *cluster_nr, unsigned long page_index)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 offset = BTRFS_I(inode)->index_cnt;
const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h
index 5fb60f2deb53..788c86d8633a 100644
--- a/fs/btrfs/relocation.h
+++ b/fs/btrfs/relocation.h
@@ -3,6 +3,15 @@
#ifndef BTRFS_RELOCATION_H
#define BTRFS_RELOCATION_H
+#include <linux/types.h>
+
+struct extent_buffer;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_trans_handle;
+struct btrfs_ordered_extent;
+struct btrfs_pending_snapshot;
+
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start);
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 603ad1459368..4bb538a372ce 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -10,7 +10,6 @@
#include "messages.h"
#include "transaction.h"
#include "disk-io.h"
-#include "print-tree.h"
#include "qgroup.h"
#include "space-info.h"
#include "accessors.h"
@@ -82,7 +81,14 @@ int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
if (ret > 0)
goto out;
} else {
- BUG_ON(ret == 0); /* Logical error */
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of the valid range.
+ */
+ if (ret == 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
@@ -323,8 +329,11 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
if (ret < 0)
goto out;
-
- BUG_ON(ret != 0);
+ if (ret != 0) {
+ /* The root must exist but we did not find it by the key. */
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = btrfs_del_item(trans, root, path);
out:
diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h
index 8b2c3859e464..6f929cf3bd49 100644
--- a/fs/btrfs/root-tree.h
+++ b/fs/btrfs/root-tree.h
@@ -3,7 +3,17 @@
#ifndef BTRFS_ROOT_TREE_H
#define BTRFS_ROOT_TREE_H
+#include <linux/types.h>
+
struct fscrypt_str;
+struct extent_buffer;
+struct btrfs_key;
+struct btrfs_root;
+struct btrfs_root_item;
+struct btrfs_path;
+struct btrfs_fs_info;
+struct btrfs_block_rsv;
+struct btrfs_trans_handle;
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a01807cbd4d4..c4bd0e60db59 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1098,12 +1098,22 @@ out:
static void scrub_read_endio(struct btrfs_bio *bbio)
{
struct scrub_stripe *stripe = bbio->private;
+ struct bio_vec *bvec;
+ int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
+ int num_sectors;
+ u32 bio_size = 0;
+ int i;
+
+ ASSERT(sector_nr < stripe->nr_sectors);
+ bio_for_each_bvec_all(bvec, &bbio->bio, i)
+ bio_size += bvec->bv_len;
+ num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
if (bbio->bio.bi_status) {
- bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
- bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors);
+ bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors);
+ bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors);
} else {
- bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
+ bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors);
}
bio_put(&bbio->bio);
if (atomic_dec_and_test(&stripe->pending_io)) {
@@ -1380,8 +1390,15 @@ static int find_first_extent_item(struct btrfs_root *extent_root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
+ btrfs_release_path(path);
+ return -EUCLEAN;
+ }
- ASSERT(ret > 0);
/*
* Here we intentionally pass 0 as @min_objectid, as there could be
* an extent item starting before @search_start.
@@ -1636,6 +1653,9 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
+ unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
+ stripe->bg->length - stripe->logical) >>
+ fs_info->sectorsize_bits;
u64 stripe_len = BTRFS_STRIPE_LEN;
int mirror = stripe->mirror_num;
int i;
@@ -1646,6 +1666,10 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
struct page *page = scrub_stripe_get_page(stripe, i);
unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i);
+ /* We're beyond the chunk boundary, no need to read anymore. */
+ if (i >= nr_sectors)
+ break;
+
/* The current sector cannot be merged, submit the bio. */
if (bbio &&
((i > 0 &&
@@ -1701,6 +1725,9 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_bio *bbio;
+ unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
+ stripe->bg->length - stripe->logical) >>
+ fs_info->sectorsize_bits;
int mirror = stripe->mirror_num;
ASSERT(stripe->bg);
@@ -1715,14 +1742,16 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
scrub_read_endio, stripe);
- /* Read the whole stripe. */
bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
- for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) {
+ /* Read the whole range inside the chunk boundary. */
+ for (unsigned int cur = 0; cur < nr_sectors; cur++) {
+ struct page *page = scrub_stripe_get_page(stripe, cur);
+ unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur);
int ret;
- ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0);
+ ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
/* We should have allocated enough bio vectors. */
- ASSERT(ret == PAGE_SIZE);
+ ASSERT(ret == fs_info->sectorsize);
}
atomic_inc(&stripe->pending_io);
diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h
index 7639103ebf9d..f0df597b75c7 100644
--- a/fs/btrfs/scrub.h
+++ b/fs/btrfs/scrub.h
@@ -3,6 +3,12 @@
#ifndef BTRFS_SCRUB_H
#define BTRFS_SCRUB_H
+#include <linux/types.h>
+
+struct btrfs_fs_info;
+struct btrfs_device;
+struct btrfs_scrub_progress;
+
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 4e36550618e5..50b4a76ac88e 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -25,7 +25,6 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
-#include "xattr.h"
#include "print-tree.h"
#include "accessors.h"
#include "dir-item.h"
@@ -777,7 +776,12 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
if (WARN_ON(!sctx->send_buf))
return -EINVAL;
- BUG_ON(sctx->send_size);
+ if (unlikely(sctx->send_size != 0)) {
+ btrfs_err(sctx->send_root->fs_info,
+ "send: command header buffer not empty cmd %d offset %llu",
+ cmd, sctx->send_off);
+ return -EINVAL;
+ }
sctx->send_size += sizeof(*hdr);
hdr = (struct btrfs_cmd_header *)sctx->send_buf;
@@ -1070,7 +1074,15 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
ret = PTR_ERR(start);
goto out;
}
- BUG_ON(start < p->buf);
+ if (unlikely(start < p->buf)) {
+ btrfs_err(root->fs_info,
+ "send: path ref buffer underflow for key (%llu %u %llu)",
+ found_key->objectid,
+ found_key->type,
+ found_key->offset);
+ ret = -EINVAL;
+ goto out;
+ }
}
p->start = start;
} else {
@@ -1406,7 +1418,7 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
struct btrfs_lru_cache_entry *raw_entry;
struct backref_cache_entry *entry;
- if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
+ if (sctx->backref_cache.size == 0)
return false;
/*
@@ -1504,7 +1516,7 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
* transaction handle or holding fs_info->commit_root_sem, so no need
* to take any lock here.
*/
- if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
+ if (sctx->backref_cache.size == 1)
sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
}
@@ -2809,8 +2821,7 @@ static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
static int trim_dir_utimes_cache(struct send_ctx *sctx)
{
- while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
- SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
+ while (sctx->dir_utimes_cache.size > SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
struct btrfs_lru_cache_entry *lru;
int ret;
@@ -4182,7 +4193,13 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* This should never happen as the root dir always has the same ref
* which is always '..'
*/
- BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+ if (unlikely(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID)) {
+ btrfs_err(fs_info,
+ "send: unexpected inode %llu in process_recorded_refs()",
+ sctx->cur_ino);
+ ret = -EINVAL;
+ goto out;
+ }
valid_path = fs_path_alloc();
if (!valid_path) {
@@ -6140,7 +6157,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
int ret = 0;
u64 offset = key->offset;
u64 end;
- u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
+ u64 bs = sctx->send_root->fs_info->sectorsize;
end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
if (offset >= end)
@@ -6458,21 +6475,18 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
return 0;
- if (sctx->cur_inode_last_extent == (u64)-1) {
- ret = get_last_extent(sctx, key->offset - 1);
- if (ret)
- return ret;
- }
-
- if (path->slots[0] == 0 &&
- sctx->cur_inode_last_extent < key->offset) {
- /*
- * We might have skipped entire leafs that contained only
- * file extent items for our current inode. These leafs have
- * a generation number smaller (older) than the one in the
- * current leaf and the leaf our last extent came from, and
- * are located between these 2 leafs.
- */
+ /*
+ * Get last extent's end offset (exclusive) if we haven't determined it
+ * yet (we're processing the first file extent item that is new), or if
+ * we're at the first slot of a leaf and the last extent's end is less
+ * than the current extent's offset, because we might have skipped
+ * entire leaves that contained only file extent items for our current
+ * inode. These leaves have a generation number smaller (older) than the
+ * one in the current leaf and the leaf our last extent came from, and
+ * are located between these 2 leaves.
+ */
+ if ((sctx->cur_inode_last_extent == (u64)-1) ||
+ (path->slots[0] == 0 && sctx->cur_inode_last_extent < key->offset)) {
ret = get_last_extent(sctx, key->offset - 1);
if (ret)
return ret;
@@ -6705,11 +6719,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret)
goto out;
}
- if (sctx->cur_inode_last_extent <
- sctx->cur_inode_size) {
- ret = send_hole(sctx, sctx->cur_inode_size);
- if (ret)
+ if (sctx->cur_inode_last_extent < sctx->cur_inode_size) {
+ ret = range_is_hole_in_parent(sctx,
+ sctx->cur_inode_last_extent,
+ sctx->cur_inode_size);
+ if (ret < 0) {
goto out;
+ } else if (ret == 0) {
+ ret = send_hole(sctx, sctx->cur_inode_size);
+ if (ret < 0)
+ goto out;
+ } else {
+ /* Range is already a hole, skip. */
+ ret = 0;
+ }
}
}
if (need_truncate) {
@@ -7420,8 +7443,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen
u64 reada_done = 0;
lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
+ ASSERT(*level != 0);
- BUG_ON(*level == 0);
eb = btrfs_read_node_slot(parent, slot);
if (IS_ERR(eb))
return PTR_ERR(eb);
@@ -8111,7 +8134,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
}
if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
- ret = -EINVAL;
+ ret = -EOPNOTSUPP;
goto out;
}
@@ -8205,8 +8228,8 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
goto out;
}
- sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
- arg->clone_sources_count + 1,
+ sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1,
+ sizeof(*sctx->clone_roots),
GFP_KERNEL);
if (!sctx->clone_roots) {
ret = -ENOMEM;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 4f5509cb1803..dd1c9f02b011 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -8,6 +8,11 @@
#define BTRFS_SEND_H
#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/align.h>
+
+struct inode;
+struct btrfs_ioctl_send_args;
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
/* Conditional support for the upcoming protocol version. */
@@ -25,9 +30,6 @@
#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
#define BTRFS_SEND_BUF_SIZE_V2 ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED, PAGE_SIZE)
-struct inode;
-struct btrfs_ioctl_send_args;
-
enum btrfs_tlv_type {
BTRFS_TLV_U8,
BTRFS_TLV_U16,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 571bb13587d5..d620323d08ea 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -9,7 +9,6 @@
#include "ordered-data.h"
#include "transaction.h"
#include "block-group.h"
-#include "zoned.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
@@ -856,7 +855,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info)
{
- u64 global_rsv_size = fs_info->global_block_rsv.reserved;
+ const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
u64 ordered, delalloc;
u64 thresh;
u64 used;
@@ -956,8 +955,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
if (ordered >= delalloc)
- used += fs_info->delayed_refs_rsv.reserved +
- fs_info->delayed_block_rsv.reserved;
+ used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) +
+ btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv);
else
used += space_info->bytes_may_use - global_rsv_size;
@@ -1173,7 +1172,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
enum btrfs_flush_state flush;
u64 delalloc_size = 0;
u64 to_reclaim, block_rsv_size;
- u64 global_rsv_size = global_rsv->reserved;
+ const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv);
loops++;
@@ -1185,9 +1184,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
* assume it's tied up in delalloc reservations.
*/
block_rsv_size = global_rsv_size +
- delayed_block_rsv->reserved +
- delayed_refs_rsv->reserved +
- trans_rsv->reserved;
+ btrfs_block_rsv_reserved(delayed_block_rsv) +
+ btrfs_block_rsv_reserved(delayed_refs_rsv) +
+ btrfs_block_rsv_reserved(trans_rsv);
if (block_rsv_size < space_info->bytes_may_use)
delalloc_size = space_info->bytes_may_use - block_rsv_size;
@@ -1207,16 +1206,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
to_reclaim = delalloc_size;
flush = FLUSH_DELALLOC;
} else if (space_info->bytes_pinned >
- (delayed_block_rsv->reserved +
- delayed_refs_rsv->reserved)) {
+ (btrfs_block_rsv_reserved(delayed_block_rsv) +
+ btrfs_block_rsv_reserved(delayed_refs_rsv))) {
to_reclaim = space_info->bytes_pinned;
flush = COMMIT_TRANS;
- } else if (delayed_block_rsv->reserved >
- delayed_refs_rsv->reserved) {
- to_reclaim = delayed_block_rsv->reserved;
+ } else if (btrfs_block_rsv_reserved(delayed_block_rsv) >
+ btrfs_block_rsv_reserved(delayed_refs_rsv)) {
+ to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv);
flush = FLUSH_DELAYED_ITEMS_NR;
} else {
- to_reclaim = delayed_refs_rsv->reserved;
+ to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv);
flush = FLUSH_DELAYED_REFS_NR;
}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 92c595fed1b0..a733458fd13b 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -4,8 +4,17 @@
#define BTRFS_SPACE_INFO_H
#include <trace/events/btrfs.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/lockdep.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
#include "volumes.h"
+struct btrfs_fs_info;
+struct btrfs_block_group;
+
/*
* Different levels for to flush space when doing space reservations.
*
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 93511d54abf8..54736f6238e6 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -111,6 +111,9 @@ void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sector
subpage_info->checked_offset = cur;
cur += nr_bits;
+ subpage_info->locked_offset = cur;
+ cur += nr_bits;
+
subpage_info->total_nr_bits = cur;
}
@@ -237,28 +240,58 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
start + len <= folio_pos(folio) + PAGE_SIZE);
}
+#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
+({ \
+ unsigned int start_bit; \
+ \
+ btrfs_subpage_assert(fs_info, folio, start, len); \
+ start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
+ start_bit += fs_info->subpage_info->name##_offset; \
+ start_bit; \
+})
+
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = len >> fs_info->sectorsize_bits;
+ unsigned long flags;
+
btrfs_subpage_assert(fs_info, folio, start, len);
+ spin_lock_irqsave(&subpage->lock, flags);
+ /*
+ * Even though it's just for reading the page, no one should have
+ * locked the subpage range.
+ */
+ ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+ bitmap_set(subpage->bitmaps, start_bit, nbits);
atomic_add(nbits, &subpage->readers);
+ spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = len >> fs_info->sectorsize_bits;
+ unsigned long flags;
bool is_data;
bool last;
btrfs_subpage_assert(fs_info, folio, start, len);
is_data = is_data_inode(folio->mapping->host);
+
+ spin_lock_irqsave(&subpage->lock, flags);
+
+ /* The range should have already been locked. */
+ ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
ASSERT(atomic_read(&subpage->readers) >= nbits);
+
+ bitmap_clear(subpage->bitmaps, start_bit, nbits);
last = atomic_sub_and_test(nbits, &subpage->readers);
/*
@@ -270,6 +303,7 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
*/
if (is_data && last)
folio_unlock(folio);
+ spin_unlock_irqrestore(&subpage->lock, flags);
}
static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
@@ -290,28 +324,38 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
orig_start + orig_len) - *start;
}
-void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len)
+static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = (len >> fs_info->sectorsize_bits);
+ unsigned long flags;
int ret;
btrfs_subpage_assert(fs_info, folio, start, len);
+ spin_lock_irqsave(&subpage->lock, flags);
ASSERT(atomic_read(&subpage->readers) == 0);
+ ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+ bitmap_set(subpage->bitmaps, start_bit, nbits);
ret = atomic_add_return(nbits, &subpage->writers);
ASSERT(ret == nbits);
+ spin_unlock_irqrestore(&subpage->lock, flags);
}
-bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len)
+static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = (len >> fs_info->sectorsize_bits);
+ unsigned long flags;
+ bool last;
btrfs_subpage_assert(fs_info, folio, start, len);
+ spin_lock_irqsave(&subpage->lock, flags);
/*
* We have call sites passing @lock_page into
* extent_clear_unlock_delalloc() for compression path.
@@ -319,11 +363,18 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
* This @locked_page is locked by plain lock_page(), thus its
* subpage::writers is 0. Handle them in a special way.
*/
- if (atomic_read(&subpage->writers) == 0)
+ if (atomic_read(&subpage->writers) == 0) {
+ spin_unlock_irqrestore(&subpage->lock, flags);
return true;
+ }
ASSERT(atomic_read(&subpage->writers) >= nbits);
- return atomic_sub_and_test(nbits, &subpage->writers);
+ /* The target range should have been locked. */
+ ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
+ bitmap_clear(subpage->bitmaps, start_bit, nbits);
+ last = atomic_sub_and_test(nbits, &subpage->writers);
+ spin_unlock_irqrestore(&subpage->lock, flags);
+ return last;
}
/*
@@ -365,16 +416,6 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
folio_unlock(folio);
}
-#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
-({ \
- unsigned int start_bit; \
- \
- btrfs_subpage_assert(fs_info, folio, start, len); \
- start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
- start_bit += fs_info->subpage_info->name##_offset; \
- start_bit; \
-})
-
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
bitmap_test_range_all_set(subpage->bitmaps, \
fs_info->subpage_info->name##_offset, \
@@ -475,7 +516,8 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
spin_lock_irqsave(&subpage->lock, flags);
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
- folio_start_writeback(folio);
+ if (!folio_test_writeback(folio))
+ folio_start_writeback(folio);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -750,6 +792,7 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, locked, &checked_bitmap);
spin_unlock_irqrestore(&subpage->lock, flags);
dump_page(folio_page(folio, 0), "btrfs subpage dump");
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 793c2b314a58..b6dc013b0fdc 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -4,6 +4,11 @@
#define BTRFS_SUBPAGE_H
#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+struct address_space;
+struct folio;
+struct btrfs_fs_info;
/*
* Extra info for subpapge bitmap.
@@ -28,7 +33,7 @@ struct btrfs_subpage_info {
unsigned int total_nr_bits;
/*
- * *_start indicates where the bitmap starts, the length is always
+ * *_offset indicates where the bitmap starts, the length is always
* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
*/
unsigned int uptodate_offset;
@@ -36,6 +41,16 @@ struct btrfs_subpage_info {
unsigned int writeback_offset;
unsigned int ordered_offset;
unsigned int checked_offset;
+
+ /*
+ * For locked bitmaps, normally it's subpage representation for folio
+ * Locked flag, but metadata is different:
+ *
+ * - Metadata doesn't really lock the folio
+ * It's just to prevent page::private get cleared before the last
+ * end_page_read().
+ */
+ unsigned int locked_offset;
};
/*
@@ -93,10 +108,6 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
-void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len);
-bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len);
int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 896acfda1789..7e44ccaf348f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -34,13 +34,11 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "props.h"
#include "xattr.h"
#include "bio.h"
#include "export.h"
#include "compression.h"
-#include "rcu-string.h"
#include "dev-replace.h"
#include "free-space-cache.h"
#include "backref.h"
@@ -1457,6 +1455,14 @@ static int btrfs_reconfigure(struct fs_context *fc)
btrfs_info_to_ctx(fs_info, &old_ctx);
+ /*
+ * This is our "bind mount" trick, we don't want to allow the user to do
+ * anything other than mount a different ro/rw and a different subvol,
+ * all of the mount options should be maintained.
+ */
+ if (mount_reconfigure)
+ ctx->mount_opt = old_ctx.mount_opt;
+
sync_filesystem(sb);
set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
@@ -1759,7 +1765,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
- buf->f_bsize = dentry->d_sb->s_blocksize;
+ buf->f_bsize = fs_info->sectorsize;
buf->f_namelen = BTRFS_NAME_LEN;
/* We treat it as constant endianness (it doesn't matter _which_)
@@ -2195,7 +2201,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
vol = memdup_user((void __user *)arg, sizeof(*vol));
if (IS_ERR(vol))
return PTR_ERR(vol);
- vol->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol);
+ if (ret < 0)
+ goto out;
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
@@ -2237,6 +2245,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
break;
}
+out:
kfree(vol);
return ret;
}
diff --git a/fs/btrfs/super.h b/fs/btrfs/super.h
index f18253ca280d..cbcab434b5ec 100644
--- a/fs/btrfs/super.h
+++ b/fs/btrfs/super.h
@@ -3,6 +3,13 @@
#ifndef BTRFS_SUPER_H
#define BTRFS_SUPER_H
+#include <linux/types.h>
+#include <linux/fs.h>
+#include "fs.h"
+
+struct super_block;
+struct btrfs_fs_info;
+
bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
unsigned long flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 84c05246ffd8..c6387a8ddb94 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -421,7 +421,7 @@ BTRFS_ATTR(static_feature, supported_sectorsizes,
static ssize_t acl_show(struct kobject *kobj, struct kobj_attribute *a, char *buf)
{
- return sysfs_emit(buf, "%d\n", !!IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
+ return sysfs_emit(buf, "%d\n", IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
}
BTRFS_ATTR(static_feature, acl, acl_show);
@@ -1228,11 +1228,12 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+ const enum btrfs_read_policy policy = READ_ONCE(fs_devices->read_policy);
ssize_t ret = 0;
int i;
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
- if (fs_devices->read_policy == i)
+ if (policy == i)
ret += sysfs_emit_at(buf, ret, "%s[%s]",
(ret == 0 ? "" : " "),
btrfs_read_policy_name[i]);
@@ -1256,8 +1257,8 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
- if (i != fs_devices->read_policy) {
- fs_devices->read_policy = i;
+ if (i != READ_ONCE(fs_devices->read_policy)) {
+ WRITE_ONCE(fs_devices->read_policy, i);
btrfs_info(fs_devices->fs_info,
"read policy set to '%s'",
btrfs_read_policy_name[i]);
@@ -1306,6 +1307,47 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
btrfs_bg_reclaim_threshold_store);
+#ifdef CONFIG_BTRFS_DEBUG
+static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+
+ switch (READ_ONCE(fs_devices->offload_csum_mode)) {
+ case BTRFS_OFFLOAD_CSUM_AUTO:
+ return sysfs_emit(buf, "auto\n");
+ case BTRFS_OFFLOAD_CSUM_FORCE_ON:
+ return sysfs_emit(buf, "1\n");
+ case BTRFS_OFFLOAD_CSUM_FORCE_OFF:
+ return sysfs_emit(buf, "0\n");
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+}
+
+static ssize_t btrfs_offload_csum_store(struct kobject *kobj,
+ struct kobj_attribute *a, const char *buf,
+ size_t len)
+{
+ struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+ int ret;
+ bool val;
+
+ ret = kstrtobool(buf, &val);
+ if (ret == 0)
+ WRITE_ONCE(fs_devices->offload_csum_mode,
+ val ? BTRFS_OFFLOAD_CSUM_FORCE_ON : BTRFS_OFFLOAD_CSUM_FORCE_OFF);
+ else if (ret == -EINVAL && sysfs_streq(buf, "auto"))
+ WRITE_ONCE(fs_devices->offload_csum_mode, BTRFS_OFFLOAD_CSUM_AUTO);
+ else
+ return -EINVAL;
+
+ return len;
+}
+BTRFS_ATTR_RW(, offload_csum, btrfs_offload_csum_show, btrfs_offload_csum_store);
+#endif
+
/*
* Per-filesystem information and stats.
*
@@ -1325,6 +1367,9 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, bg_reclaim_threshold),
BTRFS_ATTR_PTR(, commit_stats),
BTRFS_ATTR_PTR(, temp_fsid),
+#ifdef CONFIG_BTRFS_DEBUG
+ BTRFS_ATTR_PTR(, offload_csum),
+#endif
NULL,
};
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 86c7eef12873..e6a284c59809 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -3,8 +3,17 @@
#ifndef BTRFS_SYSFS_H
#define BTRFS_SYSFS_H
+#include <linux/types.h>
+#include <linux/compiler_types.h>
#include <linux/kobject.h>
+struct btrfs_fs_info;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_block_group;
+struct btrfs_space_info;
+struct btrfs_qgroup;
+
enum btrfs_feature_set {
FEAT_COMPAT,
FEAT_COMPAT_RO,
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 25b3349595e0..865d4af4b303 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -11,6 +11,7 @@
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../extent_io.h"
+#include "../disk-io.h"
#include "../btrfs_inode.h"
#define PROCESS_UNLOCK (1 << 0)
@@ -105,9 +106,11 @@ static void dump_extent_io_tree(const struct extent_io_tree *tree)
}
}
-static int test_find_delalloc(u32 sectorsize)
+static int test_find_delalloc(u32 sectorsize, u32 nodesize)
{
- struct inode *inode;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root = NULL;
+ struct inode *inode = NULL;
struct extent_io_tree *tmp;
struct page *page;
struct page *locked_page = NULL;
@@ -121,12 +124,27 @@ static int test_find_delalloc(u32 sectorsize)
test_msg("running find delalloc tests");
+ fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
+ if (!fs_info) {
+ test_std_err(TEST_ALLOC_FS_INFO);
+ return -ENOMEM;
+ }
+
+ root = btrfs_alloc_dummy_root(fs_info);
+ if (IS_ERR(root)) {
+ test_std_err(TEST_ALLOC_ROOT);
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
inode = btrfs_new_test_inode();
if (!inode) {
test_std_err(TEST_ALLOC_INODE);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
tmp = &BTRFS_I(inode)->io_tree;
+ BTRFS_I(inode)->root = root;
/*
* Passing NULL as we don't have fs_info but tracepoints are not used
@@ -316,6 +334,8 @@ out:
process_page_range(inode, 0, total_dirty - 1,
PROCESS_UNLOCK | PROCESS_RELEASE);
iput(inode);
+ btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
@@ -794,7 +814,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
test_msg("running extent I/O tests");
- ret = test_find_delalloc(sectorsize);
+ ret = test_find_delalloc(sectorsize, nodesize);
if (ret)
goto out;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 9957de9f7806..99da9d34b77a 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -258,7 +258,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* First with no extents */
BTRFS_I(inode)->root = root;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize);
if (IS_ERR(em)) {
em = NULL;
test_err("got an error when we shouldn't have");
@@ -278,7 +278,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
*/
setup_file_extents(root, sectorsize);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, (u64)-1);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -316,7 +316,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -339,7 +339,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Regular extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -367,7 +367,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* The next 3 are split extents */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -396,7 +396,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -418,7 +418,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -452,7 +452,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Prealloc extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -481,7 +481,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* The next 3 are a half written prealloc extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -511,7 +511,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -544,7 +544,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -579,7 +579,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Now for the compressed extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -613,7 +613,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Split compressed extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -648,7 +648,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -675,7 +675,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -710,7 +710,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* A hole between regular extents but no hole extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset + 6, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -737,7 +737,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, SZ_4M);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -770,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -850,7 +850,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
insert_inode_item_key(root);
insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 2 * sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -872,7 +872,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
}
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, 2 * sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, sectorsize, 2 * sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b3333ceef04..46e8426adf4f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -23,12 +23,10 @@
#include "qgroup.h"
#include "block-group.h"
#include "space-info.h"
-#include "zoned.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"
-#include "defrag.h"
#include "dir-item.h"
#include "uuid-tree.h"
#include "ioctl.h"
@@ -564,56 +562,22 @@ static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
u64 num_bytes,
u64 *delayed_refs_bytes)
{
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info;
- u64 extra_delayed_refs_bytes = 0;
- u64 bytes;
+ u64 bytes = num_bytes + *delayed_refs_bytes;
int ret;
/*
- * If there's a gap between the size of the delayed refs reserve and
- * its reserved space, than some tasks have added delayed refs or bumped
- * its size otherwise (due to block group creation or removal, or block
- * group item update). Also try to allocate that gap in order to prevent
- * using (and possibly abusing) the global reserve when committing the
- * transaction.
- */
- if (flush == BTRFS_RESERVE_FLUSH_ALL &&
- !btrfs_block_rsv_full(delayed_refs_rsv)) {
- spin_lock(&delayed_refs_rsv->lock);
- if (delayed_refs_rsv->size > delayed_refs_rsv->reserved)
- extra_delayed_refs_bytes = delayed_refs_rsv->size -
- delayed_refs_rsv->reserved;
- spin_unlock(&delayed_refs_rsv->lock);
- }
-
- bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes;
-
- /*
* We want to reserve all the bytes we may need all at once, so we only
* do 1 enospc flushing cycle per transaction start.
*/
ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
- if (ret == 0) {
- if (extra_delayed_refs_bytes > 0)
- btrfs_migrate_to_delayed_refs_rsv(fs_info,
- extra_delayed_refs_bytes);
- return 0;
- }
-
- if (extra_delayed_refs_bytes > 0) {
- bytes -= extra_delayed_refs_bytes;
- ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
- if (ret == 0)
- return 0;
- }
/*
* If we are an emergency flush, which can steal from the global block
* reserve, then attempt to not reserve space for the delayed refs, as
* we will consume space for them from the global block reserve.
*/
- if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
+ if (ret && flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
bytes -= *delayed_refs_bytes;
*delayed_refs_bytes = 0;
ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
@@ -1868,7 +1832,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
pending->snap = NULL;
@@ -1993,19 +1957,6 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super->uuid_tree_generation = root_item->generation;
}
-int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
-{
- struct btrfs_transaction *trans;
- int ret = 0;
-
- spin_lock(&info->trans_lock);
- trans = info->running_transaction;
- if (trans)
- ret = (trans->state >= TRANS_STATE_COMMIT_START);
- spin_unlock(&info->trans_lock);
- return ret;
-}
-
int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
struct btrfs_transaction *trans;
@@ -2720,9 +2671,7 @@ void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
int __init btrfs_transaction_init(void)
{
- btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
- sizeof(struct btrfs_trans_handle), 0,
- SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
+ btrfs_trans_handle_cachep = KMEM_CACHE(btrfs_trans_handle, SLAB_TEMPORARY);
if (!btrfs_trans_handle_cachep)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 2bf8bbdfd0b3..4e451ab173b1 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -6,12 +6,27 @@
#ifndef BTRFS_TRANSACTION_H
#define BTRFS_TRANSACTION_H
+#include <linux/atomic.h>
#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/time64.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
#include "btrfs_inode.h"
#include "delayed-ref.h"
-#include "ctree.h"
+#include "extent-io-tree.h"
+#include "block-rsv.h"
+#include "messages.h"
#include "misc.h"
+struct dentry;
+struct inode;
+struct btrfs_pending_snapshot;
+struct btrfs_fs_info;
+struct btrfs_root_item;
+struct btrfs_root;
+struct btrfs_path;
+
/* Radix-tree tag for roots that are part of the trasaction. */
#define BTRFS_ROOT_TRANS_TAG 0
@@ -262,7 +277,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
-int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 50fdc69fdddf..c8fbcae4e88e 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -21,7 +21,6 @@
#include "messages.h"
#include "ctree.h"
#include "tree-checker.h"
-#include "disk-io.h"
#include "compression.h"
#include "volumes.h"
#include "misc.h"
@@ -30,7 +29,6 @@
#include "file-item.h"
#include "inode-item.h"
#include "dir-item.h"
-#include "raid-stripe-tree.h"
#include "extent-tree.h"
/*
@@ -67,6 +65,7 @@ static void generic_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -94,6 +93,7 @@ static void file_extent_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -154,6 +154,7 @@ static void dir_item_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -649,6 +650,7 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -1005,6 +1007,7 @@ static void dev_item_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(eb->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -1260,6 +1263,7 @@ static void extent_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(eb->fs_info,
"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -1436,7 +1440,7 @@ static int check_extent_item(struct extent_buffer *leaf,
if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
- ptr, inline_type, end);
+ ptr, btrfs_extent_inline_ref_size(inline_type), end);
return -EUCLEAN;
}
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 14b9fbe82da4..5c809b50b2d0 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -6,10 +6,12 @@
#ifndef BTRFS_TREE_CHECKER_H
#define BTRFS_TREE_CHECKER_H
+#include <linux/types.h>
#include <uapi/linux/btrfs_tree.h>
struct extent_buffer;
struct btrfs_chunk;
+struct btrfs_key;
/* All the extra info needed to verify the parentness of a tree block. */
struct btrfs_tree_parent_check {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 331fc7429952..472918a5bc73 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -13,13 +13,11 @@
#include "tree-log.h"
#include "disk-io.h"
#include "locking.h"
-#include "print-tree.h"
#include "backref.h"
#include "compression.h"
#include "qgroup.h"
#include "block-group.h"
#include "space-info.h"
-#include "zoned.h"
#include "inode-item.h"
#include "fs.h"
#include "accessors.h"
@@ -2820,6 +2818,52 @@ static void wait_for_writer(struct btrfs_root *root)
finish_wait(&root->log_writer_wait, &wait);
}
+void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
+{
+ ctx->log_ret = 0;
+ ctx->log_transid = 0;
+ ctx->log_new_dentries = false;
+ ctx->logging_new_name = false;
+ ctx->logging_new_delayed_dentries = false;
+ ctx->logged_before = false;
+ ctx->inode = inode;
+ INIT_LIST_HEAD(&ctx->list);
+ INIT_LIST_HEAD(&ctx->ordered_extents);
+ INIT_LIST_HEAD(&ctx->conflict_inodes);
+ ctx->num_conflict_inodes = 0;
+ ctx->logging_conflict_inodes = false;
+ ctx->scratch_eb = NULL;
+}
+
+void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+
+ if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
+ !test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
+ return;
+
+ /*
+ * Don't care about allocation failure. This is just for optimization,
+ * if we fail to allocate here, we will try again later if needed.
+ */
+ ctx->scratch_eb = alloc_dummy_extent_buffer(inode->root->fs_info, 0);
+}
+
+void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_ordered_extent *tmp;
+
+ ASSERT(inode_is_locked(ctx->inode));
+
+ list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
+ list_del_init(&ordered->log_list);
+ btrfs_put_ordered_extent(ordered);
+ }
+}
+
+
static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
@@ -3619,6 +3663,30 @@ out:
return ret;
}
+static int clone_leaf(struct btrfs_path *path, struct btrfs_log_ctx *ctx)
+{
+ const int slot = path->slots[0];
+
+ if (ctx->scratch_eb) {
+ copy_extent_buffer_full(ctx->scratch_eb, path->nodes[0]);
+ } else {
+ ctx->scratch_eb = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!ctx->scratch_eb)
+ return -ENOMEM;
+ }
+
+ btrfs_release_path(path);
+ path->nodes[0] = ctx->scratch_eb;
+ path->slots[0] = slot;
+ /*
+ * Add extra ref to scratch eb so that it is not freed when callers
+ * release the path, so we can reuse it later if needed.
+ */
+ atomic_inc(&ctx->scratch_eb->refs);
+
+ return 0;
+}
+
static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path,
@@ -3633,23 +3701,20 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
bool last_found = false;
int batch_start = 0;
int batch_size = 0;
- int i;
+ int ret;
/*
* We need to clone the leaf, release the read lock on it, and use the
* clone before modifying the log tree. See the comment at copy_items()
* about why we need to do this.
*/
- src = btrfs_clone_extent_buffer(path->nodes[0]);
- if (!src)
- return -ENOMEM;
+ ret = clone_leaf(path, ctx);
+ if (ret < 0)
+ return ret;
- i = path->slots[0];
- btrfs_release_path(path);
- path->nodes[0] = src;
- path->slots[0] = i;
+ src = path->nodes[0];
- for (; i < nritems; i++) {
+ for (int i = path->slots[0]; i < nritems; i++) {
struct btrfs_dir_item *di;
struct btrfs_key key;
int ret;
@@ -4259,17 +4324,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_path *dst_path,
struct btrfs_path *src_path,
int start_slot, int nr, int inode_only,
- u64 logged_isize)
+ u64 logged_isize, struct btrfs_log_ctx *ctx)
{
struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
struct extent_buffer *src;
- int ret = 0;
+ int ret;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
struct btrfs_item_batch batch;
char *ins_data;
- int i;
int dst_index;
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
@@ -4302,14 +4366,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* while the other is holding the delayed node's mutex and wants to
* write lock the same subvolume leaf for flushing delayed items.
*/
- src = btrfs_clone_extent_buffer(src_path->nodes[0]);
- if (!src)
- return -ENOMEM;
+ ret = clone_leaf(src_path, ctx);
+ if (ret < 0)
+ return ret;
- i = src_path->slots[0];
- btrfs_release_path(src_path);
- src_path->nodes[0] = src;
- src_path->slots[0] = i;
+ src = src_path->nodes[0];
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
@@ -4324,7 +4385,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
batch.nr = 0;
dst_index = 0;
- for (i = 0; i < nr; i++) {
+ for (int i = 0; i < nr; i++) {
const int src_slot = start_slot + i;
struct btrfs_root *csum_root;
struct btrfs_ordered_sum *sums;
@@ -4431,7 +4492,7 @@ add_to_batch:
goto out;
dst_index = 0;
- for (i = 0; i < nr; i++) {
+ for (int i = 0; i < nr; i++) {
const int src_slot = start_slot + i;
const int dst_slot = dst_path->slots[0] + dst_index;
struct btrfs_key key;
@@ -4704,7 +4765,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
*/
static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = inode->root;
struct btrfs_key key;
@@ -4770,7 +4832,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
if (slot >= btrfs_header_nritems(leaf)) {
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
if (ret < 0)
goto out;
ins_nr = 0;
@@ -4820,7 +4882,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
}
if (ins_nr > 0)
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
out:
btrfs_release_path(path);
btrfs_free_path(dst_path);
@@ -4899,7 +4961,7 @@ process:
write_unlock(&tree->lock);
if (!ret)
- ret = btrfs_log_prealloc_extents(trans, inode, path);
+ ret = btrfs_log_prealloc_extents(trans, inode, path, ctx);
if (ret)
return ret;
@@ -4980,7 +5042,8 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path,
- struct btrfs_path *dst_path)
+ struct btrfs_path *dst_path,
+ struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = inode->root;
int ret;
@@ -5009,7 +5072,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
if (slot >= nritems) {
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5035,7 +5098,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
}
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
if (ret < 0)
return ret;
}
@@ -5847,7 +5910,7 @@ again:
}
ret = copy_items(trans, inode, dst_path, path,
ins_start_slot, ins_nr,
- inode_only, logged_isize);
+ inode_only, logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5866,7 +5929,7 @@ again:
goto next_slot;
ret = copy_items(trans, inode, dst_path, path,
ins_start_slot,
- ins_nr, inode_only, logged_isize);
+ ins_nr, inode_only, logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5883,7 +5946,7 @@ again:
}
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
- ins_nr, inode_only, logged_isize);
+ ins_nr, inode_only, logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 1;
@@ -5898,7 +5961,7 @@ next_slot:
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path,
ins_start_slot, ins_nr, inode_only,
- logged_isize);
+ logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5923,7 +5986,7 @@ next_key:
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
- ins_nr, inode_only, logged_isize);
+ ins_nr, inode_only, logged_isize, ctx);
if (ret)
return ret;
}
@@ -5934,7 +5997,7 @@ next_key:
* lock the same leaf with btrfs_log_prealloc_extents() below.
*/
btrfs_release_path(path);
- ret = btrfs_log_prealloc_extents(trans, inode, dst_path);
+ ret = btrfs_log_prealloc_extents(trans, inode, dst_path, ctx);
}
return ret;
@@ -6526,7 +6589,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
btrfs_release_path(dst_path);
- ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+ ret = btrfs_log_all_xattrs(trans, inode, path, dst_path, ctx);
if (ret)
goto out_unlock;
xattrs_logged = true;
@@ -6553,7 +6616,7 @@ log_extents:
* BTRFS_INODE_COPY_EVERYTHING set.
*/
if (!xattrs_logged && inode->logged_trans < trans->transid) {
- ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+ ret = btrfs_log_all_xattrs(trans, inode, path, dst_path, ctx);
if (ret)
goto out_unlock;
btrfs_release_path(path);
@@ -7502,6 +7565,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
ctx.logging_new_name = true;
+ btrfs_init_log_ctx_scratch_eb(&ctx);
/*
* We don't care about the return value. If we fail to log the new name
* then we know the next attempt to sync the log will fallback to a full
@@ -7510,6 +7574,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* inconsistent state after a rename operation.
*/
btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
+ free_extent_buffer(ctx.scratch_eb);
ASSERT(list_empty(&ctx.conflict_inodes));
out:
/*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a550a8a375cd..22e9cbc81577 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -6,10 +6,18 @@
#ifndef BTRFS_TREE_LOG_H
#define BTRFS_TREE_LOG_H
+#include <linux/list.h>
+#include <linux/fs.h>
#include "messages.h"
#include "ctree.h"
#include "transaction.h"
+struct inode;
+struct dentry;
+struct btrfs_ordered_extent;
+struct btrfs_root;
+struct btrfs_trans_handle;
+
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
@@ -36,37 +44,20 @@ struct btrfs_log_ctx {
struct list_head conflict_inodes;
int num_conflict_inodes;
bool logging_conflict_inodes;
+ /*
+ * Used for fsyncs that need to copy items from the subvolume tree to
+ * the log tree (full sync flag set or copy everything flag set) to
+ * avoid allocating a temporary extent buffer while holding a lock on
+ * an extent buffer of the subvolume tree and under the log transaction.
+ * Also helps to avoid allocating and freeing a temporary extent buffer
+ * in case we need to process multiple leaves from the subvolume tree.
+ */
+ struct extent_buffer *scratch_eb;
};
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
- struct inode *inode)
-{
- ctx->log_ret = 0;
- ctx->log_transid = 0;
- ctx->log_new_dentries = false;
- ctx->logging_new_name = false;
- ctx->logging_new_delayed_dentries = false;
- ctx->logged_before = false;
- ctx->inode = inode;
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->ordered_extents);
- INIT_LIST_HEAD(&ctx->conflict_inodes);
- ctx->num_conflict_inodes = 0;
- ctx->logging_conflict_inodes = false;
-}
-
-static inline void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
-{
- struct btrfs_ordered_extent *ordered;
- struct btrfs_ordered_extent *tmp;
-
- ASSERT(inode_is_locked(ctx->inode));
-
- list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
- list_del_init(&ordered->log_list);
- btrfs_put_ordered_extent(ordered);
- }
-}
+void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode);
+void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx);
+void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx);
static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans)
{
diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c
index 3df6153d5d5a..43b3accbed7a 100644
--- a/fs/btrfs/tree-mod-log.c
+++ b/fs/btrfs/tree-mod-log.c
@@ -44,7 +44,7 @@ struct tree_mod_elem {
/*
* Pull a new tree mod seq number for our operation.
*/
-static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
+static u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}
@@ -170,8 +170,7 @@ static noinline int tree_mod_log_insert(struct btrfs_fs_info *fs_info,
* this until all tree mod log insertions are recorded in the rb tree and then
* write unlock fs_info::tree_mod_log_lock.
*/
-static inline bool tree_mod_dont_log(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb)
+static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
return true;
@@ -188,7 +187,7 @@ static inline bool tree_mod_dont_log(struct btrfs_fs_info *fs_info,
}
/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
-static inline bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+static bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
@@ -367,9 +366,9 @@ free_tms:
return ret;
}
-static inline int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
- struct tree_mod_elem **tm_list,
- int nritems)
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
int i, j;
int ret;
diff --git a/fs/btrfs/tree-mod-log.h b/fs/btrfs/tree-mod-log.h
index 94f10afeee97..ff00c8e8a393 100644
--- a/fs/btrfs/tree-mod-log.h
+++ b/fs/btrfs/tree-mod-log.h
@@ -3,7 +3,13 @@
#ifndef BTRFS_TREE_MOD_LOG_H
#define BTRFS_TREE_MOD_LOG_H
-#include "ctree.h"
+#include <linux/list.h>
+
+struct extent_buffer;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_root;
+struct btrfs_seq_list;
/* Represents a tree mod log user. */
struct btrfs_seq_list {
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index b4ac2b0cd235..183863f4bfa4 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -7,7 +7,6 @@
#include <linux/slab.h>
#include "messages.h"
#include "ulist.h"
-#include "ctree.h"
/*
* ulist is a generic data structure to hold a collection of unique u64
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index b2cef187ea8e..8e200fe1a2dd 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -7,6 +7,7 @@
#ifndef BTRFS_ULIST_H
#define BTRFS_ULIST_H
+#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 5be74f9e47eb..b0aff297d67d 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -9,7 +9,6 @@
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
-#include "print-tree.h"
#include "fs.h"
#include "accessors.h"
#include "uuid-tree.h"
@@ -114,7 +113,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
sizeof(subid_le));
- if (ret >= 0) {
+ if (ret == 0) {
/* Add an item for the type for the first time */
eb = path->nodes[0];
slot = path->slots[0];
diff --git a/fs/btrfs/uuid-tree.h b/fs/btrfs/uuid-tree.h
index 5350c87fe2ca..080ede0227ae 100644
--- a/fs/btrfs/uuid-tree.h
+++ b/fs/btrfs/uuid-tree.h
@@ -3,6 +3,11 @@
#ifndef BTRFS_UUID_TREE_H
#define BTRFS_UUID_TREE_H
+#include <linux/types.h>
+
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
+
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index 66e2270b0dae..4042dd6437ae 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -14,7 +14,6 @@
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
-#include "disk-io.h"
#include "locking.h"
#include "fs.h"
#include "accessors.h"
diff --git a/fs/btrfs/verity.h b/fs/btrfs/verity.h
index 91c10f7d0a46..d696659e43e4 100644
--- a/fs/btrfs/verity.h
+++ b/fs/btrfs/verity.h
@@ -3,8 +3,13 @@
#ifndef BTRFS_VERITY_H
#define BTRFS_VERITY_H
+struct inode;
+struct btrfs_inode;
+
#ifdef CONFIG_FS_VERITY
+#include <linux/fsverity.h>
+
extern const struct fsverity_operations btrfs_verityops;
int btrfs_drop_verity_items(struct btrfs_inode *inode);
@@ -12,6 +17,8 @@ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
#else
+#include <linux/errno.h>
+
static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
{
return 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4c32497311d2..a2d07fa3cfdf 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -14,10 +14,8 @@
#include <linux/namei.h>
#include "misc.h"
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "rcu-string.h"
@@ -468,39 +466,39 @@ static noinline struct btrfs_fs_devices *find_fsid(
static int
btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
- int flush, struct bdev_handle **bdev_handle,
+ int flush, struct file **bdev_file,
struct btrfs_super_block **disk_super)
{
struct block_device *bdev;
int ret;
- *bdev_handle = bdev_open_by_path(device_path, flags, holder, NULL);
+ *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL);
- if (IS_ERR(*bdev_handle)) {
- ret = PTR_ERR(*bdev_handle);
+ if (IS_ERR(*bdev_file)) {
+ ret = PTR_ERR(*bdev_file);
goto error;
}
- bdev = (*bdev_handle)->bdev;
+ bdev = file_bdev(*bdev_file);
if (flush)
sync_blockdev(bdev);
ret = set_blocksize(bdev, BTRFS_BDEV_BLOCKSIZE);
if (ret) {
- bdev_release(*bdev_handle);
+ fput(*bdev_file);
goto error;
}
invalidate_bdev(bdev);
*disk_super = btrfs_read_dev_super(bdev);
if (IS_ERR(*disk_super)) {
ret = PTR_ERR(*disk_super);
- bdev_release(*bdev_handle);
+ fput(*bdev_file);
goto error;
}
return 0;
error:
- *bdev_handle = NULL;
+ *bdev_file = NULL;
return ret;
}
@@ -643,7 +641,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device, blk_mode_t flags,
void *holder)
{
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct btrfs_super_block *disk_super;
u64 devid;
int ret;
@@ -654,7 +652,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
return -EINVAL;
ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
- &bdev_handle, &disk_super);
+ &bdev_file, &disk_super);
if (ret)
return ret;
@@ -678,20 +676,20 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
fs_devices->seeding = true;
} else {
- if (bdev_read_only(bdev_handle->bdev))
+ if (bdev_read_only(file_bdev(bdev_file)))
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
else
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
}
- if (!bdev_nonrot(bdev_handle->bdev))
+ if (!bdev_nonrot(file_bdev(bdev_file)))
fs_devices->rotating = true;
- if (bdev_max_discard_sectors(bdev_handle->bdev))
+ if (bdev_max_discard_sectors(file_bdev(bdev_file)))
fs_devices->discardable = true;
- device->bdev_handle = bdev_handle;
- device->bdev = bdev_handle->bdev;
+ device->bdev_file = bdev_file;
+ device->bdev = file_bdev(bdev_file);
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
fs_devices->open_devices++;
@@ -706,7 +704,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
error_free_page:
btrfs_release_disk_super(disk_super);
- bdev_release(bdev_handle);
+ fput(bdev_file);
return -EINVAL;
}
@@ -769,8 +767,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (same_fsid_diff_dev) {
generate_random_uuid(fs_devices->fsid);
fs_devices->temp_fsid = true;
- pr_info("BTRFS: device %s using temp-fsid %pU\n",
- path, fs_devices->fsid);
+ pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n",
+ path, MAJOR(path_devt), MINOR(path_devt),
+ fs_devices->fsid);
}
mutex_lock(&fs_devices->device_list_mutex);
@@ -799,8 +798,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (fs_devices->opened) {
btrfs_err(NULL,
-"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
- path, fs_devices->fsid, current->comm,
+"device %s (%d:%d) belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
+ path, MAJOR(path_devt), MINOR(path_devt),
+ fs_devices->fsid, current->comm,
task_pid_nr(current));
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
@@ -826,13 +826,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (disk_super->label[0])
pr_info(
- "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
+"BTRFS: device label %s devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n",
disk_super->label, devid, found_transid, path,
+ MAJOR(path_devt), MINOR(path_devt),
current->comm, task_pid_nr(current));
else
pr_info(
- "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
+"BTRFS: device fsid %pU devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n",
disk_super->fsid, devid, found_transid, path,
+ MAJOR(path_devt), MINOR(path_devt),
current->comm, task_pid_nr(current));
} else if (!device->name || strcmp(device->name->str, path)) {
@@ -1015,10 +1017,10 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
if (device->devid == BTRFS_DEV_REPLACE_DEVID)
continue;
- if (device->bdev_handle) {
- bdev_release(device->bdev_handle);
+ if (device->bdev_file) {
+ fput(device->bdev_file);
device->bdev = NULL;
- device->bdev_handle = NULL;
+ device->bdev_file = NULL;
fs_devices->open_devices--;
}
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
@@ -1063,7 +1065,7 @@ static void btrfs_close_bdev(struct btrfs_device *device)
invalidate_bdev(device->bdev);
}
- bdev_release(device->bdev_handle);
+ fput(device->bdev_file);
}
static void btrfs_close_one_device(struct btrfs_device *device)
@@ -1316,7 +1318,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
struct btrfs_super_block *disk_super;
bool new_device_added = false;
struct btrfs_device *device = NULL;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
u64 bytenr, bytenr_orig;
int ret;
@@ -1339,18 +1341,18 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
* values temporarily, as the device paths of the fsid are the only
* required information for assembling the volume.
*/
- bdev_handle = bdev_open_by_path(path, flags, NULL, NULL);
- if (IS_ERR(bdev_handle))
- return ERR_CAST(bdev_handle);
+ bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL);
+ if (IS_ERR(bdev_file))
+ return ERR_CAST(bdev_file);
bytenr_orig = btrfs_sb_offset(0);
- ret = btrfs_sb_log_location_bdev(bdev_handle->bdev, 0, READ, &bytenr);
+ ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr);
if (ret) {
device = ERR_PTR(ret);
goto error_bdev_put;
}
- disk_super = btrfs_read_disk_super(bdev_handle->bdev, bytenr,
+ disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr,
bytenr_orig);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
@@ -1368,7 +1370,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
else
btrfs_free_stale_devices(devt, NULL);
- pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
+ pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
+ path, MAJOR(devt), MINOR(devt));
device = NULL;
goto free_disk_super;
}
@@ -1381,7 +1384,7 @@ free_disk_super:
btrfs_release_disk_super(disk_super);
error_bdev_put:
- bdev_release(bdev_handle);
+ fput(bdev_file);
return device;
}
@@ -1403,7 +1406,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
- physical_end - physical_start)) {
+ physical_end + 1 - physical_start)) {
*start = physical_end + 1;
return true;
}
@@ -2032,11 +2035,10 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
copy_num, ret);
}
-void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path)
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device)
{
int copy_num;
+ struct block_device *bdev = device->bdev;
if (!bdev)
return;
@@ -2052,12 +2054,12 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
/* Update ctime/mtime for device path for libblkid */
- update_dev_time(device_path);
+ update_dev_time(device->name->str);
}
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
- struct bdev_handle **bdev_handle)
+ struct file **bdev_file)
{
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
@@ -2166,7 +2168,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
btrfs_assign_next_active_device(device, NULL);
- if (device->bdev_handle) {
+ if (device->bdev_file) {
cur_devices->open_devices--;
/* remove sysfs entry */
btrfs_sysfs_remove_device(device);
@@ -2182,20 +2184,19 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
* free the device.
*
* We cannot call btrfs_close_bdev() here because we're holding the sb
- * write lock, and bdev_release() will pull in the ->open_mutex on
- * the block device and it's dependencies. Instead just flush the
- * device and let the caller do the final bdev_release.
+ * write lock, and fput() on the block device will pull in the
+ * ->open_mutex on the block device and it's dependencies. Instead
+ * just flush the device and let the caller do the final bdev_release.
*/
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
- btrfs_scratch_superblocks(fs_info, device->bdev,
- device->name->str);
+ btrfs_scratch_superblocks(fs_info, device);
if (device->bdev) {
sync_blockdev(device->bdev);
invalidate_bdev(device->bdev);
}
}
- *bdev_handle = device->bdev_handle;
+ *bdev_file = device->bdev_file;
synchronize_rcu();
btrfs_free_device(device);
@@ -2301,8 +2302,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
- tgtdev->name->str);
+ btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev);
btrfs_close_bdev(tgtdev);
synchronize_rcu();
@@ -2332,7 +2332,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
const char *path)
{
struct btrfs_super_block *disk_super;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
int ret;
if (!path || !path[0])
@@ -2350,7 +2350,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
}
ret = btrfs_get_bdev_and_sb(path, BLK_OPEN_READ, NULL, 0,
- &bdev_handle, &disk_super);
+ &bdev_file, &disk_super);
if (ret) {
btrfs_put_dev_args_from_path(args);
return ret;
@@ -2363,7 +2363,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
else
memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
btrfs_release_disk_super(disk_super);
- bdev_release(bdev_handle);
+ fput(bdev_file);
return 0;
}
@@ -2583,7 +2583,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct super_block *sb = fs_info->sb;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_fs_devices *seed_devices = NULL;
@@ -2596,12 +2596,12 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (sb_rdonly(sb) && !fs_devices->seeding)
return -EROFS;
- bdev_handle = bdev_open_by_path(device_path, BLK_OPEN_WRITE,
+ bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE,
fs_info->bdev_holder, NULL);
- if (IS_ERR(bdev_handle))
- return PTR_ERR(bdev_handle);
+ if (IS_ERR(bdev_file))
+ return PTR_ERR(bdev_file);
- if (!btrfs_check_device_zone_type(fs_info, bdev_handle->bdev)) {
+ if (!btrfs_check_device_zone_type(fs_info, file_bdev(bdev_file))) {
ret = -EINVAL;
goto error;
}
@@ -2613,11 +2613,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
locked = true;
}
- sync_blockdev(bdev_handle->bdev);
+ sync_blockdev(file_bdev(bdev_file));
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
- if (device->bdev == bdev_handle->bdev) {
+ if (device->bdev == file_bdev(bdev_file)) {
ret = -EEXIST;
rcu_read_unlock();
goto error;
@@ -2633,8 +2633,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
device->fs_info = fs_info;
- device->bdev_handle = bdev_handle;
- device->bdev = bdev_handle->bdev;
+ device->bdev_file = bdev_file;
+ device->bdev = file_bdev(bdev_file);
ret = lookup_bdev(device_path, &device->devt);
if (ret)
goto error_free_device;
@@ -2817,7 +2817,7 @@ error_free_zone:
error_free_device:
btrfs_free_device(device);
error:
- bdev_release(bdev_handle);
+ fput(bdev_file);
if (locked) {
mutex_unlock(&uuid_mutex);
up_write(&sb->s_umount);
@@ -3087,7 +3087,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
map = btrfs_find_chunk_map(fs_info, logical, length);
if (unlikely(!map)) {
- read_unlock(&fs_info->mapping_tree_lock);
btrfs_crit(fs_info,
"unable to find chunk map for logical %llu length %llu",
logical, length);
@@ -3095,7 +3094,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
}
if (unlikely(map->start > logical || map->start + map->chunk_len <= logical)) {
- read_unlock(&fs_info->mapping_tree_lock);
btrfs_crit(fs_info,
"found a bad chunk map, wanted %llu-%llu, found %llu-%llu",
logical, logical + length, map->start,
@@ -3395,7 +3393,17 @@ again:
mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * On the first search we would find chunk tree with
+ * offset -1, which is not possible. On subsequent
+ * loops this would find an existing item on an invalid
+ * offset (one less than the previous one, wrong
+ * alignment and size).
+ */
+ ret = -EUCLEAN;
+ goto error;
+ }
ret = btrfs_previous_item(chunk_root, path, key.objectid,
key.type);
@@ -3482,6 +3490,44 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
return 0;
}
+static void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
+ const struct btrfs_disk_balance_args *disk)
+{
+ memset(cpu, 0, sizeof(*cpu));
+
+ cpu->profiles = le64_to_cpu(disk->profiles);
+ cpu->usage = le64_to_cpu(disk->usage);
+ cpu->devid = le64_to_cpu(disk->devid);
+ cpu->pstart = le64_to_cpu(disk->pstart);
+ cpu->pend = le64_to_cpu(disk->pend);
+ cpu->vstart = le64_to_cpu(disk->vstart);
+ cpu->vend = le64_to_cpu(disk->vend);
+ cpu->target = le64_to_cpu(disk->target);
+ cpu->flags = le64_to_cpu(disk->flags);
+ cpu->limit = le64_to_cpu(disk->limit);
+ cpu->stripes_min = le32_to_cpu(disk->stripes_min);
+ cpu->stripes_max = le32_to_cpu(disk->stripes_max);
+}
+
+static void btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
+ const struct btrfs_balance_args *cpu)
+{
+ memset(disk, 0, sizeof(*disk));
+
+ disk->profiles = cpu_to_le64(cpu->profiles);
+ disk->usage = cpu_to_le64(cpu->usage);
+ disk->devid = cpu_to_le64(cpu->devid);
+ disk->pstart = cpu_to_le64(cpu->pstart);
+ disk->pend = cpu_to_le64(cpu->pend);
+ disk->vstart = cpu_to_le64(cpu->vstart);
+ disk->vend = cpu_to_le64(cpu->vend);
+ disk->target = cpu_to_le64(cpu->target);
+ disk->flags = cpu_to_le64(cpu->flags);
+ disk->limit = cpu_to_le64(cpu->limit);
+ disk->stripes_min = cpu_to_le32(cpu->stripes_min);
+ disk->stripes_max = cpu_to_le32(cpu->stripes_max);
+}
+
static int insert_balance_item(struct btrfs_fs_info *fs_info,
struct btrfs_balance_control *bctl)
{
@@ -3626,7 +3672,7 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info)
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
int ret;
- BUG_ON(!fs_info->balance_ctl);
+ ASSERT(fs_info->balance_ctl);
spin_lock(&fs_info->balance_lock);
fs_info->balance_ctl = NULL;
@@ -5946,6 +5992,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_chunk_map *map, int first,
int dev_replace_is_ongoing)
{
+ const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy);
int i;
int num_stripes;
int preferred_mirror;
@@ -5960,13 +6007,12 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
else
num_stripes = map->num_stripes;
- switch (fs_info->fs_devices->read_policy) {
+ switch (policy) {
default:
/* Shouldn't happen, just warn and use pid instead of failing */
- btrfs_warn_rl(fs_info,
- "unknown read_policy type %u, reset to pid",
- fs_info->fs_devices->read_policy);
- fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
+ btrfs_warn_rl(fs_info, "unknown read_policy type %u, reset to pid",
+ policy);
+ WRITE_ONCE(fs_info->fs_devices->read_policy, BTRFS_READ_POLICY_PID);
fallthrough;
case BTRFS_READ_POLICY_PID:
preferred_mirror = first + (current->pid % num_stripes);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 53f87f398da7..93854609a4d5 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -6,13 +6,28 @@
#ifndef BTRFS_VOLUMES_H
#define BTRFS_VOLUMES_H
+#include <linux/blk_types.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
#include <linux/sort.h>
-#include <linux/btrfs.h>
-#include "async-thread.h"
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/log2.h>
+#include <linux/kobject.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs.h>
#include "messages.h"
-#include "tree-checker.h"
#include "rcu-string.h"
+struct block_device;
+struct bdev_handle;
+struct btrfs_fs_info;
+struct btrfs_block_group;
+struct btrfs_trans_handle;
+struct btrfs_zoned_device_info;
+
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
extern struct mutex uuid_mutex;
@@ -77,7 +92,7 @@ enum btrfs_raid_types {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5)
-struct btrfs_zoned_device_info;
+struct btrfs_fs_devices;
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
@@ -90,7 +105,7 @@ struct btrfs_device {
u64 generation;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bdev;
struct btrfs_zoned_device_info *zone_info;
@@ -276,6 +291,25 @@ enum btrfs_read_policy {
BTRFS_NR_READ_POLICY,
};
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Checksum mode - offload it to workqueues or do it synchronously in
+ * btrfs_submit_chunk().
+ */
+enum btrfs_offload_csum_mode {
+ /*
+ * Choose offloading checksum or do it synchronously automatically.
+ * Do it synchronously if the checksum is fast, or offload to workqueues
+ * otherwise.
+ */
+ BTRFS_OFFLOAD_CSUM_AUTO,
+ /* Always offload checksum to workqueues. */
+ BTRFS_OFFLOAD_CSUM_FORCE_ON,
+ /* Never offload checksum to workqueues. */
+ BTRFS_OFFLOAD_CSUM_FORCE_OFF,
+};
+#endif
+
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
@@ -380,6 +414,11 @@ struct btrfs_fs_devices {
/* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy;
+
+#ifdef CONFIG_BTRFS_DEBUG
+ /* Checksum mode - offload it or do it synchronously. */
+ enum btrfs_offload_csum_mode offload_csum_mode;
+#endif
};
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
@@ -557,8 +596,6 @@ static inline void btrfs_free_chunk_map(struct btrfs_chunk_map *map)
}
}
-struct btrfs_balance_args;
-struct btrfs_balance_progress;
struct btrfs_balance_control {
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
@@ -661,7 +698,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
- struct bdev_handle **bdev_handle);
+ struct file **bdev_file);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
@@ -780,9 +817,7 @@ void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
-void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device);
enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 118118ca3e1d..b9376ea258ff 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -6,7 +6,11 @@
#ifndef BTRFS_XATTR_H
#define BTRFS_XATTR_H
-#include <linux/xattr.h>
+struct dentry;
+struct inode;
+struct qstr;
+struct xattr_handler;
+struct btrfs_trans_handle;
extern const struct xattr_handler * const btrfs_xattr_handlers[];
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 36cf1f0e338e..e5b3f2003896 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -354,18 +354,13 @@ done:
}
int zlib_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
int wbits = MAX_WBITS;
- unsigned long bytes_left;
- unsigned long total_out = 0;
- unsigned long pg_offset = 0;
-
- destlen = min_t(unsigned long, destlen, PAGE_SIZE);
- bytes_left = destlen;
+ unsigned long to_copy;
workspace->strm.next_in = data_in;
workspace->strm.avail_in = srclen;
@@ -390,60 +385,30 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
return -EIO;
}
- while (bytes_left > 0) {
- unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
-
- ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
- if (ret != Z_OK && ret != Z_STREAM_END)
- break;
-
- buf_start = total_out;
- total_out = workspace->strm.total_out;
-
- if (total_out == buf_start) {
- ret = -EIO;
- break;
- }
-
- if (total_out <= start_byte)
- goto next;
-
- if (total_out > start_byte && buf_start < start_byte)
- buf_offset = start_byte - buf_start;
- else
- buf_offset = 0;
-
- bytes = min(PAGE_SIZE - pg_offset,
- PAGE_SIZE - (buf_offset % PAGE_SIZE));
- bytes = min(bytes, bytes_left);
+ /*
+ * Everything (in/out buf) should be at most one sector, there should
+ * be no need to switch any input/output buffer.
+ */
+ ret = zlib_inflate(&workspace->strm, Z_FINISH);
+ to_copy = min(workspace->strm.total_out, destlen);
+ if (ret != Z_STREAM_END)
+ goto out;
- memcpy_to_page(dest_page, pg_offset,
- workspace->buf + buf_offset, bytes);
+ memcpy_to_page(dest_page, dest_pgoff, workspace->buf, to_copy);
- pg_offset += bytes;
- bytes_left -= bytes;
-next:
- workspace->strm.next_out = workspace->buf;
- workspace->strm.avail_out = workspace->buf_size;
- }
-
- if (ret != Z_STREAM_END && bytes_left != 0)
+out:
+ if (unlikely(to_copy != destlen)) {
+ pr_warn_ratelimited("BTRFS: inflate failed, decompressed=%lu expected=%zu\n",
+ to_copy, destlen);
ret = -EIO;
- else
+ } else {
ret = 0;
+ }
zlib_inflateEnd(&workspace->strm);
- /*
- * this should only happen if zlib returned fewer bytes than we
- * expected. btrfs_get_block is responsible for zeroing from the
- * end of the inline extent (destlen) to the end of the page
- */
- if (pg_offset < destlen) {
- memzero_page(dest_page, pg_offset, destlen - pg_offset);
- }
+ if (unlikely(to_copy < destlen))
+ memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
return ret;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 5bd76813b23f..5a3d5ec75c5a 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -12,10 +12,8 @@
#include "rcu-string.h"
#include "disk-io.h"
#include "block-group.h"
-#include "transaction.h"
#include "dev-replace.h"
#include "space-info.h"
-#include "super.h"
#include "fs.h"
#include "accessors.h"
#include "bio.h"
@@ -824,11 +822,14 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
reset = &zones[1];
if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
+ unsigned int nofs_flags;
+
ASSERT(sb_zone_is_full(reset));
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- reset->start, reset->len,
- GFP_NOFS);
+ reset->start, reset->len);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
@@ -974,11 +975,14 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
* explicit ZONE_FINISH is not necessary.
*/
if (zone->wp != zone->start + zone->capacity) {
+ unsigned int nofs_flags;
int ret;
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev,
REQ_OP_ZONE_FINISH, zone->start,
- zone->len, GFP_NOFS);
+ zone->len);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
}
@@ -996,11 +1000,13 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
{
+ unsigned int nofs_flags;
sector_t zone_sectors;
sector_t nr_sectors;
u8 zone_sectors_shift;
u32 sb_zone;
u32 nr_zones;
+ int ret;
zone_sectors = bdev_zone_sectors(bdev);
zone_sectors_shift = ilog2(zone_sectors);
@@ -1011,9 +1017,12 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
if (sb_zone + 1 >= nr_zones)
return -ENOENT;
- return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- zone_start_sector(sb_zone, bdev),
- zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ zone_start_sector(sb_zone, bdev),
+ zone_sectors * BTRFS_NR_SB_LOG_ZONES);
+ memalloc_nofs_restore(nofs_flags);
+ return ret;
}
/*
@@ -1124,12 +1133,14 @@ static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
u64 length, u64 *bytes)
{
+ unsigned int nofs_flags;
int ret;
*bytes = 0;
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET,
- physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT,
- GFP_NOFS);
+ physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
@@ -1639,6 +1650,15 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
}
out:
+ /* Reject non SINGLE data profiles without RST */
+ if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
+ (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ !fs_info->stripe_root) {
+ btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
+ btrfs_bg_type_to_raid_name(map->type));
+ return -EINVAL;
+ }
+
if (cache->alloc_offset > cache->zone_capacity) {
btrfs_err(fs_info,
"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
@@ -1670,6 +1690,7 @@ out:
}
bitmap_free(active);
kfree(zone_info);
+ btrfs_free_chunk_map(map);
return ret;
}
@@ -2055,6 +2076,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
map = block_group->physical_map;
+ spin_lock(&fs_info->zone_active_bgs_lock);
spin_lock(&block_group->lock);
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
ret = true;
@@ -2067,7 +2089,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
- spin_lock(&fs_info->zone_active_bgs_lock);
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_zoned_device_info *zinfo;
int reserved = 0;
@@ -2087,20 +2108,17 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
*/
if (atomic_read(&zinfo->active_zones_left) <= reserved) {
ret = false;
- spin_unlock(&fs_info->zone_active_bgs_lock);
goto out_unlock;
}
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
- spin_unlock(&fs_info->zone_active_bgs_lock);
goto out_unlock;
}
if (!is_data)
zinfo->reserved_active_zones--;
}
- spin_unlock(&fs_info->zone_active_bgs_lock);
/* Successfully activated all the zones */
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
@@ -2108,8 +2126,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
/* For the active block group list */
btrfs_get_block_group(block_group);
-
- spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
@@ -2117,6 +2133,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
out_unlock:
spin_unlock(&block_group->lock);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
return ret;
}
@@ -2238,14 +2255,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
struct btrfs_device *device = map->stripes[i].dev;
const u64 physical = map->stripes[i].physical;
struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ unsigned int nofs_flags;
if (zinfo->max_active_zones == 0)
continue;
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
- zinfo->zone_size >> SECTOR_SHIFT,
- GFP_NOFS);
+ zinfo->zone_size >> SECTOR_SHIFT);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index f573bda496fb..77c4321e331f 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -4,12 +4,27 @@
#define BTRFS_ZONED_H
#include <linux/types.h>
+#include <linux/atomic.h>
#include <linux/blkdev.h>
+#include <linux/blkzoned.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include "messages.h"
#include "volumes.h"
#include "disk-io.h"
#include "block-group.h"
#include "btrfs_inode.h"
+#include "fs.h"
+
+struct block_device;
+struct extent_buffer;
+struct btrfs_bio;
+struct btrfs_ordered_extent;
+struct btrfs_fs_info;
+struct btrfs_space_info;
+struct btrfs_eb_write_context;
+struct btrfs_fs_devices;
#define BTRFS_DEFAULT_RECLAIM_THRESH (75)
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 0d66db8bc1d4..92b3744b819b 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -18,8 +18,9 @@
#include <linux/slab.h>
#include <linux/zstd.h>
#include "misc.h"
+#include "fs.h"
#include "compression.h"
-#include "ctree.h"
+#include "super.h"
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
@@ -618,80 +619,48 @@ done:
}
int zstd_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
+ struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
zstd_dstream *stream;
int ret = 0;
- size_t ret2;
- unsigned long total_out = 0;
- unsigned long pg_offset = 0;
+ unsigned long to_copy = 0;
stream = zstd_init_dstream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
if (!stream) {
pr_warn("BTRFS: zstd_init_dstream failed\n");
- ret = -EIO;
goto finish;
}
- destlen = min_t(size_t, destlen, PAGE_SIZE);
-
workspace->in_buf.src = data_in;
workspace->in_buf.pos = 0;
workspace->in_buf.size = srclen;
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
- workspace->out_buf.size = PAGE_SIZE;
-
- ret2 = 1;
- while (pg_offset < destlen
- && workspace->in_buf.pos < workspace->in_buf.size) {
- unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
-
- /* Check if the frame is over and we still need more input */
- if (ret2 == 0) {
- pr_debug("BTRFS: zstd_decompress_stream ended early\n");
- ret = -EIO;
- goto finish;
- }
- ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
- &workspace->in_buf);
- if (zstd_is_error(ret2)) {
- pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
- zstd_get_error_code(ret2));
- ret = -EIO;
- goto finish;
- }
-
- buf_start = total_out;
- total_out += workspace->out_buf.pos;
- workspace->out_buf.pos = 0;
-
- if (total_out <= start_byte)
- continue;
-
- if (total_out > start_byte && buf_start < start_byte)
- buf_offset = start_byte - buf_start;
- else
- buf_offset = 0;
-
- bytes = min_t(unsigned long, destlen - pg_offset,
- workspace->out_buf.size - buf_offset);
-
- memcpy_to_page(dest_page, pg_offset,
- workspace->out_buf.dst + buf_offset, bytes);
-
- pg_offset += bytes;
+ workspace->out_buf.size = sectorsize;
+
+ /*
+ * Since both input and output buffers should not exceed one sector,
+ * one call should end the decompression.
+ */
+ ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
+ if (zstd_is_error(ret)) {
+ pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n",
+ zstd_get_error_code(ret));
+ goto finish;
}
- ret = 0;
+ to_copy = workspace->out_buf.pos;
+ memcpy_to_page(dest_page, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
- if (pg_offset < destlen) {
- memzero_page(dest_page, pg_offset, destlen - pg_offset);
+ /* Error or early end. */
+ if (unlikely(to_copy < destlen)) {
+ ret = -EIO;
+ memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
}
return ret;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index d3bcf601d3e5..4f73d23c2c46 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -55,7 +55,7 @@
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
- struct writeback_control *wbc);
+ enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -464,7 +464,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
* a successful fsync(). For example, ext2 indirect blocks need to be
* written back and waited upon before fsync() returns.
*
- * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
+ * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
* inode_has_buffers() and invalidate_inode_buffers() are provided for the
* management of a list of dependent buffers at ->i_mapping->i_private_list.
*
@@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -1944,7 +1945,8 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
}
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+ enum rw_hint write_hint,
struct writeback_control *wbc)
{
const enum req_op op = opf & REQ_OP_MASK;
@@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_write_hint = write_hint;
__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
void submit_bh(blk_opf_t opf, struct buffer_head *bh)
{
- submit_bh_wbc(opf, bh, NULL);
+ submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL);
}
EXPORT_SYMBOL(submit_bh);
@@ -3121,12 +3125,8 @@ void __init buffer_init(void)
unsigned long nrpages;
int ret;
- bh_cachep = kmem_cache_create("buffer_head",
- sizeof(struct buffer_head), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
- SLAB_MEM_SPREAD),
- NULL);
-
+ bh_cachep = KMEM_CACHE(buffer_head,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC);
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
*/
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index 7077f72e6f47..f449f7340aad 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -168,6 +168,8 @@ error_unsupported:
dput(root);
error_open_root:
cachefiles_end_secure(cache, saved_cred);
+ put_cred(cache->cache_cred);
+ cache->cache_cred = NULL;
error_getsec:
fscache_relinquish_cache(cache_cookie);
cache->cache = NULL;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 3f24905f4066..6465e2574230 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -816,6 +816,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
cachefiles_put_directory(cache->graveyard);
cachefiles_put_directory(cache->store);
mntput(cache->mnt);
+ put_cred(cache->cache_cred);
kfree(cache->rootdirname);
kfree(cache->secctx);
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index 5fd74ec60bef..4ba42f1fa3b4 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -539,6 +539,9 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
struct fscache_volume *volume = object->volume->vcookie;
size_t volume_key_size, cookie_key_size, data_len;
+ if (!object->ondemand)
+ return 0;
+
/*
* CacheFiles will firstly check the cache file under the root cache
* directory. If the coherency check failed, it will fallback to
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9c02f328c966..7fb4aae97412 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1452,7 +1452,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
if (flushing & CEPH_CAP_XATTR_EXCL) {
arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
arg->xattr_version = ci->i_xattrs.version;
- arg->xattr_buf = ci->i_xattrs.blob;
+ arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
} else {
arg->xattr_buf = NULL;
arg->old_xattr_buf = NULL;
@@ -1553,6 +1553,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
encode_cap_msg(msg, arg);
ceph_con_send(&arg->session->s_con, msg);
ceph_buffer_put(arg->old_xattr_buf);
+ ceph_buffer_put(arg->xattr_buf);
if (arg->wake)
wake_up_all(&ci->i_cap_wq);
}
@@ -2155,6 +2156,30 @@ retry:
ceph_cap_string(cap->implemented),
ceph_cap_string(revoking));
+ /* completed revocation? going down and there are no caps? */
+ if (revoking) {
+ if ((revoking & cap_used) == 0) {
+ doutc(cl, "completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /*
+ * If the "i_wrbuffer_ref" was increased by mmap or generic
+ * cache write just before the ceph_check_caps() is called,
+ * the Fb capability revoking will fail this time. Then we
+ * must wait for the BDI's delayed work to flush the dirty
+ * pages and to release the "i_wrbuffer_ref", which will cost
+ * at most 5 seconds. That means the MDS needs to wait at
+ * most 5 seconds to finished the Fb capability's revocation.
+ *
+ * Let's queue a writeback for it.
+ */
+ if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+ (revoking & CEPH_CAP_FILE_BUFFER))
+ queue_writeback = true;
+ }
+
if (cap == ci->i_auth_cap &&
(cap->issued & CEPH_CAP_FILE_WR)) {
/* request larger max_size from MDS? */
@@ -2182,30 +2207,6 @@ retry:
}
}
- /* completed revocation? going down and there are no caps? */
- if (revoking) {
- if ((revoking & cap_used) == 0) {
- doutc(cl, "completed revocation of %s\n",
- ceph_cap_string(cap->implemented & ~cap->issued));
- goto ack;
- }
-
- /*
- * If the "i_wrbuffer_ref" was increased by mmap or generic
- * cache write just before the ceph_check_caps() is called,
- * the Fb capability revoking will fail this time. Then we
- * must wait for the BDI's delayed work to flush the dirty
- * pages and to release the "i_wrbuffer_ref", which will cost
- * at most 5 seconds. That means the MDS needs to wait at
- * most 5 seconds to finished the Fb capability's revocation.
- *
- * Let's queue a writeback for it.
- */
- if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
- (revoking & CEPH_CAP_FILE_BUFFER))
- queue_writeback = true;
- }
-
/* want more caps from mds? */
if (want & ~cap->mds_wanted) {
if (want & ~(cap->mds_wanted | cap->issued))
@@ -3215,7 +3216,6 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
enum put_cap_refs_mode {
PUT_CAP_REFS_SYNC = 0,
- PUT_CAP_REFS_NO_CHECK,
PUT_CAP_REFS_ASYNC,
};
@@ -3331,11 +3331,6 @@ void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC);
}
-void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
-{
- __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK);
-}
-
/*
* Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
* context. Adjust per-snap dirty page accounting as appropriate.
@@ -4777,7 +4772,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
if (__ceph_caps_dirty(ci)) {
struct ceph_mds_client *mdsc =
ceph_inode_to_fs_client(inode)->mdsc;
- __cap_delay_requeue_front(mdsc, ci);
+
+ doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
+ ceph_vinop(inode));
+ spin_lock(&mdsc->cap_unlink_delay_lock);
+ ci->i_ceph_flags |= CEPH_I_FLUSH;
+ if (!list_empty(&ci->i_cap_delay_list))
+ list_del_init(&ci->i_cap_delay_list);
+ list_add_tail(&ci->i_cap_delay_list,
+ &mdsc->cap_unlink_delay_list);
+ spin_unlock(&mdsc->cap_unlink_delay_lock);
+
+ /*
+ * Fire the work immediately, because the MDS maybe
+ * waiting for caps release.
+ */
+ ceph_queue_cap_unlink_work(mdsc);
}
}
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0c25d326afc4..7b2e77517f23 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -78,6 +78,8 @@ struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
if (!inode)
return ERR_PTR(-ENOMEM);
+ inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+
if (!S_ISLNK(*mode)) {
err = ceph_pre_init_acls(dir, mode, as_ctx);
if (err < 0)
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index e07ad29ff8b9..ebf4ac0055dd 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -33,7 +33,7 @@ void __init ceph_flock_init(void)
static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
- struct inode *inode = file_inode(dst->fl_file);
+ struct inode *inode = file_inode(dst->c.flc_file);
atomic_inc(&ceph_inode(inode)->i_filelock_ref);
dst->fl_u.ceph.inode = igrab(inode);
}
@@ -110,17 +110,18 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
else
length = fl->fl_end - fl->fl_start + 1;
- owner = secure_addr(fl->fl_owner);
+ owner = secure_addr(fl->c.flc_owner);
doutc(cl, "rule: %d, op: %d, owner: %llx, pid: %llu, "
"start: %llu, length: %llu, wait: %d, type: %d\n",
- (int)lock_type, (int)operation, owner, (u64)fl->fl_pid,
- fl->fl_start, length, wait, fl->fl_type);
+ (int)lock_type, (int)operation, owner,
+ (u64) fl->c.flc_pid,
+ fl->fl_start, length, wait, fl->c.flc_type);
req->r_args.filelock_change.rule = lock_type;
req->r_args.filelock_change.type = cmd;
req->r_args.filelock_change.owner = cpu_to_le64(owner);
- req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
+ req->r_args.filelock_change.pid = cpu_to_le64((u64) fl->c.flc_pid);
req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
@@ -130,13 +131,13 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
err = ceph_mdsc_wait_request(mdsc, req, wait ?
ceph_lock_wait_for_completion : NULL);
if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
- fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+ fl->c.flc_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
- fl->fl_type = F_RDLCK;
+ fl->c.flc_type = F_RDLCK;
else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
else
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
@@ -150,8 +151,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
ceph_mdsc_put_request(req);
doutc(cl, "rule: %d, op: %d, pid: %llu, start: %llu, "
"length: %llu, wait: %d, type: %d, err code %d\n",
- (int)lock_type, (int)operation, (u64)fl->fl_pid,
- fl->fl_start, length, wait, fl->fl_type, err);
+ (int)lock_type, (int)operation, (u64) fl->c.flc_pid,
+ fl->fl_start, length, wait, fl->c.flc_type, err);
return err;
}
@@ -227,10 +228,10 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
static int try_unlock_file(struct file *file, struct file_lock *fl)
{
int err;
- unsigned int orig_flags = fl->fl_flags;
- fl->fl_flags |= FL_EXISTS;
+ unsigned int orig_flags = fl->c.flc_flags;
+ fl->c.flc_flags |= FL_EXISTS;
err = locks_lock_file_wait(file, fl);
- fl->fl_flags = orig_flags;
+ fl->c.flc_flags = orig_flags;
if (err == -ENOENT) {
if (!(orig_flags & FL_EXISTS))
err = 0;
@@ -253,13 +254,13 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
u8 wait = 0;
u8 lock_cmd;
- if (!(fl->fl_flags & FL_POSIX))
+ if (!(fl->c.flc_flags & FL_POSIX))
return -ENOLCK;
if (ceph_inode_is_shutdown(inode))
return -ESTALE;
- doutc(cl, "fl_owner: %p\n", fl->fl_owner);
+ doutc(cl, "fl_owner: %p\n", fl->c.flc_owner);
/* set wait bit as appropriate, then make command as Ceph expects it*/
if (IS_GETLK(cmd))
@@ -273,19 +274,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
- if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
+ if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl))
posix_lock_file(file, fl, NULL);
return err;
}
- if (F_RDLCK == fl->fl_type)
+ if (lock_is_read(fl))
lock_cmd = CEPH_LOCK_SHARED;
- else if (F_WRLCK == fl->fl_type)
+ else if (lock_is_write(fl))
lock_cmd = CEPH_LOCK_EXCL;
else
lock_cmd = CEPH_LOCK_UNLOCK;
- if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) {
+ if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl)) {
err = try_unlock_file(file, fl);
if (err <= 0)
return err;
@@ -293,7 +294,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
if (!err) {
- if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) {
+ if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->c.flc_type) {
doutc(cl, "locking locally\n");
err = posix_lock_file(file, fl, NULL);
if (err) {
@@ -319,13 +320,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
u8 wait = 0;
u8 lock_cmd;
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
if (ceph_inode_is_shutdown(inode))
return -ESTALE;
- doutc(cl, "fl_file: %p\n", fl->fl_file);
+ doutc(cl, "fl_file: %p\n", fl->c.flc_file);
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
@@ -333,7 +334,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
- if (F_UNLCK == fl->fl_type)
+ if (lock_is_unlock(fl))
locks_lock_file_wait(file, fl);
return err;
}
@@ -341,14 +342,14 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
if (IS_SETLKW(cmd))
wait = 1;
- if (F_RDLCK == fl->fl_type)
+ if (lock_is_read(fl))
lock_cmd = CEPH_LOCK_SHARED;
- else if (F_WRLCK == fl->fl_type)
+ else if (lock_is_write(fl))
lock_cmd = CEPH_LOCK_EXCL;
else
lock_cmd = CEPH_LOCK_UNLOCK;
- if (F_UNLCK == fl->fl_type) {
+ if (lock_is_unlock(fl)) {
err = try_unlock_file(file, fl);
if (err <= 0)
return err;
@@ -356,7 +357,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
inode, lock_cmd, wait, fl);
- if (!err && F_UNLCK != fl->fl_type) {
+ if (!err && F_UNLCK != fl->c.flc_type) {
err = locks_lock_file_wait(file, fl);
if (err) {
ceph_lock_message(CEPH_LOCK_FLOCK,
@@ -385,9 +386,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
ctx = locks_inode_context(inode);
if (ctx) {
spin_lock(&ctx->flc_lock);
- list_for_each_entry(lock, &ctx->flc_posix, fl_list)
+ for_each_file_lock(lock, &ctx->flc_posix)
++(*fcntl_count);
- list_for_each_entry(lock, &ctx->flc_flock, fl_list)
+ for_each_file_lock(lock, &ctx->flc_flock)
++(*flock_count);
spin_unlock(&ctx->flc_lock);
}
@@ -408,10 +409,10 @@ static int lock_to_ceph_filelock(struct inode *inode,
cephlock->start = cpu_to_le64(lock->fl_start);
cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
cephlock->client = cpu_to_le64(0);
- cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
- cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+ cephlock->pid = cpu_to_le64((u64) lock->c.flc_pid);
+ cephlock->owner = cpu_to_le64(secure_addr(lock->c.flc_owner));
- switch (lock->fl_type) {
+ switch (lock->c.flc_type) {
case F_RDLCK:
cephlock->type = CEPH_LOCK_SHARED;
break;
@@ -422,7 +423,8 @@ static int lock_to_ceph_filelock(struct inode *inode,
cephlock->type = CEPH_LOCK_UNLOCK;
break;
default:
- doutc(cl, "Have unknown lock type %d\n", lock->fl_type);
+ doutc(cl, "Have unknown lock type %d\n",
+ lock->c.flc_type);
err = -EINVAL;
}
@@ -453,7 +455,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
return 0;
spin_lock(&ctx->flc_lock);
- list_for_each_entry(lock, &ctx->flc_posix, fl_list) {
+ for_each_file_lock(lock, &ctx->flc_posix) {
++seen_fcntl;
if (seen_fcntl > num_fcntl_locks) {
err = -ENOSPC;
@@ -464,7 +466,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
goto fail;
++l;
}
- list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
+ for_each_file_lock(lock, &ctx->flc_flock) {
++seen_flock;
if (seen_flock > num_flock_locks) {
err = -ENOSPC;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 548d1de379f3..3ab9c268a8bb 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1089,7 +1089,7 @@ void ceph_mdsc_release_request(struct kref *kref)
struct ceph_mds_request *req = container_of(kref,
struct ceph_mds_request,
r_kref);
- ceph_mdsc_release_dir_caps_no_check(req);
+ ceph_mdsc_release_dir_caps_async(req);
destroy_reply_info(&req->r_reply_info);
if (req->r_request)
ceph_msg_put(req->r_request);
@@ -2484,6 +2484,50 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
}
}
+void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc)
+{
+ struct ceph_client *cl = mdsc->fsc->client;
+ if (mdsc->stopping)
+ return;
+
+ if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_unlink_work)) {
+ doutc(cl, "caps unlink work queued\n");
+ } else {
+ doutc(cl, "failed to queue caps unlink work\n");
+ }
+}
+
+static void ceph_cap_unlink_work(struct work_struct *work)
+{
+ struct ceph_mds_client *mdsc =
+ container_of(work, struct ceph_mds_client, cap_unlink_work);
+ struct ceph_client *cl = mdsc->fsc->client;
+
+ doutc(cl, "begin\n");
+ spin_lock(&mdsc->cap_unlink_delay_lock);
+ while (!list_empty(&mdsc->cap_unlink_delay_list)) {
+ struct ceph_inode_info *ci;
+ struct inode *inode;
+
+ ci = list_first_entry(&mdsc->cap_unlink_delay_list,
+ struct ceph_inode_info,
+ i_cap_delay_list);
+ list_del_init(&ci->i_cap_delay_list);
+
+ inode = igrab(&ci->netfs.inode);
+ if (inode) {
+ spin_unlock(&mdsc->cap_unlink_delay_lock);
+ doutc(cl, "on %p %llx.%llx\n", inode,
+ ceph_vinop(inode));
+ ceph_check_caps(ci, CHECK_CAPS_FLUSH);
+ iput(inode);
+ spin_lock(&mdsc->cap_unlink_delay_lock);
+ }
+ }
+ spin_unlock(&mdsc->cap_unlink_delay_lock);
+ doutc(cl, "done\n");
+}
+
/*
* requests
*/
@@ -4261,7 +4305,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
}
}
-void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
+void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req)
{
struct ceph_client *cl = req->r_mdsc->fsc->client;
int dcaps;
@@ -4269,8 +4313,7 @@ void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
dcaps = xchg(&req->r_dir_caps, 0);
if (dcaps) {
doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
- ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
- dcaps);
+ ceph_put_cap_refs_async(ceph_inode(req->r_parent), dcaps);
}
}
@@ -4306,7 +4349,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
if (req->r_session->s_mds != session->s_mds)
continue;
- ceph_mdsc_release_dir_caps_no_check(req);
+ ceph_mdsc_release_dir_caps_async(req);
__send_request(session, req, true);
}
@@ -5360,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_LIST_HEAD(&mdsc->cap_delay_list);
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
+ INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
+ spin_lock_init(&mdsc->cap_unlink_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->last_cap_flush_tid = 1;
@@ -5368,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
spin_lock_init(&mdsc->cap_dirty_lock);
init_waitqueue_head(&mdsc->cap_flushing_wq);
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
+ INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work);
err = ceph_metric_init(&mdsc->metric);
if (err)
goto err_mdsmap;
@@ -5641,6 +5687,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
ceph_cleanup_global_and_empty_realms(mdsc);
cancel_work_sync(&mdsc->cap_reclaim_work);
+ cancel_work_sync(&mdsc->cap_unlink_work);
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
doutc(cl, "done\n");
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 2e6ddaa13d72..03f8ff00874f 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -462,6 +462,8 @@ struct ceph_mds_client {
unsigned long last_renew_caps; /* last time we renewed our caps */
struct list_head cap_delay_list; /* caps with delayed release */
spinlock_t cap_delay_lock; /* protects cap_delay_list */
+ struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
+ spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
@@ -475,6 +477,8 @@ struct ceph_mds_client {
struct work_struct cap_reclaim_work;
atomic_t cap_reclaim_pending;
+ struct work_struct cap_unlink_work;
+
/*
* Cap reservations
*
@@ -552,7 +556,7 @@ extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req);
extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
-extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req);
+extern void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req);
static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
{
kref_get(&req->r_kref);
@@ -574,6 +578,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
+extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc);
extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, int mds, void *),
void *arg);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index fae97c25ce58..8109aba66e02 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
ceph_decode_skip_8(p, end, bad_ext);
/* required_client_features */
ceph_decode_skip_set(p, end, 64, bad_ext);
+ /* bal_rank_mask */
+ ceph_decode_skip_string(p, end, bad_ext);
+ }
+ if (mdsmap_ev >= 18) {
ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
- } else {
- /* This forces the usage of the (sync) SETXATTR Op */
- m->m_max_xattr_size = 0;
}
bad_ext:
doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h
index 89f1931f1ba6..1f2171dd01bf 100644
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -27,7 +27,11 @@ struct ceph_mdsmap {
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u64 m_max_file_size;
- u64 m_max_xattr_size; /* maximum size for xattrs blob */
+ /*
+ * maximum size for xattrs blob.
+ * Zeroed by default to force the usage of the (sync) SETXATTR Op.
+ */
+ u64 m_max_xattr_size;
u32 m_max_mds; /* expected up:active mds number */
u32 m_num_active_mds; /* actual up:active mds number */
u32 possible_max_rank; /* possible max rank index */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 5ec102f6b1ac..885cb5d4e771 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -928,36 +928,36 @@ static int __init init_caches(void)
ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
sizeof(struct ceph_inode_info),
__alignof__(struct ceph_inode_info),
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT, ceph_inode_init_once);
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ ceph_inode_init_once);
if (!ceph_inode_cachep)
return -ENOMEM;
- ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
+ ceph_cap_cachep = KMEM_CACHE(ceph_cap, 0);
if (!ceph_cap_cachep)
goto bad_cap;
- ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD);
+ ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, 0);
if (!ceph_cap_snap_cachep)
goto bad_cap_snap;
ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ SLAB_RECLAIM_ACCOUNT);
if (!ceph_cap_flush_cachep)
goto bad_cap_flush;
ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ SLAB_RECLAIM_ACCOUNT);
if (!ceph_dentry_cachep)
goto bad_dentry;
- ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
+ ceph_file_cachep = KMEM_CACHE(ceph_file_info, 0);
if (!ceph_file_cachep)
goto bad_file;
- ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
+ ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, 0);
if (!ceph_dir_file_cachep)
goto bad_dir_file;
- ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD);
+ ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, 0);
if (!ceph_mds_request_cachep)
goto bad_mds_req;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b06e2bc86221..b63b4cd9b5b6 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1255,8 +1255,6 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had);
-extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
- int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void __ceph_remove_capsnap(struct inode *inode,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 0c7c2528791e..6898dc621011 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -24,6 +24,8 @@
#include <linux/pid_namespace.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/vmalloc.h>
#include <linux/coda.h>
@@ -70,8 +72,8 @@ int __init coda_init_inodecache(void)
{
coda_inode_cachep = kmem_cache_create("coda_inode_cache",
sizeof(struct coda_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT, init_once);
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ init_once);
if (coda_inode_cachep == NULL)
return -ENOMEM;
return 0;
@@ -87,10 +89,10 @@ void coda_destroy_inodecache(void)
kmem_cache_destroy(coda_inode_cachep);
}
-static int coda_remount(struct super_block *sb, int *flags, char *data)
+static int coda_reconfigure(struct fs_context *fc)
{
- sync_filesystem(sb);
- *flags |= SB_NOATIME;
+ sync_filesystem(fc->root->d_sb);
+ fc->sb_flags |= SB_NOATIME;
return 0;
}
@@ -102,78 +104,102 @@ static const struct super_operations coda_super_operations =
.evict_inode = coda_evict_inode,
.put_super = coda_put_super,
.statfs = coda_statfs,
- .remount_fs = coda_remount,
};
-static int get_device_index(struct coda_mount_data *data)
+struct coda_fs_context {
+ int idx;
+};
+
+enum {
+ Opt_fd,
+};
+
+static const struct fs_parameter_spec coda_param_specs[] = {
+ fsparam_fd ("fd", Opt_fd),
+ {}
+};
+
+static int coda_parse_fd(struct fs_context *fc, int fd)
{
+ struct coda_fs_context *ctx = fc->fs_private;
struct fd f;
struct inode *inode;
int idx;
- if (data == NULL) {
- pr_warn("%s: Bad mount data\n", __func__);
- return -1;
- }
-
- if (data->version != CODA_MOUNT_VERSION) {
- pr_warn("%s: Bad mount version\n", __func__);
- return -1;
- }
-
- f = fdget(data->fd);
+ f = fdget(fd);
if (!f.file)
- goto Ebadf;
+ return -EBADF;
inode = file_inode(f.file);
if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
fdput(f);
- goto Ebadf;
+ return invalf(fc, "code: Not coda psdev");
}
idx = iminor(inode);
fdput(f);
- if (idx < 0 || idx >= MAX_CODADEVS) {
- pr_warn("%s: Bad minor number\n", __func__);
- return -1;
+ if (idx < 0 || idx >= MAX_CODADEVS)
+ return invalf(fc, "coda: Bad minor number");
+ ctx->idx = idx;
+ return 0;
+}
+
+static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, coda_param_specs, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_fd:
+ return coda_parse_fd(fc, result.uint_32);
}
- return idx;
-Ebadf:
- pr_warn("%s: Bad file\n", __func__);
- return -1;
+ return 0;
+}
+
+/*
+ * Parse coda's binary mount data form. We ignore any errors and go with index
+ * 0 if we get one for backward compatibility.
+ */
+static int coda_parse_monolithic(struct fs_context *fc, void *_data)
+{
+ struct coda_mount_data *data = _data;
+
+ if (!data)
+ return invalf(fc, "coda: Bad mount data");
+
+ if (data->version != CODA_MOUNT_VERSION)
+ return invalf(fc, "coda: Bad mount version");
+
+ coda_parse_fd(fc, data->fd);
+ return 0;
}
-static int coda_fill_super(struct super_block *sb, void *data, int silent)
+static int coda_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct coda_fs_context *ctx = fc->fs_private;
struct inode *root = NULL;
struct venus_comm *vc;
struct CodaFid fid;
int error;
- int idx;
-
- if (task_active_pid_ns(current) != &init_pid_ns)
- return -EINVAL;
-
- idx = get_device_index((struct coda_mount_data *) data);
- /* Ignore errors in data, for backward compatibility */
- if(idx == -1)
- idx = 0;
-
- pr_info("%s: device index: %i\n", __func__, idx);
+ infof(fc, "coda: device index: %i\n", ctx->idx);
- vc = &coda_comms[idx];
+ vc = &coda_comms[ctx->idx];
mutex_lock(&vc->vc_mutex);
if (!vc->vc_inuse) {
- pr_warn("%s: No pseudo device\n", __func__);
+ errorf(fc, "coda: No pseudo device");
error = -EINVAL;
goto unlock_out;
}
if (vc->vc_sb) {
- pr_warn("%s: Device already mounted\n", __func__);
+ errorf(fc, "coda: Device already mounted");
error = -EBUSY;
goto unlock_out;
}
@@ -313,18 +339,45 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-/* init_coda: used by filesystems.c to register coda */
+static int coda_get_tree(struct fs_context *fc)
+{
+ if (task_active_pid_ns(current) != &init_pid_ns)
+ return -EINVAL;
-static struct dentry *coda_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+ return get_tree_nodev(fc, coda_fill_super);
+}
+
+static void coda_free_fc(struct fs_context *fc)
{
- return mount_nodev(fs_type, flags, data, coda_fill_super);
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations coda_context_ops = {
+ .free = coda_free_fc,
+ .parse_param = coda_parse_param,
+ .parse_monolithic = coda_parse_monolithic,
+ .get_tree = coda_get_tree,
+ .reconfigure = coda_reconfigure,
+};
+
+static int coda_init_fs_context(struct fs_context *fc)
+{
+ struct coda_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct coda_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ fc->fs_private = ctx;
+ fc->ops = &coda_context_ops;
+ return 0;
}
struct file_system_type coda_fs_type = {
.owner = THIS_MODULE,
.name = "coda",
- .mount = coda_mount,
+ .init_fs_context = coda_init_fs_context,
+ .parameters = coda_param_specs,
.kill_sb = kill_anon_super,
.fs_flags = FS_BINARY_MOUNTDATA,
};
diff --git a/fs/coredump.c b/fs/coredump.c
index f258c17c1841..be6403b4b14b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -872,6 +872,9 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
loff_t pos;
ssize_t n;
+ if (!page)
+ return 0;
+
if (cprm->to_skip) {
if (!__dump_skip(cprm, cprm->to_skip))
return 0;
@@ -884,7 +887,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
pos = file->f_pos;
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
- iov_iter_set_copy_mc(&iter);
n = __kernel_write_iter(cprm->file, &iter, &pos);
if (n != PAGE_SIZE)
return 0;
@@ -895,10 +897,44 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 1;
}
+/*
+ * If we might get machine checks from kernel accesses during the
+ * core dump, let's get those errors early rather than during the
+ * IO. This is not performance-critical enough to warrant having
+ * all the machine check logic in the iovec paths.
+ */
+#ifdef copy_mc_to_kernel
+
+#define dump_page_alloc() alloc_page(GFP_KERNEL)
+#define dump_page_free(x) __free_page(x)
+static struct page *dump_page_copy(struct page *src, struct page *dst)
+{
+ void *buf = kmap_local_page(src);
+ size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE);
+ kunmap_local(buf);
+ return left ? NULL : dst;
+}
+
+#else
+
+/* We just want to return non-NULL; it's never used. */
+#define dump_page_alloc() ERR_PTR(-EINVAL)
+#define dump_page_free(x) ((void)(x))
+static inline struct page *dump_page_copy(struct page *src, struct page *dst)
+{
+ return src;
+}
+#endif
+
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len)
{
unsigned long addr;
+ struct page *dump_page;
+
+ dump_page = dump_page_alloc();
+ if (!dump_page)
+ return 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page;
@@ -912,14 +948,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- int stop = !dump_emit_page(cprm, page);
+ int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
put_page(page);
- if (stop)
+ if (stop) {
+ dump_page_free(dump_page);
return 0;
+ }
} else {
dump_skip(cprm, PAGE_SIZE);
}
}
+ dump_page_free(dump_page);
return 1;
}
#endif
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 60dbfa0f8805..39e75131fd5a 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- bdev_release(sb->s_bdev_handle);
+ fput(sb->s_bdev_file);
}
kfree(sbi);
}
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 7b3fc189593a..0ad52fbe51c9 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -74,13 +74,7 @@ struct fscrypt_nokey_name {
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
- if (str->len == 1 && str->name[0] == '.')
- return true;
-
- if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
- return true;
-
- return false;
+ return is_dot_dotdot(str->name, str->len);
}
/**
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 1892356cf924..8371e4e1f596 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -222,16 +222,19 @@ struct fscrypt_inode_info {
struct fscrypt_prepared_key ci_enc_key;
/* True if ci_enc_key should be freed when this struct is freed */
- bool ci_owns_key;
+ u8 ci_owns_key : 1;
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
/*
* True if this inode will use inline encryption (blk-crypto) instead of
* the traditional filesystem-layer encryption.
*/
- bool ci_inlinecrypt;
+ u8 ci_inlinecrypt : 1;
#endif
+ /* True if ci_dirhash_key is initialized */
+ u8 ci_dirhash_key_initialized : 1;
+
/*
* log2 of the data unit size (granularity of contents encryption) of
* this file. This is computable from ci_policy and ci_inode but is
@@ -242,6 +245,9 @@ struct fscrypt_inode_info {
/* Cached value: log2 of number of data units per FS block */
u8 ci_data_units_per_block_bits;
+ /* Hashed inode number. Only set for IV_INO_LBLK_32 */
+ u32 ci_hashed_ino;
+
/*
* Encryption mode used for this inode. It corresponds to either the
* contents or filenames encryption mode, depending on the inode type.
@@ -276,16 +282,12 @@ struct fscrypt_inode_info {
* the plaintext filenames -- currently just casefolded directories.
*/
siphash_key_t ci_dirhash_key;
- bool ci_dirhash_key_initialized;
/* The encryption policy used by this inode */
union fscrypt_policy ci_policy;
/* This inode's nonce, copied from the fscrypt_context */
u8 ci_nonce[FSCRYPT_FILE_NONCE_SIZE];
-
- /* Hashed inode number. Only set for IV_INO_LBLK_32 */
- u32 ci_hashed_ino;
};
typedef enum {
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 52504dd478d3..104771c3d3f6 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -102,11 +102,8 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
if (err && err != -ENOENT)
return err;
- if (fname->is_nokey_name) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags |= DCACHE_NOKEY_NAME;
- spin_unlock(&dentry->d_lock);
- }
+ fscrypt_prepare_dentry(dentry, fname->is_nokey_name);
+
return err;
}
EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
@@ -131,12 +128,10 @@ EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry)
{
int err = fscrypt_get_encryption_info(dir, true);
+ bool is_nokey_name = (!err && !fscrypt_has_encryption_key(dir));
+
+ fscrypt_prepare_dentry(dentry, is_nokey_name);
- if (!err && !fscrypt_has_encryption_key(dir)) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags |= DCACHE_NOKEY_NAME;
- spin_unlock(&dentry->d_lock);
- }
return err;
}
EXPORT_SYMBOL_GPL(fscrypt_prepare_lookup_partial);
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 0edf0b58daa7..6681a71625f0 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -74,8 +74,12 @@ void fscrypt_put_master_key(struct fscrypt_master_key *mk)
* that concurrent keyring lookups can no longer find it.
*/
WARN_ON_ONCE(refcount_read(&mk->mk_active_refs) != 0);
- key_put(mk->mk_users);
- mk->mk_users = NULL;
+ if (mk->mk_users) {
+ /* Clear the keyring so the quota gets released right away. */
+ keyring_clear(mk->mk_users);
+ key_put(mk->mk_users);
+ mk->mk_users = NULL;
+ }
call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key);
}
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index d71f7c799e79..b4fe01ea4bd4 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -23,7 +23,7 @@ struct fscrypt_mode fscrypt_modes[] = {
.blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS,
},
[FSCRYPT_MODE_AES_256_CTS] = {
- .friendly_name = "AES-256-CTS-CBC",
+ .friendly_name = "AES-256-CBC-CTS",
.cipher_str = "cts(cbc(aes))",
.keysize = 32,
.security_strength = 32,
@@ -38,7 +38,7 @@ struct fscrypt_mode fscrypt_modes[] = {
.blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
},
[FSCRYPT_MODE_AES_128_CTS] = {
- .friendly_name = "AES-128-CTS-CBC",
+ .friendly_name = "AES-128-CBC-CTS",
.cipher_str = "cts(cbc(aes))",
.keysize = 16,
.security_strength = 16,
@@ -53,7 +53,7 @@ struct fscrypt_mode fscrypt_modes[] = {
.blk_crypto_mode = BLK_ENCRYPTION_MODE_SM4_XTS,
},
[FSCRYPT_MODE_SM4_CTS] = {
- .friendly_name = "SM4-CTS-CBC",
+ .friendly_name = "SM4-CBC-CTS",
.cipher_str = "cts(cbc(sm4))",
.keysize = 16,
.security_strength = 16,
@@ -687,7 +687,7 @@ int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported)
/**
* fscrypt_prepare_new_inode() - prepare to create a new inode in a directory
* @dir: a possibly-encrypted directory
- * @inode: the new inode. ->i_mode must be set already.
+ * @inode: the new inode. ->i_mode and ->i_blkbits must be set already.
* ->i_ino doesn't need to be set yet.
* @encrypt_ret: (output) set to %true if the new inode will be encrypted
*
@@ -717,6 +717,9 @@ int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode,
if (IS_ERR(policy))
return PTR_ERR(policy);
+ if (WARN_ON_ONCE(inode->i_blkbits == 0))
+ return -EINVAL;
+
if (WARN_ON_ONCE(inode->i_mode == 0))
return -EINVAL;
diff --git a/fs/dcache.c b/fs/dcache.c
index b813528fb147..71a8e943a0fa 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3061,7 +3061,10 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
if (d_unhashed(dentry) || !dentry->d_inode)
return D_WALK_SKIP;
- dentry->d_lockref.count--;
+ if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
+ dentry->d_flags |= DCACHE_GENOCIDE;
+ dentry->d_lockref.count--;
+ }
}
return D_WALK_CONTINUE;
}
@@ -3136,7 +3139,7 @@ static void __init dcache_init(void)
* of the dcache.
*/
dentry_cache = KMEM_CACHE_USERCOPY(dentry,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT,
d_iname);
/* Hash may have been set up in dcache_init_early */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 60456263a338..62c97ff9e852 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_end_io = dio_bio_end_io;
if (dio->is_pinned)
bio_set_flag(bio, BIO_PAGE_PINNED);
+ bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint;
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index d814c5121367..9ca83ef70ed1 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -138,14 +138,14 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
op->info.optype = DLM_PLOCK_OP_LOCK;
- op->info.pid = fl->fl_pid;
- op->info.ex = (fl->fl_type == F_WRLCK);
- op->info.wait = !!(fl->fl_flags & FL_SLEEP);
+ op->info.pid = fl->c.flc_pid;
+ op->info.ex = lock_is_write(fl);
+ op->info.wait = !!(fl->c.flc_flags & FL_SLEEP);
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- op->info.owner = (__u64)(long)fl->fl_owner;
+ op->info.owner = (__u64)(long) fl->c.flc_owner;
/* async handling */
if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
@@ -258,7 +258,7 @@ static int dlm_plock_callback(struct plock_op *op)
}
/* got fs lock; bookkeep locally as well: */
- flc->fl_flags &= ~FL_SLEEP;
+ flc->c.flc_flags &= ~FL_SLEEP;
if (posix_lock_file(file, flc, NULL)) {
/*
* This can only happen in the case of kmalloc() failure.
@@ -291,7 +291,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
struct dlm_ls *ls;
struct plock_op *op;
int rv;
- unsigned char fl_flags = fl->fl_flags;
+ unsigned char saved_flags = fl->c.flc_flags;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
@@ -304,7 +304,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
/* cause the vfs unlock to return ENOENT if lock is not found */
- fl->fl_flags |= FL_EXISTS;
+ fl->c.flc_flags |= FL_EXISTS;
rv = locks_lock_file_wait(file, fl);
if (rv == -ENOENT) {
@@ -317,14 +317,14 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = fl->fl_pid;
+ op->info.pid = fl->c.flc_pid;
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- op->info.owner = (__u64)(long)fl->fl_owner;
+ op->info.owner = (__u64)(long) fl->c.flc_owner;
- if (fl->fl_flags & FL_CLOSE) {
+ if (fl->c.flc_flags & FL_CLOSE) {
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
rv = 0;
@@ -345,7 +345,7 @@ out_free:
dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
- fl->fl_flags = fl_flags;
+ fl->c.flc_flags = saved_flags;
return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_unlock);
@@ -375,14 +375,14 @@ int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
return -EINVAL;
memset(&info, 0, sizeof(info));
- info.pid = fl->fl_pid;
- info.ex = (fl->fl_type == F_WRLCK);
+ info.pid = fl->c.flc_pid;
+ info.ex = lock_is_write(fl);
info.fsid = ls->ls_global_id;
dlm_put_lockspace(ls);
info.number = number;
info.start = fl->fl_start;
info.end = fl->fl_end;
- info.owner = (__u64)(long)fl->fl_owner;
+ info.owner = (__u64)(long) fl->c.flc_owner;
rv = do_lock_cancel(&info);
switch (rv) {
@@ -437,13 +437,13 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
op->info.optype = DLM_PLOCK_OP_GET;
- op->info.pid = fl->fl_pid;
- op->info.ex = (fl->fl_type == F_WRLCK);
+ op->info.pid = fl->c.flc_pid;
+ op->info.ex = lock_is_write(fl);
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- op->info.owner = (__u64)(long)fl->fl_owner;
+ op->info.owner = (__u64)(long) fl->c.flc_owner;
send_op(op);
wait_event(recv_wq, (op->done != 0));
@@ -455,16 +455,16 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = op->info.rv;
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
if (rv == -ENOENT)
rv = 0;
else if (rv > 0) {
locks_init_lock(fl);
- fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
- fl->fl_flags = FL_POSIX;
- fl->fl_pid = op->info.pid;
+ fl->c.flc_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_pid = op->info.pid;
if (op->info.nodeid != dlm_our_nodeid())
- fl->fl_pid = -fl->fl_pid;
+ fl->c.flc_pid = -fl->c.flc_pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
rv = 0;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 03bd55069d86..2fe0f3af1a08 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1949,16 +1949,6 @@ out:
return rc;
}
-static bool is_dot_dotdot(const char *name, size_t name_size)
-{
- if (name_size == 1 && name[0] == '.')
- return true;
- else if (name_size == 2 && name[0] == '.' && name[1] == '.')
- return true;
-
- return false;
-}
-
/**
* ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext
* @plaintext_name: The plaintext name
diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
index 169252e6dc46..f7206158ee81 100644
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -38,7 +38,7 @@ struct efivar_entry {
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
struct list_head *),
- void *data, bool duplicates, struct list_head *head);
+ void *data, struct list_head *head);
int efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 6038dd39367a..bb14462f6d99 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -343,12 +343,7 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- err = efivar_init(efivarfs_callback, (void *)sb, true,
- &sfi->efivarfs_list);
- if (err)
- efivar_entry_iter(efivarfs_destroy, &sfi->efivarfs_list, NULL);
-
- return err;
+ return efivar_init(efivarfs_callback, sb, &sfi->efivarfs_list);
}
static int efivarfs_get_tree(struct fs_context *fc)
diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c
index 114ff0fd4e55..4d722af1014f 100644
--- a/fs/efivarfs/vars.c
+++ b/fs/efivarfs/vars.c
@@ -361,7 +361,6 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
* efivar_init - build the initial list of EFI variables
* @func: callback function to invoke for every variable
* @data: function-specific data to pass to @func
- * @duplicates: error if we encounter duplicates on @head?
* @head: initialised head of variable list
*
* Get every EFI variable from the firmware and invoke @func. @func
@@ -371,9 +370,9 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
*/
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
struct list_head *),
- void *data, bool duplicates, struct list_head *head)
+ void *data, struct list_head *head)
{
- unsigned long variable_name_size = 1024;
+ unsigned long variable_name_size = 512;
efi_char16_t *variable_name;
efi_status_t status;
efi_guid_t vendor_guid;
@@ -390,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
goto free;
/*
- * Per EFI spec, the maximum storage allocated for both
- * the variable name and variable data is 1024 bytes.
+ * A small set of old UEFI implementations reject sizes
+ * above a certain threshold, the lowest seen in the wild
+ * is 512.
*/
do {
- variable_name_size = 1024;
+ variable_name_size = 512;
status = efivar_get_next_variable(&variable_name_size,
variable_name,
@@ -413,8 +413,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
* we'll ever see a different variable name,
* and may end up looping here forever.
*/
- if (duplicates &&
- variable_is_present(variable_name, &vendor_guid,
+ if (variable_is_present(variable_name, &vendor_guid,
head)) {
dup_variable_bug(variable_name, &vendor_guid,
variable_name_size);
@@ -432,9 +431,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
break;
case EFI_NOT_FOUND:
break;
+ case EFI_BUFFER_TOO_SMALL:
+ pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n",
+ variable_name_size);
+ status = EFI_NOT_FOUND;
+ break;
default:
- printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n",
- status);
+ pr_warn("efivars: get_next_variable: status=%lx\n", status);
status = EFI_NOT_FOUND;
break;
}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f17fdac76b2e..e4421c10caeb 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -14,19 +14,14 @@
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/blkdev.h>
-
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "efs.h"
#include <linux/efs_vh.h>
#include <linux/efs_fs_sb.h>
static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
-static int efs_fill_super(struct super_block *s, void *d, int silent);
-
-static struct dentry *efs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
-}
+static int efs_init_fs_context(struct fs_context *fc);
static void efs_kill_sb(struct super_block *s)
{
@@ -35,15 +30,6 @@ static void efs_kill_sb(struct super_block *s)
kfree(sbi);
}
-static struct file_system_type efs_fs_type = {
- .owner = THIS_MODULE,
- .name = "efs",
- .mount = efs_mount,
- .kill_sb = efs_kill_sb,
- .fs_flags = FS_REQUIRES_DEV,
-};
-MODULE_ALIAS_FS("efs");
-
static struct pt_types sgi_pt_types[] = {
{0x00, "SGI vh"},
{0x01, "SGI trkrepl"},
@@ -63,6 +49,27 @@ static struct pt_types sgi_pt_types[] = {
{0, NULL}
};
+enum {
+ Opt_explicit_open,
+};
+
+static const struct fs_parameter_spec efs_param_spec[] = {
+ fsparam_flag ("explicit-open", Opt_explicit_open),
+ {}
+};
+
+/*
+ * File system definition and registration.
+ */
+static struct file_system_type efs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "efs",
+ .kill_sb = efs_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = efs_init_fs_context,
+ .parameters = efs_param_spec,
+};
+MODULE_ALIAS_FS("efs");
static struct kmem_cache * efs_inode_cachep;
@@ -91,8 +98,8 @@ static int __init init_inodecache(void)
{
efs_inode_cachep = kmem_cache_create("efs_inode_cache",
sizeof(struct efs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT, init_once);
+ SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
+ init_once);
if (efs_inode_cachep == NULL)
return -ENOMEM;
return 0;
@@ -108,18 +115,10 @@ static void destroy_inodecache(void)
kmem_cache_destroy(efs_inode_cachep);
}
-static int efs_remount(struct super_block *sb, int *flags, char *data)
-{
- sync_filesystem(sb);
- *flags |= SB_RDONLY;
- return 0;
-}
-
static const struct super_operations efs_superblock_operations = {
.alloc_inode = efs_alloc_inode,
.free_inode = efs_free_inode,
.statfs = efs_statfs,
- .remount_fs = efs_remount,
};
static const struct export_operations efs_export_ops = {
@@ -249,26 +248,26 @@ static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) {
return 0;
}
-static int efs_fill_super(struct super_block *s, void *d, int silent)
+static int efs_fill_super(struct super_block *s, struct fs_context *fc)
{
struct efs_sb_info *sb;
struct buffer_head *bh;
struct inode *root;
- sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL);
+ sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL);
if (!sb)
return -ENOMEM;
s->s_fs_info = sb;
s->s_time_min = 0;
s->s_time_max = U32_MAX;
-
+
s->s_magic = EFS_SUPER_MAGIC;
if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) {
pr_err("device does not support %d byte blocks\n",
EFS_BLOCKSIZE);
return -EINVAL;
}
-
+
/* read the vh (volume header) block */
bh = sb_bread(s, 0);
@@ -294,7 +293,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
pr_err("cannot read superblock\n");
return -EIO;
}
-
+
if (efs_validate_super(sb, (struct efs_super *) bh->b_data)) {
#ifdef DEBUG
pr_warn("invalid superblock at block %u\n",
@@ -328,6 +327,61 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
return 0;
}
+static void efs_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static int efs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, efs_fill_super);
+}
+
+static int efs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ int token;
+ struct fs_parse_result result;
+
+ token = fs_parse(fc, efs_param_spec, param, &result);
+ if (token < 0)
+ return token;
+ return 0;
+}
+
+static int efs_reconfigure(struct fs_context *fc)
+{
+ sync_filesystem(fc->root->d_sb);
+
+ return 0;
+}
+
+struct efs_context {
+ unsigned long s_mount_opts;
+};
+
+static const struct fs_context_operations efs_context_opts = {
+ .parse_param = efs_parse_param,
+ .get_tree = efs_get_tree,
+ .reconfigure = efs_reconfigure,
+ .free = efs_free_fc,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int efs_init_fs_context(struct fs_context *fc)
+{
+ struct efs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct efs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ fc->fs_private = ctx;
+ fc->ops = &efs_context_opts;
+
+ return 0;
+}
+
static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) {
struct super_block *sb = dentry->d_sb;
struct efs_sb_info *sbi = SUPER_INFO(sb);
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 279933e007d2..333587ba6183 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -11,13 +11,12 @@
struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
-
unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;
- /* indicate the algorithm will be used for decompression */
- unsigned int alg;
+ unsigned int alg; /* the algorithm for decompression */
bool inplace_io, partial_decoding, fillgaps;
+ gfp_t gfp; /* allocation flags for extra temporary buffers */
};
struct z_erofs_decompressor {
@@ -82,13 +81,6 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
return true;
}
-#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
-static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
- struct page *page)
-{
- return page->mapping == MNGD_MAPPING(sbi);
-}
-
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
extern const struct z_erofs_decompressor erofs_decompressors[];
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index c98aeda8abb2..52524bd9698b 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return 0;
}
- map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL;
+ map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
@@ -238,8 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
- map->m_bdev = dif->bdev_handle ?
- dif->bdev_handle->bdev : NULL;
+ map->m_bdev = dif->bdev_file ?
+ file_bdev(dif->bdev_file) : NULL;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
@@ -447,5 +447,6 @@ const struct file_operations erofs_file_fops = {
.llseek = generic_file_llseek,
.read_iter = erofs_file_read_iter,
.mmap = erofs_file_mmap,
+ .get_unmapped_area = thp_get_unmapped_area,
.splice_read = filemap_splice_read,
};
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 072ef6a66823..2ec9b2bb628d 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
victim = availables[--top];
get_page(victim);
} else {
- victim = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ victim = erofs_allocpage(pagepool, rq->gfp);
+ if (!victim)
+ return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
@@ -322,7 +323,8 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt;
u8 *kin;
- DBG_BUGON(rq->outputsize > rq->inputsize);
+ if (rq->outputsize > rq->inputsize)
+ return -EOPNOTSUPP;
if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) {
cur = bs - (rq->pageofs_out & (bs - 1));
pi = (rq->pageofs_in + rq->inputsize - cur) & ~PAGE_MASK;
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index 4a64a9c91dd3..81e65c453ef0 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb,
}
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
- struct page **pagepool)
+ struct page **pgpl)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -158,8 +158,12 @@ again:
strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
outsz -= strm->z.avail_out;
if (!rq->out[no]) {
- rq->out[no] = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+ if (!rq->out[no]) {
+ kout = NULL;
+ err = -ENOMEM;
+ break;
+ }
set_page_private(rq->out[no],
Z_EROFS_SHORTLIVED_PAGE);
}
@@ -208,11 +212,11 @@ again:
if (rq->out[no] != rq->in[j])
continue;
-
- DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
- rq->in[j]));
- tmppage = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ tmppage = erofs_allocpage(pgpl, rq->gfp);
+ if (!tmppage) {
+ err = -ENOMEM;
+ goto failed;
+ }
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
@@ -230,7 +234,7 @@ again:
break;
}
}
-
+failed:
if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
err = -EIO;
if (kout)
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 2dd14f99c1dc..4b28dc130c9f 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -148,7 +148,7 @@ again:
}
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
- struct page **pagepool)
+ struct page **pgpl)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -215,8 +215,11 @@ again:
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
if (!rq->out[no] && rq->fillgaps) { /* deduped */
- rq->out[no] = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+ if (!rq->out[no]) {
+ err = -ENOMEM;
+ break;
+ }
set_page_private(rq->out[no],
Z_EROFS_SHORTLIVED_PAGE);
}
@@ -255,11 +258,11 @@ again:
if (rq->out[no] != rq->in[j])
continue;
-
- DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
- rq->in[j]));
- tmppage = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ tmppage = erofs_allocpage(pgpl, rq->gfp);
+ if (!tmppage) {
+ err = -ENOMEM;
+ goto failed;
+ }
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
@@ -277,6 +280,7 @@ again:
break;
}
}
+failed:
if (no < nrpages_out && strm->buf.out)
kunmap(rq->out[no]);
if (ni < nrpages_in)
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index bc12030393b2..8aff1a724805 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -3,6 +3,7 @@
* Copyright (C) 2022, Alibaba Cloud
* Copyright (C) 2022, Bytedance Inc. All rights reserved.
*/
+#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"
@@ -12,9 +13,27 @@ static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;
-struct erofs_fscache_request {
- struct erofs_fscache_request *primary;
- struct netfs_cache_resources cache_resources;
+static int erofs_anon_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type erofs_anon_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "pseudo_erofs",
+ .init_fs_context = erofs_anon_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+struct erofs_fscache_io {
+ struct netfs_cache_resources cres;
+ struct iov_iter iter;
+ netfs_io_terminated_t end_io;
+ void *private;
+ refcount_t ref;
+};
+
+struct erofs_fscache_rq {
struct address_space *mapping; /* The mapping being accessed */
loff_t start; /* Start position */
size_t len; /* Length of the request */
@@ -23,44 +42,17 @@ struct erofs_fscache_request {
refcount_t ref;
};
-static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
- loff_t start, size_t len)
+static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
- struct erofs_fscache_request *req;
-
- req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- req->mapping = mapping;
- req->start = start;
- req->len = len;
- refcount_set(&req->ref, 1);
-
- return req;
+ if (!refcount_dec_and_test(&io->ref))
+ return false;
+ if (io->cres.ops)
+ io->cres.ops->end_operation(&io->cres);
+ kfree(io);
+ return true;
}
-static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
- size_t len)
-{
- struct erofs_fscache_request *req;
-
- /* use primary request for the first submission */
- if (!primary->submitted) {
- refcount_inc(&primary->ref);
- return primary;
- }
-
- req = erofs_fscache_req_alloc(primary->mapping,
- primary->start + primary->submitted, len);
- if (!IS_ERR(req)) {
- req->primary = primary;
- refcount_inc(&primary->ref);
- }
- return req;
-}
-
-static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
+static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
struct folio *folio;
bool failed = req->error;
@@ -80,120 +72,196 @@ static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
rcu_read_unlock();
}
-static void erofs_fscache_req_put(struct erofs_fscache_request *req)
+static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
- if (refcount_dec_and_test(&req->ref)) {
- if (req->cache_resources.ops)
- req->cache_resources.ops->end_operation(&req->cache_resources);
- if (!req->primary)
- erofs_fscache_req_complete(req);
- else
- erofs_fscache_req_put(req->primary);
- kfree(req);
- }
+ if (!refcount_dec_and_test(&req->ref))
+ return;
+ erofs_fscache_req_complete(req);
+ kfree(req);
}
-static void erofs_fscache_subreq_complete(void *priv,
+static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
+ loff_t start, size_t len)
+{
+ struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);
+
+ if (!req)
+ return NULL;
+ req->mapping = mapping;
+ req->start = start;
+ req->len = len;
+ refcount_set(&req->ref, 1);
+ return req;
+}
+
+static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
+{
+ struct erofs_fscache_rq *req = io->private;
+
+ if (erofs_fscache_io_put(io))
+ erofs_fscache_req_put(req);
+}
+
+static void erofs_fscache_req_end_io(void *priv,
ssize_t transferred_or_error, bool was_async)
{
- struct erofs_fscache_request *req = priv;
+ struct erofs_fscache_io *io = priv;
+ struct erofs_fscache_rq *req = io->private;
- if (IS_ERR_VALUE(transferred_or_error)) {
- if (req->primary)
- req->primary->error = transferred_or_error;
- else
- req->error = transferred_or_error;
- }
- erofs_fscache_req_put(req);
+ if (IS_ERR_VALUE(transferred_or_error))
+ req->error = transferred_or_error;
+ erofs_fscache_req_io_put(io);
+}
+
+static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
+{
+ struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);
+
+ if (!io)
+ return NULL;
+ io->end_io = erofs_fscache_req_end_io;
+ io->private = req;
+ refcount_inc(&req->ref);
+ refcount_set(&io->ref, 1);
+ return io;
}
/*
- * Read data from fscache (cookie, pstart, len), and fill the read data into
- * page cache described by (req->mapping, lstart, len). @pstart describeis the
- * start physical address in the cache file.
+ * Read data from fscache described by cookie at pstart physical address
+ * offset, and fill the read data into buffer described by io->iter.
*/
-static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
- struct erofs_fscache_request *req, loff_t pstart, size_t len)
+static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
+ loff_t pstart, struct erofs_fscache_io *io)
{
enum netfs_io_source source;
- struct super_block *sb = req->mapping->host->i_sb;
- struct netfs_cache_resources *cres = &req->cache_resources;
- struct iov_iter iter;
- loff_t lstart = req->start + req->submitted;
- size_t done = 0;
+ struct netfs_cache_resources *cres = &io->cres;
+ struct iov_iter *iter = &io->iter;
int ret;
- DBG_BUGON(len > req->len - req->submitted);
-
ret = fscache_begin_read_operation(cres, cookie);
if (ret)
return ret;
- while (done < len) {
- loff_t sstart = pstart + done;
- size_t slen = len - done;
+ while (iov_iter_count(iter)) {
+ size_t orig_count = iov_iter_count(iter), len = orig_count;
unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
source = cres->ops->prepare_ondemand_read(cres,
- sstart, &slen, LLONG_MAX, &flags, 0);
- if (WARN_ON(slen == 0))
+ pstart, &len, LLONG_MAX, &flags, 0);
+ if (WARN_ON(len == 0))
source = NETFS_INVALID_READ;
if (source != NETFS_READ_FROM_CACHE) {
- erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
+ erofs_err(NULL, "prepare_read failed (source %d)", source);
return -EIO;
}
- refcount_inc(&req->ref);
- iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
- lstart + done, slen);
-
- ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
- erofs_fscache_subreq_complete, req);
+ iov_iter_truncate(iter, len);
+ refcount_inc(&io->ref);
+ ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
+ io->end_io, io);
if (ret == -EIOCBQUEUED)
ret = 0;
if (ret) {
- erofs_err(sb, "failed to fscache_read (ret %d)", ret);
+ erofs_err(NULL, "fscache_read failed (ret %d)", ret);
return ret;
}
+ if (WARN_ON(iov_iter_count(iter)))
+ return -EIO;
- done += slen;
+ iov_iter_reexpand(iter, orig_count - len);
+ pstart += len;
}
- DBG_BUGON(done != len);
return 0;
}
-static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
+struct erofs_fscache_bio {
+ struct erofs_fscache_io io;
+ struct bio bio; /* w/o bdev to share bio_add_page/endio() */
+ struct bio_vec bvecs[BIO_MAX_VECS];
+};
+
+static void erofs_fscache_bio_endio(void *priv,
+ ssize_t transferred_or_error, bool was_async)
+{
+ struct erofs_fscache_bio *io = priv;
+
+ if (IS_ERR_VALUE(transferred_or_error))
+ io->bio.bi_status = errno_to_blk_status(transferred_or_error);
+ io->bio.bi_end_io(&io->bio);
+ BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
+ erofs_fscache_io_put(&io->io);
+}
+
+struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
+ struct erofs_fscache_bio *io;
+
+ io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
+ bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+ io->io.private = mdev->m_fscache->cookie;
+ io->io.end_io = erofs_fscache_bio_endio;
+ refcount_set(&io->io.ref, 1);
+ return &io->bio;
+}
+
+void erofs_fscache_submit_bio(struct bio *bio)
+{
+ struct erofs_fscache_bio *io = container_of(bio,
+ struct erofs_fscache_bio, bio);
int ret;
+
+ iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
+ bio->bi_iter.bi_size);
+ ret = erofs_fscache_read_io_async(io->io.private,
+ bio->bi_iter.bi_sector << 9, &io->io);
+ erofs_fscache_io_put(&io->io);
+ if (!ret)
+ return;
+ bio->bi_status = errno_to_blk_status(ret);
+ bio->bi_end_io(bio);
+}
+
+static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
+{
struct erofs_fscache *ctx = folio->mapping->host->i_private;
- struct erofs_fscache_request *req;
+ int ret = -ENOMEM;
+ struct erofs_fscache_rq *req;
+ struct erofs_fscache_io *io;
req = erofs_fscache_req_alloc(folio->mapping,
folio_pos(folio), folio_size(folio));
- if (IS_ERR(req)) {
+ if (!req) {
folio_unlock(folio);
- return PTR_ERR(req);
+ return ret;
}
- ret = erofs_fscache_read_folios_async(ctx->cookie, req,
- folio_pos(folio), folio_size(folio));
+ io = erofs_fscache_req_io_alloc(req);
+ if (!io) {
+ req->error = ret;
+ goto out;
+ }
+ iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
+ folio_pos(folio), folio_size(folio));
+
+ ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
if (ret)
req->error = ret;
+ erofs_fscache_req_io_put(io);
+out:
erofs_fscache_req_put(req);
return ret;
}
-static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
+static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
- struct address_space *mapping = primary->mapping;
+ struct address_space *mapping = req->mapping;
struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb;
- struct erofs_fscache_request *req;
+ struct erofs_fscache_io *io;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
- struct iov_iter iter;
- loff_t pos = primary->start + primary->submitted;
+ loff_t pos = req->start + req->submitted;
size_t count;
int ret;
@@ -204,6 +272,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
if (map.m_flags & EROFS_MAP_META) {
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ struct iov_iter iter;
erofs_blk_t blknr;
size_t offset, size;
void *src;
@@ -224,15 +293,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
}
iov_iter_zero(PAGE_SIZE - size, &iter);
erofs_put_metabuf(&buf);
- primary->submitted += PAGE_SIZE;
+ req->submitted += PAGE_SIZE;
return 0;
}
- count = primary->len - primary->submitted;
+ count = req->len - req->submitted;
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+ struct iov_iter iter;
+
iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
iov_iter_zero(count, &iter);
- primary->submitted += count;
+ req->submitted += count;
return 0;
}
@@ -247,18 +318,19 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
if (ret)
return ret;
- req = erofs_fscache_req_chain(primary, count);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ io = erofs_fscache_req_io_alloc(req);
+ if (!io)
+ return -ENOMEM;
+ iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+ ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
+ mdev.m_pa + (pos - map.m_la), io);
+ erofs_fscache_req_io_put(io);
- ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
- req, mdev.m_pa + (pos - map.m_la), count);
- erofs_fscache_req_put(req);
- primary->submitted += count;
+ req->submitted += count;
return ret;
}
-static int erofs_fscache_data_read(struct erofs_fscache_request *req)
+static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
int ret;
@@ -267,20 +339,19 @@ static int erofs_fscache_data_read(struct erofs_fscache_request *req)
if (ret)
req->error = ret;
} while (!ret && req->submitted < req->len);
-
return ret;
}
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
- struct erofs_fscache_request *req;
+ struct erofs_fscache_rq *req;
int ret;
req = erofs_fscache_req_alloc(folio->mapping,
folio_pos(folio), folio_size(folio));
- if (IS_ERR(req)) {
+ if (!req) {
folio_unlock(folio);
- return PTR_ERR(req);
+ return -ENOMEM;
}
ret = erofs_fscache_data_read(req);
@@ -290,14 +361,14 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
static void erofs_fscache_readahead(struct readahead_control *rac)
{
- struct erofs_fscache_request *req;
+ struct erofs_fscache_rq *req;
if (!readahead_count(rac))
return;
req = erofs_fscache_req_alloc(rac->mapping,
readahead_pos(rac), readahead_length(rac));
- if (IS_ERR(req))
+ if (!req)
return;
/* The request completion will drop refs on the folios. */
@@ -381,11 +452,12 @@ static int erofs_fscache_init_domain(struct super_block *sb)
goto out;
if (!erofs_pseudo_mnt) {
- erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
- if (IS_ERR(erofs_pseudo_mnt)) {
- err = PTR_ERR(erofs_pseudo_mnt);
+ struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
+ if (IS_ERR(mnt)) {
+ err = PTR_ERR(mnt);
goto out;
}
+ erofs_pseudo_mnt = mnt;
}
domain->volume = sbi->volume;
@@ -459,7 +531,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
inode->i_blkbits = EROFS_SB(sb)->blkszbits;
inode->i_private = ctx;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 3d616dea55dc..0eb0e6f933c3 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
} else {
const unsigned int gotten = sb->s_blocksize - *ofs;
- copied = kmalloc(vi->inode_isize, GFP_NOFS);
+ copied = kmalloc(vi->inode_isize, GFP_KERNEL);
if (!copied) {
err = -ENOMEM;
goto err_out;
@@ -259,14 +259,12 @@ static int erofs_fill_inode(struct inode *inode)
if (erofs_inode_is_data_compressed(vi->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
- if (!erofs_is_fscache_mode(inode->i_sb)) {
- DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE,
- erofs_info, inode->i_sb,
- "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
- inode->i_mapping->a_ops = &z_erofs_aops;
- err = 0;
- goto out_unlock;
- }
+ DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT,
+ erofs_info, inode->i_sb,
+ "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
+ inode->i_mapping->a_ops = &z_erofs_aops;
+ err = 0;
+ goto out_unlock;
#endif
err = -EOPNOTSUPP;
goto out_unlock;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..39c67119f43b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -49,7 +49,7 @@ typedef u32 erofs_blk_t;
struct erofs_device_info {
char *path;
struct erofs_fscache *fscache;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct dax_device *dax_dev;
u64 dax_part_off;
@@ -385,7 +385,6 @@ struct erofs_map_dev {
unsigned int m_deviceid;
};
-extern struct file_system_type erofs_fs_type;
extern const struct super_operations erofs_sops;
extern const struct address_space_operations erofs_raw_access_aops;
@@ -467,8 +466,8 @@ int __init erofs_init_shrinker(void);
void erofs_exit_shrinker(void);
int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
- struct erofs_workgroup *egrp);
+int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *egrp);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags);
void *erofs_get_pcpubuf(unsigned int requiredpages);
@@ -513,6 +512,8 @@ void erofs_fscache_unregister_fs(struct super_block *sb);
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name, unsigned int flags);
void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache);
+struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev);
+void erofs_fscache_submit_bio(struct bio *bio);
#else
static inline int erofs_fscache_register_fs(struct super_block *sb)
{
@@ -530,6 +531,8 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache)
{
}
+static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) { return NULL; }
+static inline void erofs_fscache_submit_bio(struct bio *bio) {}
#endif
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index d4f631d39f0f..f0110a78acb2 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -130,24 +130,24 @@ static void *erofs_find_target_block(struct erofs_buf *target,
/* string comparison without already matched prefix */
diff = erofs_dirnamecmp(name, &dname, &matched);
- if (!diff) {
- *_ndirents = 0;
- goto out;
- } else if (diff > 0) {
- head = mid + 1;
- startprfx = matched;
-
- if (!IS_ERR(candidate))
- erofs_put_metabuf(target);
- *target = buf;
- candidate = de;
- *_ndirents = ndirents;
- } else {
+ if (diff < 0) {
erofs_put_metabuf(&buf);
-
back = mid - 1;
endprfx = matched;
+ continue;
+ }
+
+ if (!IS_ERR(candidate))
+ erofs_put_metabuf(target);
+ *target = buf;
+ if (!diff) {
+ *_ndirents = 0;
+ return de;
}
+ head = mid + 1;
+ startprfx = matched;
+ candidate = de;
+ *_ndirents = ndirents;
continue;
}
out: /* free if the candidate is valid */
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 5f60f163bd56..69308fd73e4a 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -177,7 +177,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache;
struct erofs_deviceslot *dis;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
void *ptr;
ptr = erofs_read_metabuf(buf, sb, erofs_blknr(sb, *pos), EROFS_KMAP);
@@ -201,12 +201,12 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
return PTR_ERR(fscache);
dif->fscache = fscache;
} else if (!sbi->devs->flatdev) {
- bdev_handle = bdev_open_by_path(dif->path, BLK_OPEN_READ,
+ bdev_file = bdev_file_open_by_path(dif->path, BLK_OPEN_READ,
sb->s_type, NULL);
- if (IS_ERR(bdev_handle))
- return PTR_ERR(bdev_handle);
- dif->bdev_handle = bdev_handle;
- dif->dax_dev = fs_dax_get_by_bdev(bdev_handle->bdev,
+ if (IS_ERR(bdev_file))
+ return PTR_ERR(bdev_file);
+ dif->bdev_file = bdev_file;
+ dif->dax_dev = fs_dax_get_by_bdev(file_bdev(bdev_file),
&dif->dax_part_off, NULL, NULL);
}
@@ -579,13 +579,6 @@ static const struct export_operations erofs_export_ops = {
.get_parent = erofs_get_parent,
};
-static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
-{
- static const struct tree_descr empty_descr = {""};
-
- return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
-}
-
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
@@ -712,11 +705,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return 0;
}
-static int erofs_fc_anon_get_tree(struct fs_context *fc)
-{
- return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
-}
-
static int erofs_fc_get_tree(struct fs_context *fc)
{
struct erofs_fs_context *ctx = fc->fs_private;
@@ -754,8 +742,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data)
struct erofs_device_info *dif = ptr;
fs_put_dax(dif->dax_dev, NULL);
- if (dif->bdev_handle)
- bdev_release(dif->bdev_handle);
+ if (dif->bdev_file)
+ fput(dif->bdev_file);
erofs_fscache_unregister_cookie(dif->fscache);
dif->fscache = NULL;
kfree(dif->path);
@@ -789,20 +777,10 @@ static const struct fs_context_operations erofs_context_ops = {
.free = erofs_fc_free,
};
-static const struct fs_context_operations erofs_anon_context_ops = {
- .get_tree = erofs_fc_anon_get_tree,
-};
-
static int erofs_init_fs_context(struct fs_context *fc)
{
struct erofs_fs_context *ctx;
- /* pseudo mount for anon inodes */
- if (fc->sb_flags & SB_KERNMOUNT) {
- fc->ops = &erofs_anon_context_ops;
- return 0;
- }
-
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -824,12 +802,6 @@ static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi;
- /* pseudo mount for anon inodes */
- if (sb->s_flags & SB_KERNMOUNT) {
- kill_anon_super(sb);
- return;
- }
-
if (erofs_is_fscache_mode(sb))
kill_anon_super(sb);
else
@@ -868,7 +840,7 @@ static void erofs_put_super(struct super_block *sb)
erofs_fscache_unregister_fs(sb);
}
-struct file_system_type erofs_fs_type = {
+static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
.init_fs_context = erofs_init_fs_context,
@@ -885,7 +857,7 @@ static int __init erofs_module_init(void)
erofs_inode_cachep = kmem_cache_create("erofs_inode",
sizeof(struct erofs_inode), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
erofs_inode_init_once);
if (!erofs_inode_cachep)
return -ENOMEM;
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index 5dea308764b4..518bdd69c823 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
repeat:
xa_lock(&sbi->managed_pslots);
pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
- NULL, grp, GFP_NOFS);
+ NULL, grp, GFP_KERNEL);
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
@@ -129,7 +129,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
* the XArray. Otherwise some cached pages could be still attached to
* the orphan old workgroup when the new one is available in the tree.
*/
- if (erofs_try_to_free_all_cached_pages(sbi, grp))
+ if (erofs_try_to_free_all_cached_folios(sbi, grp))
goto out;
/*
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 692c0c39be63..3216b920d369 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -19,7 +19,10 @@
typedef void *z_erofs_next_pcluster_t;
struct z_erofs_bvec {
- struct page *page;
+ union {
+ struct page *page;
+ struct folio *folio;
+ };
int offset;
unsigned int end;
};
@@ -82,6 +85,9 @@ struct z_erofs_pcluster {
/* L: indicate several pageofs_outs or not */
bool multibases;
+ /* L: whether extra buffer allocations are best-effort */
+ bool besteffort;
+
/* A: compressed bvecs (can be cached or inplaced pages) */
struct z_erofs_bvec compressed_bvecs[];
};
@@ -113,47 +119,46 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
}
+#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
+static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
+{
+ return fo->mapping == MNGD_MAPPING(sbi);
+}
+
/*
- * bit 30: I/O error occurred on this page
- * bit 0 - 29: remaining parts to complete this page
+ * bit 30: I/O error occurred on this folio
+ * bit 0 - 29: remaining parts to complete this folio
*/
-#define Z_EROFS_PAGE_EIO (1 << 30)
+#define Z_EROFS_FOLIO_EIO (1 << 30)
-static inline void z_erofs_onlinepage_init(struct page *page)
+static void z_erofs_onlinefolio_init(struct folio *folio)
{
union {
atomic_t o;
- unsigned long v;
+ void *v;
} u = { .o = ATOMIC_INIT(1) };
- set_page_private(page, u.v);
- smp_wmb();
- SetPagePrivate(page);
+ folio->private = u.v; /* valid only if file-backed folio is locked */
}
-static inline void z_erofs_onlinepage_split(struct page *page)
+static void z_erofs_onlinefolio_split(struct folio *folio)
{
- atomic_inc((atomic_t *)&page->private);
+ atomic_inc((atomic_t *)&folio->private);
}
-static void z_erofs_onlinepage_endio(struct page *page, int err)
+static void z_erofs_onlinefolio_end(struct folio *folio, int err)
{
int orig, v;
- DBG_BUGON(!PagePrivate(page));
-
do {
- orig = atomic_read((atomic_t *)&page->private);
- v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0);
- } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig);
-
- if (!(v & ~Z_EROFS_PAGE_EIO)) {
- set_page_private(page, 0);
- ClearPagePrivate(page);
- if (!(v & Z_EROFS_PAGE_EIO))
- SetPageUptodate(page);
- unlock_page(page);
- }
+ orig = atomic_read((atomic_t *)&folio->private);
+ v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0);
+ } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
+
+ if (v & ~Z_EROFS_FOLIO_EIO)
+ return;
+ folio->private = 0;
+ folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO));
}
#define Z_EROFS_ONSTACK_PAGES 32
@@ -230,7 +235,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
struct page *nextpage = *candidate_bvpage;
if (!nextpage) {
- nextpage = erofs_allocpage(pagepool, GFP_NOFS);
+ nextpage = erofs_allocpage(pagepool, GFP_KERNEL);
if (!nextpage)
return -ENOMEM;
set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE);
@@ -302,7 +307,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
if (nrpages > pcs->maxpages)
continue;
- pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
+ pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
if (!pcl)
return ERR_PTR(-ENOMEM);
pcl->pclustersize = size;
@@ -563,25 +568,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
unsigned int i;
- if (i_blocksize(fe->inode) != PAGE_SIZE)
- return;
- if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
+ if (i_blocksize(fe->inode) != PAGE_SIZE ||
+ fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
return;
for (i = 0; i < pclusterpages; ++i) {
struct page *page, *newpage;
- void *t; /* mark pages just found for debugging */
- /* the compressed page was loaded before */
+ /* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
page = find_get_page(mc, pcl->obj.index + i);
-
- if (page) {
- t = (void *)((unsigned long)page | 1);
- newpage = NULL;
- } else {
+ if (!page) {
/* I/O is needed, no possible to decompress directly */
standalone = false;
if (!shouldalloc)
@@ -595,11 +594,14 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
if (!newpage)
continue;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
- t = (void *)((unsigned long)newpage | 1);
}
-
- if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t))
+ spin_lock(&pcl->obj.lockref.lock);
+ if (!pcl->compressed_bvecs[i].page) {
+ pcl->compressed_bvecs[i].page = page ? page : newpage;
+ spin_unlock(&pcl->obj.lockref.lock);
continue;
+ }
+ spin_unlock(&pcl->obj.lockref.lock);
if (page)
put_page(page);
@@ -615,9 +617,9 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
-/* called by erofs_shrinker to get rid of all compressed_pages */
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
- struct erofs_workgroup *grp)
+/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *grp)
{
struct z_erofs_pcluster *const pcl =
container_of(grp, struct z_erofs_pcluster, obj);
@@ -625,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
int i;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- /*
- * refcount of workgroup is now freezed as 0,
- * therefore no need to worry about available decompression users.
- */
+ /* There is no actice user since the pcluster is now freezed */
for (i = 0; i < pclusterpages; ++i) {
- struct page *page = pcl->compressed_bvecs[i].page;
+ struct folio *folio = pcl->compressed_bvecs[i].folio;
- if (!page)
+ if (!folio)
continue;
- /* block other users from reclaiming or migrating the page */
- if (!trylock_page(page))
+ /* Avoid reclaiming or migrating this folio */
+ if (!folio_trylock(folio))
return -EBUSY;
- if (!erofs_page_is_managed(sbi, page))
+ if (!erofs_folio_is_managed(sbi, folio))
continue;
-
- /* barrier is implied in the following 'unlock_page' */
- WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
- detach_page_private(page);
- unlock_page(page);
+ pcl->compressed_bvecs[i].folio = NULL;
+ folio_detach_private(folio);
+ folio_unlock(folio);
}
return 0;
}
@@ -662,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
ret = false;
spin_lock(&pcl->obj.lockref.lock);
- if (pcl->obj.lockref.count > 0)
- goto out;
-
- DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- for (i = 0; i < pclusterpages; ++i) {
- if (pcl->compressed_bvecs[i].page == &folio->page) {
- WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
- ret = true;
- break;
+ if (pcl->obj.lockref.count <= 0) {
+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+ for (i = 0; i < pclusterpages; ++i) {
+ if (pcl->compressed_bvecs[i].folio == folio) {
+ pcl->compressed_bvecs[i].folio = NULL;
+ folio_detach_private(folio);
+ ret = true;
+ break;
+ }
}
}
- if (ret)
- folio_detach_private(folio);
-out:
spin_unlock(&pcl->obj.lockref.lock);
return ret;
}
@@ -694,7 +688,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio,
DBG_BUGON(stop > folio_size(folio) || stop < length);
if (offset == 0 && stop == folio_size(folio))
- while (!z_erofs_cache_release_folio(folio, GFP_NOFS))
+ while (!z_erofs_cache_release_folio(folio, 0))
cond_resched();
}
@@ -713,36 +707,30 @@ int erofs_init_managed_cache(struct super_block *sb)
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &z_erofs_cache_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
EROFS_SB(sb)->managed_cache = inode;
return 0;
}
-static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
- struct z_erofs_bvec *bvec)
-{
- struct z_erofs_pcluster *const pcl = fe->pcl;
-
- while (fe->icur > 0) {
- if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
- NULL, bvec->page)) {
- pcl->compressed_bvecs[fe->icur] = *bvec;
- return true;
- }
- }
- return false;
-}
-
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
struct z_erofs_bvec *bvec, bool exclusive)
{
+ struct z_erofs_pcluster *pcl = fe->pcl;
int ret;
if (exclusive) {
/* give priority for inplaceio to use file pages first */
- if (z_erofs_try_inplace_io(fe, bvec))
+ spin_lock(&pcl->obj.lockref.lock);
+ while (fe->icur > 0) {
+ if (pcl->compressed_bvecs[--fe->icur].page)
+ continue;
+ pcl->compressed_bvecs[fe->icur] = *bvec;
+ spin_unlock(&pcl->obj.lockref.lock);
return 0;
+ }
+ spin_unlock(&pcl->obj.lockref.lock);
+
/* otherwise, check if it can be used as a bvpage */
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
!fe->candidate_bvpage)
@@ -963,20 +951,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
return 0;
}
-static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
- struct page *page)
+static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
+ struct folio *folio, bool ra)
{
struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map;
- const loff_t offset = page_offset(page);
- const unsigned int bs = i_blocksize(inode);
+ const loff_t offset = folio_pos(folio);
+ const unsigned int bs = i_blocksize(inode), fs = folio_size(folio);
bool tight = true, exclusive;
unsigned int cur, end, len, split;
int err = 0;
- z_erofs_onlinepage_init(page);
+ z_erofs_onlinefolio_init(folio);
split = 0;
- end = PAGE_SIZE;
+ end = fs;
repeat:
if (offset + end - 1 < map->m_la ||
offset + end - 1 >= map->m_la + map->m_llen) {
@@ -993,7 +981,7 @@ repeat:
++split;
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
- zero_user_segment(page, cur, end);
+ folio_zero_segment(folio, cur, end);
tight = false;
goto next_part;
}
@@ -1002,8 +990,8 @@ repeat:
erofs_off_t fpos = offset + cur - map->m_la;
len = min_t(unsigned int, map->m_llen - fpos, end - cur);
- err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len,
- EROFS_I(inode)->z_fragmentoff + fpos);
+ err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur,
+ cur + len, EROFS_I(inode)->z_fragmentoff + fpos);
if (err)
goto out;
tight = false;
@@ -1014,28 +1002,29 @@ repeat:
err = z_erofs_pcluster_begin(fe);
if (err)
goto out;
+ fe->pcl->besteffort |= !ra;
}
/*
- * Ensure the current partial page belongs to this submit chain rather
+ * Ensure the current partial folio belongs to this submit chain rather
* than other concurrent submit chains or the noio(bypass) chain since
- * those chains are handled asynchronously thus the page cannot be used
+ * those chains are handled asynchronously thus the folio cannot be used
* for inplace I/O or bvpage (should be processed in a strict order.)
*/
tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
- exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE)));
+ exclusive = (!cur && ((split <= 1) || (tight && bs == fs)));
if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
- .page = page,
+ .page = &folio->page,
.offset = offset - map->m_la,
.end = end,
}), exclusive);
if (err)
goto out;
- z_erofs_onlinepage_split(page);
+ z_erofs_onlinefolio_split(folio);
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
if (fe->pcl->length < offset + end - map->m_la) {
@@ -1056,7 +1045,7 @@ next_part:
goto repeat;
out:
- z_erofs_onlinepage_endio(page, err);
+ z_erofs_onlinefolio_end(folio, err);
return err;
}
@@ -1159,7 +1148,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
cur += len;
}
kunmap_local(dst);
- z_erofs_onlinepage_endio(bvi->bvec.page, err);
+ z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
list_del(p);
kfree(bvi);
}
@@ -1210,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
be->compressed_pages[i] = page;
if (z_erofs_is_inline_pcluster(pcl) ||
- erofs_page_is_managed(EROFS_SB(be->sb), page)) {
+ erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
if (!PageUptodate(page))
err = -EIO;
continue;
@@ -1280,6 +1269,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
.inplace_io = overlapped,
.partial_decoding = pcl->partial,
.fillgaps = pcl->multibases,
+ .gfp = pcl->besteffort ?
+ GFP_KERNEL | __GFP_NOFAIL :
+ GFP_NOWAIT | __GFP_NORETRY
}, be->pagepool);
/* must handle all compressed pages before actual file pages */
@@ -1292,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* consider shortlived pages added when decompressing */
page = be->compressed_pages[i];
- if (!page || erofs_page_is_managed(sbi, page))
+ if (!page ||
+ erofs_folio_is_managed(sbi, page_folio(page)))
continue;
(void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
@@ -1313,7 +1306,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
- z_erofs_onlinepage_endio(page, err);
+ z_erofs_onlinefolio_end(page_folio(page), err);
}
if (be->decompressed_pages != be->onstack_pages)
@@ -1322,6 +1315,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
pcl->length = 0;
pcl->partial = true;
pcl->multibases = false;
+ pcl->besteffort = false;
pcl->bvset.nextpage = NULL;
pcl->vcnt = 0;
@@ -1423,69 +1417,61 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
{
gfp_t gfp = mapping_gfp_mask(mc);
bool tocache = false;
- struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
+ struct z_erofs_bvec zbv;
struct address_space *mapping;
- struct page *page, *oldpage;
- int justfound, bs = i_blocksize(f->inode);
+ struct page *page;
+ int bs = i_blocksize(f->inode);
- /* Except for inplace pages, the entire page can be used for I/Os */
+ /* Except for inplace folios, the entire folio can be used for I/Os */
bvec->bv_offset = 0;
bvec->bv_len = PAGE_SIZE;
repeat:
- oldpage = READ_ONCE(zbv->page);
- if (!oldpage)
- goto out_allocpage;
-
- justfound = (unsigned long)oldpage & 1UL;
- page = (struct page *)((unsigned long)oldpage & ~1UL);
- bvec->bv_page = page;
+ spin_lock(&pcl->obj.lockref.lock);
+ zbv = pcl->compressed_bvecs[nr];
+ spin_unlock(&pcl->obj.lockref.lock);
+ if (!zbv.folio)
+ goto out_allocfolio;
- DBG_BUGON(z_erofs_is_shortlived_page(page));
+ bvec->bv_page = &zbv.folio->page;
+ DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
/*
- * Handle preallocated cached pages. We tried to allocate such pages
+ * Handle preallocated cached folios. We tried to allocate such folios
* without triggering direct reclaim. If allocation failed, inplace
- * file-backed pages will be used instead.
+ * file-backed folios will be used instead.
*/
- if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
- set_page_private(page, 0);
- WRITE_ONCE(zbv->page, page);
+ if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+ zbv.folio->private = 0;
tocache = true;
goto out_tocache;
}
- mapping = READ_ONCE(page->mapping);
+ mapping = READ_ONCE(zbv.folio->mapping);
/*
- * File-backed pages for inplace I/Os are all locked steady,
+ * File-backed folios for inplace I/Os are all locked steady,
* therefore it is impossible for `mapping` to be NULL.
*/
if (mapping && mapping != mc) {
- if (zbv->offset < 0)
- bvec->bv_offset = round_up(-zbv->offset, bs);
- bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
+ if (zbv.offset < 0)
+ bvec->bv_offset = round_up(-zbv.offset, bs);
+ bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset;
return;
}
- lock_page(page);
- /* only true if page reclaim goes wrong, should never happen */
- DBG_BUGON(justfound && PagePrivate(page));
-
- /* the cached page is still in managed cache */
- if (page->mapping == mc) {
- WRITE_ONCE(zbv->page, page);
+ folio_lock(zbv.folio);
+ if (zbv.folio->mapping == mc) {
/*
- * The cached page is still available but without a valid
- * `->private` pcluster hint. Let's reconnect them.
+ * The cached folio is still in managed cache but without
+ * a valid `->private` pcluster hint. Let's reconnect them.
*/
- if (!PagePrivate(page)) {
- DBG_BUGON(!justfound);
- /* compressed_bvecs[] already takes a ref */
- attach_page_private(page, pcl);
- put_page(page);
+ if (!folio_test_private(zbv.folio)) {
+ folio_attach_private(zbv.folio, pcl);
+ /* compressed_bvecs[] already takes a ref before */
+ folio_put(zbv.folio);
}
/* no need to submit if it is already up-to-date */
- if (PageUptodate(page)) {
- unlock_page(page);
+ if (folio_test_uptodate(zbv.folio)) {
+ folio_unlock(zbv.folio);
bvec->bv_page = NULL;
}
return;
@@ -1495,30 +1481,32 @@ repeat:
* It has been truncated, so it's unsafe to reuse this one. Let's
* allocate a new page for compressed data.
*/
- DBG_BUGON(page->mapping);
- DBG_BUGON(!justfound);
-
+ DBG_BUGON(zbv.folio->mapping);
tocache = true;
- unlock_page(page);
- put_page(page);
-out_allocpage:
+ folio_unlock(zbv.folio);
+ folio_put(zbv.folio);
+out_allocfolio:
page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
- if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
+ spin_lock(&pcl->obj.lockref.lock);
+ if (pcl->compressed_bvecs[nr].folio) {
erofs_pagepool_add(&f->pagepool, page);
+ spin_unlock(&pcl->obj.lockref.lock);
cond_resched();
goto repeat;
}
+ pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
+ spin_unlock(&pcl->obj.lockref.lock);
bvec->bv_page = page;
out_tocache:
if (!tocache || bs != PAGE_SIZE ||
- add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) {
- /* turn into a temporary shortlived page (1 ref) */
- set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
+ filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
+ /* turn into a temporary shortlived folio (1 ref) */
+ zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
return;
}
- attach_page_private(page, pcl);
+ folio_attach_private(zbv.folio, pcl);
/* drop a refcount added by allocpage (then 2 refs in total here) */
- put_page(page);
+ folio_put(zbv.folio);
}
static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
@@ -1573,28 +1561,29 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
qtail[JQ_BYPASS] = &pcl->next;
}
-static void z_erofs_submissionqueue_endio(struct bio *bio)
+static void z_erofs_endio(struct bio *bio)
{
struct z_erofs_decompressqueue *q = bio->bi_private;
blk_status_t err = bio->bi_status;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
+ bio_for_each_folio_all(fi, bio) {
+ struct folio *folio = fi.folio;
- DBG_BUGON(PageUptodate(page));
- DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
- if (!err)
- SetPageUptodate(page);
- unlock_page(page);
- }
+ DBG_BUGON(folio_test_uptodate(folio));
+ DBG_BUGON(z_erofs_page_is_invalidated(&folio->page));
+ if (!erofs_folio_is_managed(EROFS_SB(q->sb), folio))
+ continue;
+
+ if (!err)
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
}
if (err)
q->eio = true;
z_erofs_decompress_kickoff(q, -1);
- bio_put(bio);
+ if (bio->bi_bdev)
+ bio_put(bio);
}
static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
@@ -1608,7 +1597,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
z_erofs_next_pcluster_t owned_head = f->owned_head;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
erofs_off_t last_pa;
- struct block_device *last_bdev;
unsigned int nr_bios = 0;
struct bio *bio = NULL;
unsigned long pflags;
@@ -1655,9 +1643,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
continue;
if (bio && (cur != last_pa ||
- last_bdev != mdev.m_bdev)) {
-submit_bio_retry:
- submit_bio(bio);
+ bio->bi_bdev != mdev.m_bdev)) {
+io_retry:
+ if (!erofs_is_fscache_mode(sb))
+ submit_bio(bio);
+ else
+ erofs_fscache_submit_bio(bio);
+
if (memstall) {
psi_memstall_leave(&pflags);
memstall = 0;
@@ -1672,22 +1664,24 @@ submit_bio_retry:
}
if (!bio) {
- bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
- REQ_OP_READ, GFP_NOIO);
- bio->bi_end_io = z_erofs_submissionqueue_endio;
+ bio = erofs_is_fscache_mode(sb) ?
+ erofs_fscache_bio_alloc(&mdev) :
+ bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
+ REQ_OP_READ, GFP_NOIO);
+ bio->bi_end_io = z_erofs_endio;
bio->bi_iter.bi_sector = cur >> 9;
bio->bi_private = q[JQ_SUBMIT];
if (readahead)
bio->bi_opf |= REQ_RAHEAD;
++nr_bios;
- last_bdev = mdev.m_bdev;
}
if (cur + bvec.bv_len > end)
bvec.bv_len = end - cur;
+ DBG_BUGON(bvec.bv_len < sb->s_blocksize);
if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
bvec.bv_offset))
- goto submit_bio_retry;
+ goto io_retry;
last_pa = cur + bvec.bv_len;
bypass = false;
@@ -1700,7 +1694,10 @@ submit_bio_retry:
} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
if (bio) {
- submit_bio(bio);
+ if (!erofs_is_fscache_mode(sb))
+ submit_bio(bio);
+ else
+ erofs_fscache_submit_bio(bio);
if (memstall)
psi_memstall_leave(&pflags);
}
@@ -1785,7 +1782,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
if (PageUptodate(page))
unlock_page(page);
else
- (void)z_erofs_do_read_page(f, page);
+ z_erofs_scan_folio(f, page_folio(page), !!rac);
put_page(page);
}
@@ -1806,7 +1803,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true);
- err = z_erofs_do_read_page(&f, &folio->page);
+ err = z_erofs_scan_folio(&f, folio, false);
z_erofs_pcluster_readmore(&f, NULL, false);
z_erofs_pcluster_end(&f);
@@ -1847,7 +1844,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
folio = head;
head = folio_get_private(folio);
- err = z_erofs_do_read_page(&f, &folio->page);
+ err = z_erofs_scan_folio(&f, folio, true);
if (err && err != -EINTR)
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
folio->index, EROFS_I(inode)->nid);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ad8186d47ba7..9afdb722fa92 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -251,7 +251,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
ssize_t res;
__u64 ucnt;
- if (count < sizeof(ucnt))
+ if (count != sizeof(ucnt))
return -EINVAL;
if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
return -EFAULT;
@@ -283,13 +283,18 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
struct eventfd_ctx *ctx = f->private_data;
+ __u64 cnt;
spin_lock_irq(&ctx->wqh.lock);
- seq_printf(m, "eventfd-count: %16llx\n",
- (unsigned long long)ctx->count);
+ cnt = ctx->count;
spin_unlock_irq(&ctx->wqh.lock);
- seq_printf(m, "eventfd-id: %d\n", ctx->id);
- seq_printf(m, "eventfd-semaphore: %d\n",
+
+ seq_printf(m,
+ "eventfd-count: %16llx\n"
+ "eventfd-id: %d\n"
+ "eventfd-semaphore: %d\n",
+ cnt,
+ ctx->id,
!!(ctx->flags & EFD_SEMAPHORE));
}
#endif
@@ -383,6 +388,7 @@ static int do_eventfd(unsigned int count, int flags)
/* Check the EFD_* constants for consistency. */
BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
+ BUILD_BUG_ON(EFD_SEMAPHORE != (1 << 0));
if (flags & ~EFD_FLAGS_SET)
return -EINVAL;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3534d36a1474..882b89edc52a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -37,6 +37,7 @@
#include <linux/seq_file.h>
#include <linux/compat.h>
#include <linux/rculist.h>
+#include <linux/capability.h>
#include <net/busy_poll.h>
/*
@@ -206,7 +207,7 @@ struct eventpoll {
*/
struct epitem *ovflist;
- /* wakeup_source used when ep_scan_ready_list is running */
+ /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */
struct wakeup_source *ws;
/* The user that created the eventpoll descriptor */
@@ -227,6 +228,11 @@ struct eventpoll {
#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
unsigned int napi_id;
+ /* busy poll timeout */
+ u32 busy_poll_usecs;
+ /* busy poll packet budget */
+ u16 busy_poll_budget;
+ bool prefer_busy_poll;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -387,11 +393,41 @@ static inline int ep_events_available(struct eventpoll *ep)
}
#ifdef CONFIG_NET_RX_BUSY_POLL
+/**
+ * busy_loop_ep_timeout - check if busy poll has timed out. The timeout value
+ * from the epoll instance ep is preferred, but if it is not set fallback to
+ * the system-wide global via busy_loop_timeout.
+ *
+ * @start_time: The start time used to compute the remaining time until timeout.
+ * @ep: Pointer to the eventpoll context.
+ *
+ * Return: true if the timeout has expired, false otherwise.
+ */
+static bool busy_loop_ep_timeout(unsigned long start_time,
+ struct eventpoll *ep)
+{
+ unsigned long bp_usec = READ_ONCE(ep->busy_poll_usecs);
+
+ if (bp_usec) {
+ unsigned long end_time = start_time + bp_usec;
+ unsigned long now = busy_loop_current_time();
+
+ return time_after(now, end_time);
+ } else {
+ return busy_loop_timeout(start_time);
+ }
+}
+
+static bool ep_busy_loop_on(struct eventpoll *ep)
+{
+ return !!ep->busy_poll_usecs || net_busy_loop_on();
+}
+
static bool ep_busy_loop_end(void *p, unsigned long start_time)
{
struct eventpoll *ep = p;
- return ep_events_available(ep) || busy_loop_timeout(start_time);
+ return ep_events_available(ep) || busy_loop_ep_timeout(start_time, ep);
}
/*
@@ -403,10 +439,15 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
{
unsigned int napi_id = READ_ONCE(ep->napi_id);
+ u16 budget = READ_ONCE(ep->busy_poll_budget);
+ bool prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll);
- if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) {
- napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
- BUSY_POLL_BUDGET);
+ if (!budget)
+ budget = BUSY_POLL_BUDGET;
+
+ if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) {
+ napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end,
+ ep, prefer_busy_poll, budget);
if (ep_events_available(ep))
return true;
/*
@@ -425,12 +466,12 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
*/
static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
{
- struct eventpoll *ep;
+ struct eventpoll *ep = epi->ep;
unsigned int napi_id;
struct socket *sock;
struct sock *sk;
- if (!net_busy_loop_on())
+ if (!ep_busy_loop_on(ep))
return;
sock = sock_from_file(epi->ffd.file);
@@ -442,7 +483,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
return;
napi_id = READ_ONCE(sk->sk_napi_id);
- ep = epi->ep;
/* Non-NAPI IDs can be rejected
* or
@@ -455,6 +495,49 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
ep->napi_id = napi_id;
}
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct eventpoll *ep = file->private_data;
+ void __user *uarg = (void __user *)arg;
+ struct epoll_params epoll_params;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
+ return -EFAULT;
+
+ /* pad byte must be zero */
+ if (epoll_params.__pad)
+ return -EINVAL;
+
+ if (epoll_params.busy_poll_usecs > S32_MAX)
+ return -EINVAL;
+
+ if (epoll_params.prefer_busy_poll > 1)
+ return -EINVAL;
+
+ if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
+ !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ WRITE_ONCE(ep->busy_poll_usecs, epoll_params.busy_poll_usecs);
+ WRITE_ONCE(ep->busy_poll_budget, epoll_params.busy_poll_budget);
+ WRITE_ONCE(ep->prefer_busy_poll, epoll_params.prefer_busy_poll);
+ return 0;
+ case EPIOCGPARAMS:
+ memset(&epoll_params, 0, sizeof(epoll_params));
+ epoll_params.busy_poll_usecs = READ_ONCE(ep->busy_poll_usecs);
+ epoll_params.busy_poll_budget = READ_ONCE(ep->busy_poll_budget);
+ epoll_params.prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll);
+ if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
#else
static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -466,6 +549,12 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
{
}
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_NET_RX_BUSY_POLL */
/*
@@ -678,12 +767,6 @@ static void ep_done_scan(struct eventpoll *ep,
write_unlock_irq(&ep->lock);
}
-static void epi_rcu_free(struct rcu_head *head)
-{
- struct epitem *epi = container_of(head, struct epitem, rcu);
- kmem_cache_free(epi_cache, epi);
-}
-
static void ep_get(struct eventpoll *ep)
{
refcount_inc(&ep->refcount);
@@ -767,7 +850,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
* ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
* use of the rbn field.
*/
- call_rcu(&epi->rcu, epi_rcu_free);
+ kfree_rcu(epi, rcu);
percpu_counter_dec(&ep->user->epoll_watches);
return ep_refcount_dec_and_test(ep);
@@ -825,6 +908,27 @@ static void ep_clear_and_put(struct eventpoll *ep)
ep_free(ep);
}
+static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+
+ if (!is_file_epoll(file))
+ return -EINVAL;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ case EPIOCGPARAMS:
+ ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static int ep_eventpoll_release(struct inode *inode, struct file *file)
{
struct eventpoll *ep = file->private_data;
@@ -931,6 +1035,8 @@ static const struct file_operations eventpoll_fops = {
.release = ep_eventpoll_release,
.poll = ep_eventpoll_poll,
.llseek = noop_llseek,
+ .unlocked_ioctl = ep_eventpoll_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/*
@@ -1153,7 +1259,7 @@ static inline bool chain_epi_lockless(struct epitem *epi)
* This callback takes a read lock in order not to contend with concurrent
* events from another file descriptor, thus all modifications to ->rdllist
* or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_scan_ready_list(), which stops all list modifications and guarantees
+ * ep_start/done_scan(), which stops all list modifications and guarantees
* that lists state is seen correctly.
*
* Another thing worth to mention is that ep_poll_callback() can be called
@@ -1751,7 +1857,7 @@ static int ep_send_events(struct eventpoll *ep,
* availability. At this point, no one can insert
* into ep->rdllist besides us. The epoll_ctl()
* callers are locked out by
- * ep_scan_ready_list() holding "mtx" and the
+ * ep_send_events() holding "mtx" and the
* poll callback will queue them in ep->ovflist.
*/
list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1904,7 +2010,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
__set_current_state(TASK_INTERRUPTIBLE);
/*
- * Do the final check under the lock. ep_scan_ready_list()
+ * Do the final check under the lock. ep_start/done_scan()
* plays with two lists (->rdllist and ->ovflist) and there
* is always a race when both lists are empty for short
* period of time although events are pending, so lock is
@@ -2058,6 +2164,11 @@ static int do_epoll_create(int flags)
error = PTR_ERR(file);
goto out_free_fd;
}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ ep->busy_poll_usecs = 0;
+ ep->busy_poll_budget = 0;
+ ep->prefer_busy_poll = false;
+#endif
ep->file = file;
fd_install(fd, file);
return fd;
diff --git a/fs/exec.c b/fs/exec.c
index 8cdd5b2dd09c..ff6f26671cfc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
struct filename *tmp = getname(library);
int error = PTR_ERR(tmp);
static const struct open_flags uselib_flags = {
- .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .open_flag = O_LARGEFILE | O_RDONLY,
.acc_mode = MAY_READ | MAY_EXEC,
.intent = LOOKUP_OPEN,
.lookup_flags = LOOKUP_FOLLOW,
@@ -904,6 +904,10 @@ EXPORT_SYMBOL(transfer_args_to_stack);
#endif /* CONFIG_MMU */
+/*
+ * On success, caller must call do_close_execat() on the returned
+ * struct file to close it.
+ */
static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
struct file *file;
@@ -948,6 +952,17 @@ exit:
return ERR_PTR(err);
}
+/**
+ * open_exec - Open a path name for execution
+ *
+ * @name: path name to open with the intent of executing it.
+ *
+ * Returns ERR_PTR on failure or allocated struct file on success.
+ *
+ * As this is a wrapper for the internal do_open_execat(), callers
+ * must call allow_write_access() before fput() on release. Also see
+ * do_close_execat().
+ */
struct file *open_exec(const char *name)
{
struct filename *filename = getname_kernel(name);
@@ -1143,7 +1158,6 @@ static int de_thread(struct task_struct *tsk)
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
-
/*
* We are going to release_task()->ptrace_unlink() silently,
* the tracer can sleep in do_wait(). EXIT_DEAD guarantees
@@ -1409,6 +1423,9 @@ int begin_new_exec(struct linux_binprm * bprm)
out_unlock:
up_write(&me->signal->exec_update_lock);
+ if (!bprm->cred)
+ mutex_unlock(&me->signal->cred_guard_mutex);
+
out:
return retval;
}
@@ -1484,6 +1501,15 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
return -ENOMEM;
}
+/* Matches do_open_execat() */
+static void do_close_execat(struct file *file)
+{
+ if (!file)
+ return;
+ allow_write_access(file);
+ fput(file);
+}
+
static void free_bprm(struct linux_binprm *bprm)
{
if (bprm->mm) {
@@ -1495,10 +1521,7 @@ static void free_bprm(struct linux_binprm *bprm)
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
- if (bprm->file) {
- allow_write_access(bprm->file);
- fput(bprm->file);
- }
+ do_close_execat(bprm->file);
if (bprm->executable)
fput(bprm->executable);
/* If a binfmt changed the interp, free it. */
@@ -1520,8 +1543,7 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
if (!bprm) {
- allow_write_access(file);
- fput(file);
+ do_close_execat(file);
return ERR_PTR(-ENOMEM);
}
@@ -1610,6 +1632,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
}
rcu_read_unlock();
+ /* "users" and "in_exec" locked for copy_fs() */
if (p->fs->users > n_fs)
bprm->unsafe |= LSM_UNSAFE_SHARE;
else
@@ -1696,7 +1719,6 @@ static int prepare_binprm(struct linux_binprm *bprm)
*/
int remove_arg_zero(struct linux_binprm *bprm)
{
- int ret = 0;
unsigned long offset;
char *kaddr;
struct page *page;
@@ -1707,10 +1729,8 @@ int remove_arg_zero(struct linux_binprm *bprm)
do {
offset = bprm->p & ~PAGE_MASK;
page = get_arg_page(bprm, bprm->p, 0);
- if (!page) {
- ret = -EFAULT;
- goto out;
- }
+ if (!page)
+ return -EFAULT;
kaddr = kmap_local_page(page);
for (; offset < PAGE_SIZE && kaddr[offset];
@@ -1723,10 +1743,8 @@ int remove_arg_zero(struct linux_binprm *bprm)
bprm->p++;
bprm->argc--;
- ret = 0;
-out:
- return ret;
+ return 0;
}
EXPORT_SYMBOL(remove_arg_zero);
@@ -1826,9 +1844,6 @@ static int exec_binprm(struct linux_binprm *bprm)
return 0;
}
-/*
- * sys_execve() executes a new program.
- */
static int bprm_execve(struct linux_binprm *bprm)
{
int retval;
diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c
index 5a2f119b7e8c..7cc200d89821 100644
--- a/fs/exfat/cache.c
+++ b/fs/exfat/cache.c
@@ -46,7 +46,7 @@ int exfat_cache_init(void)
{
exfat_cachep = kmem_cache_create("exfat_cache",
sizeof(struct exfat_cache),
- 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ 0, SLAB_RECLAIM_ACCOUNT,
exfat_cache_init_once);
if (!exfat_cachep)
return -ENOMEM;
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 9474cd50da6d..361595433480 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -275,6 +275,7 @@ struct exfat_sb_info {
spinlock_t inode_hash_lock;
struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+ struct rcu_head rcu;
};
#define EXFAT_CACHE_VALID 0
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index d25a96a148af..cc00f1a7a1e1 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -35,13 +35,18 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
if (new_num_clusters == num_clusters)
goto out;
- exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags);
- ret = exfat_find_last_cluster(sb, &clu, &last_clu);
- if (ret)
- return ret;
+ if (num_clusters) {
+ exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags);
+ ret = exfat_find_last_cluster(sb, &clu, &last_clu);
+ if (ret)
+ return ret;
+
+ clu.dir = last_clu + 1;
+ } else {
+ last_clu = EXFAT_EOF_CLUSTER;
+ clu.dir = EXFAT_EOF_CLUSTER;
+ }
- clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ?
- EXFAT_EOF_CLUSTER : last_clu + 1;
clu.size = 0;
clu.flags = ei->flags;
@@ -51,17 +56,19 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
return ret;
/* Append new clusters to chain */
- if (clu.flags != ei->flags) {
- exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters);
- ei->flags = ALLOC_FAT_CHAIN;
- }
- if (clu.flags == ALLOC_FAT_CHAIN)
- if (exfat_ent_set(sb, last_clu, clu.dir))
- goto free_clu;
-
- if (num_clusters == 0)
+ if (num_clusters) {
+ if (clu.flags != ei->flags)
+ if (exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters))
+ goto free_clu;
+
+ if (clu.flags == ALLOC_FAT_CHAIN)
+ if (exfat_ent_set(sb, last_clu, clu.dir))
+ goto free_clu;
+ } else
ei->start_clu = clu.dir;
+ ei->flags = clu.flags;
+
out:
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
/* Expanded range not zeroed, do not update valid_size */
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 522edcbb2ce4..0687f952956c 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -501,7 +501,7 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = mapping->host;
struct exfat_inode_info *ei = EXFAT_I(inode);
loff_t pos = iocb->ki_pos;
- loff_t size = iocb->ki_pos + iov_iter_count(iter);
+ loff_t size = pos + iov_iter_count(iter);
int rw = iov_iter_rw(iter);
ssize_t ret;
@@ -525,11 +525,10 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
*/
ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
if (ret < 0) {
- if (rw == WRITE)
+ if (rw == WRITE && ret != -EIOCBQUEUED)
exfat_write_failed(mapping, size);
- if (ret != -EIOCBQUEUED)
- return ret;
+ return ret;
} else
size = pos + ret;
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 705710f93e2d..afdf13c34ff5 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -655,7 +655,6 @@ static int exfat_load_upcase_table(struct super_block *sb,
unsigned int sect_size = sb->s_blocksize;
unsigned int i, index = 0;
u32 chksum = 0;
- int ret;
unsigned char skip = false;
unsigned short *upcase_table;
@@ -673,8 +672,7 @@ static int exfat_load_upcase_table(struct super_block *sb,
if (!bh) {
exfat_err(sb, "failed to read sector(0x%llx)",
(unsigned long long)sector);
- ret = -EIO;
- goto free_table;
+ return -EIO;
}
sector++;
for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) {
@@ -701,15 +699,12 @@ static int exfat_load_upcase_table(struct super_block *sb,
exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)",
index, chksum, utbl_checksum);
- ret = -EINVAL;
-free_table:
- exfat_free_upcase_table(sbi);
- return ret;
+ return -EINVAL;
}
static int exfat_load_default_upcase_table(struct super_block *sb)
{
- int i, ret = -EIO;
+ int i;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
unsigned char skip = false;
unsigned short uni = 0, *upcase_table;
@@ -740,8 +735,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb)
return 0;
/* FATAL error: default upcase table has error */
- exfat_free_upcase_table(sbi);
- return ret;
+ return -EIO;
}
int exfat_create_upcase_table(struct super_block *sb)
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index d9d4fa91010b..3d5ea2cfad66 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -39,9 +39,6 @@ static void exfat_put_super(struct super_block *sb)
exfat_free_bitmap(sbi);
brelse(sbi->boot_bh);
mutex_unlock(&sbi->s_lock);
-
- unload_nls(sbi->nls_io);
- exfat_free_upcase_table(sbi);
}
static int exfat_sync_fs(struct super_block *sb, int wait)
@@ -600,7 +597,7 @@ static int __exfat_fill_super(struct super_block *sb)
ret = exfat_load_bitmap(sb);
if (ret) {
exfat_err(sb, "failed to load alloc-bitmap");
- goto free_upcase_table;
+ goto free_bh;
}
ret = exfat_count_used_clusters(sb, &sbi->used_clusters);
@@ -613,8 +610,6 @@ static int __exfat_fill_super(struct super_block *sb)
free_alloc_bitmap:
exfat_free_bitmap(sbi);
-free_upcase_table:
- exfat_free_upcase_table(sbi);
free_bh:
brelse(sbi->boot_bh);
return ret;
@@ -701,12 +696,10 @@ put_inode:
sb->s_root = NULL;
free_table:
- exfat_free_upcase_table(sbi);
exfat_free_bitmap(sbi);
brelse(sbi->boot_bh);
check_nls_io:
- unload_nls(sbi->nls_io);
return err;
}
@@ -771,13 +764,22 @@ static int exfat_init_fs_context(struct fs_context *fc)
return 0;
}
+static void delayed_free(struct rcu_head *p)
+{
+ struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
+
+ unload_nls(sbi->nls_io);
+ exfat_free_upcase_table(sbi);
+ exfat_free_sbi(sbi);
+}
+
static void exfat_kill_sb(struct super_block *sb)
{
struct exfat_sb_info *sbi = sb->s_fs_info;
kill_block_super(sb);
if (sbi)
- exfat_free_sbi(sbi);
+ call_rcu(&sbi->rcu, delayed_free);
}
static struct file_system_type exfat_fs_type = {
@@ -811,7 +813,7 @@ static int __init init_exfat_fs(void)
exfat_inode_cachep = kmem_cache_create("exfat_inode_cache",
sizeof(struct exfat_inode_info),
- 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ 0, SLAB_RECLAIM_ACCOUNT,
exfat_inode_init_once);
if (!exfat_inode_cachep) {
err = -ENOMEM;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 3ae0154c5680..07ea3d62b298 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -255,7 +255,7 @@ static bool filldir_one(struct dir_context *ctx, const char *name, int len,
container_of(ctx, struct getdents_callback, ctx);
buf->sequence++;
- if (buf->ino == ino && len <= NAME_MAX) {
+ if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) {
memcpy(buf->name, name, len);
buf->name[len] = '\0';
buf->found = 1;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 01f9addc8b1f..cabea887314d 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -213,8 +213,7 @@ static int __init init_inodecache(void)
{
ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache",
sizeof(struct ext2_inode_info), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT),
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
offsetof(struct ext2_inode_info, i_data),
sizeof_field(struct ext2_inode_info, i_data),
init_once);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a5d784872303..3c0d7d143036 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -252,8 +252,10 @@ struct ext4_allocation_request {
#define EXT4_MAP_MAPPED BIT(BH_Mapped)
#define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten)
#define EXT4_MAP_BOUNDARY BIT(BH_Boundary)
+#define EXT4_MAP_DELAYED BIT(BH_Delay)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
- EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
+ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+ EXT4_MAP_DELAYED)
struct ext4_map_blocks {
ext4_fsblk_t m_pblk;
@@ -1548,7 +1550,7 @@ struct ext4_sb_info {
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
- struct bdev_handle *s_journal_bdev_handle;
+ struct file *s_journal_bdev_file;
#ifdef CONFIG_QUOTA
/* Names of quota files with journalled quota */
char __rcu *s_qf_names[EXT4_MAXQUOTAS];
@@ -2912,10 +2914,10 @@ extern const struct seq_operations ext4_mb_seq_groups_ops;
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
extern int ext4_mb_init(struct super_block *);
-extern int ext4_mb_release(struct super_block *);
+extern void ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
-extern void ext4_discard_preallocations(struct inode *, unsigned int);
+extern void ext4_discard_preallocations(struct inode *);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 01299b55a567..7669d154c05e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
* i_rwsem. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
@@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode,
/*
- * ext4_ext_determine_hole - determine hole around given block
+ * ext4_ext_find_hole - find hole around given block according to the given path
* @inode: inode we lookup in
* @path: path in extent tree to @lblk
* @lblk: pointer to logical block around which we want to determine hole
@@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode,
* The function returns the length of a hole starting at @lblk. We update @lblk
* to the beginning of the hole if we managed to find it.
*/
-static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t *lblk)
+static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *lblk)
{
int depth = ext_depth(inode);
struct ext4_extent *ex;
@@ -2271,30 +2271,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
}
/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
- */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
- ext4_lblk_t hole_len)
-{
- struct extent_status es;
-
- ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
- hole_start + hole_len - 1, &es);
- if (es.es_len) {
- /* There's delayed extent containing lblock? */
- if (es.es_lblk <= hole_start)
- return;
- hole_len = min(es.es_lblk - hole_start, hole_len);
- }
- ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
- ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
- EXTENT_STATUS_HOLE);
-}
-
-/*
* ext4_ext_rm_idx:
* removes index from the index block.
*/
@@ -4062,6 +4038,72 @@ static int get_implied_cluster_alloc(struct super_block *sb,
return 0;
}
+/*
+ * Determine hole length around the given logical block, first try to
+ * locate and expand the hole from the given @path, and then adjust it
+ * if it's partially or completely converted to delayed extents, insert
+ * it into the extent cache tree if it's indeed a hole, finally return
+ * the length of the determined extent.
+ */
+static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t lblk)
+{
+ ext4_lblk_t hole_start, len;
+ struct extent_status es;
+
+ hole_start = lblk;
+ len = ext4_ext_find_hole(inode, path, &hole_start);
+again:
+ ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
+ hole_start + len - 1, &es);
+ if (!es.es_len)
+ goto insert_hole;
+
+ /*
+ * There's a delalloc extent in the hole, handle it if the delalloc
+ * extent is in front of, behind and straddle the queried range.
+ */
+ if (lblk >= es.es_lblk + es.es_len) {
+ /*
+ * The delalloc extent is in front of the queried range,
+ * find again from the queried start block.
+ */
+ len -= lblk - hole_start;
+ hole_start = lblk;
+ goto again;
+ } else if (in_range(lblk, es.es_lblk, es.es_len)) {
+ /*
+ * The delalloc extent containing lblk, it must have been
+ * added after ext4_map_blocks() checked the extent status
+ * tree so we are not holding i_rwsem and delalloc info is
+ * only stabilized by i_data_sem we are going to release
+ * soon. Don't modify the extent status tree and report
+ * extent as a hole, just adjust the length to the delalloc
+ * extent's after lblk.
+ */
+ len = es.es_lblk + es.es_len - lblk;
+ return len;
+ } else {
+ /*
+ * The delalloc extent is partially or completely behind
+ * the queried range, update hole length until the
+ * beginning of the delalloc extent.
+ */
+ len = min(es.es_lblk - hole_start, len);
+ }
+
+insert_hole:
+ /* Put just found gap into cache to speed up subsequent requests */
+ ext_debug(inode, " -> %u:%u\n", hole_start, len);
+ ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
+
+ /* Update hole_len to reflect hole size after lblk */
+ if (hole_start != lblk)
+ len -= lblk - hole_start;
+
+ return len;
+}
/*
* Block allocation/map/preallocation routine for extents based files
@@ -4179,22 +4221,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- ext4_lblk_t hole_start, hole_len;
+ ext4_lblk_t len;
- hole_start = map->m_lblk;
- hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
- /*
- * put just found gap into cache to speed up
- * subsequent requests
- */
- ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+ len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
- /* Update hole_len to reflect hole size after map->m_lblk */
- if (hole_start != map->m_lblk)
- hole_len -= map->m_lblk - hole_start;
map->m_pblk = 0;
- map->m_len = min_t(unsigned int, map->m_len, hole_len);
-
+ map->m_len = min_t(unsigned int, map->m_len, len);
goto out;
}
@@ -4313,7 +4345,7 @@ got_allocated_blocks:
* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free().
*/
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
ext4_free_blocks(handle, inode, NULL, newblock,
@@ -5357,7 +5389,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
@@ -5365,7 +5397,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start, SHIFT_LEFT);
@@ -5497,7 +5529,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
goto out_stop;
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
path = ext4_find_extent(inode, offset_lblk, NULL, 0);
if (IS_ERR(path)) {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6aa15dafc677..54d6ff22585c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
(atomic_read(&inode->i_writecount) == 1) &&
!EXT4_I(inode)->i_reserved_data_blocks) {
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 11e6f33677a2..df853c4d3a8c 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -576,9 +576,9 @@ static bool ext4_getfsmap_is_valid_device(struct super_block *sb,
if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev))
return true;
- if (EXT4_SB(sb)->s_journal_bdev_handle &&
+ if (EXT4_SB(sb)->s_journal_bdev_file &&
fm->fmr_device ==
- new_encode_dev(EXT4_SB(sb)->s_journal_bdev_handle->bdev->bd_dev))
+ new_encode_dev(file_bdev(EXT4_SB(sb)->s_journal_bdev_file)->bd_dev))
return true;
return false;
}
@@ -648,9 +648,9 @@ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head,
memset(handlers, 0, sizeof(handlers));
handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev);
handlers[0].gfd_fn = ext4_getfsmap_datadev;
- if (EXT4_SB(sb)->s_journal_bdev_handle) {
+ if (EXT4_SB(sb)->s_journal_bdev_file) {
handlers[1].gfd_dev = new_encode_dev(
- EXT4_SB(sb)->s_journal_bdev_handle->bdev->bd_dev);
+ file_bdev(EXT4_SB(sb)->s_journal_bdev_file)->bd_dev);
handlers[1].gfd_fn = ext4_getfsmap_logdev;
}
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index a9f3716119d3..d8ca7f64f952 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
* i_rwsem. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5af1b0b8680e..2ccf3b5e3a7c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
*/
if ((ei->i_reserved_data_blocks == 0) &&
!inode_is_open_for_write(inode))
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
}
static int __check_block_validity(struct inode *inode, const char *func,
@@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = retval;
} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
map->m_pblk = 0;
+ map->m_flags |= ext4_es_is_delayed(&es) ?
+ EXT4_MAP_DELAYED : 0;
retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
@@ -1703,11 +1705,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
- if (ext4_es_is_hole(&es)) {
- retval = 0;
- down_read(&EXT4_I(inode)->i_data_sem);
+ if (ext4_es_is_hole(&es))
goto add_delayed;
- }
/*
* Delayed extent could be allocated by fallocate.
@@ -1749,26 +1748,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
retval = ext4_ind_map_blocks(NULL, inode, map, 0);
-
-add_delayed:
- if (retval == 0) {
- int ret;
-
- /*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
- */
-
- ret = ext4_insert_delayed_block(inode, map->m_lblk);
- if (ret != 0) {
- retval = ret;
- goto out_unlock;
- }
-
- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
- } else if (retval > 0) {
+ if (retval < 0) {
+ up_read(&EXT4_I(inode)->i_data_sem);
+ return retval;
+ }
+ if (retval > 0) {
unsigned int status;
if (unlikely(retval != map->m_len)) {
@@ -1783,11 +1767,21 @@ add_delayed:
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
+ up_read(&EXT4_I(inode)->i_data_sem);
+ return retval;
}
+ up_read(&EXT4_I(inode)->i_data_sem);
-out_unlock:
- up_read((&EXT4_I(inode)->i_data_sem));
+add_delayed:
+ down_write(&EXT4_I(inode)->i_data_sem);
+ retval = ext4_insert_delayed_block(inode, map->m_lblk);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ if (retval)
+ return retval;
+ map_bh(bh, inode->i_sb, invalid_block);
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
return retval;
}
@@ -3268,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
iomap->addr = (u64) map->m_pblk << blkbits;
if (flags & IOMAP_DAX)
iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
+ } else if (map->m_flags & EXT4_MAP_DELAYED) {
+ iomap->type = IOMAP_DELALLOC;
+ iomap->addr = IOMAP_NULL_ADDR;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
@@ -3430,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = {
.iomap_end = ext4_iomap_end,
};
-static bool ext4_iomap_is_delalloc(struct inode *inode,
- struct ext4_map_blocks *map)
-{
- struct extent_status es;
- ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
-
- ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
- map->m_lblk, end, &es);
-
- if (!es.es_len || es.es_lblk > end)
- return false;
-
- if (es.es_lblk > map->m_lblk) {
- map->m_len = es.es_lblk - map->m_lblk;
- return false;
- }
-
- offset = map->m_lblk - es.es_lblk;
- map->m_len = es.es_len - offset;
-
- return true;
-}
-
static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
loff_t length, unsigned int flags,
struct iomap *iomap, struct iomap *srcmap)
{
int ret;
- bool delalloc = false;
struct ext4_map_blocks map;
u8 blkbits = inode->i_blkbits;
@@ -3499,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret < 0)
return ret;
- if (ret == 0)
- delalloc = ext4_iomap_is_delalloc(inode, &map);
-
set_iomap:
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
- if (delalloc && iomap->type == IOMAP_HOLE)
- iomap->type = IOMAP_DELALLOC;
return 0;
}
@@ -4015,12 +3983,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
/* If there are blocks to remove, do it */
if (stop_block > first_block) {
+ ext4_lblk_t hole_len = stop_block - first_block;
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
- ext4_es_remove_extent(inode, first_block,
- stop_block - first_block);
+ ext4_es_remove_extent(inode, first_block, hole_len);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_remove_space(inode, first_block,
@@ -4029,6 +3997,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
ret = ext4_ind_remove_space(handle, inode, first_block,
stop_block);
+ ext4_es_insert_extent(inode, first_block, hole_len, ~0,
+ EXTENT_STATUS_HOLE);
up_write(&EXT4_I(inode)->i_data_sem);
}
ext4_fc_track_range(handle, inode, first_block, stop_block);
@@ -4170,7 +4140,7 @@ int ext4_truncate(struct inode *inode)
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
err = ext4_ext_truncate(handle, inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index aa6be510eb8f..7160a71044c8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_reset_inode_seed(inode);
ext4_reset_inode_seed(inode_bl);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
err = ext4_mark_inode_dirty(handle, inode);
if (err < 0) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f44f668e407f..e4f7cf9d89c4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
"freeing block already freed "
"(bit %u)",
first + i);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -677,7 +677,7 @@ do { \
} \
} while (0)
-static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
+static void __mb_check_buddy(struct ext4_buddy *e4b, char *file,
const char *function, int line)
{
struct super_block *sb = e4b->bd_sb;
@@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
void *buddy2;
if (e4b->bd_info->bb_check_counter++ % 10)
- return 0;
+ return;
while (order > 1) {
buddy = mb_find_buddy(e4b, order, &max);
@@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
grp = ext4_get_group_info(sb, e4b->bd_group);
if (!grp)
- return NULL;
+ return;
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
@@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
for (i = 0; i < pa->pa_len; i++)
MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
}
- return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
@@ -842,7 +841,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int new_order;
- if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
return;
new_order = mb_avg_fragment_size_order(sb,
@@ -871,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
* cr level needs an update.
*/
static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+ enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *iter;
@@ -945,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o
* order. Updates *new_cr if cr level needs an update.
*/
static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+ enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL;
@@ -990,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
* much and fall to CR_GOAL_LEN_SLOW in that case.
*/
static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+ enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL;
@@ -1125,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
}
if (*new_cr == CR_POWER2_ALIGNED) {
- ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups);
+ ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
} else if (*new_cr == CR_GOAL_LEN_FAST) {
- ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups);
+ ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
} else if (*new_cr == CR_BEST_AVAIL_LEN) {
- ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups);
+ ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
* TODO: For CR=2, we can arrange groups in an rb tree sorted by
@@ -1233,6 +1232,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
atomic64_add(period, &sbi->s_mb_generation_time);
}
+static void mb_regenerate_buddy(struct ext4_buddy *e4b)
+{
+ int count;
+ int order = 1;
+ void *buddy;
+
+ while ((buddy = mb_find_buddy(e4b, order++, &count)))
+ mb_set_bits(buddy, 0, count);
+
+ e4b->bd_info->bb_fragments = 0;
+ memset(e4b->bd_info->bb_counters, 0,
+ sizeof(*e4b->bd_info->bb_counters) *
+ (e4b->bd_sb->s_blocksize_bits + 2));
+
+ ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
+ e4b->bd_bitmap, e4b->bd_group, e4b->bd_info);
+}
+
/* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@@ -1891,11 +1908,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
- this_cpu_inc(discard_pa_seq);
- e4b->bd_info->bb_free += count;
- if (first < e4b->bd_info->bb_first_free)
- e4b->bd_info->bb_first_free = first;
-
/* access memory sequentially: check left neighbour,
* clear range and then check right neighbour
*/
@@ -1909,21 +1921,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;
+ /*
+ * Fastcommit replay can free already freed blocks which
+ * corrupts allocation info. Regenerate it.
+ */
+ if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+ mb_regenerate_buddy(e4b);
+ goto check;
+ }
+
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(sbi, block);
- if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing already freed block (bit %u); block bitmap corrupt.",
- block);
- ext4_mark_group_bitmap_corrupted(
- sb, e4b->bd_group,
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
- }
- goto done;
+ ext4_grp_locked_error(sb, e4b->bd_group,
+ inode ? inode->i_ino : 0, blocknr,
+ "freeing already freed block (bit %u); block bitmap corrupt.",
+ block);
+ return;
}
+ this_cpu_inc(discard_pa_seq);
+ e4b->bd_info->bb_free += count;
+ if (first < e4b->bd_info->bb_first_free)
+ e4b->bd_info->bb_first_free = first;
+
/* let's maintain fragments counter */
if (left_is_free && right_is_free)
e4b->bd_info->bb_fragments--;
@@ -1948,9 +1970,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (first <= last)
mb_buddy_mark_free(e4b, first >> 1, last >> 1);
-done:
mb_set_largest_free_order(sb, e4b->bd_info);
mb_update_avg_fragment_size(sb, e4b->bd_info);
+check:
mb_check_buddy(e4b);
}
@@ -2276,6 +2298,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -2283,6 +2308,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
}
@@ -2309,12 +2335,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
- ext4_mb_unload_buddy(e4b);
- return 0;
- }
-
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
ex.fe_logical = 0xDEADFA11; /* debug value */
@@ -2347,6 +2371,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ac->ac_b_ex = ex;
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -2380,12 +2405,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
k = mb_find_next_zero_bit(buddy, max, 0);
if (k >= max) {
+ ext4_mark_group_bitmap_corrupted(ac->ac_sb,
+ e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
"%d free clusters of order %d. But found 0",
grp->bb_counters[i], i);
- ext4_mark_group_bitmap_corrupted(ac->ac_sb,
- e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
break;
}
ac->ac_found++;
@@ -2436,12 +2461,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* have free blocks
*/
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
"group info. But bitmap says 0",
free);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
break;
}
@@ -2467,12 +2492,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
if (WARN_ON(ex.fe_len <= 0))
break;
if (free < ex.fe_len) {
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
"group info. But got %d blocks",
free, ex.fe_len);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
- EXT4_GROUP_INFO_BBITMAP_CORRUPT);
/*
* The number of free blocks differs. This mostly
* indicate that the bitmap is corrupt. So exit
@@ -3725,7 +3750,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
return count;
}
-int ext4_mb_release(struct super_block *sb)
+void ext4_mb_release(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
@@ -3801,8 +3826,6 @@ int ext4_mb_release(struct super_block *sb)
}
free_percpu(sbi->s_locality_groups);
-
- return 0;
}
static inline int ext4_issue_discard(struct super_block *sb,
@@ -5284,7 +5307,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
* the caller MUST hold group/inode locks.
* TODO: optimize the case when there are no in-core structures yet
*/
-static noinline_for_stack int
+static noinline_for_stack void
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
struct ext4_prealloc_space *pa)
{
@@ -5334,11 +5357,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
*/
}
atomic_add(free, &sbi->s_mb_discarded);
-
- return 0;
}
-static noinline_for_stack int
+static noinline_for_stack void
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
struct ext4_prealloc_space *pa)
{
@@ -5352,13 +5373,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
e4b->bd_group, group, pa->pa_pstart);
- return 0;
+ return;
}
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
-
- return 0;
}
/*
@@ -5479,7 +5498,7 @@ out_dbg:
*
* FIXME!! Make sure it is valid at all the call sites
*/
-void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
+void ext4_discard_preallocations(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct super_block *sb = inode->i_sb;
@@ -5491,9 +5510,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
struct rb_node *iter;
int err;
- if (!S_ISREG(inode->i_mode)) {
+ if (!S_ISREG(inode->i_mode))
return;
- }
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
return;
@@ -5501,15 +5519,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
mb_debug(sb, "discard preallocation for inode %lu\n",
inode->i_ino);
trace_ext4_discard_preallocations(inode,
- atomic_read(&ei->i_prealloc_active), needed);
-
- if (needed == 0)
- needed = UINT_MAX;
+ atomic_read(&ei->i_prealloc_active));
repeat:
/* first, collect all pa's in the inode */
write_lock(&ei->i_prealloc_lock);
- for (iter = rb_first(&ei->i_prealloc_node); iter && needed;
+ for (iter = rb_first(&ei->i_prealloc_node); iter;
iter = rb_next(iter)) {
pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
@@ -5533,7 +5548,6 @@ repeat:
spin_unlock(&pa->pa_lock);
rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
list_add(&pa->u.pa_tmp_list, &list);
- needed--;
continue;
}
@@ -5943,7 +5957,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/*
* release all resource we used in allocation
*/
-static int ext4_mb_release_context(struct ext4_allocation_context *ac)
+static void ext4_mb_release_context(struct ext4_allocation_context *ac)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_prealloc_space *pa = ac->ac_pa;
@@ -5980,7 +5994,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
- return 0;
}
static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
@@ -6761,6 +6774,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
bool set_trimmed = false;
void *bitmap;
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ return 0;
+
last = ext4_last_grp_cluster(sb, e4b->bd_group);
bitmap = e4b->bd_bitmap;
if (start == 0 && max >= last)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index d7aeb5da7d86..56938532b4ce 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -192,7 +192,6 @@ struct ext4_allocation_context {
*/
ext4_grpblk_t ac_orig_goal_len;
- __u32 ac_groups_considered;
__u32 ac_flags; /* allocation hints */
__u16 ac_groups_scanned;
__u16 ac_groups_linear_remaining;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3aa57376d9c2..7cd4afa4de1d 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -618,6 +618,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
goto out;
o_end = o_start + len;
+ *moved_len = 0;
while (o_start < o_end) {
struct ext4_extent *ex;
ext4_lblk_t cur_blk, next_blk;
@@ -672,7 +673,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
/* Swap original branches with new branches */
- move_extent_per_page(o_filp, donor_inode,
+ *moved_len += move_extent_per_page(o_filp, donor_inode,
orig_page_index, donor_page_index,
offset_in_page, cur_len,
unwritten, &ret);
@@ -682,14 +683,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
o_start += cur_len;
d_start += cur_len;
}
- *moved_len = o_start - orig_blk;
- if (*moved_len > len)
- *moved_len = len;
out:
if (*moved_len) {
- ext4_discard_preallocations(orig_inode, 0);
- ext4_discard_preallocations(donor_inode, 0);
+ ext4_discard_preallocations(orig_inode);
+ ext4_discard_preallocations(donor_inode);
}
ext4_free_ext_path(path);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 05b647e6bc19..5e4f65c14dfb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1762,7 +1762,6 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
struct buffer_head *bh;
err = ext4_fname_prepare_lookup(dir, dentry, &fname);
- generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dcba0f85dfe2..59c72b6dd153 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1359,14 +1359,14 @@ static void ext4_put_super(struct super_block *sb)
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
- if (sbi->s_journal_bdev_handle) {
+ if (sbi->s_journal_bdev_file) {
/*
* Invalidate the journal device's buffers. We don't want them
* floating about in memory - the physical journal device may
* hotswapped, and it breaks the `ro-after' testing code.
*/
- sync_blockdev(sbi->s_journal_bdev_handle->bdev);
- invalidate_bdev(sbi->s_journal_bdev_handle->bdev);
+ sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
+ invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
}
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
@@ -1500,8 +1500,7 @@ static int __init init_inodecache(void)
{
ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
sizeof(struct ext4_inode_info), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT),
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
offsetof(struct ext4_inode_info, i_data),
sizeof_field(struct ext4_inode_info, i_data),
init_once);
@@ -1525,7 +1524,7 @@ void ext4_clear_inode(struct inode *inode)
ext4_fc_del(inode);
invalidate_inode_buffers(inode);
clear_inode(inode);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
@@ -4233,7 +4232,7 @@ int ext4_calculate_overhead(struct super_block *sb)
* Add the internal journal blocks whether the journal has been
* loaded or not
*/
- if (sbi->s_journal && !sbi->s_journal_bdev_handle)
+ if (sbi->s_journal && !sbi->s_journal_bdev_file)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
@@ -5346,7 +5345,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sb->s_qcop = &ext4_qctl_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
- memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+ super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
@@ -5484,6 +5483,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount4;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
@@ -5670,9 +5670,9 @@ failed_mount:
#endif
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
brelse(sbi->s_sbh);
- if (sbi->s_journal_bdev_handle) {
- invalidate_bdev(sbi->s_journal_bdev_handle->bdev);
- bdev_release(sbi->s_journal_bdev_handle);
+ if (sbi->s_journal_bdev_file) {
+ invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
+ fput(sbi->s_journal_bdev_file);
}
out_fail:
invalidate_bdev(sb->s_bdev);
@@ -5842,30 +5842,30 @@ static journal_t *ext4_open_inode_journal(struct super_block *sb,
return journal;
}
-static struct bdev_handle *ext4_get_journal_blkdev(struct super_block *sb,
+static struct file *ext4_get_journal_blkdev(struct super_block *sb,
dev_t j_dev, ext4_fsblk_t *j_start,
ext4_fsblk_t *j_len)
{
struct buffer_head *bh;
struct block_device *bdev;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
struct ext4_super_block *es;
int errno;
- bdev_handle = bdev_open_by_dev(j_dev,
+ bdev_file = bdev_file_open_by_dev(j_dev,
BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
sb, &fs_holder_ops);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
ext4_msg(sb, KERN_ERR,
"failed to open journal device unknown-block(%u,%u) %ld",
- MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_handle));
- return bdev_handle;
+ MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file));
+ return bdev_file;
}
- bdev = bdev_handle->bdev;
+ bdev = file_bdev(bdev_file);
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
@@ -5912,12 +5912,12 @@ static struct bdev_handle *ext4_get_journal_blkdev(struct super_block *sb,
*j_start = sb_block + 1;
*j_len = ext4_blocks_count(es);
brelse(bh);
- return bdev_handle;
+ return bdev_file;
out_bh:
brelse(bh);
out_bdev:
- bdev_release(bdev_handle);
+ fput(bdev_file);
return ERR_PTR(errno);
}
@@ -5927,14 +5927,14 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
journal_t *journal;
ext4_fsblk_t j_start;
ext4_fsblk_t j_len;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
int errno = 0;
- bdev_handle = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
- if (IS_ERR(bdev_handle))
- return ERR_CAST(bdev_handle);
+ bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
+ if (IS_ERR(bdev_file))
+ return ERR_CAST(bdev_file);
- journal = jbd2_journal_init_dev(bdev_handle->bdev, sb->s_bdev, j_start,
+ journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start,
j_len, sb->s_blocksize);
if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "failed to create device journal");
@@ -5949,14 +5949,14 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
goto out_journal;
}
journal->j_private = sb;
- EXT4_SB(sb)->s_journal_bdev_handle = bdev_handle;
+ EXT4_SB(sb)->s_journal_bdev_file = bdev_file;
ext4_init_journal_params(sb, journal);
return journal;
out_journal:
jbd2_journal_destroy(journal);
out_bdev:
- bdev_release(bdev_handle);
+ fput(bdev_file);
return ERR_PTR(errno);
}
@@ -7314,12 +7314,12 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
static void ext4_kill_sb(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct bdev_handle *handle = sbi ? sbi->s_journal_bdev_handle : NULL;
+ struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL;
kill_block_super(sb);
- if (handle)
- bdev_release(handle);
+ if (bdev_file)
+ fput(bdev_file);
}
static struct file_system_type ext4_fs_type = {
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 75bf1f88843c..645240cc0229 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -92,10 +92,12 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
if (!dentry) {
bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
- if (IS_ERR(bh))
- return ERR_CAST(bh);
- if (!bh || !ext4_buffer_uptodate(bh))
+ if (IS_ERR(bh) || !bh)
return ERR_PTR(-ECHILD);
+ if (!ext4_buffer_uptodate(bh)) {
+ brelse(bh);
+ return ERR_PTR(-ECHILD);
+ }
} else {
bh = ext4_bread(NULL, inode, 0, 0);
if (IS_ERR(bh))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..4c77e8ce5c75 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,6 +24,7 @@
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/part_stat.h>
+#include <linux/rw_hint.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
@@ -1239,7 +1240,7 @@ struct f2fs_bio_info {
#define FDEV(i) (sbi->devs[i])
#define RDEV(i) (raw_super->devs[i])
struct f2fs_dev_info {
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bdev;
char path[MAX_PATH_LEN];
unsigned int total_segments;
@@ -3364,17 +3365,6 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
return is_set_ckpt_flags(sbi, CP_ERROR_FLAG);
}
-static inline bool is_dot_dotdot(const u8 *name, size_t len)
-{
- if (len == 1 && name[0] == '.')
- return true;
-
- if (len == 2 && name[0] == '.' && name[1] == '.')
- return true;
-
- return false;
-}
-
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b3bb815fc6aa..f7f63a567d86 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -531,7 +531,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
}
err = f2fs_prepare_lookup(dir, dentry, &fname);
- generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
goto out_splice;
if (err)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4c8836ded90f..e1065ba70207 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1971,9 +1971,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
}
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
+ unsigned int nofs_flags;
+ int ret;
+
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- sector, nr_sects, GFP_NOFS);
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects);
+ memalloc_nofs_restore(nofs_flags);
+ return ret;
}
__queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
@@ -4865,6 +4871,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
block_t zone_block, valid_block_cnt;
unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
int ret;
+ unsigned int nofs_flags;
if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
@@ -4912,8 +4919,10 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
"pointer: valid block[0x%x,0x%x] cond[0x%x]",
zone_segno, valid_block_cnt, zone->cond);
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
- zone->start, zone->len, GFP_NOFS);
+ zone->start, zone->len);
+ memalloc_nofs_restore(nofs_flags);
if (ret == -EOPNOTSUPP) {
ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
zone->len - (zone->wp - zone->start),
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index d45ab0992ae5..b880b746f226 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1605,7 +1605,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- bdev_release(FDEV(i).bdev_handle);
+ fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
@@ -4247,7 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
for (i = 0; i < max_devices; i++) {
if (i == 0)
- FDEV(0).bdev_handle = sbi->sb->s_bdev_handle;
+ FDEV(0).bdev_file = sbi->sb->s_bdev_file;
else if (!RDEV(i).path[0])
break;
@@ -4267,14 +4267,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
FDEV(i).end_blk = FDEV(i).start_blk +
(FDEV(i).total_segments <<
sbi->log_blocks_per_seg) - 1;
- FDEV(i).bdev_handle = bdev_open_by_path(
+ FDEV(i).bdev_file = bdev_file_open_by_path(
FDEV(i).path, mode, sbi->sb, NULL);
}
}
- if (IS_ERR(FDEV(i).bdev_handle))
- return PTR_ERR(FDEV(i).bdev_handle);
+ if (IS_ERR(FDEV(i).bdev_file))
+ return PTR_ERR(FDEV(i).bdev_file);
- FDEV(i).bdev = FDEV(i).bdev_handle->bdev;
+ FDEV(i).bdev = file_bdev(FDEV(i).bdev_file);
/* to release errored devices */
sbi->s_ndevs = i + 1;
@@ -4496,7 +4496,7 @@ try_onemore:
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
- memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid));
sb->s_iflags |= SB_I_CGROUPWB;
/* init f2fs-specific super block info */
@@ -4660,6 +4660,7 @@ try_onemore:
goto free_node_inode;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 738e427e2d21..2af424e200b3 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -47,7 +47,7 @@ int __init fat_cache_init(void)
{
fat_cache_cachep = kmem_cache_create("fat_cache",
sizeof(struct fat_cache),
- 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ 0, SLAB_RECLAIM_ACCOUNT,
init_once);
if (fat_cache_cachep == NULL)
return -ENOMEM;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1fac3dabf130..d9e6fbb6f246 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -787,7 +787,7 @@ static int __init fat_init_inodecache(void)
fat_inode_cachep = kmem_cache_create("fat_inode_cache",
sizeof(struct msdos_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (fat_inode_cachep == NULL)
return -ENOMEM;
@@ -1762,6 +1762,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
else /* fat 16 or 12 */
sbi->vol_id = bpb.fat16_vol_id;
+ __le32 vol_id_le = cpu_to_le32(sbi->vol_id);
+ super_set_uuid(sb, (void *) &vol_id_le, sizeof(vol_id_le));
+
sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry);
sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c80a6acad742..54cc85d3338e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -27,6 +27,7 @@
#include <linux/memfd.h>
#include <linux/compat.h>
#include <linux/mount.h>
+#include <linux/rw_hint.h>
#include <linux/poll.h>
#include <asm/siginfo.h>
@@ -268,8 +269,15 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
-static bool rw_hint_valid(enum rw_hint hint)
+static bool rw_hint_valid(u64 hint)
{
+ BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
+ BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
+ BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
+ BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
+ BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
+ BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
+
switch (hint) {
case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE:
@@ -283,34 +291,40 @@ static bool rw_hint_valid(enum rw_hint hint)
}
}
-static long fcntl_rw_hint(struct file *file, unsigned int cmd,
- unsigned long arg)
+static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
struct inode *inode = file_inode(file);
u64 __user *argp = (u64 __user *)arg;
- enum rw_hint hint;
- u64 h;
+ u64 hint = READ_ONCE(inode->i_write_hint);
- switch (cmd) {
- case F_GET_RW_HINT:
- h = inode->i_write_hint;
- if (copy_to_user(argp, &h, sizeof(*argp)))
- return -EFAULT;
- return 0;
- case F_SET_RW_HINT:
- if (copy_from_user(&h, argp, sizeof(h)))
- return -EFAULT;
- hint = (enum rw_hint) h;
- if (!rw_hint_valid(hint))
- return -EINVAL;
+ if (copy_to_user(argp, &hint, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+}
- inode_lock(inode);
- inode->i_write_hint = hint;
- inode_unlock(inode);
- return 0;
- default:
+static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct inode *inode = file_inode(file);
+ u64 __user *argp = (u64 __user *)arg;
+ u64 hint;
+
+ if (copy_from_user(&hint, argp, sizeof(hint)))
+ return -EFAULT;
+ if (!rw_hint_valid(hint))
return -EINVAL;
- }
+
+ WRITE_ONCE(inode->i_write_hint, hint);
+
+ /*
+ * file->f_mapping->host may differ from inode. As an example,
+ * blkdev_open() modifies file->f_mapping.
+ */
+ if (file->f_mapping->host != inode)
+ WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
+
+ return 0;
}
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -416,8 +430,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
err = memfd_fcntl(filp, cmd, argi);
break;
case F_GET_RW_HINT:
+ err = fcntl_get_rw_hint(filp, cmd, arg);
+ break;
case F_SET_RW_HINT:
- err = fcntl_rw_hint(filp, cmd, arg);
+ err = fcntl_set_rw_hint(filp, cmd, arg);
break;
default:
break;
@@ -846,12 +862,6 @@ int send_sigurg(struct fown_struct *fown)
static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __ro_after_init;
-static void fasync_free_rcu(struct rcu_head *head)
-{
- kmem_cache_free(fasync_cache,
- container_of(head, struct fasync_struct, fa_rcu));
-}
-
/*
* Remove a fasync entry. If successfully removed, return
* positive and clear the FASYNC flag. If no entry exists,
@@ -877,7 +887,7 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
write_unlock_irq(&fa->fa_lock);
*fp = fa->fa_next;
- call_rcu(&fa->fa_rcu, fasync_free_rcu);
+ kfree_rcu(fa, fa_rcu);
filp->f_flags &= ~FASYNC;
result = 1;
break;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 18b3ba8dc8ea..57a12614addf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -36,7 +36,7 @@ static long do_sys_name_to_handle(const struct path *path,
if (f_handle.handle_bytes > MAX_HANDLE_SZ)
return -EINVAL;
- handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
GFP_KERNEL);
if (!handle)
return -ENOMEM;
diff --git a/fs/file_table.c b/fs/file_table.c
index b991f90571b4..4f03beed4737 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -26,7 +26,6 @@
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/task_work.h>
-#include <linux/ima.h>
#include <linux/swap.h>
#include <linux/kmemleak.h>
@@ -276,21 +275,15 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
}
/**
- * alloc_file - allocate and initialize a 'struct file'
+ * file_init_path - initialize a 'struct file' based on path
*
+ * @file: the file to set up
* @path: the (dentry, vfsmount) pair for the new file
- * @flags: O_... flags with which the new file will be opened
* @fop: the 'struct file_operations' for the new file
*/
-static struct file *alloc_file(const struct path *path, int flags,
- const struct file_operations *fop)
+static void file_init_path(struct file *file, const struct path *path,
+ const struct file_operations *fop)
{
- struct file *file;
-
- file = alloc_empty_file(flags, current_cred());
- if (IS_ERR(file))
- return file;
-
file->f_path = *path;
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
@@ -309,22 +302,51 @@ static struct file *alloc_file(const struct path *path, int flags,
file->f_op = fop;
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(path->dentry->d_inode);
+}
+
+/**
+ * alloc_file - allocate and initialize a 'struct file'
+ *
+ * @path: the (dentry, vfsmount) pair for the new file
+ * @flags: O_... flags with which the new file will be opened
+ * @fop: the 'struct file_operations' for the new file
+ */
+static struct file *alloc_file(const struct path *path, int flags,
+ const struct file_operations *fop)
+{
+ struct file *file;
+
+ file = alloc_empty_file(flags, current_cred());
+ if (!IS_ERR(file))
+ file_init_path(file, path, fop);
return file;
}
-struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
- const char *name, int flags,
- const struct file_operations *fops)
+static inline int alloc_path_pseudo(const char *name, struct inode *inode,
+ struct vfsmount *mnt, struct path *path)
{
struct qstr this = QSTR_INIT(name, strlen(name));
+
+ path->dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+ if (!path->dentry)
+ return -ENOMEM;
+ path->mnt = mntget(mnt);
+ d_instantiate(path->dentry, inode);
+ return 0;
+}
+
+struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
+ const char *name, int flags,
+ const struct file_operations *fops)
+{
+ int ret;
struct path path;
struct file *file;
- path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
- if (!path.dentry)
- return ERR_PTR(-ENOMEM);
- path.mnt = mntget(mnt);
- d_instantiate(path.dentry, inode);
+ ret = alloc_path_pseudo(name, inode, mnt, &path);
+ if (ret)
+ return ERR_PTR(ret);
+
file = alloc_file(&path, flags, fops);
if (IS_ERR(file)) {
ihold(inode);
@@ -334,6 +356,30 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
}
EXPORT_SYMBOL(alloc_file_pseudo);
+struct file *alloc_file_pseudo_noaccount(struct inode *inode,
+ struct vfsmount *mnt, const char *name,
+ int flags,
+ const struct file_operations *fops)
+{
+ int ret;
+ struct path path;
+ struct file *file;
+
+ ret = alloc_path_pseudo(name, inode, mnt, &path);
+ if (ret)
+ return ERR_PTR(ret);
+
+ file = alloc_empty_file_noaccount(flags, current_cred());
+ if (IS_ERR(file)) {
+ ihold(inode);
+ path_put(&path);
+ return file;
+ }
+ file_init_path(file, &path, fops);
+ return file;
+}
+EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
+
struct file *alloc_file_clone(struct file *base, int flags,
const struct file_operations *fops)
{
@@ -367,7 +413,7 @@ static void __fput(struct file *file)
eventpoll_release(file);
locks_remove_file(file);
- ima_file_free(file);
+ security_file_release(file);
if (unlikely(file->f_flags & FASYNC)) {
if (file->f_op->fasync)
file->f_op->fasync(-1, file, 0);
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index e6e2a2185e7c..42e03b6b1cc7 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -307,7 +307,7 @@ vxfs_init(void)
vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode",
sizeof(struct vxfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ SLAB_RECLAIM_ACCOUNT,
offsetof(struct vxfs_inode_info, vii_immed.vi_immed),
sizeof_field(struct vxfs_inode_info,
vii_immed.vi_immed),
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3d84fcc471c6..e4f17c53ddfc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -141,6 +141,31 @@ static void wb_wakeup(struct bdi_writeback *wb)
spin_unlock_irq(&wb->work_lock);
}
+/*
+ * This function is used when the first inode for this wb is marked dirty. It
+ * wakes-up the corresponding bdi thread which should then take care of the
+ * periodic background write-out of dirty inodes. Since the write-out would
+ * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
+ * set up a timer which wakes the bdi thread up later.
+ *
+ * Note, we wouldn't bother setting up the timer, but this function is on the
+ * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
+ * by delaying the wake-up.
+ *
+ * We have to be careful not to postpone flush work if it is scheduled for
+ * earlier. Thus we use queue_delayed_work().
+ */
+static void wb_wakeup_delayed(struct bdi_writeback *wb)
+{
+ unsigned long timeout;
+
+ timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+ spin_lock_irq(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->dwork, timeout);
+ spin_unlock_irq(&wb->work_lock);
+}
+
static void finish_writeback_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index edb3712dcfa5..a4d6ca0b8971 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -83,8 +83,8 @@ static const struct fs_parameter_spec *fs_lookup_key(
}
/*
- * fs_parse - Parse a filesystem configuration parameter
- * @fc: The filesystem context to log errors through.
+ * __fs_parse - Parse a filesystem configuration parameter
+ * @log: The filesystem context to log errors through.
* @desc: The parameter description to use.
* @param: The parameter.
* @result: Where to place the result of the parse
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 91e89e68177e..b6cad106c37e 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -474,8 +474,7 @@ err:
static void cuse_fc_release(struct fuse_conn *fc)
{
- struct cuse_conn *cc = fc_to_cc(fc);
- kfree_rcu(cc, fc.rcu);
+ kfree(fc_to_cc(fc));
}
/**
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 148a71b8b4d0..c007b0f0c3a7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2509,14 +2509,14 @@ static int convert_fuse_file_lock(struct fuse_conn *fc,
* translate it into the caller's pid namespace.
*/
rcu_read_lock();
- fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
+ fl->c.flc_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
rcu_read_unlock();
break;
default:
return -EIO;
}
- fl->fl_type = ffl->type;
+ fl->c.flc_type = ffl->type;
return 0;
}
@@ -2530,10 +2530,10 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
memset(inarg, 0, sizeof(*inarg));
inarg->fh = ff->fh;
- inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ inarg->owner = fuse_lock_owner_id(fc, fl->c.flc_owner);
inarg->lk.start = fl->fl_start;
inarg->lk.end = fl->fl_end;
- inarg->lk.type = fl->fl_type;
+ inarg->lk.type = fl->c.flc_type;
inarg->lk.pid = pid;
if (flock)
inarg->lk_flags |= FUSE_LK_FLOCK;
@@ -2570,8 +2570,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_lk_in inarg;
- int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
- struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
+ int opcode = (fl->c.flc_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
+ struct pid *pid = fl->c.flc_type != F_UNLCK ? task_tgid(current) : NULL;
pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
int err;
@@ -2581,7 +2581,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
}
/* Unlock on close is handled by the flush method */
- if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
+ if ((fl->c.flc_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
return 0;
fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1df83eebda92..bcbe34488862 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -888,6 +888,7 @@ struct fuse_mount {
/* Entry on fc->mounts */
struct list_head fc_entry;
+ struct rcu_head rcu;
};
static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2a6d44f91729..516ea2979a90 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -930,6 +930,14 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
+static void delayed_release(struct rcu_head *p)
+{
+ struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
+
+ put_user_ns(fc->user_ns);
+ fc->release(fc);
+}
+
void fuse_conn_put(struct fuse_conn *fc)
{
if (refcount_dec_and_test(&fc->count)) {
@@ -941,13 +949,12 @@ void fuse_conn_put(struct fuse_conn *fc)
if (fiq->ops->release)
fiq->ops->release(fiq);
put_pid_ns(fc->pid_ns);
- put_user_ns(fc->user_ns);
bucket = rcu_dereference_protected(fc->curr_bucket, 1);
if (bucket) {
WARN_ON(atomic_read(&bucket->count) != 1);
kfree(bucket);
}
- fc->release(fc);
+ call_rcu(&fc->rcu, delayed_release);
}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -1366,7 +1373,7 @@ EXPORT_SYMBOL_GPL(fuse_send_init);
void fuse_free_conn(struct fuse_conn *fc)
{
WARN_ON(!list_empty(&fc->devices));
- kfree_rcu(fc, rcu);
+ kfree(fc);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);
@@ -1902,7 +1909,7 @@ static void fuse_sb_destroy(struct super_block *sb)
void fuse_mount_destroy(struct fuse_mount *fm)
{
fuse_conn_put(fm->fc);
- kfree(fm);
+ kfree_rcu(fm, rcu);
}
EXPORT_SYMBOL(fuse_mount_destroy);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d9ccfd27e4f1..789af5c8fade 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2465,7 +2465,7 @@ out:
}
static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
- loff_t offset)
+ loff_t offset, unsigned int len)
{
int ret;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 177f1f41f225..2e215e8c3c88 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -32,25 +32,21 @@
static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent = NULL;
+ struct dentry *parent;
struct gfs2_sbd *sdp;
struct gfs2_inode *dip;
- struct inode *dinode, *inode;
+ struct inode *inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip = NULL;
int error, valid = 0;
int had_lock = 0;
- if (flags & LOOKUP_RCU) {
- dinode = d_inode_rcu(READ_ONCE(dentry->d_parent));
- if (!dinode)
- return -ECHILD;
- } else {
- parent = dget_parent(dentry);
- dinode = d_inode(parent);
- }
- sdp = GFS2_SB(dinode);
- dip = GFS2_I(dinode);
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ parent = dget_parent(dentry);
+ sdp = GFS2_SB(d_inode(parent));
+ dip = GFS2_I(d_inode(parent));
inode = d_inode(dentry);
if (inode) {
@@ -66,8 +62,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
if (!had_lock) {
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
- flags & LOOKUP_RCU ? GL_NOBLOCK : 0, &d_gh);
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
goto out;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 992ca4effb50..4c42ada60ae7 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1440,10 +1440,10 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- if (!(fl->fl_flags & FL_POSIX))
+ if (!(fl->c.flc_flags & FL_POSIX))
return -ENOLCK;
if (gfs2_withdrawing_or_withdrawn(sdp)) {
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
locks_lock_file_wait(file, fl);
return -EIO;
}
@@ -1451,7 +1451,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
return dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl);
else if (IS_GETLK(cmd))
return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
- else if (fl->fl_type == F_UNLCK)
+ else if (lock_is_unlock(fl))
return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
else
return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
@@ -1483,7 +1483,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int error = 0;
int sleeptime;
- state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
+ state = lock_is_write(fl) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
flags = GL_EXACT | GL_NOPID;
if (!IS_SETLKW(cmd))
flags |= LM_FLAG_TRY_1CB;
@@ -1495,8 +1495,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (fl_gh->gh_state == state)
goto out;
locks_init_lock(&request);
- request.fl_type = F_UNLCK;
- request.fl_flags = FL_FLOCK;
+ request.c.flc_type = F_UNLCK;
+ request.c.flc_flags = FL_FLOCK;
locks_lock_file_wait(file, &request);
gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
@@ -1557,10 +1557,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
{
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
- if (fl->fl_type == F_UNLCK) {
+ if (lock_is_unlock(fl)) {
do_unflock(file, fl);
return 0;
} else {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6bfc9383b7b8..1b95db2c3aac 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1882,10 +1882,10 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode,
WARN_ON_ONCE(!may_not_block);
return -ECHILD;
}
- if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- int noblock = may_not_block ? GL_NOBLOCK : 0;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_ANY | noblock, &i_gh);
+ if (gfs2_glock_is_locked_by_me(gl) == NULL) {
+ if (may_not_block)
+ return -ECHILD;
+ error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 79be0cdc730c..04cadc02e5a6 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -111,7 +111,6 @@ static int __init init_gfs2_fs(void)
gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
sizeof(struct gfs2_inode),
0, SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|
SLAB_ACCOUNT,
gfs2_init_inode_once);
if (!gfs2_inode_cachep)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1281e60be639..572d58e86296 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -214,7 +214,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
- memcpy(&s->s_uuid, str->sb_uuid, 16);
+ super_set_uuid(s, str->sb_uuid, 16);
}
/**
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 7ededcb720c1..012a3d003fbe 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -190,6 +190,7 @@ struct hfsplus_sb_info {
int work_queued; /* non-zero delayed work is queued */
struct delayed_work sync_work; /* FS sync delayed work */
spinlock_t work_lock; /* protects sync_work and work_queued */
+ struct rcu_head rcu;
};
#define HFSPLUS_SB_WRITEBACKUP 0
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 1986b4f18a90..97920202790f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -277,6 +277,14 @@ void hfsplus_mark_mdb_dirty(struct super_block *sb)
spin_unlock(&sbi->work_lock);
}
+static void delayed_free(struct rcu_head *p)
+{
+ struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu);
+
+ unload_nls(sbi->nls);
+ kfree(sbi);
+}
+
static void hfsplus_put_super(struct super_block *sb)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
@@ -302,9 +310,7 @@ static void hfsplus_put_super(struct super_block *sb)
hfs_btree_close(sbi->ext_tree);
kfree(sbi->s_vhdr_buf);
kfree(sbi->s_backup_vhdr_buf);
- unload_nls(sbi->nls);
- kfree(sb->s_fs_info);
- sb->s_fs_info = NULL;
+ call_rcu(&sbi->rcu, delayed_free);
}
static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index b0cb70400996..ce9346099c72 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -30,7 +30,7 @@ struct hfsplus_wd {
* @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
* @buf: buffer for I/O
* @data: output pointer for location of requested data
- * @opf: request op flags
+ * @opf: I/O operation type and flags
*
* The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
* HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6b0ba3c1efba..314834a078e9 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -255,7 +255,7 @@ static int init_inodecache(void)
hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache",
sizeof(struct hpfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (hpfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ea5b8e57d904..6502c7e776d1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
+ vm_flags_t vm_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
ret = -ENOMEM;
+
+ vm_flags = vma->vm_flags;
+ /*
+ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+ * reserving here. Note: only for SHM hugetlbfs file, the inode
+ * flag S_PRIVATE is set.
+ */
+ if (inode->i_flags & S_PRIVATE)
+ vm_flags |= VM_NORESERVE;
+
if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
- vma->vm_flags))
+ vm_flags))
goto out;
ret = 0;
@@ -340,7 +351,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
} else {
folio_unlock(folio);
- if (!folio_test_has_hwpoisoned(folio))
+ if (!folio_test_hwpoison(folio))
want = nr;
else {
/*
@@ -922,7 +933,7 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
unsigned int ia_valid = attr->ia_valid;
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
- error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
+ error = setattr_prepare(idmap, dentry, attr);
if (error)
return error;
@@ -939,7 +950,7 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
hugetlb_vmtruncate(inode, newsize);
}
- setattr_copy(&nop_mnt_idmap, inode, attr);
+ setattr_copy(idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -974,6 +985,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
static struct inode *hugetlbfs_get_inode(struct super_block *sb,
+ struct mnt_idmap *idmap,
struct inode *dir,
umode_t mode, dev_t dev)
{
@@ -995,7 +1007,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
inode->i_ino = get_next_ino();
- inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
@@ -1039,7 +1051,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
{
struct inode *inode;
- inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
+ inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode, dev);
if (!inode)
return -ENOSPC;
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
@@ -1051,7 +1063,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
static int hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
- int retval = hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry,
+ int retval = hugetlbfs_mknod(idmap, dir, dentry,
mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
@@ -1062,7 +1074,7 @@ static int hugetlbfs_create(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0);
+ return hugetlbfs_mknod(idmap, dir, dentry, mode | S_IFREG, 0);
}
static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
@@ -1071,7 +1083,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
{
struct inode *inode;
- inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
+ inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode | S_IFREG, 0);
if (!inode)
return -ENOSPC;
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
@@ -1083,10 +1095,11 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname)
{
+ const umode_t mode = S_IFLNK|S_IRWXUGO;
struct inode *inode;
int error = -ENOSPC;
- inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
+ inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode, 0);
if (inode) {
int l = strlen(symname)+1;
error = page_symlink(inode, symname, l);
@@ -1354,6 +1367,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
+ struct hstate *h;
char *rest;
unsigned long ps;
int opt;
@@ -1398,11 +1412,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_pagesize:
ps = memparse(param->string, &rest);
- ctx->hstate = size_to_hstate(ps);
- if (!ctx->hstate) {
+ h = size_to_hstate(ps);
+ if (!h) {
pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
return -EINVAL;
}
+ ctx->hstate = h;
return 0;
case Opt_min_size:
@@ -1553,6 +1568,7 @@ static struct file_system_type hugetlbfs_fs_type = {
.init_fs_context = hugetlbfs_init_fs_context,
.parameters = hugetlb_fs_parameters,
.kill_sb = kill_litter_super,
+ .fs_flags = FS_ALLOW_IDMAP,
};
static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
@@ -1606,7 +1622,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
}
file = ERR_PTR(-ENOSPC);
- inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
+ /* hugetlbfs_vfsmount[] mounts do not use idmapped mounts. */
+ inode = hugetlbfs_get_inode(mnt->mnt_sb, &nop_mnt_idmap, NULL,
+ S_IFREG | S_IRWXUGO, 0);
if (!inode)
goto out;
if (creat_flags == HUGETLB_SHMFS_INODE)
diff --git a/fs/inode.c b/fs/inode.c
index 91048c4c9c9e..d290f007b3d1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -20,6 +20,7 @@
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <linux/iversion.h>
+#include <linux/rw_hint.h>
#include <trace/events/writeback.h>
#include "internal.h"
@@ -588,7 +589,8 @@ void dump_mapping(const struct address_space *mapping)
}
dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
- if (get_kernel_nofault(dentry, dentry_ptr)) {
+ if (get_kernel_nofault(dentry, dentry_ptr) ||
+ !dentry.d_parent || !dentry.d_name.name) {
pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
a_ops, ino, dentry_ptr);
return;
@@ -2285,7 +2287,7 @@ void __init inode_init(void)
sizeof(struct inode),
0,
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
/* Hash may have been set up in inode_init_early */
@@ -2509,7 +2511,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
{
struct timespec64 now = current_time(inode);
- inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
+ inode_set_ctime_to_ts(inode, now);
return now;
}
EXPORT_SYMBOL(inode_set_ctime_current);
diff --git a/fs/internal.h b/fs/internal.h
index b67406435fc0..49c1fcfee4b3 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -183,6 +183,7 @@ extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
+long do_ftruncate(struct file *file, loff_t length, int small);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int chmod_common(const struct path *path, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
@@ -310,3 +311,10 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po
struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns);
struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
void mnt_idmap_put(struct mnt_idmap *idmap);
+struct stashed_operations {
+ void (*put_data)(void *data);
+ void (*init_inode)(struct inode *inode, void *data);
+};
+int path_from_stashed(struct dentry **stashed, unsigned long ino,
+ struct vfsmount *mnt, void *data, struct path *path);
+void stashed_dentry_prune(struct dentry *dentry);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 76cf22ac97d7..1d5abfdf0f22 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -763,6 +763,33 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp)
return err;
}
+static int ioctl_getfsuuid(struct file *file, void __user *argp)
+{
+ struct super_block *sb = file_inode(file)->i_sb;
+ struct fsuuid2 u = { .len = sb->s_uuid_len, };
+
+ if (!sb->s_uuid_len)
+ return -ENOIOCTLCMD;
+
+ memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len);
+
+ return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
+}
+
+static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
+{
+ struct super_block *sb = file_inode(file)->i_sb;
+
+ if (!strlen(sb->s_sysfs_name))
+ return -ENOIOCTLCMD;
+
+ struct fs_sysfs_path u = {};
+
+ u.len = scnprintf(u.name, sizeof(u.name), "%s/%s", sb->s_type->name, sb->s_sysfs_name);
+
+ return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0;
+}
+
/*
* do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
* It's just a simple helper for sys_ioctl and compat_sys_ioctl.
@@ -845,6 +872,12 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
case FS_IOC_FSSETXATTR:
return ioctl_fssetxattr(filp, argp);
+ case FS_IOC_GETFSUUID:
+ return ioctl_getfsuuid(filp, argp);
+
+ case FS_IOC_GETFSSYSFSPATH:
+ return ioctl_get_fs_sysfs_path(filp, argp);
+
default:
if (S_ISREG(inode->i_mode))
return file_ioctl(filp, cmd, argp);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 093c4515b22a..4e8e41c8b3c0 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
- * Copyright (C) 2016-2019 Christoph Hellwig.
+ * Copyright (C) 2016-2023 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
@@ -95,6 +95,44 @@ static inline bool ifs_block_is_dirty(struct folio *folio,
return test_bit(block + blks_per_folio, ifs->state);
}
+static unsigned ifs_find_dirty_range(struct folio *folio,
+ struct iomap_folio_state *ifs, u64 *range_start, u64 range_end)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned start_blk =
+ offset_in_folio(folio, *range_start) >> inode->i_blkbits;
+ unsigned end_blk = min_not_zero(
+ offset_in_folio(folio, range_end) >> inode->i_blkbits,
+ i_blocks_per_folio(inode, folio));
+ unsigned nblks = 1;
+
+ while (!ifs_block_is_dirty(folio, ifs, start_blk))
+ if (++start_blk == end_blk)
+ return 0;
+
+ while (start_blk + nblks < end_blk) {
+ if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks))
+ break;
+ nblks++;
+ }
+
+ *range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
+ return nblks << inode->i_blkbits;
+}
+
+static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start,
+ u64 range_end)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (*range_start >= range_end)
+ return 0;
+
+ if (ifs)
+ return ifs_find_dirty_range(folio, ifs, range_start, range_end);
+ return range_end - *range_start;
+}
+
static void ifs_clear_range_dirty(struct folio *folio,
struct iomap_folio_state *ifs, size_t off, size_t len)
{
@@ -1454,15 +1492,10 @@ out_unlock:
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
- size_t len, int error)
+ size_t len)
{
struct iomap_folio_state *ifs = folio->private;
- if (error) {
- folio_set_error(folio);
- mapping_set_error(inode->i_mapping, error);
- }
-
WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0);
@@ -1479,40 +1512,29 @@ static u32
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
{
struct inode *inode = ioend->io_inode;
- struct bio *bio = &ioend->io_inline_bio;
- struct bio *last = ioend->io_bio, *next;
- u64 start = bio->bi_iter.bi_sector;
- loff_t offset = ioend->io_offset;
- bool quiet = bio_flagged(bio, BIO_QUIET);
+ struct bio *bio = &ioend->io_bio;
+ struct folio_iter fi;
u32 folio_count = 0;
- for (bio = &ioend->io_inline_bio; bio; bio = next) {
- struct folio_iter fi;
-
- /*
- * For the last bio, bi_private points to the ioend, so we
- * need to explicitly end the iteration here.
- */
- if (bio == last)
- next = NULL;
- else
- next = bio->bi_private;
-
- /* walk all folios in bio, ending page IO on them */
- bio_for_each_folio_all(fi, bio) {
- iomap_finish_folio_write(inode, fi.folio, fi.length,
- error);
- folio_count++;
+ if (error) {
+ mapping_set_error(inode->i_mapping, error);
+ if (!bio_flagged(bio, BIO_QUIET)) {
+ pr_err_ratelimited(
+"%s: writeback error on inode %lu, offset %lld, sector %llu",
+ inode->i_sb->s_id, inode->i_ino,
+ ioend->io_offset, ioend->io_sector);
}
- bio_put(bio);
}
- /* The ioend has been freed by bio_put() */
- if (unlikely(error && !quiet)) {
- printk_ratelimited(KERN_ERR
-"%s: writeback error on inode %lu, offset %lld, sector %llu",
- inode->i_sb->s_id, inode->i_ino, offset, start);
+ /* walk all folios in bio, ending page IO on them */
+ bio_for_each_folio_all(fi, bio) {
+ if (error)
+ folio_set_error(fi.folio);
+ iomap_finish_folio_write(inode, fi.folio, fi.length);
+ folio_count++;
}
+
+ bio_put(bio); /* frees the ioend */
return folio_count;
}
@@ -1553,7 +1575,7 @@ EXPORT_SYMBOL_GPL(iomap_finish_ioends);
static bool
iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
{
- if (ioend->io_bio->bi_status != next->io_bio->bi_status)
+ if (ioend->io_bio.bi_status != next->io_bio.bi_status)
return false;
if ((ioend->io_flags & IOMAP_F_SHARED) ^
(next->io_flags & IOMAP_F_SHARED))
@@ -1618,47 +1640,46 @@ EXPORT_SYMBOL_GPL(iomap_sort_ioends);
static void iomap_writepage_end_bio(struct bio *bio)
{
- struct iomap_ioend *ioend = bio->bi_private;
-
- iomap_finish_ioend(ioend, blk_status_to_errno(bio->bi_status));
+ iomap_finish_ioend(iomap_ioend_from_bio(bio),
+ blk_status_to_errno(bio->bi_status));
}
/*
* Submit the final bio for an ioend.
*
* If @error is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we've marked pages for writeback
- * and unlocked them. In this situation, we need to fail the bio instead of
- * submitting it. This typically only happens on a filesystem shutdown.
+ * the submission process has failed after we've marked pages for writeback.
+ * We cannot cancel ioend directly in that case, so call the bio end I/O handler
+ * with the error status here to run the normal I/O completion handler to clear
+ * the writeback bit and let the file system proess the errors.
*/
-static int
-iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
- int error)
+static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
{
- ioend->io_bio->bi_private = ioend;
- ioend->io_bio->bi_end_io = iomap_writepage_end_bio;
+ if (!wpc->ioend)
+ return error;
+ /*
+ * Let the file systems prepare the I/O submission and hook in an I/O
+ * comletion handler. This also needs to happen in case after a
+ * failure happened so that the file system end I/O handler gets called
+ * to clean up.
+ */
if (wpc->ops->prepare_ioend)
- error = wpc->ops->prepare_ioend(ioend, error);
+ error = wpc->ops->prepare_ioend(wpc->ioend, error);
+
if (error) {
- /*
- * If we're failing the IO now, just mark the ioend with an
- * error and finish it. This will run IO completion immediately
- * as there is only one reference to the ioend at this point in
- * time.
- */
- ioend->io_bio->bi_status = errno_to_blk_status(error);
- bio_endio(ioend->io_bio);
- return error;
+ wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
+ bio_endio(&wpc->ioend->io_bio);
+ } else {
+ submit_bio(&wpc->ioend->io_bio);
}
- submit_bio(ioend->io_bio);
- return 0;
+ wpc->ioend = NULL;
+ return error;
}
-static struct iomap_ioend *
-iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
- loff_t offset, sector_t sector, struct writeback_control *wbc)
+static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
+ struct writeback_control *wbc, struct inode *inode, loff_t pos)
{
struct iomap_ioend *ioend;
struct bio *bio;
@@ -1666,63 +1687,42 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
REQ_OP_WRITE | wbc_to_write_flags(wbc),
GFP_NOFS, &iomap_ioend_bioset);
- bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
+ bio->bi_end_io = iomap_writepage_end_bio;
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
- ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
+ ioend = iomap_ioend_from_bio(bio);
INIT_LIST_HEAD(&ioend->io_list);
ioend->io_type = wpc->iomap.type;
ioend->io_flags = wpc->iomap.flags;
ioend->io_inode = inode;
ioend->io_size = 0;
- ioend->io_folios = 0;
- ioend->io_offset = offset;
- ioend->io_bio = bio;
- ioend->io_sector = sector;
- return ioend;
-}
-
-/*
- * Allocate a new bio, and chain the old bio to the new one.
- *
- * Note that we have to perform the chaining in this unintuitive order
- * so that the bi_private linkage is set up in the right direction for the
- * traversal in iomap_finish_ioend().
- */
-static struct bio *
-iomap_chain_bio(struct bio *prev)
-{
- struct bio *new;
-
- new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
- bio_clone_blkg_association(new, prev);
- new->bi_iter.bi_sector = bio_end_sector(prev);
+ ioend->io_offset = pos;
+ ioend->io_sector = bio->bi_iter.bi_sector;
- bio_chain(prev, new);
- bio_get(prev); /* for iomap_finish_ioend */
- submit_bio(prev);
- return new;
+ wpc->nr_folios = 0;
+ return ioend;
}
-static bool
-iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
- sector_t sector)
+static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
{
if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
(wpc->ioend->io_flags & IOMAP_F_SHARED))
return false;
if (wpc->iomap.type != wpc->ioend->io_type)
return false;
- if (offset != wpc->ioend->io_offset + wpc->ioend->io_size)
+ if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
return false;
- if (sector != bio_end_sector(wpc->ioend->io_bio))
+ if (iomap_sector(&wpc->iomap, pos) !=
+ bio_end_sector(&wpc->ioend->io_bio))
return false;
/*
* Limit ioend bio chain lengths to minimise IO completion latency. This
* also prevents long tight loops ending page writeback on all the
* folios in the ioend.
*/
- if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE)
+ if (wpc->nr_folios >= IOEND_BATCH_SIZE)
return false;
return true;
}
@@ -1730,255 +1730,238 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
/*
* Test to see if we have an existing ioend structure that we could append to
* first; otherwise finish off the current ioend and start another.
+ *
+ * If a new ioend is created and cached, the old ioend is submitted to the block
+ * layer instantly. Batching optimisations are provided by higher level block
+ * plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
*/
-static void
-iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
- struct iomap_folio_state *ifs, struct iomap_writepage_ctx *wpc,
- struct writeback_control *wbc, struct list_head *iolist)
+static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
+ struct writeback_control *wbc, struct folio *folio,
+ struct inode *inode, loff_t pos, unsigned len)
{
- sector_t sector = iomap_sector(&wpc->iomap, pos);
- unsigned len = i_blocksize(inode);
+ struct iomap_folio_state *ifs = folio->private;
size_t poff = offset_in_folio(folio, pos);
+ int error;
- if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, sector)) {
- if (wpc->ioend)
- list_add(&wpc->ioend->io_list, iolist);
- wpc->ioend = iomap_alloc_ioend(inode, wpc, pos, sector, wbc);
+ if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
+new_ioend:
+ error = iomap_submit_ioend(wpc, 0);
+ if (error)
+ return error;
+ wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
}
- if (!bio_add_folio(wpc->ioend->io_bio, folio, len, poff)) {
- wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio);
- bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff);
- }
+ if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
+ goto new_ioend;
if (ifs)
atomic_add(len, &ifs->write_bytes_pending);
wpc->ioend->io_size += len;
wbc_account_cgroup_owner(wbc, &folio->page, len);
+ return 0;
}
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * the forward progress guarantees we need to provide. The current ioend we're
- * adding blocks to is cached in the writepage context, and if the new block
- * doesn't append to the cached ioend, it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
-static int
-iomap_writepage_map(struct iomap_writepage_ctx *wpc,
- struct writeback_control *wbc, struct inode *inode,
- struct folio *folio, u64 end_pos)
+static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
+ struct writeback_control *wbc, struct folio *folio,
+ struct inode *inode, u64 pos, unsigned dirty_len,
+ unsigned *count)
{
- struct iomap_folio_state *ifs = folio->private;
- struct iomap_ioend *ioend, *next;
- unsigned len = i_blocksize(inode);
- unsigned nblocks = i_blocks_per_folio(inode, folio);
- u64 pos = folio_pos(folio);
- int error = 0, count = 0, i;
- LIST_HEAD(submit_list);
-
- WARN_ON_ONCE(end_pos <= pos);
-
- if (!ifs && nblocks > 1) {
- ifs = ifs_alloc(inode, folio, 0);
- iomap_set_range_dirty(folio, 0, end_pos - pos);
- }
+ int error;
- WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
-
- /*
- * Walk through the folio to find areas to write back. If we
- * run off the end of the current map or find the current map
- * invalid, grab a new one.
- */
- for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
- if (ifs && !ifs_block_is_dirty(folio, ifs, i))
- continue;
+ do {
+ unsigned map_len;
- error = wpc->ops->map_blocks(wpc, inode, pos);
+ error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len);
if (error)
break;
- trace_iomap_writepage_map(inode, &wpc->iomap);
- if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE))
- continue;
- if (wpc->iomap.type == IOMAP_HOLE)
- continue;
- iomap_add_to_ioend(inode, pos, folio, ifs, wpc, wbc,
- &submit_list);
- count++;
- }
- if (count)
- wpc->ioend->io_folios++;
+ trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap);
- WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
- WARN_ON_ONCE(!folio_test_locked(folio));
- WARN_ON_ONCE(folio_test_writeback(folio));
- WARN_ON_ONCE(folio_test_dirty(folio));
+ map_len = min_t(u64, dirty_len,
+ wpc->iomap.offset + wpc->iomap.length - pos);
+ WARN_ON_ONCE(!folio->private && map_len < dirty_len);
+
+ switch (wpc->iomap.type) {
+ case IOMAP_INLINE:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ break;
+ case IOMAP_HOLE:
+ break;
+ default:
+ error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
+ map_len);
+ if (!error)
+ (*count)++;
+ break;
+ }
+ dirty_len -= map_len;
+ pos += map_len;
+ } while (dirty_len && !error);
/*
* We cannot cancel the ioend directly here on error. We may have
* already set other pages under writeback and hence we have to run I/O
* completion to mark the error state of the pages under writeback
* appropriately.
+ *
+ * Just let the file system know what portion of the folio failed to
+ * map.
*/
- if (unlikely(error)) {
- /*
- * Let the filesystem know what portion of the current page
- * failed to map. If the page hasn't been added to ioend, it
- * won't be affected by I/O completion and we must unlock it
- * now.
- */
- if (wpc->ops->discard_folio)
- wpc->ops->discard_folio(folio, pos);
- if (!count) {
- folio_unlock(folio);
- goto done;
- }
- }
-
- /*
- * We can have dirty bits set past end of file in page_mkwrite path
- * while mapping the last partial folio. Hence it's better to clear
- * all the dirty bits in the folio here.
- */
- iomap_clear_range_dirty(folio, 0, folio_size(folio));
- folio_start_writeback(folio);
- folio_unlock(folio);
-
- /*
- * Preserve the original error if there was one; catch
- * submission errors here and propagate into subsequent ioend
- * submissions.
- */
- list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
- int error2;
-
- list_del_init(&ioend->io_list);
- error2 = iomap_submit_ioend(wpc, ioend, error);
- if (error2 && !error)
- error = error2;
- }
-
- /*
- * We can end up here with no error and nothing to write only if we race
- * with a partial page truncate on a sub-page block sized filesystem.
- */
- if (!count)
- folio_end_writeback(folio);
-done:
- mapping_set_error(inode->i_mapping, error);
+ if (error && wpc->ops->discard_folio)
+ wpc->ops->discard_folio(folio, pos);
return error;
}
/*
- * Write out a dirty page.
+ * Check interaction of the folio with the file end.
*
- * For delalloc space on the page, we need to allocate space and flush it.
- * For unwritten space on the page, we need to start the conversion to
- * regular allocated space.
+ * If the folio is entirely beyond i_size, return false. If it straddles
+ * i_size, adjust end_pos and zero all data beyond i_size.
*/
-static int iomap_do_writepage(struct folio *folio,
- struct writeback_control *wbc, void *data)
+static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
+ u64 *end_pos)
{
- struct iomap_writepage_ctx *wpc = data;
- struct inode *inode = folio->mapping->host;
- u64 end_pos, isize;
-
- trace_iomap_writepage(inode, folio_pos(folio), folio_size(folio));
+ u64 isize = i_size_read(inode);
- /*
- * Refuse to write the folio out if we're called from reclaim context.
- *
- * This avoids stack overflows when called from deeply used stacks in
- * random callers for direct reclaim or memcg reclaim. We explicitly
- * allow reclaim from kswapd as the stack usage there is relatively low.
- *
- * This should never happen except in the case of a VM regression so
- * warn about it.
- */
- if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
- PF_MEMALLOC))
- goto redirty;
-
- /*
- * Is this folio beyond the end of the file?
- *
- * The folio index is less than the end_index, adjust the end_pos
- * to the highest offset that this folio should represent.
- * -----------------------------------------------------
- * | file mapping | <EOF> |
- * -----------------------------------------------------
- * | Page ... | Page N-2 | Page N-1 | Page N | |
- * ^--------------------------------^----------|--------
- * | desired writeback range | see else |
- * ---------------------------------^------------------|
- */
- isize = i_size_read(inode);
- end_pos = folio_pos(folio) + folio_size(folio);
- if (end_pos > isize) {
- /*
- * Check whether the page to write out is beyond or straddles
- * i_size or not.
- * -------------------------------------------------------
- * | file mapping | <EOF> |
- * -------------------------------------------------------
- * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
- * ^--------------------------------^-----------|---------
- * | | Straddles |
- * ---------------------------------^-----------|--------|
- */
+ if (*end_pos > isize) {
size_t poff = offset_in_folio(folio, isize);
pgoff_t end_index = isize >> PAGE_SHIFT;
/*
- * Skip the page if it's fully outside i_size, e.g.
- * due to a truncate operation that's in progress. We've
- * cleaned this page and truncate will finish things off for
- * us.
+ * If the folio is entirely ouside of i_size, skip it.
+ *
+ * This can happen due to a truncate operation that is in
+ * progress and in that case truncate will finish it off once
+ * we've dropped the folio lock.
*
- * Note that the end_index is unsigned long. If the given
- * offset is greater than 16TB on a 32-bit system then if we
- * checked if the page is fully outside i_size with
- * "if (page->index >= end_index + 1)", "end_index + 1" would
- * overflow and evaluate to 0. Hence this page would be
+ * Note that the pgoff_t used for end_index is an unsigned long.
+ * If the given offset is greater than 16TB on a 32-bit system,
+ * then if we checked if the folio is fully outside i_size with
+ * "if (folio->index >= end_index + 1)", "end_index + 1" would
+ * overflow and evaluate to 0. Hence this folio would be
* redirtied and written out repeatedly, which would result in
* an infinite loop; the user program performing this operation
* would hang. Instead, we can detect this situation by
- * checking if the page is totally beyond i_size or if its
+ * checking if the folio is totally beyond i_size or if its
* offset is just equal to the EOF.
*/
if (folio->index > end_index ||
(folio->index == end_index && poff == 0))
- goto unlock;
+ return false;
/*
- * The page straddles i_size. It must be zeroed out on each
- * and every writepage invocation because it may be mmapped.
- * "A file is mapped in multiples of the page size. For a file
- * that is not a multiple of the page size, the remaining
- * memory is zeroed when mapped, and writes to that region are
- * not written out to the file."
+ * The folio straddles i_size.
+ *
+ * It must be zeroed out on each and every writepage invocation
+ * because it may be mmapped:
+ *
+ * A file is mapped in multiples of the page size. For a
+ * file that is not a multiple of the page size, the
+ * remaining memory is zeroed when mapped, and writes to that
+ * region are not written out to the file.
+ *
+ * Also adjust the writeback range to skip all blocks entirely
+ * beyond i_size.
*/
folio_zero_segment(folio, poff, folio_size(folio));
- end_pos = isize;
+ *end_pos = round_up(isize, i_blocksize(inode));
+ }
+
+ return true;
+}
+
+static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
+ struct writeback_control *wbc, struct folio *folio)
+{
+ struct iomap_folio_state *ifs = folio->private;
+ struct inode *inode = folio->mapping->host;
+ u64 pos = folio_pos(folio);
+ u64 end_pos = pos + folio_size(folio);
+ unsigned count = 0;
+ int error = 0;
+ u32 rlen;
+
+ WARN_ON_ONCE(!folio_test_locked(folio));
+ WARN_ON_ONCE(folio_test_dirty(folio));
+ WARN_ON_ONCE(folio_test_writeback(folio));
+
+ trace_iomap_writepage(inode, pos, folio_size(folio));
+
+ if (!iomap_writepage_handle_eof(folio, inode, &end_pos)) {
+ folio_unlock(folio);
+ return 0;
+ }
+ WARN_ON_ONCE(end_pos <= pos);
+
+ if (i_blocks_per_folio(inode, folio) > 1) {
+ if (!ifs) {
+ ifs = ifs_alloc(inode, folio, 0);
+ iomap_set_range_dirty(folio, 0, end_pos - pos);
+ }
+
+ /*
+ * Keep the I/O completion handler from clearing the writeback
+ * bit until we have submitted all blocks by adding a bias to
+ * ifs->write_bytes_pending, which is dropped after submitting
+ * all blocks.
+ */
+ WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
+ atomic_inc(&ifs->write_bytes_pending);
}
- return iomap_writepage_map(wpc, wbc, inode, folio, end_pos);
+ /*
+ * Set the writeback bit ASAP, as the I/O completion for the single
+ * block per folio case happen hit as soon as we're submitting the bio.
+ */
+ folio_start_writeback(folio);
-redirty:
- folio_redirty_for_writepage(wbc, folio);
-unlock:
+ /*
+ * Walk through the folio to find dirty areas to write back.
+ */
+ while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
+ error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
+ pos, rlen, &count);
+ if (error)
+ break;
+ pos += rlen;
+ }
+
+ if (count)
+ wpc->nr_folios++;
+
+ /*
+ * We can have dirty bits set past end of file in page_mkwrite path
+ * while mapping the last partial folio. Hence it's better to clear
+ * all the dirty bits in the folio here.
+ */
+ iomap_clear_range_dirty(folio, 0, folio_size(folio));
+
+ /*
+ * Usually the writeback bit is cleared by the I/O completion handler.
+ * But we may end up either not actually writing any blocks, or (when
+ * there are multiple blocks in a folio) all I/O might have finished
+ * already at this point. In that case we need to clear the writeback
+ * bit ourselves right after unlocking the page.
+ */
folio_unlock(folio);
- return 0;
+ if (ifs) {
+ if (atomic_dec_and_test(&ifs->write_bytes_pending))
+ folio_end_writeback(folio);
+ } else {
+ if (!count)
+ folio_end_writeback(folio);
+ }
+ mapping_set_error(inode->i_mapping, error);
+ return error;
+}
+
+static int iomap_do_writepage(struct folio *folio,
+ struct writeback_control *wbc, void *data)
+{
+ return iomap_writepage_map(data, wbc, folio);
}
int
@@ -1988,18 +1971,24 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
{
int ret;
+ /*
+ * Writeback from reclaim context should never happen except in the case
+ * of a VM regression so warn about it and refuse to write the data.
+ */
+ if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC | PF_KSWAPD)) ==
+ PF_MEMALLOC))
+ return -EIO;
+
wpc->ops = ops;
ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
- if (!wpc->ioend)
- return ret;
- return iomap_submit_ioend(wpc, wpc->ioend, ret);
+ return iomap_submit_ioend(wpc, ret);
}
EXPORT_SYMBOL_GPL(iomap_writepages);
static int __init iomap_init(void)
{
return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
- offsetof(struct iomap_ioend, io_inline_bio),
+ offsetof(struct iomap_ioend, io_bio),
BIOSET_NEED_BVECS);
}
fs_initcall(iomap_init);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index bcd3f8cf5ea4..f3b43d223a46 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -380,6 +380,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
+ bio->bi_write_hint = inode->i_write_hint;
bio->bi_ioprio = dio->iocb->ki_ioprio;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index c16fd55f5595..0a991c4ce87d 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -154,7 +154,48 @@ DEFINE_EVENT(iomap_class, name, \
TP_ARGS(inode, iomap))
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
-DEFINE_IOMAP_EVENT(iomap_writepage_map);
+
+TRACE_EVENT(iomap_writepage_map,
+ TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
+ struct iomap *iomap),
+ TP_ARGS(inode, pos, dirty_len, iomap),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, ino)
+ __field(u64, pos)
+ __field(u64, dirty_len)
+ __field(u64, addr)
+ __field(loff_t, offset)
+ __field(u64, length)
+ __field(u16, type)
+ __field(u16, flags)
+ __field(dev_t, bdev)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->dirty_len = dirty_len;
+ __entry->addr = iomap->addr;
+ __entry->offset = iomap->offset;
+ __entry->length = iomap->length;
+ __entry->type = iomap->type;
+ __entry->flags = iomap->flags;
+ __entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
+ "addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ MAJOR(__entry->bdev), MINOR(__entry->bdev),
+ __entry->pos,
+ __entry->dirty_len,
+ __entry->addr,
+ __entry->offset,
+ __entry->length,
+ __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
+ __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
+);
TRACE_EVENT(iomap_iter,
TP_PROTO(struct iomap_iter *iter, const void *ops,
@@ -165,6 +206,7 @@ TRACE_EVENT(iomap_iter,
__field(u64, ino)
__field(loff_t, pos)
__field(u64, length)
+ __field(s64, processed)
__field(unsigned int, flags)
__field(const void *, ops)
__field(unsigned long, caller)
@@ -174,15 +216,17 @@ TRACE_EVENT(iomap_iter,
__entry->ino = iter->inode->i_ino;
__entry->pos = iter->pos;
__entry->length = iomap_length(iter);
+ __entry->processed = iter->processed;
__entry->flags = iter->flags;
__entry->ops = ops;
__entry->caller = caller;
),
- TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx flags %s (0x%x) ops %ps caller %pS",
+ TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx processed %lld flags %s (0x%x) ops %ps caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pos,
__entry->length,
+ __entry->processed,
__print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS),
__entry->flags,
__entry->ops,
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3e4d53e26f94..25fca44149dd 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -93,7 +93,7 @@ static int __init init_inodecache(void)
isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
sizeof(struct iso_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (!isofs_inode_cachep)
return -ENOMEM;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f99591a634b4..aede1be4dc0c 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -387,7 +387,7 @@ static int __init init_jffs2_fs(void)
jffs2_inode_cachep = kmem_cache_create("jffs2_i",
sizeof(struct jffs2_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
jffs2_i_init_once);
if (!jffs2_inode_cachep) {
pr_err("error: Failed to initialise inode cache\n");
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 8eec84c651bf..cb3cda1390ad 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -2763,9 +2763,7 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
* leafno - the number of the leaf to be updated.
* newval - the new value for the leaf.
*
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * RETURN VALUES: none
*/
static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
{
@@ -2792,10 +2790,6 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
* get the buddy size (number of words covered) of
* the new value.
*/
-
- if ((newval - tp->dmt_budmin) > BUDMIN)
- return -EIO;
-
budsz = BUDSIZE(newval, tp->dmt_budmin);
/* try to join.
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cb6d1fda66a7..73389c68e251 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1058,7 +1058,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
int lmLogOpen(struct super_block *sb)
{
int rc;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct jfs_log *log;
struct jfs_sb_info *sbi = JFS_SBI(sb);
@@ -1070,7 +1070,7 @@ int lmLogOpen(struct super_block *sb)
mutex_lock(&jfs_log_mutex);
list_for_each_entry(log, &jfs_external_logs, journal_list) {
- if (log->bdev_handle->bdev->bd_dev == sbi->logdev) {
+ if (file_bdev(log->bdev_file)->bd_dev == sbi->logdev) {
if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
jfs_warn("wrong uuid on JFS journal");
mutex_unlock(&jfs_log_mutex);
@@ -1100,14 +1100,14 @@ int lmLogOpen(struct super_block *sb)
* file systems to log may have n-to-1 relationship;
*/
- bdev_handle = bdev_open_by_dev(sbi->logdev,
+ bdev_file = bdev_file_open_by_dev(sbi->logdev,
BLK_OPEN_READ | BLK_OPEN_WRITE, log, NULL);
- if (IS_ERR(bdev_handle)) {
- rc = PTR_ERR(bdev_handle);
+ if (IS_ERR(bdev_file)) {
+ rc = PTR_ERR(bdev_file);
goto free;
}
- log->bdev_handle = bdev_handle;
+ log->bdev_file = bdev_file;
uuid_copy(&log->uuid, &sbi->loguuid);
/*
@@ -1141,7 +1141,7 @@ journal_found:
lbmLogShutdown(log);
close: /* close external log device */
- bdev_release(bdev_handle);
+ fput(bdev_file);
free: /* free log descriptor */
mutex_unlock(&jfs_log_mutex);
@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
init_waitqueue_head(&log->syncwait);
set_bit(log_INLINELOG, &log->flag);
- log->bdev_handle = sb->s_bdev_handle;
+ log->bdev_file = sb->s_bdev_file;
log->base = addressPXD(&JFS_SBI(sb)->logpxd);
log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
(L2LOGPSIZE - sb->s_blocksize_bits);
@@ -1436,7 +1436,7 @@ int lmLogClose(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
int rc = 0;
jfs_info("lmLogClose: log:0x%p", log);
@@ -1482,10 +1482,10 @@ int lmLogClose(struct super_block *sb)
* external log as separate logical volume
*/
list_del(&log->journal_list);
- bdev_handle = log->bdev_handle;
+ bdev_file = log->bdev_file;
rc = lmLogShutdown(log);
- bdev_release(bdev_handle);
+ fput(bdev_file);
kfree(log);
@@ -1972,7 +1972,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bp->l_flag |= lbmREAD;
- bio = bio_alloc(log->bdev_handle->bdev, 1, REQ_OP_READ, GFP_NOFS);
+ bio = bio_alloc(file_bdev(log->bdev_file), 1, REQ_OP_READ, GFP_NOFS);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
__bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
@@ -2115,7 +2115,7 @@ static void lbmStartIO(struct lbuf * bp)
jfs_info("lbmStartIO");
if (!log->no_integrity)
- bdev = log->bdev_handle->bdev;
+ bdev = file_bdev(log->bdev_file);
bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC,
GFP_NOFS);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 84aa2d253907..8b8994e48cd0 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -356,7 +356,7 @@ struct jfs_log {
* before writing syncpt.
*/
struct list_head journal_list; /* Global list */
- struct bdev_handle *bdev_handle; /* 4: log lv pointer */
+ struct file *bdev_file; /* 4: log lv pointer */
int serial; /* 4: log mount serial number */
s64 base; /* @8: log extent address (inline log ) */
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 9b5c6a20b30c..98f9a432c336 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -431,7 +431,7 @@ int updateSuper(struct super_block *sb, uint state)
if (state == FM_MOUNT) {
/* record log's dev_t and mount serial number */
j_sb->s_logdev = cpu_to_le32(
- new_encode_dev(sbi->log->bdev_handle->bdev->bd_dev));
+ new_encode_dev(file_bdev(sbi->log->bdev_file)->bd_dev));
j_sb->s_logserial = cpu_to_le32(sbi->log->serial);
} else if (state == FM_CLEAN) {
/*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 8d8e556bd610..73f09a762b79 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -932,7 +932,7 @@ static int __init init_jfs_fs(void)
jfs_inode_cachep =
kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
- 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+ 0, SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
offsetof(struct jfs_inode_info, i_inline_all),
sizeof_field(struct jfs_inode_info, i_inline_all),
init_once);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 0c93cad0f0ac..e29f4edf9572 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -358,7 +358,9 @@ int kernfs_get_tree(struct fs_context *fc)
}
sb->s_flags |= SB_ACTIVE;
- uuid_gen(&sb->s_uuid);
+ uuid_t uuid;
+ uuid_gen(&uuid);
+ super_set_uuid(sb, uuid.b, sizeof(uuid));
down_write(&root->kernfs_supers_rwsem);
list_add(&info->node, &info->root->supers);
diff --git a/fs/libfs.c b/fs/libfs.c
index eec6031b0155..0d14ae808fcf 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -23,6 +23,7 @@
#include <linux/fsnotify.h>
#include <linux/unicode.h>
#include <linux/fscrypt.h>
+#include <linux/pidfs.h>
#include <linux/uaccess.h>
@@ -240,17 +241,22 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);
-static void offset_set(struct dentry *dentry, u32 offset)
+/* 0 is '.', 1 is '..', so always start with offset 2 or more */
+enum {
+ DIR_OFFSET_MIN = 2,
+};
+
+static void offset_set(struct dentry *dentry, long offset)
{
- dentry->d_fsdata = (void *)((uintptr_t)(offset));
+ dentry->d_fsdata = (void *)offset;
}
-static u32 dentry2offset(struct dentry *dentry)
+static long dentry2offset(struct dentry *dentry)
{
- return (u32)((uintptr_t)(dentry->d_fsdata));
+ return (long)dentry->d_fsdata;
}
-static struct lock_class_key simple_offset_xa_lock;
+static struct lock_class_key simple_offset_lock_class;
/**
* simple_offset_init - initialize an offset_ctx
@@ -259,11 +265,9 @@ static struct lock_class_key simple_offset_xa_lock;
*/
void simple_offset_init(struct offset_ctx *octx)
{
- xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
- lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
-
- /* 0 is '.', 1 is '..', so always start with offset 2 */
- octx->next_offset = 2;
+ mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE);
+ lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class);
+ octx->next_offset = DIR_OFFSET_MIN;
}
/**
@@ -271,20 +275,19 @@ void simple_offset_init(struct offset_ctx *octx)
* @octx: directory offset ctx to be updated
* @dentry: new dentry being added
*
- * Returns zero on success. @so_ctx and the dentry offset are updated.
+ * Returns zero on success. @octx and the dentry's offset are updated.
* Otherwise, a negative errno value is returned.
*/
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
{
- static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
- u32 offset;
+ unsigned long offset;
int ret;
if (dentry2offset(dentry) != 0)
return -EBUSY;
- ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
- &octx->next_offset, GFP_KERNEL);
+ ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
+ LONG_MAX, &octx->next_offset, GFP_KERNEL);
if (ret < 0)
return ret;
@@ -300,17 +303,49 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
*/
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
{
- u32 offset;
+ long offset;
offset = dentry2offset(dentry);
if (offset == 0)
return;
- xa_erase(&octx->xa, offset);
+ mtree_erase(&octx->mt, offset);
offset_set(dentry, 0);
}
/**
+ * simple_offset_empty - Check if a dentry can be unlinked
+ * @dentry: dentry to be tested
+ *
+ * Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
+ */
+int simple_offset_empty(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct offset_ctx *octx;
+ struct dentry *child;
+ unsigned long index;
+ int ret = 1;
+
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return ret;
+
+ index = DIR_OFFSET_MIN;
+ octx = inode->i_op->get_offset_ctx(inode);
+ mt_for_each(&octx->mt, child, index, LONG_MAX) {
+ spin_lock(&child->d_lock);
+ if (simple_positive(child)) {
+ spin_unlock(&child->d_lock);
+ ret = 0;
+ break;
+ }
+ spin_unlock(&child->d_lock);
+ }
+
+ return ret;
+}
+
+/**
* simple_offset_rename_exchange - exchange rename with directory offsets
* @old_dir: parent of dentry being moved
* @old_dentry: dentry being moved
@@ -327,8 +362,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
{
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
- u32 old_index = dentry2offset(old_dentry);
- u32 new_index = dentry2offset(new_dentry);
+ long old_index = dentry2offset(old_dentry);
+ long new_index = dentry2offset(new_dentry);
int ret;
simple_offset_remove(old_ctx, old_dentry);
@@ -354,9 +389,9 @@ int simple_offset_rename_exchange(struct inode *old_dir,
out_restore:
offset_set(old_dentry, old_index);
- xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
+ mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL);
offset_set(new_dentry, new_index);
- xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
+ mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
return ret;
}
@@ -369,7 +404,7 @@ out_restore:
*/
void simple_offset_destroy(struct offset_ctx *octx)
{
- xa_destroy(&octx->xa);
+ mtree_destroy(&octx->mt);
}
/**
@@ -399,15 +434,16 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
/* In this case, ->private_data is protected by f_pos_lock */
file->private_data = NULL;
- return vfs_setpos(file, offset, U32_MAX);
+ return vfs_setpos(file, offset, LONG_MAX);
}
-static struct dentry *offset_find_next(struct xa_state *xas)
+static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
{
+ MA_STATE(mas, &octx->mt, offset, offset);
struct dentry *child, *found = NULL;
rcu_read_lock();
- child = xas_next_entry(xas, U32_MAX);
+ child = mas_find(&mas, LONG_MAX);
if (!child)
goto out;
spin_lock(&child->d_lock);
@@ -421,8 +457,8 @@ out:
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
- u32 offset = dentry2offset(dentry);
struct inode *inode = d_inode(dentry);
+ long offset = dentry2offset(dentry);
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
@@ -430,12 +466,11 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
{
- struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
- XA_STATE(xas, &so_ctx->xa, ctx->pos);
+ struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *dentry;
while (true) {
- dentry = offset_find_next(&xas);
+ dentry = offset_find_next(octx, ctx->pos);
if (!dentry)
return ERR_PTR(-ENOENT);
@@ -444,8 +479,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
break;
}
+ ctx->pos = dentry2offset(dentry) + 1;
dput(dentry);
- ctx->pos = xas.xa_index + 1;
}
return NULL;
}
@@ -481,7 +516,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
return 0;
/* In this case, ->private_data is protected by f_pos_lock */
- if (ctx->pos == 2)
+ if (ctx->pos == DIR_OFFSET_MIN)
file->private_data = NULL;
else if (file->private_data == ERR_PTR(-ENOENT))
return 0;
@@ -1580,7 +1615,7 @@ EXPORT_SYMBOL(alloc_anon_inode);
* All arguments are ignored and it just returns -EINVAL.
*/
int
-simple_nosetlease(struct file *filp, int arg, struct file_lock **flp,
+simple_nosetlease(struct file *filp, int arg, struct file_lease **flp,
void **priv)
{
return -EINVAL;
@@ -1704,16 +1739,28 @@ bool is_empty_dir_inode(struct inode *inode)
static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
- const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *dir = READ_ONCE(parent->d_inode);
- const struct super_block *sb = dentry->d_sb;
- const struct unicode_map *um = sb->s_encoding;
- struct qstr qstr = QSTR_INIT(str, len);
+ const struct dentry *parent;
+ const struct inode *dir;
char strbuf[DNAME_INLINE_LEN];
- int ret;
+ struct qstr qstr;
+
+ /*
+ * Attempt a case-sensitive match first. It is cheaper and
+ * should cover most lookups, including all the sane
+ * applications that expect a case-sensitive filesystem.
+ *
+ * This comparison is safe under RCU because the caller
+ * guarantees the consistency between str and len. See
+ * __d_lookup_rcu_op_compare() for details.
+ */
+ if (len == name->len && !memcmp(str, name->name, len))
+ return 0;
+ parent = READ_ONCE(dentry->d_parent);
+ dir = READ_ONCE(parent->d_inode);
if (!dir || !IS_CASEFOLDED(dir))
- goto fallback;
+ return 1;
+
/*
* If the dentry name is stored in-line, then it may be concurrently
* modified by a rename. If this happens, the VFS will eventually retry
@@ -1724,20 +1771,14 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
if (len <= DNAME_INLINE_LEN - 1) {
memcpy(strbuf, str, len);
strbuf[len] = 0;
- qstr.name = strbuf;
+ str = strbuf;
/* prevent compiler from optimizing out the temporary buffer */
barrier();
}
- ret = utf8_strncasecmp(um, name, &qstr);
- if (ret >= 0)
- return ret;
+ qstr.len = len;
+ qstr.name = str;
- if (sb_has_strict_encoding(sb))
- return -EINVAL;
-fallback:
- if (len != name->len)
- return 1;
- return !!memcmp(str, name->name, len);
+ return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr);
}
/**
@@ -1752,7 +1793,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
const struct inode *dir = READ_ONCE(dentry->d_inode);
struct super_block *sb = dentry->d_sb;
const struct unicode_map *um = sb->s_encoding;
- int ret = 0;
+ int ret;
if (!dir || !IS_CASEFOLDED(dir))
return 0;
@@ -1766,73 +1807,45 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
static const struct dentry_operations generic_ci_dentry_ops = {
.d_hash = generic_ci_d_hash,
.d_compare = generic_ci_d_compare,
-};
-#endif
-
#ifdef CONFIG_FS_ENCRYPTION
-static const struct dentry_operations generic_encrypted_dentry_ops = {
.d_revalidate = fscrypt_d_revalidate,
+#endif
};
#endif
-#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
-static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
- .d_hash = generic_ci_d_hash,
- .d_compare = generic_ci_d_compare,
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations generic_encrypted_dentry_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
#endif
/**
- * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
- * @dentry: dentry to set ops on
- *
- * Casefolded directories need d_hash and d_compare set, so that the dentries
- * contained in them are handled case-insensitively. Note that these operations
- * are needed on the parent directory rather than on the dentries in it, and
- * while the casefolding flag can be toggled on and off on an empty directory,
- * dentry_operations can't be changed later. As a result, if the filesystem has
- * casefolding support enabled at all, we have to give all dentries the
- * casefolding operations even if their inode doesn't have the casefolding flag
- * currently (and thus the casefolding ops would be no-ops for now).
- *
- * Encryption works differently in that the only dentry operation it needs is
- * d_revalidate, which it only needs on dentries that have the no-key name flag.
- * The no-key flag can't be set "later", so we don't have to worry about that.
+ * generic_set_sb_d_ops - helper for choosing the set of
+ * filesystem-wide dentry operations for the enabled features
+ * @sb: superblock to be configured
*
- * Finally, to maximize compatibility with overlayfs (which isn't compatible
- * with certain dentry operations) and to avoid taking an unnecessary
- * performance hit, we use custom dentry_operations for each possible
- * combination rather than always installing all operations.
+ * Filesystems supporting casefolding and/or fscrypt can call this
+ * helper at mount-time to configure sb->s_d_op to best set of dentry
+ * operations required for the enabled features. The helper must be
+ * called after these have been configured, but before the root dentry
+ * is created.
*/
-void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
+void generic_set_sb_d_ops(struct super_block *sb)
{
-#ifdef CONFIG_FS_ENCRYPTION
- bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
-#endif
#if IS_ENABLED(CONFIG_UNICODE)
- bool needs_ci_ops = dentry->d_sb->s_encoding;
-#endif
-#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
- if (needs_encrypt_ops && needs_ci_ops) {
- d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
+ if (sb->s_encoding) {
+ sb->s_d_op = &generic_ci_dentry_ops;
return;
}
#endif
#ifdef CONFIG_FS_ENCRYPTION
- if (needs_encrypt_ops) {
- d_set_d_op(dentry, &generic_encrypted_dentry_ops);
- return;
- }
-#endif
-#if IS_ENABLED(CONFIG_UNICODE)
- if (needs_ci_ops) {
- d_set_d_op(dentry, &generic_ci_dentry_ops);
+ if (sb->s_cop) {
+ sb->s_d_op = &generic_encrypted_dentry_ops;
return;
}
#endif
}
-EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
+EXPORT_SYMBOL(generic_set_sb_d_ops);
/**
* inode_maybe_inc_iversion - increments i_version
@@ -1973,3 +1986,144 @@ struct timespec64 simple_inode_init_ts(struct inode *inode)
return ts;
}
EXPORT_SYMBOL(simple_inode_init_ts);
+
+static inline struct dentry *get_stashed_dentry(struct dentry *stashed)
+{
+ struct dentry *dentry;
+
+ guard(rcu)();
+ dentry = READ_ONCE(stashed);
+ if (!dentry)
+ return NULL;
+ if (!lockref_get_not_dead(&dentry->d_lockref))
+ return NULL;
+ return dentry;
+}
+
+static struct dentry *prepare_anon_dentry(struct dentry **stashed,
+ unsigned long ino,
+ struct super_block *sb,
+ void *data)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ const struct stashed_operations *sops = sb->s_fs_info;
+
+ dentry = d_alloc_anon(sb);
+ if (!dentry)
+ return ERR_PTR(-ENOMEM);
+
+ inode = new_inode_pseudo(sb);
+ if (!inode) {
+ dput(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ inode->i_ino = ino;
+ inode->i_flags |= S_IMMUTABLE;
+ inode->i_mode = S_IFREG;
+ simple_inode_init_ts(inode);
+ sops->init_inode(inode, data);
+
+ /* Notice when this is changed. */
+ WARN_ON_ONCE(!S_ISREG(inode->i_mode));
+ WARN_ON_ONCE(!IS_IMMUTABLE(inode));
+
+ /* Store address of location where dentry's supposed to be stashed. */
+ dentry->d_fsdata = stashed;
+
+ /* @data is now owned by the fs */
+ d_instantiate(dentry, inode);
+ return dentry;
+}
+
+static struct dentry *stash_dentry(struct dentry **stashed,
+ struct dentry *dentry)
+{
+ guard(rcu)();
+ for (;;) {
+ struct dentry *old;
+
+ /* Assume any old dentry was cleared out. */
+ old = cmpxchg(stashed, NULL, dentry);
+ if (likely(!old))
+ return dentry;
+
+ /* Check if somebody else installed a reusable dentry. */
+ if (lockref_get_not_dead(&old->d_lockref))
+ return old;
+
+ /* There's an old dead dentry there, try to take it over. */
+ if (likely(try_cmpxchg(stashed, &old, dentry)))
+ return dentry;
+ }
+}
+
+/**
+ * path_from_stashed - create path from stashed or new dentry
+ * @stashed: where to retrieve or stash dentry
+ * @ino: inode number to use
+ * @mnt: mnt of the filesystems to use
+ * @data: data to store in inode->i_private
+ * @path: path to create
+ *
+ * The function tries to retrieve a stashed dentry from @stashed. If the dentry
+ * is still valid then it will be reused. If the dentry isn't able the function
+ * will allocate a new dentry and inode. It will then check again whether it
+ * can reuse an existing dentry in case one has been added in the meantime or
+ * update @stashed with the newly added dentry.
+ *
+ * Special-purpose helper for nsfs and pidfs.
+ *
+ * Return: On success zero and on failure a negative error is returned.
+ */
+int path_from_stashed(struct dentry **stashed, unsigned long ino,
+ struct vfsmount *mnt, void *data, struct path *path)
+{
+ struct dentry *dentry;
+ const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info;
+
+ /* See if dentry can be reused. */
+ path->dentry = get_stashed_dentry(*stashed);
+ if (path->dentry) {
+ sops->put_data(data);
+ goto out_path;
+ }
+
+ /* Allocate a new dentry. */
+ dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, data);
+ if (IS_ERR(dentry)) {
+ sops->put_data(data);
+ return PTR_ERR(dentry);
+ }
+
+ /* Added a new dentry. @data is now owned by the filesystem. */
+ path->dentry = stash_dentry(stashed, dentry);
+ if (path->dentry != dentry)
+ dput(dentry);
+
+out_path:
+ WARN_ON_ONCE(path->dentry->d_fsdata != stashed);
+ WARN_ON_ONCE(d_inode(path->dentry)->i_private != data);
+ path->mnt = mntget(mnt);
+ return 0;
+}
+
+void stashed_dentry_prune(struct dentry *dentry)
+{
+ struct dentry **stashed = dentry->d_fsdata;
+ struct inode *inode = d_inode(dentry);
+
+ if (WARN_ON_ONCE(!stashed))
+ return;
+
+ if (!inode)
+ return;
+
+ /*
+ * Only replace our own @dentry as someone else might've
+ * already cleared out @dentry and stashed their own
+ * dentry in there.
+ */
+ cmpxchg(stashed, dentry, NULL);
+}
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 8161667c976f..527458db4525 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -243,7 +243,7 @@ static void encode_nlm4_holder(struct xdr_stream *xdr,
u64 l_offset, l_len;
__be32 *p;
- encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_RDLCK);
encode_int32(xdr, lock->svid);
encode_netobj(xdr, lock->oh.data, lock->oh.len);
@@ -270,7 +270,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
goto out_overflow;
exclusive = be32_to_cpup(p++);
lock->svid = be32_to_cpup(p);
- fl->fl_pid = (pid_t)lock->svid;
+ fl->c.flc_pid = (pid_t)lock->svid;
error = decode_netobj(xdr, &lock->oh);
if (unlikely(error))
@@ -280,8 +280,8 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
if (unlikely(p == NULL))
goto out_overflow;
- fl->fl_flags = FL_POSIX;
- fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
p = xdr_decode_hyper(p, &l_offset);
xdr_decode_hyper(p, &l_len);
nlm4svc_set_file_lock_range(fl, l_offset, l_len);
@@ -357,7 +357,7 @@ static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
const struct nlm_lock *lock = &args->lock;
encode_cookie(xdr, &args->cookie);
- encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_WRLCK);
encode_nlm4_lock(xdr, lock);
}
@@ -380,7 +380,7 @@ static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
encode_cookie(xdr, &args->cookie);
encode_bool(xdr, args->block);
- encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_WRLCK);
encode_nlm4_lock(xdr, lock);
encode_bool(xdr, args->reclaim);
encode_int32(xdr, args->state);
@@ -403,7 +403,7 @@ static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
encode_cookie(xdr, &args->cookie);
encode_bool(xdr, args->block);
- encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_WRLCK);
encode_nlm4_lock(xdr, lock);
}
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 5d85715be763..a7e0519ec024 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -185,7 +185,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
continue;
if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
continue;
- if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)), fh) != 0)
+ if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->c.flc_file)), fh) != 0)
continue;
/* Alright, we found a lock. Set the return status
* and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fba6c7fa7474..cebcc283b7ce 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -133,7 +133,8 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
char *nodename = req->a_host->h_rpcclnt->cl_nodename;
nlmclnt_next_cookie(&argp->cookie);
- memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
+ memcpy(&lock->fh, NFS_FH(file_inode(fl->c.flc_file)),
+ sizeof(struct nfs_fh));
lock->caller = nodename;
lock->oh.data = req->a_owner;
lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
@@ -142,7 +143,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
lock->svid = fl->fl_u.nfs_fl.owner->pid;
lock->fl.fl_start = fl->fl_start;
lock->fl.fl_end = fl->fl_end;
- lock->fl.fl_type = fl->fl_type;
+ lock->fl.c.flc_type = fl->c.flc_type;
}
static void nlmclnt_release_lockargs(struct nlm_rqst *req)
@@ -182,7 +183,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *dat
call->a_callback_data = data;
if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
- if (fl->fl_type != F_UNLCK) {
+ if (fl->c.flc_type != F_UNLCK) {
call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
status = nlmclnt_lock(call, fl);
} else
@@ -432,13 +433,14 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
{
int status;
- status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_TEST);
+ status = nlmclnt_call(nfs_file_cred(fl->c.flc_file), req,
+ NLMPROC_TEST);
if (status < 0)
goto out;
switch (req->a_res.status) {
case nlm_granted:
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
break;
case nlm_lck_denied:
/*
@@ -446,8 +448,8 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
*/
fl->fl_start = req->a_res.lock.fl.fl_start;
fl->fl_end = req->a_res.lock.fl.fl_end;
- fl->fl_type = req->a_res.lock.fl.fl_type;
- fl->fl_pid = -req->a_res.lock.fl.fl_pid;
+ fl->c.flc_type = req->a_res.lock.fl.c.flc_type;
+ fl->c.flc_pid = -req->a_res.lock.fl.c.flc_pid;
break;
default:
status = nlm_stat_to_errno(req->a_res.status);
@@ -485,14 +487,15 @@ static const struct file_lock_operations nlmclnt_lock_ops = {
static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
{
fl->fl_u.nfs_fl.state = 0;
- fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner);
+ fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host,
+ fl->c.flc_owner);
INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
static int do_vfs_lock(struct file_lock *fl)
{
- return locks_lock_file_wait(fl->fl_file, fl);
+ return locks_lock_file_wait(fl->c.flc_file, fl);
}
/*
@@ -518,12 +521,12 @@ static int do_vfs_lock(struct file_lock *fl)
static int
nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
{
- const struct cred *cred = nfs_file_cred(fl->fl_file);
+ const struct cred *cred = nfs_file_cred(fl->c.flc_file);
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
struct nlm_wait block;
- unsigned char fl_flags = fl->fl_flags;
- unsigned char fl_type;
+ unsigned char flags = fl->c.flc_flags;
+ unsigned char type;
__be32 b_status;
int status = -ENOLCK;
@@ -531,9 +534,9 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
goto out;
req->a_args.state = nsm_local_state;
- fl->fl_flags |= FL_ACCESS;
+ fl->c.flc_flags |= FL_ACCESS;
status = do_vfs_lock(fl);
- fl->fl_flags = fl_flags;
+ fl->c.flc_flags = flags;
if (status < 0)
goto out;
@@ -591,11 +594,11 @@ again:
goto again;
}
/* Ensure the resulting lock will get added to granted list */
- fl->fl_flags |= FL_SLEEP;
+ fl->c.flc_flags |= FL_SLEEP;
if (do_vfs_lock(fl) < 0)
printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
up_read(&host->h_rwsem);
- fl->fl_flags = fl_flags;
+ fl->c.flc_flags = flags;
status = 0;
}
if (status < 0)
@@ -605,7 +608,7 @@ again:
* cases NLM_LCK_DENIED is returned for a permanent error. So
* turn it into an ENOLCK.
*/
- if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+ if (resp->status == nlm_lck_denied && (flags & FL_SLEEP))
status = -ENOLCK;
else
status = nlm_stat_to_errno(resp->status);
@@ -622,13 +625,13 @@ out_unlock:
req->a_host->h_addrlen, req->a_res.status);
dprintk("lockd: lock attempt ended in fatal error.\n"
" Attempting to unlock.\n");
- fl_type = fl->fl_type;
- fl->fl_type = F_UNLCK;
+ type = fl->c.flc_type;
+ fl->c.flc_type = F_UNLCK;
down_read(&host->h_rwsem);
do_vfs_lock(fl);
up_read(&host->h_rwsem);
- fl->fl_type = fl_type;
- fl->fl_flags = fl_flags;
+ fl->c.flc_type = type;
+ fl->c.flc_flags = flags;
nlmclnt_async_call(cred, req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
return status;
}
@@ -651,12 +654,14 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl,
nlmclnt_setlockargs(req, fl);
req->a_args.reclaim = 1;
- status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_LOCK);
+ status = nlmclnt_call(nfs_file_cred(fl->c.flc_file), req,
+ NLMPROC_LOCK);
if (status >= 0 && req->a_res.status == nlm_granted)
return 0;
printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d "
- "(errno %d, status %d)\n", fl->fl_pid,
+ "(errno %d, status %d)\n",
+ fl->c.flc_pid,
status, ntohl(req->a_res.status));
/*
@@ -683,26 +688,26 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
int status;
- unsigned char fl_flags = fl->fl_flags;
+ unsigned char flags = fl->c.flc_flags;
/*
* Note: the server is supposed to either grant us the unlock
* request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
* case, we want to unlock.
*/
- fl->fl_flags |= FL_EXISTS;
+ fl->c.flc_flags |= FL_EXISTS;
down_read(&host->h_rwsem);
status = do_vfs_lock(fl);
up_read(&host->h_rwsem);
- fl->fl_flags = fl_flags;
+ fl->c.flc_flags = flags;
if (status == -ENOENT) {
status = 0;
goto out;
}
refcount_inc(&req->a_count);
- status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
- NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
+ status = nlmclnt_async_call(nfs_file_cred(fl->c.flc_file), req,
+ NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
if (status < 0)
goto out;
@@ -795,8 +800,8 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
req->a_args.block = block;
refcount_inc(&req->a_count);
- status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
- NLMPROC_CANCEL, &nlmclnt_cancel_ops);
+ status = nlmclnt_async_call(nfs_file_cred(fl->c.flc_file), req,
+ NLMPROC_CANCEL, &nlmclnt_cancel_ops);
if (status == 0 && req->a_res.status == nlm_lck_denied)
status = -ENOLCK;
nlmclnt_release_call(req);
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 4df62f635529..a3e97278b997 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -238,7 +238,7 @@ static void encode_nlm_holder(struct xdr_stream *xdr,
u32 l_offset, l_len;
__be32 *p;
- encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_RDLCK);
encode_int32(xdr, lock->svid);
encode_netobj(xdr, lock->oh.data, lock->oh.len);
@@ -265,7 +265,7 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
goto out_overflow;
exclusive = be32_to_cpup(p++);
lock->svid = be32_to_cpup(p);
- fl->fl_pid = (pid_t)lock->svid;
+ fl->c.flc_pid = (pid_t)lock->svid;
error = decode_netobj(xdr, &lock->oh);
if (unlikely(error))
@@ -275,8 +275,8 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
if (unlikely(p == NULL))
goto out_overflow;
- fl->fl_flags = FL_POSIX;
- fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
l_offset = be32_to_cpup(p++);
l_len = be32_to_cpup(p);
end = l_offset + l_len - 1;
@@ -357,7 +357,7 @@ static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
const struct nlm_lock *lock = &args->lock;
encode_cookie(xdr, &args->cookie);
- encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_WRLCK);
encode_nlm_lock(xdr, lock);
}
@@ -380,7 +380,7 @@ static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
encode_cookie(xdr, &args->cookie);
encode_bool(xdr, args->block);
- encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_WRLCK);
encode_nlm_lock(xdr, lock);
encode_bool(xdr, args->reclaim);
encode_int32(xdr, args->state);
@@ -403,7 +403,7 @@ static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
encode_cookie(xdr, &args->cookie);
encode_bool(xdr, args->block);
- encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_bool(xdr, lock->fl.c.flc_type == F_WRLCK);
encode_nlm_lock(xdr, lock);
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index ce5862482097..ab8042a5b895 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -710,8 +710,6 @@ static const struct svc_version *nlmsvc_version[] = {
#endif
};
-static struct svc_stat nlmsvc_stats;
-
#define NLM_NRVERS ARRAY_SIZE(nlmsvc_version)
static struct svc_program nlmsvc_program = {
.pg_prog = NLM_PROGRAM, /* program number */
@@ -719,7 +717,6 @@ static struct svc_program nlmsvc_program = {
.pg_vers = nlmsvc_version, /* version table */
.pg_name = "lockd", /* service name */
.pg_class = "nfsd", /* share authentication with nfsd */
- .pg_stats = &nlmsvc_stats, /* stats table */
.pg_authenticate = &lockd_authenticate, /* export authentication */
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b72023a6b4c1..8a72c418cdcc 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -52,16 +52,16 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_flags = FL_POSIX;
- lock->fl.fl_file = file->f_file[mode];
- lock->fl.fl_pid = current->tgid;
+ lock->fl.c.flc_flags = FL_POSIX;
+ lock->fl.c.flc_file = file->f_file[mode];
+ lock->fl.c.flc_pid = current->tgid;
lock->fl.fl_start = (loff_t)lock->lock_start;
lock->fl.fl_end = lock->lock_len ?
(loff_t)(lock->lock_start + lock->lock_len - 1) :
OFFSET_MAX;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
- if (!lock->fl.fl_owner) {
+ if (!lock->fl.c.flc_owner) {
/* lockowner allocation has failed */
nlmsvc_release_host(host);
return nlm_lck_denied_nolocks;
@@ -106,7 +106,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
- test_owner = argp->lock.fl.fl_owner;
+ test_owner = argp->lock.fl.c.flc_owner;
/* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 2dc10900ad1c..1f2149db10f2 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -150,16 +150,17 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
struct file_lock *fl;
dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
- file, lock->fl.fl_pid,
+ file, lock->fl.c.flc_pid,
(long long)lock->fl.fl_start,
- (long long)lock->fl.fl_end, lock->fl.fl_type);
+ (long long)lock->fl.fl_end,
+ lock->fl.c.flc_type);
spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
- block->b_file, fl->fl_pid,
+ block->b_file, fl->c.flc_pid,
(long long)fl->fl_start,
- (long long)fl->fl_end, fl->fl_type,
+ (long long)fl->fl_end, fl->c.flc_type,
nlmdbg_cookie2a(&block->b_call->a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
kref_get(&block->b_count);
@@ -244,7 +245,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
goto failed_free;
/* Set notifier function for VFS, and init args */
- call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+ call->a_args.lock.fl.c.flc_flags |= FL_SLEEP;
call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
nlmclnt_next_cookie(&call->a_args.cookie);
@@ -402,14 +403,14 @@ static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t
void
nlmsvc_release_lockowner(struct nlm_lock *lock)
{
- if (lock->fl.fl_owner)
- nlmsvc_put_lockowner(lock->fl.fl_owner);
+ if (lock->fl.c.flc_owner)
+ nlmsvc_put_lockowner(lock->fl.c.flc_owner);
}
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
- fl->fl_owner = nlmsvc_find_lockowner(host, pid);
+ fl->c.flc_owner = nlmsvc_find_lockowner(host, pid);
}
/*
@@ -425,7 +426,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
/* set default data area */
call->a_args.lock.oh.data = call->a_owner;
- call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
+ call->a_args.lock.svid = ((struct nlm_lockowner *) lock->fl.c.flc_owner)->pid;
if (lock->oh.len > NLMCLNT_OHSIZE) {
void *data = kmalloc(lock->oh.len, GFP_KERNEL);
@@ -489,7 +490,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
inode->i_sb->s_id, inode->i_ino,
- lock->fl.fl_type, lock->fl.fl_pid,
+ lock->fl.c.flc_type,
+ lock->fl.c.flc_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
wait);
@@ -512,7 +514,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
lock = &block->b_call->a_args.lock;
} else
- lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.c.flc_flags &= ~FL_SLEEP;
if (block->b_flags & B_QUEUED) {
dprintk("lockd: nlmsvc_lock deferred block %p flags %d\n",
@@ -560,10 +562,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
spin_unlock(&nlm_blocked_lock);
if (!wait)
- lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.c.flc_flags &= ~FL_SLEEP;
mode = lock_to_openmode(&lock->fl);
error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
- lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.c.flc_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
switch (error) {
@@ -616,7 +618,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
nlmsvc_file_inode(file)->i_sb->s_id,
nlmsvc_file_inode(file)->i_ino,
- lock->fl.fl_type,
+ lock->fl.c.flc_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -636,19 +638,19 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
- if (lock->fl.fl_type == F_UNLCK) {
+ if (lock->fl.c.flc_type == F_UNLCK) {
ret = nlm_granted;
goto out;
}
dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
- lock->fl.fl_type, (long long)lock->fl.fl_start,
+ lock->fl.c.flc_type, (long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = lock->fl.fl_pid;
- conflock->fl.fl_type = lock->fl.fl_type;
+ conflock->svid = lock->fl.c.flc_pid;
+ conflock->fl.c.flc_type = lock->fl.c.flc_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
locks_release_private(&lock->fl);
@@ -673,21 +675,21 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
nlmsvc_file_inode(file)->i_sb->s_id,
nlmsvc_file_inode(file)->i_ino,
- lock->fl.fl_pid,
+ lock->fl.c.flc_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
/* First, cancel any lock that might be there */
nlmsvc_cancel_blocked(net, file, lock);
- lock->fl.fl_type = F_UNLCK;
- lock->fl.fl_file = file->f_file[O_RDONLY];
- if (lock->fl.fl_file)
- error = vfs_lock_file(lock->fl.fl_file, F_SETLK,
+ lock->fl.c.flc_type = F_UNLCK;
+ lock->fl.c.flc_file = file->f_file[O_RDONLY];
+ if (lock->fl.c.flc_file)
+ error = vfs_lock_file(lock->fl.c.flc_file, F_SETLK,
&lock->fl, NULL);
- lock->fl.fl_file = file->f_file[O_WRONLY];
- if (lock->fl.fl_file)
- error |= vfs_lock_file(lock->fl.fl_file, F_SETLK,
+ lock->fl.c.flc_file = file->f_file[O_WRONLY];
+ if (lock->fl.c.flc_file)
+ error |= vfs_lock_file(lock->fl.c.flc_file, F_SETLK,
&lock->fl, NULL);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
@@ -710,7 +712,7 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
nlmsvc_file_inode(file)->i_sb->s_id,
nlmsvc_file_inode(file)->i_ino,
- lock->fl.fl_pid,
+ lock->fl.c.flc_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -863,12 +865,12 @@ nlmsvc_grant_blocked(struct nlm_block *block)
/* vfs_lock_file() can mangle fl_start and fl_end, but we need
* them unchanged for the GRANT_MSG
*/
- lock->fl.fl_flags |= FL_SLEEP;
+ lock->fl.c.flc_flags |= FL_SLEEP;
fl_start = lock->fl.fl_start;
fl_end = lock->fl.fl_end;
mode = lock_to_openmode(&lock->fl);
error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
- lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.c.flc_flags &= ~FL_SLEEP;
lock->fl.fl_start = fl_start;
lock->fl.fl_end = fl_end;
@@ -993,8 +995,8 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
/* Client doesn't want it, just unlock it */
nlmsvc_unlink_block(block);
fl = &block->b_call->a_args.lock.fl;
- fl->fl_type = F_UNLCK;
- error = vfs_lock_file(fl->fl_file, F_SETLK, fl, NULL);
+ fl->c.flc_type = F_UNLCK;
+ error = vfs_lock_file(fl->c.flc_file, F_SETLK, fl, NULL);
if (error)
pr_warn("lockd: unable to unlock lock rejected by client!\n");
break;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 32784f508c81..a03220e66ce0 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -77,12 +77,12 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
mode = lock_to_openmode(&lock->fl);
- lock->fl.fl_flags = FL_POSIX;
- lock->fl.fl_file = file->f_file[mode];
- lock->fl.fl_pid = current->tgid;
+ lock->fl.c.flc_flags = FL_POSIX;
+ lock->fl.c.flc_file = file->f_file[mode];
+ lock->fl.c.flc_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
- if (!lock->fl.fl_owner) {
+ if (!lock->fl.c.flc_owner) {
/* lockowner allocation has failed */
nlmsvc_release_host(host);
return nlm_lck_denied_nolocks;
@@ -127,7 +127,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
- test_owner = argp->lock.fl.fl_owner;
+ test_owner = argp->lock.fl.c.flc_owner;
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index e3b6229e7ae5..9103896164f6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -73,7 +73,7 @@ static inline unsigned int file_hash(struct nfs_fh *f)
int lock_to_openmode(struct file_lock *lock)
{
- return (lock->fl_type == F_WRLCK) ? O_WRONLY : O_RDONLY;
+ return lock_is_write(lock) ? O_WRONLY : O_RDONLY;
}
/*
@@ -181,18 +181,18 @@ static int nlm_unlock_files(struct nlm_file *file, const struct file_lock *fl)
struct file_lock lock;
locks_init_lock(&lock);
- lock.fl_type = F_UNLCK;
+ lock.c.flc_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
- lock.fl_owner = fl->fl_owner;
- lock.fl_pid = fl->fl_pid;
- lock.fl_flags = FL_POSIX;
+ lock.c.flc_owner = fl->c.flc_owner;
+ lock.c.flc_pid = fl->c.flc_pid;
+ lock.c.flc_flags = FL_POSIX;
- lock.fl_file = file->f_file[O_RDONLY];
- if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL))
+ lock.c.flc_file = file->f_file[O_RDONLY];
+ if (lock.c.flc_file && vfs_lock_file(lock.c.flc_file, F_SETLK, &lock, NULL))
goto out_err;
- lock.fl_file = file->f_file[O_WRONLY];
- if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL))
+ lock.c.flc_file = file->f_file[O_WRONLY];
+ if (lock.c.flc_file && vfs_lock_file(lock.c.flc_file, F_SETLK, &lock, NULL))
goto out_err;
return 0;
out_err:
@@ -218,14 +218,14 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
again:
file->f_locks = 0;
spin_lock(&flctx->flc_lock);
- list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
+ for_each_file_lock(fl, &flctx->flc_posix) {
if (fl->fl_lmops != &nlmsvc_lock_operations)
continue;
/* update current lock count */
file->f_locks++;
- lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
+ lockhost = ((struct nlm_lockowner *) fl->c.flc_owner)->host;
if (match(lockhost, host)) {
spin_unlock(&flctx->flc_lock);
@@ -272,7 +272,7 @@ nlm_file_inuse(struct nlm_file *file)
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
- list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
+ for_each_file_lock(fl, &flctx->flc_posix) {
if (fl->fl_lmops == &nlmsvc_lock_operations) {
spin_unlock(&flctx->flc_lock);
return 1;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 2fb5748dae0c..adfcce2bf11b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -88,8 +88,8 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
return false;
locks_init_lock(fl);
- fl->fl_flags = FL_POSIX;
- fl->fl_type = F_RDLCK;
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_type = F_RDLCK;
end = start + len - 1;
fl->fl_start = s32_to_loff_t(start);
if (len == 0 || end < 0)
@@ -107,7 +107,7 @@ svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock)
s32 start, len;
/* exclusive */
- if (xdr_stream_encode_bool(xdr, fl->fl_type != F_RDLCK) < 0)
+ if (xdr_stream_encode_bool(xdr, fl->c.flc_type != F_RDLCK) < 0)
return false;
if (xdr_stream_encode_u32(xdr, lock->svid) < 0)
return false;
@@ -164,7 +164,7 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
if (exclusive)
- argp->lock.fl.fl_type = F_WRLCK;
+ argp->lock.fl.c.flc_type = F_WRLCK;
return true;
}
@@ -184,7 +184,7 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
if (exclusive)
- argp->lock.fl.fl_type = F_WRLCK;
+ argp->lock.fl.c.flc_type = F_WRLCK;
if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
return false;
if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
@@ -209,7 +209,7 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
if (exclusive)
- argp->lock.fl.fl_type = F_WRLCK;
+ argp->lock.fl.c.flc_type = F_WRLCK;
return true;
}
@@ -223,7 +223,7 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
- argp->lock.fl.fl_type = F_UNLCK;
+ argp->lock.fl.c.flc_type = F_UNLCK;
return true;
}
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 5fcbf30cd275..3d28b9c3ed15 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -89,8 +89,8 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
return false;
locks_init_lock(fl);
- fl->fl_flags = FL_POSIX;
- fl->fl_type = F_RDLCK;
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_type = F_RDLCK;
nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len);
return true;
}
@@ -102,7 +102,7 @@ svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock)
s64 start, len;
/* exclusive */
- if (xdr_stream_encode_bool(xdr, fl->fl_type != F_RDLCK) < 0)
+ if (xdr_stream_encode_bool(xdr, fl->c.flc_type != F_RDLCK) < 0)
return false;
if (xdr_stream_encode_u32(xdr, lock->svid) < 0)
return false;
@@ -159,7 +159,7 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
if (exclusive)
- argp->lock.fl.fl_type = F_WRLCK;
+ argp->lock.fl.c.flc_type = F_WRLCK;
return true;
}
@@ -179,7 +179,7 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
if (exclusive)
- argp->lock.fl.fl_type = F_WRLCK;
+ argp->lock.fl.c.flc_type = F_WRLCK;
if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
return false;
if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
@@ -204,7 +204,7 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
if (exclusive)
- argp->lock.fl.fl_type = F_WRLCK;
+ argp->lock.fl.c.flc_type = F_WRLCK;
return true;
}
@@ -218,7 +218,7 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (!svcxdr_decode_lock(xdr, &argp->lock))
return false;
- argp->lock.fl.fl_type = F_UNLCK;
+ argp->lock.fl.c.flc_type = F_UNLCK;
return true;
}
diff --git a/fs/locks.c b/fs/locks.c
index cc7c117ee192..90c8746874de 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -48,7 +48,6 @@
* children.
*
*/
-
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fdtable.h>
@@ -70,24 +69,28 @@
#include <linux/uaccess.h>
-#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
-#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
-#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
-#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
-#define IS_REMOTELCK(fl) (fl->fl_pid <= 0)
+static struct file_lock *file_lock(struct file_lock_core *flc)
+{
+ return container_of(flc, struct file_lock, c);
+}
+
+static struct file_lease *file_lease(struct file_lock_core *flc)
+{
+ return container_of(flc, struct file_lease, c);
+}
-static bool lease_breaking(struct file_lock *fl)
+static bool lease_breaking(struct file_lease *fl)
{
- return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
+ return fl->c.flc_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
}
-static int target_leasetype(struct file_lock *fl)
+static int target_leasetype(struct file_lease *fl)
{
- if (fl->fl_flags & FL_UNLOCK_PENDING)
+ if (fl->c.flc_flags & FL_UNLOCK_PENDING)
return F_UNLCK;
- if (fl->fl_flags & FL_DOWNGRADE_PENDING)
+ if (fl->c.flc_flags & FL_DOWNGRADE_PENDING)
return F_RDLCK;
- return fl->fl_type;
+ return fl->c.flc_type;
}
static int leases_enable = 1;
@@ -168,6 +171,7 @@ static DEFINE_SPINLOCK(blocked_lock_lock);
static struct kmem_cache *flctx_cache __ro_after_init;
static struct kmem_cache *filelock_cache __ro_after_init;
+static struct kmem_cache *filelease_cache __ro_after_init;
static struct file_lock_context *
locks_get_lock_context(struct inode *inode, int type)
@@ -204,11 +208,12 @@ out:
static void
locks_dump_ctx_list(struct list_head *list, char *list_type)
{
- struct file_lock *fl;
+ struct file_lock_core *flc;
- list_for_each_entry(fl, list, fl_list) {
- pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
- }
+ list_for_each_entry(flc, list, flc_list)
+ pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
+ list_type, flc->flc_owner, flc->flc_flags,
+ flc->flc_type, flc->flc_pid);
}
static void
@@ -229,19 +234,19 @@ locks_check_ctx_lists(struct inode *inode)
}
static void
-locks_check_ctx_file_list(struct file *filp, struct list_head *list,
- char *list_type)
+locks_check_ctx_file_list(struct file *filp, struct list_head *list, char *list_type)
{
- struct file_lock *fl;
+ struct file_lock_core *flc;
struct inode *inode = file_inode(filp);
- list_for_each_entry(fl, list, fl_list)
- if (fl->fl_file == filp)
+ list_for_each_entry(flc, list, flc_list)
+ if (flc->flc_file == filp)
pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
" fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
list_type, MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino,
- fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+ flc->flc_owner, flc->flc_flags,
+ flc->flc_type, flc->flc_pid);
}
void
@@ -255,13 +260,13 @@ locks_free_lock_context(struct inode *inode)
}
}
-static void locks_init_lock_heads(struct file_lock *fl)
+static void locks_init_lock_heads(struct file_lock_core *flc)
{
- INIT_HLIST_NODE(&fl->fl_link);
- INIT_LIST_HEAD(&fl->fl_list);
- INIT_LIST_HEAD(&fl->fl_blocked_requests);
- INIT_LIST_HEAD(&fl->fl_blocked_member);
- init_waitqueue_head(&fl->fl_wait);
+ INIT_HLIST_NODE(&flc->flc_link);
+ INIT_LIST_HEAD(&flc->flc_list);
+ INIT_LIST_HEAD(&flc->flc_blocked_requests);
+ INIT_LIST_HEAD(&flc->flc_blocked_member);
+ init_waitqueue_head(&flc->flc_wait);
}
/* Allocate an empty lock structure. */
@@ -270,19 +275,33 @@ struct file_lock *locks_alloc_lock(void)
struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
if (fl)
- locks_init_lock_heads(fl);
+ locks_init_lock_heads(&fl->c);
return fl;
}
EXPORT_SYMBOL_GPL(locks_alloc_lock);
+/* Allocate an empty lock structure. */
+struct file_lease *locks_alloc_lease(void)
+{
+ struct file_lease *fl = kmem_cache_zalloc(filelease_cache, GFP_KERNEL);
+
+ if (fl)
+ locks_init_lock_heads(&fl->c);
+
+ return fl;
+}
+EXPORT_SYMBOL_GPL(locks_alloc_lease);
+
void locks_release_private(struct file_lock *fl)
{
- BUG_ON(waitqueue_active(&fl->fl_wait));
- BUG_ON(!list_empty(&fl->fl_list));
- BUG_ON(!list_empty(&fl->fl_blocked_requests));
- BUG_ON(!list_empty(&fl->fl_blocked_member));
- BUG_ON(!hlist_unhashed(&fl->fl_link));
+ struct file_lock_core *flc = &fl->c;
+
+ BUG_ON(waitqueue_active(&flc->flc_wait));
+ BUG_ON(!list_empty(&flc->flc_list));
+ BUG_ON(!list_empty(&flc->flc_blocked_requests));
+ BUG_ON(!list_empty(&flc->flc_blocked_member));
+ BUG_ON(!hlist_unhashed(&flc->flc_link));
if (fl->fl_ops) {
if (fl->fl_ops->fl_release_private)
@@ -292,8 +311,8 @@ void locks_release_private(struct file_lock *fl)
if (fl->fl_lmops) {
if (fl->fl_lmops->lm_put_owner) {
- fl->fl_lmops->lm_put_owner(fl->fl_owner);
- fl->fl_owner = NULL;
+ fl->fl_lmops->lm_put_owner(flc->flc_owner);
+ flc->flc_owner = NULL;
}
fl->fl_lmops = NULL;
}
@@ -309,16 +328,15 @@ EXPORT_SYMBOL_GPL(locks_release_private);
* %true: @owner has at least one blocker
* %false: @owner has no blockers
*/
-bool locks_owner_has_blockers(struct file_lock_context *flctx,
- fl_owner_t owner)
+bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner)
{
- struct file_lock *fl;
+ struct file_lock_core *flc;
spin_lock(&flctx->flc_lock);
- list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
- if (fl->fl_owner != owner)
+ list_for_each_entry(flc, &flctx->flc_posix, flc_list) {
+ if (flc->flc_owner != owner)
continue;
- if (!list_empty(&fl->fl_blocked_requests)) {
+ if (!list_empty(&flc->flc_blocked_requests)) {
spin_unlock(&flctx->flc_lock);
return true;
}
@@ -336,35 +354,52 @@ void locks_free_lock(struct file_lock *fl)
}
EXPORT_SYMBOL(locks_free_lock);
+/* Free a lease which is not in use. */
+void locks_free_lease(struct file_lease *fl)
+{
+ kmem_cache_free(filelease_cache, fl);
+}
+EXPORT_SYMBOL(locks_free_lease);
+
static void
locks_dispose_list(struct list_head *dispose)
{
- struct file_lock *fl;
+ struct file_lock_core *flc;
while (!list_empty(dispose)) {
- fl = list_first_entry(dispose, struct file_lock, fl_list);
- list_del_init(&fl->fl_list);
- locks_free_lock(fl);
+ flc = list_first_entry(dispose, struct file_lock_core, flc_list);
+ list_del_init(&flc->flc_list);
+ if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
+ locks_free_lease(file_lease(flc));
+ else
+ locks_free_lock(file_lock(flc));
}
}
void locks_init_lock(struct file_lock *fl)
{
memset(fl, 0, sizeof(struct file_lock));
- locks_init_lock_heads(fl);
+ locks_init_lock_heads(&fl->c);
}
EXPORT_SYMBOL(locks_init_lock);
+void locks_init_lease(struct file_lease *fl)
+{
+ memset(fl, 0, sizeof(*fl));
+ locks_init_lock_heads(&fl->c);
+}
+EXPORT_SYMBOL(locks_init_lease);
+
/*
* Initialize a new lock from an existing file_lock structure.
*/
void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
{
- new->fl_owner = fl->fl_owner;
- new->fl_pid = fl->fl_pid;
- new->fl_file = NULL;
- new->fl_flags = fl->fl_flags;
- new->fl_type = fl->fl_type;
+ new->c.flc_owner = fl->c.flc_owner;
+ new->c.flc_pid = fl->c.flc_pid;
+ new->c.flc_file = NULL;
+ new->c.flc_flags = fl->c.flc_flags;
+ new->c.flc_type = fl->c.flc_type;
new->fl_start = fl->fl_start;
new->fl_end = fl->fl_end;
new->fl_lmops = fl->fl_lmops;
@@ -372,7 +407,7 @@ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
if (fl->fl_lmops) {
if (fl->fl_lmops->lm_get_owner)
- fl->fl_lmops->lm_get_owner(fl->fl_owner);
+ fl->fl_lmops->lm_get_owner(fl->c.flc_owner);
}
}
EXPORT_SYMBOL(locks_copy_conflock);
@@ -384,7 +419,7 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
locks_copy_conflock(new, fl);
- new->fl_file = fl->fl_file;
+ new->c.flc_file = fl->c.flc_file;
new->fl_ops = fl->fl_ops;
if (fl->fl_ops) {
@@ -400,15 +435,17 @@ static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
/*
* As ctx->flc_lock is held, new requests cannot be added to
- * ->fl_blocked_requests, so we don't need a lock to check if it
+ * ->flc_blocked_requests, so we don't need a lock to check if it
* is empty.
*/
- if (list_empty(&fl->fl_blocked_requests))
+ if (list_empty(&fl->c.flc_blocked_requests))
return;
spin_lock(&blocked_lock_lock);
- list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
- list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
- f->fl_blocker = new;
+ list_splice_init(&fl->c.flc_blocked_requests,
+ &new->c.flc_blocked_requests);
+ list_for_each_entry(f, &new->c.flc_blocked_requests,
+ c.flc_blocked_member)
+ f->c.flc_blocker = &new->c;
spin_unlock(&blocked_lock_lock);
}
@@ -429,21 +466,21 @@ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
{
locks_init_lock(fl);
- fl->fl_file = filp;
- fl->fl_owner = filp;
- fl->fl_pid = current->tgid;
- fl->fl_flags = FL_FLOCK;
- fl->fl_type = type;
+ fl->c.flc_file = filp;
+ fl->c.flc_owner = filp;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_flags = FL_FLOCK;
+ fl->c.flc_type = type;
fl->fl_end = OFFSET_MAX;
}
-static int assign_type(struct file_lock *fl, int type)
+static int assign_type(struct file_lock_core *flc, int type)
{
switch (type) {
case F_RDLCK:
case F_WRLCK:
case F_UNLCK:
- fl->fl_type = type;
+ flc->flc_type = type;
break;
default:
return -EINVAL;
@@ -488,14 +525,14 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
} else
fl->fl_end = OFFSET_MAX;
- fl->fl_owner = current->files;
- fl->fl_pid = current->tgid;
- fl->fl_file = filp;
- fl->fl_flags = FL_POSIX;
+ fl->c.flc_owner = current->files;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_file = filp;
+ fl->c.flc_flags = FL_POSIX;
fl->fl_ops = NULL;
fl->fl_lmops = NULL;
- return assign_type(fl, l->l_type);
+ return assign_type(&fl->c, l->l_type);
}
/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
@@ -516,16 +553,16 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
/* default lease lock manager operations */
static bool
-lease_break_callback(struct file_lock *fl)
+lease_break_callback(struct file_lease *fl)
{
kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
return false;
}
static void
-lease_setup(struct file_lock *fl, void **priv)
+lease_setup(struct file_lease *fl, void **priv)
{
- struct file *filp = fl->fl_file;
+ struct file *filp = fl->c.flc_file;
struct fasync_struct *fa = *priv;
/*
@@ -539,7 +576,7 @@ lease_setup(struct file_lock *fl, void **priv)
__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
}
-static const struct lock_manager_operations lease_manager_ops = {
+static const struct lease_manager_operations lease_manager_ops = {
.lm_break = lease_break_callback,
.lm_change = lease_modify,
.lm_setup = lease_setup,
@@ -548,27 +585,24 @@ static const struct lock_manager_operations lease_manager_ops = {
/*
* Initialize a lease, use the default lock manager operations
*/
-static int lease_init(struct file *filp, int type, struct file_lock *fl)
+static int lease_init(struct file *filp, int type, struct file_lease *fl)
{
- if (assign_type(fl, type) != 0)
+ if (assign_type(&fl->c, type) != 0)
return -EINVAL;
- fl->fl_owner = filp;
- fl->fl_pid = current->tgid;
+ fl->c.flc_owner = filp;
+ fl->c.flc_pid = current->tgid;
- fl->fl_file = filp;
- fl->fl_flags = FL_LEASE;
- fl->fl_start = 0;
- fl->fl_end = OFFSET_MAX;
- fl->fl_ops = NULL;
+ fl->c.flc_file = filp;
+ fl->c.flc_flags = FL_LEASE;
fl->fl_lmops = &lease_manager_ops;
return 0;
}
/* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, int type)
+static struct file_lease *lease_alloc(struct file *filp, int type)
{
- struct file_lock *fl = locks_alloc_lock();
+ struct file_lease *fl = locks_alloc_lease();
int error = -ENOMEM;
if (fl == NULL)
@@ -576,7 +610,7 @@ static struct file_lock *lease_alloc(struct file *filp, int type)
error = lease_init(filp, type, fl);
if (error) {
- locks_free_lock(fl);
+ locks_free_lease(fl);
return ERR_PTR(error);
}
return fl;
@@ -593,26 +627,26 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
/*
* Check whether two locks have the same owner.
*/
-static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
+static int posix_same_owner(struct file_lock_core *fl1, struct file_lock_core *fl2)
{
- return fl1->fl_owner == fl2->fl_owner;
+ return fl1->flc_owner == fl2->flc_owner;
}
/* Must be called with the flc_lock held! */
-static void locks_insert_global_locks(struct file_lock *fl)
+static void locks_insert_global_locks(struct file_lock_core *flc)
{
struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
percpu_rwsem_assert_held(&file_rwsem);
spin_lock(&fll->lock);
- fl->fl_link_cpu = smp_processor_id();
- hlist_add_head(&fl->fl_link, &fll->hlist);
+ flc->flc_link_cpu = smp_processor_id();
+ hlist_add_head(&flc->flc_link, &fll->hlist);
spin_unlock(&fll->lock);
}
/* Must be called with the flc_lock held! */
-static void locks_delete_global_locks(struct file_lock *fl)
+static void locks_delete_global_locks(struct file_lock_core *flc)
{
struct file_lock_list_struct *fll;
@@ -623,33 +657,33 @@ static void locks_delete_global_locks(struct file_lock *fl)
* is done while holding the flc_lock, and new insertions into the list
* also require that it be held.
*/
- if (hlist_unhashed(&fl->fl_link))
+ if (hlist_unhashed(&flc->flc_link))
return;
- fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
+ fll = per_cpu_ptr(&file_lock_list, flc->flc_link_cpu);
spin_lock(&fll->lock);
- hlist_del_init(&fl->fl_link);
+ hlist_del_init(&flc->flc_link);
spin_unlock(&fll->lock);
}
static unsigned long
-posix_owner_key(struct file_lock *fl)
+posix_owner_key(struct file_lock_core *flc)
{
- return (unsigned long)fl->fl_owner;
+ return (unsigned long) flc->flc_owner;
}
-static void locks_insert_global_blocked(struct file_lock *waiter)
+static void locks_insert_global_blocked(struct file_lock_core *waiter)
{
lockdep_assert_held(&blocked_lock_lock);
- hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
+ hash_add(blocked_hash, &waiter->flc_link, posix_owner_key(waiter));
}
-static void locks_delete_global_blocked(struct file_lock *waiter)
+static void locks_delete_global_blocked(struct file_lock_core *waiter)
{
lockdep_assert_held(&blocked_lock_lock);
- hash_del(&waiter->fl_link);
+ hash_del(&waiter->flc_link);
}
/* Remove waiter from blocker's block list.
@@ -657,41 +691,39 @@ static void locks_delete_global_blocked(struct file_lock *waiter)
*
* Must be called with blocked_lock_lock held.
*/
-static void __locks_delete_block(struct file_lock *waiter)
+static void __locks_unlink_block(struct file_lock_core *waiter)
{
locks_delete_global_blocked(waiter);
- list_del_init(&waiter->fl_blocked_member);
+ list_del_init(&waiter->flc_blocked_member);
}
-static void __locks_wake_up_blocks(struct file_lock *blocker)
+static void __locks_wake_up_blocks(struct file_lock_core *blocker)
{
- while (!list_empty(&blocker->fl_blocked_requests)) {
- struct file_lock *waiter;
+ while (!list_empty(&blocker->flc_blocked_requests)) {
+ struct file_lock_core *waiter;
+ struct file_lock *fl;
- waiter = list_first_entry(&blocker->fl_blocked_requests,
- struct file_lock, fl_blocked_member);
- __locks_delete_block(waiter);
- if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
- waiter->fl_lmops->lm_notify(waiter);
+ waiter = list_first_entry(&blocker->flc_blocked_requests,
+ struct file_lock_core, flc_blocked_member);
+
+ fl = file_lock(waiter);
+ __locks_unlink_block(waiter);
+ if ((waiter->flc_flags & (FL_POSIX | FL_FLOCK)) &&
+ fl->fl_lmops && fl->fl_lmops->lm_notify)
+ fl->fl_lmops->lm_notify(fl);
else
- wake_up(&waiter->fl_wait);
+ locks_wake_up(fl);
/*
- * The setting of fl_blocker to NULL marks the "done"
+ * The setting of flc_blocker to NULL marks the "done"
* point in deleting a block. Paired with acquire at the top
* of locks_delete_block().
*/
- smp_store_release(&waiter->fl_blocker, NULL);
+ smp_store_release(&waiter->flc_blocker, NULL);
}
}
-/**
- * locks_delete_block - stop waiting for a file lock
- * @waiter: the lock which was waiting
- *
- * lockd/nfsd need to disconnect the lock while working on it.
- */
-int locks_delete_block(struct file_lock *waiter)
+static int __locks_delete_block(struct file_lock_core *waiter)
{
int status = -ENOENT;
@@ -716,24 +748,35 @@ int locks_delete_block(struct file_lock *waiter)
* no new locks can be inserted into its fl_blocked_requests list, and
* can avoid doing anything further if the list is empty.
*/
- if (!smp_load_acquire(&waiter->fl_blocker) &&
- list_empty(&waiter->fl_blocked_requests))
+ if (!smp_load_acquire(&waiter->flc_blocker) &&
+ list_empty(&waiter->flc_blocked_requests))
return status;
spin_lock(&blocked_lock_lock);
- if (waiter->fl_blocker)
+ if (waiter->flc_blocker)
status = 0;
__locks_wake_up_blocks(waiter);
- __locks_delete_block(waiter);
+ __locks_unlink_block(waiter);
/*
* The setting of fl_blocker to NULL marks the "done" point in deleting
* a block. Paired with acquire at the top of this function.
*/
- smp_store_release(&waiter->fl_blocker, NULL);
+ smp_store_release(&waiter->flc_blocker, NULL);
spin_unlock(&blocked_lock_lock);
return status;
}
+
+/**
+ * locks_delete_block - stop waiting for a file lock
+ * @waiter: the lock which was waiting
+ *
+ * lockd/nfsd need to disconnect the lock while working on it.
+ */
+int locks_delete_block(struct file_lock *waiter)
+{
+ return __locks_delete_block(&waiter->c);
+}
EXPORT_SYMBOL(locks_delete_block);
/* Insert waiter into blocker's block list.
@@ -751,26 +794,28 @@ EXPORT_SYMBOL(locks_delete_block);
* waiters, and add beneath any waiter that blocks the new waiter.
* Thus wakeups don't happen until needed.
*/
-static void __locks_insert_block(struct file_lock *blocker,
- struct file_lock *waiter,
- bool conflict(struct file_lock *,
- struct file_lock *))
+static void __locks_insert_block(struct file_lock_core *blocker,
+ struct file_lock_core *waiter,
+ bool conflict(struct file_lock_core *,
+ struct file_lock_core *))
{
- struct file_lock *fl;
- BUG_ON(!list_empty(&waiter->fl_blocked_member));
+ struct file_lock_core *flc;
+ BUG_ON(!list_empty(&waiter->flc_blocked_member));
new_blocker:
- list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
- if (conflict(fl, waiter)) {
- blocker = fl;
+ list_for_each_entry(flc, &blocker->flc_blocked_requests, flc_blocked_member)
+ if (conflict(flc, waiter)) {
+ blocker = flc;
goto new_blocker;
}
- waiter->fl_blocker = blocker;
- list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
- if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
+ waiter->flc_blocker = blocker;
+ list_add_tail(&waiter->flc_blocked_member,
+ &blocker->flc_blocked_requests);
+
+ if ((blocker->flc_flags & (FL_POSIX|FL_OFDLCK)) == FL_POSIX)
locks_insert_global_blocked(waiter);
- /* The requests in waiter->fl_blocked are known to conflict with
+ /* The requests in waiter->flc_blocked are known to conflict with
* waiter, but might not conflict with blocker, or the requests
* and lock which block it. So they all need to be woken.
*/
@@ -778,10 +823,10 @@ new_blocker:
}
/* Must be called with flc_lock held. */
-static void locks_insert_block(struct file_lock *blocker,
- struct file_lock *waiter,
- bool conflict(struct file_lock *,
- struct file_lock *))
+static void locks_insert_block(struct file_lock_core *blocker,
+ struct file_lock_core *waiter,
+ bool conflict(struct file_lock_core *,
+ struct file_lock_core *))
{
spin_lock(&blocked_lock_lock);
__locks_insert_block(blocker, waiter, conflict);
@@ -793,7 +838,7 @@ static void locks_insert_block(struct file_lock *blocker,
*
* Must be called with the inode->flc_lock held!
*/
-static void locks_wake_up_blocks(struct file_lock *blocker)
+static void locks_wake_up_blocks(struct file_lock_core *blocker)
{
/*
* Avoid taking global lock if list is empty. This is safe since new
@@ -802,7 +847,7 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
* fl_blocked_requests list does not require the flc_lock, so we must
* recheck list_empty() after acquiring the blocked_lock_lock.
*/
- if (list_empty(&blocker->fl_blocked_requests))
+ if (list_empty(&blocker->flc_blocked_requests))
return;
spin_lock(&blocked_lock_lock);
@@ -811,39 +856,39 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
}
static void
-locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
+locks_insert_lock_ctx(struct file_lock_core *fl, struct list_head *before)
{
- list_add_tail(&fl->fl_list, before);
+ list_add_tail(&fl->flc_list, before);
locks_insert_global_locks(fl);
}
static void
-locks_unlink_lock_ctx(struct file_lock *fl)
+locks_unlink_lock_ctx(struct file_lock_core *fl)
{
locks_delete_global_locks(fl);
- list_del_init(&fl->fl_list);
+ list_del_init(&fl->flc_list);
locks_wake_up_blocks(fl);
}
static void
-locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
+locks_delete_lock_ctx(struct file_lock_core *fl, struct list_head *dispose)
{
locks_unlink_lock_ctx(fl);
if (dispose)
- list_add(&fl->fl_list, dispose);
+ list_add(&fl->flc_list, dispose);
else
- locks_free_lock(fl);
+ locks_free_lock(file_lock(fl));
}
/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
* checks for shared/exclusive status of overlapping locks.
*/
-static bool locks_conflict(struct file_lock *caller_fl,
- struct file_lock *sys_fl)
+static bool locks_conflict(struct file_lock_core *caller_flc,
+ struct file_lock_core *sys_flc)
{
- if (sys_fl->fl_type == F_WRLCK)
+ if (sys_flc->flc_type == F_WRLCK)
return true;
- if (caller_fl->fl_type == F_WRLCK)
+ if (caller_flc->flc_type == F_WRLCK)
return true;
return false;
}
@@ -851,20 +896,23 @@ static bool locks_conflict(struct file_lock *caller_fl,
/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
* checking before calling the locks_conflict().
*/
-static bool posix_locks_conflict(struct file_lock *caller_fl,
- struct file_lock *sys_fl)
+static bool posix_locks_conflict(struct file_lock_core *caller_flc,
+ struct file_lock_core *sys_flc)
{
+ struct file_lock *caller_fl = file_lock(caller_flc);
+ struct file_lock *sys_fl = file_lock(sys_flc);
+
/* POSIX locks owned by the same process do not conflict with
* each other.
*/
- if (posix_same_owner(caller_fl, sys_fl))
+ if (posix_same_owner(caller_flc, sys_flc))
return false;
/* Check whether they overlap */
if (!locks_overlap(caller_fl, sys_fl))
return false;
- return locks_conflict(caller_fl, sys_fl);
+ return locks_conflict(caller_flc, sys_flc);
}
/* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK
@@ -873,28 +921,31 @@ static bool posix_locks_conflict(struct file_lock *caller_fl,
static bool posix_test_locks_conflict(struct file_lock *caller_fl,
struct file_lock *sys_fl)
{
+ struct file_lock_core *caller = &caller_fl->c;
+ struct file_lock_core *sys = &sys_fl->c;
+
/* F_UNLCK checks any locks on the same fd. */
- if (caller_fl->fl_type == F_UNLCK) {
- if (!posix_same_owner(caller_fl, sys_fl))
+ if (lock_is_unlock(caller_fl)) {
+ if (!posix_same_owner(caller, sys))
return false;
return locks_overlap(caller_fl, sys_fl);
}
- return posix_locks_conflict(caller_fl, sys_fl);
+ return posix_locks_conflict(caller, sys);
}
/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
* checking before calling the locks_conflict().
*/
-static bool flock_locks_conflict(struct file_lock *caller_fl,
- struct file_lock *sys_fl)
+static bool flock_locks_conflict(struct file_lock_core *caller_flc,
+ struct file_lock_core *sys_flc)
{
/* FLOCK locks referring to the same filp do not conflict with
* each other.
*/
- if (caller_fl->fl_file == sys_fl->fl_file)
+ if (caller_flc->flc_file == sys_flc->flc_file)
return false;
- return locks_conflict(caller_fl, sys_fl);
+ return locks_conflict(caller_flc, sys_flc);
}
void
@@ -908,13 +959,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
ctx = locks_inode_context(inode);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
return;
}
retry:
spin_lock(&ctx->flc_lock);
- list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
+ list_for_each_entry(cfl, &ctx->flc_posix, c.flc_list) {
if (!posix_test_locks_conflict(fl, cfl))
continue;
if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
@@ -930,7 +981,7 @@ retry:
locks_copy_conflock(fl, cfl);
goto out;
}
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
out:
spin_unlock(&ctx->flc_lock);
return;
@@ -972,25 +1023,27 @@ EXPORT_SYMBOL(posix_test_lock);
#define MAX_DEADLK_ITERATIONS 10
-/* Find a lock that the owner of the given block_fl is blocking on. */
-static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
+/* Find a lock that the owner of the given @blocker is blocking on. */
+static struct file_lock_core *what_owner_is_waiting_for(struct file_lock_core *blocker)
{
- struct file_lock *fl;
+ struct file_lock_core *flc;
- hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
- if (posix_same_owner(fl, block_fl)) {
- while (fl->fl_blocker)
- fl = fl->fl_blocker;
- return fl;
+ hash_for_each_possible(blocked_hash, flc, flc_link, posix_owner_key(blocker)) {
+ if (posix_same_owner(flc, blocker)) {
+ while (flc->flc_blocker)
+ flc = flc->flc_blocker;
+ return flc;
}
}
return NULL;
}
/* Must be called with the blocked_lock_lock held! */
-static int posix_locks_deadlock(struct file_lock *caller_fl,
- struct file_lock *block_fl)
+static bool posix_locks_deadlock(struct file_lock *caller_fl,
+ struct file_lock *block_fl)
{
+ struct file_lock_core *caller = &caller_fl->c;
+ struct file_lock_core *blocker = &block_fl->c;
int i = 0;
lockdep_assert_held(&blocked_lock_lock);
@@ -999,16 +1052,16 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
* This deadlock detector can't reasonably detect deadlocks with
* FL_OFDLCK locks, since they aren't owned by a process, per-se.
*/
- if (IS_OFDLCK(caller_fl))
- return 0;
+ if (caller->flc_flags & FL_OFDLCK)
+ return false;
- while ((block_fl = what_owner_is_waiting_for(block_fl))) {
+ while ((blocker = what_owner_is_waiting_for(blocker))) {
if (i++ > MAX_DEADLK_ITERATIONS)
- return 0;
- if (posix_same_owner(caller_fl, block_fl))
- return 1;
+ return false;
+ if (posix_same_owner(caller, blocker))
+ return true;
}
- return 0;
+ return false;
}
/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
@@ -1027,14 +1080,14 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
bool found = false;
LIST_HEAD(dispose);
- ctx = locks_get_lock_context(inode, request->fl_type);
+ ctx = locks_get_lock_context(inode, request->c.flc_type);
if (!ctx) {
- if (request->fl_type != F_UNLCK)
+ if (request->c.flc_type != F_UNLCK)
return -ENOMEM;
- return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
+ return (request->c.flc_flags & FL_EXISTS) ? -ENOENT : 0;
}
- if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
+ if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK)) {
new_fl = locks_alloc_lock();
if (!new_fl)
return -ENOMEM;
@@ -1042,41 +1095,41 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
- if (request->fl_flags & FL_ACCESS)
+ if (request->c.flc_flags & FL_ACCESS)
goto find_conflict;
- list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
- if (request->fl_file != fl->fl_file)
+ list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) {
+ if (request->c.flc_file != fl->c.flc_file)
continue;
- if (request->fl_type == fl->fl_type)
+ if (request->c.flc_type == fl->c.flc_type)
goto out;
found = true;
- locks_delete_lock_ctx(fl, &dispose);
+ locks_delete_lock_ctx(&fl->c, &dispose);
break;
}
- if (request->fl_type == F_UNLCK) {
- if ((request->fl_flags & FL_EXISTS) && !found)
+ if (lock_is_unlock(request)) {
+ if ((request->c.flc_flags & FL_EXISTS) && !found)
error = -ENOENT;
goto out;
}
find_conflict:
- list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
- if (!flock_locks_conflict(request, fl))
+ list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) {
+ if (!flock_locks_conflict(&request->c, &fl->c))
continue;
error = -EAGAIN;
- if (!(request->fl_flags & FL_SLEEP))
+ if (!(request->c.flc_flags & FL_SLEEP))
goto out;
error = FILE_LOCK_DEFERRED;
- locks_insert_block(fl, request, flock_locks_conflict);
+ locks_insert_block(&fl->c, &request->c, flock_locks_conflict);
goto out;
}
- if (request->fl_flags & FL_ACCESS)
+ if (request->c.flc_flags & FL_ACCESS)
goto out;
locks_copy_lock(new_fl, request);
locks_move_blocks(new_fl, request);
- locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
+ locks_insert_lock_ctx(&new_fl->c, &ctx->flc_flock);
new_fl = NULL;
error = 0;
@@ -1105,9 +1158,9 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
void *owner;
void (*func)(void);
- ctx = locks_get_lock_context(inode, request->fl_type);
+ ctx = locks_get_lock_context(inode, request->c.flc_type);
if (!ctx)
- return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
+ return lock_is_unlock(request) ? 0 : -ENOMEM;
/*
* We may need two file_lock structures for this operation,
@@ -1115,8 +1168,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
*
* In some cases we can be sure, that no new locks will be needed
*/
- if (!(request->fl_flags & FL_ACCESS) &&
- (request->fl_type != F_UNLCK ||
+ if (!(request->c.flc_flags & FL_ACCESS) &&
+ (request->c.flc_type != F_UNLCK ||
request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
new_fl = locks_alloc_lock();
new_fl2 = locks_alloc_lock();
@@ -1130,9 +1183,9 @@ retry:
* there are any, either return error or put the request on the
* blocker's list of waiters and the global blocked_hash.
*/
- if (request->fl_type != F_UNLCK) {
- list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
- if (!posix_locks_conflict(request, fl))
+ if (request->c.flc_type != F_UNLCK) {
+ list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) {
+ if (!posix_locks_conflict(&request->c, &fl->c))
continue;
if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
@@ -1148,7 +1201,7 @@ retry:
if (conflock)
locks_copy_conflock(conflock, fl);
error = -EAGAIN;
- if (!(request->fl_flags & FL_SLEEP))
+ if (!(request->c.flc_flags & FL_SLEEP))
goto out;
/*
* Deadlock detection and insertion into the blocked
@@ -1160,10 +1213,10 @@ retry:
* Ensure that we don't find any locks blocked on this
* request during deadlock detection.
*/
- __locks_wake_up_blocks(request);
+ __locks_wake_up_blocks(&request->c);
if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
- __locks_insert_block(fl, request,
+ __locks_insert_block(&fl->c, &request->c,
posix_locks_conflict);
}
spin_unlock(&blocked_lock_lock);
@@ -1173,22 +1226,22 @@ retry:
/* If we're just looking for a conflict, we're done. */
error = 0;
- if (request->fl_flags & FL_ACCESS)
+ if (request->c.flc_flags & FL_ACCESS)
goto out;
/* Find the first old lock with the same owner as the new lock */
- list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
- if (posix_same_owner(request, fl))
+ list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) {
+ if (posix_same_owner(&request->c, &fl->c))
break;
}
/* Process locks with this owner. */
- list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
- if (!posix_same_owner(request, fl))
+ list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) {
+ if (!posix_same_owner(&request->c, &fl->c))
break;
/* Detect adjacent or overlapping regions (if same lock type) */
- if (request->fl_type == fl->fl_type) {
+ if (request->c.flc_type == fl->c.flc_type) {
/* In all comparisons of start vs end, use
* "start - 1" rather than "end + 1". If end
* is OFFSET_MAX, end + 1 will become negative.
@@ -1215,7 +1268,7 @@ retry:
else
request->fl_end = fl->fl_end;
if (added) {
- locks_delete_lock_ctx(fl, &dispose);
+ locks_delete_lock_ctx(&fl->c, &dispose);
continue;
}
request = fl;
@@ -1228,7 +1281,7 @@ retry:
continue;
if (fl->fl_start > request->fl_end)
break;
- if (request->fl_type == F_UNLCK)
+ if (lock_is_unlock(request))
added = true;
if (fl->fl_start < request->fl_start)
left = fl;
@@ -1244,7 +1297,7 @@ retry:
* one (This may happen several times).
*/
if (added) {
- locks_delete_lock_ctx(fl, &dispose);
+ locks_delete_lock_ctx(&fl->c, &dispose);
continue;
}
/*
@@ -1261,8 +1314,9 @@ retry:
locks_move_blocks(new_fl, request);
request = new_fl;
new_fl = NULL;
- locks_insert_lock_ctx(request, &fl->fl_list);
- locks_delete_lock_ctx(fl, &dispose);
+ locks_insert_lock_ctx(&request->c,
+ &fl->c.flc_list);
+ locks_delete_lock_ctx(&fl->c, &dispose);
added = true;
}
}
@@ -1279,8 +1333,8 @@ retry:
error = 0;
if (!added) {
- if (request->fl_type == F_UNLCK) {
- if (request->fl_flags & FL_EXISTS)
+ if (lock_is_unlock(request)) {
+ if (request->c.flc_flags & FL_EXISTS)
error = -ENOENT;
goto out;
}
@@ -1291,7 +1345,7 @@ retry:
}
locks_copy_lock(new_fl, request);
locks_move_blocks(new_fl, request);
- locks_insert_lock_ctx(new_fl, &fl->fl_list);
+ locks_insert_lock_ctx(&new_fl->c, &fl->c.flc_list);
fl = new_fl;
new_fl = NULL;
}
@@ -1303,14 +1357,14 @@ retry:
left = new_fl2;
new_fl2 = NULL;
locks_copy_lock(left, right);
- locks_insert_lock_ctx(left, &fl->fl_list);
+ locks_insert_lock_ctx(&left->c, &fl->c.flc_list);
}
right->fl_start = request->fl_end + 1;
- locks_wake_up_blocks(right);
+ locks_wake_up_blocks(&right->c);
}
if (left) {
left->fl_end = request->fl_start - 1;
- locks_wake_up_blocks(left);
+ locks_wake_up_blocks(&left->c);
}
out:
spin_unlock(&ctx->flc_lock);
@@ -1364,8 +1418,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait,
- list_empty(&fl->fl_blocked_member));
+ error = wait_event_interruptible(fl->c.flc_wait,
+ list_empty(&fl->c.flc_blocked_member));
if (error)
break;
}
@@ -1373,37 +1427,37 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
-static void lease_clear_pending(struct file_lock *fl, int arg)
+static void lease_clear_pending(struct file_lease *fl, int arg)
{
switch (arg) {
case F_UNLCK:
- fl->fl_flags &= ~FL_UNLOCK_PENDING;
+ fl->c.flc_flags &= ~FL_UNLOCK_PENDING;
fallthrough;
case F_RDLCK:
- fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
+ fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING;
}
}
/* We already had a lease on this file; just change its type */
-int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
+int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose)
{
- int error = assign_type(fl, arg);
+ int error = assign_type(&fl->c, arg);
if (error)
return error;
lease_clear_pending(fl, arg);
- locks_wake_up_blocks(fl);
+ locks_wake_up_blocks(&fl->c);
if (arg == F_UNLCK) {
- struct file *filp = fl->fl_file;
+ struct file *filp = fl->c.flc_file;
f_delown(filp);
filp->f_owner.signum = 0;
- fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
+ fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync);
if (fl->fl_fasync != NULL) {
printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
fl->fl_fasync = NULL;
}
- locks_delete_lock_ctx(fl, dispose);
+ locks_delete_lock_ctx(&fl->c, dispose);
}
return 0;
}
@@ -1420,11 +1474,11 @@ static bool past_time(unsigned long then)
static void time_out_leases(struct inode *inode, struct list_head *dispose)
{
struct file_lock_context *ctx = inode->i_flctx;
- struct file_lock *fl, *tmp;
+ struct file_lease *fl, *tmp;
lockdep_assert_held(&ctx->flc_lock);
- list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
+ list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
trace_time_out_leases(inode, fl);
if (past_time(fl->fl_downgrade_time))
lease_modify(fl, F_RDLCK, dispose);
@@ -1433,38 +1487,40 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
}
}
-static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
+static bool leases_conflict(struct file_lock_core *lc, struct file_lock_core *bc)
{
bool rc;
+ struct file_lease *lease = file_lease(lc);
+ struct file_lease *breaker = file_lease(bc);
if (lease->fl_lmops->lm_breaker_owns_lease
&& lease->fl_lmops->lm_breaker_owns_lease(lease))
return false;
- if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
+ if ((bc->flc_flags & FL_LAYOUT) != (lc->flc_flags & FL_LAYOUT)) {
rc = false;
goto trace;
}
- if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
+ if ((bc->flc_flags & FL_DELEG) && (lc->flc_flags & FL_LEASE)) {
rc = false;
goto trace;
}
- rc = locks_conflict(breaker, lease);
+ rc = locks_conflict(bc, lc);
trace:
trace_leases_conflict(rc, lease, breaker);
return rc;
}
static bool
-any_leases_conflict(struct inode *inode, struct file_lock *breaker)
+any_leases_conflict(struct inode *inode, struct file_lease *breaker)
{
struct file_lock_context *ctx = inode->i_flctx;
- struct file_lock *fl;
+ struct file_lock_core *flc;
lockdep_assert_held(&ctx->flc_lock);
- list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
- if (leases_conflict(fl, breaker))
+ list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
+ if (leases_conflict(flc, &breaker->c))
return true;
}
return false;
@@ -1487,7 +1543,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
{
int error = 0;
struct file_lock_context *ctx;
- struct file_lock *new_fl, *fl, *tmp;
+ struct file_lease *new_fl, *fl, *tmp;
unsigned long break_time;
int want_write = (mode & O_ACCMODE) != O_RDONLY;
LIST_HEAD(dispose);
@@ -1495,7 +1551,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
if (IS_ERR(new_fl))
return PTR_ERR(new_fl);
- new_fl->fl_flags = type;
+ new_fl->c.flc_flags = type;
/* typically we will check that ctx is non-NULL before calling */
ctx = locks_inode_context(inode);
@@ -1519,22 +1575,22 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
break_time++; /* so that 0 means no break time */
}
- list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
- if (!leases_conflict(fl, new_fl))
+ list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
+ if (!leases_conflict(&fl->c, &new_fl->c))
continue;
if (want_write) {
- if (fl->fl_flags & FL_UNLOCK_PENDING)
+ if (fl->c.flc_flags & FL_UNLOCK_PENDING)
continue;
- fl->fl_flags |= FL_UNLOCK_PENDING;
+ fl->c.flc_flags |= FL_UNLOCK_PENDING;
fl->fl_break_time = break_time;
} else {
if (lease_breaking(fl))
continue;
- fl->fl_flags |= FL_DOWNGRADE_PENDING;
+ fl->c.flc_flags |= FL_DOWNGRADE_PENDING;
fl->fl_downgrade_time = break_time;
}
if (fl->fl_lmops->lm_break(fl))
- locks_delete_lock_ctx(fl, &dispose);
+ locks_delete_lock_ctx(&fl->c, &dispose);
}
if (list_empty(&ctx->flc_lease))
@@ -1547,26 +1603,26 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
}
restart:
- fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
+ fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
break_time = fl->fl_break_time;
if (break_time != 0)
break_time -= jiffies;
if (break_time == 0)
break_time++;
- locks_insert_block(fl, new_fl, leases_conflict);
+ locks_insert_block(&fl->c, &new_fl->c, leases_conflict);
trace_break_lease_block(inode, new_fl);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
locks_dispose_list(&dispose);
- error = wait_event_interruptible_timeout(new_fl->fl_wait,
- list_empty(&new_fl->fl_blocked_member),
- break_time);
+ error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
+ list_empty(&new_fl->c.flc_blocked_member),
+ break_time);
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
trace_break_lease_unblock(inode, new_fl);
- locks_delete_block(new_fl);
+ __locks_delete_block(&new_fl->c);
if (error >= 0) {
/*
* Wait for the next conflicting lease that has not been
@@ -1583,7 +1639,7 @@ out:
percpu_up_read(&file_rwsem);
locks_dispose_list(&dispose);
free_lock:
- locks_free_lock(new_fl);
+ locks_free_lease(new_fl);
return error;
}
EXPORT_SYMBOL(__break_lease);
@@ -1601,14 +1657,14 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
{
bool has_lease = false;
struct file_lock_context *ctx;
- struct file_lock *fl;
+ struct file_lock_core *flc;
ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
- fl = list_first_entry_or_null(&ctx->flc_lease,
- struct file_lock, fl_list);
- if (fl && (fl->fl_type == F_WRLCK))
+ flc = list_first_entry_or_null(&ctx->flc_lease,
+ struct file_lock_core, flc_list);
+ if (flc && flc->flc_type == F_WRLCK)
has_lease = true;
spin_unlock(&ctx->flc_lock);
}
@@ -1643,7 +1699,7 @@ EXPORT_SYMBOL(lease_get_mtime);
*/
int fcntl_getlease(struct file *filp)
{
- struct file_lock *fl;
+ struct file_lease *fl;
struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
int type = F_UNLCK;
@@ -1654,8 +1710,8 @@ int fcntl_getlease(struct file *filp)
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
- list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
- if (fl->fl_file != filp)
+ list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
+ if (fl->c.flc_file != filp)
continue;
type = target_leasetype(fl);
break;
@@ -1715,12 +1771,12 @@ check_conflicting_open(struct file *filp, const int arg, int flags)
}
static int
-generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **priv)
+generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **priv)
{
- struct file_lock *fl, *my_fl = NULL, *lease;
+ struct file_lease *fl, *my_fl = NULL, *lease;
struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
- bool is_deleg = (*flp)->fl_flags & FL_DELEG;
+ bool is_deleg = (*flp)->c.flc_flags & FL_DELEG;
int error;
LIST_HEAD(dispose);
@@ -1746,7 +1802,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
- error = check_conflicting_open(filp, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->c.flc_flags);
if (error)
goto out;
@@ -1759,9 +1815,9 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
* except for this filp.
*/
error = -EAGAIN;
- list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
- if (fl->fl_file == filp &&
- fl->fl_owner == lease->fl_owner) {
+ list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
+ if (fl->c.flc_file == filp &&
+ fl->c.flc_owner == lease->c.flc_owner) {
my_fl = fl;
continue;
}
@@ -1776,7 +1832,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
* Modifying our existing lease is OK, but no getting a
* new lease if someone else is opening for write:
*/
- if (fl->fl_flags & FL_UNLOCK_PENDING)
+ if (fl->c.flc_flags & FL_UNLOCK_PENDING)
goto out;
}
@@ -1792,7 +1848,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
if (!leases_enable)
goto out;
- locks_insert_lock_ctx(lease, &ctx->flc_lease);
+ locks_insert_lock_ctx(&lease->c, &ctx->flc_lease);
/*
* The check in break_lease() is lockless. It's possible for another
* open to race in after we did the earlier check for a conflicting
@@ -1803,9 +1859,9 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
* precedes these checks.
*/
smp_mb();
- error = check_conflicting_open(filp, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->c.flc_flags);
if (error) {
- locks_unlink_lock_ctx(lease);
+ locks_unlink_lock_ctx(&lease->c);
goto out;
}
@@ -1826,7 +1882,7 @@ out:
static int generic_delete_lease(struct file *filp, void *owner)
{
int error = -EAGAIN;
- struct file_lock *fl, *victim = NULL;
+ struct file_lease *fl, *victim = NULL;
struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
LIST_HEAD(dispose);
@@ -1839,9 +1895,9 @@ static int generic_delete_lease(struct file *filp, void *owner)
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
- list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
- if (fl->fl_file == filp &&
- fl->fl_owner == owner) {
+ list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
+ if (fl->c.flc_file == filp &&
+ fl->c.flc_owner == owner) {
victim = fl;
break;
}
@@ -1866,21 +1922,9 @@ static int generic_delete_lease(struct file *filp, void *owner)
* The (input) flp->fl_lmops->lm_break function is required
* by break_lease().
*/
-int generic_setlease(struct file *filp, int arg, struct file_lock **flp,
+int generic_setlease(struct file *filp, int arg, struct file_lease **flp,
void **priv)
{
- struct inode *inode = file_inode(filp);
- vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode);
- int error;
-
- if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
- return -EACCES;
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
- error = security_file_lock(filp, arg);
- if (error)
- return error;
-
switch (arg) {
case F_UNLCK:
return generic_delete_lease(filp, *priv);
@@ -1913,7 +1957,7 @@ lease_notifier_chain_init(void)
}
static inline void
-setlease_notifier(int arg, struct file_lock *lease)
+setlease_notifier(int arg, struct file_lease *lease)
{
if (arg != F_UNLCK)
srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
@@ -1931,6 +1975,19 @@ void lease_unregister_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+int
+kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv)
+{
+ if (lease)
+ setlease_notifier(arg, *lease);
+ if (filp->f_op->setlease)
+ return filp->f_op->setlease(filp, arg, lease, priv);
+ else
+ return generic_setlease(filp, arg, lease, priv);
+}
+EXPORT_SYMBOL_GPL(kernel_setlease);
+
/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
@@ -1949,20 +2006,26 @@ EXPORT_SYMBOL_GPL(lease_unregister_notifier);
* may be NULL if the lm_setup operation doesn't require it.
*/
int
-vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv)
+vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv)
{
- if (lease)
- setlease_notifier(arg, *lease);
- if (filp->f_op->setlease)
- return filp->f_op->setlease(filp, arg, lease, priv);
- else
- return generic_setlease(filp, arg, lease, priv);
+ struct inode *inode = file_inode(filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode);
+ int error;
+
+ if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
+ return -EACCES;
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+ error = security_file_lock(filp, arg);
+ if (error)
+ return error;
+ return kernel_setlease(filp, arg, lease, priv);
}
EXPORT_SYMBOL_GPL(vfs_setlease);
static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
{
- struct file_lock *fl;
+ struct file_lease *fl;
struct fasync_struct *new;
int error;
@@ -1972,14 +2035,14 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
new = fasync_alloc();
if (!new) {
- locks_free_lock(fl);
+ locks_free_lease(fl);
return -ENOMEM;
}
new->fa_fd = fd;
error = vfs_setlease(filp, arg, &fl, (void **)&new);
if (fl)
- locks_free_lock(fl);
+ locks_free_lease(fl);
if (new)
fasync_free(new);
return error;
@@ -2017,8 +2080,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = flock_lock_inode(inode, fl);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait,
- list_empty(&fl->fl_blocked_member));
+ error = wait_event_interruptible(fl->c.flc_wait,
+ list_empty(&fl->c.flc_blocked_member));
if (error)
break;
}
@@ -2036,7 +2099,7 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
int res = 0;
- switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
+ switch (fl->c.flc_flags & (FL_POSIX|FL_FLOCK)) {
case FL_POSIX:
res = posix_lock_inode_wait(inode, fl);
break;
@@ -2098,13 +2161,13 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
flock_make_lock(f.file, &fl, type);
- error = security_file_lock(f.file, fl.fl_type);
+ error = security_file_lock(f.file, fl.c.flc_type);
if (error)
goto out_putf;
can_sleep = !(cmd & LOCK_NB);
if (can_sleep)
- fl.fl_flags |= FL_SLEEP;
+ fl.c.flc_flags |= FL_SLEEP;
if (f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
@@ -2130,7 +2193,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
- WARN_ON_ONCE(filp != fl->fl_file);
+ WARN_ON_ONCE(filp != fl->c.flc_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
@@ -2145,25 +2208,28 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
*
* Used to translate a fl_pid into a namespace virtual pid number
*/
-static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
+static pid_t locks_translate_pid(struct file_lock_core *fl, struct pid_namespace *ns)
{
pid_t vnr;
struct pid *pid;
- if (IS_OFDLCK(fl))
+ if (fl->flc_flags & FL_OFDLCK)
return -1;
- if (IS_REMOTELCK(fl))
- return fl->fl_pid;
+
+ /* Remote locks report a negative pid value */
+ if (fl->flc_pid <= 0)
+ return fl->flc_pid;
+
/*
* If the flock owner process is dead and its pid has been already
* freed, the translation below won't work, but we still want to show
* flock owner pid number in init pidns.
*/
if (ns == &init_pid_ns)
- return (pid_t)fl->fl_pid;
+ return (pid_t) fl->flc_pid;
rcu_read_lock();
- pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
+ pid = find_pid_ns(fl->flc_pid, &init_pid_ns);
vnr = pid_nr_ns(pid, ns);
rcu_read_unlock();
return vnr;
@@ -2171,7 +2237,7 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
{
- flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
+ flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current));
#if BITS_PER_LONG == 32
/*
* Make sure we can represent the posix lock via
@@ -2186,19 +2252,19 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
fl->fl_end - fl->fl_start + 1;
flock->l_whence = 0;
- flock->l_type = fl->fl_type;
+ flock->l_type = fl->c.flc_type;
return 0;
}
#if BITS_PER_LONG == 32
static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
{
- flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
+ flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current));
flock->l_start = fl->fl_start;
flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
fl->fl_end - fl->fl_start + 1;
flock->l_whence = 0;
- flock->l_type = fl->fl_type;
+ flock->l_type = fl->c.flc_type;
}
#endif
@@ -2227,16 +2293,16 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
if (flock->l_pid != 0)
goto out;
- fl->fl_flags |= FL_OFDLCK;
- fl->fl_owner = filp;
+ fl->c.flc_flags |= FL_OFDLCK;
+ fl->c.flc_owner = filp;
}
error = vfs_test_lock(filp, fl);
if (error)
goto out;
- flock->l_type = fl->fl_type;
- if (fl->fl_type != F_UNLCK) {
+ flock->l_type = fl->c.flc_type;
+ if (fl->c.flc_type != F_UNLCK) {
error = posix_lock_to_flock(flock, fl);
if (error)
goto out;
@@ -2283,7 +2349,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
- WARN_ON_ONCE(filp != fl->fl_file);
+ WARN_ON_ONCE(filp != fl->c.flc_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
@@ -2296,7 +2362,7 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
{
int error;
- error = security_file_lock(filp, fl->fl_type);
+ error = security_file_lock(filp, fl->c.flc_type);
if (error)
return error;
@@ -2304,8 +2370,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
error = vfs_lock_file(filp, cmd, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait,
- list_empty(&fl->fl_blocked_member));
+ error = wait_event_interruptible(fl->c.flc_wait,
+ list_empty(&fl->c.flc_blocked_member));
if (error)
break;
}
@@ -2318,13 +2384,13 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
static int
check_fmode_for_setlk(struct file_lock *fl)
{
- switch (fl->fl_type) {
+ switch (fl->c.flc_type) {
case F_RDLCK:
- if (!(fl->fl_file->f_mode & FMODE_READ))
+ if (!(fl->c.flc_file->f_mode & FMODE_READ))
return -EBADF;
break;
case F_WRLCK:
- if (!(fl->fl_file->f_mode & FMODE_WRITE))
+ if (!(fl->c.flc_file->f_mode & FMODE_WRITE))
return -EBADF;
}
return 0;
@@ -2363,8 +2429,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
cmd = F_SETLK;
- file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = filp;
+ file_lock->c.flc_flags |= FL_OFDLCK;
+ file_lock->c.flc_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2372,11 +2438,11 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
cmd = F_SETLKW;
- file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = filp;
+ file_lock->c.flc_flags |= FL_OFDLCK;
+ file_lock->c.flc_owner = filp;
fallthrough;
case F_SETLKW:
- file_lock->fl_flags |= FL_SLEEP;
+ file_lock->c.flc_flags |= FL_SLEEP;
}
error = do_lock_file_wait(filp, cmd, file_lock);
@@ -2386,8 +2452,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
* lock that was just acquired. There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
- if (!error && file_lock->fl_type != F_UNLCK &&
- !(file_lock->fl_flags & FL_OFDLCK)) {
+ if (!error && file_lock->c.flc_type != F_UNLCK &&
+ !(file_lock->c.flc_flags & FL_OFDLCK)) {
struct files_struct *files = current->files;
/*
* We need that spin_lock here - it prevents reordering between
@@ -2398,7 +2464,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
- file_lock->fl_type = F_UNLCK;
+ file_lock->c.flc_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
error = -EBADF;
@@ -2437,16 +2503,16 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
if (flock->l_pid != 0)
goto out;
- fl->fl_flags |= FL_OFDLCK;
- fl->fl_owner = filp;
+ fl->c.flc_flags |= FL_OFDLCK;
+ fl->c.flc_owner = filp;
}
error = vfs_test_lock(filp, fl);
if (error)
goto out;
- flock->l_type = fl->fl_type;
- if (fl->fl_type != F_UNLCK)
+ flock->l_type = fl->c.flc_type;
+ if (fl->c.flc_type != F_UNLCK)
posix_lock_to_flock64(flock, fl);
out:
@@ -2486,8 +2552,8 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
cmd = F_SETLK64;
- file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = filp;
+ file_lock->c.flc_flags |= FL_OFDLCK;
+ file_lock->c.flc_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2495,11 +2561,11 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
cmd = F_SETLKW64;
- file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = filp;
+ file_lock->c.flc_flags |= FL_OFDLCK;
+ file_lock->c.flc_owner = filp;
fallthrough;
case F_SETLKW64:
- file_lock->fl_flags |= FL_SLEEP;
+ file_lock->c.flc_flags |= FL_SLEEP;
}
error = do_lock_file_wait(filp, cmd, file_lock);
@@ -2509,8 +2575,8 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
* lock that was just acquired. There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
- if (!error && file_lock->fl_type != F_UNLCK &&
- !(file_lock->fl_flags & FL_OFDLCK)) {
+ if (!error && file_lock->c.flc_type != F_UNLCK &&
+ !(file_lock->c.flc_flags & FL_OFDLCK)) {
struct files_struct *files = current->files;
/*
* We need that spin_lock here - it prevents reordering between
@@ -2521,7 +2587,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
- file_lock->fl_type = F_UNLCK;
+ file_lock->c.flc_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
error = -EBADF;
@@ -2555,13 +2621,13 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
return;
locks_init_lock(&lock);
- lock.fl_type = F_UNLCK;
- lock.fl_flags = FL_POSIX | FL_CLOSE;
+ lock.c.flc_type = F_UNLCK;
+ lock.c.flc_flags = FL_POSIX | FL_CLOSE;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
- lock.fl_owner = owner;
- lock.fl_pid = current->tgid;
- lock.fl_file = filp;
+ lock.c.flc_owner = owner;
+ lock.c.flc_pid = current->tgid;
+ lock.c.flc_file = filp;
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
@@ -2584,7 +2650,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
return;
flock_make_lock(filp, &fl, F_UNLCK);
- fl.fl_flags |= FL_CLOSE;
+ fl.c.flc_flags |= FL_CLOSE;
if (filp->f_op->flock)
filp->f_op->flock(filp, F_SETLKW, &fl);
@@ -2599,7 +2665,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
static void
locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
{
- struct file_lock *fl, *tmp;
+ struct file_lease *fl, *tmp;
LIST_HEAD(dispose);
if (list_empty(&ctx->flc_lease))
@@ -2607,8 +2673,8 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
- list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
- if (filp == fl->fl_file)
+ list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list)
+ if (filp == fl->c.flc_file)
lease_modify(fl, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
@@ -2652,7 +2718,7 @@ void locks_remove_file(struct file *filp)
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
- WARN_ON_ONCE(filp != fl->fl_file);
+ WARN_ON_ONCE(filp != fl->c.flc_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
@@ -2691,69 +2757,73 @@ struct locks_iterator {
loff_t li_pos;
};
-static void lock_get_status(struct seq_file *f, struct file_lock *fl,
+static void lock_get_status(struct seq_file *f, struct file_lock_core *flc,
loff_t id, char *pfx, int repeat)
{
struct inode *inode = NULL;
- unsigned int fl_pid;
+ unsigned int pid;
struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
- int type;
+ int type = flc->flc_type;
+ struct file_lock *fl = file_lock(flc);
+
+ pid = locks_translate_pid(flc, proc_pidns);
- fl_pid = locks_translate_pid(fl, proc_pidns);
/*
* If lock owner is dead (and pid is freed) or not visible in current
* pidns, zero is shown as a pid value. Check lock info from
* init_pid_ns to get saved lock pid value.
*/
-
- if (fl->fl_file != NULL)
- inode = file_inode(fl->fl_file);
+ if (flc->flc_file != NULL)
+ inode = file_inode(flc->flc_file);
seq_printf(f, "%lld: ", id);
if (repeat)
seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx);
- if (IS_POSIX(fl)) {
- if (fl->fl_flags & FL_ACCESS)
+ if (flc->flc_flags & FL_POSIX) {
+ if (flc->flc_flags & FL_ACCESS)
seq_puts(f, "ACCESS");
- else if (IS_OFDLCK(fl))
+ else if (flc->flc_flags & FL_OFDLCK)
seq_puts(f, "OFDLCK");
else
seq_puts(f, "POSIX ");
seq_printf(f, " %s ",
(inode == NULL) ? "*NOINODE*" : "ADVISORY ");
- } else if (IS_FLOCK(fl)) {
+ } else if (flc->flc_flags & FL_FLOCK) {
seq_puts(f, "FLOCK ADVISORY ");
- } else if (IS_LEASE(fl)) {
- if (fl->fl_flags & FL_DELEG)
+ } else if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) {
+ struct file_lease *lease = file_lease(flc);
+
+ type = target_leasetype(lease);
+
+ if (flc->flc_flags & FL_DELEG)
seq_puts(f, "DELEG ");
else
seq_puts(f, "LEASE ");
- if (lease_breaking(fl))
+ if (lease_breaking(lease))
seq_puts(f, "BREAKING ");
- else if (fl->fl_file)
+ else if (flc->flc_file)
seq_puts(f, "ACTIVE ");
else
seq_puts(f, "BREAKER ");
} else {
seq_puts(f, "UNKNOWN UNKNOWN ");
}
- type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
(type == F_RDLCK) ? "READ" : "UNLCK");
if (inode) {
/* userspace relies on this representation of dev_t */
- seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
+ seq_printf(f, "%d %02x:%02x:%lu ", pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
} else {
- seq_printf(f, "%d <none>:0 ", fl_pid);
+ seq_printf(f, "%d <none>:0 ", pid);
}
- if (IS_POSIX(fl)) {
+ if (flc->flc_flags & FL_POSIX) {
if (fl->fl_end == OFFSET_MAX)
seq_printf(f, "%Ld EOF\n", fl->fl_start);
else
@@ -2763,17 +2833,18 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
}
}
-static struct file_lock *get_next_blocked_member(struct file_lock *node)
+static struct file_lock_core *get_next_blocked_member(struct file_lock_core *node)
{
- struct file_lock *tmp;
+ struct file_lock_core *tmp;
/* NULL node or root node */
- if (node == NULL || node->fl_blocker == NULL)
+ if (node == NULL || node->flc_blocker == NULL)
return NULL;
/* Next member in the linked list could be itself */
- tmp = list_next_entry(node, fl_blocked_member);
- if (list_entry_is_head(tmp, &node->fl_blocker->fl_blocked_requests, fl_blocked_member)
+ tmp = list_next_entry(node, flc_blocked_member);
+ if (list_entry_is_head(tmp, &node->flc_blocker->flc_blocked_requests,
+ flc_blocked_member)
|| tmp == node) {
return NULL;
}
@@ -2784,18 +2855,18 @@ static struct file_lock *get_next_blocked_member(struct file_lock *node)
static int locks_show(struct seq_file *f, void *v)
{
struct locks_iterator *iter = f->private;
- struct file_lock *cur, *tmp;
+ struct file_lock_core *cur, *tmp;
struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
int level = 0;
- cur = hlist_entry(v, struct file_lock, fl_link);
+ cur = hlist_entry(v, struct file_lock_core, flc_link);
if (locks_translate_pid(cur, proc_pidns) == 0)
return 0;
- /* View this crossed linked list as a binary tree, the first member of fl_blocked_requests
- * is the left child of current node, the next silibing in fl_blocked_member is the
- * right child, we can alse get the parent of current node from fl_blocker, so this
+ /* View this crossed linked list as a binary tree, the first member of flc_blocked_requests
+ * is the left child of current node, the next silibing in flc_blocked_member is the
+ * right child, we can alse get the parent of current node from flc_blocker, so this
* question becomes traversal of a binary tree
*/
while (cur != NULL) {
@@ -2804,17 +2875,18 @@ static int locks_show(struct seq_file *f, void *v)
else
lock_get_status(f, cur, iter->li_pos, "", level);
- if (!list_empty(&cur->fl_blocked_requests)) {
+ if (!list_empty(&cur->flc_blocked_requests)) {
/* Turn left */
- cur = list_first_entry_or_null(&cur->fl_blocked_requests,
- struct file_lock, fl_blocked_member);
+ cur = list_first_entry_or_null(&cur->flc_blocked_requests,
+ struct file_lock_core,
+ flc_blocked_member);
level++;
} else {
/* Turn right */
tmp = get_next_blocked_member(cur);
/* Fall back to parent node */
- while (tmp == NULL && cur->fl_blocker != NULL) {
- cur = cur->fl_blocker;
+ while (tmp == NULL && cur->flc_blocker != NULL) {
+ cur = cur->flc_blocker;
level--;
tmp = get_next_blocked_member(cur);
}
@@ -2829,14 +2901,13 @@ static void __show_fd_locks(struct seq_file *f,
struct list_head *head, int *id,
struct file *filp, struct files_struct *files)
{
- struct file_lock *fl;
+ struct file_lock_core *fl;
- list_for_each_entry(fl, head, fl_list) {
+ list_for_each_entry(fl, head, flc_list) {
- if (filp != fl->fl_file)
+ if (filp != fl->flc_file)
continue;
- if (fl->fl_owner != files &&
- fl->fl_owner != filp)
+ if (fl->flc_owner != files && fl->flc_owner != filp)
continue;
(*id)++;
@@ -2915,6 +2986,9 @@ static int __init filelock_init(void)
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
+ filelease_cache = kmem_cache_create("file_lock_cache",
+ sizeof(struct file_lease), 0, SLAB_PANIC, NULL);
+
for_each_possible_cpu(i) {
struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 82aa7a35db26..e60a840999aa 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -426,9 +426,7 @@ EXPORT_SYMBOL(mb_cache_destroy);
static int __init mbcache_init(void)
{
- mb_entry_cache = kmem_cache_create("mbcache",
- sizeof(struct mb_cache_entry), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+ mb_entry_cache = KMEM_CACHE(mb_cache_entry, SLAB_RECLAIM_ACCOUNT);
if (!mb_entry_cache)
return -ENOMEM;
return 0;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 73f37f298087..7cbd2b9f4d11 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -87,7 +87,7 @@ static int __init init_inodecache(void)
minix_inode_cachep = kmem_cache_create("minix_inode_cache",
sizeof(struct minix_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (minix_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 64c5205e2b5e..3c60f1eaca61 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -214,7 +214,7 @@ static int copy_mnt_idmap(struct uid_gid_map *map_from,
* anything at all.
*/
if (nr_extents == 0)
- return 0;
+ return -EINVAL;
/*
* Here we know that nr_extents is greater than zero which means
diff --git a/fs/mpage.c b/fs/mpage.c
index 738882e0766d..fa8b99a199fa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -605,6 +605,7 @@ alloc_new:
GFP_NOFS);
bio->bi_iter.bi_sector = first_block << (blkbits - 9);
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 4e0de939fea1..ceb9ddf8dfdd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -17,8 +17,8 @@
#include <linux/init.h>
#include <linux/export.h>
-#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/wordpart.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/namei.h>
@@ -27,7 +27,6 @@
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
-#include <linux/ima.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
@@ -1717,7 +1716,11 @@ static inline int may_lookup(struct mnt_idmap *idmap,
{
if (nd->flags & LOOKUP_RCU) {
int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
- if (err != -ECHILD || !try_to_unlazy(nd))
+ if (!err) // success, keep going
+ return 0;
+ if (!try_to_unlazy(nd))
+ return -ECHILD; // redo it all non-lazy
+ if (err != -ECHILD) // hard error
return err;
}
return inode_permission(idmap, nd->inode, MAY_EXEC);
@@ -2676,10 +2679,8 @@ static int lookup_one_common(struct mnt_idmap *idmap,
if (!len)
return -EACCES;
- if (unlikely(name[0] == '.')) {
- if (len < 2 || (len == 2 && name[1] == '.'))
- return -EACCES;
- }
+ if (is_dot_dotdot(name, len))
+ return -EACCES;
while (len--) {
unsigned int c = *(const unsigned char *)name++;
@@ -3640,7 +3641,7 @@ static int do_open(struct nameidata *nd,
if (!error && !(file->f_mode & FMODE_OPENED))
error = vfs_open(&nd->path, file);
if (!error)
- error = ima_file_check(file, op->acc_mode);
+ error = security_file_post_open(file, op->acc_mode);
if (!error && do_truncate)
error = handle_truncate(idmap, file);
if (unlikely(error > 0)) {
@@ -3703,7 +3704,7 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
}
- ima_post_create_tmpfile(idmap, inode);
+ security_inode_post_create_tmpfile(idmap, inode);
return 0;
}
@@ -4049,8 +4050,6 @@ retry:
case 0: case S_IFREG:
error = vfs_create(idmap, path.dentry->d_inode,
dentry, mode, true);
- if (!error)
- ima_post_path_mknod(idmap, dentry);
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
@@ -4061,6 +4060,11 @@ retry:
dentry, mode, 0);
break;
}
+
+ if (error)
+ goto out2;
+
+ security_path_post_mknod(idmap, dentry);
out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
diff --git a/fs/namespace.c b/fs/namespace.c
index 437f60e96d40..5a51315c6678 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
/*
* If this is an attached mount make sure it's located in the callers
* mount namespace. If it's not don't let the caller interact with it.
- * If this is a detached mount make sure it has an anonymous mount
- * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
+ *
+ * If this mount doesn't have a parent it's most often simply a
+ * detached mount with an anonymous mount namespace. IOW, something
+ * that's simply not attached yet. But there are apparently also users
+ * that do change mount properties on the rootfs itself. That obviously
+ * neither has a parent nor is it a detached mount so we cannot
+ * unconditionally check for detached mounts.
*/
- if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
+ if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
goto out;
/*
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a59e7b2edaac..3298c29b5548 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -101,7 +101,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
}
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
- if (folio_index(folio) == rreq->no_unlock_folio &&
+ if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
_debug("no unlock");
else
@@ -246,13 +246,13 @@ EXPORT_SYMBOL(netfs_readahead);
*/
int netfs_read_folio(struct file *file, struct folio *folio)
{
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct netfs_io_request *rreq;
struct netfs_inode *ctx = netfs_inode(mapping->host);
struct folio *sink = NULL;
int ret;
- _enter("%lx", folio_index(folio));
+ _enter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
folio_file_pos(folio), folio_size(folio),
@@ -460,7 +460,7 @@ retry:
ret = PTR_ERR(rreq);
goto error;
}
- rreq->no_unlock_folio = folio_index(folio);
+ rreq->no_unlock_folio = folio->index;
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
ret = netfs_begin_cache_read(rreq, ctx);
@@ -518,7 +518,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len)
{
struct netfs_io_request *rreq;
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct netfs_inode *ctx = netfs_inode(mapping->host);
unsigned long long start = folio_pos(folio);
size_t flen = folio_size(folio);
@@ -535,7 +535,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
goto error;
}
- rreq->no_unlock_folio = folio_index(folio);
+ rreq->no_unlock_folio = folio->index;
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
ret = netfs_begin_cache_read(rreq, ctx);
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 93dc76f34e39..9a0d32e4b422 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -221,10 +221,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
break;
- ret = -ENOMEM;
folio = netfs_grab_folio_for_write(mapping, pos, part);
- if (!folio)
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
break;
+ }
flen = folio_size(folio);
offset = pos & (flen - 1);
@@ -343,7 +344,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
break;
default:
WARN(true, "Unexpected modify type %u ix=%lx\n",
- howto, folio_index(folio));
+ howto, folio->index);
ret = -EIO;
goto error_folio_unlock;
}
@@ -476,6 +477,9 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ if (!iov_iter_count(from))
+ return 0;
+
if ((iocb->ki_flags & IOCB_DIRECT) ||
test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
return netfs_unbuffered_write_iter(iocb, from);
@@ -648,7 +652,7 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq)
xas_for_each(&xas, folio, last) {
WARN(!folio_test_writeback(folio),
"bad %zx @%llx page %lx %lx\n",
- wreq->len, wreq->start, folio_index(folio), last);
+ wreq->len, wreq->start, folio->index, last);
if ((finfo = netfs_folio_info(folio))) {
/* Streaming writes cannot be redirtied whilst under
@@ -795,7 +799,7 @@ static void netfs_extend_writeback(struct address_space *mapping,
continue;
if (xa_is_value(folio))
break;
- if (folio_index(folio) != index) {
+ if (folio->index != index) {
xas_reset(xas);
break;
}
@@ -901,7 +905,7 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
long count = wbc->nr_to_write;
int ret;
- _enter(",%lx,%llx-%llx,%u", folio_index(folio), start, end, caching);
+ _enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);
wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
NETFS_WRITEBACK);
@@ -1047,7 +1051,7 @@ search_again:
start = folio_pos(folio); /* May regress with THPs */
- _debug("wback %lx", folio_index(folio));
+ _debug("wback %lx", folio->index);
/* At this point we hold neither the i_pages lock nor the page lock:
* the page may be truncated or invalidated (changing page->mapping to
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 60a40d293c87..bee047e20f5d 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -139,6 +139,9 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ if (!iov_iter_count(from))
+ return 0;
+
trace_netfs_write_iter(iocb, from);
netfs_stat(&netfs_n_rh_dio_write);
@@ -146,7 +149,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret < 0)
return ret;
ret = generic_write_checks(iocb, from);
- if (ret < 0)
+ if (ret <= 0)
goto out;
ret = file_remove_privs(file);
if (ret < 0)
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index d645f8b302a2..9397ed39b0b4 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -179,13 +179,14 @@ EXPORT_SYMBOL(fscache_acquire_cache);
void fscache_put_cache(struct fscache_cache *cache,
enum fscache_cache_trace where)
{
- unsigned int debug_id = cache->debug_id;
+ unsigned int debug_id;
bool zero;
int ref;
if (IS_ERR_OR_NULL(cache))
return;
+ debug_id = cache->debug_id;
zero = __refcount_dec_and_test(&cache->ref, &ref);
trace_fscache_cache(debug_id, ref - 1, where);
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 4309edf33862..4261ad6c55b6 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -124,7 +124,7 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
/* We might have multiple writes from the same huge
* folio, but we mustn't unlock a folio more than once.
*/
- if (have_unlocked && folio_index(folio) <= unlocked)
+ if (have_unlocked && folio->index <= unlocked)
continue;
unlocked = folio_next_index(folio) - 1;
trace_netfs_folio(folio, netfs_folio_trace_end_copy);
@@ -748,6 +748,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
if (!rreq->submitted) {
netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ if (rreq->origin == NETFS_DIO_READ)
+ inode_dio_end(rreq->inode);
ret = 0;
goto out;
}
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 0e3af37fc924..90051ced8e2a 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -180,7 +180,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo = NULL;
size_t flen = folio_size(folio);
- _enter("{%lx},%zx,%zx", folio_index(folio), offset, length);
+ _enter("{%lx},%zx,%zx", folio->index, offset, length);
folio_wait_fscache(folio);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index b4294a8aa2d4..f1eeb4914199 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -108,7 +108,7 @@ struct pnfs_block_dev {
struct pnfs_block_dev *children;
u64 chunk_size;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
u64 disk_offset;
u64 pr_key;
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index c97ebc42ec0f..93ef7f864980 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -25,17 +25,17 @@ bl_free_device(struct pnfs_block_dev *dev)
} else {
if (dev->pr_registered) {
const struct pr_ops *ops =
- dev->bdev_handle->bdev->bd_disk->fops->pr_ops;
+ file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops;
int error;
- error = ops->pr_register(dev->bdev_handle->bdev,
+ error = ops->pr_register(file_bdev(dev->bdev_file),
dev->pr_key, 0, false);
if (error)
pr_err("failed to unregister PR key.\n");
}
- if (dev->bdev_handle)
- bdev_release(dev->bdev_handle);
+ if (dev->bdev_file)
+ fput(dev->bdev_file);
}
}
@@ -169,7 +169,7 @@ static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
map->start = dev->start;
map->len = dev->len;
map->disk_offset = dev->disk_offset;
- map->bdev = dev->bdev_handle->bdev;
+ map->bdev = file_bdev(dev->bdev_file);
return true;
}
@@ -236,26 +236,26 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev)
return -EIO;
- bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
NULL, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
- MAJOR(dev), MINOR(dev), PTR_ERR(bdev_handle));
- return PTR_ERR(bdev_handle);
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file));
+ return PTR_ERR(bdev_file);
}
- d->bdev_handle = bdev_handle;
- d->len = bdev_nr_bytes(bdev_handle->bdev);
+ d->bdev_file = bdev_file;
+ d->len = bdev_nr_bytes(file_bdev(bdev_file));
d->map = bl_map_simple;
printk(KERN_INFO "pNFS: using block device %s\n",
- bdev_handle->bdev->bd_disk->disk_name);
+ file_bdev(bdev_file)->bd_disk->disk_name);
return 0;
}
@@ -300,10 +300,10 @@ bl_validate_designator(struct pnfs_block_volume *v)
}
}
-static struct bdev_handle *
+static struct file *
bl_open_path(struct pnfs_block_volume *v, const char *prefix)
{
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
const char *devname;
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
@@ -311,15 +311,15 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
if (!devname)
return ERR_PTR(-ENOMEM);
- bdev_handle = bdev_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
NULL, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
pr_warn("pNFS: failed to open device %s (%ld)\n",
- devname, PTR_ERR(bdev_handle));
+ devname, PTR_ERR(bdev_file));
}
kfree(devname);
- return bdev_handle;
+ return bdev_file;
}
static int
@@ -327,7 +327,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
const struct pr_ops *ops;
int error;
@@ -340,14 +340,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
* On other distributions like Debian, the default SCSI by-id path will
* point to the dm-multipath device if one exists.
*/
- bdev_handle = bl_open_path(v, "dm-uuid-mpath-0x");
- if (IS_ERR(bdev_handle))
- bdev_handle = bl_open_path(v, "wwn-0x");
- if (IS_ERR(bdev_handle))
- return PTR_ERR(bdev_handle);
- d->bdev_handle = bdev_handle;
-
- d->len = bdev_nr_bytes(d->bdev_handle->bdev);
+ bdev_file = bl_open_path(v, "dm-uuid-mpath-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "wwn-0x");
+ if (IS_ERR(bdev_file))
+ return PTR_ERR(bdev_file);
+ d->bdev_file = bdev_file;
+
+ d->len = bdev_nr_bytes(file_bdev(d->bdev_file));
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;
@@ -355,20 +355,20 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
return -ENODEV;
pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
- d->bdev_handle->bdev->bd_disk->disk_name, d->pr_key);
+ file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key);
- ops = d->bdev_handle->bdev->bd_disk->fops->pr_ops;
+ ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
- d->bdev_handle->bdev->bd_disk->disk_name);
+ file_bdev(d->bdev_file)->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}
- error = ops->pr_register(d->bdev_handle->bdev, 0, d->pr_key, true);
+ error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true);
if (error) {
pr_err("pNFS: failed to register key for block device %s.",
- d->bdev_handle->bdev->bd_disk->disk_name);
+ file_bdev(d->bdev_file)->bd_disk->disk_name);
goto out_blkdev_put;
}
@@ -376,7 +376,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
return 0;
out_blkdev_put:
- bdev_release(d->bdev_handle);
+ fput(d->bdev_file);
return error;
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 760d27dd7225..8adfcd4c8c1a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -356,15 +356,12 @@ static const struct svc_version *nfs4_callback_version[] = {
[4] = &nfs4_callback_version4,
};
-static struct svc_stat nfs4_callback_stats;
-
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
- .pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 44eca51b2808..fbdc9ca80f71 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -246,7 +246,7 @@ void nfs_free_client(struct nfs_client *clp)
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
kfree(clp->cl_acceptor);
- kfree(clp);
+ kfree_rcu(clp, rcu);
}
EXPORT_SYMBOL_GPL(nfs_free_client);
@@ -1006,6 +1006,14 @@ struct nfs_server *nfs_alloc_server(void)
}
EXPORT_SYMBOL_GPL(nfs_alloc_server);
+static void delayed_free(struct rcu_head *p)
+{
+ struct nfs_server *server = container_of(p, struct nfs_server, rcu);
+
+ nfs_free_iostats(server->io_stats);
+ kfree(server);
+}
+
/*
* Free up a server record
*/
@@ -1031,10 +1039,9 @@ void nfs_free_server(struct nfs_server *server)
ida_destroy(&server->lockowner_id);
ida_destroy(&server->openowner_id);
- nfs_free_iostats(server->io_stats);
put_cred(server->cred);
- kfree(server);
nfs_release_automount_timer();
+ call_rcu(&server->rcu, delayed_free);
}
EXPORT_SYMBOL_GPL(nfs_free_server);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index fa1a14def45c..d4a42ce0c7e3 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -156,8 +156,8 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
list = &flctx->flc_posix;
spin_lock(&flctx->flc_lock);
restart:
- list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file)->state != state)
+ for_each_file_lock(fl, list) {
+ if (nfs_file_open_context(fl->c.flc_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c8ecbe999059..ac505671efbd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1431,9 +1431,9 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry)
static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
{
struct inode *inode = d_inode(dentry);
- struct inode *dir = d_inode(dentry->d_parent);
+ struct inode *dir = d_inode_rcu(dentry->d_parent);
- if (!nfs_verify_change_attribute(dir, verf))
+ if (!dir || !nfs_verify_change_attribute(dir, verf))
return;
if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
nfs_set_verifier_delegated(&verf);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c03926a1cc73..7af5d270de28 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -1037,8 +1037,7 @@ int __init nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ 0, SLAB_RECLAIM_ACCOUNT,
NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8577ccf621f5..407c6e15afe2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -720,15 +720,15 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
- unsigned int saved_type = fl->fl_type;
+ unsigned int saved_type = fl->c.flc_type;
/* Try local locking first */
posix_test_lock(filp, fl);
- if (fl->fl_type != F_UNLCK) {
+ if (fl->c.flc_type != F_UNLCK) {
/* found a conflict */
goto out;
}
- fl->fl_type = saved_type;
+ fl->c.flc_type = saved_type;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
goto out_noconflict;
@@ -740,7 +740,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
out:
return status;
out_noconflict:
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
goto out;
}
@@ -765,7 +765,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
*/
- if (status < 0 && !(fl->fl_flags & FL_CLOSE))
+ if (status < 0 && !(fl->c.flc_flags & FL_CLOSE))
return status;
}
@@ -832,12 +832,12 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
int is_local = 0;
dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
- filp, fl->fl_type, fl->fl_flags,
+ filp, fl->c.flc_type, fl->c.flc_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
- if (fl->fl_flags & FL_RECLAIM)
+ if (fl->c.flc_flags & FL_RECLAIM)
return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
@@ -851,7 +851,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
if (IS_GETLK(cmd))
ret = do_getlk(filp, cmd, fl, is_local);
- else if (fl->fl_type == F_UNLCK)
+ else if (lock_is_unlock(fl))
ret = do_unlk(filp, cmd, fl, is_local);
else
ret = do_setlk(filp, cmd, fl, is_local);
@@ -869,16 +869,16 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
int is_local = 0;
dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
- filp, fl->fl_type, fl->fl_flags);
+ filp, fl->c.flc_type, fl->c.flc_flags);
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebb8d60e1152..93ea49a7eb61 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2372,7 +2372,7 @@ static int __init nfs_init_inodecache(void)
nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
sizeof(struct nfs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (nfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2de66e4e8280..cbbe3f0193b8 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -963,7 +963,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
struct nfs_open_context *ctx = nfs_file_open_context(filp);
int status;
- if (fl->fl_flags & FL_CLOSE) {
+ if (fl->c.flc_flags & FL_CLOSE) {
l_ctx = nfs_get_lock_context(ctx);
if (IS_ERR(l_ctx))
l_ctx = NULL;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 49aaf28a6950..b6e3d8f77b91 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -1017,7 +1017,7 @@ int __init nfs4_xattr_cache_init(void)
nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
sizeof(struct nfs4_xattr_cache), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ (SLAB_RECLAIM_ACCOUNT),
nfs4_xattr_cache_init_once);
if (nfs4_xattr_cache_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 581698f1b7b2..6ff41ceb9f1c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -330,7 +330,7 @@ extern int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *deleg_stateid,
fmode_t fmode);
extern int nfs4_proc_setlease(struct file *file, int arg,
- struct file_lock **lease, void **priv);
+ struct file_lease **lease, void **priv);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
extern void nfs4_update_changeattr(struct inode *dir,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e238abc78a13..1cd9652f3c28 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -439,7 +439,7 @@ void nfs42_ssc_unregister_ops(void)
}
#endif /* CONFIG_NFS_V4_2 */
-static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease,
+static int nfs4_setlease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
return nfs4_proc_setlease(file, arg, lease, priv);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 23819a756508..815996cb27fc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6800,7 +6800,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
switch (status) {
case 0:
- request->fl_type = F_UNLCK;
+ request->c.flc_type = F_UNLCK;
break;
case -NFS4ERR_DENIED:
status = 0;
@@ -7018,8 +7018,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Ensure this is an unlock - when canceling a lock, the
* canceled lock is passed in, and it won't be an unlock.
*/
- fl->fl_type = F_UNLCK;
- if (fl->fl_flags & FL_CLOSE)
+ fl->c.flc_type = F_UNLCK;
+ if (fl->c.flc_flags & FL_CLOSE)
set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
@@ -7045,11 +7045,11 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
struct rpc_task *task;
struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
int status = 0;
- unsigned char fl_flags = request->fl_flags;
+ unsigned char saved_flags = request->c.flc_flags;
status = nfs4_set_lock_state(state, request);
/* Unlock _before_ we do the RPC call */
- request->fl_flags |= FL_EXISTS;
+ request->c.flc_flags |= FL_EXISTS;
/* Exclude nfs_delegation_claim_locks() */
mutex_lock(&sp->so_delegreturn_mutex);
/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
@@ -7073,14 +7073,16 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
status = -ENOMEM;
if (IS_ERR(seqid))
goto out;
- task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
+ task = nfs4_do_unlck(request,
+ nfs_file_open_context(request->c.flc_file),
+ lsp, seqid);
status = PTR_ERR(task);
if (IS_ERR(task))
goto out;
status = rpc_wait_for_completion_task(task);
rpc_put_task(task);
out:
- request->fl_flags = fl_flags;
+ request->c.flc_flags = saved_flags;
trace_nfs4_unlock(request, state, F_SETLK, status);
return status;
}
@@ -7191,7 +7193,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
- data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+ data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
goto out_restart;
}
@@ -7292,7 +7294,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
- data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+ data = nfs4_alloc_lockdata(fl,
+ nfs_file_open_context(fl->c.flc_file),
fl->fl_u.nfs4_fl.owner, GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -7398,10 +7401,10 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
{
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs4_state_owner *sp = state->owner;
- unsigned char fl_flags = request->fl_flags;
+ unsigned char flags = request->c.flc_flags;
int status;
- request->fl_flags |= FL_ACCESS;
+ request->c.flc_flags |= FL_ACCESS;
status = locks_lock_inode_wait(state->inode, request);
if (status < 0)
goto out;
@@ -7410,7 +7413,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
/* Yes: cache locks! */
/* ...but avoid races with delegation recall... */
- request->fl_flags = fl_flags & ~FL_SLEEP;
+ request->c.flc_flags = flags & ~FL_SLEEP;
status = locks_lock_inode_wait(state->inode, request);
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
@@ -7420,7 +7423,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
- request->fl_flags = fl_flags;
+ request->c.flc_flags = flags;
return status;
}
@@ -7562,7 +7565,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
return -EINVAL;
- if (request->fl_type == F_UNLCK) {
+ if (lock_is_unlock(request)) {
if (state != NULL)
return nfs4_proc_unlck(state, cmd, request);
return 0;
@@ -7571,7 +7574,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (state == NULL)
return -ENOLCK;
- if ((request->fl_flags & FL_POSIX) &&
+ if ((request->c.flc_flags & FL_POSIX) &&
!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
return -ENOLCK;
@@ -7579,7 +7582,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
* Don't rely on the VFS having checked the file open mode,
* since it won't do this for flock() locks.
*/
- switch (request->fl_type) {
+ switch (request->c.flc_type) {
case F_RDLCK:
if (!(filp->f_mode & FMODE_READ))
return -EBADF;
@@ -7601,7 +7604,7 @@ static int nfs4_delete_lease(struct file *file, void **priv)
return generic_setlease(file, F_UNLCK, NULL, priv);
}
-static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
struct inode *inode = file_inode(file);
@@ -7619,7 +7622,7 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
return -EAGAIN;
}
-int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
switch (arg) {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9a5d911a7edc..8cfabdbda336 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -847,15 +847,15 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
*/
static struct nfs4_lock_state *
__nfs4_find_lock_state(struct nfs4_state *state,
- fl_owner_t fl_owner, fl_owner_t fl_owner2)
+ fl_owner_t owner, fl_owner_t owner2)
{
struct nfs4_lock_state *pos, *ret = NULL;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
- if (pos->ls_owner == fl_owner) {
+ if (pos->ls_owner == owner) {
ret = pos;
break;
}
- if (pos->ls_owner == fl_owner2)
+ if (pos->ls_owner == owner2)
ret = pos;
}
if (ret)
@@ -868,7 +868,7 @@ __nfs4_find_lock_state(struct nfs4_state *state,
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
@@ -879,7 +879,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
nfs4_init_seqid_counter(&lsp->ls_seqid);
refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
- lsp->ls_owner = fl_owner;
+ lsp->ls_owner = owner;
lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
if (lsp->ls_seqid.owner_id < 0)
goto out_free;
@@ -980,7 +980,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_ops != NULL)
return 0;
- lsp = nfs4_get_lock_state(state, fl->fl_owner);
+ lsp = nfs4_get_lock_state(state, fl->c.flc_owner);
if (lsp == NULL)
return -ENOMEM;
fl->fl_u.nfs4_fl.owner = lsp;
@@ -993,7 +993,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
const struct nfs_lock_context *l_ctx)
{
struct nfs4_lock_state *lsp;
- fl_owner_t fl_owner, fl_flock_owner;
+ fl_owner_t owner, fl_flock_owner;
int ret = -ENOENT;
if (l_ctx == NULL)
@@ -1002,11 +1002,11 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
goto out;
- fl_owner = l_ctx->lockowner;
+ owner = l_ctx->lockowner;
fl_flock_owner = l_ctx->open_context->flock_owner;
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner);
+ lsp = __nfs4_find_lock_state(state, owner, fl_flock_owner);
if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
ret = -EIO;
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
@@ -1529,8 +1529,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
down_write(&nfsi->rwsem);
spin_lock(&flctx->flc_lock);
restart:
- list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file)->state != state)
+ for_each_file_lock(fl, list) {
+ if (nfs_file_open_context(fl->c.flc_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = ops->recover_lock(state, fl);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index d27919d7241d..fd7cb15b08b2 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -699,7 +699,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
__entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
- __entry->type = request->fl_type;
+ __entry->type = request->c.flc_type;
__entry->start = request->fl_start;
__entry->end = request->fl_end;
__entry->dev = inode->i_sb->s_dev;
@@ -771,7 +771,7 @@ TRACE_EVENT(nfs4_set_lock,
__entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
- __entry->type = request->fl_type;
+ __entry->type = request->c.flc_type;
__entry->start = request->fl_start;
__entry->end = request->fl_end;
__entry->dev = inode->i_sb->s_dev;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 69406e60f391..1416099dfcd1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1305,7 +1305,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
static inline int nfs4_lock_type(struct file_lock *fl, int block)
{
- if (fl->fl_type == F_RDLCK)
+ if (lock_is_read(fl))
return block ? NFS4_READW_LT : NFS4_READ_LT;
return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
}
@@ -5052,10 +5052,10 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
fl->fl_end = fl->fl_start + (loff_t)length - 1;
if (length == ~(uint64_t)0)
fl->fl_end = OFFSET_MAX;
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
if (type & 1)
- fl->fl_type = F_RDLCK;
- fl->fl_pid = 0;
+ fl->c.flc_type = F_RDLCK;
+ fl->c.flc_pid = 0;
}
p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bb79d3a886ae..84bb85264572 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1301,7 +1301,7 @@ static bool
is_whole_file_wrlock(struct file_lock *fl)
{
return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
- fl->fl_type == F_WRLCK;
+ lock_is_write(fl);
}
/* If we know the page is up to date, and we're not using byte range locks (or
@@ -1335,13 +1335,13 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio,
spin_lock(&flctx->flc_lock);
if (!list_empty(&flctx->flc_posix)) {
fl = list_first_entry(&flctx->flc_posix, struct file_lock,
- fl_list);
+ c.flc_list);
if (is_whole_file_wrlock(fl))
ret = 1;
} else if (!list_empty(&flctx->flc_flock)) {
fl = list_first_entry(&flctx->flc_flock, struct file_lock,
- fl_list);
- if (fl->fl_type == F_WRLCK)
+ c.flc_list);
+ if (lock_is_write(fl))
ret = 1;
}
spin_unlock(&flctx->flc_lock);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 46fd74d91ea9..3c040c81c77d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
}
static void
-nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
- struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
+ struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4cbe0434cbb8..66a05fefae98 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -80,8 +80,6 @@ enum {
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
-int nfsd_net_reply_cache_init(struct nfsd_net *nn);
-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 9cb7f0c33df5..ddd3e0d9cfa6 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -61,13 +61,10 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
struct nfsd_fcache_disposal {
- struct work_struct work;
spinlock_t lock;
struct list_head freeme;
};
-static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
-
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct list_lru nfsd_file_lru;
@@ -283,7 +280,7 @@ nfsd_file_free(struct nfsd_file *nf)
nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) {
nfsd_file_check_write_error(nf);
- filp_close(nf->nf_file, NULL);
+ nfsd_filp_close(nf->nf_file);
}
/*
@@ -421,7 +418,37 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
spin_lock(&l->lock);
list_move_tail(&nf->nf_lru, &l->freeme);
spin_unlock(&l->lock);
- queue_work(nfsd_filecache_wq, &l->work);
+ svc_wake_up(nn->nfsd_serv);
+ }
+}
+
+/**
+ * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
+ * @nn: nfsd_net in which to find files to be disposed.
+ *
+ * When files held open for nfsv3 are removed from the filecache, whether
+ * due to memory pressure or garbage collection, they are queued to
+ * a per-net-ns queue. This function completes the disposal, either
+ * directly or by waking another nfsd thread to help with the work.
+ */
+void nfsd_file_net_dispose(struct nfsd_net *nn)
+{
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+ if (!list_empty(&l->freeme)) {
+ LIST_HEAD(dispose);
+ int i;
+
+ spin_lock(&l->lock);
+ for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
+ list_move(l->freeme.next, &dispose);
+ spin_unlock(&l->lock);
+ if (!list_empty(&l->freeme))
+ /* Wake up another thread to share the work
+ * *before* doing any actual disposing.
+ */
+ svc_wake_up(nn->nfsd_serv);
+ nfsd_file_dispose_list(&dispose);
}
}
@@ -631,28 +658,6 @@ nfsd_file_close_inode_sync(struct inode *inode)
list_del_init(&nf->nf_lru);
nfsd_file_free(nf);
}
- flush_delayed_fput();
-}
-
-/**
- * nfsd_file_delayed_close - close unused nfsd_files
- * @work: dummy
- *
- * Scrape the freeme list for this nfsd_net, and then dispose of them
- * all.
- */
-static void
-nfsd_file_delayed_close(struct work_struct *work)
-{
- LIST_HEAD(head);
- struct nfsd_fcache_disposal *l = container_of(work,
- struct nfsd_fcache_disposal, work);
-
- spin_lock(&l->lock);
- list_splice_init(&l->freeme, &head);
- spin_unlock(&l->lock);
-
- nfsd_file_dispose_list(&head);
}
static int
@@ -662,8 +667,8 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
struct file_lock *fl = data;
/* Only close files for F_SETLEASE leases */
- if (fl->fl_flags & FL_LEASE)
- nfsd_file_close_inode(file_inode(fl->fl_file));
+ if (fl->c.flc_flags & FL_LEASE)
+ nfsd_file_close_inode(file_inode(fl->c.flc_file));
return 0;
}
@@ -717,25 +722,18 @@ nfsd_file_cache_init(void)
return ret;
ret = -ENOMEM;
- nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", WQ_UNBOUND, 0);
- if (!nfsd_filecache_wq)
- goto out;
-
- nfsd_file_slab = kmem_cache_create("nfsd_file",
- sizeof(struct nfsd_file), 0, 0, NULL);
+ nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
if (!nfsd_file_slab) {
pr_err("nfsd: unable to create nfsd_file_slab\n");
goto out_err;
}
- nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
- sizeof(struct nfsd_file_mark), 0, 0, NULL);
+ nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0);
if (!nfsd_file_mark_slab) {
pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
goto out_err;
}
-
ret = list_lru_init(&nfsd_file_lru);
if (ret) {
pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
@@ -785,8 +783,6 @@ out_err:
nfsd_file_slab = NULL;
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
- destroy_workqueue(nfsd_filecache_wq);
- nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable);
goto out;
}
@@ -832,7 +828,6 @@ nfsd_alloc_fcache_disposal(void)
l = kmalloc(sizeof(*l), GFP_KERNEL);
if (!l)
return NULL;
- INIT_WORK(&l->work, nfsd_file_delayed_close);
spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme);
return l;
@@ -841,7 +836,6 @@ nfsd_alloc_fcache_disposal(void)
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
- cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme);
kfree(l);
}
@@ -910,8 +904,6 @@ nfsd_file_cache_shutdown(void)
fsnotify_wait_marks_destroyed();
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
- destroy_workqueue(nfsd_filecache_wq);
- nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable);
for_each_possible_cpu(i) {
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index e54165a3224f..c61884def906 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -56,6 +56,7 @@ void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
+void nfsd_file_net_dispose(struct nfsd_net *nn);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 74b4360779a1..d4be519b5734 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -11,8 +11,10 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/filelock.h>
+#include <linux/nfs4.h>
#include <linux/percpu_counter.h>
#include <linux/siphash.h>
+#include <linux/sunrpc/stats.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -26,10 +28,22 @@ struct nfsd4_client_tracking_ops;
enum {
/* cache misses due only to checksum comparison failures */
- NFSD_NET_PAYLOAD_MISSES,
+ NFSD_STATS_PAYLOAD_MISSES,
/* amount of memory (in bytes) currently consumed by the DRC */
- NFSD_NET_DRC_MEM_USAGE,
- NFSD_NET_COUNTERS_NUM
+ NFSD_STATS_DRC_MEM_USAGE,
+ NFSD_STATS_RC_HITS, /* repcache hits */
+ NFSD_STATS_RC_MISSES, /* repcache misses */
+ NFSD_STATS_RC_NOCACHE, /* uncached reqs */
+ NFSD_STATS_FH_STALE, /* FH stale error */
+ NFSD_STATS_IO_READ, /* bytes returned to read requests */
+ NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
+#ifdef CONFIG_NFSD_V4
+ NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
+ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
+#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
+#endif
+ NFSD_STATS_COUNTERS_NUM
};
/*
@@ -164,7 +178,10 @@ struct nfsd_net {
atomic_t num_drc_entries;
/* Per-netns stats counters */
- struct percpu_counter counter[NFSD_NET_COUNTERS_NUM];
+ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
+
+ /* sunrpc svc stats */
+ struct svc_stat nfsd_svcstats;
/* longest hash chain seen */
unsigned int longest_chain;
@@ -192,6 +209,10 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker *nfsd_client_shrinker;
struct work_struct nfsd_shrinker_work;
+
+ /* last time an admin-revoke happened for NFSv4.0 */
+ time64_t nfs40_last_revoke;
+
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b78eceebd945..dfcc957e460d 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
struct nfsd_attrs attrs = {
.na_iattr = &argp->attrs,
};
+ const struct timespec64 *guardtime = NULL;
dprintk("nfsd: SETATTR(3) %s\n",
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
- argp->check_guard, argp->guardtime);
+ if (argp->check_guard)
+ guardtime = &argp->guardtime;
+ resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f32128955ec8..a7a07470c1f8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
static bool
svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
{
- __be32 *p;
u32 check;
if (xdr_stream_decode_bool(xdr, &check) < 0)
return false;
if (check) {
- p = xdr_inline_decode(xdr, XDR_UNIT * 2);
- if (!p)
+ if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
return false;
args->check_guard = 1;
- args->guardtime = be32_to_cpup(p);
} else
args->check_guard = 0;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 926c29879c6a..87c9547989f6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -45,7 +45,7 @@
#define NFSDDBG_FACILITY NFSDDBG_PROC
-static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
+static void nfsd4_mark_cb_fault(struct nfs4_client *clp);
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
@@ -85,7 +85,21 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n)
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len)
{
- WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
+ xdr_stream_encode_uint32_array(xdr, bitmap, len);
+}
+
+static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_cb_fattr *fattr)
+{
+ fattr->ncf_cb_change = 0;
+ fattr->ncf_cb_fsize = 0;
+ if (bitmap[0] & FATTR4_WORD0_CHANGE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
+ return -NFSERR_BAD_XDR;
+ if (bitmap[0] & FATTR4_WORD0_SIZE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
+ return -NFSERR_BAD_XDR;
+ return 0;
}
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
@@ -334,6 +348,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr,
}
/*
+ * CB_GETATTR4args
+ * struct CB_GETATTR4args {
+ * nfs_fh4 fh;
+ * bitmap4 attr_request;
+ * };
+ *
+ * The size and change attributes are the only one
+ * guaranteed to be serviced by the client.
+ */
+static void
+encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
+ struct nfs4_cb_fattr *fattr)
+{
+ struct nfs4_delegation *dp =
+ container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+ struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
+ encode_nfs_fh4(xdr, fh);
+ encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -469,6 +507,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_getattr4args(xdr, &hdr, ncf);
+ encode_cb_nops(&hdr);
+}
+
+/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -524,6 +582,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+ u32 bitmap[3] = {0};
+ u32 attrlen;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
+ status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
+ if (status)
+ return status;
+ if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
+ return -NFSERR_BAD_XDR;
+ if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
+ return -NFSERR_BAD_XDR;
+ if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
+ return -NFSERR_BAD_XDR;
+ status = decode_cb_fattr4(xdr, bitmap, ncf);
+ return status;
+}
+
+/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
@@ -674,7 +768,7 @@ static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
const struct nfsd4_callback *cb = data;
const struct nfsd4_blocked_lock *nbl =
container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.c.flc_owner;
struct nfs4_cb_compound_hdr hdr = {
.ident = 0,
.minorversion = cb->cb_clp->cl_minorversion,
@@ -831,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
+ PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@@ -887,7 +982,16 @@ static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
- return queue_work(callback_wq, &cb->cb_work);
+ trace_nfsd_cb_queue(cb->cb_clp, cb);
+ return queue_delayed_work(callback_wq, &cb->cb_work, 0);
+}
+
+static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
+ unsigned long msecs)
+{
+ trace_nfsd_cb_queue(cb->cb_clp, cb);
+ queue_delayed_work(callback_wq, &cb->cb_work,
+ msecs_to_jiffies(msecs));
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@@ -999,18 +1103,18 @@ static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
{
if (clp->cl_cb_state != newstate) {
clp->cl_cb_state = newstate;
- trace_nfsd_cb_state(clp);
+ trace_nfsd_cb_new_state(clp);
}
}
-static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_down(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
}
-static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_fault(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
@@ -1022,7 +1126,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
- nfsd4_mark_cb_down(clp, task->tk_status);
+ nfsd4_mark_cb_down(clp);
else
nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
}
@@ -1106,6 +1210,7 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
+ trace_nfsd_cb_destroy(clp, cb);
nfsd41_cb_release_slot(cb);
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
@@ -1158,6 +1263,8 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
if (!cb->cb_holds_slot)
goto need_restart;
+ /* This is the operation status code for CB_SEQUENCE */
+ trace_nfsd_cb_seq_status(task, cb);
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1171,13 +1278,23 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
- fallthrough;
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ ret = false;
+ break;
case 1:
+ /*
+ * cb_seq_status remains 1 if an RPC Reply was never
+ * received. NFSD can't know if the client processed
+ * the CB_SEQUENCE operation. Ask the client to send a
+ * DESTROY_SESSION to recover.
+ */
+ fallthrough;
case -NFS4ERR_BADSESSION:
- nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
+ nfsd4_mark_cb_fault(cb->cb_clp);
ret = false;
- break;
+ goto need_restart;
case -NFS4ERR_DELAY:
+ cb->cb_seq_status = 1;
if (!rpc_restart_call(task))
goto out;
@@ -1192,14 +1309,11 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
}
break;
default:
- nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
- dprintk("%s: unprocessed error %d\n", __func__,
- cb->cb_seq_status);
+ nfsd4_mark_cb_fault(cb->cb_clp);
}
-
nfsd41_cb_release_slot(cb);
- dprintk("%s: freed slot, new seqid=%d\n", __func__,
- clp->cl_cb_session->se_cb_seq_nr);
+
+ trace_nfsd_cb_free_slot(task, cb);
if (RPC_SIGNALLED(task))
goto need_restart;
@@ -1211,6 +1325,7 @@ retry_nowait:
goto out;
need_restart:
if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ trace_nfsd_cb_restart(clp, cb);
task->tk_status = 0;
cb->cb_need_restart = true;
}
@@ -1240,7 +1355,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
case -EIO:
case -ETIMEDOUT:
case -EACCES:
- nfsd4_mark_cb_down(clp, task->tk_status);
+ nfsd4_mark_cb_down(clp);
}
break;
default:
@@ -1295,12 +1410,13 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
nfsd41_cb_inflight_wait_complete(clp);
}
-/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
struct nfsd4_session *s;
struct nfsd4_conn *c;
+ lockdep_assert_held(&clp->cl_lock);
+
list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_flags & NFS4_CDFC4_BACK)
@@ -1324,11 +1440,14 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
struct nfsd4_conn *c;
int err;
+ trace_nfsd_cb_bc_update(clp, cb);
+
/*
* This is either an update, or the client dying; in either case,
* kill the old client:
*/
if (clp->cl_cb_client) {
+ trace_nfsd_cb_bc_shutdown(clp, cb);
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
@@ -1340,13 +1459,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
}
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return;
+
spin_lock(&clp->cl_lock);
/*
* Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them:
*/
- BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
+ WARN_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
+
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp);
if (c) {
@@ -1358,7 +1479,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
- nfsd4_mark_cb_down(clp, err);
+ nfsd4_mark_cb_down(clp);
if (c)
svc_xprt_put(c->cn_xprt);
return;
@@ -1369,25 +1490,28 @@ static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
- container_of(work, struct nfsd4_callback, cb_work);
+ container_of(work, struct nfsd4_callback, cb_work.work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
- if (cb->cb_need_restart) {
- cb->cb_need_restart = false;
- } else {
- if (cb->cb_ops && cb->cb_ops->prepare)
- cb->cb_ops->prepare(cb);
- }
+ trace_nfsd_cb_start(clp);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
if (!clnt) {
- /* Callback channel broken, or client killed; give up: */
- nfsd41_destroy_cb(cb);
+ if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
+ nfsd41_destroy_cb(cb);
+ else {
+ /*
+ * XXX: Ideally, we could wait for the client to
+ * reconnect, but I haven't figured out how
+ * to do that yet.
+ */
+ nfsd4_queue_cb_delayed(cb, 25);
+ }
return;
}
@@ -1400,6 +1524,12 @@ nfsd4_run_cb_work(struct work_struct *work)
return;
}
+ if (cb->cb_need_restart) {
+ cb->cb_need_restart = false;
+ } else {
+ if (cb->cb_ops && cb->cb_ops->prepare)
+ cb->cb_ops->prepare(cb);
+ }
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
@@ -1414,8 +1544,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
- INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
- cb->cb_seq_status = 1;
+ INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 5e8096bc5eaa..4f3072b5979a 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -25,7 +25,7 @@ static struct kmem_cache *nfs4_layout_cache;
static struct kmem_cache *nfs4_layout_stateid_cache;
static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
-static const struct lock_manager_operations nfsd4_layouts_lm_ops;
+static const struct lease_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
@@ -152,6 +152,23 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
#endif
}
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+ struct nfsd_file *fl;
+
+ spin_lock(&ls->ls_stid.sc_file->fi_lock);
+ fl = ls->ls_file;
+ ls->ls_file = NULL;
+ spin_unlock(&ls->ls_stid.sc_file->fi_lock);
+
+ if (fl) {
+ if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
+ kernel_setlease(fl->nf_file, F_UNLCK, NULL,
+ (void **)&ls);
+ nfsd_file_put(fl);
+ }
+}
+
static void
nfsd4_free_layout_stateid(struct nfs4_stid *stid)
{
@@ -169,9 +186,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
list_del_init(&ls->ls_perfile);
spin_unlock(&fp->fi_lock);
- if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
- vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
- nfsd_file_put(ls->ls_file);
+ nfsd4_close_layout(ls);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -182,27 +197,26 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
static int
nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
{
- struct file_lock *fl;
+ struct file_lease *fl;
int status;
if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
return 0;
- fl = locks_alloc_lock();
+ fl = locks_alloc_lease();
if (!fl)
return -ENOMEM;
- locks_init_lock(fl);
+ locks_init_lease(fl);
fl->fl_lmops = &nfsd4_layouts_lm_ops;
- fl->fl_flags = FL_LAYOUT;
- fl->fl_type = F_RDLCK;
- fl->fl_end = OFFSET_MAX;
- fl->fl_owner = ls;
- fl->fl_pid = current->tgid;
- fl->fl_file = ls->ls_file->nf_file;
-
- status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
+ fl->c.flc_flags = FL_LAYOUT;
+ fl->c.flc_type = F_RDLCK;
+ fl->c.flc_owner = ls;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_file = ls->ls_file->nf_file;
+
+ status = kernel_setlease(fl->c.flc_file, fl->c.flc_type, &fl, NULL);
if (status) {
- locks_free_lock(fl);
+ locks_free_lease(fl);
return status;
}
BUG_ON(fl != NULL);
@@ -236,7 +250,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
- if (parent->sc_type == NFS4_DELEG_STID)
+ if (parent->sc_type == SC_TYPE_DELEG)
ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
@@ -250,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
}
spin_lock(&clp->cl_lock);
- stp->sc_type = NFS4_LAYOUT_STID;
+ stp->sc_type = SC_TYPE_LAYOUT;
list_add(&ls->ls_perclnt, &clp->cl_lo_states);
spin_unlock(&clp->cl_lock);
@@ -269,13 +283,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid;
- unsigned char typemask = NFS4_LAYOUT_STID;
+ unsigned short typemask = SC_TYPE_LAYOUT;
__be32 status;
if (create)
- typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
+ typemask |= (SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG);
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+ status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
net_generic(SVC_NET(rqstp), nfsd_net_id));
if (status)
goto out;
@@ -286,7 +300,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
goto out_put_stid;
}
- if (stid->sc_type != NFS4_LAYOUT_STID) {
+ if (stid->sc_type != SC_TYPE_LAYOUT) {
ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
nfs4_put_stid(stid);
@@ -518,7 +532,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
lrp->lrs_present = true;
} else {
trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
- nfs4_unhash_stid(&ls->ls_stid);
+ ls->ls_stid.sc_status |= SC_STATUS_CLOSED;
lrp->lrs_present = false;
}
spin_unlock(&ls->ls_lock);
@@ -605,7 +619,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
}
static void
-nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
+nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
char addr_str[INET6_ADDRSTRLEN];
@@ -627,7 +641,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
- argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
+ argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@@ -657,6 +671,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
+ struct nfsd_file *fl;
trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
@@ -688,12 +703,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
* Unknown error or non-responding client, we'll need to fence.
*/
trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
- ops = nfsd4_layout_ops[ls->ls_layout_type];
- if (ops->fence_client)
- ops->fence_client(ls);
- else
- nfsd4_cb_layout_fail(ls);
+ rcu_read_lock();
+ fl = nfsd_file_get(ls->ls_file);
+ rcu_read_unlock();
+ if (fl) {
+ ops = nfsd4_layout_ops[ls->ls_layout_type];
+ if (ops->fence_client)
+ ops->fence_client(ls, fl);
+ else
+ nfsd4_cb_layout_fail(ls, fl);
+ nfsd_file_put(fl);
+ }
return 1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
@@ -723,7 +743,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
};
static bool
-nfsd4_layout_lm_break(struct file_lock *fl)
+nfsd4_layout_lm_break(struct file_lease *fl)
{
/*
* We don't want the locks code to timeout the lease for us;
@@ -731,19 +751,19 @@ nfsd4_layout_lm_break(struct file_lock *fl)
* in time:
*/
fl->fl_break_time = 0;
- nfsd4_recall_file_layout(fl->fl_owner);
+ nfsd4_recall_file_layout(fl->c.flc_owner);
return false;
}
static int
-nfsd4_layout_lm_change(struct file_lock *onlist, int arg,
+nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
struct list_head *dispose)
{
BUG_ON(!(arg & F_UNLCK));
return lease_modify(onlist, arg, dispose);
}
-static const struct lock_manager_operations nfsd4_layouts_lm_ops = {
+static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
.lm_break = nfsd4_layout_lm_break,
.lm_change = nfsd4_layout_lm_change,
};
@@ -756,13 +776,11 @@ nfsd4_init_pnfs(void)
for (i = 0; i < DEVID_HASH_SIZE; i++)
INIT_LIST_HEAD(&nfsd_devid_hash[i]);
- nfs4_layout_cache = kmem_cache_create("nfs4_layout",
- sizeof(struct nfs4_layout), 0, 0, NULL);
+ nfs4_layout_cache = KMEM_CACHE(nfs4_layout, 0);
if (!nfs4_layout_cache)
return -ENOMEM;
- nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
- sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
+ nfs4_layout_stateid_cache = KMEM_CACHE(nfs4_layout_stateid, 0);
if (!nfs4_layout_stateid_cache) {
kmem_cache_destroy(nfs4_layout_cache);
return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 14712fa08f76..2927b1263f08 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1143,6 +1143,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
};
struct inode *inode;
__be32 status = nfs_ok;
+ bool save_no_wcc;
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
@@ -1168,8 +1169,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
- 0, (time64_t)0);
+ save_no_wcc = cstate->current_fh.fh_no_wcc;
+ cstate->current_fh.fh_no_wcc = true;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
+ cstate->current_fh.fh_no_wcc = save_no_wcc;
if (!status)
status = nfserrno(attrs.na_labelerr);
if (!status)
@@ -2490,10 +2493,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
return rpc_success;
}
-static inline void nfsd4_increment_op_stats(u32 opnum)
+static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
{
if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
}
static const struct nfsd4_operation nfsd4_ops[];
@@ -2768,7 +2771,7 @@ encode_op:
status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
- nfsd4_increment_op_stats(op->opnum);
+ nfsd4_increment_op_stats(nn, op->opnum);
}
fh_put(current_fh);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2fa54cfd4882..1a93c7fcf76c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -87,6 +87,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
static void nfsd4_file_hash_remove(struct nfs4_file *fi);
+static void deleg_reaper(struct nfsd_net *nn);
/* Locking: */
@@ -127,6 +128,7 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
static struct workqueue_struct *laundry_wq;
@@ -318,6 +320,7 @@ free_nbl(struct kref *kref)
struct nfsd4_blocked_lock *nbl;
nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
+ locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
@@ -325,7 +328,6 @@ static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
locks_delete_block(&nbl->nbl_lock);
- locks_release_private(&nbl->nbl_lock);
kref_put(&nbl->nbl_kref, free_nbl);
}
@@ -1189,6 +1191,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+ &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+ dp->dl_cb_fattr.ncf_file_modified = false;
+ dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -1210,6 +1216,8 @@ nfs4_put_stid(struct nfs4_stid *s)
return;
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
nfs4_free_cpntf_statelist(clp->net, s);
spin_unlock(&clp->cl_lock);
s->sc_free(s);
@@ -1249,7 +1257,7 @@ static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
WARN_ON_ONCE(!fp->fi_delegees);
- vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
+ kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
put_deleg_file(fp);
}
@@ -1260,11 +1268,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
nfs4_put_stid(&dp->dl_stid);
}
-void nfs4_unhash_stid(struct nfs4_stid *s)
-{
- s->sc_type = 0;
-}
-
/**
* nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to
@@ -1312,11 +1315,12 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ lockdep_assert_held(&clp->cl_lock);
if (nfs4_delegation_exists(clp, fp))
return -EAGAIN;
refcount_inc(&dp->dl_stid.sc_count);
- dp->dl_stid.sc_type = NFS4_DELEG_STID;
+ dp->dl_stid.sc_type = SC_TYPE_DELEG;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &clp->cl_delegations);
return 0;
@@ -1328,7 +1332,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
}
static bool
-unhash_delegation_locked(struct nfs4_delegation *dp)
+unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -1337,7 +1341,13 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
if (!delegation_hashed(dp))
return false;
- dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+ if (statusmask == SC_STATUS_REVOKED &&
+ dp->dl_stid.sc_client->cl_minorversion == 0)
+ statusmask = SC_STATUS_CLOSED;
+ dp->dl_stid.sc_status |= statusmask;
+ if (statusmask & SC_STATUS_ADMIN_REVOKED)
+ atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked);
+
/* Ensure that deleg break won't try to requeue it */
++dp->dl_time;
spin_lock(&fp->fi_lock);
@@ -1353,7 +1363,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
bool unhashed;
spin_lock(&state_lock);
- unhashed = unhash_delegation_locked(dp);
+ unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED);
spin_unlock(&state_lock);
if (unhashed)
destroy_unhashed_deleg(dp);
@@ -1367,9 +1377,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
trace_nfsd_stid_revoke(&dp->dl_stid);
- if (clp->cl_minorversion) {
+ if (dp->dl_stid.sc_status &
+ (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)) {
spin_lock(&clp->cl_lock);
- dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
refcount_inc(&dp->dl_stid.sc_count);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
@@ -1377,8 +1387,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
destroy_unhashed_deleg(dp);
}
-/*
- * SETCLIENTID state
+/*
+ * SETCLIENTID state
*/
static unsigned int clientid_hashval(u32 id)
@@ -1531,6 +1541,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
list_add(&stp->st_locks, reaplist);
}
@@ -1541,7 +1553,7 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
if (!unhash_ol_stateid(stp))
return false;
list_del_init(&stp->st_locks);
- nfs4_unhash_stid(&stp->st_stid);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
return true;
}
@@ -1599,7 +1611,7 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
while (!list_empty(&open_stp->st_locks)) {
stp = list_entry(open_stp->st_locks.next,
struct nfs4_ol_stateid, st_locks);
- WARN_ON(!unhash_lock_stateid(stp));
+ unhash_lock_stateid(stp);
put_ol_stateid_locked(stp, reaplist);
}
}
@@ -1620,6 +1632,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
LIST_HEAD(reaplist);
spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
if (unhash_open_stateid(stp, &reaplist))
put_ol_stateid_locked(stp, &reaplist);
spin_unlock(&stp->st_stid.sc_client->cl_lock);
@@ -1675,6 +1688,136 @@ static void release_openowner(struct nfs4_openowner *oo)
nfs4_put_stateowner(&oo->oo_owner);
}
+static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
+ struct super_block *sb,
+ unsigned int sc_types)
+{
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if ((stid->sc_type & sc_types) &&
+ stid->sc_status == 0 &&
+ stid->sc_file->fi_inode->i_sb == sb) {
+ refcount_inc(&stid->sc_count);
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+ return stid;
+}
+
+/**
+ * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem
+ * @net: used to identify instance of nfsd (there is one per net namespace)
+ * @sb: super_block used to identify target filesystem
+ *
+ * All nfs4 states (open, lock, delegation, layout) held by the server instance
+ * and associated with a file on the given filesystem will be revoked resulting
+ * in any files being closed and so all references from nfsd to the filesystem
+ * being released. Thus nfsd will no longer prevent the filesystem from being
+ * unmounted.
+ *
+ * The clients which own the states will subsequently being notified that the
+ * states have been "admin-revoked".
+ */
+void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unsigned int idhashval;
+ unsigned int sc_types;
+
+ sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT;
+
+ spin_lock(&nn->client_lock);
+ for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
+ struct list_head *head = &nn->conf_id_hashtbl[idhashval];
+ struct nfs4_client *clp;
+ retry:
+ list_for_each_entry(clp, head, cl_idhash) {
+ struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
+ sc_types);
+ if (stid) {
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
+ struct nfs4_layout_stateid *ls;
+
+ spin_unlock(&nn->client_lock);
+ switch (stid->sc_type) {
+ case SC_TYPE_OPEN:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ OPEN_STATEID_MUTEX);
+
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ spin_unlock(&clp->cl_lock);
+ release_all_access(stp);
+ } else
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ break;
+ case SC_TYPE_LOCK:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ LOCK_STATEID_MUTEX);
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ struct nfs4_lockowner *lo =
+ lockowner(stp->st_stateowner);
+ struct nfsd_file *nf;
+
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ spin_unlock(&clp->cl_lock);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file,
+ (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
+ release_all_access(stp);
+ } else
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ break;
+ case SC_TYPE_DELEG:
+ dp = delegstateid(stid);
+ spin_lock(&state_lock);
+ if (!unhash_delegation_locked(
+ dp, SC_STATUS_ADMIN_REVOKED))
+ dp = NULL;
+ spin_unlock(&state_lock);
+ if (dp)
+ revoke_delegation(dp);
+ break;
+ case SC_TYPE_LAYOUT:
+ ls = layoutstateid(stid);
+ nfsd4_close_layout(ls);
+ break;
+ }
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ if (clp->cl_minorversion == 0)
+ /* Allow cleanup after a lease period.
+ * store_release ensures cleanup will
+ * see any newly revoked states if it
+ * sees the time updated.
+ */
+ nn->nfs40_last_revoke =
+ ktime_get_boottime_seconds();
+ goto retry;
+ }
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
@@ -2228,7 +2371,7 @@ __destroy_client(struct nfs4_client *clp)
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -2460,14 +2603,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
}
static struct nfs4_stid *
-find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
+ unsigned short typemask, unsigned short ok_states)
{
struct nfs4_stid *s;
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, t);
if (s != NULL) {
- if (typemask & s->sc_type)
+ if ((s->sc_status & ~ok_states) == 0 &&
+ (typemask & s->sc_type))
refcount_inc(&s->sc_count);
else
s = NULL;
@@ -2487,9 +2632,9 @@ static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
static void seq_quote_mem(struct seq_file *m, char *data, int len)
{
- seq_printf(m, "\"");
+ seq_puts(m, "\"");
seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\");
- seq_printf(m, "\"");
+ seq_puts(m, "\"");
}
static const char *cb_state2str(int state)
@@ -2530,20 +2675,22 @@ static int client_info_show(struct seq_file *m, void *v)
seq_puts(m, "status: unconfirmed\n");
seq_printf(m, "seconds from last renew: %lld\n",
ktime_get_boottime_seconds() - clp->cl_time);
- seq_printf(m, "name: ");
+ seq_puts(m, "name: ");
seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
if (clp->cl_nii_domain.data) {
- seq_printf(m, "Implementation domain: ");
+ seq_puts(m, "Implementation domain: ");
seq_quote_mem(m, clp->cl_nii_domain.data,
clp->cl_nii_domain.len);
- seq_printf(m, "\nImplementation name: ");
+ seq_puts(m, "\nImplementation name: ");
seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+ seq_printf(m, "admin-revoked states: %d\n",
+ atomic_read(&clp->cl_admin_revoked));
drop_client(clp);
return 0;
@@ -2602,7 +2749,7 @@ static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
{
- seq_printf(s, "owner: ");
+ seq_puts(s, "owner: ");
seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
}
@@ -2620,20 +2767,13 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
struct nfs4_stateowner *oo;
unsigned int access, deny;
- if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
- return 0; /* XXX: or SEQ_SKIP? */
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (!file)
- goto out;
-
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: open, ");
+ seq_puts(s, ": { type: open, ");
access = bmap_to_share_mode(ols->st_access_bmap);
deny = bmap_to_share_mode(ols->st_deny_bmap);
@@ -2645,14 +2785,19 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, ", ");
- nfs4_show_owner(s, oo);
- seq_printf(s, " }\n");
-out:
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
spin_unlock(&nf->fi_lock);
+ nfs4_show_owner(s, oo);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
return 0;
}
@@ -2666,30 +2811,31 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (!file)
- goto out;
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: lock, ");
+ seq_puts(s, ": { type: lock, ");
- /*
- * Note: a lock stateid isn't really the same thing as a lock,
- * it's the locking state held by one owner on a file, and there
- * may be multiple (or no) lock ranges associated with it.
- * (Same for the matter is true of open stateids.)
- */
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */
- nfs4_show_superblock(s, file);
- /* XXX: open stateid? */
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, ", ");
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
nfs4_show_owner(s, oo);
- seq_printf(s, " }\n");
-out:
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
spin_unlock(&nf->fi_lock);
return 0;
}
@@ -2702,27 +2848,28 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
ds = delegstateid(st);
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = nf->fi_deleg_file;
- if (!file)
- goto out;
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: deleg, ");
+ seq_puts(s, ": { type: deleg, ");
- /* Kinda dead code as long as we only support read delegs: */
- seq_printf(s, "access: %s, ",
- ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+ seq_printf(s, "access: %s",
+ ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
/* XXX: lease time, whether it's being recalled. */
- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, " }\n");
-out:
+ spin_lock(&nf->fi_lock);
+ file = nf->fi_deleg_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
spin_unlock(&nf->fi_lock);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
return 0;
}
@@ -2732,18 +2879,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
struct nfsd_file *file;
ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
- file = ls->ls_file;
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: layout, ");
+ seq_puts(s, ": { type: layout");
/* XXX: What else would be useful? */
- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, " }\n");
+ spin_lock(&ls->ls_stid.sc_file->fi_lock);
+ file = ls->ls_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
+ spin_unlock(&ls->ls_stid.sc_file->fi_lock);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
return 0;
}
@@ -2753,13 +2907,13 @@ static int states_show(struct seq_file *s, void *v)
struct nfs4_stid *st = v;
switch (st->sc_type) {
- case NFS4_OPEN_STID:
+ case SC_TYPE_OPEN:
return nfs4_show_open(s, st);
- case NFS4_LOCK_STID:
+ case SC_TYPE_LOCK:
return nfs4_show_lock(s, st);
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
return nfs4_show_deleg(s, st);
- case NFS4_LAYOUT_STID:
+ case SC_TYPE_LAYOUT:
return nfs4_show_layout(s, st);
default:
return 0; /* XXX: or SEQ_SKIP? */
@@ -2896,11 +3050,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
spin_unlock(&nn->client_lock);
}
+static int
+nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ ncf->ncf_cb_status = task->tk_status;
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ nfs4_put_stid(&dp->dl_stid);
+ clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
+ wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+}
+
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
};
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
+ .done = nfsd4_cb_getattr_done,
+ .release = nfsd4_cb_getattr_release,
+};
+
+static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
+{
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+ return;
+ /* set to proper status when nfsd4_cb_getattr_done runs */
+ ncf->ncf_cb_status = NFS4ERR_IO;
+
+ refcount_inc(&dp->dl_stid.sc_count);
+ nfsd4_run_cb(&ncf->ncf_getattr);
+}
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -3414,6 +3616,9 @@ out_new:
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
+ /* Contrived initial CREATE_SESSION response */
+ new->cl_cs_slot.sl_status = nfserr_seq_misordered;
+
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -3584,10 +3789,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
struct nfsd4_create_session *cr_ses = &u->create_session;
struct sockaddr *sa = svc_addr(rqstp);
struct nfs4_client *conf, *unconf;
+ struct nfsd4_clid_slot *cs_slot;
struct nfs4_client *old = NULL;
struct nfsd4_session *new;
struct nfsd4_conn *conn;
- struct nfsd4_clid_slot *cs_slot = NULL;
__be32 status = 0;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -3611,53 +3816,60 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_session;
spin_lock(&nn->client_lock);
+
+ /* RFC 8881 Section 18.36.4 Phase 1: Client record look-up. */
unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
conf = find_confirmed_client(&cr_ses->clientid, true, nn);
- WARN_ON_ONCE(conf && unconf);
+ if (!conf && !unconf) {
+ status = nfserr_stale_clientid;
+ goto out_free_conn;
+ }
- if (conf) {
- status = nfserr_wrong_cred;
- if (!nfsd4_mach_creds_match(conf, rqstp))
- goto out_free_conn;
+ /* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */
+ if (conf)
cs_slot = &conf->cl_cs_slot;
- status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache)
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ else
+ cs_slot = &unconf->cl_cs_slot;
+ status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
+ if (status) {
+ if (status == nfserr_replay_cache) {
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
goto out_free_conn;
}
- } else if (unconf) {
+ goto out_cache_error;
+ }
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+
+ /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
+ if (conf) {
+ status = nfserr_wrong_cred;
+ if (!nfsd4_mach_creds_match(conf, rqstp))
+ goto out_cache_error;
+ } else {
status = nfserr_clid_inuse;
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
!rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
trace_nfsd_clid_cred_mismatch(unconf, rqstp);
- goto out_free_conn;
+ goto out_cache_error;
}
status = nfserr_wrong_cred;
if (!nfsd4_mach_creds_match(unconf, rqstp))
- goto out_free_conn;
- cs_slot = &unconf->cl_cs_slot;
- status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- /* an unconfirmed replay returns misordered */
- status = nfserr_seq_misordered;
- goto out_free_conn;
- }
+ goto out_cache_error;
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
- goto out_free_conn;
+ goto out_cache_error;
}
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
conf = unconf;
- } else {
- status = nfserr_stale_clientid;
- goto out_free_conn;
}
+
+ /* RFC 8881 Section 18.36.4 Phase 4: Session creation. */
status = nfs_ok;
/* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST;
@@ -3669,8 +3881,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* cache solo and embedded create sessions under the client_lock */
nfsd4_cache_create_session(cr_ses, cs_slot, status);
@@ -3683,6 +3893,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
if (old)
expire_client(old);
return status;
+
+out_cache_error:
+ nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
spin_unlock(&nn->client_lock);
free_conn(conn);
@@ -4058,6 +4271,9 @@ out:
}
if (!list_empty(&clp->cl_revoked))
seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+ if (atomic_read(&clp->cl_admin_revoked))
+ seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
+ trace_nfsd_seq4_status(rqstp, seq);
out_no_session:
if (conn)
free_conn(conn);
@@ -4352,32 +4568,25 @@ nfsd4_free_slabs(void)
int
nfsd4_init_slabs(void)
{
- client_slab = kmem_cache_create("nfsd4_clients",
- sizeof(struct nfs4_client), 0, 0, NULL);
+ client_slab = KMEM_CACHE(nfs4_client, 0);
if (client_slab == NULL)
goto out;
- openowner_slab = kmem_cache_create("nfsd4_openowners",
- sizeof(struct nfs4_openowner), 0, 0, NULL);
+ openowner_slab = KMEM_CACHE(nfs4_openowner, 0);
if (openowner_slab == NULL)
goto out_free_client_slab;
- lockowner_slab = kmem_cache_create("nfsd4_lockowners",
- sizeof(struct nfs4_lockowner), 0, 0, NULL);
+ lockowner_slab = KMEM_CACHE(nfs4_lockowner, 0);
if (lockowner_slab == NULL)
goto out_free_openowner_slab;
- file_slab = kmem_cache_create("nfsd4_files",
- sizeof(struct nfs4_file), 0, 0, NULL);
+ file_slab = KMEM_CACHE(nfs4_file, 0);
if (file_slab == NULL)
goto out_free_lockowner_slab;
- stateid_slab = kmem_cache_create("nfsd4_stateids",
- sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
+ stateid_slab = KMEM_CACHE(nfs4_ol_stateid, 0);
if (stateid_slab == NULL)
goto out_free_file_slab;
- deleg_slab = kmem_cache_create("nfsd4_delegations",
- sizeof(struct nfs4_delegation), 0, 0, NULL);
+ deleg_slab = KMEM_CACHE(nfs4_delegation, 0);
if (deleg_slab == NULL)
goto out_free_stateid_slab;
- odstate_slab = kmem_cache_create("nfsd4_odstate",
- sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
+ odstate_slab = KMEM_CACHE(nfs4_clnt_odstate, 0);
if (odstate_slab == NULL)
goto out_free_deleg_slab;
return 0;
@@ -4531,7 +4740,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
continue;
if (local->st_stateowner != &oo->oo_owner)
continue;
- if (local->st_stid.sc_type == NFS4_OPEN_STID) {
+ if (local->st_stid.sc_type == SC_TYPE_OPEN &&
+ !local->st_stid.sc_status) {
ret = local;
refcount_inc(&ret->st_stid.sc_count);
break;
@@ -4540,22 +4750,75 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
return ret;
}
-static __be32
-nfsd4_verify_open_stid(struct nfs4_stid *s)
+static void nfsd4_drop_revoked_stid(struct nfs4_stid *s)
+ __releases(&s->sc_client->cl_lock)
{
- __be32 ret = nfs_ok;
+ struct nfs4_client *cl = s->sc_client;
+ LIST_HEAD(reaplist);
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
+ bool unhashed;
switch (s->sc_type) {
- default:
+ case SC_TYPE_OPEN:
+ stp = openlockstateid(s);
+ if (unhash_open_stateid(stp, &reaplist))
+ put_ol_stateid_locked(stp, &reaplist);
+ spin_unlock(&cl->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
break;
- case 0:
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
- ret = nfserr_bad_stateid;
+ case SC_TYPE_LOCK:
+ stp = openlockstateid(s);
+ unhashed = unhash_lock_stateid(stp);
+ spin_unlock(&cl->cl_lock);
+ if (unhashed)
+ nfs4_put_stid(s);
break;
- case NFS4_REVOKED_DELEG_STID:
- ret = nfserr_deleg_revoked;
+ case SC_TYPE_DELEG:
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ break;
+ default:
+ spin_unlock(&cl->cl_lock);
+ }
+}
+
+static void nfsd40_drop_revoked_stid(struct nfs4_client *cl,
+ stateid_t *stid)
+{
+ /* NFSv4.0 has no way for the client to tell the server
+ * that it can forget an admin-revoked stateid.
+ * So we keep it around until the first time that the
+ * client uses it, and drop it the first time
+ * nfserr_admin_revoked is returned.
+ * For v4.1 and later we wait until explicitly told
+ * to free the stateid.
+ */
+ if (cl->cl_minorversion == 0) {
+ struct nfs4_stid *st;
+
+ spin_lock(&cl->cl_lock);
+ st = find_stateid_locked(cl, stid);
+ if (st)
+ nfsd4_drop_revoked_stid(st);
+ else
+ spin_unlock(&cl->cl_lock);
}
+}
+
+static __be32
+nfsd4_verify_open_stid(struct nfs4_stid *s)
+{
+ __be32 ret = nfs_ok;
+
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ ret = nfserr_admin_revoked;
+ else if (s->sc_status & SC_STATUS_REVOKED)
+ ret = nfserr_deleg_revoked;
+ else if (s->sc_status & SC_STATUS_CLOSED)
+ ret = nfserr_bad_stateid;
return ret;
}
@@ -4567,6 +4830,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(stp->st_stid.sc_client,
+ &stp->st_stid.sc_stateid);
+
if (ret != nfs_ok)
mutex_unlock(&stp->st_mutex);
return ret;
@@ -4641,7 +4908,7 @@ retry:
open->op_stp = NULL;
refcount_inc(&stp->st_stid.sc_count);
- stp->st_stid.sc_type = NFS4_OPEN_STID;
+ stp->st_stid.sc_type = SC_TYPE_OPEN;
INIT_LIST_HEAD(&stp->st_locks);
stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
get_nfs4_file(fp);
@@ -4868,9 +5135,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
- if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
- dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
- return 1;
+ if (dp->dl_stid.sc_status)
+ /* CLOSED or REVOKED */
+ return 1;
switch (task->tk_status) {
case 0:
@@ -4922,9 +5189,9 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
/* Called from break_lease() with flc_lock held. */
static bool
-nfsd_break_deleg_cb(struct file_lock *fl)
+nfsd_break_deleg_cb(struct file_lease *fl)
{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+ struct nfs4_delegation *dp = (struct nfs4_delegation *) fl->c.flc_owner;
struct nfs4_file *fp = dp->dl_stid.sc_file;
struct nfs4_client *clp = dp->dl_stid.sc_client;
struct nfsd_net *nn;
@@ -4945,10 +5212,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
*/
fl->fl_break_time = 0;
- spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true;
nfsd_break_one_deleg(dp);
- spin_unlock(&fp->fi_lock);
return false;
}
@@ -4960,9 +5225,9 @@ nfsd_break_deleg_cb(struct file_lock *fl)
* %true: Lease conflict was resolved
* %false: Lease conflict was not resolved.
*/
-static bool nfsd_breaker_owns_lease(struct file_lock *fl)
+static bool nfsd_breaker_owns_lease(struct file_lease *fl)
{
- struct nfs4_delegation *dl = fl->fl_owner;
+ struct nfs4_delegation *dl = fl->c.flc_owner;
struct svc_rqst *rqst;
struct nfs4_client *clp;
@@ -4977,10 +5242,10 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
}
static int
-nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
+nfsd_change_deleg_cb(struct file_lease *onlist, int arg,
struct list_head *dispose)
{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner;
+ struct nfs4_delegation *dp = (struct nfs4_delegation *) onlist->c.flc_owner;
struct nfs4_client *clp = dp->dl_stid.sc_client;
if (arg & F_UNLCK) {
@@ -4991,7 +5256,7 @@ nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
return -EAGAIN;
}
-static const struct lock_manager_operations nfsd_lease_mng_ops = {
+static const struct lease_manager_operations nfsd_lease_mng_ops = {
.lm_breaker_owns_lease = nfsd_breaker_owns_lease,
.lm_break = nfsd_break_deleg_cb,
.lm_change = nfsd_change_deleg_cb,
@@ -5115,12 +5380,12 @@ static int share_access_to_flags(u32 share_access)
return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
}
-static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
+static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl,
+ stateid_t *s)
{
struct nfs4_stid *ret;
- ret = find_stateid_by_type(cl, s,
- NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
+ ret = find_stateid_by_type(cl, s, SC_TYPE_DELEG, SC_STATUS_REVOKED);
if (!ret)
return NULL;
return delegstateid(ret);
@@ -5143,10 +5408,15 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
- if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+ if (deleg->dl_stid.sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfs4_put_stid(&deleg->dl_stid);
+ status = nfserr_admin_revoked;
+ goto out;
+ }
+ if (deleg->dl_stid.sc_status & SC_STATUS_REVOKED) {
nfs4_put_stid(&deleg->dl_stid);
- if (cl->cl_minorversion)
- status = nfserr_deleg_revoked;
+ nfsd40_drop_revoked_stid(cl, &open->op_delegate_stateid);
+ status = nfserr_deleg_revoked;
goto out;
}
flags = share_access_to_flags(open->op_share_access);
@@ -5191,7 +5461,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0);
+ return nfsd_setattr(rqstp, fh, &attrs, NULL);
}
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
@@ -5331,21 +5601,20 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
int flag)
{
- struct file_lock *fl;
+ struct file_lease *fl;
- fl = locks_alloc_lock();
+ fl = locks_alloc_lease();
if (!fl)
return NULL;
fl->fl_lmops = &nfsd_lease_mng_ops;
- fl->fl_flags = FL_DELEG;
- fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
- fl->fl_end = OFFSET_MAX;
- fl->fl_owner = (fl_owner_t)dp;
- fl->fl_pid = current->tgid;
- fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
+ fl->c.flc_flags = FL_DELEG;
+ fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->c.flc_owner = (fl_owner_t)dp;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
return fl;
}
@@ -5463,7 +5732,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
struct nfsd_file *nf = NULL;
- struct file_lock *fl;
+ struct file_lease *fl;
u32 dl_type;
/*
@@ -5533,9 +5802,10 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (!fl)
goto out_clnt_odstate;
- status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
+ status = kernel_setlease(fp->fi_deleg_file->nf_file,
+ fl->c.flc_type, &fl, NULL);
if (fl)
- locks_free_lock(fl);
+ locks_free_lease(fl);
if (status)
goto out_clnt_odstate;
@@ -5557,13 +5827,16 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (status)
goto out_unlock;
+ status = -EAGAIN;
+ if (fp->fi_had_conflict)
+ goto out_unlock;
+
spin_lock(&state_lock);
+ spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
- if (fp->fi_had_conflict)
- status = -EAGAIN;
- else
- status = hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
+ spin_unlock(&clp->cl_lock);
spin_unlock(&state_lock);
if (status)
@@ -5571,7 +5844,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
return dp;
out_unlock:
- vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
+ kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
@@ -5635,6 +5908,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent = NULL;
int cb_up;
int status = 0;
+ struct kstat stat;
+ struct path path;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -5672,6 +5947,18 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = currentfh->fh_dentry;
+ if (vfs_getattr(&path, &stat,
+ (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ AT_STATX_SYNC_AS_STAT)) {
+ nfs4_put_stid(&dp->dl_stid);
+ destroy_delegation(dp);
+ goto out_no_deleg;
+ }
+ dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
+ dp->dl_cb_fattr.ncf_initial_cinfo =
+ nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
} else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
@@ -5775,7 +6062,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
} else {
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
if (status) {
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
goto out;
@@ -6129,6 +6415,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist)
}
}
+static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
+ struct laundry_time *lt)
+{
+ struct nfs4_client *clp;
+
+ spin_lock(&nn->client_lock);
+ if (nn->nfs40_last_revoke == 0 ||
+ nn->nfs40_last_revoke > lt->cutoff) {
+ spin_unlock(&nn->client_lock);
+ return;
+ }
+ nn->nfs40_last_revoke = 0;
+
+retry:
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ if (atomic_read(&clp->cl_admin_revoked) == 0)
+ continue;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ refcount_inc(&stid->sc_count);
+ spin_unlock(&nn->client_lock);
+ /* this function drops ->cl_lock */
+ nfsd4_drop_revoked_stid(stid);
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ goto retry;
+ }
+ spin_unlock(&clp->cl_lock);
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
@@ -6162,12 +6485,14 @@ nfs4_laundromat(struct nfsd_net *nn)
nfs4_get_client_reaplist(nn, &reaplist, &lt);
nfs4_process_client_reaplist(&reaplist);
+ nfs40_clean_admin_revoked(nn, &lt);
+
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (!state_expired(&lt, dp->dl_time))
break;
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_REVOKED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -6226,6 +6551,8 @@ nfs4_laundromat(struct nfsd_net *nn)
/* service the server-to-server copy delayed unmount list */
nfsd4_ssc_expire_umount(nn);
#endif
+ if (atomic_long_read(&num_delegations) >= max_delegations)
+ deleg_reaper(nn);
out:
return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
}
@@ -6287,6 +6614,8 @@ deleg_reaper(struct nfsd_net *nn)
list_del_init(&clp->cl_ra_cblist);
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
@@ -6380,6 +6709,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
if (ret == nfs_ok)
ret = check_stateid_generation(in, &s->sc_stateid, has_session);
spin_unlock(&s->sc_lock);
+ if (ret == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(s->sc_client,
+ &s->sc_stateid);
return ret;
}
@@ -6406,32 +6738,33 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
if (status)
goto out_unlock;
+ status = nfsd4_verify_open_stid(s);
+ if (status)
+ goto out_unlock;
+
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
status = nfs_ok;
break;
- case NFS4_REVOKED_DELEG_STID:
- status = nfserr_deleg_revoked;
- break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
status = nfsd4_check_openowner_confirmed(openlockstateid(s));
break;
default:
printk("unknown stateid type %x\n", s->sc_type);
- fallthrough;
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
status = nfserr_bad_stateid;
}
out_unlock:
spin_unlock(&cl->cl_lock);
+ if (status == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(cl, stateid);
return status;
}
__be32
nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statusmask,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
@@ -6442,10 +6775,15 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
* only return revoked delegations if explicitly asked.
* otherwise we report revoked or bad_stateid status.
*/
- if (typemask & NFS4_REVOKED_DELEG_STID)
+ if (statusmask & SC_STATUS_REVOKED)
return_revoked = true;
- else if (typemask & NFS4_DELEG_STID)
- typemask |= NFS4_REVOKED_DELEG_STID;
+ if (typemask & SC_TYPE_DELEG)
+ /* Always allow REVOKED for DELEG so we can
+ * retturn the appropriate error.
+ */
+ statusmask |= SC_STATUS_REVOKED;
+
+ statusmask |= SC_STATUS_ADMIN_REVOKED;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
@@ -6458,14 +6796,17 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
}
if (status)
return status;
- stid = find_stateid_by_type(cstate->clp, stateid, typemask);
+ stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask);
if (!stid)
return nfserr_bad_stateid;
- if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+ if ((stid->sc_status & SC_STATUS_REVOKED) && !return_revoked) {
nfs4_put_stid(stid);
- if (cstate->minorversion)
- return nfserr_deleg_revoked;
- return nfserr_bad_stateid;
+ return nfserr_deleg_revoked;
+ }
+ if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfsd40_drop_revoked_stid(cstate->clp, stateid);
+ nfs4_put_stid(stid);
+ return nfserr_admin_revoked;
}
*s = stid;
return nfs_ok;
@@ -6476,17 +6817,17 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
{
struct nfsd_file *ret = NULL;
- if (!s)
+ if (!s || s->sc_status)
return NULL;
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
spin_lock(&s->sc_file->fi_lock);
ret = nfsd_file_get(s->sc_file->fi_deleg_file);
spin_unlock(&s->sc_file->fi_lock);
break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
if (flags & RD_STATE)
ret = find_readable_file(s->sc_file);
else
@@ -6599,7 +6940,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
goto out;
*stid = find_stateid_by_type(found, &cps->cp_p_stateid,
- NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
+ SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
+ 0);
if (*stid)
status = nfs_ok;
else
@@ -6656,8 +6998,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
}
status = nfsd4_lookup_stateid(cstate, stateid,
- NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
- &s, nn);
+ SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
+ 0, &s, nn);
if (status == nfserr_bad_stateid)
status = find_cpntf_state(nn, stateid, &s);
if (status)
@@ -6668,16 +7010,13 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
goto out;
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
status = nfs4_check_delegmode(delegstateid(s), flags);
break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
- default:
- status = nfserr_bad_stateid;
- break;
}
if (status)
goto out;
@@ -6756,34 +7095,39 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
- if (!s)
+ if (!s || s->sc_status & SC_STATUS_CLOSED)
goto out_unlock;
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfsd4_drop_revoked_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
spin_lock(&s->sc_lock);
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
+ if (s->sc_status & SC_STATUS_REVOKED) {
+ spin_unlock(&s->sc_lock);
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
ret = nfserr_locks_held;
break;
- case NFS4_OPEN_STID:
+ case SC_TYPE_OPEN:
ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (ret)
break;
ret = nfserr_locks_held;
break;
- case NFS4_LOCK_STID:
+ case SC_TYPE_LOCK:
spin_unlock(&s->sc_lock);
refcount_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
- case NFS4_REVOKED_DELEG_STID:
- spin_unlock(&s->sc_lock);
- dp = delegstateid(s);
- list_del_init(&dp->dl_recall_lru);
- spin_unlock(&cl->cl_lock);
- nfs4_put_stid(s);
- ret = nfs_ok;
- goto out;
- /* Default falls through and returns nfserr_bad_stateid */
}
spin_unlock(&s->sc_lock);
out_unlock:
@@ -6825,6 +7169,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* @seqid: seqid (provided by client)
* @stateid: stateid (provided by client)
* @typemask: mask of allowable types for this operation
+ * @statusmask: mask of allowed states: 0 or STID_CLOSED
* @stpp: return pointer for the stateid found
* @nn: net namespace for request
*
@@ -6834,7 +7179,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
- stateid_t *stateid, char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statusmask,
struct nfs4_ol_stateid **stpp,
struct nfsd_net *nn)
{
@@ -6845,7 +7191,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid,
+ typemask, statusmask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
@@ -6867,7 +7214,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
struct nfs4_ol_stateid *stp;
status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
- NFS4_OPEN_STID, &stp, nn);
+ SC_TYPE_OPEN, 0, &stp, nn);
if (status)
return status;
oo = openowner(stp->st_stateowner);
@@ -6898,8 +7245,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
status = nfs4_preprocess_seqid_op(cstate,
- oc->oc_seqid, &oc->oc_req_stateid,
- NFS4_OPEN_STID, &stp, nn);
+ oc->oc_seqid, &oc->oc_req_stateid,
+ SC_TYPE_OPEN, 0, &stp, nn);
if (status)
goto out;
oo = openowner(stp->st_stateowner);
@@ -7029,18 +7376,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- dprintk("NFSD: nfsd4_close on file %pd\n",
+ dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
- &close->cl_stateid,
- NFS4_OPEN_STID|NFS4_CLOSED_STID,
- &stp, nn);
+ &close->cl_stateid,
+ SC_TYPE_OPEN, SC_STATUS_CLOSED,
+ &stp, nn);
nfsd4_bump_seqid(cstate, status);
if (status)
- goto out;
+ goto out;
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
+ spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
+ spin_unlock(&stp->st_stid.sc_client->cl_lock);
/*
* Technically we don't _really_ have to increment or copy it, since
@@ -7082,7 +7431,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;
- status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, 0, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
@@ -7149,7 +7498,7 @@ nfsd4_lm_put_owner(fl_owner_t owner)
static bool
nfsd4_lm_lock_expirable(struct file_lock *cfl)
{
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner;
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *) cfl->c.flc_owner;
struct nfs4_client *clp = lo->lo_owner.so_client;
struct nfsd_net *nn;
@@ -7171,7 +7520,7 @@ nfsd4_lm_expire_lock(void)
static void
nfsd4_lm_notify(struct file_lock *fl)
{
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *) fl->c.flc_owner;
struct net *net = lo->lo_owner.so_client->net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct nfsd4_blocked_lock *nbl = container_of(fl,
@@ -7208,7 +7557,7 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
struct nfs4_lockowner *lo;
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
- lo = (struct nfs4_lockowner *) fl->fl_owner;
+ lo = (struct nfs4_lockowner *) fl->c.flc_owner;
xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
GFP_KERNEL);
if (!deny->ld_owner.data)
@@ -7227,7 +7576,7 @@ nevermind:
if (fl->fl_end != NFS4_MAX_UINT64)
deny->ld_length = fl->fl_end - fl->fl_start + 1;
deny->ld_type = NFS4_READ_LT;
- if (fl->fl_type != F_RDLCK)
+ if (fl->c.flc_type != F_RDLCK)
deny->ld_type = NFS4_WRITE_LT;
}
@@ -7349,7 +7698,7 @@ retry:
if (retstp)
goto out_found;
refcount_inc(&stp->st_stid.sc_count);
- stp->st_stid.sc_type = NFS4_LOCK_STID;
+ stp->st_stid.sc_type = SC_TYPE_LOCK;
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
stp->st_stid.sc_file = fp;
@@ -7493,8 +7842,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int lkflg;
int err;
bool new = false;
- unsigned char fl_type;
- unsigned int fl_flags = FL_POSIX;
+ unsigned char type;
+ unsigned int flags = FL_POSIX;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -7536,9 +7885,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&lock_stp, &new);
} else {
status = nfs4_preprocess_seqid_op(cstate,
- lock->lk_old_lock_seqid,
- &lock->lk_old_lock_stateid,
- NFS4_LOCK_STID, &lock_stp, nn);
+ lock->lk_old_lock_seqid,
+ &lock->lk_old_lock_stateid,
+ SC_TYPE_LOCK, 0, &lock_stp,
+ nn);
}
if (status)
goto out;
@@ -7557,14 +7907,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
if (lock->lk_reclaim)
- fl_flags |= FL_RECLAIM;
+ flags |= FL_RECLAIM;
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
if (nfsd4_has_session(cstate) ||
exportfs_lock_op_is_async(sb->s_export_op))
- fl_flags |= FL_SLEEP;
+ flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
@@ -7572,12 +7922,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
- fl_type = F_RDLCK;
+ type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
if (nfsd4_has_session(cstate) ||
exportfs_lock_op_is_async(sb->s_export_op))
- fl_flags |= FL_SLEEP;
+ flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
@@ -7585,7 +7935,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
- fl_type = F_WRLCK;
+ type = F_WRLCK;
break;
default:
status = nfserr_inval;
@@ -7605,7 +7955,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* on those filesystems:
*/
if (!exportfs_lock_op_is_async(sb->s_export_op))
- fl_flags &= ~FL_SLEEP;
+ flags &= ~FL_SLEEP;
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
@@ -7615,11 +7965,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
file_lock = &nbl->nbl_lock;
- file_lock->fl_type = fl_type;
- file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
- file_lock->fl_pid = current->tgid;
- file_lock->fl_file = nf->nf_file;
- file_lock->fl_flags = fl_flags;
+ file_lock->c.flc_type = type;
+ file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
+ file_lock->c.flc_pid = current->tgid;
+ file_lock->c.flc_file = nf->nf_file;
+ file_lock->c.flc_flags = flags;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = lock->lk_offset;
file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
@@ -7632,7 +7982,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- if (fl_flags & FL_SLEEP) {
+ if (flags & FL_SLEEP) {
nbl->nbl_time = ktime_get_boottime_seconds();
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
@@ -7669,7 +8019,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
out:
if (nbl) {
/* dequeue it if we queued it before */
- if (fl_flags & FL_SLEEP) {
+ if (flags & FL_SLEEP) {
spin_lock(&nn->blocked_locks_lock);
if (!list_empty(&nbl->nbl_list) &&
!list_empty(&nbl->nbl_lru)) {
@@ -7737,9 +8087,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
if (err)
goto out;
- lock->fl_file = nf->nf_file;
+ lock->c.flc_file = nf->nf_file;
err = nfserrno(vfs_test_lock(nf->nf_file, lock));
- lock->fl_file = NULL;
+ lock->c.flc_file = NULL;
out:
inode_unlock(inode);
nfsd_file_put(nf);
@@ -7784,11 +8134,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (lockt->lt_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
- file_lock->fl_type = F_RDLCK;
+ file_lock->c.flc_type = F_RDLCK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
- file_lock->fl_type = F_WRLCK;
+ file_lock->c.flc_type = F_WRLCK;
break;
default:
dprintk("NFSD: nfs4_lockt: bad lock type!\n");
@@ -7798,9 +8148,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
if (lo)
- file_lock->fl_owner = (fl_owner_t)lo;
- file_lock->fl_pid = current->tgid;
- file_lock->fl_flags = FL_POSIX;
+ file_lock->c.flc_owner = (fl_owner_t)lo;
+ file_lock->c.flc_pid = current->tgid;
+ file_lock->c.flc_flags = FL_POSIX;
file_lock->fl_start = lockt->lt_offset;
file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
@@ -7811,7 +8161,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- if (file_lock->fl_type != F_UNLCK) {
+ if (file_lock->c.flc_type != F_UNLCK) {
status = nfserr_denied;
nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
}
@@ -7851,8 +8201,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
- &locku->lu_stateid, NFS4_LOCK_STID,
- &stp, nn);
+ &locku->lu_stateid, SC_TYPE_LOCK, 0,
+ &stp, nn);
if (status)
goto out;
nf = find_any_file(stp->st_stid.sc_file);
@@ -7867,11 +8217,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto put_file;
}
- file_lock->fl_type = F_UNLCK;
- file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
- file_lock->fl_pid = current->tgid;
- file_lock->fl_file = nf->nf_file;
- file_lock->fl_flags = FL_POSIX;
+ file_lock->c.flc_type = F_UNLCK;
+ file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
+ file_lock->c.flc_pid = current->tgid;
+ file_lock->c.flc_file = nf->nf_file;
+ file_lock->c.flc_flags = FL_POSIX;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = locku->lu_offset;
@@ -7911,14 +8261,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct nfsd_file *nf = find_any_file(fp);
+ struct nfsd_file *nf;
struct inode *inode;
struct file_lock_context *flctx;
+ spin_lock(&fp->fi_lock);
+ nf = find_any_file_locked(fp);
if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
- return status;
+ goto out;
}
inode = file_inode(nf->nf_file);
@@ -7926,15 +8278,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
- list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
- if (fl->fl_owner == (fl_owner_t)lowner) {
+ for_each_file_lock(fl, &flctx->flc_posix) {
+ if (fl->c.flc_owner == (fl_owner_t)lowner) {
status = true;
break;
}
}
spin_unlock(&flctx->flc_lock);
}
- nfsd_file_put(nf);
+out:
+ spin_unlock(&fp->fi_lock);
return status;
}
@@ -7944,10 +8297,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * The lockowner's so_count is bumped when a lock record is added
- * or when copying a conflicting lock. The latter case is brief,
- * but can lead to fleeting false positives when looking for
- * locks-in-use.
+ * Check if theree are any locks still held and if not - free the lockowner
+ * and any lock state that is owned.
*
* Return values:
* %nfs_ok: lockowner released or not found
@@ -7983,17 +8334,20 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
spin_unlock(&clp->cl_lock);
return nfs_ok;
}
- if (atomic_read(&lo->lo_owner.so_count) != 2) {
- spin_unlock(&clp->cl_lock);
- nfs4_put_stateowner(&lo->lo_owner);
- return nfserr_locks_held;
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
+ }
}
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid,
st_perstateowner);
- WARN_ON(!unhash_lock_stateid(stp));
+ unhash_lock_stateid(stp);
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
@@ -8286,7 +8640,7 @@ nfs4_state_shutdown_net(struct net *net)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -8428,6 +8782,8 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @inode: file to be checked for a conflict
+ * @modified: return true if file was modified
+ * @size: new size of file if modified is true
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@@ -8436,27 +8792,30 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* delegation before replying to the GETATTR. See RFC 8881 section
* 18.7.4.
*
- * The current implementation does not support CB_GETATTR yet. However
- * this can avoid recalling the delegation could be added in follow up
- * work.
- *
* Returns 0 if there is no conflict; otherwise an nfs_stat
* code is returned.
*/
__be32
-nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
+ bool *modified, u64 *size)
{
__be32 status;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
- struct file_lock *fl;
+ struct file_lease *fl;
struct nfs4_delegation *dp;
+ struct iattr attrs;
+ struct nfs4_cb_fattr *ncf;
+ *modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
return 0;
spin_lock(&ctx->flc_lock);
- list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
- if (fl->fl_flags == FL_LAYOUT)
+ for_each_file_lock(fl, &ctx->flc_lease) {
+ unsigned char type = fl->c.flc_type;
+
+ if (fl->c.flc_flags == FL_LAYOUT)
continue;
if (fl->fl_lmops != &nfsd_lease_mng_ops) {
/*
@@ -8464,23 +8823,49 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
* we are done; there isn't any write delegation
* on this inode
*/
- if (fl->fl_type == F_RDLCK)
+ if (type == F_RDLCK)
break;
goto break_lease;
}
- if (fl->fl_type == F_WRLCK) {
- dp = fl->fl_owner;
+ if (type == F_WRLCK) {
+ dp = fl->c.flc_owner;
if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
spin_unlock(&ctx->flc_lock);
return 0;
}
break_lease:
+ nfsd_stats_wdeleg_getattr_inc(nn);
+ dp = fl->c.flc_owner;
+ ncf = &dp->dl_cb_fattr;
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
spin_unlock(&ctx->flc_lock);
- nfsd_stats_wdeleg_getattr_inc();
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
- if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode))
- return status;
+ wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
+ TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ if (ncf->ncf_cb_status) {
+ /* Recall delegation only if client didn't respond */
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ }
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
+ ncf->ncf_file_modified = true;
+ if (ncf->ncf_file_modified) {
+ /*
+ * Per section 10.4.3 of RFC 8881, the server would
+ * not update the file's metadata with the client's
+ * modified size
+ */
+ attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
+ attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
+ setattr_copy(&nop_mnt_idmap, inode, &attrs);
+ mark_inode_dirty(inode);
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ *size = ncf->ncf_cur_fsize;
+ *modified = true;
+ }
return 0;
}
break;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c719c475a068..fac938f563ad 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
unsigned long mask[2];
} u;
unsigned long bit;
+ bool file_modified = false;
+ u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
+ &file_modified, &size);
if (status)
goto out;
}
@@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
- args.size = args.stat.size;
+ if (file_modified)
+ args.size = size;
+ else
+ args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
@@ -5386,16 +5392,11 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
/*
* If the cookie is larger than the maximum number we can fit
- * in either the buffer we just got back from vfs_listxattr, or,
- * XDR-encoded, in the return buffer, it's invalid.
+ * in the buffer we just got back from vfs_listxattr, it's invalid.
*/
if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2))
return nfserr_badcookie;
- if (cookie > (listxattrs->lsxa_maxcount /
- (XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4)))
- return nfserr_badcookie;
-
*offsetp = (u32)cookie;
return 0;
}
@@ -5412,6 +5413,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
u64 cookie;
char *sp;
__be32 status, tmp;
+ __be64 wire_cookie;
__be32 *p;
u32 nuser;
@@ -5427,7 +5429,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
*/
cookie_offset = xdr->buf->len;
count_offset = cookie_offset + 8;
- p = xdr_reserve_space(xdr, 12);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
if (!p) {
status = nfserr_resource;
goto out;
@@ -5438,7 +5440,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
sp = listxattrs->lsxa_buf;
nuser = 0;
- xdrleft = listxattrs->lsxa_maxcount;
+ /* Bytes left is maxcount - 8 (cookie) - 4 (array count) */
+ xdrleft = listxattrs->lsxa_maxcount - XDR_UNIT * 3;
while (left > 0 && xdrleft > 0) {
slen = strlen(sp);
@@ -5451,7 +5454,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
slen -= XATTR_USER_PREFIX_LEN;
xdrlen = 4 + ((slen + 3) & ~3);
- if (xdrlen > xdrleft) {
+ /* Check if both entry and eof can fit in the XDR buffer */
+ if (xdrlen + XDR_UNIT > xdrleft) {
if (count == 0) {
/*
* Can't even fit the first attribute name.
@@ -5503,7 +5507,8 @@ wreof:
cookie = offset + count;
- write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8);
+ wire_cookie = cpu_to_be64(cookie);
+ write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &wire_cookie, 8);
tmp = cpu_to_be32(count);
write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4);
out:
@@ -5727,27 +5732,24 @@ release:
rqstp->rq_next_page = xdr->page_ptr + 1;
}
-/*
- * Encode the reply stored in the stateowner reply cache
- *
- * XDR note: do not encode rp->rp_buflen: the buffer contains the
- * previously sent already encoded operation.
+/**
+ * nfsd4_encode_replay - encode a result stored in the stateowner reply cache
+ * @xdr: send buffer's XDR stream
+ * @op: operation being replayed
+ *
+ * @op->replay->rp_buf contains the previously-sent already-encoded result.
*/
-void
-nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
+void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
{
- __be32 *p;
struct nfs4_replay *rp = op->replay;
- p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
- if (!p) {
- WARN_ON_ONCE(1);
- return;
- }
- *p++ = cpu_to_be32(op->opnum);
- *p++ = rp->rp_status; /* already xdr'ed */
+ trace_nfsd_stateowner_replay(op->opnum, rp);
- p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+ return;
+ if (xdr_stream_encode_be32(xdr, rp->rp_status) != XDR_UNIT)
+ return;
+ xdr_stream_encode_opaque_fixed(xdr, rp->rp_buf, rp->rp_buflen);
}
void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 5c1a4a0aa605..ba9d326b3de6 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -166,8 +166,7 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
int nfsd_drc_slab_create(void)
{
- drc_slab = kmem_cache_create("nfsd_drc",
- sizeof(struct nfsd_cacherep), 0, 0, NULL);
+ drc_slab = KMEM_CACHE(nfsd_cacherep, 0);
return drc_slab ? 0: -ENOMEM;
}
@@ -176,27 +175,6 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab);
}
-/**
- * nfsd_net_reply_cache_init - per net namespace reply cache set-up
- * @nn: nfsd_net being initialized
- *
- * Returns zero on succes; otherwise a negative errno is returned.
- */
-int nfsd_net_reply_cache_init(struct nfsd_net *nn)
-{
- return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
-}
-
-/**
- * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
- * @nn: nfsd_net being freed
- *
- */
-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
-{
- nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
-}
-
int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
@@ -501,7 +479,7 @@ out:
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
unsigned int len, struct nfsd_cacherep **cacherep)
{
- struct nfsd_net *nn;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
@@ -510,7 +488,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
int rtn = RC_DOIT;
if (type == RC_NOCACHE) {
- nfsd_stats_rc_nocache_inc();
+ nfsd_stats_rc_nocache_inc(nn);
goto out;
}
@@ -520,7 +498,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp)
goto out;
@@ -537,7 +514,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
nfsd_cacherep_dispose(&dispose);
- nfsd_stats_rc_misses_inc();
+ nfsd_stats_rc_misses_inc(nn);
atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
goto out;
@@ -545,7 +522,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
found_entry:
/* We found a matching entry which is either in progress or done. */
nfsd_reply_cache_free_locked(NULL, rp, nn);
- nfsd_stats_rc_hits_inc();
+ nfsd_stats_rc_hits_inc(nn);
rtn = RC_DROPIT;
rp = found;
@@ -687,15 +664,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
atomic_read(&nn->num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
seq_printf(m, "mem usage: %lld\n",
- percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
seq_printf(m, "cache hits: %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
seq_printf(m, "cache misses: %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
seq_printf(m, "not cached: %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
seq_printf(m, "payload misses: %lld\n",
- percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f206ca32e7f5..ecd18bffeebc 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system?
*/
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+ nfsd4_revoke_states(netns(file), path.dentry->d_sb);
path_put(&path);
return error;
@@ -1671,14 +1672,17 @@ static __net_init int nfsd_net_init(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- retval = nfsd_net_reply_cache_init(nn);
+ retval = nfsd_stat_counters_init(nn);
if (retval)
goto out_repcache_error;
+ memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
+ nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
+ nfsd_proc_stat_init(net);
return 0;
@@ -1699,7 +1703,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfsd_net_reply_cache_destroy(nn);
+ nfsd_proc_stat_shutdown(net);
+ nfsd_stat_counters_destroy(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(nn);
@@ -1722,12 +1727,9 @@ static int __init init_nfsd(void)
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
- retval = nfsd_stat_init(); /* Statistics */
- if (retval)
- goto out_free_pnfs;
retval = nfsd_drc_slab_create();
if (retval)
- goto out_free_stat;
+ goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1761,8 +1763,6 @@ out_free_exports:
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
-out_free_stat:
- nfsd_stat_shutdown();
out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
@@ -1780,7 +1780,6 @@ static void __exit exit_nfsd(void)
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
- nfsd_stat_shutdown();
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 304e9728b929..16c5a05f340e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -86,6 +86,7 @@ extern struct mutex nfsd_mutex;
extern spinlock_t nfsd_drc_lock;
extern unsigned long nfsd_drc_max_mem;
extern unsigned long nfsd_drc_mem_used;
+extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
@@ -274,6 +275,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
+#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
@@ -365,6 +367,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
#define NFS4_CLIENTS_PER_GB 1024
#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
+#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
/*
* The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index dbfa0ac13564..40fecf7b224f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -327,6 +327,7 @@ out:
__be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_export *exp = NULL;
struct dentry *dentry;
__be32 error;
@@ -395,7 +396,7 @@ skip_pseudoflavor_check:
out:
trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
- nfsd_stats_fh_stale_inc(exp);
+ nfsd_stats_fh_stale_inc(nn, exp);
return error;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a7315928a760..36370b957b63 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
}
}
- resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
+ resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
if (resp->status != nfs_ok)
goto out;
@@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
- (time64_t)0);
+ resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
+ NULL);
}
out_unlock:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a667802e08e7..c0d17b92b249 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -34,6 +34,7 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
+atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -80,7 +81,6 @@ unsigned long nfsd_drc_max_mem;
unsigned long nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-static struct svc_stat nfsd_acl_svcstats;
static const struct svc_version *nfsd_acl_version[] = {
# if defined(CONFIG_NFSD_V2_ACL)
[2] = &nfsd_acl_version2,
@@ -99,15 +99,11 @@ static struct svc_program nfsd_acl_program = {
.pg_vers = nfsd_acl_version,
.pg_name = "nfsacl",
.pg_class = "nfsd",
- .pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client,
.pg_init_request = nfsd_acl_init_request,
.pg_rpcbind_set = nfsd_acl_rpcbind_set,
};
-static struct svc_stat nfsd_acl_svcstats = {
- .program = &nfsd_acl_program,
-};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
static const struct svc_version *nfsd_version[] = {
@@ -132,7 +128,6 @@ struct svc_program nfsd_program = {
.pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
- .pg_stats = &nfsd_svcstats, /* version table */
.pg_authenticate = &svc_set_client, /* export authentication */
.pg_init_request = nfsd_init_request,
.pg_rpcbind_set = nfsd_rpcbind_set,
@@ -666,7 +661,8 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
+ serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
+ nfsd_max_blksize, nfsd);
if (serv == NULL)
return -ENOMEM;
@@ -929,7 +925,7 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
- atomic_inc(&nfsdstats.th_cnt);
+ atomic_inc(&nfsd_th_cnt);
set_freezable();
@@ -941,9 +937,11 @@ nfsd(void *vrqstp)
rqstp->rq_server->sv_maxconn = nn->max_connections;
svc_recv(rqstp);
+
+ nfsd_file_net_dispose(nn);
}
- atomic_dec(&nfsdstats.th_cnt);
+ atomic_dec(&nfsd_th_cnt);
out:
/* Release the thread */
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index de1e0dfed06a..925817f66917 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp);
- void (*fence_client)(struct nfs4_layout_stateid *ls);
+ void (*fence_client)(struct nfs4_layout_stateid *ls,
+ struct nfsd_file *file);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
@@ -72,11 +73,13 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
void nfsd4_return_all_client_layouts(struct nfs4_client *);
void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp);
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
int nfsd4_init_pnfs(void);
void nfsd4_exit_pnfs(void);
#else
struct nfs4_client;
struct nfs4_file;
+struct nfs4_layout_stateid;
static inline void nfsd4_setup_layout_type(struct svc_export *exp)
{
@@ -89,6 +92,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp)
{
}
+static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+}
static inline void nfsd4_exit_pnfs(void)
{
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 41bdc913fa71..01c6f3445646 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,7 +68,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops;
- struct work_struct cb_work;
+ struct delayed_work cb_work;
int cb_seq_status;
int cb_status;
bool cb_need_restart;
@@ -88,17 +88,34 @@ struct nfsd4_callback_ops {
*/
struct nfs4_stid {
refcount_t sc_count;
-#define NFS4_OPEN_STID 1
-#define NFS4_LOCK_STID 2
-#define NFS4_DELEG_STID 4
-/* For an open stateid kept around *only* to process close replays: */
-#define NFS4_CLOSED_STID 8
+
+ /* A new stateid is added to the cl_stateids idr early before it
+ * is fully initialised. Its sc_type is then zero. After
+ * initialisation the sc_type it set under cl_lock, and then
+ * never changes.
+ */
+#define SC_TYPE_OPEN BIT(0)
+#define SC_TYPE_LOCK BIT(1)
+#define SC_TYPE_DELEG BIT(2)
+#define SC_TYPE_LAYOUT BIT(3)
+ unsigned short sc_type;
+
+/* state_lock protects sc_status for delegation stateids.
+ * ->cl_lock protects sc_status for open and lock stateids.
+ * ->st_mutex also protect sc_status for open stateids.
+ * ->ls_lock protects sc_status for layout stateids.
+ */
+/*
+ * For an open stateid kept around *only* to process close replays.
+ * For deleg stateid, kept in idr until last reference is dropped.
+ */
+#define SC_STATUS_CLOSED BIT(0)
/* For a deleg stateid kept around only to process free_stateid's: */
-#define NFS4_REVOKED_DELEG_STID 16
-#define NFS4_CLOSED_DELEG_STID 32
-#define NFS4_LAYOUT_STID 64
+#define SC_STATUS_REVOKED BIT(1)
+#define SC_STATUS_ADMIN_REVOKED BIT(2)
+ unsigned short sc_status;
+
struct list_head sc_cp_list;
- unsigned char sc_type;
stateid_t sc_stateid;
spinlock_t sc_lock;
struct nfs4_client *sc_client;
@@ -117,6 +134,24 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
+struct nfs4_cb_fattr {
+ struct nfsd4_callback ncf_getattr;
+ u32 ncf_cb_status;
+ u32 ncf_cb_bmap[1];
+
+ /* from CB_GETATTR reply */
+ u64 ncf_cb_change;
+ u64 ncf_cb_fsize;
+
+ unsigned long ncf_cb_flags;
+ bool ncf_file_modified;
+ u64 ncf_initial_cinfo;
+ u64 ncf_cur_fsize;
+};
+
+/* bits for ncf_cb_flags */
+#define CB_GETATTR_BUSY 0
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -150,6 +185,9 @@ struct nfs4_delegation {
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
+
+ /* for CB_GETATTR */
+ struct nfs4_cb_fattr dl_cb_fattr;
};
#define cb_to_delegation(cb) \
@@ -317,8 +355,9 @@ enum {
* 0. If they are not renewed within a lease period, they become eligible for
* destruction by the laundromat.
*
- * These objects can also be destroyed prematurely by the fault injection code,
- * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
+ * These objects can also be destroyed if the client sends certain forms of
+ * SETCLIENTID or EXCHANGE_ID operations.
+ *
* Care is taken *not* to do this however when the objects have an elevated
* refcount.
*
@@ -326,7 +365,7 @@ enum {
*
* o Each nfs4_clients is also hashed by name (the opaque quantity initially
* sent by the client to identify itself).
- *
+ *
* o cl_perclient list is used to ensure no dangling stateowner references
* when we expire the nfs4_client
*/
@@ -351,6 +390,7 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ atomic_t cl_admin_revoked; /* count of admin-revoked states */
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
@@ -640,6 +680,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
NFSPROC4_CLNT_CB_RECALL_ANY,
+ NFSPROC4_CLNT_CB_GETATTR,
};
/* Returns true iff a is later than b: */
@@ -672,15 +713,15 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **filp,
struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
- struct nfs4_stid **s, struct nfsd_net *nn);
+ stateid_t *stateid, unsigned short typemask,
+ unsigned short statusmask,
+ struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
void nfs4_free_copy_state(struct nfsd4_copy *copy);
struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
struct nfs4_stid *p_stid);
-void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
@@ -714,6 +755,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
}
struct nfsd_file *find_any_file(struct nfs4_file *f);
+#ifdef CONFIG_NFSD_V4
+void nfsd4_revoke_states(struct net *net, struct super_block *sb);
+#else
+static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+}
+#endif
+
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
@@ -732,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct inode *inode);
+ struct inode *inode, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 12d79f5d4eb1..be52fb1e928e 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -27,25 +27,22 @@
#include "nfsd.h"
-struct nfsd_stats nfsdstats;
-struct svc_stat nfsd_svcstats = {
- .program = &nfsd_program,
-};
-
static int nfsd_show(struct seq_file *seq, void *v)
{
+ struct net *net = pde_data(file_inode(seq->file));
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int i;
seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */
- seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
+ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
/* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++)
@@ -55,7 +52,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
/* show my rpc info */
- svc_seq_show(seq, &nfsd_svcstats);
+ svc_seq_show(seq, &nn->nfsd_svcstats);
#ifdef CONFIG_NFSD_V4
/* Show count for individual nfsv4 operations */
@@ -63,10 +60,10 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1);
for (i = 0; i <= LAST_NFS4_OP; i++) {
seq_printf(seq, " %lld",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
}
seq_printf(seq, "\nwdeleg_getattr %lld",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
@@ -108,31 +105,24 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
percpu_counter_destroy(&counters[i]);
}
-static int nfsd_stat_counters_init(void)
+int nfsd_stat_counters_init(struct nfsd_net *nn)
{
- return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
+ return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
}
-static void nfsd_stat_counters_destroy(void)
+void nfsd_stat_counters_destroy(struct nfsd_net *nn)
{
- nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
+ nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
}
-int nfsd_stat_init(void)
+void nfsd_proc_stat_init(struct net *net)
{
- int err;
-
- err = nfsd_stat_counters_init();
- if (err)
- return err;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
-
- return 0;
+ svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
}
-void nfsd_stat_shutdown(void)
+void nfsd_proc_stat_shutdown(struct net *net)
{
- nfsd_stat_counters_destroy();
- svc_proc_unregister(&init_net, "nfsd");
+ svc_proc_unregister(net, "nfsd");
}
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 14f50c660b61..d2753e975dfd 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -10,94 +10,72 @@
#include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
-
-enum {
- NFSD_STATS_RC_HITS, /* repcache hits */
- NFSD_STATS_RC_MISSES, /* repcache misses */
- NFSD_STATS_RC_NOCACHE, /* uncached reqs */
- NFSD_STATS_FH_STALE, /* FH stale error */
- NFSD_STATS_IO_READ, /* bytes returned to read requests */
- NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
-#ifdef CONFIG_NFSD_V4
- NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
- NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
-#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
- NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
-#endif
- NFSD_STATS_COUNTERS_NUM
-};
-
-struct nfsd_stats {
- struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
-
- atomic_t th_cnt; /* number of available threads */
-};
-
-extern struct nfsd_stats nfsdstats;
-
-extern struct svc_stat nfsd_svcstats;
-
int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
-int nfsd_stat_init(void);
-void nfsd_stat_shutdown(void);
+int nfsd_stat_counters_init(struct nfsd_net *nn);
+void nfsd_stat_counters_destroy(struct nfsd_net *nn);
+void nfsd_proc_stat_init(struct net *net);
+void nfsd_proc_stat_shutdown(struct net *net);
-static inline void nfsd_stats_rc_hits_inc(void)
+static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
}
-static inline void nfsd_stats_rc_misses_inc(void)
+static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
}
-static inline void nfsd_stats_rc_nocache_inc(void)
+static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
}
-static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp)
+static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
+ struct svc_export *exp)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
if (exp && exp->ex_stats)
percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
}
-static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount)
+static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
+ struct svc_export *exp, s64 amount)
{
- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
+ percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
}
-static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount)
+static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
+ struct svc_export *exp, s64 amount)
{
- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
+ percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
}
static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
}
static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
{
- percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
+ percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
}
static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
{
- percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
+ percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
}
#ifdef CONFIG_NFSD_V4
-static inline void nfsd_stats_wdeleg_getattr_inc(void)
+static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
}
#endif
#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index d1e8cf079b0f..e545e92c4408 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,8 +9,10 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <trace/misc/nfs.h>
+#include <trace/misc/sunrpc.h>
#include "export.h"
#include "nfsfh.h"
@@ -641,23 +643,18 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm);
-TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
-TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
-TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
-TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
-
#define show_stid_type(x) \
__print_flags(x, "|", \
- { NFS4_OPEN_STID, "OPEN" }, \
- { NFS4_LOCK_STID, "LOCK" }, \
- { NFS4_DELEG_STID, "DELEG" }, \
- { NFS4_CLOSED_STID, "CLOSED" }, \
- { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
- { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
- { NFS4_LAYOUT_STID, "LAYOUT" })
+ { SC_TYPE_OPEN, "OPEN" }, \
+ { SC_TYPE_LOCK, "LOCK" }, \
+ { SC_TYPE_DELEG, "DELEG" }, \
+ { SC_TYPE_LAYOUT, "LAYOUT" })
+
+#define show_stid_status(x) \
+ __print_flags(x, "|", \
+ { SC_STATUS_CLOSED, "CLOSED" }, \
+ { SC_STATUS_REVOKED, "REVOKED" }, \
+ { SC_STATUS_ADMIN_REVOKED, "ADMIN_REVOKED" })
DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_PROTO(
@@ -666,6 +663,7 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_ARGS(stid),
TP_STRUCT__entry(
__field(unsigned long, sc_type)
+ __field(unsigned long, sc_status)
__field(int, sc_count)
__field(u32, cl_boot)
__field(u32, cl_id)
@@ -676,16 +674,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
const stateid_t *stp = &stid->sc_stateid;
__entry->sc_type = stid->sc_type;
+ __entry->sc_status = stid->sc_status;
__entry->sc_count = refcount_read(&stid->sc_count);
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
__entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation;
),
- TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
+ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
__entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation,
- __entry->sc_count, show_stid_type(__entry->sc_type)
+ __entry->sc_count, show_stid_type(__entry->sc_type),
+ show_stid_status(__entry->sc_status)
)
);
@@ -696,6 +696,59 @@ DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
DEFINE_STID_EVENT(revoke);
+TRACE_EVENT(nfsd_stateowner_replay,
+ TP_PROTO(
+ u32 opnum,
+ const struct nfs4_replay *rp
+ ),
+ TP_ARGS(opnum, rp),
+ TP_STRUCT__entry(
+ __field(unsigned long, status)
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ __entry->status = be32_to_cpu(rp->rp_status);
+ __entry->opnum = opnum;
+ ),
+ TP_printk("opnum=%u status=%lu",
+ __entry->opnum, __entry->status)
+);
+
+TRACE_EVENT_CONDITION(nfsd_seq4_status,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct nfsd4_sequence *sequence
+ ),
+ TP_ARGS(rqstp, sequence),
+ TP_CONDITION(sequence->status_flags),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(u32, xid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(unsigned long, status_flags)
+ ),
+ TP_fast_assign(
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&sequence->sessionid;
+
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->status_flags = sequence->status_flags;
+ ),
+ TP_printk("xid=0x%08x sessionid=%08x:%08x:%08x:%08x status_flags=%s",
+ __entry->xid, __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ show_nfs4_seq4_status(__entry->status_flags)
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
@@ -1334,7 +1387,8 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
TP_PROTO(const struct nfs4_client *clp), \
TP_ARGS(clp))
-DEFINE_NFSD_CB_EVENT(state);
+DEFINE_NFSD_CB_EVENT(start);
+DEFINE_NFSD_CB_EVENT(new_state);
DEFINE_NFSD_CB_EVENT(probe);
DEFINE_NFSD_CB_EVENT(lost);
DEFINE_NFSD_CB_EVENT(shutdown);
@@ -1405,6 +1459,128 @@ TRACE_EVENT(nfsd_cb_setup_err,
__entry->error)
);
+DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(clp, cb),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(const void *, cb)
+ __field(bool, need_restart)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->cb = cb;
+ __entry->need_restart = cb->cb_need_restart;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x cb=%p%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->cb, __entry->need_restart ?
+ " (need restart)" : " (first try)"
+ )
+);
+
+#define DEFINE_NFSD_CB_LIFETIME_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_lifetime_class, nfsd_cb_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const struct nfsd4_callback *cb \
+ ), \
+ TP_ARGS(clp, cb))
+
+DEFINE_NFSD_CB_LIFETIME_EVENT(queue);
+DEFINE_NFSD_CB_LIFETIME_EVENT(destroy);
+DEFINE_NFSD_CB_LIFETIME_EVENT(restart);
+DEFINE_NFSD_CB_LIFETIME_EVENT(bc_update);
+DEFINE_NFSD_CB_LIFETIME_EVENT(bc_shutdown);
+
+TRACE_EVENT(nfsd_cb_seq_status,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(task, cb),
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(int, tk_status)
+ __field(int, seq_status)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = cb->cb_clp;
+ const struct nfsd4_session *session = clp->cl_cb_session;
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&session->se_sessionid;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->tk_status = task->tk_status;
+ __entry->seq_status = cb->cb_seq_status;
+ ),
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d\n",
+ __entry->task_id, __entry->client_id,
+ __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ __entry->tk_status, __entry->seq_status
+ )
+);
+
+TRACE_EVENT(nfsd_cb_free_slot,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(task, cb),
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(u32, slot_seqno)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = cb->cb_clp;
+ const struct nfsd4_session *session = clp->cl_cb_session;
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&session->se_sessionid;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->slot_seqno = session->se_cb_seq_nr;
+ ),
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u\n",
+ __entry->task_id, __entry->client_id,
+ __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ __entry->slot_seqno
+ )
+);
+
TRACE_EVENT_CONDITION(nfsd_cb_recall,
TP_PROTO(
const struct nfs4_stid *stid
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b7c7a9273ea0..6a9464262fae 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -25,7 +25,6 @@
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
#include <linux/jhash.h>
-#include <linux/ima.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -476,7 +475,6 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
* @rqstp: controlling RPC transaction
* @fhp: filehandle of target
* @attr: attributes to set
- * @check_guard: set to 1 if guardtime is a valid timestamp
* @guardtime: do not act if ctime.tv_sec does not match this timestamp
*
* This call may adjust the contents of @attr (in particular, this
@@ -488,8 +486,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfsd_attrs *attr,
- int check_guard, time64_t guardtime)
+ struct nfsd_attrs *attr, const struct timespec64 *guardtime)
{
struct dentry *dentry;
struct inode *inode;
@@ -497,7 +494,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err;
+ int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
int retries;
@@ -538,9 +535,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_sanitize_attrs(inode, iap);
- if (check_guard && guardtime != inode_get_ctime_sec(inode))
- return nfserr_notsync;
-
/*
* The size case is special, it changes the file in addition to the
* attributes, and file systems don't expect it to be mixed with
@@ -555,6 +549,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
inode_lock(inode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err)
+ goto out_unlock;
+
+ if (guardtime) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+ if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
+ guardtime->tv_nsec != ctime.tv_nsec) {
+ err = nfserr_notsync;
+ goto out_fill_attrs;
+ }
+ }
+
for (retries = 1;;) {
struct iattr attrs;
@@ -582,13 +589,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_DEFAULT,
attr->na_dpacl);
+out_fill_attrs:
+ /*
+ * RFC 1813 Section 3.3.2 does not mandate that an NFS server
+ * returns wcc_data for SETATTR. Some client implementations
+ * depend on receiving wcc_data, however, to sort out partial
+ * updates (eg., the client requested that size and mode be
+ * modified, but the server changed only the file mode).
+ */
+ fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(inode);
if (size_change)
put_write_access(inode);
out:
if (!host_err)
host_err = commit_metadata(fhp);
- return nfserrno(host_err);
+ return err != 0 ? err : nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
@@ -877,7 +894,7 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
goto out;
}
- host_err = ima_file_check(file, may_flags);
+ host_err = security_file_post_open(file, may_flags);
if (host_err) {
fput(file);
goto out;
@@ -1002,7 +1019,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
- nfsd_stats_io_read_add(fhp->fh_export, host_err);
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
*eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
@@ -1185,7 +1204,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
goto out_nfserr;
}
*cnt = host_err;
- nfsd_stats_io_write_add(exp, *cnt);
+ nfsd_stats_io_write_add(nn, exp, *cnt);
fsnotify_modify(file);
host_err = filemap_check_wb_err(file->f_mapping, since);
if (host_err < 0)
@@ -1404,7 +1423,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
* if the attributes have not changed.
*/
if (iap->ia_valid)
- status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0);
+ status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
else
status = nfserrno(commit_metadata(resfhp));
@@ -1906,10 +1925,10 @@ out_unlock:
fh_drop_write(ffhp);
/*
- * If the target dentry has cached open files, then we need to try to
- * close them prior to doing the rename. Flushing delayed fput
- * shouldn't be done with locks held however, so we delay it until this
- * point and then reattempt the whole shebang.
+ * If the target dentry has cached open files, then we need to
+ * try to close them prior to doing the rename. Final fput
+ * shouldn't be done with locks held however, so we delay it
+ * until this point and then reattempt the whole shebang.
*/
if (close_cached) {
close_cached = false;
@@ -2177,11 +2196,43 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
- fput(file);
+ nfsd_filp_close(file);
out:
return err;
}
+/**
+ * nfsd_filp_close: close a file synchronously
+ * @fp: the file to close
+ *
+ * nfsd_filp_close() is similar in behaviour to filp_close().
+ * The difference is that if this is the final close on the
+ * file, the that finalisation happens immediately, rather then
+ * being handed over to a work_queue, as it the case for
+ * filp_close().
+ * When a user-space process closes a file (even when using
+ * filp_close() the finalisation happens before returning to
+ * userspace, so it is effectively synchronous. When a kernel thread
+ * uses file_close(), on the other hand, the handling is completely
+ * asynchronous. This means that any cost imposed by that finalisation
+ * is not imposed on the nfsd thread, and nfsd could potentually
+ * close files more quickly than the work queue finalises the close,
+ * which would lead to unbounded growth in the queue.
+ *
+ * In some contexts is it not safe to synchronously wait for
+ * close finalisation (see comment for __fput_sync()), but nfsd
+ * does not match those contexts. In partcilarly it does not, at the
+ * time that this function is called, hold and locks and no finalisation
+ * of any file, socket, or device driver would have any cause to wait
+ * for nfsd to make progress.
+ */
+void nfsd_filp_close(struct file *fp)
+{
+ get_file(fp);
+ filp_close(fp, NULL);
+ __fput_sync(fp);
+}
+
/*
* Get file system stats
* N.B. After this call fhp needs an fh_put
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 702fbc4483bf..c60fdb6200fd 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -69,7 +69,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct nfsd_attrs *, int, time64_t);
+ struct nfsd_attrs *, const struct timespec64 *);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
@@ -148,6 +148,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
+void nfsd_filp_close(struct file *fp);
+
static inline int fh_want_write(struct svc_fh *fh)
{
int ret;
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 03fe4e21306c..522067b7fd75 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh;
struct iattr attrs;
int check_guard;
- time64_t guardtime;
+ struct timespec64 guardtime;
};
struct nfsd3_diropargs {
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 0d39af1b00a0..e8b00309c449 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -54,3 +54,21 @@
#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+
+/*
+ * 1: CB_GETATTR opcode (32-bit)
+ * N: file_handle
+ * 1: number of entry in attribute array (32-bit)
+ * 1: entry 0 in attribute array (32-bit)
+ */
+#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + enc_nfs4_fh_sz + 1 + 1)
+/*
+ * 4: fattr_bitmap_maxsz
+ * 1: attribute array len
+ * 2: change attr (64-bit)
+ * 2: size (64-bit)
+ */
+#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index bec33b89a075..0e3fc5ba33c7 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -107,7 +107,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- folio_wait_stable(folio);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ folio_wait_writeback(folio);
out:
sb_end_pagefault(inode->i_sb);
return vmf_fs_error(ret);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 0955b657938f..a9b8d77c8c1d 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 2590a0860eab..2bfb08052d39 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1703,7 +1703,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_folio != bd_folio) {
folio_lock(bd_folio);
@@ -1714,6 +1713,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
break;
}
+ set_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_begin_folio_io(fs_folio);
fs_folio = bh->b_folio;
@@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_folio != bd_folio) {
@@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
}
break;
}
+ clear_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_end_folio_io(fs_folio, err);
fs_folio = bh->b_folio;
@@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));
- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
if (bh->b_folio != bd_folio) {
folio_end_writeback(bd_folio);
bd_folio = bh->b_folio;
@@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
update_sr = true;
break;
}
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh->b_folio != fs_folio) {
nilfs_end_folio_io(fs_folio, 0);
fs_folio = bh->b_folio;
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 34e1e3e36733..7aaafb5cb9fc 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -27,26 +27,17 @@ static const struct file_operations ns_file_operations = {
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = d_inode(dentry);
- const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
+ struct ns_common *ns = inode->i_private;
+ const struct proc_ns_operations *ns_ops = ns->ops;
return dynamic_dname(buffer, buflen, "%s:[%lu]",
ns_ops->name, inode->i_ino);
}
-static void ns_prune_dentry(struct dentry *dentry)
-{
- struct inode *inode = d_inode(dentry);
- if (inode) {
- struct ns_common *ns = inode->i_private;
- atomic_long_set(&ns->stashed, 0);
- }
-}
-
-const struct dentry_operations ns_dentry_operations =
-{
- .d_prune = ns_prune_dentry,
+const struct dentry_operations ns_dentry_operations = {
.d_delete = always_delete_dentry,
.d_dname = ns_dname,
+ .d_prune = stashed_dentry_prune,
};
static void nsfs_evict(struct inode *inode)
@@ -56,67 +47,16 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
}
-static int __ns_get_path(struct path *path, struct ns_common *ns)
-{
- struct vfsmount *mnt = nsfs_mnt;
- struct dentry *dentry;
- struct inode *inode;
- unsigned long d;
-
- rcu_read_lock();
- d = atomic_long_read(&ns->stashed);
- if (!d)
- goto slow;
- dentry = (struct dentry *)d;
- if (!lockref_get_not_dead(&dentry->d_lockref))
- goto slow;
- rcu_read_unlock();
- ns->ops->put(ns);
-got_it:
- path->mnt = mntget(mnt);
- path->dentry = dentry;
- return 0;
-slow:
- rcu_read_unlock();
- inode = new_inode_pseudo(mnt->mnt_sb);
- if (!inode) {
- ns->ops->put(ns);
- return -ENOMEM;
- }
- inode->i_ino = ns->inum;
- simple_inode_init_ts(inode);
- inode->i_flags |= S_IMMUTABLE;
- inode->i_mode = S_IFREG | S_IRUGO;
- inode->i_fop = &ns_file_operations;
- inode->i_private = ns;
-
- dentry = d_make_root(inode); /* not the normal use, but... */
- if (!dentry)
- return -ENOMEM;
- dentry->d_fsdata = (void *)ns->ops;
- d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
- if (d) {
- d_delete(dentry); /* make sure ->d_prune() does nothing */
- dput(dentry);
- cpu_relax();
- return -EAGAIN;
- }
- goto got_it;
-}
-
int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
void *private_data)
{
- int ret;
+ struct ns_common *ns;
- do {
- struct ns_common *ns = ns_get_cb(private_data);
- if (!ns)
- return -ENOENT;
- ret = __ns_get_path(path, ns);
- } while (ret == -EAGAIN);
+ ns = ns_get_cb(private_data);
+ if (!ns)
+ return -ENOENT;
- return ret;
+ return path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt, ns, path);
}
struct ns_get_path_task_args {
@@ -146,6 +86,7 @@ int open_related_ns(struct ns_common *ns,
struct ns_common *(*get_ns)(struct ns_common *ns))
{
struct path path = {};
+ struct ns_common *relative;
struct file *f;
int err;
int fd;
@@ -154,19 +95,15 @@ int open_related_ns(struct ns_common *ns,
if (fd < 0)
return fd;
- do {
- struct ns_common *relative;
-
- relative = get_ns(ns);
- if (IS_ERR(relative)) {
- put_unused_fd(fd);
- return PTR_ERR(relative);
- }
-
- err = __ns_get_path(&path, relative);
- } while (err == -EAGAIN);
+ relative = get_ns(ns);
+ if (IS_ERR(relative)) {
+ put_unused_fd(fd);
+ return PTR_ERR(relative);
+ }
- if (err) {
+ err = path_from_stashed(&relative->stashed, relative->inum, nsfs_mnt,
+ relative, &path);
+ if (err < 0) {
put_unused_fd(fd);
return err;
}
@@ -249,7 +186,8 @@ bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino)
static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
+ const struct ns_common *ns = inode->i_private;
+ const struct proc_ns_operations *ns_ops = ns->ops;
seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
return 0;
@@ -261,6 +199,24 @@ static const struct super_operations nsfs_ops = {
.show_path = nsfs_show_path,
};
+static void nsfs_init_inode(struct inode *inode, void *data)
+{
+ inode->i_private = data;
+ inode->i_mode |= S_IRUGO;
+ inode->i_fop = &ns_file_operations;
+}
+
+static void nsfs_put_data(void *data)
+{
+ struct ns_common *ns = data;
+ ns->ops->put(ns);
+}
+
+static const struct stashed_operations nsfs_stashed_ops = {
+ .init_inode = nsfs_init_inode,
+ .put_data = nsfs_put_data,
+};
+
static int nsfs_init_fs_context(struct fs_context *fc)
{
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
@@ -268,6 +224,7 @@ static int nsfs_init_fs_context(struct fs_context *fc)
return -ENOMEM;
ctx->ops = &nsfs_ops;
ctx->dops = &ns_dentry_operations;
+ fc->s_fs_info = (void *)&nsfs_stashed_ops;
return 0;
}
diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig
deleted file mode 100644
index 7b2509741735..000000000000
--- a/fs/ntfs/Kconfig
+++ /dev/null
@@ -1,81 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config NTFS_FS
- tristate "NTFS file system support"
- select BUFFER_HEAD
- select NLS
- help
- NTFS is the file system of Microsoft Windows NT, 2000, XP and 2003.
-
- Saying Y or M here enables read support. There is partial, but
- safe, write support available. For write support you must also
- say Y to "NTFS write support" below.
-
- There are also a number of user-space tools available, called
- ntfsprogs. These include ntfsundelete and ntfsresize, that work
- without NTFS support enabled in the kernel.
-
- This is a rewrite from scratch of Linux NTFS support and replaced
- the old NTFS code starting with Linux 2.5.11. A backport to
- the Linux 2.4 kernel series is separately available as a patch
- from the project web site.
-
- For more information see <file:Documentation/filesystems/ntfs.rst>
- and <http://www.linux-ntfs.org/>.
-
- To compile this file system support as a module, choose M here: the
- module will be called ntfs.
-
- If you are not using Windows NT, 2000, XP or 2003 in addition to
- Linux on your computer it is safe to say N.
-
-config NTFS_DEBUG
- bool "NTFS debugging support"
- depends on NTFS_FS
- help
- If you are experiencing any problems with the NTFS file system, say
- Y here. This will result in additional consistency checks to be
- performed by the driver as well as additional debugging messages to
- be written to the system log. Note that debugging messages are
- disabled by default. To enable them, supply the option debug_msgs=1
- at the kernel command line when booting the kernel or as an option
- to insmod when loading the ntfs module. Once the driver is active,
- you can enable debugging messages by doing (as root):
- echo 1 > /proc/sys/fs/ntfs-debug
- Replacing the "1" with "0" would disable debug messages.
-
- If you leave debugging messages disabled, this results in little
- overhead, but enabling debug messages results in very significant
- slowdown of the system.
-
- When reporting bugs, please try to have available a full dump of
- debugging messages while the misbehaviour was occurring.
-
-config NTFS_RW
- bool "NTFS write support"
- depends on NTFS_FS
- depends on PAGE_SIZE_LESS_THAN_64KB
- help
- This enables the partial, but safe, write support in the NTFS driver.
-
- The only supported operation is overwriting existing files, without
- changing the file length. No file or directory creation, deletion or
- renaming is possible. Note only non-resident files can be written to
- so you may find that some very small files (<500 bytes or so) cannot
- be written to.
-
- While we cannot guarantee that it will not damage any data, we have
- so far not received a single report where the driver would have
- damaged someones data so we assume it is perfectly safe to use.
-
- Note: While write support is safe in this version (a rewrite from
- scratch of the NTFS support), it should be noted that the old NTFS
- write support, included in Linux 2.5.10 and before (since 1997),
- is not safe.
-
- This is currently useful with TopologiLinux. TopologiLinux is run
- on top of any DOS/Microsoft Windows system without partitioning your
- hard disk. Unlike other Linux distributions TopologiLinux does not
- need its own partition. For more information see
- <http://topologi-linux.sourceforge.net/>
-
- It is perfectly safe to say N here.
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
deleted file mode 100644
index 3e736572ed00..000000000000
--- a/fs/ntfs/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Rules for making the NTFS driver.
-
-obj-$(CONFIG_NTFS_FS) += ntfs.o
-
-ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
- index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
- unistr.o upcase.o
-
-ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
-
-ccflags-y := -DNTFS_VERSION=\"2.1.32\"
-ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
-ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
-
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
deleted file mode 100644
index 2d01517a2d59..000000000000
--- a/fs/ntfs/aops.c
+++ /dev/null
@@ -1,1744 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * aops.c - NTFS kernel address space operations and page cache handling.
- *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
- * Copyright (c) 2002 Richard Russon
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <linux/buffer_head.h>
-#include <linux/writeback.h>
-#include <linux/bit_spinlock.h>
-#include <linux/bio.h>
-
-#include "aops.h"
-#include "attrib.h"
-#include "debug.h"
-#include "inode.h"
-#include "mft.h"
-#include "runlist.h"
-#include "types.h"
-#include "ntfs.h"
-
-/**
- * ntfs_end_buffer_async_read - async io completion for reading attributes
- * @bh: buffer head on which io is completed
- * @uptodate: whether @bh is now uptodate or not
- *
- * Asynchronous I/O completion handler for reading pages belonging to the
- * attribute address space of an inode. The inodes can either be files or
- * directories or they can be fake inodes describing some attribute.
- *
- * If NInoMstProtected(), perform the post read mst fixups when all IO on the
- * page has been completed and mark the page uptodate or set the error bit on
- * the page. To determine the size of the records that need fixing up, we
- * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
- * record size, and index_block_size_bits, to the log(base 2) of the ntfs
- * record size.
- */
-static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
-{
- unsigned long flags;
- struct buffer_head *first, *tmp;
- struct page *page;
- struct inode *vi;
- ntfs_inode *ni;
- int page_uptodate = 1;
-
- page = bh->b_page;
- vi = page->mapping->host;
- ni = NTFS_I(vi);
-
- if (likely(uptodate)) {
- loff_t i_size;
- s64 file_ofs, init_size;
-
- set_buffer_uptodate(bh);
-
- file_ofs = ((s64)page->index << PAGE_SHIFT) +
- bh_offset(bh);
- read_lock_irqsave(&ni->size_lock, flags);
- init_size = ni->initialized_size;
- i_size = i_size_read(vi);
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (unlikely(init_size > i_size)) {
- /* Race with shrinking truncate. */
- init_size = i_size;
- }
- /* Check for the current buffer head overflowing. */
- if (unlikely(file_ofs + bh->b_size > init_size)) {
- int ofs;
- void *kaddr;
-
- ofs = 0;
- if (file_ofs < init_size)
- ofs = init_size - file_ofs;
- kaddr = kmap_atomic(page);
- memset(kaddr + bh_offset(bh) + ofs, 0,
- bh->b_size - ofs);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
- }
- } else {
- clear_buffer_uptodate(bh);
- SetPageError(page);
- ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
- "0x%llx.", (unsigned long long)bh->b_blocknr);
- }
- first = page_buffers(page);
- spin_lock_irqsave(&first->b_uptodate_lock, flags);
- clear_buffer_async_read(bh);
- unlock_buffer(bh);
- tmp = bh;
- do {
- if (!buffer_uptodate(tmp))
- page_uptodate = 0;
- if (buffer_async_read(tmp)) {
- if (likely(buffer_locked(tmp)))
- goto still_busy;
- /* Async buffers must be locked. */
- BUG();
- }
- tmp = tmp->b_this_page;
- } while (tmp != bh);
- spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- /*
- * If none of the buffers had errors then we can set the page uptodate,
- * but we first have to perform the post read mst fixups, if the
- * attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
- * Note we ignore fixup errors as those are detected when
- * map_mft_record() is called which gives us per record granularity
- * rather than per page granularity.
- */
- if (!NInoMstProtected(ni)) {
- if (likely(page_uptodate && !PageError(page)))
- SetPageUptodate(page);
- } else {
- u8 *kaddr;
- unsigned int i, recs;
- u32 rec_size;
-
- rec_size = ni->itype.index.block_size;
- recs = PAGE_SIZE / rec_size;
- /* Should have been verified before we got here... */
- BUG_ON(!recs);
- kaddr = kmap_atomic(page);
- for (i = 0; i < recs; i++)
- post_read_mst_fixup((NTFS_RECORD*)(kaddr +
- i * rec_size), rec_size);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
- if (likely(page_uptodate && !PageError(page)))
- SetPageUptodate(page);
- }
- unlock_page(page);
- return;
-still_busy:
- spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- return;
-}
-
-/**
- * ntfs_read_block - fill a @folio of an address space with data
- * @folio: page cache folio to fill with data
- *
- * We read each buffer asynchronously and when all buffers are read in, our io
- * completion handler ntfs_end_buffer_read_async(), if required, automatically
- * applies the mst fixups to the folio before finally marking it uptodate and
- * unlocking it.
- *
- * We only enforce allocated_size limit because i_size is checked for in
- * generic_file_read().
- *
- * Return 0 on success and -errno on error.
- *
- * Contains an adapted version of fs/buffer.c::block_read_full_folio().
- */
-static int ntfs_read_block(struct folio *folio)
-{
- loff_t i_size;
- VCN vcn;
- LCN lcn;
- s64 init_size;
- struct inode *vi;
- ntfs_inode *ni;
- ntfs_volume *vol;
- runlist_element *rl;
- struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
- sector_t iblock, lblock, zblock;
- unsigned long flags;
- unsigned int blocksize, vcn_ofs;
- int i, nr;
- unsigned char blocksize_bits;
-
- vi = folio->mapping->host;
- ni = NTFS_I(vi);
- vol = ni->vol;
-
- /* $MFT/$DATA must have its complete runlist in memory at all times. */
- BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
-
- blocksize = vol->sb->s_blocksize;
- blocksize_bits = vol->sb->s_blocksize_bits;
-
- head = folio_buffers(folio);
- if (!head)
- head = create_empty_buffers(folio, blocksize, 0);
- bh = head;
-
- /*
- * We may be racing with truncate. To avoid some of the problems we
- * now take a snapshot of the various sizes and use those for the whole
- * of the function. In case of an extending truncate it just means we
- * may leave some buffers unmapped which are now allocated. This is
- * not a problem since these buffers will just get mapped when a write
- * occurs. In case of a shrinking truncate, we will detect this later
- * on due to the runlist being incomplete and if the folio is being
- * fully truncated, truncate will throw it away as soon as we unlock
- * it so no need to worry what we do with it.
- */
- iblock = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);
- read_lock_irqsave(&ni->size_lock, flags);
- lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
- init_size = ni->initialized_size;
- i_size = i_size_read(vi);
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (unlikely(init_size > i_size)) {
- /* Race with shrinking truncate. */
- init_size = i_size;
- }
- zblock = (init_size + blocksize - 1) >> blocksize_bits;
-
- /* Loop through all the buffers in the folio. */
- rl = NULL;
- nr = i = 0;
- do {
- int err = 0;
-
- if (unlikely(buffer_uptodate(bh)))
- continue;
- if (unlikely(buffer_mapped(bh))) {
- arr[nr++] = bh;
- continue;
- }
- bh->b_bdev = vol->sb->s_bdev;
- /* Is the block within the allowed limits? */
- if (iblock < lblock) {
- bool is_retry = false;
-
- /* Convert iblock into corresponding vcn and offset. */
- vcn = (VCN)iblock << blocksize_bits >>
- vol->cluster_size_bits;
- vcn_ofs = ((VCN)iblock << blocksize_bits) &
- vol->cluster_size_mask;
- if (!rl) {
-lock_retry_remap:
- down_read(&ni->runlist.lock);
- rl = ni->runlist.rl;
- }
- if (likely(rl != NULL)) {
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- } else
- lcn = LCN_RL_NOT_MAPPED;
- /* Successful remap. */
- if (lcn >= 0) {
- /* Setup buffer head to correct block. */
- bh->b_blocknr = ((lcn << vol->cluster_size_bits)
- + vcn_ofs) >> blocksize_bits;
- set_buffer_mapped(bh);
- /* Only read initialized data blocks. */
- if (iblock < zblock) {
- arr[nr++] = bh;
- continue;
- }
- /* Fully non-initialized data block, zero it. */
- goto handle_zblock;
- }
- /* It is a hole, need to zero it. */
- if (lcn == LCN_HOLE)
- goto handle_hole;
- /* If first try and runlist unmapped, map and retry. */
- if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
- is_retry = true;
- /*
- * Attempt to map runlist, dropping lock for
- * the duration.
- */
- up_read(&ni->runlist.lock);
- err = ntfs_map_runlist(ni, vcn);
- if (likely(!err))
- goto lock_retry_remap;
- rl = NULL;
- } else if (!rl)
- up_read(&ni->runlist.lock);
- /*
- * If buffer is outside the runlist, treat it as a
- * hole. This can happen due to concurrent truncate
- * for example.
- */
- if (err == -ENOENT || lcn == LCN_ENOENT) {
- err = 0;
- goto handle_hole;
- }
- /* Hard error, zero out region. */
- if (!err)
- err = -EIO;
- bh->b_blocknr = -1;
- folio_set_error(folio);
- ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
- "attribute type 0x%x, vcn 0x%llx, "
- "offset 0x%x because its location on "
- "disk could not be determined%s "
- "(error code %i).", ni->mft_no,
- ni->type, (unsigned long long)vcn,
- vcn_ofs, is_retry ? " even after "
- "retrying" : "", err);
- }
- /*
- * Either iblock was outside lblock limits or
- * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
- * of the folio and set the buffer uptodate.
- */
-handle_hole:
- bh->b_blocknr = -1UL;
- clear_buffer_mapped(bh);
-handle_zblock:
- folio_zero_range(folio, i * blocksize, blocksize);
- if (likely(!err))
- set_buffer_uptodate(bh);
- } while (i++, iblock++, (bh = bh->b_this_page) != head);
-
- /* Release the lock if we took it. */
- if (rl)
- up_read(&ni->runlist.lock);
-
- /* Check we have at least one buffer ready for i/o. */
- if (nr) {
- struct buffer_head *tbh;
-
- /* Lock the buffers. */
- for (i = 0; i < nr; i++) {
- tbh = arr[i];
- lock_buffer(tbh);
- tbh->b_end_io = ntfs_end_buffer_async_read;
- set_buffer_async_read(tbh);
- }
- /* Finally, start i/o on the buffers. */
- for (i = 0; i < nr; i++) {
- tbh = arr[i];
- if (likely(!buffer_uptodate(tbh)))
- submit_bh(REQ_OP_READ, tbh);
- else
- ntfs_end_buffer_async_read(tbh, 1);
- }
- return 0;
- }
- /* No i/o was scheduled on any of the buffers. */
- if (likely(!folio_test_error(folio)))
- folio_mark_uptodate(folio);
- else /* Signal synchronous i/o error. */
- nr = -EIO;
- folio_unlock(folio);
- return nr;
-}
-
-/**
- * ntfs_read_folio - fill a @folio of a @file with data from the device
- * @file: open file to which the folio @folio belongs or NULL
- * @folio: page cache folio to fill with data
- *
- * For non-resident attributes, ntfs_read_folio() fills the @folio of the open
- * file @file by calling the ntfs version of the generic block_read_full_folio()
- * function, ntfs_read_block(), which in turn creates and reads in the buffers
- * associated with the folio asynchronously.
- *
- * For resident attributes, OTOH, ntfs_read_folio() fills @folio by copying the
- * data from the mft record (which at this stage is most likely in memory) and
- * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
- * even if the mft record is not cached at this point in time, we need to wait
- * for it to be read in before we can do the copy.
- *
- * Return 0 on success and -errno on error.
- */
-static int ntfs_read_folio(struct file *file, struct folio *folio)
-{
- struct page *page = &folio->page;
- loff_t i_size;
- struct inode *vi;
- ntfs_inode *ni, *base_ni;
- u8 *addr;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *mrec;
- unsigned long flags;
- u32 attr_len;
- int err = 0;
-
-retry_readpage:
- BUG_ON(!PageLocked(page));
- vi = page->mapping->host;
- i_size = i_size_read(vi);
- /* Is the page fully outside i_size? (truncate in progress) */
- if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
- PAGE_SHIFT)) {
- zero_user(page, 0, PAGE_SIZE);
- ntfs_debug("Read outside i_size - truncated?");
- goto done;
- }
- /*
- * This can potentially happen because we clear PageUptodate() during
- * ntfs_writepage() of MstProtected() attributes.
- */
- if (PageUptodate(page)) {
- unlock_page(page);
- return 0;
- }
- ni = NTFS_I(vi);
- /*
- * Only $DATA attributes can be encrypted and only unnamed $DATA
- * attributes can be compressed. Index root can have the flags set but
- * this means to create compressed/encrypted files, not that the
- * attribute is compressed/encrypted. Note we need to check for
- * AT_INDEX_ALLOCATION since this is the type of both directory and
- * index inodes.
- */
- if (ni->type != AT_INDEX_ALLOCATION) {
- /* If attribute is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- BUG_ON(ni->type != AT_DATA);
- err = -EACCES;
- goto err_out;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoNonResident(ni) && NInoCompressed(ni)) {
- BUG_ON(ni->type != AT_DATA);
- BUG_ON(ni->name_len);
- return ntfs_read_compressed_block(page);
- }
- }
- /* NInoNonResident() == NInoIndexAllocPresent() */
- if (NInoNonResident(ni)) {
- /* Normal, non-resident data stream. */
- return ntfs_read_block(folio);
- }
- /*
- * Attribute is resident, implying it is not compressed or encrypted.
- * This also means the attribute is smaller than an mft record and
- * hence smaller than a page, so can simply zero out any pages with
- * index above 0. Note the attribute can actually be marked compressed
- * but if it is resident the actual data is not compressed so we are
- * ok to ignore the compressed flag here.
- */
- if (unlikely(page->index > 0)) {
- zero_user(page, 0, PAGE_SIZE);
- goto done;
- }
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- /* Map, pin, and lock the mft record. */
- mrec = map_mft_record(base_ni);
- if (IS_ERR(mrec)) {
- err = PTR_ERR(mrec);
- goto err_out;
- }
- /*
- * If a parallel write made the attribute non-resident, drop the mft
- * record and retry the read_folio.
- */
- if (unlikely(NInoNonResident(ni))) {
- unmap_mft_record(base_ni);
- goto retry_readpage;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto unm_err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err))
- goto put_unm_err_out;
- attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
- read_lock_irqsave(&ni->size_lock, flags);
- if (unlikely(attr_len > ni->initialized_size))
- attr_len = ni->initialized_size;
- i_size = i_size_read(vi);
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (unlikely(attr_len > i_size)) {
- /* Race with shrinking truncate. */
- attr_len = i_size;
- }
- addr = kmap_atomic(page);
- /* Copy the data to the page. */
- memcpy(addr, (u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset),
- attr_len);
- /* Zero the remainder of the page. */
- memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
- flush_dcache_page(page);
- kunmap_atomic(addr);
-put_unm_err_out:
- ntfs_attr_put_search_ctx(ctx);
-unm_err_out:
- unmap_mft_record(base_ni);
-done:
- SetPageUptodate(page);
-err_out:
- unlock_page(page);
- return err;
-}
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_write_block - write a @folio to the backing store
- * @folio: page cache folio to write out
- * @wbc: writeback control structure
- *
- * This function is for writing folios belonging to non-resident, non-mst
- * protected attributes to their backing store.
- *
- * For a folio with buffers, map and write the dirty buffers asynchronously
- * under folio writeback. For a folio without buffers, create buffers for the
- * folio, then proceed as above.
- *
- * If a folio doesn't have buffers the folio dirty state is definitive. If
- * a folio does have buffers, the folio dirty state is just a hint,
- * and the buffer dirty state is definitive. (A hint which has rules:
- * dirty buffers against a clean folio is illegal. Other combinations are
- * legal and need to be handled. In particular a dirty folio containing
- * clean buffers for example.)
- *
- * Return 0 on success and -errno on error.
- *
- * Based on ntfs_read_block() and __block_write_full_folio().
- */
-static int ntfs_write_block(struct folio *folio, struct writeback_control *wbc)
-{
- VCN vcn;
- LCN lcn;
- s64 initialized_size;
- loff_t i_size;
- sector_t block, dblock, iblock;
- struct inode *vi;
- ntfs_inode *ni;
- ntfs_volume *vol;
- runlist_element *rl;
- struct buffer_head *bh, *head;
- unsigned long flags;
- unsigned int blocksize, vcn_ofs;
- int err;
- bool need_end_writeback;
- unsigned char blocksize_bits;
-
- vi = folio->mapping->host;
- ni = NTFS_I(vi);
- vol = ni->vol;
-
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx.", ni->mft_no, ni->type, folio->index);
-
- BUG_ON(!NInoNonResident(ni));
- BUG_ON(NInoMstProtected(ni));
- blocksize = vol->sb->s_blocksize;
- blocksize_bits = vol->sb->s_blocksize_bits;
- head = folio_buffers(folio);
- if (!head) {
- BUG_ON(!folio_test_uptodate(folio));
- head = create_empty_buffers(folio, blocksize,
- (1 << BH_Uptodate) | (1 << BH_Dirty));
- }
- bh = head;
-
- /* NOTE: Different naming scheme to ntfs_read_block()! */
-
- /* The first block in the folio. */
- block = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);
-
- read_lock_irqsave(&ni->size_lock, flags);
- i_size = i_size_read(vi);
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
-
- /* The first out of bounds block for the data size. */
- dblock = (i_size + blocksize - 1) >> blocksize_bits;
-
- /* The last (fully or partially) initialized block. */
- iblock = initialized_size >> blocksize_bits;
-
- /*
- * Be very careful. We have no exclusion from block_dirty_folio
- * here, and the (potentially unmapped) buffers may become dirty at
- * any time. If a buffer becomes dirty here after we've inspected it
- * then we just miss that fact, and the folio stays dirty.
- *
- * Buffers outside i_size may be dirtied by block_dirty_folio;
- * handle that here by just cleaning them.
- */
-
- /*
- * Loop through all the buffers in the folio, mapping all the dirty
- * buffers to disk addresses and handling any aliases from the
- * underlying block device's mapping.
- */
- rl = NULL;
- err = 0;
- do {
- bool is_retry = false;
-
- if (unlikely(block >= dblock)) {
- /*
- * Mapped buffers outside i_size will occur, because
- * this folio can be outside i_size when there is a
- * truncate in progress. The contents of such buffers
- * were zeroed by ntfs_writepage().
- *
- * FIXME: What about the small race window where
- * ntfs_writepage() has not done any clearing because
- * the folio was within i_size but before we get here,
- * vmtruncate() modifies i_size?
- */
- clear_buffer_dirty(bh);
- set_buffer_uptodate(bh);
- continue;
- }
-
- /* Clean buffers are not written out, so no need to map them. */
- if (!buffer_dirty(bh))
- continue;
-
- /* Make sure we have enough initialized size. */
- if (unlikely((block >= iblock) &&
- (initialized_size < i_size))) {
- /*
- * If this folio is fully outside initialized
- * size, zero out all folios between the current
- * initialized size and the current folio. Just
- * use ntfs_read_folio() to do the zeroing
- * transparently.
- */
- if (block > iblock) {
- // TODO:
- // For each folio do:
- // - read_cache_folio()
- // Again for each folio do:
- // - wait_on_folio_locked()
- // - Check (folio_test_uptodate(folio) &&
- // !folio_test_error(folio))
- // Update initialized size in the attribute and
- // in the inode.
- // Again, for each folio do:
- // block_dirty_folio();
- // folio_put()
- // We don't need to wait on the writes.
- // Update iblock.
- }
- /*
- * The current folio straddles initialized size. Zero
- * all non-uptodate buffers and set them uptodate (and
- * dirty?). Note, there aren't any non-uptodate buffers
- * if the folio is uptodate.
- * FIXME: For an uptodate folio, the buffers may need to
- * be written out because they were not initialized on
- * disk before.
- */
- if (!folio_test_uptodate(folio)) {
- // TODO:
- // Zero any non-uptodate buffers up to i_size.
- // Set them uptodate and dirty.
- }
- // TODO:
- // Update initialized size in the attribute and in the
- // inode (up to i_size).
- // Update iblock.
- // FIXME: This is inefficient. Try to batch the two
- // size changes to happen in one go.
- ntfs_error(vol->sb, "Writing beyond initialized size "
- "is not supported yet. Sorry.");
- err = -EOPNOTSUPP;
- break;
- // Do NOT set_buffer_new() BUT DO clear buffer range
- // outside write request range.
- // set_buffer_uptodate() on complete buffers as well as
- // set_buffer_dirty().
- }
-
- /* No need to map buffers that are already mapped. */
- if (buffer_mapped(bh))
- continue;
-
- /* Unmapped, dirty buffer. Need to map it. */
- bh->b_bdev = vol->sb->s_bdev;
-
- /* Convert block into corresponding vcn and offset. */
- vcn = (VCN)block << blocksize_bits;
- vcn_ofs = vcn & vol->cluster_size_mask;
- vcn >>= vol->cluster_size_bits;
- if (!rl) {
-lock_retry_remap:
- down_read(&ni->runlist.lock);
- rl = ni->runlist.rl;
- }
- if (likely(rl != NULL)) {
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- } else
- lcn = LCN_RL_NOT_MAPPED;
- /* Successful remap. */
- if (lcn >= 0) {
- /* Setup buffer head to point to correct block. */
- bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
- vcn_ofs) >> blocksize_bits;
- set_buffer_mapped(bh);
- continue;
- }
- /* It is a hole, need to instantiate it. */
- if (lcn == LCN_HOLE) {
- u8 *kaddr;
- unsigned long *bpos, *bend;
-
- /* Check if the buffer is zero. */
- kaddr = kmap_local_folio(folio, bh_offset(bh));
- bpos = (unsigned long *)kaddr;
- bend = (unsigned long *)(kaddr + blocksize);
- do {
- if (unlikely(*bpos))
- break;
- } while (likely(++bpos < bend));
- kunmap_local(kaddr);
- if (bpos == bend) {
- /*
- * Buffer is zero and sparse, no need to write
- * it.
- */
- bh->b_blocknr = -1;
- clear_buffer_dirty(bh);
- continue;
- }
- // TODO: Instantiate the hole.
- // clear_buffer_new(bh);
- // clean_bdev_bh_alias(bh);
- ntfs_error(vol->sb, "Writing into sparse regions is "
- "not supported yet. Sorry.");
- err = -EOPNOTSUPP;
- break;
- }
- /* If first try and runlist unmapped, map and retry. */
- if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
- is_retry = true;
- /*
- * Attempt to map runlist, dropping lock for
- * the duration.
- */
- up_read(&ni->runlist.lock);
- err = ntfs_map_runlist(ni, vcn);
- if (likely(!err))
- goto lock_retry_remap;
- rl = NULL;
- } else if (!rl)
- up_read(&ni->runlist.lock);
- /*
- * If buffer is outside the runlist, truncate has cut it out
- * of the runlist. Just clean and clear the buffer and set it
- * uptodate so it can get discarded by the VM.
- */
- if (err == -ENOENT || lcn == LCN_ENOENT) {
- bh->b_blocknr = -1;
- clear_buffer_dirty(bh);
- folio_zero_range(folio, bh_offset(bh), blocksize);
- set_buffer_uptodate(bh);
- err = 0;
- continue;
- }
- /* Failed to map the buffer, even after retrying. */
- if (!err)
- err = -EIO;
- bh->b_blocknr = -1;
- ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
- "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
- "because its location on disk could not be "
- "determined%s (error code %i).", ni->mft_no,
- ni->type, (unsigned long long)vcn,
- vcn_ofs, is_retry ? " even after "
- "retrying" : "", err);
- break;
- } while (block++, (bh = bh->b_this_page) != head);
-
- /* Release the lock if we took it. */
- if (rl)
- up_read(&ni->runlist.lock);
-
- /* For the error case, need to reset bh to the beginning. */
- bh = head;
-
- /* Just an optimization, so ->read_folio() is not called later. */
- if (unlikely(!folio_test_uptodate(folio))) {
- int uptodate = 1;
- do {
- if (!buffer_uptodate(bh)) {
- uptodate = 0;
- bh = head;
- break;
- }
- } while ((bh = bh->b_this_page) != head);
- if (uptodate)
- folio_mark_uptodate(folio);
- }
-
- /* Setup all mapped, dirty buffers for async write i/o. */
- do {
- if (buffer_mapped(bh) && buffer_dirty(bh)) {
- lock_buffer(bh);
- if (test_clear_buffer_dirty(bh)) {
- BUG_ON(!buffer_uptodate(bh));
- mark_buffer_async_write(bh);
- } else
- unlock_buffer(bh);
- } else if (unlikely(err)) {
- /*
- * For the error case. The buffer may have been set
- * dirty during attachment to a dirty folio.
- */
- if (err != -ENOMEM)
- clear_buffer_dirty(bh);
- }
- } while ((bh = bh->b_this_page) != head);
-
- if (unlikely(err)) {
- // TODO: Remove the -EOPNOTSUPP check later on...
- if (unlikely(err == -EOPNOTSUPP))
- err = 0;
- else if (err == -ENOMEM) {
- ntfs_warning(vol->sb, "Error allocating memory. "
- "Redirtying folio so we try again "
- "later.");
- /*
- * Put the folio back on mapping->dirty_pages, but
- * leave its buffer's dirty state as-is.
- */
- folio_redirty_for_writepage(wbc, folio);
- err = 0;
- } else
- folio_set_error(folio);
- }
-
- BUG_ON(folio_test_writeback(folio));
- folio_start_writeback(folio); /* Keeps try_to_free_buffers() away. */
-
- /* Submit the prepared buffers for i/o. */
- need_end_writeback = true;
- do {
- struct buffer_head *next = bh->b_this_page;
- if (buffer_async_write(bh)) {
- submit_bh(REQ_OP_WRITE, bh);
- need_end_writeback = false;
- }
- bh = next;
- } while (bh != head);
- folio_unlock(folio);
-
- /* If no i/o was started, need to end writeback here. */
- if (unlikely(need_end_writeback))
- folio_end_writeback(folio);
-
- ntfs_debug("Done.");
- return err;
-}
-
-/**
- * ntfs_write_mst_block - write a @page to the backing store
- * @page: page cache page to write out
- * @wbc: writeback control structure
- *
- * This function is for writing pages belonging to non-resident, mst protected
- * attributes to their backing store. The only supported attributes are index
- * allocation and $MFT/$DATA. Both directory inodes and index inodes are
- * supported for the index allocation case.
- *
- * The page must remain locked for the duration of the write because we apply
- * the mst fixups, write, and then undo the fixups, so if we were to unlock the
- * page before undoing the fixups, any other user of the page will see the
- * page contents as corrupt.
- *
- * We clear the page uptodate flag for the duration of the function to ensure
- * exclusion for the $MFT/$DATA case against someone mapping an mft record we
- * are about to apply the mst fixups to.
- *
- * Return 0 on success and -errno on error.
- *
- * Based on ntfs_write_block(), ntfs_mft_writepage(), and
- * write_mft_record_nolock().
- */
-static int ntfs_write_mst_block(struct page *page,
- struct writeback_control *wbc)
-{
- sector_t block, dblock, rec_block;
- struct inode *vi = page->mapping->host;
- ntfs_inode *ni = NTFS_I(vi);
- ntfs_volume *vol = ni->vol;
- u8 *kaddr;
- unsigned int rec_size = ni->itype.index.block_size;
- ntfs_inode *locked_nis[PAGE_SIZE / NTFS_BLOCK_SIZE];
- struct buffer_head *bh, *head, *tbh, *rec_start_bh;
- struct buffer_head *bhs[MAX_BUF_PER_PAGE];
- runlist_element *rl;
- int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
- unsigned bh_size, rec_size_bits;
- bool sync, is_mft, page_is_dirty, rec_is_dirty;
- unsigned char bh_size_bits;
-
- if (WARN_ON(rec_size < NTFS_BLOCK_SIZE))
- return -EINVAL;
-
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx.", vi->i_ino, ni->type, page->index);
- BUG_ON(!NInoNonResident(ni));
- BUG_ON(!NInoMstProtected(ni));
- is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
- /*
- * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
- * in its page cache were to be marked dirty. However this should
- * never happen with the current driver and considering we do not
- * handle this case here we do want to BUG(), at least for now.
- */
- BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
- (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
- bh_size = vol->sb->s_blocksize;
- bh_size_bits = vol->sb->s_blocksize_bits;
- max_bhs = PAGE_SIZE / bh_size;
- BUG_ON(!max_bhs);
- BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
-
- /* Were we called for sync purposes? */
- sync = (wbc->sync_mode == WB_SYNC_ALL);
-
- /* Make sure we have mapped buffers. */
- bh = head = page_buffers(page);
- BUG_ON(!bh);
-
- rec_size_bits = ni->itype.index.block_size_bits;
- BUG_ON(!(PAGE_SIZE >> rec_size_bits));
- bhs_per_rec = rec_size >> bh_size_bits;
- BUG_ON(!bhs_per_rec);
-
- /* The first block in the page. */
- rec_block = block = (sector_t)page->index <<
- (PAGE_SHIFT - bh_size_bits);
-
- /* The first out of bounds block for the data size. */
- dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
-
- rl = NULL;
- err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
- page_is_dirty = rec_is_dirty = false;
- rec_start_bh = NULL;
- do {
- bool is_retry = false;
-
- if (likely(block < rec_block)) {
- if (unlikely(block >= dblock)) {
- clear_buffer_dirty(bh);
- set_buffer_uptodate(bh);
- continue;
- }
- /*
- * This block is not the first one in the record. We
- * ignore the buffer's dirty state because we could
- * have raced with a parallel mark_ntfs_record_dirty().
- */
- if (!rec_is_dirty)
- continue;
- if (unlikely(err2)) {
- if (err2 != -ENOMEM)
- clear_buffer_dirty(bh);
- continue;
- }
- } else /* if (block == rec_block) */ {
- BUG_ON(block > rec_block);
- /* This block is the first one in the record. */
- rec_block += bhs_per_rec;
- err2 = 0;
- if (unlikely(block >= dblock)) {
- clear_buffer_dirty(bh);
- continue;
- }
- if (!buffer_dirty(bh)) {
- /* Clean records are not written out. */
- rec_is_dirty = false;
- continue;
- }
- rec_is_dirty = true;
- rec_start_bh = bh;
- }
- /* Need to map the buffer if it is not mapped already. */
- if (unlikely(!buffer_mapped(bh))) {
- VCN vcn;
- LCN lcn;
- unsigned int vcn_ofs;
-
- bh->b_bdev = vol->sb->s_bdev;
- /* Obtain the vcn and offset of the current block. */
- vcn = (VCN)block << bh_size_bits;
- vcn_ofs = vcn & vol->cluster_size_mask;
- vcn >>= vol->cluster_size_bits;
- if (!rl) {
-lock_retry_remap:
- down_read(&ni->runlist.lock);
- rl = ni->runlist.rl;
- }
- if (likely(rl != NULL)) {
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- } else
- lcn = LCN_RL_NOT_MAPPED;
- /* Successful remap. */
- if (likely(lcn >= 0)) {
- /* Setup buffer head to correct block. */
- bh->b_blocknr = ((lcn <<
- vol->cluster_size_bits) +
- vcn_ofs) >> bh_size_bits;
- set_buffer_mapped(bh);
- } else {
- /*
- * Remap failed. Retry to map the runlist once
- * unless we are working on $MFT which always
- * has the whole of its runlist in memory.
- */
- if (!is_mft && !is_retry &&
- lcn == LCN_RL_NOT_MAPPED) {
- is_retry = true;
- /*
- * Attempt to map runlist, dropping
- * lock for the duration.
- */
- up_read(&ni->runlist.lock);
- err2 = ntfs_map_runlist(ni, vcn);
- if (likely(!err2))
- goto lock_retry_remap;
- if (err2 == -ENOMEM)
- page_is_dirty = true;
- lcn = err2;
- } else {
- err2 = -EIO;
- if (!rl)
- up_read(&ni->runlist.lock);
- }
- /* Hard error. Abort writing this record. */
- if (!err || err == -ENOMEM)
- err = err2;
- bh->b_blocknr = -1;
- ntfs_error(vol->sb, "Cannot write ntfs record "
- "0x%llx (inode 0x%lx, "
- "attribute type 0x%x) because "
- "its location on disk could "
- "not be determined (error "
- "code %lli).",
- (long long)block <<
- bh_size_bits >>
- vol->mft_record_size_bits,
- ni->mft_no, ni->type,
- (long long)lcn);
- /*
- * If this is not the first buffer, remove the
- * buffers in this record from the list of
- * buffers to write and clear their dirty bit
- * if not error -ENOMEM.
- */
- if (rec_start_bh != bh) {
- while (bhs[--nr_bhs] != rec_start_bh)
- ;
- if (err2 != -ENOMEM) {
- do {
- clear_buffer_dirty(
- rec_start_bh);
- } while ((rec_start_bh =
- rec_start_bh->
- b_this_page) !=
- bh);
- }
- }
- continue;
- }
- }
- BUG_ON(!buffer_uptodate(bh));
- BUG_ON(nr_bhs >= max_bhs);
- bhs[nr_bhs++] = bh;
- } while (block++, (bh = bh->b_this_page) != head);
- if (unlikely(rl))
- up_read(&ni->runlist.lock);
- /* If there were no dirty buffers, we are done. */
- if (!nr_bhs)
- goto done;
- /* Map the page so we can access its contents. */
- kaddr = kmap(page);
- /* Clear the page uptodate flag whilst the mst fixups are applied. */
- BUG_ON(!PageUptodate(page));
- ClearPageUptodate(page);
- for (i = 0; i < nr_bhs; i++) {
- unsigned int ofs;
-
- /* Skip buffers which are not at the beginning of records. */
- if (i % bhs_per_rec)
- continue;
- tbh = bhs[i];
- ofs = bh_offset(tbh);
- if (is_mft) {
- ntfs_inode *tni;
- unsigned long mft_no;
-
- /* Get the mft record number. */
- mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
- >> rec_size_bits;
- /* Check whether to write this mft record. */
- tni = NULL;
- if (!ntfs_may_write_mft_record(vol, mft_no,
- (MFT_RECORD*)(kaddr + ofs), &tni)) {
- /*
- * The record should not be written. This
- * means we need to redirty the page before
- * returning.
- */
- page_is_dirty = true;
- /*
- * Remove the buffers in this mft record from
- * the list of buffers to write.
- */
- do {
- bhs[i] = NULL;
- } while (++i % bhs_per_rec);
- continue;
- }
- /*
- * The record should be written. If a locked ntfs
- * inode was returned, add it to the array of locked
- * ntfs inodes.
- */
- if (tni)
- locked_nis[nr_locked_nis++] = tni;
- }
- /* Apply the mst protection fixups. */
- err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
- rec_size);
- if (unlikely(err2)) {
- if (!err || err == -ENOMEM)
- err = -EIO;
- ntfs_error(vol->sb, "Failed to apply mst fixups "
- "(inode 0x%lx, attribute type 0x%x, "
- "page index 0x%lx, page offset 0x%x)!"
- " Unmount and run chkdsk.", vi->i_ino,
- ni->type, page->index, ofs);
- /*
- * Mark all the buffers in this record clean as we do
- * not want to write corrupt data to disk.
- */
- do {
- clear_buffer_dirty(bhs[i]);
- bhs[i] = NULL;
- } while (++i % bhs_per_rec);
- continue;
- }
- nr_recs++;
- }
- /* If no records are to be written out, we are done. */
- if (!nr_recs)
- goto unm_done;
- flush_dcache_page(page);
- /* Lock buffers and start synchronous write i/o on them. */
- for (i = 0; i < nr_bhs; i++) {
- tbh = bhs[i];
- if (!tbh)
- continue;
- if (!trylock_buffer(tbh))
- BUG();
- /* The buffer dirty state is now irrelevant, just clean it. */
- clear_buffer_dirty(tbh);
- BUG_ON(!buffer_uptodate(tbh));
- BUG_ON(!buffer_mapped(tbh));
- get_bh(tbh);
- tbh->b_end_io = end_buffer_write_sync;
- submit_bh(REQ_OP_WRITE, tbh);
- }
- /* Synchronize the mft mirror now if not @sync. */
- if (is_mft && !sync)
- goto do_mirror;
-do_wait:
- /* Wait on i/o completion of buffers. */
- for (i = 0; i < nr_bhs; i++) {
- tbh = bhs[i];
- if (!tbh)
- continue;
- wait_on_buffer(tbh);
- if (unlikely(!buffer_uptodate(tbh))) {
- ntfs_error(vol->sb, "I/O error while writing ntfs "
- "record buffer (inode 0x%lx, "
- "attribute type 0x%x, page index "
- "0x%lx, page offset 0x%lx)! Unmount "
- "and run chkdsk.", vi->i_ino, ni->type,
- page->index, bh_offset(tbh));
- if (!err || err == -ENOMEM)
- err = -EIO;
- /*
- * Set the buffer uptodate so the page and buffer
- * states do not become out of sync.
- */
- set_buffer_uptodate(tbh);
- }
- }
- /* If @sync, now synchronize the mft mirror. */
- if (is_mft && sync) {
-do_mirror:
- for (i = 0; i < nr_bhs; i++) {
- unsigned long mft_no;
- unsigned int ofs;
-
- /*
- * Skip buffers which are not at the beginning of
- * records.
- */
- if (i % bhs_per_rec)
- continue;
- tbh = bhs[i];
- /* Skip removed buffers (and hence records). */
- if (!tbh)
- continue;
- ofs = bh_offset(tbh);
- /* Get the mft record number. */
- mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
- >> rec_size_bits;
- if (mft_no < vol->mftmirr_size)
- ntfs_sync_mft_mirror(vol, mft_no,
- (MFT_RECORD*)(kaddr + ofs),
- sync);
- }
- if (!sync)
- goto do_wait;
- }
- /* Remove the mst protection fixups again. */
- for (i = 0; i < nr_bhs; i++) {
- if (!(i % bhs_per_rec)) {
- tbh = bhs[i];
- if (!tbh)
- continue;
- post_write_mst_fixup((NTFS_RECORD*)(kaddr +
- bh_offset(tbh)));
- }
- }
- flush_dcache_page(page);
-unm_done:
- /* Unlock any locked inodes. */
- while (nr_locked_nis-- > 0) {
- ntfs_inode *tni, *base_tni;
-
- tni = locked_nis[nr_locked_nis];
- /* Get the base inode. */
- mutex_lock(&tni->extent_lock);
- if (tni->nr_extents >= 0)
- base_tni = tni;
- else {
- base_tni = tni->ext.base_ntfs_ino;
- BUG_ON(!base_tni);
- }
- mutex_unlock(&tni->extent_lock);
- ntfs_debug("Unlocking %s inode 0x%lx.",
- tni == base_tni ? "base" : "extent",
- tni->mft_no);
- mutex_unlock(&tni->mrec_lock);
- atomic_dec(&tni->count);
- iput(VFS_I(base_tni));
- }
- SetPageUptodate(page);
- kunmap(page);
-done:
- if (unlikely(err && err != -ENOMEM)) {
- /*
- * Set page error if there is only one ntfs record in the page.
- * Otherwise we would loose per-record granularity.
- */
- if (ni->itype.index.block_size == PAGE_SIZE)
- SetPageError(page);
- NVolSetErrors(vol);
- }
- if (page_is_dirty) {
- ntfs_debug("Page still contains one or more dirty ntfs "
- "records. Redirtying the page starting at "
- "record 0x%lx.", page->index <<
- (PAGE_SHIFT - rec_size_bits));
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- } else {
- /*
- * Keep the VM happy. This must be done otherwise the
- * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
- * the page is clean.
- */
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
- end_page_writeback(page);
- }
- if (likely(!err))
- ntfs_debug("Done.");
- return err;
-}
-
-/**
- * ntfs_writepage - write a @page to the backing store
- * @page: page cache page to write out
- * @wbc: writeback control structure
- *
- * This is called from the VM when it wants to have a dirty ntfs page cache
- * page cleaned. The VM has already locked the page and marked it clean.
- *
- * For non-resident attributes, ntfs_writepage() writes the @page by calling
- * the ntfs version of the generic block_write_full_folio() function,
- * ntfs_write_block(), which in turn if necessary creates and writes the
- * buffers associated with the page asynchronously.
- *
- * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
- * the data to the mft record (which at this stage is most likely in memory).
- * The mft record is then marked dirty and written out asynchronously via the
- * vfs inode dirty code path for the inode the mft record belongs to or via the
- * vm page dirty code path for the page the mft record is in.
- *
- * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_folio().
- *
- * Return 0 on success and -errno on error.
- */
-static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct folio *folio = page_folio(page);
- loff_t i_size;
- struct inode *vi = folio->mapping->host;
- ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
- char *addr;
- ntfs_attr_search_ctx *ctx = NULL;
- MFT_RECORD *m = NULL;
- u32 attr_len;
- int err;
-
-retry_writepage:
- BUG_ON(!folio_test_locked(folio));
- i_size = i_size_read(vi);
- /* Is the folio fully outside i_size? (truncate in progress) */
- if (unlikely(folio->index >= (i_size + PAGE_SIZE - 1) >>
- PAGE_SHIFT)) {
- /*
- * The folio may have dirty, unmapped buffers. Make them
- * freeable here, so the page does not leak.
- */
- block_invalidate_folio(folio, 0, folio_size(folio));
- folio_unlock(folio);
- ntfs_debug("Write outside i_size - truncated?");
- return 0;
- }
- /*
- * Only $DATA attributes can be encrypted and only unnamed $DATA
- * attributes can be compressed. Index root can have the flags set but
- * this means to create compressed/encrypted files, not that the
- * attribute is compressed/encrypted. Note we need to check for
- * AT_INDEX_ALLOCATION since this is the type of both directory and
- * index inodes.
- */
- if (ni->type != AT_INDEX_ALLOCATION) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- folio_unlock(folio);
- BUG_ON(ni->type != AT_DATA);
- ntfs_debug("Denying write access to encrypted file.");
- return -EACCES;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoNonResident(ni) && NInoCompressed(ni)) {
- BUG_ON(ni->type != AT_DATA);
- BUG_ON(ni->name_len);
- // TODO: Implement and replace this with
- // return ntfs_write_compressed_block(page);
- folio_unlock(folio);
- ntfs_error(vi->i_sb, "Writing to compressed files is "
- "not supported yet. Sorry.");
- return -EOPNOTSUPP;
- }
- // TODO: Implement and remove this check.
- if (NInoNonResident(ni) && NInoSparse(ni)) {
- folio_unlock(folio);
- ntfs_error(vi->i_sb, "Writing to sparse files is not "
- "supported yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
- /* NInoNonResident() == NInoIndexAllocPresent() */
- if (NInoNonResident(ni)) {
- /* We have to zero every time due to mmap-at-end-of-file. */
- if (folio->index >= (i_size >> PAGE_SHIFT)) {
- /* The folio straddles i_size. */
- unsigned int ofs = i_size & (folio_size(folio) - 1);
- folio_zero_segment(folio, ofs, folio_size(folio));
- }
- /* Handle mst protected attributes. */
- if (NInoMstProtected(ni))
- return ntfs_write_mst_block(page, wbc);
- /* Normal, non-resident data stream. */
- return ntfs_write_block(folio, wbc);
- }
- /*
- * Attribute is resident, implying it is not compressed, encrypted, or
- * mst protected. This also means the attribute is smaller than an mft
- * record and hence smaller than a folio, so can simply return error on
- * any folios with index above 0. Note the attribute can actually be
- * marked compressed but if it is resident the actual data is not
- * compressed so we are ok to ignore the compressed flag here.
- */
- BUG_ON(folio_buffers(folio));
- BUG_ON(!folio_test_uptodate(folio));
- if (unlikely(folio->index > 0)) {
- ntfs_error(vi->i_sb, "BUG()! folio->index (0x%lx) > 0. "
- "Aborting write.", folio->index);
- BUG_ON(folio_test_writeback(folio));
- folio_start_writeback(folio);
- folio_unlock(folio);
- folio_end_writeback(folio);
- return -EIO;
- }
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- /* Map, pin, and lock the mft record. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- /*
- * If a parallel write made the attribute non-resident, drop the mft
- * record and retry the writepage.
- */
- if (unlikely(NInoNonResident(ni))) {
- unmap_mft_record(base_ni);
- goto retry_writepage;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err))
- goto err_out;
- /*
- * Keep the VM happy. This must be done otherwise
- * PAGECACHE_TAG_DIRTY remains set even though the folio is clean.
- */
- BUG_ON(folio_test_writeback(folio));
- folio_start_writeback(folio);
- folio_unlock(folio);
- attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
- i_size = i_size_read(vi);
- if (unlikely(attr_len > i_size)) {
- /* Race with shrinking truncate or a failed truncate. */
- attr_len = i_size;
- /*
- * If the truncate failed, fix it up now. If a concurrent
- * truncate, we do its job, so it does not have to do anything.
- */
- err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
- attr_len);
- /* Shrinking cannot fail. */
- BUG_ON(err);
- }
- addr = kmap_local_folio(folio, 0);
- /* Copy the data from the folio to the mft record. */
- memcpy((u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset),
- addr, attr_len);
- /* Zero out of bounds area in the page cache folio. */
- memset(addr + attr_len, 0, folio_size(folio) - attr_len);
- kunmap_local(addr);
- flush_dcache_folio(folio);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- /* We are done with the folio. */
- folio_end_writeback(folio);
- /* Finally, mark the mft record dirty, so it gets written back. */
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- return 0;
-err_out:
- if (err == -ENOMEM) {
- ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
- "page so we try again later.");
- /*
- * Put the folio back on mapping->dirty_pages, but leave its
- * buffers' dirty state as-is.
- */
- folio_redirty_for_writepage(wbc, folio);
- err = 0;
- } else {
- ntfs_error(vi->i_sb, "Resident attribute write failed with "
- "error %i.", err);
- folio_set_error(folio);
- NVolSetErrors(ni->vol);
- }
- folio_unlock(folio);
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- return err;
-}
-
-#endif /* NTFS_RW */
-
-/**
- * ntfs_bmap - map logical file block to physical device block
- * @mapping: address space mapping to which the block to be mapped belongs
- * @block: logical block to map to its physical device block
- *
- * For regular, non-resident files (i.e. not compressed and not encrypted), map
- * the logical @block belonging to the file described by the address space
- * mapping @mapping to its physical device block.
- *
- * The size of the block is equal to the @s_blocksize field of the super block
- * of the mounted file system which is guaranteed to be smaller than or equal
- * to the cluster size thus the block is guaranteed to fit entirely inside the
- * cluster which means we do not need to care how many contiguous bytes are
- * available after the beginning of the block.
- *
- * Return the physical device block if the mapping succeeded or 0 if the block
- * is sparse or there was an error.
- *
- * Note: This is a problem if someone tries to run bmap() on $Boot system file
- * as that really is in block zero but there is nothing we can do. bmap() is
- * just broken in that respect (just like it cannot distinguish sparse from
- * not available or error).
- */
-static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
-{
- s64 ofs, size;
- loff_t i_size;
- LCN lcn;
- unsigned long blocksize, flags;
- ntfs_inode *ni = NTFS_I(mapping->host);
- ntfs_volume *vol = ni->vol;
- unsigned delta;
- unsigned char blocksize_bits, cluster_size_shift;
-
- ntfs_debug("Entering for mft_no 0x%lx, logical block 0x%llx.",
- ni->mft_no, (unsigned long long)block);
- if (ni->type != AT_DATA || !NInoNonResident(ni) || NInoEncrypted(ni)) {
- ntfs_error(vol->sb, "BMAP does not make sense for %s "
- "attributes, returning 0.",
- (ni->type != AT_DATA) ? "non-data" :
- (!NInoNonResident(ni) ? "resident" :
- "encrypted"));
- return 0;
- }
- /* None of these can happen. */
- BUG_ON(NInoCompressed(ni));
- BUG_ON(NInoMstProtected(ni));
- blocksize = vol->sb->s_blocksize;
- blocksize_bits = vol->sb->s_blocksize_bits;
- ofs = (s64)block << blocksize_bits;
- read_lock_irqsave(&ni->size_lock, flags);
- size = ni->initialized_size;
- i_size = i_size_read(VFS_I(ni));
- read_unlock_irqrestore(&ni->size_lock, flags);
- /*
- * If the offset is outside the initialized size or the block straddles
- * the initialized size then pretend it is a hole unless the
- * initialized size equals the file size.
- */
- if (unlikely(ofs >= size || (ofs + blocksize > size && size < i_size)))
- goto hole;
- cluster_size_shift = vol->cluster_size_bits;
- down_read(&ni->runlist.lock);
- lcn = ntfs_attr_vcn_to_lcn_nolock(ni, ofs >> cluster_size_shift, false);
- up_read(&ni->runlist.lock);
- if (unlikely(lcn < LCN_HOLE)) {
- /*
- * Step down to an integer to avoid gcc doing a long long
- * comparision in the switch when we know @lcn is between
- * LCN_HOLE and LCN_EIO (i.e. -1 to -5).
- *
- * Otherwise older gcc (at least on some architectures) will
- * try to use __cmpdi2() which is of course not available in
- * the kernel.
- */
- switch ((int)lcn) {
- case LCN_ENOENT:
- /*
- * If the offset is out of bounds then pretend it is a
- * hole.
- */
- goto hole;
- case LCN_ENOMEM:
- ntfs_error(vol->sb, "Not enough memory to complete "
- "mapping for inode 0x%lx. "
- "Returning 0.", ni->mft_no);
- break;
- default:
- ntfs_error(vol->sb, "Failed to complete mapping for "
- "inode 0x%lx. Run chkdsk. "
- "Returning 0.", ni->mft_no);
- break;
- }
- return 0;
- }
- if (lcn < 0) {
- /* It is a hole. */
-hole:
- ntfs_debug("Done (returning hole).");
- return 0;
- }
- /*
- * The block is really allocated and fullfils all our criteria.
- * Convert the cluster to units of block size and return the result.
- */
- delta = ofs & vol->cluster_size_mask;
- if (unlikely(sizeof(block) < sizeof(lcn))) {
- block = lcn = ((lcn << cluster_size_shift) + delta) >>
- blocksize_bits;
- /* If the block number was truncated return 0. */
- if (unlikely(block != lcn)) {
- ntfs_error(vol->sb, "Physical block 0x%llx is too "
- "large to be returned, returning 0.",
- (long long)lcn);
- return 0;
- }
- } else
- block = ((lcn << cluster_size_shift) + delta) >>
- blocksize_bits;
- ntfs_debug("Done (returning block 0x%llx).", (unsigned long long)lcn);
- return block;
-}
-
-/*
- * ntfs_normal_aops - address space operations for normal inodes and attributes
- *
- * Note these are not used for compressed or mst protected inodes and
- * attributes.
- */
-const struct address_space_operations ntfs_normal_aops = {
- .read_folio = ntfs_read_folio,
-#ifdef NTFS_RW
- .writepage = ntfs_writepage,
- .dirty_folio = block_dirty_folio,
-#endif /* NTFS_RW */
- .bmap = ntfs_bmap,
- .migrate_folio = buffer_migrate_folio,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_folio = generic_error_remove_folio,
-};
-
-/*
- * ntfs_compressed_aops - address space operations for compressed inodes
- */
-const struct address_space_operations ntfs_compressed_aops = {
- .read_folio = ntfs_read_folio,
-#ifdef NTFS_RW
- .writepage = ntfs_writepage,
- .dirty_folio = block_dirty_folio,
-#endif /* NTFS_RW */
- .migrate_folio = buffer_migrate_folio,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_folio = generic_error_remove_folio,
-};
-
-/*
- * ntfs_mst_aops - general address space operations for mst protecteed inodes
- * and attributes
- */
-const struct address_space_operations ntfs_mst_aops = {
- .read_folio = ntfs_read_folio, /* Fill page with data. */
-#ifdef NTFS_RW
- .writepage = ntfs_writepage, /* Write dirty page to disk. */
- .dirty_folio = filemap_dirty_folio,
-#endif /* NTFS_RW */
- .migrate_folio = buffer_migrate_folio,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_folio = generic_error_remove_folio,
-};
-
-#ifdef NTFS_RW
-
-/**
- * mark_ntfs_record_dirty - mark an ntfs record dirty
- * @page: page containing the ntfs record to mark dirty
- * @ofs: byte offset within @page at which the ntfs record begins
- *
- * Set the buffers and the page in which the ntfs record is located dirty.
- *
- * The latter also marks the vfs inode the ntfs record belongs to dirty
- * (I_DIRTY_PAGES only).
- *
- * If the page does not have buffers, we create them and set them uptodate.
- * The page may not be locked which is why we need to handle the buffers under
- * the mapping->i_private_lock. Once the buffers are marked dirty we no longer
- * need the lock since try_to_free_buffers() does not free dirty buffers.
- */
-void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
- struct address_space *mapping = page->mapping;
- ntfs_inode *ni = NTFS_I(mapping->host);
- struct buffer_head *bh, *head, *buffers_to_free = NULL;
- unsigned int end, bh_size, bh_ofs;
-
- BUG_ON(!PageUptodate(page));
- end = ofs + ni->itype.index.block_size;
- bh_size = VFS_I(ni)->i_sb->s_blocksize;
- spin_lock(&mapping->i_private_lock);
- if (unlikely(!page_has_buffers(page))) {
- spin_unlock(&mapping->i_private_lock);
- bh = head = alloc_page_buffers(page, bh_size, true);
- spin_lock(&mapping->i_private_lock);
- if (likely(!page_has_buffers(page))) {
- struct buffer_head *tail;
-
- do {
- set_buffer_uptodate(bh);
- tail = bh;
- bh = bh->b_this_page;
- } while (bh);
- tail->b_this_page = head;
- attach_page_private(page, head);
- } else
- buffers_to_free = bh;
- }
- bh = head = page_buffers(page);
- BUG_ON(!bh);
- do {
- bh_ofs = bh_offset(bh);
- if (bh_ofs + bh_size <= ofs)
- continue;
- if (unlikely(bh_ofs >= end))
- break;
- set_buffer_dirty(bh);
- } while ((bh = bh->b_this_page) != head);
- spin_unlock(&mapping->i_private_lock);
- filemap_dirty_folio(mapping, page_folio(page));
- if (unlikely(buffers_to_free)) {
- do {
- bh = buffers_to_free->b_this_page;
- free_buffer_head(buffers_to_free);
- buffers_to_free = bh;
- } while (buffers_to_free);
- }
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
deleted file mode 100644
index 8d0958a149cb..000000000000
--- a/fs/ntfs/aops.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * aops.h - Defines for NTFS kernel address space operations and page cache
- * handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_AOPS_H
-#define _LINUX_NTFS_AOPS_H
-
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/fs.h>
-
-#include "inode.h"
-
-/**
- * ntfs_unmap_page - release a page that was mapped using ntfs_map_page()
- * @page: the page to release
- *
- * Unpin, unmap and release a page that was obtained from ntfs_map_page().
- */
-static inline void ntfs_unmap_page(struct page *page)
-{
- kunmap(page);
- put_page(page);
-}
-
-/**
- * ntfs_map_page - map a page into accessible memory, reading it if necessary
- * @mapping: address space for which to obtain the page
- * @index: index into the page cache for @mapping of the page to map
- *
- * Read a page from the page cache of the address space @mapping at position
- * @index, where @index is in units of PAGE_SIZE, and not in bytes.
- *
- * If the page is not in memory it is loaded from disk first using the
- * read_folio method defined in the address space operations of @mapping
- * and the page is added to the page cache of @mapping in the process.
- *
- * If the page belongs to an mst protected attribute and it is marked as such
- * in its ntfs inode (NInoMstProtected()) the mst fixups are applied but no
- * error checking is performed. This means the caller has to verify whether
- * the ntfs record(s) contained in the page are valid or not using one of the
- * ntfs_is_XXXX_record{,p}() macros, where XXXX is the record type you are
- * expecting to see. (For details of the macros, see fs/ntfs/layout.h.)
- *
- * If the page is in high memory it is mapped into memory directly addressible
- * by the kernel.
- *
- * Finally the page count is incremented, thus pinning the page into place.
- *
- * The above means that page_address(page) can be used on all pages obtained
- * with ntfs_map_page() to get the kernel virtual address of the page.
- *
- * When finished with the page, the caller has to call ntfs_unmap_page() to
- * unpin, unmap and release the page.
- *
- * Note this does not grant exclusive access. If such is desired, the caller
- * must provide it independently of the ntfs_{un}map_page() calls by using
- * a {rw_}semaphore or other means of serialization. A spin lock cannot be
- * used as ntfs_map_page() can block.
- *
- * The unlocked and uptodate page is returned on success or an encoded error
- * on failure. Caller has to test for error using the IS_ERR() macro on the
- * return value. If that evaluates to 'true', the negative error code can be
- * obtained using PTR_ERR() on the return value of ntfs_map_page().
- */
-static inline struct page *ntfs_map_page(struct address_space *mapping,
- unsigned long index)
-{
- struct page *page = read_mapping_page(mapping, index, NULL);
-
- if (!IS_ERR(page))
- kmap(page);
- return page;
-}
-
-#ifdef NTFS_RW
-
-extern void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_AOPS_H */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
deleted file mode 100644
index f79408f9127a..000000000000
--- a/fs/ntfs/attrib.c
+++ /dev/null
@@ -1,2624 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
- * Copyright (c) 2002 Richard Russon
- */
-
-#include <linux/buffer_head.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-
-#include "attrib.h"
-#include "debug.h"
-#include "layout.h"
-#include "lcnalloc.h"
-#include "malloc.h"
-#include "mft.h"
-#include "ntfs.h"
-#include "types.h"
-
-/**
- * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
- * @ni: ntfs inode for which to map (part of) a runlist
- * @vcn: map runlist part containing this vcn
- * @ctx: active attribute search context if present or NULL if not
- *
- * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
- *
- * If @ctx is specified, it is an active search context of @ni and its base mft
- * record. This is needed when ntfs_map_runlist_nolock() encounters unmapped
- * runlist fragments and allows their mapping. If you do not have the mft
- * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
- * will perform the necessary mapping and unmapping.
- *
- * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
- * restores it before returning. Thus, @ctx will be left pointing to the same
- * attribute on return as on entry. However, the actual pointers in @ctx may
- * point to different memory locations on return, so you must remember to reset
- * any cached pointers from the @ctx, i.e. after the call to
- * ntfs_map_runlist_nolock(), you will probably want to do:
- * m = ctx->mrec;
- * a = ctx->attr;
- * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
- * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
- *
- * Return 0 on success and -errno on error. There is one special error code
- * which is not an error as such. This is -ENOENT. It means that @vcn is out
- * of bounds of the runlist.
- *
- * Note the runlist can be NULL after this function returns if @vcn is zero and
- * the attribute has zero allocated size, i.e. there simply is no runlist.
- *
- * WARNING: If @ctx is supplied, regardless of whether success or failure is
- * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
- * is no longer valid, i.e. you need to either call
- * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
- * In that case PTR_ERR(@ctx->mrec) will give you the error code for
- * why the mapping of the old inode failed.
- *
- * Locking: - The runlist described by @ni must be locked for writing on entry
- * and is locked on return. Note the runlist will be modified.
- * - If @ctx is NULL, the base mft record of @ni must not be mapped on
- * entry and it will be left unmapped on return.
- * - If @ctx is not NULL, the base mft record must be mapped on entry
- * and it will be left mapped on return.
- */
-int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
-{
- VCN end_vcn;
- unsigned long flags;
- ntfs_inode *base_ni;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- runlist_element *rl;
- struct page *put_this_page = NULL;
- int err = 0;
- bool ctx_is_temporary, ctx_needs_reset;
- ntfs_attr_search_ctx old_ctx = { NULL, };
-
- ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
- (unsigned long long)vcn);
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- if (!ctx) {
- ctx_is_temporary = ctx_needs_reset = true;
- m = map_mft_record(base_ni);
- if (IS_ERR(m))
- return PTR_ERR(m);
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- } else {
- VCN allocated_size_vcn;
-
- BUG_ON(IS_ERR(ctx->mrec));
- a = ctx->attr;
- BUG_ON(!a->non_resident);
- ctx_is_temporary = false;
- end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
- read_lock_irqsave(&ni->size_lock, flags);
- allocated_size_vcn = ni->allocated_size >>
- ni->vol->cluster_size_bits;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (!a->data.non_resident.lowest_vcn && end_vcn <= 0)
- end_vcn = allocated_size_vcn - 1;
- /*
- * If we already have the attribute extent containing @vcn in
- * @ctx, no need to look it up again. We slightly cheat in
- * that if vcn exceeds the allocated size, we will refuse to
- * map the runlist below, so there is definitely no need to get
- * the right attribute extent.
- */
- if (vcn >= allocated_size_vcn || (a->type == ni->type &&
- a->name_length == ni->name_len &&
- !memcmp((u8*)a + le16_to_cpu(a->name_offset),
- ni->name, ni->name_len) &&
- sle64_to_cpu(a->data.non_resident.lowest_vcn)
- <= vcn && end_vcn >= vcn))
- ctx_needs_reset = false;
- else {
- /* Save the old search context. */
- old_ctx = *ctx;
- /*
- * If the currently mapped (extent) inode is not the
- * base inode we will unmap it when we reinitialize the
- * search context which means we need to get a
- * reference to the page containing the mapped mft
- * record so we do not accidentally drop changes to the
- * mft record when it has not been marked dirty yet.
- */
- if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
- old_ctx.base_ntfs_ino) {
- put_this_page = old_ctx.ntfs_ino->page;
- get_page(put_this_page);
- }
- /*
- * Reinitialize the search context so we can lookup the
- * needed attribute extent.
- */
- ntfs_attr_reinit_search_ctx(ctx);
- ctx_needs_reset = true;
- }
- }
- if (ctx_needs_reset) {
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, vcn, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- BUG_ON(!ctx->attr->non_resident);
- }
- a = ctx->attr;
- /*
- * Only decompress the mapping pairs if @vcn is inside it. Otherwise
- * we get into problems when we try to map an out of bounds vcn because
- * we then try to map the already mapped runlist fragment and
- * ntfs_mapping_pairs_decompress() fails.
- */
- end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
- if (unlikely(vcn && vcn >= end_vcn)) {
- err = -ENOENT;
- goto err_out;
- }
- rl = ntfs_mapping_pairs_decompress(ni->vol, a, ni->runlist.rl);
- if (IS_ERR(rl))
- err = PTR_ERR(rl);
- else
- ni->runlist.rl = rl;
-err_out:
- if (ctx_is_temporary) {
- if (likely(ctx))
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- } else if (ctx_needs_reset) {
- /*
- * If there is no attribute list, restoring the search context
- * is accomplished simply by copying the saved context back over
- * the caller supplied context. If there is an attribute list,
- * things are more complicated as we need to deal with mapping
- * of mft records and resulting potential changes in pointers.
- */
- if (NInoAttrList(base_ni)) {
- /*
- * If the currently mapped (extent) inode is not the
- * one we had before, we need to unmap it and map the
- * old one.
- */
- if (ctx->ntfs_ino != old_ctx.ntfs_ino) {
- /*
- * If the currently mapped inode is not the
- * base inode, unmap it.
- */
- if (ctx->base_ntfs_ino && ctx->ntfs_ino !=
- ctx->base_ntfs_ino) {
- unmap_extent_mft_record(ctx->ntfs_ino);
- ctx->mrec = ctx->base_mrec;
- BUG_ON(!ctx->mrec);
- }
- /*
- * If the old mapped inode is not the base
- * inode, map it.
- */
- if (old_ctx.base_ntfs_ino &&
- old_ctx.ntfs_ino !=
- old_ctx.base_ntfs_ino) {
-retry_map:
- ctx->mrec = map_mft_record(
- old_ctx.ntfs_ino);
- /*
- * Something bad has happened. If out
- * of memory retry till it succeeds.
- * Any other errors are fatal and we
- * return the error code in ctx->mrec.
- * Let the caller deal with it... We
- * just need to fudge things so the
- * caller can reinit and/or put the
- * search context safely.
- */
- if (IS_ERR(ctx->mrec)) {
- if (PTR_ERR(ctx->mrec) ==
- -ENOMEM) {
- schedule();
- goto retry_map;
- } else
- old_ctx.ntfs_ino =
- old_ctx.
- base_ntfs_ino;
- }
- }
- }
- /* Update the changed pointers in the saved context. */
- if (ctx->mrec != old_ctx.mrec) {
- if (!IS_ERR(ctx->mrec))
- old_ctx.attr = (ATTR_RECORD*)(
- (u8*)ctx->mrec +
- ((u8*)old_ctx.attr -
- (u8*)old_ctx.mrec));
- old_ctx.mrec = ctx->mrec;
- }
- }
- /* Restore the search context to the saved one. */
- *ctx = old_ctx;
- /*
- * We drop the reference on the page we took earlier. In the
- * case that IS_ERR(ctx->mrec) is true this means we might lose
- * some changes to the mft record that had been made between
- * the last time it was marked dirty/written out and now. This
- * at this stage is not a problem as the mapping error is fatal
- * enough that the mft record cannot be written out anyway and
- * the caller is very likely to shutdown the whole inode
- * immediately and mark the volume dirty for chkdsk to pick up
- * the pieces anyway.
- */
- if (put_this_page)
- put_page(put_this_page);
- }
- return err;
-}
-
-/**
- * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
- * @ni: ntfs inode for which to map (part of) a runlist
- * @vcn: map runlist part containing this vcn
- *
- * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
- *
- * Return 0 on success and -errno on error. There is one special error code
- * which is not an error as such. This is -ENOENT. It means that @vcn is out
- * of bounds of the runlist.
- *
- * Locking: - The runlist must be unlocked on entry and is unlocked on return.
- * - This function takes the runlist lock for writing and may modify
- * the runlist.
- */
-int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
-{
- int err = 0;
-
- down_write(&ni->runlist.lock);
- /* Make sure someone else didn't do the work while we were sleeping. */
- if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
- LCN_RL_NOT_MAPPED))
- err = ntfs_map_runlist_nolock(ni, vcn, NULL);
- up_write(&ni->runlist.lock);
- return err;
-}
-
-/**
- * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
- * @ni: ntfs inode of the attribute whose runlist to search
- * @vcn: vcn to convert
- * @write_locked: true if the runlist is locked for writing
- *
- * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
- * described by the ntfs inode @ni and return the corresponding logical cluster
- * number (lcn).
- *
- * If the @vcn is not mapped yet, the attempt is made to map the attribute
- * extent containing the @vcn and the vcn to lcn conversion is retried.
- *
- * If @write_locked is true the caller has locked the runlist for writing and
- * if false for reading.
- *
- * Since lcns must be >= 0, we use negative return codes with special meaning:
- *
- * Return code Meaning / Description
- * ==========================================
- * LCN_HOLE Hole / not allocated on disk.
- * LCN_ENOENT There is no such vcn in the runlist, i.e. @vcn is out of bounds.
- * LCN_ENOMEM Not enough memory to map runlist.
- * LCN_EIO Critical error (runlist/file is corrupt, i/o error, etc).
- *
- * Locking: - The runlist must be locked on entry and is left locked on return.
- * - If @write_locked is 'false', i.e. the runlist is locked for reading,
- * the lock may be dropped inside the function so you cannot rely on
- * the runlist still being the same when this function returns.
- */
-LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
- const bool write_locked)
-{
- LCN lcn;
- unsigned long flags;
- bool is_retry = false;
-
- BUG_ON(!ni);
- ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
- ni->mft_no, (unsigned long long)vcn,
- write_locked ? "write" : "read");
- BUG_ON(!NInoNonResident(ni));
- BUG_ON(vcn < 0);
- if (!ni->runlist.rl) {
- read_lock_irqsave(&ni->size_lock, flags);
- if (!ni->allocated_size) {
- read_unlock_irqrestore(&ni->size_lock, flags);
- return LCN_ENOENT;
- }
- read_unlock_irqrestore(&ni->size_lock, flags);
- }
-retry_remap:
- /* Convert vcn to lcn. If that fails map the runlist and retry once. */
- lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
- if (likely(lcn >= LCN_HOLE)) {
- ntfs_debug("Done, lcn 0x%llx.", (long long)lcn);
- return lcn;
- }
- if (lcn != LCN_RL_NOT_MAPPED) {
- if (lcn != LCN_ENOENT)
- lcn = LCN_EIO;
- } else if (!is_retry) {
- int err;
-
- if (!write_locked) {
- up_read(&ni->runlist.lock);
- down_write(&ni->runlist.lock);
- if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
- LCN_RL_NOT_MAPPED)) {
- up_write(&ni->runlist.lock);
- down_read(&ni->runlist.lock);
- goto retry_remap;
- }
- }
- err = ntfs_map_runlist_nolock(ni, vcn, NULL);
- if (!write_locked) {
- up_write(&ni->runlist.lock);
- down_read(&ni->runlist.lock);
- }
- if (likely(!err)) {
- is_retry = true;
- goto retry_remap;
- }
- if (err == -ENOENT)
- lcn = LCN_ENOENT;
- else if (err == -ENOMEM)
- lcn = LCN_ENOMEM;
- else
- lcn = LCN_EIO;
- }
- if (lcn != LCN_ENOENT)
- ntfs_error(ni->vol->sb, "Failed with error code %lli.",
- (long long)lcn);
- return lcn;
-}
-
-/**
- * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
- * @ni: ntfs inode describing the runlist to search
- * @vcn: vcn to find
- * @ctx: active attribute search context if present or NULL if not
- *
- * Find the virtual cluster number @vcn in the runlist described by the ntfs
- * inode @ni and return the address of the runlist element containing the @vcn.
- *
- * If the @vcn is not mapped yet, the attempt is made to map the attribute
- * extent containing the @vcn and the vcn to lcn conversion is retried.
- *
- * If @ctx is specified, it is an active search context of @ni and its base mft
- * record. This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
- * runlist fragments and allows their mapping. If you do not have the mft
- * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
- * will perform the necessary mapping and unmapping.
- *
- * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
- * restores it before returning. Thus, @ctx will be left pointing to the same
- * attribute on return as on entry. However, the actual pointers in @ctx may
- * point to different memory locations on return, so you must remember to reset
- * any cached pointers from the @ctx, i.e. after the call to
- * ntfs_attr_find_vcn_nolock(), you will probably want to do:
- * m = ctx->mrec;
- * a = ctx->attr;
- * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
- * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
- * Note you need to distinguish between the lcn of the returned runlist element
- * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on
- * read and allocate clusters on write.
- *
- * Return the runlist element containing the @vcn on success and
- * ERR_PTR(-errno) on error. You need to test the return value with IS_ERR()
- * to decide if the return is success or failure and PTR_ERR() to get to the
- * error code if IS_ERR() is true.
- *
- * The possible error return codes are:
- * -ENOENT - No such vcn in the runlist, i.e. @vcn is out of bounds.
- * -ENOMEM - Not enough memory to map runlist.
- * -EIO - Critical error (runlist/file is corrupt, i/o error, etc).
- *
- * WARNING: If @ctx is supplied, regardless of whether success or failure is
- * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
- * is no longer valid, i.e. you need to either call
- * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
- * In that case PTR_ERR(@ctx->mrec) will give you the error code for
- * why the mapping of the old inode failed.
- *
- * Locking: - The runlist described by @ni must be locked for writing on entry
- * and is locked on return. Note the runlist may be modified when
- * needed runlist fragments need to be mapped.
- * - If @ctx is NULL, the base mft record of @ni must not be mapped on
- * entry and it will be left unmapped on return.
- * - If @ctx is not NULL, the base mft record must be mapped on entry
- * and it will be left mapped on return.
- */
-runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
- ntfs_attr_search_ctx *ctx)
-{
- unsigned long flags;
- runlist_element *rl;
- int err = 0;
- bool is_retry = false;
-
- BUG_ON(!ni);
- ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
- ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
- BUG_ON(!NInoNonResident(ni));
- BUG_ON(vcn < 0);
- if (!ni->runlist.rl) {
- read_lock_irqsave(&ni->size_lock, flags);
- if (!ni->allocated_size) {
- read_unlock_irqrestore(&ni->size_lock, flags);
- return ERR_PTR(-ENOENT);
- }
- read_unlock_irqrestore(&ni->size_lock, flags);
- }
-retry_remap:
- rl = ni->runlist.rl;
- if (likely(rl && vcn >= rl[0].vcn)) {
- while (likely(rl->length)) {
- if (unlikely(vcn < rl[1].vcn)) {
- if (likely(rl->lcn >= LCN_HOLE)) {
- ntfs_debug("Done.");
- return rl;
- }
- break;
- }
- rl++;
- }
- if (likely(rl->lcn != LCN_RL_NOT_MAPPED)) {
- if (likely(rl->lcn == LCN_ENOENT))
- err = -ENOENT;
- else
- err = -EIO;
- }
- }
- if (!err && !is_retry) {
- /*
- * If the search context is invalid we cannot map the unmapped
- * region.
- */
- if (IS_ERR(ctx->mrec))
- err = PTR_ERR(ctx->mrec);
- else {
- /*
- * The @vcn is in an unmapped region, map the runlist
- * and retry.
- */
- err = ntfs_map_runlist_nolock(ni, vcn, ctx);
- if (likely(!err)) {
- is_retry = true;
- goto retry_remap;
- }
- }
- if (err == -EINVAL)
- err = -EIO;
- } else if (!err)
- err = -EIO;
- if (err != -ENOENT)
- ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
- return ERR_PTR(err);
-}
-
-/**
- * ntfs_attr_find - find (next) attribute in mft record
- * @type: attribute type to find
- * @name: attribute name to find (optional, i.e. NULL means don't care)
- * @name_len: attribute name length (only needed if @name present)
- * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
- * @val: attribute value to find (optional, resident attributes only)
- * @val_len: attribute value length
- * @ctx: search context with mft record and attribute to search from
- *
- * You should not need to call this function directly. Use ntfs_attr_lookup()
- * instead.
- *
- * ntfs_attr_find() takes a search context @ctx as parameter and searches the
- * mft record specified by @ctx->mrec, beginning at @ctx->attr, for an
- * attribute of @type, optionally @name and @val.
- *
- * If the attribute is found, ntfs_attr_find() returns 0 and @ctx->attr will
- * point to the found attribute.
- *
- * If the attribute is not found, ntfs_attr_find() returns -ENOENT and
- * @ctx->attr will point to the attribute before which the attribute being
- * searched for would need to be inserted if such an action were to be desired.
- *
- * On actual error, ntfs_attr_find() returns -EIO. In this case @ctx->attr is
- * undefined and in particular do not rely on it not changing.
- *
- * If @ctx->is_first is 'true', the search begins with @ctx->attr itself. If it
- * is 'false', the search begins after @ctx->attr.
- *
- * If @ic is IGNORE_CASE, the @name comparisson is not case sensitive and
- * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record
- * @ctx->mrec belongs. This is so we can get at the ntfs volume and hence at
- * the upcase table. If @ic is CASE_SENSITIVE, the comparison is case
- * sensitive. When @name is present, @name_len is the @name length in Unicode
- * characters.
- *
- * If @name is not present (NULL), we assume that the unnamed attribute is
- * being searched for.
- *
- * Finally, the resident attribute value @val is looked for, if present. If
- * @val is not present (NULL), @val_len is ignored.
- *
- * ntfs_attr_find() only searches the specified mft record and it ignores the
- * presence of an attribute list attribute (unless it is the one being searched
- * for, obviously). If you need to take attribute lists into consideration,
- * use ntfs_attr_lookup() instead (see below). This also means that you cannot
- * use ntfs_attr_find() to search for extent records of non-resident
- * attributes, as extents with lowest_vcn != 0 are usually described by the
- * attribute list attribute only. - Note that it is possible that the first
- * extent is only in the attribute list while the last extent is in the base
- * mft record, so do not rely on being able to find the first extent in the
- * base mft record.
- *
- * Warning: Never use @val when looking for attribute types which can be
- * non-resident as this most likely will result in a crash!
- */
-static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
- const u32 name_len, const IGNORE_CASE_BOOL ic,
- const u8 *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
-{
- ATTR_RECORD *a;
- ntfs_volume *vol = ctx->ntfs_ino->vol;
- ntfschar *upcase = vol->upcase;
- u32 upcase_len = vol->upcase_len;
-
- /*
- * Iterate over attributes in mft record starting at @ctx->attr, or the
- * attribute following that, if @ctx->is_first is 'true'.
- */
- if (ctx->is_first) {
- a = ctx->attr;
- ctx->is_first = false;
- } else
- a = (ATTR_RECORD*)((u8*)ctx->attr +
- le32_to_cpu(ctx->attr->length));
- for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
- u8 *mrec_end = (u8 *)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_allocated);
- u8 *name_end;
-
- /* check whether ATTR_RECORD wrap */
- if ((u8 *)a < (u8 *)ctx->mrec)
- break;
-
- /* check whether Attribute Record Header is within bounds */
- if ((u8 *)a > mrec_end ||
- (u8 *)a + sizeof(ATTR_RECORD) > mrec_end)
- break;
-
- /* check whether ATTR_RECORD's name is within bounds */
- name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
- a->name_length * sizeof(ntfschar);
- if (name_end > mrec_end)
- break;
-
- ctx->attr = a;
- if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
- a->type == AT_END))
- return -ENOENT;
- if (unlikely(!a->length))
- break;
-
- /* check whether ATTR_RECORD's length wrap */
- if ((u8 *)a + le32_to_cpu(a->length) < (u8 *)a)
- break;
- /* check whether ATTR_RECORD's length is within bounds */
- if ((u8 *)a + le32_to_cpu(a->length) > mrec_end)
- break;
-
- if (a->type != type)
- continue;
- /*
- * If @name is present, compare the two names. If @name is
- * missing, assume we want an unnamed attribute.
- */
- if (!name) {
- /* The search failed if the found attribute is named. */
- if (a->name_length)
- return -ENOENT;
- } else if (!ntfs_are_names_equal(name, name_len,
- (ntfschar*)((u8*)a + le16_to_cpu(a->name_offset)),
- a->name_length, ic, upcase, upcase_len)) {
- register int rc;
-
- rc = ntfs_collate_names(name, name_len,
- (ntfschar*)((u8*)a +
- le16_to_cpu(a->name_offset)),
- a->name_length, 1, IGNORE_CASE,
- upcase, upcase_len);
- /*
- * If @name collates before a->name, there is no
- * matching attribute.
- */
- if (rc == -1)
- return -ENOENT;
- /* If the strings are not equal, continue search. */
- if (rc)
- continue;
- rc = ntfs_collate_names(name, name_len,
- (ntfschar*)((u8*)a +
- le16_to_cpu(a->name_offset)),
- a->name_length, 1, CASE_SENSITIVE,
- upcase, upcase_len);
- if (rc == -1)
- return -ENOENT;
- if (rc)
- continue;
- }
- /*
- * The names match or @name not present and attribute is
- * unnamed. If no @val specified, we have found the attribute
- * and are done.
- */
- if (!val)
- return 0;
- /* @val is present; compare values. */
- else {
- register int rc;
-
- rc = memcmp(val, (u8*)a + le16_to_cpu(
- a->data.resident.value_offset),
- min_t(u32, val_len, le32_to_cpu(
- a->data.resident.value_length)));
- /*
- * If @val collates before the current attribute's
- * value, there is no matching attribute.
- */
- if (!rc) {
- register u32 avl;
-
- avl = le32_to_cpu(
- a->data.resident.value_length);
- if (val_len == avl)
- return 0;
- if (val_len < avl)
- return -ENOENT;
- } else if (rc < 0)
- return -ENOENT;
- }
- }
- ntfs_error(vol->sb, "Inode is corrupt. Run chkdsk.");
- NVolSetErrors(vol);
- return -EIO;
-}
-
-/**
- * load_attribute_list - load an attribute list into memory
- * @vol: ntfs volume from which to read
- * @runlist: runlist of the attribute list
- * @al_start: destination buffer
- * @size: size of the destination buffer in bytes
- * @initialized_size: initialized size of the attribute list
- *
- * Walk the runlist @runlist and load all clusters from it copying them into
- * the linear buffer @al. The maximum number of bytes copied to @al is @size
- * bytes. Note, @size does not need to be a multiple of the cluster size. If
- * @initialized_size is less than @size, the region in @al between
- * @initialized_size and @size will be zeroed and not read from disk.
- *
- * Return 0 on success or -errno on error.
- */
-int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
- const s64 size, const s64 initialized_size)
-{
- LCN lcn;
- u8 *al = al_start;
- u8 *al_end = al + initialized_size;
- runlist_element *rl;
- struct buffer_head *bh;
- struct super_block *sb;
- unsigned long block_size;
- unsigned long block, max_block;
- int err = 0;
- unsigned char block_size_bits;
-
- ntfs_debug("Entering.");
- if (!vol || !runlist || !al || size <= 0 || initialized_size < 0 ||
- initialized_size > size)
- return -EINVAL;
- if (!initialized_size) {
- memset(al, 0, size);
- return 0;
- }
- sb = vol->sb;
- block_size = sb->s_blocksize;
- block_size_bits = sb->s_blocksize_bits;
- down_read(&runlist->lock);
- rl = runlist->rl;
- if (!rl) {
- ntfs_error(sb, "Cannot read attribute list since runlist is "
- "missing.");
- goto err_out;
- }
- /* Read all clusters specified by the runlist one run at a time. */
- while (rl->length) {
- lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
- ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
- (unsigned long long)rl->vcn,
- (unsigned long long)lcn);
- /* The attribute list cannot be sparse. */
- if (lcn < 0) {
- ntfs_error(sb, "ntfs_rl_vcn_to_lcn() failed. Cannot "
- "read attribute list.");
- goto err_out;
- }
- block = lcn << vol->cluster_size_bits >> block_size_bits;
- /* Read the run from device in chunks of block_size bytes. */
- max_block = block + (rl->length << vol->cluster_size_bits >>
- block_size_bits);
- ntfs_debug("max_block = 0x%lx.", max_block);
- do {
- ntfs_debug("Reading block = 0x%lx.", block);
- bh = sb_bread(sb, block);
- if (!bh) {
- ntfs_error(sb, "sb_bread() failed. Cannot "
- "read attribute list.");
- goto err_out;
- }
- if (al + block_size >= al_end)
- goto do_final;
- memcpy(al, bh->b_data, block_size);
- brelse(bh);
- al += block_size;
- } while (++block < max_block);
- rl++;
- }
- if (initialized_size < size) {
-initialize:
- memset(al_start + initialized_size, 0, size - initialized_size);
- }
-done:
- up_read(&runlist->lock);
- return err;
-do_final:
- if (al < al_end) {
- /*
- * Partial block.
- *
- * Note: The attribute list can be smaller than its allocation
- * by multiple clusters. This has been encountered by at least
- * two people running Windows XP, thus we cannot do any
- * truncation sanity checking here. (AIA)
- */
- memcpy(al, bh->b_data, al_end - al);
- brelse(bh);
- if (initialized_size < size)
- goto initialize;
- goto done;
- }
- brelse(bh);
- /* Real overflow! */
- ntfs_error(sb, "Attribute list buffer overflow. Read attribute list "
- "is truncated.");
-err_out:
- err = -EIO;
- goto done;
-}
-
-/**
- * ntfs_external_attr_find - find an attribute in the attribute list of an inode
- * @type: attribute type to find
- * @name: attribute name to find (optional, i.e. NULL means don't care)
- * @name_len: attribute name length (only needed if @name present)
- * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
- * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only)
- * @val: attribute value to find (optional, resident attributes only)
- * @val_len: attribute value length
- * @ctx: search context with mft record and attribute to search from
- *
- * You should not need to call this function directly. Use ntfs_attr_lookup()
- * instead.
- *
- * Find an attribute by searching the attribute list for the corresponding
- * attribute list entry. Having found the entry, map the mft record if the
- * attribute is in a different mft record/inode, ntfs_attr_find() the attribute
- * in there and return it.
- *
- * On first search @ctx->ntfs_ino must be the base mft record and @ctx must
- * have been obtained from a call to ntfs_attr_get_search_ctx(). On subsequent
- * calls @ctx->ntfs_ino can be any extent inode, too (@ctx->base_ntfs_ino is
- * then the base inode).
- *
- * After finishing with the attribute/mft record you need to call
- * ntfs_attr_put_search_ctx() to cleanup the search context (unmapping any
- * mapped inodes, etc).
- *
- * If the attribute is found, ntfs_external_attr_find() returns 0 and
- * @ctx->attr will point to the found attribute. @ctx->mrec will point to the
- * mft record in which @ctx->attr is located and @ctx->al_entry will point to
- * the attribute list entry for the attribute.
- *
- * If the attribute is not found, ntfs_external_attr_find() returns -ENOENT and
- * @ctx->attr will point to the attribute in the base mft record before which
- * the attribute being searched for would need to be inserted if such an action
- * were to be desired. @ctx->mrec will point to the mft record in which
- * @ctx->attr is located and @ctx->al_entry will point to the attribute list
- * entry of the attribute before which the attribute being searched for would
- * need to be inserted if such an action were to be desired.
- *
- * Thus to insert the not found attribute, one wants to add the attribute to
- * @ctx->mrec (the base mft record) and if there is not enough space, the
- * attribute should be placed in a newly allocated extent mft record. The
- * attribute list entry for the inserted attribute should be inserted in the
- * attribute list attribute at @ctx->al_entry.
- *
- * On actual error, ntfs_external_attr_find() returns -EIO. In this case
- * @ctx->attr is undefined and in particular do not rely on it not changing.
- */
-static int ntfs_external_attr_find(const ATTR_TYPE type,
- const ntfschar *name, const u32 name_len,
- const IGNORE_CASE_BOOL ic, const VCN lowest_vcn,
- const u8 *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
-{
- ntfs_inode *base_ni, *ni;
- ntfs_volume *vol;
- ATTR_LIST_ENTRY *al_entry, *next_al_entry;
- u8 *al_start, *al_end;
- ATTR_RECORD *a;
- ntfschar *al_name;
- u32 al_name_len;
- int err = 0;
- static const char *es = " Unmount and run chkdsk.";
-
- ni = ctx->ntfs_ino;
- base_ni = ctx->base_ntfs_ino;
- ntfs_debug("Entering for inode 0x%lx, type 0x%x.", ni->mft_no, type);
- if (!base_ni) {
- /* First call happens with the base mft record. */
- base_ni = ctx->base_ntfs_ino = ctx->ntfs_ino;
- ctx->base_mrec = ctx->mrec;
- }
- if (ni == base_ni)
- ctx->base_attr = ctx->attr;
- if (type == AT_END)
- goto not_found;
- vol = base_ni->vol;
- al_start = base_ni->attr_list;
- al_end = al_start + base_ni->attr_list_size;
- if (!ctx->al_entry)
- ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
- /*
- * Iterate over entries in attribute list starting at @ctx->al_entry,
- * or the entry following that, if @ctx->is_first is 'true'.
- */
- if (ctx->is_first) {
- al_entry = ctx->al_entry;
- ctx->is_first = false;
- } else
- al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
- le16_to_cpu(ctx->al_entry->length));
- for (;; al_entry = next_al_entry) {
- /* Out of bounds check. */
- if ((u8*)al_entry < base_ni->attr_list ||
- (u8*)al_entry > al_end)
- break; /* Inode is corrupt. */
- ctx->al_entry = al_entry;
- /* Catch the end of the attribute list. */
- if ((u8*)al_entry == al_end)
- goto not_found;
- if (!al_entry->length)
- break;
- if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
- le16_to_cpu(al_entry->length) > al_end)
- break;
- next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
- le16_to_cpu(al_entry->length));
- if (le32_to_cpu(al_entry->type) > le32_to_cpu(type))
- goto not_found;
- if (type != al_entry->type)
- continue;
- /*
- * If @name is present, compare the two names. If @name is
- * missing, assume we want an unnamed attribute.
- */
- al_name_len = al_entry->name_length;
- al_name = (ntfschar*)((u8*)al_entry + al_entry->name_offset);
- if (!name) {
- if (al_name_len)
- goto not_found;
- } else if (!ntfs_are_names_equal(al_name, al_name_len, name,
- name_len, ic, vol->upcase, vol->upcase_len)) {
- register int rc;
-
- rc = ntfs_collate_names(name, name_len, al_name,
- al_name_len, 1, IGNORE_CASE,
- vol->upcase, vol->upcase_len);
- /*
- * If @name collates before al_name, there is no
- * matching attribute.
- */
- if (rc == -1)
- goto not_found;
- /* If the strings are not equal, continue search. */
- if (rc)
- continue;
- /*
- * FIXME: Reverse engineering showed 0, IGNORE_CASE but
- * that is inconsistent with ntfs_attr_find(). The
- * subsequent rc checks were also different. Perhaps I
- * made a mistake in one of the two. Need to recheck
- * which is correct or at least see what is going on...
- * (AIA)
- */
- rc = ntfs_collate_names(name, name_len, al_name,
- al_name_len, 1, CASE_SENSITIVE,
- vol->upcase, vol->upcase_len);
- if (rc == -1)
- goto not_found;
- if (rc)
- continue;
- }
- /*
- * The names match or @name not present and attribute is
- * unnamed. Now check @lowest_vcn. Continue search if the
- * next attribute list entry still fits @lowest_vcn. Otherwise
- * we have reached the right one or the search has failed.
- */
- if (lowest_vcn && (u8*)next_al_entry >= al_start &&
- (u8*)next_al_entry + 6 < al_end &&
- (u8*)next_al_entry + le16_to_cpu(
- next_al_entry->length) <= al_end &&
- sle64_to_cpu(next_al_entry->lowest_vcn) <=
- lowest_vcn &&
- next_al_entry->type == al_entry->type &&
- next_al_entry->name_length == al_name_len &&
- ntfs_are_names_equal((ntfschar*)((u8*)
- next_al_entry +
- next_al_entry->name_offset),
- next_al_entry->name_length,
- al_name, al_name_len, CASE_SENSITIVE,
- vol->upcase, vol->upcase_len))
- continue;
- if (MREF_LE(al_entry->mft_reference) == ni->mft_no) {
- if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) {
- ntfs_error(vol->sb, "Found stale mft "
- "reference in attribute list "
- "of base inode 0x%lx.%s",
- base_ni->mft_no, es);
- err = -EIO;
- break;
- }
- } else { /* Mft references do not match. */
- /* If there is a mapped record unmap it first. */
- if (ni != base_ni)
- unmap_extent_mft_record(ni);
- /* Do we want the base record back? */
- if (MREF_LE(al_entry->mft_reference) ==
- base_ni->mft_no) {
- ni = ctx->ntfs_ino = base_ni;
- ctx->mrec = ctx->base_mrec;
- } else {
- /* We want an extent record. */
- ctx->mrec = map_extent_mft_record(base_ni,
- le64_to_cpu(
- al_entry->mft_reference), &ni);
- if (IS_ERR(ctx->mrec)) {
- ntfs_error(vol->sb, "Failed to map "
- "extent mft record "
- "0x%lx of base inode "
- "0x%lx.%s",
- MREF_LE(al_entry->
- mft_reference),
- base_ni->mft_no, es);
- err = PTR_ERR(ctx->mrec);
- if (err == -ENOENT)
- err = -EIO;
- /* Cause @ctx to be sanitized below. */
- ni = NULL;
- break;
- }
- ctx->ntfs_ino = ni;
- }
- ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
- le16_to_cpu(ctx->mrec->attrs_offset));
- }
- /*
- * ctx->vfs_ino, ctx->mrec, and ctx->attr now point to the
- * mft record containing the attribute represented by the
- * current al_entry.
- */
- /*
- * We could call into ntfs_attr_find() to find the right
- * attribute in this mft record but this would be less
- * efficient and not quite accurate as ntfs_attr_find() ignores
- * the attribute instance numbers for example which become
- * important when one plays with attribute lists. Also,
- * because a proper match has been found in the attribute list
- * entry above, the comparison can now be optimized. So it is
- * worth re-implementing a simplified ntfs_attr_find() here.
- */
- a = ctx->attr;
- /*
- * Use a manual loop so we can still use break and continue
- * with the same meanings as above.
- */
-do_next_attr_loop:
- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_allocated))
- break;
- if (a->type == AT_END)
- break;
- if (!a->length)
- break;
- if (al_entry->instance != a->instance)
- goto do_next_attr;
- /*
- * If the type and/or the name are mismatched between the
- * attribute list entry and the attribute record, there is
- * corruption so we break and return error EIO.
- */
- if (al_entry->type != a->type)
- break;
- if (!ntfs_are_names_equal((ntfschar*)((u8*)a +
- le16_to_cpu(a->name_offset)), a->name_length,
- al_name, al_name_len, CASE_SENSITIVE,
- vol->upcase, vol->upcase_len))
- break;
- ctx->attr = a;
- /*
- * If no @val specified or @val specified and it matches, we
- * have found it!
- */
- if (!val || (!a->non_resident && le32_to_cpu(
- a->data.resident.value_length) == val_len &&
- !memcmp((u8*)a +
- le16_to_cpu(a->data.resident.value_offset),
- val, val_len))) {
- ntfs_debug("Done, found.");
- return 0;
- }
-do_next_attr:
- /* Proceed to the next attribute in the current mft record. */
- a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
- goto do_next_attr_loop;
- }
- if (!err) {
- ntfs_error(vol->sb, "Base inode 0x%lx contains corrupt "
- "attribute list attribute.%s", base_ni->mft_no,
- es);
- err = -EIO;
- }
- if (ni != base_ni) {
- if (ni)
- unmap_extent_mft_record(ni);
- ctx->ntfs_ino = base_ni;
- ctx->mrec = ctx->base_mrec;
- ctx->attr = ctx->base_attr;
- }
- if (err != -ENOMEM)
- NVolSetErrors(vol);
- return err;
-not_found:
- /*
- * If we were looking for AT_END, we reset the search context @ctx and
- * use ntfs_attr_find() to seek to the end of the base mft record.
- */
- if (type == AT_END) {
- ntfs_attr_reinit_search_ctx(ctx);
- return ntfs_attr_find(AT_END, name, name_len, ic, val, val_len,
- ctx);
- }
- /*
- * The attribute was not found. Before we return, we want to ensure
- * @ctx->mrec and @ctx->attr indicate the position at which the
- * attribute should be inserted in the base mft record. Since we also
- * want to preserve @ctx->al_entry we cannot reinitialize the search
- * context using ntfs_attr_reinit_search_ctx() as this would set
- * @ctx->al_entry to NULL. Thus we do the necessary bits manually (see
- * ntfs_attr_init_search_ctx() below). Note, we _only_ preserve
- * @ctx->al_entry as the remaining fields (base_*) are identical to
- * their non base_ counterparts and we cannot set @ctx->base_attr
- * correctly yet as we do not know what @ctx->attr will be set to by
- * the call to ntfs_attr_find() below.
- */
- if (ni != base_ni)
- unmap_extent_mft_record(ni);
- ctx->mrec = ctx->base_mrec;
- ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
- le16_to_cpu(ctx->mrec->attrs_offset));
- ctx->is_first = true;
- ctx->ntfs_ino = base_ni;
- ctx->base_ntfs_ino = NULL;
- ctx->base_mrec = NULL;
- ctx->base_attr = NULL;
- /*
- * In case there are multiple matches in the base mft record, need to
- * keep enumerating until we get an attribute not found response (or
- * another error), otherwise we would keep returning the same attribute
- * over and over again and all programs using us for enumeration would
- * lock up in a tight loop.
- */
- do {
- err = ntfs_attr_find(type, name, name_len, ic, val, val_len,
- ctx);
- } while (!err);
- ntfs_debug("Done, not found.");
- return err;
-}
-
-/**
- * ntfs_attr_lookup - find an attribute in an ntfs inode
- * @type: attribute type to find
- * @name: attribute name to find (optional, i.e. NULL means don't care)
- * @name_len: attribute name length (only needed if @name present)
- * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
- * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only)
- * @val: attribute value to find (optional, resident attributes only)
- * @val_len: attribute value length
- * @ctx: search context with mft record and attribute to search from
- *
- * Find an attribute in an ntfs inode. On first search @ctx->ntfs_ino must
- * be the base mft record and @ctx must have been obtained from a call to
- * ntfs_attr_get_search_ctx().
- *
- * This function transparently handles attribute lists and @ctx is used to
- * continue searches where they were left off at.
- *
- * After finishing with the attribute/mft record you need to call
- * ntfs_attr_put_search_ctx() to cleanup the search context (unmapping any
- * mapped inodes, etc).
- *
- * Return 0 if the search was successful and -errno if not.
- *
- * When 0, @ctx->attr is the found attribute and it is in mft record
- * @ctx->mrec. If an attribute list attribute is present, @ctx->al_entry is
- * the attribute list entry of the found attribute.
- *
- * When -ENOENT, @ctx->attr is the attribute which collates just after the
- * attribute being searched for, i.e. if one wants to add the attribute to the
- * mft record this is the correct place to insert it into. If an attribute
- * list attribute is present, @ctx->al_entry is the attribute list entry which
- * collates just after the attribute list entry of the attribute being searched
- * for, i.e. if one wants to add the attribute to the mft record this is the
- * correct place to insert its attribute list entry into.
- *
- * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is
- * then undefined and in particular you should not rely on it not changing.
- */
-int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
- const u32 name_len, const IGNORE_CASE_BOOL ic,
- const VCN lowest_vcn, const u8 *val, const u32 val_len,
- ntfs_attr_search_ctx *ctx)
-{
- ntfs_inode *base_ni;
-
- ntfs_debug("Entering.");
- BUG_ON(IS_ERR(ctx->mrec));
- if (ctx->base_ntfs_ino)
- base_ni = ctx->base_ntfs_ino;
- else
- base_ni = ctx->ntfs_ino;
- /* Sanity check, just for debugging really. */
- BUG_ON(!base_ni);
- if (!NInoAttrList(base_ni) || type == AT_ATTRIBUTE_LIST)
- return ntfs_attr_find(type, name, name_len, ic, val, val_len,
- ctx);
- return ntfs_external_attr_find(type, name, name_len, ic, lowest_vcn,
- val, val_len, ctx);
-}
-
-/**
- * ntfs_attr_init_search_ctx - initialize an attribute search context
- * @ctx: attribute search context to initialize
- * @ni: ntfs inode with which to initialize the search context
- * @mrec: mft record with which to initialize the search context
- *
- * Initialize the attribute search context @ctx with @ni and @mrec.
- */
-static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx,
- ntfs_inode *ni, MFT_RECORD *mrec)
-{
- *ctx = (ntfs_attr_search_ctx) {
- .mrec = mrec,
- /* Sanity checks are performed elsewhere. */
- .attr = (ATTR_RECORD*)((u8*)mrec +
- le16_to_cpu(mrec->attrs_offset)),
- .is_first = true,
- .ntfs_ino = ni,
- };
-}
-
-/**
- * ntfs_attr_reinit_search_ctx - reinitialize an attribute search context
- * @ctx: attribute search context to reinitialize
- *
- * Reinitialize the attribute search context @ctx, unmapping an associated
- * extent mft record if present, and initialize the search context again.
- *
- * This is used when a search for a new attribute is being started to reset
- * the search context to the beginning.
- */
-void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx)
-{
- if (likely(!ctx->base_ntfs_ino)) {
- /* No attribute list. */
- ctx->is_first = true;
- /* Sanity checks are performed elsewhere. */
- ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
- le16_to_cpu(ctx->mrec->attrs_offset));
- /*
- * This needs resetting due to ntfs_external_attr_find() which
- * can leave it set despite having zeroed ctx->base_ntfs_ino.
- */
- ctx->al_entry = NULL;
- return;
- } /* Attribute list. */
- if (ctx->ntfs_ino != ctx->base_ntfs_ino)
- unmap_extent_mft_record(ctx->ntfs_ino);
- ntfs_attr_init_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec);
- return;
-}
-
-/**
- * ntfs_attr_get_search_ctx - allocate/initialize a new attribute search context
- * @ni: ntfs inode with which to initialize the search context
- * @mrec: mft record with which to initialize the search context
- *
- * Allocate a new attribute search context, initialize it with @ni and @mrec,
- * and return it. Return NULL if allocation failed.
- */
-ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
-{
- ntfs_attr_search_ctx *ctx;
-
- ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS);
- if (ctx)
- ntfs_attr_init_search_ctx(ctx, ni, mrec);
- return ctx;
-}
-
-/**
- * ntfs_attr_put_search_ctx - release an attribute search context
- * @ctx: attribute search context to free
- *
- * Release the attribute search context @ctx, unmapping an associated extent
- * mft record if present.
- */
-void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx)
-{
- if (ctx->base_ntfs_ino && ctx->ntfs_ino != ctx->base_ntfs_ino)
- unmap_extent_mft_record(ctx->ntfs_ino);
- kmem_cache_free(ntfs_attr_ctx_cache, ctx);
- return;
-}
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file
- * @vol: ntfs volume to which the attribute belongs
- * @type: attribute type which to find
- *
- * Search for the attribute definition record corresponding to the attribute
- * @type in the $AttrDef system file.
- *
- * Return the attribute type definition record if found and NULL if not found.
- */
-static ATTR_DEF *ntfs_attr_find_in_attrdef(const ntfs_volume *vol,
- const ATTR_TYPE type)
-{
- ATTR_DEF *ad;
-
- BUG_ON(!vol->attrdef);
- BUG_ON(!type);
- for (ad = vol->attrdef; (u8*)ad - (u8*)vol->attrdef <
- vol->attrdef_size && ad->type; ++ad) {
- /* We have not found it yet, carry on searching. */
- if (likely(le32_to_cpu(ad->type) < le32_to_cpu(type)))
- continue;
- /* We found the attribute; return it. */
- if (likely(ad->type == type))
- return ad;
- /* We have gone too far already. No point in continuing. */
- break;
- }
- /* Attribute not found. */
- ntfs_debug("Attribute type 0x%x not found in $AttrDef.",
- le32_to_cpu(type));
- return NULL;
-}
-
-/**
- * ntfs_attr_size_bounds_check - check a size of an attribute type for validity
- * @vol: ntfs volume to which the attribute belongs
- * @type: attribute type which to check
- * @size: size which to check
- *
- * Check whether the @size in bytes is valid for an attribute of @type on the
- * ntfs volume @vol. This information is obtained from $AttrDef system file.
- *
- * Return 0 if valid, -ERANGE if not valid, or -ENOENT if the attribute is not
- * listed in $AttrDef.
- */
-int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type,
- const s64 size)
-{
- ATTR_DEF *ad;
-
- BUG_ON(size < 0);
- /*
- * $ATTRIBUTE_LIST has a maximum size of 256kiB, but this is not
- * listed in $AttrDef.
- */
- if (unlikely(type == AT_ATTRIBUTE_LIST && size > 256 * 1024))
- return -ERANGE;
- /* Get the $AttrDef entry for the attribute @type. */
- ad = ntfs_attr_find_in_attrdef(vol, type);
- if (unlikely(!ad))
- return -ENOENT;
- /* Do the bounds check. */
- if (((sle64_to_cpu(ad->min_size) > 0) &&
- size < sle64_to_cpu(ad->min_size)) ||
- ((sle64_to_cpu(ad->max_size) > 0) && size >
- sle64_to_cpu(ad->max_size)))
- return -ERANGE;
- return 0;
-}
-
-/**
- * ntfs_attr_can_be_non_resident - check if an attribute can be non-resident
- * @vol: ntfs volume to which the attribute belongs
- * @type: attribute type which to check
- *
- * Check whether the attribute of @type on the ntfs volume @vol is allowed to
- * be non-resident. This information is obtained from $AttrDef system file.
- *
- * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, and
- * -ENOENT if the attribute is not listed in $AttrDef.
- */
-int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
-{
- ATTR_DEF *ad;
-
- /* Find the attribute definition record in $AttrDef. */
- ad = ntfs_attr_find_in_attrdef(vol, type);
- if (unlikely(!ad))
- return -ENOENT;
- /* Check the flags and return the result. */
- if (ad->flags & ATTR_DEF_RESIDENT)
- return -EPERM;
- return 0;
-}
-
-/**
- * ntfs_attr_can_be_resident - check if an attribute can be resident
- * @vol: ntfs volume to which the attribute belongs
- * @type: attribute type which to check
- *
- * Check whether the attribute of @type on the ntfs volume @vol is allowed to
- * be resident. This information is derived from our ntfs knowledge and may
- * not be completely accurate, especially when user defined attributes are
- * present. Basically we allow everything to be resident except for index
- * allocation and $EA attributes.
- *
- * Return 0 if the attribute is allowed to be non-resident and -EPERM if not.
- *
- * Warning: In the system file $MFT the attribute $Bitmap must be non-resident
- * otherwise windows will not boot (blue screen of death)! We cannot
- * check for this here as we do not know which inode's $Bitmap is
- * being asked about so the caller needs to special case this.
- */
-int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
-{
- if (type == AT_INDEX_ALLOCATION)
- return -EPERM;
- return 0;
-}
-
-/**
- * ntfs_attr_record_resize - resize an attribute record
- * @m: mft record containing attribute record
- * @a: attribute record to resize
- * @new_size: new size in bytes to which to resize the attribute record @a
- *
- * Resize the attribute record @a, i.e. the resident part of the attribute, in
- * the mft record @m to @new_size bytes.
- *
- * Return 0 on success and -errno on error. The following error codes are
- * defined:
- * -ENOSPC - Not enough space in the mft record @m to perform the resize.
- *
- * Note: On error, no modifications have been performed whatsoever.
- *
- * Warning: If you make a record smaller without having copied all the data you
- * are interested in the data may be overwritten.
- */
-int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
-{
- ntfs_debug("Entering for new_size %u.", new_size);
- /* Align to 8 bytes if it is not already done. */
- if (new_size & 7)
- new_size = (new_size + 7) & ~7;
- /* If the actual attribute length has changed, move things around. */
- if (new_size != le32_to_cpu(a->length)) {
- u32 new_muse = le32_to_cpu(m->bytes_in_use) -
- le32_to_cpu(a->length) + new_size;
- /* Not enough space in this mft record. */
- if (new_muse > le32_to_cpu(m->bytes_allocated))
- return -ENOSPC;
- /* Move attributes following @a to their new location. */
- memmove((u8*)a + new_size, (u8*)a + le32_to_cpu(a->length),
- le32_to_cpu(m->bytes_in_use) - ((u8*)a -
- (u8*)m) - le32_to_cpu(a->length));
- /* Adjust @m to reflect the change in used space. */
- m->bytes_in_use = cpu_to_le32(new_muse);
- /* Adjust @a to reflect the new size. */
- if (new_size >= offsetof(ATTR_REC, length) + sizeof(a->length))
- a->length = cpu_to_le32(new_size);
- }
- return 0;
-}
-
-/**
- * ntfs_resident_attr_value_resize - resize the value of a resident attribute
- * @m: mft record containing attribute record
- * @a: attribute record whose value to resize
- * @new_size: new size in bytes to which to resize the attribute value of @a
- *
- * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
- * If the value is made bigger, the newly allocated space is cleared.
- *
- * Return 0 on success and -errno on error. The following error codes are
- * defined:
- * -ENOSPC - Not enough space in the mft record @m to perform the resize.
- *
- * Note: On error, no modifications have been performed whatsoever.
- *
- * Warning: If you make a record smaller without having copied all the data you
- * are interested in the data may be overwritten.
- */
-int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
- const u32 new_size)
-{
- u32 old_size;
-
- /* Resize the resident part of the attribute record. */
- if (ntfs_attr_record_resize(m, a,
- le16_to_cpu(a->data.resident.value_offset) + new_size))
- return -ENOSPC;
- /*
- * The resize succeeded! If we made the attribute value bigger, clear
- * the area between the old size and @new_size.
- */
- old_size = le32_to_cpu(a->data.resident.value_length);
- if (new_size > old_size)
- memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
- old_size, 0, new_size - old_size);
- /* Finally update the length of the attribute value. */
- a->data.resident.value_length = cpu_to_le32(new_size);
- return 0;
-}
-
-/**
- * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
- * @ni: ntfs inode describing the attribute to convert
- * @data_size: size of the resident data to copy to the non-resident attribute
- *
- * Convert the resident ntfs attribute described by the ntfs inode @ni to a
- * non-resident one.
- *
- * @data_size must be equal to the attribute value size. This is needed since
- * we need to know the size before we can map the mft record and our callers
- * always know it. The reason we cannot simply read the size from the vfs
- * inode i_size is that this is not necessarily uptodate. This happens when
- * ntfs_attr_make_non_resident() is called in the ->truncate call path(s).
- *
- * Return 0 on success and -errno on error. The following error return codes
- * are defined:
- * -EPERM - The attribute is not allowed to be non-resident.
- * -ENOMEM - Not enough memory.
- * -ENOSPC - Not enough disk space.
- * -EINVAL - Attribute not defined on the volume.
- * -EIO - I/o error or other error.
- * Note that -ENOSPC is also returned in the case that there is not enough
- * space in the mft record to do the conversion. This can happen when the mft
- * record is already very full. The caller is responsible for trying to make
- * space in the mft record and trying again. FIXME: Do we need a separate
- * error return code for this kind of -ENOSPC or is it always worth trying
- * again in case the attribute may then fit in a resident state so no need to
- * make it non-resident at all? Ho-hum... (AIA)
- *
- * NOTE to self: No changes in the attribute list are required to move from
- * a resident to a non-resident attribute.
- *
- * Locking: - The caller must hold i_mutex on the inode.
- */
-int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
-{
- s64 new_size;
- struct inode *vi = VFS_I(ni);
- ntfs_volume *vol = ni->vol;
- ntfs_inode *base_ni;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx;
- struct page *page;
- runlist_element *rl;
- u8 *kaddr;
- unsigned long flags;
- int mp_size, mp_ofs, name_ofs, arec_size, err, err2;
- u32 attr_size;
- u8 old_res_attr_flags;
-
- /* Check that the attribute is allowed to be non-resident. */
- err = ntfs_attr_can_be_non_resident(vol, ni->type);
- if (unlikely(err)) {
- if (err == -EPERM)
- ntfs_debug("Attribute is not allowed to be "
- "non-resident.");
- else
- ntfs_debug("Attribute not defined on the NTFS "
- "volume!");
- return err;
- }
- /*
- * FIXME: Compressed and encrypted attributes are not supported when
- * writing and we should never have gotten here for them.
- */
- BUG_ON(NInoCompressed(ni));
- BUG_ON(NInoEncrypted(ni));
- /*
- * The size needs to be aligned to a cluster boundary for allocation
- * purposes.
- */
- new_size = (data_size + vol->cluster_size - 1) &
- ~(vol->cluster_size - 1);
- if (new_size > 0) {
- /*
- * Will need the page later and since the page lock nests
- * outside all ntfs locks, we need to get the page now.
- */
- page = find_or_create_page(vi->i_mapping, 0,
- mapping_gfp_mask(vi->i_mapping));
- if (unlikely(!page))
- return -ENOMEM;
- /* Start by allocating clusters to hold the attribute value. */
- rl = ntfs_cluster_alloc(vol, 0, new_size >>
- vol->cluster_size_bits, -1, DATA_ZONE, true);
- if (IS_ERR(rl)) {
- err = PTR_ERR(rl);
- ntfs_debug("Failed to allocate cluster%s, error code "
- "%i.", (new_size >>
- vol->cluster_size_bits) > 1 ? "s" : "",
- err);
- goto page_err_out;
- }
- } else {
- rl = NULL;
- page = NULL;
- }
- /* Determine the size of the mapping pairs array. */
- mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0, -1);
- if (unlikely(mp_size < 0)) {
- err = mp_size;
- ntfs_debug("Failed to get size for mapping pairs array, error "
- "code %i.", err);
- goto rl_err_out;
- }
- down_write(&ni->runlist.lock);
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- BUG_ON(NInoNonResident(ni));
- BUG_ON(a->non_resident);
- /*
- * Calculate new offsets for the name and the mapping pairs array.
- */
- if (NInoSparse(ni) || NInoCompressed(ni))
- name_ofs = (offsetof(ATTR_REC,
- data.non_resident.compressed_size) +
- sizeof(a->data.non_resident.compressed_size) +
- 7) & ~7;
- else
- name_ofs = (offsetof(ATTR_REC,
- data.non_resident.compressed_size) + 7) & ~7;
- mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
- /*
- * Determine the size of the resident part of the now non-resident
- * attribute record.
- */
- arec_size = (mp_ofs + mp_size + 7) & ~7;
- /*
- * If the page is not uptodate bring it uptodate by copying from the
- * attribute value.
- */
- attr_size = le32_to_cpu(a->data.resident.value_length);
- BUG_ON(attr_size != data_size);
- if (page && !PageUptodate(page)) {
- kaddr = kmap_atomic(page);
- memcpy(kaddr, (u8*)a +
- le16_to_cpu(a->data.resident.value_offset),
- attr_size);
- memset(kaddr + attr_size, 0, PAGE_SIZE - attr_size);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
- SetPageUptodate(page);
- }
- /* Backup the attribute flag. */
- old_res_attr_flags = a->data.resident.flags;
- /* Resize the resident part of the attribute record. */
- err = ntfs_attr_record_resize(m, a, arec_size);
- if (unlikely(err))
- goto err_out;
- /*
- * Convert the resident part of the attribute record to describe a
- * non-resident attribute.
- */
- a->non_resident = 1;
- /* Move the attribute name if it exists and update the offset. */
- if (a->name_length)
- memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
- a->name_length * sizeof(ntfschar));
- a->name_offset = cpu_to_le16(name_ofs);
- /* Setup the fields specific to non-resident attributes. */
- a->data.non_resident.lowest_vcn = 0;
- a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
- vol->cluster_size_bits);
- a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
- memset(&a->data.non_resident.reserved, 0,
- sizeof(a->data.non_resident.reserved));
- a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
- a->data.non_resident.data_size =
- a->data.non_resident.initialized_size =
- cpu_to_sle64(attr_size);
- if (NInoSparse(ni) || NInoCompressed(ni)) {
- a->data.non_resident.compression_unit = 0;
- if (NInoCompressed(ni) || vol->major_ver < 3)
- a->data.non_resident.compression_unit = 4;
- a->data.non_resident.compressed_size =
- a->data.non_resident.allocated_size;
- } else
- a->data.non_resident.compression_unit = 0;
- /* Generate the mapping pairs array into the attribute record. */
- err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
- arec_size - mp_ofs, rl, 0, -1, NULL);
- if (unlikely(err)) {
- ntfs_debug("Failed to build mapping pairs, error code %i.",
- err);
- goto undo_err_out;
- }
- /* Setup the in-memory attribute structure to be non-resident. */
- ni->runlist.rl = rl;
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = new_size;
- if (NInoSparse(ni) || NInoCompressed(ni)) {
- ni->itype.compressed.size = ni->allocated_size;
- if (a->data.non_resident.compression_unit) {
- ni->itype.compressed.block_size = 1U << (a->data.
- non_resident.compression_unit +
- vol->cluster_size_bits);
- ni->itype.compressed.block_size_bits =
- ffs(ni->itype.compressed.block_size) -
- 1;
- ni->itype.compressed.block_clusters = 1U <<
- a->data.non_resident.compression_unit;
- } else {
- ni->itype.compressed.block_size = 0;
- ni->itype.compressed.block_size_bits = 0;
- ni->itype.compressed.block_clusters = 0;
- }
- vi->i_blocks = ni->itype.compressed.size >> 9;
- } else
- vi->i_blocks = ni->allocated_size >> 9;
- write_unlock_irqrestore(&ni->size_lock, flags);
- /*
- * This needs to be last since the address space operations ->read_folio
- * and ->writepage can run concurrently with us as they are not
- * serialized on i_mutex. Note, we are not allowed to fail once we flip
- * this switch, which is another reason to do this last.
- */
- NInoSetNonResident(ni);
- /* Mark the mft record dirty, so it gets written back. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
- if (page) {
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
- }
- ntfs_debug("Done.");
- return 0;
-undo_err_out:
- /* Convert the attribute back into a resident attribute. */
- a->non_resident = 0;
- /* Move the attribute name if it exists and update the offset. */
- name_ofs = (offsetof(ATTR_RECORD, data.resident.reserved) +
- sizeof(a->data.resident.reserved) + 7) & ~7;
- if (a->name_length)
- memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
- a->name_length * sizeof(ntfschar));
- mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
- a->name_offset = cpu_to_le16(name_ofs);
- arec_size = (mp_ofs + attr_size + 7) & ~7;
- /* Resize the resident part of the attribute record. */
- err2 = ntfs_attr_record_resize(m, a, arec_size);
- if (unlikely(err2)) {
- /*
- * This cannot happen (well if memory corruption is at work it
- * could happen in theory), but deal with it as well as we can.
- * If the old size is too small, truncate the attribute,
- * otherwise simply give it a larger allocated size.
- * FIXME: Should check whether chkdsk complains when the
- * allocated size is much bigger than the resident value size.
- */
- arec_size = le32_to_cpu(a->length);
- if ((mp_ofs + attr_size) > arec_size) {
- err2 = attr_size;
- attr_size = arec_size - mp_ofs;
- ntfs_error(vol->sb, "Failed to undo partial resident "
- "to non-resident attribute "
- "conversion. Truncating inode 0x%lx, "
- "attribute type 0x%x from %i bytes to "
- "%i bytes to maintain metadata "
- "consistency. THIS MEANS YOU ARE "
- "LOSING %i BYTES DATA FROM THIS %s.",
- vi->i_ino,
- (unsigned)le32_to_cpu(ni->type),
- err2, attr_size, err2 - attr_size,
- ((ni->type == AT_DATA) &&
- !ni->name_len) ? "FILE": "ATTRIBUTE");
- write_lock_irqsave(&ni->size_lock, flags);
- ni->initialized_size = attr_size;
- i_size_write(vi, attr_size);
- write_unlock_irqrestore(&ni->size_lock, flags);
- }
- }
- /* Setup the fields specific to resident attributes. */
- a->data.resident.value_length = cpu_to_le32(attr_size);
- a->data.resident.value_offset = cpu_to_le16(mp_ofs);
- a->data.resident.flags = old_res_attr_flags;
- memset(&a->data.resident.reserved, 0,
- sizeof(a->data.resident.reserved));
- /* Copy the data from the page back to the attribute value. */
- if (page) {
- kaddr = kmap_atomic(page);
- memcpy((u8*)a + mp_ofs, kaddr, attr_size);
- kunmap_atomic(kaddr);
- }
- /* Setup the allocated size in the ntfs inode in case it changed. */
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = arec_size - mp_ofs;
- write_unlock_irqrestore(&ni->size_lock, flags);
- /* Mark the mft record dirty, so it gets written back. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
-err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- ni->runlist.rl = NULL;
- up_write(&ni->runlist.lock);
-rl_err_out:
- if (rl) {
- if (ntfs_cluster_free_from_rl(vol, rl) < 0) {
- ntfs_error(vol->sb, "Failed to release allocated "
- "cluster(s) in error code path. Run "
- "chkdsk to recover the lost "
- "cluster(s).");
- NVolSetErrors(vol);
- }
- ntfs_free(rl);
-page_err_out:
- unlock_page(page);
- put_page(page);
- }
- if (err == -EINVAL)
- err = -EIO;
- return err;
-}
-
-/**
- * ntfs_attr_extend_allocation - extend the allocated space of an attribute
- * @ni: ntfs inode of the attribute whose allocation to extend
- * @new_alloc_size: new size in bytes to which to extend the allocation to
- * @new_data_size: new size in bytes to which to extend the data to
- * @data_start: beginning of region which is required to be non-sparse
- *
- * Extend the allocated space of an attribute described by the ntfs inode @ni
- * to @new_alloc_size bytes. If @data_start is -1, the whole extension may be
- * implemented as a hole in the file (as long as both the volume and the ntfs
- * inode @ni have sparse support enabled). If @data_start is >= 0, then the
- * region between the old allocated size and @data_start - 1 may be made sparse
- * but the regions between @data_start and @new_alloc_size must be backed by
- * actual clusters.
- *
- * If @new_data_size is -1, it is ignored. If it is >= 0, then the data size
- * of the attribute is extended to @new_data_size. Note that the i_size of the
- * vfs inode is not updated. Only the data size in the base attribute record
- * is updated. The caller has to update i_size separately if this is required.
- * WARNING: It is a BUG() for @new_data_size to be smaller than the old data
- * size as well as for @new_data_size to be greater than @new_alloc_size.
- *
- * For resident attributes this involves resizing the attribute record and if
- * necessary moving it and/or other attributes into extent mft records and/or
- * converting the attribute to a non-resident attribute which in turn involves
- * extending the allocation of a non-resident attribute as described below.
- *
- * For non-resident attributes this involves allocating clusters in the data
- * zone on the volume (except for regions that are being made sparse) and
- * extending the run list to describe the allocated clusters as well as
- * updating the mapping pairs array of the attribute. This in turn involves
- * resizing the attribute record and if necessary moving it and/or other
- * attributes into extent mft records and/or splitting the attribute record
- * into multiple extent attribute records.
- *
- * Also, the attribute list attribute is updated if present and in some of the
- * above cases (the ones where extent mft records/attributes come into play),
- * an attribute list attribute is created if not already present.
- *
- * Return the new allocated size on success and -errno on error. In the case
- * that an error is encountered but a partial extension at least up to
- * @data_start (if present) is possible, the allocation is partially extended
- * and this is returned. This means the caller must check the returned size to
- * determine if the extension was partial. If @data_start is -1 then partial
- * allocations are not performed.
- *
- * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA.
- *
- * Locking: This function takes the runlist lock of @ni for writing as well as
- * locking the mft record of the base ntfs inode. These locks are maintained
- * throughout execution of the function. These locks are required so that the
- * attribute can be resized safely and so that it can for example be converted
- * from resident to non-resident safely.
- *
- * TODO: At present attribute list attribute handling is not implemented.
- *
- * TODO: At present it is not safe to call this function for anything other
- * than the $DATA attribute(s) of an uncompressed and unencrypted file.
- */
-s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
- const s64 new_data_size, const s64 data_start)
-{
- VCN vcn;
- s64 ll, allocated_size, start = data_start;
- struct inode *vi = VFS_I(ni);
- ntfs_volume *vol = ni->vol;
- ntfs_inode *base_ni;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx;
- runlist_element *rl, *rl2;
- unsigned long flags;
- int err, mp_size;
- u32 attr_len = 0; /* Silence stupid gcc warning. */
- bool mp_rebuilt;
-
-#ifdef DEBUG
- read_lock_irqsave(&ni->size_lock, flags);
- allocated_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
- "old_allocated_size 0x%llx, "
- "new_allocated_size 0x%llx, new_data_size 0x%llx, "
- "data_start 0x%llx.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)allocated_size,
- (unsigned long long)new_alloc_size,
- (unsigned long long)new_data_size,
- (unsigned long long)start);
-#endif
-retry_extend:
- /*
- * For non-resident attributes, @start and @new_size need to be aligned
- * to cluster boundaries for allocation purposes.
- */
- if (NInoNonResident(ni)) {
- if (start > 0)
- start &= ~(s64)vol->cluster_size_mask;
- new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
- ~(s64)vol->cluster_size_mask;
- }
- BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size);
- /* Check if new size is allowed in $AttrDef. */
- err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
- if (unlikely(err)) {
- /* Only emit errors when the write will fail completely. */
- read_lock_irqsave(&ni->size_lock, flags);
- allocated_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (start < 0 || start >= allocated_size) {
- if (err == -ERANGE) {
- ntfs_error(vol->sb, "Cannot extend allocation "
- "of inode 0x%lx, attribute "
- "type 0x%x, because the new "
- "allocation would exceed the "
- "maximum allowed size for "
- "this attribute type.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- } else {
- ntfs_error(vol->sb, "Cannot extend allocation "
- "of inode 0x%lx, attribute "
- "type 0x%x, because this "
- "attribute type is not "
- "defined on the NTFS volume. "
- "Possible corruption! You "
- "should run chkdsk!",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- }
- }
- /* Translate error code to be POSIX conformant for write(2). */
- if (err == -ERANGE)
- err = -EFBIG;
- else
- err = -EIO;
- return err;
- }
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- /*
- * We will be modifying both the runlist (if non-resident) and the mft
- * record so lock them both down.
- */
- down_write(&ni->runlist.lock);
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- read_lock_irqsave(&ni->size_lock, flags);
- allocated_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- /*
- * If non-resident, seek to the last extent. If resident, there is
- * only one extent, so seek to that.
- */
- vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits :
- 0;
- /*
- * Abort if someone did the work whilst we waited for the locks. If we
- * just converted the attribute from resident to non-resident it is
- * likely that exactly this has happened already. We cannot quite
- * abort if we need to update the data size.
- */
- if (unlikely(new_alloc_size <= allocated_size)) {
- ntfs_debug("Allocated size already exceeds requested size.");
- new_alloc_size = allocated_size;
- if (new_data_size < 0)
- goto done;
- /*
- * We want the first attribute extent so that we can update the
- * data size.
- */
- vcn = 0;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, vcn, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- /* Use goto to reduce indentation. */
- if (a->non_resident)
- goto do_non_resident_extend;
- BUG_ON(NInoNonResident(ni));
- /* The total length of the attribute value. */
- attr_len = le32_to_cpu(a->data.resident.value_length);
- /*
- * Extend the attribute record to be able to store the new attribute
- * size. ntfs_attr_record_resize() will not do anything if the size is
- * not changing.
- */
- if (new_alloc_size < vol->mft_record_size &&
- !ntfs_attr_record_resize(m, a,
- le16_to_cpu(a->data.resident.value_offset) +
- new_alloc_size)) {
- /* The resize succeeded! */
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = le32_to_cpu(a->length) -
- le16_to_cpu(a->data.resident.value_offset);
- write_unlock_irqrestore(&ni->size_lock, flags);
- if (new_data_size >= 0) {
- BUG_ON(new_data_size < attr_len);
- a->data.resident.value_length =
- cpu_to_le32((u32)new_data_size);
- }
- goto flush_done;
- }
- /*
- * We have to drop all the locks so we can call
- * ntfs_attr_make_non_resident(). This could be optimised by try-
- * locking the first page cache page and only if that fails dropping
- * the locks, locking the page, and redoing all the locking and
- * lookups. While this would be a huge optimisation, it is not worth
- * it as this is definitely a slow code path.
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
- /*
- * Not enough space in the mft record, try to make the attribute
- * non-resident and if successful restart the extension process.
- */
- err = ntfs_attr_make_non_resident(ni, attr_len);
- if (likely(!err))
- goto retry_extend;
- /*
- * Could not make non-resident. If this is due to this not being
- * permitted for this attribute type or there not being enough space,
- * try to make other attributes non-resident. Otherwise fail.
- */
- if (unlikely(err != -EPERM && err != -ENOSPC)) {
- /* Only emit errors when the write will fail completely. */
- read_lock_irqsave(&ni->size_lock, flags);
- allocated_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot extend allocation of "
- "inode 0x%lx, attribute type 0x%x, "
- "because the conversion from resident "
- "to non-resident attribute failed "
- "with error code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- if (err != -ENOMEM)
- err = -EIO;
- goto conv_err_out;
- }
- /* TODO: Not implemented from here, abort. */
- read_lock_irqsave(&ni->size_lock, flags);
- allocated_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (start < 0 || start >= allocated_size) {
- if (err == -ENOSPC)
- ntfs_error(vol->sb, "Not enough space in the mft "
- "record/on disk for the non-resident "
- "attribute value. This case is not "
- "implemented yet.");
- else /* if (err == -EPERM) */
- ntfs_error(vol->sb, "This attribute type may not be "
- "non-resident. This case is not "
- "implemented yet.");
- }
- err = -EOPNOTSUPP;
- goto conv_err_out;
-#if 0
- // TODO: Attempt to make other attributes non-resident.
- if (!err)
- goto do_resident_extend;
- /*
- * Both the attribute list attribute and the standard information
- * attribute must remain in the base inode. Thus, if this is one of
- * these attributes, we have to try to move other attributes out into
- * extent mft records instead.
- */
- if (ni->type == AT_ATTRIBUTE_LIST ||
- ni->type == AT_STANDARD_INFORMATION) {
- // TODO: Attempt to move other attributes into extent mft
- // records.
- err = -EOPNOTSUPP;
- if (!err)
- goto do_resident_extend;
- goto err_out;
- }
- // TODO: Attempt to move this attribute to an extent mft record, but
- // only if it is not already the only attribute in an mft record in
- // which case there would be nothing to gain.
- err = -EOPNOTSUPP;
- if (!err)
- goto do_resident_extend;
- /* There is nothing we can do to make enough space. )-: */
- goto err_out;
-#endif
-do_non_resident_extend:
- BUG_ON(!NInoNonResident(ni));
- if (new_alloc_size == allocated_size) {
- BUG_ON(vcn);
- goto alloc_done;
- }
- /*
- * If the data starts after the end of the old allocation, this is a
- * $DATA attribute and sparse attributes are enabled on the volume and
- * for this inode, then create a sparse region between the old
- * allocated size and the start of the data. Otherwise simply proceed
- * with filling the whole space between the old allocated size and the
- * new allocated size with clusters.
- */
- if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA ||
- !NVolSparseEnabled(vol) || NInoSparseDisabled(ni))
- goto skip_sparse;
- // TODO: This is not implemented yet. We just fill in with real
- // clusters for now...
- ntfs_debug("Inserting holes is not-implemented yet. Falling back to "
- "allocating real clusters instead.");
-skip_sparse:
- rl = ni->runlist.rl;
- if (likely(rl)) {
- /* Seek to the end of the runlist. */
- while (rl->length)
- rl++;
- }
- /* If this attribute extent is not mapped, map it now. */
- if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED ||
- (rl->lcn == LCN_ENOENT && rl > ni->runlist.rl &&
- (rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
- if (!rl && !allocated_size)
- goto first_alloc;
- rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
- if (IS_ERR(rl)) {
- err = PTR_ERR(rl);
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot extend allocation "
- "of inode 0x%lx, attribute "
- "type 0x%x, because the "
- "mapping of a runlist "
- "fragment failed with error "
- "code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type),
- err);
- if (err != -ENOMEM)
- err = -EIO;
- goto err_out;
- }
- ni->runlist.rl = rl;
- /* Seek to the end of the runlist. */
- while (rl->length)
- rl++;
- }
- /*
- * We now know the runlist of the last extent is mapped and @rl is at
- * the end of the runlist. We want to begin allocating clusters
- * starting at the last allocated cluster to reduce fragmentation. If
- * there are no valid LCNs in the attribute we let the cluster
- * allocator choose the starting cluster.
- */
- /* If the last LCN is a hole or simillar seek back to last real LCN. */
- while (rl->lcn < 0 && rl > ni->runlist.rl)
- rl--;
-first_alloc:
- // FIXME: Need to implement partial allocations so at least part of the
- // write can be performed when start >= 0. (Needed for POSIX write(2)
- // conformance.)
- rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits,
- (new_alloc_size - allocated_size) >>
- vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ?
- rl->lcn + rl->length : -1, DATA_ZONE, true);
- if (IS_ERR(rl2)) {
- err = PTR_ERR(rl2);
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot extend allocation of "
- "inode 0x%lx, attribute type 0x%x, "
- "because the allocation of clusters "
- "failed with error code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- if (err != -ENOMEM && err != -ENOSPC)
- err = -EIO;
- goto err_out;
- }
- rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
- if (IS_ERR(rl)) {
- err = PTR_ERR(rl);
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot extend allocation of "
- "inode 0x%lx, attribute type 0x%x, "
- "because the runlist merge failed "
- "with error code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- if (err != -ENOMEM)
- err = -EIO;
- if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to release allocated "
- "cluster(s) in error code path. Run "
- "chkdsk to recover the lost "
- "cluster(s).");
- NVolSetErrors(vol);
- }
- ntfs_free(rl2);
- goto err_out;
- }
- ni->runlist.rl = rl;
- ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size -
- allocated_size) >> vol->cluster_size_bits);
- /* Find the runlist element with which the attribute extent starts. */
- ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
- rl2 = ntfs_rl_find_vcn_nolock(rl, ll);
- BUG_ON(!rl2);
- BUG_ON(!rl2->length);
- BUG_ON(rl2->lcn < LCN_HOLE);
- mp_rebuilt = false;
- /* Get the size for the new mapping pairs array for this extent. */
- mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
- if (unlikely(mp_size <= 0)) {
- err = mp_size;
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot extend allocation of "
- "inode 0x%lx, attribute type 0x%x, "
- "because determining the size for the "
- "mapping pairs failed with error code "
- "%i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- err = -EIO;
- goto undo_alloc;
- }
- /* Extend the attribute record to fit the bigger mapping pairs array. */
- attr_len = le32_to_cpu(a->length);
- err = ntfs_attr_record_resize(m, a, mp_size +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
- if (unlikely(err)) {
- BUG_ON(err != -ENOSPC);
- // TODO: Deal with this by moving this extent to a new mft
- // record or by starting a new extent in a new mft record,
- // possibly by extending this extent partially and filling it
- // and creating a new extent for the remainder, or by making
- // other attributes non-resident and/or by moving other
- // attributes out of this mft record.
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Not enough space in the mft "
- "record for the extended attribute "
- "record. This case is not "
- "implemented yet.");
- err = -EOPNOTSUPP;
- goto undo_alloc;
- }
- mp_rebuilt = true;
- /* Generate the mapping pairs array directly into the attr record. */
- err = ntfs_mapping_pairs_build(vol, (u8*)a +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
- mp_size, rl2, ll, -1, NULL);
- if (unlikely(err)) {
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot extend allocation of "
- "inode 0x%lx, attribute type 0x%x, "
- "because building the mapping pairs "
- "failed with error code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- err = -EIO;
- goto undo_alloc;
- }
- /* Update the highest_vcn. */
- a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
- vol->cluster_size_bits) - 1);
- /*
- * We now have extended the allocated size of the attribute. Reflect
- * this in the ntfs_inode structure and the attribute record.
- */
- if (a->data.non_resident.lowest_vcn) {
- /*
- * We are not in the first attribute extent, switch to it, but
- * first ensure the changes will make it to disk later.
- */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_reinit_search_ctx(ctx);
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err))
- goto restore_undo_alloc;
- /* @m is not used any more so no need to set it. */
- a = ctx->attr;
- }
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = new_alloc_size;
- a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
- /*
- * FIXME: This would fail if @ni is a directory, $MFT, or an index,
- * since those can have sparse/compressed set. For example can be
- * set compressed even though it is not compressed itself and in that
- * case the bit means that files are to be created compressed in the
- * directory... At present this is ok as this code is only called for
- * regular files, and only for their $DATA attribute(s).
- * FIXME: The calculation is wrong if we created a hole above. For now
- * it does not matter as we never create holes.
- */
- if (NInoSparse(ni) || NInoCompressed(ni)) {
- ni->itype.compressed.size += new_alloc_size - allocated_size;
- a->data.non_resident.compressed_size =
- cpu_to_sle64(ni->itype.compressed.size);
- vi->i_blocks = ni->itype.compressed.size >> 9;
- } else
- vi->i_blocks = new_alloc_size >> 9;
- write_unlock_irqrestore(&ni->size_lock, flags);
-alloc_done:
- if (new_data_size >= 0) {
- BUG_ON(new_data_size <
- sle64_to_cpu(a->data.non_resident.data_size));
- a->data.non_resident.data_size = cpu_to_sle64(new_data_size);
- }
-flush_done:
- /* Ensure the changes make it to disk. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
-done:
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
- ntfs_debug("Done, new_allocated_size 0x%llx.",
- (unsigned long long)new_alloc_size);
- return new_alloc_size;
-restore_undo_alloc:
- if (start < 0 || start >= allocated_size)
- ntfs_error(vol->sb, "Cannot complete extension of allocation "
- "of inode 0x%lx, attribute type 0x%x, because "
- "lookup of first attribute extent failed with "
- "error code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- if (err == -ENOENT)
- err = -EIO;
- ntfs_attr_reinit_search_ctx(ctx);
- if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
- allocated_size >> vol->cluster_size_bits, NULL, 0,
- ctx)) {
- ntfs_error(vol->sb, "Failed to find last attribute extent of "
- "attribute in error code path. Run chkdsk to "
- "recover.");
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = new_alloc_size;
- /*
- * FIXME: This would fail if @ni is a directory... See above.
- * FIXME: The calculation is wrong if we created a hole above.
- * For now it does not matter as we never create holes.
- */
- if (NInoSparse(ni) || NInoCompressed(ni)) {
- ni->itype.compressed.size += new_alloc_size -
- allocated_size;
- vi->i_blocks = ni->itype.compressed.size >> 9;
- } else
- vi->i_blocks = new_alloc_size >> 9;
- write_unlock_irqrestore(&ni->size_lock, flags);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
- /*
- * The only thing that is now wrong is the allocated size of the
- * base attribute extent which chkdsk should be able to fix.
- */
- NVolSetErrors(vol);
- return err;
- }
- ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64(
- (allocated_size >> vol->cluster_size_bits) - 1);
-undo_alloc:
- ll = allocated_size >> vol->cluster_size_bits;
- if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) {
- ntfs_error(vol->sb, "Failed to release allocated cluster(s) "
- "in error code path. Run chkdsk to recover "
- "the lost cluster(s).");
- NVolSetErrors(vol);
- }
- m = ctx->mrec;
- a = ctx->attr;
- /*
- * If the runlist truncation fails and/or the search context is no
- * longer valid, we cannot resize the attribute record or build the
- * mapping pairs array thus we mark the inode bad so that no access to
- * the freed clusters can happen.
- */
- if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) {
- ntfs_error(vol->sb, "Failed to %s in error code path. Run "
- "chkdsk to recover.", IS_ERR(m) ?
- "restore attribute search context" :
- "truncate attribute runlist");
- NVolSetErrors(vol);
- } else if (mp_rebuilt) {
- if (ntfs_attr_record_resize(m, a, attr_len)) {
- ntfs_error(vol->sb, "Failed to restore attribute "
- "record in error code path. Run "
- "chkdsk to recover.");
- NVolSetErrors(vol);
- } else /* if (success) */ {
- if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
- a->data.non_resident.
- mapping_pairs_offset), attr_len -
- le16_to_cpu(a->data.non_resident.
- mapping_pairs_offset), rl2, ll, -1,
- NULL)) {
- ntfs_error(vol->sb, "Failed to restore "
- "mapping pairs array in error "
- "code path. Run chkdsk to "
- "recover.");
- NVolSetErrors(vol);
- }
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- }
- }
-err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
-conv_err_out:
- ntfs_debug("Failed. Returning error code %i.", err);
- return err;
-}
-
-/**
- * ntfs_attr_set - fill (a part of) an attribute with a byte
- * @ni: ntfs inode describing the attribute to fill
- * @ofs: offset inside the attribute at which to start to fill
- * @cnt: number of bytes to fill
- * @val: the unsigned 8-bit value with which to fill the attribute
- *
- * Fill @cnt bytes of the attribute described by the ntfs inode @ni starting at
- * byte offset @ofs inside the attribute with the constant byte @val.
- *
- * This function is effectively like memset() applied to an ntfs attribute.
- * Note this function actually only operates on the page cache pages belonging
- * to the ntfs attribute and it marks them dirty after doing the memset().
- * Thus it relies on the vm dirty page write code paths to cause the modified
- * pages to be written to the mft record/disk.
- *
- * Return 0 on success and -errno on error. An error code of -ESPIPE means
- * that @ofs + @cnt were outside the end of the attribute and no write was
- * performed.
- */
-int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
-{
- ntfs_volume *vol = ni->vol;
- struct address_space *mapping;
- struct page *page;
- u8 *kaddr;
- pgoff_t idx, end;
- unsigned start_ofs, end_ofs, size;
-
- ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.",
- (long long)ofs, (long long)cnt, val);
- BUG_ON(ofs < 0);
- BUG_ON(cnt < 0);
- if (!cnt)
- goto done;
- /*
- * FIXME: Compressed and encrypted attributes are not supported when
- * writing and we should never have gotten here for them.
- */
- BUG_ON(NInoCompressed(ni));
- BUG_ON(NInoEncrypted(ni));
- mapping = VFS_I(ni)->i_mapping;
- /* Work out the starting index and page offset. */
- idx = ofs >> PAGE_SHIFT;
- start_ofs = ofs & ~PAGE_MASK;
- /* Work out the ending index and page offset. */
- end = ofs + cnt;
- end_ofs = end & ~PAGE_MASK;
- /* If the end is outside the inode size return -ESPIPE. */
- if (unlikely(end > i_size_read(VFS_I(ni)))) {
- ntfs_error(vol->sb, "Request exceeds end of attribute.");
- return -ESPIPE;
- }
- end >>= PAGE_SHIFT;
- /* If there is a first partial page, need to do it the slow way. */
- if (start_ofs) {
- page = read_mapping_page(mapping, idx, NULL);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to read first partial "
- "page (error, index 0x%lx).", idx);
- return PTR_ERR(page);
- }
- /*
- * If the last page is the same as the first page, need to
- * limit the write to the end offset.
- */
- size = PAGE_SIZE;
- if (idx == end)
- size = end_ofs;
- kaddr = kmap_atomic(page);
- memset(kaddr + start_ofs, val, size - start_ofs);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
- set_page_dirty(page);
- put_page(page);
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- if (idx == end)
- goto done;
- idx++;
- }
- /* Do the whole pages the fast way. */
- for (; idx < end; idx++) {
- /* Find or create the current page. (The page is locked.) */
- page = grab_cache_page(mapping, idx);
- if (unlikely(!page)) {
- ntfs_error(vol->sb, "Insufficient memory to grab "
- "page (index 0x%lx).", idx);
- return -ENOMEM;
- }
- kaddr = kmap_atomic(page);
- memset(kaddr, val, PAGE_SIZE);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
- /*
- * If the page has buffers, mark them uptodate since buffer
- * state and not page state is definitive in 2.6 kernels.
- */
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
-
- bh = head = page_buffers(page);
- do {
- set_buffer_uptodate(bh);
- } while ((bh = bh->b_this_page) != head);
- }
- /* Now that buffers are uptodate, set the page uptodate, too. */
- SetPageUptodate(page);
- /*
- * Set the page and all its buffers dirty and mark the inode
- * dirty, too. The VM will write the page later on.
- */
- set_page_dirty(page);
- /* Finally unlock and release the page. */
- unlock_page(page);
- put_page(page);
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- }
- /* If there is a last partial page, need to do it the slow way. */
- if (end_ofs) {
- page = read_mapping_page(mapping, idx, NULL);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to read last partial page "
- "(error, index 0x%lx).", idx);
- return PTR_ERR(page);
- }
- kaddr = kmap_atomic(page);
- memset(kaddr, val, end_ofs);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
- set_page_dirty(page);
- put_page(page);
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- }
-done:
- ntfs_debug("Done.");
- return 0;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
deleted file mode 100644
index fe0890d3d072..000000000000
--- a/fs/ntfs/attrib.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * attrib.h - Defines for attribute handling in NTFS Linux kernel driver.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_ATTRIB_H
-#define _LINUX_NTFS_ATTRIB_H
-
-#include "endian.h"
-#include "types.h"
-#include "layout.h"
-#include "inode.h"
-#include "runlist.h"
-#include "volume.h"
-
-/**
- * ntfs_attr_search_ctx - used in attribute search functions
- * @mrec: buffer containing mft record to search
- * @attr: attribute record in @mrec where to begin/continue search
- * @is_first: if true ntfs_attr_lookup() begins search with @attr, else after
- *
- * Structure must be initialized to zero before the first call to one of the
- * attribute search functions. Initialize @mrec to point to the mft record to
- * search, and @attr to point to the first attribute within @mrec (not necessary
- * if calling the _first() functions), and set @is_first to 'true' (not necessary
- * if calling the _first() functions).
- *
- * If @is_first is 'true', the search begins with @attr. If @is_first is 'false',
- * the search begins after @attr. This is so that, after the first call to one
- * of the search attribute functions, we can call the function again, without
- * any modification of the search context, to automagically get the next
- * matching attribute.
- */
-typedef struct {
- MFT_RECORD *mrec;
- ATTR_RECORD *attr;
- bool is_first;
- ntfs_inode *ntfs_ino;
- ATTR_LIST_ENTRY *al_entry;
- ntfs_inode *base_ntfs_ino;
- MFT_RECORD *base_mrec;
- ATTR_RECORD *base_attr;
-} ntfs_attr_search_ctx;
-
-extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
- ntfs_attr_search_ctx *ctx);
-extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
-
-extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
- const bool write_locked);
-
-extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
- const VCN vcn, ntfs_attr_search_ctx *ctx);
-
-int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
- const u32 name_len, const IGNORE_CASE_BOOL ic,
- const VCN lowest_vcn, const u8 *val, const u32 val_len,
- ntfs_attr_search_ctx *ctx);
-
-extern int load_attribute_list(ntfs_volume *vol, runlist *rl, u8 *al_start,
- const s64 size, const s64 initialized_size);
-
-static inline s64 ntfs_attr_size(const ATTR_RECORD *a)
-{
- if (!a->non_resident)
- return (s64)le32_to_cpu(a->data.resident.value_length);
- return sle64_to_cpu(a->data.non_resident.data_size);
-}
-
-extern void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx);
-extern ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni,
- MFT_RECORD *mrec);
-extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx);
-
-#ifdef NTFS_RW
-
-extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol,
- const ATTR_TYPE type, const s64 size);
-extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol,
- const ATTR_TYPE type);
-extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
- const ATTR_TYPE type);
-
-extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
-extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
- const u32 new_size);
-
-extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size);
-
-extern s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
- const s64 new_data_size, const s64 data_start);
-
-extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
- const u8 val);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_ATTRIB_H */
diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c
deleted file mode 100644
index 0675b2400873..000000000000
--- a/fs/ntfs/bitmap.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * bitmap.c - NTFS kernel bitmap handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2004-2005 Anton Altaparmakov
- */
-
-#ifdef NTFS_RW
-
-#include <linux/pagemap.h>
-
-#include "bitmap.h"
-#include "debug.h"
-#include "aops.h"
-#include "ntfs.h"
-
-/**
- * __ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value
- * @vi: vfs inode describing the bitmap
- * @start_bit: first bit to set
- * @count: number of bits to set
- * @value: value to set the bits to (i.e. 0 or 1)
- * @is_rollback: if 'true' this is a rollback operation
- *
- * Set @count bits starting at bit @start_bit in the bitmap described by the
- * vfs inode @vi to @value, where @value is either 0 or 1.
- *
- * @is_rollback should always be 'false', it is for internal use to rollback
- * errors. You probably want to use ntfs_bitmap_set_bits_in_run() instead.
- *
- * Return 0 on success and -errno on error.
- */
-int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
- const s64 count, const u8 value, const bool is_rollback)
-{
- s64 cnt = count;
- pgoff_t index, end_index;
- struct address_space *mapping;
- struct page *page;
- u8 *kaddr;
- int pos, len;
- u8 bit;
-
- BUG_ON(!vi);
- ntfs_debug("Entering for i_ino 0x%lx, start_bit 0x%llx, count 0x%llx, "
- "value %u.%s", vi->i_ino, (unsigned long long)start_bit,
- (unsigned long long)cnt, (unsigned int)value,
- is_rollback ? " (rollback)" : "");
- BUG_ON(start_bit < 0);
- BUG_ON(cnt < 0);
- BUG_ON(value > 1);
- /*
- * Calculate the indices for the pages containing the first and last
- * bits, i.e. @start_bit and @start_bit + @cnt - 1, respectively.
- */
- index = start_bit >> (3 + PAGE_SHIFT);
- end_index = (start_bit + cnt - 1) >> (3 + PAGE_SHIFT);
-
- /* Get the page containing the first bit (@start_bit). */
- mapping = vi->i_mapping;
- page = ntfs_map_page(mapping, index);
- if (IS_ERR(page)) {
- if (!is_rollback)
- ntfs_error(vi->i_sb, "Failed to map first page (error "
- "%li), aborting.", PTR_ERR(page));
- return PTR_ERR(page);
- }
- kaddr = page_address(page);
-
- /* Set @pos to the position of the byte containing @start_bit. */
- pos = (start_bit >> 3) & ~PAGE_MASK;
-
- /* Calculate the position of @start_bit in the first byte. */
- bit = start_bit & 7;
-
- /* If the first byte is partial, modify the appropriate bits in it. */
- if (bit) {
- u8 *byte = kaddr + pos;
- while ((bit & 7) && cnt) {
- cnt--;
- if (value)
- *byte |= 1 << bit++;
- else
- *byte &= ~(1 << bit++);
- }
- /* If we are done, unmap the page and return success. */
- if (!cnt)
- goto done;
-
- /* Update @pos to the new position. */
- pos++;
- }
- /*
- * Depending on @value, modify all remaining whole bytes in the page up
- * to @cnt.
- */
- len = min_t(s64, cnt >> 3, PAGE_SIZE - pos);
- memset(kaddr + pos, value ? 0xff : 0, len);
- cnt -= len << 3;
-
- /* Update @len to point to the first not-done byte in the page. */
- if (cnt < 8)
- len += pos;
-
- /* If we are not in the last page, deal with all subsequent pages. */
- while (index < end_index) {
- BUG_ON(cnt <= 0);
-
- /* Update @index and get the next page. */
- flush_dcache_page(page);
- set_page_dirty(page);
- ntfs_unmap_page(page);
- page = ntfs_map_page(mapping, ++index);
- if (IS_ERR(page))
- goto rollback;
- kaddr = page_address(page);
- /*
- * Depending on @value, modify all remaining whole bytes in the
- * page up to @cnt.
- */
- len = min_t(s64, cnt >> 3, PAGE_SIZE);
- memset(kaddr, value ? 0xff : 0, len);
- cnt -= len << 3;
- }
- /*
- * The currently mapped page is the last one. If the last byte is
- * partial, modify the appropriate bits in it. Note, @len is the
- * position of the last byte inside the page.
- */
- if (cnt) {
- u8 *byte;
-
- BUG_ON(cnt > 7);
-
- bit = cnt;
- byte = kaddr + len;
- while (bit--) {
- if (value)
- *byte |= 1 << bit;
- else
- *byte &= ~(1 << bit);
- }
- }
-done:
- /* We are done. Unmap the page and return success. */
- flush_dcache_page(page);
- set_page_dirty(page);
- ntfs_unmap_page(page);
- ntfs_debug("Done.");
- return 0;
-rollback:
- /*
- * Current state:
- * - no pages are mapped
- * - @count - @cnt is the number of bits that have been modified
- */
- if (is_rollback)
- return PTR_ERR(page);
- if (count != cnt)
- pos = __ntfs_bitmap_set_bits_in_run(vi, start_bit, count - cnt,
- value ? 0 : 1, true);
- else
- pos = 0;
- if (!pos) {
- /* Rollback was successful. */
- ntfs_error(vi->i_sb, "Failed to map subsequent page (error "
- "%li), aborting.", PTR_ERR(page));
- } else {
- /* Rollback failed. */
- ntfs_error(vi->i_sb, "Failed to map subsequent page (error "
- "%li) and rollback failed (error %i). "
- "Aborting and leaving inconsistent metadata. "
- "Unmount and run chkdsk.", PTR_ERR(page), pos);
- NVolSetErrors(NTFS_SB(vi->i_sb));
- }
- return PTR_ERR(page);
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/bitmap.h b/fs/ntfs/bitmap.h
deleted file mode 100644
index 9dd2224ca9c4..000000000000
--- a/fs/ntfs/bitmap.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * bitmap.h - Defines for NTFS kernel bitmap handling. Part of the Linux-NTFS
- * project.
- *
- * Copyright (c) 2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_BITMAP_H
-#define _LINUX_NTFS_BITMAP_H
-
-#ifdef NTFS_RW
-
-#include <linux/fs.h>
-
-#include "types.h"
-
-extern int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
- const s64 count, const u8 value, const bool is_rollback);
-
-/**
- * ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value
- * @vi: vfs inode describing the bitmap
- * @start_bit: first bit to set
- * @count: number of bits to set
- * @value: value to set the bits to (i.e. 0 or 1)
- *
- * Set @count bits starting at bit @start_bit in the bitmap described by the
- * vfs inode @vi to @value, where @value is either 0 or 1.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_bitmap_set_bits_in_run(struct inode *vi,
- const s64 start_bit, const s64 count, const u8 value)
-{
- return __ntfs_bitmap_set_bits_in_run(vi, start_bit, count, value,
- false);
-}
-
-/**
- * ntfs_bitmap_set_run - set a run of bits in a bitmap
- * @vi: vfs inode describing the bitmap
- * @start_bit: first bit to set
- * @count: number of bits to set
- *
- * Set @count bits starting at bit @start_bit in the bitmap described by the
- * vfs inode @vi.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_bitmap_set_run(struct inode *vi, const s64 start_bit,
- const s64 count)
-{
- return ntfs_bitmap_set_bits_in_run(vi, start_bit, count, 1);
-}
-
-/**
- * ntfs_bitmap_clear_run - clear a run of bits in a bitmap
- * @vi: vfs inode describing the bitmap
- * @start_bit: first bit to clear
- * @count: number of bits to clear
- *
- * Clear @count bits starting at bit @start_bit in the bitmap described by the
- * vfs inode @vi.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_bitmap_clear_run(struct inode *vi, const s64 start_bit,
- const s64 count)
-{
- return ntfs_bitmap_set_bits_in_run(vi, start_bit, count, 0);
-}
-
-/**
- * ntfs_bitmap_set_bit - set a bit in a bitmap
- * @vi: vfs inode describing the bitmap
- * @bit: bit to set
- *
- * Set bit @bit in the bitmap described by the vfs inode @vi.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_bitmap_set_bit(struct inode *vi, const s64 bit)
-{
- return ntfs_bitmap_set_run(vi, bit, 1);
-}
-
-/**
- * ntfs_bitmap_clear_bit - clear a bit in a bitmap
- * @vi: vfs inode describing the bitmap
- * @bit: bit to clear
- *
- * Clear bit @bit in the bitmap described by the vfs inode @vi.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_bitmap_clear_bit(struct inode *vi, const s64 bit)
-{
- return ntfs_bitmap_clear_run(vi, bit, 1);
-}
-
-#endif /* NTFS_RW */
-
-#endif /* defined _LINUX_NTFS_BITMAP_H */
diff --git a/fs/ntfs/collate.c b/fs/ntfs/collate.c
deleted file mode 100644
index 3ab6ec96abfe..000000000000
--- a/fs/ntfs/collate.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * collate.c - NTFS kernel collation handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2004 Anton Altaparmakov
- */
-
-#include "collate.h"
-#include "debug.h"
-#include "ntfs.h"
-
-static int ntfs_collate_binary(ntfs_volume *vol,
- const void *data1, const int data1_len,
- const void *data2, const int data2_len)
-{
- int rc;
-
- ntfs_debug("Entering.");
- rc = memcmp(data1, data2, min(data1_len, data2_len));
- if (!rc && (data1_len != data2_len)) {
- if (data1_len < data2_len)
- rc = -1;
- else
- rc = 1;
- }
- ntfs_debug("Done, returning %i", rc);
- return rc;
-}
-
-static int ntfs_collate_ntofs_ulong(ntfs_volume *vol,
- const void *data1, const int data1_len,
- const void *data2, const int data2_len)
-{
- int rc;
- u32 d1, d2;
-
- ntfs_debug("Entering.");
- // FIXME: We don't really want to bug here.
- BUG_ON(data1_len != data2_len);
- BUG_ON(data1_len != 4);
- d1 = le32_to_cpup(data1);
- d2 = le32_to_cpup(data2);
- if (d1 < d2)
- rc = -1;
- else {
- if (d1 == d2)
- rc = 0;
- else
- rc = 1;
- }
- ntfs_debug("Done, returning %i", rc);
- return rc;
-}
-
-typedef int (*ntfs_collate_func_t)(ntfs_volume *, const void *, const int,
- const void *, const int);
-
-static ntfs_collate_func_t ntfs_do_collate0x0[3] = {
- ntfs_collate_binary,
- NULL/*ntfs_collate_file_name*/,
- NULL/*ntfs_collate_unicode_string*/,
-};
-
-static ntfs_collate_func_t ntfs_do_collate0x1[4] = {
- ntfs_collate_ntofs_ulong,
- NULL/*ntfs_collate_ntofs_sid*/,
- NULL/*ntfs_collate_ntofs_security_hash*/,
- NULL/*ntfs_collate_ntofs_ulongs*/,
-};
-
-/**
- * ntfs_collate - collate two data items using a specified collation rule
- * @vol: ntfs volume to which the data items belong
- * @cr: collation rule to use when comparing the items
- * @data1: first data item to collate
- * @data1_len: length in bytes of @data1
- * @data2: second data item to collate
- * @data2_len: length in bytes of @data2
- *
- * Collate the two data items @data1 and @data2 using the collation rule @cr
- * and return -1, 0, ir 1 if @data1 is found, respectively, to collate before,
- * to match, or to collate after @data2.
- *
- * For speed we use the collation rule @cr as an index into two tables of
- * function pointers to call the appropriate collation function.
- */
-int ntfs_collate(ntfs_volume *vol, COLLATION_RULE cr,
- const void *data1, const int data1_len,
- const void *data2, const int data2_len) {
- int i;
-
- ntfs_debug("Entering.");
- /*
- * FIXME: At the moment we only support COLLATION_BINARY and
- * COLLATION_NTOFS_ULONG, so we BUG() for everything else for now.
- */
- BUG_ON(cr != COLLATION_BINARY && cr != COLLATION_NTOFS_ULONG);
- i = le32_to_cpu(cr);
- BUG_ON(i < 0);
- if (i <= 0x02)
- return ntfs_do_collate0x0[i](vol, data1, data1_len,
- data2, data2_len);
- BUG_ON(i < 0x10);
- i -= 0x10;
- if (likely(i <= 3))
- return ntfs_do_collate0x1[i](vol, data1, data1_len,
- data2, data2_len);
- BUG();
- return 0;
-}
diff --git a/fs/ntfs/collate.h b/fs/ntfs/collate.h
deleted file mode 100644
index f2255619b4f4..000000000000
--- a/fs/ntfs/collate.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * collate.h - Defines for NTFS kernel collation handling. Part of the
- * Linux-NTFS project.
- *
- * Copyright (c) 2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_COLLATE_H
-#define _LINUX_NTFS_COLLATE_H
-
-#include "types.h"
-#include "volume.h"
-
-static inline bool ntfs_is_collation_rule_supported(COLLATION_RULE cr) {
- int i;
-
- /*
- * FIXME: At the moment we only support COLLATION_BINARY and
- * COLLATION_NTOFS_ULONG, so we return false for everything else for
- * now.
- */
- if (unlikely(cr != COLLATION_BINARY && cr != COLLATION_NTOFS_ULONG))
- return false;
- i = le32_to_cpu(cr);
- if (likely(((i >= 0) && (i <= 0x02)) ||
- ((i >= 0x10) && (i <= 0x13))))
- return true;
- return false;
-}
-
-extern int ntfs_collate(ntfs_volume *vol, COLLATION_RULE cr,
- const void *data1, const int data1_len,
- const void *data2, const int data2_len);
-
-#endif /* _LINUX_NTFS_COLLATE_H */
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
deleted file mode 100644
index 761aaa0195d6..000000000000
--- a/fs/ntfs/compress.c
+++ /dev/null
@@ -1,950 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * compress.c - NTFS kernel compressed attributes handling.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/blkdev.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include "attrib.h"
-#include "inode.h"
-#include "debug.h"
-#include "ntfs.h"
-
-/**
- * ntfs_compression_constants - enum of constants used in the compression code
- */
-typedef enum {
- /* Token types and access mask. */
- NTFS_SYMBOL_TOKEN = 0,
- NTFS_PHRASE_TOKEN = 1,
- NTFS_TOKEN_MASK = 1,
-
- /* Compression sub-block constants. */
- NTFS_SB_SIZE_MASK = 0x0fff,
- NTFS_SB_SIZE = 0x1000,
- NTFS_SB_IS_COMPRESSED = 0x8000,
-
- /*
- * The maximum compression block size is by definition 16 * the cluster
- * size, with the maximum supported cluster size being 4kiB. Thus the
- * maximum compression buffer size is 64kiB, so we use this when
- * initializing the compression buffer.
- */
- NTFS_MAX_CB_SIZE = 64 * 1024,
-} ntfs_compression_constants;
-
-/*
- * ntfs_compression_buffer - one buffer for the decompression engine
- */
-static u8 *ntfs_compression_buffer;
-
-/*
- * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer
- */
-static DEFINE_SPINLOCK(ntfs_cb_lock);
-
-/**
- * allocate_compression_buffers - allocate the decompression buffers
- *
- * Caller has to hold the ntfs_lock mutex.
- *
- * Return 0 on success or -ENOMEM if the allocations failed.
- */
-int allocate_compression_buffers(void)
-{
- BUG_ON(ntfs_compression_buffer);
-
- ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE);
- if (!ntfs_compression_buffer)
- return -ENOMEM;
- return 0;
-}
-
-/**
- * free_compression_buffers - free the decompression buffers
- *
- * Caller has to hold the ntfs_lock mutex.
- */
-void free_compression_buffers(void)
-{
- BUG_ON(!ntfs_compression_buffer);
- vfree(ntfs_compression_buffer);
- ntfs_compression_buffer = NULL;
-}
-
-/**
- * zero_partial_compressed_page - zero out of bounds compressed page region
- */
-static void zero_partial_compressed_page(struct page *page,
- const s64 initialized_size)
-{
- u8 *kp = page_address(page);
- unsigned int kp_ofs;
-
- ntfs_debug("Zeroing page region outside initialized size.");
- if (((s64)page->index << PAGE_SHIFT) >= initialized_size) {
- clear_page(kp);
- return;
- }
- kp_ofs = initialized_size & ~PAGE_MASK;
- memset(kp + kp_ofs, 0, PAGE_SIZE - kp_ofs);
- return;
-}
-
-/**
- * handle_bounds_compressed_page - test for&handle out of bounds compressed page
- */
-static inline void handle_bounds_compressed_page(struct page *page,
- const loff_t i_size, const s64 initialized_size)
-{
- if ((page->index >= (initialized_size >> PAGE_SHIFT)) &&
- (initialized_size < i_size))
- zero_partial_compressed_page(page, initialized_size);
- return;
-}
-
-/**
- * ntfs_decompress - decompress a compression block into an array of pages
- * @dest_pages: destination array of pages
- * @completed_pages: scratch space to track completed pages
- * @dest_index: current index into @dest_pages (IN/OUT)
- * @dest_ofs: current offset within @dest_pages[@dest_index] (IN/OUT)
- * @dest_max_index: maximum index into @dest_pages (IN)
- * @dest_max_ofs: maximum offset within @dest_pages[@dest_max_index] (IN)
- * @xpage: the target page (-1 if none) (IN)
- * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT)
- * @cb_start: compression block to decompress (IN)
- * @cb_size: size of compression block @cb_start in bytes (IN)
- * @i_size: file size when we started the read (IN)
- * @initialized_size: initialized file size when we started the read (IN)
- *
- * The caller must have disabled preemption. ntfs_decompress() reenables it when
- * the critical section is finished.
- *
- * This decompresses the compression block @cb_start into the array of
- * destination pages @dest_pages starting at index @dest_index into @dest_pages
- * and at offset @dest_pos into the page @dest_pages[@dest_index].
- *
- * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1.
- * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified.
- *
- * @cb_start is a pointer to the compression block which needs decompressing
- * and @cb_size is the size of @cb_start in bytes (8-64kiB).
- *
- * Return 0 if success or -EOVERFLOW on error in the compressed stream.
- * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
- * completed during the decompression of the compression block (@cb_start).
- *
- * Warning: This function *REQUIRES* PAGE_SIZE >= 4096 or it will blow up
- * unpredicatbly! You have been warned!
- *
- * Note to hackers: This function may not sleep until it has finished accessing
- * the compression block @cb_start as it is a per-CPU buffer.
- */
-static int ntfs_decompress(struct page *dest_pages[], int completed_pages[],
- int *dest_index, int *dest_ofs, const int dest_max_index,
- const int dest_max_ofs, const int xpage, char *xpage_done,
- u8 *const cb_start, const u32 cb_size, const loff_t i_size,
- const s64 initialized_size)
-{
- /*
- * Pointers into the compressed data, i.e. the compression block (cb),
- * and the therein contained sub-blocks (sb).
- */
- u8 *cb_end = cb_start + cb_size; /* End of cb. */
- u8 *cb = cb_start; /* Current position in cb. */
- u8 *cb_sb_start; /* Beginning of the current sb in the cb. */
- u8 *cb_sb_end; /* End of current sb / beginning of next sb. */
-
- /* Variables for uncompressed data / destination. */
- struct page *dp; /* Current destination page being worked on. */
- u8 *dp_addr; /* Current pointer into dp. */
- u8 *dp_sb_start; /* Start of current sub-block in dp. */
- u8 *dp_sb_end; /* End of current sb in dp (dp_sb_start +
- NTFS_SB_SIZE). */
- u16 do_sb_start; /* @dest_ofs when starting this sub-block. */
- u16 do_sb_end; /* @dest_ofs of end of this sb (do_sb_start +
- NTFS_SB_SIZE). */
-
- /* Variables for tag and token parsing. */
- u8 tag; /* Current tag. */
- int token; /* Loop counter for the eight tokens in tag. */
- int nr_completed_pages = 0;
-
- /* Default error code. */
- int err = -EOVERFLOW;
-
- ntfs_debug("Entering, cb_size = 0x%x.", cb_size);
-do_next_sb:
- ntfs_debug("Beginning sub-block at offset = 0x%zx in the cb.",
- cb - cb_start);
- /*
- * Have we reached the end of the compression block or the end of the
- * decompressed data? The latter can happen for example if the current
- * position in the compression block is one byte before its end so the
- * first two checks do not detect it.
- */
- if (cb == cb_end || !le16_to_cpup((le16*)cb) ||
- (*dest_index == dest_max_index &&
- *dest_ofs == dest_max_ofs)) {
- int i;
-
- ntfs_debug("Completed. Returning success (0).");
- err = 0;
-return_error:
- /* We can sleep from now on, so we drop lock. */
- spin_unlock(&ntfs_cb_lock);
- /* Second stage: finalize completed pages. */
- if (nr_completed_pages > 0) {
- for (i = 0; i < nr_completed_pages; i++) {
- int di = completed_pages[i];
-
- dp = dest_pages[di];
- /*
- * If we are outside the initialized size, zero
- * the out of bounds page range.
- */
- handle_bounds_compressed_page(dp, i_size,
- initialized_size);
- flush_dcache_page(dp);
- kunmap(dp);
- SetPageUptodate(dp);
- unlock_page(dp);
- if (di == xpage)
- *xpage_done = 1;
- else
- put_page(dp);
- dest_pages[di] = NULL;
- }
- }
- return err;
- }
-
- /* Setup offsets for the current sub-block destination. */
- do_sb_start = *dest_ofs;
- do_sb_end = do_sb_start + NTFS_SB_SIZE;
-
- /* Check that we are still within allowed boundaries. */
- if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs)
- goto return_overflow;
-
- /* Does the minimum size of a compressed sb overflow valid range? */
- if (cb + 6 > cb_end)
- goto return_overflow;
-
- /* Setup the current sub-block source pointers and validate range. */
- cb_sb_start = cb;
- cb_sb_end = cb_sb_start + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK)
- + 3;
- if (cb_sb_end > cb_end)
- goto return_overflow;
-
- /* Get the current destination page. */
- dp = dest_pages[*dest_index];
- if (!dp) {
- /* No page present. Skip decompression of this sub-block. */
- cb = cb_sb_end;
-
- /* Advance destination position to next sub-block. */
- *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_MASK;
- if (!*dest_ofs && (++*dest_index > dest_max_index))
- goto return_overflow;
- goto do_next_sb;
- }
-
- /* We have a valid destination page. Setup the destination pointers. */
- dp_addr = (u8*)page_address(dp) + do_sb_start;
-
- /* Now, we are ready to process the current sub-block (sb). */
- if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) {
- ntfs_debug("Found uncompressed sub-block.");
- /* This sb is not compressed, just copy it into destination. */
-
- /* Advance source position to first data byte. */
- cb += 2;
-
- /* An uncompressed sb must be full size. */
- if (cb_sb_end - cb != NTFS_SB_SIZE)
- goto return_overflow;
-
- /* Copy the block and advance the source position. */
- memcpy(dp_addr, cb, NTFS_SB_SIZE);
- cb += NTFS_SB_SIZE;
-
- /* Advance destination position to next sub-block. */
- *dest_ofs += NTFS_SB_SIZE;
- if (!(*dest_ofs &= ~PAGE_MASK)) {
-finalize_page:
- /*
- * First stage: add current page index to array of
- * completed pages.
- */
- completed_pages[nr_completed_pages++] = *dest_index;
- if (++*dest_index > dest_max_index)
- goto return_overflow;
- }
- goto do_next_sb;
- }
- ntfs_debug("Found compressed sub-block.");
- /* This sb is compressed, decompress it into destination. */
-
- /* Setup destination pointers. */
- dp_sb_start = dp_addr;
- dp_sb_end = dp_sb_start + NTFS_SB_SIZE;
-
- /* Forward to the first tag in the sub-block. */
- cb += 2;
-do_next_tag:
- if (cb == cb_sb_end) {
- /* Check if the decompressed sub-block was not full-length. */
- if (dp_addr < dp_sb_end) {
- int nr_bytes = do_sb_end - *dest_ofs;
-
- ntfs_debug("Filling incomplete sub-block with "
- "zeroes.");
- /* Zero remainder and update destination position. */
- memset(dp_addr, 0, nr_bytes);
- *dest_ofs += nr_bytes;
- }
- /* We have finished the current sub-block. */
- if (!(*dest_ofs &= ~PAGE_MASK))
- goto finalize_page;
- goto do_next_sb;
- }
-
- /* Check we are still in range. */
- if (cb > cb_sb_end || dp_addr > dp_sb_end)
- goto return_overflow;
-
- /* Get the next tag and advance to first token. */
- tag = *cb++;
-
- /* Parse the eight tokens described by the tag. */
- for (token = 0; token < 8; token++, tag >>= 1) {
- u16 lg, pt, length, max_non_overlap;
- register u16 i;
- u8 *dp_back_addr;
-
- /* Check if we are done / still in range. */
- if (cb >= cb_sb_end || dp_addr > dp_sb_end)
- break;
-
- /* Determine token type and parse appropriately.*/
- if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
- /*
- * We have a symbol token, copy the symbol across, and
- * advance the source and destination positions.
- */
- *dp_addr++ = *cb++;
- ++*dest_ofs;
-
- /* Continue with the next token. */
- continue;
- }
-
- /*
- * We have a phrase token. Make sure it is not the first tag in
- * the sb as this is illegal and would confuse the code below.
- */
- if (dp_addr == dp_sb_start)
- goto return_overflow;
-
- /*
- * Determine the number of bytes to go back (p) and the number
- * of bytes to copy (l). We use an optimized algorithm in which
- * we first calculate log2(current destination position in sb),
- * which allows determination of l and p in O(1) rather than
- * O(n). We just need an arch-optimized log2() function now.
- */
- lg = 0;
- for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1)
- lg++;
-
- /* Get the phrase token into i. */
- pt = le16_to_cpup((le16*)cb);
-
- /*
- * Calculate starting position of the byte sequence in
- * the destination using the fact that p = (pt >> (12 - lg)) + 1
- * and make sure we don't go too far back.
- */
- dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1;
- if (dp_back_addr < dp_sb_start)
- goto return_overflow;
-
- /* Now calculate the length of the byte sequence. */
- length = (pt & (0xfff >> lg)) + 3;
-
- /* Advance destination position and verify it is in range. */
- *dest_ofs += length;
- if (*dest_ofs > do_sb_end)
- goto return_overflow;
-
- /* The number of non-overlapping bytes. */
- max_non_overlap = dp_addr - dp_back_addr;
-
- if (length <= max_non_overlap) {
- /* The byte sequence doesn't overlap, just copy it. */
- memcpy(dp_addr, dp_back_addr, length);
-
- /* Advance destination pointer. */
- dp_addr += length;
- } else {
- /*
- * The byte sequence does overlap, copy non-overlapping
- * part and then do a slow byte by byte copy for the
- * overlapping part. Also, advance the destination
- * pointer.
- */
- memcpy(dp_addr, dp_back_addr, max_non_overlap);
- dp_addr += max_non_overlap;
- dp_back_addr += max_non_overlap;
- length -= max_non_overlap;
- while (length--)
- *dp_addr++ = *dp_back_addr++;
- }
-
- /* Advance source position and continue with the next token. */
- cb += 2;
- }
-
- /* No tokens left in the current tag. Continue with the next tag. */
- goto do_next_tag;
-
-return_overflow:
- ntfs_error(NULL, "Failed. Returning -EOVERFLOW.");
- goto return_error;
-}
-
-/**
- * ntfs_read_compressed_block - read a compressed block into the page cache
- * @page: locked page in the compression block(s) we need to read
- *
- * When we are called the page has already been verified to be locked and the
- * attribute is known to be non-resident, not encrypted, but compressed.
- *
- * 1. Determine which compression block(s) @page is in.
- * 2. Get hold of all pages corresponding to this/these compression block(s).
- * 3. Read the (first) compression block.
- * 4. Decompress it into the corresponding pages.
- * 5. Throw the compressed data away and proceed to 3. for the next compression
- * block or return success if no more compression blocks left.
- *
- * Warning: We have to be careful what we do about existing pages. They might
- * have been written to so that we would lose data if we were to just overwrite
- * them with the out-of-date uncompressed data.
- *
- * FIXME: For PAGE_SIZE > cb_size we are not doing the Right Thing(TM) at
- * the end of the file I think. We need to detect this case and zero the out
- * of bounds remainder of the page in question and mark it as handled. At the
- * moment we would just return -EIO on such a page. This bug will only become
- * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte
- * clusters so is probably not going to be seen by anyone. Still this should
- * be fixed. (AIA)
- *
- * FIXME: Again for PAGE_SIZE > cb_size we are screwing up both in
- * handling sparse and compressed cbs. (AIA)
- *
- * FIXME: At the moment we don't do any zeroing out in the case that
- * initialized_size is less than data_size. This should be safe because of the
- * nature of the compression algorithm used. Just in case we check and output
- * an error message in read inode if the two sizes are not equal for a
- * compressed file. (AIA)
- */
-int ntfs_read_compressed_block(struct page *page)
-{
- loff_t i_size;
- s64 initialized_size;
- struct address_space *mapping = page->mapping;
- ntfs_inode *ni = NTFS_I(mapping->host);
- ntfs_volume *vol = ni->vol;
- struct super_block *sb = vol->sb;
- runlist_element *rl;
- unsigned long flags, block_size = sb->s_blocksize;
- unsigned char block_size_bits = sb->s_blocksize_bits;
- u8 *cb, *cb_pos, *cb_end;
- struct buffer_head **bhs;
- unsigned long offset, index = page->index;
- u32 cb_size = ni->itype.compressed.block_size;
- u64 cb_size_mask = cb_size - 1UL;
- VCN vcn;
- LCN lcn;
- /* The first wanted vcn (minimum alignment is PAGE_SIZE). */
- VCN start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >>
- vol->cluster_size_bits;
- /*
- * The first vcn after the last wanted vcn (minimum alignment is again
- * PAGE_SIZE.
- */
- VCN end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1)
- & ~cb_size_mask) >> vol->cluster_size_bits;
- /* Number of compression blocks (cbs) in the wanted vcn range. */
- unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits
- >> ni->itype.compressed.block_size_bits;
- /*
- * Number of pages required to store the uncompressed data from all
- * compression blocks (cbs) overlapping @page. Due to alignment
- * guarantees of start_vcn and end_vcn, no need to round up here.
- */
- unsigned int nr_pages = (end_vcn - start_vcn) <<
- vol->cluster_size_bits >> PAGE_SHIFT;
- unsigned int xpage, max_page, cur_page, cur_ofs, i;
- unsigned int cb_clusters, cb_max_ofs;
- int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0;
- struct page **pages;
- int *completed_pages;
- unsigned char xpage_done = 0;
-
- ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = "
- "%i.", index, cb_size, nr_pages);
- /*
- * Bad things happen if we get here for anything that is not an
- * unnamed $DATA attribute.
- */
- BUG_ON(ni->type != AT_DATA);
- BUG_ON(ni->name_len);
-
- pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
- completed_pages = kmalloc_array(nr_pages + 1, sizeof(int), GFP_NOFS);
-
- /* Allocate memory to store the buffer heads we need. */
- bhs_size = cb_size / block_size * sizeof(struct buffer_head *);
- bhs = kmalloc(bhs_size, GFP_NOFS);
-
- if (unlikely(!pages || !bhs || !completed_pages)) {
- kfree(bhs);
- kfree(pages);
- kfree(completed_pages);
- unlock_page(page);
- ntfs_error(vol->sb, "Failed to allocate internal buffers.");
- return -ENOMEM;
- }
-
- /*
- * We have already been given one page, this is the one we must do.
- * Once again, the alignment guarantees keep it simple.
- */
- offset = start_vcn << vol->cluster_size_bits >> PAGE_SHIFT;
- xpage = index - offset;
- pages[xpage] = page;
- /*
- * The remaining pages need to be allocated and inserted into the page
- * cache, alignment guarantees keep all the below much simpler. (-8
- */
- read_lock_irqsave(&ni->size_lock, flags);
- i_size = i_size_read(VFS_I(ni));
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- max_page = ((i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) -
- offset;
- /* Is the page fully outside i_size? (truncate in progress) */
- if (xpage >= max_page) {
- kfree(bhs);
- kfree(pages);
- kfree(completed_pages);
- zero_user(page, 0, PAGE_SIZE);
- ntfs_debug("Compressed read outside i_size - truncated?");
- SetPageUptodate(page);
- unlock_page(page);
- return 0;
- }
- if (nr_pages < max_page)
- max_page = nr_pages;
- for (i = 0; i < max_page; i++, offset++) {
- if (i != xpage)
- pages[i] = grab_cache_page_nowait(mapping, offset);
- page = pages[i];
- if (page) {
- /*
- * We only (re)read the page if it isn't already read
- * in and/or dirty or we would be losing data or at
- * least wasting our time.
- */
- if (!PageDirty(page) && (!PageUptodate(page) ||
- PageError(page))) {
- ClearPageError(page);
- kmap(page);
- continue;
- }
- unlock_page(page);
- put_page(page);
- pages[i] = NULL;
- }
- }
-
- /*
- * We have the runlist, and all the destination pages we need to fill.
- * Now read the first compression block.
- */
- cur_page = 0;
- cur_ofs = 0;
- cb_clusters = ni->itype.compressed.block_clusters;
-do_next_cb:
- nr_cbs--;
- nr_bhs = 0;
-
- /* Read all cb buffer heads one cluster at a time. */
- rl = NULL;
- for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn;
- vcn++) {
- bool is_retry = false;
-
- if (!rl) {
-lock_retry_remap:
- down_read(&ni->runlist.lock);
- rl = ni->runlist.rl;
- }
- if (likely(rl != NULL)) {
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- } else
- lcn = LCN_RL_NOT_MAPPED;
- ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
- (unsigned long long)vcn,
- (unsigned long long)lcn);
- if (lcn < 0) {
- /*
- * When we reach the first sparse cluster we have
- * finished with the cb.
- */
- if (lcn == LCN_HOLE)
- break;
- if (is_retry || lcn != LCN_RL_NOT_MAPPED)
- goto rl_err;
- is_retry = true;
- /*
- * Attempt to map runlist, dropping lock for the
- * duration.
- */
- up_read(&ni->runlist.lock);
- if (!ntfs_map_runlist(ni, vcn))
- goto lock_retry_remap;
- goto map_rl_err;
- }
- block = lcn << vol->cluster_size_bits >> block_size_bits;
- /* Read the lcn from device in chunks of block_size bytes. */
- max_block = block + (vol->cluster_size >> block_size_bits);
- do {
- ntfs_debug("block = 0x%x.", block);
- if (unlikely(!(bhs[nr_bhs] = sb_getblk(sb, block))))
- goto getblk_err;
- nr_bhs++;
- } while (++block < max_block);
- }
-
- /* Release the lock if we took it. */
- if (rl)
- up_read(&ni->runlist.lock);
-
- /* Setup and initiate io on all buffer heads. */
- for (i = 0; i < nr_bhs; i++) {
- struct buffer_head *tbh = bhs[i];
-
- if (!trylock_buffer(tbh))
- continue;
- if (unlikely(buffer_uptodate(tbh))) {
- unlock_buffer(tbh);
- continue;
- }
- get_bh(tbh);
- tbh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, tbh);
- }
-
- /* Wait for io completion on all buffer heads. */
- for (i = 0; i < nr_bhs; i++) {
- struct buffer_head *tbh = bhs[i];
-
- if (buffer_uptodate(tbh))
- continue;
- wait_on_buffer(tbh);
- /*
- * We need an optimization barrier here, otherwise we start
- * hitting the below fixup code when accessing a loopback
- * mounted ntfs partition. This indicates either there is a
- * race condition in the loop driver or, more likely, gcc
- * overoptimises the code without the barrier and it doesn't
- * do the Right Thing(TM).
- */
- barrier();
- if (unlikely(!buffer_uptodate(tbh))) {
- ntfs_warning(vol->sb, "Buffer is unlocked but not "
- "uptodate! Unplugging the disk queue "
- "and rescheduling.");
- get_bh(tbh);
- io_schedule();
- put_bh(tbh);
- if (unlikely(!buffer_uptodate(tbh)))
- goto read_err;
- ntfs_warning(vol->sb, "Buffer is now uptodate. Good.");
- }
- }
-
- /*
- * Get the compression buffer. We must not sleep any more
- * until we are finished with it.
- */
- spin_lock(&ntfs_cb_lock);
- cb = ntfs_compression_buffer;
-
- BUG_ON(!cb);
-
- cb_pos = cb;
- cb_end = cb + cb_size;
-
- /* Copy the buffer heads into the contiguous buffer. */
- for (i = 0; i < nr_bhs; i++) {
- memcpy(cb_pos, bhs[i]->b_data, block_size);
- cb_pos += block_size;
- }
-
- /* Just a precaution. */
- if (cb_pos + 2 <= cb + cb_size)
- *(u16*)cb_pos = 0;
-
- /* Reset cb_pos back to the beginning. */
- cb_pos = cb;
-
- /* We now have both source (if present) and destination. */
- ntfs_debug("Successfully read the compression block.");
-
- /* The last page and maximum offset within it for the current cb. */
- cb_max_page = (cur_page << PAGE_SHIFT) + cur_ofs + cb_size;
- cb_max_ofs = cb_max_page & ~PAGE_MASK;
- cb_max_page >>= PAGE_SHIFT;
-
- /* Catch end of file inside a compression block. */
- if (cb_max_page > max_page)
- cb_max_page = max_page;
-
- if (vcn == start_vcn - cb_clusters) {
- /* Sparse cb, zero out page range overlapping the cb. */
- ntfs_debug("Found sparse compression block.");
- /* We can sleep from now on, so we drop lock. */
- spin_unlock(&ntfs_cb_lock);
- if (cb_max_ofs)
- cb_max_page--;
- for (; cur_page < cb_max_page; cur_page++) {
- page = pages[cur_page];
- if (page) {
- if (likely(!cur_ofs))
- clear_page(page_address(page));
- else
- memset(page_address(page) + cur_ofs, 0,
- PAGE_SIZE -
- cur_ofs);
- flush_dcache_page(page);
- kunmap(page);
- SetPageUptodate(page);
- unlock_page(page);
- if (cur_page == xpage)
- xpage_done = 1;
- else
- put_page(page);
- pages[cur_page] = NULL;
- }
- cb_pos += PAGE_SIZE - cur_ofs;
- cur_ofs = 0;
- if (cb_pos >= cb_end)
- break;
- }
- /* If we have a partial final page, deal with it now. */
- if (cb_max_ofs && cb_pos < cb_end) {
- page = pages[cur_page];
- if (page)
- memset(page_address(page) + cur_ofs, 0,
- cb_max_ofs - cur_ofs);
- /*
- * No need to update cb_pos at this stage:
- * cb_pos += cb_max_ofs - cur_ofs;
- */
- cur_ofs = cb_max_ofs;
- }
- } else if (vcn == start_vcn) {
- /* We can't sleep so we need two stages. */
- unsigned int cur2_page = cur_page;
- unsigned int cur_ofs2 = cur_ofs;
- u8 *cb_pos2 = cb_pos;
-
- ntfs_debug("Found uncompressed compression block.");
- /* Uncompressed cb, copy it to the destination pages. */
- /*
- * TODO: As a big optimization, we could detect this case
- * before we read all the pages and use block_read_full_folio()
- * on all full pages instead (we still have to treat partial
- * pages especially but at least we are getting rid of the
- * synchronous io for the majority of pages.
- * Or if we choose not to do the read-ahead/-behind stuff, we
- * could just return block_read_full_folio(pages[xpage]) as long
- * as PAGE_SIZE <= cb_size.
- */
- if (cb_max_ofs)
- cb_max_page--;
- /* First stage: copy data into destination pages. */
- for (; cur_page < cb_max_page; cur_page++) {
- page = pages[cur_page];
- if (page)
- memcpy(page_address(page) + cur_ofs, cb_pos,
- PAGE_SIZE - cur_ofs);
- cb_pos += PAGE_SIZE - cur_ofs;
- cur_ofs = 0;
- if (cb_pos >= cb_end)
- break;
- }
- /* If we have a partial final page, deal with it now. */
- if (cb_max_ofs && cb_pos < cb_end) {
- page = pages[cur_page];
- if (page)
- memcpy(page_address(page) + cur_ofs, cb_pos,
- cb_max_ofs - cur_ofs);
- cb_pos += cb_max_ofs - cur_ofs;
- cur_ofs = cb_max_ofs;
- }
- /* We can sleep from now on, so drop lock. */
- spin_unlock(&ntfs_cb_lock);
- /* Second stage: finalize pages. */
- for (; cur2_page < cb_max_page; cur2_page++) {
- page = pages[cur2_page];
- if (page) {
- /*
- * If we are outside the initialized size, zero
- * the out of bounds page range.
- */
- handle_bounds_compressed_page(page, i_size,
- initialized_size);
- flush_dcache_page(page);
- kunmap(page);
- SetPageUptodate(page);
- unlock_page(page);
- if (cur2_page == xpage)
- xpage_done = 1;
- else
- put_page(page);
- pages[cur2_page] = NULL;
- }
- cb_pos2 += PAGE_SIZE - cur_ofs2;
- cur_ofs2 = 0;
- if (cb_pos2 >= cb_end)
- break;
- }
- } else {
- /* Compressed cb, decompress it into the destination page(s). */
- unsigned int prev_cur_page = cur_page;
-
- ntfs_debug("Found compressed compression block.");
- err = ntfs_decompress(pages, completed_pages, &cur_page,
- &cur_ofs, cb_max_page, cb_max_ofs, xpage,
- &xpage_done, cb_pos, cb_size - (cb_pos - cb),
- i_size, initialized_size);
- /*
- * We can sleep from now on, lock already dropped by
- * ntfs_decompress().
- */
- if (err) {
- ntfs_error(vol->sb, "ntfs_decompress() failed in inode "
- "0x%lx with error code %i. Skipping "
- "this compression block.",
- ni->mft_no, -err);
- /* Release the unfinished pages. */
- for (; prev_cur_page < cur_page; prev_cur_page++) {
- page = pages[prev_cur_page];
- if (page) {
- flush_dcache_page(page);
- kunmap(page);
- unlock_page(page);
- if (prev_cur_page != xpage)
- put_page(page);
- pages[prev_cur_page] = NULL;
- }
- }
- }
- }
-
- /* Release the buffer heads. */
- for (i = 0; i < nr_bhs; i++)
- brelse(bhs[i]);
-
- /* Do we have more work to do? */
- if (nr_cbs)
- goto do_next_cb;
-
- /* We no longer need the list of buffer heads. */
- kfree(bhs);
-
- /* Clean up if we have any pages left. Should never happen. */
- for (cur_page = 0; cur_page < max_page; cur_page++) {
- page = pages[cur_page];
- if (page) {
- ntfs_error(vol->sb, "Still have pages left! "
- "Terminating them with extreme "
- "prejudice. Inode 0x%lx, page index "
- "0x%lx.", ni->mft_no, page->index);
- flush_dcache_page(page);
- kunmap(page);
- unlock_page(page);
- if (cur_page != xpage)
- put_page(page);
- pages[cur_page] = NULL;
- }
- }
-
- /* We no longer need the list of pages. */
- kfree(pages);
- kfree(completed_pages);
-
- /* If we have completed the requested page, we return success. */
- if (likely(xpage_done))
- return 0;
-
- ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
- "EOVERFLOW" : (!err ? "EIO" : "unknown error"));
- return err < 0 ? err : -EIO;
-
-read_err:
- ntfs_error(vol->sb, "IO error while reading compressed data.");
- /* Release the buffer heads. */
- for (i = 0; i < nr_bhs; i++)
- brelse(bhs[i]);
- goto err_out;
-
-map_rl_err:
- ntfs_error(vol->sb, "ntfs_map_runlist() failed. Cannot read "
- "compression block.");
- goto err_out;
-
-rl_err:
- up_read(&ni->runlist.lock);
- ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read "
- "compression block.");
- goto err_out;
-
-getblk_err:
- up_read(&ni->runlist.lock);
- ntfs_error(vol->sb, "getblk() failed. Cannot read compression block.");
-
-err_out:
- kfree(bhs);
- for (i = cur_page; i < max_page; i++) {
- page = pages[i];
- if (page) {
- flush_dcache_page(page);
- kunmap(page);
- unlock_page(page);
- if (i != xpage)
- put_page(page);
- }
- }
- kfree(pages);
- kfree(completed_pages);
- return -EIO;
-}
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
deleted file mode 100644
index a3c1c5656f8f..000000000000
--- a/fs/ntfs/debug.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * debug.c - NTFS kernel debug support. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include "debug.h"
-
-/**
- * __ntfs_warning - output a warning to the syslog
- * @function: name of function outputting the warning
- * @sb: super block of mounted ntfs filesystem
- * @fmt: warning string containing format specifications
- * @...: a variable number of arguments specified in @fmt
- *
- * Outputs a warning to the syslog for the mounted ntfs filesystem described
- * by @sb.
- *
- * @fmt and the corresponding @... is printf style format string containing
- * the warning string and the corresponding format arguments, respectively.
- *
- * @function is the name of the function from which __ntfs_warning is being
- * called.
- *
- * Note, you should be using debug.h::ntfs_warning(@sb, @fmt, @...) instead
- * as this provides the @function parameter automatically.
- */
-void __ntfs_warning(const char *function, const struct super_block *sb,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
- int flen = 0;
-
-#ifndef DEBUG
- if (!printk_ratelimit())
- return;
-#endif
- if (function)
- flen = strlen(function);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (sb)
- pr_warn("(device %s): %s(): %pV\n",
- sb->s_id, flen ? function : "", &vaf);
- else
- pr_warn("%s(): %pV\n", flen ? function : "", &vaf);
- va_end(args);
-}
-
-/**
- * __ntfs_error - output an error to the syslog
- * @function: name of function outputting the error
- * @sb: super block of mounted ntfs filesystem
- * @fmt: error string containing format specifications
- * @...: a variable number of arguments specified in @fmt
- *
- * Outputs an error to the syslog for the mounted ntfs filesystem described
- * by @sb.
- *
- * @fmt and the corresponding @... is printf style format string containing
- * the error string and the corresponding format arguments, respectively.
- *
- * @function is the name of the function from which __ntfs_error is being
- * called.
- *
- * Note, you should be using debug.h::ntfs_error(@sb, @fmt, @...) instead
- * as this provides the @function parameter automatically.
- */
-void __ntfs_error(const char *function, const struct super_block *sb,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
- int flen = 0;
-
-#ifndef DEBUG
- if (!printk_ratelimit())
- return;
-#endif
- if (function)
- flen = strlen(function);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (sb)
- pr_err("(device %s): %s(): %pV\n",
- sb->s_id, flen ? function : "", &vaf);
- else
- pr_err("%s(): %pV\n", flen ? function : "", &vaf);
- va_end(args);
-}
-
-#ifdef DEBUG
-
-/* If 1, output debug messages, and if 0, don't. */
-int debug_msgs = 0;
-
-void __ntfs_debug(const char *file, int line, const char *function,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
- int flen = 0;
-
- if (!debug_msgs)
- return;
- if (function)
- flen = strlen(function);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf);
- va_end(args);
-}
-
-/* Dump a runlist. Caller has to provide synchronisation for @rl. */
-void ntfs_debug_dump_runlist(const runlist_element *rl)
-{
- int i;
- const char *lcn_str[5] = { "LCN_HOLE ", "LCN_RL_NOT_MAPPED",
- "LCN_ENOENT ", "LCN_unknown " };
-
- if (!debug_msgs)
- return;
- pr_debug("Dumping runlist (values in hex):\n");
- if (!rl) {
- pr_debug("Run list not present.\n");
- return;
- }
- pr_debug("VCN LCN Run length\n");
- for (i = 0; ; i++) {
- LCN lcn = (rl + i)->lcn;
-
- if (lcn < (LCN)0) {
- int index = -lcn - 1;
-
- if (index > -LCN_ENOENT - 1)
- index = 3;
- pr_debug("%-16Lx %s %-16Lx%s\n",
- (long long)(rl + i)->vcn, lcn_str[index],
- (long long)(rl + i)->length,
- (rl + i)->length ? "" :
- " (runlist end)");
- } else
- pr_debug("%-16Lx %-16Lx %-16Lx%s\n",
- (long long)(rl + i)->vcn,
- (long long)(rl + i)->lcn,
- (long long)(rl + i)->length,
- (rl + i)->length ? "" :
- " (runlist end)");
- if (!(rl + i)->length)
- break;
- }
-}
-
-#endif
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
deleted file mode 100644
index 6fdef388f129..000000000000
--- a/fs/ntfs/debug.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * debug.h - NTFS kernel debug support. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_DEBUG_H
-#define _LINUX_NTFS_DEBUG_H
-
-#include <linux/fs.h>
-
-#include "runlist.h"
-
-#ifdef DEBUG
-
-extern int debug_msgs;
-
-extern __printf(4, 5)
-void __ntfs_debug(const char *file, int line, const char *function,
- const char *format, ...);
-/**
- * ntfs_debug - write a debug level message to syslog
- * @f: a printf format string containing the message
- * @...: the variables to substitute into @f
- *
- * ntfs_debug() writes a DEBUG level message to the syslog but only if the
- * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP.
- */
-#define ntfs_debug(f, a...) \
- __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
-
-extern void ntfs_debug_dump_runlist(const runlist_element *rl);
-
-#else /* !DEBUG */
-
-#define ntfs_debug(fmt, ...) \
-do { \
- if (0) \
- no_printk(fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define ntfs_debug_dump_runlist(rl) do {} while (0)
-
-#endif /* !DEBUG */
-
-extern __printf(3, 4)
-void __ntfs_warning(const char *function, const struct super_block *sb,
- const char *fmt, ...);
-#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
-
-extern __printf(3, 4)
-void __ntfs_error(const char *function, const struct super_block *sb,
- const char *fmt, ...);
-#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
-
-#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
deleted file mode 100644
index 629723a8d712..000000000000
--- a/fs/ntfs/dir.c
+++ /dev/null
@@ -1,1540 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2007 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#include <linux/buffer_head.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-
-#include "dir.h"
-#include "aops.h"
-#include "attrib.h"
-#include "mft.h"
-#include "debug.h"
-#include "ntfs.h"
-
-/*
- * The little endian Unicode string $I30 as a global constant.
- */
-ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
- cpu_to_le16('3'), cpu_to_le16('0'), 0 };
-
-/**
- * ntfs_lookup_inode_by_name - find an inode in a directory given its name
- * @dir_ni: ntfs inode of the directory in which to search for the name
- * @uname: Unicode name for which to search in the directory
- * @uname_len: length of the name @uname in Unicode characters
- * @res: return the found file name if necessary (see below)
- *
- * Look for an inode with name @uname in the directory with inode @dir_ni.
- * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
- * the Unicode name. If the name is found in the directory, the corresponding
- * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
- * is a 64-bit number containing the sequence number.
- *
- * On error, a negative value is returned corresponding to the error code. In
- * particular if the inode is not found -ENOENT is returned. Note that you
- * can't just check the return value for being negative, you have to check the
- * inode number for being negative which you can extract using MREC(return
- * value).
- *
- * Note, @uname_len does not include the (optional) terminating NULL character.
- *
- * Note, we look for a case sensitive match first but we also look for a case
- * insensitive match at the same time. If we find a case insensitive match, we
- * save that for the case that we don't find an exact match, where we return
- * the case insensitive match and setup @res (which we allocate!) with the mft
- * reference, the file name type, length and with a copy of the little endian
- * Unicode file name itself. If we match a file name which is in the DOS name
- * space, we only return the mft reference and file name type in @res.
- * ntfs_lookup() then uses this to find the long file name in the inode itself.
- * This is to avoid polluting the dcache with short file names. We want them to
- * work but we don't care for how quickly one can access them. This also fixes
- * the dcache aliasing issues.
- *
- * Locking: - Caller must hold i_mutex on the directory.
- * - Each page cache page in the index allocation mapping must be
- * locked whilst being accessed otherwise we may find a corrupt
- * page due to it being under ->writepage at the moment which
- * applies the mst protection fixups before writing out and then
- * removes them again after the write is complete after which it
- * unlocks the page.
- */
-MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
- const int uname_len, ntfs_name **res)
-{
- ntfs_volume *vol = dir_ni->vol;
- struct super_block *sb = vol->sb;
- MFT_RECORD *m;
- INDEX_ROOT *ir;
- INDEX_ENTRY *ie;
- INDEX_ALLOCATION *ia;
- u8 *index_end;
- u64 mref;
- ntfs_attr_search_ctx *ctx;
- int err, rc;
- VCN vcn, old_vcn;
- struct address_space *ia_mapping;
- struct page *page;
- u8 *kaddr;
- ntfs_name *name = NULL;
-
- BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode));
- BUG_ON(NInoAttr(dir_ni));
- /* Get hold of the mft record for the directory. */
- m = map_mft_record(dir_ni);
- if (IS_ERR(m)) {
- ntfs_error(sb, "map_mft_record() failed with error code %ld.",
- -PTR_ERR(m));
- return ERR_MREF(PTR_ERR(m));
- }
- ctx = ntfs_attr_get_search_ctx(dir_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- /* Find the index root attribute in the mft record. */
- err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
- 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT) {
- ntfs_error(sb, "Index root attribute missing in "
- "directory inode 0x%lx.",
- dir_ni->mft_no);
- err = -EIO;
- }
- goto err_out;
- }
- /* Get to the index root value (it's been verified in read_inode). */
- ir = (INDEX_ROOT*)((u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset));
- index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ir->index +
- le32_to_cpu(ir->index.entries_offset));
- /*
- * Loop until we exceed valid memory (corruption case) or until we
- * reach the last entry.
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- /* Bounds checks. */
- if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->key_length) >
- index_end)
- goto dir_err_out;
- /*
- * The last entry cannot contain a name. It can however contain
- * a pointer to a child node in the B+tree so we just break out.
- */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /*
- * We perform a case sensitive comparison and if that matches
- * we are done and return the mft reference of the inode (i.e.
- * the inode number together with the sequence number for
- * consistency checking). We convert it to cpu format before
- * returning.
- */
- if (ntfs_are_names_equal(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length,
- CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
-found_it:
- /*
- * We have a perfect match, so we don't need to care
- * about having matched imperfectly before, so we can
- * free name and set *res to NULL.
- * However, if the perfect match is a short file name,
- * we need to signal this through *res, so that
- * ntfs_lookup() can fix dcache aliasing issues.
- * As an optimization we just reuse an existing
- * allocation of *res.
- */
- if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
- if (!name) {
- name = kmalloc(sizeof(ntfs_name),
- GFP_NOFS);
- if (!name) {
- err = -ENOMEM;
- goto err_out;
- }
- }
- name->mref = le64_to_cpu(
- ie->data.dir.indexed_file);
- name->type = FILE_NAME_DOS;
- name->len = 0;
- *res = name;
- } else {
- kfree(name);
- *res = NULL;
- }
- mref = le64_to_cpu(ie->data.dir.indexed_file);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(dir_ni);
- return mref;
- }
- /*
- * For a case insensitive mount, we also perform a case
- * insensitive comparison (provided the file name is not in the
- * POSIX namespace). If the comparison matches, and the name is
- * in the WIN32 namespace, we cache the filename in *res so
- * that the caller, ntfs_lookup(), can work on it. If the
- * comparison matches, and the name is in the DOS namespace, we
- * only cache the mft reference and the file name type (we set
- * the name length to zero for simplicity).
- */
- if (!NVolCaseSensitive(vol) &&
- ie->key.file_name.file_name_type &&
- ntfs_are_names_equal(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length,
- IGNORE_CASE, vol->upcase, vol->upcase_len)) {
- int name_size = sizeof(ntfs_name);
- u8 type = ie->key.file_name.file_name_type;
- u8 len = ie->key.file_name.file_name_length;
-
- /* Only one case insensitive matching name allowed. */
- if (name) {
- ntfs_error(sb, "Found already allocated name "
- "in phase 1. Please run chkdsk "
- "and if that doesn't find any "
- "errors please report you saw "
- "this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net.");
- goto dir_err_out;
- }
-
- if (type != FILE_NAME_DOS)
- name_size += len * sizeof(ntfschar);
- name = kmalloc(name_size, GFP_NOFS);
- if (!name) {
- err = -ENOMEM;
- goto err_out;
- }
- name->mref = le64_to_cpu(ie->data.dir.indexed_file);
- name->type = type;
- if (type != FILE_NAME_DOS) {
- name->len = len;
- memcpy(name->name, ie->key.file_name.file_name,
- len * sizeof(ntfschar));
- } else
- name->len = 0;
- *res = name;
- }
- /*
- * Not a perfect match, need to do full blown collation so we
- * know which way in the B+tree we have to go.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- IGNORE_CASE, vol->upcase, vol->upcase_len);
- /*
- * If uname collates before the name of the current entry, there
- * is definitely no such name in this index but we might need to
- * descend into the B+tree so we just break out of the loop.
- */
- if (rc == -1)
- break;
- /* The names are not equal, continue the search. */
- if (rc)
- continue;
- /*
- * Names match with case insensitive comparison, now try the
- * case sensitive comparison, which is required for proper
- * collation.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- CASE_SENSITIVE, vol->upcase, vol->upcase_len);
- if (rc == -1)
- break;
- if (rc)
- continue;
- /*
- * Perfect match, this will never happen as the
- * ntfs_are_names_equal() call will have gotten a match but we
- * still treat it correctly.
- */
- goto found_it;
- }
- /*
- * We have finished with this index without success. Check for the
- * presence of a child node and if not present return -ENOENT, unless
- * we have got a matching name cached in name in which case return the
- * mft reference associated with it.
- */
- if (!(ie->flags & INDEX_ENTRY_NODE)) {
- if (name) {
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(dir_ni);
- return name->mref;
- }
- ntfs_debug("Entry not found.");
- err = -ENOENT;
- goto err_out;
- } /* Child node present, descend into it. */
- /* Consistency check: Verify that an index allocation exists. */
- if (!NInoIndexAllocPresent(dir_ni)) {
- ntfs_error(sb, "No index allocation attribute but index entry "
- "requires one. Directory inode 0x%lx is "
- "corrupt or driver bug.", dir_ni->mft_no);
- goto err_out;
- }
- /* Get the starting vcn of the index_block holding the child node. */
- vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
- ia_mapping = VFS_I(dir_ni)->i_mapping;
- /*
- * We are done with the index root and the mft record. Release them,
- * otherwise we deadlock with ntfs_map_page().
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(dir_ni);
- m = NULL;
- ctx = NULL;
-descend_into_child_node:
- /*
- * Convert vcn to index into the index allocation attribute in units
- * of PAGE_SIZE and map the page cache page, reading it from
- * disk if necessary.
- */
- page = ntfs_map_page(ia_mapping, vcn <<
- dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- ntfs_error(sb, "Failed to map directory index page, error %ld.",
- -PTR_ERR(page));
- err = PTR_ERR(page);
- goto err_out;
- }
- lock_page(page);
- kaddr = (u8*)page_address(page);
-fast_descend_into_child_node:
- /* Get to the index allocation block. */
- ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
- dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
- /* Bounds checks. */
- if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
- ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
- "inode 0x%lx or driver bug.", dir_ni->mft_no);
- goto unm_err_out;
- }
- /* Catch multi sector transfer fixup errors. */
- if (unlikely(!ntfs_is_indx_record(ia->magic))) {
- ntfs_error(sb, "Directory index record with vcn 0x%llx is "
- "corrupt. Corrupt inode 0x%lx. Run chkdsk.",
- (unsigned long long)vcn, dir_ni->mft_no);
- goto unm_err_out;
- }
- if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
- ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
- "different from expected VCN (0x%llx). "
- "Directory inode 0x%lx is corrupt or driver "
- "bug.", (unsigned long long)
- sle64_to_cpu(ia->index_block_vcn),
- (unsigned long long)vcn, dir_ni->mft_no);
- goto unm_err_out;
- }
- if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
- dir_ni->itype.index.block_size) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
- "0x%lx has a size (%u) differing from the "
- "directory specified size (%u). Directory "
- "inode is corrupt or driver bug.",
- (unsigned long long)vcn, dir_ni->mft_no,
- le32_to_cpu(ia->index.allocated_size) + 0x18,
- dir_ni->itype.index.block_size);
- goto unm_err_out;
- }
- index_end = (u8*)ia + dir_ni->itype.index.block_size;
- if (index_end > kaddr + PAGE_SIZE) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
- "0x%lx crosses page boundary. Impossible! "
- "Cannot access! This is probably a bug in the "
- "driver.", (unsigned long long)vcn,
- dir_ni->mft_no);
- goto unm_err_out;
- }
- index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
- if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
- ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
- "inode 0x%lx exceeds maximum size.",
- (unsigned long long)vcn, dir_ni->mft_no);
- goto unm_err_out;
- }
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ia->index +
- le32_to_cpu(ia->index.entries_offset));
- /*
- * Iterate similar to above big loop but applied to index buffer, thus
- * loop until we exceed valid memory (corruption case) or until we
- * reach the last entry.
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- /* Bounds check. */
- if ((u8*)ie < (u8*)ia || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->key_length) >
- index_end) {
- ntfs_error(sb, "Index entry out of bounds in "
- "directory inode 0x%lx.",
- dir_ni->mft_no);
- goto unm_err_out;
- }
- /*
- * The last entry cannot contain a name. It can however contain
- * a pointer to a child node in the B+tree so we just break out.
- */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /*
- * We perform a case sensitive comparison and if that matches
- * we are done and return the mft reference of the inode (i.e.
- * the inode number together with the sequence number for
- * consistency checking). We convert it to cpu format before
- * returning.
- */
- if (ntfs_are_names_equal(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length,
- CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
-found_it2:
- /*
- * We have a perfect match, so we don't need to care
- * about having matched imperfectly before, so we can
- * free name and set *res to NULL.
- * However, if the perfect match is a short file name,
- * we need to signal this through *res, so that
- * ntfs_lookup() can fix dcache aliasing issues.
- * As an optimization we just reuse an existing
- * allocation of *res.
- */
- if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
- if (!name) {
- name = kmalloc(sizeof(ntfs_name),
- GFP_NOFS);
- if (!name) {
- err = -ENOMEM;
- goto unm_err_out;
- }
- }
- name->mref = le64_to_cpu(
- ie->data.dir.indexed_file);
- name->type = FILE_NAME_DOS;
- name->len = 0;
- *res = name;
- } else {
- kfree(name);
- *res = NULL;
- }
- mref = le64_to_cpu(ie->data.dir.indexed_file);
- unlock_page(page);
- ntfs_unmap_page(page);
- return mref;
- }
- /*
- * For a case insensitive mount, we also perform a case
- * insensitive comparison (provided the file name is not in the
- * POSIX namespace). If the comparison matches, and the name is
- * in the WIN32 namespace, we cache the filename in *res so
- * that the caller, ntfs_lookup(), can work on it. If the
- * comparison matches, and the name is in the DOS namespace, we
- * only cache the mft reference and the file name type (we set
- * the name length to zero for simplicity).
- */
- if (!NVolCaseSensitive(vol) &&
- ie->key.file_name.file_name_type &&
- ntfs_are_names_equal(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length,
- IGNORE_CASE, vol->upcase, vol->upcase_len)) {
- int name_size = sizeof(ntfs_name);
- u8 type = ie->key.file_name.file_name_type;
- u8 len = ie->key.file_name.file_name_length;
-
- /* Only one case insensitive matching name allowed. */
- if (name) {
- ntfs_error(sb, "Found already allocated name "
- "in phase 2. Please run chkdsk "
- "and if that doesn't find any "
- "errors please report you saw "
- "this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net.");
- unlock_page(page);
- ntfs_unmap_page(page);
- goto dir_err_out;
- }
-
- if (type != FILE_NAME_DOS)
- name_size += len * sizeof(ntfschar);
- name = kmalloc(name_size, GFP_NOFS);
- if (!name) {
- err = -ENOMEM;
- goto unm_err_out;
- }
- name->mref = le64_to_cpu(ie->data.dir.indexed_file);
- name->type = type;
- if (type != FILE_NAME_DOS) {
- name->len = len;
- memcpy(name->name, ie->key.file_name.file_name,
- len * sizeof(ntfschar));
- } else
- name->len = 0;
- *res = name;
- }
- /*
- * Not a perfect match, need to do full blown collation so we
- * know which way in the B+tree we have to go.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- IGNORE_CASE, vol->upcase, vol->upcase_len);
- /*
- * If uname collates before the name of the current entry, there
- * is definitely no such name in this index but we might need to
- * descend into the B+tree so we just break out of the loop.
- */
- if (rc == -1)
- break;
- /* The names are not equal, continue the search. */
- if (rc)
- continue;
- /*
- * Names match with case insensitive comparison, now try the
- * case sensitive comparison, which is required for proper
- * collation.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- CASE_SENSITIVE, vol->upcase, vol->upcase_len);
- if (rc == -1)
- break;
- if (rc)
- continue;
- /*
- * Perfect match, this will never happen as the
- * ntfs_are_names_equal() call will have gotten a match but we
- * still treat it correctly.
- */
- goto found_it2;
- }
- /*
- * We have finished with this index buffer without success. Check for
- * the presence of a child node.
- */
- if (ie->flags & INDEX_ENTRY_NODE) {
- if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
- ntfs_error(sb, "Index entry with child node found in "
- "a leaf node in directory inode 0x%lx.",
- dir_ni->mft_no);
- goto unm_err_out;
- }
- /* Child node present, descend into it. */
- old_vcn = vcn;
- vcn = sle64_to_cpup((sle64*)((u8*)ie +
- le16_to_cpu(ie->length) - 8));
- if (vcn >= 0) {
- /* If vcn is in the same page cache page as old_vcn we
- * recycle the mapped page. */
- if (old_vcn << vol->cluster_size_bits >>
- PAGE_SHIFT == vcn <<
- vol->cluster_size_bits >>
- PAGE_SHIFT)
- goto fast_descend_into_child_node;
- unlock_page(page);
- ntfs_unmap_page(page);
- goto descend_into_child_node;
- }
- ntfs_error(sb, "Negative child node vcn in directory inode "
- "0x%lx.", dir_ni->mft_no);
- goto unm_err_out;
- }
- /*
- * No child node present, return -ENOENT, unless we have got a matching
- * name cached in name in which case return the mft reference
- * associated with it.
- */
- if (name) {
- unlock_page(page);
- ntfs_unmap_page(page);
- return name->mref;
- }
- ntfs_debug("Entry not found.");
- err = -ENOENT;
-unm_err_out:
- unlock_page(page);
- ntfs_unmap_page(page);
-err_out:
- if (!err)
- err = -EIO;
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(dir_ni);
- if (name) {
- kfree(name);
- *res = NULL;
- }
- return ERR_MREF(err);
-dir_err_out:
- ntfs_error(sb, "Corrupt directory. Aborting lookup.");
- goto err_out;
-}
-
-#if 0
-
-// TODO: (AIA)
-// The algorithm embedded in this code will be required for the time when we
-// want to support adding of entries to directories, where we require correct
-// collation of file names in order not to cause corruption of the filesystem.
-
-/**
- * ntfs_lookup_inode_by_name - find an inode in a directory given its name
- * @dir_ni: ntfs inode of the directory in which to search for the name
- * @uname: Unicode name for which to search in the directory
- * @uname_len: length of the name @uname in Unicode characters
- *
- * Look for an inode with name @uname in the directory with inode @dir_ni.
- * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
- * the Unicode name. If the name is found in the directory, the corresponding
- * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
- * is a 64-bit number containing the sequence number.
- *
- * On error, a negative value is returned corresponding to the error code. In
- * particular if the inode is not found -ENOENT is returned. Note that you
- * can't just check the return value for being negative, you have to check the
- * inode number for being negative which you can extract using MREC(return
- * value).
- *
- * Note, @uname_len does not include the (optional) terminating NULL character.
- */
-u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
- const int uname_len)
-{
- ntfs_volume *vol = dir_ni->vol;
- struct super_block *sb = vol->sb;
- MFT_RECORD *m;
- INDEX_ROOT *ir;
- INDEX_ENTRY *ie;
- INDEX_ALLOCATION *ia;
- u8 *index_end;
- u64 mref;
- ntfs_attr_search_ctx *ctx;
- int err, rc;
- IGNORE_CASE_BOOL ic;
- VCN vcn, old_vcn;
- struct address_space *ia_mapping;
- struct page *page;
- u8 *kaddr;
-
- /* Get hold of the mft record for the directory. */
- m = map_mft_record(dir_ni);
- if (IS_ERR(m)) {
- ntfs_error(sb, "map_mft_record() failed with error code %ld.",
- -PTR_ERR(m));
- return ERR_MREF(PTR_ERR(m));
- }
- ctx = ntfs_attr_get_search_ctx(dir_ni, m);
- if (!ctx) {
- err = -ENOMEM;
- goto err_out;
- }
- /* Find the index root attribute in the mft record. */
- err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
- 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT) {
- ntfs_error(sb, "Index root attribute missing in "
- "directory inode 0x%lx.",
- dir_ni->mft_no);
- err = -EIO;
- }
- goto err_out;
- }
- /* Get to the index root value (it's been verified in read_inode). */
- ir = (INDEX_ROOT*)((u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset));
- index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ir->index +
- le32_to_cpu(ir->index.entries_offset));
- /*
- * Loop until we exceed valid memory (corruption case) or until we
- * reach the last entry.
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- /* Bounds checks. */
- if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->key_length) >
- index_end)
- goto dir_err_out;
- /*
- * The last entry cannot contain a name. It can however contain
- * a pointer to a child node in the B+tree so we just break out.
- */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /*
- * If the current entry has a name type of POSIX, the name is
- * case sensitive and not otherwise. This has the effect of us
- * not being able to access any POSIX file names which collate
- * after the non-POSIX one when they only differ in case, but
- * anyone doing screwy stuff like that deserves to burn in
- * hell... Doing that kind of stuff on NT4 actually causes
- * corruption on the partition even when using SP6a and Linux
- * is not involved at all.
- */
- ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
- CASE_SENSITIVE;
- /*
- * If the names match perfectly, we are done and return the
- * mft reference of the inode (i.e. the inode number together
- * with the sequence number for consistency checking. We
- * convert it to cpu format before returning.
- */
- if (ntfs_are_names_equal(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, ic,
- vol->upcase, vol->upcase_len)) {
-found_it:
- mref = le64_to_cpu(ie->data.dir.indexed_file);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(dir_ni);
- return mref;
- }
- /*
- * Not a perfect match, need to do full blown collation so we
- * know which way in the B+tree we have to go.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- IGNORE_CASE, vol->upcase, vol->upcase_len);
- /*
- * If uname collates before the name of the current entry, there
- * is definitely no such name in this index but we might need to
- * descend into the B+tree so we just break out of the loop.
- */
- if (rc == -1)
- break;
- /* The names are not equal, continue the search. */
- if (rc)
- continue;
- /*
- * Names match with case insensitive comparison, now try the
- * case sensitive comparison, which is required for proper
- * collation.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- CASE_SENSITIVE, vol->upcase, vol->upcase_len);
- if (rc == -1)
- break;
- if (rc)
- continue;
- /*
- * Perfect match, this will never happen as the
- * ntfs_are_names_equal() call will have gotten a match but we
- * still treat it correctly.
- */
- goto found_it;
- }
- /*
- * We have finished with this index without success. Check for the
- * presence of a child node.
- */
- if (!(ie->flags & INDEX_ENTRY_NODE)) {
- /* No child node, return -ENOENT. */
- err = -ENOENT;
- goto err_out;
- } /* Child node present, descend into it. */
- /* Consistency check: Verify that an index allocation exists. */
- if (!NInoIndexAllocPresent(dir_ni)) {
- ntfs_error(sb, "No index allocation attribute but index entry "
- "requires one. Directory inode 0x%lx is "
- "corrupt or driver bug.", dir_ni->mft_no);
- goto err_out;
- }
- /* Get the starting vcn of the index_block holding the child node. */
- vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
- ia_mapping = VFS_I(dir_ni)->i_mapping;
- /*
- * We are done with the index root and the mft record. Release them,
- * otherwise we deadlock with ntfs_map_page().
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(dir_ni);
- m = NULL;
- ctx = NULL;
-descend_into_child_node:
- /*
- * Convert vcn to index into the index allocation attribute in units
- * of PAGE_SIZE and map the page cache page, reading it from
- * disk if necessary.
- */
- page = ntfs_map_page(ia_mapping, vcn <<
- dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- ntfs_error(sb, "Failed to map directory index page, error %ld.",
- -PTR_ERR(page));
- err = PTR_ERR(page);
- goto err_out;
- }
- lock_page(page);
- kaddr = (u8*)page_address(page);
-fast_descend_into_child_node:
- /* Get to the index allocation block. */
- ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
- dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
- /* Bounds checks. */
- if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
- ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
- "inode 0x%lx or driver bug.", dir_ni->mft_no);
- goto unm_err_out;
- }
- /* Catch multi sector transfer fixup errors. */
- if (unlikely(!ntfs_is_indx_record(ia->magic))) {
- ntfs_error(sb, "Directory index record with vcn 0x%llx is "
- "corrupt. Corrupt inode 0x%lx. Run chkdsk.",
- (unsigned long long)vcn, dir_ni->mft_no);
- goto unm_err_out;
- }
- if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
- ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
- "different from expected VCN (0x%llx). "
- "Directory inode 0x%lx is corrupt or driver "
- "bug.", (unsigned long long)
- sle64_to_cpu(ia->index_block_vcn),
- (unsigned long long)vcn, dir_ni->mft_no);
- goto unm_err_out;
- }
- if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
- dir_ni->itype.index.block_size) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
- "0x%lx has a size (%u) differing from the "
- "directory specified size (%u). Directory "
- "inode is corrupt or driver bug.",
- (unsigned long long)vcn, dir_ni->mft_no,
- le32_to_cpu(ia->index.allocated_size) + 0x18,
- dir_ni->itype.index.block_size);
- goto unm_err_out;
- }
- index_end = (u8*)ia + dir_ni->itype.index.block_size;
- if (index_end > kaddr + PAGE_SIZE) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
- "0x%lx crosses page boundary. Impossible! "
- "Cannot access! This is probably a bug in the "
- "driver.", (unsigned long long)vcn,
- dir_ni->mft_no);
- goto unm_err_out;
- }
- index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
- if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
- ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
- "inode 0x%lx exceeds maximum size.",
- (unsigned long long)vcn, dir_ni->mft_no);
- goto unm_err_out;
- }
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ia->index +
- le32_to_cpu(ia->index.entries_offset));
- /*
- * Iterate similar to above big loop but applied to index buffer, thus
- * loop until we exceed valid memory (corruption case) or until we
- * reach the last entry.
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- /* Bounds check. */
- if ((u8*)ie < (u8*)ia || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->key_length) >
- index_end) {
- ntfs_error(sb, "Index entry out of bounds in "
- "directory inode 0x%lx.",
- dir_ni->mft_no);
- goto unm_err_out;
- }
- /*
- * The last entry cannot contain a name. It can however contain
- * a pointer to a child node in the B+tree so we just break out.
- */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /*
- * If the current entry has a name type of POSIX, the name is
- * case sensitive and not otherwise. This has the effect of us
- * not being able to access any POSIX file names which collate
- * after the non-POSIX one when they only differ in case, but
- * anyone doing screwy stuff like that deserves to burn in
- * hell... Doing that kind of stuff on NT4 actually causes
- * corruption on the partition even when using SP6a and Linux
- * is not involved at all.
- */
- ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
- CASE_SENSITIVE;
- /*
- * If the names match perfectly, we are done and return the
- * mft reference of the inode (i.e. the inode number together
- * with the sequence number for consistency checking. We
- * convert it to cpu format before returning.
- */
- if (ntfs_are_names_equal(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, ic,
- vol->upcase, vol->upcase_len)) {
-found_it2:
- mref = le64_to_cpu(ie->data.dir.indexed_file);
- unlock_page(page);
- ntfs_unmap_page(page);
- return mref;
- }
- /*
- * Not a perfect match, need to do full blown collation so we
- * know which way in the B+tree we have to go.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- IGNORE_CASE, vol->upcase, vol->upcase_len);
- /*
- * If uname collates before the name of the current entry, there
- * is definitely no such name in this index but we might need to
- * descend into the B+tree so we just break out of the loop.
- */
- if (rc == -1)
- break;
- /* The names are not equal, continue the search. */
- if (rc)
- continue;
- /*
- * Names match with case insensitive comparison, now try the
- * case sensitive comparison, which is required for proper
- * collation.
- */
- rc = ntfs_collate_names(uname, uname_len,
- (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, 1,
- CASE_SENSITIVE, vol->upcase, vol->upcase_len);
- if (rc == -1)
- break;
- if (rc)
- continue;
- /*
- * Perfect match, this will never happen as the
- * ntfs_are_names_equal() call will have gotten a match but we
- * still treat it correctly.
- */
- goto found_it2;
- }
- /*
- * We have finished with this index buffer without success. Check for
- * the presence of a child node.
- */
- if (ie->flags & INDEX_ENTRY_NODE) {
- if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
- ntfs_error(sb, "Index entry with child node found in "
- "a leaf node in directory inode 0x%lx.",
- dir_ni->mft_no);
- goto unm_err_out;
- }
- /* Child node present, descend into it. */
- old_vcn = vcn;
- vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
- if (vcn >= 0) {
- /* If vcn is in the same page cache page as old_vcn we
- * recycle the mapped page. */
- if (old_vcn << vol->cluster_size_bits >>
- PAGE_SHIFT == vcn <<
- vol->cluster_size_bits >>
- PAGE_SHIFT)
- goto fast_descend_into_child_node;
- unlock_page(page);
- ntfs_unmap_page(page);
- goto descend_into_child_node;
- }
- ntfs_error(sb, "Negative child node vcn in directory inode "
- "0x%lx.", dir_ni->mft_no);
- goto unm_err_out;
- }
- /* No child node, return -ENOENT. */
- ntfs_debug("Entry not found.");
- err = -ENOENT;
-unm_err_out:
- unlock_page(page);
- ntfs_unmap_page(page);
-err_out:
- if (!err)
- err = -EIO;
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(dir_ni);
- return ERR_MREF(err);
-dir_err_out:
- ntfs_error(sb, "Corrupt directory. Aborting lookup.");
- goto err_out;
-}
-
-#endif
-
-/**
- * ntfs_filldir - ntfs specific filldir method
- * @vol: current ntfs volume
- * @ndir: ntfs inode of current directory
- * @ia_page: page in which the index allocation buffer @ie is in resides
- * @ie: current index entry
- * @name: buffer to use for the converted name
- * @actor: what to feed the entries to
- *
- * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
- * callback.
- *
- * If @ia_page is not NULL it is the locked page containing the index
- * allocation block containing the index entry @ie.
- *
- * Note, we drop (and then reacquire) the page lock on @ia_page across the
- * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
- * since ntfs_lookup() will lock the same page. As an optimization, we do not
- * retake the lock if we are returning a non-zero value as ntfs_readdir()
- * would need to drop the lock immediately anyway.
- */
-static inline int ntfs_filldir(ntfs_volume *vol,
- ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
- u8 *name, struct dir_context *actor)
-{
- unsigned long mref;
- int name_len;
- unsigned dt_type;
- FILE_NAME_TYPE_FLAGS name_type;
-
- name_type = ie->key.file_name.file_name_type;
- if (name_type == FILE_NAME_DOS) {
- ntfs_debug("Skipping DOS name space entry.");
- return 0;
- }
- if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
- ntfs_debug("Skipping root directory self reference entry.");
- return 0;
- }
- if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
- !NVolShowSystemFiles(vol)) {
- ntfs_debug("Skipping system file.");
- return 0;
- }
- name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
- ie->key.file_name.file_name_length, &name,
- NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
- if (name_len <= 0) {
- ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
- (long long)MREF_LE(ie->data.dir.indexed_file));
- return 0;
- }
- if (ie->key.file_name.file_attributes &
- FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
- dt_type = DT_DIR;
- else
- dt_type = DT_REG;
- mref = MREF_LE(ie->data.dir.indexed_file);
- /*
- * Drop the page lock otherwise we deadlock with NFS when it calls
- * ->lookup since ntfs_lookup() will lock the same page.
- */
- if (ia_page)
- unlock_page(ia_page);
- ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
- "0x%lx, DT_%s.", name, name_len, actor->pos, mref,
- dt_type == DT_DIR ? "DIR" : "REG");
- if (!dir_emit(actor, name, name_len, mref, dt_type))
- return 1;
- /* Relock the page but not if we are aborting ->readdir. */
- if (ia_page)
- lock_page(ia_page);
- return 0;
-}
-
-/*
- * We use the same basic approach as the old NTFS driver, i.e. we parse the
- * index root entries and then the index allocation entries that are marked
- * as in use in the index bitmap.
- *
- * While this will return the names in random order this doesn't matter for
- * ->readdir but OTOH results in a faster ->readdir.
- *
- * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
- * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
- * modifications).
- *
- * Locking: - Caller must hold i_mutex on the directory.
- * - Each page cache page in the index allocation mapping must be
- * locked whilst being accessed otherwise we may find a corrupt
- * page due to it being under ->writepage at the moment which
- * applies the mst protection fixups before writing out and then
- * removes them again after the write is complete after which it
- * unlocks the page.
- */
-static int ntfs_readdir(struct file *file, struct dir_context *actor)
-{
- s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
- loff_t i_size;
- struct inode *bmp_vi, *vdir = file_inode(file);
- struct super_block *sb = vdir->i_sb;
- ntfs_inode *ndir = NTFS_I(vdir);
- ntfs_volume *vol = NTFS_SB(sb);
- MFT_RECORD *m;
- INDEX_ROOT *ir = NULL;
- INDEX_ENTRY *ie;
- INDEX_ALLOCATION *ia;
- u8 *name = NULL;
- int rc, err, ir_pos, cur_bmp_pos;
- struct address_space *ia_mapping, *bmp_mapping;
- struct page *bmp_page = NULL, *ia_page = NULL;
- u8 *kaddr, *bmp, *index_end;
- ntfs_attr_search_ctx *ctx;
-
- ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
- vdir->i_ino, actor->pos);
- rc = err = 0;
- /* Are we at end of dir yet? */
- i_size = i_size_read(vdir);
- if (actor->pos >= i_size + vol->mft_record_size)
- return 0;
- /* Emulate . and .. for all directories. */
- if (!dir_emit_dots(file, actor))
- return 0;
- m = NULL;
- ctx = NULL;
- /*
- * Allocate a buffer to store the current name being processed
- * converted to format determined by current NLS.
- */
- name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
- if (unlikely(!name)) {
- err = -ENOMEM;
- goto err_out;
- }
- /* Are we jumping straight into the index allocation attribute? */
- if (actor->pos >= vol->mft_record_size)
- goto skip_index_root;
- /* Get hold of the mft record for the directory. */
- m = map_mft_record(ndir);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(ndir, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- /* Get the offset into the index root attribute. */
- ir_pos = (s64)actor->pos;
- /* Find the index root attribute in the mft record. */
- err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
- 0, ctx);
- if (unlikely(err)) {
- ntfs_error(sb, "Index root attribute missing in directory "
- "inode 0x%lx.", vdir->i_ino);
- goto err_out;
- }
- /*
- * Copy the index root attribute value to a buffer so that we can put
- * the search context and unmap the mft record before calling the
- * filldir() callback. We need to do this because of NFSd which calls
- * ->lookup() from its filldir callback() and this causes NTFS to
- * deadlock as ntfs_lookup() maps the mft record of the directory and
- * we have got it mapped here already. The only solution is for us to
- * unmap the mft record here so that a call to ntfs_lookup() is able to
- * map the mft record without deadlocking.
- */
- rc = le32_to_cpu(ctx->attr->data.resident.value_length);
- ir = kmalloc(rc, GFP_NOFS);
- if (unlikely(!ir)) {
- err = -ENOMEM;
- goto err_out;
- }
- /* Copy the index root value (it has been verified in read_inode). */
- memcpy(ir, (u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset), rc);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ndir);
- ctx = NULL;
- m = NULL;
- index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ir->index +
- le32_to_cpu(ir->index.entries_offset));
- /*
- * Loop until we exceed valid memory (corruption case) or until we
- * reach the last entry or until filldir tells us it has had enough
- * or signals an error (both covered by the rc test).
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir);
- /* Bounds checks. */
- if (unlikely((u8*)ie < (u8*)ir || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->key_length) >
- index_end))
- goto err_out;
- /* The last entry cannot contain a name. */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /* Skip index root entry if continuing previous readdir. */
- if (ir_pos > (u8*)ie - (u8*)ir)
- continue;
- /* Advance the position even if going to skip the entry. */
- actor->pos = (u8*)ie - (u8*)ir;
- /* Submit the name to the filldir callback. */
- rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor);
- if (rc) {
- kfree(ir);
- goto abort;
- }
- }
- /* We are done with the index root and can free the buffer. */
- kfree(ir);
- ir = NULL;
- /* If there is no index allocation attribute we are finished. */
- if (!NInoIndexAllocPresent(ndir))
- goto EOD;
- /* Advance fpos to the beginning of the index allocation. */
- actor->pos = vol->mft_record_size;
-skip_index_root:
- kaddr = NULL;
- prev_ia_pos = -1LL;
- /* Get the offset into the index allocation attribute. */
- ia_pos = (s64)actor->pos - vol->mft_record_size;
- ia_mapping = vdir->i_mapping;
- ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
- bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
- if (IS_ERR(bmp_vi)) {
- ntfs_error(sb, "Failed to get bitmap attribute.");
- err = PTR_ERR(bmp_vi);
- goto err_out;
- }
- bmp_mapping = bmp_vi->i_mapping;
- /* Get the starting bitmap bit position and sanity check it. */
- bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
- if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
- ntfs_error(sb, "Current index allocation position exceeds "
- "index bitmap size.");
- goto iput_err_out;
- }
- /* Get the starting bit position in the current bitmap page. */
- cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1);
- bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1);
-get_next_bmp_page:
- ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
- (unsigned long long)bmp_pos >> (3 + PAGE_SHIFT),
- (unsigned long long)bmp_pos &
- (unsigned long long)((PAGE_SIZE * 8) - 1));
- bmp_page = ntfs_map_page(bmp_mapping,
- bmp_pos >> (3 + PAGE_SHIFT));
- if (IS_ERR(bmp_page)) {
- ntfs_error(sb, "Reading index bitmap failed.");
- err = PTR_ERR(bmp_page);
- bmp_page = NULL;
- goto iput_err_out;
- }
- bmp = (u8*)page_address(bmp_page);
- /* Find next index block in use. */
- while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) {
-find_next_index_buffer:
- cur_bmp_pos++;
- /*
- * If we have reached the end of the bitmap page, get the next
- * page, and put away the old one.
- */
- if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) {
- ntfs_unmap_page(bmp_page);
- bmp_pos += PAGE_SIZE * 8;
- cur_bmp_pos = 0;
- goto get_next_bmp_page;
- }
- /* If we have reached the end of the bitmap, we are done. */
- if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
- goto unm_EOD;
- ia_pos = (bmp_pos + cur_bmp_pos) <<
- ndir->itype.index.block_size_bits;
- }
- ntfs_debug("Handling index buffer 0x%llx.",
- (unsigned long long)bmp_pos + cur_bmp_pos);
- /* If the current index buffer is in the same page we reuse the page. */
- if ((prev_ia_pos & (s64)PAGE_MASK) !=
- (ia_pos & (s64)PAGE_MASK)) {
- prev_ia_pos = ia_pos;
- if (likely(ia_page != NULL)) {
- unlock_page(ia_page);
- ntfs_unmap_page(ia_page);
- }
- /*
- * Map the page cache page containing the current ia_pos,
- * reading it from disk if necessary.
- */
- ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT);
- if (IS_ERR(ia_page)) {
- ntfs_error(sb, "Reading index allocation data failed.");
- err = PTR_ERR(ia_page);
- ia_page = NULL;
- goto err_out;
- }
- lock_page(ia_page);
- kaddr = (u8*)page_address(ia_page);
- }
- /* Get the current index buffer. */
- ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK &
- ~(s64)(ndir->itype.index.block_size - 1)));
- /* Bounds checks. */
- if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) {
- ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
- "inode 0x%lx or driver bug.", vdir->i_ino);
- goto err_out;
- }
- /* Catch multi sector transfer fixup errors. */
- if (unlikely(!ntfs_is_indx_record(ia->magic))) {
- ntfs_error(sb, "Directory index record with vcn 0x%llx is "
- "corrupt. Corrupt inode 0x%lx. Run chkdsk.",
- (unsigned long long)ia_pos >>
- ndir->itype.index.vcn_size_bits, vdir->i_ino);
- goto err_out;
- }
- if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
- ~(s64)(ndir->itype.index.block_size - 1)) >>
- ndir->itype.index.vcn_size_bits)) {
- ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
- "different from expected VCN (0x%llx). "
- "Directory inode 0x%lx is corrupt or driver "
- "bug. ", (unsigned long long)
- sle64_to_cpu(ia->index_block_vcn),
- (unsigned long long)ia_pos >>
- ndir->itype.index.vcn_size_bits, vdir->i_ino);
- goto err_out;
- }
- if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 !=
- ndir->itype.index.block_size)) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
- "0x%lx has a size (%u) differing from the "
- "directory specified size (%u). Directory "
- "inode is corrupt or driver bug.",
- (unsigned long long)ia_pos >>
- ndir->itype.index.vcn_size_bits, vdir->i_ino,
- le32_to_cpu(ia->index.allocated_size) + 0x18,
- ndir->itype.index.block_size);
- goto err_out;
- }
- index_end = (u8*)ia + ndir->itype.index.block_size;
- if (unlikely(index_end > kaddr + PAGE_SIZE)) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
- "0x%lx crosses page boundary. Impossible! "
- "Cannot access! This is probably a bug in the "
- "driver.", (unsigned long long)ia_pos >>
- ndir->itype.index.vcn_size_bits, vdir->i_ino);
- goto err_out;
- }
- ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1);
- index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
- if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) {
- ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
- "inode 0x%lx exceeds maximum size.",
- (unsigned long long)ia_pos >>
- ndir->itype.index.vcn_size_bits, vdir->i_ino);
- goto err_out;
- }
- /* The first index entry in this index buffer. */
- ie = (INDEX_ENTRY*)((u8*)&ia->index +
- le32_to_cpu(ia->index.entries_offset));
- /*
- * Loop until we exceed valid memory (corruption case) or until we
- * reach the last entry or until filldir tells us it has had enough
- * or signals an error (both covered by the rc test).
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- ntfs_debug("In index allocation, offset 0x%llx.",
- (unsigned long long)ia_start +
- (unsigned long long)((u8*)ie - (u8*)ia));
- /* Bounds checks. */
- if (unlikely((u8*)ie < (u8*)ia || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->key_length) >
- index_end))
- goto err_out;
- /* The last entry cannot contain a name. */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /* Skip index block entry if continuing previous readdir. */
- if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
- continue;
- /* Advance the position even if going to skip the entry. */
- actor->pos = (u8*)ie - (u8*)ia +
- (sle64_to_cpu(ia->index_block_vcn) <<
- ndir->itype.index.vcn_size_bits) +
- vol->mft_record_size;
- /*
- * Submit the name to the @filldir callback. Note,
- * ntfs_filldir() drops the lock on @ia_page but it retakes it
- * before returning, unless a non-zero value is returned in
- * which case the page is left unlocked.
- */
- rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor);
- if (rc) {
- /* @ia_page is already unlocked in this case. */
- ntfs_unmap_page(ia_page);
- ntfs_unmap_page(bmp_page);
- iput(bmp_vi);
- goto abort;
- }
- }
- goto find_next_index_buffer;
-unm_EOD:
- if (ia_page) {
- unlock_page(ia_page);
- ntfs_unmap_page(ia_page);
- }
- ntfs_unmap_page(bmp_page);
- iput(bmp_vi);
-EOD:
- /* We are finished, set fpos to EOD. */
- actor->pos = i_size + vol->mft_record_size;
-abort:
- kfree(name);
- return 0;
-err_out:
- if (bmp_page) {
- ntfs_unmap_page(bmp_page);
-iput_err_out:
- iput(bmp_vi);
- }
- if (ia_page) {
- unlock_page(ia_page);
- ntfs_unmap_page(ia_page);
- }
- kfree(ir);
- kfree(name);
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(ndir);
- if (!err)
- err = -EIO;
- ntfs_debug("Failed. Returning error code %i.", -err);
- return err;
-}
-
-/**
- * ntfs_dir_open - called when an inode is about to be opened
- * @vi: inode to be opened
- * @filp: file structure describing the inode
- *
- * Limit directory size to the page cache limit on architectures where unsigned
- * long is 32-bits. This is the most we can do for now without overflowing the
- * page cache page index. Doing it this way means we don't run into problems
- * because of existing too large directories. It would be better to allow the
- * user to read the accessible part of the directory but I doubt very much
- * anyone is going to hit this check on a 32-bit architecture, so there is no
- * point in adding the extra complexity required to support this.
- *
- * On 64-bit architectures, the check is hopefully optimized away by the
- * compiler.
- */
-static int ntfs_dir_open(struct inode *vi, struct file *filp)
-{
- if (sizeof(unsigned long) < 8) {
- if (i_size_read(vi) > MAX_LFS_FILESIZE)
- return -EFBIG;
- }
- return 0;
-}
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_dir_fsync - sync a directory to disk
- * @filp: directory to be synced
- * @start: offset in bytes of the beginning of data range to sync
- * @end: offset in bytes of the end of data range (inclusive)
- * @datasync: if non-zero only flush user data and not metadata
- *
- * Data integrity sync of a directory to disk. Used for fsync, fdatasync, and
- * msync system calls. This function is based on file.c::ntfs_file_fsync().
- *
- * Write the mft record and all associated extent mft records as well as the
- * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device.
- *
- * If @datasync is true, we do not wait on the inode(s) to be written out
- * but we always wait on the page cache pages to be written out.
- *
- * Note: In the past @filp could be NULL so we ignore it as we don't need it
- * anyway.
- *
- * Locking: Caller must hold i_mutex on the inode.
- *
- * TODO: We should probably also write all attribute/index inodes associated
- * with this inode but since we have no simple way of getting to them we ignore
- * this problem for now. We do write the $BITMAP attribute if it is present
- * which is the important one for a directory so things are not too bad.
- */
-static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync)
-{
- struct inode *bmp_vi, *vi = filp->f_mapping->host;
- int err, ret;
- ntfs_attr na;
-
- ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
-
- err = file_write_and_wait_range(filp, start, end);
- if (err)
- return err;
- inode_lock(vi);
-
- BUG_ON(!S_ISDIR(vi->i_mode));
- /* If the bitmap attribute inode is in memory sync it, too. */
- na.mft_no = vi->i_ino;
- na.type = AT_BITMAP;
- na.name = I30;
- na.name_len = 4;
- bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na);
- if (bmp_vi) {
- write_inode_now(bmp_vi, !datasync);
- iput(bmp_vi);
- }
- ret = __ntfs_write_inode(vi, 1);
- write_inode_now(vi, !datasync);
- err = sync_blockdev(vi->i_sb->s_bdev);
- if (unlikely(err && !ret))
- ret = err;
- if (likely(!ret))
- ntfs_debug("Done.");
- else
- ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
- "%u.", datasync ? "data" : "", vi->i_ino, -ret);
- inode_unlock(vi);
- return ret;
-}
-
-#endif /* NTFS_RW */
-
-WRAP_DIR_ITER(ntfs_readdir) // FIXME!
-const struct file_operations ntfs_dir_ops = {
- .llseek = generic_file_llseek, /* Seek inside directory. */
- .read = generic_read_dir, /* Return -EISDIR. */
- .iterate_shared = shared_ntfs_readdir, /* Read directory contents. */
-#ifdef NTFS_RW
- .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */
-#endif /* NTFS_RW */
- /*.ioctl = ,*/ /* Perform function on the
- mounted filesystem. */
- .open = ntfs_dir_open, /* Open directory. */
-};
diff --git a/fs/ntfs/dir.h b/fs/ntfs/dir.h
deleted file mode 100644
index 0e326753df40..000000000000
--- a/fs/ntfs/dir.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * dir.h - Defines for directory handling in NTFS Linux kernel driver. Part of
- * the Linux-NTFS project.
- *
- * Copyright (c) 2002-2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_DIR_H
-#define _LINUX_NTFS_DIR_H
-
-#include "layout.h"
-#include "inode.h"
-#include "types.h"
-
-/*
- * ntfs_name is used to return the file name to the caller of
- * ntfs_lookup_inode_by_name() in order for the caller (namei.c::ntfs_lookup())
- * to be able to deal with dcache aliasing issues.
- */
-typedef struct {
- MFT_REF mref;
- FILE_NAME_TYPE_FLAGS type;
- u8 len;
- ntfschar name[0];
-} __attribute__ ((__packed__)) ntfs_name;
-
-/* The little endian Unicode string $I30 as a global constant. */
-extern ntfschar I30[5];
-
-extern MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni,
- const ntfschar *uname, const int uname_len, ntfs_name **res);
-
-#endif /* _LINUX_NTFS_FS_DIR_H */
diff --git a/fs/ntfs/endian.h b/fs/ntfs/endian.h
deleted file mode 100644
index f30c139bf9ae..000000000000
--- a/fs/ntfs/endian.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * endian.h - Defines for endianness handling in NTFS Linux kernel driver.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_ENDIAN_H
-#define _LINUX_NTFS_ENDIAN_H
-
-#include <asm/byteorder.h>
-#include "types.h"
-
-/*
- * Signed endianness conversion functions.
- */
-
-static inline s16 sle16_to_cpu(sle16 x)
-{
- return le16_to_cpu((__force le16)x);
-}
-
-static inline s32 sle32_to_cpu(sle32 x)
-{
- return le32_to_cpu((__force le32)x);
-}
-
-static inline s64 sle64_to_cpu(sle64 x)
-{
- return le64_to_cpu((__force le64)x);
-}
-
-static inline s16 sle16_to_cpup(sle16 *x)
-{
- return le16_to_cpu(*(__force le16*)x);
-}
-
-static inline s32 sle32_to_cpup(sle32 *x)
-{
- return le32_to_cpu(*(__force le32*)x);
-}
-
-static inline s64 sle64_to_cpup(sle64 *x)
-{
- return le64_to_cpu(*(__force le64*)x);
-}
-
-static inline sle16 cpu_to_sle16(s16 x)
-{
- return (__force sle16)cpu_to_le16(x);
-}
-
-static inline sle32 cpu_to_sle32(s32 x)
-{
- return (__force sle32)cpu_to_le32(x);
-}
-
-static inline sle64 cpu_to_sle64(s64 x)
-{
- return (__force sle64)cpu_to_le64(x);
-}
-
-static inline sle16 cpu_to_sle16p(s16 *x)
-{
- return (__force sle16)cpu_to_le16(*x);
-}
-
-static inline sle32 cpu_to_sle32p(s32 *x)
-{
- return (__force sle32)cpu_to_le32(*x);
-}
-
-static inline sle64 cpu_to_sle64p(s64 *x)
-{
- return (__force sle64)cpu_to_le64(*x);
-}
-
-#endif /* _LINUX_NTFS_ENDIAN_H */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
deleted file mode 100644
index 297c0b9db621..000000000000
--- a/fs/ntfs/file.c
+++ /dev/null
@@ -1,1997 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
- */
-
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/pagevec.h>
-#include <linux/sched/signal.h>
-#include <linux/swap.h>
-#include <linux/uio.h>
-#include <linux/writeback.h>
-
-#include <asm/page.h>
-#include <linux/uaccess.h>
-
-#include "attrib.h"
-#include "bitmap.h"
-#include "inode.h"
-#include "debug.h"
-#include "lcnalloc.h"
-#include "malloc.h"
-#include "mft.h"
-#include "ntfs.h"
-
-/**
- * ntfs_file_open - called when an inode is about to be opened
- * @vi: inode to be opened
- * @filp: file structure describing the inode
- *
- * Limit file size to the page cache limit on architectures where unsigned long
- * is 32-bits. This is the most we can do for now without overflowing the page
- * cache page index. Doing it this way means we don't run into problems because
- * of existing too large files. It would be better to allow the user to read
- * the beginning of the file but I doubt very much anyone is going to hit this
- * check on a 32-bit architecture, so there is no point in adding the extra
- * complexity required to support this.
- *
- * On 64-bit architectures, the check is hopefully optimized away by the
- * compiler.
- *
- * After the check passes, just call generic_file_open() to do its work.
- */
-static int ntfs_file_open(struct inode *vi, struct file *filp)
-{
- if (sizeof(unsigned long) < 8) {
- if (i_size_read(vi) > MAX_LFS_FILESIZE)
- return -EOVERFLOW;
- }
- return generic_file_open(vi, filp);
-}
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_attr_extend_initialized - extend the initialized size of an attribute
- * @ni: ntfs inode of the attribute to extend
- * @new_init_size: requested new initialized size in bytes
- *
- * Extend the initialized size of an attribute described by the ntfs inode @ni
- * to @new_init_size bytes. This involves zeroing any non-sparse space between
- * the old initialized size and @new_init_size both in the page cache and on
- * disk (if relevant complete pages are already uptodate in the page cache then
- * these are simply marked dirty).
- *
- * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
- * in the resident attribute case, it is tied to the initialized size and, in
- * the non-resident attribute case, it may not fall below the initialized size.
- *
- * Note that if the attribute is resident, we do not need to touch the page
- * cache at all. This is because if the page cache page is not uptodate we
- * bring it uptodate later, when doing the write to the mft record since we
- * then already have the page mapped. And if the page is uptodate, the
- * non-initialized region will already have been zeroed when the page was
- * brought uptodate and the region may in fact already have been overwritten
- * with new data via mmap() based writes, so we cannot just zero it. And since
- * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
- * is unspecified, we choose not to do zeroing and thus we do not need to touch
- * the page at all. For a more detailed explanation see ntfs_truncate() in
- * fs/ntfs/inode.c.
- *
- * Return 0 on success and -errno on error. In the case that an error is
- * encountered it is possible that the initialized size will already have been
- * incremented some way towards @new_init_size but it is guaranteed that if
- * this is the case, the necessary zeroing will also have happened and that all
- * metadata is self-consistent.
- *
- * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be
- * held by the caller.
- */
-static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
-{
- s64 old_init_size;
- loff_t old_i_size;
- pgoff_t index, end_index;
- unsigned long flags;
- struct inode *vi = VFS_I(ni);
- ntfs_inode *base_ni;
- MFT_RECORD *m = NULL;
- ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx = NULL;
- struct address_space *mapping;
- struct page *page = NULL;
- u8 *kattr;
- int err;
- u32 attr_len;
-
- read_lock_irqsave(&ni->size_lock, flags);
- old_init_size = ni->initialized_size;
- old_i_size = i_size_read(vi);
- BUG_ON(new_init_size > ni->allocated_size);
- read_unlock_irqrestore(&ni->size_lock, flags);
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
- "old_initialized_size 0x%llx, "
- "new_initialized_size 0x%llx, i_size 0x%llx.",
- vi->i_ino, (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)old_init_size,
- (unsigned long long)new_init_size, old_i_size);
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- /* Use goto to reduce indentation and we need the label below anyway. */
- if (NInoNonResident(ni))
- goto do_non_resident_extend;
- BUG_ON(old_init_size != old_i_size);
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- BUG_ON(a->non_resident);
- /* The total length of the attribute value. */
- attr_len = le32_to_cpu(a->data.resident.value_length);
- BUG_ON(old_i_size != (loff_t)attr_len);
- /*
- * Do the zeroing in the mft record and update the attribute size in
- * the mft record.
- */
- kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
- memset(kattr + attr_len, 0, new_init_size - attr_len);
- a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
- /* Finally, update the sizes in the vfs and ntfs inodes. */
- write_lock_irqsave(&ni->size_lock, flags);
- i_size_write(vi, new_init_size);
- ni->initialized_size = new_init_size;
- write_unlock_irqrestore(&ni->size_lock, flags);
- goto done;
-do_non_resident_extend:
- /*
- * If the new initialized size @new_init_size exceeds the current file
- * size (vfs inode->i_size), we need to extend the file size to the
- * new initialized size.
- */
- if (new_init_size > old_i_size) {
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- BUG_ON(!a->non_resident);
- BUG_ON(old_i_size != (loff_t)
- sle64_to_cpu(a->data.non_resident.data_size));
- a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- /* Update the file size in the vfs inode. */
- i_size_write(vi, new_init_size);
- ntfs_attr_put_search_ctx(ctx);
- ctx = NULL;
- unmap_mft_record(base_ni);
- m = NULL;
- }
- mapping = vi->i_mapping;
- index = old_init_size >> PAGE_SHIFT;
- end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- do {
- /*
- * Read the page. If the page is not present, this will zero
- * the uninitialized regions for us.
- */
- page = read_mapping_page(mapping, index, NULL);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto init_err_out;
- }
- /*
- * Update the initialized size in the ntfs inode. This is
- * enough to make ntfs_writepage() work.
- */
- write_lock_irqsave(&ni->size_lock, flags);
- ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT;
- if (ni->initialized_size > new_init_size)
- ni->initialized_size = new_init_size;
- write_unlock_irqrestore(&ni->size_lock, flags);
- /* Set the page dirty so it gets written out. */
- set_page_dirty(page);
- put_page(page);
- /*
- * Play nice with the vm and the rest of the system. This is
- * very much needed as we can potentially be modifying the
- * initialised size from a very small value to a really huge
- * value, e.g.
- * f = open(somefile, O_TRUNC);
- * truncate(f, 10GiB);
- * seek(f, 10GiB);
- * write(f, 1);
- * And this would mean we would be marking dirty hundreds of
- * thousands of pages or as in the above example more than
- * two and a half million pages!
- *
- * TODO: For sparse pages could optimize this workload by using
- * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This
- * would be set in read_folio for sparse pages and here we would
- * not need to mark dirty any pages which have this bit set.
- * The only caveat is that we have to clear the bit everywhere
- * where we allocate any clusters that lie in the page or that
- * contain the page.
- *
- * TODO: An even greater optimization would be for us to only
- * call read_folio() on pages which are not in sparse regions as
- * determined from the runlist. This would greatly reduce the
- * number of pages we read and make dirty in the case of sparse
- * files.
- */
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- } while (++index < end_index);
- read_lock_irqsave(&ni->size_lock, flags);
- BUG_ON(ni->initialized_size != new_init_size);
- read_unlock_irqrestore(&ni->size_lock, flags);
- /* Now bring in sync the initialized_size in the mft record. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- goto init_err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto init_err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto init_err_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- BUG_ON(!a->non_resident);
- a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
-done:
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
- (unsigned long long)new_init_size, i_size_read(vi));
- return 0;
-init_err_out:
- write_lock_irqsave(&ni->size_lock, flags);
- ni->initialized_size = old_init_size;
- write_unlock_irqrestore(&ni->size_lock, flags);
-err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- ntfs_debug("Failed. Returning error code %i.", err);
- return err;
-}
-
-static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
- struct iov_iter *from)
-{
- loff_t pos;
- s64 end, ll;
- ssize_t err;
- unsigned long flags;
- struct file *file = iocb->ki_filp;
- struct inode *vi = file_inode(file);
- ntfs_inode *ni = NTFS_I(vi);
- ntfs_volume *vol = ni->vol;
-
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
- "0x%llx, count 0x%zx.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)iocb->ki_pos,
- iov_iter_count(from));
- err = generic_write_checks(iocb, from);
- if (unlikely(err <= 0))
- goto out;
- /*
- * All checks have passed. Before we start doing any writing we want
- * to abort any totally illegal writes.
- */
- BUG_ON(NInoMstProtected(ni));
- BUG_ON(ni->type != AT_DATA);
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- /* Only $DATA attributes can be encrypted. */
- /*
- * Reminder for later: Encrypted files are _always_
- * non-resident so that the content can always be encrypted.
- */
- ntfs_debug("Denying write access to encrypted file.");
- err = -EACCES;
- goto out;
- }
- if (NInoCompressed(ni)) {
- /* Only unnamed $DATA attribute can be compressed. */
- BUG_ON(ni->name_len);
- /*
- * Reminder for later: If resident, the data is not actually
- * compressed. Only on the switch to non-resident does
- * compression kick in. This is in contrast to encrypted files
- * (see above).
- */
- ntfs_error(vi->i_sb, "Writing to compressed files is not "
- "implemented yet. Sorry.");
- err = -EOPNOTSUPP;
- goto out;
- }
- err = file_remove_privs(file);
- if (unlikely(err))
- goto out;
- /*
- * Our ->update_time method always succeeds thus file_update_time()
- * cannot fail either so there is no need to check the return code.
- */
- file_update_time(file);
- pos = iocb->ki_pos;
- /* The first byte after the last cluster being written to. */
- end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
- ~(u64)vol->cluster_size_mask;
- /*
- * If the write goes beyond the allocated size, extend the allocation
- * to cover the whole of the write, rounded up to the nearest cluster.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (end > ll) {
- /*
- * Extend the allocation without changing the data size.
- *
- * Note we ensure the allocation is big enough to at least
- * write some data but we do not require the allocation to be
- * complete, i.e. it may be partial.
- */
- ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
- if (likely(ll >= 0)) {
- BUG_ON(pos >= ll);
- /* If the extension was partial truncate the write. */
- if (end > ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "the allocation was only "
- "partially extended.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- iov_iter_truncate(from, ll - pos);
- }
- } else {
- err = ll;
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- /* Perform a partial write if possible or fail. */
- if (pos < ll) {
- ntfs_debug("Truncating write to inode 0x%lx "
- "attribute type 0x%x, because "
- "extending the allocation "
- "failed (error %d).",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type),
- (int)-err);
- iov_iter_truncate(from, ll - pos);
- } else {
- if (err != -ENOSPC)
- ntfs_error(vi->i_sb, "Cannot perform "
- "write to inode "
- "0x%lx, attribute "
- "type 0x%x, because "
- "extending the "
- "allocation failed "
- "(error %ld).",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type),
- (long)-err);
- else
- ntfs_debug("Cannot perform write to "
- "inode 0x%lx, "
- "attribute type 0x%x, "
- "because there is not "
- "space left.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- goto out;
- }
- }
- }
- /*
- * If the write starts beyond the initialized size, extend it up to the
- * beginning of the write and initialize all non-sparse space between
- * the old initialized size and the new one. This automatically also
- * increments the vfs inode->i_size to keep it above or equal to the
- * initialized_size.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (pos > ll) {
- /*
- * Wait for ongoing direct i/o to complete before proceeding.
- * New direct i/o cannot start as we hold i_mutex.
- */
- inode_dio_wait(vi);
- err = ntfs_attr_extend_initialized(ni, pos);
- if (unlikely(err < 0))
- ntfs_error(vi->i_sb, "Cannot perform write to inode "
- "0x%lx, attribute type 0x%x, because "
- "extending the initialized size "
- "failed (error %d).", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type),
- (int)-err);
- }
-out:
- return err;
-}
-
-/**
- * __ntfs_grab_cache_pages - obtain a number of locked pages
- * @mapping: address space mapping from which to obtain page cache pages
- * @index: starting index in @mapping at which to begin obtaining pages
- * @nr_pages: number of page cache pages to obtain
- * @pages: array of pages in which to return the obtained page cache pages
- * @cached_page: allocated but as yet unused page
- *
- * Obtain @nr_pages locked page cache pages from the mapping @mapping and
- * starting at index @index.
- *
- * If a page is newly created, add it to lru list
- *
- * Note, the page locks are obtained in ascending page index order.
- */
-static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
- pgoff_t index, const unsigned nr_pages, struct page **pages,
- struct page **cached_page)
-{
- int err, nr;
-
- BUG_ON(!nr_pages);
- err = nr = 0;
- do {
- pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK |
- FGP_ACCESSED);
- if (!pages[nr]) {
- if (!*cached_page) {
- *cached_page = page_cache_alloc(mapping);
- if (unlikely(!*cached_page)) {
- err = -ENOMEM;
- goto err_out;
- }
- }
- err = add_to_page_cache_lru(*cached_page, mapping,
- index,
- mapping_gfp_constraint(mapping, GFP_KERNEL));
- if (unlikely(err)) {
- if (err == -EEXIST)
- continue;
- goto err_out;
- }
- pages[nr] = *cached_page;
- *cached_page = NULL;
- }
- index++;
- nr++;
- } while (nr < nr_pages);
-out:
- return err;
-err_out:
- while (nr > 0) {
- unlock_page(pages[--nr]);
- put_page(pages[nr]);
- }
- goto out;
-}
-
-static inline void ntfs_submit_bh_for_read(struct buffer_head *bh)
-{
- lock_buffer(bh);
- get_bh(bh);
- bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, bh);
-}
-
-/**
- * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
- * @pages: array of destination pages
- * @nr_pages: number of pages in @pages
- * @pos: byte position in file at which the write begins
- * @bytes: number of bytes to be written
- *
- * This is called for non-resident attributes from ntfs_file_buffered_write()
- * with i_mutex held on the inode (@pages[0]->mapping->host). There are
- * @nr_pages pages in @pages which are locked but not kmap()ped. The source
- * data has not yet been copied into the @pages.
- *
- * Need to fill any holes with actual clusters, allocate buffers if necessary,
- * ensure all the buffers are mapped, and bring uptodate any buffers that are
- * only partially being written to.
- *
- * If @nr_pages is greater than one, we are guaranteed that the cluster size is
- * greater than PAGE_SIZE, that all pages in @pages are entirely inside
- * the same cluster and that they are the entirety of that cluster, and that
- * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
- *
- * i_size is not to be modified yet.
- *
- * Return 0 on success or -errno on error.
- */
-static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
- unsigned nr_pages, s64 pos, size_t bytes)
-{
- VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
- LCN lcn;
- s64 bh_pos, vcn_len, end, initialized_size;
- sector_t lcn_block;
- struct folio *folio;
- struct inode *vi;
- ntfs_inode *ni, *base_ni = NULL;
- ntfs_volume *vol;
- runlist_element *rl, *rl2;
- struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
- ntfs_attr_search_ctx *ctx = NULL;
- MFT_RECORD *m = NULL;
- ATTR_RECORD *a = NULL;
- unsigned long flags;
- u32 attr_rec_len = 0;
- unsigned blocksize, u;
- int err, mp_size;
- bool rl_write_locked, was_hole, is_retry;
- unsigned char blocksize_bits;
- struct {
- u8 runlist_merged:1;
- u8 mft_attr_mapped:1;
- u8 mp_rebuilt:1;
- u8 attr_switched:1;
- } status = { 0, 0, 0, 0 };
-
- BUG_ON(!nr_pages);
- BUG_ON(!pages);
- BUG_ON(!*pages);
- vi = pages[0]->mapping->host;
- ni = NTFS_I(vi);
- vol = ni->vol;
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
- "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
- vi->i_ino, ni->type, pages[0]->index, nr_pages,
- (long long)pos, bytes);
- blocksize = vol->sb->s_blocksize;
- blocksize_bits = vol->sb->s_blocksize_bits;
- rl_write_locked = false;
- rl = NULL;
- err = 0;
- vcn = lcn = -1;
- vcn_len = 0;
- lcn_block = -1;
- was_hole = false;
- cpos = pos >> vol->cluster_size_bits;
- end = pos + bytes;
- cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
- /*
- * Loop over each buffer in each folio. Use goto to
- * reduce indentation.
- */
- u = 0;
-do_next_folio:
- folio = page_folio(pages[u]);
- bh_pos = folio_pos(folio);
- head = folio_buffers(folio);
- if (!head)
- /*
- * create_empty_buffers() will create uptodate/dirty
- * buffers if the folio is uptodate/dirty.
- */
- head = create_empty_buffers(folio, blocksize, 0);
- bh = head;
- do {
- VCN cdelta;
- s64 bh_end;
- unsigned bh_cofs;
-
- /* Clear buffer_new on all buffers to reinitialise state. */
- if (buffer_new(bh))
- clear_buffer_new(bh);
- bh_end = bh_pos + blocksize;
- bh_cpos = bh_pos >> vol->cluster_size_bits;
- bh_cofs = bh_pos & vol->cluster_size_mask;
- if (buffer_mapped(bh)) {
- /*
- * The buffer is already mapped. If it is uptodate,
- * ignore it.
- */
- if (buffer_uptodate(bh))
- continue;
- /*
- * The buffer is not uptodate. If the folio is uptodate
- * set the buffer uptodate and otherwise ignore it.
- */
- if (folio_test_uptodate(folio)) {
- set_buffer_uptodate(bh);
- continue;
- }
- /*
- * Neither the folio nor the buffer are uptodate. If
- * the buffer is only partially being written to, we
- * need to read it in before the write, i.e. now.
- */
- if ((bh_pos < pos && bh_end > pos) ||
- (bh_pos < end && bh_end > end)) {
- /*
- * If the buffer is fully or partially within
- * the initialized size, do an actual read.
- * Otherwise, simply zero the buffer.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (bh_pos < initialized_size) {
- ntfs_submit_bh_for_read(bh);
- *wait_bh++ = bh;
- } else {
- folio_zero_range(folio, bh_offset(bh),
- blocksize);
- set_buffer_uptodate(bh);
- }
- }
- continue;
- }
- /* Unmapped buffer. Need to map it. */
- bh->b_bdev = vol->sb->s_bdev;
- /*
- * If the current buffer is in the same clusters as the map
- * cache, there is no need to check the runlist again. The
- * map cache is made up of @vcn, which is the first cached file
- * cluster, @vcn_len which is the number of cached file
- * clusters, @lcn is the device cluster corresponding to @vcn,
- * and @lcn_block is the block number corresponding to @lcn.
- */
- cdelta = bh_cpos - vcn;
- if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
-map_buffer_cached:
- BUG_ON(lcn < 0);
- bh->b_blocknr = lcn_block +
- (cdelta << (vol->cluster_size_bits -
- blocksize_bits)) +
- (bh_cofs >> blocksize_bits);
- set_buffer_mapped(bh);
- /*
- * If the folio is uptodate so is the buffer. If the
- * buffer is fully outside the write, we ignore it if
- * it was already allocated and we mark it dirty so it
- * gets written out if we allocated it. On the other
- * hand, if we allocated the buffer but we are not
- * marking it dirty we set buffer_new so we can do
- * error recovery.
- */
- if (folio_test_uptodate(folio)) {
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
- if (unlikely(was_hole)) {
- /* We allocated the buffer. */
- clean_bdev_bh_alias(bh);
- if (bh_end <= pos || bh_pos >= end)
- mark_buffer_dirty(bh);
- else
- set_buffer_new(bh);
- }
- continue;
- }
- /* Page is _not_ uptodate. */
- if (likely(!was_hole)) {
- /*
- * Buffer was already allocated. If it is not
- * uptodate and is only partially being written
- * to, we need to read it in before the write,
- * i.e. now.
- */
- if (!buffer_uptodate(bh) && bh_pos < end &&
- bh_end > pos &&
- (bh_pos < pos ||
- bh_end > end)) {
- /*
- * If the buffer is fully or partially
- * within the initialized size, do an
- * actual read. Otherwise, simply zero
- * the buffer.
- */
- read_lock_irqsave(&ni->size_lock,
- flags);
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock,
- flags);
- if (bh_pos < initialized_size) {
- ntfs_submit_bh_for_read(bh);
- *wait_bh++ = bh;
- } else {
- folio_zero_range(folio,
- bh_offset(bh),
- blocksize);
- set_buffer_uptodate(bh);
- }
- }
- continue;
- }
- /* We allocated the buffer. */
- clean_bdev_bh_alias(bh);
- /*
- * If the buffer is fully outside the write, zero it,
- * set it uptodate, and mark it dirty so it gets
- * written out. If it is partially being written to,
- * zero region surrounding the write but leave it to
- * commit write to do anything else. Finally, if the
- * buffer is fully being overwritten, do nothing.
- */
- if (bh_end <= pos || bh_pos >= end) {
- if (!buffer_uptodate(bh)) {
- folio_zero_range(folio, bh_offset(bh),
- blocksize);
- set_buffer_uptodate(bh);
- }
- mark_buffer_dirty(bh);
- continue;
- }
- set_buffer_new(bh);
- if (!buffer_uptodate(bh) &&
- (bh_pos < pos || bh_end > end)) {
- u8 *kaddr;
- unsigned pofs;
-
- kaddr = kmap_local_folio(folio, 0);
- if (bh_pos < pos) {
- pofs = bh_pos & ~PAGE_MASK;
- memset(kaddr + pofs, 0, pos - bh_pos);
- }
- if (bh_end > end) {
- pofs = end & ~PAGE_MASK;
- memset(kaddr + pofs, 0, bh_end - end);
- }
- kunmap_local(kaddr);
- flush_dcache_folio(folio);
- }
- continue;
- }
- /*
- * Slow path: this is the first buffer in the cluster. If it
- * is outside allocated size and is not uptodate, zero it and
- * set it uptodate.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (bh_pos > initialized_size) {
- if (folio_test_uptodate(folio)) {
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
- } else if (!buffer_uptodate(bh)) {
- folio_zero_range(folio, bh_offset(bh),
- blocksize);
- set_buffer_uptodate(bh);
- }
- continue;
- }
- is_retry = false;
- if (!rl) {
- down_read(&ni->runlist.lock);
-retry_remap:
- rl = ni->runlist.rl;
- }
- if (likely(rl != NULL)) {
- /* Seek to element containing target cluster. */
- while (rl->length && rl[1].vcn <= bh_cpos)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
- if (likely(lcn >= 0)) {
- /*
- * Successful remap, setup the map cache and
- * use that to deal with the buffer.
- */
- was_hole = false;
- vcn = bh_cpos;
- vcn_len = rl[1].vcn - vcn;
- lcn_block = lcn << (vol->cluster_size_bits -
- blocksize_bits);
- cdelta = 0;
- /*
- * If the number of remaining clusters touched
- * by the write is smaller or equal to the
- * number of cached clusters, unlock the
- * runlist as the map cache will be used from
- * now on.
- */
- if (likely(vcn + vcn_len >= cend)) {
- if (rl_write_locked) {
- up_write(&ni->runlist.lock);
- rl_write_locked = false;
- } else
- up_read(&ni->runlist.lock);
- rl = NULL;
- }
- goto map_buffer_cached;
- }
- } else
- lcn = LCN_RL_NOT_MAPPED;
- /*
- * If it is not a hole and not out of bounds, the runlist is
- * probably unmapped so try to map it now.
- */
- if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
- if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
- /* Attempt to map runlist. */
- if (!rl_write_locked) {
- /*
- * We need the runlist locked for
- * writing, so if it is locked for
- * reading relock it now and retry in
- * case it changed whilst we dropped
- * the lock.
- */
- up_read(&ni->runlist.lock);
- down_write(&ni->runlist.lock);
- rl_write_locked = true;
- goto retry_remap;
- }
- err = ntfs_map_runlist_nolock(ni, bh_cpos,
- NULL);
- if (likely(!err)) {
- is_retry = true;
- goto retry_remap;
- }
- /*
- * If @vcn is out of bounds, pretend @lcn is
- * LCN_ENOENT. As long as the buffer is out
- * of bounds this will work fine.
- */
- if (err == -ENOENT) {
- lcn = LCN_ENOENT;
- err = 0;
- goto rl_not_mapped_enoent;
- }
- } else
- err = -EIO;
- /* Failed to map the buffer, even after retrying. */
- bh->b_blocknr = -1;
- ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
- "attribute type 0x%x, vcn 0x%llx, "
- "vcn offset 0x%x, because its "
- "location on disk could not be "
- "determined%s (error code %i).",
- ni->mft_no, ni->type,
- (unsigned long long)bh_cpos,
- (unsigned)bh_pos &
- vol->cluster_size_mask,
- is_retry ? " even after retrying" : "",
- err);
- break;
- }
-rl_not_mapped_enoent:
- /*
- * The buffer is in a hole or out of bounds. We need to fill
- * the hole, unless the buffer is in a cluster which is not
- * touched by the write, in which case we just leave the buffer
- * unmapped. This can only happen when the cluster size is
- * less than the page cache size.
- */
- if (unlikely(vol->cluster_size < PAGE_SIZE)) {
- bh_cend = (bh_end + vol->cluster_size - 1) >>
- vol->cluster_size_bits;
- if ((bh_cend <= cpos || bh_cpos >= cend)) {
- bh->b_blocknr = -1;
- /*
- * If the buffer is uptodate we skip it. If it
- * is not but the folio is uptodate, we can set
- * the buffer uptodate. If the folio is not
- * uptodate, we can clear the buffer and set it
- * uptodate. Whether this is worthwhile is
- * debatable and this could be removed.
- */
- if (folio_test_uptodate(folio)) {
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
- } else if (!buffer_uptodate(bh)) {
- folio_zero_range(folio, bh_offset(bh),
- blocksize);
- set_buffer_uptodate(bh);
- }
- continue;
- }
- }
- /*
- * Out of bounds buffer is invalid if it was not really out of
- * bounds.
- */
- BUG_ON(lcn != LCN_HOLE);
- /*
- * We need the runlist locked for writing, so if it is locked
- * for reading relock it now and retry in case it changed
- * whilst we dropped the lock.
- */
- BUG_ON(!rl);
- if (!rl_write_locked) {
- up_read(&ni->runlist.lock);
- down_write(&ni->runlist.lock);
- rl_write_locked = true;
- goto retry_remap;
- }
- /* Find the previous last allocated cluster. */
- BUG_ON(rl->lcn != LCN_HOLE);
- lcn = -1;
- rl2 = rl;
- while (--rl2 >= ni->runlist.rl) {
- if (rl2->lcn >= 0) {
- lcn = rl2->lcn + rl2->length;
- break;
- }
- }
- rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
- false);
- if (IS_ERR(rl2)) {
- err = PTR_ERR(rl2);
- ntfs_debug("Failed to allocate cluster, error code %i.",
- err);
- break;
- }
- lcn = rl2->lcn;
- rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
- if (IS_ERR(rl)) {
- err = PTR_ERR(rl);
- if (err != -ENOMEM)
- err = -EIO;
- if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to release "
- "allocated cluster in error "
- "code path. Run chkdsk to "
- "recover the lost cluster.");
- NVolSetErrors(vol);
- }
- ntfs_free(rl2);
- break;
- }
- ni->runlist.rl = rl;
- status.runlist_merged = 1;
- ntfs_debug("Allocated cluster, lcn 0x%llx.",
- (unsigned long long)lcn);
- /* Map and lock the mft record and get the attribute record. */
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- break;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- unmap_mft_record(base_ni);
- break;
- }
- status.mft_attr_mapped = 1;
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- break;
- }
- m = ctx->mrec;
- a = ctx->attr;
- /*
- * Find the runlist element with which the attribute extent
- * starts. Note, we cannot use the _attr_ version because we
- * have mapped the mft record. That is ok because we know the
- * runlist fragment must be mapped already to have ever gotten
- * here, so we can just use the _rl_ version.
- */
- vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
- rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
- BUG_ON(!rl2);
- BUG_ON(!rl2->length);
- BUG_ON(rl2->lcn < LCN_HOLE);
- highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
- /*
- * If @highest_vcn is zero, calculate the real highest_vcn
- * (which can really be zero).
- */
- if (!highest_vcn)
- highest_vcn = (sle64_to_cpu(
- a->data.non_resident.allocated_size) >>
- vol->cluster_size_bits) - 1;
- /*
- * Determine the size of the mapping pairs array for the new
- * extent, i.e. the old extent with the hole filled.
- */
- mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
- highest_vcn);
- if (unlikely(mp_size <= 0)) {
- if (!(err = mp_size))
- err = -EIO;
- ntfs_debug("Failed to get size for mapping pairs "
- "array, error code %i.", err);
- break;
- }
- /*
- * Resize the attribute record to fit the new mapping pairs
- * array.
- */
- attr_rec_len = le32_to_cpu(a->length);
- err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset));
- if (unlikely(err)) {
- BUG_ON(err != -ENOSPC);
- // TODO: Deal with this by using the current attribute
- // and fill it with as much of the mapping pairs
- // array as possible. Then loop over each attribute
- // extent rewriting the mapping pairs arrays as we go
- // along and if when we reach the end we have not
- // enough space, try to resize the last attribute
- // extent and if even that fails, add a new attribute
- // extent.
- // We could also try to resize at each step in the hope
- // that we will not need to rewrite every single extent.
- // Note, we may need to decompress some extents to fill
- // the runlist as we are walking the extents...
- ntfs_error(vol->sb, "Not enough space in the mft "
- "record for the extended attribute "
- "record. This case is not "
- "implemented yet.");
- err = -EOPNOTSUPP;
- break ;
- }
- status.mp_rebuilt = 1;
- /*
- * Generate the mapping pairs array directly into the attribute
- * record.
- */
- err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset),
- mp_size, rl2, vcn, highest_vcn, NULL);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, "
- "attribute type 0x%x, because building "
- "the mapping pairs failed with error "
- "code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- err = -EIO;
- break;
- }
- /* Update the highest_vcn but only if it was not set. */
- if (unlikely(!a->data.non_resident.highest_vcn))
- a->data.non_resident.highest_vcn =
- cpu_to_sle64(highest_vcn);
- /*
- * If the attribute is sparse/compressed, update the compressed
- * size in the ntfs_inode structure and the attribute record.
- */
- if (likely(NInoSparse(ni) || NInoCompressed(ni))) {
- /*
- * If we are not in the first attribute extent, switch
- * to it, but first ensure the changes will make it to
- * disk later.
- */
- if (a->data.non_resident.lowest_vcn) {
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_reinit_search_ctx(ctx);
- err = ntfs_attr_lookup(ni->type, ni->name,
- ni->name_len, CASE_SENSITIVE,
- 0, NULL, 0, ctx);
- if (unlikely(err)) {
- status.attr_switched = 1;
- break;
- }
- /* @m is not used any more so do not set it. */
- a = ctx->attr;
- }
- write_lock_irqsave(&ni->size_lock, flags);
- ni->itype.compressed.size += vol->cluster_size;
- a->data.non_resident.compressed_size =
- cpu_to_sle64(ni->itype.compressed.size);
- write_unlock_irqrestore(&ni->size_lock, flags);
- }
- /* Ensure the changes make it to disk. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- /* Successfully filled the hole. */
- status.runlist_merged = 0;
- status.mft_attr_mapped = 0;
- status.mp_rebuilt = 0;
- /* Setup the map cache and use that to deal with the buffer. */
- was_hole = true;
- vcn = bh_cpos;
- vcn_len = 1;
- lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits);
- cdelta = 0;
- /*
- * If the number of remaining clusters in the @pages is smaller
- * or equal to the number of cached clusters, unlock the
- * runlist as the map cache will be used from now on.
- */
- if (likely(vcn + vcn_len >= cend)) {
- up_write(&ni->runlist.lock);
- rl_write_locked = false;
- rl = NULL;
- }
- goto map_buffer_cached;
- } while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
- /* If there are no errors, do the next page. */
- if (likely(!err && ++u < nr_pages))
- goto do_next_folio;
- /* If there are no errors, release the runlist lock if we took it. */
- if (likely(!err)) {
- if (unlikely(rl_write_locked)) {
- up_write(&ni->runlist.lock);
- rl_write_locked = false;
- } else if (unlikely(rl))
- up_read(&ni->runlist.lock);
- rl = NULL;
- }
- /* If we issued read requests, let them complete. */
- read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- while (wait_bh > wait) {
- bh = *--wait_bh;
- wait_on_buffer(bh);
- if (likely(buffer_uptodate(bh))) {
- folio = bh->b_folio;
- bh_pos = folio_pos(folio) + bh_offset(bh);
- /*
- * If the buffer overflows the initialized size, need
- * to zero the overflowing region.
- */
- if (unlikely(bh_pos + blocksize > initialized_size)) {
- int ofs = 0;
-
- if (likely(bh_pos < initialized_size))
- ofs = initialized_size - bh_pos;
- folio_zero_segment(folio, bh_offset(bh) + ofs,
- blocksize);
- }
- } else /* if (unlikely(!buffer_uptodate(bh))) */
- err = -EIO;
- }
- if (likely(!err)) {
- /* Clear buffer_new on all buffers. */
- u = 0;
- do {
- bh = head = page_buffers(pages[u]);
- do {
- if (buffer_new(bh))
- clear_buffer_new(bh);
- } while ((bh = bh->b_this_page) != head);
- } while (++u < nr_pages);
- ntfs_debug("Done.");
- return err;
- }
- if (status.attr_switched) {
- /* Get back to the attribute extent we modified. */
- ntfs_attr_reinit_search_ctx(ctx);
- if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) {
- ntfs_error(vol->sb, "Failed to find required "
- "attribute extent of attribute in "
- "error code path. Run chkdsk to "
- "recover.");
- write_lock_irqsave(&ni->size_lock, flags);
- ni->itype.compressed.size += vol->cluster_size;
- write_unlock_irqrestore(&ni->size_lock, flags);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- /*
- * The only thing that is now wrong is the compressed
- * size of the base attribute extent which chkdsk
- * should be able to fix.
- */
- NVolSetErrors(vol);
- } else {
- m = ctx->mrec;
- a = ctx->attr;
- status.attr_switched = 0;
- }
- }
- /*
- * If the runlist has been modified, need to restore it by punching a
- * hole into it and we then need to deallocate the on-disk cluster as
- * well. Note, we only modify the runlist if we are able to generate a
- * new mapping pairs array, i.e. only when the mapped attribute extent
- * is not switched.
- */
- if (status.runlist_merged && !status.attr_switched) {
- BUG_ON(!rl_write_locked);
- /* Make the file cluster we allocated sparse in the runlist. */
- if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) {
- ntfs_error(vol->sb, "Failed to punch hole into "
- "attribute runlist in error code "
- "path. Run chkdsk to recover the "
- "lost cluster.");
- NVolSetErrors(vol);
- } else /* if (success) */ {
- status.runlist_merged = 0;
- /*
- * Deallocate the on-disk cluster we allocated but only
- * if we succeeded in punching its vcn out of the
- * runlist.
- */
- down_write(&vol->lcnbmp_lock);
- if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
- ntfs_error(vol->sb, "Failed to release "
- "allocated cluster in error "
- "code path. Run chkdsk to "
- "recover the lost cluster.");
- NVolSetErrors(vol);
- }
- up_write(&vol->lcnbmp_lock);
- }
- }
- /*
- * Resize the attribute record to its old size and rebuild the mapping
- * pairs array. Note, we only can do this if the runlist has been
- * restored to its old state which also implies that the mapped
- * attribute extent is not switched.
- */
- if (status.mp_rebuilt && !status.runlist_merged) {
- if (ntfs_attr_record_resize(m, a, attr_rec_len)) {
- ntfs_error(vol->sb, "Failed to restore attribute "
- "record in error code path. Run "
- "chkdsk to recover.");
- NVolSetErrors(vol);
- } else /* if (success) */ {
- if (ntfs_mapping_pairs_build(vol, (u8*)a +
- le16_to_cpu(a->data.non_resident.
- mapping_pairs_offset), attr_rec_len -
- le16_to_cpu(a->data.non_resident.
- mapping_pairs_offset), ni->runlist.rl,
- vcn, highest_vcn, NULL)) {
- ntfs_error(vol->sb, "Failed to restore "
- "mapping pairs array in error "
- "code path. Run chkdsk to "
- "recover.");
- NVolSetErrors(vol);
- }
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- }
- }
- /* Release the mft record and the attribute. */
- if (status.mft_attr_mapped) {
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- }
- /* Release the runlist lock. */
- if (rl_write_locked)
- up_write(&ni->runlist.lock);
- else if (rl)
- up_read(&ni->runlist.lock);
- /*
- * Zero out any newly allocated blocks to avoid exposing stale data.
- * If BH_New is set, we know that the block was newly allocated above
- * and that it has not been fully zeroed and marked dirty yet.
- */
- nr_pages = u;
- u = 0;
- end = bh_cpos << vol->cluster_size_bits;
- do {
- folio = page_folio(pages[u]);
- bh = head = folio_buffers(folio);
- do {
- if (u == nr_pages &&
- folio_pos(folio) + bh_offset(bh) >= end)
- break;
- if (!buffer_new(bh))
- continue;
- clear_buffer_new(bh);
- if (!buffer_uptodate(bh)) {
- if (folio_test_uptodate(folio))
- set_buffer_uptodate(bh);
- else {
- folio_zero_range(folio, bh_offset(bh),
- blocksize);
- set_buffer_uptodate(bh);
- }
- }
- mark_buffer_dirty(bh);
- } while ((bh = bh->b_this_page) != head);
- } while (++u <= nr_pages);
- ntfs_error(vol->sb, "Failed. Returning error code %i.", err);
- return err;
-}
-
-static inline void ntfs_flush_dcache_pages(struct page **pages,
- unsigned nr_pages)
-{
- BUG_ON(!nr_pages);
- /*
- * Warning: Do not do the decrement at the same time as the call to
- * flush_dcache_page() because it is a NULL macro on i386 and hence the
- * decrement never happens so the loop never terminates.
- */
- do {
- --nr_pages;
- flush_dcache_page(pages[nr_pages]);
- } while (nr_pages > 0);
-}
-
-/**
- * ntfs_commit_pages_after_non_resident_write - commit the received data
- * @pages: array of destination pages
- * @nr_pages: number of pages in @pages
- * @pos: byte position in file at which the write begins
- * @bytes: number of bytes to be written
- *
- * See description of ntfs_commit_pages_after_write(), below.
- */
-static inline int ntfs_commit_pages_after_non_resident_write(
- struct page **pages, const unsigned nr_pages,
- s64 pos, size_t bytes)
-{
- s64 end, initialized_size;
- struct inode *vi;
- ntfs_inode *ni, *base_ni;
- struct buffer_head *bh, *head;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- unsigned long flags;
- unsigned blocksize, u;
- int err;
-
- vi = pages[0]->mapping->host;
- ni = NTFS_I(vi);
- blocksize = vi->i_sb->s_blocksize;
- end = pos + bytes;
- u = 0;
- do {
- s64 bh_pos;
- struct page *page;
- bool partial;
-
- page = pages[u];
- bh_pos = (s64)page->index << PAGE_SHIFT;
- bh = head = page_buffers(page);
- partial = false;
- do {
- s64 bh_end;
-
- bh_end = bh_pos + blocksize;
- if (bh_end <= pos || bh_pos >= end) {
- if (!buffer_uptodate(bh))
- partial = true;
- } else {
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- }
- } while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
- /*
- * If all buffers are now uptodate but the page is not, set the
- * page uptodate.
- */
- if (!partial && !PageUptodate(page))
- SetPageUptodate(page);
- } while (++u < nr_pages);
- /*
- * Finally, if we do not need to update initialized_size or i_size we
- * are finished.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (end <= initialized_size) {
- ntfs_debug("Done.");
- return 0;
- }
- /*
- * Update initialized_size/i_size as appropriate, both in the inode and
- * the mft record.
- */
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- /* Map, pin, and lock the mft record. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- BUG_ON(!NInoNonResident(ni));
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- a = ctx->attr;
- BUG_ON(!a->non_resident);
- write_lock_irqsave(&ni->size_lock, flags);
- BUG_ON(end > ni->allocated_size);
- ni->initialized_size = end;
- a->data.non_resident.initialized_size = cpu_to_sle64(end);
- if (end > i_size_read(vi)) {
- i_size_write(vi, end);
- a->data.non_resident.data_size =
- a->data.non_resident.initialized_size;
- }
- write_unlock_irqrestore(&ni->size_lock, flags);
- /* Mark the mft record dirty, so it gets written back. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- ntfs_debug("Done.");
- return 0;
-err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
- "code %i).", err);
- if (err != -ENOMEM)
- NVolSetErrors(ni->vol);
- return err;
-}
-
-/**
- * ntfs_commit_pages_after_write - commit the received data
- * @pages: array of destination pages
- * @nr_pages: number of pages in @pages
- * @pos: byte position in file at which the write begins
- * @bytes: number of bytes to be written
- *
- * This is called from ntfs_file_buffered_write() with i_mutex held on the inode
- * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are
- * locked but not kmap()ped. The source data has already been copied into the
- * @page. ntfs_prepare_pages_for_non_resident_write() has been called before
- * the data was copied (for non-resident attributes only) and it returned
- * success.
- *
- * Need to set uptodate and mark dirty all buffers within the boundary of the
- * write. If all buffers in a page are uptodate we set the page uptodate, too.
- *
- * Setting the buffers dirty ensures that they get written out later when
- * ntfs_writepage() is invoked by the VM.
- *
- * Finally, we need to update i_size and initialized_size as appropriate both
- * in the inode and the mft record.
- *
- * This is modelled after fs/buffer.c::generic_commit_write(), which marks
- * buffers uptodate and dirty, sets the page uptodate if all buffers in the
- * page are uptodate, and updates i_size if the end of io is beyond i_size. In
- * that case, it also marks the inode dirty.
- *
- * If things have gone as outlined in
- * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page
- * content modifications here for non-resident attributes. For resident
- * attributes we need to do the uptodate bringing here which we combine with
- * the copying into the mft record which means we save one atomic kmap.
- *
- * Return 0 on success or -errno on error.
- */
-static int ntfs_commit_pages_after_write(struct page **pages,
- const unsigned nr_pages, s64 pos, size_t bytes)
-{
- s64 end, initialized_size;
- loff_t i_size;
- struct inode *vi;
- ntfs_inode *ni, *base_ni;
- struct page *page;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- char *kattr, *kaddr;
- unsigned long flags;
- u32 attr_len;
- int err;
-
- BUG_ON(!nr_pages);
- BUG_ON(!pages);
- page = pages[0];
- BUG_ON(!page);
- vi = page->mapping->host;
- ni = NTFS_I(vi);
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
- "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
- vi->i_ino, ni->type, page->index, nr_pages,
- (long long)pos, bytes);
- if (NInoNonResident(ni))
- return ntfs_commit_pages_after_non_resident_write(pages,
- nr_pages, pos, bytes);
- BUG_ON(nr_pages > 1);
- /*
- * Attribute is resident, implying it is not compressed, encrypted, or
- * sparse.
- */
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- BUG_ON(NInoNonResident(ni));
- /* Map, pin, and lock the mft record. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- a = ctx->attr;
- BUG_ON(a->non_resident);
- /* The total length of the attribute value. */
- attr_len = le32_to_cpu(a->data.resident.value_length);
- i_size = i_size_read(vi);
- BUG_ON(attr_len != i_size);
- BUG_ON(pos > attr_len);
- end = pos + bytes;
- BUG_ON(end > le32_to_cpu(a->length) -
- le16_to_cpu(a->data.resident.value_offset));
- kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
- kaddr = kmap_atomic(page);
- /* Copy the received data from the page to the mft record. */
- memcpy(kattr + pos, kaddr + pos, bytes);
- /* Update the attribute length if necessary. */
- if (end > attr_len) {
- attr_len = end;
- a->data.resident.value_length = cpu_to_le32(attr_len);
- }
- /*
- * If the page is not uptodate, bring the out of bounds area(s)
- * uptodate by copying data from the mft record to the page.
- */
- if (!PageUptodate(page)) {
- if (pos > 0)
- memcpy(kaddr, kattr, pos);
- if (end < attr_len)
- memcpy(kaddr + end, kattr + end, attr_len - end);
- /* Zero the region outside the end of the attribute value. */
- memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len);
- flush_dcache_page(page);
- SetPageUptodate(page);
- }
- kunmap_atomic(kaddr);
- /* Update initialized_size/i_size if necessary. */
- read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
- BUG_ON(end > ni->allocated_size);
- read_unlock_irqrestore(&ni->size_lock, flags);
- BUG_ON(initialized_size != i_size);
- if (end > initialized_size) {
- write_lock_irqsave(&ni->size_lock, flags);
- ni->initialized_size = end;
- i_size_write(vi, end);
- write_unlock_irqrestore(&ni->size_lock, flags);
- }
- /* Mark the mft record dirty, so it gets written back. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- ntfs_debug("Done.");
- return 0;
-err_out:
- if (err == -ENOMEM) {
- ntfs_warning(vi->i_sb, "Error allocating memory required to "
- "commit the write.");
- if (PageUptodate(page)) {
- ntfs_warning(vi->i_sb, "Page is uptodate, setting "
- "dirty so the write will be retried "
- "later on by the VM.");
- /*
- * Put the page on mapping->dirty_pages, but leave its
- * buffers' dirty state as-is.
- */
- __set_page_dirty_nobuffers(page);
- err = 0;
- } else
- ntfs_error(vi->i_sb, "Page is not uptodate. Written "
- "data has been lost.");
- } else {
- ntfs_error(vi->i_sb, "Resident attribute commit write failed "
- "with error %i.", err);
- NVolSetErrors(ni->vol);
- }
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- return err;
-}
-
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied. If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
- unsigned ofs, struct iov_iter *i, size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- size_t total = 0;
- unsigned len, copied;
-
- do {
- len = PAGE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- copied = copy_page_from_iter_atomic(*pages, ofs, len, i);
- total += copied;
- bytes -= copied;
- if (!bytes)
- break;
- if (copied < len)
- goto err;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err:
- /* Zero the rest of the target like __copy_from_user(). */
- len = PAGE_SIZE - copied;
- do {
- if (len > bytes)
- len = bytes;
- zero_user(*pages, copied, len);
- bytes -= len;
- copied = 0;
- len = PAGE_SIZE;
- } while (++pages < last_page);
- goto out;
-}
-
-/**
- * ntfs_perform_write - perform buffered write to a file
- * @file: file to write to
- * @i: iov_iter with data to write
- * @pos: byte offset in file at which to begin writing to
- */
-static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
- loff_t pos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *vi = mapping->host;
- ntfs_inode *ni = NTFS_I(vi);
- ntfs_volume *vol = ni->vol;
- struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
- struct page *cached_page = NULL;
- VCN last_vcn;
- LCN lcn;
- size_t bytes;
- ssize_t status, written = 0;
- unsigned nr_pages;
-
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
- "0x%llx, count 0x%lx.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)pos,
- (unsigned long)iov_iter_count(i));
- /*
- * If a previous ntfs_truncate() failed, repeat it and abort if it
- * fails again.
- */
- if (unlikely(NInoTruncateFailed(ni))) {
- int err;
-
- inode_dio_wait(vi);
- err = ntfs_truncate(vi);
- if (err || NInoTruncateFailed(ni)) {
- if (!err)
- err = -EIO;
- ntfs_error(vol->sb, "Cannot perform write to inode "
- "0x%lx, attribute type 0x%x, because "
- "ntfs_truncate() failed (error code "
- "%i).", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- return err;
- }
- }
- /*
- * Determine the number of pages per cluster for non-resident
- * attributes.
- */
- nr_pages = 1;
- if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni))
- nr_pages = vol->cluster_size >> PAGE_SHIFT;
- last_vcn = -1;
- do {
- VCN vcn;
- pgoff_t start_idx;
- unsigned ofs, do_pages, u;
- size_t copied;
-
- start_idx = pos >> PAGE_SHIFT;
- ofs = pos & ~PAGE_MASK;
- bytes = PAGE_SIZE - ofs;
- do_pages = 1;
- if (nr_pages > 1) {
- vcn = pos >> vol->cluster_size_bits;
- if (vcn != last_vcn) {
- last_vcn = vcn;
- /*
- * Get the lcn of the vcn the write is in. If
- * it is a hole, need to lock down all pages in
- * the cluster.
- */
- down_read(&ni->runlist.lock);
- lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
- vol->cluster_size_bits, false);
- up_read(&ni->runlist.lock);
- if (unlikely(lcn < LCN_HOLE)) {
- if (lcn == LCN_ENOMEM)
- status = -ENOMEM;
- else {
- status = -EIO;
- ntfs_error(vol->sb, "Cannot "
- "perform write to "
- "inode 0x%lx, "
- "attribute type 0x%x, "
- "because the attribute "
- "is corrupt.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- }
- break;
- }
- if (lcn == LCN_HOLE) {
- start_idx = (pos & ~(s64)
- vol->cluster_size_mask)
- >> PAGE_SHIFT;
- bytes = vol->cluster_size - (pos &
- vol->cluster_size_mask);
- do_pages = nr_pages;
- }
- }
- }
- if (bytes > iov_iter_count(i))
- bytes = iov_iter_count(i);
-again:
- /*
- * Bring in the user page(s) that we will copy from _first_.
- * Otherwise there is a nasty deadlock on copying from the same
- * page(s) as we are writing to, without it/them being marked
- * up-to-date. Note, at present there is nothing to stop the
- * pages being swapped out between us bringing them into memory
- * and doing the actual copying.
- */
- if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
- status = -EFAULT;
- break;
- }
- /* Get and lock @do_pages starting at index @start_idx. */
- status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
- pages, &cached_page);
- if (unlikely(status))
- break;
- /*
- * For non-resident attributes, we need to fill any holes with
- * actual clusters and ensure all bufferes are mapped. We also
- * need to bring uptodate any buffers that are only partially
- * being written to.
- */
- if (NInoNonResident(ni)) {
- status = ntfs_prepare_pages_for_non_resident_write(
- pages, do_pages, pos, bytes);
- if (unlikely(status)) {
- do {
- unlock_page(pages[--do_pages]);
- put_page(pages[do_pages]);
- } while (do_pages);
- break;
- }
- }
- u = (pos >> PAGE_SHIFT) - pages[0]->index;
- copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
- i, bytes);
- ntfs_flush_dcache_pages(pages + u, do_pages - u);
- status = 0;
- if (likely(copied == bytes)) {
- status = ntfs_commit_pages_after_write(pages, do_pages,
- pos, bytes);
- }
- do {
- unlock_page(pages[--do_pages]);
- put_page(pages[do_pages]);
- } while (do_pages);
- if (unlikely(status < 0)) {
- iov_iter_revert(i, copied);
- break;
- }
- cond_resched();
- if (unlikely(copied < bytes)) {
- iov_iter_revert(i, copied);
- if (copied)
- bytes = copied;
- else if (bytes > PAGE_SIZE - ofs)
- bytes = PAGE_SIZE - ofs;
- goto again;
- }
- pos += copied;
- written += copied;
- balance_dirty_pages_ratelimited(mapping);
- if (fatal_signal_pending(current)) {
- status = -EINTR;
- break;
- }
- } while (iov_iter_count(i));
- if (cached_page)
- put_page(cached_page);
- ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
- written ? "written" : "status", (unsigned long)written,
- (long)status);
- return written ? written : status;
-}
-
-/**
- * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
- * @iocb: IO state structure
- * @from: iov_iter with data to write
- *
- * Basically the same as generic_file_write_iter() except that it ends up
- * up calling ntfs_perform_write() instead of generic_perform_write() and that
- * O_DIRECT is not implemented.
- */
-static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct file *file = iocb->ki_filp;
- struct inode *vi = file_inode(file);
- ssize_t written = 0;
- ssize_t err;
-
- inode_lock(vi);
- /* We can write back this queue in page reclaim. */
- err = ntfs_prepare_file_for_write(iocb, from);
- if (iov_iter_count(from) && !err)
- written = ntfs_perform_write(file, from, iocb->ki_pos);
- inode_unlock(vi);
- iocb->ki_pos += written;
- if (likely(written > 0))
- written = generic_write_sync(iocb, written);
- return written ? written : err;
-}
-
-/**
- * ntfs_file_fsync - sync a file to disk
- * @filp: file to be synced
- * @datasync: if non-zero only flush user data and not metadata
- *
- * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync
- * system calls. This function is inspired by fs/buffer.c::file_fsync().
- *
- * If @datasync is false, write the mft record and all associated extent mft
- * records as well as the $DATA attribute and then sync the block device.
- *
- * If @datasync is true and the attribute is non-resident, we skip the writing
- * of the mft record and all associated extent mft records (this might still
- * happen due to the write_inode_now() call).
- *
- * Also, if @datasync is true, we do not wait on the inode to be written out
- * but we always wait on the page cache pages to be written out.
- *
- * Locking: Caller must hold i_mutex on the inode.
- *
- * TODO: We should probably also write all attribute/index inodes associated
- * with this inode but since we have no simple way of getting to them we ignore
- * this problem for now.
- */
-static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync)
-{
- struct inode *vi = filp->f_mapping->host;
- int err, ret = 0;
-
- ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
-
- err = file_write_and_wait_range(filp, start, end);
- if (err)
- return err;
- inode_lock(vi);
-
- BUG_ON(S_ISDIR(vi->i_mode));
- if (!datasync || !NInoNonResident(NTFS_I(vi)))
- ret = __ntfs_write_inode(vi, 1);
- write_inode_now(vi, !datasync);
- /*
- * NOTE: If we were to use mapping->private_list (see ext2 and
- * fs/buffer.c) for dirty blocks then we could optimize the below to be
- * sync_mapping_buffers(vi->i_mapping).
- */
- err = sync_blockdev(vi->i_sb->s_bdev);
- if (unlikely(err && !ret))
- ret = err;
- if (likely(!ret))
- ntfs_debug("Done.");
- else
- ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
- "%u.", datasync ? "data" : "", vi->i_ino, -ret);
- inode_unlock(vi);
- return ret;
-}
-
-#endif /* NTFS_RW */
-
-const struct file_operations ntfs_file_ops = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
-#ifdef NTFS_RW
- .write_iter = ntfs_file_write_iter,
- .fsync = ntfs_file_fsync,
-#endif /* NTFS_RW */
- .mmap = generic_file_mmap,
- .open = ntfs_file_open,
- .splice_read = filemap_splice_read,
-};
-
-const struct inode_operations ntfs_file_inode_ops = {
-#ifdef NTFS_RW
- .setattr = ntfs_setattr,
-#endif /* NTFS_RW */
-};
-
-const struct file_operations ntfs_empty_file_ops = {};
-
-const struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
deleted file mode 100644
index d46c2c03a032..000000000000
--- a/fs/ntfs/index.c
+++ /dev/null
@@ -1,440 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * index.c - NTFS kernel index handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2004-2005 Anton Altaparmakov
- */
-
-#include <linux/slab.h>
-
-#include "aops.h"
-#include "collate.h"
-#include "debug.h"
-#include "index.h"
-#include "ntfs.h"
-
-/**
- * ntfs_index_ctx_get - allocate and initialize a new index context
- * @idx_ni: ntfs index inode with which to initialize the context
- *
- * Allocate a new index context, initialize it with @idx_ni and return it.
- * Return NULL if allocation failed.
- *
- * Locking: Caller must hold i_mutex on the index inode.
- */
-ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
-{
- ntfs_index_context *ictx;
-
- ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
- if (ictx)
- *ictx = (ntfs_index_context){ .idx_ni = idx_ni };
- return ictx;
-}
-
-/**
- * ntfs_index_ctx_put - release an index context
- * @ictx: index context to free
- *
- * Release the index context @ictx, releasing all associated resources.
- *
- * Locking: Caller must hold i_mutex on the index inode.
- */
-void ntfs_index_ctx_put(ntfs_index_context *ictx)
-{
- if (ictx->entry) {
- if (ictx->is_in_root) {
- if (ictx->actx)
- ntfs_attr_put_search_ctx(ictx->actx);
- if (ictx->base_ni)
- unmap_mft_record(ictx->base_ni);
- } else {
- struct page *page = ictx->page;
- if (page) {
- BUG_ON(!PageLocked(page));
- unlock_page(page);
- ntfs_unmap_page(page);
- }
- }
- }
- kmem_cache_free(ntfs_index_ctx_cache, ictx);
- return;
-}
-
-/**
- * ntfs_index_lookup - find a key in an index and return its index entry
- * @key: [IN] key for which to search in the index
- * @key_len: [IN] length of @key in bytes
- * @ictx: [IN/OUT] context describing the index and the returned entry
- *
- * Before calling ntfs_index_lookup(), @ictx must have been obtained from a
- * call to ntfs_index_ctx_get().
- *
- * Look for the @key in the index specified by the index lookup context @ictx.
- * ntfs_index_lookup() walks the contents of the index looking for the @key.
- *
- * If the @key is found in the index, 0 is returned and @ictx is setup to
- * describe the index entry containing the matching @key. @ictx->entry is the
- * index entry and @ictx->data and @ictx->data_len are the index entry data and
- * its length in bytes, respectively.
- *
- * If the @key is not found in the index, -ENOENT is returned and @ictx is
- * setup to describe the index entry whose key collates immediately after the
- * search @key, i.e. this is the position in the index at which an index entry
- * with a key of @key would need to be inserted.
- *
- * If an error occurs return the negative error code and @ictx is left
- * untouched.
- *
- * When finished with the entry and its data, call ntfs_index_ctx_put() to free
- * the context and other associated resources.
- *
- * If the index entry was modified, call flush_dcache_index_entry_page()
- * immediately after the modification and either ntfs_index_entry_mark_dirty()
- * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
- * ensure that the changes are written to disk.
- *
- * Locking: - Caller must hold i_mutex on the index inode.
- * - Each page cache page in the index allocation mapping must be
- * locked whilst being accessed otherwise we may find a corrupt
- * page due to it being under ->writepage at the moment which
- * applies the mst protection fixups before writing out and then
- * removes them again after the write is complete after which it
- * unlocks the page.
- */
-int ntfs_index_lookup(const void *key, const int key_len,
- ntfs_index_context *ictx)
-{
- VCN vcn, old_vcn;
- ntfs_inode *idx_ni = ictx->idx_ni;
- ntfs_volume *vol = idx_ni->vol;
- struct super_block *sb = vol->sb;
- ntfs_inode *base_ni = idx_ni->ext.base_ntfs_ino;
- MFT_RECORD *m;
- INDEX_ROOT *ir;
- INDEX_ENTRY *ie;
- INDEX_ALLOCATION *ia;
- u8 *index_end, *kaddr;
- ntfs_attr_search_ctx *actx;
- struct address_space *ia_mapping;
- struct page *page;
- int rc, err = 0;
-
- ntfs_debug("Entering.");
- BUG_ON(!NInoAttr(idx_ni));
- BUG_ON(idx_ni->type != AT_INDEX_ALLOCATION);
- BUG_ON(idx_ni->nr_extents != -1);
- BUG_ON(!base_ni);
- BUG_ON(!key);
- BUG_ON(key_len <= 0);
- if (!ntfs_is_collation_rule_supported(
- idx_ni->itype.index.collation_rule)) {
- ntfs_error(sb, "Index uses unsupported collation rule 0x%x. "
- "Aborting lookup.", le32_to_cpu(
- idx_ni->itype.index.collation_rule));
- return -EOPNOTSUPP;
- }
- /* Get hold of the mft record for the index inode. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- ntfs_error(sb, "map_mft_record() failed with error code %ld.",
- -PTR_ERR(m));
- return PTR_ERR(m);
- }
- actx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!actx)) {
- err = -ENOMEM;
- goto err_out;
- }
- /* Find the index root attribute in the mft record. */
- err = ntfs_attr_lookup(AT_INDEX_ROOT, idx_ni->name, idx_ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, actx);
- if (unlikely(err)) {
- if (err == -ENOENT) {
- ntfs_error(sb, "Index root attribute missing in inode "
- "0x%lx.", idx_ni->mft_no);
- err = -EIO;
- }
- goto err_out;
- }
- /* Get to the index root value (it has been verified in read_inode). */
- ir = (INDEX_ROOT*)((u8*)actx->attr +
- le16_to_cpu(actx->attr->data.resident.value_offset));
- index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ir->index +
- le32_to_cpu(ir->index.entries_offset));
- /*
- * Loop until we exceed valid memory (corruption case) or until we
- * reach the last entry.
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- /* Bounds checks. */
- if ((u8*)ie < (u8*)actx->mrec || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->length) > index_end)
- goto idx_err_out;
- /*
- * The last entry cannot contain a key. It can however contain
- * a pointer to a child node in the B+tree so we just break out.
- */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /* Further bounds checks. */
- if ((u32)sizeof(INDEX_ENTRY_HEADER) +
- le16_to_cpu(ie->key_length) >
- le16_to_cpu(ie->data.vi.data_offset) ||
- (u32)le16_to_cpu(ie->data.vi.data_offset) +
- le16_to_cpu(ie->data.vi.data_length) >
- le16_to_cpu(ie->length))
- goto idx_err_out;
- /* If the keys match perfectly, we setup @ictx and return 0. */
- if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
- &ie->key, key_len)) {
-ir_done:
- ictx->is_in_root = true;
- ictx->ir = ir;
- ictx->actx = actx;
- ictx->base_ni = base_ni;
- ictx->ia = NULL;
- ictx->page = NULL;
-done:
- ictx->entry = ie;
- ictx->data = (u8*)ie +
- le16_to_cpu(ie->data.vi.data_offset);
- ictx->data_len = le16_to_cpu(ie->data.vi.data_length);
- ntfs_debug("Done.");
- return err;
- }
- /*
- * Not a perfect match, need to do full blown collation so we
- * know which way in the B+tree we have to go.
- */
- rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key,
- key_len, &ie->key, le16_to_cpu(ie->key_length));
- /*
- * If @key collates before the key of the current entry, there
- * is definitely no such key in this index but we might need to
- * descend into the B+tree so we just break out of the loop.
- */
- if (rc == -1)
- break;
- /*
- * A match should never happen as the memcmp() call should have
- * cought it, but we still treat it correctly.
- */
- if (!rc)
- goto ir_done;
- /* The keys are not equal, continue the search. */
- }
- /*
- * We have finished with this index without success. Check for the
- * presence of a child node and if not present setup @ictx and return
- * -ENOENT.
- */
- if (!(ie->flags & INDEX_ENTRY_NODE)) {
- ntfs_debug("Entry not found.");
- err = -ENOENT;
- goto ir_done;
- } /* Child node present, descend into it. */
- /* Consistency check: Verify that an index allocation exists. */
- if (!NInoIndexAllocPresent(idx_ni)) {
- ntfs_error(sb, "No index allocation attribute but index entry "
- "requires one. Inode 0x%lx is corrupt or "
- "driver bug.", idx_ni->mft_no);
- goto err_out;
- }
- /* Get the starting vcn of the index_block holding the child node. */
- vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
- ia_mapping = VFS_I(idx_ni)->i_mapping;
- /*
- * We are done with the index root and the mft record. Release them,
- * otherwise we deadlock with ntfs_map_page().
- */
- ntfs_attr_put_search_ctx(actx);
- unmap_mft_record(base_ni);
- m = NULL;
- actx = NULL;
-descend_into_child_node:
- /*
- * Convert vcn to index into the index allocation attribute in units
- * of PAGE_SIZE and map the page cache page, reading it from
- * disk if necessary.
- */
- page = ntfs_map_page(ia_mapping, vcn <<
- idx_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- ntfs_error(sb, "Failed to map index page, error %ld.",
- -PTR_ERR(page));
- err = PTR_ERR(page);
- goto err_out;
- }
- lock_page(page);
- kaddr = (u8*)page_address(page);
-fast_descend_into_child_node:
- /* Get to the index allocation block. */
- ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
- idx_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
- /* Bounds checks. */
- if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
- ntfs_error(sb, "Out of bounds check failed. Corrupt inode "
- "0x%lx or driver bug.", idx_ni->mft_no);
- goto unm_err_out;
- }
- /* Catch multi sector transfer fixup errors. */
- if (unlikely(!ntfs_is_indx_record(ia->magic))) {
- ntfs_error(sb, "Index record with vcn 0x%llx is corrupt. "
- "Corrupt inode 0x%lx. Run chkdsk.",
- (long long)vcn, idx_ni->mft_no);
- goto unm_err_out;
- }
- if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
- ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
- "different from expected VCN (0x%llx). Inode "
- "0x%lx is corrupt or driver bug.",
- (unsigned long long)
- sle64_to_cpu(ia->index_block_vcn),
- (unsigned long long)vcn, idx_ni->mft_no);
- goto unm_err_out;
- }
- if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
- idx_ni->itype.index.block_size) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx has "
- "a size (%u) differing from the index "
- "specified size (%u). Inode is corrupt or "
- "driver bug.", (unsigned long long)vcn,
- idx_ni->mft_no,
- le32_to_cpu(ia->index.allocated_size) + 0x18,
- idx_ni->itype.index.block_size);
- goto unm_err_out;
- }
- index_end = (u8*)ia + idx_ni->itype.index.block_size;
- if (index_end > kaddr + PAGE_SIZE) {
- ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx "
- "crosses page boundary. Impossible! Cannot "
- "access! This is probably a bug in the "
- "driver.", (unsigned long long)vcn,
- idx_ni->mft_no);
- goto unm_err_out;
- }
- index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
- if (index_end > (u8*)ia + idx_ni->itype.index.block_size) {
- ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of inode "
- "0x%lx exceeds maximum size.",
- (unsigned long long)vcn, idx_ni->mft_no);
- goto unm_err_out;
- }
- /* The first index entry. */
- ie = (INDEX_ENTRY*)((u8*)&ia->index +
- le32_to_cpu(ia->index.entries_offset));
- /*
- * Iterate similar to above big loop but applied to index buffer, thus
- * loop until we exceed valid memory (corruption case) or until we
- * reach the last entry.
- */
- for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
- /* Bounds checks. */
- if ((u8*)ie < (u8*)ia || (u8*)ie +
- sizeof(INDEX_ENTRY_HEADER) > index_end ||
- (u8*)ie + le16_to_cpu(ie->length) > index_end) {
- ntfs_error(sb, "Index entry out of bounds in inode "
- "0x%lx.", idx_ni->mft_no);
- goto unm_err_out;
- }
- /*
- * The last entry cannot contain a key. It can however contain
- * a pointer to a child node in the B+tree so we just break out.
- */
- if (ie->flags & INDEX_ENTRY_END)
- break;
- /* Further bounds checks. */
- if ((u32)sizeof(INDEX_ENTRY_HEADER) +
- le16_to_cpu(ie->key_length) >
- le16_to_cpu(ie->data.vi.data_offset) ||
- (u32)le16_to_cpu(ie->data.vi.data_offset) +
- le16_to_cpu(ie->data.vi.data_length) >
- le16_to_cpu(ie->length)) {
- ntfs_error(sb, "Index entry out of bounds in inode "
- "0x%lx.", idx_ni->mft_no);
- goto unm_err_out;
- }
- /* If the keys match perfectly, we setup @ictx and return 0. */
- if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
- &ie->key, key_len)) {
-ia_done:
- ictx->is_in_root = false;
- ictx->actx = NULL;
- ictx->base_ni = NULL;
- ictx->ia = ia;
- ictx->page = page;
- goto done;
- }
- /*
- * Not a perfect match, need to do full blown collation so we
- * know which way in the B+tree we have to go.
- */
- rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key,
- key_len, &ie->key, le16_to_cpu(ie->key_length));
- /*
- * If @key collates before the key of the current entry, there
- * is definitely no such key in this index but we might need to
- * descend into the B+tree so we just break out of the loop.
- */
- if (rc == -1)
- break;
- /*
- * A match should never happen as the memcmp() call should have
- * cought it, but we still treat it correctly.
- */
- if (!rc)
- goto ia_done;
- /* The keys are not equal, continue the search. */
- }
- /*
- * We have finished with this index buffer without success. Check for
- * the presence of a child node and if not present return -ENOENT.
- */
- if (!(ie->flags & INDEX_ENTRY_NODE)) {
- ntfs_debug("Entry not found.");
- err = -ENOENT;
- goto ia_done;
- }
- if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
- ntfs_error(sb, "Index entry with child node found in a leaf "
- "node in inode 0x%lx.", idx_ni->mft_no);
- goto unm_err_out;
- }
- /* Child node present, descend into it. */
- old_vcn = vcn;
- vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
- if (vcn >= 0) {
- /*
- * If vcn is in the same page cache page as old_vcn we recycle
- * the mapped page.
- */
- if (old_vcn << vol->cluster_size_bits >>
- PAGE_SHIFT == vcn <<
- vol->cluster_size_bits >>
- PAGE_SHIFT)
- goto fast_descend_into_child_node;
- unlock_page(page);
- ntfs_unmap_page(page);
- goto descend_into_child_node;
- }
- ntfs_error(sb, "Negative child node vcn in inode 0x%lx.",
- idx_ni->mft_no);
-unm_err_out:
- unlock_page(page);
- ntfs_unmap_page(page);
-err_out:
- if (!err)
- err = -EIO;
- if (actx)
- ntfs_attr_put_search_ctx(actx);
- if (m)
- unmap_mft_record(base_ni);
- return err;
-idx_err_out:
- ntfs_error(sb, "Corrupt index. Aborting lookup.");
- goto err_out;
-}
diff --git a/fs/ntfs/index.h b/fs/ntfs/index.h
deleted file mode 100644
index bb3c3ae55138..000000000000
--- a/fs/ntfs/index.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * index.h - Defines for NTFS kernel index handling. Part of the Linux-NTFS
- * project.
- *
- * Copyright (c) 2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_INDEX_H
-#define _LINUX_NTFS_INDEX_H
-
-#include <linux/fs.h>
-
-#include "types.h"
-#include "layout.h"
-#include "inode.h"
-#include "attrib.h"
-#include "mft.h"
-#include "aops.h"
-
-/**
- * @idx_ni: index inode containing the @entry described by this context
- * @entry: index entry (points into @ir or @ia)
- * @data: index entry data (points into @entry)
- * @data_len: length in bytes of @data
- * @is_in_root: 'true' if @entry is in @ir and 'false' if it is in @ia
- * @ir: index root if @is_in_root and NULL otherwise
- * @actx: attribute search context if @is_in_root and NULL otherwise
- * @base_ni: base inode if @is_in_root and NULL otherwise
- * @ia: index block if @is_in_root is 'false' and NULL otherwise
- * @page: page if @is_in_root is 'false' and NULL otherwise
- *
- * @idx_ni is the index inode this context belongs to.
- *
- * @entry is the index entry described by this context. @data and @data_len
- * are the index entry data and its length in bytes, respectively. @data
- * simply points into @entry. This is probably what the user is interested in.
- *
- * If @is_in_root is 'true', @entry is in the index root attribute @ir described
- * by the attribute search context @actx and the base inode @base_ni. @ia and
- * @page are NULL in this case.
- *
- * If @is_in_root is 'false', @entry is in the index allocation attribute and @ia
- * and @page point to the index allocation block and the mapped, locked page it
- * is in, respectively. @ir, @actx and @base_ni are NULL in this case.
- *
- * To obtain a context call ntfs_index_ctx_get().
- *
- * We use this context to allow ntfs_index_lookup() to return the found index
- * @entry and its @data without having to allocate a buffer and copy the @entry
- * and/or its @data into it.
- *
- * When finished with the @entry and its @data, call ntfs_index_ctx_put() to
- * free the context and other associated resources.
- *
- * If the index entry was modified, call flush_dcache_index_entry_page()
- * immediately after the modification and either ntfs_index_entry_mark_dirty()
- * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
- * ensure that the changes are written to disk.
- */
-typedef struct {
- ntfs_inode *idx_ni;
- INDEX_ENTRY *entry;
- void *data;
- u16 data_len;
- bool is_in_root;
- INDEX_ROOT *ir;
- ntfs_attr_search_ctx *actx;
- ntfs_inode *base_ni;
- INDEX_ALLOCATION *ia;
- struct page *page;
-} ntfs_index_context;
-
-extern ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni);
-extern void ntfs_index_ctx_put(ntfs_index_context *ictx);
-
-extern int ntfs_index_lookup(const void *key, const int key_len,
- ntfs_index_context *ictx);
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_index_entry_flush_dcache_page - flush_dcache_page() for index entries
- * @ictx: ntfs index context describing the index entry
- *
- * Call flush_dcache_page() for the page in which an index entry resides.
- *
- * This must be called every time an index entry is modified, just after the
- * modification.
- *
- * If the index entry is in the index root attribute, simply flush the page
- * containing the mft record containing the index root attribute.
- *
- * If the index entry is in an index block belonging to the index allocation
- * attribute, simply flush the page cache page containing the index block.
- */
-static inline void ntfs_index_entry_flush_dcache_page(ntfs_index_context *ictx)
-{
- if (ictx->is_in_root)
- flush_dcache_mft_record_page(ictx->actx->ntfs_ino);
- else
- flush_dcache_page(ictx->page);
-}
-
-/**
- * ntfs_index_entry_mark_dirty - mark an index entry dirty
- * @ictx: ntfs index context describing the index entry
- *
- * Mark the index entry described by the index entry context @ictx dirty.
- *
- * If the index entry is in the index root attribute, simply mark the mft
- * record containing the index root attribute dirty. This ensures the mft
- * record, and hence the index root attribute, will be written out to disk
- * later.
- *
- * If the index entry is in an index block belonging to the index allocation
- * attribute, mark the buffers belonging to the index record as well as the
- * page cache page the index block is in dirty. This automatically marks the
- * VFS inode of the ntfs index inode to which the index entry belongs dirty,
- * too (I_DIRTY_PAGES) and this in turn ensures the page buffers, and hence the
- * dirty index block, will be written out to disk later.
- */
-static inline void ntfs_index_entry_mark_dirty(ntfs_index_context *ictx)
-{
- if (ictx->is_in_root)
- mark_mft_record_dirty(ictx->actx->ntfs_ino);
- else
- mark_ntfs_record_dirty(ictx->page,
- (u8*)ictx->ia - (u8*)page_address(ictx->page));
-}
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_INDEX_H */
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
deleted file mode 100644
index aba1e22db4e9..000000000000
--- a/fs/ntfs/inode.c
+++ /dev/null
@@ -1,3102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * inode.c - NTFS kernel inode handling.
- *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
- */
-
-#include <linux/buffer_head.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/mount.h>
-#include <linux/mutex.h>
-#include <linux/pagemap.h>
-#include <linux/quotaops.h>
-#include <linux/slab.h>
-#include <linux/log2.h>
-
-#include "aops.h"
-#include "attrib.h"
-#include "bitmap.h"
-#include "dir.h"
-#include "debug.h"
-#include "inode.h"
-#include "lcnalloc.h"
-#include "malloc.h"
-#include "mft.h"
-#include "time.h"
-#include "ntfs.h"
-
-/**
- * ntfs_test_inode - compare two (possibly fake) inodes for equality
- * @vi: vfs inode which to test
- * @data: data which is being tested with
- *
- * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
- * inode @vi for equality with the ntfs attribute @data.
- *
- * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
- * @na->name and @na->name_len are then ignored.
- *
- * Return 1 if the attributes match and 0 if not.
- *
- * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
- * allowed to sleep.
- */
-int ntfs_test_inode(struct inode *vi, void *data)
-{
- ntfs_attr *na = (ntfs_attr *)data;
- ntfs_inode *ni;
-
- if (vi->i_ino != na->mft_no)
- return 0;
- ni = NTFS_I(vi);
- /* If !NInoAttr(ni), @vi is a normal file or directory inode. */
- if (likely(!NInoAttr(ni))) {
- /* If not looking for a normal inode this is a mismatch. */
- if (unlikely(na->type != AT_UNUSED))
- return 0;
- } else {
- /* A fake inode describing an attribute. */
- if (ni->type != na->type)
- return 0;
- if (ni->name_len != na->name_len)
- return 0;
- if (na->name_len && memcmp(ni->name, na->name,
- na->name_len * sizeof(ntfschar)))
- return 0;
- }
- /* Match! */
- return 1;
-}
-
-/**
- * ntfs_init_locked_inode - initialize an inode
- * @vi: vfs inode to initialize
- * @data: data which to initialize @vi to
- *
- * Initialize the vfs inode @vi with the values from the ntfs attribute @data in
- * order to enable ntfs_test_inode() to do its work.
- *
- * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
- * In that case, @na->name and @na->name_len should be set to NULL and 0,
- * respectively. Although that is not strictly necessary as
- * ntfs_read_locked_inode() will fill them in later.
- *
- * Return 0 on success and -errno on error.
- *
- * NOTE: This function runs with the inode->i_lock spin lock held so it is not
- * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
- */
-static int ntfs_init_locked_inode(struct inode *vi, void *data)
-{
- ntfs_attr *na = (ntfs_attr *)data;
- ntfs_inode *ni = NTFS_I(vi);
-
- vi->i_ino = na->mft_no;
-
- ni->type = na->type;
- if (na->type == AT_INDEX_ALLOCATION)
- NInoSetMstProtected(ni);
-
- ni->name = na->name;
- ni->name_len = na->name_len;
-
- /* If initializing a normal inode, we are done. */
- if (likely(na->type == AT_UNUSED)) {
- BUG_ON(na->name);
- BUG_ON(na->name_len);
- return 0;
- }
-
- /* It is a fake inode. */
- NInoSetAttr(ni);
-
- /*
- * We have I30 global constant as an optimization as it is the name
- * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
- * allocation but that is ok. And most attributes are unnamed anyway,
- * thus the fraction of named attributes with name != I30 is actually
- * absolutely tiny.
- */
- if (na->name_len && na->name != I30) {
- unsigned int i;
-
- BUG_ON(!na->name);
- i = na->name_len * sizeof(ntfschar);
- ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
- if (!ni->name)
- return -ENOMEM;
- memcpy(ni->name, na->name, i);
- ni->name[na->name_len] = 0;
- }
- return 0;
-}
-
-static int ntfs_read_locked_inode(struct inode *vi);
-static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
-static int ntfs_read_locked_index_inode(struct inode *base_vi,
- struct inode *vi);
-
-/**
- * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
- * @sb: super block of mounted volume
- * @mft_no: mft record number / inode number to obtain
- *
- * Obtain the struct inode corresponding to a specific normal inode (i.e. a
- * file or directory).
- *
- * If the inode is in the cache, it is just returned with an increased
- * reference count. Otherwise, a new struct inode is allocated and initialized,
- * and finally ntfs_read_locked_inode() is called to read in the inode and
- * fill in the remainder of the inode structure.
- *
- * Return the struct inode on success. Check the return value with IS_ERR() and
- * if true, the function failed and the error code is obtained from PTR_ERR().
- */
-struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
-{
- struct inode *vi;
- int err;
- ntfs_attr na;
-
- na.mft_no = mft_no;
- na.type = AT_UNUSED;
- na.name = NULL;
- na.name_len = 0;
-
- vi = iget5_locked(sb, mft_no, ntfs_test_inode,
- ntfs_init_locked_inode, &na);
- if (unlikely(!vi))
- return ERR_PTR(-ENOMEM);
-
- err = 0;
-
- /* If this is a freshly allocated inode, need to read it now. */
- if (vi->i_state & I_NEW) {
- err = ntfs_read_locked_inode(vi);
- unlock_new_inode(vi);
- }
- /*
- * There is no point in keeping bad inodes around if the failure was
- * due to ENOMEM. We want to be able to retry again later.
- */
- if (unlikely(err == -ENOMEM)) {
- iput(vi);
- vi = ERR_PTR(err);
- }
- return vi;
-}
-
-/**
- * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
- * @base_vi: vfs base inode containing the attribute
- * @type: attribute type
- * @name: Unicode name of the attribute (NULL if unnamed)
- * @name_len: length of @name in Unicode characters (0 if unnamed)
- *
- * Obtain the (fake) struct inode corresponding to the attribute specified by
- * @type, @name, and @name_len, which is present in the base mft record
- * specified by the vfs inode @base_vi.
- *
- * If the attribute inode is in the cache, it is just returned with an
- * increased reference count. Otherwise, a new struct inode is allocated and
- * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
- * attribute and fill in the inode structure.
- *
- * Note, for index allocation attributes, you need to use ntfs_index_iget()
- * instead of ntfs_attr_iget() as working with indices is a lot more complex.
- *
- * Return the struct inode of the attribute inode on success. Check the return
- * value with IS_ERR() and if true, the function failed and the error code is
- * obtained from PTR_ERR().
- */
-struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
- ntfschar *name, u32 name_len)
-{
- struct inode *vi;
- int err;
- ntfs_attr na;
-
- /* Make sure no one calls ntfs_attr_iget() for indices. */
- BUG_ON(type == AT_INDEX_ALLOCATION);
-
- na.mft_no = base_vi->i_ino;
- na.type = type;
- na.name = name;
- na.name_len = name_len;
-
- vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
- ntfs_init_locked_inode, &na);
- if (unlikely(!vi))
- return ERR_PTR(-ENOMEM);
-
- err = 0;
-
- /* If this is a freshly allocated inode, need to read it now. */
- if (vi->i_state & I_NEW) {
- err = ntfs_read_locked_attr_inode(base_vi, vi);
- unlock_new_inode(vi);
- }
- /*
- * There is no point in keeping bad attribute inodes around. This also
- * simplifies things in that we never need to check for bad attribute
- * inodes elsewhere.
- */
- if (unlikely(err)) {
- iput(vi);
- vi = ERR_PTR(err);
- }
- return vi;
-}
-
-/**
- * ntfs_index_iget - obtain a struct inode corresponding to an index
- * @base_vi: vfs base inode containing the index related attributes
- * @name: Unicode name of the index
- * @name_len: length of @name in Unicode characters
- *
- * Obtain the (fake) struct inode corresponding to the index specified by @name
- * and @name_len, which is present in the base mft record specified by the vfs
- * inode @base_vi.
- *
- * If the index inode is in the cache, it is just returned with an increased
- * reference count. Otherwise, a new struct inode is allocated and
- * initialized, and finally ntfs_read_locked_index_inode() is called to read
- * the index related attributes and fill in the inode structure.
- *
- * Return the struct inode of the index inode on success. Check the return
- * value with IS_ERR() and if true, the function failed and the error code is
- * obtained from PTR_ERR().
- */
-struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
- u32 name_len)
-{
- struct inode *vi;
- int err;
- ntfs_attr na;
-
- na.mft_no = base_vi->i_ino;
- na.type = AT_INDEX_ALLOCATION;
- na.name = name;
- na.name_len = name_len;
-
- vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
- ntfs_init_locked_inode, &na);
- if (unlikely(!vi))
- return ERR_PTR(-ENOMEM);
-
- err = 0;
-
- /* If this is a freshly allocated inode, need to read it now. */
- if (vi->i_state & I_NEW) {
- err = ntfs_read_locked_index_inode(base_vi, vi);
- unlock_new_inode(vi);
- }
- /*
- * There is no point in keeping bad index inodes around. This also
- * simplifies things in that we never need to check for bad index
- * inodes elsewhere.
- */
- if (unlikely(err)) {
- iput(vi);
- vi = ERR_PTR(err);
- }
- return vi;
-}
-
-struct inode *ntfs_alloc_big_inode(struct super_block *sb)
-{
- ntfs_inode *ni;
-
- ntfs_debug("Entering.");
- ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
- if (likely(ni != NULL)) {
- ni->state = 0;
- return VFS_I(ni);
- }
- ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
- return NULL;
-}
-
-void ntfs_free_big_inode(struct inode *inode)
-{
- kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
-}
-
-static inline ntfs_inode *ntfs_alloc_extent_inode(void)
-{
- ntfs_inode *ni;
-
- ntfs_debug("Entering.");
- ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
- if (likely(ni != NULL)) {
- ni->state = 0;
- return ni;
- }
- ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
- return NULL;
-}
-
-static void ntfs_destroy_extent_inode(ntfs_inode *ni)
-{
- ntfs_debug("Entering.");
- BUG_ON(ni->page);
- if (!atomic_dec_and_test(&ni->count))
- BUG();
- kmem_cache_free(ntfs_inode_cache, ni);
-}
-
-/*
- * The attribute runlist lock has separate locking rules from the
- * normal runlist lock, so split the two lock-classes:
- */
-static struct lock_class_key attr_list_rl_lock_class;
-
-/**
- * __ntfs_init_inode - initialize ntfs specific part of an inode
- * @sb: super block of mounted volume
- * @ni: freshly allocated ntfs inode which to initialize
- *
- * Initialize an ntfs inode to defaults.
- *
- * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
- * untouched. Make sure to initialize them elsewhere.
- *
- * Return zero on success and -ENOMEM on error.
- */
-void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
-{
- ntfs_debug("Entering.");
- rwlock_init(&ni->size_lock);
- ni->initialized_size = ni->allocated_size = 0;
- ni->seq_no = 0;
- atomic_set(&ni->count, 1);
- ni->vol = NTFS_SB(sb);
- ntfs_init_runlist(&ni->runlist);
- mutex_init(&ni->mrec_lock);
- ni->page = NULL;
- ni->page_ofs = 0;
- ni->attr_list_size = 0;
- ni->attr_list = NULL;
- ntfs_init_runlist(&ni->attr_list_rl);
- lockdep_set_class(&ni->attr_list_rl.lock,
- &attr_list_rl_lock_class);
- ni->itype.index.block_size = 0;
- ni->itype.index.vcn_size = 0;
- ni->itype.index.collation_rule = 0;
- ni->itype.index.block_size_bits = 0;
- ni->itype.index.vcn_size_bits = 0;
- mutex_init(&ni->extent_lock);
- ni->nr_extents = 0;
- ni->ext.base_ntfs_ino = NULL;
-}
-
-/*
- * Extent inodes get MFT-mapped in a nested way, while the base inode
- * is still mapped. Teach this nesting to the lock validator by creating
- * a separate class for nested inode's mrec_lock's:
- */
-static struct lock_class_key extent_inode_mrec_lock_key;
-
-inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
- unsigned long mft_no)
-{
- ntfs_inode *ni = ntfs_alloc_extent_inode();
-
- ntfs_debug("Entering.");
- if (likely(ni != NULL)) {
- __ntfs_init_inode(sb, ni);
- lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
- ni->mft_no = mft_no;
- ni->type = AT_UNUSED;
- ni->name = NULL;
- ni->name_len = 0;
- }
- return ni;
-}
-
-/**
- * ntfs_is_extended_system_file - check if a file is in the $Extend directory
- * @ctx: initialized attribute search context
- *
- * Search all file name attributes in the inode described by the attribute
- * search context @ctx and check if any of the names are in the $Extend system
- * directory.
- *
- * Return values:
- * 1: file is in $Extend directory
- * 0: file is not in $Extend directory
- * -errno: failed to determine if the file is in the $Extend directory
- */
-static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx)
-{
- int nr_links, err;
-
- /* Restart search. */
- ntfs_attr_reinit_search_ctx(ctx);
-
- /* Get number of hard links. */
- nr_links = le16_to_cpu(ctx->mrec->link_count);
-
- /* Loop through all hard links. */
- while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
- ctx))) {
- FILE_NAME_ATTR *file_name_attr;
- ATTR_RECORD *attr = ctx->attr;
- u8 *p, *p2;
-
- nr_links--;
- /*
- * Maximum sanity checking as we are called on an inode that
- * we suspect might be corrupt.
- */
- p = (u8*)attr + le32_to_cpu(attr->length);
- if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_in_use)) {
-err_corrupt_attr:
- ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name "
- "attribute. You should run chkdsk.");
- return -EIO;
- }
- if (attr->non_resident) {
- ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file "
- "name. You should run chkdsk.");
- return -EIO;
- }
- if (attr->flags) {
- ntfs_error(ctx->ntfs_ino->vol->sb, "File name with "
- "invalid flags. You should run "
- "chkdsk.");
- return -EIO;
- }
- if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
- ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file "
- "name. You should run chkdsk.");
- return -EIO;
- }
- file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
- le16_to_cpu(attr->data.resident.value_offset));
- p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
- if (p2 < (u8*)attr || p2 > p)
- goto err_corrupt_attr;
- /* This attribute is ok, but is it in the $Extend directory? */
- if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend)
- return 1; /* YES, it's an extended system file. */
- }
- if (unlikely(err != -ENOENT))
- return err;
- if (unlikely(nr_links)) {
- ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count "
- "doesn't match number of name attributes. You "
- "should run chkdsk.");
- return -EIO;
- }
- return 0; /* NO, it is not an extended system file. */
-}
-
-/**
- * ntfs_read_locked_inode - read an inode from its device
- * @vi: inode to read
- *
- * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
- * described by @vi into memory from the device.
- *
- * The only fields in @vi that we need to/can look at when the function is
- * called are i_sb, pointing to the mounted device's super block, and i_ino,
- * the number of the inode to load.
- *
- * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
- * for reading and sets up the necessary @vi fields as well as initializing
- * the ntfs inode.
- *
- * Q: What locks are held when the function is called?
- * A: i_state has I_NEW set, hence the inode is locked, also
- * i_count is set to 1, so it is not going to go away
- * i_flags is set to 0 and we have no business touching it. Only an ioctl()
- * is allowed to write to them. We should of course be honouring them but
- * we need to do that using the IS_* macros defined in include/linux/fs.h.
- * In any case ntfs_read_locked_inode() has nothing to do with i_flags.
- *
- * Return 0 on success and -errno on error. In the error case, the inode will
- * have had make_bad_inode() executed on it.
- */
-static int ntfs_read_locked_inode(struct inode *vi)
-{
- ntfs_volume *vol = NTFS_SB(vi->i_sb);
- ntfs_inode *ni;
- struct inode *bvi;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- STANDARD_INFORMATION *si;
- ntfs_attr_search_ctx *ctx;
- int err = 0;
-
- ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
-
- /* Setup the generic vfs inode parts now. */
- vi->i_uid = vol->uid;
- vi->i_gid = vol->gid;
- vi->i_mode = 0;
-
- /*
- * Initialize the ntfs specific part of @vi special casing
- * FILE_MFT which we need to do at mount time.
- */
- if (vi->i_ino != FILE_MFT)
- ntfs_init_big_inode(vi);
- ni = NTFS_I(vi);
-
- m = map_mft_record(ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(ni, m);
- if (!ctx) {
- err = -ENOMEM;
- goto unm_err_out;
- }
-
- if (!(m->flags & MFT_RECORD_IN_USE)) {
- ntfs_error(vi->i_sb, "Inode is not in use!");
- goto unm_err_out;
- }
- if (m->base_mft_record) {
- ntfs_error(vi->i_sb, "Inode is an extent inode!");
- goto unm_err_out;
- }
-
- /* Transfer information from mft record into vfs and ntfs inodes. */
- vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
-
- /*
- * FIXME: Keep in mind that link_count is two for files which have both
- * a long file name and a short file name as separate entries, so if
- * we are hiding short file names this will be too high. Either we need
- * to account for the short file names by subtracting them or we need
- * to make sure we delete files even though i_nlink is not zero which
- * might be tricky due to vfs interactions. Need to think about this
- * some more when implementing the unlink command.
- */
- set_nlink(vi, le16_to_cpu(m->link_count));
- /*
- * FIXME: Reparse points can have the directory bit set even though
- * they would be S_IFLNK. Need to deal with this further below when we
- * implement reparse points / symbolic links but it will do for now.
- * Also if not a directory, it could be something else, rather than
- * a regular file. But again, will do for now.
- */
- /* Everyone gets all permissions. */
- vi->i_mode |= S_IRWXUGO;
- /* If read-only, no one gets write permissions. */
- if (IS_RDONLY(vi))
- vi->i_mode &= ~S_IWUGO;
- if (m->flags & MFT_RECORD_IS_DIRECTORY) {
- vi->i_mode |= S_IFDIR;
- /*
- * Apply the directory permissions mask set in the mount
- * options.
- */
- vi->i_mode &= ~vol->dmask;
- /* Things break without this kludge! */
- if (vi->i_nlink > 1)
- set_nlink(vi, 1);
- } else {
- vi->i_mode |= S_IFREG;
- /* Apply the file permissions mask set in the mount options. */
- vi->i_mode &= ~vol->fmask;
- }
- /*
- * Find the standard information attribute in the mft record. At this
- * stage we haven't setup the attribute list stuff yet, so this could
- * in fact fail if the standard information is in an extent record, but
- * I don't think this actually ever happens.
- */
- err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
- ctx);
- if (unlikely(err)) {
- if (err == -ENOENT) {
- /*
- * TODO: We should be performing a hot fix here (if the
- * recover mount option is set) by creating a new
- * attribute.
- */
- ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute "
- "is missing.");
- }
- goto unm_err_out;
- }
- a = ctx->attr;
- /* Get the standard information attribute value. */
- if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
- + le32_to_cpu(a->data.resident.value_length) >
- (u8 *)ctx->mrec + vol->mft_record_size) {
- ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
- goto unm_err_out;
- }
- si = (STANDARD_INFORMATION*)((u8*)a +
- le16_to_cpu(a->data.resident.value_offset));
-
- /* Transfer information from the standard information into vi. */
- /*
- * Note: The i_?times do not quite map perfectly onto the NTFS times,
- * but they are close enough, and in the end it doesn't really matter
- * that much...
- */
- /*
- * mtime is the last change of the data within the file. Not changed
- * when only metadata is changed, e.g. a rename doesn't affect mtime.
- */
- inode_set_mtime_to_ts(vi, ntfs2utc(si->last_data_change_time));
- /*
- * ctime is the last change of the metadata of the file. This obviously
- * always changes, when mtime is changed. ctime can be changed on its
- * own, mtime is then not changed, e.g. when a file is renamed.
- */
- inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
- /*
- * Last access to the data within the file. Not changed during a rename
- * for example but changed whenever the file is written to.
- */
- inode_set_atime_to_ts(vi, ntfs2utc(si->last_access_time));
-
- /* Find the attribute list attribute if present. */
- ntfs_attr_reinit_search_ctx(ctx);
- err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
- if (err) {
- if (unlikely(err != -ENOENT)) {
- ntfs_error(vi->i_sb, "Failed to lookup attribute list "
- "attribute.");
- goto unm_err_out;
- }
- } else /* if (!err) */ {
- if (vi->i_ino == FILE_MFT)
- goto skip_attr_list_load;
- ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
- NInoSetAttrList(ni);
- a = ctx->attr;
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "Attribute list attribute is "
- "compressed.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_ENCRYPTED ||
- a->flags & ATTR_IS_SPARSE) {
- if (a->non_resident) {
- ntfs_error(vi->i_sb, "Non-resident attribute "
- "list attribute is encrypted/"
- "sparse.");
- goto unm_err_out;
- }
- ntfs_warning(vi->i_sb, "Resident attribute list "
- "attribute in inode 0x%lx is marked "
- "encrypted/sparse which is not true. "
- "However, Windows allows this and "
- "chkdsk does not detect or correct it "
- "so we will just ignore the invalid "
- "flags and pretend they are not set.",
- vi->i_ino);
- }
- /* Now allocate memory for the attribute list. */
- ni->attr_list_size = (u32)ntfs_attr_size(a);
- ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
- if (!ni->attr_list) {
- ntfs_error(vi->i_sb, "Not enough memory to allocate "
- "buffer for attribute list.");
- err = -ENOMEM;
- goto unm_err_out;
- }
- if (a->non_resident) {
- NInoSetAttrListNonResident(ni);
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(vi->i_sb, "Attribute list has non "
- "zero lowest_vcn.");
- goto unm_err_out;
- }
- /*
- * Setup the runlist. No need for locking as we have
- * exclusive access to the inode at this time.
- */
- ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
- a, NULL);
- if (IS_ERR(ni->attr_list_rl.rl)) {
- err = PTR_ERR(ni->attr_list_rl.rl);
- ni->attr_list_rl.rl = NULL;
- ntfs_error(vi->i_sb, "Mapping pairs "
- "decompression failed.");
- goto unm_err_out;
- }
- /* Now load the attribute list. */
- if ((err = load_attribute_list(vol, &ni->attr_list_rl,
- ni->attr_list, ni->attr_list_size,
- sle64_to_cpu(a->data.non_resident.
- initialized_size)))) {
- ntfs_error(vi->i_sb, "Failed to load "
- "attribute list attribute.");
- goto unm_err_out;
- }
- } else /* if (!a->non_resident) */ {
- if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
- + le32_to_cpu(
- a->data.resident.value_length) >
- (u8*)ctx->mrec + vol->mft_record_size) {
- ntfs_error(vi->i_sb, "Corrupt attribute list "
- "in inode.");
- goto unm_err_out;
- }
- /* Now copy the attribute list. */
- memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
- a->data.resident.value_offset),
- le32_to_cpu(
- a->data.resident.value_length));
- }
- }
-skip_attr_list_load:
- /*
- * If an attribute list is present we now have the attribute list value
- * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
- */
- if (S_ISDIR(vi->i_mode)) {
- loff_t bvi_size;
- ntfs_inode *bni;
- INDEX_ROOT *ir;
- u8 *ir_end, *index_end;
-
- /* It is a directory, find index root attribute. */
- ntfs_attr_reinit_search_ctx(ctx);
- err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE,
- 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT) {
- // FIXME: File is corrupt! Hot-fix with empty
- // index root attribute if recovery option is
- // set.
- ntfs_error(vi->i_sb, "$INDEX_ROOT attribute "
- "is missing.");
- }
- goto unm_err_out;
- }
- a = ctx->attr;
- /* Set up the state. */
- if (unlikely(a->non_resident)) {
- ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
- "resident.");
- goto unm_err_out;
- }
- /* Ensure the attribute name is placed before the value. */
- if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
- le16_to_cpu(a->data.resident.value_offset)))) {
- ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
- "placed after the attribute value.");
- goto unm_err_out;
- }
- /*
- * Compressed/encrypted index root just means that the newly
- * created files in that directory should be created compressed/
- * encrypted. However index root cannot be both compressed and
- * encrypted.
- */
- if (a->flags & ATTR_COMPRESSION_MASK)
- NInoSetCompressed(ni);
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "Found encrypted and "
- "compressed attribute.");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
- ir = (INDEX_ROOT*)((u8*)a +
- le16_to_cpu(a->data.resident.value_offset));
- ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
- if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
- ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
- "corrupt.");
- goto unm_err_out;
- }
- index_end = (u8*)&ir->index +
- le32_to_cpu(ir->index.index_length);
- if (index_end > ir_end) {
- ntfs_error(vi->i_sb, "Directory index is corrupt.");
- goto unm_err_out;
- }
- if (ir->type != AT_FILE_NAME) {
- ntfs_error(vi->i_sb, "Indexed attribute is not "
- "$FILE_NAME.");
- goto unm_err_out;
- }
- if (ir->collation_rule != COLLATION_FILE_NAME) {
- ntfs_error(vi->i_sb, "Index collation rule is not "
- "COLLATION_FILE_NAME.");
- goto unm_err_out;
- }
- ni->itype.index.collation_rule = ir->collation_rule;
- ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
- if (ni->itype.index.block_size &
- (ni->itype.index.block_size - 1)) {
- ntfs_error(vi->i_sb, "Index block size (%u) is not a "
- "power of two.",
- ni->itype.index.block_size);
- goto unm_err_out;
- }
- if (ni->itype.index.block_size > PAGE_SIZE) {
- ntfs_error(vi->i_sb, "Index block size (%u) > "
- "PAGE_SIZE (%ld) is not "
- "supported. Sorry.",
- ni->itype.index.block_size,
- PAGE_SIZE);
- err = -EOPNOTSUPP;
- goto unm_err_out;
- }
- if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
- ntfs_error(vi->i_sb, "Index block size (%u) < "
- "NTFS_BLOCK_SIZE (%i) is not "
- "supported. Sorry.",
- ni->itype.index.block_size,
- NTFS_BLOCK_SIZE);
- err = -EOPNOTSUPP;
- goto unm_err_out;
- }
- ni->itype.index.block_size_bits =
- ffs(ni->itype.index.block_size) - 1;
- /* Determine the size of a vcn in the directory index. */
- if (vol->cluster_size <= ni->itype.index.block_size) {
- ni->itype.index.vcn_size = vol->cluster_size;
- ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
- } else {
- ni->itype.index.vcn_size = vol->sector_size;
- ni->itype.index.vcn_size_bits = vol->sector_size_bits;
- }
-
- /* Setup the index allocation attribute, even if not present. */
- NInoSetMstProtected(ni);
- ni->type = AT_INDEX_ALLOCATION;
- ni->name = I30;
- ni->name_len = 4;
-
- if (!(ir->index.flags & LARGE_INDEX)) {
- /* No index allocation. */
- vi->i_size = ni->initialized_size =
- ni->allocated_size = 0;
- /* We are done with the mft record, so we release it. */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- m = NULL;
- ctx = NULL;
- goto skip_large_dir_stuff;
- } /* LARGE_INDEX: Index allocation present. Setup state. */
- NInoSetIndexAllocPresent(ni);
- /* Find index allocation attribute. */
- ntfs_attr_reinit_search_ctx(ctx);
- err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, I30, 4,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION "
- "attribute is not present but "
- "$INDEX_ROOT indicated it is.");
- else
- ntfs_error(vi->i_sb, "Failed to lookup "
- "$INDEX_ALLOCATION "
- "attribute.");
- goto unm_err_out;
- }
- a = ctx->attr;
- if (!a->non_resident) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
- "is resident.");
- goto unm_err_out;
- }
- /*
- * Ensure the attribute name is placed before the mapping pairs
- * array.
- */
- if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
- le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset)))) {
- ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
- "is placed after the mapping pairs "
- "array.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_ENCRYPTED) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
- "is encrypted.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_SPARSE) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
- "is sparse.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
- "is compressed.");
- goto unm_err_out;
- }
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(vi->i_sb, "First extent of "
- "$INDEX_ALLOCATION attribute has non "
- "zero lowest_vcn.");
- goto unm_err_out;
- }
- vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
- ni->initialized_size = sle64_to_cpu(
- a->data.non_resident.initialized_size);
- ni->allocated_size = sle64_to_cpu(
- a->data.non_resident.allocated_size);
- /*
- * We are done with the mft record, so we release it. Otherwise
- * we would deadlock in ntfs_attr_iget().
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- m = NULL;
- ctx = NULL;
- /* Get the index bitmap attribute inode. */
- bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4);
- if (IS_ERR(bvi)) {
- ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
- err = PTR_ERR(bvi);
- goto unm_err_out;
- }
- bni = NTFS_I(bvi);
- if (NInoCompressed(bni) || NInoEncrypted(bni) ||
- NInoSparse(bni)) {
- ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
- "and/or encrypted and/or sparse.");
- goto iput_unm_err_out;
- }
- /* Consistency check bitmap size vs. index allocation size. */
- bvi_size = i_size_read(bvi);
- if ((bvi_size << 3) < (vi->i_size >>
- ni->itype.index.block_size_bits)) {
- ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
- "for index allocation (0x%llx).",
- bvi_size << 3, vi->i_size);
- goto iput_unm_err_out;
- }
- /* No longer need the bitmap attribute inode. */
- iput(bvi);
-skip_large_dir_stuff:
- /* Setup the operations for this inode. */
- vi->i_op = &ntfs_dir_inode_ops;
- vi->i_fop = &ntfs_dir_ops;
- vi->i_mapping->a_ops = &ntfs_mst_aops;
- } else {
- /* It is a file. */
- ntfs_attr_reinit_search_ctx(ctx);
-
- /* Setup the data attribute, even if not present. */
- ni->type = AT_DATA;
- ni->name = NULL;
- ni->name_len = 0;
-
- /* Find first extent of the unnamed data attribute. */
- err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- vi->i_size = ni->initialized_size =
- ni->allocated_size = 0;
- if (err != -ENOENT) {
- ntfs_error(vi->i_sb, "Failed to lookup $DATA "
- "attribute.");
- goto unm_err_out;
- }
- /*
- * FILE_Secure does not have an unnamed $DATA
- * attribute, so we special case it here.
- */
- if (vi->i_ino == FILE_Secure)
- goto no_data_attr_special_case;
- /*
- * Most if not all the system files in the $Extend
- * system directory do not have unnamed data
- * attributes so we need to check if the parent
- * directory of the file is FILE_Extend and if it is
- * ignore this error. To do this we need to get the
- * name of this inode from the mft record as the name
- * contains the back reference to the parent directory.
- */
- if (ntfs_is_extended_system_file(ctx) > 0)
- goto no_data_attr_special_case;
- // FIXME: File is corrupt! Hot-fix with empty data
- // attribute if recovery option is set.
- ntfs_error(vi->i_sb, "$DATA attribute is missing.");
- goto unm_err_out;
- }
- a = ctx->attr;
- /* Setup the state. */
- if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- NInoSetCompressed(ni);
- if (vol->cluster_size > 4096) {
- ntfs_error(vi->i_sb, "Found "
- "compressed data but "
- "compression is "
- "disabled due to "
- "cluster size (%i) > "
- "4kiB.",
- vol->cluster_size);
- goto unm_err_out;
- }
- if ((a->flags & ATTR_COMPRESSION_MASK)
- != ATTR_IS_COMPRESSED) {
- ntfs_error(vi->i_sb, "Found unknown "
- "compression method "
- "or corrupt file.");
- goto unm_err_out;
- }
- }
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
- }
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (NInoCompressed(ni)) {
- ntfs_error(vi->i_sb, "Found encrypted and "
- "compressed data.");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
- if (a->non_resident) {
- NInoSetNonResident(ni);
- if (NInoCompressed(ni) || NInoSparse(ni)) {
- if (NInoCompressed(ni) && a->data.non_resident.
- compression_unit != 4) {
- ntfs_error(vi->i_sb, "Found "
- "non-standard "
- "compression unit (%u "
- "instead of 4). "
- "Cannot handle this.",
- a->data.non_resident.
- compression_unit);
- err = -EOPNOTSUPP;
- goto unm_err_out;
- }
- if (a->data.non_resident.compression_unit) {
- ni->itype.compressed.block_size = 1U <<
- (a->data.non_resident.
- compression_unit +
- vol->cluster_size_bits);
- ni->itype.compressed.block_size_bits =
- ffs(ni->itype.
- compressed.
- block_size) - 1;
- ni->itype.compressed.block_clusters =
- 1U << a->data.
- non_resident.
- compression_unit;
- } else {
- ni->itype.compressed.block_size = 0;
- ni->itype.compressed.block_size_bits =
- 0;
- ni->itype.compressed.block_clusters =
- 0;
- }
- ni->itype.compressed.size = sle64_to_cpu(
- a->data.non_resident.
- compressed_size);
- }
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(vi->i_sb, "First extent of $DATA "
- "attribute has non zero "
- "lowest_vcn.");
- goto unm_err_out;
- }
- vi->i_size = sle64_to_cpu(
- a->data.non_resident.data_size);
- ni->initialized_size = sle64_to_cpu(
- a->data.non_resident.initialized_size);
- ni->allocated_size = sle64_to_cpu(
- a->data.non_resident.allocated_size);
- } else { /* Resident attribute. */
- vi->i_size = ni->initialized_size = le32_to_cpu(
- a->data.resident.value_length);
- ni->allocated_size = le32_to_cpu(a->length) -
- le16_to_cpu(
- a->data.resident.value_offset);
- if (vi->i_size > ni->allocated_size) {
- ntfs_error(vi->i_sb, "Resident data attribute "
- "is corrupt (size exceeds "
- "allocation).");
- goto unm_err_out;
- }
- }
-no_data_attr_special_case:
- /* We are done with the mft record, so we release it. */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- m = NULL;
- ctx = NULL;
- /* Setup the operations for this inode. */
- vi->i_op = &ntfs_file_inode_ops;
- vi->i_fop = &ntfs_file_ops;
- vi->i_mapping->a_ops = &ntfs_normal_aops;
- if (NInoMstProtected(ni))
- vi->i_mapping->a_ops = &ntfs_mst_aops;
- else if (NInoCompressed(ni))
- vi->i_mapping->a_ops = &ntfs_compressed_aops;
- }
- /*
- * The number of 512-byte blocks used on disk (for stat). This is in so
- * far inaccurate as it doesn't account for any named streams or other
- * special non-resident attributes, but that is how Windows works, too,
- * so we are at least consistent with Windows, if not entirely
- * consistent with the Linux Way. Doing it the Linux Way would cause a
- * significant slowdown as it would involve iterating over all
- * attributes in the mft record and adding the allocated/compressed
- * sizes of all non-resident attributes present to give us the Linux
- * correct size that should go into i_blocks (after division by 512).
- */
- if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
- vi->i_blocks = ni->itype.compressed.size >> 9;
- else
- vi->i_blocks = ni->allocated_size >> 9;
- ntfs_debug("Done.");
- return 0;
-iput_unm_err_out:
- iput(bvi);
-unm_err_out:
- if (!err)
- err = -EIO;
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(ni);
-err_out:
- ntfs_error(vol->sb, "Failed with error code %i. Marking corrupt "
- "inode 0x%lx as bad. Run chkdsk.", err, vi->i_ino);
- make_bad_inode(vi);
- if (err != -EOPNOTSUPP && err != -ENOMEM)
- NVolSetErrors(vol);
- return err;
-}
-
-/**
- * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
- * @base_vi: base inode
- * @vi: attribute inode to read
- *
- * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
- * attribute inode described by @vi into memory from the base mft record
- * described by @base_ni.
- *
- * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
- * reading and looks up the attribute described by @vi before setting up the
- * necessary fields in @vi as well as initializing the ntfs inode.
- *
- * Q: What locks are held when the function is called?
- * A: i_state has I_NEW set, hence the inode is locked, also
- * i_count is set to 1, so it is not going to go away
- *
- * Return 0 on success and -errno on error. In the error case, the inode will
- * have had make_bad_inode() executed on it.
- *
- * Note this cannot be called for AT_INDEX_ALLOCATION.
- */
-static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
-{
- ntfs_volume *vol = NTFS_SB(vi->i_sb);
- ntfs_inode *ni, *base_ni;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx;
- int err = 0;
-
- ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
-
- ntfs_init_big_inode(vi);
-
- ni = NTFS_I(vi);
- base_ni = NTFS_I(base_vi);
-
- /* Just mirror the values from the base inode. */
- vi->i_uid = base_vi->i_uid;
- vi->i_gid = base_vi->i_gid;
- set_nlink(vi, base_vi->i_nlink);
- inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
- inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
- inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
- vi->i_generation = ni->seq_no = base_ni->seq_no;
-
- /* Set inode type to zero but preserve permissions. */
- vi->i_mode = base_vi->i_mode & ~S_IFMT;
-
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (!ctx) {
- err = -ENOMEM;
- goto unm_err_out;
- }
- /* Find the attribute. */
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err))
- goto unm_err_out;
- a = ctx->attr;
- if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- NInoSetCompressed(ni);
- if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
- ni->name_len)) {
- ntfs_error(vi->i_sb, "Found compressed "
- "non-data or named data "
- "attribute. Please report "
- "you saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- goto unm_err_out;
- }
- if (vol->cluster_size > 4096) {
- ntfs_error(vi->i_sb, "Found compressed "
- "attribute but compression is "
- "disabled due to cluster size "
- "(%i) > 4kiB.",
- vol->cluster_size);
- goto unm_err_out;
- }
- if ((a->flags & ATTR_COMPRESSION_MASK) !=
- ATTR_IS_COMPRESSED) {
- ntfs_error(vi->i_sb, "Found unknown "
- "compression method.");
- goto unm_err_out;
- }
- }
- /*
- * The compressed/sparse flag set in an index root just means
- * to compress all files.
- */
- if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
- ntfs_error(vi->i_sb, "Found mst protected attribute "
- "but the attribute is %s. Please "
- "report you saw this message to "
- "linux-ntfs-dev@lists.sourceforge.net",
- NInoCompressed(ni) ? "compressed" :
- "sparse");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
- }
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (NInoCompressed(ni)) {
- ntfs_error(vi->i_sb, "Found encrypted and compressed "
- "data.");
- goto unm_err_out;
- }
- /*
- * The encryption flag set in an index root just means to
- * encrypt all files.
- */
- if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
- ntfs_error(vi->i_sb, "Found mst protected attribute "
- "but the attribute is encrypted. "
- "Please report you saw this message "
- "to linux-ntfs-dev@lists.sourceforge."
- "net");
- goto unm_err_out;
- }
- if (ni->type != AT_DATA) {
- ntfs_error(vi->i_sb, "Found encrypted non-data "
- "attribute.");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
- if (!a->non_resident) {
- /* Ensure the attribute name is placed before the value. */
- if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
- le16_to_cpu(a->data.resident.value_offset)))) {
- ntfs_error(vol->sb, "Attribute name is placed after "
- "the attribute value.");
- goto unm_err_out;
- }
- if (NInoMstProtected(ni)) {
- ntfs_error(vi->i_sb, "Found mst protected attribute "
- "but the attribute is resident. "
- "Please report you saw this message to "
- "linux-ntfs-dev@lists.sourceforge.net");
- goto unm_err_out;
- }
- vi->i_size = ni->initialized_size = le32_to_cpu(
- a->data.resident.value_length);
- ni->allocated_size = le32_to_cpu(a->length) -
- le16_to_cpu(a->data.resident.value_offset);
- if (vi->i_size > ni->allocated_size) {
- ntfs_error(vi->i_sb, "Resident attribute is corrupt "
- "(size exceeds allocation).");
- goto unm_err_out;
- }
- } else {
- NInoSetNonResident(ni);
- /*
- * Ensure the attribute name is placed before the mapping pairs
- * array.
- */
- if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
- le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset)))) {
- ntfs_error(vol->sb, "Attribute name is placed after "
- "the mapping pairs array.");
- goto unm_err_out;
- }
- if (NInoCompressed(ni) || NInoSparse(ni)) {
- if (NInoCompressed(ni) && a->data.non_resident.
- compression_unit != 4) {
- ntfs_error(vi->i_sb, "Found non-standard "
- "compression unit (%u instead "
- "of 4). Cannot handle this.",
- a->data.non_resident.
- compression_unit);
- err = -EOPNOTSUPP;
- goto unm_err_out;
- }
- if (a->data.non_resident.compression_unit) {
- ni->itype.compressed.block_size = 1U <<
- (a->data.non_resident.
- compression_unit +
- vol->cluster_size_bits);
- ni->itype.compressed.block_size_bits =
- ffs(ni->itype.compressed.
- block_size) - 1;
- ni->itype.compressed.block_clusters = 1U <<
- a->data.non_resident.
- compression_unit;
- } else {
- ni->itype.compressed.block_size = 0;
- ni->itype.compressed.block_size_bits = 0;
- ni->itype.compressed.block_clusters = 0;
- }
- ni->itype.compressed.size = sle64_to_cpu(
- a->data.non_resident.compressed_size);
- }
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(vi->i_sb, "First extent of attribute has "
- "non-zero lowest_vcn.");
- goto unm_err_out;
- }
- vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
- ni->initialized_size = sle64_to_cpu(
- a->data.non_resident.initialized_size);
- ni->allocated_size = sle64_to_cpu(
- a->data.non_resident.allocated_size);
- }
- vi->i_mapping->a_ops = &ntfs_normal_aops;
- if (NInoMstProtected(ni))
- vi->i_mapping->a_ops = &ntfs_mst_aops;
- else if (NInoCompressed(ni))
- vi->i_mapping->a_ops = &ntfs_compressed_aops;
- if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
- vi->i_blocks = ni->itype.compressed.size >> 9;
- else
- vi->i_blocks = ni->allocated_size >> 9;
- /*
- * Make sure the base inode does not go away and attach it to the
- * attribute inode.
- */
- igrab(base_vi);
- ni->ext.base_ntfs_ino = base_ni;
- ni->nr_extents = -1;
-
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
-
- ntfs_debug("Done.");
- return 0;
-
-unm_err_out:
- if (!err)
- err = -EIO;
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
-err_out:
- ntfs_error(vol->sb, "Failed with error code %i while reading attribute "
- "inode (mft_no 0x%lx, type 0x%x, name_len %i). "
- "Marking corrupt inode and base inode 0x%lx as bad. "
- "Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
- base_vi->i_ino);
- make_bad_inode(vi);
- if (err != -ENOMEM)
- NVolSetErrors(vol);
- return err;
-}
-
-/**
- * ntfs_read_locked_index_inode - read an index inode from its base inode
- * @base_vi: base inode
- * @vi: index inode to read
- *
- * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
- * index inode described by @vi into memory from the base mft record described
- * by @base_ni.
- *
- * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
- * reading and looks up the attributes relating to the index described by @vi
- * before setting up the necessary fields in @vi as well as initializing the
- * ntfs inode.
- *
- * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
- * with the attribute type set to AT_INDEX_ALLOCATION. Apart from that, they
- * are setup like directory inodes since directories are a special case of
- * indices ao they need to be treated in much the same way. Most importantly,
- * for small indices the index allocation attribute might not actually exist.
- * However, the index root attribute always exists but this does not need to
- * have an inode associated with it and this is why we define a new inode type
- * index. Also, like for directories, we need to have an attribute inode for
- * the bitmap attribute corresponding to the index allocation attribute and we
- * can store this in the appropriate field of the inode, just like we do for
- * normal directory inodes.
- *
- * Q: What locks are held when the function is called?
- * A: i_state has I_NEW set, hence the inode is locked, also
- * i_count is set to 1, so it is not going to go away
- *
- * Return 0 on success and -errno on error. In the error case, the inode will
- * have had make_bad_inode() executed on it.
- */
-static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
-{
- loff_t bvi_size;
- ntfs_volume *vol = NTFS_SB(vi->i_sb);
- ntfs_inode *ni, *base_ni, *bni;
- struct inode *bvi;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx;
- INDEX_ROOT *ir;
- u8 *ir_end, *index_end;
- int err = 0;
-
- ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
- ntfs_init_big_inode(vi);
- ni = NTFS_I(vi);
- base_ni = NTFS_I(base_vi);
- /* Just mirror the values from the base inode. */
- vi->i_uid = base_vi->i_uid;
- vi->i_gid = base_vi->i_gid;
- set_nlink(vi, base_vi->i_nlink);
- inode_set_mtime_to_ts(vi, inode_get_mtime(base_vi));
- inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
- inode_set_atime_to_ts(vi, inode_get_atime(base_vi));
- vi->i_generation = ni->seq_no = base_ni->seq_no;
- /* Set inode type to zero but preserve permissions. */
- vi->i_mode = base_vi->i_mode & ~S_IFMT;
- /* Map the mft record for the base inode. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (!ctx) {
- err = -ENOMEM;
- goto unm_err_out;
- }
- /* Find the index root attribute. */
- err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
- "missing.");
- goto unm_err_out;
- }
- a = ctx->attr;
- /* Set up the state. */
- if (unlikely(a->non_resident)) {
- ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
- goto unm_err_out;
- }
- /* Ensure the attribute name is placed before the value. */
- if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
- le16_to_cpu(a->data.resident.value_offset)))) {
- ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
- "after the attribute value.");
- goto unm_err_out;
- }
- /*
- * Compressed/encrypted/sparse index root is not allowed, except for
- * directories of course but those are not dealt with here.
- */
- if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
- ATTR_IS_SPARSE)) {
- ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
- "root attribute.");
- goto unm_err_out;
- }
- ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset));
- ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
- if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
- ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
- goto unm_err_out;
- }
- index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
- if (index_end > ir_end) {
- ntfs_error(vi->i_sb, "Index is corrupt.");
- goto unm_err_out;
- }
- if (ir->type) {
- ntfs_error(vi->i_sb, "Index type is not 0 (type is 0x%x).",
- le32_to_cpu(ir->type));
- goto unm_err_out;
- }
- ni->itype.index.collation_rule = ir->collation_rule;
- ntfs_debug("Index collation rule is 0x%x.",
- le32_to_cpu(ir->collation_rule));
- ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
- if (!is_power_of_2(ni->itype.index.block_size)) {
- ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
- "two.", ni->itype.index.block_size);
- goto unm_err_out;
- }
- if (ni->itype.index.block_size > PAGE_SIZE) {
- ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE "
- "(%ld) is not supported. Sorry.",
- ni->itype.index.block_size, PAGE_SIZE);
- err = -EOPNOTSUPP;
- goto unm_err_out;
- }
- if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
- ntfs_error(vi->i_sb, "Index block size (%u) < NTFS_BLOCK_SIZE "
- "(%i) is not supported. Sorry.",
- ni->itype.index.block_size, NTFS_BLOCK_SIZE);
- err = -EOPNOTSUPP;
- goto unm_err_out;
- }
- ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
- /* Determine the size of a vcn in the index. */
- if (vol->cluster_size <= ni->itype.index.block_size) {
- ni->itype.index.vcn_size = vol->cluster_size;
- ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
- } else {
- ni->itype.index.vcn_size = vol->sector_size;
- ni->itype.index.vcn_size_bits = vol->sector_size_bits;
- }
- /* Check for presence of index allocation attribute. */
- if (!(ir->index.flags & LARGE_INDEX)) {
- /* No index allocation. */
- vi->i_size = ni->initialized_size = ni->allocated_size = 0;
- /* We are done with the mft record, so we release it. */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- m = NULL;
- ctx = NULL;
- goto skip_large_index_stuff;
- } /* LARGE_INDEX: Index allocation present. Setup state. */
- NInoSetIndexAllocPresent(ni);
- /* Find index allocation attribute. */
- ntfs_attr_reinit_search_ctx(ctx);
- err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
- "not present but $INDEX_ROOT "
- "indicated it is.");
- else
- ntfs_error(vi->i_sb, "Failed to lookup "
- "$INDEX_ALLOCATION attribute.");
- goto unm_err_out;
- }
- a = ctx->attr;
- if (!a->non_resident) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
- "resident.");
- goto unm_err_out;
- }
- /*
- * Ensure the attribute name is placed before the mapping pairs array.
- */
- if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
- le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset)))) {
- ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
- "placed after the mapping pairs array.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_ENCRYPTED) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
- "encrypted.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_SPARSE) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
- goto unm_err_out;
- }
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
- "compressed.");
- goto unm_err_out;
- }
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
- "attribute has non zero lowest_vcn.");
- goto unm_err_out;
- }
- vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
- ni->initialized_size = sle64_to_cpu(
- a->data.non_resident.initialized_size);
- ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size);
- /*
- * We are done with the mft record, so we release it. Otherwise
- * we would deadlock in ntfs_attr_iget().
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- m = NULL;
- ctx = NULL;
- /* Get the index bitmap attribute inode. */
- bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
- if (IS_ERR(bvi)) {
- ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
- err = PTR_ERR(bvi);
- goto unm_err_out;
- }
- bni = NTFS_I(bvi);
- if (NInoCompressed(bni) || NInoEncrypted(bni) ||
- NInoSparse(bni)) {
- ntfs_error(vi->i_sb, "$BITMAP attribute is compressed and/or "
- "encrypted and/or sparse.");
- goto iput_unm_err_out;
- }
- /* Consistency check bitmap size vs. index allocation size. */
- bvi_size = i_size_read(bvi);
- if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
- ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
- "index allocation (0x%llx).", bvi_size << 3,
- vi->i_size);
- goto iput_unm_err_out;
- }
- iput(bvi);
-skip_large_index_stuff:
- /* Setup the operations for this index inode. */
- vi->i_mapping->a_ops = &ntfs_mst_aops;
- vi->i_blocks = ni->allocated_size >> 9;
- /*
- * Make sure the base inode doesn't go away and attach it to the
- * index inode.
- */
- igrab(base_vi);
- ni->ext.base_ntfs_ino = base_ni;
- ni->nr_extents = -1;
-
- ntfs_debug("Done.");
- return 0;
-iput_unm_err_out:
- iput(bvi);
-unm_err_out:
- if (!err)
- err = -EIO;
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
-err_out:
- ntfs_error(vi->i_sb, "Failed with error code %i while reading index "
- "inode (mft_no 0x%lx, name_len %i.", err, vi->i_ino,
- ni->name_len);
- make_bad_inode(vi);
- if (err != -EOPNOTSUPP && err != -ENOMEM)
- NVolSetErrors(vol);
- return err;
-}
-
-/*
- * The MFT inode has special locking, so teach the lock validator
- * about this by splitting off the locking rules of the MFT from
- * the locking rules of other inodes. The MFT inode can never be
- * accessed from the VFS side (or even internally), only by the
- * map_mft functions.
- */
-static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
-
-/**
- * ntfs_read_inode_mount - special read_inode for mount time use only
- * @vi: inode to read
- *
- * Read inode FILE_MFT at mount time, only called with super_block lock
- * held from within the read_super() code path.
- *
- * This function exists because when it is called the page cache for $MFT/$DATA
- * is not initialized and hence we cannot get at the contents of mft records
- * by calling map_mft_record*().
- *
- * Further it needs to cope with the circular references problem, i.e. cannot
- * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
- * we do not know where the other extent mft records are yet and again, because
- * we cannot call map_mft_record*() yet. Obviously this applies only when an
- * attribute list is actually present in $MFT inode.
- *
- * We solve these problems by starting with the $DATA attribute before anything
- * else and iterating using ntfs_attr_lookup($DATA) over all extents. As each
- * extent is found, we ntfs_mapping_pairs_decompress() including the implied
- * ntfs_runlists_merge(). Each step of the iteration necessarily provides
- * sufficient information for the next step to complete.
- *
- * This should work but there are two possible pit falls (see inline comments
- * below), but only time will tell if they are real pits or just smoke...
- */
-int ntfs_read_inode_mount(struct inode *vi)
-{
- VCN next_vcn, last_vcn, highest_vcn;
- s64 block;
- struct super_block *sb = vi->i_sb;
- ntfs_volume *vol = NTFS_SB(sb);
- struct buffer_head *bh;
- ntfs_inode *ni;
- MFT_RECORD *m = NULL;
- ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx;
- unsigned int i, nr_blocks;
- int err;
-
- ntfs_debug("Entering.");
-
- /* Initialize the ntfs specific part of @vi. */
- ntfs_init_big_inode(vi);
-
- ni = NTFS_I(vi);
-
- /* Setup the data attribute. It is special as it is mst protected. */
- NInoSetNonResident(ni);
- NInoSetMstProtected(ni);
- NInoSetSparseDisabled(ni);
- ni->type = AT_DATA;
- ni->name = NULL;
- ni->name_len = 0;
- /*
- * This sets up our little cheat allowing us to reuse the async read io
- * completion handler for directories.
- */
- ni->itype.index.block_size = vol->mft_record_size;
- ni->itype.index.block_size_bits = vol->mft_record_size_bits;
-
- /* Very important! Needed to be able to call map_mft_record*(). */
- vol->mft_ino = vi;
-
- /* Allocate enough memory to read the first mft record. */
- if (vol->mft_record_size > 64 * 1024) {
- ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
- vol->mft_record_size);
- goto err_out;
- }
- i = vol->mft_record_size;
- if (i < sb->s_blocksize)
- i = sb->s_blocksize;
- m = (MFT_RECORD*)ntfs_malloc_nofs(i);
- if (!m) {
- ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
- goto err_out;
- }
-
- /* Determine the first block of the $MFT/$DATA attribute. */
- block = vol->mft_lcn << vol->cluster_size_bits >>
- sb->s_blocksize_bits;
- nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits;
- if (!nr_blocks)
- nr_blocks = 1;
-
- /* Load $MFT/$DATA's first mft record. */
- for (i = 0; i < nr_blocks; i++) {
- bh = sb_bread(sb, block++);
- if (!bh) {
- ntfs_error(sb, "Device read failed.");
- goto err_out;
- }
- memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data,
- sb->s_blocksize);
- brelse(bh);
- }
-
- if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
- ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
- le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
- goto err_out;
- }
-
- /* Apply the mst fixups. */
- if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
- /* FIXME: Try to use the $MFTMirr now. */
- ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
- goto err_out;
- }
-
- /* Sanity check offset to the first attribute */
- if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) {
- ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.",
- le16_to_cpu(m->attrs_offset));
- goto err_out;
- }
-
- /* Need this to sanity check attribute list references to $MFT. */
- vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
-
- /* Provides read_folio() for map_mft_record(). */
- vi->i_mapping->a_ops = &ntfs_mst_aops;
-
- ctx = ntfs_attr_get_search_ctx(ni, m);
- if (!ctx) {
- err = -ENOMEM;
- goto err_out;
- }
-
- /* Find the attribute list attribute if present. */
- err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
- if (err) {
- if (unlikely(err != -ENOENT)) {
- ntfs_error(sb, "Failed to lookup attribute list "
- "attribute. You should run chkdsk.");
- goto put_err_out;
- }
- } else /* if (!err) */ {
- ATTR_LIST_ENTRY *al_entry, *next_al_entry;
- u8 *al_end;
- static const char *es = " Not allowed. $MFT is corrupt. "
- "You should run chkdsk.";
-
- ntfs_debug("Attribute list attribute found in $MFT.");
- NInoSetAttrList(ni);
- a = ctx->attr;
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(sb, "Attribute list attribute is "
- "compressed.%s", es);
- goto put_err_out;
- }
- if (a->flags & ATTR_IS_ENCRYPTED ||
- a->flags & ATTR_IS_SPARSE) {
- if (a->non_resident) {
- ntfs_error(sb, "Non-resident attribute list "
- "attribute is encrypted/"
- "sparse.%s", es);
- goto put_err_out;
- }
- ntfs_warning(sb, "Resident attribute list attribute "
- "in $MFT system file is marked "
- "encrypted/sparse which is not true. "
- "However, Windows allows this and "
- "chkdsk does not detect or correct it "
- "so we will just ignore the invalid "
- "flags and pretend they are not set.");
- }
- /* Now allocate memory for the attribute list. */
- ni->attr_list_size = (u32)ntfs_attr_size(a);
- if (!ni->attr_list_size) {
- ntfs_error(sb, "Attr_list_size is zero");
- goto put_err_out;
- }
- ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
- if (!ni->attr_list) {
- ntfs_error(sb, "Not enough memory to allocate buffer "
- "for attribute list.");
- goto put_err_out;
- }
- if (a->non_resident) {
- NInoSetAttrListNonResident(ni);
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(sb, "Attribute list has non zero "
- "lowest_vcn. $MFT is corrupt. "
- "You should run chkdsk.");
- goto put_err_out;
- }
- /* Setup the runlist. */
- ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
- a, NULL);
- if (IS_ERR(ni->attr_list_rl.rl)) {
- err = PTR_ERR(ni->attr_list_rl.rl);
- ni->attr_list_rl.rl = NULL;
- ntfs_error(sb, "Mapping pairs decompression "
- "failed with error code %i.",
- -err);
- goto put_err_out;
- }
- /* Now load the attribute list. */
- if ((err = load_attribute_list(vol, &ni->attr_list_rl,
- ni->attr_list, ni->attr_list_size,
- sle64_to_cpu(a->data.
- non_resident.initialized_size)))) {
- ntfs_error(sb, "Failed to load attribute list "
- "attribute with error code %i.",
- -err);
- goto put_err_out;
- }
- } else /* if (!ctx.attr->non_resident) */ {
- if ((u8*)a + le16_to_cpu(
- a->data.resident.value_offset) +
- le32_to_cpu(
- a->data.resident.value_length) >
- (u8*)ctx->mrec + vol->mft_record_size) {
- ntfs_error(sb, "Corrupt attribute list "
- "attribute.");
- goto put_err_out;
- }
- /* Now copy the attribute list. */
- memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
- a->data.resident.value_offset),
- le32_to_cpu(
- a->data.resident.value_length));
- }
- /* The attribute list is now setup in memory. */
- /*
- * FIXME: I don't know if this case is actually possible.
- * According to logic it is not possible but I have seen too
- * many weird things in MS software to rely on logic... Thus we
- * perform a manual search and make sure the first $MFT/$DATA
- * extent is in the base inode. If it is not we abort with an
- * error and if we ever see a report of this error we will need
- * to do some magic in order to have the necessary mft record
- * loaded and in the right place in the page cache. But
- * hopefully logic will prevail and this never happens...
- */
- al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
- al_end = (u8*)al_entry + ni->attr_list_size;
- for (;; al_entry = next_al_entry) {
- /* Out of bounds check. */
- if ((u8*)al_entry < ni->attr_list ||
- (u8*)al_entry > al_end)
- goto em_put_err_out;
- /* Catch the end of the attribute list. */
- if ((u8*)al_entry == al_end)
- goto em_put_err_out;
- if (!al_entry->length)
- goto em_put_err_out;
- if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
- le16_to_cpu(al_entry->length) > al_end)
- goto em_put_err_out;
- next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
- le16_to_cpu(al_entry->length));
- if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
- goto em_put_err_out;
- if (AT_DATA != al_entry->type)
- continue;
- /* We want an unnamed attribute. */
- if (al_entry->name_length)
- goto em_put_err_out;
- /* Want the first entry, i.e. lowest_vcn == 0. */
- if (al_entry->lowest_vcn)
- goto em_put_err_out;
- /* First entry has to be in the base mft record. */
- if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
- /* MFT references do not match, logic fails. */
- ntfs_error(sb, "BUG: The first $DATA extent "
- "of $MFT is not in the base "
- "mft record. Please report "
- "you saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- goto put_err_out;
- } else {
- /* Sequence numbers must match. */
- if (MSEQNO_LE(al_entry->mft_reference) !=
- ni->seq_no)
- goto em_put_err_out;
- /* Got it. All is ok. We can stop now. */
- break;
- }
- }
- }
-
- ntfs_attr_reinit_search_ctx(ctx);
-
- /* Now load all attribute extents. */
- a = NULL;
- next_vcn = last_vcn = highest_vcn = 0;
- while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
- ctx))) {
- runlist_element *nrl;
-
- /* Cache the current attribute. */
- a = ctx->attr;
- /* $MFT must be non-resident. */
- if (!a->non_resident) {
- ntfs_error(sb, "$MFT must be non-resident but a "
- "resident extent was found. $MFT is "
- "corrupt. Run chkdsk.");
- goto put_err_out;
- }
- /* $MFT must be uncompressed and unencrypted. */
- if (a->flags & ATTR_COMPRESSION_MASK ||
- a->flags & ATTR_IS_ENCRYPTED ||
- a->flags & ATTR_IS_SPARSE) {
- ntfs_error(sb, "$MFT must be uncompressed, "
- "non-sparse, and unencrypted but a "
- "compressed/sparse/encrypted extent "
- "was found. $MFT is corrupt. Run "
- "chkdsk.");
- goto put_err_out;
- }
- /*
- * Decompress the mapping pairs array of this extent and merge
- * the result into the existing runlist. No need for locking
- * as we have exclusive access to the inode at this time and we
- * are a mount in progress task, too.
- */
- nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
- if (IS_ERR(nrl)) {
- ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
- "failed with error code %ld. $MFT is "
- "corrupt.", PTR_ERR(nrl));
- goto put_err_out;
- }
- ni->runlist.rl = nrl;
-
- /* Are we in the first extent? */
- if (!next_vcn) {
- if (a->data.non_resident.lowest_vcn) {
- ntfs_error(sb, "First extent of $DATA "
- "attribute has non zero "
- "lowest_vcn. $MFT is corrupt. "
- "You should run chkdsk.");
- goto put_err_out;
- }
- /* Get the last vcn in the $DATA attribute. */
- last_vcn = sle64_to_cpu(
- a->data.non_resident.allocated_size)
- >> vol->cluster_size_bits;
- /* Fill in the inode size. */
- vi->i_size = sle64_to_cpu(
- a->data.non_resident.data_size);
- ni->initialized_size = sle64_to_cpu(
- a->data.non_resident.initialized_size);
- ni->allocated_size = sle64_to_cpu(
- a->data.non_resident.allocated_size);
- /*
- * Verify the number of mft records does not exceed
- * 2^32 - 1.
- */
- if ((vi->i_size >> vol->mft_record_size_bits) >=
- (1ULL << 32)) {
- ntfs_error(sb, "$MFT is too big! Aborting.");
- goto put_err_out;
- }
- /*
- * We have got the first extent of the runlist for
- * $MFT which means it is now relatively safe to call
- * the normal ntfs_read_inode() function.
- * Complete reading the inode, this will actually
- * re-read the mft record for $MFT, this time entering
- * it into the page cache with which we complete the
- * kick start of the volume. It should be safe to do
- * this now as the first extent of $MFT/$DATA is
- * already known and we would hope that we don't need
- * further extents in order to find the other
- * attributes belonging to $MFT. Only time will tell if
- * this is really the case. If not we will have to play
- * magic at this point, possibly duplicating a lot of
- * ntfs_read_inode() at this point. We will need to
- * ensure we do enough of its work to be able to call
- * ntfs_read_inode() on extents of $MFT/$DATA. But lets
- * hope this never happens...
- */
- ntfs_read_locked_inode(vi);
- if (is_bad_inode(vi)) {
- ntfs_error(sb, "ntfs_read_inode() of $MFT "
- "failed. BUG or corrupt $MFT. "
- "Run chkdsk and if no errors "
- "are found, please report you "
- "saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- ntfs_attr_put_search_ctx(ctx);
- /* Revert to the safe super operations. */
- ntfs_free(m);
- return -1;
- }
- /*
- * Re-initialize some specifics about $MFT's inode as
- * ntfs_read_inode() will have set up the default ones.
- */
- /* Set uid and gid to root. */
- vi->i_uid = GLOBAL_ROOT_UID;
- vi->i_gid = GLOBAL_ROOT_GID;
- /* Regular file. No access for anyone. */
- vi->i_mode = S_IFREG;
- /* No VFS initiated operations allowed for $MFT. */
- vi->i_op = &ntfs_empty_inode_ops;
- vi->i_fop = &ntfs_empty_file_ops;
- }
-
- /* Get the lowest vcn for the next extent. */
- highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
- next_vcn = highest_vcn + 1;
-
- /* Only one extent or error, which we catch below. */
- if (next_vcn <= 0)
- break;
-
- /* Avoid endless loops due to corruption. */
- if (next_vcn < sle64_to_cpu(
- a->data.non_resident.lowest_vcn)) {
- ntfs_error(sb, "$MFT has corrupt attribute list "
- "attribute. Run chkdsk.");
- goto put_err_out;
- }
- }
- if (err != -ENOENT) {
- ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. "
- "$MFT is corrupt. Run chkdsk.");
- goto put_err_out;
- }
- if (!a) {
- ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
- "corrupt. Run chkdsk.");
- goto put_err_out;
- }
- if (highest_vcn && highest_vcn != last_vcn - 1) {
- ntfs_error(sb, "Failed to load the complete runlist for "
- "$MFT/$DATA. Driver bug or corrupt $MFT. "
- "Run chkdsk.");
- ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
- (unsigned long long)highest_vcn,
- (unsigned long long)last_vcn - 1);
- goto put_err_out;
- }
- ntfs_attr_put_search_ctx(ctx);
- ntfs_debug("Done.");
- ntfs_free(m);
-
- /*
- * Split the locking rules of the MFT inode from the
- * locking rules of other inodes:
- */
- lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
- lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
-
- return 0;
-
-em_put_err_out:
- ntfs_error(sb, "Couldn't find first extent of $DATA attribute in "
- "attribute list. $MFT is corrupt. Run chkdsk.");
-put_err_out:
- ntfs_attr_put_search_ctx(ctx);
-err_out:
- ntfs_error(sb, "Failed. Marking inode as bad.");
- make_bad_inode(vi);
- ntfs_free(m);
- return -1;
-}
-
-static void __ntfs_clear_inode(ntfs_inode *ni)
-{
- /* Free all alocated memory. */
- down_write(&ni->runlist.lock);
- if (ni->runlist.rl) {
- ntfs_free(ni->runlist.rl);
- ni->runlist.rl = NULL;
- }
- up_write(&ni->runlist.lock);
-
- if (ni->attr_list) {
- ntfs_free(ni->attr_list);
- ni->attr_list = NULL;
- }
-
- down_write(&ni->attr_list_rl.lock);
- if (ni->attr_list_rl.rl) {
- ntfs_free(ni->attr_list_rl.rl);
- ni->attr_list_rl.rl = NULL;
- }
- up_write(&ni->attr_list_rl.lock);
-
- if (ni->name_len && ni->name != I30) {
- /* Catch bugs... */
- BUG_ON(!ni->name);
- kfree(ni->name);
- }
-}
-
-void ntfs_clear_extent_inode(ntfs_inode *ni)
-{
- ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
-
- BUG_ON(NInoAttr(ni));
- BUG_ON(ni->nr_extents != -1);
-
-#ifdef NTFS_RW
- if (NInoDirty(ni)) {
- if (!is_bad_inode(VFS_I(ni->ext.base_ntfs_ino)))
- ntfs_error(ni->vol->sb, "Clearing dirty extent inode! "
- "Losing data! This is a BUG!!!");
- // FIXME: Do something!!!
- }
-#endif /* NTFS_RW */
-
- __ntfs_clear_inode(ni);
-
- /* Bye, bye... */
- ntfs_destroy_extent_inode(ni);
-}
-
-/**
- * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
- * @vi: vfs inode pending annihilation
- *
- * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
- * is called, which deallocates all memory belonging to the NTFS specific part
- * of the inode and returns.
- *
- * If the MFT record is dirty, we commit it before doing anything else.
- */
-void ntfs_evict_big_inode(struct inode *vi)
-{
- ntfs_inode *ni = NTFS_I(vi);
-
- truncate_inode_pages_final(&vi->i_data);
- clear_inode(vi);
-
-#ifdef NTFS_RW
- if (NInoDirty(ni)) {
- bool was_bad = (is_bad_inode(vi));
-
- /* Committing the inode also commits all extent inodes. */
- ntfs_commit_inode(vi);
-
- if (!was_bad && (is_bad_inode(vi) || NInoDirty(ni))) {
- ntfs_error(vi->i_sb, "Failed to commit dirty inode "
- "0x%lx. Losing data!", vi->i_ino);
- // FIXME: Do something!!!
- }
- }
-#endif /* NTFS_RW */
-
- /* No need to lock at this stage as no one else has a reference. */
- if (ni->nr_extents > 0) {
- int i;
-
- for (i = 0; i < ni->nr_extents; i++)
- ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]);
- kfree(ni->ext.extent_ntfs_inos);
- }
-
- __ntfs_clear_inode(ni);
-
- if (NInoAttr(ni)) {
- /* Release the base inode if we are holding it. */
- if (ni->nr_extents == -1) {
- iput(VFS_I(ni->ext.base_ntfs_ino));
- ni->nr_extents = 0;
- ni->ext.base_ntfs_ino = NULL;
- }
- }
- BUG_ON(ni->page);
- if (!atomic_dec_and_test(&ni->count))
- BUG();
- return;
-}
-
-/**
- * ntfs_show_options - show mount options in /proc/mounts
- * @sf: seq_file in which to write our mount options
- * @root: root of the mounted tree whose mount options to display
- *
- * Called by the VFS once for each mounted ntfs volume when someone reads
- * /proc/mounts in order to display the NTFS specific mount options of each
- * mount. The mount options of fs specified by @root are written to the seq file
- * @sf and success is returned.
- */
-int ntfs_show_options(struct seq_file *sf, struct dentry *root)
-{
- ntfs_volume *vol = NTFS_SB(root->d_sb);
- int i;
-
- seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
- seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
- if (vol->fmask == vol->dmask)
- seq_printf(sf, ",umask=0%o", vol->fmask);
- else {
- seq_printf(sf, ",fmask=0%o", vol->fmask);
- seq_printf(sf, ",dmask=0%o", vol->dmask);
- }
- seq_printf(sf, ",nls=%s", vol->nls_map->charset);
- if (NVolCaseSensitive(vol))
- seq_printf(sf, ",case_sensitive");
- if (NVolShowSystemFiles(vol))
- seq_printf(sf, ",show_sys_files");
- if (!NVolSparseEnabled(vol))
- seq_printf(sf, ",disable_sparse");
- for (i = 0; on_errors_arr[i].val; i++) {
- if (on_errors_arr[i].val & vol->on_errors)
- seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
- }
- seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
- return 0;
-}
-
-#ifdef NTFS_RW
-
-static const char *es = " Leaving inconsistent metadata. Unmount and run "
- "chkdsk.";
-
-/**
- * ntfs_truncate - called when the i_size of an ntfs inode is changed
- * @vi: inode for which the i_size was changed
- *
- * We only support i_size changes for normal files at present, i.e. not
- * compressed and not encrypted. This is enforced in ntfs_setattr(), see
- * below.
- *
- * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
- * that the change is allowed.
- *
- * This implies for us that @vi is a file inode rather than a directory, index,
- * or attribute inode as well as that @vi is a base inode.
- *
- * Returns 0 on success or -errno on error.
- *
- * Called with ->i_mutex held.
- */
-int ntfs_truncate(struct inode *vi)
-{
- s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
- VCN highest_vcn;
- unsigned long flags;
- ntfs_inode *base_ni, *ni = NTFS_I(vi);
- ntfs_volume *vol = ni->vol;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- const char *te = " Leaving file length out of sync with i_size.";
- int err, mp_size, size_change, alloc_change;
-
- ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
- BUG_ON(NInoAttr(ni));
- BUG_ON(S_ISDIR(vi->i_mode));
- BUG_ON(NInoMstProtected(ni));
- BUG_ON(ni->nr_extents < 0);
-retry_truncate:
- /*
- * Lock the runlist for writing and map the mft record to ensure it is
- * safe to mess with the attribute runlist and sizes.
- */
- down_write(&ni->runlist.lock);
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
- "(error code %d).%s", vi->i_ino, err, te);
- ctx = NULL;
- m = NULL;
- goto old_bad_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- ntfs_error(vi->i_sb, "Failed to allocate a search context for "
- "inode 0x%lx (not enough memory).%s",
- vi->i_ino, te);
- err = -ENOMEM;
- goto old_bad_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT) {
- ntfs_error(vi->i_sb, "Open attribute is missing from "
- "mft record. Inode 0x%lx is corrupt. "
- "Run chkdsk.%s", vi->i_ino, te);
- err = -EIO;
- } else
- ntfs_error(vi->i_sb, "Failed to lookup attribute in "
- "inode 0x%lx (error code %d).%s",
- vi->i_ino, err, te);
- goto old_bad_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- /*
- * The i_size of the vfs inode is the new size for the attribute value.
- */
- new_size = i_size_read(vi);
- /* The current size of the attribute value is the old size. */
- old_size = ntfs_attr_size(a);
- /* Calculate the new allocated size. */
- if (NInoNonResident(ni))
- new_alloc_size = (new_size + vol->cluster_size - 1) &
- ~(s64)vol->cluster_size_mask;
- else
- new_alloc_size = (new_size + 7) & ~7;
- /* The current allocated size is the old allocated size. */
- read_lock_irqsave(&ni->size_lock, flags);
- old_alloc_size = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- /*
- * The change in the file size. This will be 0 if no change, >0 if the
- * size is growing, and <0 if the size is shrinking.
- */
- size_change = -1;
- if (new_size - old_size >= 0) {
- size_change = 1;
- if (new_size == old_size)
- size_change = 0;
- }
- /* As above for the allocated size. */
- alloc_change = -1;
- if (new_alloc_size - old_alloc_size >= 0) {
- alloc_change = 1;
- if (new_alloc_size == old_alloc_size)
- alloc_change = 0;
- }
- /*
- * If neither the size nor the allocation are being changed there is
- * nothing to do.
- */
- if (!size_change && !alloc_change)
- goto unm_done;
- /* If the size is changing, check if new size is allowed in $AttrDef. */
- if (size_change) {
- err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
- if (unlikely(err)) {
- if (err == -ERANGE) {
- ntfs_error(vol->sb, "Truncate would cause the "
- "inode 0x%lx to %simum size "
- "for its attribute type "
- "(0x%x). Aborting truncate.",
- vi->i_ino,
- new_size > old_size ? "exceed "
- "the max" : "go under the min",
- le32_to_cpu(ni->type));
- err = -EFBIG;
- } else {
- ntfs_error(vol->sb, "Inode 0x%lx has unknown "
- "attribute type 0x%x. "
- "Aborting truncate.",
- vi->i_ino,
- le32_to_cpu(ni->type));
- err = -EIO;
- }
- /* Reset the vfs inode size to the old size. */
- i_size_write(vi, old_size);
- goto err_out;
- }
- }
- if (NInoCompressed(ni) || NInoEncrypted(ni)) {
- ntfs_warning(vi->i_sb, "Changes in inode size are not "
- "supported yet for %s files, ignoring.",
- NInoCompressed(ni) ? "compressed" :
- "encrypted");
- err = -EOPNOTSUPP;
- goto bad_out;
- }
- if (a->non_resident)
- goto do_non_resident_truncate;
- BUG_ON(NInoNonResident(ni));
- /* Resize the attribute record to best fit the new attribute size. */
- if (new_size < vol->mft_record_size &&
- !ntfs_resident_attr_value_resize(m, a, new_size)) {
- /* The resize succeeded! */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- write_lock_irqsave(&ni->size_lock, flags);
- /* Update the sizes in the ntfs inode and all is done. */
- ni->allocated_size = le32_to_cpu(a->length) -
- le16_to_cpu(a->data.resident.value_offset);
- /*
- * Note ntfs_resident_attr_value_resize() has already done any
- * necessary data clearing in the attribute record. When the
- * file is being shrunk vmtruncate() will already have cleared
- * the top part of the last partial page, i.e. since this is
- * the resident case this is the page with index 0. However,
- * when the file is being expanded, the page cache page data
- * between the old data_size, i.e. old_size, and the new_size
- * has not been zeroed. Fortunately, we do not need to zero it
- * either since on one hand it will either already be zero due
- * to both read_folio and writepage clearing partial page data
- * beyond i_size in which case there is nothing to do or in the
- * case of the file being mmap()ped at the same time, POSIX
- * specifies that the behaviour is unspecified thus we do not
- * have to do anything. This means that in our implementation
- * in the rare case that the file is mmap()ped and a write
- * occurred into the mmap()ped region just beyond the file size
- * and writepage has not yet been called to write out the page
- * (which would clear the area beyond the file size) and we now
- * extend the file size to incorporate this dirty region
- * outside the file size, a write of the page would result in
- * this data being written to disk instead of being cleared.
- * Given both POSIX and the Linux mmap(2) man page specify that
- * this corner case is undefined, we choose to leave it like
- * that as this is much simpler for us as we cannot lock the
- * relevant page now since we are holding too many ntfs locks
- * which would result in a lock reversal deadlock.
- */
- ni->initialized_size = new_size;
- write_unlock_irqrestore(&ni->size_lock, flags);
- goto unm_done;
- }
- /* If the above resize failed, this must be an attribute extension. */
- BUG_ON(size_change < 0);
- /*
- * We have to drop all the locks so we can call
- * ntfs_attr_make_non_resident(). This could be optimised by try-
- * locking the first page cache page and only if that fails dropping
- * the locks, locking the page, and redoing all the locking and
- * lookups. While this would be a huge optimisation, it is not worth
- * it as this is definitely a slow code path as it only ever can happen
- * once for any given file.
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
- /*
- * Not enough space in the mft record, try to make the attribute
- * non-resident and if successful restart the truncation process.
- */
- err = ntfs_attr_make_non_resident(ni, old_size);
- if (likely(!err))
- goto retry_truncate;
- /*
- * Could not make non-resident. If this is due to this not being
- * permitted for this attribute type or there not being enough space,
- * try to make other attributes non-resident. Otherwise fail.
- */
- if (unlikely(err != -EPERM && err != -ENOSPC)) {
- ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
- "type 0x%x, because the conversion from "
- "resident to non-resident attribute failed "
- "with error code %i.", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- if (err != -ENOMEM)
- err = -EIO;
- goto conv_err_out;
- }
- /* TODO: Not implemented from here, abort. */
- if (err == -ENOSPC)
- ntfs_error(vol->sb, "Not enough space in the mft record/on "
- "disk for the non-resident attribute value. "
- "This case is not implemented yet.");
- else /* if (err == -EPERM) */
- ntfs_error(vol->sb, "This attribute type may not be "
- "non-resident. This case is not implemented "
- "yet.");
- err = -EOPNOTSUPP;
- goto conv_err_out;
-#if 0
- // TODO: Attempt to make other attributes non-resident.
- if (!err)
- goto do_resident_extend;
- /*
- * Both the attribute list attribute and the standard information
- * attribute must remain in the base inode. Thus, if this is one of
- * these attributes, we have to try to move other attributes out into
- * extent mft records instead.
- */
- if (ni->type == AT_ATTRIBUTE_LIST ||
- ni->type == AT_STANDARD_INFORMATION) {
- // TODO: Attempt to move other attributes into extent mft
- // records.
- err = -EOPNOTSUPP;
- if (!err)
- goto do_resident_extend;
- goto err_out;
- }
- // TODO: Attempt to move this attribute to an extent mft record, but
- // only if it is not already the only attribute in an mft record in
- // which case there would be nothing to gain.
- err = -EOPNOTSUPP;
- if (!err)
- goto do_resident_extend;
- /* There is nothing we can do to make enough space. )-: */
- goto err_out;
-#endif
-do_non_resident_truncate:
- BUG_ON(!NInoNonResident(ni));
- if (alloc_change < 0) {
- highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
- if (highest_vcn > 0 &&
- old_alloc_size >> vol->cluster_size_bits >
- highest_vcn + 1) {
- /*
- * This attribute has multiple extents. Not yet
- * supported.
- */
- ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
- "attribute type 0x%x, because the "
- "attribute is highly fragmented (it "
- "consists of multiple extents) and "
- "this case is not implemented yet.",
- vi->i_ino,
- (unsigned)le32_to_cpu(ni->type));
- err = -EOPNOTSUPP;
- goto bad_out;
- }
- }
- /*
- * If the size is shrinking, need to reduce the initialized_size and
- * the data_size before reducing the allocation.
- */
- if (size_change < 0) {
- /*
- * Make the valid size smaller (i_size is already up-to-date).
- */
- write_lock_irqsave(&ni->size_lock, flags);
- if (new_size < ni->initialized_size) {
- ni->initialized_size = new_size;
- a->data.non_resident.initialized_size =
- cpu_to_sle64(new_size);
- }
- a->data.non_resident.data_size = cpu_to_sle64(new_size);
- write_unlock_irqrestore(&ni->size_lock, flags);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- /* If the allocated size is not changing, we are done. */
- if (!alloc_change)
- goto unm_done;
- /*
- * If the size is shrinking it makes no sense for the
- * allocation to be growing.
- */
- BUG_ON(alloc_change > 0);
- } else /* if (size_change >= 0) */ {
- /*
- * The file size is growing or staying the same but the
- * allocation can be shrinking, growing or staying the same.
- */
- if (alloc_change > 0) {
- /*
- * We need to extend the allocation and possibly update
- * the data size. If we are updating the data size,
- * since we are not touching the initialized_size we do
- * not need to worry about the actual data on disk.
- * And as far as the page cache is concerned, there
- * will be no pages beyond the old data size and any
- * partial region in the last page between the old and
- * new data size (or the end of the page if the new
- * data size is outside the page) does not need to be
- * modified as explained above for the resident
- * attribute truncate case. To do this, we simply drop
- * the locks we hold and leave all the work to our
- * friendly helper ntfs_attr_extend_allocation().
- */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
- err = ntfs_attr_extend_allocation(ni, new_size,
- size_change > 0 ? new_size : -1, -1);
- /*
- * ntfs_attr_extend_allocation() will have done error
- * output already.
- */
- goto done;
- }
- if (!alloc_change)
- goto alloc_done;
- }
- /* alloc_change < 0 */
- /* Free the clusters. */
- nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
- vol->cluster_size_bits, -1, ctx);
- m = ctx->mrec;
- a = ctx->attr;
- if (unlikely(nr_freed < 0)) {
- ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
- "%lli). Unmount and run chkdsk to recover "
- "the lost cluster(s).", (long long)nr_freed);
- NVolSetErrors(vol);
- nr_freed = 0;
- }
- /* Truncate the runlist. */
- err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
- new_alloc_size >> vol->cluster_size_bits);
- /*
- * If the runlist truncation failed and/or the search context is no
- * longer valid, we cannot resize the attribute record or build the
- * mapping pairs array thus we mark the inode bad so that no access to
- * the freed clusters can happen.
- */
- if (unlikely(err || IS_ERR(m))) {
- ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
- IS_ERR(m) ?
- "restore attribute search context" :
- "truncate attribute runlist",
- IS_ERR(m) ? PTR_ERR(m) : err, es);
- err = -EIO;
- goto bad_out;
- }
- /* Get the size for the shrunk mapping pairs array for the runlist. */
- mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
- if (unlikely(mp_size <= 0)) {
- ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
- "attribute type 0x%x, because determining the "
- "size for the mapping pairs failed with error "
- "code %i.%s", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), mp_size, es);
- err = -EIO;
- goto bad_out;
- }
- /*
- * Shrink the attribute record for the new mapping pairs array. Note,
- * this cannot fail since we are making the attribute smaller thus by
- * definition there is enough space to do so.
- */
- err = ntfs_attr_record_resize(m, a, mp_size +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
- BUG_ON(err);
- /*
- * Generate the mapping pairs array directly into the attribute record.
- */
- err = ntfs_mapping_pairs_build(vol, (u8*)a +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
- mp_size, ni->runlist.rl, 0, -1, NULL);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
- "attribute type 0x%x, because building the "
- "mapping pairs failed with error code %i.%s",
- vi->i_ino, (unsigned)le32_to_cpu(ni->type),
- err, es);
- err = -EIO;
- goto bad_out;
- }
- /* Update the allocated/compressed size as well as the highest vcn. */
- a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
- vol->cluster_size_bits) - 1);
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = new_alloc_size;
- a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
- if (NInoSparse(ni) || NInoCompressed(ni)) {
- if (nr_freed) {
- ni->itype.compressed.size -= nr_freed <<
- vol->cluster_size_bits;
- BUG_ON(ni->itype.compressed.size < 0);
- a->data.non_resident.compressed_size = cpu_to_sle64(
- ni->itype.compressed.size);
- vi->i_blocks = ni->itype.compressed.size >> 9;
- }
- } else
- vi->i_blocks = new_alloc_size >> 9;
- write_unlock_irqrestore(&ni->size_lock, flags);
- /*
- * We have shrunk the allocation. If this is a shrinking truncate we
- * have already dealt with the initialized_size and the data_size above
- * and we are done. If the truncate is only changing the allocation
- * and not the data_size, we are also done. If this is an extending
- * truncate, need to extend the data_size now which is ensured by the
- * fact that @size_change is positive.
- */
-alloc_done:
- /*
- * If the size is growing, need to update it now. If it is shrinking,
- * we have already updated it above (before the allocation change).
- */
- if (size_change > 0)
- a->data.non_resident.data_size = cpu_to_sle64(new_size);
- /* Ensure the modified mft record is written out. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
-unm_done:
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
-done:
- /* Update the mtime and ctime on the base inode. */
- /* normally ->truncate shouldn't update ctime or mtime,
- * but ntfs did before so it got a copy & paste version
- * of file_update_time. one day someone should fix this
- * for real.
- */
- if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
- struct timespec64 now = current_time(VFS_I(base_ni));
- struct timespec64 ctime = inode_get_ctime(VFS_I(base_ni));
- struct timespec64 mtime = inode_get_mtime(VFS_I(base_ni));
- int sync_it = 0;
-
- if (!timespec64_equal(&mtime, &now) ||
- !timespec64_equal(&ctime, &now))
- sync_it = 1;
- inode_set_ctime_to_ts(VFS_I(base_ni), now);
- inode_set_mtime_to_ts(VFS_I(base_ni), now);
-
- if (sync_it)
- mark_inode_dirty_sync(VFS_I(base_ni));
- }
-
- if (likely(!err)) {
- NInoClearTruncateFailed(ni);
- ntfs_debug("Done.");
- }
- return err;
-old_bad_out:
- old_size = -1;
-bad_out:
- if (err != -ENOMEM && err != -EOPNOTSUPP)
- NVolSetErrors(vol);
- if (err != -EOPNOTSUPP)
- NInoSetTruncateFailed(ni);
- else if (old_size >= 0)
- i_size_write(vi, old_size);
-err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- up_write(&ni->runlist.lock);
-out:
- ntfs_debug("Failed. Returning error code %i.", err);
- return err;
-conv_err_out:
- if (err != -ENOMEM && err != -EOPNOTSUPP)
- NVolSetErrors(vol);
- if (err != -EOPNOTSUPP)
- NInoSetTruncateFailed(ni);
- else
- i_size_write(vi, old_size);
- goto out;
-}
-
-/**
- * ntfs_truncate_vfs - wrapper for ntfs_truncate() that has no return value
- * @vi: inode for which the i_size was changed
- *
- * Wrapper for ntfs_truncate() that has no return value.
- *
- * See ntfs_truncate() description above for details.
- */
-#ifdef NTFS_RW
-void ntfs_truncate_vfs(struct inode *vi) {
- ntfs_truncate(vi);
-}
-#endif
-
-/**
- * ntfs_setattr - called from notify_change() when an attribute is being changed
- * @idmap: idmap of the mount the inode was found from
- * @dentry: dentry whose attributes to change
- * @attr: structure describing the attributes and the changes
- *
- * We have to trap VFS attempts to truncate the file described by @dentry as
- * soon as possible, because we do not implement changes in i_size yet. So we
- * abort all i_size changes here.
- *
- * We also abort all changes of user, group, and mode as we do not implement
- * the NTFS ACLs yet.
- *
- * Called with ->i_mutex held.
- */
-int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
- struct iattr *attr)
-{
- struct inode *vi = d_inode(dentry);
- int err;
- unsigned int ia_valid = attr->ia_valid;
-
- err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
- if (err)
- goto out;
- /* We do not support NTFS ACLs yet. */
- if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
- ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
- "supported yet, ignoring.");
- err = -EOPNOTSUPP;
- goto out;
- }
- if (ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(vi)) {
- ntfs_inode *ni = NTFS_I(vi);
- /*
- * FIXME: For now we do not support resizing of
- * compressed or encrypted files yet.
- */
- if (NInoCompressed(ni) || NInoEncrypted(ni)) {
- ntfs_warning(vi->i_sb, "Changes in inode size "
- "are not supported yet for "
- "%s files, ignoring.",
- NInoCompressed(ni) ?
- "compressed" : "encrypted");
- err = -EOPNOTSUPP;
- } else {
- truncate_setsize(vi, attr->ia_size);
- ntfs_truncate_vfs(vi);
- }
- if (err || ia_valid == ATTR_SIZE)
- goto out;
- } else {
- /*
- * We skipped the truncate but must still update
- * timestamps.
- */
- ia_valid |= ATTR_MTIME | ATTR_CTIME;
- }
- }
- if (ia_valid & ATTR_ATIME)
- inode_set_atime_to_ts(vi, attr->ia_atime);
- if (ia_valid & ATTR_MTIME)
- inode_set_mtime_to_ts(vi, attr->ia_mtime);
- if (ia_valid & ATTR_CTIME)
- inode_set_ctime_to_ts(vi, attr->ia_ctime);
- mark_inode_dirty(vi);
-out:
- return err;
-}
-
-/**
- * __ntfs_write_inode - write out a dirty inode
- * @vi: inode to write out
- * @sync: if true, write out synchronously
- *
- * Write out a dirty inode to disk including any extent inodes if present.
- *
- * If @sync is true, commit the inode to disk and wait for io completion. This
- * is done using write_mft_record().
- *
- * If @sync is false, just schedule the write to happen but do not wait for i/o
- * completion. In 2.6 kernels, scheduling usually happens just by virtue of
- * marking the page (and in this case mft record) dirty but we do not implement
- * this yet as write_mft_record() largely ignores the @sync parameter and
- * always performs synchronous writes.
- *
- * Return 0 on success and -errno on error.
- */
-int __ntfs_write_inode(struct inode *vi, int sync)
-{
- sle64 nt;
- ntfs_inode *ni = NTFS_I(vi);
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *m;
- STANDARD_INFORMATION *si;
- int err = 0;
- bool modified = false;
-
- ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
- vi->i_ino);
- /*
- * Dirty attribute inodes are written via their real inodes so just
- * clean them here. Access time updates are taken care off when the
- * real inode is written.
- */
- if (NInoAttr(ni)) {
- NInoClearDirty(ni);
- ntfs_debug("Done.");
- return 0;
- }
- /* Map, pin, and lock the mft record belonging to the inode. */
- m = map_mft_record(ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- goto err_out;
- }
- /* Update the access times in the standard information attribute. */
- ctx = ntfs_attr_get_search_ctx(ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto unm_err_out;
- }
- err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- ntfs_attr_put_search_ctx(ctx);
- goto unm_err_out;
- }
- si = (STANDARD_INFORMATION*)((u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset));
- /* Update the access times if they have changed. */
- nt = utc2ntfs(inode_get_mtime(vi));
- if (si->last_data_change_time != nt) {
- ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
- "new = 0x%llx", vi->i_ino, (long long)
- sle64_to_cpu(si->last_data_change_time),
- (long long)sle64_to_cpu(nt));
- si->last_data_change_time = nt;
- modified = true;
- }
- nt = utc2ntfs(inode_get_ctime(vi));
- if (si->last_mft_change_time != nt) {
- ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
- "new = 0x%llx", vi->i_ino, (long long)
- sle64_to_cpu(si->last_mft_change_time),
- (long long)sle64_to_cpu(nt));
- si->last_mft_change_time = nt;
- modified = true;
- }
- nt = utc2ntfs(inode_get_atime(vi));
- if (si->last_access_time != nt) {
- ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
- "new = 0x%llx", vi->i_ino,
- (long long)sle64_to_cpu(si->last_access_time),
- (long long)sle64_to_cpu(nt));
- si->last_access_time = nt;
- modified = true;
- }
- /*
- * If we just modified the standard information attribute we need to
- * mark the mft record it is in dirty. We do this manually so that
- * mark_inode_dirty() is not called which would redirty the inode and
- * hence result in an infinite loop of trying to write the inode.
- * There is no need to mark the base inode nor the base mft record
- * dirty, since we are going to write this mft record below in any case
- * and the base mft record may actually not have been modified so it
- * might not need to be written out.
- * NOTE: It is not a problem when the inode for $MFT itself is being
- * written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES
- * on the $MFT inode and hence __ntfs_write_inode() will not be
- * re-invoked because of it which in turn is ok since the dirtied mft
- * record will be cleaned and written out to disk below, i.e. before
- * this function returns.
- */
- if (modified) {
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- if (!NInoTestSetDirty(ctx->ntfs_ino))
- mark_ntfs_record_dirty(ctx->ntfs_ino->page,
- ctx->ntfs_ino->page_ofs);
- }
- ntfs_attr_put_search_ctx(ctx);
- /* Now the access times are updated, write the base mft record. */
- if (NInoDirty(ni))
- err = write_mft_record(ni, m, sync);
- /* Write all attached extent mft records. */
- mutex_lock(&ni->extent_lock);
- if (ni->nr_extents > 0) {
- ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
- int i;
-
- ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
- for (i = 0; i < ni->nr_extents; i++) {
- ntfs_inode *tni = extent_nis[i];
-
- if (NInoDirty(tni)) {
- MFT_RECORD *tm = map_mft_record(tni);
- int ret;
-
- if (IS_ERR(tm)) {
- if (!err || err == -ENOMEM)
- err = PTR_ERR(tm);
- continue;
- }
- ret = write_mft_record(tni, tm, sync);
- unmap_mft_record(tni);
- if (unlikely(ret)) {
- if (!err || err == -ENOMEM)
- err = ret;
- }
- }
- }
- }
- mutex_unlock(&ni->extent_lock);
- unmap_mft_record(ni);
- if (unlikely(err))
- goto err_out;
- ntfs_debug("Done.");
- return 0;
-unm_err_out:
- unmap_mft_record(ni);
-err_out:
- if (err == -ENOMEM) {
- ntfs_warning(vi->i_sb, "Not enough memory to write inode. "
- "Marking the inode dirty again, so the VFS "
- "retries later.");
- mark_inode_dirty(vi);
- } else {
- ntfs_error(vi->i_sb, "Failed (error %i): Run chkdsk.", -err);
- NVolSetErrors(ni->vol);
- }
- return err;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
deleted file mode 100644
index 147ef4ddb691..000000000000
--- a/fs/ntfs/inode.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
- * the Linux-NTFS project.
- *
- * Copyright (c) 2001-2007 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_INODE_H
-#define _LINUX_NTFS_INODE_H
-
-#include <linux/atomic.h>
-
-#include <linux/fs.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/seq_file.h>
-
-#include "layout.h"
-#include "volume.h"
-#include "types.h"
-#include "runlist.h"
-#include "debug.h"
-
-typedef struct _ntfs_inode ntfs_inode;
-
-/*
- * The NTFS in-memory inode structure. It is just used as an extension to the
- * fields already provided in the VFS inode.
- */
-struct _ntfs_inode {
- rwlock_t size_lock; /* Lock serializing access to inode sizes. */
- s64 initialized_size; /* Copy from the attribute record. */
- s64 allocated_size; /* Copy from the attribute record. */
- unsigned long state; /* NTFS specific flags describing this inode.
- See ntfs_inode_state_bits below. */
- unsigned long mft_no; /* Number of the mft record / inode. */
- u16 seq_no; /* Sequence number of the mft record. */
- atomic_t count; /* Inode reference count for book keeping. */
- ntfs_volume *vol; /* Pointer to the ntfs volume of this inode. */
- /*
- * If NInoAttr() is true, the below fields describe the attribute which
- * this fake inode belongs to. The actual inode of this attribute is
- * pointed to by base_ntfs_ino and nr_extents is always set to -1 (see
- * below). For real inodes, we also set the type (AT_DATA for files and
- * AT_INDEX_ALLOCATION for directories), with the name = NULL and
- * name_len = 0 for files and name = I30 (global constant) and
- * name_len = 4 for directories.
- */
- ATTR_TYPE type; /* Attribute type of this fake inode. */
- ntfschar *name; /* Attribute name of this fake inode. */
- u32 name_len; /* Attribute name length of this fake inode. */
- runlist runlist; /* If state has the NI_NonResident bit set,
- the runlist of the unnamed data attribute
- (if a file) or of the index allocation
- attribute (directory) or of the attribute
- described by the fake inode (if NInoAttr()).
- If runlist.rl is NULL, the runlist has not
- been read in yet or has been unmapped. If
- NI_NonResident is clear, the attribute is
- resident (file and fake inode) or there is
- no $I30 index allocation attribute
- (small directory). In the latter case
- runlist.rl is always NULL.*/
- /*
- * The following fields are only valid for real inodes and extent
- * inodes.
- */
- struct mutex mrec_lock; /* Lock for serializing access to the
- mft record belonging to this inode. */
- struct page *page; /* The page containing the mft record of the
- inode. This should only be touched by the
- (un)map_mft_record*() functions. */
- int page_ofs; /* Offset into the page at which the mft record
- begins. This should only be touched by the
- (un)map_mft_record*() functions. */
- /*
- * Attribute list support (only for use by the attribute lookup
- * functions). Setup during read_inode for all inodes with attribute
- * lists. Only valid if NI_AttrList is set in state, and attr_list_rl is
- * further only valid if NI_AttrListNonResident is set.
- */
- u32 attr_list_size; /* Length of attribute list value in bytes. */
- u8 *attr_list; /* Attribute list value itself. */
- runlist attr_list_rl; /* Run list for the attribute list value. */
- union {
- struct { /* It is a directory, $MFT, or an index inode. */
- u32 block_size; /* Size of an index block. */
- u32 vcn_size; /* Size of a vcn in this
- index. */
- COLLATION_RULE collation_rule; /* The collation rule
- for the index. */
- u8 block_size_bits; /* Log2 of the above. */
- u8 vcn_size_bits; /* Log2 of the above. */
- } index;
- struct { /* It is a compressed/sparse file/attribute inode. */
- s64 size; /* Copy of compressed_size from
- $DATA. */
- u32 block_size; /* Size of a compression block
- (cb). */
- u8 block_size_bits; /* Log2 of the size of a cb. */
- u8 block_clusters; /* Number of clusters per cb. */
- } compressed;
- } itype;
- struct mutex extent_lock; /* Lock for accessing/modifying the
- below . */
- s32 nr_extents; /* For a base mft record, the number of attached extent
- inodes (0 if none), for extent records and for fake
- inodes describing an attribute this is -1. */
- union { /* This union is only used if nr_extents != 0. */
- ntfs_inode **extent_ntfs_inos; /* For nr_extents > 0, array of
- the ntfs inodes of the extent
- mft records belonging to
- this base inode which have
- been loaded. */
- ntfs_inode *base_ntfs_ino; /* For nr_extents == -1, the
- ntfs inode of the base mft
- record. For fake inodes, the
- real (base) inode to which
- the attribute belongs. */
- } ext;
-};
-
-/*
- * Defined bits for the state field in the ntfs_inode structure.
- * (f) = files only, (d) = directories only, (a) = attributes/fake inodes only
- */
-typedef enum {
- NI_Dirty, /* 1: Mft record needs to be written to disk. */
- NI_AttrList, /* 1: Mft record contains an attribute list. */
- NI_AttrListNonResident, /* 1: Attribute list is non-resident. Implies
- NI_AttrList is set. */
-
- NI_Attr, /* 1: Fake inode for attribute i/o.
- 0: Real inode or extent inode. */
-
- NI_MstProtected, /* 1: Attribute is protected by MST fixups.
- 0: Attribute is not protected by fixups. */
- NI_NonResident, /* 1: Unnamed data attr is non-resident (f).
- 1: Attribute is non-resident (a). */
- NI_IndexAllocPresent = NI_NonResident, /* 1: $I30 index alloc attr is
- present (d). */
- NI_Compressed, /* 1: Unnamed data attr is compressed (f).
- 1: Create compressed files by default (d).
- 1: Attribute is compressed (a). */
- NI_Encrypted, /* 1: Unnamed data attr is encrypted (f).
- 1: Create encrypted files by default (d).
- 1: Attribute is encrypted (a). */
- NI_Sparse, /* 1: Unnamed data attr is sparse (f).
- 1: Create sparse files by default (d).
- 1: Attribute is sparse (a). */
- NI_SparseDisabled, /* 1: May not create sparse regions. */
- NI_TruncateFailed, /* 1: Last ntfs_truncate() call failed. */
-} ntfs_inode_state_bits;
-
-/*
- * NOTE: We should be adding dirty mft records to a list somewhere and they
- * should be independent of the (ntfs/vfs) inode structure so that an inode can
- * be removed but the record can be left dirty for syncing later.
- */
-
-/*
- * Macro tricks to expand the NInoFoo(), NInoSetFoo(), and NInoClearFoo()
- * functions.
- */
-#define NINO_FNS(flag) \
-static inline int NIno##flag(ntfs_inode *ni) \
-{ \
- return test_bit(NI_##flag, &(ni)->state); \
-} \
-static inline void NInoSet##flag(ntfs_inode *ni) \
-{ \
- set_bit(NI_##flag, &(ni)->state); \
-} \
-static inline void NInoClear##flag(ntfs_inode *ni) \
-{ \
- clear_bit(NI_##flag, &(ni)->state); \
-}
-
-/*
- * As above for NInoTestSetFoo() and NInoTestClearFoo().
- */
-#define TAS_NINO_FNS(flag) \
-static inline int NInoTestSet##flag(ntfs_inode *ni) \
-{ \
- return test_and_set_bit(NI_##flag, &(ni)->state); \
-} \
-static inline int NInoTestClear##flag(ntfs_inode *ni) \
-{ \
- return test_and_clear_bit(NI_##flag, &(ni)->state); \
-}
-
-/* Emit the ntfs inode bitops functions. */
-NINO_FNS(Dirty)
-TAS_NINO_FNS(Dirty)
-NINO_FNS(AttrList)
-NINO_FNS(AttrListNonResident)
-NINO_FNS(Attr)
-NINO_FNS(MstProtected)
-NINO_FNS(NonResident)
-NINO_FNS(IndexAllocPresent)
-NINO_FNS(Compressed)
-NINO_FNS(Encrypted)
-NINO_FNS(Sparse)
-NINO_FNS(SparseDisabled)
-NINO_FNS(TruncateFailed)
-
-/*
- * The full structure containing a ntfs_inode and a vfs struct inode. Used for
- * all real and fake inodes but not for extent inodes which lack the vfs struct
- * inode.
- */
-typedef struct {
- ntfs_inode ntfs_inode;
- struct inode vfs_inode; /* The vfs inode structure. */
-} big_ntfs_inode;
-
-/**
- * NTFS_I - return the ntfs inode given a vfs inode
- * @inode: VFS inode
- *
- * NTFS_I() returns the ntfs inode associated with the VFS @inode.
- */
-static inline ntfs_inode *NTFS_I(struct inode *inode)
-{
- return (ntfs_inode *)container_of(inode, big_ntfs_inode, vfs_inode);
-}
-
-static inline struct inode *VFS_I(ntfs_inode *ni)
-{
- return &((big_ntfs_inode *)ni)->vfs_inode;
-}
-
-/**
- * ntfs_attr - ntfs in memory attribute structure
- * @mft_no: mft record number of the base mft record of this attribute
- * @name: Unicode name of the attribute (NULL if unnamed)
- * @name_len: length of @name in Unicode characters (0 if unnamed)
- * @type: attribute type (see layout.h)
- *
- * This structure exists only to provide a small structure for the
- * ntfs_{attr_}iget()/ntfs_test_inode()/ntfs_init_locked_inode() mechanism.
- *
- * NOTE: Elements are ordered by size to make the structure as compact as
- * possible on all architectures.
- */
-typedef struct {
- unsigned long mft_no;
- ntfschar *name;
- u32 name_len;
- ATTR_TYPE type;
-} ntfs_attr;
-
-extern int ntfs_test_inode(struct inode *vi, void *data);
-
-extern struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no);
-extern struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
- ntfschar *name, u32 name_len);
-extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
- u32 name_len);
-
-extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
-extern void ntfs_free_big_inode(struct inode *inode);
-extern void ntfs_evict_big_inode(struct inode *vi);
-
-extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
-
-static inline void ntfs_init_big_inode(struct inode *vi)
-{
- ntfs_inode *ni = NTFS_I(vi);
-
- ntfs_debug("Entering.");
- __ntfs_init_inode(vi->i_sb, ni);
- ni->mft_no = vi->i_ino;
-}
-
-extern ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
- unsigned long mft_no);
-extern void ntfs_clear_extent_inode(ntfs_inode *ni);
-
-extern int ntfs_read_inode_mount(struct inode *vi);
-
-extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
-
-#ifdef NTFS_RW
-
-extern int ntfs_truncate(struct inode *vi);
-extern void ntfs_truncate_vfs(struct inode *vi);
-
-extern int ntfs_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry, struct iattr *attr);
-
-extern int __ntfs_write_inode(struct inode *vi, int sync);
-
-static inline void ntfs_commit_inode(struct inode *vi)
-{
- if (!is_bad_inode(vi))
- __ntfs_write_inode(vi, 1);
- return;
-}
-
-#else
-
-static inline void ntfs_truncate_vfs(struct inode *vi) {}
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_INODE_H */
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
deleted file mode 100644
index 5d4bf7a3259f..000000000000
--- a/fs/ntfs/layout.h
+++ /dev/null
@@ -1,2421 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS
- * project.
- *
- * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_LAYOUT_H
-#define _LINUX_NTFS_LAYOUT_H
-
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/list.h>
-#include <asm/byteorder.h>
-
-#include "types.h"
-
-/* The NTFS oem_id "NTFS " */
-#define magicNTFS cpu_to_le64(0x202020205346544eULL)
-
-/*
- * Location of bootsector on partition:
- * The standard NTFS_BOOT_SECTOR is on sector 0 of the partition.
- * On NT4 and above there is one backup copy of the boot sector to
- * be found on the last sector of the partition (not normally accessible
- * from within Windows as the bootsector contained number of sectors
- * value is one less than the actual value!).
- * On versions of NT 3.51 and earlier, the backup copy was located at
- * number of sectors/2 (integer divide), i.e. in the middle of the volume.
- */
-
-/*
- * BIOS parameter block (bpb) structure.
- */
-typedef struct {
- le16 bytes_per_sector; /* Size of a sector in bytes. */
- u8 sectors_per_cluster; /* Size of a cluster in sectors. */
- le16 reserved_sectors; /* zero */
- u8 fats; /* zero */
- le16 root_entries; /* zero */
- le16 sectors; /* zero */
- u8 media_type; /* 0xf8 = hard disk */
- le16 sectors_per_fat; /* zero */
- le16 sectors_per_track; /* irrelevant */
- le16 heads; /* irrelevant */
- le32 hidden_sectors; /* zero */
- le32 large_sectors; /* zero */
-} __attribute__ ((__packed__)) BIOS_PARAMETER_BLOCK;
-
-/*
- * NTFS boot sector structure.
- */
-typedef struct {
- u8 jump[3]; /* Irrelevant (jump to boot up code).*/
- le64 oem_id; /* Magic "NTFS ". */
- BIOS_PARAMETER_BLOCK bpb; /* See BIOS_PARAMETER_BLOCK. */
- u8 unused[4]; /* zero, NTFS diskedit.exe states that
- this is actually:
- __u8 physical_drive; // 0x80
- __u8 current_head; // zero
- __u8 extended_boot_signature;
- // 0x80
- __u8 unused; // zero
- */
-/*0x28*/sle64 number_of_sectors; /* Number of sectors in volume. Gives
- maximum volume size of 2^63 sectors.
- Assuming standard sector size of 512
- bytes, the maximum byte size is
- approx. 4.7x10^21 bytes. (-; */
- sle64 mft_lcn; /* Cluster location of mft data. */
- sle64 mftmirr_lcn; /* Cluster location of copy of mft. */
- s8 clusters_per_mft_record; /* Mft record size in clusters. */
- u8 reserved0[3]; /* zero */
- s8 clusters_per_index_record; /* Index block size in clusters. */
- u8 reserved1[3]; /* zero */
- le64 volume_serial_number; /* Irrelevant (serial number). */
- le32 checksum; /* Boot sector checksum. */
-/*0x54*/u8 bootstrap[426]; /* Irrelevant (boot up code). */
- le16 end_of_sector_marker; /* End of bootsector magic. Always is
- 0xaa55 in little endian. */
-/* sizeof() = 512 (0x200) bytes */
-} __attribute__ ((__packed__)) NTFS_BOOT_SECTOR;
-
-/*
- * Magic identifiers present at the beginning of all ntfs record containing
- * records (like mft records for example).
- */
-enum {
- /* Found in $MFT/$DATA. */
- magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */
- magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */
- magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */
-
- /* Found in $LogFile/$DATA. */
- magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */
- magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */
-
- /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */
- magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
-
- /* Found in all ntfs record containing records. */
- magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector
- transfer was detected. */
- /*
- * Found in $LogFile/$DATA when a page is full of 0xff bytes and is
- * thus not initialized. Page must be initialized before using it.
- */
- magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */
-};
-
-typedef le32 NTFS_RECORD_TYPE;
-
-/*
- * Generic magic comparison macros. Finally found a use for the ## preprocessor
- * operator! (-8
- */
-
-static inline bool __ntfs_is_magic(le32 x, NTFS_RECORD_TYPE r)
-{
- return (x == r);
-}
-#define ntfs_is_magic(x, m) __ntfs_is_magic(x, magic_##m)
-
-static inline bool __ntfs_is_magicp(le32 *p, NTFS_RECORD_TYPE r)
-{
- return (*p == r);
-}
-#define ntfs_is_magicp(p, m) __ntfs_is_magicp(p, magic_##m)
-
-/*
- * Specialised magic comparison macros for the NTFS_RECORD_TYPEs defined above.
- */
-#define ntfs_is_file_record(x) ( ntfs_is_magic (x, FILE) )
-#define ntfs_is_file_recordp(p) ( ntfs_is_magicp(p, FILE) )
-#define ntfs_is_mft_record(x) ( ntfs_is_file_record (x) )
-#define ntfs_is_mft_recordp(p) ( ntfs_is_file_recordp(p) )
-#define ntfs_is_indx_record(x) ( ntfs_is_magic (x, INDX) )
-#define ntfs_is_indx_recordp(p) ( ntfs_is_magicp(p, INDX) )
-#define ntfs_is_hole_record(x) ( ntfs_is_magic (x, HOLE) )
-#define ntfs_is_hole_recordp(p) ( ntfs_is_magicp(p, HOLE) )
-
-#define ntfs_is_rstr_record(x) ( ntfs_is_magic (x, RSTR) )
-#define ntfs_is_rstr_recordp(p) ( ntfs_is_magicp(p, RSTR) )
-#define ntfs_is_rcrd_record(x) ( ntfs_is_magic (x, RCRD) )
-#define ntfs_is_rcrd_recordp(p) ( ntfs_is_magicp(p, RCRD) )
-
-#define ntfs_is_chkd_record(x) ( ntfs_is_magic (x, CHKD) )
-#define ntfs_is_chkd_recordp(p) ( ntfs_is_magicp(p, CHKD) )
-
-#define ntfs_is_baad_record(x) ( ntfs_is_magic (x, BAAD) )
-#define ntfs_is_baad_recordp(p) ( ntfs_is_magicp(p, BAAD) )
-
-#define ntfs_is_empty_record(x) ( ntfs_is_magic (x, empty) )
-#define ntfs_is_empty_recordp(p) ( ntfs_is_magicp(p, empty) )
-
-/*
- * The Update Sequence Array (usa) is an array of the le16 values which belong
- * to the end of each sector protected by the update sequence record in which
- * this array is contained. Note that the first entry is the Update Sequence
- * Number (usn), a cyclic counter of how many times the protected record has
- * been written to disk. The values 0 and -1 (ie. 0xffff) are not used. All
- * last le16's of each sector have to be equal to the usn (during reading) or
- * are set to it (during writing). If they are not, an incomplete multi sector
- * transfer has occurred when the data was written.
- * The maximum size for the update sequence array is fixed to:
- * maximum size = usa_ofs + (usa_count * 2) = 510 bytes
- * The 510 bytes comes from the fact that the last le16 in the array has to
- * (obviously) finish before the last le16 of the first 512-byte sector.
- * This formula can be used as a consistency check in that usa_ofs +
- * (usa_count * 2) has to be less than or equal to 510.
- */
-typedef struct {
- NTFS_RECORD_TYPE magic; /* A four-byte magic identifying the record
- type and/or status. */
- le16 usa_ofs; /* Offset to the Update Sequence Array (usa)
- from the start of the ntfs record. */
- le16 usa_count; /* Number of le16 sized entries in the usa
- including the Update Sequence Number (usn),
- thus the number of fixups is the usa_count
- minus 1. */
-} __attribute__ ((__packed__)) NTFS_RECORD;
-
-/*
- * System files mft record numbers. All these files are always marked as used
- * in the bitmap attribute of the mft; presumably in order to avoid accidental
- * allocation for random other mft records. Also, the sequence number for each
- * of the system files is always equal to their mft record number and it is
- * never modified.
- */
-typedef enum {
- FILE_MFT = 0, /* Master file table (mft). Data attribute
- contains the entries and bitmap attribute
- records which ones are in use (bit==1). */
- FILE_MFTMirr = 1, /* Mft mirror: copy of first four mft records
- in data attribute. If cluster size > 4kiB,
- copy of first N mft records, with
- N = cluster_size / mft_record_size. */
- FILE_LogFile = 2, /* Journalling log in data attribute. */
- FILE_Volume = 3, /* Volume name attribute and volume information
- attribute (flags and ntfs version). Windows
- refers to this file as volume DASD (Direct
- Access Storage Device). */
- FILE_AttrDef = 4, /* Array of attribute definitions in data
- attribute. */
- FILE_root = 5, /* Root directory. */
- FILE_Bitmap = 6, /* Allocation bitmap of all clusters (lcns) in
- data attribute. */
- FILE_Boot = 7, /* Boot sector (always at cluster 0) in data
- attribute. */
- FILE_BadClus = 8, /* Contains all bad clusters in the non-resident
- data attribute. */
- FILE_Secure = 9, /* Shared security descriptors in data attribute
- and two indexes into the descriptors.
- Appeared in Windows 2000. Before that, this
- file was named $Quota but was unused. */
- FILE_UpCase = 10, /* Uppercase equivalents of all 65536 Unicode
- characters in data attribute. */
- FILE_Extend = 11, /* Directory containing other system files (eg.
- $ObjId, $Quota, $Reparse and $UsnJrnl). This
- is new to NTFS3.0. */
- FILE_reserved12 = 12, /* Reserved for future use (records 12-15). */
- FILE_reserved13 = 13,
- FILE_reserved14 = 14,
- FILE_reserved15 = 15,
- FILE_first_user = 16, /* First user file, used as test limit for
- whether to allow opening a file or not. */
-} NTFS_SYSTEM_FILES;
-
-/*
- * These are the so far known MFT_RECORD_* flags (16-bit) which contain
- * information about the mft record in which they are present.
- */
-enum {
- MFT_RECORD_IN_USE = cpu_to_le16(0x0001),
- MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002),
-} __attribute__ ((__packed__));
-
-typedef le16 MFT_RECORD_FLAGS;
-
-/*
- * mft references (aka file references or file record segment references) are
- * used whenever a structure needs to refer to a record in the mft.
- *
- * A reference consists of a 48-bit index into the mft and a 16-bit sequence
- * number used to detect stale references.
- *
- * For error reporting purposes we treat the 48-bit index as a signed quantity.
- *
- * The sequence number is a circular counter (skipping 0) describing how many
- * times the referenced mft record has been (re)used. This has to match the
- * sequence number of the mft record being referenced, otherwise the reference
- * is considered stale and removed (FIXME: only ntfsck or the driver itself?).
- *
- * If the sequence number is zero it is assumed that no sequence number
- * consistency checking should be performed.
- *
- * FIXME: Since inodes are 32-bit as of now, the driver needs to always check
- * for high_part being 0 and if not either BUG(), cause a panic() or handle
- * the situation in some other way. This shouldn't be a problem as a volume has
- * to become HUGE in order to need more than 32-bits worth of mft records.
- * Assuming the standard mft record size of 1kb only the records (never mind
- * the non-resident attributes, etc.) would require 4Tb of space on their own
- * for the first 32 bits worth of records. This is only if some strange person
- * doesn't decide to foul play and make the mft sparse which would be a really
- * horrible thing to do as it would trash our current driver implementation. )-:
- * Do I hear screams "we want 64-bit inodes!" ?!? (-;
- *
- * FIXME: The mft zone is defined as the first 12% of the volume. This space is
- * reserved so that the mft can grow contiguously and hence doesn't become
- * fragmented. Volume free space includes the empty part of the mft zone and
- * when the volume's free 88% are used up, the mft zone is shrunk by a factor
- * of 2, thus making more space available for more files/data. This process is
- * repeated every time there is no more free space except for the mft zone until
- * there really is no more free space.
- */
-
-/*
- * Typedef the MFT_REF as a 64-bit value for easier handling.
- * Also define two unpacking macros to get to the reference (MREF) and
- * sequence number (MSEQNO) respectively.
- * The _LE versions are to be applied on little endian MFT_REFs.
- * Note: The _LE versions will return a CPU endian formatted value!
- */
-#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
-#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU)
-
-typedef u64 MFT_REF;
-typedef le64 leMFT_REF;
-
-#define MK_MREF(m, s) ((MFT_REF)(((MFT_REF)(s) << 48) | \
- ((MFT_REF)(m) & MFT_REF_MASK_CPU)))
-#define MK_LE_MREF(m, s) cpu_to_le64(MK_MREF(m, s))
-
-#define MREF(x) ((unsigned long)((x) & MFT_REF_MASK_CPU))
-#define MSEQNO(x) ((u16)(((x) >> 48) & 0xffff))
-#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU))
-#define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff))
-
-#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? true : false)
-#define ERR_MREF(x) ((u64)((s64)(x)))
-#define MREF_ERR(x) ((int)((s64)(x)))
-
-/*
- * The mft record header present at the beginning of every record in the mft.
- * This is followed by a sequence of variable length attribute records which
- * is terminated by an attribute of type AT_END which is a truncated attribute
- * in that it only consists of the attribute type code AT_END and none of the
- * other members of the attribute structure are present.
- */
-typedef struct {
-/*Ofs*/
-/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */
- NTFS_RECORD_TYPE magic; /* Usually the magic is "FILE". */
- le16 usa_ofs; /* See NTFS_RECORD definition above. */
- le16 usa_count; /* See NTFS_RECORD definition above. */
-
-/* 8*/ le64 lsn; /* $LogFile sequence number for this record.
- Changed every time the record is modified. */
-/* 16*/ le16 sequence_number; /* Number of times this mft record has been
- reused. (See description for MFT_REF
- above.) NOTE: The increment (skipping zero)
- is done when the file is deleted. NOTE: If
- this is zero it is left zero. */
-/* 18*/ le16 link_count; /* Number of hard links, i.e. the number of
- directory entries referencing this record.
- NOTE: Only used in mft base records.
- NOTE: When deleting a directory entry we
- check the link_count and if it is 1 we
- delete the file. Otherwise we delete the
- FILE_NAME_ATTR being referenced by the
- directory entry from the mft record and
- decrement the link_count.
- FIXME: Careful with Win32 + DOS names! */
-/* 20*/ le16 attrs_offset; /* Byte offset to the first attribute in this
- mft record from the start of the mft record.
- NOTE: Must be aligned to 8-byte boundary. */
-/* 22*/ MFT_RECORD_FLAGS flags; /* Bit array of MFT_RECORD_FLAGS. When a file
- is deleted, the MFT_RECORD_IN_USE flag is
- set to zero. */
-/* 24*/ le32 bytes_in_use; /* Number of bytes used in this mft record.
- NOTE: Must be aligned to 8-byte boundary. */
-/* 28*/ le32 bytes_allocated; /* Number of bytes allocated for this mft
- record. This should be equal to the mft
- record size. */
-/* 32*/ leMFT_REF base_mft_record;/* This is zero for base mft records.
- When it is not zero it is a mft reference
- pointing to the base mft record to which
- this record belongs (this is then used to
- locate the attribute list attribute present
- in the base record which describes this
- extension record and hence might need
- modification when the extension record
- itself is modified, also locating the
- attribute list also means finding the other
- potential extents, belonging to the non-base
- mft record). */
-/* 40*/ le16 next_attr_instance;/* The instance number that will be assigned to
- the next attribute added to this mft record.
- NOTE: Incremented each time after it is used.
- NOTE: Every time the mft record is reused
- this number is set to zero. NOTE: The first
- instance number is always 0. */
-/* The below fields are specific to NTFS 3.1+ (Windows XP and above): */
-/* 42*/ le16 reserved; /* Reserved/alignment. */
-/* 44*/ le32 mft_record_number; /* Number of this mft record. */
-/* sizeof() = 48 bytes */
-/*
- * When (re)using the mft record, we place the update sequence array at this
- * offset, i.e. before we start with the attributes. This also makes sense,
- * otherwise we could run into problems with the update sequence array
- * containing in itself the last two bytes of a sector which would mean that
- * multi sector transfer protection wouldn't work. As you can't protect data
- * by overwriting it since you then can't get it back...
- * When reading we obviously use the data from the ntfs record header.
- */
-} __attribute__ ((__packed__)) MFT_RECORD;
-
-/* This is the version without the NTFS 3.1+ specific fields. */
-typedef struct {
-/*Ofs*/
-/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */
- NTFS_RECORD_TYPE magic; /* Usually the magic is "FILE". */
- le16 usa_ofs; /* See NTFS_RECORD definition above. */
- le16 usa_count; /* See NTFS_RECORD definition above. */
-
-/* 8*/ le64 lsn; /* $LogFile sequence number for this record.
- Changed every time the record is modified. */
-/* 16*/ le16 sequence_number; /* Number of times this mft record has been
- reused. (See description for MFT_REF
- above.) NOTE: The increment (skipping zero)
- is done when the file is deleted. NOTE: If
- this is zero it is left zero. */
-/* 18*/ le16 link_count; /* Number of hard links, i.e. the number of
- directory entries referencing this record.
- NOTE: Only used in mft base records.
- NOTE: When deleting a directory entry we
- check the link_count and if it is 1 we
- delete the file. Otherwise we delete the
- FILE_NAME_ATTR being referenced by the
- directory entry from the mft record and
- decrement the link_count.
- FIXME: Careful with Win32 + DOS names! */
-/* 20*/ le16 attrs_offset; /* Byte offset to the first attribute in this
- mft record from the start of the mft record.
- NOTE: Must be aligned to 8-byte boundary. */
-/* 22*/ MFT_RECORD_FLAGS flags; /* Bit array of MFT_RECORD_FLAGS. When a file
- is deleted, the MFT_RECORD_IN_USE flag is
- set to zero. */
-/* 24*/ le32 bytes_in_use; /* Number of bytes used in this mft record.
- NOTE: Must be aligned to 8-byte boundary. */
-/* 28*/ le32 bytes_allocated; /* Number of bytes allocated for this mft
- record. This should be equal to the mft
- record size. */
-/* 32*/ leMFT_REF base_mft_record;/* This is zero for base mft records.
- When it is not zero it is a mft reference
- pointing to the base mft record to which
- this record belongs (this is then used to
- locate the attribute list attribute present
- in the base record which describes this
- extension record and hence might need
- modification when the extension record
- itself is modified, also locating the
- attribute list also means finding the other
- potential extents, belonging to the non-base
- mft record). */
-/* 40*/ le16 next_attr_instance;/* The instance number that will be assigned to
- the next attribute added to this mft record.
- NOTE: Incremented each time after it is used.
- NOTE: Every time the mft record is reused
- this number is set to zero. NOTE: The first
- instance number is always 0. */
-/* sizeof() = 42 bytes */
-/*
- * When (re)using the mft record, we place the update sequence array at this
- * offset, i.e. before we start with the attributes. This also makes sense,
- * otherwise we could run into problems with the update sequence array
- * containing in itself the last two bytes of a sector which would mean that
- * multi sector transfer protection wouldn't work. As you can't protect data
- * by overwriting it since you then can't get it back...
- * When reading we obviously use the data from the ntfs record header.
- */
-} __attribute__ ((__packed__)) MFT_RECORD_OLD;
-
-/*
- * System defined attributes (32-bit). Each attribute type has a corresponding
- * attribute name (Unicode string of maximum 64 character length) as described
- * by the attribute definitions present in the data attribute of the $AttrDef
- * system file. On NTFS 3.0 volumes the names are just as the types are named
- * in the below defines exchanging AT_ for the dollar sign ($). If that is not
- * a revealing choice of symbol I do not know what is... (-;
- */
-enum {
- AT_UNUSED = cpu_to_le32( 0),
- AT_STANDARD_INFORMATION = cpu_to_le32( 0x10),
- AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20),
- AT_FILE_NAME = cpu_to_le32( 0x30),
- AT_OBJECT_ID = cpu_to_le32( 0x40),
- AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50),
- AT_VOLUME_NAME = cpu_to_le32( 0x60),
- AT_VOLUME_INFORMATION = cpu_to_le32( 0x70),
- AT_DATA = cpu_to_le32( 0x80),
- AT_INDEX_ROOT = cpu_to_le32( 0x90),
- AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0),
- AT_BITMAP = cpu_to_le32( 0xb0),
- AT_REPARSE_POINT = cpu_to_le32( 0xc0),
- AT_EA_INFORMATION = cpu_to_le32( 0xd0),
- AT_EA = cpu_to_le32( 0xe0),
- AT_PROPERTY_SET = cpu_to_le32( 0xf0),
- AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100),
- AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000),
- AT_END = cpu_to_le32(0xffffffff)
-};
-
-typedef le32 ATTR_TYPE;
-
-/*
- * The collation rules for sorting views/indexes/etc (32-bit).
- *
- * COLLATION_BINARY - Collate by binary compare where the first byte is most
- * significant.
- * COLLATION_UNICODE_STRING - Collate Unicode strings by comparing their binary
- * Unicode values, except that when a character can be uppercased, the
- * upper case value collates before the lower case one.
- * COLLATION_FILE_NAME - Collate file names as Unicode strings. The collation
- * is done very much like COLLATION_UNICODE_STRING. In fact I have no idea
- * what the difference is. Perhaps the difference is that file names
- * would treat some special characters in an odd way (see
- * unistr.c::ntfs_collate_names() and unistr.c::legal_ansi_char_array[]
- * for what I mean but COLLATION_UNICODE_STRING would not give any special
- * treatment to any characters at all, but this is speculation.
- * COLLATION_NTOFS_ULONG - Sorting is done according to ascending le32 key
- * values. E.g. used for $SII index in FILE_Secure, which sorts by
- * security_id (le32).
- * COLLATION_NTOFS_SID - Sorting is done according to ascending SID values.
- * E.g. used for $O index in FILE_Extend/$Quota.
- * COLLATION_NTOFS_SECURITY_HASH - Sorting is done first by ascending hash
- * values and second by ascending security_id values. E.g. used for $SDH
- * index in FILE_Secure.
- * COLLATION_NTOFS_ULONGS - Sorting is done according to a sequence of ascending
- * le32 key values. E.g. used for $O index in FILE_Extend/$ObjId, which
- * sorts by object_id (16-byte), by splitting up the object_id in four
- * le32 values and using them as individual keys. E.g. take the following
- * two security_ids, stored as follows on disk:
- * 1st: a1 61 65 b7 65 7b d4 11 9e 3d 00 e0 81 10 42 59
- * 2nd: 38 14 37 d2 d2 f3 d4 11 a5 21 c8 6b 79 b1 97 45
- * To compare them, they are split into four le32 values each, like so:
- * 1st: 0xb76561a1 0x11d47b65 0xe0003d9e 0x59421081
- * 2nd: 0xd2371438 0x11d4f3d2 0x6bc821a5 0x4597b179
- * Now, it is apparent why the 2nd object_id collates after the 1st: the
- * first le32 value of the 1st object_id is less than the first le32 of
- * the 2nd object_id. If the first le32 values of both object_ids were
- * equal then the second le32 values would be compared, etc.
- */
-enum {
- COLLATION_BINARY = cpu_to_le32(0x00),
- COLLATION_FILE_NAME = cpu_to_le32(0x01),
- COLLATION_UNICODE_STRING = cpu_to_le32(0x02),
- COLLATION_NTOFS_ULONG = cpu_to_le32(0x10),
- COLLATION_NTOFS_SID = cpu_to_le32(0x11),
- COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12),
- COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13),
-};
-
-typedef le32 COLLATION_RULE;
-
-/*
- * The flags (32-bit) describing attribute properties in the attribute
- * definition structure. FIXME: This information is based on Regis's
- * information and, according to him, it is not certain and probably
- * incomplete. The INDEXABLE flag is fairly certainly correct as only the file
- * name attribute has this flag set and this is the only attribute indexed in
- * NT4.
- */
-enum {
- ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be
- indexed. */
- ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type
- can be present multiple times in the
- mft records of an inode. */
- ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value
- must contain at least one non-zero
- byte. */
- ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be
- indexed and the attribute value must be
- unique for the attribute type in all of
- the mft records of an inode. */
- ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be
- named and the name must be unique for
- the attribute type in all of the mft
- records of an inode. */
- ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be
- resident. */
- ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log
- modifications to this attribute,
- regardless of whether it is resident or
- non-resident. Without this, only log
- modifications if the attribute is
- resident. */
-};
-
-typedef le32 ATTR_DEF_FLAGS;
-
-/*
- * The data attribute of FILE_AttrDef contains a sequence of attribute
- * definitions for the NTFS volume. With this, it is supposed to be safe for an
- * older NTFS driver to mount a volume containing a newer NTFS version without
- * damaging it (that's the theory. In practice it's: not damaging it too much).
- * Entries are sorted by attribute type. The flags describe whether the
- * attribute can be resident/non-resident and possibly other things, but the
- * actual bits are unknown.
- */
-typedef struct {
-/*hex ofs*/
-/* 0*/ ntfschar name[0x40]; /* Unicode name of the attribute. Zero
- terminated. */
-/* 80*/ ATTR_TYPE type; /* Type of the attribute. */
-/* 84*/ le32 display_rule; /* Default display rule.
- FIXME: What does it mean? (AIA) */
-/* 88*/ COLLATION_RULE collation_rule; /* Default collation rule. */
-/* 8c*/ ATTR_DEF_FLAGS flags; /* Flags describing the attribute. */
-/* 90*/ sle64 min_size; /* Optional minimum attribute size. */
-/* 98*/ sle64 max_size; /* Maximum size of attribute. */
-/* sizeof() = 0xa0 or 160 bytes */
-} __attribute__ ((__packed__)) ATTR_DEF;
-
-/*
- * Attribute flags (16-bit).
- */
-enum {
- ATTR_IS_COMPRESSED = cpu_to_le16(0x0001),
- ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method
- mask. Also, first
- illegal value. */
- ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000),
- ATTR_IS_SPARSE = cpu_to_le16(0x8000),
-} __attribute__ ((__packed__));
-
-typedef le16 ATTR_FLAGS;
-
-/*
- * Attribute compression.
- *
- * Only the data attribute is ever compressed in the current ntfs driver in
- * Windows. Further, compression is only applied when the data attribute is
- * non-resident. Finally, to use compression, the maximum allowed cluster size
- * on a volume is 4kib.
- *
- * The compression method is based on independently compressing blocks of X
- * clusters, where X is determined from the compression_unit value found in the
- * non-resident attribute record header (more precisely: X = 2^compression_unit
- * clusters). On Windows NT/2k, X always is 16 clusters (compression_unit = 4).
- *
- * There are three different cases of how a compression block of X clusters
- * can be stored:
- *
- * 1) The data in the block is all zero (a sparse block):
- * This is stored as a sparse block in the runlist, i.e. the runlist
- * entry has length = X and lcn = -1. The mapping pairs array actually
- * uses a delta_lcn value length of 0, i.e. delta_lcn is not present at
- * all, which is then interpreted by the driver as lcn = -1.
- * NOTE: Even uncompressed files can be sparse on NTFS 3.0 volumes, then
- * the same principles apply as above, except that the length is not
- * restricted to being any particular value.
- *
- * 2) The data in the block is not compressed:
- * This happens when compression doesn't reduce the size of the block
- * in clusters. I.e. if compression has a small effect so that the
- * compressed data still occupies X clusters, then the uncompressed data
- * is stored in the block.
- * This case is recognised by the fact that the runlist entry has
- * length = X and lcn >= 0. The mapping pairs array stores this as
- * normal with a run length of X and some specific delta_lcn, i.e.
- * delta_lcn has to be present.
- *
- * 3) The data in the block is compressed:
- * The common case. This case is recognised by the fact that the run
- * list entry has length L < X and lcn >= 0. The mapping pairs array
- * stores this as normal with a run length of X and some specific
- * delta_lcn, i.e. delta_lcn has to be present. This runlist entry is
- * immediately followed by a sparse entry with length = X - L and
- * lcn = -1. The latter entry is to make up the vcn counting to the
- * full compression block size X.
- *
- * In fact, life is more complicated because adjacent entries of the same type
- * can be coalesced. This means that one has to keep track of the number of
- * clusters handled and work on a basis of X clusters at a time being one
- * block. An example: if length L > X this means that this particular runlist
- * entry contains a block of length X and part of one or more blocks of length
- * L - X. Another example: if length L < X, this does not necessarily mean that
- * the block is compressed as it might be that the lcn changes inside the block
- * and hence the following runlist entry describes the continuation of the
- * potentially compressed block. The block would be compressed if the
- * following runlist entry describes at least X - L sparse clusters, thus
- * making up the compression block length as described in point 3 above. (Of
- * course, there can be several runlist entries with small lengths so that the
- * sparse entry does not follow the first data containing entry with
- * length < X.)
- *
- * NOTE: At the end of the compressed attribute value, there most likely is not
- * just the right amount of data to make up a compression block, thus this data
- * is not even attempted to be compressed. It is just stored as is, unless
- * the number of clusters it occupies is reduced when compressed in which case
- * it is stored as a compressed compression block, complete with sparse
- * clusters at the end.
- */
-
-/*
- * Flags of resident attributes (8-bit).
- */
-enum {
- RESIDENT_ATTR_IS_INDEXED = 0x01, /* Attribute is referenced in an index
- (has implications for deleting and
- modifying the attribute). */
-} __attribute__ ((__packed__));
-
-typedef u8 RESIDENT_ATTR_FLAGS;
-
-/*
- * Attribute record header. Always aligned to 8-byte boundary.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/ ATTR_TYPE type; /* The (32-bit) type of the attribute. */
-/* 4*/ le32 length; /* Byte size of the resident part of the
- attribute (aligned to 8-byte boundary).
- Used to get to the next attribute. */
-/* 8*/ u8 non_resident; /* If 0, attribute is resident.
- If 1, attribute is non-resident. */
-/* 9*/ u8 name_length; /* Unicode character size of name of attribute.
- 0 if unnamed. */
-/* 10*/ le16 name_offset; /* If name_length != 0, the byte offset to the
- beginning of the name from the attribute
- record. Note that the name is stored as a
- Unicode string. When creating, place offset
- just at the end of the record header. Then,
- follow with attribute value or mapping pairs
- array, resident and non-resident attributes
- respectively, aligning to an 8-byte
- boundary. */
-/* 12*/ ATTR_FLAGS flags; /* Flags describing the attribute. */
-/* 14*/ le16 instance; /* The instance of this attribute record. This
- number is unique within this mft record (see
- MFT_RECORD/next_attribute_instance notes in
- mft.h for more details). */
-/* 16*/ union {
- /* Resident attributes. */
- struct {
-/* 16 */ le32 value_length;/* Byte size of attribute value. */
-/* 20 */ le16 value_offset;/* Byte offset of the attribute
- value from the start of the
- attribute record. When creating,
- align to 8-byte boundary if we
- have a name present as this might
- not have a length of a multiple
- of 8-bytes. */
-/* 22 */ RESIDENT_ATTR_FLAGS flags; /* See above. */
-/* 23 */ s8 reserved; /* Reserved/alignment to 8-byte
- boundary. */
- } __attribute__ ((__packed__)) resident;
- /* Non-resident attributes. */
- struct {
-/* 16*/ leVCN lowest_vcn;/* Lowest valid virtual cluster number
- for this portion of the attribute value or
- 0 if this is the only extent (usually the
- case). - Only when an attribute list is used
- does lowest_vcn != 0 ever occur. */
-/* 24*/ leVCN highest_vcn;/* Highest valid vcn of this extent of
- the attribute value. - Usually there is only one
- portion, so this usually equals the attribute
- value size in clusters minus 1. Can be -1 for
- zero length files. Can be 0 for "single extent"
- attributes. */
-/* 32*/ le16 mapping_pairs_offset; /* Byte offset from the
- beginning of the structure to the mapping pairs
- array which contains the mappings between the
- vcns and the logical cluster numbers (lcns).
- When creating, place this at the end of this
- record header aligned to 8-byte boundary. */
-/* 34*/ u8 compression_unit; /* The compression unit expressed
- as the log to the base 2 of the number of
- clusters in a compression unit. 0 means not
- compressed. (This effectively limits the
- compression unit size to be a power of two
- clusters.) WinNT4 only uses a value of 4.
- Sparse files have this set to 0 on XPSP2. */
-/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */
-/* The sizes below are only used when lowest_vcn is zero, as otherwise it would
- be difficult to keep them up-to-date.*/
-/* 40*/ sle64 allocated_size; /* Byte size of disk space
- allocated to hold the attribute value. Always
- is a multiple of the cluster size. When a file
- is compressed, this field is a multiple of the
- compression block size (2^compression_unit) and
- it represents the logically allocated space
- rather than the actual on disk usage. For this
- use the compressed_size (see below). */
-/* 48*/ sle64 data_size; /* Byte size of the attribute
- value. Can be larger than allocated_size if
- attribute value is compressed or sparse. */
-/* 56*/ sle64 initialized_size; /* Byte size of initialized
- portion of the attribute value. Usually equals
- data_size. */
-/* sizeof(uncompressed attr) = 64*/
-/* 64*/ sle64 compressed_size; /* Byte size of the attribute
- value after compression. Only present when
- compressed or sparse. Always is a multiple of
- the cluster size. Represents the actual amount
- of disk space being used on the disk. */
-/* sizeof(compressed attr) = 72*/
- } __attribute__ ((__packed__)) non_resident;
- } __attribute__ ((__packed__)) data;
-} __attribute__ ((__packed__)) ATTR_RECORD;
-
-typedef ATTR_RECORD ATTR_REC;
-
-/*
- * File attribute flags (32-bit) appearing in the file_attributes fields of the
- * STANDARD_INFORMATION attribute of MFT_RECORDs and the FILENAME_ATTR
- * attributes of MFT_RECORDs and directory index entries.
- *
- * All of the below flags appear in the directory index entries but only some
- * appear in the STANDARD_INFORMATION attribute whilst only some others appear
- * in the FILENAME_ATTR attribute of MFT_RECORDs. Unless otherwise stated the
- * flags appear in all of the above.
- */
-enum {
- FILE_ATTR_READONLY = cpu_to_le32(0x00000001),
- FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002),
- FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004),
- /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */
-
- FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010),
- /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is
- reserved for the DOS SUBDIRECTORY flag. */
- FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020),
- FILE_ATTR_DEVICE = cpu_to_le32(0x00000040),
- FILE_ATTR_NORMAL = cpu_to_le32(0x00000080),
-
- FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100),
- FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200),
- FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400),
- FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800),
-
- FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000),
- FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000),
- FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000),
-
- FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7),
- /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the
- FILE_ATTR_DEVICE and preserves everything else. This mask is used
- to obtain all flags that are valid for reading. */
- FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7),
- /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
- F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
- F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
- is used to obtain all flags that are valid for setting. */
- /*
- * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all
- * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
- * attribute of an mft record.
- */
- FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000),
- /* Note, this is a copy of the corresponding bit from the mft record,
- telling us whether this is a directory or not, i.e. whether it has
- an index root attribute or not. */
- FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000),
- /* Note, this is a copy of the corresponding bit from the mft record,
- telling us whether this file has a view index present (eg. object id
- index, quota index, one of the security indexes or the encrypting
- filesystem related indexes). */
-};
-
-typedef le32 FILE_ATTR_FLAGS;
-
-/*
- * NOTE on times in NTFS: All times are in MS standard time format, i.e. they
- * are the number of 100-nanosecond intervals since 1st January 1601, 00:00:00
- * universal coordinated time (UTC). (In Linux time starts 1st January 1970,
- * 00:00:00 UTC and is stored as the number of 1-second intervals since then.)
- */
-
-/*
- * Attribute: Standard information (0x10).
- *
- * NOTE: Always resident.
- * NOTE: Present in all base file records on a volume.
- * NOTE: There is conflicting information about the meaning of each of the time
- * fields but the meaning as defined below has been verified to be
- * correct by practical experimentation on Windows NT4 SP6a and is hence
- * assumed to be the one and only correct interpretation.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/ sle64 creation_time; /* Time file was created. Updated when
- a filename is changed(?). */
-/* 8*/ sle64 last_data_change_time; /* Time the data attribute was last
- modified. */
-/* 16*/ sle64 last_mft_change_time; /* Time this mft record was last
- modified. */
-/* 24*/ sle64 last_access_time; /* Approximate time when the file was
- last accessed (obviously this is not
- updated on read-only volumes). In
- Windows this is only updated when
- accessed if some time delta has
- passed since the last update. Also,
- last access time updates can be
- disabled altogether for speed. */
-/* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
-/* 36*/ union {
- /* NTFS 1.2 */
- struct {
- /* 36*/ u8 reserved12[12]; /* Reserved/alignment to 8-byte
- boundary. */
- } __attribute__ ((__packed__)) v1;
- /* sizeof() = 48 bytes */
- /* NTFS 3.x */
- struct {
-/*
- * If a volume has been upgraded from a previous NTFS version, then these
- * fields are present only if the file has been accessed since the upgrade.
- * Recognize the difference by comparing the length of the resident attribute
- * value. If it is 48, then the following fields are missing. If it is 72 then
- * the fields are present. Maybe just check like this:
- * if (resident.ValueLength < sizeof(STANDARD_INFORMATION)) {
- * Assume NTFS 1.2- format.
- * If (volume version is 3.x)
- * Upgrade attribute to NTFS 3.x format.
- * else
- * Use NTFS 1.2- format for access.
- * } else
- * Use NTFS 3.x format for access.
- * Only problem is that it might be legal to set the length of the value to
- * arbitrarily large values thus spoiling this check. - But chkdsk probably
- * views that as a corruption, assuming that it behaves like this for all
- * attributes.
- */
- /* 36*/ le32 maximum_versions; /* Maximum allowed versions for
- file. Zero if version numbering is disabled. */
- /* 40*/ le32 version_number; /* This file's version (if any).
- Set to zero if maximum_versions is zero. */
- /* 44*/ le32 class_id; /* Class id from bidirectional
- class id index (?). */
- /* 48*/ le32 owner_id; /* Owner_id of the user owning
- the file. Translate via $Q index in FILE_Extend
- /$Quota to the quota control entry for the user
- owning the file. Zero if quotas are disabled. */
- /* 52*/ le32 security_id; /* Security_id for the file.
- Translate via $SII index and $SDS data stream
- in FILE_Secure to the security descriptor. */
- /* 56*/ le64 quota_charged; /* Byte size of the charge to
- the quota for all streams of the file. Note: Is
- zero if quotas are disabled. */
- /* 64*/ leUSN usn; /* Last update sequence number
- of the file. This is a direct index into the
- transaction log file ($UsnJrnl). It is zero if
- the usn journal is disabled or this file has
- not been subject to logging yet. See usnjrnl.h
- for details. */
- } __attribute__ ((__packed__)) v3;
- /* sizeof() = 72 bytes (NTFS 3.x) */
- } __attribute__ ((__packed__)) ver;
-} __attribute__ ((__packed__)) STANDARD_INFORMATION;
-
-/*
- * Attribute: Attribute list (0x20).
- *
- * - Can be either resident or non-resident.
- * - Value consists of a sequence of variable length, 8-byte aligned,
- * ATTR_LIST_ENTRY records.
- * - The list is not terminated by anything at all! The only way to know when
- * the end is reached is to keep track of the current offset and compare it to
- * the attribute value size.
- * - The attribute list attribute contains one entry for each attribute of
- * the file in which the list is located, except for the list attribute
- * itself. The list is sorted: first by attribute type, second by attribute
- * name (if present), third by instance number. The extents of one
- * non-resident attribute (if present) immediately follow after the initial
- * extent. They are ordered by lowest_vcn and have their instace set to zero.
- * It is not allowed to have two attributes with all sorting keys equal.
- * - Further restrictions:
- * - If not resident, the vcn to lcn mapping array has to fit inside the
- * base mft record.
- * - The attribute list attribute value has a maximum size of 256kb. This
- * is imposed by the Windows cache manager.
- * - Attribute lists are only used when the attributes of mft record do not
- * fit inside the mft record despite all attributes (that can be made
- * non-resident) having been made non-resident. This can happen e.g. when:
- * - File has a large number of hard links (lots of file name
- * attributes present).
- * - The mapping pairs array of some non-resident attribute becomes so
- * large due to fragmentation that it overflows the mft record.
- * - The security descriptor is very complex (not applicable to
- * NTFS 3.0 volumes).
- * - There are many named streams.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/ ATTR_TYPE type; /* Type of referenced attribute. */
-/* 4*/ le16 length; /* Byte size of this entry (8-byte aligned). */
-/* 6*/ u8 name_length; /* Size in Unicode chars of the name of the
- attribute or 0 if unnamed. */
-/* 7*/ u8 name_offset; /* Byte offset to beginning of attribute name
- (always set this to where the name would
- start even if unnamed). */
-/* 8*/ leVCN lowest_vcn; /* Lowest virtual cluster number of this portion
- of the attribute value. This is usually 0. It
- is non-zero for the case where one attribute
- does not fit into one mft record and thus
- several mft records are allocated to hold
- this attribute. In the latter case, each mft
- record holds one extent of the attribute and
- there is one attribute list entry for each
- extent. NOTE: This is DEFINITELY a signed
- value! The windows driver uses cmp, followed
- by jg when comparing this, thus it treats it
- as signed. */
-/* 16*/ leMFT_REF mft_reference;/* The reference of the mft record holding
- the ATTR_RECORD for this portion of the
- attribute value. */
-/* 24*/ le16 instance; /* If lowest_vcn = 0, the instance of the
- attribute being referenced; otherwise 0. */
-/* 26*/ ntfschar name[0]; /* Use when creating only. When reading use
- name_offset to determine the location of the
- name. */
-/* sizeof() = 26 + (attribute_name_length * 2) bytes */
-} __attribute__ ((__packed__)) ATTR_LIST_ENTRY;
-
-/*
- * The maximum allowed length for a file name.
- */
-#define MAXIMUM_FILE_NAME_LENGTH 255
-
-/*
- * Possible namespaces for filenames in ntfs (8-bit).
- */
-enum {
- FILE_NAME_POSIX = 0x00,
- /* This is the largest namespace. It is case sensitive and allows all
- Unicode characters except for: '\0' and '/'. Beware that in
- WinNT/2k/2003 by default files which eg have the same name except
- for their case will not be distinguished by the standard utilities
- and thus a "del filename" will delete both "filename" and "fileName"
- without warning. However if for example Services For Unix (SFU) are
- installed and the case sensitive option was enabled at installation
- time, then you can create/access/delete such files.
- Note that even SFU places restrictions on the filenames beyond the
- '\0' and '/' and in particular the following set of characters is
- not allowed: '"', '/', '<', '>', '\'. All other characters,
- including the ones no allowed in WIN32 namespace are allowed.
- Tested with SFU 3.5 (this is now free) running on Windows XP. */
- FILE_NAME_WIN32 = 0x01,
- /* The standard WinNT/2k NTFS long filenames. Case insensitive. All
- Unicode chars except: '\0', '"', '*', '/', ':', '<', '>', '?', '\',
- and '|'. Further, names cannot end with a '.' or a space. */
- FILE_NAME_DOS = 0x02,
- /* The standard DOS filenames (8.3 format). Uppercase only. All 8-bit
- characters greater space, except: '"', '*', '+', ',', '/', ':', ';',
- '<', '=', '>', '?', and '\'. */
- FILE_NAME_WIN32_AND_DOS = 0x03,
- /* 3 means that both the Win32 and the DOS filenames are identical and
- hence have been saved in this single filename record. */
-} __attribute__ ((__packed__));
-
-typedef u8 FILE_NAME_TYPE_FLAGS;
-
-/*
- * Attribute: Filename (0x30).
- *
- * NOTE: Always resident.
- * NOTE: All fields, except the parent_directory, are only updated when the
- * filename is changed. Until then, they just become out of sync with
- * reality and the more up to date values are present in the standard
- * information attribute.
- * NOTE: There is conflicting information about the meaning of each of the time
- * fields but the meaning as defined below has been verified to be
- * correct by practical experimentation on Windows NT4 SP6a and is hence
- * assumed to be the one and only correct interpretation.
- */
-typedef struct {
-/*hex ofs*/
-/* 0*/ leMFT_REF parent_directory; /* Directory this filename is
- referenced from. */
-/* 8*/ sle64 creation_time; /* Time file was created. */
-/* 10*/ sle64 last_data_change_time; /* Time the data attribute was last
- modified. */
-/* 18*/ sle64 last_mft_change_time; /* Time this mft record was last
- modified. */
-/* 20*/ sle64 last_access_time; /* Time this mft record was last
- accessed. */
-/* 28*/ sle64 allocated_size; /* Byte size of on-disk allocated space
- for the unnamed data attribute. So
- for normal $DATA, this is the
- allocated_size from the unnamed
- $DATA attribute and for compressed
- and/or sparse $DATA, this is the
- compressed_size from the unnamed
- $DATA attribute. For a directory or
- other inode without an unnamed $DATA
- attribute, this is always 0. NOTE:
- This is a multiple of the cluster
- size. */
-/* 30*/ sle64 data_size; /* Byte size of actual data in unnamed
- data attribute. For a directory or
- other inode without an unnamed $DATA
- attribute, this is always 0. */
-/* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
-/* 3c*/ union {
- /* 3c*/ struct {
- /* 3c*/ le16 packed_ea_size; /* Size of the buffer needed to
- pack the extended attributes
- (EAs), if such are present.*/
- /* 3e*/ le16 reserved; /* Reserved for alignment. */
- } __attribute__ ((__packed__)) ea;
- /* 3c*/ struct {
- /* 3c*/ le32 reparse_point_tag; /* Type of reparse point,
- present only in reparse
- points and only if there are
- no EAs. */
- } __attribute__ ((__packed__)) rp;
- } __attribute__ ((__packed__)) type;
-/* 40*/ u8 file_name_length; /* Length of file name in
- (Unicode) characters. */
-/* 41*/ FILE_NAME_TYPE_FLAGS file_name_type; /* Namespace of the file name.*/
-/* 42*/ ntfschar file_name[0]; /* File name in Unicode. */
-} __attribute__ ((__packed__)) FILE_NAME_ATTR;
-
-/*
- * GUID structures store globally unique identifiers (GUID). A GUID is a
- * 128-bit value consisting of one group of eight hexadecimal digits, followed
- * by three groups of four hexadecimal digits each, followed by one group of
- * twelve hexadecimal digits. GUIDs are Microsoft's implementation of the
- * distributed computing environment (DCE) universally unique identifier (UUID).
- * Example of a GUID:
- * 1F010768-5A73-BC91-0010A52216A7
- */
-typedef struct {
- le32 data1; /* The first eight hexadecimal digits of the GUID. */
- le16 data2; /* The first group of four hexadecimal digits. */
- le16 data3; /* The second group of four hexadecimal digits. */
- u8 data4[8]; /* The first two bytes are the third group of four
- hexadecimal digits. The remaining six bytes are the
- final 12 hexadecimal digits. */
-} __attribute__ ((__packed__)) GUID;
-
-/*
- * FILE_Extend/$ObjId contains an index named $O. This index contains all
- * object_ids present on the volume as the index keys and the corresponding
- * mft_record numbers as the index entry data parts. The data part (defined
- * below) also contains three other object_ids:
- * birth_volume_id - object_id of FILE_Volume on which the file was first
- * created. Optional (i.e. can be zero).
- * birth_object_id - object_id of file when it was first created. Usually
- * equals the object_id. Optional (i.e. can be zero).
- * domain_id - Reserved (always zero).
- */
-typedef struct {
- leMFT_REF mft_reference;/* Mft record containing the object_id in
- the index entry key. */
- union {
- struct {
- GUID birth_volume_id;
- GUID birth_object_id;
- GUID domain_id;
- } __attribute__ ((__packed__)) origin;
- u8 extended_info[48];
- } __attribute__ ((__packed__)) opt;
-} __attribute__ ((__packed__)) OBJ_ID_INDEX_DATA;
-
-/*
- * Attribute: Object id (NTFS 3.0+) (0x40).
- *
- * NOTE: Always resident.
- */
-typedef struct {
- GUID object_id; /* Unique id assigned to the
- file.*/
- /* The following fields are optional. The attribute value size is 16
- bytes, i.e. sizeof(GUID), if these are not present at all. Note,
- the entries can be present but one or more (or all) can be zero
- meaning that that particular value(s) is(are) not defined. */
- union {
- struct {
- GUID birth_volume_id; /* Unique id of volume on which
- the file was first created.*/
- GUID birth_object_id; /* Unique id of file when it was
- first created. */
- GUID domain_id; /* Reserved, zero. */
- } __attribute__ ((__packed__)) origin;
- u8 extended_info[48];
- } __attribute__ ((__packed__)) opt;
-} __attribute__ ((__packed__)) OBJECT_ID_ATTR;
-
-/*
- * The pre-defined IDENTIFIER_AUTHORITIES used as SID_IDENTIFIER_AUTHORITY in
- * the SID structure (see below).
- */
-//typedef enum { /* SID string prefix. */
-// SECURITY_NULL_SID_AUTHORITY = {0, 0, 0, 0, 0, 0}, /* S-1-0 */
-// SECURITY_WORLD_SID_AUTHORITY = {0, 0, 0, 0, 0, 1}, /* S-1-1 */
-// SECURITY_LOCAL_SID_AUTHORITY = {0, 0, 0, 0, 0, 2}, /* S-1-2 */
-// SECURITY_CREATOR_SID_AUTHORITY = {0, 0, 0, 0, 0, 3}, /* S-1-3 */
-// SECURITY_NON_UNIQUE_AUTHORITY = {0, 0, 0, 0, 0, 4}, /* S-1-4 */
-// SECURITY_NT_SID_AUTHORITY = {0, 0, 0, 0, 0, 5}, /* S-1-5 */
-//} IDENTIFIER_AUTHORITIES;
-
-/*
- * These relative identifiers (RIDs) are used with the above identifier
- * authorities to make up universal well-known SIDs.
- *
- * Note: The relative identifier (RID) refers to the portion of a SID, which
- * identifies a user or group in relation to the authority that issued the SID.
- * For example, the universal well-known SID Creator Owner ID (S-1-3-0) is
- * made up of the identifier authority SECURITY_CREATOR_SID_AUTHORITY (3) and
- * the relative identifier SECURITY_CREATOR_OWNER_RID (0).
- */
-typedef enum { /* Identifier authority. */
- SECURITY_NULL_RID = 0, /* S-1-0 */
- SECURITY_WORLD_RID = 0, /* S-1-1 */
- SECURITY_LOCAL_RID = 0, /* S-1-2 */
-
- SECURITY_CREATOR_OWNER_RID = 0, /* S-1-3 */
- SECURITY_CREATOR_GROUP_RID = 1, /* S-1-3 */
-
- SECURITY_CREATOR_OWNER_SERVER_RID = 2, /* S-1-3 */
- SECURITY_CREATOR_GROUP_SERVER_RID = 3, /* S-1-3 */
-
- SECURITY_DIALUP_RID = 1,
- SECURITY_NETWORK_RID = 2,
- SECURITY_BATCH_RID = 3,
- SECURITY_INTERACTIVE_RID = 4,
- SECURITY_SERVICE_RID = 6,
- SECURITY_ANONYMOUS_LOGON_RID = 7,
- SECURITY_PROXY_RID = 8,
- SECURITY_ENTERPRISE_CONTROLLERS_RID=9,
- SECURITY_SERVER_LOGON_RID = 9,
- SECURITY_PRINCIPAL_SELF_RID = 0xa,
- SECURITY_AUTHENTICATED_USER_RID = 0xb,
- SECURITY_RESTRICTED_CODE_RID = 0xc,
- SECURITY_TERMINAL_SERVER_RID = 0xd,
-
- SECURITY_LOGON_IDS_RID = 5,
- SECURITY_LOGON_IDS_RID_COUNT = 3,
-
- SECURITY_LOCAL_SYSTEM_RID = 0x12,
-
- SECURITY_NT_NON_UNIQUE = 0x15,
-
- SECURITY_BUILTIN_DOMAIN_RID = 0x20,
-
- /*
- * Well-known domain relative sub-authority values (RIDs).
- */
-
- /* Users. */
- DOMAIN_USER_RID_ADMIN = 0x1f4,
- DOMAIN_USER_RID_GUEST = 0x1f5,
- DOMAIN_USER_RID_KRBTGT = 0x1f6,
-
- /* Groups. */
- DOMAIN_GROUP_RID_ADMINS = 0x200,
- DOMAIN_GROUP_RID_USERS = 0x201,
- DOMAIN_GROUP_RID_GUESTS = 0x202,
- DOMAIN_GROUP_RID_COMPUTERS = 0x203,
- DOMAIN_GROUP_RID_CONTROLLERS = 0x204,
- DOMAIN_GROUP_RID_CERT_ADMINS = 0x205,
- DOMAIN_GROUP_RID_SCHEMA_ADMINS = 0x206,
- DOMAIN_GROUP_RID_ENTERPRISE_ADMINS= 0x207,
- DOMAIN_GROUP_RID_POLICY_ADMINS = 0x208,
-
- /* Aliases. */
- DOMAIN_ALIAS_RID_ADMINS = 0x220,
- DOMAIN_ALIAS_RID_USERS = 0x221,
- DOMAIN_ALIAS_RID_GUESTS = 0x222,
- DOMAIN_ALIAS_RID_POWER_USERS = 0x223,
-
- DOMAIN_ALIAS_RID_ACCOUNT_OPS = 0x224,
- DOMAIN_ALIAS_RID_SYSTEM_OPS = 0x225,
- DOMAIN_ALIAS_RID_PRINT_OPS = 0x226,
- DOMAIN_ALIAS_RID_BACKUP_OPS = 0x227,
-
- DOMAIN_ALIAS_RID_REPLICATOR = 0x228,
- DOMAIN_ALIAS_RID_RAS_SERVERS = 0x229,
- DOMAIN_ALIAS_RID_PREW2KCOMPACCESS = 0x22a,
-} RELATIVE_IDENTIFIERS;
-
-/*
- * The universal well-known SIDs:
- *
- * NULL_SID S-1-0-0
- * WORLD_SID S-1-1-0
- * LOCAL_SID S-1-2-0
- * CREATOR_OWNER_SID S-1-3-0
- * CREATOR_GROUP_SID S-1-3-1
- * CREATOR_OWNER_SERVER_SID S-1-3-2
- * CREATOR_GROUP_SERVER_SID S-1-3-3
- *
- * (Non-unique IDs) S-1-4
- *
- * NT well-known SIDs:
- *
- * NT_AUTHORITY_SID S-1-5
- * DIALUP_SID S-1-5-1
- *
- * NETWORD_SID S-1-5-2
- * BATCH_SID S-1-5-3
- * INTERACTIVE_SID S-1-5-4
- * SERVICE_SID S-1-5-6
- * ANONYMOUS_LOGON_SID S-1-5-7 (aka null logon session)
- * PROXY_SID S-1-5-8
- * SERVER_LOGON_SID S-1-5-9 (aka domain controller account)
- * SELF_SID S-1-5-10 (self RID)
- * AUTHENTICATED_USER_SID S-1-5-11
- * RESTRICTED_CODE_SID S-1-5-12 (running restricted code)
- * TERMINAL_SERVER_SID S-1-5-13 (running on terminal server)
- *
- * (Logon IDs) S-1-5-5-X-Y
- *
- * (NT non-unique IDs) S-1-5-0x15-...
- *
- * (Built-in domain) S-1-5-0x20
- */
-
-/*
- * The SID_IDENTIFIER_AUTHORITY is a 48-bit value used in the SID structure.
- *
- * NOTE: This is stored as a big endian number, hence the high_part comes
- * before the low_part.
- */
-typedef union {
- struct {
- u16 high_part; /* High 16-bits. */
- u32 low_part; /* Low 32-bits. */
- } __attribute__ ((__packed__)) parts;
- u8 value[6]; /* Value as individual bytes. */
-} __attribute__ ((__packed__)) SID_IDENTIFIER_AUTHORITY;
-
-/*
- * The SID structure is a variable-length structure used to uniquely identify
- * users or groups. SID stands for security identifier.
- *
- * The standard textual representation of the SID is of the form:
- * S-R-I-S-S...
- * Where:
- * - The first "S" is the literal character 'S' identifying the following
- * digits as a SID.
- * - R is the revision level of the SID expressed as a sequence of digits
- * either in decimal or hexadecimal (if the later, prefixed by "0x").
- * - I is the 48-bit identifier_authority, expressed as digits as R above.
- * - S... is one or more sub_authority values, expressed as digits as above.
- *
- * Example SID; the domain-relative SID of the local Administrators group on
- * Windows NT/2k:
- * S-1-5-32-544
- * This translates to a SID with:
- * revision = 1,
- * sub_authority_count = 2,
- * identifier_authority = {0,0,0,0,0,5}, // SECURITY_NT_AUTHORITY
- * sub_authority[0] = 32, // SECURITY_BUILTIN_DOMAIN_RID
- * sub_authority[1] = 544 // DOMAIN_ALIAS_RID_ADMINS
- */
-typedef struct {
- u8 revision;
- u8 sub_authority_count;
- SID_IDENTIFIER_AUTHORITY identifier_authority;
- le32 sub_authority[1]; /* At least one sub_authority. */
-} __attribute__ ((__packed__)) SID;
-
-/*
- * Current constants for SIDs.
- */
-typedef enum {
- SID_REVISION = 1, /* Current revision level. */
- SID_MAX_SUB_AUTHORITIES = 15, /* Maximum number of those. */
- SID_RECOMMENDED_SUB_AUTHORITIES = 1, /* Will change to around 6 in
- a future revision. */
-} SID_CONSTANTS;
-
-/*
- * The predefined ACE types (8-bit, see below).
- */
-enum {
- ACCESS_MIN_MS_ACE_TYPE = 0,
- ACCESS_ALLOWED_ACE_TYPE = 0,
- ACCESS_DENIED_ACE_TYPE = 1,
- SYSTEM_AUDIT_ACE_TYPE = 2,
- SYSTEM_ALARM_ACE_TYPE = 3, /* Not implemented as of Win2k. */
- ACCESS_MAX_MS_V2_ACE_TYPE = 3,
-
- ACCESS_ALLOWED_COMPOUND_ACE_TYPE= 4,
- ACCESS_MAX_MS_V3_ACE_TYPE = 4,
-
- /* The following are Win2k only. */
- ACCESS_MIN_MS_OBJECT_ACE_TYPE = 5,
- ACCESS_ALLOWED_OBJECT_ACE_TYPE = 5,
- ACCESS_DENIED_OBJECT_ACE_TYPE = 6,
- SYSTEM_AUDIT_OBJECT_ACE_TYPE = 7,
- SYSTEM_ALARM_OBJECT_ACE_TYPE = 8,
- ACCESS_MAX_MS_OBJECT_ACE_TYPE = 8,
-
- ACCESS_MAX_MS_V4_ACE_TYPE = 8,
-
- /* This one is for WinNT/2k. */
- ACCESS_MAX_MS_ACE_TYPE = 8,
-} __attribute__ ((__packed__));
-
-typedef u8 ACE_TYPES;
-
-/*
- * The ACE flags (8-bit) for audit and inheritance (see below).
- *
- * SUCCESSFUL_ACCESS_ACE_FLAG is only used with system audit and alarm ACE
- * types to indicate that a message is generated (in Windows!) for successful
- * accesses.
- *
- * FAILED_ACCESS_ACE_FLAG is only used with system audit and alarm ACE types
- * to indicate that a message is generated (in Windows!) for failed accesses.
- */
-enum {
- /* The inheritance flags. */
- OBJECT_INHERIT_ACE = 0x01,
- CONTAINER_INHERIT_ACE = 0x02,
- NO_PROPAGATE_INHERIT_ACE = 0x04,
- INHERIT_ONLY_ACE = 0x08,
- INHERITED_ACE = 0x10, /* Win2k only. */
- VALID_INHERIT_FLAGS = 0x1f,
-
- /* The audit flags. */
- SUCCESSFUL_ACCESS_ACE_FLAG = 0x40,
- FAILED_ACCESS_ACE_FLAG = 0x80,
-} __attribute__ ((__packed__));
-
-typedef u8 ACE_FLAGS;
-
-/*
- * An ACE is an access-control entry in an access-control list (ACL).
- * An ACE defines access to an object for a specific user or group or defines
- * the types of access that generate system-administration messages or alarms
- * for a specific user or group. The user or group is identified by a security
- * identifier (SID).
- *
- * Each ACE starts with an ACE_HEADER structure (aligned on 4-byte boundary),
- * which specifies the type and size of the ACE. The format of the subsequent
- * data depends on the ACE type.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/ ACE_TYPES type; /* Type of the ACE. */
-/* 1*/ ACE_FLAGS flags; /* Flags describing the ACE. */
-/* 2*/ le16 size; /* Size in bytes of the ACE. */
-} __attribute__ ((__packed__)) ACE_HEADER;
-
-/*
- * The access mask (32-bit). Defines the access rights.
- *
- * The specific rights (bits 0 to 15). These depend on the type of the object
- * being secured by the ACE.
- */
-enum {
- /* Specific rights for files and directories are as follows: */
-
- /* Right to read data from the file. (FILE) */
- FILE_READ_DATA = cpu_to_le32(0x00000001),
- /* Right to list contents of a directory. (DIRECTORY) */
- FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001),
-
- /* Right to write data to the file. (FILE) */
- FILE_WRITE_DATA = cpu_to_le32(0x00000002),
- /* Right to create a file in the directory. (DIRECTORY) */
- FILE_ADD_FILE = cpu_to_le32(0x00000002),
-
- /* Right to append data to the file. (FILE) */
- FILE_APPEND_DATA = cpu_to_le32(0x00000004),
- /* Right to create a subdirectory. (DIRECTORY) */
- FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004),
-
- /* Right to read extended attributes. (FILE/DIRECTORY) */
- FILE_READ_EA = cpu_to_le32(0x00000008),
-
- /* Right to write extended attributes. (FILE/DIRECTORY) */
- FILE_WRITE_EA = cpu_to_le32(0x00000010),
-
- /* Right to execute a file. (FILE) */
- FILE_EXECUTE = cpu_to_le32(0x00000020),
- /* Right to traverse the directory. (DIRECTORY) */
- FILE_TRAVERSE = cpu_to_le32(0x00000020),
-
- /*
- * Right to delete a directory and all the files it contains (its
- * children), even if the files are read-only. (DIRECTORY)
- */
- FILE_DELETE_CHILD = cpu_to_le32(0x00000040),
-
- /* Right to read file attributes. (FILE/DIRECTORY) */
- FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080),
-
- /* Right to change file attributes. (FILE/DIRECTORY) */
- FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100),
-
- /*
- * The standard rights (bits 16 to 23). These are independent of the
- * type of object being secured.
- */
-
- /* Right to delete the object. */
- DELETE = cpu_to_le32(0x00010000),
-
- /*
- * Right to read the information in the object's security descriptor,
- * not including the information in the SACL, i.e. right to read the
- * security descriptor and owner.
- */
- READ_CONTROL = cpu_to_le32(0x00020000),
-
- /* Right to modify the DACL in the object's security descriptor. */
- WRITE_DAC = cpu_to_le32(0x00040000),
-
- /* Right to change the owner in the object's security descriptor. */
- WRITE_OWNER = cpu_to_le32(0x00080000),
-
- /*
- * Right to use the object for synchronization. Enables a process to
- * wait until the object is in the signalled state. Some object types
- * do not support this access right.
- */
- SYNCHRONIZE = cpu_to_le32(0x00100000),
-
- /*
- * The following STANDARD_RIGHTS_* are combinations of the above for
- * convenience and are defined by the Win32 API.
- */
-
- /* These are currently defined to READ_CONTROL. */
- STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000),
- STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000),
- STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000),
-
- /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */
- STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000),
-
- /*
- * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and
- * SYNCHRONIZE access.
- */
- STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000),
-
- /*
- * The access system ACL and maximum allowed access types (bits 24 to
- * 25, bits 26 to 27 are reserved).
- */
- ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000),
- MAXIMUM_ALLOWED = cpu_to_le32(0x02000000),
-
- /*
- * The generic rights (bits 28 to 31). These map onto the standard and
- * specific rights.
- */
-
- /* Read, write, and execute access. */
- GENERIC_ALL = cpu_to_le32(0x10000000),
-
- /* Execute access. */
- GENERIC_EXECUTE = cpu_to_le32(0x20000000),
-
- /*
- * Write access. For files, this maps onto:
- * FILE_APPEND_DATA | FILE_WRITE_ATTRIBUTES | FILE_WRITE_DATA |
- * FILE_WRITE_EA | STANDARD_RIGHTS_WRITE | SYNCHRONIZE
- * For directories, the mapping has the same numerical value. See
- * above for the descriptions of the rights granted.
- */
- GENERIC_WRITE = cpu_to_le32(0x40000000),
-
- /*
- * Read access. For files, this maps onto:
- * FILE_READ_ATTRIBUTES | FILE_READ_DATA | FILE_READ_EA |
- * STANDARD_RIGHTS_READ | SYNCHRONIZE
- * For directories, the mapping has the same numberical value. See
- * above for the descriptions of the rights granted.
- */
- GENERIC_READ = cpu_to_le32(0x80000000),
-};
-
-typedef le32 ACCESS_MASK;
-
-/*
- * The generic mapping array. Used to denote the mapping of each generic
- * access right to a specific access mask.
- *
- * FIXME: What exactly is this and what is it for? (AIA)
- */
-typedef struct {
- ACCESS_MASK generic_read;
- ACCESS_MASK generic_write;
- ACCESS_MASK generic_execute;
- ACCESS_MASK generic_all;
-} __attribute__ ((__packed__)) GENERIC_MAPPING;
-
-/*
- * The predefined ACE type structures are as defined below.
- */
-
-/*
- * ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE
- */
-typedef struct {
-/* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. */
- ACE_TYPES type; /* Type of the ACE. */
- ACE_FLAGS flags; /* Flags describing the ACE. */
- le16 size; /* Size in bytes of the ACE. */
-/* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */
-
-/* 8*/ SID sid; /* The SID associated with the ACE. */
-} __attribute__ ((__packed__)) ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE,
- SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE;
-
-/*
- * The object ACE flags (32-bit).
- */
-enum {
- ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1),
- ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2),
-};
-
-typedef le32 OBJECT_ACE_FLAGS;
-
-typedef struct {
-/* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. */
- ACE_TYPES type; /* Type of the ACE. */
- ACE_FLAGS flags; /* Flags describing the ACE. */
- le16 size; /* Size in bytes of the ACE. */
-/* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */
-
-/* 8*/ OBJECT_ACE_FLAGS object_flags; /* Flags describing the object ACE. */
-/* 12*/ GUID object_type;
-/* 28*/ GUID inherited_object_type;
-
-/* 44*/ SID sid; /* The SID associated with the ACE. */
-} __attribute__ ((__packed__)) ACCESS_ALLOWED_OBJECT_ACE,
- ACCESS_DENIED_OBJECT_ACE,
- SYSTEM_AUDIT_OBJECT_ACE,
- SYSTEM_ALARM_OBJECT_ACE;
-
-/*
- * An ACL is an access-control list (ACL).
- * An ACL starts with an ACL header structure, which specifies the size of
- * the ACL and the number of ACEs it contains. The ACL header is followed by
- * zero or more access control entries (ACEs). The ACL as well as each ACE
- * are aligned on 4-byte boundaries.
- */
-typedef struct {
- u8 revision; /* Revision of this ACL. */
- u8 alignment1;
- le16 size; /* Allocated space in bytes for ACL. Includes this
- header, the ACEs and the remaining free space. */
- le16 ace_count; /* Number of ACEs in the ACL. */
- le16 alignment2;
-/* sizeof() = 8 bytes */
-} __attribute__ ((__packed__)) ACL;
-
-/*
- * Current constants for ACLs.
- */
-typedef enum {
- /* Current revision. */
- ACL_REVISION = 2,
- ACL_REVISION_DS = 4,
-
- /* History of revisions. */
- ACL_REVISION1 = 1,
- MIN_ACL_REVISION = 2,
- ACL_REVISION2 = 2,
- ACL_REVISION3 = 3,
- ACL_REVISION4 = 4,
- MAX_ACL_REVISION = 4,
-} ACL_CONSTANTS;
-
-/*
- * The security descriptor control flags (16-bit).
- *
- * SE_OWNER_DEFAULTED - This boolean flag, when set, indicates that the SID
- * pointed to by the Owner field was provided by a defaulting mechanism
- * rather than explicitly provided by the original provider of the
- * security descriptor. This may affect the treatment of the SID with
- * respect to inheritance of an owner.
- *
- * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in
- * the Group field was provided by a defaulting mechanism rather than
- * explicitly provided by the original provider of the security
- * descriptor. This may affect the treatment of the SID with respect to
- * inheritance of a primary group.
- *
- * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security
- * descriptor contains a discretionary ACL. If this flag is set and the
- * Dacl field of the SECURITY_DESCRIPTOR is null, then a null ACL is
- * explicitly being specified.
- *
- * SE_DACL_DEFAULTED - This boolean flag, when set, indicates that the ACL
- * pointed to by the Dacl field was provided by a defaulting mechanism
- * rather than explicitly provided by the original provider of the
- * security descriptor. This may affect the treatment of the ACL with
- * respect to inheritance of an ACL. This flag is ignored if the
- * DaclPresent flag is not set.
- *
- * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security
- * descriptor contains a system ACL pointed to by the Sacl field. If this
- * flag is set and the Sacl field of the SECURITY_DESCRIPTOR is null, then
- * an empty (but present) ACL is being specified.
- *
- * SE_SACL_DEFAULTED - This boolean flag, when set, indicates that the ACL
- * pointed to by the Sacl field was provided by a defaulting mechanism
- * rather than explicitly provided by the original provider of the
- * security descriptor. This may affect the treatment of the ACL with
- * respect to inheritance of an ACL. This flag is ignored if the
- * SaclPresent flag is not set.
- *
- * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security
- * descriptor is in self-relative form. In this form, all fields of the
- * security descriptor are contiguous in memory and all pointer fields are
- * expressed as offsets from the beginning of the security descriptor.
- */
-enum {
- SE_OWNER_DEFAULTED = cpu_to_le16(0x0001),
- SE_GROUP_DEFAULTED = cpu_to_le16(0x0002),
- SE_DACL_PRESENT = cpu_to_le16(0x0004),
- SE_DACL_DEFAULTED = cpu_to_le16(0x0008),
-
- SE_SACL_PRESENT = cpu_to_le16(0x0010),
- SE_SACL_DEFAULTED = cpu_to_le16(0x0020),
-
- SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100),
- SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200),
- SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400),
- SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800),
-
- SE_DACL_PROTECTED = cpu_to_le16(0x1000),
- SE_SACL_PROTECTED = cpu_to_le16(0x2000),
- SE_RM_CONTROL_VALID = cpu_to_le16(0x4000),
- SE_SELF_RELATIVE = cpu_to_le16(0x8000)
-} __attribute__ ((__packed__));
-
-typedef le16 SECURITY_DESCRIPTOR_CONTROL;
-
-/*
- * Self-relative security descriptor. Contains the owner and group SIDs as well
- * as the sacl and dacl ACLs inside the security descriptor itself.
- */
-typedef struct {
- u8 revision; /* Revision level of the security descriptor. */
- u8 alignment;
- SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of
- the descriptor as well as the following fields. */
- le32 owner; /* Byte offset to a SID representing an object's
- owner. If this is NULL, no owner SID is present in
- the descriptor. */
- le32 group; /* Byte offset to a SID representing an object's
- primary group. If this is NULL, no primary group
- SID is present in the descriptor. */
- le32 sacl; /* Byte offset to a system ACL. Only valid, if
- SE_SACL_PRESENT is set in the control field. If
- SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL
- is specified. */
- le32 dacl; /* Byte offset to a discretionary ACL. Only valid, if
- SE_DACL_PRESENT is set in the control field. If
- SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL
- (unconditionally granting access) is specified. */
-/* sizeof() = 0x14 bytes */
-} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_RELATIVE;
-
-/*
- * Absolute security descriptor. Does not contain the owner and group SIDs, nor
- * the sacl and dacl ACLs inside the security descriptor. Instead, it contains
- * pointers to these structures in memory. Obviously, absolute security
- * descriptors are only useful for in memory representations of security
- * descriptors. On disk, a self-relative security descriptor is used.
- */
-typedef struct {
- u8 revision; /* Revision level of the security descriptor. */
- u8 alignment;
- SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of
- the descriptor as well as the following fields. */
- SID *owner; /* Points to a SID representing an object's owner. If
- this is NULL, no owner SID is present in the
- descriptor. */
- SID *group; /* Points to a SID representing an object's primary
- group. If this is NULL, no primary group SID is
- present in the descriptor. */
- ACL *sacl; /* Points to a system ACL. Only valid, if
- SE_SACL_PRESENT is set in the control field. If
- SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL
- is specified. */
- ACL *dacl; /* Points to a discretionary ACL. Only valid, if
- SE_DACL_PRESENT is set in the control field. If
- SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL
- (unconditionally granting access) is specified. */
-} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR;
-
-/*
- * Current constants for security descriptors.
- */
-typedef enum {
- /* Current revision. */
- SECURITY_DESCRIPTOR_REVISION = 1,
- SECURITY_DESCRIPTOR_REVISION1 = 1,
-
- /* The sizes of both the absolute and relative security descriptors is
- the same as pointers, at least on ia32 architecture are 32-bit. */
- SECURITY_DESCRIPTOR_MIN_LENGTH = sizeof(SECURITY_DESCRIPTOR),
-} SECURITY_DESCRIPTOR_CONSTANTS;
-
-/*
- * Attribute: Security descriptor (0x50). A standard self-relative security
- * descriptor.
- *
- * NOTE: Can be resident or non-resident.
- * NOTE: Not used in NTFS 3.0+, as security descriptors are stored centrally
- * in FILE_Secure and the correct descriptor is found using the security_id
- * from the standard information attribute.
- */
-typedef SECURITY_DESCRIPTOR_RELATIVE SECURITY_DESCRIPTOR_ATTR;
-
-/*
- * On NTFS 3.0+, all security descriptors are stored in FILE_Secure. Only one
- * referenced instance of each unique security descriptor is stored.
- *
- * FILE_Secure contains no unnamed data attribute, i.e. it has zero length. It
- * does, however, contain two indexes ($SDH and $SII) as well as a named data
- * stream ($SDS).
- *
- * Every unique security descriptor is assigned a unique security identifier
- * (security_id, not to be confused with a SID). The security_id is unique for
- * the NTFS volume and is used as an index into the $SII index, which maps
- * security_ids to the security descriptor's storage location within the $SDS
- * data attribute. The $SII index is sorted by ascending security_id.
- *
- * A simple hash is computed from each security descriptor. This hash is used
- * as an index into the $SDH index, which maps security descriptor hashes to
- * the security descriptor's storage location within the $SDS data attribute.
- * The $SDH index is sorted by security descriptor hash and is stored in a B+
- * tree. When searching $SDH (with the intent of determining whether or not a
- * new security descriptor is already present in the $SDS data stream), if a
- * matching hash is found, but the security descriptors do not match, the
- * search in the $SDH index is continued, searching for a next matching hash.
- *
- * When a precise match is found, the security_id coresponding to the security
- * descriptor in the $SDS attribute is read from the found $SDH index entry and
- * is stored in the $STANDARD_INFORMATION attribute of the file/directory to
- * which the security descriptor is being applied. The $STANDARD_INFORMATION
- * attribute is present in all base mft records (i.e. in all files and
- * directories).
- *
- * If a match is not found, the security descriptor is assigned a new unique
- * security_id and is added to the $SDS data attribute. Then, entries
- * referencing the this security descriptor in the $SDS data attribute are
- * added to the $SDH and $SII indexes.
- *
- * Note: Entries are never deleted from FILE_Secure, even if nothing
- * references an entry any more.
- */
-
-/*
- * This header precedes each security descriptor in the $SDS data stream.
- * This is also the index entry data part of both the $SII and $SDH indexes.
- */
-typedef struct {
- le32 hash; /* Hash of the security descriptor. */
- le32 security_id; /* The security_id assigned to the descriptor. */
- le64 offset; /* Byte offset of this entry in the $SDS stream. */
- le32 length; /* Size in bytes of this entry in $SDS stream. */
-} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_HEADER;
-
-/*
- * The $SDS data stream contains the security descriptors, aligned on 16-byte
- * boundaries, sorted by security_id in a B+ tree. Security descriptors cannot
- * cross 256kib boundaries (this restriction is imposed by the Windows cache
- * manager). Each security descriptor is contained in a SDS_ENTRY structure.
- * Also, each security descriptor is stored twice in the $SDS stream with a
- * fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size)
- * between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then the
- * first copy of the security descriptor will be at offset 0x51d0 in the
- * $SDS data stream and the second copy will be at offset 0x451d0.
- */
-typedef struct {
-/*Ofs*/
-/* 0 SECURITY_DESCRIPTOR_HEADER; -- Unfolded here as gcc doesn't like
- unnamed structs. */
- le32 hash; /* Hash of the security descriptor. */
- le32 security_id; /* The security_id assigned to the descriptor. */
- le64 offset; /* Byte offset of this entry in the $SDS stream. */
- le32 length; /* Size in bytes of this entry in $SDS stream. */
-/* 20*/ SECURITY_DESCRIPTOR_RELATIVE sid; /* The self-relative security
- descriptor. */
-} __attribute__ ((__packed__)) SDS_ENTRY;
-
-/*
- * The index entry key used in the $SII index. The collation type is
- * COLLATION_NTOFS_ULONG.
- */
-typedef struct {
- le32 security_id; /* The security_id assigned to the descriptor. */
-} __attribute__ ((__packed__)) SII_INDEX_KEY;
-
-/*
- * The index entry key used in the $SDH index. The keys are sorted first by
- * hash and then by security_id. The collation rule is
- * COLLATION_NTOFS_SECURITY_HASH.
- */
-typedef struct {
- le32 hash; /* Hash of the security descriptor. */
- le32 security_id; /* The security_id assigned to the descriptor. */
-} __attribute__ ((__packed__)) SDH_INDEX_KEY;
-
-/*
- * Attribute: Volume name (0x60).
- *
- * NOTE: Always resident.
- * NOTE: Present only in FILE_Volume.
- */
-typedef struct {
- ntfschar name[0]; /* The name of the volume in Unicode. */
-} __attribute__ ((__packed__)) VOLUME_NAME;
-
-/*
- * Possible flags for the volume (16-bit).
- */
-enum {
- VOLUME_IS_DIRTY = cpu_to_le16(0x0001),
- VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002),
- VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004),
- VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008),
-
- VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010),
- VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020),
-
- VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000),
- VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000),
-
- VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f),
-
- /* To make our life easier when checking if we must mount read-only. */
- VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027),
-} __attribute__ ((__packed__));
-
-typedef le16 VOLUME_FLAGS;
-
-/*
- * Attribute: Volume information (0x70).
- *
- * NOTE: Always resident.
- * NOTE: Present only in FILE_Volume.
- * NOTE: Windows 2000 uses NTFS 3.0 while Windows NT4 service pack 6a uses
- * NTFS 1.2. I haven't personally seen other values yet.
- */
-typedef struct {
- le64 reserved; /* Not used (yet?). */
- u8 major_ver; /* Major version of the ntfs format. */
- u8 minor_ver; /* Minor version of the ntfs format. */
- VOLUME_FLAGS flags; /* Bit array of VOLUME_* flags. */
-} __attribute__ ((__packed__)) VOLUME_INFORMATION;
-
-/*
- * Attribute: Data attribute (0x80).
- *
- * NOTE: Can be resident or non-resident.
- *
- * Data contents of a file (i.e. the unnamed stream) or of a named stream.
- */
-typedef struct {
- u8 data[0]; /* The file's data contents. */
-} __attribute__ ((__packed__)) DATA_ATTR;
-
-/*
- * Index header flags (8-bit).
- */
-enum {
- /*
- * When index header is in an index root attribute:
- */
- SMALL_INDEX = 0, /* The index is small enough to fit inside the index
- root attribute and there is no index allocation
- attribute present. */
- LARGE_INDEX = 1, /* The index is too large to fit in the index root
- attribute and/or an index allocation attribute is
- present. */
- /*
- * When index header is in an index block, i.e. is part of index
- * allocation attribute:
- */
- LEAF_NODE = 0, /* This is a leaf node, i.e. there are no more nodes
- branching off it. */
- INDEX_NODE = 1, /* This node indexes other nodes, i.e. it is not a leaf
- node. */
- NODE_MASK = 1, /* Mask for accessing the *_NODE bits. */
-} __attribute__ ((__packed__));
-
-typedef u8 INDEX_HEADER_FLAGS;
-
-/*
- * This is the header for indexes, describing the INDEX_ENTRY records, which
- * follow the INDEX_HEADER. Together the index header and the index entries
- * make up a complete index.
- *
- * IMPORTANT NOTE: The offset, length and size structure members are counted
- * relative to the start of the index header structure and not relative to the
- * start of the index root or index allocation structures themselves.
- */
-typedef struct {
- le32 entries_offset; /* Byte offset to first INDEX_ENTRY
- aligned to 8-byte boundary. */
- le32 index_length; /* Data size of the index in bytes,
- i.e. bytes used from allocated
- size, aligned to 8-byte boundary. */
- le32 allocated_size; /* Byte size of this index (block),
- multiple of 8 bytes. */
- /* NOTE: For the index root attribute, the above two numbers are always
- equal, as the attribute is resident and it is resized as needed. In
- the case of the index allocation attribute the attribute is not
- resident and hence the allocated_size is a fixed value and must
- equal the index_block_size specified by the INDEX_ROOT attribute
- corresponding to the INDEX_ALLOCATION attribute this INDEX_BLOCK
- belongs to. */
- INDEX_HEADER_FLAGS flags; /* Bit field of INDEX_HEADER_FLAGS. */
- u8 reserved[3]; /* Reserved/align to 8-byte boundary. */
-} __attribute__ ((__packed__)) INDEX_HEADER;
-
-/*
- * Attribute: Index root (0x90).
- *
- * NOTE: Always resident.
- *
- * This is followed by a sequence of index entries (INDEX_ENTRY structures)
- * as described by the index header.
- *
- * When a directory is small enough to fit inside the index root then this
- * is the only attribute describing the directory. When the directory is too
- * large to fit in the index root, on the other hand, two additional attributes
- * are present: an index allocation attribute, containing sub-nodes of the B+
- * directory tree (see below), and a bitmap attribute, describing which virtual
- * cluster numbers (vcns) in the index allocation attribute are in use by an
- * index block.
- *
- * NOTE: The root directory (FILE_root) contains an entry for itself. Other
- * directories do not contain entries for themselves, though.
- */
-typedef struct {
- ATTR_TYPE type; /* Type of the indexed attribute. Is
- $FILE_NAME for directories, zero
- for view indexes. No other values
- allowed. */
- COLLATION_RULE collation_rule; /* Collation rule used to sort the
- index entries. If type is $FILE_NAME,
- this must be COLLATION_FILE_NAME. */
- le32 index_block_size; /* Size of each index block in bytes (in
- the index allocation attribute). */
- u8 clusters_per_index_block; /* Cluster size of each index block (in
- the index allocation attribute), when
- an index block is >= than a cluster,
- otherwise this will be the log of
- the size (like how the encoding of
- the mft record size and the index
- record size found in the boot sector
- work). Has to be a power of 2. */
- u8 reserved[3]; /* Reserved/align to 8-byte boundary. */
- INDEX_HEADER index; /* Index header describing the
- following index entries. */
-} __attribute__ ((__packed__)) INDEX_ROOT;
-
-/*
- * Attribute: Index allocation (0xa0).
- *
- * NOTE: Always non-resident (doesn't make sense to be resident anyway!).
- *
- * This is an array of index blocks. Each index block starts with an
- * INDEX_BLOCK structure containing an index header, followed by a sequence of
- * index entries (INDEX_ENTRY structures), as described by the INDEX_HEADER.
- */
-typedef struct {
-/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */
- NTFS_RECORD_TYPE magic; /* Magic is "INDX". */
- le16 usa_ofs; /* See NTFS_RECORD definition. */
- le16 usa_count; /* See NTFS_RECORD definition. */
-
-/* 8*/ sle64 lsn; /* $LogFile sequence number of the last
- modification of this index block. */
-/* 16*/ leVCN index_block_vcn; /* Virtual cluster number of the index block.
- If the cluster_size on the volume is <= the
- index_block_size of the directory,
- index_block_vcn counts in units of clusters,
- and in units of sectors otherwise. */
-/* 24*/ INDEX_HEADER index; /* Describes the following index entries. */
-/* sizeof()= 40 (0x28) bytes */
-/*
- * When creating the index block, we place the update sequence array at this
- * offset, i.e. before we start with the index entries. This also makes sense,
- * otherwise we could run into problems with the update sequence array
- * containing in itself the last two bytes of a sector which would mean that
- * multi sector transfer protection wouldn't work. As you can't protect data
- * by overwriting it since you then can't get it back...
- * When reading use the data from the ntfs record header.
- */
-} __attribute__ ((__packed__)) INDEX_BLOCK;
-
-typedef INDEX_BLOCK INDEX_ALLOCATION;
-
-/*
- * The system file FILE_Extend/$Reparse contains an index named $R listing
- * all reparse points on the volume. The index entry keys are as defined
- * below. Note, that there is no index data associated with the index entries.
- *
- * The index entries are sorted by the index key file_id. The collation rule is
- * COLLATION_NTOFS_ULONGS. FIXME: Verify whether the reparse_tag is not the
- * primary key / is not a key at all. (AIA)
- */
-typedef struct {
- le32 reparse_tag; /* Reparse point type (inc. flags). */
- leMFT_REF file_id; /* Mft record of the file containing the
- reparse point attribute. */
-} __attribute__ ((__packed__)) REPARSE_INDEX_KEY;
-
-/*
- * Quota flags (32-bit).
- *
- * The user quota flags. Names explain meaning.
- */
-enum {
- QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001),
- QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002),
- QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004),
-
- QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007),
- /* This is a bit mask for the user quota flags. */
-
- /*
- * These flags are only present in the quota defaults index entry, i.e.
- * in the entry where owner_id = QUOTA_DEFAULTS_ID.
- */
- QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010),
- QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020),
- QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040),
- QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080),
-
- QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100),
- QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200),
- QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400),
- QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800),
-};
-
-typedef le32 QUOTA_FLAGS;
-
-/*
- * The system file FILE_Extend/$Quota contains two indexes $O and $Q. Quotas
- * are on a per volume and per user basis.
- *
- * The $Q index contains one entry for each existing user_id on the volume. The
- * index key is the user_id of the user/group owning this quota control entry,
- * i.e. the key is the owner_id. The user_id of the owner of a file, i.e. the
- * owner_id, is found in the standard information attribute. The collation rule
- * for $Q is COLLATION_NTOFS_ULONG.
- *
- * The $O index contains one entry for each user/group who has been assigned
- * a quota on that volume. The index key holds the SID of the user_id the
- * entry belongs to, i.e. the owner_id. The collation rule for $O is
- * COLLATION_NTOFS_SID.
- *
- * The $O index entry data is the user_id of the user corresponding to the SID.
- * This user_id is used as an index into $Q to find the quota control entry
- * associated with the SID.
- *
- * The $Q index entry data is the quota control entry and is defined below.
- */
-typedef struct {
- le32 version; /* Currently equals 2. */
- QUOTA_FLAGS flags; /* Flags describing this quota entry. */
- le64 bytes_used; /* How many bytes of the quota are in use. */
- sle64 change_time; /* Last time this quota entry was changed. */
- sle64 threshold; /* Soft quota (-1 if not limited). */
- sle64 limit; /* Hard quota (-1 if not limited). */
- sle64 exceeded_time; /* How long the soft quota has been exceeded. */
- SID sid; /* The SID of the user/object associated with
- this quota entry. Equals zero for the quota
- defaults entry (and in fact on a WinXP
- volume, it is not present at all). */
-} __attribute__ ((__packed__)) QUOTA_CONTROL_ENTRY;
-
-/*
- * Predefined owner_id values (32-bit).
- */
-enum {
- QUOTA_INVALID_ID = cpu_to_le32(0x00000000),
- QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001),
- QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100),
-};
-
-/*
- * Current constants for quota control entries.
- */
-typedef enum {
- /* Current version. */
- QUOTA_VERSION = 2,
-} QUOTA_CONTROL_ENTRY_CONSTANTS;
-
-/*
- * Index entry flags (16-bit).
- */
-enum {
- INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a
- sub-node, i.e. a reference to an index block in form of
- a virtual cluster number (see below). */
- INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last
- entry in an index block. The index entry does not
- represent a file but it can point to a sub-node. */
-
- INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force
- enum bit width to 16-bit. */
-} __attribute__ ((__packed__));
-
-typedef le16 INDEX_ENTRY_FLAGS;
-
-/*
- * This the index entry header (see below).
- */
-typedef struct {
-/* 0*/ union {
- struct { /* Only valid when INDEX_ENTRY_END is not set. */
- leMFT_REF indexed_file; /* The mft reference of the file
- described by this index
- entry. Used for directory
- indexes. */
- } __attribute__ ((__packed__)) dir;
- struct { /* Used for views/indexes to find the entry's data. */
- le16 data_offset; /* Data byte offset from this
- INDEX_ENTRY. Follows the
- index key. */
- le16 data_length; /* Data length in bytes. */
- le32 reservedV; /* Reserved (zero). */
- } __attribute__ ((__packed__)) vi;
- } __attribute__ ((__packed__)) data;
-/* 8*/ le16 length; /* Byte size of this index entry, multiple of
- 8-bytes. */
-/* 10*/ le16 key_length; /* Byte size of the key value, which is in the
- index entry. It follows field reserved. Not
- multiple of 8-bytes. */
-/* 12*/ INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */
-/* 14*/ le16 reserved; /* Reserved/align to 8-byte boundary. */
-/* sizeof() = 16 bytes */
-} __attribute__ ((__packed__)) INDEX_ENTRY_HEADER;
-
-/*
- * This is an index entry. A sequence of such entries follows each INDEX_HEADER
- * structure. Together they make up a complete index. The index follows either
- * an index root attribute or an index allocation attribute.
- *
- * NOTE: Before NTFS 3.0 only filename attributes were indexed.
- */
-typedef struct {
-/*Ofs*/
-/* 0 INDEX_ENTRY_HEADER; -- Unfolded here as gcc dislikes unnamed structs. */
- union {
- struct { /* Only valid when INDEX_ENTRY_END is not set. */
- leMFT_REF indexed_file; /* The mft reference of the file
- described by this index
- entry. Used for directory
- indexes. */
- } __attribute__ ((__packed__)) dir;
- struct { /* Used for views/indexes to find the entry's data. */
- le16 data_offset; /* Data byte offset from this
- INDEX_ENTRY. Follows the
- index key. */
- le16 data_length; /* Data length in bytes. */
- le32 reservedV; /* Reserved (zero). */
- } __attribute__ ((__packed__)) vi;
- } __attribute__ ((__packed__)) data;
- le16 length; /* Byte size of this index entry, multiple of
- 8-bytes. */
- le16 key_length; /* Byte size of the key value, which is in the
- index entry. It follows field reserved. Not
- multiple of 8-bytes. */
- INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */
- le16 reserved; /* Reserved/align to 8-byte boundary. */
-
-/* 16*/ union { /* The key of the indexed attribute. NOTE: Only present
- if INDEX_ENTRY_END bit in flags is not set. NOTE: On
- NTFS versions before 3.0 the only valid key is the
- FILE_NAME_ATTR. On NTFS 3.0+ the following
- additional index keys are defined: */
- FILE_NAME_ATTR file_name;/* $I30 index in directories. */
- SII_INDEX_KEY sii; /* $SII index in $Secure. */
- SDH_INDEX_KEY sdh; /* $SDH index in $Secure. */
- GUID object_id; /* $O index in FILE_Extend/$ObjId: The
- object_id of the mft record found in
- the data part of the index. */
- REPARSE_INDEX_KEY reparse; /* $R index in
- FILE_Extend/$Reparse. */
- SID sid; /* $O index in FILE_Extend/$Quota:
- SID of the owner of the user_id. */
- le32 owner_id; /* $Q index in FILE_Extend/$Quota:
- user_id of the owner of the quota
- control entry in the data part of
- the index. */
- } __attribute__ ((__packed__)) key;
- /* The (optional) index data is inserted here when creating. */
- // leVCN vcn; /* If INDEX_ENTRY_NODE bit in flags is set, the last
- // eight bytes of this index entry contain the virtual
- // cluster number of the index block that holds the
- // entries immediately preceding the current entry (the
- // vcn references the corresponding cluster in the data
- // of the non-resident index allocation attribute). If
- // the key_length is zero, then the vcn immediately
- // follows the INDEX_ENTRY_HEADER. Regardless of
- // key_length, the address of the 8-byte boundary
- // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
- // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN),
- // where sizeof(VCN) can be hardcoded as 8 if wanted. */
-} __attribute__ ((__packed__)) INDEX_ENTRY;
-
-/*
- * Attribute: Bitmap (0xb0).
- *
- * Contains an array of bits (aka a bitfield).
- *
- * When used in conjunction with the index allocation attribute, each bit
- * corresponds to one index block within the index allocation attribute. Thus
- * the number of bits in the bitmap * index block size / cluster size is the
- * number of clusters in the index allocation attribute.
- */
-typedef struct {
- u8 bitmap[0]; /* Array of bits. */
-} __attribute__ ((__packed__)) BITMAP_ATTR;
-
-/*
- * The reparse point tag defines the type of the reparse point. It also
- * includes several flags, which further describe the reparse point.
- *
- * The reparse point tag is an unsigned 32-bit value divided in three parts:
- *
- * 1. The least significant 16 bits (i.e. bits 0 to 15) specifiy the type of
- * the reparse point.
- * 2. The 13 bits after this (i.e. bits 16 to 28) are reserved for future use.
- * 3. The most significant three bits are flags describing the reparse point.
- * They are defined as follows:
- * bit 29: Name surrogate bit. If set, the filename is an alias for
- * another object in the system.
- * bit 30: High-latency bit. If set, accessing the first byte of data will
- * be slow. (E.g. the data is stored on a tape drive.)
- * bit 31: Microsoft bit. If set, the tag is owned by Microsoft. User
- * defined tags have to use zero here.
- *
- * These are the predefined reparse point tags:
- */
-enum {
- IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000),
- IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000),
- IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000),
-
- IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000),
- IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001),
- IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001),
-
- IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005),
- IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006),
- IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007),
- IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008),
-
- IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003),
-
- IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004),
-
- IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000),
-
- IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff),
-};
-
-/*
- * Attribute: Reparse point (0xc0).
- *
- * NOTE: Can be resident or non-resident.
- */
-typedef struct {
- le32 reparse_tag; /* Reparse point type (inc. flags). */
- le16 reparse_data_length; /* Byte size of reparse data. */
- le16 reserved; /* Align to 8-byte boundary. */
- u8 reparse_data[0]; /* Meaning depends on reparse_tag. */
-} __attribute__ ((__packed__)) REPARSE_POINT;
-
-/*
- * Attribute: Extended attribute (EA) information (0xd0).
- *
- * NOTE: Always resident. (Is this true???)
- */
-typedef struct {
- le16 ea_length; /* Byte size of the packed extended
- attributes. */
- le16 need_ea_count; /* The number of extended attributes which have
- the NEED_EA bit set. */
- le32 ea_query_length; /* Byte size of the buffer required to query
- the extended attributes when calling
- ZwQueryEaFile() in Windows NT/2k. I.e. the
- byte size of the unpacked extended
- attributes. */
-} __attribute__ ((__packed__)) EA_INFORMATION;
-
-/*
- * Extended attribute flags (8-bit).
- */
-enum {
- NEED_EA = 0x80 /* If set the file to which the EA belongs
- cannot be interpreted without understanding
- the associates extended attributes. */
-} __attribute__ ((__packed__));
-
-typedef u8 EA_FLAGS;
-
-/*
- * Attribute: Extended attribute (EA) (0xe0).
- *
- * NOTE: Can be resident or non-resident.
- *
- * Like the attribute list and the index buffer list, the EA attribute value is
- * a sequence of EA_ATTR variable length records.
- */
-typedef struct {
- le32 next_entry_offset; /* Offset to the next EA_ATTR. */
- EA_FLAGS flags; /* Flags describing the EA. */
- u8 ea_name_length; /* Length of the name of the EA in bytes
- excluding the '\0' byte terminator. */
- le16 ea_value_length; /* Byte size of the EA's value. */
- u8 ea_name[0]; /* Name of the EA. Note this is ASCII, not
- Unicode and it is zero terminated. */
- u8 ea_value[0]; /* The value of the EA. Immediately follows
- the name. */
-} __attribute__ ((__packed__)) EA_ATTR;
-
-/*
- * Attribute: Property set (0xf0).
- *
- * Intended to support Native Structure Storage (NSS) - a feature removed from
- * NTFS 3.0 during beta testing.
- */
-typedef struct {
- /* Irrelevant as feature unused. */
-} __attribute__ ((__packed__)) PROPERTY_SET;
-
-/*
- * Attribute: Logged utility stream (0x100).
- *
- * NOTE: Can be resident or non-resident.
- *
- * Operations on this attribute are logged to the journal ($LogFile) like
- * normal metadata changes.
- *
- * Used by the Encrypting File System (EFS). All encrypted files have this
- * attribute with the name $EFS.
- */
-typedef struct {
- /* Can be anything the creator chooses. */
- /* EFS uses it as follows: */
- // FIXME: Type this info, verifying it along the way. (AIA)
-} __attribute__ ((__packed__)) LOGGED_UTILITY_STREAM, EFS_ATTR;
-
-#endif /* _LINUX_NTFS_LAYOUT_H */
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
deleted file mode 100644
index eda9972e6159..000000000000
--- a/fs/ntfs/lcnalloc.c
+++ /dev/null
@@ -1,1000 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2004-2005 Anton Altaparmakov
- */
-
-#ifdef NTFS_RW
-
-#include <linux/pagemap.h>
-
-#include "lcnalloc.h"
-#include "debug.h"
-#include "bitmap.h"
-#include "inode.h"
-#include "volume.h"
-#include "attrib.h"
-#include "malloc.h"
-#include "aops.h"
-#include "ntfs.h"
-
-/**
- * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
- * @vol: mounted ntfs volume on which to free the clusters
- * @rl: runlist describing the clusters to free
- *
- * Free all the clusters described by the runlist @rl on the volume @vol. In
- * the case of an error being returned, at least some of the clusters were not
- * freed.
- *
- * Return 0 on success and -errno on error.
- *
- * Locking: - The volume lcn bitmap must be locked for writing on entry and is
- * left locked on return.
- */
-int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
- const runlist_element *rl)
-{
- struct inode *lcnbmp_vi = vol->lcnbmp_ino;
- int ret = 0;
-
- ntfs_debug("Entering.");
- if (!rl)
- return 0;
- for (; rl->length; rl++) {
- int err;
-
- if (rl->lcn < 0)
- continue;
- err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
- if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
- ret = err;
- }
- ntfs_debug("Done.");
- return ret;
-}
-
-/**
- * ntfs_cluster_alloc - allocate clusters on an ntfs volume
- * @vol: mounted ntfs volume on which to allocate the clusters
- * @start_vcn: vcn to use for the first allocated cluster
- * @count: number of clusters to allocate
- * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none)
- * @zone: zone from which to allocate the clusters
- * @is_extension: if 'true', this is an attribute extension
- *
- * Allocate @count clusters preferably starting at cluster @start_lcn or at the
- * current allocator position if @start_lcn is -1, on the mounted ntfs volume
- * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
- * MFT_ZONE for allocation of clusters for the master file table, i.e. the
- * $MFT/$DATA attribute.
- *
- * @start_vcn specifies the vcn of the first allocated cluster. This makes
- * merging the resulting runlist with the old runlist easier.
- *
- * If @is_extension is 'true', the caller is allocating clusters to extend an
- * attribute and if it is 'false', the caller is allocating clusters to fill a
- * hole in an attribute. Practically the difference is that if @is_extension
- * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
- * @is_extension is 'false' the runlist will be terminated with
- * LCN_RL_NOT_MAPPED.
- *
- * You need to check the return value with IS_ERR(). If this is false, the
- * function was successful and the return value is a runlist describing the
- * allocated cluster(s). If IS_ERR() is true, the function failed and
- * PTR_ERR() gives you the error code.
- *
- * Notes on the allocation algorithm
- * =================================
- *
- * There are two data zones. First is the area between the end of the mft zone
- * and the end of the volume, and second is the area between the start of the
- * volume and the start of the mft zone. On unmodified/standard NTFS 1.x
- * volumes, the second data zone does not exist due to the mft zone being
- * expanded to cover the start of the volume in order to reserve space for the
- * mft bitmap attribute.
- *
- * This is not the prettiest function but the complexity stems from the need of
- * implementing the mft vs data zoned approach and from the fact that we have
- * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
- * need to cope with crossing over boundaries of two buffers. Further, the
- * fact that the allocator allows for caller supplied hints as to the location
- * of where allocation should begin and the fact that the allocator keeps track
- * of where in the data zones the next natural allocation should occur,
- * contribute to the complexity of the function. But it should all be
- * worthwhile, because this allocator should: 1) be a full implementation of
- * the MFT zone approach used by Windows NT, 2) cause reduction in
- * fragmentation, and 3) be speedy in allocations (the code is not optimized
- * for speed, but the algorithm is, so further speed improvements are probably
- * possible).
- *
- * FIXME: We should be monitoring cluster allocation and increment the MFT zone
- * size dynamically but this is something for the future. We will just cause
- * heavier fragmentation by not doing it and I am not even sure Windows would
- * grow the MFT zone dynamically, so it might even be correct not to do this.
- * The overhead in doing dynamic MFT zone expansion would be very large and
- * unlikely worth the effort. (AIA)
- *
- * TODO: I have added in double the required zone position pointer wrap around
- * logic which can be optimized to having only one of the two logic sets.
- * However, having the double logic will work fine, but if we have only one of
- * the sets and we get it wrong somewhere, then we get into trouble, so
- * removing the duplicate logic requires _very_ careful consideration of _all_
- * possible code paths. So at least for now, I am leaving the double logic -
- * better safe than sorry... (AIA)
- *
- * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked
- * on return.
- * - This function takes the volume lcn bitmap lock for writing and
- * modifies the bitmap contents.
- */
-runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
- const s64 count, const LCN start_lcn,
- const NTFS_CLUSTER_ALLOCATION_ZONES zone,
- const bool is_extension)
-{
- LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
- LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
- s64 clusters;
- loff_t i_size;
- struct inode *lcnbmp_vi;
- runlist_element *rl = NULL;
- struct address_space *mapping;
- struct page *page = NULL;
- u8 *buf, *byte;
- int err = 0, rlpos, rlsize, buf_size;
- u8 pass, done_zones, search_zone, need_writeback = 0, bit;
-
- ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn "
- "0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn,
- (unsigned long long)count,
- (unsigned long long)start_lcn,
- zone == MFT_ZONE ? "MFT" : "DATA");
- BUG_ON(!vol);
- lcnbmp_vi = vol->lcnbmp_ino;
- BUG_ON(!lcnbmp_vi);
- BUG_ON(start_vcn < 0);
- BUG_ON(count < 0);
- BUG_ON(start_lcn < -1);
- BUG_ON(zone < FIRST_ZONE);
- BUG_ON(zone > LAST_ZONE);
-
- /* Return NULL if @count is zero. */
- if (!count)
- return NULL;
- /* Take the lcnbmp lock for writing. */
- down_write(&vol->lcnbmp_lock);
- /*
- * If no specific @start_lcn was requested, use the current data zone
- * position, otherwise use the requested @start_lcn but make sure it
- * lies outside the mft zone. Also set done_zones to 0 (no zones done)
- * and pass depending on whether we are starting inside a zone (1) or
- * at the beginning of a zone (2). If requesting from the MFT_ZONE,
- * we either start at the current position within the mft zone or at
- * the specified position. If the latter is out of bounds then we start
- * at the beginning of the MFT_ZONE.
- */
- done_zones = 0;
- pass = 1;
- /*
- * zone_start and zone_end are the current search range. search_zone
- * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
- * volume) and 4 for data zone 2 (start of volume till start of mft
- * zone).
- */
- zone_start = start_lcn;
- if (zone_start < 0) {
- if (zone == DATA_ZONE)
- zone_start = vol->data1_zone_pos;
- else
- zone_start = vol->mft_zone_pos;
- if (!zone_start) {
- /*
- * Zone starts at beginning of volume which means a
- * single pass is sufficient.
- */
- pass = 2;
- }
- } else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start &&
- zone_start < vol->mft_zone_end) {
- zone_start = vol->mft_zone_end;
- /*
- * Starting at beginning of data1_zone which means a single
- * pass in this zone is sufficient.
- */
- pass = 2;
- } else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start ||
- zone_start >= vol->mft_zone_end)) {
- zone_start = vol->mft_lcn;
- if (!vol->mft_zone_end)
- zone_start = 0;
- /*
- * Starting at beginning of volume which means a single pass
- * is sufficient.
- */
- pass = 2;
- }
- if (zone == MFT_ZONE) {
- zone_end = vol->mft_zone_end;
- search_zone = 1;
- } else /* if (zone == DATA_ZONE) */ {
- /* Skip searching the mft zone. */
- done_zones |= 1;
- if (zone_start >= vol->mft_zone_end) {
- zone_end = vol->nr_clusters;
- search_zone = 2;
- } else {
- zone_end = vol->mft_zone_start;
- search_zone = 4;
- }
- }
- /*
- * bmp_pos is the current bit position inside the bitmap. We use
- * bmp_initial_pos to determine whether or not to do a zone switch.
- */
- bmp_pos = bmp_initial_pos = zone_start;
-
- /* Loop until all clusters are allocated, i.e. clusters == 0. */
- clusters = count;
- rlpos = rlsize = 0;
- mapping = lcnbmp_vi->i_mapping;
- i_size = i_size_read(lcnbmp_vi);
- while (1) {
- ntfs_debug("Start of outer while loop: done_zones 0x%x, "
- "search_zone %i, pass %i, zone_start 0x%llx, "
- "zone_end 0x%llx, bmp_initial_pos 0x%llx, "
- "bmp_pos 0x%llx, rlpos %i, rlsize %i.",
- done_zones, search_zone, pass,
- (unsigned long long)zone_start,
- (unsigned long long)zone_end,
- (unsigned long long)bmp_initial_pos,
- (unsigned long long)bmp_pos, rlpos, rlsize);
- /* Loop until we run out of free clusters. */
- last_read_pos = bmp_pos >> 3;
- ntfs_debug("last_read_pos 0x%llx.",
- (unsigned long long)last_read_pos);
- if (last_read_pos > i_size) {
- ntfs_debug("End of attribute reached. "
- "Skipping to zone_pass_done.");
- goto zone_pass_done;
- }
- if (likely(page)) {
- if (need_writeback) {
- ntfs_debug("Marking page dirty.");
- flush_dcache_page(page);
- set_page_dirty(page);
- need_writeback = 0;
- }
- ntfs_unmap_page(page);
- }
- page = ntfs_map_page(mapping, last_read_pos >>
- PAGE_SHIFT);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- ntfs_error(vol->sb, "Failed to map page.");
- goto out;
- }
- buf_size = last_read_pos & ~PAGE_MASK;
- buf = page_address(page) + buf_size;
- buf_size = PAGE_SIZE - buf_size;
- if (unlikely(last_read_pos + buf_size > i_size))
- buf_size = i_size - last_read_pos;
- buf_size <<= 3;
- lcn = bmp_pos & 7;
- bmp_pos &= ~(LCN)7;
- ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
- "bmp_pos 0x%llx, need_writeback %i.", buf_size,
- (unsigned long long)lcn,
- (unsigned long long)bmp_pos, need_writeback);
- while (lcn < buf_size && lcn + bmp_pos < zone_end) {
- byte = buf + (lcn >> 3);
- ntfs_debug("In inner while loop: buf_size %i, "
- "lcn 0x%llx, bmp_pos 0x%llx, "
- "need_writeback %i, byte ofs 0x%x, "
- "*byte 0x%x.", buf_size,
- (unsigned long long)lcn,
- (unsigned long long)bmp_pos,
- need_writeback,
- (unsigned int)(lcn >> 3),
- (unsigned int)*byte);
- /* Skip full bytes. */
- if (*byte == 0xff) {
- lcn = (lcn + 8) & ~(LCN)7;
- ntfs_debug("Continuing while loop 1.");
- continue;
- }
- bit = 1 << (lcn & 7);
- ntfs_debug("bit 0x%x.", bit);
- /* If the bit is already set, go onto the next one. */
- if (*byte & bit) {
- lcn++;
- ntfs_debug("Continuing while loop 2.");
- continue;
- }
- /*
- * Allocate more memory if needed, including space for
- * the terminator element.
- * ntfs_malloc_nofs() operates on whole pages only.
- */
- if ((rlpos + 2) * sizeof(*rl) > rlsize) {
- runlist_element *rl2;
-
- ntfs_debug("Reallocating memory.");
- if (!rl)
- ntfs_debug("First free bit is at LCN "
- "0x%llx.",
- (unsigned long long)
- (lcn + bmp_pos));
- rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
- if (unlikely(!rl2)) {
- err = -ENOMEM;
- ntfs_error(vol->sb, "Failed to "
- "allocate memory.");
- goto out;
- }
- memcpy(rl2, rl, rlsize);
- ntfs_free(rl);
- rl = rl2;
- rlsize += PAGE_SIZE;
- ntfs_debug("Reallocated memory, rlsize 0x%x.",
- rlsize);
- }
- /* Allocate the bitmap bit. */
- *byte |= bit;
- /* We need to write this bitmap page to disk. */
- need_writeback = 1;
- ntfs_debug("*byte 0x%x, need_writeback is set.",
- (unsigned int)*byte);
- /*
- * Coalesce with previous run if adjacent LCNs.
- * Otherwise, append a new run.
- */
- ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), "
- "prev_lcn 0x%llx, lcn 0x%llx, "
- "bmp_pos 0x%llx, prev_run_len 0x%llx, "
- "rlpos %i.",
- (unsigned long long)(lcn + bmp_pos),
- 1ULL, (unsigned long long)prev_lcn,
- (unsigned long long)lcn,
- (unsigned long long)bmp_pos,
- (unsigned long long)prev_run_len,
- rlpos);
- if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
- ntfs_debug("Coalescing to run (lcn 0x%llx, "
- "len 0x%llx).",
- (unsigned long long)
- rl[rlpos - 1].lcn,
- (unsigned long long)
- rl[rlpos - 1].length);
- rl[rlpos - 1].length = ++prev_run_len;
- ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), "
- "prev_run_len 0x%llx.",
- (unsigned long long)
- rl[rlpos - 1].lcn,
- (unsigned long long)
- rl[rlpos - 1].length,
- (unsigned long long)
- prev_run_len);
- } else {
- if (likely(rlpos)) {
- ntfs_debug("Adding new run, (previous "
- "run lcn 0x%llx, "
- "len 0x%llx).",
- (unsigned long long)
- rl[rlpos - 1].lcn,
- (unsigned long long)
- rl[rlpos - 1].length);
- rl[rlpos].vcn = rl[rlpos - 1].vcn +
- prev_run_len;
- } else {
- ntfs_debug("Adding new run, is first "
- "run.");
- rl[rlpos].vcn = start_vcn;
- }
- rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
- rl[rlpos].length = prev_run_len = 1;
- rlpos++;
- }
- /* Done? */
- if (!--clusters) {
- LCN tc;
- /*
- * Update the current zone position. Positions
- * of already scanned zones have been updated
- * during the respective zone switches.
- */
- tc = lcn + bmp_pos + 1;
- ntfs_debug("Done. Updating current zone "
- "position, tc 0x%llx, "
- "search_zone %i.",
- (unsigned long long)tc,
- search_zone);
- switch (search_zone) {
- case 1:
- ntfs_debug("Before checks, "
- "vol->mft_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->mft_zone_pos);
- if (tc >= vol->mft_zone_end) {
- vol->mft_zone_pos =
- vol->mft_lcn;
- if (!vol->mft_zone_end)
- vol->mft_zone_pos = 0;
- } else if ((bmp_initial_pos >=
- vol->mft_zone_pos ||
- tc > vol->mft_zone_pos)
- && tc >= vol->mft_lcn)
- vol->mft_zone_pos = tc;
- ntfs_debug("After checks, "
- "vol->mft_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->mft_zone_pos);
- break;
- case 2:
- ntfs_debug("Before checks, "
- "vol->data1_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data1_zone_pos);
- if (tc >= vol->nr_clusters)
- vol->data1_zone_pos =
- vol->mft_zone_end;
- else if ((bmp_initial_pos >=
- vol->data1_zone_pos ||
- tc > vol->data1_zone_pos)
- && tc >= vol->mft_zone_end)
- vol->data1_zone_pos = tc;
- ntfs_debug("After checks, "
- "vol->data1_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data1_zone_pos);
- break;
- case 4:
- ntfs_debug("Before checks, "
- "vol->data2_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data2_zone_pos);
- if (tc >= vol->mft_zone_start)
- vol->data2_zone_pos = 0;
- else if (bmp_initial_pos >=
- vol->data2_zone_pos ||
- tc > vol->data2_zone_pos)
- vol->data2_zone_pos = tc;
- ntfs_debug("After checks, "
- "vol->data2_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data2_zone_pos);
- break;
- default:
- BUG();
- }
- ntfs_debug("Finished. Going to out.");
- goto out;
- }
- lcn++;
- }
- bmp_pos += buf_size;
- ntfs_debug("After inner while loop: buf_size 0x%x, lcn "
- "0x%llx, bmp_pos 0x%llx, need_writeback %i.",
- buf_size, (unsigned long long)lcn,
- (unsigned long long)bmp_pos, need_writeback);
- if (bmp_pos < zone_end) {
- ntfs_debug("Continuing outer while loop, "
- "bmp_pos 0x%llx, zone_end 0x%llx.",
- (unsigned long long)bmp_pos,
- (unsigned long long)zone_end);
- continue;
- }
-zone_pass_done: /* Finished with the current zone pass. */
- ntfs_debug("At zone_pass_done, pass %i.", pass);
- if (pass == 1) {
- /*
- * Now do pass 2, scanning the first part of the zone
- * we omitted in pass 1.
- */
- pass = 2;
- zone_end = zone_start;
- switch (search_zone) {
- case 1: /* mft_zone */
- zone_start = vol->mft_zone_start;
- break;
- case 2: /* data1_zone */
- zone_start = vol->mft_zone_end;
- break;
- case 4: /* data2_zone */
- zone_start = 0;
- break;
- default:
- BUG();
- }
- /* Sanity check. */
- if (zone_end < zone_start)
- zone_end = zone_start;
- bmp_pos = zone_start;
- ntfs_debug("Continuing outer while loop, pass 2, "
- "zone_start 0x%llx, zone_end 0x%llx, "
- "bmp_pos 0x%llx.",
- (unsigned long long)zone_start,
- (unsigned long long)zone_end,
- (unsigned long long)bmp_pos);
- continue;
- } /* pass == 2 */
-done_zones_check:
- ntfs_debug("At done_zones_check, search_zone %i, done_zones "
- "before 0x%x, done_zones after 0x%x.",
- search_zone, done_zones,
- done_zones | search_zone);
- done_zones |= search_zone;
- if (done_zones < 7) {
- ntfs_debug("Switching zone.");
- /* Now switch to the next zone we haven't done yet. */
- pass = 1;
- switch (search_zone) {
- case 1:
- ntfs_debug("Switching from mft zone to data1 "
- "zone.");
- /* Update mft zone position. */
- if (rlpos) {
- LCN tc;
-
- ntfs_debug("Before checks, "
- "vol->mft_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->mft_zone_pos);
- tc = rl[rlpos - 1].lcn +
- rl[rlpos - 1].length;
- if (tc >= vol->mft_zone_end) {
- vol->mft_zone_pos =
- vol->mft_lcn;
- if (!vol->mft_zone_end)
- vol->mft_zone_pos = 0;
- } else if ((bmp_initial_pos >=
- vol->mft_zone_pos ||
- tc > vol->mft_zone_pos)
- && tc >= vol->mft_lcn)
- vol->mft_zone_pos = tc;
- ntfs_debug("After checks, "
- "vol->mft_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->mft_zone_pos);
- }
- /* Switch from mft zone to data1 zone. */
-switch_to_data1_zone: search_zone = 2;
- zone_start = bmp_initial_pos =
- vol->data1_zone_pos;
- zone_end = vol->nr_clusters;
- if (zone_start == vol->mft_zone_end)
- pass = 2;
- if (zone_start >= zone_end) {
- vol->data1_zone_pos = zone_start =
- vol->mft_zone_end;
- pass = 2;
- }
- break;
- case 2:
- ntfs_debug("Switching from data1 zone to "
- "data2 zone.");
- /* Update data1 zone position. */
- if (rlpos) {
- LCN tc;
-
- ntfs_debug("Before checks, "
- "vol->data1_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data1_zone_pos);
- tc = rl[rlpos - 1].lcn +
- rl[rlpos - 1].length;
- if (tc >= vol->nr_clusters)
- vol->data1_zone_pos =
- vol->mft_zone_end;
- else if ((bmp_initial_pos >=
- vol->data1_zone_pos ||
- tc > vol->data1_zone_pos)
- && tc >= vol->mft_zone_end)
- vol->data1_zone_pos = tc;
- ntfs_debug("After checks, "
- "vol->data1_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data1_zone_pos);
- }
- /* Switch from data1 zone to data2 zone. */
- search_zone = 4;
- zone_start = bmp_initial_pos =
- vol->data2_zone_pos;
- zone_end = vol->mft_zone_start;
- if (!zone_start)
- pass = 2;
- if (zone_start >= zone_end) {
- vol->data2_zone_pos = zone_start =
- bmp_initial_pos = 0;
- pass = 2;
- }
- break;
- case 4:
- ntfs_debug("Switching from data2 zone to "
- "data1 zone.");
- /* Update data2 zone position. */
- if (rlpos) {
- LCN tc;
-
- ntfs_debug("Before checks, "
- "vol->data2_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data2_zone_pos);
- tc = rl[rlpos - 1].lcn +
- rl[rlpos - 1].length;
- if (tc >= vol->mft_zone_start)
- vol->data2_zone_pos = 0;
- else if (bmp_initial_pos >=
- vol->data2_zone_pos ||
- tc > vol->data2_zone_pos)
- vol->data2_zone_pos = tc;
- ntfs_debug("After checks, "
- "vol->data2_zone_pos "
- "0x%llx.",
- (unsigned long long)
- vol->data2_zone_pos);
- }
- /* Switch from data2 zone to data1 zone. */
- goto switch_to_data1_zone;
- default:
- BUG();
- }
- ntfs_debug("After zone switch, search_zone %i, "
- "pass %i, bmp_initial_pos 0x%llx, "
- "zone_start 0x%llx, zone_end 0x%llx.",
- search_zone, pass,
- (unsigned long long)bmp_initial_pos,
- (unsigned long long)zone_start,
- (unsigned long long)zone_end);
- bmp_pos = zone_start;
- if (zone_start == zone_end) {
- ntfs_debug("Empty zone, going to "
- "done_zones_check.");
- /* Empty zone. Don't bother searching it. */
- goto done_zones_check;
- }
- ntfs_debug("Continuing outer while loop.");
- continue;
- } /* done_zones == 7 */
- ntfs_debug("All zones are finished.");
- /*
- * All zones are finished! If DATA_ZONE, shrink mft zone. If
- * MFT_ZONE, we have really run out of space.
- */
- mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
- ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end "
- "0x%llx, mft_zone_size 0x%llx.",
- (unsigned long long)vol->mft_zone_start,
- (unsigned long long)vol->mft_zone_end,
- (unsigned long long)mft_zone_size);
- if (zone == MFT_ZONE || mft_zone_size <= 0) {
- ntfs_debug("No free clusters left, going to out.");
- /* Really no more space left on device. */
- err = -ENOSPC;
- goto out;
- } /* zone == DATA_ZONE && mft_zone_size > 0 */
- ntfs_debug("Shrinking mft zone.");
- zone_end = vol->mft_zone_end;
- mft_zone_size >>= 1;
- if (mft_zone_size > 0)
- vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
- else /* mft zone and data2 zone no longer exist. */
- vol->data2_zone_pos = vol->mft_zone_start =
- vol->mft_zone_end = 0;
- if (vol->mft_zone_pos >= vol->mft_zone_end) {
- vol->mft_zone_pos = vol->mft_lcn;
- if (!vol->mft_zone_end)
- vol->mft_zone_pos = 0;
- }
- bmp_pos = zone_start = bmp_initial_pos =
- vol->data1_zone_pos = vol->mft_zone_end;
- search_zone = 2;
- pass = 2;
- done_zones &= ~2;
- ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, "
- "vol->mft_zone_start 0x%llx, "
- "vol->mft_zone_end 0x%llx, "
- "vol->mft_zone_pos 0x%llx, search_zone 2, "
- "pass 2, dones_zones 0x%x, zone_start 0x%llx, "
- "zone_end 0x%llx, vol->data1_zone_pos 0x%llx, "
- "continuing outer while loop.",
- (unsigned long long)mft_zone_size,
- (unsigned long long)vol->mft_zone_start,
- (unsigned long long)vol->mft_zone_end,
- (unsigned long long)vol->mft_zone_pos,
- done_zones, (unsigned long long)zone_start,
- (unsigned long long)zone_end,
- (unsigned long long)vol->data1_zone_pos);
- }
- ntfs_debug("After outer while loop.");
-out:
- ntfs_debug("At out.");
- /* Add runlist terminator element. */
- if (likely(rl)) {
- rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
- rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
- rl[rlpos].length = 0;
- }
- if (likely(page && !IS_ERR(page))) {
- if (need_writeback) {
- ntfs_debug("Marking page dirty.");
- flush_dcache_page(page);
- set_page_dirty(page);
- need_writeback = 0;
- }
- ntfs_unmap_page(page);
- }
- if (likely(!err)) {
- up_write(&vol->lcnbmp_lock);
- ntfs_debug("Done.");
- return rl;
- }
- ntfs_error(vol->sb, "Failed to allocate clusters, aborting "
- "(error %i).", err);
- if (rl) {
- int err2;
-
- if (err == -ENOSPC)
- ntfs_debug("Not enough space to complete allocation, "
- "err -ENOSPC, first free lcn 0x%llx, "
- "could allocate up to 0x%llx "
- "clusters.",
- (unsigned long long)rl[0].lcn,
- (unsigned long long)(count - clusters));
- /* Deallocate all allocated clusters. */
- ntfs_debug("Attempting rollback...");
- err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
- if (err2) {
- ntfs_error(vol->sb, "Failed to rollback (error %i). "
- "Leaving inconsistent metadata! "
- "Unmount and run chkdsk.", err2);
- NVolSetErrors(vol);
- }
- /* Free the runlist. */
- ntfs_free(rl);
- } else if (err == -ENOSPC)
- ntfs_debug("No space left at all, err = -ENOSPC, first free "
- "lcn = 0x%llx.",
- (long long)vol->data1_zone_pos);
- up_write(&vol->lcnbmp_lock);
- return ERR_PTR(err);
-}
-
-/**
- * __ntfs_cluster_free - free clusters on an ntfs volume
- * @ni: ntfs inode whose runlist describes the clusters to free
- * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
- * @count: number of clusters to free or -1 for all clusters
- * @ctx: active attribute search context if present or NULL if not
- * @is_rollback: true if this is a rollback operation
- *
- * Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the vfs inode @ni.
- *
- * If @count is -1, all clusters from @start_vcn to the end of the runlist are
- * deallocated. Thus, to completely free all clusters in a runlist, use
- * @start_vcn = 0 and @count = -1.
- *
- * If @ctx is specified, it is an active search context of @ni and its base mft
- * record. This is needed when __ntfs_cluster_free() encounters unmapped
- * runlist fragments and allows their mapping. If you do not have the mft
- * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
- * perform the necessary mapping and unmapping.
- *
- * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
- * before returning. Thus, @ctx will be left pointing to the same attribute on
- * return as on entry. However, the actual pointers in @ctx may point to
- * different memory locations on return, so you must remember to reset any
- * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
- * you will probably want to do:
- * m = ctx->mrec;
- * a = ctx->attr;
- * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
- * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
- *
- * @is_rollback should always be 'false', it is for internal use to rollback
- * errors. You probably want to use ntfs_cluster_free() instead.
- *
- * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
- * remove from the runlist or mark sparse the freed runs later.
- *
- * Return the number of deallocated clusters (not counting sparse ones) on
- * success and -errno on error.
- *
- * WARNING: If @ctx is supplied, regardless of whether success or failure is
- * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
- * is no longer valid, i.e. you need to either call
- * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
- * In that case PTR_ERR(@ctx->mrec) will give you the error code for
- * why the mapping of the old inode failed.
- *
- * Locking: - The runlist described by @ni must be locked for writing on entry
- * and is locked on return. Note the runlist may be modified when
- * needed runlist fragments need to be mapped.
- * - The volume lcn bitmap must be unlocked on entry and is unlocked
- * on return.
- * - This function takes the volume lcn bitmap lock for writing and
- * modifies the bitmap contents.
- * - If @ctx is NULL, the base mft record of @ni must not be mapped on
- * entry and it will be left unmapped on return.
- * - If @ctx is not NULL, the base mft record must be mapped on entry
- * and it will be left mapped on return.
- */
-s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
- ntfs_attr_search_ctx *ctx, const bool is_rollback)
-{
- s64 delta, to_free, total_freed, real_freed;
- ntfs_volume *vol;
- struct inode *lcnbmp_vi;
- runlist_element *rl;
- int err;
-
- BUG_ON(!ni);
- ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
- "0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
- (unsigned long long)count,
- is_rollback ? " (rollback)" : "");
- vol = ni->vol;
- lcnbmp_vi = vol->lcnbmp_ino;
- BUG_ON(!lcnbmp_vi);
- BUG_ON(start_vcn < 0);
- BUG_ON(count < -1);
- /*
- * Lock the lcn bitmap for writing but only if not rolling back. We
- * must hold the lock all the way including through rollback otherwise
- * rollback is not possible because once we have cleared a bit and
- * dropped the lock, anyone could have set the bit again, thus
- * allocating the cluster for another use.
- */
- if (likely(!is_rollback))
- down_write(&vol->lcnbmp_lock);
-
- total_freed = real_freed = 0;
-
- rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
- if (IS_ERR(rl)) {
- if (!is_rollback)
- ntfs_error(vol->sb, "Failed to find first runlist "
- "element (error %li), aborting.",
- PTR_ERR(rl));
- err = PTR_ERR(rl);
- goto err_out;
- }
- if (unlikely(rl->lcn < LCN_HOLE)) {
- if (!is_rollback)
- ntfs_error(vol->sb, "First runlist element has "
- "invalid lcn, aborting.");
- err = -EIO;
- goto err_out;
- }
- /* Find the starting cluster inside the run that needs freeing. */
- delta = start_vcn - rl->vcn;
-
- /* The number of clusters in this run that need freeing. */
- to_free = rl->length - delta;
- if (count >= 0 && to_free > count)
- to_free = count;
-
- if (likely(rl->lcn >= 0)) {
- /* Do the actual freeing of the clusters in this run. */
- err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta,
- to_free, likely(!is_rollback) ? 0 : 1);
- if (unlikely(err)) {
- if (!is_rollback)
- ntfs_error(vol->sb, "Failed to clear first run "
- "(error %i), aborting.", err);
- goto err_out;
- }
- /* We have freed @to_free real clusters. */
- real_freed = to_free;
- };
- /* Go to the next run and adjust the number of clusters left to free. */
- ++rl;
- if (count >= 0)
- count -= to_free;
-
- /* Keep track of the total "freed" clusters, including sparse ones. */
- total_freed = to_free;
- /*
- * Loop over the remaining runs, using @count as a capping value, and
- * free them.
- */
- for (; rl->length && count != 0; ++rl) {
- if (unlikely(rl->lcn < LCN_HOLE)) {
- VCN vcn;
-
- /* Attempt to map runlist. */
- vcn = rl->vcn;
- rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
- if (IS_ERR(rl)) {
- err = PTR_ERR(rl);
- if (!is_rollback)
- ntfs_error(vol->sb, "Failed to map "
- "runlist fragment or "
- "failed to find "
- "subsequent runlist "
- "element.");
- goto err_out;
- }
- if (unlikely(rl->lcn < LCN_HOLE)) {
- if (!is_rollback)
- ntfs_error(vol->sb, "Runlist element "
- "has invalid lcn "
- "(0x%llx).",
- (unsigned long long)
- rl->lcn);
- err = -EIO;
- goto err_out;
- }
- }
- /* The number of clusters in this run that need freeing. */
- to_free = rl->length;
- if (count >= 0 && to_free > count)
- to_free = count;
-
- if (likely(rl->lcn >= 0)) {
- /* Do the actual freeing of the clusters in the run. */
- err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn,
- to_free, likely(!is_rollback) ? 0 : 1);
- if (unlikely(err)) {
- if (!is_rollback)
- ntfs_error(vol->sb, "Failed to clear "
- "subsequent run.");
- goto err_out;
- }
- /* We have freed @to_free real clusters. */
- real_freed += to_free;
- }
- /* Adjust the number of clusters left to free. */
- if (count >= 0)
- count -= to_free;
-
- /* Update the total done clusters. */
- total_freed += to_free;
- }
- if (likely(!is_rollback))
- up_write(&vol->lcnbmp_lock);
-
- BUG_ON(count > 0);
-
- /* We are done. Return the number of actually freed clusters. */
- ntfs_debug("Done.");
- return real_freed;
-err_out:
- if (is_rollback)
- return err;
- /* If no real clusters were freed, no need to rollback. */
- if (!real_freed) {
- up_write(&vol->lcnbmp_lock);
- return err;
- }
- /*
- * Attempt to rollback and if that succeeds just return the error code.
- * If rollback fails, set the volume errors flag, emit an error
- * message, and return the error code.
- */
- delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
- if (delta < 0) {
- ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
- "inconsistent metadata! Unmount and run "
- "chkdsk.", (int)delta);
- NVolSetErrors(vol);
- }
- up_write(&vol->lcnbmp_lock);
- ntfs_error(vol->sb, "Aborting (error %i).", err);
- return err;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
deleted file mode 100644
index 1589a6d8434b..000000000000
--- a/fs/ntfs/lcnalloc.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * lcnalloc.h - Exports for NTFS kernel cluster (de)allocation. Part of the
- * Linux-NTFS project.
- *
- * Copyright (c) 2004-2005 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_LCNALLOC_H
-#define _LINUX_NTFS_LCNALLOC_H
-
-#ifdef NTFS_RW
-
-#include <linux/fs.h>
-
-#include "attrib.h"
-#include "types.h"
-#include "inode.h"
-#include "runlist.h"
-#include "volume.h"
-
-typedef enum {
- FIRST_ZONE = 0, /* For sanity checking. */
- MFT_ZONE = 0, /* Allocate from $MFT zone. */
- DATA_ZONE = 1, /* Allocate from $DATA zone. */
- LAST_ZONE = 1, /* For sanity checking. */
-} NTFS_CLUSTER_ALLOCATION_ZONES;
-
-extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
- const VCN start_vcn, const s64 count, const LCN start_lcn,
- const NTFS_CLUSTER_ALLOCATION_ZONES zone,
- const bool is_extension);
-
-extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
- s64 count, ntfs_attr_search_ctx *ctx, const bool is_rollback);
-
-/**
- * ntfs_cluster_free - free clusters on an ntfs volume
- * @ni: ntfs inode whose runlist describes the clusters to free
- * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
- * @count: number of clusters to free or -1 for all clusters
- * @ctx: active attribute search context if present or NULL if not
- *
- * Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the ntfs inode @ni.
- *
- * If @count is -1, all clusters from @start_vcn to the end of the runlist are
- * deallocated. Thus, to completely free all clusters in a runlist, use
- * @start_vcn = 0 and @count = -1.
- *
- * If @ctx is specified, it is an active search context of @ni and its base mft
- * record. This is needed when ntfs_cluster_free() encounters unmapped runlist
- * fragments and allows their mapping. If you do not have the mft record
- * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform
- * the necessary mapping and unmapping.
- *
- * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it
- * before returning. Thus, @ctx will be left pointing to the same attribute on
- * return as on entry. However, the actual pointers in @ctx may point to
- * different memory locations on return, so you must remember to reset any
- * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(),
- * you will probably want to do:
- * m = ctx->mrec;
- * a = ctx->attr;
- * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
- * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
- *
- * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove
- * from the runlist or mark sparse the freed runs later.
- *
- * Return the number of deallocated clusters (not counting sparse ones) on
- * success and -errno on error.
- *
- * WARNING: If @ctx is supplied, regardless of whether success or failure is
- * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
- * is no longer valid, i.e. you need to either call
- * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
- * In that case PTR_ERR(@ctx->mrec) will give you the error code for
- * why the mapping of the old inode failed.
- *
- * Locking: - The runlist described by @ni must be locked for writing on entry
- * and is locked on return. Note the runlist may be modified when
- * needed runlist fragments need to be mapped.
- * - The volume lcn bitmap must be unlocked on entry and is unlocked
- * on return.
- * - This function takes the volume lcn bitmap lock for writing and
- * modifies the bitmap contents.
- * - If @ctx is NULL, the base mft record of @ni must not be mapped on
- * entry and it will be left unmapped on return.
- * - If @ctx is not NULL, the base mft record must be mapped on entry
- * and it will be left mapped on return.
- */
-static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
- s64 count, ntfs_attr_search_ctx *ctx)
-{
- return __ntfs_cluster_free(ni, start_vcn, count, ctx, false);
-}
-
-extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
- const runlist_element *rl);
-
-/**
- * ntfs_cluster_free_from_rl - free clusters from runlist
- * @vol: mounted ntfs volume on which to free the clusters
- * @rl: runlist describing the clusters to free
- *
- * Free all the clusters described by the runlist @rl on the volume @vol. In
- * the case of an error being returned, at least some of the clusters were not
- * freed.
- *
- * Return 0 on success and -errno on error.
- *
- * Locking: - This function takes the volume lcn bitmap lock for writing and
- * modifies the bitmap contents.
- * - The caller must have locked the runlist @rl for reading or
- * writing.
- */
-static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol,
- const runlist_element *rl)
-{
- int ret;
-
- down_write(&vol->lcnbmp_lock);
- ret = ntfs_cluster_free_from_rl_nolock(vol, rl);
- up_write(&vol->lcnbmp_lock);
- return ret;
-}
-
-#endif /* NTFS_RW */
-
-#endif /* defined _LINUX_NTFS_LCNALLOC_H */
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
deleted file mode 100644
index 6ce60ffc6ac0..000000000000
--- a/fs/ntfs/logfile.c
+++ /dev/null
@@ -1,849 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2002-2007 Anton Altaparmakov
- */
-
-#ifdef NTFS_RW
-
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/highmem.h>
-#include <linux/buffer_head.h>
-#include <linux/bitops.h>
-#include <linux/log2.h>
-#include <linux/bio.h>
-
-#include "attrib.h"
-#include "aops.h"
-#include "debug.h"
-#include "logfile.h"
-#include "malloc.h"
-#include "volume.h"
-#include "ntfs.h"
-
-/**
- * ntfs_check_restart_page_header - check the page header for consistency
- * @vi: $LogFile inode to which the restart page header belongs
- * @rp: restart page header to check
- * @pos: position in @vi at which the restart page header resides
- *
- * Check the restart page header @rp for consistency and return 'true' if it is
- * consistent and 'false' otherwise.
- *
- * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
- * require the full restart page.
- */
-static bool ntfs_check_restart_page_header(struct inode *vi,
- RESTART_PAGE_HEADER *rp, s64 pos)
-{
- u32 logfile_system_page_size, logfile_log_page_size;
- u16 ra_ofs, usa_count, usa_ofs, usa_end = 0;
- bool have_usa = true;
-
- ntfs_debug("Entering.");
- /*
- * If the system or log page sizes are smaller than the ntfs block size
- * or either is not a power of 2 we cannot handle this log file.
- */
- logfile_system_page_size = le32_to_cpu(rp->system_page_size);
- logfile_log_page_size = le32_to_cpu(rp->log_page_size);
- if (logfile_system_page_size < NTFS_BLOCK_SIZE ||
- logfile_log_page_size < NTFS_BLOCK_SIZE ||
- logfile_system_page_size &
- (logfile_system_page_size - 1) ||
- !is_power_of_2(logfile_log_page_size)) {
- ntfs_error(vi->i_sb, "$LogFile uses unsupported page size.");
- return false;
- }
- /*
- * We must be either at !pos (1st restart page) or at pos = system page
- * size (2nd restart page).
- */
- if (pos && pos != logfile_system_page_size) {
- ntfs_error(vi->i_sb, "Found restart area in incorrect "
- "position in $LogFile.");
- return false;
- }
- /* We only know how to handle version 1.1. */
- if (sle16_to_cpu(rp->major_ver) != 1 ||
- sle16_to_cpu(rp->minor_ver) != 1) {
- ntfs_error(vi->i_sb, "$LogFile version %i.%i is not "
- "supported. (This driver supports version "
- "1.1 only.)", (int)sle16_to_cpu(rp->major_ver),
- (int)sle16_to_cpu(rp->minor_ver));
- return false;
- }
- /*
- * If chkdsk has been run the restart page may not be protected by an
- * update sequence array.
- */
- if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count)) {
- have_usa = false;
- goto skip_usa_checks;
- }
- /* Verify the size of the update sequence array. */
- usa_count = 1 + (logfile_system_page_size >> NTFS_BLOCK_SIZE_BITS);
- if (usa_count != le16_to_cpu(rp->usa_count)) {
- ntfs_error(vi->i_sb, "$LogFile restart page specifies "
- "inconsistent update sequence array count.");
- return false;
- }
- /* Verify the position of the update sequence array. */
- usa_ofs = le16_to_cpu(rp->usa_ofs);
- usa_end = usa_ofs + usa_count * sizeof(u16);
- if (usa_ofs < sizeof(RESTART_PAGE_HEADER) ||
- usa_end > NTFS_BLOCK_SIZE - sizeof(u16)) {
- ntfs_error(vi->i_sb, "$LogFile restart page specifies "
- "inconsistent update sequence array offset.");
- return false;
- }
-skip_usa_checks:
- /*
- * Verify the position of the restart area. It must be:
- * - aligned to 8-byte boundary,
- * - after the update sequence array, and
- * - within the system page size.
- */
- ra_ofs = le16_to_cpu(rp->restart_area_offset);
- if (ra_ofs & 7 || (have_usa ? ra_ofs < usa_end :
- ra_ofs < sizeof(RESTART_PAGE_HEADER)) ||
- ra_ofs > logfile_system_page_size) {
- ntfs_error(vi->i_sb, "$LogFile restart page specifies "
- "inconsistent restart area offset.");
- return false;
- }
- /*
- * Only restart pages modified by chkdsk are allowed to have chkdsk_lsn
- * set.
- */
- if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) {
- ntfs_error(vi->i_sb, "$LogFile restart page is not modified "
- "by chkdsk but a chkdsk LSN is specified.");
- return false;
- }
- ntfs_debug("Done.");
- return true;
-}
-
-/**
- * ntfs_check_restart_area - check the restart area for consistency
- * @vi: $LogFile inode to which the restart page belongs
- * @rp: restart page whose restart area to check
- *
- * Check the restart area of the restart page @rp for consistency and return
- * 'true' if it is consistent and 'false' otherwise.
- *
- * This function assumes that the restart page header has already been
- * consistency checked.
- *
- * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
- * require the full restart page.
- */
-static bool ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
-{
- u64 file_size;
- RESTART_AREA *ra;
- u16 ra_ofs, ra_len, ca_ofs;
- u8 fs_bits;
-
- ntfs_debug("Entering.");
- ra_ofs = le16_to_cpu(rp->restart_area_offset);
- ra = (RESTART_AREA*)((u8*)rp + ra_ofs);
- /*
- * Everything before ra->file_size must be before the first word
- * protected by an update sequence number. This ensures that it is
- * safe to access ra->client_array_offset.
- */
- if (ra_ofs + offsetof(RESTART_AREA, file_size) >
- NTFS_BLOCK_SIZE - sizeof(u16)) {
- ntfs_error(vi->i_sb, "$LogFile restart area specifies "
- "inconsistent file offset.");
- return false;
- }
- /*
- * Now that we can access ra->client_array_offset, make sure everything
- * up to the log client array is before the first word protected by an
- * update sequence number. This ensures we can access all of the
- * restart area elements safely. Also, the client array offset must be
- * aligned to an 8-byte boundary.
- */
- ca_ofs = le16_to_cpu(ra->client_array_offset);
- if (((ca_ofs + 7) & ~7) != ca_ofs ||
- ra_ofs + ca_ofs > NTFS_BLOCK_SIZE - sizeof(u16)) {
- ntfs_error(vi->i_sb, "$LogFile restart area specifies "
- "inconsistent client array offset.");
- return false;
- }
- /*
- * The restart area must end within the system page size both when
- * calculated manually and as specified by ra->restart_area_length.
- * Also, the calculated length must not exceed the specified length.
- */
- ra_len = ca_ofs + le16_to_cpu(ra->log_clients) *
- sizeof(LOG_CLIENT_RECORD);
- if (ra_ofs + ra_len > le32_to_cpu(rp->system_page_size) ||
- ra_ofs + le16_to_cpu(ra->restart_area_length) >
- le32_to_cpu(rp->system_page_size) ||
- ra_len > le16_to_cpu(ra->restart_area_length)) {
- ntfs_error(vi->i_sb, "$LogFile restart area is out of bounds "
- "of the system page size specified by the "
- "restart page header and/or the specified "
- "restart area length is inconsistent.");
- return false;
- }
- /*
- * The ra->client_free_list and ra->client_in_use_list must be either
- * LOGFILE_NO_CLIENT or less than ra->log_clients or they are
- * overflowing the client array.
- */
- if ((ra->client_free_list != LOGFILE_NO_CLIENT &&
- le16_to_cpu(ra->client_free_list) >=
- le16_to_cpu(ra->log_clients)) ||
- (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
- le16_to_cpu(ra->client_in_use_list) >=
- le16_to_cpu(ra->log_clients))) {
- ntfs_error(vi->i_sb, "$LogFile restart area specifies "
- "overflowing client free and/or in use lists.");
- return false;
- }
- /*
- * Check ra->seq_number_bits against ra->file_size for consistency.
- * We cannot just use ffs() because the file size is not a power of 2.
- */
- file_size = (u64)sle64_to_cpu(ra->file_size);
- fs_bits = 0;
- while (file_size) {
- file_size >>= 1;
- fs_bits++;
- }
- if (le32_to_cpu(ra->seq_number_bits) != 67 - fs_bits) {
- ntfs_error(vi->i_sb, "$LogFile restart area specifies "
- "inconsistent sequence number bits.");
- return false;
- }
- /* The log record header length must be a multiple of 8. */
- if (((le16_to_cpu(ra->log_record_header_length) + 7) & ~7) !=
- le16_to_cpu(ra->log_record_header_length)) {
- ntfs_error(vi->i_sb, "$LogFile restart area specifies "
- "inconsistent log record header length.");
- return false;
- }
- /* Dito for the log page data offset. */
- if (((le16_to_cpu(ra->log_page_data_offset) + 7) & ~7) !=
- le16_to_cpu(ra->log_page_data_offset)) {
- ntfs_error(vi->i_sb, "$LogFile restart area specifies "
- "inconsistent log page data offset.");
- return false;
- }
- ntfs_debug("Done.");
- return true;
-}
-
-/**
- * ntfs_check_log_client_array - check the log client array for consistency
- * @vi: $LogFile inode to which the restart page belongs
- * @rp: restart page whose log client array to check
- *
- * Check the log client array of the restart page @rp for consistency and
- * return 'true' if it is consistent and 'false' otherwise.
- *
- * This function assumes that the restart page header and the restart area have
- * already been consistency checked.
- *
- * Unlike ntfs_check_restart_page_header() and ntfs_check_restart_area(), this
- * function needs @rp->system_page_size bytes in @rp, i.e. it requires the full
- * restart page and the page must be multi sector transfer deprotected.
- */
-static bool ntfs_check_log_client_array(struct inode *vi,
- RESTART_PAGE_HEADER *rp)
-{
- RESTART_AREA *ra;
- LOG_CLIENT_RECORD *ca, *cr;
- u16 nr_clients, idx;
- bool in_free_list, idx_is_first;
-
- ntfs_debug("Entering.");
- ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
- ca = (LOG_CLIENT_RECORD*)((u8*)ra +
- le16_to_cpu(ra->client_array_offset));
- /*
- * Check the ra->client_free_list first and then check the
- * ra->client_in_use_list. Check each of the log client records in
- * each of the lists and check that the array does not overflow the
- * ra->log_clients value. Also keep track of the number of records
- * visited as there cannot be more than ra->log_clients records and
- * that way we detect eventual loops in within a list.
- */
- nr_clients = le16_to_cpu(ra->log_clients);
- idx = le16_to_cpu(ra->client_free_list);
- in_free_list = true;
-check_list:
- for (idx_is_first = true; idx != LOGFILE_NO_CLIENT_CPU; nr_clients--,
- idx = le16_to_cpu(cr->next_client)) {
- if (!nr_clients || idx >= le16_to_cpu(ra->log_clients))
- goto err_out;
- /* Set @cr to the current log client record. */
- cr = ca + idx;
- /* The first log client record must not have a prev_client. */
- if (idx_is_first) {
- if (cr->prev_client != LOGFILE_NO_CLIENT)
- goto err_out;
- idx_is_first = false;
- }
- }
- /* Switch to and check the in use list if we just did the free list. */
- if (in_free_list) {
- in_free_list = false;
- idx = le16_to_cpu(ra->client_in_use_list);
- goto check_list;
- }
- ntfs_debug("Done.");
- return true;
-err_out:
- ntfs_error(vi->i_sb, "$LogFile log client array is corrupt.");
- return false;
-}
-
-/**
- * ntfs_check_and_load_restart_page - check the restart page for consistency
- * @vi: $LogFile inode to which the restart page belongs
- * @rp: restart page to check
- * @pos: position in @vi at which the restart page resides
- * @wrp: [OUT] copy of the multi sector transfer deprotected restart page
- * @lsn: [OUT] set to the current logfile lsn on success
- *
- * Check the restart page @rp for consistency and return 0 if it is consistent
- * and -errno otherwise. The restart page may have been modified by chkdsk in
- * which case its magic is CHKD instead of RSTR.
- *
- * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
- * require the full restart page.
- *
- * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a
- * copy of the complete multi sector transfer deprotected page. On failure,
- * *@wrp is undefined.
- *
- * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current
- * logfile lsn according to this restart page. On failure, *@lsn is undefined.
- *
- * The following error codes are defined:
- * -EINVAL - The restart page is inconsistent.
- * -ENOMEM - Not enough memory to load the restart page.
- * -EIO - Failed to reading from $LogFile.
- */
-static int ntfs_check_and_load_restart_page(struct inode *vi,
- RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp,
- LSN *lsn)
-{
- RESTART_AREA *ra;
- RESTART_PAGE_HEADER *trp;
- int size, err;
-
- ntfs_debug("Entering.");
- /* Check the restart page header for consistency. */
- if (!ntfs_check_restart_page_header(vi, rp, pos)) {
- /* Error output already done inside the function. */
- return -EINVAL;
- }
- /* Check the restart area for consistency. */
- if (!ntfs_check_restart_area(vi, rp)) {
- /* Error output already done inside the function. */
- return -EINVAL;
- }
- ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
- /*
- * Allocate a buffer to store the whole restart page so we can multi
- * sector transfer deprotect it.
- */
- trp = ntfs_malloc_nofs(le32_to_cpu(rp->system_page_size));
- if (!trp) {
- ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile "
- "restart page buffer.");
- return -ENOMEM;
- }
- /*
- * Read the whole of the restart page into the buffer. If it fits
- * completely inside @rp, just copy it from there. Otherwise map all
- * the required pages and copy the data from them.
- */
- size = PAGE_SIZE - (pos & ~PAGE_MASK);
- if (size >= le32_to_cpu(rp->system_page_size)) {
- memcpy(trp, rp, le32_to_cpu(rp->system_page_size));
- } else {
- pgoff_t idx;
- struct page *page;
- int have_read, to_read;
-
- /* First copy what we already have in @rp. */
- memcpy(trp, rp, size);
- /* Copy the remaining data one page at a time. */
- have_read = size;
- to_read = le32_to_cpu(rp->system_page_size) - size;
- idx = (pos + size) >> PAGE_SHIFT;
- BUG_ON((pos + size) & ~PAGE_MASK);
- do {
- page = ntfs_map_page(vi->i_mapping, idx);
- if (IS_ERR(page)) {
- ntfs_error(vi->i_sb, "Error mapping $LogFile "
- "page (index %lu).", idx);
- err = PTR_ERR(page);
- if (err != -EIO && err != -ENOMEM)
- err = -EIO;
- goto err_out;
- }
- size = min_t(int, to_read, PAGE_SIZE);
- memcpy((u8*)trp + have_read, page_address(page), size);
- ntfs_unmap_page(page);
- have_read += size;
- to_read -= size;
- idx++;
- } while (to_read > 0);
- }
- /*
- * Perform the multi sector transfer deprotection on the buffer if the
- * restart page is protected.
- */
- if ((!ntfs_is_chkd_record(trp->magic) || le16_to_cpu(trp->usa_count))
- && post_read_mst_fixup((NTFS_RECORD*)trp,
- le32_to_cpu(rp->system_page_size))) {
- /*
- * A multi sector tranfer error was detected. We only need to
- * abort if the restart page contents exceed the multi sector
- * transfer fixup of the first sector.
- */
- if (le16_to_cpu(rp->restart_area_offset) +
- le16_to_cpu(ra->restart_area_length) >
- NTFS_BLOCK_SIZE - sizeof(u16)) {
- ntfs_error(vi->i_sb, "Multi sector transfer error "
- "detected in $LogFile restart page.");
- err = -EINVAL;
- goto err_out;
- }
- }
- /*
- * If the restart page is modified by chkdsk or there are no active
- * logfile clients, the logfile is consistent. Otherwise, need to
- * check the log client records for consistency, too.
- */
- err = 0;
- if (ntfs_is_rstr_record(rp->magic) &&
- ra->client_in_use_list != LOGFILE_NO_CLIENT) {
- if (!ntfs_check_log_client_array(vi, trp)) {
- err = -EINVAL;
- goto err_out;
- }
- }
- if (lsn) {
- if (ntfs_is_rstr_record(rp->magic))
- *lsn = sle64_to_cpu(ra->current_lsn);
- else /* if (ntfs_is_chkd_record(rp->magic)) */
- *lsn = sle64_to_cpu(rp->chkdsk_lsn);
- }
- ntfs_debug("Done.");
- if (wrp)
- *wrp = trp;
- else {
-err_out:
- ntfs_free(trp);
- }
- return err;
-}
-
-/**
- * ntfs_check_logfile - check the journal for consistency
- * @log_vi: struct inode of loaded journal $LogFile to check
- * @rp: [OUT] on success this is a copy of the current restart page
- *
- * Check the $LogFile journal for consistency and return 'true' if it is
- * consistent and 'false' if not. On success, the current restart page is
- * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it.
- *
- * At present we only check the two restart pages and ignore the log record
- * pages.
- *
- * Note that the MstProtected flag is not set on the $LogFile inode and hence
- * when reading pages they are not deprotected. This is because we do not know
- * if the $LogFile was created on a system with a different page size to ours
- * yet and mst deprotection would fail if our page size is smaller.
- */
-bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
-{
- s64 size, pos;
- LSN rstr1_lsn, rstr2_lsn;
- ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
- struct address_space *mapping = log_vi->i_mapping;
- struct page *page = NULL;
- u8 *kaddr = NULL;
- RESTART_PAGE_HEADER *rstr1_ph = NULL;
- RESTART_PAGE_HEADER *rstr2_ph = NULL;
- int log_page_size, err;
- bool logfile_is_empty = true;
- u8 log_page_bits;
-
- ntfs_debug("Entering.");
- /* An empty $LogFile must have been clean before it got emptied. */
- if (NVolLogFileEmpty(vol))
- goto is_empty;
- size = i_size_read(log_vi);
- /* Make sure the file doesn't exceed the maximum allowed size. */
- if (size > MaxLogFileSize)
- size = MaxLogFileSize;
- /*
- * Truncate size to a multiple of the page cache size or the default
- * log page size if the page cache size is between the default log page
- * log page size if the page cache size is between the default log page
- * size and twice that.
- */
- if (PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <=
- DefaultLogPageSize * 2)
- log_page_size = DefaultLogPageSize;
- else
- log_page_size = PAGE_SIZE;
- /*
- * Use ntfs_ffs() instead of ffs() to enable the compiler to
- * optimize log_page_size and log_page_bits into constants.
- */
- log_page_bits = ntfs_ffs(log_page_size) - 1;
- size &= ~(s64)(log_page_size - 1);
- /*
- * Ensure the log file is big enough to store at least the two restart
- * pages and the minimum number of log record pages.
- */
- if (size < log_page_size * 2 || (size - log_page_size * 2) >>
- log_page_bits < MinLogRecordPages) {
- ntfs_error(vol->sb, "$LogFile is too small.");
- return false;
- }
- /*
- * Read through the file looking for a restart page. Since the restart
- * page header is at the beginning of a page we only need to search at
- * what could be the beginning of a page (for each page size) rather
- * than scanning the whole file byte by byte. If all potential places
- * contain empty and uninitialzed records, the log file can be assumed
- * to be empty.
- */
- for (pos = 0; pos < size; pos <<= 1) {
- pgoff_t idx = pos >> PAGE_SHIFT;
- if (!page || page->index != idx) {
- if (page)
- ntfs_unmap_page(page);
- page = ntfs_map_page(mapping, idx);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Error mapping $LogFile "
- "page (index %lu).", idx);
- goto err_out;
- }
- }
- kaddr = (u8*)page_address(page) + (pos & ~PAGE_MASK);
- /*
- * A non-empty block means the logfile is not empty while an
- * empty block after a non-empty block has been encountered
- * means we are done.
- */
- if (!ntfs_is_empty_recordp((le32*)kaddr))
- logfile_is_empty = false;
- else if (!logfile_is_empty)
- break;
- /*
- * A log record page means there cannot be a restart page after
- * this so no need to continue searching.
- */
- if (ntfs_is_rcrd_recordp((le32*)kaddr))
- break;
- /* If not a (modified by chkdsk) restart page, continue. */
- if (!ntfs_is_rstr_recordp((le32*)kaddr) &&
- !ntfs_is_chkd_recordp((le32*)kaddr)) {
- if (!pos)
- pos = NTFS_BLOCK_SIZE >> 1;
- continue;
- }
- /*
- * Check the (modified by chkdsk) restart page for consistency
- * and get a copy of the complete multi sector transfer
- * deprotected restart page.
- */
- err = ntfs_check_and_load_restart_page(log_vi,
- (RESTART_PAGE_HEADER*)kaddr, pos,
- !rstr1_ph ? &rstr1_ph : &rstr2_ph,
- !rstr1_ph ? &rstr1_lsn : &rstr2_lsn);
- if (!err) {
- /*
- * If we have now found the first (modified by chkdsk)
- * restart page, continue looking for the second one.
- */
- if (!pos) {
- pos = NTFS_BLOCK_SIZE >> 1;
- continue;
- }
- /*
- * We have now found the second (modified by chkdsk)
- * restart page, so we can stop looking.
- */
- break;
- }
- /*
- * Error output already done inside the function. Note, we do
- * not abort if the restart page was invalid as we might still
- * find a valid one further in the file.
- */
- if (err != -EINVAL) {
- ntfs_unmap_page(page);
- goto err_out;
- }
- /* Continue looking. */
- if (!pos)
- pos = NTFS_BLOCK_SIZE >> 1;
- }
- if (page)
- ntfs_unmap_page(page);
- if (logfile_is_empty) {
- NVolSetLogFileEmpty(vol);
-is_empty:
- ntfs_debug("Done. ($LogFile is empty.)");
- return true;
- }
- if (!rstr1_ph) {
- BUG_ON(rstr2_ph);
- ntfs_error(vol->sb, "Did not find any restart pages in "
- "$LogFile and it was not empty.");
- return false;
- }
- /* If both restart pages were found, use the more recent one. */
- if (rstr2_ph) {
- /*
- * If the second restart area is more recent, switch to it.
- * Otherwise just throw it away.
- */
- if (rstr2_lsn > rstr1_lsn) {
- ntfs_debug("Using second restart page as it is more "
- "recent.");
- ntfs_free(rstr1_ph);
- rstr1_ph = rstr2_ph;
- /* rstr1_lsn = rstr2_lsn; */
- } else {
- ntfs_debug("Using first restart page as it is more "
- "recent.");
- ntfs_free(rstr2_ph);
- }
- rstr2_ph = NULL;
- }
- /* All consistency checks passed. */
- if (rp)
- *rp = rstr1_ph;
- else
- ntfs_free(rstr1_ph);
- ntfs_debug("Done.");
- return true;
-err_out:
- if (rstr1_ph)
- ntfs_free(rstr1_ph);
- return false;
-}
-
-/**
- * ntfs_is_logfile_clean - check in the journal if the volume is clean
- * @log_vi: struct inode of loaded journal $LogFile to check
- * @rp: copy of the current restart page
- *
- * Analyze the $LogFile journal and return 'true' if it indicates the volume was
- * shutdown cleanly and 'false' if not.
- *
- * At present we only look at the two restart pages and ignore the log record
- * pages. This is a little bit crude in that there will be a very small number
- * of cases where we think that a volume is dirty when in fact it is clean.
- * This should only affect volumes that have not been shutdown cleanly but did
- * not have any pending, non-check-pointed i/o, i.e. they were completely idle
- * at least for the five seconds preceding the unclean shutdown.
- *
- * This function assumes that the $LogFile journal has already been consistency
- * checked by a call to ntfs_check_logfile() and in particular if the $LogFile
- * is empty this function requires that NVolLogFileEmpty() is true otherwise an
- * empty volume will be reported as dirty.
- */
-bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
-{
- ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
- RESTART_AREA *ra;
-
- ntfs_debug("Entering.");
- /* An empty $LogFile must have been clean before it got emptied. */
- if (NVolLogFileEmpty(vol)) {
- ntfs_debug("Done. ($LogFile is empty.)");
- return true;
- }
- BUG_ON(!rp);
- if (!ntfs_is_rstr_record(rp->magic) &&
- !ntfs_is_chkd_record(rp->magic)) {
- ntfs_error(vol->sb, "Restart page buffer is invalid. This is "
- "probably a bug in that the $LogFile should "
- "have been consistency checked before calling "
- "this function.");
- return false;
- }
- ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
- /*
- * If the $LogFile has active clients, i.e. it is open, and we do not
- * have the RESTART_VOLUME_IS_CLEAN bit set in the restart area flags,
- * we assume there was an unclean shutdown.
- */
- if (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
- !(ra->flags & RESTART_VOLUME_IS_CLEAN)) {
- ntfs_debug("Done. $LogFile indicates a dirty shutdown.");
- return false;
- }
- /* $LogFile indicates a clean shutdown. */
- ntfs_debug("Done. $LogFile indicates a clean shutdown.");
- return true;
-}
-
-/**
- * ntfs_empty_logfile - empty the contents of the $LogFile journal
- * @log_vi: struct inode of loaded journal $LogFile to empty
- *
- * Empty the contents of the $LogFile journal @log_vi and return 'true' on
- * success and 'false' on error.
- *
- * This function assumes that the $LogFile journal has already been consistency
- * checked by a call to ntfs_check_logfile() and that ntfs_is_logfile_clean()
- * has been used to ensure that the $LogFile is clean.
- */
-bool ntfs_empty_logfile(struct inode *log_vi)
-{
- VCN vcn, end_vcn;
- ntfs_inode *log_ni = NTFS_I(log_vi);
- ntfs_volume *vol = log_ni->vol;
- struct super_block *sb = vol->sb;
- runlist_element *rl;
- unsigned long flags;
- unsigned block_size, block_size_bits;
- int err;
- bool should_wait = true;
-
- ntfs_debug("Entering.");
- if (NVolLogFileEmpty(vol)) {
- ntfs_debug("Done.");
- return true;
- }
- /*
- * We cannot use ntfs_attr_set() because we may be still in the middle
- * of a mount operation. Thus we do the emptying by hand by first
- * zapping the page cache pages for the $LogFile/$DATA attribute and
- * then emptying each of the buffers in each of the clusters specified
- * by the runlist by hand.
- */
- block_size = sb->s_blocksize;
- block_size_bits = sb->s_blocksize_bits;
- vcn = 0;
- read_lock_irqsave(&log_ni->size_lock, flags);
- end_vcn = (log_ni->initialized_size + vol->cluster_size_mask) >>
- vol->cluster_size_bits;
- read_unlock_irqrestore(&log_ni->size_lock, flags);
- truncate_inode_pages(log_vi->i_mapping, 0);
- down_write(&log_ni->runlist.lock);
- rl = log_ni->runlist.rl;
- if (unlikely(!rl || vcn < rl->vcn || !rl->length)) {
-map_vcn:
- err = ntfs_map_runlist_nolock(log_ni, vcn, NULL);
- if (err) {
- ntfs_error(sb, "Failed to map runlist fragment (error "
- "%d).", -err);
- goto err;
- }
- rl = log_ni->runlist.rl;
- BUG_ON(!rl || vcn < rl->vcn || !rl->length);
- }
- /* Seek to the runlist element containing @vcn. */
- while (rl->length && vcn >= rl[1].vcn)
- rl++;
- do {
- LCN lcn;
- sector_t block, end_block;
- s64 len;
-
- /*
- * If this run is not mapped map it now and start again as the
- * runlist will have been updated.
- */
- lcn = rl->lcn;
- if (unlikely(lcn == LCN_RL_NOT_MAPPED)) {
- vcn = rl->vcn;
- goto map_vcn;
- }
- /* If this run is not valid abort with an error. */
- if (unlikely(!rl->length || lcn < LCN_HOLE))
- goto rl_err;
- /* Skip holes. */
- if (lcn == LCN_HOLE)
- continue;
- block = lcn << vol->cluster_size_bits >> block_size_bits;
- len = rl->length;
- if (rl[1].vcn > end_vcn)
- len = end_vcn - rl->vcn;
- end_block = (lcn + len) << vol->cluster_size_bits >>
- block_size_bits;
- /* Iterate over the blocks in the run and empty them. */
- do {
- struct buffer_head *bh;
-
- /* Obtain the buffer, possibly not uptodate. */
- bh = sb_getblk(sb, block);
- BUG_ON(!bh);
- /* Setup buffer i/o submission. */
- lock_buffer(bh);
- bh->b_end_io = end_buffer_write_sync;
- get_bh(bh);
- /* Set the entire contents of the buffer to 0xff. */
- memset(bh->b_data, -1, block_size);
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
- if (buffer_dirty(bh))
- clear_buffer_dirty(bh);
- /*
- * Submit the buffer and wait for i/o to complete but
- * only for the first buffer so we do not miss really
- * serious i/o errors. Once the first buffer has
- * completed ignore errors afterwards as we can assume
- * that if one buffer worked all of them will work.
- */
- submit_bh(REQ_OP_WRITE, bh);
- if (should_wait) {
- should_wait = false;
- wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- goto io_err;
- }
- brelse(bh);
- } while (++block < end_block);
- } while ((++rl)->vcn < end_vcn);
- up_write(&log_ni->runlist.lock);
- /*
- * Zap the pages again just in case any got instantiated whilst we were
- * emptying the blocks by hand. FIXME: We may not have completed
- * writing to all the buffer heads yet so this may happen too early.
- * We really should use a kernel thread to do the emptying
- * asynchronously and then we can also set the volume dirty and output
- * an error message if emptying should fail.
- */
- truncate_inode_pages(log_vi->i_mapping, 0);
- /* Set the flag so we do not have to do it again on remount. */
- NVolSetLogFileEmpty(vol);
- ntfs_debug("Done.");
- return true;
-io_err:
- ntfs_error(sb, "Failed to write buffer. Unmount and run chkdsk.");
- goto dirty_err;
-rl_err:
- ntfs_error(sb, "Runlist is corrupt. Unmount and run chkdsk.");
-dirty_err:
- NVolSetErrors(vol);
- err = -EIO;
-err:
- up_write(&log_ni->runlist.lock);
- ntfs_error(sb, "Failed to fill $LogFile with 0xff bytes (error %d).",
- -err);
- return false;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
deleted file mode 100644
index 429d4909cc72..000000000000
--- a/fs/ntfs/logfile.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of
- * the Linux-NTFS project.
- *
- * Copyright (c) 2000-2005 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_LOGFILE_H
-#define _LINUX_NTFS_LOGFILE_H
-
-#ifdef NTFS_RW
-
-#include <linux/fs.h>
-
-#include "types.h"
-#include "endian.h"
-#include "layout.h"
-
-/*
- * Journal ($LogFile) organization:
- *
- * Two restart areas present in the first two pages (restart pages, one restart
- * area in each page). When the volume is dismounted they should be identical,
- * except for the update sequence array which usually has a different update
- * sequence number.
- *
- * These are followed by log records organized in pages headed by a log record
- * header going up to log file size. Not all pages contain log records when a
- * volume is first formatted, but as the volume ages, all records will be used.
- * When the log file fills up, the records at the beginning are purged (by
- * modifying the oldest_lsn to a higher value presumably) and writing begins
- * at the beginning of the file. Effectively, the log file is viewed as a
- * circular entity.
- *
- * NOTE: Windows NT, 2000, and XP all use log file version 1.1 but they accept
- * versions <= 1.x, including 0.-1. (Yes, that is a minus one in there!) We
- * probably only want to support 1.1 as this seems to be the current version
- * and we don't know how that differs from the older versions. The only
- * exception is if the journal is clean as marked by the two restart pages
- * then it doesn't matter whether we are on an earlier version. We can just
- * reinitialize the logfile and start again with version 1.1.
- */
-
-/* Some $LogFile related constants. */
-#define MaxLogFileSize 0x100000000ULL
-#define DefaultLogPageSize 4096
-#define MinLogRecordPages 48
-
-/*
- * Log file restart page header (begins the restart area).
- */
-typedef struct {
-/*Ofs*/
-/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */
-/* 0*/ NTFS_RECORD_TYPE magic; /* The magic is "RSTR". */
-/* 4*/ le16 usa_ofs; /* See NTFS_RECORD definition in layout.h.
- When creating, set this to be immediately
- after this header structure (without any
- alignment). */
-/* 6*/ le16 usa_count; /* See NTFS_RECORD definition in layout.h. */
-
-/* 8*/ leLSN chkdsk_lsn; /* The last log file sequence number found by
- chkdsk. Only used when the magic is changed
- to "CHKD". Otherwise this is zero. */
-/* 16*/ le32 system_page_size; /* Byte size of system pages when the log file
- was created, has to be >= 512 and a power of
- 2. Use this to calculate the required size
- of the usa (usa_count) and add it to usa_ofs.
- Then verify that the result is less than the
- value of the restart_area_offset. */
-/* 20*/ le32 log_page_size; /* Byte size of log file pages, has to be >=
- 512 and a power of 2. The default is 4096
- and is used when the system page size is
- between 4096 and 8192. Otherwise this is
- set to the system page size instead. */
-/* 24*/ le16 restart_area_offset;/* Byte offset from the start of this header to
- the RESTART_AREA. Value has to be aligned
- to 8-byte boundary. When creating, set this
- to be after the usa. */
-/* 26*/ sle16 minor_ver; /* Log file minor version. Only check if major
- version is 1. */
-/* 28*/ sle16 major_ver; /* Log file major version. We only support
- version 1.1. */
-/* sizeof() = 30 (0x1e) bytes */
-} __attribute__ ((__packed__)) RESTART_PAGE_HEADER;
-
-/*
- * Constant for the log client indices meaning that there are no client records
- * in this particular client array. Also inside the client records themselves,
- * this means that there are no client records preceding or following this one.
- */
-#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff)
-#define LOGFILE_NO_CLIENT_CPU 0xffff
-
-/*
- * These are the so far known RESTART_AREA_* flags (16-bit) which contain
- * information about the log file in which they are present.
- */
-enum {
- RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002),
- RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
-} __attribute__ ((__packed__));
-
-typedef le16 RESTART_AREA_FLAGS;
-
-/*
- * Log file restart area record. The offset of this record is found by adding
- * the offset of the RESTART_PAGE_HEADER to the restart_area_offset value found
- * in it. See notes at restart_area_offset above.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/ leLSN current_lsn; /* The current, i.e. last LSN inside the log
- when the restart area was last written.
- This happens often but what is the interval?
- Is it just fixed time or is it every time a
- check point is written or somethine else?
- On create set to 0. */
-/* 8*/ le16 log_clients; /* Number of log client records in the array of
- log client records which follows this
- restart area. Must be 1. */
-/* 10*/ le16 client_free_list; /* The index of the first free log client record
- in the array of log client records.
- LOGFILE_NO_CLIENT means that there are no
- free log client records in the array.
- If != LOGFILE_NO_CLIENT, check that
- log_clients > client_free_list. On Win2k
- and presumably earlier, on a clean volume
- this is != LOGFILE_NO_CLIENT, and it should
- be 0, i.e. the first (and only) client
- record is free and thus the logfile is
- closed and hence clean. A dirty volume
- would have left the logfile open and hence
- this would be LOGFILE_NO_CLIENT. On WinXP
- and presumably later, the logfile is always
- open, even on clean shutdown so this should
- always be LOGFILE_NO_CLIENT. */
-/* 12*/ le16 client_in_use_list;/* The index of the first in-use log client
- record in the array of log client records.
- LOGFILE_NO_CLIENT means that there are no
- in-use log client records in the array. If
- != LOGFILE_NO_CLIENT check that log_clients
- > client_in_use_list. On Win2k and
- presumably earlier, on a clean volume this
- is LOGFILE_NO_CLIENT, i.e. there are no
- client records in use and thus the logfile
- is closed and hence clean. A dirty volume
- would have left the logfile open and hence
- this would be != LOGFILE_NO_CLIENT, and it
- should be 0, i.e. the first (and only)
- client record is in use. On WinXP and
- presumably later, the logfile is always
- open, even on clean shutdown so this should
- always be 0. */
-/* 14*/ RESTART_AREA_FLAGS flags;/* Flags modifying LFS behaviour. On Win2k
- and presumably earlier this is always 0. On
- WinXP and presumably later, if the logfile
- was shutdown cleanly, the second bit,
- RESTART_VOLUME_IS_CLEAN, is set. This bit
- is cleared when the volume is mounted by
- WinXP and set when the volume is dismounted,
- thus if the logfile is dirty, this bit is
- clear. Thus we don't need to check the
- Windows version to determine if the logfile
- is clean. Instead if the logfile is closed,
- we know it must be clean. If it is open and
- this bit is set, we also know it must be
- clean. If on the other hand the logfile is
- open and this bit is clear, we can be almost
- certain that the logfile is dirty. */
-/* 16*/ le32 seq_number_bits; /* How many bits to use for the sequence
- number. This is calculated as 67 - the
- number of bits required to store the logfile
- size in bytes and this can be used in with
- the specified file_size as a consistency
- check. */
-/* 20*/ le16 restart_area_length;/* Length of the restart area including the
- client array. Following checks required if
- version matches. Otherwise, skip them.
- restart_area_offset + restart_area_length
- has to be <= system_page_size. Also,
- restart_area_length has to be >=
- client_array_offset + (log_clients *
- sizeof(log client record)). */
-/* 22*/ le16 client_array_offset;/* Offset from the start of this record to
- the first log client record if versions are
- matched. When creating, set this to be
- after this restart area structure, aligned
- to 8-bytes boundary. If the versions do not
- match, this is ignored and the offset is
- assumed to be (sizeof(RESTART_AREA) + 7) &
- ~7, i.e. rounded up to first 8-byte
- boundary. Either way, client_array_offset
- has to be aligned to an 8-byte boundary.
- Also, restart_area_offset +
- client_array_offset has to be <= 510.
- Finally, client_array_offset + (log_clients
- * sizeof(log client record)) has to be <=
- system_page_size. On Win2k and presumably
- earlier, this is 0x30, i.e. immediately
- following this record. On WinXP and
- presumably later, this is 0x40, i.e. there
- are 16 extra bytes between this record and
- the client array. This probably means that
- the RESTART_AREA record is actually bigger
- in WinXP and later. */
-/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the
- restart_area_offset + the offset of the
- file_size are > 510 then corruption has
- occurred. This is the very first check when
- starting with the restart_area as if it
- fails it means that some of the above values
- will be corrupted by the multi sector
- transfer protection. The file_size has to
- be rounded down to be a multiple of the
- log_page_size in the RESTART_PAGE_HEADER and
- then it has to be at least big enough to
- store the two restart pages and 48 (0x30)
- log record pages. */
-/* 32*/ le32 last_lsn_data_length;/* Length of data of last LSN, not including
- the log record header. On create set to
- 0. */
-/* 36*/ le16 log_record_header_length;/* Byte size of the log record header.
- If the version matches then check that the
- value of log_record_header_length is a
- multiple of 8, i.e.
- (log_record_header_length + 7) & ~7 ==
- log_record_header_length. When creating set
- it to sizeof(LOG_RECORD_HEADER), aligned to
- 8 bytes. */
-/* 38*/ le16 log_page_data_offset;/* Offset to the start of data in a log record
- page. Must be a multiple of 8. On create
- set it to immediately after the update
- sequence array of the log record page. */
-/* 40*/ le32 restart_log_open_count;/* A counter that gets incremented every
- time the logfile is restarted which happens
- at mount time when the logfile is opened.
- When creating set to a random value. Win2k
- sets it to the low 32 bits of the current
- system time in NTFS format (see time.h). */
-/* 44*/ le32 reserved; /* Reserved/alignment to 8-byte boundary. */
-/* sizeof() = 48 (0x30) bytes */
-} __attribute__ ((__packed__)) RESTART_AREA;
-
-/*
- * Log client record. The offset of this record is found by adding the offset
- * of the RESTART_AREA to the client_array_offset value found in it.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/ leLSN oldest_lsn; /* Oldest LSN needed by this client. On create
- set to 0. */
-/* 8*/ leLSN client_restart_lsn;/* LSN at which this client needs to restart
- the volume, i.e. the current position within
- the log file. At present, if clean this
- should = current_lsn in restart area but it
- probably also = current_lsn when dirty most
- of the time. At create set to 0. */
-/* 16*/ le16 prev_client; /* The offset to the previous log client record
- in the array of log client records.
- LOGFILE_NO_CLIENT means there is no previous
- client record, i.e. this is the first one.
- This is always LOGFILE_NO_CLIENT. */
-/* 18*/ le16 next_client; /* The offset to the next log client record in
- the array of log client records.
- LOGFILE_NO_CLIENT means there are no next
- client records, i.e. this is the last one.
- This is always LOGFILE_NO_CLIENT. */
-/* 20*/ le16 seq_number; /* On Win2k and presumably earlier, this is set
- to zero every time the logfile is restarted
- and it is incremented when the logfile is
- closed at dismount time. Thus it is 0 when
- dirty and 1 when clean. On WinXP and
- presumably later, this is always 0. */
-/* 22*/ u8 reserved[6]; /* Reserved/alignment. */
-/* 28*/ le32 client_name_length;/* Length of client name in bytes. Should
- always be 8. */
-/* 32*/ ntfschar client_name[64];/* Name of the client in Unicode. Should
- always be "NTFS" with the remaining bytes
- set to 0. */
-/* sizeof() = 160 (0xa0) bytes */
-} __attribute__ ((__packed__)) LOG_CLIENT_RECORD;
-
-extern bool ntfs_check_logfile(struct inode *log_vi,
- RESTART_PAGE_HEADER **rp);
-
-extern bool ntfs_is_logfile_clean(struct inode *log_vi,
- const RESTART_PAGE_HEADER *rp);
-
-extern bool ntfs_empty_logfile(struct inode *log_vi);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_LOGFILE_H */
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
deleted file mode 100644
index 7068425735f1..000000000000
--- a/fs/ntfs/malloc.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2005 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_MALLOC_H
-#define _LINUX_NTFS_MALLOC_H
-
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-
-/**
- * __ntfs_malloc - allocate memory in multiples of pages
- * @size: number of bytes to allocate
- * @gfp_mask: extra flags for the allocator
- *
- * Internal function. You probably want ntfs_malloc_nofs()...
- *
- * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
- * returns a pointer to the allocated memory.
- *
- * If there was insufficient memory to complete the request, return NULL.
- * Depending on @gfp_mask the allocation may be guaranteed to succeed.
- */
-static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
-{
- if (likely(size <= PAGE_SIZE)) {
- BUG_ON(!size);
- /* kmalloc() has per-CPU caches so is faster for now. */
- return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
- /* return (void *)__get_free_page(gfp_mask); */
- }
- if (likely((size >> PAGE_SHIFT) < totalram_pages()))
- return __vmalloc(size, gfp_mask);
- return NULL;
-}
-
-/**
- * ntfs_malloc_nofs - allocate memory in multiples of pages
- * @size: number of bytes to allocate
- *
- * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
- * returns a pointer to the allocated memory.
- *
- * If there was insufficient memory to complete the request, return NULL.
- */
-static inline void *ntfs_malloc_nofs(unsigned long size)
-{
- return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM);
-}
-
-/**
- * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages
- * @size: number of bytes to allocate
- *
- * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
- * returns a pointer to the allocated memory.
- *
- * This function guarantees that the allocation will succeed. It will sleep
- * for as long as it takes to complete the allocation.
- *
- * If there was insufficient memory to complete the request, return NULL.
- */
-static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
-{
- return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL);
-}
-
-static inline void ntfs_free(void *addr)
-{
- kvfree(addr);
-}
-
-#endif /* _LINUX_NTFS_MALLOC_H */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
deleted file mode 100644
index 6fd1dc4b08c8..000000000000
--- a/fs/ntfs/mft.c
+++ /dev/null
@@ -1,2907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
- * Copyright (c) 2002 Richard Russon
- */
-
-#include <linux/buffer_head.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/bio.h>
-
-#include "attrib.h"
-#include "aops.h"
-#include "bitmap.h"
-#include "debug.h"
-#include "dir.h"
-#include "lcnalloc.h"
-#include "malloc.h"
-#include "mft.h"
-#include "ntfs.h"
-
-#define MAX_BHS (PAGE_SIZE / NTFS_BLOCK_SIZE)
-
-/**
- * map_mft_record_page - map the page in which a specific mft record resides
- * @ni: ntfs inode whose mft record page to map
- *
- * This maps the page in which the mft record of the ntfs inode @ni is situated
- * and returns a pointer to the mft record within the mapped page.
- *
- * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR()
- * contains the negative error code returned.
- */
-static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
-{
- loff_t i_size;
- ntfs_volume *vol = ni->vol;
- struct inode *mft_vi = vol->mft_ino;
- struct page *page;
- unsigned long index, end_index;
- unsigned ofs;
-
- BUG_ON(ni->page);
- /*
- * The index into the page cache and the offset within the page cache
- * page of the wanted mft record. FIXME: We need to check for
- * overflowing the unsigned long, but I don't think we would ever get
- * here if the volume was that big...
- */
- index = (u64)ni->mft_no << vol->mft_record_size_bits >>
- PAGE_SHIFT;
- ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
-
- i_size = i_size_read(mft_vi);
- /* The maximum valid index into the page cache for $MFT's data. */
- end_index = i_size >> PAGE_SHIFT;
-
- /* If the wanted index is out of bounds the mft record doesn't exist. */
- if (unlikely(index >= end_index)) {
- if (index > end_index || (i_size & ~PAGE_MASK) < ofs +
- vol->mft_record_size) {
- page = ERR_PTR(-ENOENT);
- ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
- "which is beyond the end of the mft. "
- "This is probably a bug in the ntfs "
- "driver.", ni->mft_no);
- goto err_out;
- }
- }
- /* Read, map, and pin the page. */
- page = ntfs_map_page(mft_vi->i_mapping, index);
- if (!IS_ERR(page)) {
- /* Catch multi sector transfer fixup errors. */
- if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) +
- ofs)))) {
- ni->page = page;
- ni->page_ofs = ofs;
- return page_address(page) + ofs;
- }
- ntfs_error(vol->sb, "Mft record 0x%lx is corrupt. "
- "Run chkdsk.", ni->mft_no);
- ntfs_unmap_page(page);
- page = ERR_PTR(-EIO);
- NVolSetErrors(vol);
- }
-err_out:
- ni->page = NULL;
- ni->page_ofs = 0;
- return (void*)page;
-}
-
-/**
- * map_mft_record - map, pin and lock an mft record
- * @ni: ntfs inode whose MFT record to map
- *
- * First, take the mrec_lock mutex. We might now be sleeping, while waiting
- * for the mutex if it was already locked by someone else.
- *
- * The page of the record is mapped using map_mft_record_page() before being
- * returned to the caller.
- *
- * This in turn uses ntfs_map_page() to get the page containing the wanted mft
- * record (it in turn calls read_cache_page() which reads it in from disk if
- * necessary, increments the use count on the page so that it cannot disappear
- * under us and returns a reference to the page cache page).
- *
- * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it
- * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed
- * and the post-read mst fixups on each mft record in the page have been
- * performed, the page gets PG_uptodate set and PG_locked cleared (this is done
- * in our asynchronous I/O completion handler end_buffer_read_mft_async()).
- * ntfs_map_page() waits for PG_locked to become clear and checks if
- * PG_uptodate is set and returns an error code if not. This provides
- * sufficient protection against races when reading/using the page.
- *
- * However there is the write mapping to think about. Doing the above described
- * checking here will be fine, because when initiating the write we will set
- * PG_locked and clear PG_uptodate making sure nobody is touching the page
- * contents. Doing the locking this way means that the commit to disk code in
- * the page cache code paths is automatically sufficiently locked with us as
- * we will not touch a page that has been locked or is not uptodate. The only
- * locking problem then is them locking the page while we are accessing it.
- *
- * So that code will end up having to own the mrec_lock of all mft
- * records/inodes present in the page before I/O can proceed. In that case we
- * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be
- * accessing anything without owning the mrec_lock mutex. But we do need to
- * use them because of the read_cache_page() invocation and the code becomes so
- * much simpler this way that it is well worth it.
- *
- * The mft record is now ours and we return a pointer to it. You need to check
- * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return
- * the error code.
- *
- * NOTE: Caller is responsible for setting the mft record dirty before calling
- * unmap_mft_record(). This is obviously only necessary if the caller really
- * modified the mft record...
- * Q: Do we want to recycle one of the VFS inode state bits instead?
- * A: No, the inode ones mean we want to change the mft record, not we want to
- * write it out.
- */
-MFT_RECORD *map_mft_record(ntfs_inode *ni)
-{
- MFT_RECORD *m;
-
- ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
-
- /* Make sure the ntfs inode doesn't go away. */
- atomic_inc(&ni->count);
-
- /* Serialize access to this mft record. */
- mutex_lock(&ni->mrec_lock);
-
- m = map_mft_record_page(ni);
- if (!IS_ERR(m))
- return m;
-
- mutex_unlock(&ni->mrec_lock);
- atomic_dec(&ni->count);
- ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
- return m;
-}
-
-/**
- * unmap_mft_record_page - unmap the page in which a specific mft record resides
- * @ni: ntfs inode whose mft record page to unmap
- *
- * This unmaps the page in which the mft record of the ntfs inode @ni is
- * situated and returns. This is a NOOP if highmem is not configured.
- *
- * The unmap happens via ntfs_unmap_page() which in turn decrements the use
- * count on the page thus releasing it from the pinned state.
- *
- * We do not actually unmap the page from memory of course, as that will be
- * done by the page cache code itself when memory pressure increases or
- * whatever.
- */
-static inline void unmap_mft_record_page(ntfs_inode *ni)
-{
- BUG_ON(!ni->page);
-
- // TODO: If dirty, blah...
- ntfs_unmap_page(ni->page);
- ni->page = NULL;
- ni->page_ofs = 0;
- return;
-}
-
-/**
- * unmap_mft_record - release a mapped mft record
- * @ni: ntfs inode whose MFT record to unmap
- *
- * We release the page mapping and the mrec_lock mutex which unmaps the mft
- * record and releases it for others to get hold of. We also release the ntfs
- * inode by decrementing the ntfs inode reference count.
- *
- * NOTE: If caller has modified the mft record, it is imperative to set the mft
- * record dirty BEFORE calling unmap_mft_record().
- */
-void unmap_mft_record(ntfs_inode *ni)
-{
- struct page *page = ni->page;
-
- BUG_ON(!page);
-
- ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
-
- unmap_mft_record_page(ni);
- mutex_unlock(&ni->mrec_lock);
- atomic_dec(&ni->count);
- /*
- * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to
- * ntfs_clear_extent_inode() in the extent inode case, and to the
- * caller in the non-extent, yet pure ntfs inode case, to do the actual
- * tear down of all structures and freeing of all allocated memory.
- */
- return;
-}
-
-/**
- * map_extent_mft_record - load an extent inode and attach it to its base
- * @base_ni: base ntfs inode
- * @mref: mft reference of the extent inode to load
- * @ntfs_ino: on successful return, pointer to the ntfs_inode structure
- *
- * Load the extent mft record @mref and attach it to its base inode @base_ni.
- * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise
- * PTR_ERR(result) gives the negative error code.
- *
- * On successful return, @ntfs_ino contains a pointer to the ntfs_inode
- * structure of the mapped extent inode.
- */
-MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
- ntfs_inode **ntfs_ino)
-{
- MFT_RECORD *m;
- ntfs_inode *ni = NULL;
- ntfs_inode **extent_nis = NULL;
- int i;
- unsigned long mft_no = MREF(mref);
- u16 seq_no = MSEQNO(mref);
- bool destroy_ni = false;
-
- ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).",
- mft_no, base_ni->mft_no);
- /* Make sure the base ntfs inode doesn't go away. */
- atomic_inc(&base_ni->count);
- /*
- * Check if this extent inode has already been added to the base inode,
- * in which case just return it. If not found, add it to the base
- * inode before returning it.
- */
- mutex_lock(&base_ni->extent_lock);
- if (base_ni->nr_extents > 0) {
- extent_nis = base_ni->ext.extent_ntfs_inos;
- for (i = 0; i < base_ni->nr_extents; i++) {
- if (mft_no != extent_nis[i]->mft_no)
- continue;
- ni = extent_nis[i];
- /* Make sure the ntfs inode doesn't go away. */
- atomic_inc(&ni->count);
- break;
- }
- }
- if (likely(ni != NULL)) {
- mutex_unlock(&base_ni->extent_lock);
- atomic_dec(&base_ni->count);
- /* We found the record; just have to map and return it. */
- m = map_mft_record(ni);
- /* map_mft_record() has incremented this on success. */
- atomic_dec(&ni->count);
- if (!IS_ERR(m)) {
- /* Verify the sequence number. */
- if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
- ntfs_debug("Done 1.");
- *ntfs_ino = ni;
- return m;
- }
- unmap_mft_record(ni);
- ntfs_error(base_ni->vol->sb, "Found stale extent mft "
- "reference! Corrupt filesystem. "
- "Run chkdsk.");
- return ERR_PTR(-EIO);
- }
-map_err_out:
- ntfs_error(base_ni->vol->sb, "Failed to map extent "
- "mft record, error code %ld.", -PTR_ERR(m));
- return m;
- }
- /* Record wasn't there. Get a new ntfs inode and initialize it. */
- ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
- if (unlikely(!ni)) {
- mutex_unlock(&base_ni->extent_lock);
- atomic_dec(&base_ni->count);
- return ERR_PTR(-ENOMEM);
- }
- ni->vol = base_ni->vol;
- ni->seq_no = seq_no;
- ni->nr_extents = -1;
- ni->ext.base_ntfs_ino = base_ni;
- /* Now map the record. */
- m = map_mft_record(ni);
- if (IS_ERR(m)) {
- mutex_unlock(&base_ni->extent_lock);
- atomic_dec(&base_ni->count);
- ntfs_clear_extent_inode(ni);
- goto map_err_out;
- }
- /* Verify the sequence number if it is present. */
- if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
- ntfs_error(base_ni->vol->sb, "Found stale extent mft "
- "reference! Corrupt filesystem. Run chkdsk.");
- destroy_ni = true;
- m = ERR_PTR(-EIO);
- goto unm_err_out;
- }
- /* Attach extent inode to base inode, reallocating memory if needed. */
- if (!(base_ni->nr_extents & 3)) {
- ntfs_inode **tmp;
- int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
-
- tmp = kmalloc(new_size, GFP_NOFS);
- if (unlikely(!tmp)) {
- ntfs_error(base_ni->vol->sb, "Failed to allocate "
- "internal buffer.");
- destroy_ni = true;
- m = ERR_PTR(-ENOMEM);
- goto unm_err_out;
- }
- if (base_ni->nr_extents) {
- BUG_ON(!base_ni->ext.extent_ntfs_inos);
- memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size -
- 4 * sizeof(ntfs_inode *));
- kfree(base_ni->ext.extent_ntfs_inos);
- }
- base_ni->ext.extent_ntfs_inos = tmp;
- }
- base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
- mutex_unlock(&base_ni->extent_lock);
- atomic_dec(&base_ni->count);
- ntfs_debug("Done 2.");
- *ntfs_ino = ni;
- return m;
-unm_err_out:
- unmap_mft_record(ni);
- mutex_unlock(&base_ni->extent_lock);
- atomic_dec(&base_ni->count);
- /*
- * If the extent inode was not attached to the base inode we need to
- * release it or we will leak memory.
- */
- if (destroy_ni)
- ntfs_clear_extent_inode(ni);
- return m;
-}
-
-#ifdef NTFS_RW
-
-/**
- * __mark_mft_record_dirty - set the mft record and the page containing it dirty
- * @ni: ntfs inode describing the mapped mft record
- *
- * Internal function. Users should call mark_mft_record_dirty() instead.
- *
- * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni,
- * as well as the page containing the mft record, dirty. Also, mark the base
- * vfs inode dirty. This ensures that any changes to the mft record are
- * written out to disk.
- *
- * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
- * on the base vfs inode, because even though file data may have been modified,
- * it is dirty in the inode meta data rather than the data page cache of the
- * inode, and thus there are no data pages that need writing out. Therefore, a
- * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
- * other hand, is not sufficient, because ->write_inode needs to be called even
- * in case of fdatasync. This needs to happen or the file data would not
- * necessarily hit the device synchronously, even though the vfs inode has the
- * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
- * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
- * which is not what I_DIRTY_SYNC on its own would suggest.
- */
-void __mark_mft_record_dirty(ntfs_inode *ni)
-{
- ntfs_inode *base_ni;
-
- ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
- BUG_ON(NInoAttr(ni));
- mark_ntfs_record_dirty(ni->page, ni->page_ofs);
- /* Determine the base vfs inode and mark it dirty, too. */
- mutex_lock(&ni->extent_lock);
- if (likely(ni->nr_extents >= 0))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- mutex_unlock(&ni->extent_lock);
- __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC);
-}
-
-static const char *ntfs_please_email = "Please email "
- "linux-ntfs-dev@lists.sourceforge.net and say that you saw "
- "this message. Thank you.";
-
-/**
- * ntfs_sync_mft_mirror_umount - synchronise an mft record to the mft mirror
- * @vol: ntfs volume on which the mft record to synchronize resides
- * @mft_no: mft record number of mft record to synchronize
- * @m: mapped, mst protected (extent) mft record to synchronize
- *
- * Write the mapped, mst protected (extent) mft record @m with mft record
- * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol,
- * bypassing the page cache and the $MFTMirr inode itself.
- *
- * This function is only for use at umount time when the mft mirror inode has
- * already been disposed off. We BUG() if we are called while the mft mirror
- * inode is still attached to the volume.
- *
- * On success return 0. On error return -errno.
- *
- * NOTE: This function is not implemented yet as I am not convinced it can
- * actually be triggered considering the sequence of commits we do in super.c::
- * ntfs_put_super(). But just in case we provide this place holder as the
- * alternative would be either to BUG() or to get a NULL pointer dereference
- * and Oops.
- */
-static int ntfs_sync_mft_mirror_umount(ntfs_volume *vol,
- const unsigned long mft_no, MFT_RECORD *m)
-{
- BUG_ON(vol->mftmirr_ino);
- ntfs_error(vol->sb, "Umount time mft mirror syncing is not "
- "implemented yet. %s", ntfs_please_email);
- return -EOPNOTSUPP;
-}
-
-/**
- * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror
- * @vol: ntfs volume on which the mft record to synchronize resides
- * @mft_no: mft record number of mft record to synchronize
- * @m: mapped, mst protected (extent) mft record to synchronize
- * @sync: if true, wait for i/o completion
- *
- * Write the mapped, mst protected (extent) mft record @m with mft record
- * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol.
- *
- * On success return 0. On error return -errno and set the volume errors flag
- * in the ntfs volume @vol.
- *
- * NOTE: We always perform synchronous i/o and ignore the @sync parameter.
- *
- * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
- * schedule i/o via ->writepage or do it via kntfsd or whatever.
- */
-int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
- MFT_RECORD *m, int sync)
-{
- struct page *page;
- unsigned int blocksize = vol->sb->s_blocksize;
- int max_bhs = vol->mft_record_size / blocksize;
- struct buffer_head *bhs[MAX_BHS];
- struct buffer_head *bh, *head;
- u8 *kmirr;
- runlist_element *rl;
- unsigned int block_start, block_end, m_start, m_end, page_ofs;
- int i_bhs, nr_bhs, err = 0;
- unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
-
- ntfs_debug("Entering for inode 0x%lx.", mft_no);
- BUG_ON(!max_bhs);
- if (WARN_ON(max_bhs > MAX_BHS))
- return -EINVAL;
- if (unlikely(!vol->mftmirr_ino)) {
- /* This could happen during umount... */
- err = ntfs_sync_mft_mirror_umount(vol, mft_no, m);
- if (likely(!err))
- return err;
- goto err_out;
- }
- /* Get the page containing the mirror copy of the mft record @m. */
- page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >>
- (PAGE_SHIFT - vol->mft_record_size_bits));
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to map mft mirror page.");
- err = PTR_ERR(page);
- goto err_out;
- }
- lock_page(page);
- BUG_ON(!PageUptodate(page));
- ClearPageUptodate(page);
- /* Offset of the mft mirror record inside the page. */
- page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
- /* The address in the page of the mirror copy of the mft record @m. */
- kmirr = page_address(page) + page_ofs;
- /* Copy the mst protected mft record to the mirror. */
- memcpy(kmirr, m, vol->mft_record_size);
- /* Create uptodate buffers if not present. */
- if (unlikely(!page_has_buffers(page))) {
- struct buffer_head *tail;
-
- bh = head = alloc_page_buffers(page, blocksize, true);
- do {
- set_buffer_uptodate(bh);
- tail = bh;
- bh = bh->b_this_page;
- } while (bh);
- tail->b_this_page = head;
- attach_page_private(page, head);
- }
- bh = head = page_buffers(page);
- BUG_ON(!bh);
- rl = NULL;
- nr_bhs = 0;
- block_start = 0;
- m_start = kmirr - (u8*)page_address(page);
- m_end = m_start + vol->mft_record_size;
- do {
- block_end = block_start + blocksize;
- /* If the buffer is outside the mft record, skip it. */
- if (block_end <= m_start)
- continue;
- if (unlikely(block_start >= m_end))
- break;
- /* Need to map the buffer if it is not mapped already. */
- if (unlikely(!buffer_mapped(bh))) {
- VCN vcn;
- LCN lcn;
- unsigned int vcn_ofs;
-
- bh->b_bdev = vol->sb->s_bdev;
- /* Obtain the vcn and offset of the current block. */
- vcn = ((VCN)mft_no << vol->mft_record_size_bits) +
- (block_start - m_start);
- vcn_ofs = vcn & vol->cluster_size_mask;
- vcn >>= vol->cluster_size_bits;
- if (!rl) {
- down_read(&NTFS_I(vol->mftmirr_ino)->
- runlist.lock);
- rl = NTFS_I(vol->mftmirr_ino)->runlist.rl;
- /*
- * $MFTMirr always has the whole of its runlist
- * in memory.
- */
- BUG_ON(!rl);
- }
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- /* For $MFTMirr, only lcn >= 0 is a successful remap. */
- if (likely(lcn >= 0)) {
- /* Setup buffer head to correct block. */
- bh->b_blocknr = ((lcn <<
- vol->cluster_size_bits) +
- vcn_ofs) >> blocksize_bits;
- set_buffer_mapped(bh);
- } else {
- bh->b_blocknr = -1;
- ntfs_error(vol->sb, "Cannot write mft mirror "
- "record 0x%lx because its "
- "location on disk could not "
- "be determined (error code "
- "%lli).", mft_no,
- (long long)lcn);
- err = -EIO;
- }
- }
- BUG_ON(!buffer_uptodate(bh));
- BUG_ON(!nr_bhs && (m_start != block_start));
- BUG_ON(nr_bhs >= max_bhs);
- bhs[nr_bhs++] = bh;
- BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
- } while (block_start = block_end, (bh = bh->b_this_page) != head);
- if (unlikely(rl))
- up_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock);
- if (likely(!err)) {
- /* Lock buffers and start synchronous write i/o on them. */
- for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
- struct buffer_head *tbh = bhs[i_bhs];
-
- if (!trylock_buffer(tbh))
- BUG();
- BUG_ON(!buffer_uptodate(tbh));
- clear_buffer_dirty(tbh);
- get_bh(tbh);
- tbh->b_end_io = end_buffer_write_sync;
- submit_bh(REQ_OP_WRITE, tbh);
- }
- /* Wait on i/o completion of buffers. */
- for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
- struct buffer_head *tbh = bhs[i_bhs];
-
- wait_on_buffer(tbh);
- if (unlikely(!buffer_uptodate(tbh))) {
- err = -EIO;
- /*
- * Set the buffer uptodate so the page and
- * buffer states do not become out of sync.
- */
- set_buffer_uptodate(tbh);
- }
- }
- } else /* if (unlikely(err)) */ {
- /* Clean the buffers. */
- for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
- clear_buffer_dirty(bhs[i_bhs]);
- }
- /* Current state: all buffers are clean, unlocked, and uptodate. */
- /* Remove the mst protection fixups again. */
- post_write_mst_fixup((NTFS_RECORD*)kmirr);
- flush_dcache_page(page);
- SetPageUptodate(page);
- unlock_page(page);
- ntfs_unmap_page(page);
- if (likely(!err)) {
- ntfs_debug("Done.");
- } else {
- ntfs_error(vol->sb, "I/O error while writing mft mirror "
- "record 0x%lx!", mft_no);
-err_out:
- ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error "
- "code %i). Volume will be left marked dirty "
- "on umount. Run ntfsfix on the partition "
- "after umounting to correct this.", -err);
- NVolSetErrors(vol);
- }
- return err;
-}
-
-/**
- * write_mft_record_nolock - write out a mapped (extent) mft record
- * @ni: ntfs inode describing the mapped (extent) mft record
- * @m: mapped (extent) mft record to write
- * @sync: if true, wait for i/o completion
- *
- * Write the mapped (extent) mft record @m described by the (regular or extent)
- * ntfs inode @ni to backing store. If the mft record @m has a counterpart in
- * the mft mirror, that is also updated.
- *
- * We only write the mft record if the ntfs inode @ni is dirty and the first
- * buffer belonging to its mft record is dirty, too. We ignore the dirty state
- * of subsequent buffers because we could have raced with
- * fs/ntfs/aops.c::mark_ntfs_record_dirty().
- *
- * On success, clean the mft record and return 0. On error, leave the mft
- * record dirty and return -errno.
- *
- * NOTE: We always perform synchronous i/o and ignore the @sync parameter.
- * However, if the mft record has a counterpart in the mft mirror and @sync is
- * true, we write the mft record, wait for i/o completion, and only then write
- * the mft mirror copy. This ensures that if the system crashes either the mft
- * or the mft mirror will contain a self-consistent mft record @m. If @sync is
- * false on the other hand, we start i/o on both and then wait for completion
- * on them. This provides a speedup but no longer guarantees that you will end
- * up with a self-consistent mft record in the case of a crash but if you asked
- * for asynchronous writing you probably do not care about that anyway.
- *
- * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
- * schedule i/o via ->writepage or do it via kntfsd or whatever.
- */
-int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
-{
- ntfs_volume *vol = ni->vol;
- struct page *page = ni->page;
- unsigned int blocksize = vol->sb->s_blocksize;
- unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
- int max_bhs = vol->mft_record_size / blocksize;
- struct buffer_head *bhs[MAX_BHS];
- struct buffer_head *bh, *head;
- runlist_element *rl;
- unsigned int block_start, block_end, m_start, m_end;
- int i_bhs, nr_bhs, err = 0;
-
- ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
- BUG_ON(NInoAttr(ni));
- BUG_ON(!max_bhs);
- BUG_ON(!PageLocked(page));
- if (WARN_ON(max_bhs > MAX_BHS)) {
- err = -EINVAL;
- goto err_out;
- }
- /*
- * If the ntfs_inode is clean no need to do anything. If it is dirty,
- * mark it as clean now so that it can be redirtied later on if needed.
- * There is no danger of races since the caller is holding the locks
- * for the mft record @m and the page it is in.
- */
- if (!NInoTestClearDirty(ni))
- goto done;
- bh = head = page_buffers(page);
- BUG_ON(!bh);
- rl = NULL;
- nr_bhs = 0;
- block_start = 0;
- m_start = ni->page_ofs;
- m_end = m_start + vol->mft_record_size;
- do {
- block_end = block_start + blocksize;
- /* If the buffer is outside the mft record, skip it. */
- if (block_end <= m_start)
- continue;
- if (unlikely(block_start >= m_end))
- break;
- /*
- * If this block is not the first one in the record, we ignore
- * the buffer's dirty state because we could have raced with a
- * parallel mark_ntfs_record_dirty().
- */
- if (block_start == m_start) {
- /* This block is the first one in the record. */
- if (!buffer_dirty(bh)) {
- BUG_ON(nr_bhs);
- /* Clean records are not written out. */
- break;
- }
- }
- /* Need to map the buffer if it is not mapped already. */
- if (unlikely(!buffer_mapped(bh))) {
- VCN vcn;
- LCN lcn;
- unsigned int vcn_ofs;
-
- bh->b_bdev = vol->sb->s_bdev;
- /* Obtain the vcn and offset of the current block. */
- vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) +
- (block_start - m_start);
- vcn_ofs = vcn & vol->cluster_size_mask;
- vcn >>= vol->cluster_size_bits;
- if (!rl) {
- down_read(&NTFS_I(vol->mft_ino)->runlist.lock);
- rl = NTFS_I(vol->mft_ino)->runlist.rl;
- BUG_ON(!rl);
- }
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- /* For $MFT, only lcn >= 0 is a successful remap. */
- if (likely(lcn >= 0)) {
- /* Setup buffer head to correct block. */
- bh->b_blocknr = ((lcn <<
- vol->cluster_size_bits) +
- vcn_ofs) >> blocksize_bits;
- set_buffer_mapped(bh);
- } else {
- bh->b_blocknr = -1;
- ntfs_error(vol->sb, "Cannot write mft record "
- "0x%lx because its location "
- "on disk could not be "
- "determined (error code %lli).",
- ni->mft_no, (long long)lcn);
- err = -EIO;
- }
- }
- BUG_ON(!buffer_uptodate(bh));
- BUG_ON(!nr_bhs && (m_start != block_start));
- BUG_ON(nr_bhs >= max_bhs);
- bhs[nr_bhs++] = bh;
- BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
- } while (block_start = block_end, (bh = bh->b_this_page) != head);
- if (unlikely(rl))
- up_read(&NTFS_I(vol->mft_ino)->runlist.lock);
- if (!nr_bhs)
- goto done;
- if (unlikely(err))
- goto cleanup_out;
- /* Apply the mst protection fixups. */
- err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);
- if (err) {
- ntfs_error(vol->sb, "Failed to apply mst fixups!");
- goto cleanup_out;
- }
- flush_dcache_mft_record_page(ni);
- /* Lock buffers and start synchronous write i/o on them. */
- for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
- struct buffer_head *tbh = bhs[i_bhs];
-
- if (!trylock_buffer(tbh))
- BUG();
- BUG_ON(!buffer_uptodate(tbh));
- clear_buffer_dirty(tbh);
- get_bh(tbh);
- tbh->b_end_io = end_buffer_write_sync;
- submit_bh(REQ_OP_WRITE, tbh);
- }
- /* Synchronize the mft mirror now if not @sync. */
- if (!sync && ni->mft_no < vol->mftmirr_size)
- ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync);
- /* Wait on i/o completion of buffers. */
- for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
- struct buffer_head *tbh = bhs[i_bhs];
-
- wait_on_buffer(tbh);
- if (unlikely(!buffer_uptodate(tbh))) {
- err = -EIO;
- /*
- * Set the buffer uptodate so the page and buffer
- * states do not become out of sync.
- */
- if (PageUptodate(page))
- set_buffer_uptodate(tbh);
- }
- }
- /* If @sync, now synchronize the mft mirror. */
- if (sync && ni->mft_no < vol->mftmirr_size)
- ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync);
- /* Remove the mst protection fixups again. */
- post_write_mst_fixup((NTFS_RECORD*)m);
- flush_dcache_mft_record_page(ni);
- if (unlikely(err)) {
- /* I/O error during writing. This is really bad! */
- ntfs_error(vol->sb, "I/O error while writing mft record "
- "0x%lx! Marking base inode as bad. You "
- "should unmount the volume and run chkdsk.",
- ni->mft_no);
- goto err_out;
- }
-done:
- ntfs_debug("Done.");
- return 0;
-cleanup_out:
- /* Clean the buffers. */
- for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
- clear_buffer_dirty(bhs[i_bhs]);
-err_out:
- /*
- * Current state: all buffers are clean, unlocked, and uptodate.
- * The caller should mark the base inode as bad so that no more i/o
- * happens. ->clear_inode() will still be invoked so all extent inodes
- * and other allocated memory will be freed.
- */
- if (err == -ENOMEM) {
- ntfs_error(vol->sb, "Not enough memory to write mft record. "
- "Redirtying so the write is retried later.");
- mark_mft_record_dirty(ni);
- err = 0;
- } else
- NVolSetErrors(vol);
- return err;
-}
-
-/**
- * ntfs_may_write_mft_record - check if an mft record may be written out
- * @vol: [IN] ntfs volume on which the mft record to check resides
- * @mft_no: [IN] mft record number of the mft record to check
- * @m: [IN] mapped mft record to check
- * @locked_ni: [OUT] caller has to unlock this ntfs inode if one is returned
- *
- * Check if the mapped (base or extent) mft record @m with mft record number
- * @mft_no belonging to the ntfs volume @vol may be written out. If necessary
- * and possible the ntfs inode of the mft record is locked and the base vfs
- * inode is pinned. The locked ntfs inode is then returned in @locked_ni. The
- * caller is responsible for unlocking the ntfs inode and unpinning the base
- * vfs inode.
- *
- * Return 'true' if the mft record may be written out and 'false' if not.
- *
- * The caller has locked the page and cleared the uptodate flag on it which
- * means that we can safely write out any dirty mft records that do not have
- * their inodes in icache as determined by ilookup5() as anyone
- * opening/creating such an inode would block when attempting to map the mft
- * record in read_cache_page() until we are finished with the write out.
- *
- * Here is a description of the tests we perform:
- *
- * If the inode is found in icache we know the mft record must be a base mft
- * record. If it is dirty, we do not write it and return 'false' as the vfs
- * inode write paths will result in the access times being updated which would
- * cause the base mft record to be redirtied and written out again. (We know
- * the access time update will modify the base mft record because Windows
- * chkdsk complains if the standard information attribute is not in the base
- * mft record.)
- *
- * If the inode is in icache and not dirty, we attempt to lock the mft record
- * and if we find the lock was already taken, it is not safe to write the mft
- * record and we return 'false'.
- *
- * If we manage to obtain the lock we have exclusive access to the mft record,
- * which also allows us safe writeout of the mft record. We then set
- * @locked_ni to the locked ntfs inode and return 'true'.
- *
- * Note we cannot just lock the mft record and sleep while waiting for the lock
- * because this would deadlock due to lock reversal (normally the mft record is
- * locked before the page is locked but we already have the page locked here
- * when we try to lock the mft record).
- *
- * If the inode is not in icache we need to perform further checks.
- *
- * If the mft record is not a FILE record or it is a base mft record, we can
- * safely write it and return 'true'.
- *
- * We now know the mft record is an extent mft record. We check if the inode
- * corresponding to its base mft record is in icache and obtain a reference to
- * it if it is. If it is not, we can safely write it and return 'true'.
- *
- * We now have the base inode for the extent mft record. We check if it has an
- * ntfs inode for the extent mft record attached and if not it is safe to write
- * the extent mft record and we return 'true'.
- *
- * The ntfs inode for the extent mft record is attached to the base inode so we
- * attempt to lock the extent mft record and if we find the lock was already
- * taken, it is not safe to write the extent mft record and we return 'false'.
- *
- * If we manage to obtain the lock we have exclusive access to the extent mft
- * record, which also allows us safe writeout of the extent mft record. We
- * set the ntfs inode of the extent mft record clean and then set @locked_ni to
- * the now locked ntfs inode and return 'true'.
- *
- * Note, the reason for actually writing dirty mft records here and not just
- * relying on the vfs inode dirty code paths is that we can have mft records
- * modified without them ever having actual inodes in memory. Also we can have
- * dirty mft records with clean ntfs inodes in memory. None of the described
- * cases would result in the dirty mft records being written out if we only
- * relied on the vfs inode dirty code paths. And these cases can really occur
- * during allocation of new mft records and in particular when the
- * initialized_size of the $MFT/$DATA attribute is extended and the new space
- * is initialized using ntfs_mft_record_format(). The clean inode can then
- * appear if the mft record is reused for a new inode before it got written
- * out.
- */
-bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
- const MFT_RECORD *m, ntfs_inode **locked_ni)
-{
- struct super_block *sb = vol->sb;
- struct inode *mft_vi = vol->mft_ino;
- struct inode *vi;
- ntfs_inode *ni, *eni, **extent_nis;
- int i;
- ntfs_attr na;
-
- ntfs_debug("Entering for inode 0x%lx.", mft_no);
- /*
- * Normally we do not return a locked inode so set @locked_ni to NULL.
- */
- BUG_ON(!locked_ni);
- *locked_ni = NULL;
- /*
- * Check if the inode corresponding to this mft record is in the VFS
- * inode cache and obtain a reference to it if it is.
- */
- ntfs_debug("Looking for inode 0x%lx in icache.", mft_no);
- na.mft_no = mft_no;
- na.name = NULL;
- na.name_len = 0;
- na.type = AT_UNUSED;
- /*
- * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
- * we get here for it rather often.
- */
- if (!mft_no) {
- /* Balance the below iput(). */
- vi = igrab(mft_vi);
- BUG_ON(vi != mft_vi);
- } else {
- /*
- * Have to use ilookup5_nowait() since ilookup5() waits for the
- * inode lock which causes ntfs to deadlock when a concurrent
- * inode write via the inode dirty code paths and the page
- * dirty code path of the inode dirty code path when writing
- * $MFT occurs.
- */
- vi = ilookup5_nowait(sb, mft_no, ntfs_test_inode, &na);
- }
- if (vi) {
- ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
- /* The inode is in icache. */
- ni = NTFS_I(vi);
- /* Take a reference to the ntfs inode. */
- atomic_inc(&ni->count);
- /* If the inode is dirty, do not write this record. */
- if (NInoDirty(ni)) {
- ntfs_debug("Inode 0x%lx is dirty, do not write it.",
- mft_no);
- atomic_dec(&ni->count);
- iput(vi);
- return false;
- }
- ntfs_debug("Inode 0x%lx is not dirty.", mft_no);
- /* The inode is not dirty, try to take the mft record lock. */
- if (unlikely(!mutex_trylock(&ni->mrec_lock))) {
- ntfs_debug("Mft record 0x%lx is already locked, do "
- "not write it.", mft_no);
- atomic_dec(&ni->count);
- iput(vi);
- return false;
- }
- ntfs_debug("Managed to lock mft record 0x%lx, write it.",
- mft_no);
- /*
- * The write has to occur while we hold the mft record lock so
- * return the locked ntfs inode.
- */
- *locked_ni = ni;
- return true;
- }
- ntfs_debug("Inode 0x%lx is not in icache.", mft_no);
- /* The inode is not in icache. */
- /* Write the record if it is not a mft record (type "FILE"). */
- if (!ntfs_is_mft_record(m->magic)) {
- ntfs_debug("Mft record 0x%lx is not a FILE record, write it.",
- mft_no);
- return true;
- }
- /* Write the mft record if it is a base inode. */
- if (!m->base_mft_record) {
- ntfs_debug("Mft record 0x%lx is a base record, write it.",
- mft_no);
- return true;
- }
- /*
- * This is an extent mft record. Check if the inode corresponding to
- * its base mft record is in icache and obtain a reference to it if it
- * is.
- */
- na.mft_no = MREF_LE(m->base_mft_record);
- ntfs_debug("Mft record 0x%lx is an extent record. Looking for base "
- "inode 0x%lx in icache.", mft_no, na.mft_no);
- if (!na.mft_no) {
- /* Balance the below iput(). */
- vi = igrab(mft_vi);
- BUG_ON(vi != mft_vi);
- } else
- vi = ilookup5_nowait(sb, na.mft_no, ntfs_test_inode,
- &na);
- if (!vi) {
- /*
- * The base inode is not in icache, write this extent mft
- * record.
- */
- ntfs_debug("Base inode 0x%lx is not in icache, write the "
- "extent record.", na.mft_no);
- return true;
- }
- ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);
- /*
- * The base inode is in icache. Check if it has the extent inode
- * corresponding to this extent mft record attached.
- */
- ni = NTFS_I(vi);
- mutex_lock(&ni->extent_lock);
- if (ni->nr_extents <= 0) {
- /*
- * The base inode has no attached extent inodes, write this
- * extent mft record.
- */
- mutex_unlock(&ni->extent_lock);
- iput(vi);
- ntfs_debug("Base inode 0x%lx has no attached extent inodes, "
- "write the extent record.", na.mft_no);
- return true;
- }
- /* Iterate over the attached extent inodes. */
- extent_nis = ni->ext.extent_ntfs_inos;
- for (eni = NULL, i = 0; i < ni->nr_extents; ++i) {
- if (mft_no == extent_nis[i]->mft_no) {
- /*
- * Found the extent inode corresponding to this extent
- * mft record.
- */
- eni = extent_nis[i];
- break;
- }
- }
- /*
- * If the extent inode was not attached to the base inode, write this
- * extent mft record.
- */
- if (!eni) {
- mutex_unlock(&ni->extent_lock);
- iput(vi);
- ntfs_debug("Extent inode 0x%lx is not attached to its base "
- "inode 0x%lx, write the extent record.",
- mft_no, na.mft_no);
- return true;
- }
- ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.",
- mft_no, na.mft_no);
- /* Take a reference to the extent ntfs inode. */
- atomic_inc(&eni->count);
- mutex_unlock(&ni->extent_lock);
- /*
- * Found the extent inode coresponding to this extent mft record.
- * Try to take the mft record lock.
- */
- if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
- atomic_dec(&eni->count);
- iput(vi);
- ntfs_debug("Extent mft record 0x%lx is already locked, do "
- "not write it.", mft_no);
- return false;
- }
- ntfs_debug("Managed to lock extent mft record 0x%lx, write it.",
- mft_no);
- if (NInoTestClearDirty(eni))
- ntfs_debug("Extent inode 0x%lx is dirty, marking it clean.",
- mft_no);
- /*
- * The write has to occur while we hold the mft record lock so return
- * the locked extent ntfs inode.
- */
- *locked_ni = eni;
- return true;
-}
-
-static const char *es = " Leaving inconsistent metadata. Unmount and run "
- "chkdsk.";
-
-/**
- * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name
- * @vol: volume on which to search for a free mft record
- * @base_ni: open base inode if allocating an extent mft record or NULL
- *
- * Search for a free mft record in the mft bitmap attribute on the ntfs volume
- * @vol.
- *
- * If @base_ni is NULL start the search at the default allocator position.
- *
- * If @base_ni is not NULL start the search at the mft record after the base
- * mft record @base_ni.
- *
- * Return the free mft record on success and -errno on error. An error code of
- * -ENOSPC means that there are no free mft records in the currently
- * initialized mft bitmap.
- *
- * Locking: Caller must hold vol->mftbmp_lock for writing.
- */
-static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
- ntfs_inode *base_ni)
-{
- s64 pass_end, ll, data_pos, pass_start, ofs, bit;
- unsigned long flags;
- struct address_space *mftbmp_mapping;
- u8 *buf, *byte;
- struct page *page;
- unsigned int page_ofs, size;
- u8 pass, b;
-
- ntfs_debug("Searching for free mft record in the currently "
- "initialized mft bitmap.");
- mftbmp_mapping = vol->mftbmp_ino->i_mapping;
- /*
- * Set the end of the pass making sure we do not overflow the mft
- * bitmap.
- */
- read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
- pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
- vol->mft_record_size_bits;
- read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
- read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
- ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
- read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
- if (pass_end > ll)
- pass_end = ll;
- pass = 1;
- if (!base_ni)
- data_pos = vol->mft_data_pos;
- else
- data_pos = base_ni->mft_no + 1;
- if (data_pos < 24)
- data_pos = 24;
- if (data_pos >= pass_end) {
- data_pos = 24;
- pass = 2;
- /* This happens on a freshly formatted volume. */
- if (data_pos >= pass_end)
- return -ENOSPC;
- }
- pass_start = data_pos;
- ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, "
- "pass_end 0x%llx, data_pos 0x%llx.", pass,
- (long long)pass_start, (long long)pass_end,
- (long long)data_pos);
- /* Loop until a free mft record is found. */
- for (; pass <= 2;) {
- /* Cap size to pass_end. */
- ofs = data_pos >> 3;
- page_ofs = ofs & ~PAGE_MASK;
- size = PAGE_SIZE - page_ofs;
- ll = ((pass_end + 7) >> 3) - ofs;
- if (size > ll)
- size = ll;
- size <<= 3;
- /*
- * If we are still within the active pass, search the next page
- * for a zero bit.
- */
- if (size) {
- page = ntfs_map_page(mftbmp_mapping,
- ofs >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to read mft "
- "bitmap, aborting.");
- return PTR_ERR(page);
- }
- buf = (u8*)page_address(page) + page_ofs;
- bit = data_pos & 7;
- data_pos &= ~7ull;
- ntfs_debug("Before inner for loop: size 0x%x, "
- "data_pos 0x%llx, bit 0x%llx", size,
- (long long)data_pos, (long long)bit);
- for (; bit < size && data_pos + bit < pass_end;
- bit &= ~7ull, bit += 8) {
- byte = buf + (bit >> 3);
- if (*byte == 0xff)
- continue;
- b = ffz((unsigned long)*byte);
- if (b < 8 && b >= (bit & 7)) {
- ll = data_pos + (bit & ~7ull) + b;
- if (unlikely(ll > (1ll << 32))) {
- ntfs_unmap_page(page);
- return -ENOSPC;
- }
- *byte |= 1 << b;
- flush_dcache_page(page);
- set_page_dirty(page);
- ntfs_unmap_page(page);
- ntfs_debug("Done. (Found and "
- "allocated mft record "
- "0x%llx.)",
- (long long)ll);
- return ll;
- }
- }
- ntfs_debug("After inner for loop: size 0x%x, "
- "data_pos 0x%llx, bit 0x%llx", size,
- (long long)data_pos, (long long)bit);
- data_pos += size;
- ntfs_unmap_page(page);
- /*
- * If the end of the pass has not been reached yet,
- * continue searching the mft bitmap for a zero bit.
- */
- if (data_pos < pass_end)
- continue;
- }
- /* Do the next pass. */
- if (++pass == 2) {
- /*
- * Starting the second pass, in which we scan the first
- * part of the zone which we omitted earlier.
- */
- pass_end = pass_start;
- data_pos = pass_start = 24;
- ntfs_debug("pass %i, pass_start 0x%llx, pass_end "
- "0x%llx.", pass, (long long)pass_start,
- (long long)pass_end);
- if (data_pos >= pass_end)
- break;
- }
- }
- /* No free mft records in currently initialized mft bitmap. */
- ntfs_debug("Done. (No free mft records left in currently initialized "
- "mft bitmap.)");
- return -ENOSPC;
-}
-
-/**
- * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster
- * @vol: volume on which to extend the mft bitmap attribute
- *
- * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
- *
- * Note: Only changes allocated_size, i.e. does not touch initialized_size or
- * data_size.
- *
- * Return 0 on success and -errno on error.
- *
- * Locking: - Caller must hold vol->mftbmp_lock for writing.
- * - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for
- * writing and releases it before returning.
- * - This function takes vol->lcnbmp_lock for writing and releases it
- * before returning.
- */
-static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
-{
- LCN lcn;
- s64 ll;
- unsigned long flags;
- struct page *page;
- ntfs_inode *mft_ni, *mftbmp_ni;
- runlist_element *rl, *rl2 = NULL;
- ntfs_attr_search_ctx *ctx = NULL;
- MFT_RECORD *mrec;
- ATTR_RECORD *a = NULL;
- int ret, mp_size;
- u32 old_alen = 0;
- u8 *b, tb;
- struct {
- u8 added_cluster:1;
- u8 added_run:1;
- u8 mp_rebuilt:1;
- } status = { 0, 0, 0 };
-
- ntfs_debug("Extending mft bitmap allocation.");
- mft_ni = NTFS_I(vol->mft_ino);
- mftbmp_ni = NTFS_I(vol->mftbmp_ino);
- /*
- * Determine the last lcn of the mft bitmap. The allocated size of the
- * mft bitmap cannot be zero so we are ok to do this.
- */
- down_write(&mftbmp_ni->runlist.lock);
- read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- ll = mftbmp_ni->allocated_size;
- read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
- (ll - 1) >> vol->cluster_size_bits, NULL);
- if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
- up_write(&mftbmp_ni->runlist.lock);
- ntfs_error(vol->sb, "Failed to determine last allocated "
- "cluster of mft bitmap attribute.");
- if (!IS_ERR(rl))
- ret = -EIO;
- else
- ret = PTR_ERR(rl);
- return ret;
- }
- lcn = rl->lcn + rl->length;
- ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.",
- (long long)lcn);
- /*
- * Attempt to get the cluster following the last allocated cluster by
- * hand as it may be in the MFT zone so the allocator would not give it
- * to us.
- */
- ll = lcn >> 3;
- page = ntfs_map_page(vol->lcnbmp_ino->i_mapping,
- ll >> PAGE_SHIFT);
- if (IS_ERR(page)) {
- up_write(&mftbmp_ni->runlist.lock);
- ntfs_error(vol->sb, "Failed to read from lcn bitmap.");
- return PTR_ERR(page);
- }
- b = (u8*)page_address(page) + (ll & ~PAGE_MASK);
- tb = 1 << (lcn & 7ull);
- down_write(&vol->lcnbmp_lock);
- if (*b != 0xff && !(*b & tb)) {
- /* Next cluster is free, allocate it. */
- *b |= tb;
- flush_dcache_page(page);
- set_page_dirty(page);
- up_write(&vol->lcnbmp_lock);
- ntfs_unmap_page(page);
- /* Update the mft bitmap runlist. */
- rl->length++;
- rl[1].vcn++;
- status.added_cluster = 1;
- ntfs_debug("Appending one cluster to mft bitmap.");
- } else {
- up_write(&vol->lcnbmp_lock);
- ntfs_unmap_page(page);
- /* Allocate a cluster from the DATA_ZONE. */
- rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
- true);
- if (IS_ERR(rl2)) {
- up_write(&mftbmp_ni->runlist.lock);
- ntfs_error(vol->sb, "Failed to allocate a cluster for "
- "the mft bitmap.");
- return PTR_ERR(rl2);
- }
- rl = ntfs_runlists_merge(mftbmp_ni->runlist.rl, rl2);
- if (IS_ERR(rl)) {
- up_write(&mftbmp_ni->runlist.lock);
- ntfs_error(vol->sb, "Failed to merge runlists for mft "
- "bitmap.");
- if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to deallocate "
- "allocated cluster.%s", es);
- NVolSetErrors(vol);
- }
- ntfs_free(rl2);
- return PTR_ERR(rl);
- }
- mftbmp_ni->runlist.rl = rl;
- status.added_run = 1;
- ntfs_debug("Adding one run to mft bitmap.");
- /* Find the last run in the new runlist. */
- for (; rl[1].length; rl++)
- ;
- }
- /*
- * Update the attribute record as well. Note: @rl is the last
- * (non-terminator) runlist element of mft bitmap.
- */
- mrec = map_mft_record(mft_ni);
- if (IS_ERR(mrec)) {
- ntfs_error(vol->sb, "Failed to map mft record.");
- ret = PTR_ERR(mrec);
- goto undo_alloc;
- }
- ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
- if (unlikely(!ctx)) {
- ntfs_error(vol->sb, "Failed to get search context.");
- ret = -ENOMEM;
- goto undo_alloc;
- }
- ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
- mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
- 0, ctx);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to find last attribute extent of "
- "mft bitmap attribute.");
- if (ret == -ENOENT)
- ret = -EIO;
- goto undo_alloc;
- }
- a = ctx->attr;
- ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
- /* Search back for the previous last allocated cluster of mft bitmap. */
- for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) {
- if (ll >= rl2->vcn)
- break;
- }
- BUG_ON(ll < rl2->vcn);
- BUG_ON(ll >= rl2->vcn + rl2->length);
- /* Get the size for the new mapping pairs array for this extent. */
- mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
- if (unlikely(mp_size <= 0)) {
- ntfs_error(vol->sb, "Get size for mapping pairs failed for "
- "mft bitmap attribute extent.");
- ret = mp_size;
- if (!ret)
- ret = -EIO;
- goto undo_alloc;
- }
- /* Expand the attribute record if necessary. */
- old_alen = le32_to_cpu(a->length);
- ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
- if (unlikely(ret)) {
- if (ret != -ENOSPC) {
- ntfs_error(vol->sb, "Failed to resize attribute "
- "record for mft bitmap attribute.");
- goto undo_alloc;
- }
- // TODO: Deal with this by moving this extent to a new mft
- // record or by starting a new extent in a new mft record or by
- // moving other attributes out of this mft record.
- // Note: It will need to be a special mft record and if none of
- // those are available it gets rather complicated...
- ntfs_error(vol->sb, "Not enough space in this mft record to "
- "accommodate extended mft bitmap attribute "
- "extent. Cannot handle this yet.");
- ret = -EOPNOTSUPP;
- goto undo_alloc;
- }
- status.mp_rebuilt = 1;
- /* Generate the mapping pairs array directly into the attr record. */
- ret = ntfs_mapping_pairs_build(vol, (u8*)a +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
- mp_size, rl2, ll, -1, NULL);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to build mapping pairs array for "
- "mft bitmap attribute.");
- goto undo_alloc;
- }
- /* Update the highest_vcn. */
- a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
- /*
- * We now have extended the mft bitmap allocated_size by one cluster.
- * Reflect this in the ntfs_inode structure and the attribute record.
- */
- if (a->data.non_resident.lowest_vcn) {
- /*
- * We are not in the first attribute extent, switch to it, but
- * first ensure the changes will make it to disk later.
- */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_reinit_search_ctx(ctx);
- ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
- mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL,
- 0, ctx);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to find first attribute "
- "extent of mft bitmap attribute.");
- goto restore_undo_alloc;
- }
- a = ctx->attr;
- }
- write_lock_irqsave(&mftbmp_ni->size_lock, flags);
- mftbmp_ni->allocated_size += vol->cluster_size;
- a->data.non_resident.allocated_size =
- cpu_to_sle64(mftbmp_ni->allocated_size);
- write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- /* Ensure the changes make it to disk. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- up_write(&mftbmp_ni->runlist.lock);
- ntfs_debug("Done.");
- return 0;
-restore_undo_alloc:
- ntfs_attr_reinit_search_ctx(ctx);
- if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
- mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
- 0, ctx)) {
- ntfs_error(vol->sb, "Failed to find last attribute extent of "
- "mft bitmap attribute.%s", es);
- write_lock_irqsave(&mftbmp_ni->size_lock, flags);
- mftbmp_ni->allocated_size += vol->cluster_size;
- write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- up_write(&mftbmp_ni->runlist.lock);
- /*
- * The only thing that is now wrong is ->allocated_size of the
- * base attribute extent which chkdsk should be able to fix.
- */
- NVolSetErrors(vol);
- return ret;
- }
- a = ctx->attr;
- a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 2);
-undo_alloc:
- if (status.added_cluster) {
- /* Truncate the last run in the runlist by one cluster. */
- rl->length--;
- rl[1].vcn--;
- } else if (status.added_run) {
- lcn = rl->lcn;
- /* Remove the last run from the runlist. */
- rl->lcn = rl[1].lcn;
- rl->length = 0;
- }
- /* Deallocate the cluster. */
- down_write(&vol->lcnbmp_lock);
- if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
- ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es);
- NVolSetErrors(vol);
- }
- up_write(&vol->lcnbmp_lock);
- if (status.mp_rebuilt) {
- if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset),
- old_alen - le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset),
- rl2, ll, -1, NULL)) {
- ntfs_error(vol->sb, "Failed to restore mapping pairs "
- "array.%s", es);
- NVolSetErrors(vol);
- }
- if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
- ntfs_error(vol->sb, "Failed to restore attribute "
- "record.%s", es);
- NVolSetErrors(vol);
- }
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- }
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (!IS_ERR(mrec))
- unmap_mft_record(mft_ni);
- up_write(&mftbmp_ni->runlist.lock);
- return ret;
-}
-
-/**
- * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data
- * @vol: volume on which to extend the mft bitmap attribute
- *
- * Extend the initialized portion of the mft bitmap attribute on the ntfs
- * volume @vol by 8 bytes.
- *
- * Note: Only changes initialized_size and data_size, i.e. requires that
- * allocated_size is big enough to fit the new initialized_size.
- *
- * Return 0 on success and -error on error.
- *
- * Locking: Caller must hold vol->mftbmp_lock for writing.
- */
-static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
-{
- s64 old_data_size, old_initialized_size;
- unsigned long flags;
- struct inode *mftbmp_vi;
- ntfs_inode *mft_ni, *mftbmp_ni;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *mrec;
- ATTR_RECORD *a;
- int ret;
-
- ntfs_debug("Extending mft bitmap initiailized (and data) size.");
- mft_ni = NTFS_I(vol->mft_ino);
- mftbmp_vi = vol->mftbmp_ino;
- mftbmp_ni = NTFS_I(mftbmp_vi);
- /* Get the attribute record. */
- mrec = map_mft_record(mft_ni);
- if (IS_ERR(mrec)) {
- ntfs_error(vol->sb, "Failed to map mft record.");
- return PTR_ERR(mrec);
- }
- ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
- if (unlikely(!ctx)) {
- ntfs_error(vol->sb, "Failed to get search context.");
- ret = -ENOMEM;
- goto unm_err_out;
- }
- ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
- mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to find first attribute extent of "
- "mft bitmap attribute.");
- if (ret == -ENOENT)
- ret = -EIO;
- goto put_err_out;
- }
- a = ctx->attr;
- write_lock_irqsave(&mftbmp_ni->size_lock, flags);
- old_data_size = i_size_read(mftbmp_vi);
- old_initialized_size = mftbmp_ni->initialized_size;
- /*
- * We can simply update the initialized_size before filling the space
- * with zeroes because the caller is holding the mft bitmap lock for
- * writing which ensures that no one else is trying to access the data.
- */
- mftbmp_ni->initialized_size += 8;
- a->data.non_resident.initialized_size =
- cpu_to_sle64(mftbmp_ni->initialized_size);
- if (mftbmp_ni->initialized_size > old_data_size) {
- i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
- a->data.non_resident.data_size =
- cpu_to_sle64(mftbmp_ni->initialized_size);
- }
- write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- /* Ensure the changes make it to disk. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- /* Initialize the mft bitmap attribute value with zeroes. */
- ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0);
- if (likely(!ret)) {
- ntfs_debug("Done. (Wrote eight initialized bytes to mft "
- "bitmap.");
- return 0;
- }
- ntfs_error(vol->sb, "Failed to write to mft bitmap.");
- /* Try to recover from the error. */
- mrec = map_mft_record(mft_ni);
- if (IS_ERR(mrec)) {
- ntfs_error(vol->sb, "Failed to map mft record.%s", es);
- NVolSetErrors(vol);
- return ret;
- }
- ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
- if (unlikely(!ctx)) {
- ntfs_error(vol->sb, "Failed to get search context.%s", es);
- NVolSetErrors(vol);
- goto unm_err_out;
- }
- if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
- mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
- ntfs_error(vol->sb, "Failed to find first attribute extent of "
- "mft bitmap attribute.%s", es);
- NVolSetErrors(vol);
-put_err_out:
- ntfs_attr_put_search_ctx(ctx);
-unm_err_out:
- unmap_mft_record(mft_ni);
- goto err_out;
- }
- a = ctx->attr;
- write_lock_irqsave(&mftbmp_ni->size_lock, flags);
- mftbmp_ni->initialized_size = old_initialized_size;
- a->data.non_resident.initialized_size =
- cpu_to_sle64(old_initialized_size);
- if (i_size_read(mftbmp_vi) != old_data_size) {
- i_size_write(mftbmp_vi, old_data_size);
- a->data.non_resident.data_size = cpu_to_sle64(old_data_size);
- }
- write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
-#ifdef DEBUG
- read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, "
- "data_size 0x%llx, initialized_size 0x%llx.",
- (long long)mftbmp_ni->allocated_size,
- (long long)i_size_read(mftbmp_vi),
- (long long)mftbmp_ni->initialized_size);
- read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
-#endif /* DEBUG */
-err_out:
- return ret;
-}
-
-/**
- * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute
- * @vol: volume on which to extend the mft data attribute
- *
- * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
- * worth of clusters or if not enough space for this by one mft record worth
- * of clusters.
- *
- * Note: Only changes allocated_size, i.e. does not touch initialized_size or
- * data_size.
- *
- * Return 0 on success and -errno on error.
- *
- * Locking: - Caller must hold vol->mftbmp_lock for writing.
- * - This function takes NTFS_I(vol->mft_ino)->runlist.lock for
- * writing and releases it before returning.
- * - This function calls functions which take vol->lcnbmp_lock for
- * writing and release it before returning.
- */
-static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
-{
- LCN lcn;
- VCN old_last_vcn;
- s64 min_nr, nr, ll;
- unsigned long flags;
- ntfs_inode *mft_ni;
- runlist_element *rl, *rl2;
- ntfs_attr_search_ctx *ctx = NULL;
- MFT_RECORD *mrec;
- ATTR_RECORD *a = NULL;
- int ret, mp_size;
- u32 old_alen = 0;
- bool mp_rebuilt = false;
-
- ntfs_debug("Extending mft data allocation.");
- mft_ni = NTFS_I(vol->mft_ino);
- /*
- * Determine the preferred allocation location, i.e. the last lcn of
- * the mft data attribute. The allocated size of the mft data
- * attribute cannot be zero so we are ok to do this.
- */
- down_write(&mft_ni->runlist.lock);
- read_lock_irqsave(&mft_ni->size_lock, flags);
- ll = mft_ni->allocated_size;
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- rl = ntfs_attr_find_vcn_nolock(mft_ni,
- (ll - 1) >> vol->cluster_size_bits, NULL);
- if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
- up_write(&mft_ni->runlist.lock);
- ntfs_error(vol->sb, "Failed to determine last allocated "
- "cluster of mft data attribute.");
- if (!IS_ERR(rl))
- ret = -EIO;
- else
- ret = PTR_ERR(rl);
- return ret;
- }
- lcn = rl->lcn + rl->length;
- ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn);
- /* Minimum allocation is one mft record worth of clusters. */
- min_nr = vol->mft_record_size >> vol->cluster_size_bits;
- if (!min_nr)
- min_nr = 1;
- /* Want to allocate 16 mft records worth of clusters. */
- nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
- if (!nr)
- nr = min_nr;
- /* Ensure we do not go above 2^32-1 mft records. */
- read_lock_irqsave(&mft_ni->size_lock, flags);
- ll = mft_ni->allocated_size;
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
- vol->mft_record_size_bits >= (1ll << 32))) {
- nr = min_nr;
- if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
- vol->mft_record_size_bits >= (1ll << 32))) {
- ntfs_warning(vol->sb, "Cannot allocate mft record "
- "because the maximum number of inodes "
- "(2^32) has already been reached.");
- up_write(&mft_ni->runlist.lock);
- return -ENOSPC;
- }
- }
- ntfs_debug("Trying mft data allocation with %s cluster count %lli.",
- nr > min_nr ? "default" : "minimal", (long long)nr);
- old_last_vcn = rl[1].vcn;
- do {
- rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
- true);
- if (!IS_ERR(rl2))
- break;
- if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
- ntfs_error(vol->sb, "Failed to allocate the minimal "
- "number of clusters (%lli) for the "
- "mft data attribute.", (long long)nr);
- up_write(&mft_ni->runlist.lock);
- return PTR_ERR(rl2);
- }
- /*
- * There is not enough space to do the allocation, but there
- * might be enough space to do a minimal allocation so try that
- * before failing.
- */
- nr = min_nr;
- ntfs_debug("Retrying mft data allocation with minimal cluster "
- "count %lli.", (long long)nr);
- } while (1);
- rl = ntfs_runlists_merge(mft_ni->runlist.rl, rl2);
- if (IS_ERR(rl)) {
- up_write(&mft_ni->runlist.lock);
- ntfs_error(vol->sb, "Failed to merge runlists for mft data "
- "attribute.");
- if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to deallocate clusters "
- "from the mft data attribute.%s", es);
- NVolSetErrors(vol);
- }
- ntfs_free(rl2);
- return PTR_ERR(rl);
- }
- mft_ni->runlist.rl = rl;
- ntfs_debug("Allocated %lli clusters.", (long long)nr);
- /* Find the last run in the new runlist. */
- for (; rl[1].length; rl++)
- ;
- /* Update the attribute record as well. */
- mrec = map_mft_record(mft_ni);
- if (IS_ERR(mrec)) {
- ntfs_error(vol->sb, "Failed to map mft record.");
- ret = PTR_ERR(mrec);
- goto undo_alloc;
- }
- ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
- if (unlikely(!ctx)) {
- ntfs_error(vol->sb, "Failed to get search context.");
- ret = -ENOMEM;
- goto undo_alloc;
- }
- ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
- CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to find last attribute extent of "
- "mft data attribute.");
- if (ret == -ENOENT)
- ret = -EIO;
- goto undo_alloc;
- }
- a = ctx->attr;
- ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
- /* Search back for the previous last allocated cluster of mft bitmap. */
- for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) {
- if (ll >= rl2->vcn)
- break;
- }
- BUG_ON(ll < rl2->vcn);
- BUG_ON(ll >= rl2->vcn + rl2->length);
- /* Get the size for the new mapping pairs array for this extent. */
- mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
- if (unlikely(mp_size <= 0)) {
- ntfs_error(vol->sb, "Get size for mapping pairs failed for "
- "mft data attribute extent.");
- ret = mp_size;
- if (!ret)
- ret = -EIO;
- goto undo_alloc;
- }
- /* Expand the attribute record if necessary. */
- old_alen = le32_to_cpu(a->length);
- ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
- if (unlikely(ret)) {
- if (ret != -ENOSPC) {
- ntfs_error(vol->sb, "Failed to resize attribute "
- "record for mft data attribute.");
- goto undo_alloc;
- }
- // TODO: Deal with this by moving this extent to a new mft
- // record or by starting a new extent in a new mft record or by
- // moving other attributes out of this mft record.
- // Note: Use the special reserved mft records and ensure that
- // this extent is not required to find the mft record in
- // question. If no free special records left we would need to
- // move an existing record away, insert ours in its place, and
- // then place the moved record into the newly allocated space
- // and we would then need to update all references to this mft
- // record appropriately. This is rather complicated...
- ntfs_error(vol->sb, "Not enough space in this mft record to "
- "accommodate extended mft data attribute "
- "extent. Cannot handle this yet.");
- ret = -EOPNOTSUPP;
- goto undo_alloc;
- }
- mp_rebuilt = true;
- /* Generate the mapping pairs array directly into the attr record. */
- ret = ntfs_mapping_pairs_build(vol, (u8*)a +
- le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
- mp_size, rl2, ll, -1, NULL);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to build mapping pairs array of "
- "mft data attribute.");
- goto undo_alloc;
- }
- /* Update the highest_vcn. */
- a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
- /*
- * We now have extended the mft data allocated_size by nr clusters.
- * Reflect this in the ntfs_inode structure and the attribute record.
- * @rl is the last (non-terminator) runlist element of mft data
- * attribute.
- */
- if (a->data.non_resident.lowest_vcn) {
- /*
- * We are not in the first attribute extent, switch to it, but
- * first ensure the changes will make it to disk later.
- */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_reinit_search_ctx(ctx);
- ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name,
- mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0,
- ctx);
- if (unlikely(ret)) {
- ntfs_error(vol->sb, "Failed to find first attribute "
- "extent of mft data attribute.");
- goto restore_undo_alloc;
- }
- a = ctx->attr;
- }
- write_lock_irqsave(&mft_ni->size_lock, flags);
- mft_ni->allocated_size += nr << vol->cluster_size_bits;
- a->data.non_resident.allocated_size =
- cpu_to_sle64(mft_ni->allocated_size);
- write_unlock_irqrestore(&mft_ni->size_lock, flags);
- /* Ensure the changes make it to disk. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- up_write(&mft_ni->runlist.lock);
- ntfs_debug("Done.");
- return 0;
-restore_undo_alloc:
- ntfs_attr_reinit_search_ctx(ctx);
- if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
- CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
- ntfs_error(vol->sb, "Failed to find last attribute extent of "
- "mft data attribute.%s", es);
- write_lock_irqsave(&mft_ni->size_lock, flags);
- mft_ni->allocated_size += nr << vol->cluster_size_bits;
- write_unlock_irqrestore(&mft_ni->size_lock, flags);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- up_write(&mft_ni->runlist.lock);
- /*
- * The only thing that is now wrong is ->allocated_size of the
- * base attribute extent which chkdsk should be able to fix.
- */
- NVolSetErrors(vol);
- return ret;
- }
- ctx->attr->data.non_resident.highest_vcn =
- cpu_to_sle64(old_last_vcn - 1);
-undo_alloc:
- if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
- ntfs_error(vol->sb, "Failed to free clusters from mft data "
- "attribute.%s", es);
- NVolSetErrors(vol);
- }
-
- if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
- ntfs_error(vol->sb, "Failed to truncate mft data attribute "
- "runlist.%s", es);
- NVolSetErrors(vol);
- }
- if (ctx) {
- a = ctx->attr;
- if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
- if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset),
- old_alen - le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset),
- rl2, ll, -1, NULL)) {
- ntfs_error(vol->sb, "Failed to restore mapping pairs "
- "array.%s", es);
- NVolSetErrors(vol);
- }
- if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
- ntfs_error(vol->sb, "Failed to restore attribute "
- "record.%s", es);
- NVolSetErrors(vol);
- }
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- } else if (IS_ERR(ctx->mrec)) {
- ntfs_error(vol->sb, "Failed to restore attribute search "
- "context.%s", es);
- NVolSetErrors(vol);
- }
- ntfs_attr_put_search_ctx(ctx);
- }
- if (!IS_ERR(mrec))
- unmap_mft_record(mft_ni);
- up_write(&mft_ni->runlist.lock);
- return ret;
-}
-
-/**
- * ntfs_mft_record_layout - layout an mft record into a memory buffer
- * @vol: volume to which the mft record will belong
- * @mft_no: mft reference specifying the mft record number
- * @m: destination buffer of size >= @vol->mft_record_size bytes
- *
- * Layout an empty, unused mft record with the mft record number @mft_no into
- * the buffer @m. The volume @vol is needed because the mft record structure
- * was modified in NTFS 3.1 so we need to know which volume version this mft
- * record will be used on.
- *
- * Return 0 on success and -errno on error.
- */
-static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
- MFT_RECORD *m)
-{
- ATTR_RECORD *a;
-
- ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
- if (mft_no >= (1ll << 32)) {
- ntfs_error(vol->sb, "Mft record number 0x%llx exceeds "
- "maximum of 2^32.", (long long)mft_no);
- return -ERANGE;
- }
- /* Start by clearing the whole mft record to gives us a clean slate. */
- memset(m, 0, vol->mft_record_size);
- /* Aligned to 2-byte boundary. */
- if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
- m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1);
- else {
- m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
- /*
- * Set the NTFS 3.1+ specific fields while we know that the
- * volume version is 3.1+.
- */
- m->reserved = 0;
- m->mft_record_number = cpu_to_le32((u32)mft_no);
- }
- m->magic = magic_FILE;
- if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
- m->usa_count = cpu_to_le16(vol->mft_record_size /
- NTFS_BLOCK_SIZE + 1);
- else {
- m->usa_count = cpu_to_le16(1);
- ntfs_warning(vol->sb, "Sector size is bigger than mft record "
- "size. Setting usa_count to 1. If chkdsk "
- "reports this as corruption, please email "
- "linux-ntfs-dev@lists.sourceforge.net stating "
- "that you saw this message and that the "
- "modified filesystem created was corrupt. "
- "Thank you.");
- }
- /* Set the update sequence number to 1. */
- *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1);
- m->lsn = 0;
- m->sequence_number = cpu_to_le16(1);
- m->link_count = 0;
- /*
- * Place the attributes straight after the update sequence array,
- * aligned to 8-byte boundary.
- */
- m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) +
- (le16_to_cpu(m->usa_count) << 1) + 7) & ~7);
- m->flags = 0;
- /*
- * Using attrs_offset plus eight bytes (for the termination attribute).
- * attrs_offset is already aligned to 8-byte boundary, so no need to
- * align again.
- */
- m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8);
- m->bytes_allocated = cpu_to_le32(vol->mft_record_size);
- m->base_mft_record = 0;
- m->next_attr_instance = 0;
- /* Add the termination attribute. */
- a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset));
- a->type = AT_END;
- a->length = 0;
- ntfs_debug("Done.");
- return 0;
-}
-
-/**
- * ntfs_mft_record_format - format an mft record on an ntfs volume
- * @vol: volume on which to format the mft record
- * @mft_no: mft record number to format
- *
- * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused
- * mft record into the appropriate place of the mft data attribute. This is
- * used when extending the mft data attribute.
- *
- * Return 0 on success and -errno on error.
- */
-static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
-{
- loff_t i_size;
- struct inode *mft_vi = vol->mft_ino;
- struct page *page;
- MFT_RECORD *m;
- pgoff_t index, end_index;
- unsigned int ofs;
- int err;
-
- ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
- /*
- * The index into the page cache and the offset within the page cache
- * page of the wanted mft record.
- */
- index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT;
- ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
- /* The maximum valid index into the page cache for $MFT's data. */
- i_size = i_size_read(mft_vi);
- end_index = i_size >> PAGE_SHIFT;
- if (unlikely(index >= end_index)) {
- if (unlikely(index > end_index || ofs + vol->mft_record_size >=
- (i_size & ~PAGE_MASK))) {
- ntfs_error(vol->sb, "Tried to format non-existing mft "
- "record 0x%llx.", (long long)mft_no);
- return -ENOENT;
- }
- }
- /* Read, map, and pin the page containing the mft record. */
- page = ntfs_map_page(mft_vi->i_mapping, index);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to map page containing mft record "
- "to format 0x%llx.", (long long)mft_no);
- return PTR_ERR(page);
- }
- lock_page(page);
- BUG_ON(!PageUptodate(page));
- ClearPageUptodate(page);
- m = (MFT_RECORD*)((u8*)page_address(page) + ofs);
- err = ntfs_mft_record_layout(vol, mft_no, m);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.",
- (long long)mft_no);
- SetPageUptodate(page);
- unlock_page(page);
- ntfs_unmap_page(page);
- return err;
- }
- flush_dcache_page(page);
- SetPageUptodate(page);
- unlock_page(page);
- /*
- * Make sure the mft record is written out to disk. We could use
- * ilookup5() to check if an inode is in icache and so on but this is
- * unnecessary as ntfs_writepage() will write the dirty record anyway.
- */
- mark_ntfs_record_dirty(page, ofs);
- ntfs_unmap_page(page);
- ntfs_debug("Done.");
- return 0;
-}
-
-/**
- * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
- * @vol: [IN] volume on which to allocate the mft record
- * @mode: [IN] mode if want a file or directory, i.e. base inode or 0
- * @base_ni: [IN] open base inode if allocating an extent mft record or NULL
- * @mrec: [OUT] on successful return this is the mapped mft record
- *
- * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
- *
- * If @base_ni is NULL make the mft record a base mft record, i.e. a file or
- * direvctory inode, and allocate it at the default allocator position. In
- * this case @mode is the file mode as given to us by the caller. We in
- * particular use @mode to distinguish whether a file or a directory is being
- * created (S_IFDIR(mode) and S_IFREG(mode), respectively).
- *
- * If @base_ni is not NULL make the allocated mft record an extent record,
- * allocate it starting at the mft record after the base mft record and attach
- * the allocated and opened ntfs inode to the base inode @base_ni. In this
- * case @mode must be 0 as it is meaningless for extent inodes.
- *
- * You need to check the return value with IS_ERR(). If false, the function
- * was successful and the return value is the now opened ntfs inode of the
- * allocated mft record. *@mrec is then set to the allocated, mapped, pinned,
- * and locked mft record. If IS_ERR() is true, the function failed and the
- * error code is obtained from PTR_ERR(return value). *@mrec is undefined in
- * this case.
- *
- * Allocation strategy:
- *
- * To find a free mft record, we scan the mft bitmap for a zero bit. To
- * optimize this we start scanning at the place specified by @base_ni or if
- * @base_ni is NULL we start where we last stopped and we perform wrap around
- * when we reach the end. Note, we do not try to allocate mft records below
- * number 24 because numbers 0 to 15 are the defined system files anyway and 16
- * to 24 are special in that they are used for storing extension mft records
- * for the $DATA attribute of $MFT. This is required to avoid the possibility
- * of creating a runlist with a circular dependency which once written to disk
- * can never be read in again. Windows will only use records 16 to 24 for
- * normal files if the volume is completely out of space. We never use them
- * which means that when the volume is really out of space we cannot create any
- * more files while Windows can still create up to 8 small files. We can start
- * doing this at some later time, it does not matter much for now.
- *
- * When scanning the mft bitmap, we only search up to the last allocated mft
- * record. If there are no free records left in the range 24 to number of
- * allocated mft records, then we extend the $MFT/$DATA attribute in order to
- * create free mft records. We extend the allocated size of $MFT/$DATA by 16
- * records at a time or one cluster, if cluster size is above 16kiB. If there
- * is not sufficient space to do this, we try to extend by a single mft record
- * or one cluster, if cluster size is above the mft record size.
- *
- * No matter how many mft records we allocate, we initialize only the first
- * allocated mft record, incrementing mft data size and initialized size
- * accordingly, open an ntfs_inode for it and return it to the caller, unless
- * there are less than 24 mft records, in which case we allocate and initialize
- * mft records until we reach record 24 which we consider as the first free mft
- * record for use by normal files.
- *
- * If during any stage we overflow the initialized data in the mft bitmap, we
- * extend the initialized size (and data size) by 8 bytes, allocating another
- * cluster if required. The bitmap data size has to be at least equal to the
- * number of mft records in the mft, but it can be bigger, in which case the
- * superflous bits are padded with zeroes.
- *
- * Thus, when we return successfully (IS_ERR() is false), we will have:
- * - initialized / extended the mft bitmap if necessary,
- * - initialized / extended the mft data if necessary,
- * - set the bit corresponding to the mft record being allocated in the
- * mft bitmap,
- * - opened an ntfs_inode for the allocated mft record, and we will have
- * - returned the ntfs_inode as well as the allocated mapped, pinned, and
- * locked mft record.
- *
- * On error, the volume will be left in a consistent state and no record will
- * be allocated. If rolling back a partial operation fails, we may leave some
- * inconsistent metadata in which case we set NVolErrors() so the volume is
- * left dirty when unmounted.
- *
- * Note, this function cannot make use of most of the normal functions, like
- * for example for attribute resizing, etc, because when the run list overflows
- * the base mft record and an attribute list is used, it is very important that
- * the extension mft records used to store the $DATA attribute of $MFT can be
- * reached without having to read the information contained inside them, as
- * this would make it impossible to find them in the first place after the
- * volume is unmounted. $MFT/$BITMAP probably does not need to follow this
- * rule because the bitmap is not essential for finding the mft records, but on
- * the other hand, handling the bitmap in this special way would make life
- * easier because otherwise there might be circular invocations of functions
- * when reading the bitmap.
- */
-ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
- ntfs_inode *base_ni, MFT_RECORD **mrec)
-{
- s64 ll, bit, old_data_initialized, old_data_size;
- unsigned long flags;
- struct inode *vi;
- struct page *page;
- ntfs_inode *mft_ni, *mftbmp_ni, *ni;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- pgoff_t index;
- unsigned int ofs;
- int err;
- le16 seq_no, usn;
- bool record_formatted = false;
-
- if (base_ni) {
- ntfs_debug("Entering (allocating an extent mft record for "
- "base mft record 0x%llx).",
- (long long)base_ni->mft_no);
- /* @mode and @base_ni are mutually exclusive. */
- BUG_ON(mode);
- } else
- ntfs_debug("Entering (allocating a base mft record).");
- if (mode) {
- /* @mode and @base_ni are mutually exclusive. */
- BUG_ON(base_ni);
- /* We only support creation of normal files and directories. */
- if (!S_ISREG(mode) && !S_ISDIR(mode))
- return ERR_PTR(-EOPNOTSUPP);
- }
- BUG_ON(!mrec);
- mft_ni = NTFS_I(vol->mft_ino);
- mftbmp_ni = NTFS_I(vol->mftbmp_ino);
- down_write(&vol->mftbmp_lock);
- bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni);
- if (bit >= 0) {
- ntfs_debug("Found and allocated free record (#1), bit 0x%llx.",
- (long long)bit);
- goto have_alloc_rec;
- }
- if (bit != -ENOSPC) {
- up_write(&vol->mftbmp_lock);
- return ERR_PTR(bit);
- }
- /*
- * No free mft records left. If the mft bitmap already covers more
- * than the currently used mft records, the next records are all free,
- * so we can simply allocate the first unused mft record.
- * Note: We also have to make sure that the mft bitmap at least covers
- * the first 24 mft records as they are special and whilst they may not
- * be in use, we do not allocate from them.
- */
- read_lock_irqsave(&mft_ni->size_lock, flags);
- ll = mft_ni->initialized_size >> vol->mft_record_size_bits;
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- old_data_initialized = mftbmp_ni->initialized_size;
- read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- if (old_data_initialized << 3 > ll && old_data_initialized > 3) {
- bit = ll;
- if (bit < 24)
- bit = 24;
- if (unlikely(bit >= (1ll << 32)))
- goto max_err_out;
- ntfs_debug("Found free record (#2), bit 0x%llx.",
- (long long)bit);
- goto found_free_rec;
- }
- /*
- * The mft bitmap needs to be expanded until it covers the first unused
- * mft record that we can allocate.
- * Note: The smallest mft record we allocate is mft record 24.
- */
- bit = old_data_initialized << 3;
- if (unlikely(bit >= (1ll << 32)))
- goto max_err_out;
- read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- old_data_size = mftbmp_ni->allocated_size;
- ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
- "data_size 0x%llx, initialized_size 0x%llx.",
- (long long)old_data_size,
- (long long)i_size_read(vol->mftbmp_ino),
- (long long)old_data_initialized);
- read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- if (old_data_initialized + 8 > old_data_size) {
- /* Need to extend bitmap by one more cluster. */
- ntfs_debug("mftbmp: initialized_size + 8 > allocated_size.");
- err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
- if (unlikely(err)) {
- up_write(&vol->mftbmp_lock);
- goto err_out;
- }
-#ifdef DEBUG
- read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- ntfs_debug("Status of mftbmp after allocation extension: "
- "allocated_size 0x%llx, data_size 0x%llx, "
- "initialized_size 0x%llx.",
- (long long)mftbmp_ni->allocated_size,
- (long long)i_size_read(vol->mftbmp_ino),
- (long long)mftbmp_ni->initialized_size);
- read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
-#endif /* DEBUG */
- }
- /*
- * We now have sufficient allocated space, extend the initialized_size
- * as well as the data_size if necessary and fill the new space with
- * zeroes.
- */
- err = ntfs_mft_bitmap_extend_initialized_nolock(vol);
- if (unlikely(err)) {
- up_write(&vol->mftbmp_lock);
- goto err_out;
- }
-#ifdef DEBUG
- read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- ntfs_debug("Status of mftbmp after initialized extension: "
- "allocated_size 0x%llx, data_size 0x%llx, "
- "initialized_size 0x%llx.",
- (long long)mftbmp_ni->allocated_size,
- (long long)i_size_read(vol->mftbmp_ino),
- (long long)mftbmp_ni->initialized_size);
- read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
-#endif /* DEBUG */
- ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit);
-found_free_rec:
- /* @bit is the found free mft record, allocate it in the mft bitmap. */
- ntfs_debug("At found_free_rec.");
- err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap.");
- up_write(&vol->mftbmp_lock);
- goto err_out;
- }
- ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit);
-have_alloc_rec:
- /*
- * The mft bitmap is now uptodate. Deal with mft data attribute now.
- * Note, we keep hold of the mft bitmap lock for writing until all
- * modifications to the mft data attribute are complete, too, as they
- * will impact decisions for mft bitmap and mft record allocation done
- * by a parallel allocation and if the lock is not maintained a
- * parallel allocation could allocate the same mft record as this one.
- */
- ll = (bit + 1) << vol->mft_record_size_bits;
- read_lock_irqsave(&mft_ni->size_lock, flags);
- old_data_initialized = mft_ni->initialized_size;
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- if (ll <= old_data_initialized) {
- ntfs_debug("Allocated mft record already initialized.");
- goto mft_rec_already_initialized;
- }
- ntfs_debug("Initializing allocated mft record.");
- /*
- * The mft record is outside the initialized data. Extend the mft data
- * attribute until it covers the allocated record. The loop is only
- * actually traversed more than once when a freshly formatted volume is
- * first written to so it optimizes away nicely in the common case.
- */
- read_lock_irqsave(&mft_ni->size_lock, flags);
- ntfs_debug("Status of mft data before extension: "
- "allocated_size 0x%llx, data_size 0x%llx, "
- "initialized_size 0x%llx.",
- (long long)mft_ni->allocated_size,
- (long long)i_size_read(vol->mft_ino),
- (long long)mft_ni->initialized_size);
- while (ll > mft_ni->allocated_size) {
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- err = ntfs_mft_data_extend_allocation_nolock(vol);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to extend mft data "
- "allocation.");
- goto undo_mftbmp_alloc_nolock;
- }
- read_lock_irqsave(&mft_ni->size_lock, flags);
- ntfs_debug("Status of mft data after allocation extension: "
- "allocated_size 0x%llx, data_size 0x%llx, "
- "initialized_size 0x%llx.",
- (long long)mft_ni->allocated_size,
- (long long)i_size_read(vol->mft_ino),
- (long long)mft_ni->initialized_size);
- }
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- /*
- * Extend mft data initialized size (and data size of course) to reach
- * the allocated mft record, formatting the mft records allong the way.
- * Note: We only modify the ntfs_inode structure as that is all that is
- * needed by ntfs_mft_record_format(). We will update the attribute
- * record itself in one fell swoop later on.
- */
- write_lock_irqsave(&mft_ni->size_lock, flags);
- old_data_initialized = mft_ni->initialized_size;
- old_data_size = vol->mft_ino->i_size;
- while (ll > mft_ni->initialized_size) {
- s64 new_initialized_size, mft_no;
-
- new_initialized_size = mft_ni->initialized_size +
- vol->mft_record_size;
- mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits;
- if (new_initialized_size > i_size_read(vol->mft_ino))
- i_size_write(vol->mft_ino, new_initialized_size);
- write_unlock_irqrestore(&mft_ni->size_lock, flags);
- ntfs_debug("Initializing mft record 0x%llx.",
- (long long)mft_no);
- err = ntfs_mft_record_format(vol, mft_no);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to format mft record.");
- goto undo_data_init;
- }
- write_lock_irqsave(&mft_ni->size_lock, flags);
- mft_ni->initialized_size = new_initialized_size;
- }
- write_unlock_irqrestore(&mft_ni->size_lock, flags);
- record_formatted = true;
- /* Update the mft data attribute record to reflect the new sizes. */
- m = map_mft_record(mft_ni);
- if (IS_ERR(m)) {
- ntfs_error(vol->sb, "Failed to map mft record.");
- err = PTR_ERR(m);
- goto undo_data_init;
- }
- ctx = ntfs_attr_get_search_ctx(mft_ni, m);
- if (unlikely(!ctx)) {
- ntfs_error(vol->sb, "Failed to get search context.");
- err = -ENOMEM;
- unmap_mft_record(mft_ni);
- goto undo_data_init;
- }
- err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to find first attribute extent of "
- "mft data attribute.");
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- goto undo_data_init;
- }
- a = ctx->attr;
- read_lock_irqsave(&mft_ni->size_lock, flags);
- a->data.non_resident.initialized_size =
- cpu_to_sle64(mft_ni->initialized_size);
- a->data.non_resident.data_size =
- cpu_to_sle64(i_size_read(vol->mft_ino));
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- /* Ensure the changes make it to disk. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(mft_ni);
- read_lock_irqsave(&mft_ni->size_lock, flags);
- ntfs_debug("Status of mft data after mft record initialization: "
- "allocated_size 0x%llx, data_size 0x%llx, "
- "initialized_size 0x%llx.",
- (long long)mft_ni->allocated_size,
- (long long)i_size_read(vol->mft_ino),
- (long long)mft_ni->initialized_size);
- BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size);
- BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino));
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
-mft_rec_already_initialized:
- /*
- * We can finally drop the mft bitmap lock as the mft data attribute
- * has been fully updated. The only disparity left is that the
- * allocated mft record still needs to be marked as in use to match the
- * set bit in the mft bitmap but this is actually not a problem since
- * this mft record is not referenced from anywhere yet and the fact
- * that it is allocated in the mft bitmap means that no-one will try to
- * allocate it either.
- */
- up_write(&vol->mftbmp_lock);
- /*
- * We now have allocated and initialized the mft record. Calculate the
- * index of and the offset within the page cache page the record is in.
- */
- index = bit << vol->mft_record_size_bits >> PAGE_SHIFT;
- ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK;
- /* Read, map, and pin the page containing the mft record. */
- page = ntfs_map_page(vol->mft_ino->i_mapping, index);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to map page containing allocated "
- "mft record 0x%llx.", (long long)bit);
- err = PTR_ERR(page);
- goto undo_mftbmp_alloc;
- }
- lock_page(page);
- BUG_ON(!PageUptodate(page));
- ClearPageUptodate(page);
- m = (MFT_RECORD*)((u8*)page_address(page) + ofs);
- /* If we just formatted the mft record no need to do it again. */
- if (!record_formatted) {
- /* Sanity check that the mft record is really not in use. */
- if (ntfs_is_file_record(m->magic) &&
- (m->flags & MFT_RECORD_IN_USE)) {
- ntfs_error(vol->sb, "Mft record 0x%llx was marked "
- "free in mft bitmap but is marked "
- "used itself. Corrupt filesystem. "
- "Unmount and run chkdsk.",
- (long long)bit);
- err = -EIO;
- SetPageUptodate(page);
- unlock_page(page);
- ntfs_unmap_page(page);
- NVolSetErrors(vol);
- goto undo_mftbmp_alloc;
- }
- /*
- * We need to (re-)format the mft record, preserving the
- * sequence number if it is not zero as well as the update
- * sequence number if it is not zero or -1 (0xffff). This
- * means we do not need to care whether or not something went
- * wrong with the previous mft record.
- */
- seq_no = m->sequence_number;
- usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs));
- err = ntfs_mft_record_layout(vol, bit, m);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to layout allocated mft "
- "record 0x%llx.", (long long)bit);
- SetPageUptodate(page);
- unlock_page(page);
- ntfs_unmap_page(page);
- goto undo_mftbmp_alloc;
- }
- if (seq_no)
- m->sequence_number = seq_no;
- if (usn && le16_to_cpu(usn) != 0xffff)
- *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
- }
- /* Set the mft record itself in use. */
- m->flags |= MFT_RECORD_IN_USE;
- if (S_ISDIR(mode))
- m->flags |= MFT_RECORD_IS_DIRECTORY;
- flush_dcache_page(page);
- SetPageUptodate(page);
- if (base_ni) {
- MFT_RECORD *m_tmp;
-
- /*
- * Setup the base mft record in the extent mft record. This
- * completes initialization of the allocated extent mft record
- * and we can simply use it with map_extent_mft_record().
- */
- m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
- base_ni->seq_no);
- /*
- * Allocate an extent inode structure for the new mft record,
- * attach it to the base inode @base_ni and map, pin, and lock
- * its, i.e. the allocated, mft record.
- */
- m_tmp = map_extent_mft_record(base_ni, bit, &ni);
- if (IS_ERR(m_tmp)) {
- ntfs_error(vol->sb, "Failed to map allocated extent "
- "mft record 0x%llx.", (long long)bit);
- err = PTR_ERR(m_tmp);
- /* Set the mft record itself not in use. */
- m->flags &= cpu_to_le16(
- ~le16_to_cpu(MFT_RECORD_IN_USE));
- flush_dcache_page(page);
- /* Make sure the mft record is written out to disk. */
- mark_ntfs_record_dirty(page, ofs);
- unlock_page(page);
- ntfs_unmap_page(page);
- goto undo_mftbmp_alloc;
- }
- BUG_ON(m != m_tmp);
- /*
- * Make sure the allocated mft record is written out to disk.
- * No need to set the inode dirty because the caller is going
- * to do that anyway after finishing with the new extent mft
- * record (e.g. at a minimum a new attribute will be added to
- * the mft record.
- */
- mark_ntfs_record_dirty(page, ofs);
- unlock_page(page);
- /*
- * Need to unmap the page since map_extent_mft_record() mapped
- * it as well so we have it mapped twice at the moment.
- */
- ntfs_unmap_page(page);
- } else {
- /*
- * Allocate a new VFS inode and set it up. NOTE: @vi->i_nlink
- * is set to 1 but the mft record->link_count is 0. The caller
- * needs to bear this in mind.
- */
- vi = new_inode(vol->sb);
- if (unlikely(!vi)) {
- err = -ENOMEM;
- /* Set the mft record itself not in use. */
- m->flags &= cpu_to_le16(
- ~le16_to_cpu(MFT_RECORD_IN_USE));
- flush_dcache_page(page);
- /* Make sure the mft record is written out to disk. */
- mark_ntfs_record_dirty(page, ofs);
- unlock_page(page);
- ntfs_unmap_page(page);
- goto undo_mftbmp_alloc;
- }
- vi->i_ino = bit;
-
- /* The owner and group come from the ntfs volume. */
- vi->i_uid = vol->uid;
- vi->i_gid = vol->gid;
-
- /* Initialize the ntfs specific part of @vi. */
- ntfs_init_big_inode(vi);
- ni = NTFS_I(vi);
- /*
- * Set the appropriate mode, attribute type, and name. For
- * directories, also setup the index values to the defaults.
- */
- if (S_ISDIR(mode)) {
- vi->i_mode = S_IFDIR | S_IRWXUGO;
- vi->i_mode &= ~vol->dmask;
-
- NInoSetMstProtected(ni);
- ni->type = AT_INDEX_ALLOCATION;
- ni->name = I30;
- ni->name_len = 4;
-
- ni->itype.index.block_size = 4096;
- ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1;
- ni->itype.index.collation_rule = COLLATION_FILE_NAME;
- if (vol->cluster_size <= ni->itype.index.block_size) {
- ni->itype.index.vcn_size = vol->cluster_size;
- ni->itype.index.vcn_size_bits =
- vol->cluster_size_bits;
- } else {
- ni->itype.index.vcn_size = vol->sector_size;
- ni->itype.index.vcn_size_bits =
- vol->sector_size_bits;
- }
- } else {
- vi->i_mode = S_IFREG | S_IRWXUGO;
- vi->i_mode &= ~vol->fmask;
-
- ni->type = AT_DATA;
- ni->name = NULL;
- ni->name_len = 0;
- }
- if (IS_RDONLY(vi))
- vi->i_mode &= ~S_IWUGO;
-
- /* Set the inode times to the current time. */
- simple_inode_init_ts(vi);
- /*
- * Set the file size to 0, the ntfs inode sizes are set to 0 by
- * the call to ntfs_init_big_inode() below.
- */
- vi->i_size = 0;
- vi->i_blocks = 0;
-
- /* Set the sequence number. */
- vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
- /*
- * Manually map, pin, and lock the mft record as we already
- * have its page mapped and it is very easy to do.
- */
- atomic_inc(&ni->count);
- mutex_lock(&ni->mrec_lock);
- ni->page = page;
- ni->page_ofs = ofs;
- /*
- * Make sure the allocated mft record is written out to disk.
- * NOTE: We do not set the ntfs inode dirty because this would
- * fail in ntfs_write_inode() because the inode does not have a
- * standard information attribute yet. Also, there is no need
- * to set the inode dirty because the caller is going to do
- * that anyway after finishing with the new mft record (e.g. at
- * a minimum some new attributes will be added to the mft
- * record.
- */
- mark_ntfs_record_dirty(page, ofs);
- unlock_page(page);
-
- /* Add the inode to the inode hash for the superblock. */
- insert_inode_hash(vi);
-
- /* Update the default mft allocation position. */
- vol->mft_data_pos = bit + 1;
- }
- /*
- * Return the opened, allocated inode of the allocated mft record as
- * well as the mapped, pinned, and locked mft record.
- */
- ntfs_debug("Returning opened, allocated %sinode 0x%llx.",
- base_ni ? "extent " : "", (long long)bit);
- *mrec = m;
- return ni;
-undo_data_init:
- write_lock_irqsave(&mft_ni->size_lock, flags);
- mft_ni->initialized_size = old_data_initialized;
- i_size_write(vol->mft_ino, old_data_size);
- write_unlock_irqrestore(&mft_ni->size_lock, flags);
- goto undo_mftbmp_alloc_nolock;
-undo_mftbmp_alloc:
- down_write(&vol->mftbmp_lock);
-undo_mftbmp_alloc_nolock:
- if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) {
- ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es);
- NVolSetErrors(vol);
- }
- up_write(&vol->mftbmp_lock);
-err_out:
- return ERR_PTR(err);
-max_err_out:
- ntfs_warning(vol->sb, "Cannot allocate mft record because the maximum "
- "number of inodes (2^32) has already been reached.");
- up_write(&vol->mftbmp_lock);
- return ERR_PTR(-ENOSPC);
-}
-
-/**
- * ntfs_extent_mft_record_free - free an extent mft record on an ntfs volume
- * @ni: ntfs inode of the mapped extent mft record to free
- * @m: mapped extent mft record of the ntfs inode @ni
- *
- * Free the mapped extent mft record @m of the extent ntfs inode @ni.
- *
- * Note that this function unmaps the mft record and closes and destroys @ni
- * internally and hence you cannot use either @ni nor @m any more after this
- * function returns success.
- *
- * On success return 0 and on error return -errno. @ni and @m are still valid
- * in this case and have not been freed.
- *
- * For some errors an error message is displayed and the success code 0 is
- * returned and the volume is then left dirty on umount. This makes sense in
- * case we could not rollback the changes that were already done since the
- * caller no longer wants to reference this mft record so it does not matter to
- * the caller if something is wrong with it as long as it is properly detached
- * from the base inode.
- */
-int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
-{
- unsigned long mft_no = ni->mft_no;
- ntfs_volume *vol = ni->vol;
- ntfs_inode *base_ni;
- ntfs_inode **extent_nis;
- int i, err;
- le16 old_seq_no;
- u16 seq_no;
-
- BUG_ON(NInoAttr(ni));
- BUG_ON(ni->nr_extents != -1);
-
- mutex_lock(&ni->extent_lock);
- base_ni = ni->ext.base_ntfs_ino;
- mutex_unlock(&ni->extent_lock);
-
- BUG_ON(base_ni->nr_extents <= 0);
-
- ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n",
- mft_no, base_ni->mft_no);
-
- mutex_lock(&base_ni->extent_lock);
-
- /* Make sure we are holding the only reference to the extent inode. */
- if (atomic_read(&ni->count) > 2) {
- ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, "
- "not freeing.", base_ni->mft_no);
- mutex_unlock(&base_ni->extent_lock);
- return -EBUSY;
- }
-
- /* Dissociate the ntfs inode from the base inode. */
- extent_nis = base_ni->ext.extent_ntfs_inos;
- err = -ENOENT;
- for (i = 0; i < base_ni->nr_extents; i++) {
- if (ni != extent_nis[i])
- continue;
- extent_nis += i;
- base_ni->nr_extents--;
- memmove(extent_nis, extent_nis + 1, (base_ni->nr_extents - i) *
- sizeof(ntfs_inode*));
- err = 0;
- break;
- }
-
- mutex_unlock(&base_ni->extent_lock);
-
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to "
- "its base inode 0x%lx.", mft_no,
- base_ni->mft_no);
- BUG();
- }
-
- /*
- * The extent inode is no longer attached to the base inode so no one
- * can get a reference to it any more.
- */
-
- /* Mark the mft record as not in use. */
- m->flags &= ~MFT_RECORD_IN_USE;
-
- /* Increment the sequence number, skipping zero, if it is not zero. */
- old_seq_no = m->sequence_number;
- seq_no = le16_to_cpu(old_seq_no);
- if (seq_no == 0xffff)
- seq_no = 1;
- else if (seq_no)
- seq_no++;
- m->sequence_number = cpu_to_le16(seq_no);
-
- /*
- * Set the ntfs inode dirty and write it out. We do not need to worry
- * about the base inode here since whatever caused the extent mft
- * record to be freed is guaranteed to do it already.
- */
- NInoSetDirty(ni);
- err = write_mft_record(ni, m, 0);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to write mft record 0x%lx, not "
- "freeing.", mft_no);
- goto rollback;
- }
-rollback_error:
- /* Unmap and throw away the now freed extent inode. */
- unmap_extent_mft_record(ni);
- ntfs_clear_extent_inode(ni);
-
- /* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
- down_write(&vol->mftbmp_lock);
- err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no);
- up_write(&vol->mftbmp_lock);
- if (unlikely(err)) {
- /*
- * The extent inode is gone but we failed to deallocate it in
- * the mft bitmap. Just emit a warning and leave the volume
- * dirty on umount.
- */
- ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es);
- NVolSetErrors(vol);
- }
- return 0;
-rollback:
- /* Rollback what we did... */
- mutex_lock(&base_ni->extent_lock);
- extent_nis = base_ni->ext.extent_ntfs_inos;
- if (!(base_ni->nr_extents & 3)) {
- int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*);
-
- extent_nis = kmalloc(new_size, GFP_NOFS);
- if (unlikely(!extent_nis)) {
- ntfs_error(vol->sb, "Failed to allocate internal "
- "buffer during rollback.%s", es);
- mutex_unlock(&base_ni->extent_lock);
- NVolSetErrors(vol);
- goto rollback_error;
- }
- if (base_ni->nr_extents) {
- BUG_ON(!base_ni->ext.extent_ntfs_inos);
- memcpy(extent_nis, base_ni->ext.extent_ntfs_inos,
- new_size - 4 * sizeof(ntfs_inode*));
- kfree(base_ni->ext.extent_ntfs_inos);
- }
- base_ni->ext.extent_ntfs_inos = extent_nis;
- }
- m->flags |= MFT_RECORD_IN_USE;
- m->sequence_number = old_seq_no;
- extent_nis[base_ni->nr_extents++] = ni;
- mutex_unlock(&base_ni->extent_lock);
- mark_mft_record_dirty(ni);
- return err;
-}
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h
deleted file mode 100644
index 49c001af16ed..000000000000
--- a/fs/ntfs/mft.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * mft.h - Defines for mft record handling in NTFS Linux kernel driver.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_MFT_H
-#define _LINUX_NTFS_MFT_H
-
-#include <linux/fs.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-
-#include "inode.h"
-
-extern MFT_RECORD *map_mft_record(ntfs_inode *ni);
-extern void unmap_mft_record(ntfs_inode *ni);
-
-extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
- ntfs_inode **ntfs_ino);
-
-static inline void unmap_extent_mft_record(ntfs_inode *ni)
-{
- unmap_mft_record(ni);
- return;
-}
-
-#ifdef NTFS_RW
-
-/**
- * flush_dcache_mft_record_page - flush_dcache_page() for mft records
- * @ni: ntfs inode structure of mft record
- *
- * Call flush_dcache_page() for the page in which an mft record resides.
- *
- * This must be called every time an mft record is modified, just after the
- * modification.
- */
-static inline void flush_dcache_mft_record_page(ntfs_inode *ni)
-{
- flush_dcache_page(ni->page);
-}
-
-extern void __mark_mft_record_dirty(ntfs_inode *ni);
-
-/**
- * mark_mft_record_dirty - set the mft record and the page containing it dirty
- * @ni: ntfs inode describing the mapped mft record
- *
- * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni,
- * as well as the page containing the mft record, dirty. Also, mark the base
- * vfs inode dirty. This ensures that any changes to the mft record are
- * written out to disk.
- *
- * NOTE: Do not do anything if the mft record is already marked dirty.
- */
-static inline void mark_mft_record_dirty(ntfs_inode *ni)
-{
- if (!NInoTestSetDirty(ni))
- __mark_mft_record_dirty(ni);
-}
-
-extern int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
- MFT_RECORD *m, int sync);
-
-extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync);
-
-/**
- * write_mft_record - write out a mapped (extent) mft record
- * @ni: ntfs inode describing the mapped (extent) mft record
- * @m: mapped (extent) mft record to write
- * @sync: if true, wait for i/o completion
- *
- * This is just a wrapper for write_mft_record_nolock() (see mft.c), which
- * locks the page for the duration of the write. This ensures that there are
- * no race conditions between writing the mft record via the dirty inode code
- * paths and via the page cache write back code paths or between writing
- * neighbouring mft records residing in the same page.
- *
- * Locking the page also serializes us against ->read_folio() if the page is not
- * uptodate.
- *
- * On success, clean the mft record and return 0. On error, leave the mft
- * record dirty and return -errno.
- */
-static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync)
-{
- struct page *page = ni->page;
- int err;
-
- BUG_ON(!page);
- lock_page(page);
- err = write_mft_record_nolock(ni, m, sync);
- unlock_page(page);
- return err;
-}
-
-extern bool ntfs_may_write_mft_record(ntfs_volume *vol,
- const unsigned long mft_no, const MFT_RECORD *m,
- ntfs_inode **locked_ni);
-
-extern ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
- ntfs_inode *base_ni, MFT_RECORD **mrec);
-extern int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_MFT_H */
diff --git a/fs/ntfs/mst.c b/fs/ntfs/mst.c
deleted file mode 100644
index 16b3c884abfc..000000000000
--- a/fs/ntfs/mst.c
+++ /dev/null
@@ -1,189 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * mst.c - NTFS multi sector transfer protection handling code. Part of the
- * Linux-NTFS project.
- *
- * Copyright (c) 2001-2004 Anton Altaparmakov
- */
-
-#include "ntfs.h"
-
-/**
- * post_read_mst_fixup - deprotect multi sector transfer protected data
- * @b: pointer to the data to deprotect
- * @size: size in bytes of @b
- *
- * Perform the necessary post read multi sector transfer fixup and detect the
- * presence of incomplete multi sector transfers. - In that case, overwrite the
- * magic of the ntfs record header being processed with "BAAD" (in memory only!)
- * and abort processing.
- *
- * Return 0 on success and -EINVAL on error ("BAAD" magic will be present).
- *
- * NOTE: We consider the absence / invalidity of an update sequence array to
- * mean that the structure is not protected at all and hence doesn't need to
- * be fixed up. Thus, we return success and not failure in this case. This is
- * in contrast to pre_write_mst_fixup(), see below.
- */
-int post_read_mst_fixup(NTFS_RECORD *b, const u32 size)
-{
- u16 usa_ofs, usa_count, usn;
- u16 *usa_pos, *data_pos;
-
- /* Setup the variables. */
- usa_ofs = le16_to_cpu(b->usa_ofs);
- /* Decrement usa_count to get number of fixups. */
- usa_count = le16_to_cpu(b->usa_count) - 1;
- /* Size and alignment checks. */
- if ( size & (NTFS_BLOCK_SIZE - 1) ||
- usa_ofs & 1 ||
- usa_ofs + (usa_count * 2) > size ||
- (size >> NTFS_BLOCK_SIZE_BITS) != usa_count)
- return 0;
- /* Position of usn in update sequence array. */
- usa_pos = (u16*)b + usa_ofs/sizeof(u16);
- /*
- * The update sequence number which has to be equal to each of the
- * u16 values before they are fixed up. Note no need to care for
- * endianness since we are comparing and moving data for on disk
- * structures which means the data is consistent. - If it is
- * consistenty the wrong endianness it doesn't make any difference.
- */
- usn = *usa_pos;
- /*
- * Position in protected data of first u16 that needs fixing up.
- */
- data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1;
- /*
- * Check for incomplete multi sector transfer(s).
- */
- while (usa_count--) {
- if (*data_pos != usn) {
- /*
- * Incomplete multi sector transfer detected! )-:
- * Set the magic to "BAAD" and return failure.
- * Note that magic_BAAD is already converted to le32.
- */
- b->magic = magic_BAAD;
- return -EINVAL;
- }
- data_pos += NTFS_BLOCK_SIZE/sizeof(u16);
- }
- /* Re-setup the variables. */
- usa_count = le16_to_cpu(b->usa_count) - 1;
- data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1;
- /* Fixup all sectors. */
- while (usa_count--) {
- /*
- * Increment position in usa and restore original data from
- * the usa into the data buffer.
- */
- *data_pos = *(++usa_pos);
- /* Increment position in data as well. */
- data_pos += NTFS_BLOCK_SIZE/sizeof(u16);
- }
- return 0;
-}
-
-/**
- * pre_write_mst_fixup - apply multi sector transfer protection
- * @b: pointer to the data to protect
- * @size: size in bytes of @b
- *
- * Perform the necessary pre write multi sector transfer fixup on the data
- * pointer to by @b of @size.
- *
- * Return 0 if fixup applied (success) or -EINVAL if no fixup was performed
- * (assumed not needed). This is in contrast to post_read_mst_fixup() above.
- *
- * NOTE: We consider the absence / invalidity of an update sequence array to
- * mean that the structure is not subject to protection and hence doesn't need
- * to be fixed up. This means that you have to create a valid update sequence
- * array header in the ntfs record before calling this function, otherwise it
- * will fail (the header needs to contain the position of the update sequence
- * array together with the number of elements in the array). You also need to
- * initialise the update sequence number before calling this function
- * otherwise a random word will be used (whatever was in the record at that
- * position at that time).
- */
-int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size)
-{
- le16 *usa_pos, *data_pos;
- u16 usa_ofs, usa_count, usn;
- le16 le_usn;
-
- /* Sanity check + only fixup if it makes sense. */
- if (!b || ntfs_is_baad_record(b->magic) ||
- ntfs_is_hole_record(b->magic))
- return -EINVAL;
- /* Setup the variables. */
- usa_ofs = le16_to_cpu(b->usa_ofs);
- /* Decrement usa_count to get number of fixups. */
- usa_count = le16_to_cpu(b->usa_count) - 1;
- /* Size and alignment checks. */
- if ( size & (NTFS_BLOCK_SIZE - 1) ||
- usa_ofs & 1 ||
- usa_ofs + (usa_count * 2) > size ||
- (size >> NTFS_BLOCK_SIZE_BITS) != usa_count)
- return -EINVAL;
- /* Position of usn in update sequence array. */
- usa_pos = (le16*)((u8*)b + usa_ofs);
- /*
- * Cyclically increment the update sequence number
- * (skipping 0 and -1, i.e. 0xffff).
- */
- usn = le16_to_cpup(usa_pos) + 1;
- if (usn == 0xffff || !usn)
- usn = 1;
- le_usn = cpu_to_le16(usn);
- *usa_pos = le_usn;
- /* Position in data of first u16 that needs fixing up. */
- data_pos = (le16*)b + NTFS_BLOCK_SIZE/sizeof(le16) - 1;
- /* Fixup all sectors. */
- while (usa_count--) {
- /*
- * Increment the position in the usa and save the
- * original data from the data buffer into the usa.
- */
- *(++usa_pos) = *data_pos;
- /* Apply fixup to data. */
- *data_pos = le_usn;
- /* Increment position in data as well. */
- data_pos += NTFS_BLOCK_SIZE/sizeof(le16);
- }
- return 0;
-}
-
-/**
- * post_write_mst_fixup - fast deprotect multi sector transfer protected data
- * @b: pointer to the data to deprotect
- *
- * Perform the necessary post write multi sector transfer fixup, not checking
- * for any errors, because we assume we have just used pre_write_mst_fixup(),
- * thus the data will be fine or we would never have gotten here.
- */
-void post_write_mst_fixup(NTFS_RECORD *b)
-{
- le16 *usa_pos, *data_pos;
-
- u16 usa_ofs = le16_to_cpu(b->usa_ofs);
- u16 usa_count = le16_to_cpu(b->usa_count) - 1;
-
- /* Position of usn in update sequence array. */
- usa_pos = (le16*)b + usa_ofs/sizeof(le16);
-
- /* Position in protected data of first u16 that needs fixing up. */
- data_pos = (le16*)b + NTFS_BLOCK_SIZE/sizeof(le16) - 1;
-
- /* Fixup all sectors. */
- while (usa_count--) {
- /*
- * Increment position in usa and restore original data from
- * the usa into the data buffer.
- */
- *data_pos = *(++usa_pos);
-
- /* Increment position in data as well. */
- data_pos += NTFS_BLOCK_SIZE/sizeof(le16);
- }
-}
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
deleted file mode 100644
index d7498ddc4a72..000000000000
--- a/fs/ntfs/namei.c
+++ /dev/null
@@ -1,392 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS
- * project.
- *
- * Copyright (c) 2001-2006 Anton Altaparmakov
- */
-
-#include <linux/dcache.h>
-#include <linux/exportfs.h>
-#include <linux/security.h>
-#include <linux/slab.h>
-
-#include "attrib.h"
-#include "debug.h"
-#include "dir.h"
-#include "mft.h"
-#include "ntfs.h"
-
-/**
- * ntfs_lookup - find the inode represented by a dentry in a directory inode
- * @dir_ino: directory inode in which to look for the inode
- * @dent: dentry representing the inode to look for
- * @flags: lookup flags
- *
- * In short, ntfs_lookup() looks for the inode represented by the dentry @dent
- * in the directory inode @dir_ino and if found attaches the inode to the
- * dentry @dent.
- *
- * In more detail, the dentry @dent specifies which inode to look for by
- * supplying the name of the inode in @dent->d_name.name. ntfs_lookup()
- * converts the name to Unicode and walks the contents of the directory inode
- * @dir_ino looking for the converted Unicode name. If the name is found in the
- * directory, the corresponding inode is loaded by calling ntfs_iget() on its
- * inode number and the inode is associated with the dentry @dent via a call to
- * d_splice_alias().
- *
- * If the name is not found in the directory, a NULL inode is inserted into the
- * dentry @dent via a call to d_add(). The dentry is then termed a negative
- * dentry.
- *
- * Only if an actual error occurs, do we return an error via ERR_PTR().
- *
- * In order to handle the case insensitivity issues of NTFS with regards to the
- * dcache and the dcache requiring only one dentry per directory, we deal with
- * dentry aliases that only differ in case in ->ntfs_lookup() while maintaining
- * a case sensitive dcache. This means that we get the full benefit of dcache
- * speed when the file/directory is looked up with the same case as returned by
- * ->ntfs_readdir() but that a lookup for any other case (or for the short file
- * name) will not find anything in dcache and will enter ->ntfs_lookup()
- * instead, where we search the directory for a fully matching file name
- * (including case) and if that is not found, we search for a file name that
- * matches with different case and if that has non-POSIX semantics we return
- * that. We actually do only one search (case sensitive) and keep tabs on
- * whether we have found a case insensitive match in the process.
- *
- * To simplify matters for us, we do not treat the short vs long filenames as
- * two hard links but instead if the lookup matches a short filename, we
- * return the dentry for the corresponding long filename instead.
- *
- * There are three cases we need to distinguish here:
- *
- * 1) @dent perfectly matches (i.e. including case) a directory entry with a
- * file name in the WIN32 or POSIX namespaces. In this case
- * ntfs_lookup_inode_by_name() will return with name set to NULL and we
- * just d_splice_alias() @dent.
- * 2) @dent matches (not including case) a directory entry with a file name in
- * the WIN32 namespace. In this case ntfs_lookup_inode_by_name() will return
- * with name set to point to a kmalloc()ed ntfs_name structure containing
- * the properly cased little endian Unicode name. We convert the name to the
- * current NLS code page, search if a dentry with this name already exists
- * and if so return that instead of @dent. At this point things are
- * complicated by the possibility of 'disconnected' dentries due to NFS
- * which we deal with appropriately (see the code comments). The VFS will
- * then destroy the old @dent and use the one we returned. If a dentry is
- * not found, we allocate a new one, d_splice_alias() it, and return it as
- * above.
- * 3) @dent matches either perfectly or not (i.e. we don't care about case) a
- * directory entry with a file name in the DOS namespace. In this case
- * ntfs_lookup_inode_by_name() will return with name set to point to a
- * kmalloc()ed ntfs_name structure containing the mft reference (cpu endian)
- * of the inode. We use the mft reference to read the inode and to find the
- * file name in the WIN32 namespace corresponding to the matched short file
- * name. We then convert the name to the current NLS code page, and proceed
- * searching for a dentry with this name, etc, as in case 2), above.
- *
- * Locking: Caller must hold i_mutex on the directory.
- */
-static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
- unsigned int flags)
-{
- ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
- struct inode *dent_inode;
- ntfschar *uname;
- ntfs_name *name = NULL;
- MFT_REF mref;
- unsigned long dent_ino;
- int uname_len;
-
- ntfs_debug("Looking up %pd in directory inode 0x%lx.",
- dent, dir_ino->i_ino);
- /* Convert the name of the dentry to Unicode. */
- uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len,
- &uname);
- if (uname_len < 0) {
- if (uname_len != -ENAMETOOLONG)
- ntfs_error(vol->sb, "Failed to convert name to "
- "Unicode.");
- return ERR_PTR(uname_len);
- }
- mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len,
- &name);
- kmem_cache_free(ntfs_name_cache, uname);
- if (!IS_ERR_MREF(mref)) {
- dent_ino = MREF(mref);
- ntfs_debug("Found inode 0x%lx. Calling ntfs_iget.", dent_ino);
- dent_inode = ntfs_iget(vol->sb, dent_ino);
- if (!IS_ERR(dent_inode)) {
- /* Consistency check. */
- if (is_bad_inode(dent_inode) || MSEQNO(mref) ==
- NTFS_I(dent_inode)->seq_no ||
- dent_ino == FILE_MFT) {
- /* Perfect WIN32/POSIX match. -- Case 1. */
- if (!name) {
- ntfs_debug("Done. (Case 1.)");
- return d_splice_alias(dent_inode, dent);
- }
- /*
- * We are too indented. Handle imperfect
- * matches and short file names further below.
- */
- goto handle_name;
- }
- ntfs_error(vol->sb, "Found stale reference to inode "
- "0x%lx (reference sequence number = "
- "0x%x, inode sequence number = 0x%x), "
- "returning -EIO. Run chkdsk.",
- dent_ino, MSEQNO(mref),
- NTFS_I(dent_inode)->seq_no);
- iput(dent_inode);
- dent_inode = ERR_PTR(-EIO);
- } else
- ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with "
- "error code %li.", dent_ino,
- PTR_ERR(dent_inode));
- kfree(name);
- /* Return the error code. */
- return ERR_CAST(dent_inode);
- }
- /* It is guaranteed that @name is no longer allocated at this point. */
- if (MREF_ERR(mref) == -ENOENT) {
- ntfs_debug("Entry was not found, adding negative dentry.");
- /* The dcache will handle negative entries. */
- d_add(dent, NULL);
- ntfs_debug("Done.");
- return NULL;
- }
- ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error "
- "code %i.", -MREF_ERR(mref));
- return ERR_PTR(MREF_ERR(mref));
- // TODO: Consider moving this lot to a separate function! (AIA)
-handle_name:
- {
- MFT_RECORD *m;
- ntfs_attr_search_ctx *ctx;
- ntfs_inode *ni = NTFS_I(dent_inode);
- int err;
- struct qstr nls_name;
-
- nls_name.name = NULL;
- if (name->type != FILE_NAME_DOS) { /* Case 2. */
- ntfs_debug("Case 2.");
- nls_name.len = (unsigned)ntfs_ucstonls(vol,
- (ntfschar*)&name->name, name->len,
- (unsigned char**)&nls_name.name, 0);
- kfree(name);
- } else /* if (name->type == FILE_NAME_DOS) */ { /* Case 3. */
- FILE_NAME_ATTR *fn;
-
- ntfs_debug("Case 3.");
- kfree(name);
-
- /* Find the WIN32 name corresponding to the matched DOS name. */
- ni = NTFS_I(dent_inode);
- m = map_mft_record(ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- do {
- ATTR_RECORD *a;
- u32 val_len;
-
- err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0,
- NULL, 0, ctx);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Inode corrupt: No WIN32 "
- "namespace counterpart to DOS "
- "file name. Run chkdsk.");
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- /* Consistency checks. */
- a = ctx->attr;
- if (a->non_resident || a->flags)
- goto eio_err_out;
- val_len = le32_to_cpu(a->data.resident.value_length);
- if (le16_to_cpu(a->data.resident.value_offset) +
- val_len > le32_to_cpu(a->length))
- goto eio_err_out;
- fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu(
- ctx->attr->data.resident.value_offset));
- if ((u32)(fn->file_name_length * sizeof(ntfschar) +
- sizeof(FILE_NAME_ATTR)) > val_len)
- goto eio_err_out;
- } while (fn->file_name_type != FILE_NAME_WIN32);
-
- /* Convert the found WIN32 name to current NLS code page. */
- nls_name.len = (unsigned)ntfs_ucstonls(vol,
- (ntfschar*)&fn->file_name, fn->file_name_length,
- (unsigned char**)&nls_name.name, 0);
-
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- }
- m = NULL;
- ctx = NULL;
-
- /* Check if a conversion error occurred. */
- if ((signed)nls_name.len < 0) {
- err = (signed)nls_name.len;
- goto err_out;
- }
- nls_name.hash = full_name_hash(dent, nls_name.name, nls_name.len);
-
- dent = d_add_ci(dent, dent_inode, &nls_name);
- kfree(nls_name.name);
- return dent;
-
-eio_err_out:
- ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
- err = -EIO;
-err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(ni);
- iput(dent_inode);
- ntfs_error(vol->sb, "Failed, returning error code %i.", err);
- return ERR_PTR(err);
- }
-}
-
-/*
- * Inode operations for directories.
- */
-const struct inode_operations ntfs_dir_inode_ops = {
- .lookup = ntfs_lookup, /* VFS: Lookup directory. */
-};
-
-/**
- * ntfs_get_parent - find the dentry of the parent of a given directory dentry
- * @child_dent: dentry of the directory whose parent directory to find
- *
- * Find the dentry for the parent directory of the directory specified by the
- * dentry @child_dent. This function is called from
- * fs/exportfs/expfs.c::find_exported_dentry() which in turn is called from the
- * default ->decode_fh() which is export_decode_fh() in the same file.
- *
- * The code is based on the ext3 ->get_parent() implementation found in
- * fs/ext3/namei.c::ext3_get_parent().
- *
- * Note: ntfs_get_parent() is called with @d_inode(child_dent)->i_mutex down.
- *
- * Return the dentry of the parent directory on success or the error code on
- * error (IS_ERR() is true).
- */
-static struct dentry *ntfs_get_parent(struct dentry *child_dent)
-{
- struct inode *vi = d_inode(child_dent);
- ntfs_inode *ni = NTFS_I(vi);
- MFT_RECORD *mrec;
- ntfs_attr_search_ctx *ctx;
- ATTR_RECORD *attr;
- FILE_NAME_ATTR *fn;
- unsigned long parent_ino;
- int err;
-
- ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
- /* Get the mft record of the inode belonging to the child dentry. */
- mrec = map_mft_record(ni);
- if (IS_ERR(mrec))
- return ERR_CAST(mrec);
- /* Find the first file name attribute in the mft record. */
- ctx = ntfs_attr_get_search_ctx(ni, mrec);
- if (unlikely(!ctx)) {
- unmap_mft_record(ni);
- return ERR_PTR(-ENOMEM);
- }
-try_next:
- err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, CASE_SENSITIVE, 0, NULL,
- 0, ctx);
- if (unlikely(err)) {
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- if (err == -ENOENT)
- ntfs_error(vi->i_sb, "Inode 0x%lx does not have a "
- "file name attribute. Run chkdsk.",
- vi->i_ino);
- return ERR_PTR(err);
- }
- attr = ctx->attr;
- if (unlikely(attr->non_resident))
- goto try_next;
- fn = (FILE_NAME_ATTR *)((u8 *)attr +
- le16_to_cpu(attr->data.resident.value_offset));
- if (unlikely((u8 *)fn + le32_to_cpu(attr->data.resident.value_length) >
- (u8*)attr + le32_to_cpu(attr->length)))
- goto try_next;
- /* Get the inode number of the parent directory. */
- parent_ino = MREF_LE(fn->parent_directory);
- /* Release the search context and the mft record of the child. */
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
-
- return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino));
-}
-
-static struct inode *ntfs_nfs_get_inode(struct super_block *sb,
- u64 ino, u32 generation)
-{
- struct inode *inode;
-
- inode = ntfs_iget(sb, ino);
- if (!IS_ERR(inode)) {
- if (is_bad_inode(inode) || inode->i_generation != generation) {
- iput(inode);
- inode = ERR_PTR(-ESTALE);
- }
- }
-
- return inode;
-}
-
-static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
- ntfs_nfs_get_inode);
-}
-
-static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- return generic_fh_to_parent(sb, fid, fh_len, fh_type,
- ntfs_nfs_get_inode);
-}
-
-/*
- * Export operations allowing NFS exporting of mounted NTFS partitions.
- *
- * We use the default ->encode_fh() for now. Note that they
- * use 32 bits to store the inode number which is an unsigned long so on 64-bit
- * architectures is usually 64 bits so it would all fail horribly on huge
- * volumes. I guess we need to define our own encode and decode fh functions
- * that store 64-bit inode numbers at some point but for now we will ignore the
- * problem...
- *
- * We also use the default ->get_name() helper (used by ->decode_fh() via
- * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
- * independent.
- *
- * The default ->get_parent() just returns -EACCES so we have to provide our
- * own and the default ->get_dentry() is incompatible with NTFS due to not
- * allowing the inode number 0 which is used in NTFS for the system file $MFT
- * and due to using iget() whereas NTFS needs ntfs_iget().
- */
-const struct export_operations ntfs_export_ops = {
- .encode_fh = generic_encode_ino32_fh,
- .get_parent = ntfs_get_parent, /* Find the parent of a given
- directory. */
- .fh_to_dentry = ntfs_fh_to_dentry,
- .fh_to_parent = ntfs_fh_to_parent,
-};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
deleted file mode 100644
index e81376ea9152..000000000000
--- a/fs/ntfs/ntfs.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * ntfs.h - Defines for NTFS Linux kernel driver.
- *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
- * Copyright (C) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_H
-#define _LINUX_NTFS_H
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/compiler.h>
-#include <linux/fs.h>
-#include <linux/nls.h>
-#include <linux/smp.h>
-#include <linux/pagemap.h>
-
-#include "types.h"
-#include "volume.h"
-#include "layout.h"
-
-typedef enum {
- NTFS_BLOCK_SIZE = 512,
- NTFS_BLOCK_SIZE_BITS = 9,
- NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */
- NTFS_MAX_NAME_LEN = 255,
- NTFS_MAX_ATTR_NAME_LEN = 255,
- NTFS_MAX_CLUSTER_SIZE = 64 * 1024, /* 64kiB */
- NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_SIZE,
-} NTFS_CONSTANTS;
-
-/* Global variables. */
-
-/* Slab caches (from super.c). */
-extern struct kmem_cache *ntfs_name_cache;
-extern struct kmem_cache *ntfs_inode_cache;
-extern struct kmem_cache *ntfs_big_inode_cache;
-extern struct kmem_cache *ntfs_attr_ctx_cache;
-extern struct kmem_cache *ntfs_index_ctx_cache;
-
-/* The various operations structs defined throughout the driver files. */
-extern const struct address_space_operations ntfs_normal_aops;
-extern const struct address_space_operations ntfs_compressed_aops;
-extern const struct address_space_operations ntfs_mst_aops;
-
-extern const struct file_operations ntfs_file_ops;
-extern const struct inode_operations ntfs_file_inode_ops;
-
-extern const struct file_operations ntfs_dir_ops;
-extern const struct inode_operations ntfs_dir_inode_ops;
-
-extern const struct file_operations ntfs_empty_file_ops;
-extern const struct inode_operations ntfs_empty_inode_ops;
-
-extern const struct export_operations ntfs_export_ops;
-
-/**
- * NTFS_SB - return the ntfs volume given a vfs super block
- * @sb: VFS super block
- *
- * NTFS_SB() returns the ntfs volume associated with the VFS super block @sb.
- */
-static inline ntfs_volume *NTFS_SB(struct super_block *sb)
-{
- return sb->s_fs_info;
-}
-
-/* Declarations of functions and global variables. */
-
-/* From fs/ntfs/compress.c */
-extern int ntfs_read_compressed_block(struct page *page);
-extern int allocate_compression_buffers(void);
-extern void free_compression_buffers(void);
-
-/* From fs/ntfs/super.c */
-#define default_upcase_len 0x10000
-extern struct mutex ntfs_lock;
-
-typedef struct {
- int val;
- char *str;
-} option_t;
-extern const option_t on_errors_arr[];
-
-/* From fs/ntfs/mst.c */
-extern int post_read_mst_fixup(NTFS_RECORD *b, const u32 size);
-extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size);
-extern void post_write_mst_fixup(NTFS_RECORD *b);
-
-/* From fs/ntfs/unistr.c */
-extern bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
- const ntfschar *s2, size_t s2_len,
- const IGNORE_CASE_BOOL ic,
- const ntfschar *upcase, const u32 upcase_size);
-extern int ntfs_collate_names(const ntfschar *name1, const u32 name1_len,
- const ntfschar *name2, const u32 name2_len,
- const int err_val, const IGNORE_CASE_BOOL ic,
- const ntfschar *upcase, const u32 upcase_len);
-extern int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n);
-extern int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
- const ntfschar *upcase, const u32 upcase_size);
-extern void ntfs_upcase_name(ntfschar *name, u32 name_len,
- const ntfschar *upcase, const u32 upcase_len);
-extern void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr,
- const ntfschar *upcase, const u32 upcase_len);
-extern int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
- FILE_NAME_ATTR *file_name_attr2,
- const int err_val, const IGNORE_CASE_BOOL ic,
- const ntfschar *upcase, const u32 upcase_len);
-extern int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
- const int ins_len, ntfschar **outs);
-extern int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
- const int ins_len, unsigned char **outs, int outs_len);
-
-/* From fs/ntfs/upcase.c */
-extern ntfschar *generate_default_upcase(void);
-
-static inline int ntfs_ffs(int x)
-{
- int r = 1;
-
- if (!x)
- return 0;
- if (!(x & 0xffff)) {
- x >>= 16;
- r += 16;
- }
- if (!(x & 0xff)) {
- x >>= 8;
- r += 8;
- }
- if (!(x & 0xf)) {
- x >>= 4;
- r += 4;
- }
- if (!(x & 3)) {
- x >>= 2;
- r += 2;
- }
- if (!(x & 1)) {
- x >>= 1;
- r += 1;
- }
- return r;
-}
-
-#endif /* _LINUX_NTFS_H */
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
deleted file mode 100644
index 9160480222fd..000000000000
--- a/fs/ntfs/quota.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * quota.c - NTFS kernel quota ($Quota) handling. Part of the Linux-NTFS
- * project.
- *
- * Copyright (c) 2004 Anton Altaparmakov
- */
-
-#ifdef NTFS_RW
-
-#include "index.h"
-#include "quota.h"
-#include "debug.h"
-#include "ntfs.h"
-
-/**
- * ntfs_mark_quotas_out_of_date - mark the quotas out of date on an ntfs volume
- * @vol: ntfs volume on which to mark the quotas out of date
- *
- * Mark the quotas out of date on the ntfs volume @vol and return 'true' on
- * success and 'false' on error.
- */
-bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
-{
- ntfs_index_context *ictx;
- QUOTA_CONTROL_ENTRY *qce;
- const le32 qid = QUOTA_DEFAULTS_ID;
- int err;
-
- ntfs_debug("Entering.");
- if (NVolQuotaOutOfDate(vol))
- goto done;
- if (!vol->quota_ino || !vol->quota_q_ino) {
- ntfs_error(vol->sb, "Quota inodes are not open.");
- return false;
- }
- inode_lock(vol->quota_q_ino);
- ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
- if (!ictx) {
- ntfs_error(vol->sb, "Failed to get index context.");
- goto err_out;
- }
- err = ntfs_index_lookup(&qid, sizeof(qid), ictx);
- if (err) {
- if (err == -ENOENT)
- ntfs_error(vol->sb, "Quota defaults entry is not "
- "present.");
- else
- ntfs_error(vol->sb, "Lookup of quota defaults entry "
- "failed.");
- goto err_out;
- }
- if (ictx->data_len < offsetof(QUOTA_CONTROL_ENTRY, sid)) {
- ntfs_error(vol->sb, "Quota defaults entry size is invalid. "
- "Run chkdsk.");
- goto err_out;
- }
- qce = (QUOTA_CONTROL_ENTRY*)ictx->data;
- if (le32_to_cpu(qce->version) != QUOTA_VERSION) {
- ntfs_error(vol->sb, "Quota defaults entry version 0x%x is not "
- "supported.", le32_to_cpu(qce->version));
- goto err_out;
- }
- ntfs_debug("Quota defaults flags = 0x%x.", le32_to_cpu(qce->flags));
- /* If quotas are already marked out of date, no need to do anything. */
- if (qce->flags & QUOTA_FLAG_OUT_OF_DATE)
- goto set_done;
- /*
- * If quota tracking is neither requested, nor enabled and there are no
- * pending deletes, no need to mark the quotas out of date.
- */
- if (!(qce->flags & (QUOTA_FLAG_TRACKING_ENABLED |
- QUOTA_FLAG_TRACKING_REQUESTED |
- QUOTA_FLAG_PENDING_DELETES)))
- goto set_done;
- /*
- * Set the QUOTA_FLAG_OUT_OF_DATE bit thus marking quotas out of date.
- * This is verified on WinXP to be sufficient to cause windows to
- * rescan the volume on boot and update all quota entries.
- */
- qce->flags |= QUOTA_FLAG_OUT_OF_DATE;
- /* Ensure the modified flags are written to disk. */
- ntfs_index_entry_flush_dcache_page(ictx);
- ntfs_index_entry_mark_dirty(ictx);
-set_done:
- ntfs_index_ctx_put(ictx);
- inode_unlock(vol->quota_q_ino);
- /*
- * We set the flag so we do not try to mark the quotas out of date
- * again on remount.
- */
- NVolSetQuotaOutOfDate(vol);
-done:
- ntfs_debug("Done.");
- return true;
-err_out:
- if (ictx)
- ntfs_index_ctx_put(ictx);
- inode_unlock(vol->quota_q_ino);
- return false;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/quota.h b/fs/ntfs/quota.h
deleted file mode 100644
index fe3132a3d6d2..000000000000
--- a/fs/ntfs/quota.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * quota.h - Defines for NTFS kernel quota ($Quota) handling. Part of the
- * Linux-NTFS project.
- *
- * Copyright (c) 2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_QUOTA_H
-#define _LINUX_NTFS_QUOTA_H
-
-#ifdef NTFS_RW
-
-#include "types.h"
-#include "volume.h"
-
-extern bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_QUOTA_H */
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
deleted file mode 100644
index 0d448e9881f7..000000000000
--- a/fs/ntfs/runlist.c
+++ /dev/null
@@ -1,1893 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2007 Anton Altaparmakov
- * Copyright (c) 2002-2005 Richard Russon
- */
-
-#include "debug.h"
-#include "dir.h"
-#include "endian.h"
-#include "malloc.h"
-#include "ntfs.h"
-
-/**
- * ntfs_rl_mm - runlist memmove
- *
- * It is up to the caller to serialize access to the runlist @base.
- */
-static inline void ntfs_rl_mm(runlist_element *base, int dst, int src,
- int size)
-{
- if (likely((dst != src) && (size > 0)))
- memmove(base + dst, base + src, size * sizeof(*base));
-}
-
-/**
- * ntfs_rl_mc - runlist memory copy
- *
- * It is up to the caller to serialize access to the runlists @dstbase and
- * @srcbase.
- */
-static inline void ntfs_rl_mc(runlist_element *dstbase, int dst,
- runlist_element *srcbase, int src, int size)
-{
- if (likely(size > 0))
- memcpy(dstbase + dst, srcbase + src, size * sizeof(*dstbase));
-}
-
-/**
- * ntfs_rl_realloc - Reallocate memory for runlists
- * @rl: original runlist
- * @old_size: number of runlist elements in the original runlist @rl
- * @new_size: number of runlist elements we need space for
- *
- * As the runlists grow, more memory will be required. To prevent the
- * kernel having to allocate and reallocate large numbers of small bits of
- * memory, this function returns an entire page of memory.
- *
- * It is up to the caller to serialize access to the runlist @rl.
- *
- * N.B. If the new allocation doesn't require a different number of pages in
- * memory, the function will return the original pointer.
- *
- * On success, return a pointer to the newly allocated, or recycled, memory.
- * On error, return -errno. The following error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- */
-static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
- int old_size, int new_size)
-{
- runlist_element *new_rl;
-
- old_size = PAGE_ALIGN(old_size * sizeof(*rl));
- new_size = PAGE_ALIGN(new_size * sizeof(*rl));
- if (old_size == new_size)
- return rl;
-
- new_rl = ntfs_malloc_nofs(new_size);
- if (unlikely(!new_rl))
- return ERR_PTR(-ENOMEM);
-
- if (likely(rl != NULL)) {
- if (unlikely(old_size > new_size))
- old_size = new_size;
- memcpy(new_rl, rl, old_size);
- ntfs_free(rl);
- }
- return new_rl;
-}
-
-/**
- * ntfs_rl_realloc_nofail - Reallocate memory for runlists
- * @rl: original runlist
- * @old_size: number of runlist elements in the original runlist @rl
- * @new_size: number of runlist elements we need space for
- *
- * As the runlists grow, more memory will be required. To prevent the
- * kernel having to allocate and reallocate large numbers of small bits of
- * memory, this function returns an entire page of memory.
- *
- * This function guarantees that the allocation will succeed. It will sleep
- * for as long as it takes to complete the allocation.
- *
- * It is up to the caller to serialize access to the runlist @rl.
- *
- * N.B. If the new allocation doesn't require a different number of pages in
- * memory, the function will return the original pointer.
- *
- * On success, return a pointer to the newly allocated, or recycled, memory.
- * On error, return -errno. The following error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- */
-static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl,
- int old_size, int new_size)
-{
- runlist_element *new_rl;
-
- old_size = PAGE_ALIGN(old_size * sizeof(*rl));
- new_size = PAGE_ALIGN(new_size * sizeof(*rl));
- if (old_size == new_size)
- return rl;
-
- new_rl = ntfs_malloc_nofs_nofail(new_size);
- BUG_ON(!new_rl);
-
- if (likely(rl != NULL)) {
- if (unlikely(old_size > new_size))
- old_size = new_size;
- memcpy(new_rl, rl, old_size);
- ntfs_free(rl);
- }
- return new_rl;
-}
-
-/**
- * ntfs_are_rl_mergeable - test if two runlists can be joined together
- * @dst: original runlist
- * @src: new runlist to test for mergeability with @dst
- *
- * Test if two runlists can be joined together. For this, their VCNs and LCNs
- * must be adjacent.
- *
- * It is up to the caller to serialize access to the runlists @dst and @src.
- *
- * Return: true Success, the runlists can be merged.
- * false Failure, the runlists cannot be merged.
- */
-static inline bool ntfs_are_rl_mergeable(runlist_element *dst,
- runlist_element *src)
-{
- BUG_ON(!dst);
- BUG_ON(!src);
-
- /* We can merge unmapped regions even if they are misaligned. */
- if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
- return true;
- /* If the runs are misaligned, we cannot merge them. */
- if ((dst->vcn + dst->length) != src->vcn)
- return false;
- /* If both runs are non-sparse and contiguous, we can merge them. */
- if ((dst->lcn >= 0) && (src->lcn >= 0) &&
- ((dst->lcn + dst->length) == src->lcn))
- return true;
- /* If we are merging two holes, we can merge them. */
- if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
- return true;
- /* Cannot merge. */
- return false;
-}
-
-/**
- * __ntfs_rl_merge - merge two runlists without testing if they can be merged
- * @dst: original, destination runlist
- * @src: new runlist to merge with @dst
- *
- * Merge the two runlists, writing into the destination runlist @dst. The
- * caller must make sure the runlists can be merged or this will corrupt the
- * destination runlist.
- *
- * It is up to the caller to serialize access to the runlists @dst and @src.
- */
-static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src)
-{
- dst->length += src->length;
-}
-
-/**
- * ntfs_rl_append - append a runlist after a given element
- * @dst: original runlist to be worked on
- * @dsize: number of elements in @dst (including end marker)
- * @src: runlist to be inserted into @dst
- * @ssize: number of elements in @src (excluding end marker)
- * @loc: append the new runlist @src after this element in @dst
- *
- * Append the runlist @src after element @loc in @dst. Merge the right end of
- * the new runlist, if necessary. Adjust the size of the hole before the
- * appended runlist.
- *
- * It is up to the caller to serialize access to the runlists @dst and @src.
- *
- * On success, return a pointer to the new, combined, runlist. Note, both
- * runlists @dst and @src are deallocated before returning so you cannot use
- * the pointers for anything any more. (Strictly speaking the returned runlist
- * may be the same as @dst but this is irrelevant.)
- *
- * On error, return -errno. Both runlists are left unmodified. The following
- * error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- */
-static inline runlist_element *ntfs_rl_append(runlist_element *dst,
- int dsize, runlist_element *src, int ssize, int loc)
-{
- bool right = false; /* Right end of @src needs merging. */
- int marker; /* End of the inserted runs. */
-
- BUG_ON(!dst);
- BUG_ON(!src);
-
- /* First, check if the right hand end needs merging. */
- if ((loc + 1) < dsize)
- right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
-
- /* Space required: @dst size + @src size, less one if we merged. */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
- if (IS_ERR(dst))
- return dst;
- /*
- * We are guaranteed to succeed from here so can start modifying the
- * original runlists.
- */
-
- /* First, merge the right hand end, if necessary. */
- if (right)
- __ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
-
- /* First run after the @src runs that have been inserted. */
- marker = loc + ssize + 1;
-
- /* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
- ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
-
- /* Adjust the size of the preceding hole. */
- dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
-
- /* We may have changed the length of the file, so fix the end marker */
- if (dst[marker].lcn == LCN_ENOENT)
- dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
-
- return dst;
-}
-
-/**
- * ntfs_rl_insert - insert a runlist into another
- * @dst: original runlist to be worked on
- * @dsize: number of elements in @dst (including end marker)
- * @src: new runlist to be inserted
- * @ssize: number of elements in @src (excluding end marker)
- * @loc: insert the new runlist @src before this element in @dst
- *
- * Insert the runlist @src before element @loc in the runlist @dst. Merge the
- * left end of the new runlist, if necessary. Adjust the size of the hole
- * after the inserted runlist.
- *
- * It is up to the caller to serialize access to the runlists @dst and @src.
- *
- * On success, return a pointer to the new, combined, runlist. Note, both
- * runlists @dst and @src are deallocated before returning so you cannot use
- * the pointers for anything any more. (Strictly speaking the returned runlist
- * may be the same as @dst but this is irrelevant.)
- *
- * On error, return -errno. Both runlists are left unmodified. The following
- * error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- */
-static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
- int dsize, runlist_element *src, int ssize, int loc)
-{
- bool left = false; /* Left end of @src needs merging. */
- bool disc = false; /* Discontinuity between @dst and @src. */
- int marker; /* End of the inserted runs. */
-
- BUG_ON(!dst);
- BUG_ON(!src);
-
- /*
- * disc => Discontinuity between the end of @dst and the start of @src.
- * This means we might need to insert a "not mapped" run.
- */
- if (loc == 0)
- disc = (src[0].vcn > 0);
- else {
- s64 merged_length;
-
- left = ntfs_are_rl_mergeable(dst + loc - 1, src);
-
- merged_length = dst[loc - 1].length;
- if (left)
- merged_length += src->length;
-
- disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
- }
- /*
- * Space required: @dst size + @src size, less one if we merged, plus
- * one if there was a discontinuity.
- */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
- if (IS_ERR(dst))
- return dst;
- /*
- * We are guaranteed to succeed from here so can start modifying the
- * original runlist.
- */
- if (left)
- __ntfs_rl_merge(dst + loc - 1, src);
- /*
- * First run after the @src runs that have been inserted.
- * Nominally, @marker equals @loc + @ssize, i.e. location + number of
- * runs in @src. However, if @left, then the first run in @src has
- * been merged with one in @dst. And if @disc, then @dst and @src do
- * not meet and we need an extra run to fill the gap.
- */
- marker = loc + ssize - left + disc;
-
- /* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, marker, loc, dsize - loc);
- ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
-
- /* Adjust the VCN of the first run after the insertion... */
- dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
- /* ... and the length. */
- if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
- dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
-
- /* Writing beyond the end of the file and there is a discontinuity. */
- if (disc) {
- if (loc > 0) {
- dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
- dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
- } else {
- dst[loc].vcn = 0;
- dst[loc].length = dst[loc + 1].vcn;
- }
- dst[loc].lcn = LCN_RL_NOT_MAPPED;
- }
- return dst;
-}
-
-/**
- * ntfs_rl_replace - overwrite a runlist element with another runlist
- * @dst: original runlist to be worked on
- * @dsize: number of elements in @dst (including end marker)
- * @src: new runlist to be inserted
- * @ssize: number of elements in @src (excluding end marker)
- * @loc: index in runlist @dst to overwrite with @src
- *
- * Replace the runlist element @dst at @loc with @src. Merge the left and
- * right ends of the inserted runlist, if necessary.
- *
- * It is up to the caller to serialize access to the runlists @dst and @src.
- *
- * On success, return a pointer to the new, combined, runlist. Note, both
- * runlists @dst and @src are deallocated before returning so you cannot use
- * the pointers for anything any more. (Strictly speaking the returned runlist
- * may be the same as @dst but this is irrelevant.)
- *
- * On error, return -errno. Both runlists are left unmodified. The following
- * error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- */
-static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
- int dsize, runlist_element *src, int ssize, int loc)
-{
- signed delta;
- bool left = false; /* Left end of @src needs merging. */
- bool right = false; /* Right end of @src needs merging. */
- int tail; /* Start of tail of @dst. */
- int marker; /* End of the inserted runs. */
-
- BUG_ON(!dst);
- BUG_ON(!src);
-
- /* First, see if the left and right ends need merging. */
- if ((loc + 1) < dsize)
- right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
- if (loc > 0)
- left = ntfs_are_rl_mergeable(dst + loc - 1, src);
- /*
- * Allocate some space. We will need less if the left, right, or both
- * ends get merged. The -1 accounts for the run being replaced.
- */
- delta = ssize - 1 - left - right;
- if (delta > 0) {
- dst = ntfs_rl_realloc(dst, dsize, dsize + delta);
- if (IS_ERR(dst))
- return dst;
- }
- /*
- * We are guaranteed to succeed from here so can start modifying the
- * original runlists.
- */
-
- /* First, merge the left and right ends, if necessary. */
- if (right)
- __ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
- if (left)
- __ntfs_rl_merge(dst + loc - 1, src);
- /*
- * Offset of the tail of @dst. This needs to be moved out of the way
- * to make space for the runs to be copied from @src, i.e. the first
- * run of the tail of @dst.
- * Nominally, @tail equals @loc + 1, i.e. location, skipping the
- * replaced run. However, if @right, then one of @dst's runs is
- * already merged into @src.
- */
- tail = loc + right + 1;
- /*
- * First run after the @src runs that have been inserted, i.e. where
- * the tail of @dst needs to be moved to.
- * Nominally, @marker equals @loc + @ssize, i.e. location + number of
- * runs in @src. However, if @left, then the first run in @src has
- * been merged with one in @dst.
- */
- marker = loc + ssize - left;
-
- /* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, marker, tail, dsize - tail);
- ntfs_rl_mc(dst, loc, src, left, ssize - left);
-
- /* We may have changed the length of the file, so fix the end marker. */
- if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
- dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
- return dst;
-}
-
-/**
- * ntfs_rl_split - insert a runlist into the centre of a hole
- * @dst: original runlist to be worked on
- * @dsize: number of elements in @dst (including end marker)
- * @src: new runlist to be inserted
- * @ssize: number of elements in @src (excluding end marker)
- * @loc: index in runlist @dst at which to split and insert @src
- *
- * Split the runlist @dst at @loc into two and insert @new in between the two
- * fragments. No merging of runlists is necessary. Adjust the size of the
- * holes either side.
- *
- * It is up to the caller to serialize access to the runlists @dst and @src.
- *
- * On success, return a pointer to the new, combined, runlist. Note, both
- * runlists @dst and @src are deallocated before returning so you cannot use
- * the pointers for anything any more. (Strictly speaking the returned runlist
- * may be the same as @dst but this is irrelevant.)
- *
- * On error, return -errno. Both runlists are left unmodified. The following
- * error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- */
-static inline runlist_element *ntfs_rl_split(runlist_element *dst, int dsize,
- runlist_element *src, int ssize, int loc)
-{
- BUG_ON(!dst);
- BUG_ON(!src);
-
- /* Space required: @dst size + @src size + one new hole. */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize + 1);
- if (IS_ERR(dst))
- return dst;
- /*
- * We are guaranteed to succeed from here so can start modifying the
- * original runlists.
- */
-
- /* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, loc + 1 + ssize, loc, dsize - loc);
- ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
-
- /* Adjust the size of the holes either size of @src. */
- dst[loc].length = dst[loc+1].vcn - dst[loc].vcn;
- dst[loc+ssize+1].vcn = dst[loc+ssize].vcn + dst[loc+ssize].length;
- dst[loc+ssize+1].length = dst[loc+ssize+2].vcn - dst[loc+ssize+1].vcn;
-
- return dst;
-}
-
-/**
- * ntfs_runlists_merge - merge two runlists into one
- * @drl: original runlist to be worked on
- * @srl: new runlist to be merged into @drl
- *
- * First we sanity check the two runlists @srl and @drl to make sure that they
- * are sensible and can be merged. The runlist @srl must be either after the
- * runlist @drl or completely within a hole (or unmapped region) in @drl.
- *
- * It is up to the caller to serialize access to the runlists @drl and @srl.
- *
- * Merging of runlists is necessary in two cases:
- * 1. When attribute lists are used and a further extent is being mapped.
- * 2. When new clusters are allocated to fill a hole or extend a file.
- *
- * There are four possible ways @srl can be merged. It can:
- * - be inserted at the beginning of a hole,
- * - split the hole in two and be inserted between the two fragments,
- * - be appended at the end of a hole, or it can
- * - replace the whole hole.
- * It can also be appended to the end of the runlist, which is just a variant
- * of the insert case.
- *
- * On success, return a pointer to the new, combined, runlist. Note, both
- * runlists @drl and @srl are deallocated before returning so you cannot use
- * the pointers for anything any more. (Strictly speaking the returned runlist
- * may be the same as @dst but this is irrelevant.)
- *
- * On error, return -errno. Both runlists are left unmodified. The following
- * error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EINVAL - Invalid parameters were passed in.
- * -ERANGE - The runlists overlap and cannot be merged.
- */
-runlist_element *ntfs_runlists_merge(runlist_element *drl,
- runlist_element *srl)
-{
- int di, si; /* Current index into @[ds]rl. */
- int sstart; /* First index with lcn > LCN_RL_NOT_MAPPED. */
- int dins; /* Index into @drl at which to insert @srl. */
- int dend, send; /* Last index into @[ds]rl. */
- int dfinal, sfinal; /* The last index into @[ds]rl with
- lcn >= LCN_HOLE. */
- int marker = 0;
- VCN marker_vcn = 0;
-
-#ifdef DEBUG
- ntfs_debug("dst:");
- ntfs_debug_dump_runlist(drl);
- ntfs_debug("src:");
- ntfs_debug_dump_runlist(srl);
-#endif
-
- /* Check for silly calling... */
- if (unlikely(!srl))
- return drl;
- if (IS_ERR(srl) || IS_ERR(drl))
- return ERR_PTR(-EINVAL);
-
- /* Check for the case where the first mapping is being done now. */
- if (unlikely(!drl)) {
- drl = srl;
- /* Complete the source runlist if necessary. */
- if (unlikely(drl[0].vcn)) {
- /* Scan to the end of the source runlist. */
- for (dend = 0; likely(drl[dend].length); dend++)
- ;
- dend++;
- drl = ntfs_rl_realloc(drl, dend, dend + 1);
- if (IS_ERR(drl))
- return drl;
- /* Insert start element at the front of the runlist. */
- ntfs_rl_mm(drl, 1, 0, dend);
- drl[0].vcn = 0;
- drl[0].lcn = LCN_RL_NOT_MAPPED;
- drl[0].length = drl[1].vcn;
- }
- goto finished;
- }
-
- si = di = 0;
-
- /* Skip any unmapped start element(s) in the source runlist. */
- while (srl[si].length && srl[si].lcn < LCN_HOLE)
- si++;
-
- /* Can't have an entirely unmapped source runlist. */
- BUG_ON(!srl[si].length);
-
- /* Record the starting points. */
- sstart = si;
-
- /*
- * Skip forward in @drl until we reach the position where @srl needs to
- * be inserted. If we reach the end of @drl, @srl just needs to be
- * appended to @drl.
- */
- for (; drl[di].length; di++) {
- if (drl[di].vcn + drl[di].length > srl[sstart].vcn)
- break;
- }
- dins = di;
-
- /* Sanity check for illegal overlaps. */
- if ((drl[di].vcn == srl[si].vcn) && (drl[di].lcn >= 0) &&
- (srl[si].lcn >= 0)) {
- ntfs_error(NULL, "Run lists overlap. Cannot merge!");
- return ERR_PTR(-ERANGE);
- }
-
- /* Scan to the end of both runlists in order to know their sizes. */
- for (send = si; srl[send].length; send++)
- ;
- for (dend = di; drl[dend].length; dend++)
- ;
-
- if (srl[send].lcn == LCN_ENOENT)
- marker_vcn = srl[marker = send].vcn;
-
- /* Scan to the last element with lcn >= LCN_HOLE. */
- for (sfinal = send; sfinal >= 0 && srl[sfinal].lcn < LCN_HOLE; sfinal--)
- ;
- for (dfinal = dend; dfinal >= 0 && drl[dfinal].lcn < LCN_HOLE; dfinal--)
- ;
-
- {
- bool start;
- bool finish;
- int ds = dend + 1; /* Number of elements in drl & srl */
- int ss = sfinal - sstart + 1;
-
- start = ((drl[dins].lcn < LCN_RL_NOT_MAPPED) || /* End of file */
- (drl[dins].vcn == srl[sstart].vcn)); /* Start of hole */
- finish = ((drl[dins].lcn >= LCN_RL_NOT_MAPPED) && /* End of file */
- ((drl[dins].vcn + drl[dins].length) <= /* End of hole */
- (srl[send - 1].vcn + srl[send - 1].length)));
-
- /* Or we will lose an end marker. */
- if (finish && !drl[dins].length)
- ss++;
- if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
- finish = false;
-#if 0
- ntfs_debug("dfinal = %i, dend = %i", dfinal, dend);
- ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send);
- ntfs_debug("start = %i, finish = %i", start, finish);
- ntfs_debug("ds = %i, ss = %i, dins = %i", ds, ss, dins);
-#endif
- if (start) {
- if (finish)
- drl = ntfs_rl_replace(drl, ds, srl + sstart, ss, dins);
- else
- drl = ntfs_rl_insert(drl, ds, srl + sstart, ss, dins);
- } else {
- if (finish)
- drl = ntfs_rl_append(drl, ds, srl + sstart, ss, dins);
- else
- drl = ntfs_rl_split(drl, ds, srl + sstart, ss, dins);
- }
- if (IS_ERR(drl)) {
- ntfs_error(NULL, "Merge failed.");
- return drl;
- }
- ntfs_free(srl);
- if (marker) {
- ntfs_debug("Triggering marker code.");
- for (ds = dend; drl[ds].length; ds++)
- ;
- /* We only need to care if @srl ended after @drl. */
- if (drl[ds].vcn <= marker_vcn) {
- int slots = 0;
-
- if (drl[ds].vcn == marker_vcn) {
- ntfs_debug("Old marker = 0x%llx, replacing "
- "with LCN_ENOENT.",
- (unsigned long long)
- drl[ds].lcn);
- drl[ds].lcn = LCN_ENOENT;
- goto finished;
- }
- /*
- * We need to create an unmapped runlist element in
- * @drl or extend an existing one before adding the
- * ENOENT terminator.
- */
- if (drl[ds].lcn == LCN_ENOENT) {
- ds--;
- slots = 1;
- }
- if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
- /* Add an unmapped runlist element. */
- if (!slots) {
- drl = ntfs_rl_realloc_nofail(drl, ds,
- ds + 2);
- slots = 2;
- }
- ds++;
- /* Need to set vcn if it isn't set already. */
- if (slots != 1)
- drl[ds].vcn = drl[ds - 1].vcn +
- drl[ds - 1].length;
- drl[ds].lcn = LCN_RL_NOT_MAPPED;
- /* We now used up a slot. */
- slots--;
- }
- drl[ds].length = marker_vcn - drl[ds].vcn;
- /* Finally add the ENOENT terminator. */
- ds++;
- if (!slots)
- drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
- drl[ds].vcn = marker_vcn;
- drl[ds].lcn = LCN_ENOENT;
- drl[ds].length = (s64)0;
- }
- }
- }
-
-finished:
- /* The merge was completed successfully. */
- ntfs_debug("Merged runlist:");
- ntfs_debug_dump_runlist(drl);
- return drl;
-}
-
-/**
- * ntfs_mapping_pairs_decompress - convert mapping pairs array to runlist
- * @vol: ntfs volume on which the attribute resides
- * @attr: attribute record whose mapping pairs array to decompress
- * @old_rl: optional runlist in which to insert @attr's runlist
- *
- * It is up to the caller to serialize access to the runlist @old_rl.
- *
- * Decompress the attribute @attr's mapping pairs array into a runlist. On
- * success, return the decompressed runlist.
- *
- * If @old_rl is not NULL, decompressed runlist is inserted into the
- * appropriate place in @old_rl and the resultant, combined runlist is
- * returned. The original @old_rl is deallocated.
- *
- * On error, return -errno. @old_rl is left unmodified in that case.
- *
- * The following error codes are defined:
- * -ENOMEM - Not enough memory to allocate runlist array.
- * -EIO - Corrupt runlist.
- * -EINVAL - Invalid parameters were passed in.
- * -ERANGE - The two runlists overlap.
- *
- * FIXME: For now we take the conceptionally simplest approach of creating the
- * new runlist disregarding the already existing one and then splicing the
- * two into one, if that is possible (we check for overlap and discard the new
- * runlist if overlap present before returning ERR_PTR(-ERANGE)).
- */
-runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
- const ATTR_RECORD *attr, runlist_element *old_rl)
-{
- VCN vcn; /* Current vcn. */
- LCN lcn; /* Current lcn. */
- s64 deltaxcn; /* Change in [vl]cn. */
- runlist_element *rl; /* The output runlist. */
- u8 *buf; /* Current position in mapping pairs array. */
- u8 *attr_end; /* End of attribute. */
- int rlsize; /* Size of runlist buffer. */
- u16 rlpos; /* Current runlist position in units of
- runlist_elements. */
- u8 b; /* Current byte offset in buf. */
-
-#ifdef DEBUG
- /* Make sure attr exists and is non-resident. */
- if (!attr || !attr->non_resident || sle64_to_cpu(
- attr->data.non_resident.lowest_vcn) < (VCN)0) {
- ntfs_error(vol->sb, "Invalid arguments.");
- return ERR_PTR(-EINVAL);
- }
-#endif
- /* Start at vcn = lowest_vcn and lcn 0. */
- vcn = sle64_to_cpu(attr->data.non_resident.lowest_vcn);
- lcn = 0;
- /* Get start of the mapping pairs array. */
- buf = (u8*)attr + le16_to_cpu(
- attr->data.non_resident.mapping_pairs_offset);
- attr_end = (u8*)attr + le32_to_cpu(attr->length);
- if (unlikely(buf < (u8*)attr || buf > attr_end)) {
- ntfs_error(vol->sb, "Corrupt attribute.");
- return ERR_PTR(-EIO);
- }
- /* If the mapping pairs array is valid but empty, nothing to do. */
- if (!vcn && !*buf)
- return old_rl;
- /* Current position in runlist array. */
- rlpos = 0;
- /* Allocate first page and set current runlist size to one page. */
- rl = ntfs_malloc_nofs(rlsize = PAGE_SIZE);
- if (unlikely(!rl))
- return ERR_PTR(-ENOMEM);
- /* Insert unmapped starting element if necessary. */
- if (vcn) {
- rl->vcn = 0;
- rl->lcn = LCN_RL_NOT_MAPPED;
- rl->length = vcn;
- rlpos++;
- }
- while (buf < attr_end && *buf) {
- /*
- * Allocate more memory if needed, including space for the
- * not-mapped and terminator elements. ntfs_malloc_nofs()
- * operates on whole pages only.
- */
- if (((rlpos + 3) * sizeof(*old_rl)) > rlsize) {
- runlist_element *rl2;
-
- rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
- if (unlikely(!rl2)) {
- ntfs_free(rl);
- return ERR_PTR(-ENOMEM);
- }
- memcpy(rl2, rl, rlsize);
- ntfs_free(rl);
- rl = rl2;
- rlsize += PAGE_SIZE;
- }
- /* Enter the current vcn into the current runlist element. */
- rl[rlpos].vcn = vcn;
- /*
- * Get the change in vcn, i.e. the run length in clusters.
- * Doing it this way ensures that we signextend negative values.
- * A negative run length doesn't make any sense, but hey, I
- * didn't make up the NTFS specs and Windows NT4 treats the run
- * length as a signed value so that's how it is...
- */
- b = *buf & 0xf;
- if (b) {
- if (unlikely(buf + b > attr_end))
- goto io_error;
- for (deltaxcn = (s8)buf[b--]; b; b--)
- deltaxcn = (deltaxcn << 8) + buf[b];
- } else { /* The length entry is compulsory. */
- ntfs_error(vol->sb, "Missing length entry in mapping "
- "pairs array.");
- deltaxcn = (s64)-1;
- }
- /*
- * Assume a negative length to indicate data corruption and
- * hence clean-up and return NULL.
- */
- if (unlikely(deltaxcn < 0)) {
- ntfs_error(vol->sb, "Invalid length in mapping pairs "
- "array.");
- goto err_out;
- }
- /*
- * Enter the current run length into the current runlist
- * element.
- */
- rl[rlpos].length = deltaxcn;
- /* Increment the current vcn by the current run length. */
- vcn += deltaxcn;
- /*
- * There might be no lcn change at all, as is the case for
- * sparse clusters on NTFS 3.0+, in which case we set the lcn
- * to LCN_HOLE.
- */
- if (!(*buf & 0xf0))
- rl[rlpos].lcn = LCN_HOLE;
- else {
- /* Get the lcn change which really can be negative. */
- u8 b2 = *buf & 0xf;
- b = b2 + ((*buf >> 4) & 0xf);
- if (buf + b > attr_end)
- goto io_error;
- for (deltaxcn = (s8)buf[b--]; b > b2; b--)
- deltaxcn = (deltaxcn << 8) + buf[b];
- /* Change the current lcn to its new value. */
- lcn += deltaxcn;
-#ifdef DEBUG
- /*
- * On NTFS 1.2-, apparently can have lcn == -1 to
- * indicate a hole. But we haven't verified ourselves
- * whether it is really the lcn or the deltaxcn that is
- * -1. So if either is found give us a message so we
- * can investigate it further!
- */
- if (vol->major_ver < 3) {
- if (unlikely(deltaxcn == (LCN)-1))
- ntfs_error(vol->sb, "lcn delta == -1");
- if (unlikely(lcn == (LCN)-1))
- ntfs_error(vol->sb, "lcn == -1");
- }
-#endif
- /* Check lcn is not below -1. */
- if (unlikely(lcn < (LCN)-1)) {
- ntfs_error(vol->sb, "Invalid LCN < -1 in "
- "mapping pairs array.");
- goto err_out;
- }
- /* Enter the current lcn into the runlist element. */
- rl[rlpos].lcn = lcn;
- }
- /* Get to the next runlist element. */
- rlpos++;
- /* Increment the buffer position to the next mapping pair. */
- buf += (*buf & 0xf) + ((*buf >> 4) & 0xf) + 1;
- }
- if (unlikely(buf >= attr_end))
- goto io_error;
- /*
- * If there is a highest_vcn specified, it must be equal to the final
- * vcn in the runlist - 1, or something has gone badly wrong.
- */
- deltaxcn = sle64_to_cpu(attr->data.non_resident.highest_vcn);
- if (unlikely(deltaxcn && vcn - 1 != deltaxcn)) {
-mpa_err:
- ntfs_error(vol->sb, "Corrupt mapping pairs array in "
- "non-resident attribute.");
- goto err_out;
- }
- /* Setup not mapped runlist element if this is the base extent. */
- if (!attr->data.non_resident.lowest_vcn) {
- VCN max_cluster;
-
- max_cluster = ((sle64_to_cpu(
- attr->data.non_resident.allocated_size) +
- vol->cluster_size - 1) >>
- vol->cluster_size_bits) - 1;
- /*
- * A highest_vcn of zero means this is a single extent
- * attribute so simply terminate the runlist with LCN_ENOENT).
- */
- if (deltaxcn) {
- /*
- * If there is a difference between the highest_vcn and
- * the highest cluster, the runlist is either corrupt
- * or, more likely, there are more extents following
- * this one.
- */
- if (deltaxcn < max_cluster) {
- ntfs_debug("More extents to follow; deltaxcn "
- "= 0x%llx, max_cluster = "
- "0x%llx",
- (unsigned long long)deltaxcn,
- (unsigned long long)
- max_cluster);
- rl[rlpos].vcn = vcn;
- vcn += rl[rlpos].length = max_cluster -
- deltaxcn;
- rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
- rlpos++;
- } else if (unlikely(deltaxcn > max_cluster)) {
- ntfs_error(vol->sb, "Corrupt attribute. "
- "deltaxcn = 0x%llx, "
- "max_cluster = 0x%llx",
- (unsigned long long)deltaxcn,
- (unsigned long long)
- max_cluster);
- goto mpa_err;
- }
- }
- rl[rlpos].lcn = LCN_ENOENT;
- } else /* Not the base extent. There may be more extents to follow. */
- rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
-
- /* Setup terminating runlist element. */
- rl[rlpos].vcn = vcn;
- rl[rlpos].length = (s64)0;
- /* If no existing runlist was specified, we are done. */
- if (!old_rl) {
- ntfs_debug("Mapping pairs array successfully decompressed:");
- ntfs_debug_dump_runlist(rl);
- return rl;
- }
- /* Now combine the new and old runlists checking for overlaps. */
- old_rl = ntfs_runlists_merge(old_rl, rl);
- if (!IS_ERR(old_rl))
- return old_rl;
- ntfs_free(rl);
- ntfs_error(vol->sb, "Failed to merge runlists.");
- return old_rl;
-io_error:
- ntfs_error(vol->sb, "Corrupt attribute.");
-err_out:
- ntfs_free(rl);
- return ERR_PTR(-EIO);
-}
-
-/**
- * ntfs_rl_vcn_to_lcn - convert a vcn into a lcn given a runlist
- * @rl: runlist to use for conversion
- * @vcn: vcn to convert
- *
- * Convert the virtual cluster number @vcn of an attribute into a logical
- * cluster number (lcn) of a device using the runlist @rl to map vcns to their
- * corresponding lcns.
- *
- * It is up to the caller to serialize access to the runlist @rl.
- *
- * Since lcns must be >= 0, we use negative return codes with special meaning:
- *
- * Return code Meaning / Description
- * ==================================================
- * LCN_HOLE Hole / not allocated on disk.
- * LCN_RL_NOT_MAPPED This is part of the runlist which has not been
- * inserted into the runlist yet.
- * LCN_ENOENT There is no such vcn in the attribute.
- *
- * Locking: - The caller must have locked the runlist (for reading or writing).
- * - This function does not touch the lock, nor does it modify the
- * runlist.
- */
-LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
-{
- int i;
-
- BUG_ON(vcn < 0);
- /*
- * If rl is NULL, assume that we have found an unmapped runlist. The
- * caller can then attempt to map it and fail appropriately if
- * necessary.
- */
- if (unlikely(!rl))
- return LCN_RL_NOT_MAPPED;
-
- /* Catch out of lower bounds vcn. */
- if (unlikely(vcn < rl[0].vcn))
- return LCN_ENOENT;
-
- for (i = 0; likely(rl[i].length); i++) {
- if (unlikely(vcn < rl[i+1].vcn)) {
- if (likely(rl[i].lcn >= (LCN)0))
- return rl[i].lcn + (vcn - rl[i].vcn);
- return rl[i].lcn;
- }
- }
- /*
- * The terminator element is setup to the correct value, i.e. one of
- * LCN_HOLE, LCN_RL_NOT_MAPPED, or LCN_ENOENT.
- */
- if (likely(rl[i].lcn < (LCN)0))
- return rl[i].lcn;
- /* Just in case... We could replace this with BUG() some day. */
- return LCN_ENOENT;
-}
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_rl_find_vcn_nolock - find a vcn in a runlist
- * @rl: runlist to search
- * @vcn: vcn to find
- *
- * Find the virtual cluster number @vcn in the runlist @rl and return the
- * address of the runlist element containing the @vcn on success.
- *
- * Return NULL if @rl is NULL or @vcn is in an unmapped part/out of bounds of
- * the runlist.
- *
- * Locking: The runlist must be locked on entry.
- */
-runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl, const VCN vcn)
-{
- BUG_ON(vcn < 0);
- if (unlikely(!rl || vcn < rl[0].vcn))
- return NULL;
- while (likely(rl->length)) {
- if (unlikely(vcn < rl[1].vcn)) {
- if (likely(rl->lcn >= LCN_HOLE))
- return rl;
- return NULL;
- }
- rl++;
- }
- if (likely(rl->lcn == LCN_ENOENT))
- return rl;
- return NULL;
-}
-
-/**
- * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number
- * @n: number for which to get the number of bytes for
- *
- * Return the number of bytes required to store @n unambiguously as
- * a signed number.
- *
- * This is used in the context of the mapping pairs array to determine how
- * many bytes will be needed in the array to store a given logical cluster
- * number (lcn) or a specific run length.
- *
- * Return the number of bytes written. This function cannot fail.
- */
-static inline int ntfs_get_nr_significant_bytes(const s64 n)
-{
- s64 l = n;
- int i;
- s8 j;
-
- i = 0;
- do {
- l >>= 8;
- i++;
- } while (l != 0 && l != -1);
- j = (n >> 8 * (i - 1)) & 0xff;
- /* If the sign bit is wrong, we need an extra byte. */
- if ((n < 0 && j >= 0) || (n > 0 && j < 0))
- i++;
- return i;
-}
-
-/**
- * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array
- * @vol: ntfs volume (needed for the ntfs version)
- * @rl: locked runlist to determine the size of the mapping pairs of
- * @first_vcn: first vcn which to include in the mapping pairs array
- * @last_vcn: last vcn which to include in the mapping pairs array
- *
- * Walk the locked runlist @rl and calculate the size in bytes of the mapping
- * pairs array corresponding to the runlist @rl, starting at vcn @first_vcn and
- * finishing with vcn @last_vcn.
- *
- * A @last_vcn of -1 means end of runlist and in that case the size of the
- * mapping pairs array corresponding to the runlist starting at vcn @first_vcn
- * and finishing at the end of the runlist is determined.
- *
- * This for example allows us to allocate a buffer of the right size when
- * building the mapping pairs array.
- *
- * If @rl is NULL, just return 1 (for the single terminator byte).
- *
- * Return the calculated size in bytes on success. On error, return -errno.
- * The following error codes are defined:
- * -EINVAL - Run list contains unmapped elements. Make sure to only pass
- * fully mapped runlists to this function.
- * -EIO - The runlist is corrupt.
- *
- * Locking: @rl must be locked on entry (either for reading or writing), it
- * remains locked throughout, and is left locked upon return.
- */
-int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
- const runlist_element *rl, const VCN first_vcn,
- const VCN last_vcn)
-{
- LCN prev_lcn;
- int rls;
- bool the_end = false;
-
- BUG_ON(first_vcn < 0);
- BUG_ON(last_vcn < -1);
- BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
- if (!rl) {
- BUG_ON(first_vcn);
- BUG_ON(last_vcn > 0);
- return 1;
- }
- /* Skip to runlist element containing @first_vcn. */
- while (rl->length && first_vcn >= rl[1].vcn)
- rl++;
- if (unlikely((!rl->length && first_vcn > rl->vcn) ||
- first_vcn < rl->vcn))
- return -EINVAL;
- prev_lcn = 0;
- /* Always need the termining zero byte. */
- rls = 1;
- /* Do the first partial run if present. */
- if (first_vcn > rl->vcn) {
- s64 delta, length = rl->length;
-
- /* We know rl->length != 0 already. */
- if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
- goto err_out;
- /*
- * If @stop_vcn is given and finishes inside this run, cap the
- * run length.
- */
- if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
- s64 s1 = last_vcn + 1;
- if (unlikely(rl[1].vcn > s1))
- length = s1 - rl->vcn;
- the_end = true;
- }
- delta = first_vcn - rl->vcn;
- /* Header byte + length. */
- rls += 1 + ntfs_get_nr_significant_bytes(length - delta);
- /*
- * If the logical cluster number (lcn) denotes a hole and we
- * are on NTFS 3.0+, we don't store it at all, i.e. we need
- * zero space. On earlier NTFS versions we just store the lcn.
- * Note: this assumes that on NTFS 1.2-, holes are stored with
- * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
- */
- if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
- prev_lcn = rl->lcn;
- if (likely(rl->lcn >= 0))
- prev_lcn += delta;
- /* Change in lcn. */
- rls += ntfs_get_nr_significant_bytes(prev_lcn);
- }
- /* Go to next runlist element. */
- rl++;
- }
- /* Do the full runs. */
- for (; rl->length && !the_end; rl++) {
- s64 length = rl->length;
-
- if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
- goto err_out;
- /*
- * If @stop_vcn is given and finishes inside this run, cap the
- * run length.
- */
- if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
- s64 s1 = last_vcn + 1;
- if (unlikely(rl[1].vcn > s1))
- length = s1 - rl->vcn;
- the_end = true;
- }
- /* Header byte + length. */
- rls += 1 + ntfs_get_nr_significant_bytes(length);
- /*
- * If the logical cluster number (lcn) denotes a hole and we
- * are on NTFS 3.0+, we don't store it at all, i.e. we need
- * zero space. On earlier NTFS versions we just store the lcn.
- * Note: this assumes that on NTFS 1.2-, holes are stored with
- * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
- */
- if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
- /* Change in lcn. */
- rls += ntfs_get_nr_significant_bytes(rl->lcn -
- prev_lcn);
- prev_lcn = rl->lcn;
- }
- }
- return rls;
-err_out:
- if (rl->lcn == LCN_RL_NOT_MAPPED)
- rls = -EINVAL;
- else
- rls = -EIO;
- return rls;
-}
-
-/**
- * ntfs_write_significant_bytes - write the significant bytes of a number
- * @dst: destination buffer to write to
- * @dst_max: pointer to last byte of destination buffer for bounds checking
- * @n: number whose significant bytes to write
- *
- * Store in @dst, the minimum bytes of the number @n which are required to
- * identify @n unambiguously as a signed number, taking care not to exceed
- * @dest_max, the maximum position within @dst to which we are allowed to
- * write.
- *
- * This is used when building the mapping pairs array of a runlist to compress
- * a given logical cluster number (lcn) or a specific run length to the minimum
- * size possible.
- *
- * Return the number of bytes written on success. On error, i.e. the
- * destination buffer @dst is too small, return -ENOSPC.
- */
-static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
- const s64 n)
-{
- s64 l = n;
- int i;
- s8 j;
-
- i = 0;
- do {
- if (unlikely(dst > dst_max))
- goto err_out;
- *dst++ = l & 0xffll;
- l >>= 8;
- i++;
- } while (l != 0 && l != -1);
- j = (n >> 8 * (i - 1)) & 0xff;
- /* If the sign bit is wrong, we need an extra byte. */
- if (n < 0 && j >= 0) {
- if (unlikely(dst > dst_max))
- goto err_out;
- i++;
- *dst = (s8)-1;
- } else if (n > 0 && j < 0) {
- if (unlikely(dst > dst_max))
- goto err_out;
- i++;
- *dst = (s8)0;
- }
- return i;
-err_out:
- return -ENOSPC;
-}
-
-/**
- * ntfs_mapping_pairs_build - build the mapping pairs array from a runlist
- * @vol: ntfs volume (needed for the ntfs version)
- * @dst: destination buffer to which to write the mapping pairs array
- * @dst_len: size of destination buffer @dst in bytes
- * @rl: locked runlist for which to build the mapping pairs array
- * @first_vcn: first vcn which to include in the mapping pairs array
- * @last_vcn: last vcn which to include in the mapping pairs array
- * @stop_vcn: first vcn outside destination buffer on success or -ENOSPC
- *
- * Create the mapping pairs array from the locked runlist @rl, starting at vcn
- * @first_vcn and finishing with vcn @last_vcn and save the array in @dst.
- * @dst_len is the size of @dst in bytes and it should be at least equal to the
- * value obtained by calling ntfs_get_size_for_mapping_pairs().
- *
- * A @last_vcn of -1 means end of runlist and in that case the mapping pairs
- * array corresponding to the runlist starting at vcn @first_vcn and finishing
- * at the end of the runlist is created.
- *
- * If @rl is NULL, just write a single terminator byte to @dst.
- *
- * On success or -ENOSPC error, if @stop_vcn is not NULL, *@stop_vcn is set to
- * the first vcn outside the destination buffer. Note that on error, @dst has
- * been filled with all the mapping pairs that will fit, thus it can be treated
- * as partial success, in that a new attribute extent needs to be created or
- * the next extent has to be used and the mapping pairs build has to be
- * continued with @first_vcn set to *@stop_vcn.
- *
- * Return 0 on success and -errno on error. The following error codes are
- * defined:
- * -EINVAL - Run list contains unmapped elements. Make sure to only pass
- * fully mapped runlists to this function.
- * -EIO - The runlist is corrupt.
- * -ENOSPC - The destination buffer is too small.
- *
- * Locking: @rl must be locked on entry (either for reading or writing), it
- * remains locked throughout, and is left locked upon return.
- */
-int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
- const int dst_len, const runlist_element *rl,
- const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn)
-{
- LCN prev_lcn;
- s8 *dst_max, *dst_next;
- int err = -ENOSPC;
- bool the_end = false;
- s8 len_len, lcn_len;
-
- BUG_ON(first_vcn < 0);
- BUG_ON(last_vcn < -1);
- BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
- BUG_ON(dst_len < 1);
- if (!rl) {
- BUG_ON(first_vcn);
- BUG_ON(last_vcn > 0);
- if (stop_vcn)
- *stop_vcn = 0;
- /* Terminator byte. */
- *dst = 0;
- return 0;
- }
- /* Skip to runlist element containing @first_vcn. */
- while (rl->length && first_vcn >= rl[1].vcn)
- rl++;
- if (unlikely((!rl->length && first_vcn > rl->vcn) ||
- first_vcn < rl->vcn))
- return -EINVAL;
- /*
- * @dst_max is used for bounds checking in
- * ntfs_write_significant_bytes().
- */
- dst_max = dst + dst_len - 1;
- prev_lcn = 0;
- /* Do the first partial run if present. */
- if (first_vcn > rl->vcn) {
- s64 delta, length = rl->length;
-
- /* We know rl->length != 0 already. */
- if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
- goto err_out;
- /*
- * If @stop_vcn is given and finishes inside this run, cap the
- * run length.
- */
- if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
- s64 s1 = last_vcn + 1;
- if (unlikely(rl[1].vcn > s1))
- length = s1 - rl->vcn;
- the_end = true;
- }
- delta = first_vcn - rl->vcn;
- /* Write length. */
- len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
- length - delta);
- if (unlikely(len_len < 0))
- goto size_err;
- /*
- * If the logical cluster number (lcn) denotes a hole and we
- * are on NTFS 3.0+, we don't store it at all, i.e. we need
- * zero space. On earlier NTFS versions we just write the lcn
- * change. FIXME: Do we need to write the lcn change or just
- * the lcn in that case? Not sure as I have never seen this
- * case on NT4. - We assume that we just need to write the lcn
- * change until someone tells us otherwise... (AIA)
- */
- if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
- prev_lcn = rl->lcn;
- if (likely(rl->lcn >= 0))
- prev_lcn += delta;
- /* Write change in lcn. */
- lcn_len = ntfs_write_significant_bytes(dst + 1 +
- len_len, dst_max, prev_lcn);
- if (unlikely(lcn_len < 0))
- goto size_err;
- } else
- lcn_len = 0;
- dst_next = dst + len_len + lcn_len + 1;
- if (unlikely(dst_next > dst_max))
- goto size_err;
- /* Update header byte. */
- *dst = lcn_len << 4 | len_len;
- /* Position at next mapping pairs array element. */
- dst = dst_next;
- /* Go to next runlist element. */
- rl++;
- }
- /* Do the full runs. */
- for (; rl->length && !the_end; rl++) {
- s64 length = rl->length;
-
- if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
- goto err_out;
- /*
- * If @stop_vcn is given and finishes inside this run, cap the
- * run length.
- */
- if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
- s64 s1 = last_vcn + 1;
- if (unlikely(rl[1].vcn > s1))
- length = s1 - rl->vcn;
- the_end = true;
- }
- /* Write length. */
- len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
- length);
- if (unlikely(len_len < 0))
- goto size_err;
- /*
- * If the logical cluster number (lcn) denotes a hole and we
- * are on NTFS 3.0+, we don't store it at all, i.e. we need
- * zero space. On earlier NTFS versions we just write the lcn
- * change. FIXME: Do we need to write the lcn change or just
- * the lcn in that case? Not sure as I have never seen this
- * case on NT4. - We assume that we just need to write the lcn
- * change until someone tells us otherwise... (AIA)
- */
- if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
- /* Write change in lcn. */
- lcn_len = ntfs_write_significant_bytes(dst + 1 +
- len_len, dst_max, rl->lcn - prev_lcn);
- if (unlikely(lcn_len < 0))
- goto size_err;
- prev_lcn = rl->lcn;
- } else
- lcn_len = 0;
- dst_next = dst + len_len + lcn_len + 1;
- if (unlikely(dst_next > dst_max))
- goto size_err;
- /* Update header byte. */
- *dst = lcn_len << 4 | len_len;
- /* Position at next mapping pairs array element. */
- dst = dst_next;
- }
- /* Success. */
- err = 0;
-size_err:
- /* Set stop vcn. */
- if (stop_vcn)
- *stop_vcn = rl->vcn;
- /* Add terminator byte. */
- *dst = 0;
- return err;
-err_out:
- if (rl->lcn == LCN_RL_NOT_MAPPED)
- err = -EINVAL;
- else
- err = -EIO;
- return err;
-}
-
-/**
- * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
- * @vol: ntfs volume (needed for error output)
- * @runlist: runlist to truncate
- * @new_length: the new length of the runlist in VCNs
- *
- * Truncate the runlist described by @runlist as well as the memory buffer
- * holding the runlist elements to a length of @new_length VCNs.
- *
- * If @new_length lies within the runlist, the runlist elements with VCNs of
- * @new_length and above are discarded. As a special case if @new_length is
- * zero, the runlist is discarded and set to NULL.
- *
- * If @new_length lies beyond the runlist, a sparse runlist element is added to
- * the end of the runlist @runlist or if the last runlist element is a sparse
- * one already, this is extended.
- *
- * Note, no checking is done for unmapped runlist elements. It is assumed that
- * the caller has mapped any elements that need to be mapped already.
- *
- * Return 0 on success and -errno on error.
- *
- * Locking: The caller must hold @runlist->lock for writing.
- */
-int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
- const s64 new_length)
-{
- runlist_element *rl;
- int old_size;
-
- ntfs_debug("Entering for new_length 0x%llx.", (long long)new_length);
- BUG_ON(!runlist);
- BUG_ON(new_length < 0);
- rl = runlist->rl;
- if (!new_length) {
- ntfs_debug("Freeing runlist.");
- runlist->rl = NULL;
- if (rl)
- ntfs_free(rl);
- return 0;
- }
- if (unlikely(!rl)) {
- /*
- * Create a runlist consisting of a sparse runlist element of
- * length @new_length followed by a terminator runlist element.
- */
- rl = ntfs_malloc_nofs(PAGE_SIZE);
- if (unlikely(!rl)) {
- ntfs_error(vol->sb, "Not enough memory to allocate "
- "runlist element buffer.");
- return -ENOMEM;
- }
- runlist->rl = rl;
- rl[1].length = rl->vcn = 0;
- rl->lcn = LCN_HOLE;
- rl[1].vcn = rl->length = new_length;
- rl[1].lcn = LCN_ENOENT;
- return 0;
- }
- BUG_ON(new_length < rl->vcn);
- /* Find @new_length in the runlist. */
- while (likely(rl->length && new_length >= rl[1].vcn))
- rl++;
- /*
- * If not at the end of the runlist we need to shrink it.
- * If at the end of the runlist we need to expand it.
- */
- if (rl->length) {
- runlist_element *trl;
- bool is_end;
-
- ntfs_debug("Shrinking runlist.");
- /* Determine the runlist size. */
- trl = rl + 1;
- while (likely(trl->length))
- trl++;
- old_size = trl - runlist->rl + 1;
- /* Truncate the run. */
- rl->length = new_length - rl->vcn;
- /*
- * If a run was partially truncated, make the following runlist
- * element a terminator.
- */
- is_end = false;
- if (rl->length) {
- rl++;
- if (!rl->length)
- is_end = true;
- rl->vcn = new_length;
- rl->length = 0;
- }
- rl->lcn = LCN_ENOENT;
- /* Reallocate memory if necessary. */
- if (!is_end) {
- int new_size = rl - runlist->rl + 1;
- rl = ntfs_rl_realloc(runlist->rl, old_size, new_size);
- if (IS_ERR(rl))
- ntfs_warning(vol->sb, "Failed to shrink "
- "runlist buffer. This just "
- "wastes a bit of memory "
- "temporarily so we ignore it "
- "and return success.");
- else
- runlist->rl = rl;
- }
- } else if (likely(/* !rl->length && */ new_length > rl->vcn)) {
- ntfs_debug("Expanding runlist.");
- /*
- * If there is a previous runlist element and it is a sparse
- * one, extend it. Otherwise need to add a new, sparse runlist
- * element.
- */
- if ((rl > runlist->rl) && ((rl - 1)->lcn == LCN_HOLE))
- (rl - 1)->length = new_length - (rl - 1)->vcn;
- else {
- /* Determine the runlist size. */
- old_size = rl - runlist->rl + 1;
- /* Reallocate memory if necessary. */
- rl = ntfs_rl_realloc(runlist->rl, old_size,
- old_size + 1);
- if (IS_ERR(rl)) {
- ntfs_error(vol->sb, "Failed to expand runlist "
- "buffer, aborting.");
- return PTR_ERR(rl);
- }
- runlist->rl = rl;
- /*
- * Set @rl to the same runlist element in the new
- * runlist as before in the old runlist.
- */
- rl += old_size - 1;
- /* Add a new, sparse runlist element. */
- rl->lcn = LCN_HOLE;
- rl->length = new_length - rl->vcn;
- /* Add a new terminator runlist element. */
- rl++;
- rl->length = 0;
- }
- rl->vcn = new_length;
- rl->lcn = LCN_ENOENT;
- } else /* if (unlikely(!rl->length && new_length == rl->vcn)) */ {
- /* Runlist already has same size as requested. */
- rl->lcn = LCN_ENOENT;
- }
- ntfs_debug("Done.");
- return 0;
-}
-
-/**
- * ntfs_rl_punch_nolock - punch a hole into a runlist
- * @vol: ntfs volume (needed for error output)
- * @runlist: runlist to punch a hole into
- * @start: starting VCN of the hole to be created
- * @length: size of the hole to be created in units of clusters
- *
- * Punch a hole into the runlist @runlist starting at VCN @start and of size
- * @length clusters.
- *
- * Return 0 on success and -errno on error, in which case @runlist has not been
- * modified.
- *
- * If @start and/or @start + @length are outside the runlist return error code
- * -ENOENT.
- *
- * If the runlist contains unmapped or error elements between @start and @start
- * + @length return error code -EINVAL.
- *
- * Locking: The caller must hold @runlist->lock for writing.
- */
-int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
- const VCN start, const s64 length)
-{
- const VCN end = start + length;
- s64 delta;
- runlist_element *rl, *rl_end, *rl_real_end, *trl;
- int old_size;
- bool lcn_fixup = false;
-
- ntfs_debug("Entering for start 0x%llx, length 0x%llx.",
- (long long)start, (long long)length);
- BUG_ON(!runlist);
- BUG_ON(start < 0);
- BUG_ON(length < 0);
- BUG_ON(end < 0);
- rl = runlist->rl;
- if (unlikely(!rl)) {
- if (likely(!start && !length))
- return 0;
- return -ENOENT;
- }
- /* Find @start in the runlist. */
- while (likely(rl->length && start >= rl[1].vcn))
- rl++;
- rl_end = rl;
- /* Find @end in the runlist. */
- while (likely(rl_end->length && end >= rl_end[1].vcn)) {
- /* Verify there are no unmapped or error elements. */
- if (unlikely(rl_end->lcn < LCN_HOLE))
- return -EINVAL;
- rl_end++;
- }
- /* Check the last element. */
- if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE))
- return -EINVAL;
- /* This covers @start being out of bounds, too. */
- if (!rl_end->length && end > rl_end->vcn)
- return -ENOENT;
- if (!length)
- return 0;
- if (!rl->length)
- return -ENOENT;
- rl_real_end = rl_end;
- /* Determine the runlist size. */
- while (likely(rl_real_end->length))
- rl_real_end++;
- old_size = rl_real_end - runlist->rl + 1;
- /* If @start is in a hole simply extend the hole. */
- if (rl->lcn == LCN_HOLE) {
- /*
- * If both @start and @end are in the same sparse run, we are
- * done.
- */
- if (end <= rl[1].vcn) {
- ntfs_debug("Done (requested hole is already sparse).");
- return 0;
- }
-extend_hole:
- /* Extend the hole. */
- rl->length = end - rl->vcn;
- /* If @end is in a hole, merge it with the current one. */
- if (rl_end->lcn == LCN_HOLE) {
- rl_end++;
- rl->length = rl_end->vcn - rl->vcn;
- }
- /* We have done the hole. Now deal with the remaining tail. */
- rl++;
- /* Cut out all runlist elements up to @end. */
- if (rl < rl_end)
- memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
- sizeof(*rl));
- /* Adjust the beginning of the tail if necessary. */
- if (end > rl->vcn) {
- delta = end - rl->vcn;
- rl->vcn = end;
- rl->length -= delta;
- /* Only adjust the lcn if it is real. */
- if (rl->lcn >= 0)
- rl->lcn += delta;
- }
-shrink_allocation:
- /* Reallocate memory if the allocation changed. */
- if (rl < rl_end) {
- rl = ntfs_rl_realloc(runlist->rl, old_size,
- old_size - (rl_end - rl));
- if (IS_ERR(rl))
- ntfs_warning(vol->sb, "Failed to shrink "
- "runlist buffer. This just "
- "wastes a bit of memory "
- "temporarily so we ignore it "
- "and return success.");
- else
- runlist->rl = rl;
- }
- ntfs_debug("Done (extend hole).");
- return 0;
- }
- /*
- * If @start is at the beginning of a run things are easier as there is
- * no need to split the first run.
- */
- if (start == rl->vcn) {
- /*
- * @start is at the beginning of a run.
- *
- * If the previous run is sparse, extend its hole.
- *
- * If @end is not in the same run, switch the run to be sparse
- * and extend the newly created hole.
- *
- * Thus both of these cases reduce the problem to the above
- * case of "@start is in a hole".
- */
- if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) {
- rl--;
- goto extend_hole;
- }
- if (end >= rl[1].vcn) {
- rl->lcn = LCN_HOLE;
- goto extend_hole;
- }
- /*
- * The final case is when @end is in the same run as @start.
- * For this need to split the run into two. One run for the
- * sparse region between the beginning of the old run, i.e.
- * @start, and @end and one for the remaining non-sparse
- * region, i.e. between @end and the end of the old run.
- */
- trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
- if (IS_ERR(trl))
- goto enomem_out;
- old_size++;
- if (runlist->rl != trl) {
- rl = trl + (rl - runlist->rl);
- rl_end = trl + (rl_end - runlist->rl);
- rl_real_end = trl + (rl_real_end - runlist->rl);
- runlist->rl = trl;
- }
-split_end:
- /* Shift all the runs up by one. */
- memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl));
- /* Finally, setup the two split runs. */
- rl->lcn = LCN_HOLE;
- rl->length = length;
- rl++;
- rl->vcn += length;
- /* Only adjust the lcn if it is real. */
- if (rl->lcn >= 0 || lcn_fixup)
- rl->lcn += length;
- rl->length -= length;
- ntfs_debug("Done (split one).");
- return 0;
- }
- /*
- * @start is neither in a hole nor at the beginning of a run.
- *
- * If @end is in a hole, things are easier as simply truncating the run
- * @start is in to end at @start - 1, deleting all runs after that up
- * to @end, and finally extending the beginning of the run @end is in
- * to be @start is all that is needed.
- */
- if (rl_end->lcn == LCN_HOLE) {
- /* Truncate the run containing @start. */
- rl->length = start - rl->vcn;
- rl++;
- /* Cut out all runlist elements up to @end. */
- if (rl < rl_end)
- memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
- sizeof(*rl));
- /* Extend the beginning of the run @end is in to be @start. */
- rl->vcn = start;
- rl->length = rl[1].vcn - start;
- goto shrink_allocation;
- }
- /*
- * If @end is not in a hole there are still two cases to distinguish.
- * Either @end is or is not in the same run as @start.
- *
- * The second case is easier as it can be reduced to an already solved
- * problem by truncating the run @start is in to end at @start - 1.
- * Then, if @end is in the next run need to split the run into a sparse
- * run followed by a non-sparse run (already covered above) and if @end
- * is not in the next run switching it to be sparse, again reduces the
- * problem to the already covered case of "@start is in a hole".
- */
- if (end >= rl[1].vcn) {
- /*
- * If @end is not in the next run, reduce the problem to the
- * case of "@start is in a hole".
- */
- if (rl[1].length && end >= rl[2].vcn) {
- /* Truncate the run containing @start. */
- rl->length = start - rl->vcn;
- rl++;
- rl->vcn = start;
- rl->lcn = LCN_HOLE;
- goto extend_hole;
- }
- trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
- if (IS_ERR(trl))
- goto enomem_out;
- old_size++;
- if (runlist->rl != trl) {
- rl = trl + (rl - runlist->rl);
- rl_end = trl + (rl_end - runlist->rl);
- rl_real_end = trl + (rl_real_end - runlist->rl);
- runlist->rl = trl;
- }
- /* Truncate the run containing @start. */
- rl->length = start - rl->vcn;
- rl++;
- /*
- * @end is in the next run, reduce the problem to the case
- * where "@start is at the beginning of a run and @end is in
- * the same run as @start".
- */
- delta = rl->vcn - start;
- rl->vcn = start;
- if (rl->lcn >= 0) {
- rl->lcn -= delta;
- /* Need this in case the lcn just became negative. */
- lcn_fixup = true;
- }
- rl->length += delta;
- goto split_end;
- }
- /*
- * The first case from above, i.e. @end is in the same run as @start.
- * We need to split the run into three. One run for the non-sparse
- * region between the beginning of the old run and @start, one for the
- * sparse region between @start and @end, and one for the remaining
- * non-sparse region, i.e. between @end and the end of the old run.
- */
- trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2);
- if (IS_ERR(trl))
- goto enomem_out;
- old_size += 2;
- if (runlist->rl != trl) {
- rl = trl + (rl - runlist->rl);
- rl_end = trl + (rl_end - runlist->rl);
- rl_real_end = trl + (rl_real_end - runlist->rl);
- runlist->rl = trl;
- }
- /* Shift all the runs up by two. */
- memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl));
- /* Finally, setup the three split runs. */
- rl->length = start - rl->vcn;
- rl++;
- rl->vcn = start;
- rl->lcn = LCN_HOLE;
- rl->length = length;
- rl++;
- delta = end - rl->vcn;
- rl->vcn = end;
- rl->lcn += delta;
- rl->length -= delta;
- ntfs_debug("Done (split both).");
- return 0;
-enomem_out:
- ntfs_error(vol->sb, "Not enough memory to extend runlist buffer.");
- return -ENOMEM;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
deleted file mode 100644
index 38de0a375f59..000000000000
--- a/fs/ntfs/runlist.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * runlist.h - Defines for runlist handling in NTFS Linux kernel driver.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_RUNLIST_H
-#define _LINUX_NTFS_RUNLIST_H
-
-#include "types.h"
-#include "layout.h"
-#include "volume.h"
-
-/**
- * runlist_element - in memory vcn to lcn mapping array element
- * @vcn: starting vcn of the current array element
- * @lcn: starting lcn of the current array element
- * @length: length in clusters of the current array element
- *
- * The last vcn (in fact the last vcn + 1) is reached when length == 0.
- *
- * When lcn == -1 this means that the count vcns starting at vcn are not
- * physically allocated (i.e. this is a hole / data is sparse).
- */
-typedef struct { /* In memory vcn to lcn mapping structure element. */
- VCN vcn; /* vcn = Starting virtual cluster number. */
- LCN lcn; /* lcn = Starting logical cluster number. */
- s64 length; /* Run length in clusters. */
-} runlist_element;
-
-/**
- * runlist - in memory vcn to lcn mapping array including a read/write lock
- * @rl: pointer to an array of runlist elements
- * @lock: read/write spinlock for serializing access to @rl
- *
- */
-typedef struct {
- runlist_element *rl;
- struct rw_semaphore lock;
-} runlist;
-
-static inline void ntfs_init_runlist(runlist *rl)
-{
- rl->rl = NULL;
- init_rwsem(&rl->lock);
-}
-
-typedef enum {
- LCN_HOLE = -1, /* Keep this as highest value or die! */
- LCN_RL_NOT_MAPPED = -2,
- LCN_ENOENT = -3,
- LCN_ENOMEM = -4,
- LCN_EIO = -5,
-} LCN_SPECIAL_VALUES;
-
-extern runlist_element *ntfs_runlists_merge(runlist_element *drl,
- runlist_element *srl);
-
-extern runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
- const ATTR_RECORD *attr, runlist_element *old_rl);
-
-extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn);
-
-#ifdef NTFS_RW
-
-extern runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl,
- const VCN vcn);
-
-extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
- const runlist_element *rl, const VCN first_vcn,
- const VCN last_vcn);
-
-extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
- const int dst_len, const runlist_element *rl,
- const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn);
-
-extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
- runlist *const runlist, const s64 new_length);
-
-int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
- const VCN start, const s64 length);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
deleted file mode 100644
index 56a7d5bd33e4..000000000000
--- a/fs/ntfs/super.c
+++ /dev/null
@@ -1,3202 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
- * Copyright (c) 2001,2002 Richard Russon
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/stddef.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/blkdev.h> /* For bdev_logical_block_size(). */
-#include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
-#include <linux/vfs.h>
-#include <linux/moduleparam.h>
-#include <linux/bitmap.h>
-
-#include "sysctl.h"
-#include "logfile.h"
-#include "quota.h"
-#include "usnjrnl.h"
-#include "dir.h"
-#include "debug.h"
-#include "index.h"
-#include "inode.h"
-#include "aops.h"
-#include "layout.h"
-#include "malloc.h"
-#include "ntfs.h"
-
-/* Number of mounted filesystems which have compression enabled. */
-static unsigned long ntfs_nr_compression_users;
-
-/* A global default upcase table and a corresponding reference count. */
-static ntfschar *default_upcase;
-static unsigned long ntfs_nr_upcase_users;
-
-/* Error constants/strings used in inode.c::ntfs_show_options(). */
-typedef enum {
- /* One of these must be present, default is ON_ERRORS_CONTINUE. */
- ON_ERRORS_PANIC = 0x01,
- ON_ERRORS_REMOUNT_RO = 0x02,
- ON_ERRORS_CONTINUE = 0x04,
- /* Optional, can be combined with any of the above. */
- ON_ERRORS_RECOVER = 0x10,
-} ON_ERRORS_ACTIONS;
-
-const option_t on_errors_arr[] = {
- { ON_ERRORS_PANIC, "panic" },
- { ON_ERRORS_REMOUNT_RO, "remount-ro", },
- { ON_ERRORS_CONTINUE, "continue", },
- { ON_ERRORS_RECOVER, "recover" },
- { 0, NULL }
-};
-
-/**
- * simple_getbool - convert input string to a boolean value
- * @s: input string to convert
- * @setval: where to store the output boolean value
- *
- * Copied from old ntfs driver (which copied from vfat driver).
- *
- * "1", "yes", "true", or an empty string are converted to %true.
- * "0", "no", and "false" are converted to %false.
- *
- * Return: %1 if the string is converted or was empty and *setval contains it;
- * %0 if the string was not valid.
- */
-static int simple_getbool(char *s, bool *setval)
-{
- if (s) {
- if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true"))
- *setval = true;
- else if (!strcmp(s, "0") || !strcmp(s, "no") ||
- !strcmp(s, "false"))
- *setval = false;
- else
- return 0;
- } else
- *setval = true;
- return 1;
-}
-
-/**
- * parse_options - parse the (re)mount options
- * @vol: ntfs volume
- * @opt: string containing the (re)mount options
- *
- * Parse the recognized options in @opt for the ntfs volume described by @vol.
- */
-static bool parse_options(ntfs_volume *vol, char *opt)
-{
- char *p, *v, *ov;
- static char *utf8 = "utf8";
- int errors = 0, sloppy = 0;
- kuid_t uid = INVALID_UID;
- kgid_t gid = INVALID_GID;
- umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
- int mft_zone_multiplier = -1, on_errors = -1;
- int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
- struct nls_table *nls_map = NULL, *old_nls;
-
- /* I am lazy... (-8 */
-#define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value) \
- if (!strcmp(p, option)) { \
- if (!v || !*v) \
- variable = default_value; \
- else { \
- variable = simple_strtoul(ov = v, &v, 0); \
- if (*v) \
- goto needs_val; \
- } \
- }
-#define NTFS_GETOPT(option, variable) \
- if (!strcmp(p, option)) { \
- if (!v || !*v) \
- goto needs_arg; \
- variable = simple_strtoul(ov = v, &v, 0); \
- if (*v) \
- goto needs_val; \
- }
-#define NTFS_GETOPT_UID(option, variable) \
- if (!strcmp(p, option)) { \
- uid_t uid_value; \
- if (!v || !*v) \
- goto needs_arg; \
- uid_value = simple_strtoul(ov = v, &v, 0); \
- if (*v) \
- goto needs_val; \
- variable = make_kuid(current_user_ns(), uid_value); \
- if (!uid_valid(variable)) \
- goto needs_val; \
- }
-#define NTFS_GETOPT_GID(option, variable) \
- if (!strcmp(p, option)) { \
- gid_t gid_value; \
- if (!v || !*v) \
- goto needs_arg; \
- gid_value = simple_strtoul(ov = v, &v, 0); \
- if (*v) \
- goto needs_val; \
- variable = make_kgid(current_user_ns(), gid_value); \
- if (!gid_valid(variable)) \
- goto needs_val; \
- }
-#define NTFS_GETOPT_OCTAL(option, variable) \
- if (!strcmp(p, option)) { \
- if (!v || !*v) \
- goto needs_arg; \
- variable = simple_strtoul(ov = v, &v, 8); \
- if (*v) \
- goto needs_val; \
- }
-#define NTFS_GETOPT_BOOL(option, variable) \
- if (!strcmp(p, option)) { \
- bool val; \
- if (!simple_getbool(v, &val)) \
- goto needs_bool; \
- variable = val; \
- }
-#define NTFS_GETOPT_OPTIONS_ARRAY(option, variable, opt_array) \
- if (!strcmp(p, option)) { \
- int _i; \
- if (!v || !*v) \
- goto needs_arg; \
- ov = v; \
- if (variable == -1) \
- variable = 0; \
- for (_i = 0; opt_array[_i].str && *opt_array[_i].str; _i++) \
- if (!strcmp(opt_array[_i].str, v)) { \
- variable |= opt_array[_i].val; \
- break; \
- } \
- if (!opt_array[_i].str || !*opt_array[_i].str) \
- goto needs_val; \
- }
- if (!opt || !*opt)
- goto no_mount_options;
- ntfs_debug("Entering with mount options string: %s", opt);
- while ((p = strsep(&opt, ","))) {
- if ((v = strchr(p, '=')))
- *v++ = 0;
- NTFS_GETOPT_UID("uid", uid)
- else NTFS_GETOPT_GID("gid", gid)
- else NTFS_GETOPT_OCTAL("umask", fmask = dmask)
- else NTFS_GETOPT_OCTAL("fmask", fmask)
- else NTFS_GETOPT_OCTAL("dmask", dmask)
- else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier)
- else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, true)
- else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
- else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive)
- else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse)
- else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors,
- on_errors_arr)
- else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes"))
- ntfs_warning(vol->sb, "Ignoring obsolete option %s.",
- p);
- else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) {
- if (!strcmp(p, "iocharset"))
- ntfs_warning(vol->sb, "Option iocharset is "
- "deprecated. Please use "
- "option nls=<charsetname> in "
- "the future.");
- if (!v || !*v)
- goto needs_arg;
-use_utf8:
- old_nls = nls_map;
- nls_map = load_nls(v);
- if (!nls_map) {
- if (!old_nls) {
- ntfs_error(vol->sb, "NLS character set "
- "%s not found.", v);
- return false;
- }
- ntfs_error(vol->sb, "NLS character set %s not "
- "found. Using previous one %s.",
- v, old_nls->charset);
- nls_map = old_nls;
- } else /* nls_map */ {
- unload_nls(old_nls);
- }
- } else if (!strcmp(p, "utf8")) {
- bool val = false;
- ntfs_warning(vol->sb, "Option utf8 is no longer "
- "supported, using option nls=utf8. Please "
- "use option nls=utf8 in the future and "
- "make sure utf8 is compiled either as a "
- "module or into the kernel.");
- if (!v || !*v)
- val = true;
- else if (!simple_getbool(v, &val))
- goto needs_bool;
- if (val) {
- v = utf8;
- goto use_utf8;
- }
- } else {
- ntfs_error(vol->sb, "Unrecognized mount option %s.", p);
- if (errors < INT_MAX)
- errors++;
- }
-#undef NTFS_GETOPT_OPTIONS_ARRAY
-#undef NTFS_GETOPT_BOOL
-#undef NTFS_GETOPT
-#undef NTFS_GETOPT_WITH_DEFAULT
- }
-no_mount_options:
- if (errors && !sloppy)
- return false;
- if (sloppy)
- ntfs_warning(vol->sb, "Sloppy option given. Ignoring "
- "unrecognized mount option(s) and continuing.");
- /* Keep this first! */
- if (on_errors != -1) {
- if (!on_errors) {
- ntfs_error(vol->sb, "Invalid errors option argument "
- "or bug in options parser.");
- return false;
- }
- }
- if (nls_map) {
- if (vol->nls_map && vol->nls_map != nls_map) {
- ntfs_error(vol->sb, "Cannot change NLS character set "
- "on remount.");
- return false;
- } /* else (!vol->nls_map) */
- ntfs_debug("Using NLS character set %s.", nls_map->charset);
- vol->nls_map = nls_map;
- } else /* (!nls_map) */ {
- if (!vol->nls_map) {
- vol->nls_map = load_nls_default();
- if (!vol->nls_map) {
- ntfs_error(vol->sb, "Failed to load default "
- "NLS character set.");
- return false;
- }
- ntfs_debug("Using default NLS character set (%s).",
- vol->nls_map->charset);
- }
- }
- if (mft_zone_multiplier != -1) {
- if (vol->mft_zone_multiplier && vol->mft_zone_multiplier !=
- mft_zone_multiplier) {
- ntfs_error(vol->sb, "Cannot change mft_zone_multiplier "
- "on remount.");
- return false;
- }
- if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) {
- ntfs_error(vol->sb, "Invalid mft_zone_multiplier. "
- "Using default value, i.e. 1.");
- mft_zone_multiplier = 1;
- }
- vol->mft_zone_multiplier = mft_zone_multiplier;
- }
- if (!vol->mft_zone_multiplier)
- vol->mft_zone_multiplier = 1;
- if (on_errors != -1)
- vol->on_errors = on_errors;
- if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER)
- vol->on_errors |= ON_ERRORS_CONTINUE;
- if (uid_valid(uid))
- vol->uid = uid;
- if (gid_valid(gid))
- vol->gid = gid;
- if (fmask != (umode_t)-1)
- vol->fmask = fmask;
- if (dmask != (umode_t)-1)
- vol->dmask = dmask;
- if (show_sys_files != -1) {
- if (show_sys_files)
- NVolSetShowSystemFiles(vol);
- else
- NVolClearShowSystemFiles(vol);
- }
- if (case_sensitive != -1) {
- if (case_sensitive)
- NVolSetCaseSensitive(vol);
- else
- NVolClearCaseSensitive(vol);
- }
- if (disable_sparse != -1) {
- if (disable_sparse)
- NVolClearSparseEnabled(vol);
- else {
- if (!NVolSparseEnabled(vol) &&
- vol->major_ver && vol->major_ver < 3)
- ntfs_warning(vol->sb, "Not enabling sparse "
- "support due to NTFS volume "
- "version %i.%i (need at least "
- "version 3.0).", vol->major_ver,
- vol->minor_ver);
- else
- NVolSetSparseEnabled(vol);
- }
- }
- return true;
-needs_arg:
- ntfs_error(vol->sb, "The %s option requires an argument.", p);
- return false;
-needs_bool:
- ntfs_error(vol->sb, "The %s option requires a boolean argument.", p);
- return false;
-needs_val:
- ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov);
- return false;
-}
-
-#ifdef NTFS_RW
-
-/**
- * ntfs_write_volume_flags - write new flags to the volume information flags
- * @vol: ntfs volume on which to modify the flags
- * @flags: new flags value for the volume information flags
- *
- * Internal function. You probably want to use ntfs_{set,clear}_volume_flags()
- * instead (see below).
- *
- * Replace the volume information flags on the volume @vol with the value
- * supplied in @flags. Note, this overwrites the volume information flags, so
- * make sure to combine the flags you want to modify with the old flags and use
- * the result when calling ntfs_write_volume_flags().
- *
- * Return 0 on success and -errno on error.
- */
-static int ntfs_write_volume_flags(ntfs_volume *vol, const VOLUME_FLAGS flags)
-{
- ntfs_inode *ni = NTFS_I(vol->vol_ino);
- MFT_RECORD *m;
- VOLUME_INFORMATION *vi;
- ntfs_attr_search_ctx *ctx;
- int err;
-
- ntfs_debug("Entering, old flags = 0x%x, new flags = 0x%x.",
- le16_to_cpu(vol->vol_flags), le16_to_cpu(flags));
- if (vol->vol_flags == flags)
- goto done;
- BUG_ON(!ni);
- m = map_mft_record(ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(ni, m);
- if (!ctx) {
- err = -ENOMEM;
- goto put_unm_err_out;
- }
- err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0,
- ctx);
- if (err)
- goto put_unm_err_out;
- vi = (VOLUME_INFORMATION*)((u8*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset));
- vol->vol_flags = vi->flags = flags;
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
-done:
- ntfs_debug("Done.");
- return 0;
-put_unm_err_out:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
-err_out:
- ntfs_error(vol->sb, "Failed with error code %i.", -err);
- return err;
-}
-
-/**
- * ntfs_set_volume_flags - set bits in the volume information flags
- * @vol: ntfs volume on which to modify the flags
- * @flags: flags to set on the volume
- *
- * Set the bits in @flags in the volume information flags on the volume @vol.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_set_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags)
-{
- flags &= VOLUME_FLAGS_MASK;
- return ntfs_write_volume_flags(vol, vol->vol_flags | flags);
-}
-
-/**
- * ntfs_clear_volume_flags - clear bits in the volume information flags
- * @vol: ntfs volume on which to modify the flags
- * @flags: flags to clear on the volume
- *
- * Clear the bits in @flags in the volume information flags on the volume @vol.
- *
- * Return 0 on success and -errno on error.
- */
-static inline int ntfs_clear_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags)
-{
- flags &= VOLUME_FLAGS_MASK;
- flags = vol->vol_flags & cpu_to_le16(~le16_to_cpu(flags));
- return ntfs_write_volume_flags(vol, flags);
-}
-
-#endif /* NTFS_RW */
-
-/**
- * ntfs_remount - change the mount options of a mounted ntfs filesystem
- * @sb: superblock of mounted ntfs filesystem
- * @flags: remount flags
- * @opt: remount options string
- *
- * Change the mount options of an already mounted ntfs filesystem.
- *
- * NOTE: The VFS sets the @sb->s_flags remount flags to @flags after
- * ntfs_remount() returns successfully (i.e. returns 0). Otherwise,
- * @sb->s_flags are not changed.
- */
-static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
-{
- ntfs_volume *vol = NTFS_SB(sb);
-
- ntfs_debug("Entering with remount options string: %s", opt);
-
- sync_filesystem(sb);
-
-#ifndef NTFS_RW
- /* For read-only compiled driver, enforce read-only flag. */
- *flags |= SB_RDONLY;
-#else /* NTFS_RW */
- /*
- * For the read-write compiled driver, if we are remounting read-write,
- * make sure there are no volume errors and that no unsupported volume
- * flags are set. Also, empty the logfile journal as it would become
- * stale as soon as something is written to the volume and mark the
- * volume dirty so that chkdsk is run if the volume is not umounted
- * cleanly. Finally, mark the quotas out of date so Windows rescans
- * the volume on boot and updates them.
- *
- * When remounting read-only, mark the volume clean if no volume errors
- * have occurred.
- */
- if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) {
- static const char *es = ". Cannot remount read-write.";
-
- /* Remounting read-write. */
- if (NVolErrors(vol)) {
- ntfs_error(sb, "Volume has errors and is read-only%s",
- es);
- return -EROFS;
- }
- if (vol->vol_flags & VOLUME_IS_DIRTY) {
- ntfs_error(sb, "Volume is dirty and read-only%s", es);
- return -EROFS;
- }
- if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
- ntfs_error(sb, "Volume has been modified by chkdsk "
- "and is read-only%s", es);
- return -EROFS;
- }
- if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
- ntfs_error(sb, "Volume has unsupported flags set "
- "(0x%x) and is read-only%s",
- (unsigned)le16_to_cpu(vol->vol_flags),
- es);
- return -EROFS;
- }
- if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
- ntfs_error(sb, "Failed to set dirty bit in volume "
- "information flags%s", es);
- return -EROFS;
- }
-#if 0
- // TODO: Enable this code once we start modifying anything that
- // is different between NTFS 1.2 and 3.x...
- /* Set NT4 compatibility flag on newer NTFS version volumes. */
- if ((vol->major_ver > 1)) {
- if (ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) {
- ntfs_error(sb, "Failed to set NT4 "
- "compatibility flag%s", es);
- NVolSetErrors(vol);
- return -EROFS;
- }
- }
-#endif
- if (!ntfs_empty_logfile(vol->logfile_ino)) {
- ntfs_error(sb, "Failed to empty journal $LogFile%s",
- es);
- NVolSetErrors(vol);
- return -EROFS;
- }
- if (!ntfs_mark_quotas_out_of_date(vol)) {
- ntfs_error(sb, "Failed to mark quotas out of date%s",
- es);
- NVolSetErrors(vol);
- return -EROFS;
- }
- if (!ntfs_stamp_usnjrnl(vol)) {
- ntfs_error(sb, "Failed to stamp transaction log "
- "($UsnJrnl)%s", es);
- NVolSetErrors(vol);
- return -EROFS;
- }
- } else if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) {
- /* Remounting read-only. */
- if (!NVolErrors(vol)) {
- if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
- ntfs_warning(sb, "Failed to clear dirty bit "
- "in volume information "
- "flags. Run chkdsk.");
- }
- }
-#endif /* NTFS_RW */
-
- // TODO: Deal with *flags.
-
- if (!parse_options(vol, opt))
- return -EINVAL;
-
- ntfs_debug("Done.");
- return 0;
-}
-
-/**
- * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector
- * @sb: Super block of the device to which @b belongs.
- * @b: Boot sector of device @sb to check.
- * @silent: If 'true', all output will be silenced.
- *
- * is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot
- * sector. Returns 'true' if it is valid and 'false' if not.
- *
- * @sb is only needed for warning/error output, i.e. it can be NULL when silent
- * is 'true'.
- */
-static bool is_boot_sector_ntfs(const struct super_block *sb,
- const NTFS_BOOT_SECTOR *b, const bool silent)
-{
- /*
- * Check that checksum == sum of u32 values from b to the checksum
- * field. If checksum is zero, no checking is done. We will work when
- * the checksum test fails, since some utilities update the boot sector
- * ignoring the checksum which leaves the checksum out-of-date. We
- * report a warning if this is the case.
- */
- if ((void*)b < (void*)&b->checksum && b->checksum && !silent) {
- le32 *u;
- u32 i;
-
- for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u)
- i += le32_to_cpup(u);
- if (le32_to_cpu(b->checksum) != i)
- ntfs_warning(sb, "Invalid boot sector checksum.");
- }
- /* Check OEMidentifier is "NTFS " */
- if (b->oem_id != magicNTFS)
- goto not_ntfs;
- /* Check bytes per sector value is between 256 and 4096. */
- if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 ||
- le16_to_cpu(b->bpb.bytes_per_sector) > 0x1000)
- goto not_ntfs;
- /* Check sectors per cluster value is valid. */
- switch (b->bpb.sectors_per_cluster) {
- case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128:
- break;
- default:
- goto not_ntfs;
- }
- /* Check the cluster size is not above the maximum (64kiB). */
- if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) *
- b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE)
- goto not_ntfs;
- /* Check reserved/unused fields are really zero. */
- if (le16_to_cpu(b->bpb.reserved_sectors) ||
- le16_to_cpu(b->bpb.root_entries) ||
- le16_to_cpu(b->bpb.sectors) ||
- le16_to_cpu(b->bpb.sectors_per_fat) ||
- le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats)
- goto not_ntfs;
- /* Check clusters per file mft record value is valid. */
- if ((u8)b->clusters_per_mft_record < 0xe1 ||
- (u8)b->clusters_per_mft_record > 0xf7)
- switch (b->clusters_per_mft_record) {
- case 1: case 2: case 4: case 8: case 16: case 32: case 64:
- break;
- default:
- goto not_ntfs;
- }
- /* Check clusters per index block value is valid. */
- if ((u8)b->clusters_per_index_record < 0xe1 ||
- (u8)b->clusters_per_index_record > 0xf7)
- switch (b->clusters_per_index_record) {
- case 1: case 2: case 4: case 8: case 16: case 32: case 64:
- break;
- default:
- goto not_ntfs;
- }
- /*
- * Check for valid end of sector marker. We will work without it, but
- * many BIOSes will refuse to boot from a bootsector if the magic is
- * incorrect, so we emit a warning.
- */
- if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
- ntfs_warning(sb, "Invalid end of sector marker.");
- return true;
-not_ntfs:
- return false;
-}
-
-/**
- * read_ntfs_boot_sector - read the NTFS boot sector of a device
- * @sb: super block of device to read the boot sector from
- * @silent: if true, suppress all output
- *
- * Reads the boot sector from the device and validates it. If that fails, tries
- * to read the backup boot sector, first from the end of the device a-la NT4 and
- * later and then from the middle of the device a-la NT3.51 and before.
- *
- * If a valid boot sector is found but it is not the primary boot sector, we
- * repair the primary boot sector silently (unless the device is read-only or
- * the primary boot sector is not accessible).
- *
- * NOTE: To call this function, @sb must have the fields s_dev, the ntfs super
- * block (u.ntfs_sb), nr_blocks and the device flags (s_flags) initialized
- * to their respective values.
- *
- * Return the unlocked buffer head containing the boot sector or NULL on error.
- */
-static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb,
- const int silent)
-{
- const char *read_err_str = "Unable to read %s boot sector.";
- struct buffer_head *bh_primary, *bh_backup;
- sector_t nr_blocks = NTFS_SB(sb)->nr_blocks;
-
- /* Try to read primary boot sector. */
- if ((bh_primary = sb_bread(sb, 0))) {
- if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*)
- bh_primary->b_data, silent))
- return bh_primary;
- if (!silent)
- ntfs_error(sb, "Primary boot sector is invalid.");
- } else if (!silent)
- ntfs_error(sb, read_err_str, "primary");
- if (!(NTFS_SB(sb)->on_errors & ON_ERRORS_RECOVER)) {
- if (bh_primary)
- brelse(bh_primary);
- if (!silent)
- ntfs_error(sb, "Mount option errors=recover not used. "
- "Aborting without trying to recover.");
- return NULL;
- }
- /* Try to read NT4+ backup boot sector. */
- if ((bh_backup = sb_bread(sb, nr_blocks - 1))) {
- if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*)
- bh_backup->b_data, silent))
- goto hotfix_primary_boot_sector;
- brelse(bh_backup);
- } else if (!silent)
- ntfs_error(sb, read_err_str, "backup");
- /* Try to read NT3.51- backup boot sector. */
- if ((bh_backup = sb_bread(sb, nr_blocks >> 1))) {
- if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*)
- bh_backup->b_data, silent))
- goto hotfix_primary_boot_sector;
- if (!silent)
- ntfs_error(sb, "Could not find a valid backup boot "
- "sector.");
- brelse(bh_backup);
- } else if (!silent)
- ntfs_error(sb, read_err_str, "backup");
- /* We failed. Cleanup and return. */
- if (bh_primary)
- brelse(bh_primary);
- return NULL;
-hotfix_primary_boot_sector:
- if (bh_primary) {
- /*
- * If we managed to read sector zero and the volume is not
- * read-only, copy the found, valid backup boot sector to the
- * primary boot sector. Note we only copy the actual boot
- * sector structure, not the actual whole device sector as that
- * may be bigger and would potentially damage the $Boot system
- * file (FIXME: Would be nice to know if the backup boot sector
- * on a large sector device contains the whole boot loader or
- * just the first 512 bytes).
- */
- if (!sb_rdonly(sb)) {
- ntfs_warning(sb, "Hot-fix: Recovering invalid primary "
- "boot sector from backup copy.");
- memcpy(bh_primary->b_data, bh_backup->b_data,
- NTFS_BLOCK_SIZE);
- mark_buffer_dirty(bh_primary);
- sync_dirty_buffer(bh_primary);
- if (buffer_uptodate(bh_primary)) {
- brelse(bh_backup);
- return bh_primary;
- }
- ntfs_error(sb, "Hot-fix: Device write error while "
- "recovering primary boot sector.");
- } else {
- ntfs_warning(sb, "Hot-fix: Recovery of primary boot "
- "sector failed: Read-only mount.");
- }
- brelse(bh_primary);
- }
- ntfs_warning(sb, "Using backup boot sector.");
- return bh_backup;
-}
-
-/**
- * parse_ntfs_boot_sector - parse the boot sector and store the data in @vol
- * @vol: volume structure to initialise with data from boot sector
- * @b: boot sector to parse
- *
- * Parse the ntfs boot sector @b and store all imporant information therein in
- * the ntfs super block @vol. Return 'true' on success and 'false' on error.
- */
-static bool parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
-{
- unsigned int sectors_per_cluster_bits, nr_hidden_sects;
- int clusters_per_mft_record, clusters_per_index_record;
- s64 ll;
-
- vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector);
- vol->sector_size_bits = ffs(vol->sector_size) - 1;
- ntfs_debug("vol->sector_size = %i (0x%x)", vol->sector_size,
- vol->sector_size);
- ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits,
- vol->sector_size_bits);
- if (vol->sector_size < vol->sb->s_blocksize) {
- ntfs_error(vol->sb, "Sector size (%i) is smaller than the "
- "device block size (%lu). This is not "
- "supported. Sorry.", vol->sector_size,
- vol->sb->s_blocksize);
- return false;
- }
- ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster);
- sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1;
- ntfs_debug("sectors_per_cluster_bits = 0x%x",
- sectors_per_cluster_bits);
- nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors);
- ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects);
- vol->cluster_size = vol->sector_size << sectors_per_cluster_bits;
- vol->cluster_size_mask = vol->cluster_size - 1;
- vol->cluster_size_bits = ffs(vol->cluster_size) - 1;
- ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size,
- vol->cluster_size);
- ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
- ntfs_debug("vol->cluster_size_bits = %i", vol->cluster_size_bits);
- if (vol->cluster_size < vol->sector_size) {
- ntfs_error(vol->sb, "Cluster size (%i) is smaller than the "
- "sector size (%i). This is not supported. "
- "Sorry.", vol->cluster_size, vol->sector_size);
- return false;
- }
- clusters_per_mft_record = b->clusters_per_mft_record;
- ntfs_debug("clusters_per_mft_record = %i (0x%x)",
- clusters_per_mft_record, clusters_per_mft_record);
- if (clusters_per_mft_record > 0)
- vol->mft_record_size = vol->cluster_size <<
- (ffs(clusters_per_mft_record) - 1);
- else
- /*
- * When mft_record_size < cluster_size, clusters_per_mft_record
- * = -log2(mft_record_size) bytes. mft_record_size normaly is
- * 1024 bytes, which is encoded as 0xF6 (-10 in decimal).
- */
- vol->mft_record_size = 1 << -clusters_per_mft_record;
- vol->mft_record_size_mask = vol->mft_record_size - 1;
- vol->mft_record_size_bits = ffs(vol->mft_record_size) - 1;
- ntfs_debug("vol->mft_record_size = %i (0x%x)", vol->mft_record_size,
- vol->mft_record_size);
- ntfs_debug("vol->mft_record_size_mask = 0x%x",
- vol->mft_record_size_mask);
- ntfs_debug("vol->mft_record_size_bits = %i (0x%x)",
- vol->mft_record_size_bits, vol->mft_record_size_bits);
- /*
- * We cannot support mft record sizes above the PAGE_SIZE since
- * we store $MFT/$DATA, the table of mft records in the page cache.
- */
- if (vol->mft_record_size > PAGE_SIZE) {
- ntfs_error(vol->sb, "Mft record size (%i) exceeds the "
- "PAGE_SIZE on your system (%lu). "
- "This is not supported. Sorry.",
- vol->mft_record_size, PAGE_SIZE);
- return false;
- }
- /* We cannot support mft record sizes below the sector size. */
- if (vol->mft_record_size < vol->sector_size) {
- ntfs_error(vol->sb, "Mft record size (%i) is smaller than the "
- "sector size (%i). This is not supported. "
- "Sorry.", vol->mft_record_size,
- vol->sector_size);
- return false;
- }
- clusters_per_index_record = b->clusters_per_index_record;
- ntfs_debug("clusters_per_index_record = %i (0x%x)",
- clusters_per_index_record, clusters_per_index_record);
- if (clusters_per_index_record > 0)
- vol->index_record_size = vol->cluster_size <<
- (ffs(clusters_per_index_record) - 1);
- else
- /*
- * When index_record_size < cluster_size,
- * clusters_per_index_record = -log2(index_record_size) bytes.
- * index_record_size normaly equals 4096 bytes, which is
- * encoded as 0xF4 (-12 in decimal).
- */
- vol->index_record_size = 1 << -clusters_per_index_record;
- vol->index_record_size_mask = vol->index_record_size - 1;
- vol->index_record_size_bits = ffs(vol->index_record_size) - 1;
- ntfs_debug("vol->index_record_size = %i (0x%x)",
- vol->index_record_size, vol->index_record_size);
- ntfs_debug("vol->index_record_size_mask = 0x%x",
- vol->index_record_size_mask);
- ntfs_debug("vol->index_record_size_bits = %i (0x%x)",
- vol->index_record_size_bits,
- vol->index_record_size_bits);
- /* We cannot support index record sizes below the sector size. */
- if (vol->index_record_size < vol->sector_size) {
- ntfs_error(vol->sb, "Index record size (%i) is smaller than "
- "the sector size (%i). This is not "
- "supported. Sorry.", vol->index_record_size,
- vol->sector_size);
- return false;
- }
- /*
- * Get the size of the volume in clusters and check for 64-bit-ness.
- * Windows currently only uses 32 bits to save the clusters so we do
- * the same as it is much faster on 32-bit CPUs.
- */
- ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits;
- if ((u64)ll >= 1ULL << 32) {
- ntfs_error(vol->sb, "Cannot handle 64-bit clusters. Sorry.");
- return false;
- }
- vol->nr_clusters = ll;
- ntfs_debug("vol->nr_clusters = 0x%llx", (long long)vol->nr_clusters);
- /*
- * On an architecture where unsigned long is 32-bits, we restrict the
- * volume size to 2TiB (2^41). On a 64-bit architecture, the compiler
- * will hopefully optimize the whole check away.
- */
- if (sizeof(unsigned long) < 8) {
- if ((ll << vol->cluster_size_bits) >= (1ULL << 41)) {
- ntfs_error(vol->sb, "Volume size (%lluTiB) is too "
- "large for this architecture. "
- "Maximum supported is 2TiB. Sorry.",
- (unsigned long long)ll >> (40 -
- vol->cluster_size_bits));
- return false;
- }
- }
- ll = sle64_to_cpu(b->mft_lcn);
- if (ll >= vol->nr_clusters) {
- ntfs_error(vol->sb, "MFT LCN (%lli, 0x%llx) is beyond end of "
- "volume. Weird.", (unsigned long long)ll,
- (unsigned long long)ll);
- return false;
- }
- vol->mft_lcn = ll;
- ntfs_debug("vol->mft_lcn = 0x%llx", (long long)vol->mft_lcn);
- ll = sle64_to_cpu(b->mftmirr_lcn);
- if (ll >= vol->nr_clusters) {
- ntfs_error(vol->sb, "MFTMirr LCN (%lli, 0x%llx) is beyond end "
- "of volume. Weird.", (unsigned long long)ll,
- (unsigned long long)ll);
- return false;
- }
- vol->mftmirr_lcn = ll;
- ntfs_debug("vol->mftmirr_lcn = 0x%llx", (long long)vol->mftmirr_lcn);
-#ifdef NTFS_RW
- /*
- * Work out the size of the mft mirror in number of mft records. If the
- * cluster size is less than or equal to the size taken by four mft
- * records, the mft mirror stores the first four mft records. If the
- * cluster size is bigger than the size taken by four mft records, the
- * mft mirror contains as many mft records as will fit into one
- * cluster.
- */
- if (vol->cluster_size <= (4 << vol->mft_record_size_bits))
- vol->mftmirr_size = 4;
- else
- vol->mftmirr_size = vol->cluster_size >>
- vol->mft_record_size_bits;
- ntfs_debug("vol->mftmirr_size = %i", vol->mftmirr_size);
-#endif /* NTFS_RW */
- vol->serial_no = le64_to_cpu(b->volume_serial_number);
- ntfs_debug("vol->serial_no = 0x%llx",
- (unsigned long long)vol->serial_no);
- return true;
-}
-
-/**
- * ntfs_setup_allocators - initialize the cluster and mft allocators
- * @vol: volume structure for which to setup the allocators
- *
- * Setup the cluster (lcn) and mft allocators to the starting values.
- */
-static void ntfs_setup_allocators(ntfs_volume *vol)
-{
-#ifdef NTFS_RW
- LCN mft_zone_size, mft_lcn;
-#endif /* NTFS_RW */
-
- ntfs_debug("vol->mft_zone_multiplier = 0x%x",
- vol->mft_zone_multiplier);
-#ifdef NTFS_RW
- /* Determine the size of the MFT zone. */
- mft_zone_size = vol->nr_clusters;
- switch (vol->mft_zone_multiplier) { /* % of volume size in clusters */
- case 4:
- mft_zone_size >>= 1; /* 50% */
- break;
- case 3:
- mft_zone_size = (mft_zone_size +
- (mft_zone_size >> 1)) >> 2; /* 37.5% */
- break;
- case 2:
- mft_zone_size >>= 2; /* 25% */
- break;
- /* case 1: */
- default:
- mft_zone_size >>= 3; /* 12.5% */
- break;
- }
- /* Setup the mft zone. */
- vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn;
- ntfs_debug("vol->mft_zone_pos = 0x%llx",
- (unsigned long long)vol->mft_zone_pos);
- /*
- * Calculate the mft_lcn for an unmodified NTFS volume (see mkntfs
- * source) and if the actual mft_lcn is in the expected place or even
- * further to the front of the volume, extend the mft_zone to cover the
- * beginning of the volume as well. This is in order to protect the
- * area reserved for the mft bitmap as well within the mft_zone itself.
- * On non-standard volumes we do not protect it as the overhead would
- * be higher than the speed increase we would get by doing it.
- */
- mft_lcn = (8192 + 2 * vol->cluster_size - 1) / vol->cluster_size;
- if (mft_lcn * vol->cluster_size < 16 * 1024)
- mft_lcn = (16 * 1024 + vol->cluster_size - 1) /
- vol->cluster_size;
- if (vol->mft_zone_start <= mft_lcn)
- vol->mft_zone_start = 0;
- ntfs_debug("vol->mft_zone_start = 0x%llx",
- (unsigned long long)vol->mft_zone_start);
- /*
- * Need to cap the mft zone on non-standard volumes so that it does
- * not point outside the boundaries of the volume. We do this by
- * halving the zone size until we are inside the volume.
- */
- vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
- while (vol->mft_zone_end >= vol->nr_clusters) {
- mft_zone_size >>= 1;
- vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
- }
- ntfs_debug("vol->mft_zone_end = 0x%llx",
- (unsigned long long)vol->mft_zone_end);
- /*
- * Set the current position within each data zone to the start of the
- * respective zone.
- */
- vol->data1_zone_pos = vol->mft_zone_end;
- ntfs_debug("vol->data1_zone_pos = 0x%llx",
- (unsigned long long)vol->data1_zone_pos);
- vol->data2_zone_pos = 0;
- ntfs_debug("vol->data2_zone_pos = 0x%llx",
- (unsigned long long)vol->data2_zone_pos);
-
- /* Set the mft data allocation position to mft record 24. */
- vol->mft_data_pos = 24;
- ntfs_debug("vol->mft_data_pos = 0x%llx",
- (unsigned long long)vol->mft_data_pos);
-#endif /* NTFS_RW */
-}
-
-#ifdef NTFS_RW
-
-/**
- * load_and_init_mft_mirror - load and setup the mft mirror inode for a volume
- * @vol: ntfs super block describing device whose mft mirror to load
- *
- * Return 'true' on success or 'false' on error.
- */
-static bool load_and_init_mft_mirror(ntfs_volume *vol)
-{
- struct inode *tmp_ino;
- ntfs_inode *tmp_ni;
-
- ntfs_debug("Entering.");
- /* Get mft mirror inode. */
- tmp_ino = ntfs_iget(vol->sb, FILE_MFTMirr);
- if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) {
- if (!IS_ERR(tmp_ino))
- iput(tmp_ino);
- /* Caller will display error message. */
- return false;
- }
- /*
- * Re-initialize some specifics about $MFTMirr's inode as
- * ntfs_read_inode() will have set up the default ones.
- */
- /* Set uid and gid to root. */
- tmp_ino->i_uid = GLOBAL_ROOT_UID;
- tmp_ino->i_gid = GLOBAL_ROOT_GID;
- /* Regular file. No access for anyone. */
- tmp_ino->i_mode = S_IFREG;
- /* No VFS initiated operations allowed for $MFTMirr. */
- tmp_ino->i_op = &ntfs_empty_inode_ops;
- tmp_ino->i_fop = &ntfs_empty_file_ops;
- /* Put in our special address space operations. */
- tmp_ino->i_mapping->a_ops = &ntfs_mst_aops;
- tmp_ni = NTFS_I(tmp_ino);
- /* The $MFTMirr, like the $MFT is multi sector transfer protected. */
- NInoSetMstProtected(tmp_ni);
- NInoSetSparseDisabled(tmp_ni);
- /*
- * Set up our little cheat allowing us to reuse the async read io
- * completion handler for directories.
- */
- tmp_ni->itype.index.block_size = vol->mft_record_size;
- tmp_ni->itype.index.block_size_bits = vol->mft_record_size_bits;
- vol->mftmirr_ino = tmp_ino;
- ntfs_debug("Done.");
- return true;
-}
-
-/**
- * check_mft_mirror - compare contents of the mft mirror with the mft
- * @vol: ntfs super block describing device whose mft mirror to check
- *
- * Return 'true' on success or 'false' on error.
- *
- * Note, this function also results in the mft mirror runlist being completely
- * mapped into memory. The mft mirror write code requires this and will BUG()
- * should it find an unmapped runlist element.
- */
-static bool check_mft_mirror(ntfs_volume *vol)
-{
- struct super_block *sb = vol->sb;
- ntfs_inode *mirr_ni;
- struct page *mft_page, *mirr_page;
- u8 *kmft, *kmirr;
- runlist_element *rl, rl2[2];
- pgoff_t index;
- int mrecs_per_page, i;
-
- ntfs_debug("Entering.");
- /* Compare contents of $MFT and $MFTMirr. */
- mrecs_per_page = PAGE_SIZE / vol->mft_record_size;
- BUG_ON(!mrecs_per_page);
- BUG_ON(!vol->mftmirr_size);
- mft_page = mirr_page = NULL;
- kmft = kmirr = NULL;
- index = i = 0;
- do {
- u32 bytes;
-
- /* Switch pages if necessary. */
- if (!(i % mrecs_per_page)) {
- if (index) {
- ntfs_unmap_page(mft_page);
- ntfs_unmap_page(mirr_page);
- }
- /* Get the $MFT page. */
- mft_page = ntfs_map_page(vol->mft_ino->i_mapping,
- index);
- if (IS_ERR(mft_page)) {
- ntfs_error(sb, "Failed to read $MFT.");
- return false;
- }
- kmft = page_address(mft_page);
- /* Get the $MFTMirr page. */
- mirr_page = ntfs_map_page(vol->mftmirr_ino->i_mapping,
- index);
- if (IS_ERR(mirr_page)) {
- ntfs_error(sb, "Failed to read $MFTMirr.");
- goto mft_unmap_out;
- }
- kmirr = page_address(mirr_page);
- ++index;
- }
- /* Do not check the record if it is not in use. */
- if (((MFT_RECORD*)kmft)->flags & MFT_RECORD_IN_USE) {
- /* Make sure the record is ok. */
- if (ntfs_is_baad_recordp((le32*)kmft)) {
- ntfs_error(sb, "Incomplete multi sector "
- "transfer detected in mft "
- "record %i.", i);
-mm_unmap_out:
- ntfs_unmap_page(mirr_page);
-mft_unmap_out:
- ntfs_unmap_page(mft_page);
- return false;
- }
- }
- /* Do not check the mirror record if it is not in use. */
- if (((MFT_RECORD*)kmirr)->flags & MFT_RECORD_IN_USE) {
- if (ntfs_is_baad_recordp((le32*)kmirr)) {
- ntfs_error(sb, "Incomplete multi sector "
- "transfer detected in mft "
- "mirror record %i.", i);
- goto mm_unmap_out;
- }
- }
- /* Get the amount of data in the current record. */
- bytes = le32_to_cpu(((MFT_RECORD*)kmft)->bytes_in_use);
- if (bytes < sizeof(MFT_RECORD_OLD) ||
- bytes > vol->mft_record_size ||
- ntfs_is_baad_recordp((le32*)kmft)) {
- bytes = le32_to_cpu(((MFT_RECORD*)kmirr)->bytes_in_use);
- if (bytes < sizeof(MFT_RECORD_OLD) ||
- bytes > vol->mft_record_size ||
- ntfs_is_baad_recordp((le32*)kmirr))
- bytes = vol->mft_record_size;
- }
- /* Compare the two records. */
- if (memcmp(kmft, kmirr, bytes)) {
- ntfs_error(sb, "$MFT and $MFTMirr (record %i) do not "
- "match. Run ntfsfix or chkdsk.", i);
- goto mm_unmap_out;
- }
- kmft += vol->mft_record_size;
- kmirr += vol->mft_record_size;
- } while (++i < vol->mftmirr_size);
- /* Release the last pages. */
- ntfs_unmap_page(mft_page);
- ntfs_unmap_page(mirr_page);
-
- /* Construct the mft mirror runlist by hand. */
- rl2[0].vcn = 0;
- rl2[0].lcn = vol->mftmirr_lcn;
- rl2[0].length = (vol->mftmirr_size * vol->mft_record_size +
- vol->cluster_size - 1) / vol->cluster_size;
- rl2[1].vcn = rl2[0].length;
- rl2[1].lcn = LCN_ENOENT;
- rl2[1].length = 0;
- /*
- * Because we have just read all of the mft mirror, we know we have
- * mapped the full runlist for it.
- */
- mirr_ni = NTFS_I(vol->mftmirr_ino);
- down_read(&mirr_ni->runlist.lock);
- rl = mirr_ni->runlist.rl;
- /* Compare the two runlists. They must be identical. */
- i = 0;
- do {
- if (rl2[i].vcn != rl[i].vcn || rl2[i].lcn != rl[i].lcn ||
- rl2[i].length != rl[i].length) {
- ntfs_error(sb, "$MFTMirr location mismatch. "
- "Run chkdsk.");
- up_read(&mirr_ni->runlist.lock);
- return false;
- }
- } while (rl2[i++].length);
- up_read(&mirr_ni->runlist.lock);
- ntfs_debug("Done.");
- return true;
-}
-
-/**
- * load_and_check_logfile - load and check the logfile inode for a volume
- * @vol: ntfs super block describing device whose logfile to load
- *
- * Return 'true' on success or 'false' on error.
- */
-static bool load_and_check_logfile(ntfs_volume *vol,
- RESTART_PAGE_HEADER **rp)
-{
- struct inode *tmp_ino;
-
- ntfs_debug("Entering.");
- tmp_ino = ntfs_iget(vol->sb, FILE_LogFile);
- if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) {
- if (!IS_ERR(tmp_ino))
- iput(tmp_ino);
- /* Caller will display error message. */
- return false;
- }
- if (!ntfs_check_logfile(tmp_ino, rp)) {
- iput(tmp_ino);
- /* ntfs_check_logfile() will have displayed error output. */
- return false;
- }
- NInoSetSparseDisabled(NTFS_I(tmp_ino));
- vol->logfile_ino = tmp_ino;
- ntfs_debug("Done.");
- return true;
-}
-
-#define NTFS_HIBERFIL_HEADER_SIZE 4096
-
-/**
- * check_windows_hibernation_status - check if Windows is suspended on a volume
- * @vol: ntfs super block of device to check
- *
- * Check if Windows is hibernated on the ntfs volume @vol. This is done by
- * looking for the file hiberfil.sys in the root directory of the volume. If
- * the file is not present Windows is definitely not suspended.
- *
- * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
- * definitely suspended (this volume is not the system volume). Caveat: on a
- * system with many volumes it is possible that the < 4kiB check is bogus but
- * for now this should do fine.
- *
- * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
- * hiberfil header (which is the first 4kiB). If this begins with "hibr",
- * Windows is definitely suspended. If it is completely full of zeroes,
- * Windows is definitely not hibernated. Any other case is treated as if
- * Windows is suspended. This caters for the above mentioned caveat of a
- * system with many volumes where no "hibr" magic would be present and there is
- * no zero header.
- *
- * Return 0 if Windows is not hibernated on the volume, >0 if Windows is
- * hibernated on the volume, and -errno on error.
- */
-static int check_windows_hibernation_status(ntfs_volume *vol)
-{
- MFT_REF mref;
- struct inode *vi;
- struct page *page;
- u32 *kaddr, *kend;
- ntfs_name *name = NULL;
- int ret = 1;
- static const ntfschar hiberfil[13] = { cpu_to_le16('h'),
- cpu_to_le16('i'), cpu_to_le16('b'),
- cpu_to_le16('e'), cpu_to_le16('r'),
- cpu_to_le16('f'), cpu_to_le16('i'),
- cpu_to_le16('l'), cpu_to_le16('.'),
- cpu_to_le16('s'), cpu_to_le16('y'),
- cpu_to_le16('s'), 0 };
-
- ntfs_debug("Entering.");
- /*
- * Find the inode number for the hibernation file by looking up the
- * filename hiberfil.sys in the root directory.
- */
- inode_lock(vol->root_ino);
- mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
- &name);
- inode_unlock(vol->root_ino);
- if (IS_ERR_MREF(mref)) {
- ret = MREF_ERR(mref);
- /* If the file does not exist, Windows is not hibernated. */
- if (ret == -ENOENT) {
- ntfs_debug("hiberfil.sys not present. Windows is not "
- "hibernated on the volume.");
- return 0;
- }
- /* A real error occurred. */
- ntfs_error(vol->sb, "Failed to find inode number for "
- "hiberfil.sys.");
- return ret;
- }
- /* We do not care for the type of match that was found. */
- kfree(name);
- /* Get the inode. */
- vi = ntfs_iget(vol->sb, MREF(mref));
- if (IS_ERR(vi) || is_bad_inode(vi)) {
- if (!IS_ERR(vi))
- iput(vi);
- ntfs_error(vol->sb, "Failed to load hiberfil.sys.");
- return IS_ERR(vi) ? PTR_ERR(vi) : -EIO;
- }
- if (unlikely(i_size_read(vi) < NTFS_HIBERFIL_HEADER_SIZE)) {
- ntfs_debug("hiberfil.sys is smaller than 4kiB (0x%llx). "
- "Windows is hibernated on the volume. This "
- "is not the system volume.", i_size_read(vi));
- goto iput_out;
- }
- page = ntfs_map_page(vi->i_mapping, 0);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
- ret = PTR_ERR(page);
- goto iput_out;
- }
- kaddr = (u32*)page_address(page);
- if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) {
- ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
- "hibernated on the volume. This is the "
- "system volume.");
- goto unm_iput_out;
- }
- kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
- do {
- if (unlikely(*kaddr)) {
- ntfs_debug("hiberfil.sys is larger than 4kiB "
- "(0x%llx), does not contain the "
- "\"hibr\" magic, and does not have a "
- "zero header. Windows is hibernated "
- "on the volume. This is not the "
- "system volume.", i_size_read(vi));
- goto unm_iput_out;
- }
- } while (++kaddr < kend);
- ntfs_debug("hiberfil.sys contains a zero header. Windows is not "
- "hibernated on the volume. This is the system "
- "volume.");
- ret = 0;
-unm_iput_out:
- ntfs_unmap_page(page);
-iput_out:
- iput(vi);
- return ret;
-}
-
-/**
- * load_and_init_quota - load and setup the quota file for a volume if present
- * @vol: ntfs super block describing device whose quota file to load
- *
- * Return 'true' on success or 'false' on error. If $Quota is not present, we
- * leave vol->quota_ino as NULL and return success.
- */
-static bool load_and_init_quota(ntfs_volume *vol)
-{
- MFT_REF mref;
- struct inode *tmp_ino;
- ntfs_name *name = NULL;
- static const ntfschar Quota[7] = { cpu_to_le16('$'),
- cpu_to_le16('Q'), cpu_to_le16('u'),
- cpu_to_le16('o'), cpu_to_le16('t'),
- cpu_to_le16('a'), 0 };
- static ntfschar Q[3] = { cpu_to_le16('$'),
- cpu_to_le16('Q'), 0 };
-
- ntfs_debug("Entering.");
- /*
- * Find the inode number for the quota file by looking up the filename
- * $Quota in the extended system files directory $Extend.
- */
- inode_lock(vol->extend_ino);
- mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
- &name);
- inode_unlock(vol->extend_ino);
- if (IS_ERR_MREF(mref)) {
- /*
- * If the file does not exist, quotas are disabled and have
- * never been enabled on this volume, just return success.
- */
- if (MREF_ERR(mref) == -ENOENT) {
- ntfs_debug("$Quota not present. Volume does not have "
- "quotas enabled.");
- /*
- * No need to try to set quotas out of date if they are
- * not enabled.
- */
- NVolSetQuotaOutOfDate(vol);
- return true;
- }
- /* A real error occurred. */
- ntfs_error(vol->sb, "Failed to find inode number for $Quota.");
- return false;
- }
- /* We do not care for the type of match that was found. */
- kfree(name);
- /* Get the inode. */
- tmp_ino = ntfs_iget(vol->sb, MREF(mref));
- if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) {
- if (!IS_ERR(tmp_ino))
- iput(tmp_ino);
- ntfs_error(vol->sb, "Failed to load $Quota.");
- return false;
- }
- vol->quota_ino = tmp_ino;
- /* Get the $Q index allocation attribute. */
- tmp_ino = ntfs_index_iget(vol->quota_ino, Q, 2);
- if (IS_ERR(tmp_ino)) {
- ntfs_error(vol->sb, "Failed to load $Quota/$Q index.");
- return false;
- }
- vol->quota_q_ino = tmp_ino;
- ntfs_debug("Done.");
- return true;
-}
-
-/**
- * load_and_init_usnjrnl - load and setup the transaction log if present
- * @vol: ntfs super block describing device whose usnjrnl file to load
- *
- * Return 'true' on success or 'false' on error.
- *
- * If $UsnJrnl is not present or in the process of being disabled, we set
- * NVolUsnJrnlStamped() and return success.
- *
- * If the $UsnJrnl $DATA/$J attribute has a size equal to the lowest valid usn,
- * i.e. transaction logging has only just been enabled or the journal has been
- * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
- * and return success.
- */
-static bool load_and_init_usnjrnl(ntfs_volume *vol)
-{
- MFT_REF mref;
- struct inode *tmp_ino;
- ntfs_inode *tmp_ni;
- struct page *page;
- ntfs_name *name = NULL;
- USN_HEADER *uh;
- static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'),
- cpu_to_le16('U'), cpu_to_le16('s'),
- cpu_to_le16('n'), cpu_to_le16('J'),
- cpu_to_le16('r'), cpu_to_le16('n'),
- cpu_to_le16('l'), 0 };
- static ntfschar Max[5] = { cpu_to_le16('$'),
- cpu_to_le16('M'), cpu_to_le16('a'),
- cpu_to_le16('x'), 0 };
- static ntfschar J[3] = { cpu_to_le16('$'),
- cpu_to_le16('J'), 0 };
-
- ntfs_debug("Entering.");
- /*
- * Find the inode number for the transaction log file by looking up the
- * filename $UsnJrnl in the extended system files directory $Extend.
- */
- inode_lock(vol->extend_ino);
- mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
- &name);
- inode_unlock(vol->extend_ino);
- if (IS_ERR_MREF(mref)) {
- /*
- * If the file does not exist, transaction logging is disabled,
- * just return success.
- */
- if (MREF_ERR(mref) == -ENOENT) {
- ntfs_debug("$UsnJrnl not present. Volume does not "
- "have transaction logging enabled.");
-not_enabled:
- /*
- * No need to try to stamp the transaction log if
- * transaction logging is not enabled.
- */
- NVolSetUsnJrnlStamped(vol);
- return true;
- }
- /* A real error occurred. */
- ntfs_error(vol->sb, "Failed to find inode number for "
- "$UsnJrnl.");
- return false;
- }
- /* We do not care for the type of match that was found. */
- kfree(name);
- /* Get the inode. */
- tmp_ino = ntfs_iget(vol->sb, MREF(mref));
- if (IS_ERR(tmp_ino) || unlikely(is_bad_inode(tmp_ino))) {
- if (!IS_ERR(tmp_ino))
- iput(tmp_ino);
- ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
- return false;
- }
- vol->usnjrnl_ino = tmp_ino;
- /*
- * If the transaction log is in the process of being deleted, we can
- * ignore it.
- */
- if (unlikely(vol->vol_flags & VOLUME_DELETE_USN_UNDERWAY)) {
- ntfs_debug("$UsnJrnl in the process of being disabled. "
- "Volume does not have transaction logging "
- "enabled.");
- goto not_enabled;
- }
- /* Get the $DATA/$Max attribute. */
- tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, Max, 4);
- if (IS_ERR(tmp_ino)) {
- ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$Max "
- "attribute.");
- return false;
- }
- vol->usnjrnl_max_ino = tmp_ino;
- if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
- ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
- "attribute (size is 0x%llx but should be at "
- "least 0x%zx bytes).", i_size_read(tmp_ino),
- sizeof(USN_HEADER));
- return false;
- }
- /* Get the $DATA/$J attribute. */
- tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, J, 2);
- if (IS_ERR(tmp_ino)) {
- ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$J "
- "attribute.");
- return false;
- }
- vol->usnjrnl_j_ino = tmp_ino;
- /* Verify $J is non-resident and sparse. */
- tmp_ni = NTFS_I(vol->usnjrnl_j_ino);
- if (unlikely(!NInoNonResident(tmp_ni) || !NInoSparse(tmp_ni))) {
- ntfs_error(vol->sb, "$UsnJrnl/$DATA/$J attribute is resident "
- "and/or not sparse.");
- return false;
- }
- /* Read the USN_HEADER from $DATA/$Max. */
- page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to read from $UsnJrnl/$DATA/$Max "
- "attribute.");
- return false;
- }
- uh = (USN_HEADER*)page_address(page);
- /* Sanity check the $Max. */
- if (unlikely(sle64_to_cpu(uh->allocation_delta) >
- sle64_to_cpu(uh->maximum_size))) {
- ntfs_error(vol->sb, "Allocation delta (0x%llx) exceeds "
- "maximum size (0x%llx). $UsnJrnl is corrupt.",
- (long long)sle64_to_cpu(uh->allocation_delta),
- (long long)sle64_to_cpu(uh->maximum_size));
- ntfs_unmap_page(page);
- return false;
- }
- /*
- * If the transaction log has been stamped and nothing has been written
- * to it since, we do not need to stamp it.
- */
- if (unlikely(sle64_to_cpu(uh->lowest_valid_usn) >=
- i_size_read(vol->usnjrnl_j_ino))) {
- if (likely(sle64_to_cpu(uh->lowest_valid_usn) ==
- i_size_read(vol->usnjrnl_j_ino))) {
- ntfs_unmap_page(page);
- ntfs_debug("$UsnJrnl is enabled but nothing has been "
- "logged since it was last stamped. "
- "Treating this as if the volume does "
- "not have transaction logging "
- "enabled.");
- goto not_enabled;
- }
- ntfs_error(vol->sb, "$UsnJrnl has lowest valid usn (0x%llx) "
- "which is out of bounds (0x%llx). $UsnJrnl "
- "is corrupt.",
- (long long)sle64_to_cpu(uh->lowest_valid_usn),
- i_size_read(vol->usnjrnl_j_ino));
- ntfs_unmap_page(page);
- return false;
- }
- ntfs_unmap_page(page);
- ntfs_debug("Done.");
- return true;
-}
-
-/**
- * load_and_init_attrdef - load the attribute definitions table for a volume
- * @vol: ntfs super block describing device whose attrdef to load
- *
- * Return 'true' on success or 'false' on error.
- */
-static bool load_and_init_attrdef(ntfs_volume *vol)
-{
- loff_t i_size;
- struct super_block *sb = vol->sb;
- struct inode *ino;
- struct page *page;
- pgoff_t index, max_index;
- unsigned int size;
-
- ntfs_debug("Entering.");
- /* Read attrdef table and setup vol->attrdef and vol->attrdef_size. */
- ino = ntfs_iget(sb, FILE_AttrDef);
- if (IS_ERR(ino) || is_bad_inode(ino)) {
- if (!IS_ERR(ino))
- iput(ino);
- goto failed;
- }
- NInoSetSparseDisabled(NTFS_I(ino));
- /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */
- i_size = i_size_read(ino);
- if (i_size <= 0 || i_size > 0x7fffffff)
- goto iput_failed;
- vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(i_size);
- if (!vol->attrdef)
- goto iput_failed;
- index = 0;
- max_index = i_size >> PAGE_SHIFT;
- size = PAGE_SIZE;
- while (index < max_index) {
- /* Read the attrdef table and copy it into the linear buffer. */
-read_partial_attrdef_page:
- page = ntfs_map_page(ino->i_mapping, index);
- if (IS_ERR(page))
- goto free_iput_failed;
- memcpy((u8*)vol->attrdef + (index++ << PAGE_SHIFT),
- page_address(page), size);
- ntfs_unmap_page(page);
- }
- if (size == PAGE_SIZE) {
- size = i_size & ~PAGE_MASK;
- if (size)
- goto read_partial_attrdef_page;
- }
- vol->attrdef_size = i_size;
- ntfs_debug("Read %llu bytes from $AttrDef.", i_size);
- iput(ino);
- return true;
-free_iput_failed:
- ntfs_free(vol->attrdef);
- vol->attrdef = NULL;
-iput_failed:
- iput(ino);
-failed:
- ntfs_error(sb, "Failed to initialize attribute definition table.");
- return false;
-}
-
-#endif /* NTFS_RW */
-
-/**
- * load_and_init_upcase - load the upcase table for an ntfs volume
- * @vol: ntfs super block describing device whose upcase to load
- *
- * Return 'true' on success or 'false' on error.
- */
-static bool load_and_init_upcase(ntfs_volume *vol)
-{
- loff_t i_size;
- struct super_block *sb = vol->sb;
- struct inode *ino;
- struct page *page;
- pgoff_t index, max_index;
- unsigned int size;
- int i, max;
-
- ntfs_debug("Entering.");
- /* Read upcase table and setup vol->upcase and vol->upcase_len. */
- ino = ntfs_iget(sb, FILE_UpCase);
- if (IS_ERR(ino) || is_bad_inode(ino)) {
- if (!IS_ERR(ino))
- iput(ino);
- goto upcase_failed;
- }
- /*
- * The upcase size must not be above 64k Unicode characters, must not
- * be zero and must be a multiple of sizeof(ntfschar).
- */
- i_size = i_size_read(ino);
- if (!i_size || i_size & (sizeof(ntfschar) - 1) ||
- i_size > 64ULL * 1024 * sizeof(ntfschar))
- goto iput_upcase_failed;
- vol->upcase = (ntfschar*)ntfs_malloc_nofs(i_size);
- if (!vol->upcase)
- goto iput_upcase_failed;
- index = 0;
- max_index = i_size >> PAGE_SHIFT;
- size = PAGE_SIZE;
- while (index < max_index) {
- /* Read the upcase table and copy it into the linear buffer. */
-read_partial_upcase_page:
- page = ntfs_map_page(ino->i_mapping, index);
- if (IS_ERR(page))
- goto iput_upcase_failed;
- memcpy((char*)vol->upcase + (index++ << PAGE_SHIFT),
- page_address(page), size);
- ntfs_unmap_page(page);
- }
- if (size == PAGE_SIZE) {
- size = i_size & ~PAGE_MASK;
- if (size)
- goto read_partial_upcase_page;
- }
- vol->upcase_len = i_size >> UCHAR_T_SIZE_BITS;
- ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
- i_size, 64 * 1024 * sizeof(ntfschar));
- iput(ino);
- mutex_lock(&ntfs_lock);
- if (!default_upcase) {
- ntfs_debug("Using volume specified $UpCase since default is "
- "not present.");
- mutex_unlock(&ntfs_lock);
- return true;
- }
- max = default_upcase_len;
- if (max > vol->upcase_len)
- max = vol->upcase_len;
- for (i = 0; i < max; i++)
- if (vol->upcase[i] != default_upcase[i])
- break;
- if (i == max) {
- ntfs_free(vol->upcase);
- vol->upcase = default_upcase;
- vol->upcase_len = max;
- ntfs_nr_upcase_users++;
- mutex_unlock(&ntfs_lock);
- ntfs_debug("Volume specified $UpCase matches default. Using "
- "default.");
- return true;
- }
- mutex_unlock(&ntfs_lock);
- ntfs_debug("Using volume specified $UpCase since it does not match "
- "the default.");
- return true;
-iput_upcase_failed:
- iput(ino);
- ntfs_free(vol->upcase);
- vol->upcase = NULL;
-upcase_failed:
- mutex_lock(&ntfs_lock);
- if (default_upcase) {
- vol->upcase = default_upcase;
- vol->upcase_len = default_upcase_len;
- ntfs_nr_upcase_users++;
- mutex_unlock(&ntfs_lock);
- ntfs_error(sb, "Failed to load $UpCase from the volume. Using "
- "default.");
- return true;
- }
- mutex_unlock(&ntfs_lock);
- ntfs_error(sb, "Failed to initialize upcase table.");
- return false;
-}
-
-/*
- * The lcn and mft bitmap inodes are NTFS-internal inodes with
- * their own special locking rules:
- */
-static struct lock_class_key
- lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
- mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
-
-/**
- * load_system_files - open the system files using normal functions
- * @vol: ntfs super block describing device whose system files to load
- *
- * Open the system files with normal access functions and complete setting up
- * the ntfs super block @vol.
- *
- * Return 'true' on success or 'false' on error.
- */
-static bool load_system_files(ntfs_volume *vol)
-{
- struct super_block *sb = vol->sb;
- MFT_RECORD *m;
- VOLUME_INFORMATION *vi;
- ntfs_attr_search_ctx *ctx;
-#ifdef NTFS_RW
- RESTART_PAGE_HEADER *rp;
- int err;
-#endif /* NTFS_RW */
-
- ntfs_debug("Entering.");
-#ifdef NTFS_RW
- /* Get mft mirror inode compare the contents of $MFT and $MFTMirr. */
- if (!load_and_init_mft_mirror(vol) || !check_mft_mirror(vol)) {
- static const char *es1 = "Failed to load $MFTMirr";
- static const char *es2 = "$MFTMirr does not match $MFT";
- static const char *es3 = ". Run ntfsfix and/or chkdsk.";
-
- /* If a read-write mount, convert it to a read-only mount. */
- if (!sb_rdonly(sb)) {
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors="
- "continue nor on_errors="
- "remount-ro was specified%s",
- !vol->mftmirr_ino ? es1 : es2,
- es3);
- goto iput_mirr_err_out;
- }
- sb->s_flags |= SB_RDONLY;
- ntfs_error(sb, "%s. Mounting read-only%s",
- !vol->mftmirr_ino ? es1 : es2, es3);
- } else
- ntfs_warning(sb, "%s. Will not be able to remount "
- "read-write%s",
- !vol->mftmirr_ino ? es1 : es2, es3);
- /* This will prevent a read-write remount. */
- NVolSetErrors(vol);
- }
-#endif /* NTFS_RW */
- /* Get mft bitmap attribute inode. */
- vol->mftbmp_ino = ntfs_attr_iget(vol->mft_ino, AT_BITMAP, NULL, 0);
- if (IS_ERR(vol->mftbmp_ino)) {
- ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
- goto iput_mirr_err_out;
- }
- lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
- &mftbmp_runlist_lock_key);
- lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
- &mftbmp_mrec_lock_key);
- /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
- if (!load_and_init_upcase(vol))
- goto iput_mftbmp_err_out;
-#ifdef NTFS_RW
- /*
- * Read attribute definitions table and setup @vol->attrdef and
- * @vol->attrdef_size.
- */
- if (!load_and_init_attrdef(vol))
- goto iput_upcase_err_out;
-#endif /* NTFS_RW */
- /*
- * Get the cluster allocation bitmap inode and verify the size, no
- * need for any locking at this stage as we are already running
- * exclusively as we are mount in progress task.
- */
- vol->lcnbmp_ino = ntfs_iget(sb, FILE_Bitmap);
- if (IS_ERR(vol->lcnbmp_ino) || is_bad_inode(vol->lcnbmp_ino)) {
- if (!IS_ERR(vol->lcnbmp_ino))
- iput(vol->lcnbmp_ino);
- goto bitmap_failed;
- }
- lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
- &lcnbmp_runlist_lock_key);
- lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
- &lcnbmp_mrec_lock_key);
-
- NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
- if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
- iput(vol->lcnbmp_ino);
-bitmap_failed:
- ntfs_error(sb, "Failed to load $Bitmap.");
- goto iput_attrdef_err_out;
- }
- /*
- * Get the volume inode and setup our cache of the volume flags and
- * version.
- */
- vol->vol_ino = ntfs_iget(sb, FILE_Volume);
- if (IS_ERR(vol->vol_ino) || is_bad_inode(vol->vol_ino)) {
- if (!IS_ERR(vol->vol_ino))
- iput(vol->vol_ino);
-volume_failed:
- ntfs_error(sb, "Failed to load $Volume.");
- goto iput_lcnbmp_err_out;
- }
- m = map_mft_record(NTFS_I(vol->vol_ino));
- if (IS_ERR(m)) {
-iput_volume_failed:
- iput(vol->vol_ino);
- goto volume_failed;
- }
- if (!(ctx = ntfs_attr_get_search_ctx(NTFS_I(vol->vol_ino), m))) {
- ntfs_error(sb, "Failed to get attribute search context.");
- goto get_ctx_vol_failed;
- }
- if (ntfs_attr_lookup(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0,
- ctx) || ctx->attr->non_resident || ctx->attr->flags) {
-err_put_vol:
- ntfs_attr_put_search_ctx(ctx);
-get_ctx_vol_failed:
- unmap_mft_record(NTFS_I(vol->vol_ino));
- goto iput_volume_failed;
- }
- vi = (VOLUME_INFORMATION*)((char*)ctx->attr +
- le16_to_cpu(ctx->attr->data.resident.value_offset));
- /* Some bounds checks. */
- if ((u8*)vi < (u8*)ctx->attr || (u8*)vi +
- le32_to_cpu(ctx->attr->data.resident.value_length) >
- (u8*)ctx->attr + le32_to_cpu(ctx->attr->length))
- goto err_put_vol;
- /* Copy the volume flags and version to the ntfs_volume structure. */
- vol->vol_flags = vi->flags;
- vol->major_ver = vi->major_ver;
- vol->minor_ver = vi->minor_ver;
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(NTFS_I(vol->vol_ino));
- pr_info("volume version %i.%i.\n", vol->major_ver,
- vol->minor_ver);
- if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
- ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
- "volume version %i.%i (need at least version "
- "3.0).", vol->major_ver, vol->minor_ver);
- NVolClearSparseEnabled(vol);
- }
-#ifdef NTFS_RW
- /* Make sure that no unsupported volume flags are set. */
- if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
- static const char *es1a = "Volume is dirty";
- static const char *es1b = "Volume has been modified by chkdsk";
- static const char *es1c = "Volume has unsupported flags set";
- static const char *es2a = ". Run chkdsk and mount in Windows.";
- static const char *es2b = ". Mount in Windows.";
- const char *es1, *es2;
-
- es2 = es2a;
- if (vol->vol_flags & VOLUME_IS_DIRTY)
- es1 = es1a;
- else if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
- es1 = es1b;
- es2 = es2b;
- } else {
- es1 = es1c;
- ntfs_warning(sb, "Unsupported volume flags 0x%x "
- "encountered.",
- (unsigned)le16_to_cpu(vol->vol_flags));
- }
- /* If a read-write mount, convert it to a read-only mount. */
- if (!sb_rdonly(sb)) {
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors="
- "continue nor on_errors="
- "remount-ro was specified%s",
- es1, es2);
- goto iput_vol_err_out;
- }
- sb->s_flags |= SB_RDONLY;
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- } else
- ntfs_warning(sb, "%s. Will not be able to remount "
- "read-write%s", es1, es2);
- /*
- * Do not set NVolErrors() because ntfs_remount() re-checks the
- * flags which we need to do in case any flags have changed.
- */
- }
- /*
- * Get the inode for the logfile, check it and determine if the volume
- * was shutdown cleanly.
- */
- rp = NULL;
- if (!load_and_check_logfile(vol, &rp) ||
- !ntfs_is_logfile_clean(vol->logfile_ino, rp)) {
- static const char *es1a = "Failed to load $LogFile";
- static const char *es1b = "$LogFile is not clean";
- static const char *es2 = ". Mount in Windows.";
- const char *es1;
-
- es1 = !vol->logfile_ino ? es1a : es1b;
- /* If a read-write mount, convert it to a read-only mount. */
- if (!sb_rdonly(sb)) {
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors="
- "continue nor on_errors="
- "remount-ro was specified%s",
- es1, es2);
- if (vol->logfile_ino) {
- BUG_ON(!rp);
- ntfs_free(rp);
- }
- goto iput_logfile_err_out;
- }
- sb->s_flags |= SB_RDONLY;
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- } else
- ntfs_warning(sb, "%s. Will not be able to remount "
- "read-write%s", es1, es2);
- /* This will prevent a read-write remount. */
- NVolSetErrors(vol);
- }
- ntfs_free(rp);
-#endif /* NTFS_RW */
- /* Get the root directory inode so we can do path lookups. */
- vol->root_ino = ntfs_iget(sb, FILE_root);
- if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
- if (!IS_ERR(vol->root_ino))
- iput(vol->root_ino);
- ntfs_error(sb, "Failed to load root directory.");
- goto iput_logfile_err_out;
- }
-#ifdef NTFS_RW
- /*
- * Check if Windows is suspended to disk on the target volume. If it
- * is hibernated, we must not write *anything* to the disk so set
- * NVolErrors() without setting the dirty volume flag and mount
- * read-only. This will prevent read-write remounting and it will also
- * prevent all writes.
- */
- err = check_windows_hibernation_status(vol);
- if (unlikely(err)) {
- static const char *es1a = "Failed to determine if Windows is "
- "hibernated";
- static const char *es1b = "Windows is hibernated";
- static const char *es2 = ". Run chkdsk.";
- const char *es1;
-
- es1 = err < 0 ? es1a : es1b;
- /* If a read-write mount, convert it to a read-only mount. */
- if (!sb_rdonly(sb)) {
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors="
- "continue nor on_errors="
- "remount-ro was specified%s",
- es1, es2);
- goto iput_root_err_out;
- }
- sb->s_flags |= SB_RDONLY;
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- } else
- ntfs_warning(sb, "%s. Will not be able to remount "
- "read-write%s", es1, es2);
- /* This will prevent a read-write remount. */
- NVolSetErrors(vol);
- }
- /* If (still) a read-write mount, mark the volume dirty. */
- if (!sb_rdonly(sb) && ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
- static const char *es1 = "Failed to set dirty bit in volume "
- "information flags";
- static const char *es2 = ". Run chkdsk.";
-
- /* Convert to a read-only mount. */
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors=continue nor "
- "on_errors=remount-ro was specified%s",
- es1, es2);
- goto iput_root_err_out;
- }
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= SB_RDONLY;
- /*
- * Do not set NVolErrors() because ntfs_remount() might manage
- * to set the dirty flag in which case all would be well.
- */
- }
-#if 0
- // TODO: Enable this code once we start modifying anything that is
- // different between NTFS 1.2 and 3.x...
- /*
- * If (still) a read-write mount, set the NT4 compatibility flag on
- * newer NTFS version volumes.
- */
- if (!(sb->s_flags & SB_RDONLY) && (vol->major_ver > 1) &&
- ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) {
- static const char *es1 = "Failed to set NT4 compatibility flag";
- static const char *es2 = ". Run chkdsk.";
-
- /* Convert to a read-only mount. */
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors=continue nor "
- "on_errors=remount-ro was specified%s",
- es1, es2);
- goto iput_root_err_out;
- }
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= SB_RDONLY;
- NVolSetErrors(vol);
- }
-#endif
- /* If (still) a read-write mount, empty the logfile. */
- if (!sb_rdonly(sb) && !ntfs_empty_logfile(vol->logfile_ino)) {
- static const char *es1 = "Failed to empty $LogFile";
- static const char *es2 = ". Mount in Windows.";
-
- /* Convert to a read-only mount. */
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors=continue nor "
- "on_errors=remount-ro was specified%s",
- es1, es2);
- goto iput_root_err_out;
- }
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= SB_RDONLY;
- NVolSetErrors(vol);
- }
-#endif /* NTFS_RW */
- /* If on NTFS versions before 3.0, we are done. */
- if (unlikely(vol->major_ver < 3))
- return true;
- /* NTFS 3.0+ specific initialization. */
- /* Get the security descriptors inode. */
- vol->secure_ino = ntfs_iget(sb, FILE_Secure);
- if (IS_ERR(vol->secure_ino) || is_bad_inode(vol->secure_ino)) {
- if (!IS_ERR(vol->secure_ino))
- iput(vol->secure_ino);
- ntfs_error(sb, "Failed to load $Secure.");
- goto iput_root_err_out;
- }
- // TODO: Initialize security.
- /* Get the extended system files' directory inode. */
- vol->extend_ino = ntfs_iget(sb, FILE_Extend);
- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) ||
- !S_ISDIR(vol->extend_ino->i_mode)) {
- if (!IS_ERR(vol->extend_ino))
- iput(vol->extend_ino);
- ntfs_error(sb, "Failed to load $Extend.");
- goto iput_sec_err_out;
- }
-#ifdef NTFS_RW
- /* Find the quota file, load it if present, and set it up. */
- if (!load_and_init_quota(vol)) {
- static const char *es1 = "Failed to load $Quota";
- static const char *es2 = ". Run chkdsk.";
-
- /* If a read-write mount, convert it to a read-only mount. */
- if (!sb_rdonly(sb)) {
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors="
- "continue nor on_errors="
- "remount-ro was specified%s",
- es1, es2);
- goto iput_quota_err_out;
- }
- sb->s_flags |= SB_RDONLY;
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- } else
- ntfs_warning(sb, "%s. Will not be able to remount "
- "read-write%s", es1, es2);
- /* This will prevent a read-write remount. */
- NVolSetErrors(vol);
- }
- /* If (still) a read-write mount, mark the quotas out of date. */
- if (!sb_rdonly(sb) && !ntfs_mark_quotas_out_of_date(vol)) {
- static const char *es1 = "Failed to mark quotas out of date";
- static const char *es2 = ". Run chkdsk.";
-
- /* Convert to a read-only mount. */
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors=continue nor "
- "on_errors=remount-ro was specified%s",
- es1, es2);
- goto iput_quota_err_out;
- }
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= SB_RDONLY;
- NVolSetErrors(vol);
- }
- /*
- * Find the transaction log file ($UsnJrnl), load it if present, check
- * it, and set it up.
- */
- if (!load_and_init_usnjrnl(vol)) {
- static const char *es1 = "Failed to load $UsnJrnl";
- static const char *es2 = ". Run chkdsk.";
-
- /* If a read-write mount, convert it to a read-only mount. */
- if (!sb_rdonly(sb)) {
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors="
- "continue nor on_errors="
- "remount-ro was specified%s",
- es1, es2);
- goto iput_usnjrnl_err_out;
- }
- sb->s_flags |= SB_RDONLY;
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- } else
- ntfs_warning(sb, "%s. Will not be able to remount "
- "read-write%s", es1, es2);
- /* This will prevent a read-write remount. */
- NVolSetErrors(vol);
- }
- /* If (still) a read-write mount, stamp the transaction log. */
- if (!sb_rdonly(sb) && !ntfs_stamp_usnjrnl(vol)) {
- static const char *es1 = "Failed to stamp transaction log "
- "($UsnJrnl)";
- static const char *es2 = ". Run chkdsk.";
-
- /* Convert to a read-only mount. */
- if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
- ON_ERRORS_CONTINUE))) {
- ntfs_error(sb, "%s and neither on_errors=continue nor "
- "on_errors=remount-ro was specified%s",
- es1, es2);
- goto iput_usnjrnl_err_out;
- }
- ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= SB_RDONLY;
- NVolSetErrors(vol);
- }
-#endif /* NTFS_RW */
- return true;
-#ifdef NTFS_RW
-iput_usnjrnl_err_out:
- iput(vol->usnjrnl_j_ino);
- iput(vol->usnjrnl_max_ino);
- iput(vol->usnjrnl_ino);
-iput_quota_err_out:
- iput(vol->quota_q_ino);
- iput(vol->quota_ino);
- iput(vol->extend_ino);
-#endif /* NTFS_RW */
-iput_sec_err_out:
- iput(vol->secure_ino);
-iput_root_err_out:
- iput(vol->root_ino);
-iput_logfile_err_out:
-#ifdef NTFS_RW
- iput(vol->logfile_ino);
-iput_vol_err_out:
-#endif /* NTFS_RW */
- iput(vol->vol_ino);
-iput_lcnbmp_err_out:
- iput(vol->lcnbmp_ino);
-iput_attrdef_err_out:
- vol->attrdef_size = 0;
- if (vol->attrdef) {
- ntfs_free(vol->attrdef);
- vol->attrdef = NULL;
- }
-#ifdef NTFS_RW
-iput_upcase_err_out:
-#endif /* NTFS_RW */
- vol->upcase_len = 0;
- mutex_lock(&ntfs_lock);
- if (vol->upcase == default_upcase) {
- ntfs_nr_upcase_users--;
- vol->upcase = NULL;
- }
- mutex_unlock(&ntfs_lock);
- if (vol->upcase) {
- ntfs_free(vol->upcase);
- vol->upcase = NULL;
- }
-iput_mftbmp_err_out:
- iput(vol->mftbmp_ino);
-iput_mirr_err_out:
-#ifdef NTFS_RW
- iput(vol->mftmirr_ino);
-#endif /* NTFS_RW */
- return false;
-}
-
-/**
- * ntfs_put_super - called by the vfs to unmount a volume
- * @sb: vfs superblock of volume to unmount
- *
- * ntfs_put_super() is called by the VFS (from fs/super.c::do_umount()) when
- * the volume is being unmounted (umount system call has been invoked) and it
- * releases all inodes and memory belonging to the NTFS specific part of the
- * super block.
- */
-static void ntfs_put_super(struct super_block *sb)
-{
- ntfs_volume *vol = NTFS_SB(sb);
-
- ntfs_debug("Entering.");
-
-#ifdef NTFS_RW
- /*
- * Commit all inodes while they are still open in case some of them
- * cause others to be dirtied.
- */
- ntfs_commit_inode(vol->vol_ino);
-
- /* NTFS 3.0+ specific. */
- if (vol->major_ver >= 3) {
- if (vol->usnjrnl_j_ino)
- ntfs_commit_inode(vol->usnjrnl_j_ino);
- if (vol->usnjrnl_max_ino)
- ntfs_commit_inode(vol->usnjrnl_max_ino);
- if (vol->usnjrnl_ino)
- ntfs_commit_inode(vol->usnjrnl_ino);
- if (vol->quota_q_ino)
- ntfs_commit_inode(vol->quota_q_ino);
- if (vol->quota_ino)
- ntfs_commit_inode(vol->quota_ino);
- if (vol->extend_ino)
- ntfs_commit_inode(vol->extend_ino);
- if (vol->secure_ino)
- ntfs_commit_inode(vol->secure_ino);
- }
-
- ntfs_commit_inode(vol->root_ino);
-
- down_write(&vol->lcnbmp_lock);
- ntfs_commit_inode(vol->lcnbmp_ino);
- up_write(&vol->lcnbmp_lock);
-
- down_write(&vol->mftbmp_lock);
- ntfs_commit_inode(vol->mftbmp_ino);
- up_write(&vol->mftbmp_lock);
-
- if (vol->logfile_ino)
- ntfs_commit_inode(vol->logfile_ino);
-
- if (vol->mftmirr_ino)
- ntfs_commit_inode(vol->mftmirr_ino);
- ntfs_commit_inode(vol->mft_ino);
-
- /*
- * If a read-write mount and no volume errors have occurred, mark the
- * volume clean. Also, re-commit all affected inodes.
- */
- if (!sb_rdonly(sb)) {
- if (!NVolErrors(vol)) {
- if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
- ntfs_warning(sb, "Failed to clear dirty bit "
- "in volume information "
- "flags. Run chkdsk.");
- ntfs_commit_inode(vol->vol_ino);
- ntfs_commit_inode(vol->root_ino);
- if (vol->mftmirr_ino)
- ntfs_commit_inode(vol->mftmirr_ino);
- ntfs_commit_inode(vol->mft_ino);
- } else {
- ntfs_warning(sb, "Volume has errors. Leaving volume "
- "marked dirty. Run chkdsk.");
- }
- }
-#endif /* NTFS_RW */
-
- iput(vol->vol_ino);
- vol->vol_ino = NULL;
-
- /* NTFS 3.0+ specific clean up. */
- if (vol->major_ver >= 3) {
-#ifdef NTFS_RW
- if (vol->usnjrnl_j_ino) {
- iput(vol->usnjrnl_j_ino);
- vol->usnjrnl_j_ino = NULL;
- }
- if (vol->usnjrnl_max_ino) {
- iput(vol->usnjrnl_max_ino);
- vol->usnjrnl_max_ino = NULL;
- }
- if (vol->usnjrnl_ino) {
- iput(vol->usnjrnl_ino);
- vol->usnjrnl_ino = NULL;
- }
- if (vol->quota_q_ino) {
- iput(vol->quota_q_ino);
- vol->quota_q_ino = NULL;
- }
- if (vol->quota_ino) {
- iput(vol->quota_ino);
- vol->quota_ino = NULL;
- }
-#endif /* NTFS_RW */
- if (vol->extend_ino) {
- iput(vol->extend_ino);
- vol->extend_ino = NULL;
- }
- if (vol->secure_ino) {
- iput(vol->secure_ino);
- vol->secure_ino = NULL;
- }
- }
-
- iput(vol->root_ino);
- vol->root_ino = NULL;
-
- down_write(&vol->lcnbmp_lock);
- iput(vol->lcnbmp_ino);
- vol->lcnbmp_ino = NULL;
- up_write(&vol->lcnbmp_lock);
-
- down_write(&vol->mftbmp_lock);
- iput(vol->mftbmp_ino);
- vol->mftbmp_ino = NULL;
- up_write(&vol->mftbmp_lock);
-
-#ifdef NTFS_RW
- if (vol->logfile_ino) {
- iput(vol->logfile_ino);
- vol->logfile_ino = NULL;
- }
- if (vol->mftmirr_ino) {
- /* Re-commit the mft mirror and mft just in case. */
- ntfs_commit_inode(vol->mftmirr_ino);
- ntfs_commit_inode(vol->mft_ino);
- iput(vol->mftmirr_ino);
- vol->mftmirr_ino = NULL;
- }
- /*
- * We should have no dirty inodes left, due to
- * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
- * the underlying mft records are written out and cleaned.
- */
- ntfs_commit_inode(vol->mft_ino);
- write_inode_now(vol->mft_ino, 1);
-#endif /* NTFS_RW */
-
- iput(vol->mft_ino);
- vol->mft_ino = NULL;
-
- /* Throw away the table of attribute definitions. */
- vol->attrdef_size = 0;
- if (vol->attrdef) {
- ntfs_free(vol->attrdef);
- vol->attrdef = NULL;
- }
- vol->upcase_len = 0;
- /*
- * Destroy the global default upcase table if necessary. Also decrease
- * the number of upcase users if we are a user.
- */
- mutex_lock(&ntfs_lock);
- if (vol->upcase == default_upcase) {
- ntfs_nr_upcase_users--;
- vol->upcase = NULL;
- }
- if (!ntfs_nr_upcase_users && default_upcase) {
- ntfs_free(default_upcase);
- default_upcase = NULL;
- }
- if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
- free_compression_buffers();
- mutex_unlock(&ntfs_lock);
- if (vol->upcase) {
- ntfs_free(vol->upcase);
- vol->upcase = NULL;
- }
-
- unload_nls(vol->nls_map);
-
- sb->s_fs_info = NULL;
- kfree(vol);
-}
-
-/**
- * get_nr_free_clusters - return the number of free clusters on a volume
- * @vol: ntfs volume for which to obtain free cluster count
- *
- * Calculate the number of free clusters on the mounted NTFS volume @vol. We
- * actually calculate the number of clusters in use instead because this
- * allows us to not care about partial pages as these will be just zero filled
- * and hence not be counted as allocated clusters.
- *
- * The only particularity is that clusters beyond the end of the logical ntfs
- * volume will be marked as allocated to prevent errors which means we have to
- * discount those at the end. This is important as the cluster bitmap always
- * has a size in multiples of 8 bytes, i.e. up to 63 clusters could be outside
- * the logical volume and marked in use when they are not as they do not exist.
- *
- * If any pages cannot be read we assume all clusters in the erroring pages are
- * in use. This means we return an underestimate on errors which is better than
- * an overestimate.
- */
-static s64 get_nr_free_clusters(ntfs_volume *vol)
-{
- s64 nr_free = vol->nr_clusters;
- struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
- struct page *page;
- pgoff_t index, max_index;
-
- ntfs_debug("Entering.");
- /* Serialize accesses to the cluster bitmap. */
- down_read(&vol->lcnbmp_lock);
- /*
- * Convert the number of bits into bytes rounded up, then convert into
- * multiples of PAGE_SIZE, rounding up so that if we have one
- * full and one partial page max_index = 2.
- */
- max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >>
- PAGE_SHIFT;
- /* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
- ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
- max_index, PAGE_SIZE / 4);
- for (index = 0; index < max_index; index++) {
- unsigned long *kaddr;
-
- /*
- * Read the page from page cache, getting it from backing store
- * if necessary, and increment the use count.
- */
- page = read_mapping_page(mapping, index, NULL);
- /* Ignore pages which errored synchronously. */
- if (IS_ERR(page)) {
- ntfs_debug("read_mapping_page() error. Skipping "
- "page (index 0x%lx).", index);
- nr_free -= PAGE_SIZE * 8;
- continue;
- }
- kaddr = kmap_atomic(page);
- /*
- * Subtract the number of set bits. If this
- * is the last page and it is partial we don't really care as
- * it just means we do a little extra work but it won't affect
- * the result as all out of range bytes are set to zero by
- * ntfs_readpage().
- */
- nr_free -= bitmap_weight(kaddr,
- PAGE_SIZE * BITS_PER_BYTE);
- kunmap_atomic(kaddr);
- put_page(page);
- }
- ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
- /*
- * Fixup for eventual bits outside logical ntfs volume (see function
- * description above).
- */
- if (vol->nr_clusters & 63)
- nr_free += 64 - (vol->nr_clusters & 63);
- up_read(&vol->lcnbmp_lock);
- /* If errors occurred we may well have gone below zero, fix this. */
- if (nr_free < 0)
- nr_free = 0;
- ntfs_debug("Exiting.");
- return nr_free;
-}
-
-/**
- * __get_nr_free_mft_records - return the number of free inodes on a volume
- * @vol: ntfs volume for which to obtain free inode count
- * @nr_free: number of mft records in filesystem
- * @max_index: maximum number of pages containing set bits
- *
- * Calculate the number of free mft records (inodes) on the mounted NTFS
- * volume @vol. We actually calculate the number of mft records in use instead
- * because this allows us to not care about partial pages as these will be just
- * zero filled and hence not be counted as allocated mft record.
- *
- * If any pages cannot be read we assume all mft records in the erroring pages
- * are in use. This means we return an underestimate on errors which is better
- * than an overestimate.
- *
- * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing.
- */
-static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
- s64 nr_free, const pgoff_t max_index)
-{
- struct address_space *mapping = vol->mftbmp_ino->i_mapping;
- struct page *page;
- pgoff_t index;
-
- ntfs_debug("Entering.");
- /* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
- ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
- "0x%lx.", max_index, PAGE_SIZE / 4);
- for (index = 0; index < max_index; index++) {
- unsigned long *kaddr;
-
- /*
- * Read the page from page cache, getting it from backing store
- * if necessary, and increment the use count.
- */
- page = read_mapping_page(mapping, index, NULL);
- /* Ignore pages which errored synchronously. */
- if (IS_ERR(page)) {
- ntfs_debug("read_mapping_page() error. Skipping "
- "page (index 0x%lx).", index);
- nr_free -= PAGE_SIZE * 8;
- continue;
- }
- kaddr = kmap_atomic(page);
- /*
- * Subtract the number of set bits. If this
- * is the last page and it is partial we don't really care as
- * it just means we do a little extra work but it won't affect
- * the result as all out of range bytes are set to zero by
- * ntfs_readpage().
- */
- nr_free -= bitmap_weight(kaddr,
- PAGE_SIZE * BITS_PER_BYTE);
- kunmap_atomic(kaddr);
- put_page(page);
- }
- ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
- index - 1);
- /* If errors occurred we may well have gone below zero, fix this. */
- if (nr_free < 0)
- nr_free = 0;
- ntfs_debug("Exiting.");
- return nr_free;
-}
-
-/**
- * ntfs_statfs - return information about mounted NTFS volume
- * @dentry: dentry from mounted volume
- * @sfs: statfs structure in which to return the information
- *
- * Return information about the mounted NTFS volume @dentry in the statfs structure
- * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is
- * called). We interpret the values to be correct of the moment in time at
- * which we are called. Most values are variable otherwise and this isn't just
- * the free values but the totals as well. For example we can increase the
- * total number of file nodes if we run out and we can keep doing this until
- * there is no more space on the volume left at all.
- *
- * Called from vfs_statfs which is used to handle the statfs, fstatfs, and
- * ustat system calls.
- *
- * Return 0 on success or -errno on error.
- */
-static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
-{
- struct super_block *sb = dentry->d_sb;
- s64 size;
- ntfs_volume *vol = NTFS_SB(sb);
- ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
- pgoff_t max_index;
- unsigned long flags;
-
- ntfs_debug("Entering.");
- /* Type of filesystem. */
- sfs->f_type = NTFS_SB_MAGIC;
- /* Optimal transfer block size. */
- sfs->f_bsize = PAGE_SIZE;
- /*
- * Total data blocks in filesystem in units of f_bsize and since
- * inodes are also stored in data blocs ($MFT is a file) this is just
- * the total clusters.
- */
- sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >>
- PAGE_SHIFT;
- /* Free data blocks in filesystem in units of f_bsize. */
- size = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
- PAGE_SHIFT;
- if (size < 0LL)
- size = 0LL;
- /* Free blocks avail to non-superuser, same as above on NTFS. */
- sfs->f_bavail = sfs->f_bfree = size;
- /* Serialize accesses to the inode bitmap. */
- down_read(&vol->mftbmp_lock);
- read_lock_irqsave(&mft_ni->size_lock, flags);
- size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
- /*
- * Convert the maximum number of set bits into bytes rounded up, then
- * convert into multiples of PAGE_SIZE, rounding up so that if we
- * have one full and one partial page max_index = 2.
- */
- max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
- + 7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- read_unlock_irqrestore(&mft_ni->size_lock, flags);
- /* Number of inodes in filesystem (at this point in time). */
- sfs->f_files = size;
- /* Free inodes in fs (based on current total count). */
- sfs->f_ffree = __get_nr_free_mft_records(vol, size, max_index);
- up_read(&vol->mftbmp_lock);
- /*
- * File system id. This is extremely *nix flavour dependent and even
- * within Linux itself all fs do their own thing. I interpret this to
- * mean a unique id associated with the mounted fs and not the id
- * associated with the filesystem driver, the latter is already given
- * by the filesystem type in sfs->f_type. Thus we use the 64-bit
- * volume serial number splitting it into two 32-bit parts. We enter
- * the least significant 32-bits in f_fsid[0] and the most significant
- * 32-bits in f_fsid[1].
- */
- sfs->f_fsid = u64_to_fsid(vol->serial_no);
- /* Maximum length of filenames. */
- sfs->f_namelen = NTFS_MAX_NAME_LEN;
- return 0;
-}
-
-#ifdef NTFS_RW
-static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
-{
- return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL);
-}
-#endif
-
-/*
- * The complete super operations.
- */
-static const struct super_operations ntfs_sops = {
- .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
- .free_inode = ntfs_free_big_inode, /* VFS: Deallocate inode. */
-#ifdef NTFS_RW
- .write_inode = ntfs_write_inode, /* VFS: Write dirty inode to
- disk. */
-#endif /* NTFS_RW */
- .put_super = ntfs_put_super, /* Syscall: umount. */
- .statfs = ntfs_statfs, /* Syscall: statfs */
- .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */
- .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is
- removed from memory. */
- .show_options = ntfs_show_options, /* Show mount options in
- proc. */
-};
-
-/**
- * ntfs_fill_super - mount an ntfs filesystem
- * @sb: super block of ntfs filesystem to mount
- * @opt: string containing the mount options
- * @silent: silence error output
- *
- * ntfs_fill_super() is called by the VFS to mount the device described by @sb
- * with the mount otions in @data with the NTFS filesystem.
- *
- * If @silent is true, remain silent even if errors are detected. This is used
- * during bootup, when the kernel tries to mount the root filesystem with all
- * registered filesystems one after the other until one succeeds. This implies
- * that all filesystems except the correct one will quite correctly and
- * expectedly return an error, but nobody wants to see error messages when in
- * fact this is what is supposed to happen.
- *
- * NOTE: @sb->s_flags contains the mount options flags.
- */
-static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
-{
- ntfs_volume *vol;
- struct buffer_head *bh;
- struct inode *tmp_ino;
- int blocksize, result;
-
- /*
- * We do a pretty difficult piece of bootstrap by reading the
- * MFT (and other metadata) from disk into memory. We'll only
- * release this metadata during umount, so the locking patterns
- * observed during bootstrap do not count. So turn off the
- * observation of locking patterns (strictly for this context
- * only) while mounting NTFS. [The validator is still active
- * otherwise, even for this context: it will for example record
- * lock class registrations.]
- */
- lockdep_off();
- ntfs_debug("Entering.");
-#ifndef NTFS_RW
- sb->s_flags |= SB_RDONLY;
-#endif /* ! NTFS_RW */
- /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
- sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
- vol = NTFS_SB(sb);
- if (!vol) {
- if (!silent)
- ntfs_error(sb, "Allocation of NTFS volume structure "
- "failed. Aborting mount...");
- lockdep_on();
- return -ENOMEM;
- }
- /* Initialize ntfs_volume structure. */
- *vol = (ntfs_volume) {
- .sb = sb,
- /*
- * Default is group and other don't have any access to files or
- * directories while owner has full access. Further, files by
- * default are not executable but directories are of course
- * browseable.
- */
- .fmask = 0177,
- .dmask = 0077,
- };
- init_rwsem(&vol->mftbmp_lock);
- init_rwsem(&vol->lcnbmp_lock);
-
- /* By default, enable sparse support. */
- NVolSetSparseEnabled(vol);
-
- /* Important to get the mount options dealt with now. */
- if (!parse_options(vol, (char*)opt))
- goto err_out_now;
-
- /* We support sector sizes up to the PAGE_SIZE. */
- if (bdev_logical_block_size(sb->s_bdev) > PAGE_SIZE) {
- if (!silent)
- ntfs_error(sb, "Device has unsupported sector size "
- "(%i). The maximum supported sector "
- "size on this architecture is %lu "
- "bytes.",
- bdev_logical_block_size(sb->s_bdev),
- PAGE_SIZE);
- goto err_out_now;
- }
- /*
- * Setup the device access block size to NTFS_BLOCK_SIZE or the hard
- * sector size, whichever is bigger.
- */
- blocksize = sb_min_blocksize(sb, NTFS_BLOCK_SIZE);
- if (blocksize < NTFS_BLOCK_SIZE) {
- if (!silent)
- ntfs_error(sb, "Unable to set device block size.");
- goto err_out_now;
- }
- BUG_ON(blocksize != sb->s_blocksize);
- ntfs_debug("Set device block size to %i bytes (block size bits %i).",
- blocksize, sb->s_blocksize_bits);
- /* Determine the size of the device in units of block_size bytes. */
- vol->nr_blocks = sb_bdev_nr_blocks(sb);
- if (!vol->nr_blocks) {
- if (!silent)
- ntfs_error(sb, "Unable to determine device size.");
- goto err_out_now;
- }
- /* Read the boot sector and return unlocked buffer head to it. */
- if (!(bh = read_ntfs_boot_sector(sb, silent))) {
- if (!silent)
- ntfs_error(sb, "Not an NTFS volume.");
- goto err_out_now;
- }
- /*
- * Extract the data from the boot sector and setup the ntfs volume
- * using it.
- */
- result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data);
- brelse(bh);
- if (!result) {
- if (!silent)
- ntfs_error(sb, "Unsupported NTFS filesystem.");
- goto err_out_now;
- }
- /*
- * If the boot sector indicates a sector size bigger than the current
- * device block size, switch the device block size to the sector size.
- * TODO: It may be possible to support this case even when the set
- * below fails, we would just be breaking up the i/o for each sector
- * into multiple blocks for i/o purposes but otherwise it should just
- * work. However it is safer to leave disabled until someone hits this
- * error message and then we can get them to try it without the setting
- * so we know for sure that it works.
- */
- if (vol->sector_size > blocksize) {
- blocksize = sb_set_blocksize(sb, vol->sector_size);
- if (blocksize != vol->sector_size) {
- if (!silent)
- ntfs_error(sb, "Unable to set device block "
- "size to sector size (%i).",
- vol->sector_size);
- goto err_out_now;
- }
- BUG_ON(blocksize != sb->s_blocksize);
- vol->nr_blocks = sb_bdev_nr_blocks(sb);
- ntfs_debug("Changed device block size to %i bytes (block size "
- "bits %i) to match volume sector size.",
- blocksize, sb->s_blocksize_bits);
- }
- /* Initialize the cluster and mft allocators. */
- ntfs_setup_allocators(vol);
- /* Setup remaining fields in the super block. */
- sb->s_magic = NTFS_SB_MAGIC;
- /*
- * Ntfs allows 63 bits for the file size, i.e. correct would be:
- * sb->s_maxbytes = ~0ULL >> 1;
- * But the kernel uses a long as the page cache page index which on
- * 32-bit architectures is only 32-bits. MAX_LFS_FILESIZE is kernel
- * defined to the maximum the page cache page index can cope with
- * without overflowing the index or to 2^63 - 1, whichever is smaller.
- */
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- /* Ntfs measures time in 100ns intervals. */
- sb->s_time_gran = 100;
- /*
- * Now load the metadata required for the page cache and our address
- * space operations to function. We do this by setting up a specialised
- * read_inode method and then just calling the normal iget() to obtain
- * the inode for $MFT which is sufficient to allow our normal inode
- * operations and associated address space operations to function.
- */
- sb->s_op = &ntfs_sops;
- tmp_ino = new_inode(sb);
- if (!tmp_ino) {
- if (!silent)
- ntfs_error(sb, "Failed to load essential metadata.");
- goto err_out_now;
- }
- tmp_ino->i_ino = FILE_MFT;
- insert_inode_hash(tmp_ino);
- if (ntfs_read_inode_mount(tmp_ino) < 0) {
- if (!silent)
- ntfs_error(sb, "Failed to load essential metadata.");
- goto iput_tmp_ino_err_out_now;
- }
- mutex_lock(&ntfs_lock);
- /*
- * The current mount is a compression user if the cluster size is
- * less than or equal 4kiB.
- */
- if (vol->cluster_size <= 4096 && !ntfs_nr_compression_users++) {
- result = allocate_compression_buffers();
- if (result) {
- ntfs_error(NULL, "Failed to allocate buffers "
- "for compression engine.");
- ntfs_nr_compression_users--;
- mutex_unlock(&ntfs_lock);
- goto iput_tmp_ino_err_out_now;
- }
- }
- /*
- * Generate the global default upcase table if necessary. Also
- * temporarily increment the number of upcase users to avoid race
- * conditions with concurrent (u)mounts.
- */
- if (!default_upcase)
- default_upcase = generate_default_upcase();
- ntfs_nr_upcase_users++;
- mutex_unlock(&ntfs_lock);
- /*
- * From now on, ignore @silent parameter. If we fail below this line,
- * it will be due to a corrupt fs or a system error, so we report it.
- */
- /*
- * Open the system files with normal access functions and complete
- * setting up the ntfs super block.
- */
- if (!load_system_files(vol)) {
- ntfs_error(sb, "Failed to load system files.");
- goto unl_upcase_iput_tmp_ino_err_out_now;
- }
-
- /* We grab a reference, simulating an ntfs_iget(). */
- ihold(vol->root_ino);
- if ((sb->s_root = d_make_root(vol->root_ino))) {
- ntfs_debug("Exiting, status successful.");
- /* Release the default upcase if it has no users. */
- mutex_lock(&ntfs_lock);
- if (!--ntfs_nr_upcase_users && default_upcase) {
- ntfs_free(default_upcase);
- default_upcase = NULL;
- }
- mutex_unlock(&ntfs_lock);
- sb->s_export_op = &ntfs_export_ops;
- lockdep_on();
- return 0;
- }
- ntfs_error(sb, "Failed to allocate root directory.");
- /* Clean up after the successful load_system_files() call from above. */
- // TODO: Use ntfs_put_super() instead of repeating all this code...
- // FIXME: Should mark the volume clean as the error is most likely
- // -ENOMEM.
- iput(vol->vol_ino);
- vol->vol_ino = NULL;
- /* NTFS 3.0+ specific clean up. */
- if (vol->major_ver >= 3) {
-#ifdef NTFS_RW
- if (vol->usnjrnl_j_ino) {
- iput(vol->usnjrnl_j_ino);
- vol->usnjrnl_j_ino = NULL;
- }
- if (vol->usnjrnl_max_ino) {
- iput(vol->usnjrnl_max_ino);
- vol->usnjrnl_max_ino = NULL;
- }
- if (vol->usnjrnl_ino) {
- iput(vol->usnjrnl_ino);
- vol->usnjrnl_ino = NULL;
- }
- if (vol->quota_q_ino) {
- iput(vol->quota_q_ino);
- vol->quota_q_ino = NULL;
- }
- if (vol->quota_ino) {
- iput(vol->quota_ino);
- vol->quota_ino = NULL;
- }
-#endif /* NTFS_RW */
- if (vol->extend_ino) {
- iput(vol->extend_ino);
- vol->extend_ino = NULL;
- }
- if (vol->secure_ino) {
- iput(vol->secure_ino);
- vol->secure_ino = NULL;
- }
- }
- iput(vol->root_ino);
- vol->root_ino = NULL;
- iput(vol->lcnbmp_ino);
- vol->lcnbmp_ino = NULL;
- iput(vol->mftbmp_ino);
- vol->mftbmp_ino = NULL;
-#ifdef NTFS_RW
- if (vol->logfile_ino) {
- iput(vol->logfile_ino);
- vol->logfile_ino = NULL;
- }
- if (vol->mftmirr_ino) {
- iput(vol->mftmirr_ino);
- vol->mftmirr_ino = NULL;
- }
-#endif /* NTFS_RW */
- /* Throw away the table of attribute definitions. */
- vol->attrdef_size = 0;
- if (vol->attrdef) {
- ntfs_free(vol->attrdef);
- vol->attrdef = NULL;
- }
- vol->upcase_len = 0;
- mutex_lock(&ntfs_lock);
- if (vol->upcase == default_upcase) {
- ntfs_nr_upcase_users--;
- vol->upcase = NULL;
- }
- mutex_unlock(&ntfs_lock);
- if (vol->upcase) {
- ntfs_free(vol->upcase);
- vol->upcase = NULL;
- }
- if (vol->nls_map) {
- unload_nls(vol->nls_map);
- vol->nls_map = NULL;
- }
- /* Error exit code path. */
-unl_upcase_iput_tmp_ino_err_out_now:
- /*
- * Decrease the number of upcase users and destroy the global default
- * upcase table if necessary.
- */
- mutex_lock(&ntfs_lock);
- if (!--ntfs_nr_upcase_users && default_upcase) {
- ntfs_free(default_upcase);
- default_upcase = NULL;
- }
- if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
- free_compression_buffers();
- mutex_unlock(&ntfs_lock);
-iput_tmp_ino_err_out_now:
- iput(tmp_ino);
- if (vol->mft_ino && vol->mft_ino != tmp_ino)
- iput(vol->mft_ino);
- vol->mft_ino = NULL;
- /* Errors at this stage are irrelevant. */
-err_out_now:
- sb->s_fs_info = NULL;
- kfree(vol);
- ntfs_debug("Failed, returning -EINVAL.");
- lockdep_on();
- return -EINVAL;
-}
-
-/*
- * This is a slab cache to optimize allocations and deallocations of Unicode
- * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN
- * (255) Unicode characters + a terminating NULL Unicode character.
- */
-struct kmem_cache *ntfs_name_cache;
-
-/* Slab caches for efficient allocation/deallocation of inodes. */
-struct kmem_cache *ntfs_inode_cache;
-struct kmem_cache *ntfs_big_inode_cache;
-
-/* Init once constructor for the inode slab cache. */
-static void ntfs_big_inode_init_once(void *foo)
-{
- ntfs_inode *ni = (ntfs_inode *)foo;
-
- inode_init_once(VFS_I(ni));
-}
-
-/*
- * Slab caches to optimize allocations and deallocations of attribute search
- * contexts and index contexts, respectively.
- */
-struct kmem_cache *ntfs_attr_ctx_cache;
-struct kmem_cache *ntfs_index_ctx_cache;
-
-/* Driver wide mutex. */
-DEFINE_MUTEX(ntfs_lock);
-
-static struct dentry *ntfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
-}
-
-static struct file_system_type ntfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "ntfs",
- .mount = ntfs_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-MODULE_ALIAS_FS("ntfs");
-
-/* Stable names for the slab caches. */
-static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache";
-static const char ntfs_attr_ctx_cache_name[] = "ntfs_attr_ctx_cache";
-static const char ntfs_name_cache_name[] = "ntfs_name_cache";
-static const char ntfs_inode_cache_name[] = "ntfs_inode_cache";
-static const char ntfs_big_inode_cache_name[] = "ntfs_big_inode_cache";
-
-static int __init init_ntfs_fs(void)
-{
- int err = 0;
-
- /* This may be ugly but it results in pretty output so who cares. (-8 */
- pr_info("driver " NTFS_VERSION " [Flags: R/"
-#ifdef NTFS_RW
- "W"
-#else
- "O"
-#endif
-#ifdef DEBUG
- " DEBUG"
-#endif
-#ifdef MODULE
- " MODULE"
-#endif
- "].\n");
-
- ntfs_debug("Debug messages are enabled.");
-
- ntfs_index_ctx_cache = kmem_cache_create(ntfs_index_ctx_cache_name,
- sizeof(ntfs_index_context), 0 /* offset */,
- SLAB_HWCACHE_ALIGN, NULL /* ctor */);
- if (!ntfs_index_ctx_cache) {
- pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
- goto ictx_err_out;
- }
- ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
- sizeof(ntfs_attr_search_ctx), 0 /* offset */,
- SLAB_HWCACHE_ALIGN, NULL /* ctor */);
- if (!ntfs_attr_ctx_cache) {
- pr_crit("NTFS: Failed to create %s!\n",
- ntfs_attr_ctx_cache_name);
- goto actx_err_out;
- }
-
- ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name,
- (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!ntfs_name_cache) {
- pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
- goto name_err_out;
- }
-
- ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name,
- sizeof(ntfs_inode), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
- if (!ntfs_inode_cache) {
- pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
- goto inode_err_out;
- }
-
- ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name,
- sizeof(big_ntfs_inode), 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT, ntfs_big_inode_init_once);
- if (!ntfs_big_inode_cache) {
- pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
- goto big_inode_err_out;
- }
-
- /* Register the ntfs sysctls. */
- err = ntfs_sysctl(1);
- if (err) {
- pr_crit("Failed to register NTFS sysctls!\n");
- goto sysctl_err_out;
- }
-
- err = register_filesystem(&ntfs_fs_type);
- if (!err) {
- ntfs_debug("NTFS driver registered successfully.");
- return 0; /* Success! */
- }
- pr_crit("Failed to register NTFS filesystem driver!\n");
-
- /* Unregister the ntfs sysctls. */
- ntfs_sysctl(0);
-sysctl_err_out:
- kmem_cache_destroy(ntfs_big_inode_cache);
-big_inode_err_out:
- kmem_cache_destroy(ntfs_inode_cache);
-inode_err_out:
- kmem_cache_destroy(ntfs_name_cache);
-name_err_out:
- kmem_cache_destroy(ntfs_attr_ctx_cache);
-actx_err_out:
- kmem_cache_destroy(ntfs_index_ctx_cache);
-ictx_err_out:
- if (!err) {
- pr_crit("Aborting NTFS filesystem driver registration...\n");
- err = -ENOMEM;
- }
- return err;
-}
-
-static void __exit exit_ntfs_fs(void)
-{
- ntfs_debug("Unregistering NTFS driver.");
-
- unregister_filesystem(&ntfs_fs_type);
-
- /*
- * Make sure all delayed rcu free inodes are flushed before we
- * destroy cache.
- */
- rcu_barrier();
- kmem_cache_destroy(ntfs_big_inode_cache);
- kmem_cache_destroy(ntfs_inode_cache);
- kmem_cache_destroy(ntfs_name_cache);
- kmem_cache_destroy(ntfs_attr_ctx_cache);
- kmem_cache_destroy(ntfs_index_ctx_cache);
- /* Unregister the ntfs sysctls. */
- ntfs_sysctl(0);
-}
-
-MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>");
-MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.");
-MODULE_VERSION(NTFS_VERSION);
-MODULE_LICENSE("GPL");
-#ifdef DEBUG
-module_param(debug_msgs, bint, 0);
-MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
-#endif
-
-module_init(init_ntfs_fs)
-module_exit(exit_ntfs_fs)
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
deleted file mode 100644
index 4e980170d86a..000000000000
--- a/fs/ntfs/sysctl.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of
- * the Linux-NTFS project. Adapted from the old NTFS driver,
- * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
- *
- * Copyright (c) 2002-2005 Anton Altaparmakov
- */
-
-#ifdef DEBUG
-
-#include <linux/module.h>
-
-#ifdef CONFIG_SYSCTL
-
-#include <linux/proc_fs.h>
-#include <linux/sysctl.h>
-
-#include "sysctl.h"
-#include "debug.h"
-
-/* Definition of the ntfs sysctl. */
-static struct ctl_table ntfs_sysctls[] = {
- {
- .procname = "ntfs-debug",
- .data = &debug_msgs, /* Data pointer and size. */
- .maxlen = sizeof(debug_msgs),
- .mode = 0644, /* Mode, proc handler. */
- .proc_handler = proc_dointvec
- },
-};
-
-/* Storage for the sysctls header. */
-static struct ctl_table_header *sysctls_root_table;
-
-/**
- * ntfs_sysctl - add or remove the debug sysctl
- * @add: add (1) or remove (0) the sysctl
- *
- * Add or remove the debug sysctl. Return 0 on success or -errno on error.
- */
-int ntfs_sysctl(int add)
-{
- if (add) {
- BUG_ON(sysctls_root_table);
- sysctls_root_table = register_sysctl("fs", ntfs_sysctls);
- if (!sysctls_root_table)
- return -ENOMEM;
- } else {
- BUG_ON(!sysctls_root_table);
- unregister_sysctl_table(sysctls_root_table);
- sysctls_root_table = NULL;
- }
- return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-#endif /* DEBUG */
diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h
deleted file mode 100644
index 96bb2299d2d5..000000000000
--- a/fs/ntfs/sysctl.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of
- * the Linux-NTFS project. Adapted from the old NTFS driver,
- * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
- *
- * Copyright (c) 2002-2004 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_SYSCTL_H
-#define _LINUX_NTFS_SYSCTL_H
-
-
-#if defined(DEBUG) && defined(CONFIG_SYSCTL)
-
-extern int ntfs_sysctl(int add);
-
-#else
-
-/* Just return success. */
-static inline int ntfs_sysctl(int add)
-{
- return 0;
-}
-
-#endif /* DEBUG && CONFIG_SYSCTL */
-#endif /* _LINUX_NTFS_SYSCTL_H */
diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h
deleted file mode 100644
index 6b63261300cc..000000000000
--- a/fs/ntfs/time.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * time.h - NTFS time conversion functions. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2005 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_TIME_H
-#define _LINUX_NTFS_TIME_H
-
-#include <linux/time.h> /* For current_kernel_time(). */
-#include <asm/div64.h> /* For do_div(). */
-
-#include "endian.h"
-
-#define NTFS_TIME_OFFSET ((s64)(369 * 365 + 89) * 24 * 3600 * 10000000)
-
-/**
- * utc2ntfs - convert Linux UTC time to NTFS time
- * @ts: Linux UTC time to convert to NTFS time
- *
- * Convert the Linux UTC time @ts to its corresponding NTFS time and return
- * that in little endian format.
- *
- * Linux stores time in a struct timespec64 consisting of a time64_t tv_sec
- * and a long tv_nsec where tv_sec is the number of 1-second intervals since
- * 1st January 1970, 00:00:00 UTC and tv_nsec is the number of 1-nano-second
- * intervals since the value of tv_sec.
- *
- * NTFS uses Microsoft's standard time format which is stored in a s64 and is
- * measured as the number of 100-nano-second intervals since 1st January 1601,
- * 00:00:00 UTC.
- */
-static inline sle64 utc2ntfs(const struct timespec64 ts)
-{
- /*
- * Convert the seconds to 100ns intervals, add the nano-seconds
- * converted to 100ns intervals, and then add the NTFS time offset.
- */
- return cpu_to_sle64((s64)ts.tv_sec * 10000000 + ts.tv_nsec / 100 +
- NTFS_TIME_OFFSET);
-}
-
-/**
- * get_current_ntfs_time - get the current time in little endian NTFS format
- *
- * Get the current time from the Linux kernel, convert it to its corresponding
- * NTFS time and return that in little endian format.
- */
-static inline sle64 get_current_ntfs_time(void)
-{
- struct timespec64 ts;
-
- ktime_get_coarse_real_ts64(&ts);
- return utc2ntfs(ts);
-}
-
-/**
- * ntfs2utc - convert NTFS time to Linux time
- * @time: NTFS time (little endian) to convert to Linux UTC
- *
- * Convert the little endian NTFS time @time to its corresponding Linux UTC
- * time and return that in cpu format.
- *
- * Linux stores time in a struct timespec64 consisting of a time64_t tv_sec
- * and a long tv_nsec where tv_sec is the number of 1-second intervals since
- * 1st January 1970, 00:00:00 UTC and tv_nsec is the number of 1-nano-second
- * intervals since the value of tv_sec.
- *
- * NTFS uses Microsoft's standard time format which is stored in a s64 and is
- * measured as the number of 100 nano-second intervals since 1st January 1601,
- * 00:00:00 UTC.
- */
-static inline struct timespec64 ntfs2utc(const sle64 time)
-{
- struct timespec64 ts;
-
- /* Subtract the NTFS time offset. */
- u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET);
- /*
- * Convert the time to 1-second intervals and the remainder to
- * 1-nano-second intervals.
- */
- ts.tv_nsec = do_div(t, 10000000) * 100;
- ts.tv_sec = t;
- return ts;
-}
-
-#endif /* _LINUX_NTFS_TIME_H */
diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h
deleted file mode 100644
index 9a47859e7a06..000000000000
--- a/fs/ntfs/types.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * types.h - Defines for NTFS Linux kernel driver specific types.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2005 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_TYPES_H
-#define _LINUX_NTFS_TYPES_H
-
-#include <linux/types.h>
-
-typedef __le16 le16;
-typedef __le32 le32;
-typedef __le64 le64;
-typedef __u16 __bitwise sle16;
-typedef __u32 __bitwise sle32;
-typedef __u64 __bitwise sle64;
-
-/* 2-byte Unicode character type. */
-typedef le16 ntfschar;
-#define UCHAR_T_SIZE_BITS 1
-
-/*
- * Clusters are signed 64-bit values on NTFS volumes. We define two types, LCN
- * and VCN, to allow for type checking and better code readability.
- */
-typedef s64 VCN;
-typedef sle64 leVCN;
-typedef s64 LCN;
-typedef sle64 leLCN;
-
-/*
- * The NTFS journal $LogFile uses log sequence numbers which are signed 64-bit
- * values. We define our own type LSN, to allow for type checking and better
- * code readability.
- */
-typedef s64 LSN;
-typedef sle64 leLSN;
-
-/*
- * The NTFS transaction log $UsnJrnl uses usn which are signed 64-bit values.
- * We define our own type USN, to allow for type checking and better code
- * readability.
- */
-typedef s64 USN;
-typedef sle64 leUSN;
-
-typedef enum {
- CASE_SENSITIVE = 0,
- IGNORE_CASE = 1,
-} IGNORE_CASE_BOOL;
-
-#endif /* _LINUX_NTFS_TYPES_H */
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
deleted file mode 100644
index a6b6c64f14a9..000000000000
--- a/fs/ntfs/unistr.c
+++ /dev/null
@@ -1,384 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2006 Anton Altaparmakov
- */
-
-#include <linux/slab.h>
-
-#include "types.h"
-#include "debug.h"
-#include "ntfs.h"
-
-/*
- * IMPORTANT
- * =========
- *
- * All these routines assume that the Unicode characters are in little endian
- * encoding inside the strings!!!
- */
-
-/*
- * This is used by the name collation functions to quickly determine what
- * characters are (in)valid.
- */
-static const u8 legal_ansi_char_array[0x40] = {
- 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-
- 0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
- 0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
-
- 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
- 0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
-};
-
-/**
- * ntfs_are_names_equal - compare two Unicode names for equality
- * @s1: name to compare to @s2
- * @s1_len: length in Unicode characters of @s1
- * @s2: name to compare to @s1
- * @s2_len: length in Unicode characters of @s2
- * @ic: ignore case bool
- * @upcase: upcase table (only if @ic == IGNORE_CASE)
- * @upcase_size: length in Unicode characters of @upcase (if present)
- *
- * Compare the names @s1 and @s2 and return 'true' (1) if the names are
- * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE,
- * the @upcase table is used to performa a case insensitive comparison.
- */
-bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
- const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic,
- const ntfschar *upcase, const u32 upcase_size)
-{
- if (s1_len != s2_len)
- return false;
- if (ic == CASE_SENSITIVE)
- return !ntfs_ucsncmp(s1, s2, s1_len);
- return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size);
-}
-
-/**
- * ntfs_collate_names - collate two Unicode names
- * @name1: first Unicode name to compare
- * @name2: second Unicode name to compare
- * @err_val: if @name1 contains an invalid character return this value
- * @ic: either CASE_SENSITIVE or IGNORE_CASE
- * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE)
- * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE)
- *
- * ntfs_collate_names collates two Unicode names and returns:
- *
- * -1 if the first name collates before the second one,
- * 0 if the names match,
- * 1 if the second name collates before the first one, or
- * @err_val if an invalid character is found in @name1 during the comparison.
- *
- * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
- */
-int ntfs_collate_names(const ntfschar *name1, const u32 name1_len,
- const ntfschar *name2, const u32 name2_len,
- const int err_val, const IGNORE_CASE_BOOL ic,
- const ntfschar *upcase, const u32 upcase_len)
-{
- u32 cnt, min_len;
- u16 c1, c2;
-
- min_len = name1_len;
- if (name1_len > name2_len)
- min_len = name2_len;
- for (cnt = 0; cnt < min_len; ++cnt) {
- c1 = le16_to_cpu(*name1++);
- c2 = le16_to_cpu(*name2++);
- if (ic) {
- if (c1 < upcase_len)
- c1 = le16_to_cpu(upcase[c1]);
- if (c2 < upcase_len)
- c2 = le16_to_cpu(upcase[c2]);
- }
- if (c1 < 64 && legal_ansi_char_array[c1] & 8)
- return err_val;
- if (c1 < c2)
- return -1;
- if (c1 > c2)
- return 1;
- }
- if (name1_len < name2_len)
- return -1;
- if (name1_len == name2_len)
- return 0;
- /* name1_len > name2_len */
- c1 = le16_to_cpu(*name1);
- if (c1 < 64 && legal_ansi_char_array[c1] & 8)
- return err_val;
- return 1;
-}
-
-/**
- * ntfs_ucsncmp - compare two little endian Unicode strings
- * @s1: first string
- * @s2: second string
- * @n: maximum unicode characters to compare
- *
- * Compare the first @n characters of the Unicode strings @s1 and @s2,
- * The strings in little endian format and appropriate le16_to_cpu()
- * conversion is performed on non-little endian machines.
- *
- * The function returns an integer less than, equal to, or greater than zero
- * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
- * to be less than, to match, or be greater than @s2.
- */
-int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
-{
- u16 c1, c2;
- size_t i;
-
- for (i = 0; i < n; ++i) {
- c1 = le16_to_cpu(s1[i]);
- c2 = le16_to_cpu(s2[i]);
- if (c1 < c2)
- return -1;
- if (c1 > c2)
- return 1;
- if (!c1)
- break;
- }
- return 0;
-}
-
-/**
- * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
- * @s1: first string
- * @s2: second string
- * @n: maximum unicode characters to compare
- * @upcase: upcase table
- * @upcase_size: upcase table size in Unicode characters
- *
- * Compare the first @n characters of the Unicode strings @s1 and @s2,
- * ignoring case. The strings in little endian format and appropriate
- * le16_to_cpu() conversion is performed on non-little endian machines.
- *
- * Each character is uppercased using the @upcase table before the comparison.
- *
- * The function returns an integer less than, equal to, or greater than zero
- * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
- * to be less than, to match, or be greater than @s2.
- */
-int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
- const ntfschar *upcase, const u32 upcase_size)
-{
- size_t i;
- u16 c1, c2;
-
- for (i = 0; i < n; ++i) {
- if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
- c1 = le16_to_cpu(upcase[c1]);
- if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
- c2 = le16_to_cpu(upcase[c2]);
- if (c1 < c2)
- return -1;
- if (c1 > c2)
- return 1;
- if (!c1)
- break;
- }
- return 0;
-}
-
-void ntfs_upcase_name(ntfschar *name, u32 name_len, const ntfschar *upcase,
- const u32 upcase_len)
-{
- u32 i;
- u16 u;
-
- for (i = 0; i < name_len; i++)
- if ((u = le16_to_cpu(name[i])) < upcase_len)
- name[i] = upcase[u];
-}
-
-void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr,
- const ntfschar *upcase, const u32 upcase_len)
-{
- ntfs_upcase_name((ntfschar*)&file_name_attr->file_name,
- file_name_attr->file_name_length, upcase, upcase_len);
-}
-
-int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
- FILE_NAME_ATTR *file_name_attr2,
- const int err_val, const IGNORE_CASE_BOOL ic,
- const ntfschar *upcase, const u32 upcase_len)
-{
- return ntfs_collate_names((ntfschar*)&file_name_attr1->file_name,
- file_name_attr1->file_name_length,
- (ntfschar*)&file_name_attr2->file_name,
- file_name_attr2->file_name_length,
- err_val, ic, upcase, upcase_len);
-}
-
-/**
- * ntfs_nlstoucs - convert NLS string to little endian Unicode string
- * @vol: ntfs volume which we are working with
- * @ins: input NLS string buffer
- * @ins_len: length of input string in bytes
- * @outs: on return contains the allocated output Unicode string buffer
- *
- * Convert the input string @ins, which is in whatever format the loaded NLS
- * map dictates, into a little endian, 2-byte Unicode string.
- *
- * This function allocates the string and the caller is responsible for
- * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it.
- *
- * On success the function returns the number of Unicode characters written to
- * the output string *@outs (>= 0), not counting the terminating Unicode NULL
- * character. *@outs is set to the allocated output string buffer.
- *
- * On error, a negative number corresponding to the error code is returned. In
- * that case the output string is not allocated. Both *@outs and *@outs_len
- * are then undefined.
- *
- * This might look a bit odd due to fast path optimization...
- */
-int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
- const int ins_len, ntfschar **outs)
-{
- struct nls_table *nls = vol->nls_map;
- ntfschar *ucs;
- wchar_t wc;
- int i, o, wc_len;
-
- /* We do not trust outside sources. */
- if (likely(ins)) {
- ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
- if (likely(ucs)) {
- for (i = o = 0; i < ins_len; i += wc_len) {
- wc_len = nls->char2uni(ins + i, ins_len - i,
- &wc);
- if (likely(wc_len >= 0 &&
- o < NTFS_MAX_NAME_LEN)) {
- if (likely(wc)) {
- ucs[o++] = cpu_to_le16(wc);
- continue;
- } /* else if (!wc) */
- break;
- } /* else if (wc_len < 0 ||
- o >= NTFS_MAX_NAME_LEN) */
- goto name_err;
- }
- ucs[o] = 0;
- *outs = ucs;
- return o;
- } /* else if (!ucs) */
- ntfs_error(vol->sb, "Failed to allocate buffer for converted "
- "name from ntfs_name_cache.");
- return -ENOMEM;
- } /* else if (!ins) */
- ntfs_error(vol->sb, "Received NULL pointer.");
- return -EINVAL;
-name_err:
- kmem_cache_free(ntfs_name_cache, ucs);
- if (wc_len < 0) {
- ntfs_error(vol->sb, "Name using character set %s contains "
- "characters that cannot be converted to "
- "Unicode.", nls->charset);
- i = -EILSEQ;
- } else /* if (o >= NTFS_MAX_NAME_LEN) */ {
- ntfs_error(vol->sb, "Name is too long (maximum length for a "
- "name on NTFS is %d Unicode characters.",
- NTFS_MAX_NAME_LEN);
- i = -ENAMETOOLONG;
- }
- return i;
-}
-
-/**
- * ntfs_ucstonls - convert little endian Unicode string to NLS string
- * @vol: ntfs volume which we are working with
- * @ins: input Unicode string buffer
- * @ins_len: length of input string in Unicode characters
- * @outs: on return contains the (allocated) output NLS string buffer
- * @outs_len: length of output string buffer in bytes
- *
- * Convert the input little endian, 2-byte Unicode string @ins, of length
- * @ins_len into the string format dictated by the loaded NLS.
- *
- * If *@outs is NULL, this function allocates the string and the caller is
- * responsible for calling kfree(*@outs); when finished with it. In this case
- * @outs_len is ignored and can be 0.
- *
- * On success the function returns the number of bytes written to the output
- * string *@outs (>= 0), not counting the terminating NULL byte. If the output
- * string buffer was allocated, *@outs is set to it.
- *
- * On error, a negative number corresponding to the error code is returned. In
- * that case the output string is not allocated. The contents of *@outs are
- * then undefined.
- *
- * This might look a bit odd due to fast path optimization...
- */
-int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
- const int ins_len, unsigned char **outs, int outs_len)
-{
- struct nls_table *nls = vol->nls_map;
- unsigned char *ns;
- int i, o, ns_len, wc;
-
- /* We don't trust outside sources. */
- if (ins) {
- ns = *outs;
- ns_len = outs_len;
- if (ns && !ns_len) {
- wc = -ENAMETOOLONG;
- goto conversion_err;
- }
- if (!ns) {
- ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
- ns = kmalloc(ns_len + 1, GFP_NOFS);
- if (!ns)
- goto mem_err_out;
- }
- for (i = o = 0; i < ins_len; i++) {
-retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
- ns_len - o);
- if (wc > 0) {
- o += wc;
- continue;
- } else if (!wc)
- break;
- else if (wc == -ENAMETOOLONG && ns != *outs) {
- unsigned char *tc;
- /* Grow in multiples of 64 bytes. */
- tc = kmalloc((ns_len + 64) &
- ~63, GFP_NOFS);
- if (tc) {
- memcpy(tc, ns, ns_len);
- ns_len = ((ns_len + 64) & ~63) - 1;
- kfree(ns);
- ns = tc;
- goto retry;
- } /* No memory so goto conversion_error; */
- } /* wc < 0, real error. */
- goto conversion_err;
- }
- ns[o] = 0;
- *outs = ns;
- return o;
- } /* else (!ins) */
- ntfs_error(vol->sb, "Received NULL pointer.");
- return -EINVAL;
-conversion_err:
- ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
- "converted to character set %s. You might want to "
- "try to use the mount option nls=utf8.", nls->charset);
- if (ns != *outs)
- kfree(ns);
- if (wc != -ENAMETOOLONG)
- wc = -EILSEQ;
- return wc;
-mem_err_out:
- ntfs_error(vol->sb, "Failed to allocate name!");
- return -ENOMEM;
-}
diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c
deleted file mode 100644
index 4ebe84a78dea..000000000000
--- a/fs/ntfs/upcase.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * upcase.c - Generate the full NTFS Unicode upcase table in little endian.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org>
- * Copyright (c) 2001-2006 Anton Altaparmakov
- */
-
-#include "malloc.h"
-#include "ntfs.h"
-
-ntfschar *generate_default_upcase(void)
-{
- static const int uc_run_table[][3] = { /* Start, End, Add */
- {0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74},
- {0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86},
- {0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
- {0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128},
- {0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112},
- {0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126},
- {0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8},
- {0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8},
- {0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8},
- {0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7},
- {0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16},
- {0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26},
- {0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32},
- {0}
- };
-
- static const int uc_dup_table[][2] = { /* Start, End */
- {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
- {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
- {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
- {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
- {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
- {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
- {0}
- };
-
- static const int uc_word_table[][2] = { /* Offset, Value */
- {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
- {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
- {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
- {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
- {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
- {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
- {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
- {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
- {0}
- };
-
- int i, r;
- ntfschar *uc;
-
- uc = ntfs_malloc_nofs(default_upcase_len * sizeof(ntfschar));
- if (!uc)
- return uc;
- memset(uc, 0, default_upcase_len * sizeof(ntfschar));
- /* Generate the little endian Unicode upcase table used by ntfs. */
- for (i = 0; i < default_upcase_len; i++)
- uc[i] = cpu_to_le16(i);
- for (r = 0; uc_run_table[r][0]; r++)
- for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
- le16_add_cpu(&uc[i], uc_run_table[r][2]);
- for (r = 0; uc_dup_table[r][0]; r++)
- for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
- le16_add_cpu(&uc[i + 1], -1);
- for (r = 0; uc_word_table[r][0]; r++)
- uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]);
- return uc;
-}
diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c
deleted file mode 100644
index 9097a0b4ef25..000000000000
--- a/fs/ntfs/usnjrnl.c
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * usnjrnl.h - NTFS kernel transaction log ($UsnJrnl) handling. Part of the
- * Linux-NTFS project.
- *
- * Copyright (c) 2005 Anton Altaparmakov
- */
-
-#ifdef NTFS_RW
-
-#include <linux/fs.h>
-#include <linux/highmem.h>
-#include <linux/mm.h>
-
-#include "aops.h"
-#include "debug.h"
-#include "endian.h"
-#include "time.h"
-#include "types.h"
-#include "usnjrnl.h"
-#include "volume.h"
-
-/**
- * ntfs_stamp_usnjrnl - stamp the transaction log ($UsnJrnl) on an ntfs volume
- * @vol: ntfs volume on which to stamp the transaction log
- *
- * Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return
- * 'true' on success and 'false' on error.
- *
- * This function assumes that the transaction log has already been loaded and
- * consistency checked by a call to fs/ntfs/super.c::load_and_init_usnjrnl().
- */
-bool ntfs_stamp_usnjrnl(ntfs_volume *vol)
-{
- ntfs_debug("Entering.");
- if (likely(!NVolUsnJrnlStamped(vol))) {
- sle64 stamp;
- struct page *page;
- USN_HEADER *uh;
-
- page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Failed to read from "
- "$UsnJrnl/$DATA/$Max attribute.");
- return false;
- }
- uh = (USN_HEADER*)page_address(page);
- stamp = get_current_ntfs_time();
- ntfs_debug("Stamping transaction log ($UsnJrnl): old "
- "journal_id 0x%llx, old lowest_valid_usn "
- "0x%llx, new journal_id 0x%llx, new "
- "lowest_valid_usn 0x%llx.",
- (long long)sle64_to_cpu(uh->journal_id),
- (long long)sle64_to_cpu(uh->lowest_valid_usn),
- (long long)sle64_to_cpu(stamp),
- i_size_read(vol->usnjrnl_j_ino));
- uh->lowest_valid_usn =
- cpu_to_sle64(i_size_read(vol->usnjrnl_j_ino));
- uh->journal_id = stamp;
- flush_dcache_page(page);
- set_page_dirty(page);
- ntfs_unmap_page(page);
- /* Set the flag so we do not have to do it again on remount. */
- NVolSetUsnJrnlStamped(vol);
- }
- ntfs_debug("Done.");
- return true;
-}
-
-#endif /* NTFS_RW */
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
deleted file mode 100644
index 85f531b59395..000000000000
--- a/fs/ntfs/usnjrnl.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * usnjrnl.h - Defines for NTFS kernel transaction log ($UsnJrnl) handling.
- * Part of the Linux-NTFS project.
- *
- * Copyright (c) 2005 Anton Altaparmakov
- */
-
-#ifndef _LINUX_NTFS_USNJRNL_H
-#define _LINUX_NTFS_USNJRNL_H
-
-#ifdef NTFS_RW
-
-#include "types.h"
-#include "endian.h"
-#include "layout.h"
-#include "volume.h"
-
-/*
- * Transaction log ($UsnJrnl) organization:
- *
- * The transaction log records whenever a file is modified in any way. So for
- * example it will record that file "blah" was written to at a particular time
- * but not what was written. If will record that a file was deleted or
- * created, that a file was truncated, etc. See below for all the reason
- * codes used.
- *
- * The transaction log is in the $Extend directory which is in the root
- * directory of each volume. If it is not present it means transaction
- * logging is disabled. If it is present it means transaction logging is
- * either enabled or in the process of being disabled in which case we can
- * ignore it as it will go away as soon as Windows gets its hands on it.
- *
- * To determine whether the transaction logging is enabled or in the process
- * of being disabled, need to check the volume flags in the
- * $VOLUME_INFORMATION attribute in the $Volume system file (which is present
- * in the root directory and has a fixed mft record number, see layout.h).
- * If the flag VOLUME_DELETE_USN_UNDERWAY is set it means the transaction log
- * is in the process of being disabled and if this flag is clear it means the
- * transaction log is enabled.
- *
- * The transaction log consists of two parts; the $DATA/$Max attribute as well
- * as the $DATA/$J attribute. $Max is a header describing the transaction
- * log whilst $J is the transaction log data itself as a sequence of variable
- * sized USN_RECORDs (see below for all the structures).
- *
- * We do not care about transaction logging at this point in time but we still
- * need to let windows know that the transaction log is out of date. To do
- * this we need to stamp the transaction log. This involves setting the
- * lowest_valid_usn field in the $DATA/$Max attribute to the usn to be used
- * for the next added USN_RECORD to the $DATA/$J attribute as well as
- * generating a new journal_id in $DATA/$Max.
- *
- * The journal_id is as of the current version (2.0) of the transaction log
- * simply the 64-bit timestamp of when the journal was either created or last
- * stamped.
- *
- * To determine the next usn there are two ways. The first is to parse
- * $DATA/$J and to find the last USN_RECORD in it and to add its record_length
- * to its usn (which is the byte offset in the $DATA/$J attribute). The
- * second is simply to take the data size of the attribute. Since the usns
- * are simply byte offsets into $DATA/$J, this is exactly the next usn. For
- * obvious reasons we use the second method as it is much simpler and faster.
- *
- * As an aside, note that to actually disable the transaction log, one would
- * need to set the VOLUME_DELETE_USN_UNDERWAY flag (see above), then go
- * through all the mft records on the volume and set the usn field in their
- * $STANDARD_INFORMATION attribute to zero. Once that is done, one would need
- * to delete the transaction log file, i.e. \$Extent\$UsnJrnl, and finally,
- * one would need to clear the VOLUME_DELETE_USN_UNDERWAY flag.
- *
- * Note that if a volume is unmounted whilst the transaction log is being
- * disabled, the process will continue the next time the volume is mounted.
- * This is why we can safely mount read-write when we see a transaction log
- * in the process of being deleted.
- */
-
-/* Some $UsnJrnl related constants. */
-#define UsnJrnlMajorVer 2
-#define UsnJrnlMinorVer 0
-
-/*
- * $DATA/$Max attribute. This is (always?) resident and has a fixed size of
- * 32 bytes. It contains the header describing the transaction log.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/sle64 maximum_size; /* The maximum on-disk size of the $DATA/$J
- attribute. */
-/* 8*/sle64 allocation_delta; /* Number of bytes by which to increase the
- size of the $DATA/$J attribute. */
-/*0x10*/sle64 journal_id; /* Current id of the transaction log. */
-/*0x18*/leUSN lowest_valid_usn; /* Lowest valid usn in $DATA/$J for the
- current journal_id. */
-/* sizeof() = 32 (0x20) bytes */
-} __attribute__ ((__packed__)) USN_HEADER;
-
-/*
- * Reason flags (32-bit). Cumulative flags describing the change(s) to the
- * file since it was last opened. I think the names speak for themselves but
- * if you disagree check out the descriptions in the Linux NTFS project NTFS
- * documentation: http://www.linux-ntfs.org/
- */
-enum {
- USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001),
- USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002),
- USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004),
- USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010),
- USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020),
- USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040),
- USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100),
- USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200),
- USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400),
- USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800),
- USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000),
- USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000),
- USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000),
- USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000),
- USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000),
- USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000),
- USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000),
- USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000),
- USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000),
- USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000),
- USN_REASON_CLOSE = cpu_to_le32(0x80000000),
-};
-
-typedef le32 USN_REASON_FLAGS;
-
-/*
- * Source info flags (32-bit). Information about the source of the change(s)
- * to the file. For detailed descriptions of what these mean, see the Linux
- * NTFS project NTFS documentation:
- * http://www.linux-ntfs.org/
- */
-enum {
- USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001),
- USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002),
- USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004),
-};
-
-typedef le32 USN_SOURCE_INFO_FLAGS;
-
-/*
- * $DATA/$J attribute. This is always non-resident, is marked as sparse, and
- * is of variabled size. It consists of a sequence of variable size
- * USN_RECORDS. The minimum allocated_size is allocation_delta as
- * specified in $DATA/$Max. When the maximum_size specified in $DATA/$Max is
- * exceeded by more than allocation_delta bytes, allocation_delta bytes are
- * allocated and appended to the $DATA/$J attribute and an equal number of
- * bytes at the beginning of the attribute are freed and made sparse. Note the
- * making sparse only happens at volume checkpoints and hence the actual
- * $DATA/$J size can exceed maximum_size + allocation_delta temporarily.
- */
-typedef struct {
-/*Ofs*/
-/* 0*/le32 length; /* Byte size of this record (8-byte
- aligned). */
-/* 4*/le16 major_ver; /* Major version of the transaction log used
- for this record. */
-/* 6*/le16 minor_ver; /* Minor version of the transaction log used
- for this record. */
-/* 8*/leMFT_REF mft_reference;/* The mft reference of the file (or
- directory) described by this record. */
-/*0x10*/leMFT_REF parent_directory;/* The mft reference of the parent
- directory of the file described by this
- record. */
-/*0x18*/leUSN usn; /* The usn of this record. Equals the offset
- within the $DATA/$J attribute. */
-/*0x20*/sle64 time; /* Time when this record was created. */
-/*0x28*/USN_REASON_FLAGS reason;/* Reason flags (see above). */
-/*0x2c*/USN_SOURCE_INFO_FLAGS source_info;/* Source info flags (see above). */
-/*0x30*/le32 security_id; /* File security_id copied from
- $STANDARD_INFORMATION. */
-/*0x34*/FILE_ATTR_FLAGS file_attributes; /* File attributes copied from
- $STANDARD_INFORMATION or $FILE_NAME (not
- sure which). */
-/*0x38*/le16 file_name_size; /* Size of the file name in bytes. */
-/*0x3a*/le16 file_name_offset; /* Offset to the file name in bytes from the
- start of this record. */
-/*0x3c*/ntfschar file_name[0]; /* Use when creating only. When reading use
- file_name_offset to determine the location
- of the name. */
-/* sizeof() = 60 (0x3c) bytes */
-} __attribute__ ((__packed__)) USN_RECORD;
-
-extern bool ntfs_stamp_usnjrnl(ntfs_volume *vol);
-
-#endif /* NTFS_RW */
-
-#endif /* _LINUX_NTFS_USNJRNL_H */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
deleted file mode 100644
index 930a9ae8a053..000000000000
--- a/fs/ntfs/volume.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
- * of the Linux-NTFS project.
- *
- * Copyright (c) 2001-2006 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
- */
-
-#ifndef _LINUX_NTFS_VOLUME_H
-#define _LINUX_NTFS_VOLUME_H
-
-#include <linux/rwsem.h>
-#include <linux/uidgid.h>
-
-#include "types.h"
-#include "layout.h"
-
-/*
- * The NTFS in memory super block structure.
- */
-typedef struct {
- /*
- * FIXME: Reorder to have commonly used together element within the
- * same cache line, aiming at a cache line size of 32 bytes. Aim for
- * 64 bytes for less commonly used together elements. Put most commonly
- * used elements to front of structure. Obviously do this only when the
- * structure has stabilized... (AIA)
- */
- /* Device specifics. */
- struct super_block *sb; /* Pointer back to the super_block. */
- LCN nr_blocks; /* Number of sb->s_blocksize bytes
- sized blocks on the device. */
- /* Configuration provided by user at mount time. */
- unsigned long flags; /* Miscellaneous flags, see below. */
- kuid_t uid; /* uid that files will be mounted as. */
- kgid_t gid; /* gid that files will be mounted as. */
- umode_t fmask; /* The mask for file permissions. */
- umode_t dmask; /* The mask for directory
- permissions. */
- u8 mft_zone_multiplier; /* Initial mft zone multiplier. */
- u8 on_errors; /* What to do on filesystem errors. */
- /* NTFS bootsector provided information. */
- u16 sector_size; /* in bytes */
- u8 sector_size_bits; /* log2(sector_size) */
- u32 cluster_size; /* in bytes */
- u32 cluster_size_mask; /* cluster_size - 1 */
- u8 cluster_size_bits; /* log2(cluster_size) */
- u32 mft_record_size; /* in bytes */
- u32 mft_record_size_mask; /* mft_record_size - 1 */
- u8 mft_record_size_bits; /* log2(mft_record_size) */
- u32 index_record_size; /* in bytes */
- u32 index_record_size_mask; /* index_record_size - 1 */
- u8 index_record_size_bits; /* log2(index_record_size) */
- LCN nr_clusters; /* Volume size in clusters == number of
- bits in lcn bitmap. */
- LCN mft_lcn; /* Cluster location of mft data. */
- LCN mftmirr_lcn; /* Cluster location of copy of mft. */
- u64 serial_no; /* The volume serial number. */
- /* Mount specific NTFS information. */
- u32 upcase_len; /* Number of entries in upcase[]. */
- ntfschar *upcase; /* The upcase table. */
-
- s32 attrdef_size; /* Size of the attribute definition
- table in bytes. */
- ATTR_DEF *attrdef; /* Table of attribute definitions.
- Obtained from FILE_AttrDef. */
-
-#ifdef NTFS_RW
- /* Variables used by the cluster and mft allocators. */
- s64 mft_data_pos; /* Mft record number at which to
- allocate the next mft record. */
- LCN mft_zone_start; /* First cluster of the mft zone. */
- LCN mft_zone_end; /* First cluster beyond the mft zone. */
- LCN mft_zone_pos; /* Current position in the mft zone. */
- LCN data1_zone_pos; /* Current position in the first data
- zone. */
- LCN data2_zone_pos; /* Current position in the second data
- zone. */
-#endif /* NTFS_RW */
-
- struct inode *mft_ino; /* The VFS inode of $MFT. */
-
- struct inode *mftbmp_ino; /* Attribute inode for $MFT/$BITMAP. */
- struct rw_semaphore mftbmp_lock; /* Lock for serializing accesses to the
- mft record bitmap ($MFT/$BITMAP). */
-#ifdef NTFS_RW
- struct inode *mftmirr_ino; /* The VFS inode of $MFTMirr. */
- int mftmirr_size; /* Size of mft mirror in mft records. */
-
- struct inode *logfile_ino; /* The VFS inode of $LogFile. */
-#endif /* NTFS_RW */
-
- struct inode *lcnbmp_ino; /* The VFS inode of $Bitmap. */
- struct rw_semaphore lcnbmp_lock; /* Lock for serializing accesses to the
- cluster bitmap ($Bitmap/$DATA). */
-
- struct inode *vol_ino; /* The VFS inode of $Volume. */
- VOLUME_FLAGS vol_flags; /* Volume flags. */
- u8 major_ver; /* Ntfs major version of volume. */
- u8 minor_ver; /* Ntfs minor version of volume. */
-
- struct inode *root_ino; /* The VFS inode of the root
- directory. */
- struct inode *secure_ino; /* The VFS inode of $Secure (NTFS3.0+
- only, otherwise NULL). */
- struct inode *extend_ino; /* The VFS inode of $Extend (NTFS3.0+
- only, otherwise NULL). */
-#ifdef NTFS_RW
- /* $Quota stuff is NTFS3.0+ specific. Unused/NULL otherwise. */
- struct inode *quota_ino; /* The VFS inode of $Quota. */
- struct inode *quota_q_ino; /* Attribute inode for $Quota/$Q. */
- /* $UsnJrnl stuff is NTFS3.0+ specific. Unused/NULL otherwise. */
- struct inode *usnjrnl_ino; /* The VFS inode of $UsnJrnl. */
- struct inode *usnjrnl_max_ino; /* Attribute inode for $UsnJrnl/$Max. */
- struct inode *usnjrnl_j_ino; /* Attribute inode for $UsnJrnl/$J. */
-#endif /* NTFS_RW */
- struct nls_table *nls_map;
-} ntfs_volume;
-
-/*
- * Defined bits for the flags field in the ntfs_volume structure.
- */
-typedef enum {
- NV_Errors, /* 1: Volume has errors, prevent remount rw. */
- NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */
- NV_CaseSensitive, /* 1: Treat file names as case sensitive and
- create filenames in the POSIX namespace.
- Otherwise be case insensitive but still
- create file names in POSIX namespace. */
- NV_LogFileEmpty, /* 1: $LogFile journal is empty. */
- NV_QuotaOutOfDate, /* 1: $Quota is out of date. */
- NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */
- NV_SparseEnabled, /* 1: May create sparse files. */
-} ntfs_volume_flags;
-
-/*
- * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo()
- * functions.
- */
-#define DEFINE_NVOL_BIT_OPS(flag) \
-static inline int NVol##flag(ntfs_volume *vol) \
-{ \
- return test_bit(NV_##flag, &(vol)->flags); \
-} \
-static inline void NVolSet##flag(ntfs_volume *vol) \
-{ \
- set_bit(NV_##flag, &(vol)->flags); \
-} \
-static inline void NVolClear##flag(ntfs_volume *vol) \
-{ \
- clear_bit(NV_##flag, &(vol)->flags); \
-}
-
-/* Emit the ntfs volume bitops functions. */
-DEFINE_NVOL_BIT_OPS(Errors)
-DEFINE_NVOL_BIT_OPS(ShowSystemFiles)
-DEFINE_NVOL_BIT_OPS(CaseSensitive)
-DEFINE_NVOL_BIT_OPS(LogFileEmpty)
-DEFINE_NVOL_BIT_OPS(QuotaOutOfDate)
-DEFINE_NVOL_BIT_OPS(UsnJrnlStamped)
-DEFINE_NVOL_BIT_OPS(SparseEnabled)
-
-#endif /* _LINUX_NTFS_VOLUME_H */
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 63f70259edc0..7aadf5010999 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -886,7 +886,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
struct runs_tree *run = &ni->file.run;
struct ntfs_sb_info *sbi;
u8 cluster_bits;
- struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTRIB *attr, *attr_b;
struct ATTR_LIST_ENTRY *le, *le_b;
struct mft_inode *mi, *mi_b;
CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen;
@@ -904,12 +904,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
*len = 0;
up_read(&ni->file.run_lock);
- if (*len) {
- if (*lcn != SPARSE_LCN || !new)
- return 0; /* Fast normal way without allocation. */
- else if (clen > *len)
- clen = *len;
- }
+ if (*len && (*lcn != SPARSE_LCN || !new))
+ return 0; /* Fast normal way without allocation. */
/* No cluster in cache or we need to allocate cluster in hole. */
sbi = ni->mi.sbi;
@@ -918,6 +914,17 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
ni_lock(ni);
down_write(&ni->file.run_lock);
+ /* Repeat the code above (under write lock). */
+ if (!run_lookup_entry(run, vcn, lcn, len, NULL))
+ *len = 0;
+
+ if (*len) {
+ if (*lcn != SPARSE_LCN || !new)
+ goto out; /* normal way without allocation. */
+ if (clen > *len)
+ clen = *len;
+ }
+
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
if (!attr_b) {
@@ -1736,8 +1743,10 @@ repack:
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
0, NULL, &mi_b);
- if (!attr_b)
- return -ENOENT;
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
attr = attr_b;
le = le_b;
@@ -1818,13 +1827,15 @@ ins_ext:
ok:
run_truncate_around(run, vcn);
out:
- if (new_valid > data_size)
- new_valid = data_size;
+ if (attr_b) {
+ if (new_valid > data_size)
+ new_valid = data_size;
- valid_size = le64_to_cpu(attr_b->nres.valid_size);
- if (new_valid != valid_size) {
- attr_b->nres.valid_size = cpu_to_le64(valid_size);
- mi_b->dirty = true;
+ valid_size = le64_to_cpu(attr_b->nres.valid_size);
+ if (new_valid != valid_size) {
+ attr_b->nres.valid_size = cpu_to_le64(valid_size);
+ mi_b->dirty = true;
+ }
}
return err;
@@ -2073,7 +2084,7 @@ next_attr:
/* Update inode size. */
ni->i_valid = valid_size;
- ni->vfs_inode.i_size = data_size;
+ i_size_write(&ni->vfs_inode, data_size);
inode_set_bytes(&ni->vfs_inode, total_size);
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
mark_inode_dirty(&ni->vfs_inode);
@@ -2488,7 +2499,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
mi_b->dirty = true;
done:
- ni->vfs_inode.i_size += bytes;
+ i_size_write(&ni->vfs_inode, ni->vfs_inode.i_size + bytes);
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
mark_inode_dirty(&ni->vfs_inode);
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
index 7c01735d1219..9f4bd8d26090 100644
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -29,7 +29,7 @@ static inline bool al_is_valid_le(const struct ntfs_inode *ni,
void al_destroy(struct ntfs_inode *ni)
{
run_close(&ni->attr_list.run);
- kfree(ni->attr_list.le);
+ kvfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.size = 0;
ni->attr_list.dirty = false;
@@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
{
size_t off;
u16 sz;
+ const unsigned le_min_size = le_size(0);
if (!le) {
le = ni->attr_list.le;
} else {
sz = le16_to_cpu(le->size);
- if (sz < sizeof(struct ATTR_LIST_ENTRY)) {
+ if (sz < le_min_size) {
/* Impossible 'cause we should not return such le. */
return NULL;
}
@@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
/* Check boundary. */
off = PtrOffset(ni->attr_list.le, le);
- if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) {
+ if (off + le_min_size > ni->attr_list.size) {
/* The regular end of list. */
return NULL;
}
@@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
sz = le16_to_cpu(le->size);
/* Check le for errors. */
- if (sz < sizeof(struct ATTR_LIST_ENTRY) ||
- off + sz > ni->attr_list.size ||
+ if (sz < le_min_size || off + sz > ni->attr_list.size ||
sz < le->name_off + le->name_len * sizeof(short)) {
return NULL;
}
@@ -318,7 +318,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
memcpy(ptr, al->le, off);
memcpy(Add2Ptr(ptr, off + sz), le, old_size - off);
le = Add2Ptr(ptr, off);
- kfree(al->le);
+ kvfree(al->le);
al->le = ptr;
} else {
memmove(Add2Ptr(le, sz), le, old_size - off);
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 63f14a0232f6..845f9b22deef 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -124,7 +124,7 @@ void wnd_close(struct wnd_bitmap *wnd)
{
struct rb_node *node, *next;
- kfree(wnd->free_bits);
+ kvfree(wnd->free_bits);
wnd->free_bits = NULL;
run_close(&wnd->run);
@@ -1360,7 +1360,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short));
memset(new_free + wnd->nwnd, 0,
(new_wnd - wnd->nwnd) * sizeof(short));
- kfree(wnd->free_bits);
+ kvfree(wnd->free_bits);
wnd->free_bits = new_free;
}
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index ec0566b322d5..5cf3d9decf64 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
return 0;
}
- /* NTFS: symlinks are "dir + reparse" or "file + reparse" */
- if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT)
- dt_type = DT_LNK;
- else
- dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+ /*
+ * NTFS: symlinks are "dir + reparse" or "file + reparse"
+ * Unfortunately reparse attribute is used for many purposes (several dozens).
+ * It is not possible here to know is this name symlink or not.
+ * To get exactly the type of name we should to open inode (read mft).
+ * getattr for opened file (fstat) correctly returns symlink.
+ */
+ dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+
+ /*
+ * It is not reliable to detect the type of name using duplicated information
+ * stored in parent directory.
+ * The only correct way to get the type of name - read MFT record and find ATTR_STD.
+ * The code below is not good idea.
+ * It does additional locks/reads just to get the type of name.
+ * Should we use additional mount option to enable branch below?
+ */
+ if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ ino != ni->mi.rno) {
+ struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
+ if (!IS_ERR_OR_NULL(inode)) {
+ dt_type = fs_umode_to_dtype(inode->i_mode);
+ iput(inode);
+ }
+ }
return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
}
@@ -495,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
struct INDEX_HDR *hdr;
const struct ATTR_FILE_NAME *fname;
u32 e_size, off, end;
- u64 vbo = 0;
size_t drs = 0, fles = 0, bit = 0;
- loff_t i_size = ni->vfs_inode.i_size;
struct indx_node *node = NULL;
- u8 index_bits = ni->dir.index_bits;
+ size_t max_indx = i_size_read(&ni->vfs_inode) >> ni->dir.index_bits;
if (is_empty)
*is_empty = true;
@@ -518,8 +536,10 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
e = Add2Ptr(hdr, off);
e_size = le16_to_cpu(e->size);
if (e_size < sizeof(struct NTFS_DE) ||
- off + e_size > end)
+ off + e_size > end) {
+ /* Looks like corruption. */
break;
+ }
if (de_is_last(e))
break;
@@ -543,7 +563,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
fles += 1;
}
- if (vbo >= i_size)
+ if (bit >= max_indx)
goto out;
err = indx_used_bit(&ni->dir, ni, &bit);
@@ -553,8 +573,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
if (bit == MINUS_ONE_T)
goto out;
- vbo = (u64)bit << index_bits;
- if (vbo >= i_size)
+ if (bit >= max_indx)
goto out;
err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
@@ -564,7 +583,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
hdr = &node->index->ihdr;
bit += 1;
- vbo = (u64)bit << ni->dir.idx2vbn_bits;
}
out:
@@ -593,5 +611,9 @@ const struct file_operations ntfs_dir_operations = {
.iterate_shared = ntfs_readdir,
.fsync = generic_file_fsync,
.open = ntfs_file_open,
+ .unlocked_ioctl = ntfs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ntfs_compat_ioctl,
+#endif
};
// clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index a5a30a24ce5d..5418662c80d8 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -48,7 +48,7 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
return 0;
}
-static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
@@ -61,7 +61,7 @@ static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
}
#ifdef CONFIG_COMPAT
-static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
{
return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
@@ -188,6 +188,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
u32 bh_next, bh_off, to;
sector_t iblock;
struct folio *folio;
+ bool dirty = false;
for (; idx < idx_end; idx += 1, from = 0) {
page_off = (loff_t)idx << PAGE_SHIFT;
@@ -223,29 +224,27 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
/* Ok, it's mapped. Make sure it's up-to-date. */
if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
-
- if (!buffer_uptodate(bh)) {
- err = bh_read(bh, 0);
- if (err < 0) {
- folio_unlock(folio);
- folio_put(folio);
- goto out;
- }
+ else if (bh_read(bh, 0) < 0) {
+ err = -EIO;
+ folio_unlock(folio);
+ folio_put(folio);
+ goto out;
}
mark_buffer_dirty(bh);
-
} while (bh_off = bh_next, iblock += 1,
head != (bh = bh->b_this_page));
folio_zero_segment(folio, from, to);
+ dirty = true;
folio_unlock(folio);
folio_put(folio);
cond_resched();
}
out:
- mark_inode_dirty(inode);
+ if (dirty)
+ mark_inode_dirty(inode);
return err;
}
@@ -261,6 +260,9 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
bool rw = vma->vm_flags & VM_WRITE;
int err;
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "mmap encrypted not supported");
return -EOPNOTSUPP;
@@ -499,10 +501,14 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_lock(ni);
err = attr_punch_hole(ni, vbo, len, &frame_size);
ni_unlock(ni);
+ if (!err)
+ goto ok;
+
if (err != E_NTFS_NOTALIGNED)
goto out;
/* Process not aligned punch. */
+ err = 0;
mask = frame_size - 1;
vbo_a = (vbo + mask) & ~mask;
end_a = end & ~mask;
@@ -525,6 +531,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_lock(ni);
err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL);
ni_unlock(ni);
+ if (err)
+ goto out;
}
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
/*
@@ -564,6 +572,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_lock(ni);
err = attr_insert_range(ni, vbo, len);
ni_unlock(ni);
+ if (err)
+ goto out;
} else {
/* Check new size. */
u8 cluster_bits = sbi->cluster_bits;
@@ -633,11 +643,18 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
&ni->file.run, i_size, &ni->i_valid,
true, NULL);
ni_unlock(ni);
+ if (err)
+ goto out;
} else if (new_size > i_size) {
- inode->i_size = new_size;
+ i_size_write(inode, new_size);
}
}
+ok:
+ err = file_modified(file);
+ if (err)
+ goto out;
+
out:
if (map_locked)
filemap_invalidate_unlock(mapping);
@@ -663,6 +680,9 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
umode_t mode = inode->i_mode;
int err;
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
err = setattr_prepare(idmap, dentry, attr);
if (err)
goto out;
@@ -676,7 +696,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out;
}
inode_dio_wait(inode);
- oldsize = inode->i_size;
+ oldsize = i_size_read(inode);
newsize = attr->ia_size;
if (newsize <= oldsize)
@@ -688,7 +708,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out;
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
- inode->i_size = newsize;
+ i_size_write(inode, newsize);
}
setattr_copy(idmap, inode, attr);
@@ -718,6 +738,9 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = file->f_mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "encrypted i/o not supported");
return -EOPNOTSUPP;
@@ -752,6 +775,9 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos,
struct inode *inode = in->f_mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "encrypted i/o not supported");
return -EOPNOTSUPP;
@@ -821,7 +847,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
size_t count = iov_iter_count(from);
loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(file);
- loff_t i_size = inode->i_size;
+ loff_t i_size = i_size_read(inode);
struct address_space *mapping = inode->i_mapping;
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
@@ -1028,6 +1054,8 @@ out:
iocb->ki_pos += written;
if (iocb->ki_pos > ni->i_valid)
ni->i_valid = iocb->ki_pos;
+ if (iocb->ki_pos > i_size)
+ i_size_write(inode, iocb->ki_pos);
return written;
}
@@ -1041,8 +1069,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
+ int err;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "encrypted i/o not supported");
return -EOPNOTSUPP;
@@ -1068,6 +1100,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret <= 0)
goto out;
+ err = file_modified(iocb->ki_filp);
+ if (err) {
+ ret = err;
+ goto out;
+ }
+
if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
/* Should never be here, see ntfs_file_open(). */
ret = -EOPNOTSUPP;
@@ -1097,6 +1135,9 @@ int ntfs_file_open(struct inode *inode, struct file *file)
{
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (unlikely((is_compressed(ni) || is_encrypted(ni)) &&
(file->f_flags & O_DIRECT))) {
return -EOPNOTSUPP;
@@ -1138,7 +1179,8 @@ static int ntfs_file_release(struct inode *inode, struct file *file)
down_write(&ni->file.run_lock);
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
- inode->i_size, &ni->i_valid, false, NULL);
+ i_size_read(inode), &ni->i_valid, false,
+ NULL);
up_write(&ni->file.run_lock);
ni_unlock(ni);
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 3df2d9e34b91..7f27382e0ce2 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -778,7 +778,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
run_deallocate(sbi, &ni->attr_list.run, true);
run_close(&ni->attr_list.run);
ni->attr_list.size = 0;
- kfree(ni->attr_list.le);
+ kvfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.dirty = false;
@@ -927,7 +927,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
return 0;
out:
- kfree(ni->attr_list.le);
+ kvfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.size = 0;
return err;
@@ -2099,7 +2099,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
gfp_t gfp_mask;
struct page *pg;
- if (vbo >= ni->vfs_inode.i_size) {
+ if (vbo >= i_size_read(&ni->vfs_inode)) {
SetPageUptodate(page);
err = 0;
goto out;
@@ -2173,7 +2173,7 @@ int ni_decompress_file(struct ntfs_inode *ni)
{
struct ntfs_sb_info *sbi = ni->mi.sbi;
struct inode *inode = &ni->vfs_inode;
- loff_t i_size = inode->i_size;
+ loff_t i_size = i_size_read(inode);
struct address_space *mapping = inode->i_mapping;
gfp_t gfp_mask = mapping_gfp_mask(mapping);
struct page **pages = NULL;
@@ -2508,6 +2508,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
err = -EOPNOTSUPP;
goto out1;
#else
+ loff_t i_size = i_size_read(&ni->vfs_inode);
u32 frame_bits = ni_ext_compress_bits(ni);
u64 frame64 = frame_vbo >> frame_bits;
u64 frames, vbo_data;
@@ -2548,7 +2549,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
}
}
- frames = (ni->vfs_inode.i_size - 1) >> frame_bits;
+ frames = (i_size - 1) >> frame_bits;
err = attr_wof_frame_info(ni, attr, run, frame64, frames,
frame_bits, &ondisk_size, &vbo_data);
@@ -2556,8 +2557,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
goto out2;
if (frame64 == frames) {
- unc_size = 1 + ((ni->vfs_inode.i_size - 1) &
- (frame_size - 1));
+ unc_size = 1 + ((i_size - 1) & (frame_size - 1));
ondisk_size = attr_size(attr) - vbo_data;
} else {
unc_size = frame_size;
@@ -3259,6 +3259,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
if (is_bad_inode(inode) || sb_rdonly(sb))
return 0;
+ if (unlikely(ntfs3_forced_shutdown(sb)))
+ return -EIO;
+
if (!ni_trylock(ni)) {
/* 'ni' is under modification, skip for now. */
mark_inode_dirty_sync(inode);
@@ -3288,7 +3291,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
modified = true;
}
- ts = inode_get_mtime(inode);
+ ts = inode_get_ctime(inode);
dup.c_time = kernel2nt(&ts);
if (std->c_time != dup.c_time) {
std->c_time = dup.c_time;
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 98ccb6650858..855519713bf7 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
{
const struct RESTART_AREA *ra;
u16 cl, fl, ul;
- u32 off, l_size, file_dat_bits, file_size_round;
+ u32 off, l_size, seq_bits;
u16 ro = le16_to_cpu(rhdr->ra_off);
u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
@@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
/* Make sure the sequence number bits match the log file size. */
l_size = le64_to_cpu(ra->l_size);
- file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits);
- file_size_round = 1u << (file_dat_bits + 3);
- if (file_size_round != l_size &&
- (file_size_round < l_size || (file_size_round / 2) > l_size)) {
- return false;
+ seq_bits = sizeof(u64) * 8 + 3;
+ while (l_size) {
+ l_size >>= 1;
+ seq_bits -= 1;
}
+ if (seq_bits != ra->seq_num_bits)
+ return false;
+
/* The log page data offset and record header length must be quad-aligned. */
if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
!IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
@@ -974,6 +976,16 @@ skip_looking:
return e;
}
+struct restart_info {
+ u64 last_lsn;
+ struct RESTART_HDR *r_page;
+ u32 vbo;
+ bool chkdsk_was_run;
+ bool valid_page;
+ bool initialized;
+ bool restart;
+};
+
#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)
#define NTFSLOG_WRAPPED 0x00000001
@@ -987,6 +999,7 @@ struct ntfs_log {
struct ntfs_inode *ni;
u32 l_size;
+ u32 orig_file_size;
u32 sys_page_size;
u32 sys_page_mask;
u32 page_size;
@@ -1040,6 +1053,8 @@ struct ntfs_log {
struct CLIENT_ID client_id;
u32 client_undo_commit;
+
+ struct restart_info rst_info, rst_info2;
};
static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
@@ -1105,16 +1120,6 @@ static inline bool verify_client_lsn(struct ntfs_log *log,
lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
}
-struct restart_info {
- u64 last_lsn;
- struct RESTART_HDR *r_page;
- u32 vbo;
- bool chkdsk_was_run;
- bool valid_page;
- bool initialized;
- bool restart;
-};
-
static int read_log_page(struct ntfs_log *log, u32 vbo,
struct RECORD_PAGE_HDR **buffer, bool *usa_error)
{
@@ -1176,7 +1181,7 @@ out:
* restart page header. It will stop the first time we find a
* valid page header.
*/
-static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+static int log_read_rst(struct ntfs_log *log, bool first,
struct restart_info *info)
{
u32 skip, vbo;
@@ -1192,7 +1197,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
}
/* Loop continuously until we succeed. */
- for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
+ for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) {
bool usa_error;
bool brst, bchk;
struct RESTART_AREA *ra;
@@ -1285,22 +1290,17 @@ check_result:
/*
* Ilog_init_pg_hdr - Init @log from restart page header.
*/
-static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
- u32 page_size, u16 major_ver, u16 minor_ver)
+static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver)
{
- log->sys_page_size = sys_page_size;
- log->sys_page_mask = sys_page_size - 1;
- log->page_size = page_size;
- log->page_mask = page_size - 1;
- log->page_bits = blksize_bits(page_size);
+ log->sys_page_size = log->page_size;
+ log->sys_page_mask = log->page_mask;
log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
if (!log->clst_per_page)
log->clst_per_page = 1;
- log->first_page = major_ver >= 2 ?
- 0x22 * page_size :
- ((sys_page_size << 1) + (page_size << 1));
+ log->first_page = major_ver >= 2 ? 0x22 * log->page_size :
+ 4 * log->page_size;
log->major_ver = major_ver;
log->minor_ver = minor_ver;
}
@@ -1308,12 +1308,11 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
/*
* log_create - Init @log in cases when we don't have a restart area to use.
*/
-static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
+static void log_create(struct ntfs_log *log, const u64 last_lsn,
u32 open_log_count, bool wrapped, bool use_multi_page)
{
- log->l_size = l_size;
/* All file offsets must be quadword aligned. */
- log->file_data_bits = blksize_bits(l_size) - 3;
+ log->file_data_bits = blksize_bits(log->l_size) - 3;
log->seq_num_mask = (8 << log->file_data_bits) - 1;
log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
log->seq_num = (last_lsn >> log->file_data_bits) + 2;
@@ -3720,10 +3719,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
struct ntfs_sb_info *sbi = ni->mi.sbi;
struct ntfs_log *log;
- struct restart_info rst_info, rst_info2;
- u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0;
+ u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
struct ATTR_NAME_ENTRY *attr_names = NULL;
- struct ATTR_NAME_ENTRY *ane;
struct RESTART_TABLE *dptbl = NULL;
struct RESTART_TABLE *trtbl = NULL;
const struct RESTART_TABLE *rt;
@@ -3741,9 +3738,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
struct TRANSACTION_ENTRY *tr;
struct DIR_PAGE_ENTRY *dp;
u32 i, bytes_per_attr_entry;
- u32 l_size = ni->vfs_inode.i_size;
- u32 orig_file_size = l_size;
- u32 page_size, vbo, tail, off, dlen;
+ u32 vbo, tail, off, dlen;
u32 saved_len, rec_len, transact_id;
bool use_second_page;
struct RESTART_AREA *ra2, *ra = NULL;
@@ -3758,52 +3753,50 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
u16 t16;
u32 t32;
- /* Get the size of page. NOTE: To replay we can use default page. */
-#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
- page_size = norm_file_page(PAGE_SIZE, &l_size, true);
-#else
- page_size = norm_file_page(PAGE_SIZE, &l_size, false);
-#endif
- if (!page_size)
- return -EINVAL;
-
log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS);
if (!log)
return -ENOMEM;
log->ni = ni;
- log->l_size = l_size;
- log->one_page_buf = kmalloc(page_size, GFP_NOFS);
+ log->l_size = log->orig_file_size = ni->vfs_inode.i_size;
+ /* Get the size of page. NOTE: To replay we can use default page. */
+#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
+ log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true);
+#else
+ log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false);
+#endif
+ if (!log->page_size) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ log->one_page_buf = kmalloc(log->page_size, GFP_NOFS);
if (!log->one_page_buf) {
err = -ENOMEM;
goto out;
}
- log->page_size = page_size;
- log->page_mask = page_size - 1;
- log->page_bits = blksize_bits(page_size);
+ log->page_mask = log->page_size - 1;
+ log->page_bits = blksize_bits(log->page_size);
/* Look for a restart area on the disk. */
- memset(&rst_info, 0, sizeof(struct restart_info));
- err = log_read_rst(log, l_size, true, &rst_info);
+ err = log_read_rst(log, true, &log->rst_info);
if (err)
goto out;
/* remember 'initialized' */
- *initialized = rst_info.initialized;
+ *initialized = log->rst_info.initialized;
- if (!rst_info.restart) {
- if (rst_info.initialized) {
+ if (!log->rst_info.restart) {
+ if (log->rst_info.initialized) {
/* No restart area but the file is not initialized. */
err = -EINVAL;
goto out;
}
- log_init_pg_hdr(log, page_size, page_size, 1, 1);
- log_create(log, l_size, 0, get_random_u32(), false, false);
-
- log->ra = ra;
+ log_init_pg_hdr(log, 1, 1);
+ log_create(log, 0, get_random_u32(), false, false);
ra = log_create_ra(log);
if (!ra) {
@@ -3820,25 +3813,26 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
* If the restart offset above wasn't zero then we won't
* look for a second restart.
*/
- if (rst_info.vbo)
+ if (log->rst_info.vbo)
goto check_restart_area;
- memset(&rst_info2, 0, sizeof(struct restart_info));
- err = log_read_rst(log, l_size, false, &rst_info2);
+ err = log_read_rst(log, false, &log->rst_info2);
if (err)
goto out;
/* Determine which restart area to use. */
- if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
+ if (!log->rst_info2.restart ||
+ log->rst_info2.last_lsn <= log->rst_info.last_lsn)
goto use_first_page;
use_second_page = true;
- if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) {
+ if (log->rst_info.chkdsk_was_run &&
+ log->page_size != log->rst_info.vbo) {
struct RECORD_PAGE_HDR *sp = NULL;
bool usa_error;
- if (!read_log_page(log, page_size, &sp, &usa_error) &&
+ if (!read_log_page(log, log->page_size, &sp, &usa_error) &&
sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
use_second_page = false;
}
@@ -3846,52 +3840,43 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
}
if (use_second_page) {
- kfree(rst_info.r_page);
- memcpy(&rst_info, &rst_info2, sizeof(struct restart_info));
- rst_info2.r_page = NULL;
+ kfree(log->rst_info.r_page);
+ memcpy(&log->rst_info, &log->rst_info2,
+ sizeof(struct restart_info));
+ log->rst_info2.r_page = NULL;
}
use_first_page:
- kfree(rst_info2.r_page);
+ kfree(log->rst_info2.r_page);
check_restart_area:
/*
* If the restart area is at offset 0, we want
* to write the second restart area first.
*/
- log->init_ra = !!rst_info.vbo;
+ log->init_ra = !!log->rst_info.vbo;
/* If we have a valid page then grab a pointer to the restart area. */
- ra2 = rst_info.valid_page ?
- Add2Ptr(rst_info.r_page,
- le16_to_cpu(rst_info.r_page->ra_off)) :
+ ra2 = log->rst_info.valid_page ?
+ Add2Ptr(log->rst_info.r_page,
+ le16_to_cpu(log->rst_info.r_page->ra_off)) :
NULL;
- if (rst_info.chkdsk_was_run ||
+ if (log->rst_info.chkdsk_was_run ||
(ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
bool wrapped = false;
bool use_multi_page = false;
u32 open_log_count;
/* Do some checks based on whether we have a valid log page. */
- if (!rst_info.valid_page) {
- open_log_count = get_random_u32();
- goto init_log_instance;
- }
- open_log_count = le32_to_cpu(ra2->open_log_count);
-
- /*
- * If the restart page size isn't changing then we want to
- * check how much work we need to do.
- */
- if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size))
- goto init_log_instance;
+ open_log_count = log->rst_info.valid_page ?
+ le32_to_cpu(ra2->open_log_count) :
+ get_random_u32();
-init_log_instance:
- log_init_pg_hdr(log, page_size, page_size, 1, 1);
+ log_init_pg_hdr(log, 1, 1);
- log_create(log, l_size, rst_info.last_lsn, open_log_count,
- wrapped, use_multi_page);
+ log_create(log, log->rst_info.last_lsn, open_log_count, wrapped,
+ use_multi_page);
ra = log_create_ra(log);
if (!ra) {
@@ -3916,28 +3901,27 @@ init_log_instance:
* use the log file. We must use the system page size instead of the
* default size if there is not a clean shutdown.
*/
- t32 = le32_to_cpu(rst_info.r_page->sys_page_size);
- if (page_size != t32) {
- l_size = orig_file_size;
- page_size =
- norm_file_page(t32, &l_size, t32 == DefaultLogPageSize);
+ t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size);
+ if (log->page_size != t32) {
+ log->l_size = log->orig_file_size;
+ log->page_size = norm_file_page(t32, &log->l_size,
+ t32 == DefaultLogPageSize);
}
- if (page_size != t32 ||
- page_size != le32_to_cpu(rst_info.r_page->page_size)) {
+ if (log->page_size != t32 ||
+ log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) {
err = -EINVAL;
goto out;
}
/* If the file size has shrunk then we won't mount it. */
- if (l_size < le64_to_cpu(ra2->l_size)) {
+ if (log->l_size < le64_to_cpu(ra2->l_size)) {
err = -EINVAL;
goto out;
}
- log_init_pg_hdr(log, page_size, page_size,
- le16_to_cpu(rst_info.r_page->major_ver),
- le16_to_cpu(rst_info.r_page->minor_ver));
+ log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver),
+ le16_to_cpu(log->rst_info.r_page->minor_ver));
log->l_size = le64_to_cpu(ra2->l_size);
log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
@@ -3945,7 +3929,7 @@ init_log_instance:
log->seq_num_mask = (8 << log->file_data_bits) - 1;
log->last_lsn = le64_to_cpu(ra2->current_lsn);
log->seq_num = log->last_lsn >> log->file_data_bits;
- log->ra_off = le16_to_cpu(rst_info.r_page->ra_off);
+ log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off);
log->restart_size = log->sys_page_size - log->ra_off;
log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
log->ra_size = le16_to_cpu(ra2->ra_len);
@@ -4045,7 +4029,7 @@ find_oldest:
log->current_avail = current_log_avail(log);
/* Remember which restart area to write first. */
- log->init_ra = rst_info.vbo;
+ log->init_ra = log->rst_info.vbo;
process_log:
/* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */
@@ -4105,7 +4089,7 @@ process_log:
log->client_id.seq_num = cr->seq_num;
log->client_id.client_idx = client;
- err = read_rst_area(log, &rst, &ra_lsn);
+ err = read_rst_area(log, &rst, &checkpt_lsn);
if (err)
goto out;
@@ -4114,9 +4098,8 @@ process_log:
bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
- checkpt_lsn = le64_to_cpu(rst->check_point_start);
- if (!checkpt_lsn)
- checkpt_lsn = ra_lsn;
+ if (rst->check_point_start)
+ checkpt_lsn = le64_to_cpu(rst->check_point_start);
/* Allocate and Read the Transaction Table. */
if (!rst->transact_table_len)
@@ -4330,23 +4313,20 @@ check_attr_table:
lcb = NULL;
check_attribute_names2:
- if (!rst->attr_names_len)
- goto trace_attribute_table;
-
- ane = attr_names;
- if (!oatbl)
- goto trace_attribute_table;
- while (ane->off) {
- /* TODO: Clear table on exit! */
- oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
- t16 = le16_to_cpu(ane->name_bytes);
- oe->name_len = t16 / sizeof(short);
- oe->ptr = ane->name;
- oe->is_attr_name = 2;
- ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16);
- }
-
-trace_attribute_table:
+ if (rst->attr_names_len && oatbl) {
+ struct ATTR_NAME_ENTRY *ane = attr_names;
+ while (ane->off) {
+ /* TODO: Clear table on exit! */
+ oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+ t16 = le16_to_cpu(ane->name_bytes);
+ oe->name_len = t16 / sizeof(short);
+ oe->ptr = ane->name;
+ oe->is_attr_name = 2;
+ ane = Add2Ptr(ane,
+ sizeof(struct ATTR_NAME_ENTRY) + t16);
+ }
+ }
+
/*
* If the checkpt_lsn is zero, then this is a freshly
* formatted disk and we have no work to do.
@@ -5189,7 +5169,7 @@ out:
kfree(oatbl);
kfree(dptbl);
kfree(attr_names);
- kfree(rst_info.r_page);
+ kfree(log->rst_info.r_page);
kfree(ra);
kfree(log->one_page_buf);
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index fbfe21dbb425..ae2ef5c11868 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -853,7 +853,8 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
/*
* sb can be NULL here. In this case sbi->flags should be 0 too.
*/
- if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR))
+ if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) ||
+ unlikely(ntfs3_forced_shutdown(sb)))
return;
blocksize = sb->s_blocksize;
@@ -1006,6 +1007,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes)
return cpu_to_le32(hash);
}
+/*
+ * simple wrapper for sb_bread_unmovable.
+ */
+struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct buffer_head *bh;
+
+ if (unlikely(block >= sbi->volume.blocks)) {
+ /* prevent generic message "attempt to access beyond end of device" */
+ ntfs_err(sb, "try to read out of volume at offset 0x%llx",
+ (u64)block << sb->s_blocksize_bits);
+ return NULL;
+ }
+
+ bh = sb_bread_unmovable(sb, block);
+ if (bh)
+ return bh;
+
+ ntfs_err(sb, "failed to read volume at offset 0x%llx",
+ (u64)block << sb->s_blocksize_bits);
+ return NULL;
+}
+
int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
{
struct block_device *bdev = sb->s_bdev;
@@ -2128,8 +2153,8 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
if (le32_to_cpu(d_security->size) == new_sec_size &&
d_security->key.hash == hash_key.hash &&
!memcmp(d_security + 1, sd, size_sd)) {
- *security_id = d_security->key.sec_id;
/* Such security already exists. */
+ *security_id = d_security->key.sec_id;
err = 0;
goto out;
}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index cf92b2433f7a..daabaad63aaf 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1462,7 +1462,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
goto out2;
if (in->name == I30_NAME) {
- ni->vfs_inode.i_size = data_size;
+ i_size_write(&ni->vfs_inode, data_size);
inode_set_bytes(&ni->vfs_inode, alloc_size);
}
@@ -1544,7 +1544,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
}
if (in->name == I30_NAME)
- ni->vfs_inode.i_size = data_size;
+ i_size_write(&ni->vfs_inode, data_size);
*vbn = bit << indx->idx2vbn_bits;
@@ -2090,7 +2090,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
return err;
if (in->name == I30_NAME)
- ni->vfs_inode.i_size = new_data;
+ i_size_write(&ni->vfs_inode, new_data);
bpb = bitmap_size(bit);
if (bpb * 8 == nbits)
@@ -2576,7 +2576,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
&indx->alloc_run, 0, NULL, false, NULL);
if (in->name == I30_NAME)
- ni->vfs_inode.i_size = 0;
+ i_size_write(&ni->vfs_inode, 0);
err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len,
false, NULL);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 5e3d71374918..eb7a8c9fba01 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -345,9 +345,7 @@ next_attr:
inode->i_size = le16_to_cpu(rp.SymbolicLinkReparseBuffer
.PrintNameLength) /
sizeof(u16);
-
ni->i_valid = inode->i_size;
-
/* Clear directory bit. */
if (ni->ni_flags & NI_FLAG_DIR) {
indx_clear(&ni->dir);
@@ -412,7 +410,6 @@ end_enum:
goto out;
if (!is_match && name) {
- /* Reuse rec as buffer for ascii name. */
err = -ENOENT;
goto out;
}
@@ -427,6 +424,7 @@ end_enum:
if (names != le16_to_cpu(rec->hard_links)) {
/* Correct minor error on the fly. Do not mark inode as dirty. */
+ ntfs_inode_warn(inode, "Correct links count -> %u.", names);
rec->hard_links = cpu_to_le16(names);
ni->mi.dirty = true;
}
@@ -653,9 +651,10 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
off = vbo & (PAGE_SIZE - 1);
folio_set_bh(bh, folio, off);
- err = bh_read(bh, 0);
- if (err < 0)
+ if (bh_read(bh, 0) < 0) {
+ err = -EIO;
goto out;
+ }
folio_zero_segment(folio, off + voff, off + block_size);
}
}
@@ -853,9 +852,13 @@ static int ntfs_resident_writepage(struct folio *folio,
struct writeback_control *wbc, void *data)
{
struct address_space *mapping = data;
- struct ntfs_inode *ni = ntfs_i(mapping->host);
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
int ret;
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
ni_lock(ni);
ret = attr_data_write_resident(ni, &folio->page);
ni_unlock(ni);
@@ -869,7 +872,12 @@ static int ntfs_resident_writepage(struct folio *folio,
static int ntfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- if (is_resident(ntfs_i(mapping->host)))
+ struct inode *inode = mapping->host;
+
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
+ if (is_resident(ntfs_i(inode)))
return write_cache_pages(mapping, wbc, ntfs_resident_writepage,
mapping);
return mpage_writepages(mapping, wbc, ntfs_get_block);
@@ -889,6 +897,9 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
*pagep = NULL;
if (is_resident(ni)) {
struct page *page =
@@ -974,7 +985,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
}
if (pos + err > inode->i_size) {
- inode->i_size = pos + err;
+ i_size_write(inode, pos + err);
dirty = true;
}
@@ -1306,6 +1317,11 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
goto out1;
}
+ if (unlikely(ntfs3_forced_shutdown(sb))) {
+ err = -EIO;
+ goto out2;
+ }
+
/* Mark rw ntfs as dirty. it will be cleared at umount. */
ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index ee3093be5170..084d19d78397 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -181,6 +181,9 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(dir);
int err;
+ if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+ return -EIO;
+
ni_lock_dir(ni);
err = ntfs_unlink_inode(dir, dentry);
@@ -199,6 +202,9 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
u32 size = strlen(symname);
struct inode *inode;
+ if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+ return -EIO;
+
inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0,
symname, size, NULL);
@@ -227,6 +233,9 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(dir);
int err;
+ if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+ return -EIO;
+
ni_lock_dir(ni);
err = ntfs_unlink_inode(dir, dentry);
@@ -264,6 +273,9 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
1024);
static_assert(PATH_MAX >= 4 * 1024);
+ if (unlikely(ntfs3_forced_shutdown(sb)))
+ return -EIO;
+
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
@@ -419,7 +431,7 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry,
* fnd contains tree's path to insert to.
* If fnd is not NULL then dir is locked.
*/
- inode = ntfs_create_inode(mnt_idmap(file->f_path.mnt), dir, dentry, uni,
+ inode = ntfs_create_inode(file_mnt_idmap(file), dir, dentry, uni,
mode, 0, NULL, 0, fnd);
err = IS_ERR(inode) ? PTR_ERR(inode) :
finish_open(file, dentry, ntfs_file_open);
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 86aecbb01a92..9c7478150a03 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -523,12 +523,10 @@ struct ATTR_LIST_ENTRY {
__le64 vcn; // 0x08: Starting VCN of this attribute.
struct MFT_REF ref; // 0x10: MFT record number with attribute.
__le16 id; // 0x18: struct ATTRIB ID.
- __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset.
+ __le16 name[]; // 0x1A: To get real name use name_off.
}; // sizeof(0x20)
-static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20);
-
static inline u32 le_size(u8 name_len)
{
return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) +
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index f6706143d14b..79356fd29a14 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -61,6 +61,8 @@ enum utf16_endian;
/* sbi->flags */
#define NTFS_FLAGS_NODISCARD 0x00000001
+/* ntfs in shutdown state. */
+#define NTFS_FLAGS_SHUTDOWN_BIT 0x00000002 /* == 4*/
/* Set when LogFile is replaying. */
#define NTFS_FLAGS_LOG_REPLAYING 0x00000008
/* Set when we changed first MFT's which copy must be updated in $MftMirr. */
@@ -226,7 +228,7 @@ struct ntfs_sb_info {
u64 maxbytes; // Maximum size for normal files.
u64 maxbytes_sparse; // Maximum size for sparse file.
- u32 flags; // See NTFS_FLAGS_XXX.
+ unsigned long flags; // See NTFS_FLAGS_
CLST zone_max; // Maximum MFT zone length in clusters
CLST bad_clusters; // The count of marked bad clusters.
@@ -473,7 +475,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
int al_update(struct ntfs_inode *ni, int sync);
static inline size_t al_aligned(size_t size)
{
- return (size + 1023) & ~(size_t)1023;
+ return size_add(size, 1023) & ~(size_t)1023;
}
/* Globals from bitfunc.c */
@@ -500,6 +502,8 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
int ntfs_file_open(struct inode *inode, struct file *file);
int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
+long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg);
+long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg);
extern const struct inode_operations ntfs_special_inode_operations;
extern const struct inode_operations ntfs_file_inode_operations;
extern const struct file_operations ntfs_file_operations;
@@ -584,6 +588,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes);
int log_replay(struct ntfs_inode *ni, bool *initialized);
/* Globals from fsntfs.c */
+struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block);
bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes);
int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
bool simple);
@@ -872,7 +877,7 @@ int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode,
int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry);
ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-extern const struct xattr_handler * const ntfs_xattr_handlers[];
+extern const struct xattr_handler *const ntfs_xattr_handlers[];
int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size);
void ntfs_get_wsl_perm(struct inode *inode);
@@ -999,6 +1004,11 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb)
return sb->s_fs_info;
}
+static inline int ntfs3_forced_shutdown(struct super_block *sb)
+{
+ return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
+}
+
/*
* ntfs_up_cluster - Align up on cluster boundary.
*/
@@ -1025,19 +1035,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size)
return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
}
-static inline struct buffer_head *ntfs_bread(struct super_block *sb,
- sector_t block)
-{
- struct buffer_head *bh = sb_bread(sb, block);
-
- if (bh)
- return bh;
-
- ntfs_err(sb, "failed to read volume at offset 0x%llx",
- (u64)block << sb->s_blocksize_bits);
- return NULL;
-}
-
static inline struct ntfs_inode *ntfs_i(struct inode *inode)
{
return container_of(inode, struct ntfs_inode, vfs_inode);
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
index 53629b1f65e9..6aa3a9d44df1 100644
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -279,7 +279,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
if (t16 > asize)
return NULL;
- if (t16 + le32_to_cpu(attr->res.data_size) > asize)
+ if (le32_to_cpu(attr->res.data_size) > asize - t16)
return NULL;
t32 = sizeof(short) * attr->name_len;
@@ -535,8 +535,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi,
return false;
if (ni && is_attr_indexed(attr)) {
- le16_add_cpu(&ni->mi.mrec->hard_links, -1);
- ni->mi.dirty = true;
+ u16 links = le16_to_cpu(ni->mi.mrec->hard_links);
+ struct ATTR_FILE_NAME *fname =
+ attr->type != ATTR_NAME ?
+ NULL :
+ resident_data_ex(attr,
+ SIZEOF_ATTRIBUTE_FILENAME);
+ if (fname && fname->type == FILE_NAME_DOS) {
+ /* Do not decrease links count deleting DOS name. */
+ } else if (!links) {
+ /* minor error. Not critical. */
+ } else {
+ ni->mi.mrec->hard_links = cpu_to_le16(links - 1);
+ ni->mi.dirty = true;
+ }
}
used -= asize;
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 9153dffde950..9df7c20d066f 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -122,13 +122,12 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
if (name) {
struct dentry *de = d_find_alias(inode);
- const u32 name_len = ARRAY_SIZE(s_name_buf) - 1;
if (de) {
spin_lock(&de->d_lock);
- snprintf(name, name_len, " \"%s\"", de->d_name.name);
+ snprintf(name, sizeof(s_name_buf), " \"%s\"",
+ de->d_name.name);
spin_unlock(&de->d_lock);
- name[name_len] = 0; /* To be sure. */
} else {
name[0] = 0;
}
@@ -625,7 +624,7 @@ static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
{
kfree(sbi->new_rec);
kvfree(ntfs_put_shared(sbi->upcase));
- kfree(sbi->def_table);
+ kvfree(sbi->def_table);
kfree(sbi->compress.lznt);
#ifdef CONFIG_NTFS3_LZX_XPRESS
xpress_free_decompressor(sbi->compress.xpress);
@@ -715,6 +714,14 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
}
/*
+ * ntfs_shutdown - super_operations::shutdown
+ */
+static void ntfs_shutdown(struct super_block *sb)
+{
+ set_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
+}
+
+/*
* ntfs_sync_fs - super_operations::sync_fs
*/
static int ntfs_sync_fs(struct super_block *sb, int wait)
@@ -724,6 +731,9 @@ static int ntfs_sync_fs(struct super_block *sb, int wait)
struct ntfs_inode *ni;
struct inode *inode;
+ if (unlikely(ntfs3_forced_shutdown(sb)))
+ return -EIO;
+
ni = sbi->security.ni;
if (ni) {
inode = &ni->vfs_inode;
@@ -763,6 +773,7 @@ static const struct super_operations ntfs_sops = {
.put_super = ntfs_put_super,
.statfs = ntfs_statfs,
.show_options = ntfs_show_options,
+ .shutdown = ntfs_shutdown,
.sync_fs = ntfs_sync_fs,
.write_inode = ntfs3_write_inode,
};
@@ -866,6 +877,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
u16 fn, ao;
u8 cluster_bits;
u32 boot_off = 0;
+ sector_t boot_block = 0;
const char *hint = "Primary boot";
/* Save original dev_size. Used with alternative boot. */
@@ -873,11 +885,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
sbi->volume.blocks = dev_size >> PAGE_SHIFT;
- bh = ntfs_bread(sb, 0);
+read_boot:
+ bh = ntfs_bread(sb, boot_block);
if (!bh)
- return -EIO;
+ return boot_block ? -EINVAL : -EIO;
-check_boot:
err = -EINVAL;
/* Corrupted image; do not read OOB */
@@ -1108,26 +1120,24 @@ check_boot:
}
out:
- if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) {
+ brelse(bh);
+
+ if (err == -EINVAL && !boot_block && dev_size0 > PAGE_SHIFT) {
u32 block_size = min_t(u32, sector_size, PAGE_SIZE);
u64 lbo = dev_size0 - sizeof(*boot);
- /*
- * Try alternative boot (last sector)
- */
- brelse(bh);
-
- sb_set_blocksize(sb, block_size);
- bh = ntfs_bread(sb, lbo >> blksize_bits(block_size));
- if (!bh)
- return -EINVAL;
-
+ boot_block = lbo >> blksize_bits(block_size);
boot_off = lbo & (block_size - 1);
- hint = "Alternative boot";
- dev_size = dev_size0; /* restore original size. */
- goto check_boot;
+ if (boot_block && block_size >= boot_off + sizeof(*boot)) {
+ /*
+ * Try alternative boot (last sector)
+ */
+ sb_set_blocksize(sb, block_size);
+ hint = "Alternative boot";
+ dev_size = dev_size0; /* restore original size. */
+ goto read_boot;
+ }
}
- brelse(bh);
return err;
}
@@ -1815,7 +1825,7 @@ static int __init init_ntfs_fs(void)
ntfs_inode_cachep = kmem_cache_create(
"ntfs_inode_cache", sizeof(struct ntfs_inode), 0,
- (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT),
init_once);
if (!ntfs_inode_cachep) {
err = -ENOMEM;
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 4274b6f31cfa..53e7d1fa036a 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -219,6 +219,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
if (!ea->name_len)
break;
+ if (ea->name_len > ea_size)
+ break;
+
if (buffer) {
/* Check if we can use field ea->name */
if (off + ea_size > size)
@@ -744,6 +747,9 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
int err;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
/* Dispatch request. */
if (!strcmp(name, SYSTEM_DOS_ATTRIB)) {
/* system.dos_attrib */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 4d7efefa98c5..1bde1281d514 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -213,7 +213,7 @@ struct o2hb_region {
unsigned int hr_num_pages;
struct page **hr_slot_data;
- struct bdev_handle *hr_bdev_handle;
+ struct file *hr_bdev_file;
struct o2hb_disk_slot *hr_slots;
/* live node map of this region */
@@ -263,7 +263,7 @@ struct o2hb_region {
static inline struct block_device *reg_bdev(struct o2hb_region *reg)
{
- return reg->hr_bdev_handle ? reg->hr_bdev_handle->bdev : NULL;
+ return reg->hr_bdev_file ? file_bdev(reg->hr_bdev_file) : NULL;
}
struct o2hb_bio_wait_ctxt {
@@ -1509,8 +1509,8 @@ static void o2hb_region_release(struct config_item *item)
kfree(reg->hr_slot_data);
}
- if (reg->hr_bdev_handle)
- bdev_release(reg->hr_bdev_handle);
+ if (reg->hr_bdev_file)
+ fput(reg->hr_bdev_file);
kfree(reg->hr_slots);
@@ -1569,7 +1569,7 @@ static ssize_t o2hb_region_block_bytes_store(struct config_item *item,
unsigned long block_bytes;
unsigned int block_bits;
- if (reg->hr_bdev_handle)
+ if (reg->hr_bdev_file)
return -EINVAL;
status = o2hb_read_block_input(reg, page, &block_bytes,
@@ -1598,7 +1598,7 @@ static ssize_t o2hb_region_start_block_store(struct config_item *item,
char *p = (char *)page;
ssize_t ret;
- if (reg->hr_bdev_handle)
+ if (reg->hr_bdev_file)
return -EINVAL;
ret = kstrtoull(p, 0, &tmp);
@@ -1623,7 +1623,7 @@ static ssize_t o2hb_region_blocks_store(struct config_item *item,
unsigned long tmp;
char *p = (char *)page;
- if (reg->hr_bdev_handle)
+ if (reg->hr_bdev_file)
return -EINVAL;
tmp = simple_strtoul(p, &p, 0);
@@ -1642,7 +1642,7 @@ static ssize_t o2hb_region_dev_show(struct config_item *item, char *page)
{
unsigned int ret = 0;
- if (to_o2hb_region(item)->hr_bdev_handle)
+ if (to_o2hb_region(item)->hr_bdev_file)
ret = sprintf(page, "%pg\n", reg_bdev(to_o2hb_region(item)));
return ret;
@@ -1753,7 +1753,7 @@ out:
}
/*
- * this is acting as commit; we set up all of hr_bdev_handle and hr_task or
+ * this is acting as commit; we set up all of hr_bdev_file and hr_task or
* nothing
*/
static ssize_t o2hb_region_dev_store(struct config_item *item,
@@ -1769,7 +1769,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
ssize_t ret = -EINVAL;
int live_threshold;
- if (reg->hr_bdev_handle)
+ if (reg->hr_bdev_file)
goto out;
/* We can't heartbeat without having had our node number
@@ -1795,11 +1795,11 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
if (!S_ISBLK(f.file->f_mapping->host->i_mode))
goto out2;
- reg->hr_bdev_handle = bdev_open_by_dev(f.file->f_mapping->host->i_rdev,
+ reg->hr_bdev_file = bdev_file_open_by_dev(f.file->f_mapping->host->i_rdev,
BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, NULL);
- if (IS_ERR(reg->hr_bdev_handle)) {
- ret = PTR_ERR(reg->hr_bdev_handle);
- reg->hr_bdev_handle = NULL;
+ if (IS_ERR(reg->hr_bdev_file)) {
+ ret = PTR_ERR(reg->hr_bdev_file);
+ reg->hr_bdev_file = NULL;
goto out2;
}
@@ -1903,8 +1903,8 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
out3:
if (ret < 0) {
- bdev_release(reg->hr_bdev_handle);
- reg->hr_bdev_handle = NULL;
+ fput(reg->hr_bdev_file);
+ reg->hr_bdev_file = NULL;
}
out2:
fdput(f);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 85215162c9dd..7fc0e920eda7 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -578,7 +578,7 @@ static int __init init_dlmfs_fs(void)
dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
sizeof(struct dlmfs_inode_private),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
dlmfs_init_once);
if (!dlmfs_inode_cache) {
status = -ENOMEM;
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index f37174e79fad..6de944818c56 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -27,7 +27,7 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
struct ocfs2_file_private *fp = file->private_data;
struct ocfs2_lock_res *lockres = &fp->fp_flock;
- if (fl->fl_type == F_WRLCK)
+ if (lock_is_write(fl))
level = 1;
if (!IS_SETLKW(cmd))
trylock = 1;
@@ -53,8 +53,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
*/
locks_init_lock(&request);
- request.fl_type = F_UNLCK;
- request.fl_flags = FL_FLOCK;
+ request.c.flc_type = F_UNLCK;
+ request.c.flc_flags = FL_FLOCK;
locks_lock_file_wait(file, &request);
ocfs2_file_unlock(file);
@@ -100,14 +100,14 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
ocfs2_mount_local(osb))
return locks_lock_file_wait(file, fl);
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
return ocfs2_do_funlock(file, cmd, fl);
else
return ocfs2_do_flock(file, inode, cmd, fl);
@@ -118,7 +118,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if (!(fl->fl_flags & FL_POSIX))
+ if (!(fl->c.flc_flags & FL_POSIX))
return -ENOLCK;
return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 9b76ee66aeb2..c11406cd87a8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -744,7 +744,7 @@ static int user_plock(struct ocfs2_cluster_connection *conn,
return dlm_posix_cancel(conn->cc_lockspace, ino, file, fl);
else if (IS_GETLK(cmd))
return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
- else if (fl->fl_type == F_UNLCK)
+ else if (lock_is_unlock(fl))
return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
else
return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 6b906424902b..b3f860888e93 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1706,18 +1706,17 @@ static int ocfs2_initialize_mem_caches(void)
sizeof(struct ocfs2_inode_info),
0,
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
ocfs2_inode_init_once);
ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
sizeof(struct ocfs2_dquot),
0,
- (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT),
NULL);
ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
sizeof(struct ocfs2_quota_chunk),
0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ (SLAB_RECLAIM_ACCOUNT),
NULL);
if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
!ocfs2_qf_chunk_cachep) {
@@ -2027,8 +2026,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
- memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
- sizeof(di->id2.i_super.s_uuid));
+ super_set_uuid(sb, di->id2.i_super.s_uuid,
+ sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
diff --git a/fs/open.c b/fs/open.c
index a84d21e55c39..ee8460c83c77 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -29,7 +29,6 @@
#include <linux/audit.h>
#include <linux/falloc.h>
#include <linux/fs_struct.h>
-#include <linux/ima.h>
#include <linux/dnotify.h>
#include <linux/compat.h>
#include <linux/mnt_idmapping.h>
@@ -154,49 +153,52 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length
}
#endif
-long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+long do_ftruncate(struct file *file, loff_t length, int small)
{
struct inode *inode;
struct dentry *dentry;
- struct fd f;
int error;
- error = -EINVAL;
- if (length < 0)
- goto out;
- error = -EBADF;
- f = fdget(fd);
- if (!f.file)
- goto out;
-
/* explicitly opened as large or we are on 64-bit box */
- if (f.file->f_flags & O_LARGEFILE)
+ if (file->f_flags & O_LARGEFILE)
small = 0;
- dentry = f.file->f_path.dentry;
+ dentry = file->f_path.dentry;
inode = dentry->d_inode;
- error = -EINVAL;
- if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
- goto out_putf;
+ if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
+ return -EINVAL;
- error = -EINVAL;
/* Cannot ftruncate over 2^31 bytes without large file support */
if (small && length > MAX_NON_LFS)
- goto out_putf;
+ return -EINVAL;
- error = -EPERM;
/* Check IS_APPEND on real upper inode */
- if (IS_APPEND(file_inode(f.file)))
- goto out_putf;
+ if (IS_APPEND(file_inode(file)))
+ return -EPERM;
sb_start_write(inode->i_sb);
- error = security_file_truncate(f.file);
+ error = security_file_truncate(file);
if (!error)
- error = do_truncate(file_mnt_idmap(f.file), dentry, length,
- ATTR_MTIME | ATTR_CTIME, f.file);
+ error = do_truncate(file_mnt_idmap(file), dentry, length,
+ ATTR_MTIME | ATTR_CTIME, file);
sb_end_write(inode->i_sb);
-out_putf:
+
+ return error;
+}
+
+long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+{
+ struct fd f;
+ int error;
+
+ if (length < 0)
+ return -EINVAL;
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ error = do_ftruncate(f.file, length, small);
+
fdput(f);
-out:
return error;
}
@@ -1364,7 +1366,7 @@ struct file *filp_open(const char *filename, int flags, umode_t mode)
{
struct filename *name = getname_kernel(filename);
struct file *file = ERR_CAST(name);
-
+
if (!IS_ERR(name)) {
file = file_open_name(name, flags, mode);
putname(name);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index c4b65a6d41cc..4a0779e3ef79 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -446,7 +446,7 @@ static int __init init_openprom_fs(void)
sizeof(struct op_inode_info),
0,
(SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
op_inode_init_once);
if (!op_inode_cachep)
return -ENOMEM;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b8e25ca51016..8586e2f5d243 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -265,20 +265,18 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
if (IS_ERR(old_file))
return PTR_ERR(old_file);
+ /* Try to use clone_file_range to clone up within the same fs */
+ cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0);
+ if (cloned == len)
+ goto out_fput;
+
+ /* Couldn't clone, so now we try to copy the data */
error = rw_verify_area(READ, old_file, &old_pos, len);
if (!error)
error = rw_verify_area(WRITE, new_file, &new_pos, len);
if (error)
goto out_fput;
- /* Try to use clone_file_range to clone up within the same fs */
- ovl_start_write(dentry);
- cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
- ovl_end_write(dentry);
- if (cloned == len)
- goto out_fput;
- /* Couldn't clone, so now we try to copy the data */
-
/* Check if lower fs supports seek operation */
if (old_file->f_mode & FMODE_LSEEK)
skip_hole = true;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 984ffdaeed6c..5764f91d283e 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -18,10 +18,11 @@
struct ovl_lookup_data {
struct super_block *sb;
- struct vfsmount *mnt;
+ const struct ovl_layer *layer;
struct qstr name;
bool is_dir;
bool opaque;
+ bool xwhiteouts;
bool stop;
bool last;
char *redirect;
@@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
return real;
}
-static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
-{
- return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
-}
-
static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
const char *name,
struct dentry *base, int len,
bool drop_negative)
{
- struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len);
+ struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name,
+ base, len);
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
if (drop_negative && ret->d_lockref.count == 1) {
@@ -232,10 +229,13 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
size_t prelen, const char *post,
struct dentry **ret, bool drop_negative)
{
+ struct ovl_fs *ofs = OVL_FS(d->sb);
struct dentry *this;
struct path path;
int err;
bool last_element = !post[0];
+ bool is_upper = d->layer->idx == 0;
+ char val;
this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
if (IS_ERR(this)) {
@@ -253,8 +253,8 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
}
path.dentry = this;
- path.mnt = d->mnt;
- if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) {
+ path.mnt = d->layer->mnt;
+ if (ovl_path_is_whiteout(ofs, &path)) {
d->stop = d->opaque = true;
goto put_and_out;
}
@@ -272,7 +272,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
d->stop = true;
goto put_and_out;
}
- err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL);
+ err = ovl_check_metacopy_xattr(ofs, &path, NULL);
if (err < 0)
goto out_err;
@@ -292,7 +292,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
if (d->last)
goto out;
- if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
+ /* overlay.opaque=x means xwhiteouts directory */
+ val = ovl_get_opaquedir_val(ofs, &path);
+ if (last_element && !is_upper && val == 'x') {
+ d->xwhiteouts = true;
+ ovl_layer_set_xwhiteouts(ofs, d->layer);
+ } else if (val == 'y') {
d->stop = true;
if (last_element)
d->opaque = true;
@@ -863,7 +868,8 @@ fail:
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
*/
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+ const struct ovl_layer **layer)
{
struct ovl_entry *oe = OVL_E(dentry);
struct ovl_path *lowerstack = ovl_lowerstack(oe);
@@ -871,13 +877,16 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
BUG_ON(idx < 0);
if (idx == 0) {
ovl_path_upper(dentry, path);
- if (path->dentry)
+ if (path->dentry) {
+ *layer = &OVL_FS(dentry->d_sb)->layers[0];
return ovl_numlower(oe) ? 1 : -1;
+ }
idx++;
}
BUG_ON(idx > ovl_numlower(oe));
path->dentry = lowerstack[idx - 1].dentry;
- path->mnt = lowerstack[idx - 1].layer->mnt;
+ *layer = lowerstack[idx - 1].layer;
+ path->mnt = (*layer)->mnt;
return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
}
@@ -1055,7 +1064,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
old_cred = ovl_override_creds(dentry->d_sb);
upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
- d.mnt = ovl_upper_mnt(ofs);
+ d.layer = &ofs->layers[0];
err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
if (err)
goto out;
@@ -1111,7 +1120,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
else if (d.is_dir || !ofs->numdatalayer)
d.last = lower.layer->idx == ovl_numlower(roe);
- d.mnt = lower.layer->mnt;
+ d.layer = lower.layer;
err = ovl_lookup_layer(lower.dentry, &d, &this, false);
if (err)
goto out_put;
@@ -1278,6 +1287,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (upperopaque)
ovl_dentry_set_opaque(dentry);
+ if (d.xwhiteouts)
+ ovl_dentry_set_xwhiteouts(dentry);
if (upperdentry)
ovl_dentry_set_upper_alias(dentry);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 5ba11eb43767..ee949f3e7c77 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -50,7 +50,6 @@ enum ovl_xattr {
OVL_XATTR_METACOPY,
OVL_XATTR_PROTATTR,
OVL_XATTR_XWHITEOUT,
- OVL_XATTR_XWHITEOUTS,
};
enum ovl_inode_flag {
@@ -70,6 +69,8 @@ enum ovl_entry_flag {
OVL_E_UPPER_ALIAS,
OVL_E_OPAQUE,
OVL_E_CONNECTED,
+ /* Lower stack may contain xwhiteout entries */
+ OVL_E_XWHITEOUTS,
};
enum {
@@ -477,6 +478,10 @@ bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_xwhiteouts(struct dentry *dentry);
+void ovl_dentry_set_xwhiteouts(struct dentry *dentry);
+void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
+ const struct ovl_layer *layer);
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
void ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
@@ -494,11 +499,10 @@ struct file *ovl_path_open(const struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
bool ovl_already_copied_up(struct dentry *dentry, int flags);
-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
- enum ovl_xattr ox);
+char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
+ enum ovl_xattr ox);
bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
const struct path *upperpath);
@@ -573,7 +577,13 @@ static inline bool ovl_is_impuredir(struct super_block *sb,
.mnt = ovl_upper_mnt(ofs),
};
- return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE);
+ return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y';
+}
+
+static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs,
+ const struct path *path)
+{
+ return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE);
}
static inline bool ovl_redirect_follow(struct ovl_fs *ofs)
@@ -680,7 +690,8 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool verify);
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+ const struct ovl_layer **layer);
int ovl_verify_lowerdata(struct dentry *dentry);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5fa9c58af65f..cb449ab310a7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -40,6 +40,8 @@ struct ovl_layer {
int idx;
/* One fsid per unique underlying sb (upper fsid == 0) */
int fsid;
+ /* xwhiteouts were found on this layer */
+ bool has_xwhiteouts;
};
struct ovl_path {
@@ -59,7 +61,7 @@ struct ovl_fs {
unsigned int numfs;
/* Number of data-only lower layers */
unsigned int numdatalayer;
- const struct ovl_layer *layers;
+ struct ovl_layer *layers;
struct ovl_sb *fs;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 112b4b12f825..36dcc530ac28 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -280,12 +280,20 @@ static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path,
{
struct ovl_fs_context *ctx = fc->fs_private;
- if (ovl_dentry_weird(path->dentry))
- return invalfc(fc, "filesystem on %s not supported", name);
-
if (!d_is_dir(path->dentry))
return invalfc(fc, "%s is not a directory", name);
+ /*
+ * Root dentries of case-insensitive capable filesystems might
+ * not have the dentry operations set, but still be incompatible
+ * with overlayfs. Check explicitly to prevent post-mount
+ * failures.
+ */
+ if (sb_has_encoding(path->mnt->mnt_sb))
+ return invalfc(fc, "case-insensitive capable filesystem on %s not supported", name);
+
+ if (ovl_dentry_weird(path->dentry))
+ return invalfc(fc, "filesystem on %s not supported", name);
/*
* Check whether upper path is read-only here to report failures
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index e71156baa7bc..0ca8af060b0c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct path *realpath,
if (IS_ERR(realfile))
return PTR_ERR(realfile);
- rdd->in_xwhiteouts_dir = rdd->dentry &&
- ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
rdd->first_maybe_whiteout = NULL;
rdd->ctx.pos = 0;
do {
@@ -359,10 +357,13 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
.is_lowest = false,
};
int idx, next;
+ const struct ovl_layer *layer;
for (idx = 0; idx != -1; idx = next) {
- next = ovl_path_next(idx, dentry, &realpath);
+ next = ovl_path_next(idx, dentry, &realpath, &layer);
rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
+ rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
+ ovl_dentry_has_xwhiteouts(dentry);
if (next != -1) {
err = ovl_dir_read(&realpath, &rdd);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4ab66e3d4cff..a40fc7e05525 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -28,41 +28,38 @@ MODULE_LICENSE("GPL");
struct ovl_dir_cache;
-static struct dentry *ovl_d_real(struct dentry *dentry,
- const struct inode *inode)
+static struct dentry *ovl_d_real(struct dentry *dentry, enum d_real_type type)
{
- struct dentry *real = NULL, *lower;
+ struct dentry *upper, *lower;
int err;
- /*
- * vfs is only expected to call d_real() with NULL from d_real_inode()
- * and with overlay inode from file_dentry() on an overlay file.
- *
- * TODO: remove @inode argument from d_real() API, remove code in this
- * function that deals with non-NULL @inode and remove d_real() call
- * from file_dentry().
- */
- if (inode && d_inode(dentry) == inode)
- return dentry;
- else if (inode)
+ switch (type) {
+ case D_REAL_DATA:
+ case D_REAL_METADATA:
+ break;
+ default:
goto bug;
+ }
if (!d_is_reg(dentry)) {
/* d_real_inode() is only relevant for regular files */
return dentry;
}
- real = ovl_dentry_upper(dentry);
- if (real && (inode == d_inode(real)))
- return real;
+ upper = ovl_dentry_upper(dentry);
+ if (upper && (type == D_REAL_METADATA ||
+ ovl_has_upperdata(d_inode(dentry))))
+ return upper;
- if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
- return real;
+ if (type == D_REAL_METADATA) {
+ lower = ovl_dentry_lower(dentry);
+ goto real_lower;
+ }
/*
- * Best effort lazy lookup of lowerdata for !inode case to return
+ * Best effort lazy lookup of lowerdata for D_REAL_DATA case to return
* the real lowerdata dentry. The only current caller of d_real() with
- * NULL inode is d_real_inode() from trace_uprobe and this caller is
+ * D_REAL_DATA is d_real_inode() from trace_uprobe and this caller is
* likely going to be followed reading from the file, before placing
* uprobes on offset within the file, so lowerdata should be available
* when setting the uprobe.
@@ -73,18 +70,13 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
lower = ovl_dentry_lowerdata(dentry);
if (!lower)
goto bug;
- real = lower;
- /* Handle recursion */
- real = d_real(real, inode);
+real_lower:
+ /* Handle recursion into stacked lower fs */
+ return d_real(lower, type);
- if (!inode || inode == d_inode(real))
- return real;
bug:
- WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
- __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
- inode ? inode->i_ino : 0, real,
- real && d_inode(real) ? d_inode(real)->i_ino : 0);
+ WARN(1, "%s(%pd4, %d): real dentry not found\n", __func__, dentry, type);
return dentry;
}
@@ -1249,6 +1241,7 @@ static struct dentry *ovl_get_root(struct super_block *sb,
struct ovl_entry *oe)
{
struct dentry *root;
+ struct ovl_fs *ofs = OVL_FS(sb);
struct ovl_path *lowerpath = ovl_lowerstack(oe);
unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
int fsid = lowerpath->layer->fsid;
@@ -1270,6 +1263,20 @@ static struct dentry *ovl_get_root(struct super_block *sb,
ovl_set_flag(OVL_IMPURE, d_inode(root));
}
+ /* Look for xwhiteouts marker except in the lowermost layer */
+ for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) {
+ struct path path = {
+ .mnt = lowerpath->layer->mnt,
+ .dentry = lowerpath->dentry,
+ };
+
+ /* overlay.opaque=x means xwhiteouts directory */
+ if (ovl_get_opaquedir_val(ofs, &path) == 'x') {
+ ovl_layer_set_xwhiteouts(ofs, lowerpath->layer);
+ ovl_dentry_set_xwhiteouts(root);
+ }
+ }
+
/* Root is always merge -> can have whiteouts */
ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
ovl_dentry_set_flag(OVL_E_CONNECTED, root);
@@ -1496,7 +1503,7 @@ static int __init ovl_init(void)
ovl_inode_cachep = kmem_cache_create("ovl_inode",
sizeof(struct ovl_inode), 0,
(SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
ovl_inode_init_once);
if (ovl_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 0217094c23ea..d285d1d7baad 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -461,6 +461,33 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
}
+bool ovl_dentry_has_xwhiteouts(struct dentry *dentry)
+{
+ return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry);
+}
+
+void ovl_dentry_set_xwhiteouts(struct dentry *dentry)
+{
+ ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry);
+}
+
+/*
+ * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
+ * dentry to dcache, while readdir of that same directory happens after
+ * the overlay dir dentry is in dcache, so if some cpu observes that
+ * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts
+ * for the layers where xwhiteouts marker was found in that merge dir.
+ */
+void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
+ const struct ovl_layer *layer)
+{
+ if (layer->has_xwhiteouts)
+ return;
+
+ /* Write once to read-mostly layer properties */
+ ofs->layers[layer->idx].has_xwhiteouts = true;
+}
+
/*
* For hard links and decoded file handles, it's possible for ovl_dentry_upper()
* to return positive, while there's no actual upper alias for the inode.
@@ -739,19 +766,6 @@ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
return res >= 0;
}
-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path)
-{
- struct dentry *dentry = path->dentry;
- int res;
-
- /* xattr.whiteouts must be a directory */
- if (!d_is_dir(dentry))
- return false;
-
- res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0);
- return res >= 0;
-}
-
/*
* Load persistent uuid from xattr into s_uuid if found, or store a new
* random generated value in s_uuid and in xattr.
@@ -760,13 +774,14 @@ bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
const struct path *upperpath)
{
bool set = false;
+ uuid_t uuid;
int res;
/* Try to load existing persistent uuid */
- res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, sb->s_uuid.b,
+ res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, uuid.b,
UUID_SIZE);
if (res == UUID_SIZE)
- return true;
+ goto set_uuid;
if (res != -ENODATA)
goto fail;
@@ -794,37 +809,37 @@ bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
}
/* Generate overlay instance uuid */
- uuid_gen(&sb->s_uuid);
+ uuid_gen(&uuid);
/* Try to store persistent uuid */
set = true;
- res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, sb->s_uuid.b,
+ res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, uuid.b,
UUID_SIZE);
- if (res == 0)
- return true;
+ if (res)
+ goto fail;
+
+set_uuid:
+ super_set_uuid(sb, uuid.b, sizeof(uuid));
+ return true;
fail:
- memset(sb->s_uuid.b, 0, UUID_SIZE);
ofs->config.uuid = OVL_UUID_NULL;
pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n",
set ? "set" : "get", upperpath->dentry, res);
return false;
}
-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
- enum ovl_xattr ox)
+char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
+ enum ovl_xattr ox)
{
int res;
char val;
if (!d_is_dir(path->dentry))
- return false;
+ return 0;
res = ovl_path_getxattr(ofs, path, ox, &val, 1);
- if (res == 1 && val == 'y')
- return true;
-
- return false;
+ return res == 1 ? val : 0;
}
#define OVL_XATTR_OPAQUE_POSTFIX "opaque"
@@ -837,7 +852,6 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
#define OVL_XATTR_METACOPY_POSTFIX "metacopy"
#define OVL_XATTR_PROTATTR_POSTFIX "protattr"
#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout"
-#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts"
#define OVL_XATTR_TAB_ENTRY(x) \
[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
@@ -854,7 +868,6 @@ const char *const ovl_xattr_table[][2] = {
OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
- OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS),
};
int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
diff --git a/fs/pidfs.c b/fs/pidfs.c
new file mode 100644
index 000000000000..8fd71a00be9c
--- /dev/null
+++ b/fs/pidfs.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <linux/mount.h>
+#include <linux/pid.h>
+#include <linux/pidfs.h>
+#include <linux/pid_namespace.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
+#include <linux/pseudo_fs.h>
+#include <linux/seq_file.h>
+#include <uapi/linux/pidfd.h>
+
+#include "internal.h"
+
+static int pidfd_release(struct inode *inode, struct file *file)
+{
+#ifndef CONFIG_FS_PID
+ struct pid *pid = file->private_data;
+
+ file->private_data = NULL;
+ put_pid(pid);
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/**
+ * pidfd_show_fdinfo - print information about a pidfd
+ * @m: proc fdinfo file
+ * @f: file referencing a pidfd
+ *
+ * Pid:
+ * This function will print the pid that a given pidfd refers to in the
+ * pid namespace of the procfs instance.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its pid. This is
+ * similar to calling getppid() on a process whose parent is outside of
+ * its pid namespace.
+ *
+ * NSpid:
+ * If pid namespaces are supported then this function will also print
+ * the pid of a given pidfd refers to for all descendant pid namespaces
+ * starting from the current pid namespace of the instance, i.e. the
+ * Pid field and the first entry in the NSpid field will be identical.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its first NSpid
+ * entry and no others will be shown.
+ * Note that this differs from the Pid and NSpid fields in
+ * /proc/<pid>/status where Pid and NSpid are always shown relative to
+ * the pid namespace of the procfs instance. The difference becomes
+ * obvious when sending around a pidfd between pid namespaces from a
+ * different branch of the tree, i.e. where no ancestral relation is
+ * present between the pid namespaces:
+ * - create two new pid namespaces ns1 and ns2 in the initial pid
+ * namespace (also take care to create new mount namespaces in the
+ * new pid namespace and mount procfs)
+ * - create a process with a pidfd in ns1
+ * - send pidfd from ns1 to ns2
+ * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
+ * have exactly one entry, which is 0
+ */
+static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct pid *pid = pidfd_pid(f);
+ struct pid_namespace *ns;
+ pid_t nr = -1;
+
+ if (likely(pid_has_task(pid, PIDTYPE_PID))) {
+ ns = proc_pid_ns(file_inode(m->file)->i_sb);
+ nr = pid_nr_ns(pid, ns);
+ }
+
+ seq_put_decimal_ll(m, "Pid:\t", nr);
+
+#ifdef CONFIG_PID_NS
+ seq_put_decimal_ll(m, "\nNSpid:\t", nr);
+ if (nr > 0) {
+ int i;
+
+ /* If nr is non-zero it means that 'pid' is valid and that
+ * ns, i.e. the pid namespace associated with the procfs
+ * instance, is in the pid namespace hierarchy of pid.
+ * Start at one below the already printed level.
+ */
+ for (i = ns->level + 1; i <= pid->level; i++)
+ seq_put_decimal_ll(m, "\t", pid->numbers[i].nr);
+ }
+#endif
+ seq_putc(m, '\n');
+}
+#endif
+
+/*
+ * Poll support for process exit notification.
+ */
+static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct pid *pid = pidfd_pid(file);
+ bool thread = file->f_flags & PIDFD_THREAD;
+ struct task_struct *task;
+ __poll_t poll_flags = 0;
+
+ poll_wait(file, &pid->wait_pidfd, pts);
+ /*
+ * Depending on PIDFD_THREAD, inform pollers when the thread
+ * or the whole thread-group exits.
+ */
+ guard(rcu)();
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
+ else if (task->exit_state && (thread || thread_group_empty(task)))
+ poll_flags = EPOLLIN | EPOLLRDNORM;
+
+ return poll_flags;
+}
+
+static const struct file_operations pidfs_file_operations = {
+ .release = pidfd_release,
+ .poll = pidfd_poll,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = pidfd_show_fdinfo,
+#endif
+};
+
+struct pid *pidfd_pid(const struct file *file)
+{
+ if (file->f_op != &pidfs_file_operations)
+ return ERR_PTR(-EBADF);
+#ifdef CONFIG_FS_PID
+ return file_inode(file)->i_private;
+#else
+ return file->private_data;
+#endif
+}
+
+#ifdef CONFIG_FS_PID
+static struct vfsmount *pidfs_mnt __ro_after_init;
+
+/*
+ * The vfs falls back to simple_setattr() if i_op->setattr() isn't
+ * implemented. Let's reject it completely until we have a clean
+ * permission concept for pidfds.
+ */
+static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
+static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+ return 0;
+}
+
+static const struct inode_operations pidfs_inode_operations = {
+ .getattr = pidfs_getattr,
+ .setattr = pidfs_setattr,
+};
+
+static void pidfs_evict_inode(struct inode *inode)
+{
+ struct pid *pid = inode->i_private;
+
+ clear_inode(inode);
+ put_pid(pid);
+}
+
+static const struct super_operations pidfs_sops = {
+ .drop_inode = generic_delete_inode,
+ .evict_inode = pidfs_evict_inode,
+ .statfs = simple_statfs,
+};
+
+static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ return dynamic_dname(buffer, buflen, "pidfd:[%lu]",
+ d_inode(dentry)->i_ino);
+}
+
+static const struct dentry_operations pidfs_dentry_operations = {
+ .d_delete = always_delete_dentry,
+ .d_dname = pidfs_dname,
+ .d_prune = stashed_dentry_prune,
+};
+
+static void pidfs_init_inode(struct inode *inode, void *data)
+{
+ inode->i_private = data;
+ inode->i_flags |= S_PRIVATE;
+ inode->i_mode |= S_IRWXU;
+ inode->i_op = &pidfs_inode_operations;
+ inode->i_fop = &pidfs_file_operations;
+}
+
+static void pidfs_put_data(void *data)
+{
+ struct pid *pid = data;
+ put_pid(pid);
+}
+
+static const struct stashed_operations pidfs_stashed_ops = {
+ .init_inode = pidfs_init_inode,
+ .put_data = pidfs_put_data,
+};
+
+static int pidfs_init_fs_context(struct fs_context *fc)
+{
+ struct pseudo_fs_context *ctx;
+
+ ctx = init_pseudo(fc, PID_FS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->ops = &pidfs_sops;
+ ctx->dops = &pidfs_dentry_operations;
+ fc->s_fs_info = (void *)&pidfs_stashed_ops;
+ return 0;
+}
+
+static struct file_system_type pidfs_type = {
+ .name = "pidfs",
+ .init_fs_context = pidfs_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
+{
+
+ struct file *pidfd_file;
+ struct path path;
+ int ret;
+
+ /*
+ * Inode numbering for pidfs start at RESERVED_PIDS + 1.
+ * This avoids collisions with the root inode which is 1
+ * for pseudo filesystems.
+ */
+ ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt,
+ get_pid(pid), &path);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ pidfd_file = dentry_open(&path, flags, current_cred());
+ path_put(&path);
+ return pidfd_file;
+}
+
+void __init pidfs_init(void)
+{
+ pidfs_mnt = kern_mount(&pidfs_type);
+ if (IS_ERR(pidfs_mnt))
+ panic("Failed to mount pidfs pseudo filesystem");
+}
+
+bool is_pidfs_sb(const struct super_block *sb)
+{
+ return sb == pidfs_mnt->mnt_sb;
+}
+
+#else /* !CONFIG_FS_PID */
+
+struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
+{
+ struct file *pidfd_file;
+
+ pidfd_file = anon_inode_getfile("[pidfd]", &pidfs_file_operations, pid,
+ flags | O_RDWR);
+ if (IS_ERR(pidfd_file))
+ return pidfd_file;
+
+ get_pid(pid);
+ return pidfd_file;
+}
+
+void __init pidfs_init(void) { }
+bool is_pidfs_sb(const struct super_block *sb)
+{
+ return false;
+}
+#endif
diff --git a/fs/pipe.c b/fs/pipe.c
index f1adbfe743d4..50c8a8596b52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r) ((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
{
- if (pipe->files)
- mutex_lock_nested(&pipe->mutex, subclass);
+ return cmp_int((unsigned long) a, (unsigned long) b);
}
+#endif
void pipe_lock(struct pipe_inode_info *pipe)
{
- /*
- * pipe_lock() nests non-pipe inode locks (for writing to a file)
- */
- pipe_lock_nested(pipe, I_MUTEX_PARENT);
+ if (pipe->files)
+ mutex_lock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_lock);
@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
}
EXPORT_SYMBOL(pipe_unlock);
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
- mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
- mutex_unlock(&pipe->mutex);
-}
-
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
BUG_ON(pipe1 == pipe2);
- if (pipe1 < pipe2) {
- pipe_lock_nested(pipe1, I_MUTEX_PARENT);
- pipe_lock_nested(pipe2, I_MUTEX_CHILD);
- } else {
- pipe_lock_nested(pipe2, I_MUTEX_PARENT);
- pipe_lock_nested(pipe1, I_MUTEX_CHILD);
- }
+ if (pipe1 > pipe2)
+ swap(pipe1, pipe2);
+
+ pipe_lock(pipe1);
+ pipe_lock(pipe2);
}
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;
ret = 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
/*
* We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = -EAGAIN;
break;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
/*
* We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
wake_next_reader = true;
}
if (pipe_empty(pipe->head, pipe->tail))
wake_next_reader = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if (unlikely(total_len == 0))
return 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* after waiting we need to re-check whether the pipe
* become empty while we dropped the lock.
*/
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_empty = pipe_empty(pipe->head, pipe->tail);
wake_next_writer = true;
}
out:
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
wake_next_writer = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
/*
* If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FIONREAD:
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
count = 0;
head = pipe->head;
tail = pipe->tail;
@@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
count += pipe->bufs[tail & mask].len;
tail++;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return put_user(count, (int __user *)arg);
#ifdef CONFIG_WATCH_QUEUE
case IOC_WATCH_QUEUE_SET_SIZE: {
int ret;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
ret = watch_queue_set_size(pipe, arg);
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}
@@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
{
struct pipe_inode_info *pipe = file->private_data;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (file->f_mode & FMODE_READ)
pipe->readers--;
if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
put_pipe_info(inode, pipe);
return 0;
@@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
struct pipe_inode_info *pipe = filp->private_data;
int retval = 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (filp->f_mode & FMODE_READ)
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
/* this can happen only if on == T */
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return retval;
}
@@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
+ lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
return pipe;
}
@@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
filp->private_data = pipe;
/* OK, we have a pipe and it's pinned down */
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
/* We can only do regular read/write on fifos */
stream_open(inode, filp);
@@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
}
/* Ok! */
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return 0;
err_rd:
@@ -1230,7 +1221,7 @@ err_wr:
goto err;
err:
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
put_pipe_info(inode, pipe);
return ret;
@@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
if (!pipe)
return -EBADF;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
switch (cmd) {
case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
break;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index e1af20893ebe..3f87297dbfdb 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -26,7 +26,6 @@
#include <linux/mnt_idmapping.h>
#include <linux/iversion.h>
#include <linux/security.h>
-#include <linux/evm.h>
#include <linux/fsnotify.h>
#include <linux/filelock.h>
@@ -786,12 +785,12 @@ struct posix_acl *posix_acl_from_xattr(struct user_namespace *userns,
return ERR_PTR(count);
if (count == 0)
return NULL;
-
+
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
acl_e = acl->a_entries;
-
+
for (end = entry + count; entry != end; acl_e++, entry++) {
acl_e->e_tag = le16_to_cpu(entry->e_tag);
acl_e->e_perm = le16_to_cpu(entry->e_perm);
@@ -1137,7 +1136,7 @@ retry_deleg:
error = -EIO;
if (!error) {
fsnotify_xattr(dentry);
- evm_inode_post_set_acl(dentry, acl_name, kacl);
+ security_inode_post_set_acl(dentry, acl_name, kacl);
}
out_inode_unlock:
@@ -1245,7 +1244,7 @@ retry_deleg:
error = -EIO;
if (!error) {
fsnotify_xattr(dentry);
- evm_inode_post_remove_acl(idmap, dentry, acl_name);
+ security_inode_post_remove_acl(idmap, dentry, acl_name);
}
out_inode_unlock:
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ff08a8957552..34a47fb0c57f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
int permitted;
struct mm_struct *mm;
unsigned long long start_time;
- unsigned long cmin_flt = 0, cmaj_flt = 0;
- unsigned long min_flt = 0, maj_flt = 0;
- u64 cutime, cstime, utime, stime;
- u64 cgtime, gtime;
+ unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
+ u64 cutime, cstime, cgtime, utime, stime, gtime;
unsigned long rsslim = 0;
unsigned long flags;
int exit_code = task->exit_code;
+ struct signal_struct *sig = task->signal;
+ unsigned int seq = 1;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = 0;
- cgtime = gtime = 0;
if (lock_task_sighand(task, &flags)) {
- struct signal_struct *sig = task->signal;
-
if (sig->tty) {
struct pid *pgrp = tty_get_pgrp(sig->tty);
tty_pgrp = pid_nr_ns(pgrp, ns);
@@ -527,28 +523,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
num_threads = get_nr_threads(task);
collect_sigign_sigcatch(task, &sigign, &sigcatch);
- cmin_flt = sig->cmin_flt;
- cmaj_flt = sig->cmaj_flt;
- cutime = sig->cutime;
- cstime = sig->cstime;
- cgtime = sig->cgtime;
rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
- /* add up live thread stats at the group level */
if (whole) {
- struct task_struct *t;
-
- __for_each_thread(sig, t) {
- min_flt += t->min_flt;
- maj_flt += t->maj_flt;
- gtime += task_gtime(t);
- }
-
- min_flt += sig->min_flt;
- maj_flt += sig->maj_flt;
- thread_group_cputime_adjusted(task, &utime, &stime);
- gtime += sig->gtime;
-
if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
exit_code = sig->group_exit_code;
}
@@ -562,10 +539,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
if (permitted && (!whole || num_threads < 2))
wchan = !task_is_running(task);
- if (!whole) {
+
+ do {
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+ cmin_flt = sig->cmin_flt;
+ cmaj_flt = sig->cmaj_flt;
+ cutime = sig->cutime;
+ cstime = sig->cstime;
+ cgtime = sig->cgtime;
+
+ if (whole) {
+ struct task_struct *t;
+
+ min_flt = sig->min_flt;
+ maj_flt = sig->maj_flt;
+ gtime = sig->gtime;
+
+ rcu_read_lock();
+ __for_each_thread(sig, t) {
+ min_flt += t->min_flt;
+ maj_flt += t->maj_flt;
+ gtime += task_gtime(t);
+ }
+ rcu_read_unlock();
+ }
+ } while (need_seqretry(&sig->stats_lock, seq));
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+ if (whole) {
+ thread_group_cputime_adjusted(task, &utime, &stime);
+ } else {
+ task_cputime_adjusted(task, &utime, &stime);
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 98a031ac2648..18550c071d71 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1878,8 +1878,6 @@ void proc_pid_evict_inode(struct proc_inode *ei)
hlist_del_init_rcu(&ei->sibling_inodes);
spin_unlock(&pid->lock);
}
-
- put_pid(pid);
}
struct inode *proc_pid_make_inode(struct super_block *sb,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b33e490e3fd9..dcd513dccf55 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -30,7 +30,6 @@
static void proc_evict_inode(struct inode *inode)
{
- struct proc_dir_entry *de;
struct ctl_table_header *head;
struct proc_inode *ei = PROC_I(inode);
@@ -38,17 +37,8 @@ static void proc_evict_inode(struct inode *inode)
clear_inode(inode);
/* Stop tracking associated processes */
- if (ei->pid) {
+ if (ei->pid)
proc_pid_evict_inode(ei);
- ei->pid = NULL;
- }
-
- /* Let go of any associated proc directory entry */
- de = ei->pde;
- if (de) {
- pde_put(de);
- ei->pde = NULL;
- }
head = ei->sysctl;
if (head) {
@@ -80,6 +70,13 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
static void proc_free_inode(struct inode *inode)
{
+ struct proc_inode *ei = PROC_I(inode);
+
+ if (ei->pid)
+ put_pid(ei->pid);
+ /* Let go of any associated proc directory entry */
+ if (ei->pde)
+ pde_put(ei->pde);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
@@ -95,7 +92,7 @@ void __init proc_init_kmemcache(void)
proc_inode_cachep = kmem_cache_create("proc_inode_cache",
sizeof(struct proc_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT|
+ SLAB_ACCOUNT|
SLAB_PANIC),
init_once);
pde_opener_cache =
diff --git a/fs/proc/root.c b/fs/proc/root.c
index b55dbc70287b..06a297a27ba3 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -271,7 +271,7 @@ static void proc_kill_sb(struct super_block *sb)
kill_anon_super(sb);
put_pid_ns(fs_info->pid_ns);
- kfree(fs_info);
+ kfree_rcu(fs_info, rcu);
}
static struct file_system_type proc_fs_type = {
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index d0d9bfdad30c..56815799ce79 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -307,7 +307,6 @@ int pstore_put_backend_records(struct pstore_info *psi)
{
struct pstore_private *pos, *tmp;
struct dentry *root;
- int rc = 0;
root = psinfo_lock_root();
if (!root)
@@ -317,11 +316,8 @@ int pstore_put_backend_records(struct pstore_info *psi)
list_for_each_entry_safe(pos, tmp, &records_list, list) {
if (pos->record->psi == psi) {
list_del_init(&pos->list);
- rc = simple_unlink(d_inode(root), pos->dentry);
- if (WARN_ON(rc))
- break;
- d_drop(pos->dentry);
- dput(pos->dentry);
+ d_invalidate(pos->dentry);
+ simple_unlink(d_inode(root), pos->dentry);
pos->dentry = NULL;
}
}
@@ -329,7 +325,7 @@ int pstore_put_backend_records(struct pstore_info *psi)
inode_unlock(d_inode(root));
- return rc;
+ return 0;
}
/*
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 88b34fdbf759..b1a455f42e93 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -893,6 +893,7 @@ static const struct of_device_id dt_match[] = {
{ .compatible = "ramoops" },
{}
};
+MODULE_DEVICE_TABLE(of, dt_match);
static struct platform_driver ramoops_driver = {
.probe = ramoops_probe,
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 2770746bb7aa..694db616663f 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -973,6 +973,8 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone,
char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n",
kmsg_dump_reason_str(record->reason),
record->count);
+ if (!buf)
+ return -ENOMEM;
hlen = strlen(buf);
record->buf = krealloc(buf, hlen + size, GFP_KERNEL);
if (!record->buf) {
@@ -1215,7 +1217,6 @@ static struct pstore_zone **psz_init_zones(enum pstore_type_id type,
pr_err("allocate for zones %s failed\n", name);
return ERR_PTR(-ENOMEM);
}
- memset(zones, 0, c * sizeof(*zones));
for (i = 0; i < c; i++) {
zone = psz_init_zone(type, off, record_size);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 6eb9bb369b57..d79841e94428 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -21,6 +21,7 @@
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
+#include <linux/fs_context.h>
#include "qnx4.h"
#define QNX4_VERSION 4
@@ -30,28 +31,33 @@ static const struct super_operations qnx4_sops;
static struct inode *qnx4_alloc_inode(struct super_block *sb);
static void qnx4_free_inode(struct inode *inode);
-static int qnx4_remount(struct super_block *sb, int *flags, char *data);
static int qnx4_statfs(struct dentry *, struct kstatfs *);
+static int qnx4_get_tree(struct fs_context *fc);
static const struct super_operations qnx4_sops =
{
.alloc_inode = qnx4_alloc_inode,
.free_inode = qnx4_free_inode,
.statfs = qnx4_statfs,
- .remount_fs = qnx4_remount,
};
-static int qnx4_remount(struct super_block *sb, int *flags, char *data)
+static int qnx4_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
struct qnx4_sb_info *qs;
sync_filesystem(sb);
qs = qnx4_sb(sb);
qs->Version = QNX4_VERSION;
- *flags |= SB_RDONLY;
+ fc->sb_flags |= SB_RDONLY;
return 0;
}
+static const struct fs_context_operations qnx4_context_opts = {
+ .get_tree = qnx4_get_tree,
+ .reconfigure = qnx4_reconfigure,
+};
+
static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create )
{
unsigned long phys;
@@ -183,12 +189,13 @@ static const char *qnx4_checkroot(struct super_block *sb,
return "bitmap file not found.";
}
-static int qnx4_fill_super(struct super_block *s, void *data, int silent)
+static int qnx4_fill_super(struct super_block *s, struct fs_context *fc)
{
struct buffer_head *bh;
struct inode *root;
const char *errmsg;
struct qnx4_sb_info *qs;
+ int silent = fc->sb_flags & SB_SILENT;
qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL);
if (!qs)
@@ -216,7 +223,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
errmsg = qnx4_checkroot(s, (struct qnx4_super_block *) bh->b_data);
brelse(bh);
if (errmsg != NULL) {
- if (!silent)
+ if (!silent)
printk(KERN_ERR "qnx4: %s\n", errmsg);
return -EINVAL;
}
@@ -235,6 +242,18 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
return 0;
}
+static int qnx4_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, qnx4_fill_super);
+}
+
+static int qnx4_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &qnx4_context_opts;
+
+ return 0;
+}
+
static void qnx4_kill_sb(struct super_block *sb)
{
struct qnx4_sb_info *qs = qnx4_sb(sb);
@@ -359,7 +378,7 @@ static int init_inodecache(void)
qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache",
sizeof(struct qnx4_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (qnx4_inode_cachep == NULL)
return -ENOMEM;
@@ -376,18 +395,12 @@ static void destroy_inodecache(void)
kmem_cache_destroy(qnx4_inode_cachep);
}
-static struct dentry *qnx4_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
-}
-
static struct file_system_type qnx4_fs_type = {
- .owner = THIS_MODULE,
- .name = "qnx4",
- .mount = qnx4_mount,
- .kill_sb = qnx4_kill_sb,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "qnx4",
+ .kill_sb = qnx4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = qnx4_init_fs_context,
};
MODULE_ALIAS_FS("qnx4");
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index a286c545717f..405913f4faff 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -615,7 +615,7 @@ static int init_inodecache(void)
qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache",
sizeof(struct qnx6_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (!qnx6_inode_cachep)
return -ENOMEM;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1f0c754416b6..eb6e9d95dea1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2984,7 +2984,7 @@ static int __init dquot_init(void)
dquot_cachep = kmem_cache_create("dquot",
sizeof(struct dquot), sizeof(unsigned long) * 4,
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_PANIC),
+ SLAB_PANIC),
NULL);
order = 0;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 4ac05a9e25bc..8006faaaf0ec 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -102,11 +102,20 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
int error = -ENOSPC;
if (inode) {
+ error = security_inode_init_security(inode, dir,
+ &dentry->d_name, NULL,
+ NULL);
+ if (error) {
+ iput(inode);
+ goto out;
+ }
+
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
error = 0;
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
}
+out:
return error;
}
@@ -134,6 +143,15 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode = ramfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
if (inode) {
int l = strlen(symname)+1;
+
+ error = security_inode_init_security(inode, dir,
+ &dentry->d_name, NULL,
+ NULL);
+ if (error) {
+ iput(inode);
+ goto out;
+ }
+
error = page_symlink(inode, symname, l);
if (!error) {
d_instantiate(dentry, inode);
@@ -143,6 +161,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
} else
iput(inode);
}
+out:
return error;
}
@@ -150,12 +169,23 @@ static int ramfs_tmpfile(struct mnt_idmap *idmap,
struct inode *dir, struct file *file, umode_t mode)
{
struct inode *inode;
+ int error;
inode = ramfs_get_inode(dir->i_sb, dir, mode, 0);
if (!inode)
return -ENOSPC;
+
+ error = security_inode_init_security(inode, dir,
+ &file_dentry(file)->d_name, NULL,
+ NULL);
+ if (error) {
+ iput(inode);
+ goto out;
+ }
+
d_tmpfile(file, inode);
- return finish_open_simple(file, 0);
+out:
+ return finish_open_simple(file, error);
}
static const struct inode_operations ramfs_dir_inode_operations = {
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 171c912af50f..6474529c4253 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2386,7 +2386,7 @@ static int journal_read(struct super_block *sb)
cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
reiserfs_info(sb, "checking transaction log (%pg)\n",
- journal->j_bdev_handle->bdev);
+ file_bdev(journal->j_bdev_file));
start = ktime_get_seconds();
/*
@@ -2447,7 +2447,7 @@ static int journal_read(struct super_block *sb)
* device and journal device to be the same
*/
d_bh =
- reiserfs_breada(journal->j_bdev_handle->bdev, cur_dblock,
+ reiserfs_breada(file_bdev(journal->j_bdev_file), cur_dblock,
sb->s_blocksize,
SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
SB_ONDISK_JOURNAL_SIZE(sb));
@@ -2588,9 +2588,9 @@ static void journal_list_init(struct super_block *sb)
static void release_journal_dev(struct reiserfs_journal *journal)
{
- if (journal->j_bdev_handle) {
- bdev_release(journal->j_bdev_handle);
- journal->j_bdev_handle = NULL;
+ if (journal->j_bdev_file) {
+ fput(journal->j_bdev_file);
+ journal->j_bdev_file = NULL;
}
}
@@ -2605,7 +2605,7 @@ static int journal_init_dev(struct super_block *super,
result = 0;
- journal->j_bdev_handle = NULL;
+ journal->j_bdev_file = NULL;
jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
@@ -2616,37 +2616,37 @@ static int journal_init_dev(struct super_block *super,
if ((!jdev_name || !jdev_name[0])) {
if (jdev == super->s_dev)
holder = NULL;
- journal->j_bdev_handle = bdev_open_by_dev(jdev, blkdev_mode,
+ journal->j_bdev_file = bdev_file_open_by_dev(jdev, blkdev_mode,
holder, NULL);
- if (IS_ERR(journal->j_bdev_handle)) {
- result = PTR_ERR(journal->j_bdev_handle);
- journal->j_bdev_handle = NULL;
+ if (IS_ERR(journal->j_bdev_file)) {
+ result = PTR_ERR(journal->j_bdev_file);
+ journal->j_bdev_file = NULL;
reiserfs_warning(super, "sh-458",
"cannot init journal device unknown-block(%u,%u): %i",
MAJOR(jdev), MINOR(jdev), result);
return result;
} else if (jdev != super->s_dev)
- set_blocksize(journal->j_bdev_handle->bdev,
+ set_blocksize(file_bdev(journal->j_bdev_file),
super->s_blocksize);
return 0;
}
- journal->j_bdev_handle = bdev_open_by_path(jdev_name, blkdev_mode,
+ journal->j_bdev_file = bdev_file_open_by_path(jdev_name, blkdev_mode,
holder, NULL);
- if (IS_ERR(journal->j_bdev_handle)) {
- result = PTR_ERR(journal->j_bdev_handle);
- journal->j_bdev_handle = NULL;
+ if (IS_ERR(journal->j_bdev_file)) {
+ result = PTR_ERR(journal->j_bdev_file);
+ journal->j_bdev_file = NULL;
reiserfs_warning(super, "sh-457",
"journal_init_dev: Cannot open '%s': %i",
jdev_name, result);
return result;
}
- set_blocksize(journal->j_bdev_handle->bdev, super->s_blocksize);
+ set_blocksize(file_bdev(journal->j_bdev_file), super->s_blocksize);
reiserfs_info(super,
"journal_init_dev: journal device: %pg\n",
- journal->j_bdev_handle->bdev);
+ file_bdev(journal->j_bdev_file));
return 0;
}
@@ -2804,7 +2804,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
"journal header magic %x (device %pg) does "
"not match to magic found in super block %x",
jh->jh_journal.jp_journal_magic,
- journal->j_bdev_handle->bdev,
+ file_bdev(journal->j_bdev_file),
sb_jp_journal_magic(rs));
brelse(bhjh);
goto free_and_return;
@@ -2828,7 +2828,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
reiserfs_info(sb, "journal params: device %pg, size %u, "
"journal first block %u, max trans len %u, max batch %u, "
"max commit age %u, max trans age %u\n",
- journal->j_bdev_handle->bdev,
+ file_bdev(journal->j_bdev_file),
SB_ONDISK_JOURNAL_SIZE(sb),
SB_ONDISK_JOURNAL_1st_BLOCK(sb),
journal->j_trans_max,
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 83cb9402e0f9..5c68a4a52d78 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -354,7 +354,7 @@ static int show_journal(struct seq_file *m, void *unused)
"prepare: \t%12lu\n"
"prepare_retry: \t%12lu\n",
DJP(jp_journal_1st_block),
- SB_JOURNAL(sb)->j_bdev_handle->bdev,
+ file_bdev(SB_JOURNAL(sb)->j_bdev_file),
DJP(jp_journal_dev),
DJP(jp_journal_size),
DJP(jp_journal_trans_max),
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 725667880e62..0554903f42a9 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -299,7 +299,7 @@ struct reiserfs_journal {
/* oldest journal block. start here for traverse */
struct reiserfs_journal_cnode *j_first;
- struct bdev_handle *j_bdev_handle;
+ struct file *j_bdev_file;
/* first block on s_dev of reserved area journal */
int j_1st_reserved_block;
@@ -2810,10 +2810,10 @@ struct reiserfs_journal_header {
/* We need these to make journal.c code more readable */
#define journal_find_get_block(s, block) __find_get_block(\
- SB_JOURNAL(s)->j_bdev_handle->bdev, block, s->s_blocksize)
-#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_bdev_handle->bdev,\
+ file_bdev(SB_JOURNAL(s)->j_bdev_file), block, s->s_blocksize)
+#define journal_getblk(s, block) __getblk(file_bdev(SB_JOURNAL(s)->j_bdev_file),\
block, s->s_blocksize)
-#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_bdev_handle->bdev,\
+#define journal_bread(s, block) __bread(file_bdev(SB_JOURNAL(s)->j_bdev_file),\
block, s->s_blocksize)
enum reiserfs_bh_state_bits {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 67b5510beded..2cc469d481a2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -670,7 +670,6 @@ static int __init init_inodecache(void)
sizeof(struct
reiserfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|
SLAB_ACCOUNT),
init_once);
if (reiserfs_inode_cachep == NULL)
diff --git a/fs/remap_range.c b/fs/remap_range.c
index f8c1120b8311..de07f978ce3e 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -373,9 +373,9 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
}
EXPORT_SYMBOL(generic_remap_file_range_prep);
-loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
{
loff_t ret;
@@ -391,23 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
- ret = file_in->f_op->remap_file_range(file_in, pos_in,
- file_out, pos_out, len, remap_flags);
- if (ret < 0)
- return ret;
-
- fsnotify_access(file_in);
- fsnotify_modify(file_out);
- return ret;
-}
-EXPORT_SYMBOL(do_clone_file_range);
-
-loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
-{
- loff_t ret;
-
ret = remap_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
@@ -417,10 +400,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
return ret;
file_start_write(file_out);
- ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
- remap_flags);
+ ret = file_in->f_op->remap_file_range(file_in, pos_in,
+ file_out, pos_out, len, remap_flags);
file_end_write(file_out);
+ if (ret < 0)
+ return ret;
+ fsnotify_access(file_in);
+ fsnotify_modify(file_out);
return ret;
}
EXPORT_SYMBOL(vfs_clone_file_range);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 545ad44f96b8..2be227532f39 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- bdev_release(sb->s_bdev_handle);
+ fput(sb->s_bdev_file);
}
#endif
}
@@ -630,8 +630,8 @@ static int __init init_romfs_fs(void)
romfs_inode_cachep =
kmem_cache_create("romfs_i",
sizeof(struct romfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
- SLAB_ACCOUNT, romfs_i_init_once);
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ romfs_i_init_once);
if (!romfs_inode_cachep) {
pr_err("Failed to initialise inode cache\n");
diff --git a/fs/select.c b/fs/select.c
index 0ee55af1a55c..9515c3fa1a03 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -476,7 +476,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in,
wait->_key |= POLLOUT_SET;
}
-static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
ktime_t expire, *to = NULL;
struct poll_wqueues table;
@@ -839,7 +839,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
struct poll_list {
struct poll_list *next;
- int len;
+ unsigned int len;
struct pollfd entries[];
};
@@ -975,14 +975,15 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
struct timespec64 *end_time)
{
struct poll_wqueues table;
- int err = -EFAULT, fdcount, len;
+ int err = -EFAULT, fdcount;
/* Allocate small arguments on the stack to save memory and be
faster - use long to make sure the buffer is aligned properly
on 64 bit archs to avoid unaligned access */
long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
struct poll_list *const head = (struct poll_list *)stack_pps;
struct poll_list *walk = head;
- unsigned long todo = nfds;
+ unsigned int todo = nfds;
+ unsigned int len;
if (nfds > rlimit(RLIMIT_NOFILE))
return -EINVAL;
@@ -998,9 +999,9 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
sizeof(struct pollfd) * walk->len))
goto out_fds;
- todo -= walk->len;
- if (!todo)
+ if (walk->len >= todo)
break;
+ todo -= walk->len;
len = min(todo, POLLFD_PER_PAGE);
walk = walk->next = kmalloc(struct_size(walk, entries, len),
@@ -1020,7 +1021,7 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
for (walk = head; walk; walk = walk->next) {
struct pollfd *fds = walk->entries;
- int j;
+ unsigned int j;
for (j = walk->len; j; fds++, ufds++, j--)
unsafe_put_user(fds->revents, &ufds->revents, Efault);
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 971892620504..3de5047a7ff9 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -145,21 +145,27 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
struct cached_fid *cfid;
struct cached_fids *cfids;
const char *npath;
+ int retries = 0, cur_sleep = 1;
if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache ||
is_smb1_server(tcon->ses->server) || (dir_cache_timeout == 0))
return -EOPNOTSUPP;
ses = tcon->ses;
- server = cifs_pick_channel(ses);
cfids = tcon->cfids;
- if (!server->ops->new_lease_key)
- return -EIO;
-
if (cifs_sb->root == NULL)
return -ENOENT;
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ oplock = SMB2_OPLOCK_LEVEL_II;
+ server = cifs_pick_channel(ses);
+
+ if (!server->ops->new_lease_key)
+ return -EIO;
+
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path)
return -ENOMEM;
@@ -236,6 +242,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
.desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES,
.disposition = FILE_OPEN,
.fid = pfid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -268,6 +275,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
*/
cfid->has_lease = true;
+ if (retries) {
+ smb2_set_replay(server, &rqst[0]);
+ smb2_set_replay(server, &rqst[1]);
+ }
+
rc = compound_send_recv(xid, ses, server,
flags, 2, rqst,
resp_buftype, rsp_iov);
@@ -367,6 +379,11 @@ out:
atomic_inc(&tcon->num_remote_opens);
}
kfree(utf16_path);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index ef4c2e3c9fa6..6322f0f68a17 100644
--- a/fs/smb/client/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -572,7 +572,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp);
UniStrupr(user);
} else {
- memset(user, '\0', 2);
+ *(u16 *)user = 0;
}
rc = crypto_shash_update(ses->server->secmech.hmacmd5,
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index e902de4e475a..e0d8c79cdde1 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -396,7 +396,7 @@ cifs_alloc_inode(struct super_block *sb)
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
- cifs_inode->server_eof = 0;
+ cifs_inode->netfs.remote_i_size = 0;
cifs_inode->uniqueid = 0;
cifs_inode->createtime = 0;
cifs_inode->epoch = 0;
@@ -1085,7 +1085,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
}
static int
-cifs_setlease(struct file *file, int arg, struct file_lock **lease, void **priv)
+cifs_setlease(struct file *file, int arg, struct file_lease **lease, void **priv)
{
/*
* Note that this is called by vfs setlease with i_lock held to
@@ -1094,9 +1094,6 @@ cifs_setlease(struct file *file, int arg, struct file_lock **lease, void **priv)
struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
- if (!(S_ISREG(inode->i_mode)))
- return -EINVAL;
-
/* Check if file is oplocked if this is request for new lease */
if (arg == F_UNLCK ||
((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
@@ -1172,6 +1169,9 @@ const char *cifs_get_link(struct dentry *dentry, struct inode *inode,
{
char *target_path;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
target_path = kmalloc(PATH_MAX, GFP_KERNEL);
if (!target_path)
return ERR_PTR(-ENOMEM);
@@ -1380,6 +1380,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode);
+ struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode);
struct cifsFileInfo *smb_file_src;
struct cifsFileInfo *smb_file_target;
struct cifs_tcon *src_tcon;
@@ -1428,7 +1429,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
* Advance the EOF marker after the flush above to the end of the range
* if it's short of that.
*/
- if (src_cifsi->server_eof < off + len) {
+ if (src_cifsi->netfs.remote_i_size < off + len) {
rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
if (rc < 0)
goto unlock;
@@ -1452,12 +1453,22 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
/* Discard all the folios that overlap the destination region. */
truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
+ fscache_invalidate(cifs_inode_cookie(target_inode), NULL,
+ i_size_read(target_inode), 0);
+
rc = file_modified(dst_file);
if (!rc) {
rc = target_tcon->ses->server->ops->copychunk_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
- if (rc > 0 && destoff + rc > i_size_read(target_inode))
+ if (rc > 0 && destoff + rc > i_size_read(target_inode)) {
truncate_setsize(target_inode, destoff + rc);
+ netfs_resize_file(&target_cifsi->netfs,
+ i_size_read(target_inode), true);
+ fscache_resize_cookie(cifs_inode_cookie(target_inode),
+ i_size_read(target_inode));
+ }
+ if (rc > 0 && destoff + rc > target_cifsi->netfs.zero_point)
+ target_cifsi->netfs.zero_point = destoff + rc;
}
file_accessed(src_file);
@@ -1653,7 +1664,7 @@ cifs_init_inodecache(void)
cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
sizeof(struct cifsInodeInfo),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
cifs_init_once);
if (cifs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 20036fb16cec..53c75cfb33ab 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -50,6 +50,11 @@
#define CIFS_DEF_ACTIMEO (1 * HZ)
/*
+ * max sleep time before retry to server
+ */
+#define CIFS_MAX_SLEEP 2000
+
+/*
* max attribute cache timeout (jiffies) - 2^30
*/
#define CIFS_MAX_ACTIMEO (1 << 30)
@@ -82,7 +87,7 @@
#define SMB_INTERFACE_POLL_INTERVAL 600
/* maximum number of PDUs in one compound */
-#define MAX_COMPOUND 5
+#define MAX_COMPOUND 7
/*
* Default number of credits to keep available for SMB3.
@@ -1027,6 +1032,8 @@ struct cifs_chan {
__u8 signkey[SMB3_SIGN_KEY_SIZE];
};
+#define CIFS_SES_FLAG_SCALE_CHANNELS (0x1)
+
/*
* Session structure. One of these for each uid session with a particular host
*/
@@ -1059,6 +1066,7 @@ struct cifs_ses {
enum securityEnum sectype; /* what security flavor was specified? */
bool sign; /* is signing required? */
bool domainAuto:1;
+ unsigned int flags;
__u16 session_flags;
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
@@ -1370,6 +1378,7 @@ struct cifs_open_parms {
struct cifs_fid *fid;
umode_t mode;
bool reconnect:1;
+ bool replay:1; /* indicates that this open is for a replay */
};
struct cifs_fid {
@@ -1501,6 +1510,7 @@ struct cifs_writedata {
struct smbd_mr *mr;
#endif
struct cifs_credits credits;
+ bool replay;
};
/*
@@ -1561,7 +1571,6 @@ struct cifsInodeInfo {
spinlock_t writers_lock;
unsigned int writers; /* Number of writers on this inode */
unsigned long time; /* jiffies of last update of inode */
- u64 server_eof; /* current file size on server -- protected by i_lock */
u64 uniqueid; /* server inode number */
u64 createtime; /* creation time on server */
__u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */
@@ -1831,6 +1840,13 @@ static inline bool is_retryable_error(int error)
return false;
}
+static inline bool is_replayable_error(int error)
+{
+ if (error == -EAGAIN || error == -ECONNABORTED)
+ return true;
+ return false;
+}
+
/* cifs_get_writable_file() flags */
#define FIND_WR_ANY 0
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 01e89070df5a..5eb83bafc7fd 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -2066,20 +2066,20 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
parm_data = (struct cifs_posix_lock *)
((char *)&pSMBr->hdr.Protocol + data_offset);
if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
- pLockData->fl_type = F_UNLCK;
+ pLockData->c.flc_type = F_UNLCK;
else {
if (parm_data->lock_type ==
cpu_to_le16(CIFS_RDLCK))
- pLockData->fl_type = F_RDLCK;
+ pLockData->c.flc_type = F_RDLCK;
else if (parm_data->lock_type ==
cpu_to_le16(CIFS_WRLCK))
- pLockData->fl_type = F_WRLCK;
+ pLockData->c.flc_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start);
pLockData->fl_end = pLockData->fl_start +
(le64_to_cpu(parm_data->length) ?
le64_to_cpu(parm_data->length) - 1 : 0);
- pLockData->fl_pid = -le32_to_cpu(parm_data->pid);
+ pLockData->c.flc_pid = -le32_to_cpu(parm_data->pid);
}
}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index bfd568f89710..ac9595504f4b 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -233,6 +233,12 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
/* check if iface is still active */
spin_lock(&ses->chan_lock);
+ if (cifs_ses_get_chan_index(ses, server) ==
+ CIFS_INVAL_CHAN_INDEX) {
+ spin_unlock(&ses->chan_lock);
+ continue;
+ }
+
if (!cifs_chan_is_iface_active(ses, server)) {
spin_unlock(&ses->chan_lock);
cifs_chan_update_iface(ses, server);
@@ -3438,8 +3444,18 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
* the user on mount
*/
if ((cifs_sb->ctx->wsize == 0) ||
- (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx)))
- cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx);
+ (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) {
+ cifs_sb->ctx->wsize =
+ round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE);
+ /*
+ * in the very unlikely event that the server sent a max write size under PAGE_SIZE,
+ * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096
+ */
+ if (cifs_sb->ctx->wsize == 0) {
+ cifs_sb->ctx->wsize = PAGE_SIZE;
+ cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n");
+ }
+ }
if ((cifs_sb->ctx->rsize == 0) ||
(cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
@@ -4228,6 +4244,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+
+ /* if tcon is marked for needing reconnect, update state */
+ if (tcon->need_reconnect)
+ tcon->status = TID_NEED_TCON;
+
if (tcon->status == TID_GOOD) {
spin_unlock(&tcon->tc_lock);
return 0;
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index a8a1d386da65..449c59830039 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -565,6 +565,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+
+ /* if tcon is marked for needing reconnect, update state */
+ if (tcon->need_reconnect)
+ tcon->status = TID_NEED_TCON;
+
if (tcon->status == TID_GOOD) {
spin_unlock(&tcon->tc_lock);
return 0;
@@ -625,8 +630,8 @@ out:
spin_lock(&tcon->tc_lock);
if (tcon->status == TID_IN_TCON)
tcon->status = TID_GOOD;
- spin_unlock(&tcon->tc_lock);
tcon->need_reconnect = false;
+ spin_unlock(&tcon->tc_lock);
}
return rc;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 3a213432775b..c3b8e7091a4d 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -87,7 +87,7 @@ void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len
continue;
if (!folio_test_writeback(folio)) {
WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
- len, start, folio_index(folio), end);
+ len, start, folio->index, end);
continue;
}
@@ -120,7 +120,7 @@ void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len
continue;
if (!folio_test_writeback(folio)) {
WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
- len, start, folio_index(folio), end);
+ len, start, folio->index, end);
continue;
}
@@ -151,7 +151,7 @@ void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int le
xas_for_each(&xas, folio, end) {
if (!folio_test_writeback(folio)) {
WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
- len, start, folio_index(folio), end);
+ len, start, folio->index, end);
continue;
}
@@ -175,6 +175,9 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+ if (tcon->need_reconnect)
+ tcon->status = TID_NEED_RECON;
+
if (tcon->status != TID_NEED_RECON) {
spin_unlock(&tcon->tc_lock);
return;
@@ -1312,20 +1315,20 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
down_read(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, offset, length, type,
- flock->fl_flags, &conf_lock,
+ flock->c.flc_flags, &conf_lock,
CIFS_LOCK_OP);
if (exist) {
flock->fl_start = conf_lock->offset;
flock->fl_end = conf_lock->offset + conf_lock->length - 1;
- flock->fl_pid = conf_lock->pid;
+ flock->c.flc_pid = conf_lock->pid;
if (conf_lock->type & server->vals->shared_lock_type)
- flock->fl_type = F_RDLCK;
+ flock->c.flc_type = F_RDLCK;
else
- flock->fl_type = F_WRLCK;
+ flock->c.flc_type = F_WRLCK;
} else if (!cinode->can_cache_brlcks)
rc = 1;
else
- flock->fl_type = F_UNLCK;
+ flock->c.flc_type = F_UNLCK;
up_read(&cinode->lock_sem);
return rc;
@@ -1401,16 +1404,16 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
int rc = 0;
struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
- unsigned char saved_type = flock->fl_type;
+ unsigned char saved_type = flock->c.flc_type;
- if ((flock->fl_flags & FL_POSIX) == 0)
+ if ((flock->c.flc_flags & FL_POSIX) == 0)
return 1;
down_read(&cinode->lock_sem);
posix_test_lock(file, flock);
- if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
- flock->fl_type = saved_type;
+ if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
+ flock->c.flc_type = saved_type;
rc = 1;
}
@@ -1431,7 +1434,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
int rc = FILE_LOCK_DEFERRED + 1;
- if ((flock->fl_flags & FL_POSIX) == 0)
+ if ((flock->c.flc_flags & FL_POSIX) == 0)
return rc;
cifs_down_write(&cinode->lock_sem);
@@ -1581,7 +1584,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
el = locks_to_send.next;
spin_lock(&flctx->flc_lock);
- list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
+ for_each_file_lock(flock, &flctx->flc_posix) {
+ unsigned char ftype = flock->c.flc_type;
+
if (el == &locks_to_send) {
/*
* The list ended. We don't have enough allocated
@@ -1591,12 +1596,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
break;
}
length = cifs_flock_len(flock);
- if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
+ if (ftype == F_RDLCK || ftype == F_SHLCK)
type = CIFS_RDLCK;
else
type = CIFS_WRLCK;
lck = list_entry(el, struct lock_to_push, llist);
- lck->pid = hash_lockowner(flock->fl_owner);
+ lck->pid = hash_lockowner(flock->c.flc_owner);
lck->netfid = cfile->fid.netfid;
lck->length = length;
lck->type = type;
@@ -1663,42 +1668,43 @@ static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
bool *wait_flag, struct TCP_Server_Info *server)
{
- if (flock->fl_flags & FL_POSIX)
+ if (flock->c.flc_flags & FL_POSIX)
cifs_dbg(FYI, "Posix\n");
- if (flock->fl_flags & FL_FLOCK)
+ if (flock->c.flc_flags & FL_FLOCK)
cifs_dbg(FYI, "Flock\n");
- if (flock->fl_flags & FL_SLEEP) {
+ if (flock->c.flc_flags & FL_SLEEP) {
cifs_dbg(FYI, "Blocking lock\n");
*wait_flag = true;
}
- if (flock->fl_flags & FL_ACCESS)
+ if (flock->c.flc_flags & FL_ACCESS)
cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
- if (flock->fl_flags & FL_LEASE)
+ if (flock->c.flc_flags & FL_LEASE)
cifs_dbg(FYI, "Lease on file - not implemented yet\n");
- if (flock->fl_flags &
+ if (flock->c.flc_flags &
(~(FL_POSIX | FL_FLOCK | FL_SLEEP |
FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
- cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
+ cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
+ flock->c.flc_flags);
*type = server->vals->large_lock_type;
- if (flock->fl_type == F_WRLCK) {
+ if (lock_is_write(flock)) {
cifs_dbg(FYI, "F_WRLCK\n");
*type |= server->vals->exclusive_lock_type;
*lock = 1;
- } else if (flock->fl_type == F_UNLCK) {
+ } else if (lock_is_unlock(flock)) {
cifs_dbg(FYI, "F_UNLCK\n");
*type |= server->vals->unlock_lock_type;
*unlock = 1;
/* Check if unlock includes more than one lock range */
- } else if (flock->fl_type == F_RDLCK) {
+ } else if (lock_is_read(flock)) {
cifs_dbg(FYI, "F_RDLCK\n");
*type |= server->vals->shared_lock_type;
*lock = 1;
- } else if (flock->fl_type == F_EXLCK) {
+ } else if (flock->c.flc_type == F_EXLCK) {
cifs_dbg(FYI, "F_EXLCK\n");
*type |= server->vals->exclusive_lock_type;
*lock = 1;
- } else if (flock->fl_type == F_SHLCK) {
+ } else if (flock->c.flc_type == F_SHLCK) {
cifs_dbg(FYI, "F_SHLCK\n");
*type |= server->vals->shared_lock_type;
*lock = 1;
@@ -1730,7 +1736,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
else
posix_lock_type = CIFS_WRLCK;
rc = CIFSSMBPosixLock(xid, tcon, netfid,
- hash_lockowner(flock->fl_owner),
+ hash_lockowner(flock->c.flc_owner),
flock->fl_start, length, flock,
posix_lock_type, wait_flag);
return rc;
@@ -1747,7 +1753,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
if (rc == 0) {
rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
type, 0, 1, false);
- flock->fl_type = F_UNLCK;
+ flock->c.flc_type = F_UNLCK;
if (rc != 0)
cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
rc);
@@ -1755,7 +1761,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
}
if (type & server->vals->shared_lock_type) {
- flock->fl_type = F_WRLCK;
+ flock->c.flc_type = F_WRLCK;
return 0;
}
@@ -1767,12 +1773,12 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
if (rc == 0) {
rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
type | server->vals->shared_lock_type, 0, 1, false);
- flock->fl_type = F_RDLCK;
+ flock->c.flc_type = F_RDLCK;
if (rc != 0)
cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
rc);
} else
- flock->fl_type = F_WRLCK;
+ flock->c.flc_type = F_WRLCK;
return 0;
}
@@ -1940,7 +1946,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
posix_lock_type = CIFS_UNLCK;
rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
- hash_lockowner(flock->fl_owner),
+ hash_lockowner(flock->c.flc_owner),
flock->fl_start, length,
NULL, posix_lock_type, wait_flag);
goto out;
@@ -1950,7 +1956,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
struct cifsLockInfo *lock;
lock = cifs_lock_init(flock->fl_start, length, type,
- flock->fl_flags);
+ flock->c.flc_flags);
if (!lock)
return -ENOMEM;
@@ -1989,7 +1995,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
rc = server->ops->mand_unlock_range(cfile, flock, xid);
out:
- if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
+ if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
/*
* If this is a request to remove all locks because we
* are closing the file, it doesn't matter if the
@@ -1998,7 +2004,7 @@ out:
*/
if (rc) {
cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
- if (!(flock->fl_flags & FL_CLOSE))
+ if (!(flock->c.flc_flags & FL_CLOSE))
return rc;
}
rc = locks_lock_file_wait(file, flock);
@@ -2019,7 +2025,7 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
xid = get_xid();
- if (!(fl->fl_flags & FL_FLOCK)) {
+ if (!(fl->c.flc_flags & FL_FLOCK)) {
rc = -ENOLCK;
free_xid(xid);
return rc;
@@ -2070,7 +2076,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
xid = get_xid();
cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
- flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
+ flock->c.flc_flags, flock->c.flc_type,
+ (long long)flock->fl_start,
(long long)flock->fl_end);
cfile = (struct cifsFileInfo *)file->private_data;
@@ -2120,8 +2127,8 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
{
loff_t end_of_write = offset + bytes_written;
- if (end_of_write > cifsi->server_eof)
- cifsi->server_eof = end_of_write;
+ if (end_of_write > cifsi->netfs.remote_i_size)
+ netfs_resize_file(&cifsi->netfs, end_of_write, true);
}
static ssize_t
@@ -2651,7 +2658,7 @@ static void cifs_extend_writeback(struct address_space *mapping,
continue;
if (xa_is_value(folio))
break;
- if (folio_index(folio) != index)
+ if (folio->index != index)
break;
if (!folio_try_get_rcu(folio)) {
xas_reset(&xas);
@@ -2899,7 +2906,7 @@ redo_folio:
goto skip_write;
}
- if (folio_mapping(folio) != mapping ||
+ if (folio->mapping != mapping ||
!folio_test_dirty(folio)) {
start += folio_size(folio);
folio_unlock(folio);
@@ -2951,7 +2958,7 @@ skip_write:
continue;
}
- folio_batch_release(&fbatch);
+ folio_batch_release(&fbatch);
cond_resched();
} while (wbc->nr_to_write > 0);
@@ -3247,8 +3254,8 @@ cifs_uncached_writev_complete(struct work_struct *work)
spin_lock(&inode->i_lock);
cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
- if (cifsi->server_eof > inode->i_size)
- i_size_write(inode, cifsi->server_eof);
+ if (cifsi->netfs.remote_i_size > inode->i_size)
+ i_size_write(inode, cifsi->netfs.remote_i_size);
spin_unlock(&inode->i_lock);
complete(&wdata->done);
@@ -3300,6 +3307,7 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
if (wdata->cfile->invalidHandle)
rc = -EAGAIN;
else {
+ wdata->replay = true;
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr) {
wdata->mr->need_invalidate = true;
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 52cbef2eeb28..4b2f5aa2ea0e 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -211,7 +211,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
switch (match_token(value, cifs_secflavor_tokens, args)) {
case Opt_sec_krb5p:
- cifs_errorf(fc, "sec=krb5p is not supported!\n");
+ cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n");
return 1;
case Opt_sec_krb5i:
ctx->sign = true;
@@ -1111,6 +1111,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_wsize:
ctx->wsize = result.uint_32;
ctx->got_wsize = true;
+ if (ctx->wsize % PAGE_SIZE != 0) {
+ ctx->wsize = round_down(ctx->wsize, PAGE_SIZE);
+ if (ctx->wsize == 0) {
+ ctx->wsize = PAGE_SIZE;
+ cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE);
+ } else {
+ cifs_dbg(VFS,
+ "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n",
+ ctx->wsize, PAGE_SIZE);
+ }
+ }
break;
case Opt_acregmax:
ctx->acregmax = HZ * result.uint_32;
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index f0989484f2c6..d02f8ba29cb5 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -104,7 +104,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
mtime = inode_get_mtime(inode);
if (timespec64_equal(&mtime, &fattr->cf_mtime) &&
- cifs_i->server_eof == fattr->cf_eof) {
+ cifs_i->netfs.remote_i_size == fattr->cf_eof) {
cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
__func__, cifs_i->uniqueid);
return;
@@ -194,7 +194,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
else
clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags);
- cifs_i->server_eof = fattr->cf_eof;
+ cifs_i->netfs.remote_i_size = fattr->cf_eof;
/*
* Can't safely change the file size here if the client is writing to
* it due to potential races.
@@ -2858,7 +2858,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
set_size_out:
if (rc == 0) {
- cifsInode->server_eof = attrs->ia_size;
+ netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true);
cifs_setsize(inode, attrs->ia_size);
/*
* i_blocks is not related to (i_size / i_blksize), but instead
@@ -3011,6 +3011,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
if ((attrs->ia_valid & ATTR_SIZE) &&
attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true);
fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
}
@@ -3210,6 +3211,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if ((attrs->ia_valid & ATTR_SIZE) &&
attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true);
fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
}
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index a6968573b775..4a517b280f2b 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -168,6 +168,21 @@ static char *automount_fullpath(struct dentry *dentry, void *page)
return s;
}
+static void fs_context_set_ids(struct smb3_fs_context *ctx)
+{
+ kuid_t uid = current_fsuid();
+ kgid_t gid = current_fsgid();
+
+ if (ctx->multiuser) {
+ if (!ctx->uid_specified)
+ ctx->linux_uid = uid;
+ if (!ctx->gid_specified)
+ ctx->linux_gid = gid;
+ }
+ if (!ctx->cruid_specified)
+ ctx->cred_uid = uid;
+}
+
/*
* Create a vfsmount that we can automount
*/
@@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automount(struct path *path)
tmp.leaf_fullpath = NULL;
tmp.UNC = tmp.prepath = NULL;
tmp.dfs_root_ses = NULL;
+ fs_context_set_ids(&tmp);
rc = smb3_fs_context_dup(ctx, &tmp);
if (rc) {
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 94255401b38d..b520eea7bfce 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -141,7 +141,7 @@ retry:
if (likely(reparse_inode_match(inode, fattr))) {
fattr->cf_mode = inode->i_mode;
fattr->cf_rdev = inode->i_rdev;
- fattr->cf_eof = CIFS_I(inode)->server_eof;
+ fattr->cf_eof = CIFS_I(inode)->netfs.remote_i_size;
fattr->cf_symlink_target = NULL;
} else {
CIFS_I(inode)->time = 0;
@@ -307,14 +307,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
}
static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
- SEARCH_ID_FULL_DIR_INFO *info,
+ const void *info,
struct cifs_sb_info *cifs_sb)
{
+ const FILE_FULL_DIRECTORY_INFO *di = info;
+
__dir_info_to_fattr(fattr, info);
- /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */
+ /* See MS-FSCC 2.4.14, 2.4.19 */
if (fattr->cf_cifsattrs & ATTR_REPARSE)
- fattr->cf_cifstag = le32_to_cpu(info->EaSize);
+ fattr->cf_cifstag = le32_to_cpu(di->EaSize);
cifs_fill_common_info(fattr, cifs_sb);
}
@@ -396,7 +398,7 @@ ffirst_retry:
} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
} else /* not srvinos - BB fixme add check for backlevel? */ {
- cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO;
+ cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO;
}
search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
@@ -987,10 +989,9 @@ static int cifs_filldir(char *find_entry, struct file *file,
(FIND_FILE_STANDARD_INFO *)find_entry,
cifs_sb);
break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
case SMB_FIND_FILE_ID_FULL_DIR_INFO:
- cifs_fulldir_info_to_fattr(&fattr,
- (SEARCH_ID_FULL_DIR_INFO *)find_entry,
- cifs_sb);
+ cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb);
break;
default:
cifs_dir_info_to_fattr(&fattr,
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index cde81042bebd..8f37373fd333 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -75,6 +75,10 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
{
unsigned int i;
+ /* if the channel is waiting for termination */
+ if (server && server->terminate)
+ return CIFS_INVAL_CHAN_INDEX;
+
for (i = 0; i < ses->chan_count; i++) {
if (ses->chans[i].server == server)
return i;
@@ -84,7 +88,6 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
if (server)
cifs_dbg(VFS, "unable to get chan index for server: 0x%llx",
server->conn_id);
- WARN_ON(1);
return CIFS_INVAL_CHAN_INDEX;
}
@@ -269,6 +272,8 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
&iface->sockaddr,
rc);
kref_put(&iface->refcount, release_iface);
+ /* failure to add chan should increase weight */
+ iface->weight_fulfilled++;
continue;
}
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index e0ee96d69d49..c23478ab1cf8 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -228,7 +228,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
* flock and OFD lock are associated with an open
* file description, not the process.
*/
- if (!(flock->fl_flags & (FL_FLOCK | FL_OFDLCK)))
+ if (!(flock->c.flc_flags & (FL_FLOCK | FL_OFDLCK)))
continue;
if (cinode->can_cache_brlcks) {
/*
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index a652200540c8..05818cd6d932 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -120,6 +120,14 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int size[2];
void *data[2];
int len;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ oplock = SMB2_OPLOCK_LEVEL_NONE;
+ num_rqst = 0;
+ server = cifs_pick_channel(ses);
vars = kzalloc(sizeof(*vars), GFP_ATOMIC);
if (vars == NULL)
@@ -127,8 +135,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
rqst = &vars->rqst[0];
rsp_iov = &vars->rsp_iov[0];
- server = cifs_pick_channel(ses);
-
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -463,15 +469,24 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
num_rqst++;
if (cfile) {
+ if (retries)
+ for (i = 1; i < num_rqst - 2; i++)
+ smb2_set_replay(server, &rqst[i]);
+
rc = compound_send_recv(xid, ses, server,
flags, num_rqst - 2,
&rqst[1], &resp_buftype[1],
&rsp_iov[1]);
- } else
+ } else {
+ if (retries)
+ for (i = 0; i < num_rqst; i++)
+ smb2_set_replay(server, &rqst[i]);
+
rc = compound_send_recv(xid, ses, server,
flags, num_rqst,
rqst, resp_buftype,
rsp_iov);
+ }
finished:
num_rqst = 0;
@@ -620,9 +635,6 @@ finished:
}
SMB2_close_free(&rqst[num_rqst]);
- if (cfile)
- cifsFileInfo_put(cfile);
-
num_cmds += 2;
if (out_iov && out_buftype) {
memcpy(out_iov, rsp_iov, num_cmds * sizeof(*out_iov));
@@ -632,7 +644,16 @@ finished:
for (i = 0; i < num_cmds; i++)
free_rsp_buf(resp_buftype[i], rsp_iov[i].iov_base);
}
+ num_cmds -= 2; /* correct num_cmds as there could be a retry */
kfree(vars);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
+ if (cfile)
+ cifsFileInfo_put(cfile);
+
return rc;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index d9553c2556a2..4695433fcf39 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -619,7 +619,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
goto out;
}
- while (bytes_left >= sizeof(*p)) {
+ while (bytes_left >= (ssize_t)sizeof(*p)) {
memset(&tmp_iface, 0, sizeof(tmp_iface));
tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
@@ -1108,7 +1108,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
{
struct smb2_compound_vars *vars;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
struct smb_rqst *rqst;
struct kvec *rsp_iov;
__le16 *utf16_path = NULL;
@@ -1124,6 +1124,13 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_file_full_ea_info *ea = NULL;
struct smb2_query_info_rsp *rsp;
int rc, used_len = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = CIFS_CP_CREATE_CLOSE_OP;
+ oplock = SMB2_OPLOCK_LEVEL_NONE;
+ server = cifs_pick_channel(ses);
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -1197,6 +1204,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, 0),
.fid = &fid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -1244,6 +1252,12 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
goto sea_exit;
smb2_set_related(&rqst[2]);
+ if (retries) {
+ smb2_set_replay(server, &rqst[0]);
+ smb2_set_replay(server, &rqst[1]);
+ smb2_set_replay(server, &rqst[2]);
+ }
+
rc = compound_send_recv(xid, ses, server,
flags, 3, rqst,
resp_buftype, rsp_iov);
@@ -1260,6 +1274,11 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
kfree(vars);
out_free_path:
kfree(utf16_path);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
#endif
@@ -1484,7 +1503,7 @@ smb2_ioctl_query_info(const unsigned int xid,
struct smb_rqst *rqst;
struct kvec *rsp_iov;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
char __user *arg = (char __user *)p;
struct smb_query_info qi;
struct smb_query_info __user *pqi;
@@ -1501,6 +1520,13 @@ smb2_ioctl_query_info(const unsigned int xid,
void *data[2];
int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR;
void (*free_req1_func)(struct smb_rqst *r);
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = CIFS_CP_CREATE_CLOSE_OP;
+ oplock = SMB2_OPLOCK_LEVEL_NONE;
+ server = cifs_pick_channel(ses);
vars = kzalloc(sizeof(*vars), GFP_ATOMIC);
if (vars == NULL)
@@ -1544,6 +1570,7 @@ smb2_ioctl_query_info(const unsigned int xid,
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, create_options),
.fid = &fid,
+ .replay = !!(retries),
};
if (qi.flags & PASSTHRU_FSCTL) {
@@ -1641,6 +1668,12 @@ smb2_ioctl_query_info(const unsigned int xid,
goto free_req_1;
smb2_set_related(&rqst[2]);
+ if (retries) {
+ smb2_set_replay(server, &rqst[0]);
+ smb2_set_replay(server, &rqst[1]);
+ smb2_set_replay(server, &rqst[2]);
+ }
+
rc = compound_send_recv(xid, ses, server,
flags, 3, rqst,
resp_buftype, rsp_iov);
@@ -1701,6 +1734,11 @@ free_output_buffer:
kfree(buffer);
free_vars:
kfree(vars);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -2227,8 +2265,14 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct smb2_query_directory_rsp *qd_rsp = NULL;
struct smb2_create_rsp *op_rsp = NULL;
- struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
- int retry_count = 0;
+ struct TCP_Server_Info *server;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ oplock = SMB2_OPLOCK_LEVEL_NONE;
+ server = cifs_pick_channel(tcon->ses);
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path)
@@ -2253,6 +2297,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, 0),
.fid = fid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -2278,14 +2323,15 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
smb2_set_related(&rqst[1]);
-again:
+ if (retries) {
+ smb2_set_replay(server, &rqst[0]);
+ smb2_set_replay(server, &rqst[1]);
+ }
+
rc = compound_send_recv(xid, tcon->ses, server,
flags, 2, rqst,
resp_buftype, rsp_iov);
- if (rc == -EAGAIN && retry_count++ < 10)
- goto again;
-
/* If the open failed there is nothing to do */
op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
if (op_rsp == NULL || op_rsp->hdr.Status != STATUS_SUCCESS) {
@@ -2333,6 +2379,11 @@ again:
SMB2_query_directory_free(&rqst[1]);
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -2458,6 +2509,22 @@ smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
}
void
+smb2_set_replay(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+{
+ struct smb2_hdr *shdr;
+
+ if (server->dialect < SMB30_PROT_ID)
+ return;
+
+ shdr = (struct smb2_hdr *)(rqst->rq_iov[0].iov_base);
+ if (shdr == NULL) {
+ cifs_dbg(FYI, "shdr NULL in smb2_set_related\n");
+ return;
+ }
+ shdr->Flags |= SMB2_FLAGS_REPLAY_OPERATION;
+}
+
+void
smb2_set_related(struct smb_rqst *rqst)
{
struct smb2_hdr *shdr;
@@ -2530,6 +2597,27 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst)
}
/*
+ * helper function for exponential backoff and check if replayable
+ */
+bool smb2_should_replay(struct cifs_tcon *tcon,
+ int *pretries,
+ int *pcur_sleep)
+{
+ if (!pretries || !pcur_sleep)
+ return false;
+
+ if (tcon->retry || (*pretries)++ < tcon->ses->server->retrans) {
+ msleep(*pcur_sleep);
+ (*pcur_sleep) = ((*pcur_sleep) << 1);
+ if ((*pcur_sleep) > CIFS_MAX_SLEEP)
+ (*pcur_sleep) = CIFS_MAX_SLEEP;
+ return true;
+ }
+
+ return false;
+}
+
+/*
* Passes the query info response back to the caller on success.
* Caller need to free this with free_rsp_buf().
*/
@@ -2542,7 +2630,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
{
struct smb2_compound_vars *vars;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
int flags = CIFS_CP_CREATE_CLOSE_OP;
struct smb_rqst *rqst;
int resp_buftype[3];
@@ -2553,6 +2641,13 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
__le16 *utf16_path;
struct cached_fid *cfid = NULL;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = CIFS_CP_CREATE_CLOSE_OP;
+ oplock = SMB2_OPLOCK_LEVEL_NONE;
+ server = cifs_pick_channel(ses);
if (!path)
path = "";
@@ -2589,6 +2684,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, 0),
.fid = &fid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -2633,6 +2729,14 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
goto qic_exit;
smb2_set_related(&rqst[2]);
+ if (retries) {
+ if (!cfid) {
+ smb2_set_replay(server, &rqst[0]);
+ smb2_set_replay(server, &rqst[2]);
+ }
+ smb2_set_replay(server, &rqst[1]);
+ }
+
if (cfid) {
rc = compound_send_recv(xid, ses, server,
flags, 1, &rqst[1],
@@ -2665,6 +2769,11 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
kfree(vars);
out_free_path:
kfree(utf16_path);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -3213,6 +3322,9 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
cfile->fid.volatile_fid, cfile->pid, new_size);
if (rc >= 0) {
truncate_setsize(inode, new_size);
+ netfs_resize_file(&cifsi->netfs, new_size, true);
+ if (offset < cifsi->netfs.zero_point)
+ cifsi->netfs.zero_point = offset;
fscache_resize_cookie(cifs_inode_cookie(inode), new_size);
}
}
@@ -3436,7 +3548,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, new_eof);
if (rc == 0) {
- cifsi->server_eof = new_eof;
+ netfs_resize_file(&cifsi->netfs, new_eof, true);
cifs_setsize(inode, new_eof);
cifs_truncate_page(inode->i_mapping, inode->i_size);
truncate_setsize(inode, new_eof);
@@ -3528,8 +3640,9 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
int rc;
unsigned int xid;
struct inode *inode = file_inode(file);
- struct cifsFileInfo *cfile = file->private_data;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifsFileInfo *cfile = file->private_data;
+ struct netfs_inode *ictx = &cifsi->netfs;
loff_t old_eof, new_eof;
xid = get_xid();
@@ -3549,6 +3662,7 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
goto out_2;
truncate_pagecache_range(inode, off, old_eof);
+ ictx->zero_point = old_eof;
rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
old_eof - off - len, off);
@@ -3563,9 +3677,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
rc = 0;
- cifsi->server_eof = i_size_read(inode) - len;
- truncate_setsize(inode, cifsi->server_eof);
- fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof);
+ truncate_setsize(inode, new_eof);
+ netfs_resize_file(&cifsi->netfs, new_eof, true);
+ ictx->zero_point = new_eof;
+ fscache_resize_cookie(cifs_inode_cookie(inode), new_eof);
out_2:
filemap_invalidate_unlock(inode->i_mapping);
out:
@@ -3581,6 +3696,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
unsigned int xid;
struct cifsFileInfo *cfile = file->private_data;
struct inode *inode = file_inode(file);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
__u64 count, old_eof, new_eof;
xid = get_xid();
@@ -3608,6 +3724,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
goto out_2;
truncate_setsize(inode, new_eof);
+ netfs_resize_file(&cifsi->netfs, i_size_read(inode), true);
fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode));
rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
@@ -5100,7 +5217,7 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
struct inode *new;
struct kvec iov;
__le16 *path;
- char *sym;
+ char *sym, sep = CIFS_DIR_SEP(cifs_sb);
u16 len, plen;
int rc = 0;
@@ -5114,7 +5231,8 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
.symlink_target = sym,
};
- path = cifs_convert_path_to_utf16(symname, cifs_sb);
+ convert_delimiter(sym, sep);
+ path = cifs_convert_path_to_utf16(sym, cifs_sb);
if (!path) {
rc = -ENOMEM;
goto out;
@@ -5137,7 +5255,10 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
buf->PrintNameLength = cpu_to_le16(plen);
memcpy(buf->PathBuffer, path, plen);
buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0);
+ if (*sym != sep)
+ buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE);
+ convert_delimiter(sym, '/');
iov.iov_base = buf;
iov.iov_len = len;
new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 288199f0b987..608ee05491e2 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -178,6 +178,7 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses,
}
ses->chans[chan_index].server = NULL;
+ server->terminate = true;
spin_unlock(&ses->chan_lock);
/*
@@ -188,14 +189,12 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses,
*/
cifs_put_tcp_session(server, from_reconnect);
- server->terminate = true;
cifs_signal_cifsd_for_reconnect(server, false);
/* mark primary server as needing reconnect */
pserver = server->primary_server;
cifs_signal_cifsd_for_reconnect(pserver, false);
skip_terminate:
- mutex_unlock(&ses->session_mutex);
return -EHOSTDOWN;
}
@@ -400,6 +399,15 @@ skip_sess_setup:
goto out;
}
+ spin_lock(&ses->ses_lock);
+ if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) {
+ spin_unlock(&ses->ses_lock);
+ mutex_unlock(&ses->session_mutex);
+ goto skip_add_channels;
+ }
+ ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS;
+ spin_unlock(&ses->ses_lock);
+
if (!rc &&
(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
mutex_unlock(&ses->session_mutex);
@@ -411,7 +419,7 @@ skip_sess_setup:
rc = SMB3_request_interfaces(xid, tcon, false);
free_xid(xid);
- if (rc == -EOPNOTSUPP) {
+ if (rc == -EOPNOTSUPP && ses->chan_count > 1) {
/*
* some servers like Azure SMB server do not advertise
* that multichannel has been disabled with server
@@ -429,17 +437,22 @@ skip_sess_setup:
if (ses->chan_max > ses->chan_count &&
ses->iface_count &&
!SERVER_IS_CHAN(server)) {
- if (ses->chan_count == 1)
+ if (ses->chan_count == 1) {
cifs_server_dbg(VFS, "supports multichannel now\n");
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ }
cifs_try_adding_channels(ses);
- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
- (SMB_INTERFACE_POLL_INTERVAL * HZ));
}
} else {
mutex_unlock(&ses->session_mutex);
}
+
skip_add_channels:
+ spin_lock(&ses->ses_lock);
+ ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS;
+ spin_unlock(&ses->ses_lock);
if (smb2_command != SMB2_INTERNAL_CMD)
mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
@@ -2391,8 +2404,13 @@ create_durable_v2_buf(struct cifs_open_parms *oparms)
*/
buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout);
buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
- generate_random_uuid(buf->dcontext.CreateGuid);
- memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
+
+ /* for replay, we should not overwrite the existing create guid */
+ if (!oparms->replay) {
+ generate_random_uuid(buf->dcontext.CreateGuid);
+ memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
+ } else
+ memcpy(buf->dcontext.CreateGuid, pfid->create_guid, 16);
/* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
buf->Name[0] = 'D';
@@ -2765,7 +2783,14 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
int flags = 0;
unsigned int total_len;
__le16 *utf16_path = NULL;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ n_iov = 2;
+ server = cifs_pick_channel(ses);
cifs_dbg(FYI, "mkdir\n");
@@ -2869,6 +2894,10 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
/* no need to inc num_remote_opens because we close it just below */
trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, full_path, CREATE_NOT_FILE,
FILE_WRITE_ATTRIBUTES);
+
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
/* resource #4: response buffer */
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
@@ -2906,6 +2935,11 @@ err_free_req:
cifs_small_buf_release(req);
err_free_path:
kfree(utf16_path);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -3101,12 +3135,19 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct smb2_create_rsp *rsp = NULL;
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
struct kvec iov[SMB2_CREATE_IOV_SIZE];
struct kvec rsp_iov = {NULL, 0};
int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
int flags = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
+ oparms->replay = !!(retries);
cifs_dbg(FYI, "create/open\n");
if (!ses || !server)
@@ -3128,6 +3169,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, oparms->path,
oparms->create_options, oparms->desired_access);
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags,
&rsp_iov);
@@ -3181,6 +3225,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
creat_exit:
SMB2_open_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -3305,15 +3354,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
int flags = 0;
-
- cifs_dbg(FYI, "SMB2 IOCTL\n");
-
- if (out_data != NULL)
- *out_data = NULL;
-
- /* zero out returned data len, in case of error */
- if (plen)
- *plen = 0;
+ int retries = 0, cur_sleep = 1;
if (!tcon)
return -EIO;
@@ -3322,10 +3363,23 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (!ses)
return -EIO;
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
server = cifs_pick_channel(ses);
+
if (!server)
return -EIO;
+ cifs_dbg(FYI, "SMB2 IOCTL\n");
+
+ if (out_data != NULL)
+ *out_data = NULL;
+
+ /* zero out returned data len, in case of error */
+ if (plen)
+ *plen = 0;
+
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -3340,6 +3394,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
goto ioctl_exit;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags,
&rsp_iov);
@@ -3409,6 +3466,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
ioctl_exit:
SMB2_ioctl_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -3480,13 +3542,20 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
struct smb_rqst rqst;
struct smb2_close_rsp *rsp = NULL;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
int flags = 0;
bool query_attrs = false;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ query_attrs = false;
+ server = cifs_pick_channel(ses);
cifs_dbg(FYI, "Close\n");
@@ -3512,6 +3581,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
goto close_exit;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
@@ -3545,6 +3617,11 @@ close_exit:
cifs_dbg(VFS, "handle cancelled close fid 0x%llx returned error %d\n",
persistent_fid, tmp_rc);
}
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -3675,12 +3752,19 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
struct TCP_Server_Info *server;
int flags = 0;
bool allocated = false;
+ int retries = 0, cur_sleep = 1;
cifs_dbg(FYI, "Query Info\n");
if (!ses)
return -EIO;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ allocated = false;
server = cifs_pick_channel(ses);
+
if (!server)
return -EIO;
@@ -3702,6 +3786,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
trace_smb3_query_info_enter(xid, persistent_fid, tcon->tid,
ses->Suid, info_class, (__u32)info_type);
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
@@ -3744,6 +3831,11 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
qinf_exit:
SMB2_query_info_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -3844,7 +3936,7 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
u32 *plen /* returned data len */)
{
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
struct smb_rqst rqst;
struct smb2_change_notify_rsp *smb_rsp;
struct kvec iov[1];
@@ -3852,6 +3944,12 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buftype = CIFS_NO_BUFFER;
int flags = 0;
int rc = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
cifs_dbg(FYI, "change notify\n");
if (!ses || !server)
@@ -3876,6 +3974,10 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
trace_smb3_notify_enter(xid, persistent_fid, tcon->tid, ses->Suid,
(u8)watch_tree, completion_filter);
+
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
@@ -3910,6 +4012,11 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
if (rqst.rq_iov)
cifs_small_buf_release(rqst.rq_iov[0].iov_base); /* request */
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -4152,10 +4259,16 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
struct smb_rqst rqst;
struct kvec iov[1];
struct kvec rsp_iov = {NULL, 0};
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
int resp_buftype = CIFS_NO_BUFFER;
int flags = 0;
int rc = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
cifs_dbg(FYI, "flush\n");
if (!ses || !(ses->server))
@@ -4175,6 +4288,10 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto flush_exit;
trace_smb3_flush_enter(xid, persistent_fid, tcon->tid, ses->Suid);
+
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
@@ -4189,6 +4306,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
flush_exit:
SMB2_flush_free(&rqst);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -4668,7 +4790,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
struct cifs_io_parms *io_parms = NULL;
int credit_request;
- if (!wdata->server)
+ if (!wdata->server || wdata->replay)
server = wdata->server = cifs_pick_channel(tcon->ses);
/*
@@ -4753,6 +4875,8 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_nvec = 1;
rqst.rq_iter = wdata->iter;
rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter);
+ if (wdata->replay)
+ smb2_set_replay(server, &rqst);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr)
iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
@@ -4826,18 +4950,21 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
int flags = 0;
unsigned int total_len;
struct TCP_Server_Info *server;
+ int retries = 0, cur_sleep = 1;
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
*nbytes = 0;
-
- if (n_vec < 1)
- return rc;
-
if (!io_parms->server)
io_parms->server = cifs_pick_channel(io_parms->tcon->ses);
server = io_parms->server;
if (server == NULL)
return -ECONNABORTED;
+ if (n_vec < 1)
+ return rc;
+
rc = smb2_plain_req_init(SMB2_WRITE, io_parms->tcon, server,
(void **) &req, &total_len);
if (rc)
@@ -4871,6 +4998,9 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
rqst.rq_iov = iov;
rqst.rq_nvec = n_vec + 1;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, io_parms->tcon->ses, server,
&rqst,
&resp_buftype, flags, &rsp_iov);
@@ -4895,6 +5025,11 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
cifs_small_buf_release(req);
free_rsp_buf(resp_buftype, rsp);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(io_parms->tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5077,6 +5212,9 @@ int SMB2_query_directory_init(const unsigned int xid,
case SMB_FIND_FILE_POSIX_INFO:
req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO;
break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+ req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION;
+ break;
default:
cifs_tcon_dbg(VFS, "info level %u isn't supported\n",
info_level);
@@ -5146,6 +5284,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon,
/* note that posix payload are variable size */
info_buf_size = sizeof(struct smb2_posix_info);
break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+ info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO);
+ break;
default:
cifs_tcon_dbg(VFS, "info level %u isn't supported\n",
srch_inf->info_level);
@@ -5206,8 +5347,14 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec rsp_iov;
int rc = 0;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
int flags = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
if (!ses || !(ses->server))
return -EIO;
@@ -5227,6 +5374,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
goto qdir_exit;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base;
@@ -5261,6 +5411,11 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
qdir_exit:
SMB2_query_directory_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5327,8 +5482,14 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
int flags = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
if (!ses || !server)
return -EIO;
@@ -5356,6 +5517,8 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+ if (retries)
+ smb2_set_replay(server, &rqst);
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags,
@@ -5371,6 +5534,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
free_rsp_buf(resp_buftype, rsp);
kfree(iov);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5423,12 +5591,18 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_oplock_break *req = NULL;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
int flags = CIFS_OBREAK_OP;
unsigned int total_len;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = CIFS_OBREAK_OP;
+ server = cifs_pick_channel(ses);
cifs_dbg(FYI, "SMB2_oplock_break\n");
rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, server,
@@ -5453,15 +5627,21 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
rqst.rq_iov = iov;
rqst.rq_nvec = 1;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
-
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
cifs_dbg(FYI, "Send error in Oplock Break = %d\n", rc);
}
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5547,9 +5727,15 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
FILE_SYSTEM_POSIX_INFO *info = NULL;
int flags = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
rc = build_qfs_info_req(&iov, tcon, server,
FS_POSIX_INFORMATION,
@@ -5565,6 +5751,9 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
rqst.rq_iov = &iov;
rqst.rq_nvec = 1;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
free_qfs_info_req(&iov);
@@ -5584,6 +5773,11 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
posix_qfsinf_exit:
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5598,9 +5792,15 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
struct smb2_fs_full_size_info *info = NULL;
int flags = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
rc = build_qfs_info_req(&iov, tcon, server,
FS_FULL_SIZE_INFORMATION,
@@ -5616,6 +5816,9 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
rqst.rq_iov = &iov;
rqst.rq_nvec = 1;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
free_qfs_info_req(&iov);
@@ -5635,6 +5838,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
qfsinf_exit:
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5649,9 +5857,15 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype, max_len, min_len;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = cifs_pick_channel(ses);
+ struct TCP_Server_Info *server;
unsigned int rsp_len, offset;
int flags = 0;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = 0;
+ server = cifs_pick_channel(ses);
if (level == FS_DEVICE_INFORMATION) {
max_len = sizeof(FILE_SYSTEM_DEVICE_INFO);
@@ -5683,6 +5897,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
rqst.rq_iov = &iov;
rqst.rq_nvec = 1;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
free_qfs_info_req(&iov);
@@ -5720,6 +5937,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
qfsattr_exit:
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
@@ -5737,7 +5959,13 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int count;
int flags = CIFS_NO_RSP_BUF;
unsigned int total_len;
- struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
+ struct TCP_Server_Info *server;
+ int retries = 0, cur_sleep = 1;
+
+replay_again:
+ /* reinitialize for possible replay */
+ flags = CIFS_NO_RSP_BUF;
+ server = cifs_pick_channel(tcon->ses);
cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock);
@@ -5768,6 +5996,9 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
+ if (retries)
+ smb2_set_replay(server, &rqst);
+
rc = cifs_send_recv(xid, tcon->ses, server,
&rqst, &resp_buf_type, flags,
&rsp_iov);
@@ -5779,6 +6010,10 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
tcon->ses->Suid, rc);
}
+ if (is_replayable_error(rc) &&
+ smb2_should_replay(tcon, &retries, &cur_sleep))
+ goto replay_again;
+
return rc;
}
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 0034b537b0b3..b3069911e9dd 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -122,6 +122,11 @@ extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
extern void smb2_set_next_command(struct cifs_tcon *tcon,
struct smb_rqst *rqst);
extern void smb2_set_related(struct smb_rqst *rqst);
+extern void smb2_set_replay(struct TCP_Server_Info *server,
+ struct smb_rqst *rqst);
+extern bool smb2_should_replay(struct cifs_tcon *tcon,
+ int *pretries,
+ int *pcur_sleep);
/*
* SMB2 Worker functions - most of protocol specific implementation details
diff --git a/fs/smb/client/smbencrypt.c b/fs/smb/client/smbencrypt.c
index f0ce26414f17..1d1ee9f18f37 100644
--- a/fs/smb/client/smbencrypt.c
+++ b/fs/smb/client/smbencrypt.c
@@ -26,13 +26,6 @@
#include "cifsproto.h"
#include "../common/md4.h"
-#ifndef false
-#define false 0
-#endif
-#ifndef true
-#define true 1
-#endif
-
/* following came from the other byteorder.h to avoid include conflicts */
#define CVAL(buf,pos) (((unsigned char *)(buf))[pos])
#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 4f717ad7c21b..994d70193432 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -400,10 +400,17 @@ unmask:
server->conn_id, server->hostname);
}
smbd_done:
- if (rc < 0 && rc != -EINTR)
+ /*
+ * there's hardly any use for the layers above to know the
+ * actual error code here. All they should do at this point is
+ * to retry the connection and hope it goes away.
+ */
+ if (rc < 0 && rc != -EINTR && rc != -EAGAIN) {
cifs_server_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
- else if (rc > 0)
+ rc = -ECONNABORTED;
+ cifs_signal_cifsd_for_reconnect(server, false);
+ } else if (rc > 0)
rc = 0;
out:
cifs_in_send_dec(server);
@@ -428,8 +435,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
if (!(flags & CIFS_TRANSFORM_REQ))
return __smb_send_rqst(server, num_rqst, rqst);
- if (num_rqst > MAX_COMPOUND - 1)
- return -ENOMEM;
+ if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1))
+ return -EIO;
if (!server->ops->init_transform_rq) {
cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
@@ -1026,6 +1033,9 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
if (!server || server->terminate)
continue;
+ if (CIFS_CHAN_NEEDS_RECONNECT(ses, i))
+ continue;
+
/*
* strictly speaking, we should pick up req_lock to read
* server->in_flight. But it shouldn't matter much here if we
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index b7521e41402e..0ebf91ffa236 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -304,7 +304,8 @@ enum ksmbd_event {
KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15,
- KSMBD_EVENT_MAX
+ __KSMBD_EVENT_MAX,
+ KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1
};
/*
diff --git a/fs/smb/server/misc.c b/fs/smb/server/misc.c
index 9e8afaa686e3..1a5faa6f6e7b 100644
--- a/fs/smb/server/misc.c
+++ b/fs/smb/server/misc.c
@@ -261,6 +261,7 @@ out_ascii:
/**
* ksmbd_extract_sharename() - get share name from tree connect request
+ * @um: pointer to a unicode_map structure for character encoding handling
* @treename: buffer containing tree name and share name
*
* Return: share name on success, otherwise error
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index ba7a72a6a4f4..089527a8b4ff 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -6173,8 +6173,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
offsetof(struct smb2_read_rsp, Buffer),
aux_payload_buf, nbytes);
- if (err)
+ if (err) {
+ kvfree(aux_payload_buf);
goto out;
+ }
kvfree(rpc_resp);
} else {
err = ksmbd_iov_pin_rsp(work, (void *)rsp,
@@ -6384,8 +6386,10 @@ int smb2_read(struct ksmbd_work *work)
err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
offsetof(struct smb2_read_rsp, Buffer),
aux_payload_buf, nbytes);
- if (err)
+ if (err) {
+ kvfree(aux_payload_buf);
goto out;
+ }
ksmbd_fd_put(work, fp);
return 0;
@@ -6760,10 +6764,10 @@ struct file_lock *smb_flock_init(struct file *f)
locks_init_lock(fl);
- fl->fl_owner = f;
- fl->fl_pid = current->tgid;
- fl->fl_file = f;
- fl->fl_flags = FL_POSIX;
+ fl->c.flc_owner = f;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_file = f;
+ fl->c.flc_flags = FL_POSIX;
fl->fl_ops = NULL;
fl->fl_lmops = NULL;
@@ -6780,30 +6784,30 @@ static int smb2_set_flock_flags(struct file_lock *flock, int flags)
case SMB2_LOCKFLAG_SHARED:
ksmbd_debug(SMB, "received shared request\n");
cmd = F_SETLKW;
- flock->fl_type = F_RDLCK;
- flock->fl_flags |= FL_SLEEP;
+ flock->c.flc_type = F_RDLCK;
+ flock->c.flc_flags |= FL_SLEEP;
break;
case SMB2_LOCKFLAG_EXCLUSIVE:
ksmbd_debug(SMB, "received exclusive request\n");
cmd = F_SETLKW;
- flock->fl_type = F_WRLCK;
- flock->fl_flags |= FL_SLEEP;
+ flock->c.flc_type = F_WRLCK;
+ flock->c.flc_flags |= FL_SLEEP;
break;
case SMB2_LOCKFLAG_SHARED | SMB2_LOCKFLAG_FAIL_IMMEDIATELY:
ksmbd_debug(SMB,
"received shared & fail immediately request\n");
cmd = F_SETLK;
- flock->fl_type = F_RDLCK;
+ flock->c.flc_type = F_RDLCK;
break;
case SMB2_LOCKFLAG_EXCLUSIVE | SMB2_LOCKFLAG_FAIL_IMMEDIATELY:
ksmbd_debug(SMB,
"received exclusive & fail immediately request\n");
cmd = F_SETLK;
- flock->fl_type = F_WRLCK;
+ flock->c.flc_type = F_WRLCK;
break;
case SMB2_LOCKFLAG_UNLOCK:
ksmbd_debug(SMB, "received unlock request\n");
- flock->fl_type = F_UNLCK;
+ flock->c.flc_type = F_UNLCK;
cmd = F_SETLK;
break;
}
@@ -6841,13 +6845,13 @@ static void smb2_remove_blocked_lock(void **argv)
struct file_lock *flock = (struct file_lock *)argv[0];
ksmbd_vfs_posix_lock_unblock(flock);
- wake_up(&flock->fl_wait);
+ locks_wake_up(flock);
}
static inline bool lock_defer_pending(struct file_lock *fl)
{
/* check pending lock waiters */
- return waitqueue_active(&fl->fl_wait);
+ return waitqueue_active(&fl->c.flc_wait);
}
/**
@@ -6938,8 +6942,8 @@ int smb2_lock(struct ksmbd_work *work)
list_for_each_entry(cmp_lock, &lock_list, llist) {
if (cmp_lock->fl->fl_start <= flock->fl_start &&
cmp_lock->fl->fl_end >= flock->fl_end) {
- if (cmp_lock->fl->fl_type != F_UNLCK &&
- flock->fl_type != F_UNLCK) {
+ if (cmp_lock->fl->c.flc_type != F_UNLCK &&
+ flock->c.flc_type != F_UNLCK) {
pr_err("conflict two locks in one request\n");
err = -EINVAL;
locks_free_lock(flock);
@@ -6987,12 +6991,12 @@ int smb2_lock(struct ksmbd_work *work)
list_for_each_entry(conn, &conn_list, conns_list) {
spin_lock(&conn->llist_lock);
list_for_each_entry_safe(cmp_lock, tmp2, &conn->lock_list, clist) {
- if (file_inode(cmp_lock->fl->fl_file) !=
- file_inode(smb_lock->fl->fl_file))
+ if (file_inode(cmp_lock->fl->c.flc_file) !=
+ file_inode(smb_lock->fl->c.flc_file))
continue;
- if (smb_lock->fl->fl_type == F_UNLCK) {
- if (cmp_lock->fl->fl_file == smb_lock->fl->fl_file &&
+ if (lock_is_unlock(smb_lock->fl)) {
+ if (cmp_lock->fl->c.flc_file == smb_lock->fl->c.flc_file &&
cmp_lock->start == smb_lock->start &&
cmp_lock->end == smb_lock->end &&
!lock_defer_pending(cmp_lock->fl)) {
@@ -7009,7 +7013,7 @@ int smb2_lock(struct ksmbd_work *work)
continue;
}
- if (cmp_lock->fl->fl_file == smb_lock->fl->fl_file) {
+ if (cmp_lock->fl->c.flc_file == smb_lock->fl->c.flc_file) {
if (smb_lock->flags & SMB2_LOCKFLAG_SHARED)
continue;
} else {
@@ -7051,7 +7055,7 @@ int smb2_lock(struct ksmbd_work *work)
}
up_read(&conn_list_lock);
out_check_cl:
- if (smb_lock->fl->fl_type == F_UNLCK && nolock) {
+ if (lock_is_unlock(smb_lock->fl) && nolock) {
pr_err("Try to unlock nolocked range\n");
rsp->hdr.Status = STATUS_RANGE_NOT_LOCKED;
goto out;
@@ -7175,7 +7179,7 @@ out:
struct file_lock *rlock = NULL;
rlock = smb_flock_init(filp);
- rlock->fl_type = F_UNLCK;
+ rlock->c.flc_type = F_UNLCK;
rlock->fl_start = smb_lock->start;
rlock->fl_end = smb_lock->end;
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index b49d47bdafc9..f29bb03f0dc4 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -74,7 +74,7 @@ static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info)
static int handle_generic_event(struct sk_buff *skb, struct genl_info *info);
static int ksmbd_ipc_heartbeat_request(void);
-static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = {
+static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = {
[KSMBD_EVENT_UNSPEC] = {
.len = 0,
},
@@ -403,7 +403,7 @@ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info)
return -EPERM;
#endif
- if (type >= KSMBD_EVENT_MAX) {
+ if (type > KSMBD_EVENT_MAX) {
WARN_ON(1);
return -EINVAL;
}
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index 9d4222154dcc..002a3f0dc7c5 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -365,6 +365,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
* @t: TCP transport instance
* @buf: buffer to store read data from socket
* @to_read: number of bytes to read from socket
+ * @max_retries: number of retries if reading from socket fails
*
* Return: on success return number of bytes read from socket,
* otherwise return error number
@@ -416,6 +417,7 @@ static void tcp_destroy_socket(struct socket *ksmbd_socket)
/**
* create_socket - create socket for ksmbd/0
+ * @iface: interface to bind the created socket to
*
* Return: 0 on success, error number otherwise
*/
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index a6961bfe3e13..c487e834331a 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -337,18 +337,18 @@ static int check_lock_range(struct file *filp, loff_t start, loff_t end,
return 0;
spin_lock(&ctx->flc_lock);
- list_for_each_entry(flock, &ctx->flc_posix, fl_list) {
+ for_each_file_lock(flock, &ctx->flc_posix) {
/* check conflict locks */
if (flock->fl_end >= start && end >= flock->fl_start) {
- if (flock->fl_type == F_RDLCK) {
+ if (lock_is_read(flock)) {
if (type == WRITE) {
pr_err("not allow write by shared lock\n");
error = 1;
goto out;
}
- } else if (flock->fl_type == F_WRLCK) {
+ } else if (lock_is_write(flock)) {
/* check owner in lock */
- if (flock->fl_file != filp) {
+ if (flock->c.flc_file != filp) {
error = 1;
pr_err("not allow rw access by exclusive lock from other opens\n");
goto out;
@@ -1837,13 +1837,13 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
void ksmbd_vfs_posix_lock_wait(struct file_lock *flock)
{
- wait_event(flock->fl_wait, !flock->fl_blocker);
+ wait_event(flock->c.flc_wait, !flock->c.flc_blocker);
}
int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout)
{
- return wait_event_interruptible_timeout(flock->fl_wait,
- !flock->fl_blocker,
+ return wait_event_interruptible_timeout(flock->c.flc_wait,
+ !flock->c.flc_blocker,
timeout);
}
diff --git a/fs/super.c b/fs/super.c
index d35e85295489..ee05ab6b37e7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -274,9 +274,10 @@ static void destroy_super_work(struct work_struct *work)
{
struct super_block *s = container_of(work, struct super_block,
destroy_work);
- int i;
-
- for (i = 0; i < SB_FREEZE_LEVELS; i++)
+ security_sb_free(s);
+ put_user_ns(s->s_user_ns);
+ kfree(s->s_subtype);
+ for (int i = 0; i < SB_FREEZE_LEVELS; i++)
percpu_free_rwsem(&s->s_writers.rw_sem[i]);
kfree(s);
}
@@ -296,9 +297,6 @@ static void destroy_unused_super(struct super_block *s)
super_unlock_excl(s);
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
- security_sb_free(s);
- put_user_ns(s->s_user_ns);
- kfree(s->s_subtype);
shrinker_free(s->s_shrink);
/* no delays needed */
destroy_super_work(&s->destroy_work);
@@ -409,9 +407,6 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_dentry_lru.node);
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
- security_sb_free(s);
- put_user_ns(s->s_user_ns);
- kfree(s->s_subtype);
call_rcu(&s->rcu, destroy_super_rcu);
}
}
@@ -1532,16 +1527,16 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
struct fs_context *fc)
{
blk_mode_t mode = sb_open_mode(sb_flags);
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct block_device *bdev;
- bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
- if (IS_ERR(bdev_handle)) {
+ bdev_file = bdev_file_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
+ if (IS_ERR(bdev_file)) {
if (fc)
errorf(fc, "%s: Can't open blockdev", fc->source);
- return PTR_ERR(bdev_handle);
+ return PTR_ERR(bdev_file);
}
- bdev = bdev_handle->bdev;
+ bdev = file_bdev(bdev_file);
/*
* This really should be in blkdev_get_by_dev, but right now can't due
@@ -1549,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
* writable from userspace even for a read-only block device.
*/
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
- bdev_release(bdev_handle);
+ fput(bdev_file);
return -EACCES;
}
@@ -1560,11 +1555,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- bdev_release(bdev_handle);
+ fput(bdev_file);
return -EBUSY;
}
spin_lock(&sb_lock);
- sb->s_bdev_handle = bdev_handle;
+ sb->s_bdev_file = bdev_file;
sb->s_bdev = bdev;
sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
if (bdev_stable_writes(bdev))
@@ -1680,7 +1675,7 @@ void kill_block_super(struct super_block *sb)
generic_shutdown_super(sb);
if (bdev) {
sync_blockdev(bdev);
- bdev_release(sb->s_bdev_handle);
+ fput(sb->s_bdev_file);
}
}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 5a915b2e68f5..76bc2d5e75a9 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -336,7 +336,7 @@ int __init sysv_init_icache(void)
{
sysv_inode_cachep = kmem_cache_create("sysv_inode_cache",
sizeof(struct sysv_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+ SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
init_once);
if (!sysv_inode_cachep)
return -ENOMEM;
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 410ab2a44d2f..19bcb51a2203 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -83,9 +83,6 @@ static inline sysv_zone_t *block_end(struct buffer_head *bh)
return (sysv_zone_t*)((char*)bh->b_data + bh->b_size);
}
-/*
- * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock)
- */
static Indirect *get_branch(struct inode *inode,
int depth,
int offsets[],
@@ -105,15 +102,18 @@ static Indirect *get_branch(struct inode *inode,
bh = sb_bread(sb, block);
if (!bh)
goto failure;
+ read_lock(&pointers_lock);
if (!verify_chain(chain, p))
goto changed;
add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets);
+ read_unlock(&pointers_lock);
if (!p->key)
goto no_block;
}
return NULL;
changed:
+ read_unlock(&pointers_lock);
brelse(bh);
*err = -EAGAIN;
goto no_block;
@@ -219,9 +219,7 @@ static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *b
goto out;
reread:
- read_lock(&pointers_lock);
partial = get_branch(inode, depth, offsets, chain, &err);
- read_unlock(&pointers_lock);
/* Simplest case - block found, no allocation needed */
if (!partial) {
@@ -291,9 +289,9 @@ static Indirect *find_shared(struct inode *inode,
*top = 0;
for (k = depth; k > 1 && !offsets[k-1]; k--)
;
+ partial = get_branch(inode, k, offsets, chain, &err);
write_lock(&pointers_lock);
- partial = get_branch(inode, k, offsets, chain, &err);
if (!partial)
partial = chain + k-1;
/*
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 6795fda2af19..110e8a272189 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -34,7 +34,15 @@ static DEFINE_MUTEX(eventfs_mutex);
/* Choose something "unique" ;-) */
#define EVENTFS_FILE_INODE_INO 0x12c4e37
-#define EVENTFS_DIR_INODE_INO 0x134b2f5
+
+/* Just try to make something consistent and unique */
+static int eventfs_dir_ino(struct eventfs_inode *ei)
+{
+ if (!ei->ino)
+ ei->ino = get_next_ino();
+
+ return ei->ino;
+}
/*
* The eventfs_inode (ei) itself is protected by SRCU. It is released from
@@ -54,6 +62,46 @@ enum {
#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
+/*
+ * eventfs_inode reference count management.
+ *
+ * NOTE! We count only references from dentries, in the
+ * form 'dentry->d_fsdata'. There are also references from
+ * directory inodes ('ti->private'), but the dentry reference
+ * count is always a superset of the inode reference count.
+ */
+static void release_ei(struct kref *ref)
+{
+ struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
+
+ WARN_ON_ONCE(!ei->is_freed);
+
+ kfree(ei->entry_attrs);
+ kfree_const(ei->name);
+ kfree_rcu(ei, rcu);
+}
+
+static inline void put_ei(struct eventfs_inode *ei)
+{
+ if (ei)
+ kref_put(&ei->kref, release_ei);
+}
+
+static inline void free_ei(struct eventfs_inode *ei)
+{
+ if (ei) {
+ ei->is_freed = 1;
+ put_ei(ei);
+ }
+}
+
+static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
+{
+ if (ei)
+ kref_get(&ei->kref);
+ return ei;
+}
+
static struct dentry *eventfs_root_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags);
@@ -148,33 +196,30 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
return ret;
}
-static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
+static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb)
{
- struct inode *inode;
+ struct inode *root;
/* Only update if the "events" was on the top level */
if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
return;
/* Get the tracefs root inode. */
- inode = d_inode(dentry->d_sb->s_root);
- ei->attr.uid = inode->i_uid;
- ei->attr.gid = inode->i_gid;
+ root = d_inode(sb->s_root);
+ ei->attr.uid = root->i_uid;
+ ei->attr.gid = root->i_gid;
}
static void set_top_events_ownership(struct inode *inode)
{
struct tracefs_inode *ti = get_tracefs(inode);
struct eventfs_inode *ei = ti->private;
- struct dentry *dentry;
/* The top events directory doesn't get automatically updated */
if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
return;
- dentry = ei->dentry;
-
- update_top_events_attr(ei, dentry);
+ update_top_events_attr(ei, inode->i_sb);
if (!(ei->attr.mode & EVENTFS_SAVE_UID))
inode->i_uid = ei->attr.uid;
@@ -225,10 +270,11 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
{
struct eventfs_inode *ei;
- mutex_lock(&eventfs_mutex);
do {
- /* The parent always has an ei, except for events itself */
- ei = dentry->d_parent->d_fsdata;
+ // The parent is stable because we do not do renames
+ dentry = dentry->d_parent;
+ // ... and directories always have d_fsdata
+ ei = dentry->d_fsdata;
/*
* If the ei is being freed, the ownership of the children
@@ -238,12 +284,10 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
ei = NULL;
break;
}
-
- dentry = ei->dentry;
+ // Walk upwards until you find the events inode
} while (!ei->is_events);
- mutex_unlock(&eventfs_mutex);
- update_top_events_attr(ei, dentry);
+ update_top_events_attr(ei, dentry->d_sb);
return ei;
}
@@ -273,50 +317,11 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
inode->i_gid = attr->gid;
}
-static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
-{
- struct eventfs_inode *ei_child;
-
- /* at most we have events/system/event */
- if (WARN_ON_ONCE(level > 3))
- return;
-
- ei->attr.gid = gid;
-
- if (ei->entry_attrs) {
- for (int i = 0; i < ei->nr_entries; i++) {
- ei->entry_attrs[i].gid = gid;
- }
- }
-
- /*
- * Only eventfs_inode with dentries are updated, make sure
- * all eventfs_inodes are updated. If one of the children
- * do not have a dentry, this function must traverse it.
- */
- list_for_each_entry_srcu(ei_child, &ei->children, list,
- srcu_read_lock_held(&eventfs_srcu)) {
- if (!ei_child->dentry)
- update_gid(ei_child, gid, level + 1);
- }
-}
-
-void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
-{
- struct eventfs_inode *ei = dentry->d_fsdata;
- int idx;
-
- idx = srcu_read_lock(&eventfs_srcu);
- update_gid(ei, gid, 0);
- srcu_read_unlock(&eventfs_srcu, idx);
-}
-
/**
- * create_file - create a file in the tracefs filesystem
- * @name: the name of the file to create.
+ * lookup_file - look up a file in the tracefs filesystem
+ * @dentry: the dentry to look up
* @mode: the permission that the file should have.
* @attr: saved attributes changed by user
- * @parent: parent dentry for this file.
* @data: something that the caller will want to get to later on.
* @fop: struct file_operations that should be used for this file.
*
@@ -324,30 +329,25 @@ void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
* directory. The inode.i_private pointer will point to @data in the open()
* call.
*/
-static struct dentry *create_file(const char *name, umode_t mode,
+static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
+ struct dentry *dentry,
+ umode_t mode,
struct eventfs_attr *attr,
- struct dentry *parent, void *data,
+ void *data,
const struct file_operations *fop)
{
struct tracefs_inode *ti;
- struct dentry *dentry;
struct inode *inode;
if (!(mode & S_IFMT))
mode |= S_IFREG;
if (WARN_ON_ONCE(!S_ISREG(mode)))
- return NULL;
-
- WARN_ON_ONCE(!parent);
- dentry = eventfs_start_creating(name, parent);
-
- if (IS_ERR(dentry))
- return dentry;
+ return ERR_PTR(-EIO);
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return eventfs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
/* If the user updated the directory's attributes, use them */
update_inode_attr(dentry, inode, attr, mode);
@@ -361,32 +361,31 @@ static struct dentry *create_file(const char *name, umode_t mode,
ti = get_tracefs(inode);
ti->flags |= TRACEFS_EVENT_INODE;
- d_instantiate(dentry, inode);
- fsnotify_create(dentry->d_parent->d_inode, dentry);
- return eventfs_end_creating(dentry);
+
+ // Files have their parent's ei as their fsdata
+ dentry->d_fsdata = get_ei(parent_ei);
+
+ d_add(dentry, inode);
+ return NULL;
};
/**
- * create_dir - create a dir in the tracefs filesystem
+ * lookup_dir_entry - look up a dir in the tracefs filesystem
+ * @dentry: the directory to look up
* @ei: the eventfs_inode that represents the directory to create
- * @parent: parent dentry for this file.
*
- * This function will create a dentry for a directory represented by
+ * This function will look up a dentry for a directory represented by
* a eventfs_inode.
*/
-static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
+static struct dentry *lookup_dir_entry(struct dentry *dentry,
+ struct eventfs_inode *pei, struct eventfs_inode *ei)
{
struct tracefs_inode *ti;
- struct dentry *dentry;
struct inode *inode;
- dentry = eventfs_start_creating(ei->name, parent);
- if (IS_ERR(dentry))
- return dentry;
-
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return eventfs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
/* If the user updated the directory's attributes, use them */
update_inode_attr(dentry, inode, &ei->attr,
@@ -396,68 +395,50 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent
inode->i_fop = &eventfs_file_operations;
/* All directories will have the same inode number */
- inode->i_ino = EVENTFS_DIR_INODE_INO;
+ inode->i_ino = eventfs_dir_ino(ei);
ti = get_tracefs(inode);
ti->flags |= TRACEFS_EVENT_INODE;
+ /* Only directories have ti->private set to an ei, not files */
+ ti->private = ei;
- inc_nlink(inode);
- d_instantiate(dentry, inode);
- inc_nlink(dentry->d_parent->d_inode);
- fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
- return eventfs_end_creating(dentry);
+ dentry->d_fsdata = get_ei(ei);
+
+ d_add(dentry, inode);
+ return NULL;
}
-static void free_ei(struct eventfs_inode *ei)
+static inline struct eventfs_inode *alloc_ei(const char *name)
{
- kfree_const(ei->name);
- kfree(ei->d_children);
- kfree(ei->entry_attrs);
- kfree(ei);
+ struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+
+ if (!ei)
+ return NULL;
+
+ ei->name = kstrdup_const(name, GFP_KERNEL);
+ if (!ei->name) {
+ kfree(ei);
+ return NULL;
+ }
+ kref_init(&ei->kref);
+ return ei;
}
/**
- * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
- * @ti: the tracefs_inode of the dentry
+ * eventfs_d_release - dentry is going away
* @dentry: dentry which has the reference to remove.
*
* Remove the association between a dentry from an eventfs_inode.
*/
-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+void eventfs_d_release(struct dentry *dentry)
{
- struct eventfs_inode *ei;
- int i;
-
- mutex_lock(&eventfs_mutex);
-
- ei = dentry->d_fsdata;
- if (!ei)
- goto out;
-
- /* This could belong to one of the files of the ei */
- if (ei->dentry != dentry) {
- for (i = 0; i < ei->nr_entries; i++) {
- if (ei->d_children[i] == dentry)
- break;
- }
- if (WARN_ON_ONCE(i == ei->nr_entries))
- goto out;
- ei->d_children[i] = NULL;
- } else if (ei->is_freed) {
- free_ei(ei);
- } else {
- ei->dentry = NULL;
- }
-
- dentry->d_fsdata = NULL;
- out:
- mutex_unlock(&eventfs_mutex);
+ put_ei(dentry->d_fsdata);
}
/**
- * create_file_dentry - create a dentry for a file of an eventfs_inode
+ * lookup_file_dentry - create a dentry for a file of an eventfs_inode
* @ei: the eventfs_inode that the file will be created under
- * @idx: the index into the d_children[] of the @ei
+ * @idx: the index into the entry_attrs[] of the @ei
* @parent: The parent dentry of the created file.
* @name: The name of the file to create
* @mode: The mode of the file.
@@ -468,163 +449,17 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
* address located at @e_dentry.
*/
static struct dentry *
-create_file_dentry(struct eventfs_inode *ei, int idx,
- struct dentry *parent, const char *name, umode_t mode, void *data,
+lookup_file_dentry(struct dentry *dentry,
+ struct eventfs_inode *ei, int idx,
+ umode_t mode, void *data,
const struct file_operations *fops)
{
struct eventfs_attr *attr = NULL;
- struct dentry **e_dentry = &ei->d_children[idx];
- struct dentry *dentry;
-
- WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
- mutex_lock(&eventfs_mutex);
- if (ei->is_freed) {
- mutex_unlock(&eventfs_mutex);
- return NULL;
- }
- /* If the e_dentry already has a dentry, use it */
- if (*e_dentry) {
- dget(*e_dentry);
- mutex_unlock(&eventfs_mutex);
- return *e_dentry;
- }
-
- /* ei->entry_attrs are protected by SRCU */
if (ei->entry_attrs)
attr = &ei->entry_attrs[idx];
- mutex_unlock(&eventfs_mutex);
-
- dentry = create_file(name, mode, attr, parent, data, fops);
-
- mutex_lock(&eventfs_mutex);
-
- if (IS_ERR_OR_NULL(dentry)) {
- /*
- * When the mutex was released, something else could have
- * created the dentry for this e_dentry. In which case
- * use that one.
- *
- * If ei->is_freed is set, the e_dentry is currently on its
- * way to being freed, don't return it. If e_dentry is NULL
- * it means it was already freed.
- */
- if (ei->is_freed) {
- dentry = NULL;
- } else {
- dentry = *e_dentry;
- dget(dentry);
- }
- mutex_unlock(&eventfs_mutex);
- return dentry;
- }
-
- if (!*e_dentry && !ei->is_freed) {
- *e_dentry = dentry;
- dentry->d_fsdata = ei;
- } else {
- /*
- * Should never happen unless we get here due to being freed.
- * Otherwise it means two dentries exist with the same name.
- */
- WARN_ON_ONCE(!ei->is_freed);
- dentry = NULL;
- }
- mutex_unlock(&eventfs_mutex);
-
- return dentry;
-}
-
-/**
- * eventfs_post_create_dir - post create dir routine
- * @ei: eventfs_inode of recently created dir
- *
- * Map the meta-data of files within an eventfs dir to their parent dentry
- */
-static void eventfs_post_create_dir(struct eventfs_inode *ei)
-{
- struct eventfs_inode *ei_child;
- struct tracefs_inode *ti;
-
- lockdep_assert_held(&eventfs_mutex);
-
- /* srcu lock already held */
- /* fill parent-child relation */
- list_for_each_entry_srcu(ei_child, &ei->children, list,
- srcu_read_lock_held(&eventfs_srcu)) {
- ei_child->d_parent = ei->dentry;
- }
-
- ti = get_tracefs(ei->dentry->d_inode);
- ti->private = ei;
-}
-
-/**
- * create_dir_dentry - Create a directory dentry for the eventfs_inode
- * @pei: The eventfs_inode parent of ei.
- * @ei: The eventfs_inode to create the directory for
- * @parent: The dentry of the parent of this directory
- *
- * This creates and attaches a directory dentry to the eventfs_inode @ei.
- */
-static struct dentry *
-create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
- struct dentry *parent)
-{
- struct dentry *dentry = NULL;
-
- WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
-
- mutex_lock(&eventfs_mutex);
- if (pei->is_freed || ei->is_freed) {
- mutex_unlock(&eventfs_mutex);
- return NULL;
- }
- if (ei->dentry) {
- /* If the eventfs_inode already has a dentry, use it */
- dentry = ei->dentry;
- dget(dentry);
- mutex_unlock(&eventfs_mutex);
- return dentry;
- }
- mutex_unlock(&eventfs_mutex);
-
- dentry = create_dir(ei, parent);
-
- mutex_lock(&eventfs_mutex);
-
- if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
- /*
- * When the mutex was released, something else could have
- * created the dentry for this e_dentry. In which case
- * use that one.
- *
- * If ei->is_freed is set, the e_dentry is currently on its
- * way to being freed.
- */
- dentry = ei->dentry;
- if (dentry)
- dget(dentry);
- mutex_unlock(&eventfs_mutex);
- return dentry;
- }
-
- if (!ei->dentry && !ei->is_freed) {
- ei->dentry = dentry;
- eventfs_post_create_dir(ei);
- dentry->d_fsdata = ei;
- } else {
- /*
- * Should never happen unless we get here due to being freed.
- * Otherwise it means two dentries exist with the same name.
- */
- WARN_ON_ONCE(!ei->is_freed);
- dentry = NULL;
- }
- mutex_unlock(&eventfs_mutex);
-
- return dentry;
+ return lookup_file(ei, dentry, mode, attr, data, fops);
}
/**
@@ -641,79 +476,50 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags)
{
- const struct file_operations *fops;
- const struct eventfs_entry *entry;
struct eventfs_inode *ei_child;
struct tracefs_inode *ti;
struct eventfs_inode *ei;
- struct dentry *ei_dentry = NULL;
- struct dentry *ret = NULL;
- struct dentry *d;
const char *name = dentry->d_name.name;
- umode_t mode;
- void *data;
- int idx;
- int i;
- int r;
+ struct dentry *result = NULL;
ti = get_tracefs(dir);
if (!(ti->flags & TRACEFS_EVENT_INODE))
- return NULL;
-
- /* Grab srcu to prevent the ei from going away */
- idx = srcu_read_lock(&eventfs_srcu);
+ return ERR_PTR(-EIO);
- /*
- * Grab the eventfs_mutex to consistent value from ti->private.
- * This s
- */
mutex_lock(&eventfs_mutex);
- ei = READ_ONCE(ti->private);
- if (ei && !ei->is_freed)
- ei_dentry = READ_ONCE(ei->dentry);
- mutex_unlock(&eventfs_mutex);
- if (!ei || !ei_dentry)
+ ei = ti->private;
+ if (!ei || ei->is_freed)
goto out;
- data = ei->data;
-
- list_for_each_entry_srcu(ei_child, &ei->children, list,
- srcu_read_lock_held(&eventfs_srcu)) {
+ list_for_each_entry(ei_child, &ei->children, list) {
if (strcmp(ei_child->name, name) != 0)
continue;
- ret = simple_lookup(dir, dentry, flags);
- if (IS_ERR(ret))
+ if (ei_child->is_freed)
goto out;
- d = create_dir_dentry(ei, ei_child, ei_dentry);
- dput(d);
+ result = lookup_dir_entry(dentry, ei, ei_child);
goto out;
}
- for (i = 0; i < ei->nr_entries; i++) {
- entry = &ei->entries[i];
- if (strcmp(name, entry->name) == 0) {
- void *cdata = data;
- mutex_lock(&eventfs_mutex);
- /* If ei->is_freed, then the event itself may be too */
- if (!ei->is_freed)
- r = entry->callback(name, &mode, &cdata, &fops);
- else
- r = -1;
- mutex_unlock(&eventfs_mutex);
- if (r <= 0)
- continue;
- ret = simple_lookup(dir, dentry, flags);
- if (IS_ERR(ret))
- goto out;
- d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
- dput(d);
- break;
- }
+ for (int i = 0; i < ei->nr_entries; i++) {
+ void *data;
+ umode_t mode;
+ const struct file_operations *fops;
+ const struct eventfs_entry *entry = &ei->entries[i];
+
+ if (strcmp(name, entry->name) != 0)
+ continue;
+
+ data = ei->data;
+ if (entry->callback(name, &mode, &data, &fops) <= 0)
+ goto out;
+
+ result = lookup_file_dentry(dentry, ei, i, mode, data, fops);
+ goto out;
}
out:
- srcu_read_unlock(&eventfs_srcu, idx);
- return ret;
+ mutex_unlock(&eventfs_mutex);
+ return result;
}
/*
@@ -802,7 +608,7 @@ static int eventfs_iterate(struct file *file, struct dir_context *ctx)
name = ei_child->name;
- ino = EVENTFS_DIR_INODE_INO;
+ ino = eventfs_dir_ino(ei_child);
if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
goto out_dec;
@@ -863,25 +669,10 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode
if (!parent)
return ERR_PTR(-EINVAL);
- ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ ei = alloc_ei(name);
if (!ei)
return ERR_PTR(-ENOMEM);
- ei->name = kstrdup_const(name, GFP_KERNEL);
- if (!ei->name) {
- kfree(ei);
- return ERR_PTR(-ENOMEM);
- }
-
- if (size) {
- ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
- if (!ei->d_children) {
- kfree_const(ei->name);
- kfree(ei);
- return ERR_PTR(-ENOMEM);
- }
- }
-
ei->entries = entries;
ei->nr_entries = size;
ei->data = data;
@@ -889,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode
INIT_LIST_HEAD(&ei->list);
mutex_lock(&eventfs_mutex);
- if (!parent->is_freed) {
+ if (!parent->is_freed)
list_add_tail(&ei->list, &parent->children);
- ei->d_parent = parent->dentry;
- }
mutex_unlock(&eventfs_mutex);
/* Was the parent freed? */
@@ -932,28 +721,20 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
if (IS_ERR(dentry))
return ERR_CAST(dentry);
- ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ ei = alloc_ei(name);
if (!ei)
- goto fail_ei;
+ goto fail;
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
goto fail;
- if (size) {
- ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
- if (!ei->d_children)
- goto fail;
- }
-
- ei->dentry = dentry;
+ // Note: we have a ref to the dentry from tracefs_start_creating()
+ ei->events_dir = dentry;
ei->entries = entries;
ei->nr_entries = size;
ei->is_events = 1;
ei->data = data;
- ei->name = kstrdup_const(name, GFP_KERNEL);
- if (!ei->name)
- goto fail;
/* Save the ownership of this directory */
uid = d_inode(dentry->d_parent)->i_uid;
@@ -984,11 +765,19 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
inode->i_op = &eventfs_root_dir_inode_operations;
inode->i_fop = &eventfs_file_operations;
- dentry->d_fsdata = ei;
+ dentry->d_fsdata = get_ei(ei);
- /* directory inodes start off with i_nlink == 2 (for "." entry) */
- inc_nlink(inode);
+ /*
+ * Keep all eventfs directories with i_nlink == 1.
+ * Due to the dynamic nature of the dentry creations and not
+ * wanting to add a pointer to the parent eventfs_inode in the
+ * eventfs_inode structure, keeping the i_nlink in sync with the
+ * number of directories would cause too much complexity for
+ * something not worth much. Keeping directory links at 1
+ * tells userspace not to trust the link number.
+ */
d_instantiate(dentry, inode);
+ /* The dentry of the "events" parent does keep track though */
inc_nlink(dentry->d_parent->d_inode);
fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
tracefs_end_creating(dentry);
@@ -996,72 +785,11 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
return ei;
fail:
- kfree(ei->d_children);
- kfree(ei);
- fail_ei:
+ free_ei(ei);
tracefs_failed_creating(dentry);
return ERR_PTR(-ENOMEM);
}
-static LLIST_HEAD(free_list);
-
-static void eventfs_workfn(struct work_struct *work)
-{
- struct eventfs_inode *ei, *tmp;
- struct llist_node *llnode;
-
- llnode = llist_del_all(&free_list);
- llist_for_each_entry_safe(ei, tmp, llnode, llist) {
- /* This dput() matches the dget() from unhook_dentry() */
- for (int i = 0; i < ei->nr_entries; i++) {
- if (ei->d_children[i])
- dput(ei->d_children[i]);
- }
- /* This should only get here if it had a dentry */
- if (!WARN_ON_ONCE(!ei->dentry))
- dput(ei->dentry);
- }
-}
-
-static DECLARE_WORK(eventfs_work, eventfs_workfn);
-
-static void free_rcu_ei(struct rcu_head *head)
-{
- struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
-
- if (ei->dentry) {
- /* Do not free the ei until all references of dentry are gone */
- if (llist_add(&ei->llist, &free_list))
- queue_work(system_unbound_wq, &eventfs_work);
- return;
- }
-
- /* If the ei doesn't have a dentry, neither should its children */
- for (int i = 0; i < ei->nr_entries; i++) {
- WARN_ON_ONCE(ei->d_children[i]);
- }
-
- free_ei(ei);
-}
-
-static void unhook_dentry(struct dentry *dentry)
-{
- if (!dentry)
- return;
- /*
- * Need to add a reference to the dentry that is expected by
- * simple_recursive_removal(), which will include a dput().
- */
- dget(dentry);
-
- /*
- * Also add a reference for the dput() in eventfs_workfn().
- * That is required as that dput() will free the ei after
- * the SRCU grace period is over.
- */
- dget(dentry);
-}
-
/**
* eventfs_remove_rec - remove eventfs dir or file from list
* @ei: eventfs_inode to be removed.
@@ -1074,8 +802,6 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
{
struct eventfs_inode *ei_child;
- if (!ei)
- return;
/*
* Check recursion depth. It should never be greater than 3:
* 0 - events/
@@ -1087,28 +813,11 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
return;
/* search for nested folders or files */
- list_for_each_entry_srcu(ei_child, &ei->children, list,
- lockdep_is_held(&eventfs_mutex)) {
- /* Children only have dentry if parent does */
- WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
+ list_for_each_entry(ei_child, &ei->children, list)
eventfs_remove_rec(ei_child, level + 1);
- }
-
-
- ei->is_freed = 1;
- for (int i = 0; i < ei->nr_entries; i++) {
- if (ei->d_children[i]) {
- /* Children only have dentry if parent does */
- WARN_ON_ONCE(!ei->dentry);
- unhook_dentry(ei->d_children[i]);
- }
- }
-
- unhook_dentry(ei->dentry);
-
- list_del_rcu(&ei->list);
- call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+ list_del(&ei->list);
+ free_ei(ei);
}
/**
@@ -1119,22 +828,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
*/
void eventfs_remove_dir(struct eventfs_inode *ei)
{
- struct dentry *dentry;
-
if (!ei)
return;
mutex_lock(&eventfs_mutex);
- dentry = ei->dentry;
eventfs_remove_rec(ei, 0);
mutex_unlock(&eventfs_mutex);
-
- /*
- * If any of the ei children has a dentry, then the ei itself
- * must have a dentry.
- */
- if (dentry)
- simple_recursive_removal(dentry, NULL);
}
/**
@@ -1147,7 +846,11 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
{
struct dentry *dentry;
- dentry = ei->dentry;
+ dentry = ei->events_dir;
+ if (!dentry)
+ return;
+
+ ei->events_dir = NULL;
eventfs_remove_dir(ei);
/*
@@ -1157,5 +860,6 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
* sticks around while the other ei->dentry are created
* and destroyed dynamically.
*/
+ d_invalidate(dentry);
dput(dentry);
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index e1b172c0e091..5545e6bf7d26 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb)
if (!ti)
return NULL;
- ti->flags = 0;
-
return &ti->vfs_inode;
}
@@ -379,21 +377,30 @@ static const struct super_operations tracefs_super_operations = {
.show_options = tracefs_show_options,
};
-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
+/*
+ * It would be cleaner if eventfs had its own dentry ops.
+ *
+ * Note that d_revalidate is called potentially under RCU,
+ * so it can't take the eventfs mutex etc. It's fine - if
+ * we open a file just as it's marked dead, things will
+ * still work just fine, and just see the old stale case.
+ */
+static void tracefs_d_release(struct dentry *dentry)
{
- struct tracefs_inode *ti;
+ if (dentry->d_fsdata)
+ eventfs_d_release(dentry);
+}
- if (!dentry || !inode)
- return;
+static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct eventfs_inode *ei = dentry->d_fsdata;
- ti = get_tracefs(inode);
- if (ti && ti->flags & TRACEFS_EVENT_INODE)
- eventfs_set_ei_status_free(ti, dentry);
- iput(inode);
+ return !(ei && ei->is_freed);
}
static const struct dentry_operations tracefs_dentry_operations = {
- .d_iput = tracefs_dentry_iput,
+ .d_revalidate = tracefs_d_revalidate,
+ .d_release = tracefs_d_release,
};
static int trace_fill_super(struct super_block *sb, void *data, int silent)
@@ -497,75 +504,6 @@ struct dentry *tracefs_end_creating(struct dentry *dentry)
return dentry;
}
-/**
- * eventfs_start_creating - start the process of creating a dentry
- * @name: Name of the file created for the dentry
- * @parent: The parent dentry where this dentry will be created
- *
- * This is a simple helper function for the dynamically created eventfs
- * files. When the directory of the eventfs files are accessed, their
- * dentries are created on the fly. This function is used to start that
- * process.
- */
-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
-{
- struct dentry *dentry;
- int error;
-
- /* Must always have a parent. */
- if (WARN_ON_ONCE(!parent))
- return ERR_PTR(-EINVAL);
-
- error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
- &tracefs_mount_count);
- if (error)
- return ERR_PTR(error);
-
- if (unlikely(IS_DEADDIR(parent->d_inode)))
- dentry = ERR_PTR(-ENOENT);
- else
- dentry = lookup_one_len(name, parent, strlen(name));
-
- if (!IS_ERR(dentry) && dentry->d_inode) {
- dput(dentry);
- dentry = ERR_PTR(-EEXIST);
- }
-
- if (IS_ERR(dentry))
- simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-
- return dentry;
-}
-
-/**
- * eventfs_failed_creating - clean up a failed eventfs dentry creation
- * @dentry: The dentry to clean up
- *
- * If after calling eventfs_start_creating(), a failure is detected, the
- * resources created by eventfs_start_creating() needs to be cleaned up. In
- * that case, this function should be called to perform that clean up.
- */
-struct dentry *eventfs_failed_creating(struct dentry *dentry)
-{
- dput(dentry);
- simple_release_fs(&tracefs_mount, &tracefs_mount_count);
- return NULL;
-}
-
-/**
- * eventfs_end_creating - Finish the process of creating a eventfs dentry
- * @dentry: The dentry that has successfully been created.
- *
- * This function is currently just a place holder to match
- * eventfs_start_creating(). In case any synchronization needs to be added,
- * this function will be used to implement that without having to modify
- * the callers of eventfs_start_creating().
- */
-struct dentry *eventfs_end_creating(struct dentry *dentry)
-{
- return dentry;
-}
-
/* Find the inode that this will use for default */
static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
{
@@ -779,7 +717,11 @@ static void init_once(void *foo)
{
struct tracefs_inode *ti = (struct tracefs_inode *) foo;
+ /* inode_init_once() calls memset() on the vfs_inode portion */
inode_init_once(&ti->vfs_inode);
+
+ /* Zero out the rest */
+ memset_after(ti, 0, vfs_inode);
}
static int __init tracefs_init(void)
@@ -789,7 +731,6 @@ static int __init tracefs_init(void)
tracefs_inode_cachep = kmem_cache_create("tracefs_inode_cache",
sizeof(struct tracefs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|
SLAB_ACCOUNT),
init_once);
if (!tracefs_inode_cachep)
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 12b7d0150ae9..beb3dcd0e434 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -11,9 +11,10 @@ enum {
};
struct tracefs_inode {
+ struct inode vfs_inode;
+ /* The below gets initialized with memset_after(ti, 0, vfs_inode) */
unsigned long flags;
void *private;
- struct inode vfs_inode;
};
/*
@@ -31,42 +32,37 @@ struct eventfs_attr {
/*
* struct eventfs_inode - hold the properties of the eventfs directories.
* @list: link list into the parent directory
+ * @rcu: Union with @list for freeing
+ * @children: link list into the child eventfs_inode
* @entries: the array of entries representing the files in the directory
* @name: the name of the directory to create
- * @children: link list into the child eventfs_inode
- * @dentry: the dentry of the directory
- * @d_parent: pointer to the parent's dentry
- * @d_children: The array of dentries to represent the files when created
+ * @events_dir: the dentry of the events directory
* @entry_attrs: Saved mode and ownership of the @d_children
- * @attr: Saved mode and ownership of eventfs_inode itself
* @data: The private data to pass to the callbacks
+ * @attr: Saved mode and ownership of eventfs_inode itself
* @is_freed: Flag set if the eventfs is on its way to be freed
* Note if is_freed is set, then dentry is corrupted.
+ * @is_events: Flag set for only the top level "events" directory
* @nr_entries: The number of items in @entries
+ * @ino: The saved inode number
*/
struct eventfs_inode {
- struct list_head list;
+ union {
+ struct list_head list;
+ struct rcu_head rcu;
+ };
+ struct list_head children;
const struct eventfs_entry *entries;
const char *name;
- struct list_head children;
- struct dentry *dentry; /* Check is_freed to access */
- struct dentry *d_parent;
- struct dentry **d_children;
+ struct dentry *events_dir;
struct eventfs_attr *entry_attrs;
- struct eventfs_attr attr;
void *data;
- /*
- * Union - used for deletion
- * @llist: for calling dput() if needed after RCU
- * @rcu: eventfs_inode to delete in RCU
- */
- union {
- struct llist_node llist;
- struct rcu_head rcu;
- };
+ struct eventfs_attr attr;
+ struct kref kref;
unsigned int is_freed:1;
unsigned int is_events:1;
unsigned int nr_entries:30;
+ unsigned int ino;
};
static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
@@ -78,10 +74,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent);
struct dentry *tracefs_end_creating(struct dentry *dentry);
struct dentry *tracefs_failed_creating(struct dentry *dentry);
struct inode *tracefs_get_inode(struct super_block *sb);
-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
-struct dentry *eventfs_failed_creating(struct dentry *dentry);
-struct dentry *eventfs_end_creating(struct dentry *dentry);
-void eventfs_update_gid(struct dentry *dentry, kgid_t gid);
-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+void eventfs_d_release(struct dentry *dentry);
#endif /* _TRACEFS_INTERNAL_H */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e413a9cf8ee3..551148de66cd 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -205,7 +205,6 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
err = fscrypt_prepare_lookup(dir, dentry, &nm);
- generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return d_splice_alias(NULL, dentry);
if (err)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 09e270d6ed02..7f4031a15f4d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2239,13 +2239,14 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
goto out_umount;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
err = -ENOMEM;
goto out_umount;
}
- import_uuid(&sb->s_uuid, c->uuid);
+ super_set_uuid(sb, c->uuid, sizeof(c->uuid));
mutex_unlock(&c->umount_mutex);
return 0;
@@ -2433,8 +2434,8 @@ static int __init ubifs_init(void)
ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
sizeof(struct ubifs_inode), 0,
- SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT |
- SLAB_ACCOUNT, &inode_slab_ctor);
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ &inode_slab_ctor);
if (!ubifs_inode_slab)
return -ENOMEM;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 928a04d9d9e0..6f420f4ca005 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -177,7 +177,6 @@ static int __init init_inodecache(void)
udf_inode_cachep = kmem_cache_create("udf_inode_cache",
sizeof(struct udf_inode_info),
0, (SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD |
SLAB_ACCOUNT),
init_once);
if (!udf_inode_cachep)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index a480810cd4e3..44666afc6209 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1470,8 +1470,7 @@ static int __init init_inodecache(void)
{
ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache",
sizeof(struct ufs_inode_info), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
- SLAB_ACCOUNT),
+ (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT),
offsetof(struct ufs_inode_info, i_u1.i_symlink),
sizeof_field(struct ufs_inode_info,
i_u1.i_symlink),
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 1fb8f4df60cb..cabe8ac4fefc 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -339,8 +339,7 @@ static int vboxsf_setup(void)
vboxsf_inode_cachep =
kmem_cache_create("vboxsf_inode_cache",
sizeof(struct vboxsf_inode), 0,
- (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
- SLAB_ACCOUNT),
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
vboxsf_inode_init_once);
if (!vboxsf_inode_cachep) {
err = -ENOMEM;
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index a6a6b2749241..b3506f56e180 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -69,7 +69,6 @@ struct fsverity_info {
u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE];
const struct inode *inode;
unsigned long *hash_block_verified;
- spinlock_t hash_page_init_lock;
};
#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
index bf7a5f4cccaf..3969d54158d1 100644
--- a/fs/verity/measure.c
+++ b/fs/verity/measure.c
@@ -159,9 +159,9 @@ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_ker
__bpf_kfunc_end_defs();
-BTF_SET8_START(fsverity_set_ids)
+BTF_KFUNCS_START(fsverity_set_ids)
BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS)
-BTF_SET8_END(fsverity_set_ids)
+BTF_KFUNCS_END(fsverity_set_ids)
static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 6c31a871b84b..fdeb95eca3af 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -239,7 +239,6 @@ struct fsverity_info *fsverity_create_info(const struct inode *inode,
err = -ENOMEM;
goto fail;
}
- spin_lock_init(&vi->hash_page_init_lock);
}
return vi;
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index 904ccd7e8e16..4fcad0825a12 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -19,7 +19,6 @@ static struct workqueue_struct *fsverity_read_workqueue;
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
unsigned long hblock_idx)
{
- bool verified;
unsigned int blocks_per_page;
unsigned int i;
@@ -43,12 +42,20 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
* re-instantiated from the backing storage are re-verified. To do
* this, we use PG_checked again, but now it doesn't really mean
* "checked". Instead, now it just serves as an indicator for whether
- * the hash page is newly instantiated or not.
+ * the hash page is newly instantiated or not. If the page is new, as
+ * indicated by PG_checked=0, we clear the bitmap bits for the page's
+ * blocks since they are untrustworthy, then set PG_checked=1.
+ * Otherwise we return the bitmap bit for the requested block.
*
- * The first thread that sees PG_checked=0 must clear the corresponding
- * bitmap bits, then set PG_checked=1. This requires a spinlock. To
- * avoid having to take this spinlock in the common case of
- * PG_checked=1, we start with an opportunistic lockless read.
+ * Multiple threads may execute this code concurrently on the same page.
+ * This is safe because we use memory barriers to ensure that if a
+ * thread sees PG_checked=1, then it also sees the associated bitmap
+ * clearing to have occurred. Also, all writes and their corresponding
+ * reads are atomic, and all writes are safe to repeat in the event that
+ * multiple threads get into the PG_checked=0 section. (Clearing a
+ * bitmap bit again at worst causes a hash block to be verified
+ * redundantly. That event should be very rare, so it's not worth using
+ * a lock to avoid. Setting PG_checked again has no effect.)
*/
if (PageChecked(hpage)) {
/*
@@ -58,24 +65,17 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
smp_rmb();
return test_bit(hblock_idx, vi->hash_block_verified);
}
- spin_lock(&vi->hash_page_init_lock);
- if (PageChecked(hpage)) {
- verified = test_bit(hblock_idx, vi->hash_block_verified);
- } else {
- blocks_per_page = vi->tree_params.blocks_per_page;
- hblock_idx = round_down(hblock_idx, blocks_per_page);
- for (i = 0; i < blocks_per_page; i++)
- clear_bit(hblock_idx + i, vi->hash_block_verified);
- /*
- * A write memory barrier is needed here to give RELEASE
- * semantics to the below SetPageChecked() operation.
- */
- smp_wmb();
- SetPageChecked(hpage);
- verified = false;
- }
- spin_unlock(&vi->hash_page_init_lock);
- return verified;
+ blocks_per_page = vi->tree_params.blocks_per_page;
+ hblock_idx = round_down(hblock_idx, blocks_per_page);
+ for (i = 0; i < blocks_per_page; i++)
+ clear_bit(hblock_idx + i, vi->hash_block_verified);
+ /*
+ * A write memory barrier is needed here to give RELEASE semantics to
+ * the below SetPageChecked() operation.
+ */
+ smp_wmb();
+ SetPageChecked(hpage);
+ return false;
}
/*
diff --git a/fs/xattr.c b/fs/xattr.c
index 09d927603433..f8b643f91a98 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -16,7 +16,6 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/security.h>
-#include <linux/evm.h>
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/fsnotify.h>
@@ -552,11 +551,11 @@ __vfs_removexattr_locked(struct mnt_idmap *idmap,
goto out;
error = __vfs_removexattr(idmap, dentry, name);
+ if (error)
+ return error;
- if (!error) {
- fsnotify_xattr(dentry);
- evm_inode_post_removexattr(dentry, name);
- }
+ fsnotify_xattr(dentry);
+ security_inode_post_removexattr(dentry, name);
out:
return error;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 9976a00a73f9..e965a48e7db9 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -421,10 +421,10 @@ xfs_attr_complete_op(
bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
args->op_flags &= ~XFS_DA_OP_REPLACE;
- if (do_replace) {
- args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+ args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+ if (do_replace)
return replace_state;
- }
+
return XFS_DAS_DONE;
}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 31100120b2c5..e31663cb7b43 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1119,20 +1119,6 @@ xfs_rtbitmap_blockcount(
}
/*
- * Compute the maximum level number of the realtime summary file, as defined by
- * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct
- * use of rt volumes with more than 2^32 extents.
- */
-uint8_t
-xfs_compute_rextslog(
- xfs_rtbxlen_t rtextents)
-{
- if (!rtextents)
- return 0;
- return xfs_highbit64(rtextents);
-}
-
-/*
* Compute the number of rtbitmap words needed to populate every block of a
* bitmap that is large enough to track the given number of rt extents.
*/
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index 274dc7dae1fa..152a66750af5 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -351,20 +351,6 @@ xfs_rtfree_extent(
int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
xfs_filblks_t rtlen);
-uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
-
-/* Do we support an rt volume having this number of rtextents? */
-static inline bool
-xfs_validate_rtextents(
- xfs_rtbxlen_t rtextents)
-{
- /* No runt rt volumes */
- if (rtextents == 0)
- return false;
-
- return true;
-}
-
xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t
rtextents);
unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp,
@@ -383,8 +369,6 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
# define xfs_rtsummary_read_buf(a,b) (-ENOSYS)
# define xfs_rtbuf_cache_relse(a) (0)
# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
-# define xfs_compute_rextslog(rtx) (0)
-# define xfs_validate_rtextents(rtx) (false)
static inline xfs_filblks_t
xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
{
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 4a9e8588f4c9..5bb6e2bd6dee 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1377,3 +1377,17 @@ xfs_validate_stripe_geometry(
}
return true;
}
+
+/*
+ * Compute the maximum level number of the realtime summary file, as defined by
+ * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct
+ * use of rt volumes with more than 2^32 extents.
+ */
+uint8_t
+xfs_compute_rextslog(
+ xfs_rtbxlen_t rtextents)
+{
+ if (!rtextents)
+ return 0;
+ return xfs_highbit64(rtextents);
+}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 19134b23c10b..2e8e8d63d4eb 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp,
extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
__s64 sunit, __s64 swidth, int sectorsize, bool silent);
+uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+
#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 20b5375f2d9c..62e02d5380ad 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -251,4 +251,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off);
bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
xfs_fileoff_t len);
+/* Do we support an rt volume having this number of rtextents? */
+static inline bool
+xfs_validate_rtextents(
+ xfs_rtbxlen_t rtextents)
+{
+ /* No runt rt volumes */
+ if (rtextents == 0)
+ return false;
+
+ return true;
+}
+
#endif /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 441ca9977652..46583517377f 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -15,6 +15,7 @@
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bit.h"
+#include "xfs_sb.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index fabd0ed9dfa6..b1ff4f33324a 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -16,6 +16,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
+#include "xfs_sb.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 813f85156b0c..1698507d1ac7 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -112,7 +112,7 @@ xfs_end_ioend(
* longer dirty. If we don't remove delalloc blocks here, they become
* stale and can corrupt free space accounting on unmount.
*/
- error = blk_status_to_errno(ioend->io_bio->bi_status);
+ error = blk_status_to_errno(ioend->io_bio.bi_status);
if (unlikely(error)) {
if (ioend->io_flags & IOMAP_F_SHARED) {
xfs_reflink_cancel_cow_range(ip, offset, size, true);
@@ -179,7 +179,7 @@ STATIC void
xfs_end_bio(
struct bio *bio)
{
- struct iomap_ioend *ioend = bio->bi_private;
+ struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
unsigned long flags;
@@ -276,7 +276,8 @@ static int
xfs_map_blocks(
struct iomap_writepage_ctx *wpc,
struct inode *inode,
- loff_t offset)
+ loff_t offset,
+ unsigned int len)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -444,7 +445,7 @@ xfs_prepare_ioend(
/* send ioends that might require a transaction to the completion wq */
if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
(ioend->io_flags & IOMAP_F_SHARED))
- ioend->io_bio->bi_end_io = xfs_end_bio;
+ ioend->io_bio.bi_end_io = xfs_end_bio;
return status;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8e5bd50d29fe..01b41fabbe3c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1951,7 +1951,7 @@ xfs_free_buftarg(
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
- bdev_release(btp->bt_bdev_handle);
+ fput(btp->bt_bdev_file);
kmem_free(btp);
}
@@ -1994,7 +1994,7 @@ xfs_setsize_buftarg_early(
struct xfs_buftarg *
xfs_alloc_buftarg(
struct xfs_mount *mp,
- struct bdev_handle *bdev_handle)
+ struct file *bdev_file)
{
xfs_buftarg_t *btp;
const struct dax_holder_operations *ops = NULL;
@@ -2005,9 +2005,9 @@ xfs_alloc_buftarg(
btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
btp->bt_mount = mp;
- btp->bt_bdev_handle = bdev_handle;
- btp->bt_dev = bdev_handle->bdev->bd_dev;
- btp->bt_bdev = bdev_handle->bdev;
+ btp->bt_bdev_file = bdev_file;
+ btp->bt_bdev = file_bdev(bdev_file);
+ btp->bt_dev = btp->bt_bdev->bd_dev;
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
mp, ops);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b470de08a46c..304e858d04fb 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -98,7 +98,7 @@ typedef unsigned int xfs_buf_flags_t;
*/
typedef struct xfs_buftarg {
dev_t bt_dev;
- struct bdev_handle *bt_bdev_handle;
+ struct file *bt_bdev_file;
struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
u64 bt_dax_part_off;
@@ -366,7 +366,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
* Handling of buftargs.
*/
struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
- struct bdev_handle *bdev_handle);
+ struct file *bdev_file);
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index aabb25dc3efa..57fa21ad7912 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -62,7 +62,7 @@ xfs_uuid_mount(
int hole, i;
/* Publish UUID in struct super_block */
- uuid_copy(&mp->m_super->s_uuid, uuid);
+ super_set_uuid(mp->m_super, uuid->b, sizeof(*uuid));
if (xfs_has_nouuid(mp))
return 0;
@@ -706,6 +706,8 @@ xfs_mountfs(
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
+ super_set_sysfs_name_id(mp->m_super);
+
error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
NULL, mp->m_super->s_id);
if (error)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index aff20ddd4a9f..59c8c0541bdd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -350,7 +350,6 @@ xfs_setup_dax_always(
return -EINVAL;
}
- xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
return 0;
disable_dax:
@@ -362,16 +361,16 @@ STATIC int
xfs_blkdev_get(
xfs_mount_t *mp,
const char *name,
- struct bdev_handle **handlep)
+ struct file **bdev_filep)
{
int error = 0;
- *handlep = bdev_open_by_path(name,
+ *bdev_filep = bdev_file_open_by_path(name,
BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
mp->m_super, &fs_holder_ops);
- if (IS_ERR(*handlep)) {
- error = PTR_ERR(*handlep);
- *handlep = NULL;
+ if (IS_ERR(*bdev_filep)) {
+ error = PTR_ERR(*bdev_filep);
+ *bdev_filep = NULL;
xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
}
@@ -436,26 +435,26 @@ xfs_open_devices(
{
struct super_block *sb = mp->m_super;
struct block_device *ddev = sb->s_bdev;
- struct bdev_handle *logdev_handle = NULL, *rtdev_handle = NULL;
+ struct file *logdev_file = NULL, *rtdev_file = NULL;
int error;
/*
* Open real time and log devices - order is important.
*/
if (mp->m_logname) {
- error = xfs_blkdev_get(mp, mp->m_logname, &logdev_handle);
+ error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
if (error)
return error;
}
if (mp->m_rtname) {
- error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_handle);
+ error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
if (error)
goto out_close_logdev;
- if (rtdev_handle->bdev == ddev ||
- (logdev_handle &&
- rtdev_handle->bdev == logdev_handle->bdev)) {
+ if (file_bdev(rtdev_file) == ddev ||
+ (logdev_file &&
+ file_bdev(rtdev_file) == file_bdev(logdev_file))) {
xfs_warn(mp,
"Cannot mount filesystem with identical rtdev and ddev/logdev.");
error = -EINVAL;
@@ -467,25 +466,25 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers
*/
error = -ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_handle);
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
- if (rtdev_handle) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_handle);
+ if (rtdev_file) {
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
- if (logdev_handle && logdev_handle->bdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_handle);
+ if (logdev_file && file_bdev(logdev_file) != ddev) {
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
mp->m_logdev_targp = mp->m_ddev_targp;
/* Handle won't be used, drop it */
- if (logdev_handle)
- bdev_release(logdev_handle);
+ if (logdev_file)
+ fput(logdev_file);
}
return 0;
@@ -496,11 +495,11 @@ xfs_open_devices(
out_free_ddev_targ:
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
- if (rtdev_handle)
- bdev_release(rtdev_handle);
+ if (rtdev_file)
+ fput(rtdev_file);
out_close_logdev:
- if (logdev_handle)
- bdev_release(logdev_handle);
+ if (logdev_file)
+ fput(logdev_file);
return error;
}
@@ -1496,6 +1495,18 @@ xfs_fs_fill_super(
mp->m_super = sb;
+ /*
+ * Copy VFS mount flags from the context now that all parameter parsing
+ * is guaranteed to have been completed by either the old mount API or
+ * the newer fsopen/fsconfig API.
+ */
+ if (fc->sb_flags & SB_RDONLY)
+ set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+ if (fc->sb_flags & SB_DIRSYNC)
+ mp->m_features |= XFS_FEAT_DIRSYNC;
+ if (fc->sb_flags & SB_SYNCHRONOUS)
+ mp->m_features |= XFS_FEAT_WSYNC;
+
error = xfs_fs_validate_params(mp);
if (error)
return error;
@@ -1965,6 +1976,11 @@ static const struct fs_context_operations xfs_context_ops = {
.free = xfs_fs_free,
};
+/*
+ * WARNING: do not initialise any parameters in this function that depend on
+ * mount option parsing having already been performed as this can be called from
+ * fsopen() before any parameters have been set.
+ */
static int xfs_init_fs_context(
struct fs_context *fc)
{
@@ -1996,16 +2012,6 @@ static int xfs_init_fs_context(
mp->m_logbsize = -1;
mp->m_allocsize_log = 16; /* 64k */
- /*
- * Copy binary VFS mount flags we are interested in.
- */
- if (fc->sb_flags & SB_RDONLY)
- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
- if (fc->sb_flags & SB_DIRSYNC)
- mp->m_features |= XFS_FEAT_DIRSYNC;
- if (fc->sb_flags & SB_SYNCHRONOUS)
- mp->m_features |= XFS_FEAT_WSYNC;
-
fc->s_fs_info = mp;
fc->ops = &xfs_context_ops;
@@ -2037,8 +2043,7 @@ xfs_init_caches(void)
xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
SLAB_HWCACHE_ALIGN |
- SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD,
+ SLAB_RECLAIM_ACCOUNT,
NULL);
if (!xfs_buf_cache)
goto out;
@@ -2103,14 +2108,14 @@ xfs_init_caches(void)
sizeof(struct xfs_inode), 0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
xfs_fs_inode_init_once);
if (!xfs_inode_cache)
goto out_destroy_efi_cache;
xfs_ili_cache = kmem_cache_create("xfs_ili",
sizeof(struct xfs_inode_log_item), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_RECLAIM_ACCOUNT,
NULL);
if (!xfs_ili_cache)
goto out_destroy_inode_cache;
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 6ab2318a9c8e..3b103715acc9 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -125,7 +125,8 @@ static void zonefs_readahead(struct readahead_control *rac)
* which implies that the page range can only be within the fixed inode size.
*/
static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
- struct inode *inode, loff_t offset)
+ struct inode *inode, loff_t offset,
+ unsigned int len)
{
struct zonefs_zone *z = zonefs_inode_zone(inode);
@@ -348,7 +349,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
struct zonefs_inode_info *zi = ZONEFS_I(inode);
if (error) {
- zonefs_io_error(inode, true);
+ /*
+ * For Sync IOs, error recovery is called from
+ * zonefs_file_dio_write().
+ */
+ if (!is_sync_kiocb(iocb))
+ zonefs_io_error(inode, true);
return error;
}
@@ -491,6 +497,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
ret = -EINVAL;
goto inode_unlock;
}
+ /*
+ * Advance the zone write pointer offset. This assumes that the
+ * IO will succeed, which is OK to do because we do not allow
+ * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
+ * fails, the error path will correct the write pointer offset.
+ */
+ z->z_wpoffset += count;
+ zonefs_inode_account_active(inode);
mutex_unlock(&zi->i_truncate_mutex);
}
@@ -504,20 +518,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
if (ret == -ENOTBLK)
ret = -EBUSY;
- if (zonefs_zone_is_seq(z) &&
- (ret > 0 || ret == -EIOCBQUEUED)) {
- if (ret > 0)
- count = ret;
-
- /*
- * Update the zone write pointer offset assuming the write
- * operation succeeded. If it did not, the error recovery path
- * will correct it. Also do active seq file accounting.
- */
- mutex_lock(&zi->i_truncate_mutex);
- z->z_wpoffset += count;
- zonefs_inode_account_active(inode);
- mutex_unlock(&zi->i_truncate_mutex);
+ /*
+ * For a failed IO or partial completion, trigger error recovery
+ * to update the zone write pointer offset to a correct value.
+ * For asynchronous IOs, zonefs_file_write_dio_end_io() may already
+ * have executed error recovery if the IO already completed when we
+ * reach here. However, we cannot know that and execute error recovery
+ * again (that will not change anything).
+ */
+ if (zonefs_zone_is_seq(z)) {
+ if (ret > 0 && ret != count)
+ ret = -EIO;
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ zonefs_io_error(inode, true);
}
inode_unlock:
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 93971742613a..c6a124e8d565 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -15,12 +15,13 @@
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
-#include <linux/parser.h>
#include <linux/uio.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/crc32.h>
#include <linux/task_io_accounting_ops.h>
+#include <linux/fs_parser.h>
+#include <linux/fs_context.h>
#include "zonefs.h"
@@ -113,7 +114,7 @@ static int zonefs_zone_mgmt(struct super_block *sb,
trace_zonefs_zone_mgmt(sb, z, op);
ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
- z->z_size >> SECTOR_SHIFT, GFP_NOFS);
+ z->z_size >> SECTOR_SHIFT);
if (ret) {
zonefs_err(sb,
"Zone management operation %s at %llu failed %d\n",
@@ -246,16 +247,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
z->z_mode = inode->i_mode;
}
-struct zonefs_ioerr_data {
- struct inode *inode;
- bool write;
-};
-
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
- struct zonefs_ioerr_data *err = data;
- struct inode *inode = err->inode;
+ struct blk_zone *z = data;
+
+ *z = *zone;
+ return 0;
+}
+
+static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
+ bool write)
+{
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
@@ -270,8 +273,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
data_size = zonefs_check_zone_condition(sb, z, zone);
isize = i_size_read(inode);
if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
- !err->write && isize == data_size)
- return 0;
+ !write && isize == data_size)
+ return;
/*
* At this point, we detected either a bad zone or an inconsistency
@@ -292,7 +295,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* In all cases, warn about inode size inconsistency and handle the
* IO error according to the zone condition and to the mount options.
*/
- if (zonefs_zone_is_seq(z) && isize != data_size)
+ if (isize != data_size)
zonefs_warn(sb,
"inode %lu: invalid size %lld (should be %lld)\n",
inode->i_ino, isize, data_size);
@@ -352,8 +355,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
zonefs_i_size_write(inode, data_size);
z->z_wpoffset = data_size;
zonefs_inode_account_active(inode);
-
- return 0;
}
/*
@@ -367,23 +368,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
{
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag;
- unsigned int nr_zones = 1;
- struct zonefs_ioerr_data err = {
- .inode = inode,
- .write = write,
- };
+ struct blk_zone zone;
int ret;
/*
- * The only files that have more than one zone are conventional zone
- * files with aggregated conventional zones, for which the inode zone
- * size is always larger than the device zone size.
+ * Conventional zone have no write pointer and cannot become read-only
+ * or offline. So simply fake a report for a single or aggregated zone
+ * and let zonefs_handle_io_error() correct the zone inode information
+ * according to the mount options.
*/
- if (z->z_size > bdev_zone_sectors(sb->s_bdev))
- nr_zones = z->z_size >>
- (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ if (!zonefs_zone_is_seq(z)) {
+ zone.start = z->z_sector;
+ zone.len = z->z_size >> SECTOR_SHIFT;
+ zone.wp = zone.start + zone.len;
+ zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+ zone.cond = BLK_ZONE_COND_NOT_WP;
+ zone.capacity = zone.len;
+ goto handle_io_error;
+ }
/*
* Memory allocations in blkdev_report_zones() can trigger a memory
@@ -394,12 +397,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
* the GFP_NOIO context avoids both problems.
*/
noio_flag = memalloc_noio_save();
- ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
- zonefs_io_error_cb, &err);
- if (ret != nr_zones)
+ ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
+ zonefs_io_error_cb, &zone);
+ memalloc_noio_restore(noio_flag);
+
+ if (ret != 1) {
zonefs_err(sb, "Get inode %lu zone information failed %d\n",
inode->i_ino, ret);
- memalloc_noio_restore(noio_flag);
+ zonefs_warn(sb, "remounting filesystem read-only\n");
+ sb->s_flags |= SB_RDONLY;
+ return;
+ }
+
+handle_io_error:
+ zonefs_handle_io_error(inode, &zone, write);
}
static struct kmem_cache *zonefs_inode_cachep;
@@ -460,58 +471,47 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
enum {
- Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
- Opt_explicit_open, Opt_err,
+ Opt_errors, Opt_explicit_open,
};
-static const match_table_t tokens = {
- { Opt_errors_ro, "errors=remount-ro"},
- { Opt_errors_zro, "errors=zone-ro"},
- { Opt_errors_zol, "errors=zone-offline"},
- { Opt_errors_repair, "errors=repair"},
- { Opt_explicit_open, "explicit-open" },
- { Opt_err, NULL}
+struct zonefs_context {
+ unsigned long s_mount_opts;
};
-static int zonefs_parse_options(struct super_block *sb, char *options)
-{
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- substring_t args[MAX_OPT_ARGS];
- char *p;
-
- if (!options)
- return 0;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+static const struct constant_table zonefs_param_errors[] = {
+ {"remount-ro", ZONEFS_MNTOPT_ERRORS_RO},
+ {"zone-ro", ZONEFS_MNTOPT_ERRORS_ZRO},
+ {"zone-offline", ZONEFS_MNTOPT_ERRORS_ZOL},
+ {"repair", ZONEFS_MNTOPT_ERRORS_REPAIR},
+ {}
+};
- if (!*p)
- continue;
+static const struct fs_parameter_spec zonefs_param_spec[] = {
+ fsparam_enum ("errors", Opt_errors, zonefs_param_errors),
+ fsparam_flag ("explicit-open", Opt_explicit_open),
+ {}
+};
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_errors_ro:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
- break;
- case Opt_errors_zro:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
- break;
- case Opt_errors_zol:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
- break;
- case Opt_errors_repair:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
- break;
- case Opt_explicit_open:
- sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
- break;
- default:
- return -EINVAL;
- }
+static int zonefs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct zonefs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, zonefs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_errors:
+ ctx->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ ctx->s_mount_opts |= result.uint_32;
+ break;
+ case Opt_explicit_open:
+ ctx->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ break;
+ default:
+ return -EINVAL;
}
return 0;
@@ -533,13 +533,6 @@ static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
return 0;
}
-static int zonefs_remount(struct super_block *sb, int *flags, char *data)
-{
- sync_filesystem(sb);
-
- return zonefs_parse_options(sb, data);
-}
-
static int zonefs_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
@@ -1195,7 +1188,6 @@ static const struct super_operations zonefs_sops = {
.alloc_inode = zonefs_alloc_inode,
.free_inode = zonefs_free_inode,
.statfs = zonefs_statfs,
- .remount_fs = zonefs_remount,
.show_options = zonefs_show_options,
};
@@ -1240,9 +1232,10 @@ static void zonefs_release_zgroup_inodes(struct super_block *sb)
* sub-directories and files according to the device zone configuration and
* format options.
*/
-static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+static int zonefs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct zonefs_sb_info *sbi;
+ struct zonefs_context *ctx = fc->fs_private;
struct inode *inode;
enum zonefs_ztype ztype;
int ret;
@@ -1279,7 +1272,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_uid = GLOBAL_ROOT_UID;
sbi->s_gid = GLOBAL_ROOT_GID;
sbi->s_perm = 0640;
- sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
+ sbi->s_mount_opts = ctx->s_mount_opts;
atomic_set(&sbi->s_wro_seq_files, 0);
sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
@@ -1290,10 +1283,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
if (ret)
return ret;
- ret = zonefs_parse_options(sb, data);
- if (ret)
- return ret;
-
zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));
if (!sbi->s_max_wro_seq_files &&
@@ -1354,12 +1343,6 @@ cleanup:
return ret;
}
-static struct dentry *zonefs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super);
-}
-
static void zonefs_kill_super(struct super_block *sb)
{
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
@@ -1374,22 +1357,72 @@ static void zonefs_kill_super(struct super_block *sb)
kfree(sbi);
}
+static void zonefs_free_fc(struct fs_context *fc)
+{
+ struct zonefs_context *ctx = fc->fs_private;
+
+ kfree(ctx);
+}
+
+static int zonefs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, zonefs_fill_super);
+}
+
+static int zonefs_reconfigure(struct fs_context *fc)
+{
+ struct zonefs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
+ struct zonefs_sb_info *sbi = sb->s_fs_info;
+
+ sync_filesystem(fc->root->d_sb);
+ /* Copy new options from ctx into sbi. */
+ sbi->s_mount_opts = ctx->s_mount_opts;
+
+ return 0;
+}
+
+static const struct fs_context_operations zonefs_context_ops = {
+ .parse_param = zonefs_parse_param,
+ .get_tree = zonefs_get_tree,
+ .reconfigure = zonefs_reconfigure,
+ .free = zonefs_free_fc,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int zonefs_init_fs_context(struct fs_context *fc)
+{
+ struct zonefs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct zonefs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
+ fc->ops = &zonefs_context_ops;
+ fc->fs_private = ctx;
+
+ return 0;
+}
+
/*
* File system definition and registration.
*/
static struct file_system_type zonefs_type = {
- .owner = THIS_MODULE,
- .name = "zonefs",
- .mount = zonefs_mount,
- .kill_sb = zonefs_kill_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "zonefs",
+ .kill_sb = zonefs_kill_super,
+ .fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = zonefs_init_fs_context,
+ .parameters = zonefs_param_spec,
};
static int __init zonefs_init_inodecache(void)
{
zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache",
sizeof(struct zonefs_inode_info), 0,
- (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
NULL);
if (zonefs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 961f4d88f9ef..0c0695763bea 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -193,7 +193,6 @@ do { \
#ifndef smp_store_release
#define smp_store_release(p, v) \
do { \
- compiletime_assert_atomic_type(*p); \
barrier(); \
WRITE_ONCE(*p, v); \
} while (0)
@@ -203,7 +202,6 @@ do { \
#define smp_load_acquire(p) \
({ \
__unqual_scalar_typeof(*p) ___p1 = READ_ONCE(*p); \
- compiletime_assert_atomic_type(*p); \
barrier(); \
(typeof(*p))___p1; \
})
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
index 95a1d214108a..ef3f841c6625 100644
--- a/include/asm-generic/word-at-a-time.h
+++ b/include/asm-generic/word-at-a-time.h
@@ -2,7 +2,8 @@
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
#include <asm/byteorder.h>
#ifdef __BIG_ENDIAN
diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h
index c4c423e97f06..4453906105ca 100644
--- a/include/drm/bridge/aux-bridge.h
+++ b/include/drm/bridge/aux-bridge.h
@@ -9,6 +9,8 @@
#include <drm/drm_connector.h>
+struct auxiliary_device;
+
#if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE)
int drm_aux_bridge_register(struct device *parent);
#else
@@ -19,10 +21,23 @@ static inline int drm_aux_bridge_register(struct device *parent)
#endif
#if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE)
+struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np);
+int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev);
struct device *drm_dp_hpd_bridge_register(struct device *parent,
struct device_node *np);
void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status);
#else
+static inline struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent,
+ struct device_node *np)
+{
+ return NULL;
+}
+
+static inline int devm_drm_dp_hpd_bridge_add(struct auxiliary_device *adev)
+{
+ return 0;
+}
+
static inline struct device *drm_dp_hpd_bridge_register(struct device *parent,
struct device_node *np)
{
diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h
index e18429f09e53..2938ba80750d 100644
--- a/include/drm/drm_gem_vram_helper.h
+++ b/include/drm/drm_gem_vram_helper.h
@@ -33,8 +33,8 @@ struct vm_area_struct;
* struct drm_gem_vram_object - GEM object backed by VRAM
* @bo: TTM buffer object
* @map: Mapping information for @bo
- * @placement: TTM placement information. Supported placements are \
- %TTM_PL_VRAM and %TTM_PL_SYSTEM
+ * @placement: TTM placement information. Supported placements are %TTM_PL_VRAM
+ * and %TTM_PL_SYSTEM
* @placements: TTM placement information.
*
* The type struct drm_gem_vram_object represents a GEM object that is
@@ -126,8 +126,8 @@ drm_gem_vram_plane_helper_cleanup_fb(struct drm_plane *plane,
struct drm_plane_state *old_state);
/**
- * DRM_GEM_VRAM_PLANE_HELPER_FUNCS -
- * Initializes struct drm_plane_helper_funcs for VRAM handling
+ * DRM_GEM_VRAM_PLANE_HELPER_FUNCS - Initializes struct drm_plane_helper_funcs
+ * for VRAM handling
*
* Drivers may use GEM BOs as VRAM helpers for the framebuffer memory. This
* macro initializes struct drm_plane_helper_funcs to use the respective helper
@@ -150,8 +150,8 @@ void drm_gem_vram_simple_display_pipe_cleanup_fb(
struct drm_plane_state *old_state);
/**
- * define DRM_GEM_VRAM_DRIVER - default callback functions for \
- &struct drm_driver
+ * define DRM_GEM_VRAM_DRIVER - default callback functions for
+ * &struct drm_driver
*
* Drivers that use VRAM MM and GEM VRAM can use this macro to initialize
* &struct drm_driver with default functions.
@@ -185,8 +185,8 @@ struct drm_vram_mm {
};
/**
- * drm_vram_mm_of_bdev() - \
- Returns the container of type &struct ttm_device for field bdev.
+ * drm_vram_mm_of_bdev() - Returns the container of type &struct ttm_device for
+ * field bdev.
* @bdev: the TTM BO device
*
* Returns:
diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index 51e0f6059410..19ac7b36f608 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -252,8 +252,11 @@
#define QCOM_ID_IPQ9510 521
#define QCOM_ID_QRB4210 523
#define QCOM_ID_QRB2210 524
+#define QCOM_ID_SM8475 530
+#define QCOM_ID_SM8475P 531
#define QCOM_ID_SA8775P 534
#define QCOM_ID_QRU1000 539
+#define QCOM_ID_SM8475_2 540
#define QCOM_ID_QDU1000 545
#define QCOM_ID_SM8650 557
#define QCOM_ID_SM4450 568
@@ -265,6 +268,8 @@
#define QCOM_ID_IPQ5322 593
#define QCOM_ID_IPQ5312 594
#define QCOM_ID_IPQ5302 595
+#define QCOM_ID_QCS8550 603
+#define QCOM_ID_QCM8550 604
#define QCOM_ID_IPQ5300 624
/*
diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h
index 3090e09c9a55..bc15108aa3c2 100644
--- a/include/dt-bindings/clock/exynos850.h
+++ b/include/dt-bindings/clock/exynos850.h
@@ -320,6 +320,8 @@
#define CLK_GOUT_SSS_PCLK 12
#define CLK_GOUT_GPIO_CORE_PCLK 13
#define CLK_GOUT_SYSREG_CORE_PCLK 14
+#define CLK_GOUT_PDMA_CORE_ACLK 15
+#define CLK_GOUT_SPDMA_CORE_ACLK 16
/* CMU_DPU */
#define CLK_MOUT_DPU_USER 1
diff --git a/include/dt-bindings/clock/google,gs101.h b/include/dt-bindings/clock/google,gs101.h
index 21adec22387c..3dac3577788a 100644
--- a/include/dt-bindings/clock/google,gs101.h
+++ b/include/dt-bindings/clock/google,gs101.h
@@ -389,4 +389,133 @@
#define CLK_GOUT_MISC_WDT_CLUSTER1_PCLK 73
#define CLK_GOUT_MISC_XIU_D_MISC_ACLK 74
+/* CMU_PERIC0 */
+#define CLK_MOUT_PERIC0_BUS_USER 1
+#define CLK_MOUT_PERIC0_I3C_USER 2
+#define CLK_MOUT_PERIC0_USI0_UART_USER 3
+#define CLK_MOUT_PERIC0_USI14_USI_USER 4
+#define CLK_MOUT_PERIC0_USI1_USI_USER 5
+#define CLK_MOUT_PERIC0_USI2_USI_USER 6
+#define CLK_MOUT_PERIC0_USI3_USI_USER 7
+#define CLK_MOUT_PERIC0_USI4_USI_USER 8
+#define CLK_MOUT_PERIC0_USI5_USI_USER 9
+#define CLK_MOUT_PERIC0_USI6_USI_USER 10
+#define CLK_MOUT_PERIC0_USI7_USI_USER 11
+#define CLK_MOUT_PERIC0_USI8_USI_USER 12
+#define CLK_DOUT_PERIC0_I3C 13
+#define CLK_DOUT_PERIC0_USI0_UART 14
+#define CLK_DOUT_PERIC0_USI14_USI 15
+#define CLK_DOUT_PERIC0_USI1_USI 16
+#define CLK_DOUT_PERIC0_USI2_USI 17
+#define CLK_DOUT_PERIC0_USI3_USI 18
+#define CLK_DOUT_PERIC0_USI4_USI 19
+#define CLK_DOUT_PERIC0_USI5_USI 20
+#define CLK_DOUT_PERIC0_USI6_USI 21
+#define CLK_DOUT_PERIC0_USI7_USI 22
+#define CLK_DOUT_PERIC0_USI8_USI 23
+#define CLK_GOUT_PERIC0_IP 24
+#define CLK_GOUT_PERIC0_PERIC0_CMU_PERIC0_PCLK 25
+#define CLK_GOUT_PERIC0_CLK_PERIC0_OSCCLK_CLK 26
+#define CLK_GOUT_PERIC0_D_TZPC_PERIC0_PCLK 27
+#define CLK_GOUT_PERIC0_GPC_PERIC0_PCLK 28
+#define CLK_GOUT_PERIC0_GPIO_PERIC0_PCLK 29
+#define CLK_GOUT_PERIC0_LHM_AXI_P_PERIC0_I_CLK 30
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_0 31
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_1 32
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_10 33
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_11 34
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_12 35
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_13 36
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_14 37
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_15 38
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_2 39
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_3 40
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_4 41
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_5 42
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_6 43
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_7 44
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_8 45
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_9 46
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_0 47
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_1 48
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_10 49
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_11 50
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_12 51
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_13 52
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_14 53
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_15 54
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_2 55
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_3 56
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_4 57
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_5 58
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_6 59
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_7 60
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_8 61
+#define CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_9 62
+#define CLK_GOUT_PERIC0_PERIC0_TOP1_IPCLK_0 63
+#define CLK_GOUT_PERIC0_PERIC0_TOP1_IPCLK_2 64
+#define CLK_GOUT_PERIC0_PERIC0_TOP1_PCLK_0 65
+#define CLK_GOUT_PERIC0_PERIC0_TOP1_PCLK_2 66
+#define CLK_GOUT_PERIC0_CLK_PERIC0_BUSP_CLK 67
+#define CLK_GOUT_PERIC0_CLK_PERIC0_I3C_CLK 68
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI0_UART_CLK 69
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI14_USI_CLK 70
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI1_USI_CLK 71
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI2_USI_CLK 72
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI3_USI_CLK 73
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI4_USI_CLK 74
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI5_USI_CLK 75
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI6_USI_CLK 76
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI7_USI_CLK 77
+#define CLK_GOUT_PERIC0_CLK_PERIC0_USI8_USI_CLK 78
+#define CLK_GOUT_PERIC0_SYSREG_PERIC0_PCLK 79
+
+/* CMU_PERIC1 */
+#define CLK_MOUT_PERIC1_BUS_USER 1
+#define CLK_MOUT_PERIC1_I3C_USER 2
+#define CLK_MOUT_PERIC1_USI0_USI_USER 3
+#define CLK_MOUT_PERIC1_USI10_USI_USER 4
+#define CLK_MOUT_PERIC1_USI11_USI_USER 5
+#define CLK_MOUT_PERIC1_USI12_USI_USER 6
+#define CLK_MOUT_PERIC1_USI13_USI_USER 7
+#define CLK_MOUT_PERIC1_USI9_USI_USER 8
+#define CLK_DOUT_PERIC1_I3C 9
+#define CLK_DOUT_PERIC1_USI0_USI 10
+#define CLK_DOUT_PERIC1_USI10_USI 11
+#define CLK_DOUT_PERIC1_USI11_USI 12
+#define CLK_DOUT_PERIC1_USI12_USI 13
+#define CLK_DOUT_PERIC1_USI13_USI 14
+#define CLK_DOUT_PERIC1_USI9_USI 15
+#define CLK_GOUT_PERIC1_IP 16
+#define CLK_GOUT_PERIC1_PCLK 17
+#define CLK_GOUT_PERIC1_CLK_PERIC1_I3C_CLK 18
+#define CLK_GOUT_PERIC1_CLK_PERIC1_OSCCLK_CLK 19
+#define CLK_GOUT_PERIC1_D_TZPC_PERIC1_PCLK 20
+#define CLK_GOUT_PERIC1_GPC_PERIC1_PCLK 21
+#define CLK_GOUT_PERIC1_GPIO_PERIC1_PCLK 22
+#define CLK_GOUT_PERIC1_LHM_AXI_P_PERIC1_I_CLK 23
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_1 24
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_2 25
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_3 26
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_4 27
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_5 28
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_6 29
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_8 30
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_1 31
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_15 32
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_2 33
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_3 34
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_4 35
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_5 36
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_6 37
+#define CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_8 38
+#define CLK_GOUT_PERIC1_CLK_PERIC1_BUSP_CLK 39
+#define CLK_GOUT_PERIC1_CLK_PERIC1_USI0_USI_CLK 40
+#define CLK_GOUT_PERIC1_CLK_PERIC1_USI10_USI_CLK 41
+#define CLK_GOUT_PERIC1_CLK_PERIC1_USI11_USI_CLK 42
+#define CLK_GOUT_PERIC1_CLK_PERIC1_USI12_USI_CLK 43
+#define CLK_GOUT_PERIC1_CLK_PERIC1_USI13_USI_CLK 44
+#define CLK_GOUT_PERIC1_CLK_PERIC1_USI9_USI_CLK 45
+#define CLK_GOUT_PERIC1_SYSREG_PERIC1_PCLK 46
+
#endif /* _DT_BINDINGS_CLOCK_GOOGLE_GS101_H */
diff --git a/include/dt-bindings/clock/qcom,gcc-msm8953.h b/include/dt-bindings/clock/qcom,gcc-msm8953.h
index 783162da6148..13b4a62877e5 100644
--- a/include/dt-bindings/clock/qcom,gcc-msm8953.h
+++ b/include/dt-bindings/clock/qcom,gcc-msm8953.h
@@ -218,6 +218,10 @@
#define GCC_USB3PHY_PHY_BCR 3
#define GCC_USB3_PHY_BCR 4
#define GCC_USB_30_BCR 5
+#define GCC_MDSS_BCR 6
+#define GCC_CRYPTO_BCR 7
+#define GCC_SDCC1_BCR 8
+#define GCC_SDCC2_BCR 9
/* GDSCs */
#define CPP_GDSC 0
diff --git a/include/dt-bindings/clock/qcom,gcc-sc8180x.h b/include/dt-bindings/clock/qcom,gcc-sc8180x.h
index e893415ae13d..90c6e021a035 100644
--- a/include/dt-bindings/clock/qcom,gcc-sc8180x.h
+++ b/include/dt-bindings/clock/qcom,gcc-sc8180x.h
@@ -246,6 +246,8 @@
#define GCC_PCIE_3_CLKREF_CLK 236
#define GCC_USB3_PRIM_CLKREF_CLK 237
#define GCC_USB3_SEC_CLKREF_CLK 238
+#define GCC_UFS_MEM_CLKREF_EN 239
+#define GCC_UFS_CARD_CLKREF_EN 240
#define GCC_EMAC_BCR 0
#define GCC_GPU_BCR 1
diff --git a/include/dt-bindings/clock/qcom,x1e80100-camcc.h b/include/dt-bindings/clock/qcom,x1e80100-camcc.h
new file mode 100644
index 000000000000..d72fdfb06a7c
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,x1e80100-camcc.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_X1E80100_H
+#define _DT_BINDINGS_CLK_QCOM_CAM_CC_X1E80100_H
+
+/* CAM_CC clocks */
+#define CAM_CC_BPS_AHB_CLK 0
+#define CAM_CC_BPS_CLK 1
+#define CAM_CC_BPS_CLK_SRC 2
+#define CAM_CC_BPS_FAST_AHB_CLK 3
+#define CAM_CC_CAMNOC_AXI_NRT_CLK 4
+#define CAM_CC_CAMNOC_AXI_RT_CLK 5
+#define CAM_CC_CAMNOC_AXI_RT_CLK_SRC 6
+#define CAM_CC_CAMNOC_DCD_XO_CLK 7
+#define CAM_CC_CAMNOC_XO_CLK 8
+#define CAM_CC_CCI_0_CLK 9
+#define CAM_CC_CCI_0_CLK_SRC 10
+#define CAM_CC_CCI_1_CLK 11
+#define CAM_CC_CCI_1_CLK_SRC 12
+#define CAM_CC_CORE_AHB_CLK 13
+#define CAM_CC_CPAS_AHB_CLK 14
+#define CAM_CC_CPAS_BPS_CLK 15
+#define CAM_CC_CPAS_FAST_AHB_CLK 16
+#define CAM_CC_CPAS_IFE_0_CLK 17
+#define CAM_CC_CPAS_IFE_1_CLK 18
+#define CAM_CC_CPAS_IFE_LITE_CLK 19
+#define CAM_CC_CPAS_IPE_NPS_CLK 20
+#define CAM_CC_CPAS_SFE_0_CLK 21
+#define CAM_CC_CPHY_RX_CLK_SRC 22
+#define CAM_CC_CSI0PHYTIMER_CLK 23
+#define CAM_CC_CSI0PHYTIMER_CLK_SRC 24
+#define CAM_CC_CSI1PHYTIMER_CLK 25
+#define CAM_CC_CSI1PHYTIMER_CLK_SRC 26
+#define CAM_CC_CSI2PHYTIMER_CLK 27
+#define CAM_CC_CSI2PHYTIMER_CLK_SRC 28
+#define CAM_CC_CSI3PHYTIMER_CLK 29
+#define CAM_CC_CSI3PHYTIMER_CLK_SRC 30
+#define CAM_CC_CSI4PHYTIMER_CLK 31
+#define CAM_CC_CSI4PHYTIMER_CLK_SRC 32
+#define CAM_CC_CSI5PHYTIMER_CLK 33
+#define CAM_CC_CSI5PHYTIMER_CLK_SRC 34
+#define CAM_CC_CSID_CLK 35
+#define CAM_CC_CSID_CLK_SRC 36
+#define CAM_CC_CSID_CSIPHY_RX_CLK 37
+#define CAM_CC_CSIPHY0_CLK 38
+#define CAM_CC_CSIPHY1_CLK 39
+#define CAM_CC_CSIPHY2_CLK 40
+#define CAM_CC_CSIPHY3_CLK 41
+#define CAM_CC_CSIPHY4_CLK 42
+#define CAM_CC_CSIPHY5_CLK 43
+#define CAM_CC_FAST_AHB_CLK_SRC 44
+#define CAM_CC_GDSC_CLK 45
+#define CAM_CC_ICP_AHB_CLK 46
+#define CAM_CC_ICP_CLK 47
+#define CAM_CC_ICP_CLK_SRC 48
+#define CAM_CC_IFE_0_CLK 49
+#define CAM_CC_IFE_0_CLK_SRC 50
+#define CAM_CC_IFE_0_DSP_CLK 51
+#define CAM_CC_IFE_0_FAST_AHB_CLK 52
+#define CAM_CC_IFE_1_CLK 53
+#define CAM_CC_IFE_1_CLK_SRC 54
+#define CAM_CC_IFE_1_DSP_CLK 55
+#define CAM_CC_IFE_1_FAST_AHB_CLK 56
+#define CAM_CC_IFE_LITE_AHB_CLK 57
+#define CAM_CC_IFE_LITE_CLK 58
+#define CAM_CC_IFE_LITE_CLK_SRC 59
+#define CAM_CC_IFE_LITE_CPHY_RX_CLK 60
+#define CAM_CC_IFE_LITE_CSID_CLK 61
+#define CAM_CC_IFE_LITE_CSID_CLK_SRC 62
+#define CAM_CC_IPE_NPS_AHB_CLK 63
+#define CAM_CC_IPE_NPS_CLK 64
+#define CAM_CC_IPE_NPS_CLK_SRC 65
+#define CAM_CC_IPE_NPS_FAST_AHB_CLK 66
+#define CAM_CC_IPE_PPS_CLK 67
+#define CAM_CC_IPE_PPS_FAST_AHB_CLK 68
+#define CAM_CC_JPEG_CLK 69
+#define CAM_CC_JPEG_CLK_SRC 70
+#define CAM_CC_MCLK0_CLK 71
+#define CAM_CC_MCLK0_CLK_SRC 72
+#define CAM_CC_MCLK1_CLK 73
+#define CAM_CC_MCLK1_CLK_SRC 74
+#define CAM_CC_MCLK2_CLK 75
+#define CAM_CC_MCLK2_CLK_SRC 76
+#define CAM_CC_MCLK3_CLK 77
+#define CAM_CC_MCLK3_CLK_SRC 78
+#define CAM_CC_MCLK4_CLK 79
+#define CAM_CC_MCLK4_CLK_SRC 80
+#define CAM_CC_MCLK5_CLK 81
+#define CAM_CC_MCLK5_CLK_SRC 82
+#define CAM_CC_MCLK6_CLK 83
+#define CAM_CC_MCLK6_CLK_SRC 84
+#define CAM_CC_MCLK7_CLK 85
+#define CAM_CC_MCLK7_CLK_SRC 86
+#define CAM_CC_PLL0 87
+#define CAM_CC_PLL0_OUT_EVEN 88
+#define CAM_CC_PLL0_OUT_ODD 89
+#define CAM_CC_PLL1 90
+#define CAM_CC_PLL1_OUT_EVEN 91
+#define CAM_CC_PLL2 92
+#define CAM_CC_PLL3 93
+#define CAM_CC_PLL3_OUT_EVEN 94
+#define CAM_CC_PLL4 95
+#define CAM_CC_PLL4_OUT_EVEN 96
+#define CAM_CC_PLL6 97
+#define CAM_CC_PLL6_OUT_EVEN 98
+#define CAM_CC_PLL8 99
+#define CAM_CC_PLL8_OUT_EVEN 100
+#define CAM_CC_SFE_0_CLK 101
+#define CAM_CC_SFE_0_CLK_SRC 102
+#define CAM_CC_SFE_0_FAST_AHB_CLK 103
+#define CAM_CC_SLEEP_CLK 104
+#define CAM_CC_SLEEP_CLK_SRC 105
+#define CAM_CC_SLOW_AHB_CLK_SRC 106
+#define CAM_CC_XO_CLK_SRC 107
+
+/* CAM_CC power domains */
+#define CAM_CC_BPS_GDSC 0
+#define CAM_CC_IFE_0_GDSC 1
+#define CAM_CC_IFE_1_GDSC 2
+#define CAM_CC_IPE_0_GDSC 3
+#define CAM_CC_SFE_0_GDSC 4
+#define CAM_CC_TITAN_TOP_GDSC 5
+
+/* CAM_CC resets */
+#define CAM_CC_BPS_BCR 0
+#define CAM_CC_ICP_BCR 1
+#define CAM_CC_IFE_0_BCR 2
+#define CAM_CC_IFE_1_BCR 3
+#define CAM_CC_IPE_0_BCR 4
+#define CAM_CC_SFE_0_BCR 5
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,x1e80100-dispcc.h b/include/dt-bindings/clock/qcom,x1e80100-dispcc.h
new file mode 100644
index 000000000000..d4a83e4fd0d1
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,x1e80100-dispcc.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_X1E80100_DISP_CC_H
+#define _DT_BINDINGS_CLK_QCOM_X1E80100_DISP_CC_H
+
+/* DISP_CC clocks */
+#define DISP_CC_MDSS_ACCU_CLK 0
+#define DISP_CC_MDSS_AHB1_CLK 1
+#define DISP_CC_MDSS_AHB_CLK 2
+#define DISP_CC_MDSS_AHB_CLK_SRC 3
+#define DISP_CC_MDSS_BYTE0_CLK 4
+#define DISP_CC_MDSS_BYTE0_CLK_SRC 5
+#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 6
+#define DISP_CC_MDSS_BYTE0_INTF_CLK 7
+#define DISP_CC_MDSS_BYTE1_CLK 8
+#define DISP_CC_MDSS_BYTE1_CLK_SRC 9
+#define DISP_CC_MDSS_BYTE1_DIV_CLK_SRC 10
+#define DISP_CC_MDSS_BYTE1_INTF_CLK 11
+#define DISP_CC_MDSS_DPTX0_AUX_CLK 12
+#define DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 13
+#define DISP_CC_MDSS_DPTX0_LINK_CLK 14
+#define DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 15
+#define DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 16
+#define DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 17
+#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK 18
+#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 19
+#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK 20
+#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 21
+#define DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 22
+#define DISP_CC_MDSS_DPTX1_AUX_CLK 23
+#define DISP_CC_MDSS_DPTX1_AUX_CLK_SRC 24
+#define DISP_CC_MDSS_DPTX1_LINK_CLK 25
+#define DISP_CC_MDSS_DPTX1_LINK_CLK_SRC 26
+#define DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC 27
+#define DISP_CC_MDSS_DPTX1_LINK_INTF_CLK 28
+#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK 29
+#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC 30
+#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK 31
+#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC 32
+#define DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK 33
+#define DISP_CC_MDSS_DPTX2_AUX_CLK 34
+#define DISP_CC_MDSS_DPTX2_AUX_CLK_SRC 35
+#define DISP_CC_MDSS_DPTX2_LINK_CLK 36
+#define DISP_CC_MDSS_DPTX2_LINK_CLK_SRC 37
+#define DISP_CC_MDSS_DPTX2_LINK_DIV_CLK_SRC 38
+#define DISP_CC_MDSS_DPTX2_LINK_INTF_CLK 39
+#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK 40
+#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK_SRC 41
+#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK 42
+#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK_SRC 43
+#define DISP_CC_MDSS_DPTX2_USB_ROUTER_LINK_INTF_CLK 44
+#define DISP_CC_MDSS_DPTX3_AUX_CLK 45
+#define DISP_CC_MDSS_DPTX3_AUX_CLK_SRC 46
+#define DISP_CC_MDSS_DPTX3_LINK_CLK 47
+#define DISP_CC_MDSS_DPTX3_LINK_CLK_SRC 48
+#define DISP_CC_MDSS_DPTX3_LINK_DIV_CLK_SRC 49
+#define DISP_CC_MDSS_DPTX3_LINK_INTF_CLK 50
+#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK 51
+#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK_SRC 52
+#define DISP_CC_MDSS_ESC0_CLK 53
+#define DISP_CC_MDSS_ESC0_CLK_SRC 54
+#define DISP_CC_MDSS_ESC1_CLK 55
+#define DISP_CC_MDSS_ESC1_CLK_SRC 56
+#define DISP_CC_MDSS_MDP1_CLK 57
+#define DISP_CC_MDSS_MDP_CLK 58
+#define DISP_CC_MDSS_MDP_CLK_SRC 59
+#define DISP_CC_MDSS_MDP_LUT1_CLK 60
+#define DISP_CC_MDSS_MDP_LUT_CLK 61
+#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 62
+#define DISP_CC_MDSS_PCLK0_CLK 63
+#define DISP_CC_MDSS_PCLK0_CLK_SRC 64
+#define DISP_CC_MDSS_PCLK1_CLK 65
+#define DISP_CC_MDSS_PCLK1_CLK_SRC 66
+#define DISP_CC_MDSS_RSCC_AHB_CLK 67
+#define DISP_CC_MDSS_RSCC_VSYNC_CLK 68
+#define DISP_CC_MDSS_VSYNC1_CLK 69
+#define DISP_CC_MDSS_VSYNC_CLK 70
+#define DISP_CC_MDSS_VSYNC_CLK_SRC 71
+#define DISP_CC_PLL0 72
+#define DISP_CC_PLL1 73
+#define DISP_CC_SLEEP_CLK 74
+#define DISP_CC_SLEEP_CLK_SRC 75
+#define DISP_CC_XO_CLK 76
+#define DISP_CC_XO_CLK_SRC 77
+
+/* DISP_CC resets */
+#define DISP_CC_MDSS_CORE_BCR 0
+#define DISP_CC_MDSS_CORE_INT2_BCR 1
+#define DISP_CC_MDSS_RSCC_BCR 2
+
+/* DISP_CC GDSCR */
+#define MDSS_GDSC 0
+#define MDSS_INT2_GDSC 1
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,x1e80100-gpucc.h b/include/dt-bindings/clock/qcom,x1e80100-gpucc.h
new file mode 100644
index 000000000000..61a3a8f3ac43
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,x1e80100-gpucc.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_X1E80100_GPU_CC_H
+#define _DT_BINDINGS_CLK_QCOM_X1E80100_GPU_CC_H
+
+/* GPU_CC clocks */
+#define GPU_CC_AHB_CLK 0
+#define GPU_CC_CB_CLK 1
+#define GPU_CC_CRC_AHB_CLK 2
+#define GPU_CC_CX_FF_CLK 3
+#define GPU_CC_CX_GMU_CLK 4
+#define GPU_CC_CXO_AON_CLK 5
+#define GPU_CC_CXO_CLK 6
+#define GPU_CC_DEMET_CLK 7
+#define GPU_CC_DEMET_DIV_CLK_SRC 8
+#define GPU_CC_FF_CLK_SRC 9
+#define GPU_CC_FREQ_MEASURE_CLK 10
+#define GPU_CC_GMU_CLK_SRC 11
+#define GPU_CC_GX_GMU_CLK 12
+#define GPU_CC_GX_VSENSE_CLK 13
+#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 14
+#define GPU_CC_HUB_AON_CLK 15
+#define GPU_CC_HUB_CLK_SRC 16
+#define GPU_CC_HUB_CX_INT_CLK 17
+#define GPU_CC_MEMNOC_GFX_CLK 18
+#define GPU_CC_MND1X_0_GFX3D_CLK 19
+#define GPU_CC_MND1X_1_GFX3D_CLK 20
+#define GPU_CC_PLL0 21
+#define GPU_CC_PLL1 22
+#define GPU_CC_SLEEP_CLK 23
+#define GPU_CC_XO_CLK_SRC 24
+#define GPU_CC_XO_DIV_CLK_SRC 25
+
+/* GDSCs */
+#define GPU_CX_GDSC 0
+#define GPU_GX_GDSC 1
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,x1e80100-tcsr.h b/include/dt-bindings/clock/qcom,x1e80100-tcsr.h
new file mode 100644
index 000000000000..bae2c4654ee2
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,x1e80100-tcsr.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023, Linaro Limited
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_X1E80100_TCSR_CC_H
+#define _DT_BINDINGS_CLK_QCOM_X1E80100_TCSR_CC_H
+
+/* TCSR CC clocks */
+#define TCSR_PCIE_2L_4_CLKREF_EN 0
+#define TCSR_PCIE_2L_5_CLKREF_EN 1
+#define TCSR_PCIE_8L_CLKREF_EN 2
+#define TCSR_USB3_MP0_CLKREF_EN 3
+#define TCSR_USB3_MP1_CLKREF_EN 4
+#define TCSR_USB2_1_CLKREF_EN 5
+#define TCSR_UFS_PHY_CLKREF_EN 6
+#define TCSR_USB4_1_CLKREF_EN 7
+#define TCSR_USB4_2_CLKREF_EN 8
+#define TCSR_USB2_2_CLKREF_EN 9
+#define TCSR_PCIE_4L_CLKREF_EN 10
+#define TCSR_EDP_CLKREF_EN 11
+
+#endif
diff --git a/include/dt-bindings/clock/renesas,r8a779h0-cpg-mssr.h b/include/dt-bindings/clock/renesas,r8a779h0-cpg-mssr.h
new file mode 100644
index 000000000000..7ab6cfbaf901
--- /dev/null
+++ b/include/dt-bindings/clock/renesas,r8a779h0-cpg-mssr.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ */
+#ifndef __DT_BINDINGS_CLOCK_RENESAS_R8A779H0_CPG_MSSR_H__
+#define __DT_BINDINGS_CLOCK_RENESAS_R8A779H0_CPG_MSSR_H__
+
+#include <dt-bindings/clock/renesas-cpg-mssr.h>
+
+/* r8a779h0 CPG Core Clocks */
+
+#define R8A779H0_CLK_ZX 0
+#define R8A779H0_CLK_ZD 1
+#define R8A779H0_CLK_ZS 2
+#define R8A779H0_CLK_ZT 3
+#define R8A779H0_CLK_ZTR 4
+#define R8A779H0_CLK_S0D2 5
+#define R8A779H0_CLK_S0D3 6
+#define R8A779H0_CLK_S0D4 7
+#define R8A779H0_CLK_S0D1_VIO 8
+#define R8A779H0_CLK_S0D2_VIO 9
+#define R8A779H0_CLK_S0D4_VIO 10
+#define R8A779H0_CLK_S0D8_VIO 11
+#define R8A779H0_CLK_VIOBUSD1 12
+#define R8A779H0_CLK_VIOBUSD2 13
+#define R8A779H0_CLK_S0D1_VC 14
+#define R8A779H0_CLK_S0D2_VC 15
+#define R8A779H0_CLK_S0D4_VC 16
+#define R8A779H0_CLK_VCBUSD1 17
+#define R8A779H0_CLK_VCBUSD2 18
+#define R8A779H0_CLK_S0D2_MM 19
+#define R8A779H0_CLK_S0D4_MM 20
+#define R8A779H0_CLK_S0D2_U3DG 21
+#define R8A779H0_CLK_S0D4_U3DG 22
+#define R8A779H0_CLK_S0D2_RT 23
+#define R8A779H0_CLK_S0D3_RT 24
+#define R8A779H0_CLK_S0D4_RT 25
+#define R8A779H0_CLK_S0D6_RT 26
+#define R8A779H0_CLK_S0D2_PER 27
+#define R8A779H0_CLK_S0D3_PER 28
+#define R8A779H0_CLK_S0D4_PER 29
+#define R8A779H0_CLK_S0D6_PER 30
+#define R8A779H0_CLK_S0D12_PER 31
+#define R8A779H0_CLK_S0D24_PER 32
+#define R8A779H0_CLK_S0D1_HSC 33
+#define R8A779H0_CLK_S0D2_HSC 34
+#define R8A779H0_CLK_S0D4_HSC 35
+#define R8A779H0_CLK_S0D8_HSC 36
+#define R8A779H0_CLK_SVD1_IR 37
+#define R8A779H0_CLK_SVD2_IR 38
+#define R8A779H0_CLK_IMPAD1 39
+#define R8A779H0_CLK_IMPAD4 40
+#define R8A779H0_CLK_IMPB 41
+#define R8A779H0_CLK_SVD1_VIP 42
+#define R8A779H0_CLK_SVD2_VIP 43
+#define R8A779H0_CLK_CL 44
+#define R8A779H0_CLK_CL16M 45
+#define R8A779H0_CLK_CL16M_MM 46
+#define R8A779H0_CLK_CL16M_RT 47
+#define R8A779H0_CLK_CL16M_PER 48
+#define R8A779H0_CLK_CL16M_HSC 49
+#define R8A779H0_CLK_ZC0 50
+#define R8A779H0_CLK_ZC1 51
+#define R8A779H0_CLK_ZC2 52
+#define R8A779H0_CLK_ZC3 53
+#define R8A779H0_CLK_ZB3 54
+#define R8A779H0_CLK_ZB3D2 55
+#define R8A779H0_CLK_ZB3D4 56
+#define R8A779H0_CLK_ZG 57
+#define R8A779H0_CLK_SD0H 58
+#define R8A779H0_CLK_SD0 59
+#define R8A779H0_CLK_RPC 60
+#define R8A779H0_CLK_RPCD2 61
+#define R8A779H0_CLK_MSO 62
+#define R8A779H0_CLK_CANFD 63
+#define R8A779H0_CLK_CSI 64
+#define R8A779H0_CLK_FRAY 65
+#define R8A779H0_CLK_IPC 66
+#define R8A779H0_CLK_SASYNCRT 67
+#define R8A779H0_CLK_SASYNCPERD1 68
+#define R8A779H0_CLK_SASYNCPERD2 69
+#define R8A779H0_CLK_SASYNCPERD4 70
+#define R8A779H0_CLK_DSIEXT 71
+#define R8A779H0_CLK_DSIREF 72
+#define R8A779H0_CLK_ADGH 73
+#define R8A779H0_CLK_OSC 74
+#define R8A779H0_CLK_ZR0 75
+#define R8A779H0_CLK_ZR1 76
+#define R8A779H0_CLK_ZR2 77
+#define R8A779H0_CLK_RGMII 78
+#define R8A779H0_CLK_CPEX 79
+#define R8A779H0_CLK_CP 80
+#define R8A779H0_CLK_CBFUSA 81
+#define R8A779H0_CLK_R 82
+
+#endif /* __DT_BINDINGS_CLOCK_RENESAS_R8A779H0_CPG_MSSR_H__ */
diff --git a/include/dt-bindings/clock/rockchip,rk3588-cru.h b/include/dt-bindings/clock/rockchip,rk3588-cru.h
index 5790b1391201..0c7d3ca2d5bc 100644
--- a/include/dt-bindings/clock/rockchip,rk3588-cru.h
+++ b/include/dt-bindings/clock/rockchip,rk3588-cru.h
@@ -733,8 +733,7 @@
#define ACLK_AV1_PRE 718
#define PCLK_AV1_PRE 719
#define HCLK_SDIO_PRE 720
-
-#define CLK_NR_CLKS (HCLK_SDIO_PRE + 1)
+#define PCLK_VO1GRF 721
/* scmi-clocks indices */
diff --git a/include/dt-bindings/mfd/stm32f7-rcc.h b/include/dt-bindings/mfd/stm32f7-rcc.h
index 8d73a9c51e2b..a4e4f9271395 100644
--- a/include/dt-bindings/mfd/stm32f7-rcc.h
+++ b/include/dt-bindings/mfd/stm32f7-rcc.h
@@ -108,6 +108,7 @@
#define STM32F7_RCC_APB2_SAI1 22
#define STM32F7_RCC_APB2_SAI2 23
#define STM32F7_RCC_APB2_LTDC 26
+#define STM32F7_RCC_APB2_DSI 27
#define STM32F7_APB2_RESET(bit) (STM32F7_RCC_APB2_##bit + (0x24 * 8))
#define STM32F7_APB2_CLOCK(bit) (STM32F7_RCC_APB2_##bit + 0xA0)
diff --git a/include/dt-bindings/power/renesas,r8a779h0-sysc.h b/include/dt-bindings/power/renesas,r8a779h0-sysc.h
new file mode 100644
index 000000000000..f27976f523e8
--- /dev/null
+++ b/include/dt-bindings/power/renesas,r8a779h0-sysc.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ */
+#ifndef __DT_BINDINGS_POWER_RENESAS_R8A779H0_SYSC_H__
+#define __DT_BINDINGS_POWER_RENESAS_R8A779H0_SYSC_H__
+
+/*
+ * These power domain indices match the Power Domain Register Numbers (PDR)
+ */
+
+#define R8A779H0_PD_A1E0D0C0 0
+#define R8A779H0_PD_A1E0D0C1 1
+#define R8A779H0_PD_A1E0D0C2 2
+#define R8A779H0_PD_A1E0D0C3 3
+#define R8A779H0_PD_A2E0D0 16
+#define R8A779H0_PD_A3CR0 21
+#define R8A779H0_PD_A3CR1 22
+#define R8A779H0_PD_A3CR2 23
+#define R8A779H0_PD_A33DGA 24
+#define R8A779H0_PD_A23DGB 25
+#define R8A779H0_PD_C4 31
+#define R8A779H0_PD_A1DSP0 33
+#define R8A779H0_PD_A2IMP01 34
+#define R8A779H0_PD_A2PSC 35
+#define R8A779H0_PD_A2CV0 36
+#define R8A779H0_PD_A2CV1 37
+#define R8A779H0_PD_A3IMR0 38
+#define R8A779H0_PD_A3IMR1 39
+#define R8A779H0_PD_A3VC 40
+#define R8A779H0_PD_A2CN0 42
+#define R8A779H0_PD_A1CN0 44
+#define R8A779H0_PD_A1DSP1 45
+#define R8A779H0_PD_A2DMA 47
+#define R8A779H0_PD_A2CV2 48
+#define R8A779H0_PD_A2CV3 49
+#define R8A779H0_PD_A3IMR2 50
+#define R8A779H0_PD_A3IMR3 51
+#define R8A779H0_PD_A3PCI 52
+#define R8A779H0_PD_A2PCIPHY 53
+#define R8A779H0_PD_A3VIP0 56
+#define R8A779H0_PD_A3VIP2 58
+#define R8A779H0_PD_A3ISP0 60
+#define R8A779H0_PD_A3DUL 62
+
+/* Always-on power area */
+#define R8A779H0_PD_ALWAYS_ON 64
+
+#endif /* __DT_BINDINGS_POWER_RENESAS_R8A779H0_SYSC_H__ */
diff --git a/include/dt-bindings/reset/qcom,x1e80100-gpucc.h b/include/dt-bindings/reset/qcom,x1e80100-gpucc.h
new file mode 100644
index 000000000000..32b43e71a16f
--- /dev/null
+++ b/include/dt-bindings/reset/qcom,x1e80100-gpucc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_RESET_QCOM_X1E80100_GPU_CC_H
+#define _DT_BINDINGS_RESET_QCOM_X1E80100_GPU_CC_H
+
+#define GPUCC_GPU_CC_ACD_BCR 0
+#define GPUCC_GPU_CC_CB_BCR 1
+#define GPUCC_GPU_CC_CX_BCR 2
+#define GPUCC_GPU_CC_FAST_HUB_BCR 3
+#define GPUCC_GPU_CC_FF_BCR 4
+#define GPUCC_GPU_CC_GFX3D_AON_BCR 5
+#define GPUCC_GPU_CC_GMU_BCR 6
+#define GPUCC_GPU_CC_GX_BCR 7
+#define GPUCC_GPU_CC_XO_BCR 8
+
+#endif
diff --git a/include/kunit/test.h b/include/kunit/test.h
index fcb4a4940ace..61637ef32302 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -579,12 +579,12 @@ void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt,
void __noreturn __kunit_abort(struct kunit *test);
-void __kunit_do_failed_assertion(struct kunit *test,
- const struct kunit_loc *loc,
- enum kunit_assert_type type,
- const struct kunit_assert *assert,
- assert_format_t assert_format,
- const char *fmt, ...);
+void __printf(6, 7) __kunit_do_failed_assertion(struct kunit *test,
+ const struct kunit_loc *loc,
+ enum kunit_assert_type type,
+ const struct kunit_assert *assert,
+ assert_format_t assert_format,
+ const char *fmt, ...);
#define _KUNIT_FAILED(test, assert_type, assert_class, assert_format, INITIALIZER, fmt, ...) do { \
static const struct kunit_loc __loc = KUNIT_CURRENT_LOC; \
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index dc7ed2f46886..2b90c48a6a87 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -85,8 +85,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn,
u64 *value);
struct amd_iommu *get_amd_iommu(unsigned int idx);
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-int amd_iommu_snp_enable(void);
+#ifdef CONFIG_KVM_AMD_SEV
+int amd_iommu_snp_disable(void);
+#else
+static inline int amd_iommu_snp_disable(void) { return 0; }
#endif
#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index 3d0fde57ba90..c906f666ff5d 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -209,7 +209,7 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
#define module_ffa_driver(__ffa_driver) \
module_driver(__ffa_driver, ffa_register, ffa_unregister)
-extern struct bus_type ffa_bus_type;
+extern const struct bus_type ffa_bus_type;
/* FFA transport related */
struct ffa_partition_info {
diff --git a/include/linux/async.h b/include/linux/async.h
index 33c9ff4afb49..19b778d08600 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -120,4 +120,5 @@ extern void async_synchronize_cookie(async_cookie_t cookie);
extern void async_synchronize_cookie_domain(async_cookie_t cookie,
struct async_domain *domain);
extern bool current_is_async(void);
+extern void async_init(void);
#endif
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index 5e95faa959c4..956bcba5dbf2 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -2005,6 +2005,7 @@ raw_atomic_xchg_relaxed(atomic_t *v, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere.
*
@@ -2033,6 +2034,7 @@ raw_atomic_cmpxchg(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere.
*
@@ -2061,6 +2063,7 @@ raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere.
*
@@ -2088,6 +2091,7 @@ raw_atomic_cmpxchg_release(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere.
*
@@ -2112,7 +2116,8 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere.
*
@@ -2145,7 +2150,8 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere.
*
@@ -2178,7 +2184,8 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere.
*
@@ -2210,7 +2217,8 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere.
*
@@ -2403,6 +2411,7 @@ raw_atomic_add_negative_relaxed(int i, atomic_t *v)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere.
*
@@ -2432,6 +2441,7 @@ raw_atomic_fetch_add_unless(atomic_t *v, int a, int u)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_add_unless() elsewhere.
*
@@ -2452,6 +2462,7 @@ raw_atomic_add_unless(atomic_t *v, int a, int u)
* @v: pointer to atomic_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere.
*
@@ -2472,6 +2483,7 @@ raw_atomic_inc_not_zero(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere.
*
@@ -2499,6 +2511,7 @@ raw_atomic_inc_unless_negative(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere.
*
@@ -2526,6 +2539,7 @@ raw_atomic_dec_unless_positive(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere.
*
@@ -4117,6 +4131,7 @@ raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere.
*
@@ -4145,6 +4160,7 @@ raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere.
*
@@ -4173,6 +4189,7 @@ raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere.
*
@@ -4200,6 +4217,7 @@ raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere.
*
@@ -4224,7 +4242,8 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere.
*
@@ -4257,7 +4276,8 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere.
*
@@ -4290,7 +4310,8 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere.
*
@@ -4322,7 +4343,8 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere.
*
@@ -4515,6 +4537,7 @@ raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere.
*
@@ -4544,6 +4567,7 @@ raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere.
*
@@ -4564,6 +4588,7 @@ raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
* @v: pointer to atomic64_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere.
*
@@ -4584,6 +4609,7 @@ raw_atomic64_inc_not_zero(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere.
*
@@ -4611,6 +4637,7 @@ raw_atomic64_inc_unless_negative(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere.
*
@@ -4638,6 +4665,7 @@ raw_atomic64_dec_unless_positive(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere.
*
@@ -4662,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
}
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// eec048affea735b8464f58e6d96992101f8f85f1
+// 14850c0b0db20c62fdc78ccd1d42b98b88d76331
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index 54d7bbe0aeaa..debd487fe971 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -1182,6 +1182,7 @@ atomic_xchg_relaxed(atomic_t *v, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there.
*
@@ -1202,6 +1203,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there.
*
@@ -1221,6 +1223,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there.
*
@@ -1241,6 +1244,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there.
*
@@ -1260,7 +1264,8 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there.
*
@@ -1282,7 +1287,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there.
*
@@ -1303,7 +1309,8 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there.
*
@@ -1325,7 +1332,8 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there.
*
@@ -1475,6 +1483,7 @@ atomic_add_negative_relaxed(int i, atomic_t *v)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there.
*
@@ -1495,6 +1504,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_add_unless() there.
*
@@ -1513,6 +1523,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
* @v: pointer to atomic_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there.
*
@@ -1531,6 +1542,7 @@ atomic_inc_not_zero(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there.
*
@@ -1549,6 +1561,7 @@ atomic_inc_unless_negative(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there.
*
@@ -1567,6 +1580,7 @@ atomic_dec_unless_positive(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there.
*
@@ -2746,6 +2760,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there.
*
@@ -2766,6 +2781,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there.
*
@@ -2785,6 +2801,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there.
*
@@ -2805,6 +2822,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there.
*
@@ -2824,7 +2842,8 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there.
*
@@ -2846,7 +2865,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there.
*
@@ -2867,7 +2887,8 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there.
*
@@ -2889,7 +2910,8 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there.
*
@@ -3039,6 +3061,7 @@ atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there.
*
@@ -3059,6 +3082,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_add_unless() there.
*
@@ -3077,6 +3101,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
* @v: pointer to atomic64_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there.
*
@@ -3095,6 +3120,7 @@ atomic64_inc_not_zero(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there.
*
@@ -3113,6 +3139,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there.
*
@@ -3131,6 +3158,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there.
*
@@ -4310,6 +4338,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there.
*
@@ -4330,6 +4359,7 @@ atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there.
*
@@ -4349,6 +4379,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there.
*
@@ -4369,6 +4400,7 @@ atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there.
*
@@ -4388,7 +4420,8 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there.
*
@@ -4410,7 +4443,8 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there.
*
@@ -4431,7 +4465,8 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there.
*
@@ -4453,7 +4488,8 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there.
*
@@ -4603,6 +4639,7 @@ atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there.
*
@@ -4623,6 +4660,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there.
*
@@ -4641,6 +4679,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u)
* @v: pointer to atomic_long_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there.
*
@@ -4659,6 +4698,7 @@ atomic_long_inc_not_zero(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there.
*
@@ -4677,6 +4717,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there.
*
@@ -4695,6 +4736,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there.
*
@@ -5008,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 2cc4bc990fef44d3836ec108f11b610f3f438184
+// ce5b65e0f1f8a276268b667194581d24bed219d4
diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
index c82947170ddc..3ef844b3ab8a 100644
--- a/include/linux/atomic/atomic-long.h
+++ b/include/linux/atomic/atomic-long.h
@@ -1352,6 +1352,7 @@ raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere.
*
@@ -1374,6 +1375,7 @@ raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere.
*
@@ -1396,6 +1398,7 @@ raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere.
*
@@ -1418,6 +1421,7 @@ raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere.
*
@@ -1440,7 +1444,8 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere.
*
@@ -1463,7 +1468,8 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere.
*
@@ -1486,7 +1492,8 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere.
*
@@ -1509,7 +1516,8 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere.
*
@@ -1677,6 +1685,7 @@ raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere.
*
@@ -1699,6 +1708,7 @@ raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere.
*
@@ -1719,6 +1729,7 @@ raw_atomic_long_add_unless(atomic_long_t *v, long a, long u)
* @v: pointer to atomic_long_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere.
*
@@ -1739,6 +1750,7 @@ raw_atomic_long_inc_not_zero(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere.
*
@@ -1759,6 +1771,7 @@ raw_atomic_long_inc_unless_negative(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere.
*
@@ -1779,6 +1792,7 @@ raw_atomic_long_dec_unless_positive(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere.
*
@@ -1795,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v)
}
#endif /* _LINUX_ATOMIC_LONG_H */
-// 4ef23f98c73cff96d239896175fd26b10b88899e
+// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index ae12696ec492..2ad261082bba 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -141,8 +141,6 @@ struct bdi_writeback {
struct delayed_work dwork; /* work item used for writeback */
struct delayed_work bw_dwork; /* work item used for bandwidth estimate */
- unsigned long dirty_sleep; /* last wait */
-
struct list_head bdi_node; /* anchored at bdi->wb_list */
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -179,6 +177,11 @@ struct backing_dev_info {
* any dirty wbs, which is depended upon by bdi_has_dirty().
*/
atomic_long_t tot_write_bandwidth;
+ /*
+ * Jiffies when last process was dirty throttled on this bdi. Used by
+ * blk-wbt.
+ */
+ unsigned long last_bdp_sleep;
struct bdi_writeback wb; /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1a97277f99b1..8e7af9a03b41 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
-void wb_wakeup_delayed(struct bdi_writeback *wb);
void wb_wait_for_completion(struct wb_completion *done);
diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index ebfa12f69501..63928f173223 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -66,7 +66,8 @@
_pfx "mask is not constant"); \
BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \
BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \
- ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
+ ~((_mask) >> __bf_shf(_mask)) & \
+ (0 + (_val)) : 0, \
_pfx "value too large for the field"); \
BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \
__bf_cast_unsigned(_reg, ~0ull), \
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 99451431e4d6..fb3a9c93ac86 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
#include <linux/align.h>
#include <linux/bitops.h>
+#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/find.h>
#include <linux/limits.h>
@@ -54,6 +55,7 @@ struct device;
* bitmap_full(src, nbits) Are all bits set in *src?
* bitmap_weight(src, nbits) Hamming Weight: number set bits
* bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap
+ * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap
* bitmap_set(dst, pos, nbits) Set specified bit area
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
@@ -62,6 +64,8 @@ struct device;
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest
* bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask)
+ * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src)
+ * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src)
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
* bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
* bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
@@ -127,6 +131,8 @@ unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node);
unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node);
void bitmap_free(const unsigned long *bitmap);
+DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T))
+
/* Managed variants of the above. */
unsigned long *devm_bitmap_alloc(struct device *dev,
unsigned int nbits, gfp_t flags);
@@ -169,6 +175,8 @@ bool __bitmap_subset(const unsigned long *bitmap1,
unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
void __bitmap_set(unsigned long *map, unsigned int start, int len);
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
@@ -425,6 +433,15 @@ unsigned long bitmap_weight_and(const unsigned long *src1,
return __bitmap_weight_and(src1, src2, nbits);
}
+static __always_inline
+unsigned long bitmap_weight_andnot(const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits));
+ return __bitmap_weight_andnot(src1, src2, nbits);
+}
+
static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
unsigned int nbits)
{
@@ -487,6 +504,105 @@ static inline void bitmap_replace(unsigned long *dst,
__bitmap_replace(dst, old, new, mask, nbits);
}
+/**
+ * bitmap_scatter - Scatter a bitmap according to the given mask
+ * @dst: scattered bitmap
+ * @src: gathered bitmap
+ * @mask: mask representing bits to assign to in the scattered bitmap
+ * @nbits: number of bits in each of these bitmaps
+ *
+ * Scatters bitmap with sequential bits according to the given @mask.
+ *
+ * Example:
+ * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302.
+ *
+ * Or in binary form
+ * @src @mask @dst
+ * 0000000001011010 0001001100010011 0000001100000010
+ *
+ * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12)
+ *
+ * A more 'visual' description of the operation:
+ * src: 0000000001011010
+ * ||||||
+ * +------+|||||
+ * | +----+||||
+ * | |+----+|||
+ * | || +-+||
+ * | || | ||
+ * mask: ...v..vv...v..vv
+ * ...0..11...0..10
+ * dst: 0000001100000010
+ *
+ * A relationship exists between bitmap_scatter() and bitmap_gather().
+ * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
+ * See bitmap_scatter() for details related to this relationship.
+ */
+static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
+{
+ unsigned int n = 0;
+ unsigned int bit;
+
+ bitmap_zero(dst, nbits);
+
+ for_each_set_bit(bit, mask, nbits)
+ __assign_bit(bit, dst, test_bit(n++, src));
+}
+
+/**
+ * bitmap_gather - Gather a bitmap according to given mask
+ * @dst: gathered bitmap
+ * @src: scattered bitmap
+ * @mask: mask representing bits to extract from in the scattered bitmap
+ * @nbits: number of bits in each of these bitmaps
+ *
+ * Gathers bitmap with sparse bits according to the given @mask.
+ *
+ * Example:
+ * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a.
+ *
+ * Or in binary form
+ * @src @mask @dst
+ * 0000001100000010 0001001100010011 0000000000011010
+ *
+ * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5)
+ *
+ * A more 'visual' description of the operation:
+ * mask: ...v..vv...v..vv
+ * src: 0000001100000010
+ * ^ ^^ ^ 0
+ * | || | 10
+ * | || > 010
+ * | |+--> 1010
+ * | +--> 11010
+ * +----> 011010
+ * dst: 0000000000011010
+ *
+ * A relationship exists between bitmap_gather() and bitmap_scatter(). See
+ * bitmap_scatter() for the bitmap scatter detailed operations.
+ * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n).
+ * The operation bitmap_gather(result, scattered, mask, n) leads to a result
+ * equal or equivalent to src.
+ *
+ * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather()
+ * are not bijective.
+ * The result and src values are equivalent in that sense that a call to
+ * bitmap_scatter(res, src, mask, n) and a call to
+ * bitmap_scatter(res, result, mask, n) will lead to the same res value.
+ */
+static inline void bitmap_gather(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
+{
+ unsigned int n = 0;
+ unsigned int bit;
+
+ bitmap_zero(dst, nbits);
+
+ for_each_set_bit(bit, mask, nbits)
+ __assign_bit(n++, dst, test_bit(bit, src));
+}
+
static inline void bitmap_next_set_region(unsigned long *bitmap,
unsigned int *rs, unsigned int *re,
unsigned int end)
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 378b2459efe2..e253e7bd0d17 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -20,6 +20,7 @@ struct blk_integrity_iter {
unsigned int data_size;
unsigned short interval;
unsigned char tuple_size;
+ unsigned char pi_offset;
const char *disk_name;
};
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7a8150a5f051..d3d8fd8e229b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -8,6 +8,7 @@
#include <linux/scatterlist.h>
#include <linux/prefetch.h>
#include <linux/srcu.h>
+#include <linux/rw_hint.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -135,6 +136,7 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot;
#endif
+ enum rw_hint write_hint;
unsigned short ioprio;
enum mq_rq_state state;
@@ -682,17 +684,19 @@ enum {
#define BLK_MQ_NO_HCTX_IDX (-1U)
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
+ struct queue_limits *lim, void *queuedata,
struct lock_class_key *lkclass);
-#define blk_mq_alloc_disk(set, queuedata) \
+#define blk_mq_alloc_disk(set, lim, queuedata) \
({ \
static struct lock_class_key __key; \
\
- __blk_mq_alloc_disk(set, queuedata, &__key); \
+ __blk_mq_alloc_disk(set, lim, queuedata, &__key); \
})
struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
struct lock_class_key *lkclass);
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
+ struct queue_limits *lim, void *queuedata);
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q);
void blk_mq_destroy_queue(struct request_queue *);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f288c94374b3..cb1526ec44b5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -10,6 +10,7 @@
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>
+#include <linux/rw_hint.h>
struct bio_set;
struct bio;
@@ -206,52 +207,10 @@ static inline bool blk_path_error(blk_status_t error)
return true;
}
-/*
- * From most significant bit:
- * 1 bit: reserved for other usage, see below
- * 12 bits: original size of bio
- * 51 bits: issue time of bio
- */
-#define BIO_ISSUE_RES_BITS 1
-#define BIO_ISSUE_SIZE_BITS 12
-#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS)
-#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
-#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
-#define BIO_ISSUE_SIZE_MASK \
- (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
-#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))
-
-/* Reserved bit for blk-throtl */
-#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)
-
struct bio_issue {
u64 value;
};
-static inline u64 __bio_issue_time(u64 time)
-{
- return time & BIO_ISSUE_TIME_MASK;
-}
-
-static inline u64 bio_issue_time(struct bio_issue *issue)
-{
- return __bio_issue_time(issue->value);
-}
-
-static inline sector_t bio_issue_size(struct bio_issue *issue)
-{
- return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
-}
-
-static inline void bio_issue_init(struct bio_issue *issue,
- sector_t size)
-{
- size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
- issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
- (ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
- ((u64)size << BIO_ISSUE_SIZE_SHIFT));
-}
-
typedef __u32 __bitwise blk_opf_t;
typedef unsigned int blk_qc_t;
@@ -269,6 +228,7 @@ struct bio {
*/
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
+ enum rw_hint bi_write_hint;
blk_status_t bi_status;
atomic_t __bi_remaining;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99e4f5e72213..f9b87c39cab0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
#include <linux/sbitmap.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
+#include <linux/file.h>
struct module;
struct request_queue;
@@ -42,7 +43,7 @@ struct blk_crypto_profile;
extern const struct device_type disk_type;
extern const struct device_type part_type;
-extern struct class block_class;
+extern const struct class block_class;
/*
* Maximum number of blkcg policies allowed to be registered concurrently.
@@ -108,6 +109,7 @@ struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
unsigned char tuple_size;
+ unsigned char pi_offset;
unsigned char interval_exp;
unsigned char tag_size;
};
@@ -189,8 +191,6 @@ struct gendisk {
* blk_mq_unfreeze_queue().
*/
unsigned int nr_zones;
- unsigned int max_open_zones;
- unsigned int max_active_zones;
unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock;
#endif /* CONFIG_BLK_DEV_ZONED */
@@ -292,6 +292,7 @@ struct queue_limits {
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_hw_discard_sectors;
+ unsigned int max_user_discard_sectors;
unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
unsigned int max_zone_append_sectors;
@@ -307,6 +308,8 @@ struct queue_limits {
unsigned char discard_misaligned;
unsigned char raid_partial_stripes_expensive;
bool zoned;
+ unsigned int max_open_zones;
+ unsigned int max_active_zones;
/*
* Drivers that set dma_alignment to less than 511 must be prepared to
@@ -325,7 +328,7 @@ void disk_set_zoned(struct gendisk *disk);
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
- sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask);
+ sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk,
void (*update_driver_data)(struct gendisk *disk));
@@ -473,6 +476,7 @@ struct request_queue {
struct mutex sysfs_lock;
struct mutex sysfs_dir_lock;
+ struct mutex limits_lock;
/*
* for reusing dead hctx instance in case of updating
@@ -639,23 +643,23 @@ static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
static inline void disk_set_max_open_zones(struct gendisk *disk,
unsigned int max_open_zones)
{
- disk->max_open_zones = max_open_zones;
+ disk->queue->limits.max_open_zones = max_open_zones;
}
static inline void disk_set_max_active_zones(struct gendisk *disk,
unsigned int max_active_zones)
{
- disk->max_active_zones = max_active_zones;
+ disk->queue->limits.max_active_zones = max_active_zones;
}
static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
{
- return bdev->bd_disk->max_open_zones;
+ return bdev->bd_disk->queue->limits.max_open_zones;
}
static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
{
- return bdev->bd_disk->max_active_zones;
+ return bdev->bd_disk->queue->limits.max_active_zones;
}
#else /* CONFIG_BLK_DEV_ZONED */
@@ -763,22 +767,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void put_disk(struct gendisk *disk);
-struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
+struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node,
+ struct lock_class_key *lkclass);
/**
* blk_alloc_disk - allocate a gendisk structure
+ * @lim: queue limits to be used for this disk.
* @node_id: numa node to allocate on
*
* Allocate and pre-initialize a gendisk structure for use with BIO based
* drivers.
*
+ * Returns an ERR_PTR on error, else the allocated disk.
+ *
* Context: can sleep
*/
-#define blk_alloc_disk(node_id) \
+#define blk_alloc_disk(lim, node_id) \
({ \
static struct lock_class_key __key; \
\
- __blk_alloc_disk(node_id, &__key); \
+ __blk_alloc_disk(lim, node_id, &__key); \
})
int __register_blkdev(unsigned int major, const char *name,
@@ -861,6 +869,29 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
return chunk_sectors - (offset & (chunk_sectors - 1));
}
+/**
+ * queue_limits_start_update - start an atomic update of queue limits
+ * @q: queue to update
+ *
+ * This functions starts an atomic update of the queue limits. It takes a lock
+ * to prevent other updates and returns a snapshot of the current limits that
+ * the caller can modify. The caller must call queue_limits_commit_update()
+ * to finish the update.
+ *
+ * Context: process context. The caller must have frozen the queue or ensured
+ * that there is outstanding I/O by other means.
+ */
+static inline struct queue_limits
+queue_limits_start_update(struct request_queue *q)
+ __acquires(q->limits_lock)
+{
+ mutex_lock(&q->limits_lock);
+ return q->limits;
+}
+int queue_limits_commit_update(struct request_queue *q,
+ struct queue_limits *lim);
+int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
+
/*
* Access functions for manipulating queue properties
*/
@@ -894,8 +925,8 @@ extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
-extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
- sector_t offset);
+void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
+ sector_t offset, const char *pfx);
extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
@@ -942,6 +973,7 @@ struct blk_plug {
/* if ios_left is > 1, we can batch tag/rq allocations */
struct request *cached_rq;
+ u64 cur_ktime;
unsigned short nr_ios;
unsigned short rq_count;
@@ -972,6 +1004,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async)
__blk_flush_plug(plug, async);
}
+/*
+ * tsk == current here
+ */
+static inline void blk_plug_invalidate_ts(struct task_struct *tsk)
+{
+ struct blk_plug *plug = tsk->plug;
+
+ if (plug)
+ plug->cur_ktime = 0;
+ current->flags &= ~PF_BLOCK_TS;
+}
+
int blkdev_issue_flush(struct block_device *bdev);
long nr_blockdev_pages(void);
#else /* CONFIG_BLOCK */
@@ -995,6 +1039,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
}
+static inline void blk_plug_invalidate_ts(struct task_struct *tsk)
+{
+}
+
static inline int blkdev_issue_flush(struct block_device *bdev)
{
return 0;
@@ -1474,26 +1522,20 @@ extern const struct blk_holder_ops fs_holder_ops;
(BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \
(((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))
-struct bdev_handle {
- struct block_device *bdev;
- void *holder;
- blk_mode_t mode;
-};
-
-struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops);
-struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
+struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops);
int bd_prepare_to_claim(struct block_device *bdev, void *holder,
const struct blk_holder_ops *hops);
void bd_abort_claiming(struct block_device *bdev, void *holder);
-void bdev_release(struct bdev_handle *handle);
/* just for blk-cgroup, don't use elsewhere */
struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev);
struct block_device *I_BDEV(struct inode *inode);
+struct block_device *file_bdev(struct file *bdev_file);
#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a789266feac3..fb3c3e7181e6 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -196,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \
- cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \
+ cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \
+ sk_fullsock(sk)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
CGROUP_INET_INGRESS); \
\
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e30100597d0a..4f20f62f9d63 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -37,6 +37,7 @@ struct perf_event;
struct bpf_prog;
struct bpf_prog_aux;
struct bpf_map;
+struct bpf_arena;
struct sock;
struct seq_file;
struct btf;
@@ -52,6 +53,10 @@ struct module;
struct bpf_func_state;
struct ftrace_ops;
struct cgroup;
+struct bpf_token;
+struct user_namespace;
+struct super_block;
+struct inode;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -135,6 +140,9 @@ struct bpf_map_ops {
int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);
+ unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
/* Functions called by bpf_local_storage maps */
int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
@@ -247,10 +255,7 @@ struct bpf_list_node_kern {
} __attribute__((aligned(8)));
struct bpf_map {
- /* The first two cachelines with read-mostly members of which some
- * are also accessed in fast-path (e.g. ops, max_entries).
- */
- const struct bpf_map_ops *ops ____cacheline_aligned;
+ const struct bpf_map_ops *ops;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
@@ -272,17 +277,14 @@ struct bpf_map {
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
- /* The 3rd and 4th cacheline with misc members to avoid false sharing
- * particularly with refcounting.
- */
- atomic64_t refcnt ____cacheline_aligned;
+ struct mutex freeze_mutex;
+ atomic64_t refcnt;
atomic64_t usercnt;
/* rcu is used before freeing and work is only used during freeing */
union {
struct work_struct work;
struct rcu_head rcu;
};
- struct mutex freeze_mutex;
atomic64_t writecnt;
/* 'Ownership' of program-containing map is claimed by the first program
* that is going to use this map or by the first program which FD is
@@ -527,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
struct bpf_spin_lock *spin_lock);
-
-
+u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena);
+u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -710,6 +712,7 @@ enum bpf_arg_type {
* on eBPF program stack
*/
ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
+ ARG_PTR_TO_ARENA,
ARG_CONST_SIZE, /* number of bytes accessed from memory */
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
@@ -881,6 +884,7 @@ enum bpf_reg_type {
* an explicit null check is required for this struct.
*/
PTR_TO_MEM, /* reg points to valid memory region */
+ PTR_TO_ARENA,
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
@@ -1185,7 +1189,6 @@ struct bpf_trampoline {
int progs_cnt[BPF_TRAMP_MAX];
/* Executable image of trampoline */
struct bpf_tramp_image *cur_image;
- struct module *mod;
};
struct bpf_attach_target_info {
@@ -1412,6 +1415,7 @@ struct bpf_jit_poke_descriptor {
struct bpf_ctx_arg_aux {
u32 offset;
enum bpf_reg_type reg_type;
+ struct btf *btf;
u32 btf_id;
};
@@ -1451,11 +1455,11 @@ struct bpf_prog_aux {
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
bool attach_tracing_prog; /* true if tracing another tracing program */
bool func_proto_unreliable;
- bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
bool exception_cb;
bool exception_boundary;
+ struct bpf_arena *arena;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -1485,6 +1489,7 @@ struct bpf_prog_aux {
#ifdef CONFIG_SECURITY
void *security;
#endif
+ struct bpf_token *token;
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
@@ -1535,7 +1540,8 @@ struct bpf_prog {
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
- tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
+ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
+ sleepable:1; /* BPF program is sleepable */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@@ -1609,6 +1615,31 @@ struct bpf_link_primer {
u32 id;
};
+struct bpf_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+
+ /* BPF token-related delegation options */
+ u64 delegate_cmds;
+ u64 delegate_maps;
+ u64 delegate_progs;
+ u64 delegate_attachs;
+};
+
+struct bpf_token {
+ struct work_struct work;
+ atomic64_t refcnt;
+ struct user_namespace *userns;
+ u64 allowed_cmds;
+ u64 allowed_maps;
+ u64 allowed_progs;
+ u64 allowed_attachs;
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
+};
+
struct bpf_struct_ops_value;
struct btf_member;
@@ -1673,19 +1704,64 @@ struct bpf_struct_ops {
void (*unreg)(void *kdata);
int (*update)(void *kdata, void *old_kdata);
int (*validate)(void *kdata);
- const struct btf_type *type;
- const struct btf_type *value_type;
+ void *cfi_stubs;
+ struct module *owner;
const char *name;
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
+};
+
+/* Every member of a struct_ops type has an instance even a member is not
+ * an operator (function pointer). The "info" field will be assigned to
+ * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the
+ * argument information required by the verifier to verify the program.
+ *
+ * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the
+ * corresponding entry for an given argument.
+ */
+struct bpf_struct_ops_arg_info {
+ struct bpf_ctx_arg_aux *info;
+ u32 cnt;
+};
+
+struct bpf_struct_ops_desc {
+ struct bpf_struct_ops *st_ops;
+
+ const struct btf_type *type;
+ const struct btf_type *value_type;
u32 type_id;
u32 value_id;
- void *cfi_stubs;
+
+ /* Collection of argument information for each member */
+ struct bpf_struct_ops_arg_info *arg_info;
+};
+
+enum bpf_struct_ops_state {
+ BPF_STRUCT_OPS_STATE_INIT,
+ BPF_STRUCT_OPS_STATE_INUSE,
+ BPF_STRUCT_OPS_STATE_TOBEFREE,
+ BPF_STRUCT_OPS_STATE_READY,
+};
+
+struct bpf_struct_ops_common_value {
+ refcount_t refcnt;
+ enum bpf_struct_ops_state state;
};
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+/* This macro helps developer to register a struct_ops type and generate
+ * type information correctly. Developers should use this macro to register
+ * a struct_ops type instead of calling __register_bpf_struct_ops() directly.
+ */
+#define register_bpf_struct_ops(st_ops, type) \
+ ({ \
+ struct bpf_struct_ops_##type { \
+ struct bpf_struct_ops_common_value common; \
+ struct type data ____cacheline_aligned_in_smp; \
+ }; \
+ BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \
+ __register_bpf_struct_ops(st_ops); \
+ })
#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
-const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
-void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
@@ -1694,7 +1770,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
void *stub_func,
- void *image, void *image_end);
+ void **image, u32 *image_off,
+ bool allow_alloc);
+void bpf_struct_ops_image_free(void *image);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
@@ -1727,15 +1805,13 @@ struct bpf_dummy_ops {
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
#endif
+int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
+ struct btf *btf,
+ struct bpf_verifier_log *log);
+void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map);
+void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc);
#else
-static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
-{
- return NULL;
-}
-static inline void bpf_struct_ops_init(struct btf *btf,
- struct bpf_verifier_log *log)
-{
-}
+#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; })
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
return try_module_get(owner);
@@ -1754,6 +1830,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
{
return -EOPNOTSUPP;
}
+static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
+{
+}
+
+static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc)
+{
+}
#endif
@@ -2029,14 +2112,14 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu,
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
while ((prog = READ_ONCE(item->prog))) {
- if (!prog->aux->sleepable)
+ if (!prog->sleepable)
rcu_read_lock();
run_ctx.bpf_cookie = item->bpf_cookie;
ret &= run_prog(prog, ctx);
item++;
- if (!prog->aux->sleepable)
+ if (!prog->sleepable)
rcu_read_unlock();
}
bpf_reset_run_ctx(old_run_ctx);
@@ -2068,6 +2151,7 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}
+extern const struct super_operations bpf_super_ops;
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -2135,6 +2219,8 @@ int generic_map_delete_batch(struct bpf_map *map,
struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
+int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+ unsigned long nr_pages, struct page **page_array);
#ifdef CONFIG_MEMCG_KMEM
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
@@ -2202,24 +2288,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map)
extern int sysctl_unprivileged_bpf_disabled;
-static inline bool bpf_allow_ptr_leaks(void)
+bool bpf_token_capable(const struct bpf_token *token, int cap);
+
+static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_allow_uninit_stack(void)
+static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v1(void)
+static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
{
- return cpu_mitigations_off() || perfmon_capable();
+ return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v4(void)
+static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
{
- return cpu_mitigations_off() || perfmon_capable();
+ return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
@@ -2236,8 +2324,21 @@ int bpf_link_new_fd(struct bpf_link *link);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
+void bpf_token_inc(struct bpf_token *token);
+void bpf_token_put(struct bpf_token *token);
+int bpf_token_create(union bpf_attr *attr);
+struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type);
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type attach_type);
+
int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+ umode_t mode);
#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
#define DEFINE_BPF_ITER_FUNC(target, args...) \
@@ -2472,11 +2573,14 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr
struct btf *btf, const struct btf_type *t);
const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
int comp_idx, const char *tag_key);
+int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key, int last_id);
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
-const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
void bpf_task_storage_free(struct task_struct *task);
void bpf_cgrp_storage_free(struct cgroup *cgroup);
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
@@ -2595,6 +2699,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
+static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+static inline void bpf_token_inc(struct bpf_token *token)
+{
+}
+
+static inline void bpf_token_put(struct bpf_token *token)
+{
+}
+
+static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline void __dev_flush(void)
{
}
@@ -2718,7 +2840,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log,
}
static inline const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return NULL;
}
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 173ec7f43ed1..dcddb0aef7d8 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
struct bpf_local_storage_cache *cache,
bool bpf_ma);
-struct bpf_local_storage_data *
+void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem);
+/* If cacheit_lockit is false, this lookup function is lockless */
+static inline struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
- bool cacheit_lockit);
+ bool cacheit_lockit)
+{
+ struct bpf_local_storage_data *sdata;
+ struct bpf_local_storage_elem *selem;
+
+ /* Fast path (cache hit) */
+ sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
+ bpf_rcu_lock_held());
+ if (sdata && rcu_access_pointer(sdata->smap) == smap)
+ return sdata;
+
+ /* Slow path (cache miss) */
+ hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
+ rcu_read_lock_trace_held())
+ if (rcu_access_pointer(SDATA(selem)->smap) == smap)
+ break;
+
+ if (!selem)
+ return NULL;
+ if (cacheit_lockit)
+ __bpf_local_storage_insert_cache(local_storage, smap, selem);
+ return SDATA(selem);
+}
void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 94baced5a1ad..9f2a6b83b49e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d07d857ca67f..7cb1b75eee38 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -449,11 +449,12 @@ struct bpf_verifier_state {
u32 jmp_history_cnt;
u32 dfs_depth;
u32 callback_unroll_depth;
+ u32 may_goto_depth;
};
#define bpf_get_spilled_reg(slot, frame, mask) \
(((slot < frame->allocated_stack / BPF_REG_SIZE) && \
- ((1 << frame->stack[slot].slot_type[0]) & (mask))) \
+ ((1 << frame->stack[slot].slot_type[BPF_REG_SIZE - 1]) & (mask))) \
? &frame->stack[slot].spilled_ptr : NULL)
/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
@@ -547,6 +548,7 @@ struct bpf_insn_aux_data {
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
+ bool needs_zext; /* alu op needs to clear upper bits */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
@@ -610,6 +612,7 @@ struct bpf_subprog_arg_info {
enum bpf_arg_type arg_type;
union {
u32 mem_size;
+ u32 btf_id;
};
};
@@ -618,6 +621,7 @@ struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
+ u16 stack_extra;
bool has_tail_call: 1;
bool tail_call_reachable: 1;
bool has_ld_abs: 1;
@@ -662,6 +666,7 @@ struct bpf_verifier_env {
u32 prev_insn_idx;
struct bpf_prog *prog; /* eBPF program being verified */
const struct bpf_verifier_ops *ops;
+ struct module *attach_btf_mod; /* The owner module of prog->aux->attach_btf */
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
bool strict_alignment; /* perform strict pointer alignment checks */
@@ -917,6 +922,15 @@ static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env)
env->scratched_stack_slots = ~0ULL;
}
+static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size)
+{
+#ifdef __BIG_ENDIAN
+ off -= spill_size - fill_size;
+#endif
+
+ return !(off % BPF_REG_SIZE);
+}
+
const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type);
const char *dynptr_type_str(enum bpf_dynptr_type type);
const char *iter_type_str(const struct btf *btf, u32 btf_id);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index cf5c6ff48981..f9e56fd12a9f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -137,6 +137,7 @@ struct btf_struct_metas {
extern const struct file_operations btf_fops;
+const char *btf_get_name(const struct btf *btf);
void btf_get(struct btf *btf);
void btf_put(struct btf *btf);
int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
@@ -494,8 +495,26 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
}
+bool btf_param_match_suffix(const struct btf *btf,
+ const struct btf_param *arg,
+ const char *suffix);
+int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
+ u32 arg_no);
+
struct bpf_verifier_log;
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_struct_ops;
+int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops);
+const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id);
+const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id);
+#else
+static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id)
+{
+ return NULL;
+}
+#endif
+
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
@@ -512,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
-const struct btf_type *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, enum bpf_prog_type prog_type,
- int arg);
+bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg);
int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
bool btf_types_are_same(const struct btf *btf1, u32 id1,
const struct btf *btf2, u32 id2);
@@ -555,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf
{
return NULL;
}
-static inline const struct btf_member *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, enum bpf_prog_type prog_type,
- int arg)
+static inline bool
+btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
{
- return NULL;
+ return false;
}
static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log,
enum bpf_prog_type prog_type) {
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index a9cb10b0e2e9..e24aabfe8ecc 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -8,6 +8,9 @@ struct btf_id_set {
u32 ids[];
};
+/* This flag implies BTF_SET8 holds kfunc(s) */
+#define BTF_SET8_KFUNCS (1 << 0)
+
struct btf_id_set8 {
u32 cnt;
u32 flags;
@@ -21,6 +24,7 @@ struct btf_id_set8 {
#include <linux/compiler.h> /* for __PASTE */
#include <linux/compiler_attributes.h> /* for __maybe_unused */
+#include <linux/stringify.h>
/*
* Following macros help to define lists of BTF IDs placed
@@ -183,17 +187,18 @@ extern struct btf_id_set name;
* .word (1 << 3) | (1 << 1) | (1 << 2)
*
*/
-#define __BTF_SET8_START(name, scope) \
+#define __BTF_SET8_START(name, scope, flags) \
+__BTF_ID_LIST(name, local) \
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
"." #scope " __BTF_ID__set8__" #name "; \n" \
"__BTF_ID__set8__" #name ":; \n" \
-".zero 8 \n" \
+".zero 4 \n" \
+".long " __stringify(flags) "\n" \
".popsection; \n");
#define BTF_SET8_START(name) \
-__BTF_ID_LIST(name, local) \
-__BTF_SET8_START(name, local)
+__BTF_SET8_START(name, local, 0)
#define BTF_SET8_END(name) \
asm( \
@@ -202,6 +207,12 @@ asm( \
".popsection; \n"); \
extern struct btf_id_set8 name;
+#define BTF_KFUNCS_START(name) \
+__BTF_SET8_START(name, local, BTF_SET8_KFUNCS)
+
+#define BTF_KFUNCS_END(name) \
+BTF_SET8_END(name)
+
#else
#define BTF_ID_LIST(name) static u32 __maybe_unused name[64];
@@ -216,6 +227,8 @@ extern struct btf_id_set8 name;
#define BTF_SET_END(name)
#define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 };
#define BTF_SET8_END(name)
+#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS };
+#define BTF_KFUNCS_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 555aae5448ae..bd1e361b351c 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -83,7 +83,7 @@ struct bvec_iter {
unsigned int bi_bvec_done; /* number of bytes completed in
current bvec */
-} __packed;
+} __packed __aligned(4);
struct bvec_iter_all {
struct bio_vec bv;
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 2eaaabbe98cb..1717cc57cdac 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -283,7 +283,7 @@ struct ceph_msg {
struct kref kref;
bool more_to_follow;
bool needs_out_seq;
- bool sparse_read;
+ u64 sparse_read_total;
int front_alloc_len;
struct ceph_msgpool *pool;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fa018d5864e7..f66f6aac74f6 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -45,6 +45,7 @@ enum ceph_sparse_read_state {
CEPH_SPARSE_READ_HDR = 0,
CEPH_SPARSE_READ_EXTENTS,
CEPH_SPARSE_READ_DATA_LEN,
+ CEPH_SPARSE_READ_DATA_PRE,
CEPH_SPARSE_READ_DATA,
};
@@ -64,7 +65,7 @@ struct ceph_sparse_read {
u64 sr_req_len; /* orig request length */
u64 sr_pos; /* current pos in buffer */
int sr_index; /* current extent index */
- __le32 sr_datalen; /* length of actual data */
+ u32 sr_datalen; /* length of actual data */
u32 sr_count; /* extent count in reply */
int sr_ext_len; /* length of extent array */
struct ceph_sparse_extent *sr_extent; /* extent array */
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 1d42d4b17327..0ad8b550bb4b 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -291,7 +291,19 @@ static inline void timer_probe(void) {}
#define TIMER_ACPI_DECLARE(name, table_id, fn) \
ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
-extern ulong max_cswd_read_retries;
+static inline unsigned int clocksource_get_max_watchdog_retry(void)
+{
+ /*
+ * When system is in the boot phase or under heavy workload, there
+ * can be random big latencies during the clocksource/watchdog
+ * read, so allow retries to filter the noise latency. As the
+ * latency's frequency and maximum value goes up with the number of
+ * CPUs, scale the number of retries with the number of online
+ * CPUs.
+ */
+ return (ilog2(num_online_cpus()) / 2) + 1;
+}
+
void clocksource_verify_percpu(struct clocksource *cs);
#endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h
index 16775d7d8f8d..a4fa3436940c 100644
--- a/include/linux/clocksource_ids.h
+++ b/include/linux/clocksource_ids.h
@@ -6,6 +6,9 @@
enum clocksource_ids {
CSID_GENERIC = 0,
CSID_ARM_ARCH_COUNTER,
+ CSID_X86_TSC_EARLY,
+ CSID_X86_TSC,
+ CSID_X86_KVM_CLK,
CSID_MAX,
};
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index aebb65bf95a7..aff92b1d284f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -35,7 +35,7 @@
(typeof(ptr)) (__ptr + (off)); \
})
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
#define __noretpoline __attribute__((__indirect_branch__("keep")))
#endif
@@ -64,6 +64,26 @@
__builtin_unreachable(); \
} while (0)
+/*
+ * GCC 'asm goto' with outputs miscompiles certain code sequences:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ *
+ * Work around it via the same compiler barrier quirk that we used
+ * to use for the old 'asm goto' workaround.
+ *
+ * Also, always mark such 'asm goto' statements as volatile: all
+ * asm goto statements are supposed to be volatile as per the
+ * documentation, but some versions of gcc didn't actually do
+ * that for asms with outputs:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
+ */
+#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND
+#define asm_goto_output(x...) \
+ do { asm volatile goto(x); asm (""); } while (0)
+#endif
+
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bb1339c7057b..52730e423681 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -209,7 +209,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define ___ADDRESSABLE(sym, __attrs) \
static void * __used __attrs \
- __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym;
+ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym;
#define __ADDRESSABLE(sym) \
___ADDRESSABLE(sym, __section(".discard.addressable"))
@@ -231,6 +231,45 @@ static inline void *offset_to_ptr(const int *off)
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ *
+ * Details:
+ * - sizeof() return an integer constant expression, and does not evaluate
+ * the value of its operand; it only examines the type of its operand.
+ * - The results of comparing two integer constant expressions is also
+ * an integer constant expression.
+ * - The first literal "8" isn't important. It could be any literal value.
+ * - The second literal "8" is to avoid warnings about unaligned pointers;
+ * this could otherwise just be "1".
+ * - (long)(x) is used to avoid warnings about 64-bit types on 32-bit
+ * architectures.
+ * - The C Standard defines "null pointer constant", "(void *)0", as
+ * distinct from other void pointers.
+ * - If (x) is an integer constant expression, then the "* 0l" resolves
+ * it into an integer constant expression of value 0. Since it is cast to
+ * "void *", this makes the second operand a null pointer constant.
+ * - If (x) is not an integer constant expression, then the second operand
+ * resolves to a void pointer (but not a null pointer constant: the value
+ * is not an integer constant 0).
+ * - The conditional operator's third operand, "(int *)8", is an object
+ * pointer (to type "int").
+ * - The behavior (including the return type) of the conditional operator
+ * ("operand1 ? operand2 : operand3") depends on the kind of expressions
+ * given for the second and third operands. This is the central mechanism
+ * of the macro:
+ * - When one operand is a null pointer constant (i.e. when x is an integer
+ * constant expression) and the other is an object pointer (i.e. our
+ * third operand), the conditional operator returns the type of the
+ * object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ * would then get:
+ * sizeof(*((int *)(...)) == sizeof(int) == 4
+ * - When one operand is a void pointer (i.e. when x is not an integer
+ * constant expression) and the other is an object pointer (i.e. our
+ * third operand), the conditional operator returns a "void *" type.
+ * Here, within the sizeof(), we would then get:
+ * sizeof(*((void *)(...)) == sizeof(void) == 1
+ * - The equality comparison to "sizeof(int)" therefore depends on (x):
+ * sizeof(int) == sizeof(int) (x) was a constant expression
+ * sizeof(int) != sizeof(void) (x) was not a constant expression
*/
#define __is_constexpr(x) \
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 28566624f008..8bdf6e0918c1 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -95,11 +95,11 @@
#endif
/*
- * Optional: only supported since gcc >= 14
+ * Optional: only supported since gcc >= 15
* Optional: only supported since clang >= 18
*
* gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
- * clang: https://reviews.llvm.org/D148381
+ * clang: https://github.com/llvm/llvm-project/pull/76348
*/
#if __has_attribute(__counted_by__)
# define __counted_by(member) __attribute__((__counted_by__(member)))
@@ -334,6 +334,18 @@
#define __section(section) __attribute__((__section__(section)))
/*
+ * Optional: only supported since gcc >= 12
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-uninitialized-variable-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#uninitialized
+ */
+#if __has_attribute(__uninitialized__)
+# define __uninitialized __attribute__((__uninitialized__))
+#else
+# define __uninitialized
+#endif
+
+/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6f1ca49306d2..3e64ec0f7ac8 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -282,11 +282,18 @@ struct ftrace_likely_data {
#define __no_sanitize_or_inline __always_inline
#endif
+/* Do not trap wrapping arithmetic within an annotated function. */
+#ifdef CONFIG_UBSAN_SIGNED_WRAP
+# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
+#else
+# define __signed_wrap
+#endif
+
/* Section for code which can't be instrumented at all */
#define __noinstr_section(section) \
noinline notrace __attribute((__section__(section))) \
__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
- __no_sanitize_memory
+ __no_sanitize_memory __signed_wrap
#define noinstr __noinstr_section(".noinstr.text")
@@ -362,8 +369,15 @@ struct ftrace_likely_data {
#define __member_size(p) __builtin_object_size(p, 1)
#endif
-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+/*
+ * Some versions of gcc do not mark 'asm goto' volatile:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979
+ *
+ * We do it here by hand, because it doesn't hurt.
+ */
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm volatile goto(x)
#endif
#ifdef CONFIG_CC_HAS_ASM_INLINE
diff --git a/include/linux/cper.h b/include/linux/cper.h
index c1a7dc325121..265b0f8fc0b3 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -90,6 +90,29 @@ enum {
GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \
0x72, 0x2D, 0xEB, 0x41)
+/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CPER_SEC_CXL_GEN_MEDIA_GUID \
+ GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CPER_SEC_CXL_DRAM_GUID \
+ GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \
+ 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CPER_SEC_CXL_MEM_MODULE_GUID \
+ GUID_INIT(0xfe927475, 0xdd59, 0x4339, \
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
+
/*
* Flags bits definitions for flags in struct cper_record_header
* If set, the error has been recovered
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index dcb89c987164..ae5a20cf2f9c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -75,6 +75,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_gds(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -112,7 +114,7 @@ void notify_cpu_starting(unsigned int cpu);
extern void cpu_maps_update_begin(void);
extern void cpu_maps_update_done(void);
int bringup_hibernate_cpu(unsigned int sleep_cpu);
-void bringup_nonboot_cpus(unsigned int setup_max_cpus);
+void bringup_nonboot_cpus(unsigned int max_cpus);
#else /* CONFIG_SMP */
#define cpuhp_tasks_frozen 0
@@ -196,6 +198,8 @@ void arch_cpu_idle(void);
void arch_cpu_idle_prepare(void);
void arch_cpu_idle_enter(void);
void arch_cpu_idle_exit(void);
+void arch_tick_broadcast_enter(void);
+void arch_tick_broadcast_exit(void);
void __noreturn arch_cpu_idle_dead(void);
#ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 172d0a743e5d..35e78ddb2b37 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -184,6 +184,7 @@ enum cpuhp_state {
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
CPUHP_AP_HRTIMERS_DYING,
+ CPUHP_AP_TICK_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
@@ -231,6 +232,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
CPUHP_AP_PERF_CSKY_ONLINE,
+ CPUHP_AP_TMIGR_ONLINE,
CPUHP_AP_WATCHDOG_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RANDOM_ONLINE,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index cfb545841a2c..1c29947db848 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -7,6 +7,7 @@
* set of CPUs in a system, one bit position per CPU number. In general,
* only nr_cpu_ids (<= NR_CPUS) bits are valid.
*/
+#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/bitmap.h>
@@ -720,6 +721,19 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
}
/**
+ * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2)
+ * @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
+ * @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
+ *
+ * Return: count of bits set in both *srcp1 and *srcp2
+ */
+static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
+}
+
+/**
* cpumask_shift_right - *dstp = *srcp >> n
* @dstp: the cpumask result
* @srcp: the input to shift
@@ -977,6 +991,8 @@ static inline bool cpumask_available(cpumask_var_t mask)
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
+DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T));
+
/* It's common to want to use cpu_all_mask in struct member initializers,
* so it has to refer to an address rather than a pointer. */
extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 875d12598bd2..0ce6ff0d9c9a 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -121,11 +121,6 @@ static inline int cpuset_do_page_mem_spread(void)
return task_spread_page(current);
}
-static inline int cpuset_do_slab_mem_spread(void)
-{
- return task_spread_slab(current);
-}
-
extern bool current_cpuset_is_being_rebound(void);
extern void rebuild_sched_domains(void);
@@ -264,11 +259,6 @@ static inline int cpuset_do_page_mem_spread(void)
return 0;
}
-static inline int cpuset_do_slab_mem_spread(void)
-{
- return 0;
-}
-
static inline bool current_cpuset_is_being_rebound(void)
{
return false;
diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h
index 91125eca4c8a..03fa6d50d46f 100644
--- a/include/linux/cxl-event.h
+++ b/include/linux/cxl-event.h
@@ -140,22 +140,4 @@ struct cxl_cper_event_rec {
union cxl_event event;
} __packed;
-typedef void (*cxl_cper_callback)(enum cxl_event_type type,
- struct cxl_cper_event_rec *rec);
-
-#ifdef CONFIG_ACPI_APEI_GHES
-int cxl_cper_register_callback(cxl_cper_callback callback);
-int cxl_cper_unregister_callback(cxl_cper_callback callback);
-#else
-static inline int cxl_cper_register_callback(cxl_cper_callback callback)
-{
- return 0;
-}
-
-static inline int cxl_cper_unregister_callback(cxl_cper_callback callback)
-{
- return 0;
-}
-#endif
-
#endif /* _LINUX_CXL_EVENT_H */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1666c387861f..bf53e3894aae 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -125,6 +125,11 @@ enum dentry_d_lock_class
DENTRY_D_LOCK_NESTED
};
+enum d_real_type {
+ D_REAL_DATA,
+ D_REAL_METADATA,
+};
+
struct dentry_operations {
int (*d_revalidate)(struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
@@ -139,7 +144,7 @@ struct dentry_operations {
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
- struct dentry *(*d_real)(struct dentry *, const struct inode *);
+ struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
} ____cacheline_aligned;
/*
@@ -173,6 +178,7 @@ struct dentry_operations {
#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */
#define DCACHE_CANT_MOUNT BIT(8)
+#define DCACHE_GENOCIDE BIT(9)
#define DCACHE_SHRINK_LIST BIT(10)
#define DCACHE_OP_WEAK_REVALIDATE BIT(11)
@@ -546,24 +552,23 @@ static inline struct inode *d_backing_inode(const struct dentry *upper)
/**
* d_real - Return the real dentry
* @dentry: the dentry to query
- * @inode: inode to select the dentry from multiple layers (can be NULL)
+ * @type: the type of real dentry (data or metadata)
*
* If dentry is on a union/overlay, then return the underlying, real dentry.
* Otherwise return the dentry itself.
*
* See also: Documentation/filesystems/vfs.rst
*/
-static inline struct dentry *d_real(struct dentry *dentry,
- const struct inode *inode)
+static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type)
{
if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
- return dentry->d_op->d_real(dentry, inode);
+ return dentry->d_op->d_real(dentry, type);
else
return dentry;
}
/**
- * d_real_inode - Return the real inode
+ * d_real_inode - Return the real inode hosting the data
* @dentry: The dentry to query
*
* If dentry is on a union/overlay, then return the underlying, real inode.
@@ -572,7 +577,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
static inline struct inode *d_real_inode(const struct dentry *dentry)
{
/* This usage of d_real() results in const dentry */
- return d_backing_inode(d_real((struct dentry *) dentry, NULL));
+ return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA));
}
struct name_snapshot {
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 772ab4d74d94..82b2195efaca 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -165,7 +165,7 @@ void dm_error(const char *message);
struct dm_dev {
struct block_device *bdev;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
struct dax_device *dax_dev;
blk_mode_t mode;
char name[16];
diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h
index 75e7d8cbb532..d1503b815a78 100644
--- a/include/linux/dm-bufio.h
+++ b/include/linux/dm-bufio.h
@@ -64,6 +64,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start);
void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp);
+void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp, unsigned short ioprio);
+
/*
* Like dm_bufio_read, but return buffer from cache, don't read
* it. If the buffer is not in the cache, return NULL.
@@ -86,6 +89,10 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
void dm_bufio_prefetch(struct dm_bufio_client *c,
sector_t block, unsigned int n_blocks);
+void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c,
+ sector_t block, unsigned int n_blocks,
+ unsigned short ioprio);
+
/*
* Release a reference obtained with dm_bufio_{read,get,new}. The data
* pointer and dm_buffer pointer is no longer valid after this call.
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index 7595142f3fc5..7b2968612b7e 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -80,7 +80,8 @@ void dm_io_client_destroy(struct dm_io_client *client);
* error occurred doing io to the corresponding region.
*/
int dm_io(struct dm_io_request *io_req, unsigned int num_regions,
- struct dm_io_region *region, unsigned int long *sync_error_bits);
+ struct dm_io_region *region, unsigned int long *sync_error_bits,
+ unsigned short ioprio);
#endif /* __KERNEL__ */
#endif /* _LINUX_DM_IO_H */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3df70d6131c8..752dbde4cec1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -953,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
static inline bool is_slave_direction(enum dma_transfer_direction direction)
{
- return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM);
+ return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) ||
+ (direction == DMA_DEV_TO_DEV);
}
static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index 9cf896ea1d41..d275736230b3 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -10,6 +10,8 @@
#include <uapi/linux/dpll.h>
#include <linux/device.h>
#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
struct dpll_device;
struct dpll_pin;
@@ -19,6 +21,7 @@ struct dpll_device_ops {
enum dpll_mode *mode, struct netlink_ext_ack *extack);
int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv,
enum dpll_lock_status *status,
+ enum dpll_lock_status_error *status_error,
struct netlink_ext_ack *extack);
int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv,
s32 *temp, struct netlink_ext_ack *extack);
@@ -120,15 +123,24 @@ struct dpll_pin_properties {
};
#if IS_ENABLED(CONFIG_DPLL)
-size_t dpll_msg_pin_handle_size(struct dpll_pin *pin);
-int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin);
+void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin);
+void dpll_netdev_pin_clear(struct net_device *dev);
+
+size_t dpll_netdev_pin_handle_size(const struct net_device *dev);
+int dpll_netdev_add_pin_handle(struct sk_buff *msg,
+ const struct net_device *dev);
#else
-static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin)
+static inline void
+dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { }
+static inline void dpll_netdev_pin_clear(struct net_device *dev) { }
+
+static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev)
{
return 0;
}
-static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
+static inline int
+dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev)
{
return 0;
}
diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 407c2f281b64..5693a4be0d9a 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -38,14 +38,22 @@
#ifdef __KERNEL__
+#include <linux/bitops.h>
#include <asm/bug.h>
+#define DQL_HIST_LEN 4
+#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN])
+
struct dql {
/* Fields accessed in enqueue path (dql_queued) */
unsigned int num_queued; /* Total ever queued */
unsigned int adj_limit; /* limit + num_completed */
unsigned int last_obj_cnt; /* Count at last queuing */
+ unsigned long history_head; /* top 58 bits of jiffies */
+ /* stall entries, a bit per entry */
+ unsigned long history[DQL_HIST_LEN];
+
/* Fields accessed only by completion path (dql_completed) */
unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */
@@ -62,6 +70,13 @@ struct dql {
unsigned int max_limit; /* Max limit */
unsigned int min_limit; /* Minimum limit */
unsigned int slack_hold_time; /* Time to measure slack */
+
+ /* Stall threshold (in jiffies), defined by user */
+ unsigned short stall_thrs;
+ /* Longest stall detected, reported to user */
+ unsigned short stall_max;
+ unsigned long last_reap; /* Last reap (in jiffies) */
+ unsigned long stall_cnt; /* Number of stalls */
};
/* Set some static maximums */
@@ -74,6 +89,8 @@ struct dql {
*/
static inline void dql_queued(struct dql *dql, unsigned int count)
{
+ unsigned long map, now, now_hi, i;
+
BUG_ON(count > DQL_MAX_OBJECT);
dql->last_obj_cnt = count;
@@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
barrier();
dql->num_queued += count;
+
+ now = jiffies;
+ now_hi = now / BITS_PER_LONG;
+
+ /* The following code set a bit in the ring buffer, where each
+ * bit trackes time the packet was queued. The dql->history buffer
+ * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot
+ */
+ if (unlikely(now_hi != dql->history_head)) {
+ /* About to reuse slots, clear them */
+ for (i = 0; i < DQL_HIST_LEN; i++) {
+ /* Multiplication masks high bits */
+ if (now_hi * BITS_PER_LONG ==
+ (dql->history_head + i) * BITS_PER_LONG)
+ break;
+ DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0;
+ }
+ /* pairs with smp_rmb() in dql_check_stall() */
+ smp_wmb();
+ WRITE_ONCE(dql->history_head, now_hi);
+ }
+
+ /* __set_bit() does not guarantee WRITE_ONCE() semantics */
+ map = DQL_HIST_ENT(dql, now_hi);
+
+ /* Populate the history with an entry (bit) per queued */
+ if (!(map & BIT_MASK(now)))
+ WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
}
/* Returns how many objects can be queued, < 0 indicates over limit. */
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 325e0778e937..9901e563f706 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -222,6 +222,16 @@ extern int
__ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *link_ksettings);
+struct ethtool_keee {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised);
+ u32 tx_lpi_timer;
+ bool tx_lpi_enabled;
+ bool eee_active;
+ bool eee_enabled;
+};
+
struct kernel_ethtool_coalesce {
u8 use_cqe_mode_tx;
u8 use_cqe_mode_rx;
@@ -892,8 +902,8 @@ struct ethtool_ops {
struct ethtool_modinfo *);
int (*get_module_eeprom)(struct net_device *,
struct ethtool_eeprom *, u8 *);
- int (*get_eee)(struct net_device *, struct ethtool_eee *);
- int (*set_eee)(struct net_device *, struct ethtool_eee *);
+ int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee);
+ int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee);
int (*get_tunable)(struct net_device *,
const struct ethtool_tunable *, void *);
int (*set_tunable)(struct net_device *,
diff --git a/include/linux/evm.h b/include/linux/evm.h
index 36ec884320d9..d48d6da32315 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -12,52 +12,12 @@
#include <linux/integrity.h>
#include <linux/xattr.h>
-struct integrity_iint_cache;
-
#ifdef CONFIG_EVM
extern int evm_set_key(void *key, size_t keylen);
extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
const char *xattr_name,
void *xattr_value,
- size_t xattr_value_len,
- struct integrity_iint_cache *iint);
-extern int evm_inode_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry, struct iattr *attr);
-extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid);
-extern int evm_inode_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *name,
- const void *value, size_t size);
-extern void evm_inode_post_setxattr(struct dentry *dentry,
- const char *xattr_name,
- const void *xattr_value,
- size_t xattr_value_len);
-extern int evm_inode_copy_up_xattr(const char *name);
-extern int evm_inode_removexattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *xattr_name);
-extern void evm_inode_post_removexattr(struct dentry *dentry,
- const char *xattr_name);
-static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- evm_inode_post_removexattr(dentry, acl_name);
-}
-extern int evm_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl);
-static inline int evm_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return evm_inode_set_acl(idmap, dentry, acl_name, NULL);
-}
-static inline void evm_inode_post_set_acl(struct dentry *dentry,
- const char *acl_name,
- struct posix_acl *kacl)
-{
- return evm_inode_post_setxattr(dentry, acl_name, NULL, 0);
-}
-
+ size_t xattr_value_len);
int evm_inode_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count);
@@ -85,85 +45,12 @@ static inline int evm_set_key(void *key, size_t keylen)
static inline enum integrity_status evm_verifyxattr(struct dentry *dentry,
const char *xattr_name,
void *xattr_value,
- size_t xattr_value_len,
- struct integrity_iint_cache *iint)
+ size_t xattr_value_len)
{
return INTEGRITY_UNKNOWN;
}
#endif
-static inline int evm_inode_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry, struct iattr *attr)
-{
- return 0;
-}
-
-static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
-{
- return;
-}
-
-static inline int evm_inode_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *name,
- const void *value, size_t size)
-{
- return 0;
-}
-
-static inline void evm_inode_post_setxattr(struct dentry *dentry,
- const char *xattr_name,
- const void *xattr_value,
- size_t xattr_value_len)
-{
- return;
-}
-
-static inline int evm_inode_copy_up_xattr(const char *name)
-{
- return 0;
-}
-
-static inline int evm_inode_removexattr(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *xattr_name)
-{
- return 0;
-}
-
-static inline void evm_inode_post_removexattr(struct dentry *dentry,
- const char *xattr_name)
-{
- return;
-}
-
-static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return;
-}
-
-static inline int evm_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl)
-{
- return 0;
-}
-
-static inline int evm_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return 0;
-}
-
-static inline void evm_inode_post_set_acl(struct dentry *dentry,
- const char *acl_name,
- struct posix_acl *kacl)
-{
- return;
-}
-
static inline int evm_inode_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr,
struct xattr *xattrs,
diff --git a/include/linux/file.h b/include/linux/file.h
index 6834a29338c4..169692cb1906 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -24,6 +24,8 @@ struct inode;
struct path;
extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
const char *, int flags, const struct file_operations *);
+extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *,
+ const char *, int flags, const struct file_operations *);
extern struct file *alloc_file_clone(struct file *, int flags,
const struct file_operations *);
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 95e868e09e29..daee999d05f3 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -27,6 +27,7 @@
#define FILE_LOCK_DEFERRED 1
struct file_lock;
+struct file_lease;
struct file_lock_operations {
void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
@@ -39,14 +40,17 @@ struct lock_manager_operations {
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, int);
- bool (*lm_break)(struct file_lock *);
- int (*lm_change)(struct file_lock *, int, struct list_head *);
- void (*lm_setup)(struct file_lock *, void **);
- bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *cfl);
void (*lm_expire_lock)(void);
};
+struct lease_manager_operations {
+ bool (*lm_break)(struct file_lease *);
+ int (*lm_change)(struct file_lease *, int, struct list_head *);
+ void (*lm_setup)(struct file_lease *, void **);
+ bool (*lm_breaker_owns_lease)(struct file_lease *);
+};
+
struct lock_manager {
struct list_head list;
/*
@@ -85,31 +89,31 @@ bool opens_in_grace(struct net *);
*
* Obviously, the last two criteria only matter for POSIX locks.
*/
-struct file_lock {
- struct file_lock *fl_blocker; /* The lock, that is blocking us */
- struct list_head fl_list; /* link into file_lock_context */
- struct hlist_node fl_link; /* node in global lists */
- struct list_head fl_blocked_requests; /* list of requests with
+
+struct file_lock_core {
+ struct file_lock_core *flc_blocker; /* The lock that is blocking us */
+ struct list_head flc_list; /* link into file_lock_context */
+ struct hlist_node flc_link; /* node in global lists */
+ struct list_head flc_blocked_requests; /* list of requests with
* ->fl_blocker pointing here
*/
- struct list_head fl_blocked_member; /* node in
+ struct list_head flc_blocked_member; /* node in
* ->fl_blocker->fl_blocked_requests
*/
- fl_owner_t fl_owner;
- unsigned int fl_flags;
- unsigned char fl_type;
- unsigned int fl_pid;
- int fl_link_cpu; /* what cpu's list is this on? */
- wait_queue_head_t fl_wait;
- struct file *fl_file;
+ fl_owner_t flc_owner;
+ unsigned int flc_flags;
+ unsigned char flc_type;
+ pid_t flc_pid;
+ int flc_link_cpu; /* what cpu's list is this on? */
+ wait_queue_head_t flc_wait;
+ struct file *flc_file;
+};
+
+struct file_lock {
+ struct file_lock_core c;
loff_t fl_start;
loff_t fl_end;
- struct fasync_struct * fl_fasync; /* for lease break notifications */
- /* for lease breaks: */
- unsigned long fl_break_time;
- unsigned long fl_downgrade_time;
-
const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
union {
@@ -126,6 +130,15 @@ struct file_lock {
} fl_u;
} __randomize_layout;
+struct file_lease {
+ struct file_lock_core c;
+ struct fasync_struct * fl_fasync; /* for lease break notifications */
+ /* for lease breaks: */
+ unsigned long fl_break_time;
+ unsigned long fl_downgrade_time;
+ const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */
+} __randomize_layout;
+
struct file_lock_context {
spinlock_t flc_lock;
struct list_head flc_flock;
@@ -147,11 +160,31 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
int fcntl_getlease(struct file *filp);
+static inline bool lock_is_unlock(struct file_lock *fl)
+{
+ return fl->c.flc_type == F_UNLCK;
+}
+
+static inline bool lock_is_read(struct file_lock *fl)
+{
+ return fl->c.flc_type == F_RDLCK;
+}
+
+static inline bool lock_is_write(struct file_lock *fl)
+{
+ return fl->c.flc_type == F_WRLCK;
+}
+
+static inline void locks_wake_up(struct file_lock *fl)
+{
+ wake_up(&fl->c.flc_wait);
+}
+
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
void locks_init_lock(struct file_lock *);
-struct file_lock * locks_alloc_lock(void);
+struct file_lock *locks_alloc_lock(void);
void locks_copy_lock(struct file_lock *, struct file_lock *);
void locks_copy_conflock(struct file_lock *, struct file_lock *);
void locks_remove_posix(struct file *, fl_owner_t);
@@ -165,11 +198,16 @@ int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_l
int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
bool vfs_inode_has_locks(struct inode *inode);
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+
+void locks_init_lease(struct file_lease *);
+void locks_free_lease(struct file_lease *fl);
+struct file_lease *locks_alloc_lease(void);
int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
void lease_get_mtime(struct inode *, struct timespec64 *time);
-int generic_setlease(struct file *, int, struct file_lock **, void **priv);
-int vfs_setlease(struct file *, int, struct file_lock **, void **);
-int lease_modify(struct file_lock *, int, struct list_head *);
+int generic_setlease(struct file *, int, struct file_lease **, void **priv);
+int kernel_setlease(struct file *, int, struct file_lease **, void **);
+int vfs_setlease(struct file *, int, struct file_lease **, void **);
+int lease_modify(struct file_lease *, int, struct list_head *);
struct notifier_block;
int lease_register_notifier(struct notifier_block *);
@@ -223,6 +261,25 @@ static inline int fcntl_getlease(struct file *filp)
return F_UNLCK;
}
+static inline bool lock_is_unlock(struct file_lock *fl)
+{
+ return false;
+}
+
+static inline bool lock_is_read(struct file_lock *fl)
+{
+ return false;
+}
+
+static inline bool lock_is_write(struct file_lock *fl)
+{
+ return false;
+}
+
+static inline void locks_wake_up(struct file_lock *fl)
+{
+}
+
static inline void
locks_free_lock_context(struct inode *inode)
{
@@ -233,6 +290,11 @@ static inline void locks_init_lock(struct file_lock *fl)
return;
}
+static inline void locks_init_lease(struct file_lease *fl)
+{
+ return;
+}
+
static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
{
return;
@@ -307,18 +369,24 @@ static inline void lease_get_mtime(struct inode *inode,
}
static inline int generic_setlease(struct file *filp, int arg,
- struct file_lock **flp, void **priv)
+ struct file_lease **flp, void **priv)
+{
+ return -EINVAL;
+}
+
+static inline int kernel_setlease(struct file *filp, int arg,
+ struct file_lease **lease, void **priv)
{
return -EINVAL;
}
static inline int vfs_setlease(struct file *filp, int arg,
- struct file_lock **lease, void **priv)
+ struct file_lease **lease, void **priv)
{
return -EINVAL;
}
-static inline int lease_modify(struct file_lock *fl, int arg,
+static inline int lease_modify(struct file_lease *fl, int arg,
struct list_head *dispose)
{
return -EINVAL;
@@ -341,6 +409,9 @@ locks_inode_context(const struct inode *inode)
#endif /* !CONFIG_FILE_LOCKING */
+/* for walking lists of file_locks linked by fl_list */
+#define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list)
+
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
{
return locks_lock_inode_wait(file_inode(filp), fl);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 68fb6c8142fe..c99bc3df2d28 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -72,6 +72,9 @@ struct ctl_table_header;
/* unused opcode to mark special ldsx instruction. Same as BPF_IND */
#define BPF_PROBE_MEMSX 0x40
+/* unused opcode to mark special load instruction. Same as BPF_MSH */
+#define BPF_PROBE_MEM32 0xa0
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@ -547,24 +550,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
__BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \
u64, __ur_3, u64, __ur_4, u64, __ur_5)
-#define BPF_CALL_x(x, name, ...) \
+#define BPF_CALL_x(x, attr, name, ...) \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
- u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \
- u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \
+ attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \
+ attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \
{ \
return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
} \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
-#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__)
-#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__)
-#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__)
-#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__)
-#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__)
-#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__)
+#define __NOATTR
+#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__)
+
+#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__)
#define bpf_ctx_range(TYPE, MEMBER) \
offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
@@ -955,6 +961,8 @@ bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
+bool bpf_jit_supports_ptr_xchg(void);
+bool bpf_jit_supports_arena(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);
@@ -1139,7 +1147,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
return false;
if (!bpf_jit_harden)
return false;
- if (bpf_jit_harden == 1 && bpf_capable())
+ if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
return true;
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 89a6888f2f9e..6aeebe0a6777 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FORTIFY_STRING_H_
#define _LINUX_FORTIFY_STRING_H_
+#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/const.h>
#include <linux/limits.h>
@@ -9,7 +10,46 @@
#define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable
#define __RENAME(x) __asm__(#x)
-void fortify_panic(const char *name) __noreturn __cold;
+#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r)
+#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r)
+#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \
+ FIELD_PREP(GENMASK(7, 1), func))
+
+#ifndef fortify_panic
+# define fortify_panic(func, write, avail, size, retfail) \
+ __fortify_panic(FORTIFY_REASON(func, write), avail, size)
+#endif
+
+#define FORTIFY_READ 0
+#define FORTIFY_WRITE 1
+
+#define EACH_FORTIFY_FUNC(macro) \
+ macro(strncpy), \
+ macro(strnlen), \
+ macro(strlen), \
+ macro(strscpy), \
+ macro(strlcat), \
+ macro(strcat), \
+ macro(strncat), \
+ macro(memset), \
+ macro(memcpy), \
+ macro(memmove), \
+ macro(memscan), \
+ macro(memcmp), \
+ macro(memchr), \
+ macro(memchr_inv), \
+ macro(kmemdup), \
+ macro(strcpy), \
+ macro(UNKNOWN),
+
+#define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func
+
+enum fortify_func {
+ EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC)
+};
+
+void __fortify_report(const u8 reason, const size_t avail, const size_t size);
+void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn;
void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)");
void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)");
void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?");
@@ -143,7 +183,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size)
if (__compiletime_lessthan(p_size, size))
__write_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p);
return __underlying_strncpy(p, q, size);
}
@@ -174,7 +214,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size
/* Do not check characters beyond the end of p. */
ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
if (p_size <= ret && maxlen != ret)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret);
return ret;
}
@@ -210,31 +250,13 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
return __underlying_strlen(p);
ret = strnlen(p, p_size);
if (p_size <= ret)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret);
return ret;
}
/* Defined after fortified strnlen() to reuse it. */
-extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
-/**
- * strscpy - Copy a C-string into a sized buffer
- *
- * @p: Where to copy the string to
- * @q: Where to copy the string from
- * @size: Size of destination buffer
- *
- * Copy the source string @q, or as much of it as fits, into the destination
- * @p buffer. The behavior is undefined if the string buffers overlap. The
- * destination @p buffer is always NUL terminated, unless it's zero-sized.
- *
- * Preferred to strncpy() since it always returns a valid string, and
- * doesn't unnecessarily force the tail of the destination buffer to be
- * zero padded. If padding is desired please use strscpy_pad().
- *
- * Returns the number of characters copied in @p (not including the
- * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
- */
-__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
+extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy);
+__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size)
{
/* Use string size rather than possible enclosing struct size. */
const size_t p_size = __member_size(p);
@@ -278,8 +300,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s
* Generate a runtime write overflow error if len is greater than
* p_size.
*/
- if (len > p_size)
- fortify_panic(__func__);
+ if (p_size < len)
+ fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG);
/*
* We can now safely call vanilla strscpy because we are protected from:
@@ -337,7 +359,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail)
/* Give up if string is already overflowed. */
if (p_size <= p_len)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted);
if (actual >= avail) {
copy_len = avail - p_len - 1;
@@ -346,7 +368,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail)
/* Give up if copy will overflow. */
if (p_size <= actual)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted);
__underlying_memcpy(p + p_len, q, copy_len);
p[actual] = '\0';
@@ -373,9 +395,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2)
char *strcat(char * const POS p, const char *q)
{
const size_t p_size = __member_size(p);
+ const size_t wanted = strlcat(p, q, p_size);
- if (strlcat(p, q, p_size) >= p_size)
- fortify_panic(__func__);
+ if (p_size <= wanted)
+ fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p);
return p;
}
@@ -404,20 +427,21 @@ char *strncat(char * const POS p, const char * const POS q, __kernel_size_t coun
{
const size_t p_size = __member_size(p);
const size_t q_size = __member_size(q);
- size_t p_len, copy_len;
+ size_t p_len, copy_len, total;
if (p_size == SIZE_MAX && q_size == SIZE_MAX)
return __underlying_strncat(p, q, count);
p_len = strlen(p);
copy_len = strnlen(q, count);
- if (p_size < p_len + copy_len + 1)
- fortify_panic(__func__);
+ total = p_len + copy_len + 1;
+ if (p_size < total)
+ fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p);
__underlying_memcpy(p + p_len, q, copy_len);
p[p_len + copy_len] = '\0';
return p;
}
-__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
+__FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size,
const size_t p_size,
const size_t p_size_field)
{
@@ -452,7 +476,8 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
* lengths are unknown.)
*/
if (p_size != SIZE_MAX && p_size < size)
- fortify_panic("memset");
+ fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true);
+ return false;
}
#define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \
@@ -506,7 +531,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
const size_t q_size,
const size_t p_size_field,
const size_t q_size_field,
- const char *func)
+ const u8 func)
{
if (__builtin_constant_p(size)) {
/*
@@ -550,9 +575,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
* (The SIZE_MAX test is to optimize away checks where the buffer
* lengths are unknown.)
*/
- if ((p_size != SIZE_MAX && p_size < size) ||
- (q_size != SIZE_MAX && q_size < size))
- fortify_panic(func);
+ if (p_size != SIZE_MAX && p_size < size)
+ fortify_panic(func, FORTIFY_WRITE, p_size, size, true);
+ else if (q_size != SIZE_MAX && q_size < size)
+ fortify_panic(func, FORTIFY_READ, p_size, size, true);
/*
* Warn when writing beyond destination field size.
@@ -585,7 +611,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
const size_t __q_size_field = (q_size_field); \
WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \
__q_size, __p_size_field, \
- __q_size_field, #op), \
+ __q_size_field, FORTIFY_FUNC_ ##op), \
#op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \
__fortify_size, \
"field \"" #p "\" at " FILE_LINE, \
@@ -652,7 +678,7 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size)
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL);
return __real_memscan(p, c, size);
}
@@ -668,8 +694,10 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t
if (__compiletime_lessthan(q_size, size))
__read_overflow2();
}
- if (p_size < size || q_size < size)
- fortify_panic(__func__);
+ if (p_size < size)
+ fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN);
+ else if (q_size < size)
+ fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN);
return __underlying_memcmp(p, q, size);
}
@@ -681,7 +709,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size)
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL);
return __underlying_memchr(p, c, size);
}
@@ -693,7 +721,7 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size)
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL);
return __real_memchr_inv(p, c, size);
}
@@ -706,7 +734,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL);
return __real_kmemdup(p, size, gfp);
}
@@ -743,7 +771,7 @@ char *strcpy(char * const POS p, const char * const POS q)
__write_overflow();
/* Run-time check for dynamic size overflow. */
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p);
__underlying_memcpy(p, q, size);
return p;
}
diff --git a/include/linux/framer/framer-provider.h b/include/linux/framer/framer-provider.h
index 782cd5fc83d5..9724d4b44b9c 100644
--- a/include/linux/framer/framer-provider.h
+++ b/include/linux/framer/framer-provider.h
@@ -83,7 +83,6 @@ struct framer_ops {
/**
* struct framer_provider - represents the framer provider
* @dev: framer provider device
- * @children: can be used to override the default (dev->of_node) child node
* @owner: the module owner having of_xlate
* @list: to maintain a linked list of framer providers
* @of_xlate: function pointer to obtain framer instance from framer pointer
@@ -93,7 +92,7 @@ struct framer_provider {
struct module *owner;
struct list_head list;
struct framer * (*of_xlate)(struct device *dev,
- struct of_phandle_args *args);
+ const struct of_phandle_args *args);
};
static inline void framer_set_drvdata(struct framer *framer, void *data)
@@ -118,19 +117,19 @@ struct framer *devm_framer_create(struct device *dev, struct device_node *node,
const struct framer_ops *ops);
struct framer *framer_provider_simple_of_xlate(struct device *dev,
- struct of_phandle_args *args);
+ const struct of_phandle_args *args);
struct framer_provider *
__framer_provider_of_register(struct device *dev, struct module *owner,
struct framer *(*of_xlate)(struct device *dev,
- struct of_phandle_args *args));
+ const struct of_phandle_args *args));
void framer_provider_of_unregister(struct framer_provider *framer_provider);
struct framer_provider *
__devm_framer_provider_of_register(struct device *dev, struct module *owner,
struct framer *(*of_xlate)(struct device *dev,
- struct of_phandle_args *args));
+ const struct of_phandle_args *args));
void framer_notify_status_change(struct framer *framer);
@@ -154,7 +153,7 @@ static inline struct framer *devm_framer_create(struct device *dev, struct devic
}
static inline struct framer *framer_provider_simple_of_xlate(struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
return ERR_PTR(-ENOSYS);
}
@@ -162,7 +161,7 @@ static inline struct framer *framer_provider_simple_of_xlate(struct device *dev,
static inline struct framer_provider *
__framer_provider_of_register(struct device *dev, struct module *owner,
struct framer *(*of_xlate)(struct device *dev,
- struct of_phandle_args *args))
+ const struct of_phandle_args *args))
{
return ERR_PTR(-ENOSYS);
}
@@ -174,7 +173,7 @@ void framer_provider_of_unregister(struct framer_provider *framer_provider)
static inline struct framer_provider *
__devm_framer_provider_of_register(struct device *dev, struct module *owner,
struct framer *(*of_xlate)(struct device *dev,
- struct of_phandle_args *args))
+ const struct of_phandle_args *args))
{
return ERR_PTR(-ENOSYS);
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..0a22b7245982 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -43,6 +43,8 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
+#include <linux/maple_tree.h>
+#include <linux/rw_hint.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -309,19 +311,6 @@ struct address_space;
struct writeback_control;
struct readahead_control;
-/*
- * Write life time hint values.
- * Stored in struct inode as u8.
- */
-enum rw_hint {
- WRITE_LIFE_NOT_SET = 0,
- WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
- WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
- WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
- WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
- WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
-};
-
/* Match RWF_* bits to IOCB bits */
#define IOCB_HIPRI (__force int) RWF_HIPRI
#define IOCB_DSYNC (__force int) RWF_DSYNC
@@ -352,6 +341,8 @@ enum rw_hint {
* unrelated IO (like cache flushing, new IO generation, etc).
*/
#define IOCB_DIO_CALLER_COMP (1 << 22)
+/* kiocb is a read or write operation submitted by fs/aio.c. */
+#define IOCB_AIO_RW (1 << 23)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -482,10 +473,10 @@ struct address_space {
pgoff_t writeback_index;
const struct address_space_operations *a_ops;
unsigned long flags;
- struct rw_semaphore i_mmap_rwsem;
errseq_t wb_err;
spinlock_t i_private_lock;
struct list_head i_private_list;
+ struct rw_semaphore i_mmap_rwsem;
void * i_private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
/*
@@ -677,7 +668,7 @@ struct inode {
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
- u8 i_write_hint;
+ enum rw_hint i_write_hint;
blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED
@@ -907,7 +898,8 @@ static inline loff_t i_size_read(const struct inode *inode)
preempt_enable();
return i_size;
#else
- return inode->i_size;
+ /* Pairs with smp_store_release() in i_size_write() */
+ return smp_load_acquire(&inode->i_size);
#endif
}
@@ -929,7 +921,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size)
inode->i_size = i_size;
preempt_enable();
#else
- inode->i_size = i_size;
+ /*
+ * Pairs with smp_load_acquire() in i_size_read() to ensure
+ * changes related to inode size (such as page contents) are
+ * visible before we see the changed inode size.
+ */
+ smp_store_release(&inode->i_size, i_size);
#endif
}
@@ -1064,6 +1061,7 @@ struct file *get_file_active(struct file **f);
typedef void *fl_owner_t;
struct file_lock;
+struct file_lease;
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
@@ -1078,9 +1076,20 @@ static inline struct inode *file_inode(const struct file *f)
return f->f_inode;
}
+/*
+ * file_dentry() is a relic from the days that overlayfs was using files with a
+ * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs.
+ * In those days, file_dentry() was needed to get the underlying fs dentry that
+ * matches f_inode.
+ * Files with "fake" path should not exist nowadays, so use an assertion to make
+ * sure that file_dentry() was not papering over filesystem bugs.
+ */
static inline struct dentry *file_dentry(const struct file *file)
{
- return d_real(file->f_path.dentry, file_inode(file));
+ struct dentry *dentry = file->f_path.dentry;
+
+ WARN_ON_ONCE(d_inode(dentry) != file_inode(file));
+ return dentry;
}
struct fasync_struct {
@@ -1228,8 +1237,8 @@ struct super_block {
#endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
- struct block_device *s_bdev;
- struct bdev_handle *s_bdev_handle;
+ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
+ struct file *s_bdev_file;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct hlist_node s_instances;
@@ -1255,8 +1264,22 @@ struct super_block {
struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
#endif
+ /*
+ * q: why are s_id and s_sysfs_name not the same? both are human
+ * readable strings that identify the filesystem
+ * a: s_id is allowed to change at runtime; it's used in log messages,
+ * and we want to when a device starts out as single device (s_id is dev
+ * name) but then a device is hot added and we have to switch to
+ * identifying it by UUID
+ * but s_sysfs_name is a handle for programmatic access, and can't
+ * change at runtime
+ */
char s_id[32]; /* Informational name */
uuid_t s_uuid; /* UUID */
+ u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */
+
+ /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */
+ char s_sysfs_name[UUID_STRING_LEN + 1];
unsigned int s_max_links;
@@ -2005,7 +2028,7 @@ struct file_operations {
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
void (*splice_eof)(struct file *file);
- int (*setlease)(struct file *, int, struct file_lock **, void **);
+ int (*setlease)(struct file *, int, struct file_lease **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -2101,9 +2124,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count, unsigned int remap_flags);
-extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags);
extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
@@ -2532,6 +2552,44 @@ extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
+static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len)
+{
+ if (WARN_ON(len > sizeof(sb->s_uuid)))
+ len = sizeof(sb->s_uuid);
+ sb->s_uuid_len = len;
+ memcpy(&sb->s_uuid, uuid, len);
+}
+
+/* set sb sysfs name based on sb->s_bdev */
+static inline void super_set_sysfs_name_bdev(struct super_block *sb)
+{
+ snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev);
+}
+
+/* set sb sysfs name based on sb->s_uuid */
+static inline void super_set_sysfs_name_uuid(struct super_block *sb)
+{
+ WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid));
+ snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b);
+}
+
+/* set sb sysfs name based on sb->s_id */
+static inline void super_set_sysfs_name_id(struct super_block *sb)
+{
+ strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name));
+}
+
+/* try to use something standard before you use this */
+__printf(2, 3)
+static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args);
+ va_end(args);
+}
+
extern int current_umask(void);
extern void ihold(struct inode * inode);
@@ -2928,6 +2986,17 @@ extern bool path_is_under(const struct path *, const struct path *);
extern char *file_path(struct file *, char *, int);
+/**
+ * is_dot_dotdot - returns true only if @name is "." or ".."
+ * @name: file name to check
+ * @len: length of file name, in bytes
+ */
+static inline bool is_dot_dotdot(const char *name, size_t len)
+{
+ return len && unlikely(name[0] == '.') &&
+ (len == 1 || (len == 2 && name[1] == '.'));
+}
+
#include <linux/err.h>
/* needed for stackable file system support */
@@ -3238,7 +3307,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, int, struct file_lock **, void **);
+extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
extern const struct dentry_operations simple_dentry_operations;
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
@@ -3260,13 +3329,14 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count);
struct offset_ctx {
- struct xarray xa;
- u32 next_offset;
+ struct maple_tree mt;
+ unsigned long next_offset;
};
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
@@ -3280,7 +3350,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_check_addressable(unsigned, u64);
-extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
+extern void generic_set_sb_d_ops(struct super_block *sb);
+
+static inline bool sb_has_encoding(const struct super_block *sb)
+{
+#if IS_ENABLED(CONFIG_UNICODE)
+ return !!sb->s_encoding;
+#else
+ return false;
+#endif
+}
int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
unsigned int ia_valid);
@@ -3335,6 +3414,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
return 0;
if (unlikely(flags & ~RWF_SUPPORTED))
return -EOPNOTSUPP;
+ if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND)))
+ return -EINVAL;
if (flags & RWF_NOWAIT) {
if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
@@ -3345,6 +3426,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
+ if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) {
+ if (IS_APPEND(file_inode(ki->ki_filp)))
+ return -EPERM;
+ ki->ki_flags &= ~IOCB_APPEND;
+ }
+
ki->ki_flags |= kiocb_flags;
return 0;
}
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 12f9e455d569..772f822dc6b8 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -192,6 +192,8 @@ struct fscrypt_operations {
unsigned int *num_devs);
};
+int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
+
static inline struct fscrypt_inode_info *
fscrypt_get_inode_info(const struct inode *inode)
{
@@ -221,15 +223,29 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode)
}
/*
- * When d_splice_alias() moves a directory's no-key alias to its plaintext alias
- * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be
- * cleared. Note that we don't have to support arbitrary moves of this flag
- * because fscrypt doesn't allow no-key names to be the source or target of a
- * rename().
+ * When d_splice_alias() moves a directory's no-key alias to its
+ * plaintext alias as a result of the encryption key being added,
+ * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity
+ * to disable d_revalidate. Note that we don't have to support the
+ * inverse operation because fscrypt doesn't allow no-key names to be
+ * the source or target of a rename().
*/
static inline void fscrypt_handle_d_move(struct dentry *dentry)
{
- dentry->d_flags &= ~DCACHE_NOKEY_NAME;
+ /*
+ * VFS calls fscrypt_handle_d_move even for non-fscrypt
+ * filesystems.
+ */
+ if (dentry->d_flags & DCACHE_NOKEY_NAME) {
+ dentry->d_flags &= ~DCACHE_NOKEY_NAME;
+
+ /*
+ * Other filesystem features might be handling dentry
+ * revalidation, in which case it cannot be disabled.
+ */
+ if (dentry->d_op->d_revalidate == fscrypt_d_revalidate)
+ dentry->d_flags &= ~DCACHE_OP_REVALIDATE;
+ }
}
/**
@@ -261,6 +277,35 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry)
return dentry->d_flags & DCACHE_NOKEY_NAME;
}
+static inline void fscrypt_prepare_dentry(struct dentry *dentry,
+ bool is_nokey_name)
+{
+ /*
+ * This code tries to only take ->d_lock when necessary to write
+ * to ->d_flags. We shouldn't be peeking on d_flags for
+ * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case
+ * there is a race, the worst it can happen is that we fail to
+ * unset DCACHE_OP_REVALIDATE and pay the cost of an extra
+ * d_revalidate.
+ */
+ if (is_nokey_name) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_NOKEY_NAME;
+ spin_unlock(&dentry->d_lock);
+ } else if (dentry->d_flags & DCACHE_OP_REVALIDATE &&
+ dentry->d_op->d_revalidate == fscrypt_d_revalidate) {
+ /*
+ * Unencrypted dentries and encrypted dentries where the
+ * key is available are always valid from fscrypt
+ * perspective. Avoid the cost of calling
+ * fscrypt_d_revalidate unnecessarily.
+ */
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_OP_REVALIDATE;
+ spin_unlock(&dentry->d_lock);
+ }
+}
+
/* crypto.c */
void fscrypt_enqueue_decrypt_work(struct work_struct *);
@@ -368,7 +413,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len);
u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
-int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
/* bio.c */
bool fscrypt_decrypt_bio(struct bio *bio);
@@ -425,6 +469,11 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry)
return false;
}
+static inline void fscrypt_prepare_dentry(struct dentry *dentry,
+ bool is_nokey_name)
+{
+}
+
/* crypto.c */
static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
{
@@ -982,6 +1031,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir,
fname->usr_fname = &dentry->d_name;
fname->disk_name.name = (unsigned char *)dentry->d_name.name;
fname->disk_name.len = dentry->d_name.len;
+
+ fscrypt_prepare_dentry(dentry, false);
+
return 0;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index de292a007138..937c2a9b6e54 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -311,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
struct page_frag_cache;
+void page_frag_cache_drain(struct page_frag_cache *nc);
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-extern void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask);
+void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
+ gfp_t gfp_mask, unsigned int align_mask);
+
+static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align)
+{
+ WARN_ON_ONCE(!is_power_of_2(align));
+ return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
+}
static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
- return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+ return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}
extern void page_frag_free(void *addr);
@@ -353,6 +361,15 @@ static inline bool gfp_has_io_fs(gfp_t gfp)
return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}
+/*
+ * Check if the gfp flags allow compaction - GFP_NOIO is a really
+ * tricky context because the migration might require IO.
+ */
+static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
+{
+ return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
+}
+
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 9a5c6c76e653..7f75c9a51874 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -819,6 +819,24 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
return ERR_PTR(-ENODEV);
}
+static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc)
+{
+ WARN_ON(1);
+ return ERR_PTR(-ENODEV);
+}
+
+static inline int gpio_device_get_base(struct gpio_device *gdev)
+{
+ WARN_ON(1);
+ return -ENODEV;
+}
+
+static inline const char *gpio_device_get_label(struct gpio_device *gdev)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
static inline int gpiochip_lock_as_irq(struct gpio_chip *gc,
unsigned int offset)
{
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 840cd254172d..7118ac28d468 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -77,17 +77,6 @@ enum hid_bpf_attach_flags {
int hid_bpf_device_event(struct hid_bpf_ctx *ctx);
int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx);
-/* Following functions are kfunc that we export to BPF programs */
-/* available everywhere in HID-BPF */
-__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz);
-
-/* only available in syscall */
-int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags);
-int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
- enum hid_report_type rtype, enum hid_class_request reqtype);
-struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id);
-void hid_bpf_release_context(struct hid_bpf_ctx *ctx);
-
/*
* Below is HID internal
*/
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 87e3bedf8eb0..aa1e65ccb615 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -18,12 +18,8 @@
#include <linux/list.h>
#include <linux/percpu-defs.h>
#include <linux/rbtree.h>
-#include <linux/seqlock.h>
#include <linux/timer.h>
-struct hrtimer_clock_base;
-struct hrtimer_cpu_base;
-
/*
* Mode arguments of xxx_hrtimer functions:
*
@@ -98,105 +94,6 @@ struct hrtimer_sleeper {
struct task_struct *task;
};
-#ifdef CONFIG_64BIT
-# define __hrtimer_clock_base_align ____cacheline_aligned
-#else
-# define __hrtimer_clock_base_align
-#endif
-
-/**
- * struct hrtimer_clock_base - the timer base for a specific clock
- * @cpu_base: per cpu clock base
- * @index: clock type index for per_cpu support when moving a
- * timer to a base on another cpu.
- * @clockid: clock id for per_cpu support
- * @seq: seqcount around __run_hrtimer
- * @running: pointer to the currently running hrtimer
- * @active: red black tree root node for the active timers
- * @get_time: function to retrieve the current time of the clock
- * @offset: offset of this clock to the monotonic base
- */
-struct hrtimer_clock_base {
- struct hrtimer_cpu_base *cpu_base;
- unsigned int index;
- clockid_t clockid;
- seqcount_raw_spinlock_t seq;
- struct hrtimer *running;
- struct timerqueue_head active;
- ktime_t (*get_time)(void);
- ktime_t offset;
-} __hrtimer_clock_base_align;
-
-enum hrtimer_base_type {
- HRTIMER_BASE_MONOTONIC,
- HRTIMER_BASE_REALTIME,
- HRTIMER_BASE_BOOTTIME,
- HRTIMER_BASE_TAI,
- HRTIMER_BASE_MONOTONIC_SOFT,
- HRTIMER_BASE_REALTIME_SOFT,
- HRTIMER_BASE_BOOTTIME_SOFT,
- HRTIMER_BASE_TAI_SOFT,
- HRTIMER_MAX_CLOCK_BASES,
-};
-
-/**
- * struct hrtimer_cpu_base - the per cpu clock bases
- * @lock: lock protecting the base and associated clock bases
- * and timers
- * @cpu: cpu number
- * @active_bases: Bitfield to mark bases with active timers
- * @clock_was_set_seq: Sequence counter of clock was set events
- * @hres_active: State of high resolution mode
- * @in_hrtirq: hrtimer_interrupt() is currently executing
- * @hang_detected: The last hrtimer interrupt detected a hang
- * @softirq_activated: displays, if the softirq is raised - update of softirq
- * related settings is not required then.
- * @nr_events: Total number of hrtimer interrupt events
- * @nr_retries: Total number of hrtimer interrupt retries
- * @nr_hangs: Total number of hrtimer interrupt hangs
- * @max_hang_time: Maximum time spent in hrtimer_interrupt
- * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
- * expired
- * @timer_waiters: A hrtimer_cancel() invocation waits for the timer
- * callback to finish.
- * @expires_next: absolute time of the next event, is required for remote
- * hrtimer enqueue; it is the total first expiry time (hard
- * and soft hrtimer are taken into account)
- * @next_timer: Pointer to the first expiring timer
- * @softirq_expires_next: Time to check, if soft queues needs also to be expired
- * @softirq_next_timer: Pointer to the first expiring softirq based timer
- * @clock_base: array of clock bases for this cpu
- *
- * Note: next_timer is just an optimization for __remove_hrtimer().
- * Do not dereference the pointer because it is not reliable on
- * cross cpu removals.
- */
-struct hrtimer_cpu_base {
- raw_spinlock_t lock;
- unsigned int cpu;
- unsigned int active_bases;
- unsigned int clock_was_set_seq;
- unsigned int hres_active : 1,
- in_hrtirq : 1,
- hang_detected : 1,
- softirq_activated : 1;
-#ifdef CONFIG_HIGH_RES_TIMERS
- unsigned int nr_events;
- unsigned short nr_retries;
- unsigned short nr_hangs;
- unsigned int max_hang_time;
-#endif
-#ifdef CONFIG_PREEMPT_RT
- spinlock_t softirq_expiry_lock;
- atomic_t timer_waiters;
-#endif
- ktime_t expires_next;
- struct hrtimer *next_timer;
- ktime_t softirq_expires_next;
- struct hrtimer *softirq_next_timer;
- struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
-} ____cacheline_aligned;
-
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
timer->node.expires = time;
@@ -445,20 +342,12 @@ extern u64
hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
/**
- * hrtimer_forward_now - forward the timer expiry so it expires after now
+ * hrtimer_forward_now() - forward the timer expiry so it expires after now
* @timer: hrtimer to forward
* @interval: the interval to forward
*
- * Forward the timer expiry so it will expire after the current time
- * of the hrtimer clock base. Returns the number of overruns.
- *
- * Can be safely called from the callback function of @timer. If
- * called from other contexts @timer must neither be enqueued nor
- * running the callback and the caller needs to take care of
- * serialization.
- *
- * Note: This only updates the timer expiry value and does not requeue
- * the timer.
+ * It is a variant of hrtimer_forward(). The timer will expire after the current
+ * time of the hrtimer clock base. See hrtimer_forward() for details.
*/
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 2d3e3c5fb946..c3b4b7ed7c16 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -3,6 +3,8 @@
#define _LINUX_HRTIMER_DEFS_H
#include <linux/ktime.h>
+#include <linux/timerqueue.h>
+#include <linux/seqlock.h>
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -24,4 +26,106 @@
#endif
+#ifdef CONFIG_64BIT
+# define __hrtimer_clock_base_align ____cacheline_aligned
+#else
+# define __hrtimer_clock_base_align
+#endif
+
+/**
+ * struct hrtimer_clock_base - the timer base for a specific clock
+ * @cpu_base: per cpu clock base
+ * @index: clock type index for per_cpu support when moving a
+ * timer to a base on another cpu.
+ * @clockid: clock id for per_cpu support
+ * @seq: seqcount around __run_hrtimer
+ * @running: pointer to the currently running hrtimer
+ * @active: red black tree root node for the active timers
+ * @get_time: function to retrieve the current time of the clock
+ * @offset: offset of this clock to the monotonic base
+ */
+struct hrtimer_clock_base {
+ struct hrtimer_cpu_base *cpu_base;
+ unsigned int index;
+ clockid_t clockid;
+ seqcount_raw_spinlock_t seq;
+ struct hrtimer *running;
+ struct timerqueue_head active;
+ ktime_t (*get_time)(void);
+ ktime_t offset;
+} __hrtimer_clock_base_align;
+
+enum hrtimer_base_type {
+ HRTIMER_BASE_MONOTONIC,
+ HRTIMER_BASE_REALTIME,
+ HRTIMER_BASE_BOOTTIME,
+ HRTIMER_BASE_TAI,
+ HRTIMER_BASE_MONOTONIC_SOFT,
+ HRTIMER_BASE_REALTIME_SOFT,
+ HRTIMER_BASE_BOOTTIME_SOFT,
+ HRTIMER_BASE_TAI_SOFT,
+ HRTIMER_MAX_CLOCK_BASES,
+};
+
+/**
+ * struct hrtimer_cpu_base - the per cpu clock bases
+ * @lock: lock protecting the base and associated clock bases
+ * and timers
+ * @cpu: cpu number
+ * @active_bases: Bitfield to mark bases with active timers
+ * @clock_was_set_seq: Sequence counter of clock was set events
+ * @hres_active: State of high resolution mode
+ * @in_hrtirq: hrtimer_interrupt() is currently executing
+ * @hang_detected: The last hrtimer interrupt detected a hang
+ * @softirq_activated: displays, if the softirq is raised - update of softirq
+ * related settings is not required then.
+ * @nr_events: Total number of hrtimer interrupt events
+ * @nr_retries: Total number of hrtimer interrupt retries
+ * @nr_hangs: Total number of hrtimer interrupt hangs
+ * @max_hang_time: Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ * expired
+ * @online: CPU is online from an hrtimers point of view
+ * @timer_waiters: A hrtimer_cancel() invocation waits for the timer
+ * callback to finish.
+ * @expires_next: absolute time of the next event, is required for remote
+ * hrtimer enqueue; it is the total first expiry time (hard
+ * and soft hrtimer are taken into account)
+ * @next_timer: Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired
+ * @softirq_next_timer: Pointer to the first expiring softirq based timer
+ * @clock_base: array of clock bases for this cpu
+ *
+ * Note: next_timer is just an optimization for __remove_hrtimer().
+ * Do not dereference the pointer because it is not reliable on
+ * cross cpu removals.
+ */
+struct hrtimer_cpu_base {
+ raw_spinlock_t lock;
+ unsigned int cpu;
+ unsigned int active_bases;
+ unsigned int clock_was_set_seq;
+ unsigned int hres_active : 1,
+ in_hrtirq : 1,
+ hang_detected : 1,
+ softirq_activated : 1,
+ online : 1;
+#ifdef CONFIG_HIGH_RES_TIMERS
+ unsigned int nr_events;
+ unsigned short nr_retries;
+ unsigned short nr_hangs;
+ unsigned int max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+ spinlock_t softirq_expiry_lock;
+ atomic_t timer_waiters;
+#endif
+ ktime_t expires_next;
+ struct hrtimer *next_timer;
+ ktime_t softirq_expires_next;
+ struct hrtimer *softirq_next_timer;
+ struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+} ____cacheline_aligned;
+
+
#endif
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2b00faf98017..6ef0557b4bff 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -164,8 +164,28 @@ struct hv_ring_buffer {
u8 buffer[];
} __packed;
+
+/*
+ * If the requested ring buffer size is at least 8 times the size of the
+ * header, steal space from the ring buffer for the header. Otherwise, add
+ * space for the header so that is doesn't take too much of the ring buffer
+ * space.
+ *
+ * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a
+ * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring
+ * buffer size (such as 128 Kbytes) and so end up making a nearly twice as
+ * large allocation that will be almost half wasted. As a contrasting example,
+ * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the
+ * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring.
+ * In this latter case, we must add 64 Kbytes for the header and not worry
+ * about what's wasted.
+ */
+#define VMBUS_HEADER_ADJ(payload_sz) \
+ ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \
+ 0 : sizeof(struct hv_ring_buffer))
+
/* Calculate the proper size of a ringbuffer, it must be page-aligned */
-#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \
+#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \
(payload_sz))
struct hv_ring_buffer_info {
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 83c4d060a559..3385a2cc5b09 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -9,7 +9,7 @@
* Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
* Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (c) 2018 - 2023 Intel Corporation
+ * Copyright (c) 2018 - 2024 Intel Corporation
*/
#ifndef LINUX_IEEE80211_H
@@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2)
return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1);
}
+static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2)
+{
+ return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1);
+}
+
static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2)
{
return (sn1 + sn2) & IEEE80211_SN_MASK;
@@ -808,6 +813,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr)
hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG);
}
+static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr)
+{
+ return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ);
+}
+
struct ieee80211s_hdr {
u8 flags;
u8 ttl;
@@ -1454,6 +1464,20 @@ struct ieee80211_mgmt {
u8 max_tod_error;
u8 max_toa_error;
} __packed wnm_timing_msr;
+ struct {
+ u8 action_code;
+ u8 dialog_token;
+ u8 variable[];
+ } __packed ttlm_req;
+ struct {
+ u8 action_code;
+ u8 dialog_token;
+ u8 status_code;
+ u8 variable[];
+ } __packed ttlm_res;
+ struct {
+ u8 action_code;
+ } __packed ttlm_tear_down;
} u;
} __packed action;
DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */
@@ -3036,6 +3060,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
#define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40
#define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40
#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78
#define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80
@@ -3175,6 +3202,22 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len)
return len >= needed;
}
+/* must validate ieee80211_eht_oper_size_ok() first */
+static inline u16
+ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper)
+{
+ const struct ieee80211_eht_operation_info *info =
+ (const void *)eht_oper->optional;
+
+ if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT))
+ return 0;
+
+ if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT))
+ return 0;
+
+ return get_unaligned_le16(info->optional);
+}
+
#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1)
struct ieee80211_bandwidth_indication {
@@ -3357,6 +3400,8 @@ enum ieee80211_statuscode {
WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109,
WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126,
WLAN_STATUS_SAE_PK = 127,
+ WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133,
+ WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134,
};
@@ -3682,6 +3727,7 @@ enum ieee80211_category {
WLAN_CATEGORY_UNPROT_DMG = 20,
WLAN_CATEGORY_VHT = 21,
WLAN_CATEGORY_S1G = 22,
+ WLAN_CATEGORY_PROTECTED_EHT = 37,
WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
};
@@ -3745,6 +3791,13 @@ enum ieee80211_unprotected_wnm_actioncode {
WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1,
};
+/* Protected EHT action codes */
+enum ieee80211_protected_eht_actioncode {
+ WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0,
+ WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1,
+ WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2,
+};
+
/* Security key length */
enum ieee80211_key_len {
WLAN_KEY_LEN_WEP40 = 5,
@@ -4845,6 +4898,10 @@ struct ieee80211_multi_link_elem {
#define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f
#define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010
#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3
#define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80
#define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000
@@ -4908,18 +4965,43 @@ static inline u8 ieee80211_mle_common_size(const u8 *data)
}
/**
+ * ieee80211_mle_get_link_id - returns the link ID
+ * @data: the basic multi link element
+ *
+ * The element is assumed to be of the correct type (BASIC) and big enough,
+ * this must be checked using ieee80211_mle_type_ok().
+ *
+ * If the BSS link ID can't be found, -1 will be returned
+ */
+static inline int ieee80211_mle_get_link_id(const u8 *data)
+{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
+ u16 control = le16_to_cpu(mle->control);
+ const u8 *common = mle->variable;
+
+ /* common points now at the beginning of ieee80211_mle_basic_common_info */
+ common += sizeof(struct ieee80211_mle_basic_common_info);
+
+ if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID))
+ return -1;
+
+ return *common;
+}
+
+/**
* ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count
- * @mle: the basic multi link element
+ * @data: pointer to the basic multi link element
*
* The element is assumed to be of the correct type (BASIC) and big enough,
* this must be checked using ieee80211_mle_type_ok().
*
* If the BSS parameter change count value can't be found (the presence bit
- * for it is clear), 0 will be returned.
+ * for it is clear), -1 will be returned.
*/
-static inline u8
-ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle)
+static inline int
+ieee80211_mle_get_bss_param_ch_cnt(const u8 *data)
{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
u16 control = le16_to_cpu(mle->control);
const u8 *common = mle->variable;
@@ -4927,7 +5009,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle)
common += sizeof(struct ieee80211_mle_basic_common_info);
if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT))
- return 0;
+ return -1;
if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID)
common += 1;
@@ -4997,6 +5079,81 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data)
}
/**
+ * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations.
+ * @data: pointer to the multi link EHT IE
+ *
+ * The element is assumed to be of the correct type (BASIC) and big enough,
+ * this must be checked using ieee80211_mle_type_ok().
+ *
+ * If the MLD capabilities and operations field is not present, 0 will be
+ * returned.
+ */
+static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data)
+{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
+ u16 control = le16_to_cpu(mle->control);
+ const u8 *common = mle->variable;
+
+ /*
+ * common points now at the beginning of
+ * ieee80211_mle_basic_common_info
+ */
+ common += sizeof(struct ieee80211_mle_basic_common_info);
+
+ if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP))
+ return 0;
+
+ if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)
+ common += 2;
+ if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA)
+ common += 2;
+
+ return get_unaligned_le16(common);
+}
+
+/**
+ * ieee80211_mle_get_mld_id - returns the MLD ID
+ * @data: pointer to the multi link element
+ *
+ * The element is assumed to be of the correct type (BASIC) and big enough,
+ * this must be checked using ieee80211_mle_type_ok().
+ *
+ * If the MLD ID is not present, 0 will be returned.
+ */
+static inline u8 ieee80211_mle_get_mld_id(const u8 *data)
+{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
+ u16 control = le16_to_cpu(mle->control);
+ const u8 *common = mle->variable;
+
+ /*
+ * common points now at the beginning of
+ * ieee80211_mle_basic_common_info
+ */
+ common += sizeof(struct ieee80211_mle_basic_common_info);
+
+ if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID))
+ return 0;
+
+ if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)
+ common += 2;
+ if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA)
+ common += 2;
+ if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)
+ common += 2;
+
+ return *common;
+}
+
+/**
* ieee80211_mle_size_ok - validate multi-link element size
* @data: pointer to the element data
* @len: length of the containing element
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 2a7660843444..043d442994b0 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -27,44 +27,54 @@ struct tun_xdp_hdr {
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
+
static inline bool tun_is_xdp_frame(void *ptr)
{
- return (unsigned long)ptr & TUN_XDP_FLAG;
+ return (unsigned long)ptr & TUN_XDP_FLAG;
}
+
static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
{
- return (void *)((unsigned long)xdp | TUN_XDP_FLAG);
+ return (void *)((unsigned long)xdp | TUN_XDP_FLAG);
}
+
static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
{
- return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+ return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
}
+
void tun_ptr_free(void *ptr);
#else
#include <linux/err.h>
#include <linux/errno.h>
struct file;
struct socket;
+
static inline struct socket *tun_get_socket(struct file *f)
{
return ERR_PTR(-EINVAL);
}
+
static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
{
return ERR_PTR(-EINVAL);
}
+
static inline bool tun_is_xdp_frame(void *ptr)
{
return false;
}
+
static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
{
return NULL;
}
+
static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
{
return NULL;
}
+
static inline void tun_ptr_free(void *ptr)
{
}
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 7852f6c9a714..719cf9cc6e1a 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -8,6 +8,8 @@
#ifndef __AD_SIGMA_DELTA_H__
#define __AD_SIGMA_DELTA_H__
+#include <linux/iio/iio.h>
+
enum ad_sigma_delta_mode {
AD_SD_MODE_CONTINUOUS = 0,
AD_SD_MODE_SINGLE = 1,
@@ -99,7 +101,7 @@ struct ad_sigma_delta {
* 'rx_buf' is up to 32 bits per sample + 64 bit timestamp,
* rounded to 16 bytes to take into account padding.
*/
- uint8_t tx_buf[4] ____cacheline_aligned;
+ uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN);
uint8_t rx_buf[16] __aligned(8);
};
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 607c3a89a647..f9ae5cdd884f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -258,9 +258,9 @@ struct st_sensor_data {
bool hw_irq_trigger;
s64 hw_timestamp;
- char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
-
struct mutex odr_lock;
+
+ char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN);
};
#ifdef CONFIG_IIO_BUFFER
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index dc9ea299e088..8898966bc0f0 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -11,6 +11,7 @@
#include <linux/spi/spi.h>
#include <linux/interrupt.h>
+#include <linux/iio/iio.h>
#include <linux/iio/types.h>
#define ADIS_WRITE_REG(reg) ((0x80 | (reg)))
@@ -131,7 +132,7 @@ struct adis {
unsigned long irq_flag;
void *buffer;
- u8 tx[10] ____cacheline_aligned;
+ u8 tx[10] __aligned(IIO_DMA_MINALIGN);
u8 rx[4];
};
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 86b57757c7b1..0bae61a15b60 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -16,23 +16,6 @@ struct linux_binprm;
#ifdef CONFIG_IMA
extern enum hash_algo ima_get_current_hash_algo(void);
-extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_file_check(struct file *file, int mask);
-extern void ima_post_create_tmpfile(struct mnt_idmap *idmap,
- struct inode *inode);
-extern void ima_file_free(struct file *file);
-extern int ima_file_mmap(struct file *file, unsigned long reqprot,
- unsigned long prot, unsigned long flags);
-extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot);
-extern int ima_load_data(enum kernel_load_data_id id, bool contents);
-extern int ima_post_load_data(char *buf, loff_t size,
- enum kernel_load_data_id id, char *description);
-extern int ima_read_file(struct file *file, enum kernel_read_file_id id,
- bool contents);
-extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
- enum kernel_read_file_id id);
-extern void ima_post_path_mknod(struct mnt_idmap *idmap,
- struct dentry *dentry);
extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size);
extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size);
@@ -57,68 +40,6 @@ static inline enum hash_algo ima_get_current_hash_algo(void)
return HASH_ALGO__LAST;
}
-static inline int ima_bprm_check(struct linux_binprm *bprm)
-{
- return 0;
-}
-
-static inline int ima_file_check(struct file *file, int mask)
-{
- return 0;
-}
-
-static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap,
- struct inode *inode)
-{
-}
-
-static inline void ima_file_free(struct file *file)
-{
- return;
-}
-
-static inline int ima_file_mmap(struct file *file, unsigned long reqprot,
- unsigned long prot, unsigned long flags)
-{
- return 0;
-}
-
-static inline int ima_file_mprotect(struct vm_area_struct *vma,
- unsigned long prot)
-{
- return 0;
-}
-
-static inline int ima_load_data(enum kernel_load_data_id id, bool contents)
-{
- return 0;
-}
-
-static inline int ima_post_load_data(char *buf, loff_t size,
- enum kernel_load_data_id id,
- char *description)
-{
- return 0;
-}
-
-static inline int ima_read_file(struct file *file, enum kernel_read_file_id id,
- bool contents)
-{
- return 0;
-}
-
-static inline int ima_post_read_file(struct file *file, void *buf, loff_t size,
- enum kernel_read_file_id id)
-{
- return 0;
-}
-
-static inline void ima_post_path_mknod(struct mnt_idmap *idmap,
- struct dentry *dentry)
-{
- return;
-}
-
static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size)
{
return -EOPNOTSUPP;
@@ -169,76 +90,13 @@ static inline void ima_add_kexec_buffer(struct kimage *image)
{}
#endif
-#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS
-extern void ima_post_key_create_or_update(struct key *keyring,
- struct key *key,
- const void *payload, size_t plen,
- unsigned long flags, bool create);
-#else
-static inline void ima_post_key_create_or_update(struct key *keyring,
- struct key *key,
- const void *payload,
- size_t plen,
- unsigned long flags,
- bool create) {}
-#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */
-
#ifdef CONFIG_IMA_APPRAISE
extern bool is_ima_appraise_enabled(void);
-extern void ima_inode_post_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry);
-extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
- const void *xattr_value, size_t xattr_value_len);
-extern int ima_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl);
-static inline int ima_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return ima_inode_set_acl(idmap, dentry, acl_name, NULL);
-}
-extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name);
#else
static inline bool is_ima_appraise_enabled(void)
{
return 0;
}
-
-static inline void ima_inode_post_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry)
-{
- return;
-}
-
-static inline int ima_inode_setxattr(struct dentry *dentry,
- const char *xattr_name,
- const void *xattr_value,
- size_t xattr_value_len)
-{
- return 0;
-}
-
-static inline int ima_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl)
-{
-
- return 0;
-}
-
-static inline int ima_inode_removexattr(struct dentry *dentry,
- const char *xattr_name)
-{
- return 0;
-}
-
-static inline int ima_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return 0;
-}
#endif /* CONFIG_IMA_APPRAISE */
#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index adb83a42a6b9..35227d47cfc9 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -2,7 +2,7 @@
#ifndef _LINUX_INDIRECT_CALL_WRAPPER_H
#define _LINUX_INDIRECT_CALL_WRAPPER_H
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
/*
* INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 84abb30a3fbb..a9033696b0aa 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -8,6 +8,7 @@
struct inet_hashinfo;
struct inet_diag_handler {
+ struct module *owner;
void (*dump)(struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ddb27fc0ee8c..cb5280e6cc21 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -53,13 +53,15 @@ struct in_device {
};
#define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1])
+#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr))
#define IPV4_DEVCONF_ALL(net, attr) \
IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr)
+#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr))
-static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+static inline int ipv4_devconf_get(const struct in_device *in_dev, int index)
{
index--;
- return in_dev->cnf.data[index];
+ return READ_ONCE(in_dev->cnf.data[index]);
}
static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
@@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
{
index--;
set_bit(index, in_dev->cnf.state);
- in_dev->cnf.data[index] = val;
+ WRITE_ONCE(in_dev->cnf.data[index], val);
}
static inline void ipv4_devconf_setall(struct in_device *in_dev)
@@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val))
#define IN_DEV_ANDCONF(in_dev, attr) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \
IN_DEV_CONF_GET((in_dev), attr))
#define IN_DEV_NET_ORCONF(in_dev, net, attr) \
- (IPV4_DEVCONF_ALL(net, attr) || \
+ (IPV4_DEVCONF_ALL_RO(net, attr) || \
IN_DEV_CONF_GET((in_dev), attr))
#define IN_DEV_ORCONF(in_dev, attr) \
IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr)
#define IN_DEV_MAXCONF(in_dev, attr) \
- (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
+ (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \
IN_DEV_CONF_GET((in_dev), attr)))
#define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING)
diff --git a/include/linux/integrity.h b/include/linux/integrity.h
index 2ea0f2f65ab6..459b79683783 100644
--- a/include/linux/integrity.h
+++ b/include/linux/integrity.h
@@ -19,40 +19,13 @@ enum integrity_status {
INTEGRITY_UNKNOWN,
};
-/* List of EVM protected security xattrs */
#ifdef CONFIG_INTEGRITY
-extern struct integrity_iint_cache *integrity_inode_get(struct inode *inode);
-extern void integrity_inode_free(struct inode *inode);
extern void __init integrity_load_keys(void);
#else
-static inline struct integrity_iint_cache *
- integrity_inode_get(struct inode *inode)
-{
- return NULL;
-}
-
-static inline void integrity_inode_free(struct inode *inode)
-{
- return;
-}
-
static inline void integrity_load_keys(void)
{
}
#endif /* CONFIG_INTEGRITY */
-#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
-
-extern int integrity_kernel_module_request(char *kmod_name);
-
-#else
-
-static inline int integrity_kernel_module_request(char *kmod_name)
-{
- return 0;
-}
-
-#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */
-
#endif /* _LINUX_INTEGRITY_H */
diff --git a/include/linux/io.h b/include/linux/io.h
index 7304f2a69960..235ba7d80a8f 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -23,12 +23,19 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
#ifdef CONFIG_MMU
int ioremap_page_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot);
+int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot);
#else
static inline int ioremap_page_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot)
{
return 0;
}
+static inline int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
+{
+ return 0;
+}
#endif
/*
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 854ad67a5f70..e24893625085 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -2,6 +2,7 @@
#define IO_URING_TYPES_H
#include <linux/blkdev.h>
+#include <linux/hashtable.h>
#include <linux/task_work.h>
#include <linux/bitmap.h>
#include <linux/llist.h>
@@ -240,12 +241,14 @@ struct io_ring_ctx {
unsigned int poll_activated: 1;
unsigned int drain_disabled: 1;
unsigned int compat: 1;
+ unsigned int iowq_limits_set : 1;
struct task_struct *submitter_task;
struct io_rings *rings;
struct percpu_ref refs;
enum task_work_notify_mode notify_method;
+ unsigned sq_thread_idle;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -274,10 +277,20 @@ struct io_ring_ctx {
*/
struct io_rsrc_node *rsrc_node;
atomic_t cancel_seq;
+
+ /*
+ * ->iopoll_list is protected by the ctx->uring_lock for
+ * io_uring instances that don't use IORING_SETUP_SQPOLL.
+ * For SQPOLL, only the single threaded io_sq_thread() will
+ * manipulate the list, hence no extra locking is needed there.
+ */
+ bool poll_multi_queue;
+ struct io_wq_work_list iopoll_list;
+
struct io_file_table file_table;
+ struct io_mapped_ubuf **user_bufs;
unsigned nr_user_files;
unsigned nr_user_bufs;
- struct io_mapped_ubuf **user_bufs;
struct io_submit_state submit_state;
@@ -289,15 +302,6 @@ struct io_ring_ctx {
struct io_alloc_cache netmsg_cache;
/*
- * ->iopoll_list is protected by the ctx->uring_lock for
- * io_uring instances that don't use IORING_SETUP_SQPOLL.
- * For SQPOLL, only the single threaded io_sq_thread() will
- * manipulate the list, hence no extra locking is needed there.
- */
- struct io_wq_work_list iopoll_list;
- bool poll_multi_queue;
-
- /*
* Any cancelable uring_cmd is added to this list in
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
*/
@@ -343,8 +347,8 @@ struct io_ring_ctx {
spinlock_t completion_lock;
/* IRQ completion list, under ->completion_lock */
- struct io_wq_work_list locked_free_list;
unsigned int locked_free_nr;
+ struct io_wq_work_list locked_free_list;
struct list_head io_buffers_comp;
struct list_head cq_overflow_list;
@@ -366,9 +370,6 @@ struct io_ring_ctx {
unsigned int file_alloc_start;
unsigned int file_alloc_end;
- struct xarray personalities;
- u32 pers_next;
-
struct list_head io_buffers_cache;
/* deferred free list, protected by ->uring_lock */
@@ -389,6 +390,9 @@ struct io_ring_ctx {
struct wait_queue_head rsrc_quiesce_wq;
unsigned rsrc_quiesce;
+ u32 pers_next;
+ struct xarray personalities;
+
/* hashed buffered write serialization */
struct io_wq_hash *hash_map;
@@ -405,11 +409,22 @@ struct io_ring_ctx {
/* io-wq management, e.g. thread count */
u32 iowq_limits[2];
- bool iowq_limits_set;
struct callback_head poll_wq_task_work;
struct list_head defer_list;
- unsigned sq_thread_idle;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ struct list_head napi_list; /* track busy poll napi_id */
+ spinlock_t napi_lock; /* napi_list lock */
+
+ /* napi busy poll default timeout */
+ unsigned int napi_busy_poll_to;
+ bool napi_prefer_busy_poll;
+ bool napi_enabled;
+
+ DECLARE_HASHTABLE(napi_ht, 4);
+#endif
+
/* protected by ->completion_lock */
unsigned evfd_last_cq_tail;
@@ -455,7 +470,6 @@ enum {
REQ_F_SKIP_LINK_CQES_BIT,
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
- REQ_F_PARTIAL_IO_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
REQ_F_CLEAR_POLLIN_BIT,
REQ_F_HASH_LOCKED_BIT,
@@ -463,75 +477,88 @@ enum {
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
REQ_F_POLL_NO_LAZY_BIT,
+ REQ_F_CANCEL_SEQ_BIT,
+ REQ_F_CAN_POLL_BIT,
+ REQ_F_BL_EMPTY_BIT,
+ REQ_F_BL_NO_RECYCLE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
};
+typedef u64 __bitwise io_req_flags_t;
+#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno)))
+
enum {
/* ctx owns file */
- REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT),
+ REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT),
/* drain existing IO first */
- REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT),
+ REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT),
/* linked sqes */
- REQ_F_LINK = BIT(REQ_F_LINK_BIT),
+ REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT),
/* doesn't sever on completion < 0 */
- REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
+ REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT),
/* IOSQE_ASYNC */
- REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
+ REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT),
/* IOSQE_BUFFER_SELECT */
- REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
+ REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT),
/* IOSQE_CQE_SKIP_SUCCESS */
- REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT),
+ REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT),
/* fail rest of links */
- REQ_F_FAIL = BIT(REQ_F_FAIL_BIT),
+ REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT),
/* on inflight list, should be cancelled and waited on exit reliably */
- REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
+ REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT),
/* read/write uses file position */
- REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
+ REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT),
/* must not punt to workers */
- REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
+ REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT),
/* has or had linked timeout */
- REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
+ REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT),
/* needs cleanup */
- REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
+ REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
/* already went through poll handler */
- REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
+ REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
/* buffer already selected */
- REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
+ REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
/* buffer selected from ring, needs commit */
- REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT),
+ REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT),
/* caller should reissue async */
- REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
+ REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT),
/* supports async reads/writes */
- REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT),
+ REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT),
/* regular file */
- REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
+ REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT),
/* has creds assigned */
- REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
+ REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT),
/* skip refcounting if not set */
- REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
+ REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT),
/* there is a linked timeout that has to be armed */
- REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
+ REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT),
/* ->async_data allocated */
- REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT),
+ REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT),
/* don't post CQEs while failing linked requests */
- REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT),
+ REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT),
/* single poll may be active */
- REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT),
+ REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
/* double poll may active */
- REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
- /* request has already done partial IO */
- REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
+ REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
/* fast poll multishot mode */
- REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+ REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
/* recvmsg special flag, clear EPOLLIN */
- REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
+ REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
- REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
+ REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
/* don't use lazy poll wake for this request */
- REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT),
+ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
+ /* cancel sequence is set and valid */
+ REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT),
+ /* file is pollable */
+ REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
+ /* buffer list was empty after selection of buffer */
+ REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
+ /* don't recycle provided buffers for this request */
+ REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -592,15 +619,17 @@ struct io_kiocb {
* and after selection it points to the buffer ID itself.
*/
u16 buf_index;
- unsigned int flags;
+
+ unsigned nr_tw;
+
+ /* REQ_F_* flags */
+ io_req_flags_t flags;
struct io_cqe cqe;
struct io_ring_ctx *ctx;
struct task_struct *task;
- struct io_rsrc_node *rsrc_node;
-
union {
/* store used ubuf, so we can prevent reloading */
struct io_mapped_ubuf *imu;
@@ -621,10 +650,12 @@ struct io_kiocb {
/* cache ->apoll->events */
__poll_t apoll_events;
};
+
+ struct io_rsrc_node *rsrc_node;
+
atomic_t refs;
atomic_t poll_refs;
struct io_task_work io_task_work;
- unsigned nr_tw;
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
struct hlist_node hash_node;
/* internal polling, see IORING_FEAT_FAST_POLL */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 96dd0acbba44..6fc1c858013d 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -293,22 +293,32 @@ struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
- u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */
- struct bio *io_bio; /* bio being built */
- struct bio io_inline_bio; /* MUST BE LAST! */
+ struct bio io_bio; /* MUST BE LAST! */
};
+static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio)
+{
+ return container_of(bio, struct iomap_ioend, io_bio);
+}
+
struct iomap_writeback_ops {
/*
* Required, maps the blocks so that writeback can be performed on
* the range starting at offset.
+ *
+ * Can return arbitrarily large regions, but we need to call into it at
+ * least once per folio to allow the file systems to synchronize with
+ * the write path that could be invalidating mappings.
+ *
+ * An existing mapping from a previous call to this method can be reused
+ * by the file system if it is still valid.
*/
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
- loff_t offset);
+ loff_t offset, unsigned len);
/*
* Optional, allows the file systems to perform actions just before
@@ -329,6 +339,7 @@ struct iomap_writepage_ctx {
struct iomap iomap;
struct iomap_ioend *ioend;
const struct iomap_writeback_ops *ops;
+ u32 nr_folios; /* folios added to the ioend */
};
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1ea2a820e1eb..2e925b5eba53 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -14,7 +14,6 @@
#include <linux/err.h>
#include <linux/of.h>
#include <linux/iova_bitmap.h>
-#include <uapi/linux/iommu.h>
#define IOMMU_READ (1 << 0)
#define IOMMU_WRITE (1 << 1)
@@ -41,8 +40,110 @@ struct iommu_domain_ops;
struct iommu_dirty_ops;
struct notifier_block;
struct iommu_sva;
-struct iommu_fault_event;
struct iommu_dma_cookie;
+struct iommu_fault_param;
+
+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+ IOMMU_FAULT_PAGE_REQ = 1, /* page request fault */
+};
+
+/**
+ * struct iommu_fault_page_request - Page Request data
+ * @flags: encodes whether the corresponding fields are valid and whether this
+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
+ * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
+ * must have the same PASID value as the page request. When it is clear,
+ * the page response should not have a PASID.
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_fault_page_request {
+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
+#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
+ u32 flags;
+ u32 pasid;
+ u32 grpid;
+ u32 perm;
+ u64 addr;
+ u64 private_data[2];
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ * @type: fault type from &enum iommu_fault_type
+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
+ */
+struct iommu_fault {
+ u32 type;
+ struct iommu_fault_page_request prm;
+};
+
+/**
+ * enum iommu_page_response_code - Return status of fault handlers
+ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is "Success" in PCI PRI.
+ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ * this device if possible. This is "Response Failure" in PCI PRI.
+ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is "Invalid Request" in PCI PRI.
+ */
+enum iommu_page_response_code {
+ IOMMU_PAGE_RESP_SUCCESS = 0,
+ IOMMU_PAGE_RESP_INVALID,
+ IOMMU_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_page_response - Generic page response information
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @code: response code from &enum iommu_page_response_code
+ */
+struct iommu_page_response {
+ u32 pasid;
+ u32 grpid;
+ u32 code;
+};
+
+struct iopf_fault {
+ struct iommu_fault fault;
+ /* node for pending lists */
+ struct list_head list;
+};
+
+struct iopf_group {
+ struct iopf_fault last_fault;
+ struct list_head faults;
+ /* list node for iommu_fault_param::faults */
+ struct list_head pending_node;
+ struct work_struct work;
+ struct iommu_domain *domain;
+ /* The device's fault data parameter. */
+ struct iommu_fault_param *fault_param;
+};
+
+/**
+ * struct iopf_queue - IO Page Fault queue
+ * @wq: the fault workqueue
+ * @devices: devices attached to this queue
+ * @lock: protects the device list
+ */
+struct iopf_queue {
+ struct workqueue_struct *wq;
+ struct list_head devices;
+ struct mutex lock;
+};
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
@@ -50,7 +151,6 @@ struct iommu_dma_cookie;
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
-typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped */
@@ -110,8 +210,7 @@ struct iommu_domain {
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
- enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
- void *data);
+ int (*iopf_handler)(struct iopf_group *group);
void *fault_data;
union {
struct {
@@ -468,16 +567,15 @@ struct iommu_ops {
/* Request/Free a list of reserved regions for a device */
void (*get_resv_regions)(struct device *dev, struct list_head *list);
- int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+ int (*of_xlate)(struct device *dev, const struct of_phandle_args *args);
bool (*is_attach_deferred)(struct device *dev);
/* Per device IOMMU features */
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
- int (*page_response)(struct device *dev,
- struct iommu_fault_event *evt,
- struct iommu_page_response *msg);
+ void (*page_response)(struct device *dev, struct iopf_fault *evt,
+ struct iommu_page_response *msg);
int (*def_domain_type)(struct device *dev);
void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
@@ -487,6 +585,7 @@ struct iommu_ops {
struct module *owner;
struct iommu_domain *identity_domain;
struct iommu_domain *blocked_domain;
+ struct iommu_domain *release_domain;
struct iommu_domain *default_domain;
};
@@ -578,38 +677,34 @@ struct iommu_device {
};
/**
- * struct iommu_fault_event - Generic fault event
- *
- * Can represent recoverable faults such as a page requests or
- * unrecoverable faults such as DMA or IRQ remapping faults.
- *
- * @fault: fault descriptor
- * @list: pending fault event list, used for tracking responses
- */
-struct iommu_fault_event {
- struct iommu_fault fault;
- struct list_head list;
-};
-
-/**
* struct iommu_fault_param - per-device IOMMU fault data
- * @handler: Callback function to handle IOMMU faults at device level
- * @data: handler private data
- * @faults: holds the pending faults which needs response
* @lock: protect pending faults list
+ * @users: user counter to manage the lifetime of the data
+ * @rcu: rcu head for kfree_rcu()
+ * @dev: the device that owns this param
+ * @queue: IOPF queue
+ * @queue_list: index into queue->devices
+ * @partial: faults that are part of a Page Request Group for which the last
+ * request hasn't been submitted yet.
+ * @faults: holds the pending faults which need response
*/
struct iommu_fault_param {
- iommu_dev_fault_handler_t handler;
- void *data;
- struct list_head faults;
struct mutex lock;
+ refcount_t users;
+ struct rcu_head rcu;
+
+ struct device *dev;
+ struct iopf_queue *queue;
+ struct list_head queue_list;
+
+ struct list_head partial;
+ struct list_head faults;
};
/**
* struct dev_iommu - Collection of per-device IOMMU data
*
* @fault_param: IOMMU detected device fault reporting data
- * @iopf_param: I/O Page Fault queue and data
* @fwspec: IOMMU fwspec data
* @iommu_dev: IOMMU device this device is linked to
* @priv: IOMMU Driver private data
@@ -624,8 +719,7 @@ struct iommu_fault_param {
*/
struct dev_iommu {
struct mutex lock;
- struct iommu_fault_param *fault_param;
- struct iopf_device_param *iopf_param;
+ struct iommu_fault_param __rcu *fault_param;
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void *priv;
@@ -654,6 +748,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
return (struct iommu_device *)dev_get_drvdata(dev);
}
+/**
+ * iommu_get_iommu_dev - Get iommu_device for a device
+ * @dev: an end-point device
+ *
+ * Note that this function must be called from the iommu_ops
+ * to retrieve the iommu_device for a device, which the core code
+ * guarentees it will not invoke the op without an attached iommu.
+ */
+static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev)
+{
+ return dev->iommu->iommu_dev;
+}
+
+#define iommu_get_iommu_dev(dev, type, member) \
+ container_of(__iommu_get_iommu_dev(dev), type, member)
+
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
*gather = (struct iommu_iotlb_gather) {
@@ -719,16 +829,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data,
extern struct iommu_group *iommu_group_get(struct device *dev);
extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data);
-
-extern int iommu_unregister_device_fault_handler(struct device *dev);
-
-extern int iommu_report_device_fault(struct device *dev,
- struct iommu_fault_event *evt);
-extern int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg);
extern int iommu_group_id(struct iommu_group *group);
extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
@@ -892,18 +992,21 @@ struct iommu_fwspec {
struct iommu_sva {
struct device *dev;
struct iommu_domain *domain;
+ struct list_head handle_item;
+ refcount_t users;
};
struct iommu_mm_data {
u32 pasid;
struct list_head sva_domains;
+ struct list_head sva_handles;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
const struct iommu_ops *ops);
void iommu_fwspec_free(struct device *dev);
-int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
-const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
+int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids);
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode);
static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
{
@@ -945,8 +1048,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group);
int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
-struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
- struct mm_struct *mm);
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain,
@@ -1135,31 +1236,6 @@ static inline void iommu_group_put(struct iommu_group *group)
{
}
-static inline
-int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data)
-{
- return -ENODEV;
-}
-
-static inline int iommu_unregister_device_fault_handler(struct device *dev)
-{
- return 0;
-}
-
-static inline
-int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
-{
- return -ENODEV;
-}
-
-static inline int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg)
-{
- return -ENODEV;
-}
-
static inline int iommu_group_id(struct iommu_group *group)
{
return -ENODEV;
@@ -1253,7 +1329,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
}
static inline
-const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
{
return NULL;
}
@@ -1308,12 +1384,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
return -ENODEV;
}
-static inline struct iommu_domain *
-iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
-{
- return NULL;
-}
-
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
@@ -1340,6 +1410,14 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
static inline void iommu_free_global_pasid(ioasid_t pasid) {}
#endif /* CONFIG_IOMMU_API */
+#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
+void iommu_group_mutex_assert(struct device *dev);
+#else
+static inline void iommu_group_mutex_assert(struct device *dev)
+{
+}
+#endif
+
/**
* iommu_map_sgtable - Map the given buffer to the IOMMU domain
* @domain: The IOMMU domain to perform the mapping
@@ -1453,6 +1531,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+ struct mm_struct *mm);
#else
static inline struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1477,6 +1557,68 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
+
+static inline struct iommu_domain *
+iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
+{
+ return NULL;
+}
#endif /* CONFIG_IOMMU_SVA */
+#ifdef CONFIG_IOMMU_IOPF
+int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
+void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev);
+int iopf_queue_flush_dev(struct device *dev);
+struct iopf_queue *iopf_queue_alloc(const char *name);
+void iopf_queue_free(struct iopf_queue *queue);
+int iopf_queue_discard_partial(struct iopf_queue *queue);
+void iopf_free_group(struct iopf_group *group);
+void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
+void iopf_group_response(struct iopf_group *group,
+ enum iommu_page_response_code status);
+#else
+static inline int
+iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline void
+iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
+{
+}
+
+static inline int iopf_queue_flush_dev(struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline struct iopf_queue *iopf_queue_alloc(const char *name)
+{
+ return NULL;
+}
+
+static inline void iopf_queue_free(struct iopf_queue *queue)
+{
+}
+
+static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
+{
+ return -ENODEV;
+}
+
+static inline void iopf_free_group(struct iopf_group *group)
+{
+}
+
+static inline void
+iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
+{
+}
+
+static inline void iopf_group_response(struct iopf_group *group,
+ enum iommu_page_response_code status)
+{
+}
+#endif /* CONFIG_IOMMU_IOPF */
#endif /* __LINUX_IOMMU_H */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5e605e384aac..383a0ea2ab91 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -3,6 +3,7 @@
#define _IPV6_H
#include <uapi/linux/ipv6.h>
+#include <linux/cache.h>
#define ipv6_optlen(p) (((p)->hdrlen+1) << 3)
#define ipv6_authlen(p) (((p)->hdrlen+2) << 2)
@@ -10,9 +11,16 @@
* This structure contains configuration options per IPv6 link.
*/
struct ipv6_devconf {
- __s32 forwarding;
+ /* RX & TX fastpath fields. */
+ __cacheline_group_begin(ipv6_devconf_read_txrx);
+ __s32 disable_ipv6;
__s32 hop_limit;
__s32 mtu6;
+ __s32 forwarding;
+ __s32 disable_policy;
+ __s32 proxy_ndp;
+ __cacheline_group_end(ipv6_devconf_read_txrx);
+
__s32 accept_ra;
__s32 accept_redirects;
__s32 autoconf;
@@ -27,6 +35,7 @@ struct ipv6_devconf {
__s32 use_tempaddr;
__s32 temp_valid_lft;
__s32 temp_prefered_lft;
+ __s32 regen_min_advance;
__s32 regen_max_retry;
__s32 max_desync_factor;
__s32 max_addresses;
@@ -44,7 +53,6 @@ struct ipv6_devconf {
__s32 accept_ra_rt_info_max_plen;
#endif
#endif
- __s32 proxy_ndp;
__s32 accept_source_route;
__s32 accept_ra_from_local;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -54,7 +62,6 @@ struct ipv6_devconf {
#ifdef CONFIG_IPV6_MROUTE
atomic_t mc_forwarding;
#endif
- __s32 disable_ipv6;
__s32 drop_unicast_in_l2_multicast;
__s32 accept_dad;
__s32 force_tllao;
@@ -75,7 +82,6 @@ struct ipv6_devconf {
#endif
__u32 enhanced_dad;
__u32 addr_gen_mode;
- __s32 disable_policy;
__s32 ndisc_tclass;
__s32 rpl_seg_enabled;
__u32 ioam6_id;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 90081afa10ce..97baa937ab5b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -179,7 +179,7 @@ struct irq_common_data {
struct irq_data {
u32 mask;
unsigned int irq;
- unsigned long hwirq;
+ irq_hw_number_t hwirq;
struct irq_common_data *common;
struct irq_chip *chip;
struct irq_domain *domain;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index ee0a82c60508..21ecf582a0fe 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -619,6 +619,23 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+#ifdef CONFIG_GENERIC_MSI_IRQ
+int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
+ unsigned int type);
+void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq);
+#else
+static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
+ unsigned int type)
+{
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
+{
+ WARN_ON_ONCE(1);
+}
+#endif
+
#else /* CONFIG_IRQ_DOMAIN */
static inline void irq_dispose_mapping(unsigned int virq) { }
static inline struct irq_domain *irq_find_matching_fwnode(
diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h
index c29921fd8cd1..5c1fe6f1fcde 100644
--- a/include/linux/irqdomain_defs.h
+++ b/include/linux/irqdomain_defs.h
@@ -26,6 +26,8 @@ enum irq_domain_bus_token {
DOMAIN_BUS_DMAR,
DOMAIN_BUS_AMDVI,
DOMAIN_BUS_PCI_DEVICE_IMS,
+ DOMAIN_BUS_DEVICE_MSI,
+ DOMAIN_BUS_WIRED_TO_MSI,
};
#endif /* _LINUX_IRQDOMAIN_DEFS_H */
diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h
index c30f454a9518..72dd1eb3a0e7 100644
--- a/include/linux/irqhandler.h
+++ b/include/linux/irqhandler.h
@@ -8,7 +8,7 @@
*/
struct irq_desc;
-struct irq_data;
+
typedef void (*irq_flow_handler_t)(struct irq_desc *desc);
#endif
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index e0ae2a43e0eb..d9f1435a5a13 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -102,12 +102,15 @@ static inline u64 get_jiffies_64(void)
}
#endif
-/*
- * These inlines deal with timer wrapping correctly. You are
- * strongly encouraged to use them:
- * 1. Because people otherwise forget
- * 2. Because if the timer wrap changes in future you won't have to
- * alter your driver code.
+/**
+ * DOC: General information about time_* inlines
+ *
+ * These inlines deal with timer wrapping correctly. You are strongly encouraged
+ * to use them:
+ *
+ * #. Because people otherwise forget
+ * #. Because if the timer wrap changes in future you won't have to alter your
+ * driver code.
*/
/**
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index dbb06d789e74..70d6a8f6e25d 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -429,7 +429,6 @@ struct kasan_cache {
};
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object);
-slab_flags_t kasan_never_merge(void);
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
slab_flags_t *flags);
@@ -446,11 +445,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache,
{
return 0;
}
-/* And thus nothing prevents cache merging. */
-static inline slab_flags_t kasan_never_merge(void)
-{
- return 0;
-}
/* And no cache-related metadata initialization is required. */
static inline void kasan_cache_create(struct kmem_cache *cache,
unsigned int *size,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d9ad21058eed..d718fbec72dd 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,20 +33,14 @@
#include <linux/sprintf.h>
#include <linux/static_call_types.h>
#include <linux/instruction_pointer.h>
+#include <linux/wordpart.h>
+
#include <asm/byteorder.h>
#include <uapi/linux/kernel.h>
#define STACK_MAGIC 0xdeadbeef
-/**
- * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
- * @x: value to repeat
- *
- * NOTE: @x is not checked for > 0xff; larger values produce odd results.
- */
-#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
-
/* generic data direction definitions */
#define READ 0
#define WRITE 1
@@ -60,34 +54,6 @@
} \
)
-/**
- * upper_32_bits - return bits 32-63 of a number
- * @n: the number we're accessing
- *
- * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
- * the "right shift count >= width of type" warning when that quantity is
- * 32-bits.
- */
-#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
-
-/**
- * lower_32_bits - return bits 0-31 of a number
- * @n: the number we're accessing
- */
-#define lower_32_bits(n) ((u32)((n) & 0xffffffff))
-
-/**
- * upper_16_bits - return bits 16-31 of a number
- * @n: the number we're accessing
- */
-#define upper_16_bits(n) ((u16)((n) >> 16))
-
-/**
- * lower_16_bits - return bits 0-15 of a number
- * @n: the number we're accessing
- */
-#define lower_16_bits(n) ((u16)((n) & 0xffff))
-
struct completion;
struct user;
@@ -199,12 +165,6 @@ static inline void might_fault(void) { }
void do_exit(long error_code) __noreturn;
-extern int get_option(char **str, int *pint);
-extern char *get_options(const char *str, int nints, int *ints);
-extern unsigned long long memparse(const char *ptr, char **retptr);
-extern bool parse_option_str(const char *str, const char *option);
-extern char *next_arg(char *args, char **param, char **val);
-
extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e7fd25b09b3..179df96b20f8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2031,6 +2031,32 @@ static inline int mmu_invalidate_retry_gfn(struct kvm *kvm,
return 1;
return 0;
}
+
+/*
+ * This lockless version of the range-based retry check *must* be paired with a
+ * call to the locked version after acquiring mmu_lock, i.e. this is safe to
+ * use only as a pre-check to avoid contending mmu_lock. This version *will*
+ * get false negatives and false positives.
+ */
+static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm,
+ unsigned long mmu_seq,
+ gfn_t gfn)
+{
+ /*
+ * Use READ_ONCE() to ensure the in-progress flag and sequence counter
+ * are always read from memory, e.g. so that checking for retry in a
+ * loop won't result in an infinite retry loop. Don't force loads for
+ * start+end, as the key to avoiding infinite retry loops is observing
+ * the 1=>0 transition of in-progress, i.e. getting false negatives
+ * due to stale start+end values is acceptable.
+ */
+ if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) &&
+ gfn >= kvm->mmu_invalidate_range_start &&
+ gfn < kvm->mmu_invalidate_range_end)
+ return true;
+
+ return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq;
+}
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1dbb14daccfa..26d68115afb8 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -471,7 +471,7 @@ enum ata_completion_errors {
/*
* Link power management policy: If you alter this, you also need to
- * alter libata-scsi.c (for the ascii descriptions)
+ * alter libata-sata.c (for the ascii descriptions)
*/
enum ata_lpm_policy {
ATA_LPM_UNKNOWN,
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 9f565416d186..1b95fe31051f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -375,12 +375,12 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
static inline int nlm_compare_locks(const struct file_lock *fl1,
const struct file_lock *fl2)
{
- return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
- && fl1->fl_pid == fl2->fl_pid
- && fl1->fl_owner == fl2->fl_owner
+ return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file)
+ && fl1->c.flc_pid == fl2->c.flc_pid
+ && fl1->c.flc_owner == fl2->c.flc_owner
&& fl1->fl_start == fl2->fl_start
&& fl1->fl_end == fl2->fl_end
- &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK);
+ &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK);
}
extern const struct lock_manager_operations nlmsvc_lock_operations;
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index b60fbcd8cdfa..80cca9426761 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -52,7 +52,7 @@ struct nlm_lock {
* FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to
* 32 bytes.
*/
-
+
struct nlm_cookie
{
unsigned char data[NLM_MAXCOOKIELEN];
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 185924c56378..a8057a3f8de6 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -94,6 +94,8 @@ LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry,
LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry)
LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry,
umode_t mode, unsigned int dev)
+LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap,
+ struct dentry *dentry)
LSM_HOOK(int, 0, path_truncate, const struct path *path)
LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry,
const char *old_name)
@@ -119,6 +121,8 @@ LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode,
const struct qstr *name, const struct inode *context_inode)
LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry,
umode_t mode)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap,
+ struct inode *inode)
LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry)
LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry)
@@ -135,7 +139,10 @@ LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry)
LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode,
bool rcu)
LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask)
-LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr)
+LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, int ia_valid)
LSM_HOOK(int, 0, inode_getattr, const struct path *path)
LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap,
struct dentry *dentry, const char *name, const void *value,
@@ -146,12 +153,18 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name)
LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry)
LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry,
+ const char *name)
LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name, struct posix_acl *kacl)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry,
+ const char *acl_name, struct posix_acl *kacl)
LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *acl_name)
LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry)
LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap,
struct dentry *dentry)
@@ -168,6 +181,7 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir,
struct kernfs_node *kn)
LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
LSM_HOOK(int, 0, file_alloc_security, struct file *file)
+LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
unsigned long arg)
@@ -186,6 +200,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk,
struct fown_struct *fown, int sig)
LSM_HOOK(int, 0, file_receive, struct file *file)
LSM_HOOK(int, 0, file_open, struct file *file)
+LSM_HOOK(int, 0, file_post_open, struct file *file, int mask)
LSM_HOOK(int, 0, file_truncate, struct file *file)
LSM_HOOK(int, 0, task_alloc, struct task_struct *task,
unsigned long clone_flags)
@@ -315,9 +330,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock,
sockptr_t optval, sockptr_t optlen, unsigned int len)
-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock,
struct sk_buff *skb, u32 *secid)
LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
@@ -390,6 +405,9 @@ LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key)
LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred,
enum key_need_perm need_perm)
LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer)
+LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
+ struct key *key, const void *payload, size_t payload_len,
+ unsigned long flags, bool create)
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
@@ -404,10 +422,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size)
LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode)
LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog)
-LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map)
-LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map)
-LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux)
-LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux)
+LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
+LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map)
+LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
+LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
+LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token)
+LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd)
+LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
#endif /* CONFIG_BPF_SYSCALL */
LSM_HOOK(int, 0, locked_down, enum lockdown_reason what)
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index b3d63123b945..a53ad4dabd7e 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -171,6 +171,7 @@ enum maple_type {
#define MT_FLAGS_LOCK_IRQ 0x100
#define MT_FLAGS_LOCK_BH 0x200
#define MT_FLAGS_LOCK_EXTERN 0x300
+#define MT_FLAGS_ALLOC_WRAPPED 0x0800
#define MAPLE_HEIGHT_MAX 31
@@ -319,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first,
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp);
+int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp);
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp);
@@ -499,6 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max);
void *mas_find_rev(struct ma_state *mas, unsigned long min);
void *mas_find_range_rev(struct ma_state *mas, unsigned long max);
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp);
+int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp);
bool mas_nomem(struct ma_state *mas, gfp_t gfp);
void mas_pause(struct ma_state *mas);
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 9b54c4f0677f..693eba9869e4 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -26,6 +26,7 @@
#define MARVELL_PHY_ID_88E2110 0x002b09b0
#define MARVELL_PHY_ID_88X2222 0x01410f10
#define MARVELL_PHY_ID_88Q2110 0x002b0980
+#define MARVELL_PHY_ID_88Q2220 0x002b0b20
/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 79ceee3c8673..68f8d2e970d4 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -373,6 +373,10 @@ static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 l
{
linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
advertising, lpa & MDIO_AN_T1_ADV_M_B10L);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
+ advertising, lpa & MDIO_AN_T1_ADV_M_100BT1);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT,
+ advertising, lpa & MDIO_AN_T1_ADV_M_1000BT1);
}
/**
@@ -409,6 +413,10 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising)
if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising))
result |= MDIO_AN_T1_ADV_M_B10L;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, advertising))
+ result |= MDIO_AN_T1_ADV_M_100BT1;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, advertising))
+ result |= MDIO_AN_T1_ADV_M_1000BT1;
return result;
}
@@ -440,6 +448,42 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
}
/**
+ * mii_eee_cap2_mod_linkmode_sup_t()
+ * @adv: target the linkmode settings
+ * @val: register value
+ *
+ * A function that translates value of following registers to the linkmode:
+ * IEEE 802.3-2022 45.2.3.11 "EEE control and capability 2" register (3.21)
+ */
+static inline void mii_eee_cap2_mod_linkmode_sup_t(unsigned long *adv, u32 val)
+{
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ adv, val & MDIO_EEE_2_5GT);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ adv, val & MDIO_EEE_5GT);
+}
+
+/**
+ * mii_eee_cap2_mod_linkmode_adv_t()
+ * @adv: target the linkmode advertisement settings
+ * @val: register value
+ *
+ * A function that translates value of following registers to the linkmode:
+ * IEEE 802.3-2022 45.2.7.16 "EEE advertisement 2" register (7.62)
+ * IEEE 802.3-2022 45.2.7.17 "EEE link partner ability 2" register (7.63)
+ * Note: Currently this function is the same as mii_eee_cap2_mod_linkmode_sup_t.
+ * For certain, not yet supported, modes however the bits differ.
+ * Therefore create separate functions already.
+ */
+static inline void mii_eee_cap2_mod_linkmode_adv_t(unsigned long *adv, u32 val)
+{
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ adv, val & MDIO_EEE_2_5GT);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ adv, val & MDIO_EEE_5GT);
+}
+
+/**
* linkmode_to_mii_eee_cap1_t()
* @adv: the linkmode advertisement settings
*
@@ -467,6 +511,25 @@ static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv)
}
/**
+ * linkmode_to_mii_eee_cap2_t()
+ * @adv: the linkmode advertisement settings
+ *
+ * A function that translates linkmode to value for IEEE 802.3-2022 45.2.7.16
+ * "EEE advertisement 2" register (7.62)
+ */
+static inline u32 linkmode_to_mii_eee_cap2_t(unsigned long *adv)
+{
+ u32 result = 0;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, adv))
+ result |= MDIO_EEE_2_5GT;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, adv))
+ result |= MDIO_EEE_5GT;
+
+ return result;
+}
+
+/**
* mii_10base_t1_adv_mod_linkmode_t()
* @adv: linkmode advertisement settings
* @val: register value
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b695f9e946da..e2082240586d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
#endif
void memblock_trim_memory(phys_addr_t align);
+unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
+ phys_addr_t base2, phys_addr_t size2);
bool memblock_overlaps_region(struct memblock_type *type,
phys_addr_t base, phys_addr_t size);
bool memblock_validate_numa_coverage(unsigned long threshold_bytes);
diff --git a/include/linux/mfd/idtRC38xxx_reg.h b/include/linux/mfd/idtRC38xxx_reg.h
new file mode 100644
index 000000000000..ec11872f51ad
--- /dev/null
+++ b/include/linux/mfd/idtRC38xxx_reg.h
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Register Map - Based on PolarBear_CSRs.RevA.xlsx (2023-04-21)
+ *
+ * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#ifndef MFD_IDTRC38XXX_REG
+#define MFD_IDTRC38XXX_REG
+
+/* GLOBAL */
+#define SOFT_RESET_CTRL (0x15) /* Specific to FC3W */
+#define MISC_CTRL (0x14) /* Specific to FC3A */
+#define APLL_REINIT BIT(1)
+#define APLL_REINIT_VFC3A BIT(2)
+
+#define DEVICE_ID (0x2)
+#define DEVICE_ID_MASK (0x1000) /* Bit 12 is 1 if FC3W and 0 if FC3A */
+#define DEVICE_ID_SHIFT (12)
+
+/* FOD */
+#define FOD_0 (0x300)
+#define FOD_0_VFC3A (0x400)
+#define FOD_1 (0x340)
+#define FOD_1_VFC3A (0x440)
+#define FOD_2 (0x380)
+#define FOD_2_VFC3A (0x480)
+
+/* TDCAPLL */
+#define TDC_CTRL (0x44a) /* Specific to FC3W */
+#define TDC_ENABLE_CTRL (0x169) /* Specific to FC3A */
+#define TDC_DAC_CAL_CTRL (0x16a) /* Specific to FC3A */
+#define TDC_EN BIT(0)
+#define TDC_DAC_RECAL_REQ BIT(1)
+#define TDC_DAC_RECAL_REQ_VFC3A BIT(0)
+
+#define TDC_FB_DIV_INT_CNFG (0x442)
+#define TDC_FB_DIV_INT_CNFG_VFC3A (0x162)
+#define TDC_FB_DIV_INT_MASK GENMASK(7, 0)
+#define TDC_REF_DIV_CNFG (0x443)
+#define TDC_REF_DIV_CNFG_VFC3A (0x163)
+#define TDC_REF_DIV_CONFIG_MASK GENMASK(2, 0)
+
+/* TIME SYNC CHANNEL */
+#define TIME_CLOCK_SRC (0xa01) /* Specific to FC3W */
+#define TIME_CLOCK_COUNT (0xa00) /* Specific to FC3W */
+#define TIME_CLOCK_COUNT_MASK GENMASK(5, 0)
+
+#define SUB_SYNC_GEN_CNFG (0xa04)
+
+#define TOD_COUNTER_READ_REQ (0xa5f)
+#define TOD_COUNTER_READ_REQ_VFC3A (0x6df)
+#define TOD_SYNC_LOAD_VAL_CTRL (0xa10)
+#define TOD_SYNC_LOAD_VAL_CTRL_VFC3A (0x690)
+#define SYNC_COUNTER_MASK GENMASK_ULL(51, 0)
+#define SUB_SYNC_COUNTER_MASK GENMASK(30, 0)
+#define TOD_SYNC_LOAD_REQ_CTRL (0xa21)
+#define TOD_SYNC_LOAD_REQ_CTRL_VFC3A (0x6a1)
+#define SYNC_LOAD_ENABLE BIT(1)
+#define SUB_SYNC_LOAD_ENABLE BIT(0)
+#define SYNC_LOAD_REQ BIT(0)
+
+#define LPF_MODE_CNFG (0xa80)
+#define LPF_MODE_CNFG_VFC3A (0x700)
+enum lpf_mode {
+ LPF_DISABLED = 0,
+ LPF_WP = 1,
+ LPF_HOLDOVER = 2,
+ LPF_WF = 3,
+ LPF_INVALID = 4
+};
+#define LPF_CTRL (0xa98)
+#define LPF_CTRL_VFC3A (0x718)
+#define LPF_EN BIT(0)
+
+#define LPF_BW_CNFG (0xa81)
+#define LPF_BW_SHIFT GENMASK(7, 3)
+#define LPF_BW_MULT GENMASK(2, 0)
+#define LPF_BW_SHIFT_DEFAULT (0xb)
+#define LPF_BW_MULT_DEFAULT (0x0)
+#define LPF_BW_SHIFT_1PPS (0x5)
+
+#define LPF_WR_PHASE_CTRL (0xaa8)
+#define LPF_WR_PHASE_CTRL_VFC3A (0x728)
+#define LPF_WR_FREQ_CTRL (0xab0)
+#define LPF_WR_FREQ_CTRL_VFC3A (0x730)
+
+#define TIME_CLOCK_TDC_FANOUT_CNFG (0xB00)
+#define TIME_SYNC_TO_TDC_EN BIT(0)
+#define SIG1_MUX_SEL_MASK GENMASK(7, 4)
+#define SIG2_MUX_SEL_MASK GENMASK(11, 8)
+enum tdc_mux_sel {
+ REF0 = 0,
+ REF1 = 1,
+ REF2 = 2,
+ REF3 = 3,
+ REF_CLK5 = 4,
+ REF_CLK6 = 5,
+ DPLL_FB_TO_TDC = 6,
+ DPLL_FB_DIVIDED_TO_TDC = 7,
+ TIME_CLK_DIVIDED = 8,
+ TIME_SYNC = 9,
+};
+
+#define TIME_CLOCK_MEAS_CNFG (0xB04)
+#define TDC_MEAS_MODE BIT(0)
+enum tdc_meas_mode {
+ CONTINUOUS = 0,
+ ONE_SHOT = 1,
+ MEAS_MODE_INVALID = 2,
+};
+
+#define TIME_CLOCK_MEAS_DIV_CNFG (0xB08)
+#define TIME_REF_DIV_MASK GENMASK(29, 24)
+
+#define TIME_CLOCK_MEAS_CTRL (0xB10)
+#define TDC_MEAS_EN BIT(0)
+#define TDC_MEAS_START BIT(1)
+
+#define TDC_FIFO_READ_REQ (0xB2F)
+#define TDC_FIFO_READ (0xB30)
+#define COARSE_MEAS_MASK GENMASK_ULL(39, 13)
+#define FINE_MEAS_MASK GENMASK(12, 0)
+
+#define TDC_FIFO_CTRL (0xB12)
+#define FIFO_CLEAR BIT(0)
+#define TDC_FIFO_STS (0xB38)
+#define FIFO_FULL BIT(1)
+#define FIFO_EMPTY BIT(0)
+#define TDC_FIFO_EVENT (0xB39)
+#define FIFO_OVERRUN BIT(1)
+
+/* DPLL */
+#define MAX_REFERENCE_INDEX (3)
+#define MAX_NUM_REF_PRIORITY (4)
+
+#define MAX_DPLL_INDEX (2)
+
+#define DPLL_STS (0x580)
+#define DPLL_STS_VFC3A (0x571)
+#define DPLL_STATE_STS_MASK (0x70)
+#define DPLL_STATE_STS_SHIFT (4)
+#define DPLL_REF_SEL_STS_MASK (0x6)
+#define DPLL_REF_SEL_STS_SHIFT (1)
+
+#define DPLL_REF_PRIORITY_CNFG (0x502)
+#define DPLL_REFX_PRIORITY_DISABLE_MASK (0xf)
+#define DPLL_REF0_PRIORITY_ENABLE_AND_SET_MASK (0x31)
+#define DPLL_REF1_PRIORITY_ENABLE_AND_SET_MASK (0xc2)
+#define DPLL_REF2_PRIORITY_ENABLE_AND_SET_MASK (0x304)
+#define DPLL_REF3_PRIORITY_ENABLE_AND_SET_MASK (0xc08)
+#define DPLL_REF0_PRIORITY_SHIFT (4)
+#define DPLL_REF1_PRIORITY_SHIFT (6)
+#define DPLL_REF2_PRIORITY_SHIFT (8)
+#define DPLL_REF3_PRIORITY_SHIFT (10)
+
+enum dpll_state {
+ DPLL_STATE_MIN = 0,
+ DPLL_STATE_FREERUN = DPLL_STATE_MIN,
+ DPLL_STATE_LOCKED = 1,
+ DPLL_STATE_HOLDOVER = 2,
+ DPLL_STATE_WRITE_FREQUENCY = 3,
+ DPLL_STATE_ACQUIRE = 4,
+ DPLL_STATE_HITLESS_SWITCH = 5,
+ DPLL_STATE_MAX = DPLL_STATE_HITLESS_SWITCH
+};
+
+/* REFMON */
+#define LOSMON_STS_0 (0x81e)
+#define LOSMON_STS_0_VFC3A (0x18e)
+#define LOSMON_STS_1 (0x82e)
+#define LOSMON_STS_1_VFC3A (0x19e)
+#define LOSMON_STS_2 (0x83e)
+#define LOSMON_STS_2_VFC3A (0x1ae)
+#define LOSMON_STS_3 (0x84e)
+#define LOSMON_STS_3_VFC3A (0x1be)
+#define LOS_STS_MASK (0x1)
+
+#define FREQMON_STS_0 (0x874)
+#define FREQMON_STS_0_VFC3A (0x1d4)
+#define FREQMON_STS_1 (0x894)
+#define FREQMON_STS_1_VFC3A (0x1f4)
+#define FREQMON_STS_2 (0x8b4)
+#define FREQMON_STS_2_VFC3A (0x214)
+#define FREQMON_STS_3 (0x8d4)
+#define FREQMON_STS_3_VFC3A (0x234)
+#define FREQ_FAIL_STS_SHIFT (31)
+
+/* Firmware interface */
+#define TIME_CLK_FREQ_ADDR (0xffa0)
+#define XTAL_FREQ_ADDR (0xffa1)
+
+/*
+ * Return register address and field mask based on passed in firmware version
+ */
+#define IDTFC3_FW_REG(FW, VER, REG) (((FW) < (VER)) ? (REG) : (REG##_##VER))
+#define IDTFC3_FW_FIELD(FW, VER, FIELD) (((FW) < (VER)) ? (FIELD) : (FIELD##_##VER))
+enum fw_version {
+ V_DEFAULT = 0,
+ VFC3W = 1,
+ VFC3A = 2
+};
+
+/* XTAL_FREQ_ADDR/TIME_CLK_FREQ_ADDR */
+enum {
+ FREQ_MIN = 0,
+ FREQ_25M = 1,
+ FREQ_49_152M = 2,
+ FREQ_50M = 3,
+ FREQ_100M = 4,
+ FREQ_125M = 5,
+ FREQ_250M = 6,
+ FREQ_MAX
+};
+
+struct idtfc3_hw_param {
+ u32 xtal_freq;
+ u32 time_clk_freq;
+};
+
+struct idtfc3_fwrc {
+ u8 hiaddr;
+ u8 loaddr;
+ u8 value;
+ u8 reserved;
+} __packed;
+
+static inline void idtfc3_default_hw_param(struct idtfc3_hw_param *hw_param)
+{
+ hw_param->xtal_freq = 49152000;
+ hw_param->time_clk_freq = 25000000;
+}
+
+static inline int idtfc3_set_hw_param(struct idtfc3_hw_param *hw_param,
+ u16 addr, u8 val)
+{
+ if (addr == XTAL_FREQ_ADDR)
+ switch (val) {
+ case FREQ_49_152M:
+ hw_param->xtal_freq = 49152000;
+ break;
+ case FREQ_50M:
+ hw_param->xtal_freq = 50000000;
+ break;
+ default:
+ return -EINVAL;
+ }
+ else if (addr == TIME_CLK_FREQ_ADDR)
+ switch (val) {
+ case FREQ_25M:
+ hw_param->time_clk_freq = 25000000;
+ break;
+ case FREQ_50M:
+ hw_param->time_clk_freq = 50000000;
+ break;
+ case FREQ_100M:
+ hw_param->time_clk_freq = 100000000;
+ break;
+ case FREQ_125M:
+ hw_param->time_clk_freq = 125000000;
+ break;
+ case FREQ_250M:
+ hw_param->time_clk_freq = 250000000;
+ break;
+ default:
+ return -EINVAL;
+ }
+ else
+ return -EFAULT;
+
+ return 0;
+}
+
+#endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8c55ff351e5f..bf9324a31ae9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -681,6 +681,7 @@ struct mlx5e_resources {
struct mlx5_sq_bfreg bfreg;
#define MLX5_MAX_NUM_TC 8
u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC];
+ bool tisn_valid;
} hw_objs;
struct net_device *uplink_netdev;
struct mutex uplink_netdev_lock;
@@ -822,6 +823,7 @@ struct mlx5_core_dev {
struct blocking_notifier_head macsec_nh;
#endif
u64 num_ipsec_offloads;
+ struct mlx5_sd *sd;
};
struct mlx5_db {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 6f7725238abc..3fb428ce7d1c 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -132,6 +132,7 @@ struct mlx5_flow_handle;
enum {
FLOW_CONTEXT_HAS_TAG = BIT(0),
+ FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1),
};
struct mlx5_flow_context {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index bf5320b28b8b..49f660563e49 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits {
u8 sw_r_roce_src_udp_port[0x1];
u8 fl_rc_qp_when_roce_disabled[0x1];
u8 fl_rc_qp_when_roce_enabled[0x1];
- u8 reserved_at_7[0x1];
+ u8 roce_cc_general[0x1];
u8 qp_ooo_transmit_default[0x1];
u8 reserved_at_9[0x15];
u8 qp_ts_format[0x2];
@@ -3576,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits {
u8 action[0x10];
u8 extended_destination[0x1];
- u8 reserved_at_81[0x1];
+ u8 uplink_hairpin_en[0x1];
u8 flow_source[0x2];
u8 encrypt_decrypt_type[0x4];
u8 destination_list_size[0x18];
@@ -4036,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits {
u8 affiliation_criteria[0x4];
u8 affiliated_vhca_id[0x10];
- u8 reserved_at_60[0xd0];
+ u8 reserved_at_60[0xa0];
+
+ u8 reserved_at_100[0x1];
+ u8 sd_group[0x3];
+ u8 reserved_at_104[0x1c];
+ u8 reserved_at_120[0x10];
u8 mtu[0x10];
u8 system_image_guid[0x40];
@@ -10122,8 +10127,7 @@ struct mlx5_ifc_mpir_reg_bits {
u8 reserved_at_20[0x20];
u8 local_port[0x8];
- u8 reserved_at_28[0x15];
- u8 sd_group[0x3];
+ u8 reserved_at_28[0x18];
u8 reserved_at_60[0x20];
};
@@ -10249,7 +10253,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 mcqi[0x1];
u8 mcqs[0x1];
- u8 regs_95_to_87[0x9];
+ u8 regs_95_to_90[0x6];
+ u8 mpir[0x1];
+ u8 regs_88_to_87[0x2];
u8 mpegc[0x1];
u8 mtutc[0x1];
u8 regs_84_to_68[0x11];
@@ -10257,7 +10263,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 regs_63_to_46[0x12];
u8 mrtc[0x1];
- u8 regs_44_to_32[0xd];
+ u8 regs_44_to_41[0x4];
+ u8 mfrl[0x1];
+ u8 regs_39_to_32[0x8];
u8 regs_31_to_10[0x16];
u8 mtmp[0x1];
@@ -10657,6 +10665,7 @@ enum {
MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0,
MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1,
MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2,
+ MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET = 0x7,
};
enum {
@@ -12701,6 +12710,14 @@ enum mlx5_msees_oper_status {
MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5,
};
+enum mlx5_msees_failure_reason {
+ MLX5_MSEES_FAILURE_REASON_UNDEFINED_ERROR = 0x0,
+ MLX5_MSEES_FAILURE_REASON_PORT_DOWN = 0x1,
+ MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF = 0x2,
+ MLX5_MSEES_FAILURE_REASON_NET_SYNCHRONIZER_DEVICE_ERROR = 0x3,
+ MLX5_MSEES_FAILURE_REASON_LACK_OF_RESOURCES = 0x4,
+};
+
struct mlx5_ifc_msees_reg_bits {
u8 reserved_at_0[0x8];
u8 local_port[0x8];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index bd53cf4be7bd..f0e55bf3ec8b 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg {
union {
struct {
__be16 sz;
- u8 start[2];
+ union {
+ u8 start[2];
+ DECLARE_FLEX_ARRAY(u8, data);
+ };
} inline_hdr;
struct {
__be16 type;
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index fbb9bf447889..c36cc6d82926 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu);
int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
u64 *system_image_guid);
+int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group);
int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
u16 vport, u64 node_guid);
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 40d94411d492..dc7048824be8 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags)
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
+ _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
arch_calc_vm_flag_bits(flags);
}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4ed33b127821..a497f189d988 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __pfn_to_section(pfn);
- rcu_read_lock();
+ rcu_read_lock_sched();
if (!valid_section(ms)) {
- rcu_read_unlock();
+ rcu_read_unlock_sched();
return 0;
}
/*
@@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn)
* the entire section-sized span.
*/
ret = early_section(ms) || pfn_section_valid(ms, pfn);
- rcu_read_unlock();
+ rcu_read_unlock_sched();
return ret;
}
diff --git a/include/linux/module.h b/include/linux/module.h
index 96bc462872c0..1153b0d99a80 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -885,7 +885,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
static inline void module_bug_cleanup(struct module *mod) {}
#endif /* CONFIG_GENERIC_BUG */
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
extern bool retpoline_module_ok(bool has_retpoline);
#else
static inline bool retpoline_module_ok(bool has_retpoline)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index ddace8c34dcf..26d07e23052e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -412,6 +412,7 @@ bool arch_restore_msi_irqs(struct pci_dev *dev);
struct irq_domain;
struct irq_domain_ops;
struct irq_chip;
+struct irq_fwspec;
struct device_node;
struct fwnode_handle;
struct msi_domain_info;
@@ -431,6 +432,8 @@ struct msi_domain_info;
* function.
* @msi_post_free: Optional function which is invoked after freeing
* all interrupts.
+ * @msi_translate: Optional translate callback to support the odd wire to
+ * MSI bridges, e.g. MBIGEN
*
* @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
* irqdomain.
@@ -468,6 +471,8 @@ struct msi_domain_ops {
struct device *dev);
void (*msi_post_free)(struct irq_domain *domain,
struct device *dev);
+ int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq, unsigned int *type);
};
/**
@@ -547,6 +552,10 @@ enum {
MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5),
/* Free MSI descriptors */
MSI_FLAG_FREE_MSI_DESCS = (1 << 6),
+ /* Use dev->fwnode for MSI device domain creation */
+ MSI_FLAG_USE_DEV_FWNODE = (1 << 7),
+ /* Set parent->dev into domain->pm_dev on device domain creation */
+ MSI_FLAG_PARENT_PM_DEV = (1 << 8),
/* Mask for the generic functionality */
MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
@@ -572,6 +581,11 @@ enum {
* struct msi_parent_ops - MSI parent domain callbacks and configuration info
*
* @supported_flags: Required: The supported MSI flags of the parent domain
+ * @required_flags: Optional: The required MSI flags of the parent MSI domain
+ * @bus_select_token: Optional: The bus token of the real parent domain for
+ * irq_domain::select()
+ * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
+ * irq_domain::select()
* @prefix: Optional: Prefix for the domain and chip name
* @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent
* domain specific domain flags, domain ops and interrupt chip
@@ -579,6 +593,9 @@ enum {
*/
struct msi_parent_ops {
u32 supported_flags;
+ u32 required_flags;
+ u32 bus_select_token;
+ u32 bus_select_mask;
const char *prefix;
bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain,
struct irq_domain *msi_parent_domain,
@@ -627,9 +644,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);
struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
-int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
- irq_write_msi_msg_t write_msi_msg);
-void platform_msi_domain_free_irqs(struct device *dev);
/* When an MSI domain is used as an intermediate domain */
int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
@@ -656,6 +670,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec);
void *platform_msi_get_host_data(struct irq_domain *domain);
+/* Per device platform MSI */
+int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg);
+void platform_device_msi_free_irqs_all(struct device *dev);
bool msi_device_has_isolated_msi(struct device *dev);
#else /* CONFIG_GENERIC_MSI_IRQ */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 7e208d46ba5b..67edc4ca2bee 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -32,11 +32,9 @@
# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
#endif
-#ifndef CONFIG_PREEMPT_RT
-
#ifdef CONFIG_DEBUG_MUTEXES
-#define __DEBUG_MUTEX_INITIALIZER(lockname) \
+# define __DEBUG_MUTEX_INITIALIZER(lockname) \
, .magic = &lockname
extern void mutex_destroy(struct mutex *lock);
@@ -49,6 +47,7 @@ static inline void mutex_destroy(struct mutex *lock) {}
#endif
+#ifndef CONFIG_PREEMPT_RT
/**
* mutex_init - initialize the mutex
* @mutex: the mutex to be initialized
@@ -101,9 +100,6 @@ extern bool mutex_is_locked(struct mutex *lock);
extern void __mutex_rt_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
-extern int mutex_trylock(struct mutex *lock);
-
-static inline void mutex_destroy(struct mutex *lock) { }
#define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex)
diff --git a/include/linux/net.h b/include/linux/net.h
index c9b4a63791a4..15df6d5f27a7 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -299,10 +299,7 @@ do { \
net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
#else
#define net_dbg_ratelimited(fmt, ...) \
- do { \
- if (0) \
- no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
- } while (0)
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
#define net_get_random_once(buf, nbytes) \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 118c40258d07..c6f6ac779b34 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -79,8 +79,6 @@ struct xdp_buff;
struct xdp_frame;
struct xdp_metadata_ops;
struct xdp_md;
-/* DPLL specific */
-struct dpll_pin;
typedef u32 xdp_features_t;
@@ -227,12 +225,6 @@ struct net_device_core_stats {
#include <linux/cache.h>
#include <linux/skbuff.h>
-#ifdef CONFIG_RPS
-#include <linux/static_key.h>
-extern struct static_key_false rps_needed;
-extern struct static_key_false rfs_needed;
-#endif
-
struct neighbour;
struct neigh_parms;
struct sk_buff;
@@ -732,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
#endif
}
-#ifdef CONFIG_RPS
-/*
- * This structure holds an RPS map which can be of variable length. The
- * map is an array of CPUs.
- */
-struct rps_map {
- unsigned int len;
- struct rcu_head rcu;
- u16 cpus[];
-};
-#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
-
-/*
- * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
- * tail pointer for that CPU's input queue at the time of last enqueue, and
- * a hardware filter index.
- */
-struct rps_dev_flow {
- u16 cpu;
- u16 filter;
- unsigned int last_qtail;
-};
-#define RPS_NO_FILTER 0xffff
-
-/*
- * The rps_dev_flow_table structure contains a table of flow mappings.
- */
-struct rps_dev_flow_table {
- unsigned int mask;
- struct rcu_head rcu;
- struct rps_dev_flow flows[];
-};
-#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
- ((_num) * sizeof(struct rps_dev_flow)))
-
-/*
- * The rps_sock_flow_table contains mappings of flows to the last CPU
- * on which they were processed by the application (set in recvmsg).
- * Each entry is a 32bit value. Upper part is the high-order bits
- * of flow hash, lower part is CPU number.
- * rps_cpu_mask is used to partition the space, depending on number of
- * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
- * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
- * meaning we use 32-6=26 bits for the hash.
- */
-struct rps_sock_flow_table {
- u32 mask;
-
- u32 ents[] ____cacheline_aligned_in_smp;
-};
-#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
-
-#define RPS_NO_CPU 0xffff
-
-extern u32 rps_cpu_mask;
-extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
-
-static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
- u32 hash)
-{
- if (table && hash) {
- unsigned int index = hash & table->mask;
- u32 val = hash & ~rps_cpu_mask;
-
- /* We only give a hint, preemption can change CPU under us */
- val |= raw_smp_processor_id();
-
- /* The following WRITE_ONCE() is paired with the READ_ONCE()
- * here, and another one in get_rps_cpu().
- */
- if (READ_ONCE(table->ents[index]) != val)
- WRITE_ONCE(table->ents[index], val);
- }
-}
-
#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
u16 filter_id);
#endif
-#endif /* CONFIG_RPS */
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
enum xps_map_type {
@@ -1062,7 +978,7 @@ struct xfrmdev_ops {
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
- void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
+ void (*xdo_dev_state_update_stats) (struct xfrm_state *x);
int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
void (*xdo_dev_policy_free) (struct xfrm_policy *x);
@@ -1815,6 +1731,15 @@ enum netdev_stat_type {
NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
};
+enum netdev_reg_state {
+ NETREG_UNINITIALIZED = 0,
+ NETREG_REGISTERED, /* completed register_netdevice */
+ NETREG_UNREGISTERING, /* called unregister_netdevice */
+ NETREG_UNREGISTERED, /* completed unregister todo */
+ NETREG_RELEASED, /* called free_netdev */
+ NETREG_DUMMY, /* dummy device for NAPI poll */
+};
+
/**
* struct net_device - The DEVICE structure.
*
@@ -2030,6 +1955,7 @@ enum netdev_stat_type {
*
* @sysfs_rx_queue_group: Space for optional per-rx queue attributes
* @rtnl_link_ops: Rtnl_link_ops
+ * @stat_ops: Optional ops for queue-aware statistics
*
* @gso_max_size: Maximum size of generic segmentation offload
* @tso_max_size: Device (as in HW) limit on the max TSO request size
@@ -2141,6 +2067,11 @@ struct net_device {
/* TXRX read-mostly hotpath */
__cacheline_group_begin(net_device_read_txrx);
+ union {
+ struct pcpu_lstats __percpu *lstats;
+ struct pcpu_sw_netstats __percpu *tstats;
+ struct pcpu_dstats __percpu *dstats;
+ };
unsigned int flags;
unsigned short hard_header_len;
netdev_features_t features;
@@ -2249,7 +2180,7 @@ struct net_device {
const struct tlsdev_ops *tlsdev_ops;
#endif
- unsigned char operstate;
+ unsigned int operstate;
unsigned char link_mode;
unsigned char if_port;
@@ -2372,13 +2303,7 @@ struct net_device {
struct list_head link_watch_list;
- enum { NETREG_UNINITIALIZED=0,
- NETREG_REGISTERED, /* completed register_netdevice */
- NETREG_UNREGISTERING, /* called unregister_netdevice */
- NETREG_UNREGISTERED, /* completed unregister todo */
- NETREG_RELEASED, /* called free_netdev */
- NETREG_DUMMY, /* dummy device for NAPI poll */
- } reg_state:8;
+ u8 reg_state;
bool dismantle;
@@ -2395,11 +2320,6 @@ struct net_device {
enum netdev_ml_priv_type ml_priv_type;
enum netdev_stat_type pcpu_stat_type:8;
- union {
- struct pcpu_lstats __percpu *lstats;
- struct pcpu_sw_netstats __percpu *tstats;
- struct pcpu_dstats __percpu *dstats;
- };
#if IS_ENABLED(CONFIG_GARP)
struct garp_port __rcu *garp_port;
@@ -2416,6 +2336,8 @@ struct net_device {
const struct rtnl_link_ops *rtnl_link_ops;
+ const struct netdev_stat_ops *stat_ops;
+
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SEGS 65535u
#define GSO_LEGACY_MAX_SIZE 65536u
@@ -2469,7 +2391,7 @@ struct net_device {
struct devlink_port *devlink_port;
#if IS_ENABLED(CONFIG_DPLL)
- struct dpll_pin *dpll_pin;
+ struct dpll_pin __rcu *dpll_pin;
#endif
#if IS_ENABLED(CONFIG_PAGE_POOL)
/** @page_pools: page pools created for this netdevice */
@@ -3074,8 +2996,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
-extern rwlock_t dev_base_lock; /* Device list lock */
-
#define for_each_netdev(net, d) \
list_for_each_entry(d, &(net)->dev_base_head, dev_list)
#define for_each_netdev_reverse(net, d) \
@@ -3198,7 +3118,7 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
-int init_dummy_netdev(struct net_device *dev);
+void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
@@ -3499,6 +3419,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue
#endif
}
+static inline int netdev_queue_dql_avail(const struct netdev_queue *txq)
+{
+#ifdef CONFIG_BQL
+ /* Non-BQL migrated drivers will return 0, too. */
+ return dql_avail(&txq->dql);
+#else
+ return 0;
+#endif
+}
+
/**
* netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
* @dev_queue: pointer to transmit queue
@@ -3958,7 +3888,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog);
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
@@ -3971,8 +3901,6 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
-struct packet_offload *gro_find_receive_by_type(__be16 type);
-struct packet_offload *gro_find_complete_by_type(__be16 type);
static inline void napi_free_frags(struct napi_struct *napi)
{
@@ -4032,17 +3960,6 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
int dev_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid, bool recurse);
bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
-void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin);
-void netdev_dpll_pin_clear(struct net_device *dev);
-
-static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev)
-{
-#if IS_ENABLED(CONFIG_DPLL)
- return dev->dpll_pin;
-#else
- return NULL;
-#endif
-}
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -4353,8 +4270,10 @@ static inline bool netif_testing(const struct net_device *dev)
*/
static inline bool netif_oper_up(const struct net_device *dev)
{
- return (dev->operstate == IF_OPER_UP ||
- dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
+ unsigned int operstate = READ_ONCE(dev->operstate);
+
+ return operstate == IF_OPER_UP ||
+ operstate == IF_OPER_UNKNOWN /* backward compat */;
}
/**
@@ -4793,11 +4712,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
const struct pcpu_sw_netstats __percpu *netstats);
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
-extern int netdev_max_backlog;
-extern int dev_rx_weight;
-extern int dev_tx_weight;
-extern int gro_normal_batch;
-
enum {
NESTED_SYNC_IMM_BIT,
NESTED_SYNC_TODO_BIT,
@@ -5254,7 +5168,9 @@ static inline const char *netdev_name(const struct net_device *dev)
static inline const char *netdev_reg_state(const struct net_device *dev)
{
- switch (dev->reg_state) {
+ u8 reg_state = READ_ONCE(dev->reg_state);
+
+ switch (reg_state) {
case NETREG_UNINITIALIZED: return " (uninitialized)";
case NETREG_REGISTERED: return "";
case NETREG_UNREGISTERING: return " (unregistering)";
@@ -5263,7 +5179,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
case NETREG_DUMMY: return " (dummy)";
}
- WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state);
+ WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state);
return " (unknown)";
}
@@ -5305,7 +5221,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
-extern struct list_head ptype_all __read_mostly;
extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
extern struct net_device *blackhole_netdev;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 80900d910992..2683b2b77612 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -370,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
u_int8_t protocol, unsigned short family);
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict, unsigned short family);
-int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
#include <net/flow.h>
@@ -474,6 +473,7 @@ struct nf_ct_hook {
const struct sk_buff *);
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
void (*set_closing)(struct nf_conntrack *nfct);
+ int (*confirm)(struct sk_buff *skb);
};
extern const struct nf_ct_hook __rcu *nf_ct_hook;
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index e8c350a3ade1..e9f4f845d760 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -186,6 +186,8 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Cancel ongoing garbage collectors before destroying the set*/
+ void (*cancel_gc)(struct ip_set *set);
/* Region-locking is used */
bool region_lock;
};
@@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);
/* A generic IP set */
struct ip_set {
+ /* For call_cru in destroy */
+ struct rcu_head rcu;
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 1a4445bf2ab9..5df7340d4dab 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -291,6 +291,7 @@ struct netlink_callback {
u16 answer_flags;
u32 min_dump_alloc;
unsigned int prev_seq, seq;
+ int flags;
bool strict_check;
union {
u8 ctx[48];
@@ -323,6 +324,7 @@ struct netlink_dump_control {
void *data;
struct module *module;
u32 min_dump_alloc;
+ int flags;
};
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index cd797e00fe35..92de074e63b9 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -124,6 +124,7 @@ struct nfs_client {
char cl_ipaddr[48];
struct net *cl_net;
struct list_head pending_cb_stateids;
+ struct rcu_head rcu;
};
/*
@@ -265,6 +266,7 @@ struct nfs_server {
const struct cred *cred;
bool has_sec_mnt_opts;
struct kobject kobj;
+ struct rcu_head rcu;
};
/* Server capabilities */
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 0f1d024bd958..7d22ea50b098 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -7,7 +7,7 @@
struct proc_ns_operations;
struct ns_common {
- atomic_long_t stashed;
+ struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
refcount_t count;
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 4dd7e6fe92fb..eb2f04d636c8 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -6,7 +6,11 @@
#ifndef _LINUX_NVME_RDMA_H
#define _LINUX_NVME_RDMA_H
-#define NVME_RDMA_MAX_QUEUE_SIZE 128
+#define NVME_RDMA_IP_PORT 4420
+
+#define NVME_RDMA_MAX_QUEUE_SIZE 256
+#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128
+#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128
enum nvme_rdma_cm_fmt {
NVME_RDMA_CM_FMT_1_0 = 0x0,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 462c21e0e417..425573202295 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -23,8 +23,6 @@
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
-#define NVME_RDMA_IP_PORT 4420
-
#define NVME_NSID_ALL 0xffffffff
enum nvme_subsys_type {
@@ -646,6 +644,7 @@ enum {
NVME_CMD_EFFECTS_NCC = 1 << 2,
NVME_CMD_EFFECTS_NIC = 1 << 3,
NVME_CMD_EFFECTS_CCC = 1 << 4,
+ NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14),
NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16),
NVME_CMD_EFFECTS_UUID_SEL = 1 << 19,
NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20),
@@ -816,12 +815,6 @@ struct nvme_reservation_status_ext {
struct nvme_registered_ctrl_ext regctl_eds[];
};
-enum nvme_async_event_type {
- NVME_AER_TYPE_ERROR = 0,
- NVME_AER_TYPE_SMART = 1,
- NVME_AER_TYPE_NOTICE = 2,
-};
-
/* I/O commands */
enum nvme_opcode {
@@ -1818,7 +1811,7 @@ struct nvme_command {
};
};
-static inline bool nvme_is_fabrics(struct nvme_command *cmd)
+static inline bool nvme_is_fabrics(const struct nvme_command *cmd)
{
return cmd->common.opcode == nvme_fabrics_command;
}
@@ -1837,7 +1830,7 @@ struct nvme_error_slot {
__u8 resv2[24];
};
-static inline bool nvme_is_write(struct nvme_command *cmd)
+static inline bool nvme_is_write(const struct nvme_command *cmd)
{
/*
* What a mess...
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 33212e93f4a6..b3b8d3dab52d 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -131,7 +131,7 @@
*/
.macro VALIDATE_UNRET_BEGIN
#if defined(CONFIG_NOINSTR_VALIDATION) && \
- (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
.Lhere_\@:
.pushsection .discard.validate_unret
.long .Lhere_\@ - .
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 7b5cf4a5cd19..aa691f2119b0 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -31,8 +31,10 @@
* credit to Christian Biere.
*/
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
-#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
-#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_max(t) __type_max(typeof(t))
+#define __type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define type_min(t) __type_min(typeof(t))
/*
* Avoids triggering -Wtype-limits compilation warning,
@@ -57,46 +59,123 @@ static inline bool __must_check __must_check_overflow(bool overflow)
* @b: second addend
* @d: pointer to store sum
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted addition, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * sum has overflowed or been truncated.
+ * *@d holds the results of the attempted addition, regardless of whether
+ * wrap-around occurred.
*/
#define check_add_overflow(a, b, d) \
__must_check_overflow(__builtin_add_overflow(a, b, d))
/**
+ * wrapping_add() - Intentionally perform a wrapping addition
+ * @type: type for result of calculation
+ * @a: first addend
+ * @b: second addend
+ *
+ * Return the potentially wrapped-around addition without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_add(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_add_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_add() - Intentionally perform a wrapping increment assignment
+ * @var: variable to be incremented
+ * @offset: amount to add
+ *
+ * Increments @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_add(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_add(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_sub_overflow() - Calculate subtraction with overflow checking
* @a: minuend; value to subtract from
* @b: subtrahend; value to subtract from @a
* @d: pointer to store difference
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted subtraction, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * difference has underflowed or been truncated.
+ * *@d holds the results of the attempted subtraction, regardless of whether
+ * wrap-around occurred.
*/
#define check_sub_overflow(a, b, d) \
__must_check_overflow(__builtin_sub_overflow(a, b, d))
/**
+ * wrapping_sub() - Intentionally perform a wrapping subtraction
+ * @type: type for result of calculation
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ *
+ * Return the potentially wrapped-around subtraction without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_sub(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_sub_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign
+ * @var: variable to be decremented
+ * @offset: amount to subtract
+ *
+ * Decrements @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_sub(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_mul_overflow() - Calculate multiplication with overflow checking
* @a: first factor
* @b: second factor
* @d: pointer to store product
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted multiplication, but is not
- * considered "safe for use" on a non-zero return value, which indicates
- * that the product has overflowed or been truncated.
+ * *@d holds the results of the attempted multiplication, regardless of whether
+ * wrap-around occurred.
*/
#define check_mul_overflow(a, b, d) \
__must_check_overflow(__builtin_mul_overflow(a, b, d))
/**
+ * wrapping_mul() - Intentionally perform a wrapping multiplication
+ * @type: type for result of calculation
+ * @a: first factor
+ * @b: second factor
+ *
+ * Return the potentially wrapped-around multiplication without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_mul(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_mul_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
* check_shl_overflow() - Calculate a left-shifted value and check overflow
* @a: Value to be shifted
* @s: How many bits left to shift
@@ -120,7 +199,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
typeof(a) _a = a; \
typeof(s) _s = s; \
typeof(d) _d = d; \
- u64 _a_full = _a; \
+ unsigned long long _a_full = _a; \
unsigned int _to_shift = \
is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
@@ -130,10 +209,10 @@ static inline bool __must_check __must_check_overflow(bool overflow)
#define __overflows_type_constexpr(x, T) ( \
is_unsigned_type(typeof(x)) ? \
- (x) > type_max(typeof(T)) : \
+ (x) > type_max(T) : \
is_unsigned_type(typeof(T)) ? \
- (x) < 0 || (x) > type_max(typeof(T)) : \
- (x) < type_min(typeof(T)) || (x) > type_max(typeof(T)))
+ (x) < 0 || (x) > type_max(T) : \
+ (x) < type_min(T) || (x) > type_max(T))
#define __overflows_type(x, T) ({ \
typeof(T) v = 0; \
diff --git a/include/linux/pci.h b/include/linux/pci.h
index add9368e6314..213109d3c601 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
struct pci_saved_state **state);
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
void *userdata);
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+ void *userdata);
int pci_cfg_space_size(struct pci_dev *dev);
unsigned char pci_bus_max_busnr(struct pci_bus *bus);
void pci_setup_bridge(struct pci_bus *bus);
@@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
static inline void pci_restore_state(struct pci_dev *dev) { }
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{ return 0; }
+static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{ return 0; }
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
{ return 0; }
static inline pci_power_t pci_choose_state(struct pci_dev *dev,
@@ -2512,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev)
return NULL;
}
+static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
+{
+ return dev->error_state == pci_channel_io_perm_failure;
+}
+
void pci_request_acs(void);
bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
bool pci_acs_path_enabled(struct pci_dev *start,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 684efaeca07c..3f68b8239bb1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -30,6 +30,7 @@
#include <linux/refcount.h>
#include <linux/atomic.h>
+#include <net/eee.h>
#define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \
SUPPORTED_TP | \
@@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init;
extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init;
#define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features)
#define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features)
@@ -65,6 +67,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init;
#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features)
#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features)
#define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features)
+#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features)
extern const int phy_basic_ports_array[3];
extern const int phy_fibre_port_array[1];
@@ -329,6 +332,7 @@ struct mdio_bus_stats {
* struct phy_package_shared - Shared information in PHY packages
* @base_addr: Base PHY address of PHY package used to combine PHYs
* in one package and for offset calculation of phy_package_read/write
+ * @np: Pointer to the Device Node if PHY package defined in DT
* @refcnt: Number of PHYs connected to this shared data
* @flags: Initialization of PHY package
* @priv_size: Size of the shared private data @priv
@@ -340,6 +344,8 @@ struct mdio_bus_stats {
*/
struct phy_package_shared {
u8 base_addr;
+ /* With PHY package defined in DT this points to the PHY package node */
+ struct device_node *np;
refcount_t refcnt;
unsigned long flags;
size_t priv_size;
@@ -589,6 +595,8 @@ struct macsec_ops;
* @supported_eee: supported PHY EEE linkmodes
* @advertising_eee: Currently advertised EEE linkmodes
* @eee_enabled: Flag indicating whether the EEE feature is enabled
+ * @enable_tx_lpi: When True, MAC should transmit LPI to PHY
+ * @eee_cfg: User configuration of EEE
* @lp_advertising: Current link partner advertised linkmodes
* @host_interfaces: PHY interface modes supported by host
* @eee_broken_modes: Energy efficient ethernet modes which should be prohibited
@@ -638,7 +646,7 @@ struct phy_device {
/* Information about the PHY type */
/* And management functions */
- struct phy_driver *drv;
+ const struct phy_driver *drv;
struct device_link *devlink;
@@ -698,7 +706,7 @@ struct phy_device {
__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
/* used with phy_speed_down */
__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old);
- /* used for eee validation */
+ /* used for eee validation and configuration*/
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee);
__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee);
bool eee_enabled;
@@ -708,6 +716,8 @@ struct phy_device {
/* Energy efficient ethernet modes which should be prohibited */
u32 eee_broken_modes;
+ bool enable_tx_lpi;
+ struct eee_config eee_cfg;
#ifdef CONFIG_LED_TRIGGER_PHY
struct phy_led_trigger *phy_led_triggers;
@@ -852,6 +862,15 @@ struct phy_plca_status {
bool pst;
};
+/* Modes for PHY LED configuration */
+enum phy_led_modes {
+ PHY_LED_ACTIVE_LOW = 0,
+ PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1,
+
+ /* keep it last */
+ __PHY_LED_MODES_NUM,
+};
+
/**
* struct phy_led: An LED driven by the PHY
*
@@ -1145,6 +1164,19 @@ struct phy_driver {
int (*led_hw_control_get)(struct phy_device *dev, u8 index,
unsigned long *rules);
+ /**
+ * @led_polarity_set: Set the LED polarity modes
+ * @dev: PHY device which has the LED
+ * @index: Which LED of the PHY device
+ * @modes: bitmap of LED polarity modes
+ *
+ * Configure LED with all the required polarity modes in @modes
+ * to make it correctly turn ON or OFF.
+ *
+ * Returns 0, or an error code.
+ */
+ int (*led_polarity_set)(struct phy_device *dev, int index,
+ unsigned long modes);
};
#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
@@ -1851,7 +1883,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
/* Clause 37 */
int genphy_c37_config_aneg(struct phy_device *phydev);
-int genphy_c37_read_status(struct phy_device *phydev);
+int genphy_c37_read_status(struct phy_device *phydev, bool *changed);
/* Clause 45 PHY */
int genphy_c45_restart_aneg(struct phy_device *phydev);
@@ -1886,9 +1918,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev,
int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
unsigned long *lp, bool *is_enabled);
int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
- struct ethtool_eee *data);
+ struct ethtool_keee *data);
int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
- struct ethtool_eee *data);
+ struct ethtool_keee *data);
int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv);
int genphy_c45_an_config_eee_aneg(struct phy_device *phydev);
int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv);
@@ -1938,8 +1970,10 @@ int phy_get_rate_matching(struct phy_device *phydev,
void phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
void phy_advertise_supported(struct phy_device *phydev);
+void phy_advertise_eee_all(struct phy_device *phydev);
void phy_support_sym_pause(struct phy_device *phydev);
void phy_support_asym_pause(struct phy_device *phydev);
+void phy_support_eee(struct phy_device *phydev);
void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
bool autoneg);
void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
@@ -1966,8 +2000,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);
int phy_get_eee_err(struct phy_device *phydev);
-int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data);
-int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data);
+int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data);
+int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data);
int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol);
void phy_ethtool_get_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol);
@@ -1977,9 +2011,12 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
const struct ethtool_link_ksettings *cmd);
int phy_ethtool_nway_reset(struct net_device *ndev);
int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size);
+int of_phy_package_join(struct phy_device *phydev, size_t priv_size);
void phy_package_leave(struct phy_device *phydev);
int devm_phy_package_join(struct device *dev, struct phy_device *phydev,
int base_addr, size_t priv_size);
+int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev,
+ size_t priv_size);
int __init mdio_bus_init(void);
void mdio_bus_exit(void);
@@ -2100,7 +2137,7 @@ static inline bool phy_package_probe_once(struct phy_device *phydev)
return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE);
}
-extern struct bus_type mdio_bus_type;
+extern const struct bus_type mdio_bus_type;
struct mdio_board_info {
const char *bus_id;
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index d589f89c612c..9a57deefcb07 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -480,9 +480,6 @@ void pcs_disable(struct phylink_pcs *pcs);
* negotiation completion state in @state->an_complete, and link up state
* in @state->link. If possible, @state->lp_advertising should also be
* populated.
- *
- * When present, this overrides pcs_get_state() in &struct
- * phylink_pcs_ops.
*/
void pcs_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state);
@@ -584,8 +581,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *,
struct ethtool_pauseparam *);
int phylink_get_eee_err(struct phylink *);
int phylink_init_eee(struct phylink *, bool);
-int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
-int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
+int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee);
+int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee);
int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
int phylink_speed_down(struct phylink *pl, bool sync);
int phylink_speed_up(struct phylink *pl);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 395cacce1179..c79a0efd0258 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -55,6 +55,10 @@ struct pid
refcount_t count;
unsigned int level;
spinlock_t lock;
+#ifdef CONFIG_FS_PID
+ struct dentry *stashed;
+ unsigned long ino;
+#endif
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct hlist_head inodes;
@@ -66,15 +70,13 @@ struct pid
extern struct pid init_struct_pid;
-extern const struct file_operations pidfd_fops;
-
struct file;
-extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_create(struct pid *pid, unsigned int flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+void do_notify_pidfd(struct task_struct *task);
static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
new file mode 100644
index 000000000000..40dd325a32a6
--- /dev/null
+++ b/include/linux/pidfs.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PID_FS_H
+#define _LINUX_PID_FS_H
+
+struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
+void __init pidfs_init(void);
+bool is_pidfs_sb(const struct super_block *sb);
+
+#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 79594aeb160d..2f1b952d596a 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -154,9 +154,9 @@ struct packet_stacked_data
struct pktcdvd_device
{
- struct bdev_handle *bdev_handle; /* dev attached */
+ struct file *bdev_file; /* dev attached */
/* handle acquired for bdev during pkt_open_dev() */
- struct bdev_handle *open_bdev_handle;
+ struct file *f_open_bdev;
dev_t pkt_dev; /* our dev */
struct packet_settings settings;
struct packet_stats stats;
diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h
index f922a192fe58..ec99b7b73d1d 100644
--- a/include/linux/platform_data/brcmfmac.h
+++ b/include/linux/platform_data/brcmfmac.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 201 Broadcom Corporation
+ * Copyright (c) 2016 Broadcom Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h
index 8a5f9f0b2c52..724e1f57b81f 100644
--- a/include/linux/platform_data/mdio-bcm-unimac.h
+++ b/include/linux/platform_data/mdio-bcm-unimac.h
@@ -1,11 +1,14 @@
#ifndef __MDIO_BCM_UNIMAC_PDATA_H
#define __MDIO_BCM_UNIMAC_PDATA_H
+struct clk;
+
struct unimac_mdio_pdata {
u32 phy_mask;
int (*wait_func)(void *data);
void *wait_func_data;
const char *bus_name;
+ struct clk *clk;
};
#define UNIMAC_MDIO_DRV_NAME "unimac-mdio"
diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h
index f177416635a2..8c659db4da6b 100644
--- a/include/linux/platform_data/microchip-ksz.h
+++ b/include/linux/platform_data/microchip-ksz.h
@@ -33,6 +33,7 @@ enum ksz_chip_id {
KSZ9897_CHIP_ID = 0x00989700,
KSZ9893_CHIP_ID = 0x00989300,
KSZ9563_CHIP_ID = 0x00956300,
+ KSZ8567_CHIP_ID = 0x00856700,
KSZ9567_CHIP_ID = 0x00956700,
LAN9370_CHIP_ID = 0x00937000,
LAN9371_CHIP_ID = 0x00937100,
diff --git a/include/linux/platform_data/net-cw1200.h b/include/linux/platform_data/net-cw1200.h
index c510734405bb..89d0ec6f7d46 100644
--- a/include/linux/platform_data/net-cw1200.h
+++ b/include/linux/platform_data/net-cw1200.h
@@ -14,8 +14,6 @@ struct cw1200_platform_data_spi {
/* All others are optional */
bool have_5ghz;
- int reset; /* GPIO to RSTn signal (0 disables) */
- int powerup; /* GPIO to POWERUP signal (0 disables) */
int (*power_ctrl)(const struct cw1200_platform_data_spi *pdata,
bool enable); /* Control 3v3 / 1v8 supply */
int (*clk_ctrl)(const struct cw1200_platform_data_spi *pdata,
@@ -30,8 +28,6 @@ struct cw1200_platform_data_sdio {
/* All others are optional */
bool have_5ghz;
bool no_nptb; /* SDIO hardware does not support non-power-of-2-blocksizes */
- int reset; /* GPIO to RSTn signal (0 disables) */
- int powerup; /* GPIO to POWERUP signal (0 disables) */
int irq; /* IRQ line or 0 to use SDIO IRQ */
int (*power_ctrl)(const struct cw1200_platform_data_sdio *pdata,
bool enable); /* Control 3v3 / 1v8 supply */
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 27a7dad17eef..1f0ee2459f2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -92,4 +92,7 @@
/********** VFS **********/
#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
+/********** lib/stackdepot.c **********/
+#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
+
#endif
diff --git a/include/linux/poll.h b/include/linux/poll.h
index a9e0e1c2d1f2..d1ea4f3714a8 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -14,11 +14,7 @@
/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
additional memory. */
-#ifdef __clang__
-#define MAX_STACK_ALLOC 768
-#else
#define MAX_STACK_ALLOC 832
-#endif
#define FRONTEND_STACK_ALLOC 256
#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC
#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8ef499ab3c1e..955e31860095 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -273,6 +273,8 @@ static inline void printk_trigger_flush(void)
}
#endif
+bool this_cpu_in_panic(void);
+
#ifdef CONFIG_SMP
extern int __printk_cpu_sync_try_get(void);
extern void __printk_cpu_sync_wait(void);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index de407e7c3b55..0b2a89854440 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -65,6 +65,7 @@ struct proc_fs_info {
kgid_t pid_gid;
enum proc_hidepid hide_pid;
enum proc_pidonly pidonly;
+ struct rcu_head rcu;
};
static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 49539bc416ce..5ea470eb4d76 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -66,7 +66,7 @@ static inline void proc_free_inum(unsigned int inum) {}
static inline int ns_alloc_inum(struct ns_common *ns)
{
- atomic_long_set(&ns->stashed, 0);
+ WRITE_ONCE(ns->stashed, NULL);
return proc_alloc_inum(&ns->inum);
}
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 7fd17e82bab4..3705c2044fc0 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -78,6 +78,36 @@ enum sev_cmd {
SEV_CMD_DBG_DECRYPT = 0x060,
SEV_CMD_DBG_ENCRYPT = 0x061,
+ /* SNP specific commands */
+ SEV_CMD_SNP_INIT = 0x081,
+ SEV_CMD_SNP_SHUTDOWN = 0x082,
+ SEV_CMD_SNP_PLATFORM_STATUS = 0x083,
+ SEV_CMD_SNP_DF_FLUSH = 0x084,
+ SEV_CMD_SNP_INIT_EX = 0x085,
+ SEV_CMD_SNP_SHUTDOWN_EX = 0x086,
+ SEV_CMD_SNP_DECOMMISSION = 0x090,
+ SEV_CMD_SNP_ACTIVATE = 0x091,
+ SEV_CMD_SNP_GUEST_STATUS = 0x092,
+ SEV_CMD_SNP_GCTX_CREATE = 0x093,
+ SEV_CMD_SNP_GUEST_REQUEST = 0x094,
+ SEV_CMD_SNP_ACTIVATE_EX = 0x095,
+ SEV_CMD_SNP_LAUNCH_START = 0x0A0,
+ SEV_CMD_SNP_LAUNCH_UPDATE = 0x0A1,
+ SEV_CMD_SNP_LAUNCH_FINISH = 0x0A2,
+ SEV_CMD_SNP_DBG_DECRYPT = 0x0B0,
+ SEV_CMD_SNP_DBG_ENCRYPT = 0x0B1,
+ SEV_CMD_SNP_PAGE_SWAP_OUT = 0x0C0,
+ SEV_CMD_SNP_PAGE_SWAP_IN = 0x0C1,
+ SEV_CMD_SNP_PAGE_MOVE = 0x0C2,
+ SEV_CMD_SNP_PAGE_MD_INIT = 0x0C3,
+ SEV_CMD_SNP_PAGE_SET_STATE = 0x0C6,
+ SEV_CMD_SNP_PAGE_RECLAIM = 0x0C7,
+ SEV_CMD_SNP_PAGE_UNSMASH = 0x0C8,
+ SEV_CMD_SNP_CONFIG = 0x0C9,
+ SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA,
+ SEV_CMD_SNP_COMMIT = 0x0CB,
+ SEV_CMD_SNP_VLEK_LOAD = 0x0CD,
+
SEV_CMD_MAX,
};
@@ -523,12 +553,269 @@ struct sev_data_attestation_report {
u32 len; /* In/Out */
} __packed;
+/**
+ * struct sev_data_snp_download_firmware - SNP_DOWNLOAD_FIRMWARE command params
+ *
+ * @address: physical address of firmware image
+ * @len: length of the firmware image
+ */
+struct sev_data_snp_download_firmware {
+ u64 address; /* In */
+ u32 len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_activate - SNP_ACTIVATE command params
+ *
+ * @gctx_paddr: system physical address guest context page
+ * @asid: ASID to bind to the guest
+ */
+struct sev_data_snp_activate {
+ u64 gctx_paddr; /* In */
+ u32 asid; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_addr - generic SNP command params
+ *
+ * @address: physical address of generic data param
+ */
+struct sev_data_snp_addr {
+ u64 address; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_data_snp_launch_start - SNP_LAUNCH_START command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @policy: guest policy
+ * @ma_gctx_paddr: system physical address of migration agent
+ * @ma_en: the guest is associated with a migration agent
+ * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the
+ * purpose of guest-assisted migration.
+ * @rsvd: reserved
+ * @gosvw: guest OS-visible workarounds, as defined by hypervisor
+ */
+struct sev_data_snp_launch_start {
+ u64 gctx_paddr; /* In */
+ u64 policy; /* In */
+ u64 ma_gctx_paddr; /* In */
+ u32 ma_en:1; /* In */
+ u32 imi_en:1; /* In */
+ u32 rsvd:30;
+ u8 gosvw[16]; /* In */
+} __packed;
+
+/* SNP support page type */
+enum {
+ SNP_PAGE_TYPE_NORMAL = 0x1,
+ SNP_PAGE_TYPE_VMSA = 0x2,
+ SNP_PAGE_TYPE_ZERO = 0x3,
+ SNP_PAGE_TYPE_UNMEASURED = 0x4,
+ SNP_PAGE_TYPE_SECRET = 0x5,
+ SNP_PAGE_TYPE_CPUID = 0x6,
+
+ SNP_PAGE_TYPE_MAX
+};
+
+/**
+ * struct sev_data_snp_launch_update - SNP_LAUNCH_UPDATE command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @page_size: page size 0 indicates 4K and 1 indicates 2MB page
+ * @page_type: encoded page type
+ * @imi_page: indicates that this page is part of the IMI (Incoming Migration
+ * Image) of the guest
+ * @rsvd: reserved
+ * @rsvd2: reserved
+ * @address: system physical address of destination page to encrypt
+ * @rsvd3: reserved
+ * @vmpl1_perms: VMPL permission mask for VMPL1
+ * @vmpl2_perms: VMPL permission mask for VMPL2
+ * @vmpl3_perms: VMPL permission mask for VMPL3
+ * @rsvd4: reserved
+ */
+struct sev_data_snp_launch_update {
+ u64 gctx_paddr; /* In */
+ u32 page_size:1; /* In */
+ u32 page_type:3; /* In */
+ u32 imi_page:1; /* In */
+ u32 rsvd:27;
+ u32 rsvd2;
+ u64 address; /* In */
+ u32 rsvd3:8;
+ u32 vmpl1_perms:8; /* In */
+ u32 vmpl2_perms:8; /* In */
+ u32 vmpl3_perms:8; /* In */
+ u32 rsvd4;
+} __packed;
+
+/**
+ * struct sev_data_snp_launch_finish - SNP_LAUNCH_FINISH command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @id_block_paddr: system physical address of ID block
+ * @id_auth_paddr: system physical address of ID block authentication structure
+ * @id_block_en: indicates whether ID block is present
+ * @auth_key_en: indicates whether author key is present in authentication structure
+ * @rsvd: reserved
+ * @host_data: host-supplied data for guest, not interpreted by firmware
+ */
+struct sev_data_snp_launch_finish {
+ u64 gctx_paddr;
+ u64 id_block_paddr;
+ u64 id_auth_paddr;
+ u8 id_block_en:1;
+ u8 auth_key_en:1;
+ u64 rsvd:62;
+ u8 host_data[32];
+} __packed;
+
+/**
+ * struct sev_data_snp_guest_status - SNP_GUEST_STATUS command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @address: system physical address of guest status page
+ */
+struct sev_data_snp_guest_status {
+ u64 gctx_paddr;
+ u64 address;
+} __packed;
+
+/**
+ * struct sev_data_snp_page_reclaim - SNP_PAGE_RECLAIM command params
+ *
+ * @paddr: system physical address of page to be claimed. The 0th bit in the
+ * address indicates the page size. 0h indicates 4KB and 1h indicates
+ * 2MB page.
+ */
+struct sev_data_snp_page_reclaim {
+ u64 paddr;
+} __packed;
+
+/**
+ * struct sev_data_snp_page_unsmash - SNP_PAGE_UNSMASH command params
+ *
+ * @paddr: system physical address of page to be unsmashed. The 0th bit in the
+ * address indicates the page size. 0h indicates 4 KB and 1h indicates
+ * 2 MB page.
+ */
+struct sev_data_snp_page_unsmash {
+ u64 paddr;
+} __packed;
+
+/**
+ * struct sev_data_snp_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @src_addr: source address of data to operate on
+ * @dst_addr: destination address of data to operate on
+ */
+struct sev_data_snp_dbg {
+ u64 gctx_paddr; /* In */
+ u64 src_addr; /* In */
+ u64 dst_addr; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_guest_request - SNP_GUEST_REQUEST command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @req_paddr: system physical address of request page
+ * @res_paddr: system physical address of response page
+ */
+struct sev_data_snp_guest_request {
+ u64 gctx_paddr; /* In */
+ u64 req_paddr; /* In */
+ u64 res_paddr; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_init_ex - SNP_INIT_EX structure
+ *
+ * @init_rmp: indicate that the RMP should be initialized.
+ * @list_paddr_en: indicate that list_paddr is valid
+ * @rsvd: reserved
+ * @rsvd1: reserved
+ * @list_paddr: system physical address of range list
+ * @rsvd2: reserved
+ */
+struct sev_data_snp_init_ex {
+ u32 init_rmp:1;
+ u32 list_paddr_en:1;
+ u32 rsvd:30;
+ u32 rsvd1;
+ u64 list_paddr;
+ u8 rsvd2[48];
+} __packed;
+
+/**
+ * struct sev_data_range - RANGE structure
+ *
+ * @base: system physical address of first byte of range
+ * @page_count: number of 4KB pages in this range
+ * @rsvd: reserved
+ */
+struct sev_data_range {
+ u64 base;
+ u32 page_count;
+ u32 rsvd;
+} __packed;
+
+/**
+ * struct sev_data_range_list - RANGE_LIST structure
+ *
+ * @num_elements: number of elements in RANGE_ARRAY
+ * @rsvd: reserved
+ * @ranges: array of num_elements of type RANGE
+ */
+struct sev_data_range_list {
+ u32 num_elements;
+ u32 rsvd;
+ struct sev_data_range ranges[];
+} __packed;
+
+/**
+ * struct sev_data_snp_shutdown_ex - SNP_SHUTDOWN_EX structure
+ *
+ * @len: length of the command buffer read by the PSP
+ * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU
+ * @rsvd1: reserved
+ */
+struct sev_data_snp_shutdown_ex {
+ u32 len;
+ u32 iommu_snp_shutdown:1;
+ u32 rsvd1:31;
+} __packed;
+
+/**
+ * struct sev_platform_init_args
+ *
+ * @error: SEV firmware error code
+ * @probe: True if this is being called as part of CCP module probe, which
+ * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed
+ * unless psp_init_on_probe module param is set
+ */
+struct sev_platform_init_args {
+ int error;
+ bool probe;
+};
+
+/**
+ * struct sev_data_snp_commit - SNP_COMMIT structure
+ *
+ * @len: length of the command buffer read by the PSP
+ */
+struct sev_data_snp_commit {
+ u32 len;
+} __packed;
+
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
/**
* sev_platform_init - perform SEV INIT command
*
- * @error: SEV command return code
+ * @args: struct sev_platform_init_args to pass in arguments
*
* Returns:
* 0 if the SEV successfully processed the command
@@ -537,7 +824,7 @@ struct sev_data_attestation_report {
* -%ETIMEDOUT if the SEV command timed out
* -%EIO if the SEV returned a non-zero return code
*/
-int sev_platform_init(int *error);
+int sev_platform_init(struct sev_platform_init_args *args);
/**
* sev_platform_status - perform SEV PLATFORM_STATUS command
@@ -637,14 +924,32 @@ int sev_guest_df_flush(int *error);
*/
int sev_guest_decommission(struct sev_data_decommission *data, int *error);
+/**
+ * sev_do_cmd - issue an SEV or an SEV-SNP command
+ *
+ * @cmd: SEV or SEV-SNP firmware command to issue
+ * @data: arguments for firmware command
+ * @psp_ret: SEV command return code
+ *
+ * Returns:
+ * 0 if the SEV device successfully processed the command
+ * -%ENODEV if the PSP device is not available
+ * -%ENOTSUPP if PSP device does not support SEV
+ * -%ETIMEDOUT if the SEV command timed out
+ * -%EIO if PSP device returned a non-zero return code
+ */
+int sev_do_cmd(int cmd, void *data, int *psp_ret);
+
void *psp_copy_user_blob(u64 uaddr, u32 len);
+void *snp_alloc_firmware_page(gfp_t mask);
+void snp_free_firmware_page(void *addr);
#else /* !CONFIG_CRYPTO_DEV_SP_PSP */
static inline int
sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; }
-static inline int sev_platform_init(int *error) { return -ENODEV; }
+static inline int sev_platform_init(struct sev_platform_init_args *args) { return -ENODEV; }
static inline int
sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; }
@@ -653,6 +958,9 @@ static inline int
sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; }
static inline int
+sev_do_cmd(int cmd, void *data, int *psp_ret) { return -ENODEV; }
+
+static inline int
sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; }
static inline int sev_guest_df_flush(int *error) { return -ENODEV; }
@@ -662,6 +970,13 @@ sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int
static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); }
+static inline void *snp_alloc_firmware_page(gfp_t mask)
+{
+ return NULL;
+}
+
+static inline void snp_free_firmware_page(void *addr) { }
+
#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
#endif /* __PSP_SEV_H__ */
diff --git a/include/linux/pti.h b/include/linux/pti.h
index 1a941efcaa62..1fbf9d6c20ef 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -2,7 +2,7 @@
#ifndef _INCLUDE_PTI_H
#define _INCLUDE_PTI_H
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
#include <asm/pti.h>
#else
static inline void pti_init(void) { }
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 1ef4e0f9bd2a..6e4b8206c7d0 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -200,6 +200,7 @@ struct ptp_clock;
enum ptp_clock_events {
PTP_CLOCK_ALARM,
PTP_CLOCK_EXTTS,
+ PTP_CLOCK_EXTOFF,
PTP_CLOCK_PPS,
PTP_CLOCK_PPSUSR,
};
@@ -210,6 +211,7 @@ enum ptp_clock_events {
* @type: One of the ptp_clock_events enumeration values.
* @index: Identifies the source of the event.
* @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only).
+ * @offset: When the event occurred (%PTP_CLOCK_EXTOFF only).
* @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only).
*/
@@ -218,6 +220,7 @@ struct ptp_clock_event {
int index;
union {
u64 timestamp;
+ s64 offset;
struct pps_event_time pps_times;
};
};
diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h
index 746fd67c3480..e8c74fa3f455 100644
--- a/include/linux/ptp_kvm.h
+++ b/include/linux/ptp_kvm.h
@@ -8,15 +8,15 @@
#ifndef _PTP_KVM_H_
#define _PTP_KVM_H_
+#include <linux/clocksource_ids.h>
#include <linux/types.h>
struct timespec64;
-struct clocksource;
int kvm_arch_ptp_init(void);
void kvm_arch_ptp_exit(void);
int kvm_arch_ptp_get_clock(struct timespec64 *ts);
int kvm_arch_ptp_get_crosststamp(u64 *cycle,
- struct timespec64 *tspec, struct clocksource **cs);
+ struct timespec64 *tspec, enum clocksource_ids *cs_id);
#endif /* _PTP_KVM_H_ */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eaaef3ffec22..90507d4afcd6 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs)
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
+#ifndef exception_ip
+#define exception_ip(x) instruction_pointer(x)
+#endif
+
extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1f4048bf2674..a64182bc72ad 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len);
void log_arm_hw_error(struct cper_sec_proc_arm *err);
+
#else
static inline void
log_non_standard_event(const guid_t *sec_type,
@@ -35,4 +36,21 @@ static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
#endif
+struct atl_err {
+ u64 addr;
+ u64 ipid;
+ u32 cpu;
+};
+
+#if IS_ENABLED(CONFIG_AMD_ATL)
+void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
+void amd_atl_unregister_decoder(void);
+void amd_retire_dram_row(struct atl_err *err);
+unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+#else
+static inline void amd_retire_dram_row(struct atl_err *err) { }
+static inline unsigned long
+amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
+#endif /* CONFIG_AMD_ATL */
+
#endif /* __RAS_H__ */
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 0027d4c8087c..3860dbb9107a 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
}
extern void rcu_sync_init(struct rcu_sync *);
-extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 0746b1b0b663..16f519914415 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t);
do { \
int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \
\
- if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \
+ if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \
likely(!___rttq_nesting)) { \
- rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \
+ rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \
} else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \
!READ_ONCE((t)->trc_reader_special.b.blocked)) { \
rcu_tasks_trace_qs_blkd(t); \
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 85c6df0d1bef..59b3b752394d 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -136,7 +136,8 @@ static inline unsigned int refcount_read(const refcount_t *r)
return atomic_read(&r->refs);
}
-static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
{
int old = refcount_read(r);
@@ -177,7 +178,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
return __refcount_add_not_zero(i, r, NULL);
}
-static inline void __refcount_add(int i, refcount_t *r, int *oldp)
+static inline __signed_wrap
+void __refcount_add(int i, refcount_t *r, int *oldp)
{
int old = atomic_fetch_add_relaxed(i, &r->refs);
@@ -256,7 +258,8 @@ static inline void refcount_inc(refcount_t *r)
__refcount_inc(r, NULL);
}
-static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
+static inline __must_check __signed_wrap
+bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
{
int old = atomic_fetch_sub_release(i, &r->refs);
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 66942d7fba7f..a365f67131ec 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -6,6 +6,12 @@
#include <linux/list.h>
#include <linux/pid.h>
+/* CLOSID, RMID value used by the default control group */
+#define RESCTRL_RESERVED_CLOSID 0
+#define RESCTRL_RESERVED_RMID 0
+
+#define RESCTRL_PICK_ANY_CPU -1
+
#ifdef CONFIG_PROC_CPU_RESCTRL
int proc_resctrl_show(struct seq_file *m,
@@ -153,7 +159,7 @@ struct resctrl_schema;
* @cache_level: Which cache level defines scope of this resource
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
- * @domains: All domains for this resource
+ * @domains: RCU list of all domains for this resource
* @name: Name to use in "schemata" file.
* @data_width: Character width of data when displaying
* @default_ctrl: Specifies default cache cbm or memory B/W percent.
@@ -219,36 +225,70 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type type);
int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_online_cpu(unsigned int cpu);
+void resctrl_offline_cpu(unsigned int cpu);
/**
* resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
* for this resource and domain.
* @r: resource that the counter should be read from.
* @d: domain that the counter should be read from.
+ * @closid: closid that matches the rmid. Depending on the architecture, the
+ * counter may match traffic of both @closid and @rmid, or @rmid
+ * only.
* @rmid: rmid of the counter to read.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
+ * @arch_mon_ctx: An architecture specific value from
+ * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies
+ * the hardware monitor allocated for this read request.
*
- * Call from process context on a CPU that belongs to domain @d.
+ * Some architectures need to sleep when first programming some of the counters.
+ * (specifically: arm64's MPAM cache occupancy counters can return 'not ready'
+ * for a short period of time). Call from a non-migrateable process context on
+ * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or
+ * schedule_work_on(). This function can be called with interrupts masked,
+ * e.g. using smp_call_function_any(), but may consistently return an error.
*
* Return:
* 0 on success, or -EIO, -EINVAL etc on error.
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid, u64 *val);
+ u32 closid, u32 rmid, enum resctrl_event_id eventid,
+ u64 *val, void *arch_mon_ctx);
+
+/**
+ * resctrl_arch_rmid_read_context_check() - warn about invalid contexts
+ *
+ * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when
+ * resctrl_arch_rmid_read() is called with preemption disabled.
+ *
+ * The contract with resctrl_arch_rmid_read() is that if interrupts
+ * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an
+ * IPI, (and fail if the call needed to sleep), while most of the time
+ * the work is scheduled, allowing the call to sleep.
+ */
+static inline void resctrl_arch_rmid_read_context_check(void)
+{
+ if (!irqs_disabled())
+ might_sleep();
+}
/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
* and eventid.
* @r: The domain's resource.
* @d: The rmid's domain.
+ * @closid: closid that matches the rmid. Depending on the architecture, the
+ * counter may match traffic of both @closid and @rmid, or @rmid only.
* @rmid: The rmid whose counter values should be reset.
* @eventid: The eventid whose counter values should be reset.
*
* This can be called from any CPU.
*/
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid);
+ u32 closid, u32 rmid,
+ enum resctrl_event_id eventid);
/**
* resctrl_arch_reset_rmid_all() - Reset all private state associated with
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 410529fca18b..cdfc897f1e3c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -47,6 +47,7 @@ extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
extern wait_queue_head_t netdev_unregistering_wq;
+extern atomic_t dev_unreg_count;
extern struct rw_semaphore pernet_ops_rwsem;
extern struct rw_semaphore net_rwsem;
@@ -171,4 +172,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group)
return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group);
}
+void netdev_set_operstate(struct net_device *dev, int newstate);
+
#endif /* __LINUX_RTNETLINK_H */
diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h
new file mode 100644
index 000000000000..309ca72f2dfb
--- /dev/null
+++ b/include/linux/rw_hint.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RW_HINT_H
+#define _LINUX_RW_HINT_H
+
+#include <linux/build_bug.h>
+#include <linux/compiler_attributes.h>
+#include <uapi/linux/fcntl.h>
+
+/* Block storage write lifetime hint values. */
+enum rw_hint {
+ WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET,
+ WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
+ WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
+ WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
+ WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
+ WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
+} __packed;
+
+/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */
+#ifndef __CHECKER__
+static_assert(sizeof(enum rw_hint) == 1);
+#endif
+
+#endif /* _LINUX_RW_HINT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cdb8ea53c365..17cb0761ff65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -858,6 +858,8 @@ struct task_struct {
u8 rcu_tasks_idx;
int rcu_tasks_idle_cpu;
struct list_head rcu_tasks_holdout_list;
+ int rcu_tasks_exit_cpu;
+ struct list_head rcu_tasks_exit_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
@@ -920,7 +922,7 @@ struct task_struct {
unsigned sched_rt_mutex:1;
#endif
- /* Bit to tell LSMs we're in execve(): */
+ /* Bit to tell TOMOYO we're in execve(): */
unsigned in_execve:1;
unsigned in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
@@ -1642,7 +1644,7 @@ extern struct pid *cad_pid;
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
-#define PF__HOLE__20000000 0x20000000
+#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */
#define PF__HOLE__40000000 0x40000000
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index a8b28647aafc..b04a5d04dee9 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -117,13 +117,13 @@ SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS)
/*
- * Domain members share CPU package resources (i.e. caches)
+ * Domain members share CPU Last Level Caches
*
* SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share
* the same cache(s).
* NEEDS_GROUPS: Caches are shared between groups.
*/
-SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
+SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
/*
* Only a single load balancing instance
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 4b7664c56208..0a0e23c45406 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -735,8 +735,6 @@ static inline int thread_group_empty(struct task_struct *p)
#define delay_group_leader(p) \
(thread_group_leader(p) && !thread_group_empty(p))
-extern bool thread_group_exited(struct pid *pid);
-
extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
unsigned long *flags);
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index a6e04b4a21d7..18572c9ea724 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[];
#ifdef CONFIG_SCHED_SMT
static inline int cpu_smt_flags(void)
{
- return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
}
#endif
#ifdef CONFIG_SCHED_CLUSTER
static inline int cpu_cluster_flags(void)
{
- return SD_CLUSTER | SD_SHARE_PKG_RESOURCES;
+ return SD_CLUSTER | SD_SHARE_LLC;
}
#endif
#ifdef CONFIG_SCHED_MC
static inline int cpu_core_flags(void)
{
- return SD_SHARE_PKG_RESOURCES;
+ return SD_SHARE_LLC;
}
#endif
@@ -176,6 +176,7 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+bool cpus_equal_capacity(int this_cpu, int that_cpu);
bool cpus_share_cache(int this_cpu, int that_cpu);
bool cpus_share_resources(int this_cpu, int that_cpu);
@@ -226,6 +227,11 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
{
}
+static inline bool cpus_equal_capacity(int this_cpu, int that_cpu)
+{
+ return true;
+}
+
static inline bool cpus_share_cache(int this_cpu, int that_cpu)
{
return true;
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index f2f05fb42d28..2ee94ff0320c 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -47,6 +47,10 @@ struct scmi_clock_info {
bool rate_discrete;
bool rate_changed_notifications;
bool rate_change_requested_notifications;
+ bool state_ctrl_forbidden;
+ bool rate_ctrl_forbidden;
+ bool parent_ctrl_forbidden;
+ bool extended_config;
union {
struct {
int num_rates;
@@ -72,6 +76,13 @@ struct scmi_handle;
struct scmi_device;
struct scmi_protocol_handle;
+enum scmi_clock_oem_config {
+ SCMI_CLOCK_CFG_DUTY_CYCLE = 0x1,
+ SCMI_CLOCK_CFG_PHASE,
+ SCMI_CLOCK_CFG_OEM_START = 0x80,
+ SCMI_CLOCK_CFG_OEM_END = 0xFF,
+};
+
/**
* struct scmi_clk_proto_ops - represents the various operations provided
* by SCMI Clock Protocol
@@ -104,10 +115,11 @@ struct scmi_clk_proto_ops {
int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id,
bool *enabled, bool atomic);
int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id,
- u8 oem_type, u32 *oem_val, u32 *attributes,
- bool atomic);
+ enum scmi_clock_oem_config oem_type,
+ u32 *oem_val, u32 *attributes, bool atomic);
int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id,
- u8 oem_type, u32 oem_val, bool atomic);
+ enum scmi_clock_oem_config oem_type,
+ u32 oem_val, bool atomic);
int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id);
int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id);
};
@@ -953,6 +965,8 @@ struct scmi_perf_limits_report {
unsigned int domain_id;
unsigned int range_max;
unsigned int range_min;
+ unsigned long range_max_freq;
+ unsigned long range_min_freq;
};
struct scmi_perf_level_report {
@@ -960,6 +974,7 @@ struct scmi_perf_level_report {
unsigned int agent_id;
unsigned int domain_id;
unsigned int performance_level;
+ unsigned long performance_level_freq;
};
struct scmi_sensor_trip_point_report {
diff --git a/include/linux/security.h b/include/linux/security.h
index d0eb20f90b26..f249f5b9a9d7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -32,6 +32,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/sockptr.h>
+#include <linux/bpf.h>
#include <uapi/linux/lsm.h>
struct linux_binprm;
@@ -344,6 +345,8 @@ int security_inode_init_security_anon(struct inode *inode,
const struct qstr *name,
const struct inode *context_inode);
int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode);
+void security_inode_post_create_tmpfile(struct mnt_idmap *idmap,
+ struct inode *inode);
int security_inode_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry);
int security_inode_unlink(struct inode *dir, struct dentry *dentry);
@@ -361,6 +364,8 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
int security_inode_permission(struct inode *inode, int mask);
int security_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
+void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ int ia_valid);
int security_inode_getattr(const struct path *path);
int security_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
@@ -368,16 +373,22 @@ int security_inode_setxattr(struct mnt_idmap *idmap,
int security_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl);
+void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl);
int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
int security_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
+void security_inode_post_remove_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ const char *acl_name);
void security_inode_post_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int security_inode_getxattr(struct dentry *dentry, const char *name);
int security_inode_listxattr(struct dentry *dentry);
int security_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name);
+void security_inode_post_removexattr(struct dentry *dentry, const char *name);
int security_inode_need_killpriv(struct dentry *dentry);
int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry);
int security_inode_getsecurity(struct mnt_idmap *idmap,
@@ -392,6 +403,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
struct kernfs_node *kn);
int security_file_permission(struct file *file, int mask);
int security_file_alloc(struct file *file);
+void security_file_release(struct file *file);
void security_file_free(struct file *file);
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int security_file_ioctl_compat(struct file *file, unsigned int cmd,
@@ -408,6 +420,7 @@ int security_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown, int sig);
int security_file_receive(struct file *file);
int security_file_open(struct file *file);
+int security_file_post_open(struct file *file, int mask);
int security_file_truncate(struct file *file);
int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
void security_task_free(struct task_struct *task);
@@ -806,6 +819,10 @@ static inline int security_inode_create(struct inode *dir,
return 0;
}
+static inline void
+security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode)
+{ }
+
static inline int security_inode_link(struct dentry *old_dentry,
struct inode *dir,
struct dentry *new_dentry)
@@ -879,6 +896,11 @@ static inline int security_inode_setattr(struct mnt_idmap *idmap,
return 0;
}
+static inline void
+security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ int ia_valid)
+{ }
+
static inline int security_inode_getattr(const struct path *path)
{
return 0;
@@ -899,6 +921,11 @@ static inline int security_inode_set_acl(struct mnt_idmap *idmap,
return 0;
}
+static inline void security_inode_post_set_acl(struct dentry *dentry,
+ const char *acl_name,
+ struct posix_acl *kacl)
+{ }
+
static inline int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
@@ -913,6 +940,11 @@ static inline int security_inode_remove_acl(struct mnt_idmap *idmap,
return 0;
}
+static inline void security_inode_post_remove_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ const char *acl_name)
+{ }
+
static inline void security_inode_post_setxattr(struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
{ }
@@ -935,6 +967,10 @@ static inline int security_inode_removexattr(struct mnt_idmap *idmap,
return cap_inode_removexattr(idmap, dentry, name);
}
+static inline void security_inode_post_removexattr(struct dentry *dentry,
+ const char *name)
+{ }
+
static inline int security_inode_need_killpriv(struct dentry *dentry)
{
return cap_inode_need_killpriv(dentry);
@@ -995,6 +1031,9 @@ static inline int security_file_alloc(struct file *file)
return 0;
}
+static inline void security_file_release(struct file *file)
+{ }
+
static inline void security_file_free(struct file *file)
{ }
@@ -1062,6 +1101,11 @@ static inline int security_file_open(struct file *file)
return 0;
}
+static inline int security_file_post_open(struct file *file, int mask)
+{
+ return 0;
+}
+
static inline int security_file_truncate(struct file *file)
{
return 0;
@@ -1871,6 +1915,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t m
int security_path_rmdir(const struct path *dir, struct dentry *dentry);
int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode,
unsigned int dev);
+void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry);
int security_path_truncate(const struct path *path);
int security_path_symlink(const struct path *dir, struct dentry *dentry,
const char *old_name);
@@ -1905,6 +1950,10 @@ static inline int security_path_mknod(const struct path *dir, struct dentry *den
return 0;
}
+static inline void security_path_post_mknod(struct mnt_idmap *idmap,
+ struct dentry *dentry)
+{ }
+
static inline int security_path_truncate(const struct path *path)
{
return 0;
@@ -1956,6 +2005,9 @@ void security_key_free(struct key *key);
int security_key_permission(key_ref_t key_ref, const struct cred *cred,
enum key_need_perm need_perm);
int security_key_getsecurity(struct key *key, char **_buffer);
+void security_key_post_create_or_update(struct key *keyring, struct key *key,
+ const void *payload, size_t payload_len,
+ unsigned long flags, bool create);
#else
@@ -1983,6 +2035,14 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer)
return 0;
}
+static inline void security_key_post_create_or_update(struct key *keyring,
+ struct key *key,
+ const void *payload,
+ size_t payload_len,
+ unsigned long flags,
+ bool create)
+{ }
+
#endif
#endif /* CONFIG_KEYS */
@@ -2064,15 +2124,22 @@ static inline void securityfs_remove(struct dentry *dentry)
union bpf_attr;
struct bpf_map;
struct bpf_prog;
-struct bpf_prog_aux;
+struct bpf_token;
#ifdef CONFIG_SECURITY
extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size);
extern int security_bpf_map(struct bpf_map *map, fmode_t fmode);
extern int security_bpf_prog(struct bpf_prog *prog);
-extern int security_bpf_map_alloc(struct bpf_map *map);
+extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token);
extern void security_bpf_map_free(struct bpf_map *map);
-extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux);
-extern void security_bpf_prog_free(struct bpf_prog_aux *aux);
+extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token);
+extern void security_bpf_prog_free(struct bpf_prog *prog);
+extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path);
+extern void security_bpf_token_free(struct bpf_token *token);
+extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
#else
static inline int security_bpf(int cmd, union bpf_attr *attr,
unsigned int size)
@@ -2090,7 +2157,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog)
return 0;
}
-static inline int security_bpf_map_alloc(struct bpf_map *map)
+static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
{
return 0;
}
@@ -2098,13 +2166,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map)
static inline void security_bpf_map_free(struct bpf_map *map)
{ }
-static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
+{
+ return 0;
+}
+
+static inline void security_bpf_prog_free(struct bpf_prog *prog)
+{ }
+
+static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
{
return 0;
}
-static inline void security_bpf_prog_free(struct bpf_prog_aux *aux)
+static inline void security_bpf_token_free(struct bpf_token *token)
{ }
+
+static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ return 0;
+}
+
+static inline int security_bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h
index c44f4b47b945..fe41da005970 100644
--- a/include/linux/seq_buf.h
+++ b/include/linux/seq_buf.h
@@ -2,7 +2,10 @@
#ifndef _LINUX_SEQ_BUF_H
#define _LINUX_SEQ_BUF_H
-#include <linux/fs.h>
+#include <linux/bug.h>
+#include <linux/minmax.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
/*
* Trace sequences are used to allow a function to call several other functions
@@ -10,7 +13,7 @@
*/
/**
- * seq_buf - seq buffer structure
+ * struct seq_buf - seq buffer structure
* @buffer: pointer to the buffer
* @size: size of the buffer
* @len: the amount of data inside the buffer
@@ -77,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s)
}
/**
- * seq_buf_str - get %NUL-terminated C string from seq_buf
+ * seq_buf_str - get NUL-terminated C string from seq_buf
* @s: the seq_buf handle
*
- * This makes sure that the buffer in @s is nul terminated and
+ * This makes sure that the buffer in @s is NUL-terminated and
* safe to read as a string.
*
* Note, if this is called when the buffer has overflowed, then
@@ -90,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s)
* After this function is called, s->buffer is safe to use
* in string operations.
*
- * Returns @s->buf after making sure it is terminated.
+ * Returns: @s->buf after making sure it is terminated.
*/
static inline const char *seq_buf_str(struct seq_buf *s)
{
@@ -110,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s)
* @s: the seq_buf handle
* @bufp: the beginning of the buffer is stored here
*
- * Return the number of bytes available in the buffer, or zero if
+ * Returns: the number of bytes available in the buffer, or zero if
* there's no space.
*/
static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
@@ -132,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
* @num: the number of bytes to commit
*
* Commit @num bytes of data written to a buffer previously acquired
- * by seq_buf_get. To signal an error condition, or that the data
+ * by seq_buf_get_buf(). To signal an error condition, or that the data
* didn't fit in the available space, pass a negative @num value.
*/
static inline void seq_buf_commit(struct seq_buf *s, int num)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 536b2581d3e2..55b1f3ba48ac 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -748,8 +748,17 @@ struct uart_driver {
void uart_write_wakeup(struct uart_port *port);
-#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \
- for_post) \
+/**
+ * enum UART_TX_FLAGS -- flags for uart_port_tx_flags()
+ *
+ * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer
+ */
+enum UART_TX_FLAGS {
+ UART_TX_NOSTOP = BIT(0),
+};
+
+#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \
+ for_test, for_post) \
({ \
struct uart_port *__port = (uport); \
struct circ_buf *xmit = &__port->state->xmit; \
@@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port);
if (pending < WAKEUP_CHARS) { \
uart_write_wakeup(__port); \
\
- if (pending == 0) \
+ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \
__port->ops->stop_tx(__port); \
} \
\
@@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port);
*/
#define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \
unsigned int __count = (count); \
- __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \
+ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \
__count--); \
})
@@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port);
* See uart_port_tx_limited() for more details.
*/
#define uart_port_tx(port, ch, tx_ready, put_char) \
- __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({}))
+ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({}))
+
+/**
+ * uart_port_tx_flags -- transmit helper for uart_port with flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \
+ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({}))
/*
* Baud rate helpers.
*/
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2dde34c29203..3023bc2be6a1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@
#endif
#include <net/net_debug.h>
#include <net/dropreason-core.h>
+#include <net/netmem.h>
/**
* DOC: skb checksums
@@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags;
*/
#define GSO_BY_FRAGS 0xFFFF
-typedef struct bio_vec skb_frag_t;
+typedef struct skb_frag {
+ netmem_ref netmem;
+ unsigned int len;
+ unsigned int offset;
+} skb_frag_t;
/**
* skb_frag_size() - Returns the size of a skb fragment
@@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t;
*/
static inline unsigned int skb_frag_size(const skb_frag_t *frag)
{
- return frag->bv_len;
+ return frag->len;
}
/**
@@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
*/
static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
{
- frag->bv_len = size;
+ frag->len = size;
}
/**
@@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
*/
static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
{
- frag->bv_len += delta;
+ frag->len += delta;
}
/**
@@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
*/
static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
{
- frag->bv_len -= delta;
+ frag->len -= delta;
}
/**
@@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p)
* skb_frag_foreach_page - loop over pages in a fragment
*
* @f: skb frag to operate on
- * @f_off: offset from start of f->bv_page
+ * @f_off: offset from start of f->netmem
* @f_len: length from f_off to loop over
* @p: (temp var) current page
* @p_off: (temp var) offset from start of current page,
@@ -817,9 +822,9 @@ typedef unsigned char *sk_buff_data_t;
* @decrypted: Decrypted SKB
* @slow_gro: state present at GRO time, slower prepare step required
* @mono_delivery_time: When set, skb->tstamp has the
- * delivery_time in mono clock base (i.e. EDT). Otherwise, the
- * skb->tstamp has the (rcv) timestamp at ingress and
- * delivery_time at egress.
+ * delivery_time in mono clock base (i.e., EDT) or a clock base chosen
+ * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at
+ * ingress.
* @napi_id: id of the NAPI struct this skb came from
* @sender_cpu: (aka @napi_id) source CPU in XPS
* @alloc_cpu: CPU which did the skb allocation.
@@ -1232,6 +1237,24 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}
+static inline bool skb_data_unref(const struct sk_buff *skb,
+ struct skb_shared_info *shinfo)
+{
+ int bias;
+
+ if (!skb->cloned)
+ return true;
+
+ bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+ if (atomic_read(&shinfo->dataref) == bias)
+ smp_rmb();
+ else if (atomic_sub_return(bias, &shinfo->dataref))
+ return false;
+
+ return true;
+}
+
void __fix_address
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
@@ -1266,7 +1289,6 @@ static inline void consume_skb(struct sk_buff *skb)
void __consume_stateless_skb(struct sk_buff *skb);
void __kfree_skb(struct sk_buff *skb);
-extern struct kmem_cache *skbuff_cache;
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
@@ -2429,22 +2451,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
return skb_headlen(skb) + __skb_pagelen(skb);
}
+static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
+ netmem_ref netmem, int off,
+ int size)
+{
+ frag->netmem = netmem;
+ frag->offset = off;
+ skb_frag_size_set(frag, size);
+}
+
static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
struct page *page,
int off, int size)
{
- frag->bv_page = page;
- frag->bv_offset = off;
- skb_frag_size_set(frag, size);
+ skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
+}
+
+static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
+ int i, netmem_ref netmem,
+ int off, int size)
+{
+ skb_frag_t *frag = &shinfo->frags[i];
+
+ skb_frag_fill_netmem_desc(frag, netmem, off, size);
}
static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
int i, struct page *page,
int off, int size)
{
- skb_frag_t *frag = &shinfo->frags[i];
-
- skb_frag_fill_page_desc(frag, page, off, size);
+ __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
+ size);
}
/**
@@ -2460,10 +2497,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
}
/**
- * __skb_fill_page_desc - initialise a paged fragment in an skb
+ * __skb_fill_netmem_desc - initialise a fragment in an skb
* @skb: buffer containing fragment to be initialised
- * @i: paged fragment index to initialise
- * @page: the page to use for this fragment
+ * @i: fragment index to initialise
+ * @netmem: the netmem to use for this fragment
* @off: the offset to the data with @page
* @size: the length of the data
*
@@ -2472,10 +2509,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
*
* Does not take any additional reference on the fragment.
*/
-static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
- struct page *page, int off, int size)
+static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
+ netmem_ref netmem, int off, int size)
{
- __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
+ struct page *page = netmem_to_page(netmem);
+
+ __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);
/* Propagate page pfmemalloc to the skb if we can. The problem is
* that not all callers have unique ownership of the page but rely
@@ -2483,7 +2522,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
*/
page = compound_head(page);
if (page_is_pfmemalloc(page))
- skb->pfmemalloc = true;
+ skb->pfmemalloc = true;
+}
+
+static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off, int size)
+{
+ __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
+}
+
+static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
+ netmem_ref netmem, int off, int size)
+{
+ __skb_fill_netmem_desc(skb, i, netmem, off, size);
+ skb_shinfo(skb)->nr_frags = i + 1;
}
/**
@@ -2503,8 +2555,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size)
{
- __skb_fill_page_desc(skb, i, page, off, size);
- skb_shinfo(skb)->nr_frags = i + 1;
+ skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
}
/**
@@ -2528,8 +2579,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
shinfo->nr_frags = i + 1;
}
-void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
- int size, unsigned int truesize);
+void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
+ int off, int size, unsigned int truesize);
+
+static inline void skb_add_rx_frag(struct sk_buff *skb, int i,
+ struct page *page, int off, int size,
+ unsigned int truesize)
+{
+ skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
+ truesize);
+}
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize);
@@ -2642,6 +2701,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val)
void *skb_push(struct sk_buff *skb, unsigned int len);
static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+
skb->data -= len;
skb->len += len;
return skb->data;
@@ -2650,6 +2711,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
void *skb_pull(struct sk_buff *skb, unsigned int len);
static inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+
skb->len -= len;
if (unlikely(skb->len < skb->data_len)) {
#if defined(CONFIG_DEBUG_NET)
@@ -2674,6 +2737,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta);
static inline enum skb_drop_reason
pskb_may_pull_reason(struct sk_buff *skb, unsigned int len)
{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+
if (likely(len <= skb_headlen(skb)))
return SKB_NOT_DROPPED_YET;
@@ -2846,6 +2911,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
skb->inner_network_header += offset;
}
+static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb)
+{
+ return skb->inner_network_header > 0;
+}
+
static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
{
return skb->head + skb->inner_mac_header;
@@ -2983,6 +3053,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb)
static inline u32 skb_network_header_len(const struct sk_buff *skb)
{
+ DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb));
return skb->transport_header - skb->network_header;
}
@@ -3378,7 +3449,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page,
*/
static inline unsigned int skb_frag_off(const skb_frag_t *frag)
{
- return frag->bv_offset;
+ return frag->offset;
}
/**
@@ -3388,7 +3459,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
*/
static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
{
- frag->bv_offset += delta;
+ frag->offset += delta;
}
/**
@@ -3398,7 +3469,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
*/
static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
{
- frag->bv_offset = offset;
+ frag->offset = offset;
}
/**
@@ -3409,7 +3480,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
static inline void skb_frag_off_copy(skb_frag_t *fragto,
const skb_frag_t *fragfrom)
{
- fragto->bv_offset = fragfrom->bv_offset;
+ fragto->offset = fragfrom->offset;
}
/**
@@ -3420,7 +3491,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
*/
static inline struct page *skb_frag_page(const skb_frag_t *frag)
{
- return frag->bv_page;
+ return netmem_to_page(frag->netmem);
}
/**
@@ -3446,6 +3517,10 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
+int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
+ unsigned int headroom);
+int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+ struct bpf_prog *prog);
bool napi_pp_put_page(struct page *page, bool napi_safe);
static inline void
@@ -3524,7 +3599,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
static inline void skb_frag_page_copy(skb_frag_t *fragto,
const skb_frag_t *fragfrom)
{
- fragto->bv_page = fragfrom->bv_page;
+ fragto->netmem = fragfrom->netmem;
}
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 888a4b217829..e65ec3fd2799 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -505,12 +505,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
return !!psock->saved_data_ready;
}
-static inline bool sk_is_udp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP;
-}
-
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
#define BPF_F_STRPARSER (1UL << 1)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b5f5ee8308d0..e53cbfa18325 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -21,29 +21,69 @@
#include <linux/cleanup.h>
#include <linux/hash.h>
+enum _slab_flag_bits {
+ _SLAB_CONSISTENCY_CHECKS,
+ _SLAB_RED_ZONE,
+ _SLAB_POISON,
+ _SLAB_KMALLOC,
+ _SLAB_HWCACHE_ALIGN,
+ _SLAB_CACHE_DMA,
+ _SLAB_CACHE_DMA32,
+ _SLAB_STORE_USER,
+ _SLAB_PANIC,
+ _SLAB_TYPESAFE_BY_RCU,
+ _SLAB_TRACE,
+#ifdef CONFIG_DEBUG_OBJECTS
+ _SLAB_DEBUG_OBJECTS,
+#endif
+ _SLAB_NOLEAKTRACE,
+ _SLAB_NO_MERGE,
+#ifdef CONFIG_FAILSLAB
+ _SLAB_FAILSLAB,
+#endif
+#ifdef CONFIG_MEMCG_KMEM
+ _SLAB_ACCOUNT,
+#endif
+#ifdef CONFIG_KASAN_GENERIC
+ _SLAB_KASAN,
+#endif
+ _SLAB_NO_USER_FLAGS,
+#ifdef CONFIG_KFENCE
+ _SLAB_SKIP_KFENCE,
+#endif
+#ifndef CONFIG_SLUB_TINY
+ _SLAB_RECLAIM_ACCOUNT,
+#endif
+ _SLAB_OBJECT_POISON,
+ _SLAB_CMPXCHG_DOUBLE,
+ _SLAB_FLAGS_LAST_BIT
+};
+
+#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr)))
+#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U))
/*
* Flags to pass to kmem_cache_create().
* The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op
*/
/* DEBUG: Perform (expensive) checks on alloc/free */
-#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
+#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS)
/* DEBUG: Red zone objs in a cache */
-#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U)
+#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE)
/* DEBUG: Poison objects */
-#define SLAB_POISON ((slab_flags_t __force)0x00000800U)
+#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
-#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U)
+#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC)
/* Align objs on cache lines */
-#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
+#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
-#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U)
+#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
/* Use GFP_DMA32 memory */
-#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U)
+#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
/* DEBUG: Store the last owner for bug hunting */
-#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U)
+#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
-#define SLAB_PANIC ((slab_flags_t __force)0x00040000U)
+#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
/*
* SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
*
@@ -95,21 +135,19 @@
* Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
*/
/* Defer freeing slabs to RCU */
-#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U)
-/* Spread some memory over cpuset */
-#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U)
+#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
/* Trace allocations and frees */
-#define SLAB_TRACE ((slab_flags_t __force)0x00200000U)
+#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE)
/* Flag to prevent checks on free */
#ifdef CONFIG_DEBUG_OBJECTS
-# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U)
+# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS)
#else
-# define SLAB_DEBUG_OBJECTS 0
+# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED
#endif
/* Avoid kmemleak tracing */
-#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U)
+#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE)
/*
* Prevent merging with compatible kmem caches. This flag should be used
@@ -121,25 +159,25 @@
* - performance critical caches, should be very rare and consulted with slab
* maintainers, and not used together with CONFIG_SLUB_TINY
*/
-#define SLAB_NO_MERGE ((slab_flags_t __force)0x01000000U)
+#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE)
/* Fault injection mark */
#ifdef CONFIG_FAILSLAB
-# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U)
+# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB)
#else
-# define SLAB_FAILSLAB 0
+# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED
#endif
/* Account to memcg */
#ifdef CONFIG_MEMCG_KMEM
-# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U)
+# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
-# define SLAB_ACCOUNT 0
+# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
#endif
#ifdef CONFIG_KASAN_GENERIC
-#define SLAB_KASAN ((slab_flags_t __force)0x08000000U)
+#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN)
#else
-#define SLAB_KASAN 0
+#define SLAB_KASAN __SLAB_FLAG_UNUSED
#endif
/*
@@ -147,20 +185,20 @@
* Intended for caches created for self-tests so they have only flags
* specified in the code and other flags are ignored.
*/
-#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U)
+#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS)
#ifdef CONFIG_KFENCE
-#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U)
+#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE)
#else
-#define SLAB_SKIP_KFENCE 0
+#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED
#endif
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
-#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
+#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
-#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
+#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED
#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index e87520dc2959..fcd61dfe2af3 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -105,6 +105,12 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func,
on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
}
+/*
+ * Architecture specific boot CPU setup. Defined as empty weak function in
+ * init/main.c. Architectures can override it.
+ */
+void smp_prepare_boot_cpu(void);
+
#ifdef CONFIG_SMP
#include <linux/preempt.h>
@@ -171,12 +177,6 @@ void generic_smp_call_function_single_interrupt(void);
#define generic_smp_call_function_interrupt \
generic_smp_call_function_single_interrupt
-/*
- * Mark the boot cpu "online" so that it can call console drivers in
- * printk() and can access its per-cpu storage.
- */
-void smp_prepare_boot_cpu(void);
-
extern unsigned int setup_max_cpus;
extern void __init setup_nr_cpu_ids(void);
extern void __init smp_init(void);
@@ -203,7 +203,6 @@ static inline void up_smp_call_function(smp_call_func_t func, void *info)
(up_smp_call_function(func, info))
static inline void smp_send_reschedule(int cpu) { }
-#define smp_prepare_boot_cpu() do {} while (0)
#define smp_call_function_many(mask, func, info, wait) \
(up_smp_call_function(func, info))
static inline void call_function_init(void) { }
@@ -218,6 +217,8 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
static inline void kick_all_cpus_sync(void) { }
static inline void wake_up_all_idle_cpus(void) { }
+#define setup_max_cpus 0
+
#ifdef CONFIG_UP_LATE_INIT
extern void __init up_late_init(void);
static inline void smp_init(void) { up_late_init(); }
@@ -261,7 +262,7 @@ static inline int get_boot_cpu_id(void)
* regular asm read for the stable.
*/
#ifndef __smp_processor_id
-#define __smp_processor_id(x) raw_smp_processor_id(x)
+#define __smp_processor_id() raw_smp_processor_id()
#endif
#ifdef CONFIG_DEBUG_PREEMPT
diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h
new file mode 100644
index 000000000000..edc3182d6e66
--- /dev/null
+++ b/include/linux/soc/andes/irq.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 Andes Technology Corporation
+ */
+#ifndef __ANDES_IRQ_H
+#define __ANDES_IRQ_H
+
+/* Andes PMU irq number */
+#define ANDES_RV_IRQ_PMOVI 18
+#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI
+#define ANDES_SLI_CAUSE_BASE 256
+
+/* Andes PMU related registers */
+#define ANDES_CSR_SLIE 0x9c4
+#define ANDES_CSR_SLIP 0x9c5
+#define ANDES_CSR_SCOUNTEROF 0x9d4
+
+#endif /* __ANDES_IRQ_H */
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
index be98aebcb3e1..7161a3183eda 100644
--- a/include/linux/soc/qcom/apr.h
+++ b/include/linux/soc/qcom/apr.h
@@ -9,7 +9,7 @@
#include <dt-bindings/soc/qcom,apr.h>
#include <dt-bindings/soc/qcom,gpr.h>
-extern struct bus_type aprbus;
+extern const struct bus_type aprbus;
#define APR_HDR_LEN(hdr_len) ((hdr_len)/4)
diff --git a/include/linux/soc/qcom/qcom-pbs.h b/include/linux/soc/qcom/qcom-pbs.h
new file mode 100644
index 000000000000..8a46209ccf13
--- /dev/null
+++ b/include/linux/soc/qcom/qcom-pbs.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _QCOM_PBS_H
+#define _QCOM_PBS_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct device_node;
+struct pbs_dev;
+
+#if IS_ENABLED(CONFIG_QCOM_PBS)
+int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap);
+struct pbs_dev *get_pbs_client_device(struct device *client_dev);
+#else
+static inline int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap)
+{
+ return -ENODEV;
+}
+
+static inline struct pbs_dev *get_pbs_client_device(struct device *client_dev)
+{
+ return ERR_PTR(-ENODEV);
+}
+#endif
+
+#endif
diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h
index a4f5516cc956..2bd9d12d9a52 100644
--- a/include/linux/soc/samsung/exynos-pmu.h
+++ b/include/linux/soc/samsung/exynos-pmu.h
@@ -10,6 +10,7 @@
#define __LINUX_SOC_EXYNOS_PMU_H
struct regmap;
+struct device_node;
enum sys_powerdown {
SYS_AFTR,
@@ -20,12 +21,20 @@ enum sys_powerdown {
extern void exynos_sys_powerdown_conf(enum sys_powerdown mode);
#ifdef CONFIG_EXYNOS_PMU
-extern struct regmap *exynos_get_pmu_regmap(void);
+struct regmap *exynos_get_pmu_regmap(void);
+struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
+ const char *propname);
#else
static inline struct regmap *exynos_get_pmu_regmap(void)
{
return ERR_PTR(-ENODEV);
}
+
+static inline struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
+ const char *propname)
+{
+ return ERR_PTR(-ENODEV);
+}
#endif
#endif /* __LINUX_SOC_EXYNOS_PMU_H */
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 0b9ecd8cf979..110978dc9af1 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -13,6 +13,7 @@ struct nlmsghdr;
struct sock;
struct sock_diag_handler {
+ struct module *owner;
__u8 family;
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
int (*get_info)(struct sk_buff *skb, struct sock *sk);
@@ -22,8 +23,13 @@ struct sock_diag_handler {
int sock_diag_register(const struct sock_diag_handler *h);
void sock_diag_unregister(const struct sock_diag_handler *h);
-void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
-void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
+struct sock_diag_inet_compat {
+ struct module *owner;
+ int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh);
+};
+
+void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr);
+void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr);
u64 __sock_gen_cookie(struct sock *sk);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 471fe2ff9066..600fbd5daf68 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -21,7 +21,7 @@
#include <uapi/linux/spi/spi.h>
/* Max no. of CS supported per spi device */
-#define SPI_CS_CNT_MAX 4
+#define SPI_CS_CNT_MAX 16
struct dma_chan;
struct software_node;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index dee5ad6e48c5..dfa1828cd756 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -127,6 +127,7 @@ struct stmmac_est {
u32 gcl_unaligned[EST_GCL];
u32 gcl[EST_GCL];
u32 gcl_size;
+ u32 max_sdu[MTL_MAX_TX_QUEUES];
};
struct stmmac_rxq_cfg {
diff --git a/include/linux/string.h b/include/linux/string.h
index ab148d8dbfc1..9ba8b4597009 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_STRING_H_
#define _LINUX_STRING_H_
+#include <linux/args.h>
#include <linux/array_size.h>
#include <linux/compiler.h> /* for inline */
#include <linux/types.h> /* for size_t */
@@ -66,12 +67,79 @@ extern char * strcpy(char *,const char *);
#ifndef __HAVE_ARCH_STRNCPY
extern char * strncpy(char *,const char *, __kernel_size_t);
#endif
-#ifndef __HAVE_ARCH_STRSCPY
-ssize_t strscpy(char *, const char *, size_t);
-#endif
+ssize_t sized_strscpy(char *, const char *, size_t);
+
+/*
+ * The 2 argument style can only be used when dst is an array with a
+ * known size.
+ */
+#define __strscpy0(dst, src, ...) \
+ sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst))
+#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size)
+
+#define __strscpy_pad0(dst, src, ...) \
+ sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst))
+#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size)
+
+/**
+ * strscpy - Copy a C-string into a sized buffer
+ * @dst: Where to copy the string to
+ * @src: Where to copy the string from
+ * @...: Size of destination buffer (optional)
+ *
+ * Copy the source string @src, or as much of it as fits, into the
+ * destination @dst buffer. The behavior is undefined if the string
+ * buffers overlap. The destination @dst buffer is always NUL terminated,
+ * unless it's zero-sized.
+ *
+ * The size argument @... is only required when @dst is not an array, or
+ * when the copy needs to be smaller than sizeof(@dst).
+ *
+ * Preferred to strncpy() since it always returns a valid string, and
+ * doesn't unnecessarily force the tail of the destination buffer to be
+ * zero padded. If padding is desired please use strscpy_pad().
+ *
+ * Returns the number of characters copied in @dst (not including the
+ * trailing %NUL) or -E2BIG if @size is 0 or the copy from @src was
+ * truncated.
+ */
+#define strscpy(dst, src, ...) \
+ CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
+
+#define sized_strscpy_pad(dest, src, count) ({ \
+ char *__dst = (dest); \
+ const char *__src = (src); \
+ const size_t __count = (count); \
+ ssize_t __wrote; \
+ \
+ __wrote = sized_strscpy(__dst, __src, __count); \
+ if (__wrote >= 0 && __wrote < __count) \
+ memset(__dst + __wrote + 1, 0, __count - __wrote - 1); \
+ __wrote; \
+})
-/* Wraps calls to strscpy()/memset(), no arch specific code required */
-ssize_t strscpy_pad(char *dest, const char *src, size_t count);
+/**
+ * strscpy_pad() - Copy a C-string into a sized buffer
+ * @dst: Where to copy the string to
+ * @src: Where to copy the string from
+ * @...: Size of destination buffer
+ *
+ * Copy the string, or as much of it as fits, into the dest buffer. The
+ * behavior is undefined if the string buffers overlap. The destination
+ * buffer is always %NUL terminated, unless it's zero-sized.
+ *
+ * If the source string is shorter than the destination buffer, the
+ * remaining bytes in the buffer will be filled with %NUL bytes.
+ *
+ * For full explanation of why you may want to consider using the
+ * 'strscpy' functions please see the function docstring for strscpy().
+ *
+ * Returns:
+ * * The number of characters copied (not including the trailing %NULs)
+ * * -E2BIG if count is 0 or @src was truncated.
+ */
+#define strscpy_pad(dst, src, ...) \
+ CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
#ifndef __HAVE_ARCH_STRCAT
extern char * strcat(char *, const char *);
@@ -217,10 +285,19 @@ extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
+extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp);
+/* lib/argv_split.c */
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
extern void argv_free(char **argv);
+/* lib/cmdline.c */
+extern int get_option(char **str, int *pint);
+extern char *get_options(const char *str, int nints, int *ints);
+extern unsigned long long memparse(const char *ptr, char **retptr);
+extern bool parse_option_str(const char *str, const char *option);
+extern char *next_arg(char *args, char **param, char **val);
+
extern bool sysfs_streq(const char *s1, const char *s2);
int match_string(const char * const *array, size_t n, const char *string);
int __sysfs_match_string(const char * const *array, size_t n, const char *s);
diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h
index 3c1091941eb8..d9ebe20229f8 100644
--- a/include/linux/string_choices.h
+++ b/include/linux/string_choices.h
@@ -42,4 +42,15 @@ static inline const char *str_yes_no(bool v)
return v ? "yes" : "no";
}
+/**
+ * str_plural - Return the simple pluralization based on English counts
+ * @num: Number used for deciding pluralization
+ *
+ * If @num is 1, returns empty string, otherwise returns "s".
+ */
+static inline const char *str_plural(size_t num)
+{
+ return num == 1 ? "" : "s";
+}
+
#endif
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 58fb1f90eda5..e93fbb5b0c01 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -17,14 +17,18 @@ static inline bool string_is_terminated(const char *s, int len)
return memchr(s, '\0', len) ? true : false;
}
-/* Descriptions of the types of units to
- * print in */
+/* Descriptions of the types of units to print in */
enum string_size_units {
STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
STRING_UNITS_2, /* use binary powers of 2^10 */
+ STRING_UNITS_MASK = BIT(0),
+
+ /* Modifiers */
+ STRING_UNITS_NO_SPACE = BIT(30),
+ STRING_UNITS_NO_BYTES = BIT(31),
};
-int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
char *buf, int len);
int parse_int_array_user(const char __user *from, size_t count, int **array);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 67cf1c9efd80..23617da0e565 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -339,7 +339,6 @@ struct svc_program {
const struct svc_version **pg_vers; /* version array */
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
- struct svc_stat * pg_stats; /* rpc statistics */
enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *,
@@ -411,7 +410,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
-struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+struct svc_serv * svc_create_pooled(struct svc_program *prog,
+ struct svc_stat *stats,
+ unsigned int bufsize,
int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_info *si, struct file *file);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index e7595ae62fe2..24cd199dd6f3 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -203,6 +203,30 @@ struct svc_rdma_recv_ctxt {
struct page *rc_pages[RPCSVC_MAXPAGES];
};
+/*
+ * State for sending a Write chunk.
+ * - Tracks progress of writing one chunk over all its segments
+ * - Stores arguments for the SGL constructor functions
+ */
+struct svc_rdma_write_info {
+ struct svcxprt_rdma *wi_rdma;
+ struct list_head wi_list;
+
+ const struct svc_rdma_chunk *wi_chunk;
+
+ /* write state of this chunk */
+ unsigned int wi_seg_off;
+ unsigned int wi_seg_no;
+
+ /* SGL constructor arguments */
+ const struct xdr_buf *wi_xdr;
+ unsigned char *wi_base;
+ unsigned int wi_next_off;
+
+ struct svc_rdma_chunk_ctxt wi_cc;
+ struct work_struct wi_work;
+};
+
struct svc_rdma_send_ctxt {
struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
@@ -210,9 +234,15 @@ struct svc_rdma_send_ctxt {
struct svcxprt_rdma *sc_rdma;
struct ib_send_wr sc_send_wr;
+ struct ib_send_wr *sc_wr_chain;
+ int sc_sqecount;
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
+
+ struct list_head sc_write_info_list;
+ struct svc_rdma_write_info sc_reply_info;
+
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
@@ -236,18 +266,27 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
+extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir);
-extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr);
-extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr);
+extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
+extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr);
+extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr);
extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head);
@@ -258,8 +297,8 @@ extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_send(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_pcl *write_pcl,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4db00ddad261..378ab1cd23bd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -298,7 +298,7 @@ struct swap_info_struct {
unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */
struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
struct rb_root swap_extent_root;/* root of the swap extent rbtree */
- struct bdev_handle *bdev_handle;/* open handle of the bdev */
+ struct file *bdev_file; /* open handle of the bdev */
struct block_device *bdev; /* swap device or bdev of swap file */
struct file *swap_file; /* seldom referenced */
unsigned int old_block_size; /* seldom referenced */
@@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp)
return 0;
}
+static inline int swapcache_prepare(swp_entry_t swp)
+{
+ return 0;
+}
+
static inline void swap_free(swp_entry_t swp)
{
}
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ecde0312dd52..ea23097e351f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -120,6 +120,8 @@ struct io_tlb_pool {
* debugfs.
* @used_hiwater: The high water mark for total_used. Used only for reporting
* in debugfs.
+ * @transient_nslabs: The total number of slots in all transient pools that
+ * are currently used across all areas.
*/
struct io_tlb_mem {
struct io_tlb_pool defpool;
@@ -137,6 +139,7 @@ struct io_tlb_mem {
#ifdef CONFIG_DEBUG_FS
atomic_long_t total_used;
atomic_long_t used_hiwater;
+ atomic_long_t transient_nslabs;
#endif
};
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cdba4d0c6d4a..77eb9b0e7685 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -128,6 +128,7 @@ struct mnt_id_req;
#define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL))
#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a
#define __SC_CAST(t, a) (__force t) a
+#define __SC_TYPE(t, a) t
#define __SC_ARGS(t, a) a
#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long))
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 89b290d8c8dc..55399ee2a57e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -221,8 +221,10 @@ struct tcp_sock {
u32 lost_out; /* Lost packets */
u32 sacked_out; /* SACK'd packets */
u16 tcp_header_len; /* Bytes of tcp header to send */
+ u8 scaling_ratio; /* see tcp_win_from_space() */
u8 chrono_type : 2, /* current chronograph type */
repair : 1,
+ tcp_usec_ts : 1, /* TSval values in usec */
is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
__cacheline_group_end(tcp_sock_read_txrx);
@@ -262,10 +264,10 @@ struct tcp_sock {
u32 pushed_seq; /* Last pushed seq, required to talk to windows */
u32 lsndtime;
u32 mdev_us; /* medium deviation */
+ u32 rtt_seq; /* sequence number to update rttvar */
u64 tcp_wstamp_ns; /* departure time for next sent data packet */
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
u64 tcp_mstamp; /* most recent packet received/sent */
- u32 rtt_seq; /* sequence number to update rttvar */
struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
struct sk_buff *highest_sack; /* skb just after the highest
* skb with SACKed bit set
@@ -302,7 +304,7 @@ struct tcp_sock {
__cacheline_group_end(tcp_sock_write_txrx);
/* RX read-write hotpath cache lines */
- __cacheline_group_begin(tcp_sock_write_rx);
+ __cacheline_group_begin(tcp_sock_write_rx) __aligned(8);
u64 bytes_received;
/* RFC4898 tcpEStatsAppHCThruOctetsReceived
* sum(delta(rcv_nxt)), or how many bytes
@@ -348,11 +350,9 @@ struct tcp_sock {
u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
* total number of DSACK blocks received
*/
- u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
u32 compressed_ack_rcv_nxt;
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
- u8 scaling_ratio; /* see tcp_win_from_space() */
/* Information of the most recently (s)acked skb */
struct tcp_rack {
u64 mstamp; /* (Re)sent time of the skb */
@@ -368,8 +368,7 @@ struct tcp_sock {
u8 compressed_ack;
u8 dup_ack_counter:2,
tlp_retrans:1, /* TLP is a retransmission */
- tcp_usec_ts:1, /* TSval values in usec */
- unused:4;
+ unused:5;
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
@@ -384,12 +383,12 @@ struct tcp_sock {
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+ u8 keepalive_probes; /* num of allowed keep alive probes */
u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
/* RTT measurement */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
- u8 keepalive_probes; /* num of allowed keep alive probes */
u32 reord_seen; /* number of data packet reordering events */
/*
@@ -402,6 +401,7 @@ struct tcp_sock {
u32 prior_cwnd; /* cwnd right before starting loss recovery */
u32 prr_delivered; /* Number of newly delivered packets to
* receiver in Recovery. */
+ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
struct hrtimer pacing_timer;
struct hrtimer compressed_ack_timer;
@@ -477,8 +477,8 @@ struct tcp_sock {
bool is_mptcp;
#endif
#if IS_ENABLED(CONFIG_SMC)
- bool (*smc_hs_congested)(const struct sock *sk);
bool syn_smc; /* SYN includes SMC */
+ bool (*smc_hs_congested)(const struct sock *sk);
#endif
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 911ddf92dcee..71632e3c5f18 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -482,7 +482,7 @@ static inline bool tee_param_is_memref(struct tee_param *param)
}
}
-extern struct bus_type tee_bus_type;
+extern const struct bus_type tee_bus_type;
/**
* struct tee_client_device - tee based device
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 716d17f31c45..4924a33700b7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -12,6 +12,7 @@
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
+#include <linux/static_key.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
@@ -19,16 +20,22 @@ extern void __init tick_init(void);
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
-extern void tick_handover_do_timer(void);
extern void tick_cleanup_dead_cpu(int cpu);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
-static inline void tick_handover_do_timer(void) { }
static inline void tick_cleanup_dead_cpu(int cpu) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
+extern int tick_cpu_dying(unsigned int cpu);
+extern void tick_assert_timekeeping_handover(void);
+#else
+#define tick_cpu_dying NULL
+static inline void tick_assert_timekeeping_handover(void) { }
+#endif
+
#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
extern void tick_freeze(void);
extern void tick_unfreeze(void);
@@ -63,18 +70,14 @@ enum tick_broadcast_state {
TICK_BROADCAST_ENTER,
};
+extern struct static_key_false arch_needs_tick_broadcast;
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_control(enum tick_broadcast_mode mode);
#else
static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
#endif /* BROADCAST */
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
-extern void tick_offline_cpu(unsigned int cpu);
-#else
-static inline void tick_offline_cpu(unsigned int cpu) { }
-#endif
-
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
#else
@@ -164,9 +167,16 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
#endif /* !CONFIG_NO_HZ_COMMON */
+/*
+ * Mask of CPUs that are nohz_full.
+ *
+ * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu()
+ * check.
+ */
+extern cpumask_var_t tick_nohz_full_mask;
+
#ifdef CONFIG_NO_HZ_FULL
extern bool tick_nohz_full_running;
-extern cpumask_var_t tick_nohz_full_mask;
static inline bool tick_nohz_full_enabled(void)
{
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7c43e98cf211..7e50cbd97f86 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -268,15 +268,17 @@ struct system_device_crosststamp {
};
/**
- * struct system_counterval_t - system counter value with the pointer to the
+ * struct system_counterval_t - system counter value with the ID of the
* corresponding clocksource
* @cycles: System counter value
- * @cs: Clocksource corresponding to system counter value. Used by
- * timekeeping code to verify comparibility of two cycle values
+ * @cs_id: Clocksource ID corresponding to system counter value. Used by
+ * timekeeping code to verify comparability of two cycle values.
+ * The default ID, CSID_GENERIC, does not identify a specific
+ * clocksource.
*/
struct system_counterval_t {
u64 cycles;
- struct clocksource *cs;
+ enum clocksource_ids cs_id;
};
/*
diff --git a/include/linux/timer.h b/include/linux/timer.h
index f18a2f1eb79e..14a633ba61d6 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -36,16 +36,10 @@
* workqueue locking issues. It's not meant for executing random crap
* with interrupts disabled. Abuse is monitored!
*
- * @TIMER_PINNED: A pinned timer will not be affected by any timer
- * placement heuristics (like, NOHZ) and will always expire on the CPU
- * on which the timer was enqueued.
- *
- * Note: Because enqueuing of timers can migrate the timer from one
- * CPU to another, pinned timers are not guaranteed to stay on the
- * initialy selected CPU. They move to the CPU on which the enqueue
- * function is invoked via mod_timer() or add_timer(). If the timer
- * should be placed on a particular CPU, then add_timer_on() has to be
- * used.
+ * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the
+ * timer was enqueued. When a particular CPU is required, add_timer_on()
+ * has to be used. Enqueue via mod_timer() and add_timer() is always done
+ * on the local CPU.
*/
#define TIMER_CPUMASK 0x0003FFFF
#define TIMER_MIGRATING 0x00040000
@@ -165,6 +159,8 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires);
#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)
extern void add_timer(struct timer_list *timer);
+extern void add_timer_local(struct timer_list *timer);
+extern void add_timer_global(struct timer_list *timer);
extern int try_to_del_timer_sync(struct timer_list *timer);
extern int timer_delete_sync(struct timer_list *timer);
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 9ec229dfddaa..1ef95c0287f0 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -9,9 +9,15 @@
/*
* Trace sequences are used to allow a function to call several other functions
* to create a string of data to use.
+ *
+ * Have the trace seq to be 8K which is typically PAGE_SIZE * 2 on
+ * most architectures. The TRACE_SEQ_BUFFER_SIZE (which is
+ * TRACE_SEQ_SIZE minus the other fields of trace_seq), is the
+ * max size the output of a trace event may be.
*/
-#define TRACE_SEQ_BUFFER_SIZE (PAGE_SIZE * 2 - \
+#define TRACE_SEQ_SIZE 8192
+#define TRACE_SEQ_BUFFER_SIZE (TRACE_SEQ_SIZE - \
(sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int)))
struct trace_seq {
diff --git a/include/linux/udp.h b/include/linux/udp.h
index d04188714dca..3748e82b627b 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -92,6 +92,9 @@ struct udp_sock {
/* This fields follows rcvbuf value, and is touched by udp_recvmsg */
int forward_threshold;
+
+ /* Cache friendly copy of sk->sk_peek_off >= 0 */
+ bool peeking_with_offset;
};
#define udp_test_bit(nr, sk) \
@@ -109,6 +112,13 @@ struct udp_sock {
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
+static inline int udp_set_peek_off(struct sock *sk, int val)
+{
+ sk_set_peek_off(sk, val);
+ WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0);
+ return 0;
+}
+
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
udp_assign_bit(NO_CHECK6_TX, sk, val);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index bea9c89922d9..00cebe2b70de 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -40,7 +40,6 @@ struct iov_iter_state {
struct iov_iter {
u8 iter_type;
- bool copy_mc;
bool nofault;
bool data_source;
size_t iov_offset;
@@ -248,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#ifdef CONFIG_ARCH_HAS_COPY_MC
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
-static inline void iov_iter_set_copy_mc(struct iov_iter *i)
-{
- i->copy_mc = true;
-}
-
-static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
-{
- return i->copy_mc;
-}
#else
#define _copy_mc_to_iter _copy_to_iter
-static inline void iov_iter_set_copy_mc(struct iov_iter *i) { }
-static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
-{
- return false;
-}
#endif
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
@@ -355,7 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter) {
.iter_type = ITER_UBUF,
- .copy_mc = false,
.data_source = direction,
.ubuf = buf,
.count = count,
diff --git a/include/linux/units.h b/include/linux/units.h
index 45110daaf8d3..00e15de33eca 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -24,10 +24,13 @@
#define NANOHZ_PER_HZ 1000000000UL
#define MICROHZ_PER_HZ 1000000UL
#define MILLIHZ_PER_HZ 1000UL
+
#define HZ_PER_KHZ 1000UL
-#define KHZ_PER_MHZ 1000UL
#define HZ_PER_MHZ 1000000UL
+#define KHZ_PER_MHZ 1000UL
+#define KHZ_PER_GHZ 1000000UL
+
#define MILLIWATT_PER_WATT 1000UL
#define MICROWATT_PER_MILLIWATT 1000UL
#define MICROWATT_PER_WATT 1000000UL
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index a771ccc038ac..6532beb587b1 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -236,7 +236,6 @@ struct usb_ep {
unsigned max_streams:16;
unsigned mult:2;
unsigned maxburst:5;
- unsigned fifo_mode:1;
u8 address;
const struct usb_endpoint_descriptor *desc;
const struct usb_ss_ep_comp_descriptor *comp_desc;
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index cd77fc6095a1..ac95e7c89df5 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -55,7 +55,7 @@ struct giveback_urb_bh {
bool high_prio;
spinlock_t lock;
struct list_head head;
- struct tasklet_struct bh;
+ struct work_struct bh;
struct usb_host_endpoint *completing_ep;
};
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c720be70c8dd..0f72c85a377b 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -35,6 +35,7 @@ struct iov_iter; /* in uio.h */
#else
#define VM_DEFER_KMEMLEAK 0
#endif
+#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */
/* bits [20..32] reserved for arch specific ioremap internals */
@@ -232,6 +233,10 @@ static inline bool is_vm_area_hugepages(const void *addr)
}
#ifdef CONFIG_MMU
+int vm_area_map_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end, struct page **pages);
+void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end);
void vunmap_range(unsigned long addr, unsigned long end);
static inline void set_vm_flush_reset_perms(void *addr)
{
diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h
new file mode 100644
index 000000000000..f6f8f83b15b0
--- /dev/null
+++ b/include/linux/wordpart.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_WORDPART_H
+#define _LINUX_WORDPART_H
+
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((u32)((n) & 0xffffffff))
+
+/**
+ * upper_16_bits - return bits 16-31 of a number
+ * @n: the number we're accessing
+ */
+#define upper_16_bits(n) ((u16)((n) >> 16))
+
+/**
+ * lower_16_bits - return bits 0-15 of a number
+ * @n: the number we're accessing
+ */
+#define lower_16_bits(n) ((u16)((n) & 0xffff))
+
+/**
+ * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
+ * @x: value to repeat
+ *
+ * NOTE: @x is not checked for > 0xff; larger values produce odd results.
+ */
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+#endif // _LINUX_WORDPART_H
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2cc0a9606175..158784dd189a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -22,20 +22,54 @@
*/
#define work_data_bits(work) ((unsigned long *)(&(work)->data))
-enum {
+enum work_bits {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
- WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */
- WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
- WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
+ WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */
+ WORK_STRUCT_PWQ_BIT, /* data points to pwq */
+ WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
- WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */
- WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */
-#else
- WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */
+ WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */
#endif
+ WORK_STRUCT_FLAG_BITS,
+ /* color for workqueue flushing */
+ WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS,
WORK_STRUCT_COLOR_BITS = 4,
+ /*
+ * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/
+ * debugobjects turned off. This makes pwqs aligned to 256 bytes (512
+ * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors.
+ *
+ * MSB
+ * [ pwq pointer ] [ flush color ] [ STRUCT flags ]
+ * 4 bits 4 or 5 bits
+ */
+ WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS,
+
+ /*
+ * data contains off-queue information when !WORK_STRUCT_PWQ.
+ *
+ * MSB
+ * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ]
+ * 1 bit 4 or 5 bits
+ */
+ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS,
+ WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT,
+ WORK_OFFQ_FLAG_END,
+ WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT,
+
+ /*
+ * When a work item is off queue, the high bits encode off-queue flags
+ * and the last pool it was on. Cap pool ID to 31 bits and use the
+ * highest number to indicate that no pool is associated.
+ */
+ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
+ WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+};
+
+enum work_flags {
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT,
WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
@@ -45,35 +79,14 @@ enum {
#else
WORK_STRUCT_STATIC = 0,
#endif
+};
+enum wq_misc_consts {
WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS),
/* not bound to any CPU, prefer the local CPU */
WORK_CPU_UNBOUND = NR_CPUS,
- /*
- * Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
- * This makes pwqs aligned to 256 bytes and allows 16 workqueue
- * flush colors.
- */
- WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
- WORK_STRUCT_COLOR_BITS,
-
- /* data contains off-queue information when !WORK_STRUCT_PWQ */
- WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT,
-
- __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE,
-
- /*
- * When a work item is off queue, its high bits point to the last
- * pool it was on. Cap at 31 bits and use the highest number to
- * indicate that no pool is associated.
- */
- WORK_OFFQ_FLAG_BITS = 1,
- WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
- WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
- WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
-
/* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0,
WORK_BUSY_RUNNING = 1 << 1,
@@ -83,12 +96,10 @@ enum {
};
/* Convenience constants - of type 'unsigned long', not 'enum'! */
-#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING)
+#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT)
#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
-
-#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1)
-#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
+#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1))
#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
#define WORK_DATA_STATIC_INIT() \
@@ -347,7 +358,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
* Workqueue flags and constants. For details, please refer to
* Documentation/core-api/workqueue.rst.
*/
-enum {
+enum wq_flags {
+ WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */
WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
WQ_FREEZABLE = 1 << 2, /* freeze during suspend */
WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
@@ -386,11 +398,22 @@ enum {
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
__WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
- __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */
+ /* BH wq only allows the following flags */
+ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI,
+};
+
+enum wq_consts {
WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE,
WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
+
+ /*
+ * Per-node default cap on min_active. Unless explicitly set, min_active
+ * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see
+ * workqueue_struct->min_active definition.
+ */
+ WQ_DFL_MIN_ACTIVE = 8,
};
/*
@@ -420,6 +443,9 @@ enum {
* they are same as their non-power-efficient counterparts - e.g.
* system_power_efficient_wq is identical to system_wq if
* 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
+ *
+ * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
+ * are executed in the queueing CPU's BH context in the queueing order.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_highpri_wq;
@@ -428,16 +454,43 @@ extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;
+extern struct workqueue_struct *system_bh_wq;
+extern struct workqueue_struct *system_bh_highpri_wq;
+
+void workqueue_softirq_action(bool highpri);
+void workqueue_softirq_dead(unsigned int cpu);
/**
* alloc_workqueue - allocate a workqueue
* @fmt: printf format for the name of the workqueue
* @flags: WQ_* flags
- * @max_active: max in-flight work items per CPU, 0 for default
+ * @max_active: max in-flight work items, 0 for default
* remaining args: args for @fmt
*
- * Allocate a workqueue with the specified parameters. For detailed
- * information on WQ_* flags, please refer to
+ * For a per-cpu workqueue, @max_active limits the number of in-flight work
+ * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be
+ * executing at most one work item for the workqueue.
+ *
+ * For unbound workqueues, @max_active limits the number of in-flight work items
+ * for the whole system. e.g. @max_active of 16 indicates that that there can be
+ * at most 16 work items executing for the workqueue in the whole system.
+ *
+ * As sharing the same active counter for an unbound workqueue across multiple
+ * NUMA nodes can be expensive, @max_active is distributed to each NUMA node
+ * according to the proportion of the number of online CPUs and enforced
+ * independently.
+ *
+ * Depending on online CPU distribution, a node may end up with per-node
+ * max_active which is significantly lower than @max_active, which can lead to
+ * deadlocks if the per-node concurrency limit is lower than the maximum number
+ * of interdependent work items for the workqueue.
+ *
+ * To guarantee forward progress regardless of online CPU distribution, the
+ * concurrency limit on every node is guaranteed to be equal to or greater than
+ * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means
+ * that the sum of per-node max_active's may be larger than @max_active.
+ *
+ * For detailed information on %WQ_* flags, please refer to
* Documentation/core-api/workqueue.rst.
*
* RETURNS:
@@ -460,8 +513,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
* Pointer to the allocated workqueue on success, %NULL on failure.
*/
#define alloc_ordered_workqueue(fmt, flags, args...) \
- alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \
- __WQ_ORDERED_EXPLICIT | (flags), 1, ##args)
+ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
#define create_workqueue(name) \
alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
@@ -471,6 +523,9 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
#define create_singlethread_workqueue(name) \
alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
+#define from_work(var, callback_work, work_fieldname) \
+ container_of(callback_work, typeof(*var), work_fieldname)
+
extern void destroy_workqueue(struct workqueue_struct *wq);
struct workqueue_attrs *alloc_workqueue_attrs(void);
@@ -508,6 +563,8 @@ extern bool flush_rcu_work(struct rcu_work *rwork);
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
+extern void workqueue_set_min_active(struct workqueue_struct *wq,
+ int min_active);
extern struct work_struct *current_work(void);
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 01fa15506286..170fdee6339c 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -16,6 +16,7 @@
* @WWAN_PORT_QCDM: Qcom Modem diagnostic interface
* @WWAN_PORT_FIREHOSE: XML based command protocol
* @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems
+ * @WWAN_PORT_FASTBOOT: Fastboot protocol control
*
* @WWAN_PORT_MAX: Highest supported port types
* @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type
@@ -28,6 +29,7 @@ enum wwan_port_type {
WWAN_PORT_QCDM,
WWAN_PORT_FIREHOSE,
WWAN_PORT_XMMRPC,
+ WWAN_PORT_FASTBOOT,
/* Add new port types above this line */
diff --git a/include/net/act_api.h b/include/net/act_api.h
index e1e5e72b901e..77ee0c657e2c 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -201,6 +201,8 @@ int tcf_idr_release(struct tc_action *a, bool bind);
int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
int tcf_unregister_action(struct tc_action_ops *a,
struct pernet_operations *ops);
+#define NET_ACT_ALIAS_PREFIX "net-act-"
+#define MODULE_ALIAS_NET_ACT(kind) MODULE_ALIAS(NET_ACT_ALIAS_PREFIX kind)
int tcf_action_destroy(struct tc_action *actions[], int bind);
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 61ebe723ee4d..9d06eb945509 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -8,8 +8,9 @@
#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
-#define TEMP_VALID_LIFETIME (7*86400)
-#define TEMP_PREFERRED_LIFETIME (86400)
+#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */
+#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */
+#define REGEN_MIN_ADVANCE (2) /* 2 seconds */
#define REGEN_MAX_RETRY (3)
#define MAX_DESYNC_FACTOR (600)
@@ -416,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
if (unlikely(!idev))
return true;
- return !!idev->cnf.ignore_routes_with_linkdown;
+ return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown);
}
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027d8..627ea8e2d915 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -8,21 +8,29 @@
#include <linux/refcount.h>
#include <net/sock.h>
+#if IS_ENABLED(CONFIG_UNIX)
+struct unix_sock *unix_get_socket(struct file *filp);
+#else
+static inline struct unix_sock *unix_get_socket(struct file *filp)
+{
+ return NULL;
+}
+#endif
+
+extern spinlock_t unix_gc_lock;
+extern unsigned int unix_tot_inflight;
+
void unix_inflight(struct user_struct *user, struct file *fp);
void unix_notinflight(struct user_struct *user, struct file *fp);
-void unix_destruct_scm(struct sk_buff *skb);
-void io_uring_destruct_scm(struct sk_buff *skb);
void unix_gc(void);
-void wait_for_unix_gc(void);
-struct sock *unix_get_socket(struct file *filp);
+void wait_for_unix_gc(struct scm_fp_list *fpl);
+
struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_MOD (256 - 1)
#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
-extern unsigned int unix_tot_inflight;
-
struct unix_address {
refcount_t refcnt;
int len;
@@ -46,12 +54,6 @@ struct scm_stat {
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
-#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
-#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
- spin_lock_nested(&unix_sk(s)->lock, \
- SINGLE_DEPTH_NESTING)
-
/* The AF_UNIX socket */
struct unix_sock {
/* WARNING: sk has to be the first member */
@@ -61,7 +63,7 @@ struct unix_sock {
struct mutex iolock, bindlock;
struct sock *peer;
struct list_head link;
- atomic_long_t inflight;
+ unsigned long inflight;
spinlock_t lock;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
@@ -77,6 +79,20 @@ struct unix_sock {
#define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
#define unix_peer(sk) (unix_sk(sk)->peer)
+#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
+#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
+enum unix_socket_lock_class {
+ U_LOCK_NORMAL,
+ U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */
+ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
+};
+
+static inline void unix_state_lock_nested(struct sock *sk,
+ enum unix_socket_lock_class subclass)
+{
+ spin_lock_nested(&unix_sk(sk)->lock, subclass);
+}
+
#define peer_wait peer_wq.wait
long unix_inq_len(struct sock *sk);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 7ffa8c192c3f..9fe95a22abeb 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -164,6 +164,8 @@ struct bt_voice {
#define BT_ISO_QOS_BIG_UNSET 0xff
#define BT_ISO_QOS_BIS_UNSET 0xff
+#define BT_ISO_SYNC_TIMEOUT 0x07d0 /* 20 secs */
+
struct bt_iso_io_qos {
__u32 interval;
__u16 latency;
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index bdee5d649cc6..8701ca5f31ee 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -330,6 +330,14 @@ enum {
* during the hdev->setup vendor callback.
*/
HCI_QUIRK_BROKEN_LE_CODED,
+
+ /*
+ * When this quirk is set, the HCI_OP_READ_ENC_KEY_SIZE command is
+ * skipped during an HCI_EV_ENCRYPT_CHANGE event. This is required
+ * for Actions Semiconductor ATS2851 based controllers, which erroneously
+ * claim to support it.
+ */
+ HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE,
};
/* HCI device flags */
@@ -372,6 +380,7 @@ enum {
HCI_SETUP,
HCI_CONFIG,
HCI_DEBUGFS_CREATED,
+ HCI_POWERING_DOWN,
HCI_AUTO_OFF,
HCI_RFKILLED,
HCI_MGMT,
@@ -393,7 +402,6 @@ enum {
HCI_LIMITED_PRIVACY,
HCI_RPA_EXPIRED,
HCI_RPA_RESOLVING,
- HCI_HS_ENABLED,
HCI_LE_ENABLED,
HCI_ADVERTISING,
HCI_ADVERTISING_CONNECTABLE,
@@ -437,7 +445,7 @@ enum {
#define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */
#define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */
#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
-#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */
+#define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
#define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */
@@ -653,6 +661,7 @@ enum {
#define HCI_ERROR_PIN_OR_KEY_MISSING 0x06
#define HCI_ERROR_MEMORY_EXCEEDED 0x07
#define HCI_ERROR_CONNECTION_TIMEOUT 0x08
+#define HCI_ERROR_COMMAND_DISALLOWED 0x0c
#define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d
#define HCI_ERROR_REJ_BAD_ADDR 0x0f
#define HCI_ERROR_INVALID_PARAMETERS 0x12
@@ -661,6 +670,7 @@ enum {
#define HCI_ERROR_REMOTE_POWER_OFF 0x15
#define HCI_ERROR_LOCAL_HOST_TERM 0x16
#define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18
+#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e
#define HCI_ERROR_INVALID_LL_PARAMS 0x1e
#define HCI_ERROR_UNSPECIFIED 0x1f
#define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c
@@ -2035,6 +2045,7 @@ struct hci_cp_le_set_per_adv_params {
} __packed;
#define HCI_MAX_PER_AD_LENGTH 252
+#define HCI_MAX_PER_AD_TOT_LEN 1650
#define HCI_OP_LE_SET_PER_ADV_DATA 0x203f
struct hci_cp_le_set_per_adv_data {
@@ -2795,6 +2806,10 @@ struct hci_ev_le_per_adv_report {
__u8 data[];
} __packed;
+#define LE_PA_DATA_COMPLETE 0x00
+#define LE_PA_DATA_MORE_TO_COME 0x01
+#define LE_PA_DATA_TRUNCATED 0x02
+
#define HCI_EV_LE_EXT_ADV_SET_TERM 0x12
struct hci_evt_le_ext_adv_set_term {
__u8 status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 8f8dd9173714..56fb42df44a3 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -552,6 +552,7 @@ struct hci_dev {
__u32 req_status;
__u32 req_result;
struct sk_buff *req_skb;
+ struct sk_buff *req_rsp;
void *smp_data;
void *smp_bredr_data;
@@ -734,8 +735,9 @@ struct hci_conn {
__u16 le_supv_timeout;
__u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH];
__u8 le_adv_data_len;
- __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH];
- __u8 le_per_adv_data_len;
+ __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN];
+ __u16 le_per_adv_data_len;
+ __u16 le_per_adv_data_offset;
__u8 le_tx_phy;
__u8 le_rx_phy;
__s8 rssi;
@@ -1083,6 +1085,24 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev)
return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num;
}
+static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c == conn) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+
+ return false;
+}
+
static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle)
{
struct hci_conn_hash *h = &hdev->conn_hash;
@@ -1480,7 +1500,6 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
bdaddr_t *dst, u8 role);
void hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
-void hci_conn_check_pending(struct hci_dev *hdev);
struct hci_chan *hci_chan_create(struct hci_conn *conn);
void hci_chan_del(struct hci_chan *chan);
@@ -1494,11 +1513,13 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
u16 conn_timeout, u8 role);
+void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status);
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
u8 sec_level, u8 auth_type,
- enum conn_reasons conn_reason);
+ enum conn_reasons conn_reason, u16 timeout);
struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u16 setting, struct bt_codec *codec);
+ __u16 setting, struct bt_codec *codec,
+ u16 timeout);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
@@ -1509,8 +1530,8 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos,
__u8 data_len, __u8 *data);
-int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
- __u8 sid, struct bt_iso_qos *qos);
+struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, __u8 sid, struct bt_iso_qos *qos);
int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[]);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 6efbc2152146..6a9d063e9f47 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -42,12 +42,24 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
void hci_cmd_sync_init(struct hci_dev *hdev);
void hci_cmd_sync_clear(struct hci_dev *hdev);
void hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
-void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
+void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err);
int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
void *data, hci_cmd_sync_work_destroy_t destroy);
int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
void *data, hci_cmd_sync_work_destroy_t destroy);
+int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy);
+struct hci_cmd_sync_work_entry *
+hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy);
+void hci_cmd_sync_cancel_entry(struct hci_dev *hdev,
+ struct hci_cmd_sync_work_entry *entry);
+bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy);
+bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev,
+ hci_cmd_sync_work_func_t func, void *data,
+ hci_cmd_sync_work_destroy_t destroy);
int hci_update_eir_sync(struct hci_dev *hdev);
int hci_update_class_sync(struct hci_dev *hdev);
@@ -127,8 +139,6 @@ struct hci_conn;
int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason);
-int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn);
-
int hci_le_create_cis_sync(struct hci_dev *hdev);
int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle);
@@ -138,3 +148,9 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason);
int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle);
int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle);
+
+int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn);
+
+int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn);
+
+int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index cf393e72d6ed..a4278aa618ab 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -59,8 +59,6 @@
#define L2CAP_WAIT_ACK_POLL_PERIOD msecs_to_jiffies(200)
#define L2CAP_WAIT_ACK_TIMEOUT msecs_to_jiffies(10000)
-#define L2CAP_A2MP_DEFAULT_MTU 670
-
/* L2CAP socket address */
struct sockaddr_l2 {
sa_family_t l2_family;
@@ -109,12 +107,6 @@ struct l2cap_conninfo {
#define L2CAP_ECHO_RSP 0x09
#define L2CAP_INFO_REQ 0x0a
#define L2CAP_INFO_RSP 0x0b
-#define L2CAP_CREATE_CHAN_REQ 0x0c
-#define L2CAP_CREATE_CHAN_RSP 0x0d
-#define L2CAP_MOVE_CHAN_REQ 0x0e
-#define L2CAP_MOVE_CHAN_RSP 0x0f
-#define L2CAP_MOVE_CHAN_CFM 0x10
-#define L2CAP_MOVE_CHAN_CFM_RSP 0x11
#define L2CAP_CONN_PARAM_UPDATE_REQ 0x12
#define L2CAP_CONN_PARAM_UPDATE_RSP 0x13
#define L2CAP_LE_CONN_REQ 0x14
@@ -144,7 +136,6 @@ struct l2cap_conninfo {
/* L2CAP fixed channels */
#define L2CAP_FC_SIG_BREDR 0x02
#define L2CAP_FC_CONNLESS 0x04
-#define L2CAP_FC_A2MP 0x08
#define L2CAP_FC_ATT 0x10
#define L2CAP_FC_SIG_LE 0x20
#define L2CAP_FC_SMP_LE 0x40
@@ -267,7 +258,6 @@ struct l2cap_conn_rsp {
/* channel identifier */
#define L2CAP_CID_SIGNALING 0x0001
#define L2CAP_CID_CONN_LESS 0x0002
-#define L2CAP_CID_A2MP 0x0003
#define L2CAP_CID_ATT 0x0004
#define L2CAP_CID_LE_SIGNALING 0x0005
#define L2CAP_CID_SMP 0x0006
@@ -282,7 +272,6 @@ struct l2cap_conn_rsp {
#define L2CAP_CR_BAD_PSM 0x0002
#define L2CAP_CR_SEC_BLOCK 0x0003
#define L2CAP_CR_NO_MEM 0x0004
-#define L2CAP_CR_BAD_AMP 0x0005
#define L2CAP_CR_INVALID_SCID 0x0006
#define L2CAP_CR_SCID_IN_USE 0x0007
@@ -404,29 +393,6 @@ struct l2cap_info_rsp {
__u8 data[];
} __packed;
-struct l2cap_create_chan_req {
- __le16 psm;
- __le16 scid;
- __u8 amp_id;
-} __packed;
-
-struct l2cap_create_chan_rsp {
- __le16 dcid;
- __le16 scid;
- __le16 result;
- __le16 status;
-} __packed;
-
-struct l2cap_move_chan_req {
- __le16 icid;
- __u8 dest_amp_id;
-} __packed;
-
-struct l2cap_move_chan_rsp {
- __le16 icid;
- __le16 result;
-} __packed;
-
#define L2CAP_MR_SUCCESS 0x0000
#define L2CAP_MR_PEND 0x0001
#define L2CAP_MR_BAD_ID 0x0002
@@ -539,8 +505,6 @@ struct l2cap_seq_list {
struct l2cap_chan {
struct l2cap_conn *conn;
- struct hci_conn *hs_hcon;
- struct hci_chan *hs_hchan;
struct kref kref;
atomic_t nesting;
@@ -591,12 +555,6 @@ struct l2cap_chan {
unsigned long conn_state;
unsigned long flags;
- __u8 remote_amp_id;
- __u8 local_amp_id;
- __u8 move_id;
- __u8 move_state;
- __u8 move_role;
-
__u16 next_tx_seq;
__u16 expected_ack_seq;
__u16 expected_tx_seq;
@@ -981,7 +939,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid);
struct l2cap_chan *l2cap_chan_create(void);
void l2cap_chan_close(struct l2cap_chan *chan, int reason);
int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
- bdaddr_t *dst, u8 dst_type);
+ bdaddr_t *dst, u8 dst_type, u16 timeout);
int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu);
int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len);
void l2cap_chan_busy(struct l2cap_chan *chan, int busy);
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index c5e57c6bd873..9ce5ac2bfbad 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -54,6 +54,8 @@ typedef enum {
AD_MUX_DETACHED, /* mux machine */
AD_MUX_WAITING, /* mux machine */
AD_MUX_ATTACHED, /* mux machine */
+ AD_MUX_COLLECTING, /* mux machine */
+ AD_MUX_DISTRIBUTING, /* mux machine */
AD_MUX_COLLECTING_DISTRIBUTING /* mux machine */
} mux_states_t;
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 69292ecc0325..473a0147769e 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -76,6 +76,7 @@ enum {
BOND_OPT_MISSED_MAX,
BOND_OPT_NS_TARGETS,
BOND_OPT_PRIO,
+ BOND_OPT_COUPLED_CONTROL,
BOND_OPT_LAST
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 5b8b1b644a2d..b61fb1aa3a56 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -148,6 +148,7 @@ struct bond_params {
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ns_targets[BOND_MAX_NS_TARGETS];
#endif
+ int coupled_control;
/* 2 bytes of padding : see ether_addr_equal_64bits() */
u8 ad_actor_system[ETH_ALEN + 2];
@@ -167,6 +168,7 @@ struct slave {
u8 backup:1, /* indicates backup slave. Value corresponds with
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
inactive:1, /* indicates inactive slave */
+ rx_disabled:1, /* indicates whether slave's Rx is disabled */
should_notify:1, /* indicates whether the state changed */
should_notify_link:1; /* indicates whether the link changed */
u8 duplex;
@@ -568,6 +570,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave,
bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
if (!slave->bond->params.all_slaves_active)
slave->inactive = 1;
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD)
+ slave->rx_disabled = 1;
+}
+
+static inline void bond_set_slave_tx_disabled_flags(struct slave *slave,
+ bool notify)
+{
+ bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
}
static inline void bond_set_slave_active_flags(struct slave *slave,
@@ -575,6 +585,14 @@ static inline void bond_set_slave_active_flags(struct slave *slave,
{
bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify);
slave->inactive = 0;
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD)
+ slave->rx_disabled = 0;
+}
+
+static inline void bond_set_slave_rx_enabled_flags(struct slave *slave,
+ bool notify)
+{
+ slave->rx_disabled = 0;
}
static inline bool bond_is_slave_inactive(struct slave *slave)
@@ -582,6 +600,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
return slave->inactive;
}
+static inline bool bond_is_slave_rx_disabled(struct slave *slave)
+{
+ return slave->rx_disabled;
+}
+
static inline void bond_propose_link_state(struct slave *slave, int state)
{
slave->link_new_state = state;
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 4dabeb6c76d3..9b09acac538e 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -48,6 +48,10 @@ void napi_busy_loop(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+
#else /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long net_busy_loop_on(void)
{
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index cf79656ce09c..2e2be4fd2bb6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021, 2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -118,10 +118,13 @@ struct wiphy;
* restrictions.
* @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel.
* @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT
- * @IEEE80211_CHAN_NO_UHB_VLP_CLIENT: Client connection with VLP AP
+ * @IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT: Client connection with VLP AP
* not permitted using this channel
- * @IEEE80211_CHAN_NO_UHB_AFC_CLIENT: Client connection with AFC AP
+ * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP
* not permitted using this channel
+ * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor
+ * mode even in the presence of other (regulatory) restrictions,
+ * even if it is otherwise disabled.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = 1<<0,
@@ -146,8 +149,9 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_NO_320MHZ = 1<<19,
IEEE80211_CHAN_NO_EHT = 1<<20,
IEEE80211_CHAN_DFS_CONCURRENT = 1<<21,
- IEEE80211_CHAN_NO_UHB_VLP_CLIENT= 1<<22,
- IEEE80211_CHAN_NO_UHB_AFC_CLIENT= 1<<23,
+ IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22,
+ IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23,
+ IEEE80211_CHAN_CAN_MONITOR = 1<<24,
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -808,6 +812,9 @@ struct key_params {
* chan will define the primary channel and all other
* parameters are ignored.
* @freq1_offset: offset from @center_freq1, in KHz
+ * @punctured: mask of the punctured 20 MHz subchannels, with
+ * bits turned on being disabled (punctured); numbered
+ * from lower to higher frequency (like in the spec)
*/
struct cfg80211_chan_def {
struct ieee80211_channel *chan;
@@ -816,6 +823,7 @@ struct cfg80211_chan_def {
u32 center_freq2;
struct ieee80211_edmg edmg;
u16 freq1_offset;
+ u16 punctured;
};
/*
@@ -956,7 +964,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
chandef1->width == chandef2->width &&
chandef1->center_freq1 == chandef2->center_freq1 &&
chandef1->freq1_offset == chandef2->freq1_offset &&
- chandef1->center_freq2 == chandef2->center_freq2);
+ chandef1->center_freq2 == chandef2->center_freq2 &&
+ chandef1->punctured == chandef2->punctured);
}
/**
@@ -1048,6 +1057,20 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef);
/**
+ * cfg80211_chandef_primary - calculate primary 40/80/160 MHz freq
+ * @chandef: chandef to calculate for
+ * @primary_chan_width: primary channel width to calculate center for
+ * @punctured: punctured sub-channel bitmap, will be recalculated
+ * according to the new bandwidth, can be %NULL
+ *
+ * Returns: the primary 40/80/160 MHz channel center frequency, or -1
+ * for errors, updating the punctured bitmap
+ */
+int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef,
+ enum nl80211_chan_width primary_chan_width,
+ u16 *punctured);
+
+/**
* nl80211_send_chandef - sends the channel definition.
* @msg: the msg to send channel definition
* @chandef: the channel definition to check
@@ -1457,9 +1480,6 @@ struct cfg80211_unsol_bcast_probe_resp {
* @fils_discovery: FILS discovery transmission parameters
* @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
* @mbssid_config: AP settings for multiple bssid
- * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
- * a 20 MHz channel, lowest bit corresponding to the lowest channel.
- * Bit set to 1 indicates that the channel is punctured.
*/
struct cfg80211_ap_settings {
struct cfg80211_chan_def chandef;
@@ -1494,7 +1514,6 @@ struct cfg80211_ap_settings {
struct cfg80211_fils_discovery fils_discovery;
struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
struct cfg80211_mbssid_config mbssid_config;
- u16 punct_bitmap;
};
@@ -1528,9 +1547,8 @@ struct cfg80211_ap_update {
* @radar_required: whether radar detection is required on the new channel
* @block_tx: whether transmissions should be blocked while changing
* @count: number of beacons until switch
- * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
- * a 20 MHz channel, lowest bit corresponding to the lowest channel.
- * Bit set to 1 indicates that the channel is punctured.
+ * @link_id: defines the link on which channel switch is expected during
+ * MLO. 0 in case of non-MLO.
*/
struct cfg80211_csa_settings {
struct cfg80211_chan_def chandef;
@@ -1543,7 +1561,7 @@ struct cfg80211_csa_settings {
bool radar_required;
bool block_tx;
u8 count;
- u16 punct_bitmap;
+ u8 link_id;
};
/**
@@ -1766,11 +1784,15 @@ struct station_parameters {
* @subtype: Management frame subtype to use for indicating removal
* (10 = Disassociation, 12 = Deauthentication)
* @reason_code: Reason code for the Disassociation/Deauthentication frame
+ * @link_id: Link ID indicating a link that stations to be flushed must be
+ * using; valid only for MLO, but can also be -1 for MLO to really
+ * remove all stations.
*/
struct station_del_parameters {
const u8 *mac;
u8 subtype;
u16 reason_code;
+ int link_id;
};
/**
@@ -2695,19 +2717,11 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
* @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match
* or no match (RSSI only)
* @rssi_thold: don't report scan results below this threshold (in s32 dBm)
- * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied
- * for filtering out scan results received. Drivers advertise this support
- * of band specific rssi based filtering through the feature capability
- * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band
- * specific rssi thresholds take precedence over rssi_thold, if specified.
- * If not specified for any band, it will be assigned with rssi_thold of
- * corresponding matchset.
*/
struct cfg80211_match_set {
struct cfg80211_ssid ssid;
u8 bssid[ETH_ALEN];
s32 rssi_thold;
- s32 per_band_rssi_thold[NUM_NL80211_BANDS];
};
/**
@@ -2910,6 +2924,8 @@ struct cfg80211_bss_ies {
* own the beacon_ies, but they're just pointers to the ones from the
* @hidden_beacon_bss struct)
* @proberesp_ies: the information elements from the last Probe Response frame
+ * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame,
+ * cannot rely on it having valid data
* @hidden_beacon_bss: in case this BSS struct represents a probe response from
* a BSS that hides the SSID in its beacon, this points to the BSS struct
* that holds the beacon data. @beacon_ies is still valid, of course, and
@@ -2950,6 +2966,8 @@ struct cfg80211_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 proberesp_ecsa_stuck:1;
+
u8 bssid_index;
u8 max_bssid_indicator;
@@ -3059,6 +3077,7 @@ struct cfg80211_assoc_link {
* @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links.
* Drivers shall disable MLO features for the current association if this
* flag is not set.
+ * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any)
*/
enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HT = BIT(0),
@@ -3068,6 +3087,7 @@ enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HE = BIT(4),
ASSOC_REQ_DISABLE_EHT = BIT(5),
CONNECT_REQ_MLO_SUPPORT = BIT(6),
+ ASSOC_REQ_SPP_AMSDU = BIT(7),
};
/**
@@ -3592,12 +3612,15 @@ struct cfg80211_wowlan_nd_info {
* @tcp_connlost: TCP connection lost or failed to establish
* @tcp_nomoretokens: TCP data ran out of tokens
* @net_detect: if not %NULL, woke up because of net detect
+ * @unprot_deauth_disassoc: woke up due to unprotected deauth or
+ * disassoc frame (in MFP).
*/
struct cfg80211_wowlan_wakeup {
bool disconnect, magic_pkt, gtk_rekey_failure,
eap_identity_req, four_way_handshake,
rfkill_release, packet_80211,
- tcp_match, tcp_connlost, tcp_nomoretokens;
+ tcp_match, tcp_connlost, tcp_nomoretokens,
+ unprot_deauth_disassoc;
s32 pattern_idx;
u32 packet_present_len, packet_len;
const void *packet;
@@ -4919,7 +4942,7 @@ struct cfg80211_ops {
* enum wiphy_flags - wiphy capability flags
*
* @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split
- * into two, first for legacy bands and second for UHB.
+ * into two, first for legacy bands and second for 6 GHz.
* @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this
* wiphy at all
* @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled
@@ -6200,7 +6223,7 @@ struct wireless_dev {
int beacon_interval;
struct cfg80211_chan_def preset_chandef;
struct cfg80211_chan_def chandef;
- u8 id[IEEE80211_MAX_SSID_LEN];
+ u8 id[IEEE80211_MAX_MESH_ID_LEN];
u8 id_len, id_up_len;
} mesh;
struct {
@@ -6848,13 +6871,45 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
}
/**
+ * enum cfg80211_rnr_iter_ret - reduced neighbor report iteration state
+ * @RNR_ITER_CONTINUE: continue iterating with the next entry
+ * @RNR_ITER_BREAK: break iteration and return success
+ * @RNR_ITER_ERROR: break iteration and return error
+ */
+enum cfg80211_rnr_iter_ret {
+ RNR_ITER_CONTINUE,
+ RNR_ITER_BREAK,
+ RNR_ITER_ERROR,
+};
+
+/**
+ * cfg80211_iter_rnr - iterate reduced neighbor report entries
+ * @elems: the frame elements to iterate RNR elements and then
+ * their entries in
+ * @elems_len: length of the elements
+ * @iter: iteration function, see also &enum cfg80211_rnr_iter_ret
+ * for the return value
+ * @iter_data: additional data passed to the iteration function
+ * Return: %true on success (after successfully iterating all entries
+ * or if the iteration function returned %RNR_ITER_BREAK),
+ * %false on error (iteration function returned %RNR_ITER_ERROR
+ * or elements were malformed.)
+ */
+bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len,
+ enum cfg80211_rnr_iter_ret
+ (*iter)(void *data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len),
+ void *iter_data);
+
+/**
* cfg80211_defragment_element - Defrag the given element data into a buffer
*
* @elem: the element to defragment
* @ies: elements where @elem is contained
* @ieslen: length of @ies
- * @data: buffer to store element data
- * @data_len: length of @data
+ * @data: buffer to store element data, or %NULL to just determine size
+ * @data_len: length of @data, or 0
* @frag_id: the element ID of fragments
*
* Return: length of @data, or -EINVAL on error
@@ -7152,11 +7207,13 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
* from a beacon or probe response
* @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon
* @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response
+ * @CFG80211_BSS_FTYPE_S1G_BEACON: data comes from an S1G beacon
*/
enum cfg80211_bss_frame_type {
CFG80211_BSS_FTYPE_UNKNOWN,
CFG80211_BSS_FTYPE_BEACON,
CFG80211_BSS_FTYPE_PRESP,
+ CFG80211_BSS_FTYPE_S1G_BEACON,
};
/**
@@ -8729,14 +8786,13 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
* @dev: the device which switched channels
* @chandef: the new channel definition
* @link_id: the link ID for MLO, must be 0 for non-MLO
- * @punct_bitmap: the new puncturing bitmap
*
* Caller must hold wiphy mutex, therefore must only be called from sleepable
* driver context!
*/
void cfg80211_ch_switch_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id, u16 punct_bitmap);
+ unsigned int link_id);
/*
* cfg80211_ch_switch_started_notify - notify channel switch start
@@ -8745,7 +8801,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
* @link_id: the link ID for MLO, must be 0 for non-MLO
* @count: the number of TBTTs until the channel switch happens
* @quiet: whether or not immediate quiet was requested by the AP
- * @punct_bitmap: the future puncturing bitmap
*
* Inform the userspace about the channel switch that has just
* started, so that it can take appropriate actions (eg. starting
@@ -8754,7 +8809,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
unsigned int link_id, u8 count,
- bool quiet, u16 punct_bitmap);
+ bool quiet);
/**
* ieee80211_operating_class_to_band - convert operating class to band
@@ -8768,6 +8823,19 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
enum nl80211_band *band);
/**
+ * ieee80211_operating_class_to_chandef - convert operating class to chandef
+ *
+ * @operating_class: the operating class to convert
+ * @chan: the ieee80211_channel to convert
+ * @chandef: a pointer to the resulting chandef
+ *
+ * Returns %true if the conversion was successful, %false otherwise.
+ */
+bool ieee80211_operating_class_to_chandef(u8 operating_class,
+ struct ieee80211_channel *chan,
+ struct cfg80211_chan_def *chandef);
+
+/**
* ieee80211_chandef_to_operating_class - convert chandef to operation class
*
* @chandef: the chandef to convert
@@ -9373,18 +9441,6 @@ static inline int cfg80211_color_change_notify(struct net_device *dev)
}
/**
- * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap
- * @bitmap: bitmap to be validated
- * @chandef: channel definition
- *
- * Validate the puncturing bitmap.
- *
- * Return: %true if the bitmap is valid. %false otherwise.
- */
-bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
- const struct cfg80211_chan_def *chandef);
-
-/**
* cfg80211_links_removed - Notify about removed STA MLD setup links.
* @dev: network device.
* @link_mask: BIT mask of removed STA MLD setup link IDs.
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index cd95711b12b8..76d2cd2e2b30 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -401,6 +401,7 @@ struct ieee802154_llsec_key {
struct ieee802154_llsec_key_entry {
struct list_head list;
+ struct rcu_head rcu;
struct ieee802154_llsec_key_id id;
struct ieee802154_llsec_key *key;
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 6d3a20163260..9707ab54fdd5 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -30,6 +30,7 @@
FN(TCP_AOFAILURE) \
FN(SOCKET_BACKLOG) \
FN(TCP_FLAGS) \
+ FN(TCP_ABORT_ON_DATA) \
FN(TCP_ZEROWINDOW) \
FN(TCP_OLD_DATA) \
FN(TCP_OVERWINDOW) \
@@ -37,6 +38,7 @@
FN(TCP_RFC7323_PAWS) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_INVALID_ACK_SEQUENCE) \
FN(TCP_RESET) \
FN(TCP_INVALID_SYN) \
FN(TCP_CLOSE) \
@@ -54,6 +56,7 @@
FN(NEIGH_QUEUEFULL) \
FN(NEIGH_DEAD) \
FN(TC_EGRESS) \
+ FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
FN(CPU_BACKLOG) \
FN(XDP) \
@@ -105,7 +108,13 @@ enum skb_drop_reason {
SKB_CONSUMED,
/** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
SKB_DROP_REASON_NOT_SPECIFIED,
- /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ /**
+ * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used.
+ * Reason could be one of three cases:
+ * 1) no established/listening socket found during lookup process
+ * 2) no valid request socket during 3WHS process
+ * 3) no valid child socket during 3WHS process
+ */
SKB_DROP_REASON_NO_SOCKET,
/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
SKB_DROP_REASON_PKT_TOO_SMALL,
@@ -198,6 +207,11 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
SKB_DROP_REASON_TCP_FLAGS,
/**
+ * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to
+ * LINUX_MIB_TCPABORTONDATA
+ */
+ SKB_DROP_REASON_TCP_ABORT_ON_DATA,
+ /**
* @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
* see LINUX_MIB_TCPZEROWINDOWDROP
*/
@@ -221,13 +235,19 @@ enum skb_drop_reason {
SKB_DROP_REASON_TCP_OFOMERGE,
/**
* @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
+ * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
+ * field because ack sequence is not in the window between snd_una
+ * and snd_nxt
+ */
+ SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE,
/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
SKB_DROP_REASON_TCP_RESET,
/**
@@ -271,6 +291,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_NEIGH_DEAD,
/** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
SKB_DROP_REASON_TC_EGRESS,
+ /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
+ SKB_DROP_REASON_SECURITY_HOOK,
/**
* @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
* failed to enqueue to current qdisc)
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 82135fbdb1e6..7c0da9effe4e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -991,9 +991,9 @@ struct dsa_switch_ops {
* Port's MAC EEE settings
*/
int (*set_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ struct ethtool_keee *e);
int (*get_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ struct ethtool_keee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
diff --git a/include/net/dst.h b/include/net/dst.h
index f5dfc8fb7b37..0aa331bd2fdb 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -390,7 +390,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_obsolete,
unsigned short flags);
-struct dst_entry *dst_destroy(struct dst_entry *dst);
void dst_dev_put(struct dst_entry *dst);
static inline void dst_confirm(struct dst_entry *dst)
diff --git a/include/net/eee.h b/include/net/eee.h
new file mode 100644
index 000000000000..84837aba3cd9
--- /dev/null
+++ b/include/net/eee.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _EEE_H
+#define _EEE_H
+
+#include <linux/types.h>
+
+struct eee_config {
+ u32 tx_lpi_timer;
+ bool tx_lpi_enabled;
+ bool eee_enabled;
+};
+
+static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg)
+{
+ /* eee_enabled is the master on/off */
+ if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled)
+ return false;
+
+ return true;
+}
+
+static inline void eeecfg_to_eee(struct ethtool_keee *eee,
+ const struct eee_config *eeecfg)
+{
+ eee->tx_lpi_timer = eeecfg->tx_lpi_timer;
+ eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled;
+ eee->eee_enabled = eeecfg->eee_enabled;
+}
+
+static inline void eee_to_eeecfg(struct eee_config *eeecfg,
+ const struct ethtool_keee *eee)
+{
+ eeecfg->tx_lpi_timer = eee->tx_lpi_timer;
+ eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled;
+ eeecfg->eee_enabled = eee->eee_enabled;
+}
+
+#endif
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index e61469129402..9ece6e5a3ea8 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -41,6 +41,8 @@ struct genl_info;
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
+ * @bind: called when family multicast group is added to a netlink socket
+ * @unbind: called when family multicast group is removed from a netlink socket
* @module: pointer to the owning module (set to THIS_MODULE)
* @mcgrps: multicast groups used by this family
* @n_mcgrps: number of multicast groups
@@ -84,6 +86,8 @@ struct genl_family {
void (*post_doit)(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
+ int (*bind)(int mcgrp);
+ void (*unbind)(int mcgrp);
const struct genl_ops * ops;
const struct genl_small_ops *small_ops;
const struct genl_split_ops *split_ops;
@@ -149,7 +153,7 @@ static inline void *genl_info_userhdr(const struct genl_info *info)
/* Report that a root attribute is missing */
#define GENL_REQ_ATTR_CHECK(info, attr) ({ \
- struct genl_info *__info = (info); \
+ const struct genl_info *__info = (info); \
\
NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \
})
diff --git a/include/net/gro.h b/include/net/gro.h
index b435f0ddbf64..50f1e403dbbb 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_IPV6_GRO_H
-#define _NET_IPV6_GRO_H
+#ifndef _NET_GRO_H
+#define _NET_GRO_H
#include <linux/indirect_call_wrapper.h>
#include <linux/ip.h>
@@ -9,6 +9,7 @@
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
+#include <net/hotdata.h>
struct napi_gro_cb {
union {
@@ -139,21 +140,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
NAPI_GRO_CB(skb)->data_offset += len;
}
-static inline void *skb_gro_header_fast(struct sk_buff *skb,
+static inline void *skb_gro_header_fast(const struct sk_buff *skb,
unsigned int offset)
{
return NAPI_GRO_CB(skb)->frag0 + offset;
}
-static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
+static inline bool skb_gro_may_pull(const struct sk_buff *skb,
+ unsigned int hlen)
{
- return NAPI_GRO_CB(skb)->frag0_len < hlen;
-}
-
-static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
-{
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}
static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
@@ -162,28 +158,30 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
if (!pskb_may_pull(skb, hlen))
return NULL;
- skb_gro_frag0_invalidate(skb);
return skb->data + offset;
}
-static inline void *skb_gro_header(struct sk_buff *skb,
- unsigned int hlen, unsigned int offset)
+static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
+ unsigned int offset)
{
void *ptr;
ptr = skb_gro_header_fast(skb, offset);
- if (skb_gro_header_hard(skb, hlen))
+ if (!skb_gro_may_pull(skb, hlen))
ptr = skb_gro_header_slow(skb, hlen, offset);
return ptr;
}
-static inline void *skb_gro_network_header(struct sk_buff *skb)
+static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
- return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
- skb_network_offset(skb);
+ if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
+ return skb_gro_header_fast(skb, skb_network_offset(skb));
+
+ return skb_network_header(skb);
}
-static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct iphdr *iph = skb_gro_network_header(skb);
@@ -421,7 +419,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
return uh;
}
-static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
@@ -448,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(gro_normal_batch))
+ if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
gro_normal_list(napi);
}
@@ -495,6 +494,7 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
#endif
}
-extern struct list_head offload_base;
+struct packet_offload *gro_find_receive_by_type(__be16 type);
+struct packet_offload *gro_find_complete_by_type(__be16 type);
-#endif /* _NET_IPV6_GRO_H */
+#endif /* _NET_GRO_H */
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
new file mode 100644
index 000000000000..003667a1efd6
--- /dev/null
+++ b/include/net/hotdata.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_HOTDATA_H
+#define _NET_HOTDATA_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+
+/* Read mostly data used in network fast paths. */
+struct net_hotdata {
+#if IS_ENABLED(CONFIG_INET)
+ struct packet_offload ip_packet_offload;
+ struct net_offload tcpv4_offload;
+ struct net_protocol tcp_protocol;
+ struct net_offload udpv4_offload;
+ struct net_protocol udp_protocol;
+ struct packet_offload ipv6_packet_offload;
+ struct net_offload tcpv6_offload;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol tcpv6_protocol;
+ struct inet6_protocol udpv6_protocol;
+#endif
+ struct net_offload udpv6_offload;
+#endif
+ struct list_head offload_base;
+ struct list_head ptype_all;
+ struct kmem_cache *skbuff_cache;
+ struct kmem_cache *skbuff_fclone_cache;
+ struct kmem_cache *skb_small_head_cache;
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+ u32 rps_cpu_mask;
+#endif
+ int gro_normal_batch;
+ int netdev_budget;
+ int netdev_budget_usecs;
+ int tstamp_prequeue;
+ int max_backlog;
+ int dev_tx_weight;
+ int dev_rx_weight;
+};
+
+#define inet_ehash_secret net_hotdata.tcp_protocol.secret
+#define udp_ehash_secret net_hotdata.udp_protocol.secret
+#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
+#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
+#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
+#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret
+
+extern struct net_hotdata net_hotdata;
+
+#endif /* _NET_HOTDATA_H */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index f07642264c1e..238ad3349456 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -144,7 +144,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
struct fib6_info *aca_rt;
- struct ifacaddr6 *aca_next;
+ struct ifacaddr6 __rcu *aca_next;
struct hlist_node aca_addr_lst;
int aca_users;
refcount_t aca_refcnt;
@@ -196,7 +196,7 @@ struct inet6_dev {
spinlock_t mc_report_lock; /* mld query report lock */
struct mutex mc_lock; /* mld global lock */
- struct ifacaddr6 *ac_list;
+ struct ifacaddr6 __rcu *ac_list;
rwlock_t lock;
refcount_t refcnt;
__u32 if_flags;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index d0a2f827d5f2..9ab4bf704e86 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -357,4 +357,12 @@ static inline bool inet_csk_has_ulp(const struct sock *sk)
return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
}
+static inline void inet_init_csk_locks(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ spin_lock_init(&icsk->icsk_accept_queue.rskq_lock);
+ spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock);
+}
+
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index aa86453f6b9b..f9ddd47dc4f8 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -274,6 +274,7 @@ enum {
INET_FLAGS_REPFLOW = 27,
INET_FLAGS_RTALERT_ISOLATE = 28,
INET_FLAGS_SNDFLOW = 29,
+ INET_FLAGS_RTALERT = 30,
};
/* cmsg flags for inet */
@@ -307,11 +308,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
#define inet_assign_bit(nr, sk, val) \
assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
-static inline bool sk_is_inet(struct sock *sk)
-{
- return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
-}
-
/**
* sk_to_full_sk - Access to a full socket
* @sk: pointer to a socket
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
index 781d2d8b2f29..2cbbee6e806a 100644
--- a/include/net/ioam6.h
+++ b/include/net/ioam6.h
@@ -12,6 +12,7 @@
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <linux/rhashtable-types.h>
struct ioam6_namespace {
@@ -65,4 +66,7 @@ void ioam6_exit(void);
int ioam6_iptunnel_init(void);
void ioam6_iptunnel_exit(void);
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len);
+
#endif /* _NET_IOAM6_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index de0c69c57e3c..25cb688bdc62 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
* Functions provided by ip_sockglue.c
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst);
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9ba6413fd2e3..323c94f1845b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -30,12 +30,6 @@
#define RT6_DEBUG 2
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
struct rt6_info;
struct fib6_info;
@@ -179,6 +173,9 @@ struct fib6_info {
refcount_t fib6_ref;
unsigned long expires;
+
+ struct hlist_node gc_link;
+
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
@@ -247,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
return rt->fib6_src.plen > 0;
}
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
static inline void fib6_clean_expires(struct fib6_info *f6i)
{
f6i->fib6_flags &= ~RTF_EXPIRES;
f6i->expires = 0;
}
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
static inline void fib6_set_expires(struct fib6_info *f6i,
unsigned long expires)
{
@@ -333,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
+ if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
+ DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
+ }
}
enum fib6_walk_state {
@@ -388,6 +393,7 @@ struct fib6_table {
struct inet_peer_base tb6_peers;
unsigned int flags;
unsigned int fib_seq;
+ struct hlist_head tb6_gc_hlist; /* GC candidates */
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -504,6 +510,38 @@ void fib6_gc_cleanup(void);
int fib6_init(void);
+/* Add the route to the gc list if it is not already there
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
+ */
+static inline void fib6_add_gc_list(struct fib6_info *f6i)
+{
+ /* If fib6_node is null, the f6i is not in (or removed from) the
+ * table.
+ *
+ * There is a gap between finding the f6i from the table and
+ * calling this function without the protection of the tb6_lock.
+ * This check makes sure the f6i is not added to the gc list when
+ * it is not on the table.
+ */
+ if (!rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock)))
+ return;
+
+ if (hlist_unhashed(&f6i->gc_link))
+ hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
+}
+
+/* Remove the route from the gc list if it is on the list.
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
+ */
+static inline void fib6_remove_gc_list(struct fib6_info *f6i)
+{
+ if (!hlist_unhashed(&f6i->gc_link))
+ hlist_del_init(&f6i->gc_link);
+}
+
struct ipv6_route_iter {
struct seq_net_private p;
struct fib6_walker w;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 28b065790261..a30c6aa9e5cf 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -170,7 +170,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev, unsigned int pref,
- u32 defrtr_usr_metric);
+ u32 defrtr_usr_metric,
+ int lifetime);
void rt6_purge_dflt_routers(struct net *net);
@@ -331,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
if (idev)
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
out:
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d4667b7797e3..9b2f69ba5e49 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -264,6 +264,7 @@ struct fib_dump_filter {
bool filter_set;
bool dump_routes;
bool dump_exceptions;
+ bool rtnl_held;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 2d746f4c9a0a..5cd64bb2104d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
- struct rtnl_link_ops *ops);
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index cf25ea21d770..88a8e554f7a1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb)
return 0;
}
-static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+static inline bool ipv6_accept_ra(const struct inet6_dev *idev)
{
+ s32 accept_ra = READ_ONCE(idev->cnf.accept_ra);
+
/* If forwarding is enabled, RA are not accepted unless the special
* hybrid mode (accept_ra=2) is enabled.
*/
- return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 :
- idev->cnf.accept_ra;
+ return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 :
+ accept_ra;
}
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 7e73f8e5e497..1d55ba7c45be 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
*/
static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
{
- if (skb->protocol == htons(ETH_P_802_2))
- memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN);
+ memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN);
}
/**
@@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
*/
static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da)
{
- if (skb->protocol == htons(ETH_P_802_2))
- memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN);
+ memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN);
}
/**
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d400fe2e8668..353488ab94a2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#ifndef MAC80211_H
@@ -214,6 +214,10 @@ struct ieee80211_low_level_stats {
* @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel,
* this is used only with channel switching with CSA
* @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed
+ * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider
+ * bandwidth) OFDMA settings need to be changed
+ * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap
+ * was changed.
*/
enum ieee80211_chanctx_change {
IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0),
@@ -221,6 +225,19 @@ enum ieee80211_chanctx_change {
IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2),
IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3),
IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4),
+ IEEE80211_CHANCTX_CHANGE_AP = BIT(5),
+ IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6),
+};
+
+/**
+ * struct ieee80211_chan_req - A channel "request"
+ * @oper: channel definition to use for operation
+ * @ap: the channel definition of the AP, if any
+ * (otherwise the chan member is %NULL)
+ */
+struct ieee80211_chan_req {
+ struct cfg80211_chan_def oper;
+ struct cfg80211_chan_def ap;
};
/**
@@ -231,6 +248,8 @@ enum ieee80211_chanctx_change {
*
* @def: the channel definition
* @min_def: the minimum channel definition currently required.
+ * @ap: the channel definition the AP actually is operating as,
+ * for use with (wider bandwidth) OFDMA
* @rx_chains_static: The number of RX chains that must always be
* active on the channel to receive MIMO transmissions
* @rx_chains_dynamic: The number of RX chains that must be enabled
@@ -243,6 +262,7 @@ enum ieee80211_chanctx_change {
struct ieee80211_chanctx_conf {
struct cfg80211_chan_def def;
struct cfg80211_chan_def min_def;
+ struct cfg80211_chan_def ap;
u8 rx_chains_static, rx_chains_dynamic;
@@ -340,8 +360,8 @@ struct ieee80211_vif_chanctx_switch {
* @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
* @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
* status changed.
- * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed.
* @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed.
+ * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed
*/
enum ieee80211_bss_change {
BSS_CHANGED_ASSOC = 1<<0,
@@ -376,8 +396,8 @@ enum ieee80211_bss_change {
BSS_CHANGED_HE_BSS_COLOR = 1<<29,
BSS_CHANGED_FILS_DISCOVERY = 1<<30,
BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
- BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32),
BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
+ BSS_CHANGED_MLD_TTLM = BIT_ULL(34),
/* when adding here, make sure to change ieee80211_reconfig */
};
@@ -537,6 +557,10 @@ struct ieee80211_fils_discovery {
* to that BSS) that can change during the lifetime of the BSS.
*
* @vif: reference to owning VIF
+ * @bss: the cfg80211 bss descriptor. Valid only for a station, and only
+ * when associated. Note: This contains information which is not
+ * necessarily authenticated. For example, information coming from probe
+ * responses.
* @addr: (link) address used locally
* @link_id: link ID, or 0 for non-MLO
* @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
@@ -581,7 +605,7 @@ struct ieee80211_fils_discovery {
* @mcast_rate: per-band multicast rate index + 1 (0: disabled)
* @bssid: The BSSID for this BSS
* @enable_beacon: whether beaconing should be enabled or not
- * @chandef: Channel definition for this BSS -- the hardware might be
+ * @chanreq: Channel request for this BSS -- the hardware might be
* configured a higher bandwidth than this BSS uses, for example.
* @mu_group: VHT MU-MIMO group membership data
* @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation.
@@ -642,9 +666,7 @@ struct ieee80211_fils_discovery {
* @tx_pwr_env_num: number of @tx_pwr_env.
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
- * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS
* @csa_active: marks whether a channel switch is going on.
- * @csa_punct_bitmap: new puncturing bitmap for channel switch
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
* when it is not assigned. This pointer is RCU-protected due to the TX
@@ -682,6 +704,7 @@ struct ieee80211_fils_discovery {
*/
struct ieee80211_bss_conf {
struct ieee80211_vif *vif;
+ struct cfg80211_bss *bss;
const u8 *bssid;
unsigned int link_id;
@@ -714,7 +737,7 @@ struct ieee80211_bss_conf {
u32 cqm_rssi_hyst;
s32 cqm_rssi_low;
s32 cqm_rssi_high;
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq;
struct ieee80211_mu_group_data mu_group;
bool qos;
bool hidden_ssid;
@@ -747,10 +770,8 @@ struct ieee80211_bss_conf {
u8 tx_pwr_env_num;
u8 pwr_reduction;
bool eht_support;
- u16 eht_puncturing;
bool csa_active;
- u16 csa_punct_bitmap;
bool mu_mimo_owner;
struct ieee80211_chanctx_conf __rcu *chanctx_conf;
@@ -1742,8 +1763,9 @@ struct ieee80211_conf {
* @chandef: the new channel to switch to
* @count: the number of TBTT's until the channel switch event
* @delay: maximum delay between the time the AP transmitted the last beacon in
- * current channel and the expected time of the first beacon in the new
- * channel, expressed in TU.
+ * current channel and the expected time of the first beacon in the new
+ * channel, expressed in TU.
+ * @link_id: the link ID of the link doing the channel switch, 0 for non-MLO
*/
struct ieee80211_channel_switch {
u64 timestamp;
@@ -1751,6 +1773,7 @@ struct ieee80211_channel_switch {
bool block_tx;
struct cfg80211_chan_def chandef;
u8 count;
+ u8 link_id;
u32 delay;
};
@@ -1772,6 +1795,10 @@ struct ieee80211_channel_switch {
* this is not pure P2P vif.
* @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is
* enabled for the interface.
+ * @IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW: Ignore wider bandwidth OFDMA
+ * operation on this interface and request a channel context without
+ * the AP definition. Use this e.g. because the device is able to
+ * handle OFDMA (downlink and trigger for uplink) on a per-AP basis.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
@@ -1779,6 +1806,7 @@ enum ieee80211_vif_flags {
IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2),
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
IEEE80211_VIF_EML_ACTIVE = BIT(4),
+ IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5),
};
@@ -1808,9 +1836,11 @@ enum ieee80211_offload_flags {
* @ps: power-save mode (STA only). This flag is NOT affected by
* offchannel/dynamic_ps operations.
* @aid: association ID number, valid only when @assoc is true
- * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k.
+ * @eml_cap: EML capabilities as described in P802.11be_D4.1 Figure 9-1001j.
* @eml_med_sync_delay: Medium Synchronization delay as described in
- * P802.11be_D2.2 Figure 9-1002j.
+ * P802.11be_D4.1 Figure 9-1001i.
+ * @mld_capa_op: MLD Capabilities and Operations per P802.11be_D4.1
+ * Figure 9-1001k
* @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
* may filter ARP queries targeted for other addresses than listed here.
* The driver must allow ARP queries targeted for all address listed here
@@ -1835,6 +1865,7 @@ struct ieee80211_vif_cfg {
u16 aid;
u16 eml_cap;
u16 eml_med_sync_delay;
+ u16 mld_capa_op;
__be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
int arp_addr_cnt;
@@ -1845,6 +1876,35 @@ struct ieee80211_vif_cfg {
u8 ap_addr[ETH_ALEN] __aligned(2);
};
+#define IEEE80211_TTLM_NUM_TIDS 8
+
+/**
+ * struct ieee80211_neg_ttlm - negotiated TID to link map info
+ *
+ * @downlink: bitmap of active links per TID for downlink, or 0 if mapping for
+ * this TID is not included.
+ * @uplink: bitmap of active links per TID for uplink, or 0 if mapping for this
+ * TID is not included.
+ * @valid: info is valid or not.
+ */
+struct ieee80211_neg_ttlm {
+ u16 downlink[IEEE80211_TTLM_NUM_TIDS];
+ u16 uplink[IEEE80211_TTLM_NUM_TIDS];
+ bool valid;
+};
+
+/**
+ * enum ieee80211_neg_ttlm_res - return value for negotiated TTLM handling
+ * @NEG_TTLM_RES_ACCEPT: accept the request
+ * @NEG_TTLM_RES_REJECT: reject the request
+ * @NEG_TTLM_RES_SUGGEST_PREFERRED: reject and suggest a new mapping
+ */
+enum ieee80211_neg_ttlm_res {
+ NEG_TTLM_RES_ACCEPT,
+ NEG_TTLM_RES_REJECT,
+ NEG_TTLM_RES_SUGGEST_PREFERRED
+};
+
/**
* struct ieee80211_vif - per-interface data
*
@@ -1863,6 +1923,11 @@ struct ieee80211_vif_cfg {
* API calls meant for that purpose.
* @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO.
* Must be a subset of valid_links.
+ * @suspended_links: subset of dormant_links representing links that are
+ * suspended.
+ * 0 for non-MLO.
+ * @neg_ttlm: negotiated TID to link mapping info.
+ * see &struct ieee80211_neg_ttlm.
* @addr: address of this interface
* @p2p: indicates whether this AP or STA interface is a p2p
* interface, i.e. a GO or p2p-sta respectively
@@ -1900,7 +1965,8 @@ struct ieee80211_vif {
struct ieee80211_vif_cfg cfg;
struct ieee80211_bss_conf bss_conf;
struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS];
- u16 valid_links, active_links, dormant_links;
+ u16 valid_links, active_links, dormant_links, suspended_links;
+ struct ieee80211_neg_ttlm neg_ttlm;
u8 addr[ETH_ALEN] __aligned(2);
bool p2p;
@@ -1947,6 +2013,21 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif)
return vif->valid_links != 0;
}
+/**
+ * ieee80211_vif_link_active - check if a given link is active
+ * @vif: the vif
+ * @link_id: the link ID to check
+ * Return: %true if the vif is an MLD and the link is active, or if
+ * the vif is not an MLD and the link ID is 0; %false otherwise.
+ */
+static inline bool ieee80211_vif_link_active(const struct ieee80211_vif *vif,
+ unsigned int link_id)
+{
+ if (!ieee80211_vif_is_mld(vif))
+ return link_id == 0;
+ return vif->active_links & BIT(link_id);
+}
+
#define for_each_vif_active_link(vif, link, link_id) \
for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \
if ((!(vif)->active_links || \
@@ -2041,6 +2122,8 @@ static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif)
* @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver
* for a AES_CMAC key to indicate that it requires sequence number
* generation only
+ * @IEEE80211_KEY_FLAG_SPP_AMSDU: SPP A-MSDUs can be used with this key
+ * (set by mac80211 from the sta->spp_amsdu flag)
*/
enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0),
@@ -2054,6 +2137,7 @@ enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8),
IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9),
IEEE80211_KEY_FLAG_GENERATE_MMIE = BIT(10),
+ IEEE80211_KEY_FLAG_SPP_AMSDU = BIT(11),
};
/**
@@ -2352,6 +2436,7 @@ struct ieee80211_link_sta {
* would be assigned to link[link_id] where link_id is the id assigned
* by the AP.
* @valid_links: bitmap of valid links, or 0 for non-MLO
+ * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not.
*/
struct ieee80211_sta {
u8 addr[ETH_ALEN];
@@ -2365,6 +2450,7 @@ struct ieee80211_sta {
bool tdls_initiator;
bool mfp;
bool mlo;
+ bool spp_amsdu;
u8 max_amsdu_subframes;
struct ieee80211_sta_aggregates *cur;
@@ -2692,6 +2778,11 @@ struct ieee80211_txq {
* @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT
* and connecting with a lower bandwidth instead
*
+ * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so
+ * no need to stop queues. This really should be set by a driver that
+ * implements MLO, so operation can continue on other links when one
+ * link is switching.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2750,6 +2841,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_DETECTS_COLOR_COLLISION,
IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX,
IEEE80211_HW_DISALLOW_PUNCTURING,
+ IEEE80211_HW_HANDLES_QUIET_CSA,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -4182,7 +4274,7 @@ struct ieee80211_prep_tx_info {
* after a channel switch procedure is completed, allowing the
* driver to go back to a normal configuration.
* @abort_channel_switch: This is an optional callback that is called
- * when channel switch procedure was completed, allowing the
+ * when channel switch procedure was aborted, allowing the
* driver to go back to a normal configuration.
* @channel_switch_rx_beacon: This is an optional callback that is called
* when channel switch procedure is in progress and additional beacon with
@@ -4293,6 +4385,10 @@ struct ieee80211_prep_tx_info {
* flow offloading for flows originating from the vif.
* Note that the driver must not assume that the vif driver_data is valid
* at this point, since the callback can be called during netdev teardown.
+ * @can_neg_ttlm: for managed interface, requests the driver to determine
+ * if the requested TID-To-Link mapping can be accepted or not.
+ * If it's not accepted the driver may suggest a preferred mapping and
+ * modify @ttlm parameter with the suggested TID-to-Link mapping.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -4574,7 +4670,8 @@ struct ieee80211_ops {
struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
void (*abort_channel_switch)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *ch_switch);
@@ -4673,6 +4770,9 @@ struct ieee80211_ops {
struct net_device *dev,
enum tc_setup_type type,
void *type_data);
+ enum ieee80211_neg_ttlm_res
+ (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_neg_ttlm *ttlm);
};
/**
@@ -5455,6 +5555,7 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
/**
* ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* The beacon counter should be updated after each beacon transmission.
* This function is called implicitly when
@@ -5464,7 +5565,8 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
*
* Return: new countdown value
*/
-u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
@@ -5482,20 +5584,23 @@ void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
/**
* ieee80211_csa_finish - notify mac80211 about channel switch
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* After a channel switch announcement was scheduled and the counter in this
* announcement hits 1, this function must be called by the driver to
* notify mac80211 that the channel can be changed.
*/
-void ieee80211_csa_finish(struct ieee80211_vif *vif);
+void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id);
/**
* ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* This function returns whether the countdown reached zero.
*/
-bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_color_change_finish - notify mac80211 about color change
@@ -5831,6 +5936,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
* ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN
* @vif: the virtual interface to add the key on
* @keyconf: new key data
+ * @link_id: the link id of the key or -1 for non-MLO
*
* When GTK rekeying was done while the system was suspended, (a) new
* key(s) will be available. These will be needed by mac80211 for proper
@@ -5858,7 +5964,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
*/
struct ieee80211_key_conf *
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
- struct ieee80211_key_conf *keyconf);
+ struct ieee80211_key_conf *keyconf,
+ int link_id);
/**
* ieee80211_gtk_rekey_notify - notify userspace supplicant of rekeying
@@ -7416,11 +7523,10 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
* aware of.
- * @gfp: allocation flags
*/
void
ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap, gfp_t gfp);
+ u64 color_bitmap);
/**
* ieee80211_is_tx_data - check if frame is a data frame
@@ -7480,4 +7586,17 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
u16 active_links);
+/* for older drivers - let's not document these ... */
+int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx);
+void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx);
+void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx,
+ u32 changed);
+int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode);
+
#endif /* MAC80211_H */
diff --git a/include/net/mctp.h b/include/net/mctp.h
index da86e106c91d..7b17c52e8ce2 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -87,7 +87,7 @@ struct mctp_sock {
};
/* Key for matching incoming packets to sockets or reassembly contexts.
- * Packets are matched on (src,dest,tag).
+ * Packets are matched on (peer EID, local EID, tag).
*
* Lifetime / locking requirements:
*
@@ -133,6 +133,7 @@ struct mctp_sock {
* - through an expiry timeout, on a per-socket timer
*/
struct mctp_sk_key {
+ unsigned int net;
mctp_eid_t peer_addr;
mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */
__u8 tag; /* incoming tag match; invert TO for local */
@@ -249,12 +250,14 @@ struct mctp_route {
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
mctp_eid_t daddr);
+/* always takes ownership of skb */
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
void mctp_key_unref(struct mctp_sk_key *key);
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t daddr, mctp_eid_t saddr,
+ unsigned int netid,
+ mctp_eid_t local, mctp_eid_t peer,
bool manual, u8 *tagp);
/* routing <--> device interface */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 13b3a4e29fdb..20c34bd7a077 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -67,8 +67,6 @@ struct net {
*/
spinlock_t rules_mod_lock;
- atomic_t dev_unreg_count;
-
unsigned int dev_base_seq; /* protected by rtnl_mutex */
u32 ifindex;
@@ -450,6 +448,9 @@ struct pernet_operations {
void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
+ /* Following method is called with RTNL held. */
+ void (*exit_batch_rtnl)(struct list_head *net_exit_list,
+ struct list_head *dev_kill_list);
unsigned int *id;
size_t size;
};
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 8b8ed4e13d74..1ec408585373 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -4,6 +4,62 @@
#include <linux/netdevice.h>
+/* See the netdev.yaml spec for definition of each statistic */
+struct netdev_queue_stats_rx {
+ u64 bytes;
+ u64 packets;
+ u64 alloc_fail;
+};
+
+struct netdev_queue_stats_tx {
+ u64 bytes;
+ u64 packets;
+};
+
+/**
+ * struct netdev_stat_ops - netdev ops for fine grained stats
+ * @get_queue_stats_rx: get stats for a given Rx queue
+ * @get_queue_stats_tx: get stats for a given Tx queue
+ * @get_base_stats: get base stats (not belonging to any live instance)
+ *
+ * Query stats for a given object. The values of the statistics are undefined
+ * on entry (specifically they are *not* zero-initialized). Drivers should
+ * assign values only to the statistics they collect. Statistics which are not
+ * collected must be left undefined.
+ *
+ * Queue objects are not necessarily persistent, and only currently active
+ * queues are queried by the per-queue callbacks. This means that per-queue
+ * statistics will not generally add up to the total number of events for
+ * the device. The @get_base_stats callback allows filling in the delta
+ * between events for currently live queues and overall device history.
+ * When the statistics for the entire device are queried, first @get_base_stats
+ * is issued to collect the delta, and then a series of per-queue callbacks.
+ * Only statistics which are set in @get_base_stats will be reported
+ * at the device level, meaning that unlike in queue callbacks, setting
+ * a statistic to zero in @get_base_stats is a legitimate thing to do.
+ * This is because @get_base_stats has a second function of designating which
+ * statistics are in fact correct for the entire device (e.g. when history
+ * for some of the events is not maintained, and reliable "total" cannot
+ * be provided).
+ *
+ * Device drivers can assume that when collecting total device stats,
+ * the @get_base_stats and subsequent per-queue calls are performed
+ * "atomically" (without releasing the rtnl_lock).
+ *
+ * Device drivers are encouraged to reset the per-queue statistics when
+ * number of queues change. This is because the primary use case for
+ * per-queue statistics is currently to detect traffic imbalance.
+ */
+struct netdev_stat_ops {
+ void (*get_queue_stats_rx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_rx *stats);
+ void (*get_queue_stats_tx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_tx *stats);
+ void (*get_base_stats)(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx);
+};
+
/**
* DOC: Lockless queue stopping / waking helpers.
*
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 956c752ceb31..a763dd327c6e 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -276,7 +276,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route);
+ struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index c81021ab07aa..4aeffddb7586 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -35,7 +35,6 @@ struct nf_queue_handler {
void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
-void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b157c5cafd14..e27c28b612e4 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -205,6 +205,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
* @nla: netlink attributes
* @portid: netlink portID of the original message
* @seq: netlink sequence number
+ * @flags: modifiers to new request
* @family: protocol family
* @level: depth of the chains
* @report: notify via unicast netlink message
@@ -282,6 +283,7 @@ struct nft_elem_priv { };
*
* @key: element key
* @key_end: closing element key
+ * @data: element data
* @priv: element private data and extensions
*/
struct nft_set_elem {
@@ -325,10 +327,10 @@ struct nft_set_iter {
* @dtype: data type
* @dlen: data length
* @objtype: object type
- * @flags: flags
* @size: number of set elements
* @policy: set policy
* @gc_int: garbage collector interval
+ * @timeout: element timeout
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
* @expr: set must support for expressions
@@ -351,9 +353,9 @@ struct nft_set_desc {
/**
* enum nft_set_class - performance class
*
- * @NFT_LOOKUP_O_1: constant, O(1)
- * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N)
- * @NFT_LOOKUP_O_N: linear, O(N)
+ * @NFT_SET_CLASS_O_1: constant, O(1)
+ * @NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N)
+ * @NFT_SET_CLASS_O_N: linear, O(N)
*/
enum nft_set_class {
NFT_SET_CLASS_O_1,
@@ -422,9 +424,13 @@ struct nft_set_ext;
* @remove: remove element from set
* @walk: iterate over all set elements
* @get: get set elements
+ * @commit: commit set elements
+ * @abort: abort set elements
* @privsize: function to return size of set private data
+ * @estimate: estimate the required memory size and the lookup complexity class
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
+ * @gc_init: initialize garbage collection
* @elemsize: element private size
*
* Operations lookup, update and delete have simpler interfaces, are faster
@@ -540,13 +546,16 @@ struct nft_set_elem_expr {
* @policy: set parameterization (see enum nft_set_policies)
* @udlen: user data length
* @udata: user data
- * @expr: stateful expression
+ * @pending_update: list of pending update set element
* @ops: set ops
* @flags: set flags
* @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
+ * @num_exprs: numbers of exprs
+ * @exprs: stateful expression
+ * @catchall_list: list of catch-all set element
* @data: private set data
*/
struct nft_set {
@@ -692,6 +701,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[];
*
* @len: length of extension area
* @offset: offsets of individual extension types
+ * @ext_len: length of the expected extension(used to sanity check)
*/
struct nft_set_ext_tmpl {
u16 len;
@@ -798,10 +808,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex
return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
}
-static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext,
+ u64 tstamp)
{
return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
- time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
+ time_after_eq64(tstamp, *nft_set_ext_expiration(ext));
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+ return __nft_set_elem_expired(ext, get_jiffies_64());
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -840,6 +856,7 @@ struct nft_expr_ops;
* @select_ops: function to select nft_expr_ops
* @release_ops: release nft_expr_ops
* @ops: default ops, used when no select_ops functions is present
+ * @inner_ops: inner ops, used for inner packet operation
* @list: used internally
* @name: Identifier
* @owner: module reference
@@ -881,14 +898,22 @@ struct nft_offload_ctx;
* struct nft_expr_ops - nf_tables expression operations
*
* @eval: Expression evaluation function
+ * @clone: Expression clone function
* @size: full expression size, including private data size
* @init: initialization function
* @activate: activate expression in the next generation
* @deactivate: deactivate expression in next generation
* @destroy: destruction function, called after synchronize_rcu
+ * @destroy_clone: destruction clone function
* @dump: function to dump parameters
- * @type: expression type
* @validate: validate expression, called during loop detection
+ * @reduce: reduce expression
+ * @gc: garbage collection expression
+ * @offload: hardware offload expression
+ * @offload_action: function to report true/false to allocate one slot or not in the flow
+ * offload array
+ * @offload_stats: function to synchronize hardware stats via updating the counter expression
+ * @type: expression type
* @data: extra data to attach to this expression operation
*/
struct nft_expr_ops {
@@ -1041,14 +1066,21 @@ struct nft_rule_blob {
/**
* struct nft_chain - nf_tables chain
*
+ * @blob_gen_0: rule blob pointer to the current generation
+ * @blob_gen_1: rule blob pointer to the future generation
* @rules: list of rules in the chain
* @list: used internally
* @rhlhead: used internally
* @table: table that this chain belongs to
* @handle: chain handle
* @use: number of jump references to this chain
- * @flags: bitmask of enum nft_chain_flags
+ * @flags: bitmask of enum NFTA_CHAIN_FLAGS
+ * @bound: bind or not
+ * @genmask: generation mask
* @name: name of the chain
+ * @udlen: user data length
+ * @udata: user data in the chain
+ * @blob_next: rule blob pointer to the next in the chain
*/
struct nft_chain {
struct nft_rule_blob __rcu *blob_gen_0;
@@ -1146,6 +1178,7 @@ struct nft_hook {
* @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family)
* @type: chain type
* @policy: default policy
+ * @flags: indicate the base chain disabled or not
* @stats: per-cpu chain stats
* @chain: the chain
* @flow_block: flow block (for hardware offload)
@@ -1244,6 +1277,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table)
return table->flags & NFT_TABLE_F_OWNER;
}
+static inline bool nft_table_is_orphan(const struct nft_table *table)
+{
+ return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) ==
+ NFT_TABLE_F_PERSIST;
+}
+
static inline bool nft_base_chain_netdev(int family, u32 hooknum)
{
return family == NFPROTO_NETDEV ||
@@ -1274,11 +1313,13 @@ struct nft_object_hash_key {
* struct nft_object - nf_tables stateful object
*
* @list: table stateful object list node
- * @key: keys that identify this object
* @rhlhead: nft_objname_ht node
+ * @key: keys that identify this object
* @genmask: generation mask
* @use: number of references to this stateful object
* @handle: unique object handle
+ * @udlen: length of user data
+ * @udata: user data
* @ops: object operations
* @data: object data, layout depends on type
*/
@@ -1322,6 +1363,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
* @type: stateful object numeric type
* @owner: module owner
* @maxattr: maximum netlink attribute
+ * @family: address family for AF-specific object types
* @policy: netlink attribute policy
*/
struct nft_object_type {
@@ -1331,6 +1373,7 @@ struct nft_object_type {
struct list_head list;
u32 type;
unsigned int maxattr;
+ u8 family;
struct module *owner;
const struct nla_policy *policy;
};
@@ -1344,6 +1387,7 @@ struct nft_object_type {
* @destroy: release existing stateful object
* @dump: netlink dump stateful object
* @update: update stateful object
+ * @type: pointer to object type
*/
struct nft_object_ops {
void (*eval)(struct nft_object *obj,
@@ -1379,9 +1423,8 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
* @genmask: generation mask
* @use: number of references to this flow table
* @handle: unique object handle
- * @dev_name: array of device names
+ * @hook_list: hook list for hooks per net_device in flowtables
* @data: rhashtable and garbage collector
- * @ops: array of hooks
*/
struct nft_flowtable {
struct list_head list;
@@ -1748,6 +1791,7 @@ struct nftables_pernet {
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
+ u64 tstamp;
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
@@ -1760,6 +1804,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net)
return net_generic(net, nf_tables_net_id);
}
+static inline u64 nft_net_tstamp(const struct net *net)
+{
+ return nft_pernet(net)->tstamp;
+}
+
#define __NFT_REDUCE_READONLY 1UL
#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 780a5f6ad4a6..ff27cb2e1662 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -93,7 +93,7 @@ extern const struct nft_set_type nft_set_bitmap_type;
extern const struct nft_set_type nft_set_pipapo_type;
extern const struct nft_set_type nft_set_pipapo_avx2_type;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 43ae50337685..f3ab0b8a4b18 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -145,15 +145,14 @@ struct netlbl_lsm_cache {
* processing.
*
*/
-#define NETLBL_CATMAP_MAPTYPE u64
#define NETLBL_CATMAP_MAPCNT 4
-#define NETLBL_CATMAP_MAPSIZE (sizeof(NETLBL_CATMAP_MAPTYPE) * 8)
+#define NETLBL_CATMAP_MAPSIZE (sizeof(u64) * 8)
#define NETLBL_CATMAP_SIZE (NETLBL_CATMAP_MAPSIZE * \
NETLBL_CATMAP_MAPCNT)
-#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01
+#define NETLBL_CATMAP_BIT ((u64)0x01)
struct netlbl_lsm_catmap {
u32 startbit;
- NETLBL_CATMAP_MAPTYPE bitmap[NETLBL_CATMAP_MAPCNT];
+ u64 bitmap[NETLBL_CATMAP_MAPCNT];
struct netlbl_lsm_catmap *next;
};
diff --git a/include/net/netmem.h b/include/net/netmem.h
new file mode 100644
index 000000000000..d8b810245c1d
--- /dev/null
+++ b/include/net/netmem.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Network memory
+ *
+ * Author: Mina Almasry <almasrymina@google.com>
+ */
+
+#ifndef _NET_NETMEM_H
+#define _NET_NETMEM_H
+
+/**
+ * typedef netmem_ref - a nonexistent type marking a reference to generic
+ * network memory.
+ *
+ * A netmem_ref currently is always a reference to a struct page. This
+ * abstraction is introduced so support for new memory types can be added.
+ *
+ * Use the supplied helpers to obtain the underlying memory pointer and fields.
+ */
+typedef unsigned long __bitwise netmem_ref;
+
+/* This conversion fails (returns NULL) if the netmem_ref is not struct page
+ * backed.
+ *
+ * Currently struct page is the only possible netmem, and this helper never
+ * fails.
+ */
+static inline struct page *netmem_to_page(netmem_ref netmem)
+{
+ return (__force struct page *)netmem;
+}
+
+/* Converting from page to netmem is always safe, because a page can always be
+ * a netmem.
+ */
+static inline netmem_ref page_to_netmem(struct page *page)
+{
+ return (__force netmem_ref)page;
+}
+
+#endif /* _NET_NETMEM_H */
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index d92046a4a078..7ca315ad500e 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -47,6 +47,8 @@ struct nh_config {
bool nh_grp_res_has_idle_timer;
bool nh_grp_res_has_unbalanced_timer;
+ bool nh_hw_stats;
+
struct nlattr *nh_encap;
u16 nh_encap_type;
@@ -95,8 +97,14 @@ struct nh_res_table {
struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
};
+struct nh_grp_entry_stats {
+ u64_stats_t packets;
+ struct u64_stats_sync syncp;
+};
+
struct nh_grp_entry {
struct nexthop *nh;
+ struct nh_grp_entry_stats __percpu *stats;
u8 weight;
union {
@@ -114,6 +122,7 @@ struct nh_grp_entry {
struct list_head nh_list;
struct nexthop *nh_parent; /* nexthop of group with this entry */
+ u64 packets_hw;
};
struct nh_group {
@@ -124,6 +133,7 @@ struct nh_group {
bool resilient;
bool fdb_nh;
bool has_v4;
+ bool hw_stats;
struct nh_res_table __rcu *res_table;
struct nh_grp_entry nh_entries[] __counted_by(num_nh);
@@ -157,6 +167,7 @@ enum nexthop_event_type {
NEXTHOP_EVENT_REPLACE,
NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
NEXTHOP_EVENT_BUCKET_REPLACE,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
enum nh_notifier_info_type {
@@ -164,6 +175,7 @@ enum nh_notifier_info_type {
NH_NOTIFIER_INFO_TYPE_GRP,
NH_NOTIFIER_INFO_TYPE_RES_TABLE,
NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
+ NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
struct nh_notifier_single_info {
@@ -173,6 +185,7 @@ struct nh_notifier_single_info {
__be32 ipv4;
struct in6_addr ipv6;
};
+ u32 id;
u8 is_reject:1,
is_fdb:1,
has_encap:1;
@@ -180,13 +193,13 @@ struct nh_notifier_single_info {
struct nh_notifier_grp_entry_info {
u8 weight;
- u32 id;
struct nh_notifier_single_info nh;
};
struct nh_notifier_grp_info {
u16 num_nh;
bool is_fdb;
+ bool hw_stats;
struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
@@ -200,9 +213,21 @@ struct nh_notifier_res_bucket_info {
struct nh_notifier_res_table_info {
u16 num_nh_buckets;
+ bool hw_stats;
struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};
+struct nh_notifier_grp_hw_stats_entry_info {
+ u32 id;
+ u64 packets;
+};
+
+struct nh_notifier_grp_hw_stats_info {
+ u16 num_nh;
+ bool hw_stats_used;
+ struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
+};
+
struct nh_notifier_info {
struct net *net;
struct netlink_ext_ack *extack;
@@ -213,17 +238,22 @@ struct nh_notifier_info {
struct nh_notifier_grp_info *nh_grp;
struct nh_notifier_res_table_info *nh_res_table;
struct nh_notifier_res_bucket_info *nh_res_bucket;
+ struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
};
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
bool offload, bool trap);
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
unsigned long *activity);
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets);
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
@@ -316,7 +346,7 @@ static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
u8 rt_family)
{
- struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 5dee575fbe86..3d07abacf08b 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -196,7 +196,7 @@ struct nfc_dev {
};
#define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
-extern struct class nfc_class;
+extern const struct class nfc_class;
struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 76481c465375..5e43a08d3231 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -18,8 +18,9 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
- PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
+ PP_FLAG_SYSTEM_POOL)
/*
* Fast allocation side cache array/stack
@@ -128,6 +129,7 @@ struct page_pool_stats {
struct page_pool {
struct page_pool_params_fast p;
+ int cpuid;
bool has_init_callback;
long frag_users;
@@ -203,21 +205,18 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
+ int cpuid);
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
-void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
-static inline void page_pool_unlink_napi(struct page_pool *pool)
-{
-}
-
static inline void page_pool_destroy(struct page_pool *pool)
{
}
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index f308e8268651..a4ee43f493bb 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -24,6 +24,8 @@ struct tcf_walker {
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
void unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+#define NET_CLS_ALIAS_PREFIX "net-cls-"
+#define MODULE_ALIAS_NET_CLS(kind) MODULE_ALIAS(NET_CLS_ALIAS_PREFIX kind)
struct tcf_block_ext_info {
enum flow_block_binder_type binder_type;
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 1e200d9a066d..d7b7b6cd4aa1 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -100,6 +100,8 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
int register_qdisc(struct Qdisc_ops *qops);
void unregister_qdisc(struct Qdisc_ops *qops);
+#define NET_SCH_ALIAS_PREFIX "net-sch-"
+#define MODULE_ALIAS_NET_SCH(id) MODULE_ALIAS(NET_SCH_ALIAS_PREFIX id)
void qdisc_get_default(char *id, size_t len);
int qdisc_set_default(const char *id);
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6aef8cb11cc8..b2499f88f8f8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -46,6 +46,7 @@ struct net_protocol {
* socket lookup?
*/
icmp_strict_tag_validation:1;
+ u32 secret;
};
#if IS_ENABLED(CONFIG_IPV6)
@@ -59,6 +60,7 @@ struct inet6_protocol {
__be32 info);
unsigned int flags; /* INET6_PROTO_xxx */
+ u32 secret;
};
#define INET6_PROTO_NOPOLICY 0x1
@@ -68,6 +70,7 @@ struct inet6_protocol {
struct net_offload {
struct offload_callbacks callbacks;
unsigned int flags; /* Flags used by IPv6 for now */
+ u32 secret;
};
/* This should be set for any extension header which is compatible with GSO. */
#define INET6_PROTO_GSO_EXTHDR 0x1
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 144c39db9898..8839133d6f6b 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -83,6 +83,45 @@ static inline struct sock *req_to_sk(struct request_sock *req)
return (struct sock *)req;
}
+/**
+ * skb_steal_sock - steal a socket from an sk_buff
+ * @skb: sk_buff to steal the socket from
+ * @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from bpf
+ */
+static inline struct sock *skb_steal_sock(struct sk_buff *skb,
+ bool *refcounted, bool *prefetched)
+{
+ struct sock *sk = skb->sk;
+
+ if (!sk) {
+ *prefetched = false;
+ *refcounted = false;
+ return NULL;
+ }
+
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched) {
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ *refcounted = false;
+ sk = req->rsk_listener;
+ req->rsk_listener = NULL;
+ return sk;
+ }
+#endif
+ *refcounted = sk_is_refcounted(sk);
+ } else {
+ *refcounted = true;
+ }
+
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ return sk;
+}
+
static inline struct request_sock *
reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
bool attach_listener)
diff --git a/include/net/route.h b/include/net/route.h
index 980ab474eabd..d4a0147942f1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -37,9 +37,6 @@
#define RTO_ONLINK 0x01
-#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE))
-#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
-
static inline __u8 ip_sock_rt_scope(const struct sock *sk)
{
if (sock_flag(sk, SOCK_LOCALROUTE))
@@ -163,8 +160,8 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__u8 proto, __u8 tos, int oif)
{
flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
- RT_SCOPE_UNIVERSE, proto,
- sk ? inet_sk_flowi_flags(sk) : 0,
+ sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE,
+ proto, sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
if (sk)
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
diff --git a/include/net/rps.h b/include/net/rps.h
new file mode 100644
index 000000000000..7660243e905b
--- /dev/null
+++ b/include/net/rps.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_RPS_H
+#define _NET_RPS_H
+
+#include <linux/types.h>
+#include <linux/static_key.h>
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+#ifdef CONFIG_RPS
+
+extern struct static_key_false rps_needed;
+extern struct static_key_false rfs_needed;
+
+/*
+ * This structure holds an RPS map which can be of variable length. The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+ unsigned int len;
+ struct rcu_head rcu;
+ u16 cpus[];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
+
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
+ */
+struct rps_dev_flow {
+ u16 cpu;
+ u16 filter;
+ unsigned int last_qtail;
+};
+#define RPS_NO_FILTER 0xffff
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+ unsigned int mask;
+ struct rcu_head rcu;
+ struct rps_dev_flow flows[];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+ ((_num) * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ * Each entry is a 32bit value. Upper part is the high-order bits
+ * of flow hash, lower part is CPU number.
+ * rps_cpu_mask is used to partition the space, depending on number of
+ * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
+ * meaning we use 32-6=26 bits for the hash.
+ */
+struct rps_sock_flow_table {
+ u32 mask;
+
+ u32 ents[] ____cacheline_aligned_in_smp;
+};
+#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
+
+#define RPS_NO_CPU 0xffff
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+ u32 hash)
+{
+ unsigned int index = hash & table->mask;
+ u32 val = hash & ~net_hotdata.rps_cpu_mask;
+
+ /* We only give a hint, preemption can change CPU under us */
+ val |= raw_smp_processor_id();
+
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in get_rps_cpu().
+ */
+ if (READ_ONCE(table->ents[index]) != val)
+ WRITE_ONCE(table->ents[index], val);
+}
+
+#endif /* CONFIG_RPS */
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ if (!hash)
+ return;
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+ if (sock_flow_table)
+ rps_record_sock_flow(sock_flow_table, hash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (static_branch_unlikely(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
+ }
+#endif
+}
+
+#endif /* _NET_RPS_H */
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 6506221c5fe3..3bfb80bad173 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
+ RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
};
enum rtnl_kinds {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba3e1b315de8..cefe0c4bdae3 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -238,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
-#ifdef CONFIG_BQL
- /* Non-BQL migrated drivers will return 0, too. */
- return dql_avail(&txq->dql);
-#else
- return 0;
-#endif
+ return netdev_queue_dql_avail(txq);
}
struct Qdisc_class_ops {
@@ -375,6 +370,10 @@ struct tcf_proto_ops {
struct nlattr **tca,
struct netlink_ext_ack *extack);
void (*tmplt_destroy)(void *tmplt_priv);
+ void (*tmplt_reoffload)(struct tcf_chain *chain,
+ bool add,
+ flow_setup_cb_t *cb,
+ void *cb_priv);
struct tcf_exts * (*get_exts)(const struct tcf_proto *tp,
u32 handle);
diff --git a/include/net/scm.h b/include/net/scm.h
index cf68acec4d70..92276a2c5543 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -25,6 +25,7 @@ struct scm_creds {
struct scm_fp_list {
short count;
+ short count_unix;
short max;
struct user_struct *user;
struct file *fp[SCM_MAX_FD];
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5a24d6d8522a..f24a1bbcb3ef 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -242,10 +242,7 @@ struct sctp_sock {
int do_auto_asconf;
};
-static inline struct sctp_sock *sctp_sk(const struct sock *sk)
-{
- return (struct sctp_sock *)sk;
-}
+#define sctp_sk(ptr) container_of_const(ptr, struct sctp_sock, inet.sk)
static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
{
diff --git a/include/net/sock.h b/include/net/sock.h
index a7f815c7cfdf..b5e00702acc1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -378,14 +378,10 @@ struct sock {
#define sk_flags __sk_common.skc_flags
#define sk_rxhash __sk_common.skc_rxhash
- /* early demux fields */
- struct dst_entry __rcu *sk_rx_dst;
- int sk_rx_dst_ifindex;
- u32 sk_rx_dst_cookie;
+ __cacheline_group_begin(sock_write_rx);
- socket_lock_t sk_lock;
atomic_t sk_drops;
- int sk_rcvlowat;
+ __s32 sk_peek_off;
struct sk_buff_head sk_error_queue;
struct sk_buff_head sk_receive_queue;
/*
@@ -402,18 +398,24 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
-
#define sk_rmem_alloc sk_backlog.rmem_alloc
- int sk_forward_alloc;
- u32 sk_reserved_mem;
+ __cacheline_group_end(sock_write_rx);
+
+ __cacheline_group_begin(sock_read_rx);
+ /* early demux fields */
+ struct dst_entry __rcu *sk_rx_dst;
+ int sk_rx_dst_ifindex;
+ u32 sk_rx_dst_cookie;
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_ll_usec;
- /* ===== mostly read cache line ===== */
unsigned int sk_napi_id;
+ u16 sk_busy_poll_budget;
+ u8 sk_prefer_busy_poll;
#endif
+ u8 sk_userlocks;
int sk_rcvbuf;
- int sk_disconnects;
struct sk_filter __rcu *sk_filter;
union {
@@ -422,15 +424,33 @@ struct sock {
struct socket_wq *sk_wq_raw;
/* public: */
};
+
+ void (*sk_data_ready)(struct sock *sk);
+ long sk_rcvtimeo;
+ int sk_rcvlowat;
+ __cacheline_group_end(sock_read_rx);
+
+ __cacheline_group_begin(sock_read_rxtx);
+ int sk_err;
+ struct socket *sk_socket;
+ struct mem_cgroup *sk_memcg;
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+ __cacheline_group_end(sock_read_rxtx);
- struct dst_entry __rcu *sk_dst_cache;
+ __cacheline_group_begin(sock_write_rxtx);
+ socket_lock_t sk_lock;
+ u32 sk_reserved_mem;
+ int sk_forward_alloc;
+ u32 sk_tsflags;
+ __cacheline_group_end(sock_write_rxtx);
+
+ __cacheline_group_begin(sock_write_tx);
+ int sk_write_pending;
atomic_t sk_omem_alloc;
int sk_sndbuf;
- /* ===== cache line for TX ===== */
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
@@ -439,22 +459,36 @@ struct sock {
struct rb_root tcp_rtx_queue;
};
struct sk_buff_head sk_write_queue;
- __s32 sk_peek_off;
- int sk_write_pending;
- __u32 sk_dst_pending_confirm;
+ u32 sk_dst_pending_confirm;
u32 sk_pacing_status; /* see enum sk_pacing */
- long sk_sndtimeo;
+ struct page_frag sk_frag;
struct timer_list sk_timer;
- __u32 sk_priority;
- __u32 sk_mark;
+
unsigned long sk_pacing_rate; /* bytes per second */
+ atomic_t sk_zckey;
+ atomic_t sk_tskey;
+ __cacheline_group_end(sock_write_tx);
+
+ __cacheline_group_begin(sock_read_tx);
unsigned long sk_max_pacing_rate;
- struct page_frag sk_frag;
+ long sk_sndtimeo;
+ u32 sk_priority;
+ u32 sk_mark;
+ struct dst_entry __rcu *sk_dst_cache;
netdev_features_t sk_route_caps;
- int sk_gso_type;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
+ u16 sk_gso_type;
+ u16 sk_gso_max_segs;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
- __u32 sk_txhash;
+ u32 sk_txhash;
+ u8 sk_pacing_shift;
+ bool sk_use_task_frag;
+ __cacheline_group_end(sock_read_tx);
/*
* Because of non atomicity rules, all
@@ -463,64 +497,44 @@ struct sock {
u8 sk_gso_disabled : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
- sk_no_check_rx : 1,
- sk_userlocks : 4;
- u8 sk_pacing_shift;
+ sk_no_check_rx : 1;
+ u8 sk_shutdown;
u16 sk_type;
u16 sk_protocol;
- u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
- int sk_err,
- sk_err_soft;
+ int sk_err_soft;
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
- u8 sk_txrehash;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- u8 sk_prefer_busy_poll;
- u16 sk_busy_poll_budget;
-#endif
spinlock_t sk_peer_lock;
int sk_bind_phc;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
- long sk_rcvtimeo;
ktime_t sk_stamp;
#if BITS_PER_LONG==32
seqlock_t sk_stamp_seq;
#endif
- atomic_t sk_tskey;
- atomic_t sk_zckey;
- u32 sk_tsflags;
- u8 sk_shutdown;
+ int sk_disconnects;
+ u8 sk_txrehash;
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
sk_txtime_report_errors : 1,
sk_txtime_unused : 6;
- bool sk_use_task_frag;
- struct socket *sk_socket;
void *sk_user_data;
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
struct sock_cgroup_data sk_cgrp_data;
- struct mem_cgroup *sk_memcg;
void (*sk_state_change)(struct sock *sk);
- void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
-#ifdef CONFIG_SOCK_VALIDATE_XMIT
- struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
@@ -1103,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
-static inline void sock_rps_record_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void sock_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
- }
-#endif
-}
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
@@ -1429,6 +1408,7 @@ sk_memory_allocated(const struct sock *sk)
/* 1 MB per cpu, in page units */
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+extern int sysctl_mem_pcpu_rsv;
static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
@@ -1437,7 +1417,7 @@ sk_memory_allocated_add(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
@@ -1451,7 +1431,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
@@ -2765,9 +2745,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
&skb_shinfo(skb)->tskey);
}
+static inline bool sk_is_inet(const struct sock *sk)
+{
+ int family = READ_ONCE(sk->sk_family);
+
+ return family == AF_INET || family == AF_INET6;
+}
+
static inline bool sk_is_tcp(const struct sock *sk)
{
- return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
+ return sk_is_inet(sk) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+ return sk_is_inet(sk) &&
+ sk->sk_type == SOCK_DGRAM &&
+ sk->sk_protocol == IPPROTO_UDP;
}
static inline bool sk_is_stream_unix(const struct sock *sk)
@@ -2814,31 +2810,6 @@ sk_is_refcounted(struct sock *sk)
return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
}
-/**
- * skb_steal_sock - steal a socket from an sk_buff
- * @skb: sk_buff to steal the socket from
- * @refcounted: is set to true if the socket is reference-counted
- * @prefetched: is set to true if the socket was assigned from bpf
- */
-static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
-{
- if (skb->sk) {
- struct sock *sk = skb->sk;
-
- *refcounted = true;
- *prefetched = skb_sk_is_prefetched(skb);
- if (*prefetched)
- *refcounted = sk_is_refcounted(sk);
- skb->destructor = NULL;
- skb->sk = NULL;
- return sk;
- }
- *prefetched = false;
- *refcounted = false;
- return NULL;
-}
-
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
* Check decrypted mark in case skb_orphan() cleared socket.
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index a43062d4c734..8346b0d29542 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -308,6 +308,9 @@ void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack);
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj);
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack);
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index a608546bcefc..ffe58a02537c 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -4,7 +4,7 @@
#include <net/pkt_cls.h>
-#if IS_ENABLED(CONFIG_RETPOLINE)
+#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
#include <linux/cpufeature.h>
#include <linux/static_key.h>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index dd78a1181031..6ae35199d3b3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
+enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -396,8 +396,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race);
-int tcp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
+enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
@@ -498,6 +498,22 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
struct tcp_options_received *tcp_opt,
int mss, u32 tsoff);
+#if IS_ENABLED(CONFIG_BPF)
+struct bpf_tcp_req_attrs {
+ u32 rcv_tsval;
+ u32 rcv_tsecr;
+ u16 mss;
+ u8 rcv_wscale;
+ u8 snd_wscale;
+ u8 ecn_ok;
+ u8 wscale_ok;
+ u8 sack_ok;
+ u8 tstamp_ok;
+ u8 usec_ts_ok;
+ u8 reserved[3];
+};
+#endif
+
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -577,6 +593,15 @@ static inline u32 tcp_cookie_time(void)
return val;
}
+/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
+static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val)
+{
+ if (usec_ts)
+ return div_u64(val, NSEC_PER_USEC);
+
+ return div_u64(val, NSEC_PER_MSEC);
+}
+
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
@@ -590,6 +615,26 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *
dst_feature(dst, RTAX_FEATURE_ECN);
}
+#if IS_ENABLED(CONFIG_BPF)
+static inline bool cookie_bpf_ok(struct sk_buff *skb)
+{
+ return skb->sk;
+}
+
+struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb);
+#else
+static inline bool cookie_bpf_ok(struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return NULL;
+}
+#endif
+
/* From net/ipv6/syncookies.c */
int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
@@ -2506,7 +2551,7 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
- int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+ int (*get_info)(struct sock *sk, struct sk_buff *skb);
size_t (*get_info_size)(const struct sock *sk);
/* clone ulp */
void (*clone)(const struct request_sock *req, struct sock *newsk,
diff --git a/include/net/tls.h b/include/net/tls.h
index 962f0c501111..340ad43971e4 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -97,9 +97,6 @@ struct tls_sw_context_tx {
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
- /* protect crypto_wait with encrypt_pending */
- spinlock_t encrypt_compl_lock;
- int async_notify;
u8 async_capable:1;
#define BIT_TX_SCHEDULED 0
@@ -136,8 +133,6 @@ struct tls_sw_context_rx {
struct tls_strparser strp;
atomic_t decrypt_pending;
- /* protect crypto_wait with decrypt_pending*/
- spinlock_t decrypt_compl_lock;
struct sk_buff_head async_hold;
struct wait_queue_head wq;
};
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 526c1e7f505e..c9aec9ab6191 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -159,11 +159,29 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
return ret;
}
+static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+{
+ struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
+
+ list_del(&xskb->xskb_list_node);
+}
+
+static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff_xsk *frag;
+
+ frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
+ xskb_list_node);
+ return &frag->xdp;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + size;
+ xdp->flags = 0;
}
static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
@@ -350,6 +368,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
return NULL;
}
+static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
+{
+ return NULL;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1d107241b901..57c743b7e4fe 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -51,8 +51,10 @@
#ifdef CONFIG_XFRM_STATISTICS
#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
+#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val)
#else
#define XFRM_INC_STATS(net, field) ((void)(net))
+#define XFRM_ADD_STATS(net, field, val) ((void)(net))
#endif
@@ -1577,22 +1579,20 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
+void xfrm_state_update_stats(struct net *net);
#ifdef CONFIG_XFRM_OFFLOAD
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x)
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
struct xfrm_dev_offload *xdo = &x->xso;
struct net_device *dev = xdo->dev;
- if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
- return;
-
if (dev && dev->xfrmdev_ops &&
- dev->xfrmdev_ops->xdo_dev_state_update_curlft)
- dev->xfrmdev_ops->xdo_dev_state_update_curlft(x);
+ dev->xfrmdev_ops->xdo_dev_state_update_stats)
+ dev->xfrmdev_ops->xdo_dev_state_update_stats(x);
}
#else
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {}
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
#endif
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 5ec1e71a09de..c38f4fe5e64c 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -100,10 +100,6 @@ struct scsi_vpd {
unsigned char data[];
};
-enum scsi_vpd_parameters {
- SCSI_VPD_HEADER_SIZE = 4,
-};
-
struct scsi_device {
struct Scsi_Host *host;
struct request_queue *request_queue;
@@ -208,6 +204,7 @@ struct scsi_device {
unsigned use_10_for_rw:1; /* first try 10-byte read / write */
unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */
+ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */
unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
unsigned no_write_same:1; /* no WRITE SAME command */
unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
diff --git a/include/soc/qcom/qcom-spmi-pmic.h b/include/soc/qcom/qcom-spmi-pmic.h
index 17a0a8c3d656..a62d500a6fda 100644
--- a/include/soc/qcom/qcom-spmi-pmic.h
+++ b/include/soc/qcom/qcom-spmi-pmic.h
@@ -49,7 +49,7 @@
#define PMK8350_SUBTYPE 0x2f
#define PMR735B_SUBTYPE 0x34
#define PM6350_SUBTYPE 0x36
-#define PM2250_SUBTYPE 0x37
+#define PM4125_SUBTYPE 0x37
#define PMI8998_FAB_ID_SMIC 0x11
#define PMI8998_FAB_ID_GF 0x30
diff --git a/include/soc/qcom/spm.h b/include/soc/qcom/spm.h
index 4951f9d8b0bd..5b263c685812 100644
--- a/include/soc/qcom/spm.h
+++ b/include/soc/qcom/spm.h
@@ -7,11 +7,6 @@
#ifndef __SPM_H__
#define __SPM_H__
-#include <linux/cpuidle.h>
-
-#define MAX_PMIC_DATA 2
-#define MAX_SEQ_DATA 64
-
enum pm_sleep_mode {
PM_SLEEP_MODE_STBY,
PM_SLEEP_MODE_RET,
@@ -20,23 +15,7 @@ enum pm_sleep_mode {
PM_SLEEP_MODE_NR,
};
-struct spm_reg_data {
- const u16 *reg_offset;
- u32 spm_cfg;
- u32 spm_dly;
- u32 pmic_dly;
- u32 pmic_data[MAX_PMIC_DATA];
- u32 avs_ctl;
- u32 avs_limit;
- u8 seq[MAX_SEQ_DATA];
- u8 start_index[PM_SLEEP_MODE_NR];
-};
-
-struct spm_driver_data {
- void __iomem *reg_base;
- const struct spm_reg_data *reg_data;
-};
-
+struct spm_driver_data;
void spm_set_low_power_mode(struct spm_driver_data *drv,
enum pm_sleep_mode mode);
diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h
index 3a513be50243..8f421b9f7585 100644
--- a/include/soc/tegra/fuse.h
+++ b/include/soc/tegra/fuse.h
@@ -17,6 +17,7 @@
#define TEGRA186 0x18
#define TEGRA194 0x19
#define TEGRA234 0x23
+#define TEGRA241 0x24
#define TEGRA264 0x26
#define TEGRA_FUSE_SKU_CALIB_0 0xf0
diff --git a/include/soc/tegra/pmc.h b/include/soc/tegra/pmc.h
index aadb845d281d..c545875d0ff1 100644
--- a/include/soc/tegra/pmc.h
+++ b/include/soc/tegra/pmc.h
@@ -148,10 +148,6 @@ enum tegra_io_pad {
TEGRA_IO_PAD_AO_HV,
};
-/* deprecated, use TEGRA_IO_PAD_{HDMI,LVDS} instead */
-#define TEGRA_IO_RAIL_HDMI TEGRA_IO_PAD_HDMI
-#define TEGRA_IO_RAIL_LVDS TEGRA_IO_PAD_LVDS
-
#ifdef CONFIG_SOC_TEGRA_PMC
int tegra_powergate_power_on(unsigned int id);
int tegra_powergate_power_off(unsigned int id);
@@ -164,10 +160,6 @@ int tegra_powergate_sequence_power_up(unsigned int id, struct clk *clk,
int tegra_io_pad_power_enable(enum tegra_io_pad id);
int tegra_io_pad_power_disable(enum tegra_io_pad id);
-/* deprecated, use tegra_io_pad_power_{enable,disable}() instead */
-int tegra_io_rail_power_on(unsigned int id);
-int tegra_io_rail_power_off(unsigned int id);
-
void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode);
void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode);
@@ -211,16 +203,6 @@ static inline int tegra_io_pad_get_voltage(enum tegra_io_pad id)
return -ENOSYS;
}
-static inline int tegra_io_rail_power_on(unsigned int id)
-{
- return -ENOSYS;
-}
-
-static inline int tegra_io_rail_power_off(unsigned int id)
-{
- return -ENOSYS;
-}
-
static inline void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode)
{
}
diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index 8c18e8b6d27d..b24716ab2750 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -75,6 +75,7 @@
#define CS35L56_DSP1_AHBM_WINDOW_DEBUG_0 0x25E2040
#define CS35L56_DSP1_AHBM_WINDOW_DEBUG_1 0x25E2044
#define CS35L56_DSP1_XMEM_UNPACKED24_0 0x2800000
+#define CS35L56_DSP1_FW_VER 0x2800010
#define CS35L56_DSP1_HALO_STATE_A1 0x2801E58
#define CS35L56_DSP1_HALO_STATE 0x28021E0
#define CS35L56_DSP1_PM_CUR_STATE_A1 0x2804000
@@ -241,7 +242,7 @@
#define CS35L56_CONTROL_PORT_READY_US 2200
#define CS35L56_HALO_STATE_POLL_US 1000
-#define CS35L56_HALO_STATE_TIMEOUT_US 50000
+#define CS35L56_HALO_STATE_TIMEOUT_US 250000
#define CS35L56_RESET_PULSE_MIN_US 1100
#define CS35L56_WAKE_HOLD_TIME_US 1000
@@ -272,6 +273,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC];
extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC];
int cs35l56_set_patch(struct cs35l56_base *cs35l56_base);
+int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base);
int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command);
int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base);
@@ -284,7 +286,10 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base);
int cs35l56_runtime_suspend_common(struct cs35l56_base *cs35l56_base);
int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_soundwire);
void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp);
+int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
+ bool *fw_missing, unsigned int *fw_version);
int cs35l56_hw_init(struct cs35l56_base *cs35l56_base);
+int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base);
int cs35l56_get_bclk_freq_id(unsigned int freq);
void cs35l56_fill_supply_names(struct regulator_bulk_data *data);
diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h
index ecc02e955279..1f4c39922d82 100644
--- a/include/sound/soc-card.h
+++ b/include/sound/soc-card.h
@@ -30,6 +30,8 @@ static inline void snd_soc_card_mutex_unlock(struct snd_soc_card *card)
struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
const char *name);
+struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card,
+ const char *name);
int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type,
struct snd_soc_jack *jack);
int snd_soc_card_jack_new_pins(struct snd_soc_card *card, const char *id,
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index b00d65417c31..9aff384941de 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -142,6 +142,7 @@ struct tasdevice_priv {
void tas2781_reset(struct tasdevice_priv *tas_dev);
int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+ struct module *module,
void (*cont)(const struct firmware *fw, void *context));
struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c);
int tasdevice_init(struct tasdevice_priv *tas_priv);
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 8d73171cb9f0..450c44c83a5d 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -1071,6 +1071,31 @@ TRACE_EVENT(afs_file_error,
__print_symbolic(__entry->where, afs_file_errors))
);
+TRACE_EVENT(afs_bulkstat_error,
+ TP_PROTO(struct afs_operation *op, struct afs_fid *fid, unsigned int index, s32 abort),
+
+ TP_ARGS(op, fid, index, abort),
+
+ TP_STRUCT__entry(
+ __field_struct(struct afs_fid, fid)
+ __field(unsigned int, op)
+ __field(unsigned int, index)
+ __field(s32, abort)
+ ),
+
+ TP_fast_assign(
+ __entry->op = op->debug_id;
+ __entry->fid = *fid;
+ __entry->index = index;
+ __entry->abort = abort;
+ ),
+
+ TP_printk("OP=%08x[%02x] %llx:%llx:%x a=%d",
+ __entry->op, __entry->index,
+ __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+ __entry->abort)
+ );
+
TRACE_EVENT(afs_cm_no_server,
TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
@@ -1164,8 +1189,8 @@ TRACE_EVENT(afs_flock_op,
__entry->from = fl->fl_start;
__entry->len = fl->fl_end - fl->fl_start + 1;
__entry->op = op;
- __entry->type = fl->fl_type;
- __entry->flags = fl->fl_flags;
+ __entry->type = fl->c.flc_type;
+ __entry->flags = fl->c.flc_flags;
__entry->debug_id = fl->fl_u.afs.debug_id;
),
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 65029dfb92fb..a697f4b77162 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -772,15 +772,14 @@ TRACE_EVENT(ext4_mb_release_group_pa,
);
TRACE_EVENT(ext4_discard_preallocations,
- TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed),
+ TP_PROTO(struct inode *inode, unsigned int len),
- TP_ARGS(inode, len, needed),
+ TP_ARGS(inode, len),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, len )
- __field( unsigned int, needed )
),
@@ -788,13 +787,11 @@ TRACE_EVENT(ext4_discard_preallocations,
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->len = len;
- __entry->needed = needed;
),
- TP_printk("dev %d,%d ino %lu len: %u needed %u",
+ TP_printk("dev %d,%d ino %lu len: %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long) __entry->ino, __entry->len,
- __entry->needed)
+ (unsigned long) __entry->ino, __entry->len)
);
TRACE_EVENT(ext4_mb_discard_preallocations,
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index 1646dadd7f37..b8d1e00a7982 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -68,11 +68,11 @@ DECLARE_EVENT_CLASS(filelock_lock,
__field(struct file_lock *, fl)
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
- __field(struct file_lock *, fl_blocker)
- __field(fl_owner_t, fl_owner)
- __field(unsigned int, fl_pid)
- __field(unsigned int, fl_flags)
- __field(unsigned char, fl_type)
+ __field(struct file_lock_core *, blocker)
+ __field(fl_owner_t, owner)
+ __field(unsigned int, pid)
+ __field(unsigned int, flags)
+ __field(unsigned char, type)
__field(loff_t, fl_start)
__field(loff_t, fl_end)
__field(int, ret)
@@ -82,11 +82,11 @@ DECLARE_EVENT_CLASS(filelock_lock,
__entry->fl = fl ? fl : NULL;
__entry->s_dev = inode->i_sb->s_dev;
__entry->i_ino = inode->i_ino;
- __entry->fl_blocker = fl ? fl->fl_blocker : NULL;
- __entry->fl_owner = fl ? fl->fl_owner : NULL;
- __entry->fl_pid = fl ? fl->fl_pid : 0;
- __entry->fl_flags = fl ? fl->fl_flags : 0;
- __entry->fl_type = fl ? fl->fl_type : 0;
+ __entry->blocker = fl ? fl->c.flc_blocker : NULL;
+ __entry->owner = fl ? fl->c.flc_owner : NULL;
+ __entry->pid = fl ? fl->c.flc_pid : 0;
+ __entry->flags = fl ? fl->c.flc_flags : 0;
+ __entry->type = fl ? fl->c.flc_type : 0;
__entry->fl_start = fl ? fl->fl_start : 0;
__entry->fl_end = fl ? fl->fl_end : 0;
__entry->ret = ret;
@@ -94,9 +94,9 @@ DECLARE_EVENT_CLASS(filelock_lock,
TP_printk("fl=%p dev=0x%x:0x%x ino=0x%lx fl_blocker=%p fl_owner=%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
__entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
- __entry->i_ino, __entry->fl_blocker, __entry->fl_owner,
- __entry->fl_pid, show_fl_flags(__entry->fl_flags),
- show_fl_type(__entry->fl_type),
+ __entry->i_ino, __entry->blocker, __entry->owner,
+ __entry->pid, show_fl_flags(__entry->flags),
+ show_fl_type(__entry->type),
__entry->fl_start, __entry->fl_end, __entry->ret)
);
@@ -117,59 +117,59 @@ DEFINE_EVENT(filelock_lock, flock_lock_inode,
TP_ARGS(inode, fl, ret));
DECLARE_EVENT_CLASS(filelock_lease,
- TP_PROTO(struct inode *inode, struct file_lock *fl),
+ TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl),
TP_STRUCT__entry(
- __field(struct file_lock *, fl)
+ __field(struct file_lease *, fl)
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
- __field(struct file_lock *, fl_blocker)
- __field(fl_owner_t, fl_owner)
- __field(unsigned int, fl_flags)
- __field(unsigned char, fl_type)
- __field(unsigned long, fl_break_time)
- __field(unsigned long, fl_downgrade_time)
+ __field(struct file_lock_core *, blocker)
+ __field(fl_owner_t, owner)
+ __field(unsigned int, flags)
+ __field(unsigned char, type)
+ __field(unsigned long, break_time)
+ __field(unsigned long, downgrade_time)
),
TP_fast_assign(
__entry->fl = fl ? fl : NULL;
__entry->s_dev = inode->i_sb->s_dev;
__entry->i_ino = inode->i_ino;
- __entry->fl_blocker = fl ? fl->fl_blocker : NULL;
- __entry->fl_owner = fl ? fl->fl_owner : NULL;
- __entry->fl_flags = fl ? fl->fl_flags : 0;
- __entry->fl_type = fl ? fl->fl_type : 0;
- __entry->fl_break_time = fl ? fl->fl_break_time : 0;
- __entry->fl_downgrade_time = fl ? fl->fl_downgrade_time : 0;
+ __entry->blocker = fl ? fl->c.flc_blocker : NULL;
+ __entry->owner = fl ? fl->c.flc_owner : NULL;
+ __entry->flags = fl ? fl->c.flc_flags : 0;
+ __entry->type = fl ? fl->c.flc_type : 0;
+ __entry->break_time = fl ? fl->fl_break_time : 0;
+ __entry->downgrade_time = fl ? fl->fl_downgrade_time : 0;
),
TP_printk("fl=%p dev=0x%x:0x%x ino=0x%lx fl_blocker=%p fl_owner=%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu",
__entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
- __entry->i_ino, __entry->fl_blocker, __entry->fl_owner,
- show_fl_flags(__entry->fl_flags),
- show_fl_type(__entry->fl_type),
- __entry->fl_break_time, __entry->fl_downgrade_time)
+ __entry->i_ino, __entry->blocker, __entry->owner,
+ show_fl_flags(__entry->flags),
+ show_fl_type(__entry->type),
+ __entry->break_time, __entry->downgrade_time)
);
-DEFINE_EVENT(filelock_lease, break_lease_noblock, TP_PROTO(struct inode *inode, struct file_lock *fl),
+DEFINE_EVENT(filelock_lease, break_lease_noblock, TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl));
-DEFINE_EVENT(filelock_lease, break_lease_block, TP_PROTO(struct inode *inode, struct file_lock *fl),
+DEFINE_EVENT(filelock_lease, break_lease_block, TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl));
-DEFINE_EVENT(filelock_lease, break_lease_unblock, TP_PROTO(struct inode *inode, struct file_lock *fl),
+DEFINE_EVENT(filelock_lease, break_lease_unblock, TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl));
-DEFINE_EVENT(filelock_lease, generic_delete_lease, TP_PROTO(struct inode *inode, struct file_lock *fl),
+DEFINE_EVENT(filelock_lease, generic_delete_lease, TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl));
-DEFINE_EVENT(filelock_lease, time_out_leases, TP_PROTO(struct inode *inode, struct file_lock *fl),
+DEFINE_EVENT(filelock_lease, time_out_leases, TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl));
TRACE_EVENT(generic_add_lease,
- TP_PROTO(struct inode *inode, struct file_lock *fl),
+ TP_PROTO(struct inode *inode, struct file_lease *fl),
TP_ARGS(inode, fl),
@@ -179,9 +179,9 @@ TRACE_EVENT(generic_add_lease,
__field(int, rcount)
__field(int, icount)
__field(dev_t, s_dev)
- __field(fl_owner_t, fl_owner)
- __field(unsigned int, fl_flags)
- __field(unsigned char, fl_type)
+ __field(fl_owner_t, owner)
+ __field(unsigned int, flags)
+ __field(unsigned char, type)
),
TP_fast_assign(
@@ -190,21 +190,21 @@ TRACE_EVENT(generic_add_lease,
__entry->wcount = atomic_read(&inode->i_writecount);
__entry->rcount = atomic_read(&inode->i_readcount);
__entry->icount = atomic_read(&inode->i_count);
- __entry->fl_owner = fl->fl_owner;
- __entry->fl_flags = fl->fl_flags;
- __entry->fl_type = fl->fl_type;
+ __entry->owner = fl->c.flc_owner;
+ __entry->flags = fl->c.flc_flags;
+ __entry->type = fl->c.flc_type;
),
TP_printk("dev=0x%x:0x%x ino=0x%lx wcount=%d rcount=%d icount=%d fl_owner=%p fl_flags=%s fl_type=%s",
MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
__entry->i_ino, __entry->wcount, __entry->rcount,
- __entry->icount, __entry->fl_owner,
- show_fl_flags(__entry->fl_flags),
- show_fl_type(__entry->fl_type))
+ __entry->icount, __entry->owner,
+ show_fl_flags(__entry->flags),
+ show_fl_type(__entry->type))
);
TRACE_EVENT(leases_conflict,
- TP_PROTO(bool conflict, struct file_lock *lease, struct file_lock *breaker),
+ TP_PROTO(bool conflict, struct file_lease *lease, struct file_lease *breaker),
TP_ARGS(conflict, lease, breaker),
@@ -220,11 +220,11 @@ TRACE_EVENT(leases_conflict,
TP_fast_assign(
__entry->lease = lease;
- __entry->l_fl_flags = lease->fl_flags;
- __entry->l_fl_type = lease->fl_type;
+ __entry->l_fl_flags = lease->c.flc_flags;
+ __entry->l_fl_type = lease->c.flc_type;
__entry->breaker = breaker;
- __entry->b_fl_flags = breaker->fl_flags;
- __entry->b_fl_type = breaker->fl_type;
+ __entry->b_fl_flags = breaker->c.flc_flags;
+ __entry->b_fl_type = breaker->c.flc_type;
__entry->conflict = conflict;
),
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 69454f1f98b0..e948df7ce625 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -148,7 +148,7 @@ TRACE_EVENT(io_uring_queue_async_work,
__field( void *, req )
__field( u64, user_data )
__field( u8, opcode )
- __field( unsigned int, flags )
+ __field( unsigned long long, flags )
__field( struct io_wq_work *, work )
__field( int, rw )
@@ -159,7 +159,7 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->ctx = req->ctx;
__entry->req = req;
__entry->user_data = req->cqe.user_data;
- __entry->flags = req->flags;
+ __entry->flags = (__force unsigned long long) req->flags;
__entry->opcode = req->opcode;
__entry->work = &req->work;
__entry->rw = rw;
@@ -167,10 +167,10 @@ TRACE_EVENT(io_uring_queue_async_work,
__assign_str(op_str, io_uring_get_opcode(req->opcode));
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%llx, %s queue, work %p",
__entry->ctx, __entry->req, __entry->user_data,
- __get_str(op_str),
- __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
+ __get_str(op_str), __entry->flags,
+ __entry->rw ? "hashed" : "normal", __entry->work)
);
/**
@@ -378,7 +378,7 @@ TRACE_EVENT(io_uring_submit_req,
__field( void *, req )
__field( unsigned long long, user_data )
__field( u8, opcode )
- __field( u32, flags )
+ __field( unsigned long long, flags )
__field( bool, sq_thread )
__string( op_str, io_uring_get_opcode(req->opcode) )
@@ -389,16 +389,16 @@ TRACE_EVENT(io_uring_submit_req,
__entry->req = req;
__entry->user_data = req->cqe.user_data;
__entry->opcode = req->opcode;
- __entry->flags = req->flags;
+ __entry->flags = (__force unsigned long long) req->flags;
__entry->sq_thread = req->ctx->flags & IORING_SETUP_SQPOLL;
__assign_str(op_str, io_uring_get_opcode(req->opcode));
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%llx, "
"sq_thread %d", __entry->ctx, __entry->req,
- __entry->user_data, __get_str(op_str),
- __entry->flags, __entry->sq_thread)
+ __entry->user_data, __get_str(op_str), __entry->flags,
+ __entry->sq_thread)
);
/*
@@ -602,29 +602,25 @@ TRACE_EVENT(io_uring_cqe_overflow,
*
* @tctx: pointer to a io_uring_task
* @count: how many functions it ran
- * @loops: how many loops it ran
*
*/
TRACE_EVENT(io_uring_task_work_run,
- TP_PROTO(void *tctx, unsigned int count, unsigned int loops),
+ TP_PROTO(void *tctx, unsigned int count),
- TP_ARGS(tctx, count, loops),
+ TP_ARGS(tctx, count),
TP_STRUCT__entry (
__field( void *, tctx )
__field( unsigned int, count )
- __field( unsigned int, loops )
),
TP_fast_assign(
__entry->tctx = tctx;
__entry->count = count;
- __entry->loops = loops;
),
- TP_printk("tctx %p, count %u, loops %u",
- __entry->tctx, __entry->count, __entry->loops)
+ TP_printk("tctx %p, count %u", __entry->tctx, __entry->count)
);
TRACE_EVENT(io_uring_short_write,
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 6678cf8b235b..dc03cf8e0369 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -36,6 +36,39 @@ TRACE_EVENT(napi_poll,
__entry->work, __entry->budget)
);
+TRACE_EVENT(dql_stall_detected,
+
+ TP_PROTO(unsigned short thrs, unsigned int len,
+ unsigned long last_reap, unsigned long hist_head,
+ unsigned long now, unsigned long *hist),
+
+ TP_ARGS(thrs, len, last_reap, hist_head, now, hist),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, thrs)
+ __field( unsigned int, len)
+ __field( unsigned long, last_reap)
+ __field( unsigned long, hist_head)
+ __field( unsigned long, now)
+ __array( unsigned long, hist, 4)
+ ),
+
+ TP_fast_assign(
+ __entry->thrs = thrs;
+ __entry->len = len;
+ __entry->last_reap = last_reap;
+ __entry->hist_head = hist_head * BITS_PER_LONG;
+ __entry->now = now;
+ memcpy(__entry->hist, hist, sizeof(entry->hist));
+ ),
+
+ TP_printk("thrs %u len %u last_reap %lu hist_head %lu now %lu hist %016lx %016lx %016lx %016lx",
+ __entry->thrs, __entry->len,
+ __entry->last_reap, __entry->hist_head, __entry->now,
+ __entry->hist[0], __entry->hist[1],
+ __entry->hist[2], __entry->hist[3])
+);
+
#undef NO_DEV
#endif /* _TRACE_NAPI_H */
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index a3995925cb05..1f4258308b96 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset,
TP_ARGS(q),
TP_STRUCT__entry(
- __string( dev, qdisc_dev(q) )
- __string( kind, q->ops->id )
- __field( u32, parent )
- __field( u32, handle )
+ __string( dev, qdisc_dev(q)->name )
+ __string( kind, q->ops->id )
+ __field( u32, parent )
+ __field( u32, handle )
),
TP_fast_assign(
- __assign_str(dev, qdisc_dev(q));
+ __assign_str(dev, qdisc_dev(q)->name);
__assign_str(kind, q->ops->id);
__entry->parent = q->parent;
__entry->handle = q->handle;
@@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy,
TP_ARGS(q),
TP_STRUCT__entry(
- __string( dev, qdisc_dev(q) )
- __string( kind, q->ops->id )
- __field( u32, parent )
- __field( u32, handle )
+ __string( dev, qdisc_dev(q)->name )
+ __string( kind, q->ops->id )
+ __field( u32, parent )
+ __field( u32, handle )
),
TP_fast_assign(
- __assign_str(dev, qdisc_dev(q));
+ __assign_str(dev, qdisc_dev(q)->name);
__assign_str(kind, q->ops->id);
__entry->parent = q->parent;
__entry->handle = q->handle;
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 110c1475c527..027ac3ab457d 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2118,6 +2118,10 @@ DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_write);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err);
+DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_reply);
+DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_flush);
+DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_err);
+
TRACE_EVENT(svcrdma_qp_error,
TP_PROTO(
const struct ib_event *event,
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 4c1ef7b3705c..a1b126a6b0d7 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -83,7 +83,7 @@
EM(rxrpc_badmsg_bad_abort, "bad-abort") \
EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \
EM(rxrpc_badmsg_short_ack, "short-ack") \
- EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \
+ EM(rxrpc_badmsg_short_ack_trailer, "short-ack-trailer") \
EM(rxrpc_badmsg_short_hdr, "short-hdr") \
EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \
EM(rxrpc_badmsg_zero_call, "zero-call") \
@@ -119,6 +119,7 @@
EM(rxrpc_call_poke_complete, "Compl") \
EM(rxrpc_call_poke_error, "Error") \
EM(rxrpc_call_poke_idle, "Idle") \
+ EM(rxrpc_call_poke_set_timeout, "Set-timo") \
EM(rxrpc_call_poke_start, "Start") \
EM(rxrpc_call_poke_timer, "Timer") \
E_(rxrpc_call_poke_timer_now, "Timer-now")
@@ -128,6 +129,7 @@
EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \
EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \
EM(rxrpc_skb_get_conn_work, "GET conn-work") \
+ EM(rxrpc_skb_get_last_nack, "GET last-nack") \
EM(rxrpc_skb_get_local_work, "GET locl-work") \
EM(rxrpc_skb_get_reject_work, "GET rej-work ") \
EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \
@@ -141,6 +143,7 @@
EM(rxrpc_skb_put_error_report, "PUT error-rep") \
EM(rxrpc_skb_put_input, "PUT input ") \
EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \
+ EM(rxrpc_skb_put_last_nack, "PUT last-nack") \
EM(rxrpc_skb_put_purge, "PUT purge ") \
EM(rxrpc_skb_put_rotate, "PUT rotate ") \
EM(rxrpc_skb_put_unknown, "PUT unknown ") \
@@ -338,35 +341,26 @@
E_(rxrpc_rtt_rx_requested_ack, "RACK")
#define rxrpc_timer_traces \
- EM(rxrpc_timer_begin, "Begin ") \
- EM(rxrpc_timer_exp_ack, "ExpAck") \
- EM(rxrpc_timer_exp_hard, "ExpHrd") \
- EM(rxrpc_timer_exp_idle, "ExpIdl") \
- EM(rxrpc_timer_exp_keepalive, "ExpKA ") \
- EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \
- EM(rxrpc_timer_exp_normal, "ExpNml") \
- EM(rxrpc_timer_exp_ping, "ExpPng") \
- EM(rxrpc_timer_exp_resend, "ExpRsn") \
- EM(rxrpc_timer_init_for_reply, "IniRpl") \
- EM(rxrpc_timer_init_for_send_reply, "SndRpl") \
- EM(rxrpc_timer_restart, "Restrt") \
- EM(rxrpc_timer_set_for_ack, "SetAck") \
- EM(rxrpc_timer_set_for_hard, "SetHrd") \
- EM(rxrpc_timer_set_for_idle, "SetIdl") \
- EM(rxrpc_timer_set_for_keepalive, "KeepAl") \
- EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \
- EM(rxrpc_timer_set_for_normal, "SetNml") \
- EM(rxrpc_timer_set_for_ping, "SetPng") \
- EM(rxrpc_timer_set_for_resend, "SetRTx") \
- E_(rxrpc_timer_set_for_send, "SetSnd")
+ EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \
+ EM(rxrpc_timer_trace_expect_rx, "ExpectRx ") \
+ EM(rxrpc_timer_trace_hard, "HardLimit") \
+ EM(rxrpc_timer_trace_idle, "IdleLimit") \
+ EM(rxrpc_timer_trace_keepalive, "KeepAlive") \
+ EM(rxrpc_timer_trace_lost_ack, "LostAck ") \
+ EM(rxrpc_timer_trace_ping, "DelayPing") \
+ EM(rxrpc_timer_trace_resend, "Resend ") \
+ EM(rxrpc_timer_trace_resend_reset, "ResendRst") \
+ E_(rxrpc_timer_trace_resend_tx, "ResendTx ")
#define rxrpc_propose_ack_traces \
EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \
+ EM(rxrpc_propose_ack_delayed_ack, "DlydAck") \
EM(rxrpc_propose_ack_input_data, "DataIn ") \
EM(rxrpc_propose_ack_input_data_hole, "DataInH") \
EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \
EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
+ EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \
EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \
EM(rxrpc_propose_ack_ping_for_params, "Params ") \
EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \
@@ -1082,9 +1076,9 @@ TRACE_EVENT(rxrpc_tx_packet,
TRACE_EVENT(rxrpc_tx_data,
TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
- rxrpc_serial_t serial, u8 flags, bool retrans, bool lose),
+ rxrpc_serial_t serial, unsigned int flags, bool lose),
- TP_ARGS(call, seq, serial, flags, retrans, lose),
+ TP_ARGS(call, seq, serial, flags, lose),
TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1092,8 +1086,7 @@ TRACE_EVENT(rxrpc_tx_data,
__field(rxrpc_serial_t, serial)
__field(u32, cid)
__field(u32, call_id)
- __field(u8, flags)
- __field(bool, retrans)
+ __field(u16, flags)
__field(bool, lose)
),
@@ -1104,7 +1097,6 @@ TRACE_EVENT(rxrpc_tx_data,
__entry->seq = seq;
__entry->serial = serial;
__entry->flags = flags;
- __entry->retrans = retrans;
__entry->lose = lose;
),
@@ -1114,8 +1106,8 @@ TRACE_EVENT(rxrpc_tx_data,
__entry->call_id,
__entry->serial,
__entry->seq,
- __entry->flags,
- __entry->retrans ? " *RETRANS*" : "",
+ __entry->flags & RXRPC_TXBUF_WIRE_FLAGS,
+ __entry->flags & RXRPC_TXBUF_RESENT ? " *RETRANS*" : "",
__entry->lose ? " *LOSE*" : "")
);
@@ -1312,90 +1304,112 @@ TRACE_EVENT(rxrpc_rtt_rx,
__entry->rto)
);
-TRACE_EVENT(rxrpc_timer,
- TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why,
- unsigned long now),
+TRACE_EVENT(rxrpc_timer_set,
+ TP_PROTO(struct rxrpc_call *call, ktime_t delay,
+ enum rxrpc_timer_trace why),
- TP_ARGS(call, why, now),
+ TP_ARGS(call, delay, why),
TP_STRUCT__entry(
__field(unsigned int, call)
__field(enum rxrpc_timer_trace, why)
- __field(long, now)
- __field(long, ack_at)
- __field(long, ack_lost_at)
- __field(long, resend_at)
- __field(long, ping_at)
- __field(long, expect_rx_by)
- __field(long, expect_req_by)
- __field(long, expect_term_by)
- __field(long, timer)
+ __field(ktime_t, delay)
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->why = why;
- __entry->now = now;
- __entry->ack_at = call->delay_ack_at;
- __entry->ack_lost_at = call->ack_lost_at;
- __entry->resend_at = call->resend_at;
- __entry->expect_rx_by = call->expect_rx_by;
- __entry->expect_req_by = call->expect_req_by;
- __entry->expect_term_by = call->expect_term_by;
- __entry->timer = call->timer.expires;
+ __entry->delay = delay;
),
- TP_printk("c=%08x %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld",
+ TP_printk("c=%08x %s to=%lld",
__entry->call,
__print_symbolic(__entry->why, rxrpc_timer_traces),
- __entry->ack_at - __entry->now,
- __entry->ack_lost_at - __entry->now,
- __entry->resend_at - __entry->now,
- __entry->expect_rx_by - __entry->now,
- __entry->expect_req_by - __entry->now,
- __entry->expect_term_by - __entry->now,
- __entry->timer - __entry->now)
+ ktime_to_us(__entry->delay))
+ );
+
+TRACE_EVENT(rxrpc_timer_exp,
+ TP_PROTO(struct rxrpc_call *call, ktime_t delay,
+ enum rxrpc_timer_trace why),
+
+ TP_ARGS(call, delay, why),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(enum rxrpc_timer_trace, why)
+ __field(ktime_t, delay)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->why = why;
+ __entry->delay = delay;
+ ),
+
+ TP_printk("c=%08x %s to=%lld",
+ __entry->call,
+ __print_symbolic(__entry->why, rxrpc_timer_traces),
+ ktime_to_us(__entry->delay))
+ );
+
+TRACE_EVENT(rxrpc_timer_can,
+ TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why),
+
+ TP_ARGS(call, why),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(enum rxrpc_timer_trace, why)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->why = why;
+ ),
+
+ TP_printk("c=%08x %s",
+ __entry->call,
+ __print_symbolic(__entry->why, rxrpc_timer_traces))
+ );
+
+TRACE_EVENT(rxrpc_timer_restart,
+ TP_PROTO(struct rxrpc_call *call, ktime_t delay, unsigned long delayj),
+
+ TP_ARGS(call, delay, delayj),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(unsigned long, delayj)
+ __field(ktime_t, delay)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->delayj = delayj;
+ __entry->delay = delay;
+ ),
+
+ TP_printk("c=%08x to=%lld j=%ld",
+ __entry->call,
+ ktime_to_us(__entry->delay),
+ __entry->delayj)
);
TRACE_EVENT(rxrpc_timer_expired,
- TP_PROTO(struct rxrpc_call *call, unsigned long now),
+ TP_PROTO(struct rxrpc_call *call),
- TP_ARGS(call, now),
+ TP_ARGS(call),
TP_STRUCT__entry(
__field(unsigned int, call)
- __field(long, now)
- __field(long, ack_at)
- __field(long, ack_lost_at)
- __field(long, resend_at)
- __field(long, ping_at)
- __field(long, expect_rx_by)
- __field(long, expect_req_by)
- __field(long, expect_term_by)
- __field(long, timer)
),
TP_fast_assign(
__entry->call = call->debug_id;
- __entry->now = now;
- __entry->ack_at = call->delay_ack_at;
- __entry->ack_lost_at = call->ack_lost_at;
- __entry->resend_at = call->resend_at;
- __entry->expect_rx_by = call->expect_rx_by;
- __entry->expect_req_by = call->expect_req_by;
- __entry->expect_term_by = call->expect_term_by;
- __entry->timer = call->timer.expires;
),
- TP_printk("c=%08x EXPIRED a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld",
- __entry->call,
- __entry->ack_at - __entry->now,
- __entry->ack_lost_at - __entry->now,
- __entry->resend_at - __entry->now,
- __entry->expect_rx_by - __entry->now,
- __entry->expect_req_by - __entry->now,
- __entry->expect_term_by - __entry->now,
- __entry->timer - __entry->now)
+ TP_printk("c=%08x EXPIRED",
+ __entry->call)
);
TRACE_EVENT(rxrpc_rx_lose,
@@ -1504,26 +1518,30 @@ TRACE_EVENT(rxrpc_drop_ack,
);
TRACE_EVENT(rxrpc_retransmit,
- TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, s64 expiry),
+ TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+ rxrpc_serial_t serial, ktime_t expiry),
- TP_ARGS(call, seq, expiry),
+ TP_ARGS(call, seq, serial, expiry),
TP_STRUCT__entry(
__field(unsigned int, call)
__field(rxrpc_seq_t, seq)
- __field(s64, expiry)
+ __field(rxrpc_serial_t, serial)
+ __field(ktime_t, expiry)
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->seq = seq;
+ __entry->serial = serial;
__entry->expiry = expiry;
),
- TP_printk("c=%08x q=%x xp=%lld",
+ TP_printk("c=%08x q=%x r=%x xp=%lld",
__entry->call,
__entry->seq,
- __entry->expiry)
+ __entry->serial,
+ ktime_to_us(__entry->expiry))
);
TRACE_EVENT(rxrpc_congest,
@@ -1552,7 +1570,7 @@ TRACE_EVENT(rxrpc_congest,
memcpy(&__entry->sum, summary, sizeof(__entry->sum));
),
- TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
+ TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s",
__entry->call,
__entry->ack_serial,
__print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
@@ -1560,9 +1578,9 @@ TRACE_EVENT(rxrpc_congest,
__print_symbolic(__entry->sum.mode, rxrpc_congest_modes),
__entry->sum.cwnd,
__entry->sum.ssthresh,
- __entry->sum.nr_acks, __entry->sum.saw_nacks,
+ __entry->sum.nr_acks, __entry->sum.nr_retained_nacks,
__entry->sum.nr_new_acks,
- __entry->sum.nr_rot_new_acks,
+ __entry->sum.nr_new_nacks,
__entry->top - __entry->hard_ack,
__entry->sum.cumulative_acks,
__entry->sum.dup_acks,
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 7b1ddffa3dfc..699dafd204ea 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -88,7 +88,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
),
- TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s",
+ TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s",
+ __entry->skbaddr, __entry->skaddr,
show_family_name(__entry->family),
__entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
@@ -258,6 +259,8 @@ TRACE_EVENT(tcp_probe,
__field(__u32, srtt)
__field(__u32, rcv_wnd)
__field(__u64, sock_cookie)
+ __field(const void *, skbaddr)
+ __field(const void *, skaddr)
),
TP_fast_assign(
@@ -285,14 +288,18 @@ TRACE_EVENT(tcp_probe,
__entry->ssthresh = tcp_current_ssthresh(sk);
__entry->srtt = tp->srtt_us >> 3;
__entry->sock_cookie = sock_gen_cookie(sk);
+
+ __entry->skbaddr = skb;
+ __entry->skaddr = sk;
),
- TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
+ TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx skbaddr=%p skaddr=%p",
show_family_name(__entry->family),
__entry->saddr, __entry->daddr, __entry->mark,
__entry->data_len, __entry->snd_nxt, __entry->snd_una,
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
- __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
+ __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
+ __entry->skbaddr, __entry->skaddr)
);
#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb) \
@@ -361,7 +368,8 @@ DECLARE_EVENT_CLASS(tcp_event_skb,
TP_STORE_ADDR_PORTS_SKB(__entry, skb);
),
- TP_printk("src=%pISpc dest=%pISpc", __entry->saddr, __entry->daddr)
+ TP_printk("skbaddr=%p src=%pISpc dest=%pISpc",
+ __entry->skbaddr, __entry->saddr, __entry->daddr)
);
DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h
new file mode 100644
index 000000000000..79f19e76a80b
--- /dev/null
+++ b/include/trace/events/timer_migration.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM timer_migration
+
+#if !defined(_TRACE_TIMER_MIGRATION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TIMER_MIGRATION_H
+
+#include <linux/tracepoint.h>
+
+/* Group events */
+TRACE_EVENT(tmigr_group_set,
+
+ TP_PROTO(struct tmigr_group *group),
+
+ TP_ARGS(group),
+
+ TP_STRUCT__entry(
+ __field( void *, group )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ ),
+
+ TP_fast_assign(
+ __entry->group = group;
+ __entry->lvl = group->level;
+ __entry->numa_node = group->numa_node;
+ ),
+
+ TP_printk("group=%p lvl=%d numa=%d",
+ __entry->group, __entry->lvl, __entry->numa_node)
+);
+
+TRACE_EVENT(tmigr_connect_child_parent,
+
+ TP_PROTO(struct tmigr_group *child),
+
+ TP_ARGS(child),
+
+ TP_STRUCT__entry(
+ __field( void *, child )
+ __field( void *, parent )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ __field( unsigned int, num_children )
+ __field( u32, childmask )
+ ),
+
+ TP_fast_assign(
+ __entry->child = child;
+ __entry->parent = child->parent;
+ __entry->lvl = child->parent->level;
+ __entry->numa_node = child->parent->numa_node;
+ __entry->num_children = child->parent->num_children;
+ __entry->childmask = child->childmask;
+ ),
+
+ TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+ __entry->child, __entry->childmask, __entry->parent,
+ __entry->lvl, __entry->numa_node, __entry->num_children)
+);
+
+TRACE_EVENT(tmigr_connect_cpu_parent,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc),
+
+ TP_STRUCT__entry(
+ __field( void *, parent )
+ __field( unsigned int, cpu )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ __field( unsigned int, num_children )
+ __field( u32, childmask )
+ ),
+
+ TP_fast_assign(
+ __entry->parent = tmc->tmgroup;
+ __entry->cpu = tmc->cpuevt.cpu;
+ __entry->lvl = tmc->tmgroup->level;
+ __entry->numa_node = tmc->tmgroup->numa_node;
+ __entry->num_children = tmc->tmgroup->num_children;
+ __entry->childmask = tmc->childmask;
+ ),
+
+ TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+ __entry->cpu, __entry->childmask, __entry->parent,
+ __entry->lvl, __entry->numa_node, __entry->num_children)
+);
+
+DECLARE_EVENT_CLASS(tmigr_group_and_cpu,
+
+ TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+ TP_ARGS(group, state, childmask),
+
+ TP_STRUCT__entry(
+ __field( void *, group )
+ __field( void *, parent )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ __field( u32, childmask )
+ __field( u8, active )
+ __field( u8, migrator )
+ ),
+
+ TP_fast_assign(
+ __entry->group = group;
+ __entry->parent = group->parent;
+ __entry->lvl = group->level;
+ __entry->numa_node = group->numa_node;
+ __entry->childmask = childmask;
+ __entry->active = state.active;
+ __entry->migrator = state.migrator;
+ ),
+
+ TP_printk("group=%p lvl=%d numa=%d active=%0x migrator=%0x "
+ "parent=%p childmask=%0x",
+ __entry->group, __entry->lvl, __entry->numa_node,
+ __entry->active, __entry->migrator,
+ __entry->parent, __entry->childmask)
+);
+
+DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_inactive,
+
+ TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+ TP_ARGS(group, state, childmask)
+);
+
+DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_active,
+
+ TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+ TP_ARGS(group, state, childmask)
+);
+
+/* CPU events*/
+DECLARE_EVENT_CLASS(tmigr_cpugroup,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc),
+
+ TP_STRUCT__entry(
+ __field( u64, wakeup )
+ __field( void *, parent )
+ __field( unsigned int, cpu )
+
+ ),
+
+ TP_fast_assign(
+ __entry->wakeup = tmc->wakeup;
+ __entry->parent = tmc->tmgroup;
+ __entry->cpu = tmc->cpuevt.cpu;
+ ),
+
+ TP_printk("cpu=%d parent=%p wakeup=%llu", __entry->cpu, __entry->parent, __entry->wakeup)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_new_timer,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_handle_remote_cpu,
+
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DECLARE_EVENT_CLASS(tmigr_idle,
+
+ TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+ TP_ARGS(tmc, nextevt),
+
+ TP_STRUCT__entry(
+ __field( u64, nextevt)
+ __field( u64, wakeup)
+ __field( void *, parent)
+ __field( unsigned int, cpu)
+ ),
+
+ TP_fast_assign(
+ __entry->nextevt = nextevt;
+ __entry->wakeup = tmc->wakeup;
+ __entry->parent = tmc->tmgroup;
+ __entry->cpu = tmc->cpuevt.cpu;
+ ),
+
+ TP_printk("cpu=%d parent=%p nextevt=%llu wakeup=%llu",
+ __entry->cpu, __entry->parent, __entry->nextevt, __entry->wakeup)
+);
+
+DEFINE_EVENT(tmigr_idle, tmigr_cpu_idle,
+
+ TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+ TP_ARGS(tmc, nextevt)
+);
+
+DEFINE_EVENT(tmigr_idle, tmigr_cpu_new_timer_idle,
+
+ TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+ TP_ARGS(tmc, nextevt)
+);
+
+TRACE_EVENT(tmigr_update_events,
+
+ TP_PROTO(struct tmigr_group *child, struct tmigr_group *group,
+ union tmigr_state childstate, union tmigr_state groupstate,
+ u64 nextevt),
+
+ TP_ARGS(child, group, childstate, groupstate, nextevt),
+
+ TP_STRUCT__entry(
+ __field( void *, child )
+ __field( void *, group )
+ __field( u64, nextevt )
+ __field( u64, group_next_expiry )
+ __field( u64, child_evt_expiry )
+ __field( unsigned int, group_lvl )
+ __field( unsigned int, child_evtcpu )
+ __field( u8, child_active )
+ __field( u8, group_active )
+ ),
+
+ TP_fast_assign(
+ __entry->child = child;
+ __entry->group = group;
+ __entry->nextevt = nextevt;
+ __entry->group_next_expiry = group->next_expiry;
+ __entry->child_evt_expiry = child ? child->groupevt.nextevt.expires : 0;
+ __entry->group_lvl = group->level;
+ __entry->child_evtcpu = child ? child->groupevt.cpu : 0;
+ __entry->child_active = childstate.active;
+ __entry->group_active = groupstate.active;
+ ),
+
+ TP_printk("child=%p group=%p group_lvl=%d child_active=%0x group_active=%0x "
+ "nextevt=%llu next_expiry=%llu child_evt_expiry=%llu child_evtcpu=%d",
+ __entry->child, __entry->group, __entry->group_lvl, __entry->child_active,
+ __entry->group_active,
+ __entry->nextevt, __entry->group_next_expiry, __entry->child_evt_expiry,
+ __entry->child_evtcpu)
+);
+
+TRACE_EVENT(tmigr_handle_remote,
+
+ TP_PROTO(struct tmigr_group *group),
+
+ TP_ARGS(group),
+
+ TP_STRUCT__entry(
+ __field( void * , group )
+ __field( unsigned int , lvl )
+ ),
+
+ TP_fast_assign(
+ __entry->group = group;
+ __entry->lvl = group->level;
+ ),
+
+ TP_printk("group=%p lvl=%d",
+ __entry->group, __entry->lvl)
+);
+
+#endif /* _TRACE_TIMER_MIGRATION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h
index 0d9d48dca38a..64ab5dac59ce 100644
--- a/include/trace/misc/nfs.h
+++ b/include/trace/misc/nfs.h
@@ -385,3 +385,37 @@ TRACE_DEFINE_ENUM(IOMODE_ANY);
{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
{ SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
{ SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
+
+TRACE_DEFINE_ENUM(OP_CB_GETATTR);
+TRACE_DEFINE_ENUM(OP_CB_RECALL);
+TRACE_DEFINE_ENUM(OP_CB_LAYOUTRECALL);
+TRACE_DEFINE_ENUM(OP_CB_NOTIFY);
+TRACE_DEFINE_ENUM(OP_CB_PUSH_DELEG);
+TRACE_DEFINE_ENUM(OP_CB_RECALL_ANY);
+TRACE_DEFINE_ENUM(OP_CB_RECALLABLE_OBJ_AVAIL);
+TRACE_DEFINE_ENUM(OP_CB_RECALL_SLOT);
+TRACE_DEFINE_ENUM(OP_CB_SEQUENCE);
+TRACE_DEFINE_ENUM(OP_CB_WANTS_CANCELLED);
+TRACE_DEFINE_ENUM(OP_CB_NOTIFY_LOCK);
+TRACE_DEFINE_ENUM(OP_CB_NOTIFY_DEVICEID);
+TRACE_DEFINE_ENUM(OP_CB_OFFLOAD);
+TRACE_DEFINE_ENUM(OP_CB_ILLEGAL);
+
+#define show_nfs4_cb_op(x) \
+ __print_symbolic(x, \
+ { 0, "CB_NULL" }, \
+ { 1, "CB_COMPOUND" }, \
+ { OP_CB_GETATTR, "CB_GETATTR" }, \
+ { OP_CB_RECALL, "CB_RECALL" }, \
+ { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \
+ { OP_CB_NOTIFY, "CB_NOTIFY" }, \
+ { OP_CB_PUSH_DELEG, "CB_PUSH_DELEG" }, \
+ { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \
+ { OP_CB_RECALLABLE_OBJ_AVAIL, "CB_RECALLABLE_OBJ_AVAIL" }, \
+ { OP_CB_RECALL_SLOT, "CB_RECALL_SLOT" }, \
+ { OP_CB_SEQUENCE, "CB_SEQUENCE" }, \
+ { OP_CB_WANTS_CANCELLED, "CB_WANTS_CANCELLED" }, \
+ { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \
+ { OP_CB_NOTIFY_DEVICEID, "CB_NOTIFY_DEVICEID" }, \
+ { OP_CB_OFFLOAD, "CB_OFFLOAD" }, \
+ { OP_CB_ILLEGAL, "CB_ILLEGAL" })
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index de1944e42c65..19a13468eca5 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -53,7 +53,7 @@ extern "C" {
#define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3
#define DRM_IVPU_PARAM_NUM_CONTEXTS 4
#define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5
-#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6
+#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 /* Deprecated */
#define DRM_IVPU_PARAM_CONTEXT_ID 7
#define DRM_IVPU_PARAM_FW_API_VERSION 8
#define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9
@@ -64,11 +64,18 @@ extern "C" {
#define DRM_IVPU_PLATFORM_TYPE_SILICON 0
+/* Deprecated, use DRM_IVPU_JOB_PRIORITY */
#define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0
#define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1
#define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2
#define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3
+#define DRM_IVPU_JOB_PRIORITY_DEFAULT 0
+#define DRM_IVPU_JOB_PRIORITY_IDLE 1
+#define DRM_IVPU_JOB_PRIORITY_NORMAL 2
+#define DRM_IVPU_JOB_PRIORITY_FOCUS 3
+#define DRM_IVPU_JOB_PRIORITY_REALTIME 4
+
/**
* DRM_IVPU_CAP_METRIC_STREAMER
*
@@ -112,10 +119,6 @@ struct drm_ivpu_param {
* %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
* Lowest VPU virtual address available in the current context (read-only)
*
- * %DRM_IVPU_PARAM_CONTEXT_PRIORITY:
- * Value of current context scheduling priority (read-write).
- * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values.
- *
* %DRM_IVPU_PARAM_CONTEXT_ID:
* Current context ID, always greater than 0 (read-only)
*
@@ -286,10 +289,23 @@ struct drm_ivpu_submit {
* to be executed. The offset has to be 8-byte aligned.
*/
__u32 commands_offset;
+
+ /**
+ * @priority:
+ *
+ * Priority to be set for related job command queue, can be one of the following:
+ * %DRM_IVPU_JOB_PRIORITY_DEFAULT
+ * %DRM_IVPU_JOB_PRIORITY_IDLE
+ * %DRM_IVPU_JOB_PRIORITY_NORMAL
+ * %DRM_IVPU_JOB_PRIORITY_FOCUS
+ * %DRM_IVPU_JOB_PRIORITY_REALTIME
+ */
+ __u32 priority;
};
/* drm_ivpu_bo_wait job status codes */
#define DRM_IVPU_JOB_STATUS_SUCCESS 0
+#define DRM_IVPU_JOB_STATUS_ABORTED 256
/**
* struct drm_ivpu_bo_wait - Wait for BO to become inactive
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 0bade1592f34..77d7ff0d5b11 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -54,6 +54,20 @@ extern "C" {
*/
#define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17
+/*
+ * NOUVEAU_GETPARAM_VRAM_BAR_SIZE - query bar size
+ *
+ * Query the VRAM BAR size.
+ */
+#define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18
+
+/*
+ * NOUVEAU_GETPARAM_VRAM_USED
+ *
+ * Get remaining VRAM size.
+ */
+#define NOUVEAU_GETPARAM_VRAM_USED 19
+
struct drm_nouveau_getparam {
__u64 param;
__u64 value;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 9fa3ae324731..bb0c8a994116 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -831,11 +831,6 @@ struct drm_xe_vm_destroy {
* - %DRM_XE_VM_BIND_OP_PREFETCH
*
* and the @flags can be:
- * - %DRM_XE_VM_BIND_FLAG_READONLY
- * - %DRM_XE_VM_BIND_FLAG_ASYNC
- * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
- * MAP operation immediately rather than deferring the MAP to the page
- * fault handler.
* - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
* tables are setup with a special bit which indicates writes are
* dropped and all reads return zero. In the future, the NULL flags
@@ -928,9 +923,8 @@ struct drm_xe_vm_bind_op {
/** @op: Bind operation to perform */
__u32 op;
-#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
-#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
+#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
/** @flags: Bind flags */
__u32 flags;
@@ -1045,20 +1039,6 @@ struct drm_xe_exec_queue_create {
#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7
-/* Monitor 128KB contiguous region with 4K sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_128K 0
-/* Monitor 2MB contiguous region with 64KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_2M 1
-/* Monitor 16MB contiguous region with 512KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_16M 2
-/* Monitor 64MB contiguous region with 2M sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_64M 3
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 754e68ca8744..3c42b9f1bada 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -42,6 +42,7 @@
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
@@ -50,6 +51,10 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+enum bpf_cond_pseudo_jmp {
+ BPF_MAY_GOTO = 0,
+};
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
@@ -77,12 +82,29 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
+ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
+ * the trailing flexible array member) instead.
+ */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
+/* Header for bpf_lpm_trie_key structs */
+struct bpf_lpm_trie_key_hdr {
+ __u32 prefixlen;
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
+struct bpf_lpm_trie_key_u8 {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ __u8 data[]; /* Arbitrary size */
+};
+
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
@@ -617,7 +639,11 @@ union bpf_iter_link_info {
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
- * continue iteration from the current point.
+ * continue iteration from the current point. Both *in_batch* and
+ * *out_batch* must point to memory large enough to hold a key,
+ * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters
+ * must be at least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
@@ -847,6 +873,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +957,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +1009,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ BPF_MAP_TYPE_ARENA,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1055,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1278,6 +1339,10 @@ enum {
*/
#define BPF_PSEUDO_KFUNC_CALL 2
+enum bpf_addr_space_cast {
+ BPF_ADDR_SPACE_CAST = 1,
+};
+
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
BPF_ANY = 0, /* create new element or update existing */
@@ -1330,6 +1395,18 @@ enum {
/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
BPF_F_PATH_FD = (1U << 14),
+
+/* Flag for value_type_btf_obj_fd, the fd is available */
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
+
+/* BPF token FD is passed in a corresponding command's token_fd field */
+ BPF_F_TOKEN_FD = (1U << 16),
+
+/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
+ BPF_F_SEGV_ON_FAULT = (1U << 17),
+
+/* Do not translate kernel bpf_arena pointers to user pointers */
+ BPF_F_NO_USER_CONV = (1U << 18),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1401,8 +1478,20 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
+ *
+ * BPF_MAP_TYPE_ARENA - contains the address where user space
+ * is going to mmap() the arena. It has to be page aligned.
*/
__u64 map_extra;
+
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
+ * type data for
+ * btf_vmlinux_value_type_id.
+ */
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1472,6 +1561,10 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 prog_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1584,6 +1677,11 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_flags;
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 btf_token_fd;
};
struct {
@@ -1714,6 +1812,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -4839,9 +4942,9 @@ union bpf_attr {
* going through the CPU's backlog queue.
*
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types at the ingress
- * hook and for veth device types. The peer device must reside in a
- * different network namespace.
+ * currently only supported for tc BPF program types at the
+ * ingress hook and for veth and netkit target device types. The
+ * peer device must reside in a different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -6487,7 +6590,7 @@ struct bpf_map_info {
__u32 btf_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
- __u32 :32; /* alignment pad */
+ __u32 btf_vmlinux_id;
__u64 map_extra;
} __attribute__((aligned(8)));
@@ -6563,6 +6666,7 @@ struct bpf_link_info {
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
__u64 missed;
+ __aligned_u64 cookies;
} kprobe_multi;
struct {
__aligned_u64 path;
@@ -6582,6 +6686,7 @@ struct bpf_link_info {
__aligned_u64 file_name; /* in/out */
__u32 name_len;
__u32 offset; /* offset from file_name */
+ __u64 cookie;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6589,14 +6694,19 @@ struct bpf_link_info {
__u32 offset; /* offset from func_name */
__u64 addr;
__u64 missed;
+ __u64 cookie;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
__u32 name_len;
+ __u32 :32;
+ __u64 cookie;
} tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
struct {
__u64 config;
__u32 type;
+ __u32 :32;
+ __u64 cookie;
} event; /* BPF_PERF_EVENT_EVENT */
};
} perf_event;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7c29d82db9ee..cdf6ad872149 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -92,6 +92,7 @@ struct btrfs_qgroup_limit {
* struct btrfs_qgroup_inherit.flags
*/
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
+#define BTRFS_QGROUP_INHERIT_FLAGS_SUPP (BTRFS_QGROUP_INHERIT_SET_LIMITS)
struct btrfs_qgroup_inherit {
__u64 flags;
@@ -614,6 +615,9 @@ struct btrfs_ioctl_clone_range_args {
*/
#define BTRFS_DEFRAG_RANGE_COMPRESS 1
#define BTRFS_DEFRAG_RANGE_START_IO 2
+#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \
+ BTRFS_DEFRAG_RANGE_START_IO)
+
struct btrfs_ioctl_defrag_range_args {
/* start of the defrag operation */
__u64 start;
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 939db2388208..e78cbd85ce7c 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -193,9 +193,14 @@ struct canfd_frame {
#define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */
#define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */
+/* the 8-bit VCID is optionally placed in the canxl_frame.prio element */
+#define CANXL_VCID_OFFSET 16 /* bit offset of VCID in prio element */
+#define CANXL_VCID_VAL_MASK 0xFFUL /* VCID is an 8-bit value */
+#define CANXL_VCID_MASK (CANXL_VCID_VAL_MASK << CANXL_VCID_OFFSET)
+
/**
* struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure
- * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags
+ * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags / VCID
* @flags: additional flags for CAN XL
* @sdt: SDU (service data unit) type
* @len: frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN)
@@ -205,7 +210,7 @@ struct canfd_frame {
* @prio shares the same position as @can_id from struct can[fd]_frame.
*/
struct canxl_frame {
- canid_t prio; /* 11 bit priority for arbitration (canid_t) */
+ canid_t prio; /* 11 bit priority for arbitration / 8 bit VCID */
__u8 flags; /* additional flags for CAN XL */
__u8 sdt; /* SDU (service data unit) type */
__u16 len; /* frame payload length in byte */
diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index 439c982f7e81..6cde62371b6f 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -137,6 +137,7 @@ struct can_isotp_ll_options {
#define CAN_ISOTP_WAIT_TX_DONE 0x0400 /* wait for tx completion */
#define CAN_ISOTP_SF_BROADCAST 0x0800 /* 1-to-N functional addressing */
#define CAN_ISOTP_CF_BROADCAST 0x1000 /* 1-to-N transmission w/o FC */
+#define CAN_ISOTP_DYN_FC_PARMS 0x2000 /* dynamic FC parameters BS/STmin */
/* protocol machine default values */
diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index 31622c9b7988..e024d896e278 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -65,6 +65,22 @@ enum {
CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */
CAN_RAW_JOIN_FILTERS, /* all filters must match to trigger */
CAN_RAW_XL_FRAMES, /* allow CAN XL frames (default:off) */
+ CAN_RAW_XL_VCID_OPTS, /* CAN XL VCID configuration options */
};
+/* configuration for CAN XL virtual CAN identifier (VCID) handling */
+struct can_raw_vcid_options {
+
+ __u8 flags; /* flags for vcid (filter) behaviour */
+ __u8 tx_vcid; /* VCID value set into canxl_frame.prio */
+ __u8 rx_vcid; /* VCID value for VCID filter */
+ __u8 rx_vcid_mask; /* VCID mask for VCID filter */
+
+};
+
+/* can_raw_vcid_options.flags for CAN XL virtual CAN identifier handling */
+#define CAN_RAW_XL_VCID_TX_SET 0x01
+#define CAN_RAW_XL_VCID_TX_PASS 0x02
+#define CAN_RAW_XL_VCID_RX_FILTER 0x04
+
#endif /* !_UAPI_CAN_RAW_H */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 130cae0d3e20..2da0c7eb6710 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -614,7 +614,10 @@ enum devlink_attr {
DEVLINK_ATTR_REGION_DIRECT, /* flag */
- /* add new attributes above here, update the policy in devlink.c */
+ /* Add new attributes above here, update the spec in
+ * Documentation/netlink/specs/devlink.yaml and re-generate
+ * net/devlink/netlink_gen.c.
+ */
__DEVLINK_ATTR_MAX,
DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index b4e947f9bfbc..0c13d7f1a1bc 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -50,6 +50,35 @@ enum dpll_lock_status {
DPLL_LOCK_STATUS_MAX = (__DPLL_LOCK_STATUS_MAX - 1)
};
+/**
+ * enum dpll_lock_status_error - if previous status change was done due to a
+ * failure, this provides information of dpll device lock status error. Valid
+ * values for DPLL_A_LOCK_STATUS_ERROR attribute
+ * @DPLL_LOCK_STATUS_ERROR_NONE: dpll device lock status was changed without
+ * any error
+ * @DPLL_LOCK_STATUS_ERROR_UNDEFINED: dpll device lock status was changed due
+ * to undefined error. Driver fills this value up in case it is not able to
+ * obtain suitable exact error type.
+ * @DPLL_LOCK_STATUS_ERROR_MEDIA_DOWN: dpll device lock status was changed
+ * because of associated media got down. This may happen for example if dpll
+ * device was previously locked on an input pin of type
+ * PIN_TYPE_SYNCE_ETH_PORT.
+ * @DPLL_LOCK_STATUS_ERROR_FRACTIONAL_FREQUENCY_OFFSET_TOO_HIGH: the FFO
+ * (Fractional Frequency Offset) between the RX and TX symbol rate on the
+ * media got too high. This may happen for example if dpll device was
+ * previously locked on an input pin of type PIN_TYPE_SYNCE_ETH_PORT.
+ */
+enum dpll_lock_status_error {
+ DPLL_LOCK_STATUS_ERROR_NONE = 1,
+ DPLL_LOCK_STATUS_ERROR_UNDEFINED,
+ DPLL_LOCK_STATUS_ERROR_MEDIA_DOWN,
+ DPLL_LOCK_STATUS_ERROR_FRACTIONAL_FREQUENCY_OFFSET_TOO_HIGH,
+
+ /* private: */
+ __DPLL_LOCK_STATUS_ERROR_MAX,
+ DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1)
+};
+
#define DPLL_TEMP_DIVIDER 1000
/**
@@ -150,6 +179,7 @@ enum dpll_a {
DPLL_A_LOCK_STATUS,
DPLL_A_TEMP,
DPLL_A_TYPE,
+ DPLL_A_LOCK_STATUS_ERROR,
__DPLL_A_MAX,
DPLL_A_MAX = (__DPLL_A_MAX - 1)
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 06ef6b78b7de..11fc18988bc2 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(__u8 duplex)
#define IPV4_FLOW 0x10 /* hash only */
#define IPV6_FLOW 0x11 /* hash only */
#define ETHER_FLOW 0x12 /* spec only (ether_spec) */
+
+/* Used for GTP-U IPv4 and IPv6.
+ * The format of GTP packets only includes
+ * elements such as TEID and GTP version.
+ * It is primarily intended for data communication of the UE.
+ */
+#define GTPU_V4_FLOW 0x13 /* hash only */
+#define GTPU_V6_FLOW 0x14 /* hash only */
+
+/* Use for GTP-C IPv4 and v6.
+ * The format of these GTP packets does not include TEID.
+ * Primarily expected to be used for communication
+ * to create sessions for UE data communication,
+ * commonly referred to as CSR (Create Session Request).
+ */
+#define GTPC_V4_FLOW 0x15 /* hash only */
+#define GTPC_V6_FLOW 0x16 /* hash only */
+
+/* Use for GTP-C IPv4 and v6.
+ * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID.
+ * After session creation, it becomes this packet.
+ * This is mainly used for requests to realize UE handover.
+ */
+#define GTPC_TEID_V4_FLOW 0x17 /* hash only */
+#define GTPC_TEID_V6_FLOW 0x18 /* hash only */
+
+/* Use for GTP-U and extended headers for the PSC (PDU Session Container).
+ * The format of these GTP packets includes TEID and QFI.
+ * In 5G communication using UPF (User Plane Function),
+ * data communication with this extended header is performed.
+ */
+#define GTPU_EH_V4_FLOW 0x19 /* hash only */
+#define GTPU_EH_V6_FLOW 0x1a /* hash only */
+
+/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers.
+ * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by
+ * UL/DL included in the PSC.
+ * There are differences in the data included based on Downlink/Uplink,
+ * and can be used to distinguish packets.
+ * The functions described so far are useful when you want to
+ * handle communication from the mobile network in UPF, PGW, etc.
+ */
+#define GTPU_UL_V4_FLOW 0x1b /* hash only */
+#define GTPU_UL_V6_FLOW 0x1c /* hash only */
+#define GTPU_DL_V4_FLOW 0x1d /* hash only */
+#define GTPU_DL_V6_FLOW 0x1e /* hash only */
+
/* Flag to enable additional fields in struct ethtool_rx_flow_spec */
#define FLOW_EXT 0x80000000
#define FLOW_MAC_EXT 0x40000000
@@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(__u8 duplex)
#define RXH_IP_DST (1 << 5)
#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */
#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */
+#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */
#define RXH_DISCARD (1 << 31)
#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index cfbcc4cc49ac..4f4b948ef381 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -85,4 +85,17 @@ struct epoll_event {
__u64 data;
} EPOLL_PACKED;
+struct epoll_params {
+ __u32 busy_poll_usecs;
+ __u16 busy_poll_budget;
+ __u8 prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ __u8 __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+
#endif /* _UAPI_LINUX_EVENTPOLL_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 48ad69f7722e..45e4e64fd664 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -64,6 +64,24 @@ struct fstrim_range {
__u64 minlen;
};
+/*
+ * We include a length field because some filesystems (vfat) have an identifier
+ * that we do want to expose as a UUID, but doesn't have the standard length.
+ *
+ * We use a fixed size buffer beacuse this interface will, by fiat, never
+ * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
+ * users to have to deal with that.
+ */
+struct fsuuid2 {
+ __u8 len;
+ __u8 uuid[16];
+};
+
+struct fs_sysfs_path {
+ __u8 len;
+ __u8 name[128];
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -215,6 +233,13 @@ struct fsxattr {
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+/* Returns the external filesystem UUID, the same one blkid returns */
+#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
@@ -301,9 +326,12 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO O_APPEND */
#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+/* per-IO negation of O_APPEND */
+#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND)
+ RWF_APPEND | RWF_NOAPPEND)
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ab9bcff96e4d..ffa637b38c93 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1505,6 +1505,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_COUPLED_CONTROL,
__IFLA_BOND_MAX,
};
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index 5060963707b1..f2e0b2d50e6b 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -91,8 +91,6 @@ enum iio_modifier {
IIO_MOD_CO2,
IIO_MOD_VOC,
IIO_MOD_LIGHT_UV,
- IIO_MOD_LIGHT_UVA,
- IIO_MOD_LIGHT_UVB,
IIO_MOD_LIGHT_DUV,
IIO_MOD_PM1,
IIO_MOD_PM2P5,
@@ -107,6 +105,8 @@ enum iio_modifier {
IIO_MOD_PITCH,
IIO_MOD_YAW,
IIO_MOD_ROLL,
+ IIO_MOD_LIGHT_UVA,
+ IIO_MOD_LIGHT_UVB,
};
enum iio_event_type {
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index c4c53a9ab959..ff8d21f9e95b 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -145,7 +145,7 @@ struct in6_flowlabel_req {
#define IPV6_TLV_PADN 1
#define IPV6_TLV_ROUTERALERT 5
#define IPV6_TLV_CALIPSO 7 /* RFC 5570 */
-#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */
+#define IPV6_TLV_IOAM 49 /* RFC 9486 */
#define IPV6_TLV_JUMBO 194
#define IPV6_TLV_HAO 201 /* home address option */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 7a673b52827b..7bd10201a02b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -255,6 +255,7 @@ enum io_uring_op {
IORING_OP_FUTEX_WAKE,
IORING_OP_FUTEX_WAITV,
IORING_OP_FIXED_FD_INSTALL,
+ IORING_OP_FTRUNCATE,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -570,6 +571,10 @@ enum {
/* return status information for a buffer group */
IORING_REGISTER_PBUF_STATUS = 26,
+ /* set/clear busy poll settings */
+ IORING_REGISTER_NAPI = 27,
+ IORING_UNREGISTER_NAPI = 28,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -703,6 +708,14 @@ struct io_uring_buf_status {
__u32 resv[8];
};
+/* argument for IORING_(UN)REGISTER_NAPI */
+struct io_uring_napi {
+ __u32 busy_poll_to;
+ __u8 prefer_busy_poll;
+ __u8 pad[3];
+ __u64 resv;
+};
+
/*
* io_uring_restriction->opcode values
*/
diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h
index ca4b22833754..1733fbc51fb5 100644
--- a/include/uapi/linux/ioam6_genl.h
+++ b/include/uapi/linux/ioam6_genl.h
@@ -49,4 +49,24 @@ enum {
#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1)
+#define IOAM6_GENL_EV_GRP_NAME "ioam6_events"
+
+enum ioam6_event_type {
+ IOAM6_EVENT_UNSPEC,
+ IOAM6_EVENT_TRACE,
+};
+
+enum ioam6_event_attr {
+ IOAM6_EVENT_ATTR_UNSPEC,
+
+ IOAM6_EVENT_ATTR_TRACE_NAMESPACE, /* u16 */
+ IOAM6_EVENT_ATTR_TRACE_NODELEN, /* u8 */
+ IOAM6_EVENT_ATTR_TRACE_TYPE, /* u32 */
+ IOAM6_EVENT_ATTR_TRACE_DATA, /* Binary */
+
+ __IOAM6_EVENT_ATTR_MAX
+};
+
+#define IOAM6_EVENT_ATTR_MAX (__IOAM6_EVENT_ATTR_MAX - 1)
+
#endif /* _UAPI_LINUX_IOAM6_GENL_H */
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
deleted file mode 100644
index 65d8b0234f69..000000000000
--- a/include/uapi/linux/iommu.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * IOMMU user API definitions
- */
-
-#ifndef _UAPI_IOMMU_H
-#define _UAPI_IOMMU_H
-
-#include <linux/types.h>
-
-#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
-#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
-#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
-#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
-
-/* Generic fault types, can be expanded IRQ remapping fault */
-enum iommu_fault_type {
- IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
- IOMMU_FAULT_PAGE_REQ, /* page request fault */
-};
-
-enum iommu_fault_reason {
- IOMMU_FAULT_REASON_UNKNOWN = 0,
-
- /* Could not access the PASID table (fetch caused external abort) */
- IOMMU_FAULT_REASON_PASID_FETCH,
-
- /* PASID entry is invalid or has configuration errors */
- IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
-
- /*
- * PASID is out of range (e.g. exceeds the maximum PASID
- * supported by the IOMMU) or disabled.
- */
- IOMMU_FAULT_REASON_PASID_INVALID,
-
- /*
- * An external abort occurred fetching (or updating) a translation
- * table descriptor
- */
- IOMMU_FAULT_REASON_WALK_EABT,
-
- /*
- * Could not access the page table entry (Bad address),
- * actual translation fault
- */
- IOMMU_FAULT_REASON_PTE_FETCH,
-
- /* Protection flag check failed */
- IOMMU_FAULT_REASON_PERMISSION,
-
- /* access flag check failed */
- IOMMU_FAULT_REASON_ACCESS,
-
- /* Output address of a translation stage caused Address Size fault */
- IOMMU_FAULT_REASON_OOR_ADDRESS,
-};
-
-/**
- * struct iommu_fault_unrecoverable - Unrecoverable fault data
- * @reason: reason of the fault, from &enum iommu_fault_reason
- * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
- * @pasid: Process Address Space ID
- * @perm: requested permission access using by the incoming transaction
- * (IOMMU_FAULT_PERM_* values)
- * @addr: offending page address
- * @fetch_addr: address that caused a fetch abort, if any
- */
-struct iommu_fault_unrecoverable {
- __u32 reason;
-#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
-#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
- __u32 flags;
- __u32 pasid;
- __u32 perm;
- __u64 addr;
- __u64 fetch_addr;
-};
-
-/**
- * struct iommu_fault_page_request - Page Request data
- * @flags: encodes whether the corresponding fields are valid and whether this
- * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
- * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
- * must have the same PASID value as the page request. When it is clear,
- * the page response should not have a PASID.
- * @pasid: Process Address Space ID
- * @grpid: Page Request Group Index
- * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
- * @addr: page address
- * @private_data: device-specific private information
- */
-struct iommu_fault_page_request {
-#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
-#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
-#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 perm;
- __u64 addr;
- __u64 private_data[2];
-};
-
-/**
- * struct iommu_fault - Generic fault data
- * @type: fault type from &enum iommu_fault_type
- * @padding: reserved for future use (should be zero)
- * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
- * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
- * @padding2: sets the fault size to allow for future extensions
- */
-struct iommu_fault {
- __u32 type;
- __u32 padding;
- union {
- struct iommu_fault_unrecoverable event;
- struct iommu_fault_page_request prm;
- __u8 padding2[56];
- };
-};
-
-/**
- * enum iommu_page_response_code - Return status of fault handlers
- * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
- * populated, retry the access. This is "Success" in PCI PRI.
- * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
- * this device if possible. This is "Response Failure" in PCI PRI.
- * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
- * access. This is "Invalid Request" in PCI PRI.
- */
-enum iommu_page_response_code {
- IOMMU_PAGE_RESP_SUCCESS = 0,
- IOMMU_PAGE_RESP_INVALID,
- IOMMU_PAGE_RESP_FAILURE,
-};
-
-/**
- * struct iommu_page_response - Generic page response information
- * @argsz: User filled size of this data
- * @version: API version of this structure
- * @flags: encodes whether the corresponding fields are valid
- * (IOMMU_FAULT_PAGE_RESPONSE_* values)
- * @pasid: Process Address Space ID
- * @grpid: Page Request Group Index
- * @code: response code from &enum iommu_page_response_code
- */
-struct iommu_page_response {
- __u32 argsz;
-#define IOMMU_PAGE_RESP_VERSION_1 1
- __u32 version;
-#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 code;
-};
-
-#endif /* _UAPI_IOMMU_H */
diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h
index f8aef9ade549..33d8c9f4aa6b 100644
--- a/include/uapi/linux/lsm.h
+++ b/include/uapi/linux/lsm.h
@@ -62,6 +62,8 @@ struct lsm_ctx {
#define LSM_ID_LOCKDOWN 108
#define LSM_ID_BPF 109
#define LSM_ID_LANDLOCK 110
+#define LSM_ID_IMA 111
+#define LSM_ID_EVM 112
/*
* LSM_ATTR_XXX definitions identify different LSM attributes
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 6325d1d0e90f..1b40a968ba91 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -101,5 +101,6 @@
#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */
#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
+#define PID_FS_MAGIC 0x50494446 /* "PIDF" */
#endif /* __LINUX_MAGIC_H__ */
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 154ab56651f1..e1db65df9359 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -50,7 +50,14 @@ struct sockaddr_mctp_ext {
#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0)
#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1)
+#define SIOCMCTPALLOCTAG2 (SIOCPROTOPRIVATE + 2)
+#define SIOCMCTPDROPTAG2 (SIOCPROTOPRIVATE + 3)
+/* Deprecated: use mctp_ioc_tag_ctl2 / TAG2 ioctls instead, which defines the
+ * MCTP network ID as part of the allocated tag. Using this assumes the default
+ * net ID for allocated tags, which may not give correct behaviour on system
+ * with multiple networks configured.
+ */
struct mctp_ioc_tag_ctl {
mctp_eid_t peer_addr;
@@ -65,4 +72,29 @@ struct mctp_ioc_tag_ctl {
__u16 flags;
};
+struct mctp_ioc_tag_ctl2 {
+ /* Peer details: network ID, peer EID, local EID. All set by the
+ * caller.
+ *
+ * Local EID must be MCTP_ADDR_NULL or MCTP_ADDR_ANY in current
+ * kernels.
+ */
+ unsigned int net;
+ mctp_eid_t peer_addr;
+ mctp_eid_t local_addr;
+
+ /* Set by caller, but no flags defined currently. Must be 0 */
+ __u16 flags;
+
+ /* For SIOCMCTPALLOCTAG2: must be passed as zero, kernel will
+ * populate with the allocated tag value. Returned tag value will
+ * always have TO and PREALLOC set.
+ *
+ * For SIOCMCTPDROPTAG2: userspace provides tag value to drop, from
+ * a prior SIOCMCTPALLOCTAG2 call (and so must have TO and PREALLOC set).
+ */
+ __u8 tag;
+
+};
+
#endif /* __UAPI_MCTP_H */
diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index d03863da180e..c0c8ec995b06 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -138,6 +138,8 @@
#define MDIO_PMA_SPEED_1000 0x0010 /* 1000M capable */
#define MDIO_PMA_SPEED_100 0x0020 /* 100M capable */
#define MDIO_PMA_SPEED_10 0x0040 /* 10M capable */
+#define MDIO_PMA_SPEED_2_5G 0x2000 /* 2.5G capable */
+#define MDIO_PMA_SPEED_5G 0x4000 /* 5G capable */
#define MDIO_PCS_SPEED_10P2B 0x0002 /* 10PASS-TS/2BASE-TL capable */
#define MDIO_PCS_SPEED_2_5G 0x0040 /* 2.5G capable */
#define MDIO_PCS_SPEED_5G 0x0080 /* 5G capable */
@@ -348,6 +350,8 @@
/* BASE-T1 auto-negotiation advertisement register [31:16] */
#define MDIO_AN_T1_ADV_M_B10L 0x4000 /* device is compatible with 10BASE-T1L */
+#define MDIO_AN_T1_ADV_M_1000BT1 0x0080 /* advertise 1000BASE-T1 */
+#define MDIO_AN_T1_ADV_M_100BT1 0x0020 /* advertise 100BASE-T1 */
#define MDIO_AN_T1_ADV_M_MST 0x0010 /* advertise master preference */
/* BASE-T1 auto-negotiation advertisement register [47:32] */
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 93cb411adf72..bb65ee840cda 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -70,6 +70,10 @@ enum netdev_queue_type {
NETDEV_QUEUE_TYPE_TX,
};
+enum netdev_qstats_scope {
+ NETDEV_QSTATS_SCOPE_QUEUE = 1,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
@@ -133,6 +137,21 @@ enum {
};
enum {
+ NETDEV_A_QSTATS_IFINDEX = 1,
+ NETDEV_A_QSTATS_QUEUE_TYPE,
+ NETDEV_A_QSTATS_QUEUE_ID,
+ NETDEV_A_QSTATS_SCOPE,
+ NETDEV_A_QSTATS_RX_PACKETS = 8,
+ NETDEV_A_QSTATS_RX_BYTES,
+ NETDEV_A_QSTATS_TX_PACKETS,
+ NETDEV_A_QSTATS_TX_BYTES,
+ NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+
+ __NETDEV_A_QSTATS_MAX,
+ NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -144,6 +163,7 @@ enum {
NETDEV_CMD_PAGE_POOL_STATS_GET,
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
+ NETDEV_CMD_QSTATS_GET,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index ca30232b7bc8..aa4094ca2444 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -179,13 +179,17 @@ enum nft_hook_attributes {
* enum nft_table_flags - nf_tables table flags
*
* @NFT_TABLE_F_DORMANT: this table is not active
+ * @NFT_TABLE_F_OWNER: this table is owned by a process
+ * @NFT_TABLE_F_PERSIST: this table shall outlive its owner
*/
enum nft_table_flags {
NFT_TABLE_F_DORMANT = 0x1,
NFT_TABLE_F_OWNER = 0x2,
+ NFT_TABLE_F_PERSIST = 0x4,
};
#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT | \
- NFT_TABLE_F_OWNER)
+ NFT_TABLE_F_OWNER | \
+ NFT_TABLE_F_PERSIST)
/**
* enum nft_table_attributes - nf_tables table netlink attributes
@@ -285,9 +289,11 @@ enum nft_rule_attributes {
/**
* enum nft_rule_compat_flags - nf_tables rule compat flags
*
+ * @NFT_RULE_COMPAT_F_UNUSED: unused
* @NFT_RULE_COMPAT_F_INV: invert the check result
*/
enum nft_rule_compat_flags {
+ NFT_RULE_COMPAT_F_UNUSED = (1 << 0),
NFT_RULE_COMPAT_F_INV = (1 << 1),
NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV,
};
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index d8ffa8c9ca78..dd8787f9cf39 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -30,6 +30,9 @@ enum {
#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+#define NHA_OP_FLAG_DUMP_STATS BIT(0)
+#define NHA_OP_FLAG_DUMP_HW_STATS BIT(1)
+
enum {
NHA_UNSPEC,
NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */
@@ -60,6 +63,18 @@ enum {
/* nested; nexthop bucket attributes */
NHA_RES_BUCKET,
+ /* u32; operation-specific flags */
+ NHA_OP_FLAGS,
+
+ /* nested; nexthop group stats */
+ NHA_GROUP_STATS,
+
+ /* u32; nexthop hardware stats enable */
+ NHA_HW_STATS_ENABLE,
+
+ /* u32; read-only; whether any driver collects HW stats */
+ NHA_HW_STATS_USED,
+
__NHA_MAX,
};
@@ -101,4 +116,34 @@ enum {
#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1)
+enum {
+ NHA_GROUP_STATS_UNSPEC,
+
+ /* nested; nexthop group entry stats */
+ NHA_GROUP_STATS_ENTRY,
+
+ __NHA_GROUP_STATS_MAX,
+};
+
+#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1)
+
+enum {
+ NHA_GROUP_STATS_ENTRY_UNSPEC,
+
+ /* u32; nexthop id of the nexthop group entry */
+ NHA_GROUP_STATS_ENTRY_ID,
+
+ /* uint; number of packets forwarded via the nexthop group entry */
+ NHA_GROUP_STATS_ENTRY_PACKETS,
+
+ /* uint; number of packets forwarded via the nexthop group entry in
+ * hardware
+ */
+ NHA_GROUP_STATS_ENTRY_PACKETS_HW,
+
+ __NHA_GROUP_STATS_ENTRY_MAX,
+};
+
+#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1)
+
#endif
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1ccdcae24372..f23ecbdd84a2 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -11,7 +11,7 @@
* Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
* Copyright 2008 Colin McCabe <colin@cozybit.com>
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -438,7 +438,8 @@
* %NL80211_ATTR_REASON_CODE can optionally be used to specify which type
* of disconnection indication should be sent to the station
* (Deauthentication or Disassociation frame and reason code for that
- * frame).
+ * frame). %NL80211_ATTR_MLO_LINK_ID can be used optionally to remove
+ * stations connected and using at least that link as one of its links.
*
* @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to
* destination %NL80211_ATTR_MAC on the interface identified by
@@ -2851,6 +2852,10 @@ enum nl80211_commands {
* mapping is as defined in section 9.4.2.314 (TID-To-Link Mapping element)
* in Draft P802.11be_D4.0.
*
+ * @NL80211_ATTR_ASSOC_SPP_AMSDU: flag attribute used with
+ * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs
+ * are used on this connection
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3394,6 +3399,8 @@ enum nl80211_attrs {
NL80211_ATTR_MLO_TTLM_DLINK,
NL80211_ATTR_MLO_TTLM_ULINK,
+ NL80211_ATTR_ASSOC_SPP_AMSDU,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -3534,6 +3541,7 @@ enum nl80211_iftype {
* @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers
* that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a
* previously added station into associated state
+ * @NL80211_STA_FLAG_SPP_AMSDU: station supports SPP A-MSDUs
* @NL80211_STA_FLAG_MAX: highest station flag number currently defined
* @__NL80211_STA_FLAG_AFTER_LAST: internal use
*/
@@ -3546,6 +3554,7 @@ enum nl80211_sta_flags {
NL80211_STA_FLAG_AUTHENTICATED,
NL80211_STA_FLAG_TDLS_PEER,
NL80211_STA_FLAG_ASSOCIATED,
+ NL80211_STA_FLAG_SPP_AMSDU,
/* keep last */
__NL80211_STA_FLAG_AFTER_LAST,
@@ -4260,10 +4269,13 @@ enum nl80211_wmm_rule {
* allowed for peer-to-peer or adhoc communication under the control
* of a DFS master which operates on the same channel (FCC-594280 D01
* Section B.3). Should be used together with %NL80211_RRF_DFS only.
- * @NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT: Client connection to VLP AP
+ * @NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP
* not allowed using this channel
- * @NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT: Client connection to AFC AP
+ * @NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP
* not allowed using this channel
+ * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor
+ * mode despite other (regulatory) restrictions, even if the channel is
+ * otherwise completely disabled.
* @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
* currently defined
* @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -4304,8 +4316,9 @@ enum nl80211_frequency_attr {
NL80211_FREQUENCY_ATTR_NO_EHT,
NL80211_FREQUENCY_ATTR_PSD,
NL80211_FREQUENCY_ATTR_DFS_CONCURRENT,
- NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT,
- NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT,
+ NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT,
+ NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT,
+ NL80211_FREQUENCY_ATTR_CAN_MONITOR,
/* keep last */
__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -4318,6 +4331,10 @@ enum nl80211_frequency_attr {
#define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR
#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \
NL80211_FREQUENCY_ATTR_IR_CONCURRENT
+#define NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT \
+ NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT
+#define NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT \
+ NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT
/**
* enum nl80211_bitrate_attr - bitrate attributes
@@ -4455,14 +4472,7 @@ enum nl80211_reg_rule_attr {
* value as specified by &struct nl80211_bss_select_rssi_adjust.
* @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching
* (this cannot be used together with SSID).
- * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the
- * band specific minimum rssi thresholds for the bands defined in
- * enum nl80211_band. The minimum rssi threshold value(s32) specific to a
- * band shall be encapsulated in attribute with type value equals to one
- * of the NL80211_BAND_* defined in enum nl80211_band. For example, the
- * minimum rssi threshold value for 2.4GHZ band shall be encapsulated
- * within an attribute of type NL80211_BAND_2GHZ. And one or more of such
- * attributes will be nested within this attribute.
+ * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Obsolete
* @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter
* attribute number currently defined
* @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use
@@ -4475,7 +4485,7 @@ enum nl80211_sched_scan_match_attr {
NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI,
NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST,
NL80211_SCHED_SCAN_MATCH_ATTR_BSSID,
- NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI,
+ NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, /* obsolete */
/* keep last */
__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST,
@@ -4515,8 +4525,8 @@ enum nl80211_sched_scan_match_attr {
peer-to-peer or adhoc communication under the control of a DFS master
which operates on the same channel (FCC-594280 D01 Section B.3).
Should be used together with %NL80211_RRF_DFS only.
- * @NL80211_RRF_NO_UHB_VLP_CLIENT: Client connection to VLP AP not allowed
- * @NL80211_RRF_NO_UHB_AFC_CLIENT: Client connection to AFC AP not allowed
+ * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed
+ * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed
*/
enum nl80211_reg_rule_flags {
NL80211_RRF_NO_OFDM = 1<<0,
@@ -4539,8 +4549,8 @@ enum nl80211_reg_rule_flags {
NL80211_RRF_NO_EHT = 1<<19,
NL80211_RRF_PSD = 1<<20,
NL80211_RRF_DFS_CONCURRENT = 1<<21,
- NL80211_RRF_NO_UHB_VLP_CLIENT = 1<<22,
- NL80211_RRF_NO_UHB_AFC_CLIENT = 1<<23,
+ NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22,
+ NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23,
};
#define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR
@@ -4549,6 +4559,8 @@ enum nl80211_reg_rule_flags {
#define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\
NL80211_RRF_NO_HT40PLUS)
#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT
+#define NL80211_RRF_NO_UHB_VLP_CLIENT NL80211_RRF_NO_6GHZ_VLP_CLIENT
+#define NL80211_RRF_NO_UHB_AFC_CLIENT NL80211_RRF_NO_6GHZ_AFC_CLIENT
/* For backport compatibility with older userspace */
#define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS)
@@ -5096,14 +5108,17 @@ enum nl80211_bss_use_for {
* BSS isn't possible
* @NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY: NSTR nonprimary links aren't
* supported by the device, and this BSS entry represents one.
- * @NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH: STA is not supporting
+ * @NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH: STA is not supporting
* the AP power type (SP, VLP, AP) that the AP uses.
*/
enum nl80211_bss_cannot_use_reasons {
NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY = 1 << 0,
- NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH = 1 << 1,
+ NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH = 1 << 1,
};
+#define NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH \
+ NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH
+
/**
* enum nl80211_bss - netlink attributes for a BSS
*
@@ -5742,6 +5757,8 @@ struct nl80211_pattern_support {
* %NL80211_ATTR_SCAN_FREQUENCIES contains more than one
* frequency, it means that the match occurred in more than one
* channel.
+ * @NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC: For wakeup reporting only.
+ * Wake up happened due to unprotected deauth or disassoc frame in MFP.
* @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers
* @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number
*
@@ -5769,6 +5786,7 @@ enum nl80211_wowlan_triggers {
NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS,
NL80211_WOWLAN_TRIG_NET_DETECT,
NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS,
+ NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC,
/* keep last */
NUM_NL80211_WOWLAN_TRIG,
@@ -6410,8 +6428,7 @@ enum nl80211_feature_flags {
* @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching
* (set/del PMKSA operations) in AP mode.
*
- * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports
- * filtering of sched scan results using band specific RSSI thresholds.
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Obsolete
*
* @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power
* to a station.
@@ -6520,6 +6537,11 @@ enum nl80211_feature_flags {
* DFS master on the same channel as described in FCC-594280 D01
* (Section B.3). This, for example, allows P2P GO and P2P clients to
* operate on DFS channels as long as there's a concurrent BSS connection.
+ *
+ * @NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT: The driver has support for SPP
+ * (signaling and payload protected) A-MSDUs and this shall be advertised
+ * in the RSNXE.
+ *
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
*/
@@ -6561,7 +6583,7 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER,
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS,
NL80211_EXT_FEATURE_AP_PMKSA_CACHING,
- NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD,
+ NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, /* obsolete */
NL80211_EXT_FEATURE_EXT_KEY_ID,
NL80211_EXT_FEATURE_STA_TX_PWR,
NL80211_EXT_FEATURE_SAE_OFFLOAD,
@@ -6594,6 +6616,7 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_OWE_OFFLOAD,
NL80211_EXT_FEATURE_OWE_OFFLOAD_AP,
NL80211_EXT_FEATURE_DFS_CONCURRENT,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 5406fbc13074..72ec000a97cd 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -7,6 +7,12 @@
#include <linux/fcntl.h>
/* Flags for pidfd_open(). */
-#define PIDFD_NONBLOCK O_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#define PIDFD_THREAD O_EXCL
+
+/* Flags for pidfd_send_signal(). */
+#define PIDFD_SIGNAL_THREAD (1UL << 0)
+#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
+#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
#endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index b44ba7dcdefc..b7a2c2ee35b7 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -28,6 +28,9 @@ enum {
SEV_PEK_CERT_IMPORT,
SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */
SEV_GET_ID2,
+ SNP_PLATFORM_STATUS,
+ SNP_COMMIT,
+ SNP_SET_CONFIG,
SEV_MAX,
};
@@ -69,6 +72,12 @@ typedef enum {
SEV_RET_RESOURCE_LIMIT,
SEV_RET_SECURE_DATA_INVALID,
SEV_RET_INVALID_KEY = 0x27,
+ SEV_RET_INVALID_PAGE_SIZE,
+ SEV_RET_INVALID_PAGE_STATE,
+ SEV_RET_INVALID_MDATA_ENTRY,
+ SEV_RET_INVALID_PAGE_OWNER,
+ SEV_RET_INVALID_PAGE_AEAD_OFLOW,
+ SEV_RET_RMP_INIT_REQUIRED,
SEV_RET_MAX,
} sev_ret_code;
@@ -156,6 +165,56 @@ struct sev_user_data_get_id2 {
} __packed;
/**
+ * struct sev_user_data_snp_status - SNP status
+ *
+ * @api_major: API major version
+ * @api_minor: API minor version
+ * @state: current platform state
+ * @is_rmp_initialized: whether RMP is initialized or not
+ * @rsvd: reserved
+ * @build_id: firmware build id for the API version
+ * @mask_chip_id: whether chip id is present in attestation reports or not
+ * @mask_chip_key: whether attestation reports are signed or not
+ * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded
+ * @rsvd1: reserved
+ * @guest_count: the number of guest currently managed by the firmware
+ * @current_tcb_version: current TCB version
+ * @reported_tcb_version: reported TCB version
+ */
+struct sev_user_data_snp_status {
+ __u8 api_major; /* Out */
+ __u8 api_minor; /* Out */
+ __u8 state; /* Out */
+ __u8 is_rmp_initialized:1; /* Out */
+ __u8 rsvd:7;
+ __u32 build_id; /* Out */
+ __u32 mask_chip_id:1; /* Out */
+ __u32 mask_chip_key:1; /* Out */
+ __u32 vlek_en:1; /* Out */
+ __u32 rsvd1:29;
+ __u32 guest_count; /* Out */
+ __u64 current_tcb_version; /* Out */
+ __u64 reported_tcb_version; /* Out */
+} __packed;
+
+/**
+ * struct sev_user_data_snp_config - system wide configuration value for SNP.
+ *
+ * @reported_tcb: the TCB version to report in the guest attestation report.
+ * @mask_chip_id: whether chip id is present in attestation reports or not
+ * @mask_chip_key: whether attestation reports are signed or not
+ * @rsvd: reserved
+ * @rsvd1: reserved
+ */
+struct sev_user_data_snp_config {
+ __u64 reported_tcb ; /* In */
+ __u32 mask_chip_id:1; /* In */
+ __u32 mask_chip_key:1; /* In */
+ __u32 rsvd:30; /* In */
+ __u8 rsvd1[52];
+} __packed;
+
+/**
* struct sev_issue_cmd - SEV ioctl parameters
*
* @cmd: SEV commands to execute
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index da700999cad4..053b40d642de 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -32,6 +32,7 @@
#define PTP_RISING_EDGE (1<<1)
#define PTP_FALLING_EDGE (1<<2)
#define PTP_STRICT_FLAGS (1<<3)
+#define PTP_EXT_OFFSET (1<<4)
#define PTP_EXTTS_EDGES (PTP_RISING_EDGE | PTP_FALLING_EDGE)
/*
@@ -40,7 +41,8 @@
#define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \
PTP_RISING_EDGE | \
PTP_FALLING_EDGE | \
- PTP_STRICT_FLAGS)
+ PTP_STRICT_FLAGS | \
+ PTP_EXT_OFFSET)
/*
* flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
@@ -51,6 +53,11 @@
PTP_FALLING_EDGE)
/*
+ * flag fields valid for the ptp_extts_event report.
+ */
+#define PTP_EXTTS_EVENT_VALID (PTP_ENABLE_FEATURE)
+
+/*
* Bits of the ptp_perout_request.flags field:
*/
#define PTP_PEROUT_ONE_SHOT (1<<0)
@@ -228,9 +235,9 @@ struct ptp_pin_desc {
#define PTP_MASK_EN_SINGLE _IOW(PTP_CLK_MAGIC, 20, unsigned int)
struct ptp_extts_event {
- struct ptp_clock_time t; /* Time event occured. */
+ struct ptp_clock_time t; /* Time event occurred. */
unsigned int index; /* Which channel produced the event. */
- unsigned int flags; /* Reserved for future use. */
+ unsigned int flags; /* Event type. */
unsigned int rsv[2]; /* Reserved for future use. */
};
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h
index 9086367db043..de9b4733607e 100644
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -145,12 +145,13 @@ struct serial_rs485 {
#define SER_RS485_ENABLED _BITUL(0)
#define SER_RS485_RTS_ON_SEND _BITUL(1)
#define SER_RS485_RTS_AFTER_SEND _BITUL(2)
-#define SER_RS485_RX_DURING_TX _BITUL(3)
-#define SER_RS485_TERMINATE_BUS _BITUL(4)
-#define SER_RS485_ADDRB _BITUL(5)
-#define SER_RS485_ADDR_RECV _BITUL(6)
-#define SER_RS485_ADDR_DEST _BITUL(7)
-#define SER_RS485_MODE_RS422 _BITUL(8)
+/* Placeholder for bit 3: SER_RS485_RTS_BEFORE_SEND, which isn't used anymore */
+#define SER_RS485_RX_DURING_TX _BITUL(4)
+#define SER_RS485_TERMINATE_BUS _BITUL(5)
+#define SER_RS485_ADDRB _BITUL(6)
+#define SER_RS485_ADDR_RECV _BITUL(7)
+#define SER_RS485_ADDR_DEST _BITUL(8)
+#define SER_RS485_MODE_RS422 _BITUL(9)
__u32 delay_rts_before_send;
__u32 delay_rts_after_send;
diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h
index f3e61b04fa01..f5cab7fc96ab 100644
--- a/include/uapi/linux/tc_act/tc_pedit.h
+++ b/include/uapi/linux/tc_act/tc_pedit.h
@@ -62,7 +62,7 @@ struct tc_pedit_sel {
tc_gen;
unsigned char nkeys;
unsigned char flags;
- struct tc_pedit_key keys[0];
+ struct tc_pedit_key keys[] __counted_by(nkeys);
};
#define tc_pedit tc_pedit_sel
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index b9cfc5c96268..c8dc5f8ea699 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -49,6 +49,8 @@
_IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_FEATURES \
_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_DEL_DEV_ASYNC \
+ _IOR('u', 0x14, struct ublksrv_ctrl_cmd)
/*
* 64bits are enough now, and it should be easy to extend in case of
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index d5b9cfbd9cea..628d46a0da92 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image {
* *
*****************************************************************************/
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17)
typedef unsigned long snd_pcm_uframes_t;
typedef signed long snd_pcm_sframes_t;
@@ -416,7 +416,7 @@ struct snd_pcm_hw_params {
unsigned int rmask; /* W: requested masks */
unsigned int cmask; /* R: changed masks */
unsigned int info; /* R: Info flags for returned setup */
- unsigned int msbits; /* R: used most significant bits */
+ unsigned int msbits; /* R: used most significant bits (in sample bit-width) */
unsigned int rate_num; /* R: rate numerator */
unsigned int rate_den; /* R: rate denominator */
snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
diff --git a/include/uapi/xen/gntalloc.h b/include/uapi/xen/gntalloc.h
index 48d2790ef928..3109282672f3 100644
--- a/include/uapi/xen/gntalloc.h
+++ b/include/uapi/xen/gntalloc.h
@@ -31,7 +31,10 @@ struct ioctl_gntalloc_alloc_gref {
__u64 index;
/* The grant references of the newly created grant, one per page */
/* Variable size, depending on count */
- __u32 gref_ids[1];
+ union {
+ __u32 gref_ids[1];
+ __DECLARE_FLEX_ARRAY(__u32, gref_ids_flex);
+ };
};
#define GNTALLOC_FLAG_WRITABLE 1
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 73eb622e7663..5d5c0b8efff2 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -19,6 +19,12 @@
#include <vdso/time32.h>
#include <vdso/time64.h>
+#ifdef CONFIG_ARM64
+#include <asm/page-def.h>
+#else
+#include <asm/page.h>
+#endif
+
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
@@ -121,6 +127,14 @@ struct vdso_data {
extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
+/**
+ * union vdso_data_store - Generic vDSO data page
+ */
+union vdso_data_store {
+ struct vdso_data data[CS_BASES];
+ u8 page[PAGE_SIZE];
+};
+
/*
* The generic vDSO implementation requires that gettimeofday.h
* provides:
diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h
index 9a2af9fca45e..73501149439d 100644
--- a/include/vdso/helpers.h
+++ b/include/vdso/helpers.h
@@ -30,9 +30,9 @@ static __always_inline u32 vdso_read_retry(const struct vdso_data *vd,
static __always_inline void vdso_write_begin(struct vdso_data *vd)
{
/*
- * WRITE_ONCE it is required otherwise the compiler can validly tear
+ * WRITE_ONCE() is required otherwise the compiler can validly tear
* updates to vd[x].seq and it is possible that the value seen by the
- * reader it is inconsistent.
+ * reader is inconsistent.
*/
WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
@@ -43,9 +43,9 @@ static __always_inline void vdso_write_end(struct vdso_data *vd)
{
smp_wmb();
/*
- * WRITE_ONCE it is required otherwise the compiler can validly tear
+ * WRITE_ONCE() is required otherwise the compiler can validly tear
* updates to vd[x].seq and it is possible that the value seen by the
- * reader it is inconsistent.
+ * reader is inconsistent.
*/
WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
diff --git a/init/Kconfig b/init/Kconfig
index 8df18f3a9748..f3ea5dea9c85 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT
# Detect buggy gcc and clang, fixed in gcc-11 clang-14.
def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
+config GCC_ASM_GOTO_OUTPUT_WORKAROUND
+ bool
+ depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT
+ # Fixed in GCC 14, 13.3, 12.4 and 11.5
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ default y if GCC_VERSION < 110500
+ default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
+ default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
+
config TOOLS_SUPPORT_RELR
def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
@@ -106,7 +115,7 @@ config CONSTRUCTORS
bool
config IRQ_WORK
- bool
+ def_bool y if SMP
config BUILDTIME_TABLE_SORT
bool
@@ -867,14 +876,26 @@ config CC_IMPLICIT_FALLTHROUGH
default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
-# Currently, disable gcc-11+ array-bounds globally.
+# Currently, disable gcc-10+ array-bounds globally.
# It's still broken in gcc-13, so no upper bound yet.
-config GCC11_NO_ARRAY_BOUNDS
+config GCC10_NO_ARRAY_BOUNDS
def_bool y
config CC_NO_ARRAY_BOUNDS
bool
- default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
+ default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS
+
+# Currently, disable -Wstringop-overflow for GCC globally.
+config GCC_NO_STRINGOP_OVERFLOW
+ def_bool y
+
+config CC_NO_STRINGOP_OVERFLOW
+ bool
+ default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW
+
+config CC_STRINGOP_OVERFLOW
+ bool
+ default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW
#
# For architectures that know their GCC __int128 support is sound
@@ -1445,11 +1466,6 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
config HAVE_PCSPKR_PLATFORM
bool
-# interpreter that classic socket filters depend on
-config BPF
- bool
- select CRYPTO_LIB_SHA1
-
menuconfig EXPERT
bool "Configure standard kernel features (expert users)"
# Unhide debug options, to make the on-by-default options visible
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 279ad28bf4fb..3c5fd993bc7e 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -208,6 +208,9 @@ retry:
goto out;
case -EACCES:
case -EINVAL:
+#ifdef CONFIG_BLOCK
+ init_flush_fput();
+#endif
continue;
}
/*
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 15e372b00ce7..6069ea3eb80d 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -9,6 +9,8 @@
#include <linux/major.h>
#include <linux/root_dev.h>
#include <linux/init_syscalls.h>
+#include <linux/task_work.h>
+#include <linux/file.h>
void mount_root_generic(char *name, char *pretty_name, int flags);
void mount_root(char *root_device_name);
@@ -41,3 +43,10 @@ static inline bool initrd_load(char *root_device_name)
}
#endif
+
+/* Ensure that async file closing finished to prevent spurious errors. */
+static inline void init_flush_fput(void)
+{
+ flush_delayed_fput();
+ task_work_run();
+}
diff --git a/init/init_task.c b/init/init_task.c
index 7ecb458eb3da..4daee6d761c8 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -147,6 +147,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.rcu_tasks_holdout = false,
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
+ .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
.trc_reader_nesting = 0,
diff --git a/init/initramfs.c b/init/initramfs.c
index 76deb48c38cb..01dbd0e81501 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -16,9 +16,10 @@
#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/init_syscalls.h>
-#include <linux/task_work.h>
#include <linux/umh.h>
+#include "do_mounts.h"
+
static __initdata bool csum_present;
static __initdata u32 io_csum;
@@ -679,8 +680,6 @@ static void __init populate_initrd_image(char *err)
struct file *file;
loff_t pos = 0;
- unpack_to_rootfs(__initramfs_start, __initramfs_size);
-
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
@@ -736,8 +735,7 @@ done:
initrd_start = 0;
initrd_end = 0;
- flush_delayed_fput();
- task_work_run();
+ init_flush_fput();
}
static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
diff --git a/init/main.c b/init/main.c
index e24b0780fdff..7dce08198b13 100644
--- a/init/main.c
+++ b/init/main.c
@@ -99,6 +99,7 @@
#include <linux/init_syscalls.h>
#include <linux/stackdepot.h>
#include <linux/randomize_kstack.h>
+#include <linux/pidfs.h>
#include <net/net_namespace.h>
#include <asm/io.h>
@@ -603,7 +604,6 @@ static int __init rdinit_setup(char *str)
__setup("rdinit=", rdinit_setup);
#ifndef CONFIG_SMP
-static const unsigned int setup_max_cpus = NR_CPUS;
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
#endif
@@ -776,6 +776,10 @@ void __init __weak smp_setup_processor_id(void)
{
}
+void __init __weak smp_prepare_boot_cpu(void)
+{
+}
+
# if THREAD_SIZE >= PAGE_SIZE
void __init __weak thread_stack_cache_init(void)
{
@@ -1059,6 +1063,7 @@ void start_kernel(void)
seq_file_init();
proc_root_init();
nsfs_init();
+ pidfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
@@ -1545,6 +1550,7 @@ static noinline void __init kernel_init_freeable(void)
sched_init_smp();
workqueue_init_topology();
+ async_init();
padata_init();
page_alloc_init_late();
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 2cdc51825405..2e1d4e03799c 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
statx.o net.o msg_ring.o timeout.o \
sqpoll.o fdinfo.o tctx.o poll.o \
cancel.o kbuf.o rsrc.o rw.o opdef.o \
- notif.o waitid.o register.o
+ notif.o waitid.o register.o truncate.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
+obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 8a8b07dfc444..acfcdd7f059a 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -58,9 +58,8 @@ bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
return false;
if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
- if (cd->seq == req->work.cancel_seq)
+ if (io_cancel_match_sequence(req, cd->seq))
return false;
- req->work.cancel_seq = cd->seq;
}
return true;
diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index c0a8e7c520b6..76b32e65c03c 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -25,4 +25,14 @@ void init_hash_table(struct io_hash_table *table, unsigned size);
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);
+static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence)
+{
+ if ((req->flags & REQ_F_CANCEL_SEQ) && sequence == req->work.cancel_seq)
+ return true;
+
+ req->flags |= REQ_F_CANCEL_SEQ;
+ req->work.cancel_seq = sequence;
+ return false;
+}
+
#endif
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 976e9500f651..8d444dd1b0a7 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -55,6 +55,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
struct io_ring_ctx *ctx = f->private_data;
struct io_overflow_cqe *ocqe;
struct io_rings *r = ctx->rings;
+ struct rusage sq_usage;
unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
unsigned int sq_head = READ_ONCE(r->sq.head);
unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -64,6 +65,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
unsigned int sq_shift = 0;
unsigned int sq_entries, cq_entries;
int sq_pid = -1, sq_cpu = -1;
+ u64 sq_total_time = 0, sq_work_time = 0;
bool has_lock;
unsigned int i;
@@ -145,12 +147,24 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
struct io_sq_data *sq = ctx->sq_data;
- sq_pid = sq->task_pid;
- sq_cpu = sq->sq_cpu;
+ /*
+ * sq->thread might be NULL if we raced with the sqpoll
+ * thread termination.
+ */
+ if (sq->thread) {
+ sq_pid = sq->task_pid;
+ sq_cpu = sq->sq_cpu;
+ getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
+ sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
+ + sq_usage.ru_stime.tv_usec);
+ sq_work_time = sq->work_time;
+ }
}
seq_printf(m, "SqThread:\t%d\n", sq_pid);
seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
+ seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
+ seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
struct file *f = io_file_from_index(&ctx->file_table, i);
diff --git a/io_uring/filetable.h b/io_uring/filetable.h
index b47adf170c31..b2435c4dca1f 100644
--- a/io_uring/filetable.h
+++ b/io_uring/filetable.h
@@ -17,7 +17,7 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset);
int io_register_file_alloc_range(struct io_ring_ctx *ctx,
struct io_uring_file_index_range __user *arg);
-unsigned int io_file_get_flags(struct file *file);
+io_req_flags_t io_file_get_flags(struct file *file);
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
{
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cd9a137ad6ce..cf348c33f485 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -59,7 +59,6 @@
#include <linux/bvec.h>
#include <linux/net.h>
#include <net/sock.h>
-#include <net/af_unix.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -95,6 +94,7 @@
#include "notif.h"
#include "waitid.h"
#include "futex.h"
+#include "napi.h"
#include "timeout.h"
#include "poll.h"
@@ -122,11 +122,6 @@
#define IO_COMPL_BATCH 32
#define IO_REQ_ALLOC_BATCH 8
-enum {
- IO_CHECK_CQ_OVERFLOW_BIT,
- IO_CHECK_CQ_DROPPED_BIT,
-};
-
struct io_defer_entry {
struct list_head list;
struct io_kiocb *req;
@@ -349,6 +344,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd);
+ io_napi_init(ctx);
+
return ctx;
err:
kfree(ctx->cancel_table.hbs);
@@ -463,7 +460,6 @@ static void io_prep_async_work(struct io_kiocb *req)
req->work.list.next = NULL;
req->work.flags = 0;
- req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
if (req->flags & REQ_F_FORCE_ASYNC)
req->work.flags |= IO_WQ_WORK_CONCURRENT;
@@ -670,7 +666,6 @@ static void io_cq_unlock_post(struct io_ring_ctx *ctx)
io_commit_cqring_flush(ctx);
}
-/* Returns true if there are no backlogged entries after the flush */
static void io_cqring_overflow_kill(struct io_ring_ctx *ctx)
{
struct io_overflow_cqe *ocqe;
@@ -949,6 +944,8 @@ bool io_fill_cqe_req_aux(struct io_kiocb *req, bool defer, s32 res, u32 cflags)
u64 user_data = req->cqe.user_data;
struct io_uring_cqe *cqe;
+ lockdep_assert(!io_wq_current_is_worker());
+
if (!defer)
return __io_post_aux_cqe(ctx, user_data, res, cflags, false);
@@ -1025,15 +1022,15 @@ static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
{
- if (req->ctx->task_complete && req->ctx->submitter_task != current) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (ctx->task_complete && ctx->submitter_task != current) {
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) ||
- !(req->ctx->flags & IORING_SETUP_IOPOLL)) {
+ !(ctx->flags & IORING_SETUP_IOPOLL)) {
__io_req_complete_post(req, issue_flags);
} else {
- struct io_ring_ctx *ctx = req->ctx;
-
mutex_lock(&ctx->uring_lock);
__io_req_complete_post(req, issue_flags & ~IO_URING_F_UNLOCKED);
mutex_unlock(&ctx->uring_lock);
@@ -1174,40 +1171,44 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
percpu_ref_put(&ctx->refs);
}
-static unsigned int handle_tw_list(struct llist_node *node,
- struct io_ring_ctx **ctx,
- struct io_tw_state *ts,
- struct llist_node *last)
+/*
+ * Run queued task_work, returning the number of entries processed in *count.
+ * If more entries than max_entries are available, stop processing once this
+ * is reached and return the rest of the list.
+ */
+struct llist_node *io_handle_tw_list(struct llist_node *node,
+ unsigned int *count,
+ unsigned int max_entries)
{
- unsigned int count = 0;
+ struct io_ring_ctx *ctx = NULL;
+ struct io_tw_state ts = { };
- while (node && node != last) {
+ do {
struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
- prefetch(container_of(next, struct io_kiocb, io_task_work.node));
-
- if (req->ctx != *ctx) {
- ctx_flush_and_put(*ctx, ts);
- *ctx = req->ctx;
+ if (req->ctx != ctx) {
+ ctx_flush_and_put(ctx, &ts);
+ ctx = req->ctx;
/* if not contended, grab and improve batching */
- ts->locked = mutex_trylock(&(*ctx)->uring_lock);
- percpu_ref_get(&(*ctx)->refs);
+ ts.locked = mutex_trylock(&ctx->uring_lock);
+ percpu_ref_get(&ctx->refs);
}
INDIRECT_CALL_2(req->io_task_work.func,
io_poll_task_func, io_req_rw_complete,
- req, ts);
+ req, &ts);
node = next;
- count++;
+ (*count)++;
if (unlikely(need_resched())) {
- ctx_flush_and_put(*ctx, ts);
- *ctx = NULL;
+ ctx_flush_and_put(ctx, &ts);
+ ctx = NULL;
cond_resched();
}
- }
+ } while (node && *count < max_entries);
- return count;
+ ctx_flush_and_put(ctx, &ts);
+ return node;
}
/**
@@ -1224,22 +1225,6 @@ static inline struct llist_node *io_llist_xchg(struct llist_head *head,
return xchg(&head->first, new);
}
-/**
- * io_llist_cmpxchg - possibly swap all entries in a lock-less list
- * @head: the head of lock-less list to delete all entries
- * @old: expected old value of the first entry of the list
- * @new: new entry as the head of the list
- *
- * perform a cmpxchg on the first entry of the list.
- */
-
-static inline struct llist_node *io_llist_cmpxchg(struct llist_head *head,
- struct llist_node *old,
- struct llist_node *new)
-{
- return cmpxchg(&head->first, old, new);
-}
-
static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync)
{
struct llist_node *node = llist_del_all(&tctx->task_list);
@@ -1268,45 +1253,41 @@ static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync)
}
}
-void tctx_task_work(struct callback_head *cb)
+struct llist_node *tctx_task_work_run(struct io_uring_task *tctx,
+ unsigned int max_entries,
+ unsigned int *count)
{
- struct io_tw_state ts = {};
- struct io_ring_ctx *ctx = NULL;
- struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
- task_work);
- struct llist_node fake = {};
struct llist_node *node;
- unsigned int loops = 0;
- unsigned int count = 0;
if (unlikely(current->flags & PF_EXITING)) {
io_fallback_tw(tctx, true);
- return;
+ return NULL;
}
- do {
- loops++;
- node = io_llist_xchg(&tctx->task_list, &fake);
- count += handle_tw_list(node, &ctx, &ts, &fake);
-
- /* skip expensive cmpxchg if there are items in the list */
- if (READ_ONCE(tctx->task_list.first) != &fake)
- continue;
- if (ts.locked && !wq_list_empty(&ctx->submit_state.compl_reqs)) {
- io_submit_flush_completions(ctx);
- if (READ_ONCE(tctx->task_list.first) != &fake)
- continue;
- }
- node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL);
- } while (node != &fake);
-
- ctx_flush_and_put(ctx, &ts);
+ node = llist_del_all(&tctx->task_list);
+ if (node) {
+ node = llist_reverse_order(node);
+ node = io_handle_tw_list(node, count, max_entries);
+ }
/* relaxed read is enough as only the task itself sets ->in_cancel */
if (unlikely(atomic_read(&tctx->in_cancel)))
io_uring_drop_tctx_refs(current);
- trace_io_uring_task_work_run(tctx, count, loops);
+ trace_io_uring_task_work_run(tctx, *count);
+ return node;
+}
+
+void tctx_task_work(struct callback_head *cb)
+{
+ struct io_uring_task *tctx;
+ struct llist_node *ret;
+ unsigned int count = 0;
+
+ tctx = container_of(cb, struct io_uring_task, task_work);
+ ret = tctx_task_work_run(tctx, UINT_MAX, &count);
+ /* can't happen */
+ WARN_ON_ONCE(ret);
}
static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
@@ -1389,6 +1370,15 @@ static void io_req_normal_work_add(struct io_kiocb *req)
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+ /* SQPOLL doesn't need the task_work added, it'll run it itself */
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ struct io_sq_data *sqd = ctx->sq_data;
+
+ if (wq_has_sleeper(&sqd->wait))
+ wake_up(&sqd->wait);
+ return;
+ }
+
if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
return;
@@ -1420,7 +1410,20 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
}
}
-static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts)
+static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
+ int min_events)
+{
+ if (llist_empty(&ctx->work_llist))
+ return false;
+ if (events < min_events)
+ return true;
+ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+ atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+ return false;
+}
+
+static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
+ int min_events)
{
struct llist_node *node;
unsigned int loops = 0;
@@ -1440,7 +1443,6 @@ again:
struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
- prefetch(container_of(next, struct io_kiocb, io_task_work.node));
INDIRECT_CALL_2(req->io_task_work.func,
io_poll_task_func, io_req_rw_complete,
req, ts);
@@ -1449,18 +1451,20 @@ again:
}
loops++;
- if (!llist_empty(&ctx->work_llist))
+ if (io_run_local_work_continue(ctx, ret, min_events))
goto again;
if (ts->locked) {
io_submit_flush_completions(ctx);
- if (!llist_empty(&ctx->work_llist))
+ if (io_run_local_work_continue(ctx, ret, min_events))
goto again;
}
+
trace_io_uring_local_work_run(ctx, ret, loops);
return ret;
}
-static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
+static inline int io_run_local_work_locked(struct io_ring_ctx *ctx,
+ int min_events)
{
struct io_tw_state ts = { .locked = true, };
int ret;
@@ -1468,20 +1472,20 @@ static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
if (llist_empty(&ctx->work_llist))
return 0;
- ret = __io_run_local_work(ctx, &ts);
+ ret = __io_run_local_work(ctx, &ts, min_events);
/* shouldn't happen! */
if (WARN_ON_ONCE(!ts.locked))
mutex_lock(&ctx->uring_lock);
return ret;
}
-static int io_run_local_work(struct io_ring_ctx *ctx)
+static int io_run_local_work(struct io_ring_ctx *ctx, int min_events)
{
struct io_tw_state ts = {};
int ret;
ts.locked = mutex_trylock(&ctx->uring_lock);
- ret = __io_run_local_work(ctx, &ts);
+ ret = __io_run_local_work(ctx, &ts, min_events);
if (ts.locked)
mutex_unlock(&ctx->uring_lock);
@@ -1677,7 +1681,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
io_task_work_pending(ctx)) {
u32 tail = ctx->cached_cq_tail;
- (void) io_run_local_work_locked(ctx);
+ (void) io_run_local_work_locked(ctx, min);
if (task_work_pending(current) ||
wq_list_empty(&ctx->iopoll_list)) {
@@ -1768,9 +1772,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
}
}
-unsigned int io_file_get_flags(struct file *file)
+io_req_flags_t io_file_get_flags(struct file *file)
{
- unsigned int res = 0;
+ io_req_flags_t res = 0;
if (S_ISREG(file_inode(file)->i_mode))
res |= REQ_F_ISREG;
@@ -1966,10 +1970,28 @@ fail:
goto fail;
}
+ /*
+ * If DEFER_TASKRUN is set, it's only allowed to post CQEs from the
+ * submitter task context. Final request completions are handed to the
+ * right context, however this is not the case of auxiliary CQEs,
+ * which is the main mean of operation for multishot requests.
+ * Don't allow any multishot execution from io-wq. It's more restrictive
+ * than necessary and also cleaner.
+ */
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
+ err = -EBADFD;
+ if (!io_file_can_poll(req))
+ goto fail;
+ err = -ECANCELED;
+ if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
+ goto fail;
+ return;
+ }
+
if (req->flags & REQ_F_FORCE_ASYNC) {
bool opcode_poll = def->pollin || def->pollout;
- if (opcode_poll && file_can_poll(req->file)) {
+ if (opcode_poll && io_file_can_poll(req)) {
needs_poll = true;
issue_flags |= IO_URING_F_NONBLOCK;
}
@@ -2171,7 +2193,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
/* req is partially pre-initialised, see io_preinit_req() */
req->opcode = opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags = sqe_flags = READ_ONCE(sqe->flags);
+ sqe_flags = READ_ONCE(sqe->flags);
+ req->flags = (io_req_flags_t) sqe_flags;
req->cqe.user_data = READ_ONCE(sqe->user_data);
req->file = NULL;
req->rsrc_node = NULL;
@@ -2475,33 +2498,6 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
return ret;
}
-struct io_wait_queue {
- struct wait_queue_entry wq;
- struct io_ring_ctx *ctx;
- unsigned cq_tail;
- unsigned nr_timeouts;
- ktime_t timeout;
-};
-
-static inline bool io_has_work(struct io_ring_ctx *ctx)
-{
- return test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq) ||
- !llist_empty(&ctx->work_llist);
-}
-
-static inline bool io_should_wake(struct io_wait_queue *iowq)
-{
- struct io_ring_ctx *ctx = iowq->ctx;
- int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
-
- /*
- * Wake up if we have enough events, or if a timeout occurred since we
- * started waiting. For timeouts, we always want to return to userspace,
- * regardless of event count.
- */
- return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
-}
-
static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
int wake_flags, void *key)
{
@@ -2520,7 +2516,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
{
if (!llist_empty(&ctx->work_llist)) {
__set_current_state(TASK_RUNNING);
- if (io_run_local_work(ctx) > 0)
+ if (io_run_local_work(ctx, INT_MAX) > 0)
return 0;
}
if (io_run_task_work() > 0)
@@ -2588,7 +2584,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (!io_allowed_run_tw(ctx))
return -EEXIST;
if (!llist_empty(&ctx->work_llist))
- io_run_local_work(ctx);
+ io_run_local_work(ctx, min_events);
io_run_task_work();
io_cqring_overflow_flush(ctx);
/* if user messes with these they will just get an early return */
@@ -2621,16 +2617,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (get_timespec64(&ts, uts))
return -EFAULT;
+
iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
+ io_napi_adjust_timeout(ctx, &iowq, &ts);
}
+ io_napi_busy_loop(ctx, &iowq);
+
trace_io_uring_cqring_wait(ctx, min_events);
do {
+ int nr_wait = (int) iowq.cq_tail - READ_ONCE(ctx->rings->cq.tail);
unsigned long check_cq;
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
- int nr_wait = (int) iowq.cq_tail - READ_ONCE(ctx->rings->cq.tail);
-
atomic_set(&ctx->cq_wait_nr, nr_wait);
set_current_state(TASK_INTERRUPTIBLE);
} else {
@@ -2649,7 +2648,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
*/
io_run_task_work();
if (!llist_empty(&ctx->work_llist))
- io_run_local_work(ctx);
+ io_run_local_work(ctx, nr_wait);
/*
* Non-local task_work will be run on exit to userspace, but
@@ -2917,6 +2916,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_req_caches_free(ctx);
if (ctx->hash_map)
io_wq_put_hash(ctx->hash_map);
+ io_napi_free(ctx);
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
kfree(ctx->io_bl);
@@ -3304,7 +3304,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
io_allowed_defer_tw_run(ctx))
- ret |= io_run_local_work(ctx) > 0;
+ ret |= io_run_local_work(ctx, INT_MAX) > 0;
ret |= io_cancel_defer_files(ctx, task, cancel_all);
mutex_lock(&ctx->uring_lock);
ret |= io_poll_remove_all(ctx, task, cancel_all);
@@ -3666,7 +3666,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
* it should handle ownership problems if any.
*/
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
- (void)io_run_local_work_locked(ctx);
+ (void)io_run_local_work_locked(ctx, min_complete);
}
mutex_unlock(&ctx->uring_lock);
}
@@ -4153,7 +4153,7 @@ static int __init io_uring_init(void)
BUILD_BUG_ON(SQE_COMMON_FLAGS >= (1 << 8));
BUILD_BUG_ON((SQE_VALID_FLAGS | SQE_COMMON_FLAGS) != SQE_VALID_FLAGS);
- BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
+ BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof_field(struct io_kiocb, flags));
BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
@@ -4175,9 +4175,8 @@ static int __init io_uring_init(void)
SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU,
offsetof(struct io_kiocb, cmd.data),
sizeof_field(struct io_kiocb, cmd.data), NULL);
- io_buf_cachep = kmem_cache_create("io_buffer", sizeof(struct io_buffer), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
- NULL);
+ io_buf_cachep = KMEM_CACHE(io_buffer,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
#ifdef CONFIG_SYSCTL
register_sysctl_init("kernel", kernel_io_uring_disabled_table);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 04e33f25919c..6426ee382276 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -5,6 +5,7 @@
#include <linux/lockdep.h>
#include <linux/resume_user_mode.h>
#include <linux/kasan.h>
+#include <linux/poll.h>
#include <linux/io_uring_types.h>
#include <uapi/linux/eventpoll.h>
#include "io-wq.h"
@@ -15,12 +16,18 @@
#include <trace/events/io_uring.h>
#endif
-
enum {
IOU_OK = 0,
IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
/*
+ * Requeue the task_work to restart operations on this request. The
+ * actual value isn't important, should just be not an otherwise
+ * valid error code, yet less than -MAX_ERRNO and valid internally.
+ */
+ IOU_REQUEUE = -3072,
+
+ /*
* Intended only when both IO_URING_F_MULTISHOT is passed
* to indicate to the poll runner that multishot should be
* removed and the result is set on req->cqe.res.
@@ -28,6 +35,32 @@ enum {
IOU_STOP_MULTISHOT = -ECANCELED,
};
+struct io_wait_queue {
+ struct wait_queue_entry wq;
+ struct io_ring_ctx *ctx;
+ unsigned cq_tail;
+ unsigned nr_timeouts;
+ ktime_t timeout;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned int napi_busy_poll_to;
+ bool napi_prefer_busy_poll;
+#endif
+};
+
+static inline bool io_should_wake(struct io_wait_queue *iowq)
+{
+ struct io_ring_ctx *ctx = iowq->ctx;
+ int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
+
+ /*
+ * Wake up if we have enough events, or if a timeout occurred since we
+ * started waiting. For timeouts, we always want to return to userspace,
+ * regardless of event count.
+ */
+ return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+}
+
bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow);
void io_req_cqe_overflow(struct io_kiocb *req);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
@@ -50,6 +83,8 @@ void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts);
+struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *count, unsigned int max_entries);
+struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count);
void tctx_task_work(struct callback_head *cb);
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
int io_uring_alloc_task_context(struct task_struct *task,
@@ -201,7 +236,7 @@ static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx,
unsigned issue_flags)
{
lockdep_assert_held(&ctx->uring_lock);
- if (issue_flags & IO_URING_F_UNLOCKED)
+ if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
mutex_unlock(&ctx->uring_lock);
}
@@ -214,7 +249,7 @@ static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
* The only exception is when we've detached the request and issue it
* from an async worker thread, grab the lock for that case.
*/
- if (issue_flags & IO_URING_F_UNLOCKED)
+ if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
mutex_lock(&ctx->uring_lock);
lockdep_assert_held(&ctx->uring_lock);
}
@@ -268,6 +303,8 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
static inline int io_run_task_work(void)
{
+ bool ret = false;
+
/*
* Always check-and-clear the task_work notification signal. With how
* signaling works for task_work, we can find it set with nothing to
@@ -279,18 +316,26 @@ static inline int io_run_task_work(void)
* PF_IO_WORKER never returns to userspace, so check here if we have
* notify work that needs processing.
*/
- if (current->flags & PF_IO_WORKER &&
- test_thread_flag(TIF_NOTIFY_RESUME)) {
- __set_current_state(TASK_RUNNING);
- resume_user_mode_work(NULL);
+ if (current->flags & PF_IO_WORKER) {
+ if (test_thread_flag(TIF_NOTIFY_RESUME)) {
+ __set_current_state(TASK_RUNNING);
+ resume_user_mode_work(NULL);
+ }
+ if (current->io_uring) {
+ unsigned int count = 0;
+
+ tctx_task_work_run(current->io_uring, UINT_MAX, &count);
+ if (count)
+ ret = true;
+ }
}
if (task_work_pending(current)) {
__set_current_state(TASK_RUNNING);
task_work_run();
- return 1;
+ ret = true;
}
- return 0;
+ return ret;
}
static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
@@ -392,4 +437,26 @@ static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
return 2 * sizeof(struct io_uring_sqe);
return sizeof(struct io_uring_sqe);
}
+
+static inline bool io_file_can_poll(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_CAN_POLL)
+ return true;
+ if (file_can_poll(req->file)) {
+ req->flags |= REQ_F_CAN_POLL;
+ return true;
+ }
+ return false;
+}
+
+enum {
+ IO_CHECK_CQ_OVERFLOW_BIT,
+ IO_CHECK_CQ_DROPPED_BIT,
+};
+
+static inline bool io_has_work(struct io_ring_ctx *ctx)
+{
+ return test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq) ||
+ !llist_empty(&ctx->work_llist);
+}
#endif
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 18df5a9d2f5e..9be42bff936b 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -81,15 +81,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
struct io_buffer_list *bl;
struct io_buffer *buf;
- /*
- * For legacy provided buffer mode, don't recycle if we already did
- * IO to this buffer. For ring-mapped provided buffer mode, we should
- * increment ring->head to explicitly monopolize the buffer to avoid
- * multiple use.
- */
- if (req->flags & REQ_F_PARTIAL_IO)
- return false;
-
io_ring_submit_lock(ctx, issue_flags);
buf = req->kbuf;
@@ -102,10 +93,8 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
return true;
}
-unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
+void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
{
- unsigned int cflags;
-
/*
* We can add this buffer back to two lists:
*
@@ -118,21 +107,17 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
* We migrate buffers from the comp_list to the issue cache list
* when we need one.
*/
- if (req->flags & REQ_F_BUFFER_RING) {
- /* no buffers to recycle for this case */
- cflags = __io_put_kbuf_list(req, NULL);
- } else if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (issue_flags & IO_URING_F_UNLOCKED) {
struct io_ring_ctx *ctx = req->ctx;
spin_lock(&ctx->completion_lock);
- cflags = __io_put_kbuf_list(req, &ctx->io_buffers_comp);
+ __io_put_kbuf_list(req, &ctx->io_buffers_comp);
spin_unlock(&ctx->completion_lock);
} else {
lockdep_assert_held(&req->ctx->uring_lock);
- cflags = __io_put_kbuf_list(req, &req->ctx->io_buffers_cache);
+ __io_put_kbuf_list(req, &req->ctx->io_buffers_cache);
}
- return cflags;
}
static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
@@ -145,6 +130,8 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
list_del(&kbuf->list);
if (*len == 0 || *len > kbuf->len)
*len = kbuf->len;
+ if (list_empty(&bl->buf_list))
+ req->flags |= REQ_F_BL_EMPTY;
req->flags |= REQ_F_BUFFER_SELECTED;
req->kbuf = kbuf;
req->buf_index = kbuf->bid;
@@ -158,12 +145,16 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
unsigned int issue_flags)
{
struct io_uring_buf_ring *br = bl->buf_ring;
+ __u16 tail, head = bl->head;
struct io_uring_buf *buf;
- __u16 head = bl->head;
- if (unlikely(smp_load_acquire(&br->tail) == head))
+ tail = smp_load_acquire(&br->tail);
+ if (unlikely(tail == head))
return NULL;
+ if (head + 1 == tail)
+ req->flags |= REQ_F_BL_EMPTY;
+
head &= bl->mask;
/* mmaped buffers are always contig */
if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
@@ -180,7 +171,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->buf_list = bl;
req->buf_index = buf->bid;
- if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
+ if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
/*
* If we came in unlocked, we have no choice but to consume the
* buffer here, otherwise nothing ensures that the buffer won't
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 53dfaa71a397..5218bfd79e87 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -57,7 +57,7 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
-unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
+void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
@@ -73,21 +73,9 @@ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
* to monopolize the buffer.
*/
if (req->buf_list) {
- if (req->flags & REQ_F_PARTIAL_IO) {
- /*
- * If we end up here, then the io_uring_lock has
- * been kept held since we retrieved the buffer.
- * For the io-wq case, we already cleared
- * req->buf_list when the buffer was retrieved,
- * hence it cannot be set here for that case.
- */
- req->buf_list->head++;
- req->buf_list = NULL;
- } else {
- req->buf_index = req->buf_list->bgid;
- req->flags &= ~REQ_F_BUFFER_RING;
- return true;
- }
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ return true;
}
return false;
}
@@ -101,6 +89,8 @@ static inline bool io_do_buffer_select(struct io_kiocb *req)
static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
{
+ if (req->flags & REQ_F_BL_NO_RECYCLE)
+ return false;
if (req->flags & REQ_F_BUFFER_SELECTED)
return io_kbuf_recycle_legacy(req, issue_flags);
if (req->flags & REQ_F_BUFFER_RING)
@@ -108,41 +98,54 @@ static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
return false;
}
-static inline unsigned int __io_put_kbuf_list(struct io_kiocb *req,
- struct list_head *list)
+static inline void __io_put_kbuf_ring(struct io_kiocb *req)
{
- unsigned int ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
+ if (req->buf_list) {
+ req->buf_index = req->buf_list->bgid;
+ req->buf_list->head++;
+ }
+ req->flags &= ~REQ_F_BUFFER_RING;
+}
+static inline void __io_put_kbuf_list(struct io_kiocb *req,
+ struct list_head *list)
+{
if (req->flags & REQ_F_BUFFER_RING) {
- if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
- req->buf_list->head++;
- }
- req->flags &= ~REQ_F_BUFFER_RING;
+ __io_put_kbuf_ring(req);
} else {
req->buf_index = req->kbuf->bgid;
list_add(&req->kbuf->list, list);
req->flags &= ~REQ_F_BUFFER_SELECTED;
}
-
- return ret;
}
static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
{
+ unsigned int ret;
+
lockdep_assert_held(&req->ctx->completion_lock);
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return 0;
- return __io_put_kbuf_list(req, &req->ctx->io_buffers_comp);
+
+ ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
+ __io_put_kbuf_list(req, &req->ctx->io_buffers_comp);
+ return ret;
}
static inline unsigned int io_put_kbuf(struct io_kiocb *req,
unsigned issue_flags)
{
+ unsigned int ret;
- if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
return 0;
- return __io_put_kbuf(req, issue_flags);
+
+ ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
+ if (req->flags & REQ_F_BUFFER_RING)
+ __io_put_kbuf_ring(req);
+ else
+ __io_put_kbuf(req, issue_flags);
+ return ret;
}
#endif
diff --git a/io_uring/napi.c b/io_uring/napi.c
new file mode 100644
index 000000000000..883a1a665907
--- /dev/null
+++ b/io_uring/napi.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "io_uring.h"
+#include "napi.h"
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+
+/* Timeout for cleanout of stale entries. */
+#define NAPI_TIMEOUT (60 * SEC_CONVERSION)
+
+struct io_napi_entry {
+ unsigned int napi_id;
+ struct list_head list;
+
+ unsigned long timeout;
+ struct hlist_node node;
+
+ struct rcu_head rcu;
+};
+
+static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
+ unsigned int napi_id)
+{
+ struct io_napi_entry *e;
+
+ hlist_for_each_entry_rcu(e, hash_list, node) {
+ if (e->napi_id != napi_id)
+ continue;
+ e->timeout = jiffies + NAPI_TIMEOUT;
+ return e;
+ }
+
+ return NULL;
+}
+
+void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+{
+ struct hlist_head *hash_list;
+ unsigned int napi_id;
+ struct sock *sk;
+ struct io_napi_entry *e;
+
+ sk = sock->sk;
+ if (!sk)
+ return;
+
+ napi_id = READ_ONCE(sk->sk_napi_id);
+
+ /* Non-NAPI IDs can be rejected. */
+ if (napi_id < MIN_NAPI_ID)
+ return;
+
+ hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
+
+ rcu_read_lock();
+ e = io_napi_hash_find(hash_list, napi_id);
+ if (e) {
+ e->timeout = jiffies + NAPI_TIMEOUT;
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ e = kmalloc(sizeof(*e), GFP_NOWAIT);
+ if (!e)
+ return;
+
+ e->napi_id = napi_id;
+ e->timeout = jiffies + NAPI_TIMEOUT;
+
+ spin_lock(&ctx->napi_lock);
+ if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
+ spin_unlock(&ctx->napi_lock);
+ kfree(e);
+ return;
+ }
+
+ hlist_add_tail_rcu(&e->node, hash_list);
+ list_add_tail(&e->list, &ctx->napi_list);
+ spin_unlock(&ctx->napi_lock);
+}
+
+static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
+{
+ struct io_napi_entry *e;
+ unsigned int i;
+
+ spin_lock(&ctx->napi_lock);
+ hash_for_each(ctx->napi_ht, i, e, node) {
+ if (time_after(jiffies, e->timeout)) {
+ list_del(&e->list);
+ hash_del_rcu(&e->node);
+ kfree_rcu(e, rcu);
+ }
+ }
+ spin_unlock(&ctx->napi_lock);
+}
+
+static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
+{
+ if (is_stale)
+ __io_napi_remove_stale(ctx);
+}
+
+static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
+ unsigned long bp_usec)
+{
+ if (bp_usec) {
+ unsigned long end_time = start_time + bp_usec;
+ unsigned long now = busy_loop_current_time();
+
+ return time_after(now, end_time);
+ }
+
+ return true;
+}
+
+static bool io_napi_busy_loop_should_end(void *data,
+ unsigned long start_time)
+{
+ struct io_wait_queue *iowq = data;
+
+ if (signal_pending(current))
+ return true;
+ if (io_should_wake(iowq) || io_has_work(iowq->ctx))
+ return true;
+ if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
+ return true;
+
+ return false;
+}
+
+static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
+ void *loop_end_arg)
+{
+ struct io_napi_entry *e;
+ bool (*loop_end)(void *, unsigned long) = NULL;
+ bool is_stale = false;
+
+ if (loop_end_arg)
+ loop_end = io_napi_busy_loop_should_end;
+
+ list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+ napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
+ ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
+
+ if (time_after(jiffies, e->timeout))
+ is_stale = true;
+ }
+
+ return is_stale;
+}
+
+static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq)
+{
+ unsigned long start_time = busy_loop_current_time();
+ void *loop_end_arg = NULL;
+ bool is_stale = false;
+
+ /* Singular lists use a different napi loop end check function and are
+ * only executed once.
+ */
+ if (list_is_singular(&ctx->napi_list))
+ loop_end_arg = iowq;
+
+ rcu_read_lock();
+ do {
+ is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
+ } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
+ rcu_read_unlock();
+
+ io_napi_remove_stale(ctx, is_stale);
+}
+
+/*
+ * io_napi_init() - Init napi settings
+ * @ctx: pointer to io-uring context structure
+ *
+ * Init napi settings in the io-uring context.
+ */
+void io_napi_init(struct io_ring_ctx *ctx)
+{
+ INIT_LIST_HEAD(&ctx->napi_list);
+ spin_lock_init(&ctx->napi_lock);
+ ctx->napi_prefer_busy_poll = false;
+ ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+}
+
+/*
+ * io_napi_free() - Deallocate napi
+ * @ctx: pointer to io-uring context structure
+ *
+ * Free the napi list and the hash table in the io-uring context.
+ */
+void io_napi_free(struct io_ring_ctx *ctx)
+{
+ struct io_napi_entry *e;
+ LIST_HEAD(napi_list);
+ unsigned int i;
+
+ spin_lock(&ctx->napi_lock);
+ hash_for_each(ctx->napi_ht, i, e, node) {
+ hash_del_rcu(&e->node);
+ kfree_rcu(e, rcu);
+ }
+ spin_unlock(&ctx->napi_lock);
+}
+
+/*
+ * io_napi_register() - Register napi with io-uring
+ * @ctx: pointer to io-uring context structure
+ * @arg: pointer to io_uring_napi structure
+ *
+ * Register napi in the io-uring context.
+ */
+int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+ const struct io_uring_napi curr = {
+ .busy_poll_to = ctx->napi_busy_poll_to,
+ .prefer_busy_poll = ctx->napi_prefer_busy_poll
+ };
+ struct io_uring_napi napi;
+
+ if (copy_from_user(&napi, arg, sizeof(napi)))
+ return -EFAULT;
+ if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
+ return -EINVAL;
+
+ if (copy_to_user(arg, &curr, sizeof(curr)))
+ return -EFAULT;
+
+ WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
+ WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
+ WRITE_ONCE(ctx->napi_enabled, true);
+ return 0;
+}
+
+/*
+ * io_napi_unregister() - Unregister napi with io-uring
+ * @ctx: pointer to io-uring context structure
+ * @arg: pointer to io_uring_napi structure
+ *
+ * Unregister napi. If arg has been specified copy the busy poll timeout and
+ * prefer busy poll setting to the passed in structure.
+ */
+int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+ const struct io_uring_napi curr = {
+ .busy_poll_to = ctx->napi_busy_poll_to,
+ .prefer_busy_poll = ctx->napi_prefer_busy_poll
+ };
+
+ if (arg && copy_to_user(arg, &curr, sizeof(curr)))
+ return -EFAULT;
+
+ WRITE_ONCE(ctx->napi_busy_poll_to, 0);
+ WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
+ WRITE_ONCE(ctx->napi_enabled, false);
+ return 0;
+}
+
+/*
+ * __io_napi_adjust_timeout() - Add napi id to the busy poll list
+ * @ctx: pointer to io-uring context structure
+ * @iowq: pointer to io wait queue
+ * @ts: pointer to timespec or NULL
+ *
+ * Adjust the busy loop timeout according to timespec and busy poll timeout.
+ */
+void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
+ struct timespec64 *ts)
+{
+ unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);
+
+ if (ts) {
+ struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
+
+ if (timespec64_compare(ts, &poll_to_ts) > 0) {
+ *ts = timespec64_sub(*ts, poll_to_ts);
+ } else {
+ u64 to = timespec64_to_ns(ts);
+
+ do_div(to, 1000);
+ ts->tv_sec = 0;
+ ts->tv_nsec = 0;
+ }
+ }
+
+ iowq->napi_busy_poll_to = poll_to;
+}
+
+/*
+ * __io_napi_busy_loop() - execute busy poll loop
+ * @ctx: pointer to io-uring context structure
+ * @iowq: pointer to io wait queue
+ *
+ * Execute the busy poll loop and merge the spliced off list.
+ */
+void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
+{
+ iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
+
+ if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled)
+ io_napi_blocking_busy_loop(ctx, iowq);
+}
+
+/*
+ * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
+ * @ctx: pointer to io-uring context structure
+ *
+ * Splice of the napi list and execute the napi busy poll loop.
+ */
+int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
+{
+ LIST_HEAD(napi_list);
+ bool is_stale = false;
+
+ if (!READ_ONCE(ctx->napi_busy_poll_to))
+ return 0;
+ if (list_empty_careful(&ctx->napi_list))
+ return 0;
+
+ rcu_read_lock();
+ is_stale = __io_napi_do_busy_loop(ctx, NULL);
+ rcu_read_unlock();
+
+ io_napi_remove_stale(ctx, is_stale);
+ return 1;
+}
+
+#endif
diff --git a/io_uring/napi.h b/io_uring/napi.h
new file mode 100644
index 000000000000..6fc0393d0dbe
--- /dev/null
+++ b/io_uring/napi.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef IOU_NAPI_H
+#define IOU_NAPI_H
+
+#include <linux/kernel.h>
+#include <linux/io_uring.h>
+#include <net/busy_poll.h>
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+
+void io_napi_init(struct io_ring_ctx *ctx);
+void io_napi_free(struct io_ring_ctx *ctx);
+
+int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
+int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
+
+void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
+
+void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq, struct timespec64 *ts);
+void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
+int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
+
+static inline bool io_napi(struct io_ring_ctx *ctx)
+{
+ return !list_empty(&ctx->napi_list);
+}
+
+static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq,
+ struct timespec64 *ts)
+{
+ if (!io_napi(ctx))
+ return;
+ __io_napi_adjust_timeout(ctx, iowq, ts);
+}
+
+static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq)
+{
+ if (!io_napi(ctx))
+ return;
+ __io_napi_busy_loop(ctx, iowq);
+}
+
+/*
+ * io_napi_add() - Add napi id to the busy poll list
+ * @req: pointer to io_kiocb request
+ *
+ * Add the napi id of the socket to the napi busy poll list and hash table.
+ */
+static inline void io_napi_add(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct socket *sock;
+
+ if (!READ_ONCE(ctx->napi_busy_poll_to))
+ return;
+
+ sock = sock_from_file(req->file);
+ if (sock)
+ __io_napi_add(ctx, sock);
+}
+
+#else
+
+static inline void io_napi_init(struct io_ring_ctx *ctx)
+{
+}
+static inline void io_napi_free(struct io_ring_ctx *ctx)
+{
+}
+static inline int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+ return -EOPNOTSUPP;
+}
+static inline int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+ return -EOPNOTSUPP;
+}
+static inline bool io_napi(struct io_ring_ctx *ctx)
+{
+ return false;
+}
+static inline void io_napi_add(struct io_kiocb *req)
+{
+}
+static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq,
+ struct timespec64 *ts)
+{
+}
+static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq)
+{
+}
+static inline int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
+{
+ return 0;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+#endif
diff --git a/io_uring/net.c b/io_uring/net.c
index 75d494dad7e2..19451f0dbf81 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -60,6 +60,7 @@ struct io_sr_msg {
unsigned len;
unsigned done_io;
unsigned msg_flags;
+ unsigned nr_multishot_loops;
u16 flags;
/* initialised and used only by !msg send variants */
u16 addr_len;
@@ -70,18 +71,12 @@ struct io_sr_msg {
struct io_kiocb *notif;
};
-static inline bool io_check_multishot(struct io_kiocb *req,
- unsigned int issue_flags)
-{
- /*
- * When ->locked_cq is set we only allow to post CQEs from the original
- * task context. Usual request completions will be handled in other
- * generic paths but multipoll may decide to post extra cqes.
- */
- return !(issue_flags & IO_URING_F_IOWQ) ||
- !(issue_flags & IO_URING_F_MULTISHOT) ||
- !req->ctx->task_complete;
-}
+/*
+ * Number of times we'll try and do receives if there's more data. If we
+ * exceed this limit, then add us to the back of the queue and retry from
+ * there. This helps fairness between flooding clients.
+ */
+#define MULTISHOT_MAX_RETRY 32
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
@@ -196,16 +191,130 @@ static int io_setup_async_msg(struct io_kiocb *req,
return -EAGAIN;
}
+#ifdef CONFIG_COMPAT
+static int io_compat_msg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg,
+ struct compat_msghdr *msg, int ddir)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct compat_iovec __user *uiov;
+ int ret;
+
+ if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
+ return -EFAULT;
+
+ uiov = compat_ptr(msg->msg_iov);
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ compat_ssize_t clen;
+
+ iomsg->free_iov = NULL;
+ if (msg->msg_iovlen == 0) {
+ sr->len = 0;
+ } else if (msg->msg_iovlen > 1) {
+ return -EINVAL;
+ } else {
+ if (!access_ok(uiov, sizeof(*uiov)))
+ return -EFAULT;
+ if (__get_user(clen, &uiov->iov_len))
+ return -EFAULT;
+ if (clen < 0)
+ return -EINVAL;
+ sr->len = clen;
+ }
+
+ return 0;
+ }
+
+ iomsg->free_iov = iomsg->fast_iov;
+ ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
+ UIO_FASTIOV, &iomsg->free_iov,
+ &iomsg->msg.msg_iter, true);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 0;
+}
+#endif
+
+static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
+ struct user_msghdr *msg, int ddir)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int ret;
+
+ if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
+ return -EFAULT;
+
+ ret = -EFAULT;
+ unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
+ unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
+ unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
+ unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
+ unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
+ unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
+ msg->msg_flags = 0;
+
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (msg->msg_iovlen == 0) {
+ sr->len = iomsg->fast_iov[0].iov_len = 0;
+ iomsg->fast_iov[0].iov_base = NULL;
+ iomsg->free_iov = NULL;
+ } else if (msg->msg_iovlen > 1) {
+ ret = -EINVAL;
+ goto ua_end;
+ } else {
+ /* we only need the length for provided buffers */
+ if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
+ goto ua_end;
+ unsafe_get_user(iomsg->fast_iov[0].iov_len,
+ &msg->msg_iov[0].iov_len, ua_end);
+ sr->len = iomsg->fast_iov[0].iov_len;
+ iomsg->free_iov = NULL;
+ }
+ ret = 0;
+ua_end:
+ user_access_end();
+ return ret;
+ }
+
+ user_access_end();
+ iomsg->free_iov = iomsg->fast_iov;
+ ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
+ &iomsg->free_iov, &iomsg->msg.msg_iter, false);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 0;
+}
+
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct user_msghdr msg;
int ret;
iomsg->msg.msg_name = &iomsg->addr;
- iomsg->free_iov = iomsg->fast_iov;
- ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
- &iomsg->free_iov);
+ iomsg->msg.msg_iter.nr_segs = 0;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(req->ctx->compat)) {
+ struct compat_msghdr cmsg;
+
+ ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
+ if (unlikely(ret))
+ return ret;
+
+ return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
+ }
+#endif
+
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
+ if (unlikely(ret))
+ return ret;
+
+ ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
+
/* save msg_control as sys_sendmsg() overwrites it */
sr->msg_control = iomsg->msg.msg_control_user;
return ret;
@@ -265,6 +374,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ sr->done_io = 0;
+
if (req->opcode == IORING_OP_SEND) {
if (READ_ONCE(sqe->__pad3[0]))
return -EINVAL;
@@ -287,10 +398,20 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
- sr->done_io = 0;
return 0;
}
+static void io_req_msg_cleanup(struct io_kiocb *req,
+ struct io_async_msghdr *kmsg,
+ unsigned int issue_flags)
+{
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ /* fast path, check for non-NULL to avoid function call */
+ if (kmsg->free_iov)
+ kfree(kmsg->free_iov);
+ io_netmsg_recycle(req, issue_flags);
+}
+
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -333,18 +454,14 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg->msg.msg_controllen = 0;
kmsg->msg.msg_control = NULL;
sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_BL_NO_RECYCLE;
return io_setup_async_msg(req, kmsg, issue_flags);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
}
- /* fast path, check for non-NULL to avoid function call */
- if (kmsg->free_iov)
- kfree(kmsg->free_iov);
- req->flags &= ~REQ_F_NEED_CLEANUP;
- io_netmsg_recycle(req, issue_flags);
+ io_req_msg_cleanup(req, kmsg, issue_flags);
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
@@ -412,7 +529,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_BL_NO_RECYCLE;
return io_setup_async_addr(req, &__address, issue_flags);
}
if (ret == -ERESTARTSYS)
@@ -427,142 +544,77 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
-static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
+static int io_recvmsg_mshot_prep(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg,
+ int namelen, size_t controllen)
{
- int hdr;
-
- if (iomsg->namelen < 0)
- return true;
- if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
- iomsg->namelen, &hdr))
- return true;
- if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
- return true;
+ if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
+ (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
+ int hdr;
+
+ if (unlikely(namelen < 0))
+ return -EOVERFLOW;
+ if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
+ namelen, &hdr))
+ return -EOVERFLOW;
+ if (check_add_overflow(hdr, controllen, &hdr))
+ return -EOVERFLOW;
+
+ iomsg->namelen = namelen;
+ iomsg->controllen = controllen;
+ return 0;
+ }
- return false;
+ return 0;
}
-static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
+static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct user_msghdr msg;
int ret;
- if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
- return -EFAULT;
-
- ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
- if (ret)
- return ret;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- if (msg.msg_iovlen == 0) {
- sr->len = iomsg->fast_iov[0].iov_len = 0;
- iomsg->fast_iov[0].iov_base = NULL;
- iomsg->free_iov = NULL;
- } else if (msg.msg_iovlen > 1) {
- return -EINVAL;
- } else {
- if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
- return -EFAULT;
- sr->len = iomsg->fast_iov[0].iov_len;
- iomsg->free_iov = NULL;
- }
-
- if (req->flags & REQ_F_APOLL_MULTISHOT) {
- iomsg->namelen = msg.msg_namelen;
- iomsg->controllen = msg.msg_controllen;
- if (io_recvmsg_multishot_overflow(iomsg))
- return -EOVERFLOW;
- }
- } else {
- iomsg->free_iov = iomsg->fast_iov;
- ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
- &iomsg->free_iov, &iomsg->msg.msg_iter,
- false);
- if (ret > 0)
- ret = 0;
- }
-
- return ret;
-}
+ iomsg->msg.msg_name = &iomsg->addr;
+ iomsg->msg.msg_iter.nr_segs = 0;
#ifdef CONFIG_COMPAT
-static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- struct compat_msghdr msg;
- struct compat_iovec __user *uiov;
- int ret;
+ if (unlikely(req->ctx->compat)) {
+ struct compat_msghdr cmsg;
- if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
- return -EFAULT;
-
- ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
- if (ret)
- return ret;
-
- uiov = compat_ptr(msg.msg_iov);
- if (req->flags & REQ_F_BUFFER_SELECT) {
- compat_ssize_t clen;
-
- iomsg->free_iov = NULL;
- if (msg.msg_iovlen == 0) {
- sr->len = 0;
- } else if (msg.msg_iovlen > 1) {
- return -EINVAL;
- } else {
- if (!access_ok(uiov, sizeof(*uiov)))
- return -EFAULT;
- if (__get_user(clen, &uiov->iov_len))
- return -EFAULT;
- if (clen < 0)
- return -EINVAL;
- sr->len = clen;
- }
+ ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
+ if (unlikely(ret))
+ return ret;
- if (req->flags & REQ_F_APOLL_MULTISHOT) {
- iomsg->namelen = msg.msg_namelen;
- iomsg->controllen = msg.msg_controllen;
- if (io_recvmsg_multishot_overflow(iomsg))
- return -EOVERFLOW;
- }
- } else {
- iomsg->free_iov = iomsg->fast_iov;
- ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen,
- UIO_FASTIOV, &iomsg->free_iov,
- &iomsg->msg.msg_iter, true);
- if (ret < 0)
+ ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
+ if (unlikely(ret))
return ret;
- }
- return 0;
-}
+ return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
+ cmsg.msg_controllen);
+ }
#endif
-static int io_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->msg.msg_iter.nr_segs = 0;
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
+ if (unlikely(ret))
+ return ret;
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
- return __io_compat_recvmsg_copy_hdr(req, iomsg);
-#endif
+ ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
+ if (unlikely(ret))
+ return ret;
- return __io_recvmsg_copy_hdr(req, iomsg);
+ return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
+ msg.msg_controllen);
}
int io_recvmsg_prep_async(struct io_kiocb *req)
{
+ struct io_async_msghdr *iomsg;
int ret;
if (!io_msg_alloc_async_prep(req))
return -ENOMEM;
- ret = io_recvmsg_copy_hdr(req, req->async_data);
+ iomsg = req->async_data;
+ ret = io_recvmsg_copy_hdr(req, iomsg);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
return ret;
@@ -574,6 +626,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ sr->done_io = 0;
+
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -610,7 +664,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
- sr->done_io = 0;
+ sr->nr_multishot_loops = 0;
return 0;
}
@@ -618,6 +672,7 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
sr->len = 0; /* get from the provided buffer */
req->buf_index = sr->buf_group;
@@ -636,32 +691,36 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
unsigned int cflags;
cflags = io_put_kbuf(req, issue_flags);
- if (msg->msg_inq && msg->msg_inq != -1)
+ if (msg->msg_inq > 0)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
- io_req_set_res(req, *ret, cflags);
- *ret = IOU_OK;
- return true;
- }
-
- if (!mshot_finished) {
- if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
- *ret, cflags | IORING_CQE_F_MORE)) {
- io_recv_prep_retry(req);
- /* Known not-empty or unknown state, retry */
- if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
- msg->msg_inq == -1)
+ /*
+ * Fill CQE for this receive and see if we should keep trying to
+ * receive from this socket.
+ */
+ if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
+ io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ *ret, cflags | IORING_CQE_F_MORE)) {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
+ io_recv_prep_retry(req);
+ /* Known not-empty or unknown state, retry */
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq < 0) {
+ if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
return false;
- if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = IOU_ISSUE_SKIP_COMPLETE;
- else
- *ret = -EAGAIN;
- return true;
+ /* mshot retries exceeded, force a requeue */
+ sr->nr_multishot_loops = 0;
+ mshot_retry_ret = IOU_REQUEUE;
}
- /* Otherwise stop multishot but use the current result. */
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ *ret = mshot_retry_ret;
+ else
+ *ret = -EAGAIN;
+ return true;
}
+ /* Finish the request / stop multishot. */
io_req_set_res(req, *ret, cflags);
if (issue_flags & IO_URING_F_MULTISHOT)
@@ -782,8 +841,9 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return io_setup_async_msg(req, kmsg, issue_flags);
- if (!io_check_multishot(req, issue_flags))
- return io_setup_async_msg(req, kmsg, issue_flags);
+ flags = sr->msg_flags;
+ if (force_nonblock)
+ flags |= MSG_DONTWAIT;
retry_multishot:
if (io_do_buffer_select(req)) {
@@ -805,10 +865,6 @@ retry_multishot:
iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
}
- flags = sr->msg_flags;
- if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
kmsg->msg.msg_get_inq = 1;
kmsg->msg.msg_inq = -1;
if (req->flags & REQ_F_APOLL_MULTISHOT) {
@@ -834,7 +890,7 @@ retry_multishot:
}
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_BL_NO_RECYCLE;
return io_setup_async_msg(req, kmsg, issue_flags);
}
if (ret == -ERESTARTSYS)
@@ -854,13 +910,10 @@ retry_multishot:
if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
goto retry_multishot;
- if (mshot_finished) {
- /* fast path, check for non-NULL to avoid function call */
- if (kmsg->free_iov)
- kfree(kmsg->free_iov);
- io_netmsg_recycle(req, issue_flags);
- req->flags &= ~REQ_F_NEED_CLEANUP;
- }
+ if (mshot_finished)
+ io_req_msg_cleanup(req, kmsg, issue_flags);
+ else if (ret == -EAGAIN)
+ return io_setup_async_msg(req, kmsg, issue_flags);
return ret;
}
@@ -879,9 +932,6 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
- if (!io_check_multishot(req, issue_flags))
- return -EAGAIN;
-
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
@@ -894,6 +944,10 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_iocb = NULL;
msg.msg_ubuf = NULL;
+ flags = sr->msg_flags;
+ if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
retry_multishot:
if (io_do_buffer_select(req)) {
void __user *buf;
@@ -902,6 +956,7 @@ retry_multishot:
if (!buf)
return -ENOBUFS;
sr->buf = buf;
+ sr->len = len;
}
ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
@@ -911,9 +966,6 @@ retry_multishot:
msg.msg_inq = -1;
msg.msg_flags = 0;
- flags = sr->msg_flags;
- if (force_nonblock)
- flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
@@ -931,7 +983,7 @@ retry_multishot:
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_BL_NO_RECYCLE;
return -EAGAIN;
}
if (ret == -ERESTARTSYS)
@@ -981,6 +1033,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *notif;
+ zc->done_io = 0;
+
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
return -EINVAL;
/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
@@ -1033,8 +1087,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (zc->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
- zc->done_io = 0;
-
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
zc->msg_flags |= MSG_CMSG_COMPAT;
@@ -1174,7 +1226,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
zc->len -= ret;
zc->buf += ret;
zc->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_BL_NO_RECYCLE;
return io_setup_async_addr(req, &__address, issue_flags);
}
if (ret == -ERESTARTSYS)
@@ -1244,7 +1296,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_BL_NO_RECYCLE;
return io_setup_async_msg(req, kmsg, issue_flags);
}
if (ret == -ERESTARTSYS)
@@ -1279,7 +1331,7 @@ void io_sendrecv_fail(struct io_kiocb *req)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- if (req->flags & REQ_F_PARTIAL_IO)
+ if (sr->done_io)
req->cqe.res = sr->done_io;
if ((req->flags & REQ_F_NEED_CLEANUP) &&
@@ -1329,8 +1381,6 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
struct file *file;
int ret, fd;
- if (!io_check_multishot(req, issue_flags))
- return -EAGAIN;
retry:
if (!fixed) {
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
@@ -1350,7 +1400,7 @@ retry:
* has already been done
*/
if (issue_flags & IO_URING_F_MULTISHOT)
- ret = IOU_ISSUE_SKIP_COMPLETE;
+ return IOU_ISSUE_SKIP_COMPLETE;
return ret;
}
if (ret == -ERESTARTSYS)
@@ -1375,7 +1425,8 @@ retry:
ret, IORING_CQE_F_MORE))
goto retry;
- return -ECANCELED;
+ io_req_set_res(req, ret, 0);
+ return IOU_STOP_MULTISHOT;
}
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 6705634e5f52..9c080aadc5a6 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -35,6 +35,7 @@
#include "rw.h"
#include "waitid.h"
#include "futex.h"
+#include "truncate.h"
static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
{
@@ -471,10 +472,15 @@ const struct io_issue_def io_issue_defs[] = {
},
[IORING_OP_FIXED_FD_INSTALL] = {
.needs_file = 1,
- .audit_skip = 1,
.prep = io_install_fixed_fd_prep,
.issue = io_install_fixed_fd,
},
+ [IORING_OP_FTRUNCATE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .prep = io_ftruncate_prep,
+ .issue = io_ftruncate,
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -713,6 +719,9 @@ const struct io_cold_def io_cold_defs[] = {
[IORING_OP_FIXED_FD_INSTALL] = {
.name = "FIXED_FD_INSTALL",
},
+ [IORING_OP_FTRUNCATE] = {
+ .name = "FTRUNCATE",
+ },
};
const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 0fe0dd305546..e3357dfa14ca 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -277,6 +277,10 @@ int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sq
if (flags & ~IORING_FIXED_FD_NO_CLOEXEC)
return -EINVAL;
+ /* ensure the task's creds are used when installing/receiving fds */
+ if (req->flags & REQ_F_CREDS)
+ return -EPERM;
+
/* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
ifi->o_flags = O_CLOEXEC;
diff --git a/io_uring/poll.c b/io_uring/poll.c
index d59b74a99d4e..5f779139cae1 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -15,6 +15,7 @@
#include "io_uring.h"
#include "refs.h"
+#include "napi.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
@@ -226,8 +227,29 @@ enum {
IOU_POLL_NO_ACTION = 1,
IOU_POLL_REMOVE_POLL_USE_RES = 2,
IOU_POLL_REISSUE = 3,
+ IOU_POLL_REQUEUE = 4,
};
+static void __io_poll_execute(struct io_kiocb *req, int mask)
+{
+ unsigned flags = 0;
+
+ io_req_set_res(req, mask, 0);
+ req->io_task_work.func = io_poll_task_func;
+
+ trace_io_uring_task_add(req, mask);
+
+ if (!(req->flags & REQ_F_POLL_NO_LAZY))
+ flags = IOU_F_TWQ_LAZY_WAKE;
+ __io_req_task_work_add(req, flags);
+}
+
+static inline void io_poll_execute(struct io_kiocb *req, int res)
+{
+ if (io_poll_get_ownership(req))
+ __io_poll_execute(req, res);
+}
+
/*
* All poll tw should go through this. Checks for poll events, manages
* references, does rewait, etc.
@@ -309,6 +331,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
int ret = io_poll_issue(req, ts);
if (ret == IOU_STOP_MULTISHOT)
return IOU_POLL_REMOVE_POLL_USE_RES;
+ else if (ret == IOU_REQUEUE)
+ return IOU_POLL_REQUEUE;
if (ret < 0)
return ret;
}
@@ -320,8 +344,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
* Release all references, retry if someone tried to restart
* task_work while we were executing it.
*/
- } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
- IO_POLL_REF_MASK);
+ v &= IO_POLL_REF_MASK;
+ } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK);
return IOU_POLL_NO_ACTION;
}
@@ -331,8 +355,12 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
int ret;
ret = io_poll_check_events(req, ts);
- if (ret == IOU_POLL_NO_ACTION)
+ if (ret == IOU_POLL_NO_ACTION) {
return;
+ } else if (ret == IOU_POLL_REQUEUE) {
+ __io_poll_execute(req, 0);
+ return;
+ }
io_poll_remove_entries(req);
io_poll_tw_hash_eject(req, ts);
@@ -364,26 +392,6 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
}
}
-static void __io_poll_execute(struct io_kiocb *req, int mask)
-{
- unsigned flags = 0;
-
- io_req_set_res(req, mask, 0);
- req->io_task_work.func = io_poll_task_func;
-
- trace_io_uring_task_add(req, mask);
-
- if (!(req->flags & REQ_F_POLL_NO_LAZY))
- flags = IOU_F_TWQ_LAZY_WAKE;
- __io_req_task_work_add(req, flags);
-}
-
-static inline void io_poll_execute(struct io_kiocb *req, int res)
-{
- if (io_poll_get_ownership(req))
- __io_poll_execute(req, res);
-}
-
static void io_poll_cancel_req(struct io_kiocb *req)
{
io_poll_mark_cancelled(req);
@@ -532,14 +540,6 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
poll->wait.private = (void *) wqe_private;
if (poll->events & EPOLLEXCLUSIVE) {
- /*
- * Exclusive waits may only wake a limited amount of entries
- * rather than all of them, this may interfere with lazy
- * wake if someone does wait(events > 1). Ensure we don't do
- * lazy wake for those, as we need to process each one as they
- * come in.
- */
- req->flags |= REQ_F_POLL_NO_LAZY;
add_wait_queue_exclusive(head, &poll->wait);
} else {
add_wait_queue(head, &poll->wait);
@@ -581,10 +581,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
struct io_poll_table *ipt, __poll_t mask,
unsigned issue_flags)
{
- struct io_ring_ctx *ctx = req->ctx;
-
INIT_HLIST_NODE(&req->hash_node);
- req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
io_init_poll_iocb(poll, mask);
poll->file = req->file;
req->apoll_events = poll->events;
@@ -611,6 +608,17 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
if (issue_flags & IO_URING_F_UNLOCKED)
req->flags &= ~REQ_F_HASH_LOCKED;
+
+ /*
+ * Exclusive waits may only wake a limited amount of entries
+ * rather than all of them, this may interfere with lazy
+ * wake if someone does wait(events > 1). Ensure we don't do
+ * lazy wake for those, as we need to process each one as they
+ * come in.
+ */
+ if (poll->events & EPOLLEXCLUSIVE)
+ req->flags |= REQ_F_POLL_NO_LAZY;
+
mask = vfs_poll(req->file, &ipt->pt) & poll->events;
if (unlikely(ipt->error || !ipt->nr_entries)) {
@@ -645,6 +653,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
__io_poll_execute(req, mask);
return 0;
}
+ io_napi_add(req);
if (ipt->owning) {
/*
@@ -720,7 +729,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
if (!def->pollin && !def->pollout)
return IO_APOLL_ABORTED;
- if (!file_can_poll(req->file))
+ if (!io_file_can_poll(req))
return IO_APOLL_ABORTED;
if (!(req->flags & REQ_F_APOLL_MULTISHOT))
mask |= EPOLLONESHOT;
@@ -811,9 +820,8 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
if (poll_only && req->opcode != IORING_OP_POLL_ADD)
continue;
if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
- if (cd->seq == req->work.cancel_seq)
+ if (io_cancel_match_sequence(req, cd->seq))
continue;
- req->work.cancel_seq = cd->seq;
}
*out_bucket = hb;
return req;
diff --git a/io_uring/poll.h b/io_uring/poll.h
index ff4d5d753387..1dacae9e816c 100644
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -24,6 +24,15 @@ struct async_poll {
struct io_poll *double_poll;
};
+/*
+ * Must only be called inside issue_flags & IO_URING_F_MULTISHOT, or
+ * potentially other cases where we already "own" this poll request.
+ */
+static inline void io_poll_multishot_retry(struct io_kiocb *req)
+{
+ atomic_inc(&req->poll_refs);
+}
+
int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_poll_add(struct io_kiocb *req, unsigned int issue_flags);
diff --git a/io_uring/register.c b/io_uring/register.c
index 5e62c1208996..99c37775f974 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -26,6 +26,7 @@
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
+#include "napi.h"
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
@@ -550,6 +551,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_pbuf_status(ctx, arg);
break;
+ case IORING_REGISTER_NAPI:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_napi(ctx, arg);
+ break;
+ case IORING_UNREGISTER_NAPI:
+ ret = -EINVAL;
+ if (nr_args != 1)
+ break;
+ ret = io_unregister_napi(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index c6f199bbee28..e21000238954 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -2,8 +2,6 @@
#ifndef IOU_RSRC_H
#define IOU_RSRC_H
-#include <net/af_unix.h>
-
#include "alloc_cache.h"
#define IO_NODE_ALLOC_CACHE_MAX 32
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 118cc9f1cf16..47e097ab5d7e 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring/cmd.h>
+#include <linux/indirect_call_wrapper.h>
#include <uapi/linux/io_uring.h>
@@ -18,6 +19,7 @@
#include "opdef.h"
#include "kbuf.h"
#include "rsrc.h"
+#include "poll.h"
#include "rw.h"
struct io_rw {
@@ -273,7 +275,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
* current cycle.
*/
io_req_io_end(req);
- req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE;
return true;
}
req_set_fail(req);
@@ -340,7 +342,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
io_req_end_write(req);
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
- req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
+ req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE;
return;
}
req->cqe.res = res;
@@ -681,7 +683,7 @@ static bool io_rw_should_retry(struct io_kiocb *req)
* just use poll if we can, and don't attempt if the fs doesn't
* support callback based unlocks
*/
- if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+ if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC))
return false;
wait->wait.func = io_async_buf_func;
@@ -720,7 +722,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
struct file *file = req->file;
int ret;
- if (unlikely(!file || !(file->f_mode & mode)))
+ if (unlikely(!(file->f_mode & mode)))
return -EBADF;
if (!(req->flags & REQ_F_FIXED_FILE))
@@ -830,7 +832,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
* If we can poll, just do that. For a vectored read, we'll
* need to copy state first.
*/
- if (file_can_poll(req->file) && !io_issue_defs[req->opcode].vectored)
+ if (io_file_can_poll(req) && !io_issue_defs[req->opcode].vectored)
return -EAGAIN;
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -929,7 +931,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
/*
* Multishot MUST be used on a pollable file
*/
- if (!file_can_poll(req->file))
+ if (!io_file_can_poll(req))
return -EBADFD;
ret = __io_read(req, issue_flags);
@@ -962,8 +964,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
if (io_fill_cqe_req_aux(req,
issue_flags & IO_URING_F_COMPLETE_DEFER,
ret, cflags | IORING_CQE_F_MORE)) {
- if (issue_flags & IO_URING_F_MULTISHOT)
+ if (issue_flags & IO_URING_F_MULTISHOT) {
+ /*
+ * Force retry, as we might have more data to
+ * be read and otherwise it won't get retried
+ * until (if ever) another poll is triggered.
+ */
+ io_poll_multishot_retry(req);
return IOU_ISSUE_SKIP_COMPLETE;
+ }
return -EAGAIN;
}
}
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 65b5dbe3c850..363052b4ea76 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -15,9 +15,11 @@
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
+#include "napi.h"
#include "sqpoll.h"
#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
+#define IORING_TW_CAP_ENTRIES_VALUE 8
enum {
IO_SQ_THREAD_SHOULD_STOP = 0,
@@ -193,6 +195,9 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
ret = io_submit_sqes(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
+ if (io_napi(ctx))
+ ret += io_napi_sqpoll_busy_poll(ctx);
+
if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
wake_up(&ctx->sqo_sq_wait);
if (creds)
@@ -219,10 +224,52 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
}
+/*
+ * Run task_work, processing the retry_list first. The retry_list holds
+ * entries that we passed on in the previous run, if we had more task_work
+ * than we were asked to process. Newly queued task_work isn't run until the
+ * retry list has been fully processed.
+ */
+static unsigned int io_sq_tw(struct llist_node **retry_list, int max_entries)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ unsigned int count = 0;
+
+ if (*retry_list) {
+ *retry_list = io_handle_tw_list(*retry_list, &count, max_entries);
+ if (count >= max_entries)
+ return count;
+ max_entries -= count;
+ }
+
+ *retry_list = tctx_task_work_run(tctx, max_entries, &count);
+ return count;
+}
+
+static bool io_sq_tw_pending(struct llist_node *retry_list)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ return retry_list || !llist_empty(&tctx->task_list);
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
+{
+ struct rusage end;
+
+ getrusage(current, RUSAGE_SELF, &end);
+ end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
+ end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
+
+ sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
+}
+
static int io_sq_thread(void *data)
{
+ struct llist_node *retry_list = NULL;
struct io_sq_data *sqd = data;
struct io_ring_ctx *ctx;
+ struct rusage start;
unsigned long timeout = 0;
char buf[TASK_COMM_LEN];
DEFINE_WAIT(wait);
@@ -251,18 +298,21 @@ static int io_sq_thread(void *data)
}
cap_entries = !list_is_singular(&sqd->ctx_list);
+ getrusage(current, RUSAGE_SELF, &start);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
int ret = __io_sq_thread(ctx, cap_entries);
if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
sqt_spin = true;
}
- if (io_run_task_work())
+ if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
sqt_spin = true;
if (sqt_spin || !time_after(jiffies, timeout)) {
- if (sqt_spin)
+ if (sqt_spin) {
+ io_sq_update_worktime(sqd, &start);
timeout = jiffies + sqd->sq_thread_idle;
+ }
if (unlikely(need_resched())) {
mutex_unlock(&sqd->lock);
cond_resched();
@@ -273,7 +323,7 @@ static int io_sq_thread(void *data)
}
prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
- if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) {
+ if (!io_sqd_events_pending(sqd) && !io_sq_tw_pending(retry_list)) {
bool needs_sched = true;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -312,6 +362,9 @@ static int io_sq_thread(void *data)
timeout = jiffies + sqd->sq_thread_idle;
}
+ if (retry_list)
+ io_sq_tw(&retry_list, UINT_MAX);
+
io_uring_cancel_generic(true, sqd);
sqd->thread = NULL;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index 8df37e8c9149..4171666b1cf4 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -16,6 +16,7 @@ struct io_sq_data {
pid_t task_pid;
pid_t task_tgid;
+ u64 work_time;
unsigned long state;
struct completion exited;
};
diff --git a/io_uring/truncate.c b/io_uring/truncate.c
new file mode 100644
index 000000000000..62ee73d34d72
--- /dev/null
+++ b/io_uring/truncate.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "../fs/internal.h"
+
+#include "io_uring.h"
+#include "truncate.h"
+
+struct io_ftrunc {
+ struct file *file;
+ loff_t len;
+};
+
+int io_ftruncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc);
+
+ if (sqe->rw_flags || sqe->addr || sqe->len || sqe->buf_index ||
+ sqe->splice_fd_in || sqe->addr3)
+ return -EINVAL;
+
+ ft->len = READ_ONCE(sqe->off);
+
+ req->flags |= REQ_F_FORCE_ASYNC;
+ return 0;
+}
+
+int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc);
+ int ret;
+
+ WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
+
+ ret = do_ftruncate(req->file, ft->len, 1);
+
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
diff --git a/io_uring/truncate.h b/io_uring/truncate.h
new file mode 100644
index 000000000000..ec088293a478
--- /dev/null
+++ b/io_uring/truncate.h
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int io_ftruncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags);
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index c33fca585dde..42f63adfa54a 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -5,6 +5,7 @@
#include <linux/io_uring/cmd.h>
#include <linux/security.h>
#include <linux/nospec.h>
+#include <net/sock.h>
#include <uapi/linux/io_uring.h>
#include <asm/ioctls.h>
diff --git a/io_uring/xattr.c b/io_uring/xattr.c
index e1c810e0b85a..44905b82eea8 100644
--- a/io_uring/xattr.c
+++ b/io_uring/xattr.c
@@ -112,7 +112,7 @@ int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
- ret = do_getxattr(mnt_idmap(req->file->f_path.mnt),
+ ret = do_getxattr(file_mnt_idmap(req->file),
req->file->f_path.dentry,
&ix->ctx);
diff --git a/kernel/async.c b/kernel/async.c
index 97f224a5257b..4c3e6a44595f 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -64,6 +64,7 @@ static async_cookie_t next_cookie = 1;
static LIST_HEAD(async_global_pending); /* pending from all registered doms */
static ASYNC_DOMAIN(async_dfl_domain);
static DEFINE_SPINLOCK(async_lock);
+static struct workqueue_struct *async_wq;
struct async_entry {
struct list_head domain_list;
@@ -174,7 +175,7 @@ static async_cookie_t __async_schedule_node_domain(async_func_t func,
spin_unlock_irqrestore(&async_lock, flags);
/* schedule for execution */
- queue_work_node(node, system_unbound_wq, &entry->work);
+ queue_work_node(node, async_wq, &entry->work);
return newcookie;
}
@@ -345,3 +346,17 @@ bool current_is_async(void)
return worker && worker->current_func == async_run_entry_fn;
}
EXPORT_SYMBOL_GPL(current_is_async);
+
+void __init async_init(void)
+{
+ /*
+ * Async can schedule a number of interdependent work items. However,
+ * unbound workqueues can handle only upto min_active interdependent
+ * work items. The default min_active of 8 isn't sufficient for async
+ * and can lead to stalls. Let's use a dedicated workqueue with raised
+ * min_active.
+ */
+ async_wq = alloc_workqueue("async", WQ_UNBOUND, 0);
+ BUG_ON(!async_wq);
+ workqueue_set_min_active(async_wq, WQ_DFL_ACTIVE);
+}
diff --git a/kernel/audit.c b/kernel/audit.c
index 9c8e5f732c4c..e7a62ebbf4d1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1693,9 +1693,7 @@ static int __init audit_init(void)
if (audit_initialized == AUDIT_DISABLED)
return 0;
- audit_buffer_cache = kmem_cache_create("audit_buffer",
- sizeof(struct audit_buffer),
- 0, SLAB_PANIC, NULL);
+ audit_buffer_cache = KMEM_CACHE(audit_buffer, SLAB_PANIC);
skb_queue_head_init(&audit_queue);
skb_queue_head_init(&audit_retry_queue);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 8317a37dea0b..be8c680121e4 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -788,7 +788,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
static inline int audit_dupe_lsm_field(struct audit_field *df,
struct audit_field *sf)
{
- int ret = 0;
+ int ret;
char *lsm_str;
/* our own copy of lsm_str */
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
index 370217dd7e39..a4181234232b 100644
--- a/kernel/backtracetest.c
+++ b/kernel/backtracetest.c
@@ -21,24 +21,20 @@ static void backtrace_test_normal(void)
dump_stack();
}
-static DECLARE_COMPLETION(backtrace_work);
-
-static void backtrace_test_irq_callback(unsigned long data)
+static void backtrace_test_bh_workfn(struct work_struct *work)
{
dump_stack();
- complete(&backtrace_work);
}
-static DECLARE_TASKLET_OLD(backtrace_tasklet, &backtrace_test_irq_callback);
+static DECLARE_WORK(backtrace_bh_work, &backtrace_test_bh_workfn);
-static void backtrace_test_irq(void)
+static void backtrace_test_bh(void)
{
- pr_info("Testing a backtrace from irq context.\n");
+ pr_info("Testing a backtrace from BH context.\n");
pr_info("The following trace is a kernel self test and not a bug!\n");
- init_completion(&backtrace_work);
- tasklet_schedule(&backtrace_tasklet);
- wait_for_completion(&backtrace_work);
+ queue_work(system_bh_wq, &backtrace_bh_work);
+ flush_work(&backtrace_bh_work);
}
#ifdef CONFIG_STACKTRACE
@@ -65,7 +61,7 @@ static int backtrace_regression_test(void)
pr_info("====[ backtrace testing ]===========\n");
backtrace_test_normal();
- backtrace_test_irq();
+ backtrace_test_bh();
backtrace_test_saved();
pr_info("====[ end of backtrace testing ]====\n");
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 6a906ff93006..bc25f5098a25 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -3,6 +3,7 @@
# BPF interpreter that, for example, classic socket filters depend on.
config BPF
bool
+ select CRYPTO_LIB_SHA1
# Used by archs to tell that they support BPF JIT compiler plus which
# flavour. Only one of the two can be selected for a specific arch since
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f526b7573e97..368c5d86b5b7 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
@@ -15,6 +15,9 @@ obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o
+ifeq ($(CONFIG_MMU)$(CONFIG_64BIT),yy)
+obj-$(CONFIG_BPF_SYSCALL) += arena.o
+endif
obj-$(CONFIG_BPF_JIT) += dispatcher.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
new file mode 100644
index 000000000000..86571e760dd6
--- /dev/null
+++ b/kernel/bpf/arena.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/btf_ids.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+
+/*
+ * bpf_arena is a sparsely populated shared memory region between bpf program and
+ * user space process.
+ *
+ * For example on x86-64 the values could be:
+ * user_vm_start 7f7d26200000 // picked by mmap()
+ * kern_vm_start ffffc90001e69000 // picked by get_vm_area()
+ * For user space all pointers within the arena are normal 8-byte addresses.
+ * In this example 7f7d26200000 is the address of the first page (pgoff=0).
+ * The bpf program will access it as: kern_vm_start + lower_32bit_of_user_ptr
+ * (u32)7f7d26200000 -> 26200000
+ * hence
+ * ffffc90001e69000 + 26200000 == ffffc90028069000 is "pgoff=0" within 4Gb
+ * kernel memory region.
+ *
+ * BPF JITs generate the following code to access arena:
+ * mov eax, eax // eax has lower 32-bit of user pointer
+ * mov word ptr [rax + r12 + off], bx
+ * where r12 == kern_vm_start and off is s16.
+ * Hence allocate 4Gb + GUARD_SZ/2 on each side.
+ *
+ * Initially kernel vm_area and user vma are not populated.
+ * User space can fault-in any address which will insert the page
+ * into kernel and user vma.
+ * bpf program can allocate a page via bpf_arena_alloc_pages() kfunc
+ * which will insert it into kernel vm_area.
+ * The later fault-in from user space will populate that page into user vma.
+ */
+
+/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
+#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
+#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)
+
+struct bpf_arena {
+ struct bpf_map map;
+ u64 user_vm_start;
+ u64 user_vm_end;
+ struct vm_struct *kern_vm;
+ struct maple_tree mt;
+ struct list_head vma_list;
+ struct mutex lock;
+};
+
+u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
+{
+ return arena ? (u64) (long) arena->kern_vm->addr + GUARD_SZ / 2 : 0;
+}
+
+u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
+{
+ return arena ? arena->user_vm_start : 0;
+}
+
+static long arena_map_peek_elem(struct bpf_map *map, void *value)
+{
+ return -EOPNOTSUPP;
+}
+
+static long arena_map_push_elem(struct bpf_map *map, void *value, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static long arena_map_pop_elem(struct bpf_map *map, void *value)
+{
+ return -EOPNOTSUPP;
+}
+
+static long arena_map_delete_elem(struct bpf_map *map, void *value)
+{
+ return -EOPNOTSUPP;
+}
+
+static int arena_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ return -EOPNOTSUPP;
+}
+
+static long compute_pgoff(struct bpf_arena *arena, long uaddr)
+{
+ return (u32)(uaddr - (u32)arena->user_vm_start) >> PAGE_SHIFT;
+}
+
+static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
+{
+ struct vm_struct *kern_vm;
+ int numa_node = bpf_map_attr_numa_node(attr);
+ struct bpf_arena *arena;
+ u64 vm_range;
+ int err = -ENOMEM;
+
+ if (attr->key_size || attr->value_size || attr->max_entries == 0 ||
+ /* BPF_F_MMAPABLE must be set */
+ !(attr->map_flags & BPF_F_MMAPABLE) ||
+ /* No unsupported flags present */
+ (attr->map_flags & ~(BPF_F_SEGV_ON_FAULT | BPF_F_MMAPABLE | BPF_F_NO_USER_CONV)))
+ return ERR_PTR(-EINVAL);
+
+ if (attr->map_extra & ~PAGE_MASK)
+ /* If non-zero the map_extra is an expected user VMA start address */
+ return ERR_PTR(-EINVAL);
+
+ vm_range = (u64)attr->max_entries * PAGE_SIZE;
+ if (vm_range > (1ull << 32))
+ return ERR_PTR(-E2BIG);
+
+ if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
+ /* user vma must not cross 32-bit boundary */
+ return ERR_PTR(-ERANGE);
+
+ kern_vm = get_vm_area(KERN_VM_SZ, VM_SPARSE | VM_USERMAP);
+ if (!kern_vm)
+ return ERR_PTR(-ENOMEM);
+
+ arena = bpf_map_area_alloc(sizeof(*arena), numa_node);
+ if (!arena)
+ goto err;
+
+ arena->kern_vm = kern_vm;
+ arena->user_vm_start = attr->map_extra;
+ if (arena->user_vm_start)
+ arena->user_vm_end = arena->user_vm_start + vm_range;
+
+ INIT_LIST_HEAD(&arena->vma_list);
+ bpf_map_init_from_attr(&arena->map, attr);
+ mt_init_flags(&arena->mt, MT_FLAGS_ALLOC_RANGE);
+ mutex_init(&arena->lock);
+
+ return &arena->map;
+err:
+ free_vm_area(kern_vm);
+ return ERR_PTR(err);
+}
+
+static int existing_page_cb(pte_t *ptep, unsigned long addr, void *data)
+{
+ struct page *page;
+ pte_t pte;
+
+ pte = ptep_get(ptep);
+ if (!pte_present(pte)) /* sanity check */
+ return 0;
+ page = pte_page(pte);
+ /*
+ * We do not update pte here:
+ * 1. Nobody should be accessing bpf_arena's range outside of a kernel bug
+ * 2. TLB flushing is batched or deferred. Even if we clear pte,
+ * the TLB entries can stick around and continue to permit access to
+ * the freed page. So it all relies on 1.
+ */
+ __free_page(page);
+ return 0;
+}
+
+static void arena_map_free(struct bpf_map *map)
+{
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ /*
+ * Check that user vma-s are not around when bpf map is freed.
+ * mmap() holds vm_file which holds bpf_map refcnt.
+ * munmap() must have happened on vma followed by arena_vm_close()
+ * which would clear arena->vma_list.
+ */
+ if (WARN_ON_ONCE(!list_empty(&arena->vma_list)))
+ return;
+
+ /*
+ * free_vm_area() calls remove_vm_area() that calls free_unmap_vmap_area().
+ * It unmaps everything from vmalloc area and clears pgtables.
+ * Call apply_to_existing_page_range() first to find populated ptes and
+ * free those pages.
+ */
+ apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
+ KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL);
+ free_vm_area(arena->kern_vm);
+ mtree_destroy(&arena->mt);
+ bpf_map_area_free(arena);
+}
+
+static void *arena_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static long arena_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+ const struct btf_type *key_type, const struct btf_type *value_type)
+{
+ return 0;
+}
+
+static u64 arena_map_mem_usage(const struct bpf_map *map)
+{
+ return 0;
+}
+
+struct vma_list {
+ struct vm_area_struct *vma;
+ struct list_head head;
+};
+
+static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
+{
+ struct vma_list *vml;
+
+ vml = kmalloc(sizeof(*vml), GFP_KERNEL);
+ if (!vml)
+ return -ENOMEM;
+ vma->vm_private_data = vml;
+ vml->vma = vma;
+ list_add(&vml->head, &arena->vma_list);
+ return 0;
+}
+
+static void arena_vm_close(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+ struct vma_list *vml;
+
+ guard(mutex)(&arena->lock);
+ vml = vma->vm_private_data;
+ list_del(&vml->head);
+ vma->vm_private_data = NULL;
+ kfree(vml);
+}
+
+#define MT_ENTRY ((void *)&arena_map_ops) /* unused. has to be valid pointer */
+
+static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
+{
+ struct bpf_map *map = vmf->vma->vm_file->private_data;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+ struct page *page;
+ long kbase, kaddr;
+ int ret;
+
+ kbase = bpf_arena_get_kern_vm_start(arena);
+ kaddr = kbase + (u32)(vmf->address & PAGE_MASK);
+
+ guard(mutex)(&arena->lock);
+ page = vmalloc_to_page((void *)kaddr);
+ if (page)
+ /* already have a page vmap-ed */
+ goto out;
+
+ if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
+ /* User space requested to segfault when page is not allocated by bpf prog */
+ return VM_FAULT_SIGSEGV;
+
+ ret = mtree_insert(&arena->mt, vmf->pgoff, MT_ENTRY, GFP_KERNEL);
+ if (ret)
+ return VM_FAULT_SIGSEGV;
+
+ /* Account into memcg of the process that created bpf_arena */
+ ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
+ if (ret) {
+ mtree_erase(&arena->mt, vmf->pgoff);
+ return VM_FAULT_SIGSEGV;
+ }
+
+ ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page);
+ if (ret) {
+ mtree_erase(&arena->mt, vmf->pgoff);
+ __free_page(page);
+ return VM_FAULT_SIGSEGV;
+ }
+out:
+ page_ref_add(page, 1);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct arena_vm_ops = {
+ .close = arena_vm_close,
+ .fault = arena_vm_fault,
+};
+
+static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct bpf_map *map = filp->private_data;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+ long ret;
+
+ if (pgoff)
+ return -EINVAL;
+ if (len > (1ull << 32))
+ return -E2BIG;
+
+ /* if user_vm_start was specified at arena creation time */
+ if (arena->user_vm_start) {
+ if (len > arena->user_vm_end - arena->user_vm_start)
+ return -E2BIG;
+ if (len != arena->user_vm_end - arena->user_vm_start)
+ return -EINVAL;
+ if (addr != arena->user_vm_start)
+ return -EINVAL;
+ }
+
+ ret = current->mm->get_unmapped_area(filp, addr, len * 2, 0, flags);
+ if (IS_ERR_VALUE(ret))
+ return ret;
+ if ((ret >> 32) == ((ret + len - 1) >> 32))
+ return ret;
+ if (WARN_ON_ONCE(arena->user_vm_start))
+ /* checks at map creation time should prevent this */
+ return -EFAULT;
+ return round_up(ret, 1ull << 32);
+}
+
+static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+{
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ guard(mutex)(&arena->lock);
+ if (arena->user_vm_start && arena->user_vm_start != vma->vm_start)
+ /*
+ * If map_extra was not specified at arena creation time then
+ * 1st user process can do mmap(NULL, ...) to pick user_vm_start
+ * 2nd user process must pass the same addr to mmap(addr, MAP_FIXED..);
+ * or
+ * specify addr in map_extra and
+ * use the same addr later with mmap(addr, MAP_FIXED..);
+ */
+ return -EBUSY;
+
+ if (arena->user_vm_end && arena->user_vm_end != vma->vm_end)
+ /* all user processes must have the same size of mmap-ed region */
+ return -EBUSY;
+
+ /* Earlier checks should prevent this */
+ if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
+ return -EFAULT;
+
+ if (remember_vma(arena, vma))
+ return -ENOMEM;
+
+ arena->user_vm_start = vma->vm_start;
+ arena->user_vm_end = vma->vm_end;
+ /*
+ * bpf_map_mmap() checks that it's being mmaped as VM_SHARED and
+ * clears VM_MAYEXEC. Set VM_DONTEXPAND as well to avoid
+ * potential change of user_vm_start.
+ */
+ vm_flags_set(vma, VM_DONTEXPAND);
+ vma->vm_ops = &arena_vm_ops;
+ return 0;
+}
+
+static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off)
+{
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ if ((u64)off > arena->user_vm_end - arena->user_vm_start)
+ return -ERANGE;
+ *imm = (unsigned long)arena->user_vm_start;
+ return 0;
+}
+
+BTF_ID_LIST_SINGLE(bpf_arena_map_btf_ids, struct, bpf_arena)
+const struct bpf_map_ops arena_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc = arena_map_alloc,
+ .map_free = arena_map_free,
+ .map_direct_value_addr = arena_map_direct_value_addr,
+ .map_mmap = arena_map_mmap,
+ .map_get_unmapped_area = arena_get_unmapped_area,
+ .map_get_next_key = arena_map_get_next_key,
+ .map_push_elem = arena_map_push_elem,
+ .map_peek_elem = arena_map_peek_elem,
+ .map_pop_elem = arena_map_pop_elem,
+ .map_lookup_elem = arena_map_lookup_elem,
+ .map_update_elem = arena_map_update_elem,
+ .map_delete_elem = arena_map_delete_elem,
+ .map_check_btf = arena_map_check_btf,
+ .map_mem_usage = arena_map_mem_usage,
+ .map_btf_id = &bpf_arena_map_btf_ids[0],
+};
+
+static u64 clear_lo32(u64 val)
+{
+ return val & ~(u64)~0U;
+}
+
+/*
+ * Allocate pages and vmap them into kernel vmalloc area.
+ * Later the pages will be mmaped into user space vma.
+ */
+static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt, int node_id)
+{
+ /* user_vm_end/start are fixed before bpf prog runs */
+ long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
+ u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+ struct page **pages;
+ long pgoff = 0;
+ u32 uaddr32;
+ int ret, i;
+
+ if (page_cnt > page_cnt_max)
+ return 0;
+
+ if (uaddr) {
+ if (uaddr & ~PAGE_MASK)
+ return 0;
+ pgoff = compute_pgoff(arena, uaddr);
+ if (pgoff + page_cnt > page_cnt_max)
+ /* requested address will be outside of user VMA */
+ return 0;
+ }
+
+ /* zeroing is needed, since alloc_pages_bulk_array() only fills in non-zero entries */
+ pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ return 0;
+
+ guard(mutex)(&arena->lock);
+
+ if (uaddr)
+ ret = mtree_insert_range(&arena->mt, pgoff, pgoff + page_cnt - 1,
+ MT_ENTRY, GFP_KERNEL);
+ else
+ ret = mtree_alloc_range(&arena->mt, &pgoff, MT_ENTRY,
+ page_cnt, 0, page_cnt_max - 1, GFP_KERNEL);
+ if (ret)
+ goto out_free_pages;
+
+ ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO,
+ node_id, page_cnt, pages);
+ if (ret)
+ goto out;
+
+ uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
+ /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
+ ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
+ kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
+ if (ret) {
+ for (i = 0; i < page_cnt; i++)
+ __free_page(pages[i]);
+ goto out;
+ }
+ kvfree(pages);
+ return clear_lo32(arena->user_vm_start) + uaddr32;
+out:
+ mtree_erase(&arena->mt, pgoff);
+out_free_pages:
+ kvfree(pages);
+ return 0;
+}
+
+/*
+ * If page is present in vmalloc area, unmap it from vmalloc area,
+ * unmap it from all user space vma-s,
+ * and free it.
+ */
+static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
+{
+ struct vma_list *vml;
+
+ list_for_each_entry(vml, &arena->vma_list, head)
+ zap_page_range_single(vml->vma, uaddr,
+ PAGE_SIZE * page_cnt, NULL);
+}
+
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
+{
+ u64 full_uaddr, uaddr_end;
+ long kaddr, pgoff, i;
+ struct page *page;
+
+ /* only aligned lower 32-bit are relevant */
+ uaddr = (u32)uaddr;
+ uaddr &= PAGE_MASK;
+ full_uaddr = clear_lo32(arena->user_vm_start) + uaddr;
+ uaddr_end = min(arena->user_vm_end, full_uaddr + (page_cnt << PAGE_SHIFT));
+ if (full_uaddr >= uaddr_end)
+ return;
+
+ page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;
+
+ guard(mutex)(&arena->lock);
+
+ pgoff = compute_pgoff(arena, uaddr);
+ /* clear range */
+ mtree_store_range(&arena->mt, pgoff, pgoff + page_cnt - 1, NULL, GFP_KERNEL);
+
+ if (page_cnt > 1)
+ /* bulk zap if multiple pages being freed */
+ zap_pages(arena, full_uaddr, page_cnt);
+
+ kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
+ for (i = 0; i < page_cnt; i++, kaddr += PAGE_SIZE, full_uaddr += PAGE_SIZE) {
+ page = vmalloc_to_page((void *)kaddr);
+ if (!page)
+ continue;
+ if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
+ zap_pages(arena, full_uaddr, 1);
+ vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
+ __free_page(page);
+ }
+}
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
+ int node_id, u64 flags)
+{
+ struct bpf_map *map = p__map;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
+ return NULL;
+
+ return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id);
+}
+
+__bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+ struct bpf_map *map = p__map;
+ struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+ if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
+ return;
+ arena_free_pages(arena, (long)ptr__ign, page_cnt);
+}
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(arena_kfuncs)
+BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_KFUNCS_END(arena_kfuncs)
+
+static const struct btf_kfunc_id_set common_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &arena_kfuncs,
+};
+
+static int __init kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
+}
+late_initcall(kfunc_init);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 0bdbbbeab155..13358675ff2e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
- bool bypass_spec_v1 = bpf_bypass_spec_v1();
+ bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
u64 array_size, mask64;
struct bpf_array *array;
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0fae79164187..112581cf97e7 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -548,7 +548,7 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
return -ENOENT;
/* Only allow sleepable program for resched-able iterator */
- if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo))
+ if (prog->sleepable && !bpf_iter_target_support_resched(tinfo))
return -EINVAL;
link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
@@ -697,7 +697,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
struct bpf_run_ctx run_ctx, *old_run_ctx;
int ret;
- if (prog->aux->sleepable) {
+ if (prog->sleepable) {
rcu_read_lock_trace();
migrate_disable();
might_fault();
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 146824cc9689..bdea1a459153 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -414,47 +414,21 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
bpf_selem_unlink_storage(selem, reuse_now);
}
-/* If cacheit_lockit is false, this lookup function is lockless */
-struct bpf_local_storage_data *
-bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_map *smap,
- bool cacheit_lockit)
+void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
{
- struct bpf_local_storage_data *sdata;
- struct bpf_local_storage_elem *selem;
-
- /* Fast path (cache hit) */
- sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
- bpf_rcu_lock_held());
- if (sdata && rcu_access_pointer(sdata->smap) == smap)
- return sdata;
-
- /* Slow path (cache miss) */
- hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
- rcu_read_lock_trace_held())
- if (rcu_access_pointer(SDATA(selem)->smap) == smap)
- break;
-
- if (!selem)
- return NULL;
-
- sdata = SDATA(selem);
- if (cacheit_lockit) {
- unsigned long flags;
-
- /* spinlock is needed to avoid racing with the
- * parallel delete. Otherwise, publishing an already
- * deleted sdata to the cache will become a use-after-free
- * problem in the next bpf_local_storage_lookup().
- */
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- if (selem_linked_to_storage(selem))
- rcu_assign_pointer(local_storage->cache[smap->cache_idx],
- sdata);
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
- }
+ unsigned long flags;
- return sdata;
+ /* spinlock is needed to avoid racing with the
+ * parallel delete. Otherwise, publishing an already
+ * deleted sdata to the cache will become a use-after-free
+ * problem in the next bpf_local_storage_lookup().
+ */
+ raw_spin_lock_irqsave(&local_storage->lock, flags);
+ if (selem_linked_to_storage(selem))
+ rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}
static int check_flags(const struct bpf_local_storage_data *old_sdata,
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index e8e910395bf6..68240c3c6e7d 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -260,9 +260,15 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
BTF_SET_START(sleepable_lsm_hooks)
BTF_ID(func, bpf_lsm_bpf)
BTF_ID(func, bpf_lsm_bpf_map)
-BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
-BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_map_create)
+BTF_ID(func, bpf_lsm_bpf_map_free)
BTF_ID(func, bpf_lsm_bpf_prog)
+BTF_ID(func, bpf_lsm_bpf_prog_load)
+BTF_ID(func, bpf_lsm_bpf_prog_free)
+BTF_ID(func, bpf_lsm_bpf_token_create)
+BTF_ID(func, bpf_lsm_bpf_token_free)
+BTF_ID(func, bpf_lsm_bpf_token_cmd)
+BTF_ID(func, bpf_lsm_bpf_token_capable)
BTF_ID(func, bpf_lsm_bprm_check_security)
BTF_ID(func, bpf_lsm_bprm_committed_creds)
BTF_ID(func, bpf_lsm_bprm_committing_creds)
@@ -276,10 +282,6 @@ BTF_ID(func, bpf_lsm_file_lock)
BTF_ID(func, bpf_lsm_file_open)
BTF_ID(func, bpf_lsm_file_receive)
-#ifdef CONFIG_SECURITY_NETWORK
-BTF_ID(func, bpf_lsm_inet_conn_established)
-#endif /* CONFIG_SECURITY_NETWORK */
-
BTF_ID(func, bpf_lsm_inode_create)
BTF_ID(func, bpf_lsm_inode_free_security)
BTF_ID(func, bpf_lsm_inode_getattr)
@@ -330,6 +332,8 @@ BTF_ID(func, bpf_lsm_sb_umount)
BTF_ID(func, bpf_lsm_settime)
#ifdef CONFIG_SECURITY_NETWORK
+BTF_ID(func, bpf_lsm_inet_conn_established)
+
BTF_ID(func, bpf_lsm_socket_accept)
BTF_ID(func, bpf_lsm_socket_bind)
BTF_ID(func, bpf_lsm_socket_connect)
@@ -357,9 +361,8 @@ BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)
BTF_SET_START(untrusted_lsm_hooks)
-BTF_ID(func, bpf_lsm_bpf_map_free_security)
-BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
-BTF_ID(func, bpf_lsm_bpf_prog_free_security)
+BTF_ID(func, bpf_lsm_bpf_map_free)
+BTF_ID(func, bpf_lsm_bpf_prog_free)
BTF_ID(func, bpf_lsm_file_alloc_security)
BTF_ID(func, bpf_lsm_file_free_security)
#ifdef CONFIG_SECURITY_NETWORK
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 02068bd0e4d9..43356faaa057 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -13,26 +13,17 @@
#include <linux/btf_ids.h>
#include <linux/rcupdate_wait.h>
-enum bpf_struct_ops_state {
- BPF_STRUCT_OPS_STATE_INIT,
- BPF_STRUCT_OPS_STATE_INUSE,
- BPF_STRUCT_OPS_STATE_TOBEFREE,
- BPF_STRUCT_OPS_STATE_READY,
-};
-
-#define BPF_STRUCT_OPS_COMMON_VALUE \
- refcount_t refcnt; \
- enum bpf_struct_ops_state state
-
struct bpf_struct_ops_value {
- BPF_STRUCT_OPS_COMMON_VALUE;
+ struct bpf_struct_ops_common_value common;
char data[] ____cacheline_aligned_in_smp;
};
+#define MAX_TRAMP_IMAGE_PAGES 8
+
struct bpf_struct_ops_map {
struct bpf_map map;
struct rcu_head rcu;
- const struct bpf_struct_ops *st_ops;
+ const struct bpf_struct_ops_desc *st_ops_desc;
/* protect map_update */
struct mutex lock;
/* link has all the bpf_links that is populated
@@ -40,12 +31,14 @@ struct bpf_struct_ops_map {
* (in kvalue.data).
*/
struct bpf_link **links;
- /* image is a page that has all the trampolines
+ u32 links_cnt;
+ u32 image_pages_cnt;
+ /* image_pages is an array of pages that has all the trampolines
* that stores the func args before calling the bpf_prog.
- * A PAGE_SIZE "image" is enough to store all trampoline for
- * "links[]".
*/
- void *image;
+ void *image_pages[MAX_TRAMP_IMAGE_PAGES];
+ /* The owner moduler's btf. */
+ struct btf *btf;
/* uvalue->data stores the kernel struct
* (e.g. tcp_congestion_ops) that is more useful
* to userspace than the kvalue. For example,
@@ -70,35 +63,6 @@ static DEFINE_MUTEX(update_mutex);
#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
-/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
- * the map's value exposed to the userspace and its btf-type-id is
- * stored at the map->btf_vmlinux_value_type_id.
- *
- */
-#define BPF_STRUCT_OPS_TYPE(_name) \
-extern struct bpf_struct_ops bpf_##_name; \
- \
-struct bpf_struct_ops_##_name { \
- BPF_STRUCT_OPS_COMMON_VALUE; \
- struct _name data ____cacheline_aligned_in_smp; \
-};
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
-
-enum {
-#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
- __NR_BPF_STRUCT_OPS_TYPE,
-};
-
-static struct bpf_struct_ops * const bpf_struct_ops[] = {
-#define BPF_STRUCT_OPS_TYPE(_name) \
- [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
-};
-
const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};
@@ -108,138 +72,355 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
#endif
};
-static const struct btf_type *module_type;
+BTF_ID_LIST(st_ops_ids)
+BTF_ID(struct, module)
+BTF_ID(struct, bpf_struct_ops_common_value)
+
+enum {
+ IDX_MODULE_ID,
+ IDX_ST_OPS_COMMON_VALUE_ID,
+};
+
+extern struct btf *btf_vmlinux;
-void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
+static bool is_valid_value_type(struct btf *btf, s32 value_id,
+ const struct btf_type *type,
+ const char *value_name)
{
- s32 type_id, value_id, module_id;
+ const struct btf_type *common_value_type;
const struct btf_member *member;
- struct bpf_struct_ops *st_ops;
- const struct btf_type *t;
- char value_name[128];
- const char *mname;
- u32 i, j;
+ const struct btf_type *vt, *mt;
- /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
-#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
+ vt = btf_type_by_id(btf, value_id);
+ if (btf_vlen(vt) != 2) {
+ pr_warn("The number of %s's members should be 2, but we get %d\n",
+ value_name, btf_vlen(vt));
+ return false;
+ }
+ member = btf_type_member(vt);
+ mt = btf_type_by_id(btf, member->type);
+ common_value_type = btf_type_by_id(btf_vmlinux,
+ st_ops_ids[IDX_ST_OPS_COMMON_VALUE_ID]);
+ if (mt != common_value_type) {
+ pr_warn("The first member of %s should be bpf_struct_ops_common_value\n",
+ value_name);
+ return false;
+ }
+ member++;
+ mt = btf_type_by_id(btf, member->type);
+ if (mt != type) {
+ pr_warn("The second member of %s should be %s\n",
+ value_name, btf_name_by_offset(btf, type->name_off));
+ return false;
+ }
- module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
- if (module_id < 0) {
- pr_warn("Cannot find struct module in btf_vmlinux\n");
- return;
+ return true;
+}
+
+static void *bpf_struct_ops_image_alloc(void)
+{
+ void *image;
+ int err;
+
+ err = bpf_jit_charge_modmem(PAGE_SIZE);
+ if (err)
+ return ERR_PTR(err);
+ image = arch_alloc_bpf_trampoline(PAGE_SIZE);
+ if (!image) {
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
+ return ERR_PTR(-ENOMEM);
}
- module_type = btf_type_by_id(btf, module_id);
- for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
- st_ops = bpf_struct_ops[i];
+ return image;
+}
- if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
- sizeof(value_name)) {
- pr_warn("struct_ops name %s is too long\n",
- st_ops->name);
- continue;
- }
- sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);
+void bpf_struct_ops_image_free(void *image)
+{
+ if (image) {
+ arch_free_bpf_trampoline(image, PAGE_SIZE);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
+ }
+}
+
+#define MAYBE_NULL_SUFFIX "__nullable"
+#define MAX_STUB_NAME 128
- value_id = btf_find_by_name_kind(btf, value_name,
- BTF_KIND_STRUCT);
- if (value_id < 0) {
- pr_warn("Cannot find struct %s in btf_vmlinux\n",
- value_name);
+/* Return the type info of a stub function, if it exists.
+ *
+ * The name of a stub function is made up of the name of the struct_ops and
+ * the name of the function pointer member, separated by "__". For example,
+ * if the struct_ops type is named "foo_ops" and the function pointer
+ * member is named "bar", the stub function name would be "foo_ops__bar".
+ */
+static const struct btf_type *
+find_stub_func_proto(const struct btf *btf, const char *st_op_name,
+ const char *member_name)
+{
+ char stub_func_name[MAX_STUB_NAME];
+ const struct btf_type *func_type;
+ s32 btf_id;
+ int cp;
+
+ cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
+ st_op_name, member_name);
+ if (cp >= MAX_STUB_NAME) {
+ pr_warn("Stub function name too long\n");
+ return NULL;
+ }
+ btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC);
+ if (btf_id < 0)
+ return NULL;
+ func_type = btf_type_by_id(btf, btf_id);
+ if (!func_type)
+ return NULL;
+
+ return btf_type_by_id(btf, func_type->type); /* FUNC_PROTO */
+}
+
+/* Prepare argument info for every nullable argument of a member of a
+ * struct_ops type.
+ *
+ * Initialize a struct bpf_struct_ops_arg_info according to type info of
+ * the arguments of a stub function. (Check kCFI for more information about
+ * stub functions.)
+ *
+ * Each member in the struct_ops type has a struct bpf_struct_ops_arg_info
+ * to provide an array of struct bpf_ctx_arg_aux, which in turn provides
+ * the information that used by the verifier to check the arguments of the
+ * BPF struct_ops program assigned to the member. Here, we only care about
+ * the arguments that are marked as __nullable.
+ *
+ * The array of struct bpf_ctx_arg_aux is eventually assigned to
+ * prog->aux->ctx_arg_info of BPF struct_ops programs and passed to the
+ * verifier. (See check_struct_ops_btf_id())
+ *
+ * arg_info->info will be the list of struct bpf_ctx_arg_aux if success. If
+ * fails, it will be kept untouched.
+ */
+static int prepare_arg_info(struct btf *btf,
+ const char *st_ops_name,
+ const char *member_name,
+ const struct btf_type *func_proto,
+ struct bpf_struct_ops_arg_info *arg_info)
+{
+ const struct btf_type *stub_func_proto, *pointed_type;
+ const struct btf_param *stub_args, *args;
+ struct bpf_ctx_arg_aux *info, *info_buf;
+ u32 nargs, arg_no, info_cnt = 0;
+ u32 arg_btf_id;
+ int offset;
+
+ stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name);
+ if (!stub_func_proto)
+ return 0;
+
+ /* Check if the number of arguments of the stub function is the same
+ * as the number of arguments of the function pointer.
+ */
+ nargs = btf_type_vlen(func_proto);
+ if (nargs != btf_type_vlen(stub_func_proto)) {
+ pr_warn("the number of arguments of the stub function %s__%s does not match the number of arguments of the member %s of struct %s\n",
+ st_ops_name, member_name, member_name, st_ops_name);
+ return -EINVAL;
+ }
+
+ if (!nargs)
+ return 0;
+
+ args = btf_params(func_proto);
+ stub_args = btf_params(stub_func_proto);
+
+ info_buf = kcalloc(nargs, sizeof(*info_buf), GFP_KERNEL);
+ if (!info_buf)
+ return -ENOMEM;
+
+ /* Prepare info for every nullable argument */
+ info = info_buf;
+ for (arg_no = 0; arg_no < nargs; arg_no++) {
+ /* Skip arguments that is not suffixed with
+ * "__nullable".
+ */
+ if (!btf_param_match_suffix(btf, &stub_args[arg_no],
+ MAYBE_NULL_SUFFIX))
continue;
+
+ /* Should be a pointer to struct */
+ pointed_type = btf_type_resolve_ptr(btf,
+ args[arg_no].type,
+ &arg_btf_id);
+ if (!pointed_type ||
+ !btf_type_is_struct(pointed_type)) {
+ pr_warn("stub function %s__%s has %s tagging to an unsupported type\n",
+ st_ops_name, member_name, MAYBE_NULL_SUFFIX);
+ goto err_out;
}
- type_id = btf_find_by_name_kind(btf, st_ops->name,
- BTF_KIND_STRUCT);
- if (type_id < 0) {
- pr_warn("Cannot find struct %s in btf_vmlinux\n",
- st_ops->name);
- continue;
+ offset = btf_ctx_arg_offset(btf, func_proto, arg_no);
+ if (offset < 0) {
+ pr_warn("stub function %s__%s has an invalid trampoline ctx offset for arg#%u\n",
+ st_ops_name, member_name, arg_no);
+ goto err_out;
}
- t = btf_type_by_id(btf, type_id);
- if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
- pr_warn("Cannot support #%u members in struct %s\n",
- btf_type_vlen(t), st_ops->name);
- continue;
+
+ if (args[arg_no].type != stub_args[arg_no].type) {
+ pr_warn("arg#%u type in stub function %s__%s does not match with its original func_proto\n",
+ arg_no, st_ops_name, member_name);
+ goto err_out;
}
- for_each_member(j, t, member) {
- const struct btf_type *func_proto;
+ /* Fill the information of the new argument */
+ info->reg_type =
+ PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
+ info->btf_id = arg_btf_id;
+ info->btf = btf;
+ info->offset = offset;
- mname = btf_name_by_offset(btf, member->name_off);
- if (!*mname) {
- pr_warn("anon member in struct %s is not supported\n",
- st_ops->name);
- break;
- }
+ info++;
+ info_cnt++;
+ }
- if (__btf_member_bitfield_size(t, member)) {
- pr_warn("bit field member %s in struct %s is not supported\n",
- mname, st_ops->name);
- break;
- }
+ if (info_cnt) {
+ arg_info->info = info_buf;
+ arg_info->cnt = info_cnt;
+ } else {
+ kfree(info_buf);
+ }
- func_proto = btf_type_resolve_func_ptr(btf,
- member->type,
- NULL);
- if (func_proto &&
- btf_distill_func_proto(log, btf,
- func_proto, mname,
- &st_ops->func_models[j])) {
- pr_warn("Error in parsing func ptr %s in struct %s\n",
- mname, st_ops->name);
- break;
- }
- }
+ return 0;
- if (j == btf_type_vlen(t)) {
- if (st_ops->init(btf)) {
- pr_warn("Error in init bpf_struct_ops %s\n",
- st_ops->name);
- } else {
- st_ops->type_id = type_id;
- st_ops->type = t;
- st_ops->value_id = value_id;
- st_ops->value_type = btf_type_by_id(btf,
- value_id);
- }
- }
- }
+err_out:
+ kfree(info_buf);
+
+ return -EINVAL;
}
-extern struct btf *btf_vmlinux;
+/* Clean up the arg_info in a struct bpf_struct_ops_desc. */
+void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc)
+{
+ struct bpf_struct_ops_arg_info *arg_info;
+ int i;
-static const struct bpf_struct_ops *
-bpf_struct_ops_find_value(u32 value_id)
+ arg_info = st_ops_desc->arg_info;
+ for (i = 0; i < btf_type_vlen(st_ops_desc->type); i++)
+ kfree(arg_info[i].info);
+
+ kfree(arg_info);
+}
+
+int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
+ struct btf *btf,
+ struct bpf_verifier_log *log)
{
- unsigned int i;
+ struct bpf_struct_ops *st_ops = st_ops_desc->st_ops;
+ struct bpf_struct_ops_arg_info *arg_info;
+ const struct btf_member *member;
+ const struct btf_type *t;
+ s32 type_id, value_id;
+ char value_name[128];
+ const char *mname;
+ int i, err;
- if (!value_id || !btf_vmlinux)
- return NULL;
+ if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
+ sizeof(value_name)) {
+ pr_warn("struct_ops name %s is too long\n",
+ st_ops->name);
+ return -EINVAL;
+ }
+ sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);
- for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
- if (bpf_struct_ops[i]->value_id == value_id)
- return bpf_struct_ops[i];
+ if (!st_ops->cfi_stubs) {
+ pr_warn("struct_ops for %s has no cfi_stubs\n", st_ops->name);
+ return -EINVAL;
}
- return NULL;
-}
+ type_id = btf_find_by_name_kind(btf, st_ops->name,
+ BTF_KIND_STRUCT);
+ if (type_id < 0) {
+ pr_warn("Cannot find struct %s in %s\n",
+ st_ops->name, btf_get_name(btf));
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, type_id);
+ if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
+ pr_warn("Cannot support #%u members in struct %s\n",
+ btf_type_vlen(t), st_ops->name);
+ return -EINVAL;
+ }
-const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
-{
- unsigned int i;
+ value_id = btf_find_by_name_kind(btf, value_name,
+ BTF_KIND_STRUCT);
+ if (value_id < 0) {
+ pr_warn("Cannot find struct %s in %s\n",
+ value_name, btf_get_name(btf));
+ return -EINVAL;
+ }
+ if (!is_valid_value_type(btf, value_id, t, value_name))
+ return -EINVAL;
- if (!type_id || !btf_vmlinux)
- return NULL;
+ arg_info = kcalloc(btf_type_vlen(t), sizeof(*arg_info),
+ GFP_KERNEL);
+ if (!arg_info)
+ return -ENOMEM;
+
+ st_ops_desc->arg_info = arg_info;
+ st_ops_desc->type = t;
+ st_ops_desc->type_id = type_id;
+ st_ops_desc->value_id = value_id;
+ st_ops_desc->value_type = btf_type_by_id(btf, value_id);
+
+ for_each_member(i, t, member) {
+ const struct btf_type *func_proto;
- for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
- if (bpf_struct_ops[i]->type_id == type_id)
- return bpf_struct_ops[i];
+ mname = btf_name_by_offset(btf, member->name_off);
+ if (!*mname) {
+ pr_warn("anon member in struct %s is not supported\n",
+ st_ops->name);
+ err = -EOPNOTSUPP;
+ goto errout;
+ }
+
+ if (__btf_member_bitfield_size(t, member)) {
+ pr_warn("bit field member %s in struct %s is not supported\n",
+ mname, st_ops->name);
+ err = -EOPNOTSUPP;
+ goto errout;
+ }
+
+ func_proto = btf_type_resolve_func_ptr(btf,
+ member->type,
+ NULL);
+ if (!func_proto)
+ continue;
+
+ if (btf_distill_func_proto(log, btf,
+ func_proto, mname,
+ &st_ops->func_models[i])) {
+ pr_warn("Error in parsing func ptr %s in struct %s\n",
+ mname, st_ops->name);
+ err = -EINVAL;
+ goto errout;
+ }
+
+ err = prepare_arg_info(btf, st_ops->name, mname,
+ func_proto,
+ arg_info + i);
+ if (err)
+ goto errout;
}
- return NULL;
+ if (st_ops->init(btf)) {
+ pr_warn("Error in init bpf_struct_ops %s\n",
+ st_ops->name);
+ err = -EINVAL;
+ goto errout;
+ }
+
+ return 0;
+
+errout:
+ bpf_struct_ops_desc_release(st_ops_desc);
+
+ return err;
}
static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
@@ -265,7 +446,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
kvalue = &st_map->kvalue;
/* Pair with smp_store_release() during map_update */
- state = smp_load_acquire(&kvalue->state);
+ state = smp_load_acquire(&kvalue->common.state);
if (state == BPF_STRUCT_OPS_STATE_INIT) {
memset(value, 0, map->value_size);
return 0;
@@ -276,7 +457,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
*/
uvalue = value;
memcpy(uvalue, st_map->uvalue, map->value_size);
- uvalue->state = state;
+ uvalue->common.state = state;
/* This value offers the user space a general estimate of how
* many sockets are still utilizing this struct_ops for TCP
@@ -284,7 +465,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
* should sufficiently meet our present goals.
*/
refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt);
- refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0));
+ refcount_set(&uvalue->common.refcnt, max_t(s64, refcnt, 0));
return 0;
}
@@ -296,10 +477,9 @@ static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
- const struct btf_type *t = st_map->st_ops->type;
u32 i;
- for (i = 0; i < btf_type_vlen(t); i++) {
+ for (i = 0; i < st_map->links_cnt; i++) {
if (st_map->links[i]) {
bpf_link_put(st_map->links[i]);
st_map->links[i] = NULL;
@@ -307,7 +487,16 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
}
}
-static int check_zero_holes(const struct btf_type *t, void *data)
+static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
+{
+ int i;
+
+ for (i = 0; i < st_map->image_pages_cnt; i++)
+ bpf_struct_ops_image_free(st_map->image_pages[i]);
+ st_map->image_pages_cnt = 0;
+}
+
+static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data)
{
const struct btf_member *member;
u32 i, moff, msize, prev_mend = 0;
@@ -319,8 +508,8 @@ static int check_zero_holes(const struct btf_type *t, void *data)
memchr_inv(data + prev_mend, 0, moff - prev_mend))
return -EINVAL;
- mtype = btf_type_by_id(btf_vmlinux, member->type);
- mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
+ mtype = btf_type_by_id(btf, member->type);
+ mtype = btf_resolve_size(btf, mtype, &msize);
if (IS_ERR(mtype))
return PTR_ERR(mtype);
prev_mend = moff + msize;
@@ -352,9 +541,12 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
- void *stub_func, void *image, void *image_end)
+ void *stub_func,
+ void **_image, u32 *_image_off,
+ bool allow_alloc)
{
- u32 flags = BPF_TRAMP_F_INDIRECT;
+ u32 image_off = *_image_off, flags = BPF_TRAMP_F_INDIRECT;
+ void *image = *_image;
int size;
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
@@ -364,27 +556,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
- if (size < 0)
- return size;
- if (size > (unsigned long)image_end - (unsigned long)image)
- return -E2BIG;
- return arch_prepare_bpf_trampoline(NULL, image, image_end,
+ if (size <= 0)
+ return size ? : -EFAULT;
+
+ /* Allocate image buffer if necessary */
+ if (!image || size > PAGE_SIZE - image_off) {
+ if (!allow_alloc)
+ return -E2BIG;
+
+ image = bpf_struct_ops_image_alloc();
+ if (IS_ERR(image))
+ return PTR_ERR(image);
+ image_off = 0;
+ }
+
+ size = arch_prepare_bpf_trampoline(NULL, image + image_off,
+ image + PAGE_SIZE,
model, flags, tlinks, stub_func);
+ if (size <= 0) {
+ if (image != *_image)
+ bpf_struct_ops_image_free(image);
+ return size ? : -EFAULT;
+ }
+
+ *_image = image;
+ *_image_off = image_off + size;
+ return 0;
}
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
- const struct bpf_struct_ops *st_ops = st_map->st_ops;
+ const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc;
+ const struct bpf_struct_ops *st_ops = st_ops_desc->st_ops;
struct bpf_struct_ops_value *uvalue, *kvalue;
+ const struct btf_type *module_type;
const struct btf_member *member;
- const struct btf_type *t = st_ops->type;
+ const struct btf_type *t = st_ops_desc->type;
struct bpf_tramp_links *tlinks;
void *udata, *kdata;
int prog_fd, err;
- void *image, *image_end;
- u32 i;
+ u32 i, trampoline_start, image_off = 0;
+ void *cur_image = NULL, *image = NULL;
if (flags)
return -EINVAL;
@@ -392,16 +606,16 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (*(u32 *)key != 0)
return -E2BIG;
- err = check_zero_holes(st_ops->value_type, value);
+ err = check_zero_holes(st_map->btf, st_ops_desc->value_type, value);
if (err)
return err;
uvalue = value;
- err = check_zero_holes(t, uvalue->data);
+ err = check_zero_holes(st_map->btf, t, uvalue->data);
if (err)
return err;
- if (uvalue->state || refcount_read(&uvalue->refcnt))
+ if (uvalue->common.state || refcount_read(&uvalue->common.refcnt))
return -EINVAL;
tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
@@ -413,7 +627,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
mutex_lock(&st_map->lock);
- if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
+ if (kvalue->common.state != BPF_STRUCT_OPS_STATE_INIT) {
err = -EBUSY;
goto unlock;
}
@@ -422,9 +636,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
udata = &uvalue->data;
kdata = &kvalue->data;
- image = st_map->image;
- image_end = st_map->image + PAGE_SIZE;
+ module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
for_each_member(i, t, member) {
const struct btf_type *mtype, *ptype;
struct bpf_prog *prog;
@@ -432,7 +645,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
u32 moff;
moff = __btf_member_bit_offset(t, member) / 8;
- ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
+ ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
if (ptype == module_type) {
if (*(void **)(udata + moff))
goto reset_unlock;
@@ -457,8 +670,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (!ptype || !btf_type_is_func_proto(ptype)) {
u32 msize;
- mtype = btf_type_by_id(btf_vmlinux, member->type);
- mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
+ mtype = btf_type_by_id(st_map->btf, member->type);
+ mtype = btf_resolve_size(st_map->btf, mtype, &msize);
if (IS_ERR(mtype)) {
err = PTR_ERR(mtype);
goto reset_unlock;
@@ -484,7 +697,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
}
if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
- prog->aux->attach_btf_id != st_ops->type_id ||
+ prog->aux->attach_btf_id != st_ops_desc->type_id ||
prog->expected_attach_type != i) {
bpf_prog_put(prog);
err = -EINVAL;
@@ -501,37 +714,47 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
&bpf_struct_ops_link_lops, prog);
st_map->links[i] = &link->link;
+ trampoline_start = image_off;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
- &st_ops->func_models[i],
- *(void **)(st_ops->cfi_stubs + moff),
- image, image_end);
+ &st_ops->func_models[i],
+ *(void **)(st_ops->cfi_stubs + moff),
+ &image, &image_off,
+ st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES);
+ if (err)
+ goto reset_unlock;
+
+ if (cur_image != image) {
+ st_map->image_pages[st_map->image_pages_cnt++] = image;
+ cur_image = image;
+ trampoline_start = 0;
+ }
if (err < 0)
goto reset_unlock;
- *(void **)(kdata + moff) = image + cfi_get_offset();
- image += err;
+ *(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset();
/* put prog_id to udata */
*(unsigned long *)(udata + moff) = prog->aux->id;
}
+ if (st_ops->validate) {
+ err = st_ops->validate(kdata);
+ if (err)
+ goto reset_unlock;
+ }
+ for (i = 0; i < st_map->image_pages_cnt; i++)
+ arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
+
if (st_map->map.map_flags & BPF_F_LINK) {
err = 0;
- if (st_ops->validate) {
- err = st_ops->validate(kdata);
- if (err)
- goto reset_unlock;
- }
- arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
/* Let bpf_link handle registration & unregistration.
*
* Pair with smp_load_acquire() during lookup_elem().
*/
- smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
+ smp_store_release(&kvalue->common.state, BPF_STRUCT_OPS_STATE_READY);
goto unlock;
}
- arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
err = st_ops->reg(kdata);
if (likely(!err)) {
/* This refcnt increment on the map here after
@@ -545,7 +768,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
* It ensures the above udata updates (e.g. prog->aux->id)
* can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
*/
- smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
+ smp_store_release(&kvalue->common.state, BPF_STRUCT_OPS_STATE_INUSE);
goto unlock;
}
@@ -554,9 +777,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
* there was a race in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps.
*/
- arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
reset_unlock:
+ bpf_struct_ops_map_free_image(st_map);
bpf_struct_ops_map_put_progs(st_map);
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
@@ -575,12 +798,12 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
if (st_map->map.map_flags & BPF_F_LINK)
return -EOPNOTSUPP;
- prev_state = cmpxchg(&st_map->kvalue.state,
+ prev_state = cmpxchg(&st_map->kvalue.common.state,
BPF_STRUCT_OPS_STATE_INUSE,
BPF_STRUCT_OPS_STATE_TOBEFREE);
switch (prev_state) {
case BPF_STRUCT_OPS_STATE_INUSE:
- st_map->st_ops->unreg(&st_map->kvalue.data);
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
bpf_map_put(map);
return 0;
case BPF_STRUCT_OPS_STATE_TOBEFREE:
@@ -597,6 +820,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
+ struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
void *value;
int err;
@@ -606,7 +830,8 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
if (!err) {
- btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
+ btf_type_seq_show(st_map->btf,
+ map->btf_vmlinux_value_type_id,
value, m);
seq_puts(m, "\n");
}
@@ -621,16 +846,22 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
if (st_map->links)
bpf_struct_ops_map_put_progs(st_map);
bpf_map_area_free(st_map->links);
- if (st_map->image) {
- arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
- bpf_jit_uncharge_modmem(PAGE_SIZE);
- }
+ bpf_struct_ops_map_free_image(st_map);
bpf_map_area_free(st_map->uvalue);
bpf_map_area_free(st_map);
}
static void bpf_struct_ops_map_free(struct bpf_map *map)
{
+ struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+
+ /* st_ops->owner was acquired during map_alloc to implicitly holds
+ * the btf's refcnt. The acquire was only done when btf_is_module()
+ * st_map->btf cannot be NULL here.
+ */
+ if (btf_is_module(st_map->btf))
+ module_put(st_map->st_ops_desc->st_ops->owner);
+
/* The struct_ops's function may switch to another struct_ops.
*
* For example, bpf_tcp_cc_x->init() may switch to
@@ -654,29 +885,61 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
- (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
+ (attr->map_flags & ~(BPF_F_LINK | BPF_F_VTYPE_BTF_OBJ_FD)) ||
+ !attr->btf_vmlinux_value_type_id)
return -EINVAL;
return 0;
}
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
- const struct bpf_struct_ops *st_ops;
+ const struct bpf_struct_ops_desc *st_ops_desc;
size_t st_map_size;
struct bpf_struct_ops_map *st_map;
const struct btf_type *t, *vt;
+ struct module *mod = NULL;
struct bpf_map *map;
+ struct btf *btf;
int ret;
- st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
- if (!st_ops)
- return ERR_PTR(-ENOTSUPP);
+ if (attr->map_flags & BPF_F_VTYPE_BTF_OBJ_FD) {
+ /* The map holds btf for its whole life time. */
+ btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
+ if (IS_ERR(btf))
+ return ERR_CAST(btf);
+ if (!btf_is_module(btf)) {
+ btf_put(btf);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mod = btf_try_get_module(btf);
+ /* mod holds a refcnt to btf. We don't need an extra refcnt
+ * here.
+ */
+ btf_put(btf);
+ if (!mod)
+ return ERR_PTR(-EINVAL);
+ } else {
+ btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(btf))
+ return ERR_CAST(btf);
+ if (!btf)
+ return ERR_PTR(-ENOTSUPP);
+ }
+
+ st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id);
+ if (!st_ops_desc) {
+ ret = -ENOTSUPP;
+ goto errout;
+ }
- vt = st_ops->value_type;
- if (attr->value_size != vt->size)
- return ERR_PTR(-EINVAL);
+ vt = st_ops_desc->value_type;
+ if (attr->value_size != vt->size) {
+ ret = -EINVAL;
+ goto errout;
+ }
- t = st_ops->type;
+ t = st_ops_desc->type;
st_map_size = sizeof(*st_map) +
/* kvalue stores the
@@ -685,48 +948,43 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
(vt->size - sizeof(struct bpf_struct_ops_value));
st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
- if (!st_map)
- return ERR_PTR(-ENOMEM);
+ if (!st_map) {
+ ret = -ENOMEM;
+ goto errout;
+ }
- st_map->st_ops = st_ops;
+ st_map->st_ops_desc = st_ops_desc;
map = &st_map->map;
- ret = bpf_jit_charge_modmem(PAGE_SIZE);
- if (ret) {
- __bpf_struct_ops_map_free(map);
- return ERR_PTR(ret);
- }
-
- st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
- if (!st_map->image) {
- /* __bpf_struct_ops_map_free() uses st_map->image as flag
- * for "charged or not". In this case, we need to unchange
- * here.
- */
- bpf_jit_uncharge_modmem(PAGE_SIZE);
- __bpf_struct_ops_map_free(map);
- return ERR_PTR(-ENOMEM);
- }
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
+ st_map->links_cnt = btf_type_vlen(t);
st_map->links =
- bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
+ bpf_map_area_alloc(st_map->links_cnt * sizeof(struct bpf_links *),
NUMA_NO_NODE);
if (!st_map->uvalue || !st_map->links) {
- __bpf_struct_ops_map_free(map);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto errout_free;
}
+ st_map->btf = btf;
mutex_init(&st_map->lock);
bpf_map_init_from_attr(map, attr);
return map;
+
+errout_free:
+ __bpf_struct_ops_map_free(map);
+errout:
+ module_put(mod);
+
+ return ERR_PTR(ret);
}
static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
- const struct bpf_struct_ops *st_ops = st_map->st_ops;
- const struct btf_type *vt = st_ops->value_type;
+ const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc;
+ const struct btf_type *vt = st_ops_desc->value_type;
u64 usage;
usage = sizeof(*st_map) +
@@ -785,7 +1043,7 @@ static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
return map->map_type == BPF_MAP_TYPE_STRUCT_OPS &&
map->map_flags & BPF_F_LINK &&
/* Pair with smp_store_release() during map_update */
- smp_load_acquire(&st_map->kvalue.state) == BPF_STRUCT_OPS_STATE_READY;
+ smp_load_acquire(&st_map->kvalue.common.state) == BPF_STRUCT_OPS_STATE_READY;
}
static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
@@ -800,7 +1058,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
/* st_link->map can be NULL if
* bpf_struct_ops_link_create() fails to register.
*/
- st_map->st_ops->unreg(&st_map->kvalue.data);
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
bpf_map_put(&st_map->map);
}
kfree(st_link);
@@ -847,7 +1105,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
if (!bpf_struct_ops_valid_to_reg(new_map))
return -EINVAL;
- if (!st_map->st_ops->update)
+ if (!st_map->st_ops_desc->st_ops->update)
return -EOPNOTSUPP;
mutex_lock(&update_mutex);
@@ -860,12 +1118,12 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
old_st_map = container_of(old_map, struct bpf_struct_ops_map, map);
/* The new and old struct_ops must be the same type. */
- if (st_map->st_ops != old_st_map->st_ops) {
+ if (st_map->st_ops_desc != old_st_map->st_ops_desc) {
err = -EINVAL;
goto err_out;
}
- err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
+ err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
if (err)
goto err_out;
@@ -916,7 +1174,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
if (err)
goto err_out;
- err = st_map->st_ops->reg(st_map->kvalue.data);
+ err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
if (err) {
bpf_link_cleanup(&link_primer);
link = NULL;
@@ -931,3 +1189,10 @@ err_out:
kfree(link);
return err;
}
+
+void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
+{
+ struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+
+ info->btf_vmlinux_id = btf_obj_id(st_map->btf);
+}
diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
deleted file mode 100644
index 5678a9ddf817..000000000000
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* internal file - do not include directly */
-
-#ifdef CONFIG_BPF_JIT
-#ifdef CONFIG_NET
-BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
-#endif
-#ifdef CONFIG_INET
-#include <net/tcp.h>
-BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
-#endif
-#endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 596471189176..90c4a32d89ff 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -19,6 +19,7 @@
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
+#include <linux/bpf.h>
#include <linux/bpf_lsm.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
@@ -241,6 +242,12 @@ struct btf_id_dtor_kfunc_tab {
struct btf_id_dtor_kfunc dtors[];
};
+struct btf_struct_ops_tab {
+ u32 cnt;
+ u32 capacity;
+ struct bpf_struct_ops_desc ops[];
+};
+
struct btf {
void *data;
struct btf_type **types;
@@ -258,6 +265,7 @@ struct btf {
struct btf_kfunc_set_tab *kfunc_set_tab;
struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
struct btf_struct_metas *struct_meta_tab;
+ struct btf_struct_ops_tab *struct_ops_tab;
/* split BTF support */
struct btf *base_btf;
@@ -801,9 +809,23 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
return __btf_name_valid(btf, offset);
}
+/* Allow any printable character in DATASEC names */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
- return __btf_name_valid(btf, offset);
+ /* offset must be valid */
+ const char *src = btf_str_by_offset(btf, offset);
+ const char *src_limit;
+
+ /* set a limit on identifier length */
+ src_limit = src + KSYM_NAME_LEN;
+ src++;
+ while (*src && src < src_limit) {
+ if (!isprint(*src))
+ return false;
+ src++;
+ }
+
+ return !*src;
}
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
@@ -1688,11 +1710,27 @@ static void btf_free_struct_meta_tab(struct btf *btf)
btf->struct_meta_tab = NULL;
}
+static void btf_free_struct_ops_tab(struct btf *btf)
+{
+ struct btf_struct_ops_tab *tab = btf->struct_ops_tab;
+ u32 i;
+
+ if (!tab)
+ return;
+
+ for (i = 0; i < tab->cnt; i++)
+ bpf_struct_ops_desc_release(&tab->ops[i]);
+
+ kfree(tab);
+ btf->struct_ops_tab = NULL;
+}
+
static void btf_free(struct btf *btf)
{
btf_free_struct_meta_tab(btf);
btf_free_dtor_kfunc_tab(btf);
btf_free_kfunc_set_tab(btf);
+ btf_free_struct_ops_tab(btf);
kvfree(btf->types);
kvfree(btf->resolved_sizes);
kvfree(btf->resolved_ids);
@@ -1707,6 +1745,11 @@ static void btf_free_rcu(struct rcu_head *rcu)
btf_free(btf);
}
+const char *btf_get_name(const struct btf *btf)
+{
+ return btf->name;
+}
+
void btf_get(struct btf *btf)
{
refcount_inc(&btf->refcnt);
@@ -3310,30 +3353,48 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
return BTF_FIELD_FOUND;
}
-const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
- int comp_idx, const char *tag_key)
+int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key, int last_id)
{
- const char *value = NULL;
- int i;
+ int len = strlen(tag_key);
+ int i, n;
- for (i = 1; i < btf_nr_types(btf); i++) {
+ for (i = last_id + 1, n = btf_nr_types(btf); i < n; i++) {
const struct btf_type *t = btf_type_by_id(btf, i);
- int len = strlen(tag_key);
if (!btf_type_is_decl_tag(t))
continue;
- if (pt != btf_type_by_id(btf, t->type) ||
- btf_type_decl_tag(t)->component_idx != comp_idx)
+ if (pt != btf_type_by_id(btf, t->type))
+ continue;
+ if (btf_type_decl_tag(t)->component_idx != comp_idx)
continue;
if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len))
continue;
- /* Prevent duplicate entries for same type */
- if (value)
- return ERR_PTR(-EEXIST);
- value = __btf_name_by_offset(btf, t->name_off) + len;
+ return i;
}
- if (!value)
- return ERR_PTR(-ENOENT);
+ return -ENOENT;
+}
+
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key)
+{
+ const char *value = NULL;
+ const struct btf_type *t;
+ int len, id;
+
+ id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, 0);
+ if (id < 0)
+ return ERR_PTR(id);
+
+ t = btf_type_by_id(btf, id);
+ len = strlen(tag_key);
+ value = __btf_name_by_offset(btf, t->name_off) + len;
+
+ /* Prevent duplicate entries for same type */
+ id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, id);
+ if (id >= 0)
+ return ERR_PTR(-EEXIST);
+
return value;
}
@@ -5647,15 +5708,29 @@ static int find_kern_ctx_type_id(enum bpf_prog_type prog_type)
return ctx_type->type;
}
-const struct btf_type *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, enum bpf_prog_type prog_type,
- int arg)
+bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
{
const struct btf_type *ctx_type;
const char *tname, *ctx_tname;
t = btf_type_by_id(btf, t->type);
+
+ /* KPROBE programs allow bpf_user_pt_regs_t typedef, which we need to
+ * check before we skip all the typedef below.
+ */
+ if (prog_type == BPF_PROG_TYPE_KPROBE) {
+ while (btf_type_is_modifier(t) && !btf_type_is_typedef(t))
+ t = btf_type_by_id(btf, t->type);
+
+ if (btf_type_is_typedef(t)) {
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0)
+ return true;
+ }
+ }
+
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_struct(t)) {
@@ -5664,27 +5739,30 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
* is not supported yet.
* BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
*/
- return NULL;
+ return false;
}
tname = btf_name_by_offset(btf, t->name_off);
if (!tname) {
bpf_log(log, "arg#%d struct doesn't have a name\n", arg);
- return NULL;
+ return false;
}
ctx_type = find_canonical_prog_ctx_type(prog_type);
if (!ctx_type) {
bpf_log(log, "btf_vmlinux is malformed\n");
/* should not happen */
- return NULL;
+ return false;
}
again:
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off);
if (!ctx_tname) {
/* should not happen */
bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
- return NULL;
+ return false;
}
+ /* program types without named context types work only with arg:ctx tag */
+ if (ctx_tname[0] == '\0')
+ return false;
/* only compare that prog's ctx type name is the same as
* kernel expects. No need to compare field by field.
* It's ok for bpf prog to do:
@@ -5693,20 +5771,20 @@ again:
* { // no fields of skb are ever used }
*/
if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
- return ctx_type;
+ return true;
if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
- return ctx_type;
+ return true;
if (strcmp(ctx_tname, tname)) {
/* bpf_user_pt_regs_t is a typedef, so resolve it to
* underlying struct and check name again
*/
if (!btf_type_is_modifier(ctx_type))
- return NULL;
+ return false;
while (btf_type_is_modifier(ctx_type))
ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type);
goto again;
}
- return ctx_type;
+ return true;
}
/* forward declarations for arch-specific underlying types of
@@ -5858,7 +5936,7 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
enum bpf_prog_type prog_type,
int arg)
{
- if (!btf_get_prog_ctx_type(log, btf, t, prog_type, arg))
+ if (!btf_is_prog_ctx_type(log, btf, t, prog_type, arg))
return -ENOENT;
return find_kern_ctx_type_id(prog_type);
}
@@ -5933,8 +6011,6 @@ struct btf *btf_parse_vmlinux(void)
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
- bpf_struct_ops_init(btf, log);
-
refcount_set(&btf->refcnt, 1);
err = btf_alloc_id(btf);
@@ -6092,6 +6168,26 @@ static bool prog_args_trusted(const struct bpf_prog *prog)
}
}
+int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
+ u32 arg_no)
+{
+ const struct btf_param *args;
+ const struct btf_type *t;
+ int off = 0, i;
+ u32 sz;
+
+ args = btf_params(func_proto);
+ for (i = 0; i < arg_no; i++) {
+ t = btf_type_by_id(btf, args[i].type);
+ t = btf_resolve_size(btf, t, &sz);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+ off += roundup(sz, 8);
+ }
+
+ return off;
+}
+
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -6228,7 +6324,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
info->reg_type = ctx_arg_info->reg_type;
- info->btf = btf_vmlinux;
+ info->btf = ctx_arg_info->btf ? : btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
return true;
}
@@ -6284,6 +6380,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
__btf_name_by_offset(btf, t->name_off));
return true;
}
+EXPORT_SYMBOL_GPL(btf_ctx_access);
enum bpf_struct_walk_result {
/* < 0 error */
@@ -6946,6 +7043,81 @@ static bool btf_is_dynptr_ptr(const struct btf *btf, const struct btf_type *t)
return false;
}
+struct bpf_cand_cache {
+ const char *name;
+ u32 name_len;
+ u16 kind;
+ u16 cnt;
+ struct {
+ const struct btf *btf;
+ u32 id;
+ } cands[];
+};
+
+static DEFINE_MUTEX(cand_cache_mutex);
+
+static struct bpf_cand_cache *
+bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id);
+
+static int btf_get_ptr_to_btf_id(struct bpf_verifier_log *log, int arg_idx,
+ const struct btf *btf, const struct btf_type *t)
+{
+ struct bpf_cand_cache *cc;
+ struct bpf_core_ctx ctx = {
+ .btf = btf,
+ .log = log,
+ };
+ u32 kern_type_id, type_id;
+ int err = 0;
+
+ /* skip PTR and modifiers */
+ type_id = t->type;
+ t = btf_type_by_id(btf, t->type);
+ while (btf_type_is_modifier(t)) {
+ type_id = t->type;
+ t = btf_type_by_id(btf, t->type);
+ }
+
+ mutex_lock(&cand_cache_mutex);
+ cc = bpf_core_find_cands(&ctx, type_id);
+ if (IS_ERR(cc)) {
+ err = PTR_ERR(cc);
+ bpf_log(log, "arg#%d reference type('%s %s') candidate matching error: %d\n",
+ arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off),
+ err);
+ goto cand_cache_unlock;
+ }
+ if (cc->cnt != 1) {
+ bpf_log(log, "arg#%d reference type('%s %s') %s\n",
+ arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off),
+ cc->cnt == 0 ? "has no matches" : "is ambiguous");
+ err = cc->cnt == 0 ? -ENOENT : -ESRCH;
+ goto cand_cache_unlock;
+ }
+ if (btf_is_module(cc->cands[0].btf)) {
+ bpf_log(log, "arg#%d reference type('%s %s') points to kernel module type (unsupported)\n",
+ arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off));
+ err = -EOPNOTSUPP;
+ goto cand_cache_unlock;
+ }
+ kern_type_id = cc->cands[0].id;
+
+cand_cache_unlock:
+ mutex_unlock(&cand_cache_mutex);
+ if (err)
+ return err;
+
+ return kern_type_id;
+}
+
+enum btf_arg_tag {
+ ARG_TAG_CTX = BIT_ULL(0),
+ ARG_TAG_NONNULL = BIT_ULL(1),
+ ARG_TAG_TRUSTED = BIT_ULL(2),
+ ARG_TAG_NULLABLE = BIT_ULL(3),
+ ARG_TAG_ARENA = BIT_ULL(4),
+};
+
/* Process BTF of a function to produce high-level expectation of function
* arguments (like ARG_PTR_TO_CTX, or ARG_PTR_TO_MEM, etc). This information
* is cached in subprog info for reuse.
@@ -7009,6 +7181,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
args = (const struct btf_param *)(t + 1);
nargs = btf_type_vlen(t);
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
+ if (!is_global)
+ return -EINVAL;
bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
return -EINVAL;
@@ -7018,6 +7192,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_int(t) && !btf_is_any_enum(t)) {
+ if (!is_global)
+ return -EINVAL;
bpf_log(log,
"Global function %s() doesn't return scalar. Only those are supported.\n",
tname);
@@ -7027,92 +7203,134 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
* Only PTR_TO_CTX and SCALAR are supported atm.
*/
for (i = 0; i < nargs; i++) {
- bool is_nonnull = false;
- const char *tag;
+ u32 tags = 0;
+ int id = 0;
- t = btf_type_by_id(btf, args[i].type);
-
- tag = btf_find_decl_tag_value(btf, fn_t, i, "arg:");
- if (IS_ERR(tag) && PTR_ERR(tag) == -ENOENT) {
- tag = NULL;
- } else if (IS_ERR(tag)) {
- bpf_log(log, "arg#%d type's tag fetching failure: %ld\n", i, PTR_ERR(tag));
- return PTR_ERR(tag);
- }
/* 'arg:<tag>' decl_tag takes precedence over derivation of
* register type from BTF type itself
*/
- if (tag) {
+ while ((id = btf_find_next_decl_tag(btf, fn_t, i, "arg:", id)) > 0) {
+ const struct btf_type *tag_t = btf_type_by_id(btf, id);
+ const char *tag = __btf_name_by_offset(btf, tag_t->name_off) + 4;
+
/* disallow arg tags in static subprogs */
if (!is_global) {
bpf_log(log, "arg#%d type tag is not supported in static functions\n", i);
return -EOPNOTSUPP;
}
+
if (strcmp(tag, "ctx") == 0) {
- sub->args[i].arg_type = ARG_PTR_TO_CTX;
- continue;
+ tags |= ARG_TAG_CTX;
+ } else if (strcmp(tag, "trusted") == 0) {
+ tags |= ARG_TAG_TRUSTED;
+ } else if (strcmp(tag, "nonnull") == 0) {
+ tags |= ARG_TAG_NONNULL;
+ } else if (strcmp(tag, "nullable") == 0) {
+ tags |= ARG_TAG_NULLABLE;
+ } else if (strcmp(tag, "arena") == 0) {
+ tags |= ARG_TAG_ARENA;
+ } else {
+ bpf_log(log, "arg#%d has unsupported set of tags\n", i);
+ return -EOPNOTSUPP;
}
- if (strcmp(tag, "nonnull") == 0)
- is_nonnull = true;
+ }
+ if (id != -ENOENT) {
+ bpf_log(log, "arg#%d type tag fetching failure: %d\n", i, id);
+ return id;
}
+ t = btf_type_by_id(btf, args[i].type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_int(t) || btf_is_any_enum(t)) {
- sub->args[i].arg_type = ARG_ANYTHING;
- continue;
- }
- if (btf_type_is_ptr(t) && btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
+ if (!btf_type_is_ptr(t))
+ goto skip_pointer;
+
+ if ((tags & ARG_TAG_CTX) || btf_is_prog_ctx_type(log, btf, t, prog_type, i)) {
+ if (tags & ~ARG_TAG_CTX) {
+ bpf_log(log, "arg#%d has invalid combination of tags\n", i);
+ return -EINVAL;
+ }
+ if ((tags & ARG_TAG_CTX) &&
+ btf_validate_prog_ctx_type(log, btf, t, i, prog_type,
+ prog->expected_attach_type))
+ return -EINVAL;
sub->args[i].arg_type = ARG_PTR_TO_CTX;
continue;
}
- if (btf_type_is_ptr(t) && btf_is_dynptr_ptr(btf, t)) {
+ if (btf_is_dynptr_ptr(btf, t)) {
+ if (tags) {
+ bpf_log(log, "arg#%d has invalid combination of tags\n", i);
+ return -EINVAL;
+ }
sub->args[i].arg_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY;
continue;
}
- if (is_global && btf_type_is_ptr(t)) {
+ if (tags & ARG_TAG_TRUSTED) {
+ int kern_type_id;
+
+ if (tags & ARG_TAG_NONNULL) {
+ bpf_log(log, "arg#%d has invalid combination of tags\n", i);
+ return -EINVAL;
+ }
+
+ kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t);
+ if (kern_type_id < 0)
+ return kern_type_id;
+
+ sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_TRUSTED;
+ if (tags & ARG_TAG_NULLABLE)
+ sub->args[i].arg_type |= PTR_MAYBE_NULL;
+ sub->args[i].btf_id = kern_type_id;
+ continue;
+ }
+ if (tags & ARG_TAG_ARENA) {
+ if (tags & ~ARG_TAG_ARENA) {
+ bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i);
+ return -EINVAL;
+ }
+ sub->args[i].arg_type = ARG_PTR_TO_ARENA;
+ continue;
+ }
+ if (is_global) { /* generic user data pointer */
u32 mem_size;
+ if (tags & ARG_TAG_NULLABLE) {
+ bpf_log(log, "arg#%d has invalid combination of tags\n", i);
+ return -EINVAL;
+ }
+
t = btf_type_skip_modifiers(btf, t->type, NULL);
ref_t = btf_resolve_size(btf, t, &mem_size);
if (IS_ERR(ref_t)) {
- bpf_log(log,
- "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
- i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
+ bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
+ i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
PTR_ERR(ref_t));
return -EINVAL;
}
- sub->args[i].arg_type = is_nonnull ? ARG_PTR_TO_MEM : ARG_PTR_TO_MEM_OR_NULL;
+ sub->args[i].arg_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL;
+ if (tags & ARG_TAG_NONNULL)
+ sub->args[i].arg_type &= ~PTR_MAYBE_NULL;
sub->args[i].mem_size = mem_size;
continue;
}
- if (is_nonnull) {
- bpf_log(log, "arg#%d marked as non-null, but is not a pointer type\n", i);
+
+skip_pointer:
+ if (tags) {
+ bpf_log(log, "arg#%d has pointer tag, but is not a pointer type\n", i);
return -EINVAL;
}
+ if (btf_type_is_int(t) || btf_is_any_enum(t)) {
+ sub->args[i].arg_type = ARG_ANYTHING;
+ continue;
+ }
+ if (!is_global)
+ return -EINVAL;
bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n",
i, btf_type_str(t), tname);
return -EINVAL;
}
- for (i = 0; i < nargs; i++) {
- const char *tag;
-
- if (sub->args[i].arg_type != ARG_PTR_TO_CTX)
- continue;
-
- /* check if arg has "arg:ctx" tag */
- t = btf_type_by_id(btf, args[i].type);
- tag = btf_find_decl_tag_value(btf, fn_t, i, "arg:");
- if (IS_ERR_OR_NULL(tag) || strcmp(tag, "ctx") != 0)
- continue;
-
- if (btf_validate_prog_ctx_type(log, btf, t, i, prog_type,
- prog->expected_attach_type))
- return -EINVAL;
- }
-
sub->arg_cnt = nargs;
sub->args_cached = true;
@@ -7589,6 +7807,17 @@ static struct btf *btf_get_module_btf(const struct module *module)
return btf;
}
+static int check_btf_kconfigs(const struct module *module, const char *feature)
+{
+ if (!module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ pr_err("missing vmlinux BTF, cannot register %s\n", feature);
+ return -ENOENT;
+ }
+ if (module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+ pr_warn("missing module BTF, cannot register %s\n", feature);
+ return 0;
+}
+
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
struct btf *btf = NULL;
@@ -7949,15 +8178,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
int ret, i;
btf = btf_get_module_btf(kset->owner);
- if (!btf) {
- if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
- pr_err("missing vmlinux BTF, cannot register kfuncs\n");
- return -ENOENT;
- }
- if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
- pr_warn("missing module BTF, cannot register kfuncs\n");
- return 0;
- }
+ if (!btf)
+ return check_btf_kconfigs(kset->owner, "kfunc");
if (IS_ERR(btf))
return PTR_ERR(btf);
@@ -7981,6 +8203,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
{
enum btf_kfunc_hook hook;
+ /* All kfuncs need to be tagged as such in BTF.
+ * WARN() for initcall registrations that do not check errors.
+ */
+ if (!(kset->set->flags & BTF_SET8_KFUNCS)) {
+ WARN_ON(!kset->owner);
+ return -EINVAL;
+ }
+
hook = bpf_prog_type_to_kfunc_hook(prog_type);
return __register_btf_kfunc_id_set(hook, kset);
}
@@ -8057,17 +8287,8 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
int ret;
btf = btf_get_module_btf(owner);
- if (!btf) {
- if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
- pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n");
- return -ENOENT;
- }
- if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
- pr_err("missing module BTF, cannot register dtor kfuncs\n");
- return -ENOENT;
- }
- return 0;
- }
+ if (!btf)
+ return check_btf_kconfigs(owner, "dtor kfuncs");
if (IS_ERR(btf))
return PTR_ERR(btf);
@@ -8182,17 +8403,6 @@ size_t bpf_core_essential_name_len(const char *name)
return n;
}
-struct bpf_cand_cache {
- const char *name;
- u32 name_len;
- u16 kind;
- u16 cnt;
- struct {
- const struct btf *btf;
- u32 id;
- } cands[];
-};
-
static void bpf_free_cands(struct bpf_cand_cache *cands)
{
if (!cands->cnt)
@@ -8213,8 +8423,6 @@ static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE];
#define MODULE_CAND_CACHE_SIZE 31
static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE];
-static DEFINE_MUTEX(cand_cache_mutex);
-
static void __print_cand_cache(struct bpf_verifier_log *log,
struct bpf_cand_cache **cache,
int cache_size)
@@ -8645,3 +8853,141 @@ bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
return !strncmp(reg_name, arg_name, cmp_len);
}
+
+#ifdef CONFIG_BPF_JIT
+static int
+btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
+ struct bpf_verifier_log *log)
+{
+ struct btf_struct_ops_tab *tab, *new_tab;
+ int i, err;
+
+ tab = btf->struct_ops_tab;
+ if (!tab) {
+ tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]),
+ GFP_KERNEL);
+ if (!tab)
+ return -ENOMEM;
+ tab->capacity = 4;
+ btf->struct_ops_tab = tab;
+ }
+
+ for (i = 0; i < tab->cnt; i++)
+ if (tab->ops[i].st_ops == st_ops)
+ return -EEXIST;
+
+ if (tab->cnt == tab->capacity) {
+ new_tab = krealloc(tab,
+ offsetof(struct btf_struct_ops_tab,
+ ops[tab->capacity * 2]),
+ GFP_KERNEL);
+ if (!new_tab)
+ return -ENOMEM;
+ tab = new_tab;
+ tab->capacity *= 2;
+ btf->struct_ops_tab = tab;
+ }
+
+ tab->ops[btf->struct_ops_tab->cnt].st_ops = st_ops;
+
+ err = bpf_struct_ops_desc_init(&tab->ops[btf->struct_ops_tab->cnt], btf, log);
+ if (err)
+ return err;
+
+ btf->struct_ops_tab->cnt++;
+
+ return 0;
+}
+
+const struct bpf_struct_ops_desc *
+bpf_struct_ops_find_value(struct btf *btf, u32 value_id)
+{
+ const struct bpf_struct_ops_desc *st_ops_list;
+ unsigned int i;
+ u32 cnt;
+
+ if (!value_id)
+ return NULL;
+ if (!btf->struct_ops_tab)
+ return NULL;
+
+ cnt = btf->struct_ops_tab->cnt;
+ st_ops_list = btf->struct_ops_tab->ops;
+ for (i = 0; i < cnt; i++) {
+ if (st_ops_list[i].value_id == value_id)
+ return &st_ops_list[i];
+ }
+
+ return NULL;
+}
+
+const struct bpf_struct_ops_desc *
+bpf_struct_ops_find(struct btf *btf, u32 type_id)
+{
+ const struct bpf_struct_ops_desc *st_ops_list;
+ unsigned int i;
+ u32 cnt;
+
+ if (!type_id)
+ return NULL;
+ if (!btf->struct_ops_tab)
+ return NULL;
+
+ cnt = btf->struct_ops_tab->cnt;
+ st_ops_list = btf->struct_ops_tab->ops;
+ for (i = 0; i < cnt; i++) {
+ if (st_ops_list[i].type_id == type_id)
+ return &st_ops_list[i];
+ }
+
+ return NULL;
+}
+
+int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops)
+{
+ struct bpf_verifier_log *log;
+ struct btf *btf;
+ int err = 0;
+
+ btf = btf_get_module_btf(st_ops->owner);
+ if (!btf)
+ return check_btf_kconfigs(st_ops->owner, "struct_ops");
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN);
+ if (!log) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ log->level = BPF_LOG_KERNEL;
+
+ err = btf_add_struct_ops(btf, st_ops, log);
+
+errout:
+ kfree(log);
+ btf_put(btf);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(__register_bpf_struct_ops);
+#endif
+
+bool btf_param_match_suffix(const struct btf *btf,
+ const struct btf_param *arg,
+ const char *suffix)
+{
+ int suffix_len = strlen(suffix), len;
+ const char *param_name;
+
+ /* In the future, this can be ported to use BTF tagging */
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len <= suffix_len)
+ return false;
+ param_name += len - suffix_len;
+ return !strncmp(param_name, suffix, suffix_len);
+}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 491d20038cbe..82243cb6c54d 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1358,15 +1358,12 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
enum cgroup_bpf_attach_type atype)
{
- unsigned int offset = skb->data - skb_network_header(skb);
+ unsigned int offset = -skb_network_offset(skb);
struct sock *save_sk;
void *saved_data_end;
struct cgroup *cgrp;
int ret;
- if (!sk || !sk_fullsock(sk))
- return 0;
-
if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
return 0;
@@ -1630,7 +1627,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2191,7 +2188,7 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2348,7 +2345,7 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ea6843be2616..696bc55de8e8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -88,13 +88,18 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
return NULL;
}
+/* tell bpf programs that include vmlinux.h kernel's PAGE_SIZE */
+enum page_size_enum {
+ __PAGE_SIZE = PAGE_SIZE
+};
+
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
- size = round_up(size, PAGE_SIZE);
+ size = round_up(size, __PAGE_SIZE);
fp = __vmalloc(size, gfp_flags);
if (fp == NULL)
return NULL;
@@ -682,7 +687,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
if (!bpf_prog_kallsyms_candidate(fp) ||
- !bpf_capable())
+ !bpf_token_capable(fp->aux->token, CAP_BPF))
return;
bpf_prog_ksym_set_addr(fp);
@@ -888,7 +893,12 @@ static LIST_HEAD(pack_list);
* CONFIG_MMU=n. Use PAGE_SIZE in these cases.
*/
#ifdef PMD_SIZE
-#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes())
+/* PMD_SIZE is really big for some archs. It doesn't make sense to
+ * reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to
+ * 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be
+ * greater than or equal to 2MB.
+ */
+#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes())
#else
#define BPF_PROG_PACK_SIZE PAGE_SIZE
#endif
@@ -1675,6 +1685,7 @@ bool bpf_opcode_in_insntable(u8 code)
[BPF_LD | BPF_IND | BPF_B] = true,
[BPF_LD | BPF_IND | BPF_H] = true,
[BPF_LD | BPF_IND | BPF_W] = true,
+ [BPF_JMP | BPF_JCOND] = true,
};
#undef BPF_INSN_3_TBL
#undef BPF_INSN_2_TBL
@@ -2695,7 +2706,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
bool sleepable;
u32 i;
- sleepable = aux->sleepable;
+ sleepable = aux->prog->sleepable;
for (i = 0; i < len; i++) {
map = used_maps[i];
if (map->ops->map_poke_untrack)
@@ -2779,6 +2790,7 @@ void bpf_prog_free(struct bpf_prog *fp)
if (aux->dst_prog)
bpf_prog_put(aux->dst_prog);
+ bpf_token_put(aux->token);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
schedule_work(&aux->work);
}
@@ -2925,6 +2937,21 @@ bool __weak bpf_jit_supports_far_kfunc_call(void)
return false;
}
+bool __weak bpf_jit_supports_arena(void)
+{
+ return false;
+}
+
+/* Return TRUE if the JIT backend satisfies the following two conditions:
+ * 1) JIT backend supports atomic_xchg() on pointer-sized words.
+ * 2) Under the specific arch, the implementation of xchg() is the same
+ * as atomic_xchg() on pointer-sized words.
+ */
+bool __weak bpf_jit_supports_ptr_xchg(void)
+{
+ return false;
+}
+
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
@@ -2959,6 +2986,17 @@ void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp,
{
}
+/* for configs without MMU or 32-bit */
+__weak const struct bpf_map_ops arena_map_ops;
+__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
+{
+ return 0;
+}
+__weak u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
+{
+ return 0;
+}
+
#ifdef CONFIG_BPF_SYSCALL
static int __init bpf_global_ma_init(void)
{
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8a0bb80fe48a..9ee8da477465 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -24,6 +24,7 @@
#include <linux/filter.h>
#include <linux/ptr_ring.h>
#include <net/xdp.h>
+#include <net/hotdata.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
@@ -178,7 +179,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
void **frames, int n,
struct xdp_cpumap_stats *stats)
{
- struct xdp_rxq_info rxq;
+ struct xdp_rxq_info rxq = {};
struct xdp_buff xdp;
int i, nframes = 0;
@@ -326,7 +327,8 @@ static int cpu_map_kthread_run(void *data)
/* Support running another XDP prog on this CPU */
nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
if (nframes) {
- m = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, skbs);
+ m = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ gfp, nframes, skbs);
if (unlikely(m == 0)) {
for (i = 0; i < nframes; i++)
skbs[i] = NULL; /* effect: xdp_return_frame */
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 2e73533a3811..dad0fb1c8e87 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -424,7 +424,7 @@ __bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask)
__bpf_kfunc_end_defs();
-BTF_SET8_START(cpumask_kfunc_btf_ids)
+BTF_KFUNCS_START(cpumask_kfunc_btf_ids)
BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
@@ -450,7 +450,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU)
-BTF_SET8_END(cpumask_kfunc_btf_ids)
+BTF_KFUNCS_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a936c704d4e7..4e2cdbb5629f 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -130,13 +130,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
bpf_map_init_from_attr(&dtab->map, attr);
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
- dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
-
- if (!dtab->n_buckets) /* Overflow check */
+ /* hash table size must be power of 2; roundup_pow_of_two() can
+ * overflow into UB on 32-bit arches, so check that first
+ */
+ if (dtab->map.max_entries > 1UL << 31)
return -EINVAL;
- }
- if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
+
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 49940c26a227..bd2e2dd04740 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -166,6 +166,12 @@ static bool is_movsx(const struct bpf_insn *insn)
(insn->off == 8 || insn->off == 16 || insn->off == 32);
}
+static bool is_addr_space_cast(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
+ insn->off == BPF_ADDR_SPACE_CAST;
+}
+
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
const struct bpf_insn *insn,
bool allow_ptr_leaks)
@@ -184,6 +190,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
insn->code, class == BPF_ALU ? 'w' : 'r',
insn->dst_reg, class == BPF_ALU ? 'w' : 'r',
insn->dst_reg);
+ } else if (is_addr_space_cast(insn)) {
+ verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n",
+ insn->code, insn->dst_reg,
+ insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm);
} else if (BPF_SRC(insn->code) == BPF_X) {
verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n",
insn->code, class == BPF_ALU ? 'w' : 'r',
@@ -322,6 +332,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
} else if (insn->code == (BPF_JMP | BPF_JA)) {
verbose(cbs->private_data, "(%02x) goto pc%+d\n",
insn->code, insn->off);
+ } else if (insn->code == (BPF_JMP | BPF_JCOND) &&
+ insn->src_reg == BPF_MAY_GOTO) {
+ verbose(cbs->private_data, "(%02x) may_goto pc%+d\n",
+ insn->code, insn->off);
} else if (insn->code == (BPF_JMP32 | BPF_JA)) {
verbose(cbs->private_data, "(%02x) gotol pc%+d\n",
insn->code, insn->imm);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 03a6a2500b6a..3a088a5349bc 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -499,7 +499,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
num_possible_cpus());
}
- /* hash table size must be power of 2 */
+ /* hash table size must be power of 2; roundup_pow_of_two() can overflow
+ * into UB on 32-bit arches, so check that first
+ */
+ err = -E2BIG;
+ if (htab->map.max_entries > 1UL << 31)
+ goto free_htab;
+
htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
htab->elem_size = sizeof(struct htab_elem) +
@@ -509,10 +515,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
htab->elem_size += round_up(htab->map.value_size, 8);
- err = -E2BIG;
- /* prevent zero size kmalloc and check for u32 overflow */
- if (htab->n_buckets == 0 ||
- htab->n_buckets > U32_MAX / sizeof(struct bucket))
+ /* check for u32 overflow */
+ if (htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
err = bpf_map_init_elem_count(&htab->map);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index be72824f32b2..a89587859571 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -334,7 +334,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
__this_cpu_write(irqsave_flags, flags);
}
-notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
__bpf_spin_lock_irqsave(lock);
return 0;
@@ -357,7 +357,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
local_irq_restore(flags);
}
-notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
__bpf_spin_unlock_irqrestore(lock);
return 0;
@@ -1101,6 +1101,7 @@ struct bpf_hrtimer {
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
+ struct rcu_head rcu;
};
/* the actual struct hidden inside uapi struct bpf_timer */
@@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
if (in_nmi())
return -EOPNOTSUPP;
+ rcu_read_lock();
__bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer;
if (!t) {
@@ -1353,6 +1355,7 @@ out:
* if it was running.
*/
ret = ret ?: hrtimer_cancel(&t->timer);
+ rcu_read_unlock();
return ret;
}
@@ -1407,13 +1410,14 @@ out:
*/
if (this_cpu_read(hrtimer_running) != t)
hrtimer_cancel(&t->timer);
- kfree(t);
+ kfree_rcu(t, rcu);
}
BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
{
unsigned long *kptr = map_value;
+ /* This helper may be inlined by verifier. */
return xchg(kptr, (unsigned long)ptr);
}
@@ -1679,7 +1683,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -1730,7 +1734,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
break;
}
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return NULL;
switch (func_id) {
@@ -1788,7 +1792,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
break;
}
- if (!perfmon_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
return NULL;
switch (func_id) {
@@ -2483,9 +2487,9 @@ __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
return obj;
}
-__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
+__bpf_kfunc void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k)
{
- return obj__ign;
+ return (void *)obj__ign;
}
__bpf_kfunc void bpf_rcu_read_lock(void)
@@ -2543,7 +2547,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
__bpf_kfunc_end_defs();
-BTF_SET8_START(generic_btf_ids)
+BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_KEXEC_CORE
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
@@ -2572,7 +2576,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
-BTF_SET8_END(generic_btf_ids)
+BTF_KFUNCS_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
.owner = THIS_MODULE,
@@ -2588,7 +2592,7 @@ BTF_ID(struct, cgroup)
BTF_ID(func, bpf_cgroup_release_dtor)
#endif
-BTF_SET8_START(common_btf_ids)
+BTF_KFUNCS_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
@@ -2617,7 +2621,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
BTF_ID_FLAGS(func, bpf_dynptr_size)
BTF_ID_FLAGS(func, bpf_dynptr_clone)
-BTF_SET8_END(common_btf_ids)
+BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 41e0a55c35f5..af5d2ffadd70 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -20,6 +20,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <linux/kstrtox.h>
#include "preload/bpf_preload.h"
enum bpf_type {
@@ -98,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
static const struct inode_operations bpf_link_iops = { };
-static struct inode *bpf_get_inode(struct super_block *sb,
- const struct inode *dir,
- umode_t mode)
+struct inode *bpf_get_inode(struct super_block *sb,
+ const struct inode *dir,
+ umode_t mode)
{
struct inode *inode;
@@ -594,6 +595,136 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ
}
EXPORT_SYMBOL(bpf_prog_get_type_path);
+struct bpffs_btf_enums {
+ const struct btf *btf;
+ const struct btf_type *cmd_t;
+ const struct btf_type *map_t;
+ const struct btf_type *prog_t;
+ const struct btf_type *attach_t;
+};
+
+static int find_bpffs_btf_enums(struct bpffs_btf_enums *info)
+{
+ const struct btf *btf;
+ const struct btf_type *t;
+ const char *name;
+ int i, n;
+
+ memset(info, 0, sizeof(*info));
+
+ btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+ if (!btf)
+ return -ENOENT;
+
+ info->btf = btf;
+
+ for (i = 1, n = btf_nr_types(btf); i < n; i++) {
+ t = btf_type_by_id(btf, i);
+ if (!btf_type_is_enum(t))
+ continue;
+
+ name = btf_name_by_offset(btf, t->name_off);
+ if (!name)
+ continue;
+
+ if (strcmp(name, "bpf_cmd") == 0)
+ info->cmd_t = t;
+ else if (strcmp(name, "bpf_map_type") == 0)
+ info->map_t = t;
+ else if (strcmp(name, "bpf_prog_type") == 0)
+ info->prog_t = t;
+ else if (strcmp(name, "bpf_attach_type") == 0)
+ info->attach_t = t;
+ else
+ continue;
+
+ if (info->cmd_t && info->map_t && info->prog_t && info->attach_t)
+ return 0;
+ }
+
+ return -ESRCH;
+}
+
+static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t,
+ const char *prefix, const char *str, int *value)
+{
+ const struct btf_enum *e;
+ const char *name;
+ int i, n, pfx_len = strlen(prefix);
+
+ *value = 0;
+
+ if (!btf || !enum_t)
+ return false;
+
+ for (i = 0, n = btf_vlen(enum_t); i < n; i++) {
+ e = &btf_enum(enum_t)[i];
+
+ name = btf_name_by_offset(btf, e->name_off);
+ if (!name || strncasecmp(name, prefix, pfx_len) != 0)
+ continue;
+
+ /* match symbolic name case insensitive and ignoring prefix */
+ if (strcasecmp(name + pfx_len, str) == 0) {
+ *value = e->val;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void seq_print_delegate_opts(struct seq_file *m,
+ const char *opt_name,
+ const struct btf *btf,
+ const struct btf_type *enum_t,
+ const char *prefix,
+ u64 delegate_msk, u64 any_msk)
+{
+ const struct btf_enum *e;
+ bool first = true;
+ const char *name;
+ u64 msk;
+ int i, n, pfx_len = strlen(prefix);
+
+ delegate_msk &= any_msk; /* clear unknown bits */
+
+ if (delegate_msk == 0)
+ return;
+
+ seq_printf(m, ",%s", opt_name);
+ if (delegate_msk == any_msk) {
+ seq_printf(m, "=any");
+ return;
+ }
+
+ if (btf && enum_t) {
+ for (i = 0, n = btf_vlen(enum_t); i < n; i++) {
+ e = &btf_enum(enum_t)[i];
+ name = btf_name_by_offset(btf, e->name_off);
+ if (!name || strncasecmp(name, prefix, pfx_len) != 0)
+ continue;
+ msk = 1ULL << e->val;
+ if (delegate_msk & msk) {
+ /* emit lower-case name without prefix */
+ seq_printf(m, "%c", first ? '=' : ':');
+ name += pfx_len;
+ while (*name) {
+ seq_printf(m, "%c", tolower(*name));
+ name++;
+ }
+
+ delegate_msk &= ~msk;
+ first = false;
+ }
+ }
+ }
+ if (delegate_msk)
+ seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk);
+}
+
/*
* Display the mount options in /proc/mounts.
*/
@@ -601,6 +732,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
{
struct inode *inode = d_inode(root);
umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX;
+ struct bpf_mount_opts *opts = root->d_sb->s_fs_info;
+ u64 mask;
if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
@@ -610,6 +743,35 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
from_kgid_munged(&init_user_ns, inode->i_gid));
if (mode != S_IRWXUGO)
seq_printf(m, ",mode=%o", mode);
+
+ if (opts->delegate_cmds || opts->delegate_maps ||
+ opts->delegate_progs || opts->delegate_attachs) {
+ struct bpffs_btf_enums info;
+
+ /* ignore errors, fallback to hex */
+ (void)find_bpffs_btf_enums(&info);
+
+ mask = (1ULL << __MAX_BPF_CMD) - 1;
+ seq_print_delegate_opts(m, "delegate_cmds",
+ info.btf, info.cmd_t, "BPF_",
+ opts->delegate_cmds, mask);
+
+ mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1;
+ seq_print_delegate_opts(m, "delegate_maps",
+ info.btf, info.map_t, "BPF_MAP_TYPE_",
+ opts->delegate_maps, mask);
+
+ mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1;
+ seq_print_delegate_opts(m, "delegate_progs",
+ info.btf, info.prog_t, "BPF_PROG_TYPE_",
+ opts->delegate_progs, mask);
+
+ mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1;
+ seq_print_delegate_opts(m, "delegate_attachs",
+ info.btf, info.attach_t, "BPF_",
+ opts->delegate_attachs, mask);
+ }
+
return 0;
}
@@ -624,7 +786,7 @@ static void bpf_free_inode(struct inode *inode)
free_inode_nonrcu(inode);
}
-static const struct super_operations bpf_super_ops = {
+const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = bpf_show_options,
@@ -635,28 +797,30 @@ enum {
OPT_UID,
OPT_GID,
OPT_MODE,
+ OPT_DELEGATE_CMDS,
+ OPT_DELEGATE_MAPS,
+ OPT_DELEGATE_PROGS,
+ OPT_DELEGATE_ATTACHS,
};
static const struct fs_parameter_spec bpf_fs_parameters[] = {
fsparam_u32 ("uid", OPT_UID),
fsparam_u32 ("gid", OPT_GID),
fsparam_u32oct ("mode", OPT_MODE),
+ fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS),
+ fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS),
+ fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS),
+ fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS),
{}
};
-struct bpf_mount_opts {
- kuid_t uid;
- kgid_t gid;
- umode_t mode;
-};
-
static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- struct bpf_mount_opts *opts = fc->fs_private;
+ struct bpf_mount_opts *opts = fc->s_fs_info;
struct fs_parse_result result;
kuid_t uid;
kgid_t gid;
- int opt;
+ int opt, err;
opt = fs_parse(fc, bpf_fs_parameters, param, &result);
if (opt < 0) {
@@ -708,6 +872,67 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
case OPT_MODE:
opts->mode = result.uint_32 & S_IALLUGO;
break;
+ case OPT_DELEGATE_CMDS:
+ case OPT_DELEGATE_MAPS:
+ case OPT_DELEGATE_PROGS:
+ case OPT_DELEGATE_ATTACHS: {
+ struct bpffs_btf_enums info;
+ const struct btf_type *enum_t;
+ const char *enum_pfx;
+ u64 *delegate_msk, msk = 0;
+ char *p;
+ int val;
+
+ /* ignore errors, fallback to hex */
+ (void)find_bpffs_btf_enums(&info);
+
+ switch (opt) {
+ case OPT_DELEGATE_CMDS:
+ delegate_msk = &opts->delegate_cmds;
+ enum_t = info.cmd_t;
+ enum_pfx = "BPF_";
+ break;
+ case OPT_DELEGATE_MAPS:
+ delegate_msk = &opts->delegate_maps;
+ enum_t = info.map_t;
+ enum_pfx = "BPF_MAP_TYPE_";
+ break;
+ case OPT_DELEGATE_PROGS:
+ delegate_msk = &opts->delegate_progs;
+ enum_t = info.prog_t;
+ enum_pfx = "BPF_PROG_TYPE_";
+ break;
+ case OPT_DELEGATE_ATTACHS:
+ delegate_msk = &opts->delegate_attachs;
+ enum_t = info.attach_t;
+ enum_pfx = "BPF_";
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ while ((p = strsep(&param->string, ":"))) {
+ if (strcmp(p, "any") == 0) {
+ msk |= ~0ULL;
+ } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) {
+ msk |= 1ULL << val;
+ } else {
+ err = kstrtou64(p, 0, &msk);
+ if (err)
+ return err;
+ }
+ }
+
+ /* Setting delegation mount options requires privileges */
+ if (msk && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *delegate_msk |= msk;
+ break;
+ }
+ default:
+ /* ignore unknown mount options */
+ break;
}
return 0;
@@ -784,10 +1009,14 @@ out:
static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr bpf_rfiles[] = { { "" } };
- struct bpf_mount_opts *opts = fc->fs_private;
+ struct bpf_mount_opts *opts = sb->s_fs_info;
struct inode *inode;
int ret;
+ /* Mounting an instance of BPF FS requires privileges */
+ if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
if (ret)
return ret;
@@ -811,7 +1040,7 @@ static int bpf_get_tree(struct fs_context *fc)
static void bpf_free_fc(struct fs_context *fc)
{
- kfree(fc->fs_private);
+ kfree(fc->s_fs_info);
}
static const struct fs_context_operations bpf_context_ops = {
@@ -835,17 +1064,32 @@ static int bpf_init_fs_context(struct fs_context *fc)
opts->uid = current_fsuid();
opts->gid = current_fsgid();
- fc->fs_private = opts;
+ /* start out with no BPF token delegation enabled */
+ opts->delegate_cmds = 0;
+ opts->delegate_maps = 0;
+ opts->delegate_progs = 0;
+ opts->delegate_attachs = 0;
+
+ fc->s_fs_info = opts;
fc->ops = &bpf_context_ops;
return 0;
}
+static void bpf_kill_super(struct super_block *sb)
+{
+ struct bpf_mount_opts *opts = sb->s_fs_info;
+
+ kill_litter_super(sb);
+ kfree(opts);
+}
+
static struct file_system_type bpf_fs_type = {
.owner = THIS_MODULE,
.name = "bpf",
.init_fs_context = bpf_init_fs_context,
.parameters = bpf_fs_parameters,
- .kill_sb = kill_litter_super,
+ .kill_sb = bpf_kill_super,
+ .fs_flags = FS_USERNS_MOUNT,
};
static int __init bpf_init(void)
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 594a234f122b..2a243cf37c60 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -9,6 +9,7 @@
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/math64.h>
+#include <linux/string.h>
#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
@@ -333,7 +334,8 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
const struct bpf_line_info *linfo;
const struct bpf_prog *prog;
- u32 i, nr_linfo;
+ u32 nr_linfo;
+ int l, r, m;
prog = env->prog;
nr_linfo = prog->aux->nr_linfo;
@@ -342,11 +344,30 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
return NULL;
linfo = prog->aux->linfo;
- for (i = 1; i < nr_linfo; i++)
- if (insn_off < linfo[i].insn_off)
- break;
+ /* Loop invariant: linfo[l].insn_off <= insns_off.
+ * linfo[0].insn_off == 0 which always satisfies above condition.
+ * Binary search is searching for rightmost linfo entry that satisfies
+ * the above invariant, giving us the desired record that covers given
+ * instruction offset.
+ */
+ l = 0;
+ r = nr_linfo - 1;
+ while (l < r) {
+ /* (r - l + 1) / 2 means we break a tie to the right, so if:
+ * l=1, r=2, linfo[l].insn_off <= insn_off, linfo[r].insn_off > insn_off,
+ * then m=2, we see that linfo[m].insn_off > insn_off, and so
+ * r becomes 1 and we exit the loop with correct l==1.
+ * If the tie was broken to the left, m=1 would end us up in
+ * an endless loop where l and m stay at 1 and r stays at 2.
+ */
+ m = l + (r - l + 1) / 2;
+ if (linfo[m].insn_off <= insn_off)
+ l = m;
+ else
+ r = m - 1;
+ }
- return &linfo[i - 1];
+ return &linfo[l];
}
static const char *ltrim(const char *s)
@@ -361,13 +382,28 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
u32 insn_off,
const char *prefix_fmt, ...)
{
- const struct bpf_line_info *linfo;
+ const struct bpf_line_info *linfo, *prev_linfo;
+ const struct btf *btf;
+ const char *s, *fname;
if (!bpf_verifier_log_needed(&env->log))
return;
+ prev_linfo = env->prev_linfo;
linfo = find_linfo(env, insn_off);
- if (!linfo || linfo == env->prev_linfo)
+ if (!linfo || linfo == prev_linfo)
+ return;
+
+ /* It often happens that two separate linfo records point to the same
+ * source code line, but have differing column numbers. Given verifier
+ * log doesn't emit column information, from user perspective we just
+ * end up emitting the same source code line twice unnecessarily.
+ * So instead check that previous and current linfo record point to
+ * the same file (file_name_offs match) and the same line number, and
+ * avoid emitting duplicated source code line in such case.
+ */
+ if (prev_linfo && linfo->file_name_off == prev_linfo->file_name_off &&
+ BPF_LINE_INFO_LINE_NUM(linfo->line_col) == BPF_LINE_INFO_LINE_NUM(prev_linfo->line_col))
return;
if (prefix_fmt) {
@@ -378,9 +414,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
va_end(args);
}
- verbose(env, "%s\n",
- ltrim(btf_name_by_offset(env->prog->aux->btf,
- linfo->line_off)));
+ btf = env->prog->aux->btf;
+ s = ltrim(btf_name_by_offset(btf, linfo->line_off));
+ verbose(env, "%s", s); /* source code line */
+
+ s = btf_name_by_offset(btf, linfo->file_name_off);
+ /* leave only file name */
+ fname = strrchr(s, '/');
+ fname = fname ? fname + 1 : s;
+ verbose(env, " @ %s:%u\n", fname, BPF_LINE_INFO_LINE_NUM(linfo->line_col));
env->prev_linfo = linfo;
}
@@ -416,6 +458,7 @@ const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_MEM] = "mem",
+ [PTR_TO_ARENA] = "arena",
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
@@ -651,6 +694,8 @@ static void print_reg_state(struct bpf_verifier_env *env,
}
verbose(env, "%s", reg_type_str(env, t));
+ if (t == PTR_TO_ARENA)
+ return;
if (t == PTR_TO_STACK) {
if (state->frameno != reg->frameno)
verbose(env, "[%d]", reg->frameno);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index b32be680da6c..050fe1ebf0f7 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -164,13 +164,13 @@ static inline int extract_bit(const u8 *data, size_t index)
*/
static size_t longest_prefix_match(const struct lpm_trie *trie,
const struct lpm_trie_node *node,
- const struct bpf_lpm_trie_key *key)
+ const struct bpf_lpm_trie_key_u8 *key)
{
u32 limit = min(node->prefixlen, key->prefixlen);
u32 prefixlen = 0, i = 0;
BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
- BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
+ BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32));
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
@@ -229,7 +229,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *found = NULL;
- struct bpf_lpm_trie_key *key = _key;
+ struct bpf_lpm_trie_key_u8 *key = _key;
if (key->prefixlen > trie->max_prefixlen)
return NULL;
@@ -309,7 +309,7 @@ static long trie_update_elem(struct bpf_map *map,
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
struct lpm_trie_node __rcu **slot;
- struct bpf_lpm_trie_key *key = _key;
+ struct bpf_lpm_trie_key_u8 *key = _key;
unsigned long irq_flags;
unsigned int next_bit;
size_t matchlen = 0;
@@ -437,7 +437,7 @@ out:
static long trie_delete_elem(struct bpf_map *map, void *_key)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
- struct bpf_lpm_trie_key *key = _key;
+ struct bpf_lpm_trie_key_u8 *key = _key;
struct lpm_trie_node __rcu **trim, **trim2;
struct lpm_trie_node *node, *parent;
unsigned long irq_flags;
@@ -536,7 +536,7 @@ out:
sizeof(struct lpm_trie_node))
#define LPM_VAL_SIZE_MIN 1
-#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X))
+#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key_u8) + (X))
#define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
@@ -565,7 +565,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
/* copy mandatory map attributes */
bpf_map_init_from_attr(&trie->map, attr);
trie->data_size = attr->key_size -
- offsetof(struct bpf_lpm_trie_key, data);
+ offsetof(struct bpf_lpm_trie_key_u8, data);
trie->max_prefixlen = trie->data_size * 8;
spin_lock_init(&trie->lock);
@@ -616,7 +616,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
{
struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
- struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
+ struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key;
struct lpm_trie_node **node_stack = NULL;
int err = 0, stack_ptr = -1;
unsigned int next_bit;
@@ -703,7 +703,7 @@ find_leftmost:
}
do_copy:
next_key->prefixlen = next_node->prefixlen;
- memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data),
+ memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data),
next_node->data, trie->data_size);
free_stack:
kfree(node_stack);
@@ -715,7 +715,7 @@ static int trie_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- /* Keys must have struct bpf_lpm_trie_key embedded. */
+ /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */
return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ?
-EINVAL : 0;
}
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index 6abd7c5df4b3..9575314f40a6 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -213,9 +213,9 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map)
__bpf_kfunc_end_defs();
-BTF_SET8_START(bpf_map_iter_kfunc_ids)
+BTF_KFUNCS_START(bpf_map_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS)
-BTF_SET8_END(bpf_map_iter_kfunc_ids)
+BTF_KFUNCS_END(bpf_map_iter_kfunc_ids)
static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index dff7ba539701..c99f8e5234ac 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -91,11 +91,14 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
} else if (value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
- /* hash table size must be power of 2 */
- n_buckets = roundup_pow_of_two(attr->max_entries);
- if (!n_buckets)
+ /* hash table size must be power of 2; roundup_pow_of_two() can overflow
+ * into UB on 32-bit arches, so check that first
+ */
+ if (attr->max_entries > 1UL << 31)
return ERR_PTR(-E2BIG);
+ n_buckets = roundup_pow_of_two(attr->max_entries);
+
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a1f18681721c..ae2ff73bde7e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -164,6 +164,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
if (bpf_map_is_offloaded(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+ map->map_type == BPF_MAP_TYPE_ARENA ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
@@ -479,6 +480,39 @@ static void bpf_map_release_memcg(struct bpf_map *map)
}
#endif
+int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+ unsigned long nr_pages, struct page **pages)
+{
+ unsigned long i, j;
+ struct page *pg;
+ int ret = 0;
+#ifdef CONFIG_MEMCG_KMEM
+ struct mem_cgroup *memcg, *old_memcg;
+
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
+#endif
+ for (i = 0; i < nr_pages; i++) {
+ pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0);
+
+ if (pg) {
+ pages[i] = pg;
+ continue;
+ }
+ for (j = 0; j < i; j++)
+ __free_page(pages[j]);
+ ret = -ENOMEM;
+ break;
+ }
+
+#ifdef CONFIG_MEMCG_KMEM
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+#endif
+ return ret;
+}
+
+
static int btf_field_cmp(const void *a, const void *b)
{
const struct btf_field *f1 = a, *f2 = b;
@@ -937,6 +971,21 @@ static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
return EPOLLERR;
}
+static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct bpf_map *map = filp->private_data;
+
+ if (map->ops->map_get_unmapped_area)
+ return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
+#ifdef CONFIG_MMU
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+#else
+ return addr;
+#endif
+}
+
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_map_show_fdinfo,
@@ -946,6 +995,7 @@ const struct file_operations bpf_map_fops = {
.write = bpf_dummy_write,
.mmap = bpf_map_mmap,
.poll = bpf_map_poll,
+ .get_unmapped_area = bpf_get_unmapped_area,
};
int bpf_map_new_fd(struct bpf_map *map, int flags)
@@ -1011,8 +1061,8 @@ int map_check_no_btf(const struct bpf_map *map,
return -ENOTSUPP;
}
-static int map_check_btf(struct bpf_map *map, const struct btf *btf,
- u32 btf_key_id, u32 btf_value_id)
+static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
+ const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
{
const struct btf_type *key_type, *value_type;
u32 key_size, value_size;
@@ -1040,7 +1090,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (!IS_ERR_OR_NULL(map->record)) {
int i;
- if (!bpf_capable()) {
+ if (!bpf_token_capable(token, CAP_BPF)) {
ret = -EPERM;
goto free_map_tab;
}
@@ -1123,14 +1173,21 @@ free_map_tab:
return ret;
}
-#define BPF_MAP_CREATE_LAST_FIELD map_extra
+static bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
+#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
const struct bpf_map_ops *ops;
+ struct bpf_token *token = NULL;
int numa_node = bpf_map_attr_numa_node(attr);
u32 map_type = attr->map_type;
struct bpf_map *map;
+ bool token_flag;
int f_flags;
int err;
@@ -1138,6 +1195,12 @@ static int map_create(union bpf_attr *attr)
if (err)
return -EINVAL;
+ /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
+ * to avoid per-map type checks tripping on unknown flag
+ */
+ token_flag = attr->map_flags & BPF_F_TOKEN_FD;
+ attr->map_flags &= ~BPF_F_TOKEN_FD;
+
if (attr->btf_vmlinux_value_type_id) {
if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
attr->btf_key_type_id || attr->btf_value_type_id)
@@ -1147,6 +1210,7 @@ static int map_create(union bpf_attr *attr)
}
if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
+ attr->map_type != BPF_MAP_TYPE_ARENA &&
attr->map_extra != 0)
return -EINVAL;
@@ -1178,14 +1242,32 @@ static int map_create(union bpf_attr *attr)
if (!ops->map_mem_usage)
return -EINVAL;
+ if (token_flag) {
+ token = bpf_token_get_from_fd(attr->map_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+
+ /* if current token doesn't grant map creation permissions,
+ * then we can't use this token, so ignore it and rely on
+ * system-wide capabilities checks
+ */
+ if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) ||
+ !bpf_token_allow_map_type(token, attr->map_type)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ err = -EPERM;
+
/* Intent here is for unprivileged_bpf_disabled to block BPF map
* creation for unprivileged users; other actions depend
* on fd availability and access to bpffs, so are dependent on
* object creation success. Even with unprivileged BPF disabled,
* capability checks are still carried out.
*/
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
- return -EPERM;
+ if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF))
+ goto put_token;
/* check privileged map type permissions */
switch (map_type) {
@@ -1218,25 +1300,28 @@ static int map_create(union bpf_attr *attr)
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
case BPF_MAP_TYPE_STRUCT_OPS:
case BPF_MAP_TYPE_CPUMAP:
- if (!bpf_capable())
- return -EPERM;
+ case BPF_MAP_TYPE_ARENA:
+ if (!bpf_token_capable(token, CAP_BPF))
+ goto put_token;
break;
case BPF_MAP_TYPE_SOCKMAP:
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
case BPF_MAP_TYPE_XSKMAP:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
+ if (!bpf_token_capable(token, CAP_NET_ADMIN))
+ goto put_token;
break;
default:
WARN(1, "unsupported map type %d", map_type);
- return -EPERM;
+ goto put_token;
}
map = ops->map_alloc(attr);
- if (IS_ERR(map))
- return PTR_ERR(map);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto put_token;
+ }
map->ops = ops;
map->map_type = map_type;
@@ -1273,7 +1358,7 @@ static int map_create(union bpf_attr *attr)
map->btf = btf;
if (attr->btf_value_type_id) {
- err = map_check_btf(map, btf, attr->btf_key_type_id,
+ err = map_check_btf(map, token, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err)
goto free_map;
@@ -1285,15 +1370,16 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_alloc(map);
+ err = security_bpf_map_create(map, attr, token);
if (err)
- goto free_map;
+ goto free_map_sec;
err = bpf_map_alloc_id(map);
if (err)
goto free_map_sec;
bpf_map_save_memcg(map);
+ bpf_token_put(token);
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
@@ -1314,6 +1400,8 @@ free_map_sec:
free_map:
btf_put(map->btf);
map->ops->map_free(map);
+put_token:
+ bpf_token_put(token);
return err;
}
@@ -2144,7 +2232,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
kvfree(aux->func_info);
kfree(aux->func_info_aux);
free_uid(aux->user);
- security_bpf_prog_free(aux);
+ security_bpf_prog_free(aux->prog);
bpf_prog_free(aux->prog);
}
@@ -2160,7 +2248,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
btf_put(prog->aux->attach_btf);
if (deferred) {
- if (prog->aux->sleepable)
+ if (prog->sleepable)
call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
else
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
@@ -2590,13 +2678,15 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD log_true_size
+#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog, *dst_prog = NULL;
struct btf *attach_btf = NULL;
+ struct bpf_token *token = NULL;
+ bool bpf_cap;
int err;
char license[128];
@@ -2610,13 +2700,35 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
BPF_F_TEST_RND_HI32 |
BPF_F_XDP_HAS_FRAGS |
BPF_F_XDP_DEV_BOUND_ONLY |
- BPF_F_TEST_REG_INVARIANTS))
+ BPF_F_TEST_REG_INVARIANTS |
+ BPF_F_TOKEN_FD))
return -EINVAL;
+ bpf_prog_load_fixup_attach_type(attr);
+
+ if (attr->prog_flags & BPF_F_TOKEN_FD) {
+ token = bpf_token_get_from_fd(attr->prog_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ /* if current token doesn't grant prog loading permissions,
+ * then we can't use this token, so ignore it and rely on
+ * system-wide capabilities checks
+ */
+ if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) ||
+ !bpf_token_allow_prog_type(token, attr->prog_type,
+ attr->expected_attach_type)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ bpf_cap = bpf_token_capable(token, CAP_BPF);
+ err = -EPERM;
+
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
- !bpf_capable())
- return -EPERM;
+ !bpf_cap)
+ goto put_token;
/* Intent here is for unprivileged_bpf_disabled to block BPF program
* creation for unprivileged users; other actions depend
@@ -2625,21 +2737,23 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
* capability checks are still carried out for these
* and other operations.
*/
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
- return -EPERM;
+ if (sysctl_unprivileged_bpf_disabled && !bpf_cap)
+ goto put_token;
if (attr->insn_cnt == 0 ||
- attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
- return -E2BIG;
+ attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
+ err = -E2BIG;
+ goto put_token;
+ }
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
- !bpf_capable())
- return -EPERM;
+ !bpf_cap)
+ goto put_token;
- if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (is_perfmon_prog_type(type) && !perfmon_capable())
- return -EPERM;
+ if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN))
+ goto put_token;
+ if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON))
+ goto put_token;
/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
* or btf, we need to check which one it is
@@ -2649,27 +2763,33 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (IS_ERR(dst_prog)) {
dst_prog = NULL;
attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
- if (IS_ERR(attach_btf))
- return -EINVAL;
+ if (IS_ERR(attach_btf)) {
+ err = -EINVAL;
+ goto put_token;
+ }
if (!btf_is_kernel(attach_btf)) {
/* attaching through specifying bpf_prog's BTF
* objects directly might be supported eventually
*/
btf_put(attach_btf);
- return -ENOTSUPP;
+ err = -ENOTSUPP;
+ goto put_token;
}
}
} else if (attr->attach_btf_id) {
/* fall back to vmlinux BTF, if BTF type ID is specified */
attach_btf = bpf_get_btf_vmlinux();
- if (IS_ERR(attach_btf))
- return PTR_ERR(attach_btf);
- if (!attach_btf)
- return -EINVAL;
+ if (IS_ERR(attach_btf)) {
+ err = PTR_ERR(attach_btf);
+ goto put_token;
+ }
+ if (!attach_btf) {
+ err = -EINVAL;
+ goto put_token;
+ }
btf_get(attach_btf);
}
- bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
attach_btf, attr->attach_btf_id,
dst_prog)) {
@@ -2677,7 +2797,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bpf_prog_put(dst_prog);
if (attach_btf)
btf_put(attach_btf);
- return -EINVAL;
+ err = -EINVAL;
+ goto put_token;
}
/* plain bpf_prog allocation */
@@ -2687,20 +2808,21 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bpf_prog_put(dst_prog);
if (attach_btf)
btf_put(attach_btf);
- return -ENOMEM;
+ err = -EINVAL;
+ goto put_token;
}
prog->expected_attach_type = attr->expected_attach_type;
+ prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
prog->aux->attach_btf = attach_btf;
prog->aux->attach_btf_id = attr->attach_btf_id;
prog->aux->dst_prog = dst_prog;
prog->aux->dev_bound = !!attr->prog_ifindex;
- prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
- err = security_bpf_prog_alloc(prog->aux);
- if (err)
- goto free_prog;
+ /* move token into prog->aux, reuse taken refcnt */
+ prog->aux->token = token;
+ token = NULL;
prog->aux->user = get_current_user();
prog->len = attr->insn_cnt;
@@ -2709,12 +2831,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (copy_from_bpfptr(prog->insns,
make_bpfptr(attr->insns, uattr.is_kernel),
bpf_prog_insn_size(prog)) != 0)
- goto free_prog_sec;
+ goto free_prog;
/* copy eBPF program license from user space */
if (strncpy_from_bpfptr(license,
make_bpfptr(attr->license, uattr.is_kernel),
sizeof(license) - 1) < 0)
- goto free_prog_sec;
+ goto free_prog;
license[sizeof(license) - 1] = 0;
/* eBPF programs must be GPL compatible to use GPL-ed functions */
@@ -2728,14 +2850,14 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_dev_bound_init(prog, attr);
if (err)
- goto free_prog_sec;
+ goto free_prog;
}
if (type == BPF_PROG_TYPE_EXT && dst_prog &&
bpf_prog_is_dev_bound(dst_prog->aux)) {
err = bpf_prog_dev_bound_inherit(prog, dst_prog);
if (err)
- goto free_prog_sec;
+ goto free_prog;
}
/*
@@ -2757,12 +2879,16 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
/* find program type: socket_filter vs tracing_filter */
err = find_prog_type(type, prog);
if (err < 0)
- goto free_prog_sec;
+ goto free_prog;
prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
sizeof(attr->prog_name));
if (err < 0)
+ goto free_prog;
+
+ err = security_bpf_prog_load(prog, attr, token);
+ if (err)
goto free_prog_sec;
/* run eBPF verifier */
@@ -2808,13 +2934,16 @@ free_used_maps:
*/
__bpf_prog_put_noref(prog, prog->aux->real_func_cnt);
return err;
+
free_prog_sec:
- free_uid(prog->aux->user);
- security_bpf_prog_free(prog->aux);
+ security_bpf_prog_free(prog);
free_prog:
+ free_uid(prog->aux->user);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
bpf_prog_free(prog);
+put_token:
+ bpf_token_put(token);
return err;
}
@@ -3501,6 +3630,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
if (!kallsyms_show_value(current_cred()))
addr = 0;
info->perf_event.kprobe.addr = addr;
+ info->perf_event.kprobe.cookie = event->bpf_cookie;
return 0;
}
#endif
@@ -3526,6 +3656,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
else
info->perf_event.type = BPF_PERF_EVENT_UPROBE;
info->perf_event.uprobe.offset = offset;
+ info->perf_event.uprobe.cookie = event->bpf_cookie;
return 0;
}
#endif
@@ -3553,6 +3684,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
ulen = info->perf_event.tracepoint.name_len;
info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
+ info->perf_event.tracepoint.cookie = event->bpf_cookie;
return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
}
@@ -3561,6 +3693,7 @@ static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
{
info->perf_event.event.type = event->attr.type;
info->perf_event.event.config = event->attr.config;
+ info->perf_event.event.cookie = event->bpf_cookie;
info->perf_event.type = BPF_PERF_EVENT_EVENT;
return 0;
}
@@ -3818,7 +3951,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_SK_LOOKUP:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
- if (!capable(CAP_NET_ADMIN))
+ if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN))
/* cg-skb progs can be loaded by unpriv user.
* check permissions at attach time.
*/
@@ -4021,7 +4154,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
- if (!capable(CAP_NET_ADMIN))
+ if (!bpf_net_capable())
return -EPERM;
if (CHECK_ATTR(BPF_PROG_QUERY))
return -EINVAL;
@@ -4320,6 +4453,12 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
continue;
}
+ if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX ||
+ BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) {
+ insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM;
+ continue;
+ }
+
if (code != (BPF_LD | BPF_IMM | BPF_DW))
continue;
@@ -4687,6 +4826,8 @@ static int bpf_map_get_info_by_fd(struct file *file,
info.btf_value_type_id = map->btf_value_type_id;
}
info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS)
+ bpf_map_struct_ops_info_fill(&info, map);
if (bpf_map_is_offloaded(map)) {
err = bpf_map_offload_info_fill(&info, map);
@@ -4789,15 +4930,34 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return err;
}
-#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size
+#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
+ struct bpf_token *token = NULL;
+
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;
- if (!bpf_capable())
+ if (attr->btf_flags & ~BPF_F_TOKEN_FD)
+ return -EINVAL;
+
+ if (attr->btf_flags & BPF_F_TOKEN_FD) {
+ token = bpf_token_get_from_fd(attr->btf_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ if (!bpf_token_capable(token, CAP_BPF)) {
+ bpf_token_put(token);
return -EPERM;
+ }
+
+ bpf_token_put(token);
return btf_new_fd(attr, uattr, uattr_size);
}
@@ -5394,7 +5554,7 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
/* The bpf program will not access the bpf map, but for the sake of
* simplicity, increase sleepable_refcnt for sleepable program as well.
*/
- if (prog->aux->sleepable)
+ if (prog->sleepable)
atomic64_inc(&map->sleepable_refcnt);
memcpy(used_maps_new, used_maps_old,
sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
@@ -5415,6 +5575,20 @@ out_prog_put:
return ret;
}
+#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd
+
+static int token_create(union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_TOKEN_CREATE))
+ return -EINVAL;
+
+ /* no flags are supported yet */
+ if (attr->token_create.flags)
+ return -EINVAL;
+
+ return bpf_token_create(attr);
+}
+
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -5548,6 +5722,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
+ case BPF_TOKEN_CREATE:
+ err = token_create(&attr);
+ break;
default:
err = -EINVAL;
break;
@@ -5654,7 +5831,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = {
const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
BPF_CALL_1(bpf_sys_close, u32, fd)
@@ -5704,7 +5881,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_sys_bpf:
- return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
+ return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
+ ? NULL : &bpf_sys_bpf_proto;
case BPF_FUNC_btf_find_by_name_kind:
return &bpf_btf_find_by_name_kind_proto;
case BPF_FUNC_sys_close:
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index e5c3500443c6..ec4e97c61eef 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
__alignof__(struct bpf_iter_task));
+ kit->pos = NULL;
+
switch (flags) {
case BPF_TASK_ITER_ALL_THREADS:
case BPF_TASK_ITER_ALL_PROCS:
diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c
new file mode 100644
index 000000000000..d6ccf8d00eab
--- /dev/null
+++ b/kernel/bpf/token.c
@@ -0,0 +1,278 @@
+#include <linux/bpf.h>
+#include <linux/vmalloc.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/idr.h>
+#include <linux/namei.h>
+#include <linux/user_namespace.h>
+#include <linux/security.h>
+
+static bool bpf_ns_capable(struct user_namespace *ns, int cap)
+{
+ return ns_capable(ns, cap) || (cap != CAP_SYS_ADMIN && ns_capable(ns, CAP_SYS_ADMIN));
+}
+
+bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ struct user_namespace *userns;
+
+ /* BPF token allows ns_capable() level of capabilities */
+ userns = token ? token->userns : &init_user_ns;
+ if (!bpf_ns_capable(userns, cap))
+ return false;
+ if (token && security_bpf_token_capable(token, cap) < 0)
+ return false;
+ return true;
+}
+
+void bpf_token_inc(struct bpf_token *token)
+{
+ atomic64_inc(&token->refcnt);
+}
+
+static void bpf_token_free(struct bpf_token *token)
+{
+ security_bpf_token_free(token);
+ put_user_ns(token->userns);
+ kfree(token);
+}
+
+static void bpf_token_put_deferred(struct work_struct *work)
+{
+ struct bpf_token *token = container_of(work, struct bpf_token, work);
+
+ bpf_token_free(token);
+}
+
+void bpf_token_put(struct bpf_token *token)
+{
+ if (!token)
+ return;
+
+ if (!atomic64_dec_and_test(&token->refcnt))
+ return;
+
+ INIT_WORK(&token->work, bpf_token_put_deferred);
+ schedule_work(&token->work);
+}
+
+static int bpf_token_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_token *token = filp->private_data;
+
+ bpf_token_put(token);
+ return 0;
+}
+
+static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ struct bpf_token *token = filp->private_data;
+ u64 mask;
+
+ BUILD_BUG_ON(__MAX_BPF_CMD >= 64);
+ mask = BIT_ULL(__MAX_BPF_CMD) - 1;
+ if ((token->allowed_cmds & mask) == mask)
+ seq_printf(m, "allowed_cmds:\tany\n");
+ else
+ seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds);
+
+ BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64);
+ mask = BIT_ULL(__MAX_BPF_MAP_TYPE) - 1;
+ if ((token->allowed_maps & mask) == mask)
+ seq_printf(m, "allowed_maps:\tany\n");
+ else
+ seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps);
+
+ BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64);
+ mask = BIT_ULL(__MAX_BPF_PROG_TYPE) - 1;
+ if ((token->allowed_progs & mask) == mask)
+ seq_printf(m, "allowed_progs:\tany\n");
+ else
+ seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs);
+
+ BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64);
+ mask = BIT_ULL(__MAX_BPF_ATTACH_TYPE) - 1;
+ if ((token->allowed_attachs & mask) == mask)
+ seq_printf(m, "allowed_attachs:\tany\n");
+ else
+ seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs);
+}
+
+#define BPF_TOKEN_INODE_NAME "bpf-token"
+
+static const struct inode_operations bpf_token_iops = { };
+
+static const struct file_operations bpf_token_fops = {
+ .release = bpf_token_release,
+ .show_fdinfo = bpf_token_show_fdinfo,
+};
+
+int bpf_token_create(union bpf_attr *attr)
+{
+ struct bpf_mount_opts *mnt_opts;
+ struct bpf_token *token = NULL;
+ struct user_namespace *userns;
+ struct inode *inode;
+ struct file *file;
+ struct path path;
+ struct fd f;
+ umode_t mode;
+ int err, fd;
+
+ f = fdget(attr->token_create.bpffs_fd);
+ if (!f.file)
+ return -EBADF;
+
+ path = f.file->f_path;
+ path_get(&path);
+ fdput(f);
+
+ if (path.dentry != path.mnt->mnt_sb->s_root) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ if (path.mnt->mnt_sb->s_op != &bpf_super_ops) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ err = path_permission(&path, MAY_ACCESS);
+ if (err)
+ goto out_path;
+
+ userns = path.dentry->d_sb->s_user_ns;
+ /*
+ * Enforce that creators of BPF tokens are in the same user
+ * namespace as the BPF FS instance. This makes reasoning about
+ * permissions a lot easier and we can always relax this later.
+ */
+ if (current_user_ns() != userns) {
+ err = -EPERM;
+ goto out_path;
+ }
+ if (!ns_capable(userns, CAP_BPF)) {
+ err = -EPERM;
+ goto out_path;
+ }
+
+ /* Creating BPF token in init_user_ns doesn't make much sense. */
+ if (current_user_ns() == &init_user_ns) {
+ err = -EOPNOTSUPP;
+ goto out_path;
+ }
+
+ mnt_opts = path.dentry->d_sb->s_fs_info;
+ if (mnt_opts->delegate_cmds == 0 &&
+ mnt_opts->delegate_maps == 0 &&
+ mnt_opts->delegate_progs == 0 &&
+ mnt_opts->delegate_attachs == 0) {
+ err = -ENOENT; /* no BPF token delegation is set up */
+ goto out_path;
+ }
+
+ mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
+ inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_path;
+ }
+
+ inode->i_op = &bpf_token_iops;
+ inode->i_fop = &bpf_token_fops;
+ clear_nlink(inode); /* make sure it is unlinked */
+
+ file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops);
+ if (IS_ERR(file)) {
+ iput(inode);
+ err = PTR_ERR(file);
+ goto out_path;
+ }
+
+ token = kzalloc(sizeof(*token), GFP_USER);
+ if (!token) {
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ atomic64_set(&token->refcnt, 1);
+
+ /* remember bpffs owning userns for future ns_capable() checks */
+ token->userns = get_user_ns(userns);
+
+ token->allowed_cmds = mnt_opts->delegate_cmds;
+ token->allowed_maps = mnt_opts->delegate_maps;
+ token->allowed_progs = mnt_opts->delegate_progs;
+ token->allowed_attachs = mnt_opts->delegate_attachs;
+
+ err = security_bpf_token_create(token, attr, &path);
+ if (err)
+ goto out_token;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto out_token;
+ }
+
+ file->private_data = token;
+ fd_install(fd, file);
+
+ path_put(&path);
+ return fd;
+
+out_token:
+ bpf_token_free(token);
+out_file:
+ fput(file);
+out_path:
+ path_put(&path);
+ return err;
+}
+
+struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ struct fd f = fdget(ufd);
+ struct bpf_token *token;
+
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+ if (f.file->f_op != &bpf_token_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ token = f.file->private_data;
+ bpf_token_inc(token);
+ fdput(f);
+
+ return token;
+}
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ if (!token)
+ return false;
+ if (!(token->allowed_cmds & BIT_ULL(cmd)))
+ return false;
+ return security_bpf_token_cmd(token, cmd) == 0;
+}
+
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type)
+{
+ if (!token || type >= __MAX_BPF_MAP_TYPE)
+ return false;
+
+ return token->allowed_maps & BIT_ULL(type);
+}
+
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type attach_type)
+{
+ if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE)
+ return false;
+
+ return (token->allowed_progs & BIT_ULL(prog_type)) &&
+ (token->allowed_attachs & BIT_ULL(attach_type));
+}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d382f5ebe06c..db7599c59c78 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -1014,7 +1014,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
- bool sleepable = prog->aux->sleepable;
+ bool sleepable = prog->sleepable;
if (bpf_prog_check_recur(prog))
return sleepable ? __bpf_prog_enter_sleepable_recur :
@@ -1029,7 +1029,7 @@ bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
- bool sleepable = prog->aux->sleepable;
+ bool sleepable = prog->sleepable;
if (bpf_prog_check_recur(prog))
return sleepable ? __bpf_prog_exit_sleepable_recur :
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 65f598694d55..63749ad5ac6b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -528,6 +528,21 @@ static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
(bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
}
+static bool is_async_callback_calling_insn(struct bpf_insn *insn)
+{
+ return bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm);
+}
+
+static bool is_may_goto_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
+}
+
+static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
+{
+ return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
+}
+
static bool is_storage_get_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_storage_get ||
@@ -1155,6 +1170,12 @@ static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
stack->spilled_ptr.type == SCALAR_VALUE;
}
+static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
+{
+ return stack->slot_type[0] == STACK_SPILL &&
+ stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
* case they are equivalent, or it's STACK_ZERO, in which case we preserve
* more precise STACK_ZERO.
@@ -1418,6 +1439,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->dfs_depth = src->dfs_depth;
dst_state->callback_unroll_depth = src->callback_unroll_depth;
dst_state->used_as_loop_entry = src->used_as_loop_entry;
+ dst_state->may_goto_depth = src->may_goto_depth;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
if (!dst) {
@@ -2264,8 +2286,7 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
}
/* Mark a register as having a completely unknown (scalar) value. */
-static void __mark_reg_unknown(const struct bpf_verifier_env *env,
- struct bpf_reg_state *reg)
+static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
{
/*
* Clear type, off, and union(map_ptr, range) and
@@ -2277,10 +2298,20 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
reg->ref_obj_id = 0;
reg->var_off = tnum_unknown;
reg->frameno = 0;
- reg->precise = !env->bpf_capable;
+ reg->precise = false;
__mark_reg_unbounded(reg);
}
+/* Mark a register as having a completely unknown (scalar) value,
+ * initialize .precise as true when not bpf capable.
+ */
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
+{
+ __mark_reg_unknown_imprecise(reg);
+ reg->precise = !env->bpf_capable;
+}
+
static void mark_reg_unknown(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno)
{
@@ -4355,6 +4386,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_MEM:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
+ case PTR_TO_ARENA:
return true;
default:
return false;
@@ -4380,20 +4412,6 @@ static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
}
-static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
-{
- return tnum_is_unknown(reg->var_off) &&
- reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
- reg->umin_value == 0 && reg->umax_value == U64_MAX &&
- reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
- reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
-}
-
-static bool register_is_bounded(struct bpf_reg_state *reg)
-{
- return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
-}
-
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
@@ -4403,6 +4421,18 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
return reg->type != SCALAR_VALUE;
}
+static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
+ struct bpf_reg_state *src_reg)
+{
+ if (src_reg->type == SCALAR_VALUE && !src_reg->id &&
+ !tnum_is_const(src_reg->var_off))
+ /* Ensure that src_reg has a valid ID that will be copied to
+ * dst_reg and then will be used by find_equal_scalars() to
+ * propagate min/max range.
+ */
+ src_reg->id = ++env->id_gen;
+}
+
/* Copy src state preserving dst->parent and dst->live fields */
static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
{
@@ -4438,6 +4468,11 @@ static bool is_bpf_st_mem(struct bpf_insn *insn)
return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
}
+static int get_reg_width(struct bpf_reg_state *reg)
+{
+ return fls64(reg->umax_value);
+}
+
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -4487,13 +4522,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
mark_stack_slot_scratched(env, spi);
- if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) {
+ if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
+ bool reg_value_fits;
+
+ reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
+ /* Make sure that reg had an ID to build a relation on spill. */
+ if (reg_value_fits)
+ assign_scalar_id_before_mov(env, reg);
save_register_state(env, state, spi, reg, size);
/* Break the relation on a narrowing spill. */
- if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
+ if (!reg_value_fits)
state->stack[spi].spilled_ptr.id = 0;
} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
- insn->imm != 0 && env->bpf_capable) {
+ env->bpf_capable) {
struct bpf_reg_state fake_reg = {};
__mark_reg_known(&fake_reg, insn->imm);
@@ -4640,7 +4681,20 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
return -EINVAL;
}
- /* Erase all spilled pointers. */
+ /* If writing_zero and the spi slot contains a spill of value 0,
+ * maintain the spill type.
+ */
+ if (writing_zero && *stype == STACK_SPILL &&
+ is_spilled_scalar_reg(&state->stack[spi])) {
+ struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
+
+ if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
+ zero_used = true;
+ continue;
+ }
+ }
+
+ /* Erase all other spilled pointers. */
state->stack[spi].spilled_ptr.type = NOT_INIT;
/* Update the slot type. */
@@ -4756,7 +4810,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
if (dst_regno < 0)
return 0;
- if (!(off % BPF_REG_SIZE) && size == spill_size) {
+ if (size <= spill_size &&
+ bpf_stack_narrow_access_ok(off, size, spill_size)) {
/* The earlier check_reg_arg() has decided the
* subreg_def for this insn. Save it first.
*/
@@ -4764,6 +4819,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
copy_register_state(&state->regs[dst_regno], reg);
state->regs[dst_regno].subreg_def = subreg_def;
+
+ /* Break the relation on a narrowing fill.
+ * coerce_reg_to_size will adjust the boundaries.
+ */
+ if (get_reg_width(reg) > size * BITS_PER_BYTE)
+ state->regs[dst_regno].id = 0;
} else {
int spill_cnt = 0, zero_cnt = 0;
@@ -5211,6 +5272,11 @@ bad_type:
return -EINVAL;
}
+static bool in_sleepable(struct bpf_verifier_env *env)
+{
+ return env->prog->sleepable;
+}
+
/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
* can dereference RCU protected pointers and result is PTR_TRUSTED.
*/
@@ -5218,7 +5284,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env)
{
return env->cur_state->active_rcu_lock ||
env->cur_state->active_lock.ptr ||
- !env->prog->aux->sleepable;
+ !in_sleepable(env);
}
/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
@@ -5227,7 +5293,9 @@ BTF_ID(struct, prog_test_ref_kfunc)
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
+#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
+#endif
BTF_ID(struct, task_struct)
BTF_SET_END(rcu_protected_types)
@@ -5761,6 +5829,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_XDP_SOCK:
pointer_desc = "xdp_sock ";
break;
+ case PTR_TO_ARENA:
+ return 0;
default:
break;
}
@@ -5768,6 +5838,17 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
strict);
}
+static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
+{
+ if (env->prog->jit_requested)
+ return round_up(stack_depth, 16);
+
+ /* round up to 32-bytes, since this is granularity
+ * of interpreter stack size
+ */
+ return round_up(max_t(u32, stack_depth, 1), 32);
+}
+
/* starting from main bpf function walk all instructions of the function
* and recursively walk all callees that given function can call.
* Ignore jump and exit insns.
@@ -5811,10 +5892,7 @@ process_func:
depth);
return -EACCES;
}
- /* round up to 32-bytes, since this is granularity
- * of interpreter stack size
- */
- depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
+ depth += round_up_stack_depth(env, subprog[idx].stack_depth);
if (depth > MAX_BPF_STACK) {
verbose(env, "combined stack size of %d calls is %d. Too large\n",
frame + 1, depth);
@@ -5908,7 +5986,7 @@ continue_func:
*/
if (frame == 0)
return 0;
- depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
+ depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
frame--;
i = ret_insn[frame];
idx = ret_prog[frame];
@@ -6039,10 +6117,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
* values are also truncated so we push 64-bit bounds into
* 32-bit bounds. Above were truncated < 32-bits already.
*/
- if (size < 4) {
+ if (size < 4)
__mark_reg32_unbounded(reg);
- reg_bounds_sync(reg);
- }
+
+ reg_bounds_sync(reg);
}
static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
@@ -6862,6 +6940,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_ARENA) {
+ if (t == BPF_READ && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str(env, reg->type));
@@ -8198,6 +8279,7 @@ found:
switch ((int)reg->type) {
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
@@ -8332,6 +8414,7 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_MEM | MEM_RINGBUF:
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
+ case PTR_TO_ARENA:
case SCALAR_VALUE:
return 0;
/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
@@ -9296,10 +9379,34 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
return -EINVAL;
}
+ } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
+ /*
+ * Can pass any value and the kernel won't crash, but
+ * only PTR_TO_ARENA or SCALAR make sense. Everything
+ * else is a bug in the bpf program. Point it out to
+ * the user at the verification time instead of
+ * run-time debug nightmare.
+ */
+ if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
+ bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
+ return -EINVAL;
+ }
} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
if (ret)
return ret;
+ } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
+ struct bpf_call_arg_meta meta;
+ int err;
+
+ if (register_is_null(reg) && type_may_be_null(arg->arg_type))
+ continue;
+
+ memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
+ err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
+ err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
+ if (err)
+ return err;
} else {
bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n",
i, arg->arg_type);
@@ -9375,9 +9482,7 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
return -EFAULT;
}
- if (insn->code == (BPF_JMP | BPF_CALL) &&
- insn->src_reg == 0 &&
- insn->imm == BPF_FUNC_timer_set_callback) {
+ if (is_async_callback_calling_insn(insn)) {
struct bpf_verifier_state *async_cb;
/* there is no real recursion here. timer callbacks are async */
@@ -9436,6 +9541,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (subprog_is_global(env, subprog)) {
const char *sub_name = subprog_name(env, subprog);
+ /* Only global subprogs cannot be called with a lock held. */
+ if (env->cur_state->active_lock.ptr) {
+ verbose(env, "global function calls are not allowed while holding a lock,\n"
+ "use static function instead\n");
+ return -EINVAL;
+ }
+
if (err) {
verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
subprog, sub_name);
@@ -10092,7 +10204,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
- if (!env->prog->aux->sleepable && fn->might_sleep) {
+ if (!in_sleepable(env) && fn->might_sleep) {
verbose(env, "helper call might sleep in a non-sleepable prog\n");
return -EINVAL;
}
@@ -10122,7 +10234,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
- if (env->prog->aux->sleepable && is_storage_get_function(func_id))
+ if (in_sleepable(env) && is_storage_get_function(func_id))
env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
}
@@ -10618,24 +10730,6 @@ static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RCU_PROTECTED;
}
-static bool __kfunc_param_match_suffix(const struct btf *btf,
- const struct btf_param *arg,
- const char *suffix)
-{
- int suffix_len = strlen(suffix), len;
- const char *param_name;
-
- /* In the future, this can be ported to use BTF tagging */
- param_name = btf_name_by_offset(btf, arg->name_off);
- if (str_is_empty(param_name))
- return false;
- len = strlen(param_name);
- if (len < suffix_len)
- return false;
- param_name += len - suffix_len;
- return !strncmp(param_name, suffix, suffix_len);
-}
-
static bool is_kfunc_arg_mem_size(const struct btf *btf,
const struct btf_param *arg,
const struct bpf_reg_state *reg)
@@ -10646,7 +10740,7 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf,
if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
return false;
- return __kfunc_param_match_suffix(btf, arg, "__sz");
+ return btf_param_match_suffix(btf, arg, "__sz");
}
static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
@@ -10659,47 +10753,52 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
return false;
- return __kfunc_param_match_suffix(btf, arg, "__szk");
+ return btf_param_match_suffix(btf, arg, "__szk");
}
static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__opt");
+ return btf_param_match_suffix(btf, arg, "__opt");
}
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__k");
+ return btf_param_match_suffix(btf, arg, "__k");
}
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__ign");
+ return btf_param_match_suffix(btf, arg, "__ign");
+}
+
+static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
+{
+ return btf_param_match_suffix(btf, arg, "__map");
}
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__alloc");
+ return btf_param_match_suffix(btf, arg, "__alloc");
}
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__uninit");
+ return btf_param_match_suffix(btf, arg, "__uninit");
}
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr");
+ return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
}
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__nullable");
+ return btf_param_match_suffix(btf, arg, "__nullable");
}
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
- return __kfunc_param_match_suffix(btf, arg, "__str");
+ return btf_param_match_suffix(btf, arg, "__str");
}
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
@@ -10846,6 +10945,7 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_RB_NODE,
KF_ARG_PTR_TO_NULL,
KF_ARG_PTR_TO_CONST_STR,
+ KF_ARG_PTR_TO_MAP,
};
enum special_kfunc_type {
@@ -10969,7 +11069,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
* type to our caller. When a set of conditions hold in the BTF type of
* arguments, we resolve it to a known kfunc_ptr_arg_type.
*/
- if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
+ if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
return KF_ARG_PTR_TO_CTX;
if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
@@ -10999,6 +11099,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_CONST_STR;
+ if (is_kfunc_arg_map(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_MAP;
+
if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
if (!btf_type_is_struct(ref_t)) {
verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -11481,7 +11584,7 @@ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
return true;
fallthrough;
default:
- return env->prog->aux->sleepable;
+ return in_sleepable(env);
}
}
@@ -11599,6 +11702,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
switch (kf_arg_type) {
case KF_ARG_PTR_TO_NULL:
continue;
+ case KF_ARG_PTR_TO_MAP:
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
case KF_ARG_PTR_TO_BTF_ID:
if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
@@ -11815,6 +11919,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_MAP:
+ /* If argument has '__map' suffix expect 'struct bpf_map *' */
+ ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
+ ref_t = btf_type_by_id(btf_vmlinux, ref_id);
+ ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+ fallthrough;
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
@@ -12002,7 +12112,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
sleepable = is_kfunc_sleepable(&meta);
- if (sleepable && !env->prog->aux->sleepable) {
+ if (sleepable && !in_sleepable(env)) {
verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
return -EACCES;
}
@@ -12289,6 +12399,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
meta.func_name);
return -EFAULT;
}
+ } else if (btf_type_is_void(ptr_type)) {
+ /* kfunc returning 'void *' is equivalent to returning scalar */
+ mark_reg_unknown(env, regs, BPF_REG_0);
} else if (!__btf_type_is_struct(ptr_type)) {
if (!meta.r0_size) {
__u32 sz;
@@ -12826,6 +12939,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
switch (base_type(ptr_reg->type)) {
+ case PTR_TO_CTX:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_STACK:
+ case PTR_TO_PACKET_META:
+ case PTR_TO_PACKET:
+ case PTR_TO_TP_BUFFER:
+ case PTR_TO_BTF_ID:
+ case PTR_TO_MEM:
+ case PTR_TO_BUF:
+ case PTR_TO_FUNC:
+ case CONST_PTR_TO_DYNPTR:
+ break;
case PTR_TO_FLOW_KEYS:
if (known)
break;
@@ -12835,16 +12961,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (known && smin_val == 0 && opcode == BPF_ADD)
break;
fallthrough;
- case PTR_TO_PACKET_END:
- case PTR_TO_SOCKET:
- case PTR_TO_SOCK_COMMON:
- case PTR_TO_TCP_SOCK:
- case PTR_TO_XDP_SOCK:
+ default:
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str(env, ptr_reg->type));
return -EACCES;
- default:
- break;
}
/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
@@ -13751,6 +13871,21 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
dst_reg = &regs[insn->dst_reg];
src_reg = NULL;
+
+ if (dst_reg->type == PTR_TO_ARENA) {
+ struct bpf_insn_aux_data *aux = cur_aux(env);
+
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ /*
+ * 32-bit operations zero upper bits automatically.
+ * 64-bit operations need to be converted to 32.
+ */
+ aux->needs_zext = true;
+
+ /* Any arithmetic operations are allowed on arena pointers */
+ return 0;
+ }
+
if (dst_reg->type != SCALAR_VALUE)
ptr_reg = dst_reg;
else
@@ -13868,19 +14003,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
} else if (opcode == BPF_MOV) {
if (BPF_SRC(insn->code) == BPF_X) {
- if (insn->imm != 0) {
- verbose(env, "BPF_MOV uses reserved fields\n");
- return -EINVAL;
- }
-
if (BPF_CLASS(insn->code) == BPF_ALU) {
- if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
+ if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
+ insn->imm) {
verbose(env, "BPF_MOV uses reserved fields\n");
return -EINVAL;
}
+ } else if (insn->off == BPF_ADDR_SPACE_CAST) {
+ if (insn->imm != 1 && insn->imm != 1u << 16) {
+ verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
+ return -EINVAL;
+ }
} else {
- if (insn->off != 0 && insn->off != 8 && insn->off != 16 &&
- insn->off != 32) {
+ if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
+ insn->off != 32) || insn->imm) {
verbose(env, "BPF_MOV uses reserved fields\n");
return -EINVAL;
}
@@ -13905,20 +14041,18 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (BPF_SRC(insn->code) == BPF_X) {
struct bpf_reg_state *src_reg = regs + insn->src_reg;
struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
- bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
- !tnum_is_const(src_reg->var_off);
if (BPF_CLASS(insn->code) == BPF_ALU64) {
- if (insn->off == 0) {
+ if (insn->imm) {
+ /* off == BPF_ADDR_SPACE_CAST */
+ mark_reg_unknown(env, regs, insn->dst_reg);
+ if (insn->imm == 1) /* cast from as(1) to as(0) */
+ dst_reg->type = PTR_TO_ARENA;
+ } else if (insn->off == 0) {
/* case: R1 = R2
* copy register state to dest reg
*/
- if (need_id)
- /* Assign src and dst registers the same ID
- * that will be used by find_equal_scalars()
- * to propagate min/max range.
- */
- src_reg->id = ++env->id_gen;
+ assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = DEF_NOT_SUBREG;
@@ -13933,8 +14067,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
bool no_sext;
no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
- if (no_sext && need_id)
- src_reg->id = ++env->id_gen;
+ if (no_sext)
+ assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
if (!no_sext)
dst_reg->id = 0;
@@ -13954,10 +14088,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EACCES;
} else if (src_reg->type == SCALAR_VALUE) {
if (insn->off == 0) {
- bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
+ bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
- if (is_src_reg_u32 && need_id)
- src_reg->id = ++env->id_gen;
+ if (is_src_reg_u32)
+ assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
/* Make sure ID is cleared if src_reg is not in u32
* range otherwise dst_reg min/max could be incorrectly
@@ -13971,8 +14105,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* case: W1 = (s8, s16)W2 */
bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
- if (no_sext && need_id)
- src_reg->id = ++env->id_gen;
+ if (no_sext)
+ assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
if (!no_sext)
dst_reg->id = 0;
@@ -14807,11 +14941,36 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
int err;
/* Only conditional jumps are expected to reach here. */
- if (opcode == BPF_JA || opcode > BPF_JSLE) {
+ if (opcode == BPF_JA || opcode > BPF_JCOND) {
verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
return -EINVAL;
}
+ if (opcode == BPF_JCOND) {
+ struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
+ int idx = *insn_idx;
+
+ if (insn->code != (BPF_JMP | BPF_JCOND) ||
+ insn->src_reg != BPF_MAY_GOTO ||
+ insn->dst_reg || insn->imm || insn->off == 0) {
+ verbose(env, "invalid may_goto off %d imm %d\n",
+ insn->off, insn->imm);
+ return -EINVAL;
+ }
+ prev_st = find_prev_entry(env, cur_st->parent, idx);
+
+ /* branch out 'fallthrough' insn as a new state to explore */
+ queued_st = push_stack(env, idx + 1, idx, false);
+ if (!queued_st)
+ return -ENOMEM;
+
+ queued_st->may_goto_depth++;
+ if (prev_st)
+ widen_imprecise_scalars(env, prev_st, queued_st);
+ *insn_idx += insn->off;
+ return 0;
+ }
+
/* check src2 operand */
err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
@@ -15063,6 +15222,10 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
+ if (map->map_type == BPF_MAP_TYPE_ARENA) {
+ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
dst_reg->type = PTR_TO_MAP_VALUE;
dst_reg->off = aux->map_off;
WARN_ON_ONCE(map->max_entries != 1);
@@ -15529,7 +15692,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
return DONE_EXPLORING;
case BPF_CALL:
- if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
+ if (is_async_callback_calling_insn(insn))
/* Mark this call insn as a prune point to trigger
* is_state_visited() check before call itself is
* processed by __check_func_call(). Otherwise new
@@ -15595,6 +15758,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
default:
/* conditional jump with two edges */
mark_prune_point(env, t);
+ if (is_may_goto_insn(insn))
+ mark_force_checkpoint(env, t);
ret = push_insn(t, t + 1, FALLTHROUGH, env);
if (ret)
@@ -16158,8 +16323,8 @@ static int check_btf_info(struct bpf_verifier_env *env,
}
/* check %cur's range satisfies %old's */
-static bool range_within(struct bpf_reg_state *old,
- struct bpf_reg_state *cur)
+static bool range_within(const struct bpf_reg_state *old,
+ const struct bpf_reg_state *cur)
{
return old->umin_value <= cur->umin_value &&
old->umax_value >= cur->umax_value &&
@@ -16323,21 +16488,28 @@ static bool regs_exact(const struct bpf_reg_state *rold,
check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
}
+enum exact_level {
+ NOT_EXACT,
+ EXACT,
+ RANGE_WITHIN
+};
+
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
- struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
+ struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
+ enum exact_level exact)
{
- if (exact)
+ if (exact == EXACT)
return regs_exact(rold, rcur, idmap);
- if (!(rold->live & REG_LIVE_READ))
+ if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT)
/* explored state didn't use this */
return true;
- if (rold->type == NOT_INIT)
- /* explored state can't have used this */
- return true;
- if (rcur->type == NOT_INIT)
- return false;
+ if (rold->type == NOT_INIT) {
+ if (exact == NOT_EXACT || rcur->type == NOT_INIT)
+ /* explored state can't have used this */
+ return true;
+ }
/* Enforce that register types have to match exactly, including their
* modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
@@ -16372,7 +16544,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
check_scalar_ids(rold->id, rcur->id, idmap);
}
- if (!rold->precise)
+ if (!rold->precise && exact == NOT_EXACT)
return true;
/* Why check_ids() for scalar registers?
*
@@ -16440,13 +16612,53 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
* the same stack frame, since fp-8 in foo != fp-8 in bar
*/
return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
+ case PTR_TO_ARENA:
+ return true;
default:
return regs_exact(rold, rcur, idmap);
}
}
+static struct bpf_reg_state unbound_reg;
+
+static __init int unbound_reg_init(void)
+{
+ __mark_reg_unknown_imprecise(&unbound_reg);
+ unbound_reg.live |= REG_LIVE_READ;
+ return 0;
+}
+late_initcall(unbound_reg_init);
+
+static bool is_stack_all_misc(struct bpf_verifier_env *env,
+ struct bpf_stack_state *stack)
+{
+ u32 i;
+
+ for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
+ if ((stack->slot_type[i] == STACK_MISC) ||
+ (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
+ continue;
+ return false;
+ }
+
+ return true;
+}
+
+static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
+ struct bpf_stack_state *stack)
+{
+ if (is_spilled_scalar_reg64(stack))
+ return &stack->spilled_ptr;
+
+ if (is_stack_all_misc(env, stack))
+ return &unbound_reg;
+
+ return NULL;
+}
+
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
- struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
+ struct bpf_func_state *cur, struct bpf_idmap *idmap,
+ enum exact_level exact)
{
int i, spi;
@@ -16459,12 +16671,13 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
spi = i / BPF_REG_SIZE;
- if (exact &&
+ if (exact != NOT_EXACT &&
old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
cur->stack[spi].slot_type[i % BPF_REG_SIZE])
return false;
- if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
+ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
+ && exact == NOT_EXACT) {
i += BPF_REG_SIZE - 1;
/* explored state didn't use this */
continue;
@@ -16483,6 +16696,20 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
if (i >= cur->allocated_stack)
return false;
+ /* 64-bit scalar spill vs all slots MISC and vice versa.
+ * Load from all slots MISC produces unbound scalar.
+ * Construct a fake register for such stack and call
+ * regsafe() to ensure scalar ids are compared.
+ */
+ old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
+ cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
+ if (old_reg && cur_reg) {
+ if (!regsafe(env, old_reg, cur_reg, idmap, exact))
+ return false;
+ i += BPF_REG_SIZE - 1;
+ continue;
+ }
+
/* if old state was safe with misc data in the stack
* it will be safe with zero-initialized stack.
* The opposite is not true
@@ -16596,10 +16823,13 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
* the current state will reach 'bpf_exit' instruction safely
*/
static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
- struct bpf_func_state *cur, bool exact)
+ struct bpf_func_state *cur, enum exact_level exact)
{
int i;
+ if (old->callback_depth > cur->callback_depth)
+ return false;
+
for (i = 0; i < MAX_BPF_REG; i++)
if (!regsafe(env, &old->regs[i], &cur->regs[i],
&env->idmap_scratch, exact))
@@ -16623,7 +16853,7 @@ static void reset_idmap_scratch(struct bpf_verifier_env *env)
static bool states_equal(struct bpf_verifier_env *env,
struct bpf_verifier_state *old,
struct bpf_verifier_state *cur,
- bool exact)
+ enum exact_level exact)
{
int i;
@@ -16997,7 +17227,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* => unsafe memory access at 11 would not be caught.
*/
if (is_iter_next_insn(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, true)) {
+ if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
struct bpf_func_state *cur_frame;
struct bpf_reg_state *iter_state, *iter_reg;
int spi;
@@ -17020,15 +17250,23 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
}
goto skip_inf_loop_check;
}
+ if (is_may_goto_insn_at(env, insn_idx)) {
+ if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ update_loop_entry(cur, &sl->state);
+ goto hit;
+ }
+ goto skip_inf_loop_check;
+ }
if (calls_callback(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, true))
+ if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
goto hit;
goto skip_inf_loop_check;
}
/* attempt to detect infinite loop to avoid unnecessary doomed work */
if (states_maybe_looping(&sl->state, cur) &&
- states_equal(env, &sl->state, cur, false) &&
+ states_equal(env, &sl->state, cur, EXACT) &&
!iter_active_depths_differ(&sl->state, cur) &&
+ sl->state.may_goto_depth == cur->may_goto_depth &&
sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
verbose_linfo(env, insn_idx, "; ");
verbose(env, "infinite loop detected at insn %d\n", insn_idx);
@@ -17084,7 +17322,7 @@ skip_inf_loop_check:
*/
loop_entry = get_loop_entry(&sl->state);
force_exact = loop_entry && loop_entry->branches > 0;
- if (states_equal(env, &sl->state, cur, force_exact)) {
+ if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
if (force_exact)
update_loop_entry(cur, loop_entry);
hit:
@@ -17254,6 +17492,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_TCP_SOCK:
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
+ case PTR_TO_ARENA:
return false;
default:
return true;
@@ -17536,7 +17775,6 @@ static int do_check(struct bpf_verifier_env *env)
if (env->cur_state->active_lock.ptr) {
if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
- (insn->src_reg == BPF_PSEUDO_CALL) ||
(insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
(insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
verbose(env, "function calls are not allowed while holding a lock\n");
@@ -17584,14 +17822,12 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
process_bpf_exit_full:
- if (env->cur_state->active_lock.ptr &&
- !in_rbtree_lock_required_cb(env)) {
+ if (env->cur_state->active_lock.ptr && !env->cur_state->curframe) {
verbose(env, "bpf_spin_unlock is missing\n");
return -EINVAL;
}
- if (env->cur_state->active_rcu_lock &&
- !in_rbtree_lock_required_cb(env)) {
+ if (env->cur_state->active_rcu_lock && !env->cur_state->curframe) {
verbose(env, "bpf_rcu_read_unlock is missing\n");
return -EINVAL;
}
@@ -17904,7 +18140,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
return -EINVAL;
}
- if (prog->aux->sleepable)
+ if (prog->sleepable)
switch (map->map_type) {
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_LRU_HASH:
@@ -17920,6 +18156,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_TASK_STORAGE:
case BPF_MAP_TYPE_CGRP_STORAGE:
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_ARENA:
break;
default:
verbose(env,
@@ -18089,7 +18328,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return -E2BIG;
}
- if (env->prog->aux->sleepable)
+ if (env->prog->sleepable)
atomic64_inc(&map->sleepable_refcnt);
/* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it
@@ -18107,6 +18346,31 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
fdput(f);
return -EBUSY;
}
+ if (map->map_type == BPF_MAP_TYPE_ARENA) {
+ if (env->prog->aux->arena) {
+ verbose(env, "Only one arena per program\n");
+ fdput(f);
+ return -EBUSY;
+ }
+ if (!env->allow_ptr_leaks || !env->bpf_capable) {
+ verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
+ fdput(f);
+ return -EPERM;
+ }
+ if (!env->prog->jit_requested) {
+ verbose(env, "JIT is required to use arena\n");
+ return -EOPNOTSUPP;
+ }
+ if (!bpf_jit_supports_arena()) {
+ verbose(env, "JIT doesn't support arena\n");
+ return -EOPNOTSUPP;
+ }
+ env->prog->aux->arena = (void *)map;
+ if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
+ verbose(env, "arena's user address must be set via map_extra or mmap()\n");
+ return -EINVAL;
+ }
+ }
fdput(f);
next_insn:
@@ -18728,6 +18992,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
env->prog->aux->num_exentries++;
}
continue;
+ case PTR_TO_ARENA:
+ if (BPF_MODE(insn->code) == BPF_MEMSX) {
+ verbose(env, "sign extending loads from arena are not supported yet\n");
+ return -EOPNOTSUPP;
+ }
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
+ env->prog->aux->num_exentries++;
+ continue;
default:
continue;
}
@@ -18913,13 +19185,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->nr_linfo = prog->aux->nr_linfo;
func[i]->aux->jited_linfo = prog->aux->jited_linfo;
func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
+ func[i]->aux->arena = prog->aux->arena;
num_exentries = 0;
insn = func[i]->insnsi;
for (j = 0; j < func[i]->len; j++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
num_exentries++;
+ if ((BPF_CLASS(insn->code) == BPF_STX ||
+ BPF_CLASS(insn->code) == BPF_ST) &&
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32)
+ num_exentries++;
}
func[i]->aux->num_exentries = num_exentries;
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
@@ -19294,7 +19572,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
- int i, ret, cnt, delta = 0;
+ int i, ret, cnt, delta = 0, cur_subprog = 0;
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ u16 stack_depth = subprogs[cur_subprog].stack_depth;
+ u16 stack_depth_extra = 0;
if (env->seen_exception && !env->exception_callback_subprog) {
struct bpf_insn patch[] = {
@@ -19314,7 +19595,22 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
mark_subprog_exc_cb(env, env->exception_callback_subprog);
}
- for (i = 0; i < insn_cnt; i++, insn++) {
+ for (i = 0; i < insn_cnt;) {
+ if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
+ if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
+ (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
+ /* convert to 32-bit mov that clears upper 32-bit */
+ insn->code = BPF_ALU | BPF_MOV | BPF_X;
+ /* clear off, so it's a normal 'wX = wY' from JIT pov */
+ insn->off = 0;
+ } /* cast from as(0) to as(1) should be handled by JIT */
+ goto next_insn;
+ }
+
+ if (env->insn_aux_data[i + delta].needs_zext)
+ /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
+ insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
+
/* Make divide-by-zero exceptions impossible. */
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
@@ -19353,7 +19649,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
@@ -19373,7 +19669,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
/* Rewrite pointer arithmetic to mitigate speculation attacks. */
@@ -19388,7 +19684,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
aux = &env->insn_aux_data[i + delta];
if (!aux->alu_state ||
aux->alu_state == BPF_ALU_NON_POINTER)
- continue;
+ goto next_insn;
isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
@@ -19426,19 +19722,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
+ }
+
+ if (is_may_goto_insn(insn)) {
+ int stack_off = -stack_depth - 8;
+
+ stack_depth_extra = 8;
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+ insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
+ cnt = 4;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
}
if (insn->code != (BPF_JMP | BPF_CALL))
- continue;
+ goto next_insn;
if (insn->src_reg == BPF_PSEUDO_CALL)
- continue;
+ goto next_insn;
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
if (ret)
return ret;
if (cnt == 0)
- continue;
+ goto next_insn;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
@@ -19447,7 +19763,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
if (insn->imm == BPF_FUNC_get_route_realm)
@@ -19495,11 +19811,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
}
insn->imm = ret + 1;
- continue;
+ goto next_insn;
}
if (!bpf_map_ptr_unpriv(aux))
- continue;
+ goto next_insn;
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
@@ -19528,7 +19844,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
if (insn->imm == BPF_FUNC_timer_set_callback) {
@@ -19565,7 +19881,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
}
if (is_storage_get_function(insn->imm)) {
- if (!env->prog->aux->sleepable ||
+ if (!in_sleepable(env) ||
env->insn_aux_data[i + delta].storage_get_func_atomic)
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
else
@@ -19640,7 +19956,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
@@ -19671,31 +19987,31 @@ patch_map_ops_generic:
switch (insn->imm) {
case BPF_FUNC_map_lookup_elem:
insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
- continue;
+ goto next_insn;
case BPF_FUNC_map_update_elem:
insn->imm = BPF_CALL_IMM(ops->map_update_elem);
- continue;
+ goto next_insn;
case BPF_FUNC_map_delete_elem:
insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
- continue;
+ goto next_insn;
case BPF_FUNC_map_push_elem:
insn->imm = BPF_CALL_IMM(ops->map_push_elem);
- continue;
+ goto next_insn;
case BPF_FUNC_map_pop_elem:
insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
- continue;
+ goto next_insn;
case BPF_FUNC_map_peek_elem:
insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
- continue;
+ goto next_insn;
case BPF_FUNC_redirect_map:
insn->imm = BPF_CALL_IMM(ops->map_redirect);
- continue;
+ goto next_insn;
case BPF_FUNC_for_each_map_elem:
insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
- continue;
+ goto next_insn;
case BPF_FUNC_map_lookup_percpu_elem:
insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
- continue;
+ goto next_insn;
}
goto patch_call_imm;
@@ -19723,7 +20039,7 @@ patch_map_ops_generic:
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
/* Implement bpf_get_func_arg inline. */
@@ -19748,7 +20064,7 @@ patch_map_ops_generic:
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
/* Implement bpf_get_func_ret inline. */
@@ -19776,7 +20092,7 @@ patch_map_ops_generic:
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
/* Implement get_func_arg_cnt inline. */
@@ -19791,7 +20107,7 @@ patch_map_ops_generic:
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
/* Implement bpf_get_func_ip inline. */
@@ -19806,9 +20122,26 @@ patch_map_ops_generic:
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
- continue;
+ goto next_insn;
}
+ /* Implement bpf_kptr_xchg inline */
+ if (prog->jit_requested && BITS_PER_LONG == 64 &&
+ insn->imm == BPF_FUNC_kptr_xchg &&
+ bpf_jit_supports_ptr_xchg()) {
+ insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+ insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+ cnt = 2;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
@@ -19821,6 +20154,40 @@ patch_call_imm:
return -EFAULT;
}
insn->imm = fn->func - __bpf_call_base;
+next_insn:
+ if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+ subprogs[cur_subprog].stack_depth += stack_depth_extra;
+ subprogs[cur_subprog].stack_extra = stack_depth_extra;
+ cur_subprog++;
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ stack_depth_extra = 0;
+ }
+ i++;
+ insn++;
+ }
+
+ env->prog->aux->stack_depth = subprogs[0].stack_depth;
+ for (i = 0; i < env->subprog_cnt; i++) {
+ int subprog_start = subprogs[i].start;
+ int stack_slots = subprogs[i].stack_extra / 8;
+
+ if (!stack_slots)
+ continue;
+ if (stack_slots > 1) {
+ verbose(env, "verifier bug: stack_slots supports may_goto only\n");
+ return -EFAULT;
+ }
+
+ /* Add ST insn to subprog prologue to init extra stack */
+ insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+ -subprogs[i].stack_depth, BPF_MAX_LOOPS);
+ /* Copy first actual insn to preserve it */
+ insn_buf[1] = env->prog->insnsi[subprog_start];
+
+ new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = prog = new_prog;
}
/* Since poke tab is now finalized, publish aux to tracker. */
@@ -20041,7 +20408,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
state->first_insn_idx = env->subprog_info[subprog].start;
state->last_insn_idx = -1;
-
regs = state->frame[state->curframe]->regs;
if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
const char *sub_name = subprog_name(env, subprog);
@@ -20085,6 +20451,21 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
mark_reg_known_zero(env, regs, i);
reg->mem_size = arg->mem_size;
reg->id = ++env->id_gen;
+ } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
+ reg->type = PTR_TO_BTF_ID;
+ if (arg->arg_type & PTR_MAYBE_NULL)
+ reg->type |= PTR_MAYBE_NULL;
+ if (arg->arg_type & PTR_UNTRUSTED)
+ reg->type |= PTR_UNTRUSTED;
+ if (arg->arg_type & PTR_TRUSTED)
+ reg->type |= PTR_TRUSTED;
+ mark_reg_known_zero(env, regs, i);
+ reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
+ reg->btf_id = arg->btf_id;
+ reg->id = ++env->id_gen;
+ } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
+ /* caller can pass either PTR_TO_ARENA or SCALAR */
+ mark_reg_unknown(env, regs, i);
} else {
WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n",
i - BPF_REG_1, arg->arg_type);
@@ -20233,10 +20614,12 @@ static void print_verification_stats(struct bpf_verifier_env *env)
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
const struct btf_type *t, *func_proto;
+ const struct bpf_struct_ops_desc *st_ops_desc;
const struct bpf_struct_ops *st_ops;
const struct btf_member *member;
struct bpf_prog *prog = env->prog;
u32 btf_id, member_idx;
+ struct btf *btf;
const char *mname;
if (!prog->gpl_compatible) {
@@ -20244,15 +20627,30 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
+ if (!prog->aux->attach_btf_id)
+ return -ENOTSUPP;
+
+ btf = prog->aux->attach_btf;
+ if (btf_is_module(btf)) {
+ /* Make sure st_ops is valid through the lifetime of env */
+ env->attach_btf_mod = btf_try_get_module(btf);
+ if (!env->attach_btf_mod) {
+ verbose(env, "struct_ops module %s is not found\n",
+ btf_get_name(btf));
+ return -ENOTSUPP;
+ }
+ }
+
btf_id = prog->aux->attach_btf_id;
- st_ops = bpf_struct_ops_find(btf_id);
- if (!st_ops) {
+ st_ops_desc = bpf_struct_ops_find(btf, btf_id);
+ if (!st_ops_desc) {
verbose(env, "attach_btf_id %u is not a supported struct\n",
btf_id);
return -ENOTSUPP;
}
+ st_ops = st_ops_desc->st_ops;
- t = st_ops->type;
+ t = st_ops_desc->type;
member_idx = prog->expected_attach_type;
if (member_idx >= btf_type_vlen(t)) {
verbose(env, "attach to invalid member idx %u of struct %s\n",
@@ -20261,8 +20659,8 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
member = &btf_type_member(t)[member_idx];
- mname = btf_name_by_offset(btf_vmlinux, member->name_off);
- func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
+ mname = btf_name_by_offset(btf, member->name_off);
+ func_proto = btf_type_resolve_func_ptr(btf, member->type,
NULL);
if (!func_proto) {
verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
@@ -20280,6 +20678,12 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
}
+ /* btf_ctx_access() used this to provide argument type info */
+ prog->aux->ctx_arg_info =
+ st_ops_desc->arg_info[member_idx].info;
+ prog->aux->ctx_arg_info_size =
+ st_ops_desc->arg_info[member_idx].cnt;
+
prog->aux->attach_func_proto = func_proto;
prog->aux->attach_func_name = mname;
env->ops = st_ops->verifier_ops;
@@ -20537,7 +20941,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
}
}
- if (prog->aux->sleepable) {
+ if (prog->sleepable) {
ret = -EINVAL;
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
@@ -20648,14 +21052,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
u64 key;
if (prog->type == BPF_PROG_TYPE_SYSCALL) {
- if (prog->aux->sleepable)
+ if (prog->sleepable)
/* attach_btf_id checked to be zero already */
return 0;
verbose(env, "Syscall programs can only be sleepable\n");
return -EINVAL;
}
- if (prog->aux->sleepable && !can_be_sleepable(prog)) {
+ if (prog->sleepable && !can_be_sleepable(prog)) {
verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
return -EINVAL;
}
@@ -20764,7 +21168,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->prog = *prog;
env->ops = bpf_verifier_ops[env->prog->type];
env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
- is_priv = bpf_capable();
+
+ env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
+ env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
+ env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
+ env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
+ env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
bpf_get_btf_vmlinux();
@@ -20796,12 +21205,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
env->strict_alignment = false;
- env->allow_ptr_leaks = bpf_allow_ptr_leaks();
- env->allow_uninit_stack = bpf_allow_uninit_stack();
- env->bypass_spec_v1 = bpf_bypass_spec_v1();
- env->bypass_spec_v4 = bpf_bypass_spec_v4();
- env->bpf_capable = bpf_capable();
-
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
@@ -20967,6 +21370,8 @@ err_release_maps:
env->prog->expected_attach_type = 0;
*prog = env->prog;
+
+ module_put(env->attach_btf_mod);
err_unlock:
if (!is_priv)
mutex_unlock(&bpf_verifier_lock);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ba36c073304a..4237c8748715 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2562,7 +2562,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
update_partition_sd_lb(cs, old_prs);
out_free:
free_cpumasks(NULL, &tmp);
- return 0;
+ return retval;
}
/**
@@ -2598,9 +2598,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus))
return 0;
- if (alloc_cpumasks(NULL, &tmp))
- return -ENOMEM;
-
if (*buf)
compute_effective_exclusive_cpumask(trialcs, NULL);
@@ -2615,6 +2612,9 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (retval)
return retval;
+ if (alloc_cpumasks(NULL, &tmp))
+ return -ENOMEM;
+
if (old_prs) {
if (cpumask_empty(trialcs->effective_xcpus)) {
invalidate = true;
@@ -3897,6 +3897,7 @@ static struct cftype legacy_files[] = {
},
{
+ /* obsolete, may be removed in the future */
.name = "memory_spread_slab",
.read_u64 = cpuset_read_u64,
.write_u64 = cpuset_write_u64,
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a8350d2d63e6..07e2284bb499 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -562,10 +562,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
}
/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
-BTF_SET8_START(bpf_rstat_kfunc_ids)
+BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
BTF_ID_FLAGS(func, cgroup_rstat_updated)
BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
-BTF_SET8_END(bpf_rstat_kfunc_ids)
+BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index 4722b998a324..509ee703de15 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -40,6 +40,12 @@ CONFIG_UBSAN_ENUM=y
CONFIG_UBSAN_SHIFT=y
CONFIG_UBSAN_UNREACHABLE=y
#
+# Networking Debugging
+#
+CONFIG_NET_DEV_REFCNT_TRACKER=y
+CONFIG_NET_NS_REFCNT_TRACKER=y
+CONFIG_DEBUG_NET=y
+#
# Memory Debugging
#
# CONFIG_DEBUG_PAGEALLOC is not set
diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config
index 95a400f042b1..7a5bbfc024b7 100644
--- a/kernel/configs/hardening.config
+++ b/kernel/configs/hardening.config
@@ -44,7 +44,9 @@ CONFIG_UBSAN_BOUNDS=y
# CONFIG_UBSAN_BOOL
# CONFIG_UBSAN_ENUM
# CONFIG_UBSAN_ALIGNMENT
-CONFIG_UBSAN_SANITIZE_ALL=y
+
+# Sampling-based heap out-of-bounds and use-after-free detection.
+CONFIG_KFENCE=y
# Linked list integrity checking.
CONFIG_LIST_HARDENED=y
@@ -93,6 +95,3 @@ CONFIG_SYN_COOKIES=y
# Attack surface reduction: Use the modern PTY interface (devpts) only.
# CONFIG_LEGACY_PTYS is not set
-
-# Attack surface reduction: Use only modesetting video drivers.
-# CONFIG_DRM_LEGACY is not set
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6ef0b35fc28c..70ae70d03823 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -458,6 +458,8 @@ static __always_inline void context_tracking_recursion_exit(void)
* __ct_user_enter - Inform the context tracking that the CPU is going
* to enter user or guest space mode.
*
+ * @state: userspace context-tracking state to enter.
+ *
* This function must be called right before we switch from the kernel
* to user or guest space, when it's guaranteed the remaining kernel
* instructions to execute won't use any RCU read side critical section
@@ -595,6 +597,8 @@ NOKPROBE_SYMBOL(user_enter_callable);
* __ct_user_exit - Inform the context tracking that the CPU is
* exiting user or guest mode and entering the kernel.
*
+ * @state: userspace context-tracking state being exited from.
+ *
* This function must be called after we entered the kernel from user or
* guest space before any use of RCU read side critical section. This
* potentially include any high level kernel code like syscalls, exceptions,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e6ec3ba4950b..8f6affd051f7 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -54,7 +54,6 @@
* @rollback: Perform a rollback
* @single: Single callback invocation
* @bringup: Single callback bringup or teardown selector
- * @cpu: CPU number
* @node: Remote CPU node; for multi-instance, do a
* single entry callback for install/remove
* @last: For multi-instance rollback, remember how far we got
@@ -1324,10 +1323,6 @@ static int take_cpu_down(void *_param)
*/
cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
- /* Give up timekeeping duties */
- tick_handover_do_timer();
- /* Remove CPU from timer broadcasting */
- tick_offline_cpu(cpu);
/* Park the stopper thread */
stop_machine_park(cpu);
return 0;
@@ -1403,6 +1398,7 @@ void cpuhp_report_idle_dead(void)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
BUG_ON(st->state != CPUHP_AP_OFFLINE);
+ tick_assert_timekeeping_handover();
rcutree_report_cpu_dead();
st->state = CPUHP_AP_IDLE_DEAD;
/*
@@ -1909,14 +1905,14 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
#endif /* CONFIG_HOTPLUG_PARALLEL */
-void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
+void __init bringup_nonboot_cpus(unsigned int max_cpus)
{
/* Try parallel bringup optimization if enabled */
- if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
+ if (cpuhp_bringup_cpus_parallel(max_cpus))
return;
/* Full per CPU serialized bringup */
- cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
+ cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE);
}
#ifdef CONFIG_PM_SLEEP_SMP
@@ -2205,7 +2201,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = hrtimers_cpu_dying,
},
-
+ [CPUHP_AP_TICK_DYING] = {
+ .name = "tick:dying",
+ .startup.single = NULL,
+ .teardown.single = tick_cpu_dying,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
@@ -3005,7 +3005,7 @@ static ssize_t control_show(struct device *dev,
return sysfs_emit(buf, "%d\n", cpu_smt_num_threads);
#endif
- return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
+ return sysfs_emit(buf, "%s\n", state);
}
static ssize_t control_store(struct device *dev, struct device_attribute *attr,
@@ -3018,7 +3018,7 @@ static DEVICE_ATTR_RW(control);
static ssize_t active_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
+ return sysfs_emit(buf, "%d\n", sched_smt_active());
}
static DEVICE_ATTR_RO(active);
@@ -3107,10 +3107,10 @@ const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
#ifdef CONFIG_INIT_ALL_POSSIBLE
-struct cpumask __cpu_possible_mask __read_mostly
+struct cpumask __cpu_possible_mask __ro_after_init
= {CPU_BITS_ALL};
#else
-struct cpumask __cpu_possible_mask __read_mostly;
+struct cpumask __cpu_possible_mask __ro_after_init;
#endif
EXPORT_SYMBOL(__cpu_possible_mask);
diff --git a/kernel/cred.c b/kernel/cred.c
index c033a201c808..075cfa7c896f 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -606,8 +606,8 @@ int set_cred_ucounts(struct cred *new)
void __init cred_init(void)
{
/* allocate a slab in which we can store credentials */
- cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ cred_jar = KMEM_CACHE(cred,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
}
/**
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 98b2e192fd69..4d543b1e9d57 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -286,7 +286,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
} else {
ret = page_address(page);
if (dma_set_decrypted(dev, ret, size))
- goto out_free_pages;
+ goto out_leak_pages;
}
memset(ret, 0, size);
@@ -307,6 +307,8 @@ out_encrypt_pages:
out_free_pages:
__dma_direct_free_pages(dev, page, size);
return NULL;
+out_leak_pages:
+ return NULL;
}
void dma_direct_free(struct device *dev, size_t size,
@@ -367,12 +369,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
ret = page_address(page);
if (dma_set_decrypted(dev, ret, size))
- goto out_free_pages;
+ goto out_leak_pages;
memset(ret, 0, size);
*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return page;
-out_free_pages:
- __dma_direct_free_pages(dev, page, size);
+out_leak_pages:
return NULL;
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index b079a9a8e087..77974cea3e69 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -956,6 +956,28 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
}
#endif /* CONFIG_DEBUG_FS */
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+#ifdef CONFIG_DEBUG_FS
+static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_add(nslots, &mem->transient_nslabs);
+}
+
+static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->transient_nslabs);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
/**
* swiotlb_search_pool_area() - search one memory area in one pool
* @dev: Device which maps the buffer.
@@ -1170,6 +1192,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
spin_lock_irqsave(&dev->dma_io_tlb_lock, flags);
list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
+ inc_transient_used(mem, pool->nslabs);
found:
WRITE_ONCE(dev->dma_uses_io_tlb, true);
@@ -1415,6 +1438,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
dec_used(dev->dma_io_tlb_mem, pool->nslabs);
swiotlb_del_pool(dev, pool);
+ dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs);
return true;
}
@@ -1557,6 +1581,23 @@ phys_addr_t default_swiotlb_limit(void)
}
#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+static unsigned long mem_transient_used(struct io_tlb_mem *mem)
+{
+ return atomic_long_read(&mem->transient_nslabs);
+}
+
+static int io_tlb_transient_used_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = mem_transient_used(mem);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get,
+ NULL, "%llu\n");
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
static int io_tlb_used_get(void *data, u64 *val)
{
@@ -1605,6 +1646,11 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
&fops_io_tlb_used);
debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
&fops_io_tlb_hiwater);
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ atomic_long_set(&mem->transient_nslabs, 0);
+ debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
+ mem, &fops_io_tlb_transient_used);
+#endif
}
static int __init swiotlb_create_default_debugfs(void)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f0f0f71213a1..724e6d7e128f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9302,10 +9302,6 @@ void perf_event_bpf_event(struct bpf_prog *prog,
{
struct perf_bpf_event bpf_event;
- if (type <= PERF_BPF_EVENT_UNKNOWN ||
- type >= PERF_BPF_EVENT_MAX)
- return;
-
switch (type) {
case PERF_BPF_EVENT_PROG_LOAD:
case PERF_BPF_EVENT_PROG_UNLOAD:
@@ -9313,7 +9309,7 @@ void perf_event_bpf_event(struct bpf_prog *prog,
perf_event_bpf_emit_ksymbols(prog, type);
break;
default:
- break;
+ return;
}
if (!atomic_read(&nr_bpf_events))
@@ -10557,7 +10553,7 @@ int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
(is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
return -EINVAL;
- if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe)
+ if (prog->type == BPF_PROG_TYPE_KPROBE && prog->sleepable && !is_uprobe)
/* only uprobe programs are allowed to be sleepable */
return -EINVAL;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 485bb0389b48..929e98c62965 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -537,7 +537,7 @@ retry:
}
}
- ret = __replace_page(vma, vaddr, old_page, new_page);
+ ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page);
if (new_page)
put_page(new_page);
put_old:
diff --git a/kernel/exit.c b/kernel/exit.c
index 3988a02efaef..41a12630cbbc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -739,6 +739,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
tsk->exit_state = EXIT_ZOMBIE;
+ /*
+ * sub-thread or delay_group_leader(), wake up the
+ * PIDFD_THREAD waiters.
+ */
+ if (!thread_group_empty(tsk))
+ do_notify_pidfd(tsk);
+
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
@@ -1127,17 +1134,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* and nobody can change them.
*
* psig->stats_lock also protects us from our sub-threads
- * which can reap other children at the same time. Until
- * we change k_getrusage()-like users to rely on this lock
- * we have to take ->siglock as well.
+ * which can reap other children at the same time.
*
* We use thread_group_cputime_adjusted() to get times for
* the thread group, which consolidates times for all threads
* in the group including the group leader.
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
- spin_lock_irq(&current->sighand->siglock);
- write_seqlock(&psig->stats_lock);
+ write_seqlock_irq(&psig->stats_lock);
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1160,8 +1164,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
psig->cmaxrss = maxrss;
task_io_accounting_add(&psig->ioac, &p->ioac);
task_io_accounting_add(&psig->ioac, &sig->ioac);
- write_sequnlock(&psig->stats_lock);
- spin_unlock_irq(&current->sighand->siglock);
+ write_sequnlock_irq(&psig->stats_lock);
}
if (wo->wo_rusage)
@@ -1893,30 +1896,6 @@ Efault:
}
#endif
-/**
- * thread_group_exited - check that a thread group has exited
- * @pid: tgid of thread group to be checked.
- *
- * Test if the thread group represented by tgid has exited (all
- * threads are zombies, dead or completely gone).
- *
- * Return: true if the thread group has exited. false otherwise.
- */
-bool thread_group_exited(struct pid *pid)
-{
- struct task_struct *task;
- bool exited;
-
- rcu_read_lock();
- task = pid_task(pid, PIDTYPE_PID);
- exited = !task ||
- (READ_ONCE(task->exit_state) && thread_group_empty(task));
- rcu_read_unlock();
-
- return exited;
-}
-EXPORT_SYMBOL(thread_group_exited);
-
/*
* This needs to be __function_aligned as GCC implicitly makes any
* implementation of abort() cold and drops alignment specified by
diff --git a/kernel/fork.c b/kernel/fork.c
index 47ff3b35352e..39a5046c2f0b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,8 @@
#include <linux/user_events.h>
#include <linux/iommu.h>
#include <linux/rseq.h>
+#include <uapi/linux/pidfd.h>
+#include <linux/pidfs.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -1748,6 +1750,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
if (clone_flags & CLONE_FS) {
/* tsk->fs is already what we want */
spin_lock(&fs->lock);
+ /* "users" and "in_exec" locked for check_unsafe_exec() */
if (fs->in_exec) {
spin_unlock(&fs->lock);
return -EAGAIN;
@@ -1975,6 +1978,7 @@ static inline void rcu_copy_process(struct task_struct *p)
p->rcu_tasks_holdout = false;
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
p->rcu_tasks_idle_cpu = -1;
+ INIT_LIST_HEAD(&p->rcu_tasks_exit_list);
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
p->trc_reader_nesting = 0;
@@ -1984,119 +1988,6 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
-struct pid *pidfd_pid(const struct file *file)
-{
- if (file->f_op == &pidfd_fops)
- return file->private_data;
-
- return ERR_PTR(-EBADF);
-}
-
-static int pidfd_release(struct inode *inode, struct file *file)
-{
- struct pid *pid = file->private_data;
-
- file->private_data = NULL;
- put_pid(pid);
- return 0;
-}
-
-#ifdef CONFIG_PROC_FS
-/**
- * pidfd_show_fdinfo - print information about a pidfd
- * @m: proc fdinfo file
- * @f: file referencing a pidfd
- *
- * Pid:
- * This function will print the pid that a given pidfd refers to in the
- * pid namespace of the procfs instance.
- * If the pid namespace of the process is not a descendant of the pid
- * namespace of the procfs instance 0 will be shown as its pid. This is
- * similar to calling getppid() on a process whose parent is outside of
- * its pid namespace.
- *
- * NSpid:
- * If pid namespaces are supported then this function will also print
- * the pid of a given pidfd refers to for all descendant pid namespaces
- * starting from the current pid namespace of the instance, i.e. the
- * Pid field and the first entry in the NSpid field will be identical.
- * If the pid namespace of the process is not a descendant of the pid
- * namespace of the procfs instance 0 will be shown as its first NSpid
- * entry and no others will be shown.
- * Note that this differs from the Pid and NSpid fields in
- * /proc/<pid>/status where Pid and NSpid are always shown relative to
- * the pid namespace of the procfs instance. The difference becomes
- * obvious when sending around a pidfd between pid namespaces from a
- * different branch of the tree, i.e. where no ancestral relation is
- * present between the pid namespaces:
- * - create two new pid namespaces ns1 and ns2 in the initial pid
- * namespace (also take care to create new mount namespaces in the
- * new pid namespace and mount procfs)
- * - create a process with a pidfd in ns1
- * - send pidfd from ns1 to ns2
- * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
- * have exactly one entry, which is 0
- */
-static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
-{
- struct pid *pid = f->private_data;
- struct pid_namespace *ns;
- pid_t nr = -1;
-
- if (likely(pid_has_task(pid, PIDTYPE_PID))) {
- ns = proc_pid_ns(file_inode(m->file)->i_sb);
- nr = pid_nr_ns(pid, ns);
- }
-
- seq_put_decimal_ll(m, "Pid:\t", nr);
-
-#ifdef CONFIG_PID_NS
- seq_put_decimal_ll(m, "\nNSpid:\t", nr);
- if (nr > 0) {
- int i;
-
- /* If nr is non-zero it means that 'pid' is valid and that
- * ns, i.e. the pid namespace associated with the procfs
- * instance, is in the pid namespace hierarchy of pid.
- * Start at one below the already printed level.
- */
- for (i = ns->level + 1; i <= pid->level; i++)
- seq_put_decimal_ll(m, "\t", pid->numbers[i].nr);
- }
-#endif
- seq_putc(m, '\n');
-}
-#endif
-
-/*
- * Poll support for process exit notification.
- */
-static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
-{
- struct pid *pid = file->private_data;
- __poll_t poll_flags = 0;
-
- poll_wait(file, &pid->wait_pidfd, pts);
-
- /*
- * Inform pollers only when the whole thread group exits.
- * If the thread group leader exits before all other threads in the
- * group, then poll(2) should block, similar to the wait(2) family.
- */
- if (thread_group_exited(pid))
- poll_flags = EPOLLIN | EPOLLRDNORM;
-
- return poll_flags;
-}
-
-const struct file_operations pidfd_fops = {
- .release = pidfd_release,
- .poll = pidfd_poll,
-#ifdef CONFIG_PROC_FS
- .show_fdinfo = pidfd_show_fdinfo,
-#endif
-};
-
/**
* __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
@@ -2130,20 +2021,20 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
int pidfd;
struct file *pidfd_file;
- if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
- return -EINVAL;
-
- pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ pidfd = get_unused_fd_flags(O_CLOEXEC);
if (pidfd < 0)
return pidfd;
- pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
- flags | O_RDWR | O_CLOEXEC);
+ pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR);
if (IS_ERR(pidfd_file)) {
put_unused_fd(pidfd);
return PTR_ERR(pidfd_file);
}
- get_pid(pid); /* held by pidfd_file now */
+ /*
+ * anon_inode_getfile() ignores everything outside of the
+ * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually.
+ */
+ pidfd_file->f_flags |= (flags & PIDFD_THREAD);
*ret = pidfd_file;
return pidfd;
}
@@ -2157,7 +2048,8 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
*
- * The helper verifies that @pid is used as a thread group leader.
+ * The helper verifies that @pid is still in use, without PIDFD_THREAD the
+ * task identified by @pid must be a thread-group leader.
*
* If this function returns successfully the caller is responsible to either
* call fd_install() passing the returned pidfd and pidfd file as arguments in
@@ -2176,7 +2068,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
*/
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
- if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
+ bool thread = flags & PIDFD_THREAD;
+
+ if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID))
return -EINVAL;
return __pidfd_prepare(pid, flags, ret);
@@ -2298,9 +2192,8 @@ __latent_entropy struct task_struct *copy_process(
/*
* - CLONE_DETACHED is blocked so that we can potentially
* reuse it later for CLONE_PIDFD.
- * - CLONE_THREAD is blocked until someone really needs it.
*/
- if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
+ if (clone_flags & CLONE_DETACHED)
return ERR_PTR(-EINVAL);
}
@@ -2523,8 +2416,10 @@ __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
+ int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
+
/* Note that no task has been attached to @pid yet. */
- retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile);
+ retval = __pidfd_prepare(pid, flags, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
@@ -2875,8 +2770,8 @@ pid_t kernel_clone(struct kernel_clone_args *args)
* here has the advantage that we don't need to have a separate helper
* to check for legacy clone().
*/
- if ((args->flags & CLONE_PIDFD) &&
- (args->flags & CLONE_PARENT_SETTID) &&
+ if ((clone_flags & CLONE_PIDFD) &&
+ (clone_flags & CLONE_PARENT_SETTID) &&
(args->pidfd == args->parent_tid))
return -EINVAL;
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index e0e853412c15..1e78ef24321e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -627,12 +627,21 @@ retry:
}
/*
- * PI futexes can not be requeued and must remove themselves from the
- * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
+ * PI futexes can not be requeued and must remove themselves from the hash
+ * bucket. The hash bucket lock (i.e. lock_ptr) is held.
*/
void futex_unqueue_pi(struct futex_q *q)
{
- __futex_unqueue(q);
+ /*
+ * If the lock was not acquired (due to timeout or signal) then the
+ * rt_waiter is removed before futex_q is. If this is observed by
+ * an unlocker after dropping the rtmutex wait lock and before
+ * acquiring the hash bucket lock, then the unlocker dequeues the
+ * futex_q from the hash bucket list to guarantee consistent state
+ * vs. userspace. Therefore the dequeue here must be conditional.
+ */
+ if (!plist_node_empty(&q->list))
+ __futex_unqueue(q);
BUG_ON(!q->pi_state);
put_pi_state(q->pi_state);
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index 90e5197f4e56..5722467f2737 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -1135,6 +1135,7 @@ retry:
hb = futex_hash(&key);
spin_lock(&hb->lock);
+retry_hb:
/*
* Check waiters first. We do not trust user space values at
@@ -1177,12 +1178,17 @@ retry:
/*
* Futex vs rt_mutex waiter state -- if there are no rt_mutex
* waiters even though futex thinks there are, then the waiter
- * is leaving and the uncontended path is safe to take.
+ * is leaving. The entry needs to be removed from the list so a
+ * new futex_lock_pi() is not using this stale PI-state while
+ * the futex is available in user space again.
+ * There can be more than one task on its way out so it needs
+ * to retry.
*/
rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
if (!rt_waiter) {
+ __futex_unqueue(top_waiter);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- goto do_uncontended;
+ goto retry_hb;
}
get_pi_state(pi_state);
@@ -1217,7 +1223,6 @@ retry:
return ret;
}
-do_uncontended:
/*
* We have no kernel internal state, i.e. no waiters in the
* kernel. Waiters which are about to queue themselves are stuck
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index dd76323ea3fd..38d6ae651ac7 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -4,10 +4,11 @@
* Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com>
*/
+#include <linux/cleanup.h>
+#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irq_sim.h>
#include <linux/irq_work.h>
-#include <linux/interrupt.h>
#include <linux/slab.h>
struct irq_sim_work_ctx {
@@ -19,7 +20,6 @@ struct irq_sim_work_ctx {
};
struct irq_sim_irq_ctx {
- int irqnum;
bool enabled;
struct irq_sim_work_ctx *work_ctx;
};
@@ -164,33 +164,27 @@ static const struct irq_domain_ops irq_sim_domain_ops = {
struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
unsigned int num_irqs)
{
- struct irq_sim_work_ctx *work_ctx;
+ struct irq_sim_work_ctx *work_ctx __free(kfree) =
+ kmalloc(sizeof(*work_ctx), GFP_KERNEL);
- work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL);
if (!work_ctx)
- goto err_out;
+ return ERR_PTR(-ENOMEM);
- work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL);
- if (!work_ctx->pending)
- goto err_free_work_ctx;
+ unsigned long *pending __free(bitmap) = bitmap_zalloc(num_irqs, GFP_KERNEL);
+ if (!pending)
+ return ERR_PTR(-ENOMEM);
work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs,
&irq_sim_domain_ops,
work_ctx);
if (!work_ctx->domain)
- goto err_free_bitmap;
+ return ERR_PTR(-ENOMEM);
work_ctx->irq_count = num_irqs;
work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq);
+ work_ctx->pending = no_free_ptr(pending);
- return work_ctx->domain;
-
-err_free_bitmap:
- bitmap_free(work_ctx->pending);
-err_free_work_ctx:
- kfree(work_ctx);
-err_out:
- return ERR_PTR(-ENOMEM);
+ return no_free_ptr(work_ctx)->domain;
}
EXPORT_SYMBOL_GPL(irq_domain_create_sim);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 27ca1c866f29..4c6b32318ce3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -92,11 +92,23 @@ static void desc_smp_init(struct irq_desc *desc, int node,
#endif
}
+static void free_masks(struct irq_desc *desc)
+{
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ free_cpumask_var(desc->pending_mask);
+#endif
+ free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
+}
+
#else
static inline int
alloc_masks(struct irq_desc *desc, int node) { return 0; }
static inline void
desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { }
+static inline void free_masks(struct irq_desc *desc) { }
#endif
static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
@@ -166,6 +178,39 @@ static void delete_irq_desc(unsigned int irq)
}
#ifdef CONFIG_SPARSE_IRQ
+static const struct kobj_type irq_kobj_type;
+#endif
+
+static int init_desc(struct irq_desc *desc, int irq, int node,
+ unsigned int flags,
+ const struct cpumask *affinity,
+ struct module *owner)
+{
+ desc->kstat_irqs = alloc_percpu(unsigned int);
+ if (!desc->kstat_irqs)
+ return -ENOMEM;
+
+ if (alloc_masks(desc, node)) {
+ free_percpu(desc->kstat_irqs);
+ return -ENOMEM;
+ }
+
+ raw_spin_lock_init(&desc->lock);
+ lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ mutex_init(&desc->request_mutex);
+ init_waitqueue_head(&desc->wait_for_threads);
+ desc_set_defaults(irq, desc, node, affinity, owner);
+ irqd_set(&desc->irq_data, flags);
+ irq_resend_init(desc);
+#ifdef CONFIG_SPARSE_IRQ
+ kobject_init(&desc->kobj, &irq_kobj_type);
+ init_rcu_head(&desc->rcu);
+#endif
+
+ return 0;
+}
+
+#ifdef CONFIG_SPARSE_IRQ
static void irq_kobj_release(struct kobject *kobj);
@@ -384,21 +429,6 @@ struct irq_desc *irq_to_desc(unsigned int irq)
EXPORT_SYMBOL_GPL(irq_to_desc);
#endif
-#ifdef CONFIG_SMP
-static void free_masks(struct irq_desc *desc)
-{
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- free_cpumask_var(desc->pending_mask);
-#endif
- free_cpumask_var(desc->irq_common_data.affinity);
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
- free_cpumask_var(desc->irq_common_data.effective_affinity);
-#endif
-}
-#else
-static inline void free_masks(struct irq_desc *desc) { }
-#endif
-
void irq_lock_sparse(void)
{
mutex_lock(&sparse_irq_lock);
@@ -414,36 +444,19 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
struct module *owner)
{
struct irq_desc *desc;
+ int ret;
desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node);
if (!desc)
return NULL;
- /* allocate based on nr_cpu_ids */
- desc->kstat_irqs = alloc_percpu(unsigned int);
- if (!desc->kstat_irqs)
- goto err_desc;
-
- if (alloc_masks(desc, node))
- goto err_kstat;
- raw_spin_lock_init(&desc->lock);
- lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- mutex_init(&desc->request_mutex);
- init_rcu_head(&desc->rcu);
- init_waitqueue_head(&desc->wait_for_threads);
-
- desc_set_defaults(irq, desc, node, affinity, owner);
- irqd_set(&desc->irq_data, flags);
- kobject_init(&desc->kobj, &irq_kobj_type);
- irq_resend_init(desc);
+ ret = init_desc(desc, irq, node, flags, affinity, owner);
+ if (unlikely(ret)) {
+ kfree(desc);
+ return NULL;
+ }
return desc;
-
-err_kstat:
- free_percpu(desc->kstat_irqs);
-err_desc:
- kfree(desc);
- return NULL;
}
static void irq_kobj_release(struct kobject *kobj)
@@ -583,26 +596,29 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
int __init early_irq_init(void)
{
int count, i, node = first_online_node;
- struct irq_desc *desc;
+ int ret;
init_irq_default_affinity();
printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
- desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
for (i = 0; i < count; i++) {
- desc[i].kstat_irqs = alloc_percpu(unsigned int);
- alloc_masks(&desc[i], node);
- raw_spin_lock_init(&desc[i].lock);
- lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- mutex_init(&desc[i].request_mutex);
- init_waitqueue_head(&desc[i].wait_for_threads);
- desc_set_defaults(i, &desc[i], node, NULL, NULL);
- irq_resend_init(desc);
+ ret = init_desc(irq_desc + i, i, node, 0, NULL, NULL);
+ if (unlikely(ret))
+ goto __free_desc_res;
}
+
return arch_early_irq_init();
+
+__free_desc_res:
+ while (--i >= 0) {
+ free_masks(irq_desc + i);
+ free_percpu(irq_desc[i].kstat_irqs);
+ }
+
+ return ret;
}
struct irq_desc *irq_to_desc(unsigned int irq)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 0bdef4fe925b..3dd1c871e091 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -29,6 +29,7 @@ static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct irq_affinity_desc *affinity);
static void irq_domain_check_hierarchy(struct irq_domain *domain);
+static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq);
struct irqchip_fwid {
struct fwnode_handle fwnode;
@@ -448,7 +449,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
*/
mutex_lock(&irq_domain_mutex);
list_for_each_entry(h, &irq_domain_list, link) {
- if (h->ops->select && fwspec->param_count)
+ if (h->ops->select && bus_token != DOMAIN_BUS_ANY)
rc = h->ops->select(h, fwspec, bus_token);
else if (h->ops->match)
rc = h->ops->match(h, to_of_node(fwnode), bus_token);
@@ -858,8 +859,13 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
}
if (irq_domain_is_hierarchy(domain)) {
- virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
- fwspec, false, NULL);
+ if (irq_domain_is_msi_device(domain)) {
+ mutex_unlock(&domain->root->mutex);
+ virq = msi_device_domain_alloc_wired(domain, hwirq, type);
+ mutex_lock(&domain->root->mutex);
+ } else
+ virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
+ fwspec, false, NULL);
if (virq <= 0) {
virq = 0;
goto out;
@@ -914,7 +920,7 @@ void irq_dispose_mapping(unsigned int virq)
return;
if (irq_domain_is_hierarchy(domain)) {
- irq_domain_free_irqs(virq, 1);
+ irq_domain_free_one_irq(domain, virq);
} else {
irq_domain_disassociate(domain, virq);
irq_free_desc(virq);
@@ -1755,6 +1761,14 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
irq_free_descs(virq, nr_irqs);
}
+static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq)
+{
+ if (irq_domain_is_msi_device(domain))
+ msi_device_domain_free_wired(domain, virq);
+ else
+ irq_domain_free_irqs(virq, 1);
+}
+
/**
* irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain
* @domain: Domain below which interrupts must be allocated
@@ -1907,9 +1921,9 @@ static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
return -EINVAL;
}
-static void irq_domain_check_hierarchy(struct irq_domain *domain)
-{
-}
+static void irq_domain_check_hierarchy(struct irq_domain *domain) { }
+static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { }
+
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1782f90cd8c6..ad3eaf2ab959 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -192,10 +192,14 @@ void irq_set_thread_affinity(struct irq_desc *desc)
struct irqaction *action;
for_each_action_of_desc(desc, action) {
- if (action->thread)
+ if (action->thread) {
set_bit(IRQTF_AFFINITY, &action->thread_flags);
- if (action->secondary && action->secondary->thread)
+ wake_up_process(action->thread);
+ }
+ if (action->secondary && action->secondary->thread) {
set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags);
+ wake_up_process(action->secondary->thread);
+ }
}
}
@@ -1049,10 +1053,57 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
return IRQ_NONE;
}
-static int irq_wait_for_interrupt(struct irqaction *action)
+#ifdef CONFIG_SMP
+/*
+ * Check whether we need to change the affinity of the interrupt thread.
+ */
+static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
+{
+ cpumask_var_t mask;
+ bool valid = false;
+
+ if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
+ return;
+
+ __set_current_state(TASK_RUNNING);
+
+ /*
+ * In case we are out of memory we set IRQTF_AFFINITY again and
+ * try again next time
+ */
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+ set_bit(IRQTF_AFFINITY, &action->thread_flags);
+ return;
+ }
+
+ raw_spin_lock_irq(&desc->lock);
+ /*
+ * This code is triggered unconditionally. Check the affinity
+ * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
+ */
+ if (cpumask_available(desc->irq_common_data.affinity)) {
+ const struct cpumask *m;
+
+ m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+ cpumask_copy(mask, m);
+ valid = true;
+ }
+ raw_spin_unlock_irq(&desc->lock);
+
+ if (valid)
+ set_cpus_allowed_ptr(current, mask);
+ free_cpumask_var(mask);
+}
+#else
+static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
+#endif
+
+static int irq_wait_for_interrupt(struct irq_desc *desc,
+ struct irqaction *action)
{
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
+ irq_thread_check_affinity(desc, action);
if (kthread_should_stop()) {
/* may need to run one last time */
@@ -1129,52 +1180,6 @@ out_unlock:
chip_bus_sync_unlock(desc);
}
-#ifdef CONFIG_SMP
-/*
- * Check whether we need to change the affinity of the interrupt thread.
- */
-static void
-irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
-{
- cpumask_var_t mask;
- bool valid = true;
-
- if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
- return;
-
- /*
- * In case we are out of memory we set IRQTF_AFFINITY again and
- * try again next time
- */
- if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
- set_bit(IRQTF_AFFINITY, &action->thread_flags);
- return;
- }
-
- raw_spin_lock_irq(&desc->lock);
- /*
- * This code is triggered unconditionally. Check the affinity
- * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
- */
- if (cpumask_available(desc->irq_common_data.affinity)) {
- const struct cpumask *m;
-
- m = irq_data_get_effective_affinity_mask(&desc->irq_data);
- cpumask_copy(mask, m);
- } else {
- valid = false;
- }
- raw_spin_unlock_irq(&desc->lock);
-
- if (valid)
- set_cpus_allowed_ptr(current, mask);
- free_cpumask_var(mask);
-}
-#else
-static inline void
-irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
-#endif
-
/*
* Interrupts which are not explicitly requested as threaded
* interrupts rely on the implicit bh/preempt disable of the hard irq
@@ -1312,13 +1317,9 @@ static int irq_thread(void *data)
init_task_work(&on_exit_work, irq_thread_dtor);
task_work_add(current, &on_exit_work, TWA_NONE);
- irq_thread_check_affinity(desc, action);
-
- while (!irq_wait_for_interrupt(action)) {
+ while (!irq_wait_for_interrupt(desc, action)) {
irqreturn_t action_ret;
- irq_thread_check_affinity(desc, action);
-
action_ret = handler_fn(desc, action);
if (action_ret == IRQ_WAKE_THREAD)
irq_wake_secondary(desc, action);
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 75d0ae490e29..8f222d1cccec 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -8,8 +8,6 @@
#include <linux/cpu.h>
#include <linux/irq.h>
-#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS))
-
struct cpumap {
unsigned int available;
unsigned int allocated;
@@ -17,8 +15,8 @@ struct cpumap {
unsigned int managed_allocated;
bool initialized;
bool online;
- unsigned long alloc_map[IRQ_MATRIX_SIZE];
- unsigned long managed_map[IRQ_MATRIX_SIZE];
+ unsigned long *managed_map;
+ unsigned long alloc_map[];
};
struct irq_matrix {
@@ -32,8 +30,8 @@ struct irq_matrix {
unsigned int total_allocated;
unsigned int online_maps;
struct cpumap __percpu *maps;
- unsigned long scratch_map[IRQ_MATRIX_SIZE];
- unsigned long system_map[IRQ_MATRIX_SIZE];
+ unsigned long *system_map;
+ unsigned long scratch_map[];
};
#define CREATE_TRACE_POINTS
@@ -50,24 +48,32 @@ __init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits,
unsigned int alloc_start,
unsigned int alloc_end)
{
+ unsigned int cpu, matrix_size = BITS_TO_LONGS(matrix_bits);
struct irq_matrix *m;
- if (matrix_bits > IRQ_MATRIX_BITS)
- return NULL;
-
- m = kzalloc(sizeof(*m), GFP_KERNEL);
+ m = kzalloc(struct_size(m, scratch_map, matrix_size * 2), GFP_KERNEL);
if (!m)
return NULL;
+ m->system_map = &m->scratch_map[matrix_size];
+
m->matrix_bits = matrix_bits;
m->alloc_start = alloc_start;
m->alloc_end = alloc_end;
m->alloc_size = alloc_end - alloc_start;
- m->maps = alloc_percpu(*m->maps);
+ m->maps = __alloc_percpu(struct_size(m->maps, alloc_map, matrix_size * 2),
+ __alignof__(*m->maps));
if (!m->maps) {
kfree(m);
return NULL;
}
+
+ for_each_possible_cpu(cpu) {
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+
+ cm->managed_map = &cm->alloc_map[matrix_size];
+ }
+
return m;
}
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 79b4a58ba9c3..f90952ebc494 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -726,11 +726,26 @@ static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
+static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq, unsigned int *type)
+{
+ struct msi_domain_info *info = domain->host_data;
+
+ /*
+ * This will catch allocations through the regular irqdomain path except
+ * for MSI domains which really support this, e.g. MBIGEN.
+ */
+ if (!info->ops->msi_translate)
+ return -ENOTSUPP;
+ return info->ops->msi_translate(domain, fwspec, hwirq, type);
+}
+
static const struct irq_domain_ops msi_domain_ops = {
.alloc = msi_domain_alloc,
.free = msi_domain_free,
.activate = msi_domain_activate,
.deactivate = msi_domain_deactivate,
+ .translate = msi_domain_translate,
};
static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
@@ -830,8 +845,11 @@ static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
fwnode, &msi_domain_ops, info);
- if (domain)
+ if (domain) {
irq_domain_update_bus_token(domain, info->bus_token);
+ if (info->flags & MSI_FLAG_PARENT_PM_DEV)
+ domain->pm_dev = parent->pm_dev;
+ }
return domain;
}
@@ -945,9 +963,9 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
void *chip_data)
{
struct irq_domain *domain, *parent = dev->msi.domain;
- const struct msi_parent_ops *pops;
+ struct fwnode_handle *fwnode, *fwnalloced = NULL;
struct msi_domain_template *bundle;
- struct fwnode_handle *fwnode;
+ const struct msi_parent_ops *pops;
if (!irq_domain_is_msi_parent(parent))
return false;
@@ -970,7 +988,19 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
pops->prefix ? : "", bundle->chip.name, dev_name(dev));
bundle->chip.name = bundle->name;
- fwnode = irq_domain_alloc_named_fwnode(bundle->name);
+ /*
+ * Using the device firmware node is required for wire to MSI
+ * device domains so that the existing firmware results in a domain
+ * match.
+ * All other device domains like PCI/MSI use the named firmware
+ * node as they are not guaranteed to have a fwnode. They are never
+ * looked up and always handled in the context of the device.
+ */
+ if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)
+ fwnode = dev->fwnode;
+ else
+ fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name);
+
if (!fwnode)
goto free_bundle;
@@ -997,7 +1027,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
fail:
msi_unlock_descs(dev);
free_fwnode:
- irq_domain_free_fwnode(fwnode);
+ irq_domain_free_fwnode(fwnalloced);
free_bundle:
kfree(bundle);
return false;
@@ -1431,34 +1461,10 @@ int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int
return msi_domain_alloc_locked(dev, &ctrl);
}
-/**
- * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
- * a given index - or at the next free index
- *
- * @dev: Pointer to device struct of the device for which the interrupts
- * are allocated
- * @domid: Id of the interrupt domain to operate on
- * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
- * uses the next free index.
- * @affdesc: Optional pointer to an interrupt affinity descriptor structure
- * @icookie: Optional pointer to a domain specific per instance cookie. If
- * non-NULL the content of the cookie is stored in msi_desc::data.
- * Must be NULL for MSI-X allocations
- *
- * This requires a MSI interrupt domain which lets the core code manage the
- * MSI descriptors.
- *
- * Return: struct msi_map
- *
- * On success msi_map::index contains the allocated index number and
- * msi_map::virq the corresponding Linux interrupt number
- *
- * On failure msi_map::index contains the error code and msi_map::virq
- * is %0.
- */
-struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
- const struct irq_affinity_desc *affdesc,
- union msi_instance_cookie *icookie)
+static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
+ unsigned int index,
+ const struct irq_affinity_desc *affdesc,
+ union msi_instance_cookie *icookie)
{
struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, };
struct irq_domain *domain;
@@ -1466,17 +1472,16 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
struct msi_desc *desc;
int ret;
- msi_lock_descs(dev);
domain = msi_get_device_domain(dev, domid);
if (!domain) {
map.index = -ENODEV;
- goto unlock;
+ return map;
}
desc = msi_alloc_desc(dev, 1, affdesc);
if (!desc) {
map.index = -ENOMEM;
- goto unlock;
+ return map;
}
if (icookie)
@@ -1485,7 +1490,7 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
ret = msi_insert_desc(dev, desc, domid, index);
if (ret) {
map.index = ret;
- goto unlock;
+ return map;
}
ctrl.first = ctrl.last = desc->msi_index;
@@ -1498,11 +1503,90 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
map.index = desc->msi_index;
map.virq = desc->irq;
}
-unlock:
+ return map;
+}
+
+/**
+ * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
+ * a given index - or at the next free index
+ *
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are allocated
+ * @domid: Id of the interrupt domain to operate on
+ * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
+ * uses the next free index.
+ * @affdesc: Optional pointer to an interrupt affinity descriptor structure
+ * @icookie: Optional pointer to a domain specific per instance cookie. If
+ * non-NULL the content of the cookie is stored in msi_desc::data.
+ * Must be NULL for MSI-X allocations
+ *
+ * This requires a MSI interrupt domain which lets the core code manage the
+ * MSI descriptors.
+ *
+ * Return: struct msi_map
+ *
+ * On success msi_map::index contains the allocated index number and
+ * msi_map::virq the corresponding Linux interrupt number
+ *
+ * On failure msi_map::index contains the error code and msi_map::virq
+ * is %0.
+ */
+struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
+ const struct irq_affinity_desc *affdesc,
+ union msi_instance_cookie *icookie)
+{
+ struct msi_map map;
+
+ msi_lock_descs(dev);
+ map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
msi_unlock_descs(dev);
return map;
}
+/**
+ * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
+ * @domain: The domain to allocate on
+ * @hwirq: The hardware interrupt number to allocate for
+ * @type: The interrupt type
+ *
+ * This weirdness supports wire to MSI controllers like MBIGEN.
+ *
+ * @hwirq is the hardware interrupt number which is handed in from
+ * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
+ * sized in firmware, the hardware interrupt number cannot be used as MSI
+ * index. For the underlying irq chip the MSI index is irrelevant and
+ * all it needs is the hardware interrupt number.
+ *
+ * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
+ * hardware interrupt number is stored along with the type information in
+ * msi_desc::cookie so the underlying interrupt chip and domain code can
+ * retrieve it.
+ *
+ * Return: The Linux interrupt number (> 0) or an error code
+ */
+int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
+ unsigned int type)
+{
+ unsigned int domid = MSI_DEFAULT_DOMAIN;
+ union msi_instance_cookie icookie = { };
+ struct device *dev = domain->dev;
+ struct msi_map map = { };
+
+ if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
+ return -EINVAL;
+
+ icookie.value = ((u64)type << 32) | hwirq;
+
+ msi_lock_descs(dev);
+ if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
+ map.index = -EINVAL;
+ else
+ map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
+ msi_unlock_descs(dev);
+
+ return map.index >= 0 ? map.virq : map.index;
+}
+
static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
struct msi_ctrl *ctrl)
{
@@ -1629,6 +1713,30 @@ void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
}
/**
+ * msi_device_domain_free_wired - Free a wired interrupt in @domain
+ * @domain: The domain to free the interrupt on
+ * @virq: The Linux interrupt number to free
+ *
+ * This is the counterpart of msi_device_domain_alloc_wired() for the
+ * weird wired to MSI converting domains.
+ */
+void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
+{
+ struct msi_desc *desc = irq_get_msi_desc(virq);
+ struct device *dev = domain->dev;
+
+ if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
+ return;
+
+ msi_lock_descs(dev);
+ if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) {
+ msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
+ desc->msi_index);
+ }
+ msi_unlock_descs(dev);
+}
+
+/**
* msi_get_domain_info - Get the MSI interrupt domain info for @domain
* @domain: The interrupt domain to retrieve data from
*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d5a0ee40bf66..9d9095e81792 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1993,7 +1993,7 @@ NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
struct llist_node **cur)
{
- struct kretprobe_instance *ri = NULL;
+ struct kretprobe_instance *ri;
kprobe_opcode_t *ret;
if (WARN_ON_ONCE(!cur))
@@ -2802,7 +2802,7 @@ static int show_kprobe_addr(struct seq_file *pi, void *v)
{
struct hlist_head *head;
struct kprobe *p, *kp;
- const char *sym = NULL;
+ const char *sym;
unsigned int i = *(loff_t *) v;
unsigned long offset = 0;
char *modname, namebuf[KSYM_NAME_LEN];
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 185bd1c906b0..6083883c4fe0 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -223,9 +223,10 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
+ bool contended = false;
+
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
- trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
/* Notify readers to take the slow path. */
rcu_sync_enter(&sem->rss);
@@ -234,8 +235,11 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
* Try set sem->block; this provides writer-writer exclusion.
* Having sem->block set makes new readers block.
*/
- if (!__percpu_down_write_trylock(sem))
+ if (!__percpu_down_write_trylock(sem)) {
+ trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
percpu_rwsem_wait(sem, /* .reader = */ false);
+ contended = true;
+ }
/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
@@ -247,7 +251,8 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
/* Wait for all active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
- trace_contention_end(sem, 0);
+ if (contended)
+ trace_contention_end(sem, 0);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 6a0184e9c234..ae2b12f68b90 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -294,8 +294,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
{
struct pv_node *pn = (struct pv_node *)node;
struct pv_node *pp = (struct pv_node *)prev;
+ bool __maybe_unused wait_early;
int loop;
- bool wait_early;
for (;;) {
for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4a10e8c16fd2..88d08eeb8bc0 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -237,12 +237,13 @@ static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
*/
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
- unsigned long owner, *p = (unsigned long *) &lock->owner;
+ unsigned long *p = (unsigned long *) &lock->owner;
+ unsigned long owner, new;
+ owner = READ_ONCE(*p);
do {
- owner = *p;
- } while (cmpxchg_relaxed(p, owner,
- owner | RT_MUTEX_HAS_WAITERS) != owner);
+ new = owner | RT_MUTEX_HAS_WAITERS;
+ } while (!try_cmpxchg_relaxed(p, &owner, new));
/*
* The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2340b6d90ec6..c6d17aee4209 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -35,7 +35,7 @@
/*
* The least significant 2 bits of the owner value has the following
* meanings when set.
- * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
+ * - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
* - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
*
* When the rwsem is reader-owned and a spinning writer has timed out,
@@ -1002,8 +1002,8 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
/*
* To prevent a constant stream of readers from starving a sleeping
- * waiter, don't attempt optimistic lock stealing if the lock is
- * currently owned by readers.
+ * writer, don't attempt optimistic lock stealing if the lock is
+ * very likely owned by readers.
*/
if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
(rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 15781acaac1c..6ec3deec68c2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -573,7 +573,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags)
if (proc_ns_file(f.file))
err = validate_ns(&nsset, ns);
else
- err = validate_nsset(&nsset, f.file->private_data);
+ err = validate_nsset(&nsset, pidfd_pid(f.file));
if (!err) {
commit_nsset(&nsset);
perf_event_namespaces(current);
diff --git a/kernel/panic.c b/kernel/panic.c
index 2807639aab51..f22d8f33ea14 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -446,6 +446,14 @@ void panic(const char *fmt, ...)
/* Do not scroll important messages printed above */
suppress_printk = 1;
+
+ /*
+ * The final messages may not have been printed if in a context that
+ * defers printing (such as NMI) and irq_work is not available.
+ * Explicitly flush the kernel log buffer one last time.
+ */
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
+
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
touch_softlockup_watchdog();
diff --git a/kernel/pid.c b/kernel/pid.c
index b52b10865454..99a0c5eb24b8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -42,6 +42,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/idr.h>
+#include <linux/pidfs.h>
#include <net/sock.h>
#include <uapi/linux/pidfd.h>
@@ -65,6 +66,13 @@ int pid_max = PID_MAX_DEFAULT;
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
+#ifdef CONFIG_FS_PID
+/*
+ * Pseudo filesystems start inode numbering after one. We use Reserved
+ * PIDs as a natural offset.
+ */
+static u64 pidfs_ino = RESERVED_PIDS;
+#endif
/*
* PID-map pages start out as NULL, they get allocated upon
@@ -272,6 +280,10 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
spin_lock_irq(&pidmap_lock);
if (!(ns->pid_allocated & PIDNS_ADDING))
goto out_unlock;
+#ifdef CONFIG_FS_PID
+ pid->stashed = NULL;
+ pid->ino = ++pidfs_ino;
+#endif
for ( ; upid >= pid->numbers; --upid) {
/* Make the PID visible to find_pid_ns. */
idr_replace(&upid->ns->idr, pid, upid->nr);
@@ -349,6 +361,11 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
hlist_del_rcu(&task->pid_links[type]);
*pid_ptr = new;
+ if (type == PIDTYPE_PID) {
+ WARN_ON_ONCE(pid_has_task(pid, PIDTYPE_PID));
+ wake_up_all(&pid->wait_pidfd);
+ }
+
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
if (pid_has_task(pid, tmp))
return;
@@ -391,8 +408,7 @@ void exchange_tids(struct task_struct *left, struct task_struct *right)
void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type type)
{
- if (type == PIDTYPE_PID)
- new->thread_pid = old->thread_pid;
+ WARN_ON_ONCE(type == PIDTYPE_PID);
hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
}
@@ -552,11 +568,6 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
* Return the task associated with @pidfd. The function takes a reference on
* the returned task. The caller is responsible for releasing that reference.
*
- * Currently, the process identified by @pidfd is always a thread-group leader.
- * This restriction currently exists for all aspects of pidfds including pidfd
- * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
- * (only supports thread group leaders).
- *
* Return: On success, the task_struct associated with the pidfd.
* On error, a negative errno number will be returned.
*/
@@ -595,7 +606,7 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
*/
-int pidfd_create(struct pid *pid, unsigned int flags)
+static int pidfd_create(struct pid *pid, unsigned int flags)
{
int pidfd;
struct file *pidfd_file;
@@ -615,11 +626,8 @@ int pidfd_create(struct pid *pid, unsigned int flags)
* @flags: flags to pass
*
* This creates a new pid file descriptor with the O_CLOEXEC flag set for
- * the process identified by @pid. Currently, the process identified by
- * @pid must be a thread-group leader. This restriction currently exists
- * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
- * be used with CLONE_THREAD) and pidfd polling (only supports thread group
- * leaders).
+ * the task identified by @pid. Without PIDFD_THREAD flag the target task
+ * must be a thread-group leader.
*
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
@@ -629,7 +637,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
int fd;
struct pid *p;
- if (flags & ~PIDFD_NONBLOCK)
+ if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD))
return -EINVAL;
if (pid <= 0)
@@ -682,7 +690,26 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd)
up_read(&task->signal->exec_update_lock);
- return file ?: ERR_PTR(-EBADF);
+ if (!file) {
+ /*
+ * It is possible that the target thread is exiting; it can be
+ * either:
+ * 1. before exit_signals(), which gives a real fd
+ * 2. before exit_files() takes the task_lock() gives a real fd
+ * 3. after exit_files() releases task_lock(), ->files is NULL;
+ * this has PF_EXITING, since it was set in exit_signals(),
+ * __pidfd_fget() returns EBADF.
+ * In case 3 we get EBADF, but that really means ESRCH, since
+ * the task is currently exiting and has freed its files
+ * struct, so we fix it up.
+ */
+ if (task->flags & PF_EXITING)
+ file = ERR_PTR(-ESRCH);
+ else
+ file = ERR_PTR(-EBADF);
+ }
+
+ return file;
}
static int pidfd_getfd(struct pid *pid, int fd)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 6053ddddaf65..692f12fe60c1 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -222,7 +222,7 @@ int swsusp_swap_in_use(void)
*/
static unsigned short root_swap = 0xffff;
-static struct bdev_handle *hib_resume_bdev_handle;
+static struct file *hib_resume_bdev_file;
struct hib_bio_batch {
atomic_t count;
@@ -276,7 +276,7 @@ static int hib_submit_io(blk_opf_t opf, pgoff_t page_off, void *addr,
struct bio *bio;
int error = 0;
- bio = bio_alloc(hib_resume_bdev_handle->bdev, 1, opf,
+ bio = bio_alloc(file_bdev(hib_resume_bdev_file), 1, opf,
GFP_NOIO | __GFP_HIGH);
bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
@@ -357,14 +357,14 @@ static int swsusp_swap_check(void)
return res;
root_swap = res;
- hib_resume_bdev_handle = bdev_open_by_dev(swsusp_resume_device,
+ hib_resume_bdev_file = bdev_file_open_by_dev(swsusp_resume_device,
BLK_OPEN_WRITE, NULL, NULL);
- if (IS_ERR(hib_resume_bdev_handle))
- return PTR_ERR(hib_resume_bdev_handle);
+ if (IS_ERR(hib_resume_bdev_file))
+ return PTR_ERR(hib_resume_bdev_file);
- res = set_blocksize(hib_resume_bdev_handle->bdev, PAGE_SIZE);
+ res = set_blocksize(file_bdev(hib_resume_bdev_file), PAGE_SIZE);
if (res < 0)
- bdev_release(hib_resume_bdev_handle);
+ fput(hib_resume_bdev_file);
return res;
}
@@ -1523,10 +1523,10 @@ int swsusp_check(bool exclusive)
void *holder = exclusive ? &swsusp_holder : NULL;
int error;
- hib_resume_bdev_handle = bdev_open_by_dev(swsusp_resume_device,
+ hib_resume_bdev_file = bdev_file_open_by_dev(swsusp_resume_device,
BLK_OPEN_READ, holder, NULL);
- if (!IS_ERR(hib_resume_bdev_handle)) {
- set_blocksize(hib_resume_bdev_handle->bdev, PAGE_SIZE);
+ if (!IS_ERR(hib_resume_bdev_file)) {
+ set_blocksize(file_bdev(hib_resume_bdev_file), PAGE_SIZE);
clear_page(swsusp_header);
error = hib_submit_io(REQ_OP_READ, swsusp_resume_block,
swsusp_header, NULL);
@@ -1551,11 +1551,11 @@ int swsusp_check(bool exclusive)
put:
if (error)
- bdev_release(hib_resume_bdev_handle);
+ fput(hib_resume_bdev_file);
else
pr_debug("Image signature found, resuming\n");
} else {
- error = PTR_ERR(hib_resume_bdev_handle);
+ error = PTR_ERR(hib_resume_bdev_file);
}
if (error)
@@ -1570,12 +1570,12 @@ put:
void swsusp_close(void)
{
- if (IS_ERR(hib_resume_bdev_handle)) {
+ if (IS_ERR(hib_resume_bdev_file)) {
pr_debug("Image device not initialised\n");
return;
}
- bdev_release(hib_resume_bdev_handle);
+ fput(hib_resume_bdev_file);
}
/**
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index b96077152f49..c8093bcc01fe 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -140,39 +140,6 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta
return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
}
-#ifdef CONFIG_64BIT
-
-#define __seq_to_nbcon_seq(seq) (seq)
-#define __nbcon_seq_to_seq(seq) (seq)
-
-#else /* CONFIG_64BIT */
-
-#define __seq_to_nbcon_seq(seq) ((u32)seq)
-
-static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq)
-{
- u64 seq;
- u64 rb_next_seq;
-
- /*
- * The provided sequence is only the lower 32 bits of the ringbuffer
- * sequence. It needs to be expanded to 64bit. Get the next sequence
- * number from the ringbuffer and fold it.
- *
- * Having a 32bit representation in the console is sufficient.
- * If a console ever gets more than 2^31 records behind
- * the ringbuffer then this is the least of the problems.
- *
- * Also the access to the ring buffer is always safe.
- */
- rb_next_seq = prb_next_seq(prb);
- seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq);
-
- return seq;
-}
-
-#endif /* CONFIG_64BIT */
-
/**
* nbcon_seq_read - Read the current console sequence
* @con: Console to read the sequence of
@@ -183,7 +150,7 @@ u64 nbcon_seq_read(struct console *con)
{
unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));
- return __nbcon_seq_to_seq(nbcon_seq);
+ return __ulseq_to_u64seq(prb, nbcon_seq);
}
/**
@@ -204,7 +171,7 @@ void nbcon_seq_force(struct console *con, u64 seq)
*/
u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
- atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq));
+ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
/* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */
con->seq = 0;
@@ -223,11 +190,11 @@ void nbcon_seq_force(struct console *con, u64 seq)
*/
static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
{
- unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq);
+ unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq);
struct console *con = ctxt->console;
if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
- __seq_to_nbcon_seq(new_seq))) {
+ __u64seq_to_ulseq(new_seq))) {
ctxt->seq = new_seq;
} else {
ctxt->seq = nbcon_seq_read(con);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f2444b581e16..b06f63e276c1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -347,6 +347,29 @@ static bool panic_in_progress(void)
return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}
+/* Return true if a panic is in progress on the current CPU. */
+bool this_cpu_in_panic(void)
+{
+ /*
+ * We can use raw_smp_processor_id() here because it is impossible for
+ * the task to be migrated to the panic_cpu, or away from it. If
+ * panic_cpu has already been set, and we're not currently executing on
+ * that CPU, then we never will be.
+ */
+ return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
+}
+
+/*
+ * Return true if a panic is in progress on a remote CPU.
+ *
+ * On true, the local CPU should immediately release any printing resources
+ * that may be needed by the panic CPU.
+ */
+bool other_cpu_in_panic(void)
+{
+ return (panic_in_progress() && !this_cpu_in_panic());
+}
+
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
@@ -439,12 +462,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
static DEFINE_MUTEX(syslog_lock);
#ifdef CONFIG_PRINTK
-/*
- * During panic, heavy printk by other CPUs can delay the
- * panic and risk deadlock on console resources.
- */
-static int __read_mostly suppress_panic_printk;
-
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
@@ -598,17 +615,6 @@ static int check_syslog_permissions(int type, int source)
if (syslog_action_restricted(type)) {
if (capable(CAP_SYSLOG))
goto ok;
- /*
- * For historical reasons, accept CAP_SYS_ADMIN too, with
- * a warning.
- */
- if (capable(CAP_SYS_ADMIN)) {
- pr_warn_once("%s (%d): Attempt to access syslog with "
- "CAP_SYS_ADMIN but no CAP_SYSLOG "
- "(deprecated).\n",
- current->comm, task_pid_nr(current));
- goto ok;
- }
return -EPERM;
}
ok:
@@ -1846,10 +1852,23 @@ static bool console_waiter;
*/
static void console_lock_spinning_enable(void)
{
+ /*
+ * Do not use spinning in panic(). The panic CPU wants to keep the lock.
+ * Non-panic CPUs abandon the flush anyway.
+ *
+ * Just keep the lockdep annotation. The panic-CPU should avoid
+ * taking console_owner_lock because it might cause a deadlock.
+ * This looks like the easiest way how to prevent false lockdep
+ * reports without handling races a lockless way.
+ */
+ if (panic_in_progress())
+ goto lockdep;
+
raw_spin_lock(&console_owner_lock);
console_owner = current;
raw_spin_unlock(&console_owner_lock);
+lockdep:
/* The waiter may spin on us after setting console_owner */
spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}
@@ -1874,6 +1893,22 @@ static int console_lock_spinning_disable_and_check(int cookie)
{
int waiter;
+ /*
+ * Ignore spinning waiters during panic() because they might get stopped
+ * or blocked at any time,
+ *
+ * It is safe because nobody is allowed to start spinning during panic
+ * in the first place. If there has been a waiter then non panic CPUs
+ * might stay spinning. They would get stopped anyway. The panic context
+ * will never start spinning and an interrupted spin on panic CPU will
+ * never continue.
+ */
+ if (panic_in_progress()) {
+ /* Keep lockdep happy. */
+ spin_release(&console_owner_dep_map, _THIS_IP_);
+ return 0;
+ }
+
raw_spin_lock(&console_owner_lock);
waiter = READ_ONCE(console_waiter);
console_owner = NULL;
@@ -2270,8 +2305,12 @@ asmlinkage int vprintk_emit(int facility, int level,
if (unlikely(suppress_printk))
return 0;
- if (unlikely(suppress_panic_printk) &&
- atomic_read(&panic_cpu) != raw_smp_processor_id())
+ /*
+ * The messages on the panic CPU are the most important. If
+ * non-panic CPUs are generating any messages, they will be
+ * silently dropped.
+ */
+ if (other_cpu_in_panic())
return 0;
if (level == LOGLEVEL_SCHED) {
@@ -2601,26 +2640,6 @@ static int console_cpu_notify(unsigned int cpu)
return 0;
}
-/*
- * Return true if a panic is in progress on a remote CPU.
- *
- * On true, the local CPU should immediately release any printing resources
- * that may be needed by the panic CPU.
- */
-bool other_cpu_in_panic(void)
-{
- if (!panic_in_progress())
- return false;
-
- /*
- * We can use raw_smp_processor_id() here because it is impossible for
- * the task to be migrated to the panic_cpu, or away from it. If
- * panic_cpu has already been set, and we're not currently executing on
- * that CPU, then we never will be.
- */
- return atomic_read(&panic_cpu) != raw_smp_processor_id();
-}
-
/**
* console_lock - block the console subsystem from printing
*
@@ -2776,8 +2795,6 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
bool is_extended, bool may_suppress)
{
- static int panic_console_dropped;
-
struct printk_buffers *pbufs = pmsg->pbufs;
const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
const size_t outbuf_sz = sizeof(pbufs->outbuf);
@@ -2805,17 +2822,6 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
pmsg->seq = r.info->seq;
pmsg->dropped = r.info->seq - seq;
- /*
- * Check for dropped messages in panic here so that printk
- * suppression can occur as early as possible if necessary.
- */
- if (pmsg->dropped &&
- panic_in_progress() &&
- panic_console_dropped++ > 10) {
- suppress_panic_printk = 1;
- pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
- }
-
/* Skip record that has level above the console loglevel. */
if (may_suppress && suppress_message_printing(r.info->level))
goto out;
@@ -3761,7 +3767,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
might_sleep();
- seq = prb_next_seq(prb);
+ seq = prb_next_reserve_seq(prb);
/* Flush the consoles so that records up to @seq are printed. */
console_lock();
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index fde338606ce8..88e8f3a61922 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -6,6 +6,7 @@
#include <linux/errno.h>
#include <linux/bug.h>
#include "printk_ringbuffer.h"
+#include "internal.h"
/**
* DOC: printk_ringbuffer overview
@@ -303,6 +304,9 @@
*
* desc_push_tail:B / desc_reserve:D
* set descriptor reusable (state), then push descriptor tail (id)
+ *
+ * desc_update_last_finalized:A / desc_last_finalized_seq:A
+ * store finalized record, then set new highest finalized sequence number
*/
#define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits)
@@ -1030,9 +1034,13 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
unsigned long next_lpos;
if (size == 0) {
- /* Specify a data-less block. */
- blk_lpos->begin = NO_LPOS;
- blk_lpos->next = NO_LPOS;
+ /*
+ * Data blocks are not created for empty lines. Instead, the
+ * reader will recognize these special lpos values and handle
+ * it appropriately.
+ */
+ blk_lpos->begin = EMPTY_LINE_LPOS;
+ blk_lpos->next = EMPTY_LINE_LPOS;
return NULL;
}
@@ -1210,10 +1218,18 @@ static const char *get_data(struct prb_data_ring *data_ring,
/* Data-less data block description. */
if (BLK_DATALESS(blk_lpos)) {
- if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
+ /*
+ * Records that are just empty lines are also valid, even
+ * though they do not have a data block. For such records
+ * explicitly return empty string data to signify success.
+ */
+ if (blk_lpos->begin == EMPTY_LINE_LPOS &&
+ blk_lpos->next == EMPTY_LINE_LPOS) {
*data_size = 0;
return "";
}
+
+ /* Data lost, invalid, or otherwise unavailable. */
return NULL;
}
@@ -1442,19 +1458,117 @@ fail_reopen:
}
/*
+ * @last_finalized_seq value guarantees that all records up to and including
+ * this sequence number are finalized and can be read. The only exception are
+ * too old records which have already been overwritten.
+ *
+ * It is also guaranteed that @last_finalized_seq only increases.
+ *
+ * Be aware that finalized records following non-finalized records are not
+ * reported because they are not yet available to the reader. For example,
+ * a new record stored via printk() will not be available to a printer if
+ * it follows a record that has not been finalized yet. However, once that
+ * non-finalized record becomes finalized, @last_finalized_seq will be
+ * appropriately updated and the full set of finalized records will be
+ * available to the printer. And since each printk() caller will either
+ * directly print or trigger deferred printing of all available unprinted
+ * records, all printk() messages will get printed.
+ */
+static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb)
+{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ unsigned long ulseq;
+
+ /*
+ * Guarantee the sequence number is loaded before loading the
+ * associated record in order to guarantee that the record can be
+ * seen by this CPU. This pairs with desc_update_last_finalized:A.
+ */
+ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq
+ ); /* LMM(desc_last_finalized_seq:A) */
+
+ return __ulseq_to_u64seq(rb, ulseq);
+}
+
+static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
+ struct printk_record *r, unsigned int *line_count);
+
+/*
+ * Check if there are records directly following @last_finalized_seq that are
+ * finalized. If so, update @last_finalized_seq to the latest of these
+ * records. It is not allowed to skip over records that are not yet finalized.
+ */
+static void desc_update_last_finalized(struct printk_ringbuffer *rb)
+{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ u64 old_seq = desc_last_finalized_seq(rb);
+ unsigned long oldval;
+ unsigned long newval;
+ u64 finalized_seq;
+ u64 try_seq;
+
+try_again:
+ finalized_seq = old_seq;
+ try_seq = finalized_seq + 1;
+
+ /* Try to find later finalized records. */
+ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) {
+ finalized_seq = try_seq;
+ try_seq++;
+ }
+
+ /* No update needed if no later finalized record was found. */
+ if (finalized_seq == old_seq)
+ return;
+
+ oldval = __u64seq_to_ulseq(old_seq);
+ newval = __u64seq_to_ulseq(finalized_seq);
+
+ /*
+ * Set the sequence number of a later finalized record that has been
+ * seen.
+ *
+ * Guarantee the record data is visible to other CPUs before storing
+ * its sequence number. This pairs with desc_last_finalized_seq:A.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_last_finalized_seq:A reads from
+ * desc_update_last_finalized:A, then desc_read:A reads from
+ * _prb_commit:B.
+ *
+ * Relies on:
+ *
+ * RELEASE from _prb_commit:B to desc_update_last_finalized:A
+ * matching
+ * ACQUIRE from desc_last_finalized_seq:A to desc_read:A
+ *
+ * Note: _prb_commit:B and desc_update_last_finalized:A can be
+ * different CPUs. However, the desc_update_last_finalized:A
+ * CPU (which performs the release) must have previously seen
+ * _prb_commit:B.
+ */
+ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq,
+ &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */
+ old_seq = __ulseq_to_u64seq(rb, oldval);
+ goto try_again;
+ }
+}
+
+/*
* Attempt to finalize a specified descriptor. If this fails, the descriptor
* is either already final or it will finalize itself when the writer commits.
*/
-static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
+static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id)
{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
unsigned long prev_state_val = DESC_SV(id, desc_committed);
struct prb_desc *d = to_desc(desc_ring, id);
- atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
- DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
-
- /* Best effort to remember the last finalized @id. */
- atomic_long_set(&desc_ring->last_finalized_id, id);
+ if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val,
+ DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */
+ desc_update_last_finalized(rb);
+ }
}
/**
@@ -1550,7 +1664,7 @@ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
* readers. (For seq==0 there is no previous descriptor.)
*/
if (info->seq > 0)
- desc_make_final(desc_ring, DESC_ID(id - 1));
+ desc_make_final(rb, DESC_ID(id - 1));
r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);
/* If text data allocation fails, a data-less record is committed. */
@@ -1643,7 +1757,7 @@ void prb_commit(struct prb_reserved_entry *e)
*/
head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
if (head_id != e->id)
- desc_make_final(desc_ring, e->id);
+ desc_make_final(e->rb, e->id);
}
/**
@@ -1663,12 +1777,9 @@ void prb_commit(struct prb_reserved_entry *e)
*/
void prb_final_commit(struct prb_reserved_entry *e)
{
- struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
-
_prb_commit(e, desc_finalized);
- /* Best effort to remember the last finalized @id. */
- atomic_long_set(&desc_ring->last_finalized_id, e->id);
+ desc_update_last_finalized(e->rb);
}
/*
@@ -1832,7 +1943,7 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq,
}
/* Get the sequence number of the tail descriptor. */
-static u64 prb_first_seq(struct printk_ringbuffer *rb)
+u64 prb_first_seq(struct printk_ringbuffer *rb)
{
struct prb_desc_ring *desc_ring = &rb->desc_ring;
enum desc_state d_state;
@@ -1875,12 +1986,123 @@ static u64 prb_first_seq(struct printk_ringbuffer *rb)
return seq;
}
+/**
+ * prb_next_reserve_seq() - Get the sequence number after the most recently
+ * reserved record.
+ *
+ * @rb: The ringbuffer to get the sequence number from.
+ *
+ * This is the public function available to readers to see what sequence
+ * number will be assigned to the next reserved record.
+ *
+ * Note that depending on the situation, this value can be equal to or
+ * higher than the sequence number returned by prb_next_seq().
+ *
+ * Context: Any context.
+ * Return: The sequence number that will be assigned to the next record
+ * reserved.
+ */
+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb)
+{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ unsigned long last_finalized_id;
+ atomic_long_t *state_var;
+ u64 last_finalized_seq;
+ unsigned long head_id;
+ struct prb_desc desc;
+ unsigned long diff;
+ struct prb_desc *d;
+ int err;
+
+ /*
+ * It may not be possible to read a sequence number for @head_id.
+ * So the ID of @last_finailzed_seq is used to calculate what the
+ * sequence number of @head_id will be.
+ */
+
+try_again:
+ last_finalized_seq = desc_last_finalized_seq(rb);
+
+ /*
+ * @head_id is loaded after @last_finalized_seq to ensure that
+ * it points to the record with @last_finalized_seq or newer.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_last_finalized_seq:A reads from
+ * desc_update_last_finalized:A, then
+ * prb_next_reserve_seq:A reads from desc_reserve:D.
+ *
+ * Relies on:
+ *
+ * RELEASE from desc_reserve:D to desc_update_last_finalized:A
+ * matching
+ * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A
+ *
+ * Note: desc_reserve:D and desc_update_last_finalized:A can be
+ * different CPUs. However, the desc_update_last_finalized:A CPU
+ * (which performs the release) must have previously seen
+ * desc_read:C, which implies desc_reserve:D can be seen.
+ */
+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */
+
+ d = to_desc(desc_ring, last_finalized_seq);
+ state_var = &d->state_var;
+
+ /* Extract the ID, used to specify the descriptor to read. */
+ last_finalized_id = DESC_ID(atomic_long_read(state_var));
+
+ /* Ensure @last_finalized_id is correct. */
+ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc);
+
+ if (err == -EINVAL) {
+ if (last_finalized_seq == 0) {
+ /*
+ * No record has been finalized or even reserved yet.
+ *
+ * The @head_id is initialized such that the first
+ * increment will yield the first record (seq=0).
+ * Handle it separately to avoid a negative @diff
+ * below.
+ */
+ if (head_id == DESC0_ID(desc_ring->count_bits))
+ return 0;
+
+ /*
+ * One or more descriptors are already reserved. Use
+ * the descriptor ID of the first one (@seq=0) for
+ * the @diff below.
+ */
+ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1;
+ } else {
+ /* Record must have been overwritten. Try again. */
+ goto try_again;
+ }
+ }
+
+ /* Diff of known descriptor IDs to compute related sequence numbers. */
+ diff = head_id - last_finalized_id;
+
+ /*
+ * @head_id points to the most recently reserved record, but this
+ * function returns the sequence number that will be assigned to the
+ * next (not yet reserved) record. Thus +1 is needed.
+ */
+ return (last_finalized_seq + diff + 1);
+}
+
/*
- * Non-blocking read of a record. Updates @seq to the last finalized record
- * (which may have no data available).
+ * Non-blocking read of a record.
+ *
+ * On success @seq is updated to the record that was read and (if provided)
+ * @r and @line_count will contain the read/calculated data.
+ *
+ * On failure @seq is updated to a record that is not yet available to the
+ * reader, but it will be the next record available to the reader.
*
- * See the description of prb_read_valid() and prb_read_valid_info()
- * for details.
+ * Note: When the current CPU is in panic, this function will skip over any
+ * non-existent/non-finalized records in order to allow the panic CPU
+ * to print any and all records that have been finalized.
*/
static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
struct printk_record *r, unsigned int *line_count)
@@ -1899,12 +2121,32 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
*seq = tail_seq;
} else if (err == -ENOENT) {
- /* Record exists, but no data available. Skip. */
+ /* Record exists, but the data was lost. Skip. */
(*seq)++;
} else {
- /* Non-existent/non-finalized record. Must stop. */
- return false;
+ /*
+ * Non-existent/non-finalized record. Must stop.
+ *
+ * For panic situations it cannot be expected that
+ * non-finalized records will become finalized. But
+ * there may be other finalized records beyond that
+ * need to be printed for a panic situation. If this
+ * is the panic CPU, skip this
+ * non-existent/non-finalized record unless it is
+ * at or beyond the head, in which case it is not
+ * possible to continue.
+ *
+ * Note that new messages printed on panic CPU are
+ * finalized when we are here. The only exception
+ * might be the last message without trailing newline.
+ * But it would have the sequence number returned
+ * by "prb_next_reserve_seq() - 1".
+ */
+ if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb)))
+ (*seq)++;
+ else
+ return false;
}
}
@@ -1932,7 +2174,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
* On success, the reader must check r->info.seq to see which record was
* actually read. This allows the reader to detect dropped records.
*
- * Failure means @seq refers to a not yet written record.
+ * Failure means @seq refers to a record not yet available to the reader.
*/
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
struct printk_record *r)
@@ -1962,7 +2204,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
* On success, the reader must check info->seq to see which record meta data
* was actually read. This allows the reader to detect dropped records.
*
- * Failure means @seq refers to a not yet written record.
+ * Failure means @seq refers to a record not yet available to the reader.
*/
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
struct printk_info *info, unsigned int *line_count)
@@ -2008,7 +2250,9 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
* newest sequence number available to readers will be.
*
* This provides readers a sequence number to jump to if all currently
- * available records should be skipped.
+ * available records should be skipped. It is guaranteed that all records
+ * previous to the returned value have been finalized and are (or were)
+ * available to the reader.
*
* Context: Any context.
* Return: The sequence number of the next newest (not yet available) record
@@ -2016,34 +2260,19 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
*/
u64 prb_next_seq(struct printk_ringbuffer *rb)
{
- struct prb_desc_ring *desc_ring = &rb->desc_ring;
- enum desc_state d_state;
- unsigned long id;
u64 seq;
- /* Check if the cached @id still points to a valid @seq. */
- id = atomic_long_read(&desc_ring->last_finalized_id);
- d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
+ seq = desc_last_finalized_seq(rb);
- if (d_state == desc_finalized || d_state == desc_reusable) {
- /*
- * Begin searching after the last finalized record.
- *
- * On 0, the search must begin at 0 because of hack#2
- * of the bootstrapping phase it is not known if a
- * record at index 0 exists.
- */
- if (seq != 0)
- seq++;
- } else {
- /*
- * The information about the last finalized sequence number
- * has gone. It should happen only when there is a flood of
- * new messages and the ringbuffer is rapidly recycled.
- * Give up and start from the beginning.
- */
- seq = 0;
- }
+ /*
+ * Begin searching after the last finalized record.
+ *
+ * On 0, the search must begin at 0 because of hack#2
+ * of the bootstrapping phase it is not known if a
+ * record at index 0 exists.
+ */
+ if (seq != 0)
+ seq++;
/*
* The information about the last finalized @seq might be inaccurate.
@@ -2085,7 +2314,7 @@ void prb_init(struct printk_ringbuffer *rb,
rb->desc_ring.infos = infos;
atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
- atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits));
+ atomic_long_set(&rb->desc_ring.last_finalized_seq, 0);
rb->text_data_ring.size_bits = textbits;
rb->text_data_ring.data = text_buf;
diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h
index 18cd25e489b8..52626d0f1fa3 100644
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -75,7 +75,7 @@ struct prb_desc_ring {
struct printk_info *infos;
atomic_long_t head_id;
atomic_long_t tail_id;
- atomic_long_t last_finalized_id;
+ atomic_long_t last_finalized_seq;
};
/*
@@ -127,8 +127,22 @@ enum desc_state {
#define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id)
#define DESC_ID_MASK (~DESC_FLAGS_MASK)
#define DESC_ID(sv) ((sv) & DESC_ID_MASK)
+
+/*
+ * Special data block logical position values (for fields of
+ * @prb_desc.text_blk_lpos).
+ *
+ * - Bit0 is used to identify if the record has no data block. (Implemented in
+ * the LPOS_DATALESS() macro.)
+ *
+ * - Bit1 specifies the reason for not having a data block.
+ *
+ * These special values could never be real lpos values because of the
+ * meta data and alignment padding of data blocks. (See to_blk_size() for
+ * details.)
+ */
#define FAILED_LPOS 0x1
-#define NO_LPOS 0x3
+#define EMPTY_LINE_LPOS 0x3
#define FAILED_BLK_LPOS \
{ \
@@ -259,7 +273,7 @@ static struct printk_ringbuffer name = { \
.infos = &_##name##_infos[0], \
.head_id = ATOMIC_INIT(DESC0_ID(descbits)), \
.tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \
- .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \
+ .last_finalized_seq = ATOMIC_INIT(0), \
}, \
.text_data_ring = { \
.size_bits = (avgtextbits) + (descbits), \
@@ -378,7 +392,41 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
struct printk_info *info, unsigned int *line_count);
+u64 prb_first_seq(struct printk_ringbuffer *rb);
u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
u64 prb_next_seq(struct printk_ringbuffer *rb);
+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb);
+
+#ifdef CONFIG_64BIT
+
+#define __u64seq_to_ulseq(u64seq) (u64seq)
+#define __ulseq_to_u64seq(rb, ulseq) (ulseq)
+
+#else /* CONFIG_64BIT */
+
+#define __u64seq_to_ulseq(u64seq) ((u32)u64seq)
+
+static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
+{
+ u64 rb_first_seq = prb_first_seq(rb);
+ u64 seq;
+
+ /*
+ * The provided sequence is only the lower 32 bits of the ringbuffer
+ * sequence. It needs to be expanded to 64bit. Get the first sequence
+ * number from the ringbuffer and fold it.
+ *
+ * Having a 32bit representation in the console is sufficient.
+ * If a console ever gets more than 2^31 records behind
+ * the ringbuffer then this is the least of the problems.
+ *
+ * Also the access to the ring buffer is always safe.
+ */
+ seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq);
+
+ return seq;
+}
+
+#endif /* CONFIG_64BIT */
#endif /* _KERNEL_PRINTK_RINGBUFFER_H */
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index bdd7eadb33d8..e7d2dd267593 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -314,6 +314,19 @@ config RCU_LAZY
To save power, batch RCU callbacks and flush after delay, memory
pressure, or callback list growing too big.
+ Requires rcu_nocbs=all to be set.
+
+ Use rcutree.enable_rcu_lazy=0 to turn it off at boot time.
+
+config RCU_LAZY_DEFAULT_OFF
+ bool "Turn RCU lazy invocation off by default"
+ depends on RCU_LAZY
+ default n
+ help
+ Allows building the kernel with CONFIG_RCU_LAZY=y yet keep it default
+ off. Boot time param rcutree.enable_rcu_lazy=1 can be used to switch
+ it back on.
+
config RCU_DOUBLE_CHECK_CB_TIME
bool "RCU callback-batch backup time check"
depends on RCU_EXPERT
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f94f65877f2b..86fce206560e 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -528,6 +528,12 @@ struct task_struct *get_rcu_tasks_gp_kthread(void);
struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
#endif // # ifdef CONFIG_TASKS_RUDE_RCU
+#ifdef CONFIG_TASKS_RCU_GENERIC
+void tasks_cblist_init_generic(void);
+#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+static inline void tasks_cblist_init_generic(void) { }
+#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
+
#define RCU_SCHEDULER_INACTIVE 0
#define RCU_SCHEDULER_INIT 1
#define RCU_SCHEDULER_RUNNING 2
@@ -543,11 +549,11 @@ enum rcutorture_type {
};
#if defined(CONFIG_RCU_LAZY)
-unsigned long rcu_lazy_get_jiffies_till_flush(void);
-void rcu_lazy_set_jiffies_till_flush(unsigned long j);
+unsigned long rcu_get_jiffies_lazy_flush(void);
+void rcu_set_jiffies_lazy_flush(unsigned long j);
#else
-static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
-static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
+static inline unsigned long rcu_get_jiffies_lazy_flush(void) { return 0; }
+static inline void rcu_set_jiffies_lazy_flush(unsigned long j) { }
#endif
#if defined(CONFIG_TREE_RCU)
@@ -623,12 +629,7 @@ int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
-#ifdef CONFIG_RCU_EXP_KTHREAD
extern struct kthread_worker *rcu_exp_gp_kworker;
-extern struct kthread_worker *rcu_exp_par_gp_kworker;
-#else /* !CONFIG_RCU_EXP_KTHREAD */
-extern struct workqueue_struct *rcu_par_gp_wq;
-#endif /* CONFIG_RCU_EXP_KTHREAD */
void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index ffdb30495e3c..8db4fedaaa1e 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -764,9 +764,9 @@ kfree_scale_init(void)
if (kfree_by_call_rcu) {
/* do a test to check the timeout. */
- orig_jif = rcu_lazy_get_jiffies_till_flush();
+ orig_jif = rcu_get_jiffies_lazy_flush();
- rcu_lazy_set_jiffies_till_flush(2 * HZ);
+ rcu_set_jiffies_lazy_flush(2 * HZ);
rcu_barrier();
jif_start = jiffies;
@@ -775,7 +775,7 @@ kfree_scale_init(void)
smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);
- rcu_lazy_set_jiffies_till_flush(orig_jif);
+ rcu_set_jiffies_lazy_flush(orig_jif);
if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7567ca8e743c..45d6b4c3d199 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1368,9 +1368,13 @@ rcu_torture_writer(void *arg)
struct rcu_torture *rp;
struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand);
+ unsigned long stallsdone = jiffies;
bool stutter_waited;
unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];
+ // If a new stall test is added, this must be adjusted.
+ if (stall_cpu_holdoff + stall_gp_kthread + stall_cpu)
+ stallsdone += (stall_cpu_holdoff + stall_gp_kthread + stall_cpu + 60) * HZ;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite)
pr_alert("%s" TORTURE_FLAG
@@ -1576,11 +1580,11 @@ rcu_torture_writer(void *arg)
!atomic_read(&rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop() &&
- boot_ended)
+ boot_ended &&
+ time_after(jiffies, stallsdone))
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
if (list_empty(&rcu_tortures[i].rtort_free) &&
- rcu_access_pointer(rcu_torture_current) !=
- &rcu_tortures[i]) {
+ rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) {
tracing_off();
show_rcu_gp_kthreads();
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
@@ -2441,7 +2445,8 @@ static struct notifier_block rcu_torture_stall_block = {
/*
* CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
- * induces a CPU stall for the time specified by stall_cpu.
+ * induces a CPU stall for the time specified by stall_cpu. If a new
+ * stall test is added, stallsdone in rcu_torture_writer() must be adjusted.
*/
static int rcu_torture_stall(void *args)
{
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 0351a4e83529..e4d673fc30f4 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1234,11 +1234,20 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
/*
- * The snapshot for acceleration must be taken _before_ the read of the
- * current gp sequence used for advancing, otherwise advancing may fail
- * and acceleration may then fail too.
+ * It's crucial to capture the snapshot 's' for acceleration before
+ * reading the current gp_seq that is used for advancing. This is
+ * essential because if the acceleration snapshot is taken after a
+ * failed advancement attempt, there's a risk that a grace period may
+ * conclude and a new one may start in the interim. If the snapshot is
+ * captured after this sequence of events, the acceleration snapshot 's'
+ * could be excessively advanced, leading to acceleration failure.
+ * In such a scenario, an 'acceleration leak' can occur, where new
+ * callbacks become indefinitely stuck in the RCU_NEXT_TAIL segment.
+ * Also note that encountering advancing failures is a normal
+ * occurrence when the grace period for RCU_WAIT_TAIL is in progress.
*
- * This could happen if:
+ * To see this, consider the following events which occur if
+ * rcu_seq_snap() were to be called after advance:
*
* 1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the
* RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8).
@@ -1264,6 +1273,13 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp) {
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
+ /*
+ * Acceleration can never fail because the base current gp_seq
+ * used for acceleration is <= the value of gp_seq used for
+ * advancing. This means that RCU_NEXT_TAIL segment will
+ * always be able to be emptied by the acceleration into the
+ * RCU_NEXT_READY_TAIL or RCU_WAIT_TAIL segments.
+ */
WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));
}
if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index e550f97779b8..86df878a2fee 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -24,22 +24,6 @@ void rcu_sync_init(struct rcu_sync *rsp)
init_waitqueue_head(&rsp->gp_wait);
}
-/**
- * rcu_sync_enter_start - Force readers onto slow path for multiple updates
- * @rsp: Pointer to rcu_sync structure to use for synchronization
- *
- * Must be called after rcu_sync_init() and before first use.
- *
- * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
- * pairs turn into NO-OPs.
- */
-void rcu_sync_enter_start(struct rcu_sync *rsp)
-{
- rsp->gp_count++;
- rsp->gp_state = GP_PASSED;
-}
-
-
static void rcu_sync_func(struct rcu_head *rhp);
static void rcu_sync_call(struct rcu_sync *rsp)
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 732ad5b39946..147b5945d67a 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -32,6 +32,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* @rtp_irq_work: IRQ work queue for deferred wakeups.
* @barrier_q_head: RCU callback for barrier operation.
* @rtp_blkd_tasks: List of tasks blocked as readers.
+ * @rtp_exit_list: List of tasks in the latter portion of do_exit().
* @cpu: CPU number corresponding to this entry.
* @rtpp: Pointer to the rcu_tasks structure.
*/
@@ -46,6 +47,7 @@ struct rcu_tasks_percpu {
struct irq_work rtp_irq_work;
struct rcu_head barrier_q_head;
struct list_head rtp_blkd_tasks;
+ struct list_head rtp_exit_list;
int cpu;
struct rcu_tasks *rtpp;
};
@@ -144,8 +146,6 @@ static struct rcu_tasks rt_name = \
}
#ifdef CONFIG_TASKS_RCU
-/* Track exiting tasks in order to allow them to be waited for. */
-DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
@@ -240,7 +240,6 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
static void cblist_init_generic(struct rcu_tasks *rtp)
{
int cpu;
- unsigned long flags;
int lim;
int shift;
@@ -266,15 +265,15 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
WARN_ON_ONCE(!rtpcp);
if (cpu)
raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
- local_irq_save(flags); // serialize initialization
if (rcu_segcblist_empty(&rtpcp->cblist))
rcu_segcblist_init(&rtpcp->cblist);
- local_irq_restore(flags);
INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
rtpcp->cpu = cpu;
rtpcp->rtpp = rtp;
if (!rtpcp->rtp_blkd_tasks.next)
INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
+ if (!rtpcp->rtp_exit_list.next)
+ INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
}
pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
@@ -851,10 +850,12 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// number of voluntary context switches, and add that task to the
// holdout list.
// rcu_tasks_postscan():
-// Invoke synchronize_srcu() to ensure that all tasks that were
-// in the process of exiting (and which thus might not know to
-// synchronize with this RCU Tasks grace period) have completed
-// exiting.
+// Gather per-CPU lists of tasks in do_exit() to ensure that all
+// tasks that were in the process of exiting (and which thus might
+// not know to synchronize with this RCU Tasks grace period) have
+// completed exiting. The synchronize_rcu() in rcu_tasks_postgp()
+// will take care of any tasks stuck in the non-preemptible region
+// of do_exit() following its call to exit_tasks_rcu_stop().
// check_all_holdout_tasks(), repeatedly until holdout list is empty:
// Scans the holdout list, attempting to identify a quiescent state
// for each task on the list. If there is a quiescent state, the
@@ -867,8 +868,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// with interrupts disabled.
//
// For each exiting task, the exit_tasks_rcu_start() and
-// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
-// read-side critical sections waited for by rcu_tasks_postscan().
+// exit_tasks_rcu_finish() functions add and remove, respectively, the
+// current task to a per-CPU list of tasks that rcu_tasks_postscan() must
+// wait on. This is necessary because rcu_tasks_postscan() must wait on
+// tasks that have already been removed from the global list of tasks.
//
// Pre-grace-period update-side code is ordered before the grace
// via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code
@@ -932,9 +935,13 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
}
}
+void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
+DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
+
/* Processing between scanning taskslist and draining the holdout list. */
static void rcu_tasks_postscan(struct list_head *hop)
{
+ int cpu;
int rtsi = READ_ONCE(rcu_task_stall_info);
if (!IS_ENABLED(CONFIG_TINY_RCU)) {
@@ -948,9 +955,9 @@ static void rcu_tasks_postscan(struct list_head *hop)
* this, divide the fragile exit path part in two intersecting
* read side critical sections:
*
- * 1) An _SRCU_ read side starting before calling exit_notify(),
- * which may remove the task from the tasklist, and ending after
- * the final preempt_disable() call in do_exit().
+ * 1) A task_struct list addition before calling exit_notify(),
+ * which may remove the task from the tasklist, with the
+ * removal after the final preempt_disable() call in do_exit().
*
* 2) An _RCU_ read side starting with the final preempt_disable()
* call in do_exit() and ending with the final call to schedule()
@@ -959,7 +966,37 @@ static void rcu_tasks_postscan(struct list_head *hop)
* This handles the part 1). And postgp will handle part 2) with a
* call to synchronize_rcu().
*/
- synchronize_srcu(&tasks_rcu_exit_srcu);
+
+ for_each_possible_cpu(cpu) {
+ unsigned long j = jiffies + 1;
+ struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
+ struct task_struct *t;
+ struct task_struct *t1;
+ struct list_head tmp;
+
+ raw_spin_lock_irq_rcu_node(rtpcp);
+ list_for_each_entry_safe(t, t1, &rtpcp->rtp_exit_list, rcu_tasks_exit_list) {
+ if (list_empty(&t->rcu_tasks_holdout_list))
+ rcu_tasks_pertask(t, hop);
+
+ // RT kernels need frequent pauses, otherwise
+ // pause at least once per pair of jiffies.
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && time_before(jiffies, j))
+ continue;
+
+ // Keep our place in the list while pausing.
+ // Nothing else traverses this list, so adding a
+ // bare list_head is OK.
+ list_add(&tmp, &t->rcu_tasks_exit_list);
+ raw_spin_unlock_irq_rcu_node(rtpcp);
+ cond_resched(); // For CONFIG_PREEMPT=n kernels
+ raw_spin_lock_irq_rcu_node(rtpcp);
+ t1 = list_entry(tmp.next, struct task_struct, rcu_tasks_exit_list);
+ list_del(&tmp);
+ j = jiffies + 1;
+ }
+ raw_spin_unlock_irq_rcu_node(rtpcp);
+ }
if (!IS_ENABLED(CONFIG_TINY_RCU))
del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
@@ -1027,7 +1064,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
*
* In addition, this synchronize_rcu() waits for exiting tasks
* to complete their final preempt_disable() region of execution,
- * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
* enforcing the whole region before tasklist removal until
* the final schedule() with TASK_DEAD state to be an RCU TASKS
* read side critical section.
@@ -1035,9 +1071,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
synchronize_rcu();
}
-void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
-DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
-
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
{
#ifndef CONFIG_TINY_RCU
@@ -1118,7 +1151,6 @@ module_param(rcu_tasks_lazy_ms, int, 0444);
static int __init rcu_spawn_tasks_kthread(void)
{
- cblist_init_generic(&rcu_tasks);
rcu_tasks.gp_sleep = HZ / 10;
rcu_tasks.init_fract = HZ / 10;
if (rcu_tasks_lazy_ms >= 0)
@@ -1147,25 +1179,48 @@ struct task_struct *get_rcu_tasks_gp_kthread(void)
EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
/*
- * Contribute to protect against tasklist scan blind spot while the
- * task is exiting and may be removed from the tasklist. See
- * corresponding synchronize_srcu() for further details.
+ * Protect against tasklist scan blind spot while the task is exiting and
+ * may be removed from the tasklist. Do this by adding the task to yet
+ * another list.
+ *
+ * Note that the task will remove itself from this list, so there is no
+ * need for get_task_struct(), except in the case where rcu_tasks_pertask()
+ * adds it to the holdout list, in which case rcu_tasks_pertask() supplies
+ * the needed get_task_struct().
*/
-void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
+void exit_tasks_rcu_start(void)
{
- current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
+ unsigned long flags;
+ struct rcu_tasks_percpu *rtpcp;
+ struct task_struct *t = current;
+
+ WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list));
+ preempt_disable();
+ rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
+ t->rcu_tasks_exit_cpu = smp_processor_id();
+ raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+ if (!rtpcp->rtp_exit_list.next)
+ INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
+ list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
+ raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+ preempt_enable();
}
/*
- * Contribute to protect against tasklist scan blind spot while the
- * task is exiting and may be removed from the tasklist. See
- * corresponding synchronize_srcu() for further details.
+ * Remove the task from the "yet another list" because do_exit() is now
+ * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
*/
-void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
+void exit_tasks_rcu_stop(void)
{
+ unsigned long flags;
+ struct rcu_tasks_percpu *rtpcp;
struct task_struct *t = current;
- __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
+ WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list));
+ rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, t->rcu_tasks_exit_cpu);
+ raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+ list_del_init(&t->rcu_tasks_exit_list);
+ raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
}
/*
@@ -1282,7 +1337,6 @@ module_param(rcu_tasks_rude_lazy_ms, int, 0444);
static int __init rcu_spawn_tasks_rude_kthread(void)
{
- cblist_init_generic(&rcu_tasks_rude);
rcu_tasks_rude.gp_sleep = HZ / 10;
if (rcu_tasks_rude_lazy_ms >= 0)
rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms);
@@ -1914,7 +1968,6 @@ module_param(rcu_tasks_trace_lazy_ms, int, 0444);
static int __init rcu_spawn_tasks_trace_kthread(void)
{
- cblist_init_generic(&rcu_tasks_trace);
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
rcu_tasks_trace.gp_sleep = HZ / 10;
rcu_tasks_trace.init_fract = HZ / 10;
@@ -2086,6 +2139,24 @@ late_initcall(rcu_tasks_verify_schedule_work);
static void rcu_tasks_initiate_self_tests(void) { }
#endif /* #else #ifdef CONFIG_PROVE_RCU */
+void __init tasks_cblist_init_generic(void)
+{
+ lockdep_assert_irqs_disabled();
+ WARN_ON(num_online_cpus() > 1);
+
+#ifdef CONFIG_TASKS_RCU
+ cblist_init_generic(&rcu_tasks);
+#endif
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+ cblist_init_generic(&rcu_tasks_rude);
+#endif
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+ cblist_init_generic(&rcu_tasks_trace);
+#endif
+}
+
void __init rcu_init_tasks_generic(void)
{
#ifdef CONFIG_TASKS_RCU
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index fec804b79080..705c0d16850a 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -261,4 +261,5 @@ void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_early_boot_tests();
+ tasks_cblist_init_generic();
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ae851777806..d9642dd06c25 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -145,7 +145,7 @@ static int rcu_scheduler_fully_active __read_mostly;
static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
unsigned long gps, unsigned long flags);
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
+static struct task_struct *rcu_boost_task(struct rcu_node *rnp);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
@@ -1013,6 +1013,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
return needmore;
}
+static void swake_up_one_online_ipi(void *arg)
+{
+ struct swait_queue_head *wqh = arg;
+
+ swake_up_one(wqh);
+}
+
+static void swake_up_one_online(struct swait_queue_head *wqh)
+{
+ int cpu = get_cpu();
+
+ /*
+ * If called from rcutree_report_cpu_starting(), wake up
+ * is dangerous that late in the CPU-down hotplug process. The
+ * scheduler might queue an ignored hrtimer. Defer the wake up
+ * to an online CPU instead.
+ */
+ if (unlikely(cpu_is_offline(cpu))) {
+ int target;
+
+ target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU),
+ cpu_online_mask);
+
+ smp_call_function_single(target, swake_up_one_online_ipi,
+ wqh, 0);
+ put_cpu();
+ } else {
+ put_cpu();
+ swake_up_one(wqh);
+ }
+}
+
/*
* Awaken the grace-period kthread. Don't do a self-awaken (unless in an
* interrupt or softirq handler, in which case we just might immediately
@@ -1037,7 +1069,7 @@ static void rcu_gp_kthread_wake(void)
return;
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
- swake_up_one(&rcu_state.gp_wq);
+ swake_up_one_online(&rcu_state.gp_wq);
}
/*
@@ -2113,6 +2145,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
* Extract the list of ready callbacks, disabling IRQs to prevent
* races with call_rcu() from interrupt handlers. Leave the
* callback counts, as rcu_barrier() needs to be conservative.
+ *
+ * Callbacks execution is fully ordered against preceding grace period
+ * completion (materialized by rnp->gp_seq update) thanks to the
+ * smp_mb__after_unlock_lock() upon node locking required for callbacks
+ * advancing. In NOCB mode this ordering is then further relayed through
+ * the nocb locking that protects both callbacks advancing and extraction.
*/
rcu_nocb_lock_irqsave(rdp, flags);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
@@ -2559,12 +2597,26 @@ static int __init rcu_spawn_core_kthreads(void)
return 0;
}
+static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func)
+{
+ rcu_segcblist_enqueue(&rdp->cblist, head);
+ if (__is_kvfree_rcu_offset((unsigned long)func))
+ trace_rcu_kvfree_callback(rcu_state.name, head,
+ (unsigned long)func,
+ rcu_segcblist_n_cbs(&rdp->cblist));
+ else
+ trace_rcu_callback(rcu_state.name, head,
+ rcu_segcblist_n_cbs(&rdp->cblist));
+ trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
+}
+
/*
* Handle any core-RCU processing required by a call_rcu() invocation.
*/
-static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
- unsigned long flags)
+static void call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
+ rcu_callback_t func, unsigned long flags)
{
+ rcutree_enqueue(rdp, head, func);
/*
* If called from an extended quiescent state, invoke the RCU
* core in order to force a re-evaluation of RCU's idleness.
@@ -2660,7 +2712,6 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
unsigned long flags;
bool lazy;
struct rcu_data *rdp;
- bool was_alldone;
/* Misaligned rcu_head! */
WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
@@ -2697,30 +2748,18 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
}
check_cb_ovld(rdp);
- if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
- return; // Enqueued onto ->nocb_bypass, so just leave.
- // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
- rcu_segcblist_enqueue(&rdp->cblist, head);
- if (__is_kvfree_rcu_offset((unsigned long)func))
- trace_rcu_kvfree_callback(rcu_state.name, head,
- (unsigned long)func,
- rcu_segcblist_n_cbs(&rdp->cblist));
- else
- trace_rcu_callback(rcu_state.name, head,
- rcu_segcblist_n_cbs(&rdp->cblist));
- trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
-
- /* Go handle any RCU core processing required. */
- if (unlikely(rcu_rdp_is_offloaded(rdp))) {
- __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
- } else {
- __call_rcu_core(rdp, head, flags);
- local_irq_restore(flags);
- }
+ if (unlikely(rcu_rdp_is_offloaded(rdp)))
+ call_rcu_nocb(rdp, head, func, flags, lazy);
+ else
+ call_rcu_core(rdp, head, func, flags);
+ local_irq_restore(flags);
}
#ifdef CONFIG_RCU_LAZY
+static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF);
+module_param(enable_rcu_lazy, bool, 0444);
+
/**
* call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
* flush all lazy callbacks (including the new one) to the main ->cblist while
@@ -2746,6 +2785,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
__call_rcu_common(head, func, false);
}
EXPORT_SYMBOL_GPL(call_rcu_hurry);
+#else
+#define enable_rcu_lazy false
#endif
/**
@@ -2794,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
- __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+ __call_rcu_common(head, func, enable_rcu_lazy);
}
EXPORT_SYMBOL_GPL(call_rcu);
@@ -4362,6 +4403,66 @@ rcu_boot_init_percpu_data(int cpu)
rcu_boot_init_nocb_percpu_data(rdp);
}
+struct kthread_worker *rcu_exp_gp_kworker;
+
+static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
+{
+ struct kthread_worker *kworker;
+ const char *name = "rcu_exp_par_gp_kthread_worker/%d";
+ struct sched_param param = { .sched_priority = kthread_prio };
+ int rnp_index = rnp - rcu_get_root();
+
+ if (rnp->exp_kworker)
+ return;
+
+ kworker = kthread_create_worker(0, name, rnp_index);
+ if (IS_ERR_OR_NULL(kworker)) {
+ pr_err("Failed to create par gp kworker on %d/%d\n",
+ rnp->grplo, rnp->grphi);
+ return;
+ }
+ WRITE_ONCE(rnp->exp_kworker, kworker);
+
+ if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
+ sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
+}
+
+static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp)
+{
+ struct kthread_worker *kworker = READ_ONCE(rnp->exp_kworker);
+
+ if (!kworker)
+ return NULL;
+
+ return kworker->task;
+}
+
+static void __init rcu_start_exp_gp_kworker(void)
+{
+ const char *name = "rcu_exp_gp_kthread_worker";
+ struct sched_param param = { .sched_priority = kthread_prio };
+
+ rcu_exp_gp_kworker = kthread_create_worker(0, name);
+ if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+ pr_err("Failed to create %s!\n", name);
+ rcu_exp_gp_kworker = NULL;
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
+ sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+}
+
+static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp)
+{
+ if (rcu_scheduler_fully_active) {
+ mutex_lock(&rnp->kthread_mutex);
+ rcu_spawn_one_boost_kthread(rnp);
+ rcu_spawn_exp_par_gp_kworker(rnp);
+ mutex_unlock(&rnp->kthread_mutex);
+ }
+}
+
/*
* Invoked early in the CPU-online process, when pretty much all services
* are available. The incoming CPU is not present.
@@ -4410,7 +4511,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- rcu_spawn_one_boost_kthread(rnp);
+ rcu_spawn_rnp_kthreads(rnp);
rcu_spawn_cpu_nocb_kthread(cpu);
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
@@ -4418,13 +4519,64 @@ int rcutree_prepare_cpu(unsigned int cpu)
}
/*
- * Update RCU priority boot kthread affinity for CPU-hotplug changes.
+ * Update kthreads affinity during CPU-hotplug changes.
+ *
+ * Set the per-rcu_node kthread's affinity to cover all CPUs that are
+ * served by the rcu_node in question. The CPU hotplug lock is still
+ * held, so the value of rnp->qsmaskinit will be stable.
+ *
+ * We don't include outgoingcpu in the affinity set, use -1 if there is
+ * no outgoing CPU. If there are no CPUs left in the affinity set,
+ * this function allows the kthread to execute on any CPU.
+ *
+ * Any future concurrent calls are serialized via ->kthread_mutex.
*/
-static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
+static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu)
{
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ cpumask_var_t cm;
+ unsigned long mask;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct task_struct *task_boost, *task_exp;
+
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ rnp = rdp->mynode;
+
+ task_boost = rcu_boost_task(rnp);
+ task_exp = rcu_exp_par_gp_task(rnp);
- rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
+ /*
+ * If CPU is the boot one, those tasks are created later from early
+ * initcall since kthreadd must be created first.
+ */
+ if (!task_boost && !task_exp)
+ return;
+
+ if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
+ return;
+
+ mutex_lock(&rnp->kthread_mutex);
+ mask = rcu_rnp_online_cpus(rnp);
+ for_each_leaf_node_possible_cpu(rnp, cpu)
+ if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
+ cpu != outgoingcpu)
+ cpumask_set_cpu(cpu, cm);
+ cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
+ if (cpumask_empty(cm)) {
+ cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
+ if (outgoingcpu >= 0)
+ cpumask_clear_cpu(outgoingcpu, cm);
+ }
+
+ if (task_exp)
+ set_cpus_allowed_ptr(task_exp, cm);
+
+ if (task_boost)
+ set_cpus_allowed_ptr(task_boost, cm);
+
+ mutex_unlock(&rnp->kthread_mutex);
+
+ free_cpumask_var(cm);
}
/*
@@ -4608,8 +4760,9 @@ void rcutree_migrate_callbacks(int cpu)
__call_rcu_nocb_wake(my_rdp, true, flags);
} else {
rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */
- raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags);
+ raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
}
+ local_irq_restore(flags);
if (needwake)
rcu_gp_kthread_wake();
lockdep_assert_irqs_enabled();
@@ -4698,51 +4851,6 @@ static int rcu_pm_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-#ifdef CONFIG_RCU_EXP_KTHREAD
-struct kthread_worker *rcu_exp_gp_kworker;
-struct kthread_worker *rcu_exp_par_gp_kworker;
-
-static void __init rcu_start_exp_gp_kworkers(void)
-{
- const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
- const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
- struct sched_param param = { .sched_priority = kthread_prio };
-
- rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
- if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
- pr_err("Failed to create %s!\n", gp_kworker_name);
- return;
- }
-
- rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
- if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
- pr_err("Failed to create %s!\n", par_gp_kworker_name);
- kthread_destroy_worker(rcu_exp_gp_kworker);
- return;
- }
-
- sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
- sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
- &param);
-}
-
-static inline void rcu_alloc_par_gp_wq(void)
-{
-}
-#else /* !CONFIG_RCU_EXP_KTHREAD */
-struct workqueue_struct *rcu_par_gp_wq;
-
-static void __init rcu_start_exp_gp_kworkers(void)
-{
-}
-
-static inline void rcu_alloc_par_gp_wq(void)
-{
- rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
- WARN_ON(!rcu_par_gp_wq);
-}
-#endif /* CONFIG_RCU_EXP_KTHREAD */
-
/*
* Spawn the kthreads that handle RCU's grace periods.
*/
@@ -4777,10 +4885,10 @@ static int __init rcu_spawn_gp_kthread(void)
* due to rcu_scheduler_fully_active.
*/
rcu_spawn_cpu_nocb_kthread(smp_processor_id());
- rcu_spawn_one_boost_kthread(rdp->mynode);
+ rcu_spawn_rnp_kthreads(rdp->mynode);
rcu_spawn_core_kthreads();
/* Create kthread worker for expedited GPs */
- rcu_start_exp_gp_kworkers();
+ rcu_start_exp_gp_kworker();
return 0;
}
early_initcall(rcu_spawn_gp_kthread);
@@ -4883,7 +4991,7 @@ static void __init rcu_init_one(void)
init_waitqueue_head(&rnp->exp_wq[2]);
init_waitqueue_head(&rnp->exp_wq[3]);
spin_lock_init(&rnp->exp_lock);
- mutex_init(&rnp->boost_kthread_mutex);
+ mutex_init(&rnp->kthread_mutex);
raw_spin_lock_init(&rnp->exp_poll_lock);
rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp);
@@ -5120,7 +5228,6 @@ void __init rcu_init(void)
/* Create workqueue for Tree SRCU and for expedited GPs. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
- rcu_alloc_par_gp_wq();
/* Fill in default value for rcutree.qovld boot parameter. */
/* -After- the rcu_node ->lock fields are initialized! */
@@ -5133,6 +5240,8 @@ void __init rcu_init(void)
(void)start_poll_synchronize_rcu_expedited();
rcu_test_sync_prims();
+
+ tasks_cblist_init_generic();
}
#include "tree_stall.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e9821a8422db..df48160b3136 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -21,14 +21,10 @@
#include "rcu_segcblist.h"
-/* Communicate arguments to a workqueue handler. */
+/* Communicate arguments to a kthread worker handler. */
struct rcu_exp_work {
unsigned long rew_s;
-#ifdef CONFIG_RCU_EXP_KTHREAD
struct kthread_work rew_work;
-#else
- struct work_struct rew_work;
-#endif /* CONFIG_RCU_EXP_KTHREAD */
};
/* RCU's kthread states for tracing. */
@@ -72,6 +68,9 @@ struct rcu_node {
/* Online CPUs for next expedited GP. */
/* Any CPU that has ever been online will */
/* have its bit set. */
+ struct kthread_worker *exp_kworker;
+ /* Workers performing per node expedited GP */
+ /* initialization. */
unsigned long cbovldmask;
/* CPUs experiencing callback overload. */
unsigned long ffmask; /* Fully functional CPUs. */
@@ -113,7 +112,7 @@ struct rcu_node {
/* side effect, not as a lock. */
unsigned long boost_time;
/* When to start boosting (jiffies). */
- struct mutex boost_kthread_mutex;
+ struct mutex kthread_mutex;
/* Exclusion for thread spawning and affinity */
/* manipulation. */
struct task_struct *boost_kthread_task;
@@ -467,11 +466,10 @@ static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j, bool lazy);
-static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags,
- bool lazy);
-static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
- unsigned long flags);
+static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
+ rcu_callback_t func, unsigned long flags, bool lazy);
+static void __maybe_unused __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+ unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d7cea5d591f..6b83537480b1 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -173,7 +173,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
return ret;
}
-
/*
* Report the exit from RCU read-side critical section for the last task
* that queued itself during or before the current expedited preemptible-RCU
@@ -199,10 +198,9 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
}
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- if (wake) {
- smp_mb(); /* EGP done before wake_up(). */
- swake_up_one(&rcu_state.expedited_wq);
- }
+ if (wake)
+ swake_up_one_online(&rcu_state.expedited_wq);
+
break;
}
mask = rnp->grpmask;
@@ -420,7 +418,6 @@ retry_ipi:
static void rcu_exp_sel_wait_wake(unsigned long s);
-#ifdef CONFIG_RCU_EXP_KTHREAD
static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
{
struct rcu_exp_work *rewp =
@@ -429,9 +426,14 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
__sync_rcu_exp_select_node_cpus(rewp);
}
-static inline bool rcu_gp_par_worker_started(void)
+static inline bool rcu_exp_worker_started(void)
+{
+ return !!READ_ONCE(rcu_exp_gp_kworker);
+}
+
+static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{
- return !!READ_ONCE(rcu_exp_par_gp_kworker);
+ return !!READ_ONCE(rnp->exp_kworker);
}
static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
@@ -442,7 +444,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
* another work item on the same kthread worker can result in
* deadlock.
*/
- kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
+ kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work);
}
static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
@@ -467,64 +469,6 @@ static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew
kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
}
-static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
-{
-}
-#else /* !CONFIG_RCU_EXP_KTHREAD */
-static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
-{
- struct rcu_exp_work *rewp =
- container_of(wp, struct rcu_exp_work, rew_work);
-
- __sync_rcu_exp_select_node_cpus(rewp);
-}
-
-static inline bool rcu_gp_par_worker_started(void)
-{
- return !!READ_ONCE(rcu_par_gp_wq);
-}
-
-static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
-{
- int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
-
- INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
- /* If all offline, queue the work on an unbound CPU. */
- if (unlikely(cpu > rnp->grphi - rnp->grplo))
- cpu = WORK_CPU_UNBOUND;
- else
- cpu += rnp->grplo;
- queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
-}
-
-static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
-{
- flush_work(&rnp->rew.rew_work);
-}
-
-/*
- * Work-queue handler to drive an expedited grace period forward.
- */
-static void wait_rcu_exp_gp(struct work_struct *wp)
-{
- struct rcu_exp_work *rewp;
-
- rewp = container_of(wp, struct rcu_exp_work, rew_work);
- rcu_exp_sel_wait_wake(rewp->rew_s);
-}
-
-static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
-{
- INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
- queue_work(rcu_gp_wq, &rew->rew_work);
-}
-
-static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
-{
- destroy_work_on_stack(&rew->rew_work);
-}
-#endif /* CONFIG_RCU_EXP_KTHREAD */
-
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
@@ -542,7 +486,7 @@ static void sync_rcu_exp_select_cpus(void)
rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */
- if (!rcu_gp_par_worker_started() ||
+ if (!rcu_exp_par_worker_started(rnp) ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
rcu_is_last_leaf_node(rnp)) {
/* No worker started yet or last leaf, do direct call. */
@@ -957,7 +901,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
*/
void synchronize_rcu_expedited(void)
{
- bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT);
unsigned long flags;
struct rcu_exp_work rew;
struct rcu_node *rnp;
@@ -997,7 +940,7 @@ void synchronize_rcu_expedited(void)
return; /* Someone else did our work for us. */
/* Ensure that load happens before action based on it. */
- if (unlikely(boottime)) {
+ if (unlikely((rcu_scheduler_active == RCU_SCHEDULER_INIT) || !rcu_exp_worker_started())) {
/* Direct call during scheduler init and early_initcalls(). */
rcu_exp_sel_wait_wake(s);
} else {
@@ -1014,9 +957,6 @@ void synchronize_rcu_expedited(void)
/* Let the next expedited grace period start. */
mutex_unlock(&rcu_state.exp_mutex);
-
- if (likely(!boottime))
- synchronize_rcu_expedited_destroy_work(&rew);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 4efbf7333d4e..3f85577bddd4 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -256,6 +256,7 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
return __wake_nocb_gp(rdp_gp, rdp, force, flags);
}
+#ifdef CONFIG_RCU_LAZY
/*
* LAZY_FLUSH_JIFFIES decides the maximum amount of time that
* can elapse before lazy callbacks are flushed. Lazy callbacks
@@ -264,21 +265,20 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
* left unsubmitted to RCU after those many jiffies.
*/
#define LAZY_FLUSH_JIFFIES (10 * HZ)
-static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
+static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES;
-#ifdef CONFIG_RCU_LAZY
// To be called only from test code.
-void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
+void rcu_set_jiffies_lazy_flush(unsigned long jif)
{
- jiffies_till_flush = jif;
+ jiffies_lazy_flush = jif;
}
-EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
+EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush);
-unsigned long rcu_lazy_get_jiffies_till_flush(void)
+unsigned long rcu_get_jiffies_lazy_flush(void)
{
- return jiffies_till_flush;
+ return jiffies_lazy_flush;
}
-EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
+EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush);
#endif
/*
@@ -299,7 +299,7 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
*/
if (waketype == RCU_NOCB_WAKE_LAZY &&
rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
- mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
+ mod_timer(&rdp_gp->nocb_timer, jiffies + rcu_get_jiffies_lazy_flush());
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
@@ -482,7 +482,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// flush ->nocb_bypass to ->cblist.
if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
(ncbs && bypass_is_lazy &&
- (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
ncbs >= qhimark) {
rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
@@ -532,9 +532,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// 2. Both of these conditions are met:
// a. The bypass list previously had only lazy CBs, and:
// b. The new CB is non-lazy.
- if (ncbs && (!bypass_is_lazy || lazy)) {
- local_irq_restore(flags);
- } else {
+ if (!ncbs || (bypass_is_lazy && !lazy)) {
// No-CBs GP kthread might be indefinitely asleep, if so, wake.
rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
@@ -544,7 +542,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
} else {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstBQnoWake"));
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
}
}
return true; // Callback already enqueued.
@@ -566,11 +564,12 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
long lazy_len;
long len;
struct task_struct *t;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
// If we are being polled or there is no kthread, just leave.
t = READ_ONCE(rdp->nocb_gp_kthread);
if (rcu_nocb_poll || !t) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeNotPoll"));
return;
@@ -583,17 +582,17 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rdp->qlen_last_fqs_check = len;
// Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
}
@@ -610,20 +609,32 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
smp_mb(); /* Enqueue before timer_pending(). */
if ((rdp->nocb_cb_sleep ||
!rcu_segcblist_ready_cbs(&rdp->cblist)) &&
- !timer_pending(&rdp->nocb_timer)) {
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ !timer_pending(&rdp_gp->nocb_timer)) {
+ rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
TPS("WakeOvfIsDeferred"));
} else {
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
} else {
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
}
+static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
+ rcu_callback_t func, unsigned long flags, bool lazy)
+{
+ bool was_alldone;
+
+ if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
+ /* Not enqueued on bypass but locked, do regular enqueue */
+ rcutree_enqueue(rdp, head, func);
+ __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
+ }
+}
+
static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
bool *wake_state)
{
@@ -723,7 +734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
lazy_ncbs = READ_ONCE(rdp->lazy_len);
if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
- (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
bypass_ncbs > 2 * qhimark)) {
flush_bypass = true;
} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
@@ -779,7 +790,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
needwake = rdp->nocb_cb_sleep;
WRITE_ONCE(rdp->nocb_cb_sleep, false);
- smp_mb(); /* CB invocation -after- GP end. */
} else {
needwake = false;
}
@@ -933,8 +943,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
- // VVV Ensure CB invocation follows _sleep test.
- if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
+ if (READ_ONCE(rdp->nocb_cb_sleep)) {
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}
@@ -1383,7 +1392,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
rcu_nocb_unlock_irqrestore(rdp, flags);
continue;
}
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+ rcu_nocb_try_flush_bypass(rdp, jiffies);
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
sc->nr_to_scan -= _count;
@@ -1768,10 +1777,10 @@ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
return true;
}
-static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags, bool lazy)
+static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
+ rcu_callback_t func, unsigned long flags, bool lazy)
{
- return false;
+ WARN_ON_ONCE(1); /* Should be dead code! */
}
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 41021080ad25..36a8b5dbf5b5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
struct sched_param sp;
struct task_struct *t;
- mutex_lock(&rnp->boost_kthread_mutex);
- if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
- goto out;
+ if (rnp->boost_kthread_task)
+ return;
t = kthread_create(rcu_boost_kthread, (void *)rnp,
"rcub/%d", rnp_index);
if (WARN_ON_ONCE(IS_ERR(t)))
- goto out;
+ return;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
@@ -1210,48 +1209,11 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
-
- out:
- mutex_unlock(&rnp->boost_kthread_mutex);
}
-/*
- * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question. The CPU hotplug lock is still
- * held, so the value of rnp->qsmaskinit will be stable.
- *
- * We don't include outgoingcpu in the affinity set, use -1 if there is
- * no outgoing CPU. If there are no CPUs left in the affinity set,
- * this function allows the kthread to execute on any CPU.
- *
- * Any future concurrent calls are serialized via ->boost_kthread_mutex.
- */
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
{
- struct task_struct *t = rnp->boost_kthread_task;
- unsigned long mask;
- cpumask_var_t cm;
- int cpu;
-
- if (!t)
- return;
- if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
- return;
- mutex_lock(&rnp->boost_kthread_mutex);
- mask = rcu_rnp_online_cpus(rnp);
- for_each_leaf_node_possible_cpu(rnp, cpu)
- if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
- cpu != outgoingcpu)
- cpumask_set_cpu(cpu, cm);
- cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
- if (cpumask_empty(cm)) {
- cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
- if (outgoingcpu >= 0)
- cpumask_clear_cpu(outgoingcpu, cm);
- }
- set_cpus_allowed_ptr(t, cm);
- mutex_unlock(&rnp->boost_kthread_mutex);
- free_cpumask_var(cm);
+ return READ_ONCE(rnp->boost_kthread_task);
}
#else /* #ifdef CONFIG_RCU_BOOST */
@@ -1270,10 +1232,10 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
}
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
{
+ return NULL;
}
-
#endif /* #else #ifdef CONFIG_RCU_BOOST */
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..d44efa0d0611 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1792,7 +1792,6 @@ static void cpu_util_update_eff(struct cgroup_subsys_state *css);
#endif
#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_UCLAMP_TASK
#ifdef CONFIG_UCLAMP_TASK_GROUP
static void uclamp_update_root_tg(void)
{
@@ -1898,7 +1897,6 @@ undo:
return result;
}
#endif
-#endif
static int uclamp_validate(struct task_struct *p,
const struct sched_attr *attr)
@@ -2065,7 +2063,7 @@ static void __init init_uclamp(void)
}
}
-#else /* CONFIG_UCLAMP_TASK */
+#else /* !CONFIG_UCLAMP_TASK */
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
static inline int uclamp_validate(struct task_struct *p,
@@ -3955,6 +3953,17 @@ void wake_up_if_idle(int cpu)
}
}
+bool cpus_equal_capacity(int this_cpu, int that_cpu)
+{
+ if (!sched_asym_cpucap_active())
+ return true;
+
+ if (this_cpu == that_cpu)
+ return true;
+
+ return arch_scale_cpu_capacity(this_cpu) == arch_scale_cpu_capacity(that_cpu);
+}
+
bool cpus_share_cache(int this_cpu, int that_cpu)
{
if (this_cpu == that_cpu)
@@ -6787,10 +6796,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
static void sched_update_worker(struct task_struct *tsk)
{
- if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
+ if (tsk->flags & PF_BLOCK_TS)
+ blk_plug_invalidate_ts(tsk);
if (tsk->flags & PF_WQ_WORKER)
wq_worker_running(tsk);
- else
+ else if (tsk->flags & PF_IO_WORKER)
io_wq_worker_running(tsk);
}
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..6a16129f9a5c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7289,7 +7289,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
if (!available_idle_cpu(cpu)) {
idle = false;
if (*idle_cpu == -1) {
- if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
+ if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
*idle_cpu = cpu;
break;
}
@@ -7297,7 +7297,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
}
break;
}
- if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+ if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus))
*idle_cpu = cpu;
}
@@ -7311,13 +7311,19 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
/*
* Scan the local SMT mask for idle CPUs.
*/
-static int select_idle_smt(struct task_struct *p, int target)
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
{
int cpu;
for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
if (cpu == target)
continue;
+ /*
+ * Check if the CPU is in the LLC scheduling domain of @target.
+ * Due to isolcpus, there is no guarantee that all the siblings are in the domain.
+ */
+ if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+ continue;
if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
return cpu;
}
@@ -7341,7 +7347,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
return __select_idle_cpu(core, p);
}
-static inline int select_idle_smt(struct task_struct *p, int target)
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
{
return -1;
}
@@ -7591,7 +7597,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
has_idle_core = test_idle_cores(target);
if (!has_idle_core && cpus_share_cache(prev, target)) {
- i = select_idle_smt(p, prev);
+ i = select_idle_smt(p, sd, prev);
if ((unsigned int)i < nr_cpumask_bits)
return i;
}
@@ -9237,19 +9243,17 @@ static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
static inline bool others_have_blocked(struct rq *rq)
{
- if (READ_ONCE(rq->avg_rt.util_avg))
+ if (cpu_util_rt(rq))
return true;
- if (READ_ONCE(rq->avg_dl.util_avg))
+ if (cpu_util_dl(rq))
return true;
if (thermal_load_avg(rq))
return true;
-#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
- if (READ_ONCE(rq->avg_irq.util_avg))
+ if (cpu_util_irq(rq))
return true;
-#endif
return false;
}
@@ -9506,8 +9510,8 @@ static unsigned long scale_rt_capacity(int cpu)
* avg_thermal.load_avg tracks thermal pressure and the weighted
* average uses the actual delta max capacity(load).
*/
- used = READ_ONCE(rq->avg_rt.util_avg);
- used += READ_ONCE(rq->avg_dl.util_avg);
+ used = cpu_util_rt(rq);
+ used += cpu_util_dl(rq);
used += thermal_load_avg(rq);
if (unlikely(used >= max))
@@ -9740,51 +9744,49 @@ group_type group_classify(unsigned int imbalance_pct,
*/
static bool sched_use_asym_prio(struct sched_domain *sd, int cpu)
{
+ if (!(sd->flags & SD_ASYM_PACKING))
+ return false;
+
if (!sched_smt_active())
return true;
return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu);
}
+static inline bool sched_asym(struct sched_domain *sd, int dst_cpu, int src_cpu)
+{
+ /*
+ * First check if @dst_cpu can do asym_packing load balance. Only do it
+ * if it has higher priority than @src_cpu.
+ */
+ return sched_use_asym_prio(sd, dst_cpu) &&
+ sched_asym_prefer(dst_cpu, src_cpu);
+}
+
/**
- * sched_asym - Check if the destination CPU can do asym_packing load balance
+ * sched_group_asym - Check if the destination CPU can do asym_packing balance
* @env: The load balancing environment
- * @sds: Load-balancing data with statistics of the local group
* @sgs: Load-balancing statistics of the candidate busiest group
* @group: The candidate busiest group
*
* @env::dst_cpu can do asym_packing if it has higher priority than the
* preferred CPU of @group.
*
- * SMT is a special case. If we are balancing load between cores, @env::dst_cpu
- * can do asym_packing balance only if all its SMT siblings are idle. Also, it
- * can only do it if @group is an SMT group and has exactly on busy CPU. Larger
- * imbalances in the number of CPUS are dealt with in find_busiest_group().
- *
- * If we are balancing load within an SMT core, or at PKG domain level, always
- * proceed.
- *
* Return: true if @env::dst_cpu can do with asym_packing load balance. False
* otherwise.
*/
static inline bool
-sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
- struct sched_group *group)
+sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group *group)
{
- /* Ensure that the whole local core is idle, if applicable. */
- if (!sched_use_asym_prio(env->sd, env->dst_cpu))
- return false;
-
/*
- * CPU priorities does not make sense for SMT cores with more than one
+ * CPU priorities do not make sense for SMT cores with more than one
* busy sibling.
*/
- if (group->flags & SD_SHARE_CPUCAPACITY) {
- if (sgs->group_weight - sgs->idle_cpus != 1)
- return false;
- }
+ if ((group->flags & SD_SHARE_CPUCAPACITY) &&
+ (sgs->group_weight - sgs->idle_cpus != 1))
+ return false;
- return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
+ return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
}
/* One group has more than one SMT CPU while the other group does not */
@@ -9938,11 +9940,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_weight = group->group_weight;
/* Check if dst CPU is idle and preferred to this group */
- if (!local_group && env->sd->flags & SD_ASYM_PACKING &&
- env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
- sched_asym(env, sds, sgs, group)) {
+ if (!local_group && env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
+ sched_group_asym(env, sgs, group))
sgs->group_asym_packing = 1;
- }
/* Check for loaded SMT group to be balanced to dst CPU */
if (!local_group && smt_balance(env, sgs, group))
@@ -10006,9 +10006,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
switch (sgs->group_type) {
case group_overloaded:
/* Select the overloaded group with highest avg_load. */
- if (sgs->avg_load <= busiest->avg_load)
- return false;
- break;
+ return sgs->avg_load > busiest->avg_load;
case group_imbalanced:
/*
@@ -10019,18 +10017,14 @@ static bool update_sd_pick_busiest(struct lb_env *env,
case group_asym_packing:
/* Prefer to move from lowest priority CPU's work */
- if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
- return false;
- break;
+ return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
case group_misfit_task:
/*
* If we have more than one misfit sg go with the biggest
* misfit.
*/
- if (sgs->group_misfit_task_load < busiest->group_misfit_task_load)
- return false;
- break;
+ return sgs->group_misfit_task_load > busiest->group_misfit_task_load;
case group_smt_balance:
/*
@@ -10182,10 +10176,8 @@ static int idle_cpu_without(int cpu, struct task_struct *p)
* be computed and tested before calling idle_cpu_without().
*/
-#ifdef CONFIG_SMP
if (rq->ttwu_pending)
return 0;
-#endif
return 1;
}
@@ -10578,16 +10570,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
update_sg_lb_stats(env, sds, sg, sgs, &sg_status);
- if (local_group)
- goto next_group;
-
-
- if (update_sd_pick_busiest(env, sds, sg, sgs)) {
+ if (!local_group && update_sd_pick_busiest(env, sds, sg, sgs)) {
sds->busiest = sg;
sds->busiest_stat = *sgs;
}
-next_group:
/* Now, start updating sd_lb_stats */
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
@@ -10691,7 +10678,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
*/
if (local->group_type == group_has_spare) {
if ((busiest->group_type > group_fully_busy) &&
- !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
+ !(env->sd->flags & SD_SHARE_LLC)) {
/*
* If busiest is overloaded, try to fill spare
* capacity. This might end up creating spare capacity
@@ -11038,10 +11025,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
* If balancing between cores, let lower priority CPUs help
* SMT cores with more than one busy sibling.
*/
- if ((env->sd->flags & SD_ASYM_PACKING) &&
- sched_use_asym_prio(env->sd, i) &&
- sched_asym_prefer(i, env->dst_cpu) &&
- nr_running == 1)
+ if (sched_asym(env->sd, i, env->dst_cpu) && nr_running == 1)
continue;
switch (env->migration_type) {
@@ -11137,8 +11121,7 @@ asym_active_balance(struct lb_env *env)
* the lower priority @env::dst_cpu help it. Do not follow
* CPU priority.
*/
- return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- sched_use_asym_prio(env->sd, env->dst_cpu) &&
+ return env->idle != CPU_NOT_IDLE && sched_use_asym_prio(env->sd, env->dst_cpu) &&
(sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
!sched_use_asym_prio(env->sd, env->src_cpu));
}
@@ -11910,8 +11893,7 @@ static void nohz_balancer_kick(struct rq *rq)
* preferred CPU must be idle.
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
- if (sched_use_asym_prio(sd, i) &&
- sched_asym_prefer(i, cpu)) {
+ if (sched_asym(sd, i, cpu)) {
flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto unlock;
}
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 31231925f1ec..6135fbe83d68 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -81,6 +81,25 @@ void __weak arch_cpu_idle(void)
cpu_idle_force_poll = 1;
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE
+DEFINE_STATIC_KEY_FALSE(arch_needs_tick_broadcast);
+
+static inline void cond_tick_broadcast_enter(void)
+{
+ if (static_branch_unlikely(&arch_needs_tick_broadcast))
+ tick_broadcast_enter();
+}
+
+static inline void cond_tick_broadcast_exit(void)
+{
+ if (static_branch_unlikely(&arch_needs_tick_broadcast))
+ tick_broadcast_exit();
+}
+#else
+static inline void cond_tick_broadcast_enter(void) { }
+static inline void cond_tick_broadcast_exit(void) { }
+#endif
+
/**
* default_idle_call - Default CPU idle routine.
*
@@ -90,6 +109,7 @@ void __cpuidle default_idle_call(void)
{
instrumentation_begin();
if (!current_clr_polling_and_test()) {
+ cond_tick_broadcast_enter();
trace_cpu_idle(1, smp_processor_id());
stop_critical_timings();
@@ -99,6 +119,7 @@ void __cpuidle default_idle_call(void)
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ cond_tick_broadcast_exit();
}
local_irq_enable();
instrumentation_end();
@@ -291,7 +312,6 @@ static void do_idle(void)
local_irq_disable();
if (cpu_is_offline(cpu)) {
- tick_nohz_idle_stop_tick();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 2ad881d07752..4e715b9b278e 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -162,6 +162,9 @@
| MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
| MEMBARRIER_CMD_GET_REGISTRATIONS)
+static DEFINE_MUTEX(membarrier_ipi_mutex);
+#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex)
+
static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
@@ -259,6 +262,7 @@ static int membarrier_global_expedited(void)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
+ SERIALIZE_IPI();
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -347,6 +351,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
+ SERIALIZE_IPI();
cpus_read_lock();
if (cpu_id >= 0) {
@@ -460,6 +465,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
* between threads which are users of @mm has its membarrier state
* updated.
*/
+ SERIALIZE_IPI();
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 001fe047bd5d..d2242679239e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3136,7 +3136,7 @@ static inline bool uclamp_rq_is_idle(struct rq *rq)
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
{
- return rq->avg_irq.util_avg;
+ return READ_ONCE(rq->avg_irq.util_avg);
}
static inline
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 10d1391e7416..99ea5986038c 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -657,13 +657,13 @@ static void destroy_sched_domains(struct sched_domain *sd)
}
/*
- * Keep a special pointer to the highest sched_domain that has
- * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
- * allows us to avoid some pointer chasing select_idle_sibling().
+ * Keep a special pointer to the highest sched_domain that has SD_SHARE_LLC set
+ * (Last Level Cache Domain) for this allows us to avoid some pointer chasing
+ * select_idle_sibling().
*
- * Also keep a unique ID per domain (we use the first CPU number in
- * the cpumask of the domain), this allows us to quickly tell if
- * two CPUs are in the same cache domain, see cpus_share_cache().
+ * Also keep a unique ID per domain (we use the first CPU number in the cpumask
+ * of the domain), this allows us to quickly tell if two CPUs are in the same
+ * cache domain, see cpus_share_cache().
*/
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
@@ -684,7 +684,7 @@ static void update_top_cache_domain(int cpu)
int id = cpu;
int size = 1;
- sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+ sd = highest_flag_domain(cpu, SD_SHARE_LLC);
if (sd) {
id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));
@@ -1551,11 +1551,12 @@ static struct cpumask ***sched_domains_numa_masks;
*
* These flags are purely descriptive of the topology and do not prescribe
* behaviour. Behaviour is artificial and mapped in the below sd_init()
- * function:
+ * function. For details, see include/linux/sched/sd_flags.h.
*
- * SD_SHARE_CPUCAPACITY - describes SMT topologies
- * SD_SHARE_PKG_RESOURCES - describes shared caches
- * SD_NUMA - describes NUMA topologies
+ * SD_SHARE_CPUCAPACITY
+ * SD_SHARE_LLC
+ * SD_CLUSTER
+ * SD_NUMA
*
* Odd one out, which beside describing the topology has a quirk also
* prescribes the desired behaviour that goes along with it:
@@ -1565,7 +1566,7 @@ static struct cpumask ***sched_domains_numa_masks;
#define TOPOLOGY_SD_FLAGS \
(SD_SHARE_CPUCAPACITY | \
SD_CLUSTER | \
- SD_SHARE_PKG_RESOURCES | \
+ SD_SHARE_LLC | \
SD_NUMA | \
SD_ASYM_PACKING)
@@ -1608,7 +1609,7 @@ sd_init(struct sched_domain_topology_level *tl,
| 0*SD_BALANCE_WAKE
| 1*SD_WAKE_AFFINE
| 0*SD_SHARE_CPUCAPACITY
- | 0*SD_SHARE_PKG_RESOURCES
+ | 0*SD_SHARE_LLC
| 0*SD_SERIALIZE
| 1*SD_PREFER_SIBLING
| 0*SD_NUMA
@@ -1645,7 +1646,7 @@ sd_init(struct sched_domain_topology_level *tl,
if (sd->flags & SD_SHARE_CPUCAPACITY) {
sd->imbalance_pct = 110;
- } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ } else if (sd->flags & SD_SHARE_LLC) {
sd->imbalance_pct = 117;
sd->cache_nice_tries = 1;
@@ -1670,7 +1671,7 @@ sd_init(struct sched_domain_topology_level *tl,
* For all levels sharing cache; connect a sched_domain_shared
* instance.
*/
- if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ if (sd->flags & SD_SHARE_LLC) {
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
atomic_inc(&sd->shared->ref);
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
@@ -2445,8 +2446,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
struct sched_domain *child = sd->child;
- if (!(sd->flags & SD_SHARE_PKG_RESOURCES) && child &&
- (child->flags & SD_SHARE_PKG_RESOURCES)) {
+ if (!(sd->flags & SD_SHARE_LLC) && child &&
+ (child->flags & SD_SHARE_LLC)) {
struct sched_domain __rcu *top_p;
unsigned int nr_llcs;
diff --git a/kernel/signal.c b/kernel/signal.c
index c9c57d053ce4..bdca529f0f7b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -47,6 +47,7 @@
#include <linux/cgroup.h>
#include <linux/audit.h>
#include <linux/sysctl.h>
+#include <uapi/linux/pidfd.h>
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
@@ -1436,7 +1437,8 @@ void lockdep_assert_task_sighand_held(struct task_struct *task)
#endif
/*
- * send signal info to all the members of a group
+ * send signal info to all the members of a thread group or to the
+ * individual thread if type == PIDTYPE_PID.
*/
int group_send_sig_info(int sig, struct kernel_siginfo *info,
struct task_struct *p, enum pid_type type)
@@ -1478,7 +1480,8 @@ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
return ret;
}
-int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+static int kill_pid_info_type(int sig, struct kernel_siginfo *info,
+ struct pid *pid, enum pid_type type)
{
int error = -ESRCH;
struct task_struct *p;
@@ -1487,11 +1490,10 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
rcu_read_lock();
p = pid_task(pid, PIDTYPE_PID);
if (p)
- error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
+ error = group_send_sig_info(sig, info, p, type);
rcu_read_unlock();
if (likely(!p || error != -ESRCH))
return error;
-
/*
* The task was unhashed in between, try again. If it
* is dead, pid_task() will return NULL, if we race with
@@ -1500,6 +1502,11 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
}
}
+int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+{
+ return kill_pid_info_type(sig, info, pid, PIDTYPE_TGID);
+}
+
static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
{
int error;
@@ -1898,16 +1905,19 @@ int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno,
return send_sig_info(info.si_signo, &info, t);
}
-int kill_pgrp(struct pid *pid, int sig, int priv)
+static int kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
{
int ret;
-
read_lock(&tasklist_lock);
- ret = __kill_pgrp_info(sig, __si_special(priv), pid);
+ ret = __kill_pgrp_info(sig, info, pgrp);
read_unlock(&tasklist_lock);
-
return ret;
}
+
+int kill_pgrp(struct pid *pid, int sig, int priv)
+{
+ return kill_pgrp_info(sig, __si_special(priv), pid);
+}
EXPORT_SYMBOL(kill_pgrp);
int kill_pid(struct pid *pid, int sig, int priv)
@@ -2019,13 +2029,14 @@ ret:
return ret;
}
-static void do_notify_pidfd(struct task_struct *task)
+void do_notify_pidfd(struct task_struct *task)
{
- struct pid *pid;
+ struct pid *pid = task_pid(task);
WARN_ON(task->exit_state == 0);
- pid = task_pid(task);
- wake_up_all(&pid->wait_pidfd);
+
+ __wake_up(&pid->wait_pidfd, TASK_NORMAL, 0,
+ poll_to_key(EPOLLIN | EPOLLRDNORM));
}
/*
@@ -2050,9 +2061,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
-
- /* Wake up all pidfd waiters */
- do_notify_pidfd(tsk);
+ /*
+ * tsk is a group leader and has no threads, wake up the
+ * non-PIDFD_THREAD waiters.
+ */
+ if (thread_group_empty(tsk))
+ do_notify_pidfd(tsk);
if (sig != SIGCHLD) {
/*
@@ -3789,12 +3803,13 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese,
#endif
#endif
-static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
+static void prepare_kill_siginfo(int sig, struct kernel_siginfo *info,
+ enum pid_type type)
{
clear_siginfo(info);
info->si_signo = sig;
info->si_errno = 0;
- info->si_code = SI_USER;
+ info->si_code = (type == PIDTYPE_PID) ? SI_TKILL : SI_USER;
info->si_pid = task_tgid_vnr(current);
info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
}
@@ -3808,7 +3823,7 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct kernel_siginfo info;
- prepare_kill_siginfo(sig, &info);
+ prepare_kill_siginfo(sig, &info, PIDTYPE_TGID);
return kill_something_info(sig, &info, pid);
}
@@ -3861,6 +3876,10 @@ static struct pid *pidfd_to_pid(const struct file *file)
return tgid_pidfd_to_pid(file);
}
+#define PIDFD_SEND_SIGNAL_FLAGS \
+ (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \
+ PIDFD_SIGNAL_PROCESS_GROUP)
+
/**
* sys_pidfd_send_signal - Signal a process through a pidfd
* @pidfd: file descriptor of the process
@@ -3868,14 +3887,10 @@ static struct pid *pidfd_to_pid(const struct file *file)
* @info: signal info
* @flags: future flags
*
- * The syscall currently only signals via PIDTYPE_PID which covers
- * kill(<positive-pid>, <signal>. It does not signal threads or process
- * groups.
- * In order to extend the syscall to threads and process groups the @flags
- * argument should be used. In essence, the @flags argument will determine
- * what is signaled and not the file descriptor itself. Put in other words,
- * grouping is a property of the flags argument not a property of the file
- * descriptor.
+ * Send the signal to the thread group or to the individual thread depending
+ * on PIDFD_THREAD.
+ * In the future extension to @flags may be used to override the default scope
+ * of @pidfd.
*
* Return: 0 on success, negative errno on failure
*/
@@ -3886,9 +3901,14 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
struct fd f;
struct pid *pid;
kernel_siginfo_t kinfo;
+ enum pid_type type;
/* Enforce flags be set to 0 until we add an extension. */
- if (flags)
+ if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
+ return -EINVAL;
+
+ /* Ensure that only a single signal scope determining flag is set. */
+ if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
return -EINVAL;
f = fdget(pidfd);
@@ -3906,6 +3926,25 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (!access_pidfd_pidns(pid))
goto err;
+ switch (flags) {
+ case 0:
+ /* Infer scope from the type of pidfd. */
+ if (f.file->f_flags & PIDFD_THREAD)
+ type = PIDTYPE_PID;
+ else
+ type = PIDTYPE_TGID;
+ break;
+ case PIDFD_SIGNAL_THREAD:
+ type = PIDTYPE_PID;
+ break;
+ case PIDFD_SIGNAL_THREAD_GROUP:
+ type = PIDTYPE_TGID;
+ break;
+ case PIDFD_SIGNAL_PROCESS_GROUP:
+ type = PIDTYPE_PGID;
+ break;
+ }
+
if (info) {
ret = copy_siginfo_from_user_any(&kinfo, info);
if (unlikely(ret))
@@ -3917,15 +3956,17 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
/* Only allow sending arbitrary signals to yourself. */
ret = -EPERM;
- if ((task_pid(current) != pid) &&
+ if ((task_pid(current) != pid || type > PIDTYPE_TGID) &&
(kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
goto err;
} else {
- prepare_kill_siginfo(sig, &kinfo);
+ prepare_kill_siginfo(sig, &kinfo, type);
}
- ret = kill_pid_info(sig, &kinfo, pid);
-
+ if (type == PIDTYPE_PGID)
+ ret = kill_pgrp_info(sig, &kinfo, pid);
+ else
+ ret = kill_pid_info_type(sig, &kinfo, pid, type);
err:
fdput(f);
return ret;
@@ -3965,12 +4006,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
{
struct kernel_siginfo info;
- clear_siginfo(&info);
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_TKILL;
- info.si_pid = task_tgid_vnr(current);
- info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ prepare_kill_siginfo(sig, &info, PIDTYPE_PID);
return do_send_specific(tgid, pid, sig, &info);
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 210cf5f8d92c..b315b21fb28c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -27,6 +27,7 @@
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/wait_bit.h>
+#include <linux/workqueue.h>
#include <asm/softirq_stack.h>
@@ -802,11 +803,13 @@ static void tasklet_action_common(struct softirq_action *a,
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
+ workqueue_softirq_action(false);
tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}
static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
+ workqueue_softirq_action(true);
tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}
@@ -929,6 +932,8 @@ static void run_ksoftirqd(unsigned int cpu)
#ifdef CONFIG_HOTPLUG_CPU
static int takeover_tasklets(unsigned int cpu)
{
+ workqueue_softirq_dead(cpu);
+
/* CPU is dead, so no lock needed. */
local_irq_disable();
diff --git a/kernel/sys.c b/kernel/sys.c
index e219fcfa112d..f8e543f1e38a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1785,21 +1785,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
struct task_struct *t;
unsigned long flags;
u64 tgutime, tgstime, utime, stime;
- unsigned long maxrss = 0;
+ unsigned long maxrss;
+ struct mm_struct *mm;
struct signal_struct *sig = p->signal;
+ unsigned int seq = 0;
- memset((char *)r, 0, sizeof (*r));
+retry:
+ memset(r, 0, sizeof(*r));
utime = stime = 0;
+ maxrss = 0;
if (who == RUSAGE_THREAD) {
task_cputime_adjusted(current, &utime, &stime);
accumulate_thread_rusage(p, r);
maxrss = sig->maxrss;
- goto out;
+ goto out_thread;
}
- if (!lock_task_sighand(p, &flags))
- return;
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
switch (who) {
case RUSAGE_BOTH:
@@ -1819,9 +1822,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
fallthrough;
case RUSAGE_SELF:
- thread_group_cputime_adjusted(p, &tgutime, &tgstime);
- utime += tgutime;
- stime += tgstime;
r->ru_nvcsw += sig->nvcsw;
r->ru_nivcsw += sig->nivcsw;
r->ru_minflt += sig->min_flt;
@@ -1830,28 +1830,42 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
r->ru_oublock += sig->oublock;
if (maxrss < sig->maxrss)
maxrss = sig->maxrss;
+
+ rcu_read_lock();
__for_each_thread(sig, t)
accumulate_thread_rusage(t, r);
+ rcu_read_unlock();
+
break;
default:
BUG();
}
- unlock_task_sighand(p, &flags);
-out:
- r->ru_utime = ns_to_kernel_old_timeval(utime);
- r->ru_stime = ns_to_kernel_old_timeval(stime);
+ if (need_seqretry(&sig->stats_lock, seq)) {
+ seq = 1;
+ goto retry;
+ }
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
- if (who != RUSAGE_CHILDREN) {
- struct mm_struct *mm = get_task_mm(p);
+ if (who == RUSAGE_CHILDREN)
+ goto out_children;
- if (mm) {
- setmax_mm_hiwater_rss(&maxrss, mm);
- mmput(mm);
- }
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+ utime += tgutime;
+ stime += tgstime;
+
+out_thread:
+ mm = get_task_mm(p);
+ if (mm) {
+ setmax_mm_hiwater_rss(&maxrss, mm);
+ mmput(mm);
}
+
+out_children:
r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
+ r->ru_utime = ns_to_kernel_old_timeval(utime);
+ r->ru_stime = ns_to_kernel_old_timeval(stime);
}
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index bae8f11070be..fc3b1a06c981 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -39,6 +39,11 @@ config GENERIC_CLOCKEVENTS_BROADCAST
bool
depends on GENERIC_CLOCKEVENTS
+# Handle broadcast in default_idle_call()
+config GENERIC_CLOCKEVENTS_BROADCAST_IDLE
+ bool
+ depends on GENERIC_CLOCKEVENTS_BROADCAST
+
# Automatically adjust the min. reprogramming time for
# clock event device
config GENERIC_CLOCKEVENTS_MIN_ADJUST
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 7e875e63ff3b..4af2a264a160 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -17,6 +17,9 @@ endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
obj-$(CONFIG_LEGACY_TIMER_TICK) += tick-legacy.o
+ifeq ($(CONFIG_SMP),y)
+ obj-$(CONFIG_NO_HZ_COMMON) += timer_migration.o
+endif
obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 960143b183cd..a7ca458cdd9c 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -659,7 +659,7 @@ void tick_cleanup_dead_cpu(int cpu)
#endif
#ifdef CONFIG_SYSFS
-static struct bus_type clockevents_subsys = {
+static const struct bus_type clockevents_subsys = {
.name = "clockevents",
.dev_name = "clockevent",
};
diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c
index df922f49d171..d06185e054ea 100644
--- a/kernel/time/clocksource-wdtest.c
+++ b/kernel/time/clocksource-wdtest.c
@@ -104,8 +104,8 @@ static void wdtest_ktime_clocksource_reset(void)
static int wdtest_func(void *arg)
{
unsigned long j1, j2;
+ int i, max_retries;
char *s;
- int i;
schedule_timeout_uninterruptible(holdoff * HZ);
@@ -139,18 +139,19 @@ static int wdtest_func(void *arg)
WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC));
/* Verify tsc-like stability with various numbers of errors injected. */
- for (i = 0; i <= max_cswd_read_retries + 1; i++) {
- if (i <= 1 && i < max_cswd_read_retries)
+ max_retries = clocksource_get_max_watchdog_retry();
+ for (i = 0; i <= max_retries + 1; i++) {
+ if (i <= 1 && i < max_retries)
s = "";
- else if (i <= max_cswd_read_retries)
+ else if (i <= max_retries)
s = ", expect message";
else
s = ", expect clock skew";
- pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s);
+ pr_info("--- Watchdog with %dx error injection, %d retries%s.\n", i, max_retries, s);
WRITE_ONCE(wdtest_ktime_read_ndelays, i);
schedule_timeout_uninterruptible(2 * HZ);
WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
- WARN_ON_ONCE((i <= max_cswd_read_retries) !=
+ WARN_ON_ONCE((i <= max_retries) !=
!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
wdtest_ktime_clocksource_reset();
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c108ed8a9804..e5b260aa0e02 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -99,6 +99,7 @@ static u64 suspend_start;
* Interval: 0.5sec.
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
+#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ))
/*
* Threshold: 0.0312s, when doubled: 0.0625s.
@@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
+static int64_t watchdog_max_interval;
static inline void clocksource_watchdog_lock(unsigned long *flags)
{
@@ -208,9 +210,6 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}
-ulong max_cswd_read_retries = 2;
-module_param(max_cswd_read_retries, ulong, 0644);
-EXPORT_SYMBOL_GPL(max_cswd_read_retries);
static int verify_n_cpus = 8;
module_param(verify_n_cpus, int, 0644);
@@ -222,11 +221,12 @@ enum wd_read_status {
static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
- unsigned int nretries;
+ unsigned int nretries, max_retries;
u64 wd_end, wd_end2, wd_delta;
int64_t wd_delay, wd_seq_delay;
- for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
+ max_retries = clocksource_get_max_watchdog_retry();
+ for (nretries = 0; nretries <= max_retries; nretries++) {
local_irq_disable();
*wdnow = watchdog->read(watchdog);
*csnow = cs->read(cs);
@@ -238,7 +238,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
watchdog->shift);
if (wd_delay <= WATCHDOG_MAX_SKEW) {
- if (nretries > 1 || nretries >= max_cswd_read_retries) {
+ if (nretries > 1 || nretries >= max_retries) {
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
smp_processor_id(), watchdog->name, nretries);
}
@@ -399,8 +399,8 @@ static inline void clocksource_reset_watchdog(void)
static void clocksource_watchdog(struct timer_list *unused)
{
u64 csnow, wdnow, cslast, wdlast, delta;
+ int64_t wd_nsec, cs_nsec, interval;
int next_cpu, reset_pending;
- int64_t wd_nsec, cs_nsec;
struct clocksource *cs;
enum wd_read_status read_ret;
unsigned long extra_wait = 0;
@@ -470,6 +470,27 @@ static void clocksource_watchdog(struct timer_list *unused)
if (atomic_read(&watchdog_reset_pending))
continue;
+ /*
+ * The processing of timer softirqs can get delayed (usually
+ * on account of ksoftirqd not getting to run in a timely
+ * manner), which causes the watchdog interval to stretch.
+ * Skew detection may fail for longer watchdog intervals
+ * on account of fixed margins being used.
+ * Some clocksources, e.g. acpi_pm, cannot tolerate
+ * watchdog intervals longer than a few seconds.
+ */
+ interval = max(cs_nsec, wd_nsec);
+ if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) {
+ if (system_state > SYSTEM_SCHEDULING &&
+ interval > 2 * watchdog_max_interval) {
+ watchdog_max_interval = interval;
+ pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n",
+ cs_nsec, wd_nsec);
+ }
+ watchdog_timer.expires = jiffies;
+ continue;
+ }
+
/* Check the deviation from the watchdog clocksource. */
md = cs->uncertainty_margin + watchdog->uncertainty_margin;
if (abs(cs_nsec - wd_nsec) > md) {
@@ -1445,7 +1466,7 @@ static struct attribute *clocksource_attrs[] = {
};
ATTRIBUTE_GROUPS(clocksource);
-static struct bus_type clocksource_subsys = {
+static const struct bus_type clocksource_subsys = {
.name = "clocksource",
.dev_name = "clocksource",
};
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 760793998cdd..70625dff62ce 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -38,6 +38,7 @@
#include <linux/sched/deadline.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
+#include <linux/sched/isolation.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/compat.h>
@@ -746,7 +747,7 @@ static void hrtimer_switch_to_hres(void)
base->hres_active = 1;
hrtimer_resolution = HIGH_RES_NSEC;
- tick_setup_sched_timer();
+ tick_setup_sched_timer(true);
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
}
@@ -1021,21 +1022,23 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
}
/**
- * hrtimer_forward - forward the timer expiry
+ * hrtimer_forward() - forward the timer expiry
* @timer: hrtimer to forward
* @now: forward past this time
* @interval: the interval to forward
*
* Forward the timer expiry so it will expire in the future.
- * Returns the number of overruns.
*
- * Can be safely called from the callback function of @timer. If
- * called from other contexts @timer must neither be enqueued nor
- * running the callback and the caller needs to take care of
- * serialization.
+ * .. note::
+ * This only updates the timer expiry value and does not requeue the timer.
*
- * Note: This only updates the timer expiry value and does not requeue
- * the timer.
+ * There is also a variant of the function hrtimer_forward_now().
+ *
+ * Context: Can be safely called from the callback function of @timer. If called
+ * from other contexts @timer must neither be enqueued nor running the
+ * callback and the caller needs to take care of serialization.
+ *
+ * Return: The number of overruns are returned.
*/
u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
@@ -1085,6 +1088,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
enum hrtimer_mode mode)
{
debug_activate(timer, mode);
+ WARN_ON_ONCE(!base->cpu_base->online);
base->cpu_base->active_bases |= 1 << base->index;
@@ -2183,6 +2187,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->softirq_next_timer = NULL;
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
+ cpu_base->online = 1;
hrtimer_cpu_base_init_expiry_lock(cpu_base);
return 0;
}
@@ -2221,10 +2226,8 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
int hrtimers_cpu_dying(unsigned int dying_cpu)
{
+ int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
struct hrtimer_cpu_base *old_base, *new_base;
- int i, ncpu = cpumask_first(cpu_active_mask);
-
- tick_cancel_sched_timer(dying_cpu);
old_base = this_cpu_ptr(&hrtimer_bases);
new_base = &per_cpu(hrtimer_bases, ncpu);
@@ -2250,6 +2253,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
raw_spin_unlock(&new_base->lock);
+ old_base->online = 0;
raw_spin_unlock(&old_base->lock);
return 0;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index e9138cd7a0f5..fb0fdec8719a 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -111,15 +111,13 @@ void tick_handle_periodic(struct clock_event_device *dev)
tick_periodic(cpu);
-#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
/*
* The cpu might have transitioned to HIGHRES or NOHZ mode via
* update_process_times() -> run_local_timers() ->
* hrtimer_run_queues().
*/
- if (dev->event_handler != tick_handle_periodic)
+ if (IS_ENABLED(CONFIG_TICK_ONESHOT) && dev->event_handler != tick_handle_periodic)
return;
-#endif
if (!clockevent_state_oneshot(dev))
return;
@@ -398,16 +396,31 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
#ifdef CONFIG_HOTPLUG_CPU
+void tick_assert_timekeeping_handover(void)
+{
+ WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id());
+}
/*
- * Transfer the do_timer job away from a dying cpu.
- *
- * Called with interrupts disabled. No locking required. If
- * tick_do_timer_cpu is owned by this cpu, nothing can change it.
+ * Stop the tick and transfer the timekeeping job away from a dying cpu.
*/
-void tick_handover_do_timer(void)
+int tick_cpu_dying(unsigned int dying_cpu)
{
- if (tick_do_timer_cpu == smp_processor_id())
+ /*
+ * If the current CPU is the timekeeper, it's the only one that
+ * can safely hand over its duty. Also all online CPUs are in
+ * stop machine, guaranteed not to be idle, therefore it's safe
+ * to pick any online successor.
+ */
+ if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
+
+ /* Make sure the CPU won't try to retake the timekeeping duty */
+ tick_sched_timer_dying(dying_cpu);
+
+ /* Remove CPU from timer broadcasting */
+ tick_offline_cpu(dying_cpu);
+
+ return 0;
}
/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 481b7ab65e2c..5f2105e637bd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -8,6 +8,11 @@
#include "timekeeping.h"
#include "tick-sched.h"
+struct timer_events {
+ u64 local;
+ u64 global;
+};
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS
# define TICK_DO_TIMER_NONE -1
@@ -137,8 +142,10 @@ static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_
#endif /* !(BROADCAST && ONESHOT) */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
+extern void tick_offline_cpu(unsigned int cpu);
extern void tick_broadcast_offline(unsigned int cpu);
#else
+static inline void tick_offline_cpu(unsigned int cpu) { }
static inline void tick_broadcast_offline(unsigned int cpu) { }
#endif
@@ -152,8 +159,16 @@ static inline void tick_nohz_init(void) { }
#ifdef CONFIG_NO_HZ_COMMON
extern unsigned long tick_nohz_active;
extern void timers_update_nohz(void);
+extern u64 get_jiffies_update(unsigned long *basej);
# ifdef CONFIG_SMP
extern struct static_key_false timers_migration_enabled;
+extern void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem,
+ struct timer_events *tevt,
+ unsigned int cpu);
+extern void timer_lock_remote_bases(unsigned int cpu);
+extern void timer_unlock_remote_bases(unsigned int cpu);
+extern bool timer_base_is_idle(void);
+extern void timer_expire_remote(unsigned int cpu);
# endif
#else /* CONFIG_NO_HZ_COMMON */
static inline void timers_update_nohz(void) { }
@@ -163,6 +178,7 @@ static inline void timers_update_nohz(void) { }
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle);
void timer_clear_idle(void);
#define CLOCK_SET_WALL \
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d2501673028d..269e21590df5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -43,7 +43,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)
return &per_cpu(tick_cpu_sched, cpu);
}
-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
/*
* The time when the last jiffy update happened. Write access must hold
* jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
@@ -181,13 +180,32 @@ static ktime_t tick_init_jiffy_update(void)
return period;
}
+static inline int tick_sched_flag_test(struct tick_sched *ts,
+ unsigned long flag)
+{
+ return !!(ts->flags & flag);
+}
+
+static inline void tick_sched_flag_set(struct tick_sched *ts,
+ unsigned long flag)
+{
+ lockdep_assert_irqs_disabled();
+ ts->flags |= flag;
+}
+
+static inline void tick_sched_flag_clear(struct tick_sched *ts,
+ unsigned long flag)
+{
+ lockdep_assert_irqs_disabled();
+ ts->flags &= ~flag;
+}
+
#define MAX_STALLED_JIFFIES 5
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();
-#ifdef CONFIG_NO_HZ_COMMON
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the CPU in charge went
@@ -198,13 +216,13 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
+ unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
tick_do_timer_cpu = cpu;
}
-#endif
/* Check if jiffies need an update */
if (tick_do_timer_cpu == cpu)
@@ -225,13 +243,12 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
}
}
- if (ts->inidle)
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
ts->got_idle_tick = 1;
}
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
-#ifdef CONFIG_NO_HZ_COMMON
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
@@ -240,7 +257,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too many ticks.
*/
- if (ts->tick_stopped) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
+ tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
@@ -251,11 +269,52 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
*/
ts->next_tick = 0;
}
-#endif
+
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
-#endif
+
+/*
+ * We rearm the timer until we get disabled by the idle code.
+ * Called with interrupts disabled.
+ */
+static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
+{
+ struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer);
+ struct pt_regs *regs = get_irq_regs();
+ ktime_t now = ktime_get();
+
+ tick_sched_do_timer(ts, now);
+
+ /*
+ * Do not call when we are not in IRQ context and have
+ * no valid 'regs' pointer
+ */
+ if (regs)
+ tick_sched_handle(ts, regs);
+ else
+ ts->next_tick = 0;
+
+ /*
+ * In dynticks mode, tick reprogram is deferred:
+ * - to the idle task if in dynticks-idle
+ * - to IRQ exit if in full-dynticks.
+ */
+ if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED)))
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward(timer, now, TICK_NSEC);
+
+ return HRTIMER_RESTART;
+}
+
+static void tick_sched_timer_cancel(struct tick_sched *ts)
+{
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+ hrtimer_cancel(&ts->sched_timer);
+ else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+ tick_program_event(KTIME_MAX, 1);
+}
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
@@ -529,7 +588,7 @@ void __tick_nohz_task_switch(void)
ts = this_cpu_ptr(&tick_cpu_sched);
- if (ts->tick_stopped) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
if (atomic_read(&current->tick_dep_mask) ||
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
@@ -601,7 +660,7 @@ void __init tick_nohz_init(void)
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
}
-#endif
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
/*
* NOHZ - aka dynamic tick functionality
@@ -626,14 +685,14 @@ bool tick_nohz_tick_stopped(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- return ts->tick_stopped;
+ return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}
bool tick_nohz_tick_stopped_cpu(int cpu)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
- return ts->tick_stopped;
+ return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}
/**
@@ -663,7 +722,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
ktime_t delta;
- if (WARN_ON_ONCE(!ts->idle_active))
+ if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)))
return;
delta = ktime_sub(now, ts->idle_entrytime);
@@ -675,7 +734,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
ts->idle_entrytime = now;
- ts->idle_active = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE);
write_seqcount_end(&ts->idle_sleeptime_seq);
sched_clock_idle_wakeup_event();
@@ -685,7 +744,7 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
{
write_seqcount_begin(&ts->idle_sleeptime_seq);
ts->idle_entrytime = ktime_get();
- ts->idle_active = 1;
+ tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE);
write_seqcount_end(&ts->idle_sleeptime_seq);
sched_clock_idle_sleep_event();
@@ -707,7 +766,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
do {
seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
- if (ts->idle_active && compute_delta) {
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
idle = ktime_add(*sleeptime, delta);
@@ -780,7 +839,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
@@ -799,18 +858,40 @@ static inline bool local_timer_softirq_pending(void)
return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
}
-static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
+/*
+ * Read jiffies and the time when jiffies were updated last
+ */
+u64 get_jiffies_update(unsigned long *basej)
{
- u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
unsigned int seq;
+ u64 basemono;
- /* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqcount_begin(&jiffies_seq);
basemono = last_jiffies_update;
basejiff = jiffies;
} while (read_seqcount_retry(&jiffies_seq, seq));
+ *basej = basejiff;
+ return basemono;
+}
+
+/**
+ * tick_nohz_next_event() - return the clock monotonic based next event
+ * @ts: pointer to tick_sched struct
+ * @cpu: CPU number
+ *
+ * Return:
+ * *%0 - When the next event is a maximum of TICK_NSEC in the future
+ * and the tick is not stopped yet
+ * *%next_event - Next event based on clock monotonic
+ */
+static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
+{
+ u64 basemono, next_tick, delta, expires;
+ unsigned long basejiff;
+
+ basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
ts->timer_expires_base = basemono;
@@ -850,15 +931,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
/*
- * Tell the timer code that the base is not idle, i.e. undo
- * the effect of get_next_timer_interrupt():
- */
- timer_clear_idle();
- /*
* We've not stopped the tick yet, and there's a timer in the
* next period, so no point in stopping it either, bail.
*/
- if (!ts->tick_stopped) {
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ts->timer_expires = 0;
goto out;
}
@@ -871,7 +947,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
*/
delta = timekeeping_max_deferment();
if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+ (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
+ !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -889,13 +966,39 @@ out:
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+ unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
- u64 expires = ts->timer_expires;
+ bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
ts->timer_expires_base = 0;
/*
+ * Now the tick should be stopped definitely - so the timer base needs
+ * to be marked idle as well to not miss a newly queued timer.
+ */
+ expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
+ if (expires > ts->timer_expires) {
+ /*
+ * This path could only happen when the first timer was removed
+ * between calculating the possible sleep length and now (when
+ * high resolution mode is not active, timer could also be a
+ * hrtimer).
+ *
+ * We have to stick to the original calculated expiry value to
+ * not stop the tick for too long with a shallow C-state (which
+ * was programmed by cpuidle because of an early next expiration
+ * value).
+ */
+ expires = ts->timer_expires;
+ }
+
+ /* If the timer base is not idle, retain the not yet stopped tick. */
+ if (!timer_idle)
+ return;
+
+ /*
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
@@ -905,13 +1008,13 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- ts->do_timer_last = 1;
+ tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
- ts->do_timer_last = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
/* Skip reprogram of event if it's not changed */
- if (ts->tick_stopped && (expires == ts->next_tick)) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) {
/* Sanity check: make sure clockevent is actually programmed */
if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
return;
@@ -929,12 +1032,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* call we save the current tick time, so we can restart the
* scheduler tick in tick_nohz_restart_sched_tick().
*/
- if (!ts->tick_stopped) {
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
calc_load_nohz_start();
quiet_vmstat();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
- ts->tick_stopped = 1;
+ tick_sched_flag_set(ts, TS_FLAG_STOPPED);
trace_tick_stop(1, TICK_DEP_MASK_NONE);
}
@@ -945,14 +1048,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
- hrtimer_cancel(&ts->sched_timer);
- else
- tick_program_event(KTIME_MAX, 1);
+ tick_sched_timer_cancel(ts);
return;
}
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
@@ -967,7 +1067,7 @@ static void tick_nohz_retain_tick(struct tick_sched *ts)
}
#ifdef CONFIG_NO_HZ_FULL
-static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu)
{
if (tick_nohz_next_event(ts, cpu))
tick_nohz_stop_tick(ts, cpu);
@@ -991,7 +1091,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
touch_softlockup_watchdog_sched();
/* Cancel the scheduled timer and restore the tick: */
- ts->tick_stopped = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_STOPPED);
tick_nohz_restart(ts, now);
}
@@ -1002,8 +1102,8 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts,
int cpu = smp_processor_id();
if (can_stop_full_tick(cpu, ts))
- tick_nohz_stop_sched_tick(ts, cpu);
- else if (ts->tick_stopped)
+ tick_nohz_full_stop_tick(ts, cpu);
+ else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED))
tick_nohz_restart_sched_tick(ts, now);
#endif
}
@@ -1013,7 +1113,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
- if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+ if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ))
return;
__tick_nohz_full_update_tick(ts, ktime_get());
@@ -1060,25 +1160,9 @@ static bool report_idle_softirq(void)
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
- /*
- * If this CPU is offline and it is the one which updates
- * jiffies, then give up the assignment and let it be taken by
- * the CPU which runs the tick timer next. If we don't drop
- * this here, the jiffies might be stale and do_timer() never
- * gets invoked.
- */
- if (unlikely(!cpu_online(cpu))) {
- if (cpu == tick_do_timer_cpu)
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- /*
- * Make sure the CPU doesn't get fooled by obsolete tick
- * deadline if it comes back online later.
- */
- ts->next_tick = 0;
- return false;
- }
+ WARN_ON_ONCE(cpu_is_offline(cpu));
- if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+ if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
return false;
if (need_resched())
@@ -1128,14 +1212,14 @@ void tick_nohz_idle_stop_tick(void)
ts->idle_calls++;
if (expires > 0LL) {
- int was_stopped = ts->tick_stopped;
+ int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
tick_nohz_stop_tick(ts, cpu);
ts->idle_sleeps++;
ts->idle_expires = expires;
- if (!was_stopped && ts->tick_stopped) {
+ if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ts->idle_jiffies = ts->last_jiffies;
nohz_balance_enter_idle(cpu);
}
@@ -1147,11 +1231,6 @@ void tick_nohz_idle_stop_tick(void)
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
- /*
- * Undo the effect of get_next_timer_interrupt() called from
- * tick_nohz_next_event().
- */
- timer_clear_idle();
}
/**
@@ -1171,7 +1250,7 @@ void tick_nohz_idle_enter(void)
WARN_ON_ONCE(ts->timer_expires_base);
- ts->inidle = 1;
+ tick_sched_flag_set(ts, TS_FLAG_INIDLE);
tick_nohz_start_idle(ts);
local_irq_enable();
@@ -1200,7 +1279,7 @@ void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- if (ts->inidle)
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
tick_nohz_start_idle(ts);
else
tick_nohz_full_update_tick(ts);
@@ -1254,7 +1333,7 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
ktime_t now = ts->idle_entrytime;
ktime_t next_event;
- WARN_ON_ONCE(!ts->inidle);
+ WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE));
*delta_next = ktime_sub(dev->next_event, now);
@@ -1326,7 +1405,7 @@ void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- if (ts->tick_stopped) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ktime_t now = ktime_get();
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
@@ -1367,12 +1446,12 @@ void tick_nohz_idle_exit(void)
local_irq_disable();
- WARN_ON_ONCE(!ts->inidle);
+ WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE));
WARN_ON_ONCE(ts->timer_expires_base);
- ts->inidle = 0;
- idle_active = ts->idle_active;
- tick_stopped = ts->tick_stopped;
+ tick_sched_flag_clear(ts, TS_FLAG_INIDLE);
+ idle_active = tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE);
+ tick_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
if (idle_active || tick_stopped)
now = ktime_get();
@@ -1391,38 +1470,22 @@ void tick_nohz_idle_exit(void)
* at the clockevent level. hrtimer can't be used instead, because its
* infrastructure actually relies on the tick itself as a backend in
* low-resolution mode (see hrtimer_run_queues()).
- *
- * This low-resolution handler still makes use of some hrtimer APIs meanwhile
- * for convenience with expiration calculation and forwarding.
*/
static void tick_nohz_lowres_handler(struct clock_event_device *dev)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- struct pt_regs *regs = get_irq_regs();
- ktime_t now = ktime_get();
dev->next_event = KTIME_MAX;
- tick_sched_do_timer(ts, now);
- tick_sched_handle(ts, regs);
-
- /*
- * In dynticks mode, tick reprogram is deferred:
- * - to the idle task if in dynticks-idle
- * - to IRQ exit if in full-dynticks.
- */
- if (likely(!ts->tick_stopped)) {
- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
+ if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART))
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- }
-
}
-static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
+static inline void tick_nohz_activate(struct tick_sched *ts)
{
if (!tick_nohz_enabled)
return;
- ts->nohz_mode = mode;
+ tick_sched_flag_set(ts, TS_FLAG_NOHZ);
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
timers_update_nohz();
@@ -1433,9 +1496,6 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
*/
static void tick_nohz_switch_to_nohz(void)
{
- struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- ktime_t next;
-
if (!tick_nohz_enabled)
return;
@@ -1444,16 +1504,9 @@ static void tick_nohz_switch_to_nohz(void)
/*
* Recycle the hrtimer in 'ts', so we can share the
- * hrtimer_forward_now() function with the highres code.
+ * highres code.
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- /* Get the next period */
- next = tick_init_jiffy_update();
-
- hrtimer_set_expires(&ts->sched_timer, next);
- hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
- tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
+ tick_setup_sched_timer(false);
}
static inline void tick_nohz_irq_enter(void)
@@ -1461,10 +1514,10 @@ static inline void tick_nohz_irq_enter(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now;
- if (!ts->idle_active && !ts->tick_stopped)
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE))
return;
now = ktime_get();
- if (ts->idle_active)
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))
tick_nohz_stop_idle(ts, now);
/*
* If all CPUs are idle we may need to update a stale jiffies value.
@@ -1473,7 +1526,7 @@ static inline void tick_nohz_irq_enter(void)
* rare case (typically stop machine). So we must make sure we have a
* last resort.
*/
- if (ts->tick_stopped)
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED))
tick_nohz_update_jiffies(now);
}
@@ -1481,7 +1534,7 @@ static inline void tick_nohz_irq_enter(void)
static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
-static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
+static inline void tick_nohz_activate(struct tick_sched *ts) { }
#endif /* CONFIG_NO_HZ_COMMON */
@@ -1494,45 +1547,6 @@ void tick_irq_enter(void)
tick_nohz_irq_enter();
}
-/*
- * High resolution timer specific code
- */
-#ifdef CONFIG_HIGH_RES_TIMERS
-/*
- * We rearm the timer until we get disabled by the idle code.
- * Called with interrupts disabled.
- */
-static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer)
-{
- struct tick_sched *ts =
- container_of(timer, struct tick_sched, sched_timer);
- struct pt_regs *regs = get_irq_regs();
- ktime_t now = ktime_get();
-
- tick_sched_do_timer(ts, now);
-
- /*
- * Do not call when we are not in IRQ context and have
- * no valid 'regs' pointer
- */
- if (regs)
- tick_sched_handle(ts, regs);
- else
- ts->next_tick = 0;
-
- /*
- * In dynticks mode, tick reprogram is deferred:
- * - to the idle task if in dynticks-idle
- * - to IRQ exit if in full-dynticks.
- */
- if (unlikely(ts->tick_stopped))
- return HRTIMER_NORESTART;
-
- hrtimer_forward(timer, now, TICK_NSEC);
-
- return HRTIMER_RESTART;
-}
-
static int sched_skew_tick;
static int __init skew_tick(char *str)
@@ -1545,15 +1559,19 @@ early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
+ * @mode: tick_nohz_mode to setup for
*/
-void tick_setup_sched_timer(void)
+void tick_setup_sched_timer(bool hrtimer)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- ktime_t now = ktime_get();
/* Emulate tick processing via per-CPU hrtimers: */
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- ts->sched_timer.function = tick_nohz_highres_handler;
+
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) {
+ tick_sched_flag_set(ts, TS_FLAG_HIGHRES);
+ ts->sched_timer.function = tick_nohz_handler;
+ }
/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
@@ -1566,30 +1584,46 @@ void tick_setup_sched_timer(void)
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
- tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
+ hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer)
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
+ else
+ tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+ tick_nohz_activate(ts);
}
-#endif /* HIGH_RES_TIMERS */
-#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-void tick_cancel_sched_timer(int cpu)
+/*
+ * Shut down the tick and make sure the CPU won't try to retake the timekeeping
+ * duty before disabling IRQs in idle for the last time.
+ */
+void tick_sched_timer_dying(int cpu)
{
+ struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ struct clock_event_device *dev = td->evtdev;
ktime_t idle_sleeptime, iowait_sleeptime;
+ unsigned long idle_calls, idle_sleeps;
-# ifdef CONFIG_HIGH_RES_TIMERS
- if (ts->sched_timer.base)
- hrtimer_cancel(&ts->sched_timer);
-# endif
+ /* This must happen before hrtimers are migrated! */
+ tick_sched_timer_cancel(ts);
+
+ /*
+ * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED,
+ * make sure not to call low-res tick handler.
+ */
+ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+ dev->event_handler = clockevents_handle_noop;
idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
+ idle_calls = ts->idle_calls;
+ idle_sleeps = ts->idle_sleeps;
memset(ts, 0, sizeof(*ts));
ts->idle_sleeptime = idle_sleeptime;
ts->iowait_sleeptime = iowait_sleeptime;
+ ts->idle_calls = idle_calls;
+ ts->idle_sleeps = idle_sleeps;
}
-#endif
/*
* Async notification about clocksource changes
@@ -1627,7 +1661,7 @@ int tick_check_oneshot_change(int allow_nohz)
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;
- if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
+ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
return 0;
if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 5ed5a9d41d5a..e11c4dc65bcb 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -14,20 +14,26 @@ struct tick_device {
enum tick_device_mode mode;
};
-enum tick_nohz_mode {
- NOHZ_MODE_INACTIVE,
- NOHZ_MODE_LOWRES,
- NOHZ_MODE_HIGHRES,
-};
+/* The CPU is in the tick idle mode */
+#define TS_FLAG_INIDLE BIT(0)
+/* The idle tick has been stopped */
+#define TS_FLAG_STOPPED BIT(1)
+/*
+ * Indicator that the CPU is actively in the tick idle mode;
+ * it is reset during irq handling phases.
+ */
+#define TS_FLAG_IDLE_ACTIVE BIT(2)
+/* CPU was the last one doing do_timer before going idle */
+#define TS_FLAG_DO_TIMER_LAST BIT(3)
+/* NO_HZ is enabled */
+#define TS_FLAG_NOHZ BIT(4)
+/* High resolution tick mode */
+#define TS_FLAG_HIGHRES BIT(5)
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
*
- * @inidle: Indicator that the CPU is in the tick idle mode
- * @tick_stopped: Indicator that the idle tick has been stopped
- * @idle_active: Indicator that the CPU is actively in the tick idle mode;
- * it is reset during irq handling phases.
- * @do_timer_last: CPU was the last one doing do_timer before going idle
+ * @flags: State flags gathering the TS_FLAG_* features
* @got_idle_tick: Tick timer function has run with @inidle set
* @stalled_jiffies: Number of stalled jiffies detected across ticks
* @last_tick_jiffies: Value of jiffies seen on last tick
@@ -57,11 +63,7 @@ enum tick_nohz_mode {
*/
struct tick_sched {
/* Common flags */
- unsigned int inidle : 1;
- unsigned int tick_stopped : 1;
- unsigned int idle_active : 1;
- unsigned int do_timer_last : 1;
- unsigned int got_idle_tick : 1;
+ unsigned long flags;
/* Tick handling: jiffies stall check */
unsigned int stalled_jiffies;
@@ -73,13 +75,13 @@ struct tick_sched {
ktime_t next_tick;
unsigned long idle_jiffies;
ktime_t idle_waketime;
+ unsigned int got_idle_tick;
/* Idle entry */
seqcount_t idle_sleeptime_seq;
ktime_t idle_entrytime;
/* Tick stop */
- enum tick_nohz_mode nohz_mode;
unsigned long last_jiffies;
u64 timer_expires_base;
u64 timer_expires;
@@ -102,11 +104,11 @@ struct tick_sched {
extern struct tick_sched *tick_get_tick_sched(int cpu);
-extern void tick_setup_sched_timer(void);
-#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
+extern void tick_setup_sched_timer(bool hrtimer);
+#if defined CONFIG_TICK_ONESHOT
+extern void tick_sched_timer_dying(int cpu);
#else
-static inline void tick_cancel_sched_timer(int cpu) { }
+static inline void tick_sched_timer_dying(int cpu) { }
#endif
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c
index ca058c8af6ba..3e5d422dd15c 100644
--- a/kernel/time/time_test.c
+++ b/kernel/time/time_test.c
@@ -73,7 +73,7 @@ static void time64_to_tm_test_date_range(struct kunit *test)
days = div_s64(secs, 86400);
- #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %ld", \
+ #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %lld", \
year, month, mdday, yday, days
KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 266d02809dbb..b58dffc58a8f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1180,13 +1180,15 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history,
}
/*
- * cycle_between - true if test occurs chronologically between before and after
+ * timestamp_in_interval - true if ts is chronologically in [start, end]
+ *
+ * True if ts occurs chronologically at or after start, and before or at end.
*/
-static bool cycle_between(u64 before, u64 test, u64 after)
+static bool timestamp_in_interval(u64 start, u64 end, u64 ts)
{
- if (test > before && test < after)
+ if (ts >= start && ts <= end)
return true;
- if (test < before && before > after)
+ if (start > end && (ts >= start || ts <= end))
return true;
return false;
}
@@ -1232,11 +1234,12 @@ int get_device_system_crosststamp(int (*get_time_fn)
return ret;
/*
- * Verify that the clocksource associated with the captured
- * system counter value is the same as the currently installed
- * timekeeper clocksource
+ * Verify that the clocksource ID associated with the captured
+ * system counter value is the same as for the currently
+ * installed timekeeper clocksource
*/
- if (tk->tkr_mono.clock != system_counterval.cs)
+ if (system_counterval.cs_id == CSID_GENERIC ||
+ tk->tkr_mono.clock->id != system_counterval.cs_id)
return -ENODEV;
cycles = system_counterval.cycles;
@@ -1246,7 +1249,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
*/
now = tk_clock_read(&tk->tkr_mono);
interval_start = tk->tkr_mono.cycle_last;
- if (!cycle_between(interval_start, cycles, now)) {
+ if (!timestamp_in_interval(interval_start, now, cycles)) {
clock_was_set_seq = tk->clock_was_set_seq;
cs_was_changed_seq = tk->cs_was_changed_seq;
cycles = interval_start;
@@ -1259,10 +1262,8 @@ int get_device_system_crosststamp(int (*get_time_fn)
tk_core.timekeeper.offs_real);
base_raw = tk->tkr_raw.base;
- nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
- system_counterval.cycles);
- nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
- system_counterval.cycles);
+ nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
+ nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
} while (read_seqcount_retry(&tk_core.seq, seq));
xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
@@ -1277,13 +1278,13 @@ int get_device_system_crosststamp(int (*get_time_fn)
bool discontinuity;
/*
- * Check that the counter value occurs after the provided
+ * Check that the counter value is not before the provided
* history reference and that the history doesn't cross a
* clocksource change
*/
if (!history_begin ||
- !cycle_between(history_begin->cycles,
- system_counterval.cycles, cycles) ||
+ !timestamp_in_interval(history_begin->cycles,
+ cycles, system_counterval.cycles) ||
history_begin->cs_was_changed_seq != cs_was_changed_seq)
return -EINVAL;
partial_history_cycles = cycles - system_counterval.cycles;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 352b161113cd..e69e75d3858c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -53,6 +53,7 @@
#include <asm/io.h>
#include "tick-internal.h"
+#include "timer_migration.h"
#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>
@@ -187,15 +188,66 @@ EXPORT_SYMBOL(jiffies_64);
#define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH)
#ifdef CONFIG_NO_HZ_COMMON
-# define NR_BASES 2
-# define BASE_STD 0
-# define BASE_DEF 1
+/*
+ * If multiple bases need to be locked, use the base ordering for lock
+ * nesting, i.e. lowest number first.
+ */
+# define NR_BASES 3
+# define BASE_LOCAL 0
+# define BASE_GLOBAL 1
+# define BASE_DEF 2
#else
# define NR_BASES 1
-# define BASE_STD 0
+# define BASE_LOCAL 0
+# define BASE_GLOBAL 0
# define BASE_DEF 0
#endif
+/**
+ * struct timer_base - Per CPU timer base (number of base depends on config)
+ * @lock: Lock protecting the timer_base
+ * @running_timer: When expiring timers, the lock is dropped. To make
+ * sure not to race agains deleting/modifying a
+ * currently running timer, the pointer is set to the
+ * timer, which expires at the moment. If no timer is
+ * running, the pointer is NULL.
+ * @expiry_lock: PREEMPT_RT only: Lock is taken in softirq around
+ * timer expiry callback execution and when trying to
+ * delete a running timer and it wasn't successful in
+ * the first glance. It prevents priority inversion
+ * when callback was preempted on a remote CPU and a
+ * caller tries to delete the running timer. It also
+ * prevents a life lock, when the task which tries to
+ * delete a timer preempted the softirq thread which
+ * is running the timer callback function.
+ * @timer_waiters: PREEMPT_RT only: Tells, if there is a waiter
+ * waiting for the end of the timer callback function
+ * execution.
+ * @clk: clock of the timer base; is updated before enqueue
+ * of a timer; during expiry, it is 1 offset ahead of
+ * jiffies to avoid endless requeuing to current
+ * jiffies
+ * @next_expiry: expiry value of the first timer; it is updated when
+ * finding the next timer and during enqueue; the
+ * value is not valid, when next_expiry_recalc is set
+ * @cpu: Number of CPU the timer base belongs to
+ * @next_expiry_recalc: States, whether a recalculation of next_expiry is
+ * required. Value is set true, when a timer was
+ * deleted.
+ * @is_idle: Is set, when timer_base is idle. It is triggered by NOHZ
+ * code. This state is only used in standard
+ * base. Deferrable timers, which are enqueued remotely
+ * never wake up an idle CPU. So no matter of supporting it
+ * for this base.
+ * @timers_pending: Is set, when a timer is pending in the base. It is only
+ * reliable when next_expiry_recalc is not set.
+ * @pending_map: bitmap of the timer wheel; each bit reflects a
+ * bucket of the wheel. When a bit is set, at least a
+ * single timer is enqueued in the related bucket.
+ * @vectors: Array of lists; Each array member reflects a bucket
+ * of the timer wheel. The list contains all timers
+ * which are enqueued into a specific bucket.
+ */
struct timer_base {
raw_spinlock_t lock;
struct timer_list *running_timer;
@@ -583,11 +635,16 @@ trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
/*
* We might have to IPI the remote CPU if the base is idle and the
- * timer is not deferrable. If the other CPU is on the way to idle
- * then it can't set base->is_idle as we hold the base lock:
+ * timer is pinned. If it is a non pinned timer, it is only queued
+ * on the remote CPU, when timer was running during queueing. Then
+ * everything is handled by remote CPU anyway. If the other CPU is
+ * on the way to idle then it can't set base->is_idle as we hold
+ * the base lock:
*/
- if (base->is_idle)
+ if (base->is_idle) {
+ WARN_ON_ONCE(!(timer->flags & TIMER_PINNED));
wake_up_nohz_cpu(base->cpu);
+ }
}
/*
@@ -899,7 +956,10 @@ static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{
- struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
+ int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL;
+ struct timer_base *base;
+
+ base = per_cpu_ptr(&timer_bases[index], cpu);
/*
* If the timer is deferrable and NO_HZ_COMMON is set then we need
@@ -912,7 +972,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{
- struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL;
+ struct timer_base *base;
+
+ base = this_cpu_ptr(&timer_bases[index]);
/*
* If the timer is deferrable and NO_HZ_COMMON is set then we need
@@ -928,17 +991,6 @@ static inline struct timer_base *get_timer_base(u32 tflags)
return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
- if (static_branch_likely(&timers_migration_enabled) &&
- !(tflags & TIMER_PINNED))
- return get_timer_cpu_base(tflags, get_nohz_timer_target());
-#endif
- return get_timer_this_cpu_base(tflags);
-}
-
static inline void __forward_timer_base(struct timer_base *base,
unsigned long basej)
{
@@ -1093,7 +1145,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
if (!ret && (options & MOD_TIMER_PENDING_ONLY))
goto out_unlock;
- new_base = get_target_base(base, timer->flags);
+ new_base = get_timer_this_cpu_base(timer->flags);
if (base != new_base) {
/*
@@ -1246,11 +1298,48 @@ void add_timer(struct timer_list *timer)
EXPORT_SYMBOL(add_timer);
/**
+ * add_timer_local() - Start a timer on the local CPU
+ * @timer: The timer to be started
+ *
+ * Same as add_timer() except that the timer flag TIMER_PINNED is set.
+ *
+ * See add_timer() for further details.
+ */
+void add_timer_local(struct timer_list *timer)
+{
+ if (WARN_ON_ONCE(timer_pending(timer)))
+ return;
+ timer->flags |= TIMER_PINNED;
+ __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
+}
+EXPORT_SYMBOL(add_timer_local);
+
+/**
+ * add_timer_global() - Start a timer without TIMER_PINNED flag set
+ * @timer: The timer to be started
+ *
+ * Same as add_timer() except that the timer flag TIMER_PINNED is unset.
+ *
+ * See add_timer() for further details.
+ */
+void add_timer_global(struct timer_list *timer)
+{
+ if (WARN_ON_ONCE(timer_pending(timer)))
+ return;
+ timer->flags &= ~TIMER_PINNED;
+ __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
+}
+EXPORT_SYMBOL(add_timer_global);
+
+/**
* add_timer_on - Start a timer on a particular CPU
* @timer: The timer to be started
* @cpu: The CPU to start it on
*
- * Same as add_timer() except that it starts the timer on the given CPU.
+ * Same as add_timer() except that it starts the timer on the given CPU and
+ * the TIMER_PINNED flag is set. When timer shouldn't be a pinned timer in
+ * the next round, add_timer_global() should be used instead as it unsets
+ * the TIMER_PINNED flag.
*
* See add_timer() for further details.
*/
@@ -1264,6 +1353,9 @@ void add_timer_on(struct timer_list *timer, int cpu)
if (WARN_ON_ONCE(timer_pending(timer)))
return;
+ /* Make sure timer flags have TIMER_PINNED flag set */
+ timer->flags |= TIMER_PINNED;
+
new_base = get_timer_cpu_base(timer->flags, cpu);
/*
@@ -1911,71 +2003,350 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}
+static unsigned long next_timer_interrupt(struct timer_base *base,
+ unsigned long basej)
+{
+ if (base->next_expiry_recalc)
+ next_expiry_recalc(base);
+
+ /*
+ * Move next_expiry for the empty base into the future to prevent an
+ * unnecessary raise of the timer softirq when the next_expiry value
+ * will be reached even if there is no timer pending.
+ *
+ * This update is also required to make timer_base::next_expiry values
+ * easy comparable to find out which base holds the first pending timer.
+ */
+ if (!base->timers_pending)
+ base->next_expiry = basej + NEXT_TIMER_MAX_DELTA;
+
+ return base->next_expiry;
+}
+
+static unsigned long fetch_next_timer_interrupt(unsigned long basej, u64 basem,
+ struct timer_base *base_local,
+ struct timer_base *base_global,
+ struct timer_events *tevt)
+{
+ unsigned long nextevt, nextevt_local, nextevt_global;
+ bool local_first;
+
+ nextevt_local = next_timer_interrupt(base_local, basej);
+ nextevt_global = next_timer_interrupt(base_global, basej);
+
+ local_first = time_before_eq(nextevt_local, nextevt_global);
+
+ nextevt = local_first ? nextevt_local : nextevt_global;
+
+ /*
+ * If the @nextevt is at max. one tick away, use @nextevt and store
+ * it in the local expiry value. The next global event is irrelevant in
+ * this case and can be left as KTIME_MAX.
+ */
+ if (time_before_eq(nextevt, basej + 1)) {
+ /* If we missed a tick already, force 0 delta */
+ if (time_before(nextevt, basej))
+ nextevt = basej;
+ tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;
+
+ /*
+ * This is required for the remote check only but it doesn't
+ * hurt, when it is done for both call sites:
+ *
+ * * The remote callers will only take care of the global timers
+ * as local timers will be handled by CPU itself. When not
+ * updating tevt->global with the already missed first global
+ * timer, it is possible that it will be missed completely.
+ *
+ * * The local callers will ignore the tevt->global anyway, when
+ * nextevt is max. one tick away.
+ */
+ if (!local_first)
+ tevt->global = tevt->local;
+ return nextevt;
+ }
+
+ /*
+ * Update tevt.* values:
+ *
+ * If the local queue expires first, then the global event can be
+ * ignored. If the global queue is empty, nothing to do either.
+ */
+ if (!local_first && base_global->timers_pending)
+ tevt->global = basem + (u64)(nextevt_global - basej) * TICK_NSEC;
+
+ if (base_local->timers_pending)
+ tevt->local = basem + (u64)(nextevt_local - basej) * TICK_NSEC;
+
+ return nextevt;
+}
+
+# ifdef CONFIG_SMP
/**
- * get_next_timer_interrupt - return the time (clock mono) of the next timer
+ * fetch_next_timer_interrupt_remote() - Store next timers into @tevt
* @basej: base time jiffies
* @basem: base time clock monotonic
+ * @tevt: Pointer to the storage for the expiry values
+ * @cpu: Remote CPU
+ *
+ * Stores the next pending local and global timer expiry values in the
+ * struct pointed to by @tevt. If a queue is empty the corresponding
+ * field is set to KTIME_MAX. If local event expires before global
+ * event, global event is set to KTIME_MAX as well.
*
- * Returns the tick aligned clock monotonic time of the next pending
- * timer or KTIME_MAX if no timer is pending.
+ * Caller needs to make sure timer base locks are held (use
+ * timer_lock_remote_bases() for this purpose).
*/
-u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
+void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem,
+ struct timer_events *tevt,
+ unsigned int cpu)
+{
+ struct timer_base *base_local, *base_global;
+
+ /* Preset local / global events */
+ tevt->local = tevt->global = KTIME_MAX;
+
+ base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
+ base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+ lockdep_assert_held(&base_local->lock);
+ lockdep_assert_held(&base_global->lock);
+
+ fetch_next_timer_interrupt(basej, basem, base_local, base_global, tevt);
+}
+
+/**
+ * timer_unlock_remote_bases - unlock timer bases of cpu
+ * @cpu: Remote CPU
+ *
+ * Unlocks the remote timer bases.
+ */
+void timer_unlock_remote_bases(unsigned int cpu)
+ __releases(timer_bases[BASE_LOCAL]->lock)
+ __releases(timer_bases[BASE_GLOBAL]->lock)
+{
+ struct timer_base *base_local, *base_global;
+
+ base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
+ base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+ raw_spin_unlock(&base_global->lock);
+ raw_spin_unlock(&base_local->lock);
+}
+
+/**
+ * timer_lock_remote_bases - lock timer bases of cpu
+ * @cpu: Remote CPU
+ *
+ * Locks the remote timer bases.
+ */
+void timer_lock_remote_bases(unsigned int cpu)
+ __acquires(timer_bases[BASE_LOCAL]->lock)
+ __acquires(timer_bases[BASE_GLOBAL]->lock)
+{
+ struct timer_base *base_local, *base_global;
+
+ base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
+ base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+ lockdep_assert_irqs_disabled();
+
+ raw_spin_lock(&base_local->lock);
+ raw_spin_lock_nested(&base_global->lock, SINGLE_DEPTH_NESTING);
+}
+
+/**
+ * timer_base_is_idle() - Return whether timer base is set idle
+ *
+ * Returns value of local timer base is_idle value.
+ */
+bool timer_base_is_idle(void)
+{
+ return __this_cpu_read(timer_bases[BASE_LOCAL].is_idle);
+}
+
+static void __run_timer_base(struct timer_base *base);
+
+/**
+ * timer_expire_remote() - expire global timers of cpu
+ * @cpu: Remote CPU
+ *
+ * Expire timers of global base of remote CPU.
+ */
+void timer_expire_remote(unsigned int cpu)
+{
+ struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+ __run_timer_base(base);
+}
+
+static void timer_use_tmigr(unsigned long basej, u64 basem,
+ unsigned long *nextevt, bool *tick_stop_path,
+ bool timer_base_idle, struct timer_events *tevt)
{
- struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
- unsigned long nextevt = basej + NEXT_TIMER_MAX_DELTA;
- u64 expires = KTIME_MAX;
- bool was_idle;
+ u64 next_tmigr;
+
+ if (timer_base_idle)
+ next_tmigr = tmigr_cpu_new_timer(tevt->global);
+ else if (tick_stop_path)
+ next_tmigr = tmigr_cpu_deactivate(tevt->global);
+ else
+ next_tmigr = tmigr_quick_check(tevt->global);
/*
- * Pretend that there is no timer pending if the cpu is offline.
- * Possible pending timers will be migrated later to an active cpu.
+ * If the CPU is the last going idle in timer migration hierarchy, make
+ * sure the CPU will wake up in time to handle remote timers.
+ * next_tmigr == KTIME_MAX if other CPUs are still active.
*/
- if (cpu_is_offline(smp_processor_id()))
- return expires;
+ if (next_tmigr < tevt->local) {
+ u64 tmp;
- raw_spin_lock(&base->lock);
- if (base->next_expiry_recalc)
- next_expiry_recalc(base);
+ /* If we missed a tick already, force 0 delta */
+ if (next_tmigr < basem)
+ next_tmigr = basem;
+
+ tmp = div_u64(next_tmigr - basem, TICK_NSEC);
+
+ *nextevt = basej + (unsigned long)tmp;
+ tevt->local = next_tmigr;
+ }
+}
+# else
+static void timer_use_tmigr(unsigned long basej, u64 basem,
+ unsigned long *nextevt, bool *tick_stop_path,
+ bool timer_base_idle, struct timer_events *tevt)
+{
+ /*
+ * Make sure first event is written into tevt->local to not miss a
+ * timer on !SMP systems.
+ */
+ tevt->local = min_t(u64, tevt->local, tevt->global);
+}
+# endif /* CONFIG_SMP */
+
+static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
+ bool *idle)
+{
+ struct timer_events tevt = { .local = KTIME_MAX, .global = KTIME_MAX };
+ struct timer_base *base_local, *base_global;
+ unsigned long nextevt;
+ bool idle_is_possible;
+
+ /*
+ * When the CPU is offline, the tick is cancelled and nothing is supposed
+ * to try to stop it.
+ */
+ if (WARN_ON_ONCE(cpu_is_offline(smp_processor_id()))) {
+ if (idle)
+ *idle = true;
+ return tevt.local;
+ }
+
+ base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
+ base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]);
+
+ raw_spin_lock(&base_local->lock);
+ raw_spin_lock_nested(&base_global->lock, SINGLE_DEPTH_NESTING);
+
+ nextevt = fetch_next_timer_interrupt(basej, basem, base_local,
+ base_global, &tevt);
+
+ /*
+ * If the next event is only one jiffie ahead there is no need to call
+ * timer migration hierarchy related functions. The value for the next
+ * global timer in @tevt struct equals then KTIME_MAX. This is also
+ * true, when the timer base is idle.
+ *
+ * The proper timer migration hierarchy function depends on the callsite
+ * and whether timer base is idle or not. @nextevt will be updated when
+ * this CPU needs to handle the first timer migration hierarchy
+ * event. See timer_use_tmigr() for detailed information.
+ */
+ idle_is_possible = time_after(nextevt, basej + 1);
+ if (idle_is_possible)
+ timer_use_tmigr(basej, basem, &nextevt, idle,
+ base_local->is_idle, &tevt);
/*
* We have a fresh next event. Check whether we can forward the
* base.
*/
- __forward_timer_base(base, basej);
+ __forward_timer_base(base_local, basej);
+ __forward_timer_base(base_global, basej);
- if (base->timers_pending) {
- nextevt = base->next_expiry;
+ /*
+ * Set base->is_idle only when caller is timer_base_try_to_set_idle()
+ */
+ if (idle) {
+ /*
+ * Bases are idle if the next event is more than a tick
+ * away. Caution: @nextevt could have changed by enqueueing a
+ * global timer into timer migration hierarchy. Therefore a new
+ * check is required here.
+ *
+ * If the base is marked idle then any timer add operation must
+ * forward the base clk itself to keep granularity small. This
+ * idle logic is only maintained for the BASE_LOCAL and
+ * BASE_GLOBAL base, deferrable timers may still see large
+ * granularity skew (by design).
+ */
+ if (!base_local->is_idle && time_after(nextevt, basej + 1)) {
+ base_local->is_idle = true;
+ trace_timer_base_idle(true, base_local->cpu);
+ }
+ *idle = base_local->is_idle;
- /* If we missed a tick already, force 0 delta */
- if (time_before(nextevt, basej))
- nextevt = basej;
- expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
- } else {
/*
- * Move next_expiry for the empty base into the future to
- * prevent a unnecessary raise of the timer softirq when the
- * next_expiry value will be reached even if there is no timer
- * pending.
+ * When timer base is not set idle, undo the effect of
+ * tmigr_cpu_deactivate() to prevent inconsitent states - active
+ * timer base but inactive timer migration hierarchy.
+ *
+ * When timer base was already marked idle, nothing will be
+ * changed here.
*/
- base->next_expiry = nextevt;
+ if (!base_local->is_idle && idle_is_possible)
+ tmigr_cpu_activate();
}
- /*
- * Base is idle if the next event is more than a tick away.
- *
- * If the base is marked idle then any timer add operation must forward
- * the base clk itself to keep granularity small. This idle logic is
- * only maintained for the BASE_STD base, deferrable timers may still
- * see large granularity skew (by design).
- */
- was_idle = base->is_idle;
- base->is_idle = time_after(nextevt, basej + 1);
- if (was_idle != base->is_idle)
- trace_timer_base_idle(base->is_idle, base->cpu);
+ raw_spin_unlock(&base_global->lock);
+ raw_spin_unlock(&base_local->lock);
- raw_spin_unlock(&base->lock);
+ return cmp_next_hrtimer_event(basem, tevt.local);
+}
- return cmp_next_hrtimer_event(basem, expires);
+/**
+ * get_next_timer_interrupt() - return the time (clock mono) of the next timer
+ * @basej: base time jiffies
+ * @basem: base time clock monotonic
+ *
+ * Returns the tick aligned clock monotonic time of the next pending timer or
+ * KTIME_MAX if no timer is pending. If timer of global base was queued into
+ * timer migration hierarchy, first global timer is not taken into account. If
+ * it was the last CPU of timer migration hierarchy going idle, first global
+ * event is taken into account.
+ */
+u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
+{
+ return __get_next_timer_interrupt(basej, basem, NULL);
+}
+
+/**
+ * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases
+ * @basej: base time jiffies
+ * @basem: base time clock monotonic
+ * @idle: pointer to store the value of timer_base->is_idle on return;
+ * *idle contains the information whether tick was already stopped
+ *
+ * Returns the tick aligned clock monotonic time of the next pending timer or
+ * KTIME_MAX if no timer is pending. When tick was already stopped KTIME_MAX is
+ * returned as well.
+ */
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle)
+{
+ if (*idle)
+ return KTIME_MAX;
+
+ return __get_next_timer_interrupt(basej, basem, idle);
}
/**
@@ -1985,18 +2356,18 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
*/
void timer_clear_idle(void)
{
- struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
-
/*
- * We do this unlocked. The worst outcome is a remote enqueue sending
- * a pointless IPI, but taking the lock would just make the window for
- * sending the IPI a few instructions smaller for the cost of taking
- * the lock in the exit from idle path.
+ * We do this unlocked. The worst outcome is a remote pinned timer
+ * enqueue sending a pointless IPI, but taking the lock would just
+ * make the window for sending the IPI a few instructions smaller
+ * for the cost of taking the lock in the exit from idle
+ * path. Required for BASE_LOCAL only.
*/
- if (base->is_idle) {
- base->is_idle = false;
- trace_timer_base_idle(false, smp_processor_id());
- }
+ __this_cpu_write(timer_bases[BASE_LOCAL].is_idle, false);
+ trace_timer_base_idle(false, smp_processor_id());
+
+ /* Activate without holding the timer_base->lock */
+ tmigr_cpu_activate();
}
#endif
@@ -2009,11 +2380,10 @@ static inline void __run_timers(struct timer_base *base)
struct hlist_head heads[LVL_DEPTH];
int levels;
- if (time_before(jiffies, base->next_expiry))
- return;
+ lockdep_assert_held(&base->lock);
- timer_base_lock_expiry(base);
- raw_spin_lock_irq(&base->lock);
+ if (base->running_timer)
+ return;
while (time_after_eq(jiffies, base->clk) &&
time_after_eq(jiffies, base->next_expiry)) {
@@ -2037,20 +2407,40 @@ static inline void __run_timers(struct timer_base *base)
while (levels--)
expire_timers(base, heads + levels);
}
+}
+
+static void __run_timer_base(struct timer_base *base)
+{
+ if (time_before(jiffies, base->next_expiry))
+ return;
+
+ timer_base_lock_expiry(base);
+ raw_spin_lock_irq(&base->lock);
+ __run_timers(base);
raw_spin_unlock_irq(&base->lock);
timer_base_unlock_expiry(base);
}
+static void run_timer_base(int index)
+{
+ struct timer_base *base = this_cpu_ptr(&timer_bases[index]);
+
+ __run_timer_base(base);
+}
+
/*
* This function runs timers and the timer-tq in bottom half context.
*/
static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
- struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ run_timer_base(BASE_LOCAL);
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) {
+ run_timer_base(BASE_GLOBAL);
+ run_timer_base(BASE_DEF);
- __run_timers(base);
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
- __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+ if (is_timers_nohz_active())
+ tmigr_handle_remote();
+ }
}
/*
@@ -2058,19 +2448,18 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
*/
static void run_local_timers(void)
{
- struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
hrtimer_run_queues();
- /* Raise the softirq only if required. */
- if (time_before(jiffies, base->next_expiry)) {
- if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
- return;
- /* CPU is awake, so check the deferrable base. */
- base++;
- if (time_before(jiffies, base->next_expiry))
+
+ for (int i = 0; i < NR_BASES; i++, base++) {
+ /* Raise the softirq only if required. */
+ if (time_after_eq(jiffies, base->next_expiry) ||
+ (i == BASE_DEF && tmigr_requires_handle_remote())) {
+ raise_softirq(TIMER_SOFTIRQ);
return;
+ }
}
- raise_softirq(TIMER_SOFTIRQ);
}
/*
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ed7d6ad694fb..1c311c46da50 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -147,11 +147,15 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
# define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(ts->x)))
+# define P_flag(x, f) \
+ SEQ_printf(m, " .%-15s: %d\n", #x, !!(ts->flags & (f)))
+
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
- P(nohz_mode);
+ P_flag(nohz, TS_FLAG_NOHZ);
+ P_flag(highres, TS_FLAG_HIGHRES);
P_ns(last_tick);
- P(tick_stopped);
+ P_flag(tick_stopped, TS_FLAG_STOPPED);
P(idle_jiffies);
P(idle_calls);
P(idle_sleeps);
@@ -256,7 +260,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
static inline void timer_list_header(struct seq_file *m, u64 now)
{
- SEQ_printf(m, "Timer List Version: v0.9\n");
+ SEQ_printf(m, "Timer List Version: v0.10\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
new file mode 100644
index 000000000000..8f49b6b96dfd
--- /dev/null
+++ b/kernel/time/timer_migration.c
@@ -0,0 +1,1793 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Infrastructure for migratable timers
+ *
+ * Copyright(C) 2022 linutronix GmbH
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/timerqueue.h>
+#include <trace/events/ipi.h>
+
+#include "timer_migration.h"
+#include "tick-internal.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/timer_migration.h>
+
+/*
+ * The timer migration mechanism is built on a hierarchy of groups. The
+ * lowest level group contains CPUs, the next level groups of CPU groups
+ * and so forth. The CPU groups are kept per node so for the normal case
+ * lock contention won't happen across nodes. Depending on the number of
+ * CPUs per node even the next level might be kept as groups of CPU groups
+ * per node and only the levels above cross the node topology.
+ *
+ * Example topology for a two node system with 24 CPUs each.
+ *
+ * LVL 2 [GRP2:0]
+ * GRP1:0 = GRP1:M
+ *
+ * LVL 1 [GRP1:0] [GRP1:1]
+ * GRP0:0 - GRP0:2 GRP0:3 - GRP0:5
+ *
+ * LVL 0 [GRP0:0] [GRP0:1] [GRP0:2] [GRP0:3] [GRP0:4] [GRP0:5]
+ * CPUS 0-7 8-15 16-23 24-31 32-39 40-47
+ *
+ * The groups hold a timer queue of events sorted by expiry time. These
+ * queues are updated when CPUs go in idle. When they come out of idle
+ * ignore flag of events is set.
+ *
+ * Each group has a designated migrator CPU/group as long as a CPU/group is
+ * active in the group. This designated role is necessary to avoid that all
+ * active CPUs in a group try to migrate expired timers from other CPUs,
+ * which would result in massive lock bouncing.
+ *
+ * When a CPU is awake, it checks in it's own timer tick the group
+ * hierarchy up to the point where it is assigned the migrator role or if
+ * no CPU is active, it also checks the groups where no migrator is set
+ * (TMIGR_NONE).
+ *
+ * If it finds expired timers in one of the group queues it pulls them over
+ * from the idle CPU and runs the timer function. After that it updates the
+ * group and the parent groups if required.
+ *
+ * CPUs which go idle arm their CPU local timer hardware for the next local
+ * (pinned) timer event. If the next migratable timer expires after the
+ * next local timer or the CPU has no migratable timer pending then the
+ * CPU does not queue an event in the LVL0 group. If the next migratable
+ * timer expires before the next local timer then the CPU queues that timer
+ * in the LVL0 group. In both cases the CPU marks itself idle in the LVL0
+ * group.
+ *
+ * When CPU comes out of idle and when a group has at least a single active
+ * child, the ignore flag of the tmigr_event is set. This indicates, that
+ * the event is ignored even if it is still enqueued in the parent groups
+ * timer queue. It will be removed when touching the timer queue the next
+ * time. This spares locking in active path as the lock protects (after
+ * setup) only event information. For more information about locking,
+ * please read the section "Locking rules".
+ *
+ * If the CPU is the migrator of the group then it delegates that role to
+ * the next active CPU in the group or sets migrator to TMIGR_NONE when
+ * there is no active CPU in the group. This delegation needs to be
+ * propagated up the hierarchy so hand over from other leaves can happen at
+ * all hierarchy levels w/o doing a search.
+ *
+ * When the last CPU in the system goes idle, then it drops all migrator
+ * duties up to the top level of the hierarchy (LVL2 in the example). It
+ * then has to make sure, that it arms it's own local hardware timer for
+ * the earliest event in the system.
+ *
+ *
+ * Lifetime rules:
+ * ---------------
+ *
+ * The groups are built up at init time or when CPUs come online. They are
+ * not destroyed when a group becomes empty due to offlining. The group
+ * just won't participate in the hierarchy management anymore. Destroying
+ * groups would result in interesting race conditions which would just make
+ * the whole mechanism slow and complex.
+ *
+ *
+ * Locking rules:
+ * --------------
+ *
+ * For setting up new groups and handling events it's required to lock both
+ * child and parent group. The lock ordering is always bottom up. This also
+ * includes the per CPU locks in struct tmigr_cpu. For updating the migrator and
+ * active CPU/group information atomic_try_cmpxchg() is used instead and only
+ * the per CPU tmigr_cpu->lock is held.
+ *
+ * During the setup of groups tmigr_level_list is required. It is protected by
+ * @tmigr_mutex.
+ *
+ * When @timer_base->lock as well as tmigr related locks are required, the lock
+ * ordering is: first @timer_base->lock, afterwards tmigr related locks.
+ *
+ *
+ * Protection of the tmigr group state information:
+ * ------------------------------------------------
+ *
+ * The state information with the list of active children and migrator needs to
+ * be protected by a sequence counter. It prevents a race when updates in child
+ * groups are propagated in changed order. The state update is performed
+ * lockless and group wise. The following scenario describes what happens
+ * without updating the sequence counter:
+ *
+ * Therefore, let's take three groups and four CPUs (CPU2 and CPU3 as well
+ * as GRP0:1 will not change during the scenario):
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:0, GRP0:1
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = CPU0 migrator = CPU2
+ * active = CPU0 active = CPU2
+ * / \ / \
+ * CPUs 0 1 2 3
+ * active idle active idle
+ *
+ *
+ * 1. CPU0 goes idle. As the update is performed group wise, in the first step
+ * only GRP0:0 is updated. The update of GRP1:0 is pending as CPU0 has to
+ * walk the hierarchy.
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:0, GRP0:1
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * --> migrator = TMIGR_NONE migrator = CPU2
+ * --> active = active = CPU2
+ * / \ / \
+ * CPUs 0 1 2 3
+ * --> idle idle active idle
+ *
+ * 2. While CPU0 goes idle and continues to update the state, CPU1 comes out of
+ * idle. CPU1 updates GRP0:0. The update for GRP1:0 is pending as CPU1 also
+ * has to walk the hierarchy. Both CPUs (CPU0 and CPU1) now walk the
+ * hierarchy to perform the needed update from their point of view. The
+ * currently visible state looks the following:
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:0, GRP0:1
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * --> migrator = CPU1 migrator = CPU2
+ * --> active = CPU1 active = CPU2
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle --> active active idle
+ *
+ * 3. Here is the race condition: CPU1 managed to propagate its changes (from
+ * step 2) through the hierarchy to GRP1:0 before CPU0 (step 1) did. The
+ * active members of GRP1:0 remain unchanged after the update since it is
+ * still valid from CPU1 current point of view:
+ *
+ * LVL 1 [GRP1:0]
+ * --> migrator = GRP0:1
+ * --> active = GRP0:0, GRP0:1
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = CPU1 migrator = CPU2
+ * active = CPU1 active = CPU2
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle active active idle
+ *
+ * 4. Now CPU0 finally propagates its changes (from step 1) to GRP1:0.
+ *
+ * LVL 1 [GRP1:0]
+ * --> migrator = GRP0:1
+ * --> active = GRP0:1
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = CPU1 migrator = CPU2
+ * active = CPU1 active = CPU2
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle active active idle
+ *
+ *
+ * The race of CPU0 vs. CPU1 led to an inconsistent state in GRP1:0. CPU1 is
+ * active and is correctly listed as active in GRP0:0. However GRP1:0 does not
+ * have GRP0:0 listed as active, which is wrong. The sequence counter has been
+ * added to avoid inconsistent states during updates. The state is updated
+ * atomically only if all members, including the sequence counter, match the
+ * expected value (compare-and-exchange).
+ *
+ * Looking back at the previous example with the addition of the sequence
+ * counter: The update as performed by CPU0 in step 4 will fail. CPU1 changed
+ * the sequence number during the update in step 3 so the expected old value (as
+ * seen by CPU0 before starting the walk) does not match.
+ *
+ * Prevent race between new event and last CPU going inactive
+ * ----------------------------------------------------------
+ *
+ * When the last CPU is going idle and there is a concurrent update of a new
+ * first global timer of an idle CPU, the group and child states have to be read
+ * while holding the lock in tmigr_update_events(). The following scenario shows
+ * what happens, when this is not done.
+ *
+ * 1. Only CPU2 is active:
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:1
+ * next_expiry = KTIME_MAX
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = CPU2
+ * active = active = CPU2
+ * next_expiry = KTIME_MAX next_expiry = KTIME_MAX
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle active idle
+ *
+ * 2. Now CPU 2 goes idle (and has no global timer, that has to be handled) and
+ * propagates that to GRP0:1:
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:1
+ * next_expiry = KTIME_MAX
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE --> migrator = TMIGR_NONE
+ * active = --> active =
+ * next_expiry = KTIME_MAX next_expiry = KTIME_MAX
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle --> idle idle
+ *
+ * 3. Now the idle state is propagated up to GRP1:0. As this is now the last
+ * child going idle in top level group, the expiry of the next group event
+ * has to be handed back to make sure no event is lost. As there is no event
+ * enqueued, KTIME_MAX is handed back to CPU2.
+ *
+ * LVL 1 [GRP1:0]
+ * --> migrator = TMIGR_NONE
+ * --> active =
+ * next_expiry = KTIME_MAX
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = TMIGR_NONE
+ * active = active =
+ * next_expiry = KTIME_MAX next_expiry = KTIME_MAX
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle --> idle idle
+ *
+ * 4. CPU 0 has a new timer queued from idle and it expires at TIMER0. CPU0
+ * propagates that to GRP0:0:
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = TMIGR_NONE
+ * active =
+ * next_expiry = KTIME_MAX
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = TMIGR_NONE
+ * active = active =
+ * --> next_expiry = TIMER0 next_expiry = KTIME_MAX
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle idle idle
+ *
+ * 5. GRP0:0 is not active, so the new timer has to be propagated to
+ * GRP1:0. Therefore the GRP1:0 state has to be read. When the stalled value
+ * (from step 2) is read, the timer is enqueued into GRP1:0, but nothing is
+ * handed back to CPU0, as it seems that there is still an active child in
+ * top level group.
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = TMIGR_NONE
+ * active =
+ * --> next_expiry = TIMER0
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = TMIGR_NONE
+ * active = active =
+ * next_expiry = TIMER0 next_expiry = KTIME_MAX
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle idle idle
+ *
+ * This is prevented by reading the state when holding the lock (when a new
+ * timer has to be propagated from idle path)::
+ *
+ * CPU2 (tmigr_inactive_up()) CPU0 (tmigr_new_timer_up())
+ * -------------------------- ---------------------------
+ * // step 3:
+ * cmpxchg(&GRP1:0->state);
+ * tmigr_update_events() {
+ * spin_lock(&GRP1:0->lock);
+ * // ... update events ...
+ * // hand back first expiry when GRP1:0 is idle
+ * spin_unlock(&GRP1:0->lock);
+ * // ^^^ release state modification
+ * }
+ * tmigr_update_events() {
+ * spin_lock(&GRP1:0->lock)
+ * // ^^^ acquire state modification
+ * group_state = atomic_read(&GRP1:0->state)
+ * // .... update events ...
+ * // hand back first expiry when GRP1:0 is idle
+ * spin_unlock(&GRP1:0->lock) <3>
+ * // ^^^ makes state visible for other
+ * // callers of tmigr_new_timer_up()
+ * }
+ *
+ * When CPU0 grabs the lock directly after cmpxchg, the first timer is reported
+ * back to CPU0 and also later on to CPU2. So no timer is missed. A concurrent
+ * update of the group state from active path is no problem, as the upcoming CPU
+ * will take care of the group events.
+ *
+ * Required event and timerqueue update after a remote expiry:
+ * -----------------------------------------------------------
+ *
+ * After expiring timers of a remote CPU, a walk through the hierarchy and
+ * update of events and timerqueues is required. It is obviously needed if there
+ * is a 'new' global timer but also if there is no new global timer but the
+ * remote CPU is still idle.
+ *
+ * 1. CPU0 and CPU1 are idle and have both a global timer expiring at the same
+ * time. So both have an event enqueued in the timerqueue of GRP0:0. CPU3 is
+ * also idle and has no global timer pending. CPU2 is the only active CPU and
+ * thus also the migrator:
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:1
+ * --> timerqueue = evt-GRP0:0
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = CPU2
+ * active = active = CPU2
+ * groupevt.ignore = false groupevt.ignore = true
+ * groupevt.cpu = CPU0 groupevt.cpu =
+ * timerqueue = evt-CPU0, timerqueue =
+ * evt-CPU1
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle active idle
+ *
+ * 2. CPU2 starts to expire remote timers. It starts with LVL0 group
+ * GRP0:1. There is no event queued in the timerqueue, so CPU2 continues with
+ * the parent of GRP0:1: GRP1:0. In GRP1:0 it dequeues the first event. It
+ * looks at tmigr_event::cpu struct member and expires the pending timer(s)
+ * of CPU0.
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:1
+ * --> timerqueue =
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = CPU2
+ * active = active = CPU2
+ * groupevt.ignore = false groupevt.ignore = true
+ * --> groupevt.cpu = CPU0 groupevt.cpu =
+ * timerqueue = evt-CPU0, timerqueue =
+ * evt-CPU1
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle active idle
+ *
+ * 3. Some work has to be done after expiring the timers of CPU0. If we stop
+ * here, then CPU1's pending global timer(s) will not expire in time and the
+ * timerqueue of GRP0:0 has still an event for CPU0 enqueued which has just
+ * been processed. So it is required to walk the hierarchy from CPU0's point
+ * of view and update it accordingly. CPU0's event will be removed from the
+ * timerqueue because it has no pending timer. If CPU0 would have a timer
+ * pending then it has to expire after CPU1's first timer because all timers
+ * from this period were just expired. Either way CPU1's event will be first
+ * in GRP0:0's timerqueue and therefore set in the CPU field of the group
+ * event which is then enqueued in GRP1:0's timerqueue as GRP0:0 is still not
+ * active:
+ *
+ * LVL 1 [GRP1:0]
+ * migrator = GRP0:1
+ * active = GRP0:1
+ * --> timerqueue = evt-GRP0:0
+ * / \
+ * LVL 0 [GRP0:0] [GRP0:1]
+ * migrator = TMIGR_NONE migrator = CPU2
+ * active = active = CPU2
+ * groupevt.ignore = false groupevt.ignore = true
+ * --> groupevt.cpu = CPU1 groupevt.cpu =
+ * --> timerqueue = evt-CPU1 timerqueue =
+ * / \ / \
+ * CPUs 0 1 2 3
+ * idle idle active idle
+ *
+ * Now CPU2 (migrator) will continue step 2 at GRP1:0 and will expire the
+ * timer(s) of CPU1.
+ *
+ * The hierarchy walk in step 3 can be skipped if the migrator notices that a
+ * CPU of GRP0:0 is active again. The CPU will mark GRP0:0 active and take care
+ * of the group as migrator and any needed updates within the hierarchy.
+ */
+
+static DEFINE_MUTEX(tmigr_mutex);
+static struct list_head *tmigr_level_list __read_mostly;
+
+static unsigned int tmigr_hierarchy_levels __read_mostly;
+static unsigned int tmigr_crossnode_level __read_mostly;
+
+static DEFINE_PER_CPU(struct tmigr_cpu, tmigr_cpu);
+
+#define TMIGR_NONE 0xFF
+#define BIT_CNT 8
+
+static inline bool tmigr_is_not_available(struct tmigr_cpu *tmc)
+{
+ return !(tmc->tmgroup && tmc->online);
+}
+
+/*
+ * Returns true, when @childmask corresponds to the group migrator or when the
+ * group is not active - so no migrator is set.
+ */
+static bool tmigr_check_migrator(struct tmigr_group *group, u8 childmask)
+{
+ union tmigr_state s;
+
+ s.state = atomic_read(&group->migr_state);
+
+ if ((s.migrator == childmask) || (s.migrator == TMIGR_NONE))
+ return true;
+
+ return false;
+}
+
+static bool tmigr_check_migrator_and_lonely(struct tmigr_group *group, u8 childmask)
+{
+ bool lonely, migrator = false;
+ unsigned long active;
+ union tmigr_state s;
+
+ s.state = atomic_read(&group->migr_state);
+
+ if ((s.migrator == childmask) || (s.migrator == TMIGR_NONE))
+ migrator = true;
+
+ active = s.active;
+ lonely = bitmap_weight(&active, BIT_CNT) <= 1;
+
+ return (migrator && lonely);
+}
+
+static bool tmigr_check_lonely(struct tmigr_group *group)
+{
+ unsigned long active;
+ union tmigr_state s;
+
+ s.state = atomic_read(&group->migr_state);
+
+ active = s.active;
+
+ return bitmap_weight(&active, BIT_CNT) <= 1;
+}
+
+typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, void *);
+
+static void __walk_groups(up_f up, void *data,
+ struct tmigr_cpu *tmc)
+{
+ struct tmigr_group *child = NULL, *group = tmc->tmgroup;
+
+ do {
+ WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels);
+
+ if (up(group, child, data))
+ break;
+
+ child = group;
+ group = group->parent;
+ } while (group);
+}
+
+static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc)
+{
+ lockdep_assert_held(&tmc->lock);
+
+ __walk_groups(up, data, tmc);
+}
+
+/**
+ * struct tmigr_walk - data required for walking the hierarchy
+ * @nextexp: Next CPU event expiry information which is handed into
+ * the timer migration code by the timer code
+ * (get_next_timer_interrupt())
+ * @firstexp: Contains the first event expiry information when last
+ * active CPU of hierarchy is on the way to idle to make
+ * sure CPU will be back in time.
+ * @evt: Pointer to tmigr_event which needs to be queued (of idle
+ * child group)
+ * @childmask: childmask of child group
+ * @remote: Is set, when the new timer path is executed in
+ * tmigr_handle_remote_cpu()
+ */
+struct tmigr_walk {
+ u64 nextexp;
+ u64 firstexp;
+ struct tmigr_event *evt;
+ u8 childmask;
+ bool remote;
+};
+
+/**
+ * struct tmigr_remote_data - data required for remote expiry hierarchy walk
+ * @basej: timer base in jiffies
+ * @now: timer base monotonic
+ * @firstexp: returns expiry of the first timer in the idle timer
+ * migration hierarchy to make sure the timer is handled in
+ * time; it is stored in the per CPU tmigr_cpu struct of
+ * CPU which expires remote timers
+ * @childmask: childmask of child group
+ * @check: is set if there is the need to handle remote timers;
+ * required in tmigr_requires_handle_remote() only
+ * @tmc_active: this flag indicates, whether the CPU which triggers
+ * the hierarchy walk is !idle in the timer migration
+ * hierarchy. When the CPU is idle and the whole hierarchy is
+ * idle, only the first event of the top level has to be
+ * considered.
+ */
+struct tmigr_remote_data {
+ unsigned long basej;
+ u64 now;
+ u64 firstexp;
+ u8 childmask;
+ bool check;
+ bool tmc_active;
+};
+
+/*
+ * Returns the next event of the timerqueue @group->events
+ *
+ * Removes timers with ignore flag and update next_expiry of the group. Values
+ * of the group event are updated in tmigr_update_events() only.
+ */
+static struct tmigr_event *tmigr_next_groupevt(struct tmigr_group *group)
+{
+ struct timerqueue_node *node = NULL;
+ struct tmigr_event *evt = NULL;
+
+ lockdep_assert_held(&group->lock);
+
+ WRITE_ONCE(group->next_expiry, KTIME_MAX);
+
+ while ((node = timerqueue_getnext(&group->events))) {
+ evt = container_of(node, struct tmigr_event, nextevt);
+
+ if (!evt->ignore) {
+ WRITE_ONCE(group->next_expiry, evt->nextevt.expires);
+ return evt;
+ }
+
+ /*
+ * Remove next timers with ignore flag, because the group lock
+ * is held anyway
+ */
+ if (!timerqueue_del(&group->events, node))
+ break;
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the next event (with the expiry equal or before @now)
+ *
+ * Event, which is returned, is also removed from the queue.
+ */
+static struct tmigr_event *tmigr_next_expired_groupevt(struct tmigr_group *group,
+ u64 now)
+{
+ struct tmigr_event *evt = tmigr_next_groupevt(group);
+
+ if (!evt || now < evt->nextevt.expires)
+ return NULL;
+
+ /*
+ * The event is ready to expire. Remove it and update next group event.
+ */
+ timerqueue_del(&group->events, &evt->nextevt);
+ tmigr_next_groupevt(group);
+
+ return evt;
+}
+
+static u64 tmigr_next_groupevt_expires(struct tmigr_group *group)
+{
+ struct tmigr_event *evt;
+
+ evt = tmigr_next_groupevt(group);
+
+ if (!evt)
+ return KTIME_MAX;
+ else
+ return evt->nextevt.expires;
+}
+
+static bool tmigr_active_up(struct tmigr_group *group,
+ struct tmigr_group *child,
+ void *ptr)
+{
+ union tmigr_state curstate, newstate;
+ struct tmigr_walk *data = ptr;
+ bool walk_done;
+ u8 childmask;
+
+ childmask = data->childmask;
+ /*
+ * No memory barrier is required here in contrast to
+ * tmigr_inactive_up(), as the group state change does not depend on the
+ * child state.
+ */
+ curstate.state = atomic_read(&group->migr_state);
+
+ do {
+ newstate = curstate;
+ walk_done = true;
+
+ if (newstate.migrator == TMIGR_NONE) {
+ newstate.migrator = childmask;
+
+ /* Changes need to be propagated */
+ walk_done = false;
+ }
+
+ newstate.active |= childmask;
+ newstate.seq++;
+
+ } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state));
+
+ if ((walk_done == false) && group->parent)
+ data->childmask = group->childmask;
+
+ /*
+ * The group is active (again). The group event might be still queued
+ * into the parent group's timerqueue but can now be handled by the
+ * migrator of this group. Therefore the ignore flag for the group event
+ * is updated to reflect this.
+ *
+ * The update of the ignore flag in the active path is done lockless. In
+ * worst case the migrator of the parent group observes the change too
+ * late and expires remotely all events belonging to this group. The
+ * lock is held while updating the ignore flag in idle path. So this
+ * state change will not be lost.
+ */
+ group->groupevt.ignore = true;
+
+ trace_tmigr_group_set_cpu_active(group, newstate, childmask);
+
+ return walk_done;
+}
+
+static void __tmigr_cpu_activate(struct tmigr_cpu *tmc)
+{
+ struct tmigr_walk data;
+
+ data.childmask = tmc->childmask;
+
+ trace_tmigr_cpu_active(tmc);
+
+ tmc->cpuevt.ignore = true;
+ WRITE_ONCE(tmc->wakeup, KTIME_MAX);
+
+ walk_groups(&tmigr_active_up, &data, tmc);
+}
+
+/**
+ * tmigr_cpu_activate() - set this CPU active in timer migration hierarchy
+ *
+ * Call site timer_clear_idle() is called with interrupts disabled.
+ */
+void tmigr_cpu_activate(void)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ if (tmigr_is_not_available(tmc))
+ return;
+
+ if (WARN_ON_ONCE(!tmc->idle))
+ return;
+
+ raw_spin_lock(&tmc->lock);
+ tmc->idle = false;
+ __tmigr_cpu_activate(tmc);
+ raw_spin_unlock(&tmc->lock);
+}
+
+/*
+ * Returns true, if there is nothing to be propagated to the next level
+ *
+ * @data->firstexp is set to expiry of first gobal event of the (top level of
+ * the) hierarchy, but only when hierarchy is completely idle.
+ *
+ * The child and group states need to be read under the lock, to prevent a race
+ * against a concurrent tmigr_inactive_up() run when the last CPU goes idle. See
+ * also section "Prevent race between new event and last CPU going inactive" in
+ * the documentation at the top.
+ *
+ * This is the only place where the group event expiry value is set.
+ */
+static
+bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
+ struct tmigr_walk *data)
+{
+ struct tmigr_event *evt, *first_childevt;
+ union tmigr_state childstate, groupstate;
+ bool remote = data->remote;
+ bool walk_done = false;
+ u64 nextexp;
+
+ if (child) {
+ raw_spin_lock(&child->lock);
+ raw_spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING);
+
+ childstate.state = atomic_read(&child->migr_state);
+ groupstate.state = atomic_read(&group->migr_state);
+
+ if (childstate.active) {
+ walk_done = true;
+ goto unlock;
+ }
+
+ first_childevt = tmigr_next_groupevt(child);
+ nextexp = child->next_expiry;
+ evt = &child->groupevt;
+
+ evt->ignore = (nextexp == KTIME_MAX) ? true : false;
+ } else {
+ nextexp = data->nextexp;
+
+ first_childevt = evt = data->evt;
+
+ /*
+ * Walking the hierarchy is required in any case when a
+ * remote expiry was done before. This ensures to not lose
+ * already queued events in non active groups (see section
+ * "Required event and timerqueue update after a remote
+ * expiry" in the documentation at the top).
+ *
+ * The two call sites which are executed without a remote expiry
+ * before, are not prevented from propagating changes through
+ * the hierarchy by the return:
+ * - When entering this path by tmigr_new_timer(), @evt->ignore
+ * is never set.
+ * - tmigr_inactive_up() takes care of the propagation by
+ * itself and ignores the return value. But an immediate
+ * return is required because nothing has to be done in this
+ * level as the event could be ignored.
+ */
+ if (evt->ignore && !remote)
+ return true;
+
+ raw_spin_lock(&group->lock);
+
+ childstate.state = 0;
+ groupstate.state = atomic_read(&group->migr_state);
+ }
+
+ /*
+ * If the child event is already queued in the group, remove it from the
+ * queue when the expiry time changed only or when it could be ignored.
+ */
+ if (timerqueue_node_queued(&evt->nextevt)) {
+ if ((evt->nextevt.expires == nextexp) && !evt->ignore)
+ goto check_toplvl;
+
+ if (!timerqueue_del(&group->events, &evt->nextevt))
+ WRITE_ONCE(group->next_expiry, KTIME_MAX);
+ }
+
+ if (evt->ignore) {
+ /*
+ * When the next child event could be ignored (nextexp is
+ * KTIME_MAX) and there was no remote timer handling before or
+ * the group is already active, there is no need to walk the
+ * hierarchy even if there is a parent group.
+ *
+ * The other way round: even if the event could be ignored, but
+ * if a remote timer handling was executed before and the group
+ * is not active, walking the hierarchy is required to not miss
+ * an enqueued timer in the non active group. The enqueued timer
+ * of the group needs to be propagated to a higher level to
+ * ensure it is handled.
+ */
+ if (!remote || groupstate.active)
+ walk_done = true;
+ } else {
+ evt->nextevt.expires = nextexp;
+ evt->cpu = first_childevt->cpu;
+
+ if (timerqueue_add(&group->events, &evt->nextevt))
+ WRITE_ONCE(group->next_expiry, nextexp);
+ }
+
+check_toplvl:
+ if (!group->parent && (groupstate.migrator == TMIGR_NONE)) {
+ walk_done = true;
+
+ /*
+ * Nothing to do when update was done during remote timer
+ * handling. First timer in top level group which needs to be
+ * handled when top level group is not active, is calculated
+ * directly in tmigr_handle_remote_up().
+ */
+ if (remote)
+ goto unlock;
+
+ /*
+ * The top level group is idle and it has to be ensured the
+ * global timers are handled in time. (This could be optimized
+ * by keeping track of the last global scheduled event and only
+ * arming it on the CPU if the new event is earlier. Not sure if
+ * its worth the complexity.)
+ */
+ data->firstexp = tmigr_next_groupevt_expires(group);
+ }
+
+ trace_tmigr_update_events(child, group, childstate, groupstate,
+ nextexp);
+
+unlock:
+ raw_spin_unlock(&group->lock);
+
+ if (child)
+ raw_spin_unlock(&child->lock);
+
+ return walk_done;
+}
+
+static bool tmigr_new_timer_up(struct tmigr_group *group,
+ struct tmigr_group *child,
+ void *ptr)
+{
+ struct tmigr_walk *data = ptr;
+
+ return tmigr_update_events(group, child, data);
+}
+
+/*
+ * Returns the expiry of the next timer that needs to be handled. KTIME_MAX is
+ * returned, if an active CPU will handle all the timer migration hierarchy
+ * timers.
+ */
+static u64 tmigr_new_timer(struct tmigr_cpu *tmc, u64 nextexp)
+{
+ struct tmigr_walk data = { .nextexp = nextexp,
+ .firstexp = KTIME_MAX,
+ .evt = &tmc->cpuevt };
+
+ lockdep_assert_held(&tmc->lock);
+
+ if (tmc->remote)
+ return KTIME_MAX;
+
+ trace_tmigr_cpu_new_timer(tmc);
+
+ tmc->cpuevt.ignore = false;
+ data.remote = false;
+
+ walk_groups(&tmigr_new_timer_up, &data, tmc);
+
+ /* If there is a new first global event, make sure it is handled */
+ return data.firstexp;
+}
+
+static void tmigr_handle_remote_cpu(unsigned int cpu, u64 now,
+ unsigned long jif)
+{
+ struct timer_events tevt;
+ struct tmigr_walk data;
+ struct tmigr_cpu *tmc;
+
+ tmc = per_cpu_ptr(&tmigr_cpu, cpu);
+
+ raw_spin_lock_irq(&tmc->lock);
+
+ /*
+ * If the remote CPU is offline then the timers have been migrated to
+ * another CPU.
+ *
+ * If tmigr_cpu::remote is set, at the moment another CPU already
+ * expires the timers of the remote CPU.
+ *
+ * If tmigr_event::ignore is set, then the CPU returns from idle and
+ * takes care of its timers.
+ *
+ * If the next event expires in the future, then the event has been
+ * updated and there are no timers to expire right now. The CPU which
+ * updated the event takes care when hierarchy is completely
+ * idle. Otherwise the migrator does it as the event is enqueued.
+ */
+ if (!tmc->online || tmc->remote || tmc->cpuevt.ignore ||
+ now < tmc->cpuevt.nextevt.expires) {
+ raw_spin_unlock_irq(&tmc->lock);
+ return;
+ }
+
+ trace_tmigr_handle_remote_cpu(tmc);
+
+ tmc->remote = true;
+ WRITE_ONCE(tmc->wakeup, KTIME_MAX);
+
+ /* Drop the lock to allow the remote CPU to exit idle */
+ raw_spin_unlock_irq(&tmc->lock);
+
+ if (cpu != smp_processor_id())
+ timer_expire_remote(cpu);
+
+ /*
+ * Lock ordering needs to be preserved - timer_base locks before tmigr
+ * related locks (see section "Locking rules" in the documentation at
+ * the top). During fetching the next timer interrupt, also tmc->lock
+ * needs to be held. Otherwise there is a possible race window against
+ * the CPU itself when it comes out of idle, updates the first timer in
+ * the hierarchy and goes back to idle.
+ *
+ * timer base locks are dropped as fast as possible: After checking
+ * whether the remote CPU went offline in the meantime and after
+ * fetching the next remote timer interrupt. Dropping the locks as fast
+ * as possible keeps the locking region small and prevents holding
+ * several (unnecessary) locks during walking the hierarchy for updating
+ * the timerqueue and group events.
+ */
+ local_irq_disable();
+ timer_lock_remote_bases(cpu);
+ raw_spin_lock(&tmc->lock);
+
+ /*
+ * When the CPU went offline in the meantime, no hierarchy walk has to
+ * be done for updating the queued events, because the walk was
+ * already done during marking the CPU offline in the hierarchy.
+ *
+ * When the CPU is no longer idle, the CPU takes care of the timers and
+ * also of the timers in the hierarchy.
+ *
+ * (See also section "Required event and timerqueue update after a
+ * remote expiry" in the documentation at the top)
+ */
+ if (!tmc->online || !tmc->idle) {
+ timer_unlock_remote_bases(cpu);
+ goto unlock;
+ }
+
+ /* next event of CPU */
+ fetch_next_timer_interrupt_remote(jif, now, &tevt, cpu);
+ timer_unlock_remote_bases(cpu);
+
+ data.nextexp = tevt.global;
+ data.firstexp = KTIME_MAX;
+ data.evt = &tmc->cpuevt;
+ data.remote = true;
+
+ /*
+ * The update is done even when there is no 'new' global timer pending
+ * on the remote CPU (see section "Required event and timerqueue update
+ * after a remote expiry" in the documentation at the top)
+ */
+ walk_groups(&tmigr_new_timer_up, &data, tmc);
+
+unlock:
+ tmc->remote = false;
+ raw_spin_unlock_irq(&tmc->lock);
+}
+
+static bool tmigr_handle_remote_up(struct tmigr_group *group,
+ struct tmigr_group *child,
+ void *ptr)
+{
+ struct tmigr_remote_data *data = ptr;
+ struct tmigr_event *evt;
+ unsigned long jif;
+ u8 childmask;
+ u64 now;
+
+ jif = data->basej;
+ now = data->now;
+
+ childmask = data->childmask;
+
+ trace_tmigr_handle_remote(group);
+again:
+ /*
+ * Handle the group only if @childmask is the migrator or if the
+ * group has no migrator. Otherwise the group is active and is
+ * handled by its own migrator.
+ */
+ if (!tmigr_check_migrator(group, childmask))
+ return true;
+
+ raw_spin_lock_irq(&group->lock);
+
+ evt = tmigr_next_expired_groupevt(group, now);
+
+ if (evt) {
+ unsigned int remote_cpu = evt->cpu;
+
+ raw_spin_unlock_irq(&group->lock);
+
+ tmigr_handle_remote_cpu(remote_cpu, now, jif);
+
+ /* check if there is another event, that needs to be handled */
+ goto again;
+ }
+
+ /*
+ * Update of childmask for the next level and keep track of the expiry
+ * of the first event that needs to be handled (group->next_expiry was
+ * updated by tmigr_next_expired_groupevt(), next was set by
+ * tmigr_handle_remote_cpu()).
+ */
+ data->childmask = group->childmask;
+ data->firstexp = group->next_expiry;
+
+ raw_spin_unlock_irq(&group->lock);
+
+ return false;
+}
+
+/**
+ * tmigr_handle_remote() - Handle global timers of remote idle CPUs
+ *
+ * Called from the timer soft interrupt with interrupts enabled.
+ */
+void tmigr_handle_remote(void)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ struct tmigr_remote_data data;
+
+ if (tmigr_is_not_available(tmc))
+ return;
+
+ data.childmask = tmc->childmask;
+ data.firstexp = KTIME_MAX;
+
+ /*
+ * NOTE: This is a doubled check because the migrator test will be done
+ * in tmigr_handle_remote_up() anyway. Keep this check to speed up the
+ * return when nothing has to be done.
+ */
+ if (!tmigr_check_migrator(tmc->tmgroup, tmc->childmask))
+ return;
+
+ data.now = get_jiffies_update(&data.basej);
+
+ /*
+ * Update @tmc->wakeup only at the end and do not reset @tmc->wakeup to
+ * KTIME_MAX. Even if tmc->lock is not held during the whole remote
+ * handling, tmc->wakeup is fine to be stale as it is called in
+ * interrupt context and tick_nohz_next_event() is executed in interrupt
+ * exit path only after processing the last pending interrupt.
+ */
+
+ __walk_groups(&tmigr_handle_remote_up, &data, tmc);
+
+ raw_spin_lock_irq(&tmc->lock);
+ WRITE_ONCE(tmc->wakeup, data.firstexp);
+ raw_spin_unlock_irq(&tmc->lock);
+}
+
+static bool tmigr_requires_handle_remote_up(struct tmigr_group *group,
+ struct tmigr_group *child,
+ void *ptr)
+{
+ struct tmigr_remote_data *data = ptr;
+ u8 childmask;
+
+ childmask = data->childmask;
+
+ /*
+ * Handle the group only if the child is the migrator or if the group
+ * has no migrator. Otherwise the group is active and is handled by its
+ * own migrator.
+ */
+ if (!tmigr_check_migrator(group, childmask))
+ return true;
+
+ /*
+ * When there is a parent group and the CPU which triggered the
+ * hierarchy walk is not active, proceed the walk to reach the top level
+ * group before reading the next_expiry value.
+ */
+ if (group->parent && !data->tmc_active)
+ goto out;
+
+ /*
+ * The lock is required on 32bit architectures to read the variable
+ * consistently with a concurrent writer. On 64bit the lock is not
+ * required because the read operation is not split and so it is always
+ * consistent.
+ */
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ data->firstexp = READ_ONCE(group->next_expiry);
+ if (data->now >= data->firstexp) {
+ data->check = true;
+ return true;
+ }
+ } else {
+ raw_spin_lock(&group->lock);
+ data->firstexp = group->next_expiry;
+ if (data->now >= group->next_expiry) {
+ data->check = true;
+ raw_spin_unlock(&group->lock);
+ return true;
+ }
+ raw_spin_unlock(&group->lock);
+ }
+
+out:
+ /* Update of childmask for the next level */
+ data->childmask = group->childmask;
+ return false;
+}
+
+/**
+ * tmigr_requires_handle_remote() - Check the need of remote timer handling
+ *
+ * Must be called with interrupts disabled.
+ */
+bool tmigr_requires_handle_remote(void)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ struct tmigr_remote_data data;
+ unsigned long jif;
+ bool ret = false;
+
+ if (tmigr_is_not_available(tmc))
+ return ret;
+
+ data.now = get_jiffies_update(&jif);
+ data.childmask = tmc->childmask;
+ data.firstexp = KTIME_MAX;
+ data.tmc_active = !tmc->idle;
+ data.check = false;
+
+ /*
+ * If the CPU is active, walk the hierarchy to check whether a remote
+ * expiry is required.
+ *
+ * Check is done lockless as interrupts are disabled and @tmc->idle is
+ * set only by the local CPU.
+ */
+ if (!tmc->idle) {
+ __walk_groups(&tmigr_requires_handle_remote_up, &data, tmc);
+
+ return data.check;
+ }
+
+ /*
+ * When the CPU is idle, compare @tmc->wakeup with @data.now. The lock
+ * is required on 32bit architectures to read the variable consistently
+ * with a concurrent writer. On 64bit the lock is not required because
+ * the read operation is not split and so it is always consistent.
+ */
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ if (data.now >= READ_ONCE(tmc->wakeup))
+ return true;
+ } else {
+ raw_spin_lock(&tmc->lock);
+ if (data.now >= tmc->wakeup)
+ ret = true;
+ raw_spin_unlock(&tmc->lock);
+ }
+
+ return ret;
+}
+
+/**
+ * tmigr_cpu_new_timer() - enqueue next global timer into hierarchy (idle tmc)
+ * @nextexp: Next expiry of global timer (or KTIME_MAX if not)
+ *
+ * The CPU is already deactivated in the timer migration
+ * hierarchy. tick_nohz_get_sleep_length() calls tick_nohz_next_event()
+ * and thereby the timer idle path is executed once more. @tmc->wakeup
+ * holds the first timer, when the timer migration hierarchy is
+ * completely idle.
+ *
+ * Returns the first timer that needs to be handled by this CPU or KTIME_MAX if
+ * nothing needs to be done.
+ */
+u64 tmigr_cpu_new_timer(u64 nextexp)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ u64 ret;
+
+ if (tmigr_is_not_available(tmc))
+ return nextexp;
+
+ raw_spin_lock(&tmc->lock);
+
+ ret = READ_ONCE(tmc->wakeup);
+ if (nextexp != KTIME_MAX) {
+ if (nextexp != tmc->cpuevt.nextevt.expires ||
+ tmc->cpuevt.ignore) {
+ ret = tmigr_new_timer(tmc, nextexp);
+ }
+ }
+ /*
+ * Make sure the reevaluation of timers in idle path will not miss an
+ * event.
+ */
+ WRITE_ONCE(tmc->wakeup, ret);
+
+ trace_tmigr_cpu_new_timer_idle(tmc, nextexp);
+ raw_spin_unlock(&tmc->lock);
+ return ret;
+}
+
+static bool tmigr_inactive_up(struct tmigr_group *group,
+ struct tmigr_group *child,
+ void *ptr)
+{
+ union tmigr_state curstate, newstate, childstate;
+ struct tmigr_walk *data = ptr;
+ bool walk_done;
+ u8 childmask;
+
+ childmask = data->childmask;
+ childstate.state = 0;
+
+ /*
+ * The memory barrier is paired with the cmpxchg() in tmigr_active_up()
+ * to make sure the updates of child and group states are ordered. The
+ * ordering is mandatory, as the group state change depends on the child
+ * state.
+ */
+ curstate.state = atomic_read_acquire(&group->migr_state);
+
+ for (;;) {
+ if (child)
+ childstate.state = atomic_read(&child->migr_state);
+
+ newstate = curstate;
+ walk_done = true;
+
+ /* Reset active bit when the child is no longer active */
+ if (!childstate.active)
+ newstate.active &= ~childmask;
+
+ if (newstate.migrator == childmask) {
+ /*
+ * Find a new migrator for the group, because the child
+ * group is idle!
+ */
+ if (!childstate.active) {
+ unsigned long new_migr_bit, active = newstate.active;
+
+ new_migr_bit = find_first_bit(&active, BIT_CNT);
+
+ if (new_migr_bit != BIT_CNT) {
+ newstate.migrator = BIT(new_migr_bit);
+ } else {
+ newstate.migrator = TMIGR_NONE;
+
+ /* Changes need to be propagated */
+ walk_done = false;
+ }
+ }
+ }
+
+ newstate.seq++;
+
+ WARN_ON_ONCE((newstate.migrator != TMIGR_NONE) && !(newstate.active));
+
+ if (atomic_try_cmpxchg(&group->migr_state, &curstate.state,
+ newstate.state))
+ break;
+
+ /*
+ * The memory barrier is paired with the cmpxchg() in
+ * tmigr_active_up() to make sure the updates of child and group
+ * states are ordered. It is required only when the above
+ * try_cmpxchg() fails.
+ */
+ smp_mb__after_atomic();
+ }
+
+ data->remote = false;
+
+ /* Event Handling */
+ tmigr_update_events(group, child, data);
+
+ if (group->parent && (walk_done == false))
+ data->childmask = group->childmask;
+
+ /*
+ * data->firstexp was set by tmigr_update_events() and contains the
+ * expiry of the first global event which needs to be handled. It
+ * differs from KTIME_MAX if:
+ * - group is the top level group and
+ * - group is idle (which means CPU was the last active CPU in the
+ * hierarchy) and
+ * - there is a pending event in the hierarchy
+ */
+ WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);
+
+ trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
+
+ return walk_done;
+}
+
+static u64 __tmigr_cpu_deactivate(struct tmigr_cpu *tmc, u64 nextexp)
+{
+ struct tmigr_walk data = { .nextexp = nextexp,
+ .firstexp = KTIME_MAX,
+ .evt = &tmc->cpuevt,
+ .childmask = tmc->childmask };
+
+ /*
+ * If nextexp is KTIME_MAX, the CPU event will be ignored because the
+ * local timer expires before the global timer, no global timer is set
+ * or CPU goes offline.
+ */
+ if (nextexp != KTIME_MAX)
+ tmc->cpuevt.ignore = false;
+
+ walk_groups(&tmigr_inactive_up, &data, tmc);
+ return data.firstexp;
+}
+
+/**
+ * tmigr_cpu_deactivate() - Put current CPU into inactive state
+ * @nextexp: The next global timer expiry of the current CPU
+ *
+ * Must be called with interrupts disabled.
+ *
+ * Return: the next event expiry of the current CPU or the next event expiry
+ * from the hierarchy if this CPU is the top level migrator or the hierarchy is
+ * completely idle.
+ */
+u64 tmigr_cpu_deactivate(u64 nextexp)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ u64 ret;
+
+ if (tmigr_is_not_available(tmc))
+ return nextexp;
+
+ raw_spin_lock(&tmc->lock);
+
+ ret = __tmigr_cpu_deactivate(tmc, nextexp);
+
+ tmc->idle = true;
+
+ /*
+ * Make sure the reevaluation of timers in idle path will not miss an
+ * event.
+ */
+ WRITE_ONCE(tmc->wakeup, ret);
+
+ trace_tmigr_cpu_idle(tmc, nextexp);
+ raw_spin_unlock(&tmc->lock);
+ return ret;
+}
+
+/**
+ * tmigr_quick_check() - Quick forecast of next tmigr event when CPU wants to
+ * go idle
+ * @nextevt: The next global timer expiry of the current CPU
+ *
+ * Return:
+ * * KTIME_MAX - when it is probable that nothing has to be done (not
+ * the only one in the level 0 group; and if it is the
+ * only one in level 0 group, but there are more than a
+ * single group active on the way to top level)
+ * * nextevt - when CPU is offline and has to handle timer on his own
+ * or when on the way to top in every group only a single
+ * child is active but @nextevt is before the lowest
+ * next_expiry encountered while walking up to top level.
+ * * next_expiry - value of lowest expiry encountered while walking groups
+ * if only a single child is active on each and @nextevt
+ * is after this lowest expiry.
+ */
+u64 tmigr_quick_check(u64 nextevt)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ struct tmigr_group *group = tmc->tmgroup;
+
+ if (tmigr_is_not_available(tmc))
+ return nextevt;
+
+ if (WARN_ON_ONCE(tmc->idle))
+ return nextevt;
+
+ if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->childmask))
+ return KTIME_MAX;
+
+ do {
+ if (!tmigr_check_lonely(group)) {
+ return KTIME_MAX;
+ } else {
+ /*
+ * Since current CPU is active, events may not be sorted
+ * from bottom to the top because the CPU's event is ignored
+ * up to the top and its sibling's events not propagated upwards.
+ * Thus keep track of the lowest observed expiry.
+ */
+ nextevt = min_t(u64, nextevt, READ_ONCE(group->next_expiry));
+ if (!group->parent)
+ return nextevt;
+ }
+ group = group->parent;
+ } while (group);
+
+ return KTIME_MAX;
+}
+
+static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
+ int node)
+{
+ union tmigr_state s;
+
+ raw_spin_lock_init(&group->lock);
+
+ group->level = lvl;
+ group->numa_node = lvl < tmigr_crossnode_level ? node : NUMA_NO_NODE;
+
+ group->num_children = 0;
+
+ s.migrator = TMIGR_NONE;
+ s.active = 0;
+ s.seq = 0;
+ atomic_set(&group->migr_state, s.state);
+
+ timerqueue_init_head(&group->events);
+ timerqueue_init(&group->groupevt.nextevt);
+ group->groupevt.nextevt.expires = KTIME_MAX;
+ WRITE_ONCE(group->next_expiry, KTIME_MAX);
+ group->groupevt.ignore = true;
+}
+
+static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,
+ unsigned int lvl)
+{
+ struct tmigr_group *tmp, *group = NULL;
+
+ lockdep_assert_held(&tmigr_mutex);
+
+ /* Try to attach to an existing group first */
+ list_for_each_entry(tmp, &tmigr_level_list[lvl], list) {
+ /*
+ * If @lvl is below the cross NUMA node level, check whether
+ * this group belongs to the same NUMA node.
+ */
+ if (lvl < tmigr_crossnode_level && tmp->numa_node != node)
+ continue;
+
+ /* Capacity left? */
+ if (tmp->num_children >= TMIGR_CHILDREN_PER_GROUP)
+ continue;
+
+ /*
+ * TODO: A possible further improvement: Make sure that all CPU
+ * siblings end up in the same group of the lowest level of the
+ * hierarchy. Rely on the topology sibling mask would be a
+ * reasonable solution.
+ */
+
+ group = tmp;
+ break;
+ }
+
+ if (group)
+ return group;
+
+ /* Allocate and set up a new group */
+ group = kzalloc_node(sizeof(*group), GFP_KERNEL, node);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ tmigr_init_group(group, lvl, node);
+
+ /* Setup successful. Add it to the hierarchy */
+ list_add(&group->list, &tmigr_level_list[lvl]);
+ trace_tmigr_group_set(group);
+ return group;
+}
+
+static void tmigr_connect_child_parent(struct tmigr_group *child,
+ struct tmigr_group *parent)
+{
+ union tmigr_state childstate;
+
+ raw_spin_lock_irq(&child->lock);
+ raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
+
+ child->parent = parent;
+ child->childmask = BIT(parent->num_children++);
+
+ raw_spin_unlock(&parent->lock);
+ raw_spin_unlock_irq(&child->lock);
+
+ trace_tmigr_connect_child_parent(child);
+
+ /*
+ * To prevent inconsistent states, active children need to be active in
+ * the new parent as well. Inactive children are already marked inactive
+ * in the parent group:
+ *
+ * * When new groups were created by tmigr_setup_groups() starting from
+ * the lowest level (and not higher then one level below the current
+ * top level), then they are not active. They will be set active when
+ * the new online CPU comes active.
+ *
+ * * But if a new group above the current top level is required, it is
+ * mandatory to propagate the active state of the already existing
+ * child to the new parent. So tmigr_connect_child_parent() is
+ * executed with the formerly top level group (child) and the newly
+ * created group (parent).
+ */
+ childstate.state = atomic_read(&child->migr_state);
+ if (childstate.migrator != TMIGR_NONE) {
+ struct tmigr_walk data;
+
+ data.childmask = child->childmask;
+
+ /*
+ * There is only one new level per time. When connecting the
+ * child and the parent and set the child active when the parent
+ * is inactive, the parent needs to be the uppermost
+ * level. Otherwise there went something wrong!
+ */
+ WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
+ }
+}
+
+static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
+{
+ struct tmigr_group *group, *child, **stack;
+ int top = 0, err = 0, i = 0;
+ struct list_head *lvllist;
+
+ stack = kcalloc(tmigr_hierarchy_levels, sizeof(*stack), GFP_KERNEL);
+ if (!stack)
+ return -ENOMEM;
+
+ do {
+ group = tmigr_get_group(cpu, node, i);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ break;
+ }
+
+ top = i;
+ stack[i++] = group;
+
+ /*
+ * When booting only less CPUs of a system than CPUs are
+ * available, not all calculated hierarchy levels are required.
+ *
+ * The loop is aborted as soon as the highest level, which might
+ * be different from tmigr_hierarchy_levels, contains only a
+ * single group.
+ */
+ if (group->parent || i == tmigr_hierarchy_levels ||
+ (list_empty(&tmigr_level_list[i]) &&
+ list_is_singular(&tmigr_level_list[i - 1])))
+ break;
+
+ } while (i < tmigr_hierarchy_levels);
+
+ do {
+ group = stack[--i];
+
+ if (err < 0) {
+ list_del(&group->list);
+ kfree(group);
+ continue;
+ }
+
+ WARN_ON_ONCE(i != group->level);
+
+ /*
+ * Update tmc -> group / child -> group connection
+ */
+ if (i == 0) {
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ raw_spin_lock_irq(&group->lock);
+
+ tmc->tmgroup = group;
+ tmc->childmask = BIT(group->num_children++);
+
+ raw_spin_unlock_irq(&group->lock);
+
+ trace_tmigr_connect_cpu_parent(tmc);
+
+ /* There are no children that need to be connected */
+ continue;
+ } else {
+ child = stack[i - 1];
+ tmigr_connect_child_parent(child, group);
+ }
+
+ /* check if uppermost level was newly created */
+ if (top != i)
+ continue;
+
+ WARN_ON_ONCE(top == 0);
+
+ lvllist = &tmigr_level_list[top];
+ if (group->num_children == 1 && list_is_singular(lvllist)) {
+ lvllist = &tmigr_level_list[top - 1];
+ list_for_each_entry(child, lvllist, list) {
+ if (child->parent)
+ continue;
+
+ tmigr_connect_child_parent(child, group);
+ }
+ }
+ } while (i > 0);
+
+ kfree(stack);
+
+ return err;
+}
+
+static int tmigr_add_cpu(unsigned int cpu)
+{
+ int node = cpu_to_node(cpu);
+ int ret;
+
+ mutex_lock(&tmigr_mutex);
+ ret = tmigr_setup_groups(cpu, node);
+ mutex_unlock(&tmigr_mutex);
+
+ return ret;
+}
+
+static int tmigr_cpu_online(unsigned int cpu)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ int ret;
+
+ /* First online attempt? Initialize CPU data */
+ if (!tmc->tmgroup) {
+ raw_spin_lock_init(&tmc->lock);
+
+ ret = tmigr_add_cpu(cpu);
+ if (ret < 0)
+ return ret;
+
+ if (tmc->childmask == 0)
+ return -EINVAL;
+
+ timerqueue_init(&tmc->cpuevt.nextevt);
+ tmc->cpuevt.nextevt.expires = KTIME_MAX;
+ tmc->cpuevt.ignore = true;
+ tmc->cpuevt.cpu = cpu;
+
+ tmc->remote = false;
+ WRITE_ONCE(tmc->wakeup, KTIME_MAX);
+ }
+ raw_spin_lock_irq(&tmc->lock);
+ trace_tmigr_cpu_online(tmc);
+ tmc->idle = timer_base_is_idle();
+ if (!tmc->idle)
+ __tmigr_cpu_activate(tmc);
+ tmc->online = true;
+ raw_spin_unlock_irq(&tmc->lock);
+ return 0;
+}
+
+/*
+ * tmigr_trigger_active() - trigger a CPU to become active again
+ *
+ * This function is executed on a CPU which is part of cpu_online_mask, when the
+ * last active CPU in the hierarchy is offlining. With this, it is ensured that
+ * the other CPU is active and takes over the migrator duty.
+ */
+static long tmigr_trigger_active(void *unused)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ WARN_ON_ONCE(!tmc->online || tmc->idle);
+
+ return 0;
+}
+
+static int tmigr_cpu_offline(unsigned int cpu)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ int migrator;
+ u64 firstexp;
+
+ raw_spin_lock_irq(&tmc->lock);
+ tmc->online = false;
+ WRITE_ONCE(tmc->wakeup, KTIME_MAX);
+
+ /*
+ * CPU has to handle the local events on his own, when on the way to
+ * offline; Therefore nextevt value is set to KTIME_MAX
+ */
+ firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
+ trace_tmigr_cpu_offline(tmc);
+ raw_spin_unlock_irq(&tmc->lock);
+
+ if (firstexp != KTIME_MAX) {
+ migrator = cpumask_any_but(cpu_online_mask, cpu);
+ work_on_cpu(migrator, tmigr_trigger_active, NULL);
+ }
+
+ return 0;
+}
+
+static int __init tmigr_init(void)
+{
+ unsigned int cpulvl, nodelvl, cpus_per_node, i;
+ unsigned int nnodes = num_possible_nodes();
+ unsigned int ncpus = num_possible_cpus();
+ int ret = -ENOMEM;
+
+ BUILD_BUG_ON_NOT_POWER_OF_2(TMIGR_CHILDREN_PER_GROUP);
+
+ /* Nothing to do if running on UP */
+ if (ncpus == 1)
+ return 0;
+
+ /*
+ * Calculate the required hierarchy levels. Unfortunately there is no
+ * reliable information available, unless all possible CPUs have been
+ * brought up and all NUMA nodes are populated.
+ *
+ * Estimate the number of levels with the number of possible nodes and
+ * the number of possible CPUs. Assume CPUs are spread evenly across
+ * nodes. We cannot rely on cpumask_of_node() because it only works for
+ * online CPUs.
+ */
+ cpus_per_node = DIV_ROUND_UP(ncpus, nnodes);
+
+ /* Calc the hierarchy levels required to hold the CPUs of a node */
+ cpulvl = DIV_ROUND_UP(order_base_2(cpus_per_node),
+ ilog2(TMIGR_CHILDREN_PER_GROUP));
+
+ /* Calculate the extra levels to connect all nodes */
+ nodelvl = DIV_ROUND_UP(order_base_2(nnodes),
+ ilog2(TMIGR_CHILDREN_PER_GROUP));
+
+ tmigr_hierarchy_levels = cpulvl + nodelvl;
+
+ /*
+ * If a NUMA node spawns more than one CPU level group then the next
+ * level(s) of the hierarchy contains groups which handle all CPU groups
+ * of the same NUMA node. The level above goes across NUMA nodes. Store
+ * this information for the setup code to decide in which level node
+ * matching is no longer required.
+ */
+ tmigr_crossnode_level = cpulvl;
+
+ tmigr_level_list = kcalloc(tmigr_hierarchy_levels, sizeof(struct list_head), GFP_KERNEL);
+ if (!tmigr_level_list)
+ goto err;
+
+ for (i = 0; i < tmigr_hierarchy_levels; i++)
+ INIT_LIST_HEAD(&tmigr_level_list[i]);
+
+ pr_info("Timer migration: %d hierarchy levels; %d children per group;"
+ " %d crossnode level\n",
+ tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP,
+ tmigr_crossnode_level);
+
+ ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online",
+ tmigr_cpu_online, tmigr_cpu_offline);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ pr_err("Timer migration setup failed\n");
+ return ret;
+}
+late_initcall(tmigr_init);
diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h
new file mode 100644
index 000000000000..6c37d94a37d9
--- /dev/null
+++ b/kernel/time/timer_migration.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _KERNEL_TIME_MIGRATION_H
+#define _KERNEL_TIME_MIGRATION_H
+
+/* Per group capacity. Must be a power of 2! */
+#define TMIGR_CHILDREN_PER_GROUP 8
+
+/**
+ * struct tmigr_event - a timer event associated to a CPU
+ * @nextevt: The node to enqueue an event in the parent group queue
+ * @cpu: The CPU to which this event belongs
+ * @ignore: Hint whether the event could be ignored; it is set when
+ * CPU or group is active;
+ */
+struct tmigr_event {
+ struct timerqueue_node nextevt;
+ unsigned int cpu;
+ bool ignore;
+};
+
+/**
+ * struct tmigr_group - timer migration hierarchy group
+ * @lock: Lock protecting the event information and group hierarchy
+ * information during setup
+ * @parent: Pointer to the parent group
+ * @groupevt: Next event of the group which is only used when the
+ * group is !active. The group event is then queued into
+ * the parent timer queue.
+ * Ignore bit of @groupevt is set when the group is active.
+ * @next_expiry: Base monotonic expiry time of the next event of the
+ * group; It is used for the racy lockless check whether a
+ * remote expiry is required; it is always reliable
+ * @events: Timer queue for child events queued in the group
+ * @migr_state: State of the group (see union tmigr_state)
+ * @level: Hierarchy level of the group; Required during setup
+ * @numa_node: Required for setup only to make sure CPU and low level
+ * group information is NUMA local. It is set to NUMA node
+ * as long as the group level is per NUMA node (level <
+ * tmigr_crossnode_level); otherwise it is set to
+ * NUMA_NO_NODE
+ * @num_children: Counter of group children to make sure the group is only
+ * filled with TMIGR_CHILDREN_PER_GROUP; Required for setup
+ * only
+ * @childmask: childmask of the group in the parent group; is set
+ * during setup and will never change; can be read
+ * lockless
+ * @list: List head that is added to the per level
+ * tmigr_level_list; is required during setup when a
+ * new group needs to be connected to the existing
+ * hierarchy groups
+ */
+struct tmigr_group {
+ raw_spinlock_t lock;
+ struct tmigr_group *parent;
+ struct tmigr_event groupevt;
+ u64 next_expiry;
+ struct timerqueue_head events;
+ atomic_t migr_state;
+ unsigned int level;
+ int numa_node;
+ unsigned int num_children;
+ u8 childmask;
+ struct list_head list;
+};
+
+/**
+ * struct tmigr_cpu - timer migration per CPU group
+ * @lock: Lock protecting the tmigr_cpu group information
+ * @online: Indicates whether the CPU is online; In deactivate path
+ * it is required to know whether the migrator in the top
+ * level group is to be set offline, while a timer is
+ * pending. Then another online CPU needs to be notified to
+ * take over the migrator role. Furthermore the information
+ * is required in CPU hotplug path as the CPU is able to go
+ * idle before the timer migration hierarchy hotplug AP is
+ * reached. During this phase, the CPU has to handle the
+ * global timers on its own and must not act as a migrator.
+ * @idle: Indicates whether the CPU is idle in the timer migration
+ * hierarchy
+ * @remote: Is set when timers of the CPU are expired remotely
+ * @tmgroup: Pointer to the parent group
+ * @childmask: childmask of tmigr_cpu in the parent group
+ * @wakeup: Stores the first timer when the timer migration
+ * hierarchy is completely idle and remote expiry was done;
+ * is returned to timer code in the idle path and is only
+ * used in idle path.
+ * @cpuevt: CPU event which could be enqueued into the parent group
+ */
+struct tmigr_cpu {
+ raw_spinlock_t lock;
+ bool online;
+ bool idle;
+ bool remote;
+ struct tmigr_group *tmgroup;
+ u8 childmask;
+ u64 wakeup;
+ struct tmigr_event cpuevt;
+};
+
+/**
+ * union tmigr_state - state of tmigr_group
+ * @state: Combined version of the state - only used for atomic
+ * read/cmpxchg function
+ * @struct: Split version of the state - only use the struct members to
+ * update information to stay independent of endianness
+ */
+union tmigr_state {
+ u32 state;
+ /**
+ * struct - split state of tmigr_group
+ * @active: Contains each childmask bit of the active children
+ * @migrator: Contains childmask of the child which is migrator
+ * @seq: Sequence counter needs to be increased when an update
+ * to the tmigr_state is done. It prevents a race when
+ * updates in the child groups are propagated in changed
+ * order. Detailed information about the scenario is
+ * given in the documentation at the begin of
+ * timer_migration.c.
+ */
+ struct {
+ u8 active;
+ u8 migrator;
+ u16 seq;
+ } __packed;
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void tmigr_handle_remote(void);
+extern bool tmigr_requires_handle_remote(void);
+extern void tmigr_cpu_activate(void);
+extern u64 tmigr_cpu_deactivate(u64 nextevt);
+extern u64 tmigr_cpu_new_timer(u64 nextevt);
+extern u64 tmigr_quick_check(u64 nextevt);
+#else
+static inline void tmigr_handle_remote(void) { }
+static inline bool tmigr_requires_handle_remote(void) { return false; }
+static inline void tmigr_cpu_activate(void) { }
+#endif
+
+#endif
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7ac6c52b25eb..0a5c4efc73c3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
__bpf_kfunc_end_defs();
-BTF_SET8_START(key_sig_kfunc_set)
+BTF_KFUNCS_START(key_sig_kfunc_set)
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
-BTF_SET8_END(key_sig_kfunc_set)
+BTF_KFUNCS_END(key_sig_kfunc_set)
static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
.owner = THIS_MODULE,
@@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
__bpf_kfunc_end_defs();
-BTF_SET8_START(fs_kfunc_set_ids)
+BTF_KFUNCS_START(fs_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_SET8_END(fs_kfunc_set_ids)
+BTF_KFUNCS_END(fs_kfunc_set_ids)
static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
@@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2679,6 +2679,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
+ u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
struct bpf_kprobe_multi_link *kmulti_link;
u32 ucount = info->kprobe_multi.count;
@@ -2686,6 +2687,8 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
if (!uaddrs ^ !ucount)
return -EINVAL;
+ if (ucookies && !ucount)
+ return -EINVAL;
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
info->kprobe_multi.count = kmulti_link->cnt;
@@ -2699,6 +2702,18 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
else
ucount = kmulti_link->cnt;
+ if (ucookies) {
+ if (kmulti_link->cookies) {
+ if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
+ return -EFAULT;
+ } else {
+ for (i = 0; i < ucount; i++) {
+ if (put_user(0, ucookies + i))
+ return -EFAULT;
+ }
+ }
+ }
+
if (kallsyms_show_value(current_cred())) {
if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
return -EFAULT;
@@ -3241,7 +3256,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
.uprobe = uprobe,
};
struct bpf_prog *prog = link->link.prog;
- bool sleepable = prog->aux->sleepable;
+ bool sleepable = prog->sleepable;
struct bpf_run_ctx *old_run_ctx;
int err = 0;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 6cd2a4e3afb8..9ff018245840 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -189,9 +189,6 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
{
int size;
- if (num <= 0)
- return -EINVAL;
-
if (!fp->exit_handler) {
fp->rethook = NULL;
return 0;
@@ -199,15 +196,16 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
/* Initialize rethook if needed */
if (fp->nr_maxactive)
- size = fp->nr_maxactive;
+ num = fp->nr_maxactive;
else
- size = num * num_possible_cpus() * 2;
- if (size <= 0)
+ num *= num_possible_cpus() * 2;
+ if (num <= 0)
return -EINVAL;
+ size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size;
+
/* Initialize rethook */
- fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler,
- sizeof(struct fprobe_rethook_node), size);
+ fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num);
if (IS_ERR(fp->rethook))
return PTR_ERR(fp->rethook);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b01ae7d36021..83ba342aef31 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5325,7 +5325,17 @@ static LIST_HEAD(ftrace_direct_funcs);
static int register_ftrace_function_nolock(struct ftrace_ops *ops);
+/*
+ * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct
+ * call will be jumped from ftrace_regs_caller. Only if the architecture does
+ * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it
+ * jumps from ftrace_caller for multiple ftrace_ops.
+ */
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS
#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS)
+#else
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
+#endif
static int check_direct_multi(struct ftrace_ops *ops)
{
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 13aaf5e85b81..3103a484182e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -384,7 +384,6 @@ struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
- long wait_index;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -756,8 +755,19 @@ static void rb_wake_up_waiters(struct irq_work *work)
wake_up_all(&rbwork->waiters);
if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+ /* Only cpu_buffer sets the above flags */
+ struct ring_buffer_per_cpu *cpu_buffer =
+ container_of(rbwork, struct ring_buffer_per_cpu, irq_work);
+
+ /* Called from interrupt context */
+ raw_spin_lock(&cpu_buffer->reader_lock);
rbwork->wakeup_full = false;
rbwork->full_waiters_pending = false;
+
+ /* Waking up all waiters, they will reset the shortest full */
+ cpu_buffer->shortest_full = 0;
+ raw_spin_unlock(&cpu_buffer->reader_lock);
+
wake_up_all(&rbwork->full_waiters);
}
}
@@ -798,14 +808,40 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
rbwork = &cpu_buffer->irq_work;
}
- rbwork->wait_index++;
- /* make sure the waiters see the new index */
- smp_wmb();
-
/* This can be called in any context */
irq_work_queue(&rbwork->work);
}
+static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ bool ret = false;
+
+ /* Reads of all CPUs always waits for any data */
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return !ring_buffer_empty(buffer);
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (!ring_buffer_empty_cpu(buffer, cpu)) {
+ unsigned long flags;
+ bool pagebusy;
+
+ if (!full)
+ return true;
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ ret = !pagebusy && full_hit(buffer, cpu, full);
+
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ }
+ return ret;
+}
+
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
@@ -821,7 +857,6 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
- long wait_index;
int ret = 0;
/*
@@ -840,81 +875,54 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
work = &cpu_buffer->irq_work;
}
- wait_index = READ_ONCE(work->wait_index);
-
- while (true) {
- if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
- else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
-
- /*
- * The events can happen in critical sections where
- * checking a work queue can cause deadlocks.
- * After adding a task to the queue, this flag is set
- * only to notify events to try to wake up the queue
- * using irq_work.
- *
- * We don't clear it even if the buffer is no longer
- * empty. The flag only causes the next event to run
- * irq_work to do the work queue wake up. The worse
- * that can happen if we race with !trace_empty() is that
- * an event will cause an irq_work to try to wake up
- * an empty queue.
- *
- * There's no reason to protect this flag either, as
- * the work queue and irq_work logic will do the necessary
- * synchronization for the wake ups. The only thing
- * that is necessary is that the wake up happens after
- * a task has been queued. It's OK for spurious wake ups.
- */
- if (full)
- work->full_waiters_pending = true;
- else
- work->waiters_pending = true;
-
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
- break;
-
- if (cpu != RING_BUFFER_ALL_CPUS &&
- !ring_buffer_empty_cpu(buffer, cpu)) {
- unsigned long flags;
- bool pagebusy;
- bool done;
-
- if (!full)
- break;
-
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- done = !pagebusy && full_hit(buffer, cpu, full);
+ if (full)
+ prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
+ else
+ prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (done)
- break;
- }
+ /*
+ * The events can happen in critical sections where
+ * checking a work queue can cause deadlocks.
+ * After adding a task to the queue, this flag is set
+ * only to notify events to try to wake up the queue
+ * using irq_work.
+ *
+ * We don't clear it even if the buffer is no longer
+ * empty. The flag only causes the next event to run
+ * irq_work to do the work queue wake up. The worse
+ * that can happen if we race with !trace_empty() is that
+ * an event will cause an irq_work to try to wake up
+ * an empty queue.
+ *
+ * There's no reason to protect this flag either, as
+ * the work queue and irq_work logic will do the necessary
+ * synchronization for the wake ups. The only thing
+ * that is necessary is that the wake up happens after
+ * a task has been queued. It's OK for spurious wake ups.
+ */
+ if (full)
+ work->full_waiters_pending = true;
+ else
+ work->waiters_pending = true;
- schedule();
+ if (rb_watermark_hit(buffer, cpu, full))
+ goto out;
- /* Make sure to see the new wait index */
- smp_rmb();
- if (wait_index != work->wait_index)
- break;
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
}
+ schedule();
+ out:
if (full)
finish_wait(&work->full_waiters, &wait);
else
finish_wait(&work->waiters, &wait);
+ if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+ ret = -EINTR;
+
return ret;
}
@@ -937,28 +945,33 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct rb_irq_work *work;
+ struct rb_irq_work *rbwork;
if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
+ rbwork = &buffer->irq_work;
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
- return -EINVAL;
+ return EPOLLERR;
cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
+ rbwork = &cpu_buffer->irq_work;
}
if (full) {
- poll_wait(filp, &work->full_waiters, poll_table);
- work->full_waiters_pending = true;
+ unsigned long flags;
+
+ poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ rbwork->full_waiters_pending = true;
if (!cpu_buffer->shortest_full ||
cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
} else {
- poll_wait(filp, &work->waiters, poll_table);
- work->waiters_pending = true;
+ poll_wait(filp, &rbwork->waiters, poll_table);
+ rbwork->waiters_pending = true;
}
/*
@@ -1009,7 +1022,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer)
u64 ts;
/* Skip retpolines :-( */
- if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local))
+ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && likely(buffer->clock == trace_clock_local))
ts = trace_clock_local();
else
ts = buffer->clock();
@@ -5877,6 +5890,10 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
if (psize <= BUF_PAGE_HDR_SIZE)
return -EINVAL;
+ /* Size of a subbuf cannot be greater than the write counter */
+ if (psize > RB_WRITE_MASK + 1)
+ return -EINVAL;
+
old_order = buffer->subbuf_order;
old_size = buffer->subbuf_size;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a7c6fd934e9..c9c898307348 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
+#include <linux/kmemleak.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
@@ -1532,7 +1533,7 @@ void disable_trace_on_warning(void)
bool tracer_tracing_is_on(struct trace_array *tr)
{
if (tr->array_buffer.buffer)
- return ring_buffer_record_is_on(tr->array_buffer.buffer);
+ return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
return !tr->buffer_disabled;
}
@@ -2320,7 +2321,7 @@ struct saved_cmdlines_buffer {
unsigned *map_cmdline_to_pid;
unsigned cmdline_num;
int cmdline_idx;
- char *saved_cmdlines;
+ char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;
@@ -2334,47 +2335,60 @@ static inline void set_cmdline(int idx, const char *cmdline)
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
-static int allocate_cmdlines_buffer(unsigned int val,
- struct saved_cmdlines_buffer *s)
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+ int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+ kfree(s->map_cmdline_to_pid);
+ kmemleak_free(s);
+ free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
+ struct saved_cmdlines_buffer *s;
+ struct page *page;
+ int orig_size, size;
+ int order;
+
+ /* Figure out how much is needed to hold the given number of cmdlines */
+ orig_size = sizeof(*s) + val * TASK_COMM_LEN;
+ order = get_order(orig_size);
+ size = 1 << (order + PAGE_SHIFT);
+ page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ return NULL;
+
+ s = page_address(page);
+ kmemleak_alloc(s, size, 1, GFP_KERNEL);
+ memset(s, 0, sizeof(*s));
+
+ /* Round up to actual allocation */
+ val = (size - sizeof(*s)) / TASK_COMM_LEN;
+ s->cmdline_num = val;
+
s->map_cmdline_to_pid = kmalloc_array(val,
sizeof(*s->map_cmdline_to_pid),
GFP_KERNEL);
- if (!s->map_cmdline_to_pid)
- return -ENOMEM;
-
- s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
- if (!s->saved_cmdlines) {
- kfree(s->map_cmdline_to_pid);
- return -ENOMEM;
+ if (!s->map_cmdline_to_pid) {
+ free_saved_cmdlines_buffer(s);
+ return NULL;
}
s->cmdline_idx = 0;
- s->cmdline_num = val;
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
sizeof(s->map_pid_to_cmdline));
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
val * sizeof(*s->map_cmdline_to_pid));
- return 0;
+ return s;
}
static int trace_create_savedcmd(void)
{
- int ret;
-
- savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
- if (!savedcmd)
- return -ENOMEM;
+ savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
- ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
- if (ret < 0) {
- kfree(savedcmd);
- savedcmd = NULL;
- return -ENOMEM;
- }
-
- return 0;
+ return savedcmd ? 0 : -ENOMEM;
}
int is_tracing_stopped(void)
@@ -6056,26 +6070,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
- kfree(s->saved_cmdlines);
- kfree(s->map_cmdline_to_pid);
- kfree(s);
-}
-
static int tracing_resize_saved_cmdlines(unsigned int val)
{
struct saved_cmdlines_buffer *s, *savedcmd_temp;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = allocate_cmdlines_buffer(val);
if (!s)
return -ENOMEM;
- if (allocate_cmdlines_buffer(val, s) < 0) {
- kfree(s);
- return -ENOMEM;
- }
-
preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
@@ -7291,6 +7293,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
return 0;
}
+#define TRACE_MARKER_MAX_SIZE 4096
+
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
@@ -7318,6 +7322,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if ((ssize_t)cnt < 0)
return -EINVAL;
+ if (cnt > TRACE_MARKER_MAX_SIZE)
+ cnt = TRACE_MARKER_MAX_SIZE;
+
meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
again:
size = cnt + meta_size;
@@ -7326,11 +7333,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt < FAULTED_SIZE)
size += FAULTED_SIZE - cnt;
- if (size > TRACE_SEQ_BUFFER_SIZE) {
- cnt -= size - TRACE_SEQ_BUFFER_SIZE;
- goto again;
- }
-
buffer = tr->array_buffer.buffer;
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
tracing_gen_ctx());
@@ -8391,6 +8393,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
return size;
}
+static int tracing_buffers_flush(struct file *file, fl_owner_t id)
+{
+ struct ftrace_buffer_info *info = file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ iter->wait_index++;
+ /* Make sure the waiters see the new wait_index */
+ smp_wmb();
+
+ ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+ return 0;
+}
+
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
struct ftrace_buffer_info *info = file->private_data;
@@ -8402,12 +8418,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
__trace_array_put(iter->tr);
- iter->wait_index++;
- /* Make sure the waiters see the new wait_index */
- smp_wmb();
-
- ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
-
if (info->spare)
ring_buffer_free_read_page(iter->array_buffer->buffer,
info->spare_cpu, info->spare);
@@ -8623,6 +8633,7 @@ static const struct file_operations tracing_buffers_fops = {
.read = tracing_buffers_read,
.poll = tracing_buffers_poll,
.release = tracing_buffers_release,
+ .flush = tracing_buffers_flush,
.splice_read = tracing_buffers_splice_read,
.unlocked_ioctl = tracing_buffers_ioctl,
.llseek = no_llseek,
diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c
index ca224d53bfdc..5bbdbcbbde3c 100644
--- a/kernel/trace/trace_btf.c
+++ b/kernel/trace/trace_btf.c
@@ -91,8 +91,8 @@ retry:
for_each_member(i, type, member) {
if (!member->name_off) {
/* Anonymous union/struct: push it for later use */
- type = btf_type_skip_modifiers(btf, member->type, &tid);
- if (type && top < BTF_ANON_STACK_MAX) {
+ if (btf_type_skip_modifiers(btf, member->type, &tid) &&
+ top < BTF_ANON_STACK_MAX) {
anon_stack[top].tid = tid;
anon_stack[top++].offset =
cur_offset + member->offset;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index e7af286af4f1..c82b401a294d 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -441,8 +441,9 @@ static unsigned int trace_string(struct synth_trace_event *entry,
if (is_dynamic) {
union trace_synth_field *data = &entry->fields[*n_u64];
+ len = fetch_store_strlen((unsigned long)str_val);
data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
- data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
+ data->as_dynamic.len = len;
ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 46439e3bcec4..b33c3861fbbb 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
- if (tracing_alloc_snapshot_instance(file->tr) != 0)
- return 0;
+ int ret = tracing_alloc_snapshot_instance(file->tr);
+
+ if (ret < 0)
+ return ret;
return register_trigger(glob, data, file);
}
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index bd0d01d00fb9..a8e28f9b9271 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2444,6 +2444,9 @@ static int timerlat_fd_open(struct inode *inode, struct file *file)
tlat = this_cpu_tmr_var();
tlat->count = 0;
+ hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
+ tlat->timer.function = timerlat_irq;
+
migrate_enable();
return 0;
};
@@ -2526,9 +2529,6 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
tlat->tracing_thread = false;
tlat->kthread = current;
- hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
- tlat->timer.function = timerlat_irq;
-
/* Annotate now to drift new period */
tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3e7fa44dc2b2..d8b302d01083 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1587,12 +1587,11 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
{
struct print_entry *field;
struct trace_seq *s = &iter->seq;
- int max = iter->ent_size - offsetof(struct print_entry, buf);
trace_assign_type(field, iter->ent);
seq_print_ip_sym(s, field->ip, flags);
- trace_seq_printf(s, ": %.*s", max, field->buf);
+ trace_seq_printf(s, ": %s", field->buf);
return trace_handle_return(s);
}
@@ -1601,11 +1600,10 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct print_entry *field;
- int max = iter->ent_size - offsetof(struct print_entry, buf);
trace_assign_type(field, iter->ent);
- trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf);
+ trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf);
return trace_handle_return(&iter->seq);
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 4dc74d73fc1d..34289f9c6707 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1159,9 +1159,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
if (!(ctx->flags & TPARG_FL_TEVENT) &&
(strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
strncmp(arg, "\\\"", 2) == 0)) {
- /* The type of $comm must be "string", and not an array. */
- if (parg->count || (t && strcmp(t, "string")))
+ /* The type of $comm must be "string", and not an array type. */
+ if (parg->count || (t && strcmp(t, "string"))) {
+ trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
+ NEED_STRING_TYPE);
goto out;
+ }
parg->type = find_fetch_type("string", ctx->flags);
} else
parg->type = find_fetch_type(t, ctx->flags);
@@ -1169,18 +1172,6 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE);
goto out;
}
- parg->offset = *size;
- *size += parg->type->size * (parg->count ?: 1);
-
- ret = -ENOMEM;
- if (parg->count) {
- len = strlen(parg->type->fmttype) + 6;
- parg->fmt = kmalloc(len, GFP_KERNEL);
- if (!parg->fmt)
- goto out;
- snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
- parg->count);
- }
code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
if (!code)
@@ -1204,6 +1195,19 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
goto fail;
}
}
+ parg->offset = *size;
+ *size += parg->type->size * (parg->count ?: 1);
+
+ if (parg->count) {
+ len = strlen(parg->type->fmttype) + 6;
+ parg->fmt = kmalloc(len, GFP_KERNEL);
+ if (!parg->fmt) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+ parg->count);
+ }
ret = -EINVAL;
/* Store operation */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 850d9ecb6765..c1877d018269 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -515,7 +515,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \
C(NO_BTF_FIELD, "This field is not found."), \
C(BAD_BTF_TID, "Failed to get BTF type info."),\
- C(BAD_TYPE4STR, "This type does not fit for string."),
+ C(BAD_TYPE4STR, "This type does not fit for string."),\
+ C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),
#undef C
#define C(a, b) TP_ERR_##a
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index c774e560f2f9..a4dcf0f24352 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
}
memcpy(elt->key, key, map->key_size);
- entry->val = elt;
+ /*
+ * Ensure the initialization is visible and
+ * publish the elt.
+ */
+ smp_wmb();
+ WRITE_ONCE(entry->val, elt);
atomic64_inc(&map->hits);
return entry->val;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 76e60faed892..bf2bdac46843 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -29,6 +29,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
@@ -53,10 +54,11 @@
#include <linux/nmi.h>
#include <linux/kvm_para.h>
#include <linux/delay.h>
+#include <linux/irq_work.h>
#include "workqueue_internal.h"
-enum {
+enum worker_pool_flags {
/*
* worker_pool flags
*
@@ -72,10 +74,17 @@ enum {
* Note that DISASSOCIATED should be flipped only while holding
* wq_pool_attach_mutex to avoid changing binding state while
* worker_attach_to_pool() is in progress.
+ *
+ * As there can only be one concurrent BH execution context per CPU, a
+ * BH pool is per-CPU and always DISASSOCIATED.
*/
- POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
+ POOL_BH = 1 << 0, /* is a BH pool */
+ POOL_MANAGER_ACTIVE = 1 << 1, /* being managed */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
+ POOL_BH_DRAINING = 1 << 3, /* draining after CPU offline */
+};
+enum worker_flags {
/* worker flags */
WORKER_DIE = 1 << 1, /* die die die */
WORKER_IDLE = 1 << 2, /* is idle */
@@ -86,7 +95,13 @@ enum {
WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
WORKER_UNBOUND | WORKER_REBOUND,
+};
+enum work_cancel_flags {
+ WORK_CANCEL_DELAYED = 1 << 0, /* canceling a delayed_work */
+};
+
+enum wq_internal_consts {
NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
@@ -108,10 +123,18 @@ enum {
RESCUER_NICE_LEVEL = MIN_NICE,
HIGHPRI_NICE_LEVEL = MIN_NICE,
- WQ_NAME_LEN = 24,
+ WQ_NAME_LEN = 32,
};
/*
+ * We don't want to trap softirq for too long. See MAX_SOFTIRQ_TIME and
+ * MAX_SOFTIRQ_RESTART in kernel/softirq.c. These are macros because
+ * msecs_to_jiffies() can't be an initializer.
+ */
+#define BH_WORKER_JIFFIES msecs_to_jiffies(2)
+#define BH_WORKER_RESTARTS 10
+
+/*
* Structure fields follow one of the following exclusion rules.
*
* I: Modifiable by initialization/destruction paths and read-only for
@@ -122,6 +145,9 @@ enum {
*
* L: pool->lock protected. Access with pool->lock held.
*
+ * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
+ * reads.
+ *
* K: Only modified by worker while holding pool->lock. Can be safely read by
* self, while holding pool->lock or from IRQ context if %current is the
* kworker.
@@ -143,6 +169,9 @@ enum {
*
* WR: wq->mutex protected for writes. RCU protected for reads.
*
+ * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
+ * with READ_ONCE() without locking.
+ *
* MD: wq_mayday_lock protected.
*
* WD: Used internally by the watchdog.
@@ -219,7 +248,7 @@ enum pool_workqueue_stats {
};
/*
- * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
+ * The per-pool workqueue. While queued, bits below WORK_PWQ_SHIFT
* of work_struct->data are used for flags and the remaining high bits
* point to the pwq; thus, pwqs need to be aligned at two's power of the
* number of flag bits.
@@ -232,6 +261,7 @@ struct pool_workqueue {
int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
+ bool plugged; /* L: execution suspended */
/*
* nr_active management and WORK_STRUCT_INACTIVE:
@@ -240,18 +270,18 @@ struct pool_workqueue {
* pwq->inactive_works instead of pool->worklist and marked with
* WORK_STRUCT_INACTIVE.
*
- * All work items marked with WORK_STRUCT_INACTIVE do not participate
- * in pwq->nr_active and all work items in pwq->inactive_works are
- * marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE
- * work items are in pwq->inactive_works. Some of them are ready to
- * run in pool->worklist or worker->scheduled. Those work itmes are
- * only struct wq_barrier which is used for flush_work() and should
- * not participate in pwq->nr_active. For non-barrier work item, it
- * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+ * All work items marked with WORK_STRUCT_INACTIVE do not participate in
+ * nr_active and all work items in pwq->inactive_works are marked with
+ * WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE work items are
+ * in pwq->inactive_works. Some of them are ready to run in
+ * pool->worklist or worker->scheduled. Those work itmes are only struct
+ * wq_barrier which is used for flush_work() and should not participate
+ * in nr_active. For non-barrier work item, it is marked with
+ * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
*/
int nr_active; /* L: nr of active works */
- int max_active; /* L: max active works */
struct list_head inactive_works; /* L: inactive works */
+ struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
@@ -265,7 +295,7 @@ struct pool_workqueue {
*/
struct kthread_work release_work;
struct rcu_head rcu;
-} __aligned(1 << WORK_STRUCT_FLAG_BITS);
+} __aligned(1 << WORK_STRUCT_PWQ_SHIFT);
/*
* Structure used to wait for workqueue flush.
@@ -279,6 +309,26 @@ struct wq_flusher {
struct wq_device;
/*
+ * Unlike in a per-cpu workqueue where max_active limits its concurrency level
+ * on each CPU, in an unbound workqueue, max_active applies to the whole system.
+ * As sharing a single nr_active across multiple sockets can be very expensive,
+ * the counting and enforcement is per NUMA node.
+ *
+ * The following struct is used to enforce per-node max_active. When a pwq wants
+ * to start executing a work item, it should increment ->nr using
+ * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
+ * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
+ * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
+ * round-robin order.
+ */
+struct wq_node_nr_active {
+ int max; /* per-node max_active */
+ atomic_t nr; /* per-node nr_active */
+ raw_spinlock_t lock; /* nests inside pool locks */
+ struct list_head pending_pwqs; /* LN: pwqs with inactive works */
+};
+
+/*
* The externally visible workqueue. It relays the issued work items to
* the appropriate worker_pool through its pool_workqueues.
*/
@@ -298,10 +348,15 @@ struct workqueue_struct {
struct worker *rescuer; /* MD: rescue worker */
int nr_drainers; /* WQ: drain in progress */
- int saved_max_active; /* WQ: saved pwq max_active */
+
+ /* See alloc_workqueue() function comment for info on min/max_active */
+ int max_active; /* WO: max active works */
+ int min_active; /* WO: min active works */
+ int saved_max_active; /* WQ: saved max_active */
+ int saved_min_active; /* WQ: saved min_active */
struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */
- struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */
+ struct pool_workqueue __rcu *dfl_pwq; /* PW: only for unbound wqs */
#ifdef CONFIG_SYSFS
struct wq_device *wq_dev; /* I: for sysfs interface */
@@ -323,10 +378,9 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
+ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
};
-static struct kmem_cache *pwq_cache;
-
/*
* Each pod type describes how CPUs should be grouped for unbound workqueues.
* See the comment above workqueue_attrs->affn_scope.
@@ -338,16 +392,13 @@ struct wq_pod_type {
int *cpu_pod; /* cpu -> pod */
};
-static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES];
-static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_CACHE;
-
static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
- [WQ_AFFN_DFL] = "default",
- [WQ_AFFN_CPU] = "cpu",
- [WQ_AFFN_SMT] = "smt",
- [WQ_AFFN_CACHE] = "cache",
- [WQ_AFFN_NUMA] = "numa",
- [WQ_AFFN_SYSTEM] = "system",
+ [WQ_AFFN_DFL] = "default",
+ [WQ_AFFN_CPU] = "cpu",
+ [WQ_AFFN_SMT] = "smt",
+ [WQ_AFFN_CACHE] = "cache",
+ [WQ_AFFN_NUMA] = "numa",
+ [WQ_AFFN_SYSTEM] = "system",
};
/*
@@ -359,12 +410,22 @@ static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
*/
static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
+#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT
+static unsigned int wq_cpu_intensive_warning_thresh = 4;
+module_param_named(cpu_intensive_warning_thresh, wq_cpu_intensive_warning_thresh, uint, 0644);
+#endif
/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);
static bool wq_online; /* can kworkers be created yet? */
+static bool wq_topo_initialized __read_mostly = false;
+
+static struct kmem_cache *pwq_cache;
+
+static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES];
+static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_CACHE;
/* buf for wq_update_unbound_pod_attrs(), protected by CPU hotplug exclusion */
static struct workqueue_attrs *wq_update_pod_attrs_buf;
@@ -405,8 +466,17 @@ static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
+/* to raise softirq for the BH worker pools on other CPUs */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS],
+ bh_pool_irq_works);
+
+/* the BH worker pools */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
+ bh_worker_pools);
+
/* the per-cpu worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
+ cpu_worker_pools);
static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */
@@ -420,6 +490,12 @@ static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
/*
+ * Used to synchronize multiple cancel_sync attempts on the same work item. See
+ * work_grab_pending() and __cancel_work_sync().
+ */
+static DECLARE_WAIT_QUEUE_HEAD(wq_cancel_waitq);
+
+/*
* I: kthread_worker to release pwq's. pwq release needs to be bounced to a
* process context while holding a pool lock. Bounce to a dedicated kthread
* worker to avoid A-A deadlocks.
@@ -440,6 +516,10 @@ struct workqueue_struct *system_power_efficient_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
+struct workqueue_struct *system_bh_wq;
+EXPORT_SYMBOL_GPL(system_bh_wq);
+struct workqueue_struct *system_bh_highpri_wq;
+EXPORT_SYMBOL_GPL(system_bh_highpri_wq);
static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
@@ -450,16 +530,21 @@ static void show_one_worker_pool(struct worker_pool *pool);
#include <trace/events/workqueue.h>
#define assert_rcu_or_pool_mutex() \
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
+ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \
!lockdep_is_held(&wq_pool_mutex), \
"RCU or wq_pool_mutex should be held")
#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
+ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \
!lockdep_is_held(&wq->mutex) && \
!lockdep_is_held(&wq_pool_mutex), \
"RCU, wq->mutex or wq_pool_mutex should be held")
+#define for_each_bh_worker_pool(pool, cpu) \
+ for ((pool) = &per_cpu(bh_worker_pools, cpu)[0]; \
+ (pool) < &per_cpu(bh_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
+ (pool)++)
+
#define for_each_cpu_worker_pool(pool, cpu) \
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -632,6 +717,36 @@ static int worker_pool_assign_id(struct worker_pool *pool)
return ret;
}
+static struct pool_workqueue __rcu **
+unbound_pwq_slot(struct workqueue_struct *wq, int cpu)
+{
+ if (cpu >= 0)
+ return per_cpu_ptr(wq->cpu_pwq, cpu);
+ else
+ return &wq->dfl_pwq;
+}
+
+/* @cpu < 0 for dfl_pwq */
+static struct pool_workqueue *unbound_pwq(struct workqueue_struct *wq, int cpu)
+{
+ return rcu_dereference_check(*unbound_pwq_slot(wq, cpu),
+ lockdep_is_held(&wq_pool_mutex) ||
+ lockdep_is_held(&wq->mutex));
+}
+
+/**
+ * unbound_effective_cpumask - effective cpumask of an unbound workqueue
+ * @wq: workqueue of interest
+ *
+ * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
+ * is masked with wq_unbound_cpumask to determine the effective cpumask. The
+ * default pwq is always mapped to the pool with the current effective cpumask.
+ */
+static struct cpumask *unbound_effective_cpumask(struct workqueue_struct *wq)
+{
+ return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask;
+}
+
static unsigned int work_color_to_flags(int color)
{
return color << WORK_STRUCT_COLOR_SHIFT;
@@ -653,10 +768,9 @@ static int work_next_color(int color)
* contain the pointer to the queued pwq. Once execution starts, the flag
* is cleared and the high bits contain OFFQ flags and pool ID.
*
- * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
- * and clear_work_data() can be used to set the pwq, pool or clear
- * work->data. These functions should only be called while the work is
- * owned - ie. while the PENDING bit is set.
+ * set_work_pwq(), set_work_pool_and_clear_pending() and mark_work_canceling()
+ * can be used to set the pwq, pool or clear work->data. These functions should
+ * only be called while the work is owned - ie. while the PENDING bit is set.
*
* get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
* corresponding to a work. Pool is available once the work has been
@@ -668,29 +782,28 @@ static int work_next_color(int color)
* but stay off timer and worklist for arbitrarily long and nobody should
* try to steal the PENDING bit.
*/
-static inline void set_work_data(struct work_struct *work, unsigned long data,
- unsigned long flags)
+static inline void set_work_data(struct work_struct *work, unsigned long data)
{
WARN_ON_ONCE(!work_pending(work));
- atomic_long_set(&work->data, data | flags | work_static(work));
+ atomic_long_set(&work->data, data | work_static(work));
}
static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
- unsigned long extra_flags)
+ unsigned long flags)
{
- set_work_data(work, (unsigned long)pwq,
- WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
+ set_work_data(work, (unsigned long)pwq | WORK_STRUCT_PENDING |
+ WORK_STRUCT_PWQ | flags);
}
static void set_work_pool_and_keep_pending(struct work_struct *work,
- int pool_id)
+ int pool_id, unsigned long flags)
{
- set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
- WORK_STRUCT_PENDING);
+ set_work_data(work, ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) |
+ WORK_STRUCT_PENDING | flags);
}
static void set_work_pool_and_clear_pending(struct work_struct *work,
- int pool_id)
+ int pool_id, unsigned long flags)
{
/*
* The following wmb is paired with the implied mb in
@@ -699,7 +812,8 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
* owner.
*/
smp_wmb();
- set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+ set_work_data(work, ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) |
+ flags);
/*
* The following mb guarantees that previous clear of a PENDING bit
* will not be reordered with any speculative LOADS or STORES from
@@ -731,15 +845,9 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
smp_mb();
}
-static void clear_work_data(struct work_struct *work)
-{
- smp_wmb(); /* see set_work_pool_and_clear_pending() */
- set_work_data(work, WORK_STRUCT_NO_POOL, 0);
-}
-
static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
{
- return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
+ return (struct pool_workqueue *)(data & WORK_STRUCT_PWQ_MASK);
}
static struct pool_workqueue *get_work_pwq(struct work_struct *work)
@@ -806,7 +914,7 @@ static void mark_work_canceling(struct work_struct *work)
unsigned long pool_id = get_work_pool_id(work);
pool_id <<= WORK_OFFQ_POOL_SHIFT;
- set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
+ set_work_data(work, pool_id | WORK_STRUCT_PENDING | WORK_OFFQ_CANCELING);
}
static bool work_is_canceling(struct work_struct *work)
@@ -1101,6 +1209,29 @@ static bool assign_work(struct work_struct *work, struct worker *worker,
return true;
}
+static struct irq_work *bh_pool_irq_work(struct worker_pool *pool)
+{
+ int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0;
+
+ return &per_cpu(bh_pool_irq_works, pool->cpu)[high];
+}
+
+static void kick_bh_pool(struct worker_pool *pool)
+{
+#ifdef CONFIG_SMP
+ /* see drain_dead_softirq_workfn() for BH_DRAINING */
+ if (unlikely(pool->cpu != smp_processor_id() &&
+ !(pool->flags & POOL_BH_DRAINING))) {
+ irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu);
+ return;
+ }
+#endif
+ if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
+ raise_softirq_irqoff(HI_SOFTIRQ);
+ else
+ raise_softirq_irqoff(TASKLET_SOFTIRQ);
+}
+
/**
* kick_pool - wake up an idle worker if necessary
* @pool: pool to kick
@@ -1118,6 +1249,11 @@ static bool kick_pool(struct worker_pool *pool)
if (!need_more_worker(pool) || !worker)
return false;
+ if (pool->flags & POOL_BH) {
+ kick_bh_pool(pool);
+ return true;
+ }
+
p = worker->task;
#ifdef CONFIG_SMP
@@ -1198,11 +1334,13 @@ restart:
u64 cnt;
/*
- * Start reporting from the fourth time and back off
+ * Start reporting from the warning_thresh and back off
* exponentially.
*/
cnt = atomic64_inc_return_relaxed(&ent->cnt);
- if (cnt >= 4 && is_power_of_2(cnt))
+ if (wq_cpu_intensive_warning_thresh &&
+ cnt >= wq_cpu_intensive_warning_thresh &&
+ is_power_of_2(cnt + 1 - wq_cpu_intensive_warning_thresh))
printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
ent->func, wq_cpu_intensive_thresh_us,
atomic64_read(&ent->cnt));
@@ -1231,10 +1369,12 @@ restart:
ent = &wci_ents[wci_nr_ents++];
ent->func = func;
- atomic64_set(&ent->cnt, 1);
+ atomic64_set(&ent->cnt, 0);
hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);
raw_spin_unlock(&wci_lock);
+
+ goto restart;
}
#else /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
@@ -1402,6 +1542,74 @@ work_func_t wq_worker_last_func(struct task_struct *task)
}
/**
+ * wq_node_nr_active - Determine wq_node_nr_active to use
+ * @wq: workqueue of interest
+ * @node: NUMA node, can be %NUMA_NO_NODE
+ *
+ * Determine wq_node_nr_active to use for @wq on @node. Returns:
+ *
+ * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
+ *
+ * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
+ *
+ * - Otherwise, node_nr_active[@node].
+ */
+static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq,
+ int node)
+{
+ if (!(wq->flags & WQ_UNBOUND))
+ return NULL;
+
+ if (node == NUMA_NO_NODE)
+ node = nr_node_ids;
+
+ return wq->node_nr_active[node];
+}
+
+/**
+ * wq_update_node_max_active - Update per-node max_actives to use
+ * @wq: workqueue to update
+ * @off_cpu: CPU that's going down, -1 if a CPU is not going down
+ *
+ * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is
+ * distributed among nodes according to the proportions of numbers of online
+ * cpus. The result is always between @wq->min_active and max_active.
+ */
+static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu)
+{
+ struct cpumask *effective = unbound_effective_cpumask(wq);
+ int min_active = READ_ONCE(wq->min_active);
+ int max_active = READ_ONCE(wq->max_active);
+ int total_cpus, node;
+
+ lockdep_assert_held(&wq->mutex);
+
+ if (!wq_topo_initialized)
+ return;
+
+ if (off_cpu >= 0 && !cpumask_test_cpu(off_cpu, effective))
+ off_cpu = -1;
+
+ total_cpus = cpumask_weight_and(effective, cpu_online_mask);
+ if (off_cpu >= 0)
+ total_cpus--;
+
+ for_each_node(node) {
+ int node_cpus;
+
+ node_cpus = cpumask_weight_and(effective, cpumask_of_node(node));
+ if (off_cpu >= 0 && cpu_to_node(off_cpu) == node)
+ node_cpus--;
+
+ wq_node_nr_active(wq, node)->max =
+ clamp(DIV_ROUND_UP(max_active * node_cpus, total_cpus),
+ min_active, max_active);
+ }
+
+ wq_node_nr_active(wq, NUMA_NO_NODE)->max = min_active;
+}
+
+/**
* get_pwq - get an extra reference on the specified pool_workqueue
* @pwq: pool_workqueue to get
*
@@ -1453,24 +1661,336 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
}
}
-static void pwq_activate_inactive_work(struct work_struct *work)
+static bool pwq_is_empty(struct pool_workqueue *pwq)
{
- struct pool_workqueue *pwq = get_work_pwq(work);
+ return !pwq->nr_active && list_empty(&pwq->inactive_works);
+}
+static void __pwq_activate_work(struct pool_workqueue *pwq,
+ struct work_struct *work)
+{
+ unsigned long *wdb = work_data_bits(work);
+
+ WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
trace_workqueue_activate_work(work);
if (list_empty(&pwq->pool->worklist))
pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
- __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
+ __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
+}
+
+/**
+ * pwq_activate_work - Activate a work item if inactive
+ * @pwq: pool_workqueue @work belongs to
+ * @work: work item to activate
+ *
+ * Returns %true if activated. %false if already active.
+ */
+static bool pwq_activate_work(struct pool_workqueue *pwq,
+ struct work_struct *work)
+{
+ struct worker_pool *pool = pwq->pool;
+ struct wq_node_nr_active *nna;
+
+ lockdep_assert_held(&pool->lock);
+
+ if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE))
+ return false;
+
+ nna = wq_node_nr_active(pwq->wq, pool->node);
+ if (nna)
+ atomic_inc(&nna->nr);
+
pwq->nr_active++;
+ __pwq_activate_work(pwq, work);
+ return true;
+}
+
+static bool tryinc_node_nr_active(struct wq_node_nr_active *nna)
+{
+ int max = READ_ONCE(nna->max);
+
+ while (true) {
+ int old, tmp;
+
+ old = atomic_read(&nna->nr);
+ if (old >= max)
+ return false;
+ tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1);
+ if (tmp == old)
+ return true;
+ }
+}
+
+/**
+ * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
+ * @pwq: pool_workqueue of interest
+ * @fill: max_active may have increased, try to increase concurrency level
+ *
+ * Try to increment nr_active for @pwq. Returns %true if an nr_active count is
+ * successfully obtained. %false otherwise.
+ */
+static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
+{
+ struct workqueue_struct *wq = pwq->wq;
+ struct worker_pool *pool = pwq->pool;
+ struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node);
+ bool obtained = false;
+
+ lockdep_assert_held(&pool->lock);
+
+ if (!nna) {
+ /* BH or per-cpu workqueue, pwq->nr_active is sufficient */
+ obtained = pwq->nr_active < READ_ONCE(wq->max_active);
+ goto out;
+ }
+
+ if (unlikely(pwq->plugged))
+ return false;
+
+ /*
+ * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is
+ * already waiting on $nna, pwq_dec_nr_active() will maintain the
+ * concurrency level. Don't jump the line.
+ *
+ * We need to ignore the pending test after max_active has increased as
+ * pwq_dec_nr_active() can only maintain the concurrency level but not
+ * increase it. This is indicated by @fill.
+ */
+ if (!list_empty(&pwq->pending_node) && likely(!fill))
+ goto out;
+
+ obtained = tryinc_node_nr_active(nna);
+ if (obtained)
+ goto out;
+
+ /*
+ * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs
+ * and try again. The smp_mb() is paired with the implied memory barrier
+ * of atomic_dec_return() in pwq_dec_nr_active() to ensure that either
+ * we see the decremented $nna->nr or they see non-empty
+ * $nna->pending_pwqs.
+ */
+ raw_spin_lock(&nna->lock);
+
+ if (list_empty(&pwq->pending_node))
+ list_add_tail(&pwq->pending_node, &nna->pending_pwqs);
+ else if (likely(!fill))
+ goto out_unlock;
+
+ smp_mb();
+
+ obtained = tryinc_node_nr_active(nna);
+
+ /*
+ * If @fill, @pwq might have already been pending. Being spuriously
+ * pending in cold paths doesn't affect anything. Let's leave it be.
+ */
+ if (obtained && likely(!fill))
+ list_del_init(&pwq->pending_node);
+
+out_unlock:
+ raw_spin_unlock(&nna->lock);
+out:
+ if (obtained)
+ pwq->nr_active++;
+ return obtained;
+}
+
+/**
+ * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
+ * @pwq: pool_workqueue of interest
+ * @fill: max_active may have increased, try to increase concurrency level
+ *
+ * Activate the first inactive work item of @pwq if available and allowed by
+ * max_active limit.
+ *
+ * Returns %true if an inactive work item has been activated. %false if no
+ * inactive work item is found or max_active limit is reached.
+ */
+static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill)
+{
+ struct work_struct *work =
+ list_first_entry_or_null(&pwq->inactive_works,
+ struct work_struct, entry);
+
+ if (work && pwq_tryinc_nr_active(pwq, fill)) {
+ __pwq_activate_work(pwq, work);
+ return true;
+ } else {
+ return false;
+ }
}
-static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
+/**
+ * unplug_oldest_pwq - unplug the oldest pool_workqueue
+ * @wq: workqueue_struct where its oldest pwq is to be unplugged
+ *
+ * This function should only be called for ordered workqueues where only the
+ * oldest pwq is unplugged, the others are plugged to suspend execution to
+ * ensure proper work item ordering::
+ *
+ * dfl_pwq --------------+ [P] - plugged
+ * |
+ * v
+ * pwqs -> A -> B [P] -> C [P] (newest)
+ * | | |
+ * 1 3 5
+ * | | |
+ * 2 4 6
+ *
+ * When the oldest pwq is drained and removed, this function should be called
+ * to unplug the next oldest one to start its work item execution. Note that
+ * pwq's are linked into wq->pwqs with the oldest first, so the first one in
+ * the list is the oldest.
+ */
+static void unplug_oldest_pwq(struct workqueue_struct *wq)
{
- struct work_struct *work = list_first_entry(&pwq->inactive_works,
- struct work_struct, entry);
+ struct pool_workqueue *pwq;
- pwq_activate_inactive_work(work);
+ lockdep_assert_held(&wq->mutex);
+
+ /* Caller should make sure that pwqs isn't empty before calling */
+ pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue,
+ pwqs_node);
+ raw_spin_lock_irq(&pwq->pool->lock);
+ if (pwq->plugged) {
+ pwq->plugged = false;
+ if (pwq_activate_first_inactive(pwq, true))
+ kick_pool(pwq->pool);
+ }
+ raw_spin_unlock_irq(&pwq->pool->lock);
+}
+
+/**
+ * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
+ * @nna: wq_node_nr_active to activate a pending pwq for
+ * @caller_pool: worker_pool the caller is locking
+ *
+ * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked.
+ * @caller_pool may be unlocked and relocked to lock other worker_pools.
+ */
+static void node_activate_pending_pwq(struct wq_node_nr_active *nna,
+ struct worker_pool *caller_pool)
+{
+ struct worker_pool *locked_pool = caller_pool;
+ struct pool_workqueue *pwq;
+ struct work_struct *work;
+
+ lockdep_assert_held(&caller_pool->lock);
+
+ raw_spin_lock(&nna->lock);
+retry:
+ pwq = list_first_entry_or_null(&nna->pending_pwqs,
+ struct pool_workqueue, pending_node);
+ if (!pwq)
+ goto out_unlock;
+
+ /*
+ * If @pwq is for a different pool than @locked_pool, we need to lock
+ * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock
+ * / lock dance. For that, we also need to release @nna->lock as it's
+ * nested inside pool locks.
+ */
+ if (pwq->pool != locked_pool) {
+ raw_spin_unlock(&locked_pool->lock);
+ locked_pool = pwq->pool;
+ if (!raw_spin_trylock(&locked_pool->lock)) {
+ raw_spin_unlock(&nna->lock);
+ raw_spin_lock(&locked_pool->lock);
+ raw_spin_lock(&nna->lock);
+ goto retry;
+ }
+ }
+
+ /*
+ * $pwq may not have any inactive work items due to e.g. cancellations.
+ * Drop it from pending_pwqs and see if there's another one.
+ */
+ work = list_first_entry_or_null(&pwq->inactive_works,
+ struct work_struct, entry);
+ if (!work) {
+ list_del_init(&pwq->pending_node);
+ goto retry;
+ }
+
+ /*
+ * Acquire an nr_active count and activate the inactive work item. If
+ * $pwq still has inactive work items, rotate it to the end of the
+ * pending_pwqs so that we round-robin through them. This means that
+ * inactive work items are not activated in queueing order which is fine
+ * given that there has never been any ordering across different pwqs.
+ */
+ if (likely(tryinc_node_nr_active(nna))) {
+ pwq->nr_active++;
+ __pwq_activate_work(pwq, work);
+
+ if (list_empty(&pwq->inactive_works))
+ list_del_init(&pwq->pending_node);
+ else
+ list_move_tail(&pwq->pending_node, &nna->pending_pwqs);
+
+ /* if activating a foreign pool, make sure it's running */
+ if (pwq->pool != caller_pool)
+ kick_pool(pwq->pool);
+ }
+
+out_unlock:
+ raw_spin_unlock(&nna->lock);
+ if (locked_pool != caller_pool) {
+ raw_spin_unlock(&locked_pool->lock);
+ raw_spin_lock(&caller_pool->lock);
+ }
+}
+
+/**
+ * pwq_dec_nr_active - Retire an active count
+ * @pwq: pool_workqueue of interest
+ *
+ * Decrement @pwq's nr_active and try to activate the first inactive work item.
+ * For unbound workqueues, this function may temporarily drop @pwq->pool->lock.
+ */
+static void pwq_dec_nr_active(struct pool_workqueue *pwq)
+{
+ struct worker_pool *pool = pwq->pool;
+ struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node);
+
+ lockdep_assert_held(&pool->lock);
+
+ /*
+ * @pwq->nr_active should be decremented for both percpu and unbound
+ * workqueues.
+ */
+ pwq->nr_active--;
+
+ /*
+ * For a percpu workqueue, it's simple. Just need to kick the first
+ * inactive work item on @pwq itself.
+ */
+ if (!nna) {
+ pwq_activate_first_inactive(pwq, false);
+ return;
+ }
+
+ /*
+ * If @pwq is for an unbound workqueue, it's more complicated because
+ * multiple pwqs and pools may be sharing the nr_active count. When a
+ * pwq needs to wait for an nr_active count, it puts itself on
+ * $nna->pending_pwqs. The following atomic_dec_return()'s implied
+ * memory barrier is paired with smp_mb() in pwq_tryinc_nr_active() to
+ * guarantee that either we see non-empty pending_pwqs or they see
+ * decremented $nna->nr.
+ *
+ * $nna->max may change as CPUs come online/offline and @pwq->wq's
+ * max_active gets updated. However, it is guaranteed to be equal to or
+ * larger than @pwq->wq->min_active which is above zero unless freezing.
+ * This maintains the forward progress guarantee.
+ */
+ if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max))
+ return;
+
+ if (!list_empty(&nna->pending_pwqs))
+ node_activate_pending_pwq(nna, pool);
}
/**
@@ -1481,6 +2001,11 @@ static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
* A work either has completed or is removed from pending queue,
* decrement nr_in_flight of its pwq and handle workqueue flushing.
*
+ * NOTE:
+ * For unbound workqueues, this function may temporarily drop @pwq->pool->lock
+ * and thus should be called after all other state updates for the in-flight
+ * work item is complete.
+ *
* CONTEXT:
* raw_spin_lock_irq(pool->lock).
*/
@@ -1488,14 +2013,8 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_
{
int color = get_work_color(work_data);
- if (!(work_data & WORK_STRUCT_INACTIVE)) {
- pwq->nr_active--;
- if (!list_empty(&pwq->inactive_works)) {
- /* one down, submit an inactive one */
- if (pwq->nr_active < pwq->max_active)
- pwq_activate_first_inactive(pwq);
- }
- }
+ if (!(work_data & WORK_STRUCT_INACTIVE))
+ pwq_dec_nr_active(pwq);
pwq->nr_in_flight[color]--;
@@ -1523,8 +2042,8 @@ out_put:
/**
* try_to_grab_pending - steal work item from worklist and disable irq
* @work: work item to steal
- * @is_dwork: @work is a delayed_work
- * @flags: place to store irq state
+ * @cflags: %WORK_CANCEL_ flags
+ * @irq_flags: place to store irq state
*
* Try to grab PENDING bit of @work. This function can handle @work in any
* stable state - idle, on timer or on worklist.
@@ -1546,20 +2065,20 @@ out_put:
* irqsafe, ensures that we return -EAGAIN for finite short period of time.
*
* On successful return, >= 0, irq is disabled and the caller is
- * responsible for releasing it using local_irq_restore(*@flags).
+ * responsible for releasing it using local_irq_restore(*@irq_flags).
*
* This function is safe to call from any context including IRQ handler.
*/
-static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
- unsigned long *flags)
+static int try_to_grab_pending(struct work_struct *work, u32 cflags,
+ unsigned long *irq_flags)
{
struct worker_pool *pool;
struct pool_workqueue *pwq;
- local_irq_save(*flags);
+ local_irq_save(*irq_flags);
/* try to steal the timer if it exists */
- if (is_dwork) {
+ if (cflags & WORK_CANCEL_DELAYED) {
struct delayed_work *dwork = to_delayed_work(work);
/*
@@ -1595,6 +2114,8 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
*/
pwq = get_work_pwq(work);
if (pwq && pwq->pool == pool) {
+ unsigned long work_data;
+
debug_work_deactivate(work);
/*
@@ -1608,14 +2129,19 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
* management later on and cause stall. Make sure the work
* item is activated before grabbing.
*/
- if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
- pwq_activate_inactive_work(work);
+ pwq_activate_work(pwq, work);
list_del_init(&work->entry);
- pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
- /* work->data points to pwq iff queued, point to pool */
- set_work_pool_and_keep_pending(work, pool->id);
+ /*
+ * work->data points to pwq iff queued. Let's point to pool. As
+ * this destroys work->data needed by the next step, stash it.
+ */
+ work_data = *work_data_bits(work);
+ set_work_pool_and_keep_pending(work, pool->id, 0);
+
+ /* must be the last step, see the function comment */
+ pwq_dec_nr_in_flight(pwq, work_data);
raw_spin_unlock(&pool->lock);
rcu_read_unlock();
@@ -1624,13 +2150,82 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
raw_spin_unlock(&pool->lock);
fail:
rcu_read_unlock();
- local_irq_restore(*flags);
+ local_irq_restore(*irq_flags);
if (work_is_canceling(work))
return -ENOENT;
cpu_relax();
return -EAGAIN;
}
+struct cwt_wait {
+ wait_queue_entry_t wait;
+ struct work_struct *work;
+};
+
+static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
+{
+ struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+ if (cwait->work != key)
+ return 0;
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+
+/**
+ * work_grab_pending - steal work item from worklist and disable irq
+ * @work: work item to steal
+ * @cflags: %WORK_CANCEL_ flags
+ * @irq_flags: place to store IRQ state
+ *
+ * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer
+ * or on worklist.
+ *
+ * Must be called in process context. IRQ is disabled on return with IRQ state
+ * stored in *@irq_flags. The caller is responsible for re-enabling it using
+ * local_irq_restore().
+ *
+ * Returns %true if @work was pending. %false if idle.
+ */
+static bool work_grab_pending(struct work_struct *work, u32 cflags,
+ unsigned long *irq_flags)
+{
+ struct cwt_wait cwait;
+ int ret;
+
+ might_sleep();
+repeat:
+ ret = try_to_grab_pending(work, cflags, irq_flags);
+ if (likely(ret >= 0))
+ return ret;
+ if (ret != -ENOENT)
+ goto repeat;
+
+ /*
+ * Someone is already canceling. Wait for it to finish. flush_work()
+ * doesn't work for PREEMPT_NONE because we may get woken up between
+ * @work's completion and the other canceling task resuming and clearing
+ * CANCELING - flush_work() will return false immediately as @work is no
+ * longer busy, try_to_grab_pending() will return -ENOENT as @work is
+ * still being canceled and the other canceling task won't be able to
+ * clear CANCELING as we're hogging the CPU.
+ *
+ * Let's wait for completion using a waitqueue. As this may lead to the
+ * thundering herd problem, use a custom wake function which matches
+ * @work along with exclusive wait and wakeup.
+ */
+ init_wait(&cwait.wait);
+ cwait.wait.func = cwt_wakefn;
+ cwait.work = work;
+
+ prepare_to_wait_exclusive(&wq_cancel_waitq, &cwait.wait,
+ TASK_UNINTERRUPTIBLE);
+ if (work_is_canceling(work))
+ schedule();
+ finish_wait(&wq_cancel_waitq, &cwait.wait);
+
+ goto repeat;
+}
+
/**
* insert_work - insert a work into a pool
* @pwq: pwq @work belongs to
@@ -1718,7 +2313,6 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
*/
lockdep_assert_irqs_disabled();
-
/*
* For a draining wq, only works from the same workqueue are
* allowed. The __WQ_DESTROYING helps to spot the issue that
@@ -1793,12 +2387,16 @@ retry:
pwq->nr_in_flight[pwq->work_color]++;
work_flags = work_color_to_flags(pwq->work_color);
- if (likely(pwq->nr_active < pwq->max_active)) {
+ /*
+ * Limit the number of concurrently active work items to max_active.
+ * @work must also queue behind existing inactive work items to maintain
+ * ordering when max_active changes. See wq_adjust_max_active().
+ */
+ if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) {
if (list_empty(&pool->worklist))
pool->watchdog_ts = jiffies;
trace_workqueue_activate_work(work);
- pwq->nr_active++;
insert_work(pwq, work, &pool->worklist, work_flags);
kick_pool(pool);
} else {
@@ -1829,16 +2427,16 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
bool ret = false;
- unsigned long flags;
+ unsigned long irq_flags;
- local_irq_save(flags);
+ local_irq_save(irq_flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
__queue_work(cpu, wq, work);
ret = true;
}
- local_irq_restore(flags);
+ local_irq_restore(irq_flags);
return ret;
}
EXPORT_SYMBOL(queue_work_on);
@@ -1895,7 +2493,7 @@ static int select_numa_node_cpu(int node)
bool queue_work_node(int node, struct workqueue_struct *wq,
struct work_struct *work)
{
- unsigned long flags;
+ unsigned long irq_flags;
bool ret = false;
/*
@@ -1909,7 +2507,7 @@ bool queue_work_node(int node, struct workqueue_struct *wq,
*/
WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
- local_irq_save(flags);
+ local_irq_save(irq_flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
int cpu = select_numa_node_cpu(node);
@@ -1918,7 +2516,7 @@ bool queue_work_node(int node, struct workqueue_struct *wq,
ret = true;
}
- local_irq_restore(flags);
+ local_irq_restore(irq_flags);
return ret;
}
EXPORT_SYMBOL_GPL(queue_work_node);
@@ -1958,10 +2556,18 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
dwork->cpu = cpu;
timer->expires = jiffies + delay;
- if (unlikely(cpu != WORK_CPU_UNBOUND))
+ if (housekeeping_enabled(HK_TYPE_TIMER)) {
+ /* If the current cpu is a housekeeping cpu, use it. */
+ cpu = smp_processor_id();
+ if (!housekeeping_test_cpu(cpu, HK_TYPE_TIMER))
+ cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
add_timer_on(timer, cpu);
- else
- add_timer(timer);
+ } else {
+ if (likely(cpu == WORK_CPU_UNBOUND))
+ add_timer_global(timer);
+ else
+ add_timer_on(timer, cpu);
+ }
}
/**
@@ -1980,17 +2586,17 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
{
struct work_struct *work = &dwork->work;
bool ret = false;
- unsigned long flags;
+ unsigned long irq_flags;
/* read the comment in __queue_work() */
- local_irq_save(flags);
+ local_irq_save(irq_flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
__queue_delayed_work(cpu, wq, dwork, delay);
ret = true;
}
- local_irq_restore(flags);
+ local_irq_restore(irq_flags);
return ret;
}
EXPORT_SYMBOL(queue_delayed_work_on);
@@ -2016,16 +2622,17 @@ EXPORT_SYMBOL(queue_delayed_work_on);
bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay)
{
- unsigned long flags;
+ unsigned long irq_flags;
int ret;
do {
- ret = try_to_grab_pending(&dwork->work, true, &flags);
+ ret = try_to_grab_pending(&dwork->work, WORK_CANCEL_DELAYED,
+ &irq_flags);
} while (unlikely(ret == -EAGAIN));
if (likely(ret >= 0)) {
__queue_delayed_work(cpu, wq, dwork, delay);
- local_irq_restore(flags);
+ local_irq_restore(irq_flags);
}
/* -ENOENT from try_to_grab_pending() becomes %true */
@@ -2100,19 +2707,21 @@ static cpumask_t *pool_allowed_cpus(struct worker_pool *pool)
* cpu-[un]hotplugs.
*/
static void worker_attach_to_pool(struct worker *worker,
- struct worker_pool *pool)
+ struct worker_pool *pool)
{
mutex_lock(&wq_pool_attach_mutex);
/*
- * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
- * stable across this function. See the comments above the flag
- * definition for details.
+ * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains stable
+ * across this function. See the comments above the flag definition for
+ * details. BH workers are, while per-CPU, always DISASSOCIATED.
*/
- if (pool->flags & POOL_DISASSOCIATED)
+ if (pool->flags & POOL_DISASSOCIATED) {
worker->flags |= WORKER_UNBOUND;
- else
+ } else {
+ WARN_ON_ONCE(pool->flags & POOL_BH);
kthread_set_per_cpu(worker->task, pool->cpu);
+ }
if (worker->rescue_wq)
set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool));
@@ -2136,6 +2745,9 @@ static void worker_detach_from_pool(struct worker *worker)
struct worker_pool *pool = worker->pool;
struct completion *detach_completion = NULL;
+ /* there is one permanent BH worker per CPU which should never detach */
+ WARN_ON_ONCE(pool->flags & POOL_BH);
+
mutex_lock(&wq_pool_attach_mutex);
kthread_set_per_cpu(worker->task, -1);
@@ -2187,27 +2799,29 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->id = id;
- if (pool->cpu >= 0)
- snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
- pool->attrs->nice < 0 ? "H" : "");
- else
- snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
-
- worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
- "kworker/%s", id_buf);
- if (IS_ERR(worker->task)) {
- if (PTR_ERR(worker->task) == -EINTR) {
- pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
- id_buf);
- } else {
- pr_err_once("workqueue: Failed to create a worker thread: %pe",
- worker->task);
+ if (!(pool->flags & POOL_BH)) {
+ if (pool->cpu >= 0)
+ snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
+ pool->attrs->nice < 0 ? "H" : "");
+ else
+ snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
+
+ worker->task = kthread_create_on_node(worker_thread, worker,
+ pool->node, "kworker/%s", id_buf);
+ if (IS_ERR(worker->task)) {
+ if (PTR_ERR(worker->task) == -EINTR) {
+ pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
+ id_buf);
+ } else {
+ pr_err_once("workqueue: Failed to create a worker thread: %pe",
+ worker->task);
+ }
+ goto fail;
}
- goto fail;
- }
- set_user_nice(worker->task, pool->attrs->nice);
- kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
+ set_user_nice(worker->task, pool->attrs->nice);
+ kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
+ }
/* successful, attach the worker to the pool */
worker_attach_to_pool(worker, pool);
@@ -2217,14 +2831,14 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->pool->nr_workers++;
worker_enter_idle(worker);
- kick_pool(pool);
/*
* @worker is waiting on a completion in kthread() and will trigger hung
- * check if not woken up soon. As kick_pool() might not have waken it
- * up, wake it up explicitly once more.
+ * check if not woken up soon. As kick_pool() is noop if @pool is empty,
+ * wake it up explicitly.
*/
- wake_up_process(worker->task);
+ if (worker->task)
+ wake_up_process(worker->task);
raw_spin_unlock_irq(&pool->lock);
@@ -2543,6 +3157,8 @@ __acquires(&pool->lock)
struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool;
unsigned long work_data;
+ int lockdep_start_depth, rcu_start_depth;
+ bool bh_draining = pool->flags & POOL_BH_DRAINING;
#ifdef CONFIG_LOCKDEP
/*
* It is permissible to free the struct work_struct from
@@ -2565,7 +3181,8 @@ __acquires(&pool->lock)
worker->current_work = work;
worker->current_func = work->func;
worker->current_pwq = pwq;
- worker->current_at = worker->task->se.sum_exec_runtime;
+ if (worker->task)
+ worker->current_at = worker->task->se.sum_exec_runtime;
work_data = *work_data_bits(work);
worker->current_color = get_work_color(work_data);
@@ -2600,12 +3217,16 @@ __acquires(&pool->lock)
* PENDING and queued state changes happen together while IRQ is
* disabled.
*/
- set_work_pool_and_clear_pending(work, pool->id);
+ set_work_pool_and_clear_pending(work, pool->id, 0);
pwq->stats[PWQ_STAT_STARTED]++;
raw_spin_unlock_irq(&pool->lock);
- lock_map_acquire(&pwq->wq->lockdep_map);
+ rcu_start_depth = rcu_preempt_depth();
+ lockdep_start_depth = lockdep_depth(current);
+ /* see drain_dead_softirq_workfn() */
+ if (!bh_draining)
+ lock_map_acquire(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
/*
* Strictly speaking we should mark the invariant state without holding
@@ -2638,12 +3259,17 @@ __acquires(&pool->lock)
trace_workqueue_execute_end(work, worker->current_func);
pwq->stats[PWQ_STAT_COMPLETED]++;
lock_map_release(&lockdep_map);
- lock_map_release(&pwq->wq->lockdep_map);
+ if (!bh_draining)
+ lock_map_release(&pwq->wq->lockdep_map);
- if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
- pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
- " last function: %ps\n",
- current->comm, preempt_count(), task_pid_nr(current),
+ if (unlikely((worker->task && in_atomic()) ||
+ lockdep_depth(current) != lockdep_start_depth ||
+ rcu_preempt_depth() != rcu_start_depth)) {
+ pr_err("BUG: workqueue leaked atomic, lock or RCU: %s[%d]\n"
+ " preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n",
+ current->comm, task_pid_nr(current), preempt_count(),
+ lockdep_start_depth, lockdep_depth(current),
+ rcu_start_depth, rcu_preempt_depth(),
worker->current_func);
debug_show_held_locks(current);
dump_stack();
@@ -2657,7 +3283,8 @@ __acquires(&pool->lock)
* stop_machine. At the same time, report a quiescent RCU state so
* the same condition doesn't freeze RCU.
*/
- cond_resched();
+ if (worker->task)
+ cond_resched();
raw_spin_lock_irq(&pool->lock);
@@ -2677,6 +3304,8 @@ __acquires(&pool->lock)
worker->current_func = NULL;
worker->current_pwq = NULL;
worker->current_color = INT_MAX;
+
+ /* must be the last step, see the function comment */
pwq_dec_nr_in_flight(pwq, work_data);
}
@@ -2938,6 +3567,139 @@ repeat:
goto repeat;
}
+static void bh_worker(struct worker *worker)
+{
+ struct worker_pool *pool = worker->pool;
+ int nr_restarts = BH_WORKER_RESTARTS;
+ unsigned long end = jiffies + BH_WORKER_JIFFIES;
+
+ raw_spin_lock_irq(&pool->lock);
+ worker_leave_idle(worker);
+
+ /*
+ * This function follows the structure of worker_thread(). See there for
+ * explanations on each step.
+ */
+ if (!need_more_worker(pool))
+ goto done;
+
+ WARN_ON_ONCE(!list_empty(&worker->scheduled));
+ worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
+
+ do {
+ struct work_struct *work =
+ list_first_entry(&pool->worklist,
+ struct work_struct, entry);
+
+ if (assign_work(work, worker, NULL))
+ process_scheduled_works(worker);
+ } while (keep_working(pool) &&
+ --nr_restarts && time_before(jiffies, end));
+
+ worker_set_flags(worker, WORKER_PREP);
+done:
+ worker_enter_idle(worker);
+ kick_pool(pool);
+ raw_spin_unlock_irq(&pool->lock);
+}
+
+/*
+ * TODO: Convert all tasklet users to workqueue and use softirq directly.
+ *
+ * This is currently called from tasklet[_hi]action() and thus is also called
+ * whenever there are tasklets to run. Let's do an early exit if there's nothing
+ * queued. Once conversion from tasklet is complete, the need_more_worker() test
+ * can be dropped.
+ *
+ * After full conversion, we'll add worker->softirq_action, directly use the
+ * softirq action and obtain the worker pointer from the softirq_action pointer.
+ */
+void workqueue_softirq_action(bool highpri)
+{
+ struct worker_pool *pool =
+ &per_cpu(bh_worker_pools, smp_processor_id())[highpri];
+ if (need_more_worker(pool))
+ bh_worker(list_first_entry(&pool->workers, struct worker, node));
+}
+
+struct wq_drain_dead_softirq_work {
+ struct work_struct work;
+ struct worker_pool *pool;
+ struct completion done;
+};
+
+static void drain_dead_softirq_workfn(struct work_struct *work)
+{
+ struct wq_drain_dead_softirq_work *dead_work =
+ container_of(work, struct wq_drain_dead_softirq_work, work);
+ struct worker_pool *pool = dead_work->pool;
+ bool repeat;
+
+ /*
+ * @pool's CPU is dead and we want to execute its still pending work
+ * items from this BH work item which is running on a different CPU. As
+ * its CPU is dead, @pool can't be kicked and, as work execution path
+ * will be nested, a lockdep annotation needs to be suppressed. Mark
+ * @pool with %POOL_BH_DRAINING for the special treatments.
+ */
+ raw_spin_lock_irq(&pool->lock);
+ pool->flags |= POOL_BH_DRAINING;
+ raw_spin_unlock_irq(&pool->lock);
+
+ bh_worker(list_first_entry(&pool->workers, struct worker, node));
+
+ raw_spin_lock_irq(&pool->lock);
+ pool->flags &= ~POOL_BH_DRAINING;
+ repeat = need_more_worker(pool);
+ raw_spin_unlock_irq(&pool->lock);
+
+ /*
+ * bh_worker() might hit consecutive execution limit and bail. If there
+ * still are pending work items, reschedule self and return so that we
+ * don't hog this CPU's BH.
+ */
+ if (repeat) {
+ if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
+ queue_work(system_bh_highpri_wq, work);
+ else
+ queue_work(system_bh_wq, work);
+ } else {
+ complete(&dead_work->done);
+ }
+}
+
+/*
+ * @cpu is dead. Drain the remaining BH work items on the current CPU. It's
+ * possible to allocate dead_work per CPU and avoid flushing. However, then we
+ * have to worry about draining overlapping with CPU coming back online or
+ * nesting (one CPU's dead_work queued on another CPU which is also dead and so
+ * on). Let's keep it simple and drain them synchronously. These are BH work
+ * items which shouldn't be requeued on the same pool. Shouldn't take long.
+ */
+void workqueue_softirq_dead(unsigned int cpu)
+{
+ int i;
+
+ for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
+ struct worker_pool *pool = &per_cpu(bh_worker_pools, cpu)[i];
+ struct wq_drain_dead_softirq_work dead_work;
+
+ if (!need_more_worker(pool))
+ continue;
+
+ INIT_WORK(&dead_work.work, drain_dead_softirq_workfn);
+ dead_work.pool = pool;
+ init_completion(&dead_work.done);
+
+ if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
+ queue_work(system_bh_highpri_wq, &dead_work.work);
+ else
+ queue_work(system_bh_wq, &dead_work.work);
+
+ wait_for_completion(&dead_work.done);
+ }
+}
+
/**
* check_flush_dependency - check for flush dependency sanity
* @target_wq: workqueue being flushed
@@ -3010,6 +3772,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
struct wq_barrier *barr,
struct work_struct *target, struct worker *worker)
{
+ static __maybe_unused struct lock_class_key bh_key, thr_key;
unsigned int work_flags = 0;
unsigned int work_color;
struct list_head *head;
@@ -3019,15 +3782,20 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
* as we know for sure that this will not trigger any of the
* checks and call back into the fixup functions where we
* might deadlock.
+ *
+ * BH and threaded workqueues need separate lockdep keys to avoid
+ * spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W}
+ * usage".
*/
- INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
+ INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func,
+ (pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key);
__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
init_completion_map(&barr->done, &target->lockdep_map);
barr->task = current;
- /* The barrier work item does not participate in pwq->nr_active. */
+ /* The barrier work item does not participate in nr_active. */
work_flags |= WORK_STRUCT_INACTIVE;
/*
@@ -3124,6 +3892,35 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
return wait;
}
+static void touch_wq_lockdep_map(struct workqueue_struct *wq)
+{
+#ifdef CONFIG_LOCKDEP
+ if (wq->flags & WQ_BH)
+ local_bh_disable();
+
+ lock_map_acquire(&wq->lockdep_map);
+ lock_map_release(&wq->lockdep_map);
+
+ if (wq->flags & WQ_BH)
+ local_bh_enable();
+#endif
+}
+
+static void touch_work_lockdep_map(struct work_struct *work,
+ struct workqueue_struct *wq)
+{
+#ifdef CONFIG_LOCKDEP
+ if (wq->flags & WQ_BH)
+ local_bh_disable();
+
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);
+
+ if (wq->flags & WQ_BH)
+ local_bh_enable();
+#endif
+}
+
/**
* __flush_workqueue - ensure that any scheduled work has run to completion.
* @wq: workqueue to flush
@@ -3143,8 +3940,7 @@ void __flush_workqueue(struct workqueue_struct *wq)
if (WARN_ON(!wq_online))
return;
- lock_map_acquire(&wq->lockdep_map);
- lock_map_release(&wq->lockdep_map);
+ touch_wq_lockdep_map(wq);
mutex_lock(&wq->mutex);
@@ -3316,7 +4112,7 @@ reflush:
bool drained;
raw_spin_lock_irq(&pwq->pool->lock);
- drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
+ drained = pwq_is_empty(pwq);
raw_spin_unlock_irq(&pwq->pool->lock);
if (drained)
@@ -3343,6 +4139,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
struct worker *worker = NULL;
struct worker_pool *pool;
struct pool_workqueue *pwq;
+ struct workqueue_struct *wq;
might_sleep();
@@ -3366,11 +4163,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
pwq = worker->current_pwq;
}
- check_flush_dependency(pwq->wq, work);
+ wq = pwq->wq;
+ check_flush_dependency(wq, work);
insert_wq_barrier(pwq, barr, work, worker);
raw_spin_unlock_irq(&pool->lock);
+ touch_work_lockdep_map(work, wq);
+
/*
* Force a lock recursion deadlock when using flush_work() inside a
* single-threaded or rescuer equipped workqueue.
@@ -3380,11 +4180,9 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
* workqueues the deadlock happens when the rescuer stalls, blocking
* forward progress.
*/
- if (!from_cancel &&
- (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
- lock_map_acquire(&pwq->wq->lockdep_map);
- lock_map_release(&pwq->wq->lockdep_map);
- }
+ if (!from_cancel && (wq->saved_max_active == 1 || wq->rescuer))
+ touch_wq_lockdep_map(wq);
+
rcu_read_unlock();
return true;
already_gone:
@@ -3403,9 +4201,6 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
if (WARN_ON(!work->func))
return false;
- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
-
if (start_flush_work(work, &barr, from_cancel)) {
wait_for_completion(&barr.done);
destroy_work_on_stack(&barr.work);
@@ -3432,108 +4227,6 @@ bool flush_work(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(flush_work);
-struct cwt_wait {
- wait_queue_entry_t wait;
- struct work_struct *work;
-};
-
-static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
-{
- struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
-
- if (cwait->work != key)
- return 0;
- return autoremove_wake_function(wait, mode, sync, key);
-}
-
-static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
-{
- static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
- unsigned long flags;
- int ret;
-
- do {
- ret = try_to_grab_pending(work, is_dwork, &flags);
- /*
- * If someone else is already canceling, wait for it to
- * finish. flush_work() doesn't work for PREEMPT_NONE
- * because we may get scheduled between @work's completion
- * and the other canceling task resuming and clearing
- * CANCELING - flush_work() will return false immediately
- * as @work is no longer busy, try_to_grab_pending() will
- * return -ENOENT as @work is still being canceled and the
- * other canceling task won't be able to clear CANCELING as
- * we're hogging the CPU.
- *
- * Let's wait for completion using a waitqueue. As this
- * may lead to the thundering herd problem, use a custom
- * wake function which matches @work along with exclusive
- * wait and wakeup.
- */
- if (unlikely(ret == -ENOENT)) {
- struct cwt_wait cwait;
-
- init_wait(&cwait.wait);
- cwait.wait.func = cwt_wakefn;
- cwait.work = work;
-
- prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
- TASK_UNINTERRUPTIBLE);
- if (work_is_canceling(work))
- schedule();
- finish_wait(&cancel_waitq, &cwait.wait);
- }
- } while (unlikely(ret < 0));
-
- /* tell other tasks trying to grab @work to back off */
- mark_work_canceling(work);
- local_irq_restore(flags);
-
- /*
- * This allows canceling during early boot. We know that @work
- * isn't executing.
- */
- if (wq_online)
- __flush_work(work, true);
-
- clear_work_data(work);
-
- /*
- * Paired with prepare_to_wait() above so that either
- * waitqueue_active() is visible here or !work_is_canceling() is
- * visible there.
- */
- smp_mb();
- if (waitqueue_active(&cancel_waitq))
- __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
-
- return ret;
-}
-
-/**
- * cancel_work_sync - cancel a work and wait for it to finish
- * @work: the work to cancel
- *
- * Cancel @work and wait for its execution to finish. This function
- * can be used even if the work re-queues itself or migrates to
- * another workqueue. On return from this function, @work is
- * guaranteed to be not pending or executing on any CPU.
- *
- * cancel_work_sync(&delayed_work->work) must not be used for
- * delayed_work's. Use cancel_delayed_work_sync() instead.
- *
- * The caller must ensure that the workqueue on which @work was last
- * queued can't be destroyed before this function returns.
- *
- * Return:
- * %true if @work was pending, %false otherwise.
- */
-bool cancel_work_sync(struct work_struct *work)
-{
- return __cancel_work_timer(work, false);
-}
-EXPORT_SYMBOL_GPL(cancel_work_sync);
-
/**
* flush_delayed_work - wait for a dwork to finish executing the last queueing
* @dwork: the delayed work to flush
@@ -3576,20 +4269,50 @@ bool flush_rcu_work(struct rcu_work *rwork)
}
EXPORT_SYMBOL(flush_rcu_work);
-static bool __cancel_work(struct work_struct *work, bool is_dwork)
+static bool __cancel_work(struct work_struct *work, u32 cflags)
{
- unsigned long flags;
+ unsigned long irq_flags;
int ret;
do {
- ret = try_to_grab_pending(work, is_dwork, &flags);
+ ret = try_to_grab_pending(work, cflags, &irq_flags);
} while (unlikely(ret == -EAGAIN));
if (unlikely(ret < 0))
return false;
- set_work_pool_and_clear_pending(work, get_work_pool_id(work));
- local_irq_restore(flags);
+ set_work_pool_and_clear_pending(work, get_work_pool_id(work), 0);
+ local_irq_restore(irq_flags);
+ return ret;
+}
+
+static bool __cancel_work_sync(struct work_struct *work, u32 cflags)
+{
+ unsigned long irq_flags;
+ bool ret;
+
+ /* claim @work and tell other tasks trying to grab @work to back off */
+ ret = work_grab_pending(work, cflags, &irq_flags);
+ mark_work_canceling(work);
+ local_irq_restore(irq_flags);
+
+ /*
+ * Skip __flush_work() during early boot when we know that @work isn't
+ * executing. This allows canceling during early boot.
+ */
+ if (wq_online)
+ __flush_work(work, true);
+
+ /*
+ * smp_mb() at the end of set_work_pool_and_clear_pending() is paired
+ * with prepare_to_wait() above so that either waitqueue_active() is
+ * visible here or !work_is_canceling() is visible there.
+ */
+ set_work_pool_and_clear_pending(work, WORK_OFFQ_POOL_NONE, 0);
+
+ if (waitqueue_active(&wq_cancel_waitq))
+ __wake_up(&wq_cancel_waitq, TASK_NORMAL, 1, work);
+
return ret;
}
@@ -3598,11 +4321,35 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork)
*/
bool cancel_work(struct work_struct *work)
{
- return __cancel_work(work, false);
+ return __cancel_work(work, 0);
}
EXPORT_SYMBOL(cancel_work);
/**
+ * cancel_work_sync - cancel a work and wait for it to finish
+ * @work: the work to cancel
+ *
+ * Cancel @work and wait for its execution to finish. This function
+ * can be used even if the work re-queues itself or migrates to
+ * another workqueue. On return from this function, @work is
+ * guaranteed to be not pending or executing on any CPU.
+ *
+ * cancel_work_sync(&delayed_work->work) must not be used for
+ * delayed_work's. Use cancel_delayed_work_sync() instead.
+ *
+ * The caller must ensure that the workqueue on which @work was last
+ * queued can't be destroyed before this function returns.
+ *
+ * Return:
+ * %true if @work was pending, %false otherwise.
+ */
+bool cancel_work_sync(struct work_struct *work)
+{
+ return __cancel_work_sync(work, 0);
+}
+EXPORT_SYMBOL_GPL(cancel_work_sync);
+
+/**
* cancel_delayed_work - cancel a delayed work
* @dwork: delayed_work to cancel
*
@@ -3620,7 +4367,7 @@ EXPORT_SYMBOL(cancel_work);
*/
bool cancel_delayed_work(struct delayed_work *dwork)
{
- return __cancel_work(&dwork->work, true);
+ return __cancel_work(&dwork->work, WORK_CANCEL_DELAYED);
}
EXPORT_SYMBOL(cancel_delayed_work);
@@ -3635,7 +4382,7 @@ EXPORT_SYMBOL(cancel_delayed_work);
*/
bool cancel_delayed_work_sync(struct delayed_work *dwork)
{
- return __cancel_work_timer(&dwork->work, true);
+ return __cancel_work_sync(&dwork->work, WORK_CANCEL_DELAYED);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
@@ -3927,11 +4674,66 @@ static void wq_free_lockdep(struct workqueue_struct *wq)
}
#endif
+static void free_node_nr_active(struct wq_node_nr_active **nna_ar)
+{
+ int node;
+
+ for_each_node(node) {
+ kfree(nna_ar[node]);
+ nna_ar[node] = NULL;
+ }
+
+ kfree(nna_ar[nr_node_ids]);
+ nna_ar[nr_node_ids] = NULL;
+}
+
+static void init_node_nr_active(struct wq_node_nr_active *nna)
+{
+ nna->max = WQ_DFL_MIN_ACTIVE;
+ atomic_set(&nna->nr, 0);
+ raw_spin_lock_init(&nna->lock);
+ INIT_LIST_HEAD(&nna->pending_pwqs);
+}
+
+/*
+ * Each node's nr_active counter will be accessed mostly from its own node and
+ * should be allocated in the node.
+ */
+static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar)
+{
+ struct wq_node_nr_active *nna;
+ int node;
+
+ for_each_node(node) {
+ nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node);
+ if (!nna)
+ goto err_free;
+ init_node_nr_active(nna);
+ nna_ar[node] = nna;
+ }
+
+ /* [nr_node_ids] is used as the fallback */
+ nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE);
+ if (!nna)
+ goto err_free;
+ init_node_nr_active(nna);
+ nna_ar[nr_node_ids] = nna;
+
+ return 0;
+
+err_free:
+ free_node_nr_active(nna_ar);
+ return -ENOMEM;
+}
+
static void rcu_free_wq(struct rcu_head *rcu)
{
struct workqueue_struct *wq =
container_of(rcu, struct workqueue_struct, rcu);
+ if (wq->flags & WQ_UNBOUND)
+ free_node_nr_active(wq->node_nr_active);
+
wq_free_lockdep(wq);
free_percpu(wq->cpu_pwq);
free_workqueue_attrs(wq->unbound_attrs);
@@ -4121,6 +4923,13 @@ static void pwq_release_workfn(struct kthread_work *work)
mutex_lock(&wq->mutex);
list_del_rcu(&pwq->pwqs_node);
is_last = list_empty(&wq->pwqs);
+
+ /*
+ * For ordered workqueue with a plugged dfl_pwq, restart it now.
+ */
+ if (!is_last && (wq->flags & __WQ_ORDERED))
+ unplug_oldest_pwq(wq);
+
mutex_unlock(&wq->mutex);
}
@@ -4130,6 +4939,15 @@ static void pwq_release_workfn(struct kthread_work *work)
mutex_unlock(&wq_pool_mutex);
}
+ if (!list_empty(&pwq->pending_node)) {
+ struct wq_node_nr_active *nna =
+ wq_node_nr_active(pwq->wq, pwq->pool->node);
+
+ raw_spin_lock_irq(&nna->lock);
+ list_del_init(&pwq->pending_node);
+ raw_spin_unlock_irq(&nna->lock);
+ }
+
call_rcu(&pwq->rcu, rcu_free_pwq);
/*
@@ -4142,55 +4960,11 @@ static void pwq_release_workfn(struct kthread_work *work)
}
}
-/**
- * pwq_adjust_max_active - update a pwq's max_active to the current setting
- * @pwq: target pool_workqueue
- *
- * If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate inactive work items
- * accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
- */
-static void pwq_adjust_max_active(struct pool_workqueue *pwq)
-{
- struct workqueue_struct *wq = pwq->wq;
- bool freezable = wq->flags & WQ_FREEZABLE;
- unsigned long flags;
-
- /* for @wq->saved_max_active */
- lockdep_assert_held(&wq->mutex);
-
- /* fast exit for non-freezable wqs */
- if (!freezable && pwq->max_active == wq->saved_max_active)
- return;
-
- /* this function can be called during early boot w/ irq disabled */
- raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-
- /*
- * During [un]freezing, the caller is responsible for ensuring that
- * this function is called at least once after @workqueue_freezing
- * is updated and visible.
- */
- if (!freezable || !workqueue_freezing) {
- pwq->max_active = wq->saved_max_active;
-
- while (!list_empty(&pwq->inactive_works) &&
- pwq->nr_active < pwq->max_active)
- pwq_activate_first_inactive(pwq);
-
- kick_pool(pwq->pool);
- } else {
- pwq->max_active = 0;
- }
-
- raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
-}
-
/* initialize newly allocated @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
struct worker_pool *pool)
{
- BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
+ BUG_ON((unsigned long)pwq & ~WORK_STRUCT_PWQ_MASK);
memset(pwq, 0, sizeof(*pwq));
@@ -4199,6 +4973,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
pwq->flush_color = -1;
pwq->refcnt = 1;
INIT_LIST_HEAD(&pwq->inactive_works);
+ INIT_LIST_HEAD(&pwq->pending_node);
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
kthread_init_work(&pwq->release_work, pwq_release_workfn);
@@ -4218,11 +4993,8 @@ static void link_pwq(struct pool_workqueue *pwq)
/* set the matching work_color */
pwq->work_color = wq->work_color;
- /* sync max_active to the current setting */
- pwq_adjust_max_active(pwq);
-
/* link in @pwq */
- list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
+ list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs);
}
/* obtain a pool matching @attr and create a pwq associating the pool and @wq */
@@ -4289,10 +5061,11 @@ static void wq_calc_pod_cpumask(struct workqueue_attrs *attrs, int cpu,
"possible intersect\n");
}
-/* install @pwq into @wq's cpu_pwq and return the old pwq */
+/* install @pwq into @wq and return the old pwq, @cpu < 0 for dfl_pwq */
static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq,
int cpu, struct pool_workqueue *pwq)
{
+ struct pool_workqueue __rcu **slot = unbound_pwq_slot(wq, cpu);
struct pool_workqueue *old_pwq;
lockdep_assert_held(&wq_pool_mutex);
@@ -4301,8 +5074,8 @@ static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq,
/* link_pwq() can handle duplicate calls */
link_pwq(pwq);
- old_pwq = rcu_access_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu));
- rcu_assign_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu), pwq);
+ old_pwq = rcu_access_pointer(*slot);
+ rcu_assign_pointer(*slot, pwq);
return old_pwq;
}
@@ -4383,6 +5156,15 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
ctx->attrs = new_attrs;
+ /*
+ * For initialized ordered workqueues, there should only be one pwq
+ * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution
+ * of newly queued work items until execution of older work items in
+ * the old pwq's have completed.
+ */
+ if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))
+ ctx->dfl_pwq->plugged = true;
+
ctx->wq = wq;
return ctx;
@@ -4402,14 +5184,19 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
- /* save the previous pwq and install the new one */
+ /* save the previous pwqs and install the new ones */
for_each_possible_cpu(cpu)
ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu,
ctx->pwq_tbl[cpu]);
+ ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq);
+
+ /* update node_nr_active->max */
+ wq_update_node_max_active(ctx->wq, -1);
- /* @dfl_pwq might not have been used, ensure it's linked */
- link_pwq(ctx->dfl_pwq);
- swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
+ /* rescuer needs to respect wq cpumask changes */
+ if (ctx->wq->rescuer)
+ set_cpus_allowed_ptr(ctx->wq->rescuer->task,
+ unbound_effective_cpumask(ctx->wq));
mutex_unlock(&ctx->wq->mutex);
}
@@ -4423,14 +5210,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
return -EINVAL;
- /* creating multiple pwqs breaks ordering guarantee */
- if (!list_empty(&wq->pwqs)) {
- if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
- return -EINVAL;
-
- wq->flags &= ~__WQ_ORDERED;
- }
-
ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -4519,9 +5298,7 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu,
/* nothing to do if the target cpumask matches the current pwq */
wq_calc_pod_cpumask(target_attrs, cpu, off_cpu);
- pwq = rcu_dereference_protected(*per_cpu_ptr(wq->cpu_pwq, cpu),
- lockdep_is_held(&wq_pool_mutex));
- if (wqattrs_equal(target_attrs, pwq->pool->attrs))
+ if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs))
return;
/* create a new pwq */
@@ -4539,10 +5316,11 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu,
use_dfl_pwq:
mutex_lock(&wq->mutex);
- raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
- get_pwq(wq->dfl_pwq);
- raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
- old_pwq = install_unbound_pwq(wq, cpu, wq->dfl_pwq);
+ pwq = unbound_pwq(wq, -1);
+ raw_spin_lock_irq(&pwq->pool->lock);
+ get_pwq(pwq);
+ raw_spin_unlock_irq(&pwq->pool->lock);
+ old_pwq = install_unbound_pwq(wq, cpu, pwq);
out_unlock:
mutex_unlock(&wq->mutex);
put_pwq_unlocked(old_pwq);
@@ -4559,10 +5337,17 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
if (!(wq->flags & WQ_UNBOUND)) {
for_each_possible_cpu(cpu) {
- struct pool_workqueue **pwq_p =
- per_cpu_ptr(wq->cpu_pwq, cpu);
- struct worker_pool *pool =
- &(per_cpu_ptr(cpu_worker_pools, cpu)[highpri]);
+ struct pool_workqueue **pwq_p;
+ struct worker_pool __percpu *pools;
+ struct worker_pool *pool;
+
+ if (wq->flags & WQ_BH)
+ pools = bh_worker_pools;
+ else
+ pools = cpu_worker_pools;
+
+ pool = &(per_cpu_ptr(pools, cpu)[highpri]);
+ pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu);
*pwq_p = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL,
pool->node);
@@ -4580,10 +5365,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
cpus_read_lock();
if (wq->flags & __WQ_ORDERED) {
+ struct pool_workqueue *dfl_pwq;
+
ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
/* there should only be single pwq for ordering guarantee */
- WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
- wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+ dfl_pwq = rcu_access_pointer(wq->dfl_pwq);
+ WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node ||
+ wq->pwqs.prev != &dfl_pwq->pwqs_node),
"ordering guarantee broken for workqueue %s\n", wq->name);
} else {
ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
@@ -4652,12 +5440,78 @@ static int init_rescuer(struct workqueue_struct *wq)
}
wq->rescuer = rescuer;
- kthread_bind_mask(rescuer->task, cpu_possible_mask);
+ if (wq->flags & WQ_UNBOUND)
+ kthread_bind_mask(rescuer->task, wq_unbound_cpumask);
+ else
+ kthread_bind_mask(rescuer->task, cpu_possible_mask);
wake_up_process(rescuer->task);
return 0;
}
+/**
+ * wq_adjust_max_active - update a wq's max_active to the current setting
+ * @wq: target workqueue
+ *
+ * If @wq isn't freezing, set @wq->max_active to the saved_max_active and
+ * activate inactive work items accordingly. If @wq is freezing, clear
+ * @wq->max_active to zero.
+ */
+static void wq_adjust_max_active(struct workqueue_struct *wq)
+{
+ bool activated;
+ int new_max, new_min;
+
+ lockdep_assert_held(&wq->mutex);
+
+ if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) {
+ new_max = 0;
+ new_min = 0;
+ } else {
+ new_max = wq->saved_max_active;
+ new_min = wq->saved_min_active;
+ }
+
+ if (wq->max_active == new_max && wq->min_active == new_min)
+ return;
+
+ /*
+ * Update @wq->max/min_active and then kick inactive work items if more
+ * active work items are allowed. This doesn't break work item ordering
+ * because new work items are always queued behind existing inactive
+ * work items if there are any.
+ */
+ WRITE_ONCE(wq->max_active, new_max);
+ WRITE_ONCE(wq->min_active, new_min);
+
+ if (wq->flags & WQ_UNBOUND)
+ wq_update_node_max_active(wq, -1);
+
+ if (new_max == 0)
+ return;
+
+ /*
+ * Round-robin through pwq's activating the first inactive work item
+ * until max_active is filled.
+ */
+ do {
+ struct pool_workqueue *pwq;
+
+ activated = false;
+ for_each_pwq(pwq, wq) {
+ unsigned long irq_flags;
+
+ /* can be called during early boot w/ irq disabled */
+ raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags);
+ if (pwq_activate_first_inactive(pwq, true)) {
+ activated = true;
+ kick_pool(pwq->pool);
+ }
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags);
+ }
+ } while (activated);
+}
+
__printf(1, 4)
struct workqueue_struct *alloc_workqueue(const char *fmt,
unsigned int flags,
@@ -4665,23 +5519,27 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
{
va_list args;
struct workqueue_struct *wq;
- struct pool_workqueue *pwq;
+ size_t wq_size;
+ int name_len;
- /*
- * Unbound && max_active == 1 used to imply ordered, which is no longer
- * the case on many machines due to per-pod pools. While
- * alloc_ordered_workqueue() is the right way to create an ordered
- * workqueue, keep the previous behavior to avoid subtle breakages.
- */
- if ((flags & WQ_UNBOUND) && max_active == 1)
- flags |= __WQ_ORDERED;
+ if (flags & WQ_BH) {
+ if (WARN_ON_ONCE(flags & ~__WQ_BH_ALLOWS))
+ return NULL;
+ if (WARN_ON_ONCE(max_active))
+ return NULL;
+ }
/* see the comment above the definition of WQ_POWER_EFFICIENT */
if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
flags |= WQ_UNBOUND;
/* allocate wq and format name */
- wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (flags & WQ_UNBOUND)
+ wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1);
+ else
+ wq_size = sizeof(*wq);
+
+ wq = kzalloc(wq_size, GFP_KERNEL);
if (!wq)
return NULL;
@@ -4692,15 +5550,30 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
}
va_start(args, max_active);
- vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+ name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
va_end(args);
- max_active = max_active ?: WQ_DFL_ACTIVE;
- max_active = wq_clamp_max_active(max_active, flags, wq->name);
+ if (name_len >= WQ_NAME_LEN)
+ pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
+ wq->name);
+
+ if (flags & WQ_BH) {
+ /*
+ * BH workqueues always share a single execution context per CPU
+ * and don't impose any max_active limit.
+ */
+ max_active = INT_MAX;
+ } else {
+ max_active = max_active ?: WQ_DFL_ACTIVE;
+ max_active = wq_clamp_max_active(max_active, flags, wq->name);
+ }
/* init wq */
wq->flags = flags;
- wq->saved_max_active = max_active;
+ wq->max_active = max_active;
+ wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE);
+ wq->saved_max_active = wq->max_active;
+ wq->saved_min_active = wq->min_active;
mutex_init(&wq->mutex);
atomic_set(&wq->nr_pwqs_to_flush, 0);
INIT_LIST_HEAD(&wq->pwqs);
@@ -4711,8 +5584,13 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
wq_init_lockdep(wq);
INIT_LIST_HEAD(&wq->list);
+ if (flags & WQ_UNBOUND) {
+ if (alloc_node_nr_active(wq->node_nr_active) < 0)
+ goto err_unreg_lockdep;
+ }
+
if (alloc_and_link_pwqs(wq) < 0)
- goto err_unreg_lockdep;
+ goto err_free_node_nr_active;
if (wq_online && init_rescuer(wq) < 0)
goto err_destroy;
@@ -4728,8 +5606,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
mutex_lock(&wq_pool_mutex);
mutex_lock(&wq->mutex);
- for_each_pwq(pwq, wq)
- pwq_adjust_max_active(pwq);
+ wq_adjust_max_active(wq);
mutex_unlock(&wq->mutex);
list_add_tail_rcu(&wq->list, &workqueues);
@@ -4738,6 +5615,9 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
return wq;
+err_free_node_nr_active:
+ if (wq->flags & WQ_UNBOUND)
+ free_node_nr_active(wq->node_nr_active);
err_unreg_lockdep:
wq_unregister_lockdep(wq);
wq_free_lockdep(wq);
@@ -4759,9 +5639,9 @@ static bool pwq_busy(struct pool_workqueue *pwq)
if (pwq->nr_in_flight[i])
return true;
- if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+ if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1))
return true;
- if (pwq->nr_active || !list_empty(&pwq->inactive_works))
+ if (!pwq_is_empty(pwq))
return true;
return false;
@@ -4843,13 +5723,12 @@ void destroy_workqueue(struct workqueue_struct *wq)
rcu_read_lock();
for_each_possible_cpu(cpu) {
- pwq = rcu_access_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu));
- RCU_INIT_POINTER(*per_cpu_ptr(wq->cpu_pwq, cpu), NULL);
- put_pwq_unlocked(pwq);
+ put_pwq_unlocked(unbound_pwq(wq, cpu));
+ RCU_INIT_POINTER(*unbound_pwq_slot(wq, cpu), NULL);
}
- put_pwq_unlocked(wq->dfl_pwq);
- wq->dfl_pwq = NULL;
+ put_pwq_unlocked(unbound_pwq(wq, -1));
+ RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL);
rcu_read_unlock();
}
@@ -4860,34 +5739,63 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
* @wq: target workqueue
* @max_active: new max_active value.
*
- * Set max_active of @wq to @max_active.
+ * Set max_active of @wq to @max_active. See the alloc_workqueue() function
+ * comment.
*
* CONTEXT:
* Don't call from IRQ context.
*/
void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
{
- struct pool_workqueue *pwq;
-
+ /* max_active doesn't mean anything for BH workqueues */
+ if (WARN_ON(wq->flags & WQ_BH))
+ return;
/* disallow meddling with max_active for ordered workqueues */
- if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+ if (WARN_ON(wq->flags & __WQ_ORDERED))
return;
max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
mutex_lock(&wq->mutex);
- wq->flags &= ~__WQ_ORDERED;
wq->saved_max_active = max_active;
+ if (wq->flags & WQ_UNBOUND)
+ wq->saved_min_active = min(wq->saved_min_active, max_active);
- for_each_pwq(pwq, wq)
- pwq_adjust_max_active(pwq);
+ wq_adjust_max_active(wq);
mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
/**
+ * workqueue_set_min_active - adjust min_active of an unbound workqueue
+ * @wq: target unbound workqueue
+ * @min_active: new min_active value
+ *
+ * Set min_active of an unbound workqueue. Unlike other types of workqueues, an
+ * unbound workqueue is not guaranteed to be able to process max_active
+ * interdependent work items. Instead, an unbound workqueue is guaranteed to be
+ * able to process min_active number of interdependent work items which is
+ * %WQ_DFL_MIN_ACTIVE by default.
+ *
+ * Use this function to adjust the min_active value between 0 and the current
+ * max_active.
+ */
+void workqueue_set_min_active(struct workqueue_struct *wq, int min_active)
+{
+ /* min_active is only meaningful for non-ordered unbound workqueues */
+ if (WARN_ON((wq->flags & (WQ_BH | WQ_UNBOUND | __WQ_ORDERED)) !=
+ WQ_UNBOUND))
+ return;
+
+ mutex_lock(&wq->mutex);
+ wq->saved_min_active = clamp(min_active, 0, wq->saved_max_active);
+ wq_adjust_max_active(wq);
+ mutex_unlock(&wq->mutex);
+}
+
+/**
* current_work - retrieve %current task's work struct
*
* Determine if %current task is a workqueue worker and what it's working on.
@@ -4972,7 +5880,7 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
unsigned int work_busy(struct work_struct *work)
{
struct worker_pool *pool;
- unsigned long flags;
+ unsigned long irq_flags;
unsigned int ret = 0;
if (work_pending(work))
@@ -4981,10 +5889,10 @@ unsigned int work_busy(struct work_struct *work)
rcu_read_lock();
pool = get_work_pool(work);
if (pool) {
- raw_spin_lock_irqsave(&pool->lock, flags);
+ raw_spin_lock_irqsave(&pool->lock, irq_flags);
if (find_worker_executing_work(pool, work))
ret |= WORK_BUSY_RUNNING;
- raw_spin_unlock_irqrestore(&pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
}
rcu_read_unlock();
@@ -5069,7 +5977,24 @@ static void pr_cont_pool_info(struct worker_pool *pool)
pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
if (pool->node != NUMA_NO_NODE)
pr_cont(" node=%d", pool->node);
- pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
+ pr_cont(" flags=0x%x", pool->flags);
+ if (pool->flags & POOL_BH)
+ pr_cont(" bh%s",
+ pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
+ else
+ pr_cont(" nice=%d", pool->attrs->nice);
+}
+
+static void pr_cont_worker_id(struct worker *worker)
+{
+ struct worker_pool *pool = worker->pool;
+
+ if (pool->flags & WQ_BH)
+ pr_cont("bh%s",
+ pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
+ else
+ pr_cont("%d%s", task_pid_nr(worker->task),
+ worker->rescue_wq ? "(RESCUER)" : "");
}
struct pr_cont_work_struct {
@@ -5128,8 +6053,8 @@ static void show_pwq(struct pool_workqueue *pwq)
pr_info(" pwq %d:", pool->id);
pr_cont_pool_info(pool);
- pr_cont(" active=%d/%d refcnt=%d%s\n",
- pwq->nr_active, pwq->max_active, pwq->refcnt,
+ pr_cont(" active=%d refcnt=%d%s\n",
+ pwq->nr_active, pwq->refcnt,
!list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
@@ -5146,10 +6071,9 @@ static void show_pwq(struct pool_workqueue *pwq)
if (worker->current_pwq != pwq)
continue;
- pr_cont("%s %d%s:%ps", comma ? "," : "",
- task_pid_nr(worker->task),
- worker->rescue_wq ? "(RESCUER)" : "",
- worker->current_func);
+ pr_cont(" %s", comma ? "," : "");
+ pr_cont_worker_id(worker);
+ pr_cont(":%ps", worker->current_func);
list_for_each_entry(work, &worker->scheduled, entry)
pr_cont_work(false, work, &pcws);
pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
@@ -5200,10 +6124,10 @@ void show_one_workqueue(struct workqueue_struct *wq)
{
struct pool_workqueue *pwq;
bool idle = true;
- unsigned long flags;
+ unsigned long irq_flags;
for_each_pwq(pwq, wq) {
- if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+ if (!pwq_is_empty(pwq)) {
idle = false;
break;
}
@@ -5214,8 +6138,8 @@ void show_one_workqueue(struct workqueue_struct *wq)
pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
for_each_pwq(pwq, wq) {
- raw_spin_lock_irqsave(&pwq->pool->lock, flags);
- if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+ raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags);
+ if (!pwq_is_empty(pwq)) {
/*
* Defer printing to avoid deadlocks in console
* drivers that queue work while holding locks
@@ -5225,7 +6149,7 @@ void show_one_workqueue(struct workqueue_struct *wq)
show_pwq(pwq);
printk_deferred_exit();
}
- raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags);
/*
* We could be printing a lot from atomic context, e.g.
* sysrq-t -> show_all_workqueues(). Avoid triggering
@@ -5244,10 +6168,10 @@ static void show_one_worker_pool(struct worker_pool *pool)
{
struct worker *worker;
bool first = true;
- unsigned long flags;
+ unsigned long irq_flags;
unsigned long hung = 0;
- raw_spin_lock_irqsave(&pool->lock, flags);
+ raw_spin_lock_irqsave(&pool->lock, irq_flags);
if (pool->nr_workers == pool->nr_idle)
goto next_pool;
@@ -5268,14 +6192,14 @@ static void show_one_worker_pool(struct worker_pool *pool)
pr_cont(" manager: %d",
task_pid_nr(pool->manager->task));
list_for_each_entry(worker, &pool->idle_list, entry) {
- pr_cont(" %s%d", first ? "idle: " : "",
- task_pid_nr(worker->task));
+ pr_cont(" %s", first ? "idle: " : "");
+ pr_cont_worker_id(worker);
first = false;
}
pr_cont("\n");
printk_deferred_exit();
next_pool:
- raw_spin_unlock_irqrestore(&pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
/*
* We could be printing a lot from atomic context, e.g.
* sysrq-t -> show_all_workqueues(). Avoid triggering
@@ -5542,13 +6466,15 @@ int workqueue_online_cpu(unsigned int cpu)
mutex_lock(&wq_pool_mutex);
for_each_pool(pool, pi) {
- mutex_lock(&wq_pool_attach_mutex);
+ /* BH pools aren't affected by hotplug */
+ if (pool->flags & POOL_BH)
+ continue;
+ mutex_lock(&wq_pool_attach_mutex);
if (pool->cpu == cpu)
rebind_workers(pool);
else if (pool->cpu < 0)
restore_unbound_workers_cpumask(pool, cpu);
-
mutex_unlock(&wq_pool_attach_mutex);
}
@@ -5562,6 +6488,10 @@ int workqueue_online_cpu(unsigned int cpu)
for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
wq_update_pod(wq, tcpu, cpu, true);
+
+ mutex_lock(&wq->mutex);
+ wq_update_node_max_active(wq, -1);
+ mutex_unlock(&wq->mutex);
}
}
@@ -5590,6 +6520,10 @@ int workqueue_offline_cpu(unsigned int cpu)
for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
wq_update_pod(wq, tcpu, cpu, false);
+
+ mutex_lock(&wq->mutex);
+ wq_update_node_max_active(wq, cpu);
+ mutex_unlock(&wq->mutex);
}
}
mutex_unlock(&wq_pool_mutex);
@@ -5677,7 +6611,6 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe_key);
void freeze_workqueues_begin(void)
{
struct workqueue_struct *wq;
- struct pool_workqueue *pwq;
mutex_lock(&wq_pool_mutex);
@@ -5686,8 +6619,7 @@ void freeze_workqueues_begin(void)
list_for_each_entry(wq, &workqueues, list) {
mutex_lock(&wq->mutex);
- for_each_pwq(pwq, wq)
- pwq_adjust_max_active(pwq);
+ wq_adjust_max_active(wq);
mutex_unlock(&wq->mutex);
}
@@ -5752,7 +6684,6 @@ out_unlock:
void thaw_workqueues(void)
{
struct workqueue_struct *wq;
- struct pool_workqueue *pwq;
mutex_lock(&wq_pool_mutex);
@@ -5764,8 +6695,7 @@ void thaw_workqueues(void)
/* restore max_active and repopulate worklist */
list_for_each_entry(wq, &workqueues, list) {
mutex_lock(&wq->mutex);
- for_each_pwq(pwq, wq)
- pwq_adjust_max_active(pwq);
+ wq_adjust_max_active(wq);
mutex_unlock(&wq->mutex);
}
@@ -5784,16 +6714,9 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
lockdep_assert_held(&wq_pool_mutex);
list_for_each_entry(wq, &workqueues, list) {
- if (!(wq->flags & WQ_UNBOUND))
+ if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING))
continue;
- /* creating multiple pwqs breaks ordering guarantee */
- if (!list_empty(&wq->pwqs)) {
- if (wq->flags & __WQ_ORDERED_EXPLICIT)
- continue;
- wq->flags &= ~__WQ_ORDERED;
- }
-
ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
@@ -6307,11 +7230,10 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
int ret;
/*
- * Adjusting max_active or creating new pwqs by applying
- * attributes breaks ordering guarantee. Disallow exposing ordered
- * workqueues.
+ * Adjusting max_active breaks ordering guarantee. Disallow exposing
+ * ordered workqueues.
*/
- if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+ if (WARN_ON(wq->flags & __WQ_ORDERED))
return -EINVAL;
wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
@@ -6408,10 +7330,10 @@ static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
static void show_cpu_pool_hog(struct worker_pool *pool)
{
struct worker *worker;
- unsigned long flags;
+ unsigned long irq_flags;
int bkt;
- raw_spin_lock_irqsave(&pool->lock, flags);
+ raw_spin_lock_irqsave(&pool->lock, irq_flags);
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
if (task_is_running(worker->task)) {
@@ -6429,7 +7351,7 @@ static void show_cpu_pool_hog(struct worker_pool *pool)
}
}
- raw_spin_unlock_irqrestore(&pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
}
static void show_cpu_pools_hogs(void)
@@ -6501,7 +7423,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
/* did we stall? */
if (time_after(now, ts + thresh)) {
lockup_detected = true;
- if (pool->cpu >= 0) {
+ if (pool->cpu >= 0 && !(pool->flags & POOL_BH)) {
pool->cpu_stall = true;
cpu_pool_stall = true;
}
@@ -6584,6 +7506,16 @@ static inline void wq_watchdog_init(void) { }
#endif /* CONFIG_WQ_WATCHDOG */
+static void bh_pool_kick_normal(struct irq_work *irq_work)
+{
+ raise_softirq_irqoff(TASKLET_SOFTIRQ);
+}
+
+static void bh_pool_kick_highpri(struct irq_work *irq_work)
+{
+ raise_softirq_irqoff(HI_SOFTIRQ);
+}
+
static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
{
if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
@@ -6595,6 +7527,22 @@ static void __init restrict_unbound_cpumask(const char *name, const struct cpuma
cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask);
}
+static void __init init_cpu_worker_pool(struct worker_pool *pool, int cpu, int nice)
+{
+ BUG_ON(init_worker_pool(pool));
+ pool->cpu = cpu;
+ cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
+ cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu));
+ pool->attrs->nice = nice;
+ pool->attrs->affn_strict = true;
+ pool->node = cpu_to_node(cpu);
+
+ /* alloc pool ID */
+ mutex_lock(&wq_pool_mutex);
+ BUG_ON(worker_pool_assign_id(pool));
+ mutex_unlock(&wq_pool_mutex);
+}
+
/**
* workqueue_init_early - early init for workqueue subsystem
*
@@ -6609,6 +7557,8 @@ void __init workqueue_init_early(void)
{
struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM];
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
+ void (*irq_work_fns[2])(struct irq_work *) = { bh_pool_kick_normal,
+ bh_pool_kick_highpri };
int i, cpu;
BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
@@ -6630,6 +7580,13 @@ void __init workqueue_init_early(void)
wq_update_pod_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_pod_attrs_buf);
+ /*
+ * If nohz_full is enabled, set power efficient workqueue as unbound.
+ * This allows workqueue items to be moved to HK CPUs.
+ */
+ if (housekeeping_enabled(HK_TYPE_TICK))
+ wq_power_efficient = true;
+
/* initialize WQ_AFFN_SYSTEM pods */
pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL);
pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL);
@@ -6643,25 +7600,21 @@ void __init workqueue_init_early(void)
pt->pod_node[0] = NUMA_NO_NODE;
pt->cpu_pod[0] = 0;
- /* initialize CPU pools */
+ /* initialize BH and CPU pools */
for_each_possible_cpu(cpu) {
struct worker_pool *pool;
i = 0;
- for_each_cpu_worker_pool(pool, cpu) {
- BUG_ON(init_worker_pool(pool));
- pool->cpu = cpu;
- cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
- cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu));
- pool->attrs->nice = std_nice[i++];
- pool->attrs->affn_strict = true;
- pool->node = cpu_to_node(cpu);
-
- /* alloc pool ID */
- mutex_lock(&wq_pool_mutex);
- BUG_ON(worker_pool_assign_id(pool));
- mutex_unlock(&wq_pool_mutex);
+ for_each_bh_worker_pool(pool, cpu) {
+ init_cpu_worker_pool(pool, cpu, std_nice[i]);
+ pool->flags |= POOL_BH;
+ init_irq_work(bh_pool_irq_work(pool), irq_work_fns[i]);
+ i++;
}
+
+ i = 0;
+ for_each_cpu_worker_pool(pool, cpu)
+ init_cpu_worker_pool(pool, cpu, std_nice[i++]);
}
/* create default unbound and ordered wq attrs */
@@ -6691,13 +7644,17 @@ void __init workqueue_init_early(void)
WQ_FREEZABLE, 0);
system_power_efficient_wq = alloc_workqueue("events_power_efficient",
WQ_POWER_EFFICIENT, 0);
- system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+ system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient",
WQ_FREEZABLE | WQ_POWER_EFFICIENT,
0);
+ system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0);
+ system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
+ WQ_BH | WQ_HIGHPRI, 0);
BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
!system_unbound_wq || !system_freezable_wq ||
!system_power_efficient_wq ||
- !system_freezable_power_efficient_wq);
+ !system_freezable_power_efficient_wq ||
+ !system_bh_wq || !system_bh_highpri_wq);
}
static void __init wq_cpu_intensive_thresh_init(void)
@@ -6763,9 +7720,10 @@ void __init workqueue_init(void)
* up. Also, create a rescuer for workqueues that requested it.
*/
for_each_possible_cpu(cpu) {
- for_each_cpu_worker_pool(pool, cpu) {
+ for_each_bh_worker_pool(pool, cpu)
+ pool->node = cpu_to_node(cpu);
+ for_each_cpu_worker_pool(pool, cpu)
pool->node = cpu_to_node(cpu);
- }
}
list_for_each_entry(wq, &workqueues, list) {
@@ -6776,7 +7734,16 @@ void __init workqueue_init(void)
mutex_unlock(&wq_pool_mutex);
- /* create the initial workers */
+ /*
+ * Create the initial workers. A BH pool has one pseudo worker that
+ * represents the shared BH execution context and thus doesn't get
+ * affected by hotplug events. Create the BH pseudo workers for all
+ * possible CPUs here.
+ */
+ for_each_possible_cpu(cpu)
+ for_each_bh_worker_pool(pool, cpu)
+ BUG_ON(!create_worker(pool));
+
for_each_online_cpu(cpu) {
for_each_cpu_worker_pool(pool, cpu) {
pool->flags &= ~POOL_DISASSOCIATED;
@@ -6856,7 +7823,7 @@ static bool __init cpus_share_numa(int cpu0, int cpu1)
/**
* workqueue_init_topology - initialize CPU pods for unbound workqueues
*
- * This is the third step of there-staged workqueue subsystem initialization and
+ * This is the third step of three-staged workqueue subsystem initialization and
* invoked after SMP and topology information are fully initialized. It
* initializes the unbound CPU pods accordingly.
*/
@@ -6870,6 +7837,8 @@ void __init workqueue_init_topology(void)
init_pod_type(&wq_pod_types[WQ_AFFN_CACHE], cpus_share_cache);
init_pod_type(&wq_pod_types[WQ_AFFN_NUMA], cpus_share_numa);
+ wq_topo_initialized = true;
+
mutex_lock(&wq_pool_mutex);
/*
@@ -6878,8 +7847,12 @@ void __init workqueue_init_topology(void)
* combinations to apply per-pod sharing.
*/
list_for_each_entry(wq, &workqueues, list) {
- for_each_online_cpu(cpu) {
+ for_each_online_cpu(cpu)
wq_update_pod(wq, cpu, cpu, true);
+ if (wq->flags & WQ_UNBOUND) {
+ mutex_lock(&wq->mutex);
+ wq_update_node_max_active(wq, -1);
+ mutex_unlock(&wq->mutex);
}
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 975a07f9f1cc..2164f066e7b6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1303,7 +1303,7 @@ config PROVE_LOCKING
select DEBUG_SPINLOCK
select DEBUG_MUTEXES if !PREEMPT_RT
select DEBUG_RT_MUTEXES if RT_MUTEXES
- select DEBUG_RWSEMS
+ select DEBUG_RWSEMS if !PREEMPT_RT
select DEBUG_WW_MUTEX_SLOWPATH
select DEBUG_LOCK_ALLOC
select PREEMPT_COUNT if !ARCH_NO_PREEMPT
@@ -1426,7 +1426,7 @@ config DEBUG_WW_MUTEX_SLOWPATH
config DEBUG_RWSEMS
bool "RW Semaphore debugging: basic checks"
- depends on DEBUG_KERNEL
+ depends on DEBUG_KERNEL && !PREEMPT_RT
help
This debugging feature allows mismatched rw semaphore locks
and unlocks to be detected and reported.
@@ -2235,6 +2235,7 @@ config TEST_DIV64
config TEST_IOV_ITER
tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS
depends on KUNIT
+ depends on MMU
default KUNIT_ALL_TESTS
help
Enable this to turn on testing of the operation of the I/O iterator
@@ -2352,11 +2353,15 @@ config ASYNC_RAID6_TEST
config TEST_HEXDUMP
tristate "Test functions located in the hexdump module at runtime"
-config STRING_SELFTEST
- tristate "Test string functions at runtime"
+config STRING_KUNIT_TEST
+ tristate "KUnit test string functions at runtime" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
-config TEST_STRING_HELPERS
- tristate "Test functions located in the string_helpers module at runtime"
+config STRING_HELPERS_KUNIT_TEST
+ tristate "KUnit test string helpers at runtime" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
config TEST_KSTRTOX
tristate "Test kstrto*() family of functions at runtime"
@@ -2748,7 +2753,7 @@ config STACKINIT_KUNIT_TEST
config FORTIFY_KUNIT_TEST
tristate "Test fortified str*() and mem*() function internals at runtime" if !KUNIT_ALL_TESTS
- depends on KUNIT && FORTIFY_SOURCE
+ depends on KUNIT
default KUNIT_ALL_TESTS
help
Builds unit tests for checking internals of FORTIFY_SOURCE as used
@@ -2857,28 +2862,6 @@ config TEST_MEMCAT_P
If unsure, say N.
-config TEST_LIVEPATCH
- tristate "Test livepatching"
- default n
- depends on DYNAMIC_DEBUG
- depends on LIVEPATCH
- depends on m
- help
- Test kernel livepatching features for correctness. The tests will
- load test modules that will be livepatched in various scenarios.
-
- To run all the livepatching tests:
-
- make -C tools/testing/selftests TARGETS=livepatch run_tests
-
- Alternatively, individual tests may be invoked:
-
- tools/testing/selftests/livepatch/test-callbacks.sh
- tools/testing/selftests/livepatch/test-livepatch.sh
- tools/testing/selftests/livepatch/test-shadow-vars.sh
-
- If unsure, say N.
-
config TEST_OBJAGG
tristate "Perform selftest on object aggreration manager"
default n
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 59e21bfec188..48a67058f84e 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-config ARCH_HAS_UBSAN_SANITIZE_ALL
+config ARCH_HAS_UBSAN
bool
menuconfig UBSAN
@@ -87,7 +87,6 @@ config UBSAN_LOCAL_BOUNDS
config UBSAN_SHIFT
bool "Perform checking for bit-shift overflows"
- default UBSAN
depends on $(cc-option,-fsanitize=shift)
help
This option enables -fsanitize=shift which checks for bit-shift
@@ -116,6 +115,20 @@ config UBSAN_UNREACHABLE
This option enables -fsanitize=unreachable which checks for control
flow reaching an expected-to-be-unreachable position.
+config UBSAN_SIGNED_WRAP
+ bool "Perform checking for signed arithmetic wrap-around"
+ default UBSAN
+ depends on !COMPILE_TEST
+ depends on $(cc-option,-fsanitize=signed-integer-overflow)
+ help
+ This option enables -fsanitize=signed-integer-overflow which checks
+ for wrap-around of any arithmetic operations with signed integers.
+ This currently performs nearly no instrumentation due to the
+ kernel's use of -fno-strict-overflow which converts all would-be
+ arithmetic undefined behavior into wrap-around arithmetic. Future
+ sanitizer versions will allow for wrap-around checking (rather than
+ exclusively undefined behavior).
+
config UBSAN_BOOL
bool "Perform checking for non-boolean values used as boolean"
default UBSAN
@@ -142,17 +155,6 @@ config UBSAN_ALIGNMENT
Enabling this option on architectures that support unaligned
accesses may produce a lot of false positives.
-config UBSAN_SANITIZE_ALL
- bool "Enable instrumentation for the entire kernel"
- depends on ARCH_HAS_UBSAN_SANITIZE_ALL
- default y
- help
- This option activates instrumentation for the entire kernel.
- If you don't enable this option, you have to explicitly specify
- UBSAN_SANITIZE := y for the files/directories you want to check for UB.
- Enabling this option will get kernel image size increased
- significantly.
-
config TEST_UBSAN
tristate "Module for testing for undefined behavior detection"
depends on m
diff --git a/lib/Makefile b/lib/Makefile
index 6b09731d8e61..363852afa200 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,9 +49,9 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
percpu-refcount.o rhashtable.o base64.o \
once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
generic-radix-tree.o bitmap-str.o
-obj-$(CONFIG_STRING_SELFTEST) += test_string.o
+obj-$(CONFIG_STRING_KUNIT_TEST) += string_kunit.o
obj-y += string_helpers.o
-obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
+obj-$(CONFIG_STRING_HELPERS_KUNIT_TEST) += string_helpers_kunit.o
obj-y += hexdump.o
obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
obj-y += kstrtox.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
obj-$(CONFIG_TEST_IDA) += test_ida.o
obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
+CFLAGS_test_ubsan.o += $(call cc-disable-warning, unused-but-set-variable)
UBSAN_SANITIZE_test_ubsan.o := y
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
@@ -134,8 +135,6 @@ endif
obj-$(CONFIG_TEST_FPU) += test_fpu.o
CFLAGS_test_fpu.o += $(FPU_CFLAGS)
-obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
-
# Some KUnit files (hooks.o) need to be built-in even when KUnit is a module,
# so we can't just use obj-$(CONFIG_KUNIT).
ifdef CONFIG_KUNIT
@@ -401,6 +400,8 @@ obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced)
+CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-overread)
+CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-truncation)
CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN)
obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
obj-$(CONFIG_STRCAT_KUNIT_TEST) += strcat_kunit.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 09522af227f1..b97692854966 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -348,6 +348,13 @@ unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_weight_and);
+unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
+{
+ return BITMAP_WEIGHT(bitmap1[idx] & ~bitmap2[idx], bits);
+}
+EXPORT_SYMBOL(__bitmap_weight_andnot);
+
void __bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c
index 225bb7701460..bf70850035c7 100644
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -215,7 +215,7 @@ static const u32 init_sums_no_overflow[] = {
0xffff0000, 0xfffffffb,
};
-static const __sum16 expected_csum_ipv6_magic[] = {
+static const u16 expected_csum_ipv6_magic[] = {
0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f, 0x1034, 0x8422, 0x6fc0,
0xd2f6, 0xbeb5, 0x9d3, 0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e,
0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d,
@@ -241,7 +241,7 @@ static const __sum16 expected_csum_ipv6_magic[] = {
0x3845, 0x1014
};
-static const __sum16 expected_fast_csum[] = {
+static const u16 expected_fast_csum[] = {
0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e, 0xe902, 0xa5e9, 0x87a5, 0x7187,
0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a,
0xedbe, 0xabee, 0x6aac, 0xe6b, 0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a,
@@ -577,7 +577,8 @@ static void test_csum_no_carry_inputs(struct kunit *test)
static void test_ip_fast_csum(struct kunit *test)
{
- __sum16 csum_result, expected;
+ __sum16 csum_result;
+ u16 expected;
for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) {
for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) {
@@ -586,7 +587,7 @@ static void test_ip_fast_csum(struct kunit *test)
expected_fast_csum[(len - IPv4_MIN_WORDS) *
NUM_IP_FAST_CSUM_TESTS +
index];
- CHECK_EQ(expected, csum_result);
+ CHECK_EQ(to_sum16(expected), csum_result);
}
}
}
@@ -598,7 +599,7 @@ static void test_csum_ipv6_magic(struct kunit *test)
const struct in6_addr *daddr;
unsigned int len;
unsigned char proto;
- unsigned int csum;
+ __wsum csum;
const int daddr_offset = sizeof(struct in6_addr);
const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
@@ -611,10 +612,10 @@ static void test_csum_ipv6_magic(struct kunit *test)
saddr = (const struct in6_addr *)(random_buf + i);
daddr = (const struct in6_addr *)(random_buf + i +
daddr_offset);
- len = *(unsigned int *)(random_buf + i + len_offset);
+ len = le32_to_cpu(*(__le32 *)(random_buf + i + len_offset));
proto = *(random_buf + i + proto_offset);
- csum = *(unsigned int *)(random_buf + i + csum_offset);
- CHECK_EQ(expected_csum_ipv6_magic[i],
+ csum = *(__wsum *)(random_buf + i + csum_offset);
+ CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
csum_ipv6_magic(saddr, daddr, len, proto, csum));
}
#endif /* !CONFIG_NET */
diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c
index d4572dbc9145..705b82736be0 100644
--- a/lib/cmdline_kunit.c
+++ b/lib/cmdline_kunit.c
@@ -124,7 +124,7 @@ static void cmdline_do_one_range_test(struct kunit *test, const char *in,
n, e[0], r[0]);
p = memchr_inv(&r[1], 0, sizeof(r) - sizeof(r[0]));
- KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %u out of bound", n, p - r);
+ KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %td out of bound", n, p - r);
}
static void cmdline_test_range(struct kunit *test)
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 83471e81501a..222c6d6c8281 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -96,15 +96,25 @@ static void __dump_stack(const char *log_lvl)
*/
asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
{
+ bool in_panic = this_cpu_in_panic();
unsigned long flags;
/*
* Permit this cpu to perform nested stack dumps while serialising
- * against other CPUs
+ * against other CPUs, unless this CPU is in panic.
+ *
+ * When in panic, non-panic CPUs are not permitted to store new
+ * printk messages so there is no need to synchronize the output.
+ * This avoids potential deadlock in panic() if another CPU is
+ * holding and unable to release the printk_cpu_sync.
*/
- printk_cpu_sync_get_irqsave(flags);
+ if (!in_panic)
+ printk_cpu_sync_get_irqsave(flags);
+
__dump_stack(log_lvl);
- printk_cpu_sync_put_irqrestore(flags);
+
+ if (!in_panic)
+ printk_cpu_sync_put_irqrestore(flags);
}
EXPORT_SYMBOL(dump_stack_lvl);
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index fde0aa244148..a1389db1c30a 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -10,10 +10,77 @@
#include <linux/dynamic_queue_limits.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <trace/events/napi.h>
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
+static void dql_check_stall(struct dql *dql)
+{
+ unsigned short stall_thrs;
+ unsigned long now;
+
+ stall_thrs = READ_ONCE(dql->stall_thrs);
+ if (!stall_thrs)
+ return;
+
+ now = jiffies;
+ /* Check for a potential stall */
+ if (time_after_eq(now, dql->last_reap + stall_thrs)) {
+ unsigned long hist_head, t, start, end;
+
+ /* We are trying to detect a period of at least @stall_thrs
+ * jiffies without any Tx completions, but during first half
+ * of which some Tx was posted.
+ */
+dqs_again:
+ hist_head = READ_ONCE(dql->history_head);
+ /* pairs with smp_wmb() in dql_queued() */
+ smp_rmb();
+
+ /* Get the previous entry in the ring buffer, which is the
+ * oldest sample.
+ */
+ start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG;
+
+ /* Advance start to continue from the last reap time */
+ if (time_before(start, dql->last_reap + 1))
+ start = dql->last_reap + 1;
+
+ /* Newest sample we should have already seen a completion for */
+ end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1);
+
+ /* Shrink the search space to [start, (now - start_thrs/2)] if
+ * `end` is beyond the stall zone
+ */
+ if (time_before(now, end + stall_thrs / 2))
+ end = now - stall_thrs / 2;
+
+ /* Search for the queued time in [t, end] */
+ for (t = start; time_before_eq(t, end); t++)
+ if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG),
+ dql->history))
+ break;
+
+ /* Variable t contains the time of the queue */
+ if (!time_before_eq(t, end))
+ goto no_stall;
+
+ /* The ring buffer was modified in the meantime, retry */
+ if (hist_head != READ_ONCE(dql->history_head))
+ goto dqs_again;
+
+ dql->stall_cnt++;
+ dql->stall_max = max_t(unsigned short, dql->stall_max, now - t);
+
+ trace_dql_stall_detected(dql->stall_thrs, now - t,
+ dql->last_reap, dql->history_head,
+ now, dql->history);
+ }
+no_stall:
+ dql->last_reap = now;
+}
+
/* Records completed count and recalculates the queue limit */
void dql_completed(struct dql *dql, unsigned int count)
{
@@ -110,6 +177,8 @@ void dql_completed(struct dql *dql, unsigned int count)
dql->prev_last_obj_cnt = dql->last_obj_cnt;
dql->num_completed = completed;
dql->prev_num_queued = num_queued;
+
+ dql_check_stall(dql);
}
EXPORT_SYMBOL(dql_completed);
@@ -125,6 +194,10 @@ void dql_reset(struct dql *dql)
dql->prev_ovlimit = 0;
dql->lowest_slack = UINT_MAX;
dql->slack_start_time = jiffies;
+
+ dql->last_reap = jiffies;
+ dql->history_head = jiffies / BITS_PER_LONG;
+ memset(dql->history, 0, sizeof(dql->history));
}
EXPORT_SYMBOL(dql_reset);
@@ -133,6 +206,7 @@ void dql_init(struct dql *dql, unsigned int hold_time)
dql->max_limit = DQL_MAX_LIMIT;
dql->min_limit = 0;
dql->slack_hold_time = hold_time;
+ dql->stall_thrs = 0;
dql_reset(dql);
}
EXPORT_SYMBOL(dql_init);
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index 2e4fedc81621..493ec02dd5b3 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Runtime test cases for CONFIG_FORTIFY_SOURCE that aren't expected to
- * Oops the kernel on success. (For those, see drivers/misc/lkdtm/fortify.c)
+ * Runtime test cases for CONFIG_FORTIFY_SOURCE. For testing memcpy(),
+ * see FORTIFY_MEM_* tests in LKDTM (drivers/misc/lkdtm/fortify.c).
*
* For corner cases with UBSAN, try testing with:
*
@@ -15,17 +15,55 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/* Redefine fortify_panic() to track failures. */
+void fortify_add_kunit_error(int write);
+#define fortify_panic(func, write, avail, size, retfail) do { \
+ __fortify_report(FORTIFY_REASON(func, write), avail, size); \
+ fortify_add_kunit_error(write); \
+ return (retfail); \
+} while (0)
+
#include <kunit/device.h>
#include <kunit/test.h>
+#include <kunit/test-bug.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
+/* Handle being built without CONFIG_FORTIFY_SOURCE */
+#ifndef __compiletime_strlen
+# define __compiletime_strlen __builtin_strlen
+#endif
+
+static struct kunit_resource read_resource;
+static struct kunit_resource write_resource;
+static int fortify_read_overflows;
+static int fortify_write_overflows;
+
static const char array_of_10[] = "this is 10";
static const char *ptr_of_11 = "this is 11!";
static char array_unknown[] = "compiler thinks I might change";
+void fortify_add_kunit_error(int write)
+{
+ struct kunit_resource *resource;
+ struct kunit *current_test;
+
+ current_test = kunit_get_current_test();
+ if (!current_test)
+ return;
+
+ resource = kunit_find_named_resource(current_test,
+ write ? "fortify_write_overflows"
+ : "fortify_read_overflows");
+ if (!resource)
+ return;
+
+ (*(int *)resource->data)++;
+ kunit_put_resource(resource);
+}
+
static void known_sizes_test(struct kunit *test)
{
KUNIT_EXPECT_EQ(test, __compiletime_strlen("88888888"), 8);
@@ -308,6 +346,610 @@ DEFINE_ALLOC_SIZE_TEST_PAIR(kvmalloc)
} while (0)
DEFINE_ALLOC_SIZE_TEST_PAIR(devm_kmalloc)
+/*
+ * We can't have an array at the end of a structure or else
+ * builds without -fstrict-flex-arrays=3 will report them as
+ * being an unknown length. Additionally, add bytes before
+ * and after the string to catch over/underflows if tests
+ * fail.
+ */
+struct fortify_padding {
+ unsigned long bytes_before;
+ char buf[32];
+ unsigned long bytes_after;
+};
+/* Force compiler into not being able to resolve size at compile-time. */
+static volatile int unconst;
+
+static void strlen_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ int i, end = sizeof(pad.buf) - 1;
+
+ /* Fill 31 bytes with valid characters. */
+ for (i = 0; i < sizeof(pad.buf) - 1; i++)
+ pad.buf[i] = i + '0';
+ /* Trailing bytes are still %NUL. */
+ KUNIT_EXPECT_EQ(test, pad.buf[end], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* String is terminated, so strlen() is valid. */
+ KUNIT_EXPECT_EQ(test, strlen(pad.buf), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+
+ /* Make string unterminated, and recount. */
+ pad.buf[end] = 'A';
+ end = sizeof(pad.buf);
+ KUNIT_EXPECT_EQ(test, strlen(pad.buf), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+}
+
+static void strnlen_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ int i, end = sizeof(pad.buf) - 1;
+
+ /* Fill 31 bytes with valid characters. */
+ for (i = 0; i < sizeof(pad.buf) - 1; i++)
+ pad.buf[i] = i + '0';
+ /* Trailing bytes are still %NUL. */
+ KUNIT_EXPECT_EQ(test, pad.buf[end], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* String is terminated, so strnlen() is valid. */
+ KUNIT_EXPECT_EQ(test, strnlen(pad.buf, sizeof(pad.buf)), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ /* A truncated strnlen() will be safe, too. */
+ KUNIT_EXPECT_EQ(test, strnlen(pad.buf, sizeof(pad.buf) / 2),
+ sizeof(pad.buf) / 2);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+
+ /* Make string unterminated, and recount. */
+ pad.buf[end] = 'A';
+ end = sizeof(pad.buf);
+ /* Reading beyond with strncpy() will fail. */
+ KUNIT_EXPECT_EQ(test, strnlen(pad.buf, end + 1), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ KUNIT_EXPECT_EQ(test, strnlen(pad.buf, end + 2), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+
+ /* Early-truncated is safe still, though. */
+ KUNIT_EXPECT_EQ(test, strnlen(pad.buf, end), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+
+ end = sizeof(pad.buf) / 2;
+ KUNIT_EXPECT_EQ(test, strnlen(pad.buf, end), end);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+}
+
+static void strcpy_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ char src[sizeof(pad.buf) + 1] = { };
+ int i;
+
+ /* Fill 31 bytes with valid characters. */
+ for (i = 0; i < sizeof(src) - 2; i++)
+ src[i] = i + '0';
+
+ /* Destination is %NUL-filled to start with. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_before, 0);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Legitimate strcpy() 1 less than of max size. */
+ KUNIT_ASSERT_TRUE(test, strcpy(pad.buf, src)
+ == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Only last byte should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ src[sizeof(src) - 2] = 'A';
+ /* But now we trip the overflow checking. */
+ KUNIT_ASSERT_TRUE(test, strcpy(pad.buf, src)
+ == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);
+ /* Trailing %NUL -- thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* And we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ src[sizeof(src) - 1] = 'A';
+ /* And for sure now, two bytes past. */
+ KUNIT_ASSERT_TRUE(test, strcpy(pad.buf, src)
+ == pad.buf);
+ /*
+ * Which trips both the strlen() on the unterminated src,
+ * and the resulting copy attempt.
+ */
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ /* Trailing %NUL -- thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* And we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+}
+
+static void strncpy_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ char src[] = "Copy me fully into a small buffer and I will overflow!";
+
+ /* Destination is %NUL-filled to start with. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_before, 0);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Legitimate strncpy() 1 less than of max size. */
+ KUNIT_ASSERT_TRUE(test, strncpy(pad.buf, src,
+ sizeof(pad.buf) + unconst - 1)
+ == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Only last byte should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* Legitimate (though unterminated) max-size strncpy. */
+ KUNIT_ASSERT_TRUE(test, strncpy(pad.buf, src,
+ sizeof(pad.buf) + unconst)
+ == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* No trailing %NUL -- thanks strncpy API. */
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* But we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Now verify that FORTIFY is working... */
+ KUNIT_ASSERT_TRUE(test, strncpy(pad.buf, src,
+ sizeof(pad.buf) + unconst + 1)
+ == pad.buf);
+ /* Should catch the overflow. */
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* And we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* And further... */
+ KUNIT_ASSERT_TRUE(test, strncpy(pad.buf, src,
+ sizeof(pad.buf) + unconst + 2)
+ == pad.buf);
+ /* Should catch the overflow. */
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* And we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+}
+
+static void strscpy_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ char src[] = "Copy me fully into a small buffer and I will overflow!";
+
+ /* Destination is %NUL-filled to start with. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_before, 0);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Legitimate strscpy() 1 less than of max size. */
+ KUNIT_ASSERT_EQ(test, strscpy(pad.buf, src,
+ sizeof(pad.buf) + unconst - 1),
+ -E2BIG);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Keeping space for %NUL, last two bytes should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* Legitimate max-size strscpy. */
+ KUNIT_ASSERT_EQ(test, strscpy(pad.buf, src,
+ sizeof(pad.buf) + unconst),
+ -E2BIG);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* A trailing %NUL will exist. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+
+ /* Now verify that FORTIFY is working... */
+ KUNIT_ASSERT_EQ(test, strscpy(pad.buf, src,
+ sizeof(pad.buf) + unconst + 1),
+ -E2BIG);
+ /* Should catch the overflow. */
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* And we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* And much further... */
+ KUNIT_ASSERT_EQ(test, strscpy(pad.buf, src,
+ sizeof(src) * 2 + unconst),
+ -E2BIG);
+ /* Should catch the overflow. */
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ /* And we will not have gone beyond. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+}
+
+static void strcat_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ char src[sizeof(pad.buf) / 2] = { };
+ char one[] = "A";
+ char two[] = "BC";
+ int i;
+
+ /* Fill 15 bytes with valid characters. */
+ for (i = 0; i < sizeof(src) - 1; i++)
+ src[i] = i + 'A';
+
+ /* Destination is %NUL-filled to start with. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_before, 0);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Legitimate strcat() using less than half max size. */
+ KUNIT_ASSERT_TRUE(test, strcat(pad.buf, src) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Legitimate strcat() now 2 bytes shy of end. */
+ KUNIT_ASSERT_TRUE(test, strcat(pad.buf, src) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Last two bytes should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* Add one more character to the end. */
+ KUNIT_ASSERT_TRUE(test, strcat(pad.buf, one) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Last byte should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* And this one char will overflow. */
+ KUNIT_ASSERT_TRUE(test, strcat(pad.buf, one) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);
+ /* Last byte should be %NUL thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* And adding two will overflow more. */
+ KUNIT_ASSERT_TRUE(test, strcat(pad.buf, two) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ /* Last byte should be %NUL thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+}
+
+static void strncat_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ char src[sizeof(pad.buf)] = { };
+ int i, partial;
+
+ /* Fill 31 bytes with valid characters. */
+ partial = sizeof(src) / 2 - 1;
+ for (i = 0; i < partial; i++)
+ src[i] = i + 'A';
+
+ /* Destination is %NUL-filled to start with. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_before, 0);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Legitimate strncat() using less than half max size. */
+ KUNIT_ASSERT_TRUE(test, strncat(pad.buf, src, partial) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Legitimate strncat() now 2 bytes shy of end. */
+ KUNIT_ASSERT_TRUE(test, strncat(pad.buf, src, partial) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Last two bytes should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* Add one more character to the end. */
+ KUNIT_ASSERT_TRUE(test, strncat(pad.buf, src, 1) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Last byte should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* And this one char will overflow. */
+ KUNIT_ASSERT_TRUE(test, strncat(pad.buf, src, 1) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);
+ /* Last byte should be %NUL thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* And adding two will overflow more. */
+ KUNIT_ASSERT_TRUE(test, strncat(pad.buf, src, 2) == pad.buf);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ /* Last byte should be %NUL thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Force an unterminated destination, and overflow. */
+ pad.buf[sizeof(pad.buf) - 1] = 'A';
+ KUNIT_ASSERT_TRUE(test, strncat(pad.buf, src, 1) == pad.buf);
+ /* This will have tripped both strlen() and strcat(). */
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 3);
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ /* But we should not go beyond the end. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+}
+
+static void strlcat_test(struct kunit *test)
+{
+ struct fortify_padding pad = { };
+ char src[sizeof(pad.buf)] = { };
+ int i, partial;
+ int len = sizeof(pad.buf) + unconst;
+
+ /* Fill 15 bytes with valid characters. */
+ partial = sizeof(src) / 2 - 1;
+ for (i = 0; i < partial; i++)
+ src[i] = i + 'A';
+
+ /* Destination is %NUL-filled to start with. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_before, 0);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Legitimate strlcat() using less than half max size. */
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, src, len), partial);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Legitimate strlcat() now 2 bytes shy of end. */
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, src, len), partial * 2);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Last two bytes should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* Add one more character to the end. */
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, "Q", len), partial * 2 + 1);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);
+ /* Last byte should be %NUL */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+
+ /* And this one char will overflow. */
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, "V", len * 2), len);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);
+ /* Last byte should be %NUL thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* And adding two will overflow more. */
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, "QQ", len * 2), len + 1);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ /* Last byte should be %NUL thanks to FORTIFY. */
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Force an unterminated destination, and overflow. */
+ pad.buf[sizeof(pad.buf) - 1] = 'A';
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, "TT", len * 2), len + 2);
+ /* This will have tripped both strlen() and strlcat(). */
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 2);
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 2], '\0');
+ KUNIT_EXPECT_NE(test, pad.buf[sizeof(pad.buf) - 3], '\0');
+ /* But we should not go beyond the end. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+
+ /* Force an unterminated source, and overflow. */
+ memset(src, 'B', sizeof(src));
+ pad.buf[sizeof(pad.buf) - 1] = '\0';
+ KUNIT_ASSERT_EQ(test, strlcat(pad.buf, src, len * 3), len - 1 + sizeof(src));
+ /* This will have tripped both strlen() and strlcat(). */
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 3);
+ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 3);
+ KUNIT_EXPECT_EQ(test, pad.buf[sizeof(pad.buf) - 1], '\0');
+ /* But we should not go beyond the end. */
+ KUNIT_EXPECT_EQ(test, pad.bytes_after, 0);
+}
+
+static void memscan_test(struct kunit *test)
+{
+ char haystack[] = "Where oh where is my memory range?";
+ char *mem = haystack + strlen("Where oh where is ");
+ char needle = 'm';
+ size_t len = sizeof(haystack) + unconst;
+
+ KUNIT_ASSERT_PTR_EQ(test, memscan(haystack, needle, len),
+ mem);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ /* Catch too-large range. */
+ KUNIT_ASSERT_PTR_EQ(test, memscan(haystack, needle, len + 1),
+ NULL);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ KUNIT_ASSERT_PTR_EQ(test, memscan(haystack, needle, len * 2),
+ NULL);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+}
+
+static void memchr_test(struct kunit *test)
+{
+ char haystack[] = "Where oh where is my memory range?";
+ char *mem = haystack + strlen("Where oh where is ");
+ char needle = 'm';
+ size_t len = sizeof(haystack) + unconst;
+
+ KUNIT_ASSERT_PTR_EQ(test, memchr(haystack, needle, len),
+ mem);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ /* Catch too-large range. */
+ KUNIT_ASSERT_PTR_EQ(test, memchr(haystack, needle, len + 1),
+ NULL);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ KUNIT_ASSERT_PTR_EQ(test, memchr(haystack, needle, len * 2),
+ NULL);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+}
+
+static void memchr_inv_test(struct kunit *test)
+{
+ char haystack[] = "Where oh where is my memory range?";
+ char *mem = haystack + 1;
+ char needle = 'W';
+ size_t len = sizeof(haystack) + unconst;
+
+ /* Normal search is okay. */
+ KUNIT_ASSERT_PTR_EQ(test, memchr_inv(haystack, needle, len),
+ mem);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ /* Catch too-large range. */
+ KUNIT_ASSERT_PTR_EQ(test, memchr_inv(haystack, needle, len + 1),
+ NULL);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ KUNIT_ASSERT_PTR_EQ(test, memchr_inv(haystack, needle, len * 2),
+ NULL);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+}
+
+static void memcmp_test(struct kunit *test)
+{
+ char one[] = "My mind is going ...";
+ char two[] = "My mind is going ... I can feel it.";
+ size_t one_len = sizeof(one) + unconst - 1;
+ size_t two_len = sizeof(two) + unconst - 1;
+
+ /* We match the first string (ignoring the %NUL). */
+ KUNIT_ASSERT_EQ(test, memcmp(one, two, one_len), 0);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ /* Still in bounds, but no longer matching. */
+ KUNIT_ASSERT_EQ(test, memcmp(one, two, one_len + 1), -32);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+
+ /* Catch too-large ranges. */
+ KUNIT_ASSERT_EQ(test, memcmp(one, two, one_len + 2), INT_MIN);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+
+ KUNIT_ASSERT_EQ(test, memcmp(two, one, two_len + 2), INT_MIN);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+}
+
+static void kmemdup_test(struct kunit *test)
+{
+ char src[] = "I got Doom running on it!";
+ char *copy;
+ size_t len = sizeof(src) + unconst;
+
+ /* Copy is within bounds. */
+ copy = kmemdup(src, len, GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, copy);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ kfree(copy);
+
+ /* Without %NUL. */
+ copy = kmemdup(src, len - 1, GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, copy);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ kfree(copy);
+
+ /* Tiny bounds. */
+ copy = kmemdup(src, 1, GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, copy);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);
+ kfree(copy);
+
+ /* Out of bounds by 1 byte. */
+ copy = kmemdup(src, len + 1, GFP_KERNEL);
+ KUNIT_EXPECT_NULL(test, copy);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 1);
+ kfree(copy);
+
+ /* Way out of bounds. */
+ copy = kmemdup(src, len * 2, GFP_KERNEL);
+ KUNIT_EXPECT_NULL(test, copy);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 2);
+ kfree(copy);
+
+ /* Starting offset causing out of bounds. */
+ copy = kmemdup(src + 1, len, GFP_KERNEL);
+ KUNIT_EXPECT_NULL(test, copy);
+ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 3);
+ kfree(copy);
+}
+
+static int fortify_test_init(struct kunit *test)
+{
+ if (!IS_ENABLED(CONFIG_FORTIFY_SOURCE))
+ kunit_skip(test, "Not built with CONFIG_FORTIFY_SOURCE=y");
+
+ fortify_read_overflows = 0;
+ kunit_add_named_resource(test, NULL, NULL, &read_resource,
+ "fortify_read_overflows",
+ &fortify_read_overflows);
+ fortify_write_overflows = 0;
+ kunit_add_named_resource(test, NULL, NULL, &write_resource,
+ "fortify_write_overflows",
+ &fortify_write_overflows);
+ return 0;
+}
+
static struct kunit_case fortify_test_cases[] = {
KUNIT_CASE(known_sizes_test),
KUNIT_CASE(control_flow_split_test),
@@ -319,11 +961,27 @@ static struct kunit_case fortify_test_cases[] = {
KUNIT_CASE(alloc_size_kvmalloc_dynamic_test),
KUNIT_CASE(alloc_size_devm_kmalloc_const_test),
KUNIT_CASE(alloc_size_devm_kmalloc_dynamic_test),
+ KUNIT_CASE(strlen_test),
+ KUNIT_CASE(strnlen_test),
+ KUNIT_CASE(strcpy_test),
+ KUNIT_CASE(strncpy_test),
+ KUNIT_CASE(strscpy_test),
+ KUNIT_CASE(strcat_test),
+ KUNIT_CASE(strncat_test),
+ KUNIT_CASE(strlcat_test),
+ /* skip memset: performs bounds checking on whole structs */
+ /* skip memcpy: still using warn-and-overwrite instead of hard-fail */
+ KUNIT_CASE(memscan_test),
+ KUNIT_CASE(memchr_test),
+ KUNIT_CASE(memchr_inv_test),
+ KUNIT_CASE(memcmp_test),
+ KUNIT_CASE(kmemdup_test),
{}
};
static struct kunit_suite fortify_test_suite = {
.name = "fortify",
+ .init = fortify_test_init,
.test_cases = fortify_test_cases,
};
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e0aa6b440ca5..4a6a9f419bd7 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -166,7 +166,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter) {
.iter_type = ITER_IOVEC,
- .copy_mc = false,
.nofault = false,
.data_source = direction,
.__iov = iov,
@@ -245,26 +244,8 @@ EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */
static __always_inline
-size_t memcpy_from_iter_mc(void *iter_from, size_t progress,
- size_t len, void *to, void *priv2)
-{
- return copy_mc_to_kernel(to + progress, iter_from, len);
-}
-
-static size_t __copy_from_iter_mc(void *addr, size_t bytes, struct iov_iter *i)
-{
- if (unlikely(i->count < bytes))
- bytes = i->count;
- if (unlikely(!bytes))
- return 0;
- return iterate_bvec(i, bytes, addr, NULL, memcpy_from_iter_mc);
-}
-
-static __always_inline
size_t __copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
- if (unlikely(iov_iter_is_copy_mc(i)))
- return __copy_from_iter_mc(addr, bytes, i);
return iterate_and_advance(i, bytes, addr,
copy_from_user_iter, memcpy_from_iter);
}
@@ -633,7 +614,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter){
.iter_type = ITER_KVEC,
- .copy_mc = false,
.data_source = direction,
.kvec = kvec,
.nr_segs = nr_segs,
@@ -650,7 +630,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter){
.iter_type = ITER_BVEC,
- .copy_mc = false,
.data_source = direction,
.bvec = bvec,
.nr_segs = nr_segs,
@@ -679,7 +658,6 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
BUG_ON(direction & ~1);
*i = (struct iov_iter) {
.iter_type = ITER_XARRAY,
- .copy_mc = false,
.data_source = direction,
.xarray = xarray,
.xarray_start = start,
@@ -703,7 +681,6 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
BUG_ON(direction != READ);
*i = (struct iov_iter){
.iter_type = ITER_DISCARD,
- .copy_mc = false,
.data_source = false,
.count = count,
.iov_offset = 0
@@ -714,12 +691,11 @@ EXPORT_SYMBOL(iov_iter_discard);
static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
+ const struct iovec *iov = iter_iov(i);
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;
if (len > size)
@@ -729,34 +705,36 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
if ((unsigned long)(iov->iov_base + skip) & addr_mask)
return false;
+ iov++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return true;
}
static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
- size_t size = i->count;
+ const struct bio_vec *bvec = i->bvec;
unsigned skip = i->iov_offset;
- unsigned k;
+ size_t size = i->count;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
+ do {
+ size_t len = bvec->bv_len;
if (len > size)
len = size;
if (len & len_mask)
return false;
- if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
+ if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
return false;
+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return true;
}
@@ -800,13 +778,12 @@ EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
{
+ const struct iovec *iov = iter_iov(i);
unsigned long res = 0;
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;
if (len) {
res |= (unsigned long)iov->iov_base + skip;
@@ -814,30 +791,31 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
len = size;
res |= len;
size -= len;
- if (!size)
- break;
}
- }
+ iov++;
+ skip = 0;
+ } while (size);
return res;
}
static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
{
+ const struct bio_vec *bvec = i->bvec;
unsigned res = 0;
size_t size = i->count;
unsigned skip = i->iov_offset;
- unsigned k;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
- res |= (unsigned long)i->bvec[k].bv_offset + skip;
+ do {
+ size_t len = bvec->bv_len - skip;
+ res |= (unsigned long)bvec->bv_offset + skip;
if (len > size)
len = size;
res |= len;
+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return res;
}
@@ -1166,11 +1144,12 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
EXPORT_SYMBOL(dup_iter);
static __noclone int copy_compat_iovec_from_user(struct iovec *iov,
- const struct iovec __user *uvec, unsigned long nr_segs)
+ const struct iovec __user *uvec, u32 nr_segs)
{
const struct compat_iovec __user *uiov =
(const struct compat_iovec __user *)uvec;
- int ret = -EFAULT, i;
+ int ret = -EFAULT;
+ u32 i;
if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
return -EFAULT;
diff --git a/lib/kobject.c b/lib/kobject.c
index 59dbcbdb1c91..72fa20f405f1 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -74,10 +74,12 @@ static int create_dir(struct kobject *kobj)
if (error)
return error;
- error = sysfs_create_groups(kobj, ktype->default_groups);
- if (error) {
- sysfs_remove_dir(kobj);
- return error;
+ if (ktype) {
+ error = sysfs_create_groups(kobj, ktype->default_groups);
+ if (error) {
+ sysfs_remove_dir(kobj);
+ return error;
+ }
}
/*
@@ -589,7 +591,8 @@ static void __kobject_del(struct kobject *kobj)
sd = kobj->sd;
ktype = get_ktype(kobj);
- sysfs_remove_groups(kobj, ktype->default_groups);
+ if (ktype)
+ sysfs_remove_groups(kobj, ktype->default_groups);
/* send "remove" if the caller did not do it but sent "add" */
if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
@@ -666,6 +669,10 @@ static void kobject_cleanup(struct kobject *kobj)
pr_debug("'%s' (%p): %s, parent %p\n",
kobject_name(kobj), kobj, __func__, kobj->parent);
+ if (t && !t->release)
+ pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
+ kobject_name(kobj), kobj);
+
/* remove from sysfs if the caller did not do it */
if (kobj->state_in_sysfs) {
pr_debug("'%s' (%p): auto cleanup kobject_del\n",
@@ -676,13 +683,10 @@ static void kobject_cleanup(struct kobject *kobj)
parent = NULL;
}
- if (t->release) {
+ if (t && t->release) {
pr_debug("'%s' (%p): calling ktype release\n",
kobject_name(kobj), kobj);
t->release(kobj);
- } else {
- pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
- kobject_name(kobj), kobj);
}
/* free name if we allocated it */
@@ -1056,7 +1060,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa
{
const struct kobj_ns_type_operations *ops = NULL;
- if (parent && parent->ktype->child_ns_type)
+ if (parent && parent->ktype && parent->ktype->child_ns_type)
ops = parent->ktype->child_ns_type(parent);
return ops;
diff --git a/lib/kunit/device-impl.h b/lib/kunit/device-impl.h
index 54bd55836405..5fcd48ff0f36 100644
--- a/lib/kunit/device-impl.h
+++ b/lib/kunit/device-impl.h
@@ -13,5 +13,7 @@
// For internal use only -- registers the kunit_bus.
int kunit_bus_init(void);
+// For internal use only -- unregisters the kunit_bus.
+void kunit_bus_shutdown(void);
#endif //_KUNIT_DEVICE_IMPL_H
diff --git a/lib/kunit/device.c b/lib/kunit/device.c
index f5371287b375..abc603730b8e 100644
--- a/lib/kunit/device.c
+++ b/lib/kunit/device.c
@@ -10,6 +10,7 @@
*/
#include <linux/device.h>
+#include <linux/dma-mapping.h>
#include <kunit/test.h>
#include <kunit/device.h>
@@ -35,7 +36,7 @@ struct kunit_device {
#define to_kunit_device(d) container_of_const(d, struct kunit_device, dev)
-static struct bus_type kunit_bus_type = {
+static const struct bus_type kunit_bus_type = {
.name = "kunit",
};
@@ -45,8 +46,8 @@ int kunit_bus_init(void)
int error;
kunit_bus_device = root_device_register("kunit");
- if (!kunit_bus_device)
- return -ENOMEM;
+ if (IS_ERR(kunit_bus_device))
+ return PTR_ERR(kunit_bus_device);
error = bus_register(&kunit_bus_type);
if (error)
@@ -54,6 +55,20 @@ int kunit_bus_init(void)
return error;
}
+/* Unregister the 'kunit_bus' in case the KUnit module is unloaded. */
+void kunit_bus_shutdown(void)
+{
+ /* Make sure the bus exists before we unregister it. */
+ if (IS_ERR_OR_NULL(kunit_bus_device))
+ return;
+
+ bus_unregister(&kunit_bus_type);
+
+ root_device_unregister(kunit_bus_device);
+
+ kunit_bus_device = NULL;
+}
+
/* Release a 'fake' KUnit device. */
static void kunit_device_release(struct device *d)
{
@@ -119,6 +134,9 @@ static struct kunit_device *kunit_device_register_internal(struct kunit *test,
return ERR_PTR(err);
}
+ kunit_dev->dev.dma_mask = &kunit_dev->dev.coherent_dma_mask;
+ kunit_dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+
kunit_add_action(test, device_unregister_wrapper, &kunit_dev->dev);
return kunit_dev;
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 717b9599036b..70b9a43cd257 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -33,13 +33,13 @@ static char *filter_glob_param;
static char *filter_param;
static char *filter_action_param;
-module_param_named(filter_glob, filter_glob_param, charp, 0400);
+module_param_named(filter_glob, filter_glob_param, charp, 0600);
MODULE_PARM_DESC(filter_glob,
"Filter which KUnit test suites/tests run at boot-time, e.g. list* or list*.*del_test");
-module_param_named(filter, filter_param, charp, 0400);
+module_param_named(filter, filter_param, charp, 0600);
MODULE_PARM_DESC(filter,
"Filter which KUnit test suites/tests run at boot-time using attributes, e.g. speed>slow");
-module_param_named(filter_action, filter_action_param, charp, 0400);
+module_param_named(filter_action, filter_action_param, charp, 0600);
MODULE_PARM_DESC(filter_action,
"Changes behavior of filtered tests using attributes, valid values are:\n"
"<none>: do not run filtered tests as normal\n"
@@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set)
kfree(suite_set.start);
}
+/*
+ * Filter and reallocate test suites. Must return the filtered test suites set
+ * allocated at a valid virtual address or NULL in case of error.
+ */
struct kunit_suite_set
kunit_filter_suites(const struct kunit_suite_set *suite_set,
const char *filter_glob,
diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c
index 22d4ee86dbed..3f7f967e3688 100644
--- a/lib/kunit/executor_test.c
+++ b/lib/kunit/executor_test.c
@@ -129,7 +129,7 @@ static void parse_filter_attr_test(struct kunit *test)
GFP_KERNEL);
for (j = 0; j < filter_count; j++) {
parsed_filters[j] = kunit_next_attr_filter(&filter, &err);
- KUNIT_ASSERT_EQ_MSG(test, err, 0, "failed to parse filter '%s'", filters[j]);
+ KUNIT_ASSERT_EQ_MSG(test, err, 0, "failed to parse filter from '%s'", filters);
}
KUNIT_EXPECT_STREQ(test, kunit_attr_filter_name(parsed_filters[0]), "speed");
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index c4259d910356..f7980ef236a3 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test)
long action_was_run = 0;
test_device = kunit_device_register(test, "my_device");
- KUNIT_ASSERT_NOT_NULL(test, test_device);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device);
/* Add an action to verify cleanup. */
devm_add_action(test_device, test_dev_action, &action_was_run);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index f95d2093a0aa..1d1475578515 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -17,6 +17,7 @@
#include <linux/panic.h>
#include <linux/sched/debug.h>
#include <linux/sched.h>
+#include <linux/mm.h>
#include "debugfs.h"
#include "device-impl.h"
@@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod)
};
const char *action = kunit_action();
+ /*
+ * Check if the start address is a valid virtual address to detect
+ * if the module load sequence has failed and the suite set has not
+ * been initialized and filtered.
+ */
+ if (!suite_set.start || !virt_addr_valid(suite_set.start))
+ return;
+
if (!action)
__kunit_test_suites_exit(mod->kunit_suites,
mod->num_kunit_suites);
- if (suite_set.start)
- kunit_free_suite_set(suite_set);
+ kunit_free_suite_set(suite_set);
}
static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
@@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
switch (val) {
case MODULE_STATE_LIVE:
+ kunit_module_init(mod);
break;
case MODULE_STATE_GOING:
kunit_module_exit(mod);
break;
case MODULE_STATE_COMING:
- kunit_module_init(mod);
break;
case MODULE_STATE_UNFORMED:
break;
@@ -920,6 +928,9 @@ static void __exit kunit_exit(void)
#ifdef CONFIG_MODULES
unregister_module_notifier(&kunit_mod_nb);
#endif
+
+ kunit_bus_shutdown();
+
kunit_debugfs_cleanup();
}
module_exit(kunit_exit);
diff --git a/lib/livepatch/Makefile b/lib/livepatch/Makefile
deleted file mode 100644
index dcc912b3478f..000000000000
--- a/lib/livepatch/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for livepatch test code.
-
-obj-$(CONFIG_TEST_LIVEPATCH) += test_klp_atomic_replace.o \
- test_klp_callbacks_demo.o \
- test_klp_callbacks_demo2.o \
- test_klp_callbacks_busy.o \
- test_klp_callbacks_mod.o \
- test_klp_livepatch.o \
- test_klp_shadow_vars.o \
- test_klp_state.o \
- test_klp_state2.o \
- test_klp_state3.o
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 6f241bb38799..af0970288727 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4290,6 +4290,56 @@ exists:
}
+/**
+ * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
+ * @mas: The maple state.
+ * @startp: Pointer to ID.
+ * @range_lo: Lower bound of range to search.
+ * @range_hi: Upper bound of range to search.
+ * @entry: The entry to store.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Return: 0 if the allocation succeeded without wrapping, 1 if the
+ * allocation succeeded after wrapping, or -EBUSY if there are no
+ * free entries.
+ */
+int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp)
+{
+ unsigned long min = range_lo;
+ int ret = 0;
+
+ range_lo = max(min, *next);
+ ret = mas_empty_area(mas, range_lo, range_hi, 1);
+ if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
+ mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
+ ret = 1;
+ }
+ if (ret < 0 && range_lo > min) {
+ ret = mas_empty_area(mas, min, range_hi, 1);
+ if (ret == 0)
+ ret = 1;
+ }
+ if (ret < 0)
+ return ret;
+
+ do {
+ mas_insert(mas, entry);
+ } while (mas_nomem(mas, gfp));
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ *startp = mas->index;
+ *next = *startp + 1;
+ if (*next == 0)
+ mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
+
+ return ret;
+}
+EXPORT_SYMBOL(mas_alloc_cyclic);
+
static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
{
retry:
@@ -6443,6 +6493,49 @@ unlock:
}
EXPORT_SYMBOL(mtree_alloc_range);
+/**
+ * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
+ * @mt: The maple tree.
+ * @startp: Pointer to ID.
+ * @range_lo: Lower bound of range to search.
+ * @range_hi: Upper bound of range to search.
+ * @entry: The entry to store.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Finds an empty entry in @mt after @next, stores the new index into
+ * the @id pointer, stores the entry at that index, then updates @next.
+ *
+ * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
+ *
+ * Context: Any context. Takes and releases the mt.lock. May sleep if
+ * the @gfp flags permit.
+ *
+ * Return: 0 if the allocation succeeded without wrapping, 1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
+ * free entries.
+ */
+int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp)
+{
+ int ret;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ if (!mt_is_alloc(mt))
+ return -EINVAL;
+ if (WARN_ON_ONCE(mt_is_reserved(entry)))
+ return -EINVAL;
+ mtree_lock(mt);
+ ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
+ next, gfp);
+ mtree_unlock(mt);
+ return ret;
+}
+EXPORT_SYMBOL(mtree_alloc_cyclic);
+
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp)
diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 440aee705ccc..30e00ef0bf2e 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -32,7 +32,7 @@ struct some_bytes {
BUILD_BUG_ON(sizeof(instance.data) != 32); \
for (size_t i = 0; i < sizeof(instance.data); i++) { \
KUNIT_ASSERT_EQ_MSG(test, instance.data[i], v, \
- "line %d: '%s' not initialized to 0x%02x @ %d (saw 0x%02x)\n", \
+ "line %d: '%s' not initialized to 0x%02x @ %zu (saw 0x%02x)\n", \
__LINE__, #instance, v, i, instance.data[i]); \
} \
} while (0)
@@ -41,7 +41,7 @@ struct some_bytes {
BUILD_BUG_ON(sizeof(one) != sizeof(two)); \
for (size_t i = 0; i < sizeof(one); i++) { \
KUNIT_EXPECT_EQ_MSG(test, one.data[i], two.data[i], \
- "line %d: %s.data[%d] (0x%02x) != %s.data[%d] (0x%02x)\n", \
+ "line %d: %s.data[%zu] (0x%02x) != %s.data[%zu] (0x%02x)\n", \
__LINE__, #one, i, one.data[i], #two, i, two.data[i]); \
} \
kunit_info(test, "ok: " TEST_OP "() " name "\n"); \
diff --git a/lib/nlattr.c b/lib/nlattr.c
index ed2ab43e1b22..be9c576b6e2d 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = {
[NLA_S16] = sizeof(s16),
[NLA_S32] = sizeof(s32),
[NLA_S64] = sizeof(s64),
+ [NLA_BE16] = sizeof(__be16),
+ [NLA_BE32] = sizeof(__be32),
};
static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
@@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
[NLA_S16] = sizeof(s16),
[NLA_S32] = sizeof(s32),
[NLA_S64] = sizeof(s64),
+ [NLA_BE16] = sizeof(__be16),
+ [NLA_BE32] = sizeof(__be32),
};
/*
diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c
index c527f6b75789..65e8a72a83bf 100644
--- a/lib/overflow_kunit.c
+++ b/lib/overflow_kunit.c
@@ -258,25 +258,84 @@ DEFINE_TEST_ARRAY(s64) = {
\
_of = check_ ## op ## _overflow(a, b, &_r); \
KUNIT_EXPECT_EQ_MSG(test, _of, of, \
- "expected "fmt" "sym" "fmt" to%s overflow (type %s)\n", \
+ "expected check "fmt" "sym" "fmt" to%s overflow (type %s)\n", \
a, b, of ? "" : " not", #t); \
KUNIT_EXPECT_EQ_MSG(test, _r, r, \
- "expected "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \
+ "expected check "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \
a, b, r, _r, #t); \
/* Check for internal macro side-effects. */ \
_of = check_ ## op ## _overflow(_a_orig++, _b_orig++, &_r); \
- KUNIT_EXPECT_EQ_MSG(test, _a_orig, _a_bump, "Unexpected " #op " macro side-effect!\n"); \
- KUNIT_EXPECT_EQ_MSG(test, _b_orig, _b_bump, "Unexpected " #op " macro side-effect!\n"); \
+ KUNIT_EXPECT_EQ_MSG(test, _a_orig, _a_bump, \
+ "Unexpected check " #op " macro side-effect!\n"); \
+ KUNIT_EXPECT_EQ_MSG(test, _b_orig, _b_bump, \
+ "Unexpected check " #op " macro side-effect!\n"); \
+ \
+ _r = wrapping_ ## op(t, a, b); \
+ KUNIT_EXPECT_TRUE_MSG(test, _r == r, \
+ "expected wrap "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \
+ a, b, r, _r, #t); \
+ /* Check for internal macro side-effects. */ \
+ _a_orig = a; \
+ _b_orig = b; \
+ _r = wrapping_ ## op(t, _a_orig++, _b_orig++); \
+ KUNIT_EXPECT_EQ_MSG(test, _a_orig, _a_bump, \
+ "Unexpected wrap " #op " macro side-effect!\n"); \
+ KUNIT_EXPECT_EQ_MSG(test, _b_orig, _b_bump, \
+ "Unexpected wrap " #op " macro side-effect!\n"); \
+} while (0)
+
+static int global_counter;
+static void bump_counter(void)
+{
+ global_counter++;
+}
+
+static int get_index(void)
+{
+ volatile int index = 0;
+ bump_counter();
+ return index;
+}
+
+#define check_self_op(fmt, op, sym, a, b) do { \
+ typeof(a + 0) _a = a; \
+ typeof(b + 0) _b = b; \
+ typeof(a + 0) _a_sym = a; \
+ typeof(a + 0) _a_orig[1] = { a }; \
+ typeof(b + 0) _b_orig = b; \
+ typeof(b + 0) _b_bump = b + 1; \
+ typeof(a + 0) _r; \
+ \
+ _a_sym sym _b; \
+ _r = wrapping_ ## op(_a, _b); \
+ KUNIT_EXPECT_TRUE_MSG(test, _r == _a_sym, \
+ "expected "fmt" "#op" "fmt" == "fmt", got "fmt"\n", \
+ a, b, _a_sym, _r); \
+ KUNIT_EXPECT_TRUE_MSG(test, _a == _a_sym, \
+ "expected "fmt" "#op" "fmt" == "fmt", got "fmt"\n", \
+ a, b, _a_sym, _a); \
+ /* Check for internal macro side-effects. */ \
+ global_counter = 0; \
+ wrapping_ ## op(_a_orig[get_index()], _b_orig++); \
+ KUNIT_EXPECT_EQ_MSG(test, global_counter, 1, \
+ "Unexpected wrapping_" #op " macro side-effect on arg1!\n"); \
+ KUNIT_EXPECT_EQ_MSG(test, _b_orig, _b_bump, \
+ "Unexpected wrapping_" #op " macro side-effect on arg2!\n"); \
} while (0)
#define DEFINE_TEST_FUNC_TYPED(n, t, fmt) \
static void do_test_ ## n(struct kunit *test, const struct test_ ## n *p) \
{ \
+ /* check_{add,sub,mul}_overflow() and wrapping_{add,sub,mul} */ \
check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \
check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \
check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \
check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of); \
check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \
+ /* wrapping_assign_{add,sub}() */ \
+ check_self_op(fmt, assign_add, +=, p->a, p->b); \
+ check_self_op(fmt, assign_add, +=, p->b, p->a); \
+ check_self_op(fmt, assign_sub, -=, p->a, p->b); \
} \
\
static void n ## _overflow_test(struct kunit *test) { \
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index cd0b9e95f499..863e2d320938 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -12,15 +12,14 @@
*/
#include <linux/raid/pq.h>
-#include <asm/fpu/api.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu.h>
#define NSIZE 16
-static inline void LOAD_CONST(void)
+static __always_inline void LOAD_CONST(void)
{
- asm volatile("VREPIB %v24,7");
- asm volatile("VREPIB %v25,0x1d");
+ fpu_vrepib(24, 0x07);
+ fpu_vrepib(25, 0x1d);
}
/*
@@ -28,10 +27,7 @@ static inline void LOAD_CONST(void)
* vector register y left by 1 bit and stores the result in
* vector register x.
*/
-static inline void SHLBYTE(int x, int y)
-{
- asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
-}
+#define SHLBYTE(x, y) fpu_vab(x, y, y)
/*
* For each of the 16 bytes in the vector register y the MASK()
@@ -39,49 +35,17 @@ static inline void SHLBYTE(int x, int y)
* or 0x00 if the high bit is 0. The result is stored in vector
* register x.
*/
-static inline void MASK(int x, int y)
-{
- asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
-}
-
-static inline void AND(int x, int y, int z)
-{
- asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
-}
-
-static inline void XOR(int x, int y, int z)
-{
- asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
-}
+#define MASK(x, y) fpu_vesravb(x, y, 24)
-static inline void LOAD_DATA(int x, u8 *ptr)
-{
- typedef struct { u8 _[16 * $#]; } addrtype;
- register addrtype *__ptr asm("1") = (addrtype *) ptr;
-
- asm volatile ("VLM %2,%3,0,%1"
- : : "m" (*__ptr), "a" (__ptr), "i" (x),
- "i" (x + $# - 1));
-}
-
-static inline void STORE_DATA(int x, u8 *ptr)
-{
- typedef struct { u8 _[16 * $#]; } addrtype;
- register addrtype *__ptr asm("1") = (addrtype *) ptr;
-
- asm volatile ("VSTM %2,%3,0,1"
- : "=m" (*__ptr) : "a" (__ptr), "i" (x),
- "i" (x + $# - 1));
-}
-
-static inline void COPY_VEC(int x, int y)
-{
- asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
-}
+#define AND(x, y, z) fpu_vn(x, y, z)
+#define XOR(x, y, z) fpu_vx(x, y, z)
+#define LOAD_DATA(x, ptr) fpu_vlm(x, x + $# - 1, ptr)
+#define STORE_DATA(x, ptr) fpu_vstm(x, x + $# - 1, ptr)
+#define COPY_VEC(x, y) fpu_vlr(x, y)
static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
- struct kernel_fpu vxstate;
+ DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
u8 **dptr, *p, *q;
int d, z, z0;
@@ -114,7 +78,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
{
- struct kernel_fpu vxstate;
+ DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
u8 **dptr, *p, *q;
int d, z, z0;
diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 010c730ca7fc..f3f3436d60a9 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -13,16 +13,26 @@
* seq_buf_init() more than once to reset the seq_buf to start
* from scratch.
*/
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/hex.h>
+#include <linux/minmax.h>
+#include <linux/printk.h>
#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sprintf.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
/**
* seq_buf_can_fit - can the new data fit in the current buffer?
* @s: the seq_buf descriptor
* @len: The length to see if it can fit in the current buffer
*
- * Returns true if there's enough unused space in the seq_buf buffer
+ * Returns: true if there's enough unused space in the seq_buf buffer
* to fit the amount of new data according to @len.
*/
static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
@@ -35,7 +45,7 @@ static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
* @m: the seq_file descriptor that is the destination
* @s: the seq_buf descriptor that is the source.
*
- * Returns zero on success, non zero otherwise
+ * Returns: zero on success, non-zero otherwise.
*/
int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s)
{
@@ -50,9 +60,9 @@ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s)
* @fmt: printf format string
* @args: va_list of arguments from a printf() type function
*
- * Writes a vnprintf() format into the sequencce buffer.
+ * Writes a vnprintf() format into the sequence buffer.
*
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
{
@@ -78,7 +88,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
*
* Writes a printf() format into the sequence buffer.
*
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
{
@@ -94,12 +104,12 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
EXPORT_SYMBOL_GPL(seq_buf_printf);
/**
- * seq_buf_do_printk - printk seq_buf line by line
+ * seq_buf_do_printk - printk() seq_buf line by line
* @s: seq_buf descriptor
* @lvl: printk level
*
* printk()-s a multi-line sequential buffer line by line. The function
- * makes sure that the buffer in @s is nul terminated and safe to read
+ * makes sure that the buffer in @s is NUL-terminated and safe to read
* as a string.
*/
void seq_buf_do_printk(struct seq_buf *s, const char *lvl)
@@ -139,7 +149,7 @@ EXPORT_SYMBOL_GPL(seq_buf_do_printk);
* This function will take the format and the binary array and finish
* the conversion into the ASCII string within the buffer.
*
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
{
@@ -167,7 +177,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
*
* Copy a simple string into the sequence buffer.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_puts(struct seq_buf *s, const char *str)
{
@@ -196,7 +206,7 @@ EXPORT_SYMBOL_GPL(seq_buf_puts);
*
* Copy a single character into the sequence buffer.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_putc(struct seq_buf *s, unsigned char c)
{
@@ -212,7 +222,7 @@ int seq_buf_putc(struct seq_buf *s, unsigned char c)
EXPORT_SYMBOL_GPL(seq_buf_putc);
/**
- * seq_buf_putmem - write raw data into the sequenc buffer
+ * seq_buf_putmem - write raw data into the sequence buffer
* @s: seq_buf descriptor
* @mem: The raw memory to copy into the buffer
* @len: The length of the raw memory to copy (in bytes)
@@ -221,7 +231,7 @@ EXPORT_SYMBOL_GPL(seq_buf_putc);
* buffer and a strcpy() would not work. Using this function allows
* for such cases.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
{
@@ -249,7 +259,7 @@ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
* raw memory into the buffer it writes its ASCII representation of it
* in hex characters.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
unsigned int len)
@@ -297,7 +307,7 @@ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
*
* Write a path name into the sequence buffer.
*
- * Returns the number of written bytes on success, -1 on overflow
+ * Returns: the number of written bytes on success, -1 on overflow.
*/
int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
{
@@ -332,6 +342,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
* or until it reaches the end of the content in the buffer (@s->len),
* whichever comes first.
*
+ * Returns:
* On success, it returns a positive number of the number of bytes
* it copied.
*
@@ -382,11 +393,11 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt)
* linebuf size is maximal length for one line.
* 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for
* separating space
- * 2 - spaces separating hex dump and ascii representation
- * 32 - ascii representation
+ * 2 - spaces separating hex dump and ASCII representation
+ * 32 - ASCII representation
* 1 - terminating '\0'
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type,
int rowsize, int groupsize,
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index a0be5d05c7f0..4a7055a63d9f 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) "stackdepot: " fmt
+#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
@@ -21,8 +22,10 @@
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
-#include <linux/percpu.h>
+#include <linux/poison.h>
#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -41,17 +44,7 @@
#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
STACK_DEPOT_EXTRA_BITS)
-#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32
-/*
- * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack
- * traces. As KMSAN does not support evicting stack traces from the stack
- * depot, the stack depot capacity might be reached quickly with large stack
- * records. Adjust the maximum number of stack depot pools for this case.
- */
-#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16))
-#else
#define DEPOT_POOLS_CAP 8192
-#endif
#define DEPOT_MAX_POOLS \
(((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
(1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)
@@ -67,17 +60,30 @@ union handle_parts {
};
struct stack_record {
- struct list_head list; /* Links in hash table or freelist */
+ struct list_head hash_list; /* Links in the hash table */
u32 hash; /* Hash in hash table */
u32 size; /* Number of stored frames */
- union handle_parts handle;
+ union handle_parts handle; /* Constant after initialization */
refcount_t count;
- unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */
+ union {
+ unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */
+ struct {
+ /*
+ * An important invariant of the implementation is to
+ * only place a stack record onto the freelist iff its
+ * refcount is zero. Because stack records with a zero
+ * refcount are never considered as valid, it is safe to
+ * union @entries and freelist management state below.
+ * Conversely, as soon as an entry is off the freelist
+ * and its refcount becomes non-zero, the below must not
+ * be accessed until being placed back on the freelist.
+ */
+ struct list_head free_list; /* Links in the freelist */
+ unsigned long rcu_state; /* RCU cookie */
+ };
+ };
};
-#define DEPOT_STACK_RECORD_SIZE \
- ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN)
-
static bool stack_depot_disabled;
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;
@@ -103,17 +109,33 @@ static void *stack_pools[DEPOT_MAX_POOLS];
static void *new_pool;
/* Number of pools in stack_pools. */
static int pools_num;
+/* Offset to the unused space in the currently used pool. */
+static size_t pool_offset = DEPOT_POOL_SIZE;
/* Freelist of stack records within stack_pools. */
static LIST_HEAD(free_stacks);
-/*
- * Stack depot tries to keep an extra pool allocated even before it runs out
- * of space in the currently used pool. This flag marks whether this extra pool
- * needs to be allocated. It has the value 0 when either an extra pool is not
- * yet allocated or if the limit on the number of pools is reached.
- */
-static bool new_pool_required = true;
-/* Lock that protects the variables above. */
-static DEFINE_RWLOCK(pool_rwlock);
+/* The lock must be held when performing pool or freelist modifications. */
+static DEFINE_RAW_SPINLOCK(pool_lock);
+
+/* Statistics counters for debugfs. */
+enum depot_counter_id {
+ DEPOT_COUNTER_REFD_ALLOCS,
+ DEPOT_COUNTER_REFD_FREES,
+ DEPOT_COUNTER_REFD_INUSE,
+ DEPOT_COUNTER_FREELIST_SIZE,
+ DEPOT_COUNTER_PERSIST_COUNT,
+ DEPOT_COUNTER_PERSIST_BYTES,
+ DEPOT_COUNTER_COUNT,
+};
+static long counters[DEPOT_COUNTER_COUNT];
+static const char *const counter_names[] = {
+ [DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
+ [DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
+ [DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
+ [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
+ [DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
+ [DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
+};
+static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
static int __init disable_stack_depot(char *str)
{
@@ -258,174 +280,273 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(stack_depot_init);
-/* Initializes a stack depol pool. */
-static void depot_init_pool(void *pool)
+/*
+ * Initializes new stack pool, and updates the list of pools.
+ */
+static bool depot_init_pool(void **prealloc)
{
- int offset;
+ lockdep_assert_held(&pool_lock);
- lockdep_assert_held_write(&pool_rwlock);
+ if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
+ /* Bail out if we reached the pool limit. */
+ WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
+ WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
+ WARN_ONCE(1, "Stack depot reached limit capacity");
+ return false;
+ }
- WARN_ON(!list_empty(&free_stacks));
+ if (!new_pool && *prealloc) {
+ /* We have preallocated memory, use it. */
+ WRITE_ONCE(new_pool, *prealloc);
+ *prealloc = NULL;
+ }
- /* Initialize handles and link stack records into the freelist. */
- for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE;
- offset += DEPOT_STACK_RECORD_SIZE) {
- struct stack_record *stack = pool + offset;
+ if (!new_pool)
+ return false; /* new_pool and *prealloc are NULL */
- stack->handle.pool_index = pools_num;
- stack->handle.offset = offset >> DEPOT_STACK_ALIGN;
- stack->handle.extra = 0;
+ /* Save reference to the pool to be used by depot_fetch_stack(). */
+ stack_pools[pools_num] = new_pool;
- list_add(&stack->list, &free_stacks);
- }
+ /*
+ * Stack depot tries to keep an extra pool allocated even before it runs
+ * out of space in the currently used pool.
+ *
+ * To indicate that a new preallocation is needed new_pool is reset to
+ * NULL; do not reset to NULL if we have reached the maximum number of
+ * pools.
+ */
+ if (pools_num < DEPOT_MAX_POOLS)
+ WRITE_ONCE(new_pool, NULL);
+ else
+ WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
- /* Save reference to the pool to be used by depot_fetch_stack(). */
- stack_pools[pools_num] = pool;
- pools_num++;
+ /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
+ WRITE_ONCE(pools_num, pools_num + 1);
+ ASSERT_EXCLUSIVE_WRITER(pools_num);
+
+ pool_offset = 0;
+
+ return true;
}
/* Keeps the preallocated memory to be used for a new stack depot pool. */
static void depot_keep_new_pool(void **prealloc)
{
- lockdep_assert_held_write(&pool_rwlock);
+ lockdep_assert_held(&pool_lock);
/*
* If a new pool is already saved or the maximum number of
* pools is reached, do not use the preallocated memory.
*/
- if (!new_pool_required)
+ if (new_pool)
return;
- /*
- * Use the preallocated memory for the new pool
- * as long as we do not exceed the maximum number of pools.
- */
- if (pools_num < DEPOT_MAX_POOLS) {
- new_pool = *prealloc;
- *prealloc = NULL;
+ WRITE_ONCE(new_pool, *prealloc);
+ *prealloc = NULL;
+}
+
+/*
+ * Try to initialize a new stack record from the current pool, a cached pool, or
+ * the current pre-allocation.
+ */
+static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
+{
+ struct stack_record *stack;
+ void *current_pool;
+ u32 pool_index;
+
+ lockdep_assert_held(&pool_lock);
+
+ if (pool_offset + size > DEPOT_POOL_SIZE) {
+ if (!depot_init_pool(prealloc))
+ return NULL;
}
- /*
- * At this point, either a new pool is kept or the maximum
- * number of pools is reached. In either case, take note that
- * keeping another pool is not required.
- */
- new_pool_required = false;
+ if (WARN_ON_ONCE(pools_num < 1))
+ return NULL;
+ pool_index = pools_num - 1;
+ current_pool = stack_pools[pool_index];
+ if (WARN_ON_ONCE(!current_pool))
+ return NULL;
+
+ stack = current_pool + pool_offset;
+
+ /* Pre-initialize handle once. */
+ stack->handle.pool_index = pool_index;
+ stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
+ stack->handle.extra = 0;
+ INIT_LIST_HEAD(&stack->hash_list);
+
+ pool_offset += size;
+
+ return stack;
}
-/* Updates references to the current and the next stack depot pools. */
-static bool depot_update_pools(void **prealloc)
+/* Try to find next free usable entry from the freelist. */
+static struct stack_record *depot_pop_free(void)
{
- lockdep_assert_held_write(&pool_rwlock);
+ struct stack_record *stack;
- /* Check if we still have objects in the freelist. */
- if (!list_empty(&free_stacks))
- goto out_keep_prealloc;
+ lockdep_assert_held(&pool_lock);
- /* Check if we have a new pool saved and use it. */
- if (new_pool) {
- depot_init_pool(new_pool);
- new_pool = NULL;
+ if (list_empty(&free_stacks))
+ return NULL;
- /* Take note that we might need a new new_pool. */
- if (pools_num < DEPOT_MAX_POOLS)
- new_pool_required = true;
+ /*
+ * We maintain the invariant that the elements in front are least
+ * recently used, and are therefore more likely to be associated with an
+ * RCU grace period in the past. Consequently it is sufficient to only
+ * check the first entry.
+ */
+ stack = list_first_entry(&free_stacks, struct stack_record, free_list);
+ if (!poll_state_synchronize_rcu(stack->rcu_state))
+ return NULL;
- /* Try keeping the preallocated memory for new_pool. */
- goto out_keep_prealloc;
- }
+ list_del(&stack->free_list);
+ counters[DEPOT_COUNTER_FREELIST_SIZE]--;
- /* Bail out if we reached the pool limit. */
- if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
- WARN_ONCE(1, "Stack depot reached limit capacity");
- return false;
- }
+ return stack;
+}
- /* Check if we have preallocated memory and use it. */
- if (*prealloc) {
- depot_init_pool(*prealloc);
- *prealloc = NULL;
- return true;
- }
+static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
+{
+ const size_t used = flex_array_size(s, entries, nr_entries);
+ const size_t unused = sizeof(s->entries) - used;
- return false;
+ WARN_ON_ONCE(sizeof(s->entries) < used);
-out_keep_prealloc:
- /* Keep the preallocated memory for a new pool if required. */
- if (*prealloc)
- depot_keep_new_pool(prealloc);
- return true;
+ return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
}
/* Allocates a new stack in a stack depot pool. */
static struct stack_record *
-depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
+depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
{
- struct stack_record *stack;
+ struct stack_record *stack = NULL;
+ size_t record_size;
- lockdep_assert_held_write(&pool_rwlock);
+ lockdep_assert_held(&pool_lock);
- /* Update current and new pools if required and possible. */
- if (!depot_update_pools(prealloc))
+ /* This should already be checked by public API entry points. */
+ if (WARN_ON_ONCE(!nr_entries))
return NULL;
- /* Check if we have a stack record to save the stack trace. */
- if (list_empty(&free_stacks))
- return NULL;
+ /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
+ if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
+ nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
- /* Get and unlink the first entry from the freelist. */
- stack = list_first_entry(&free_stacks, struct stack_record, list);
- list_del(&stack->list);
+ if (flags & STACK_DEPOT_FLAG_GET) {
+ /*
+ * Evictable entries have to allocate the max. size so they may
+ * safely be re-used by differently sized allocations.
+ */
+ record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
+ stack = depot_pop_free();
+ } else {
+ record_size = depot_stack_record_size(stack, nr_entries);
+ }
- /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
- if (size > CONFIG_STACKDEPOT_MAX_FRAMES)
- size = CONFIG_STACKDEPOT_MAX_FRAMES;
+ if (!stack) {
+ stack = depot_pop_free_pool(prealloc, record_size);
+ if (!stack)
+ return NULL;
+ }
/* Save the stack trace. */
stack->hash = hash;
- stack->size = size;
- /* stack->handle is already filled in by depot_init_pool(). */
- refcount_set(&stack->count, 1);
- memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
+ stack->size = nr_entries;
+ /* stack->handle is already filled in by depot_pop_free_pool(). */
+ memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
+
+ if (flags & STACK_DEPOT_FLAG_GET) {
+ refcount_set(&stack->count, 1);
+ counters[DEPOT_COUNTER_REFD_ALLOCS]++;
+ counters[DEPOT_COUNTER_REFD_INUSE]++;
+ } else {
+ /* Warn on attempts to switch to refcounting this entry. */
+ refcount_set(&stack->count, REFCOUNT_SATURATED);
+ counters[DEPOT_COUNTER_PERSIST_COUNT]++;
+ counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
+ }
/*
* Let KMSAN know the stored stack record is initialized. This shall
* prevent false positive reports if instrumented code accesses it.
*/
- kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE);
+ kmsan_unpoison_memory(stack, record_size);
return stack;
}
static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
{
+ const int pools_num_cached = READ_ONCE(pools_num);
union handle_parts parts = { .handle = handle };
void *pool;
size_t offset = parts.offset << DEPOT_STACK_ALIGN;
struct stack_record *stack;
- lockdep_assert_held(&pool_rwlock);
+ lockdep_assert_not_held(&pool_lock);
- if (parts.pool_index > pools_num) {
+ if (parts.pool_index > pools_num_cached) {
WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
- parts.pool_index, pools_num, handle);
+ parts.pool_index, pools_num_cached, handle);
return NULL;
}
pool = stack_pools[parts.pool_index];
- if (!pool)
+ if (WARN_ON(!pool))
return NULL;
stack = pool + offset;
+ if (WARN_ON(!refcount_read(&stack->count)))
+ return NULL;
+
return stack;
}
/* Links stack into the freelist. */
static void depot_free_stack(struct stack_record *stack)
{
- lockdep_assert_held_write(&pool_rwlock);
+ unsigned long flags;
+
+ lockdep_assert_not_held(&pool_lock);
+
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ printk_deferred_enter();
- list_add(&stack->list, &free_stacks);
+ /*
+ * Remove the entry from the hash list. Concurrent list traversal may
+ * still observe the entry, but since the refcount is zero, this entry
+ * will no longer be considered as valid.
+ */
+ list_del_rcu(&stack->hash_list);
+
+ /*
+ * Due to being used from constrained contexts such as the allocators,
+ * NMI, or even RCU itself, stack depot cannot rely on primitives that
+ * would sleep (such as synchronize_rcu()) or recursively call into
+ * stack depot again (such as call_rcu()).
+ *
+ * Instead, get an RCU cookie, so that we can ensure this entry isn't
+ * moved onto another list until the next grace period, and concurrent
+ * RCU list traversal remains safe.
+ */
+ stack->rcu_state = get_state_synchronize_rcu();
+
+ /*
+ * Add the entry to the freelist tail, so that older entries are
+ * considered first - their RCU cookie is more likely to no longer be
+ * associated with the current grace period.
+ */
+ list_add_tail(&stack->free_list, &free_stacks);
+
+ counters[DEPOT_COUNTER_FREELIST_SIZE]++;
+ counters[DEPOT_COUNTER_REFD_FREES]++;
+ counters[DEPOT_COUNTER_REFD_INUSE]--;
+
+ printk_deferred_exit();
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
}
/* Calculates the hash for a stack. */
@@ -453,22 +574,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
/* Finds a stack in a bucket of the hash table. */
static inline struct stack_record *find_stack(struct list_head *bucket,
- unsigned long *entries, int size,
- u32 hash)
+ unsigned long *entries, int size,
+ u32 hash, depot_flags_t flags)
{
- struct list_head *pos;
- struct stack_record *found;
+ struct stack_record *stack, *ret = NULL;
- lockdep_assert_held(&pool_rwlock);
+ /*
+ * Stack depot may be used from instrumentation that instruments RCU or
+ * tracing itself; use variant that does not call into RCU and cannot be
+ * traced.
+ *
+ * Note: Such use cases must take care when using refcounting to evict
+ * unused entries, because the stack record free-then-reuse code paths
+ * do call into RCU.
+ */
+ rcu_read_lock_sched_notrace();
+
+ list_for_each_entry_rcu(stack, bucket, hash_list) {
+ if (stack->hash != hash || stack->size != size)
+ continue;
+
+ /*
+ * This may race with depot_free_stack() accessing the freelist
+ * management state unioned with @entries. The refcount is zero
+ * in that case and the below refcount_inc_not_zero() will fail.
+ */
+ if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
+ continue;
+
+ /*
+ * Try to increment refcount. If this succeeds, the stack record
+ * is valid and has not yet been freed.
+ *
+ * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
+ * to then call stack_depot_put() later, and we can assume that
+ * a stack record is never placed back on the freelist.
+ */
+ if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
+ continue;
- list_for_each(pos, bucket) {
- found = list_entry(pos, struct stack_record, list);
- if (found->hash == hash &&
- found->size == size &&
- !stackdepot_memcmp(entries, found->entries, size))
- return found;
+ ret = stack;
+ break;
}
- return NULL;
+
+ rcu_read_unlock_sched_notrace();
+
+ return ret;
}
depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
@@ -482,7 +633,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
struct page *page = NULL;
void *prealloc = NULL;
bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC;
- bool need_alloc = false;
unsigned long flags;
u32 hash;
@@ -505,31 +655,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
hash = hash_stack(entries, nr_entries);
bucket = &stack_table[hash & stack_hash_mask];
- read_lock_irqsave(&pool_rwlock, flags);
- printk_deferred_enter();
-
- /* Fast path: look the stack trace up without full locking. */
- found = find_stack(bucket, entries, nr_entries, hash);
- if (found) {
- if (depot_flags & STACK_DEPOT_FLAG_GET)
- refcount_inc(&found->count);
- printk_deferred_exit();
- read_unlock_irqrestore(&pool_rwlock, flags);
+ /* Fast path: look the stack trace up without locking. */
+ found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
+ if (found)
goto exit;
- }
-
- /* Take note if another stack pool needs to be allocated. */
- if (new_pool_required)
- need_alloc = true;
-
- printk_deferred_exit();
- read_unlock_irqrestore(&pool_rwlock, flags);
/*
* Allocate memory for a new pool if required now:
* we won't be able to do that under the lock.
*/
- if (unlikely(can_alloc && need_alloc)) {
+ if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
/*
* Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic
@@ -543,31 +678,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
prealloc = page_address(page);
}
- write_lock_irqsave(&pool_rwlock, flags);
+ raw_spin_lock_irqsave(&pool_lock, flags);
printk_deferred_enter();
- found = find_stack(bucket, entries, nr_entries, hash);
+ /* Try to find again, to avoid concurrently inserting duplicates. */
+ found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (!found) {
struct stack_record *new =
- depot_alloc_stack(entries, nr_entries, hash, &prealloc);
+ depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
if (new) {
- list_add(&new->list, bucket);
+ /*
+ * This releases the stack record into the bucket and
+ * makes it visible to readers in find_stack().
+ */
+ list_add_rcu(&new->hash_list, bucket);
found = new;
}
- } else {
- if (depot_flags & STACK_DEPOT_FLAG_GET)
- refcount_inc(&found->count);
+ }
+
+ if (prealloc) {
/*
- * Stack depot already contains this stack trace, but let's
- * keep the preallocated memory for future.
+ * Either stack depot already contains this stack trace, or
+ * depot_alloc_stack() did not consume the preallocated memory.
+ * Try to keep the preallocated memory for future.
*/
- if (prealloc)
- depot_keep_new_pool(&prealloc);
+ depot_keep_new_pool(&prealloc);
}
printk_deferred_exit();
- write_unlock_irqrestore(&pool_rwlock, flags);
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
if (prealloc) {
/* Stack depot didn't use this memory, free it. */
@@ -592,7 +732,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries)
{
struct stack_record *stack;
- unsigned long flags;
*entries = NULL;
/*
@@ -604,13 +743,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
if (!handle || stack_depot_disabled)
return 0;
- read_lock_irqsave(&pool_rwlock, flags);
- printk_deferred_enter();
-
stack = depot_fetch_stack(handle);
-
- printk_deferred_exit();
- read_unlock_irqrestore(&pool_rwlock, flags);
+ /*
+ * Should never be NULL, otherwise this is a use-after-put (or just a
+ * corrupt handle).
+ */
+ if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
+ return 0;
*entries = stack->entries;
return stack->size;
@@ -620,29 +759,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
void stack_depot_put(depot_stack_handle_t handle)
{
struct stack_record *stack;
- unsigned long flags;
if (!handle || stack_depot_disabled)
return;
- write_lock_irqsave(&pool_rwlock, flags);
- printk_deferred_enter();
-
stack = depot_fetch_stack(handle);
- if (WARN_ON(!stack))
- goto out;
-
- if (refcount_dec_and_test(&stack->count)) {
- /* Unlink stack from the hash table. */
- list_del(&stack->list);
+ /*
+ * Should always be able to find the stack record, otherwise this is an
+ * unbalanced put attempt (or corrupt handle).
+ */
+ if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
+ return;
- /* Free stack. */
+ if (refcount_dec_and_test(&stack->count))
depot_free_stack(stack);
- }
-
-out:
- printk_deferred_exit();
- write_unlock_irqrestore(&pool_rwlock, flags);
}
EXPORT_SYMBOL_GPL(stack_depot_put);
@@ -690,3 +820,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+ /*
+ * data race ok: These are just statistics counters, and approximate
+ * statistics are ok for debugging.
+ */
+ seq_printf(seq, "pools: %d\n", data_race(pools_num));
+ for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
+ seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+static int depot_debugfs_init(void)
+{
+ struct dentry *dir;
+
+ if (stack_depot_disabled)
+ return 0;
+
+ dir = debugfs_create_dir("stackdepot", NULL);
+ debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
+ return 0;
+}
+late_initcall(depot_debugfs_init);
diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c
index 05947a2feb93..dc3c68f46f0a 100644
--- a/lib/stackinit_kunit.c
+++ b/lib/stackinit_kunit.c
@@ -63,7 +63,16 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
#define FETCH_ARG_STRING(var) var
#define FETCH_ARG_STRUCT(var) &var
+/*
+ * On m68k, if the leaf function test variable is longer than 8 bytes,
+ * the start of the stack frame moves. 8 is sufficiently large to
+ * test m68k char arrays, but leave it at 16 for other architectures.
+ */
+#ifdef CONFIG_M68K
+#define FILL_SIZE_STRING 8
+#else
#define FILL_SIZE_STRING 16
+#endif
#define INIT_CLONE_SCALAR /**/
#define INIT_CLONE_STRING [FILL_SIZE_STRING]
@@ -165,19 +174,23 @@ static noinline void test_ ## name (struct kunit *test) \
/* Verify all bytes overwritten with 0xFF. */ \
for (sum = 0, i = 0; i < target_size; i++) \
sum += (check_buf[i] != 0xFF); \
- KUNIT_ASSERT_EQ_MSG(test, sum, 0, \
- "leaf fill was not 0xFF!?\n"); \
/* Clear entire check buffer for later bit tests. */ \
memset(check_buf, 0x00, sizeof(check_buf)); \
/* Extract stack-defined variable contents. */ \
ignored = leaf_ ##name((unsigned long)&ignored, 0, \
FETCH_ARG_ ## which(zero)); \
+ /* \
+ * Delay the sum test to here to do as little as \
+ * possible between the two leaf function calls. \
+ */ \
+ KUNIT_ASSERT_EQ_MSG(test, sum, 0, \
+ "leaf fill was not 0xFF!?\n"); \
\
/* Validate that compiler lined up fill and target. */ \
KUNIT_ASSERT_TRUE_MSG(test, \
stackinit_range_contains(fill_start, fill_size, \
target_start, target_size), \
- "stack fill missed target!? " \
+ "stackframe was not the same between calls!? " \
"(fill %zu wide, target offset by %d)\n", \
fill_size, \
(int)((ssize_t)(uintptr_t)fill_start - \
diff --git a/lib/string.c b/lib/string.c
index 6891d15ce991..966da44bfc86 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -15,19 +15,20 @@
*/
#define __NO_FORTIFY
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
+#include <linux/bits.h>
#include <linux/bug.h>
+#include <linux/ctype.h>
#include <linux/errno.h>
-#include <linux/slab.h>
+#include <linux/limits.h>
+#include <linux/linkage.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/page.h>
+#include <asm/rwonce.h>
#include <asm/unaligned.h>
-#include <asm/byteorder.h>
#include <asm/word-at-a-time.h>
-#include <asm/page.h>
#ifndef __HAVE_ARCH_STRNCASECMP
/**
@@ -103,8 +104,7 @@ char *strncpy(char *dest, const char *src, size_t count)
EXPORT_SYMBOL(strncpy);
#endif
-#ifndef __HAVE_ARCH_STRSCPY
-ssize_t strscpy(char *dest, const char *src, size_t count)
+ssize_t sized_strscpy(char *dest, const char *src, size_t count)
{
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
size_t max = count;
@@ -170,8 +170,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count)
return -E2BIG;
}
-EXPORT_SYMBOL(strscpy);
-#endif
+EXPORT_SYMBOL(sized_strscpy);
/**
* stpcpy - copy a string from src to dest returning a pointer to the new end
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 7713f73e66b0..69ba49b853c7 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -18,12 +18,14 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/string_helpers.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
/**
* string_get_size - get the size in the specified units
* @size: The size to be converted in blocks
* @blk_size: Size of the block (use 1 for size in bytes)
- * @units: units to use (powers of 1000 or 1024)
+ * @units: Units to use (powers of 1000 or 1024), whether to include space separator
* @buf: buffer to format to
* @len: length of buffer
*
@@ -37,11 +39,12 @@
int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
char *buf, int len)
{
+ enum string_size_units units_base = units & STRING_UNITS_MASK;
static const char *const units_10[] = {
- "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+ "", "k", "M", "G", "T", "P", "E", "Z", "Y",
};
static const char *const units_2[] = {
- "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+ "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi",
};
static const char *const *const units_str[] = {
[STRING_UNITS_10] = units_10,
@@ -66,7 +69,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
/* This is Napier's algorithm. Reduce the original block size to
*
- * coefficient * divisor[units]^i
+ * coefficient * divisor[units_base]^i
*
* we do the reduction so both coefficients are just under 32 bits so
* that multiplying them together won't overflow 64 bits and we keep
@@ -76,12 +79,12 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
* precision is in the coefficients.
*/
while (blk_size >> 32) {
- do_div(blk_size, divisor[units]);
+ do_div(blk_size, divisor[units_base]);
i++;
}
while (size >> 32) {
- do_div(size, divisor[units]);
+ do_div(size, divisor[units_base]);
i++;
}
@@ -90,8 +93,8 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
size *= blk_size;
/* and logarithmically reduce it until it's just under the divisor */
- while (size >= divisor[units]) {
- remainder = do_div(size, divisor[units]);
+ while (size >= divisor[units_base]) {
+ remainder = do_div(size, divisor[units_base]);
i++;
}
@@ -101,10 +104,10 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
for (j = 0; sf_cap*10 < 1000; j++)
sf_cap *= 10;
- if (units == STRING_UNITS_2) {
+ if (units_base == STRING_UNITS_2) {
/* express the remainder as a decimal. It's currently the
* numerator of a fraction whose denominator is
- * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
+ * divisor[units_base], which is 1 << 10 for STRING_UNITS_2 */
remainder *= 1000;
remainder >>= 10;
}
@@ -126,10 +129,12 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
if (i >= ARRAY_SIZE(units_2))
unit = "UNK";
else
- unit = units_str[units][i];
+ unit = units_str[units_base][i];
- return snprintf(buf, len, "%u%s %s", (u32)size,
- tmp, unit);
+ return snprintf(buf, len, "%u%s%s%s%s", (u32)size, tmp,
+ (units & STRING_UNITS_NO_SPACE) ? "" : " ",
+ unit,
+ (units & STRING_UNITS_NO_BYTES) ? "" : "B");
}
EXPORT_SYMBOL(string_get_size);
@@ -826,40 +831,6 @@ char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n)
EXPORT_SYMBOL_GPL(devm_kasprintf_strarray);
/**
- * strscpy_pad() - Copy a C-string into a sized buffer
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @count: Size of destination buffer
- *
- * Copy the string, or as much of it as fits, into the dest buffer. The
- * behavior is undefined if the string buffers overlap. The destination
- * buffer is always %NUL terminated, unless it's zero-sized.
- *
- * If the source string is shorter than the destination buffer, zeros
- * the tail of the destination buffer.
- *
- * For full explanation of why you may want to consider using the
- * 'strscpy' functions please see the function docstring for strscpy().
- *
- * Returns:
- * * The number of characters copied (not including the trailing %NUL)
- * * -E2BIG if count is 0 or @src was truncated.
- */
-ssize_t strscpy_pad(char *dest, const char *src, size_t count)
-{
- ssize_t written;
-
- written = strscpy(dest, src, count);
- if (written < 0 || written == count - 1)
- return written;
-
- memset(dest + written + 1, 0, count - written - 1);
-
- return written;
-}
-EXPORT_SYMBOL(strscpy_pad);
-
-/**
* skip_spaces - Removes leading whitespace from @str.
* @str: The string to be stripped.
*
@@ -1042,10 +1013,28 @@ EXPORT_SYMBOL(__read_overflow2_field);
void __write_overflow_field(size_t avail, size_t wanted) { }
EXPORT_SYMBOL(__write_overflow_field);
-void fortify_panic(const char *name)
+static const char * const fortify_func_name[] = {
+#define MAKE_FORTIFY_FUNC_NAME(func) [MAKE_FORTIFY_FUNC(func)] = #func
+ EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC_NAME)
+#undef MAKE_FORTIFY_FUNC_NAME
+};
+
+void __fortify_report(const u8 reason, const size_t avail, const size_t size)
+{
+ const u8 func = FORTIFY_REASON_FUNC(reason);
+ const bool write = FORTIFY_REASON_DIR(reason);
+ const char *name;
+
+ name = fortify_func_name[umin(func, FORTIFY_FUNC_UNKNOWN)];
+ WARN(1, "%s: detected buffer overflow: %zu byte %s of buffer size %zu\n",
+ name, size, str_read_write(!write), avail);
+}
+EXPORT_SYMBOL(__fortify_report);
+
+void __fortify_panic(const u8 reason, const size_t avail, const size_t size)
{
- pr_emerg("detected buffer overflow in %s\n", name);
+ __fortify_report(reason, avail, size);
BUG();
}
-EXPORT_SYMBOL(fortify_panic);
+EXPORT_SYMBOL(__fortify_panic);
#endif /* CONFIG_FORTIFY_SOURCE */
diff --git a/lib/test-string_helpers.c b/lib/string_helpers_kunit.c
index 9a68849a5d55..f88e39fd68d6 100644
--- a/lib/test-string_helpers.c
+++ b/lib/string_helpers_kunit.c
@@ -1,34 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Test cases for lib/string_helpers.c module.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/init.h>
+#include <kunit/test.h>
+#include <linux/array_size.h>
#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/module.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/string_helpers.h>
-static __init bool test_string_check_buf(const char *name, unsigned int flags,
- char *in, size_t p,
- char *out_real, size_t q_real,
- char *out_test, size_t q_test)
+static void test_string_check_buf(struct kunit *test,
+ const char *name, unsigned int flags,
+ char *in, size_t p,
+ char *out_real, size_t q_real,
+ char *out_test, size_t q_test)
{
- if (q_real == q_test && !memcmp(out_test, out_real, q_test))
- return true;
-
- pr_warn("Test '%s' failed: flags = %#x\n", name, flags);
-
- print_hex_dump(KERN_WARNING, "Input: ", DUMP_PREFIX_NONE, 16, 1,
- in, p, true);
- print_hex_dump(KERN_WARNING, "Expected: ", DUMP_PREFIX_NONE, 16, 1,
- out_test, q_test, true);
- print_hex_dump(KERN_WARNING, "Got: ", DUMP_PREFIX_NONE, 16, 1,
- out_real, q_real, true);
-
- return false;
+ KUNIT_ASSERT_EQ_MSG(test, q_real, q_test, "name:%s", name);
+ KUNIT_EXPECT_MEMEQ_MSG(test, out_test, out_real, q_test,
+ "name:%s", name);
}
struct test_string {
@@ -37,7 +28,7 @@ struct test_string {
unsigned int flags;
};
-static const struct test_string strings[] __initconst = {
+static const struct test_string strings[] = {
{
.in = "\\f\\ \\n\\r\\t\\v",
.out = "\f\\ \n\r\t\v",
@@ -60,17 +51,19 @@ static const struct test_string strings[] __initconst = {
},
};
-static void __init test_string_unescape(const char *name, unsigned int flags,
- bool inplace)
+static void test_string_unescape(struct kunit *test,
+ const char *name, unsigned int flags,
+ bool inplace)
{
int q_real = 256;
- char *in = kmalloc(q_real, GFP_KERNEL);
- char *out_test = kmalloc(q_real, GFP_KERNEL);
- char *out_real = kmalloc(q_real, GFP_KERNEL);
+ char *in = kunit_kzalloc(test, q_real, GFP_KERNEL);
+ char *out_test = kunit_kzalloc(test, q_real, GFP_KERNEL);
+ char *out_real = kunit_kzalloc(test, q_real, GFP_KERNEL);
int i, p = 0, q_test = 0;
- if (!in || !out_test || !out_real)
- goto out;
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, in);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, out_test);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, out_real);
for (i = 0; i < ARRAY_SIZE(strings); i++) {
const char *s = strings[i].in;
@@ -103,12 +96,8 @@ static void __init test_string_unescape(const char *name, unsigned int flags,
q_real = string_unescape(in, out_real, q_real, flags);
}
- test_string_check_buf(name, flags, in, p - 1, out_real, q_real,
+ test_string_check_buf(test, name, flags, in, p - 1, out_real, q_real,
out_test, q_test);
-out:
- kfree(out_real);
- kfree(out_test);
- kfree(in);
}
struct test_string_1 {
@@ -123,7 +112,7 @@ struct test_string_2 {
};
#define TEST_STRING_2_DICT_0 NULL
-static const struct test_string_2 escape0[] __initconst = {{
+static const struct test_string_2 escape0[] = {{
.in = "\f\\ \n\r\t\v",
.s1 = {{
.out = "\\f\\ \\n\\r\\t\\v",
@@ -221,7 +210,7 @@ static const struct test_string_2 escape0[] __initconst = {{
}};
#define TEST_STRING_2_DICT_1 "b\\ \t\r\xCF"
-static const struct test_string_2 escape1[] __initconst = {{
+static const struct test_string_2 escape1[] = {{
.in = "\f\\ \n\r\t\v",
.s1 = {{
.out = "\f\\134\\040\n\\015\\011\v",
@@ -358,7 +347,7 @@ static const struct test_string_2 escape1[] __initconst = {{
/* terminator */
}};
-static const struct test_string strings_upper[] __initconst = {
+static const struct test_string strings_upper[] = {
{
.in = "abcdefgh1234567890test",
.out = "ABCDEFGH1234567890TEST",
@@ -369,7 +358,7 @@ static const struct test_string strings_upper[] __initconst = {
},
};
-static const struct test_string strings_lower[] __initconst = {
+static const struct test_string strings_lower[] = {
{
.in = "ABCDEFGH1234567890TEST",
.out = "abcdefgh1234567890test",
@@ -380,8 +369,8 @@ static const struct test_string strings_lower[] __initconst = {
},
};
-static __init const char *test_string_find_match(const struct test_string_2 *s2,
- unsigned int flags)
+static const char *test_string_find_match(const struct test_string_2 *s2,
+ unsigned int flags)
{
const struct test_string_1 *s1 = s2->s1;
unsigned int i;
@@ -402,31 +391,31 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2,
return NULL;
}
-static __init void
-test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc,
+static void
+test_string_escape_overflow(struct kunit *test,
+ const char *in, int p, unsigned int flags, const char *esc,
int q_test, const char *name)
{
int q_real;
q_real = string_escape_mem(in, p, NULL, 0, flags, esc);
- if (q_real != q_test)
- pr_warn("Test '%s' failed: flags = %#x, osz = 0, expected %d, got %d\n",
- name, flags, q_test, q_real);
+ KUNIT_EXPECT_EQ_MSG(test, q_real, q_test, "name:%s: flags:%#x", name, flags);
}
-static __init void test_string_escape(const char *name,
- const struct test_string_2 *s2,
- unsigned int flags, const char *esc)
+static void test_string_escape(struct kunit *test, const char *name,
+ const struct test_string_2 *s2,
+ unsigned int flags, const char *esc)
{
size_t out_size = 512;
- char *out_test = kmalloc(out_size, GFP_KERNEL);
- char *out_real = kmalloc(out_size, GFP_KERNEL);
- char *in = kmalloc(256, GFP_KERNEL);
+ char *out_test = kunit_kzalloc(test, out_size, GFP_KERNEL);
+ char *out_real = kunit_kzalloc(test, out_size, GFP_KERNEL);
+ char *in = kunit_kzalloc(test, 256, GFP_KERNEL);
int p = 0, q_test = 0;
int q_real;
- if (!out_test || !out_real || !in)
- goto out;
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, out_test);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, out_real);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, in);
for (; s2->in; s2++) {
const char *out;
@@ -462,62 +451,99 @@ static __init void test_string_escape(const char *name,
q_real = string_escape_mem(in, p, out_real, out_size, flags, esc);
- test_string_check_buf(name, flags, in, p, out_real, q_real, out_test,
+ test_string_check_buf(test, name, flags, in, p, out_real, q_real, out_test,
q_test);
- test_string_escape_overflow(in, p, flags, esc, q_test, name);
-
-out:
- kfree(in);
- kfree(out_real);
- kfree(out_test);
+ test_string_escape_overflow(test, in, p, flags, esc, q_test, name);
}
#define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2) \
- do { \
- BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf); \
- BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf); \
- __test_string_get_size((size), (blk_size), (exp_result10), \
- (exp_result2)); \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2) \
+ do { \
+ BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf); \
+ BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf); \
+ __test_string_get_size(test, (size), (blk_size), (exp_result10), \
+ (exp_result2)); \
} while (0)
-static __init void test_string_get_size_check(const char *units,
- const char *exp,
- char *res,
- const u64 size,
- const u64 blk_size)
+static void test_string_get_size_check(struct kunit *test,
+ const char *units,
+ const char *exp,
+ char *res,
+ const u64 size,
+ const u64 blk_size)
{
- if (!memcmp(res, exp, strlen(exp) + 1))
- return;
-
- res[string_get_size_maxbuf - 1] = '\0';
-
- pr_warn("Test 'test_string_get_size' failed!\n");
- pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
+ KUNIT_EXPECT_MEMEQ_MSG(test, res, exp, strlen(exp) + 1,
+ "string_get_size(size = %llu, blk_size = %llu, units = %s)",
size, blk_size, units);
- pr_warn("expected: '%s', got '%s'\n", exp, res);
}
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
- const char *exp_result10,
- const char *exp_result2)
+static void __strchrcut(char *dst, const char *src, const char *cut)
+{
+ const char *from = src;
+ size_t len;
+
+ do {
+ len = strcspn(from, cut);
+ memcpy(dst, from, len);
+ dst += len;
+ from += len;
+ } while (*from++);
+ *dst = '\0';
+}
+
+static void __test_string_get_size_one(struct kunit *test,
+ const u64 size, const u64 blk_size,
+ const char *exp_result10,
+ const char *exp_result2,
+ enum string_size_units units,
+ const char *cut)
{
char buf10[string_get_size_maxbuf];
char buf2[string_get_size_maxbuf];
+ char exp10[string_get_size_maxbuf];
+ char exp2[string_get_size_maxbuf];
+ char prefix10[64];
+ char prefix2[64];
+
+ sprintf(prefix10, "STRING_UNITS_10 [%s]", cut);
+ sprintf(prefix2, "STRING_UNITS_2 [%s]", cut);
+
+ __strchrcut(exp10, exp_result10, cut);
+ __strchrcut(exp2, exp_result2, cut);
- string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
- string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+ string_get_size(size, blk_size, STRING_UNITS_10 | units, buf10, sizeof(buf10));
+ string_get_size(size, blk_size, STRING_UNITS_2 | units, buf2, sizeof(buf2));
- test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
- size, blk_size);
+ test_string_get_size_check(test, prefix10, exp10, buf10, size, blk_size);
+ test_string_get_size_check(test, prefix2, exp2, buf2, size, blk_size);
+}
+
+static void __test_string_get_size(struct kunit *test,
+ const u64 size, const u64 blk_size,
+ const char *exp_result10,
+ const char *exp_result2)
+{
+ struct {
+ enum string_size_units units;
+ const char *cut;
+ } get_size_test_cases[] = {
+ { 0, "" },
+ { STRING_UNITS_NO_SPACE, " " },
+ { STRING_UNITS_NO_SPACE | STRING_UNITS_NO_BYTES, " B" },
+ { STRING_UNITS_NO_BYTES, "B" },
+ };
+ int i;
- test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
- size, blk_size);
+ for (i = 0; i < ARRAY_SIZE(get_size_test_cases); i++)
+ __test_string_get_size_one(test, size, blk_size,
+ exp_result10, exp_result2,
+ get_size_test_cases[i].units,
+ get_size_test_cases[i].cut);
}
-static __init void test_string_get_size(void)
+static void test_get_size(struct kunit *test)
{
/* small values */
test_string_get_size_one(0, 512, "0 B", "0 B");
@@ -537,7 +563,7 @@ static __init void test_string_get_size(void)
test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
}
-static void __init test_string_upper_lower(void)
+static void test_upper_lower(struct kunit *test)
{
char *dst;
int i;
@@ -547,16 +573,10 @@ static void __init test_string_upper_lower(void)
int len = strlen(strings_upper[i].in) + 1;
dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return;
+ KUNIT_ASSERT_NOT_NULL(test, dst);
string_upper(dst, s);
- if (memcmp(dst, strings_upper[i].out, len)) {
- pr_warn("Test 'string_upper' failed : expected %s, got %s!\n",
- strings_upper[i].out, dst);
- kfree(dst);
- return;
- }
+ KUNIT_EXPECT_STREQ(test, dst, strings_upper[i].out);
kfree(dst);
}
@@ -565,45 +585,44 @@ static void __init test_string_upper_lower(void)
int len = strlen(strings_lower[i].in) + 1;
dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return;
+ KUNIT_ASSERT_NOT_NULL(test, dst);
string_lower(dst, s);
- if (memcmp(dst, strings_lower[i].out, len)) {
- pr_warn("Test 'string_lower failed : : expected %s, got %s!\n",
- strings_lower[i].out, dst);
- kfree(dst);
- return;
- }
+ KUNIT_EXPECT_STREQ(test, dst, strings_lower[i].out);
kfree(dst);
}
}
-static int __init test_string_helpers_init(void)
+static void test_unescape(struct kunit *test)
{
unsigned int i;
- pr_info("Running tests...\n");
for (i = 0; i < UNESCAPE_ALL_MASK + 1; i++)
- test_string_unescape("unescape", i, false);
- test_string_unescape("unescape inplace",
+ test_string_unescape(test, "unescape", i, false);
+ test_string_unescape(test, "unescape inplace",
get_random_u32_below(UNESCAPE_ALL_MASK + 1), true);
/* Without dictionary */
for (i = 0; i < ESCAPE_ALL_MASK + 1; i++)
- test_string_escape("escape 0", escape0, i, TEST_STRING_2_DICT_0);
+ test_string_escape(test, "escape 0", escape0, i, TEST_STRING_2_DICT_0);
/* With dictionary */
for (i = 0; i < ESCAPE_ALL_MASK + 1; i++)
- test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
+ test_string_escape(test, "escape 1", escape1, i, TEST_STRING_2_DICT_1);
+}
- /* Test string_get_size() */
- test_string_get_size();
+static struct kunit_case string_helpers_test_cases[] = {
+ KUNIT_CASE(test_get_size),
+ KUNIT_CASE(test_upper_lower),
+ KUNIT_CASE(test_unescape),
+ {}
+};
- /* Test string upper(), string_lower() */
- test_string_upper_lower();
+static struct kunit_suite string_helpers_test_suite = {
+ .name = "string_helpers",
+ .test_cases = string_helpers_test_cases,
+};
+
+kunit_test_suites(&string_helpers_test_suite);
- return -EINVAL;
-}
-module_init(test_string_helpers_init);
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/string_kunit.c b/lib/string_kunit.c
new file mode 100644
index 000000000000..eabf025cf77c
--- /dev/null
+++ b/lib/string_kunit.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test cases for string functions.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <kunit/test.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+static void test_memset16(struct kunit *test)
+{
+ unsigned i, j, k;
+ u16 v, *p;
+
+ p = kunit_kzalloc(test, 256 * 2 * 2, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p);
+
+ for (i = 0; i < 256; i++) {
+ for (j = 0; j < 256; j++) {
+ memset(p, 0xa1, 256 * 2 * sizeof(v));
+ memset16(p + i, 0xb1b2, j);
+ for (k = 0; k < 512; k++) {
+ v = p[k];
+ if (k < i) {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xa1a1,
+ "i:%d j:%d k:%d", i, j, k);
+ } else if (k < i + j) {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xb1b2,
+ "i:%d j:%d k:%d", i, j, k);
+ } else {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xa1a1,
+ "i:%d j:%d k:%d", i, j, k);
+ }
+ }
+ }
+ }
+}
+
+static void test_memset32(struct kunit *test)
+{
+ unsigned i, j, k;
+ u32 v, *p;
+
+ p = kunit_kzalloc(test, 256 * 2 * 4, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p);
+
+ for (i = 0; i < 256; i++) {
+ for (j = 0; j < 256; j++) {
+ memset(p, 0xa1, 256 * 2 * sizeof(v));
+ memset32(p + i, 0xb1b2b3b4, j);
+ for (k = 0; k < 512; k++) {
+ v = p[k];
+ if (k < i) {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xa1a1a1a1,
+ "i:%d j:%d k:%d", i, j, k);
+ } else if (k < i + j) {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xb1b2b3b4,
+ "i:%d j:%d k:%d", i, j, k);
+ } else {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xa1a1a1a1,
+ "i:%d j:%d k:%d", i, j, k);
+ }
+ }
+ }
+ }
+}
+
+static void test_memset64(struct kunit *test)
+{
+ unsigned i, j, k;
+ u64 v, *p;
+
+ p = kunit_kzalloc(test, 256 * 2 * 8, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p);
+
+ for (i = 0; i < 256; i++) {
+ for (j = 0; j < 256; j++) {
+ memset(p, 0xa1, 256 * 2 * sizeof(v));
+ memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j);
+ for (k = 0; k < 512; k++) {
+ v = p[k];
+ if (k < i) {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xa1a1a1a1a1a1a1a1ULL,
+ "i:%d j:%d k:%d", i, j, k);
+ } else if (k < i + j) {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xb1b2b3b4b5b6b7b8ULL,
+ "i:%d j:%d k:%d", i, j, k);
+ } else {
+ KUNIT_ASSERT_EQ_MSG(test, v, 0xa1a1a1a1a1a1a1a1ULL,
+ "i:%d j:%d k:%d", i, j, k);
+ }
+ }
+ }
+ }
+}
+
+static void test_strchr(struct kunit *test)
+{
+ const char *test_string = "abcdefghijkl";
+ const char *empty_string = "";
+ char *result;
+ int i;
+
+ for (i = 0; i < strlen(test_string) + 1; i++) {
+ result = strchr(test_string, test_string[i]);
+ KUNIT_ASSERT_EQ_MSG(test, result - test_string, i,
+ "char:%c", 'a' + i);
+ }
+
+ result = strchr(empty_string, '\0');
+ KUNIT_ASSERT_PTR_EQ(test, result, empty_string);
+
+ result = strchr(empty_string, 'a');
+ KUNIT_ASSERT_NULL(test, result);
+
+ result = strchr(test_string, 'z');
+ KUNIT_ASSERT_NULL(test, result);
+}
+
+static void test_strnchr(struct kunit *test)
+{
+ const char *test_string = "abcdefghijkl";
+ const char *empty_string = "";
+ char *result;
+ int i, j;
+
+ for (i = 0; i < strlen(test_string) + 1; i++) {
+ for (j = 0; j < strlen(test_string) + 2; j++) {
+ result = strnchr(test_string, j, test_string[i]);
+ if (j <= i) {
+ KUNIT_ASSERT_NULL_MSG(test, result,
+ "char:%c i:%d j:%d", 'a' + i, i, j);
+ } else {
+ KUNIT_ASSERT_EQ_MSG(test, result - test_string, i,
+ "char:%c i:%d j:%d", 'a' + i, i, j);
+ }
+ }
+ }
+
+ result = strnchr(empty_string, 0, '\0');
+ KUNIT_ASSERT_NULL(test, result);
+
+ result = strnchr(empty_string, 1, '\0');
+ KUNIT_ASSERT_PTR_EQ(test, result, empty_string);
+
+ result = strnchr(empty_string, 1, 'a');
+ KUNIT_ASSERT_NULL(test, result);
+
+ result = strnchr(NULL, 0, '\0');
+ KUNIT_ASSERT_NULL(test, result);
+}
+
+static void test_strspn(struct kunit *test)
+{
+ static const struct strspn_test {
+ const char str[16];
+ const char accept[16];
+ const char reject[16];
+ unsigned a;
+ unsigned r;
+ } tests[] = {
+ { "foobar", "", "", 0, 6 },
+ { "abba", "abc", "ABBA", 4, 4 },
+ { "abba", "a", "b", 1, 1 },
+ { "", "abc", "abc", 0, 0},
+ };
+ const struct strspn_test *s = tests;
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i, ++s) {
+ KUNIT_ASSERT_EQ_MSG(test, s->a, strspn(s->str, s->accept),
+ "i:%zu", i);
+ KUNIT_ASSERT_EQ_MSG(test, s->r, strcspn(s->str, s->reject),
+ "i:%zu", i);
+ }
+}
+
+static struct kunit_case string_test_cases[] = {
+ KUNIT_CASE(test_memset16),
+ KUNIT_CASE(test_memset32),
+ KUNIT_CASE(test_memset64),
+ KUNIT_CASE(test_strchr),
+ KUNIT_CASE(test_strnchr),
+ KUNIT_CASE(test_strspn),
+ {}
+};
+
+static struct kunit_suite string_test_suite = {
+ .name = "string",
+ .test_cases = string_test_cases,
+};
+
+kunit_test_suites(&string_test_suite);
+
+MODULE_LICENSE("GPL v2");
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 65f22c2578b0..6b2b33579f56 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -380,6 +380,47 @@ static void __init test_replace(void)
expect_eq_bitmap(bmap, exp3_1_0, nbits);
}
+static const unsigned long sg_mask[] __initconst = {
+ BITMAP_FROM_U64(0x000000000000035aULL),
+};
+
+static const unsigned long sg_src[] __initconst = {
+ BITMAP_FROM_U64(0x0000000000000667ULL),
+};
+
+static const unsigned long sg_gather_exp[] __initconst = {
+ BITMAP_FROM_U64(0x0000000000000029ULL),
+};
+
+static const unsigned long sg_scatter_exp[] __initconst = {
+ BITMAP_FROM_U64(0x000000000000021aULL),
+};
+
+static void __init test_bitmap_sg(void)
+{
+ unsigned int nbits = 64;
+ DECLARE_BITMAP(bmap_gather, 100);
+ DECLARE_BITMAP(bmap_scatter, 100);
+ DECLARE_BITMAP(bmap_tmp, 100);
+ DECLARE_BITMAP(bmap_res, 100);
+
+ /* Simple gather call */
+ bitmap_zero(bmap_gather, 100);
+ bitmap_gather(bmap_gather, sg_src, sg_mask, nbits);
+ expect_eq_bitmap(sg_gather_exp, bmap_gather, nbits);
+
+ /* Simple scatter call */
+ bitmap_zero(bmap_scatter, 100);
+ bitmap_scatter(bmap_scatter, sg_src, sg_mask, nbits);
+ expect_eq_bitmap(sg_scatter_exp, bmap_scatter, nbits);
+
+ /* Scatter/gather relationship */
+ bitmap_zero(bmap_tmp, 100);
+ bitmap_gather(bmap_tmp, bmap_scatter, sg_mask, nbits);
+ bitmap_scatter(bmap_res, bmap_tmp, sg_mask, nbits);
+ expect_eq_bitmap(bmap_scatter, bmap_res, nbits);
+}
+
#define PARSE_TIME 0x1
#define NO_LEN 0x2
@@ -1252,6 +1293,7 @@ static void __init selftest(void)
test_copy();
test_bitmap_region();
test_replace();
+ test_bitmap_sg();
test_bitmap_arr32();
test_bitmap_arr64();
test_bitmap_parse();
diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c
index 4c40580a99a3..f247089d63c0 100644
--- a/lib/test_blackhole_dev.c
+++ b/lib/test_blackhole_dev.c
@@ -29,7 +29,6 @@ static int __init test_blackholedev_init(void)
{
struct ipv6hdr *ip6h;
struct sk_buff *skb;
- struct ethhdr *ethh;
struct udphdr *uh;
int data_len;
int ret;
@@ -61,7 +60,7 @@ static int __init test_blackholedev_init(void)
ip6h->saddr = in6addr_loopback;
ip6h->daddr = in6addr_loopback;
/* Ether */
- ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr));
+ skb_push(skb, sizeof(struct ethhdr));
skb_set_mac_header(skb, 0);
skb->protocol = htons(ETH_P_IPV6);
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 29185ac5c727..399380db449c 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -3599,6 +3599,45 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
mas_unlock(&mas);
}
+static noinline void __init alloc_cyclic_testing(struct maple_tree *mt)
+{
+ unsigned long location;
+ unsigned long next;
+ int ret = 0;
+ MA_STATE(mas, mt, 0, 0);
+
+ next = 0;
+ mtree_lock(mt);
+ for (int i = 0; i < 100; i++) {
+ mas_alloc_cyclic(&mas, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MAS_BUG_ON(&mas, i != location - 2);
+ MAS_BUG_ON(&mas, mas.index != location);
+ MAS_BUG_ON(&mas, mas.last != location);
+ MAS_BUG_ON(&mas, i != next - 3);
+ }
+
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+ next = 0;
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (int i = 0; i < 100; i++) {
+ mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, i != location - 2);
+ MT_BUG_ON(mt, i != next - 3);
+ MT_BUG_ON(mt, mtree_load(mt, location) != mt);
+ }
+
+ mtree_destroy(mt);
+ /* Overflow test */
+ next = ULONG_MAX - 1;
+ ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, ret != 0);
+ ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, ret != 0);
+ ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL);
+ MT_BUG_ON(mt, ret != 1);
+}
+
static DEFINE_MTREE(tree);
static int __init maple_tree_seed(void)
{
@@ -3880,6 +3919,11 @@ static int __init maple_tree_seed(void)
check_state_handling(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ alloc_cyclic_testing(&tree);
+ mtree_destroy(&tree);
+
+
#if defined(BENCH)
skip:
#endif
diff --git a/lib/test_string.c b/lib/test_string.c
deleted file mode 100644
index c5cb92fb710e..000000000000
--- a/lib/test_string.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/module.h>
-#include <linux/printk.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-
-static __init int memset16_selftest(void)
-{
- unsigned i, j, k;
- u16 v, *p;
-
- p = kmalloc(256 * 2 * 2, GFP_KERNEL);
- if (!p)
- return -1;
-
- for (i = 0; i < 256; i++) {
- for (j = 0; j < 256; j++) {
- memset(p, 0xa1, 256 * 2 * sizeof(v));
- memset16(p + i, 0xb1b2, j);
- for (k = 0; k < 512; k++) {
- v = p[k];
- if (k < i) {
- if (v != 0xa1a1)
- goto fail;
- } else if (k < i + j) {
- if (v != 0xb1b2)
- goto fail;
- } else {
- if (v != 0xa1a1)
- goto fail;
- }
- }
- }
- }
-
-fail:
- kfree(p);
- if (i < 256)
- return (i << 24) | (j << 16) | k | 0x8000;
- return 0;
-}
-
-static __init int memset32_selftest(void)
-{
- unsigned i, j, k;
- u32 v, *p;
-
- p = kmalloc(256 * 2 * 4, GFP_KERNEL);
- if (!p)
- return -1;
-
- for (i = 0; i < 256; i++) {
- for (j = 0; j < 256; j++) {
- memset(p, 0xa1, 256 * 2 * sizeof(v));
- memset32(p + i, 0xb1b2b3b4, j);
- for (k = 0; k < 512; k++) {
- v = p[k];
- if (k < i) {
- if (v != 0xa1a1a1a1)
- goto fail;
- } else if (k < i + j) {
- if (v != 0xb1b2b3b4)
- goto fail;
- } else {
- if (v != 0xa1a1a1a1)
- goto fail;
- }
- }
- }
- }
-
-fail:
- kfree(p);
- if (i < 256)
- return (i << 24) | (j << 16) | k | 0x8000;
- return 0;
-}
-
-static __init int memset64_selftest(void)
-{
- unsigned i, j, k;
- u64 v, *p;
-
- p = kmalloc(256 * 2 * 8, GFP_KERNEL);
- if (!p)
- return -1;
-
- for (i = 0; i < 256; i++) {
- for (j = 0; j < 256; j++) {
- memset(p, 0xa1, 256 * 2 * sizeof(v));
- memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j);
- for (k = 0; k < 512; k++) {
- v = p[k];
- if (k < i) {
- if (v != 0xa1a1a1a1a1a1a1a1ULL)
- goto fail;
- } else if (k < i + j) {
- if (v != 0xb1b2b3b4b5b6b7b8ULL)
- goto fail;
- } else {
- if (v != 0xa1a1a1a1a1a1a1a1ULL)
- goto fail;
- }
- }
- }
- }
-
-fail:
- kfree(p);
- if (i < 256)
- return (i << 24) | (j << 16) | k | 0x8000;
- return 0;
-}
-
-static __init int strchr_selftest(void)
-{
- const char *test_string = "abcdefghijkl";
- const char *empty_string = "";
- char *result;
- int i;
-
- for (i = 0; i < strlen(test_string) + 1; i++) {
- result = strchr(test_string, test_string[i]);
- if (result - test_string != i)
- return i + 'a';
- }
-
- result = strchr(empty_string, '\0');
- if (result != empty_string)
- return 0x101;
-
- result = strchr(empty_string, 'a');
- if (result)
- return 0x102;
-
- result = strchr(test_string, 'z');
- if (result)
- return 0x103;
-
- return 0;
-}
-
-static __init int strnchr_selftest(void)
-{
- const char *test_string = "abcdefghijkl";
- const char *empty_string = "";
- char *result;
- int i, j;
-
- for (i = 0; i < strlen(test_string) + 1; i++) {
- for (j = 0; j < strlen(test_string) + 2; j++) {
- result = strnchr(test_string, j, test_string[i]);
- if (j <= i) {
- if (!result)
- continue;
- return ((i + 'a') << 8) | j;
- }
- if (result - test_string != i)
- return ((i + 'a') << 8) | j;
- }
- }
-
- result = strnchr(empty_string, 0, '\0');
- if (result)
- return 0x10001;
-
- result = strnchr(empty_string, 1, '\0');
- if (result != empty_string)
- return 0x10002;
-
- result = strnchr(empty_string, 1, 'a');
- if (result)
- return 0x10003;
-
- result = strnchr(NULL, 0, '\0');
- if (result)
- return 0x10004;
-
- return 0;
-}
-
-static __init int strspn_selftest(void)
-{
- static const struct strspn_test {
- const char str[16];
- const char accept[16];
- const char reject[16];
- unsigned a;
- unsigned r;
- } tests[] __initconst = {
- { "foobar", "", "", 0, 6 },
- { "abba", "abc", "ABBA", 4, 4 },
- { "abba", "a", "b", 1, 1 },
- { "", "abc", "abc", 0, 0},
- };
- const struct strspn_test *s = tests;
- size_t i, res;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i, ++s) {
- res = strspn(s->str, s->accept);
- if (res != s->a)
- return 0x100 + 2*i;
- res = strcspn(s->str, s->reject);
- if (res != s->r)
- return 0x100 + 2*i + 1;
- }
- return 0;
-}
-
-static __exit void string_selftest_remove(void)
-{
-}
-
-static __init int string_selftest_init(void)
-{
- int test, subtest;
-
- test = 1;
- subtest = memset16_selftest();
- if (subtest)
- goto fail;
-
- test = 2;
- subtest = memset32_selftest();
- if (subtest)
- goto fail;
-
- test = 3;
- subtest = memset64_selftest();
- if (subtest)
- goto fail;
-
- test = 4;
- subtest = strchr_selftest();
- if (subtest)
- goto fail;
-
- test = 5;
- subtest = strnchr_selftest();
- if (subtest)
- goto fail;
-
- test = 6;
- subtest = strspn_selftest();
- if (subtest)
- goto fail;
-
- pr_info("String selftests succeeded\n");
- return 0;
-fail:
- pr_crit("String selftest failure %d.%08x\n", test, subtest);
- return 0;
-}
-
-module_init(string_selftest_init);
-module_exit(string_selftest_remove);
-MODULE_LICENSE("GPL v2");
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 2062be1f2e80..276c12140ee2 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -11,6 +11,39 @@ typedef void(*test_ubsan_fp)(void);
#config, IS_ENABLED(config) ? "y" : "n"); \
} while (0)
+static void test_ubsan_add_overflow(void)
+{
+ volatile int val = INT_MAX;
+
+ UBSAN_TEST(CONFIG_UBSAN_SIGNED_WRAP);
+ val += 2;
+}
+
+static void test_ubsan_sub_overflow(void)
+{
+ volatile int val = INT_MIN;
+ volatile int val2 = 2;
+
+ UBSAN_TEST(CONFIG_UBSAN_SIGNED_WRAP);
+ val -= val2;
+}
+
+static void test_ubsan_mul_overflow(void)
+{
+ volatile int val = INT_MAX / 2;
+
+ UBSAN_TEST(CONFIG_UBSAN_SIGNED_WRAP);
+ val *= 3;
+}
+
+static void test_ubsan_negate_overflow(void)
+{
+ volatile int val = INT_MIN;
+
+ UBSAN_TEST(CONFIG_UBSAN_SIGNED_WRAP);
+ val = -val;
+}
+
static void test_ubsan_divrem_overflow(void)
{
volatile int val = 16;
@@ -23,8 +56,8 @@ static void test_ubsan_divrem_overflow(void)
static void test_ubsan_shift_out_of_bounds(void)
{
volatile int neg = -1, wrap = 4;
- int val1 = 10;
- int val2 = INT_MAX;
+ volatile int val1 = 10;
+ volatile int val2 = INT_MAX;
UBSAN_TEST(CONFIG_UBSAN_SHIFT, "negative exponent");
val1 <<= neg;
@@ -90,6 +123,10 @@ static void test_ubsan_misaligned_access(void)
}
static const test_ubsan_fp test_ubsan_array[] = {
+ test_ubsan_add_overflow,
+ test_ubsan_sub_overflow,
+ test_ubsan_mul_overflow,
+ test_ubsan_negate_overflow,
test_ubsan_shift_out_of_bounds,
test_ubsan_out_of_bounds,
test_ubsan_load_invalid_value,
diff --git a/lib/ubsan.c b/lib/ubsan.c
index df4f8d1354bb..5fc107f61934 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -222,6 +222,74 @@ static void ubsan_epilogue(void)
check_panic_on_warn("UBSAN");
}
+static void handle_overflow(struct overflow_data *data, void *lhs,
+ void *rhs, char op)
+{
+
+ struct type_descriptor *type = data->type;
+ char lhs_val_str[VALUE_LENGTH];
+ char rhs_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, type_is_signed(type) ?
+ "signed-integer-overflow" :
+ "unsigned-integer-overflow");
+
+ val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
+ val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
+ pr_err("%s %c %s cannot be represented in type %s\n",
+ lhs_val_str,
+ op,
+ rhs_val_str,
+ type->type_name);
+
+ ubsan_epilogue();
+}
+
+void __ubsan_handle_add_overflow(void *data,
+ void *lhs, void *rhs)
+{
+
+ handle_overflow(data, lhs, rhs, '+');
+}
+EXPORT_SYMBOL(__ubsan_handle_add_overflow);
+
+void __ubsan_handle_sub_overflow(void *data,
+ void *lhs, void *rhs)
+{
+ handle_overflow(data, lhs, rhs, '-');
+}
+EXPORT_SYMBOL(__ubsan_handle_sub_overflow);
+
+void __ubsan_handle_mul_overflow(void *data,
+ void *lhs, void *rhs)
+{
+ handle_overflow(data, lhs, rhs, '*');
+}
+EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
+
+void __ubsan_handle_negate_overflow(void *_data, void *old_val)
+{
+ struct overflow_data *data = _data;
+ char old_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+ ubsan_prologue(&data->location, "negation-overflow");
+
+ val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
+
+ pr_err("negation of %s cannot be represented in type %s:\n",
+ old_val_str, data->type->type_name);
+
+ ubsan_epilogue();
+}
+EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
+
+
void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs)
{
struct overflow_data *data = _data;
diff --git a/lib/ubsan.h b/lib/ubsan.h
index 5d99ab81913b..0abbbac8700d 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -124,6 +124,10 @@ typedef s64 s_max;
typedef u64 u_max;
#endif
+void __ubsan_handle_add_overflow(void *data, void *lhs, void *rhs);
+void __ubsan_handle_sub_overflow(void *data, void *lhs, void *rhs);
+void __ubsan_handle_mul_overflow(void *data, void *lhs, void *rhs);
+void __ubsan_handle_negate_overflow(void *_data, void *old_val);
void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs);
void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, void *ptr);
void __ubsan_handle_type_mismatch_v1(void *_data, void *ptr);
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 321ab379994f..afc72fde0f03 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -64,11 +64,11 @@ config SLUB_DEBUG_ON
help
Boot with debugging on by default. SLUB boots by default with
the runtime debug capabilities switched off. Enabling this is
- equivalent to specifying the "slub_debug" parameter on boot.
+ equivalent to specifying the "slab_debug" parameter on boot.
There is no support for more fine grained debug control like
- possible with slub_debug=xxx. SLUB debugging may be switched
+ possible with slab_debug=xxx. SLUB debugging may be switched
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
- "slub_debug=-".
+ "slab_debug=-".
config PAGE_OWNER
bool "Track page owner"
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1e3447bccdb1..5f2be8c8df11 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -372,31 +372,6 @@ static int __init default_bdi_init(void)
}
subsys_initcall(default_bdi_init);
-/*
- * This function is used when the first inode for this wb is marked dirty. It
- * wakes-up the corresponding bdi thread which should then take care of the
- * periodic background write-out of dirty inodes. Since the write-out would
- * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
- * set up a timer which wakes the bdi thread up later.
- *
- * Note, we wouldn't bother setting up the timer, but this function is on the
- * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
- * by delaying the wake-up.
- *
- * We have to be careful not to postpone flush work if it is scheduled for
- * earlier. Thus we use queue_delayed_work().
- */
-void wb_wakeup_delayed(struct bdi_writeback *wb)
-{
- unsigned long timeout;
-
- timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_irq(&wb->work_lock);
- if (test_bit(WB_registered, &wb->state))
- queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_irq(&wb->work_lock);
-}
-
static void wb_update_bandwidth_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(to_delayed_work(work),
@@ -436,7 +411,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
- wb->dirty_sleep = jiffies;
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
@@ -921,6 +895,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
+ bdi->last_bdp_sleep = jiffies;
return cgwb_bdi_init(bdi);
}
diff --git a/mm/compaction.c b/mm/compaction.c
index 4add68d40e8d..b961db601df4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2723,16 +2723,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
enum compact_priority prio, struct page **capture)
{
- int may_perform_io = (__force int)(gfp_mask & __GFP_IO);
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- /*
- * Check if the GFP flags allow compaction - GFP_NOIO is really
- * tricky context because the migration might require IO
- */
- if (!may_perform_io)
+ if (!gfp_compaction_allowed(gfp_mask))
return COMPACT_SKIPPED;
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 36f6f1d21ff0..5b325749fc12 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1026,6 +1026,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
damon_for_each_scheme(s, c) {
struct damos_quota *quota = &s->quota;
+ if (c->passed_sample_intervals != s->next_apply_sis)
+ continue;
+
if (!s->wmarks.activated)
continue;
@@ -1176,10 +1179,6 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
if (c->passed_sample_intervals != s->next_apply_sis)
continue;
- s->next_apply_sis +=
- (s->apply_interval_us ? s->apply_interval_us :
- c->attrs.aggr_interval) / sample_interval;
-
if (!s->wmarks.activated)
continue;
@@ -1195,6 +1194,14 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
damon_for_each_region_safe(r, next_r, t)
damon_do_apply_schemes(c, t, r);
}
+
+ damon_for_each_scheme(s, c) {
+ if (c->passed_sample_intervals != s->next_apply_sis)
+ continue;
+ s->next_apply_sis +=
+ (s->apply_interval_us ? s->apply_interval_us :
+ c->attrs.aggr_interval) / sample_interval;
+ }
}
/*
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index f2e5f9431892..3de2916a65c3 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
}
+static void damon_lru_sort_copy_quota_status(struct damos_quota *dst,
+ struct damos_quota *src)
+{
+ dst->total_charged_sz = src->total_charged_sz;
+ dst->total_charged_ns = src->total_charged_ns;
+ dst->charged_sz = src->charged_sz;
+ dst->charged_from = src->charged_from;
+ dst->charge_target_from = src->charge_target_from;
+ dst->charge_addr_from = src->charge_addr_from;
+}
+
static int damon_lru_sort_apply_parameters(void)
{
- struct damos *scheme;
+ struct damos *scheme, *hot_scheme, *cold_scheme;
+ struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL;
unsigned int hot_thres, cold_thres;
int err = 0;
@@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void)
if (err)
return err;
+ damon_for_each_scheme(scheme, ctx) {
+ if (!old_hot_scheme) {
+ old_hot_scheme = scheme;
+ continue;
+ }
+ old_cold_scheme = scheme;
+ }
+
hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
hot_thres_access_freq / 1000;
- scheme = damon_lru_sort_new_hot_scheme(hot_thres);
- if (!scheme)
+ hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres);
+ if (!hot_scheme)
return -ENOMEM;
- damon_set_schemes(ctx, &scheme, 1);
+ if (old_hot_scheme)
+ damon_lru_sort_copy_quota_status(&hot_scheme->quota,
+ &old_hot_scheme->quota);
cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval;
- scheme = damon_lru_sort_new_cold_scheme(cold_thres);
- if (!scheme)
+ cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres);
+ if (!cold_scheme) {
+ damon_destroy_scheme(hot_scheme);
return -ENOMEM;
- damon_add_scheme(ctx, scheme);
+ }
+ if (old_cold_scheme)
+ damon_lru_sort_copy_quota_status(&cold_scheme->quota,
+ &old_cold_scheme->quota);
+
+ damon_set_schemes(ctx, &hot_scheme, 1);
+ damon_add_scheme(ctx, cold_scheme);
return damon_set_region_biggest_system_ram_default(target,
&monitor_region_start,
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index ab974e477d2f..66e190f0374a 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -150,9 +150,20 @@ static struct damos *damon_reclaim_new_scheme(void)
&damon_reclaim_wmarks);
}
+static void damon_reclaim_copy_quota_status(struct damos_quota *dst,
+ struct damos_quota *src)
+{
+ dst->total_charged_sz = src->total_charged_sz;
+ dst->total_charged_ns = src->total_charged_ns;
+ dst->charged_sz = src->charged_sz;
+ dst->charged_from = src->charged_from;
+ dst->charge_target_from = src->charge_target_from;
+ dst->charge_addr_from = src->charge_addr_from;
+}
+
static int damon_reclaim_apply_parameters(void)
{
- struct damos *scheme;
+ struct damos *scheme, *old_scheme;
struct damos_filter *filter;
int err = 0;
@@ -164,6 +175,11 @@ static int damon_reclaim_apply_parameters(void)
scheme = damon_reclaim_new_scheme();
if (!scheme)
return -ENOMEM;
+ if (!list_empty(&ctx->schemes)) {
+ damon_for_each_scheme(old_scheme, ctx)
+ damon_reclaim_copy_quota_status(&scheme->quota,
+ &old_scheme->quota);
+ }
if (skip_anon) {
filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
if (!filter) {
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 8dbaac6e5c2d..ae0f0b314f3a 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1905,6 +1905,10 @@ void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
damon_for_each_scheme(scheme, ctx) {
struct damon_sysfs_scheme *sysfs_scheme;
+ /* user could have removed the scheme sysfs dir */
+ if (i >= sysfs_schemes->nr)
+ break;
+
sysfs_scheme = sysfs_schemes->schemes_arr[i];
damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals,
&scheme->quota);
@@ -2194,7 +2198,7 @@ static void damos_tried_regions_init_upd_status(
sysfs_regions->upd_timeout_jiffies = jiffies +
2 * usecs_to_jiffies(scheme->apply_interval_us ?
scheme->apply_interval_us :
- ctx->attrs.sample_interval);
+ ctx->attrs.aggr_interval);
}
}
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 5662e29fe253..65c19025da3d 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
vaddr &= HPAGE_PUD_MASK;
pud = pfn_pud(args->pud_pfn, args->page_prot);
+ /*
+ * Some architectures have debug checks to make sure
+ * huge pud mapping are only found with devmap entries
+ * For now test with only devmap entries.
+ */
+ pud = pud_mkdevmap(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud);
flush_dcache_page(page);
pudp_set_wrprotect(args->mm, vaddr, args->pudp);
@@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(args->pud_pfn, args->page_prot);
+ pud = pud_mkdevmap(pud);
pud = pud_wrprotect(pud);
pud = pud_mkclean(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud);
@@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
#endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(args->pud_pfn, args->page_prot);
+ pud = pud_mkdevmap(pud);
pud = pud_mkyoung(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud);
flush_dcache_page(page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db..8df4797c5287 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2609,15 +2609,6 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
/*
- * Pairs with a barrier in
- * block_write_end()->mark_buffer_dirty() or other page
- * dirtying routines like iomap_write_end() to ensure
- * changes to page contents are visible before we see
- * increased inode size.
- */
- smp_rmb();
-
- /*
* Once we start copying data, we don't want to be touching any
* cachelines that might be contended:
*/
@@ -4111,28 +4102,40 @@ static void filemap_cachestat(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
+ int order;
unsigned long nr_pages;
pgoff_t folio_first_index, folio_last_index;
+ /*
+ * Don't deref the folio. It is not pinned, and might
+ * get freed (and reused) underneath us.
+ *
+ * We *could* pin it, but that would be expensive for
+ * what should be a fast and lightweight syscall.
+ *
+ * Instead, derive all information of interest from
+ * the rcu-protected xarray.
+ */
+
if (xas_retry(&xas, folio))
continue;
+ order = xa_get_order(xas.xa, xas.xa_index);
+ nr_pages = 1 << order;
+ folio_first_index = round_down(xas.xa_index, 1 << order);
+ folio_last_index = folio_first_index + nr_pages - 1;
+
+ /* Folios might straddle the range boundaries, only count covered pages */
+ if (folio_first_index < first_index)
+ nr_pages -= first_index - folio_first_index;
+
+ if (folio_last_index > last_index)
+ nr_pages -= folio_last_index - last_index;
+
if (xa_is_value(folio)) {
/* page is evicted */
void *shadow = (void *)folio;
bool workingset; /* not used */
- int order = xa_get_order(xas.xa, xas.xa_index);
-
- nr_pages = 1 << order;
- folio_first_index = round_down(xas.xa_index, 1 << order);
- folio_last_index = folio_first_index + nr_pages - 1;
-
- /* Folios might straddle the range boundaries, only count covered pages */
- if (folio_first_index < first_index)
- nr_pages -= first_index - folio_first_index;
-
- if (folio_last_index > last_index)
- nr_pages -= folio_last_index - last_index;
cs->nr_evicted += nr_pages;
@@ -4150,24 +4153,13 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
- nr_pages = folio_nr_pages(folio);
- folio_first_index = folio_pgoff(folio);
- folio_last_index = folio_first_index + nr_pages - 1;
-
- /* Folios might straddle the range boundaries, only count covered pages */
- if (folio_first_index < first_index)
- nr_pages -= first_index - folio_first_index;
-
- if (folio_last_index > last_index)
- nr_pages -= folio_last_index - last_index;
-
/* page is in cache */
cs->nr_cache += nr_pages;
- if (folio_test_dirty(folio))
+ if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY))
cs->nr_dirty += nr_pages;
- if (folio_test_writeback(folio))
+ if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK))
cs->nr_writeback += nr_pages;
resched:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 94ef5c02b459..94c958f7ebb5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -37,6 +37,7 @@
#include <linux/page_owner.h>
#include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
+#include <linux/compat.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -809,7 +810,10 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
{
loff_t off_end = off + len;
loff_t off_align = round_up(off, size);
- unsigned long len_pad, ret;
+ unsigned long len_pad, ret, off_sub;
+
+ if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
+ return 0;
if (off_end <= off_align || (off_end - off_align) < size)
return 0;
@@ -835,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
if (ret == addr)
return addr;
- ret += (off - ret) & (size - 1);
+ off_sub = (off - ret) & (size - 1);
+
+ if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown &&
+ !off_sub)
+ return ret + size;
+
+ ret += off_sub;
return ret;
}
@@ -2437,7 +2447,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
page = pmd_page(old_pmd);
folio = page_folio(page);
if (!folio_test_dirty(folio) && pmd_dirty(old_pmd))
- folio_set_dirty(folio);
+ folio_mark_dirty(folio);
if (!folio_test_referenced(folio) && pmd_young(old_pmd))
folio_set_referenced(folio);
folio_remove_rmap_pmd(folio, page, vma);
@@ -3563,7 +3573,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
}
if (pmd_dirty(pmdval))
- folio_set_dirty(folio);
+ folio_mark_dirty(folio);
if (pmd_write(pmdval))
entry = make_writable_migration_entry(page_to_pfn(page));
else if (anon_exclusive)
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 610efae91220..6ca63e8dda74 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags)
{
depot_stack_handle_t stack;
- stack = kasan_save_stack(flags,
- STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
+ stack = kasan_save_stack(flags, STACK_DEPOT_FLAG_CAN_ALLOC);
kasan_set_track(track, stack);
}
@@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object,
return true;
/*
- * If the object is not put into quarantine, it will likely be quickly
- * reallocated. Thus, release its metadata now.
+ * Note: Keep per-object metadata to allow KASAN print stack traces for
+ * use-after-free-before-realloc bugs.
*/
- kasan_release_object_meta(cache, object);
/* Let slab put the object onto the freelist. */
return false;
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index df6627f62402..6310a180278b 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -334,14 +334,6 @@ DEFINE_ASAN_SET_SHADOW(f3);
DEFINE_ASAN_SET_SHADOW(f5);
DEFINE_ASAN_SET_SHADOW(f8);
-/* Only allow cache merging when no per-object metadata is present. */
-slab_flags_t kasan_never_merge(void)
-{
- if (!kasan_requires_meta())
- return 0;
- return SLAB_KASAN;
-}
-
/*
* Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
* For larger allocations larger redzones are used.
@@ -370,15 +362,13 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
return;
/*
- * SLAB_KASAN is used to mark caches that are sanitized by KASAN
- * and that thus have per-object metadata.
- * Currently this flag is used in two places:
- * 1. In slab_ksize() to account for per-object metadata when
- * calculating the size of the accessible memory within the object.
- * 2. In slab_common.c via kasan_never_merge() to prevent merging of
- * caches with per-object metadata.
+ * SLAB_KASAN is used to mark caches that are sanitized by KASAN and
+ * that thus have per-object metadata. Currently, this flag is used in
+ * slab_ksize() to account for per-object metadata when calculating the
+ * size of the accessible memory within the object. Additionally, we use
+ * SLAB_NO_MERGE to prevent merging of caches with per-object metadata.
*/
- *flags |= SLAB_KASAN;
+ *flags |= SLAB_KASAN | SLAB_NO_MERGE;
ok_size = *size;
@@ -485,16 +475,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
if (alloc_meta) {
/* Zero out alloc meta to mark it as invalid. */
__memset(alloc_meta, 0, sizeof(*alloc_meta));
-
- /*
- * Prepare the lock for saving auxiliary stack traces.
- * Temporarily disable KASAN bug reporting to allow instrumented
- * raw_spin_lock_init to access aux_lock, which resides inside
- * of a redzone.
- */
- kasan_disable_current();
- raw_spin_lock_init(&alloc_meta->aux_lock);
- kasan_enable_current();
}
/*
@@ -506,47 +486,23 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
static void release_alloc_meta(struct kasan_alloc_meta *meta)
{
- /* Evict the stack traces from stack depot. */
- stack_depot_put(meta->alloc_track.stack);
- stack_depot_put(meta->aux_stack[0]);
- stack_depot_put(meta->aux_stack[1]);
-
- /*
- * Zero out alloc meta to mark it as invalid but keep aux_lock
- * initialized to avoid having to reinitialize it when another object
- * is allocated in the same slot.
- */
- __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track));
- __memset(meta->aux_stack, 0, sizeof(meta->aux_stack));
+ /* Zero out alloc meta to mark it as invalid. */
+ __memset(meta, 0, sizeof(*meta));
}
static void release_free_meta(const void *object, struct kasan_free_meta *meta)
{
+ if (!kasan_arch_is_ready())
+ return;
+
/* Check if free meta is valid. */
if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META)
return;
- /* Evict the stack trace from the stack depot. */
- stack_depot_put(meta->free_track.stack);
-
/* Mark free meta as invalid. */
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
}
-void kasan_release_object_meta(struct kmem_cache *cache, const void *object)
-{
- struct kasan_alloc_meta *alloc_meta;
- struct kasan_free_meta *free_meta;
-
- alloc_meta = kasan_get_alloc_meta(cache, object);
- if (alloc_meta)
- release_alloc_meta(alloc_meta);
-
- free_meta = kasan_get_free_meta(cache, object);
- if (free_meta)
- release_free_meta(object, free_meta);
-}
-
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
{
struct kasan_cache *info = &cache->kasan_info;
@@ -571,8 +527,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
- depot_stack_handle_t new_handle, old_handle;
- unsigned long flags;
if (is_kfence_address(addr) || !slab)
return;
@@ -583,33 +537,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
if (!alloc_meta)
return;
- new_handle = kasan_save_stack(0, depot_flags);
-
- /*
- * Temporarily disable KASAN bug reporting to allow instrumented
- * spinlock functions to access aux_lock, which resides inside of a
- * redzone.
- */
- kasan_disable_current();
- raw_spin_lock_irqsave(&alloc_meta->aux_lock, flags);
- old_handle = alloc_meta->aux_stack[1];
alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
- alloc_meta->aux_stack[0] = new_handle;
- raw_spin_unlock_irqrestore(&alloc_meta->aux_lock, flags);
- kasan_enable_current();
-
- stack_depot_put(old_handle);
+ alloc_meta->aux_stack[0] = kasan_save_stack(0, depot_flags);
}
void kasan_record_aux_stack(void *addr)
{
- return __kasan_record_aux_stack(addr,
- STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
+ return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_CAN_ALLOC);
}
void kasan_record_aux_stack_noalloc(void *addr)
{
- return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_GET);
+ return __kasan_record_aux_stack(addr, 0);
}
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
@@ -620,7 +559,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
if (!alloc_meta)
return;
- /* Evict previous stack traces (might exist for krealloc or mempool). */
+ /* Invalidate previous stack traces (might exist for krealloc or mempool). */
release_alloc_meta(alloc_meta);
kasan_save_track(&alloc_meta->alloc_track, flags);
@@ -634,7 +573,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object)
if (!free_meta)
return;
- /* Evict previous stack trace (might exist for mempool). */
+ /* Invalidate previous stack trace (might exist for mempool). */
release_free_meta(object, free_meta);
kasan_save_track(&free_meta->free_track, 0);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index d0f172f2b978..fb2b9ac0659a 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -6,7 +6,6 @@
#include <linux/kasan.h>
#include <linux/kasan-tags.h>
#include <linux/kfence.h>
-#include <linux/spinlock.h>
#include <linux/stackdepot.h>
#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
@@ -265,13 +264,6 @@ struct kasan_global {
struct kasan_alloc_meta {
struct kasan_track alloc_track;
/* Free track is stored in kasan_free_meta. */
- /*
- * aux_lock protects aux_stack from accesses from concurrent
- * kasan_record_aux_stack calls. It is a raw spinlock to avoid sleeping
- * on RT kernels, as kasan_record_aux_stack_noalloc can be called from
- * non-sleepable contexts.
- */
- raw_spinlock_t aux_lock;
depot_stack_handle_t aux_stack[2];
};
@@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
const void *object);
void kasan_init_object_meta(struct kmem_cache *cache, const void *object);
-void kasan_release_object_meta(struct kmem_cache *cache, const void *object);
#else
static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { }
-static inline void kasan_release_object_meta(struct kmem_cache *cache, const void *object) { }
#endif
depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 3ba02efb952a..6958aa713c67 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
void *object = qlink_to_object(qlink, cache);
struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object);
- kasan_release_object_meta(cache, object);
+ /*
+ * Note: Keep per-object metadata to allow KASAN print stack traces for
+ * use-after-free-before-realloc bugs.
+ */
/*
* If init_on_free is enabled and KASAN's free metadata is stored in
diff --git a/mm/madvise.c b/mm/madvise.c
index 912155a94ed5..cfa5e7288261 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -429,6 +429,7 @@ restart:
if (++batch_count == SWAP_CLUSTER_MAX) {
batch_count = 0;
if (need_resched()) {
+ arch_leave_lazy_mmu_mode();
pte_unmap_unlock(start_pte, ptl);
cond_resched();
goto restart;
diff --git a/mm/memblock.c b/mm/memblock.c
index abd92869874d..d09136e040d3 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
/*
* Address comparison utilities
*/
-static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
- phys_addr_t base2, phys_addr_t size2)
+unsigned long __init_memblock
+memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
+ phys_addr_t size2)
{
return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}
@@ -2176,6 +2177,9 @@ static void __init memmap_init_reserved_pages(void)
start = region->base;
end = start + region->size;
+ if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES)
+ nid = early_pfn_to_nid(PFN_DOWN(start));
+
reserve_bootmem_region(start, end, nid);
}
}
@@ -2246,6 +2250,7 @@ static const char * const flagname[] = {
[ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
[ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
[ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
+ [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
};
static int memblock_debug_show(struct seq_file *m, void *private)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e4c8735e7c85..61932c9215e7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -621,6 +621,15 @@ static inline int memcg_events_index(enum vm_event_item idx)
}
struct memcg_vmstats_percpu {
+ /* Stats updates since the last flush */
+ unsigned int stats_updates;
+
+ /* Cached pointers for fast iteration in memcg_rstat_updated() */
+ struct memcg_vmstats_percpu *parent;
+ struct memcg_vmstats *vmstats;
+
+ /* The above should fit a single cacheline for memcg_rstat_updated() */
+
/* Local (CPU and cgroup) page state & events */
long state[MEMCG_NR_STAT];
unsigned long events[NR_MEMCG_EVENTS];
@@ -632,10 +641,7 @@ struct memcg_vmstats_percpu {
/* Cgroup1: threshold notifications & softlimit tree updates */
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
-
- /* Stats updates since the last flush */
- unsigned int stats_updates;
-};
+} ____cacheline_aligned;
struct memcg_vmstats {
/* Aggregated (CPU and subtree) page state & events */
@@ -698,36 +704,35 @@ static void memcg_stats_unlock(void)
}
-static bool memcg_should_flush_stats(struct mem_cgroup *memcg)
+static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
{
- return atomic64_read(&memcg->vmstats->stats_updates) >
+ return atomic64_read(&vmstats->stats_updates) >
MEMCG_CHARGE_BATCH * num_online_cpus();
}
static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
{
+ struct memcg_vmstats_percpu *statc;
int cpu = smp_processor_id();
- unsigned int x;
if (!val)
return;
cgroup_rstat_updated(memcg->css.cgroup, cpu);
-
- for (; memcg; memcg = parent_mem_cgroup(memcg)) {
- x = __this_cpu_add_return(memcg->vmstats_percpu->stats_updates,
- abs(val));
-
- if (x < MEMCG_CHARGE_BATCH)
+ statc = this_cpu_ptr(memcg->vmstats_percpu);
+ for (; statc; statc = statc->parent) {
+ statc->stats_updates += abs(val);
+ if (statc->stats_updates < MEMCG_CHARGE_BATCH)
continue;
/*
* If @memcg is already flush-able, increasing stats_updates is
* redundant. Avoid the overhead of the atomic update.
*/
- if (!memcg_should_flush_stats(memcg))
- atomic64_add(x, &memcg->vmstats->stats_updates);
- __this_cpu_write(memcg->vmstats_percpu->stats_updates, 0);
+ if (!memcg_vmstats_needs_flush(statc->vmstats))
+ atomic64_add(statc->stats_updates,
+ &statc->vmstats->stats_updates);
+ statc->stats_updates = 0;
}
}
@@ -756,7 +761,7 @@ void mem_cgroup_flush_stats(struct mem_cgroup *memcg)
if (!memcg)
memcg = root_mem_cgroup;
- if (memcg_should_flush_stats(memcg))
+ if (memcg_vmstats_needs_flush(memcg->vmstats))
do_flush_stats(memcg);
}
@@ -770,7 +775,7 @@ void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg)
static void flush_memcg_stats_dwork(struct work_struct *w)
{
/*
- * Deliberately ignore memcg_should_flush_stats() here so that flushing
+ * Deliberately ignore memcg_vmstats_needs_flush() here so that flushing
* in latency-sensitive paths is as cheap as possible.
*/
do_flush_stats(root_mem_cgroup);
@@ -2623,8 +2628,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
}
/*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Reclaims memory over the high limit. Called directly from
+ * try_charge() (context permitting), as well as from the userland
+ * return path where reclaim is always able to block.
*/
void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
@@ -2644,6 +2650,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask)
retry_reclaim:
/*
+ * Bail if the task is already exiting. Unlike memory.max,
+ * memory.high enforcement isn't as strict, and there is no
+ * OOM killer involved, which means the excess could already
+ * be much bigger (and still growing) than it could for
+ * memory.max; the dying task could get stuck in fruitless
+ * reclaim for a long time, which isn't desirable.
+ */
+ if (task_is_dying())
+ goto out;
+
+ /*
* The allocating task should reclaim at least the batch size, but for
* subsequent retries we only want to do what's necessary to prevent oom
* or breaching resource isolation.
@@ -2693,6 +2710,9 @@ retry_reclaim:
}
/*
+ * Reclaim didn't manage to push usage below the limit, slow
+ * this allocating task down.
+ *
* If we exit early, we're guaranteed to die (since
* schedule_timeout_killable sets TASK_KILLABLE). This means we don't
* need to account for any ill-begotten jiffies to pay them off later.
@@ -2887,11 +2907,17 @@ done_restock:
}
} while ((memcg = parent_mem_cgroup(memcg)));
+ /*
+ * Reclaim is set up above to be called from the userland
+ * return path. But also attempt synchronous reclaim to avoid
+ * excessive overrun while the task is still inside the
+ * kernel. If this is successful, the return path will see it
+ * when it rechecks the overage and simply bail out.
+ */
if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
!(current->flags & PF_MEMALLOC) &&
- gfpflags_allow_blocking(gfp_mask)) {
+ gfpflags_allow_blocking(gfp_mask))
mem_cgroup_handle_over_high(gfp_mask);
- }
return 0;
}
@@ -5456,10 +5482,11 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
__mem_cgroup_free(memcg);
}
-static struct mem_cgroup *mem_cgroup_alloc(void)
+static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
{
+ struct memcg_vmstats_percpu *statc, *pstatc;
struct mem_cgroup *memcg;
- int node;
+ int node, cpu;
int __maybe_unused i;
long error = -ENOMEM;
@@ -5483,6 +5510,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (!memcg->vmstats_percpu)
goto fail;
+ for_each_possible_cpu(cpu) {
+ if (parent)
+ pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu);
+ statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
+ statc->parent = parent ? pstatc : NULL;
+ statc->vmstats = memcg->vmstats;
+ }
+
for_each_node(node)
if (alloc_mem_cgroup_per_node_info(memcg, node))
goto fail;
@@ -5528,7 +5563,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
struct mem_cgroup *memcg, *old_memcg;
old_memcg = set_active_memcg(parent);
- memcg = mem_cgroup_alloc();
+ memcg = mem_cgroup_alloc(parent);
set_active_memcg(old_memcg);
if (IS_ERR(memcg))
return ERR_CAST(memcg);
@@ -7936,9 +7971,13 @@ bool mem_cgroup_swap_full(struct folio *folio)
static int __init setup_swap_account(char *s)
{
- pr_warn_once("The swapaccount= commandline option is deprecated. "
- "Please report your usecase to linux-mm@kvack.org if you "
- "depend on this functionality.\n");
+ bool res;
+
+ if (!kstrtobool(s, &res) && !res)
+ pr_warn_once("The swapaccount=0 commandline option is deprecated "
+ "in favor of configuring swap control via cgroupfs. "
+ "Please report your usecase to linux-mm@kvack.org if you "
+ "depend on this functionality.\n");
return 1;
}
__setup("swapaccount=", setup_swap_account);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4f9b61f4a668..9349948f1abf 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p,
int count = page_count(p) - 1;
if (extra_pins)
- count -= 1;
+ count -= folio_nr_pages(page_folio(p));
if (count > 0) {
pr_err("%#lx: %s still referenced by %d users\n",
@@ -1377,6 +1377,9 @@ void ClearPageHWPoisonTakenOff(struct page *page)
*/
static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
{
+ if (PageSlab(page))
+ return false;
+
/* Soft offline could migrate non-LRU movable pages */
if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
return true;
diff --git a/mm/memory.c b/mm/memory.c
index 7e1f4849463a..0bfc8b007c01 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
delay_rmap = 0;
if (!folio_test_anon(folio)) {
if (pte_dirty(ptent)) {
- folio_set_dirty(folio);
+ folio_mark_dirty(folio);
if (tlb_delay_rmap(tlb)) {
delay_rmap = 1;
force_flush = 1;
@@ -3799,6 +3799,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
struct page *page;
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
+ bool need_clear_cache = false;
bool exclusive = false;
swp_entry_t entry;
pte_t pte;
@@ -3867,6 +3868,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
__swap_count(entry) == 1) {
+ /*
+ * Prevent parallel swapin from proceeding with
+ * the cache flag. Otherwise, another thread may
+ * finish swapin first, free the entry, and swapout
+ * reusing the same entry. It's undetectable as
+ * pte_same() returns true due to entry reuse.
+ */
+ if (swapcache_prepare(entry)) {
+ /* Relax a bit to prevent rapid repeated page faults */
+ schedule_timeout_uninterruptible(1);
+ goto out;
+ }
+ need_clear_cache = true;
+
/* skip swapcache */
folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
vma, vmf->address, false);
@@ -4117,6 +4132,9 @@ unlock:
if (vmf->pte)
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
+ /* Clear the swap cache pin for direct swapin after PTL unlock */
+ if (need_clear_cache)
+ swapcache_clear(si, entry);
if (si)
put_swap_device(si);
return ret;
@@ -4131,6 +4149,8 @@ out_release:
folio_unlock(swapcache);
folio_put(swapcache);
}
+ if (need_clear_cache)
+ swapcache_clear(si, entry);
if (si)
put_swap_device(si);
return ret;
@@ -5478,7 +5498,7 @@ static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs
return true;
if (regs && !user_mode(regs)) {
- unsigned long ip = instruction_pointer(regs);
+ unsigned long ip = exception_ip(regs);
if (!search_exception_tables(ip))
return false;
}
@@ -5503,7 +5523,7 @@ static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_r
{
mmap_read_unlock(mm);
if (regs && !user_mode(regs)) {
- unsigned long ip = instruction_pointer(regs);
+ unsigned long ip = exception_ip(regs);
if (!search_exception_tables(ip))
return false;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index cc9f2bcd73b4..c27b1f8097d4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2519,6 +2519,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
if (managed_zone(pgdat->node_zones + z))
break;
}
+
+ /*
+ * If there are no managed zones, it should not proceed
+ * further.
+ */
+ if (z < 0)
+ return 0;
+
wakeup_kswapd(pgdat->node_zones + z, 0,
folio_order(folio), ZONE_MOVABLE);
return 0;
diff --git a/mm/mmap.c b/mm/mmap.c
index b78e83d351d2..3281287771c9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -954,13 +954,21 @@ static struct vm_area_struct
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
+ /*
+ * can_vma_merge_after() assumed we would not be
+ * removing prev vma, so it skipped the check
+ * for vm_ops->close, but we are removing curr
+ */
+ if (curr->vm_ops && curr->vm_ops->close)
+ err = -EINVAL;
remove = curr;
} else { /* case 5 */
adjust = curr;
adj_start = (end - curr->vm_start);
}
+ if (!err)
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else { /* merge_next */
vma_start_write(next);
@@ -1825,15 +1833,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
- * do_mmap() will clear pgoff, so match alignment.
*/
- pgoff = 0;
get_area = shmem_get_unmapped_area;
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
/* Ensures that larger anonymous mappings are THP aligned. */
get_area = thp_get_unmapped_area;
}
+ /* Always treat pgoff as zero for anonymous memory. */
+ if (!file)
+ pgoff = 0;
+
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index cd4e4ae77c40..3f255534986a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
*/
dtc->wb_thresh = __wb_calc_thresh(dtc);
dtc->wb_bg_thresh = dtc->thresh ?
- div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+ div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
/*
* In order to avoid the stacked BDI deadlock we need
@@ -1921,7 +1921,7 @@ pause:
break;
}
__set_current_state(TASK_KILLABLE);
- wb->dirty_sleep = now;
+ bdi->last_bdp_sleep = jiffies;
io_schedule_timeout(pause);
current->dirty_paused_when = now + pause;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 150d4f23b010..62fc2e8f2733 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4041,6 +4041,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+ bool can_compact = gfp_compaction_allowed(gfp_mask);
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
@@ -4111,7 +4112,7 @@ restart:
* Don't try this for allocations that are allowed to ignore
* watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
- if (can_direct_reclaim &&
+ if (can_direct_reclaim && can_compact &&
(costly_order ||
(order > 0 && ac->migratetype != MIGRATE_MOVABLE))
&& !gfp_pfmemalloc_allowed(gfp_mask)) {
@@ -4209,9 +4210,10 @@ retry:
/*
* Do not retry costly high order allocations unless they are
- * __GFP_RETRY_MAYFAIL
+ * __GFP_RETRY_MAYFAIL and we can compact
*/
- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+ if (costly_order && (!can_compact ||
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -4224,7 +4226,7 @@ retry:
* implementation of the compaction depends on the sufficient amount
* of free memory (see __compaction_suitable)
*/
- if (did_some_progress > 0 &&
+ if (did_some_progress > 0 && can_compact &&
should_compact_retry(ac, order, alloc_flags,
compact_result, &compact_priority,
&compaction_retries))
@@ -4685,8 +4687,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
- __GFP_NOMEMALLOC;
+ gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
+ __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
@@ -4699,6 +4701,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
return page;
}
+void page_frag_cache_drain(struct page_frag_cache *nc)
+{
+ if (!nc->va)
+ return;
+
+ __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
+ nc->va = NULL;
+}
+EXPORT_SYMBOL(page_frag_cache_drain);
+
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
@@ -4708,9 +4720,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
}
EXPORT_SYMBOL(__page_frag_cache_drain);
-void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
@@ -4779,7 +4791,7 @@ refill:
return nc->va + offset;
}
-EXPORT_SYMBOL(page_frag_alloc_align);
+EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
diff --git a/mm/readahead.c b/mm/readahead.c
index 23620c57c122..2648ec4f0494 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
if (!folio)
return -ENOMEM;
- mark = round_up(mark, 1UL << order);
+ mark = round_down(mark, 1UL << order);
if (index == mark)
folio_set_readahead(folio);
err = filemap_add_folio(ractl->mapping, folio, index, gfp);
@@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
* It's the expected callback index, assume sequential access.
* Ramp up sizes, and push forward the readahead window.
*/
- expected = round_up(ra->start + ra->size - ra->async_size,
+ expected = round_down(ra->start + ra->size - ra->async_size,
1UL << order);
if (index == expected || index == (ra->start + ra->size)) {
ra->start += ra->size;
diff --git a/mm/shmem.c b/mm/shmem.c
index d7c84ff62186..29d5a024df48 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3374,7 +3374,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
- if (!simple_empty(dentry))
+ if (!simple_offset_empty(dentry))
return -ENOTEMPTY;
drop_nlink(d_inode(dentry));
@@ -3431,7 +3431,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return simple_offset_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
- if (!simple_empty(new_dentry))
+ if (!simple_offset_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
@@ -4355,7 +4355,9 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#ifdef CONFIG_TMPFS_POSIX_ACL
sb->s_flags |= SB_POSIXACL;
#endif
- uuid_gen(&sb->s_uuid);
+ uuid_t uuid;
+ uuid_gen(&uuid);
+ super_set_uuid(sb, uuid.b, sizeof(uuid));
#ifdef CONFIG_TMPFS_QUOTA
if (ctx->seen & SHMEM_SEEN_QUOTA) {
diff --git a/mm/slab.h b/mm/slab.h
index 54deeb0428c6..d2bc9b191222 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -363,7 +363,6 @@ static inline int objs_per_slab(const struct kmem_cache *cache,
enum slab_state {
DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */
- PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */
};
@@ -387,7 +386,7 @@ extern const struct kmalloc_info_struct {
/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
-void create_kmalloc_caches(slab_flags_t);
+void create_kmalloc_caches(void);
extern u8 kmalloc_size_index[24];
@@ -422,8 +421,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags);
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
void __init kmem_cache_init(void);
-void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
- slab_flags_t flags);
extern void create_boot_cache(struct kmem_cache *, const char *name,
unsigned int size, slab_flags_t flags,
unsigned int useroffset, unsigned int usersize);
@@ -435,8 +432,7 @@ struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *));
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name);
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);
static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
@@ -469,7 +465,6 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_STORE_USER | \
SLAB_TRACE | \
SLAB_CONSISTENCY_CHECKS | \
- SLAB_MEM_SPREAD | \
SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
@@ -528,7 +523,7 @@ static inline bool __slub_debug_enabled(void)
#endif
/*
- * Returns true if any of the specified slub_debug flags is enabled for the
+ * Returns true if any of the specified slab_debug flags is enabled for the
* cache. Use only for flags parsed by setup_slub_debug() as it also enables
* the static key.
*/
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 238293b1dbe1..23af762148ca 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -50,7 +50,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB | SLAB_NO_MERGE | kasan_never_merge())
+ SLAB_FAILSLAB | SLAB_NO_MERGE)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -172,7 +172,7 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
- flags = kmem_cache_flags(size, flags, name);
+ flags = kmem_cache_flags(flags, name);
if (flags & SLAB_NEVER_MERGE)
return NULL;
@@ -282,7 +282,7 @@ kmem_cache_create_usercopy(const char *name,
#ifdef CONFIG_SLUB_DEBUG
/*
- * If no slub_debug was enabled globally, the static key is not yet
+ * If no slab_debug was enabled globally, the static key is not yet
* enabled by setup_slub_debug(). Enable it if the cache is being
* created with any of the debugging flags passed explicitly.
* It's also possible that this is the first cache created with
@@ -404,8 +404,12 @@ EXPORT_SYMBOL(kmem_cache_create);
*/
static void kmem_cache_release(struct kmem_cache *s)
{
- sysfs_slab_unlink(s);
- sysfs_slab_release(s);
+ if (slab_state >= FULL) {
+ sysfs_slab_unlink(s);
+ sysfs_slab_release(s);
+ } else {
+ slab_kmem_cache_release(s);
+ }
}
#else
static void kmem_cache_release(struct kmem_cache *s)
@@ -766,7 +770,7 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
}
/*
- * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
+ * kmalloc_info[] is to make slab_debug=,kmalloc-xx option work at boot time.
* kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is
* kmalloc-2M.
*/
@@ -853,9 +857,10 @@ static unsigned int __kmalloc_minalign(void)
return max(minalign, arch_slab_minalign());
}
-void __init
-new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
+static void __init
+new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
{
+ slab_flags_t flags = 0;
unsigned int minalign = __kmalloc_minalign();
unsigned int aligned_size = kmalloc_info[idx].size;
int aligned_idx = idx;
@@ -902,7 +907,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
* may already have been created because they were needed to
* enable allocations for slab creation.
*/
-void __init create_kmalloc_caches(slab_flags_t flags)
+void __init create_kmalloc_caches(void)
{
int i;
enum kmalloc_cache_type type;
@@ -913,7 +918,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) {
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
if (!kmalloc_caches[type][i])
- new_kmalloc_cache(i, type, flags);
+ new_kmalloc_cache(i, type);
/*
* Caches that are not of the two-to-the-power-of size.
@@ -922,10 +927,10 @@ void __init create_kmalloc_caches(slab_flags_t flags)
*/
if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
!kmalloc_caches[type][1])
- new_kmalloc_cache(1, type, flags);
+ new_kmalloc_cache(1, type);
if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
!kmalloc_caches[type][2])
- new_kmalloc_cache(2, type, flags);
+ new_kmalloc_cache(2, type);
}
}
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
diff --git a/mm/slub.c b/mm/slub.c
index 2ef88bbf56a3..1bb2a93cf7b6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -295,7 +295,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/*
* Debugging flags that require metadata to be stored in the slab. These get
- * disabled when slub_debug=O is used and a cache's min order increases with
+ * disabled when slab_debug=O is used and a cache's min order increases with
* metadata.
*/
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -306,13 +306,13 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/* Internal SLUB flags */
/* Poison object */
-#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
+#define __OBJECT_POISON __SLAB_FLAG_BIT(_SLAB_OBJECT_POISON)
/* Use cmpxchg_double */
#ifdef system_has_freelist_aba
-#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
+#define __CMPXCHG_DOUBLE __SLAB_FLAG_BIT(_SLAB_CMPXCHG_DOUBLE)
#else
-#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0U)
+#define __CMPXCHG_DOUBLE __SLAB_FLAG_UNUSED
#endif
/*
@@ -391,7 +391,7 @@ struct kmem_cache_cpu {
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
- struct slab *partial; /* Partially allocated frozen slabs */
+ struct slab *partial; /* Partially allocated slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
@@ -1498,16 +1498,8 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
{
struct kmem_cache_node *n = get_node(s, node);
- /*
- * May be called early in order to allocate a slab for the
- * kmem_cache_node structure. Solve the chicken-egg
- * dilemma by deferring the increment of the count during
- * bootstrap (see early_kmem_cache_node_alloc).
- */
- if (likely(n)) {
- atomic_long_inc(&n->nr_slabs);
- atomic_long_add(objects, &n->total_objects);
- }
+ atomic_long_inc(&n->nr_slabs);
+ atomic_long_add(objects, &n->total_objects);
}
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
@@ -1616,7 +1608,7 @@ static inline int free_consistency_checks(struct kmem_cache *s,
}
/*
- * Parse a block of slub_debug options. Blocks are delimited by ';'
+ * Parse a block of slab_debug options. Blocks are delimited by ';'
*
* @str: start of block
* @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
@@ -1677,7 +1669,7 @@ parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
break;
default:
if (init)
- pr_err("slub_debug option '%c' unknown. skipped\n", *str);
+ pr_err("slab_debug option '%c' unknown. skipped\n", *str);
}
}
check_slabs:
@@ -1736,7 +1728,7 @@ static int __init setup_slub_debug(char *str)
/*
* For backwards compatibility, a single list of flags with list of
* slabs means debugging is only changed for those slabs, so the global
- * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
+ * slab_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
* on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as
* long as there is no option specifying flags without a slab list.
*/
@@ -1760,21 +1752,20 @@ out:
return 1;
}
-__setup("slub_debug", setup_slub_debug);
+__setup("slab_debug", setup_slub_debug);
+__setup_param("slub_debug", slub_debug, setup_slub_debug, 0);
/*
* kmem_cache_flags - apply debugging options to the cache
- * @object_size: the size of an object without meta data
* @flags: flags to set
* @name: name of the cache
*
* Debug option(s) are applied to @flags. In addition to the debug
* option(s), if a slab name (or multiple) is specified i.e.
- * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
+ * slab_debug=<Debug-Options>,<slab name1>,<slab name2> ...
* then only the select slabs will receive the debug option(s).
*/
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name)
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
char *iter;
size_t len;
@@ -1850,8 +1841,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name)
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
return flags;
}
@@ -2038,11 +2028,6 @@ void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
}
#else /* CONFIG_MEMCG_KMEM */
-static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
-{
- return NULL;
-}
-
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
@@ -2243,7 +2228,7 @@ static void __init init_freelist_randomization(void)
}
/* Get the next entry on the pre-computed freelist randomized */
-static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
+static void *next_freelist_entry(struct kmem_cache *s,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
@@ -2282,13 +2267,12 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
start = fixup_red_left(s, slab_address(slab));
/* First entry is used as the base of the freelist */
- cur = next_freelist_entry(s, slab, &pos, start, page_limit,
- freelist_count);
+ cur = next_freelist_entry(s, &pos, start, page_limit, freelist_count);
cur = setup_object(s, cur);
slab->freelist = cur;
for (idx = 1; idx < slab->objects; idx++) {
- next = next_freelist_entry(s, slab, &pos, start, page_limit,
+ next = next_freelist_entry(s, &pos, start, page_limit,
freelist_count);
next = setup_object(s, next);
set_freepointer(s, cur, next);
@@ -3263,7 +3247,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
oo_order(s->min));
if (oo_order(s->min) > get_order(s->object_size))
- pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
+ pr_warn(" %s debugging increased min order, use slab_debug=O to disable.\n",
s->name);
for_each_kmem_cache_node(s, node, n) {
@@ -3326,7 +3310,6 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
counters = slab->counters;
new.counters = counters;
- VM_BUG_ON(!new.frozen);
new.inuse = slab->objects;
new.frozen = freelist != NULL;
@@ -3498,18 +3481,20 @@ new_slab:
slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
- local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- stat(s, CPU_PARTIAL_ALLOC);
- if (unlikely(!node_match(slab, node) ||
- !pfmemalloc_match(slab, gfpflags))) {
- slab->next = NULL;
- __put_partials(s, slab);
- continue;
+ if (likely(node_match(slab, node) &&
+ pfmemalloc_match(slab, gfpflags))) {
+ c->slab = slab;
+ freelist = get_freelist(s, slab);
+ VM_BUG_ON(!freelist);
+ stat(s, CPU_PARTIAL_ALLOC);
+ goto load_freelist;
}
- freelist = freeze_slab(s, slab);
- goto retry_load_slab;
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+ slab->next = NULL;
+ __put_partials(s, slab);
}
#endif
@@ -3792,11 +3777,11 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
zero_size = orig_size;
/*
- * When slub_debug is enabled, avoid memory initialization integrated
+ * When slab_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
- * penalty on production builds, as slub_debug is not intended to be
+ * penalty on production builds, as slab_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
@@ -4187,7 +4172,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* then add it.
*/
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
- remove_full(s, n, slab);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
@@ -4201,9 +4185,6 @@ slab_empty:
*/
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
- } else {
- /* Slab must be on the full list */
- remove_full(s, n, slab);
}
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -4702,8 +4683,8 @@ static unsigned int slub_min_objects;
* activity on the partial lists which requires taking the list_lock. This is
* less a concern for large slabs though which are rarely used.
*
- * slub_max_order specifies the order where we begin to stop considering the
- * number of objects in a slab as critical. If we reach slub_max_order then
+ * slab_max_order specifies the order where we begin to stop considering the
+ * number of objects in a slab as critical. If we reach slab_max_order then
* we try to keep the page order as low as possible. So we accept more waste
* of space in favor of a small page order.
*
@@ -4770,14 +4751,14 @@ static inline int calculate_order(unsigned int size)
* and backing off gradually.
*
* We start with accepting at most 1/16 waste and try to find the
- * smallest order from min_objects-derived/slub_min_order up to
- * slub_max_order that will satisfy the constraint. Note that increasing
+ * smallest order from min_objects-derived/slab_min_order up to
+ * slab_max_order that will satisfy the constraint. Note that increasing
* the order can only result in same or less fractional waste, not more.
*
* If that fails, we increase the acceptable fraction of waste and try
* again. The last iteration with fraction of 1/2 would effectively
* accept any waste and give us the order determined by min_objects, as
- * long as at least single object fits within slub_max_order.
+ * long as at least single object fits within slab_max_order.
*/
for (unsigned int fraction = 16; fraction > 1; fraction /= 2) {
order = calc_slab_order(size, min_order, slub_max_order,
@@ -4787,7 +4768,7 @@ static inline int calculate_order(unsigned int size)
}
/*
- * Doh this slab cannot be placed using slub_max_order.
+ * Doh this slab cannot be placed using slab_max_order.
*/
order = get_order(size);
if (order <= MAX_PAGE_ORDER)
@@ -4857,7 +4838,6 @@ static void early_kmem_cache_node_alloc(int node)
slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
BUG_ON(!slab);
- inc_slabs_node(kmem_cache_node, slab_nid(slab), slab->objects);
if (slab_nid(slab) != node) {
pr_err("SLUB: Unable to allocate memory from node %d\n", node);
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
@@ -5104,7 +5084,7 @@ static int calculate_sizes(struct kmem_cache *s)
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
- s->flags = kmem_cache_flags(s->size, flags, s->name);
+ s->flags = kmem_cache_flags(flags, s->name);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
s->random = get_random_long();
#endif
@@ -5313,7 +5293,9 @@ static int __init setup_slub_min_order(char *str)
return 1;
}
-__setup("slub_min_order=", setup_slub_min_order);
+__setup("slab_min_order=", setup_slub_min_order);
+__setup_param("slub_min_order=", slub_min_order, setup_slub_min_order, 0);
+
static int __init setup_slub_max_order(char *str)
{
@@ -5326,7 +5308,8 @@ static int __init setup_slub_max_order(char *str)
return 1;
}
-__setup("slub_max_order=", setup_slub_max_order);
+__setup("slab_max_order=", setup_slub_max_order);
+__setup_param("slub_max_order=", slub_max_order, setup_slub_max_order, 0);
static int __init setup_slub_min_objects(char *str)
{
@@ -5335,7 +5318,8 @@ static int __init setup_slub_min_objects(char *str)
return 1;
}
-__setup("slub_min_objects=", setup_slub_min_objects);
+__setup("slab_min_objects=", setup_slub_min_objects);
+__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
#ifdef CONFIG_HARDENED_USERCOPY
/*
@@ -5663,7 +5647,7 @@ void __init kmem_cache_init(void)
/* Now we can use the kmem_cache to allocate kmalloc slabs */
setup_kmalloc_cache_index_table();
- create_kmalloc_caches(0);
+ create_kmalloc_caches();
/* Setup random freelists for each cache */
init_freelist_randomization();
@@ -6792,14 +6776,12 @@ out_del_kobj:
void sysfs_slab_unlink(struct kmem_cache *s)
{
- if (slab_state >= FULL)
- kobject_del(&s->kobj);
+ kobject_del(&s->kobj);
}
void sysfs_slab_release(struct kmem_cache *s)
{
- if (slab_state >= FULL)
- kobject_put(&s->kobj);
+ kobject_put(&s->kobj);
}
/*
diff --git a/mm/swap.h b/mm/swap.h
index 758c46ca671e..fc2f6ade7f80 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -41,6 +41,7 @@ void __delete_from_swap_cache(struct folio *folio,
void delete_from_swap_cache(struct folio *folio);
void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end);
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry);
struct folio *swap_cache_get_folio(swp_entry_t entry,
struct vm_area_struct *vma, unsigned long addr);
struct folio *filemap_get_incore_folio(struct address_space *mapping,
@@ -97,6 +98,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
return 0;
}
+static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
+{
+}
+
static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
struct vm_area_struct *vma, unsigned long addr)
{
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e671266ad772..7255c01a1e4e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -680,9 +680,10 @@ skip:
/* The page was likely read above, so no need for plugging here */
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
- if (unlikely(page_allocated))
+ if (unlikely(page_allocated)) {
+ zswap_folio_swapin(folio);
swap_read_folio(folio, false, NULL);
- zswap_folio_swapin(folio);
+ }
return folio;
}
@@ -855,9 +856,10 @@ skip:
/* The folio was likely read above, so no need for plugging here */
folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx,
&page_allocated, false);
- if (unlikely(page_allocated))
+ if (unlikely(page_allocated)) {
+ zswap_folio_swapin(folio);
swap_read_folio(folio, false, NULL);
- zswap_folio_swapin(folio);
+ }
return folio;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 556ff7347d5f..573843d9cc91 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2532,10 +2532,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
exit_swap_address_space(p->type);
inode = mapping->host;
- if (p->bdev_handle) {
+ if (p->bdev_file) {
set_blocksize(p->bdev, old_block_size);
- bdev_release(p->bdev_handle);
- p->bdev_handle = NULL;
+ fput(p->bdev_file);
+ p->bdev_file = NULL;
}
inode_lock(inode);
@@ -2765,14 +2765,14 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
int error;
if (S_ISBLK(inode->i_mode)) {
- p->bdev_handle = bdev_open_by_dev(inode->i_rdev,
+ p->bdev_file = bdev_file_open_by_dev(inode->i_rdev,
BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
- if (IS_ERR(p->bdev_handle)) {
- error = PTR_ERR(p->bdev_handle);
- p->bdev_handle = NULL;
+ if (IS_ERR(p->bdev_file)) {
+ error = PTR_ERR(p->bdev_file);
+ p->bdev_file = NULL;
return error;
}
- p->bdev = p->bdev_handle->bdev;
+ p->bdev = file_bdev(p->bdev_file);
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
@@ -3208,10 +3208,10 @@ bad_swap:
p->percpu_cluster = NULL;
free_percpu(p->cluster_next_cpu);
p->cluster_next_cpu = NULL;
- if (p->bdev_handle) {
+ if (p->bdev_file) {
set_blocksize(p->bdev, p->old_block_size);
- bdev_release(p->bdev_handle);
- p->bdev_handle = NULL;
+ fput(p->bdev_file);
+ p->bdev_file = NULL;
}
inode = NULL;
destroy_swap_extents(p);
@@ -3365,6 +3365,19 @@ int swapcache_prepare(swp_entry_t entry)
return __swap_duplicate(entry, SWAP_HAS_CACHE);
}
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
+{
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+ unsigned char usage;
+
+ ci = lock_cluster_or_swap_info(si, offset);
+ usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE);
+ unlock_cluster_or_swap_info(si, ci);
+ if (!usage)
+ free_swap_slot(entry);
+}
+
struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
return swap_type_to_swap_info(swp_type(entry));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 20e3b0d9cf7e..313f1c42768a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
+ atomic_t *mmap_changing,
uffd_flags_t flags)
{
struct mm_struct *dst_mm = dst_vma->vm_mm;
@@ -472,6 +473,15 @@ retry:
goto out;
}
mmap_read_lock(dst_mm);
+ /*
+ * If memory mappings are changing because of non-cooperative
+ * operation (e.g. mremap) running in parallel, bail out and
+ * request the user to retry later
+ */
+ if (mmap_changing && atomic_read(mmap_changing)) {
+ err = -EAGAIN;
+ break;
+ }
dst_vma = NULL;
goto retry;
@@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
+ atomic_t *mmap_changing,
uffd_flags_t flags);
#endif /* CONFIG_HUGETLB_PAGE */
@@ -622,8 +633,8 @@ retry:
* If this is a HUGETLB vma, pass off to appropriate routine
*/
if (is_vm_hugetlb_page(dst_vma))
- return mfill_atomic_hugetlb(dst_vma, dst_start,
- src_start, len, flags);
+ return mfill_atomic_hugetlb(dst_vma, dst_start, src_start,
+ len, mmap_changing, flags);
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock;
@@ -891,8 +902,8 @@ static int move_present_pte(struct mm_struct *mm,
double_pt_lock(dst_ptl, src_ptl);
- if (!pte_same(*src_pte, orig_src_pte) ||
- !pte_same(*dst_pte, orig_dst_pte)) {
+ if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+ !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
err = -EAGAIN;
goto out;
}
@@ -903,9 +914,6 @@ static int move_present_pte(struct mm_struct *mm,
goto out;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
orig_src_pte = ptep_clear_flush(src_vma, src_addr, src_pte);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -914,6 +922,9 @@ static int move_present_pte(struct mm_struct *mm,
goto out;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
@@ -935,8 +946,8 @@ static int move_swap_pte(struct mm_struct *mm,
double_pt_lock(dst_ptl, src_ptl);
- if (!pte_same(*src_pte, orig_src_pte) ||
- !pte_same(*dst_pte, orig_dst_pte)) {
+ if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+ !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
double_pt_unlock(dst_ptl, src_ptl);
return -EAGAIN;
}
@@ -1005,7 +1016,7 @@ retry:
}
spin_lock(dst_ptl);
- orig_dst_pte = *dst_pte;
+ orig_dst_pte = ptep_get(dst_pte);
spin_unlock(dst_ptl);
if (!pte_none(orig_dst_pte)) {
err = -EEXIST;
@@ -1013,7 +1024,7 @@ retry:
}
spin_lock(src_ptl);
- orig_src_pte = *src_pte;
+ orig_src_pte = ptep_get(src_pte);
spin_unlock(src_ptl);
if (pte_none(orig_src_pte)) {
if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES))
@@ -1043,7 +1054,7 @@ retry:
* page isn't freed under us
*/
spin_lock(src_ptl);
- if (!pte_same(orig_src_pte, *src_pte)) {
+ if (!pte_same(orig_src_pte, ptep_get(src_pte))) {
spin_unlock(src_ptl);
err = -EAGAIN;
goto out;
diff --git a/mm/util.c b/mm/util.c
index 5a6a9802583b..5faf3adc6f43 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -136,6 +136,23 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
EXPORT_SYMBOL(kmemdup);
/**
+ * kmemdup_array - duplicate a given array.
+ *
+ * @src: array to duplicate.
+ * @element_size: size of each element of array.
+ * @count: number of elements to duplicate from array.
+ * @gfp: GFP mask to use.
+ *
+ * Return: duplicated array of @src or %NULL in case of error,
+ * result is physically contiguous. Use kfree() to free.
+ */
+void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+{
+ return kmemdup(src, size_mul(element_size, count), gfp);
+}
+EXPORT_SYMBOL(kmemdup_array);
+
+/**
* kvmemdup - duplicate region of memory
*
* @src: memory region to duplicate
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d12a17fc0c17..1e36322d83d8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -304,8 +304,8 @@ static int vmap_range_noflush(unsigned long addr, unsigned long end,
return err;
}
-int ioremap_page_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
int err;
@@ -318,6 +318,26 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
return err;
}
+int ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
+{
+ struct vm_struct *area;
+
+ area = find_vm_area((void *)addr);
+ if (!area || !(area->flags & VM_IOREMAP)) {
+ WARN_ONCE(1, "vm_area at addr %lx is not marked as VM_IOREMAP\n", addr);
+ return -EINVAL;
+ }
+ if (addr != (unsigned long)area->addr ||
+ (void *)end != area->addr + get_vm_area_size(area)) {
+ WARN_ONCE(1, "ioremap request [%lx,%lx) doesn't match vm_area [%lx, %lx)\n",
+ addr, end, (long)area->addr,
+ (long)area->addr + get_vm_area_size(area));
+ return -ERANGE;
+ }
+ return vmap_page_range(addr, end, phys_addr, prot);
+}
+
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
{
@@ -635,6 +655,58 @@ static int vmap_pages_range(unsigned long addr, unsigned long end,
return err;
}
+static int check_sparse_vm_area(struct vm_struct *area, unsigned long start,
+ unsigned long end)
+{
+ might_sleep();
+ if (WARN_ON_ONCE(area->flags & VM_FLUSH_RESET_PERMS))
+ return -EINVAL;
+ if (WARN_ON_ONCE(area->flags & VM_NO_GUARD))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!(area->flags & VM_SPARSE)))
+ return -EINVAL;
+ if ((end - start) >> PAGE_SHIFT > totalram_pages())
+ return -E2BIG;
+ if (start < (unsigned long)area->addr ||
+ (void *)end > area->addr + get_vm_area_size(area))
+ return -ERANGE;
+ return 0;
+}
+
+/**
+ * vm_area_map_pages - map pages inside given sparse vm_area
+ * @area: vm_area
+ * @start: start address inside vm_area
+ * @end: end address inside vm_area
+ * @pages: pages to map (always PAGE_SIZE pages)
+ */
+int vm_area_map_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end, struct page **pages)
+{
+ int err;
+
+ err = check_sparse_vm_area(area, start, end);
+ if (err)
+ return err;
+
+ return vmap_pages_range(start, end, PAGE_KERNEL, pages, PAGE_SHIFT);
+}
+
+/**
+ * vm_area_unmap_pages - unmap pages inside given sparse vm_area
+ * @area: vm_area
+ * @start: start address inside vm_area
+ * @end: end address inside vm_area
+ */
+void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end)
+{
+ if (check_sparse_vm_area(area, start, end))
+ return;
+
+ vunmap_range(start, end);
+}
+
int is_vmalloc_or_module_addr(const void *x)
{
/*
@@ -3809,9 +3881,9 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
if (flags & VMAP_RAM)
copied = vmap_ram_vread_iter(iter, addr, n, flags);
- else if (!(vm && (vm->flags & VM_IOREMAP)))
+ else if (!(vm && (vm->flags & (VM_IOREMAP | VM_SPARSE))))
copied = aligned_vread_iter(iter, addr, n);
- else /* IOREMAP area is treated as memory hole */
+ else /* IOREMAP | SPARSE area is treated as memory hole */
copied = zero_iter(iter, n);
addr += copied;
@@ -4402,6 +4474,9 @@ static int s_show(struct seq_file *m, void *p)
if (v->flags & VM_IOREMAP)
seq_puts(m, " ioremap");
+ if (v->flags & VM_SPARSE)
+ seq_puts(m, " sparse");
+
if (v->flags & VM_ALLOC)
seq_puts(m, " vmalloc");
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854ce6cc..4255619a1a31 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5753,7 +5753,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
/* Use reclaim/compaction for costly allocs or under memory pressure */
static bool in_reclaim_compaction(struct scan_control *sc)
{
- if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
+ if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
sc->priority < DEF_PRIORITY - 2))
return true;
@@ -5998,6 +5998,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
unsigned long watermark;
+ if (!gfp_compaction_allowed(sc->gfp_mask))
+ return false;
+
/* Allocation can already succeed, nothing to do */
if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
sc->reclaim_idx, 0))
diff --git a/mm/zswap.c b/mm/zswap.c
index ca25b676048e..db4625af65fb 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -377,10 +377,9 @@ void zswap_folio_swapin(struct folio *folio)
{
struct lruvec *lruvec;
- if (folio) {
- lruvec = folio_lruvec(folio);
- atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
- }
+ VM_WARN_ON_ONCE(!folio_test_locked(folio));
+ lruvec = folio_lruvec(folio);
+ atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
}
/*********************************
@@ -536,10 +535,6 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
*/
static void zswap_free_entry(struct zswap_entry *entry)
{
- if (entry->objcg) {
- obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
- obj_cgroup_put(entry->objcg);
- }
if (!entry->length)
atomic_dec(&zswap_same_filled_pages);
else {
@@ -548,6 +543,10 @@ static void zswap_free_entry(struct zswap_entry *entry)
atomic_dec(&entry->pool->nr_stored);
zswap_pool_put(entry->pool);
}
+ if (entry->objcg) {
+ obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
+ obj_cgroup_put(entry->objcg);
+ }
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
zswap_update_total_size();
@@ -895,10 +894,8 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
* into the warmer region. We should terminate shrinking (if we're in the dynamic
* shrinker context).
*/
- if (writeback_result == -EEXIST && encountered_page_in_swapcache) {
- ret = LRU_SKIP;
+ if (writeback_result == -EEXIST && encountered_page_in_swapcache)
*encountered_page_in_swapcache = true;
- }
goto put_unlock;
}
@@ -1442,6 +1439,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
spin_unlock(&tree->lock);
delete_from_swap_cache(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return -ENOMEM;
}
spin_unlock(&tree->lock);
@@ -1519,7 +1518,7 @@ bool zswap_store(struct folio *folio)
if (folio_test_large(folio))
return false;
- if (!zswap_enabled || !tree)
+ if (!tree)
return false;
/*
@@ -1534,6 +1533,10 @@ bool zswap_store(struct folio *folio)
zswap_invalidate_entry(tree, dupentry);
}
spin_unlock(&tree->lock);
+
+ if (!zswap_enabled)
+ return false;
+
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 7b3341cef926..850d4a185f55 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -179,4 +179,5 @@ static void __exit lowpan_module_exit(void)
module_init(lowpan_module_init);
module_exit(lowpan_module_exit);
+MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module");
MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 407b2335f091..39876eff51d2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -504,28 +504,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-static struct lock_class_key vlan_netdev_addr_lock_key;
-
-static void vlan_dev_set_lockdep_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
-}
-
-static void vlan_dev_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock,
- &vlan_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
-}
-
static __be16 vlan_parse_protocol(const struct sk_buff *skb)
{
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -559,7 +537,7 @@ static const struct header_ops vlan_passthru_header_ops = {
.parse_protocol = vlan_parse_protocol,
};
-static struct device_type vlan_type = {
+static const struct device_type vlan_type = {
.name = "vlan",
};
@@ -627,7 +605,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- vlan_dev_set_lockdep_class(dev);
+ netdev_lockdep_set_classes(dev);
vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->vlan_pcpu_stats)
@@ -784,9 +762,9 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
static int vlan_dev_get_iflink(const struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+ const struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- return real_dev->ifindex;
+ return READ_ONCE(real_dev->ifindex);
}
static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 214532173536..a3b68243fd4b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
vlan_dev_set_ingress_priority(dev, m->to, m->from);
}
}
if (data[IFLA_VLAN_EGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
err = vlan_dev_set_egress_priority(dev, m->from, m->to);
if (err)
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 7825c129742a..87b959da00cd 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -163,48 +163,34 @@ void vlan_proc_rem_dev(struct net_device *vlandev)
* The following few functions build the content of /proc/net/vlan/config
*/
-/* start read of /proc/net/vlan/config */
-static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(rcu)
+static void *vlan_seq_from_index(struct seq_file *seq, loff_t *pos)
{
+ unsigned long ifindex = *pos;
struct net_device *dev;
- struct net *net = seq_file_net(seq);
- loff_t i = 1;
-
- rcu_read_lock();
- if (*pos == 0)
- return SEQ_START_TOKEN;
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
if (!is_vlan_dev(dev))
continue;
-
- if (i++ == *pos)
- return dev;
+ *pos = dev->ifindex;
+ return dev;
}
+ return NULL;
+}
+
+static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- return NULL;
+ return vlan_seq_from_index(seq, pos);
}
static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev;
- struct net *net = seq_file_net(seq);
-
++*pos;
-
- dev = v;
- if (v == SEQ_START_TOKEN)
- dev = net_device_entry(&net->dev_base_head);
-
- for_each_netdev_continue_rcu(net, dev) {
- if (!is_vlan_dev(dev))
- continue;
-
- return dev;
- }
-
- return NULL;
+ return vlan_seq_from_index(seq, pos);
}
static void vlan_seq_stop(struct seq_file *seq, void *v)
diff --git a/net/Kconfig b/net/Kconfig
index 4adc47d0c9c2..3e57ccf0da27 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -331,6 +331,7 @@ config NET_RX_BUSY_POLL
config BQL
bool
+ prompt "Enable Byte Queue Limits"
depends on SYSFS
select DQL
default y
diff --git a/net/Makefile b/net/Makefile
index b06b5539e7a6..65bb8c72a35e 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX_SCM) += unix/
+obj-$(CONFIG_UNIX) += unix/
obj-y += ipv6/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 033871e718a3..324e3ab96bb3 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1532,4 +1532,5 @@ static void __exit atm_mpoa_cleanup(void)
module_init(atm_mpoa_init);
module_exit(atm_mpoa_cleanup);
+MODULE_DESCRIPTION("Multi-Protocol Over ATM (MPOA) driver");
MODULE_LICENSE("GPL");
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 28a939d56090..4c7e85534324 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -684,7 +684,7 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
cand = batadv_dat_select_candidates(bat_priv, ip, vid);
if (!cand)
- goto out;
+ return ret;
batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip);
@@ -728,7 +728,6 @@ free_orig:
batadv_orig_node_put(cand[i].orig_node);
}
-out:
kfree(cand);
return ret;
}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5fc754b0b3f7..75119f1ffccc 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -691,29 +691,31 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
"%s%s", BATADV_UEV_TYPE_VAR,
batadv_uev_type_str[type]);
if (!uevent_env[0])
- goto out;
+ goto report_error;
uevent_env[1] = kasprintf(GFP_ATOMIC,
"%s%s", BATADV_UEV_ACTION_VAR,
batadv_uev_action_str[action]);
if (!uevent_env[1])
- goto out;
+ goto free_first_env;
/* If the event is DEL, ignore the data field */
if (action != BATADV_UEV_DEL) {
uevent_env[2] = kasprintf(GFP_ATOMIC,
"%s%s", BATADV_UEV_DATA_VAR, data);
if (!uevent_env[2])
- goto out;
+ goto free_second_env;
}
ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
-out:
- kfree(uevent_env[0]);
- kfree(uevent_env[1]);
kfree(uevent_env[2]);
+free_second_env:
+ kfree(uevent_env[1]);
+free_first_env:
+ kfree(uevent_env[0]);
if (ret)
+report_error:
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
batadv_uev_type_str[type],
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 870dcd7f1786..8ca854a75a32 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2024.0"
+#define BATADV_SOURCE_VERSION "2024.1"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d982daea8329..14088c4ff2f6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
cancel_delayed_work_sync(&bat_priv->mcast.work);
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
/* safely calling outside of worker, as worker was canceled above */
@@ -2198,6 +2199,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
BATADV_MCAST_WANT_NO_RTR4);
batadv_mcast_want_rtr6_update(bat_priv, orig,
BATADV_MCAST_WANT_NO_RTR6);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig,
+ BATADV_MCAST_HAVE_MC_PTYPE_CAPA);
spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 1f7ed9d4f6fd..0954757f0b8b 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -15,7 +15,6 @@
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/errno.h>
-#include <linux/export.h>
#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4eb1b3ced0d2..27520a8a486f 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -572,7 +572,7 @@ static void netdev_setup(struct net_device *dev)
dev->needs_free_netdev = true;
}
-static struct device_type bt_type = {
+static const struct device_type bt_type = {
.name = "bluetooth",
};
@@ -892,7 +892,7 @@ static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type)
chan->ops = &bt_6lowpan_chan_ops;
err = l2cap_chan_connect(chan, cpu_to_le16(L2CAP_PSM_IPSP), 0,
- addr, dst_type);
+ addr, dst_type, L2CAP_CONN_TIMEOUT);
BT_DBG("chan %p err %d", chan, err);
if (err < 0)
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index da7cac0a1b71..6b2b65a66700 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -62,14 +62,6 @@ source "net/bluetooth/cmtp/Kconfig"
source "net/bluetooth/hidp/Kconfig"
-config BT_HS
- bool "Bluetooth High Speed (HS) features"
- depends on BT_BREDR
- help
- Bluetooth High Speed includes support for off-loading
- Bluetooth connections via 802.11 (wifi) physical layer
- available with Bluetooth version 3.0 or later.
-
config BT_LE
bool "Bluetooth Low Energy (LE) features"
depends on BT
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 141ac1fda0bf..628d448d78be 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -21,7 +21,6 @@ bluetooth-$(CONFIG_DEV_COREDUMP) += coredump.o
bluetooth-$(CONFIG_BT_BREDR) += sco.o
bluetooth-$(CONFIG_BT_LE) += iso.o
-bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
bluetooth-$(CONFIG_BT_LEDS) += leds.o
bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o
bluetooth-$(CONFIG_BT_AOSPEXT) += aosp.o
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
deleted file mode 100644
index e7adb8a98cf9..000000000000
--- a/net/bluetooth/a2mp.c
+++ /dev/null
@@ -1,1054 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved.
- Copyright (c) 2011,2012 Intel Corp.
-
-*/
-
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci_core.h>
-#include <net/bluetooth/l2cap.h>
-
-#include "hci_request.h"
-#include "a2mp.h"
-#include "amp.h"
-
-#define A2MP_FEAT_EXT 0x8000
-
-/* Global AMP Manager list */
-static LIST_HEAD(amp_mgr_list);
-static DEFINE_MUTEX(amp_mgr_list_lock);
-
-/* A2MP build & send command helper functions */
-static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data)
-{
- struct a2mp_cmd *cmd;
- int plen;
-
- plen = sizeof(*cmd) + len;
- cmd = kzalloc(plen, GFP_KERNEL);
- if (!cmd)
- return NULL;
-
- cmd->code = code;
- cmd->ident = ident;
- cmd->len = cpu_to_le16(len);
-
- memcpy(cmd->data, data, len);
-
- return cmd;
-}
-
-static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
-{
- struct l2cap_chan *chan = mgr->a2mp_chan;
- struct a2mp_cmd *cmd;
- u16 total_len = len + sizeof(*cmd);
- struct kvec iv;
- struct msghdr msg;
-
- cmd = __a2mp_build(code, ident, len, data);
- if (!cmd)
- return;
-
- iv.iov_base = cmd;
- iv.iov_len = total_len;
-
- memset(&msg, 0, sizeof(msg));
-
- iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iv, 1, total_len);
-
- l2cap_chan_send(chan, &msg, total_len);
-
- kfree(cmd);
-}
-
-static u8 __next_ident(struct amp_mgr *mgr)
-{
- if (++mgr->ident == 0)
- mgr->ident = 1;
-
- return mgr->ident;
-}
-
-static struct amp_mgr *amp_mgr_lookup_by_state(u8 state)
-{
- struct amp_mgr *mgr;
-
- mutex_lock(&amp_mgr_list_lock);
- list_for_each_entry(mgr, &amp_mgr_list, list) {
- if (test_and_clear_bit(state, &mgr->state)) {
- amp_mgr_get(mgr);
- mutex_unlock(&amp_mgr_list_lock);
- return mgr;
- }
- }
- mutex_unlock(&amp_mgr_list_lock);
-
- return NULL;
-}
-
-/* hci_dev_list shall be locked */
-static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl)
-{
- struct hci_dev *hdev;
- int i = 1;
-
- cl[0].id = AMP_ID_BREDR;
- cl[0].type = AMP_TYPE_BREDR;
- cl[0].status = AMP_STATUS_BLUETOOTH_ONLY;
-
- list_for_each_entry(hdev, &hci_dev_list, list) {
- if (hdev->dev_type == HCI_AMP) {
- cl[i].id = hdev->id;
- cl[i].type = hdev->amp_type;
- if (test_bit(HCI_UP, &hdev->flags))
- cl[i].status = hdev->amp_status;
- else
- cl[i].status = AMP_STATUS_POWERED_DOWN;
- i++;
- }
- }
-}
-
-/* Processing A2MP messages */
-static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_cmd_rej *rej = (void *) skb->data;
-
- if (le16_to_cpu(hdr->len) < sizeof(*rej))
- return -EINVAL;
-
- BT_DBG("ident %u reason %d", hdr->ident, le16_to_cpu(rej->reason));
-
- skb_pull(skb, sizeof(*rej));
-
- return 0;
-}
-
-static int a2mp_discover_req(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_discov_req *req = (void *) skb->data;
- u16 len = le16_to_cpu(hdr->len);
- struct a2mp_discov_rsp *rsp;
- u16 ext_feat;
- u8 num_ctrl;
- struct hci_dev *hdev;
-
- if (len < sizeof(*req))
- return -EINVAL;
-
- skb_pull(skb, sizeof(*req));
-
- ext_feat = le16_to_cpu(req->ext_feat);
-
- BT_DBG("mtu %d efm 0x%4.4x", le16_to_cpu(req->mtu), ext_feat);
-
- /* check that packet is not broken for now */
- while (ext_feat & A2MP_FEAT_EXT) {
- if (len < sizeof(ext_feat))
- return -EINVAL;
-
- ext_feat = get_unaligned_le16(skb->data);
- BT_DBG("efm 0x%4.4x", ext_feat);
- len -= sizeof(ext_feat);
- skb_pull(skb, sizeof(ext_feat));
- }
-
- read_lock(&hci_dev_list_lock);
-
- /* at minimum the BR/EDR needs to be listed */
- num_ctrl = 1;
-
- list_for_each_entry(hdev, &hci_dev_list, list) {
- if (hdev->dev_type == HCI_AMP)
- num_ctrl++;
- }
-
- len = struct_size(rsp, cl, num_ctrl);
- rsp = kmalloc(len, GFP_ATOMIC);
- if (!rsp) {
- read_unlock(&hci_dev_list_lock);
- return -ENOMEM;
- }
-
- rsp->mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
- rsp->ext_feat = 0;
-
- __a2mp_add_cl(mgr, rsp->cl);
-
- read_unlock(&hci_dev_list_lock);
-
- a2mp_send(mgr, A2MP_DISCOVER_RSP, hdr->ident, len, rsp);
-
- kfree(rsp);
- return 0;
-}
-
-static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_discov_rsp *rsp = (void *) skb->data;
- u16 len = le16_to_cpu(hdr->len);
- struct a2mp_cl *cl;
- u16 ext_feat;
- bool found = false;
-
- if (len < sizeof(*rsp))
- return -EINVAL;
-
- len -= sizeof(*rsp);
- skb_pull(skb, sizeof(*rsp));
-
- ext_feat = le16_to_cpu(rsp->ext_feat);
-
- BT_DBG("mtu %d efm 0x%4.4x", le16_to_cpu(rsp->mtu), ext_feat);
-
- /* check that packet is not broken for now */
- while (ext_feat & A2MP_FEAT_EXT) {
- if (len < sizeof(ext_feat))
- return -EINVAL;
-
- ext_feat = get_unaligned_le16(skb->data);
- BT_DBG("efm 0x%4.4x", ext_feat);
- len -= sizeof(ext_feat);
- skb_pull(skb, sizeof(ext_feat));
- }
-
- cl = (void *) skb->data;
- while (len >= sizeof(*cl)) {
- BT_DBG("Remote AMP id %u type %u status %u", cl->id, cl->type,
- cl->status);
-
- if (cl->id != AMP_ID_BREDR && cl->type != AMP_TYPE_BREDR) {
- struct a2mp_info_req req;
-
- found = true;
-
- memset(&req, 0, sizeof(req));
-
- req.id = cl->id;
- a2mp_send(mgr, A2MP_GETINFO_REQ, __next_ident(mgr),
- sizeof(req), &req);
- }
-
- len -= sizeof(*cl);
- cl = skb_pull(skb, sizeof(*cl));
- }
-
- /* Fall back to L2CAP init sequence */
- if (!found) {
- struct l2cap_conn *conn = mgr->l2cap_conn;
- struct l2cap_chan *chan;
-
- mutex_lock(&conn->chan_lock);
-
- list_for_each_entry(chan, &conn->chan_l, list) {
-
- BT_DBG("chan %p state %s", chan,
- state_to_string(chan->state));
-
- if (chan->scid == L2CAP_CID_A2MP)
- continue;
-
- l2cap_chan_lock(chan);
-
- if (chan->state == BT_CONNECT)
- l2cap_send_conn_req(chan);
-
- l2cap_chan_unlock(chan);
- }
-
- mutex_unlock(&conn->chan_lock);
- }
-
- return 0;
-}
-
-static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_cl *cl = (void *) skb->data;
-
- while (skb->len >= sizeof(*cl)) {
- BT_DBG("Controller id %u type %u status %u", cl->id, cl->type,
- cl->status);
- cl = skb_pull(skb, sizeof(*cl));
- }
-
- /* TODO send A2MP_CHANGE_RSP */
-
- return 0;
-}
-
-static void read_local_amp_info_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- a2mp_send_getinfo_rsp(hdev);
-}
-
-static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_info_req *req = (void *) skb->data;
- struct hci_dev *hdev;
- struct hci_request hreq;
- int err = 0;
-
- if (le16_to_cpu(hdr->len) < sizeof(*req))
- return -EINVAL;
-
- BT_DBG("id %u", req->id);
-
- hdev = hci_dev_get(req->id);
- if (!hdev || hdev->dev_type != HCI_AMP) {
- struct a2mp_info_rsp rsp;
-
- memset(&rsp, 0, sizeof(rsp));
-
- rsp.id = req->id;
- rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
-
- a2mp_send(mgr, A2MP_GETINFO_RSP, hdr->ident, sizeof(rsp),
- &rsp);
-
- goto done;
- }
-
- set_bit(READ_LOC_AMP_INFO, &mgr->state);
- hci_req_init(&hreq, hdev);
- hci_req_add(&hreq, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
- err = hci_req_run(&hreq, read_local_amp_info_complete);
- if (err < 0)
- a2mp_send_getinfo_rsp(hdev);
-
-done:
- if (hdev)
- hci_dev_put(hdev);
-
- skb_pull(skb, sizeof(*req));
- return 0;
-}
-
-static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_info_rsp *rsp = (struct a2mp_info_rsp *) skb->data;
- struct a2mp_amp_assoc_req req;
- struct amp_ctrl *ctrl;
-
- if (le16_to_cpu(hdr->len) < sizeof(*rsp))
- return -EINVAL;
-
- BT_DBG("id %u status 0x%2.2x", rsp->id, rsp->status);
-
- if (rsp->status)
- return -EINVAL;
-
- ctrl = amp_ctrl_add(mgr, rsp->id);
- if (!ctrl)
- return -ENOMEM;
-
- memset(&req, 0, sizeof(req));
-
- req.id = rsp->id;
- a2mp_send(mgr, A2MP_GETAMPASSOC_REQ, __next_ident(mgr), sizeof(req),
- &req);
-
- skb_pull(skb, sizeof(*rsp));
- return 0;
-}
-
-static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_amp_assoc_req *req = (void *) skb->data;
- struct hci_dev *hdev;
- struct amp_mgr *tmp;
-
- if (le16_to_cpu(hdr->len) < sizeof(*req))
- return -EINVAL;
-
- BT_DBG("id %u", req->id);
-
- /* Make sure that other request is not processed */
- tmp = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC);
-
- hdev = hci_dev_get(req->id);
- if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) {
- struct a2mp_amp_assoc_rsp rsp;
-
- memset(&rsp, 0, sizeof(rsp));
- rsp.id = req->id;
-
- if (tmp) {
- rsp.status = A2MP_STATUS_COLLISION_OCCURED;
- amp_mgr_put(tmp);
- } else {
- rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
- }
-
- a2mp_send(mgr, A2MP_GETAMPASSOC_RSP, hdr->ident, sizeof(rsp),
- &rsp);
-
- goto done;
- }
-
- amp_read_loc_assoc(hdev, mgr);
-
-done:
- if (hdev)
- hci_dev_put(hdev);
-
- skb_pull(skb, sizeof(*req));
- return 0;
-}
-
-static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_amp_assoc_rsp *rsp = (void *) skb->data;
- u16 len = le16_to_cpu(hdr->len);
- struct hci_dev *hdev;
- struct amp_ctrl *ctrl;
- struct hci_conn *hcon;
- size_t assoc_len;
-
- if (len < sizeof(*rsp))
- return -EINVAL;
-
- assoc_len = len - sizeof(*rsp);
-
- BT_DBG("id %u status 0x%2.2x assoc len %zu", rsp->id, rsp->status,
- assoc_len);
-
- if (rsp->status)
- return -EINVAL;
-
- /* Save remote ASSOC data */
- ctrl = amp_ctrl_lookup(mgr, rsp->id);
- if (ctrl) {
- u8 *assoc;
-
- assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);
- if (!assoc) {
- amp_ctrl_put(ctrl);
- return -ENOMEM;
- }
-
- ctrl->assoc = assoc;
- ctrl->assoc_len = assoc_len;
- ctrl->assoc_rem_len = assoc_len;
- ctrl->assoc_len_so_far = 0;
-
- amp_ctrl_put(ctrl);
- }
-
- /* Create Phys Link */
- hdev = hci_dev_get(rsp->id);
- if (!hdev)
- return -EINVAL;
-
- hcon = phylink_add(hdev, mgr, rsp->id, true);
- if (!hcon)
- goto done;
-
- BT_DBG("Created hcon %p: loc:%u -> rem:%u", hcon, hdev->id, rsp->id);
-
- mgr->bredr_chan->remote_amp_id = rsp->id;
-
- amp_create_phylink(hdev, mgr, hcon);
-
-done:
- hci_dev_put(hdev);
- skb_pull(skb, len);
- return 0;
-}
-
-static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_physlink_req *req = (void *) skb->data;
- struct a2mp_physlink_rsp rsp;
- struct hci_dev *hdev;
- struct hci_conn *hcon;
- struct amp_ctrl *ctrl;
-
- if (le16_to_cpu(hdr->len) < sizeof(*req))
- return -EINVAL;
-
- BT_DBG("local_id %u, remote_id %u", req->local_id, req->remote_id);
-
- memset(&rsp, 0, sizeof(rsp));
-
- rsp.local_id = req->remote_id;
- rsp.remote_id = req->local_id;
-
- hdev = hci_dev_get(req->remote_id);
- if (!hdev || hdev->amp_type == AMP_TYPE_BREDR) {
- rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
- goto send_rsp;
- }
-
- ctrl = amp_ctrl_lookup(mgr, rsp.remote_id);
- if (!ctrl) {
- ctrl = amp_ctrl_add(mgr, rsp.remote_id);
- if (ctrl) {
- amp_ctrl_get(ctrl);
- } else {
- rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION;
- goto send_rsp;
- }
- }
-
- if (ctrl) {
- size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req);
- u8 *assoc;
-
- assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
- if (!assoc) {
- amp_ctrl_put(ctrl);
- hci_dev_put(hdev);
- return -ENOMEM;
- }
-
- ctrl->assoc = assoc;
- ctrl->assoc_len = assoc_len;
- ctrl->assoc_rem_len = assoc_len;
- ctrl->assoc_len_so_far = 0;
-
- amp_ctrl_put(ctrl);
- }
-
- hcon = phylink_add(hdev, mgr, req->local_id, false);
- if (hcon) {
- amp_accept_phylink(hdev, mgr, hcon);
- rsp.status = A2MP_STATUS_SUCCESS;
- } else {
- rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION;
- }
-
-send_rsp:
- if (hdev)
- hci_dev_put(hdev);
-
- /* Reply error now and success after HCI Write Remote AMP Assoc
- command complete with success status
- */
- if (rsp.status != A2MP_STATUS_SUCCESS) {
- a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident,
- sizeof(rsp), &rsp);
- } else {
- set_bit(WRITE_REMOTE_AMP_ASSOC, &mgr->state);
- mgr->ident = hdr->ident;
- }
-
- skb_pull(skb, le16_to_cpu(hdr->len));
- return 0;
-}
-
-static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- struct a2mp_physlink_req *req = (void *) skb->data;
- struct a2mp_physlink_rsp rsp;
- struct hci_dev *hdev;
- struct hci_conn *hcon;
-
- if (le16_to_cpu(hdr->len) < sizeof(*req))
- return -EINVAL;
-
- BT_DBG("local_id %u remote_id %u", req->local_id, req->remote_id);
-
- memset(&rsp, 0, sizeof(rsp));
-
- rsp.local_id = req->remote_id;
- rsp.remote_id = req->local_id;
- rsp.status = A2MP_STATUS_SUCCESS;
-
- hdev = hci_dev_get(req->remote_id);
- if (!hdev) {
- rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
- goto send_rsp;
- }
-
- hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
- &mgr->l2cap_conn->hcon->dst);
- if (!hcon) {
- bt_dev_err(hdev, "no phys link exist");
- rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS;
- goto clean;
- }
-
- /* TODO Disconnect Phys Link here */
-
-clean:
- hci_dev_put(hdev);
-
-send_rsp:
- a2mp_send(mgr, A2MP_DISCONNPHYSLINK_RSP, hdr->ident, sizeof(rsp), &rsp);
-
- skb_pull(skb, sizeof(*req));
- return 0;
-}
-
-static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
- struct a2mp_cmd *hdr)
-{
- BT_DBG("ident %u code 0x%2.2x", hdr->ident, hdr->code);
-
- skb_pull(skb, le16_to_cpu(hdr->len));
- return 0;
-}
-
-/* Handle A2MP signalling */
-static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
-{
- struct a2mp_cmd *hdr;
- struct amp_mgr *mgr = chan->data;
- int err = 0;
-
- amp_mgr_get(mgr);
-
- while (skb->len >= sizeof(*hdr)) {
- u16 len;
-
- hdr = (void *) skb->data;
- len = le16_to_cpu(hdr->len);
-
- BT_DBG("code 0x%2.2x id %u len %u", hdr->code, hdr->ident, len);
-
- skb_pull(skb, sizeof(*hdr));
-
- if (len > skb->len || !hdr->ident) {
- err = -EINVAL;
- break;
- }
-
- mgr->ident = hdr->ident;
-
- switch (hdr->code) {
- case A2MP_COMMAND_REJ:
- a2mp_command_rej(mgr, skb, hdr);
- break;
-
- case A2MP_DISCOVER_REQ:
- err = a2mp_discover_req(mgr, skb, hdr);
- break;
-
- case A2MP_CHANGE_NOTIFY:
- err = a2mp_change_notify(mgr, skb, hdr);
- break;
-
- case A2MP_GETINFO_REQ:
- err = a2mp_getinfo_req(mgr, skb, hdr);
- break;
-
- case A2MP_GETAMPASSOC_REQ:
- err = a2mp_getampassoc_req(mgr, skb, hdr);
- break;
-
- case A2MP_CREATEPHYSLINK_REQ:
- err = a2mp_createphyslink_req(mgr, skb, hdr);
- break;
-
- case A2MP_DISCONNPHYSLINK_REQ:
- err = a2mp_discphyslink_req(mgr, skb, hdr);
- break;
-
- case A2MP_DISCOVER_RSP:
- err = a2mp_discover_rsp(mgr, skb, hdr);
- break;
-
- case A2MP_GETINFO_RSP:
- err = a2mp_getinfo_rsp(mgr, skb, hdr);
- break;
-
- case A2MP_GETAMPASSOC_RSP:
- err = a2mp_getampassoc_rsp(mgr, skb, hdr);
- break;
-
- case A2MP_CHANGE_RSP:
- case A2MP_CREATEPHYSLINK_RSP:
- case A2MP_DISCONNPHYSLINK_RSP:
- err = a2mp_cmd_rsp(mgr, skb, hdr);
- break;
-
- default:
- BT_ERR("Unknown A2MP sig cmd 0x%2.2x", hdr->code);
- err = -EINVAL;
- break;
- }
- }
-
- if (err) {
- struct a2mp_cmd_rej rej;
-
- memset(&rej, 0, sizeof(rej));
-
- rej.reason = cpu_to_le16(0);
- hdr = (void *) skb->data;
-
- BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err);
-
- a2mp_send(mgr, A2MP_COMMAND_REJ, hdr->ident, sizeof(rej),
- &rej);
- }
-
- /* Always free skb and return success error code to prevent
- from sending L2CAP Disconnect over A2MP channel */
- kfree_skb(skb);
-
- amp_mgr_put(mgr);
-
- return 0;
-}
-
-static void a2mp_chan_close_cb(struct l2cap_chan *chan)
-{
- l2cap_chan_put(chan);
-}
-
-static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state,
- int err)
-{
- struct amp_mgr *mgr = chan->data;
-
- if (!mgr)
- return;
-
- BT_DBG("chan %p state %s", chan, state_to_string(state));
-
- chan->state = state;
-
- switch (state) {
- case BT_CLOSED:
- if (mgr)
- amp_mgr_put(mgr);
- break;
- }
-}
-
-static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan,
- unsigned long hdr_len,
- unsigned long len, int nb)
-{
- struct sk_buff *skb;
-
- skb = bt_skb_alloc(hdr_len + len, GFP_KERNEL);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- return skb;
-}
-
-static const struct l2cap_ops a2mp_chan_ops = {
- .name = "L2CAP A2MP channel",
- .recv = a2mp_chan_recv_cb,
- .close = a2mp_chan_close_cb,
- .state_change = a2mp_chan_state_change_cb,
- .alloc_skb = a2mp_chan_alloc_skb_cb,
-
- /* Not implemented for A2MP */
- .new_connection = l2cap_chan_no_new_connection,
- .teardown = l2cap_chan_no_teardown,
- .ready = l2cap_chan_no_ready,
- .defer = l2cap_chan_no_defer,
- .resume = l2cap_chan_no_resume,
- .set_shutdown = l2cap_chan_no_set_shutdown,
- .get_sndtimeo = l2cap_chan_no_get_sndtimeo,
-};
-
-static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked)
-{
- struct l2cap_chan *chan;
- int err;
-
- chan = l2cap_chan_create();
- if (!chan)
- return NULL;
-
- BT_DBG("chan %p", chan);
-
- chan->chan_type = L2CAP_CHAN_FIXED;
- chan->scid = L2CAP_CID_A2MP;
- chan->dcid = L2CAP_CID_A2MP;
- chan->omtu = L2CAP_A2MP_DEFAULT_MTU;
- chan->imtu = L2CAP_A2MP_DEFAULT_MTU;
- chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
-
- chan->ops = &a2mp_chan_ops;
-
- l2cap_chan_set_defaults(chan);
- chan->remote_max_tx = chan->max_tx;
- chan->remote_tx_win = chan->tx_win;
-
- chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
- chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
-
- skb_queue_head_init(&chan->tx_q);
-
- chan->mode = L2CAP_MODE_ERTM;
-
- err = l2cap_ertm_init(chan);
- if (err < 0) {
- l2cap_chan_del(chan, 0);
- return NULL;
- }
-
- chan->conf_state = 0;
-
- if (locked)
- __l2cap_chan_add(conn, chan);
- else
- l2cap_chan_add(conn, chan);
-
- chan->remote_mps = chan->omtu;
- chan->mps = chan->omtu;
-
- chan->state = BT_CONNECTED;
-
- return chan;
-}
-
-/* AMP Manager functions */
-struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr)
-{
- BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref));
-
- kref_get(&mgr->kref);
-
- return mgr;
-}
-
-static void amp_mgr_destroy(struct kref *kref)
-{
- struct amp_mgr *mgr = container_of(kref, struct amp_mgr, kref);
-
- BT_DBG("mgr %p", mgr);
-
- mutex_lock(&amp_mgr_list_lock);
- list_del(&mgr->list);
- mutex_unlock(&amp_mgr_list_lock);
-
- amp_ctrl_list_flush(mgr);
- kfree(mgr);
-}
-
-int amp_mgr_put(struct amp_mgr *mgr)
-{
- BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref));
-
- return kref_put(&mgr->kref, &amp_mgr_destroy);
-}
-
-static struct amp_mgr *amp_mgr_create(struct l2cap_conn *conn, bool locked)
-{
- struct amp_mgr *mgr;
- struct l2cap_chan *chan;
-
- mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
- if (!mgr)
- return NULL;
-
- BT_DBG("conn %p mgr %p", conn, mgr);
-
- mgr->l2cap_conn = conn;
-
- chan = a2mp_chan_open(conn, locked);
- if (!chan) {
- kfree(mgr);
- return NULL;
- }
-
- mgr->a2mp_chan = chan;
- chan->data = mgr;
-
- conn->hcon->amp_mgr = mgr;
-
- kref_init(&mgr->kref);
-
- /* Remote AMP ctrl list initialization */
- INIT_LIST_HEAD(&mgr->amp_ctrls);
- mutex_init(&mgr->amp_ctrls_lock);
-
- mutex_lock(&amp_mgr_list_lock);
- list_add(&mgr->list, &amp_mgr_list);
- mutex_unlock(&amp_mgr_list_lock);
-
- return mgr;
-}
-
-struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
- struct sk_buff *skb)
-{
- struct amp_mgr *mgr;
-
- if (conn->hcon->type != ACL_LINK)
- return NULL;
-
- mgr = amp_mgr_create(conn, false);
- if (!mgr) {
- BT_ERR("Could not create AMP manager");
- return NULL;
- }
-
- BT_DBG("mgr: %p chan %p", mgr, mgr->a2mp_chan);
-
- return mgr->a2mp_chan;
-}
-
-void a2mp_send_getinfo_rsp(struct hci_dev *hdev)
-{
- struct amp_mgr *mgr;
- struct a2mp_info_rsp rsp;
-
- mgr = amp_mgr_lookup_by_state(READ_LOC_AMP_INFO);
- if (!mgr)
- return;
-
- BT_DBG("%s mgr %p", hdev->name, mgr);
-
- memset(&rsp, 0, sizeof(rsp));
-
- rsp.id = hdev->id;
- rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
-
- if (hdev->amp_type != AMP_TYPE_BREDR) {
- rsp.status = 0;
- rsp.total_bw = cpu_to_le32(hdev->amp_total_bw);
- rsp.max_bw = cpu_to_le32(hdev->amp_max_bw);
- rsp.min_latency = cpu_to_le32(hdev->amp_min_latency);
- rsp.pal_cap = cpu_to_le16(hdev->amp_pal_cap);
- rsp.assoc_size = cpu_to_le16(hdev->amp_assoc_size);
- }
-
- a2mp_send(mgr, A2MP_GETINFO_RSP, mgr->ident, sizeof(rsp), &rsp);
- amp_mgr_put(mgr);
-}
-
-void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status)
-{
- struct amp_mgr *mgr;
- struct amp_assoc *loc_assoc = &hdev->loc_assoc;
- struct a2mp_amp_assoc_rsp *rsp;
- size_t len;
-
- mgr = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC);
- if (!mgr)
- return;
-
- BT_DBG("%s mgr %p", hdev->name, mgr);
-
- len = sizeof(struct a2mp_amp_assoc_rsp) + loc_assoc->len;
- rsp = kzalloc(len, GFP_KERNEL);
- if (!rsp) {
- amp_mgr_put(mgr);
- return;
- }
-
- rsp->id = hdev->id;
-
- if (status) {
- rsp->status = A2MP_STATUS_INVALID_CTRL_ID;
- } else {
- rsp->status = A2MP_STATUS_SUCCESS;
- memcpy(rsp->amp_assoc, loc_assoc->data, loc_assoc->len);
- }
-
- a2mp_send(mgr, A2MP_GETAMPASSOC_RSP, mgr->ident, len, rsp);
- amp_mgr_put(mgr);
- kfree(rsp);
-}
-
-void a2mp_send_create_phy_link_req(struct hci_dev *hdev, u8 status)
-{
- struct amp_mgr *mgr;
- struct amp_assoc *loc_assoc = &hdev->loc_assoc;
- struct a2mp_physlink_req *req;
- struct l2cap_chan *bredr_chan;
- size_t len;
-
- mgr = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC_FINAL);
- if (!mgr)
- return;
-
- len = sizeof(*req) + loc_assoc->len;
-
- BT_DBG("%s mgr %p assoc_len %zu", hdev->name, mgr, len);
-
- req = kzalloc(len, GFP_KERNEL);
- if (!req) {
- amp_mgr_put(mgr);
- return;
- }
-
- bredr_chan = mgr->bredr_chan;
- if (!bredr_chan)
- goto clean;
-
- req->local_id = hdev->id;
- req->remote_id = bredr_chan->remote_amp_id;
- memcpy(req->amp_assoc, loc_assoc->data, loc_assoc->len);
-
- a2mp_send(mgr, A2MP_CREATEPHYSLINK_REQ, __next_ident(mgr), len, req);
-
-clean:
- amp_mgr_put(mgr);
- kfree(req);
-}
-
-void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status)
-{
- struct amp_mgr *mgr;
- struct a2mp_physlink_rsp rsp;
- struct hci_conn *hs_hcon;
-
- mgr = amp_mgr_lookup_by_state(WRITE_REMOTE_AMP_ASSOC);
- if (!mgr)
- return;
-
- memset(&rsp, 0, sizeof(rsp));
-
- hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT);
- if (!hs_hcon) {
- rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION;
- } else {
- rsp.remote_id = hs_hcon->remote_id;
- rsp.status = A2MP_STATUS_SUCCESS;
- }
-
- BT_DBG("%s mgr %p hs_hcon %p status %u", hdev->name, mgr, hs_hcon,
- status);
-
- rsp.local_id = hdev->id;
- a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, mgr->ident, sizeof(rsp), &rsp);
- amp_mgr_put(mgr);
-}
-
-void a2mp_discover_amp(struct l2cap_chan *chan)
-{
- struct l2cap_conn *conn = chan->conn;
- struct amp_mgr *mgr = conn->hcon->amp_mgr;
- struct a2mp_discov_req req;
-
- BT_DBG("chan %p conn %p mgr %p", chan, conn, mgr);
-
- if (!mgr) {
- mgr = amp_mgr_create(conn, true);
- if (!mgr)
- return;
- }
-
- mgr->bredr_chan = chan;
-
- memset(&req, 0, sizeof(req));
-
- req.mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
- req.ext_feat = 0;
- a2mp_send(mgr, A2MP_DISCOVER_REQ, 1, sizeof(req), &req);
-}
diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h
deleted file mode 100644
index 2fd253a61a2a..000000000000
--- a/net/bluetooth/a2mp.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved.
- Copyright (c) 2011,2012 Intel Corp.
-
-*/
-
-#ifndef __A2MP_H
-#define __A2MP_H
-
-#include <net/bluetooth/l2cap.h>
-
-enum amp_mgr_state {
- READ_LOC_AMP_INFO,
- READ_LOC_AMP_ASSOC,
- READ_LOC_AMP_ASSOC_FINAL,
- WRITE_REMOTE_AMP_ASSOC,
-};
-
-struct amp_mgr {
- struct list_head list;
- struct l2cap_conn *l2cap_conn;
- struct l2cap_chan *a2mp_chan;
- struct l2cap_chan *bredr_chan;
- struct kref kref;
- __u8 ident;
- __u8 handle;
- unsigned long state;
- unsigned long flags;
-
- struct list_head amp_ctrls;
- struct mutex amp_ctrls_lock;
-};
-
-struct a2mp_cmd {
- __u8 code;
- __u8 ident;
- __le16 len;
- __u8 data[];
-} __packed;
-
-/* A2MP command codes */
-#define A2MP_COMMAND_REJ 0x01
-struct a2mp_cmd_rej {
- __le16 reason;
- __u8 data[];
-} __packed;
-
-#define A2MP_DISCOVER_REQ 0x02
-struct a2mp_discov_req {
- __le16 mtu;
- __le16 ext_feat;
-} __packed;
-
-struct a2mp_cl {
- __u8 id;
- __u8 type;
- __u8 status;
-} __packed;
-
-#define A2MP_DISCOVER_RSP 0x03
-struct a2mp_discov_rsp {
- __le16 mtu;
- __le16 ext_feat;
- struct a2mp_cl cl[];
-} __packed;
-
-#define A2MP_CHANGE_NOTIFY 0x04
-#define A2MP_CHANGE_RSP 0x05
-
-#define A2MP_GETINFO_REQ 0x06
-struct a2mp_info_req {
- __u8 id;
-} __packed;
-
-#define A2MP_GETINFO_RSP 0x07
-struct a2mp_info_rsp {
- __u8 id;
- __u8 status;
- __le32 total_bw;
- __le32 max_bw;
- __le32 min_latency;
- __le16 pal_cap;
- __le16 assoc_size;
-} __packed;
-
-#define A2MP_GETAMPASSOC_REQ 0x08
-struct a2mp_amp_assoc_req {
- __u8 id;
-} __packed;
-
-#define A2MP_GETAMPASSOC_RSP 0x09
-struct a2mp_amp_assoc_rsp {
- __u8 id;
- __u8 status;
- __u8 amp_assoc[];
-} __packed;
-
-#define A2MP_CREATEPHYSLINK_REQ 0x0A
-#define A2MP_DISCONNPHYSLINK_REQ 0x0C
-struct a2mp_physlink_req {
- __u8 local_id;
- __u8 remote_id;
- __u8 amp_assoc[];
-} __packed;
-
-#define A2MP_CREATEPHYSLINK_RSP 0x0B
-#define A2MP_DISCONNPHYSLINK_RSP 0x0D
-struct a2mp_physlink_rsp {
- __u8 local_id;
- __u8 remote_id;
- __u8 status;
-} __packed;
-
-/* A2MP response status */
-#define A2MP_STATUS_SUCCESS 0x00
-#define A2MP_STATUS_INVALID_CTRL_ID 0x01
-#define A2MP_STATUS_UNABLE_START_LINK_CREATION 0x02
-#define A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS 0x02
-#define A2MP_STATUS_COLLISION_OCCURED 0x03
-#define A2MP_STATUS_DISCONN_REQ_RECVD 0x04
-#define A2MP_STATUS_PHYS_LINK_EXISTS 0x05
-#define A2MP_STATUS_SECURITY_VIOLATION 0x06
-
-struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr);
-
-#if IS_ENABLED(CONFIG_BT_HS)
-int amp_mgr_put(struct amp_mgr *mgr);
-struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
- struct sk_buff *skb);
-void a2mp_discover_amp(struct l2cap_chan *chan);
-#else
-static inline int amp_mgr_put(struct amp_mgr *mgr)
-{
- return 0;
-}
-
-static inline struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
- struct sk_buff *skb)
-{
- return NULL;
-}
-
-static inline void a2mp_discover_amp(struct l2cap_chan *chan)
-{
-}
-#endif
-
-void a2mp_send_getinfo_rsp(struct hci_dev *hdev);
-void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status);
-void a2mp_send_create_phy_link_req(struct hci_dev *hdev, u8 status);
-void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status);
-
-#endif /* __A2MP_H */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index b93464ac3517..67604ccec2f4 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -309,14 +309,11 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- lock_sock(sk);
-
skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
err = 0;
- release_sock(sk);
return err;
}
@@ -346,8 +343,6 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_free_datagram(sk, skb);
- release_sock(sk);
-
if (flags & MSG_TRUNC)
copied = skblen;
@@ -570,10 +565,11 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
if (sk->sk_state == BT_LISTEN)
return -EINVAL;
- lock_sock(sk);
+ spin_lock(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
amount = skb ? skb->len : 0;
- release_sock(sk);
+ spin_unlock(&sk->sk_receive_queue.lock);
+
err = put_user(amount, (int __user *)arg);
break;
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
deleted file mode 100644
index 5d698f19868c..000000000000
--- a/net/bluetooth/amp.c
+++ /dev/null
@@ -1,590 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- Copyright (c) 2011,2012 Intel Corp.
-
-*/
-
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci.h>
-#include <net/bluetooth/hci_core.h>
-#include <crypto/hash.h>
-
-#include "hci_request.h"
-#include "a2mp.h"
-#include "amp.h"
-
-/* Remote AMP Controllers interface */
-void amp_ctrl_get(struct amp_ctrl *ctrl)
-{
- BT_DBG("ctrl %p orig refcnt %d", ctrl,
- kref_read(&ctrl->kref));
-
- kref_get(&ctrl->kref);
-}
-
-static void amp_ctrl_destroy(struct kref *kref)
-{
- struct amp_ctrl *ctrl = container_of(kref, struct amp_ctrl, kref);
-
- BT_DBG("ctrl %p", ctrl);
-
- kfree(ctrl->assoc);
- kfree(ctrl);
-}
-
-int amp_ctrl_put(struct amp_ctrl *ctrl)
-{
- BT_DBG("ctrl %p orig refcnt %d", ctrl,
- kref_read(&ctrl->kref));
-
- return kref_put(&ctrl->kref, &amp_ctrl_destroy);
-}
-
-struct amp_ctrl *amp_ctrl_add(struct amp_mgr *mgr, u8 id)
-{
- struct amp_ctrl *ctrl;
-
- ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
- if (!ctrl)
- return NULL;
-
- kref_init(&ctrl->kref);
- ctrl->id = id;
-
- mutex_lock(&mgr->amp_ctrls_lock);
- list_add(&ctrl->list, &mgr->amp_ctrls);
- mutex_unlock(&mgr->amp_ctrls_lock);
-
- BT_DBG("mgr %p ctrl %p", mgr, ctrl);
-
- return ctrl;
-}
-
-void amp_ctrl_list_flush(struct amp_mgr *mgr)
-{
- struct amp_ctrl *ctrl, *n;
-
- BT_DBG("mgr %p", mgr);
-
- mutex_lock(&mgr->amp_ctrls_lock);
- list_for_each_entry_safe(ctrl, n, &mgr->amp_ctrls, list) {
- list_del(&ctrl->list);
- amp_ctrl_put(ctrl);
- }
- mutex_unlock(&mgr->amp_ctrls_lock);
-}
-
-struct amp_ctrl *amp_ctrl_lookup(struct amp_mgr *mgr, u8 id)
-{
- struct amp_ctrl *ctrl;
-
- BT_DBG("mgr %p id %u", mgr, id);
-
- mutex_lock(&mgr->amp_ctrls_lock);
- list_for_each_entry(ctrl, &mgr->amp_ctrls, list) {
- if (ctrl->id == id) {
- amp_ctrl_get(ctrl);
- mutex_unlock(&mgr->amp_ctrls_lock);
- return ctrl;
- }
- }
- mutex_unlock(&mgr->amp_ctrls_lock);
-
- return NULL;
-}
-
-/* Physical Link interface */
-static u8 __next_handle(struct amp_mgr *mgr)
-{
- if (++mgr->handle == 0)
- mgr->handle = 1;
-
- return mgr->handle;
-}
-
-struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
- u8 remote_id, bool out)
-{
- bdaddr_t *dst = &mgr->l2cap_conn->hcon->dst;
- struct hci_conn *hcon;
- u8 role = out ? HCI_ROLE_MASTER : HCI_ROLE_SLAVE;
-
- hcon = hci_conn_add(hdev, AMP_LINK, dst, role, __next_handle(mgr));
- if (!hcon)
- return NULL;
-
- BT_DBG("hcon %p dst %pMR", hcon, dst);
-
- hcon->state = BT_CONNECT;
- hcon->attempt++;
- hcon->remote_id = remote_id;
- hcon->amp_mgr = amp_mgr_get(mgr);
-
- return hcon;
-}
-
-/* AMP crypto key generation interface */
-static int hmac_sha256(u8 *key, u8 ksize, char *plaintext, u8 psize, u8 *output)
-{
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- int ret;
-
- if (!ksize)
- return -EINVAL;
-
- tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
- if (IS_ERR(tfm)) {
- BT_DBG("crypto_alloc_ahash failed: err %ld", PTR_ERR(tfm));
- return PTR_ERR(tfm);
- }
-
- ret = crypto_shash_setkey(tfm, key, ksize);
- if (ret) {
- BT_DBG("crypto_ahash_setkey failed: err %d", ret);
- goto failed;
- }
-
- shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!shash) {
- ret = -ENOMEM;
- goto failed;
- }
-
- shash->tfm = tfm;
-
- ret = crypto_shash_digest(shash, plaintext, psize, output);
-
- kfree(shash);
-
-failed:
- crypto_free_shash(tfm);
- return ret;
-}
-
-int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
-{
- struct hci_dev *hdev = conn->hdev;
- struct link_key *key;
- u8 keybuf[HCI_AMP_LINK_KEY_SIZE];
- u8 gamp_key[HCI_AMP_LINK_KEY_SIZE];
- int err;
-
- if (!hci_conn_check_link_mode(conn))
- return -EACCES;
-
- BT_DBG("conn %p key_type %d", conn, conn->key_type);
-
- /* Legacy key */
- if (conn->key_type < 3) {
- bt_dev_err(hdev, "legacy key type %u", conn->key_type);
- return -EACCES;
- }
-
- *type = conn->key_type;
- *len = HCI_AMP_LINK_KEY_SIZE;
-
- key = hci_find_link_key(hdev, &conn->dst);
- if (!key) {
- BT_DBG("No Link key for conn %p dst %pMR", conn, &conn->dst);
- return -EACCES;
- }
-
- /* BR/EDR Link Key concatenated together with itself */
- memcpy(&keybuf[0], key->val, HCI_LINK_KEY_SIZE);
- memcpy(&keybuf[HCI_LINK_KEY_SIZE], key->val, HCI_LINK_KEY_SIZE);
-
- /* Derive Generic AMP Link Key (gamp) */
- err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key);
- if (err) {
- bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err);
- return err;
- }
-
- if (conn->key_type == HCI_LK_DEBUG_COMBINATION) {
- BT_DBG("Use Generic AMP Key (gamp)");
- memcpy(data, gamp_key, HCI_AMP_LINK_KEY_SIZE);
- return err;
- }
-
- /* Derive Dedicated AMP Link Key: "802b" is 802.11 PAL keyID */
- return hmac_sha256(gamp_key, HCI_AMP_LINK_KEY_SIZE, "802b", 4, data);
-}
-
-static void read_local_amp_assoc_complete(struct hci_dev *hdev, u8 status,
- u16 opcode, struct sk_buff *skb)
-{
- struct hci_rp_read_local_amp_assoc *rp = (void *)skb->data;
- struct amp_assoc *assoc = &hdev->loc_assoc;
- size_t rem_len, frag_len;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
- if (rp->status)
- goto send_rsp;
-
- frag_len = skb->len - sizeof(*rp);
- rem_len = __le16_to_cpu(rp->rem_len);
-
- if (rem_len > frag_len) {
- BT_DBG("frag_len %zu rem_len %zu", frag_len, rem_len);
-
- memcpy(assoc->data + assoc->offset, rp->frag, frag_len);
- assoc->offset += frag_len;
-
- /* Read other fragments */
- amp_read_loc_assoc_frag(hdev, rp->phy_handle);
-
- return;
- }
-
- memcpy(assoc->data + assoc->offset, rp->frag, rem_len);
- assoc->len = assoc->offset + rem_len;
- assoc->offset = 0;
-
-send_rsp:
- /* Send A2MP Rsp when all fragments are received */
- a2mp_send_getampassoc_rsp(hdev, rp->status);
- a2mp_send_create_phy_link_req(hdev, rp->status);
-}
-
-void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
-{
- struct hci_cp_read_local_amp_assoc cp;
- struct amp_assoc *loc_assoc = &hdev->loc_assoc;
- struct hci_request req;
- int err;
-
- BT_DBG("%s handle %u", hdev->name, phy_handle);
-
- cp.phy_handle = phy_handle;
- cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
- cp.len_so_far = cpu_to_le16(loc_assoc->offset);
-
- hci_req_init(&req, hdev);
- hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
- if (err < 0)
- a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
-}
-
-void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
-{
- struct hci_cp_read_local_amp_assoc cp;
- struct hci_request req;
- int err;
-
- memset(&hdev->loc_assoc, 0, sizeof(struct amp_assoc));
- memset(&cp, 0, sizeof(cp));
-
- cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
-
- set_bit(READ_LOC_AMP_ASSOC, &mgr->state);
- hci_req_init(&req, hdev);
- hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
- if (err < 0)
- a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
-}
-
-void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
- struct hci_conn *hcon)
-{
- struct hci_cp_read_local_amp_assoc cp;
- struct amp_mgr *mgr = hcon->amp_mgr;
- struct hci_request req;
- int err;
-
- if (!mgr)
- return;
-
- cp.phy_handle = hcon->handle;
- cp.len_so_far = cpu_to_le16(0);
- cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
-
- set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state);
-
- /* Read Local AMP Assoc final link information data */
- hci_req_init(&req, hdev);
- hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
- if (err < 0)
- a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
-}
-
-static void write_remote_amp_assoc_complete(struct hci_dev *hdev, u8 status,
- u16 opcode, struct sk_buff *skb)
-{
- struct hci_rp_write_remote_amp_assoc *rp = (void *)skb->data;
-
- BT_DBG("%s status 0x%2.2x phy_handle 0x%2.2x",
- hdev->name, rp->status, rp->phy_handle);
-
- if (rp->status)
- return;
-
- amp_write_rem_assoc_continue(hdev, rp->phy_handle);
-}
-
-/* Write AMP Assoc data fragments, returns true with last fragment written*/
-static bool amp_write_rem_assoc_frag(struct hci_dev *hdev,
- struct hci_conn *hcon)
-{
- struct hci_cp_write_remote_amp_assoc *cp;
- struct amp_mgr *mgr = hcon->amp_mgr;
- struct amp_ctrl *ctrl;
- struct hci_request req;
- u16 frag_len, len;
-
- ctrl = amp_ctrl_lookup(mgr, hcon->remote_id);
- if (!ctrl)
- return false;
-
- if (!ctrl->assoc_rem_len) {
- BT_DBG("all fragments are written");
- ctrl->assoc_rem_len = ctrl->assoc_len;
- ctrl->assoc_len_so_far = 0;
-
- amp_ctrl_put(ctrl);
- return true;
- }
-
- frag_len = min_t(u16, 248, ctrl->assoc_rem_len);
- len = frag_len + sizeof(*cp);
-
- cp = kzalloc(len, GFP_KERNEL);
- if (!cp) {
- amp_ctrl_put(ctrl);
- return false;
- }
-
- BT_DBG("hcon %p ctrl %p frag_len %u assoc_len %u rem_len %u",
- hcon, ctrl, frag_len, ctrl->assoc_len, ctrl->assoc_rem_len);
-
- cp->phy_handle = hcon->handle;
- cp->len_so_far = cpu_to_le16(ctrl->assoc_len_so_far);
- cp->rem_len = cpu_to_le16(ctrl->assoc_rem_len);
- memcpy(cp->frag, ctrl->assoc, frag_len);
-
- ctrl->assoc_len_so_far += frag_len;
- ctrl->assoc_rem_len -= frag_len;
-
- amp_ctrl_put(ctrl);
-
- hci_req_init(&req, hdev);
- hci_req_add(&req, HCI_OP_WRITE_REMOTE_AMP_ASSOC, len, cp);
- hci_req_run_skb(&req, write_remote_amp_assoc_complete);
-
- kfree(cp);
-
- return false;
-}
-
-void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle)
-{
- struct hci_conn *hcon;
-
- BT_DBG("%s phy handle 0x%2.2x", hdev->name, handle);
-
- hcon = hci_conn_hash_lookup_handle(hdev, handle);
- if (!hcon)
- return;
-
- /* Send A2MP create phylink rsp when all fragments are written */
- if (amp_write_rem_assoc_frag(hdev, hcon))
- a2mp_send_create_phy_link_rsp(hdev, 0);
-}
-
-void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle)
-{
- struct hci_conn *hcon;
-
- BT_DBG("%s phy handle 0x%2.2x", hdev->name, handle);
-
- hcon = hci_conn_hash_lookup_handle(hdev, handle);
- if (!hcon)
- return;
-
- BT_DBG("%s phy handle 0x%2.2x hcon %p", hdev->name, handle, hcon);
-
- amp_write_rem_assoc_frag(hdev, hcon);
-}
-
-static void create_phylink_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- struct hci_cp_create_phy_link *cp;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_CREATE_PHY_LINK);
- if (!cp)
- return;
-
- hci_dev_lock(hdev);
-
- if (status) {
- struct hci_conn *hcon;
-
- hcon = hci_conn_hash_lookup_handle(hdev, cp->phy_handle);
- if (hcon)
- hci_conn_del(hcon);
- } else {
- amp_write_remote_assoc(hdev, cp->phy_handle);
- }
-
- hci_dev_unlock(hdev);
-}
-
-void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
- struct hci_conn *hcon)
-{
- struct hci_cp_create_phy_link cp;
- struct hci_request req;
-
- cp.phy_handle = hcon->handle;
-
- BT_DBG("%s hcon %p phy handle 0x%2.2x", hdev->name, hcon,
- hcon->handle);
-
- if (phylink_gen_key(mgr->l2cap_conn->hcon, cp.key, &cp.key_len,
- &cp.key_type)) {
- BT_DBG("Cannot create link key");
- return;
- }
-
- hci_req_init(&req, hdev);
- hci_req_add(&req, HCI_OP_CREATE_PHY_LINK, sizeof(cp), &cp);
- hci_req_run(&req, create_phylink_complete);
-}
-
-static void accept_phylink_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- struct hci_cp_accept_phy_link *cp;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- if (status)
- return;
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_ACCEPT_PHY_LINK);
- if (!cp)
- return;
-
- amp_write_remote_assoc(hdev, cp->phy_handle);
-}
-
-void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
- struct hci_conn *hcon)
-{
- struct hci_cp_accept_phy_link cp;
- struct hci_request req;
-
- cp.phy_handle = hcon->handle;
-
- BT_DBG("%s hcon %p phy handle 0x%2.2x", hdev->name, hcon,
- hcon->handle);
-
- if (phylink_gen_key(mgr->l2cap_conn->hcon, cp.key, &cp.key_len,
- &cp.key_type)) {
- BT_DBG("Cannot create link key");
- return;
- }
-
- hci_req_init(&req, hdev);
- hci_req_add(&req, HCI_OP_ACCEPT_PHY_LINK, sizeof(cp), &cp);
- hci_req_run(&req, accept_phylink_complete);
-}
-
-void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon)
-{
- struct hci_dev *bredr_hdev = hci_dev_hold(bredr_hcon->hdev);
- struct amp_mgr *mgr = hs_hcon->amp_mgr;
- struct l2cap_chan *bredr_chan;
-
- BT_DBG("bredr_hcon %p hs_hcon %p mgr %p", bredr_hcon, hs_hcon, mgr);
-
- if (!bredr_hdev || !mgr || !mgr->bredr_chan)
- return;
-
- bredr_chan = mgr->bredr_chan;
-
- l2cap_chan_lock(bredr_chan);
-
- set_bit(FLAG_EFS_ENABLE, &bredr_chan->flags);
- bredr_chan->remote_amp_id = hs_hcon->remote_id;
- bredr_chan->local_amp_id = hs_hcon->hdev->id;
- bredr_chan->hs_hcon = hs_hcon;
- bredr_chan->conn->mtu = hs_hcon->hdev->block_mtu;
-
- __l2cap_physical_cfm(bredr_chan, 0);
-
- l2cap_chan_unlock(bredr_chan);
-
- hci_dev_put(bredr_hdev);
-}
-
-void amp_create_logical_link(struct l2cap_chan *chan)
-{
- struct hci_conn *hs_hcon = chan->hs_hcon;
- struct hci_cp_create_accept_logical_link cp;
- struct hci_dev *hdev;
-
- BT_DBG("chan %p hs_hcon %p dst %pMR", chan, hs_hcon,
- &chan->conn->hcon->dst);
-
- if (!hs_hcon)
- return;
-
- hdev = hci_dev_hold(chan->hs_hcon->hdev);
- if (!hdev)
- return;
-
- cp.phy_handle = hs_hcon->handle;
-
- cp.tx_flow_spec.id = chan->local_id;
- cp.tx_flow_spec.stype = chan->local_stype;
- cp.tx_flow_spec.msdu = cpu_to_le16(chan->local_msdu);
- cp.tx_flow_spec.sdu_itime = cpu_to_le32(chan->local_sdu_itime);
- cp.tx_flow_spec.acc_lat = cpu_to_le32(chan->local_acc_lat);
- cp.tx_flow_spec.flush_to = cpu_to_le32(chan->local_flush_to);
-
- cp.rx_flow_spec.id = chan->remote_id;
- cp.rx_flow_spec.stype = chan->remote_stype;
- cp.rx_flow_spec.msdu = cpu_to_le16(chan->remote_msdu);
- cp.rx_flow_spec.sdu_itime = cpu_to_le32(chan->remote_sdu_itime);
- cp.rx_flow_spec.acc_lat = cpu_to_le32(chan->remote_acc_lat);
- cp.rx_flow_spec.flush_to = cpu_to_le32(chan->remote_flush_to);
-
- if (hs_hcon->out)
- hci_send_cmd(hdev, HCI_OP_CREATE_LOGICAL_LINK, sizeof(cp),
- &cp);
- else
- hci_send_cmd(hdev, HCI_OP_ACCEPT_LOGICAL_LINK, sizeof(cp),
- &cp);
-
- hci_dev_put(hdev);
-}
-
-void amp_disconnect_logical_link(struct hci_chan *hchan)
-{
- struct hci_conn *hcon = hchan->conn;
- struct hci_cp_disconn_logical_link cp;
-
- if (hcon->state != BT_CONNECTED) {
- BT_DBG("hchan %p not connected", hchan);
- return;
- }
-
- cp.log_handle = cpu_to_le16(hchan->handle);
- hci_send_cmd(hcon->hdev, HCI_OP_DISCONN_LOGICAL_LINK, sizeof(cp), &cp);
-}
-
-void amp_destroy_logical_link(struct hci_chan *hchan, u8 reason)
-{
- BT_DBG("hchan %p", hchan);
-
- hci_chan_del(hchan);
-}
diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h
deleted file mode 100644
index 97c87abd129f..000000000000
--- a/net/bluetooth/amp.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- Copyright (c) 2011,2012 Intel Corp.
-
-*/
-
-#ifndef __AMP_H
-#define __AMP_H
-
-struct amp_ctrl {
- struct list_head list;
- struct kref kref;
- __u8 id;
- __u16 assoc_len_so_far;
- __u16 assoc_rem_len;
- __u16 assoc_len;
- __u8 *assoc;
-};
-
-int amp_ctrl_put(struct amp_ctrl *ctrl);
-void amp_ctrl_get(struct amp_ctrl *ctrl);
-struct amp_ctrl *amp_ctrl_add(struct amp_mgr *mgr, u8 id);
-struct amp_ctrl *amp_ctrl_lookup(struct amp_mgr *mgr, u8 id);
-void amp_ctrl_list_flush(struct amp_mgr *mgr);
-
-struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
- u8 remote_id, bool out);
-
-int phylink_gen_key(struct hci_conn *hcon, u8 *data, u8 *len, u8 *type);
-
-void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle);
-void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr);
-void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
- struct hci_conn *hcon);
-void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
- struct hci_conn *hcon);
-void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
- struct hci_conn *hcon);
-
-#if IS_ENABLED(CONFIG_BT_HS)
-void amp_create_logical_link(struct l2cap_chan *chan);
-void amp_disconnect_logical_link(struct hci_chan *hchan);
-#else
-static inline void amp_create_logical_link(struct l2cap_chan *chan)
-{
-}
-
-static inline void amp_disconnect_logical_link(struct hci_chan *hchan)
-{
-}
-#endif
-
-void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle);
-void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle);
-void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon);
-void amp_create_logical_link(struct l2cap_chan *chan);
-void amp_disconnect_logical_link(struct hci_chan *hchan);
-void amp_destroy_logical_link(struct hci_chan *hchan, u8 reason);
-
-#endif /* __AMP_H */
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 5a6a49885ab6..ec45f77fce21 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -385,7 +385,8 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
case BNEP_COMPRESSED_DST_ONLY:
__skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN);
- __skb_put_data(nskb, s->eh.h_source, ETH_ALEN + 2);
+ __skb_put_data(nskb, s->eh.h_source, ETH_ALEN);
+ put_unaligned(s->eh.h_proto, (__be16 *)__skb_put(nskb, 2));
break;
case BNEP_GENERAL:
@@ -549,7 +550,7 @@ static struct device *bnep_get_device(struct bnep_session *session)
return &conn->hcon->dev;
}
-static struct device_type bnep_type = {
+static const struct device_type bnep_type = {
.name = "bluetooth",
};
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 9214189279e8..1bc51e2b05a3 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -13,48 +13,33 @@
#define PNP_INFO_SVCLASS_ID 0x1200
-static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len)
-{
- u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
-
- /* If data is already NULL terminated just pass it directly */
- if (data[data_len - 1] == '\0')
- return eir_append_data(eir, eir_len, type, data, data_len);
-
- memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH);
- name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
-
- return eir_append_data(eir, eir_len, type, name, sizeof(name));
-}
-
u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
{
size_t short_len;
size_t complete_len;
- /* no space left for name (+ NULL + type + len) */
- if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
+ /* no space left for name (+ type + len) */
+ if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 2)
return ad_len;
/* use complete name if present and fits */
complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name));
if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH)
- return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE,
- hdev->dev_name, complete_len + 1);
+ return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
+ hdev->dev_name, complete_len);
/* use short name if present */
short_len = strnlen(hdev->short_name, sizeof(hdev->short_name));
if (short_len)
- return eir_append_name(ptr, ad_len, EIR_NAME_SHORT,
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
hdev->short_name,
- short_len == HCI_MAX_SHORT_NAME_LENGTH ?
- short_len : short_len + 1);
+ short_len);
/* use shortened full name if present, we already know that name
* is longer then HCI_MAX_SHORT_NAME_LENGTH
*/
if (complete_len)
- return eir_append_name(ptr, ad_len, EIR_NAME_SHORT,
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
hdev->dev_name,
HCI_MAX_SHORT_NAME_LENGTH);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a41d2693f4d8..3ad74f76983b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -36,7 +36,6 @@
#include "hci_request.h"
#include "smp.h"
-#include "a2mp.h"
#include "eir.h"
struct sco_param {
@@ -69,7 +68,7 @@ static const struct sco_param esco_param_msbc[] = {
};
/* This function requires the caller holds hdev->lock */
-static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
+void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
{
struct hci_conn_params *params;
struct hci_dev *hdev = conn->hdev;
@@ -179,64 +178,6 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hci_dev_put(hdev);
}
-static void hci_acl_create_connection(struct hci_conn *conn)
-{
- struct hci_dev *hdev = conn->hdev;
- struct inquiry_entry *ie;
- struct hci_cp_create_conn cp;
-
- BT_DBG("hcon %p", conn);
-
- /* Many controllers disallow HCI Create Connection while it is doing
- * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create
- * Connection. This may cause the MGMT discovering state to become false
- * without user space's request but it is okay since the MGMT Discovery
- * APIs do not promise that discovery should be done forever. Instead,
- * the user space monitors the status of MGMT discovering and it may
- * request for discovery again when this flag becomes false.
- */
- if (test_bit(HCI_INQUIRY, &hdev->flags)) {
- /* Put this connection to "pending" state so that it will be
- * executed after the inquiry cancel command complete event.
- */
- conn->state = BT_CONNECT2;
- hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL);
- return;
- }
-
- conn->state = BT_CONNECT;
- conn->out = true;
- conn->role = HCI_ROLE_MASTER;
-
- conn->attempt++;
-
- conn->link_policy = hdev->link_policy;
-
- memset(&cp, 0, sizeof(cp));
- bacpy(&cp.bdaddr, &conn->dst);
- cp.pscan_rep_mode = 0x02;
-
- ie = hci_inquiry_cache_lookup(hdev, &conn->dst);
- if (ie) {
- if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
- cp.pscan_rep_mode = ie->data.pscan_rep_mode;
- cp.pscan_mode = ie->data.pscan_mode;
- cp.clock_offset = ie->data.clock_offset |
- cpu_to_le16(0x8000);
- }
-
- memcpy(conn->dev_class, ie->data.dev_class, 3);
- }
-
- cp.pkt_type = cpu_to_le16(conn->pkt_type);
- if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
- cp.role_switch = 0x01;
- else
- cp.role_switch = 0x00;
-
- hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp);
-}
-
int hci_disconnect(struct hci_conn *conn, __u8 reason)
{
BT_DBG("hcon %p", conn);
@@ -1175,9 +1116,6 @@ void hci_conn_del(struct hci_conn *conn)
}
}
- if (conn->amp_mgr)
- amp_mgr_put(conn->amp_mgr);
-
skb_queue_purge(&conn->data_q);
/* Remove the connection from the list and cleanup its remaining
@@ -1186,6 +1124,9 @@ void hci_conn_del(struct hci_conn *conn)
* rest of hci_conn_del.
*/
hci_conn_cleanup(conn);
+
+ /* Dequeue callbacks using connection pointer as data */
+ hci_cmd_sync_dequeue(hdev, NULL, conn, NULL);
}
struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
@@ -1320,53 +1261,6 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle)
return 0;
}
-static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
-{
- struct hci_conn *conn;
- u16 handle = PTR_UINT(data);
-
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- return;
-
- bt_dev_dbg(hdev, "err %d", err);
-
- hci_dev_lock(hdev);
-
- if (!err) {
- hci_connect_le_scan_cleanup(conn, 0x00);
- goto done;
- }
-
- /* Check if connection is still pending */
- if (conn != hci_lookup_le_connect(hdev))
- goto done;
-
- /* Flush to make sure we send create conn cancel command if needed */
- flush_delayed_work(&conn->le_conn_timeout);
- hci_conn_failed(conn, bt_status(err));
-
-done:
- hci_dev_unlock(hdev);
-}
-
-static int hci_connect_le_sync(struct hci_dev *hdev, void *data)
-{
- struct hci_conn *conn;
- u16 handle = PTR_UINT(data);
-
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- return 0;
-
- bt_dev_dbg(hdev, "conn %p", conn);
-
- clear_bit(HCI_CONN_SCANNING, &conn->flags);
- conn->state = BT_CONNECT;
-
- return hci_le_create_conn_sync(hdev, conn);
-}
-
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
u16 conn_timeout, u8 role)
@@ -1433,9 +1327,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
- err = hci_cmd_sync_queue(hdev, hci_connect_le_sync,
- UINT_PTR(conn->handle),
- create_le_conn_complete);
+ err = hci_connect_le_sync(hdev, conn);
if (err) {
hci_conn_del(conn);
return ERR_PTR(err);
@@ -1669,7 +1561,7 @@ done:
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
u8 sec_level, u8 auth_type,
- enum conn_reasons conn_reason)
+ enum conn_reasons conn_reason, u16 timeout)
{
struct hci_conn *acl;
@@ -1700,10 +1592,18 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
acl->conn_reason = conn_reason;
if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
+ int err;
+
acl->sec_level = BT_SECURITY_LOW;
acl->pending_sec_level = sec_level;
acl->auth_type = auth_type;
- hci_acl_create_connection(acl);
+ acl->conn_timeout = timeout;
+
+ err = hci_connect_acl_sync(hdev, acl);
+ if (err) {
+ hci_conn_del(acl);
+ return ERR_PTR(err);
+ }
}
return acl;
@@ -1738,14 +1638,15 @@ static struct hci_link *hci_conn_link(struct hci_conn *parent,
}
struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u16 setting, struct bt_codec *codec)
+ __u16 setting, struct bt_codec *codec,
+ u16 timeout)
{
struct hci_conn *acl;
struct hci_conn *sco;
struct hci_link *link;
acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING,
- CONN_REASON_SCO_CONNECT);
+ CONN_REASON_SCO_CONNECT, timeout);
if (IS_ERR(acl))
return acl;
@@ -2156,18 +2057,31 @@ static int create_pa_sync(struct hci_dev *hdev, void *data)
return hci_update_passive_scan_sync(hdev);
}
-int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
- __u8 sid, struct bt_iso_qos *qos)
+struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, __u8 sid,
+ struct bt_iso_qos *qos)
{
struct hci_cp_le_pa_create_sync *cp;
+ struct hci_conn *conn;
+ int err;
if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+
+ conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ conn->iso_qos = *qos;
+ conn->state = BT_LISTEN;
+
+ hci_conn_hold(conn);
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp) {
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- return -ENOMEM;
+ hci_conn_drop(conn);
+ return ERR_PTR(-ENOMEM);
}
cp->options = qos->bcast.options;
@@ -2179,7 +2093,14 @@ int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
cp->sync_cte_type = qos->bcast.sync_cte_type;
/* Queue start pa_create_sync and scan */
- return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
+ err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
+ if (err < 0) {
+ hci_conn_drop(conn);
+ kfree(cp);
+ return ERR_PTR(err);
+ }
+
+ return conn;
}
int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
@@ -2647,22 +2568,6 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
}
}
-/* Check pending connect attempts */
-void hci_conn_check_pending(struct hci_dev *hdev)
-{
- struct hci_conn *conn;
-
- BT_DBG("hdev %s", hdev->name);
-
- hci_dev_lock(hdev);
-
- conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2);
- if (conn)
- hci_acl_create_connection(conn);
-
- hci_dev_unlock(hdev);
-}
-
static u32 get_link_mode(struct hci_conn *conn)
{
u32 link_mode = 0;
@@ -2978,12 +2883,10 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
static int abort_conn_sync(struct hci_dev *hdev, void *data)
{
- struct hci_conn *conn;
- u16 handle = PTR_UINT(data);
+ struct hci_conn *conn = data;
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- return 0;
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
return hci_abort_conn_sync(hdev, conn, conn->abort_reason);
}
@@ -3011,14 +2914,17 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
*/
if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) {
switch (hci_skb_event(hdev->sent_cmd)) {
+ case HCI_EV_CONN_COMPLETE:
case HCI_EV_LE_CONN_COMPLETE:
case HCI_EV_LE_ENHANCED_CONN_COMPLETE:
case HCI_EVT_LE_CIS_ESTABLISHED:
- hci_cmd_sync_cancel(hdev, -ECANCELED);
+ hci_cmd_sync_cancel(hdev, ECANCELED);
break;
}
+ /* Cancel connect attempt if still queued/pending */
+ } else if (!hci_cancel_connect_sync(hdev, conn)) {
+ return 0;
}
- return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
- NULL);
+ return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 65601aa52e0d..1690ae57a09d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -908,7 +908,7 @@ int hci_get_dev_info(void __user *arg)
else
flags = hdev->flags;
- strcpy(di.name, hdev->name);
+ strscpy(di.name, hdev->name, sizeof(di.name));
di.bdaddr = hdev->bdaddr;
di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4);
di.flags = flags;
@@ -940,20 +940,51 @@ int hci_get_dev_info(void __user *arg)
/* ---- Interface to HCI drivers ---- */
+static int hci_dev_do_poweroff(struct hci_dev *hdev)
+{
+ int err;
+
+ BT_DBG("%s %p", hdev->name, hdev);
+
+ hci_req_sync_lock(hdev);
+
+ err = hci_set_powered_sync(hdev, false);
+
+ hci_req_sync_unlock(hdev);
+
+ return err;
+}
+
static int hci_rfkill_set_block(void *data, bool blocked)
{
struct hci_dev *hdev = data;
+ int err;
BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return -EBUSY;
+ if (blocked == hci_dev_test_flag(hdev, HCI_RFKILLED))
+ return 0;
+
if (blocked) {
hci_dev_set_flag(hdev, HCI_RFKILLED);
+
if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
- !hci_dev_test_flag(hdev, HCI_CONFIG))
- hci_dev_do_close(hdev);
+ !hci_dev_test_flag(hdev, HCI_CONFIG)) {
+ err = hci_dev_do_poweroff(hdev);
+ if (err) {
+ bt_dev_err(hdev, "Error when powering off device on rfkill (%d)",
+ err);
+
+ /* Make sure the device is still closed even if
+ * anything during power off sequence (eg.
+ * disconnecting devices) failed.
+ */
+ hci_dev_do_close(hdev);
+ }
+ }
} else {
hci_dev_clear_flag(hdev, HCI_RFKILLED);
}
@@ -1049,6 +1080,7 @@ static void hci_error_reset(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
+ hci_dev_hold(hdev);
BT_DBG("%s", hdev->name);
if (hdev->hw_error)
@@ -1056,10 +1088,10 @@ static void hci_error_reset(struct work_struct *work)
else
bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
- if (hci_dev_do_close(hdev))
- return;
+ if (!hci_dev_do_close(hdev))
+ hci_dev_do_open(hdev);
- hci_dev_do_open(hdev);
+ hci_dev_put(hdev);
}
void hci_uuids_clear(struct hci_dev *hdev)
@@ -1490,11 +1522,12 @@ static void hci_cmd_timeout(struct work_struct *work)
struct hci_dev *hdev = container_of(work, struct hci_dev,
cmd_timer.work);
- if (hdev->sent_cmd) {
- struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
- u16 opcode = __le16_to_cpu(sent->opcode);
+ if (hdev->req_skb) {
+ u16 opcode = hci_skb_opcode(hdev->req_skb);
bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode);
+
+ hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT);
} else {
bt_dev_err(hdev, "command tx timeout");
}
@@ -2607,10 +2640,11 @@ int hci_register_dev(struct hci_dev *hdev)
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
+ id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
+ id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1,
+ GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -2709,7 +2743,7 @@ err_wqueue:
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
err:
- ida_simple_remove(&hci_index_ida, hdev->id);
+ ida_free(&hci_index_ida, hdev->id);
return error;
}
@@ -2792,8 +2826,9 @@ void hci_release_dev(struct hci_dev *hdev)
hci_dev_unlock(hdev);
ida_destroy(&hdev->unset_handle_ida);
- ida_simple_remove(&hci_index_ida, hdev->id);
+ ida_free(&hci_index_ida, hdev->id);
kfree_skb(hdev->sent_cmd);
+ kfree_skb(hdev->req_skb);
kfree_skb(hdev->recv_event);
kfree(hdev);
}
@@ -2825,6 +2860,23 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev)
return ret;
}
+/* Cancel ongoing command synchronously:
+ *
+ * - Cancel command timer
+ * - Reset command counter
+ * - Cancel command request
+ */
+static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err)
+{
+ bt_dev_dbg(hdev, "err 0x%2.2x", err);
+
+ cancel_delayed_work_sync(&hdev->cmd_timer);
+ cancel_delayed_work_sync(&hdev->ncmd_timer);
+ atomic_set(&hdev->cmd_cnt, 1);
+
+ hci_cmd_sync_cancel_sync(hdev, -err);
+}
+
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
{
@@ -2842,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev)
return 0;
/* Cancel potentially blocking sync operation before suspend */
- __hci_cmd_sync_cancel(hdev, -EHOSTDOWN);
+ hci_cancel_cmd_sync(hdev, -EHOSTDOWN);
hci_req_sync_lock(hdev);
ret = hci_suspend_sync(hdev);
@@ -3106,21 +3158,33 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
EXPORT_SYMBOL(__hci_cmd_send);
/* Get data from the previously sent command */
-void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
+static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode)
{
struct hci_command_hdr *hdr;
- if (!hdev->sent_cmd)
+ if (!skb || skb->len < HCI_COMMAND_HDR_SIZE)
return NULL;
- hdr = (void *) hdev->sent_cmd->data;
+ hdr = (void *)skb->data;
if (hdr->opcode != cpu_to_le16(opcode))
return NULL;
- BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
+ return skb->data + HCI_COMMAND_HDR_SIZE;
+}
+
+/* Get data from the previously sent command */
+void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
+{
+ void *data;
+
+ /* Check if opcode matches last sent command */
+ data = hci_cmd_data(hdev->sent_cmd, opcode);
+ if (!data)
+ /* Check if opcode matches last request */
+ data = hci_cmd_data(hdev->req_skb, opcode);
- return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
+ return data;
}
/* Get data from last received event */
@@ -4021,17 +4085,19 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
if (!status && !hci_req_is_complete(hdev))
return;
+ skb = hdev->req_skb;
+
/* If this was the last command in a request the complete
- * callback would be found in hdev->sent_cmd instead of the
+ * callback would be found in hdev->req_skb instead of the
* command queue (hdev->cmd_q).
*/
- if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) {
- *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb;
+ if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) {
+ *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
return;
}
- if (bt_cb(hdev->sent_cmd)->hci.req_complete) {
- *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete;
+ if (skb && bt_cb(skb)->hci.req_complete) {
+ *req_complete = bt_cb(skb)->hci.req_complete;
return;
}
@@ -4127,6 +4193,36 @@ static void hci_rx_work(struct work_struct *work)
}
}
+static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ int err;
+
+ bt_dev_dbg(hdev, "skb %p", skb);
+
+ kfree_skb(hdev->sent_cmd);
+
+ hdev->sent_cmd = skb_clone(skb, GFP_KERNEL);
+ if (!hdev->sent_cmd) {
+ skb_queue_head(&hdev->cmd_q, skb);
+ queue_work(hdev->workqueue, &hdev->cmd_work);
+ return;
+ }
+
+ err = hci_send_frame(hdev, skb);
+ if (err < 0) {
+ hci_cmd_sync_cancel_sync(hdev, err);
+ return;
+ }
+
+ if (hci_req_status_pend(hdev) &&
+ !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) {
+ kfree_skb(hdev->req_skb);
+ hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
+ }
+
+ atomic_dec(&hdev->cmd_cnt);
+}
+
static void hci_cmd_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work);
@@ -4141,30 +4237,15 @@ static void hci_cmd_work(struct work_struct *work)
if (!skb)
return;
- kfree_skb(hdev->sent_cmd);
-
- hdev->sent_cmd = skb_clone(skb, GFP_KERNEL);
- if (hdev->sent_cmd) {
- int res;
- if (hci_req_status_pend(hdev))
- hci_dev_set_flag(hdev, HCI_CMD_PENDING);
- atomic_dec(&hdev->cmd_cnt);
-
- res = hci_send_frame(hdev, skb);
- if (res < 0)
- __hci_cmd_sync_cancel(hdev, -res);
+ hci_send_cmd_sync(hdev, skb);
- rcu_read_lock();
- if (test_bit(HCI_RESET, &hdev->flags) ||
- hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
- cancel_delayed_work(&hdev->cmd_timer);
- else
- queue_delayed_work(hdev->workqueue, &hdev->cmd_timer,
- HCI_CMD_TIMEOUT);
- rcu_read_unlock();
- } else {
- skb_queue_head(&hdev->cmd_q, skb);
- queue_work(hdev->workqueue, &hdev->cmd_work);
- }
+ rcu_read_lock();
+ if (test_bit(HCI_RESET, &hdev->flags) ||
+ hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
+ cancel_delayed_work(&hdev->cmd_timer);
+ else
+ queue_delayed_work(hdev->workqueue, &hdev->cmd_timer,
+ HCI_CMD_TIMEOUT);
+ rcu_read_unlock();
}
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ef8c3bed7361..4ae224824012 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -36,8 +36,6 @@
#include "hci_request.h"
#include "hci_debugfs.h"
#include "hci_codec.h"
-#include "a2mp.h"
-#include "amp.h"
#include "smp.h"
#include "msft.h"
#include "eir.h"
@@ -95,11 +93,11 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data,
/* It is possible that we receive Inquiry Complete event right
* before we receive Inquiry Cancel Command Complete event, in
* which case the latter event should have status of Command
- * Disallowed (0x0c). This should not be treated as error, since
+ * Disallowed. This should not be treated as error, since
* we actually achieve what Inquiry Cancel wants to achieve,
* which is to end the last Inquiry session.
*/
- if (rp->status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) {
+ if (rp->status == HCI_ERROR_COMMAND_DISALLOWED && !test_bit(HCI_INQUIRY, &hdev->flags)) {
bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command");
rp->status = 0x00;
}
@@ -120,8 +118,6 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data,
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
hci_dev_unlock(hdev);
- hci_conn_check_pending(hdev);
-
return rp->status;
}
@@ -152,8 +148,6 @@ static u8 hci_cc_exit_periodic_inq(struct hci_dev *hdev, void *data,
hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ);
- hci_conn_check_pending(hdev);
-
return rp->status;
}
@@ -2314,10 +2308,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
{
bt_dev_dbg(hdev, "status 0x%2.2x", status);
- if (status) {
- hci_conn_check_pending(hdev);
+ if (status)
return;
- }
if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY))
set_bit(HCI_INQUIRY, &hdev->flags);
@@ -2342,12 +2334,9 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
if (status) {
if (conn && conn->state == BT_CONNECT) {
- if (status != 0x0c || conn->attempt > 2) {
- conn->state = BT_CLOSED;
- hci_connect_cfm(conn, status);
- hci_conn_del(conn);
- } else
- conn->state = BT_CONNECT2;
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
}
} else {
if (!conn) {
@@ -2526,9 +2515,7 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
* Only those in BT_CONFIG or BT_CONNECTED states can be
* considered connected.
*/
- if (conn &&
- (conn->state == BT_CONFIG || conn->state == BT_CONNECTED) &&
- !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
+ if (conn && (conn->state == BT_CONFIG || conn->state == BT_CONNECTED))
mgmt_device_connected(hdev, conn, name, name_len);
if (discov->state == DISCOVERY_STOPPED)
@@ -3039,8 +3026,6 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- hci_conn_check_pending(hdev);
-
if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
return;
@@ -3262,8 +3247,6 @@ done:
unlock:
hci_dev_unlock(hdev);
-
- hci_conn_check_pending(hdev);
}
static void hci_reject_conn(struct hci_dev *hdev, bdaddr_t *bdaddr)
@@ -3556,8 +3539,6 @@ static void hci_remote_name_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- hci_conn_check_pending(hdev);
-
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
@@ -3660,7 +3641,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
* controller really supports it. If it doesn't, assume
* the default size (16).
*/
- if (!(hdev->commands[20] & 0x10)) {
+ if (!(hdev->commands[20] & 0x10) ||
+ test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) {
conn->enc_key_size = HCI_LINK_KEY_SIZE;
goto notify;
}
@@ -3762,8 +3744,9 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data,
bacpy(&cp.bdaddr, &conn->dst);
cp.pscan_rep_mode = 0x02;
hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
- } else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
+ } else {
mgmt_device_connected(hdev, conn, NULL, 0);
+ }
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
@@ -3936,6 +3919,11 @@ static u8 hci_cc_le_setup_iso_path(struct hci_dev *hdev, void *data,
* last.
*/
hci_connect_cfm(conn, rp->status);
+
+ /* Notify device connected in case it is a BIG Sync */
+ if (!rp->status && test_bit(HCI_CONN_BIG_SYNC, &conn->flags))
+ mgmt_device_connected(hdev, conn, NULL, 0);
+
break;
}
@@ -4381,7 +4369,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data,
* (since for this kind of commands there will not be a command
* complete event).
*/
- if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) {
+ if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) {
hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
req_complete_skb);
if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
@@ -5010,8 +4998,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, void *data,
bacpy(&cp.bdaddr, &conn->dst);
cp.pscan_rep_mode = 0x02;
hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
- } else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
+ } else {
mgmt_device_connected(hdev, conn, NULL, 0);
+ }
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
@@ -5329,9 +5318,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn || !hci_conn_ssp_enabled(conn))
+ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
goto unlock;
+ /* Assume remote supports SSP since it has triggered this event */
+ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
hci_conn_hold(conn);
if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -5672,150 +5664,6 @@ unlock:
hci_dev_unlock(hdev);
}
-#if IS_ENABLED(CONFIG_BT_HS)
-static void hci_chan_selected_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_channel_selected *ev = data;
- struct hci_conn *hcon;
-
- bt_dev_dbg(hdev, "handle 0x%2.2x", ev->phy_handle);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- return;
-
- amp_read_loc_assoc_final_data(hdev, hcon);
-}
-
-static void hci_phy_link_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_phy_link_complete *ev = data;
- struct hci_conn *hcon, *bredr_hcon;
-
- bt_dev_dbg(hdev, "handle 0x%2.2x status 0x%2.2x", ev->phy_handle,
- ev->status);
-
- hci_dev_lock(hdev);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- goto unlock;
-
- if (!hcon->amp_mgr)
- goto unlock;
-
- if (ev->status) {
- hci_conn_del(hcon);
- goto unlock;
- }
-
- bredr_hcon = hcon->amp_mgr->l2cap_conn->hcon;
-
- hcon->state = BT_CONNECTED;
- bacpy(&hcon->dst, &bredr_hcon->dst);
-
- hci_conn_hold(hcon);
- hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
- hci_conn_drop(hcon);
-
- hci_debugfs_create_conn(hcon);
- hci_conn_add_sysfs(hcon);
-
- amp_physical_cfm(bredr_hcon, hcon);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
-static void hci_loglink_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_logical_link_complete *ev = data;
- struct hci_conn *hcon;
- struct hci_chan *hchan;
- struct amp_mgr *mgr;
-
- bt_dev_dbg(hdev, "log_handle 0x%4.4x phy_handle 0x%2.2x status 0x%2.2x",
- le16_to_cpu(ev->handle), ev->phy_handle, ev->status);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- return;
-
- /* Create AMP hchan */
- hchan = hci_chan_create(hcon);
- if (!hchan)
- return;
-
- hchan->handle = le16_to_cpu(ev->handle);
- hchan->amp = true;
-
- BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan);
-
- mgr = hcon->amp_mgr;
- if (mgr && mgr->bredr_chan) {
- struct l2cap_chan *bredr_chan = mgr->bredr_chan;
-
- l2cap_chan_lock(bredr_chan);
-
- bredr_chan->conn->mtu = hdev->block_mtu;
- l2cap_logical_cfm(bredr_chan, hchan, 0);
- hci_conn_hold(hcon);
-
- l2cap_chan_unlock(bredr_chan);
- }
-}
-
-static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_disconn_logical_link_complete *ev = data;
- struct hci_chan *hchan;
-
- bt_dev_dbg(hdev, "handle 0x%4.4x status 0x%2.2x",
- le16_to_cpu(ev->handle), ev->status);
-
- if (ev->status)
- return;
-
- hci_dev_lock(hdev);
-
- hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle));
- if (!hchan || !hchan->amp)
- goto unlock;
-
- amp_destroy_logical_link(hchan, ev->reason);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
-static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_disconn_phy_link_complete *ev = data;
- struct hci_conn *hcon;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
-
- if (ev->status)
- return;
-
- hci_dev_lock(hdev);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon && hcon->type == AMP_LINK) {
- hcon->state = BT_CLOSED;
- hci_disconn_cfm(hcon, ev->reason);
- hci_conn_del(hcon);
- }
-
- hci_dev_unlock(hdev);
-}
-#endif
-
static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *local_rpa)
{
@@ -5981,8 +5829,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
goto unlock;
}
- if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_connected(hdev, conn, NULL, 0);
+ mgmt_device_connected(hdev, conn, NULL, 0);
conn->sec_level = BT_SECURITY_LOW;
conn->state = BT_CONFIG;
@@ -6681,7 +6528,7 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data,
* transition into connected state and mark it as
* successful.
*/
- if (!conn->out && ev->status == 0x1a &&
+ if (!conn->out && ev->status == HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE &&
(hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES))
status = 0x00;
else
@@ -6794,6 +6641,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_UNKNOWN_CONN_ID);
+ if (max > hcon->le_conn_max_interval)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
if (hci_check_conn_params(min, max, latency, timeout))
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_INVALID_LL_PARAMS);
@@ -7207,6 +7058,9 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
/* Notify iso layer */
hci_connect_cfm(pa_sync, 0x00);
+ /* Notify MGMT layer */
+ mgmt_device_connected(hdev, pa_sync, NULL, 0);
+
unlock:
hci_dev_unlock(hdev);
}
@@ -7317,10 +7171,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent);
/* Only match event if command OGF is for LE */
- if (hdev->sent_cmd &&
- hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 &&
- hci_skb_event(hdev->sent_cmd) == ev->subevent) {
- *opcode = hci_skb_opcode(hdev->sent_cmd);
+ if (hdev->req_skb &&
+ hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 &&
+ hci_skb_event(hdev->req_skb) == ev->subevent) {
+ *opcode = hci_skb_opcode(hdev->req_skb);
hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete,
req_complete_skb);
}
@@ -7420,10 +7274,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
* keep track of the bdaddr of the connection event that woke us up.
*/
if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_LE_META) {
struct hci_ev_le_meta *le_ev = (void *)skb->data;
@@ -7619,25 +7473,6 @@ static const struct hci_ev {
/* [0x3e = HCI_EV_LE_META] */
HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt,
sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE),
-#if IS_ENABLED(CONFIG_BT_HS)
- /* [0x40 = HCI_EV_PHY_LINK_COMPLETE] */
- HCI_EV(HCI_EV_PHY_LINK_COMPLETE, hci_phy_link_complete_evt,
- sizeof(struct hci_ev_phy_link_complete)),
- /* [0x41 = HCI_EV_CHANNEL_SELECTED] */
- HCI_EV(HCI_EV_CHANNEL_SELECTED, hci_chan_selected_evt,
- sizeof(struct hci_ev_channel_selected)),
- /* [0x42 = HCI_EV_DISCONN_PHY_LINK_COMPLETE] */
- HCI_EV(HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE,
- hci_disconn_loglink_complete_evt,
- sizeof(struct hci_ev_disconn_logical_link_complete)),
- /* [0x45 = HCI_EV_LOGICAL_LINK_COMPLETE] */
- HCI_EV(HCI_EV_LOGICAL_LINK_COMPLETE, hci_loglink_complete_evt,
- sizeof(struct hci_ev_logical_link_complete)),
- /* [0x46 = HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE] */
- HCI_EV(HCI_EV_DISCONN_PHY_LINK_COMPLETE,
- hci_disconn_phylink_complete_evt,
- sizeof(struct hci_ev_disconn_phy_link_complete)),
-#endif
/* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */
HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
sizeof(struct hci_ev_num_comp_blocks)),
@@ -7707,10 +7542,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
}
/* Only match event if command OGF is not for LE */
- if (hdev->sent_cmd &&
- hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 &&
- hci_skb_event(hdev->sent_cmd) == event) {
- hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd),
+ if (hdev->req_skb &&
+ hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 &&
+ hci_skb_event(hdev->req_skb) == event) {
+ hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb),
status, &req_complete, &req_complete_skb);
req_evt = event;
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 6e023b0104b0..00e02138003e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -895,7 +895,7 @@ void hci_request_setup(struct hci_dev *hdev)
void hci_request_cancel_all(struct hci_dev *hdev)
{
- __hci_cmd_sync_cancel(hdev, ENODEV);
+ hci_cmd_sync_cancel_sync(hdev, ENODEV);
cancel_interleave_scan(hdev);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 3e7cd330d731..4ee1b976678b 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -101,7 +101,7 @@ static bool hci_sock_gen_cookie(struct sock *sk)
int id = hci_pi(sk)->cookie;
if (!id) {
- id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL);
+ id = ida_alloc_min(&sock_cookie_ida, 1, GFP_KERNEL);
if (id < 0)
id = 0xffffffff;
@@ -119,7 +119,7 @@ static void hci_sock_free_cookie(struct sock *sk)
if (id) {
hci_pi(sk)->cookie = 0xffffffff;
- ida_simple_remove(&sock_cookie_ida, id);
+ ida_free(&sock_cookie_ida, id);
}
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index a6fc8a2a5c67..f6b662369322 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -32,6 +32,10 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
+ /* Free the request command so it is not used as response */
+ kfree_skb(hdev->req_skb);
+ hdev->req_skb = NULL;
+
if (skb) {
struct sock *sk = hci_skb_sk(skb);
@@ -39,7 +43,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
if (sk)
sock_put(sk);
- hdev->req_skb = skb_get(skb);
+ hdev->req_rsp = skb_get(skb);
}
wake_up_interruptible(&hdev->req_wait_q);
@@ -187,8 +191,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
hdev->req_status = 0;
hdev->req_result = 0;
- skb = hdev->req_skb;
- hdev->req_skb = NULL;
+ skb = hdev->req_rsp;
+ hdev->req_rsp = NULL;
bt_dev_dbg(hdev, "end: err %d", err);
@@ -566,6 +570,17 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
}
+static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev,
+ struct hci_cmd_sync_work_entry *entry,
+ int err)
+{
+ if (entry->destroy)
+ entry->destroy(hdev, entry->data, err);
+
+ list_del(&entry->list);
+ kfree(entry);
+}
+
void hci_cmd_sync_clear(struct hci_dev *hdev)
{
struct hci_cmd_sync_work_entry *entry, *tmp;
@@ -574,17 +589,12 @@ void hci_cmd_sync_clear(struct hci_dev *hdev)
cancel_work_sync(&hdev->reenable_adv_work);
mutex_lock(&hdev->cmd_sync_work_lock);
- list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) {
- if (entry->destroy)
- entry->destroy(hdev, entry->data, -ECANCELED);
-
- list_del(&entry->list);
- kfree(entry);
- }
+ list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list)
+ _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED);
mutex_unlock(&hdev->cmd_sync_work_lock);
}
-void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
+void hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
@@ -592,15 +602,17 @@ void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
hdev->req_result = err;
hdev->req_status = HCI_REQ_CANCELED;
- cancel_delayed_work_sync(&hdev->cmd_timer);
- cancel_delayed_work_sync(&hdev->ncmd_timer);
- atomic_set(&hdev->cmd_cnt, 1);
-
- wake_up_interruptible(&hdev->req_wait_q);
+ queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work);
}
}
+EXPORT_SYMBOL(hci_cmd_sync_cancel);
-void hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
+/* Cancel ongoing command request synchronously:
+ *
+ * - Set result and mark status to HCI_REQ_CANCELED
+ * - Wakeup command sync thread
+ */
+void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
@@ -608,10 +620,10 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
hdev->req_result = err;
hdev->req_status = HCI_REQ_CANCELED;
- queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work);
+ wake_up_interruptible(&hdev->req_wait_q);
}
}
-EXPORT_SYMBOL(hci_cmd_sync_cancel);
+EXPORT_SYMBOL(hci_cmd_sync_cancel_sync);
/* Submit HCI command to be run in as cmd_sync_work:
*
@@ -667,6 +679,115 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
}
EXPORT_SYMBOL(hci_cmd_sync_queue);
+static struct hci_cmd_sync_work_entry *
+_hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ struct hci_cmd_sync_work_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) {
+ if (func && entry->func != func)
+ continue;
+
+ if (data && entry->data != data)
+ continue;
+
+ if (destroy && entry->destroy != destroy)
+ continue;
+
+ return entry;
+ }
+
+ return NULL;
+}
+
+/* Queue HCI command entry once:
+ *
+ * - Lookup if an entry already exist and only if it doesn't creates a new entry
+ * and queue it.
+ */
+int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy))
+ return 0;
+
+ return hci_cmd_sync_queue(hdev, func, data, destroy);
+}
+EXPORT_SYMBOL(hci_cmd_sync_queue_once);
+
+/* Lookup HCI command entry:
+ *
+ * - Return first entry that matches by function callback or data or
+ * destroy callback.
+ */
+struct hci_cmd_sync_work_entry *
+hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ struct hci_cmd_sync_work_entry *entry;
+
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy);
+ mutex_unlock(&hdev->cmd_sync_work_lock);
+
+ return entry;
+}
+EXPORT_SYMBOL(hci_cmd_sync_lookup_entry);
+
+/* Cancel HCI command entry */
+void hci_cmd_sync_cancel_entry(struct hci_dev *hdev,
+ struct hci_cmd_sync_work_entry *entry)
+{
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED);
+ mutex_unlock(&hdev->cmd_sync_work_lock);
+}
+EXPORT_SYMBOL(hci_cmd_sync_cancel_entry);
+
+/* Dequeue one HCI command entry:
+ *
+ * - Lookup and cancel first entry that matches.
+ */
+bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev,
+ hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ struct hci_cmd_sync_work_entry *entry;
+
+ entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy);
+ if (!entry)
+ return false;
+
+ hci_cmd_sync_cancel_entry(hdev, entry);
+
+ return true;
+}
+EXPORT_SYMBOL(hci_cmd_sync_dequeue_once);
+
+/* Dequeue HCI command entry:
+ *
+ * - Lookup and cancel any entry that matches by function callback or data or
+ * destroy callback.
+ */
+bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ struct hci_cmd_sync_work_entry *entry;
+ bool ret = false;
+
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ while ((entry = _hci_cmd_sync_lookup_entry(hdev, func, data,
+ destroy))) {
+ _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED);
+ ret = true;
+ }
+ mutex_unlock(&hdev->cmd_sync_work_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(hci_cmd_sync_dequeue);
+
int hci_update_eir_sync(struct hci_dev *hdev)
{
struct hci_cp_write_eir cp;
@@ -2206,8 +2327,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
/* During suspend, only wakeable devices can be in acceptlist */
if (hdev->suspended &&
- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
+ hci_le_del_accept_list_sync(hdev, &params->addr,
+ params->addr_type);
return 0;
+ }
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
@@ -2442,6 +2566,16 @@ static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
return p;
}
+/* Clear LE Accept List */
+static int hci_le_clear_accept_list_sync(struct hci_dev *hdev)
+{
+ if (!(hdev->commands[26] & 0x80))
+ return 0;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL,
+ HCI_CMD_TIMEOUT);
+}
+
/* Device must not be scanning when updating the accept list.
*
* Update is done using the following sequence:
@@ -2490,6 +2624,31 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
goto done;
}
+ /* Force address filtering if PA Sync is in progress */
+ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
+ struct hci_cp_le_pa_create_sync *sent;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC);
+ if (sent) {
+ struct conn_params pa;
+
+ memset(&pa, 0, sizeof(pa));
+
+ bacpy(&pa.addr, &sent->addr);
+ pa.addr_type = sent->addr_type;
+
+ /* Clear first since there could be addresses left
+ * behind.
+ */
+ hci_le_clear_accept_list_sync(hdev);
+
+ num_entries = 1;
+ err = hci_le_add_accept_list_sync(hdev, &pa,
+ &num_entries);
+ goto done;
+ }
+ }
+
/* Go through the current accept list programmed into the
* controller one by one and check if that address is connected or is
* still in the list of pending connections or list of devices to
@@ -2599,6 +2758,14 @@ done:
return filter_policy;
}
+static void hci_le_scan_phy_params(struct hci_cp_le_scan_phy_params *cp,
+ u8 type, u16 interval, u16 window)
+{
+ cp->type = type;
+ cp->interval = cpu_to_le16(interval);
+ cp->window = cpu_to_le16(window);
+}
+
static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
u16 interval, u16 window,
u8 own_addr_type, u8 filter_policy)
@@ -2606,7 +2773,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
struct hci_cp_le_set_ext_scan_params *cp;
struct hci_cp_le_scan_phy_params *phy;
u8 data[sizeof(*cp) + sizeof(*phy) * 2];
- u8 num_phy = 0;
+ u8 num_phy = 0x00;
cp = (void *)data;
phy = (void *)cp->data;
@@ -2616,28 +2783,64 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
cp->own_addr_type = own_addr_type;
cp->filter_policy = filter_policy;
+ /* Check if PA Sync is in progress then select the PHY based on the
+ * hci_conn.iso_qos.
+ */
+ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
+ struct hci_cp_le_add_to_accept_list *sent;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST);
+ if (sent) {
+ struct hci_conn *conn;
+
+ conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK,
+ &sent->bdaddr);
+ if (conn) {
+ struct bt_iso_qos *qos = &conn->iso_qos;
+
+ if (qos->bcast.in.phy & BT_ISO_PHY_1M ||
+ qos->bcast.in.phy & BT_ISO_PHY_2M) {
+ cp->scanning_phys |= LE_SCAN_PHY_1M;
+ hci_le_scan_phy_params(phy, type,
+ interval,
+ window);
+ num_phy++;
+ phy++;
+ }
+
+ if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
+ cp->scanning_phys |= LE_SCAN_PHY_CODED;
+ hci_le_scan_phy_params(phy, type,
+ interval,
+ window);
+ num_phy++;
+ phy++;
+ }
+
+ if (num_phy)
+ goto done;
+ }
+ }
+ }
+
if (scan_1m(hdev) || scan_2m(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_1M;
-
- phy->type = type;
- phy->interval = cpu_to_le16(interval);
- phy->window = cpu_to_le16(window);
-
+ hci_le_scan_phy_params(phy, type, interval, window);
num_phy++;
phy++;
}
if (scan_coded(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
-
- phy->type = type;
- phy->interval = cpu_to_le16(interval);
- phy->window = cpu_to_le16(window);
-
+ hci_le_scan_phy_params(phy, type, interval, window);
num_phy++;
phy++;
}
+done:
+ if (!num_phy)
+ return -EINVAL;
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS,
sizeof(*cp) + sizeof(*phy) * num_phy,
data, HCI_CMD_TIMEOUT);
@@ -2876,7 +3079,8 @@ int hci_update_passive_scan(struct hci_dev *hdev)
hci_dev_test_flag(hdev, HCI_UNREGISTER))
return 0;
- return hci_cmd_sync_queue(hdev, update_passive_scan_sync, NULL, NULL);
+ return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL,
+ NULL);
}
int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val)
@@ -4095,16 +4299,6 @@ static int hci_le_read_accept_list_size_sync(struct hci_dev *hdev)
0, NULL, HCI_CMD_TIMEOUT);
}
-/* Clear LE Accept List */
-static int hci_le_clear_accept_list_sync(struct hci_dev *hdev)
-{
- if (!(hdev->commands[26] & 0x80))
- return 0;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL,
- HCI_CMD_TIMEOUT);
-}
-
/* Read LE Resolving List Size */
static int hci_le_read_resolv_list_size_sync(struct hci_dev *hdev)
{
@@ -4831,6 +5025,11 @@ int hci_dev_open_sync(struct hci_dev *hdev)
hdev->sent_cmd = NULL;
}
+ if (hdev->req_skb) {
+ kfree_skb(hdev->req_skb);
+ hdev->req_skb = NULL;
+ }
+
clear_bit(HCI_RUNNING, &hdev->flags);
hci_sock_dev_event(hdev, HCI_DEV_CLOSE);
@@ -4991,6 +5190,12 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hdev->sent_cmd = NULL;
}
+ /* Drop last request */
+ if (hdev->req_skb) {
+ kfree_skb(hdev->req_skb);
+ hdev->req_skb = NULL;
+ }
+
clear_bit(HCI_RUNNING, &hdev->flags);
hci_sock_dev_event(hdev, HCI_DEV_CLOSE);
@@ -5400,27 +5605,33 @@ static int hci_power_off_sync(struct hci_dev *hdev)
if (!test_bit(HCI_UP, &hdev->flags))
return 0;
+ hci_dev_set_flag(hdev, HCI_POWERING_DOWN);
+
if (test_bit(HCI_ISCAN, &hdev->flags) ||
test_bit(HCI_PSCAN, &hdev->flags)) {
err = hci_write_scan_enable_sync(hdev, 0x00);
if (err)
- return err;
+ goto out;
}
err = hci_clear_adv_sync(hdev, NULL, false);
if (err)
- return err;
+ goto out;
err = hci_stop_discovery_sync(hdev);
if (err)
- return err;
+ goto out;
/* Terminated due to Power Off */
err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
if (err)
- return err;
+ goto out;
+
+ err = hci_dev_close_sync(hdev);
- return hci_dev_close_sync(hdev);
+out:
+ hci_dev_clear_flag(hdev, HCI_POWERING_DOWN);
+ return err;
}
int hci_set_powered_sync(struct hci_dev *hdev, u8 val)
@@ -5559,7 +5770,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
bt_dev_dbg(hdev, "");
- if (hci_dev_test_flag(hdev, HCI_INQUIRY))
+ if (test_bit(HCI_INQUIRY, &hdev->flags))
return 0;
hci_dev_lock(hdev);
@@ -6158,12 +6369,21 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
conn->conn_timeout, NULL);
}
-int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
+static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data)
{
struct hci_cp_le_create_conn cp;
struct hci_conn_params *params;
u8 own_addr_type;
int err;
+ struct hci_conn *conn = data;
+
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
+
+ bt_dev_dbg(hdev, "conn %p", conn);
+
+ clear_bit(HCI_CONN_SCANNING, &conn->flags);
+ conn->state = BT_CONNECT;
/* If requested to connect as peripheral use directed advertising */
if (conn->role == HCI_ROLE_SLAVE) {
@@ -6481,3 +6701,125 @@ int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
UINT_PTR(instance), NULL);
}
+
+static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data)
+{
+ struct hci_conn *conn = data;
+ struct inquiry_entry *ie;
+ struct hci_cp_create_conn cp;
+ int err;
+
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
+
+ /* Many controllers disallow HCI Create Connection while it is doing
+ * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create
+ * Connection. This may cause the MGMT discovering state to become false
+ * without user space's request but it is okay since the MGMT Discovery
+ * APIs do not promise that discovery should be done forever. Instead,
+ * the user space monitors the status of MGMT discovering and it may
+ * request for discovery again when this flag becomes false.
+ */
+ if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+ err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0,
+ NULL, HCI_CMD_TIMEOUT);
+ if (err)
+ bt_dev_warn(hdev, "Failed to cancel inquiry %d", err);
+ }
+
+ conn->state = BT_CONNECT;
+ conn->out = true;
+ conn->role = HCI_ROLE_MASTER;
+
+ conn->attempt++;
+
+ conn->link_policy = hdev->link_policy;
+
+ memset(&cp, 0, sizeof(cp));
+ bacpy(&cp.bdaddr, &conn->dst);
+ cp.pscan_rep_mode = 0x02;
+
+ ie = hci_inquiry_cache_lookup(hdev, &conn->dst);
+ if (ie) {
+ if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
+ cp.pscan_rep_mode = ie->data.pscan_rep_mode;
+ cp.pscan_mode = ie->data.pscan_mode;
+ cp.clock_offset = ie->data.clock_offset |
+ cpu_to_le16(0x8000);
+ }
+
+ memcpy(conn->dev_class, ie->data.dev_class, 3);
+ }
+
+ cp.pkt_type = cpu_to_le16(conn->pkt_type);
+ if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
+ cp.role_switch = 0x01;
+ else
+ cp.role_switch = 0x00;
+
+ return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN,
+ sizeof(cp), &cp,
+ HCI_EV_CONN_COMPLETE,
+ conn->conn_timeout, NULL);
+}
+
+int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn,
+ NULL);
+}
+
+static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct hci_conn *conn = data;
+
+ bt_dev_dbg(hdev, "err %d", err);
+
+ if (err == -ECANCELED)
+ return;
+
+ hci_dev_lock(hdev);
+
+ if (!hci_conn_valid(hdev, conn))
+ goto done;
+
+ if (!err) {
+ hci_connect_le_scan_cleanup(conn, 0x00);
+ goto done;
+ }
+
+ /* Check if connection is still pending */
+ if (conn != hci_lookup_le_connect(hdev))
+ goto done;
+
+ /* Flush to make sure we send create conn cancel command if needed */
+ flush_delayed_work(&conn->le_conn_timeout);
+ hci_conn_failed(conn, bt_status(err));
+
+done:
+ hci_dev_unlock(hdev);
+}
+
+int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn,
+ create_le_conn_complete);
+}
+
+int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ if (conn->state != BT_OPEN)
+ return -EINVAL;
+
+ switch (conn->type) {
+ case ACL_LINK:
+ return !hci_cmd_sync_dequeue_once(hdev,
+ hci_acl_create_conn_sync,
+ conn, NULL);
+ case LE_LINK:
+ return !hci_cmd_sync_dequeue_once(hdev, hci_le_create_conn_sync,
+ conn, create_le_conn_complete);
+ }
+
+ return -ENOENT;
+}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 04f6572d35f1..c8793e57f4b5 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -3,7 +3,7 @@
* BlueZ - Bluetooth protocol stack for Linux
*
* Copyright (C) 2022 Intel Corporation
- * Copyright 2023 NXP
+ * Copyright 2023-2024 NXP
*/
#include <linux/module.h>
@@ -690,11 +690,8 @@ static void iso_sock_cleanup_listen(struct sock *parent)
iso_sock_kill(sk);
}
- /* If listening socket stands for a PA sync connection,
- * properly disconnect the hcon and socket.
- */
- if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon &&
- test_bit(HCI_CONN_PA_SYNC, &iso_pi(parent)->conn->hcon->flags)) {
+ /* If listening socket has a hcon, properly disconnect it */
+ if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon) {
iso_sock_disconn(parent);
return;
}
@@ -837,10 +834,10 @@ static struct bt_iso_qos default_qos = {
.bcode = {0x00},
.options = 0x00,
.skip = 0x0000,
- .sync_timeout = 0x4000,
+ .sync_timeout = BT_ISO_SYNC_TIMEOUT,
.sync_cte_type = 0x00,
.mse = 0x00,
- .timeout = 0x4000,
+ .timeout = BT_ISO_SYNC_TIMEOUT,
},
};
@@ -1076,6 +1073,8 @@ static int iso_listen_bis(struct sock *sk)
{
struct hci_dev *hdev;
int err = 0;
+ struct iso_conn *conn;
+ struct hci_conn *hcon;
BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src,
&iso_pi(sk)->dst, iso_pi(sk)->bc_sid);
@@ -1096,18 +1095,40 @@ static int iso_listen_bis(struct sock *sk)
if (!hdev)
return -EHOSTUNREACH;
+ hci_dev_lock(hdev);
+
/* Fail if user set invalid QoS */
if (iso_pi(sk)->qos_user_set && !check_bcast_qos(&iso_pi(sk)->qos)) {
iso_pi(sk)->qos = default_qos;
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ hcon = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ iso_pi(sk)->bc_sid, &iso_pi(sk)->qos);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
}
- err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
- le_addr_type(iso_pi(sk)->dst_type),
- iso_pi(sk)->bc_sid, &iso_pi(sk)->qos);
+ conn = iso_conn_add(hcon);
+ if (!conn) {
+ hci_conn_drop(hcon);
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = iso_chan_add(conn, sk, NULL);
+ if (err) {
+ hci_conn_drop(hcon);
+ goto unlock;
+ }
hci_dev_put(hdev);
+unlock:
+ hci_dev_unlock(hdev);
return err;
}
@@ -1889,7 +1910,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
struct hci_evt_le_big_info_adv_report *ev2;
struct hci_ev_le_per_adv_report *ev3;
struct sock *sk;
- int lm = 0;
bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr);
@@ -1933,7 +1953,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (sk && test_bit(BT_SK_PA_SYNC_TERM,
&iso_pi(sk)->flags))
- return lm;
+ return 0;
}
if (sk) {
@@ -1961,16 +1981,58 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
if (ev3) {
- size_t base_len = ev3->length;
+ size_t base_len = 0;
u8 *base;
+ struct hci_conn *hcon;
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
iso_match_sync_handle_pa_report, ev3);
- base = eir_get_service_data(ev3->data, ev3->length,
- EIR_BAA_SERVICE_UUID, &base_len);
- if (base && sk && base_len <= sizeof(iso_pi(sk)->base)) {
+ if (!sk)
+ goto done;
+
+ hcon = iso_pi(sk)->conn->hcon;
+ if (!hcon)
+ goto done;
+
+ if (ev3->data_status == LE_PA_DATA_TRUNCATED) {
+ /* The controller was unable to retrieve PA data. */
+ memset(hcon->le_per_adv_data, 0,
+ HCI_MAX_PER_AD_TOT_LEN);
+ hcon->le_per_adv_data_len = 0;
+ hcon->le_per_adv_data_offset = 0;
+ goto done;
+ }
+
+ if (hcon->le_per_adv_data_offset + ev3->length >
+ HCI_MAX_PER_AD_TOT_LEN)
+ goto done;
+
+ memcpy(hcon->le_per_adv_data + hcon->le_per_adv_data_offset,
+ ev3->data, ev3->length);
+ hcon->le_per_adv_data_offset += ev3->length;
+
+ if (ev3->data_status == LE_PA_DATA_COMPLETE) {
+ /* All PA data has been received. */
+ hcon->le_per_adv_data_len =
+ hcon->le_per_adv_data_offset;
+ hcon->le_per_adv_data_offset = 0;
+
+ /* Extract BASE */
+ base = eir_get_service_data(hcon->le_per_adv_data,
+ hcon->le_per_adv_data_len,
+ EIR_BAA_SERVICE_UUID,
+ &base_len);
+
+ if (!base || base_len > BASE_MAX_LENGTH)
+ goto done;
+
memcpy(iso_pi(sk)->base, base, base_len);
iso_pi(sk)->base_len = base_len;
+ } else {
+ /* This is a PA data fragment. Keep pa_data_len set to 0
+ * until all data has been reassembled.
+ */
+ hcon->le_per_adv_data_len = 0;
}
} else {
sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
@@ -1978,16 +2040,14 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
done:
if (!sk)
- return lm;
-
- lm |= HCI_LM_ACCEPT;
+ return 0;
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))
*flags |= HCI_PROTO_DEFER;
sock_put(sk);
- return lm;
+ return HCI_LM_ACCEPT;
}
static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 60298975d5c4..467b242d8be0 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -39,8 +39,6 @@
#include <net/bluetooth/l2cap.h>
#include "smp.h"
-#include "a2mp.h"
-#include "amp.h"
#define LE_FLOWCTL_MAX_CREDITS 65535
@@ -167,24 +165,6 @@ static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn,
return NULL;
}
-static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
- u8 ident)
-{
- struct l2cap_chan *c;
-
- mutex_lock(&conn->chan_lock);
- c = __l2cap_get_chan_by_ident(conn, ident);
- if (c) {
- /* Only lock if chan reference is not 0 */
- c = l2cap_chan_hold_unless_zero(c);
- if (c)
- l2cap_chan_lock(c);
- }
- mutex_unlock(&conn->chan_lock);
-
- return c;
-}
-
static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src,
u8 src_type)
{
@@ -651,7 +631,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
chan->ops->teardown(chan, err);
if (conn) {
- struct amp_mgr *mgr = conn->hcon->amp_mgr;
/* Delete from channel list */
list_del(&chan->list);
@@ -666,16 +645,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
if (chan->chan_type != L2CAP_CHAN_FIXED ||
test_bit(FLAG_HOLD_HCI_CONN, &chan->flags))
hci_conn_drop(conn->hcon);
-
- if (mgr && mgr->bredr_chan == chan)
- mgr->bredr_chan = NULL;
- }
-
- if (chan->hs_hchan) {
- struct hci_chan *hs_hchan = chan->hs_hchan;
-
- BT_DBG("chan %p disconnect hs_hchan %p", chan, hs_hchan);
- amp_disconnect_logical_link(hs_hchan);
}
if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state))
@@ -977,12 +946,6 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
hci_send_acl(conn->hchan, skb, flags);
}
-static bool __chan_is_moving(struct l2cap_chan *chan)
-{
- return chan->move_state != L2CAP_MOVE_STABLE &&
- chan->move_state != L2CAP_MOVE_WAIT_PREPARE;
-}
-
static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb)
{
struct hci_conn *hcon = chan->conn->hcon;
@@ -991,15 +954,6 @@ static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb)
BT_DBG("chan %p, skb %p len %d priority %u", chan, skb, skb->len,
skb->priority);
- if (chan->hs_hcon && !__chan_is_moving(chan)) {
- if (chan->hs_hchan)
- hci_send_acl(chan->hs_hchan, skb, ACL_COMPLETE);
- else
- kfree_skb(skb);
-
- return;
- }
-
/* Use NO_FLUSH for LE links (where this is the only option) or
* if the BR/EDR link supports it and flushing has not been
* explicitly requested (through FLAG_FLUSHABLE).
@@ -1180,9 +1134,6 @@ static void l2cap_send_sframe(struct l2cap_chan *chan,
if (!control->sframe)
return;
- if (__chan_is_moving(chan))
- return;
-
if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state) &&
!control->poll)
control->final = 1;
@@ -1237,40 +1188,6 @@ static inline int __l2cap_no_conn_pending(struct l2cap_chan *chan)
return !test_bit(CONF_CONNECT_PEND, &chan->conf_state);
}
-static bool __amp_capable(struct l2cap_chan *chan)
-{
- struct l2cap_conn *conn = chan->conn;
- struct hci_dev *hdev;
- bool amp_available = false;
-
- if (!(conn->local_fixed_chan & L2CAP_FC_A2MP))
- return false;
-
- if (!(conn->remote_fixed_chan & L2CAP_FC_A2MP))
- return false;
-
- read_lock(&hci_dev_list_lock);
- list_for_each_entry(hdev, &hci_dev_list, list) {
- if (hdev->amp_type != AMP_TYPE_BREDR &&
- test_bit(HCI_UP, &hdev->flags)) {
- amp_available = true;
- break;
- }
- }
- read_unlock(&hci_dev_list_lock);
-
- if (chan->chan_policy == BT_CHANNEL_POLICY_AMP_PREFERRED)
- return amp_available;
-
- return false;
-}
-
-static bool l2cap_check_efs(struct l2cap_chan *chan)
-{
- /* Check EFS parameters */
- return true;
-}
-
void l2cap_send_conn_req(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
@@ -1286,76 +1203,6 @@ void l2cap_send_conn_req(struct l2cap_chan *chan)
l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_REQ, sizeof(req), &req);
}
-static void l2cap_send_create_chan_req(struct l2cap_chan *chan, u8 amp_id)
-{
- struct l2cap_create_chan_req req;
- req.scid = cpu_to_le16(chan->scid);
- req.psm = chan->psm;
- req.amp_id = amp_id;
-
- chan->ident = l2cap_get_ident(chan->conn);
-
- l2cap_send_cmd(chan->conn, chan->ident, L2CAP_CREATE_CHAN_REQ,
- sizeof(req), &req);
-}
-
-static void l2cap_move_setup(struct l2cap_chan *chan)
-{
- struct sk_buff *skb;
-
- BT_DBG("chan %p", chan);
-
- if (chan->mode != L2CAP_MODE_ERTM)
- return;
-
- __clear_retrans_timer(chan);
- __clear_monitor_timer(chan);
- __clear_ack_timer(chan);
-
- chan->retry_count = 0;
- skb_queue_walk(&chan->tx_q, skb) {
- if (bt_cb(skb)->l2cap.retries)
- bt_cb(skb)->l2cap.retries = 1;
- else
- break;
- }
-
- chan->expected_tx_seq = chan->buffer_seq;
-
- clear_bit(CONN_REJ_ACT, &chan->conn_state);
- clear_bit(CONN_SREJ_ACT, &chan->conn_state);
- l2cap_seq_list_clear(&chan->retrans_list);
- l2cap_seq_list_clear(&chan->srej_list);
- skb_queue_purge(&chan->srej_q);
-
- chan->tx_state = L2CAP_TX_STATE_XMIT;
- chan->rx_state = L2CAP_RX_STATE_MOVE;
-
- set_bit(CONN_REMOTE_BUSY, &chan->conn_state);
-}
-
-static void l2cap_move_done(struct l2cap_chan *chan)
-{
- u8 move_role = chan->move_role;
- BT_DBG("chan %p", chan);
-
- chan->move_state = L2CAP_MOVE_STABLE;
- chan->move_role = L2CAP_MOVE_ROLE_NONE;
-
- if (chan->mode != L2CAP_MODE_ERTM)
- return;
-
- switch (move_role) {
- case L2CAP_MOVE_ROLE_INITIATOR:
- l2cap_tx(chan, NULL, NULL, L2CAP_EV_EXPLICIT_POLL);
- chan->rx_state = L2CAP_RX_STATE_WAIT_F;
- break;
- case L2CAP_MOVE_ROLE_RESPONDER:
- chan->rx_state = L2CAP_RX_STATE_WAIT_P;
- break;
- }
-}
-
static void l2cap_chan_ready(struct l2cap_chan *chan)
{
/* The channel may have already been flagged as connected in
@@ -1505,10 +1352,7 @@ static void l2cap_le_start(struct l2cap_chan *chan)
static void l2cap_start_connection(struct l2cap_chan *chan)
{
- if (__amp_capable(chan)) {
- BT_DBG("chan %p AMP capable: discover AMPs", chan);
- a2mp_discover_amp(chan);
- } else if (chan->conn->hcon->type == LE_LINK) {
+ if (chan->conn->hcon->type == LE_LINK) {
l2cap_le_start(chan);
} else {
l2cap_send_conn_req(chan);
@@ -1611,11 +1455,6 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err)
__clear_ack_timer(chan);
}
- if (chan->scid == L2CAP_CID_A2MP) {
- l2cap_state_change(chan, BT_DISCONN);
- return;
- }
-
req.dcid = cpu_to_le16(chan->dcid);
req.scid = cpu_to_le16(chan->scid);
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ,
@@ -1754,11 +1593,6 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
l2cap_chan_lock(chan);
- if (chan->scid == L2CAP_CID_A2MP) {
- l2cap_chan_unlock(chan);
- continue;
- }
-
if (hcon->type == LE_LINK) {
l2cap_le_start(chan);
} else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
@@ -2067,9 +1901,6 @@ static void l2cap_streaming_send(struct l2cap_chan *chan,
BT_DBG("chan %p, skbs %p", chan, skbs);
- if (__chan_is_moving(chan))
- return;
-
skb_queue_splice_tail_init(skbs, &chan->tx_q);
while (!skb_queue_empty(&chan->tx_q)) {
@@ -2112,9 +1943,6 @@ static int l2cap_ertm_send(struct l2cap_chan *chan)
if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state))
return 0;
- if (__chan_is_moving(chan))
- return 0;
-
while (chan->tx_send_head &&
chan->unacked_frames < chan->remote_tx_win &&
chan->tx_state == L2CAP_TX_STATE_XMIT) {
@@ -2180,9 +2008,6 @@ static void l2cap_ertm_resend(struct l2cap_chan *chan)
if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state))
return;
- if (__chan_is_moving(chan))
- return;
-
while (chan->retrans_list.head != L2CAP_SEQ_LIST_CLEAR) {
seq = l2cap_seq_list_pop(&chan->retrans_list);
@@ -2522,8 +2347,7 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan,
pdu_len = chan->conn->mtu;
/* Constrain PDU size for BR/EDR connections */
- if (!chan->hs_hcon)
- pdu_len = min_t(size_t, pdu_len, L2CAP_BREDR_MAX_PAYLOAD);
+ pdu_len = min_t(size_t, pdu_len, L2CAP_BREDR_MAX_PAYLOAD);
/* Adjust for largest possible L2CAP overhead. */
if (chan->fcs)
@@ -3287,11 +3111,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan)
skb_queue_head_init(&chan->tx_q);
- chan->local_amp_id = AMP_ID_BREDR;
- chan->move_id = AMP_ID_BREDR;
- chan->move_state = L2CAP_MOVE_STABLE;
- chan->move_role = L2CAP_MOVE_ROLE_NONE;
-
if (chan->mode != L2CAP_MODE_ERTM)
return 0;
@@ -3326,52 +3145,19 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
static inline bool __l2cap_ews_supported(struct l2cap_conn *conn)
{
- return ((conn->local_fixed_chan & L2CAP_FC_A2MP) &&
- (conn->feat_mask & L2CAP_FEAT_EXT_WINDOW));
+ return (conn->feat_mask & L2CAP_FEAT_EXT_WINDOW);
}
static inline bool __l2cap_efs_supported(struct l2cap_conn *conn)
{
- return ((conn->local_fixed_chan & L2CAP_FC_A2MP) &&
- (conn->feat_mask & L2CAP_FEAT_EXT_FLOW));
+ return (conn->feat_mask & L2CAP_FEAT_EXT_FLOW);
}
static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan,
struct l2cap_conf_rfc *rfc)
{
- if (chan->local_amp_id != AMP_ID_BREDR && chan->hs_hcon) {
- u64 ertm_to = chan->hs_hcon->hdev->amp_be_flush_to;
-
- /* Class 1 devices have must have ERTM timeouts
- * exceeding the Link Supervision Timeout. The
- * default Link Supervision Timeout for AMP
- * controllers is 10 seconds.
- *
- * Class 1 devices use 0xffffffff for their
- * best-effort flush timeout, so the clamping logic
- * will result in a timeout that meets the above
- * requirement. ERTM timeouts are 16-bit values, so
- * the maximum timeout is 65.535 seconds.
- */
-
- /* Convert timeout to milliseconds and round */
- ertm_to = DIV_ROUND_UP_ULL(ertm_to, 1000);
-
- /* This is the recommended formula for class 2 devices
- * that start ERTM timers when packets are sent to the
- * controller.
- */
- ertm_to = 3 * ertm_to + 500;
-
- if (ertm_to > 0xffff)
- ertm_to = 0xffff;
-
- rfc->retrans_timeout = cpu_to_le16((u16) ertm_to);
- rfc->monitor_timeout = rfc->retrans_timeout;
- } else {
- rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO);
- rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO);
- }
+ rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO);
+ rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO);
}
static inline void l2cap_txwin_setup(struct l2cap_chan *chan)
@@ -3623,13 +3409,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
case L2CAP_CONF_EWS:
if (olen != 2)
break;
- if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP))
- return -ECONNREFUSED;
- set_bit(FLAG_EXT_CTRL, &chan->flags);
- set_bit(CONF_EWS_RECV, &chan->conf_state);
- chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW;
- chan->remote_tx_win = val;
- break;
+ return -ECONNREFUSED;
default:
if (hint)
@@ -4027,11 +3807,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
rsp.dcid = cpu_to_le16(chan->scid);
rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
-
- if (chan->hs_hcon)
- rsp_code = L2CAP_CREATE_CHAN_RSP;
- else
- rsp_code = L2CAP_CONN_RSP;
+ rsp_code = L2CAP_CONN_RSP;
BT_DBG("chan %p rsp_code %u", chan, rsp_code);
@@ -4190,7 +3966,6 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
chan->dst_type = bdaddr_dst_type(conn->hcon);
chan->psm = psm;
chan->dcid = scid;
- chan->local_amp_id = amp_id;
__l2cap_chan_add(conn, chan);
@@ -4516,10 +4291,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
/* check compatibility */
/* Send rsp for BR/EDR channel */
- if (!chan->hs_hcon)
- l2cap_send_efs_conf_rsp(chan, rsp, cmd->ident, flags);
- else
- chan->ident = cmd->ident;
+ l2cap_send_efs_conf_rsp(chan, rsp, cmd->ident, flags);
}
unlock:
@@ -4571,15 +4343,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
goto done;
}
- if (!chan->hs_hcon) {
- l2cap_send_efs_conf_rsp(chan, buf, cmd->ident,
- 0);
- } else {
- if (l2cap_check_efs(chan)) {
- amp_create_logical_link(chan);
- chan->ident = cmd->ident;
- }
- }
+ l2cap_send_efs_conf_rsp(chan, buf, cmd->ident, 0);
}
goto done;
@@ -4750,9 +4514,6 @@ static inline int l2cap_information_req(struct l2cap_conn *conn,
if (!disable_ertm)
feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
| L2CAP_FEAT_FCS;
- if (conn->local_fixed_chan & L2CAP_FC_A2MP)
- feat_mask |= L2CAP_FEAT_EXT_FLOW
- | L2CAP_FEAT_EXT_WINDOW;
put_unaligned_le32(feat_mask, rsp->data);
l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf),
@@ -4841,751 +4602,6 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn,
return 0;
}
-static int l2cap_create_channel_req(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u16 cmd_len, void *data)
-{
- struct l2cap_create_chan_req *req = data;
- struct l2cap_create_chan_rsp rsp;
- struct l2cap_chan *chan;
- struct hci_dev *hdev;
- u16 psm, scid;
-
- if (cmd_len != sizeof(*req))
- return -EPROTO;
-
- if (!(conn->local_fixed_chan & L2CAP_FC_A2MP))
- return -EINVAL;
-
- psm = le16_to_cpu(req->psm);
- scid = le16_to_cpu(req->scid);
-
- BT_DBG("psm 0x%2.2x, scid 0x%4.4x, amp_id %d", psm, scid, req->amp_id);
-
- /* For controller id 0 make BR/EDR connection */
- if (req->amp_id == AMP_ID_BREDR) {
- l2cap_connect(conn, cmd, data, L2CAP_CREATE_CHAN_RSP,
- req->amp_id);
- return 0;
- }
-
- /* Validate AMP controller id */
- hdev = hci_dev_get(req->amp_id);
- if (!hdev)
- goto error;
-
- if (hdev->dev_type != HCI_AMP || !test_bit(HCI_UP, &hdev->flags)) {
- hci_dev_put(hdev);
- goto error;
- }
-
- chan = l2cap_connect(conn, cmd, data, L2CAP_CREATE_CHAN_RSP,
- req->amp_id);
- if (chan) {
- struct amp_mgr *mgr = conn->hcon->amp_mgr;
- struct hci_conn *hs_hcon;
-
- hs_hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
- &conn->hcon->dst);
- if (!hs_hcon) {
- hci_dev_put(hdev);
- cmd_reject_invalid_cid(conn, cmd->ident, chan->scid,
- chan->dcid);
- return 0;
- }
-
- BT_DBG("mgr %p bredr_chan %p hs_hcon %p", mgr, chan, hs_hcon);
-
- mgr->bredr_chan = chan;
- chan->hs_hcon = hs_hcon;
- chan->fcs = L2CAP_FCS_NONE;
- conn->mtu = hdev->block_mtu;
- }
-
- hci_dev_put(hdev);
-
- return 0;
-
-error:
- rsp.dcid = 0;
- rsp.scid = cpu_to_le16(scid);
- rsp.result = cpu_to_le16(L2CAP_CR_BAD_AMP);
- rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
-
- l2cap_send_cmd(conn, cmd->ident, L2CAP_CREATE_CHAN_RSP,
- sizeof(rsp), &rsp);
-
- return 0;
-}
-
-static void l2cap_send_move_chan_req(struct l2cap_chan *chan, u8 dest_amp_id)
-{
- struct l2cap_move_chan_req req;
- u8 ident;
-
- BT_DBG("chan %p, dest_amp_id %d", chan, dest_amp_id);
-
- ident = l2cap_get_ident(chan->conn);
- chan->ident = ident;
-
- req.icid = cpu_to_le16(chan->scid);
- req.dest_amp_id = dest_amp_id;
-
- l2cap_send_cmd(chan->conn, ident, L2CAP_MOVE_CHAN_REQ, sizeof(req),
- &req);
-
- __set_chan_timer(chan, L2CAP_MOVE_TIMEOUT);
-}
-
-static void l2cap_send_move_chan_rsp(struct l2cap_chan *chan, u16 result)
-{
- struct l2cap_move_chan_rsp rsp;
-
- BT_DBG("chan %p, result 0x%4.4x", chan, result);
-
- rsp.icid = cpu_to_le16(chan->dcid);
- rsp.result = cpu_to_le16(result);
-
- l2cap_send_cmd(chan->conn, chan->ident, L2CAP_MOVE_CHAN_RSP,
- sizeof(rsp), &rsp);
-}
-
-static void l2cap_send_move_chan_cfm(struct l2cap_chan *chan, u16 result)
-{
- struct l2cap_move_chan_cfm cfm;
-
- BT_DBG("chan %p, result 0x%4.4x", chan, result);
-
- chan->ident = l2cap_get_ident(chan->conn);
-
- cfm.icid = cpu_to_le16(chan->scid);
- cfm.result = cpu_to_le16(result);
-
- l2cap_send_cmd(chan->conn, chan->ident, L2CAP_MOVE_CHAN_CFM,
- sizeof(cfm), &cfm);
-
- __set_chan_timer(chan, L2CAP_MOVE_TIMEOUT);
-}
-
-static void l2cap_send_move_chan_cfm_icid(struct l2cap_conn *conn, u16 icid)
-{
- struct l2cap_move_chan_cfm cfm;
-
- BT_DBG("conn %p, icid 0x%4.4x", conn, icid);
-
- cfm.icid = cpu_to_le16(icid);
- cfm.result = cpu_to_le16(L2CAP_MC_UNCONFIRMED);
-
- l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_MOVE_CHAN_CFM,
- sizeof(cfm), &cfm);
-}
-
-static void l2cap_send_move_chan_cfm_rsp(struct l2cap_conn *conn, u8 ident,
- u16 icid)
-{
- struct l2cap_move_chan_cfm_rsp rsp;
-
- BT_DBG("icid 0x%4.4x", icid);
-
- rsp.icid = cpu_to_le16(icid);
- l2cap_send_cmd(conn, ident, L2CAP_MOVE_CHAN_CFM_RSP, sizeof(rsp), &rsp);
-}
-
-static void __release_logical_link(struct l2cap_chan *chan)
-{
- chan->hs_hchan = NULL;
- chan->hs_hcon = NULL;
-
- /* Placeholder - release the logical link */
-}
-
-static void l2cap_logical_fail(struct l2cap_chan *chan)
-{
- /* Logical link setup failed */
- if (chan->state != BT_CONNECTED) {
- /* Create channel failure, disconnect */
- l2cap_send_disconn_req(chan, ECONNRESET);
- return;
- }
-
- switch (chan->move_role) {
- case L2CAP_MOVE_ROLE_RESPONDER:
- l2cap_move_done(chan);
- l2cap_send_move_chan_rsp(chan, L2CAP_MR_NOT_SUPP);
- break;
- case L2CAP_MOVE_ROLE_INITIATOR:
- if (chan->move_state == L2CAP_MOVE_WAIT_LOGICAL_COMP ||
- chan->move_state == L2CAP_MOVE_WAIT_LOGICAL_CFM) {
- /* Remote has only sent pending or
- * success responses, clean up
- */
- l2cap_move_done(chan);
- }
-
- /* Other amp move states imply that the move
- * has already aborted
- */
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
- break;
- }
-}
-
-static void l2cap_logical_finish_create(struct l2cap_chan *chan,
- struct hci_chan *hchan)
-{
- struct l2cap_conf_rsp rsp;
-
- chan->hs_hchan = hchan;
- chan->hs_hcon->l2cap_data = chan->conn;
-
- l2cap_send_efs_conf_rsp(chan, &rsp, chan->ident, 0);
-
- if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) {
- int err;
-
- set_default_fcs(chan);
-
- err = l2cap_ertm_init(chan);
- if (err < 0)
- l2cap_send_disconn_req(chan, -err);
- else
- l2cap_chan_ready(chan);
- }
-}
-
-static void l2cap_logical_finish_move(struct l2cap_chan *chan,
- struct hci_chan *hchan)
-{
- chan->hs_hcon = hchan->conn;
- chan->hs_hcon->l2cap_data = chan->conn;
-
- BT_DBG("move_state %d", chan->move_state);
-
- switch (chan->move_state) {
- case L2CAP_MOVE_WAIT_LOGICAL_COMP:
- /* Move confirm will be sent after a success
- * response is received
- */
- chan->move_state = L2CAP_MOVE_WAIT_RSP_SUCCESS;
- break;
- case L2CAP_MOVE_WAIT_LOGICAL_CFM:
- if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) {
- chan->move_state = L2CAP_MOVE_WAIT_LOCAL_BUSY;
- } else if (chan->move_role == L2CAP_MOVE_ROLE_INITIATOR) {
- chan->move_state = L2CAP_MOVE_WAIT_CONFIRM_RSP;
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_CONFIRMED);
- } else if (chan->move_role == L2CAP_MOVE_ROLE_RESPONDER) {
- chan->move_state = L2CAP_MOVE_WAIT_CONFIRM;
- l2cap_send_move_chan_rsp(chan, L2CAP_MR_SUCCESS);
- }
- break;
- default:
- /* Move was not in expected state, free the channel */
- __release_logical_link(chan);
-
- chan->move_state = L2CAP_MOVE_STABLE;
- }
-}
-
-/* Call with chan locked */
-void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan,
- u8 status)
-{
- BT_DBG("chan %p, hchan %p, status %d", chan, hchan, status);
-
- if (status) {
- l2cap_logical_fail(chan);
- __release_logical_link(chan);
- return;
- }
-
- if (chan->state != BT_CONNECTED) {
- /* Ignore logical link if channel is on BR/EDR */
- if (chan->local_amp_id != AMP_ID_BREDR)
- l2cap_logical_finish_create(chan, hchan);
- } else {
- l2cap_logical_finish_move(chan, hchan);
- }
-}
-
-void l2cap_move_start(struct l2cap_chan *chan)
-{
- BT_DBG("chan %p", chan);
-
- if (chan->local_amp_id == AMP_ID_BREDR) {
- if (chan->chan_policy != BT_CHANNEL_POLICY_AMP_PREFERRED)
- return;
- chan->move_role = L2CAP_MOVE_ROLE_INITIATOR;
- chan->move_state = L2CAP_MOVE_WAIT_PREPARE;
- /* Placeholder - start physical link setup */
- } else {
- chan->move_role = L2CAP_MOVE_ROLE_INITIATOR;
- chan->move_state = L2CAP_MOVE_WAIT_RSP_SUCCESS;
- chan->move_id = 0;
- l2cap_move_setup(chan);
- l2cap_send_move_chan_req(chan, 0);
- }
-}
-
-static void l2cap_do_create(struct l2cap_chan *chan, int result,
- u8 local_amp_id, u8 remote_amp_id)
-{
- BT_DBG("chan %p state %s %u -> %u", chan, state_to_string(chan->state),
- local_amp_id, remote_amp_id);
-
- chan->fcs = L2CAP_FCS_NONE;
-
- /* Outgoing channel on AMP */
- if (chan->state == BT_CONNECT) {
- if (result == L2CAP_CR_SUCCESS) {
- chan->local_amp_id = local_amp_id;
- l2cap_send_create_chan_req(chan, remote_amp_id);
- } else {
- /* Revert to BR/EDR connect */
- l2cap_send_conn_req(chan);
- }
-
- return;
- }
-
- /* Incoming channel on AMP */
- if (__l2cap_no_conn_pending(chan)) {
- struct l2cap_conn_rsp rsp;
- char buf[128];
- rsp.scid = cpu_to_le16(chan->dcid);
- rsp.dcid = cpu_to_le16(chan->scid);
-
- if (result == L2CAP_CR_SUCCESS) {
- /* Send successful response */
- rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
- rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
- } else {
- /* Send negative response */
- rsp.result = cpu_to_le16(L2CAP_CR_NO_MEM);
- rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
- }
-
- l2cap_send_cmd(chan->conn, chan->ident, L2CAP_CREATE_CHAN_RSP,
- sizeof(rsp), &rsp);
-
- if (result == L2CAP_CR_SUCCESS) {
- l2cap_state_change(chan, BT_CONFIG);
- set_bit(CONF_REQ_SENT, &chan->conf_state);
- l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn),
- L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
- chan->num_conf_req++;
- }
- }
-}
-
-static void l2cap_do_move_initiate(struct l2cap_chan *chan, u8 local_amp_id,
- u8 remote_amp_id)
-{
- l2cap_move_setup(chan);
- chan->move_id = local_amp_id;
- chan->move_state = L2CAP_MOVE_WAIT_RSP;
-
- l2cap_send_move_chan_req(chan, remote_amp_id);
-}
-
-static void l2cap_do_move_respond(struct l2cap_chan *chan, int result)
-{
- struct hci_chan *hchan = NULL;
-
- /* Placeholder - get hci_chan for logical link */
-
- if (hchan) {
- if (hchan->state == BT_CONNECTED) {
- /* Logical link is ready to go */
- chan->hs_hcon = hchan->conn;
- chan->hs_hcon->l2cap_data = chan->conn;
- chan->move_state = L2CAP_MOVE_WAIT_CONFIRM;
- l2cap_send_move_chan_rsp(chan, L2CAP_MR_SUCCESS);
-
- l2cap_logical_cfm(chan, hchan, L2CAP_MR_SUCCESS);
- } else {
- /* Wait for logical link to be ready */
- chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_CFM;
- }
- } else {
- /* Logical link not available */
- l2cap_send_move_chan_rsp(chan, L2CAP_MR_NOT_ALLOWED);
- }
-}
-
-static void l2cap_do_move_cancel(struct l2cap_chan *chan, int result)
-{
- if (chan->move_role == L2CAP_MOVE_ROLE_RESPONDER) {
- u8 rsp_result;
- if (result == -EINVAL)
- rsp_result = L2CAP_MR_BAD_ID;
- else
- rsp_result = L2CAP_MR_NOT_ALLOWED;
-
- l2cap_send_move_chan_rsp(chan, rsp_result);
- }
-
- chan->move_role = L2CAP_MOVE_ROLE_NONE;
- chan->move_state = L2CAP_MOVE_STABLE;
-
- /* Restart data transmission */
- l2cap_ertm_send(chan);
-}
-
-/* Invoke with locked chan */
-void __l2cap_physical_cfm(struct l2cap_chan *chan, int result)
-{
- u8 local_amp_id = chan->local_amp_id;
- u8 remote_amp_id = chan->remote_amp_id;
-
- BT_DBG("chan %p, result %d, local_amp_id %d, remote_amp_id %d",
- chan, result, local_amp_id, remote_amp_id);
-
- if (chan->state == BT_DISCONN || chan->state == BT_CLOSED)
- return;
-
- if (chan->state != BT_CONNECTED) {
- l2cap_do_create(chan, result, local_amp_id, remote_amp_id);
- } else if (result != L2CAP_MR_SUCCESS) {
- l2cap_do_move_cancel(chan, result);
- } else {
- switch (chan->move_role) {
- case L2CAP_MOVE_ROLE_INITIATOR:
- l2cap_do_move_initiate(chan, local_amp_id,
- remote_amp_id);
- break;
- case L2CAP_MOVE_ROLE_RESPONDER:
- l2cap_do_move_respond(chan, result);
- break;
- default:
- l2cap_do_move_cancel(chan, result);
- break;
- }
- }
-}
-
-static inline int l2cap_move_channel_req(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u16 cmd_len, void *data)
-{
- struct l2cap_move_chan_req *req = data;
- struct l2cap_move_chan_rsp rsp;
- struct l2cap_chan *chan;
- u16 icid = 0;
- u16 result = L2CAP_MR_NOT_ALLOWED;
-
- if (cmd_len != sizeof(*req))
- return -EPROTO;
-
- icid = le16_to_cpu(req->icid);
-
- BT_DBG("icid 0x%4.4x, dest_amp_id %d", icid, req->dest_amp_id);
-
- if (!(conn->local_fixed_chan & L2CAP_FC_A2MP))
- return -EINVAL;
-
- chan = l2cap_get_chan_by_dcid(conn, icid);
- if (!chan) {
- rsp.icid = cpu_to_le16(icid);
- rsp.result = cpu_to_le16(L2CAP_MR_NOT_ALLOWED);
- l2cap_send_cmd(conn, cmd->ident, L2CAP_MOVE_CHAN_RSP,
- sizeof(rsp), &rsp);
- return 0;
- }
-
- chan->ident = cmd->ident;
-
- if (chan->scid < L2CAP_CID_DYN_START ||
- chan->chan_policy == BT_CHANNEL_POLICY_BREDR_ONLY ||
- (chan->mode != L2CAP_MODE_ERTM &&
- chan->mode != L2CAP_MODE_STREAMING)) {
- result = L2CAP_MR_NOT_ALLOWED;
- goto send_move_response;
- }
-
- if (chan->local_amp_id == req->dest_amp_id) {
- result = L2CAP_MR_SAME_ID;
- goto send_move_response;
- }
-
- if (req->dest_amp_id != AMP_ID_BREDR) {
- struct hci_dev *hdev;
- hdev = hci_dev_get(req->dest_amp_id);
- if (!hdev || hdev->dev_type != HCI_AMP ||
- !test_bit(HCI_UP, &hdev->flags)) {
- if (hdev)
- hci_dev_put(hdev);
-
- result = L2CAP_MR_BAD_ID;
- goto send_move_response;
- }
- hci_dev_put(hdev);
- }
-
- /* Detect a move collision. Only send a collision response
- * if this side has "lost", otherwise proceed with the move.
- * The winner has the larger bd_addr.
- */
- if ((__chan_is_moving(chan) ||
- chan->move_role != L2CAP_MOVE_ROLE_NONE) &&
- bacmp(&conn->hcon->src, &conn->hcon->dst) > 0) {
- result = L2CAP_MR_COLLISION;
- goto send_move_response;
- }
-
- chan->move_role = L2CAP_MOVE_ROLE_RESPONDER;
- l2cap_move_setup(chan);
- chan->move_id = req->dest_amp_id;
-
- if (req->dest_amp_id == AMP_ID_BREDR) {
- /* Moving to BR/EDR */
- if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) {
- chan->move_state = L2CAP_MOVE_WAIT_LOCAL_BUSY;
- result = L2CAP_MR_PEND;
- } else {
- chan->move_state = L2CAP_MOVE_WAIT_CONFIRM;
- result = L2CAP_MR_SUCCESS;
- }
- } else {
- chan->move_state = L2CAP_MOVE_WAIT_PREPARE;
- /* Placeholder - uncomment when amp functions are available */
- /*amp_accept_physical(chan, req->dest_amp_id);*/
- result = L2CAP_MR_PEND;
- }
-
-send_move_response:
- l2cap_send_move_chan_rsp(chan, result);
-
- l2cap_chan_unlock(chan);
- l2cap_chan_put(chan);
-
- return 0;
-}
-
-static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
-{
- struct l2cap_chan *chan;
- struct hci_chan *hchan = NULL;
-
- chan = l2cap_get_chan_by_scid(conn, icid);
- if (!chan) {
- l2cap_send_move_chan_cfm_icid(conn, icid);
- return;
- }
-
- __clear_chan_timer(chan);
- if (result == L2CAP_MR_PEND)
- __set_chan_timer(chan, L2CAP_MOVE_ERTX_TIMEOUT);
-
- switch (chan->move_state) {
- case L2CAP_MOVE_WAIT_LOGICAL_COMP:
- /* Move confirm will be sent when logical link
- * is complete.
- */
- chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_CFM;
- break;
- case L2CAP_MOVE_WAIT_RSP_SUCCESS:
- if (result == L2CAP_MR_PEND) {
- break;
- } else if (test_bit(CONN_LOCAL_BUSY,
- &chan->conn_state)) {
- chan->move_state = L2CAP_MOVE_WAIT_LOCAL_BUSY;
- } else {
- /* Logical link is up or moving to BR/EDR,
- * proceed with move
- */
- chan->move_state = L2CAP_MOVE_WAIT_CONFIRM_RSP;
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_CONFIRMED);
- }
- break;
- case L2CAP_MOVE_WAIT_RSP:
- /* Moving to AMP */
- if (result == L2CAP_MR_SUCCESS) {
- /* Remote is ready, send confirm immediately
- * after logical link is ready
- */
- chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_CFM;
- } else {
- /* Both logical link and move success
- * are required to confirm
- */
- chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_COMP;
- }
-
- /* Placeholder - get hci_chan for logical link */
- if (!hchan) {
- /* Logical link not available */
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
- break;
- }
-
- /* If the logical link is not yet connected, do not
- * send confirmation.
- */
- if (hchan->state != BT_CONNECTED)
- break;
-
- /* Logical link is already ready to go */
-
- chan->hs_hcon = hchan->conn;
- chan->hs_hcon->l2cap_data = chan->conn;
-
- if (result == L2CAP_MR_SUCCESS) {
- /* Can confirm now */
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_CONFIRMED);
- } else {
- /* Now only need move success
- * to confirm
- */
- chan->move_state = L2CAP_MOVE_WAIT_RSP_SUCCESS;
- }
-
- l2cap_logical_cfm(chan, hchan, L2CAP_MR_SUCCESS);
- break;
- default:
- /* Any other amp move state means the move failed. */
- chan->move_id = chan->local_amp_id;
- l2cap_move_done(chan);
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
- }
-
- l2cap_chan_unlock(chan);
- l2cap_chan_put(chan);
-}
-
-static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
- u16 result)
-{
- struct l2cap_chan *chan;
-
- chan = l2cap_get_chan_by_ident(conn, ident);
- if (!chan) {
- /* Could not locate channel, icid is best guess */
- l2cap_send_move_chan_cfm_icid(conn, icid);
- return;
- }
-
- __clear_chan_timer(chan);
-
- if (chan->move_role == L2CAP_MOVE_ROLE_INITIATOR) {
- if (result == L2CAP_MR_COLLISION) {
- chan->move_role = L2CAP_MOVE_ROLE_RESPONDER;
- } else {
- /* Cleanup - cancel move */
- chan->move_id = chan->local_amp_id;
- l2cap_move_done(chan);
- }
- }
-
- l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
-
- l2cap_chan_unlock(chan);
- l2cap_chan_put(chan);
-}
-
-static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u16 cmd_len, void *data)
-{
- struct l2cap_move_chan_rsp *rsp = data;
- u16 icid, result;
-
- if (cmd_len != sizeof(*rsp))
- return -EPROTO;
-
- icid = le16_to_cpu(rsp->icid);
- result = le16_to_cpu(rsp->result);
-
- BT_DBG("icid 0x%4.4x, result 0x%4.4x", icid, result);
-
- if (result == L2CAP_MR_SUCCESS || result == L2CAP_MR_PEND)
- l2cap_move_continue(conn, icid, result);
- else
- l2cap_move_fail(conn, cmd->ident, icid, result);
-
- return 0;
-}
-
-static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u16 cmd_len, void *data)
-{
- struct l2cap_move_chan_cfm *cfm = data;
- struct l2cap_chan *chan;
- u16 icid, result;
-
- if (cmd_len != sizeof(*cfm))
- return -EPROTO;
-
- icid = le16_to_cpu(cfm->icid);
- result = le16_to_cpu(cfm->result);
-
- BT_DBG("icid 0x%4.4x, result 0x%4.4x", icid, result);
-
- chan = l2cap_get_chan_by_dcid(conn, icid);
- if (!chan) {
- /* Spec requires a response even if the icid was not found */
- l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
- return 0;
- }
-
- if (chan->move_state == L2CAP_MOVE_WAIT_CONFIRM) {
- if (result == L2CAP_MC_CONFIRMED) {
- chan->local_amp_id = chan->move_id;
- if (chan->local_amp_id == AMP_ID_BREDR)
- __release_logical_link(chan);
- } else {
- chan->move_id = chan->local_amp_id;
- }
-
- l2cap_move_done(chan);
- }
-
- l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
-
- l2cap_chan_unlock(chan);
- l2cap_chan_put(chan);
-
- return 0;
-}
-
-static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u16 cmd_len, void *data)
-{
- struct l2cap_move_chan_cfm_rsp *rsp = data;
- struct l2cap_chan *chan;
- u16 icid;
-
- if (cmd_len != sizeof(*rsp))
- return -EPROTO;
-
- icid = le16_to_cpu(rsp->icid);
-
- BT_DBG("icid 0x%4.4x", icid);
-
- chan = l2cap_get_chan_by_scid(conn, icid);
- if (!chan)
- return 0;
-
- __clear_chan_timer(chan);
-
- if (chan->move_state == L2CAP_MOVE_WAIT_CONFIRM_RSP) {
- chan->local_amp_id = chan->move_id;
-
- if (chan->local_amp_id == AMP_ID_BREDR && chan->hs_hchan)
- __release_logical_link(chan);
-
- l2cap_move_done(chan);
- }
-
- l2cap_chan_unlock(chan);
- l2cap_chan_put(chan);
-
- return 0;
-}
-
static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
struct l2cap_cmd_hdr *cmd,
u16 cmd_len, u8 *data)
@@ -5613,7 +4629,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- err = hci_check_conn_params(min, max, latency, to_multiplier);
+ if (max > hcon->le_conn_max_interval) {
+ BT_DBG("requested connection interval exceeds current bounds.");
+ err = -EINVAL;
+ } else {
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
+ }
+
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
@@ -5739,7 +4761,6 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
break;
case L2CAP_CONN_RSP:
- case L2CAP_CREATE_CHAN_RSP:
l2cap_connect_create_rsp(conn, cmd, cmd_len, data);
break;
@@ -5774,26 +4795,6 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
l2cap_information_rsp(conn, cmd, cmd_len, data);
break;
- case L2CAP_CREATE_CHAN_REQ:
- err = l2cap_create_channel_req(conn, cmd, cmd_len, data);
- break;
-
- case L2CAP_MOVE_CHAN_REQ:
- err = l2cap_move_channel_req(conn, cmd, cmd_len, data);
- break;
-
- case L2CAP_MOVE_CHAN_RSP:
- l2cap_move_channel_rsp(conn, cmd, cmd_len, data);
- break;
-
- case L2CAP_MOVE_CHAN_CFM:
- err = l2cap_move_channel_confirm(conn, cmd, cmd_len, data);
- break;
-
- case L2CAP_MOVE_CHAN_CFM_RSP:
- l2cap_move_channel_confirm_rsp(conn, cmd, cmd_len, data);
- break;
-
default:
BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code);
err = -EINVAL;
@@ -7045,8 +6046,8 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
if (control->final) {
clear_bit(CONN_REMOTE_BUSY, &chan->conn_state);
- if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state) &&
- !__chan_is_moving(chan)) {
+ if (!test_and_clear_bit(CONN_REJ_ACT,
+ &chan->conn_state)) {
control->final = 0;
l2cap_retransmit_all(chan, control);
}
@@ -7239,11 +6240,7 @@ static int l2cap_finish_move(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
chan->rx_state = L2CAP_RX_STATE_RECV;
-
- if (chan->hs_hcon)
- chan->conn->mtu = chan->hs_hcon->hdev->block_mtu;
- else
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
return l2cap_resegment(chan);
}
@@ -7310,11 +6307,7 @@ static int l2cap_rx_state_wait_f(struct l2cap_chan *chan,
*/
chan->next_tx_seq = control->reqseq;
chan->unacked_frames = 0;
-
- if (chan->hs_hcon)
- chan->conn->mtu = chan->hs_hcon->hdev->block_mtu;
- else
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
err = l2cap_resegment(chan);
@@ -7666,21 +6659,10 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
chan = l2cap_get_chan_by_scid(conn, cid);
if (!chan) {
- if (cid == L2CAP_CID_A2MP) {
- chan = a2mp_channel_create(conn, skb);
- if (!chan) {
- kfree_skb(skb);
- return;
- }
-
- l2cap_chan_hold(chan);
- l2cap_chan_lock(chan);
- } else {
- BT_DBG("unknown cid 0x%4.4x", cid);
- /* Drop packet and return */
- kfree_skb(skb);
- return;
- }
+ BT_DBG("unknown cid 0x%4.4x", cid);
+ /* Drop packet and return */
+ kfree_skb(skb);
+ return;
}
BT_DBG("chan %p, len %d", chan, skb->len);
@@ -7881,10 +6863,6 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS;
- if (hcon->type == ACL_LINK &&
- hci_dev_test_flag(hcon->hdev, HCI_HS_ENABLED))
- conn->local_fixed_chan |= L2CAP_FC_A2MP;
-
if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) &&
(bredr_sc_enabled(hcon->hdev) ||
hci_dev_test_flag(hcon->hdev, HCI_FORCE_BREDR_SMP)))
@@ -7947,7 +6925,7 @@ static void l2cap_chan_by_pid(struct l2cap_chan *chan, void *data)
}
int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
- bdaddr_t *dst, u8 dst_type)
+ bdaddr_t *dst, u8 dst_type, u16 timeout)
{
struct l2cap_conn *conn;
struct hci_conn *hcon;
@@ -8040,19 +7018,17 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
hcon = hci_connect_le(hdev, dst, dst_type, false,
- chan->sec_level,
- HCI_LE_CONN_TIMEOUT,
+ chan->sec_level, timeout,
HCI_ROLE_SLAVE);
else
hcon = hci_connect_le_scan(hdev, dst, dst_type,
- chan->sec_level,
- HCI_LE_CONN_TIMEOUT,
+ chan->sec_level, timeout,
CONN_REASON_L2CAP_CHAN);
} else {
u8 auth_type = l2cap_get_auth_type(chan);
hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type,
- CONN_REASON_L2CAP_CHAN);
+ CONN_REASON_L2CAP_CHAN, timeout);
}
if (IS_ERR(hcon)) {
@@ -8349,11 +7325,6 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid,
state_to_string(chan->state));
- if (chan->scid == L2CAP_CID_A2MP) {
- l2cap_chan_unlock(chan);
- continue;
- }
-
if (!status && encrypt)
chan->sec_level = hcon->sec_level;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index e50d3d102078..4287aa6cc988 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -254,7 +254,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
chan->mode = L2CAP_MODE_LE_FLOWCTL;
err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid),
- &la.l2_bdaddr, la.l2_bdaddr_type);
+ &la.l2_bdaddr, la.l2_bdaddr_type,
+ sk->sk_sndtimeo);
if (err)
return err;
@@ -1027,23 +1028,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) {
- err = -EINVAL;
- break;
- }
-
- if (chan->mode != L2CAP_MODE_ERTM &&
- chan->mode != L2CAP_MODE_STREAMING) {
- err = -EOPNOTSUPP;
- break;
- }
-
- chan->chan_policy = (u8) opt;
-
- if (sk->sk_state == BT_CONNECTED &&
- chan->move_role == L2CAP_MOVE_ROLE_NONE)
- l2cap_move_start(chan);
-
+ err = -EOPNOTSUPP;
break;
case BT_SNDMTU:
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index bb72ff6eb22f..32ed6e9245a3 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -835,8 +835,6 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_ssp_capable(hdev)) {
settings |= MGMT_SETTING_SSP;
- if (IS_ENABLED(CONFIG_BT_HS))
- settings |= MGMT_SETTING_HS;
}
if (lmp_sc_capable(hdev))
@@ -901,9 +899,6 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
settings |= MGMT_SETTING_SSP;
- if (hci_dev_test_flag(hdev, HCI_HS_ENABLED))
- settings |= MGMT_SETTING_HS;
-
if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
settings |= MGMT_SETTING_ADVERTISING;
@@ -1045,6 +1040,8 @@ static void rpa_expired(struct work_struct *work)
hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL);
}
+static int set_discoverable_sync(struct hci_dev *hdev, void *data);
+
static void discov_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -1063,7 +1060,7 @@ static void discov_off(struct work_struct *work)
hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
hdev->discov_timeout = 0;
- hci_update_discoverable(hdev);
+ hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL);
mgmt_new_settings(hdev);
@@ -1388,6 +1385,14 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ if (!cp->val) {
+ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED,
+ MGMT_STATUS_BUSY);
+ goto failed;
+ }
+ }
+
if (pending_find(MGMT_OP_SET_POWERED, hdev)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED,
MGMT_STATUS_BUSY);
@@ -1407,7 +1412,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
/* Cancel potentially blocking sync operation before power off */
if (cp->val == 0x00) {
- __hci_cmd_sync_cancel(hdev, -EHOSTDOWN);
+ hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN);
err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd,
mgmt_set_powered_complete);
} else {
@@ -1702,8 +1707,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- if (cmd)
- mgmt_pending_remove(cmd);
+ mgmt_pending_remove(cmd);
hci_dev_unlock(hdev);
}
@@ -1928,7 +1932,6 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
if (enable && hci_dev_test_and_clear_flag(hdev,
HCI_SSP_ENABLED)) {
- hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
new_settings(hdev, NULL);
}
@@ -1941,12 +1944,6 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED);
} else {
changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
-
- if (!changed)
- changed = hci_dev_test_and_clear_flag(hdev,
- HCI_HS_ENABLED);
- else
- hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
}
mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match);
@@ -2010,11 +2007,6 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
} else {
changed = hci_dev_test_and_clear_flag(hdev,
HCI_SSP_ENABLED);
- if (!changed)
- changed = hci_dev_test_and_clear_flag(hdev,
- HCI_HS_ENABLED);
- else
- hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
}
err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev);
@@ -2060,63 +2052,10 @@ failed:
static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
- struct mgmt_mode *cp = data;
- bool changed;
- u8 status;
- int err;
-
bt_dev_dbg(hdev, "sock %p", sk);
- if (!IS_ENABLED(CONFIG_BT_HS))
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
MGMT_STATUS_NOT_SUPPORTED);
-
- status = mgmt_bredr_support(hdev);
- if (status)
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status);
-
- if (!lmp_ssp_capable(hdev))
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
- MGMT_STATUS_NOT_SUPPORTED);
-
- if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
- MGMT_STATUS_REJECTED);
-
- if (cp->val != 0x00 && cp->val != 0x01)
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
- MGMT_STATUS_INVALID_PARAMS);
-
- hci_dev_lock(hdev);
-
- if (pending_find(MGMT_OP_SET_SSP, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
- if (cp->val) {
- changed = !hci_dev_test_and_set_flag(hdev, HCI_HS_ENABLED);
- } else {
- if (hdev_is_powered(hdev)) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
- MGMT_STATUS_REJECTED);
- goto unlock;
- }
-
- changed = hci_dev_test_and_clear_flag(hdev, HCI_HS_ENABLED);
- }
-
- err = send_settings_rsp(sk, MGMT_OP_SET_HS, hdev);
- if (err < 0)
- goto unlock;
-
- if (changed)
- err = new_settings(hdev, sk);
-
-unlock:
- hci_dev_unlock(hdev);
- return err;
}
static void set_le_complete(struct hci_dev *hdev, void *data, int err)
@@ -3186,6 +3125,7 @@ failed:
static u8 link_to_bdaddr(u8 link_type, u8 addr_type)
{
switch (link_type) {
+ case ISO_LINK:
case LE_LINK:
switch (addr_type) {
case ADDR_LE_DEV_PUBLIC:
@@ -3503,7 +3443,8 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
if (cp->addr.type == BDADDR_BREDR) {
conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level,
- auth_type, CONN_REASON_PAIR_DEVICE);
+ auth_type, CONN_REASON_PAIR_DEVICE,
+ HCI_ACL_CONN_TIMEOUT);
} else {
u8 addr_type = le_addr_type(cp->addr.type);
struct hci_conn_params *p;
@@ -6764,7 +6705,6 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_clear_flag(hdev, HCI_SSP_ENABLED);
hci_dev_clear_flag(hdev, HCI_LINK_SECURITY);
hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE);
- hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
}
hci_dev_change_flag(hdev, HCI_BREDR_ENABLED);
@@ -8468,7 +8408,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
static u8 calculate_name_len(struct hci_dev *hdev)
{
- u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3];
+ u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */
return eir_append_local_name(hdev, buf, 0);
}
@@ -8827,8 +8767,7 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data,
}
unlock:
- if (cmd)
- mgmt_pending_free(cmd);
+ mgmt_pending_free(cmd);
hci_dev_unlock(hdev);
}
@@ -9679,6 +9618,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
u16 eir_len = 0;
u32 flags = 0;
+ if (test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
+ return;
+
/* allocate buff for LE or BR/EDR adv */
if (conn->le_adv_data_len > 0)
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
@@ -9746,6 +9688,9 @@ bool mgmt_powering_down(struct hci_dev *hdev)
struct mgmt_pending_cmd *cmd;
struct mgmt_mode *cp;
+ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN))
+ return true;
+
cmd = pending_find(MGMT_OP_SET_POWERED, hdev);
if (!cmd)
return false;
@@ -9764,14 +9709,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
struct mgmt_ev_device_disconnected ev;
struct sock *sk = NULL;
- /* The connection is still in hci_conn_hash so test for 1
- * instead of 0 to know if this is the last one.
- */
- if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) {
- cancel_delayed_work(&hdev->power_off);
- queue_work(hdev->req_workqueue, &hdev->power_off.work);
- }
-
if (!mgmt_connected)
return;
@@ -9828,14 +9765,6 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
{
struct mgmt_ev_connect_failed ev;
- /* The connection is still in hci_conn_hash so test for 1
- * instead of 0 to know if this is the last one.
- */
- if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) {
- cancel_delayed_work(&hdev->power_off);
- queue_work(hdev->req_workqueue, &hdev->power_off.work);
- }
-
bacpy(&ev.addr.bdaddr, bdaddr);
ev.addr.type = link_to_bdaddr(link_type, addr_type);
ev.status = mgmt_status(status);
@@ -10069,6 +9998,9 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
/* If this is a HCI command related to powering on the
* HCI dev don't send any mgmt signals.
*/
+ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN))
+ return;
+
if (pending_find(MGMT_OP_SET_POWERED, hdev))
return;
}
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 630e3023273b..9612c5d1b13f 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -875,6 +875,7 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
remove = true;
goto done;
}
+
cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT;
cp->rssi_high = address_filter->rssi_high;
cp->rssi_low = address_filter->rssi_low;
@@ -887,6 +888,8 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp,
HCI_CMD_TIMEOUT);
+ kfree(cp);
+
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to enable address %pMR filter",
&address_filter->bdaddr);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 053ef8f25fae..1d34d8497033 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb)) {
+ if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) {
s = rfcomm_recv_frame(s, skb);
if (!s)
break;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index c736186aba26..43daf965a01e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -264,7 +264,8 @@ static int sco_connect(struct sock *sk)
}
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
- sco_pi(sk)->setting, &sco_pi(sk)->codec);
+ sco_pi(sk)->setting, &sco_pi(sk)->codec,
+ sk->sk_sndtimeo);
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto unlock;
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 8906f7bdf4a9..de33dc1b0daa 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -7,7 +7,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
-extern struct bpf_struct_ops bpf_bpf_dummy_ops;
+static struct bpf_struct_ops bpf_bpf_dummy_ops;
/* A common type for test_N with return value in bpf_dummy_ops */
typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
@@ -22,6 +22,8 @@ struct bpf_dummy_ops_test_args {
struct bpf_dummy_ops_state state;
};
+static struct btf *bpf_dummy_ops_btf;
+
static struct bpf_dummy_ops_test_args *
dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr)
{
@@ -89,10 +91,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
+ u32 image_off = 0;
int prog_ret;
+ s32 type_id;
int err;
- if (prog->aux->attach_btf_id != st_ops->type_id)
+ type_id = btf_find_by_name_kind(bpf_dummy_ops_btf,
+ bpf_bpf_dummy_ops.name,
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ if (prog->aux->attach_btf_id != type_id)
return -EOPNOTSUPP;
func_proto = prog->aux->attach_func_proto;
@@ -106,12 +115,6 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
}
- image = arch_alloc_bpf_trampoline(PAGE_SIZE);
- if (!image) {
- err = -ENOMEM;
- goto out;
- }
-
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
@@ -125,7 +128,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
&dummy_ops_test_ret_function,
- image, image + PAGE_SIZE);
+ &image, &image_off,
+ true);
if (err < 0)
goto out;
@@ -139,7 +143,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
- arch_free_bpf_trampoline(image, PAGE_SIZE);
+ bpf_struct_ops_image_free(image);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
@@ -148,6 +152,7 @@ out:
static int bpf_dummy_init(struct btf *btf)
{
+ bpf_dummy_ops_btf = btf;
return 0;
}
@@ -169,7 +174,7 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t,
case offsetof(struct bpf_dummy_ops, test_sleepable):
break;
default:
- if (prog->aux->sleepable)
+ if (prog->sleepable)
return -EINVAL;
}
@@ -247,7 +252,7 @@ static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
.test_sleepable = bpf_dummy_test_sleepable,
};
-struct bpf_struct_ops bpf_bpf_dummy_ops = {
+static struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
.check_member = bpf_dummy_ops_check_member,
@@ -256,4 +261,11 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
.cfi_stubs = &__bpf_bpf_dummy_ops,
+ .owner = THIS_MODULE,
};
+
+static int __init bpf_dummy_struct_ops_init(void)
+{
+ return register_bpf_struct_ops(&bpf_bpf_dummy_ops, bpf_dummy_ops);
+}
+late_initcall(bpf_dummy_struct_ops_init);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index dfd919374017..61efeadaff8d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -12,6 +12,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
+#include <net/hotdata.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
@@ -254,7 +255,8 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
int i, n;
LIST_HEAD(list);
- n = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, (void **)skbs);
+ n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes,
+ (void **)skbs);
if (unlikely(n == 0)) {
for (i = 0; i < nframes; i++)
xdp_return_frame(frames[i]);
@@ -617,21 +619,21 @@ CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor);
__bpf_kfunc_end_defs();
-BTF_SET8_START(bpf_test_modify_return_ids)
+BTF_KFUNCS_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
BTF_ID_FLAGS(func, bpf_modify_return_test2)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
-BTF_SET8_END(bpf_test_modify_return_ids)
+BTF_KFUNCS_END(bpf_test_modify_return_ids)
static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
.owner = THIS_MODULE,
.set = &bpf_test_modify_return_ids,
};
-BTF_SET8_START(test_sk_check_kfunc_ids)
+BTF_KFUNCS_START(test_sk_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
-BTF_SET8_END(test_sk_check_kfunc_ids)
+BTF_KFUNCS_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
u32 size, u32 headroom, u32 tailroom)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index ac19b797dbec..2cab878e0a39 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit_batch(struct list_head *net_list)
+static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net_device *dev;
struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
for_each_netdev(net, dev)
if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
-
- unregister_netdevice_many(&list);
-
- rtnl_unlock();
+ br_dev_delete(dev, dev_to_kill);
}
static struct pernet_operations br_net_ops = {
- .exit_batch = br_net_exit_batch,
+ .exit_batch_rtnl = br_net_exit_batch_rtnl,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 65cee0ad3c1b..c366ccc8b3db 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -108,38 +108,23 @@ out:
return NETDEV_TX_OK;
}
-static struct lock_class_key bridge_netdev_addr_lock_key;
-
-static void br_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
-}
-
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = br_fdb_hash_init(br);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
err = br_mdb_hash_init(br);
if (err) {
- free_percpu(dev->tstats);
br_fdb_hash_fini(br);
return err;
}
err = br_vlan_init(br);
if (err) {
- free_percpu(dev->tstats);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
return err;
@@ -147,14 +132,14 @@ static int br_dev_init(struct net_device *dev)
err = br_multicast_init_stats(br);
if (err) {
- free_percpu(dev->tstats);
br_vlan_flush(br);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
+ return err;
}
- br_set_lockdep_class(dev);
- return err;
+ netdev_lockdep_set_classes(dev);
+ return 0;
}
static void br_dev_uninit(struct net_device *dev)
@@ -166,7 +151,6 @@ static void br_dev_uninit(struct net_device *dev)
br_vlan_flush(br);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
- free_percpu(dev->tstats);
}
static int br_dev_open(struct net_device *dev)
@@ -481,7 +465,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fill_forward_path = br_fill_forward_path,
};
-static struct device_type br_type = {
+static const struct device_type br_type = {
.name = "bridge",
};
@@ -503,6 +487,7 @@ void br_dev_setup(struct net_device *dev)
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
br->dev = dev;
spin_lock_init(&br->lock);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c622de5eccd0..c77591e63841 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -35,10 +35,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly;
int __init br_fdb_init(void)
{
- br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
- sizeof(struct net_bridge_fdb_entry),
- 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ br_fdb_cache = KMEM_CACHE(net_bridge_fdb_entry, SLAB_HWCACHE_ALIGN);
if (!br_fdb_cache)
return -ENOMEM;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d7d021af1029..2d7b73242958 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t)
}
#endif
+static void br_multicast_query_delay_expired(struct timer_list *t)
+{
+}
+
static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
@@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
- query->delay_time = jiffies + max_delay;
+ mod_timer(&query->delay_timer, jiffies + max_delay);
mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
@@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->multicast_querier_interval = 255 * HZ;
brmctx->multicast_membership_interval = 260 * HZ;
- brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
- brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
@@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip4_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip4_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
@@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip6_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip6_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
@@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
{
del_timer_sync(&brmctx->ip4_mc_router_timer);
del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_other_query.delay_timer);
del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
del_timer_sync(&brmctx->ip6_mc_router_timer);
del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_other_query.delay_timer);
del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
@@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
max_delay = brmctx->multicast_query_response_interval;
if (!timer_pending(&brmctx->ip4_other_query.timer))
- brmctx->ip4_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip4_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
if (!timer_pending(&brmctx->ip6_other_query.timer))
- brmctx->ip6_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip6_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index ed1720890757..35e10c5a766d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_STOLEN;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ * macvlan0
+ * br0
+ * ethX ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ * -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ * interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ * and schedules a work queue to send them out on the lower devices.
+ *
+ * The clone skb->_nfct is not a copy, it is the same entry as the
+ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table). This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conntrack *nfct = skb_nfct(skb);
+ const struct nf_ct_hook *ct_hook;
+ struct nf_conn *ct;
+ int ret;
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (likely(nf_ct_is_confirmed(ct)))
+ return NF_ACCEPT;
+
+ WARN_ON_ONCE(skb_shared(skb));
+ WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+ /* We can't call nf_confirm here, it would create a dependency
+ * on nf_conntrack module.
+ */
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (!ct_hook) {
+ skb->_nfct = 0ul;
+ nf_conntrack_put(nfct);
+ return NF_ACCEPT;
+ }
+
+ nf_bridge_pull_encap_header(skb);
+ ret = ct_hook->confirm(skb);
+ switch (ret & NF_VERDICT_MASK) {
+ case NF_STOLEN:
+ return NF_STOLEN;
+ default:
+ nf_bridge_push_encap_header(skb);
+ break;
+ }
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+ return ret;
+}
+#endif
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ {
+ .hook = br_nf_local_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_BR_PRI_LAST,
+ },
+#endif
{
.hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 5ad4abfcb7ba..2cf4fc756263 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -455,7 +455,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
u32 filter_mask, const struct net_device *dev,
bool getlink)
{
- u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+ u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) :
+ IF_OPER_DOWN;
struct nlattr *af = NULL;
struct net_bridge *br;
struct ifinfomsg *hdr;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b0a92c344722..86ea5e6689b5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -78,7 +78,7 @@ struct bridge_mcast_own_query {
/* other querier */
struct bridge_mcast_other_query {
struct timer_list timer;
- unsigned long delay_time;
+ struct timer_list delay_timer;
};
/* selected querier */
@@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
own_querier_enabled = false;
}
- return time_is_before_jiffies(querier->delay_time) &&
+ return !timer_pending(&querier->delay_timer) &&
(own_querier_enabled || timer_pending(&querier->timer));
}
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee84e783e1df..7b41ee8740cb 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
}
static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ struct net_device *dev,
+ unsigned long action,
enum switchdev_obj_id id,
const struct net_bridge_mdb_entry *mp,
struct net_device *orig_dev)
{
- struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = id,
+ .orig_dev = orig_dev,
+ },
+ };
+ struct switchdev_obj_port_mdb *pmdb;
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
+ br_switchdev_mdb_populate(&mdb, mp);
+
+ if (action == SWITCHDEV_PORT_OBJ_ADD &&
+ switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+ /* This event is already in the deferred queue of
+ * events, so this replay must be elided, lest the
+ * driver receives duplicate events for it. This can
+ * only happen when replaying additions, since
+ * modifications are always immediately visible in
+ * br->mdb_list, whereas actual event delivery may be
+ * delayed.
+ */
+ return 0;
+ }
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
+ pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+ if (!pmdb)
+ return -ENOMEM;
+ list_add_tail(&pmdb->obj.list, mdb_list);
return 0;
}
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ /* br_switchdev_mdb_queue_one() will take care to not queue a
+ * replay of an event that is already pending in the switchdev
+ * deferred queue. In order to safely determine that, there
+ * must be no new deferred MDB notifications enqueued for the
+ * duration of the MDB scan. Therefore, grab the write-side
+ * lock to avoid racing with any concurrent IGMP/MLD snooping.
*/
- rcu_read_lock();
+ spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group __rcu * const *pp;
const struct net_bridge_port_group *p;
if (mp->host_joined) {
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_HOST_MDB,
mp, br_dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->key.port->dev != dev)
continue;
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_PORT_MDB,
mp, dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
}
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
+ spin_unlock_bh(&br->multicast_lock);
list_for_each_entry(obj, &mdb_list, list) {
err = br_switchdev_mdb_replay_one(nb, dev,
@@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+ /* Make sure that the device leaving this bridge has seen all
+ * relevant events before it is disassociated. In the normal
+ * case, when the device is directly attached to the bridge,
+ * this is covered by del_nbp(). If the association was indirect
+ * however, e.g. via a team or bond, and the device is leaving
+ * that intermediate device, then the bridge port remains in
+ * place.
+ */
+ switchdev_deferred_process();
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 15f44d026e75..9c2fffb827ab 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -841,7 +841,7 @@ void br_vlan_flush(struct net_bridge *br)
vg = br_vlan_group(br);
__vlan_flush(br, NULL, vg);
RCU_INIT_POINTER(br->vlgrp, NULL);
- synchronize_rcu();
+ synchronize_net();
__vlan_group_free(vg);
}
@@ -1372,7 +1372,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
vg = nbp_vlan_group(port);
__vlan_flush(port->br, port, vg);
RCU_INIT_POINTER(port->vlgrp, NULL);
- synchronize_rcu();
+ synchronize_net();
__vlan_group_free(vg);
}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 7f304a19ac1b..104c0125e32e 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -39,6 +39,10 @@ config NF_CONNTRACK_BRIDGE
To compile it as a module, choose M here. If unsure, say N.
+# old sockopt interface and eval loop
+config BRIDGE_NF_EBTABLES_LEGACY
+ tristate
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
@@ -55,6 +59,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@@ -65,6 +70,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@@ -74,6 +80,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 1c9ce49ab651..b9a1303da977 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
+obj-$(CONFIG_BRIDGE_NF_EBTABLES_LEGACY) += ebtables.o
# tables
obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index abb090f94ed2..6f877e31709b 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
return nf_conntrack_in(skb, &bridge_state);
}
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ if (skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ /* nf_conntrack_confirm() cannot handle concurrent clones,
+ * this happens for broad/multicast frames with e.g. macvlan on top
+ * of the bridge device.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ return NF_ACCEPT;
+
+ /* let inet prerouting call conntrack again */
+ skb->_nfct = 0;
+ nf_ct_put(ct);
+
+ return NF_ACCEPT;
+}
+
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
struct nf_bridge_frag_data *data)
{
@@ -386,6 +410,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
.priority = NF_IP_PRI_CONNTRACK,
},
{
+ .hook = nf_ct_bridge_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7343fd487dbe..707576eeeb58 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -865,6 +865,8 @@ static __init int can_init(void)
/* check for correct padding to be able to use the structs similarly */
BUILD_BUG_ON(offsetof(struct can_frame, len) !=
offsetof(struct canfd_frame, len) ||
+ offsetof(struct can_frame, len) !=
+ offsetof(struct canxl_frame, flags) ||
offsetof(struct can_frame, data) !=
offsetof(struct canfd_frame, data));
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9168114fc87f..27d5fcf0eac9 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -72,9 +72,11 @@
#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60)
/* use of last_frames[index].flags */
+#define RX_LOCAL 0x10 /* frame was created on the local host */
+#define RX_OWN 0x20 /* frame was sent via the socket it was received on */
#define RX_RECV 0x40 /* received data for this element */
#define RX_THR 0x80 /* element not been sent due to throttle feature */
-#define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */
+#define BCM_CAN_FLAGS_MASK 0x0F /* to clean private flags after usage */
/* get best masking value for can_rx_register() for a given single can_id */
#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \
@@ -138,6 +140,16 @@ static LIST_HEAD(bcm_notifier_list);
static DEFINE_SPINLOCK(bcm_notifier_lock);
static struct bcm_sock *bcm_busy_notifier;
+/* Return pointer to store the extra msg flags for bcm_recvmsg().
+ * We use the space of one unsigned int beyond the 'struct sockaddr_can'
+ * in skb->cb.
+ */
+static inline unsigned int *bcm_flags(struct sk_buff *skb)
+{
+ /* return pointer after struct sockaddr_can */
+ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
+}
+
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
return (struct bcm_sock *)sk;
@@ -325,6 +337,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
struct sock *sk = op->sk;
unsigned int datalen = head->nframes * op->cfsiz;
int err;
+ unsigned int *pflags;
skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
if (!skb)
@@ -332,6 +345,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
skb_put_data(skb, head, sizeof(*head));
+ /* ensure space for sockaddr_can and msg flags */
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
+ sizeof(unsigned int));
+
+ /* initialize msg flags */
+ pflags = bcm_flags(skb);
+ *pflags = 0;
+
if (head->nframes) {
/* CAN frames starting here */
firstframe = (struct canfd_frame *)skb_tail_pointer(skb);
@@ -344,8 +365,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* relevant for updates that are generated by the
* BCM, where nframes is 1
*/
- if (head->nframes == 1)
+ if (head->nframes == 1) {
+ if (firstframe->flags & RX_LOCAL)
+ *pflags |= MSG_DONTROUTE;
+ if (firstframe->flags & RX_OWN)
+ *pflags |= MSG_CONFIRM;
+
firstframe->flags &= BCM_CAN_FLAGS_MASK;
+ }
}
if (has_timestamp) {
@@ -360,7 +387,6 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* containing the interface index.
*/
- sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
@@ -444,7 +470,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
op->frames_filtered = op->frames_abs = 0;
/* this element is not throttled anymore */
- data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
+ data->flags &= ~RX_THR;
memset(&head, 0, sizeof(head));
head.opcode = RX_CHANGED;
@@ -465,13 +491,17 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
*/
static void bcm_rx_update_and_send(struct bcm_op *op,
struct canfd_frame *lastdata,
- const struct canfd_frame *rxdata)
+ const struct canfd_frame *rxdata,
+ unsigned char traffic_flags)
{
memcpy(lastdata, rxdata, op->cfsiz);
/* mark as used and throttled by default */
lastdata->flags |= (RX_RECV|RX_THR);
+ /* add own/local/remote traffic flags */
+ lastdata->flags |= traffic_flags;
+
/* throttling mode inactive ? */
if (!op->kt_ival2) {
/* send RX_CHANGED to the user immediately */
@@ -508,7 +538,8 @@ rx_changed_settime:
* received data stored in op->last_frames[]
*/
static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
- const struct canfd_frame *rxdata)
+ const struct canfd_frame *rxdata,
+ unsigned char traffic_flags)
{
struct canfd_frame *cf = op->frames + op->cfsiz * index;
struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
@@ -521,7 +552,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
if (!(lcf->flags & RX_RECV)) {
/* received data for the first time => send update to user */
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
@@ -529,7 +560,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
for (i = 0; i < rxdata->len; i += 8) {
if ((get_u64(cf, i) & get_u64(rxdata, i)) !=
(get_u64(cf, i) & get_u64(lcf, i))) {
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
}
@@ -537,7 +568,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
if (op->flags & RX_CHECK_DLC) {
/* do a real check in CAN frame length */
if (rxdata->len != lcf->len) {
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
}
@@ -644,6 +675,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
struct bcm_op *op = (struct bcm_op *)data;
const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data;
unsigned int i;
+ unsigned char traffic_flags;
if (op->can_id != rxframe->can_id)
return;
@@ -673,15 +705,24 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
return;
}
+ /* compute flags to distinguish between own/local/remote CAN traffic */
+ traffic_flags = 0;
+ if (skb->sk) {
+ traffic_flags |= RX_LOCAL;
+ if (skb->sk == op->sk)
+ traffic_flags |= RX_OWN;
+ }
+
if (op->flags & RX_FILTER_ID) {
/* the easiest case */
- bcm_rx_update_and_send(op, op->last_frames, rxframe);
+ bcm_rx_update_and_send(op, op->last_frames, rxframe,
+ traffic_flags);
goto rx_starttimer;
}
if (op->nframes == 1) {
/* simple compare with index 0 */
- bcm_rx_cmp_to_index(op, 0, rxframe);
+ bcm_rx_cmp_to_index(op, 0, rxframe, traffic_flags);
goto rx_starttimer;
}
@@ -698,7 +739,8 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) ==
(get_u64(op->frames, 0) &
get_u64(op->frames + op->cfsiz * i, 0))) {
- bcm_rx_cmp_to_index(op, i, rxframe);
+ bcm_rx_cmp_to_index(op, i, rxframe,
+ traffic_flags);
break;
}
}
@@ -1675,6 +1717,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* assign the flags that have been recorded in bcm_send_to_user() */
+ msg->msg_flags |= *(bcm_flags(skb));
+
skb_free_datagram(sk, skb);
return size;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d1c6f206f429..25bac0fafc83 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -381,8 +381,9 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
return 1;
}
- /* get communication parameters only from the first FC frame */
- if (so->tx.state == ISOTP_WAIT_FIRST_FC) {
+ /* get static/dynamic communication params from first/every FC frame */
+ if (so->tx.state == ISOTP_WAIT_FIRST_FC ||
+ so->opt.flags & CAN_ISOTP_DYN_FC_PARMS) {
so->txfc.bs = cf->data[ae + 1];
so->txfc.stmin = cf->data[ae + 2];
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 16af1a7f80f6..31a93cae5111 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
unsigned int tp_max_packet_size;
/* lock for j1939_socks list */
- spinlock_t j1939_socks_lock;
+ rwlock_t j1939_socks_lock;
struct list_head j1939_socks;
struct kref rx_kref;
@@ -301,6 +301,7 @@ struct j1939_sock {
int ifindex;
struct j1939_addr addr;
+ spinlock_t filters_lock;
struct j1939_filter *filters;
int nfilters;
pgn_t pgn_rx_filter;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ecff1c947d68..a6fb89fa6278 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
return ERR_PTR(-ENOMEM);
j1939_tp_init(priv);
- spin_lock_init(&priv->j1939_socks_lock);
+ rwlock_init(&priv->j1939_socks_lock);
INIT_LIST_HEAD(&priv->j1939_socks);
mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 14c431663233..305dd72c844c 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
jsk->state |= J1939_SOCK_BOUND;
j1939_priv_get(priv);
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_add_tail(&jsk->list, &priv->j1939_socks);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
}
static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
{
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_del_init(&jsk->list);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
j1939_priv_put(priv);
jsk->state &= ~J1939_SOCK_BOUND;
@@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk,
static bool j1939_sk_match_filter(struct j1939_sock *jsk,
const struct j1939_sk_buff_cb *skcb)
{
- const struct j1939_filter *f = jsk->filters;
- int nfilter = jsk->nfilters;
+ const struct j1939_filter *f;
+ int nfilter;
+
+ spin_lock_bh(&jsk->filters_lock);
+
+ f = jsk->filters;
+ nfilter = jsk->nfilters;
if (!nfilter)
/* receive all when no filters are assigned */
- return true;
+ goto filter_match_found;
for (; nfilter; ++f, --nfilter) {
if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
@@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk,
continue;
if ((skcb->addr.src_name & f->name_mask) != f->name)
continue;
- return true;
+ goto filter_match_found;
}
+
+ spin_unlock_bh(&jsk->filters_lock);
return false;
+
+filter_match_found:
+ spin_unlock_bh(&jsk->filters_lock);
+ return true;
}
static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
@@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
struct j1939_sock *jsk;
bool match = false;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
match = j1939_sk_recv_match_one(jsk, skcb);
if (match)
break;
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
return match;
}
@@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
struct j1939_sock *jsk;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
j1939_sk_recv_one(jsk, skb);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}
static void j1939_sk_sock_destruct(struct sock *sk)
@@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+ spin_lock_init(&jsk->filters_lock);
/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
sock_set_flag(sk, SOCK_RCU_FREE);
@@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
}
lock_sock(&jsk->sk);
+ spin_lock_bh(&jsk->filters_lock);
ofilters = jsk->filters;
jsk->filters = filters;
jsk->nfilters = count;
+ spin_unlock_bh(&jsk->filters_lock);
release_sock(&jsk->sk);
kfree(ofilters);
return 0;
@@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
}
/* spread RX notifications to all sockets subscribed to this session */
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
if (j1939_sk_recv_match_one(jsk, &session->skcb))
__j1939_sk_errqueue(session, &jsk->sk, type);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
};
void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
struct j1939_sock *jsk;
int error_code = ENETDOWN;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
jsk->sk.sk_err = error_code;
if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
j1939_sk_queue_drop_all(priv, jsk, error_code);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}
static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
diff --git a/net/can/raw.c b/net/can/raw.c
index e6b822624ba2..00533f64d69d 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -91,6 +91,10 @@ struct raw_sock {
int recv_own_msgs;
int fd_frames;
int xl_frames;
+ struct can_raw_vcid_options raw_vcid_opts;
+ canid_t tx_vcid_shifted;
+ canid_t rx_vcid_shifted;
+ canid_t rx_vcid_mask_shifted;
int join_filters;
int count; /* number of active filters */
struct can_filter dfilter; /* default/single filter */
@@ -134,10 +138,29 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
- (!ro->xl_frames && can_is_canxl_skb(oskb)))
+ if (!ro->fd_frames && can_is_canfd_skb(oskb))
return;
+ if (can_is_canxl_skb(oskb)) {
+ struct canxl_frame *cxl = (struct canxl_frame *)oskb->data;
+
+ /* make sure to not pass oversized frames to the socket */
+ if (!ro->xl_frames)
+ return;
+
+ /* filter CAN XL VCID content */
+ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_RX_FILTER) {
+ /* apply VCID filter if user enabled the filter */
+ if ((cxl->prio & ro->rx_vcid_mask_shifted) !=
+ (ro->rx_vcid_shifted & ro->rx_vcid_mask_shifted))
+ return;
+ } else {
+ /* no filter => do not forward VCID tagged frames */
+ if (cxl->prio & CANXL_VCID_MASK)
+ return;
+ }
+ }
+
/* eliminate multiple filter matches for the same skb */
if (this_cpu_ptr(ro->uniq)->skb == oskb &&
this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
@@ -698,6 +721,19 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->fd_frames = ro->xl_frames;
break;
+ case CAN_RAW_XL_VCID_OPTS:
+ if (optlen != sizeof(ro->raw_vcid_opts))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&ro->raw_vcid_opts, optval, optlen))
+ return -EFAULT;
+
+ /* prepare 32 bit values for handling in hot path */
+ ro->tx_vcid_shifted = ro->raw_vcid_opts.tx_vcid << CANXL_VCID_OFFSET;
+ ro->rx_vcid_shifted = ro->raw_vcid_opts.rx_vcid << CANXL_VCID_OFFSET;
+ ro->rx_vcid_mask_shifted = ro->raw_vcid_opts.rx_vcid_mask << CANXL_VCID_OFFSET;
+ break;
+
case CAN_RAW_JOIN_FILTERS:
if (optlen != sizeof(ro->join_filters))
return -EINVAL;
@@ -720,7 +756,6 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
struct raw_sock *ro = raw_sk(sk);
int len;
void *val;
- int err = 0;
if (level != SOL_CAN_RAW)
return -EINVAL;
@@ -730,7 +765,9 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
switch (optname) {
- case CAN_RAW_FILTER:
+ case CAN_RAW_FILTER: {
+ int err = 0;
+
lock_sock(sk);
if (ro->count > 0) {
int fsize = ro->count * sizeof(struct can_filter);
@@ -755,7 +792,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (!err)
err = put_user(len, optlen);
return err;
-
+ }
case CAN_RAW_ERR_FILTER:
if (len > sizeof(can_err_mask_t))
len = sizeof(can_err_mask_t);
@@ -786,6 +823,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
val = &ro->xl_frames;
break;
+ case CAN_RAW_XL_VCID_OPTS: {
+ int err = 0;
+
+ /* user space buffer to small for VCID opts? */
+ if (len < sizeof(ro->raw_vcid_opts)) {
+ /* return -ERANGE and needed space in optlen */
+ err = -ERANGE;
+ if (put_user(sizeof(ro->raw_vcid_opts), optlen))
+ err = -EFAULT;
+ } else {
+ if (len > sizeof(ro->raw_vcid_opts))
+ len = sizeof(ro->raw_vcid_opts);
+ if (copy_to_user(optval, &ro->raw_vcid_opts, len))
+ err = -EFAULT;
+ }
+ if (!err)
+ err = put_user(len, optlen);
+ return err;
+ }
case CAN_RAW_JOIN_FILTERS:
if (len > sizeof(int))
len = sizeof(int);
@@ -803,23 +859,41 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
-static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+static void raw_put_canxl_vcid(struct raw_sock *ro, struct sk_buff *skb)
+{
+ struct canxl_frame *cxl = (struct canxl_frame *)skb->data;
+
+ /* sanitize non CAN XL bits */
+ cxl->prio &= (CANXL_PRIO_MASK | CANXL_VCID_MASK);
+
+ /* clear VCID in CAN XL frame if pass through is disabled */
+ if (!(ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_PASS))
+ cxl->prio &= CANXL_PRIO_MASK;
+
+ /* set VCID in CAN XL frame if enabled */
+ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_SET) {
+ cxl->prio &= CANXL_PRIO_MASK;
+ cxl->prio |= ro->tx_vcid_shifted;
+ }
+}
+
+static unsigned int raw_check_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
{
/* Classical CAN -> no checks for flags and device capabilities */
if (can_is_can_skb(skb))
- return false;
+ return CAN_MTU;
/* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
if (ro->fd_frames && can_is_canfd_skb(skb) &&
(mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
- return false;
+ return CANFD_MTU;
/* CAN XL -> needs to be enabled and a CAN XL device */
if (ro->xl_frames && can_is_canxl_skb(skb) &&
can_is_canxl_dev_mtu(mtu))
- return false;
+ return CANXL_MTU;
- return true;
+ return 0;
}
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -829,6 +903,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct sockcm_cookie sockc;
struct sk_buff *skb;
struct net_device *dev;
+ unsigned int txmtu;
int ifindex;
int err = -EINVAL;
@@ -869,9 +944,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (raw_bad_txframe(ro, skb, dev->mtu))
+
+ /* check for valid CAN (CC/FD/XL) frame content */
+ txmtu = raw_check_txframe(ro, skb, dev->mtu);
+ if (!txmtu)
goto free_skb;
+ /* only CANXL: clear/forward/set VCID value */
+ if (txmtu == CANXL_MTU)
+ raw_put_canxl_vcid(ro, skb);
+
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index f9a50d7f0d20..0cb61c76b9b8 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
/* Initialize data cursor if it's not a sparse read */
- if (!msg->sparse_read)
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ u64 len = msg->sparse_read_total ? : data_len;
+
+ ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
/*
@@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con,
return read_partial_message_chunk(con, section, sec_len, crc);
}
-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
@@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
return 1;
}
-static int read_sparse_msg_data(struct ceph_connection *con)
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
@@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
- do {
+ while (cursor->total_resid) {
if (con->v1.in_sr_kvec.iov_base)
ret = read_partial_message_chunk(con,
&con->v1.in_sr_kvec,
con->v1.in_sr_len,
&crc);
else if (cursor->sr_resid > 0)
- ret = read_sparse_msg_extent(con, &crc);
-
- if (ret <= 0) {
- if (do_datacrc)
- con->in_data_crc = crc;
- return ret;
- }
+ ret = read_partial_sparse_msg_extent(con, &crc);
+ if (ret <= 0)
+ break;
memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
ret = con->ops->sparse_read(con, cursor,
(char **)&con->v1.in_sr_kvec.iov_base);
+ if (ret <= 0) {
+ ret = ret ? ret : 1; /* must return > 0 to indicate success */
+ break;
+ }
con->v1.in_sr_len = ret;
- } while (ret > 0);
+ }
if (do_datacrc)
con->in_data_crc = crc;
- return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+ return ret;
}
static int read_partial_msg_data(struct ceph_connection *con)
@@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (m->sparse_read)
- ret = read_sparse_msg_data(con);
+ if (m->sparse_read_total)
+ ret = read_partial_sparse_msg_data(con);
else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index f8ec60e1aba3..bd608ffa0627 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
struct page **pages = NULL;
- bool sparse = con->in_msg->sparse_read;
+ bool sparse = !!con->in_msg->sparse_read_total;
int dpos = 0;
int tail_len;
int ret;
@@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, msg,
+ msg->sparse_read_total);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
@@ -2060,7 +2063,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- if (msg->sparse_read)
+ if (msg->sparse_read_total)
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
else
con->v2.in_state = IN_S_PREPARE_READ_DATA;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 625622016f57..9d078b37fe0b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
- m->sparse_read = (bool)srlen;
+ m->sparse_read_total = srlen;
dout("get_reply tid %lld %p\n", tid, m);
@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
}
if (o->o_sparse_op_idx < 0) {
- u64 srlen = sparse_data_requested(req);
-
- dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
- __func__, o->o_osd, srlen);
- ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ dout("%s: [%d] starting new sparse read req\n",
+ __func__, o->o_osd);
} else {
u64 end;
@@ -5857,8 +5854,8 @@ static int osd_sparse_read(struct ceph_connection *con,
struct ceph_osd *o = con->private;
struct ceph_sparse_read *sr = &o->o_sparse_read;
u32 count = sr->sr_count;
- u64 eoff, elen;
- int ret;
+ u64 eoff, elen, len = 0;
+ int i, ret;
switch (sr->sr_state) {
case CEPH_SPARSE_READ_HDR:
@@ -5903,8 +5900,20 @@ next_op:
convert_extent_map(sr);
ret = sizeof(sr->sr_datalen);
*pbuf = (char *)&sr->sr_datalen;
- sr->sr_state = CEPH_SPARSE_READ_DATA;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
break;
+ case CEPH_SPARSE_READ_DATA_PRE:
+ /* Convert sr_datalen to host-endian */
+ sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+ for (i = 0; i < count; i++)
+ len += sr->sr_extent[i].len;
+ if (sr->sr_datalen != len) {
+ pr_warn_ratelimited("data len %u != extent len %llu\n",
+ sr->sr_datalen, len);
+ return -EREMOTEIO;
+ }
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ fallthrough;
case CEPH_SPARSE_READ_DATA:
if (sr->sr_index >= count) {
sr->sr_state = CEPH_SPARSE_READ_HDR;
diff --git a/net/core/Makefile b/net/core/Makefile
index 821aec06abf1..6e6548011fae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,6 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
+obj-y += hotdata.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 103d46fa0eeb..a8b625abe242 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
size_t len, void *from, void *priv2)
{
__wsum *csum = priv2;
- __wsum next = csum_partial_copy_nocheck(from, iter_to, len);
+ __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
*csum = csum_block_add(*csum, next, progress);
return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index f01a9b858347..0766a245816b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,41 +153,21 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
+#include <net/rps.h>
#include "dev.h"
#include "net-sysfs.h"
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-struct list_head ptype_all __read_mostly; /* Taps */
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
-/*
- * The @dev_base_head list is protected by @dev_base_lock and the rtnl
- * semaphore.
- *
- * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
- *
- * Writers must hold the rtnl semaphore while they loop through the
- * dev_base_head list, and hold dev_base_lock for writing when they do the
- * actual updates. This allows pure readers to access the list even
- * while a writer is preparing to update it.
- *
- * To put it another way, dev_base_lock is held for writing only to
- * protect against pure readers; the rtnl semaphore provides the
- * protection against other writers.
- *
- * See, for example usages, register_netdevice() and
- * unregister_netdevice(), which must be called with the rtnl
- * semaphore held.
- */
-DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_lock);
-
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
@@ -200,8 +180,9 @@ static DECLARE_RWSEM(devnet_rename_sem);
static inline void dev_base_seq_inc(struct net *net)
{
- while (++net->dev_base_seq == 0)
- ;
+ unsigned int val = net->dev_base_seq + 1;
+
+ WRITE_ONCE(net->dev_base_seq, val ?: 1);
}
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
@@ -336,18 +317,27 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return -ENOMEM;
netdev_name_node_add(net, name_node);
/* The node that holds dev->name acts as a head of per-device list. */
- list_add_tail(&name_node->list, &dev->name_node->list);
+ list_add_tail_rcu(&name_node->list, &dev->name_node->list);
return 0;
}
-static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+static void netdev_name_node_alt_free(struct rcu_head *head)
{
- list_del(&name_node->list);
+ struct netdev_name_node *name_node =
+ container_of(head, struct netdev_name_node, rcu);
+
kfree(name_node->name);
netdev_name_node_free(name_node);
}
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+ netdev_name_node_del(name_node);
+ list_del(&name_node->list);
+ call_rcu(&name_node->rcu, netdev_name_node_alt_free);
+}
+
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
@@ -362,10 +352,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
- netdev_name_node_del(name_node);
- synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
-
return 0;
}
@@ -373,8 +360,10 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
{
struct netdev_name_node *name_node, *tmp;
- list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
- __netdev_name_node_alt_destroy(name_node);
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) {
+ list_del(&name_node->list);
+ netdev_name_node_alt_free(&name_node->rcu);
+ }
}
/* Device list insertion */
@@ -385,12 +374,10 @@ static void list_netdevice(struct net_device *dev)
ASSERT_RTNL();
- write_lock(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
- write_unlock(&dev_base_lock);
netdev_for_each_altname(dev, name_node)
netdev_name_node_add(net, name_node);
@@ -404,7 +391,7 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev, bool lock)
+static void unlist_netdevice(struct net_device *dev)
{
struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
@@ -417,13 +404,9 @@ static void unlist_netdevice(struct net_device *dev, bool lock)
netdev_name_node_del(name_node);
/* Unlink dev from the device chain */
- if (lock)
- write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- if (lock)
- write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -442,6 +425,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
+/* Page_pool has a lockless array/stack to alloc/recycle pages.
+ * PP consumers must pay attention to run APIs in the appropriate context
+ * (e.g. NAPI context).
+ */
+static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+
#ifdef CONFIG_LOCKDEP
/*
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -551,7 +540,7 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
- return pt->dev ? &pt->dev->ptype_all : &ptype_all;
+ return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
else
return pt->dev ? &pt->dev->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
@@ -653,7 +642,7 @@ int dev_get_iflink(const struct net_device *dev)
if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
return dev->netdev_ops->ndo_get_iflink(dev);
- return dev->ifindex;
+ return READ_ONCE(dev->ifindex);
}
EXPORT_SYMBOL(dev_get_iflink);
@@ -738,9 +727,9 @@ EXPORT_SYMBOL_GPL(dev_fill_forward_path);
* @net: the applicable net namespace
* @name: name to find
*
- * Find an interface by name. Must be called under RTNL semaphore
- * or @dev_base_lock. If the name is found a pointer to the device
- * is returned. If the name is not found then %NULL is returned. The
+ * Find an interface by name. Must be called under RTNL semaphore.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned. The
* reference counters are not incremented so the caller must be
* careful with locks.
*/
@@ -821,8 +810,7 @@ EXPORT_SYMBOL(netdev_get_by_name);
* Search for an interface by index. Returns %NULL if the device
* is not found or a pointer to the device. The device has not
* had its reference counter increased so the caller must be careful
- * about locking. The caller must hold either the RTNL semaphore
- * or @dev_base_lock.
+ * about locking. The caller must hold the RTNL semaphore.
*/
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
@@ -1212,13 +1200,13 @@ int dev_change_name(struct net_device *dev, const char *newname)
dev->flags & IFF_UP ? " (while UP)" : "");
old_assign_type = dev->name_assign_type;
- dev->name_assign_type = NET_NAME_RENAMED;
+ WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED);
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
up_write(&devnet_rename_sem);
return ret;
}
@@ -1227,15 +1215,11 @@ rollback:
netdev_adjacent_rename_links(dev, oldname);
- write_lock(&dev_base_lock);
netdev_name_node_del(dev->name_node);
- write_unlock(&dev_base_lock);
- synchronize_rcu();
+ synchronize_net();
- write_lock(&dev_base_lock);
netdev_name_node_add(net, dev->name_node);
- write_unlock(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
@@ -1247,7 +1231,7 @@ rollback:
down_write(&devnet_rename_sem);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
@@ -2242,7 +2226,8 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
*/
bool dev_nit_active(struct net_device *dev)
{
- return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
+ return !list_empty(&net_hotdata.ptype_all) ||
+ !list_empty(&dev->ptype_all);
}
EXPORT_SYMBOL_GPL(dev_nit_active);
@@ -2253,10 +2238,9 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
- struct packet_type *ptype;
+ struct list_head *ptype_list = &net_hotdata.ptype_all;
+ struct packet_type *ptype, *pt_prev = NULL;
struct sk_buff *skb2 = NULL;
- struct packet_type *pt_prev = NULL;
- struct list_head *ptype_list = &ptype_all;
rcu_read_lock();
again:
@@ -2302,7 +2286,7 @@ again:
pt_prev = ptype;
}
- if (ptype_list == &ptype_all) {
+ if (ptype_list == &net_hotdata.ptype_all) {
ptype_list = &dev->ptype_all;
goto again;
}
@@ -4421,19 +4405,10 @@ EXPORT_SYMBOL(__dev_direct_xmit);
* Receiver routines
*************************************************************************/
-int netdev_max_backlog __read_mostly = 1000;
-EXPORT_SYMBOL(netdev_max_backlog);
-
-int netdev_tstamp_prequeue __read_mostly = 1;
unsigned int sysctl_skb_defer_max __read_mostly = 64;
-int netdev_budget __read_mostly = 300;
-/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
-unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
-int dev_rx_weight __read_mostly = 64;
-int dev_tx_weight __read_mostly = 64;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4475,12 +4450,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
#ifdef CONFIG_RPS
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-u32 rps_cpu_mask __read_mostly;
-EXPORT_SYMBOL(rps_cpu_mask);
-
struct static_key_false rps_needed __read_mostly;
EXPORT_SYMBOL(rps_needed);
struct static_key_false rfs_needed __read_mostly;
@@ -4572,7 +4541,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (!hash)
goto done;
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
if (flow_table && sock_flow_table) {
struct rps_dev_flow *rflow;
u32 next_cpu;
@@ -4582,10 +4551,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
* This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
*/
ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
- if ((ident ^ hash) & ~rps_cpu_mask)
+ if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask)
goto try_rps;
- next_cpu = ident & rps_cpu_mask;
+ next_cpu = ident & net_hotdata.rps_cpu_mask;
/* OK, now we know there is a match,
* we can look at the local (per receive queue) flow table
@@ -4734,7 +4703,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
+ if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4782,7 +4751,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(net_hotdata.max_backlog) &&
+ !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -4858,6 +4828,12 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
skb_headlen(skb) + mac_len, true);
+ if (skb_is_nonlinear(skb)) {
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+ xdp_buff_set_frags_flag(xdp);
+ } else {
+ xdp_buff_clear_frags_flag(xdp);
+ }
orig_data_end = xdp->data_end;
orig_data = xdp->data;
@@ -4887,6 +4863,14 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
skb->len += off; /* positive on grow, negative on shrink */
}
+ /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
+ * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
+ */
+ if (xdp_buff_has_frags(xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
@@ -4920,11 +4904,35 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
return act;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+static int
+netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+{
+ struct sk_buff *skb = *pskb;
+ int err, hroom, troom;
+
+ if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+ return 0;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ troom = skb->tail + skb->data_len - skb->end;
+ err = pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC);
+ if (err)
+ return err;
+
+ return skb_linearize(skb);
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- u32 act = XDP_DROP;
+ struct sk_buff *skb = *pskb;
+ u32 mac_len, act = XDP_DROP;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
@@ -4932,41 +4940,36 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_is_redirected(skb))
return XDP_PASS;
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
+ /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM
+ * bytes. This is the guarantee that also native XDP provides,
+ * thus we need to do it here as well.
*/
+ mac_len = skb->data - skb_mac_header(skb);
+ __skb_push(skb, mac_len);
+
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
+ if (netif_skb_check_for_xdp(pskb, xdp_prog))
goto do_drop;
}
- act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ __skb_pull(*pskb, mac_len);
+
+ act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
case XDP_PASS:
break;
default:
- bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
+ bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
+ trace_xdp_exception((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
do_drop:
- kfree_skb(skb);
+ kfree_skb(*pskb);
break;
}
@@ -5004,24 +5007,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
- act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
+ act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
case XDP_REDIRECT:
- err = xdp_do_generic_redirect(skb->dev, skb,
+ err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
&xdp, xdp_prog);
if (err)
goto out_redir;
break;
case XDP_TX:
- generic_xdp_tx(skb, xdp_prog);
+ generic_xdp_tx(*pskb, xdp_prog);
break;
}
return XDP_DROP;
@@ -5029,7 +5032,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
+ kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -5038,7 +5041,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5330,7 +5333,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5352,7 +5355,8 @@ another_round:
int ret2;
migrate_disable();
- ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ &skb);
migrate_enable();
if (ret2 != XDP_PASS) {
@@ -5373,7 +5377,7 @@ another_round:
if (pfmemalloc)
goto skip_taps;
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -5713,7 +5717,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5743,7 +5747,8 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
+ skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5967,7 +5972,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = READ_ONCE(dev_rx_weight);
+ napi->weight = READ_ONCE(net_hotdata.dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -6156,6 +6161,27 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
return NULL;
}
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+ struct sk_buff *skb, *next;
+
+ /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+ if (!READ_ONCE(sd->defer_list))
+ return;
+
+ spin_lock(&sd->defer_lock);
+ skb = sd->defer_list;
+ sd->defer_list = NULL;
+ sd->defer_count = 0;
+ spin_unlock(&sd->defer_lock);
+
+ while (skb != NULL) {
+ next = skb->next;
+ napi_consume_skb(skb, 1);
+ skb = next;
+ }
+}
+
#if defined(CONFIG_NET_RX_BUSY_POLL)
static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
@@ -6177,8 +6203,13 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
- u16 budget)
+enum {
+ NAPI_F_PREFER_BUSY_POLL = 1,
+ NAPI_F_END_ON_RESCHED = 2,
+};
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
+ unsigned flags, u16 budget)
{
bool skip_schedule = false;
unsigned long timeout;
@@ -6198,7 +6229,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
local_bh_disable();
- if (prefer_busy_poll) {
+ if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
timeout = READ_ONCE(napi->dev->gro_flush_timeout);
if (napi->defer_hard_irqs_count && timeout) {
@@ -6222,23 +6253,23 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
local_bh_enable();
}
-void napi_busy_loop(unsigned int napi_id,
- bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+static void __napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, unsigned flags, u16 budget)
{
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
restart:
napi_poll = NULL;
- rcu_read_lock();
-
napi = napi_by_id(napi_id);
if (!napi)
- goto out;
+ return;
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
@@ -6254,14 +6285,14 @@ restart:
*/
if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
NAPIF_STATE_IN_BUSY_POLL)) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
if (cmpxchg(&napi->state, val,
val | NAPIF_STATE_IN_BUSY_POLL |
NAPIF_STATE_SCHED) != val) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
@@ -6275,18 +6306,22 @@ count:
if (work > 0)
__NET_ADD_STATS(dev_net(napi->dev),
LINUX_MIB_BUSYPOLLRXPACKETS, work);
+ skb_defer_free_flush(this_cpu_ptr(&softnet_data));
local_bh_enable();
if (!loop_end || loop_end(loop_end_arg, start_time))
break;
if (unlikely(need_resched())) {
+ if (flags & NAPI_F_END_ON_RESCHED)
+ break;
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
rcu_read_unlock();
cond_resched();
+ rcu_read_lock();
if (loop_end(loop_end_arg, start_time))
return;
goto restart;
@@ -6294,10 +6329,31 @@ count:
cpu_relax();
}
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
-out:
+}
+
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = NAPI_F_END_ON_RESCHED;
+
+ if (prefer_busy_poll)
+ flags |= NAPI_F_PREFER_BUSY_POLL;
+
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+}
+
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+
+ rcu_read_lock();
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
rcu_read_unlock();
}
EXPORT_SYMBOL(napi_busy_loop);
@@ -6680,27 +6736,6 @@ static int napi_thread_wait(struct napi_struct *napi)
return -1;
}
-static void skb_defer_free_flush(struct softnet_data *sd)
-{
- struct sk_buff *skb, *next;
-
- /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
- if (!READ_ONCE(sd->defer_list))
- return;
-
- spin_lock(&sd->defer_lock);
- skb = sd->defer_list;
- sd->defer_list = NULL;
- sd->defer_count = 0;
- spin_unlock(&sd->defer_lock);
-
- while (skb != NULL) {
- next = skb->next;
- napi_consume_skb(skb, 1);
- skb = next;
- }
-}
-
static int napi_threaded_poll(void *data)
{
struct napi_struct *napi = data;
@@ -6742,8 +6777,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
- int budget = READ_ONCE(netdev_budget);
+ usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs));
+ int budget = READ_ONCE(net_hotdata.netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -8586,12 +8621,12 @@ unsigned int dev_get_flags(const struct net_device *dev)
{
unsigned int flags;
- flags = (dev->flags & ~(IFF_PROMISC |
+ flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC |
IFF_ALLMULTI |
IFF_RUNNING |
IFF_LOWER_UP |
IFF_DORMANT)) |
- (dev->gflags & (IFF_PROMISC |
+ (READ_ONCE(dev->gflags) & (IFF_PROMISC |
IFF_ALLMULTI));
if (netif_running(dev)) {
@@ -8914,7 +8949,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
}
EXPORT_SYMBOL(dev_set_mac_address);
-static DECLARE_RWSEM(dev_addr_sem);
+DECLARE_RWSEM(dev_addr_sem);
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack)
@@ -9074,28 +9109,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
}
EXPORT_SYMBOL(netdev_port_same_parent_id);
-static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
-#if IS_ENABLED(CONFIG_DPLL)
- rtnl_lock();
- dev->dpll_pin = dpll_pin;
- rtnl_unlock();
-#endif
-}
-
-void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
- WARN_ON(!dpll_pin);
- netdev_dpll_pin_assign(dev, dpll_pin);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_set);
-
-void netdev_dpll_pin_clear(struct net_device *dev)
-{
- netdev_dpll_pin_assign(dev, NULL);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_clear);
-
/**
* dev_change_proto_down - set carrier according to proto_down.
*
@@ -9690,11 +9703,11 @@ static void dev_index_release(struct net *net, int ifindex)
/* Delayed registration/unregisteration */
LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+atomic_t dev_unreg_count = ATOMIC_INIT(0);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -10259,9 +10272,9 @@ int register_netdevice(struct net_device *dev)
goto err_ifindex_release;
ret = netdev_register_kobject(dev);
- write_lock(&dev_base_lock);
- dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
- write_unlock(&dev_base_lock);
+
+ WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
+
if (ret)
goto err_uninit_notify;
@@ -10337,7 +10350,7 @@ EXPORT_SYMBOL(register_netdevice);
* that need to tie several hardware interfaces to a single NAPI
* poll scheduler due to HW limitations.
*/
-int init_dummy_netdev(struct net_device *dev)
+void init_dummy_netdev(struct net_device *dev)
{
/* Clear everything. Note we don't initialize spinlocks
* are they aren't supposed to be taken by any of the
@@ -10365,8 +10378,6 @@ int init_dummy_netdev(struct net_device *dev)
* because users of this 'device' dont need to change
* its refcount.
*/
-
- return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -10521,6 +10532,7 @@ void netdev_run_todo(void)
{
struct net_device *dev, *tmp;
struct list_head list;
+ int cnt;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
@@ -10551,12 +10563,11 @@ void netdev_run_todo(void)
continue;
}
- write_lock(&dev_base_lock);
- dev->reg_state = NETREG_UNREGISTERED;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
linkwatch_sync_dev(dev);
}
+ cnt = 0;
while (!list_empty(&list)) {
dev = netdev_wait_allrefs_any(&list);
list_del(&dev->todo_list);
@@ -10574,12 +10585,13 @@ void netdev_run_todo(void)
if (dev->needs_free_netdev)
free_netdev(dev);
- if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
- wake_up(&netdev_unregistering_wq);
+ cnt++;
/* Free network device */
kobject_put(&dev->dev.kobj);
}
+ if (cnt && atomic_sub_and_test(cnt, &dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
}
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
@@ -10656,6 +10668,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
ops->ndo_get_stats64(dev, storage);
} else if (ops->ndo_get_stats) {
netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
+ } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
+ dev_get_tstats64(dev, storage);
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
@@ -10970,7 +10984,7 @@ void free_netdev(struct net_device *dev)
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
- dev->reg_state = NETREG_RELEASED;
+ WRITE_ONCE(dev->reg_state, NETREG_RELEASED);
/* will free via device release */
put_device(&dev->dev);
@@ -11026,6 +11040,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
{
struct net_device *dev, *tmp;
LIST_HEAD(close_head);
+ int cnt = 0;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -11057,10 +11072,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- write_lock(&dev_base_lock);
- unlist_netdevice(dev, false);
- dev->reg_state = NETREG_UNREGISTERING;
- write_unlock(&dev_base_lock);
+ unlist_netdevice(dev);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
}
flush_all_backlogs();
@@ -11122,7 +11135,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
netdev_put(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
+ cnt++;
}
+ atomic_add(cnt, &dev_unreg_count);
list_del(head);
}
@@ -11240,7 +11255,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev, true);
+ unlist_netdevice(dev);
synchronize_net();
@@ -11551,6 +11566,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
static void __net_exit default_device_exit_net(struct net *net)
{
+ struct netdev_name_node *name_node, *tmp;
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
@@ -11573,6 +11589,11 @@ static void __net_exit default_device_exit_net(struct net *net)
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
+
+ netdev_for_each_altname_safe(dev, name_node, tmp)
+ if (netdev_name_in_use(&init_net, name_node->name))
+ __netdev_name_node_alt_destroy(name_node);
+
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
pr_emerg("%s: failed to move %s to init_net: %d\n",
@@ -11643,11 +11664,12 @@ static void __init net_dev_struct_check(void)
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
/* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
- CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38);
/* RX read-mostly hotpath */
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
@@ -11677,6 +11699,28 @@ static void __init net_dev_struct_check(void)
*
*/
+/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
+#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE)
+
+static int net_page_pool_create(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ struct page_pool_params page_pool_params = {
+ .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
+ .flags = PP_FLAG_SYSTEM_POOL,
+ .nid = NUMA_NO_NODE,
+ };
+ struct page_pool *pp_ptr;
+
+ pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+ if (IS_ERR(pp_ptr))
+ return -ENOMEM;
+
+ per_cpu(system_page_pool, cpuid) = pp_ptr;
+#endif
+ return 0;
+}
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
@@ -11695,7 +11739,6 @@ static int __init net_dev_init(void)
if (netdev_kobject_init())
goto out;
- INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
@@ -11729,6 +11772,9 @@ static int __init net_dev_init(void)
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+
+ if (net_page_pool_create(i))
+ goto out;
}
dev_boot_phase = 0;
@@ -11756,6 +11802,19 @@ static int __init net_dev_init(void)
WARN_ON(rc < 0);
rc = 0;
out:
+ if (rc < 0) {
+ for_each_possible_cpu(i) {
+ struct page_pool *pp_ptr;
+
+ pp_ptr = per_cpu(system_page_pool, i);
+ if (!pp_ptr)
+ continue;
+
+ page_pool_destroy(pp_ptr);
+ per_cpu(system_page_pool, i) = NULL;
+ }
+ }
+
return rc;
}
diff --git a/net/core/dev.h b/net/core/dev.h
index cf93e188785b..2bcaf8eee50c 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -3,6 +3,7 @@
#define _NET_CORE_DEV_H
#include <linux/types.h>
+#include <linux/rwsem.h>
struct net;
struct net_device;
@@ -37,15 +38,14 @@ int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
-extern int netdev_budget;
-extern unsigned int netdev_budget_usecs;
extern unsigned int sysctl_skb_defer_max;
-extern int netdev_tstamp_prequeue;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
extern int dev_weight_tx_bias;
+extern struct rw_semaphore dev_addr_sem;
+
/* rtnl helpers */
extern struct list_head net_todo_list;
void netdev_run_todo(void);
@@ -56,6 +56,7 @@ struct netdev_name_node {
struct list_head list;
struct net_device *dev;
const char *name;
+ struct rcu_head rcu;
};
int netdev_get_name(struct net *net, char *name, int ifindex);
@@ -63,6 +64,9 @@ int dev_change_name(struct net_device *dev, const char *newname);
#define netdev_for_each_altname(dev, namenode) \
list_for_each_entry((namenode), &(dev)->name_node->list, list)
+#define netdev_for_each_altname_safe(dev, namenode, next) \
+ list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \
+ list)
int netdev_name_node_alt_create(struct net_device *dev, const char *name);
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6838d3212c37..95f533844f17 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -96,7 +96,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
}
EXPORT_SYMBOL(dst_alloc);
-struct dst_entry *dst_destroy(struct dst_entry * dst)
+static void dst_destroy(struct dst_entry *dst)
{
struct dst_entry *child = NULL;
@@ -126,15 +126,13 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
dst = child;
if (dst)
dst_release_immediate(dst);
- return NULL;
}
-EXPORT_SYMBOL(dst_destroy);
static void dst_destroy_rcu(struct rcu_head *head)
{
struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
- dst = dst_destroy(dst);
+ dst_destroy(dst);
}
/* Operations to mark dst as DEAD and clean up the net device referenced
diff --git a/net/core/filter.c b/net/core/filter.c
index 24061f29c9dd..8adf95765cdd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,11 +83,12 @@
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netkit.h>
#include <linux/un.h>
+#include <net/xdp_sock_drv.h>
#include "dev.h"
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id);
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
@@ -777,7 +778,7 @@ jmp_rest:
BPF_EMIT_JMP;
break;
- /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
+ /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B: {
struct sock_filter tmp = {
.code = BPF_LD | BPF_ABS | BPF_B,
@@ -803,7 +804,7 @@ jmp_rest:
*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
}
- /* RET_K is remaped into 2 insns. RET_A case doesn't need an
+ /* RET_K is remapped into 2 insns. RET_A case doesn't need an
* extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
*/
case BPF_RET | BPF_A:
@@ -2967,7 +2968,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
*
* Then if B is non-zero AND there is no space allocate space and
* compact A, B regions into page. If there is space shift ring to
- * the rigth free'ing the next element in ring to place B, leaving
+ * the right free'ing the next element in ring to place B, leaving
* A untouched except to reduce length.
*/
if (start != offset) {
@@ -4092,10 +4093,46 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
skb_frag_size_add(frag, offset);
sinfo->xdp_frags_size += offset;
+ if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+ xsk_buff_get_tail(xdp)->data_end += offset;
return 0;
}
+static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ struct xdp_mem_info *mem_info, bool release)
+{
+ struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+
+ if (release) {
+ xsk_buff_del_tail(zc_frag);
+ __xdp_return(NULL, mem_info, false, zc_frag);
+ } else {
+ zc_frag->data_end -= shrink;
+ }
+}
+
+static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
+ int shrink)
+{
+ struct xdp_mem_info *mem_info = &xdp->rxq->mem;
+ bool release = skb_frag_size(frag) == shrink;
+
+ if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) {
+ bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
+ goto out;
+ }
+
+ if (release) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), mem_info, false, NULL);
+ }
+
+out:
+ return release;
+}
+
static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -4110,12 +4147,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
-
- if (skb_frag_size(frag) == shrink) {
- struct page *page = skb_frag_page(frag);
-
- __xdp_return(page_address(page), &xdp->rxq->mem,
- false, NULL);
+ if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
@@ -5956,7 +5988,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
return -ENODEV;
idev = __in6_dev_get_safely(dev);
- if (unlikely(!idev || !idev->cnf.forwarding))
+ if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding)))
return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
@@ -7862,7 +7894,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -7955,7 +7987,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -7974,7 +8006,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8161,7 +8193,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8220,13 +8252,13 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
/* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
* kfuncs are defined in two different modules, and we want to be able
- * to use them interchangably with the same BTF type ID. Because modules
+ * to use them interchangeably with the same BTF type ID. Because modules
* can't de-duplicate BTF IDs between each other, we need the type to be
* referenced in the vmlinux BTF or the verifier will get confused about
* the different types. So we add this dummy type reference which will
@@ -8281,7 +8313,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8323,7 +8355,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8367,7 +8399,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8378,7 +8410,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8405,7 +8437,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8580,7 +8612,7 @@ static bool cg_skb_is_valid_access(int off, int size,
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
}
@@ -8592,7 +8624,7 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
case bpf_ctx_range(struct __sk_buff, tstamp):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
default:
@@ -11236,7 +11268,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -11418,7 +11450,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -11752,7 +11784,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = {
};
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id)
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
const struct bpf_func_proto *func;
@@ -11781,10 +11813,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
- if (!perfmon_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
return NULL;
return func;
@@ -11837,6 +11869,103 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
return 0;
}
+
+__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz)
+{
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ const struct request_sock_ops *ops;
+ struct inet_request_sock *ireq;
+ struct tcp_request_sock *treq;
+ struct request_sock *req;
+ struct net *net;
+ __u16 min_mss;
+ u32 tsoff = 0;
+
+ if (attrs__sz != sizeof(*attrs) ||
+ attrs->reserved[0] || attrs->reserved[1] || attrs->reserved[2])
+ return -EINVAL;
+
+ if (!skb_at_tc_ingress(skb))
+ return -EINVAL;
+
+ net = dev_net(skb->dev);
+ if (net != sock_net(sk))
+ return -ENETUNREACH;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ops = &tcp_request_sock_ops;
+ min_mss = 536;
+ break;
+#if IS_BUILTIN(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ ops = &tcp6_request_sock_ops;
+ min_mss = IPV6_MIN_MTU - 60;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN ||
+ sk_is_mptcp(sk))
+ return -EINVAL;
+
+ if (attrs->mss < min_mss)
+ return -EINVAL;
+
+ if (attrs->wscale_ok) {
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_window_scaling))
+ return -EINVAL;
+
+ if (attrs->snd_wscale > TCP_MAX_WSCALE ||
+ attrs->rcv_wscale > TCP_MAX_WSCALE)
+ return -EINVAL;
+ }
+
+ if (attrs->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
+ return -EINVAL;
+
+ if (attrs->tstamp_ok) {
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
+ return -EINVAL;
+
+ tsoff = attrs->rcv_tsecr - tcp_ns_to_ts(attrs->usec_ts_ok, tcp_clock_ns());
+ }
+
+ req = inet_reqsk_alloc(ops, sk, false);
+ if (!req)
+ return -ENOMEM;
+
+ ireq = inet_rsk(req);
+ treq = tcp_rsk(req);
+
+ req->rsk_listener = sk;
+ req->syncookie = 1;
+ req->mss = attrs->mss;
+ req->ts_recent = attrs->rcv_tsval;
+
+ ireq->snd_wscale = attrs->snd_wscale;
+ ireq->rcv_wscale = attrs->rcv_wscale;
+ ireq->tstamp_ok = !!attrs->tstamp_ok;
+ ireq->sack_ok = !!attrs->sack_ok;
+ ireq->wscale_ok = !!attrs->wscale_ok;
+ ireq->ecn_ok = !!attrs->ecn_ok;
+
+ treq->req_usec_ts = !!attrs->usec_ts_ok;
+ treq->ts_off = tsoff;
+
+ skb_orphan(skb);
+ skb->sk = req_to_sk(req);
+ skb->destructor = sock_pfree;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
@@ -11853,17 +11982,21 @@ int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
return 0;
}
-BTF_SET8_START(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
-BTF_SET8_END(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
-BTF_SET8_START(bpf_kfunc_check_set_xdp)
+BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
-BTF_SET8_END(bpf_kfunc_check_set_xdp)
+BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
-BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
+BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
-BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
+BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr)
+
+BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk)
+BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk)
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
@@ -11880,6 +12013,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
.set = &bpf_kfunc_check_set_sock_addr,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_tcp_reqsk,
+};
+
static int __init bpf_kfunc_init(void)
{
int ret;
@@ -11895,8 +12033,9 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- &bpf_kfunc_set_sock_addr);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ &bpf_kfunc_set_sock_addr);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
}
late_initcall(bpf_kfunc_init);
@@ -11936,9 +12075,9 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
__bpf_kfunc_end_defs();
-BTF_SET8_START(bpf_sk_iter_kfunc_ids)
+BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
-BTF_SET8_END(bpf_sk_iter_kfunc_ids)
+BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids)
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
diff --git a/net/core/gro.c b/net/core/gro.c
index 0759277dc14e..ee30d4f0c038 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -10,9 +10,6 @@
#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(offload_lock);
-struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
-/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
-int gro_normal_batch __read_mostly = 8;
/**
* dev_add_offload - register offload handlers
@@ -31,7 +28,7 @@ void dev_add_offload(struct packet_offload *po)
struct packet_offload *elem;
spin_lock(&offload_lock);
- list_for_each_entry(elem, &offload_base, list) {
+ list_for_each_entry(elem, &net_hotdata.offload_base, list) {
if (po->priority < elem->priority)
break;
}
@@ -55,7 +52,7 @@ EXPORT_SYMBOL(dev_add_offload);
*/
static void __dev_remove_offload(struct packet_offload *po)
{
- struct list_head *head = &offload_base;
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *po1;
spin_lock(&offload_lock);
@@ -235,9 +232,9 @@ done:
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &offload_base;
int err = -ENOENT;
BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -369,15 +366,21 @@ static void gro_list_prepare(const struct list_head *head,
static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
- const struct skb_shared_info *pinfo = skb_shinfo(skb);
- const skb_frag_t *frag0 = &pinfo->frags[0];
+ const struct skb_shared_info *pinfo;
+ const skb_frag_t *frag0;
+ unsigned int headlen;
NAPI_GRO_CB(skb)->data_offset = 0;
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ headlen = skb_headlen(skb);
+ NAPI_GRO_CB(skb)->frag0 = skb->data;
+ NAPI_GRO_CB(skb)->frag0_len = headlen;
+ if (headlen)
+ return;
+
+ pinfo = skb_shinfo(skb);
+ frag0 = &pinfo->frags[0];
- if (!skb_headlen(skb) && pinfo->nr_frags &&
- !PageHighMem(skb_frag_page(frag0)) &&
+ if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) &&
(!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
@@ -438,7 +441,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
{
u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
struct gro_list *gro_list = &napi->gro_hash[bucket];
- struct list_head *head = &offload_base;
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
struct sk_buff *pp = NULL;
@@ -544,7 +547,7 @@ normal:
struct packet_offload *gro_find_receive_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
+ struct list_head *offload_head = &net_hotdata.offload_base;
struct packet_offload *ptype;
list_for_each_entry_rcu(ptype, offload_head, list) {
@@ -558,7 +561,7 @@ EXPORT_SYMBOL(gro_find_receive_by_type);
struct packet_offload *gro_find_complete_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
+ struct list_head *offload_head = &net_hotdata.offload_base;
struct packet_offload *ptype;
list_for_each_entry_rcu(ptype, offload_head, list) {
@@ -700,7 +703,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
skb_reset_mac_header(skb);
skb_gro_reset_offset(skb, hlen);
- if (unlikely(skb_gro_header_hard(skb, hlen))) {
+ if (unlikely(!skb_gro_may_pull(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
if (unlikely(!eth)) {
net_warn_ratelimited("%s: dropping impossible skb from %s\n",
@@ -710,7 +713,10 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
}
} else {
eth = (const struct ethhdr *)skb->data;
- gro_pull_from_frag0(skb, hlen);
+
+ if (NAPI_GRO_CB(skb)->frag0 != skb->data)
+ gro_pull_from_frag0(skb, hlen);
+
NAPI_GRO_CB(skb)->frag0 += hlen;
NAPI_GRO_CB(skb)->frag0_len -= hlen;
}
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index ed5ec5de47f6..ff8e5b64bf6b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <net/gro_cells.h>
+#include <net/hotdata.h>
struct gro_cell {
struct sk_buff_head napi_skbs;
@@ -26,7 +27,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) {
drop:
dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
diff --git a/net/core/gso.c b/net/core/gso.c
index 9e1803bfc9c6..bcd156372f4d 100644
--- a/net/core/gso.c
+++ b/net/core/gso.c
@@ -17,7 +17,7 @@ struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
struct packet_offload *ptype;
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
segs = ptype->callbacks.gso_segment(skb, features);
break;
@@ -48,7 +48,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
__skb_pull(skb, vlan_depth);
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
segs = ptype->callbacks.gso_segment(skb, features);
break;
diff --git a/net/core/gso_test.c b/net/core/gso_test.c
index 4c2e77bd12f4..358c44680d91 100644
--- a/net/core/gso_test.c
+++ b/net/core/gso_test.c
@@ -225,7 +225,7 @@ static void gso_test_func(struct kunit *test)
segs = skb_segment(skb, features);
if (IS_ERR(segs)) {
- KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs));
+ KUNIT_FAIL(test, "segs error %pe", segs);
goto free_gso_skb;
} else if (!segs) {
KUNIT_FAIL(test, "no segments");
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
new file mode 100644
index 000000000000..c8a7a451c18a
--- /dev/null
+++ b/net/core/hotdata.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <net/hotdata.h>
+#include <linux/cache.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+
+
+struct net_hotdata net_hotdata __cacheline_aligned = {
+ .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
+ .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
+ .gro_normal_batch = 8,
+
+ .netdev_budget = 300,
+ /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
+ .netdev_budget_usecs = 2 * USEC_PER_SEC / HZ,
+
+ .tstamp_prequeue = 1,
+ .max_backlog = 1000,
+ .dev_tx_weight = 64,
+ .dev_rx_weight = 64,
+};
+EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 429571c258da..8ec35194bfcb 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -33,7 +33,7 @@ static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
-static unsigned char default_operstate(const struct net_device *dev)
+static unsigned int default_operstate(const struct net_device *dev)
{
if (netif_testing(dev))
return IF_OPER_TESTING;
@@ -62,16 +62,13 @@ static unsigned char default_operstate(const struct net_device *dev)
return IF_OPER_UP;
}
-
static void rfc2863_policy(struct net_device *dev)
{
- unsigned char operstate = default_operstate(dev);
+ unsigned int operstate = default_operstate(dev);
- if (operstate == dev->operstate)
+ if (operstate == READ_ONCE(dev->operstate))
return;
- write_lock(&dev_base_lock);
-
switch(dev->link_mode) {
case IF_LINK_MODE_TESTING:
if (operstate == IF_OPER_UP)
@@ -87,9 +84,7 @@ static void rfc2863_policy(struct net_device *dev)
break;
}
- dev->operstate = operstate;
-
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->operstate, operstate);
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 09f7ed1a04e8..a97eceb84e61 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -3,52 +3,22 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/wext.h>
+#include <net/hotdata.h>
#include "dev.h"
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
-
-#define get_bucket(x) ((x) >> BUCKET_SPACE)
-#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
-#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos)
{
- struct net *net = seq_file_net(seq);
+ unsigned long ifindex = *pos;
struct net_device *dev;
- struct hlist_head *h;
- unsigned int count = 0, offset = get_offset(*pos);
- h = &net->dev_index_head[get_bucket(*pos)];
- hlist_for_each_entry_rcu(dev, h, index_hlist) {
- if (++count == offset)
- return dev;
+ for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
+ *pos = dev->ifindex;
+ return dev;
}
-
- return NULL;
-}
-
-static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
-{
- struct net_device *dev;
- unsigned int bucket;
-
- do {
- dev = dev_from_same_bucket(seq, pos);
- if (dev)
- return dev;
-
- bucket = get_bucket(*pos) + 1;
- *pos = set_bucket_offset(bucket, 1);
- } while (bucket < NETDEV_HASHENTRIES);
-
return NULL;
}
-/*
- * This is invoked by the /proc filesystem handler to display a device
- * in detail.
- */
static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
@@ -56,16 +26,13 @@ static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
- return NULL;
-
- return dev_from_bucket(seq, pos);
+ return dev_seq_from_index(seq, pos);
}
static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return dev_from_bucket(seq, pos);
+ return dev_seq_from_index(seq, pos);
}
static void dev_seq_stop(struct seq_file *seq, void *v)
@@ -217,7 +184,7 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
}
}
- list_for_each_entry_rcu(pt, &ptype_all, list) {
+ list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) {
if (i == pos)
return pt;
++i;
@@ -265,13 +232,13 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
}
- nxt = ptype_all.next;
+ nxt = net_hotdata.ptype_all.next;
goto ptype_all;
}
if (pt->type == htons(ETH_P_ALL)) {
ptype_all:
- if (nxt != &ptype_all)
+ if (nxt != &net_hotdata.ptype_all)
goto found;
hash = 0;
nxt = ptype_base[0].next;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a09d507c5b03..e3d7a8cfa20b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_rx_queue.h>
+#include <net/rps.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -34,10 +35,10 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
-/* Caller holds RTNL or dev_base_lock */
+/* Caller holds RTNL or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
- return dev->reg_state <= NETREG_REGISTERED;
+ return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
/* use same locking rules as GIF* ioctl's */
@@ -48,10 +49,10 @@ static ssize_t netdev_show(const struct device *dev,
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (dev_isalive(ndev))
ret = (*format)(ndev, buf);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
@@ -60,7 +61,7 @@ static ssize_t netdev_show(const struct device *dev,
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *dev, char *buf) \
{ \
- return sysfs_emit(buf, format_string, dev->field); \
+ return sysfs_emit(buf, format_string, READ_ONCE(dev->field)); \
} \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -125,7 +126,7 @@ static DEVICE_ATTR_RO(iflink);
static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
- return sysfs_emit(buf, fmt_dec, dev->name_assign_type);
+ return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}
static ssize_t name_assign_type_show(struct device *dev,
@@ -135,24 +136,28 @@ static ssize_t name_assign_type_show(struct device *dev,
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- if (ndev->name_assign_type != NET_NAME_UNKNOWN)
+ if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
ret = netdev_show(dev, attr, buf, format_name_assign_type);
return ret;
}
static DEVICE_ATTR_RO(name_assign_type);
-/* use same locking rules as GIFHWADDR ioctl's */
+/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
+ down_read(&dev_addr_sem);
+
+ rcu_read_lock();
if (dev_isalive(ndev))
ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
+
+ up_read(&dev_addr_sem);
return ret;
}
static DEVICE_ATTR_RO(address);
@@ -161,10 +166,13 @@ static ssize_t broadcast_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *ndev = to_net_dev(dev);
+ int ret = -EINVAL;
+ rcu_read_lock();
if (dev_isalive(ndev))
- return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
- return -EINVAL;
+ ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
+ rcu_read_unlock();
+ return ret;
}
static DEVICE_ATTR_RO(broadcast);
@@ -318,11 +326,9 @@ static ssize_t operstate_show(struct device *dev,
const struct net_device *netdev = to_net_dev(dev);
unsigned char operstate;
- read_lock(&dev_base_lock);
- operstate = netdev->operstate;
+ operstate = READ_ONCE(netdev->operstate);
if (!netif_running(netdev))
operstate = IF_OPER_DOWN;
- read_unlock(&dev_base_lock);
if (operstate >= ARRAY_SIZE(operstates))
return -EINVAL; /* should not happen */
@@ -680,14 +686,14 @@ static ssize_t netstat_show(const struct device *d,
WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
offset % sizeof(u64) != 0);
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (dev_isalive(dev)) {
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
@@ -1409,6 +1415,65 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
+static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
+}
+
+static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
+ const char *buf, size_t len)
+{
+ struct dql *dql = &queue->dql;
+ unsigned int value;
+ int err;
+
+ err = kstrtouint(buf, 10, &value);
+ if (err < 0)
+ return err;
+
+ value = msecs_to_jiffies(value);
+ if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
+ return -ERANGE;
+
+ if (!dql->stall_thrs && value)
+ dql->last_reap = jiffies;
+ /* Force last_reap to be live */
+ smp_wmb();
+ dql->stall_thrs = value;
+
+ return len;
+}
+
+static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
+ __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
+
+static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+{
+ return sprintf(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
+}
+
+static ssize_t bql_set_stall_max(struct netdev_queue *queue,
+ const char *buf, size_t len)
+{
+ WRITE_ONCE(queue->dql.stall_max, 0);
+ return len;
+}
+
+static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
+ __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
+
+static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%lu\n", dql->stall_cnt);
+}
+
+static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
+ __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
+
static ssize_t bql_show_inflight(struct netdev_queue *queue,
char *buf)
{
@@ -1447,6 +1512,9 @@ static struct attribute *dql_attrs[] __ro_after_init = {
&bql_limit_min_attribute.attr,
&bql_hold_time_attribute.attr,
&bql_inflight_attribute.attr,
+ &bql_stall_thrs_attribute.attr,
+ &bql_stall_cnt_attribute.attr,
+ &bql_stall_max_attribute.attr,
NULL
};
@@ -1454,6 +1522,9 @@ static const struct attribute_group dql_group = {
.name = "byte_queue_limits",
.attrs = dql_attrs,
};
+#else
+/* Fake declaration, all the code using it should be dead */
+extern const struct attribute_group dql_group;
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
@@ -1691,6 +1762,15 @@ static const struct kobj_type netdev_queue_ktype = {
.get_ownership = netdev_queue_get_ownership,
};
+static bool netdev_uses_bql(const struct net_device *dev)
+{
+ if (dev->features & NETIF_F_LLTX ||
+ dev->priv_flags & IFF_NO_QUEUE)
+ return false;
+
+ return IS_ENABLED(CONFIG_BQL);
+}
+
static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
struct netdev_queue *queue = dev->_tx + index;
@@ -1708,11 +1788,11 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
-#ifdef CONFIG_BQL
- error = sysfs_create_group(kobj, &dql_group);
- if (error)
- goto err;
-#endif
+ if (netdev_uses_bql(dev)) {
+ error = sysfs_create_group(kobj, &dql_group);
+ if (error)
+ goto err;
+ }
kobject_uevent(kobj, KOBJ_ADD);
return 0;
@@ -1733,9 +1813,9 @@ static int tx_queue_change_owner(struct net_device *ndev, int index,
if (error)
return error;
-#ifdef CONFIG_BQL
- error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
-#endif
+ if (netdev_uses_bql(ndev))
+ error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
+
return error;
}
#endif /* CONFIG_SYSFS */
@@ -1767,9 +1847,10 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
if (!refcount_read(&dev_net(dev)->ns.count))
queue->kobj.uevent_suppress = 1;
-#ifdef CONFIG_BQL
- sysfs_remove_group(&queue->kobj, &dql_group);
-#endif
+
+ if (netdev_uses_bql(dev))
+ sysfs_remove_group(&queue->kobj, &dql_group);
+
kobject_put(&queue->kobj);
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72799533426b..f0540c557515 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
- int error = 0;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
+ int error = 0;
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
@@ -358,6 +359,15 @@ out_undo:
synchronize_rcu();
ops = saved_ops;
+ rtnl_lock();
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+
+ ops = saved_ops;
list_for_each_entry_continue_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
@@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
/* Atomically snapshot the list of namespaces to cleanup */
net_kill_list = llist_del_all(&cleanup_list);
@@ -611,7 +622,15 @@ static void cleanup_net(struct work_struct *work)
* the rcu_barrier() below isn't sufficient alone.
* Also the pre_exit() and exit() methods need this barrier.
*/
- synchronize_rcu();
+ synchronize_rcu_expedited();
+
+ rtnl_lock();
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
/* Run all of the network namespace exit methods */
list_for_each_entry_reverse(ops, &pernet_list, list)
@@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_
{
ops_pre_exit_list(ops, net_exit_list);
synchronize_rcu();
+
+ if (ops->exit_batch_rtnl) {
+ LIST_HEAD(dev_kill_list);
+
+ rtnl_lock();
+ ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+ }
ops_exit_list(ops, net_exit_list);
+
ops_free_list(ops, net_exit_list);
}
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index be7f2ebd61b2..8d8ace9ef87f 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -68,6 +68,11 @@ static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFIN
[NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
};
+/* NETDEV_CMD_QSTATS_GET - dump */
+static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE + 1] = {
+ [NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -138,6 +143,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_NAPI_IFINDEX,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NETDEV_CMD_QSTATS_GET,
+ .dumpit = netdev_nl_qstats_get_dumpit,
+ .policy = netdev_qstats_get_nl_policy,
+ .maxattr = NETDEV_A_QSTATS_SCOPE,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index a47f2bcbe4fa..4db40fd5b4a9 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -28,6 +28,8 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
enum {
NETDEV_NLGRP_MGMT,
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index fd98936da3ae..7004b3399c2b 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -8,6 +8,7 @@
#include <net/xdp.h>
#include <net/xdp_sock.h>
#include <net/netdev_rx_queue.h>
+#include <net/netdev_queues.h>
#include <net/busy_poll.h>
#include "netdev-genl-gen.h"
@@ -152,10 +153,7 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
rtnl_unlock();
- if (err != -EMSGSIZE)
- return err;
-
- return skb->len;
+ return err;
}
static int
@@ -287,10 +285,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
rtnl_unlock();
- if (err != -EMSGSIZE)
- return err;
-
- return skb->len;
+ return err;
}
static int
@@ -463,10 +458,220 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
rtnl_unlock();
- if (err != -EMSGSIZE)
- return err;
+ return err;
+}
+
+#define NETDEV_STAT_NOT_SET (~0ULL)
+
+static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
+{
+ const u64 *add = _add;
+ u64 *sum = _sum;
+
+ while (size) {
+ if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET)
+ *sum += *add;
+ sum++;
+ add++;
+ size -= 8;
+ }
+}
+
+static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
+{
+ if (value == NETDEV_STAT_NOT_SET)
+ return 0;
+ return nla_put_uint(rsp, attr_id, value);
+}
+
+static int
+netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
+{
+ if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int
+netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
+{
+ if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int
+netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
+ u32 q_type, int i, const struct genl_info *info)
+{
+ const struct netdev_stat_ops *ops = netdev->stat_ops;
+ struct netdev_queue_stats_rx rx;
+ struct netdev_queue_stats_tx tx;
+ void *hdr;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+ if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) ||
+ nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) ||
+ nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
+ goto nla_put_failure;
+
+ switch (q_type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ memset(&rx, 0xff, sizeof(rx));
+ ops->get_queue_stats_rx(netdev, i, &rx);
+ if (!memchr_inv(&rx, 0xff, sizeof(rx)))
+ goto nla_cancel;
+ if (netdev_nl_stats_write_rx(rsp, &rx))
+ goto nla_put_failure;
+ break;
+ case NETDEV_QUEUE_TYPE_TX:
+ memset(&tx, 0xff, sizeof(tx));
+ ops->get_queue_stats_tx(netdev, i, &tx);
+ if (!memchr_inv(&tx, 0xff, sizeof(tx)))
+ goto nla_cancel;
+ if (netdev_nl_stats_write_tx(rsp, &tx))
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(rsp, hdr);
+ return 0;
+
+nla_cancel:
+ genlmsg_cancel(rsp, hdr);
+ return 0;
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+static int
+netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info,
+ struct netdev_nl_dump_ctx *ctx)
+{
+ const struct netdev_stat_ops *ops = netdev->stat_ops;
+ int i, err;
+
+ if (!(netdev->flags & IFF_UP))
+ return 0;
+
+ i = ctx->rxq_idx;
+ while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
+ err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
+ i, info);
+ if (err)
+ return err;
+ ctx->rxq_idx = i++;
+ }
+ i = ctx->txq_idx;
+ while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
+ err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
+ i, info);
+ if (err)
+ return err;
+ ctx->txq_idx = i++;
+ }
+
+ ctx->rxq_idx = 0;
+ ctx->txq_idx = 0;
+ return 0;
+}
+
+static int
+netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info)
+{
+ struct netdev_queue_stats_rx rx_sum, rx;
+ struct netdev_queue_stats_tx tx_sum, tx;
+ const struct netdev_stat_ops *ops;
+ void *hdr;
+ int i;
+
+ ops = netdev->stat_ops;
+ /* Netdev can't guarantee any complete counters */
+ if (!ops->get_base_stats)
+ return 0;
+
+ memset(&rx_sum, 0xff, sizeof(rx_sum));
+ memset(&tx_sum, 0xff, sizeof(tx_sum));
+
+ ops->get_base_stats(netdev, &rx_sum, &tx_sum);
+
+ /* The op was there, but nothing reported, don't bother */
+ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
+ !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+ if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
+ goto nla_put_failure;
+
+ for (i = 0; i < netdev->real_num_rx_queues; i++) {
+ memset(&rx, 0xff, sizeof(rx));
+ if (ops->get_queue_stats_rx)
+ ops->get_queue_stats_rx(netdev, i, &rx);
+ netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx));
+ }
+ for (i = 0; i < netdev->real_num_tx_queues; i++) {
+ memset(&tx, 0xff, sizeof(tx));
+ if (ops->get_queue_stats_tx)
+ ops->get_queue_stats_tx(netdev, i, &tx);
+ netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx));
+ }
+
+ if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
+ netdev_nl_stats_write_tx(rsp, &tx_sum))
+ goto nla_put_failure;
+
+ genlmsg_end(rsp, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ unsigned int scope;
+ int err = 0;
+
+ scope = 0;
+ if (info->attrs[NETDEV_A_QSTATS_SCOPE])
+ scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
+
+ rtnl_lock();
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ if (!netdev->stat_ops)
+ continue;
+
+ switch (scope) {
+ case 0:
+ err = netdev_nl_stats_by_netdev(netdev, skb, info);
+ break;
+ case NETDEV_QSTATS_SCOPE_QUEUE:
+ err = netdev_nl_stats_by_queue(netdev, skb, info, ctx);
+ break;
+ }
+ if (err < 0)
+ break;
+ }
+ rtnl_unlock();
- return skb->len;
+ return err;
}
static int netdev_genl_netdevice_event(struct notifier_block *nb,
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a6b..dd364d738c00 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -31,6 +31,8 @@
#define BIAS_MAX (LONG_MAX >> 1)
#ifdef CONFIG_PAGE_POOL_STATS
+static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats);
+
/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
@@ -171,13 +173,16 @@ static void page_pool_producer_unlock(struct page_pool *pool,
}
static int page_pool_init(struct page_pool *pool,
- const struct page_pool_params *params)
+ const struct page_pool_params *params,
+ int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
memcpy(&pool->p, &params->fast, sizeof(pool->p));
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
+ pool->cpuid = cpuid;
+
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
return -EINVAL;
@@ -217,14 +222,23 @@ static int page_pool_init(struct page_pool *pool,
pool->has_init_callback = !!pool->slow.init_callback;
#ifdef CONFIG_PAGE_POOL_STATS
- pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
- if (!pool->recycle_stats)
- return -ENOMEM;
+ if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) {
+ pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+ if (!pool->recycle_stats)
+ return -ENOMEM;
+ } else {
+ /* For system page pool instance we use a singular stats object
+ * instead of allocating a separate percpu variable for each
+ * (also percpu) page pool instance.
+ */
+ pool->recycle_stats = &pp_system_recycle_stats;
+ }
#endif
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
+ if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+ free_percpu(pool->recycle_stats);
#endif
return -ENOMEM;
}
@@ -248,15 +262,18 @@ static void page_pool_uninit(struct page_pool *pool)
put_device(pool->p.dev);
#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
+ if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+ free_percpu(pool->recycle_stats);
#endif
}
/**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
* @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
*/
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
{
struct page_pool *pool;
int err;
@@ -265,7 +282,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
if (!pool)
return ERR_PTR(-ENOMEM);
- err = page_pool_init(pool, params);
+ err = page_pool_init(pool, params, cpuid);
if (err < 0)
goto err_free;
@@ -282,6 +299,16 @@ err_free:
kfree(pool);
return ERR_PTR(err);
}
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+ return page_pool_create_percpu(params, -1);
+}
EXPORT_SYMBOL(page_pool_create);
static void page_pool_return_page(struct page_pool *pool, struct page *page);
@@ -630,6 +657,11 @@ static bool page_pool_recycle_in_cache(struct page *page,
return true;
}
+static bool __page_pool_page_can_be_recycled(const struct page *page)
+{
+ return page_ref_count(page) == 1 && !page_is_pfmemalloc(page);
+}
+
/* If the page refcnt == 1, this will try to recycle the page.
* if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
* the configured size min(dma_sync_size, pool->max_len).
@@ -651,7 +683,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* page is NOT reusable when allocated when system is under
* some pressure. (page_is_pfmemalloc)
*/
- if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
+ if (likely(__page_pool_page_can_be_recycled(page))) {
/* Read barrier done in page_ref_count / READ_ONCE */
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
@@ -766,7 +798,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
if (likely(page_pool_unref_page(page, drain_count)))
return NULL;
- if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+ if (__page_pool_page_can_be_recycled(page)) {
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, -1);
@@ -934,8 +966,13 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
pool->xdp_mem_id = mem->id;
}
-void page_pool_unlink_napi(struct page_pool *pool)
+static void page_pool_disable_direct_recycling(struct page_pool *pool)
{
+ /* Disable direct recycling based on pool->cpuid.
+ * Paired with READ_ONCE() in napi_pp_put_page().
+ */
+ WRITE_ONCE(pool->cpuid, -1);
+
if (!pool->p.napi)
return;
@@ -947,7 +984,6 @@ void page_pool_unlink_napi(struct page_pool *pool)
WRITE_ONCE(pool->p.napi, NULL);
}
-EXPORT_SYMBOL(page_pool_unlink_napi);
void page_pool_destroy(struct page_pool *pool)
{
@@ -957,7 +993,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
- page_pool_unlink_napi(pool);
+ page_pool_disable_direct_recycling(pool);
page_pool_free_frag(pool);
if (!page_pool_release(pool))
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index ffe5244e5597..3a3277ba167b 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -94,16 +94,15 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
state->pp_id = pool->user.id;
err = fill(skb, pool, info);
if (err)
- break;
+ goto out;
}
state->pp_id = 0;
}
+out:
mutex_unlock(&page_pools_lock);
rtnl_unlock();
- if (skb->len && err == -EMSGSIZE)
- return skb->len;
return err;
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index f35c2e998406..63de5c635842 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,9 +33,6 @@
void reqsk_queue_alloc(struct request_sock_queue *queue)
{
- spin_lock_init(&queue->rskq_lock);
-
- spin_lock_init(&queue->fastopenq.lock);
queue->fastopenq.rskq_rst_head = NULL;
queue->fastopenq.rskq_rst_tail = NULL;
queue->fastopenq.qlen = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f6f29eb03ec2..a3d7847ce69d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -483,24 +483,15 @@ EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
*/
static void rtnl_lock_unregistering_all(void)
{
- struct net *net;
- bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
- unregistering = false;
rtnl_lock();
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
- for_each_net(net) {
- if (atomic_read(&net->dev_unreg_count) > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
+ if (!atomic_read(&dev_unreg_count))
break;
__rtnl_unlock();
@@ -851,9 +842,22 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
+void netdev_set_operstate(struct net_device *dev, int newstate)
+{
+ unsigned int old = READ_ONCE(dev->operstate);
+
+ do {
+ if (old == newstate)
+ return;
+ } while (!try_cmpxchg(&dev->operstate, &old, newstate));
+
+ netdev_state_change(dev);
+}
+EXPORT_SYMBOL(netdev_set_operstate);
+
static void set_operstate(struct net_device *dev, unsigned char transition)
{
- unsigned char operstate = dev->operstate;
+ unsigned char operstate = READ_ONCE(dev->operstate);
switch (transition) {
case IF_OPER_UP:
@@ -875,12 +879,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
break;
}
- if (dev->operstate != operstate) {
- write_lock(&dev_base_lock);
- dev->operstate = operstate;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- }
+ netdev_set_operstate(dev, operstate);
}
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
@@ -1020,14 +1019,17 @@ static size_t rtnl_xdp_size(void)
static size_t rtnl_prop_list_size(const struct net_device *dev)
{
struct netdev_name_node *name_node;
- size_t size;
+ unsigned int cnt = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list)
+ cnt++;
+ rcu_read_unlock();
- if (list_empty(&dev->name_node->list))
+ if (!cnt)
return 0;
- size = nla_total_size(0);
- list_for_each_entry(name_node, &dev->name_node->list, list)
- size += nla_total_size(ALTIFNAMSIZ);
- return size;
+
+ return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ);
}
static size_t rtnl_proto_down_size(const struct net_device *dev)
@@ -1054,7 +1056,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev)
{
size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
- size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+ size += dpll_netdev_pin_handle_size(dev);
return size;
}
@@ -1453,17 +1455,18 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return 0;
}
-static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_fill_link_ifmap(struct sk_buff *skb,
+ const struct net_device *dev)
{
struct rtnl_link_ifmap map;
memset(&map, 0, sizeof(map));
- map.mem_start = dev->mem_start;
- map.mem_end = dev->mem_end;
- map.base_addr = dev->base_addr;
- map.irq = dev->irq;
- map.dma = dev->dma;
- map.port = dev->if_port;
+ map.mem_start = READ_ONCE(dev->mem_start);
+ map.mem_end = READ_ONCE(dev->mem_end);
+ map.base_addr = READ_ONCE(dev->base_addr);
+ map.irq = READ_ONCE(dev->irq);
+ map.dma = READ_ONCE(dev->dma);
+ map.port = READ_ONCE(dev->if_port);
if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
@@ -1609,10 +1612,10 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
bool force)
{
- int ifindex = dev_get_iflink(dev);
+ int iflink = dev_get_iflink(dev);
- if (force || dev->ifindex != ifindex)
- return nla_put_u32(skb, IFLA_LINK, ifindex);
+ if (force || READ_ONCE(dev->ifindex) != iflink)
+ return nla_put_u32(skb, IFLA_LINK, iflink);
return 0;
}
@@ -1696,7 +1699,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
struct netdev_name_node *name_node;
int count = 0;
- list_for_each_entry(name_node, &dev->name_node->list, list) {
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list) {
if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
return -EMSGSIZE;
count++;
@@ -1704,6 +1707,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
return count;
}
+/* RCU protected. */
static int rtnl_fill_prop_list(struct sk_buff *skb,
const struct net_device *dev)
{
@@ -1789,7 +1793,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb,
if (!dpll_pin_nest)
return -EMSGSIZE;
- ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+ ret = dpll_netdev_add_pin_handle(skb, dev);
if (ret < 0)
goto nest_cancel;
@@ -1873,9 +1877,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_ifmap(skb, dev))
- goto nla_put_failure;
-
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast))
@@ -1925,10 +1926,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu;
- rcu_read_unlock();
-
+ if (rtnl_fill_link_ifmap(skb, dev))
+ goto nla_put_failure_rcu;
if (rtnl_fill_prop_list(skb, dev))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
if (dev->dev.parent &&
nla_put_string(skb, IFLA_PARENT_DEV_NAME,
@@ -2197,25 +2199,22 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
- struct hlist_head *head;
+ unsigned int flags = NLM_F_MULTI;
struct nlattr *tb[IFLA_MAX+1];
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
+ struct net *tgt_net = net;
u32 ext_filter_mask = 0;
- const struct rtnl_link_ops *kind_ops = NULL;
- unsigned int flags = NLM_F_MULTI;
+ struct net_device *dev;
int master_idx = 0;
int netnsid = -1;
int err, i;
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack);
if (err < 0) {
if (cb->strict_check)
@@ -2259,36 +2258,18 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
flags |= NLM_F_DUMP_FILTERED;
walk_entries:
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (link_dump_filtered(dev, master_idx, kind_ops))
- goto cont;
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_ifinfo(skb, dev, net,
- RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq, 0, flags,
- ext_filter_mask, 0, NULL, 0,
- netnsid, GFP_KERNEL);
-
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
-cont:
- idx++;
- }
+ err = 0;
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ if (link_dump_filtered(dev, master_idx, kind_ops))
+ continue;
+ err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq, 0, flags,
+ ext_filter_mask, 0, NULL, 0,
+ netnsid, GFP_KERNEL);
+ if (err < 0)
+ break;
}
-out:
- err = skb->len;
-out_err:
- cb->args[1] = idx;
- cb->args[0] = h;
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (netnsid >= 0)
@@ -2980,11 +2961,9 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_LINKMODE]) {
unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
- write_lock(&dev_base_lock);
if (dev->link_mode ^ value)
status |= DO_SETLINK_NOTIFY;
- dev->link_mode = value;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->link_mode, value);
}
if (tb[IFLA_VFINFO_LIST]) {
@@ -5166,10 +5145,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
- struct nlattr *br_spec, *attr = NULL;
+ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
- bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
@@ -5187,11 +5165,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
- have_flags = true;
+ br_flags_attr = attr;
flags = nla_get_u16(attr);
}
@@ -5235,8 +5213,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- if (have_flags)
- memcpy(nla_data(attr), &flags, sizeof(flags));
+ if (br_flags_attr)
+ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
out:
return err;
}
@@ -6550,6 +6528,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
owner = link->owner;
dumpit = link->dumpit;
+ flags = link->flags;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
@@ -6567,6 +6546,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
.module = owner,
+ .flags = flags,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on
diff --git a/net/core/scm.c b/net/core/scm.c
index d0e0852a24d5..9cd4b0a01cd6 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -36,6 +36,7 @@
#include <net/compat.h>
#include <net/scm.h>
#include <net/cls_cgroup.h>
+#include <net/af_unix.h>
/*
@@ -85,6 +86,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
}
@@ -109,6 +111,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fput(file);
return -EINVAL;
}
+ if (unix_get_socket(file))
+ fpl->count_unix++;
+
*fpp++ = file;
fpl->count++;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index edbbef563d4d..b99127712e67 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
#include <net/sock.h>
#include <net/checksum.h>
#include <net/gso.h>
+#include <net/hotdata.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
@@ -88,15 +89,10 @@
#include "dev.h"
#include "sock_destructor.h"
-struct kmem_cache *skbuff_cache __ro_after_init;
-static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-
-static struct kmem_cache *skb_small_head_cache __ro_after_init;
-
#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
@@ -115,6 +111,24 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
+/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
+ * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the
+ * netmem is a page.
+ */
+static_assert(offsetof(struct bio_vec, bv_page) ==
+ offsetof(skb_frag_t, netmem));
+static_assert(sizeof_field(struct bio_vec, bv_page) ==
+ sizeof_field(skb_frag_t, netmem));
+
+static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len));
+static_assert(sizeof_field(struct bio_vec, bv_len) ==
+ sizeof_field(skb_frag_t, len));
+
+static_assert(offsetof(struct bio_vec, bv_offset) ==
+ offsetof(skb_frag_t, offset));
+static_assert(sizeof_field(struct bio_vec, bv_offset) ==
+ sizeof_field(skb_frag_t, offset));
+
#undef FN
#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
static const char * const drop_reasons[] = {
@@ -297,7 +311,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
fragsz = SKB_DATA_ALIGN(fragsz);
- return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+ return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
+ align_mask);
}
EXPORT_SYMBOL(__napi_alloc_frag_align);
@@ -309,13 +324,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
if (in_hardirq() || irqs_disabled()) {
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
- data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
+ data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
+ align_mask);
} else {
struct napi_alloc_cache *nc;
local_bh_disable();
nc = this_cpu_ptr(&napi_alloc_cache);
- data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+ data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
+ align_mask);
local_bh_enable();
}
return data;
@@ -328,7 +345,7 @@ static struct sk_buff *napi_skb_cache_get(void)
struct sk_buff *skb;
if (unlikely(!nc->skb_count)) {
- nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache,
+ nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
@@ -337,7 +354,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache));
+ kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache));
return skb;
}
@@ -395,7 +412,7 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -446,7 +463,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -557,7 +574,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj_size = SKB_HEAD_ALIGN(*size);
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
- obj = kmem_cache_alloc_node(skb_small_head_cache,
+ obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
*size = SKB_SMALL_HEAD_CACHE_SIZE;
@@ -565,7 +582,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
goto out;
/* Try again but now we are using pfmemalloc reserves */
ret_pfmemalloc = true;
- obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
+ obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node);
goto out;
}
@@ -628,7 +645,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
- ? skbuff_fclone_cache : skbuff_cache;
+ ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
@@ -845,17 +862,17 @@ skb_fail:
}
EXPORT_SYMBOL(__napi_alloc_skb);
-void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
- int size, unsigned int truesize)
+void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
+ int off, int size, unsigned int truesize)
{
DEBUG_NET_WARN_ON_ONCE(size > truesize);
- skb_fill_page_desc(skb, i, page, off, size);
+ skb_fill_netmem_desc(skb, i, netmem, off, size);
skb->len += size;
skb->data_len += size;
skb->truesize += truesize;
}
-EXPORT_SYMBOL(skb_add_rx_frag);
+EXPORT_SYMBOL(skb_add_rx_frag_netmem);
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize)
@@ -895,6 +912,98 @@ static bool is_pp_page(struct page *page)
return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
}
+int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
+ unsigned int headroom)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ u32 size, truesize, len, max_head_size, off;
+ struct sk_buff *skb = *pskb, *nskb;
+ int err, i, head_off;
+ void *data;
+
+ /* XDP does not support fraglist so we need to linearize
+ * the skb.
+ */
+ if (skb_has_frag_list(skb))
+ return -EOPNOTSUPP;
+
+ max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom);
+ if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE)
+ return -ENOMEM;
+
+ size = min_t(u32, skb->len, max_head_size);
+ truesize = SKB_HEAD_ALIGN(size) + headroom;
+ data = page_pool_dev_alloc_va(pool, &truesize);
+ if (!data)
+ return -ENOMEM;
+
+ nskb = napi_build_skb(data, truesize);
+ if (!nskb) {
+ page_pool_free_va(pool, data, true);
+ return -ENOMEM;
+ }
+
+ skb_reserve(nskb, headroom);
+ skb_copy_header(nskb, skb);
+ skb_mark_for_recycle(nskb);
+
+ err = skb_copy_bits(skb, 0, nskb->data, size);
+ if (err) {
+ consume_skb(nskb);
+ return err;
+ }
+ skb_put(nskb, size);
+
+ head_off = skb_headroom(nskb) - skb_headroom(skb);
+ skb_headers_offset_update(nskb, head_off);
+
+ off = size;
+ len = skb->len - off;
+ for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+ struct page *page;
+ u32 page_off;
+
+ size = min_t(u32, len, PAGE_SIZE);
+ truesize = size;
+
+ page = page_pool_dev_alloc(pool, &page_off, &truesize);
+ if (!page) {
+ consume_skb(nskb);
+ return -ENOMEM;
+ }
+
+ skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
+ err = skb_copy_bits(skb, off, page_address(page) + page_off,
+ size);
+ if (err) {
+ consume_skb(nskb);
+ return err;
+ }
+
+ len -= size;
+ off += size;
+ }
+
+ consume_skb(skb);
+ *pskb = nskb;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+EXPORT_SYMBOL(skb_pp_cow_data);
+
+int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+ struct bpf_prog *prog)
+{
+ if (!prog->aux->xdp_has_frags)
+ return -EINVAL;
+
+ return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM);
+}
+EXPORT_SYMBOL(skb_cow_data_for_xdp);
+
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
@@ -923,9 +1032,10 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
*/
if (napi_safe || in_softirq()) {
const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+ unsigned int cpuid = smp_processor_id();
- allow_direct = napi &&
- READ_ONCE(napi->list_owner) == smp_processor_id();
+ allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
+ allow_direct |= READ_ONCE(pp->cpuid) == cpuid;
}
/* Driver set this to memory recycling info. Reset it on recycle.
@@ -981,7 +1091,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
- kmem_cache_free(skb_small_head_cache, head);
+ kmem_cache_free(net_hotdata.skb_small_head_cache, head);
else
kfree(head);
}
@@ -1005,9 +1115,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
struct skb_shared_info *shinfo = skb_shinfo(skb);
int i;
- if (skb->cloned &&
- atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &shinfo->dataref))
+ if (!skb_data_unref(skb, shinfo))
goto exit;
if (skb_zcopy(skb)) {
@@ -1048,7 +1156,7 @@ static void kfree_skbmem(struct sk_buff *skb)
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
- kmem_cache_free(skbuff_cache, skb);
+ kmem_cache_free(net_hotdata.skbuff_cache, skb);
return;
case SKB_FCLONE_ORIG:
@@ -1069,7 +1177,7 @@ static void kfree_skbmem(struct sk_buff *skb)
if (!refcount_dec_and_test(&fclones->fclone_ref))
return;
fastpath:
- kmem_cache_free(skbuff_fclone_cache, fclones);
+ kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones);
}
void skb_release_head_state(struct sk_buff *skb)
@@ -1166,7 +1274,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
sa->skb_array[sa->skb_count++] = skb;
if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
- kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE,
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE,
sa->skb_array);
sa->skb_count = 0;
}
@@ -1191,7 +1299,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
}
if (sa.skb_count)
- kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array);
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
@@ -1353,9 +1461,9 @@ static void napi_skb_cache_put(struct sk_buff *skb)
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
kasan_mempool_unpoison_object(nc->skb_cache[i],
- kmem_cache_size(skbuff_cache));
+ kmem_cache_size(net_hotdata.skbuff_cache));
- kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
nc->skb_count = NAPI_SKB_CACHE_HALF;
}
@@ -1906,10 +2014,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags point to kernel buffers */
for (i = 0; i < new_frags - 1; i++) {
- __skb_fill_page_desc(skb, i, head, 0, psize);
+ __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize);
head = (struct page *)page_private(head);
}
- __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+ __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0,
+ d_off);
skb_shinfo(skb)->nr_frags = new_frags;
release:
@@ -1951,7 +2060,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- n = kmem_cache_alloc(skbuff_cache, gfp_mask);
+ n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask);
if (!n)
return NULL;
@@ -3647,7 +3756,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
if (plen) {
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- __skb_fill_page_desc(to, 0, page, offset, plen);
+ __skb_fill_netmem_desc(to, 0, page_to_netmem(page),
+ offset, plen);
get_page(page);
j = 1;
len -= plen;
@@ -4889,7 +4999,7 @@ static void skb_extensions_init(void) {}
void __init skb_init(void)
{
- skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
+ net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|
@@ -4897,7 +5007,7 @@ void __init skb_init(void)
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
- skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
@@ -4906,7 +5016,7 @@ void __init skb_init(void)
* struct skb_shared_info is located at the end of skb->head,
* and should not be copied to/from user.
*/
- skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
+ net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
SKB_SMALL_HEAD_CACHE_SIZE,
0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC,
@@ -5779,7 +5889,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
if (head_stolen) {
skb_release_head_state(skb);
- kmem_cache_free(skbuff_cache, skb);
+ kmem_cache_free(net_hotdata.skbuff_cache, skb);
} else {
__kfree_skb(skb);
}
@@ -6737,6 +6847,14 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
xfrm_state_hold(sp->xvec[i]);
}
#endif
+#ifdef CONFIG_MCTP_FLOWS
+ if (old_active & (1 << SKB_EXT_MCTP)) {
+ struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP);
+
+ if (flow->key)
+ refcount_inc(&flow->key->refs);
+ }
+#endif
__skb_ext_put(old);
return new;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 93ecfceac1bc..4d75ef9d24bf 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock)
- psock->saved_data_ready(sk);
+ if (psock) {
+ read_lock_bh(&sk->sk_callback_lock);
+ sk_psock_data_ready(sk, psock);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..43bf3818c19e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -107,6 +107,7 @@
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
@@ -282,6 +283,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
+int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
int sysctl_tstamp_allow_data __read_mostly = 1;
@@ -1187,6 +1189,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
*/
WRITE_ONCE(sk->sk_txrehash, (u8)val);
return 0;
+ case SO_PEEK_OFF:
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
+ else
+ ret = -EOPNOTSUPP;
+ return ret;
+ }
}
sockopt_lock_sock(sk);
@@ -1429,18 +1442,6 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
break;
- case SO_PEEK_OFF:
- {
- int (*set_peek_off)(struct sock *sk, int val);
-
- set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
- if (set_peek_off)
- ret = set_peek_off(sk, val);
- else
- ret = -EOPNOTSUPP;
- break;
- }
-
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
@@ -2052,8 +2053,9 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
- memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
- prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+ unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+ prot->obj_size - offsetof(struct sock, sk_dontcopy_end),
+ /* alloc is larger than struct, see sk_prot_alloc() */);
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
@@ -2582,8 +2584,18 @@ EXPORT_SYMBOL(sock_efree);
#ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
- if (sk_is_refcounted(skb->sk))
- sock_gen_put(skb->sk);
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk))
+ return;
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
+ inet_reqsk(sk)->rsk_listener = NULL;
+ reqsk_free(inet_reqsk(sk));
+ return;
+ }
+
+ sock_gen_put(sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
@@ -4144,8 +4156,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
{
struct sock *sk = p;
- return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
- sk_busy_loop_timeout(sk, start_time);
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ return true;
+
+ if (sk_is_udp(sk) &&
+ !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+ return true;
+
+ return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */
@@ -4217,3 +4235,65 @@ int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);
+
+static int __init sock_struct_check(void)
+{
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog);
+
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat);
+
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
+
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags);
+
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);
+
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
+ return 0;
+}
+
+core_initcall(sock_struct_check);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index b1e29e18d1d6..654122838025 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -16,9 +16,10 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
-static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
-static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
-static DEFINE_MUTEX(sock_diag_table_mutex);
+static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX];
+
+static struct sock_diag_inet_compat __rcu *inet_rcv_compat;
+
static struct workqueue_struct *broadcast_wq;
DEFINE_COOKIE(sock_cookie);
@@ -122,6 +123,24 @@ static size_t sock_diag_nlmsg_size(void)
+ nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
+static const struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+ const struct sock_diag_handler *handler;
+
+ rcu_read_lock();
+ handler = rcu_dereference(sock_diag_handlers[family]);
+ if (handler && !try_module_get(handler->owner))
+ handler = NULL;
+ rcu_read_unlock();
+
+ return handler;
+}
+
+static void sock_diag_unlock_handler(const struct sock_diag_handler *handler)
+{
+ module_put(handler->owner);
+}
+
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
struct broadcast_sk *bsk =
@@ -138,12 +157,12 @@ static void sock_diag_broadcast_destroy_work(struct work_struct *work)
if (!skb)
goto out;
- mutex_lock(&sock_diag_table_mutex);
- hndl = sock_diag_handlers[sk->sk_family];
- if (hndl && hndl->get_info)
- err = hndl->get_info(skb, sk);
- mutex_unlock(&sock_diag_table_mutex);
-
+ hndl = sock_diag_lock_handler(sk->sk_family);
+ if (hndl) {
+ if (hndl->get_info)
+ err = hndl->get_info(skb, sk);
+ sock_diag_unlock_handler(hndl);
+ }
if (!err)
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
GFP_KERNEL);
@@ -166,51 +185,45 @@ void sock_diag_broadcast_destroy(struct sock *sk)
queue_work(broadcast_wq, &bsk->work);
}
-void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- mutex_lock(&sock_diag_table_mutex);
- inet_rcv_compat = fn;
- mutex_unlock(&sock_diag_table_mutex);
+ xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
+ ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
-void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- mutex_lock(&sock_diag_table_mutex);
- inet_rcv_compat = NULL;
- mutex_unlock(&sock_diag_table_mutex);
+ const struct sock_diag_inet_compat *old;
+
+ old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
+ NULL);
+ WARN_ON_ONCE(old != ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
int sock_diag_register(const struct sock_diag_handler *hndl)
{
- int err = 0;
+ int family = hndl->family;
- if (hndl->family >= AF_MAX)
+ if (family >= AF_MAX)
return -EINVAL;
- mutex_lock(&sock_diag_table_mutex);
- if (sock_diag_handlers[hndl->family])
- err = -EBUSY;
- else
- sock_diag_handlers[hndl->family] = hndl;
- mutex_unlock(&sock_diag_table_mutex);
-
- return err;
+ return !cmpxchg((const struct sock_diag_handler **)
+ &sock_diag_handlers[family],
+ NULL, hndl) ? 0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(sock_diag_register);
-void sock_diag_unregister(const struct sock_diag_handler *hnld)
+void sock_diag_unregister(const struct sock_diag_handler *hndl)
{
- int family = hnld->family;
+ int family = hndl->family;
if (family >= AF_MAX)
return;
- mutex_lock(&sock_diag_table_mutex);
- BUG_ON(sock_diag_handlers[family] != hnld);
- sock_diag_handlers[family] = NULL;
- mutex_unlock(&sock_diag_table_mutex);
+ xchg((const struct sock_diag_handler **)&sock_diag_handlers[family],
+ NULL);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
@@ -227,20 +240,20 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);
- if (sock_diag_handlers[req->sdiag_family] == NULL)
+ if (!rcu_access_pointer(sock_diag_handlers[req->sdiag_family]))
sock_load_diag_module(req->sdiag_family, 0);
- mutex_lock(&sock_diag_table_mutex);
- hndl = sock_diag_handlers[req->sdiag_family];
+ hndl = sock_diag_lock_handler(req->sdiag_family);
if (hndl == NULL)
- err = -ENOENT;
- else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
+ return -ENOENT;
+
+ if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
err = hndl->destroy(skb, nlh);
else
err = -EOPNOTSUPP;
- mutex_unlock(&sock_diag_table_mutex);
+ sock_diag_unlock_handler(hndl);
return err;
}
@@ -248,20 +261,27 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ const struct sock_diag_inet_compat *ptr;
int ret;
switch (nlh->nlmsg_type) {
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
- if (inet_rcv_compat == NULL)
+
+ if (!rcu_access_pointer(inet_rcv_compat))
sock_load_diag_module(AF_INET, 0);
- mutex_lock(&sock_diag_table_mutex);
- if (inet_rcv_compat != NULL)
- ret = inet_rcv_compat(skb, nlh);
- else
- ret = -EOPNOTSUPP;
- mutex_unlock(&sock_diag_table_mutex);
+ rcu_read_lock();
+ ptr = rcu_dereference(inet_rcv_compat);
+ if (ptr && !try_module_get(ptr->owner))
+ ptr = NULL;
+ rcu_read_unlock();
+
+ ret = -EOPNOTSUPP;
+ if (ptr) {
+ ret = ptr->fn(skb, nlh);
+ module_put(ptr->owner);
+ }
return ret;
case SOCK_DIAG_BY_FAMILY:
@@ -272,13 +292,9 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
-static DEFINE_MUTEX(sock_diag_mutex);
-
static void sock_diag_rcv(struct sk_buff *skb)
{
- mutex_lock(&sock_diag_mutex);
netlink_rcv_skb(skb, &sock_diag_rcv_msg);
- mutex_unlock(&sock_diag_mutex);
}
static int sock_diag_bind(struct net *net, int group)
@@ -286,12 +302,12 @@ static int sock_diag_bind(struct net *net, int group)
switch (group) {
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
- if (!sock_diag_handlers[AF_INET])
+ if (!rcu_access_pointer(sock_diag_handlers[AF_INET]))
sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
- if (!sock_diag_handlers[AF_INET6])
+ if (!rcu_access_pointer(sock_diag_handlers[AF_INET6]))
sock_load_diag_module(AF_INET6, 0);
break;
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0f0cb1465e08..6973dda3abda 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,6 +23,8 @@
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
+#include <net/hotdata.h>
+#include <net/rps.h>
#include "dev.h"
@@ -30,6 +32,7 @@ static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
+static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -137,7 +140,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ orig_sock_table = rcu_dereference_protected(
+ net_hotdata.rps_sock_flow_table,
lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
@@ -158,7 +162,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_unlock(&sock_flow_mutex);
return -ENOMEM;
}
- rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
+ net_hotdata.rps_cpu_mask =
+ roundup_pow_of_two(nr_cpu_ids) - 1;
sock_table->mask = size - 1;
} else
sock_table = orig_sock_table;
@@ -169,7 +174,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
sock_table = NULL;
if (sock_table != orig_sock_table) {
- rcu_assign_pointer(rps_sock_flow_table, sock_table);
+ rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
+ sock_table);
if (sock_table) {
static_branch_inc(&rps_needed);
static_branch_inc(&rfs_needed);
@@ -299,8 +305,8 @@ static int proc_do_dev_weight(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
weight = READ_ONCE(weight_p);
- WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
- WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
}
mutex_unlock(&dev_weight_mutex);
@@ -408,6 +414,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = &min_rcvbuf,
},
{
+ .procname = "mem_pcpu_rsv",
+ .data = &sysctl_mem_pcpu_rsv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_mem_pcpu_rsv,
+ },
+ {
.procname = "dev_weight",
.data = &weight_p,
.maxlen = sizeof(int),
@@ -430,7 +444,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "netdev_max_backlog",
- .data = &netdev_max_backlog,
+ .data = &net_hotdata.max_backlog,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -489,7 +503,7 @@ static struct ctl_table net_core_table[] = {
#endif
{
.procname = "netdev_tstamp_prequeue",
- .data = &netdev_tstamp_prequeue,
+ .data = &net_hotdata.tstamp_prequeue,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -567,7 +581,7 @@ static struct ctl_table net_core_table[] = {
#endif
{
.procname = "netdev_budget",
- .data = &netdev_budget,
+ .data = &net_hotdata.netdev_budget,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -590,7 +604,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "netdev_budget_usecs",
- .data = &netdev_budget_usecs,
+ .data = &net_hotdata.netdev_budget_usecs,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -623,7 +637,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "gro_normal_batch",
- .data = &gro_normal_batch,
+ .data = &net_hotdata.gro_normal_batch,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4869c1c2d8f3..41693154e426 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -16,6 +16,7 @@
#include <linux/bug.h>
#include <net/page_pool/helpers.h>
+#include <net/hotdata.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
@@ -75,7 +76,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
/* Allow this ID to be reused */
- ida_simple_remove(&mem_id_pool, xa->mem.id);
+ ida_free(&mem_id_pool, xa->mem.id);
kfree(xa);
}
@@ -242,7 +243,7 @@ static int __mem_id_cyclic_get(gfp_t gfp)
int id;
again:
- id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+ id = ida_alloc_range(&mem_id_pool, mem_id_next, MEM_ID_MAX - 1, gfp);
if (id < 0) {
if (id == -ENOSPC) {
/* Cyclic allocator, reset next id */
@@ -317,7 +318,7 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
- ida_simple_remove(&mem_id_pool, mem->id);
+ ida_free(&mem_id_pool, mem->id);
mem->id = 0;
errno = PTR_ERR(ptr);
goto err;
@@ -589,7 +590,7 @@ EXPORT_SYMBOL_GPL(xdp_warn);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
{
- n_skb = kmem_cache_alloc_bulk(skbuff_cache, gfp, n_skb, skbs);
+ n_skb = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, n_skb, skbs);
if (unlikely(!n_skb))
return -ENOMEM;
@@ -658,7 +659,7 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -771,11 +772,11 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
__bpf_kfunc_end_defs();
-BTF_SET8_START(xdp_metadata_kfunc_ids)
+BTF_KFUNCS_START(xdp_metadata_kfunc_ids)
#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
-BTF_SET8_END(xdp_metadata_kfunc_ids)
+BTF_KFUNCS_END(xdp_metadata_kfunc_ids)
static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index c4bbac99740d..1cba001bb4c8 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -376,15 +376,11 @@ EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
int __init dccp_ackvec_init(void)
{
- dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
- sizeof(struct dccp_ackvec), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_slab = KMEM_CACHE(dccp_ackvec, SLAB_HWCACHE_ALIGN);
if (dccp_ackvec_slab == NULL)
goto out_err;
- dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
- sizeof(struct dccp_ackvec_record),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_record_slab = KMEM_CACHE(dccp_ackvec_record, SLAB_HWCACHE_ALIGN);
if (dccp_ackvec_record_slab == NULL)
goto out_destroy_slab;
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 8a82c5a2c5a8..f5019d95c3ae 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -58,6 +58,7 @@ static int dccp_diag_dump_one(struct netlink_callback *cb,
}
static const struct inet_diag_handler dccp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = dccp_diag_dump,
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 4275a2bc6d8e..7f0b093208d7 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -46,7 +46,7 @@ struct devlink_rel {
u32 obj_index;
devlink_rel_notify_cb_t *notify_cb;
devlink_rel_cleanup_cb_t *cleanup_cb;
- struct work_struct notify_work;
+ struct delayed_work notify_work;
} nested_in;
};
@@ -70,7 +70,7 @@ static void __devlink_rel_put(struct devlink_rel *rel)
static void devlink_rel_nested_in_notify_work(struct work_struct *work)
{
struct devlink_rel *rel = container_of(work, struct devlink_rel,
- nested_in.notify_work);
+ nested_in.notify_work.work);
struct devlink *devlink;
devlink = devlinks_xa_get(rel->nested_in.devlink_index);
@@ -96,13 +96,13 @@ rel_put:
return;
reschedule_work:
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 1);
}
static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
{
__devlink_rel_get(rel);
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 0);
}
static struct devlink_rel *devlink_rel_alloc(void)
@@ -123,8 +123,8 @@ static struct devlink_rel *devlink_rel_alloc(void)
}
refcount_set(&rel->refcount, 1);
- INIT_WORK(&rel->nested_in.notify_work,
- &devlink_rel_nested_in_notify_work);
+ INIT_DELAYED_WORK(&rel->nested_in.notify_work,
+ &devlink_rel_nested_in_notify_work);
return rel;
}
@@ -529,14 +529,20 @@ static int __init devlink_init(void)
{
int err;
- err = genl_register_family(&devlink_nl_family);
- if (err)
- goto out;
err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out_unreg_pernet_subsys;
err = register_netdevice_notifier(&devlink_port_netdevice_nb);
+ if (!err)
+ return 0;
+
+ genl_unregister_family(&devlink_nl_family);
+out_unreg_pernet_subsys:
+ unregister_pernet_subsys(&devlink_pernet_ops);
out:
WARN_ON(err);
return err;
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index c81cf2dd154f..f9786d51f68f 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -198,7 +198,7 @@ static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITC
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1),
- [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3),
+ [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3),
[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1),
};
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 62e54e152ecf..4b2d46ccfe48 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
+ DEVLINK_CMD_PORT_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
cb->extack);
@@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
return -EOPNOTSUPP;
}
if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
"Function does not support state setting");
return -EOPNOTSUPP;
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ac7be864e80d..09d2f5d4b3dd 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/of.h>
-#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <net/dsa_stubs.h>
#include <net/sch_generic.h>
@@ -626,7 +625,6 @@ static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds)
static int dsa_switch_setup(struct dsa_switch *ds)
{
- struct device_node *dn;
int err;
if (ds->setup)
@@ -666,10 +664,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
dsa_user_mii_bus_init(ds);
- dn = of_get_child_by_name(ds->dev->of_node, "mdio");
-
- err = of_mdiobus_register(ds->user_mii_bus, dn);
- of_node_put(dn);
+ err = mdiobus_register(ds->user_mii_bus);
if (err < 0)
goto free_user_mii_bus;
}
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 2717e9d7b612..1aba1d05c27a 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -75,7 +75,7 @@ sja1105_tagger_private(struct dsa_switch *ds)
}
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
-static inline bool sja1105_is_link_local(const struct sk_buff *skb)
+static bool sja1105_is_link_local(const struct sk_buff *skb)
{
const struct ethhdr *hdr = eth_hdr(skb);
u64 dmac = ether_addr_to_u64(hdr->h_dest);
@@ -121,7 +121,7 @@ static void sja1105_meta_unpack(const struct sk_buff *skb,
packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0);
}
-static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
+static bool sja1105_is_meta_frame(const struct sk_buff *skb)
{
const struct ethhdr *hdr = eth_hdr(skb);
u64 smac = ether_addr_to_u64(hdr->h_source);
diff --git a/net/dsa/user.c b/net/dsa/user.c
index b15e71cc342c..16d395bb1a1f 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -210,7 +210,7 @@ static int dsa_user_sync_uc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_unsync_uc(struct net_device *dev,
@@ -230,7 +230,7 @@ static int dsa_user_unsync_uc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_sync_mc(struct net_device *dev,
@@ -250,7 +250,7 @@ static int dsa_user_sync_mc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_unsync_mc(struct net_device *dev,
@@ -270,7 +270,7 @@ static int dsa_user_unsync_mc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
void dsa_user_sync_ha(struct net_device *dev)
@@ -352,7 +352,7 @@ void dsa_user_mii_bus_init(struct dsa_switch *ds)
/* user device handling ****************************************************/
static int dsa_user_get_iflink(const struct net_device *dev)
{
- return dsa_user_to_conduit(dev)->ifindex;
+ return READ_ONCE(dsa_user_to_conduit(dev)->ifindex);
}
static int dsa_user_open(struct net_device *dev)
@@ -875,8 +875,8 @@ static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx,
return err;
}
-static inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
- struct sk_buff *skb)
+static netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
struct dsa_user_priv *p = netdev_priv(dev);
@@ -1222,7 +1222,7 @@ static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
return ret;
}
-static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
@@ -1242,7 +1242,7 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
return phylink_ethtool_set_eee(dp->pl, e);
}
-static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
@@ -2429,7 +2429,7 @@ static const struct net_device_ops dsa_user_netdev_ops = {
.ndo_fill_forward_path = dsa_user_fill_forward_path,
};
-static struct device_type dsa_type = {
+static const struct device_type dsa_type = {
.name = "dsa",
};
@@ -2625,11 +2625,7 @@ int dsa_user_create(struct dsa_port *port)
user_dev->vlan_features = conduit->vlan_features;
p = netdev_priv(user_dev);
- user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!user_dev->tstats) {
- free_netdev(user_dev);
- return -ENOMEM;
- }
+ user_dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
ret = gro_cells_init(&p->gcells, user_dev);
if (ret)
@@ -2695,7 +2691,6 @@ out_phy:
out_gcells:
gro_cells_destroy(&p->gcells);
out_free:
- free_percpu(user_dev->tstats);
free_netdev(user_dev);
port->user = NULL;
return ret;
@@ -2716,7 +2711,6 @@ void dsa_user_destroy(struct net_device *user_dev)
dsa_port_phylink_destroy(dp);
gro_cells_destroy(&p->gcells);
- free_percpu(user_dev->tstats);
free_netdev(user_dev);
}
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 2853394d06a8..bf398973eb8a 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -4,16 +4,13 @@
#include "common.h"
#include "bitset.h"
-#define EEE_MODES_COUNT \
- (sizeof_field(struct ethtool_eee, supported) * BITS_PER_BYTE)
-
struct eee_req_info {
struct ethnl_req_info base;
};
struct eee_reply_data {
struct ethnl_reply_data base;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
};
#define EEE_REPDATA(__reply_base) \
@@ -30,6 +27,7 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base,
{
struct eee_reply_data *data = EEE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct ethtool_keee *eee = &data->eee;
int ret;
if (!dev->ethtool_ops->get_eee)
@@ -37,7 +35,7 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = dev->ethtool_ops->get_eee(dev, &data->eee);
+ ret = dev->ethtool_ops->get_eee(dev, eee);
ethnl_ops_complete(dev);
return ret;
@@ -48,24 +46,21 @@ static int eee_reply_size(const struct ethnl_req_info *req_base,
{
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
const struct eee_reply_data *data = EEE_REPDATA(reply_base);
- const struct ethtool_eee *eee = &data->eee;
+ const struct ethtool_keee *eee = &data->eee;
int len = 0;
int ret;
- BUILD_BUG_ON(sizeof(eee->advertised) * BITS_PER_BYTE !=
- EEE_MODES_COUNT);
- BUILD_BUG_ON(sizeof(eee->lp_advertised) * BITS_PER_BYTE !=
- EEE_MODES_COUNT);
-
/* MODES_OURS */
- ret = ethnl_bitset32_size(&eee->advertised, &eee->supported,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_bitset_size(eee->advertised, eee->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
len += ret;
/* MODES_PEERS */
- ret = ethnl_bitset32_size(&eee->lp_advertised, NULL,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_bitset_size(eee->lp_advertised, NULL,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
len += ret;
@@ -84,24 +79,26 @@ static int eee_fill_reply(struct sk_buff *skb,
{
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
const struct eee_reply_data *data = EEE_REPDATA(reply_base);
- const struct ethtool_eee *eee = &data->eee;
+ const struct ethtool_keee *eee = &data->eee;
int ret;
- ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_OURS,
- &eee->advertised, &eee->supported,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_OURS,
+ eee->advertised, eee->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
- ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_PEER,
- &eee->lp_advertised, NULL, EEE_MODES_COUNT,
- link_mode_names, compact);
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_PEER,
+ eee->lp_advertised, NULL,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
- if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, !!eee->eee_active) ||
- nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, !!eee->eee_enabled) ||
+ if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, eee->eee_active) ||
+ nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, eee->eee_enabled) ||
nla_put_u8(skb, ETHTOOL_A_EEE_TX_LPI_ENABLED,
- !!eee->tx_lpi_enabled) ||
+ eee->tx_lpi_enabled) ||
nla_put_u32(skb, ETHTOOL_A_EEE_TX_LPI_TIMER, eee->tx_lpi_timer))
return -EMSGSIZE;
@@ -132,7 +129,7 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct ethtool_eee eee = {};
+ struct ethtool_keee eee = {};
bool mod = false;
int ret;
@@ -140,14 +137,15 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
if (ret < 0)
return ret;
- ret = ethnl_update_bitset32(&eee.advertised, EEE_MODES_COUNT,
- tb[ETHTOOL_A_EEE_MODES_OURS],
- link_mode_names, info->extack, &mod);
+ ret = ethnl_update_bitset(eee.advertised,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ tb[ETHTOOL_A_EEE_MODES_OURS],
+ link_mode_names, info->extack, &mod);
if (ret < 0)
return ret;
- ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
- ethnl_update_bool32(&eee.tx_lpi_enabled,
- tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod);
+ ethnl_update_bool(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
+ ethnl_update_bool(&eee.tx_lpi_enabled, tb[ETHTOOL_A_EEE_TX_LPI_ENABLED],
+ &mod);
ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER],
&mod);
if (!mod)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 7519b0818b91..5a55270aa86e 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -26,12 +26,12 @@
#include <linux/sched/signal.h>
#include <linux/net.h>
#include <linux/pm_runtime.h>
+#include <linux/utsname.h>
#include <net/devlink.h>
#include <net/ipv6.h>
#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
#include <linux/ethtool_netlink.h>
-#include <generated/utsrelease.h>
#include "common.h"
/* State held across locks and calls for commands which have devlink fallback */
@@ -713,7 +713,8 @@ ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp)
struct device *parent = dev->dev.parent;
rsp->info.cmd = ETHTOOL_GDRVINFO;
- strscpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
+ strscpy(rsp->info.version, init_uts_ns.name.release,
+ sizeof(rsp->info.version));
if (ops->get_drvinfo) {
ops->get_drvinfo(dev, &rsp->info);
if (!rsp->info.bus_info[0] && parent)
@@ -1508,22 +1509,57 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return 0;
}
+static void eee_to_keee(struct ethtool_keee *keee,
+ const struct ethtool_eee *eee)
+{
+ memset(keee, 0, sizeof(*keee));
+
+ keee->eee_enabled = eee->eee_enabled;
+ keee->tx_lpi_enabled = eee->tx_lpi_enabled;
+ keee->tx_lpi_timer = eee->tx_lpi_timer;
+
+ ethtool_convert_legacy_u32_to_link_mode(keee->advertised,
+ eee->advertised);
+}
+
+static void keee_to_eee(struct ethtool_eee *eee,
+ const struct ethtool_keee *keee)
+{
+ bool overflow;
+
+ memset(eee, 0, sizeof(*eee));
+
+ eee->eee_active = keee->eee_active;
+ eee->eee_enabled = keee->eee_enabled;
+ eee->tx_lpi_enabled = keee->tx_lpi_enabled;
+ eee->tx_lpi_timer = keee->tx_lpi_timer;
+
+ overflow = !ethtool_convert_link_mode_to_legacy_u32(&eee->supported,
+ keee->supported);
+ ethtool_convert_link_mode_to_legacy_u32(&eee->advertised,
+ keee->advertised);
+ ethtool_convert_link_mode_to_legacy_u32(&eee->lp_advertised,
+ keee->lp_advertised);
+ if (overflow)
+ pr_warn("Ethtool ioctl interface doesn't support passing EEE linkmodes beyond bit 32\n");
+}
+
static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_eee edata;
+ struct ethtool_keee keee;
+ struct ethtool_eee eee;
int rc;
if (!dev->ethtool_ops->get_eee)
return -EOPNOTSUPP;
- memset(&edata, 0, sizeof(struct ethtool_eee));
- edata.cmd = ETHTOOL_GEEE;
- rc = dev->ethtool_ops->get_eee(dev, &edata);
-
+ memset(&keee, 0, sizeof(keee));
+ rc = dev->ethtool_ops->get_eee(dev, &keee);
if (rc)
return rc;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ keee_to_eee(&eee, &keee);
+ if (copy_to_user(useraddr, &eee, sizeof(eee)))
return -EFAULT;
return 0;
@@ -1531,16 +1567,18 @@ static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_eee edata;
+ struct ethtool_keee keee;
+ struct ethtool_eee eee;
int ret;
if (!dev->ethtool_ops->set_eee)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ if (copy_from_user(&eee, useraddr, sizeof(eee)))
return -EFAULT;
- ret = dev->ethtool_ops->set_eee(dev, &edata);
+ eee_to_keee(&keee, &eee);
+ ret = dev->ethtool_ops->set_eee(dev, &keee);
if (!ret)
ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL);
return ret;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index fe3553f60bf3..bd04f28d5cf4 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -477,11 +477,7 @@ out:
return ret;
}
-/* Default ->dumpit() handler for GET requests. Device iteration copied from
- * rtnl_dump_ifinfo(); we have to be more careful about device hashtable
- * persistence as we cannot guarantee to hold RTNL lock through the whole
- * function as rtnetnlink does.
- */
+/* Default ->dumpit() handler for GET requests. */
static int ethnl_default_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -490,14 +486,14 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
struct net_device *dev;
int ret = 0;
- rtnl_lock();
+ rcu_read_lock();
for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
dev_hold(dev);
- rtnl_unlock();
+ rcu_read_unlock();
ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
- rtnl_lock();
+ rcu_read_lock();
dev_put(dev);
if (ret < 0 && ret != -EOPNOTSUPP) {
@@ -507,7 +503,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
}
ret = 0;
}
- rtnl_unlock();
+ rcu_read_unlock();
return ret;
}
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 16ed7bfd29e4..34fd1d9b2db8 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);
/* Act */
- fput(filp);
+ /* Ensure the close/release/put process has run to
+ * completion before checking the result.
+ */
+ __fput_sync(filp);
/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 7ceb9ac6e730..c98b5b71ad7c 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -28,29 +28,19 @@ static bool is_slave_up(struct net_device *dev)
return dev && is_admin_up(dev) && netif_oper_up(dev);
}
-static void __hsr_set_operstate(struct net_device *dev, int transition)
-{
- write_lock(&dev_base_lock);
- if (dev->operstate != transition) {
- dev->operstate = transition;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- } else {
- write_unlock(&dev_base_lock);
- }
-}
-
static void hsr_set_operstate(struct hsr_port *master, bool has_carrier)
{
- if (!is_admin_up(master->dev)) {
- __hsr_set_operstate(master->dev, IF_OPER_DOWN);
+ struct net_device *dev = master->dev;
+
+ if (!is_admin_up(dev)) {
+ netdev_set_operstate(dev, IF_OPER_DOWN);
return;
}
if (has_carrier)
- __hsr_set_operstate(master->dev, IF_OPER_UP);
+ netdev_set_operstate(dev, IF_OPER_UP);
else
- __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN);
+ netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
}
static bool hsr_check_carrier(struct hsr_port *master)
@@ -78,14 +68,14 @@ static void hsr_check_announce(struct net_device *hsr_dev,
hsr = netdev_priv(hsr_dev);
- if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) {
+ if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) {
/* Went up */
hsr->announce_count = 0;
mod_timer(&hsr->announce_timer,
jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
}
- if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP)
+ if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP)
/* Went down */
del_timer(&hsr->announce_timer);
}
@@ -100,7 +90,7 @@ void hsr_check_carrier_and_operstate(struct hsr_priv *hsr)
/* netif_stacked_transfer_operstate() cannot be used here since
* it doesn't set IF_OPER_LOWERLAYERDOWN (?)
*/
- old_operstate = master->dev->operstate;
+ old_operstate = READ_ONCE(master->dev->operstate);
has_carrier = hsr_check_carrier(master);
hsr_set_operstate(master, has_carrier);
hsr_check_announce(master->dev, old_operstate);
@@ -308,7 +298,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
return;
}
@@ -355,7 +345,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
}
@@ -477,7 +467,7 @@ static const struct net_device_ops hsr_device_ops = {
.ndo_set_rx_mode = hsr_set_rx_mode,
};
-static struct device_type hsr_type = {
+static const struct device_type hsr_type = {
.name = "hsr",
};
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 80cdc6f6b34c..5d68cb181695 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return false;
/* Get next tlv */
- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+ total_length += hsr_sup_tag->tlv.HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
skb_pull(skb, total_length);
@@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
continue;
/* Don't send frame over port where it has been sent before.
- * Also fro SAN, this shouldn't be done.
+ * Also for SAN, this shouldn't be done.
*/
if (!frame->is_from_san &&
hsr_register_frame_out(port, frame->node_src,
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 2c087b7f17c5..77b4e92027c5 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -93,7 +93,7 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
static int lowpan_get_iflink(const struct net_device *dev)
{
- return lowpan_802154_dev(dev)->wdev->ifindex;
+ return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex);
}
static const struct net_device_ops lowpan_netdev_ops = {
@@ -280,5 +280,6 @@ static void __exit lowpan_cleanup_module(void)
module_init(lowpan_init_module);
module_exit(lowpan_cleanup_module);
+MODULE_DESCRIPTION("IPv6 over Low power Wireless Personal Area Network IEEE 802.15.4 core");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("lowpan");
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 00302e8b9615..990a83455dcf 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1137,4 +1137,5 @@ module_init(af_ieee802154_init);
module_exit(af_ieee802154_remove);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IEEE 802.15.4 socket interface");
MODULE_ALIAS_NETPROTO(PF_IEEE802154);
diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c
index d2903933805c..6708160ebf9f 100644
--- a/net/ieee802154/sysfs.c
+++ b/net/ieee802154/sysfs.c
@@ -93,7 +93,7 @@ static SIMPLE_DEV_PM_OPS(wpan_phy_pm_ops, wpan_phy_suspend, wpan_phy_resume);
#define WPAN_PHY_PM_OPS NULL
#endif
-struct class wpan_phy_class = {
+const struct class wpan_phy_class = {
.name = "ieee802154",
.dev_release = wpan_phy_release,
.dev_groups = pmib_groups,
diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h
index 337545b639e9..69961e166257 100644
--- a/net/ieee802154/sysfs.h
+++ b/net/ieee802154/sysfs.h
@@ -5,6 +5,6 @@
int wpan_phy_sysfs_init(void);
void wpan_phy_sysfs_exit(void);
-extern struct class wpan_phy_class;
+extern const struct class wpan_phy_class;
#endif /* __IEEE802154_SYSFS_H */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 835f4f9d98d2..55bd72997b31 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,6 +119,7 @@
#endif
#include <net/l3mdev.h>
#include <net/compat.h>
+#include <net/rps.h>
#include <trace/events/sock.h>
@@ -330,6 +331,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
@@ -1100,7 +1104,7 @@ const struct proto_ops inet_dgram_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.splice_eof = inet_splice_eof,
- .set_peek_off = sk_set_peek_off,
+ .set_peek_off = udp_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet_compat_ioctl,
#endif
@@ -1323,7 +1327,7 @@ int inet_sk_rebuild_header(struct sock *sk)
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
- sk->sk_protocol, RT_CONN_FLAGS(sk),
+ sk->sk_protocol, ip_sock_rt_tos(sk),
sk->sk_bound_dev_if);
if (!IS_ERR(rt)) {
err = 0;
@@ -1625,10 +1629,12 @@ EXPORT_SYMBOL(inet_current_timestamp);
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
- if (sk->sk_family == AF_INET)
+ unsigned int family = READ_ONCE(sk->sk_family);
+
+ if (family == AF_INET)
return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
+ if (family == AF_INET6)
return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
return -EINVAL;
@@ -1746,19 +1752,6 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
- .no_policy = 1,
- .icmp_strict_tag_validation = 1,
-};
-
-static const struct net_protocol udp_protocol = {
- .handler = udp_rcv,
- .err_handler = udp_err,
- .no_policy = 1,
-};
-
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = icmp_err,
@@ -1899,14 +1892,6 @@ static int ipv4_proc_init(void);
* IP protocol layer initialiser
*/
-static struct packet_offload ip_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IP),
- .callbacks = {
- .gso_segment = inet_gso_segment,
- .gro_receive = inet_gro_receive,
- .gro_complete = inet_gro_complete,
- },
-};
static const struct net_offload ipip_offload = {
.callbacks = {
@@ -1933,7 +1918,15 @@ static int __init ipv4_offload_init(void)
if (ipip_offload_init() < 0)
pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
- dev_add_offload(&ip_packet_offload);
+ net_hotdata.ip_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IP),
+ .callbacks = {
+ .gso_segment = inet_gso_segment,
+ .gro_receive = inet_gro_receive,
+ .gro_complete = inet_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ip_packet_offload);
return 0;
}
@@ -1987,9 +1980,22 @@ static int __init inet_init(void)
if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
pr_crit("%s: Cannot add ICMP protocol\n", __func__);
- if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
+
+ net_hotdata.udp_protocol = (struct net_protocol) {
+ .handler = udp_rcv,
+ .err_handler = udp_err,
+ .no_policy = 1,
+ };
+ if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0)
pr_crit("%s: Cannot add UDP protocol\n", __func__);
- if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
+
+ net_hotdata.tcp_protocol = (struct net_protocol) {
+ .handler = tcp_v4_rcv,
+ .err_handler = tcp_v4_err,
+ .no_policy = 1,
+ .icmp_strict_tag_validation = 1,
+ };
+ if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0)
pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a2e6e1fdf82b..64aec3dff8ec 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -597,5 +597,6 @@ static void __exit ah4_fini(void)
module_init(ah4_init);
module_exit(ah4_fini);
+MODULE_DESCRIPTION("IPv4 AH transformation library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9456f5bb35e5..0d0d725b46ad 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
if (neigh) {
if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index ae8b15e6896f..7f518ea5f4ac 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -12,7 +12,7 @@
#include <net/bpf_sk_storage.h>
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+static struct bpf_struct_ops bpf_tcp_congestion_ops;
static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info),
@@ -20,6 +20,7 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
+static const struct btf_type *tcp_congestion_ops_type;
static int bpf_tcp_ca_init(struct btf *btf)
{
@@ -36,6 +37,11 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
+ type_id = btf_find_by_name_kind(btf, "tcp_congestion_ops", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ tcp_congestion_ops_type = btf_type_by_id(btf, type_id);
+
return 0;
}
@@ -149,7 +155,7 @@ static u32 prog_ops_moff(const struct bpf_prog *prog)
u32 midx;
midx = prog->expected_attach_type;
- t = bpf_tcp_congestion_ops.type;
+ t = tcp_congestion_ops_type;
m = &btf_type_member(t)[midx];
return __btf_member_bit_offset(t, m) / 8;
@@ -191,17 +197,17 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
-BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids)
+BTF_KFUNCS_START(bpf_tcp_ca_check_kfunc_ids)
BTF_ID_FLAGS(func, tcp_reno_ssthresh)
BTF_ID_FLAGS(func, tcp_reno_cong_avoid)
BTF_ID_FLAGS(func, tcp_reno_undo_cwnd)
BTF_ID_FLAGS(func, tcp_slow_start)
BTF_ID_FLAGS(func, tcp_cong_avoid_ai)
-BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids)
+BTF_KFUNCS_END(bpf_tcp_ca_check_kfunc_ids)
static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
.owner = THIS_MODULE,
@@ -339,7 +345,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
.release = __bpf_tcp_ca_release,
};
-struct bpf_struct_ops bpf_tcp_congestion_ops = {
+static struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
@@ -350,10 +356,16 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
.cfi_stubs = &__bpf_ops_tcp_congestion_ops,
+ .owner = THIS_MODULE,
};
static int __init bpf_tcp_ca_kfunc_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_tcp_congestion_ops, tcp_congestion_ops);
+
+ return ret;
}
late_initcall(bpf_tcp_ca_kfunc_init);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index d048aa833293..8b17d83e5fde 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -864,11 +864,8 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
net_clen_bits,
net_spot + 1,
1);
- if (net_spot < 0) {
- if (net_spot == -2)
- return -EFAULT;
+ if (net_spot < 0)
return 0;
- }
switch (doi_def->type) {
case CIPSO_V4_MAP_PASS:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 2cc50cbfc2a3..cc6d0bd7b0a9 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -119,7 +119,7 @@ void ip4_datagram_release_cb(struct sock *sk)
rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
dst = !IS_ERR(rt) ? &rt->dst : NULL;
sk_dst_set(sk, dst);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ca0ff15dc8fa..7a437f0d4190 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -713,34 +713,37 @@ static void check_lifetime(struct work_struct *work)
rcu_read_lock();
hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
- unsigned long age;
+ unsigned long age, tstamp;
+ u32 preferred_lft;
+ u32 valid_lft;
+ u32 flags;
- if (ifa->ifa_flags & IFA_F_PERMANENT)
+ flags = READ_ONCE(ifa->ifa_flags);
+ if (flags & IFA_F_PERMANENT)
continue;
+ preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
+ valid_lft = READ_ONCE(ifa->ifa_valid_lft);
+ tstamp = READ_ONCE(ifa->ifa_tstamp);
/* We try to batch several events at once. */
- age = (now - ifa->ifa_tstamp +
+ age = (now - tstamp +
ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
- if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
- age >= ifa->ifa_valid_lft) {
+ if (valid_lft != INFINITY_LIFE_TIME &&
+ age >= valid_lft) {
change_needed = true;
- } else if (ifa->ifa_preferred_lft ==
+ } else if (preferred_lft ==
INFINITY_LIFE_TIME) {
continue;
- } else if (age >= ifa->ifa_preferred_lft) {
- if (time_before(ifa->ifa_tstamp +
- ifa->ifa_valid_lft * HZ, next))
- next = ifa->ifa_tstamp +
- ifa->ifa_valid_lft * HZ;
+ } else if (age >= preferred_lft) {
+ if (time_before(tstamp + valid_lft * HZ, next))
+ next = tstamp + valid_lft * HZ;
- if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
+ if (!(flags & IFA_F_DEPRECATED))
change_needed = true;
- } else if (time_before(ifa->ifa_tstamp +
- ifa->ifa_preferred_lft * HZ,
+ } else if (time_before(tstamp + preferred_lft * HZ,
next)) {
- next = ifa->ifa_tstamp +
- ifa->ifa_preferred_lft * HZ;
+ next = tstamp + preferred_lft * HZ;
}
}
rcu_read_unlock();
@@ -804,24 +807,26 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
__u32 prefered_lft)
{
unsigned long timeout;
+ u32 flags;
- ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
+ flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout))
- ifa->ifa_valid_lft = timeout;
+ WRITE_ONCE(ifa->ifa_valid_lft, timeout);
else
- ifa->ifa_flags |= IFA_F_PERMANENT;
+ flags |= IFA_F_PERMANENT;
timeout = addrconf_timeout_fixup(prefered_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
if (timeout == 0)
- ifa->ifa_flags |= IFA_F_DEPRECATED;
- ifa->ifa_preferred_lft = timeout;
+ flags |= IFA_F_DEPRECATED;
+ WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
}
- ifa->ifa_tstamp = jiffies;
+ WRITE_ONCE(ifa->ifa_flags, flags);
+ WRITE_ONCE(ifa->ifa_tstamp, jiffies);
if (!ifa->ifa_cstamp)
- ifa->ifa_cstamp = ifa->ifa_tstamp;
+ WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
}
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
@@ -1312,7 +1317,7 @@ static __be32 in_dev_select_addr(const struct in_device *in_dev,
const struct in_ifaddr *ifa;
in_dev_for_each_ifa_rcu(ifa, in_dev) {
- if (ifa->ifa_flags & IFA_F_SECONDARY)
+ if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
continue;
if (ifa->ifa_scope != RT_SCOPE_LINK &&
ifa->ifa_scope <= scope)
@@ -1340,7 +1345,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
localnet_scope = RT_SCOPE_LINK;
in_dev_for_each_ifa_rcu(ifa, in_dev) {
- if (ifa->ifa_flags & IFA_F_SECONDARY)
+ if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
continue;
if (min(ifa->ifa_scope, localnet_scope) > scope)
continue;
@@ -1671,11 +1676,12 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
}
-static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
+static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
struct inet_fill_args *args)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
+ unsigned long tstamp;
u32 preferred, valid;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
@@ -1686,7 +1692,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
- ifm->ifa_flags = ifa->ifa_flags;
+ ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
@@ -1694,11 +1700,12 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto nla_put_failure;
+ tstamp = READ_ONCE(ifa->ifa_tstamp);
if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
- preferred = ifa->ifa_preferred_lft;
- valid = ifa->ifa_valid_lft;
+ preferred = READ_ONCE(ifa->ifa_preferred_lft);
+ valid = READ_ONCE(ifa->ifa_valid_lft);
if (preferred != INFINITY_LIFE_TIME) {
- long tval = (jiffies - ifa->ifa_tstamp) / HZ;
+ long tval = (jiffies - tstamp) / HZ;
if (preferred > tval)
preferred -= tval;
@@ -1725,10 +1732,10 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
(ifa->ifa_proto &&
nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
- nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+ nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
(ifa->ifa_rt_priority &&
nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
- put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+ put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
preferred, valid))
goto nla_put_failure;
@@ -1798,15 +1805,15 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
}
static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
- struct netlink_callback *cb, int s_ip_idx,
+ struct netlink_callback *cb, int *s_ip_idx,
struct inet_fill_args *fillargs)
{
struct in_ifaddr *ifa;
int ip_idx = 0;
int err;
- in_dev_for_each_ifa_rtnl(ifa, in_dev) {
- if (ip_idx < s_ip_idx) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ip_idx < *s_ip_idx) {
ip_idx++;
continue;
}
@@ -1818,13 +1825,28 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
ip_idx++;
}
err = 0;
-
+ ip_idx = 0;
done:
- cb->args[2] = ip_idx;
+ *s_ip_idx = ip_idx;
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+ READ_ONCE(net->dev_base_seq);
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
@@ -1837,76 +1859,51 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
};
struct net *net = sock_net(skb->sk);
struct net *tgt_net = net;
- int h, s_h;
- int idx, s_idx;
- int s_ip_idx;
- struct net_device *dev;
+ struct {
+ unsigned long ifindex;
+ int ip_idx;
+ } *ctx = (void *)cb->ctx;
struct in_device *in_dev;
- struct hlist_head *head;
+ struct net_device *dev;
int err = 0;
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- s_ip_idx = cb->args[2];
-
+ rcu_read_lock();
if (cb->strict_check) {
err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- goto put_tgt_net;
+ goto done;
- err = 0;
if (fillargs.ifindex) {
- dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
- if (!dev) {
- err = -ENODEV;
- goto put_tgt_net;
- }
-
- in_dev = __in_dev_get_rtnl(dev);
- if (in_dev) {
- err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
- &fillargs);
- }
- goto put_tgt_net;
- }
- }
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
- tgt_net->dev_base_seq;
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (h > s_h || idx > s_idx)
- s_ip_idx = 0;
+ err = -ENODEV;
+ dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
+ if (!dev)
+ goto done;
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
- goto cont;
-
- err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
- &fillargs);
- if (err < 0) {
- rcu_read_unlock();
goto done;
- }
-cont:
- idx++;
+ err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ goto done;
}
- rcu_read_unlock();
}
+ cb->seq = inet_base_seq(tgt_net);
+
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ continue;
+ err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ if (err < 0)
+ goto done;
+ }
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-put_tgt_net:
if (fillargs.netnsid >= 0)
put_net(tgt_net);
-
- return skb->len ? : err;
+ rcu_read_unlock();
+ return err;
}
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
@@ -1968,7 +1965,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
return -EMSGSIZE;
for (i = 0; i < IPV4_DEVCONF_MAX; i++)
- ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+ ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
return 0;
}
@@ -2054,9 +2051,9 @@ static int inet_netconf_msgsize_devconf(int type)
}
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
- struct ipv4_devconf *devconf, u32 portid,
- u32 seq, int event, unsigned int flags,
- int type)
+ const struct ipv4_devconf *devconf,
+ u32 portid, u32 seq, int event,
+ unsigned int flags, int type)
{
struct nlmsghdr *nlh;
struct netconfmsg *ncm;
@@ -2081,27 +2078,28 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING,
- IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_RP_FILTER) &&
nla_put_s32(skb, NETCONFA_RP_FILTER,
- IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
+ IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_BC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_BC_FORWARDING,
- IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_PROXY_NEIGH) &&
nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
- IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
+ IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
- IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+ IPV4_DEVCONF_RO(*devconf,
+ IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
goto nla_put_failure;
out:
@@ -2190,21 +2188,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
- struct nlattr *tb[NETCONFA_MAX+1];
+ struct nlattr *tb[NETCONFA_MAX + 1];
+ const struct ipv4_devconf *devconf;
+ struct in_device *in_dev = NULL;
+ struct net_device *dev = NULL;
struct sk_buff *skb;
- struct ipv4_devconf *devconf;
- struct in_device *in_dev;
- struct net_device *dev;
int ifindex;
int err;
err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
if (err)
- goto errout;
+ return err;
- err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
- goto errout;
+ return -EINVAL;
ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
switch (ifindex) {
@@ -2215,10 +2212,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
devconf = net->ipv4.devconf_dflt;
break;
default:
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
- goto errout;
- in_dev = __in_dev_get_rtnl(dev);
+ err = -ENODEV;
+ dev = dev_get_by_index(net, ifindex);
+ if (dev)
+ in_dev = in_dev_get(dev);
if (!in_dev)
goto errout;
devconf = &in_dev->cnf;
@@ -2242,6 +2239,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
+ if (in_dev)
+ in_dev_put(in_dev);
+ dev_put(dev);
return err;
}
@@ -2250,11 +2250,13 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx, s_idx;
+ struct {
+ unsigned long ifindex;
+ unsigned int all_default;
+ } *ctx = (void *)cb->ctx;
+ const struct in_device *in_dev;
struct net_device *dev;
- struct in_device *in_dev;
- struct hlist_head *head;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -2271,65 +2273,45 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- in_dev = __in_dev_get_rcu(dev);
- if (!in_dev)
- goto cont;
-
- if (inet_netconf_fill_devconf(skb, dev->ifindex,
- &in_dev->cnf,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ continue;
+ err = inet_netconf_fill_devconf(skb, dev->ifindex,
+ &in_dev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ goto done;
}
- if (h == NETDEV_HASHENTRIES) {
- if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
- net->ipv4.devconf_all,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ if (ctx->all_default == 0) {
+ err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv4.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
- }
- if (h == NETDEV_HASHENTRIES + 1) {
- if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
- net->ipv4.devconf_dflt,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ ctx->all_default++;
+ }
+ if (ctx->all_default == 1) {
+ err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv4.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
+ ctx->all_default++;
}
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-
- return skb->len;
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_SYSCTL
@@ -2810,7 +2792,9 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
- rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
+ RTNL_FLAG_DUMP_UNLOCKED);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf, 0);
+ inet_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4ccfc104f13a..4dd9e5040672 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1247,5 +1247,6 @@ static void __exit esp4_fini(void)
module_init(esp4_init);
module_exit(esp4_fini);
+MODULE_DESCRIPTION("IPv4 ESP transformation library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 390f4be7f7be..48741352a88a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -916,7 +916,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct rtmsg *rtm;
int err, i;
- ASSERT_RTNL();
+ if (filter->rtnl_held)
+ ASSERT_RTNL();
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
@@ -961,7 +962,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
break;
case RTA_OIF:
ifindex = nla_get_u32(tb[i]);
- filter->dev = __dev_get_by_index(net, ifindex);
+ if (filter->rtnl_held)
+ filter->dev = __dev_get_by_index(net, ifindex);
+ else
+ filter->dev = dev_get_by_index_rcu(net, ifindex);
if (!filter->dev)
return -ENODEV;
break;
@@ -983,20 +987,24 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct fib_dump_filter filter = { .dump_routes = true,
- .dump_exceptions = true };
+ struct fib_dump_filter filter = {
+ .dump_routes = true,
+ .dump_exceptions = true,
+ .rtnl_held = false,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0, err;
+ int dumped = 0, err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -1005,29 +1013,26 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* ipv4 does not use prefix flag */
if (filter.flags & RTM_F_PREFIX)
- return skb->len;
+ goto unlock;
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET)
- return skb->len;
+ goto unlock;
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
-
- rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
- rcu_read_unlock();
- return skb->len ? : err;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
-
+ err = 0;
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
@@ -1038,25 +1043,20 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
err = fib_table_dump(tb, skb, cb, &filter);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
+ if (err < 0)
+ goto out;
dumped = 1;
next:
e++;
}
}
out:
- err = skb->len;
-out_err:
- rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
+unlock:
+ rcu_read_unlock();
return err;
}
@@ -1659,5 +1659,6 @@ void __init ip_fib_init(void)
rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
+ RTNL_FLAG_DUMP_UNLOCKED);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3ff35f811765..f474106464d2 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -501,7 +501,7 @@ static void tnode_free(struct key_vector *tn)
if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
- synchronize_rcu();
+ synchronize_net();
}
}
@@ -2368,7 +2368,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
* and key == 0 means the dump has wrapped around and we are done.
*/
if (count && !key)
- return skb->len;
+ return 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
int err;
@@ -2394,7 +2394,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
cb->args[3] = key;
cb->args[2] = count;
- return skb->len;
+ return 0;
}
void __init fib_trie_init(void)
diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c
index 4da03bf45c9b..06e5572f296f 100644
--- a/net/ipv4/fou_bpf.c
+++ b/net/ipv4/fou_bpf.c
@@ -100,10 +100,10 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
__bpf_kfunc_end_defs();
-BTF_SET8_START(fou_kfunc_set)
+BTF_KFUNCS_START(fou_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_set_fou_encap)
BTF_ID_FLAGS(func, bpf_skb_get_fou_encap)
-BTF_SET8_END(fou_kfunc_set)
+BTF_KFUNCS_END(fou_kfunc_set)
static const struct btf_kfunc_id_set fou_bpf_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 0c41076e31ed..a8494f796dca 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -351,7 +351,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
optlen = guehdr->hlen << 2;
len += optlen;
- if (skb_gro_header_hard(skb, len)) {
+ if (!skb_gro_may_pull(skb, len)) {
guehdr = skb_gro_header_slow(skb, len, off);
if (unlikely(!guehdr))
goto out;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 311e70bfce40..5028c72d494a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
grehlen += GRE_HEADER_SECTION;
hlen = off + grehlen;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
greh = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!greh))
goto out;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index efeeca2b1328..717e97a389a8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -120,12 +120,12 @@
*/
#define IGMP_V1_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
((in_dev)->mr_v1_seen && \
time_before(jiffies, (in_dev)->mr_v1_seen)))
#define IGMP_V2_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
((in_dev)->mr_v2_seen && \
time_before(jiffies, (in_dev)->mr_v2_seen)))
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8e2eb1793685..7d8090f109ef 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -727,6 +727,10 @@ out:
}
if (req)
reqsk_put(req);
+
+ if (newsk)
+ inet_init_csk_locks(newsk);
+
return newsk;
out_err:
newsk = NULL;
@@ -902,8 +906,9 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
memcpy(nreq_sk, req_sk,
offsetof(struct sock, sk_dontcopy_begin));
- memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
- req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
+ unsafe_memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
+ req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end),
+ /* alloc is larger than struct, see above */);
sk_node_init(&nreq_sk->sk_node);
nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
@@ -1463,7 +1468,7 @@ static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *f
rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
if (IS_ERR(rt))
rt = NULL;
if (rt)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e6b6aa0579e..7adace541fe2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -32,7 +32,7 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
-static const struct inet_diag_handler **inet_diag_table;
+static const struct inet_diag_handler __rcu **inet_diag_table;
struct inet_diag_entry {
const __be32 *saddr;
@@ -48,28 +48,28 @@ struct inet_diag_entry {
#endif
};
-static DEFINE_MUTEX(inet_diag_table_mutex);
-
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
- if (proto < 0 || proto >= IPPROTO_MAX) {
- mutex_lock(&inet_diag_table_mutex);
- return ERR_PTR(-ENOENT);
- }
+ const struct inet_diag_handler *handler;
- if (!inet_diag_table[proto])
+ if (proto < 0 || proto >= IPPROTO_MAX)
+ return NULL;
+
+ if (!READ_ONCE(inet_diag_table[proto]))
sock_load_diag_module(AF_INET, proto);
- mutex_lock(&inet_diag_table_mutex);
- if (!inet_diag_table[proto])
- return ERR_PTR(-ENOENT);
+ rcu_read_lock();
+ handler = rcu_dereference(inet_diag_table[proto]);
+ if (handler && !try_module_get(handler->owner))
+ handler = NULL;
+ rcu_read_unlock();
- return inet_diag_table[proto];
+ return handler;
}
static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
- mutex_unlock(&inet_diag_table_mutex);
+ module_put(handler->owner);
}
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
@@ -104,9 +104,12 @@ static size_t inet_sk_attr_size(struct sock *sk,
const struct inet_diag_handler *handler;
size_t aux = 0;
- handler = inet_diag_table[req->sdiag_protocol];
+ rcu_read_lock();
+ handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
+ DEBUG_NET_WARN_ON_ONCE(!handler);
if (handler && handler->idiag_get_aux_size)
aux = handler->idiag_get_aux_size(sk, net_admin);
+ rcu_read_unlock();
return nla_total_size(sizeof(struct tcp_info))
+ nla_total_size(sizeof(struct inet_diag_msg))
@@ -244,10 +247,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct nlmsghdr *nlh;
struct nlattr *attr;
void *info = NULL;
+ int protocol;
cb_data = cb->data;
- handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)];
- BUG_ON(!handler);
+ protocol = inet_diag_get_protocol(req, cb_data);
+
+ /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
+ handler = rcu_dereference_protected(inet_diag_table[protocol], 1);
+ DEBUG_NET_WARN_ON_ONCE(!handler);
+ if (!handler)
+ return -ENXIO;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
@@ -605,9 +614,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
protocol = inet_diag_get_protocol(req, &dump_data);
handler = inet_diag_lock_handler(protocol);
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- } else if (cmd == SOCK_DIAG_BY_FAMILY) {
+ if (!handler)
+ return -ENOENT;
+
+ if (cmd == SOCK_DIAG_BY_FAMILY) {
struct netlink_callback cb = {
.nlh = nlh,
.skb = in_skb,
@@ -1035,6 +1045,10 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
num = 0;
ilb = &hashinfo->lhash2[i];
+ if (hlist_nulls_empty(&ilb->nulls_head)) {
+ s_num = 0;
+ continue;
+ }
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
@@ -1099,6 +1113,10 @@ resume_bind_walk:
accum = 0;
ibb = &hashinfo->bhash2[i];
+ if (hlist_empty(&ibb->chain)) {
+ s_num = 0;
+ continue;
+ }
spin_lock_bh(&ibb->lock);
inet_bind_bucket_for_each(tb2, &ibb->chain) {
if (!net_eq(ib2_net(tb2), net))
@@ -1259,12 +1277,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
again:
prev_min_dump_alloc = cb->min_dump_alloc;
handler = inet_diag_lock_handler(protocol);
- if (!IS_ERR(handler))
+ if (handler) {
handler->dump(skb, cb, r);
- else
- err = PTR_ERR(handler);
- inet_diag_unlock_handler(handler);
-
+ inet_diag_unlock_handler(handler);
+ } else {
+ err = -ENOENT;
+ }
/* The skb is not large enough to fit one sk info and
* inet_sk_diag_fill() has requested for a larger skb.
*/
@@ -1457,10 +1475,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
handler = inet_diag_lock_handler(sk->sk_protocol);
- if (IS_ERR(handler)) {
- inet_diag_unlock_handler(handler);
+ if (!handler) {
nlmsg_cancel(skb, nlh);
- return PTR_ERR(handler);
+ return -ENOENT;
}
attr = handler->idiag_info_size
@@ -1479,6 +1496,7 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
static const struct sock_diag_handler inet_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_INET,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
@@ -1486,6 +1504,7 @@ static const struct sock_diag_handler inet_diag_handler = {
};
static const struct sock_diag_handler inet6_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_INET6,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
@@ -1495,20 +1514,12 @@ static const struct sock_diag_handler inet6_diag_handler = {
int inet_diag_register(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
- int err = -EINVAL;
if (type >= IPPROTO_MAX)
- goto out;
+ return -EINVAL;
- mutex_lock(&inet_diag_table_mutex);
- err = -EEXIST;
- if (!inet_diag_table[type]) {
- inet_diag_table[type] = h;
- err = 0;
- }
- mutex_unlock(&inet_diag_table_mutex);
-out:
- return err;
+ return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type],
+ NULL, h) ? 0 : -EEXIST;
}
EXPORT_SYMBOL_GPL(inet_diag_register);
@@ -1519,12 +1530,16 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
if (type >= IPPROTO_MAX)
return;
- mutex_lock(&inet_diag_table_mutex);
- inet_diag_table[type] = NULL;
- mutex_unlock(&inet_diag_table_mutex);
+ xchg((const struct inet_diag_handler **)&inet_diag_table[type],
+ NULL);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
+static const struct sock_diag_inet_compat inet_diag_compat = {
+ .owner = THIS_MODULE,
+ .fn = inet_diag_rcv_msg_compat,
+};
+
static int __init inet_diag_init(void)
{
const int inet_diag_table_size = (IPPROTO_MAX *
@@ -1543,7 +1558,7 @@ static int __init inet_diag_init(void)
if (err)
goto out_free_inet;
- sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
+ sock_diag_register_inet_compat(&inet_diag_compat);
out:
return err;
@@ -1558,7 +1573,7 @@ static void __exit inet_diag_exit(void)
{
sock_diag_unregister(&inet6_diag_handler);
sock_diag_unregister(&inet_diag_handler);
- sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
+ sock_diag_unregister_inet_compat(&inet_diag_compat);
kfree(inet_diag_table);
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 93e9193df544..7498af320164 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,7 @@
#include <net/inet6_hashtables.h>
#endif
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
@@ -32,8 +33,6 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
const __be16 fport)
{
- static u32 inet_ehash_secret __read_mostly;
-
net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
return __inet_ehashfn(laddr, lport, faddr, fport,
@@ -1130,10 +1129,33 @@ ok:
return 0;
error:
+ if (sk_hashed(sk)) {
+ spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash);
+
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+
+ spin_lock(lock);
+ sk_nulls_del_node_init_rcu(sk);
+ spin_unlock(lock);
+
+ sk->sk_hash = 0;
+ inet_sk(sk)->inet_sport = 0;
+ inet_sk(sk)->inet_num = 0;
+
+ if (tw)
+ inet_twsk_bind_unhash(tw, hinfo);
+ }
+
spin_unlock(&head2->lock);
if (tb_created)
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
+ spin_unlock(&head->lock);
+
+ if (tw)
+ inet_twsk_deschedule_put(tw);
+
+ local_bh_enable();
+
return -ENOMEM;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e9fed83e9b3c..5bd759963451 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -81,10 +81,7 @@ void __init inet_initpeers(void)
inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
- peer_cachep = kmem_cache_create("inet_peer_cache",
- sizeof(struct inet_peer),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
- NULL);
+ peer_cachep = KMEM_CACHE(inet_peer, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}
/* Called with rcu_read_lock() or base->lock held */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5169c3c72cff..7b16c211b904 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1025,14 +1025,16 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit_batch = ipgre_exit_batch_net,
+ .exit_batch_rtnl = ipgre_exit_batch_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1697,14 +1699,16 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit_batch = ipgre_tap_exit_batch_net,
+ .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1715,14 +1719,16 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
+ dev_to_kill);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit_batch = erspan_exit_batch_net,
+ .exit_batch_rtnl = erspan_exit_batch_rtnl,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1793,6 +1799,7 @@ static void __exit ipgre_fini(void)
module_init(ipgre_init);
module_exit(ipgre_fini);
+MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b06f678b03a1..33f93dc730a3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -493,7 +493,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_CONN_FLAGS_TOS(sk, tos),
+ RT_TOS(tos),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@@ -972,8 +972,8 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
u32 tskey = 0;
skb = skb_peek_tail(queue);
@@ -982,10 +982,6 @@ static int __ip_append_data(struct sock *sk,
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
paged = !!cork->gso_size;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1052,6 +1048,11 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb,
@@ -1274,6 +1275,8 @@ error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
@@ -1287,6 +1290,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
if (unlikely(!rt))
return -EFAULT;
+ cork->fragsize = ip_sk_use_pmtu(sk) ?
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
/*
* setup for corking.
*/
@@ -1303,12 +1312,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->addr = ipc->addr;
}
- cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
-
- if (!inetdev_valid_mtu(cork->fragsize))
- return -ENETUNREACH;
-
cork->gso_size = ipc->gso_size;
cork->dst = &rt->dst;
@@ -1455,6 +1458,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
skb->mark = cork->mark;
skb->tstamp = cork->transmit_time;
+ skb->mono_delivery_time = !!skb->tstamp;
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
* on dst refcount
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 7aa9dc0e6760..cf377377b52d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -894,7 +894,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
- int val = 0, err;
+ int val = 0, err, retv;
bool needs_rtnl = setsockopt_needs_rtnl(optname);
switch (optname) {
@@ -938,8 +938,12 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
/* If optlen==0, it is equivalent to val == 0 */
- if (optname == IP_ROUTER_ALERT)
- return ip_ra_control(sk, val ? 1 : 0, NULL);
+ if (optname == IP_ROUTER_ALERT) {
+ retv = ip_ra_control(sk, val ? 1 : 0, NULL);
+ if (retv == 0)
+ inet_assign_bit(RTALERT, sk, val);
+ return retv;
+ }
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1363,12 +1367,13 @@ e_inval:
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
+ * @drop_dst: if true, drops skb dst
*
* To support IP_CMSG_PKTINFO option, we store rt_iif and specific
* destination in skb->cb[] before dst drop.
* This way, receiver doesn't make cache line misses to read rtable.
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
bool prepare = inet_test_bit(PKTINFO, sk) ||
@@ -1397,7 +1402,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- skb_dst_drop(skb);
+ if (drop_dst)
+ skb_dst_drop(skb);
}
int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
@@ -1573,6 +1579,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_BIND_ADDRESS_NO_PORT:
val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
goto copyval;
+ case IP_ROUTER_ALERT:
+ val = inet_test_bit(RTALERT, sk);
+ goto copyval;
case IP_TTL:
val = READ_ONCE(inet->uc_ttl);
if (val < 0)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index beeae624c412..1b8d8ff9a237 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -102,10 +102,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else
- cand = t;
+ cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
@@ -117,9 +116,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -137,9 +136,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -150,9 +149,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
!(t->dev->flags & IFF_UP))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -221,7 +220,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- link == t->parms.link &&
+ link == READ_ONCE(t->parms.link) &&
type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
@@ -378,7 +377,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
bool log_ecn_error)
{
const struct iphdr *iph = ip_hdr(skb);
- int err;
+ int nh, err;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
@@ -404,8 +403,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+ iph = (struct iphdr *)(skb->head + nh);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -554,6 +566,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -632,13 +658,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -747,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos),
- dev_net(dev), tunnel->parms.link,
+ dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
@@ -818,16 +844,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -867,7 +893,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
- t->parms.link = p->link;
+ WRITE_ONCE(t->parms.link, p->link);
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
@@ -1057,9 +1083,9 @@ EXPORT_SYMBOL(ip_tunnel_get_link_net);
int ip_tunnel_get_iflink(const struct net_device *dev)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->parms.link;
+ return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
@@ -1130,19 +1156,17 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
}
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops)
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill)
{
struct ip_tunnel_net *itn;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, &list, ops);
+ ip_tunnel_destroy(net, itn, dev_to_kill, ops);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
@@ -1271,6 +1295,7 @@ int ip_tunnel_init(struct net_device *dev)
if (tunnel->collect_md)
netif_keep_dst(dev);
+ netdev_lockdep_set_classes(dev);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
@@ -1298,4 +1323,5 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
+MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 586b1b3e35b8..80ccd6661aa3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
};
skb_reset_network_header(skb);
- csum = csum_partial(icmp6h, len, 0);
+ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
IPPROTO_ICMPV6, csum);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9ab9b3ebe0cd..ee587adb169f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -510,14 +510,16 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_batch_net(struct list_head *list_net)
+static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops,
+ dev_to_kill);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit_batch = vti_exit_batch_net,
+ .exit_batch_rtnl = vti_exit_batch_rtnl,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -721,6 +723,7 @@ static void __exit vti_fini(void)
module_init(vti_init);
module_exit(vti_fini);
+MODULE_DESCRIPTION("Virtual (secure) IP tunneling library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 27b8f83c6ea2..f2696eaadbe6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -592,14 +592,16 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit_batch = ipip_exit_batch_net,
+ .exit_batch_rtnl = ipip_exit_batch_rtnl,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -658,6 +660,7 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
+MODULE_DESCRIPTION("IP/IP protocol decoder library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d6f59531b3a..fd5c01c8489f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt,
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
- ipv4_pktinfo_prepare(mroute_sk, pkt);
+ ipv4_pktinfo_prepare(mroute_sk, pkt, false);
memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
/* Add our header */
igmp = skb_put(skb, sizeof(struct igmphdr));
@@ -1603,9 +1603,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
if (copy_from_sockptr(&olr, optlen, sizeof(int)))
return -EFAULT;
- olr = min_t(unsigned int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
+
+ olr = min_t(unsigned int, olr, sizeof(int));
+
if (copy_to_sockptr(optlen, &olr, sizeof(int)))
return -EFAULT;
if (copy_to_sockptr(optval, &val, olr))
@@ -2587,7 +2589,9 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
@@ -3139,10 +3143,7 @@ int __init ip_mr_init(void)
{
int err;
- mrt_cachep = kmem_cache_create("ip_mrt_cache",
- sizeof(struct mfc_cache),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
- NULL);
+ mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
err = register_pernet_subsys(&ipmr_net_ops);
if (err)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f71a7e9a7de6..8f6e950163a7 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -10,6 +10,10 @@ config NF_DEFRAG_IPV4
tristate
default n
+# old sockopt interface and eval loop
+config IP_NF_IPTABLES_LEGACY
+ tristate
+
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
@@ -152,7 +156,7 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP_NF_MANGLE || IP_NF_RAW
+ depends on IP_NF_MANGLE || IP_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -173,6 +177,7 @@ config IP_NF_MATCH_TTL
config IP_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -182,7 +187,7 @@ config IP_NF_FILTER
config IP_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP_NF_FILTER
+ depends on IP_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV4
default m if NETFILTER_ADVANCED=n
help
@@ -212,6 +217,7 @@ config IP_NF_NAT
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
+ select IP_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@@ -252,6 +258,7 @@ endif # IP_NF_NAT
config IP_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -261,7 +268,7 @@ config IP_NF_MANGLE
config IP_NF_TARGET_ECN
tristate "ECN target support"
- depends on IP_NF_MANGLE
+ depends on IP_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `ECN' target, which can be used in the iptables mangle
@@ -286,6 +293,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -299,6 +307,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -309,36 +318,35 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
- tristate "ARP tables support"
- select NETFILTER_XTABLES
- select NETFILTER_FAMILY_ARP
- depends on NETFILTER_ADVANCED
- help
- arptables is a general, extensible packet identification framework.
- The ARP packet filtering and mangling (manipulation)subsystems
- use this: say Y or M here if you want to use either of those.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate
-if IP_NF_ARPTABLES
+config NFT_COMPAT_ARP
+ tristate
+ depends on NF_TABLES_ARP && NFT_COMPAT
+ default m if NFT_COMPAT=m
+ default y if NFT_COMPAT=y
config IP_NF_ARPFILTER
- tristate "ARP packet filtering"
+ tristate "arptables-legacy packet filtering support"
+ select IP_NF_ARPTABLES
+ depends on NETFILTER_XTABLES
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
- local output. On a bridge, you can also specify filtering rules
- for forwarded ARP packets. See the man page for arptables(8).
+ local output. This is only needed for arptables-legacy(8).
+ Neither arptables-nft nor nftables need this to work.
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_ARP_MANGLE
tristate "ARP payload mangling"
+ depends on IP_NF_ARPTABLES || NFT_COMPAT_ARP
help
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
-endif # IP_NF_ARPTABLES
+ This option is needed by both arptables-legacy and arptables-nft.
+ It is not used by nftables.
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5a26f9de1ab9..85502d4dfbb4 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# generic IP tables
-obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+obj-$(CONFIG_IP_NF_IPTABLES_LEGACY) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index bbff68b5b5d4..74928a9d1aa4 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -26,6 +26,9 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
#define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
+#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \
+ NHA_OP_FLAG_DUMP_HW_STATS)
+
static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ID] = { .type = NLA_U32 },
[NHA_GROUP] = { .type = NLA_BINARY },
@@ -37,10 +40,17 @@ static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ENCAP] = { .type = NLA_NESTED },
[NHA_FDB] = { .type = NLA_FLAG },
[NHA_RES_GROUP] = { .type = NLA_NESTED },
+ [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true),
};
static const struct nla_policy rtm_nh_policy_get[] = {
[NHA_ID] = { .type = NLA_U32 },
+ [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ NHA_OP_FLAGS_DUMP_ALL),
+};
+
+static const struct nla_policy rtm_nh_policy_del[] = {
+ [NHA_ID] = { .type = NLA_U32 },
};
static const struct nla_policy rtm_nh_policy_dump[] = {
@@ -48,6 +58,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
[NHA_GROUPS] = { .type = NLA_FLAG },
[NHA_MASTER] = { .type = NLA_U32 },
[NHA_FDB] = { .type = NLA_FLAG },
+ [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ NHA_OP_FLAGS_DUMP_ALL),
};
static const struct nla_policy rtm_nh_res_policy_new[] = {
@@ -92,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
else if (nh_info->gw_family == AF_INET6)
nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
+ nh_info->id = nhi->nh_parent->id;
nh_info->is_reject = nhi->reject_nh;
nh_info->is_fdb = nhi->fdb_nh;
nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
@@ -131,13 +144,13 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
info->nh_grp->num_nh = num_nh;
info->nh_grp->is_fdb = nhg->fdb_nh;
+ info->nh_grp->hw_stats = nhg->hw_stats;
for (i = 0; i < num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
struct nh_info *nhi;
nhi = rtnl_dereference(nhge->nh->nh_info);
- info->nh_grp->nh_entries[i].id = nhge->nh->id;
info->nh_grp->nh_entries[i].weight = nhge->weight;
__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
nhi);
@@ -162,6 +175,7 @@ static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
return -ENOMEM;
info->nh_res_table->num_nh_buckets = num_nh_buckets;
+ info->nh_res_table->hw_stats = nhg->hw_stats;
for (i = 0; i < num_nh_buckets; i++) {
struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
@@ -393,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
struct nh_notifier_info info = {
.net = net,
.extack = extack,
+ .id = nh->id,
};
struct nh_group *nhg;
int err;
@@ -474,6 +489,7 @@ static void nexthop_free_group(struct nexthop *nh)
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
WARN_ON(!list_empty(&nhge->nh_list));
+ free_percpu(nhge->stats);
nexthop_put(nhge->nh);
}
@@ -654,8 +670,202 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge)
+{
+ struct nh_grp_entry_stats *cpu_stats;
+
+ cpu_stats = get_cpu_ptr(nhge->stats);
+ u64_stats_update_begin(&cpu_stats->syncp);
+ u64_stats_inc(&cpu_stats->packets);
+ u64_stats_update_end(&cpu_stats->syncp);
+ put_cpu_ptr(cpu_stats);
+}
+
+static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge,
+ u64 *ret_packets)
{
+ int i;
+
+ *ret_packets = 0;
+
+ for_each_possible_cpu(i) {
+ struct nh_grp_entry_stats *cpu_stats;
+ unsigned int start;
+ u64 packets;
+
+ cpu_stats = per_cpu_ptr(nhge->stats, i);
+ do {
+ start = u64_stats_fetch_begin(&cpu_stats->syncp);
+ packets = u64_stats_read(&cpu_stats->packets);
+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+ *ret_packets += packets;
+ }
+}
+
+static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info,
+ const struct nexthop *nh)
+{
+ struct nh_group *nhg;
+ int i;
+
+ ASSERT_RTNL();
+ nhg = rtnl_dereference(nh->nh_grp);
+
+ info->id = nh->id;
+ info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS;
+ info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats,
+ stats, nhg->num_nh),
+ GFP_KERNEL);
+ if (!info->nh_grp_hw_stats)
+ return -ENOMEM;
+
+ info->nh_grp_hw_stats->num_nh = nhg->num_nh;
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ info->nh_grp_hw_stats->stats[i].id = nhge->nh->id;
+ }
+
+ return 0;
+}
+
+static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info)
+{
+ kfree(info->nh_grp_hw_stats);
+}
+
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets)
+{
+ info->hw_stats_used = true;
+ info->stats[nh_idx].packets += delta_packets;
+}
+EXPORT_SYMBOL(nh_grp_hw_stats_report_delta);
+
+static void nh_grp_hw_stats_apply_update(struct nexthop *nh,
+ struct nh_notifier_info *info)
+{
+ struct nh_group *nhg;
+ int i;
+
+ ASSERT_RTNL();
+ nhg = rtnl_dereference(nh->nh_grp);
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets;
+ }
+}
+
+static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used)
+{
+ struct nh_notifier_info info = {
+ .net = nh->net,
+ };
+ struct net *net = nh->net;
+ int err;
+
+ if (nexthop_notifiers_is_empty(net))
+ return 0;
+
+ err = nh_notifier_grp_hw_stats_init(&info, nh);
+ if (err)
+ return err;
+
+ err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
+ &info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ nh_grp_hw_stats_apply_update(nh, &info);
+ *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used;
+
+ nh_notifier_grp_hw_stats_fini(&info);
+ return notifier_to_errno(err);
+}
+
+static int nla_put_nh_group_stats_entry(struct sk_buff *skb,
+ struct nh_grp_entry *nhge,
+ u32 op_flags)
+{
+ struct nlattr *nest;
+ u64 packets;
+
+ nh_grp_entry_stats_read(nhge, &packets);
+
+ nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) ||
+ nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS,
+ packets + nhge->packets_hw))
+ goto nla_put_failure;
+
+ if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+ nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW,
+ nhge->packets_hw))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh,
+ u32 op_flags)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nlattr *nest;
+ bool hw_stats_used;
+ int err;
+ int i;
+
+ if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats))
+ goto err_out;
+
+ if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+ nhg->hw_stats) {
+ err = nh_grp_hw_stats_update(nh, &hw_stats_used);
+ if (err)
+ goto out;
+
+ if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used))
+ goto err_out;
+ }
+
+ nest = nla_nest_start(skb, NHA_GROUP_STATS);
+ if (!nest)
+ goto err_out;
+
+ for (i = 0; i < nhg->num_nh; i++)
+ if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i],
+ op_flags))
+ goto cancel_out;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+cancel_out:
+ nla_nest_cancel(skb, nest);
+err_out:
+ err = -EMSGSIZE;
+out:
+ return err;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
+ u32 op_flags)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
@@ -684,6 +894,11 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
goto nla_put_failure;
+ if (op_flags & NHA_OP_FLAG_DUMP_STATS &&
+ (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) ||
+ nla_put_nh_group_stats(skb, nh, op_flags)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -691,7 +906,8 @@ nla_put_failure:
}
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
- int event, u32 portid, u32 seq, unsigned int nlflags)
+ int event, u32 portid, u32 seq, unsigned int nlflags,
+ u32 op_flags)
{
struct fib6_nh *fib6_nh;
struct fib_nh *fib_nh;
@@ -718,7 +934,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;
- if (nla_put_nh_group(skb, nhg))
+ if (nla_put_nh_group(skb, nh, op_flags))
goto nla_put_failure;
goto out;
}
@@ -849,7 +1065,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
if (!skb)
goto errout;
- err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+ err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -1104,6 +1320,7 @@ static int nh_check_attr_group(struct net *net,
if (!tb[i])
continue;
switch (i) {
+ case NHA_HW_STATS_ENABLE:
case NHA_FDB:
continue;
case NHA_RES_GROUP:
@@ -1176,6 +1393,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
@@ -1185,7 +1403,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
+ struct nh_grp_entry *nhge0 = NULL;
int i;
if (nhg->fdb_nh)
@@ -1200,16 +1418,20 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
if (!nexthop_is_good_nh(nhge->nh))
continue;
- if (!rc)
- rc = nhge->nh;
+ if (!nhge0)
+ nhge0 = nhge;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
- return rc ? : nhg->nh_entries[0].nh;
+ if (!nhge0)
+ nhge0 = &nhg->nh_entries[0];
+ nh_grp_entry_stats_inc(nhge0);
+ return nhge0->nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -1225,6 +1447,7 @@ static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
bucket = &res_table->nh_buckets[bucket_index];
nh_res_bucket_set_busy(bucket);
nhge = rcu_dereference(bucket->nh_entry);
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
@@ -1798,6 +2021,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
newg->has_v4 = true;
list_del(&nhges[i].nh_list);
+ new_nhges[j].stats = nhges[i].stats;
new_nhges[j].nh_parent = nhges[i].nh_parent;
new_nhges[j].nh = nhges[i].nh;
new_nhges[j].weight = nhges[i].weight;
@@ -1813,6 +2037,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
rcu_assign_pointer(nhp->nh_grp, newg);
list_del(&nhge->nh_list);
+ free_percpu(nhge->stats);
nexthop_put(nhge->nh);
/* Removal of a NH from a resilient group is notified through
@@ -2477,6 +2702,13 @@ static struct nexthop *nexthop_create_group(struct net *net,
if (nhi->family == AF_INET)
nhg->has_v4 = true;
+ nhg->nh_entries[i].stats =
+ netdev_alloc_pcpu_stats(struct nh_grp_entry_stats);
+ if (!nhg->nh_entries[i].stats) {
+ err = -ENOMEM;
+ nexthop_put(nhe);
+ goto out_no_nh;
+ }
nhg->nh_entries[i].nh = nhe;
nhg->nh_entries[i].weight = entry[i].weight + 1;
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
@@ -2509,6 +2741,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
if (cfg->nh_fdb)
nhg->fdb_nh = 1;
+ if (cfg->nh_hw_stats)
+ nhg->hw_stats = true;
+
rcu_assign_pointer(nh->nh_grp, nhg);
return nh;
@@ -2516,6 +2751,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
out_no_nh:
for (i--; i >= 0; --i) {
list_del(&nhg->nh_entries[i].nh_list);
+ free_percpu(nhg->nh_entries[i].stats);
nexthop_put(nhg->nh_entries[i].nh);
}
@@ -2850,6 +3086,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
cfg, extack);
+ if (tb[NHA_HW_STATS_ENABLE])
+ cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]);
+
/* no other attributes should be set */
goto out;
}
@@ -2941,6 +3180,10 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
+ if (tb[NHA_HW_STATS_ENABLE]) {
+ NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops");
+ goto out;
+ }
err = 0;
out:
@@ -2966,9 +3209,9 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
-static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
- struct nlattr **tb, u32 *id,
- struct netlink_ext_ack *extack)
+static int nh_valid_get_del_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u32 *id, u32 *op_flags,
+ struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
@@ -2988,28 +3231,21 @@ static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
- return 0;
-}
-
-static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
- int err;
-
- err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
- ARRAY_SIZE(rtm_nh_policy_get) - 1,
- rtm_nh_policy_get, extack);
- if (err < 0)
- return err;
+ if (op_flags) {
+ if (tb[NHA_OP_FLAGS])
+ *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
+ else
+ *op_flags = 0;
+ }
- return __nh_valid_get_del_req(nlh, tb, id, extack);
+ return 0;
}
/* rtnl */
static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)];
struct net *net = sock_net(skb->sk);
struct nl_info nlinfo = {
.nlh = nlh,
@@ -3020,7 +3256,13 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
u32 id;
- err = nh_valid_get_del_req(nlh, &id, extack);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = nh_valid_get_del_req(nlh, tb, &id, NULL, extack);
if (err)
return err;
@@ -3037,13 +3279,21 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
struct net *net = sock_net(in_skb->sk);
struct sk_buff *skb = NULL;
struct nexthop *nh;
+ u32 op_flags;
int err;
u32 id;
- err = nh_valid_get_del_req(nlh, &id, extack);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack);
if (err)
return err;
@@ -3058,7 +3308,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_free;
err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
+ nlh->nlmsg_seq, 0, op_flags);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
goto errout_free;
@@ -3079,6 +3329,7 @@ struct nh_dump_filter {
bool group_filter;
bool fdb_filter;
u32 res_bucket_nh_id;
+ u32 op_flags;
};
static bool nh_dump_filtered(struct nexthop *nh,
@@ -3166,6 +3417,11 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[NHA_OP_FLAGS])
+ filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
+ else
+ filter->op_flags = 0;
+
return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
}
@@ -3223,7 +3479,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags);
}
/* rtnl */
@@ -3241,10 +3497,6 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
&rtm_dump_nexthop_cb, &filter);
- if (err < 0) {
- if (likely(skb->len))
- err = skb->len;
- }
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -3439,11 +3691,6 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
&rtm_dump_nexthop_bucket_cb, &dd);
}
- if (err < 0) {
- if (likely(skb->len))
- err = skb->len;
- }
-
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3483,7 +3730,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
- err = __nh_valid_get_del_req(nlh, tb, id, extack);
+ err = nh_valid_get_del_req(nlh, tb, id, NULL, extack);
if (err)
return err;
@@ -3631,17 +3878,24 @@ unlock:
}
EXPORT_SYMBOL(register_nexthop_notifier);
-int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
int err;
- rtnl_lock();
err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
nb);
- if (err)
- goto unlock;
- nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
-unlock:
+ if (!err)
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(__unregister_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = __unregister_nexthop_notifier(net, nb);
rtnl_unlock();
return err;
}
@@ -3737,16 +3991,20 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
+static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
+ ASSERT_RTNL();
+ list_for_each_entry(net, net_list, exit_list)
flush_all_nexthops(net);
- kfree(net->nexthop.devhash);
- }
- rtnl_unlock();
+}
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+ kfree(net->nexthop.devhash);
+ net->nexthop.devhash = NULL;
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3764,7 +4022,8 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit_batch = nexthop_net_exit_batch,
+ .exit = nexthop_net_exit,
+ .exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
};
static int __init nexthop_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 5f4654ebff48..914bc9c35cc7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -395,7 +395,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
- IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
+ IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2,
READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 27da9d7294c0..42ac434cfcfa 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -175,6 +175,13 @@ static int raw_v4_input(struct net *net, struct sk_buff *skb,
if (!raw_v4_match(net, sk, iph->protocol,
iph->saddr, iph->daddr, dif, sdif))
continue;
+
+ if (atomic_read(&sk->sk_rmem_alloc) >=
+ READ_ONCE(sk->sk_rcvbuf)) {
+ atomic_inc(&sk->sk_drops);
+ continue;
+ }
+
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
@@ -292,7 +299,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* Charge it to the socket. */
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
@@ -310,7 +317,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
raw_rcv_skb(sk, skb);
return 0;
@@ -353,6 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
+ skb->mono_delivery_time = !!skb->tstamp;
skb_dst_set(skb, &rt->dst);
*rtp = NULL;
@@ -815,7 +823,7 @@ static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *o
out: return ret;
}
-static int do_raw_setsockopt(struct sock *sk, int level, int optname,
+static int do_raw_setsockopt(struct sock *sk, int optname,
sockptr_t optval, unsigned int optlen)
{
if (optname == ICMP_FILTER) {
@@ -832,11 +840,11 @@ static int raw_setsockopt(struct sock *sk, int level, int optname,
{
if (level != SOL_RAW)
return ip_setsockopt(sk, level, optname, optval, optlen);
- return do_raw_setsockopt(sk, level, optname, optval, optlen);
+ return do_raw_setsockopt(sk, optname, optval, optlen);
}
-static int do_raw_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+static int do_raw_getsockopt(struct sock *sk, int optname,
+ char __user *optval, int __user *optlen)
{
if (optname == ICMP_FILTER) {
if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
@@ -852,7 +860,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
{
if (level != SOL_RAW)
return ip_getsockopt(sk, level, optname, optval, optlen);
- return do_raw_getsockopt(sk, level, optname, optval, optlen);
+ return do_raw_getsockopt(sk, optname, optval, optlen);
}
static int raw_ioctl(struct sock *sk, int cmd, int *karg)
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index fe2140c8375c..cc793bd8de25 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -213,6 +213,7 @@ static int raw_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler raw_diag_handler = {
+ .owner = THIS_MODULE,
.dump = raw_diag_dump,
.dump_one = raw_diag_dump_one,
.idiag_get_info = raw_diag_get_info,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 16615d107cf0..c8f76f56dc16 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2313,7 +2313,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (IN_DEV_BFORWARD(in_dev))
goto make_route;
/* not do cache if bc_forwarding is enabled */
- if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+ if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING))
do_cache = false;
goto brd_input;
}
@@ -2993,7 +2993,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
#ifdef CONFIG_IP_MROUTE
if (ipv4_is_multicast(dst) &&
!ipv4_is_local_multicast(dst) &&
- IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+ IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
r, portid);
@@ -3693,9 +3693,8 @@ int __init ip_rt_init(void)
panic("IP: failed to allocate ip_rt_acct\n");
#endif
- ipv4_dst_ops.kmem_cachep =
- kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ ipv4_dst_ops.kmem_cachep = KMEM_CACHE(rtable,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC);
ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 61f1c96cfe63..7972ad3d7c73 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -51,15 +51,6 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
count, &syncookie_secret[c]);
}
-/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
-static u64 tcp_ns_to_ts(bool usec_ts, u64 val)
-{
- if (usec_ts)
- return div_u64(val, NSEC_PER_USEC);
-
- return div_u64(val, NSEC_PER_MSEC);
-}
-
/*
* when syncookies are in effect and tcp timestamps are enabled we encode
* tcp options in the lower bits of the timestamp value that will be
@@ -304,6 +295,24 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
return 0;
}
+#if IS_ENABLED(CONFIG_BPF)
+struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct request_sock *req = inet_reqsk(skb->sk);
+
+ skb->sk = NULL;
+ skb->destructor = NULL;
+
+ if (cookie_tcp_reqsk_init(sk, skb, req)) {
+ reqsk_free(req);
+ req = NULL;
+ }
+
+ return req;
+}
+EXPORT_SYMBOL_GPL(cookie_bpf_check);
+#endif
+
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
struct sock *sk, struct sk_buff *skb,
struct tcp_options_received *tcp_opt,
@@ -399,16 +408,23 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct rtable *rt;
__u8 rcv_wscale;
int full_space;
+ SKB_DR(reason);
if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
- if (!req)
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
+ if (!req) {
+ SKB_DR_SET(reason, NO_SOCKET);
goto out_drop;
+ }
ireq = inet_rsk(req);
@@ -420,8 +436,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
*/
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
- if (security_inet_conn_request(sk, skb, req))
+ if (security_inet_conn_request(sk, skb, req)) {
+ SKB_DR_SET(reason, SECURITY_HOOK);
goto out_free;
+ }
tcp_ao_syncookie(sk, skb, req, AF_INET);
@@ -438,8 +456,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
+ if (IS_ERR(rt)) {
+ SKB_DR_SET(reason, IP_OUTNOROUTES);
goto out_free;
+ }
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
@@ -454,19 +474,24 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
- if (ret)
- inet_sk(ret)->cork.fl.u.ip4 = fl4;
+ if (!ret) {
+ SKB_DR_SET(reason, NO_SOCKET);
+ goto out_drop;
+ }
+ inet_sk(ret)->cork.fl.u.ip4 = fl4;
out:
return ret;
out_free:
reqsk_free(req);
out_drop:
+ kfree_skb_reason(skb, reason);
return NULL;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1baa484d2190..d20b62d52171 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <net/rps.h>
/* Track pending CMSGs. */
enum {
@@ -722,6 +723,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now,
if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+ smp_mb__after_atomic();
}
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
@@ -973,7 +975,7 @@ int tcp_wmem_schedule(struct sock *sk, int copy)
* Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
* to guarantee some progress.
*/
- left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
+ left = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]) - sk->sk_wmem_queued;
if (left > 0)
sk_forced_mem_schedule(sk, min(left, copy));
return min(copy, sk->sk_forward_alloc);
@@ -1785,7 +1787,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
static bool can_map_frag(const skb_frag_t *frag)
{
- return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag);
+ struct page *page;
+
+ if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+ return false;
+
+ page = skb_frag_page(frag);
+
+ if (PageCompound(page) || page->mapping)
+ return false;
+
+ return true;
}
static int find_next_mappable_frag(const skb_frag_t *frag,
@@ -3999,11 +4011,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
@@ -4604,7 +4616,8 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
/* RX read-mostly hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
@@ -4639,7 +4652,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 105);
/* TXRX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 87db432c6bb4..3afeeb68e8a7 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -509,9 +509,9 @@ static int tcp_ao_hash_header(struct tcp_sigpool *hp,
bool exclude_options, u8 *hash,
int hash_offset, int hash_len)
{
- int err, len = th->doff << 2;
struct scatterlist sg;
u8 *hdr = hp->scratch;
+ int err, len;
/* We are not allowed to change tcphdr, make a local copy */
if (exclude_options) {
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 22358032dd48..05dc2d05bc7c 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1155,7 +1155,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
-BTF_SET8_START(tcp_bbr_check_kfunc_ids)
+BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, bbr_init)
@@ -1168,7 +1168,7 @@ BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
#endif
#endif
-BTF_SET8_END(tcp_bbr_check_kfunc_ids)
+BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1b34050a7538..28ffcfbeef14 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -146,11 +146,7 @@ EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
{
struct tcp_congestion_ops *existing;
- int ret;
-
- ret = tcp_validate_congestion_control(ca);
- if (ret)
- return ret;
+ int ret = 0;
ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0fd78ecb67e7..44869ea089e3 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
-BTF_SET8_START(tcp_cubic_check_kfunc_ids)
+BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, cubictcp_init)
@@ -496,7 +496,7 @@ BTF_ID_FLAGS(func, cubictcp_cwnd_event)
BTF_ID_FLAGS(func, cubictcp_acked)
#endif
#endif
-BTF_SET8_END(tcp_cubic_check_kfunc_ids)
+BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index bb23bb5b387a..e33fbe4933e4 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -260,7 +260,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
-BTF_SET8_START(tcp_dctcp_check_kfunc_ids)
+BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, dctcp_init)
@@ -271,7 +271,7 @@ BTF_ID_FLAGS(func, dctcp_cwnd_undo)
BTF_ID_FLAGS(func, dctcp_state)
#endif
#endif
-BTF_SET8_END(tcp_dctcp_check_kfunc_ids)
+BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 4cbe4b44425a..f428ecf9120f 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -222,6 +222,7 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler tcp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = tcp_diag_dump,
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df7b13f0e5e0..5d874817a78d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1164,7 +1164,7 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
* L|R 1 - orig is lost, retransmit is in flight.
* S|R 1 - orig reached receiver, retrans is still in flight.
* (L|S|R is logically valid, it could occur when L|R is sacked,
- * but it is equivalent to plain S and code short-curcuits it to S.
+ * but it is equivalent to plain S and code short-circuits it to S.
* L|S is logically invalid, it would mean -1 packet in flight 8))
*
* These 6 states form finite state machine, controlled by the following events:
@@ -6361,6 +6361,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
inet_csk_reset_xmit_timer(sk,
ICSK_TIME_RETRANS,
TCP_TIMEOUT_MIN, TCP_RTO_MAX);
+ SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE);
goto reset_and_undo;
}
@@ -6369,6 +6370,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_ts(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto reset_and_undo;
}
@@ -6572,7 +6574,8 @@ discard_and_undo:
reset_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
- return 1;
+ /* we can reuse/return @reason to its caller to handle the exception */
+ return reason;
}
static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
@@ -6616,14 +6619,14 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
* address independent.
*/
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
+enum skb_drop_reason
+tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcphdr *th = tcp_hdr(skb);
struct request_sock *req;
int queued = 0;
- bool acceptable;
SKB_DR(reason);
switch (sk->sk_state) {
@@ -6633,7 +6636,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_LISTEN:
if (th->ack)
- return 1;
+ return SKB_DROP_REASON_TCP_FLAGS;
if (th->rst) {
SKB_DR_SET(reason, TCP_RESET);
@@ -6649,12 +6652,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
*/
rcu_read_lock();
local_bh_disable();
- acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+ icsk->icsk_af_ops->conn_request(sk, skb);
local_bh_enable();
rcu_read_unlock();
- if (!acceptable)
- return 1;
consume_skb(skb);
return 0;
}
@@ -6699,17 +6700,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
/* step 5: check the ACK field */
- acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- FLAG_UPDATE_TS_RECENT |
- FLAG_NO_CHALLENGE_ACK) > 0;
-
- if (!acceptable) {
- if (sk->sk_state == TCP_SYN_RECV)
- return 1; /* send one RST */
- tcp_send_challenge_ack(sk);
- SKB_DR_SET(reason, TCP_OLD_ACK);
- goto discard;
+ reason = tcp_ack(sk, skb, FLAG_SLOWPATH |
+ FLAG_UPDATE_TS_RECENT |
+ FLAG_NO_CHALLENGE_ACK);
+
+ if ((int)reason <= 0) {
+ if (sk->sk_state == TCP_SYN_RECV) {
+ /* send one RST */
+ if (!reason)
+ return SKB_DROP_REASON_TCP_OLD_ACK;
+ return -reason;
+ }
+ /* accept old ack during closing */
+ if ((int)reason < 0) {
+ tcp_send_challenge_ack(sk);
+ reason = -reason;
+ goto discard;
+ }
}
+ SKB_DR_SET(reason, NOT_SPECIFIED);
switch (sk->sk_state) {
case TCP_SYN_RECV:
tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
@@ -6777,7 +6786,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (READ_ONCE(tp->linger2) < 0) {
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
- return 1;
+ return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
}
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
@@ -6786,7 +6795,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_fastopen_active_disable(sk);
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
- return 1;
+ return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
}
tmo = tcp_fin_time(sk);
@@ -6851,7 +6860,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk, skb);
- return 1;
+ return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
}
}
fallthrough;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c50c5a32b84..a22ee5838751 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1907,7 +1907,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (tcp_checksum_complete(skb))
goto csum_err;
@@ -1915,9 +1914,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
struct sock *nsk = tcp_v4_cookie_check(sk, skb);
if (!nsk)
- goto discard;
+ return 0;
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb)) {
+ reason = tcp_child_process(sk, nsk, skb);
+ if (reason) {
rsk = nsk;
goto reset;
}
@@ -1926,7 +1926,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb)) {
+ reason = tcp_rcv_state_process(sk, skb);
+ if (reason) {
rsk = sk;
goto reset;
}
@@ -2275,10 +2276,12 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
- } else if (tcp_child_process(sk, nsk, skb)) {
- tcp_v4_send_reset(nsk, skb);
- goto discard_and_relse;
} else {
+ drop_reason = tcp_child_process(sk, nsk, skb);
+ if (drop_reason) {
+ tcp_v4_send_reset(nsk, skb);
+ goto discard_and_relse;
+ }
sock_put(sk);
return 0;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 9e85f2a0bddd..52040b0e2616 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -911,11 +911,11 @@ EXPORT_SYMBOL(tcp_check_req);
* be created.
*/
-int tcp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb)
+enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb)
__releases(&((child)->sk_lock.slock))
{
- int ret = 0;
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
int state = child->sk_state;
/* record sk_napi_id and sk_rx_queue_mapping of child. */
@@ -923,7 +923,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
tcp_segs_in(tcp_sk(child), skb);
if (!sock_owned_by_user(child)) {
- ret = tcp_rcv_state_process(child, skb);
+ reason = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
parent->sk_data_ready(parent);
@@ -937,6 +937,6 @@ int tcp_child_process(struct sock *parent, struct sock *child,
bh_unlock_sock(child);
sock_put(child);
- return ret;
+ return reason;
}
EXPORT_SYMBOL(tcp_child_process);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8311c38267b5..ebe4722bb020 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -204,7 +204,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
goto out;
hlen = off + thlen;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
th = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!th))
goto out;
@@ -299,18 +299,20 @@ out:
void tcp_gro_complete(struct sk_buff *skb)
{
struct tcphdr *th = tcp_hdr(skb);
+ struct skb_shared_info *shinfo;
+
+ if (skb->encapsulation)
+ skb->inner_transport_header = skb->transport_header;
skb->csum_start = (unsigned char *)th - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
- skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ shinfo = skb_shinfo(skb);
+ shinfo->gso_segs = NAPI_GRO_CB(skb)->count;
if (th->cwr)
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
-
- if (skb->encapsulation)
- skb->inner_transport_header = skb->transport_header;
+ shinfo->gso_type |= SKB_GSO_TCP_ECN;
}
EXPORT_SYMBOL(tcp_gro_complete);
@@ -335,24 +337,22 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
- if (NAPI_GRO_CB(skb)->is_atomic)
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 |
+ (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID);
tcp_gro_complete(skb);
return 0;
}
-static const struct net_offload tcpv4_offload = {
- .callbacks = {
- .gso_segment = tcp4_gso_segment,
- .gro_receive = tcp4_gro_receive,
- .gro_complete = tcp4_gro_complete,
- },
-};
-
int __init tcpv4_offload_init(void)
{
- return inet_add_offload(&tcpv4_offload, IPPROTO_TCP);
+ net_hotdata.tcpv4_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp4_gso_segment,
+ .gro_receive = tcp4_gro_receive,
+ .gro_complete = tcp4_gro_complete,
+ },
+ };
+ return inet_add_offload(&net_hotdata.tcpv4_offload, IPPROTO_TCP);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 5048c47c79b2..4c1f836aae38 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -294,4 +294,5 @@ static void __exit tunnel4_fini(void)
module_init(tunnel4_init);
module_exit(tunnel4_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel library");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 148ffb007969..661d0e0d273f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -411,8 +411,6 @@ INDIRECT_CALLABLE_SCOPE
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
- static u32 udp_ehash_secret __read_mostly;
-
net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
return __inet_ehashfn(laddr, lport, faddr, fport,
@@ -1589,12 +1587,8 @@ int udp_init_sock(struct sock *sk)
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
- if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
- bool slow = lock_sock_fast(sk);
-
+ if (unlikely(READ_ONCE(udp_sk(sk)->peeking_with_offset)))
sk_peek_offset_bwd(sk, len);
- unlock_sock_fast(sk, slow);
- }
if (!skb_unref(skb))
return;
@@ -2169,7 +2163,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_csum_pull_header(skb);
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
return __udp_queue_rcv_skb(sk, skb);
csum_error:
@@ -2574,11 +2568,12 @@ int udp_v4_early_demux(struct sk_buff *skb)
uh->source, iph->saddr, dif, sdif);
}
- if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
+ if (!sk)
return 0;
skb->sk = sk;
- skb->destructor = sock_efree;
+ DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+ skb->destructor = sock_pfree;
dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
@@ -2797,11 +2792,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case UDP_CORK:
val = udp_test_bit(CORK, sk);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index dc41a22ee80e..38cb3a28e4ed 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -237,6 +237,7 @@ static int udplite_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler udp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = udp_diag_dump,
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
@@ -260,6 +261,7 @@ static int udplite_diag_dump_one(struct netlink_callback *cb,
}
static const struct inet_diag_handler udplite_diag_handler = {
+ .owner = THIS_MODULE,
.dump = udplite_diag_dump,
.dump_one = udplite_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6c95d28d0c4a..b9880743765c 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -737,15 +737,14 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
-static const struct net_offload udpv4_offload = {
- .callbacks = {
- .gso_segment = udp4_ufo_fragment,
- .gro_receive = udp4_gro_receive,
- .gro_complete = udp4_gro_complete,
- },
-};
-
int __init udpv4_offload_init(void)
{
- return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
+ net_hotdata.udpv4_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp4_ufo_fragment,
+ .gro_receive = udp4_gro_receive,
+ .gro_complete = udp4_gro_complete,
+ },
+ };
+ return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP);
}
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index a87defb2b167..860aff5f8599 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -253,4 +253,5 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup);
+MODULE_DESCRIPTION("IPv4 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index c54676998eb6..dae35101d189 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -58,7 +58,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
return -iph->protocol;
#endif
- __skb_push(skb, skb->data - skb_network_header(skb));
+ __skb_push(skb, -skb_network_offset(skb));
iph->tot_len = htons(skb->len);
ip_send_check(iph);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 8489fa106583..8cb266af1393 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -114,5 +114,6 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 733ace18806c..247bd4d8ee45 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -257,6 +258,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -549,7 +551,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
goto out;
if ((all || type == NETCONFA_FORWARDING) &&
- nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
+ nla_put_s32(skb, NETCONFA_FORWARDING,
+ READ_ONCE(devconf->forwarding)) < 0)
goto nla_put_failure;
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
@@ -558,12 +561,13 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
- nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
+ nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
+ READ_ONCE(devconf->proxy_ndp)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
- devconf->ignore_routes_with_linkdown) < 0)
+ READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0)
goto nla_put_failure;
out:
@@ -708,16 +712,33 @@ errout:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet6_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
+ READ_ONCE(net->dev_base_seq);
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx, s_idx;
+ struct {
+ unsigned long ifindex;
+ unsigned int all_default;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -734,65 +755,46 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
- net->dev_base_seq;
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
-
- if (inet6_netconf_fill_devconf(skb, dev->ifindex,
- &idev->cnf,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_netconf_fill_devconf(skb, dev->ifindex,
+ &idev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ goto done;
}
- if (h == NETDEV_HASHENTRIES) {
- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ if (ctx->all_default == 0) {
+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
- }
- if (h == NETDEV_HASHENTRIES + 1) {
- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
- net->ipv6.devconf_dflt,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ ctx->all_default++;
+ }
+ if (ctx->all_default == 1) {
+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
+ ctx->all_default++;
}
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-
- return skb->len;
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_SYSCTL
@@ -852,7 +854,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
- idev->cnf.forwarding = newf;
+
+ WRITE_ONCE(idev->cnf.forwarding, newf);
if (changed)
dev_forward_change(idev);
}
@@ -869,7 +872,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
net = (struct net *)table->extra2;
old = *p;
- *p = newf;
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_dflt->forwarding) {
if ((!newf) ^ (!old))
@@ -884,7 +887,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_all->forwarding) {
int old_dflt = net->ipv6.devconf_dflt->forwarding;
- net->ipv6.devconf_dflt->forwarding = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf);
if ((!newf) ^ (!old_dflt))
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_FORWARDING,
@@ -916,7 +919,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
if (idev) {
int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
- idev->cnf.ignore_routes_with_linkdown = newf;
+ WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf);
if (changed)
inet6_netconf_notify_devconf(dev_net(dev),
RTM_NEWNETCONF,
@@ -937,7 +940,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
net = (struct net *)table->extra2;
old = *p;
- *p = newf;
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
if ((!newf) ^ (!old))
@@ -951,7 +954,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
}
if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) {
- net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf);
addrconf_linkdown_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
@@ -1255,6 +1258,7 @@ static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
bool del_rt, bool del_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1264,8 +1268,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
else {
- if (!(f6i->fib6_flags & RTF_EXPIRES))
+ if (!(f6i->fib6_flags & RTF_EXPIRES)) {
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
fib6_info_release(f6i);
}
}
@@ -1331,12 +1342,21 @@ out:
in6_ifa_put(ifp);
}
+static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev)
+{
+ return READ_ONCE(idev->cnf.regen_min_advance) +
+ READ_ONCE(idev->cnf.regen_max_retry) *
+ READ_ONCE(idev->cnf.dad_transmits) *
+ max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+}
+
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
{
struct inet6_dev *idev = ifp->idev;
unsigned long tmp_tstamp, age;
unsigned long regen_advance;
unsigned long now = jiffies;
+ u32 if_public_preferred_lft;
s32 cnf_temp_preferred_lft;
struct inet6_ifaddr *ift;
struct ifa6_config cfg;
@@ -1348,7 +1368,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
retry:
in6_dev_hold(idev);
- if (idev->cnf.use_tempaddr <= 0) {
+ if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) {
write_unlock_bh(&idev->lock);
pr_info("%s: use_tempaddr is disabled\n", __func__);
in6_dev_put(idev);
@@ -1356,8 +1376,8 @@ retry:
goto out;
}
spin_lock_bh(&ifp->lock);
- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
- idev->cnf.use_tempaddr = -1; /*XXX*/
+ if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) {
+ WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/
spin_unlock_bh(&ifp->lock);
write_unlock_bh(&idev->lock);
pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",
@@ -1372,16 +1392,14 @@ retry:
age = (now - ifp->tstamp) / HZ;
- regen_advance = idev->cnf.regen_max_retry *
- idev->cnf.dad_transmits *
- max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ regen_advance = ipv6_get_regen_advance(idev);
/* recalculate max_desync_factor each time and update
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
max_desync_factor = min_t(long,
- idev->cnf.max_desync_factor,
+ READ_ONCE(idev->cnf.max_desync_factor),
cnf_temp_preferred_lft - regen_advance);
if (unlikely(idev->desync_factor > max_desync_factor)) {
@@ -1394,11 +1412,13 @@ retry:
}
}
+ if_public_preferred_lft = ifp->prefered_lft;
+
memset(&cfg, 0, sizeof(cfg));
cfg.valid_lft = min_t(__u32, ifp->valid_lft,
- idev->cnf.temp_valid_lft + age);
+ READ_ONCE(idev->cnf.temp_valid_lft) + age);
cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
- cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+ cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft);
cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft);
cfg.plen = ifp->prefix_len;
@@ -1407,19 +1427,41 @@ retry:
write_unlock_bh(&idev->lock);
- /* A temporary address is created only if this calculated Preferred
- * Lifetime is greater than REGEN_ADVANCE time units. In particular,
- * an implementation must not create a temporary address with a zero
- * Preferred Lifetime.
+ /* From RFC 4941:
+ *
+ * A temporary address is created only if this calculated Preferred
+ * Lifetime is greater than REGEN_ADVANCE time units. In
+ * particular, an implementation must not create a temporary address
+ * with a zero Preferred Lifetime.
+ *
+ * ...
+ *
+ * When creating a temporary address, the lifetime values MUST be
+ * derived from the corresponding prefix as follows:
+ *
+ * ...
+ *
+ * * Its Preferred Lifetime is the lower of the Preferred Lifetime
+ * of the public address or TEMP_PREFERRED_LIFETIME -
+ * DESYNC_FACTOR.
+ *
+ * To comply with the RFC's requirements, clamp the preferred lifetime
+ * to a minimum of regen_advance, unless that would exceed valid_lft or
+ * ifp->prefered_lft.
+ *
* Use age calculation as in addrconf_verify to avoid unnecessary
* temporary addresses being generated.
*/
age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
if (cfg.preferred_lft <= regen_advance + age) {
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
+ cfg.preferred_lft = regen_advance + age + 1;
+ if (cfg.preferred_lft > cfg.valid_lft ||
+ cfg.preferred_lft > if_public_preferred_lft) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
}
cfg.ifa_flags = IFA_F_TEMPORARY;
@@ -1498,15 +1540,17 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
-static bool ipv6_use_optimistic_addr(struct net *net,
- struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(const struct net *net,
+ const struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (!idev)
return false;
- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) &&
+ !READ_ONCE(idev->cnf.optimistic_dad))
return false;
- if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+ if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) &&
+ !READ_ONCE(idev->cnf.use_optimistic))
return false;
return true;
@@ -1515,13 +1559,14 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
-static bool ipv6_allow_optimistic_dad(struct net *net,
- struct inet6_dev *idev)
+static bool ipv6_allow_optimistic_dad(const struct net *net,
+ const struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (!idev)
return false;
- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) &&
+ !READ_ONCE(idev->cnf.optimistic_dad))
return false;
return true;
@@ -1627,7 +1672,7 @@ static int ipv6_get_saddr_eval(struct net *net,
*/
int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
- score->ifa->idev->cnf.use_tempaddr >= 2;
+ READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2;
ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
break;
}
@@ -1803,7 +1848,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
idev = __in6_dev_get(dst_dev);
if ((dst_type & IPV6_ADDR_MULTICAST) ||
dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
- (idev && idev->cnf.use_oif_addrs_only)) {
+ (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) {
use_oif_addr = true;
}
}
@@ -2110,6 +2155,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(idev->dev);
+ int max_addresses;
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@@ -2147,9 +2193,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
spin_unlock_bh(&ifp->lock);
- if (idev->cnf.max_addresses &&
- ipv6_count_addresses(idev) >=
- idev->cnf.max_addresses)
+ max_addresses = READ_ONCE(idev->cnf.max_addresses);
+ if (max_addresses &&
+ ipv6_count_addresses(idev) >= max_addresses)
goto lock_errdad;
net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n",
@@ -2546,11 +2592,11 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively.
*/
age = (now - ift->cstamp) / HZ;
- max_valid = idev->cnf.temp_valid_lft - age;
+ max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age;
if (max_valid < 0)
max_valid = 0;
- max_prefered = idev->cnf.temp_prefered_lft -
+ max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) -
idev->desync_factor - age;
if (max_prefered < 0)
max_prefered = 0;
@@ -2583,7 +2629,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
create = true;
- if (create && idev->cnf.use_tempaddr > 0) {
+ if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
*/
@@ -2611,7 +2657,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
+ int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses);
struct ifa6_config cfg = {
.pfx = addr,
.plen = pinfo->prefix_len,
@@ -2623,8 +2669,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
};
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if ((net->ipv6.devconf_all->optimistic_dad ||
- in6_dev->cnf.optimistic_dad) &&
+ if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) ||
+ READ_ONCE(in6_dev->cnf.optimistic_dad)) &&
!net->ipv6.devconf_all->forwarding && sllao)
cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -2673,7 +2719,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
*/
update_lft = !create && stored_lft;
- if (update_lft && !in6_dev->cnf.ra_honor_pio_life) {
+ if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) {
const u32 minimum_lft = min_t(u32,
stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
@@ -2682,7 +2728,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
+ WRITE_ONCE(ifp->tstamp, now);
flags = ifp->flags;
ifp->flags &= ~IFA_F_DEPRECATED;
spin_unlock_bh(&ifp->lock);
@@ -2706,6 +2752,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
+ struct fib6_table *table;
__u32 valid_lft;
__u32 prefered_lft;
int addr_type, err;
@@ -2782,11 +2829,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (valid_lft == 0) {
ip6_del_rt(net, rt, false);
rt = NULL;
- } else if (addrconf_finite_timeout(rt_expires)) {
- /* not infinity */
- fib6_set_expires(rt, jiffies + rt_expires);
} else {
- fib6_clean_expires(rt);
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ fib6_set_expires(rt, jiffies + rt_expires);
+ fib6_add_gc_list(rt);
+ } else {
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
}
} else if (valid_lft) {
clock_t expires = 0;
@@ -3247,8 +3303,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
struct inet6_ifaddr *ifp;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
- idev->cnf.optimistic_dad) &&
+ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) ||
+ READ_ONCE(idev->cnf.optimistic_dad)) &&
!dev_net(idev->dev)->ipv6.devconf_all->forwarding)
cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -3427,7 +3483,8 @@ static void addrconf_dev_config(struct net_device *dev)
/* this device type has no EUI support */
if (dev->type == ARPHRD_NONE &&
idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_RANDOM);
addrconf_addr_gen(idev, false);
}
@@ -3605,7 +3662,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
+ WRITE_ONCE(idev->cnf.mtu6, dev->mtu);
break;
}
@@ -3697,9 +3754,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev->cnf.mtu6 != dev->mtu &&
dev->mtu >= IPV6_MIN_MTU) {
rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
+ WRITE_ONCE(idev->cnf.mtu6, dev->mtu);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
/*
@@ -3819,10 +3876,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
*/
if (!unregister && !idev->cnf.disable_ipv6) {
/* aggregate the system setting and interface setting */
- int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+ int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down);
if (!_keep_addr)
- _keep_addr = idev->cnf.keep_addr_on_down;
+ _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down);
keep_addr = (_keep_addr > 0);
}
@@ -3941,7 +3998,7 @@ restart:
ipv6_mc_down(idev);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
@@ -3959,6 +4016,7 @@ static void addrconf_rs_timer(struct timer_list *t)
struct inet6_dev *idev = from_timer(idev, t, rs_timer);
struct net_device *dev = idev->dev;
struct in6_addr lladdr;
+ int rtr_solicits;
write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY))
@@ -3971,7 +4029,9 @@ static void addrconf_rs_timer(struct timer_list *t)
if (idev->if_flags & IF_RA_RCVD)
goto out;
- if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
+ rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits);
+
+ if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) {
write_unlock(&idev->lock);
if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
ndisc_send_rs(dev, &lladdr,
@@ -3981,11 +4041,12 @@ static void addrconf_rs_timer(struct timer_list *t)
write_lock(&idev->lock);
idev->rs_interval = rfc3315_s14_backoff_update(
- idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
+ idev->rs_interval,
+ READ_ONCE(idev->cnf.rtr_solicit_max_interval));
/* The wait after the last probe can be shorter */
addrconf_mod_rs_timer(idev, (idev->rs_probes ==
- idev->cnf.rtr_solicits) ?
- idev->cnf.rtr_solicit_delay :
+ READ_ONCE(idev->cnf.rtr_solicits)) ?
+ READ_ONCE(idev->cnf.rtr_solicit_delay) :
idev->rs_interval);
} else {
/*
@@ -4006,24 +4067,25 @@ put:
*/
static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
{
- unsigned long rand_num;
struct inet6_dev *idev = ifp->idev;
+ unsigned long rand_num;
u64 nonce;
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
- rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1);
+ rand_num = get_random_u32_below(
+ READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1);
nonce = 0;
- if (idev->cnf.enhanced_dad ||
- dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) {
+ if (READ_ONCE(idev->cnf.enhanced_dad) ||
+ READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) {
do
get_random_bytes(&nonce, 6);
while (nonce == 0);
}
ifp->dad_nonce = nonce;
- ifp->dad_probes = idev->cnf.dad_transmits;
+ ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits);
addrconf_mod_dad_work(ifp, rand_num);
}
@@ -4043,8 +4105,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
net = dev_net(dev);
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- (net->ipv6.devconf_all->accept_dad < 1 &&
- idev->cnf.accept_dad < 1) ||
+ (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 &&
+ READ_ONCE(idev->cnf.accept_dad) < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
bool send_na = false;
@@ -4136,8 +4198,8 @@ static void addrconf_dad_work(struct work_struct *w)
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
- idev->cnf.accept_dad > 1) &&
+ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 ||
+ READ_ONCE(idev->cnf.accept_dad) > 1) &&
!idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
struct in6_addr addr;
@@ -4148,7 +4210,7 @@ static void addrconf_dad_work(struct work_struct *w)
if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
ipv6_addr_equal(&ifp->addr, &addr)) {
/* DAD failed for link-local based on MAC */
- idev->cnf.disable_ipv6 = 1;
+ WRITE_ONCE(idev->cnf.disable_ipv6, 1);
pr_info("%s: IPv6 being disabled!\n",
ifp->idev->dev->name);
@@ -4262,7 +4324,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
- ifp->idev->cnf.rtr_solicits != 0 &&
+ READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 &&
(dev->flags & IFF_LOOPBACK) == 0 &&
(dev->type != ARPHRD_TUNNEL) &&
!netif_is_team_port(dev);
@@ -4276,8 +4338,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
/* send unsolicited NA if enabled */
if (send_na &&
- (ifp->idev->cnf.ndisc_notify ||
- dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
+ (READ_ONCE(ifp->idev->cnf.ndisc_notify) ||
+ READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
/*router=*/ !!ifp->idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
@@ -4297,7 +4359,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
write_lock_bh(&ifp->idev->lock);
spin_lock(&ifp->lock);
ifp->idev->rs_interval = rfc3315_s14_backoff_init(
- ifp->idev->cnf.rtr_solicit_interval);
+ READ_ONCE(ifp->idev->cnf.rtr_solicit_interval));
ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
@@ -4577,9 +4639,7 @@ restart:
!ifp->regen_count && ifp->ifpub) {
/* This is a non-regenerated temporary addr. */
- unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
- ifp->idev->cnf.dad_transmits *
- max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev);
if (age + regen_advance >= ifp->prefered_lft) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
@@ -4741,6 +4801,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
u32 prio;
@@ -4761,10 +4822,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
- if (!expires)
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!(flags & RTF_EXPIRES)) {
fib6_clean_expires(f6i);
- else
+ fib6_remove_gc_list(f6i);
+ } else {
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(f6i);
}
@@ -4827,13 +4896,13 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
IFA_F_NOPREFIXROUTE);
ifp->flags |= cfg->ifa_flags;
- ifp->tstamp = jiffies;
- ifp->valid_lft = cfg->valid_lft;
- ifp->prefered_lft = cfg->preferred_lft;
- ifp->ifa_proto = cfg->ifa_proto;
+ WRITE_ONCE(ifp->tstamp, jiffies);
+ WRITE_ONCE(ifp->valid_lft, cfg->valid_lft);
+ WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft);
+ WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto);
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
- ifp->rt_priority = cfg->rt_priority;
+ WRITE_ONCE(ifp->rt_priority, cfg->rt_priority);
if (new_peer)
ifp->peer_addr = *cfg->peer_pfx;
@@ -5054,17 +5123,21 @@ struct inet6_fill_args {
enum addr_type_t type;
};
-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+static int inet6_fill_ifaddr(struct sk_buff *skb,
+ const struct inet6_ifaddr *ifa,
struct inet6_fill_args *args)
{
- struct nlmsghdr *nlh;
+ struct nlmsghdr *nlh;
u32 preferred, valid;
+ u32 flags, priority;
+ u8 proto;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ flags = READ_ONCE(ifa->flags);
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
@@ -5072,13 +5145,14 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
- spin_lock_bh(&ifa->lock);
- if (!((ifa->flags&IFA_F_PERMANENT) &&
- (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
- preferred = ifa->prefered_lft;
- valid = ifa->valid_lft;
+ preferred = READ_ONCE(ifa->prefered_lft);
+ valid = READ_ONCE(ifa->valid_lft);
+
+ if (!((flags & IFA_F_PERMANENT) &&
+ (preferred == INFINITY_LIFE_TIME))) {
if (preferred != INFINITY_LIFE_TIME) {
- long tval = (jiffies - ifa->tstamp)/HZ;
+ long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ;
+
if (preferred > tval)
preferred -= tval;
else
@@ -5094,28 +5168,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
- spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0)
goto error;
- } else
+ } else {
if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
goto error;
+ }
- if (ifa->rt_priority &&
- nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority))
+ priority = READ_ONCE(ifa->rt_priority);
+ if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority))
goto error;
- if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp),
+ preferred, valid) < 0)
goto error;
- if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+ if (nla_put_u32(skb, IFA_FLAGS, flags) < 0)
goto error;
- if (ifa->ifa_proto &&
- nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto))
+ proto = READ_ONCE(ifa->ifa_proto);
+ if (proto && nla_put_u8(skb, IFA_PROTO, proto))
goto error;
nlmsg_end(skb, nlh);
@@ -5126,12 +5201,13 @@ error:
return -EMSGSIZE;
}
-static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+static int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
struct inet6_fill_args *args)
{
- struct nlmsghdr *nlh;
- u8 scope = RT_SCOPE_UNIVERSE;
int ifindex = ifmca->idev->dev->ifindex;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ struct nlmsghdr *nlh;
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
@@ -5149,7 +5225,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+ put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp),
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -5159,13 +5235,14 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
return 0;
}
-static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+static int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
- struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
+ struct nlmsghdr *nlh;
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
@@ -5183,7 +5260,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+ put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp),
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -5194,24 +5271,23 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
}
/* called with rcu_read_lock() */
-static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
- struct netlink_callback *cb, int s_ip_idx,
+static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
+ struct netlink_callback *cb, int *s_ip_idx,
struct inet6_fill_args *fillargs)
{
- struct ifmcaddr6 *ifmca;
- struct ifacaddr6 *ifaca;
+ const struct ifmcaddr6 *ifmca;
+ const struct ifacaddr6 *ifaca;
int ip_idx = 0;
- int err = 1;
+ int err = 0;
- read_lock_bh(&idev->lock);
switch (fillargs->type) {
case UNICAST_ADDR: {
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
fillargs->event = RTM_NEWADDR;
/* unicast address incl. temp addr */
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
- if (ip_idx < s_ip_idx)
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
+ if (ip_idx < *s_ip_idx)
goto next;
err = inet6_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
@@ -5223,27 +5299,25 @@ next:
break;
}
case MULTICAST_ADDR:
- read_unlock_bh(&idev->lock);
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
- for (ifmca = rtnl_dereference(idev->mc_list);
+ for (ifmca = rcu_dereference(idev->mc_list);
ifmca;
- ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
- if (ip_idx < s_ip_idx)
+ ifmca = rcu_dereference(ifmca->next), ip_idx++) {
+ if (ip_idx < *s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
if (err < 0)
break;
}
- read_lock_bh(&idev->lock);
break;
case ANYCAST_ADDR:
fillargs->event = RTM_GETANYCAST;
/* anycast address */
- for (ifaca = idev->ac_list; ifaca;
- ifaca = ifaca->aca_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
+ for (ifaca = rcu_dereference(idev->ac_list); ifaca;
+ ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) {
+ if (ip_idx < *s_ip_idx)
continue;
err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
if (err < 0)
@@ -5253,8 +5327,7 @@ next:
default:
break;
}
- read_unlock_bh(&idev->lock);
- cb->args[2] = ip_idx;
+ *s_ip_idx = err ? ip_idx : 0;
return err;
}
@@ -5317,6 +5390,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
enum addr_type_t type)
{
+ struct net *tgt_net = sock_net(skb->sk);
const struct nlmsghdr *nlh = cb->nlh;
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(cb->skb).portid,
@@ -5325,72 +5399,52 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.netnsid = -1,
.type = type,
};
- struct net *tgt_net = sock_net(skb->sk);
- int idx, s_idx, s_ip_idx;
- int h, s_h;
+ struct {
+ unsigned long ifindex;
+ int ip_idx;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
int err = 0;
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- s_ip_idx = cb->args[2];
-
+ rcu_read_lock();
if (cb->strict_check) {
err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- goto put_tgt_net;
+ goto done;
err = 0;
if (fillargs.ifindex) {
- dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
- if (!dev) {
- err = -ENODEV;
- goto put_tgt_net;
- }
+ err = -ENODEV;
+ dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
+ if (!dev)
+ goto done;
idev = __in6_dev_get(dev);
- if (idev) {
- err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
+ if (idev)
+ err = in6_dump_addrs(idev, skb, cb,
+ &ctx->ip_idx,
&fillargs);
- if (err > 0)
- err = 0;
- }
- goto put_tgt_net;
+ goto done;
}
}
- rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (h > s_h || idx > s_idx)
- s_ip_idx = 0;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
-
- if (in6_dump_addrs(idev, skb, cb, s_ip_idx,
- &fillargs) < 0)
- goto done;
-cont:
- idx++;
- }
+ cb->seq = inet6_base_seq(tgt_net);
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ if (err < 0)
+ goto done;
}
done:
rcu_read_unlock();
- cb->args[0] = h;
- cb->args[1] = idx;
-put_tgt_net:
if (fillargs.netnsid >= 0)
put_net(tgt_net);
- return skb->len ? : err;
+ return err;
}
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -5494,9 +5548,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr)
- return -EINVAL;
-
+ if (!addr) {
+ err = -EINVAL;
+ goto errout;
+ }
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
dev = dev_get_by_index(tgt_net, ifm->ifa_index);
@@ -5562,87 +5617,97 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
-static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
- __s32 *array, int bytes)
+static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
+ __s32 *array, int bytes)
{
BUG_ON(bytes < (DEVCONF_MAX * 4));
memset(array, 0, bytes);
- array[DEVCONF_FORWARDING] = cnf->forwarding;
- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
- array[DEVCONF_MTU6] = cnf->mtu6;
- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
- array[DEVCONF_AUTOCONF] = cnf->autoconf;
- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+ array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding);
+ array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit);
+ array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6);
+ array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra);
+ array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects);
+ array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf);
+ array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits);
+ array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits);
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval));
array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_max_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval));
array[DEVCONF_RTR_SOLICIT_DELAY] =
- jiffies_to_msecs(cnf->rtr_solicit_delay);
- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay));
+ array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version);
array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval));
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
- array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
- array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+ jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval));
+ array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr);
+ array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft);
+ array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft);
+ array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry);
+ array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor);
+ array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses);
+ array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr);
+ array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric);
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] =
+ READ_ONCE(cnf->accept_ra_min_hop_limit);
+ array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo);
#ifdef CONFIG_IPV6_ROUTER_PREF
- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+ array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref);
array[DEVCONF_RTR_PROBE_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_probe_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval));
#ifdef CONFIG_IPV6_ROUTE_INFO
- array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_min_plen);
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_max_plen);
#endif
#endif
- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+ array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp);
+ array[DEVCONF_ACCEPT_SOURCE_ROUTE] =
+ READ_ONCE(cnf->accept_source_route);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
- array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
+ array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad);
+ array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic);
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
- array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
- array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
- array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
- array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
- array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
- array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
- array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
- array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
+ array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6);
+ array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad);
+ array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao);
+ array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify);
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] =
+ READ_ONCE(cnf->suppress_frag_ndisc);
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] =
+ READ_ONCE(cnf->accept_ra_from_local);
+ array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu);
+ array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] =
+ READ_ONCE(cnf->ignore_routes_with_linkdown);
/* we omit DEVCONF_STABLE_SECRET for now */
- array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
- array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
- array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
- array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
- array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only);
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] =
+ READ_ONCE(cnf->drop_unicast_in_l2_multicast);
+ array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na);
+ array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down);
+ array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled);
#ifdef CONFIG_IPV6_SEG6_HMAC
- array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+ array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac);
#endif
- array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
- array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
- array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
- array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
- array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
- array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
- array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
- array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
- array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
- array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
- array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
+ array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad);
+ array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode);
+ array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy);
+ array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass);
+ array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled);
+ array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled);
+ array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id);
+ array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide);
+ array[DEVCONF_NDISC_EVICT_NOCARRIER] =
+ READ_ONCE(cnf->ndisc_evict_nocarrier);
+ array[DEVCONF_ACCEPT_UNTRACKED_NA] =
+ READ_ONCE(cnf->accept_untracked_na);
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft);
}
static inline size_t inet6_ifla6_size(void)
@@ -5722,13 +5787,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
- struct nlattr *nla;
struct ifla_cacheinfo ci;
+ struct nlattr *nla;
+ u32 ra_mtu;
- if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
+ if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags)))
goto nla_put_failure;
ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp));
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
@@ -5760,11 +5826,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE,
+ READ_ONCE(idev->cnf.addr_gen_mode)))
goto nla_put_failure;
- if (idev->ra_mtu &&
- nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ ra_mtu = READ_ONCE(idev->ra_mtu);
+ if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu))
goto nla_put_failure;
return 0;
@@ -5826,7 +5893,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
return -EINVAL;
}
- if (idev->cnf.rtr_solicits == 0) {
+ if (READ_ONCE(idev->cnf.rtr_solicits) == 0) {
NL_SET_ERR_MSG(extack,
"Router solicitation is disabled on device");
return -EINVAL;
@@ -5859,7 +5926,7 @@ update_lft:
if (update_rs) {
idev->if_flags |= IF_RS_SENT;
idev->rs_interval = rfc3315_s14_backoff_init(
- idev->cnf.rtr_solicit_interval);
+ READ_ONCE(idev->cnf.rtr_solicit_interval));
idev->rs_probes = 1;
addrconf_mod_rs_timer(idev, idev->rs_interval);
}
@@ -5965,7 +6032,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
- idev->cnf.addr_gen_mode = mode;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, mode);
}
return 0;
@@ -5977,6 +6044,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
+ int ifindex, iflink;
void *protoinfo;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
@@ -5987,18 +6055,20 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_family = AF_INET6;
hdr->__ifi_pad = 0;
hdr->ifi_type = dev->type;
- hdr->ifi_index = dev->ifindex;
+ ifindex = READ_ONCE(dev->ifindex);
+ hdr->ifi_index = ifindex;
hdr->ifi_flags = dev_get_flags(dev);
hdr->ifi_change = 0;
+ iflink = dev_get_iflink(dev);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ (ifindex != iflink &&
+ nla_put_u32(skb, IFLA_LINK, iflink)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
+ netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN))
goto nla_put_failure;
protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO);
if (!protoinfo)
@@ -6044,50 +6114,39 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx = 0, s_idx;
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err;
/* only requests using strict checking can pass data to
* influence the dump
*/
if (cb->strict_check) {
- int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+ err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
if (err < 0)
return err;
}
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
+ err = 0;
rcu_read_lock();
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
- if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWLINK, NLM_F_MULTI) < 0)
- goto out;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_fill_ifinfo(skb, idev,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWLINK, NLM_F_MULTI);
+ if (err < 0)
+ break;
}
-out:
rcu_read_unlock();
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
@@ -6308,7 +6367,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
- idev->cnf.disable_ipv6 = newf;
+
+ WRITE_ONCE(idev->cnf.disable_ipv6, newf);
if (changed)
dev_disable_change(idev);
}
@@ -6317,23 +6377,22 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
+ if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
+ WRITE_ONCE(*p, newf);
+ return 0;
+ }
+
if (!rtnl_trylock())
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
- *p = newf;
-
- if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
- rtnl_unlock();
- return 0;
- }
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_all->disable_ipv6) {
- net->ipv6.devconf_dflt->disable_ipv6 = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf);
addrconf_disable_change(net, newf);
} else if ((!newf) ^ (!old))
dev_disable_change((struct inet6_dev *)table->extra1);
@@ -6444,24 +6503,25 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
}
if (idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
- net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+ WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val);
for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ new_val);
addrconf_init_auto_addrs(idev->dev);
}
}
}
- *((u32 *)ctl->data) = new_val;
+ WRITE_ONCE(*((u32 *)ctl->data), new_val);
}
out:
@@ -6520,14 +6580,15 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
struct inet6_dev *idev = __in6_dev_get(dev);
if (idev) {
- idev->cnf.addr_gen_mode =
- IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
}
} else {
struct inet6_dev *idev = ctl->extra1;
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
out:
@@ -6607,20 +6668,19 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
static
int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
{
+ struct net *net = (struct net *)ctl->extra2;
struct inet6_dev *idev;
- struct net *net;
- if (!rtnl_trylock())
- return restart_syscall();
-
- *valp = val;
-
- net = (struct net *)ctl->extra2;
if (valp == &net->ipv6.devconf_dflt->disable_policy) {
- rtnl_unlock();
+ WRITE_ONCE(*valp, val);
return 0;
}
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ WRITE_ONCE(*valp, val);
+
if (valp == &net->ipv6.devconf_all->disable_policy) {
struct net_device *dev;
@@ -6790,6 +6850,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "regen_min_advance",
+ .data = &ipv6_devconf.regen_min_advance,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "regen_max_retry",
.data = &ipv6_devconf.regen_max_retry,
.maxlen = sizeof(int),
@@ -7349,7 +7416,8 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
- addrconf_wq = create_workqueue("ipv6_addrconf");
+ /* All works using addrconf_wq need to lock rtnl. */
+ addrconf_wq = create_singlethread_workqueue("ipv6_addrconf");
if (!addrconf_wq) {
err = -ENOMEM;
goto out_nowq;
@@ -7372,7 +7440,7 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, 0);
+ NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
@@ -7386,21 +7454,25 @@ int __init addrconf_init(void)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
inet6_rtm_getaddr, inet6_dump_ifaddr,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
- NULL, inet6_dump_ifmcaddr, 0);
+ NULL, inet6_dump_ifmcaddr,
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
- NULL, inet6_dump_ifacaddr, 0);
+ NULL, inet6_dump_ifacaddr,
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
inet6_netconf_get_devconf,
inet6_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 507a8353a6bd..c008d21925d7 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
EXPORT_SYMBOL_GPL(ipv6_stub);
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LOOPBACK_INIT;
EXPORT_SYMBOL(in6addr_loopback);
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_ANY_INIT;
EXPORT_SYMBOL(in6addr_any);
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allnodes);
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allrouters);
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
static void snmp6_free_dev(struct inet6_dev *idev)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 13a1833a4df5..8041dc181bd4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -64,6 +64,7 @@
#include <net/xfrm.h>
#include <net/ioam6.h>
#include <net/rawv6.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -199,6 +200,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
@@ -733,7 +737,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet6_recvmsg, /* retpoline's sake */
.read_skb = udp_read_skb,
.mmap = sock_no_mmap,
- .set_peek_off = sk_set_peek_off,
+ .set_peek_off = udp_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
#endif
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 2016e90e6e1d..eb474f0987ae 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -800,5 +800,6 @@ static void __exit ah6_fini(void)
module_init(ah6_init);
module_exit(ah6_fini);
+MODULE_DESCRIPTION("IPv6 AH transformation helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bb17f484ee2c..0f2506e35359 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -296,7 +296,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
goto out;
}
- for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ for (aca = rtnl_dereference(idev->ac_list); aca;
+ aca = rtnl_dereference(aca->aca_next)) {
if (ipv6_addr_equal(&aca->aca_addr, addr)) {
aca->aca_users++;
err = 0;
@@ -317,13 +318,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
goto out;
}
- aca->aca_next = idev->ac_list;
- idev->ac_list = aca;
-
/* Hold this for addrconf_join_solict() below before we unlock,
* it is already exposed via idev->ac_list.
*/
aca_get(aca);
+ aca->aca_next = idev->ac_list;
+ rcu_assign_pointer(idev->ac_list, aca);
+
write_unlock_bh(&idev->lock);
ipv6_add_acaddr_hash(net, aca);
@@ -350,7 +351,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
write_lock_bh(&idev->lock);
prev_aca = NULL;
- for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ for (aca = rtnl_dereference(idev->ac_list); aca;
+ aca = rtnl_dereference(aca->aca_next)) {
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
prev_aca = aca;
@@ -364,9 +366,9 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
if (prev_aca)
- prev_aca->aca_next = aca->aca_next;
+ rcu_assign_pointer(prev_aca->aca_next, aca->aca_next);
else
- idev->ac_list = aca->aca_next;
+ rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
@@ -392,8 +394,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
struct ifacaddr6 *aca;
write_lock_bh(&idev->lock);
- while ((aca = idev->ac_list) != NULL) {
- idev->ac_list = aca->aca_next;
+ while ((aca = rtnl_dereference(idev->ac_list)) != NULL) {
+ rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
ipv6_del_acaddr_hash(aca);
@@ -420,11 +422,10 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- for (aca = idev->ac_list; aca; aca = aca->aca_next)
+ for (aca = rcu_dereference(idev->ac_list); aca;
+ aca = rcu_dereference(aca->aca_next))
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
- read_unlock_bh(&idev->lock);
return aca != NULL;
}
return false;
@@ -477,30 +478,25 @@ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
struct ac6_iter_state {
struct seq_net_private p;
struct net_device *dev;
- struct inet6_dev *idev;
};
#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
{
- struct ifacaddr6 *im = NULL;
struct ac6_iter_state *state = ac6_seq_private(seq);
struct net *net = seq_file_net(seq);
+ struct ifacaddr6 *im = NULL;
- state->idev = NULL;
for_each_netdev_rcu(net, state->dev) {
struct inet6_dev *idev;
+
idev = __in6_dev_get(state->dev);
if (!idev)
continue;
- read_lock_bh(&idev->lock);
- im = idev->ac_list;
- if (im) {
- state->idev = idev;
+ im = rcu_dereference(idev->ac_list);
+ if (im)
break;
- }
- read_unlock_bh(&idev->lock);
}
return im;
}
@@ -508,22 +504,17 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
{
struct ac6_iter_state *state = ac6_seq_private(seq);
+ struct inet6_dev *idev;
- im = im->aca_next;
+ im = rcu_dereference(im->aca_next);
while (!im) {
- if (likely(state->idev != NULL))
- read_unlock_bh(&state->idev->lock);
-
state->dev = next_net_device_rcu(state->dev);
- if (!state->dev) {
- state->idev = NULL;
+ if (!state->dev)
break;
- }
- state->idev = __in6_dev_get(state->dev);
- if (!state->idev)
+ idev = __in6_dev_get(state->dev);
+ if (!idev)
continue;
- read_lock_bh(&state->idev->lock);
- im = state->idev->ac_list;
+ im = rcu_dereference(idev->ac_list);
}
return im;
}
@@ -555,12 +546,6 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ac6_seq_stop(struct seq_file *seq, void *v)
__releases(RCU)
{
- struct ac6_iter_state *state = ac6_seq_private(seq);
-
- if (likely(state->idev != NULL)) {
- read_unlock_bh(&state->idev->lock);
- state->idev = NULL;
- }
rcu_read_unlock();
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1578ed9e97d8..eb8ee1e9373a 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -657,11 +657,8 @@ static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def,
net_clen_bits,
spot + 1,
1);
- if (spot < 0) {
- if (spot == -2)
- return -EFAULT;
+ if (spot < 0)
return 0;
- }
ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat,
spot,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2cc1a45742d8..6e6efe026cdc 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1301,5 +1301,6 @@ static void __exit esp6_fini(void)
module_init(esp6_init);
module_exit(esp6_fini);
+MODULE_DESCRIPTION("IPv6 ESP transformation helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4952ae792450..6789623b2b0d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,6 +50,7 @@
#endif
#include <net/rpl.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <net/ioam6.h>
#include <net/dst_metadata.h>
@@ -177,6 +178,8 @@ static bool ip6_parse_tlv(bool hopbyhop,
case IPV6_TLV_IOAM:
if (!ipv6_hop_ioam(skb, off))
return false;
+
+ nh = skb_network_header(skb);
break;
case IPV6_TLV_JUMBO:
if (!ipv6_hop_jumbo(skb, off))
@@ -376,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
- accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
- if (accept_seg6 > idev->cnf.seg6_enabled)
- accept_seg6 = idev->cnf.seg6_enabled;
+ accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled),
+ READ_ONCE(idev->cnf.seg6_enabled));
if (!accept_seg6) {
kfree_skb(skb);
@@ -652,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
struct net *net = dev_net(skb->dev);
- int accept_source_route = net->ipv6.devconf_all->accept_source_route;
+ int accept_source_route;
+
+ accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route);
- if (idev && accept_source_route > idev->cnf.accept_source_route)
- accept_source_route = idev->cnf.accept_source_route;
+ if (idev)
+ accept_source_route = min(accept_source_route,
+ READ_ONCE(idev->cnf.accept_source_route));
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
@@ -799,7 +804,7 @@ looped_back:
ip6_route_input(skb);
if (skb_dst(skb)->error) {
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
dst_input(skb);
return -1;
}
@@ -816,7 +821,7 @@ looped_back:
goto looped_back;
}
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
dst_input(skb);
return -1;
@@ -878,14 +883,6 @@ void ipv6_exthdrs_exit(void)
Hop-by-hop options.
**********************************/
-/*
- * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
- */
-static inline struct net *ipv6_skb_net(struct sk_buff *skb)
-{
- return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
-}
-
/* Router Alert as of RFC 2711 */
static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
@@ -916,7 +913,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
goto drop;
/* Ignore if IOAM is not enabled on ingress */
- if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled))
goto ignore;
/* Truncated Option header */
@@ -936,14 +933,25 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
goto drop;
/* Ignore if the IOAM namespace is unknown */
- ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id);
if (!ns)
goto ignore;
if (!skb_valid_dst(skb))
ip6_route_input(skb);
+ /* About to mangle packet header */
+ if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
+ goto drop;
+
+ /* Trace pointer may have changed */
+ trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
+ + optoff + sizeof(*hdr));
+
ioam6_fill_trace_data(skb, ns, trace, true);
+
+ ioam6_event(IOAM6_EVENT_TRACE, dev_net(skb->dev),
+ GFP_ATOMIC, (void *)trace, hdr->opt_len - 2);
break;
default:
break;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 7523c4baef35..52c04f0ac498 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -449,6 +449,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(16); /* src */
}
+static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
+{
+ rt_genid_bump_ipv6(ops->fro_net);
+}
+
static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
@@ -461,6 +466,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
+ .flush_cache = fib6_rule_flush_cache,
.nlgroup = RTNLGRP_IPV6_RULE,
.owner = THIS_MODULE,
.fro_net = &init_net,
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b0e8d278e8a9..2e81383b663b 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -14,6 +14,7 @@
#include <linux/random.h>
#include <net/addrconf.h>
+#include <net/hotdata.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -25,16 +26,13 @@ u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
{
- static u32 inet6_ehash_secret __read_mostly;
- static u32 ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
- net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+ net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret));
lhash = (__force u32)laddr->s6_addr32[3];
- fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+ fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 571f0e4d9cf3..08c929513065 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -612,6 +612,68 @@ static const struct genl_ops ioam6_genl_ops[] = {
},
};
+#define IOAM6_GENL_EV_GRP_OFFSET 0
+
+static const struct genl_multicast_group ioam6_mcgrps[] = {
+ [IOAM6_GENL_EV_GRP_OFFSET] = { .name = IOAM6_GENL_EV_GRP_NAME,
+ .flags = GENL_MCAST_CAP_NET_ADMIN },
+};
+
+static int ioam6_event_put_trace(struct sk_buff *skb,
+ struct ioam6_trace_hdr *trace,
+ unsigned int len)
+{
+ if (nla_put_u16(skb, IOAM6_EVENT_ATTR_TRACE_NAMESPACE,
+ be16_to_cpu(trace->namespace_id)) ||
+ nla_put_u8(skb, IOAM6_EVENT_ATTR_TRACE_NODELEN, trace->nodelen) ||
+ nla_put_u32(skb, IOAM6_EVENT_ATTR_TRACE_TYPE,
+ be32_to_cpu(trace->type_be32)) ||
+ nla_put(skb, IOAM6_EVENT_ATTR_TRACE_DATA,
+ len - sizeof(struct ioam6_trace_hdr) - trace->remlen * 4,
+ trace->data + trace->remlen * 4))
+ return 1;
+
+ return 0;
+}
+
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&ioam6_genl_family, net,
+ IOAM6_GENL_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &ioam6_genl_family, 0, type);
+ if (!nlh)
+ goto nla_put_failure;
+
+ switch (type) {
+ case IOAM6_EVENT_UNSPEC:
+ WARN_ON_ONCE(1);
+ break;
+ case IOAM6_EVENT_TRACE:
+ if (ioam6_event_put_trace(skb, (struct ioam6_trace_hdr *)opt,
+ opt_len))
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(skb, nlh);
+ genlmsg_multicast_netns(&ioam6_genl_family, net, skb, 0,
+ IOAM6_GENL_EV_GRP_OFFSET, gfp);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+}
+
static struct genl_family ioam6_genl_family __ro_after_init = {
.name = IOAM6_GENL_NAME,
.version = IOAM6_GENL_VERSION,
@@ -620,6 +682,8 @@ static struct genl_family ioam6_genl_family __ro_after_init = {
.ops = ioam6_genl_ops,
.n_ops = ARRAY_SIZE(ioam6_genl_ops),
.resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1,
+ .mcgrps = ioam6_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ioam6_mcgrps),
.module = THIS_MODULE,
};
@@ -663,7 +727,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -671,7 +735,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -758,7 +822,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+ raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
@@ -766,7 +830,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+ raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4fc2cae0d116..5c558dc1c683 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -617,8 +620,11 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
- .filter.dump_routes = true };
+ struct rt6_rtnl_dump_arg arg = {
+ .filter.dump_exceptions = true,
+ .filter.dump_routes = true,
+ .filter.rtnl_held = true,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
@@ -751,8 +757,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
int bit;
__be32 dir = 0;
- RT6_TRACE("fib6_add_1\n");
-
/* insert node in tree */
fn = root;
@@ -1057,6 +1061,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
}
/*
@@ -1117,10 +1124,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
rt->fib6_nsiblings = 0;
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
- if (!(rt->fib6_flags & RTF_EXPIRES))
+ if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- else
+ fib6_remove_gc_list(iter);
+ } else {
fib6_set_expires(iter, rt->expires);
+ fib6_add_gc_list(iter);
+ }
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1479,6 +1489,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (rt->fib6_flags & RTF_EXPIRES)
+ fib6_add_gc_list(rt);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -1803,7 +1817,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
lockdep_is_held(&table->tb6_lock));
struct fib6_info *new_fn_leaf;
- RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
@@ -1866,7 +1880,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
FOR_WALKERS(net, w) {
if (!child) {
if (w->node == fn) {
- RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ pr_debug("W %p adjusted by delnode 1, s=%d/%d\n",
+ w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
@@ -1874,10 +1889,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (w->node == fn) {
w->node = child;
if (children&2) {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
@@ -1905,8 +1922,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct net *net = info->nl_net;
bool notify_del = false;
- RT6_TRACE("fib6_del_route\n");
-
/* If the deleted route is the first in the node and it is not part of
* a multipath route, then we need to replace it with the next route
* in the node, if exists.
@@ -1955,7 +1970,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
- RT6_TRACE("walker %p adjusted by delroute\n", w);
+ pr_debug("walker %p adjusted by delroute\n", w);
w->leaf = rcu_dereference_protected(rt->fib6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
@@ -2281,9 +2296,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2293,7 +2307,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
if (time_after(now, rt->expires)) {
- RT6_TRACE("expiring %p\n", rt);
+ pr_debug("expiring %p\n", rt);
return -1;
}
gc_args->more++;
@@ -2308,6 +2322,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+
+ fib6_gc_table(net, table, gc_args);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2323,7 +2373,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -2383,6 +2433,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2395,6 +2446,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif
fib6_tables_init(net);
@@ -2444,10 +2496,8 @@ int __init fib6_init(void)
{
int ret = -ENOMEM;
- fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
+ fib6_node_kmem = KMEM_CACHE(fib6_node,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT);
if (!fib6_node_kmem)
goto out;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 070d87abf7c0..5e97e0aa8e07 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1511,6 +1511,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
ip6gre_tnl_init_features(dev);
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
cleanup_dst_cache_init:
@@ -1632,21 +1633,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6gre_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch = ip6gre_exit_batch_net,
+ .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
@@ -1903,6 +1902,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
ip6erspan_tnl_link_config(tunnel, 1);
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
cleanup_dst_cache_init:
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index b8378814532c..133610a49da6 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
SKB_DR_SET(reason, NOT_SPECIFIED);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
- !idev || unlikely(idev->cnf.disable_ipv6)) {
+ !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- if (idev && unlikely(idev->cnf.disable_ipv6))
+ if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6)))
SKB_DR_SET(reason, IPV6DISABLED);
goto drop;
}
@@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
- idev->cnf.drop_unicast_in_l2_multicast) {
+ READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) {
SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST);
goto err;
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cca64c7809be..b41e35af69ea 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -419,14 +419,6 @@ static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff)
return inet_gro_complete(skb, nhoff);
}
-static struct packet_offload ipv6_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IPV6),
- .callbacks = {
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
- },
-};
static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -486,7 +478,15 @@ static int __init ipv6_offload_init(void)
if (ipv6_exthdrs_offload_init() < 0)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
- dev_add_offload(&ipv6_packet_offload);
+ net_hotdata.ipv6_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ipv6_packet_offload);
inet_add_offload(&sit_offload, IPPROTO_IPV6);
inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a722a43dd668..02eeca5492cd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (unlikely(idev->cnf.disable_ipv6)) {
+ if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
@@ -501,7 +501,7 @@ int ip6_forward(struct sk_buff *skb)
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
goto error;
if (skb->pkt_type != PACKET_HOST)
@@ -513,8 +513,8 @@ int ip6_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!net->ipv6.devconf_all->disable_policy &&
- (!idev || !idev->cnf.disable_policy) &&
+ if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
+ (!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -552,7 +552,7 @@ int ip6_forward(struct sk_buff *skb)
}
/* XXX: idev->cnf.proxy_ndp? */
- if (net->ipv6.devconf_all->proxy_ndp &&
+ if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
@@ -1424,11 +1424,11 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = (struct rt6_info *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1440,10 +1440,6 @@ static int __ip6_append_data(struct sock *sk,
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1538,6 +1534,11 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1794,6 +1795,8 @@ error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
@@ -1922,7 +1925,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;
-
+ skb->mono_delivery_time = !!skb->tstamp;
ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
if (proto == IPPROTO_ICMPV6) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 46c19bd48990..e9cc315832cb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -247,7 +247,6 @@ static void ip6_dev_free(struct net_device *dev)
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
}
static int ip6_tnl_create2(struct net_device *dev)
@@ -796,8 +795,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
struct sk_buff *skb),
bool log_ecn_err)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- int err;
+ const struct ipv6hdr *ipv6h;
+ int nh, err;
if ((!(tpi->flags & TUNNEL_CSUM) &&
(tunnel->parms.i_flags & TUNNEL_CSUM)) ||
@@ -829,7 +828,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
goto drop;
}
- ipv6h = ipv6_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
@@ -837,7 +835,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_reset_mac_header(skb);
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ ipv6h = (struct ipv6hdr *)(skb->head + nh);
+
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -1741,7 +1755,7 @@ int ip6_tnl_get_iflink(const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- return t->parms.link;
+ return READ_ONCE(t->parms.link);
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
@@ -1833,6 +1847,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->features |= NETIF_F_LLTX;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
dev->features |= IPXIPX_FEATURES;
@@ -1858,13 +1873,10 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret)
- goto free_stats;
+ return ret;
ret = gro_cells_init(&t->gro_cells, dev);
if (ret)
@@ -1883,13 +1895,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
destroy_dst:
dst_cache_destroy(&t->dst_cache);
-free_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -2267,21 +2277,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6_tnl_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch = ip6_tnl_exit_batch_net,
+ .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index a7bf0327b380..c99053189ea8 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -182,4 +182,5 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);
+MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index e550240c85e1..7f4f976aa24a 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -935,6 +935,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -1174,24 +1175,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
+static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct vti6_net *ip6n;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, &list);
+ vti6_destroy_tunnels(ip6n, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch = vti6_exit_batch_net,
+ .exit_batch_rtnl = vti6_exit_batch_rtnl,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9782c180fee6..cb0ee81a068a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1373,10 +1373,7 @@ int __init ip6_mr_init(void)
{
int err;
- mrt_cachep = kmem_cache_create("ip6_mrt_cache",
- sizeof(struct mfc6_cache),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
if (!mrt_cachep)
return -ENOMEM;
@@ -2595,7 +2592,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 56c3c467f9de..d4c28ec1bc51 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -948,6 +948,8 @@ done:
if (optlen < sizeof(int))
goto e_inval;
retv = ip6_ra_control(sk, val);
+ if (retv == 0)
+ inet6_assign_bit(RTALERT, sk, valbool);
break;
case IPV6_FLOWLABEL_MGR:
retv = ipv6_flowlabel_opt(sk, optval, optlen);
@@ -1346,7 +1348,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
if (val < 0)
- val = sock_net(sk)->ipv6.devconf_all->hop_limit;
+ val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit);
break;
}
@@ -1445,6 +1447,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->rxopt.bits.recvfragsize;
break;
+ case IPV6_ROUTER_ALERT:
+ val = inet6_test_bit(RTALERT, sk);
+ break;
+
case IPV6_ROUTER_ALERT_ISOLATE:
val = inet6_test_bit(RTALERT_ISOLATE, sk);
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bc6e0a0bad3c..7ba01d8cfbae 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -159,9 +159,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
int iv;
if (mld_in_v1_mode(idev))
- iv = idev->cnf.mldv1_unsolicited_report_interval;
+ iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval);
else
- iv = idev->cnf.mldv2_unsolicited_report_interval;
+ iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval);
return iv > 0 ? iv : 1;
}
@@ -1202,15 +1202,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
static int mld_force_mld_version(const struct inet6_dev *idev)
{
+ const struct net *net = dev_net(idev->dev);
+ int all_force;
+
+ all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version);
/* Normally, both are 0 here. If enforcement to a particular is
* being used, individual device enforcement will have a lower
* precedence over 'all' device (.../conf/all/force_mld_version).
*/
-
- if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
- return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
- else
- return idev->cnf.force_mld_version;
+ return all_force ?: READ_ONCE(idev->cnf.force_mld_version);
}
static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
@@ -2719,7 +2719,6 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Should stop work after group drop. or we will
* start work again in mld_ifc_event()
*/
- synchronize_net();
mld_query_stop_work(idev);
mld_report_stop_work(idev);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 83d2a8be263f..6a16a5bd0d91 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -405,6 +405,7 @@ static void __exit mip6_fini(void)
module_init(mip6_init);
module_exit(mip6_fini);
+MODULE_DESCRIPTION("IPv6 Mobility driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a19999b30bc0..ae134634c323 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *skb,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- tclass = idev ? idev->cnf.ndisc_tclass : 0;
+ tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0;
rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
@@ -535,7 +535,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
override = false;
- inc_opt |= ifp->idev->cnf.force_tllao;
+ inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao);
in6_ifa_put(ifp);
} else {
if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
@@ -903,8 +903,9 @@ have_ifp:
}
if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
- (idev->cnf.forwarding &&
- (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
+ (READ_ONCE(idev->cnf.forwarding) &&
+ (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) ||
+ READ_ONCE(idev->cnf.proxy_ndp)) &&
(is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
@@ -929,7 +930,7 @@ have_ifp:
}
if (is_router < 0)
- is_router = idev->cnf.forwarding;
+ is_router = READ_ONCE(idev->cnf.forwarding);
if (dad) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
@@ -973,7 +974,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
- switch (idev->cnf.accept_untracked_na) {
+ switch (READ_ONCE(idev->cnf.accept_untracked_na)) {
case 0: /* Don't accept untracked na (absent in neighbor cache) */
return 0;
case 1: /* Create new entries from na if currently untracked */
@@ -1024,7 +1025,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* drop_unsolicited_na takes precedence over accept_untracked_na
*/
if (!msg->icmph.icmp6_solicited && idev &&
- idev->cnf.drop_unsolicited_na)
+ READ_ONCE(idev->cnf.drop_unsolicited_na))
return reason;
if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
@@ -1080,7 +1081,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* Note that we don't do a (daddr == all-routers-mcast) check.
*/
new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
- if (!neigh && lladdr && idev && idev->cnf.forwarding) {
+ if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) {
if (accept_untracked_na(dev, saddr)) {
neigh = neigh_create(&nd_tbl, &msg->target, dev);
new_state = NUD_STALE;
@@ -1100,7 +1101,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* has already sent a NA to us.
*/
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
- net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
+ READ_ONCE(net->ipv6.devconf_all->forwarding) &&
+ READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
/* XXX: idev->cnf.proxy_ndp */
goto out;
@@ -1148,7 +1150,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
}
/* Don't accept RS if we're not in router mode */
- if (!idev->cnf.forwarding)
+ if (!READ_ONCE(idev->cnf.forwarding))
goto out;
/*
@@ -1237,6 +1239,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
struct ndisc_options ndopts;
struct fib6_info *rt = NULL;
struct inet6_dev *in6_dev;
+ struct fib6_table *table;
u32 defrtr_usr_metric;
unsigned int pref = 0;
__u32 old_if_flags;
@@ -1317,7 +1320,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
if (old_if_flags != in6_dev->if_flags)
send_ifinfo_notify = true;
- if (!in6_dev->cnf.accept_ra_defrtr) {
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
ND_PRINTK(2, info,
"RA: %s, defrtr is false for dev: %s\n",
__func__, skb->dev->name);
@@ -1325,7 +1328,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
}
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
- if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ if (lifetime != 0 &&
+ lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
ND_PRINTK(2, info,
"RA: router lifetime (%ds) is too short: %s\n",
lifetime, skb->dev->name);
@@ -1336,7 +1340,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
* accept_ra_from_local is set to true.
*/
net = dev_net(in6_dev->dev);
- if (!in6_dev->cnf.accept_ra_from_local &&
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
@@ -1348,7 +1352,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
if (pref == ICMPV6_ROUTER_PREF_INVALID ||
- !in6_dev->cnf.accept_ra_rtr_pref)
+ !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref))
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
/* routes added from RAs do not use nexthop objects */
@@ -1382,7 +1386,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
neigh_release(neigh);
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
- skb->dev, pref, defrtr_usr_metric);
+ skb->dev, pref, defrtr_usr_metric,
+ lifetime);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
@@ -1409,12 +1414,21 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
}
- if (rt)
+ if (rt) {
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(rt, jiffies + (HZ * lifetime));
- if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
+ fib6_add_gc_list(rt);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
- if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
- in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <=
+ ra_msg->icmph.icmp6_hop_limit) {
+ WRITE_ONCE(in6_dev->cnf.hop_limit,
+ ra_msg->icmph.icmp6_hop_limit);
fib6_metric_set(rt, RTAX_HOPLIMIT,
ra_msg->icmph.icmp6_hop_limit);
} else {
@@ -1496,7 +1510,7 @@ skip_linkparms:
}
#ifdef CONFIG_IPV6_ROUTE_INFO
- if (!in6_dev->cnf.accept_ra_from_local &&
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
in6_dev->dev, 0)) {
ND_PRINTK(2, info,
@@ -1505,7 +1519,7 @@ skip_linkparms:
goto skip_routeinfo;
}
- if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
+ if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_ri;
p;
@@ -1517,14 +1531,14 @@ skip_linkparms:
continue;
#endif
if (ri->prefix_len == 0 &&
- !in6_dev->cnf.accept_ra_defrtr)
+ !READ_ONCE(in6_dev->cnf.accept_ra_defrtr))
continue;
if (ri->lifetime != 0 &&
- ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft))
continue;
- if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+ if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen))
continue;
- if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
+ if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen))
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
@@ -1544,7 +1558,7 @@ skip_routeinfo:
}
#endif
- if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
+ if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_pi;
p;
@@ -1555,7 +1569,7 @@ skip_routeinfo:
}
}
- if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) {
+ if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) {
__be32 n;
u32 mtu;
@@ -1569,8 +1583,8 @@ skip_routeinfo:
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
- } else if (in6_dev->cnf.mtu6 != mtu) {
- in6_dev->cnf.mtu6 = mtu;
+ } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
+ WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
fib6_metric_set(rt, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
@@ -1804,7 +1818,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
if (!idev)
return true;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
- idev->cnf.suppress_frag_ndisc) {
+ READ_ONCE(idev->cnf.suppress_frag_ndisc)) {
net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
return true;
}
@@ -1881,8 +1895,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
idev = in6_dev_get(dev);
if (!idev)
break;
- if (idev->cnf.ndisc_notify ||
- net->ipv6.devconf_all->ndisc_notify)
+ if (READ_ONCE(idev->cnf.ndisc_notify) ||
+ READ_ONCE(net->ipv6.devconf_all->ndisc_notify))
ndisc_send_unsol_na(dev);
in6_dev_put(idev);
break;
@@ -1891,8 +1905,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
if (!idev)
evict_nocarrier = true;
else {
- evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
- net->ipv6.devconf_all->ndisc_evict_nocarrier;
+ evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) &&
+ READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier);
in6_dev_put(idev);
}
@@ -1966,7 +1980,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
idev->nd_parms->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0ba62f4868f9..f3c8e2d918e1 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -6,6 +6,10 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
+# old sockopt interface and eval loop
+config IP6_NF_IPTABLES_LEGACY
+ tristate
+
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
@@ -147,7 +151,7 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP6_NF_MANGLE || IP6_NF_RAW
+ depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -186,6 +190,8 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
+ tristate
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -195,7 +201,7 @@ config IP6_NF_FILTER
config IP6_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP6_NF_FILTER
+ depends on IP6_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV6
default m if NETFILTER_ADVANCED=n
help
@@ -221,6 +227,7 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -230,6 +237,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -243,6 +251,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -254,6 +263,7 @@ config IP6_NF_NAT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
+ select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
@@ -262,25 +272,23 @@ config IP6_NF_NAT
To compile it as a module, choose M here. If unsure, say N.
-if IP6_NF_NAT
-
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NETFILTER_XT_TARGET_MASQUERADE
+ depends on IP6_NF_NAT
help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
+ depends on IP6_NF_NAT || NFT_COMPAT
help
This option adds the `SNPT' and `DNPT' target, which perform
stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
To compile it as a module, choose M here. If unsure, say N.
-endif # IP6_NF_NAT
-
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b8d6dc9aeeb6..66ce6fa5b2f5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,7 +4,7 @@
#
# Link order matters here.
-obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b2dd48911c8d..1a51a44571c3 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -327,9 +327,9 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
if (!reasm_data)
goto err;
- payload_len = ((skb->data - skb_network_header(skb)) -
+ payload_len = -skb_network_offset(skb) -
sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
+ sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 196dd4ecb5e2..dedee264b8f6 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv6.devconf_all->hop_limit);
+ READ_ONCE(net->ipv6.devconf_all->hop_limit));
nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen);
nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr));
@@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6,
- net->ipv6.devconf_all->hop_limit);
+ READ_ONCE(net->ipv6.devconf_all->hop_limit));
skb_reset_transport_header(nskb);
icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr));
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index b5205311f372..806d4b5dd1e6 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev)
- hoplimit = idev->cnf.hop_limit;
+ hoplimit = READ_ONCE(idev->cnf.hop_limit);
else
- hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+ hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit);
rcu_read_unlock();
}
return hoplimit;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 03dbb874c363..ca49e6617afa 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -160,6 +160,13 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
inet6_iif(skb), inet6_sdif(skb)))
continue;
+
+ if (atomic_read(&sk->sk_rmem_alloc) >=
+ READ_ONCE(sk->sk_rcvbuf)) {
+ atomic_inc(&sk->sk_drops);
+ continue;
+ }
+
delivered = true;
switch (nexthdr) {
case IPPROTO_ICMPV6:
@@ -288,8 +295,7 @@ out:
}
static void rawv6_err(struct sock *sk, struct sk_buff *skb,
- struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
bool recverr = inet6_test_bit(RECVERR6, sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -344,7 +350,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr,
inet6_iif(skb), inet6_iif(skb)))
continue;
- rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
+ rawv6_err(sk, skb, type, code, inner_offset, info);
}
rcu_read_unlock();
}
@@ -616,7 +622,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
-
+ skb->mono_delivery_time = !!skb->tstamp;
skb_put(skb, length);
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
@@ -935,7 +941,7 @@ do_confirm:
goto done;
}
-static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
+static int rawv6_seticmpfilter(struct sock *sk, int optname,
sockptr_t optval, int optlen)
{
switch (optname) {
@@ -952,7 +958,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
return 0;
}
-static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
+static int rawv6_geticmpfilter(struct sock *sk, int optname,
char __user *optval, int __user *optlen)
{
int len;
@@ -1038,7 +1044,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
case SOL_ICMPV6:
if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
- return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+ return rawv6_seticmpfilter(sk, optname, optval, optlen);
case SOL_IPV6:
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
@@ -1099,7 +1105,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
case SOL_ICMPV6:
if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
- return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+ return rawv6_geticmpfilter(sk, optname, optval, optlen);
case SOL_IPV6:
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5ebc47da1000..acb4f119e11f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -272,9 +272,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
if (!reasm_data)
goto out_oom;
- payload_len = ((skb->data - skb_network_header(skb)) -
+ payload_len = -skb_network_offset(skb) -
sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
+ sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea1dec8448fc..1f4b935a0e57 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -645,14 +645,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
write_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
- neigh->updated + idev->cnf.rtr_probe_interval)) {
+ neigh->updated +
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work)
__neigh_set_probe_once(neigh);
}
write_unlock_bh(&neigh->lock);
} else if (time_after(jiffies, last_probe +
- idev->cnf.rtr_probe_interval)) {
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
@@ -931,6 +932,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
+ struct fib6_table *table;
unsigned int pref;
unsigned long lifetime;
struct fib6_info *rt;
@@ -989,10 +991,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
- if (!addrconf_finite_timeout(lifetime))
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- else
+ fib6_remove_gc_list(rt);
+ } else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
+ fib6_add_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(rt);
}
@@ -1587,7 +1597,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
rcu_read_lock();
idev = __in6_dev_get(dev);
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
}
@@ -2085,12 +2095,12 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
*/
if (!(rt->rt6i_flags & RTF_EXPIRES)) {
if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ pr_debug("aging clone %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
} else if (time_after(jiffies, rt->dst.expires)) {
- RT6_TRACE("purging expired route %p\n", rt);
+ pr_debug("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2101,8 +2111,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (!(neigh && (neigh->flags & NTF_ROUTER))) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
+ pr_debug("purging route %p via non-router but gateway\n",
+ rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2211,7 +2221,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
rcu_read_lock();
@@ -3240,8 +3250,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
- if (idev && idev->cnf.mtu6 > mtu)
- mtu = idev->cnf.mtu6;
+ if (idev)
+ mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
@@ -3765,8 +3775,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
- else
- fib6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -4142,7 +4150,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev)
return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ if (READ_ONCE(in6_dev->cnf.forwarding) ||
+ !READ_ONCE(in6_dev->cnf.accept_redirects))
return;
/* RFC2461 8.1:
@@ -4355,7 +4364,8 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref,
- u32 defrtr_usr_metric)
+ u32 defrtr_usr_metric,
+ int lifetime)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
@@ -4368,6 +4378,7 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
+ .fc_expires = jiffies_to_clock_t(lifetime * HZ),
};
cfg.fc_gateway = *gwaddr;
@@ -4574,8 +4585,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
f6i->dst_nocount = true;
if (!anycast &&
- (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy))
+ (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
+ READ_ONCE(idev->cnf.disable_policy)))
f6i->dst_nopolicy = true;
}
@@ -5332,19 +5343,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
- fib6_info_release(nh->fib6_info);
-
- if (!err) {
- /* save reference to last route successfully inserted */
- rt_last = nh->fib6_info;
-
- /* save reference to first route for notification */
- if (!rt_notif)
- rt_notif = nh->fib6_info;
- }
- /* nh->fib6_info is used or freed at this point, reset to NULL*/
- nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
NL_SET_ERR_MSG_MOD(extack,
@@ -5352,6 +5351,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = nh;
goto add_errout;
}
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
@@ -5412,8 +5417,7 @@ add_errout:
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->fib6_info)
- fib6_info_release(nh->fib6_info);
+ fib6_info_release(nh->fib6_info);
list_del(&nh->next);
kfree(nh);
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 29346a6eec9f..35508abd76f4 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -512,22 +512,24 @@ int __init seg6_init(void)
{
int err;
- err = genl_register_family(&seg6_genl_family);
+ err = register_pernet_subsys(&ip6_segments_ops);
if (err)
goto out;
- err = register_pernet_subsys(&ip6_segments_ops);
+ err = genl_register_family(&seg6_genl_family);
if (err)
- goto out_unregister_genl;
+ goto out_unregister_pernet;
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
- goto out_unregister_pernet;
+ goto out_unregister_genl;
err = seg6_local_init();
- if (err)
- goto out_unregister_pernet;
+ if (err) {
+ seg6_iptunnel_exit();
+ goto out_unregister_genl;
+ }
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -548,11 +550,11 @@ out_unregister_iptun:
#endif
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
-out_unregister_pernet:
- unregister_pernet_subsys(&ip6_segments_ops);
-#endif
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
+#endif
+out_unregister_pernet:
+ unregister_pernet_subsys(&ip6_segments_ops);
goto out;
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index d43c50a7310d..861e0366f549 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
struct sr6_tlv_hmac *tlv;
struct ipv6_sr_hdr *srh;
struct inet6_dev *idev;
+ int require_hmac;
idev = __in6_dev_get(skb->dev);
@@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
tlv = seg6_get_tlv_hmac(srh);
+ require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac);
/* mandatory check but no tlv */
- if (idev->cnf.seg6_require_hmac > 0 && !tlv)
+ if (require_hmac > 0 && !tlv)
return false;
/* no check */
- if (idev->cnf.seg6_require_hmac < 0)
+ if (require_hmac < 0)
return true;
/* check only if present */
- if (idev->cnf.seg6_require_hmac == 0 && !tlv)
+ if (require_hmac == 0 && !tlv)
return true;
/* now, seg6_require_hmac >= 0 && tlv */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cc24cefdb85c..655c9b1a19b8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1398,7 +1398,6 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
};
@@ -1408,7 +1407,6 @@ static void ipip6_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
}
#define SIT_FEATURES (NETIF_F_SG | \
@@ -1437,6 +1435,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->features |= NETIF_F_LLTX;
dev->features |= SIT_FEATURES;
dev->hw_features |= SIT_FEATURES;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -1449,17 +1449,13 @@ static int ipip6_tunnel_init(struct net_device *dev)
strcpy(tunnel->parms.name, dev->name);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
+ if (err)
return err;
- }
+
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -1875,22 +1871,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_batch_net(struct list_head *net_list)
+static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- LIST_HEAD(list);
struct net *net;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, &list);
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ sit_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch = sit_exit_batch_net,
+ .exit_batch_rtnl = sit_exit_batch_rtnl,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
@@ -1956,6 +1949,7 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
+MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c8d2ca27220c..8bad0a44a0a6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -177,24 +177,33 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct sock *ret = sk;
__u8 rcv_wscale;
int full_space;
+ SKB_DR(reason);
if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
- if (!req)
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
+ if (!req) {
+ SKB_DR_SET(reason, NO_SOCKET);
goto out_drop;
+ }
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (security_inet_conn_request(sk, skb, req))
+ if (security_inet_conn_request(sk, skb, req)) {
+ SKB_DR_SET(reason, SECURITY_HOOK);
goto out_free;
+ }
if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -231,8 +240,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
- if (IS_ERR(dst))
+ if (IS_ERR(dst)) {
+ SKB_DR_SET(reason, IP_OUTNOROUTES);
goto out_free;
+ }
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
@@ -247,14 +258,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, dst);
ret = tcp_get_cookie_sock(sk, skb, req, dst);
+ if (!ret) {
+ SKB_DR_SET(reason, NO_SOCKET);
+ goto out_drop;
+ }
out:
return ret;
out_free:
reqsk_free(req);
out_drop:
+ kfree_skb_reason(skb, reason);
return NULL;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57b25b1fc9d9..3f4cba49e9ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -58,6 +58,7 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <linux/proc_fs.h>
@@ -1623,7 +1624,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all)
opt_skb = skb_clone_and_charge_r(skb, sk);
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
@@ -1653,12 +1653,12 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v6_cookie_check(sk, skb);
- if (!nsk)
- goto discard;
-
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb))
- goto reset;
+ if (nsk) {
+ reason = tcp_child_process(sk, nsk, skb);
+ if (reason)
+ goto reset;
+ }
if (opt_skb)
__kfree_skb(opt_skb);
return 0;
@@ -1666,7 +1666,8 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb))
+ reason = tcp_rcv_state_process(sk, skb);
+ if (reason)
goto reset;
if (opt_skb)
goto ipv6_pktoptions;
@@ -1856,10 +1857,12 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
- } else if (tcp_child_process(sk, nsk, skb)) {
- tcp_v6_send_reset(nsk, skb);
- goto discard_and_relse;
} else {
+ drop_reason = tcp_child_process(sk, nsk, skb);
+ if (drop_reason) {
+ tcp_v6_send_reset(nsk, skb);
+ goto discard_and_relse;
+ }
sock_put(sk);
return 0;
}
@@ -2365,11 +2368,6 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-static const struct inet6_protocol tcpv6_protocol = {
- .handler = tcp_v6_rcv,
- .err_handler = tcp_v6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
static struct inet_protosw tcpv6_protosw = {
.type = SOCK_STREAM,
@@ -2406,7 +2404,12 @@ int __init tcpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
+ .handler = tcp_v6_rcv,
+ .err_handler = tcp_v6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
if (ret)
goto out;
@@ -2431,7 +2434,7 @@ out_tcpv6_pernet_subsys:
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
goto out;
}
@@ -2439,5 +2442,5 @@ void tcpv6_exit(void)
{
unregister_pernet_subsys(&tcpv6_net_ops);
inet6_unregister_protosw(&tcpv6_protosw);
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index bf0c957e4b5e..4b07d1e6c952 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -66,15 +66,15 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
return tcp_gso_segment(skb, features);
}
-static const struct net_offload tcpv6_offload = {
- .callbacks = {
- .gso_segment = tcp6_gso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
- },
-};
int __init tcpv6_offload_init(void)
{
- return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+ net_hotdata.tcpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp6_gso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.tcpv6_offload, IPPROTO_TCP);
}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 00e8d8b1c9a7..dc4ea9b11794 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -302,4 +302,5 @@ static void __exit tunnel6_fini(void)
module_init(tunnel6_init);
module_exit(tunnel6_fini);
+MODULE_DESCRIPTION("IP-in-IPv6 tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f2249b4cd5f..7c1e6469d091 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,9 +79,6 @@ u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *faddr,
const __be16 fport)
{
- static u32 udp6_ehash_secret __read_mostly;
- static u32 udp_ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&udp6_ehash_secret,
@@ -1101,11 +1098,12 @@ void udp_v6_early_demux(struct sk_buff *skb)
else
return;
- if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
+ if (!sk)
return;
skb->sk = sk;
- skb->destructor = sock_efree;
+ DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+ skb->destructor = sock_pfree;
dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
@@ -1702,11 +1700,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-static const struct inet6_protocol udpv6_protocol = {
- .handler = udpv6_rcv,
- .err_handler = udpv6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
@@ -1803,7 +1796,12 @@ int __init udpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+ net_hotdata.udpv6_protocol = (struct inet6_protocol) {
+ .handler = udpv6_rcv,
+ .err_handler = udpv6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
if (ret)
goto out;
@@ -1814,12 +1812,12 @@ out:
return ret;
out_udpv6_protocol:
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
goto out;
}
void udpv6_exit(void)
{
inet6_unregister_protosw(&udpv6_protosw);
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b95ba241ebe..312bcaeea96f 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -192,20 +192,19 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
-static const struct net_offload udpv6_offload = {
- .callbacks = {
- .gso_segment = udp6_ufo_fragment,
- .gro_receive = udp6_gro_receive,
- .gro_complete = udp6_gro_complete,
- },
-};
-
-int udpv6_offload_init(void)
+int __init udpv6_offload_init(void)
{
- return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+ net_hotdata.udpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp6_ufo_fragment,
+ .gro_receive = udp6_gro_receive,
+ .gro_complete = udp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
int udpv6_offload_exit(void)
{
- return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+ return inet6_del_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 6e36e5047fba..a17d783dc7c0 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,7 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- int nhlen = skb->data - skb_network_header(skb);
+ int nhlen = -skb_network_offset(skb);
skb_network_header(skb)[IP6CB(skb)->nhoff] =
XFRM_MODE_SKB_CB(skb)->protocol;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 1323f2f6928e..bf140ef781c1 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -355,10 +355,7 @@ static int __init xfrm6_tunnel_init(void)
{
int rv;
- xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
- sizeof(struct xfrm6_tunnel_spi),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ xfrm6_tunnel_spi_kmem = KMEM_CACHE(xfrm6_tunnel_spi, SLAB_HWCACHE_ALIGN);
if (!xfrm6_tunnel_spi_kmem)
return -ENOMEM;
rv = register_pernet_subsys(&xfrm6_tunnel_net_ops);
@@ -401,5 +398,6 @@ static void __exit xfrm6_tunnel_fini(void)
module_init(xfrm6_tunnel_init);
module_exit(xfrm6_tunnel_fini);
+MODULE_DESCRIPTION("IPv6 XFRM tunnel driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 498a0c35b7bb..4aa1c72e6c49 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1060,13 +1060,12 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
int i;
/* skip iucv_array lying in the headroom */
- iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].address = (u32)virt_to_phys(skb->data);
iba[0].length = (u32)skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- iba[i + 1].address =
- (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].address = (u32)virt_to_phys(skb_frag_address(frag));
iba[i + 1].length = (u32)skb_frag_size(frag);
}
err = pr_iucv->message_send(iucv->path, &txmsg,
@@ -1162,13 +1161,12 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
struct iucv_array *iba = (struct iucv_array *)skb->head;
int i;
- iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].address = (u32)virt_to_phys(skb->data);
iba[0].length = (u32)skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- iba[i + 1].address =
- (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].address = (u32)virt_to_phys(skb_frag_address(frag));
iba[i + 1].length = (u32)skb_frag_size(frag);
}
rc = pr_iucv->message_receive(path, msg,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 6334f64f04d5..5b56ae6612dd 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID";
static LIST_HEAD(iucv_handler_list);
/*
- * iucv_path_table: an array of iucv_path structures.
+ * iucv_path_table: array of pointers to iucv_path structures.
*/
static struct iucv_path **iucv_path_table;
static unsigned long iucv_max_pathid;
@@ -286,6 +286,7 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
*/
static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
{
+ unsigned long reg1 = virt_to_phys(parm);
int cc;
asm volatile(
@@ -296,7 +297,7 @@ static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
" srl %[cc],28\n"
: [cc] "=&d" (cc), "+m" (*parm)
: [reg0] "d" ((unsigned long)command),
- [reg1] "d" ((unsigned long)parm)
+ [reg1] "d" (reg1)
: "cc", "0", "1");
return cc;
}
@@ -544,7 +545,7 @@ static int iucv_enable(void)
cpus_read_lock();
rc = -ENOMEM;
- alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+ alloc_size = iucv_max_pathid * sizeof(*iucv_path_table);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
if (!iucv_path_table)
goto out;
@@ -1123,7 +1124,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
parm = iucv_param[smp_processor_id()];
memset(parm, 0, sizeof(union iucv_param));
- parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+ parm->db.ipbfadr1 = (u32)virt_to_phys(buffer);
parm->db.ipbfln1f = (u32) size;
parm->db.ipmsgid = msg->id;
parm->db.ippathid = path->pathid;
@@ -1241,7 +1242,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
parm->dpl.iptrgcls = msg->class;
memcpy(parm->dpl.iprmmsg, reply, min_t(size_t, size, 8));
} else {
- parm->db.ipbfadr1 = (u32)(addr_t) reply;
+ parm->db.ipbfadr1 = (u32)virt_to_phys(reply);
parm->db.ipbfln1f = (u32) size;
parm->db.ippathid = path->pathid;
parm->db.ipflags1 = flags;
@@ -1293,7 +1294,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
parm->dpl.ipmsgtag = msg->tag;
memcpy(parm->dpl.iprmmsg, buffer, 8);
} else {
- parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+ parm->db.ipbfadr1 = (u32)virt_to_phys(buffer);
parm->db.ipbfln1f = (u32) size;
parm->db.ippathid = path->pathid;
parm->db.ipflags1 = flags | IUCV_IPNORPY;
@@ -1378,7 +1379,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
parm->dpl.iptrgcls = msg->class;
parm->dpl.ipsrccls = srccls;
parm->dpl.ipmsgtag = msg->tag;
- parm->dpl.ipbfadr2 = (u32)(addr_t) answer;
+ parm->dpl.ipbfadr2 = (u32)virt_to_phys(answer);
parm->dpl.ipbfln2f = (u32) asize;
memcpy(parm->dpl.iprmmsg, buffer, 8);
} else {
@@ -1387,9 +1388,9 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
parm->db.iptrgcls = msg->class;
parm->db.ipsrccls = srccls;
parm->db.ipmsgtag = msg->tag;
- parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+ parm->db.ipbfadr1 = (u32)virt_to_phys(buffer);
parm->db.ipbfln1f = (u32) size;
- parm->db.ipbfadr2 = (u32)(addr_t) answer;
+ parm->db.ipbfadr2 = (u32)virt_to_phys(answer);
parm->db.ipbfln2f = (u32) asize;
}
rc = iucv_call_b2f0(IUCV_SEND, parm);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 1184d40167b8..2f191e50d4fc 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -627,7 +627,8 @@ retry:
skb = txm->frag_skb;
}
- if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
+ if (WARN_ON(!skb_shinfo(skb)->nr_frags) ||
+ WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
ret = -EINVAL;
goto out;
}
@@ -637,8 +638,8 @@ retry:
msize += skb_frag_size(&skb_shinfo(skb)->frags[i]);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
- skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
- msize);
+ (const struct bio_vec *)skb_shinfo(skb)->frags,
+ skb_shinfo(skb)->nr_frags, msize);
iov_iter_advance(&msg.msg_iter, txm->frag_offset);
do {
@@ -1152,10 +1153,11 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case KCM_RECV_DISABLE:
val = kcm->rx_disabled;
@@ -1877,15 +1879,11 @@ static int __init kcm_init(void)
{
int err = -ENOMEM;
- kcm_muxp = kmem_cache_create("kcm_mux_cache",
- sizeof(struct kcm_mux), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ kcm_muxp = KMEM_CACHE(kcm_mux, SLAB_HWCACHE_ALIGN);
if (!kcm_muxp)
goto fail;
- kcm_psockp = kmem_cache_create("kcm_psock_cache",
- sizeof(struct kcm_psock), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ kcm_psockp = KMEM_CACHE(kcm_psock, SLAB_HWCACHE_ALIGN);
if (!kcm_psockp)
goto fail;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d68d01804dc7..f79fb99271ed 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3924,5 +3924,6 @@ out_unregister_key_proto:
module_init(ipsec_pfkey_init);
module_exit(ipsec_pfkey_exit);
+MODULE_DESCRIPTION("PF_KEY socket helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_KEY);
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 25ca89f80414..39e487ccc468 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -100,7 +100,7 @@ static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
};
-static struct device_type l2tpeth_type = {
+static const struct device_type l2tpeth_type = {
.name = "l2tpeth",
};
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 9a2a9ed3ba47..970af3983d11 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -478,7 +478,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = ip_route_output_ports(sock_net(sk), fl4, sk,
daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
- sk->sk_protocol, RT_CONN_FLAGS(sk),
+ sk->sk_protocol, ip_sock_rt_tos(sk),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index dd3153966173..7bf14cf9ffaa 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
- ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
+ ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f011af6601c9..6146e4e67bbb 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1356,11 +1356,11 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
err = -ENOTCONN;
if (!sk->sk_user_data)
goto end;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 9b06c380866b..fde1140d899e 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock)
}
netdev_put(llc->dev, &llc->dev_tracker);
sock_put(sk);
+ sock_orphan(sk);
+ sock->sk = NULL;
llc_sk_free(sk);
out:
return 0;
@@ -928,14 +930,15 @@ copy_uaddr:
*/
static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
+ DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
+ int rc = -EINVAL, copied = 0, hdrlen, hh_len;
struct sk_buff *skb = NULL;
+ struct net_device *dev;
size_t size = 0;
- int rc = -EINVAL, copied = 0, hdrlen;
dprintk("%s: sending from %02X to %02X\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
@@ -955,22 +958,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (rc)
goto out;
}
- hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
+ dev = llc->dev;
+ hh_len = LL_RESERVED_SPACE(dev);
+ hdrlen = llc_ui_header_len(sk, addr);
size = hdrlen + len;
- if (size > llc->dev->mtu)
- size = llc->dev->mtu;
+ size = min_t(size_t, size, READ_ONCE(dev->mtu));
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
goto out;
release_sock(sk);
- skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+ skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc);
lock_sock(sk);
if (!skb)
goto out;
- skb->dev = llc->dev;
+ if (sock_flag(sk, SOCK_ZAPPED) ||
+ llc->dev != dev ||
+ hdrlen != llc_ui_header_len(sk, addr) ||
+ hh_len != LL_RESERVED_SPACE(dev) ||
+ size > READ_ONCE(dev->mtu))
+ goto out;
+ skb->dev = dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
- skb_reserve(skb, hdrlen);
+ skb_reserve(skb, hh_len + hdrlen);
rc = memcpy_from_msg(skb_put(skb, copied), msg, copied);
if (rc)
goto out;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 6e387aadffce..4f16d9c88350 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = {
.func = llc_rcv,
};
-static struct packet_type llc_tr_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_TR_802_2),
- .func = llc_rcv,
-};
-
static int __init llc_init(void)
{
dev_add_pack(&llc_packet_type);
- dev_add_pack(&llc_tr_packet_type);
return 0;
}
static void __exit llc_exit(void)
{
dev_remove_pack(&llc_packet_type);
- dev_remove_pack(&llc_tr_packet_type);
}
module_init(llc_init);
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index cb0291decf2e..13438cc0a6b1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -62,7 +62,6 @@ config MAC80211_KUNIT_TEST
depends on KUNIT
depends on MAC80211
default KUNIT_ALL_TESTS
- depends on !KERNEL_6_2
help
Enable this option to test mac80211 internals with kunit.
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 4406b4f8f3b9..a33884967f21 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -29,7 +29,7 @@ mac80211-y := \
spectmgmt.o \
tx.o \
key.o \
- util.o \
+ util.o parse.o \
wme.o \
chan.o \
trace.o mlme.o \
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index b8a278355e18..21d55dc539f6 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -616,7 +616,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
if (!pubsta->deflink.ht_cap.ht_supported &&
- sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ)
+ sta->sdata->vif.bss_conf.chanreq.oper.chan->band != NL80211_BAND_6GHZ)
return -EINVAL;
if (WARN_ON_ONCE(!local->ops->ampdu_action))
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 489dd97f5172..f03452dc716d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -886,33 +886,32 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
{
struct ieee80211_local *local = wiphy_priv(wiphy);
struct ieee80211_sub_if_data *sdata;
- int ret = 0;
+ struct ieee80211_chan_req chanreq = { .oper = *chandef };
+ int ret;
lockdep_assert_wiphy(local->hw.wiphy);
- if (cfg80211_chandef_identical(&local->monitor_chandef, chandef))
+ if (cfg80211_chandef_identical(&local->monitor_chanreq.oper,
+ &chanreq.oper))
return 0;
- if (local->use_chanctx) {
- sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
- if (sdata) {
- ieee80211_link_release_channel(&sdata->deflink);
- ret = ieee80211_link_use_channel(&sdata->deflink,
- chandef,
- IEEE80211_CHANCTX_EXCLUSIVE);
- }
- } else {
- if (local->open_count == local->monitors) {
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
- }
- }
+ sdata = wiphy_dereference(local->hw.wiphy,
+ local->monitor_sdata);
+ if (!sdata)
+ goto done;
- if (ret == 0)
- local->monitor_chandef = *chandef;
+ if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper,
+ &chanreq.oper))
+ return 0;
- return ret;
+ ieee80211_link_release_channel(&sdata->deflink);
+ ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
+ IEEE80211_CHANCTX_EXCLUSIVE);
+ if (ret)
+ return ret;
+done:
+ local->monitor_chanreq = chanreq;
+ return 0;
}
static int
@@ -953,7 +952,8 @@ ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
struct cfg80211_fils_discovery *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct fils_discovery_data *new, *old = NULL;
struct ieee80211_fils_discovery *fd;
@@ -980,14 +980,16 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
}
- return BSS_CHANGED_FILS_DISCOVERY;
+ *changed |= BSS_CHANGED_FILS_DISCOVERY;
+ return 0;
}
static int
ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
struct cfg80211_unsol_bcast_probe_resp *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct unsol_bcast_probe_resp_data *new, *old = NULL;
@@ -1011,7 +1013,8 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
}
- return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ return 0;
}
static int ieee80211_set_ftm_responder_params(
@@ -1238,6 +1241,30 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_link_data *link;
+ u8 link_id, num = 0;
+
+ if (sdata->vif.type != NL80211_IFTYPE_AP &&
+ sdata->vif.type != NL80211_IFTYPE_P2P_GO)
+ return num;
+
+ if (!sdata->vif.valid_links)
+ return num;
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
+ if (sdata_dereference(link->u.ap.beacon, sdata))
+ num++;
+ }
+
+ return num;
+}
+
static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *params)
{
@@ -1256,6 +1283,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
unsigned int link_id = params->beacon.link_id;
struct ieee80211_link_data *link;
struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_chan_req chanreq = { .oper = params->chandef };
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1339,8 +1367,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return -EOPNOTSUPP;
link_conf->eht_support = true;
- link_conf->eht_puncturing = params->punct_bitmap;
- changed |= BSS_CHANGED_EHT_PUNCTURING;
link_conf->eht_su_beamformer =
params->eht_cap->fixed.phy_cap_info[0] &
@@ -1368,7 +1394,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return err;
}
- err = ieee80211_link_use_channel(link, &params->chandef,
+ err = ieee80211_link_use_channel(link, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (!err)
ieee80211_link_copy_chanctx_to_vlans(link, false);
@@ -1443,17 +1469,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
goto error;
err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
goto error;
- changed |= err;
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
goto error;
- changed |= err;
err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
@@ -1470,7 +1494,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID);
ieee80211_link_info_change_notify(sdata, link, changed);
- netif_carrier_on(dev);
+ if (ieee80211_num_beaconing_links(sdata) <= 1)
+ netif_carrier_on(dev);
+
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
netif_carrier_on(vlan->dev);
@@ -1518,17 +1544,15 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
return err;
err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
return err;
- changed |= err;
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
return err;
- changed |= err;
if (beacon->he_bss_color_valid &&
beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
@@ -1565,6 +1589,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_link_data *link =
sdata_dereference(sdata->link[link_id], sdata);
struct ieee80211_bss_conf *link_conf = link->conf;
+ LIST_HEAD(keys);
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1582,10 +1607,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
/* abort any running channel switch or color change */
link_conf->csa_active = false;
link_conf->color_change_active = false;
- if (link->csa_block_tx) {
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- link->csa_block_tx = false;
+ sdata->csa_blocked_tx = false;
}
ieee80211_free_next_beacon(link);
@@ -1593,7 +1618,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
/* turn off carrier for this interface and dependent VLANs */
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
netif_carrier_off(vlan->dev);
- netif_carrier_off(dev);
+
+ if (ieee80211_num_beaconing_links(sdata) <= 1)
+ netif_carrier_off(dev);
/* remove beacon and probe response */
sdata->u.ap.active = false;
@@ -1618,8 +1645,13 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
link_conf->ema_ap = false;
link_conf->bssid_indicator = 0;
- __sta_info_flush(sdata, true);
- ieee80211_free_keys(sdata, true);
+ __sta_info_flush(sdata, true, link_id);
+
+ ieee80211_remove_link_keys(link, &keys);
+ if (!list_empty(&keys)) {
+ synchronize_net();
+ ieee80211_free_key_list(local, &keys);
+ }
link_conf->enable_beacon = false;
sdata->beacon_rate_set = false;
@@ -1629,7 +1661,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_BEACON_ENABLED);
if (sdata->wdev.cac_started) {
- chandef = link_conf->chandef;
+ chandef = link_conf->chanreq.oper;
wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
@@ -1829,7 +1861,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
if (params->supported_rates &&
params->supported_rates_len) {
- ieee80211_parse_bitrates(link->conf->chandef.width,
+ ieee80211_parse_bitrates(link->conf->chanreq.oper.width,
sband, params->supported_rates,
params->supported_rates_len,
&link_sta->pub->supp_rates[sband->band]);
@@ -1869,6 +1901,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sband->band);
}
+ ieee80211_sta_init_nss(link_sta);
+
return ret;
}
@@ -1942,6 +1976,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
clear_sta_flag(sta, WLAN_STA_TDLS_PEER);
}
+ if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
+ sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU);
+
/* mark TDLS channel switch support, if the AP allows it */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
!sdata->deflink.u.mgd.tdls_chan_switch_prohibited &&
@@ -2093,7 +2130,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
if (params->mac)
return sta_info_destroy_addr_bss(sdata, params->mac);
- sta_info_flush(sdata);
+ sta_info_flush(sdata, params->link_id);
return 0;
}
@@ -2600,6 +2637,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
const struct mesh_setup *setup)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chan_req chanreq = { .oper = setup->chandef };
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
int err;
@@ -2616,7 +2654,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
+ err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
return err;
@@ -2659,7 +2697,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
return -EINVAL;
if (params->basic_rates) {
- if (!ieee80211_parse_bitrates(link->conf->chandef.width,
+ if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width,
wiphy->bands[sband->band],
params->basic_rates,
params->basic_rates_len,
@@ -3081,7 +3119,7 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy,
if (local->ops->get_txpower)
return drv_get_txpower(local, sdata, dbm);
- if (!local->use_chanctx)
+ if (local->emulate_chanctx)
*dbm = local->hw.conf.power_level;
else
*dbm = sdata->vif.bss_conf.txpower;
@@ -3151,8 +3189,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION))
return -EINVAL;
- if (ieee80211_vif_is_mld(&sdata->vif) &&
- !(sdata->vif.active_links & BIT(link->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
return 0;
old_req = link->u.mgd.req_smps;
@@ -3174,7 +3211,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
* the new value until we associate.
*/
if (!sdata->u.mgd.associated ||
- link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+ link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT)
return 0;
ap = sdata->vif.cfg.ap_addr;
@@ -3205,7 +3242,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
if (err)
link->u.mgd.req_smps = old_req;
else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found)
- ieee80211_teardown_tdls_peers(sdata);
+ ieee80211_teardown_tdls_peers(link);
return err;
}
@@ -3252,33 +3289,57 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
+static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ s32 rssi_thold, u32 rssi_hyst,
+ s32 rssi_low, s32 rssi_high)
+{
+ struct ieee80211_bss_conf *conf;
+
+ if (!link || !link->conf)
+ return;
+
+ conf = link->conf;
+
+ if (rssi_thold && rssi_hyst &&
+ rssi_thold == conf->cqm_rssi_thold &&
+ rssi_hyst == conf->cqm_rssi_hyst)
+ return;
+
+ conf->cqm_rssi_thold = rssi_thold;
+ conf->cqm_rssi_hyst = rssi_hyst;
+ conf->cqm_rssi_low = rssi_low;
+ conf->cqm_rssi_high = rssi_high;
+ link->u.mgd.last_cqm_event_signal = 0;
+
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
+ return;
+
+ if (sdata->u.mgd.associated &&
+ (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
+ ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM);
+}
+
static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
struct net_device *dev,
s32 rssi_thold, u32 rssi_hyst)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_vif *vif = &sdata->vif;
- struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+ int link_id;
- if (rssi_thold == bss_conf->cqm_rssi_thold &&
- rssi_hyst == bss_conf->cqm_rssi_hyst)
- return 0;
-
- if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER &&
- !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
+ if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER &&
+ !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
return -EOPNOTSUPP;
- bss_conf->cqm_rssi_thold = rssi_thold;
- bss_conf->cqm_rssi_hyst = rssi_hyst;
- bss_conf->cqm_rssi_low = 0;
- bss_conf->cqm_rssi_high = 0;
- sdata->deflink.u.mgd.last_cqm_event_signal = 0;
+ /* For MLD, handle CQM change on all the active links */
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
- /* tell the driver upon association, unless already associated */
- if (sdata->u.mgd.associated &&
- sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_CQM);
+ ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst,
+ 0, 0);
+ }
return 0;
}
@@ -3289,22 +3350,19 @@ static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_vif *vif = &sdata->vif;
- struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+ int link_id;
- if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
+ if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER)
return -EOPNOTSUPP;
- bss_conf->cqm_rssi_low = rssi_low;
- bss_conf->cqm_rssi_high = rssi_high;
- bss_conf->cqm_rssi_thold = 0;
- bss_conf->cqm_rssi_hyst = 0;
- sdata->deflink.u.mgd.last_cqm_event_signal = 0;
+ /* For MLD, handle CQM change on all the active links */
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
- /* tell the driver upon association, unless already associated */
- if (sdata->u.mgd.associated &&
- sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_CQM);
+ ieee80211_set_cqm_rssi_link(sdata, link, 0, 0,
+ rssi_low, rssi_high);
+ }
return 0;
}
@@ -3329,9 +3387,11 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
* so at a basic rate so that all clients can receive it.
*/
if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) &&
- sdata->vif.bss_conf.chandef.chan) {
+ sdata->vif.bss_conf.chanreq.oper.chan) {
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band;
+ enum nl80211_band band;
+
+ band = sdata->vif.bss_conf.chanreq.oper.chan->band;
if (!(mask->control[band].legacy & basic_rates))
return -EINVAL;
@@ -3383,6 +3443,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
u32 cac_time_ms)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chan_req chanreq = { .oper = *chandef };
struct ieee80211_local *local = sdata->local;
int err;
@@ -3397,7 +3458,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, chandef,
+ err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
goto out_unlock;
@@ -3540,13 +3601,24 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
return new_beacon;
}
-void ieee80211_csa_finish(struct ieee80211_vif *vif)
+void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
+
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return;
rcu_read_lock();
+ link_data = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link_data)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* TODO: MBSSID with MLO changes */
if (vif->mbssid_tx_vif == vif) {
/* Trigger ieee80211_csa_finish() on the non-transmitting
* interfaces when channel switch is received on
@@ -3565,7 +3637,7 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif)
&iter->deflink.csa_finalize_work);
}
}
- wiphy_work_queue(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
+ wiphy_work_queue(local->hw.wiphy, &link_data->csa_finalize_work);
rcu_read_unlock();
}
@@ -3577,26 +3649,27 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- sdata->deflink.csa_block_tx = block_tx;
+ sdata->csa_blocked_tx = block_tx;
sdata_info(sdata, "channel switch failed, disconnecting\n");
wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work);
}
EXPORT_SYMBOL(ieee80211_channel_switch_disconnect);
-static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link_data->sdata;
int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link_data->u.ap.next_beacon)
return -EINVAL;
- err = ieee80211_assign_beacon(sdata, &sdata->deflink,
- sdata->deflink.u.ap.next_beacon,
+ err = ieee80211_assign_beacon(sdata, link_data,
+ link_data->u.ap.next_beacon,
NULL, NULL, changed);
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link_data);
if (err < 0)
return err;
@@ -3625,6 +3698,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
{
struct ieee80211_sub_if_data *sdata = link_data->sdata;
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_bss_conf *link_conf = link_data->conf;
u64 changed = 0;
int err;
@@ -3646,40 +3720,33 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
if (link_data->reserved_ready)
return 0;
- return ieee80211_link_use_reserved_context(&sdata->deflink);
+ return ieee80211_link_use_reserved_context(link_data);
}
- if (!cfg80211_chandef_identical(&link_data->conf->chandef,
- &link_data->csa_chandef))
+ if (!cfg80211_chandef_identical(&link_conf->chanreq.oper,
+ &link_data->csa_chanreq.oper))
return -EINVAL;
- sdata->vif.bss_conf.csa_active = false;
+ link_conf->csa_active = false;
- err = ieee80211_set_after_csa_beacon(sdata, &changed);
+ err = ieee80211_set_after_csa_beacon(link_data, &changed);
if (err)
return err;
- if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
- sdata->vif.bss_conf.eht_puncturing =
- sdata->vif.bss_conf.csa_punct_bitmap;
- changed |= BSS_CHANGED_EHT_PUNCTURING;
- }
-
ieee80211_link_info_change_notify(sdata, link_data, changed);
- if (link_data->csa_block_tx) {
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- link_data->csa_block_tx = false;
+ sdata->csa_blocked_tx = false;
}
err = drv_post_channel_switch(link_data);
if (err)
return err;
- cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef,
- link_data->link_id,
- link_data->conf->eht_puncturing);
+ cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper,
+ link_data->link_id);
return 0;
}
@@ -3689,7 +3756,8 @@ static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
struct ieee80211_sub_if_data *sdata = link_data->sdata;
if (__ieee80211_csa_finalize(link_data)) {
- sdata_info(sdata, "failed to finalize CSA, disconnecting\n");
+ sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n",
+ link_data->link_id);
cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev,
GFP_KERNEL);
}
@@ -3714,18 +3782,19 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work)
ieee80211_csa_finalize(link);
}
-static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data,
struct cfg80211_csa_settings *params,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link_data->sdata;
struct ieee80211_csa_settings csa = {};
int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->deflink.u.ap.next_beacon =
+ link_data->u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_after);
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link_data->u.ap.next_beacon)
return -ENOMEM;
/*
@@ -3751,7 +3820,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
IEEE80211_MAX_CNTDWN_COUNTERS_NUM) ||
(params->n_counter_offsets_presp >
IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) {
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link_data);
return -EINVAL;
}
@@ -3761,11 +3830,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
csa.count = params->count;
- err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ err = ieee80211_assign_beacon(sdata, link_data,
&params->beacon_csa, &csa,
NULL, changed);
if (err < 0) {
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link_data);
return err;
}
@@ -3812,7 +3881,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
/* changes into another band are not supported */
- if (sdata->vif.bss_conf.chandef.chan->band !=
+ if (sdata->vif.bss_conf.chanreq.oper.chan->band !=
params->chandef.chan->band)
return -EINVAL;
@@ -3860,11 +3929,17 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chan_req chanreq = { .oper = params->chandef };
struct ieee80211_local *local = sdata->local;
- struct ieee80211_channel_switch ch_switch;
+ struct ieee80211_channel_switch ch_switch = {
+ .link_id = params->link_id,
+ };
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
+ struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_link_data *link_data;
u64 changed = 0;
+ u8 link_id = params->link_id;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3875,16 +3950,23 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (sdata->wdev.cac_started)
return -EBUSY;
- if (cfg80211_chandef_identical(&params->chandef,
- &sdata->vif.bss_conf.chandef))
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return -EINVAL;
+
+ link_data = wiphy_dereference(wiphy, sdata->link[link_id]);
+ if (!link_data)
+ return -ENOLINK;
+
+ link_conf = link_data->conf;
+
+ if (chanreq.oper.punctured && !link_conf->eht_support)
return -EINVAL;
/* don't allow another channel switch if one is already active. */
- if (sdata->vif.bss_conf.csa_active)
+ if (link_conf->csa_active)
return -EBUSY;
- conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf,
- lockdep_is_held(&local->hw.wiphy->mtx));
+ conf = wiphy_dereference(wiphy, link_conf->chanctx_conf);
if (!conf) {
err = -EBUSY;
goto out;
@@ -3901,14 +3983,14 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
ch_switch.timestamp = 0;
ch_switch.device_timestamp = 0;
ch_switch.block_tx = params->block_tx;
- ch_switch.chandef = params->chandef;
+ ch_switch.chandef = chanreq.oper;
ch_switch.count = params->count;
err = drv_pre_channel_switch(sdata, &ch_switch);
if (err)
goto out;
- err = ieee80211_link_reserve_chanctx(&sdata->deflink, &params->chandef,
+ err = ieee80211_link_reserve_chanctx(link_data, &chanreq,
chanctx->mode,
params->radar_required);
if (err)
@@ -3917,44 +3999,40 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
/* if reservation is invalid then this will fail */
err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0);
if (err) {
- ieee80211_link_unreserve_chanctx(&sdata->deflink);
+ ieee80211_link_unreserve_chanctx(link_data);
goto out;
}
/* if there is a color change in progress, abort it */
- if (sdata->vif.bss_conf.color_change_active)
+ if (link_conf->color_change_active)
ieee80211_color_change_abort(sdata);
- err = ieee80211_set_csa_beacon(sdata, params, &changed);
+ err = ieee80211_set_csa_beacon(link_data, params, &changed);
if (err) {
- ieee80211_link_unreserve_chanctx(&sdata->deflink);
+ ieee80211_link_unreserve_chanctx(link_data);
goto out;
}
- if (params->punct_bitmap && !sdata->vif.bss_conf.eht_support)
- goto out;
+ link_data->csa_chanreq = chanreq;
+ link_conf->csa_active = true;
- sdata->deflink.csa_chandef = params->chandef;
- sdata->deflink.csa_block_tx = params->block_tx;
- sdata->vif.bss_conf.csa_active = true;
- sdata->vif.bss_conf.csa_punct_bitmap = params->punct_bitmap;
-
- if (sdata->deflink.csa_block_tx)
+ if (params->block_tx &&
+ !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_tx = true;
+ }
cfg80211_ch_switch_started_notify(sdata->dev,
- &sdata->deflink.csa_chandef, 0,
- params->count, params->block_tx,
- sdata->vif.bss_conf.csa_punct_bitmap);
+ &link_data->csa_chanreq.oper, 0,
+ params->count, params->block_tx);
if (changed) {
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- changed);
- drv_channel_switch_beacon(sdata, &params->chandef);
+ ieee80211_link_info_change_notify(sdata, link_data, changed);
+ drv_channel_switch_beacon(sdata, &link_data->csa_chanreq.oper);
} else {
/* if the beacon didn't change, we can finalize immediately */
- ieee80211_csa_finalize(&sdata->deflink);
+ ieee80211_csa_finalize(link_data);
}
out:
@@ -4204,15 +4282,12 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (chanctx_conf) {
- *chandef = link->conf->chandef;
+ *chandef = link->conf->chanreq.oper;
ret = 0;
} else if (local->open_count > 0 &&
local->open_count == local->monitors &&
sdata->vif.type == NL80211_IFTYPE_MONITOR) {
- if (local->use_chanctx)
- *chandef = local->monitor_chandef;
- else
- *chandef = local->_oper_chandef;
+ *chandef = local->monitor_chanreq.oper;
ret = 0;
}
out:
@@ -4260,12 +4335,13 @@ static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_link_data *link;
+ struct ieee80211_chan_req chanreq = { .oper = *chandef };
int ret;
u64 changed = 0;
link = sdata_dereference(sdata->link[link_id], sdata);
- ret = ieee80211_link_change_bandwidth(link, chandef, &changed);
+ ret = ieee80211_link_change_chanreq(link, &chanreq, &changed);
if (ret == 0)
ieee80211_link_info_change_notify(sdata, link, changed);
@@ -4747,7 +4823,7 @@ EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap, gfp_t gfp)
+ u64 color_bitmap)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_link_data *link = &sdata->deflink;
@@ -4966,6 +5042,17 @@ static int ieee80211_set_hw_timestamp(struct wiphy *wiphy,
return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts);
}
+static int
+ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ttlm_params *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ return ieee80211_req_neg_ttlm(sdata, params);
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -5078,4 +5165,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.mod_link_station = ieee80211_mod_link_station,
.del_link_station = ieee80211_del_link_station,
.set_hw_timestamp = ieee80211_set_hw_timestamp,
+ .set_ttlm = ieee80211_set_ttlm,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index ef4c2cebc080..80e4b9784131 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211 - channel management
- * Copyright 2020 - 2022 Intel Corporation
+ * Copyright 2020 - 2024 Intel Corporation
*/
#include <linux/nl80211.h>
@@ -81,87 +81,122 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link)
return container_of(conf, struct ieee80211_chanctx, conf);
}
-static const struct cfg80211_chan_def *
-ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
+bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a,
+ const struct ieee80211_chan_req *b)
+{
+ if (!cfg80211_chandef_identical(&a->oper, &b->oper))
+ return false;
+ if (!a->ap.chan && !b->ap.chan)
+ return true;
+ return cfg80211_chandef_identical(&a->ap, &b->ap);
+}
+
+static const struct ieee80211_chan_req *
+ieee80211_chanreq_compatible(const struct ieee80211_chan_req *a,
+ const struct ieee80211_chan_req *b,
+ struct ieee80211_chan_req *tmp)
+{
+ const struct cfg80211_chan_def *compat;
+
+ if (a->ap.chan && b->ap.chan &&
+ !cfg80211_chandef_identical(&a->ap, &b->ap))
+ return NULL;
+
+ compat = cfg80211_chandef_compatible(&a->oper, &b->oper);
+ if (!compat)
+ return NULL;
+
+ /* Note: later code assumes this always fills & returns tmp if compat */
+ tmp->oper = *compat;
+ tmp->ap = a->ap.chan ? a->ap : b->ap;
+ return tmp;
+}
+
+static const struct ieee80211_chan_req *
+ieee80211_chanctx_compatible(struct ieee80211_chanctx *ctx,
+ const struct ieee80211_chan_req *req,
+ struct ieee80211_chan_req *tmp)
+{
+ const struct ieee80211_chan_req *ret;
+ struct ieee80211_chan_req tmp2;
+
+ *tmp = (struct ieee80211_chan_req){
+ .oper = ctx->conf.def,
+ .ap = ctx->conf.ap,
+ };
+
+ ret = ieee80211_chanreq_compatible(tmp, req, &tmp2);
+ if (!ret)
+ return NULL;
+ *tmp = *ret;
+ return tmp;
+}
+
+static const struct ieee80211_chan_req *
+ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *compat)
+ const struct ieee80211_chan_req *req,
+ struct ieee80211_chan_req *tmp)
{
struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(link, &ctx->reserved_links,
- reserved_chanctx_list) {
- if (!compat)
- compat = &link->reserved_chandef;
+ if (WARN_ON(!req))
+ return NULL;
- compat = cfg80211_chandef_compatible(&link->reserved_chandef,
- compat);
- if (!compat)
+ list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) {
+ req = ieee80211_chanreq_compatible(&link->reserved, req, tmp);
+ if (!req)
break;
}
- return compat;
+ return req;
}
-static const struct cfg80211_chan_def *
+static const struct ieee80211_chan_req *
ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *compat)
+ const struct ieee80211_chan_req *compat,
+ struct ieee80211_chan_req *tmp)
{
struct ieee80211_link_data *link;
+ const struct ieee80211_chan_req *comp_def = compat;
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(link, &ctx->assigned_links,
- assigned_chanctx_list) {
+ list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) {
struct ieee80211_bss_conf *link_conf = link->conf;
if (link->reserved_chanctx)
continue;
- if (!compat)
- compat = &link_conf->chandef;
-
- compat = cfg80211_chandef_compatible(
- &link_conf->chandef, compat);
- if (!compat)
+ comp_def = ieee80211_chanreq_compatible(&link_conf->chanreq,
+ comp_def, tmp);
+ if (!comp_def)
break;
}
- return compat;
-}
-
-static const struct cfg80211_chan_def *
-ieee80211_chanctx_combined_chandef(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *compat)
-{
- lockdep_assert_wiphy(local->hw.wiphy);
-
- compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat);
- if (!compat)
- return NULL;
-
- compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat);
- if (!compat)
- return NULL;
-
- return compat;
+ return comp_def;
}
static bool
-ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *def)
+ieee80211_chanctx_can_reserve(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ const struct ieee80211_chan_req *req)
{
+ struct ieee80211_chan_req tmp;
+
lockdep_assert_wiphy(local->hw.wiphy);
- if (ieee80211_chanctx_combined_chandef(local, ctx, def))
- return true;
+ if (!ieee80211_chanctx_reserved_chanreq(local, ctx, req, &tmp))
+ return false;
+
+ if (!ieee80211_chanctx_non_reserved_chandef(local, ctx, req, &tmp))
+ return false;
if (!list_empty(&ctx->reserved_links) &&
- ieee80211_chanctx_reserved_chandef(local, ctx, def))
+ ieee80211_chanctx_reserved_chanreq(local, ctx, req, &tmp))
return true;
return false;
@@ -169,7 +204,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
static struct ieee80211_chanctx *
ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
@@ -186,8 +221,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
continue;
- if (!ieee80211_chanctx_can_reserve_chandef(local, ctx,
- chandef))
+ if (!ieee80211_chanctx_can_reserve(local, ctx, chanreq))
continue;
return ctx;
@@ -202,7 +236,7 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta,
enum ieee80211_sta_rx_bandwidth width;
struct link_sta_info *link_sta;
- link_sta = rcu_dereference(sta->link[link_id]);
+ link_sta = wiphy_dereference(sta->local->hw.wiphy, sta->link[link_id]);
/* no effect if this STA has no presence on this link */
if (!link_sta)
@@ -240,9 +274,10 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta,
}
static enum nl80211_chan_width
-ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata,
- unsigned int link_id)
+ieee80211_get_max_required_bw(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ unsigned int link_id = link->link_id;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct sta_info *sta;
@@ -258,31 +293,25 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata,
}
static enum nl80211_chan_width
-ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
+ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for)
{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
- struct ieee80211_vif *vif = &sdata->vif;
- int link_id;
- rcu_read_lock();
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ for_each_sdata_link(local, link) {
enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT;
- struct ieee80211_link_data *link =
- rcu_dereference(sdata->link[link_id]);
-
- if (!link)
- continue;
if (link != rsvd_for &&
rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf)
continue;
- switch (vif->type) {
+ switch (link->sdata->vif.type) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
- width = ieee80211_get_max_required_bw(sdata, link_id);
+ width = ieee80211_get_max_required_bw(link);
break;
case NL80211_IFTYPE_STATION:
/*
@@ -290,8 +319,8 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
* point, so take the width from the chandef, but
* account also for TDLS peers
*/
- width = max(link->conf->chandef.width,
- ieee80211_get_max_required_bw(sdata, link_id));
+ width = max(link->conf->chanreq.oper.width,
+ ieee80211_get_max_required_bw(link));
break;
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_NAN:
@@ -299,7 +328,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
- width = link->conf->chandef.width;
+ width = link->conf->chanreq.oper.width;
break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_UNSPECIFIED:
@@ -312,40 +341,13 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
max_bw = max(max_bw, width);
}
- rcu_read_unlock();
-
- return max_bw;
-}
-
-static enum nl80211_chan_width
-ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
-{
- struct ieee80211_sub_if_data *sdata;
- enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- enum nl80211_chan_width width;
-
- if (!ieee80211_sdata_running(sdata))
- continue;
-
- width = ieee80211_get_chanctx_vif_max_required_bw(sdata, ctx,
- rsvd_for);
-
- max_bw = max(max_bw, width);
- }
/* use the configured bandwidth in case of monitor interface */
- sdata = rcu_dereference(local->monitor_sdata);
+ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
if (sdata &&
rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &ctx->conf)
max_bw = max(max_bw, ctx->conf.def.width);
- rcu_read_unlock();
-
return max_bw;
}
@@ -382,7 +384,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
/* downgrade chandef up to max_bw */
min_def = ctx->conf.def;
while (min_def.width > max_bw)
- ieee80211_chandef_downgrade(&min_def);
+ ieee80211_chandef_downgrade(&min_def, NULL);
if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def))
return 0;
@@ -395,7 +397,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
}
/* calling this function is assuming that station vif is updated to
- * lates changes by calling ieee80211_link_update_chandef
+ * lates changes by calling ieee80211_link_update_chanreq
*/
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
@@ -475,10 +477,15 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
static void _ieee80211_change_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
struct ieee80211_chanctx *old_ctx,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
struct ieee80211_link_data *rsvd_for)
{
- u32 changed;
+ const struct cfg80211_chan_def *chandef = &chanreq->oper;
+ struct ieee80211_chan_req ctx_req = {
+ .oper = ctx->conf.def,
+ .ap = ctx->conf.ap,
+ };
+ u32 changed = 0;
/* expected to handle only 20/40/80/160/320 channel widths */
switch (chandef->width) {
@@ -500,47 +507,52 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local,
*/
ieee80211_chan_bw_change(local, old_ctx, true);
- if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
+ if (ieee80211_chanreq_identical(&ctx_req, chanreq)) {
ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
return;
}
- WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+ WARN_ON(ieee80211_chanctx_refcount(local, ctx) > 1 &&
+ !cfg80211_chandef_compatible(&ctx->conf.def, &chanreq->oper));
ieee80211_remove_wbrf(local, &ctx->conf.def);
+ if (!cfg80211_chandef_identical(&ctx->conf.def, &chanreq->oper)) {
+ if (ctx->conf.def.width != chanreq->oper.width)
+ changed |= IEEE80211_CHANCTX_CHANGE_WIDTH;
+ if (ctx->conf.def.punctured != chanreq->oper.punctured)
+ changed |= IEEE80211_CHANCTX_CHANGE_PUNCTURING;
+ }
+ if (!cfg80211_chandef_identical(&ctx->conf.ap, &chanreq->ap))
+ changed |= IEEE80211_CHANCTX_CHANGE_AP;
ctx->conf.def = *chandef;
+ ctx->conf.ap = chanreq->ap;
/* check if min chanctx also changed */
- changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
- _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+ changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
ieee80211_add_wbrf(local, &ctx->conf.def);
drv_change_chanctx(local, ctx, changed);
- if (!local->use_chanctx) {
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
- }
-
- /* check is BW wider */
+ /* check if BW is wider */
ieee80211_chan_bw_change(local, old_ctx, false);
}
static void ieee80211_change_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
struct ieee80211_chanctx *old_ctx,
- const struct cfg80211_chan_def *chandef)
+ const struct ieee80211_chan_req *chanreq)
{
- _ieee80211_change_chanctx(local, ctx, old_ctx, chandef, NULL);
+ _ieee80211_change_chanctx(local, ctx, old_ctx, chanreq, NULL);
}
static struct ieee80211_chanctx *
ieee80211_find_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
+ struct ieee80211_chan_req tmp;
struct ieee80211_chanctx *ctx;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -549,7 +561,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
return NULL;
list_for_each_entry(ctx, &local->chanctx_list, list) {
- const struct cfg80211_chan_def *compat;
+ const struct ieee80211_chan_req *compat;
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACE_NONE)
continue;
@@ -557,12 +569,12 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
continue;
- compat = cfg80211_chandef_compatible(&ctx->conf.def, chandef);
+ compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp);
if (!compat)
continue;
- compat = ieee80211_chanctx_reserved_chandef(local, ctx,
- compat);
+ compat = ieee80211_chanctx_reserved_chanreq(local, ctx,
+ compat, &tmp);
if (!compat)
continue;
@@ -576,26 +588,14 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
bool ieee80211_is_radar_required(struct ieee80211_local *local)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- unsigned int link_id;
-
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(sdata->link[link_id]);
-
- if (link && link->radar_required) {
- rcu_read_unlock();
- return true;
- }
- }
+ for_each_sdata_link(local, link) {
+ if (link->radar_required)
+ return true;
}
- rcu_read_unlock();
return false;
}
@@ -605,43 +605,24 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
struct ieee80211_chanctx_conf *conf = &ctx->conf;
- struct ieee80211_sub_if_data *sdata;
- bool required = false;
+ struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- unsigned int link_id;
-
- if (!ieee80211_sdata_running(sdata))
+ for_each_sdata_link(local, link) {
+ if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
continue;
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(sdata->link[link_id]);
- if (!link)
- continue;
-
- if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
- continue;
- if (!link->radar_required)
- continue;
- required = true;
- break;
- }
-
- if (required)
- break;
+ if (!link->radar_required)
+ continue;
+ return true;
}
- rcu_read_unlock();
- return required;
+ return false;
}
static struct ieee80211_chanctx *
ieee80211_alloc_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
@@ -654,7 +635,8 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
INIT_LIST_HEAD(&ctx->assigned_links);
INIT_LIST_HEAD(&ctx->reserved_links);
- ctx->conf.def = *chandef;
+ ctx->conf.def = chanreq->oper;
+ ctx->conf.ap = chanreq->ap;
ctx->conf.rx_chains_static = 1;
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
@@ -674,23 +656,15 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
ieee80211_add_wbrf(local, &ctx->conf.def);
- if (!local->use_chanctx)
- local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
-
/* turn idle off *before* setting channel -- some drivers need that */
changed = ieee80211_idle_off(local);
if (changed)
ieee80211_hw_config(local, changed);
- if (!local->use_chanctx) {
- local->_oper_chandef = ctx->conf.def;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- } else {
- err = drv_add_chanctx(local, ctx);
- if (err) {
- ieee80211_recalc_idle(local);
- return err;
- }
+ err = drv_add_chanctx(local, ctx);
+ if (err) {
+ ieee80211_recalc_idle(local);
+ return err;
}
return 0;
@@ -698,7 +672,7 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
static struct ieee80211_chanctx *
ieee80211_new_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
@@ -706,7 +680,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
lockdep_assert_wiphy(local->hw.wiphy);
- ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+ ctx = ieee80211_alloc_chanctx(local, chanreq, mode);
if (!ctx)
return ERR_PTR(-ENOMEM);
@@ -725,32 +699,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local,
{
lockdep_assert_wiphy(local->hw.wiphy);
- if (!local->use_chanctx) {
- struct cfg80211_chan_def *chandef = &local->_oper_chandef;
- /* S1G doesn't have 20MHz, so get the correct width for the
- * current channel.
- */
- if (chandef->chan->band == NL80211_BAND_S1GHZ)
- chandef->width =
- ieee80211_s1g_channel_width(chandef->chan);
- else
- chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
- chandef->center_freq1 = chandef->chan->center_freq;
- chandef->freq1_offset = chandef->chan->freq_offset;
- chandef->center_freq2 = 0;
-
- /* NOTE: Disabling radar is only valid here for
- * single channel context. To be sure, check it ...
- */
- WARN_ON(local->hw.conf.radar_enabled &&
- !list_empty(&local->chanctx_list));
-
- local->hw.conf.radar_enabled = false;
-
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- } else {
- drv_remove_chanctx(local, ctx);
- }
+ drv_remove_chanctx(local, ctx);
ieee80211_recalc_idle(local);
@@ -773,64 +722,53 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
struct ieee80211_chanctx_conf *conf = &ctx->conf;
- struct ieee80211_sub_if_data *sdata;
- const struct cfg80211_chan_def *compat = NULL;
+ const struct ieee80211_chan_req *compat = NULL;
+ struct ieee80211_link_data *link;
+ struct ieee80211_chan_req tmp;
struct sta_info *sta;
lockdep_assert_wiphy(local->hw.wiphy);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- int link_id;
+ for_each_sdata_link(local, link) {
+ struct ieee80211_bss_conf *link_conf;
- if (!ieee80211_sdata_running(sdata))
+ if (link->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
continue;
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
- continue;
+ link_conf = link->conf;
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_bss_conf *link_conf =
- rcu_dereference(sdata->vif.link_conf[link_id]);
-
- if (!link_conf)
- continue;
-
- if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
- continue;
+ if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
+ continue;
- if (!compat)
- compat = &link_conf->chandef;
+ if (!compat)
+ compat = &link_conf->chanreq;
- compat = cfg80211_chandef_compatible(&link_conf->chandef,
- compat);
- if (WARN_ON_ONCE(!compat))
- break;
- }
+ compat = ieee80211_chanreq_compatible(&link_conf->chanreq,
+ compat, &tmp);
+ if (WARN_ON_ONCE(!compat))
+ return;
}
- if (WARN_ON_ONCE(!compat)) {
- rcu_read_unlock();
+ if (WARN_ON_ONCE(!compat))
return;
- }
/* TDLS peers can sometimes affect the chandef width */
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry(sta, &local->sta_list, list) {
+ struct ieee80211_chan_req tdls_chanreq = {};
if (!sta->uploaded ||
!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) ||
!test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
!sta->tdls_chandef.chan)
continue;
- compat = cfg80211_chandef_compatible(&sta->tdls_chandef,
- compat);
+ tdls_chanreq.oper = sta->tdls_chandef;
+
+ /* note this always fills and returns &tmp if compat */
+ compat = ieee80211_chanreq_compatible(&tdls_chanreq,
+ compat, &tmp);
if (WARN_ON_ONCE(!compat))
- break;
+ return;
}
- rcu_read_unlock();
-
- if (!compat)
- return;
ieee80211_change_chanctx(local, ctx, ctx, compat);
}
@@ -849,11 +787,6 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
chanctx->conf.radar_enabled = radar_enabled;
- if (!local->use_chanctx) {
- local->hw.conf.radar_enabled = chanctx->conf.radar_enabled;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- }
-
drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR);
}
@@ -924,23 +857,19 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
{
struct ieee80211_sub_if_data *sdata;
u8 rx_chains_static, rx_chains_dynamic;
+ struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
rx_chains_static = 1;
rx_chains_dynamic = 1;
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ for_each_sdata_link(local, link) {
u8 needed_static, needed_dynamic;
- unsigned int link_id;
- if (!ieee80211_sdata_running(sdata))
- continue;
-
- switch (sdata->vif.type) {
+ switch (link->sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- if (!sdata->u.mgd.associated)
+ if (!link->sdata->u.mgd.associated)
continue;
break;
case NL80211_IFTYPE_AP:
@@ -952,59 +881,38 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
continue;
}
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(sdata->link[link_id]);
-
- if (!link)
- continue;
-
- if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
- continue;
-
- switch (link->smps_mode) {
- default:
- WARN_ONCE(1, "Invalid SMPS mode %d\n",
- link->smps_mode);
- fallthrough;
- case IEEE80211_SMPS_OFF:
- needed_static = link->needed_rx_chains;
- needed_dynamic = link->needed_rx_chains;
- break;
- case IEEE80211_SMPS_DYNAMIC:
- needed_static = 1;
- needed_dynamic = link->needed_rx_chains;
- break;
- case IEEE80211_SMPS_STATIC:
- needed_static = 1;
- needed_dynamic = 1;
- break;
- }
+ if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
+ continue;
- rx_chains_static = max(rx_chains_static, needed_static);
- rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
+ switch (link->smps_mode) {
+ default:
+ WARN_ONCE(1, "Invalid SMPS mode %d\n",
+ link->smps_mode);
+ fallthrough;
+ case IEEE80211_SMPS_OFF:
+ needed_static = link->needed_rx_chains;
+ needed_dynamic = link->needed_rx_chains;
+ break;
+ case IEEE80211_SMPS_DYNAMIC:
+ needed_static = 1;
+ needed_dynamic = link->needed_rx_chains;
+ break;
+ case IEEE80211_SMPS_STATIC:
+ needed_static = 1;
+ needed_dynamic = 1;
+ break;
}
+
+ rx_chains_static = max(rx_chains_static, needed_static);
+ rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
}
/* Disable SMPS for the monitor interface */
- sdata = rcu_dereference(local->monitor_sdata);
+ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
if (sdata &&
rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &chanctx->conf)
rx_chains_dynamic = rx_chains_static = local->rx_chains;
- rcu_read_unlock();
-
- if (!local->use_chanctx) {
- if (rx_chains_static > 1)
- local->smps_mode = IEEE80211_SMPS_OFF;
- else if (rx_chains_dynamic > 1)
- local->smps_mode = IEEE80211_SMPS_DYNAMIC;
- else
- local->smps_mode = IEEE80211_SMPS_STATIC;
- ieee80211_hw_config(local, 0);
- }
-
if (rx_chains_static == chanctx->conf.rx_chains_static &&
rx_chains_dynamic == chanctx->conf.rx_chains_dynamic)
return;
@@ -1043,17 +951,16 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
if (clear)
conf = NULL;
- rcu_read_lock();
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
struct ieee80211_bss_conf *vlan_conf;
- vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]);
+ vlan_conf = wiphy_dereference(local->hw.wiphy,
+ vlan->vif.link_conf[link_id]);
if (WARN_ON(!vlan_conf))
continue;
rcu_assign_pointer(vlan_conf->chanctx_conf, conf);
}
- rcu_read_unlock();
}
void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
@@ -1103,7 +1010,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
}
int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode,
bool radar_required)
{
@@ -1114,13 +1021,13 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
lockdep_assert_wiphy(local->hw.wiphy);
curr_ctx = ieee80211_link_get_chanctx(link);
- if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx)
+ if (curr_ctx && !local->ops->switch_vif_chanctx)
return -EOPNOTSUPP;
- new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
+ new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode);
if (!new_ctx) {
if (ieee80211_can_create_new_chanctx(local)) {
- new_ctx = ieee80211_new_chanctx(local, chandef, mode);
+ new_ctx = ieee80211_new_chanctx(local, chanreq, mode);
if (IS_ERR(new_ctx))
return PTR_ERR(new_ctx);
} else {
@@ -1174,7 +1081,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
!list_empty(&curr_ctx->reserved_links))
return -EBUSY;
- new_ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+ new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode);
if (!new_ctx)
return -ENOMEM;
@@ -1192,7 +1099,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
list_add(&link->reserved_chanctx_list, &new_ctx->reserved_links);
link->reserved_chanctx = new_ctx;
- link->reserved_chandef = *chandef;
+ link->reserved = *chanreq;
link->reserved_radar_required = radar_required;
link->reserved_ready = false;
@@ -1231,29 +1138,28 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link)
}
static void
-ieee80211_link_update_chandef(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef)
+ieee80211_link_update_chanreq(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
unsigned int link_id = link->link_id;
struct ieee80211_sub_if_data *vlan;
- link->conf->chandef = *chandef;
+ link->conf->chanreq = *chanreq;
if (sdata->vif.type != NL80211_IFTYPE_AP)
return;
- rcu_read_lock();
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
struct ieee80211_bss_conf *vlan_conf;
- vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]);
+ vlan_conf = wiphy_dereference(sdata->local->hw.wiphy,
+ vlan->vif.link_conf[link_id]);
if (WARN_ON(!vlan_conf))
continue;
- vlan_conf->chandef = *chandef;
+ vlan_conf->chanreq = *chanreq;
}
- rcu_read_unlock();
}
static int
@@ -1264,7 +1170,8 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
struct ieee80211_local *local = sdata->local;
struct ieee80211_vif_chanctx_switch vif_chsw[1] = {};
struct ieee80211_chanctx *old_ctx, *new_ctx;
- const struct cfg80211_chan_def *chandef;
+ const struct ieee80211_chan_req *chanreq;
+ struct ieee80211_chan_req tmp;
u64 changed = 0;
int err;
@@ -1286,17 +1193,18 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
IEEE80211_CHANCTX_REPLACES_OTHER))
return -EINVAL;
- chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
- &link->reserved_chandef);
- if (WARN_ON(!chandef))
+ chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
+ &link->reserved,
+ &tmp);
+ if (WARN_ON(!chanreq))
return -EINVAL;
- if (link_conf->chandef.width != link->reserved_chandef.width)
+ if (link_conf->chanreq.oper.width != link->reserved.oper.width)
changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_link_update_chandef(link, &link->reserved_chandef);
+ ieee80211_link_update_chanreq(link, &link->reserved);
- _ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef, link);
+ _ieee80211_change_chanctx(local, new_ctx, old_ctx, chanreq, link);
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
@@ -1344,7 +1252,8 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *old_ctx, *new_ctx;
- const struct cfg80211_chan_def *chandef;
+ const struct ieee80211_chan_req *chanreq;
+ struct ieee80211_chan_req tmp;
int err;
old_ctx = ieee80211_link_get_chanctx(link);
@@ -1363,12 +1272,13 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link)
IEEE80211_CHANCTX_REPLACES_OTHER))
return -EINVAL;
- chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
- &link->reserved_chandef);
- if (WARN_ON(!chandef))
+ chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
+ &link->reserved,
+ &tmp);
+ if (WARN_ON(!chanreq))
return -EINVAL;
- ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef);
+ ieee80211_change_chanctx(local, new_ctx, new_ctx, chanreq);
list_del(&link->reserved_chanctx_list);
link->reserved_chanctx = NULL;
@@ -1412,24 +1322,6 @@ ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link)
return true;
}
-static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local,
- struct ieee80211_chanctx *new_ctx)
-{
- const struct cfg80211_chan_def *chandef;
-
- lockdep_assert_wiphy(local->hw.wiphy);
-
- chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL);
- if (WARN_ON(!chandef))
- return -EINVAL;
-
- local->hw.conf.radar_enabled = new_ctx->conf.radar_enabled;
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
-
- return 0;
-}
-
static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
int n_vifs)
{
@@ -1518,7 +1410,6 @@ err:
static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
{
struct ieee80211_chanctx *ctx, *ctx_tmp, *old_ctx;
- struct ieee80211_chanctx *new_ctx = NULL;
int err, n_assigned, n_reserved, n_ready;
int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0;
@@ -1551,9 +1442,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
goto err;
}
- if (!local->use_chanctx)
- new_ctx = ctx;
-
n_ctx++;
n_assigned = 0;
@@ -1607,9 +1495,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
if (WARN_ON(n_ctx == 0) ||
WARN_ON(n_vifs_switch == 0 &&
n_vifs_assign == 0 &&
- n_vifs_ctxless == 0) ||
- WARN_ON(n_ctx > 1 && !local->use_chanctx) ||
- WARN_ON(!new_ctx && !local->use_chanctx)) {
+ n_vifs_ctxless == 0)) {
err = -EINVAL;
goto err;
}
@@ -1619,20 +1505,14 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
* reservations and driver capabilities.
*/
- if (local->use_chanctx) {
- if (n_vifs_switch > 0) {
- err = ieee80211_chsw_switch_vifs(local, n_vifs_switch);
- if (err)
- goto err;
- }
+ if (n_vifs_switch > 0) {
+ err = ieee80211_chsw_switch_vifs(local, n_vifs_switch);
+ if (err)
+ goto err;
+ }
- if (n_vifs_assign > 0 || n_vifs_ctxless > 0) {
- err = ieee80211_chsw_switch_ctxs(local);
- if (err)
- goto err;
- }
- } else {
- err = ieee80211_chsw_switch_hwconf(local, new_ctx);
+ if (n_vifs_assign > 0 || n_vifs_ctxless > 0) {
+ err = ieee80211_chsw_switch_ctxs(local);
if (err)
goto err;
}
@@ -1672,10 +1552,10 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
link->radar_required = link->reserved_radar_required;
- if (link_conf->chandef.width != link->reserved_chandef.width)
+ if (link_conf->chanreq.oper.width != link->reserved.oper.width)
changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_link_update_chandef(link, &link->reserved_chandef);
+ ieee80211_link_update_chanreq(link, &link->reserved);
if (changed)
ieee80211_link_info_change_notify(sdata,
link,
@@ -1810,7 +1690,7 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
}
int ieee80211_link_use_channel(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
@@ -1821,38 +1701,37 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link,
lockdep_assert_wiphy(local->hw.wiphy);
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link->link_id))) {
- ieee80211_link_update_chandef(link, chandef);
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) {
+ ieee80211_link_update_chanreq(link, chanreq);
return 0;
}
ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
- chandef,
+ &chanreq->oper,
sdata->wdev.iftype);
if (ret < 0)
goto out;
if (ret > 0)
- radar_detect_width = BIT(chandef->width);
+ radar_detect_width = BIT(chanreq->oper.width);
link->radar_required = ret;
- ret = ieee80211_check_combinations(sdata, chandef, mode,
+ ret = ieee80211_check_combinations(sdata, &chanreq->oper, mode,
radar_detect_width);
if (ret < 0)
goto out;
__ieee80211_link_release_channel(link);
- ctx = ieee80211_find_chanctx(local, chandef, mode);
+ ctx = ieee80211_find_chanctx(local, chanreq, mode);
if (!ctx)
- ctx = ieee80211_new_chanctx(local, chandef, mode);
+ ctx = ieee80211_new_chanctx(local, chanreq, mode);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto out;
}
- ieee80211_link_update_chandef(link, chandef);
+ ieee80211_link_update_chanreq(link, chanreq);
ret = ieee80211_assign_link_chanctx(link, ctx);
if (ret) {
@@ -1932,28 +1811,79 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link)
return 0;
}
-int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
- u64 *changed)
+/*
+ * This is similar to ieee80211_chanctx_compatible(), but rechecks
+ * against all the links actually using it (except the one that's
+ * passed, since that one is changing).
+ * This is done in order to allow changes to the AP's bandwidth for
+ * wider bandwidth OFDMA purposes, which wouldn't be treated as
+ * compatible by ieee80211_chanctx_recheck() but is OK if the link
+ * requesting the update is the only one using it.
+ */
+static const struct ieee80211_chan_req *
+ieee80211_chanctx_recheck(struct ieee80211_local *local,
+ struct ieee80211_link_data *skip_link,
+ struct ieee80211_chanctx *ctx,
+ const struct ieee80211_chan_req *req,
+ struct ieee80211_chan_req *tmp)
+{
+ const struct ieee80211_chan_req *ret = req;
+ struct ieee80211_link_data *link;
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ for_each_sdata_link(local, link) {
+ if (link == skip_link)
+ continue;
+
+ if (rcu_access_pointer(link->conf->chanctx_conf) == &ctx->conf) {
+ ret = ieee80211_chanreq_compatible(ret,
+ &link->conf->chanreq,
+ tmp);
+ if (!ret)
+ return NULL;
+ }
+
+ if (link->reserved_chanctx == ctx) {
+ ret = ieee80211_chanreq_compatible(ret,
+ &link->reserved,
+ tmp);
+ if (!ret)
+ return NULL;
+ }
+ }
+
+ *tmp = *ret;
+ return tmp;
+}
+
+int ieee80211_link_change_chanreq(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq,
+ u64 *changed)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
- const struct cfg80211_chan_def *compat;
+ const struct ieee80211_chan_req *compat;
+ struct ieee80211_chan_req tmp;
lockdep_assert_wiphy(local->hw.wiphy);
- if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
+ if (!cfg80211_chandef_usable(sdata->local->hw.wiphy,
+ &chanreq->oper,
IEEE80211_CHAN_DISABLED))
return -EINVAL;
- if (cfg80211_chandef_identical(chandef, &link_conf->chandef))
+ /* for non-HT 20 MHz the rest doesn't matter */
+ if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT &&
+ cfg80211_chandef_identical(&chanreq->oper, &link_conf->chanreq.oper))
return 0;
- if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT ||
- link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+ /* but you cannot switch to/from it */
+ if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ link_conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT)
return -EINVAL;
conf = rcu_dereference_protected(link_conf->chanctx_conf,
@@ -1963,13 +1893,14 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
ctx = container_of(conf, struct ieee80211_chanctx, conf);
- compat = cfg80211_chandef_compatible(&conf->def, chandef);
+ compat = ieee80211_chanctx_recheck(local, link, ctx, chanreq, &tmp);
if (!compat)
return -EINVAL;
switch (ctx->replace_state) {
case IEEE80211_CHANCTX_REPLACE_NONE:
- if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat))
+ if (!ieee80211_chanctx_reserved_chanreq(local, ctx, compat,
+ &tmp))
return -EBUSY;
break;
case IEEE80211_CHANCTX_WILL_BE_REPLACED:
@@ -1984,7 +1915,7 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
break;
}
- ieee80211_link_update_chandef(link, chandef);
+ ieee80211_link_update_chanreq(link, chanreq);
ieee80211_recalc_chanctx_chantype(local, ctx);
@@ -2019,12 +1950,11 @@ void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
- rcu_read_lock();
- ap_conf = rcu_dereference(ap->vif.link_conf[link_id]);
- conf = rcu_dereference_protected(ap_conf->chanctx_conf,
- lockdep_is_held(&local->hw.wiphy->mtx));
+ ap_conf = wiphy_dereference(local->hw.wiphy,
+ ap->vif.link_conf[link_id]);
+ conf = wiphy_dereference(local->hw.wiphy,
+ ap_conf->chanctx_conf);
rcu_assign_pointer(link_conf->chanctx_conf, conf);
- rcu_read_unlock();
}
void ieee80211_iter_chan_contexts_atomic(
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index d49894df2351..49da401c5340 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -152,16 +152,17 @@ do { \
else \
_sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \
} while (0)
-#define link_dbg(link, fmt, ...) \
+#define _link_id_dbg(print, sdata, link_id, fmt, ...) \
do { \
- if (ieee80211_vif_is_mld(&(link)->sdata->vif)) \
- _sdata_dbg(1, (link)->sdata, "[link %d] " fmt, \
- (link)->link_id, \
- ##__VA_ARGS__); \
+ if (ieee80211_vif_is_mld(&(sdata)->vif)) \
+ _sdata_dbg(print, sdata, "[link %d] " fmt, \
+ link_id, ##__VA_ARGS__); \
else \
- _sdata_dbg(1, (link)->sdata, fmt, \
- ##__VA_ARGS__); \
+ _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \
} while (0)
+#define link_dbg(link, fmt, ...) \
+ _link_id_dbg(1, (link)->sdata, (link)->link_id, \
+ fmt, ##__VA_ARGS__)
#define ht_dbg(sdata, fmt, ...) \
_sdata_dbg(MAC80211_HT_DEBUG, \
@@ -226,6 +227,9 @@ do { \
#define mlme_dbg(sdata, fmt, ...) \
_sdata_dbg(MAC80211_MLME_DEBUG, \
sdata, fmt, ##__VA_ARGS__)
+#define mlme_link_id_dbg(sdata, link_id, fmt, ...) \
+ _link_id_dbg(MAC80211_MLME_DEBUG, sdata, link_id, \
+ fmt, ##__VA_ARGS__)
#define mlme_dbg_ratelimited(sdata, fmt, ...) \
_sdata_dbg(MAC80211_MLME_DEBUG && net_ratelimit(), \
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 74be49191e70..2f68e92a7404 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021-2024 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -498,6 +498,7 @@ static const char *hw_flag_names[] = {
FLAG(DETECTS_COLOR_COLLISION),
FLAG(MLO_MCAST_MULTI_LINK_TX),
FLAG(DISALLOW_PUNCTURING),
+ FLAG(HANDLES_QUIET_CSA),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index dce5606ed66d..68596ef78b15 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -997,8 +997,8 @@ static void add_link_files(struct ieee80211_link_data *link,
}
}
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif)
+static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif)
{
char buf[10+IFNAMSIZ];
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index b226b1aae88a..a02ec0a413f6 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -11,8 +11,6 @@
#include "ieee80211_i.h"
#ifdef CONFIG_MAC80211_DEBUGFS
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
@@ -24,9 +22,6 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
#else
-static inline void ieee80211_debugfs_add_netdev(
- struct ieee80211_sub_if_data *sdata, bool mld_vif)
-{}
static inline void ieee80211_debugfs_remove_netdev(
struct ieee80211_sub_if_data *sdata)
{}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 3b7f70073fc3..dce37ba8ebe3 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2015 Intel Deutschland GmbH
- * Copyright (C) 2022-2023 Intel Corporation
+ * Copyright (C) 2022-2024 Intel Corporation
*/
#include <net/mac80211.h>
#include "ieee80211_i.h"
@@ -214,8 +214,7 @@ int drv_conf_tx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
return 0;
if (params->cw_min == 0 || params->cw_min > params->cw_max) {
@@ -315,8 +314,7 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link_conf->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_conf->link_id))
return 0;
trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx);
@@ -343,8 +341,7 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link_conf->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_conf->link_id))
return;
trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx);
@@ -461,8 +458,7 @@ void drv_link_info_changed(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_id))
return;
trace_drv_link_info_changed(local, sdata, info, changed);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index eb482fb8c3af..5d078c0a2323 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2019, 2021 - 2023 Intel Corporation
+* Copyright (C) 2018-2019, 2021-2024 Intel Corporation
*/
#ifndef __MAC80211_DRIVER_OPS
@@ -1180,8 +1180,9 @@ drv_post_channel_switch(struct ieee80211_link_data *link)
}
static inline void
-drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
+drv_abort_channel_switch(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
might_sleep();
@@ -1193,7 +1194,8 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
trace_drv_abort_channel_switch(local, sdata);
if (local->ops->abort_channel_switch)
- local->ops->abort_channel_switch(&local->hw, &sdata->vif);
+ local->ops->abort_channel_switch(&local->hw, &sdata->vif,
+ link->conf);
}
static inline void
@@ -1695,4 +1697,23 @@ int drv_change_sta_links(struct ieee80211_local *local,
struct ieee80211_sta *sta,
u16 old_links, u16 new_links);
+static inline enum ieee80211_neg_ttlm_res
+drv_can_neg_ttlm(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ enum ieee80211_neg_ttlm_res res = NEG_TTLM_RES_REJECT;
+
+ might_sleep();
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ trace_drv_can_neg_ttlm(local, sdata, neg_ttlm);
+ if (local->ops->can_neg_ttlm)
+ res = local->ops->can_neg_ttlm(&local->hw, &sdata->vif,
+ neg_ttlm);
+ trace_drv_neg_ttlm_res(local, sdata, res, neg_ttlm);
+
+ return res;
+}
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 749f4ecab990..c3330aea4da3 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2023 Intel Corporation
+ * Copyright(c) 2020-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -257,7 +257,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!link_conf))
width = NL80211_CHAN_WIDTH_20_NOHT;
else
- width = link_conf->chandef.width;
+ width = link_conf->chanreq.oper.width;
switch (width) {
default:
@@ -603,6 +603,8 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id,
if (WARN_ON(!link))
goto out;
+ trace_api_request_smps(sdata->local, sdata, link, smps_mode);
+
if (link->u.mgd.driver_smps_mode == smps_mode)
goto out;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 8f2b445a5ec3..7ace5cdc6c26 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -223,7 +223,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
struct cfg80211_bss *bss;
u64 bss_change;
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq = {};
struct ieee80211_channel *chan;
struct beacon_data *presp;
struct cfg80211_inform_bss bss_meta = {};
@@ -237,7 +237,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
drv_reset_tsf(local, sdata);
if (!ether_addr_equal(ifibss->bssid, bssid))
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
/* if merging, indicate to driver that we leave the old IBSS */
if (sdata->vif.cfg.ibss_joined) {
@@ -257,22 +257,22 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
kfree_rcu(presp, rcu_head);
/* make a copy of the chandef, it could be modified below. */
- chandef = *req_chandef;
- chan = chandef.chan;
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+ chanreq.oper = *req_chandef;
+ chan = chanreq.oper.chan;
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper,
NL80211_IFTYPE_ADHOC)) {
- if (chandef.width == NL80211_CHAN_WIDTH_5 ||
- chandef.width == NL80211_CHAN_WIDTH_10 ||
- chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- chandef.width == NL80211_CHAN_WIDTH_20) {
+ if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_10 ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_20) {
sdata_info(sdata,
"Failed to join IBSS, beacons forbidden\n");
return;
}
- chandef.width = NL80211_CHAN_WIDTH_20;
- chandef.center_freq1 = chan->center_freq;
+ chanreq.oper.width = NL80211_CHAN_WIDTH_20;
+ chanreq.oper.center_freq1 = chan->center_freq;
/* check again for downgraded chandef */
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper,
NL80211_IFTYPE_ADHOC)) {
sdata_info(sdata,
"Failed to join IBSS, beacons forbidden\n");
@@ -281,7 +281,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &chandef, NL80211_IFTYPE_ADHOC);
+ &chanreq.oper, NL80211_IFTYPE_ADHOC);
if (err < 0) {
sdata_info(sdata,
"Failed to join IBSS, invalid chandef\n");
@@ -295,7 +295,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
radar_required = err;
- if (ieee80211_link_use_channel(&sdata->deflink, &chandef,
+ if (ieee80211_link_use_channel(&sdata->deflink, &chanreq,
ifibss->fixed_channel ?
IEEE80211_CHANCTX_SHARED :
IEEE80211_CHANCTX_EXCLUSIVE)) {
@@ -307,7 +307,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
memcpy(ifibss->bssid, bssid, ETH_ALEN);
presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
- capability, tsf, &chandef,
+ capability, tsf, &chanreq.oper,
&have_higher_than_11mbit, NULL);
if (!presp)
return;
@@ -533,12 +533,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
IEEE80211_PRIVACY(ifibss->privacy));
/* XXX: should not really modify cfg80211 data */
if (cbss) {
- cbss->channel = sdata->deflink.csa_chandef.chan;
+ cbss->channel = sdata->deflink.csa_chanreq.oper.chan;
cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
}
}
- ifibss->chandef = sdata->deflink.csa_chandef;
+ ifibss->chandef = sdata->deflink.csa_chanreq.oper;
/* generate the beacon */
return ieee80211_ibss_csa_beacon(sdata, NULL, changed);
@@ -682,7 +682,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
spin_lock_bh(&ifibss->incomplete_lock);
while (!list_empty(&ifibss->incomplete_stations)) {
@@ -757,21 +757,22 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
enum nl80211_channel_type ch_type;
int err;
- ieee80211_conn_flags_t conn_flags;
+ struct ieee80211_conn_settings conn = {
+ .mode = IEEE80211_CONN_MODE_HT,
+ .bw_limit = IEEE80211_CONN_BW_LIMIT_40,
+ };
u32 vht_cap_info = 0;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- conn_flags = IEEE80211_CONN_DISABLE_VHT;
-
switch (ifibss->chandef.width) {
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20_NOHT:
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ conn.mode = IEEE80211_CONN_MODE_LEGACY;
fallthrough;
case NL80211_CHAN_WIDTH_20:
- conn_flags |= IEEE80211_CONN_DISABLE_40MHZ;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20;
break;
default:
break;
@@ -783,8 +784,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems,
ifibss->chandef.chan->band,
- vht_cap_info,
- conn_flags, ifibss->bssid, &csa_ie);
+ vht_cap_info, &conn,
+ ifibss->bssid, &csa_ie);
/* can't switch to destination channel, fail */
if (err < 0)
goto disconnect;
@@ -798,7 +799,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
goto disconnect;
params.count = csa_ie.count;
- params.chandef = csa_ie.chandef;
+ params.chandef = csa_ie.chanreq.oper;
switch (ifibss->chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
@@ -857,7 +858,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
params.radar_required = err;
if (cfg80211_chandef_identical(&params.chandef,
- &sdata->vif.bss_conf.chandef)) {
+ &sdata->vif.bss_conf.chanreq.oper)) {
ibss_dbg(sdata,
"received csa with an identical chandef, ignoring\n");
return true;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0b2b53550bd9..b6fead612b66 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef IEEE80211_I_H
@@ -370,19 +370,32 @@ enum ieee80211_sta_flags {
IEEE80211_STA_ENABLE_RRM = BIT(15),
};
-typedef u32 __bitwise ieee80211_conn_flags_t;
-
-enum ieee80211_conn_flags {
- IEEE80211_CONN_DISABLE_HT = (__force ieee80211_conn_flags_t)BIT(0),
- IEEE80211_CONN_DISABLE_40MHZ = (__force ieee80211_conn_flags_t)BIT(1),
- IEEE80211_CONN_DISABLE_VHT = (__force ieee80211_conn_flags_t)BIT(2),
- IEEE80211_CONN_DISABLE_80P80MHZ = (__force ieee80211_conn_flags_t)BIT(3),
- IEEE80211_CONN_DISABLE_160MHZ = (__force ieee80211_conn_flags_t)BIT(4),
- IEEE80211_CONN_DISABLE_HE = (__force ieee80211_conn_flags_t)BIT(5),
- IEEE80211_CONN_DISABLE_EHT = (__force ieee80211_conn_flags_t)BIT(6),
- IEEE80211_CONN_DISABLE_320MHZ = (__force ieee80211_conn_flags_t)BIT(7),
+enum ieee80211_conn_mode {
+ IEEE80211_CONN_MODE_S1G,
+ IEEE80211_CONN_MODE_LEGACY,
+ IEEE80211_CONN_MODE_HT,
+ IEEE80211_CONN_MODE_VHT,
+ IEEE80211_CONN_MODE_HE,
+ IEEE80211_CONN_MODE_EHT,
};
+#define IEEE80211_CONN_MODE_HIGHEST IEEE80211_CONN_MODE_EHT
+
+enum ieee80211_conn_bw_limit {
+ IEEE80211_CONN_BW_LIMIT_20,
+ IEEE80211_CONN_BW_LIMIT_40,
+ IEEE80211_CONN_BW_LIMIT_80,
+ IEEE80211_CONN_BW_LIMIT_160, /* also 80+80 */
+ IEEE80211_CONN_BW_LIMIT_320,
+};
+
+struct ieee80211_conn_settings {
+ enum ieee80211_conn_mode mode;
+ enum ieee80211_conn_bw_limit bw_limit;
+};
+
+extern const struct ieee80211_conn_settings ieee80211_conn_settings_unlimited;
+
struct ieee80211_mgd_auth_data {
struct cfg80211_bss *bss;
unsigned long timeout;
@@ -416,7 +429,7 @@ struct ieee80211_mgd_assoc_data {
size_t elems_len;
u8 *elems; /* pointing to inside ie[] below */
- ieee80211_conn_flags_t conn_flags;
+ struct ieee80211_conn_settings conn;
u16 status;
@@ -441,6 +454,7 @@ struct ieee80211_mgd_assoc_data {
bool timeout_started;
bool comeback; /* whether the AP has requested association comeback */
bool s1g;
+ bool spp_amsdu;
unsigned int assoc_link_id;
@@ -509,6 +523,8 @@ struct ieee80211_if_managed {
unsigned int flags;
+ u16 mcast_seq_last;
+
bool status_acked;
bool status_received;
__le16 status_fc;
@@ -579,6 +595,10 @@ struct ieee80211_if_managed {
/* TID-to-link mapping support */
struct wiphy_delayed_work ttlm_work;
struct ieee80211_adv_ttlm_info ttlm_info;
+
+ /* dialog token enumerator for neg TTLM request */
+ u8 dialog_token_alloc;
+ struct wiphy_delayed_work neg_ttlm_timeout_work;
};
struct ieee80211_if_ibss {
@@ -866,6 +886,9 @@ struct ieee80211_chanctx {
enum ieee80211_chanctx_mode mode;
bool driver_present;
+ /* temporary data for search algorithm etc. */
+ struct ieee80211_chan_req req;
+
struct ieee80211_chanctx_conf conf;
};
@@ -938,7 +961,7 @@ struct ieee80211_link_data_managed {
enum ieee80211_smps_mode req_smps, /* requested smps mode */
driver_smps_mode; /* smps mode request */
- ieee80211_conn_flags_t conn_flags;
+ struct ieee80211_conn_settings conn;
s16 p2p_noa_index;
@@ -983,8 +1006,6 @@ struct ieee80211_link_data_managed {
int mu_edca_last_param_set;
u8 bss_param_ch_cnt;
-
- struct cfg80211_bss *bss;
};
struct ieee80211_link_data_ap {
@@ -1013,11 +1034,10 @@ struct ieee80211_link_data {
struct ieee80211_key __rcu *default_beacon_key;
struct wiphy_work csa_finalize_work;
- bool csa_block_tx;
bool operating_11g_mode;
- struct cfg80211_chan_def csa_chandef;
+ struct ieee80211_chan_req csa_chanreq;
struct wiphy_work color_change_finalize_work;
struct delayed_work color_collision_detect_work;
@@ -1025,7 +1045,7 @@ struct ieee80211_link_data {
/* context reservation -- protected with wiphy mutex */
struct ieee80211_chanctx *reserved_chanctx;
- struct cfg80211_chan_def reserved_chandef;
+ struct ieee80211_chan_req reserved;
bool reserved_radar_required;
bool reserved_ready;
@@ -1072,6 +1092,8 @@ struct ieee80211_sub_if_data {
unsigned long state;
+ bool csa_blocked_tx;
+
char name[IFNAMSIZ];
struct ieee80211_fragment_cache frags;
@@ -1160,6 +1182,19 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
#define sdata_dereference(p, sdata) \
wiphy_dereference(sdata->local->hw.wiphy, p)
+#define for_each_sdata_link(_local, _link) \
+ /* outer loop just to define the variables ... */ \
+ for (struct ieee80211_sub_if_data *___sdata = NULL; \
+ !___sdata; \
+ ___sdata = (void *)~0 /* always stop */) \
+ list_for_each_entry(___sdata, &(_local)->interfaces, list) \
+ if (ieee80211_sdata_running(___sdata)) \
+ for (int ___link_id = 0; \
+ ___link_id < ARRAY_SIZE(___sdata->link); \
+ ___link_id++) \
+ if ((_link = wiphy_dereference((local)->hw.wiphy, \
+ ___sdata->link[___link_id])))
+
static inline int
ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems,
struct cfg80211_rnr_elems *rnr_elems,
@@ -1330,7 +1365,8 @@ struct ieee80211_local {
bool wiphy_ciphers_allocated;
- bool use_chanctx;
+ struct cfg80211_chan_def dflt_chandef;
+ bool emulate_chanctx;
/* protects the aggregated multicast list and filter calls */
spinlock_t filter_lock;
@@ -1456,8 +1492,6 @@ struct ieee80211_local {
enum mac80211_scan_state next_scan_state;
struct wiphy_delayed_work scan_work;
struct ieee80211_sub_if_data __rcu *scan_sdata;
- /* For backward compatibility only -- do not use */
- struct cfg80211_chan_def _oper_chandef;
/* Temporary remain-on-channel for off-channel operations */
struct ieee80211_channel *tmp_channel;
@@ -1531,8 +1565,6 @@ struct ieee80211_local {
int user_power_level; /* in dBm, for all interfaces */
- enum ieee80211_smps_mode smps_mode;
-
struct work_struct restart_work;
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -1559,7 +1591,7 @@ struct ieee80211_local {
/* virtual monitor interface */
struct ieee80211_sub_if_data __rcu *monitor_sdata;
- struct cfg80211_chan_def monitor_chandef;
+ struct ieee80211_chan_req monitor_chanreq;
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
@@ -1624,7 +1656,7 @@ ieee80211_get_link_sband(struct ieee80211_link_data *link)
/* this struct holds the value parsing from channel switch IE */
struct ieee80211_csa_ie {
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq;
u8 mode;
u8 count;
u8 ttl;
@@ -1633,6 +1665,14 @@ struct ieee80211_csa_ie {
u32 max_switch_time;
};
+enum ieee80211_elems_parse_error {
+ IEEE80211_PARSE_ERR_INVALID_END = BIT(0),
+ IEEE80211_PARSE_ERR_DUP_ELEM = BIT(1),
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE = BIT(2),
+ IEEE80211_PARSE_ERR_UNEXPECTED_ELEM = BIT(3),
+ IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC = BIT(4),
+};
+
/* Parsed Information Elements */
struct ieee802_11_elems {
const u8 *ie_start;
@@ -1727,12 +1767,6 @@ struct ieee802_11_elems {
size_t ml_basic_len;
size_t ml_reconf_len;
- /* The basic Multi-Link element in the original IEs */
- const struct element *ml_basic_elem;
-
- /* The reconfiguration Multi-Link element in the original IEs */
- const struct element *ml_reconf_elem;
-
u8 ttlm_num;
/*
@@ -1743,16 +1777,8 @@ struct ieee802_11_elems {
struct ieee80211_mle_per_sta_profile *prof;
size_t sta_prof_len;
- /* whether a parse error occurred while retrieving these elements */
- bool parse_error;
-
- /*
- * scratch buffer that can be used for various element parsing related
- * tasks, e.g., element de-fragmentation etc.
- */
- size_t scratch_len;
- u8 *scratch_pos;
- u8 scratch[] __counted_by(scratch_len);
+ /* whether/which parse error occurred while retrieving these elements */
+ u8 parse_error;
};
static inline struct ieee80211_local *hw_to_local(
@@ -1801,6 +1827,8 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
unsigned int mpdu_len,
unsigned int mpdu_offset);
int ieee80211_hw_config(struct ieee80211_local *local, u32 changed);
+int ieee80211_hw_conf_chan(struct ieee80211_local *local);
+void ieee80211_hw_conf_init(struct ieee80211_local *local);
void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
u64 changed);
@@ -2112,7 +2140,7 @@ enum ieee80211_sta_rx_bandwidth
ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta);
-void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta);
+void ieee80211_sta_init_nss(struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
enum nl80211_chan_width
@@ -2166,9 +2194,8 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
* @elems: parsed 802.11 elements received with the frame
* @current_band: indicates the current band
* @vht_cap_info: VHT capabilities of the transmitter
- * @conn_flags: contains information about own capabilities and restrictions
- * to decide which channel switch announcements can be accepted, using
- * flags from &enum ieee80211_conn_flags.
+ * @conn: contains information about own capabilities and restrictions
+ * to decide which channel switch announcements can be accepted
* @bssid: the currently connected bssid (for reporting)
* @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl.
* All of them will be filled with if success only.
@@ -2178,7 +2205,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
u32 vht_cap_info,
- ieee80211_conn_flags_t conn_flags, u8 *bssid,
+ struct ieee80211_conn_settings *conn,
+ u8 *bssid,
struct ieee80211_csa_ie *csa_ie);
/* Suspend/resume and hw reconfiguration */
@@ -2202,6 +2230,9 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
/* utility functions/constants */
extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
+const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode);
+enum ieee80211_conn_bw_limit
+ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef);
int ieee80211_frame_duration(enum nl80211_band band, size_t len,
int rate, int erp, int short_preamble);
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
@@ -2243,6 +2274,7 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
/**
* struct ieee80211_elems_parse_params - element parsing parameters
+ * @mode: connection mode for parsing
* @start: pointer to the elements
* @len: length of the elements
* @action: %true if the elements came from an action frame
@@ -2260,6 +2292,7 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
* for EHT capabilities parsing)
*/
struct ieee80211_elems_parse_params {
+ enum ieee80211_conn_mode mode;
const u8 *start;
size_t len;
bool action;
@@ -2279,6 +2312,7 @@ ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
struct cfg80211_bss *bss)
{
struct ieee80211_elems_parse_params params = {
+ .mode = IEEE80211_CONN_MODE_HIGHEST,
.start = start,
.len = len,
.action = action,
@@ -2408,7 +2442,6 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
const u8 *da, const u8 *bssid,
u16 stype, u16 reason,
bool send_frame, u8 *frame_buf);
-u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end);
enum {
IEEE80211_PROBE_FLAG_DIRECTED = BIT(0),
@@ -2453,32 +2486,36 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u32 cap);
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
-u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
-u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
- const struct ieee80211_sta_he_cap *he_cap,
- u8 *end);
-void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
- enum ieee80211_smps_mode smps_mode,
- struct sk_buff *skb);
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata);
u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef,
const struct ieee80211_sta_eht_cap *eht_cap);
int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates);
-int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band);
-int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band);
u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
void ieee80211_add_s1g_capab_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_s1g_cap *caps,
struct sk_buff *skb);
void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
-u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap);
+
+/* element building in SKBs */
+int ieee80211_put_srates_elem(struct sk_buff *skb,
+ const struct ieee80211_supported_band *sband,
+ u32 basic_rates, u32 rate_flags, u32 masked_rates,
+ u8 element_id);
+int ieee80211_put_he_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn);
+int ieee80211_put_he_6ghz_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_smps_mode smps_mode);
+int ieee80211_put_eht_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn);
/* channel management */
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
@@ -2488,23 +2525,36 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef);
void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
- bool support_160, bool support_320,
struct cfg80211_chan_def *chandef);
-bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local,
const struct ieee80211_he_operation *he_oper,
const struct ieee80211_eht_operation *eht_oper,
struct cfg80211_chan_def *chandef);
bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
struct cfg80211_chan_def *chandef);
-ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
+void ieee80211_chandef_downgrade(struct cfg80211_chan_def *chandef,
+ struct ieee80211_conn_settings *conn);
+static inline void
+ieee80211_chanreq_downgrade(struct ieee80211_chan_req *chanreq,
+ struct ieee80211_conn_settings *conn)
+{
+ ieee80211_chandef_downgrade(&chanreq->oper, conn);
+ if (WARN_ON(!conn))
+ return;
+ if (conn->mode < IEEE80211_CONN_MODE_EHT)
+ chanreq->ap.chan = NULL;
+}
+
+bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a,
+ const struct ieee80211_chan_req *b);
int __must_check
ieee80211_link_use_channel(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *req,
enum ieee80211_chanctx_mode mode);
int __must_check
ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *req,
enum ieee80211_chanctx_mode mode,
bool radar_required);
int __must_check
@@ -2512,9 +2562,9 @@ ieee80211_link_use_reserved_context(struct ieee80211_link_data *link);
int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link);
int __must_check
-ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
- u64 *changed);
+ieee80211_link_change_chanreq(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *req,
+ u64 *changed);
void ieee80211_link_release_channel(struct ieee80211_link_data *link);
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link);
void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
@@ -2561,7 +2611,7 @@ int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
struct net_device *dev,
const u8 *addr);
-void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
+void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link);
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
const u8 *peer, u16 reason);
void
@@ -2589,12 +2639,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
-u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
-u8 *ieee80211_ie_build_eht_cap(u8 *pos,
- const struct ieee80211_sta_he_cap *he_cap,
- const struct ieee80211_sta_eht_cap *eht_cap,
- u8 *end,
- bool for_ap);
+u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata);
void
ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
@@ -2603,6 +2648,12 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
u8 eht_cap_len,
struct link_sta_info *link_sta);
+void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len);
+void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len);
+int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_ttlm_params *params);
void ieee80211_check_wbrf_support(struct ieee80211_local *local);
void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e4e7c0b38cb6..395de62d9cb2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -511,7 +511,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
* would have removed them, but in other modes there shouldn't
* be any stations.
*/
- flushed = sta_info_flush(sdata);
+ flushed = sta_info_flush(sdata, -1);
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0);
/* don't count this interface for allmulti while it is down */
@@ -544,10 +544,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
sdata->vif.bss_conf.csa_active = false;
if (sdata->vif.type == NL80211_IFTYPE_STATION)
sdata->deflink.u.mgd.csa_waiting_bcn = false;
- if (sdata->deflink.csa_block_tx) {
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ sdata->csa_blocked_tx = false;
}
wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
@@ -557,7 +557,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chandef;
+ chandef = sdata->vif.bss_conf.chanreq.oper;
WARN_ON(local->suspended);
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev, &chandef,
@@ -1164,7 +1164,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
rcu_assign_pointer(local->monitor_sdata, sdata);
mutex_unlock(&local->iflist_mtx);
- ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef,
+ ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chanreq,
IEEE80211_CHANCTX_EXCLUSIVE);
if (ret) {
mutex_lock(&local->iflist_mtx);
@@ -1252,7 +1252,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
sdata->vif.cab_queue = master->vif.cab_queue;
memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
sizeof(sdata->vif.hw_queue));
- sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef;
+ sdata->vif.bss_conf.chanreq = master->vif.bss_conf.chanreq;
sdata->crypto_tx_tailroom_needed_cnt +=
master->crypto_tx_tailroom_needed_cnt;
@@ -1288,8 +1288,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
res = drv_start(local);
if (res)
goto err_del_bss;
- /* we're brought up, everything changes */
- hw_reconf_flags = ~0;
ieee80211_led_radio(local, true);
ieee80211_mod_tpt_led_trig(local,
IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
@@ -1436,7 +1434,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (coming_up)
local->open_count++;
- if (hw_reconf_flags)
+ if (local->open_count == 1)
+ ieee80211_hw_conf_init(local);
+ else if (hw_reconf_flags)
ieee80211_hw_config(local, hw_reconf_flags);
ieee80211_recalc_ps(local);
@@ -1546,6 +1546,22 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
default:
break;
}
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) {
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ switch (mgmt->u.action.u.ttlm_req.action_code) {
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
+ ieee80211_process_neg_ttlm_req(sdata, mgmt,
+ skb->len);
+ break;
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_RES:
+ ieee80211_process_neg_ttlm_res(sdata, mgmt,
+ skb->len);
+ break;
+ default:
+ break;
+ }
+ }
} else if (ieee80211_is_ext(mgmt->frame_control)) {
if (sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_sta_rx_queued_ext(sdata, skb);
@@ -1783,7 +1799,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
/* need to do this after the switch so vif.type is correct */
ieee80211_link_setup(&sdata->deflink);
- ieee80211_debugfs_add_netdev(sdata, false);
+ ieee80211_debugfs_recreate_netdev(sdata, false);
}
static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index af74d7f9d94d..eecdd2265eaa 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,7 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright 2018-2020, 2022-2023 Intel Corporation
+ * Copyright 2018-2020, 2022-2024 Intel Corporation
*/
#include <crypto/utils.h>
@@ -925,6 +925,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
*/
key->color = atomic_inc_return(&key_color);
+ /* keep this flag for easier access later */
+ if (sta && sta->sta.spp_amsdu)
+ key->conf.flags |= IEEE80211_KEY_FLAG_SPP_AMSDU;
+
increment_tailroom_need_count(sdata);
ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key);
@@ -1368,12 +1372,19 @@ EXPORT_SYMBOL_GPL(ieee80211_remove_key);
struct ieee80211_key_conf *
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
- struct ieee80211_key_conf *keyconf)
+ struct ieee80211_key_conf *keyconf,
+ int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
struct ieee80211_key *key;
int err;
+ struct ieee80211_link_data *link_data =
+ link_id < 0 ? &sdata->deflink :
+ sdata_dereference(sdata->link[link_id], sdata);
+
+ if (WARN_ON(!link_data))
+ return ERR_PTR(-EINVAL);
if (WARN_ON(!local->wowlan))
return ERR_PTR(-EINVAL);
@@ -1390,8 +1401,9 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
- /* FIXME: this function needs to get a link ID */
- err = ieee80211_key_link(key, &sdata->deflink, NULL);
+ key->conf.link_id = link_id;
+
+ err = ieee80211_key_link(key, link_data, NULL);
if (err)
return ERR_PTR(err);
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index d4f86955afa6..685ec66b4264 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -2,7 +2,7 @@
/*
* MLO link handling
*
- * Copyright (C) 2022-2023 Intel Corporation
+ * Copyright (C) 2022-2024 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -73,6 +73,8 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
ieee80211_mgd_stop_link(link);
cancel_delayed_work_sync(&link->color_collision_detect_work);
+ wiphy_work_cancel(link->sdata->local->hw.wiphy,
+ &link->csa_finalize_work);
ieee80211_link_release_channel(link);
}
@@ -354,7 +356,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- /* FIXME: kill TDLS connections on the link */
+ ieee80211_teardown_tdls_peers(link);
ieee80211_link_release_channel(link);
}
@@ -402,7 +404,8 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- ret = ieee80211_link_use_channel(link, &link->conf->chandef,
+ ret = ieee80211_link_use_channel(link,
+ &link->conf->chanreq,
IEEE80211_CHANCTX_SHARED);
WARN_ON_ONCE(ret);
@@ -444,6 +447,9 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
lockdep_assert_wiphy(local->hw.wiphy);
+ if (WARN_ON(!active_links))
+ return -EINVAL;
+
if (!drv_can_activate_links(local, sdata, active_links))
return -EINVAL;
@@ -472,6 +478,9 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ if (WARN_ON(!active_links))
+ return;
+
if (!ieee80211_sdata_running(sdata))
return;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f2ece7793573..4eaea0a9975b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -93,16 +93,32 @@ static void ieee80211_reconfig_filter(struct wiphy *wiphy,
ieee80211_configure_filter(local);
}
-static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
+static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local,
+ struct ieee80211_chanctx_conf *ctx)
{
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef = {};
+ struct cfg80211_chan_def *oper = NULL;
+ enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_STATIC;
u32 changed = 0;
int power;
u32 offchannel_flag;
+ if (!local->emulate_chanctx)
+ return 0;
+
offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
+ if (ctx && !WARN_ON(!ctx->def.chan)) {
+ oper = &ctx->def;
+ if (ctx->rx_chains_static > 1)
+ smps_mode = IEEE80211_SMPS_OFF;
+ else if (ctx->rx_chains_dynamic > 1)
+ smps_mode = IEEE80211_SMPS_DYNAMIC;
+ else
+ smps_mode = IEEE80211_SMPS_STATIC;
+ }
+
if (local->scan_chandef.chan) {
chandef = local->scan_chandef;
} else if (local->tmp_channel) {
@@ -110,25 +126,30 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
chandef.center_freq1 = chandef.chan->center_freq;
chandef.freq1_offset = chandef.chan->freq_offset;
- } else
- chandef = local->_oper_chandef;
+ } else if (oper) {
+ chandef = *oper;
+ } else {
+ chandef = local->dflt_chandef;
+ }
- WARN(!cfg80211_chandef_valid(&chandef),
- "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz",
- chandef.chan->center_freq, chandef.chan->freq_offset,
- chandef.width, chandef.center_freq1, chandef.freq1_offset,
- chandef.center_freq2);
+ if (WARN(!cfg80211_chandef_valid(&chandef),
+ "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz",
+ chandef.chan ? chandef.chan->center_freq : -1,
+ chandef.chan ? chandef.chan->freq_offset : 0,
+ chandef.width, chandef.center_freq1, chandef.freq1_offset,
+ chandef.center_freq2))
+ return 0;
- if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))
+ if (!oper || !cfg80211_chandef_identical(&chandef, oper))
local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
else
local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
- if (offchannel_flag ||
- !cfg80211_chandef_identical(&local->hw.conf.chandef,
- &local->_oper_chandef)) {
+ /* force it also for scanning, since drivers might config differently */
+ if (offchannel_flag || local->scanning ||
+ !cfg80211_chandef_identical(&local->hw.conf.chandef, &chandef)) {
local->hw.conf.chandef = chandef;
changed |= IEEE80211_CONF_CHANGE_CHANNEL;
}
@@ -140,8 +161,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
* that otherwise STATIC is used.
*/
local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC;
- } else if (local->hw.conf.smps_mode != local->smps_mode) {
- local->hw.conf.smps_mode = local->smps_mode;
+ } else if (local->hw.conf.smps_mode != smps_mode) {
+ local->hw.conf.smps_mode = smps_mode;
changed |= IEEE80211_CONF_CHANGE_SMPS;
}
@@ -173,12 +194,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
might_sleep();
- if (!local->use_chanctx)
- changed |= ieee80211_hw_conf_chan(local);
- else
- changed &= ~(IEEE80211_CONF_CHANGE_CHANNEL |
- IEEE80211_CONF_CHANGE_POWER |
- IEEE80211_CONF_CHANGE_SMPS);
+ WARN_ON(changed & (IEEE80211_CONF_CHANGE_CHANNEL |
+ IEEE80211_CONF_CHANGE_POWER |
+ IEEE80211_CONF_CHANGE_SMPS));
if (changed && local->open_count) {
ret = drv_config(local, changed);
@@ -202,13 +220,115 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
return ret;
}
+/* for scanning, offchannel and chanctx emulation only */
+static int _ieee80211_hw_conf_chan(struct ieee80211_local *local,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ u32 changed;
+
+ if (!local->open_count)
+ return 0;
+
+ changed = ieee80211_calc_hw_conf_chan(local, ctx);
+ if (!changed)
+ return 0;
+
+ return drv_config(local, changed);
+}
+
+int ieee80211_hw_conf_chan(struct ieee80211_local *local)
+{
+ struct ieee80211_chanctx *ctx;
+
+ ctx = list_first_entry_or_null(&local->chanctx_list,
+ struct ieee80211_chanctx,
+ list);
+
+ return _ieee80211_hw_conf_chan(local, ctx ? &ctx->conf : NULL);
+}
+
+void ieee80211_hw_conf_init(struct ieee80211_local *local)
+{
+ u32 changed = ~(IEEE80211_CONF_CHANGE_CHANNEL |
+ IEEE80211_CONF_CHANGE_POWER |
+ IEEE80211_CONF_CHANGE_SMPS);
+
+ if (WARN_ON(!local->open_count))
+ return;
+
+ if (local->emulate_chanctx) {
+ struct ieee80211_chanctx *ctx;
+
+ ctx = list_first_entry_or_null(&local->chanctx_list,
+ struct ieee80211_chanctx,
+ list);
+
+ changed |= ieee80211_calc_hw_conf_chan(local,
+ ctx ? &ctx->conf : NULL);
+ }
+
+ WARN_ON(drv_config(local, changed));
+}
+
+int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ local->hw.conf.radar_enabled = ctx->radar_enabled;
+
+ return _ieee80211_hw_conf_chan(local, ctx);
+}
+EXPORT_SYMBOL(ieee80211_emulate_add_chanctx);
+
+void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ local->hw.conf.radar_enabled = false;
+
+ _ieee80211_hw_conf_chan(local, NULL);
+}
+EXPORT_SYMBOL(ieee80211_emulate_remove_chanctx);
+
+void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx,
+ u32 changed)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ local->hw.conf.radar_enabled = ctx->radar_enabled;
+
+ _ieee80211_hw_conf_chan(local, ctx);
+}
+EXPORT_SYMBOL(ieee80211_emulate_change_chanctx);
+
+int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ if (n_vifs <= 0)
+ return -EINVAL;
+
+ local->hw.conf.radar_enabled = vifs[0].new_ctx->radar_enabled;
+ _ieee80211_hw_conf_chan(local, vifs[0].new_ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL(ieee80211_emulate_switch_vif_chanctx);
+
#define BSS_CHANGED_VIF_CFG_FLAGS (BSS_CHANGED_ASSOC |\
BSS_CHANGED_IDLE |\
BSS_CHANGED_PS |\
BSS_CHANGED_IBSS |\
BSS_CHANGED_ARP_FILTER |\
BSS_CHANGED_SSID |\
- BSS_CHANGED_MLD_VALID_LINKS)
+ BSS_CHANGED_MLD_VALID_LINKS |\
+ BSS_CHANGED_MLD_TTLM)
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
u64 changed)
@@ -644,7 +764,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
struct ieee80211_local *local;
int priv_size, i;
struct wiphy *wiphy;
- bool use_chanctx;
+ bool emulate_chanctx;
if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config ||
!ops->add_interface || !ops->remove_interface ||
@@ -659,12 +779,26 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
return NULL;
/* check all or no channel context operations exist */
- i = !!ops->add_chanctx + !!ops->remove_chanctx +
- !!ops->change_chanctx + !!ops->assign_vif_chanctx +
- !!ops->unassign_vif_chanctx;
- if (WARN_ON(i != 0 && i != 5))
- return NULL;
- use_chanctx = i == 5;
+ if (ops->add_chanctx == ieee80211_emulate_add_chanctx &&
+ ops->remove_chanctx == ieee80211_emulate_remove_chanctx &&
+ ops->change_chanctx == ieee80211_emulate_change_chanctx) {
+ if (WARN_ON(ops->assign_vif_chanctx ||
+ ops->unassign_vif_chanctx))
+ return NULL;
+ emulate_chanctx = true;
+ } else {
+ if (WARN_ON(ops->add_chanctx == ieee80211_emulate_add_chanctx ||
+ ops->remove_chanctx == ieee80211_emulate_remove_chanctx ||
+ ops->change_chanctx == ieee80211_emulate_change_chanctx))
+ return NULL;
+ if (WARN_ON(!ops->add_chanctx ||
+ !ops->remove_chanctx ||
+ !ops->change_chanctx ||
+ !ops->assign_vif_chanctx ||
+ !ops->unassign_vif_chanctx))
+ return NULL;
+ emulate_chanctx = false;
+ }
/* Ensure 32-byte alignment of our private data and hw private data.
* We use the wiphy priv data for both our ieee80211_local and for
@@ -698,7 +832,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
WIPHY_FLAG_REPORTS_OBSS |
WIPHY_FLAG_OFFCHAN_TX;
- if (!use_chanctx || ops->remain_on_channel)
+ if (emulate_chanctx || ops->remain_on_channel)
wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |
@@ -734,8 +868,11 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT);
}
- if (!ops->set_key)
+ if (!ops->set_key) {
wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
+ wiphy_ext_feature_set(wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT);
+ }
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS);
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM);
@@ -752,7 +889,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN);
local->ops = ops;
- local->use_chanctx = use_chanctx;
+ local->emulate_chanctx = emulate_chanctx;
+
+ if (emulate_chanctx)
+ ieee80211_hw_set(&local->hw, CHANCTX_STA_CSA);
/*
* We need a bit of data queued to build aggregates properly, so
@@ -829,7 +969,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
ieee80211_dfs_radar_detected_work);
wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter);
- local->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&local->dynamic_ps_enable_work,
ieee80211_dynamic_ps_enable_work);
@@ -980,7 +1119,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
* as much, e.g. monitoring beacons would be hard if we
* might not even know which link is active at which time.
*/
- if (WARN_ON(!local->use_chanctx))
+ if (WARN_ON(local->emulate_chanctx))
return -EINVAL;
if (WARN_ON(!local->ops->link_info_changed))
@@ -1024,7 +1163,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
return -EINVAL;
#endif
- if (!local->use_chanctx) {
+ if (local->emulate_chanctx) {
for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) {
const struct ieee80211_iface_combination *comb;
@@ -1090,11 +1229,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
&sband->channels[i],
NL80211_CHAN_NO_HT);
/* init channel we're on */
- if (!local->use_chanctx && !local->_oper_chandef.chan) {
+ local->monitor_chanreq.oper = dflt_chandef;
+ if (local->emulate_chanctx) {
+ local->dflt_chandef = dflt_chandef;
local->hw.conf.chandef = dflt_chandef;
- local->_oper_chandef = dflt_chandef;
}
- local->monitor_chandef = dflt_chandef;
}
channels += sband->n_channels;
@@ -1115,8 +1254,26 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_vht = supp_vht || sband->vht_cap.vht_supported;
for_each_sband_iftype_data(sband, i, iftd) {
+ u8 he_40_mhz_cap;
+
supp_he = supp_he || iftd->he_cap.has_he;
supp_eht = supp_eht || iftd->eht_cap.has_eht;
+
+ if (band == NL80211_BAND_2GHZ)
+ he_40_mhz_cap =
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
+ else
+ he_40_mhz_cap =
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G;
+
+ /* currently no support for HE client where HT has 40 MHz but not HT */
+ if (iftd->he_cap.has_he &&
+ iftd->types_mask & (BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_P2P_CLIENT)) &&
+ sband->ht_cap.ht_supported &&
+ sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
+ !(iftd->he_cap.he_cap_elem.phy_cap_info[0] & he_40_mhz_cap))
+ return -EINVAL;
}
/* HT, VHT, HE require QoS, thus >= 4 queues */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index fccbcde3359a..32475da98d73 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -97,7 +97,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.bss_conf.basic_rates != basic_rates)
return false;
- cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
+ cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chanreq.oper.chan,
NL80211_CHAN_NO_HT);
ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
@@ -107,10 +107,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
ie->vht_operation, ie->ht_operation,
&sta_chan_def);
- ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, ie->eht_operation,
+ ieee80211_chandef_he_6ghz_oper(sdata->local, ie->he_operation,
+ ie->eht_operation,
&sta_chan_def);
- if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
+ if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chanreq.oper,
&sta_chan_def))
return false;
@@ -435,9 +436,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!sband->ht_cap.ht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap))
@@ -476,16 +477,16 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!ht_cap->ht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation))
return -ENOMEM;
pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
- ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chandef,
+ ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chanreq.oper,
sdata->vif.bss_conf.ht_operation_mode,
false);
@@ -507,9 +508,9 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!sband->vht_cap.vht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap))
@@ -548,9 +549,9 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!vht_cap->vht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation))
@@ -558,7 +559,7 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
ieee80211_ie_build_vht_oper(pos, vht_cap,
- &sdata->vif.bss_conf.chandef);
+ &sdata->vif.bss_conf.chanreq.oper);
return 0;
}
@@ -566,29 +567,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ie_len)
{
- const struct ieee80211_sta_he_cap *he_cap;
struct ieee80211_supported_band *sband;
- u8 *pos;
sband = ieee80211_get_sband(sdata);
if (!sband)
return -EINVAL;
- he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
-
- if (!he_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
- if (skb_tailroom(skb) < ie_len)
- return -ENOMEM;
-
- pos = skb_put(skb, ie_len);
- ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len);
-
- return 0;
+ return ieee80211_put_he_cap(skb, sdata, sband, NULL);
}
int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
@@ -605,20 +595,20 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
if (!he_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
len = 2 + 1 + sizeof(struct ieee80211_he_operation);
- if (sdata->vif.bss_conf.chandef.chan->band == NL80211_BAND_6GHZ)
+ if (sdata->vif.bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ)
len += sizeof(struct ieee80211_he_6ghz_oper);
if (skb_tailroom(skb) < len)
return -ENOMEM;
pos = skb_put(skb, len);
- ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chandef);
+ ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chanreq.oper);
return 0;
}
@@ -639,37 +629,25 @@ int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
if (!iftd)
return 0;
- ieee80211_ie_build_he_6ghz_cap(sdata, sdata->deflink.smps_mode, skb);
+ ieee80211_put_he_6ghz_cap(skb, sdata, sdata->deflink.smps_mode);
return 0;
}
int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ie_len)
{
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
struct ieee80211_supported_band *sband;
- u8 *pos;
sband = ieee80211_get_sband(sdata);
if (!sband)
return -EINVAL;
- he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
- eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
- if (!he_cap || !eht_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
- if (skb_tailroom(skb) < ie_len)
- return -ENOMEM;
-
- pos = skb_put(skb, ie_len);
- ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + ie_len, false);
-
- return 0;
+ return ieee80211_put_eht_cap(skb, sdata, sband, NULL);
}
int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
@@ -685,9 +663,9 @@ int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *sk
eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
if (!eht_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
len = 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) +
@@ -697,7 +675,7 @@ int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *sk
return -ENOMEM;
pos = skb_put(skb, len);
- ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chandef, eht_cap);
+ ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chanreq.oper, eht_cap);
return 0;
}
@@ -745,9 +723,9 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
return;
if (!ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT) ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return;
sdata->vif.bss_conf.he_support = true;
@@ -966,24 +944,22 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
int head_len, tail_len;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
- struct ieee80211_chanctx_conf *chanctx_conf;
struct mesh_csa_settings *csa;
- enum nl80211_band band;
+ const struct ieee80211_supported_band *sband;
u8 ie_len_he_cap, ie_len_eht_cap;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
+ u32 rate_flags;
sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
- band = chanctx_conf->def.chan->band;
- rcu_read_unlock();
- ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
- ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
+ sband = ieee80211_get_sband(sdata);
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
+
+ ie_len_he_cap = ieee80211_ie_len_he_cap(sdata);
+ ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata);
head_len = hdr_len +
2 + /* NULL SSID */
/* Channel Switch Announcement */
@@ -1107,7 +1083,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
}
rcu_read_unlock();
- if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
+ if (ieee80211_put_srates_elem(skb, sband,
+ sdata->vif.bss_conf.basic_rates,
+ rate_flags, 0, WLAN_EID_SUPP_RATES) ||
mesh_add_ds_params_ie(sdata, skb))
goto out_free;
@@ -1118,7 +1096,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
skb_trim(skb, 0);
bcn->tail = bcn->head + bcn->head_len;
- if (ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
+ if (ieee80211_put_srates_elem(skb, sband,
+ sdata->vif.bss_conf.basic_rates,
+ rate_flags, 0, WLAN_EID_EXT_SUPP_RATES) ||
mesh_add_rsn_ie(sdata, skb) ||
mesh_add_ht_cap_ie(sdata, skb) ||
mesh_add_ht_oper_ie(sdata, skb) ||
@@ -1234,7 +1214,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
netif_carrier_off(sdata->dev);
/* flush STAs and mpaths on this iface */
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
ieee80211_free_keys(sdata, true);
mesh_path_flush_by_iface(sdata);
@@ -1276,11 +1256,12 @@ static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
* unavailable.
*/
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &sdata->vif.bss_conf.chandef,
+ &sdata->vif.bss_conf.chanreq.oper,
NL80211_IFTYPE_MESH_POINT);
if (err > 0)
cfg80211_radar_event(sdata->local->hw.wiphy,
- &sdata->vif.bss_conf.chandef, GFP_ATOMIC);
+ &sdata->vif.bss_conf.chanreq.oper,
+ GFP_ATOMIC);
}
static bool
@@ -1292,7 +1273,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_supported_band *sband;
int err;
- ieee80211_conn_flags_t conn_flags = 0;
+ struct ieee80211_conn_settings conn = ieee80211_conn_settings_unlimited;
u32 vht_cap_info = 0;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -1301,15 +1282,18 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (!sband)
return false;
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (sdata->vif.bss_conf.chanreq.oper.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- fallthrough;
+ conn.mode = IEEE80211_CONN_MODE_LEGACY;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
case NL80211_CHAN_WIDTH_20:
- conn_flags |= IEEE80211_CONN_DISABLE_40MHZ;
- fallthrough;
+ conn.mode = IEEE80211_CONN_MODE_HT;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
case NL80211_CHAN_WIDTH_40:
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ conn.mode = IEEE80211_CONN_MODE_HT;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_40;
break;
default:
break;
@@ -1321,8 +1305,8 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
- vht_cap_info,
- conn_flags, sdata->vif.addr,
+ vht_cap_info, &conn,
+ sdata->vif.addr,
&csa_ie);
if (err < 0)
return false;
@@ -1335,7 +1319,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY)
ieee80211_mesh_csa_mark_radar(sdata);
- params.chandef = csa_ie.chandef;
+ params.chandef = csa_ie.chanreq.oper;
params.count = csa_ie.count;
if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef,
@@ -1371,7 +1355,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
params.radar_required = err;
if (cfg80211_chandef_identical(&params.chandef,
- &sdata->vif.bss_conf.chandef)) {
+ &sdata->vif.bss_conf.chanreq.oper)) {
mcsa_dbg(sdata,
"received csa with an identical chandef, ignoring\n");
return true;
@@ -1551,7 +1535,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
*changed |= BSS_CHANGED_BEACON;
mcsa_dbg(sdata, "complete switching to center freq %d MHz",
- sdata->vif.bss_conf.chandef.chan->center_freq);
+ sdata->vif.bss_conf.chanreq.oper.chan->center_freq);
return 0;
}
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index ad8469293d71..d913ce7ba72e 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -94,6 +94,7 @@ enum mesh_deferred_task_flags {
* @is_root: the destination station of this path is a root node
* @is_gate: the destination station of this path is a mesh gate
* @path_change_count: the number of path changes to destination
+ * @fast_tx_check: timestamp of last fast-xmit enable attempt
*
*
* The dst address is unique in the mesh path table. Since the mesh_path is
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 735edde1bd81..91b55d6a68b9 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -600,11 +600,10 @@ unlock_sta:
void mesh_fast_tx_gc(struct ieee80211_sub_if_data *sdata)
{
unsigned long timeout = msecs_to_jiffies(MESH_FAST_TX_CACHE_TIMEOUT);
- struct mesh_tx_cache *cache;
+ struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache;
struct ieee80211_mesh_fast_tx *entry;
struct hlist_node *n;
- cache = &sdata->u.mesh.tx_cache;
if (atomic_read(&cache->rht.nelems) < MESH_FAST_TX_CACHE_THRESHOLD_SIZE)
return;
@@ -622,7 +621,6 @@ void mesh_fast_tx_flush_mpath(struct mesh_path *mpath)
struct ieee80211_mesh_fast_tx *entry;
struct hlist_node *n;
- cache = &sdata->u.mesh.tx_cache;
spin_lock_bh(&cache->walk_lock);
hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list)
if (entry->mpath == mpath)
@@ -637,7 +635,6 @@ void mesh_fast_tx_flush_sta(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mesh_fast_tx *entry;
struct hlist_node *n;
- cache = &sdata->u.mesh.tx_cache;
spin_lock_bh(&cache->walk_lock);
hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list)
if (rcu_access_pointer(entry->mpath->next_hop) == sta)
@@ -651,7 +648,6 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata,
struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache;
struct ieee80211_mesh_fast_tx *entry;
- cache = &sdata->u.mesh.tx_cache;
spin_lock_bh(&cache->walk_lock);
entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params);
if (entry)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 28bf794f67f8..8f2b492a9fe9 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2019, 2021-2024 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
#include <linux/gfp.h>
@@ -163,7 +163,7 @@ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
u16 ht_opmode;
bool non_ht_sta = false, ht20_sta = false;
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (sdata->vif.bss_conf.chanreq.oper.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
@@ -196,7 +196,7 @@ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
if (non_ht_sta)
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED;
else if (ht20_sta &&
- sdata->vif.bss_conf.chandef.width > NL80211_CHAN_WIDTH_20)
+ sdata->vif.bss_conf.chanreq.oper.width > NL80211_CHAN_WIDTH_20)
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_20MHZ;
else
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE;
@@ -226,10 +226,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
int err = -ENOMEM;
- ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
- ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
+ ie_len_he_cap = ieee80211_ie_len_he_cap(sdata);
+ ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata);
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
2 + /* capability info */
@@ -266,14 +264,13 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (action != WLAN_SP_MESH_PEERING_CLOSE) {
struct ieee80211_supported_band *sband;
- enum nl80211_band band;
+ u32 rate_flags, basic_rates;
sband = ieee80211_get_sband(sdata);
if (!sband) {
err = -EINVAL;
goto free;
}
- band = sband->band;
/* capability info */
pos = skb_put_zero(skb, 2);
@@ -282,8 +279,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, 2);
put_unaligned_le16(sta->sta.aid, pos);
}
- if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
- ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
+
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
+ basic_rates = sdata->vif.bss_conf.basic_rates;
+
+ if (ieee80211_put_srates_elem(skb, sband, basic_rates,
+ rate_flags, 0,
+ WLAN_EID_SUPP_RATES) ||
+ ieee80211_put_srates_elem(skb, sband, basic_rates,
+ rate_flags, 0,
+ WLAN_EID_EXT_SUPP_RATES) ||
mesh_add_rsn_ie(sdata, skb) ||
mesh_add_meshid_ie(sdata, skb) ||
mesh_add_meshconf_ie(sdata, skb))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 073105deb424..47a2cba8313f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#include <linux/delay.h>
@@ -46,6 +46,8 @@
#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100)
#define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00
+#define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5)
+
static int max_nullfunc_tries = 2;
module_param(max_nullfunc_tries, int, 0644);
MODULE_PARM_DESC(max_nullfunc_tries,
@@ -92,84 +94,6 @@ MODULE_PARM_DESC(probe_wait_ms,
#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
/*
- * Extract from the given disabled subchannel bitmap (raw format
- * from the EHT Operation Element) the bits for the subchannel
- * we're using right now.
- */
-static u16
-ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper,
- struct cfg80211_chan_def *chandef, u16 bitmap)
-{
- struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional;
- struct cfg80211_chan_def ap_chandef = *chandef;
- u32 ap_center_freq, local_center_freq;
- u32 ap_bw, local_bw;
- int ap_start_freq, local_start_freq;
- u16 shift, mask;
-
- if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) ||
- !(eht_oper->params &
- IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT))
- return 0;
-
- /* set 160/320 supported to get the full AP definition */
- ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
- true, true, &ap_chandef);
- ap_center_freq = ap_chandef.center_freq1;
- ap_bw = 20 * BIT(u8_get_bits(info->control,
- IEEE80211_EHT_OPER_CHAN_WIDTH));
- ap_start_freq = ap_center_freq - ap_bw / 2;
- local_center_freq = chandef->center_freq1;
- local_bw = 20 * BIT(ieee80211_chan_width_to_rx_bw(chandef->width));
- local_start_freq = local_center_freq - local_bw / 2;
- shift = (local_start_freq - ap_start_freq) / 20;
- mask = BIT(local_bw / 20) - 1;
-
- return (bitmap >> shift) & mask;
-}
-
-/*
- * Handle the puncturing bitmap, possibly downgrading bandwidth to get a
- * valid bitmap.
- */
-static void
-ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
- const struct ieee80211_eht_operation *eht_oper,
- u16 bitmap, u64 *changed)
-{
- struct cfg80211_chan_def *chandef = &link->conf->chandef;
- struct ieee80211_local *local = link->sdata->local;
- u16 extracted;
- u64 _changed = 0;
-
- if (!changed)
- changed = &_changed;
-
- while (chandef->width > NL80211_CHAN_WIDTH_40) {
- extracted =
- ieee80211_extract_dis_subch_bmap(eht_oper, chandef,
- bitmap);
-
- if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- chandef) &&
- !(bitmap && ieee80211_hw_check(&local->hw,
- DISALLOW_PUNCTURING)))
- break;
- link->u.mgd.conn_flags |=
- ieee80211_chandef_downgrade(chandef);
- *changed |= BSS_CHANGED_BANDWIDTH;
- }
-
- if (chandef->width <= NL80211_CHAN_WIDTH_40)
- extracted = 0;
-
- if (link->conf->eht_puncturing != extracted) {
- link->conf->eht_puncturing = extracted;
- *changed |= BSS_CHANGED_EHT_PUNCTURING;
- }
-}
-
-/*
* We can have multiple work items (and connection probing)
* scheduling this timer, but we need to take care to only
* reschedule it when it should fire _earlier_ than it was
@@ -223,77 +147,84 @@ static int ecw2cw(int ecw)
return (1 << ecw) - 1;
}
-static ieee80211_conn_flags_t
-ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_link_data *link,
- ieee80211_conn_flags_t conn_flags,
- struct ieee80211_supported_band *sband,
- struct ieee80211_channel *channel,
- u32 vht_cap_info,
- const struct ieee80211_ht_operation *ht_oper,
- const struct ieee80211_vht_operation *vht_oper,
- const struct ieee80211_he_operation *he_oper,
- const struct ieee80211_eht_operation *eht_oper,
- const struct ieee80211_s1g_oper_ie *s1g_oper,
- struct cfg80211_chan_def *chandef, bool tracking)
+static enum ieee80211_conn_mode
+ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
+ u32 vht_cap_info,
+ const struct ieee802_11_elems *elems,
+ bool ignore_ht_channel_mismatch,
+ const struct ieee80211_conn_settings *conn,
+ struct cfg80211_chan_def *chandef)
{
+ const struct ieee80211_ht_operation *ht_oper = elems->ht_operation;
+ const struct ieee80211_vht_operation *vht_oper = elems->vht_operation;
+ const struct ieee80211_he_operation *he_oper = elems->he_operation;
+ const struct ieee80211_eht_operation *eht_oper = elems->eht_operation;
+ struct ieee80211_supported_band *sband =
+ sdata->local->hw.wiphy->bands[channel->band];
struct cfg80211_chan_def vht_chandef;
- struct ieee80211_sta_ht_cap sta_ht_cap;
- ieee80211_conn_flags_t ret;
+ bool no_vht = false;
u32 ht_cfreq;
- memset(chandef, 0, sizeof(struct cfg80211_chan_def));
- chandef->chan = channel;
- chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
- chandef->center_freq1 = channel->center_freq;
- chandef->freq1_offset = channel->freq_offset;
+ *chandef = (struct cfg80211_chan_def) {
+ .chan = channel,
+ .width = NL80211_CHAN_WIDTH_20_NOHT,
+ .center_freq1 = channel->center_freq,
+ .freq1_offset = channel->freq_offset,
+ };
- if (channel->band == NL80211_BAND_6GHZ) {
- if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, eht_oper,
- chandef)) {
- mlme_dbg(sdata,
- "bad 6 GHz operation, disabling HT/VHT/HE/EHT\n");
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- } else {
- ret = 0;
- }
- vht_chandef = *chandef;
- goto out;
- } else if (sband->band == NL80211_BAND_S1GHZ) {
- if (!ieee80211_chandef_s1g_oper(s1g_oper, chandef)) {
+ /* get special S1G case out of the way */
+ if (sband->band == NL80211_BAND_S1GHZ) {
+ if (!ieee80211_chandef_s1g_oper(elems->s1g_oper, chandef)) {
sdata_info(sdata,
"Missing S1G Operation Element? Trying operating == primary\n");
chandef->width = ieee80211_s1g_channel_width(channel);
}
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ;
- goto out;
+ return IEEE80211_CONN_MODE_S1G;
}
- memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
- ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+ /* get special 6 GHz case out of the way */
+ if (sband->band == NL80211_BAND_6GHZ) {
+ enum ieee80211_conn_mode mode = IEEE80211_CONN_MODE_EHT;
- if (!ht_oper || !sta_ht_cap.ht_supported) {
- mlme_dbg(sdata, "HT operation missing / HT not supported\n");
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ /* this is an error */
+ if (conn->mode < IEEE80211_CONN_MODE_HE)
+ return IEEE80211_CONN_MODE_LEGACY;
+
+ if (!elems->he_6ghz_capa || !elems->he_cap) {
+ sdata_info(sdata,
+ "HE 6 GHz AP is missing HE/HE 6 GHz band capability\n");
+ return IEEE80211_CONN_MODE_LEGACY;
+ }
+
+ if (!eht_oper || !elems->eht_cap) {
+ eht_oper = NULL;
+ mode = IEEE80211_CONN_MODE_HE;
+ }
+
+ if (!ieee80211_chandef_he_6ghz_oper(sdata->local, he_oper,
+ eht_oper, chandef)) {
+ sdata_info(sdata, "bad HE/EHT 6 GHz operation\n");
+ return IEEE80211_CONN_MODE_LEGACY;
+ }
+
+ return mode;
}
+ /* now we have the progression HT, VHT, ... */
+ if (conn->mode < IEEE80211_CONN_MODE_HT)
+ return IEEE80211_CONN_MODE_LEGACY;
+
+ if (!ht_oper || !elems->ht_cap_elem)
+ return IEEE80211_CONN_MODE_LEGACY;
+
chandef->width = NL80211_CHAN_WIDTH_20;
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
- if (!tracking && channel->center_freq != ht_cfreq) {
+ if (!ignore_ht_channel_mismatch && channel->center_freq != ht_cfreq) {
/*
* It's possible that some APs are confused here;
* Netgear WNDR3700 sometimes reports 4 higher than
@@ -305,36 +236,22 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
"Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
channel->center_freq, ht_cfreq,
ht_oper->primary_chan, channel->band);
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ return IEEE80211_CONN_MODE_LEGACY;
}
- /* check 40 MHz support, if we have it */
- if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) {
- ieee80211_chandef_ht_oper(ht_oper, chandef);
- } else {
- mlme_dbg(sdata, "40 MHz not supported\n");
- /* 40 MHz (and 80 MHz) must be supported for VHT */
- ret = IEEE80211_CONN_DISABLE_VHT;
- /* also mark 40 MHz disabled */
- ret |= IEEE80211_CONN_DISABLE_40MHZ;
- goto out;
- }
+ ieee80211_chandef_ht_oper(ht_oper, chandef);
- if (!vht_oper || !sband->vht_cap.vht_supported) {
- mlme_dbg(sdata, "VHT operation missing / VHT not supported\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
- }
+ if (conn->mode < IEEE80211_CONN_MODE_VHT)
+ return IEEE80211_CONN_MODE_HT;
vht_chandef = *chandef;
- if (!(conn_flags & IEEE80211_CONN_DISABLE_HE) &&
- he_oper &&
- (le32_to_cpu(he_oper->he_oper_params) &
- IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
+
+ /*
+ * having he_cap/he_oper parsed out implies we're at
+ * least operating as HE STA
+ */
+ if (elems->he_cap && he_oper &&
+ he_oper->he_oper_params & cpu_to_le32(IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
struct ieee80211_vht_operation he_oper_vht_cap;
/*
@@ -347,253 +264,614 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
&he_oper_vht_cap, ht_oper,
&vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_HE))
- sdata_info(sdata,
- "HE AP VHT information is invalid, disabling HE\n");
- ret = IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ sdata_info(sdata,
+ "HE AP VHT information is invalid, disabling HE\n");
+ /* this will cause us to re-parse as VHT STA */
+ return IEEE80211_CONN_MODE_VHT;
+ }
+ } else if (!vht_oper || !elems->vht_cap_elem) {
+ if (sband->band == NL80211_BAND_5GHZ) {
+ sdata_info(sdata,
+ "VHT information is missing, disabling VHT\n");
+ return IEEE80211_CONN_MODE_HT;
}
+ no_vht = true;
+ } else if (sband->band == NL80211_BAND_2GHZ) {
+ no_vht = true;
} else if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
vht_cap_info,
vht_oper, ht_oper,
&vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
- sdata_info(sdata,
- "AP VHT information is invalid, disabling VHT\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
+ sdata_info(sdata,
+ "AP VHT information is invalid, disabling VHT\n");
+ return IEEE80211_CONN_MODE_HT;
}
- if (!cfg80211_chandef_valid(&vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
- sdata_info(sdata,
- "AP VHT information is invalid, disabling VHT\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
+ if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
+ sdata_info(sdata,
+ "AP VHT information doesn't match HT, disabling VHT\n");
+ return IEEE80211_CONN_MODE_HT;
}
- if (cfg80211_chandef_identical(chandef, &vht_chandef)) {
- ret = 0;
- goto out;
- }
+ *chandef = vht_chandef;
- if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
- sdata_info(sdata,
- "AP VHT information doesn't match HT, disabling VHT\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
+ /* stick to current max mode if we or the AP don't have HE */
+ if (conn->mode < IEEE80211_CONN_MODE_HE ||
+ !elems->he_operation || !elems->he_cap) {
+ if (no_vht)
+ return IEEE80211_CONN_MODE_HT;
+ return IEEE80211_CONN_MODE_VHT;
}
- *chandef = vht_chandef;
+ /* stick to HE if we or the AP don't have EHT */
+ if (conn->mode < IEEE80211_CONN_MODE_EHT ||
+ !eht_oper || !elems->eht_cap)
+ return IEEE80211_CONN_MODE_HE;
/*
* handle the case that the EHT operation indicates that it holds EHT
* operation information (in case that the channel width differs from
* the channel width reported in HT/VHT/HE).
*/
- if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
+ if (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) {
struct cfg80211_chan_def eht_chandef = *chandef;
ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
- eht_chandef.width ==
- NL80211_CHAN_WIDTH_160,
- false, &eht_chandef);
+ &eht_chandef);
+
+ eht_chandef.punctured =
+ ieee80211_eht_oper_dis_subchan_bitmap(eht_oper);
if (!cfg80211_chandef_valid(&eht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT))
- sdata_info(sdata,
- "AP EHT information is invalid, disabling EHT\n");
- ret = IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ sdata_info(sdata,
+ "AP EHT information is invalid, disabling EHT\n");
+ return IEEE80211_CONN_MODE_HE;
}
if (!cfg80211_chandef_compatible(chandef, &eht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT))
- sdata_info(sdata,
- "AP EHT information is incompatible, disabling EHT\n");
- ret = IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ sdata_info(sdata,
+ "AP EHT information doesn't match HT/VHT/HE, disabling EHT\n");
+ return IEEE80211_CONN_MODE_HE;
}
*chandef = eht_chandef;
}
- ret = 0;
+ return IEEE80211_CONN_MODE_EHT;
+}
+
+static bool
+ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_he_cap_elem *he_cap,
+ const struct ieee80211_he_operation *he_op)
+{
+ struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp;
+ u16 mcs_80_map_tx, mcs_80_map_rx;
+ u16 ap_min_req_set;
+ int nss;
+
+ if (!he_cap)
+ return false;
+
+ /* mcs_nss is right after he_cap info */
+ he_mcs_nss_supp = (void *)(he_cap + 1);
+
+ mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80);
+ mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80);
+
+ /* P802.11-REVme/D0.3
+ * 27.1.1 Introduction to the HE PHY
+ * ...
+ * An HE STA shall support the following features:
+ * ...
+ * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all
+ * supported channel widths for HE SU PPDUs
+ */
+ if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) {
+ sdata_info(sdata,
+ "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n",
+ mcs_80_map_tx, mcs_80_map_rx);
+ return false;
+ }
+
+ if (!he_op)
+ return true;
+
+ ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
-out:
/*
- * When tracking the current AP, don't do any further checks if the
- * new chandef is identical to the one we're currently using for the
- * connection. This keeps us from playing ping-pong with regulatory,
- * without it the following can happen (for example):
- * - connect to an AP with 80 MHz, world regdom allows 80 MHz
- * - AP advertises regdom US
- * - CRDA loads regdom US with 80 MHz prohibited (old database)
- * - the code below detects an unsupported channel, downgrades, and
- * we disconnect from the AP in the caller
- * - disconnect causes CRDA to reload world regdomain and the game
- * starts anew.
- * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
+ * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
+ * zeroes, which is nonsense, and completely inconsistent with itself
+ * (it doesn't have 8 streams). Accept the settings in this case anyway.
+ */
+ if (!ap_min_req_set)
+ return true;
+
+ /* make sure the AP is consistent with itself
*
- * It seems possible that there are still scenarios with CSA or real
- * bandwidth changes where a this could happen, but those cases are
- * less common and wouldn't completely prevent using the AP.
+ * P802.11-REVme/D0.3
+ * 26.17.1 Basic HE BSS operation
+ *
+ * A STA that is operating in an HE BSS shall be able to receive and
+ * transmit at each of the <HE-MCS, NSS> tuple values indicated by the
+ * Basic HE-MCS And NSS Set field of the HE Operation parameter of the
+ * MLME-START.request primitive and shall be able to receive at each of
+ * the <HE-MCS, NSS> tuple values indicated by the Supported HE-MCS and
+ * NSS Set field in the HE Capabilities parameter of the MLMESTART.request
+ * primitive
*/
- if (tracking &&
- cfg80211_chandef_identical(chandef, &link->conf->chandef))
- return ret;
+ for (nss = 8; nss > 0; nss--) {
+ u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+ u8 ap_rx_val;
+ u8 ap_tx_val;
+
+ if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+ continue;
+
+ ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3;
+ ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3;
+
+ if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) {
+ sdata_info(sdata,
+ "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n",
+ nss, ap_rx_val, ap_rx_val, ap_op_val);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_he_operation *he_op)
+{
+ const struct ieee80211_sta_he_cap *sta_he_cap =
+ ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ u16 ap_min_req_set;
+ int i;
- /* don't print the message below for VHT mismatch if VHT is disabled */
- if (ret & IEEE80211_CONN_DISABLE_VHT)
- vht_chandef = *chandef;
+ if (!sta_he_cap || !he_op)
+ return false;
+
+ ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
/*
- * Ignore the DISABLED flag when we're already connected and only
- * tracking the APs beacon for bandwidth changes - otherwise we
- * might get disconnected here if we connect to an AP, update our
- * regulatory information based on the AP's country IE and the
- * information we have is wrong/outdated and disables the channel
- * that we're actually using for the connection to the AP.
+ * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
+ * zeroes, which is nonsense, and completely inconsistent with itself
+ * (it doesn't have 8 streams). Accept the settings in this case anyway.
*/
- while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
- tracking ? 0 :
- IEEE80211_CHAN_DISABLED)) {
- if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- break;
+ if (!ap_min_req_set)
+ return true;
+
+ /* Need to go over for 80MHz, 160MHz and for 80+80 */
+ for (i = 0; i < 3; i++) {
+ const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
+ &sta_he_cap->he_mcs_nss_supp;
+ u16 sta_mcs_map_rx =
+ le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
+ u16 sta_mcs_map_tx =
+ le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
+ u8 nss;
+ bool verified = true;
+
+ /*
+ * For each band there is a maximum of 8 spatial streams
+ * possible. Each of the sta_mcs_map_* is a 16-bit struct built
+ * of 2 bits per NSS (1-8), with the values defined in enum
+ * ieee80211_he_mcs_support. Need to make sure STA TX and RX
+ * capabilities aren't less than the AP's minimum requirements
+ * for this HE BSS per SS.
+ * It is enough to find one such band that meets the reqs.
+ */
+ for (nss = 8; nss > 0; nss--) {
+ u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
+ u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
+ u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+
+ if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+ continue;
+
+ /*
+ * Make sure the HE AP doesn't require MCSs that aren't
+ * supported by the client as required by spec
+ *
+ * P802.11-REVme/D0.3
+ * 26.17.1 Basic HE BSS operation
+ *
+ * An HE STA shall not attempt to join * (MLME-JOIN.request primitive)
+ * a BSS, unless it supports (i.e., is able to both transmit and
+ * receive using) all of the <HE-MCS, NSS> tuples in the basic
+ * HE-MCS and NSS set.
+ */
+ if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
+ verified = false;
+ break;
+ }
}
- ret |= ieee80211_chandef_downgrade(chandef);
+ if (verified)
+ return true;
+ }
+
+ /* If here, STA doesn't meet AP's HE min requirements */
+ return false;
+}
+
+static u8
+ieee80211_get_eht_cap_mcs_nss(const struct ieee80211_sta_he_cap *sta_he_cap,
+ const struct ieee80211_sta_eht_cap *sta_eht_cap,
+ unsigned int idx, int bw)
+{
+ u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0];
+ u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0];
+
+ /* handle us being a 20 MHz-only EHT STA - with four values
+ * for MCS 0-7, 8-9, 10-11, 12-13.
+ */
+ if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL))
+ return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx];
+
+ /* the others have MCS 0-9 together, rather than separately from 0-7 */
+ if (idx > 0)
+ idx--;
+
+ switch (bw) {
+ case 0:
+ return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx];
+ case 1:
+ if (!(he_phy_cap0 &
+ (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)))
+ return 0xff; /* pass check */
+ return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx];
+ case 2:
+ if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ))
+ return 0xff; /* pass check */
+ return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx];
+ }
+
+ WARN_ON(1);
+ return 0;
+}
+
+static bool
+ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_eht_operation *eht_op)
+{
+ const struct ieee80211_sta_he_cap *sta_he_cap =
+ ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ const struct ieee80211_sta_eht_cap *sta_eht_cap =
+ ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
+ const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req;
+ unsigned int i;
+
+ if (!sta_he_cap || !sta_eht_cap || !eht_op)
+ return false;
+
+ req = &eht_op->basic_mcs_nss;
+
+ for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) {
+ u8 req_rx_nss, req_tx_nss;
+ unsigned int bw;
+
+ req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i],
+ IEEE80211_EHT_MCS_NSS_RX);
+ req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i],
+ IEEE80211_EHT_MCS_NSS_TX);
+
+ for (bw = 0; bw < 3; bw++) {
+ u8 have, have_rx_nss, have_tx_nss;
+
+ have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap,
+ sta_eht_cap,
+ i, bw);
+ have_rx_nss = u8_get_bits(have,
+ IEEE80211_EHT_MCS_NSS_RX);
+ have_tx_nss = u8_get_bits(have,
+ IEEE80211_EHT_MCS_NSS_TX);
+
+ if (req_rx_nss > have_rx_nss ||
+ req_tx_nss > have_tx_nss)
+ return false;
+ }
}
- if (!he_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef,
- IEEE80211_CHAN_NO_HE))
- ret |= IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT;
+ return true;
+}
- if (!eht_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef,
- IEEE80211_CHAN_NO_EHT))
- ret |= IEEE80211_CONN_DISABLE_EHT;
+static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags)
+{
+ if (!cfg80211_chandef_usable(sdata->local->hw.wiphy,
+ chandef, prohibited_flags))
+ return false;
+
+ if (chandef->punctured &&
+ ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING))
+ return false;
- if (chandef->width != vht_chandef.width && !tracking)
+ return true;
+}
+
+static struct ieee802_11_elems *
+ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_conn_settings *conn,
+ struct cfg80211_bss *cbss, int link_id,
+ struct ieee80211_chan_req *chanreq)
+{
+ const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies);
+ struct ieee80211_bss *bss = (void *)cbss->priv;
+ struct ieee80211_channel *channel = cbss->channel;
+ struct ieee80211_elems_parse_params parse_params = {
+ .link_id = -1,
+ .from_ap = true,
+ .start = ies->data,
+ .len = ies->len,
+ .mode = conn->mode,
+ };
+ struct ieee802_11_elems *elems;
+ struct ieee80211_supported_band *sband;
+ struct cfg80211_chan_def ap_chandef;
+ enum ieee80211_conn_mode ap_mode;
+ int ret;
+
+again:
+ elems = ieee802_11_parse_elems_full(&parse_params);
+ if (!elems)
+ return ERR_PTR(-ENOMEM);
+
+ ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info,
+ elems, false, conn, &ap_chandef);
+
+ mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n",
+ cbss->bssid, ieee80211_conn_mode_str(ap_mode));
+
+ /* this should be impossible since parsing depends on our mode */
+ if (WARN_ON(ap_mode > conn->mode)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ sband = sdata->local->hw.wiphy->bands[channel->band];
+
+ switch (channel->band) {
+ case NL80211_BAND_S1GHZ:
+ if (WARN_ON(ap_mode != IEEE80211_CONN_MODE_S1G)) {
+ ret = -EINVAL;
+ goto free;
+ }
+ return elems;
+ case NL80211_BAND_6GHZ:
+ if (ap_mode < IEEE80211_CONN_MODE_HE) {
+ sdata_info(sdata,
+ "Rejecting non-HE 6/7 GHz connection");
+ ret = -EINVAL;
+ goto free;
+ }
+ break;
+ default:
+ if (WARN_ON(ap_mode == IEEE80211_CONN_MODE_S1G)) {
+ ret = -EINVAL;
+ goto free;
+ }
+ }
+
+ switch (ap_mode) {
+ case IEEE80211_CONN_MODE_S1G:
+ WARN_ON(1);
+ ret = -EINVAL;
+ goto free;
+ case IEEE80211_CONN_MODE_LEGACY:
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
+ case IEEE80211_CONN_MODE_HT:
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_40);
+ break;
+ case IEEE80211_CONN_MODE_VHT:
+ case IEEE80211_CONN_MODE_HE:
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ break;
+ case IEEE80211_CONN_MODE_EHT:
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_320);
+ break;
+ }
+
+ conn->mode = ap_mode;
+ chanreq->oper = ap_chandef;
+
+ /* wider-bandwidth OFDMA is only done in EHT */
+ if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
+ !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW))
+ chanreq->ap = ap_chandef;
+ else
+ chanreq->ap.chan = NULL;
+
+ while (!ieee80211_chandef_usable(sdata, &chanreq->oper,
+ IEEE80211_CHAN_DISABLED)) {
+ if (WARN_ON(chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ieee80211_chanreq_downgrade(chanreq, conn);
+ }
+
+ if (conn->mode >= IEEE80211_CONN_MODE_HE &&
+ !cfg80211_chandef_usable(sdata->wdev.wiphy, &chanreq->oper,
+ IEEE80211_CHAN_NO_HE)) {
+ conn->mode = IEEE80211_CONN_MODE_VHT;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ }
+
+ if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
+ !cfg80211_chandef_usable(sdata->wdev.wiphy, &chanreq->oper,
+ IEEE80211_CHAN_NO_EHT)) {
+ conn->mode = IEEE80211_CONN_MODE_HE;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ }
+
+ if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode)
sdata_info(sdata,
- "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
+ "regulatory prevented using AP config, downgraded\n");
- WARN_ON_ONCE(!cfg80211_chandef_valid(chandef));
- return ret;
+ if (conn->mode >= IEEE80211_CONN_MODE_HE &&
+ (!ieee80211_verify_peer_he_mcs_support(sdata, (void *)elems->he_cap,
+ elems->he_operation) ||
+ !ieee80211_verify_sta_he_mcs_support(sdata, sband,
+ elems->he_operation))) {
+ conn->mode = IEEE80211_CONN_MODE_VHT;
+ sdata_info(sdata, "required MCSes not supported, disabling HE\n");
+ }
+
+ if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
+ !ieee80211_verify_sta_eht_mcs_support(sdata, sband,
+ elems->eht_operation)) {
+ conn->mode = IEEE80211_CONN_MODE_HE;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ sdata_info(sdata, "required MCSes not supported, disabling EHT\n");
+ }
+
+ /* the mode can only decrease, so this must terminate */
+ if (ap_mode != conn->mode)
+ goto again;
+
+ mlme_link_id_dbg(sdata, link_id,
+ "connecting with %s mode, max bandwidth %d MHz\n",
+ ieee80211_conn_mode_str(conn->mode),
+ 20 * (1 << conn->bw_limit));
+
+ if (WARN_ON_ONCE(!cfg80211_chandef_valid(&chanreq->oper))) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ return elems;
+free:
+ kfree(elems);
+ return ERR_PTR(ret);
}
static int ieee80211_config_bw(struct ieee80211_link_data *link,
- const struct ieee80211_ht_cap *ht_cap,
- const struct ieee80211_vht_cap *vht_cap,
- const struct ieee80211_ht_operation *ht_oper,
- const struct ieee80211_vht_operation *vht_oper,
- const struct ieee80211_he_operation *he_oper,
- const struct ieee80211_eht_operation *eht_oper,
- const struct ieee80211_s1g_oper_ie *s1g_oper,
- const u8 *bssid, u64 *changed)
+ struct ieee802_11_elems *elems,
+ bool update, u64 *changed)
{
+ struct ieee80211_channel *channel = link->conf->chanreq.oper.chan;
struct ieee80211_sub_if_data *sdata = link->sdata;
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_channel *chan = link->conf->chandef.chan;
- struct ieee80211_supported_band *sband =
- local->hw.wiphy->bands[chan->band];
- struct cfg80211_chan_def chandef;
- u16 ht_opmode;
- ieee80211_conn_flags_t flags;
+ struct ieee80211_chan_req chanreq = {};
+ enum ieee80211_conn_mode ap_mode;
u32 vht_cap_info = 0;
+ u16 ht_opmode;
int ret;
- /* if HT was/is disabled, don't track any bandwidth changes */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT || !ht_oper)
+ /* don't track any bandwidth changes in legacy/S1G modes */
+ if (link->u.mgd.conn.mode == IEEE80211_CONN_MODE_LEGACY ||
+ link->u.mgd.conn.mode == IEEE80211_CONN_MODE_S1G)
return 0;
- /* don't check VHT if we associated as non-VHT station */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)
- vht_oper = NULL;
+ if (elems->vht_cap_elem)
+ vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
- /* don't check HE if we associated as non-HE station */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE ||
- !ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) {
- he_oper = NULL;
- eht_oper = NULL;
+ ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info,
+ elems, true, &link->u.mgd.conn,
+ &chanreq.ap);
+
+ if (ap_mode != link->u.mgd.conn.mode) {
+ link_info(link,
+ "AP appears to change mode (expected %s, found %s), disconnect\n",
+ ieee80211_conn_mode_str(link->u.mgd.conn.mode),
+ ieee80211_conn_mode_str(ap_mode));
+ return -EINVAL;
}
- /* don't check EHT if we associated as non-EHT station */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT ||
- !ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif))
- eht_oper = NULL;
+ chanreq.oper = chanreq.ap;
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT ||
+ sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)
+ chanreq.ap.chan = NULL;
/*
- * if bss configuration changed store the new one -
+ * if HT operation mode changed store the new one -
* this may be applicable even if channel is identical
*/
- ht_opmode = le16_to_cpu(ht_oper->operation_mode);
- if (link->conf->ht_operation_mode != ht_opmode) {
- *changed |= BSS_CHANGED_HT;
- link->conf->ht_operation_mode = ht_opmode;
+ if (elems->ht_operation) {
+ ht_opmode = le16_to_cpu(elems->ht_operation->operation_mode);
+ if (link->conf->ht_operation_mode != ht_opmode) {
+ *changed |= BSS_CHANGED_HT;
+ link->conf->ht_operation_mode = ht_opmode;
+ }
}
- if (vht_cap)
- vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info);
-
- /* calculate new channel (type) based on HT/VHT/HE operation IEs */
- flags = ieee80211_determine_chantype(sdata, link,
- link->u.mgd.conn_flags,
- sband, chan, vht_cap_info,
- ht_oper, vht_oper,
- he_oper, eht_oper,
- s1g_oper, &chandef, true);
-
/*
* Downgrade the new channel if we associated with restricted
- * capabilities. For example, if we associated as a 20 MHz STA
- * to a 40 MHz AP (due to regulatory, capabilities or config
- * reasons) then switching to a 40 MHz channel now won't do us
- * any good -- we couldn't use it with the AP.
+ * bandwidth capabilities. For example, if we associated as a
+ * 20 MHz STA to a 40 MHz AP (due to regulatory, capabilities
+ * or config reasons) then switching to a 40 MHz channel now
+ * won't do us any good -- we couldn't use it with the AP.
*/
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ &&
- chandef.width == NL80211_CHAN_WIDTH_80P80)
- flags |= ieee80211_chandef_downgrade(&chandef);
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_160MHZ &&
- chandef.width == NL80211_CHAN_WIDTH_160)
- flags |= ieee80211_chandef_downgrade(&chandef);
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_40MHZ &&
- chandef.width > NL80211_CHAN_WIDTH_20)
- flags |= ieee80211_chandef_downgrade(&chandef);
-
- if (cfg80211_chandef_identical(&chandef, &link->conf->chandef))
+ while (link->u.mgd.conn.bw_limit <
+ ieee80211_min_bw_limit_from_chandef(&chanreq.oper))
+ ieee80211_chandef_downgrade(&chanreq.oper, NULL);
+
+ if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq))
return 0;
link_info(link,
- "AP %pM changed bandwidth, new config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n",
- link->u.mgd.bssid, chandef.chan->center_freq,
- chandef.chan->freq_offset, chandef.width,
- chandef.center_freq1, chandef.freq1_offset,
- chandef.center_freq2);
-
- if (flags != (link->u.mgd.conn_flags &
- (IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT |
- IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ |
- IEEE80211_CONN_DISABLE_320MHZ)) ||
- !cfg80211_chandef_valid(&chandef)) {
+ "AP %pM changed bandwidth, new used config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n",
+ link->u.mgd.bssid, chanreq.oper.chan->center_freq,
+ chanreq.oper.chan->freq_offset, chanreq.oper.width,
+ chanreq.oper.center_freq1, chanreq.oper.freq1_offset,
+ chanreq.oper.center_freq2);
+
+ if (!cfg80211_chandef_valid(&chanreq.oper)) {
sdata_info(sdata,
- "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n",
- link->u.mgd.bssid, flags, ifmgd->flags);
+ "AP %pM changed caps/bw in a way we can't support - disconnect\n",
+ link->u.mgd.bssid);
return -EINVAL;
}
- ret = ieee80211_link_change_bandwidth(link, &chandef, changed);
+ if (!update) {
+ link->conf->chanreq = chanreq;
+ return 0;
+ }
+
+ /*
+ * We're tracking the current AP here, so don't do any further checks
+ * here. This keeps us from playing ping-pong with regulatory, without
+ * it the following can happen (for example):
+ * - connect to an AP with 80 MHz, world regdom allows 80 MHz
+ * - AP advertises regdom US
+ * - CRDA loads regdom US with 80 MHz prohibited (old database)
+ * - we detect an unsupported channel and disconnect
+ * - disconnect causes CRDA to reload world regdomain and the game
+ * starts anew.
+ * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
+ *
+ * It seems possible that there are still scenarios with CSA or real
+ * bandwidth changes where a this could happen, but those cases are
+ * less common and wouldn't completely prevent using the AP.
+ */
+ ret = ieee80211_link_change_chanreq(link, &chanreq, changed);
if (ret) {
sdata_info(sdata,
"AP %pM changed bandwidth to incompatible one - disconnect\n",
@@ -612,7 +890,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
enum ieee80211_smps_mode smps,
- ieee80211_conn_flags_t conn_flags)
+ const struct ieee80211_conn_settings *conn)
{
u8 *pos;
u32 flags = channel->flags;
@@ -647,7 +925,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
* capable of 40 MHz -- some broken APs will never fall
* back to trying to transmit in 20 MHz.
*/
- if (conn_flags & IEEE80211_CONN_DISABLE_40MHZ) {
+ if (conn->bw_limit <= IEEE80211_CONN_BW_LIMIT_20) {
cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
cap &= ~IEEE80211_HT_CAP_SGI_40;
}
@@ -686,7 +964,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
struct ieee80211_supported_band *sband,
struct ieee80211_vht_cap *ap_vht_cap,
- ieee80211_conn_flags_t conn_flags)
+ const struct ieee80211_conn_settings *conn)
{
struct ieee80211_local *local = sdata->local;
u8 *pos;
@@ -703,16 +981,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
/* determine capability flags */
cap = vht_cap.cap;
- if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ) {
- u32 bw = cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
-
- cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
- if (bw == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ ||
- bw == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
- cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
- }
-
- if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ) {
+ if (conn->bw_limit <= IEEE80211_CONN_BW_LIMIT_80) {
cap &= ~IEEE80211_VHT_CAP_SHORT_GI_160;
cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
}
@@ -769,79 +1038,12 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
return mu_mimo_owner;
}
-/* This function determines HE capability flags for the association
- * and builds the IE.
- */
-static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb,
- struct ieee80211_supported_band *sband,
- enum ieee80211_smps_mode smps_mode,
- ieee80211_conn_flags_t conn_flags)
-{
- u8 *pos, *pre_he_pos;
- const struct ieee80211_sta_he_cap *he_cap;
- u8 he_cap_size;
-
- he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- if (WARN_ON(!he_cap))
- return;
-
- /* get a max size estimate */
- he_cap_size =
- 2 + 1 + sizeof(he_cap->he_cap_elem) +
- ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
- ieee80211_he_ppe_size(he_cap->ppe_thres[0],
- he_cap->he_cap_elem.phy_cap_info);
- pos = skb_put(skb, he_cap_size);
- pre_he_pos = pos;
- pos = ieee80211_ie_build_he_cap(conn_flags,
- pos, he_cap, pos + he_cap_size);
- /* trim excess if any */
- skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos));
-
- ieee80211_ie_build_he_6ghz_cap(sdata, smps_mode, skb);
-}
-
-static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb,
- struct ieee80211_supported_band *sband)
-{
- u8 *pos;
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
- u8 eht_cap_size;
-
- he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
-
- /*
- * EHT capabilities element is only added if the HE capabilities element
- * was added so assume that 'he_cap' is valid and don't check it.
- */
- if (WARN_ON(!he_cap || !eht_cap))
- return;
-
- eht_cap_size =
- 2 + 1 + sizeof(eht_cap->eht_cap_elem) +
- ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
- &eht_cap->eht_cap_elem,
- false) +
- ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
- eht_cap->eht_cap_elem.phy_cap_info);
- pos = skb_put(skb, eht_cap_size);
- ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + eht_cap_size,
- false);
-}
-
static void ieee80211_assoc_add_rates(struct sk_buff *skb,
enum nl80211_chan_width width,
struct ieee80211_supported_band *sband,
struct ieee80211_mgd_assoc_data *assoc_data)
{
- unsigned int rates_len, supp_rates_len;
- u32 rates = 0;
- int i, count;
- u8 *pos;
+ u32 rates;
if (assoc_data->supp_rates_len) {
/*
@@ -850,53 +1052,23 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb,
* in the association request (e.g. D-Link DAP 1353 in
* b-only mode)...
*/
- rates_len = ieee80211_parse_bitrates(width, sband,
- assoc_data->supp_rates,
- assoc_data->supp_rates_len,
- &rates);
+ ieee80211_parse_bitrates(width, sband,
+ assoc_data->supp_rates,
+ assoc_data->supp_rates_len,
+ &rates);
} else {
/*
* In case AP not provide any supported rates information
* before association, we send information element(s) with
* all rates that we support.
*/
- rates_len = sband->n_bitrates;
- for (i = 0; i < sband->n_bitrates; i++)
- rates |= BIT(i);
- }
-
- supp_rates_len = rates_len;
- if (supp_rates_len > 8)
- supp_rates_len = 8;
-
- pos = skb_put(skb, supp_rates_len + 2);
- *pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = supp_rates_len;
-
- count = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
- if (BIT(i) & rates) {
- int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = (u8)rate;
- if (++count == 8)
- break;
- }
+ rates = ~0;
}
- if (rates_len > count) {
- pos = skb_put(skb, rates_len - count + 2);
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = rates_len - count;
-
- for (i++; i < sband->n_bitrates; i++) {
- if (BIT(i) & rates) {
- int rate;
-
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = (u8)rate;
- }
- }
- }
+ ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates,
+ WLAN_EID_SUPP_RATES);
+ ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates,
+ WLAN_EID_EXT_SUPP_RATES);
}
static size_t ieee80211_add_before_ht_elems(struct sk_buff *skb,
@@ -1133,11 +1305,11 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
offset);
if (sband->band != NL80211_BAND_6GHZ &&
- !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HT) {
ieee80211_add_ht_ie(sdata, skb,
assoc_data->link[link_id].ap_ht_param,
sband, chan, smps_mode,
- assoc_data->link[link_id].conn_flags);
+ &assoc_data->link[link_id].conn);
ADD_PRESENT_ELEM(WLAN_EID_HT_CAPABILITY);
}
@@ -1147,37 +1319,28 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
offset);
if (sband->band != NL80211_BAND_6GHZ &&
- !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_VHT &&
+ sband->vht_cap.vht_supported) {
bool mu_mimo_owner =
ieee80211_add_vht_ie(sdata, skb, sband,
&assoc_data->link[link_id].ap_vht_cap,
- assoc_data->link[link_id].conn_flags);
+ &assoc_data->link[link_id].conn);
if (link)
link->conf->mu_mimo_owner = mu_mimo_owner;
ADD_PRESENT_ELEM(WLAN_EID_VHT_CAPABILITY);
}
- /*
- * If AP doesn't support HT, mark HE and EHT as disabled.
- * If on the 5GHz band, make sure it supports VHT.
- */
- if (assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT ||
- (sband->band == NL80211_BAND_5GHZ &&
- assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT))
- assoc_data->link[link_id].conn_flags |=
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
-
/* if present, add any custom IEs that go before HE */
offset = ieee80211_add_before_he_elems(skb, extra_elems,
extra_elems_len,
offset);
- if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HE)) {
- ieee80211_add_he_ie(sdata, skb, sband, smps_mode,
- assoc_data->link[link_id].conn_flags);
+ if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HE) {
+ ieee80211_put_he_cap(skb, sdata, sband,
+ &assoc_data->link[link_id].conn);
ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_HE_CAPABILITY);
+ ieee80211_put_he_6ghz_cap(skb, sdata, smps_mode);
}
/*
@@ -1185,7 +1348,7 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
* calling ieee80211_assoc_add_ml_elem(), so add this one if
* we're going to put it after the ML element
*/
- if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT))
+ if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT)
ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_EHT_CAPABILITY);
if (link_id == assoc_data->assoc_link_id)
@@ -1195,8 +1358,9 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
/* crash if somebody gets it wrong */
present_elems = NULL;
- if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT))
- ieee80211_add_eht_ie(sdata, skb, sband);
+ if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT)
+ ieee80211_put_eht_cap(skb, sdata, sband,
+ &assoc_data->link[link_id].conn);
if (sband->band == NL80211_BAND_S1GHZ) {
ieee80211_add_aid_request_ie(sdata, skb);
@@ -1206,9 +1370,6 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
- if (link)
- link->u.mgd.conn_flags = assoc_data->link[link_id].conn_flags;
-
return offset;
}
@@ -1318,8 +1479,6 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EML_CAPA);
skb_put_data(skb, &eml_capa, sizeof(eml_capa));
}
- /* need indication from userspace to support this */
- mld_capa_ops &= ~cpu_to_le16(IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP);
skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops));
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
@@ -1499,7 +1658,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
/* Set MBSSID support for HE AP if needed */
if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE &&
ext_capa && ext_capa->datalen >= 3)
ext_capa->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
@@ -1544,7 +1703,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
* for some reason check it and want it to be set, set the bit for all
* pre-EHT connections as we used to do.
*/
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT)
capab |= WLAN_CAPABILITY_ESS;
/* add the elements for the assoc (main) link */
@@ -1741,8 +1900,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
return;
}
- if (!cfg80211_chandef_identical(&link->conf->chandef,
- &link->csa_chandef)) {
+ if (!ieee80211_chanreq_identical(&link->conf->chanreq,
+ &link->csa_chanreq)) {
sdata_info(sdata,
"failed to finalize channel switch, disconnecting\n");
wiphy_work_queue(sdata->local->hw.wiphy,
@@ -1767,19 +1926,14 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
WARN_ON(!link->conf->csa_active);
- if (link->csa_block_tx) {
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- link->csa_block_tx = false;
+ sdata->csa_blocked_tx = false;
}
link->conf->csa_active = false;
link->u.mgd.csa_waiting_bcn = false;
- /*
- * If the CSA IE is still present on the beacon after the switch,
- * we need to consider it as a new CSA (possibly to self).
- */
- link->u.mgd.beacon_crc_valid = false;
ret = drv_post_channel_switch(link);
if (ret) {
@@ -1790,8 +1944,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
return;
}
- cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef,
- link->link_id, 0);
+ cfg80211_ch_switch_notify(sdata->dev, &link->reserved.oper,
+ link->link_id);
}
void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
@@ -1838,14 +1992,15 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
ieee80211_link_unreserve_chanctx(link);
- if (link->csa_block_tx)
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_tx = false;
+ }
- link->csa_block_tx = false;
link->conf->csa_active = false;
- drv_abort_channel_switch(sdata);
+ drv_abort_channel_switch(link);
}
static void
@@ -1857,12 +2012,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct cfg80211_bss *cbss = link->u.mgd.bss;
+ struct cfg80211_bss *cbss = link->conf->bss;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
enum nl80211_band current_band;
struct ieee80211_csa_ie csa_ie;
- struct ieee80211_channel_switch ch_switch;
+ struct ieee80211_channel_switch ch_switch = {
+ .link_id = link->link_id,
+ };
struct ieee80211_bss *bss;
unsigned long timeout;
int res;
@@ -1876,14 +2033,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
bss = (void *)cbss->priv;
res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
bss->vht_cap_info,
- link->u.mgd.conn_flags,
+ &link->u.mgd.conn,
link->u.mgd.bssid, &csa_ie);
if (!res) {
ch_switch.timestamp = timestamp;
ch_switch.device_timestamp = device_timestamp;
ch_switch.block_tx = csa_ie.mode;
- ch_switch.chandef = csa_ie.chandef;
+ ch_switch.chandef = csa_ie.chanreq.oper;
ch_switch.count = csa_ie.count;
ch_switch.delay = csa_ie.max_switch_time;
}
@@ -1891,46 +2048,62 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
if (res < 0)
goto drop_connection;
- if (beacon && link->conf->csa_active &&
- !link->u.mgd.csa_waiting_bcn) {
- if (res)
+ if (link->conf->csa_active) {
+ /* already processing - disregard action frames */
+ if (!beacon)
+ return;
+
+ if (link->u.mgd.csa_waiting_bcn) {
+ ieee80211_chswitch_post_beacon(link);
+ /*
+ * If the CSA IE is still present in the beacon after
+ * the switch, we need to consider it as a new CSA
+ * (possibly to self) - this happens by not returning
+ * here so we'll get to the check below.
+ */
+ } else if (res) {
ieee80211_sta_abort_chanswitch(link);
- else
+ return;
+ } else {
drv_channel_switch_rx_beacon(sdata, &ch_switch);
- return;
- } else if (link->conf->csa_active || res) {
- /* disregard subsequent announcements if already processing */
- return;
+ return;
+ }
}
- if (link->conf->chandef.chan->band !=
- csa_ie.chandef.chan->band) {
+ /* nothing to do at all - no active CSA nor a new one */
+ if (res)
+ return;
+
+ if (link->conf->chanreq.oper.chan->band !=
+ csa_ie.chanreq.oper.chan->band) {
sdata_info(sdata,
"AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
link->u.mgd.bssid,
- csa_ie.chandef.chan->center_freq,
- csa_ie.chandef.width, csa_ie.chandef.center_freq1,
- csa_ie.chandef.center_freq2);
+ csa_ie.chanreq.oper.chan->center_freq,
+ csa_ie.chanreq.oper.width,
+ csa_ie.chanreq.oper.center_freq1,
+ csa_ie.chanreq.oper.center_freq2);
goto drop_connection;
}
- if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
+ if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chanreq.oper,
IEEE80211_CHAN_DISABLED)) {
sdata_info(sdata,
"AP %pM switches to unsupported channel "
"(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
"disconnecting\n",
link->u.mgd.bssid,
- csa_ie.chandef.chan->center_freq,
- csa_ie.chandef.chan->freq_offset,
- csa_ie.chandef.width, csa_ie.chandef.center_freq1,
- csa_ie.chandef.freq1_offset,
- csa_ie.chandef.center_freq2);
+ csa_ie.chanreq.oper.chan->center_freq,
+ csa_ie.chanreq.oper.chan->freq_offset,
+ csa_ie.chanreq.oper.width,
+ csa_ie.chanreq.oper.center_freq1,
+ csa_ie.chanreq.oper.freq1_offset,
+ csa_ie.chanreq.oper.center_freq2);
goto drop_connection;
}
- if (cfg80211_chandef_identical(&csa_ie.chandef,
- &link->conf->chandef) &&
+ if (cfg80211_chandef_identical(&csa_ie.chanreq.oper,
+ &link->conf->chanreq.oper) &&
(!csa_ie.mode || !beacon)) {
if (link->u.mgd.csa_ignored_same_chan)
return;
@@ -1942,12 +2115,13 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
}
/*
- * Drop all TDLS peers - either we disconnect or move to a different
- * channel from this point on. There's no telling what our peer will do.
+ * Drop all TDLS peers on the affected link - either we disconnect or
+ * move to a different channel from this point on. There's no telling
+ * what our peer will do.
* The TDLS WIDER_BW scenario is also problematic, as peers might now
* have an incompatible wider chandef.
*/
- ieee80211_teardown_tdls_peers(sdata);
+ ieee80211_teardown_tdls_peers(link);
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
@@ -1959,8 +2133,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
chanctx = container_of(conf, struct ieee80211_chanctx, conf);
- if (local->use_chanctx &&
- !ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
+ if (!ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
sdata_info(sdata,
"driver doesn't support chan-switch with channel contexts\n");
goto drop_connection;
@@ -1972,7 +2145,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
goto drop_connection;
}
- res = ieee80211_link_reserve_chanctx(link, &csa_ie.chandef,
+ res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq,
chanctx->mode, false);
if (res) {
sdata_info(sdata,
@@ -1982,18 +2155,20 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
}
link->conf->csa_active = true;
- link->csa_chandef = csa_ie.chandef;
- link->csa_block_tx = csa_ie.mode;
+ link->csa_chanreq = csa_ie.chanreq;
link->u.mgd.csa_ignored_same_chan = false;
link->u.mgd.beacon_crc_valid = false;
- if (link->csa_block_tx)
+ if (csa_ie.mode &&
+ !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_tx = true;
+ }
- cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef,
+ cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper,
link->link_id, csa_ie.count,
- csa_ie.mode, 0);
+ csa_ie.mode);
if (local->ops->channel_switch) {
/* use driver's channel switch callback */
@@ -2017,7 +2192,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
* reset when the disconnection worker runs.
*/
link->conf->csa_active = true;
- link->csa_block_tx = csa_ie.mode;
+ sdata->csa_blocked_tx =
+ csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA);
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
@@ -2414,7 +2590,7 @@ void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
struct ieee80211_link_data *link =
container_of(work, struct ieee80211_link_data,
dfs_cac_timer_work.work);
- struct cfg80211_chan_def chandef = link->conf->chandef;
+ struct cfg80211_chan_def chandef = link->conf->chanreq.oper;
struct ieee80211_sub_if_data *sdata = link->sdata;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -2769,7 +2945,7 @@ static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link,
ieee80211_check_rate_mask(link);
- link->u.mgd.bss = cbss;
+ link->conf->bss = cbss;
memcpy(link->u.mgd.bssid, cbss->bssid, ETH_ALEN);
if (sdata->vif.p2p ||
@@ -2917,7 +3093,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ifmgd->associated = false;
/* other links will be destroyed */
- sdata->deflink.u.mgd.bss = NULL;
+ sdata->deflink.conf->bss = NULL;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
netif_carrier_off(sdata->dev);
@@ -2991,7 +3168,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->vif.cfg.ssid_len = 0;
/* remove AP and TDLS peers */
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
/* finally reset all BSS / config parameters */
if (!ieee80211_vif_is_mld(&sdata->vif))
@@ -3057,7 +3234,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->deflink.u.mgd.disable_wmm_tracking = false;
ifmgd->flags = 0;
- sdata->deflink.u.mgd.conn_flags = 0;
for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
struct ieee80211_link_data *link;
@@ -3071,25 +3247,35 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.csa_active = false;
sdata->deflink.u.mgd.csa_waiting_bcn = false;
sdata->deflink.u.mgd.csa_ignored_same_chan = false;
- if (sdata->deflink.csa_block_tx) {
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ sdata->csa_blocked_tx = false;
}
/* existing TX TSPEC sessions no longer exist */
memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec));
wiphy_delayed_work_cancel(local->hw.wiphy, &ifmgd->tx_tspec_wk);
+ sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP;
sdata->vif.bss_conf.pwr_reduction = 0;
sdata->vif.bss_conf.tx_pwr_env_num = 0;
memset(sdata->vif.bss_conf.tx_pwr_env, 0,
sizeof(sdata->vif.bss_conf.tx_pwr_env));
+ sdata->vif.cfg.eml_cap = 0;
+ sdata->vif.cfg.eml_med_sync_delay = 0;
+ sdata->vif.cfg.mld_capa_op = 0;
+
memset(&sdata->u.mgd.ttlm_info, 0,
sizeof(sdata->u.mgd.ttlm_info));
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
+
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->neg_ttlm_timeout_work);
ieee80211_vif_set_links(sdata, 0, 0);
+
+ ifmgd->mcast_seq_last = IEEE80211_SN_MODULO;
}
static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
@@ -3237,7 +3423,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
sdata->vif.cfg.ssid,
sdata->vif.cfg.ssid_len,
- sdata->deflink.u.mgd.bss->channel);
+ sdata->deflink.conf->bss->channel);
}
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
@@ -3320,7 +3506,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
return NULL;
if (ifmgd->associated)
- cbss = sdata->deflink.u.mgd.bss;
+ cbss = sdata->deflink.conf->bss;
else if (ifmgd->auth_data)
cbss = ifmgd->auth_data->bss;
else if (ifmgd->assoc_data && ifmgd->assoc_data->link[0].bss)
@@ -3377,9 +3563,12 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
if (!ifmgd->associated)
return;
- /* in MLO assume we have a link where we can TX the frame */
- tx = ieee80211_vif_is_mld(&sdata->vif) ||
- !sdata->deflink.csa_block_tx;
+ /*
+ * MLO drivers should have HANDLES_QUIET_CSA, so that csa_blocked_tx
+ * is always false; if they don't then this may try to transmit the
+ * frame but queues will be stopped.
+ */
+ tx = !sdata->csa_blocked_tx;
if (!ifmgd->driver_disconnect) {
unsigned int link_id;
@@ -3399,8 +3588,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
link = sdata_dereference(sdata->link[link_id], sdata);
if (!link)
continue;
- cfg80211_unlink_bss(local->hw.wiphy, link->u.mgd.bss);
- link->u.mgd.bss = NULL;
+ cfg80211_unlink_bss(local->hw.wiphy, link->conf->bss);
+ link->conf->bss = NULL;
}
}
@@ -3412,10 +3601,10 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
/* the other links will be destroyed */
sdata->vif.bss_conf.csa_active = false;
sdata->deflink.u.mgd.csa_waiting_bcn = false;
- if (sdata->deflink.csa_block_tx) {
+ if (sdata->csa_blocked_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ sdata->csa_blocked_tx = false;
}
ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
@@ -3517,7 +3706,6 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
sta_info_destroy_addr(sdata, auth_data->ap_addr);
/* other links are destroyed */
- sdata->deflink.u.mgd.conn_flags = 0;
eth_zero_addr(sdata->deflink.u.mgd.bssid);
ieee80211_link_info_change_notify(sdata, &sdata->deflink,
BSS_CHANGED_BSSID);
@@ -3555,7 +3743,6 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.timer);
sta_info_destroy_addr(sdata, assoc_data->ap_addr);
- sdata->deflink.u.mgd.conn_flags = 0;
eth_zero_addr(sdata->deflink.u.mgd.bssid);
ieee80211_link_info_change_notify(sdata, &sdata->deflink,
BSS_CHANGED_BSSID);
@@ -4005,11 +4192,13 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
struct ieee80211_local *local = sdata->local;
unsigned int link_id = link->link_id;
struct ieee80211_elems_parse_params parse_params = {
+ .mode = link->u.mgd.conn.mode,
.start = elem_start,
.len = elem_len,
.link_id = link_id == assoc_data->assoc_link_id ? -1 : link_id,
.from_ap = true,
};
+ bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
bool is_s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
const struct cfg80211_bss_ies *bss_ies = NULL;
@@ -4033,15 +4222,17 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
*/
assoc_data->link[link_id].status = WLAN_STATUS_SUCCESS;
if (elems->ml_basic) {
- if (!(elems->ml_basic->control &
- cpu_to_le16(IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT))) {
+ int bss_param_ch_cnt =
+ ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic);
+
+ if (bss_param_ch_cnt < 0) {
ret = false;
goto out;
}
- link->u.mgd.bss_param_ch_cnt =
- ieee80211_mle_get_bss_param_ch_cnt(elems->ml_basic);
+ link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt;
}
- } else if (!elems->prof ||
+ } else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC ||
+ !elems->prof ||
!(elems->prof->control & prof_bss_param_ch_present)) {
ret = false;
goto out;
@@ -4085,9 +4276,9 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
*/
if (!is_6ghz &&
((assoc_data->wmm && !elems->wmm_param) ||
- (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) &&
+ (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
- (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) &&
+ (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems *bss_elems;
@@ -4124,25 +4315,25 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
* have to include the IEs in the (re)association response.
*/
if (!elems->ht_cap_elem && bss_elems->ht_cap_elem &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) {
elems->ht_cap_elem = bss_elems->ht_cap_elem;
sdata_info(sdata,
"AP bug: HT capability missing from AssocResp\n");
}
if (!elems->ht_operation && bss_elems->ht_operation &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) {
elems->ht_operation = bss_elems->ht_operation;
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
elems->vht_cap_elem = bss_elems->vht_cap_elem;
sdata_info(sdata,
"AP bug: VHT capa missing from AssocResp\n");
}
if (!elems->vht_operation && bss_elems->vht_operation &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
elems->vht_operation = bss_elems->vht_operation;
sdata_info(sdata,
"AP bug: VHT operation missing from AssocResp\n");
@@ -4154,8 +4345,10 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
/*
* We previously checked these in the beacon/probe response, so
* they should be present here. This is just a safety net.
+ * Note that the ieee80211_config_bw() below would also check
+ * for this (and more), but this has better error reporting.
*/
- if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) &&
+ if (!is_6ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) {
sdata_info(sdata,
"HT AP is missing WMM params or HT capability/operation\n");
@@ -4163,7 +4356,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
goto out;
}
- if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) &&
+ if (is_5ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)) {
sdata_info(sdata,
"VHT AP is missing VHT capability/operation\n");
@@ -4171,36 +4364,28 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
goto out;
}
- if (is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
- !elems->he_6ghz_capa) {
- sdata_info(sdata,
- "HE 6 GHz AP is missing HE 6 GHz band capability\n");
- ret = false;
- goto out;
- }
-
- if (WARN_ON(!link->conf->chandef.chan)) {
+ /* check/update if AP changed anything in assoc response vs. scan */
+ if (ieee80211_config_bw(link, elems,
+ link_id == assoc_data->assoc_link_id,
+ changed)) {
ret = false;
goto out;
}
- sband = local->hw.wiphy->bands[link->conf->chandef.chan->band];
- if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
- (!elems->he_cap || !elems->he_operation)) {
- sdata_info(sdata,
- "HE AP is missing HE capability/operation\n");
+ if (WARN_ON(!link->conf->chanreq.oper.chan)) {
ret = false;
goto out;
}
+ sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band];
/* Set up internal HT/VHT capabilities */
- if (elems->ht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT))
+ if (elems->ht_cap_elem && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT)
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
elems->ht_cap_elem,
link_sta);
if (elems->vht_cap_elem &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
const struct ieee80211_vht_cap *bss_vht_cap = NULL;
const struct cfg80211_bss_ies *ies;
@@ -4227,14 +4412,41 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
rcu_read_unlock();
}
- if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ if (elems->he_operation &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE &&
elems->he_cap) {
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
elems->he_6ghz_capa,
link_sta);
+ he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation);
+
+ if (is_6ghz && he_6ghz_oper) {
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ bss_conf->power_type = IEEE80211_REG_LPI_AP;
+ break;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ bss_conf->power_type = IEEE80211_REG_SP_AP;
+ break;
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ bss_conf->power_type = IEEE80211_REG_VLP_AP;
+ break;
+ default:
+ bss_conf->power_type = IEEE80211_REG_UNSET_AP;
+ break;
+ }
+ } else if (is_6ghz) {
+ link_info(link,
+ "HE 6 GHz operation missing (on %d MHz), expect issues\n",
+ bss_conf->chanreq.oper.chan->center_freq);
+ }
+
bss_conf->he_support = link_sta->pub->he_cap.has_he;
if (elems->rsnx && elems->rsnx_len &&
(elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
@@ -4248,7 +4460,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
link_sta, elems);
if (elems->eht_operation && elems->eht_cap &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_EHT) {
ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
@@ -4257,7 +4469,6 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
link_sta);
bss_conf->eht_support = link_sta->pub->eht_cap.has_eht;
- *changed |= BSS_CHANGED_EHT_PUNCTURING;
} else {
bss_conf->eht_support = false;
}
@@ -4455,7 +4666,7 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
bool support_160;
u8 chains = 1;
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_HT)
return chains;
ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY);
@@ -4468,7 +4679,7 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
*/
}
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_VHT)
return chains;
vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
@@ -4487,7 +4698,7 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
chains = max(chains, nss);
}
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_HE)
return chains;
ies = rcu_dereference(cbss->ies);
@@ -4538,536 +4749,331 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
return chains;
}
-static bool
-ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_bss_ies *ies,
- const struct ieee80211_he_operation *he_op)
+static void
+ieee80211_determine_our_sta_mode(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ struct cfg80211_assoc_request *req,
+ bool wmm_used, int link_id,
+ struct ieee80211_conn_settings *conn)
{
- const struct element *he_cap_elem;
- const struct ieee80211_he_cap_elem *he_cap;
- struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp;
- u16 mcs_80_map_tx, mcs_80_map_rx;
- u16 ap_min_req_set;
- int mcs_nss_size;
- int nss;
-
- he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY,
- ies->data, ies->len);
-
- if (!he_cap_elem)
- return false;
+ struct ieee80211_sta_ht_cap sta_ht_cap = sband->ht_cap;
+ bool is_5ghz = sband->band == NL80211_BAND_5GHZ;
+ bool is_6ghz = sband->band == NL80211_BAND_6GHZ;
+ const struct ieee80211_sta_he_cap *he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap;
+ struct ieee80211_sta_vht_cap vht_cap;
- /* invalid HE IE */
- if (he_cap_elem->datalen < 1 + sizeof(*he_cap)) {
- sdata_info(sdata,
- "Invalid HE elem, Disable HE\n");
- return false;
+ if (sband->band == NL80211_BAND_S1GHZ) {
+ conn->mode = IEEE80211_CONN_MODE_S1G;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ mlme_dbg(sdata, "operating as S1G STA\n");
+ return;
}
- /* skip one byte ext_tag_id */
- he_cap = (void *)(he_cap_elem->data + 1);
- mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap);
+ conn->mode = IEEE80211_CONN_MODE_LEGACY;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
- /* invalid HE IE */
- if (he_cap_elem->datalen < 1 + sizeof(*he_cap) + mcs_nss_size) {
- sdata_info(sdata,
- "Invalid HE elem with nss size, Disable HE\n");
- return false;
+ ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+
+ if (req && req->flags & ASSOC_REQ_DISABLE_HT) {
+ mlme_link_id_dbg(sdata, link_id,
+ "HT disabled by flag, limiting to legacy\n");
+ goto out;
}
- /* mcs_nss is right after he_cap info */
- he_mcs_nss_supp = (void *)(he_cap + 1);
+ if (!wmm_used) {
+ mlme_link_id_dbg(sdata, link_id,
+ "WMM/QoS not supported, limiting to legacy\n");
+ goto out;
+ }
- mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80);
- mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80);
+ if (req) {
+ unsigned int i;
- /* P802.11-REVme/D0.3
- * 27.1.1 Introduction to the HE PHY
- * ...
- * An HE STA shall support the following features:
- * ...
- * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all
- * supported channel widths for HE SU PPDUs
- */
- if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) {
- sdata_info(sdata,
- "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n",
- mcs_80_map_tx, mcs_80_map_rx);
- return false;
+ for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) {
+ if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
+ req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
+ req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
+ netdev_info(sdata->dev,
+ "WEP/TKIP use, limiting to legacy\n");
+ goto out;
+ }
+ }
}
- if (!he_op)
- return true;
+ if (!sta_ht_cap.ht_supported && !is_6ghz) {
+ mlme_link_id_dbg(sdata, link_id,
+ "HT not supported (and not on 6 GHz), limiting to legacy\n");
+ goto out;
+ }
- ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+ /* HT is fine */
+ conn->mode = IEEE80211_CONN_MODE_HT;
+ conn->bw_limit = sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ IEEE80211_CONN_BW_LIMIT_40 :
+ IEEE80211_CONN_BW_LIMIT_20;
- /*
- * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
- * zeroes, which is nonsense, and completely inconsistent with itself
- * (it doesn't have 8 streams). Accept the settings in this case anyway.
- */
- if (!ap_min_req_set)
- return true;
-
- /* make sure the AP is consistent with itself
- *
- * P802.11-REVme/D0.3
- * 26.17.1 Basic HE BSS operation
- *
- * A STA that is operating in an HE BSS shall be able to receive and
- * transmit at each of the <HE-MCS, NSS> tuple values indicated by the
- * Basic HE-MCS And NSS Set field of the HE Operation parameter of the
- * MLME-START.request primitive and shall be able to receive at each of
- * the <HE-MCS, NSS> tuple values indicated by the Supported HE-MCS and
- * NSS Set field in the HE Capabilities parameter of the MLMESTART.request
- * primitive
- */
- for (nss = 8; nss > 0; nss--) {
- u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
- u8 ap_rx_val;
- u8 ap_tx_val;
+ memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap));
+ ieee80211_apply_vhtcap_overrides(sdata, &vht_cap);
- if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
- continue;
+ if (req && req->flags & ASSOC_REQ_DISABLE_VHT) {
+ mlme_link_id_dbg(sdata, link_id,
+ "VHT disabled by flag, limiting to HT\n");
+ goto out;
+ }
- ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3;
- ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3;
+ if (vht_cap.vht_supported && is_5ghz) {
+ bool have_80mhz = false;
+ unsigned int i;
- if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) {
- sdata_info(sdata,
- "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n",
- nss, ap_rx_val, ap_rx_val, ap_op_val);
- return false;
+ if (conn->bw_limit == IEEE80211_CONN_BW_LIMIT_20) {
+ mlme_link_id_dbg(sdata, link_id,
+ "no 40 MHz support on 5 GHz, limiting to HT\n");
+ goto out;
}
- }
- return true;
-}
+ /* Allow VHT if at least one channel on the sband supports 80 MHz */
+ for (i = 0; i < sband->n_channels; i++) {
+ if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+ IEEE80211_CHAN_NO_80MHZ))
+ continue;
-static bool
-ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_supported_band *sband,
- const struct ieee80211_he_operation *he_op)
-{
- const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- u16 ap_min_req_set;
- int i;
+ have_80mhz = true;
+ break;
+ }
- if (!sta_he_cap || !he_op)
- return false;
+ if (!have_80mhz) {
+ mlme_link_id_dbg(sdata, link_id,
+ "no 80 MHz channel support on 5 GHz, limiting to HT\n");
+ goto out;
+ }
+ } else if (is_5ghz) { /* !vht_supported but on 5 GHz */
+ mlme_link_id_dbg(sdata, link_id,
+ "no VHT support on 5 GHz, limiting to HT\n");
+ goto out;
+ }
- ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+ /* VHT - if we have - is fine, including 80 MHz, check 160 below again */
+ if (sband->band != NL80211_BAND_2GHZ) {
+ conn->mode = IEEE80211_CONN_MODE_VHT;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160;
+ }
- /*
- * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
- * zeroes, which is nonsense, and completely inconsistent with itself
- * (it doesn't have 8 streams). Accept the settings in this case anyway.
- */
- if (!ap_min_req_set)
- return true;
+ if (is_5ghz &&
+ !(vht_cap.cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ |
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))) {
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80;
+ mlme_link_id_dbg(sdata, link_id,
+ "no VHT 160 MHz capability on 5 GHz, limiting to 80 MHz");
+ }
- /* Need to go over for 80MHz, 160MHz and for 80+80 */
- for (i = 0; i < 3; i++) {
- const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
- &sta_he_cap->he_mcs_nss_supp;
- u16 sta_mcs_map_rx =
- le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
- u16 sta_mcs_map_tx =
- le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
- u8 nss;
- bool verified = true;
+ if (req && req->flags & ASSOC_REQ_DISABLE_HE) {
+ mlme_link_id_dbg(sdata, link_id,
+ "HE disabled by flag, limiting to HT/VHT\n");
+ goto out;
+ }
- /*
- * For each band there is a maximum of 8 spatial streams
- * possible. Each of the sta_mcs_map_* is a 16-bit struct built
- * of 2 bits per NSS (1-8), with the values defined in enum
- * ieee80211_he_mcs_support. Need to make sure STA TX and RX
- * capabilities aren't less than the AP's minimum requirements
- * for this HE BSS per SS.
- * It is enough to find one such band that meets the reqs.
- */
- for (nss = 8; nss > 0; nss--) {
- u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
- u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
- u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ if (!he_cap) {
+ WARN_ON(is_6ghz);
+ mlme_link_id_dbg(sdata, link_id,
+ "no HE support, limiting to HT/VHT\n");
+ goto out;
+ }
- if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
- continue;
+ /* so we have HE */
+ conn->mode = IEEE80211_CONN_MODE_HE;
- /*
- * Make sure the HE AP doesn't require MCSs that aren't
- * supported by the client as required by spec
- *
- * P802.11-REVme/D0.3
- * 26.17.1 Basic HE BSS operation
- *
- * An HE STA shall not attempt to join * (MLME-JOIN.request primitive)
- * a BSS, unless it supports (i.e., is able to both transmit and
- * receive using) all of the <HE-MCS, NSS> tuples in the basic
- * HE-MCS and NSS set.
- */
- if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
- verified = false;
- break;
- }
+ /* check bandwidth */
+ switch (sband->band) {
+ default:
+ case NL80211_BAND_2GHZ:
+ if (he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G)
+ break;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ mlme_link_id_dbg(sdata, link_id,
+ "no 40 MHz HE cap in 2.4 GHz, limiting to 20 MHz\n");
+ break;
+ case NL80211_BAND_5GHZ:
+ if (!(he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G)) {
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ mlme_link_id_dbg(sdata, link_id,
+ "no 40/80 MHz HE cap in 5 GHz, limiting to 20 MHz\n");
+ break;
}
-
- if (verified)
- return true;
+ if (!(he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)) {
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_80);
+ mlme_link_id_dbg(sdata, link_id,
+ "no 160 MHz HE cap in 5 GHz, limiting to 80 MHz\n");
+ }
+ break;
+ case NL80211_BAND_6GHZ:
+ if (he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)
+ break;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_80);
+ mlme_link_id_dbg(sdata, link_id,
+ "no 160 MHz HE cap in 6 GHz, limiting to 80 MHz\n");
+ break;
}
- /* If here, STA doesn't meet AP's HE min requirements */
- return false;
-}
+ if (req && req->flags & ASSOC_REQ_DISABLE_EHT) {
+ mlme_link_id_dbg(sdata, link_id,
+ "EHT disabled by flag, limiting to HE\n");
+ goto out;
+ }
-static u8
-ieee80211_get_eht_cap_mcs_nss(const struct ieee80211_sta_he_cap *sta_he_cap,
- const struct ieee80211_sta_eht_cap *sta_eht_cap,
- unsigned int idx, int bw)
-{
- u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0];
- u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0];
+ eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
+ if (!eht_cap) {
+ mlme_link_id_dbg(sdata, link_id,
+ "no EHT support, limiting to HE\n");
+ goto out;
+ }
- /* handle us being a 20 MHz-only EHT STA - with four values
- * for MCS 0-7, 8-9, 10-11, 12-13.
- */
- if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL))
- return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx];
+ /* we have EHT */
- /* the others have MCS 0-9 together, rather than separately from 0-7 */
- if (idx > 0)
- idx--;
+ conn->mode = IEEE80211_CONN_MODE_EHT;
- switch (bw) {
- case 0:
- return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx];
- case 1:
- if (!(he_phy_cap0 &
- (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)))
- return 0xff; /* pass check */
- return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx];
- case 2:
- if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ))
- return 0xff; /* pass check */
- return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx];
- }
+ /* check bandwidth */
+ if (is_6ghz &&
+ eht_cap->eht_cap_elem.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_320;
+ else if (is_6ghz)
+ mlme_link_id_dbg(sdata, link_id,
+ "no EHT 320 MHz cap in 6 GHz, limiting to 160 MHz\n");
- WARN_ON(1);
- return 0;
+out:
+ mlme_link_id_dbg(sdata, link_id,
+ "determined local STA to be %s, BW limited to %d MHz\n",
+ ieee80211_conn_mode_str(conn->mode),
+ 20 * (1 << conn->bw_limit));
}
-static bool
-ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_supported_band *sband,
- const struct ieee80211_eht_operation *eht_op)
+static void
+ieee80211_determine_our_sta_mode_auth(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ struct cfg80211_auth_request *req,
+ bool wmm_used,
+ struct ieee80211_conn_settings *conn)
{
- const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- const struct ieee80211_sta_eht_cap *sta_eht_cap =
- ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
- const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req;
- unsigned int i;
-
- if (!sta_he_cap || !sta_eht_cap || !eht_op)
- return false;
-
- req = &eht_op->basic_mcs_nss;
-
- for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) {
- u8 req_rx_nss, req_tx_nss;
- unsigned int bw;
-
- req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i],
- IEEE80211_EHT_MCS_NSS_RX);
- req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i],
- IEEE80211_EHT_MCS_NSS_TX);
+ ieee80211_determine_our_sta_mode(sdata, sband, NULL, wmm_used,
+ req->link_id > 0 ? req->link_id : 0,
+ conn);
+}
- for (bw = 0; bw < 3; bw++) {
- u8 have, have_rx_nss, have_tx_nss;
+static void
+ieee80211_determine_our_sta_mode_assoc(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ struct cfg80211_assoc_request *req,
+ bool wmm_used, int link_id,
+ struct ieee80211_conn_settings *conn)
+{
+ struct ieee80211_conn_settings tmp;
- have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap,
- sta_eht_cap,
- i, bw);
- have_rx_nss = u8_get_bits(have,
- IEEE80211_EHT_MCS_NSS_RX);
- have_tx_nss = u8_get_bits(have,
- IEEE80211_EHT_MCS_NSS_TX);
+ WARN_ON(!req);
- if (req_rx_nss > have_rx_nss ||
- req_tx_nss > have_tx_nss)
- return false;
- }
- }
+ ieee80211_determine_our_sta_mode(sdata, sband, req, wmm_used, link_id,
+ &tmp);
- return true;
+ conn->mode = min_t(enum ieee80211_conn_mode,
+ conn->mode, tmp.mode);
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit, tmp.bw_limit);
}
static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link,
- struct cfg80211_bss *cbss,
- bool mlo,
- ieee80211_conn_flags_t *conn_flags)
+ int link_id,
+ struct cfg80211_bss *cbss, bool mlo,
+ struct ieee80211_conn_settings *conn)
{
struct ieee80211_local *local = sdata->local;
- const struct ieee80211_ht_cap *ht_cap = NULL;
- const struct ieee80211_ht_operation *ht_oper = NULL;
- const struct ieee80211_vht_operation *vht_oper = NULL;
- const struct ieee80211_he_operation *he_oper = NULL;
- const struct ieee80211_eht_operation *eht_oper = NULL;
- const struct ieee80211_s1g_oper_ie *s1g_oper = NULL;
- struct ieee80211_supported_band *sband;
- struct cfg80211_chan_def chandef;
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
- bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
- bool supports_mlo = false;
- struct ieee80211_bss *bss = (void *)cbss->priv;
- struct ieee80211_elems_parse_params parse_params = {
- .link_id = -1,
- .from_ap = true,
- };
+ struct ieee80211_chan_req chanreq = {};
struct ieee802_11_elems *elems;
- const struct cfg80211_bss_ies *ies;
int ret;
u32 i;
- bool have_80mhz;
lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
+ elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id,
+ &chanreq);
- ies = rcu_dereference(cbss->ies);
- parse_params.start = ies->data;
- parse_params.len = ies->len;
- elems = ieee802_11_parse_elems_full(&parse_params);
- if (!elems) {
+ if (IS_ERR(elems)) {
rcu_read_unlock();
- return -ENOMEM;
- }
-
- sband = local->hw.wiphy->bands[cbss->channel->band];
-
- *conn_flags &= ~(IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ);
-
- /* disable HT/VHT/HE if we don't support them */
- if (!sband->ht_cap.ht_supported && !is_6ghz) {
- mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE/EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_HT;
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!sband->vht_cap.vht_supported && is_5ghz) {
- mlme_dbg(sdata, "VHT not supported, disabling VHT/HE/EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) {
- mlme_dbg(sdata, "HE not supported, disabling HE and EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) {
- mlme_dbg(sdata, "EHT not supported, disabling EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_HT) && !is_6ghz) {
- ht_oper = elems->ht_operation;
- ht_cap = elems->ht_cap_elem;
-
- if (!ht_cap) {
- *conn_flags |= IEEE80211_CONN_DISABLE_HT;
- ht_oper = NULL;
- }
+ return PTR_ERR(elems);
}
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_VHT) && !is_6ghz) {
- vht_oper = elems->vht_operation;
- if (vht_oper && !ht_oper) {
- vht_oper = NULL;
- sdata_info(sdata,
- "AP advertised VHT without HT, disabling HT/VHT/HE\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_HT;
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!elems->vht_cap_elem) {
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- vht_oper = NULL;
- }
+ if (mlo && !elems->ml_basic) {
+ sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n");
+ rcu_read_unlock();
+ kfree(elems);
+ return -EINVAL;
}
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_HE)) {
- he_oper = elems->he_operation;
+ if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) {
+ struct ieee80211_bss_conf *bss_conf;
+ u8 j = 0;
- if (link && is_6ghz) {
- struct ieee80211_bss_conf *bss_conf;
- u8 j = 0;
+ bss_conf = link->conf;
- bss_conf = link->conf;
+ if (elems->pwr_constr_elem)
+ bss_conf->pwr_reduction = *elems->pwr_constr_elem;
- if (elems->pwr_constr_elem)
- bss_conf->pwr_reduction = *elems->pwr_constr_elem;
+ BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
+ ARRAY_SIZE(elems->tx_pwr_env));
- BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
- ARRAY_SIZE(elems->tx_pwr_env));
-
- for (i = 0; i < elems->tx_pwr_env_num; i++) {
- if (elems->tx_pwr_env_len[i] >
- sizeof(bss_conf->tx_pwr_env[j]))
- continue;
-
- bss_conf->tx_pwr_env_num++;
- memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
- elems->tx_pwr_env_len[i]);
- j++;
- }
- }
-
- if (!ieee80211_verify_peer_he_mcs_support(sdata, ies, he_oper) ||
- !ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
- *conn_flags |= IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- }
-
- /*
- * EHT requires HE to be supported as well. Specifically for 6 GHz
- * channels, the operation channel information can only be deduced from
- * both the 6 GHz operation information (from the HE operation IE) and
- * EHT operation.
- */
- if (!(*conn_flags &
- (IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT)) &&
- he_oper) {
- const struct cfg80211_bss_ies *cbss_ies;
- const struct element *eht_ml_elem;
- const u8 *eht_oper_ie;
-
- cbss_ies = rcu_dereference(cbss->ies);
- eht_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_EHT_OPERATION,
- cbss_ies->data, cbss_ies->len);
- if (eht_oper_ie && eht_oper_ie[1] >=
- 1 + sizeof(struct ieee80211_eht_operation))
- eht_oper = (void *)(eht_oper_ie + 3);
- else
- eht_oper = NULL;
-
- if (!ieee80211_verify_sta_eht_mcs_support(sdata, sband, eht_oper))
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
-
- eht_ml_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
- cbss_ies->data, cbss_ies->len);
-
- /* data + 1 / datalen - 1 since it's an extended element */
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_EHT) &&
- eht_ml_elem &&
- ieee80211_mle_type_ok(eht_ml_elem->data + 1,
- IEEE80211_ML_CONTROL_TYPE_BASIC,
- eht_ml_elem->datalen - 1)) {
- supports_mlo = true;
+ for (i = 0; i < elems->tx_pwr_env_num; i++) {
+ if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j]))
+ continue;
- sdata->vif.cfg.eml_cap =
- ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1);
- sdata->vif.cfg.eml_med_sync_delay =
- ieee80211_mle_get_eml_med_sync_delay(eht_ml_elem->data + 1);
+ bss_conf->tx_pwr_env_num++;
+ memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
+ elems->tx_pwr_env_len[i]);
+ j++;
}
}
-
- /* Allow VHT if at least one channel on the sband supports 80 MHz */
- have_80mhz = false;
- for (i = 0; i < sband->n_channels; i++) {
- if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
- IEEE80211_CHAN_NO_80MHZ))
- continue;
-
- have_80mhz = true;
- break;
- }
-
- if (!have_80mhz) {
- sdata_info(sdata, "80 MHz not supported, disabling VHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- }
-
- if (sband->band == NL80211_BAND_S1GHZ) {
- s1g_oper = elems->s1g_oper;
- if (!s1g_oper)
- sdata_info(sdata,
- "AP missing S1G operation element?\n");
- }
-
- *conn_flags |=
- ieee80211_determine_chantype(sdata, link, *conn_flags,
- sband,
- cbss->channel,
- bss->vht_cap_info,
- ht_oper, vht_oper,
- he_oper, eht_oper,
- s1g_oper,
- &chandef, false);
-
- if (link)
- link->needed_rx_chains =
- min(ieee80211_max_rx_chains(link, cbss),
- local->rx_chains);
-
rcu_read_unlock();
/* the element data was RCU protected so no longer valid anyway */
kfree(elems);
elems = NULL;
- if (*conn_flags & IEEE80211_CONN_DISABLE_HE && is_6ghz) {
- sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
- return -EINVAL;
- }
-
- if (mlo && !supports_mlo) {
- sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n");
- return -EINVAL;
- }
-
if (!link)
return 0;
- /* will change later if needed */
- link->smps_mode = IEEE80211_SMPS_OFF;
+ rcu_read_lock();
+ link->needed_rx_chains = min(ieee80211_max_rx_chains(link, cbss),
+ local->rx_chains);
+ rcu_read_unlock();
/*
* If this fails (possibly due to channel context sharing
* on incompatible channels, e.g. 80+80 and 160 sharing the
* same control channel) try to use a smaller bandwidth.
*/
- ret = ieee80211_link_use_channel(link, &chandef,
+ ret = ieee80211_link_use_channel(link, &chanreq,
IEEE80211_CHANCTX_SHARED);
/* don't downgrade for 5 and 10 MHz channels, though. */
- if (chandef.width == NL80211_CHAN_WIDTH_5 ||
- chandef.width == NL80211_CHAN_WIDTH_10)
- goto out;
+ if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_10)
+ return ret;
- while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
- *conn_flags |=
- ieee80211_chandef_downgrade(&chandef);
- ret = ieee80211_link_use_channel(link, &chandef,
+ while (ret && chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT) {
+ ieee80211_chanreq_downgrade(&chanreq, conn);
+
+ ret = ieee80211_link_use_channel(link, &chanreq,
IEEE80211_CHANCTX_SHARED);
}
- out:
+
return ret;
}
@@ -5128,6 +5134,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!sta))
goto out_err;
+ sta->sta.spp_amsdu = assoc_data->spp_amsdu;
+
if (ieee80211_vif_is_mld(&sdata->vif)) {
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
if (!assoc_data->link[link_id].bss)
@@ -5191,8 +5199,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
link->conf->dtim_period = link->u.mgd.dtim_period ?: 1;
if (link_id != assoc_data->assoc_link_id) {
- err = ieee80211_prep_channel(sdata, link, cbss, true,
- &link->u.mgd.conn_flags);
+ link->u.mgd.conn = assoc_data->link[link_id].conn;
+
+ err = ieee80211_prep_channel(sdata, link, link_id, cbss,
+ true, &link->u.mgd.conn);
if (err) {
link_info(link, "prep_channel failed\n");
goto out_err;
@@ -5310,6 +5320,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
if (!assoc_data)
return;
+ parse_params.mode =
+ assoc_data->link[assoc_data->assoc_link_id].conn.mode;
+
if (!ether_addr_equal(assoc_data->ap_addr, mgmt->bssid) ||
!ether_addr_equal(assoc_data->ap_addr, mgmt->sa))
return;
@@ -5426,6 +5439,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
assoc_data->ap_addr);
goto abandon_assoc;
}
+
+ sdata->vif.cfg.eml_cap =
+ ieee80211_mle_get_eml_cap((const void *)elems->ml_basic);
+ sdata->vif.cfg.eml_med_sync_delay =
+ ieee80211_mle_get_eml_med_sync_delay((const void *)elems->ml_basic);
+ sdata->vif.cfg.mld_capa_op =
+ ieee80211_mle_get_mld_capa_op((const void *)elems->ml_basic);
}
sdata->vif.cfg.aid = aid;
@@ -5688,49 +5708,6 @@ static bool ieee80211_rx_our_beacon(const u8 *tx_bssid,
return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid);
}
-static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
- const struct ieee80211_eht_operation *eht_oper,
- u64 *changed)
-{
- struct ieee80211_local *local = link->sdata->local;
- u16 bitmap = 0, extracted;
-
- if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
- (eht_oper->params &
- IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) {
- const struct ieee80211_eht_operation_info *info =
- (void *)eht_oper->optional;
- const u8 *disable_subchannel_bitmap = info->optional;
-
- bitmap = get_unaligned_le16(disable_subchannel_bitmap);
- }
-
- extracted = ieee80211_extract_dis_subch_bmap(eht_oper,
- &link->conf->chandef,
- bitmap);
-
- /* accept if there are no changes */
- if (!(*changed & BSS_CHANGED_BANDWIDTH) &&
- extracted == link->conf->eht_puncturing)
- return true;
-
- if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- &link->conf->chandef)) {
- link_info(link,
- "Got an invalid disable subchannel bitmap from AP %pM: bitmap = 0x%x, bw = 0x%x. disconnect\n",
- link->u.mgd.bssid,
- bitmap,
- link->conf->chandef.width);
- return false;
- }
-
- if (bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))
- return false;
-
- ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed);
- return true;
-}
-
static void ieee80211_ml_reconf_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
@@ -5794,9 +5771,7 @@ out:
static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems)
{
- const struct ieee80211_multi_link_elem *ml;
const struct element *sub;
- ssize_t ml_len;
unsigned long removed_links = 0;
u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {};
u8 link_id;
@@ -5805,24 +5780,11 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf)
return;
- ml_len = cfg80211_defragment_element(elems->ml_reconf_elem,
- elems->ie_start,
- elems->total_len,
- elems->scratch_pos,
- elems->scratch + elems->scratch_len -
- elems->scratch_pos,
- WLAN_EID_FRAGMENT);
- if (ml_len < 0)
- return;
-
- elems->ml_reconf = (const void *)elems->scratch_pos;
- elems->ml_reconf_len = ml_len;
- ml = elems->ml_reconf;
-
/* Directly parse the sub elements as the common information doesn't
* hold any useful information.
*/
- for_each_mle_subelement(sub, (u8 *)ml, ml_len) {
+ for_each_mle_subelement(sub, (const u8 *)elems->ml_reconf,
+ elems->ml_reconf_len) {
struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
u8 *pos = prof->variable;
u16 control;
@@ -5890,6 +5852,56 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
TU_TO_JIFFIES(delay));
}
+static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata,
+ u16 active_links, u16 dormant_links,
+ u16 suspended_links)
+{
+ u64 changed = 0;
+ int ret;
+
+ if (!active_links) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* If there is an active negotiated TTLM, it should be discarded by
+ * the new negotiated/advertised TTLM.
+ */
+ if (sdata->vif.neg_ttlm.valid) {
+ memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm));
+ sdata->vif.suspended_links = 0;
+ changed = BSS_CHANGED_MLD_TTLM;
+ }
+
+ if (sdata->vif.active_links != active_links) {
+ ret = ieee80211_set_active_links(&sdata->vif, active_links);
+ if (ret) {
+ sdata_info(sdata, "Failed to set TTLM active links\n");
+ goto out;
+ }
+ }
+
+ ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
+ dormant_links);
+ if (ret) {
+ sdata_info(sdata, "Failed to set TTLM dormant links\n");
+ goto out;
+ }
+
+ changed |= BSS_CHANGED_MLD_VALID_LINKS;
+ sdata->vif.suspended_links = suspended_links;
+ if (sdata->vif.suspended_links)
+ changed |= BSS_CHANGED_MLD_TTLM;
+
+ ieee80211_vif_cfg_change_notify(sdata, changed);
+
+out:
+ if (ret)
+ ieee80211_disconnect(&sdata->vif, false);
+
+ return ret;
+}
+
static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
@@ -5897,30 +5909,19 @@ static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
u.mgd.ttlm_work.work);
- int ret;
new_active_links = sdata->u.mgd.ttlm_info.map &
sdata->vif.valid_links;
new_dormant_links = ~sdata->u.mgd.ttlm_info.map &
sdata->vif.valid_links;
- if (!new_active_links) {
- ieee80211_disconnect(&sdata->vif, false);
- return;
- }
ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0);
- new_active_links = BIT(ffs(new_active_links) - 1);
- ieee80211_set_active_links(&sdata->vif, new_active_links);
-
- ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
- new_dormant_links);
+ if (ieee80211_ttlm_set_links(sdata, new_active_links, new_dormant_links,
+ 0))
+ return;
sdata->u.mgd.ttlm_info.active = true;
sdata->u.mgd.ttlm_info.switch_time = 0;
-
- if (!ret)
- ieee80211_vif_cfg_change_notify(sdata,
- BSS_CHANGED_MLD_VALID_LINKS);
}
static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data)
@@ -6130,6 +6131,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
u8 *bssid, *variable = mgmt->u.beacon.variable;
u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
struct ieee80211_elems_parse_params parse_params = {
+ .mode = link->u.mgd.conn.mode,
.link_id = -1,
.from_ap = true,
};
@@ -6212,7 +6214,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
if (!ifmgd->associated ||
- !ieee80211_rx_our_beacon(bssid, link->u.mgd.bss))
+ !ieee80211_rx_our_beacon(bssid, link->conf->bss))
return;
bssid = link->u.mgd.bssid;
@@ -6239,7 +6241,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
*/
if (!ieee80211_is_s1g_beacon(hdr->frame_control))
ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
- parse_params.bss = link->u.mgd.bss;
+ parse_params.bss = link->conf->bss;
parse_params.filter = care_about_ies;
parse_params.crc = ncrc;
elems = ieee802_11_parse_elems_full(&parse_params);
@@ -6301,9 +6303,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
}
- if (link->u.mgd.csa_waiting_bcn)
- ieee80211_chswitch_post_beacon(link);
-
/*
* Update beacon timing and dtim count on every beacon appearance. This
* will allow the driver to use the most updated values. Do it before
@@ -6377,21 +6376,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
goto free;
}
- if (WARN_ON(!link->conf->chandef.chan))
+ if (WARN_ON(!link->conf->chanreq.oper.chan))
goto free;
- sband = local->hw.wiphy->bands[link->conf->chandef.chan->band];
+ sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band];
changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems);
- if (ieee80211_config_bw(link, elems->ht_cap_elem,
- elems->vht_cap_elem, elems->ht_operation,
- elems->vht_operation, elems->he_operation,
- elems->eht_operation,
- elems->s1g_oper, bssid, &changed)) {
- sdata_info(sdata,
- "failed to follow AP %pM bandwidth change, disconnect\n",
- bssid);
+ if (ieee80211_config_bw(link, elems, true, &changed)) {
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DEAUTH_LEAVING,
true, deauth_buf);
@@ -6413,21 +6405,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
elems->pwr_constr_elem,
elems->cisco_dtpc_elem);
- if (elems->eht_operation &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) {
- if (!ieee80211_config_puncturing(link, elems->eht_operation,
- &changed)) {
- ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
- WLAN_REASON_DEAUTH_LEAVING,
- true, deauth_buf);
- ieee80211_report_disconnect(sdata, deauth_buf,
- sizeof(deauth_buf), true,
- WLAN_REASON_DEAUTH_LEAVING,
- false);
- goto free;
- }
- }
-
ieee80211_ml_reconfiguration(sdata, elems);
ieee80211_process_adv_ttlm(sdata, elems,
le64_to_cpu(mgmt->u.beacon.timestamp));
@@ -6437,6 +6414,376 @@ free:
kfree(elems);
}
+static void ieee80211_apply_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm neg_ttlm)
+{
+ u16 new_active_links, new_dormant_links, new_suspended_links, map = 0;
+ u8 i;
+
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++)
+ map |= neg_ttlm.downlink[i] | neg_ttlm.uplink[i];
+
+ /* If there is an active TTLM, unset previously suspended links */
+ if (sdata->vif.neg_ttlm.valid)
+ sdata->vif.dormant_links &= ~sdata->vif.suspended_links;
+
+ /* exclude links that are already disabled by advertised TTLM */
+ new_active_links =
+ map & sdata->vif.valid_links & ~sdata->vif.dormant_links;
+ new_suspended_links =
+ (~map & sdata->vif.valid_links) & ~sdata->vif.dormant_links;
+ new_dormant_links = sdata->vif.dormant_links | new_suspended_links;
+ if (ieee80211_ttlm_set_links(sdata, new_active_links,
+ new_dormant_links, new_suspended_links))
+ return;
+
+ sdata->vif.neg_ttlm = neg_ttlm;
+ sdata->vif.neg_ttlm.valid = true;
+}
+
+static void ieee80211_neg_ttlm_timeout_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.mgd.neg_ttlm_timeout_work.work);
+
+ sdata_info(sdata,
+ "No negotiated TTLM response from AP, disconnecting.\n");
+
+ __ieee80211_disconnect(sdata);
+}
+
+static void
+ieee80211_neg_ttlm_add_suggested_map(struct sk_buff *skb,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ u8 i, direction[IEEE80211_TTLM_MAX_CNT];
+
+ if (memcmp(neg_ttlm->downlink, neg_ttlm->uplink,
+ sizeof(neg_ttlm->downlink))) {
+ direction[0] = IEEE80211_TTLM_DIRECTION_DOWN;
+ direction[1] = IEEE80211_TTLM_DIRECTION_UP;
+ } else {
+ direction[0] = IEEE80211_TTLM_DIRECTION_BOTH;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(direction); i++) {
+ u8 tid, len, map_ind = 0, *len_pos, *map_ind_pos, *pos;
+ __le16 map;
+
+ len = sizeof(struct ieee80211_ttlm_elem) + 1 + 1;
+
+ pos = skb_put(skb, len + 2);
+ *pos++ = WLAN_EID_EXTENSION;
+ len_pos = pos++;
+ *pos++ = WLAN_EID_EXT_TID_TO_LINK_MAPPING;
+ *pos++ = direction[i];
+ map_ind_pos = pos++;
+ for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
+ map = direction[i] == IEEE80211_TTLM_DIRECTION_UP ?
+ cpu_to_le16(neg_ttlm->uplink[tid]) :
+ cpu_to_le16(neg_ttlm->downlink[tid]);
+ if (!map)
+ continue;
+
+ len += 2;
+ map_ind |= BIT(tid);
+ skb_put_data(skb, &map, sizeof(map));
+ }
+
+ *map_ind_pos = map_ind;
+ *len_pos = len;
+
+ if (direction[i] == IEEE80211_TTLM_DIRECTION_BOTH)
+ break;
+ }
+}
+
+static void
+ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm *neg_ttlm,
+ u8 dialog_token)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_req);
+ int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
+ 2 * 2 * IEEE80211_TTLM_NUM_TIDS;
+
+ skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->tx_headroom);
+ mgmt = skb_put_zero(skb, hdr_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
+ mgmt->u.action.u.ttlm_req.action_code =
+ WLAN_PROTECTED_EHT_ACTION_TTLM_REQ;
+ mgmt->u.action.u.ttlm_req.dialog_token = dialog_token;
+ ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm);
+ ieee80211_tx_skb(sdata, skb);
+}
+
+int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_ttlm_params *params)
+{
+ struct ieee80211_neg_ttlm neg_ttlm = {};
+ u8 i;
+
+ if (!ieee80211_vif_is_mld(&sdata->vif) ||
+ !(sdata->vif.cfg.mld_capa_op &
+ IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP))
+ return -EINVAL;
+
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) {
+ if ((params->dlink[i] & ~sdata->vif.valid_links) ||
+ (params->ulink[i] & ~sdata->vif.valid_links))
+ return -EINVAL;
+
+ neg_ttlm.downlink[i] = params->dlink[i];
+ neg_ttlm.uplink[i] = params->ulink[i];
+ }
+
+ if (drv_can_neg_ttlm(sdata->local, sdata, &neg_ttlm) !=
+ NEG_TTLM_RES_ACCEPT)
+ return -EINVAL;
+
+ ieee80211_apply_neg_ttlm(sdata, neg_ttlm);
+ sdata->u.mgd.dialog_token_alloc++;
+ ieee80211_send_neg_ttlm_req(sdata, &sdata->vif.neg_ttlm,
+ sdata->u.mgd.dialog_token_alloc);
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.neg_ttlm_timeout_work);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.neg_ttlm_timeout_work,
+ IEEE80211_NEG_TTLM_REQ_TIMEOUT);
+ return 0;
+}
+
+static void
+ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_neg_ttlm_res ttlm_res,
+ u8 dialog_token,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res);
+ int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
+ 2 * 2 * IEEE80211_TTLM_NUM_TIDS;
+
+ skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->tx_headroom);
+ mgmt = skb_put_zero(skb, hdr_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
+ mgmt->u.action.u.ttlm_res.action_code =
+ WLAN_PROTECTED_EHT_ACTION_TTLM_RES;
+ mgmt->u.action.u.ttlm_res.dialog_token = dialog_token;
+ switch (ttlm_res) {
+ default:
+ WARN_ON(1);
+ fallthrough;
+ case NEG_TTLM_RES_REJECT:
+ mgmt->u.action.u.ttlm_res.status_code =
+ WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING;
+ break;
+ case NEG_TTLM_RES_ACCEPT:
+ mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS;
+ break;
+ case NEG_TTLM_RES_SUGGEST_PREFERRED:
+ mgmt->u.action.u.ttlm_res.status_code =
+ WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED;
+ ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm);
+ break;
+ }
+
+ ieee80211_tx_skb(sdata, skb);
+}
+
+static int
+ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_ttlm_elem *ttlm,
+ struct ieee80211_neg_ttlm *neg_ttlm,
+ u8 *direction)
+{
+ u8 control, link_map_presence, map_size, tid;
+ u8 *pos;
+
+ /* The element size was already validated in
+ * ieee80211_tid_to_link_map_size_ok()
+ */
+ pos = (void *)ttlm->optional;
+
+ control = ttlm->control;
+
+ /* mapping switch time and expected duration fields are not expected
+ * in case of negotiated TTLM
+ */
+ if (control & (IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT |
+ IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT)) {
+ mlme_dbg(sdata,
+ "Invalid TTLM element in negotiated TTLM request\n");
+ return -EINVAL;
+ }
+
+ if (control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) {
+ for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
+ neg_ttlm->downlink[tid] = sdata->vif.valid_links;
+ neg_ttlm->uplink[tid] = sdata->vif.valid_links;
+ }
+ *direction = IEEE80211_TTLM_DIRECTION_BOTH;
+ return 0;
+ }
+
+ *direction = u8_get_bits(control, IEEE80211_TTLM_CONTROL_DIRECTION);
+ if (*direction != IEEE80211_TTLM_DIRECTION_DOWN &&
+ *direction != IEEE80211_TTLM_DIRECTION_UP &&
+ *direction != IEEE80211_TTLM_DIRECTION_BOTH)
+ return -EINVAL;
+
+ link_map_presence = *pos;
+ pos++;
+
+ if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE)
+ map_size = 1;
+ else
+ map_size = 2;
+
+ for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
+ u16 map;
+
+ if (link_map_presence & BIT(tid)) {
+ map = ieee80211_get_ttlm(map_size, pos);
+ if (!map) {
+ mlme_dbg(sdata,
+ "No active links for TID %d", tid);
+ return -EINVAL;
+ }
+ } else {
+ map = 0;
+ }
+
+ switch (*direction) {
+ case IEEE80211_TTLM_DIRECTION_BOTH:
+ neg_ttlm->downlink[tid] = map;
+ neg_ttlm->uplink[tid] = map;
+ break;
+ case IEEE80211_TTLM_DIRECTION_DOWN:
+ neg_ttlm->downlink[tid] = map;
+ break;
+ case IEEE80211_TTLM_DIRECTION_UP:
+ neg_ttlm->uplink[tid] = map;
+ break;
+ default:
+ return -EINVAL;
+ }
+ pos += map_size;
+ }
+ return 0;
+}
+
+void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len)
+{
+ u8 dialog_token, direction[IEEE80211_TTLM_MAX_CNT] = {}, i;
+ size_t ies_len;
+ enum ieee80211_neg_ttlm_res ttlm_res = NEG_TTLM_RES_ACCEPT;
+ struct ieee802_11_elems *elems = NULL;
+ struct ieee80211_neg_ttlm neg_ttlm = {};
+
+ BUILD_BUG_ON(ARRAY_SIZE(direction) != ARRAY_SIZE(elems->ttlm));
+
+ if (!ieee80211_vif_is_mld(&sdata->vif))
+ return;
+
+ dialog_token = mgmt->u.action.u.ttlm_req.dialog_token;
+ ies_len = len - offsetof(struct ieee80211_mgmt,
+ u.action.u.ttlm_req.variable);
+ elems = ieee802_11_parse_elems(mgmt->u.action.u.ttlm_req.variable,
+ ies_len, true, NULL);
+ if (!elems) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+
+ for (i = 0; i < elems->ttlm_num; i++) {
+ if (ieee80211_parse_neg_ttlm(sdata, elems->ttlm[i],
+ &neg_ttlm, &direction[i]) ||
+ (direction[i] == IEEE80211_TTLM_DIRECTION_BOTH &&
+ elems->ttlm_num != 1)) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+ }
+
+ if (!elems->ttlm_num ||
+ (elems->ttlm_num == 2 && direction[0] == direction[1])) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) {
+ if ((neg_ttlm.downlink[i] &&
+ (neg_ttlm.downlink[i] & ~sdata->vif.valid_links)) ||
+ (neg_ttlm.uplink[i] &&
+ (neg_ttlm.uplink[i] & ~sdata->vif.valid_links))) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+ }
+
+ ttlm_res = drv_can_neg_ttlm(sdata->local, sdata, &neg_ttlm);
+
+ if (ttlm_res != NEG_TTLM_RES_ACCEPT)
+ goto out;
+
+ ieee80211_apply_neg_ttlm(sdata, neg_ttlm);
+out:
+ kfree(elems);
+ ieee80211_send_neg_ttlm_res(sdata, ttlm_res, dialog_token, &neg_ttlm);
+}
+
+void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len)
+{
+ if (!ieee80211_vif_is_mld(&sdata->vif) ||
+ mgmt->u.action.u.ttlm_req.dialog_token !=
+ sdata->u.mgd.dialog_token_alloc)
+ return;
+
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.neg_ttlm_timeout_work);
+
+ /* MLD station sends a TID to link mapping request, mainly to handle
+ * BTM (BSS transition management) request, in which case it needs to
+ * restrict the active links set.
+ * In this case it's not expected that the MLD AP will reject the
+ * negotiated TTLM request.
+ * This can be better implemented in the future, to handle request
+ * rejections.
+ */
+ if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS)
+ __ieee80211_disconnect(sdata);
+}
+
void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
@@ -7066,6 +7413,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ieee80211_sta_handle_tspec_ac_params_wk);
wiphy_delayed_work_init(&ifmgd->ttlm_work,
ieee80211_tid_to_link_map_work);
+ wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work,
+ ieee80211_neg_ttlm_timeout_work);
ifmgd->flags = 0;
ifmgd->powersave = sdata->wdev.ps;
@@ -7075,6 +7424,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
spin_lock_init(&ifmgd->teardown_lock);
ifmgd->teardown_skb = NULL;
ifmgd->orig_teardown_skb = NULL;
+ ifmgd->mcast_seq_last = IEEE80211_SN_MODULO;
}
static void ieee80211_recalc_smps_work(struct wiphy *wiphy,
@@ -7094,8 +7444,8 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
unsigned int link_id = link->link_id;
link->u.mgd.p2p_noa_index = -1;
- link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
+ link->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&link->u.mgd.request_smps_work,
ieee80211_request_smps_mgd_work);
@@ -7133,6 +7483,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss, s8 link_id,
const u8 *ap_mld_addr, bool assoc,
+ struct ieee80211_conn_settings *conn,
bool override)
{
struct ieee80211_local *local = sdata->local;
@@ -7264,13 +7615,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
}
if (new_sta || override) {
- err = ieee80211_prep_channel(sdata, link, cbss, mlo,
- &link->u.mgd.conn_flags);
+ /*
+ * Only set this if we're also going to calculate the AP
+ * settings etc., otherwise this was set before in a
+ * previous call. Note override is set to %true in assoc
+ * if the settings were changed.
+ */
+ link->u.mgd.conn = *conn;
+ err = ieee80211_prep_channel(sdata, link, link->link_id, cbss,
+ mlo, &link->u.mgd.conn);
if (err) {
if (new_sta)
sta_info_free(local, new_sta);
goto out_err;
}
+ /* pass out for use in assoc */
+ *conn = link->u.mgd.conn;
}
if (new_sta) {
@@ -7309,6 +7669,75 @@ out_err:
return err;
}
+static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_bss_ies *ies,
+ u8 cur_channel, bool ignore_ecsa)
+{
+ const struct element *csa_elem, *ecsa_elem;
+ struct ieee80211_channel_sw_ie *csa = NULL;
+ struct ieee80211_ext_chansw_ie *ecsa = NULL;
+
+ if (!ies)
+ return false;
+
+ csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH,
+ ies->data, ies->len);
+ if (csa_elem && csa_elem->datalen == sizeof(*csa))
+ csa = (void *)csa_elem->data;
+
+ ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ ies->data, ies->len);
+ if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa))
+ ecsa = (void *)ecsa_elem->data;
+
+ if (csa && csa->count == 0)
+ csa = NULL;
+ if (csa && !csa->mode && csa->new_ch_num == cur_channel)
+ csa = NULL;
+
+ if (ecsa && ecsa->count == 0)
+ ecsa = NULL;
+ if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel)
+ ecsa = NULL;
+
+ if (ignore_ecsa && ecsa) {
+ sdata_info(sdata,
+ "Ignoring ECSA in probe response - was considered stuck!\n");
+ return csa;
+ }
+
+ return csa || ecsa;
+}
+
+static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *bss)
+{
+ u8 cur_channel;
+ bool ret;
+
+ cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq);
+
+ rcu_read_lock();
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->beacon_ies),
+ cur_channel, false)) {
+ ret = true;
+ goto out;
+ }
+
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->proberesp_ies),
+ cur_channel, bss->proberesp_ecsa_stuck)) {
+ ret = true;
+ goto out;
+ }
+
+ ret = false;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
/* config hooks */
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct cfg80211_auth_request *req)
@@ -7316,11 +7745,13 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data;
+ struct ieee80211_conn_settings conn;
struct ieee80211_link_data *link;
- const struct element *csa_elem, *ecsa_elem;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_bss *bss;
u16 auth_alg;
int err;
- bool cont_auth;
+ bool cont_auth, wmm_used;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -7360,21 +7791,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data)
return -EBUSY;
- rcu_read_lock();
- csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(req->bss,
- WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, req->bss)) {
sdata_info(sdata, "AP is in CSA process, reject auth\n");
return -EINVAL;
}
- rcu_read_unlock();
auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
@@ -7462,15 +7882,24 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
/* needed for transmitting the auth frame(s) properly */
memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN);
+ bss = (void *)req->bss->priv;
+ wmm_used = bss->wmm_used && (local->hw.queues >= IEEE80211_NUM_ACS);
+
+ sband = local->hw.wiphy->bands[req->bss->channel->band];
+
+ ieee80211_determine_our_sta_mode_auth(sdata, sband, req, wmm_used,
+ &conn);
+
err = ieee80211_prep_connection(sdata, req->bss, req->link_id,
- req->ap_mld_addr, cont_auth, false);
+ req->ap_mld_addr, cont_auth,
+ &conn, false);
if (err)
goto err_clear;
- if (req->link_id > 0)
+ if (req->link_id >= 0)
link = sdata_dereference(sdata->link[req->link_id], sdata);
else
- link = sdata_dereference(sdata->link[0], sdata);
+ link = &sdata->deflink;
if (WARN_ON(!link)) {
err = -ENOLINK;
@@ -7502,38 +7931,33 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
return err;
}
-static ieee80211_conn_flags_t
+static void
ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgd_assoc_data *assoc_data,
struct cfg80211_assoc_request *req,
- ieee80211_conn_flags_t conn_flags,
+ struct ieee80211_conn_settings *conn,
unsigned int link_id)
{
struct ieee80211_local *local = sdata->local;
const struct cfg80211_bss_ies *bss_ies;
struct ieee80211_supported_band *sband;
- const struct element *ht_elem, *vht_elem;
struct ieee80211_link_data *link;
struct cfg80211_bss *cbss;
struct ieee80211_bss *bss;
- bool is_5ghz, is_6ghz;
cbss = assoc_data->link[link_id].bss;
if (WARN_ON(!cbss))
- return 0;
+ return;
bss = (void *)cbss->priv;
sband = local->hw.wiphy->bands[cbss->channel->band];
if (WARN_ON(!sband))
- return 0;
+ return;
link = sdata_dereference(sdata->link[link_id], sdata);
if (WARN_ON(!link))
- return 0;
-
- is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
- is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
+ return;
/* for MLO connections assume advertising all rates is OK */
if (!req->ap_mld_addr) {
@@ -7550,40 +7974,18 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
assoc_data->ie_pos += req->links[link_id].elems_len;
}
- rcu_read_lock();
- ht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION);
- if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation))
- assoc_data->link[link_id].ap_ht_param =
- ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param;
- else if (!is_6ghz)
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- vht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
- if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) {
- memcpy(&assoc_data->link[link_id].ap_vht_cap, vht_elem->data,
- sizeof(struct ieee80211_vht_cap));
- } else if (is_5ghz) {
- link_info(link,
- "VHT capa missing/short, disabling VHT/HE/EHT\n");
- conn_flags |= IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- }
- rcu_read_unlock();
-
link->u.mgd.beacon_crc_valid = false;
link->u.mgd.dtim_period = 0;
link->u.mgd.have_beacon = false;
- /* override HT/VHT configuration only if the AP and we support it */
- if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ /* override HT configuration only if the AP and we support it */
+ if (conn->mode >= IEEE80211_CONN_MODE_HT) {
struct ieee80211_sta_ht_cap sta_ht_cap;
memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
}
- link->conf->eht_puncturing = 0;
-
rcu_read_lock();
bss_ies = rcu_dereference(cbss->beacon_ies);
if (bss_ies) {
@@ -7604,7 +8006,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
}
if (bss_ies) {
- const struct ieee80211_eht_operation *eht_oper;
const struct element *elem;
elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
@@ -7621,32 +8022,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
link->conf->ema_ap = true;
else
link->conf->ema_ap = false;
-
- elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION,
- bss_ies->data, bss_ies->len);
- eht_oper = (const void *)(elem->data + 1);
-
- if (elem &&
- ieee80211_eht_oper_size_ok((const void *)(elem->data + 1),
- elem->datalen - 1) &&
- (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
- (eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) {
- const struct ieee80211_eht_operation_info *info =
- (void *)eht_oper->optional;
- const u8 *disable_subchannel_bitmap = info->optional;
- u16 bitmap;
-
- bitmap = get_unaligned_le16(disable_subchannel_bitmap);
- if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- &link->conf->chandef) &&
- !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING)))
- ieee80211_handle_puncturing_bitmap(link,
- eht_oper,
- bitmap,
- NULL);
- else
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
}
rcu_read_unlock();
@@ -7673,8 +8048,67 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
} else {
link->smps_mode = link->u.mgd.req_smps;
}
+}
+
+static int
+ieee80211_mgd_get_ap_ht_vht_capa(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgd_assoc_data *assoc_data,
+ int link_id)
+{
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+ enum nl80211_band band = cbss->channel->band;
+ struct ieee80211_supported_band *sband;
+ const struct element *elem;
+ int err;
+
+ /* neither HT nor VHT elements used on 6 GHz */
+ if (band == NL80211_BAND_6GHZ)
+ return 0;
+
+ if (assoc_data->link[link_id].conn.mode < IEEE80211_CONN_MODE_HT)
+ return 0;
+
+ rcu_read_lock();
+ elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION);
+ if (!elem || elem->datalen < sizeof(struct ieee80211_ht_operation)) {
+ mlme_link_id_dbg(sdata, link_id, "no HT operation on BSS %pM\n",
+ cbss->bssid);
+ err = -EINVAL;
+ goto out_rcu;
+ }
+ assoc_data->link[link_id].ap_ht_param =
+ ((struct ieee80211_ht_operation *)(elem->data))->ht_param;
+ rcu_read_unlock();
+
+ if (assoc_data->link[link_id].conn.mode < IEEE80211_CONN_MODE_VHT)
+ return 0;
- return conn_flags;
+ /* some drivers want to support VHT on 2.4 GHz even */
+ sband = sdata->local->hw.wiphy->bands[band];
+ if (!sband->vht_cap.vht_supported)
+ return 0;
+
+ rcu_read_lock();
+ elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
+ /* but even then accept it not being present on the AP */
+ if (!elem && band == NL80211_BAND_2GHZ) {
+ err = 0;
+ goto out_rcu;
+ }
+ if (!elem || elem->datalen < sizeof(struct ieee80211_vht_cap)) {
+ mlme_link_id_dbg(sdata, link_id, "no VHT capa on BSS %pM\n",
+ cbss->bssid);
+ err = -EINVAL;
+ goto out_rcu;
+ }
+ memcpy(&assoc_data->link[link_id].ap_vht_cap, elem->data,
+ sizeof(struct ieee80211_vht_cap));
+ rcu_read_unlock();
+
+ return 0;
+out_rcu:
+ rcu_read_unlock();
+ return err;
}
int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
@@ -7684,13 +8118,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_assoc_data *assoc_data;
- const struct element *ssid_elem, *csa_elem, *ecsa_elem;
+ const struct element *ssid_elem;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
- ieee80211_conn_flags_t conn_flags = 0;
struct ieee80211_link_data *link;
struct cfg80211_bss *cbss;
- struct ieee80211_bss *bss;
- bool override;
+ bool override, uapsd_supported;
+ bool match_auth;
int i, err;
size_t size = sizeof(*assoc_data) + req->ie_len;
@@ -7707,54 +8140,28 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
+ if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
+ sdata_info(sdata, "AP is in CSA process, reject assoc\n");
+ err = -EINVAL;
+ goto err_free;
+ }
+
rcu_read_lock();
ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
- kfree(assoc_data);
- return -EINVAL;
- }
-
- csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- sdata_info(sdata, "AP is in CSA process, reject assoc\n");
- rcu_read_unlock();
- kfree(assoc_data);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_free;
}
memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen);
assoc_data->ssid_len = ssid_elem->datalen;
- memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len);
- vif_cfg->ssid_len = assoc_data->ssid_len;
rcu_read_unlock();
- if (req->ap_mld_addr) {
- for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
- if (!req->links[i].bss)
- continue;
- link = sdata_dereference(sdata->link[i], sdata);
- if (link)
- ether_addr_copy(assoc_data->link[i].addr,
- link->conf->addr);
- else
- eth_random_addr(assoc_data->link[i].addr);
- }
- } else {
- memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN);
- }
-
- assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
-
- memcpy(assoc_data->ap_addr,
- req->ap_mld_addr ?: req->bss->bssid,
- ETH_ALEN);
+ if (req->ap_mld_addr)
+ memcpy(assoc_data->ap_addr, req->ap_mld_addr, ETH_ALEN);
+ else
+ memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN);
if (ifmgd->associated) {
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -7772,98 +8179,146 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
false);
}
- if (ifmgd->auth_data && !ifmgd->auth_data->done) {
- err = -EBUSY;
- goto err_free;
- }
+ memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa));
+ memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
+ sizeof(ifmgd->ht_capa_mask));
- if (ifmgd->assoc_data) {
- err = -EBUSY;
- goto err_free;
- }
+ memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
+ memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
+ sizeof(ifmgd->vht_capa_mask));
- if (ifmgd->auth_data) {
- bool match;
+ memcpy(&ifmgd->s1g_capa, &req->s1g_capa, sizeof(ifmgd->s1g_capa));
+ memcpy(&ifmgd->s1g_capa_mask, &req->s1g_capa_mask,
+ sizeof(ifmgd->s1g_capa_mask));
- /* keep sta info, bssid if matching */
- match = ether_addr_equal(ifmgd->auth_data->ap_addr,
- assoc_data->ap_addr) &&
- ifmgd->auth_data->link_id == req->link_id;
+ /* keep some setup (AP STA, channel, ...) if matching */
+ match_auth = ifmgd->auth_data &&
+ ether_addr_equal(ifmgd->auth_data->ap_addr,
+ assoc_data->ap_addr) &&
+ ifmgd->auth_data->link_id == req->link_id;
- /* Cleanup is delayed if auth_data matches */
- if (!match)
- ieee80211_destroy_auth_data(sdata, false);
- }
+ if (req->ap_mld_addr) {
+ uapsd_supported = true;
- /* prepare assoc data */
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
+ struct ieee80211_supported_band *sband;
+ struct cfg80211_bss *link_cbss = req->links[i].bss;
+ struct ieee80211_bss *bss;
- bss = (void *)cbss->priv;
- assoc_data->wmm = bss->wmm_used &&
- (local->hw.queues >= IEEE80211_NUM_ACS);
+ if (!link_cbss)
+ continue;
- /*
- * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode.
- * We still associate in non-HT mode (11a/b/g) if any one of these
- * ciphers is configured as pairwise.
- * We can set this to true for non-11n hardware, that'll be checked
- * separately along with the peer capabilities.
- */
- for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) {
- if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
- req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
- req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- netdev_info(sdata->dev,
- "disabling HT/VHT/HE due to WEP/TKIP use\n");
+ bss = (void *)link_cbss->priv;
+
+ if (!bss->wmm_used) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (req->flags & (ASSOC_REQ_DISABLE_HT |
+ ASSOC_REQ_DISABLE_VHT |
+ ASSOC_REQ_DISABLE_HE |
+ ASSOC_REQ_DISABLE_EHT)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (link_cbss->channel->band == NL80211_BAND_S1GHZ) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ link = sdata_dereference(sdata->link[i], sdata);
+ if (link)
+ ether_addr_copy(assoc_data->link[i].addr,
+ link->conf->addr);
+ else
+ eth_random_addr(assoc_data->link[i].addr);
+ sband = local->hw.wiphy->bands[link_cbss->channel->band];
+
+ if (match_auth && i == assoc_link_id && link)
+ assoc_data->link[i].conn = link->u.mgd.conn;
+ else
+ assoc_data->link[i].conn =
+ ieee80211_conn_settings_unlimited;
+ ieee80211_determine_our_sta_mode_assoc(sdata, sband,
+ req, true, i,
+ &assoc_data->link[i].conn);
+ assoc_data->link[i].bss = link_cbss;
+ assoc_data->link[i].disabled = req->links[i].disabled;
+
+ if (!bss->uapsd_supported)
+ uapsd_supported = false;
+
+ if (assoc_data->link[i].conn.mode < IEEE80211_CONN_MODE_EHT) {
+ err = -EINVAL;
+ req->links[i].error = err;
+ goto err_free;
+ }
+
+ err = ieee80211_mgd_get_ap_ht_vht_capa(sdata,
+ assoc_data, i);
+ if (err) {
+ err = -EINVAL;
+ req->links[i].error = err;
+ goto err_free;
+ }
}
- }
- /* also disable HT/VHT/HE/EHT if the AP doesn't use WMM */
- if (!bss->wmm_used) {
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- netdev_info(sdata->dev,
- "disabling HT/VHT/HE as WMM/QoS is not supported by the AP\n");
- }
+ assoc_data->wmm = true;
+ } else {
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_bss *bss = (void *)cbss->priv;
- if (req->flags & ASSOC_REQ_DISABLE_HT) {
- mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n");
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
+ memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN);
+ assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
- if (req->flags & ASSOC_REQ_DISABLE_VHT) {
- mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n");
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- }
+ assoc_data->wmm = bss->wmm_used &&
+ (local->hw.queues >= IEEE80211_NUM_ACS);
- if (req->flags & ASSOC_REQ_DISABLE_HE) {
- mlme_dbg(sdata, "HE disabled by flag, disabling HE/EHT\n");
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ if (cbss->channel->band == NL80211_BAND_6GHZ &&
+ req->flags & (ASSOC_REQ_DISABLE_HT |
+ ASSOC_REQ_DISABLE_VHT |
+ ASSOC_REQ_DISABLE_HE)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+
+ assoc_data->link[0].bss = cbss;
+
+ if (match_auth)
+ assoc_data->link[0].conn = sdata->deflink.u.mgd.conn;
+ else
+ assoc_data->link[0].conn =
+ ieee80211_conn_settings_unlimited;
+ ieee80211_determine_our_sta_mode_assoc(sdata, sband, req,
+ assoc_data->wmm, 0,
+ &assoc_data->link[0].conn);
+
+ uapsd_supported = bss->uapsd_supported;
+
+ err = ieee80211_mgd_get_ap_ht_vht_capa(sdata, assoc_data, 0);
+ if (err)
+ goto err_free;
}
- if (req->flags & ASSOC_REQ_DISABLE_EHT)
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ assoc_data->spp_amsdu = req->flags & ASSOC_REQ_SPP_AMSDU;
- memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa));
- memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
- sizeof(ifmgd->ht_capa_mask));
+ if (ifmgd->auth_data && !ifmgd->auth_data->done) {
+ err = -EBUSY;
+ goto err_free;
+ }
- memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
- memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
- sizeof(ifmgd->vht_capa_mask));
+ if (ifmgd->assoc_data) {
+ err = -EBUSY;
+ goto err_free;
+ }
- memcpy(&ifmgd->s1g_capa, &req->s1g_capa, sizeof(ifmgd->s1g_capa));
- memcpy(&ifmgd->s1g_capa_mask, &req->s1g_capa_mask,
- sizeof(ifmgd->s1g_capa_mask));
+ /* Cleanup is delayed if auth_data matches */
+ if (ifmgd->auth_data && !match_auth)
+ ieee80211_destroy_auth_data(sdata, false);
if (req->ie && req->ie_len) {
memcpy(assoc_data->ie, req->ie, req->ie_len);
@@ -7895,19 +8350,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
assoc_data->assoc_link_id = assoc_link_id;
if (req->ap_mld_addr) {
- for (i = 0; i < ARRAY_SIZE(assoc_data->link); i++) {
- assoc_data->link[i].conn_flags = conn_flags;
- assoc_data->link[i].bss = req->links[i].bss;
- assoc_data->link[i].disabled = req->links[i].disabled;
- }
-
/* if there was no authentication, set up the link */
err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id), 0);
if (err)
goto err_clear;
- } else {
- assoc_data->link[0].conn_flags = conn_flags;
- assoc_data->link[0].bss = cbss;
}
link = sdata_dereference(sdata->link[assoc_link_id], sdata);
@@ -7916,19 +8362,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
goto err_clear;
}
- /* keep old conn_flags from ieee80211_prep_channel() from auth */
- conn_flags |= link->u.mgd.conn_flags;
- conn_flags |= ieee80211_setup_assoc_link(sdata, assoc_data, req,
- conn_flags, assoc_link_id);
- override = link->u.mgd.conn_flags != conn_flags;
- link->u.mgd.conn_flags |= conn_flags;
+ override = link->u.mgd.conn.mode !=
+ assoc_data->link[assoc_link_id].conn.mode ||
+ link->u.mgd.conn.bw_limit !=
+ assoc_data->link[assoc_link_id].conn.bw_limit;
+ link->u.mgd.conn = assoc_data->link[assoc_link_id].conn;
+
+ ieee80211_setup_assoc_link(sdata, assoc_data, req, &link->u.mgd.conn,
+ assoc_link_id);
if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK),
"U-APSD not supported with HW_PS_NULLFUNC_STACK\n"))
sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD;
- if (bss->wmm_used && bss->uapsd_supported &&
+ if (assoc_data->wmm && uapsd_supported &&
(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) {
assoc_data->uapsd = true;
ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
@@ -7972,34 +8420,35 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
continue;
if (i == assoc_data->assoc_link_id)
continue;
- /* only calculate the flags, hence link == NULL */
- err = ieee80211_prep_channel(sdata, NULL,
+ /* only calculate the mode, hence link == NULL */
+ err = ieee80211_prep_channel(sdata, NULL, i,
assoc_data->link[i].bss, true,
- &assoc_data->link[i].conn_flags);
+ &assoc_data->link[i].conn);
if (err) {
req->links[i].error = err;
goto err_clear;
}
}
+ memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len);
+ vif_cfg->ssid_len = assoc_data->ssid_len;
+
/* needed for transmitting the assoc frames properly */
memcpy(sdata->vif.cfg.ap_addr, assoc_data->ap_addr, ETH_ALEN);
err = ieee80211_prep_connection(sdata, cbss, req->link_id,
- req->ap_mld_addr, true, override);
+ req->ap_mld_addr, true,
+ &assoc_data->link[assoc_link_id].conn,
+ override);
if (err)
goto err_clear;
- assoc_data->link[assoc_data->assoc_link_id].conn_flags =
- link->u.mgd.conn_flags;
-
if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC)) {
const struct cfg80211_bss_ies *beacon_ies;
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
- if (beacon_ies) {
+ if (!beacon_ies) {
/*
* Wait up to one beacon interval ...
* should this be more if we miss one?
@@ -8080,6 +8529,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
@@ -8156,6 +8606,8 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
&ifmgd->ml_reconf_work);
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->neg_ttlm_timeout_work);
if (ifmgd->assoc_data)
ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 449af4e1cca4..9ef14e475c90 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -168,6 +168,7 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata)
int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
struct ocb_setup *setup)
{
+ struct ieee80211_chan_req chanreq = { .oper = setup->chandef };
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID;
@@ -182,7 +183,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
+ err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
return err;
@@ -207,7 +208,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
ifocb->joined = false;
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
spin_lock_bh(&ifocb->incomplete_lock);
while (!list_empty(&ifocb->incomplete_stations)) {
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 6c4080202573..221695d841fd 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -86,7 +86,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
lockdep_assert_wiphy(local->hw.wiphy);
- if (WARN_ON(local->use_chanctx))
+ if (WARN_ON(!local->emulate_chanctx))
return;
/*
@@ -136,7 +136,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
lockdep_assert_wiphy(local->hw.wiphy);
- if (WARN_ON(local->use_chanctx))
+ if (WARN_ON(!local->emulate_chanctx))
return;
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -351,10 +351,13 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
* 20 MHz channel width) don't stop all the operations but still
* treat it as though the ROC operation started properly, so
* other ROC operations won't interfere with this one.
+ *
+ * Note: scan can't run, tmp_channel is what we use, so this
+ * must be the currently active channel.
*/
- roc->on_channel = roc->chan == local->_oper_chandef.chan &&
- local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 &&
- local->_oper_chandef.width != NL80211_CHAN_WIDTH_10;
+ roc->on_channel = roc->chan == local->hw.conf.chandef.chan &&
+ local->hw.conf.chandef.width != NL80211_CHAN_WIDTH_5 &&
+ local->hw.conf.chandef.width != NL80211_CHAN_WIDTH_10;
/* start this ROC */
ieee80211_recalc_idle(local);
@@ -363,7 +366,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
ieee80211_offchannel_stop_vifs(local);
local->tmp_channel = roc->chan;
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
}
wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work,
@@ -426,7 +429,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local)
return;
if (!roc->started) {
- WARN_ON(local->use_chanctx);
+ WARN_ON(!local->emulate_chanctx);
_ieee80211_start_next_roc(local);
} else {
on_channel = roc->on_channel;
@@ -439,7 +442,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local)
ieee80211_flush_queues(local, NULL, false);
local->tmp_channel = NULL;
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
ieee80211_offchannel_return(local);
}
@@ -539,7 +542,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
/* this may work, but is untested */
return -EOPNOTSUPP;
- if (local->use_chanctx && !local->ops->remain_on_channel)
+ if (!local->emulate_chanctx && !local->ops->remain_on_channel)
return -EOPNOTSUPP;
roc = kzalloc(sizeof(*roc), GFP_KERNEL);
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
new file mode 100644
index 000000000000..55e5497f8978
--- /dev/null
+++ b/net/mac80211/parse.c
@@ -0,0 +1,971 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018-2024 Intel Corporation
+ *
+ * element parsing for mac80211
+ */
+
+#include <net/mac80211.h>
+#include <linux/netdevice.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/bitmap.h>
+#include <linux/crc32.h>
+#include <net/net_namespace.h>
+#include <net/cfg80211.h>
+#include <net/rtnetlink.h>
+#include <kunit/visibility.h>
+
+#include "ieee80211_i.h"
+#include "driver-ops.h"
+#include "rate.h"
+#include "mesh.h"
+#include "wme.h"
+#include "led.h"
+#include "wep.h"
+
+struct ieee80211_elems_parse {
+ /* must be first for kfree to work */
+ struct ieee802_11_elems elems;
+
+ /* The basic Multi-Link element in the original elements */
+ const struct element *ml_basic_elem;
+
+ /* The reconfiguration Multi-Link element in the original elements */
+ const struct element *ml_reconf_elem;
+
+ /*
+ * scratch buffer that can be used for various element parsing related
+ * tasks, e.g., element de-fragmentation etc.
+ */
+ size_t scratch_len;
+ u8 *scratch_pos;
+ u8 scratch[] __counted_by(scratch_len);
+};
+
+static void
+ieee80211_parse_extension_element(u32 *crc,
+ const struct element *elem,
+ struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elems_parse_params *params)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ const void *data = elem->data + 1;
+ bool calc_crc = false;
+ u8 len;
+
+ if (!elem->datalen)
+ return;
+
+ len = elem->datalen - 1;
+
+ switch (elem->data[0]) {
+ case WLAN_EID_EXT_HE_MU_EDCA:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ calc_crc = true;
+ if (len >= sizeof(*elems->mu_edca_param_set))
+ elems->mu_edca_param_set = data;
+ break;
+ case WLAN_EID_EXT_HE_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (ieee80211_he_capa_size_ok(data, len)) {
+ elems->he_cap = data;
+ elems->he_cap_len = len;
+ }
+ break;
+ case WLAN_EID_EXT_HE_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ calc_crc = true;
+ if (len >= sizeof(*elems->he_operation) &&
+ len >= ieee80211_he_oper_size(data) - 1)
+ elems->he_operation = data;
+ break;
+ case WLAN_EID_EXT_UORA:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= 1)
+ elems->uora_element = data;
+ break;
+ case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
+ if (len == 3)
+ elems->max_channel_switch_time = data;
+ break;
+ case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
+ if (len >= sizeof(*elems->mbssid_config_ie))
+ elems->mbssid_config_ie = data;
+ break;
+ case WLAN_EID_EXT_HE_SPR:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= sizeof(*elems->he_spr) &&
+ len >= ieee80211_he_spr_size(data))
+ elems->he_spr = data;
+ break;
+ case WLAN_EID_EXT_HE_6GHZ_CAPA:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= sizeof(*elems->he_6ghz_capa))
+ elems->he_6ghz_capa = data;
+ break;
+ case WLAN_EID_EXT_EHT_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ if (ieee80211_eht_capa_size_ok(elems->he_cap,
+ data, len,
+ params->from_ap)) {
+ elems->eht_cap = data;
+ elems->eht_cap_len = len;
+ }
+ break;
+ case WLAN_EID_EXT_EHT_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ if (ieee80211_eht_oper_size_ok(data, len))
+ elems->eht_operation = data;
+ calc_crc = true;
+ break;
+ case WLAN_EID_EXT_EHT_MULTI_LINK:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ calc_crc = true;
+
+ if (ieee80211_mle_size_ok(data, len)) {
+ const struct ieee80211_multi_link_elem *mle =
+ (void *)data;
+
+ switch (le16_get_bits(mle->control,
+ IEEE80211_ML_CONTROL_TYPE)) {
+ case IEEE80211_ML_CONTROL_TYPE_BASIC:
+ if (elems_parse->ml_basic_elem) {
+ elems->parse_error |=
+ IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC;
+ break;
+ }
+ elems_parse->ml_basic_elem = elem;
+ break;
+ case IEEE80211_ML_CONTROL_TYPE_RECONF:
+ elems_parse->ml_reconf_elem = elem;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case WLAN_EID_EXT_BANDWIDTH_INDICATION:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ if (ieee80211_bandwidth_indication_size_ok(data, len))
+ elems->bandwidth_indication = data;
+ calc_crc = true;
+ break;
+ case WLAN_EID_EXT_TID_TO_LINK_MAPPING:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ calc_crc = true;
+ if (ieee80211_tid_to_link_map_size_ok(data, len) &&
+ elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) {
+ elems->ttlm[elems->ttlm_num] = (void *)data;
+ elems->ttlm_num++;
+ }
+ break;
+ }
+
+ if (crc && calc_crc)
+ *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2);
+}
+
+static u32
+_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
+ struct ieee80211_elems_parse *elems_parse,
+ const struct element *check_inherit)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ const struct element *elem;
+ bool calc_crc = params->filter != 0;
+ DECLARE_BITMAP(seen_elems, 256);
+ u32 crc = params->crc;
+
+ bitmap_zero(seen_elems, 256);
+
+ for_each_element(elem, params->start, params->len) {
+ const struct element *subelem;
+ u8 elem_parse_failed;
+ u8 id = elem->id;
+ u8 elen = elem->datalen;
+ const u8 *pos = elem->data;
+
+ if (check_inherit &&
+ !cfg80211_is_element_inherited(elem,
+ check_inherit))
+ continue;
+
+ switch (id) {
+ case WLAN_EID_SSID:
+ case WLAN_EID_SUPP_RATES:
+ case WLAN_EID_FH_PARAMS:
+ case WLAN_EID_DS_PARAMS:
+ case WLAN_EID_CF_PARAMS:
+ case WLAN_EID_TIM:
+ case WLAN_EID_IBSS_PARAMS:
+ case WLAN_EID_CHALLENGE:
+ case WLAN_EID_RSN:
+ case WLAN_EID_ERP_INFO:
+ case WLAN_EID_EXT_SUPP_RATES:
+ case WLAN_EID_HT_CAPABILITY:
+ case WLAN_EID_HT_OPERATION:
+ case WLAN_EID_VHT_CAPABILITY:
+ case WLAN_EID_VHT_OPERATION:
+ case WLAN_EID_MESH_ID:
+ case WLAN_EID_MESH_CONFIG:
+ case WLAN_EID_PEER_MGMT:
+ case WLAN_EID_PREQ:
+ case WLAN_EID_PREP:
+ case WLAN_EID_PERR:
+ case WLAN_EID_RANN:
+ case WLAN_EID_CHANNEL_SWITCH:
+ case WLAN_EID_EXT_CHANSWITCH_ANN:
+ case WLAN_EID_COUNTRY:
+ case WLAN_EID_PWR_CONSTRAINT:
+ case WLAN_EID_TIMEOUT_INTERVAL:
+ case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+ case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+ case WLAN_EID_CHAN_SWITCH_PARAM:
+ case WLAN_EID_EXT_CAPABILITY:
+ case WLAN_EID_CHAN_SWITCH_TIMING:
+ case WLAN_EID_LINK_ID:
+ case WLAN_EID_BSS_MAX_IDLE_PERIOD:
+ case WLAN_EID_RSNX:
+ case WLAN_EID_S1G_BCN_COMPAT:
+ case WLAN_EID_S1G_CAPABILITIES:
+ case WLAN_EID_S1G_OPERATION:
+ case WLAN_EID_AID_RESPONSE:
+ case WLAN_EID_S1G_SHORT_BCN_INTERVAL:
+ /*
+ * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
+ * that if the content gets bigger it might be needed more than once
+ */
+ if (test_bit(id, seen_elems)) {
+ elems->parse_error |=
+ IEEE80211_PARSE_ERR_DUP_ELEM;
+ continue;
+ }
+ break;
+ }
+
+ if (calc_crc && id < 64 && (params->filter & (1ULL << id)))
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ elem_parse_failed = 0;
+
+ switch (id) {
+ case WLAN_EID_LINK_ID:
+ if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->lnk_id = (void *)(pos - 2);
+ break;
+ case WLAN_EID_CHAN_SWITCH_TIMING:
+ if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->ch_sw_timing = (void *)pos;
+ break;
+ case WLAN_EID_EXT_CAPABILITY:
+ elems->ext_capab = pos;
+ elems->ext_capab_len = elen;
+ break;
+ case WLAN_EID_SSID:
+ elems->ssid = pos;
+ elems->ssid_len = elen;
+ break;
+ case WLAN_EID_SUPP_RATES:
+ elems->supp_rates = pos;
+ elems->supp_rates_len = elen;
+ break;
+ case WLAN_EID_DS_PARAMS:
+ if (elen >= 1)
+ elems->ds_params = pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_TIM:
+ if (elen >= sizeof(struct ieee80211_tim_ie)) {
+ elems->tim = (void *)pos;
+ elems->tim_len = elen;
+ } else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_VENDOR_SPECIFIC:
+ if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
+ pos[2] == 0xf2) {
+ /* Microsoft OUI (00:50:F2) */
+
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ if (elen >= 5 && pos[3] == 2) {
+ /* OUI Type 2 - WMM IE */
+ if (pos[4] == 0) {
+ elems->wmm_info = pos;
+ elems->wmm_info_len = elen;
+ } else if (pos[4] == 1) {
+ elems->wmm_param = pos;
+ elems->wmm_param_len = elen;
+ }
+ }
+ }
+ break;
+ case WLAN_EID_RSN:
+ elems->rsn = pos;
+ elems->rsn_len = elen;
+ break;
+ case WLAN_EID_ERP_INFO:
+ if (elen >= 1)
+ elems->erp_info = pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_EXT_SUPP_RATES:
+ elems->ext_supp_rates = pos;
+ elems->ext_supp_rates_len = elen;
+ break;
+ case WLAN_EID_HT_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_HT)
+ break;
+ if (elen >= sizeof(struct ieee80211_ht_cap))
+ elems->ht_cap_elem = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_HT_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_HT)
+ break;
+ if (elen >= sizeof(struct ieee80211_ht_operation))
+ elems->ht_operation = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_VHT_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (elen >= sizeof(struct ieee80211_vht_cap))
+ elems->vht_cap_elem = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_VHT_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (elen >= sizeof(struct ieee80211_vht_operation)) {
+ elems->vht_operation = (void *)pos;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_OPMODE_NOTIF:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (elen > 0) {
+ elems->opmode_notif = pos;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_MESH_ID:
+ elems->mesh_id = pos;
+ elems->mesh_id_len = elen;
+ break;
+ case WLAN_EID_MESH_CONFIG:
+ if (elen >= sizeof(struct ieee80211_meshconf_ie))
+ elems->mesh_config = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_PEER_MGMT:
+ elems->peering = pos;
+ elems->peering_len = elen;
+ break;
+ case WLAN_EID_MESH_AWAKE_WINDOW:
+ if (elen >= 2)
+ elems->awake_window = (void *)pos;
+ break;
+ case WLAN_EID_PREQ:
+ elems->preq = pos;
+ elems->preq_len = elen;
+ break;
+ case WLAN_EID_PREP:
+ elems->prep = pos;
+ elems->prep_len = elen;
+ break;
+ case WLAN_EID_PERR:
+ elems->perr = pos;
+ elems->perr_len = elen;
+ break;
+ case WLAN_EID_RANN:
+ if (elen >= sizeof(struct ieee80211_rann_ie))
+ elems->rann = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_CHANNEL_SWITCH:
+ if (elen != sizeof(struct ieee80211_channel_sw_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->ch_switch_ie = (void *)pos;
+ break;
+ case WLAN_EID_EXT_CHANSWITCH_ANN:
+ if (elen != sizeof(struct ieee80211_ext_chansw_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->ext_chansw_ie = (void *)pos;
+ break;
+ case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+ if (params->mode < IEEE80211_CONN_MODE_HT)
+ break;
+ if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->sec_chan_offs = (void *)pos;
+ break;
+ case WLAN_EID_CHAN_SWITCH_PARAM:
+ if (elen <
+ sizeof(*elems->mesh_chansw_params_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->mesh_chansw_params_ie = (void *)pos;
+ break;
+ case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+
+ if (!params->action) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_UNEXPECTED_ELEM;
+ break;
+ }
+
+ if (elen < sizeof(*elems->wide_bw_chansw_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->wide_bw_chansw_ie = (void *)pos;
+ break;
+ case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (params->action) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_UNEXPECTED_ELEM;
+ break;
+ }
+ /*
+ * This is a bit tricky, but as we only care about
+ * a few elements, parse them out manually.
+ */
+ subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
+ pos, elen);
+ if (subelem) {
+ if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie))
+ elems->wide_bw_chansw_ie =
+ (void *)subelem->data;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ }
+
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+
+ subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION,
+ pos, elen);
+ if (subelem) {
+ const void *edata = subelem->data + 1;
+ u8 edatalen = subelem->datalen - 1;
+
+ if (ieee80211_bandwidth_indication_size_ok(edata,
+ edatalen))
+ elems->bandwidth_indication = edata;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ }
+ break;
+ case WLAN_EID_COUNTRY:
+ elems->country_elem = pos;
+ elems->country_elem_len = elen;
+ break;
+ case WLAN_EID_PWR_CONSTRAINT:
+ if (elen != 1) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->pwr_constr_elem = pos;
+ break;
+ case WLAN_EID_CISCO_VENDOR_SPECIFIC:
+ /* Lots of different options exist, but we only care
+ * about the Dynamic Transmit Power Control element.
+ * First check for the Cisco OUI, then for the DTPC
+ * tag (0x00).
+ */
+ if (elen < 4) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+
+ if (pos[0] != 0x00 || pos[1] != 0x40 ||
+ pos[2] != 0x96 || pos[3] != 0x00)
+ break;
+
+ if (elen != 6) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ elems->cisco_dtpc_elem = pos;
+ break;
+ case WLAN_EID_ADDBA_EXT:
+ if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->addba_ext_ie = (void *)pos;
+ break;
+ case WLAN_EID_TIMEOUT_INTERVAL:
+ if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
+ elems->timeout_int = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_BSS_MAX_IDLE_PERIOD:
+ if (elen >= sizeof(*elems->max_idle_period_ie))
+ elems->max_idle_period_ie = (void *)pos;
+ break;
+ case WLAN_EID_RSNX:
+ elems->rsnx = pos;
+ elems->rsnx_len = elen;
+ break;
+ case WLAN_EID_TX_POWER_ENVELOPE:
+ if (elen < 1 ||
+ elen > sizeof(struct ieee80211_tx_pwr_env))
+ break;
+
+ if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
+ break;
+
+ elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
+ elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
+ elems->tx_pwr_env_num++;
+ break;
+ case WLAN_EID_EXTENSION:
+ ieee80211_parse_extension_element(calc_crc ?
+ &crc : NULL,
+ elem, elems_parse,
+ params);
+ break;
+ case WLAN_EID_S1G_CAPABILITIES:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen >= sizeof(*elems->s1g_capab))
+ elems->s1g_capab = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_S1G_OPERATION:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen == sizeof(*elems->s1g_oper))
+ elems->s1g_oper = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_S1G_BCN_COMPAT:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen == sizeof(*elems->s1g_bcn_compat))
+ elems->s1g_bcn_compat = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_AID_RESPONSE:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen == sizeof(struct ieee80211_aid_response_ie))
+ elems->aid_resp = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ default:
+ break;
+ }
+
+ if (elem_parse_failed)
+ elems->parse_error |= elem_parse_failed;
+ else
+ __set_bit(id, seen_elems);
+ }
+
+ if (!for_each_element_completed(elem, params->start, params->len))
+ elems->parse_error |= IEEE80211_PARSE_ERR_INVALID_END;
+
+ return crc;
+}
+
+static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
+ struct ieee802_11_elems *elems,
+ struct cfg80211_bss *bss,
+ u8 *nontransmitted_profile)
+{
+ const struct element *elem, *sub;
+ size_t profile_len = 0;
+ bool found = false;
+
+ if (!bss || !bss->transmitted_bss)
+ return profile_len;
+
+ for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
+ if (elem->datalen < 2)
+ continue;
+ if (elem->data[0] < 1 || elem->data[0] > 8)
+ continue;
+
+ for_each_element(sub, elem->data + 1, elem->datalen - 1) {
+ u8 new_bssid[ETH_ALEN];
+ const u8 *index;
+
+ if (sub->id != 0 || sub->datalen < 4) {
+ /* not a valid BSS profile */
+ continue;
+ }
+
+ if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
+ sub->data[1] != 2) {
+ /* The first element of the
+ * Nontransmitted BSSID Profile is not
+ * the Nontransmitted BSSID Capability
+ * element.
+ */
+ continue;
+ }
+
+ memset(nontransmitted_profile, 0, len);
+ profile_len = cfg80211_merge_profile(start, len,
+ elem,
+ sub,
+ nontransmitted_profile,
+ len);
+
+ /* found a Nontransmitted BSSID Profile */
+ index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX,
+ nontransmitted_profile,
+ profile_len);
+ if (!index || index[1] < 1 || index[2] == 0) {
+ /* Invalid MBSSID Index element */
+ continue;
+ }
+
+ cfg80211_gen_new_bssid(bss->transmitted_bss->bssid,
+ elem->data[0],
+ index[2],
+ new_bssid);
+ if (ether_addr_equal(new_bssid, bss->bssid)) {
+ found = true;
+ elems->bssid_index_len = index[1];
+ elems->bssid_index = (void *)&index[2];
+ break;
+ }
+ }
+ }
+
+ return found ? profile_len : 0;
+}
+
+static void
+ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse,
+ u8 link_id)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ const struct ieee80211_multi_link_elem *ml = elems->ml_basic;
+ ssize_t ml_len = elems->ml_basic_len;
+ const struct element *sub;
+
+ for_each_mle_subelement(sub, (u8 *)ml, ml_len) {
+ struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
+ ssize_t sta_prof_len;
+ u16 control;
+
+ if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE)
+ continue;
+
+ if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data,
+ sub->datalen))
+ return;
+
+ control = le16_to_cpu(prof->control);
+
+ if (link_id != u16_get_bits(control,
+ IEEE80211_MLE_STA_CONTROL_LINK_ID))
+ continue;
+
+ if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE))
+ return;
+
+ /* the sub element can be fragmented */
+ sta_prof_len =
+ cfg80211_defragment_element(sub,
+ (u8 *)ml, ml_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+
+ if (sta_prof_len < 0)
+ return;
+
+ elems->prof = (void *)elems_parse->scratch_pos;
+ elems->sta_prof_len = sta_prof_len;
+ elems_parse->scratch_pos += sta_prof_len;
+
+ return;
+ }
+}
+
+static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elems_parse_params *params)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ struct ieee80211_mle_per_sta_profile *prof;
+ struct ieee80211_elems_parse_params sub = {
+ .mode = params->mode,
+ .action = params->action,
+ .from_ap = params->from_ap,
+ .link_id = -1,
+ };
+ ssize_t ml_len = elems->ml_basic_len;
+ const struct element *non_inherit = NULL;
+ const u8 *end;
+
+ ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem,
+ elems->ie_start,
+ elems->total_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ WLAN_EID_FRAGMENT);
+
+ if (ml_len < 0)
+ return;
+
+ elems->ml_basic = (const void *)elems_parse->scratch_pos;
+ elems->ml_basic_len = ml_len;
+ elems_parse->scratch_pos += ml_len;
+
+ if (params->link_id == -1)
+ return;
+
+ ieee80211_mle_get_sta_prof(elems_parse, params->link_id);
+ prof = elems->prof;
+
+ if (!prof)
+ return;
+
+ /* check if we have the 4 bytes for the fixed part in assoc response */
+ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) {
+ elems->prof = NULL;
+ elems->sta_prof_len = 0;
+ return;
+ }
+
+ /*
+ * Skip the capability information and the status code that are expected
+ * as part of the station profile in association response frames. Note
+ * the -1 is because the 'sta_info_len' is accounted to as part of the
+ * per-STA profile, but not part of the 'u8 variable[]' portion.
+ */
+ sub.start = prof->variable + prof->sta_info_len - 1 + 4;
+ end = (const u8 *)prof + elems->sta_prof_len;
+ sub.len = end - sub.start;
+
+ non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ sub.start, sub.len);
+ _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit);
+}
+
+static void
+ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ ssize_t ml_len;
+
+ ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem,
+ elems->ie_start,
+ elems->total_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ WLAN_EID_FRAGMENT);
+ if (ml_len < 0)
+ return;
+ elems->ml_reconf = (void *)elems_parse->scratch_pos;
+ elems->ml_reconf_len = ml_len;
+ elems_parse->scratch_pos += ml_len;
+}
+
+struct ieee802_11_elems *
+ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
+{
+ struct ieee80211_elems_parse *elems_parse;
+ struct ieee802_11_elems *elems;
+ const struct element *non_inherit = NULL;
+ u8 *nontransmitted_profile;
+ int nontransmitted_profile_len = 0;
+ size_t scratch_len = 3 * params->len;
+
+ BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0);
+
+ elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len),
+ GFP_ATOMIC);
+ if (!elems_parse)
+ return NULL;
+
+ elems_parse->scratch_len = scratch_len;
+ elems_parse->scratch_pos = elems_parse->scratch;
+
+ elems = &elems_parse->elems;
+ elems->ie_start = params->start;
+ elems->total_len = params->len;
+
+ nontransmitted_profile = elems_parse->scratch_pos;
+ nontransmitted_profile_len =
+ ieee802_11_find_bssid_profile(params->start, params->len,
+ elems, params->bss,
+ nontransmitted_profile);
+ elems_parse->scratch_pos += nontransmitted_profile_len;
+ non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ nontransmitted_profile,
+ nontransmitted_profile_len);
+
+ elems->crc = _ieee802_11_parse_elems_full(params, elems_parse,
+ non_inherit);
+
+ /* Override with nontransmitted profile, if found */
+ if (nontransmitted_profile_len) {
+ struct ieee80211_elems_parse_params sub = {
+ .mode = params->mode,
+ .start = nontransmitted_profile,
+ .len = nontransmitted_profile_len,
+ .action = params->action,
+ .link_id = params->link_id,
+ };
+
+ _ieee802_11_parse_elems_full(&sub, elems_parse, NULL);
+ }
+
+ ieee80211_mle_parse_link(elems_parse, params);
+
+ ieee80211_mle_defrag_reconf(elems_parse);
+
+ if (elems->tim && !elems->parse_error) {
+ const struct ieee80211_tim_ie *tim_ie = elems->tim;
+
+ elems->dtim_period = tim_ie->dtim_period;
+ elems->dtim_count = tim_ie->dtim_count;
+ }
+
+ /* Override DTIM period and count if needed */
+ if (elems->bssid_index &&
+ elems->bssid_index_len >=
+ offsetofend(struct ieee80211_bssid_index, dtim_period))
+ elems->dtim_period = elems->bssid_index->dtim_period;
+
+ if (elems->bssid_index &&
+ elems->bssid_index_len >=
+ offsetofend(struct ieee80211_bssid_index, dtim_count))
+ elems->dtim_count = elems->bssid_index->dtim_count;
+
+ return elems;
+}
+EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full);
+
+int ieee80211_parse_bitrates(enum nl80211_chan_width width,
+ const struct ieee80211_supported_band *sband,
+ const u8 *srates, int srates_len, u32 *rates)
+{
+ u32 rate_flags = ieee80211_chanwidth_rate_flags(width);
+ struct ieee80211_rate *br;
+ int brate, rate, i, j, count = 0;
+
+ *rates = 0;
+
+ for (i = 0; i < srates_len; i++) {
+ rate = srates[i] & 0x7f;
+
+ for (j = 0; j < sband->n_bitrates; j++) {
+ br = &sband->bitrates[j];
+ if ((rate_flags & br->flags) != rate_flags)
+ continue;
+
+ brate = DIV_ROUND_UP(br->bitrate, 5);
+ if (brate == rate) {
+ *rates |= BIT(j);
+ count++;
+ break;
+ }
+ }
+ }
+ return count;
+}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d5ea5f5bcf3a..23404b275457 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -4,7 +4,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2019, 2022-2024 Intel Corporation
*/
#include <linux/kernel.h>
@@ -37,7 +37,7 @@ void rate_control_rate_init(struct sta_info *sta)
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *chanctx_conf;
- ieee80211_sta_set_rx_nss(&sta->deflink);
+ ieee80211_sta_init_nss(&sta->deflink);
if (!ref)
return;
@@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
rcu_read_unlock();
}
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+ if (sta->uploaded)
+ drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
@@ -278,10 +279,10 @@ void ieee80211_check_rate_mask(struct ieee80211_link_data *link)
u32 user_mask, basic_rates = link->conf->basic_rates;
enum nl80211_band band;
- if (WARN_ON(!link->conf->chandef.chan))
+ if (WARN_ON(!link->conf->chanreq.oper.chan))
return;
- band = link->conf->chandef.chan->band;
+ band = link->conf->chanreq.oper.chan->band;
if (band == NL80211_BAND_S1GHZ) {
/* TODO */
return;
@@ -761,7 +762,7 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata,
u32 i, flags;
*mask = sdata->rc_rateidx_mask[sband->band];
- flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
for (i = 0; i < sband->n_bitrates; i++) {
if ((flags & sband->bitrates[i].flags) != flags)
*mask &= ~BIT(i);
@@ -817,7 +818,7 @@ rate_control_apply_mask_ratetbl(struct sta_info *sta,
mcs_mask, vht_mask))
return;
- chan_width = sta->sdata->vif.bss_conf.chandef.width;
+ chan_width = sta->sdata->vif.bss_conf.chanreq.oper.width;
for (i = 0; i < IEEE80211_TX_RATE_TABLE_SIZE; i++) {
if (rates->rate[i].idx < 0)
break;
@@ -854,7 +855,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
* included in the configured mask and change the rate indexes
* if needed.
*/
- chan_width = sdata->vif.bss_conf.chandef.width;
+ chan_width = sdata->vif.bss_conf.chanreq.oper.width;
for (i = 0; i < max_rates; i++) {
/* Skip invalid rates */
if (rates[i].idx < 0)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0bf72928ccfc..c1f850138405 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -1251,8 +1251,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- u16 sc = le16_to_cpu(hdr->seq_ctrl);
- u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
+ u16 mpdu_seq_num = ieee80211_get_sn(hdr);
u16 head_seq_num, buf_size;
int index;
bool ret = true;
@@ -1435,13 +1434,31 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
if (ieee80211_is_ctl(hdr->frame_control) ||
- ieee80211_is_any_nullfunc(hdr->frame_control) ||
- is_multicast_ether_addr(hdr->addr1))
+ ieee80211_is_any_nullfunc(hdr->frame_control))
return RX_CONTINUE;
if (!rx->sta)
return RX_CONTINUE;
+ if (unlikely(is_multicast_ether_addr(hdr->addr1))) {
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ u16 sn = ieee80211_get_sn(hdr);
+
+ if (!ieee80211_is_data_present(hdr->frame_control))
+ return RX_CONTINUE;
+
+ if (!ieee80211_vif_is_mld(&sdata->vif) ||
+ sdata->vif.type != NL80211_IFTYPE_STATION)
+ return RX_CONTINUE;
+
+ if (sdata->u.mgd.mcast_seq_last != IEEE80211_SN_MODULO &&
+ ieee80211_sn_less_eq(sn, sdata->u.mgd.mcast_seq_last))
+ return RX_DROP_U_DUP;
+
+ sdata->u.mgd.mcast_seq_last = sn;
+ return RX_CONTINUE;
+ }
+
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) {
I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
@@ -3369,8 +3386,7 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
if (color == bss_conf->he_bss_color.color)
ieee80211_obss_color_collision_notify(&rx->sdata->vif,
- BIT_ULL(color),
- GFP_ATOMIC);
+ BIT_ULL(color));
}
}
@@ -3763,6 +3779,28 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
}
break;
+ case WLAN_CATEGORY_PROTECTED_EHT:
+ switch (mgmt->u.action.u.ttlm_req.action_code) {
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_req))
+ goto invalid;
+ goto queue;
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_RES:
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_res))
+ goto invalid;
+ goto queue;
+ default:
+ break;
+ }
+ break;
}
return RX_CONTINUE;
@@ -5192,7 +5230,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
*/
if (!status->link_valid && pubsta->mlo) {
- struct ieee80211_hdr *hdr = (void *)skb->data;
struct link_sta_info *link_sta;
link_sta = link_sta_info_get_bss(rx.sdata,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 645355e5f1bc..0429e59ba387 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -237,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
}
static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
u32 scan_flags, const u8 *da)
{
if (!sdata)
return false;
- /* accept broadcast for OCE */
- if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
- is_broadcast_ether_addr(da))
+
+ /* accept broadcast on 6 GHz and for OCE */
+ if (is_broadcast_ether_addr(da) &&
+ (channel->band == NL80211_BAND_6GHZ ||
+ scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP))
return true;
+
if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
return true;
return ether_addr_equal(da, sdata->vif.addr);
@@ -253,7 +257,6 @@ static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
{
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_sub_if_data *sdata1, *sdata2;
struct ieee80211_mgmt *mgmt = (void *)skb->data;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
@@ -277,12 +280,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (skb->len < min_hdr_len)
return;
- sdata1 = rcu_dereference(local->scan_sdata);
- sdata2 = rcu_dereference(local->sched_scan_sdata);
-
- if (likely(!sdata1 && !sdata2))
- return;
-
if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
/*
* we were passive scanning because of radar/no-IR, but
@@ -293,11 +290,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
}
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
+
+ if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+ struct ieee80211_sub_if_data *sdata1, *sdata2;
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
u32 scan_req_flags = 0, sched_scan_req_flags = 0;
+ sdata1 = rcu_dereference(local->scan_sdata);
+ sdata2 = rcu_dereference(local->sched_scan_sdata);
+
+ if (likely(!sdata1 && !sdata2))
+ return;
+
scan_req = rcu_dereference(local->scan_req);
sched_scan_req = rcu_dereference(local->sched_scan_req);
@@ -310,17 +320,21 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
/* ignore ProbeResp to foreign address or non-bcast (OCE)
* unless scanning with randomised address
*/
- if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ if (!ieee80211_scan_accept_presp(sdata1, channel,
+ scan_req_flags,
mgmt->da) &&
- !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ !ieee80211_scan_accept_presp(sdata2, channel,
+ sched_scan_req_flags,
mgmt->da))
return;
+ } else {
+ /* Beacons are expected only with broadcast address */
+ if (!is_broadcast_ether_addr(mgmt->da))
+ return;
}
- channel = ieee80211_get_channel_khz(local->hw.wiphy,
- ieee80211_rx_status_to_khz(rx_status));
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ /* Do not update the BSS table in case of only monitor interfaces */
+ if (local->open_count == local->monitors)
return;
bss = ieee80211_bss_info_update(local, rx_status,
@@ -394,6 +408,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
req->ie, req->ie_len,
bands_used, req->rates, &chandef,
flags);
+ if (ielen < 0)
+ return false;
local->hw_scan_req->req.ie_len = ielen;
local->hw_scan_req->req.no_cck = req->no_cck;
ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr);
@@ -470,7 +486,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
}
/* Set power back to normal operating levels. */
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
if (!hw_scan && was_scanning) {
ieee80211_configure_filter(local);
@@ -517,7 +533,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
/* Software scan is not supported in multi-channel cases */
- if (local->use_chanctx)
+ if (!local->emulate_chanctx)
return -EOPNOTSUPP;
/*
@@ -547,7 +563,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
ieee80211_configure_filter(local);
/* We need to set power level at maximum rate for scanning. */
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
@@ -671,7 +687,10 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
* After sending probe requests, wait for probe responses
* on the channel.
*/
- *next_delay = IEEE80211_CHANNEL_TIME;
+ *next_delay = msecs_to_jiffies(scan_req->duration) >
+ IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME ?
+ msecs_to_jiffies(scan_req->duration) - IEEE80211_PROBE_DELAY :
+ IEEE80211_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
}
@@ -781,7 +800,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (hw_scan) {
__set_bit(SCAN_HW_SCANNING, &local->scanning);
} else if ((req->n_channels == 1) &&
- (req->channels[0] == local->_oper_chandef.chan)) {
+ (req->channels[0] == local->hw.conf.chandef.chan)) {
/*
* If we are scanning only on the operating channel
* then we do not need to stop normal activities
@@ -799,7 +818,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
ieee80211_configure_filter(local); /* accept probe-responses */
/* We need to ensure power level is at max for scanning. */
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR |
IEEE80211_CHAN_RADAR)) ||
@@ -964,13 +983,13 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
/* If scanning on oper channel, use whatever channel-type
* is currently in use.
*/
- if (chan == local->_oper_chandef.chan)
- local->scan_chandef = local->_oper_chandef;
+ if (chan == local->hw.conf.chandef.chan)
+ local->scan_chandef = local->hw.conf.chandef;
else
local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
set_channel:
- if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
+ if (ieee80211_hw_conf_chan(local))
skip = 1;
/* advance state machine to next channel/band */
@@ -994,7 +1013,10 @@ set_channel:
*/
if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
!scan_req->n_ssids) {
- *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ *next_delay = msecs_to_jiffies(scan_req->duration) >
+ IEEE80211_PASSIVE_CHANNEL_TIME ?
+ msecs_to_jiffies(scan_req->duration) :
+ IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
if (scan_req->n_ssids)
set_bit(SCAN_BEACON_WAIT, &local->scanning);
@@ -1011,7 +1033,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
{
/* switch back to the operating channel */
local->scan_chandef.chan = NULL;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+ ieee80211_hw_conf_chan(local);
/* disable PS */
ieee80211_offchannel_return(local);
@@ -1310,10 +1332,12 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
ieee80211_prepare_scan_chandef(&chandef);
- ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
- &sched_scan_ies, req->ie,
- req->ie_len, bands_used, rate_masks, &chandef,
- flags);
+ ret = ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
+ &sched_scan_ies, req->ie,
+ req->ie_len, bands_used, rate_masks,
+ &chandef, flags);
+ if (ret < 0)
+ goto error;
ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
if (ret == 0) {
@@ -1321,8 +1345,8 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
rcu_assign_pointer(local->sched_scan_req, req);
}
+error:
kfree(ie);
-
out:
if (ret) {
/* Clean in case of failure after HW restart or upon resume. */
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 55959b0b24c5..327c74e296e2 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -19,21 +19,222 @@
#include "sta_info.h"
#include "wme.h"
+static bool
+wbcs_elem_to_chandef(const struct ieee80211_wide_bw_chansw_ie *wbcs_elem,
+ struct cfg80211_chan_def *chandef)
+{
+ u8 ccfs0 = wbcs_elem->new_center_freq_seg0;
+ u8 ccfs1 = wbcs_elem->new_center_freq_seg1;
+ u32 cf0 = ieee80211_channel_to_frequency(ccfs0, chandef->chan->band);
+ u32 cf1 = ieee80211_channel_to_frequency(ccfs1, chandef->chan->band);
+
+ switch (wbcs_elem->new_channel_width) {
+ case IEEE80211_VHT_CHANWIDTH_160MHZ:
+ /* deprecated encoding */
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 = cf0;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+ /* deprecated encoding */
+ chandef->width = NL80211_CHAN_WIDTH_80P80;
+ chandef->center_freq1 = cf0;
+ chandef->center_freq2 = cf1;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80MHZ:
+ chandef->width = NL80211_CHAN_WIDTH_80;
+ chandef->center_freq1 = cf0;
+
+ if (ccfs1) {
+ u8 diff = abs(ccfs0 - ccfs1);
+
+ if (diff == 8) {
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 = cf1;
+ } else if (diff > 8) {
+ chandef->width = NL80211_CHAN_WIDTH_80P80;
+ chandef->center_freq2 = cf1;
+ }
+ }
+ break;
+ case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ default:
+ /* If the WBCS Element is present, new channel bandwidth is
+ * at least 40 MHz.
+ */
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ chandef->center_freq1 = cf0;
+ break;
+ }
+
+ return cfg80211_chandef_valid(chandef);
+}
+
+static void
+validate_chandef_by_ht_vht_oper(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_conn_settings *conn,
+ u32 vht_cap_info,
+ struct cfg80211_chan_def *chandef)
+{
+ u32 control_freq, center_freq1, center_freq2;
+ enum nl80211_chan_width chan_width;
+ struct ieee80211_ht_operation ht_oper;
+ struct ieee80211_vht_operation vht_oper;
+
+ if (conn->mode < IEEE80211_CONN_MODE_HT ||
+ conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) {
+ chandef->chan = NULL;
+ return;
+ }
+
+ control_freq = chandef->chan->center_freq;
+ center_freq1 = chandef->center_freq1;
+ center_freq2 = chandef->center_freq2;
+ chan_width = chandef->width;
+
+ ht_oper.primary_chan = ieee80211_frequency_to_channel(control_freq);
+ if (control_freq != center_freq1)
+ ht_oper.ht_param = control_freq > center_freq1 ?
+ IEEE80211_HT_PARAM_CHA_SEC_BELOW :
+ IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+ else
+ ht_oper.ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+
+ ieee80211_chandef_ht_oper(&ht_oper, chandef);
+
+ if (conn->mode < IEEE80211_CONN_MODE_VHT)
+ return;
+
+ vht_oper.center_freq_seg0_idx =
+ ieee80211_frequency_to_channel(center_freq1);
+ vht_oper.center_freq_seg1_idx = center_freq2 ?
+ ieee80211_frequency_to_channel(center_freq2) : 0;
+
+ switch (chan_width) {
+ case NL80211_CHAN_WIDTH_320:
+ WARN_ON(1);
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ vht_oper.center_freq_seg1_idx = vht_oper.center_freq_seg0_idx;
+ vht_oper.center_freq_seg0_idx +=
+ control_freq < center_freq1 ? -8 : 8;
+ break;
+ case NL80211_CHAN_WIDTH_80P80:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ break;
+ default:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT;
+ break;
+ }
+
+ ht_oper.operation_mode =
+ le16_encode_bits(vht_oper.center_freq_seg1_idx,
+ IEEE80211_HT_OP_MODE_CCFS2_MASK);
+
+ if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
+ &vht_oper, &ht_oper, chandef))
+ chandef->chan = NULL;
+}
+
+static void
+validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_conn_settings *conn,
+ struct cfg80211_chan_def *chandef)
+{
+ struct ieee80211_local *local = sdata->local;
+ u32 control_freq, center_freq1, center_freq2;
+ enum nl80211_chan_width chan_width;
+ struct {
+ struct ieee80211_he_operation _oper;
+ struct ieee80211_he_6ghz_oper _6ghz_oper;
+ } __packed he;
+ struct {
+ struct ieee80211_eht_operation _oper;
+ struct ieee80211_eht_operation_info _oper_info;
+ } __packed eht;
+
+ if (conn->mode < IEEE80211_CONN_MODE_HE) {
+ chandef->chan = NULL;
+ return;
+ }
+
+ control_freq = chandef->chan->center_freq;
+ center_freq1 = chandef->center_freq1;
+ center_freq2 = chandef->center_freq2;
+ chan_width = chandef->width;
+
+ he._oper.he_oper_params =
+ le32_encode_bits(1, IEEE80211_HE_OPERATION_6GHZ_OP_INFO);
+ he._6ghz_oper.primary =
+ ieee80211_frequency_to_channel(control_freq);
+ he._6ghz_oper.ccfs0 = ieee80211_frequency_to_channel(center_freq1);
+ he._6ghz_oper.ccfs1 = center_freq2 ?
+ ieee80211_frequency_to_channel(center_freq2) : 0;
+
+ switch (chan_width) {
+ case NL80211_CHAN_WIDTH_320:
+ he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0;
+ he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -16 : 16;
+ he._6ghz_oper.control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0;
+ he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -8 : 8;
+ fallthrough;
+ case NL80211_CHAN_WIDTH_80P80:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ;
+ break;
+ default:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ;
+ break;
+ }
+
+ if (conn->mode < IEEE80211_CONN_MODE_EHT) {
+ if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper,
+ NULL, chandef))
+ chandef->chan = NULL;
+ } else {
+ eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT;
+ eht._oper_info.control = he._6ghz_oper.control;
+ eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0;
+ eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1;
+
+ if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper,
+ &eht._oper, chandef))
+ chandef->chan = NULL;
+ }
+}
+
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
u32 vht_cap_info,
- ieee80211_conn_flags_t conn_flags, u8 *bssid,
+ struct ieee80211_conn_settings *conn,
+ u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
enum nl80211_band new_band = current_band;
int new_freq;
- u8 new_chan_no;
+ u8 new_chan_no = 0, new_op_class = 0;
struct ieee80211_channel *new_chan;
- struct cfg80211_chan_def new_vht_chandef = {};
+ struct cfg80211_chan_def new_chandef = {};
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
const struct ieee80211_bandwidth_indication *bwi;
+ const struct ieee80211_ext_chansw_ie *ext_chansw_elem;
int secondary_channel_offset = -1;
memset(csa_ie, 0, sizeof(*csa_ie));
@@ -41,36 +242,41 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
sec_chan_offs = elems->sec_chan_offs;
wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
bwi = elems->bandwidth_indication;
+ ext_chansw_elem = elems->ext_chansw_ie;
- if (conn_flags & (IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_40MHZ)) {
+ if (conn->mode < IEEE80211_CONN_MODE_HT ||
+ conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) {
sec_chan_offs = NULL;
wide_bw_chansw_ie = NULL;
}
- if (conn_flags & IEEE80211_CONN_DISABLE_VHT)
+ if (conn->mode < IEEE80211_CONN_MODE_VHT)
wide_bw_chansw_ie = NULL;
- if (elems->ext_chansw_ie) {
- if (!ieee80211_operating_class_to_band(
- elems->ext_chansw_ie->new_operating_class,
- &new_band)) {
- sdata_info(sdata,
- "cannot understand ECSA IE operating class, %d, ignoring\n",
- elems->ext_chansw_ie->new_operating_class);
+ if (ext_chansw_elem) {
+ new_op_class = ext_chansw_elem->new_operating_class;
+
+ if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) {
+ new_op_class = 0;
+ sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n",
+ ext_chansw_elem->new_operating_class);
+ } else {
+ new_chan_no = ext_chansw_elem->new_ch_num;
+ csa_ie->count = ext_chansw_elem->count;
+ csa_ie->mode = ext_chansw_elem->mode;
}
- new_chan_no = elems->ext_chansw_ie->new_ch_num;
- csa_ie->count = elems->ext_chansw_ie->count;
- csa_ie->mode = elems->ext_chansw_ie->mode;
- } else if (elems->ch_switch_ie) {
+ }
+
+ if (!new_op_class && elems->ch_switch_ie) {
new_chan_no = elems->ch_switch_ie->new_ch_num;
csa_ie->count = elems->ch_switch_ie->count;
csa_ie->mode = elems->ch_switch_ie->mode;
- } else {
- /* nothing here we understand */
- return 1;
}
+ /* nothing here we understand */
+ if (!new_chan_no)
+ return 1;
+
/* Mesh Channel Switch Parameters Element */
if (elems->mesh_chansw_params_ie) {
csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl;
@@ -95,7 +301,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
if (sec_chan_offs) {
secondary_channel_offset = sec_chan_offs->sec_chan_offs;
- } else if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ } else if (conn->mode >= IEEE80211_CONN_MODE_HT) {
/* If the secondary channel offset IE is not present,
* we can't know what's the post-CSA offset, so the
* best we can do is use 20MHz.
@@ -107,26 +313,26 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
default:
/* secondary_channel_offset was present but is invalid */
case IEEE80211_HT_PARAM_CHA_SEC_NONE:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_HT20);
break;
case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_HT40PLUS);
break;
case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_HT40MINUS);
break;
case -1:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_NO_HT);
/* keep width for 5/10 MHz channels */
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (sdata->vif.bss_conf.chanreq.oper.width) {
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
- csa_ie->chandef.width =
- sdata->vif.bss_conf.chandef.width;
+ csa_ie->chanreq.oper.width =
+ sdata->vif.bss_conf.chanreq.oper.width;
break;
default:
break;
@@ -134,59 +340,48 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
break;
}
+ /* parse one of the Elements to build a new chandef */
+ memset(&new_chandef, 0, sizeof(new_chandef));
+ new_chandef.chan = new_chan;
if (bwi) {
/* start with the CSA one */
- new_vht_chandef = csa_ie->chandef;
+ new_chandef = csa_ie->chanreq.oper;
/* and update the width accordingly */
- /* FIXME: support 160/320 */
- ieee80211_chandef_eht_oper(&bwi->info, true, true,
- &new_vht_chandef);
- } else if (wide_bw_chansw_ie) {
- u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1;
- struct ieee80211_vht_operation vht_oper = {
- .chan_width =
- wide_bw_chansw_ie->new_channel_width,
- .center_freq_seg0_idx =
- wide_bw_chansw_ie->new_center_freq_seg0,
- .center_freq_seg1_idx = new_seg1,
- /* .basic_mcs_set doesn't matter */
- };
- struct ieee80211_ht_operation ht_oper = {
- .operation_mode =
- cpu_to_le16(new_seg1 <<
- IEEE80211_HT_OP_MODE_CCFS2_SHIFT),
- };
-
- /* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
- * to the previously parsed chandef
- */
- new_vht_chandef = csa_ie->chandef;
-
- /* ignore if parsing fails */
- if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
- vht_cap_info,
- &vht_oper, &ht_oper,
- &new_vht_chandef))
- new_vht_chandef.chan = NULL;
-
- if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ &&
- new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
- ieee80211_chandef_downgrade(&new_vht_chandef);
- if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ &&
- new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
- ieee80211_chandef_downgrade(&new_vht_chandef);
+ ieee80211_chandef_eht_oper(&bwi->info, &new_chandef);
+ } else if (!wide_bw_chansw_ie || !wbcs_elem_to_chandef(wide_bw_chansw_ie,
+ &new_chandef)) {
+ if (!ieee80211_operating_class_to_chandef(new_op_class, new_chan,
+ &new_chandef))
+ new_chandef = csa_ie->chanreq.oper;
}
- /* if VHT data is there validate & use it */
- if (new_vht_chandef.chan) {
- if (!cfg80211_chandef_compatible(&new_vht_chandef,
- &csa_ie->chandef)) {
+ /* check if the new chandef fits the capabilities */
+ if (new_band == NL80211_BAND_6GHZ)
+ validate_chandef_by_6ghz_he_eht_oper(sdata, conn, &new_chandef);
+ else
+ validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info,
+ &new_chandef);
+
+ /* if data is there validate the bandwidth & use it */
+ if (new_chandef.chan) {
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 &&
+ new_chandef.width == NL80211_CHAN_WIDTH_320)
+ ieee80211_chandef_downgrade(&new_chandef, NULL);
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 &&
+ (new_chandef.width == NL80211_CHAN_WIDTH_80P80 ||
+ new_chandef.width == NL80211_CHAN_WIDTH_160))
+ ieee80211_chandef_downgrade(&new_chandef, NULL);
+
+ if (!cfg80211_chandef_compatible(&new_chandef,
+ &csa_ie->chanreq.oper)) {
sdata_info(sdata,
"BSS %pM: CSA has inconsistent channel data, disconnecting\n",
bssid);
return -EINVAL;
}
- csa_ie->chandef = new_vht_chandef;
+
+ csa_ie->chanreq.oper = new_chandef;
}
if (elems->max_channel_switch_time)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bf1adcd96b41..da5fdd6f5c85 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -404,7 +404,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
int i;
for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
- if (!(sta->sta.valid_links & BIT(i)))
+ struct link_sta_info *link_sta;
+
+ link_sta = rcu_access_pointer(sta->link[i]);
+ if (!link_sta)
continue;
sta_remove_link(sta, i, false);
@@ -910,6 +913,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_accept_plinks_update(sdata);
+ ieee80211_check_fast_xmit(sta);
+
return 0;
out_remove:
if (sta->sta.valid_links)
@@ -1561,7 +1566,8 @@ void sta_info_stop(struct ieee80211_local *local)
}
-int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
+int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans,
+ int link_id)
{
struct ieee80211_local *local = sdata->local;
struct sta_info *sta, *tmp;
@@ -1575,12 +1581,18 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
WARN_ON(vlans && !sdata->bss);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
- if (sdata == sta->sdata ||
- (vlans && sdata->bss == sta->sdata->bss)) {
- if (!WARN_ON(__sta_info_destroy_part1(sta)))
- list_add(&sta->free_list, &free_list);
- ret++;
- }
+ if (sdata != sta->sdata &&
+ (!vlans || sdata->bss != sta->sdata->bss))
+ continue;
+
+ if (link_id >= 0 && sta->sta.valid_links &&
+ !(sta->sta.valid_links & BIT(link_id)))
+ continue;
+
+ if (!WARN_ON(__sta_info_destroy_part1(sta)))
+ list_add(&sta->free_list, &free_list);
+
+ ret++;
}
if (!list_empty(&free_list)) {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5ef1554f991f..a52fb76386d0 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2023 Intel Corporation
+ * Copyright(c) 2020-2024 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -482,6 +482,8 @@ struct ieee80211_fragment_cache {
* same for non-MLD STA. This is used as key for searching link STA
* @link_id: Link ID uniquely identifying the link STA. This is 0 for non-MLD
* and set to the corresponding vif LinkId for MLD STA
+ * @op_mode_nss: NSS limit as set by operating mode notification, or 0
+ * @capa_nss: NSS limit as determined by local and peer capabilities
* @link_hash_node: hash node for rhashtable
* @sta: Points to the STA info
* @gtk: group keys negotiated with this station, if any
@@ -518,6 +520,8 @@ struct link_sta_info {
u8 addr[ETH_ALEN];
u8 link_id;
+ u8 op_mode_nss, capa_nss;
+
struct rhlist_head link_hash_node;
struct sta_info *sta;
@@ -886,8 +890,12 @@ void sta_info_stop(struct ieee80211_local *local);
*
* @sdata: sdata to remove all stations from
* @vlans: if the given interface is an AP interface, also flush VLANs
+ * @link_id: if given (>=0), all those STA entries using @link_id only
+ * will be removed. If -1 is passed, all STA entries will be
+ * removed.
*/
-int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans);
+int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans,
+ int link_id);
/**
* sta_info_flush - flush matching STA entries from the STA table
@@ -895,10 +903,14 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans);
* Returns the number of removed STA entries.
*
* @sdata: sdata to remove all stations from
+ * @link_id: if given (>=0), all those STA entries using @link_id only
+ * will be removed. If -1 is passed, all STA entries will be
+ * removed.
*/
-static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata)
+static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata,
+ int link_id)
{
- return __sta_info_flush(sdata, false);
+ return __sta_info_flush(sdata, false, link_id);
}
void sta_set_rate_info_tx(struct sta_info *sta,
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 49730b424141..f07b40916485 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -6,7 +6,7 @@
* Copyright 2014, Intel Corporation
* Copyright 2014 Intel Mobile Communications GmbH
* Copyright 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2019, 2021-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -159,7 +159,7 @@ static void ieee80211_tdls_add_oper_classes(struct ieee80211_link_data *link,
u8 *pos;
u8 op_class;
- if (!ieee80211_chandef_to_operating_class(&link->conf->chandef,
+ if (!ieee80211_chandef_to_operating_class(&link->conf->chanreq.oper,
&op_class))
return;
@@ -347,7 +347,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
(uc.width > sta->tdls_chandef.width &&
!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
sdata->wdev.iftype)))
- ieee80211_chandef_downgrade(&uc);
+ ieee80211_chandef_downgrade(&uc, NULL);
if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
tdls_dbg(sdata, "TDLS ch width upgraded %d -> %d\n",
@@ -382,8 +382,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
if (WARN_ON_ONCE(!sband))
return;
- ieee80211_add_srates_ie(sdata, skb, false, sband->band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, sband->band);
+ ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_SUPP_RATES);
+ ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_EXT_SUPP_RATES);
ieee80211_tdls_add_supp_channels(sdata, skb);
/* add any custom IEs that go before Extended Capabilities */
@@ -438,7 +438,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
if (WARN_ON_ONCE(!sta))
return;
- sta->tdls_chandef = link->conf->chandef;
+ sta->tdls_chandef = link->conf->chanreq.oper;
}
ieee80211_tdls_add_oper_classes(link, skb);
@@ -548,30 +548,14 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
}
/* build the HE-cap from sband */
- if (he_cap &&
- (action_code == WLAN_TDLS_SETUP_REQUEST ||
- action_code == WLAN_TDLS_SETUP_RESPONSE ||
- action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) {
- __le16 he_6ghz_capa;
- u8 cap_size;
-
- cap_size =
- 2 + 1 + sizeof(he_cap->he_cap_elem) +
- ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
- ieee80211_he_ppe_size(he_cap->ppe_thres[0],
- he_cap->he_cap_elem.phy_cap_info);
- pos = skb_put(skb, cap_size);
- pos = ieee80211_ie_build_he_cap(0, pos, he_cap, pos + cap_size);
+ if (action_code == WLAN_TDLS_SETUP_REQUEST ||
+ action_code == WLAN_TDLS_SETUP_RESPONSE ||
+ action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) {
+ ieee80211_put_he_cap(skb, sdata, sband, NULL);
/* Build HE 6Ghz capa IE from sband */
- if (sband->band == NL80211_BAND_6GHZ) {
- cap_size = 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa);
- pos = skb_put(skb, cap_size);
- he_6ghz_capa =
- ieee80211_get_he_6ghz_capa_vif(sband, &sdata->vif);
- pos = ieee80211_write_he_6ghz_cap(pos, he_6ghz_capa,
- pos + cap_size);
- }
+ if (sband->band == NL80211_BAND_6GHZ)
+ ieee80211_put_he_6ghz_cap(skb, sdata, link->smps_mode);
}
/* add any custom IEs that go before EHT capabilities */
@@ -591,21 +575,10 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
}
/* build the EHT-cap from sband */
- if (he_cap && eht_cap &&
- (action_code == WLAN_TDLS_SETUP_REQUEST ||
- action_code == WLAN_TDLS_SETUP_RESPONSE ||
- action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) {
- u8 cap_size;
-
- cap_size =
- 2 + 1 + sizeof(eht_cap->eht_cap_elem) +
- ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
- &eht_cap->eht_cap_elem, false) +
- ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
- eht_cap->eht_cap_elem.phy_cap_info);
- pos = skb_put(skb, cap_size);
- ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + cap_size, false);
- }
+ if (action_code == WLAN_TDLS_SETUP_REQUEST ||
+ action_code == WLAN_TDLS_SETUP_RESPONSE ||
+ action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)
+ ieee80211_put_eht_cap(skb, sdata, sband, NULL);
/* add any remaining IEs */
if (extra_ies_len) {
@@ -638,7 +611,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link,
if (WARN_ON_ONCE(!sta || !ap_sta))
return;
- sta->tdls_chandef = link->conf->chandef;
+ sta->tdls_chandef = link->conf->chanreq.oper;
/* add any custom IEs that go before the QoS IE */
if (extra_ies_len) {
@@ -684,7 +657,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link,
pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap,
- &link->conf->chandef, prot,
+ &link->conf->chanreq.oper, prot,
true);
}
@@ -1413,8 +1386,8 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
u16 opmode;
- /* Nothing to do if the BSS connection uses HT */
- if (!(sdata->deflink.u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT))
+ /* Nothing to do if the BSS connection uses (at least) HT */
+ if (sdata->deflink.u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT)
return;
tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) ||
@@ -2055,8 +2028,9 @@ ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
}
}
-void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
+void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct sta_info *sta;
u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED;
@@ -2066,6 +2040,9 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
continue;
+ if (sta->deflink.link_id != link->link_id)
+ continue;
+
ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr,
NL80211_TDLS_TEARDOWN, reason,
GFP_ATOMIC);
diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c
index 997d0cd27b2d..a413ba29f759 100644
--- a/net/mac80211/tests/elems.c
+++ b/net/mac80211/tests/elems.c
@@ -2,7 +2,7 @@
/*
* KUnit tests for element parsing
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#include <kunit/test.h>
#include "../ieee80211_i.h"
@@ -14,6 +14,7 @@ static void mle_defrag(struct kunit *test)
struct ieee80211_elems_parse_params parse_params = {
.link_id = 12,
.from_ap = true,
+ .mode = IEEE80211_CONN_MODE_EHT,
};
struct ieee802_11_elems *parsed;
struct sk_buff *skb;
@@ -68,7 +69,7 @@ static void mle_defrag(struct kunit *test)
if (IS_ERR_OR_NULL(parsed))
goto free_skb;
- KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic_elem);
+ KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic);
KUNIT_EXPECT_EQ(test,
parsed->ml_basic_len,
2 /* control */ +
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 06835ed4c44f..8e758b5074bd 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -50,7 +50,7 @@
__entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \
__entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \
__entry->center_freq2 = (c) ? (c)->center_freq2 : 0;
-#define CHANDEF_PR_FMT " control:%d.%03d MHz width:%d center: %d.%03d/%d MHz"
+#define CHANDEF_PR_FMT " chandef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)"
#define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \
__entry->center_freq1, __entry->freq1_offset, __entry->center_freq2
@@ -69,22 +69,45 @@
__entry->min_center_freq1 = (c)->center_freq1; \
__entry->min_freq1_offset = (c)->freq1_offset; \
__entry->min_center_freq2 = (c)->center_freq2;
-#define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz"
+#define MIN_CHANDEF_PR_FMT " mindef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)"
#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \
__entry->min_chan_width, \
__entry->min_center_freq1, __entry->min_freq1_offset, \
__entry->min_center_freq2
+#define AP_CHANDEF_ENTRY \
+ __field(u32, ap_control_freq) \
+ __field(u32, ap_freq_offset) \
+ __field(u32, ap_chan_width) \
+ __field(u32, ap_center_freq1) \
+ __field(u32, ap_freq1_offset) \
+ __field(u32, ap_center_freq2)
+
+#define AP_CHANDEF_ASSIGN(c) \
+ __entry->ap_control_freq = (c)->chan ? (c)->chan->center_freq : 0;\
+ __entry->ap_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;\
+ __entry->ap_chan_width = (c)->chan ? (c)->width : 0; \
+ __entry->ap_center_freq1 = (c)->chan ? (c)->center_freq1 : 0; \
+ __entry->ap_freq1_offset = (c)->chan ? (c)->freq1_offset : 0; \
+ __entry->ap_center_freq2 = (c)->chan ? (c)->center_freq2 : 0;
+#define AP_CHANDEF_PR_FMT " ap(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)"
+#define AP_CHANDEF_PR_ARG __entry->ap_control_freq, __entry->ap_freq_offset, \
+ __entry->ap_chan_width, \
+ __entry->ap_center_freq1, __entry->ap_freq1_offset, \
+ __entry->ap_center_freq2
+
#define CHANCTX_ENTRY CHANDEF_ENTRY \
MIN_CHANDEF_ENTRY \
+ AP_CHANDEF_ENTRY \
__field(u8, rx_chains_static) \
__field(u8, rx_chains_dynamic)
#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \
MIN_CHANDEF_ASSIGN(&ctx->conf.min_def) \
+ AP_CHANDEF_ASSIGN(&ctx->conf.ap) \
__entry->rx_chains_static = ctx->conf.rx_chains_static; \
__entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic
-#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT " chains:%d/%d"
-#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \
+#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT AP_CHANDEF_PR_FMT " chains:%d/%d"
+#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, AP_CHANDEF_PR_ARG, \
__entry->rx_chains_static, __entry->rx_chains_dynamic
#define KEY_ENTRY __field(u32, cipher) \
@@ -503,9 +526,9 @@ TRACE_EVENT(drv_link_info_changed,
__entry->ht_operation_mode = link_conf->ht_operation_mode;
__entry->cqm_rssi_thold = link_conf->cqm_rssi_thold;
__entry->cqm_rssi_hyst = link_conf->cqm_rssi_hyst;
- __entry->channel_width = link_conf->chandef.width;
- __entry->channel_cfreq1 = link_conf->chandef.center_freq1;
- __entry->channel_cfreq1_offset = link_conf->chandef.freq1_offset;
+ __entry->channel_width = link_conf->chanreq.oper.width;
+ __entry->channel_cfreq1 = link_conf->chanreq.oper.center_freq1;
+ __entry->channel_cfreq1_offset = link_conf->chanreq.oper.freq1_offset;
__entry->qos = link_conf->qos;
__entry->hidden_ssid = link_conf->hidden_ssid;
__entry->txpower = link_conf->txpower;
@@ -1186,7 +1209,7 @@ DEFINE_EVENT(sta_event, drv_flush_sta,
TP_ARGS(local, sdata, sta)
);
-TRACE_EVENT(drv_channel_switch,
+DECLARE_EVENT_CLASS(chanswitch_evt,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel_switch *ch_switch),
@@ -1201,6 +1224,7 @@ TRACE_EVENT(drv_channel_switch,
__field(u32, device_timestamp)
__field(bool, block_tx)
__field(u8, count)
+ __field(u8, link_id)
),
TP_fast_assign(
@@ -1211,14 +1235,24 @@ TRACE_EVENT(drv_channel_switch,
__entry->device_timestamp = ch_switch->device_timestamp;
__entry->block_tx = ch_switch->block_tx;
__entry->count = ch_switch->count;
+ __entry->link_id = ch_switch->link_id;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " new " CHANDEF_PR_FMT " count:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count
+ LOCAL_PR_FMT VIF_PR_FMT CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu device_ts:%u link_id:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
+ __entry->block_tx, __entry->timestamp,
+ __entry->device_timestamp, __entry->link_id
)
);
+DEFINE_EVENT(chanswitch_evt, drv_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel_switch *ch_switch),
+ TP_ARGS(local, sdata, ch_switch)
+);
+
TRACE_EVENT(drv_set_antenna,
TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
@@ -2098,39 +2132,11 @@ TRACE_EVENT(drv_channel_switch_beacon,
)
);
-TRACE_EVENT(drv_pre_channel_switch,
+DEFINE_EVENT(chanswitch_evt, drv_pre_channel_switch,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel_switch *ch_switch),
-
- TP_ARGS(local, sdata, ch_switch),
-
- TP_STRUCT__entry(
- LOCAL_ENTRY
- VIF_ENTRY
- CHANDEF_ENTRY
- __field(u64, timestamp)
- __field(u32, device_timestamp)
- __field(bool, block_tx)
- __field(u8, count)
- ),
-
- TP_fast_assign(
- LOCAL_ASSIGN;
- VIF_ASSIGN;
- CHANDEF_ASSIGN(&ch_switch->chandef)
- __entry->timestamp = ch_switch->timestamp;
- __entry->device_timestamp = ch_switch->device_timestamp;
- __entry->block_tx = ch_switch->block_tx;
- __entry->count = ch_switch->count;
- ),
-
- TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " prepare channel switch to "
- CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
- __entry->block_tx, __entry->timestamp
- )
+ TP_ARGS(local, sdata, ch_switch)
);
DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch,
@@ -2145,40 +2151,11 @@ DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
TP_ARGS(local, sdata)
);
-TRACE_EVENT(drv_channel_switch_rx_beacon,
+DEFINE_EVENT(chanswitch_evt, drv_channel_switch_rx_beacon,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel_switch *ch_switch),
-
- TP_ARGS(local, sdata, ch_switch),
-
- TP_STRUCT__entry(
- LOCAL_ENTRY
- VIF_ENTRY
- CHANDEF_ENTRY
- __field(u64, timestamp)
- __field(u32, device_timestamp)
- __field(bool, block_tx)
- __field(u8, count)
- ),
-
- TP_fast_assign(
- LOCAL_ASSIGN;
- VIF_ASSIGN;
- CHANDEF_ASSIGN(&ch_switch->chandef)
- __entry->timestamp = ch_switch->timestamp;
- __entry->device_timestamp = ch_switch->device_timestamp;
- __entry->block_tx = ch_switch->block_tx;
- __entry->count = ch_switch->count;
- ),
-
- TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT
- " received a channel switch beacon to "
- CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
- __entry->block_tx, __entry->timestamp
- )
+ TP_ARGS(local, sdata, ch_switch)
);
TRACE_EVENT(drv_get_txpower,
@@ -3035,6 +3012,34 @@ TRACE_EVENT(api_radar_detected,
)
);
+TRACE_EVENT(api_request_smps,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ enum ieee80211_smps_mode smps_mode),
+
+ TP_ARGS(local, sdata, link, smps_mode),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(int, link_id)
+ __field(u32, smps_mode)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->link_id = link->link_id,
+ __entry->smps_mode = smps_mode;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " " VIF_PR_FMT " link:%d, smps_mode:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->smps_mode
+ )
+);
+
/*
* Tracing for internal functions
* (which may also be called in response to driver calls)
@@ -3088,6 +3093,58 @@ TRACE_EVENT(stop_queue,
)
);
+TRACE_EVENT(drv_can_neg_ttlm,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm *neg_ttlm),
+
+ TP_ARGS(local, sdata, neg_ttlm),
+
+ TP_STRUCT__entry(LOCAL_ENTRY
+ VIF_ENTRY
+ __array(u16, downlink, sizeof(u16) * 8)
+ __array(u16, uplink, sizeof(u16) * 8)
+ ),
+
+ TP_fast_assign(LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ memcpy(__entry->downlink, neg_ttlm->downlink,
+ sizeof(neg_ttlm->downlink));
+ memcpy(__entry->uplink, neg_ttlm->uplink,
+ sizeof(neg_ttlm->uplink));
+ ),
+
+ TP_printk(LOCAL_PR_FMT ", " VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG)
+);
+
+TRACE_EVENT(drv_neg_ttlm_res,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_neg_ttlm_res res,
+ struct ieee80211_neg_ttlm *neg_ttlm),
+
+ TP_ARGS(local, sdata, res, neg_ttlm),
+
+ TP_STRUCT__entry(LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u32, res)
+ __array(u16, downlink, sizeof(u16) * 8)
+ __array(u16, uplink, sizeof(u16) * 8)
+ ),
+
+ TP_fast_assign(LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->res = res;
+ memcpy(__entry->downlink, neg_ttlm->downlink,
+ sizeof(neg_ttlm->downlink));
+ memcpy(__entry->uplink, neg_ttlm->uplink,
+ sizeof(neg_ttlm->uplink));
+ ),
+
+ TP_printk(LOCAL_PR_FMT VIF_PR_FMT " response: %d\n ",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->res
+ )
+);
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index c9dbe9aab7bd..aea4ce55c5ac 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -16,8 +16,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 120
-
DECLARE_EVENT_CLASS(mac80211_msg_event,
TP_PROTO(struct va_format *vaf),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 314998fdb1a5..6bf223e6cd1a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -133,6 +133,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
mrate = sband->bitrates[0].bitrate;
for (i = 0; i < sband->n_bitrates; i++) {
struct ieee80211_rate *r = &sband->bitrates[i];
+ u32 flag;
if (r->bitrate > txrate->bitrate)
break;
@@ -145,28 +146,24 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
switch (sband->band) {
case NL80211_BAND_2GHZ:
- case NL80211_BAND_LC: {
- u32 flag;
+ case NL80211_BAND_LC:
if (tx->sdata->deflink.operating_11g_mode)
flag = IEEE80211_RATE_MANDATORY_G;
else
flag = IEEE80211_RATE_MANDATORY_B;
- if (r->flags & flag)
- mrate = r->bitrate;
break;
- }
case NL80211_BAND_5GHZ:
case NL80211_BAND_6GHZ:
- if (r->flags & IEEE80211_RATE_MANDATORY_A)
- mrate = r->bitrate;
+ flag = IEEE80211_RATE_MANDATORY_A;
break;
- case NL80211_BAND_S1GHZ:
- case NL80211_BAND_60GHZ:
- /* TODO, for now fall through */
- case NUM_NL80211_BANDS:
+ default:
+ flag = 0;
WARN_ON(1);
break;
}
+
+ if (r->flags & flag)
+ mrate = r->bitrate;
}
if (rate == -1) {
/* No matching basic rate found; use highest suitable mandatory
@@ -2393,12 +2390,18 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
if (chanctx_conf)
chandef = &chanctx_conf->def;
- else if (!local->use_chanctx)
- chandef = &local->_oper_chandef;
else
goto fail_rcu;
/*
+ * If driver/HW supports IEEE80211_CHAN_CAN_MONITOR we still
+ * shouldn't transmit on disabled channels.
+ */
+ if (!cfg80211_chandef_usable(local->hw.wiphy, chandef,
+ IEEE80211_CHAN_DISABLED))
+ goto fail_rcu;
+
+ /*
* Frame injection is not allowed if beaconing is not allowed
* or if we need radar detection. Beaconing is usually not allowed when
* the mode or operation (Adhoc, AP, Mesh) does not support DFS.
@@ -3048,7 +3051,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
sdata->vif.type == NL80211_IFTYPE_STATION)
goto out;
- if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded)
goto out;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -3100,10 +3103,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* DA SA BSSID */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ rcu_read_lock();
link = rcu_dereference(sdata->link[tdls_link_id]);
- if (WARN_ON_ONCE(!link))
- break;
- memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ if (!WARN_ON_ONCE(!link))
+ memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ rcu_read_unlock();
build.hdr_len = 24;
break;
}
@@ -3926,6 +3930,7 @@ begin:
goto begin;
skb = __skb_dequeue(&tx.skbs);
+ info = IEEE80211_SKB_CB(skb);
if (!skb_queue_empty(&tx.skbs)) {
spin_lock_bh(&fq->lock);
@@ -3957,7 +3962,8 @@ begin:
ieee80211_free_txskb(&local->hw, skb);
goto begin;
} else {
- vif = NULL;
+ info->control.vif = NULL;
+ return skb;
}
break;
case NL80211_IFTYPE_AP_VLAN:
@@ -3970,7 +3976,7 @@ begin:
}
encap_out:
- IEEE80211_SKB_CB(skb)->control.vif = vif;
+ info->control.vif = vif;
if (tx.sta &&
wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
@@ -5030,16 +5036,24 @@ static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon)
return beacon->cntdwn_current_counter;
}
-u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif)
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
struct beacon_data *beacon = NULL;
u8 count = 0;
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return 0;
+
rcu_read_lock();
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ goto unlock;
+
if (sdata->vif.type == NL80211_IFTYPE_AP)
- beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
+ beacon = rcu_dereference(link->u.ap.beacon);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
beacon = rcu_dereference(sdata->u.ibss.presp);
else if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -5081,9 +5095,11 @@ unlock:
}
EXPORT_SYMBOL(ieee80211_beacon_set_cntdwn);
-bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif)
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif,
+ unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
struct beacon_data *beacon = NULL;
u8 *beacon_data;
size_t beacon_data_len;
@@ -5092,9 +5108,17 @@ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif)
if (!ieee80211_sdata_running(sdata))
return false;
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return 0;
+
rcu_read_lock();
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ goto out;
+
if (vif->type == NL80211_IFTYPE_AP) {
- beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
+ beacon = rcu_dereference(link->u.ap.beacon);
if (WARN_ON(!beacon || !beacon->tail))
goto out;
beacon_data = beacon->tail;
@@ -5280,7 +5304,7 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
- ieee80211_beacon_update_cntdwn(vif);
+ ieee80211_beacon_update_cntdwn(vif, link->link_id);
ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 643c54855be6..a237cbcf7b49 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* utilities for mac80211
*/
@@ -46,6 +46,11 @@ struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
}
EXPORT_SYMBOL(wiphy_to_ieee80211_hw);
+const struct ieee80211_conn_settings ieee80211_conn_settings_unlimited = {
+ .mode = IEEE80211_CONN_MODE_EHT,
+ .bw_limit = IEEE80211_CONN_BW_LIMIT_320,
+};
+
u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
enum nl80211_iftype type)
{
@@ -912,776 +917,6 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_queue_delayed_work);
-static void
-ieee80211_parse_extension_element(u32 *crc,
- const struct element *elem,
- struct ieee802_11_elems *elems,
- struct ieee80211_elems_parse_params *params)
-{
- const void *data = elem->data + 1;
- bool calc_crc = false;
- u8 len;
-
- if (!elem->datalen)
- return;
-
- len = elem->datalen - 1;
-
- switch (elem->data[0]) {
- case WLAN_EID_EXT_HE_MU_EDCA:
- calc_crc = true;
- if (len >= sizeof(*elems->mu_edca_param_set))
- elems->mu_edca_param_set = data;
- break;
- case WLAN_EID_EXT_HE_CAPABILITY:
- if (ieee80211_he_capa_size_ok(data, len)) {
- elems->he_cap = data;
- elems->he_cap_len = len;
- }
- break;
- case WLAN_EID_EXT_HE_OPERATION:
- calc_crc = true;
- if (len >= sizeof(*elems->he_operation) &&
- len >= ieee80211_he_oper_size(data) - 1)
- elems->he_operation = data;
- break;
- case WLAN_EID_EXT_UORA:
- if (len >= 1)
- elems->uora_element = data;
- break;
- case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
- if (len == 3)
- elems->max_channel_switch_time = data;
- break;
- case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
- if (len >= sizeof(*elems->mbssid_config_ie))
- elems->mbssid_config_ie = data;
- break;
- case WLAN_EID_EXT_HE_SPR:
- if (len >= sizeof(*elems->he_spr) &&
- len >= ieee80211_he_spr_size(data))
- elems->he_spr = data;
- break;
- case WLAN_EID_EXT_HE_6GHZ_CAPA:
- if (len >= sizeof(*elems->he_6ghz_capa))
- elems->he_6ghz_capa = data;
- break;
- case WLAN_EID_EXT_EHT_CAPABILITY:
- if (ieee80211_eht_capa_size_ok(elems->he_cap,
- data, len,
- params->from_ap)) {
- elems->eht_cap = data;
- elems->eht_cap_len = len;
- }
- break;
- case WLAN_EID_EXT_EHT_OPERATION:
- if (ieee80211_eht_oper_size_ok(data, len))
- elems->eht_operation = data;
- calc_crc = true;
- break;
- case WLAN_EID_EXT_EHT_MULTI_LINK:
- calc_crc = true;
-
- if (ieee80211_mle_size_ok(data, len)) {
- const struct ieee80211_multi_link_elem *mle =
- (void *)data;
-
- switch (le16_get_bits(mle->control,
- IEEE80211_ML_CONTROL_TYPE)) {
- case IEEE80211_ML_CONTROL_TYPE_BASIC:
- elems->ml_basic_elem = (void *)elem;
- elems->ml_basic = data;
- elems->ml_basic_len = len;
- break;
- case IEEE80211_ML_CONTROL_TYPE_RECONF:
- elems->ml_reconf_elem = (void *)elem;
- elems->ml_reconf = data;
- elems->ml_reconf_len = len;
- break;
- default:
- break;
- }
- }
- break;
- case WLAN_EID_EXT_BANDWIDTH_INDICATION:
- if (ieee80211_bandwidth_indication_size_ok(data, len))
- elems->bandwidth_indication = data;
- calc_crc = true;
- break;
- case WLAN_EID_EXT_TID_TO_LINK_MAPPING:
- calc_crc = true;
- if (ieee80211_tid_to_link_map_size_ok(data, len) &&
- elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) {
- elems->ttlm[elems->ttlm_num] = (void *)data;
- elems->ttlm_num++;
- }
- break;
- }
-
- if (crc && calc_crc)
- *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2);
-}
-
-static u32
-_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
- struct ieee802_11_elems *elems,
- const struct element *check_inherit)
-{
- const struct element *elem;
- bool calc_crc = params->filter != 0;
- DECLARE_BITMAP(seen_elems, 256);
- u32 crc = params->crc;
-
- bitmap_zero(seen_elems, 256);
-
- for_each_element(elem, params->start, params->len) {
- const struct element *subelem;
- bool elem_parse_failed;
- u8 id = elem->id;
- u8 elen = elem->datalen;
- const u8 *pos = elem->data;
-
- if (check_inherit &&
- !cfg80211_is_element_inherited(elem,
- check_inherit))
- continue;
-
- switch (id) {
- case WLAN_EID_SSID:
- case WLAN_EID_SUPP_RATES:
- case WLAN_EID_FH_PARAMS:
- case WLAN_EID_DS_PARAMS:
- case WLAN_EID_CF_PARAMS:
- case WLAN_EID_TIM:
- case WLAN_EID_IBSS_PARAMS:
- case WLAN_EID_CHALLENGE:
- case WLAN_EID_RSN:
- case WLAN_EID_ERP_INFO:
- case WLAN_EID_EXT_SUPP_RATES:
- case WLAN_EID_HT_CAPABILITY:
- case WLAN_EID_HT_OPERATION:
- case WLAN_EID_VHT_CAPABILITY:
- case WLAN_EID_VHT_OPERATION:
- case WLAN_EID_MESH_ID:
- case WLAN_EID_MESH_CONFIG:
- case WLAN_EID_PEER_MGMT:
- case WLAN_EID_PREQ:
- case WLAN_EID_PREP:
- case WLAN_EID_PERR:
- case WLAN_EID_RANN:
- case WLAN_EID_CHANNEL_SWITCH:
- case WLAN_EID_EXT_CHANSWITCH_ANN:
- case WLAN_EID_COUNTRY:
- case WLAN_EID_PWR_CONSTRAINT:
- case WLAN_EID_TIMEOUT_INTERVAL:
- case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
- case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
- case WLAN_EID_CHAN_SWITCH_PARAM:
- case WLAN_EID_EXT_CAPABILITY:
- case WLAN_EID_CHAN_SWITCH_TIMING:
- case WLAN_EID_LINK_ID:
- case WLAN_EID_BSS_MAX_IDLE_PERIOD:
- case WLAN_EID_RSNX:
- case WLAN_EID_S1G_BCN_COMPAT:
- case WLAN_EID_S1G_CAPABILITIES:
- case WLAN_EID_S1G_OPERATION:
- case WLAN_EID_AID_RESPONSE:
- case WLAN_EID_S1G_SHORT_BCN_INTERVAL:
- /*
- * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
- * that if the content gets bigger it might be needed more than once
- */
- if (test_bit(id, seen_elems)) {
- elems->parse_error = true;
- continue;
- }
- break;
- }
-
- if (calc_crc && id < 64 && (params->filter & (1ULL << id)))
- crc = crc32_be(crc, pos - 2, elen + 2);
-
- elem_parse_failed = false;
-
- switch (id) {
- case WLAN_EID_LINK_ID:
- if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
- elem_parse_failed = true;
- break;
- }
- elems->lnk_id = (void *)(pos - 2);
- break;
- case WLAN_EID_CHAN_SWITCH_TIMING:
- if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
- elem_parse_failed = true;
- break;
- }
- elems->ch_sw_timing = (void *)pos;
- break;
- case WLAN_EID_EXT_CAPABILITY:
- elems->ext_capab = pos;
- elems->ext_capab_len = elen;
- break;
- case WLAN_EID_SSID:
- elems->ssid = pos;
- elems->ssid_len = elen;
- break;
- case WLAN_EID_SUPP_RATES:
- elems->supp_rates = pos;
- elems->supp_rates_len = elen;
- break;
- case WLAN_EID_DS_PARAMS:
- if (elen >= 1)
- elems->ds_params = pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_TIM:
- if (elen >= sizeof(struct ieee80211_tim_ie)) {
- elems->tim = (void *)pos;
- elems->tim_len = elen;
- } else
- elem_parse_failed = true;
- break;
- case WLAN_EID_VENDOR_SPECIFIC:
- if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
- pos[2] == 0xf2) {
- /* Microsoft OUI (00:50:F2) */
-
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
-
- if (elen >= 5 && pos[3] == 2) {
- /* OUI Type 2 - WMM IE */
- if (pos[4] == 0) {
- elems->wmm_info = pos;
- elems->wmm_info_len = elen;
- } else if (pos[4] == 1) {
- elems->wmm_param = pos;
- elems->wmm_param_len = elen;
- }
- }
- }
- break;
- case WLAN_EID_RSN:
- elems->rsn = pos;
- elems->rsn_len = elen;
- break;
- case WLAN_EID_ERP_INFO:
- if (elen >= 1)
- elems->erp_info = pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_EXT_SUPP_RATES:
- elems->ext_supp_rates = pos;
- elems->ext_supp_rates_len = elen;
- break;
- case WLAN_EID_HT_CAPABILITY:
- if (elen >= sizeof(struct ieee80211_ht_cap))
- elems->ht_cap_elem = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_HT_OPERATION:
- if (elen >= sizeof(struct ieee80211_ht_operation))
- elems->ht_operation = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_VHT_CAPABILITY:
- if (elen >= sizeof(struct ieee80211_vht_cap))
- elems->vht_cap_elem = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_VHT_OPERATION:
- if (elen >= sizeof(struct ieee80211_vht_operation)) {
- elems->vht_operation = (void *)pos;
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
- break;
- }
- elem_parse_failed = true;
- break;
- case WLAN_EID_OPMODE_NOTIF:
- if (elen > 0) {
- elems->opmode_notif = pos;
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
- break;
- }
- elem_parse_failed = true;
- break;
- case WLAN_EID_MESH_ID:
- elems->mesh_id = pos;
- elems->mesh_id_len = elen;
- break;
- case WLAN_EID_MESH_CONFIG:
- if (elen >= sizeof(struct ieee80211_meshconf_ie))
- elems->mesh_config = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_PEER_MGMT:
- elems->peering = pos;
- elems->peering_len = elen;
- break;
- case WLAN_EID_MESH_AWAKE_WINDOW:
- if (elen >= 2)
- elems->awake_window = (void *)pos;
- break;
- case WLAN_EID_PREQ:
- elems->preq = pos;
- elems->preq_len = elen;
- break;
- case WLAN_EID_PREP:
- elems->prep = pos;
- elems->prep_len = elen;
- break;
- case WLAN_EID_PERR:
- elems->perr = pos;
- elems->perr_len = elen;
- break;
- case WLAN_EID_RANN:
- if (elen >= sizeof(struct ieee80211_rann_ie))
- elems->rann = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_CHANNEL_SWITCH:
- if (elen != sizeof(struct ieee80211_channel_sw_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->ch_switch_ie = (void *)pos;
- break;
- case WLAN_EID_EXT_CHANSWITCH_ANN:
- if (elen != sizeof(struct ieee80211_ext_chansw_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->ext_chansw_ie = (void *)pos;
- break;
- case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
- if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->sec_chan_offs = (void *)pos;
- break;
- case WLAN_EID_CHAN_SWITCH_PARAM:
- if (elen <
- sizeof(*elems->mesh_chansw_params_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->mesh_chansw_params_ie = (void *)pos;
- break;
- case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
- if (!params->action ||
- elen < sizeof(*elems->wide_bw_chansw_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->wide_bw_chansw_ie = (void *)pos;
- break;
- case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
- if (params->action) {
- elem_parse_failed = true;
- break;
- }
- /*
- * This is a bit tricky, but as we only care about
- * a few elements, parse them out manually.
- */
- subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
- pos, elen);
- if (subelem) {
- if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie))
- elems->wide_bw_chansw_ie =
- (void *)subelem->data;
- else
- elem_parse_failed = true;
- }
-
- subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION,
- pos, elen);
- if (subelem) {
- const void *edata = subelem->data + 1;
- u8 edatalen = subelem->datalen - 1;
-
- if (ieee80211_bandwidth_indication_size_ok(edata,
- edatalen))
- elems->bandwidth_indication = edata;
- else
- elem_parse_failed = true;
- }
- break;
- case WLAN_EID_COUNTRY:
- elems->country_elem = pos;
- elems->country_elem_len = elen;
- break;
- case WLAN_EID_PWR_CONSTRAINT:
- if (elen != 1) {
- elem_parse_failed = true;
- break;
- }
- elems->pwr_constr_elem = pos;
- break;
- case WLAN_EID_CISCO_VENDOR_SPECIFIC:
- /* Lots of different options exist, but we only care
- * about the Dynamic Transmit Power Control element.
- * First check for the Cisco OUI, then for the DTPC
- * tag (0x00).
- */
- if (elen < 4) {
- elem_parse_failed = true;
- break;
- }
-
- if (pos[0] != 0x00 || pos[1] != 0x40 ||
- pos[2] != 0x96 || pos[3] != 0x00)
- break;
-
- if (elen != 6) {
- elem_parse_failed = true;
- break;
- }
-
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
-
- elems->cisco_dtpc_elem = pos;
- break;
- case WLAN_EID_ADDBA_EXT:
- if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->addba_ext_ie = (void *)pos;
- break;
- case WLAN_EID_TIMEOUT_INTERVAL:
- if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
- elems->timeout_int = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_BSS_MAX_IDLE_PERIOD:
- if (elen >= sizeof(*elems->max_idle_period_ie))
- elems->max_idle_period_ie = (void *)pos;
- break;
- case WLAN_EID_RSNX:
- elems->rsnx = pos;
- elems->rsnx_len = elen;
- break;
- case WLAN_EID_TX_POWER_ENVELOPE:
- if (elen < 1 ||
- elen > sizeof(struct ieee80211_tx_pwr_env))
- break;
-
- if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
- break;
-
- elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
- elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
- elems->tx_pwr_env_num++;
- break;
- case WLAN_EID_EXTENSION:
- ieee80211_parse_extension_element(calc_crc ?
- &crc : NULL,
- elem, elems, params);
- break;
- case WLAN_EID_S1G_CAPABILITIES:
- if (elen >= sizeof(*elems->s1g_capab))
- elems->s1g_capab = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_S1G_OPERATION:
- if (elen == sizeof(*elems->s1g_oper))
- elems->s1g_oper = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_S1G_BCN_COMPAT:
- if (elen == sizeof(*elems->s1g_bcn_compat))
- elems->s1g_bcn_compat = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_AID_RESPONSE:
- if (elen == sizeof(struct ieee80211_aid_response_ie))
- elems->aid_resp = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- default:
- break;
- }
-
- if (elem_parse_failed)
- elems->parse_error = true;
- else
- __set_bit(id, seen_elems);
- }
-
- if (!for_each_element_completed(elem, params->start, params->len))
- elems->parse_error = true;
-
- return crc;
-}
-
-static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
- struct ieee802_11_elems *elems,
- struct cfg80211_bss *bss,
- u8 *nontransmitted_profile)
-{
- const struct element *elem, *sub;
- size_t profile_len = 0;
- bool found = false;
-
- if (!bss || !bss->transmitted_bss)
- return profile_len;
-
- for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
- if (elem->datalen < 2)
- continue;
- if (elem->data[0] < 1 || elem->data[0] > 8)
- continue;
-
- for_each_element(sub, elem->data + 1, elem->datalen - 1) {
- u8 new_bssid[ETH_ALEN];
- const u8 *index;
-
- if (sub->id != 0 || sub->datalen < 4) {
- /* not a valid BSS profile */
- continue;
- }
-
- if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
- sub->data[1] != 2) {
- /* The first element of the
- * Nontransmitted BSSID Profile is not
- * the Nontransmitted BSSID Capability
- * element.
- */
- continue;
- }
-
- memset(nontransmitted_profile, 0, len);
- profile_len = cfg80211_merge_profile(start, len,
- elem,
- sub,
- nontransmitted_profile,
- len);
-
- /* found a Nontransmitted BSSID Profile */
- index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX,
- nontransmitted_profile,
- profile_len);
- if (!index || index[1] < 1 || index[2] == 0) {
- /* Invalid MBSSID Index element */
- continue;
- }
-
- cfg80211_gen_new_bssid(bss->transmitted_bss->bssid,
- elem->data[0],
- index[2],
- new_bssid);
- if (ether_addr_equal(new_bssid, bss->bssid)) {
- found = true;
- elems->bssid_index_len = index[1];
- elems->bssid_index = (void *)&index[2];
- break;
- }
- }
- }
-
- return found ? profile_len : 0;
-}
-
-static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems,
- u8 link_id)
-{
- const struct ieee80211_multi_link_elem *ml = elems->ml_basic;
- ssize_t ml_len = elems->ml_basic_len;
- const struct element *sub;
-
- if (!ml || !ml_len)
- return;
-
- if (le16_get_bits(ml->control, IEEE80211_ML_CONTROL_TYPE) !=
- IEEE80211_ML_CONTROL_TYPE_BASIC)
- return;
-
- for_each_mle_subelement(sub, (u8 *)ml, ml_len) {
- struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
- ssize_t sta_prof_len;
- u16 control;
-
- if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE)
- continue;
-
- if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data,
- sub->datalen))
- return;
-
- control = le16_to_cpu(prof->control);
-
- if (link_id != u16_get_bits(control,
- IEEE80211_MLE_STA_CONTROL_LINK_ID))
- continue;
-
- if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE))
- return;
-
- /* the sub element can be fragmented */
- sta_prof_len =
- cfg80211_defragment_element(sub,
- (u8 *)ml, ml_len,
- elems->scratch_pos,
- elems->scratch +
- elems->scratch_len -
- elems->scratch_pos,
- IEEE80211_MLE_SUBELEM_FRAGMENT);
-
- if (sta_prof_len < 0)
- return;
-
- elems->prof = (void *)elems->scratch_pos;
- elems->sta_prof_len = sta_prof_len;
- elems->scratch_pos += sta_prof_len;
-
- return;
- }
-}
-
-static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems,
- struct ieee80211_elems_parse_params *params)
-{
- struct ieee80211_mle_per_sta_profile *prof;
- struct ieee80211_elems_parse_params sub = {
- .action = params->action,
- .from_ap = params->from_ap,
- .link_id = -1,
- };
- ssize_t ml_len = elems->ml_basic_len;
- const struct element *non_inherit = NULL;
- const u8 *end;
-
- if (params->link_id == -1)
- return;
-
- ml_len = cfg80211_defragment_element(elems->ml_basic_elem,
- elems->ie_start,
- elems->total_len,
- elems->scratch_pos,
- elems->scratch +
- elems->scratch_len -
- elems->scratch_pos,
- WLAN_EID_FRAGMENT);
-
- if (ml_len < 0)
- return;
-
- elems->ml_basic = (const void *)elems->scratch_pos;
- elems->ml_basic_len = ml_len;
-
- ieee80211_mle_get_sta_prof(elems, params->link_id);
- prof = elems->prof;
-
- if (!prof)
- return;
-
- /* check if we have the 4 bytes for the fixed part in assoc response */
- if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) {
- elems->prof = NULL;
- elems->sta_prof_len = 0;
- return;
- }
-
- /*
- * Skip the capability information and the status code that are expected
- * as part of the station profile in association response frames. Note
- * the -1 is because the 'sta_info_len' is accounted to as part of the
- * per-STA profile, but not part of the 'u8 variable[]' portion.
- */
- sub.start = prof->variable + prof->sta_info_len - 1 + 4;
- end = (const u8 *)prof + elems->sta_prof_len;
- sub.len = end - sub.start;
-
- non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- sub.start, sub.len);
- _ieee802_11_parse_elems_full(&sub, elems, non_inherit);
-}
-
-struct ieee802_11_elems *
-ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
-{
- struct ieee802_11_elems *elems;
- const struct element *non_inherit = NULL;
- u8 *nontransmitted_profile;
- int nontransmitted_profile_len = 0;
- size_t scratch_len = 3 * params->len;
-
- elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC);
- if (!elems)
- return NULL;
- elems->ie_start = params->start;
- elems->total_len = params->len;
- elems->scratch_len = scratch_len;
- elems->scratch_pos = elems->scratch;
-
- nontransmitted_profile = elems->scratch_pos;
- nontransmitted_profile_len =
- ieee802_11_find_bssid_profile(params->start, params->len,
- elems, params->bss,
- nontransmitted_profile);
- elems->scratch_pos += nontransmitted_profile_len;
- elems->scratch_len -= nontransmitted_profile_len;
- non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- nontransmitted_profile,
- nontransmitted_profile_len);
-
- elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit);
-
- /* Override with nontransmitted profile, if found */
- if (nontransmitted_profile_len) {
- struct ieee80211_elems_parse_params sub = {
- .start = nontransmitted_profile,
- .len = nontransmitted_profile_len,
- .action = params->action,
- .link_id = params->link_id,
- };
-
- _ieee802_11_parse_elems_full(&sub, elems, NULL);
- }
-
- ieee80211_mle_parse_link(elems, params);
-
- if (elems->tim && !elems->parse_error) {
- const struct ieee80211_tim_ie *tim_ie = elems->tim;
-
- elems->dtim_period = tim_ie->dtim_period;
- elems->dtim_count = tim_ie->dtim_count;
- }
-
- /* Override DTIM period and count if needed */
- if (elems->bssid_index &&
- elems->bssid_index_len >=
- offsetofend(struct ieee80211_bssid_index, dtim_period))
- elems->dtim_period = elems->bssid_index->dtim_period;
-
- if (elems->bssid_index &&
- elems->bssid_index_len >=
- offsetofend(struct ieee80211_bssid_index, dtim_count))
- elems->dtim_count = elems->bssid_index->dtim_count;
-
- return elems;
-}
-EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full);
-
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_queue_params
*qparam, int ac)
@@ -1938,37 +1173,34 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
}
}
-u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end)
+static int ieee80211_put_s1g_cap(struct sk_buff *skb,
+ struct ieee80211_sta_s1g_cap *s1g_cap)
{
- if ((end - pos) < 5)
- return pos;
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_s1g_cap))
+ return -ENOBUFS;
- *pos++ = WLAN_EID_EXTENSION;
- *pos++ = 1 + sizeof(cap);
- *pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA;
- memcpy(pos, &cap, sizeof(cap));
+ skb_put_u8(skb, WLAN_EID_S1G_CAPABILITIES);
+ skb_put_u8(skb, sizeof(struct ieee80211_s1g_cap));
- return pos + 2;
+ skb_put_data(skb, &s1g_cap->cap, sizeof(s1g_cap->cap));
+ skb_put_data(skb, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs));
+
+ return 0;
}
-static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
- u8 *buffer, size_t buffer_len,
- const u8 *ie, size_t ie_len,
- enum nl80211_band band,
- u32 rate_mask,
- struct cfg80211_chan_def *chandef,
- size_t *offset, u32 flags)
+static int ieee80211_put_preq_ies_band(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const u8 *ie, size_t ie_len,
+ size_t *offset,
+ enum nl80211_band band,
+ u32 rate_mask,
+ struct cfg80211_chan_def *chandef,
+ u32 flags)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
- u8 *pos = buffer, *end = buffer + buffer_len;
+ int i, err;
size_t noffset;
- int supp_rates_len, i;
- u8 rates[32];
- int num_rates;
- int ext_rates_len;
u32 rate_flags;
bool have_80mhz = false;
@@ -1981,32 +1213,13 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
rate_flags = ieee80211_chandef_rate_flags(chandef);
/* For direct scan add S1G IE and consider its override bits */
- if (band == NL80211_BAND_S1GHZ) {
- if (end - pos < 2 + sizeof(struct ieee80211_s1g_cap))
- goto out_err;
- pos = ieee80211_ie_build_s1g_cap(pos, &sband->s1g_cap);
- goto done;
- }
+ if (band == NL80211_BAND_S1GHZ)
+ return ieee80211_put_s1g_cap(skb, &sband->s1g_cap);
- num_rates = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
- if ((BIT(i) & rate_mask) == 0)
- continue; /* skip rate */
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- continue;
-
- rates[num_rates++] =
- (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- }
-
- supp_rates_len = min_t(int, num_rates, 8);
-
- if (end - pos < 2 + supp_rates_len)
- goto out_err;
- *pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = supp_rates_len;
- memcpy(pos, rates, supp_rates_len);
- pos += supp_rates_len;
+ err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags,
+ ~rate_mask, WLAN_EID_SUPP_RATES);
+ if (err)
+ return err;
/* insert "request information" if in custom IEs */
if (ie && ie_len) {
@@ -2019,34 +1232,28 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
before_extrates,
ARRAY_SIZE(before_extrates),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
- ext_rates_len = num_rates - supp_rates_len;
- if (ext_rates_len > 0) {
- if (end - pos < 2 + ext_rates_len)
- goto out_err;
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = ext_rates_len;
- memcpy(pos, rates + supp_rates_len, ext_rates_len);
- pos += ext_rates_len;
- }
+ err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags,
+ ~rate_mask, WLAN_EID_EXT_SUPP_RATES);
+ if (err)
+ return err;
if (chandef->chan && sband->band == NL80211_BAND_2GHZ) {
- if (end - pos < 3)
- goto out_err;
- *pos++ = WLAN_EID_DS_PARAMS;
- *pos++ = 1;
- *pos++ = ieee80211_frequency_to_channel(
- chandef->chan->center_freq);
+ if (skb_tailroom(skb) < 3)
+ return -ENOBUFS;
+ skb_put_u8(skb, WLAN_EID_DS_PARAMS);
+ skb_put_u8(skb, 1);
+ skb_put_u8(skb,
+ ieee80211_frequency_to_channel(chandef->chan->center_freq));
}
if (flags & IEEE80211_PROBE_FLAG_MIN_CONTENT)
- goto done;
+ return 0;
/* insert custom IEs that go before HT */
if (ie && ie_len) {
@@ -2061,18 +1268,21 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
noffset = ieee80211_ie_split(ie, ie_len,
before_ht, ARRAY_SIZE(before_ht),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
if (sband->ht_cap.ht_supported) {
- if (end - pos < 2 + sizeof(struct ieee80211_ht_cap))
- goto out_err;
- pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
- sband->ht_cap.cap);
+ u8 *pos;
+
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap))
+ return -ENOBUFS;
+
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_cap));
+ ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
+ sband->ht_cap.cap);
}
/* insert custom IEs that go before VHT */
@@ -2093,10 +1303,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
noffset = ieee80211_ie_split(ie, ie_len,
before_vht, ARRAY_SIZE(before_vht),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
@@ -2111,10 +1320,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
}
if (sband->vht_cap.vht_supported && have_80mhz) {
- if (end - pos < 2 + sizeof(struct ieee80211_vht_cap))
- goto out_err;
- pos = ieee80211_ie_build_vht_cap(pos, &sband->vht_cap,
- sband->vht_cap.cap);
+ u8 *pos;
+
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap))
+ return -ENOBUFS;
+
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap));
+ ieee80211_ie_build_vht_cap(pos, &sband->vht_cap,
+ sband->vht_cap.cap);
}
/* insert custom IEs that go before HE */
@@ -2131,107 +1344,128 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
noffset = ieee80211_ie_split(ie, ie_len,
before_he, ARRAY_SIZE(before_he),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
- he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- if (he_cap &&
- cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ if (cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
IEEE80211_CHAN_NO_HE)) {
- pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end);
- if (!pos)
- goto out_err;
+ err = ieee80211_put_he_cap(skb, sdata, sband, NULL);
+ if (err)
+ return err;
}
- eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
-
- if (eht_cap &&
- cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ if (cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
IEEE80211_CHAN_NO_HE |
IEEE80211_CHAN_NO_EHT)) {
- pos = ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, end,
- sdata->vif.type == NL80211_IFTYPE_AP);
- if (!pos)
- goto out_err;
+ err = ieee80211_put_eht_cap(skb, sdata, sband, NULL);
+ if (err)
+ return err;
}
- if (cfg80211_any_usable_channels(local->hw.wiphy,
- BIT(NL80211_BAND_6GHZ),
- IEEE80211_CHAN_NO_HE)) {
- struct ieee80211_supported_band *sband6;
-
- sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
- he_cap = ieee80211_get_he_iftype_cap_vif(sband6, &sdata->vif);
-
- if (he_cap) {
- enum nl80211_iftype iftype =
- ieee80211_vif_type_p2p(&sdata->vif);
- __le16 cap = ieee80211_get_he_6ghz_capa(sband6, iftype);
-
- pos = ieee80211_write_he_6ghz_cap(pos, cap, end);
- }
- }
+ err = ieee80211_put_he_6ghz_cap(skb, sdata, IEEE80211_SMPS_OFF);
+ if (err)
+ return err;
/*
* If adding more here, adjust code in main.c
* that calculates local->scan_ies_len.
*/
- return pos - buffer;
- out_err:
- WARN_ONCE(1, "not enough space for preq IEs\n");
- done:
- return pos - buffer;
+ return 0;
}
-int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
- size_t buffer_len,
- struct ieee80211_scan_ies *ie_desc,
- const u8 *ie, size_t ie_len,
- u8 bands_used, u32 *rate_masks,
- struct cfg80211_chan_def *chandef,
- u32 flags)
+static int ieee80211_put_preq_ies(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_scan_ies *ie_desc,
+ const u8 *ie, size_t ie_len,
+ u8 bands_used, u32 *rate_masks,
+ struct cfg80211_chan_def *chandef,
+ u32 flags)
{
- size_t pos = 0, old_pos = 0, custom_ie_offset = 0;
- int i;
+ size_t custom_ie_offset = 0;
+ int i, err;
memset(ie_desc, 0, sizeof(*ie_desc));
for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (bands_used & BIT(i)) {
- pos += ieee80211_build_preq_ies_band(sdata,
- buffer + pos,
- buffer_len - pos,
- ie, ie_len, i,
- rate_masks[i],
- chandef,
- &custom_ie_offset,
- flags);
- ie_desc->ies[i] = buffer + old_pos;
- ie_desc->len[i] = pos - old_pos;
- old_pos = pos;
+ ie_desc->ies[i] = skb_tail_pointer(skb);
+ err = ieee80211_put_preq_ies_band(skb, sdata,
+ ie, ie_len,
+ &custom_ie_offset,
+ i, rate_masks[i],
+ chandef, flags);
+ if (err)
+ return err;
+ ie_desc->len[i] = skb_tail_pointer(skb) -
+ ie_desc->ies[i];
}
}
/* add any remaining custom IEs */
if (ie && ie_len) {
- if (WARN_ONCE(buffer_len - pos < ie_len - custom_ie_offset,
+ if (WARN_ONCE(skb_tailroom(skb) < ie_len - custom_ie_offset,
"not enough space for preq custom IEs\n"))
- return pos;
- memcpy(buffer + pos, ie + custom_ie_offset,
- ie_len - custom_ie_offset);
- ie_desc->common_ies = buffer + pos;
- ie_desc->common_ie_len = ie_len - custom_ie_offset;
- pos += ie_len - custom_ie_offset;
+ return -ENOBUFS;
+ ie_desc->common_ies = skb_tail_pointer(skb);
+ skb_put_data(skb, ie + custom_ie_offset,
+ ie_len - custom_ie_offset);
+ ie_desc->common_ie_len = skb_tail_pointer(skb) -
+ ie_desc->common_ies;
}
- return pos;
+ return 0;
};
+int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
+ size_t buffer_len,
+ struct ieee80211_scan_ies *ie_desc,
+ const u8 *ie, size_t ie_len,
+ u8 bands_used, u32 *rate_masks,
+ struct cfg80211_chan_def *chandef,
+ u32 flags)
+{
+ struct sk_buff *skb = alloc_skb(buffer_len, GFP_KERNEL);
+ uintptr_t offs;
+ int ret, i;
+ u8 *start;
+
+ if (!skb)
+ return -ENOMEM;
+
+ start = skb_tail_pointer(skb);
+ memset(start, 0, skb_tailroom(skb));
+ ret = ieee80211_put_preq_ies(skb, sdata, ie_desc, ie, ie_len,
+ bands_used, rate_masks, chandef,
+ flags);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (skb->len > buffer_len) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ memcpy(buffer, start, skb->len);
+
+ /* adjust ie_desc for copy */
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ offs = ie_desc->ies[i] - start;
+ ie_desc->ies[i] = buffer + offs;
+ }
+ offs = ie_desc->common_ies - start;
+ ie_desc->common_ies = buffer + offs;
+
+ ret = skb->len;
+out:
+ consume_skb(skb);
+ return ret;
+}
+
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
const u8 *src, const u8 *dst,
u32 ratemask,
@@ -2244,7 +1478,6 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def chandef;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
- int ies_len;
u32 rate_masks[NUM_NL80211_BANDS] = {};
struct ieee80211_scan_ies dummy_ie_desc;
@@ -2253,7 +1486,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
* in order to maximize the chance that we get a response. Some
* badly-behaved APs don't respond when this parameter is included.
*/
- chandef.width = sdata->vif.bss_conf.chandef.width;
+ chandef.width = sdata->vif.bss_conf.chanreq.oper.width;
if (flags & IEEE80211_PROBE_FLAG_DIRECTED)
chandef.chan = NULL;
else
@@ -2265,11 +1498,9 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
return NULL;
rate_masks[chan->band] = ratemask;
- ies_len = ieee80211_build_preq_ies(sdata, skb_tail_pointer(skb),
- skb_tailroom(skb), &dummy_ie_desc,
- ie, ie_len, BIT(chan->band),
- rate_masks, &chandef, flags);
- skb_put(skb, ies_len);
+ ieee80211_put_preq_ies(skb, sdata, &dummy_ie_desc,
+ ie, ie_len, BIT(chan->band),
+ rate_masks, &chandef, flags);
if (dst) {
mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -2295,7 +1526,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!sband))
return 1;
- rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
num_rates = sband->n_bitrates;
supp_rates = 0;
@@ -2416,9 +1648,6 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
lockdep_assert_wiphy(local->hw.wiphy);
- if (!local->use_chanctx)
- return;
-
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
if (conf) {
@@ -2648,20 +1877,20 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
/* add channel contexts */
- if (local->use_chanctx) {
- list_for_each_entry(ctx, &local->chanctx_list, list)
- if (ctx->replace_state !=
- IEEE80211_CHANCTX_REPLACES_OTHER)
- WARN_ON(drv_add_chanctx(local, ctx));
-
- sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
- if (sdata && ieee80211_sdata_running(sdata))
- ieee80211_assign_chanctx(local, sdata, &sdata->deflink);
- }
+ list_for_each_entry(ctx, &local->chanctx_list, list)
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ WARN_ON(drv_add_chanctx(local, ctx));
+
+ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
+ if (sdata && ieee80211_sdata_running(sdata))
+ ieee80211_assign_chanctx(local, sdata, &sdata->deflink);
/* reconfigure hardware */
- ieee80211_hw_config(local, ~0);
+ ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL |
+ IEEE80211_CONF_CHANGE_MONITOR |
+ IEEE80211_CONF_CHANGE_PS |
+ IEEE80211_CONF_CHANGE_RETRY_LIMITS |
+ IEEE80211_CONF_CHANGE_IDLE);
ieee80211_configure_filter(local);
@@ -2706,8 +1935,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
for (link_id = 0;
link_id < ARRAY_SIZE(sdata->vif.link_conf);
link_id++) {
- if (ieee80211_vif_is_mld(&sdata->vif) &&
- !(sdata->vif.active_links & BIT(link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_id))
continue;
link = sdata_dereference(sdata->link[link_id], sdata);
@@ -2756,9 +1984,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
sdata->vif.bss_conf.protected_keep_alive)
changed |= BSS_CHANGED_KEEP_ALIVE;
- if (sdata->vif.bss_conf.eht_puncturing)
- changed |= BSS_CHANGED_EHT_PUNCTURING;
-
ieee80211_bss_info_change_notify(sdata,
changed);
} else if (!WARN_ON(!link)) {
@@ -3109,21 +2334,6 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
return pos;
}
-u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap)
-{
- *pos++ = WLAN_EID_S1G_CAPABILITIES;
- *pos++ = sizeof(struct ieee80211_s1g_cap);
- memset(pos, 0, sizeof(struct ieee80211_s1g_cap));
-
- memcpy(pos, &s1g_cap->cap, sizeof(s1g_cap->cap));
- pos += sizeof(s1g_cap->cap);
-
- memcpy(pos, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs));
- pos += sizeof(s1g_cap->nss_mcs);
-
- return pos;
-}
-
u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u16 cap)
{
@@ -3180,7 +2390,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos;
}
-u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+/* this may return more than ieee80211_put_he_6ghz_cap() will need */
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata)
{
const struct ieee80211_sta_he_cap *he_cap;
struct ieee80211_supported_band *sband;
@@ -3190,7 +2401,7 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
if (!sband)
return 0;
- he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
if (!he_cap)
return 0;
@@ -3201,38 +2412,75 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
he_cap->he_cap_elem.phy_cap_info);
}
-u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
+static void
+ieee80211_get_adjusted_he_cap(const struct ieee80211_conn_settings *conn,
const struct ieee80211_sta_he_cap *he_cap,
- u8 *end)
+ struct ieee80211_he_cap_elem *elem)
{
- struct ieee80211_he_cap_elem elem;
- u8 n;
- u8 ie_len;
- u8 *orig_pos = pos;
+ u8 ru_limit, max_ru;
- /* Make sure we have place for the IE */
- /*
- * TODO: the 1 added is because this temporarily is under the EXTENSION
- * IE. Get rid of it when it moves.
- */
- if (!he_cap)
- return orig_pos;
+ *elem = he_cap->he_cap_elem;
- /* modify on stack first to calculate 'n' and 'ie_len' correctly */
- elem = he_cap->he_cap_elem;
+ switch (conn->bw_limit) {
+ case IEEE80211_CONN_BW_LIMIT_20:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242;
+ break;
+ case IEEE80211_CONN_BW_LIMIT_40:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484;
+ break;
+ case IEEE80211_CONN_BW_LIMIT_80:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996;
+ break;
+ default:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996;
+ break;
+ }
+
+ max_ru = elem->phy_cap_info[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK;
+ max_ru = min(max_ru, ru_limit);
+ elem->phy_cap_info[8] &= ~IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK;
+ elem->phy_cap_info[8] |= max_ru;
- if (disable_flags & IEEE80211_CONN_DISABLE_40MHZ)
- elem.phy_cap_info[0] &=
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) {
+ elem->phy_cap_info[0] &=
~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G);
+ elem->phy_cap_info[9] &=
+ ~IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM;
+ }
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) {
+ elem->phy_cap_info[0] &=
+ ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G);
+ elem->phy_cap_info[5] &=
+ ~IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK;
+ elem->phy_cap_info[7] &=
+ ~(IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ |
+ IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ);
+ }
+}
- if (disable_flags & IEEE80211_CONN_DISABLE_160MHZ)
- elem.phy_cap_info[0] &=
- ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+int ieee80211_put_he_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn)
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ struct ieee80211_he_cap_elem elem;
+ u8 *len;
+ u8 n;
+ u8 ie_len;
- if (disable_flags & IEEE80211_CONN_DISABLE_80P80MHZ)
- elem.phy_cap_info[0] &=
- ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+ if (!conn)
+ conn = &ieee80211_conn_settings_unlimited;
+
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ if (!he_cap)
+ return 0;
+
+ /* modify on stack first to calculate 'n' and 'ie_len' correctly */
+ ieee80211_get_adjusted_he_cap(conn, he_cap, &elem);
n = ieee80211_he_mcs_nss_size(&elem);
ie_len = 2 + 1 +
@@ -3240,19 +2488,17 @@ u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
ieee80211_he_ppe_size(he_cap->ppe_thres[0],
he_cap->he_cap_elem.phy_cap_info);
- if ((end - pos) < ie_len)
- return orig_pos;
+ if (skb_tailroom(skb) < ie_len)
+ return -ENOBUFS;
- *pos++ = WLAN_EID_EXTENSION;
- pos++; /* We'll set the size later below */
- *pos++ = WLAN_EID_EXT_HE_CAPABILITY;
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ len = skb_put(skb, 1); /* We'll set the size later below */
+ skb_put_u8(skb, WLAN_EID_EXT_HE_CAPABILITY);
/* Fixed data */
- memcpy(pos, &elem, sizeof(elem));
- pos += sizeof(elem);
+ skb_put_data(skb, &elem, sizeof(elem));
- memcpy(pos, &he_cap->he_mcs_nss_supp, n);
- pos += n;
+ skb_put_data(skb, &he_cap->he_mcs_nss_supp, n);
/* Check if PPE Threshold should be present */
if ((he_cap->he_cap_elem.phy_cap_info[6] &
@@ -3276,41 +2522,39 @@ u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
n = DIV_ROUND_UP(n, 8);
/* Copy PPE Thresholds */
- memcpy(pos, &he_cap->ppe_thres, n);
- pos += n;
+ skb_put_data(skb, &he_cap->ppe_thres, n);
end:
- orig_pos[1] = (pos - orig_pos) - 2;
- return pos;
+ *len = skb_tail_pointer(skb) - len - 1;
+ return 0;
}
-void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
- enum ieee80211_smps_mode smps_mode,
- struct sk_buff *skb)
+int ieee80211_put_he_6ghz_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_smps_mode smps_mode)
{
struct ieee80211_supported_band *sband;
const struct ieee80211_sband_iftype_data *iftd;
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
- u8 *pos;
- u16 cap;
+ __le16 cap;
if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy,
BIT(NL80211_BAND_6GHZ),
IEEE80211_CHAN_NO_HE))
- return;
+ return 0;
sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ];
iftd = ieee80211_get_sband_iftype_data(sband, iftype);
if (!iftd)
- return;
+ return 0;
/* Check for device HE 6 GHz capability before adding element */
if (!iftd->he_6ghz_capa.capa)
- return;
+ return 0;
- cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
- cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
+ cap = iftd->he_6ghz_capa.capa;
+ cap &= cpu_to_le16(~IEEE80211_HE_6GHZ_CAP_SM_PS);
switch (smps_mode) {
case IEEE80211_SMPS_AUTOMATIC:
@@ -3318,22 +2562,27 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
WARN_ON(1);
fallthrough;
case IEEE80211_SMPS_OFF:
- cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
- IEEE80211_HE_6GHZ_CAP_SM_PS);
+ cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
break;
case IEEE80211_SMPS_STATIC:
- cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC,
- IEEE80211_HE_6GHZ_CAP_SM_PS);
+ cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
break;
case IEEE80211_SMPS_DYNAMIC:
- cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC,
- IEEE80211_HE_6GHZ_CAP_SM_PS);
+ cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
break;
}
- pos = skb_put(skb, 2 + 1 + sizeof(cap));
- ieee80211_write_he_6ghz_cap(pos, cpu_to_le16(cap),
- pos + 2 + 1 + sizeof(cap));
+ if (skb_tailroom(skb) < 2 + 1 + sizeof(cap))
+ return -ENOBUFS;
+
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ skb_put_u8(skb, 1 + sizeof(cap));
+ skb_put_u8(skb, WLAN_EID_EXT_HE_6GHZ_CAPA);
+ skb_put_data(skb, &cap, sizeof(cap));
+ return 0;
}
u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
@@ -3785,7 +3034,6 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
}
void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
- bool support_160, bool support_320,
struct cfg80211_chan_def *chandef)
{
chandef->center_freq1 =
@@ -3804,90 +3052,38 @@ void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
chandef->width = NL80211_CHAN_WIDTH_80;
break;
case IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ:
- if (support_160) {
- chandef->width = NL80211_CHAN_WIDTH_160;
- chandef->center_freq1 =
- ieee80211_channel_to_frequency(info->ccfs1,
- chandef->chan->band);
- } else {
- chandef->width = NL80211_CHAN_WIDTH_80;
- }
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs1,
+ chandef->chan->band);
break;
case IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ:
- if (support_320) {
- chandef->width = NL80211_CHAN_WIDTH_320;
- chandef->center_freq1 =
- ieee80211_channel_to_frequency(info->ccfs1,
- chandef->chan->band);
- } else if (support_160) {
- chandef->width = NL80211_CHAN_WIDTH_160;
- } else {
- chandef->width = NL80211_CHAN_WIDTH_80;
-
- if (chandef->center_freq1 > chandef->chan->center_freq)
- chandef->center_freq1 -= 40;
- else
- chandef->center_freq1 += 40;
- }
+ chandef->width = NL80211_CHAN_WIDTH_320;
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs1,
+ chandef->chan->band);
break;
}
}
-bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local,
const struct ieee80211_he_operation *he_oper,
const struct ieee80211_eht_operation *eht_oper,
struct cfg80211_chan_def *chandef)
{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
struct cfg80211_chan_def he_chandef = *chandef;
const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- bool support_80_80, support_160, support_320;
- u8 he_phy_cap, eht_phy_cap;
u32 freq;
if (chandef->chan->band != NL80211_BAND_6GHZ)
return true;
- sband = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
-
- he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
- if (!he_cap) {
- sdata_info(sdata, "Missing iftype sband data/HE cap");
- return false;
- }
-
- he_phy_cap = he_cap->he_cap_elem.phy_cap_info[0];
- support_160 =
- he_phy_cap &
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
- support_80_80 =
- he_phy_cap &
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
-
- if (!he_oper) {
- sdata_info(sdata,
- "HE is not advertised on (on %d MHz), expect issues\n",
- chandef->chan->center_freq);
+ if (!he_oper)
return false;
- }
-
- eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype);
- if (!eht_cap)
- eht_oper = NULL;
he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
-
- if (!he_6ghz_oper) {
- sdata_info(sdata,
- "HE 6GHz operation missing (on %d MHz), expect issues\n",
- chandef->chan->center_freq);
+ if (!he_6ghz_oper)
return false;
- }
/*
* The EHT operation IE does not contain the primary channel so the
@@ -3896,20 +3092,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
*/
freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary,
NL80211_BAND_6GHZ);
- he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
+ he_chandef.chan = ieee80211_get_channel(local->hw.wiphy, freq);
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
- case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
- bss_conf->power_type = IEEE80211_REG_LPI_AP;
- break;
- case IEEE80211_6GHZ_CTRL_REG_SP_AP:
- bss_conf->power_type = IEEE80211_REG_SP_AP;
- break;
- default:
- bss_conf->power_type = IEEE80211_REG_UNSET_AP;
- break;
- }
+ if (!he_chandef.chan)
+ return false;
if (!eht_oper ||
!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
@@ -3928,13 +3114,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
he_chandef.width = NL80211_CHAN_WIDTH_80;
if (!he_6ghz_oper->ccfs1)
break;
- if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) {
- if (support_160)
- he_chandef.width = NL80211_CHAN_WIDTH_160;
- } else {
- if (support_80_80)
- he_chandef.width = NL80211_CHAN_WIDTH_80P80;
- }
+ if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8)
+ he_chandef.width = NL80211_CHAN_WIDTH_160;
+ else
+ he_chandef.width = NL80211_CHAN_WIDTH_80P80;
break;
}
@@ -3946,30 +3129,17 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
he_chandef.center_freq1 =
ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
NL80211_BAND_6GHZ);
- if (support_80_80 || support_160)
- he_chandef.center_freq2 =
- ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
- NL80211_BAND_6GHZ);
+ he_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+ NL80211_BAND_6GHZ);
}
} else {
- eht_phy_cap = eht_cap->eht_cap_elem.phy_cap_info[0];
- support_320 =
- eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
-
ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
- support_160, support_320,
&he_chandef);
}
- if (!cfg80211_chandef_valid(&he_chandef)) {
- sdata_info(sdata,
- "HE 6GHz operation resulted in invalid chandef: %d MHz/%d/%d MHz/%d MHz\n",
- he_chandef.chan ? he_chandef.chan->center_freq : 0,
- he_chandef.width,
- he_chandef.center_freq1,
- he_chandef.center_freq2);
+ if (!cfg80211_chandef_valid(&he_chandef))
return false;
- }
*chandef = he_chandef;
@@ -4012,121 +3182,62 @@ bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
return true;
}
-int ieee80211_parse_bitrates(enum nl80211_chan_width width,
- const struct ieee80211_supported_band *sband,
- const u8 *srates, int srates_len, u32 *rates)
-{
- u32 rate_flags = ieee80211_chanwidth_rate_flags(width);
- struct ieee80211_rate *br;
- int brate, rate, i, j, count = 0;
-
- *rates = 0;
-
- for (i = 0; i < srates_len; i++) {
- rate = srates[i] & 0x7f;
-
- for (j = 0; j < sband->n_bitrates; j++) {
- br = &sband->bitrates[j];
- if ((rate_flags & br->flags) != rate_flags)
- continue;
-
- brate = DIV_ROUND_UP(br->bitrate, 5);
- if (brate == rate) {
- *rates |= BIT(j);
- count++;
- break;
- }
- }
- }
- return count;
-}
-
-int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band)
+int ieee80211_put_srates_elem(struct sk_buff *skb,
+ const struct ieee80211_supported_band *sband,
+ u32 basic_rates, u32 rate_flags, u32 masked_rates,
+ u8 element_id)
{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- int rate;
- u8 i, rates, *pos;
- u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- u32 rate_flags;
+ u8 i, rates, skip;
- rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
- sband = local->hw.wiphy->bands[band];
rates = 0;
for (i = 0; i < sband->n_bitrates; i++) {
if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
continue;
+ if (masked_rates & BIT(i))
+ continue;
rates++;
}
- if (rates > 8)
- rates = 8;
-
- if (skb_tailroom(skb) < rates + 2)
- return -ENOMEM;
-
- pos = skb_put(skb, rates + 2);
- *pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = rates;
- for (i = 0; i < rates; i++) {
- u8 basic = 0;
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- continue;
- if (need_basic && basic_rates & BIT(i))
- basic = 0x80;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = basic | (u8) rate;
+ if (element_id == WLAN_EID_SUPP_RATES) {
+ rates = min_t(u8, rates, 8);
+ skip = 0;
+ } else {
+ skip = 8;
+ if (rates <= skip)
+ return 0;
+ rates -= skip;
}
- return 0;
-}
+ if (skb_tailroom(skb) < rates + 2)
+ return -ENOBUFS;
-int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- int rate;
- u8 i, exrates, *pos;
- u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- u32 rate_flags;
+ skb_put_u8(skb, element_id);
+ skb_put_u8(skb, rates);
- rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ for (i = 0; i < sband->n_bitrates && rates; i++) {
+ int rate;
+ u8 basic;
- sband = local->hw.wiphy->bands[band];
- exrates = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
continue;
- exrates++;
- }
+ if (masked_rates & BIT(i))
+ continue;
- if (exrates > 8)
- exrates -= 8;
- else
- exrates = 0;
+ if (skip > 0) {
+ skip--;
+ continue;
+ }
- if (skb_tailroom(skb) < exrates + 2)
- return -ENOMEM;
+ basic = basic_rates & BIT(i) ? 0x80 : 0;
- if (exrates) {
- pos = skb_put(skb, exrates + 2);
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = exrates;
- for (i = 8; i < sband->n_bitrates; i++) {
- u8 basic = 0;
- if ((rate_flags & sband->bitrates[i].flags)
- != rate_flags)
- continue;
- if (need_basic && basic_rates & BIT(i))
- basic = 0x80;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = basic | (u8) rate;
- }
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
+ skb_put_u8(skb, basic | (u8)rate);
+ rates--;
}
+
+ WARN(rates > 0, "rates confused: rates:%d, element:%d\n",
+ rates, element_id);
+
return 0;
}
@@ -4338,7 +3449,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chandef;
+ chandef = sdata->vif.bss_conf.chanreq.oper;
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev,
&chandef,
@@ -4386,78 +3497,92 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_radar_detected);
-ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
+void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c,
+ struct ieee80211_conn_settings *conn)
{
- ieee80211_conn_flags_t ret;
- int tmp;
+ enum nl80211_chan_width new_primary_width;
+ struct ieee80211_conn_settings _ignored = {};
+
+ /* allow passing NULL if caller doesn't care */
+ if (!conn)
+ conn = &_ignored;
+
+again:
+ /* no-HT indicates nothing to do */
+ new_primary_width = NL80211_CHAN_WIDTH_20_NOHT;
switch (c->width) {
+ default:
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ WARN_ON_ONCE(1);
+ fallthrough;
case NL80211_CHAN_WIDTH_20:
c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
+ conn->mode = IEEE80211_CONN_MODE_LEGACY;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ c->punctured = 0;
break;
case NL80211_CHAN_WIDTH_40:
c->width = NL80211_CHAN_WIDTH_20;
c->center_freq1 = c->chan->center_freq;
- ret = IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_VHT;
+ if (conn->mode == IEEE80211_CONN_MODE_VHT)
+ conn->mode = IEEE80211_CONN_MODE_HT;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ c->punctured = 0;
break;
case NL80211_CHAN_WIDTH_80:
- tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- c->center_freq1 = c->center_freq1 - 20 + 40 * tmp;
- c->width = NL80211_CHAN_WIDTH_40;
- ret = IEEE80211_CONN_DISABLE_VHT;
+ new_primary_width = NL80211_CHAN_WIDTH_40;
+ if (conn->mode == IEEE80211_CONN_MODE_VHT)
+ conn->mode = IEEE80211_CONN_MODE_HT;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_40;
break;
case NL80211_CHAN_WIDTH_80P80:
c->center_freq2 = 0;
c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80;
break;
case NL80211_CHAN_WIDTH_160:
- /* n_P20 */
- tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P80 */
- tmp /= 4;
- c->center_freq1 = c->center_freq1 - 40 + 80 * tmp;
- c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ;
+ new_primary_width = NL80211_CHAN_WIDTH_80;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80;
break;
case NL80211_CHAN_WIDTH_320:
- /* n_P20 */
- tmp = (150 + c->chan->center_freq - c->center_freq1) / 20;
- /* n_P160 */
- tmp /= 8;
- c->center_freq1 = c->center_freq1 - 80 + 160 * tmp;
- c->width = NL80211_CHAN_WIDTH_160;
- ret = IEEE80211_CONN_DISABLE_320MHZ;
- break;
- default:
- case NL80211_CHAN_WIDTH_20_NOHT:
- WARN_ON_ONCE(1);
- c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
+ new_primary_width = NL80211_CHAN_WIDTH_160;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160;
break;
case NL80211_CHAN_WIDTH_1:
case NL80211_CHAN_WIDTH_2:
case NL80211_CHAN_WIDTH_4:
case NL80211_CHAN_WIDTH_8:
case NL80211_CHAN_WIDTH_16:
+ WARN_ON_ONCE(1);
+ /* keep c->width */
+ conn->mode = IEEE80211_CONN_MODE_S1G;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
WARN_ON_ONCE(1);
/* keep c->width */
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
+ conn->mode = IEEE80211_CONN_MODE_LEGACY;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
break;
}
- WARN_ON_ONCE(!cfg80211_chandef_valid(c));
+ if (new_primary_width != NL80211_CHAN_WIDTH_20_NOHT) {
+ c->center_freq1 = cfg80211_chandef_primary(c, new_primary_width,
+ &c->punctured);
+ c->width = new_primary_width;
+ }
- return ret;
+ /*
+ * With an 80 MHz channel, we might have the puncturing in the primary
+ * 40 Mhz channel, but that's not valid when downgraded to 40 MHz width.
+ * In that case, downgrade again.
+ */
+ if (!cfg80211_chandef_valid(c) && c->punctured)
+ goto again;
+
+ WARN_ON_ONCE(!cfg80211_chandef_valid(c));
}
/*
@@ -4773,7 +3898,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list)
if (link->reserved_radar_required)
- radar_detect |= BIT(link->reserved_chandef.width);
+ radar_detect |= BIT(link->reserved.oper.width);
/*
* An in-place reservation context should not have any assigned vifs
@@ -4787,7 +3912,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
continue;
radar_detect |=
- BIT(link->conf->chandef.width);
+ BIT(link->conf->chanreq.oper.width);
}
return radar_detect;
@@ -5037,7 +4162,8 @@ u16 ieee80211_encode_usf(int listen_interval)
return (u16) listen_interval;
}
-u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+/* this may return more than ieee80211_put_eht_cap() will need */
+u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata)
{
const struct ieee80211_sta_he_cap *he_cap;
const struct ieee80211_sta_eht_cap *eht_cap;
@@ -5049,13 +4175,12 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
if (!sband)
return 0;
- he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
- eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype);
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
if (!he_cap || !eht_cap)
return 0;
- is_ap = iftype == NL80211_IFTYPE_AP ||
- iftype == NL80211_IFTYPE_P2P_GO;
+ is_ap = sdata->vif.type == NL80211_IFTYPE_AP;
n = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
&eht_cap->eht_cap_elem,
@@ -5067,45 +4192,134 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
return 0;
}
-u8 *ieee80211_ie_build_eht_cap(u8 *pos,
- const struct ieee80211_sta_he_cap *he_cap,
- const struct ieee80211_sta_eht_cap *eht_cap,
- u8 *end,
- bool for_ap)
+int ieee80211_put_eht_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn)
{
+ const struct ieee80211_sta_he_cap *he_cap =
+ ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ const struct ieee80211_sta_eht_cap *eht_cap =
+ ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
+ bool for_ap = sdata->vif.type == NL80211_IFTYPE_AP;
+ struct ieee80211_eht_cap_elem_fixed fixed;
+ struct ieee80211_he_cap_elem he;
u8 mcs_nss_len, ppet_len;
+ u8 orig_mcs_nss_len;
u8 ie_len;
- u8 *orig_pos = pos;
+
+ if (!conn)
+ conn = &ieee80211_conn_settings_unlimited;
/* Make sure we have place for the IE */
if (!he_cap || !eht_cap)
- return orig_pos;
+ return 0;
+
+ orig_mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
+ &eht_cap->eht_cap_elem,
+ for_ap);
- mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
- &eht_cap->eht_cap_elem,
- for_ap);
+ ieee80211_get_adjusted_he_cap(conn, he_cap, &he);
+
+ fixed = eht_cap->eht_cap_elem;
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_80)
+ fixed.phy_cap_info[6] &=
+ ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ;
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) {
+ fixed.phy_cap_info[1] &=
+ ~IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK;
+ fixed.phy_cap_info[2] &=
+ ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK;
+ fixed.phy_cap_info[6] &=
+ ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ;
+ }
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320) {
+ fixed.phy_cap_info[0] &=
+ ~IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
+ fixed.phy_cap_info[1] &=
+ ~IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK;
+ fixed.phy_cap_info[2] &=
+ ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK;
+ fixed.phy_cap_info[6] &=
+ ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ;
+ }
+
+ if (conn->bw_limit == IEEE80211_CONN_BW_LIMIT_20)
+ fixed.phy_cap_info[0] &=
+ ~IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ;
+
+ mcs_nss_len = ieee80211_eht_mcs_nss_size(&he, &fixed, for_ap);
ppet_len = ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
- eht_cap->eht_cap_elem.phy_cap_info);
+ fixed.phy_cap_info);
ie_len = 2 + 1 + sizeof(eht_cap->eht_cap_elem) + mcs_nss_len + ppet_len;
- if ((end - pos) < ie_len)
- return orig_pos;
+ if (skb_tailroom(skb) < ie_len)
+ return -ENOBUFS;
- *pos++ = WLAN_EID_EXTENSION;
- *pos++ = ie_len - 2;
- *pos++ = WLAN_EID_EXT_EHT_CAPABILITY;
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ skb_put_u8(skb, ie_len - 2);
+ skb_put_u8(skb, WLAN_EID_EXT_EHT_CAPABILITY);
+ skb_put_data(skb, &fixed, sizeof(fixed));
- /* Fixed data */
- memcpy(pos, &eht_cap->eht_cap_elem, sizeof(eht_cap->eht_cap_elem));
- pos += sizeof(eht_cap->eht_cap_elem);
+ if (mcs_nss_len == 4 && orig_mcs_nss_len != 4) {
+ /*
+ * If the (non-AP) STA became 20 MHz only, then convert from
+ * <=80 to 20-MHz-only format, where MCSes are indicated in
+ * the groups 0-7, 8-9, 10-11, 12-13 rather than just 0-9,
+ * 10-11, 12-13. Thus, use 0-9 for 0-7 and 8-9.
+ */
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss);
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss);
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs11_max_nss);
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss);
+ } else {
+ skb_put_data(skb, &eht_cap->eht_mcs_nss_supp, mcs_nss_len);
+ }
- memcpy(pos, &eht_cap->eht_mcs_nss_supp, mcs_nss_len);
- pos += mcs_nss_len;
+ if (ppet_len)
+ skb_put_data(skb, &eht_cap->eht_ppe_thres, ppet_len);
- if (ppet_len) {
- memcpy(pos, &eht_cap->eht_ppe_thres, ppet_len);
- pos += ppet_len;
- }
+ return 0;
+}
- return pos;
+const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode)
+{
+ static const char * const modes[] = {
+ [IEEE80211_CONN_MODE_S1G] = "S1G",
+ [IEEE80211_CONN_MODE_LEGACY] = "legacy",
+ [IEEE80211_CONN_MODE_HT] = "HT",
+ [IEEE80211_CONN_MODE_VHT] = "VHT",
+ [IEEE80211_CONN_MODE_HE] = "HE",
+ [IEEE80211_CONN_MODE_EHT] = "EHT",
+ };
+
+ if (WARN_ON(mode >= ARRAY_SIZE(modes)))
+ return "<out of range>";
+
+ return modes[mode] ?: "<missing string>";
+}
+
+enum ieee80211_conn_bw_limit
+ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef)
+{
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ return IEEE80211_CONN_BW_LIMIT_20;
+ case NL80211_CHAN_WIDTH_40:
+ return IEEE80211_CONN_BW_LIMIT_40;
+ case NL80211_CHAN_WIDTH_80:
+ return IEEE80211_CONN_BW_LIMIT_80;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ return IEEE80211_CONN_BW_LIMIT_160;
+ case NL80211_CHAN_WIDTH_320:
+ return IEEE80211_CONN_BW_LIMIT_320;
+ default:
+ WARN(1, "unhandled chandef width %d\n", chandef->width);
+ return IEEE80211_CONN_BW_LIMIT_20;
+ }
}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b3a5c3e96a72..642891cafbaf 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -369,7 +369,7 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
link_conf = rcu_dereference(sdata->vif.link_conf[link_id]);
if (eht_cap->has_eht &&
- link_conf->chandef.chan->band == NL80211_BAND_6GHZ) {
+ link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) {
info = eht_cap->eht_cap_elem.phy_cap_info[0];
if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) {
@@ -380,7 +380,7 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
info = he_cap->he_cap_elem.phy_cap_info[0];
- if (link_conf->chandef.chan->band == NL80211_BAND_2GHZ) {
+ if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ) {
if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G)
ret = IEEE80211_STA_RX_BW_40;
else
@@ -515,7 +515,7 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
if (WARN_ON(!link_conf))
bss_width = NL80211_CHAN_WIDTH_20_NOHT;
else
- bss_width = link_conf->chandef.width;
+ bss_width = link_conf->chanreq.oper.width;
rcu_read_unlock();
bw = ieee80211_sta_cap_rx_bw(link_sta);
@@ -541,15 +541,11 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
return bw;
}
-void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta)
+void ieee80211_sta_init_nss(struct link_sta_info *link_sta)
{
u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, eht_rx_nss = 0, rx_nss;
bool support_160;
- /* if we received a notification already don't overwrite it */
- if (link_sta->pub->rx_nss)
- return;
-
if (link_sta->pub->eht_cap.has_eht) {
int i;
const u8 *rx_nss_mcs = (void *)&link_sta->pub->eht_cap.eht_mcs_nss_supp;
@@ -627,7 +623,15 @@ void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta)
rx_nss = max(vht_rx_nss, ht_rx_nss);
rx_nss = max(he_rx_nss, rx_nss);
rx_nss = max(eht_rx_nss, rx_nss);
- link_sta->pub->rx_nss = max_t(u8, 1, rx_nss);
+ rx_nss = max_t(u8, 1, rx_nss);
+ link_sta->capa_nss = rx_nss;
+
+ /* that shouldn't be set yet, but we can handle it anyway */
+ if (link_sta->op_mode_nss)
+ link_sta->pub->rx_nss =
+ min_t(u8, rx_nss, link_sta->op_mode_nss);
+ else
+ link_sta->pub->rx_nss = rx_nss;
}
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
@@ -637,7 +641,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
enum ieee80211_sta_rx_bandwidth new_bw;
struct sta_opmode_info sta_opmode = {};
u32 changed = 0;
- u8 nss, cur_nss;
+ u8 nss;
/* ignore - no support for BF yet */
if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)
@@ -647,23 +651,17 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
nss += 1;
- if (link_sta->pub->rx_nss != nss) {
- cur_nss = link_sta->pub->rx_nss;
- /* Reset rx_nss and call ieee80211_sta_set_rx_nss() which
- * will set the same to max nss value calculated based on capability.
- */
- link_sta->pub->rx_nss = 0;
- ieee80211_sta_set_rx_nss(link_sta);
- /* Do not allow an nss change to rx_nss greater than max_nss
- * negotiated and capped to APs capability during association.
- */
- if (nss <= link_sta->pub->rx_nss) {
- link_sta->pub->rx_nss = nss;
- sta_opmode.rx_nss = nss;
- changed |= IEEE80211_RC_NSS_CHANGED;
- sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
+ if (link_sta->op_mode_nss != nss) {
+ if (nss <= link_sta->capa_nss) {
+ link_sta->op_mode_nss = nss;
+
+ if (nss != link_sta->pub->rx_nss) {
+ link_sta->pub->rx_nss = nss;
+ changed |= IEEE80211_RC_NSS_CHANGED;
+ sta_opmode.rx_nss = link_sta->pub->rx_nss;
+ sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
+ }
} else {
- link_sta->pub->rx_nss = cur_nss;
pr_warn_ratelimited("Ignoring NSS change in VHT Operating Mode Notification from %pM with invalid nss %d",
link_sta->pub->addr, nss);
}
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
index a05c5b971789..3a8612309137 100644
--- a/net/mac80211/wbrf.c
+++ b/net/mac80211/wbrf.c
@@ -23,8 +23,6 @@ void ieee80211_check_wbrf_support(struct ieee80211_local *local)
return;
local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev);
- dev_dbg(dev, "WBRF is %s supported\n",
- local->wbrf_supported ? "" : "not");
}
static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end)
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 94dae7cb6dbd..e40529b8c5c9 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -315,7 +315,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
* Calculate AAD for CCMP/GCMP, returning qos_tid since we
* need that in CCMP also for b_0.
*/
-static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
+static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu)
{
struct ieee80211_hdr *hdr = (void *)skb->data;
__le16 mask_fc;
@@ -340,7 +340,14 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
len_a += 6;
if (ieee80211_is_data_qos(hdr->frame_control)) {
- qos_tid = ieee80211_get_tid(hdr);
+ qos_tid = *ieee80211_get_qos_ctl(hdr);
+
+ if (spp_amsdu)
+ qos_tid &= IEEE80211_QOS_CTL_TID_MASK |
+ IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+ else
+ qos_tid &= IEEE80211_QOS_CTL_TID_MASK;
+
mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER);
len_a += 2;
} else {
@@ -369,10 +376,11 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
return qos_tid;
}
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
+ bool spp_amsdu)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- u8 qos_tid = ccmp_gcmp_aad(skb, aad);
+ u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu);
/* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
* mode authentication are not allowed to collide, yet both are derived
@@ -479,7 +487,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
return 0;
pos += IEEE80211_CCMP_HDR_LEN;
- ccmp_special_blocks(skb, pn, b_0, aad);
+ ccmp_special_blocks(skb, pn, b_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
skb_put(skb, mic_len));
}
@@ -557,7 +566,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
u8 aad[2 * AES_BLOCK_SIZE];
u8 b_0[AES_BLOCK_SIZE];
/* hardware didn't decrypt/verify MIC */
- ccmp_special_blocks(skb, pn, b_0, aad);
+ ccmp_special_blocks(skb, pn, b_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
if (ieee80211_aes_ccm_decrypt(
key->u.ccmp.tfm, b_0, aad,
@@ -581,7 +591,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_CONTINUE;
}
-static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
+static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad,
+ bool spp_amsdu)
{
struct ieee80211_hdr *hdr = (void *)skb->data;
@@ -591,7 +602,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
j_0[14] = 0;
j_0[AES_BLOCK_SIZE - 1] = 0x01;
- ccmp_gcmp_aad(skb, aad);
+ ccmp_gcmp_aad(skb, aad, spp_amsdu);
}
static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id)
@@ -680,7 +691,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
return 0;
pos += IEEE80211_GCMP_HDR_LEN;
- gcmp_special_blocks(skb, pn, j_0, aad);
+ gcmp_special_blocks(skb, pn, j_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
skb_put(skb, IEEE80211_GCMP_MIC_LEN));
}
@@ -753,7 +765,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
u8 aad[2 * AES_BLOCK_SIZE];
u8 j_0[AES_BLOCK_SIZE];
/* hardware didn't decrypt/verify MIC */
- gcmp_special_blocks(skb, pn, j_0, aad);
+ gcmp_special_blocks(skb, pn, j_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
if (ieee80211_aes_gcm_decrypt(
key->u.gcmp.tfm, j_0, aad,
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 8d2eabc71bbe..f13b07ebfb98 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -265,19 +265,27 @@ fail:
return -ENOMEM;
}
+static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu)
+{
+ struct ieee802154_llsec_key_entry *pos;
+ struct mac802154_llsec_key *mkey;
+
+ pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu);
+ mkey = container_of(pos->key, struct mac802154_llsec_key, key);
+
+ llsec_key_put(mkey);
+ kfree_sensitive(pos);
+}
+
int mac802154_llsec_key_del(struct mac802154_llsec *sec,
const struct ieee802154_llsec_key_id *key)
{
struct ieee802154_llsec_key_entry *pos;
list_for_each_entry(pos, &sec->table.keys, list) {
- struct mac802154_llsec_key *mkey;
-
- mkey = container_of(pos->key, struct mac802154_llsec_key, key);
-
if (llsec_key_id_equal(&pos->id, key)) {
list_del_rcu(&pos->list);
- llsec_key_put(mkey);
+ call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu);
return 0;
}
}
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
index 3a5c0e70da77..d8d3413a37f7 100644
--- a/net/mctp/Kconfig
+++ b/net/mctp/Kconfig
@@ -14,6 +14,7 @@ menuconfig MCTP
config MCTP_TEST
bool "MCTP core tests" if !KUNIT_ALL_TESTS
+ select MCTP_FLOWS
depends on MCTP=y && KUNIT=y
default KUNIT_ALL_TESTS
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index f6be58b68c6f..de52a9191da0 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -350,30 +350,102 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
}
-static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
+/* helpers for reading/writing the tag ioc, handling compatibility across the
+ * two versions, and some basic API error checking
+ */
+static int mctp_ioctl_tag_copy_from_user(unsigned long arg,
+ struct mctp_ioc_tag_ctl2 *ctl,
+ bool tagv2)
+{
+ struct mctp_ioc_tag_ctl ctl_compat;
+ unsigned long size;
+ void *ptr;
+ int rc;
+
+ if (tagv2) {
+ size = sizeof(*ctl);
+ ptr = ctl;
+ } else {
+ size = sizeof(ctl_compat);
+ ptr = &ctl_compat;
+ }
+
+ rc = copy_from_user(ptr, (void __user *)arg, size);
+ if (rc)
+ return -EFAULT;
+
+ if (!tagv2) {
+ /* compat, using defaults for new fields */
+ ctl->net = MCTP_INITIAL_DEFAULT_NET;
+ ctl->peer_addr = ctl_compat.peer_addr;
+ ctl->local_addr = MCTP_ADDR_ANY;
+ ctl->flags = ctl_compat.flags;
+ ctl->tag = ctl_compat.tag;
+ }
+
+ if (ctl->flags)
+ return -EINVAL;
+
+ if (ctl->local_addr != MCTP_ADDR_ANY &&
+ ctl->local_addr != MCTP_ADDR_NULL)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int mctp_ioctl_tag_copy_to_user(unsigned long arg,
+ struct mctp_ioc_tag_ctl2 *ctl,
+ bool tagv2)
+{
+ struct mctp_ioc_tag_ctl ctl_compat;
+ unsigned long size;
+ void *ptr;
+ int rc;
+
+ if (tagv2) {
+ ptr = ctl;
+ size = sizeof(*ctl);
+ } else {
+ ctl_compat.peer_addr = ctl->peer_addr;
+ ctl_compat.tag = ctl->tag;
+ ctl_compat.flags = ctl->flags;
+
+ ptr = &ctl_compat;
+ size = sizeof(ctl_compat);
+ }
+
+ rc = copy_to_user((void __user *)arg, ptr, size);
+ if (rc)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int mctp_ioctl_alloctag(struct mctp_sock *msk, bool tagv2,
+ unsigned long arg)
{
struct net *net = sock_net(&msk->sk);
struct mctp_sk_key *key = NULL;
- struct mctp_ioc_tag_ctl ctl;
+ struct mctp_ioc_tag_ctl2 ctl;
unsigned long flags;
u8 tag;
+ int rc;
- if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
- return -EFAULT;
+ rc = mctp_ioctl_tag_copy_from_user(arg, &ctl, tagv2);
+ if (rc)
+ return rc;
if (ctl.tag)
return -EINVAL;
- if (ctl.flags)
- return -EINVAL;
-
- key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY,
- true, &tag);
+ key = mctp_alloc_local_tag(msk, ctl.net, MCTP_ADDR_ANY,
+ ctl.peer_addr, true, &tag);
if (IS_ERR(key))
return PTR_ERR(key);
ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC;
- if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) {
+ rc = mctp_ioctl_tag_copy_to_user(arg, &ctl, tagv2);
+ if (rc) {
unsigned long fl2;
/* Unwind our key allocation: the keys list lock needs to be
* taken before the individual key locks, and we need a valid
@@ -385,28 +457,27 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
__mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_DROPPED);
mctp_key_unref(key);
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
- return -EFAULT;
+ return rc;
}
mctp_key_unref(key);
return 0;
}
-static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
+static int mctp_ioctl_droptag(struct mctp_sock *msk, bool tagv2,
+ unsigned long arg)
{
struct net *net = sock_net(&msk->sk);
- struct mctp_ioc_tag_ctl ctl;
+ struct mctp_ioc_tag_ctl2 ctl;
unsigned long flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
int rc;
u8 tag;
- if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
- return -EFAULT;
-
- if (ctl.flags)
- return -EINVAL;
+ rc = mctp_ioctl_tag_copy_from_user(arg, &ctl, tagv2);
+ if (rc)
+ return rc;
/* Must be a local tag, TO set, preallocated */
if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC))
@@ -422,6 +493,7 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
*/
spin_lock_irqsave(&key->lock, fl2);
if (key->manual_alloc &&
+ ctl.net == key->net &&
ctl.peer_addr == key->peer_addr &&
tag == key->tag) {
__mctp_key_remove(key, net, fl2,
@@ -439,12 +511,17 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+ bool tagv2 = false;
switch (cmd) {
+ case SIOCMCTPALLOCTAG2:
case SIOCMCTPALLOCTAG:
- return mctp_ioctl_alloctag(msk, arg);
+ tagv2 = cmd == SIOCMCTPALLOCTAG2;
+ return mctp_ioctl_alloctag(msk, tagv2, arg);
case SIOCMCTPDROPTAG:
- return mctp_ioctl_droptag(msk, arg);
+ case SIOCMCTPDROPTAG2:
+ tagv2 = cmd == SIOCMCTPDROPTAG2;
+ return mctp_ioctl_droptag(msk, tagv2, arg);
}
return -EINVAL;
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 7a47a58aa54b..eefd7834d9a0 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -73,13 +73,50 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
return NULL;
}
-static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
- mctp_eid_t peer, u8 tag)
+/* A note on the key allocations.
+ *
+ * struct net->mctp.keys contains our set of currently-allocated keys for
+ * MCTP tag management. The lookup tuple for these is the peer EID,
+ * local EID and MCTP tag.
+ *
+ * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
+ * broadcast message is sent, we may receive responses from any peer EID.
+ * Because the broadcast dest address is equivalent to ANY, we create
+ * a key with (local = local-eid, peer = ANY). This allows a match on the
+ * incoming broadcast responses from any peer.
+ *
+ * We perform lookups when packets are received, and when tags are allocated
+ * in two scenarios:
+ *
+ * - when a packet is sent, with a locally-owned tag: we need to find an
+ * unused tag value for the (local, peer) EID pair.
+ *
+ * - when a tag is manually allocated: we need to find an unused tag value
+ * for the peer EID, but don't have a specific local EID at that stage.
+ *
+ * in the latter case, on successful allocation, we end up with a tag with
+ * (local = ANY, peer = peer-eid).
+ *
+ * So, the key set allows both a local EID of ANY, as well as a peer EID of
+ * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
+ * The matching (in mctp_key_match()) during lookup allows the match value to
+ * be ANY in either the dest or source addresses.
+ *
+ * When allocating (+ inserting) a tag, we need to check for conflicts amongst
+ * the existing tag set. This requires macthing either exactly on the local
+ * and peer addresses, or either being ANY.
+ */
+
+static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
+ mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
+ if (key->net != net)
+ return false;
+
if (!mctp_address_matches(key->local_addr, local))
return false;
- if (key->peer_addr != peer)
+ if (!mctp_address_matches(key->peer_addr, peer))
return false;
if (key->tag != tag)
@@ -92,7 +129,7 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
* key exists.
*/
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
- mctp_eid_t peer,
+ unsigned int netid, mctp_eid_t peer,
unsigned long *irqflags)
__acquires(&key->lock)
{
@@ -108,7 +145,7 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry(key, &net->mctp.keys, hlist) {
- if (!mctp_key_match(key, mh->dest, peer, tag))
+ if (!mctp_key_match(key, netid, mh->dest, peer, tag))
continue;
spin_lock(&key->lock);
@@ -131,6 +168,7 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
}
static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+ unsigned int net,
mctp_eid_t local, mctp_eid_t peer,
u8 tag, gfp_t gfp)
{
@@ -140,6 +178,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
if (!key)
return NULL;
+ key->net = net;
key->peer_addr = peer;
key->local_addr = local;
key->tag = tag;
@@ -185,8 +224,8 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
}
hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
- if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
- key->tag)) {
+ if (mctp_key_match(tmp, key->net, key->local_addr,
+ key->peer_addr, key->tag)) {
spin_lock(&tmp->lock);
if (tmp->valid)
rc = -EEXIST;
@@ -327,6 +366,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct mctp_sock *msk;
struct mctp_hdr *mh;
+ unsigned int netid;
unsigned long f;
u8 tag, flags;
int rc;
@@ -345,6 +385,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* grab header, advance data ptr */
mh = mctp_hdr(skb);
+ netid = mctp_cb(skb)->net;
skb_pull(skb, sizeof(struct mctp_hdr));
if (mh->ver != 1)
@@ -358,7 +399,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* lookup socket / reasm context, exactly matching (src,dest,tag).
* we hold a ref on the key, and key->lock held.
*/
- key = mctp_lookup_key(net, skb, mh->src, &f);
+ key = mctp_lookup_key(net, skb, netid, mh->src, &f);
if (flags & MCTP_HDR_FLAG_SOM) {
if (key) {
@@ -368,8 +409,12 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* key lookup to find the socket, but don't use this
* key for reassembly - we'll create a more specific
* one for future packets if required (ie, !EOM).
+ *
+ * this lookup requires key->peer to be MCTP_ADDR_ANY,
+ * it doesn't match just any key->peer.
*/
- any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
+ any_key = mctp_lookup_key(net, skb, netid,
+ MCTP_ADDR_ANY, &f);
if (any_key) {
msk = container_of(any_key->sk,
struct mctp_sock, sk);
@@ -406,7 +451,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* packets for this message
*/
if (!key) {
- key = mctp_key_alloc(msk, mh->dest, mh->src,
+ key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
tag, GFP_ATOMIC);
if (!key) {
rc = -ENOMEM;
@@ -596,11 +641,12 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
refcount_inc(&key->refs);
}
-/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+/* Allocate a locally-owned tag value for (local, peer), and reserve
* it for the socket msk
*/
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t daddr, mctp_eid_t saddr,
+ unsigned int netid,
+ mctp_eid_t local, mctp_eid_t peer,
bool manual, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
@@ -610,11 +656,11 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
u8 tagbits;
/* for NULL destination EIDs, we may get a response from any peer */
- if (daddr == MCTP_ADDR_NULL)
- daddr = MCTP_ADDR_ANY;
+ if (peer == MCTP_ADDR_NULL)
+ peer = MCTP_ADDR_ANY;
/* be optimistic, alloc now */
- key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+ key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
if (!key)
return ERR_PTR(-ENOMEM);
@@ -631,12 +677,24 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
* lock held, they don't change over the lifetime of the key.
*/
+ /* tags are net-specific */
+ if (tmp->net != netid)
+ continue;
+
/* if we don't own the tag, it can't conflict */
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
- if (!(mctp_address_matches(tmp->peer_addr, daddr) &&
- mctp_address_matches(tmp->local_addr, saddr)))
+ /* Since we're avoiding conflicting entries, match peer and
+ * local addresses, including with a wildcard on ANY. See
+ * 'A note on key allocations' for background.
+ */
+ if (peer != MCTP_ADDR_ANY &&
+ !mctp_address_matches(tmp->peer_addr, peer))
+ continue;
+
+ if (local != MCTP_ADDR_ANY &&
+ !mctp_address_matches(tmp->local_addr, local))
continue;
spin_lock(&tmp->lock);
@@ -663,7 +721,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
spin_unlock_irqrestore(&mns->keys_lock, flags);
if (!tagbits) {
- kfree(key);
+ mctp_key_unref(key);
return ERR_PTR(-EBUSY);
}
@@ -671,6 +729,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
}
static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
+ unsigned int netid,
mctp_eid_t daddr,
u8 req_tag, u8 *tagp)
{
@@ -685,6 +744,9 @@ static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
spin_lock_irqsave(&mns->keys_lock, flags);
hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ if (tmp->net != netid)
+ continue;
+
if (tmp->tag != req_tag)
continue;
@@ -843,6 +905,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
/* copy message payload */
skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
+ /* we need to copy the extensions, for MCTP flow data */
+ skb_ext_copy(skb2, skb);
+
/* do route */
rc = rt->output(rt, skb2);
if (rc)
@@ -865,6 +930,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct mctp_sk_key *key;
struct mctp_hdr *hdr;
unsigned long flags;
+ unsigned int netid;
unsigned int mtu;
mctp_eid_t saddr;
bool ext_rt;
@@ -888,7 +954,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
if (!dev) {
rcu_read_unlock();
- return rc;
+ goto out_free;
}
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -903,7 +969,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rt->mtu = 0;
} else {
- return -EINVAL;
+ rc = -EINVAL;
+ goto out_free;
}
spin_lock_irqsave(&rt->dev->addrs_lock, flags);
@@ -915,16 +982,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = 0;
}
spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
+ netid = READ_ONCE(rt->dev->net);
if (rc)
goto out_release;
if (req_tag & MCTP_TAG_OWNER) {
if (req_tag & MCTP_TAG_PREALLOC)
- key = mctp_lookup_prealloc_tag(msk, daddr,
+ key = mctp_lookup_prealloc_tag(msk, netid, daddr,
req_tag, &tag);
else
- key = mctp_alloc_local_tag(msk, daddr, saddr,
+ key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
false, &tag);
if (IS_ERR(key)) {
@@ -966,12 +1034,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = mctp_do_fragment_route(rt, skb, mtu, tag);
}
+ /* route output functions consume the skb, even on error */
+ skb = NULL;
+
out_release:
if (!ext_rt)
mctp_route_release(rt);
mctp_dev_put(tmp_rt.dev);
+out_free:
+ kfree_skb(skb);
return rc;
}
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 92ea4158f7fc..77e5dd422258 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -79,6 +79,16 @@ static void mctp_test_route_destroy(struct kunit *test,
kfree_rcu(&rt->rt, rcu);
}
+static void mctp_test_skb_set_dev(struct sk_buff *skb,
+ struct mctp_test_dev *dev)
+{
+ struct mctp_skb_cb *cb;
+
+ cb = mctp_cb(skb);
+ cb->net = READ_ONCE(dev->mdev->net);
+ skb->dev = dev->ndev;
+}
+
static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
unsigned int data_len)
{
@@ -91,6 +101,7 @@ static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
if (!skb)
return NULL;
+ __mctp_cb(skb);
memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
buf = skb_put(skb, data_len);
@@ -111,6 +122,7 @@ static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr,
if (!skb)
return NULL;
+ __mctp_cb(skb);
memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
memcpy(skb_put(skb, data_len), data, data_len);
@@ -249,8 +261,6 @@ static void mctp_test_rx_input(struct kunit *test)
skb = mctp_test_create_skb(&params->hdr, 1);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- __mctp_cb(skb);
-
mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL);
KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input);
@@ -283,7 +293,8 @@ KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests,
static void __mctp_route_test_init(struct kunit *test,
struct mctp_test_dev **devp,
struct mctp_test_route **rtp,
- struct socket **sockp)
+ struct socket **sockp,
+ unsigned int netid)
{
struct sockaddr_mctp addr = {0};
struct mctp_test_route *rt;
@@ -293,6 +304,8 @@ static void __mctp_route_test_init(struct kunit *test,
dev = mctp_test_create_dev();
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ if (netid != MCTP_NET_ANY)
+ WRITE_ONCE(dev->mdev->net, netid);
rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
@@ -301,7 +314,7 @@ static void __mctp_route_test_init(struct kunit *test,
KUNIT_ASSERT_EQ(test, rc, 0);
addr.smctp_family = AF_MCTP;
- addr.smctp_network = MCTP_NET_ANY;
+ addr.smctp_network = netid;
addr.smctp_addr.s_addr = 8;
addr.smctp_type = 0;
rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr));
@@ -339,13 +352,12 @@ static void mctp_test_route_input_sk(struct kunit *test)
params = test->param_value;
- __mctp_route_test_init(test, &dev, &rt, &sock);
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
skb = mctp_test_create_skb_data(&params->hdr, &params->type);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- skb->dev = dev->ndev;
- __mctp_cb(skb);
+ mctp_test_skb_set_dev(skb, dev);
rc = mctp_route_input(&rt->rt, skb);
@@ -410,15 +422,14 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
params = test->param_value;
- __mctp_route_test_init(test, &dev, &rt, &sock);
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
for (i = 0; i < params->n_hdrs; i++) {
c = i;
skb = mctp_test_create_skb_data(&params->hdrs[i], &c);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- skb->dev = dev->ndev;
- __mctp_cb(skb);
+ mctp_test_skb_set_dev(skb, dev);
rc = mctp_route_input(&rt->rt, skb);
}
@@ -544,6 +555,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
struct mctp_sock *msk;
struct socket *sock;
unsigned long flags;
+ unsigned int net;
int rc;
u8 c;
@@ -551,6 +563,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
dev = mctp_test_create_dev();
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ net = READ_ONCE(dev->mdev->net);
rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
@@ -562,8 +575,9 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
mns = &sock_net(sock->sk)->mctp;
/* set the incoming tag according to test params */
- key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr,
- params->key_tag, GFP_KERNEL);
+ key = mctp_key_alloc(msk, net, params->key_local_addr,
+ params->key_peer_addr, params->key_tag,
+ GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key);
@@ -576,8 +590,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
skb = mctp_test_create_skb_data(&params->hdr, &c);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- skb->dev = dev->ndev;
- __mctp_cb(skb);
+ mctp_test_skb_set_dev(skb, dev);
rc = mctp_route_input(&rt->rt, skb);
@@ -665,6 +678,373 @@ static void mctp_route_input_sk_keys_to_desc(
KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests,
mctp_route_input_sk_keys_to_desc);
+struct test_net {
+ unsigned int netid;
+ struct mctp_test_dev *dev;
+ struct mctp_test_route *rt;
+ struct socket *sock;
+ struct sk_buff *skb;
+ struct mctp_sk_key *key;
+ struct {
+ u8 type;
+ unsigned int data;
+ } msg;
+};
+
+static void
+mctp_test_route_input_multiple_nets_bind_init(struct kunit *test,
+ struct test_net *t)
+{
+ struct mctp_hdr hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO);
+
+ t->msg.data = t->netid;
+
+ __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid);
+
+ t->skb = mctp_test_create_skb_data(&hdr, &t->msg);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb);
+ mctp_test_skb_set_dev(t->skb, t->dev);
+}
+
+static void
+mctp_test_route_input_multiple_nets_bind_fini(struct kunit *test,
+ struct test_net *t)
+{
+ __mctp_route_test_fini(test, t->dev, t->rt, t->sock);
+}
+
+/* Test that skbs from different nets (otherwise identical) get routed to their
+ * corresponding socket via the sockets' bind()
+ */
+static void mctp_test_route_input_multiple_nets_bind(struct kunit *test)
+{
+ struct sk_buff *rx_skb1, *rx_skb2;
+ struct test_net t1, t2;
+ int rc;
+
+ t1.netid = 1;
+ t2.netid = 2;
+
+ t1.msg.type = 0;
+ t2.msg.type = 0;
+
+ mctp_test_route_input_multiple_nets_bind_init(test, &t1);
+ mctp_test_route_input_multiple_nets_bind_init(test, &t2);
+
+ rc = mctp_route_input(&t1.rt->rt, t1.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+ rc = mctp_route_input(&t2.rt->rt, t2.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb1);
+ KUNIT_EXPECT_EQ(test, rx_skb1->len, sizeof(t1.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb1, sizeof(t1.msg.data)),
+ t1.netid);
+ kfree_skb(rx_skb1);
+
+ rx_skb2 = skb_recv_datagram(t2.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb2);
+ KUNIT_EXPECT_EQ(test, rx_skb2->len, sizeof(t2.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb2, sizeof(t2.msg.data)),
+ t2.netid);
+ kfree_skb(rx_skb2);
+
+ mctp_test_route_input_multiple_nets_bind_fini(test, &t1);
+ mctp_test_route_input_multiple_nets_bind_fini(test, &t2);
+}
+
+static void
+mctp_test_route_input_multiple_nets_key_init(struct kunit *test,
+ struct test_net *t)
+{
+ struct mctp_hdr hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1));
+ struct mctp_sock *msk;
+ struct netns_mctp *mns;
+ unsigned long flags;
+
+ t->msg.data = t->netid;
+
+ __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid);
+
+ msk = container_of(t->sock->sk, struct mctp_sock, sk);
+
+ t->key = mctp_key_alloc(msk, t->netid, hdr.dest, hdr.src, 1, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->key);
+
+ mns = &sock_net(t->sock->sk)->mctp;
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ mctp_reserve_tag(&init_net, t->key, msk);
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->key);
+ t->skb = mctp_test_create_skb_data(&hdr, &t->msg);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb);
+ mctp_test_skb_set_dev(t->skb, t->dev);
+}
+
+static void
+mctp_test_route_input_multiple_nets_key_fini(struct kunit *test,
+ struct test_net *t)
+{
+ mctp_key_unref(t->key);
+ __mctp_route_test_fini(test, t->dev, t->rt, t->sock);
+}
+
+/* test that skbs from different nets (otherwise identical) get routed to their
+ * corresponding socket via the sk_key
+ */
+static void mctp_test_route_input_multiple_nets_key(struct kunit *test)
+{
+ struct sk_buff *rx_skb1, *rx_skb2;
+ struct test_net t1, t2;
+ int rc;
+
+ t1.netid = 1;
+ t2.netid = 2;
+
+ /* use type 1 which is not bound */
+ t1.msg.type = 1;
+ t2.msg.type = 1;
+
+ mctp_test_route_input_multiple_nets_key_init(test, &t1);
+ mctp_test_route_input_multiple_nets_key_init(test, &t2);
+
+ rc = mctp_route_input(&t1.rt->rt, t1.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+ rc = mctp_route_input(&t2.rt->rt, t2.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb1);
+ KUNIT_EXPECT_EQ(test, rx_skb1->len, sizeof(t1.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb1, sizeof(t1.msg.data)),
+ t1.netid);
+ kfree_skb(rx_skb1);
+
+ rx_skb2 = skb_recv_datagram(t2.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb2);
+ KUNIT_EXPECT_EQ(test, rx_skb2->len, sizeof(t2.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb2, sizeof(t2.msg.data)),
+ t2.netid);
+ kfree_skb(rx_skb2);
+
+ mctp_test_route_input_multiple_nets_key_fini(test, &t1);
+ mctp_test_route_input_multiple_nets_key_fini(test, &t2);
+}
+
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+
+static void mctp_test_flow_init(struct kunit *test,
+ struct mctp_test_dev **devp,
+ struct mctp_test_route **rtp,
+ struct socket **sock,
+ struct sk_buff **skbp,
+ unsigned int len)
+{
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb;
+
+ /* we have a slightly odd routing setup here; the test route
+ * is for EID 8, which is our local EID. We don't do a routing
+ * lookup, so that's fine - all we require is a path through
+ * mctp_local_output, which will call rt->output on whatever
+ * route we provide
+ */
+ __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY);
+
+ /* Assign a single EID. ->addrs is freed on mctp netdev release */
+ dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL);
+ dev->mdev->num_addrs = 1;
+ dev->mdev->addrs[0] = 8;
+
+ skb = alloc_skb(len + sizeof(struct mctp_hdr) + 1, GFP_KERNEL);
+ KUNIT_ASSERT_TRUE(test, skb);
+ __mctp_cb(skb);
+ skb_reserve(skb, sizeof(struct mctp_hdr) + 1);
+ memset(skb_put(skb, len), 0, len);
+
+ /* take a ref for the route, we'll decrement in local output */
+ refcount_inc(&rt->rt.refs);
+
+ *devp = dev;
+ *rtp = rt;
+ *skbp = skb;
+}
+
+static void mctp_test_flow_fini(struct kunit *test,
+ struct mctp_test_dev *dev,
+ struct mctp_test_route *rt,
+ struct socket *sock)
+{
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+/* test that an outgoing skb has the correct MCTP extension data set */
+static void mctp_test_packet_flow(struct kunit *test)
+{
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct mctp_flow *flow;
+ struct socket *sock;
+ u8 dst = 8;
+ int n, rc;
+
+ mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30);
+
+ rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ n = rt->pkts.qlen;
+ KUNIT_ASSERT_EQ(test, n, 1);
+
+ skb2 = skb_dequeue(&rt->pkts);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2);
+
+ flow = skb_ext_find(skb2, SKB_EXT_MCTP);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow->key);
+ KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk);
+
+ kfree_skb(skb2);
+ mctp_test_flow_fini(test, dev, rt, sock);
+}
+
+/* test that outgoing skbs, after fragmentation, all have the correct MCTP
+ * extension data set.
+ */
+static void mctp_test_fragment_flow(struct kunit *test)
+{
+ struct mctp_flow *flows[2];
+ struct sk_buff *tx_skbs[2];
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb;
+ struct socket *sock;
+ u8 dst = 8;
+ int n, rc;
+
+ mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100);
+
+ rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ n = rt->pkts.qlen;
+ KUNIT_ASSERT_EQ(test, n, 2);
+
+ /* both resulting packets should have the same flow data */
+ tx_skbs[0] = skb_dequeue(&rt->pkts);
+ tx_skbs[1] = skb_dequeue(&rt->pkts);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]);
+
+ flows[0] = skb_ext_find(tx_skbs[0], SKB_EXT_MCTP);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]->key);
+ KUNIT_ASSERT_PTR_EQ(test, flows[0]->key->sk, sock->sk);
+
+ flows[1] = skb_ext_find(tx_skbs[1], SKB_EXT_MCTP);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[1]);
+ KUNIT_ASSERT_PTR_EQ(test, flows[1]->key, flows[0]->key);
+
+ kfree_skb(tx_skbs[0]);
+ kfree_skb(tx_skbs[1]);
+ mctp_test_flow_fini(test, dev, rt, sock);
+}
+
+#else
+static void mctp_test_packet_flow(struct kunit *test)
+{
+ kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y");
+}
+
+static void mctp_test_fragment_flow(struct kunit *test)
+{
+ kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y");
+}
+#endif
+
+/* Test that outgoing skbs cause a suitable tag to be created */
+static void mctp_test_route_output_key_create(struct kunit *test)
+{
+ const unsigned int netid = 50;
+ const u8 dst = 26, src = 15;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct mctp_sk_key *key;
+ struct netns_mctp *mns;
+ unsigned long flags;
+ struct socket *sock;
+ struct sk_buff *skb;
+ bool empty, single;
+ const int len = 2;
+ int rc;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ WRITE_ONCE(dev->mdev->net, netid);
+
+ rt = mctp_test_create_route(&init_net, dev->mdev, dst, 68);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL);
+ dev->mdev->num_addrs = 1;
+ dev->mdev->addrs[0] = src;
+
+ skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL);
+ KUNIT_ASSERT_TRUE(test, skb);
+ __mctp_cb(skb);
+ skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len);
+ memset(skb_put(skb, len), 0, len);
+
+ refcount_inc(&rt->rt.refs);
+
+ mns = &sock_net(sock->sk)->mctp;
+
+ /* We assume we're starting from an empty keys list, which requires
+ * preceding tests to clean up correctly!
+ */
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ empty = hlist_empty(&mns->keys);
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+ KUNIT_ASSERT_TRUE(test, empty);
+
+ rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ key = NULL;
+ single = false;
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ if (!hlist_empty(&mns->keys)) {
+ key = hlist_entry(mns->keys.first, struct mctp_sk_key, hlist);
+ single = hlist_is_singular_node(&key->hlist, &mns->keys);
+ }
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ KUNIT_ASSERT_NOT_NULL(test, key);
+ KUNIT_ASSERT_TRUE(test, single);
+
+ KUNIT_EXPECT_EQ(test, key->net, netid);
+ KUNIT_EXPECT_EQ(test, key->local_addr, src);
+ KUNIT_EXPECT_EQ(test, key->peer_addr, dst);
+ /* key has incoming tag, so inverse of what we sent */
+ KUNIT_EXPECT_FALSE(test, key->tag & MCTP_TAG_OWNER);
+
+ sock_release(sock);
+ mctp_test_route_destroy(test, rt);
+ mctp_test_destroy_dev(dev);
+}
+
static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
@@ -673,6 +1053,11 @@ static struct kunit_case mctp_test_cases[] = {
mctp_route_input_sk_reasm_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys,
mctp_route_input_sk_keys_gen_params),
+ KUNIT_CASE(mctp_test_route_input_multiple_nets_bind),
+ KUNIT_CASE(mctp_test_route_input_multiple_nets_key),
+ KUNIT_CASE(mctp_test_packet_flow),
+ KUNIT_CASE(mctp_test_fragment_flow),
+ KUNIT_CASE(mctp_test_route_output_key_create),
{}
};
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index e03ba66bbe18..565763eb0211 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -4,6 +4,7 @@
#include <linux/mctp.h>
#include <linux/if_arp.h>
+#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/pkt_sched.h>
@@ -54,6 +55,7 @@ struct mctp_test_dev *mctp_test_create_dev(void)
rcu_read_lock();
dev->mdev = __mctp_dev_get(ndev);
+ dev->mdev->net = mctp_default_net(dev_net(ndev));
rcu_read_unlock();
return dev;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1af29af65388..6dab883a08dd 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2179,7 +2179,9 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct mpls_route __rcu **platform_label;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
unsigned int flags = NLM_F_MULTI;
size_t platform_labels;
unsigned int index;
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 533d082f0701..45d1e6a157fc 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -27,6 +27,9 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
__be16 mpls_protocol;
unsigned int mpls_hlen;
+ if (!skb_inner_network_header_was_set(skb))
+ goto out;
+
skb_reset_network_header(skb);
mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN))
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index ef59e25dc482..8fc790f2a01b 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -55,8 +55,6 @@ static int mpls_xmit(struct sk_buff *skb)
out_dev = dst->dev;
net = dev_net(out_dev);
- skb_orphan(skb);
-
if (!mpls_output_possible(out_dev) ||
!dst->lwtstate || skb_warn_if_lro(skb))
goto drop;
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742f2..3ae46b545d2c 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -10,20 +10,24 @@
#include <linux/net.h>
#include <linux/inet_diag.h>
#include <net/netlink.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -63,17 +67,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index 74698582a285..ad28da655f8b 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;
- mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
}
pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
- mptcp_data_unlock(sk);
}
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 5409c2ea3f57..0566dd793810 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -10,7 +10,6 @@
#include <linux/net.h>
#include <linux/inet_diag.h>
#include <net/netlink.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
@@ -225,6 +224,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
}
static const struct inet_diag_handler mptcp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = mptcp_diag_dump,
.dump_one = mptcp_diag_dump_one,
.idiag_get_info = mptcp_diag_get_info,
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index 670da7822e6c..c30a2a90a192 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -32,8 +32,9 @@ const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]
};
/* MPTCP_PM_CMD_GET_ADDR - do */
-const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
- [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
};
/* MPTCP_PM_CMD_FLUSH_ADDRS - do */
@@ -110,7 +111,7 @@ const struct genl_ops mptcp_pm_nl_ops[11] = {
.doit = mptcp_pm_nl_get_addr_doit,
.dumpit = mptcp_pm_nl_get_addr_dumpit,
.policy = mptcp_pm_get_addr_nl_policy,
- .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .maxattr = MPTCP_PM_ATTR_TOKEN,
.flags = GENL_UNS_ADMIN_PERM,
},
{
diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h
index ac9fc7225b6a..e24258f6f819 100644
--- a/net/mptcp/mptcp_pm_gen.h
+++ b/net/mptcp/mptcp_pm_gen.h
@@ -18,7 +18,7 @@ extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADD
extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
-extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1];
extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index d2527d189a79..27ca42c77b02 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -689,8 +689,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
- opts->ahmac = add_addr_generate_hmac(msk->local_key,
- msk->remote_key,
+ opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
@@ -792,7 +792,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
*size = TCPOLEN_MPTCP_FASTCLOSE;
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
- opts->rcvr_key = msk->remote_key;
+ opts->rcvr_key = READ_ONCE(msk->remote_key);
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
- subflow->fully_established = 1;
- WRITE_ONCE(msk->fully_established, true);
- goto check_notify;
+ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -983,10 +981,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
-set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- mptcp_subflow_fully_established(subflow, mp_opt);
+
+set_fully_established:
+ mptcp_data_lock((struct sock *)msk);
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ mptcp_data_unlock((struct sock *)msk);
check_notify:
/* if the subflow is not already linked into the conn_list, we can't
@@ -1030,7 +1031,7 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
msk->bytes_acked += new_snd_una - msk->snd_una;
- msk->snd_una = new_snd_una;
+ WRITE_ONCE(msk->snd_una, new_snd_una);
}
static void ack_update_msk(struct mptcp_sock *msk,
@@ -1057,10 +1058,10 @@ static void ack_update_msk(struct mptcp_sock *msk,
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
if (after64(new_wnd_end, msk->wnd_end))
- msk->wnd_end = new_wnd_end;
+ WRITE_ONCE(msk->wnd_end, new_wnd_end);
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
- if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
+ if (after64(msk->wnd_end, snd_nxt))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
@@ -1071,7 +1072,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
trace_ack_update_msk(mp_opt->data_ack,
old_snd_una, new_snd_una,
- new_wnd_end, msk->wnd_end);
+ new_wnd_end, READ_ONCE(msk->wnd_end));
}
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -1099,8 +1100,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
if (mp_opt->echo)
return true;
- hmac = add_addr_generate_hmac(msk->remote_key,
- msk->local_key,
+ hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
&mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
@@ -1147,7 +1148,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
- msk->local_key == mp_opt.rcvr_key) {
+ READ_ONCE(msk->local_key) == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 4ae19113b8eb..55406720c607 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -6,7 +6,6 @@
#define pr_fmt(fmt) "MPTCP: " fmt
#include <linux/kernel.h>
-#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
@@ -77,7 +76,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int
{
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
+ pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side);
WRITE_ONCE(pm->server_side, server_side);
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
@@ -441,13 +440,27 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id
return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
}
-int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup)
+int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
- if (token)
- return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup);
- return mptcp_pm_nl_set_flags(net, loc, bkup);
+ if (info->attrs[MPTCP_PM_ATTR_TOKEN])
+ return mptcp_userspace_pm_get_addr(skb, info);
+ return mptcp_pm_nl_get_addr(skb, info);
+}
+
+int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+
+ if (info->attrs[MPTCP_PM_ATTR_TOKEN])
+ return mptcp_userspace_pm_dump_addr(msg, cb);
+ return mptcp_pm_nl_dump_addr(msg, cb);
+}
+
+int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
+{
+ if (info->attrs[MPTCP_PM_ATTR_TOKEN])
+ return mptcp_userspace_pm_set_flags(skb, info);
+ return mptcp_pm_nl_set_flags(skb, info);
}
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 287a60381eae..5c17d39146ea 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -8,19 +8,13 @@
#include <linux/inet.h>
#include <linux/kernel.h>
-#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
-#include <net/genetlink.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"
-/* forward declaration */
-static struct genl_family mptcp_genl_family;
-
static int pm_nl_pernet_id;
struct mptcp_pm_add_entry {
@@ -396,19 +390,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,18 +421,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -502,15 +499,12 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
}
static struct mptcp_pm_addr_entry *
-__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
- bool lookup_by_id)
+__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id &&
- mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) ||
- (lookup_by_id && entry->addr.id == info->id))
+ if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
return entry;
}
return NULL;
@@ -540,7 +534,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
mptcp_local_address((struct sock_common *)msk->first, &mpc_addr);
rcu_read_lock();
- entry = __lookup_addr(pernet, &mpc_addr, false);
+ entry = __lookup_addr(pernet, &mpc_addr);
if (entry) {
__clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
msk->mpc_endpoint_id = entry->addr.id;
@@ -799,18 +793,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);
- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -901,7 +895,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
}
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -949,7 +944,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
}
}
- if (!entry->addr.id) {
+ if (!entry->addr.id && needs_id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +955,7 @@ find_next:
}
}
- if (!entry->addr.id)
+ if (!entry->addr.id && needs_id)
goto out;
__set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1092,7 +1087,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
if (ret < 0)
kfree(entry);
@@ -1285,6 +1280,18 @@ next:
return 0;
}
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+ struct genl_info *info)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+ mptcp_pm_address_nl_policy, info->extack) &&
+ tb[MPTCP_PM_ADDR_ATTR_ID])
+ return true;
+ return false;
+}
+
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
@@ -1326,7 +1333,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
}
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+ !mptcp_pm_has_addr_attr_id(attr, info));
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1533,8 +1541,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
}
}
-void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1619,8 +1627,8 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static int mptcp_nl_fill_addr(struct sk_buff *skb,
- struct mptcp_pm_addr_entry *entry)
+int mptcp_nl_fill_addr(struct sk_buff *skb,
+ struct mptcp_pm_addr_entry *entry)
{
struct mptcp_addr_info *addr = &entry->addr;
struct nlattr *attr;
@@ -1658,7 +1666,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
@@ -1708,8 +1716,13 @@ fail:
return ret;
}
-int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ return mptcp_pm_get_addr(skb, info);
+}
+
+int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb)
{
struct net *net = sock_net(msg->sk);
struct mptcp_pm_addr_entry *entry;
@@ -1751,6 +1764,12 @@ int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
return msg->len;
}
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return mptcp_pm_dump_addr(msg, cb);
+}
+
static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
{
struct nlattr *attr = info->attrs[id];
@@ -1865,66 +1884,63 @@ next:
return ret;
}
-int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup)
+int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
{
- struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
+ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
MPTCP_PM_ADDR_FLAG_FULLMESH;
+ struct net *net = sock_net(skb->sk);
struct mptcp_pm_addr_entry *entry;
+ struct pm_nl_pernet *pernet;
u8 lookup_by_id = 0;
+ u8 bkup = 0;
+ int ret;
- if (addr->addr.family == AF_UNSPEC) {
+ pernet = pm_nl_get_pernet(net);
+
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
+ if (ret < 0)
+ return ret;
+
+ if (addr.addr.family == AF_UNSPEC) {
lookup_by_id = 1;
- if (!addr->addr.id)
+ if (!addr.addr.id) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
return -EOPNOTSUPP;
+ }
}
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
+
spin_lock_bh(&pernet->lock);
- entry = __lookup_addr(pernet, &addr->addr, lookup_by_id);
+ entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) :
+ __lookup_addr(pernet, &addr.addr);
if (!entry) {
spin_unlock_bh(&pernet->lock);
+ GENL_SET_ERR_MSG(info, "address not found");
return -EINVAL;
}
- if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
spin_unlock_bh(&pernet->lock);
+ GENL_SET_ERR_MSG(info, "invalid addr flags");
return -EINVAL;
}
- changed = (addr->flags ^ entry->flags) & mask;
- entry->flags = (entry->flags & ~mask) | (addr->flags & mask);
- *addr = *entry;
+ changed = (addr.flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
+ addr = *entry;
spin_unlock_bh(&pernet->lock);
- mptcp_nl_set_flags(net, &addr->addr, bkup, changed);
+ mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
return 0;
}
int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
- struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
- struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct net *net = sock_net(skb->sk);
- u8 bkup = 0;
- int ret;
-
- ret = mptcp_pm_parse_entry(attr, info, false, &addr);
- if (ret < 0)
- return ret;
-
- if (attr_rem) {
- ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
- if (ret < 0)
- return ret;
- }
-
- if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
- bkup = 1;
-
- return mptcp_pm_set_flags(net, token, &addr, &remote, bkup);
+ return mptcp_pm_set_flags(skb, info);
}
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
@@ -1980,7 +1996,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;
- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
@@ -1997,7 +2013,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
const struct mptcp_subflow_context *sf;
u8 sk_err;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
return -EMSGSIZE;
if (mptcp_event_add_subflow(skb, ssk))
@@ -2055,7 +2071,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
- int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
+ int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
if (err)
return err;
@@ -2083,7 +2099,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
if (!nlh)
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
@@ -2118,7 +2134,7 @@ void mptcp_event_addr_announced(const struct sock *ssk,
if (!nlh)
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
@@ -2234,7 +2250,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
goto nla_put_failure;
break;
case MPTCP_EVENT_CLOSED:
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0)
goto nla_put_failure;
break;
case MPTCP_EVENT_ANNOUNCED:
@@ -2264,7 +2280,7 @@ nla_put_failure:
nlmsg_free(skb);
}
-static struct genl_family mptcp_genl_family __ro_after_init = {
+struct genl_family mptcp_genl_family __ro_after_init = {
.name = MPTCP_PM_NAME,
.version = MPTCP_PM_VER,
.netnsok = true,
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index efecbe3cf415..9f5d422d5ef6 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
- if (addr_match && entry->addr.id == 0)
+ if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
id_match = (e->addr.id == entry->addr.id);
if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
}
*e = *entry;
- if (!e->addr.id)
+ if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
@@ -105,19 +106,26 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
return -EINVAL;
}
+static struct mptcp_pm_addr_entry *
+mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (entry->addr.id == id)
+ return entry;
+ }
+ return NULL;
+}
+
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex)
{
- struct mptcp_pm_addr_entry *entry, *match = NULL;
+ struct mptcp_pm_addr_entry *match;
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
- if (id == entry->addr.id) {
- match = entry;
- break;
- }
- }
+ match = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
spin_unlock_bh(&msk->pm.lock);
if (match) {
*flags = match->flags;
@@ -130,10 +138,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&e->addr, skc, false)) {
+ entry = e;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (entry)
+ return entry->addr.id;
+
memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
@@ -142,7 +161,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
if (new_entry.addr.port == msk_sport)
new_entry.addr.port = 0;
- return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
@@ -187,7 +206,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto announce_err;
@@ -222,7 +241,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->local_id == 0) {
+ if (READ_ONCE(subflow->local_id) == 0) {
has_id_0 = true;
break;
}
@@ -249,7 +268,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
- struct mptcp_pm_addr_entry *match = NULL;
+ struct mptcp_pm_addr_entry *match;
struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
LIST_HEAD(free_list);
@@ -286,13 +305,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
- if (entry->addr.id == id_val) {
- match = entry;
- break;
- }
- }
-
+ match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val);
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
release_sock(sk);
@@ -322,7 +335,6 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry local = { 0 };
struct mptcp_addr_info addr_r;
- struct mptcp_addr_info addr_l;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
@@ -348,26 +360,32 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ err = mptcp_pm_parse_entry(laddr, info, true, &local);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto create_err;
}
+ if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ GENL_SET_ERR_MSG(info, "invalid addr flags");
+ err = -EINVAL;
+ goto create_err;
+ }
+ local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
goto create_err;
}
- if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
+ if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
goto create_err;
}
- local.addr = addr_l;
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
@@ -375,7 +393,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
- err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
+ err = __mptcp_subflow_connect(sk, &local.addr, &addr_r);
release_sock(sk);
@@ -483,6 +501,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err;
}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+ addr_l.family = AF_INET6;
+ }
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+ addr_r.family = AF_INET6;
+ }
+#endif
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
@@ -518,35 +546,194 @@ destroy_err:
return err;
}
-int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup)
+int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
{
+ struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, };
+ struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct net *net = sock_net(skb->sk);
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
u32 token_val;
+ u8 bkup = 0;
token_val = nla_get_u32(token);
msk = mptcp_token_get_sock(net, token_val);
- if (!msk)
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
return ret;
+ }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk))
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "userspace PM not selected");
+ goto set_flags_err;
+ }
+
+ ret = mptcp_pm_parse_entry(attr, info, false, &loc);
+ if (ret < 0)
goto set_flags_err;
- if (loc->addr.family == AF_UNSPEC ||
- rem->addr.family == AF_UNSPEC)
+ if (attr_rem) {
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
+ if (ret < 0)
+ goto set_flags_err;
+ }
+
+ if (loc.addr.family == AF_UNSPEC ||
+ rem.addr.family == AF_UNSPEC) {
+ GENL_SET_ERR_MSG(info, "invalid address families");
+ ret = -EINVAL;
goto set_flags_err;
+ }
+
+ if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
lock_sock(sk);
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup);
release_sock(sk);
set_flags_err:
sock_put(sk);
return ret;
}
+
+int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct id_bitmap {
+ DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1);
+ } *bitmap;
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(msg->sk);
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_sock *msk;
+ struct nlattr *token;
+ int ret = -EINVAL;
+ struct sock *sk;
+ void *hdr;
+
+ bitmap = (struct id_bitmap *)cb->ctx;
+ token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+
+ msk = mptcp_token_get_sock(net, nla_get_u32(token));
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return ret;
+ }
+
+ sk = (struct sock *)msk;
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto out;
+ }
+
+ lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (test_bit(entry->addr.id, bitmap->map))
+ continue;
+
+ hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &mptcp_genl_family,
+ NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
+ if (!hdr)
+ break;
+
+ if (mptcp_nl_fill_addr(msg, entry) < 0) {
+ genlmsg_cancel(msg, hdr);
+ break;
+ }
+
+ __set_bit(entry->addr.id, bitmap->map);
+ genlmsg_end(msg, hdr);
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+ ret = msg->len;
+
+out:
+ sock_put(sk);
+ return ret;
+}
+
+int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct mptcp_pm_addr_entry addr, *entry;
+ struct net *net = sock_net(skb->sk);
+ struct mptcp_sock *msk;
+ struct sk_buff *msg;
+ int ret = -EINVAL;
+ struct sock *sk;
+ void *reply;
+
+ msk = mptcp_token_get_sock(net, nla_get_u32(token));
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return ret;
+ }
+
+ sk = (struct sock *)msk;
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto out;
+ }
+
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
+ if (ret < 0)
+ goto out;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
+ info->genlhdr->cmd);
+ if (!reply) {
+ GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
+ ret = -EMSGSIZE;
+ goto fail;
+ }
+
+ lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id);
+ if (!entry) {
+ GENL_SET_ERR_MSG(info, "address not found");
+ ret = -EINVAL;
+ goto unlock_fail;
+ }
+
+ ret = mptcp_nl_fill_addr(msg, entry);
+ if (ret)
+ goto unlock_fail;
+
+ genlmsg_end(msg, reply);
+ ret = genlmsg_reply(msg, info);
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+ sock_put(sk);
+ return ret;
+
+unlock_fail:
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+fail:
+ nlmsg_free(msg);
+out:
+ sock_put(sk);
+ return ret;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3ed4709a7509..3a1967bc7bad 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -15,7 +15,6 @@
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
-#include <net/tcp.h>
#include <net/tcp_states.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
@@ -85,7 +84,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);
iput(SOCK_INODE(ssock));
@@ -410,6 +409,7 @@ static void mptcp_close_wake_up(struct sock *sk)
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}
+/* called under the msk socket lock */
static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -441,16 +441,17 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
}
}
+/* can be called with no lock acquired */
static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (READ_ONCE(msk->rcv_data_fin) &&
- ((1 << sk->sk_state) &
+ ((1 << inet_sk_state_load(sk)) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
- if (msk->ack_seq == rcv_data_fin_seq) {
+ if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) {
if (seq)
*seq = rcv_data_fin_seq;
@@ -748,7 +749,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
- msk->ack_seq = end_seq;
+ WRITE_ONCE(msk->ack_seq, end_seq);
moved = true;
}
return moved;
@@ -985,6 +986,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}
+/* called under both the msk socket lock and the data lock */
static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1033,13 +1035,15 @@ static void __mptcp_clean_una(struct sock *sk)
msk->recovery = false;
out:
- if (snd_una == READ_ONCE(msk->snd_nxt) &&
- snd_una == READ_ONCE(msk->write_seq)) {
+ if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_rtx_timer(sk);
} else {
mptcp_reset_rtx_timer(sk);
}
+
+ if (mptcp_pending_data_fin_ack(sk))
+ mptcp_schedule_work(sk);
}
static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1260,6 +1264,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1499,14 +1504,17 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
*/
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
- msk->snd_nxt = snd_nxt_new;
+ WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
}
}
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1683,15 +1691,6 @@ out:
}
}
-static void mptcp_set_nospace(struct sock *sk)
-{
- /* enable autotune */
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-
- /* will be cleared on avail space */
- set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
-}
-
static int mptcp_disconnect(struct sock *sk, int flags);
static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
@@ -1762,6 +1761,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy,
return 0;
}
+/* open-code sk_stream_memory_free() plus sent limit computation to
+ * avoid indirect calls in fast-path.
+ * Called under the msk socket lock, so we can avoid a bunch of ONCE
+ * annotations.
+ */
+static u32 mptcp_send_limit(const struct sock *sk)
+{
+ const struct mptcp_sock *msk = mptcp_sk(sk);
+ u32 limit, not_sent;
+
+ if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf))
+ return 0;
+
+ limit = mptcp_notsent_lowat(sk);
+ if (limit == UINT_MAX)
+ return UINT_MAX;
+
+ not_sent = msk->write_seq - msk->snd_nxt;
+ if (not_sent >= limit)
+ return 0;
+
+ return limit - not_sent;
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1806,6 +1829,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct mptcp_data_frag *dfrag;
bool dfrag_collapsed;
size_t psize, offset;
+ u32 copy_limit;
+
+ /* ensure fitting the notsent_lowat() constraint */
+ copy_limit = mptcp_send_limit(sk);
+ if (!copy_limit)
+ goto wait_for_memory;
/* reuse tail pfrag, if possible, or carve a new one from the
* page allocator
@@ -1813,9 +1842,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
dfrag = mptcp_pending_tail(sk);
dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
if (!dfrag_collapsed) {
- if (!sk_stream_memory_free(sk))
- goto wait_for_memory;
-
if (!mptcp_page_frag_refill(sk, pfrag))
goto wait_for_memory;
@@ -1830,6 +1856,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
offset = dfrag->offset + dfrag->data_len;
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
+ psize = min_t(size_t, psize, copy_limit);
total_ts = psize + frag_truesize;
if (!sk_wmem_schedule(sk, total_ts))
@@ -1865,7 +1892,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
continue;
wait_for_memory:
- mptcp_set_nospace(sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
__mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
@@ -1960,6 +1987,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;
mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -2108,7 +2138,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
skb = skb_peek(&msk->receive_queue);
if (skb) {
- u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
if (hint_val >= INT_MAX)
return INT_MAX;
@@ -2314,9 +2344,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;
- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
@@ -2755,7 +2782,7 @@ static void __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->rmem_fwd_alloc = 0;
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
@@ -2971,7 +2998,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->rmem_fwd_alloc);
+ WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -3145,22 +3172,22 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
- msk->can_ack = false;
- msk->fully_established = false;
- msk->rcv_data_fin = false;
- msk->snd_data_fin_enable = false;
- msk->rcv_fastclose = false;
- msk->use_64bit_ack = false;
- msk->bytes_consumed = 0;
+ WRITE_ONCE(msk->can_ack, false);
+ WRITE_ONCE(msk->fully_established, false);
+ WRITE_ONCE(msk->rcv_data_fin, false);
+ WRITE_ONCE(msk->snd_data_fin_enable, false);
+ WRITE_ONCE(msk->rcv_fastclose, false);
+ WRITE_ONCE(msk->use_64bit_ack, false);
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
+ msk->bytes_consumed = 0;
msk->bytes_acked = 0;
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3174,8 +3201,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct sock *ssk,
@@ -3183,6 +3252,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
if (!nsk)
@@ -3195,18 +3265,25 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
- msk->local_key = subflow_req->local_key;
- msk->token = subflow_req->token;
+ WRITE_ONCE(msk->local_key, subflow_req->local_key);
+ WRITE_ONCE(msk->token, subflow_req->token);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
- msk->write_seq = subflow_req->idsn + 1;
- msk->snd_nxt = msk->write_seq;
- msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
+ WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
@@ -3223,7 +3300,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ subflow = mptcp_subflow_ctx(ssk);
+ list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);
/* new mpc subflow takes ownership of the newly
@@ -3238,6 +3316,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
+
+ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3248,6 +3329,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);
+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;
@@ -3258,8 +3340,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
@@ -3306,9 +3386,6 @@ void __mptcp_data_acked(struct sock *sk)
__mptcp_clean_una(sk);
else
__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
-
- if (mptcp_pending_data_fin_ack(sk))
- mptcp_schedule_work(sk);
}
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
@@ -3333,8 +3410,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;
if (!flags)
@@ -3350,7 +3426,6 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
@@ -3478,13 +3553,8 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}
void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -3717,6 +3787,7 @@ static struct proto mptcp_prot = {
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
.forward_alloc_get = mptcp_forward_alloc_get,
+ .stream_memory_free = mptcp_stream_memory_free,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
@@ -3890,12 +3961,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- if (sk_stream_is_writeable(sk))
+ if (__mptcp_stream_is_writeable(sk, 1))
return EPOLLOUT | EPOLLWRNORM;
- mptcp_set_nospace(sk);
- smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
- if (sk_stream_is_writeable(sk))
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */
+ if (__mptcp_stream_is_writeable(sk, 1))
return EPOLLOUT | EPOLLWRNORM;
return 0;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3517f2d24a22..a10ebf3ee10a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -113,10 +113,9 @@
#define MPTCP_RST_TRANSIENT BIT(0)
/* MPTCP socket atomic flags */
-#define MPTCP_NOSPACE 1
-#define MPTCP_WORK_RTX 2
-#define MPTCP_FALLBACK_DONE 4
-#define MPTCP_WORK_CLOSE_SUBFLOW 5
+#define MPTCP_WORK_RTX 1
+#define MPTCP_FALLBACK_DONE 2
+#define MPTCP_WORK_CLOSE_SUBFLOW 3
/* MPTCP socket release cb flags */
#define MPTCP_PUSH_PENDING 1
@@ -260,8 +259,10 @@ struct mptcp_data_frag {
struct mptcp_sock {
/* inet_connection_sock must be the first member */
struct inet_connection_sock sk;
- u64 local_key;
- u64 remote_key;
+ u64 local_key; /* protected by the first subflow socket lock
+ * lockless access read
+ */
+ u64 remote_key; /* same as above */
u64 write_seq;
u64 bytes_sent;
u64 snd_nxt;
@@ -286,7 +287,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -305,7 +305,9 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
+ u32 notsent_lowat;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -341,12 +343,30 @@ struct mptcp_sock {
#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+extern struct genl_family mptcp_genl_family;
+
static inline void msk_owned_by_me(const struct mptcp_sock *msk)
{
sock_owned_by_me((const struct sock *)msk);
}
+#ifdef CONFIG_DEBUG_NET
+/* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */
+#undef tcp_sk
+#define tcp_sk(ptr) ({ \
+ typeof(ptr) _ptr = (ptr); \
+ WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \
+ container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \
+})
+#define mptcp_sk(ptr) ({ \
+ typeof(ptr) _ptr = (ptr); \
+ WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP); \
+ container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk); \
+})
+
+#else /* !CONFIG_DEBUG_NET */
#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
+#endif
/* the msk socket don't use the backlog, also account for the bulk
* free memory
@@ -400,7 +420,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+ if (msk->snd_una == msk->snd_nxt)
return NULL;
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
@@ -491,10 +511,9 @@ struct mptcp_subflow_context {
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 9;
+ __unused : 10;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -505,7 +524,7 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -556,6 +575,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}
static inline u64
@@ -622,8 +642,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -789,6 +810,38 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline u32 mptcp_notsent_lowat(const struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ u32 val;
+
+ val = READ_ONCE(mptcp_sk(sk)->notsent_lowat);
+ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+}
+
+static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake)
+{
+ const struct mptcp_sock *msk = mptcp_sk(sk);
+ u32 notsent_bytes;
+
+ notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt);
+ return (notsent_bytes << wake) < mptcp_notsent_lowat(sk);
+}
+
+static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake)
+{
+ return mptcp_stream_memory_free(sk, wake) &&
+ __sk_stream_is_writeable(sk, wake);
+}
+
+static inline void mptcp_write_space(struct sock *sk)
+{
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (mptcp_stream_memory_free(sk, 1))
+ sk_stream_write_space(sk);
+}
+
static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -797,7 +850,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
return;
- new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0];
+ new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]);
mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf);
@@ -807,6 +860,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+ mptcp_write_space(sk);
}
/* The called held both the msk socket and the subflow socket locks,
@@ -837,16 +891,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-static inline void mptcp_write_space(struct sock *sk)
-{
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
- sk_stream_write_space(sk);
- }
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4
@@ -926,21 +970,15 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
-int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup);
-int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup);
-int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup);
+int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
-void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
@@ -952,10 +990,12 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
+int mptcp_nl_fill_addr(struct sk_buff *skb,
+ struct mptcp_pm_addr_entry *entry);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
@@ -1020,6 +1060,24 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb);
+int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb);
+int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb);
+int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info);
+int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+ struct genl_info *info);
+
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
@@ -1128,7 +1186,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index c40f1428e602..dcd1c76d2a3b 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -624,20 +624,11 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
return ret;
}
-static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval,
- unsigned int optlen)
+static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- int val;
- if (optlen < sizeof(int))
- return -EINVAL;
-
- if (copy_from_sockptr(&val, optval, sizeof(val)))
- return -EFAULT;
-
- lock_sock(sk);
sockopt_seq_inc(msk);
msk->cork = !!val;
mptcp_for_each_subflow(msk, subflow) {
@@ -649,25 +640,15 @@ static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optva
}
if (!val)
mptcp_check_and_set_pending(sk);
- release_sock(sk);
return 0;
}
-static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
- unsigned int optlen)
+static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- int val;
-
- if (optlen < sizeof(int))
- return -EINVAL;
-
- if (copy_from_sockptr(&val, optval, sizeof(val)))
- return -EFAULT;
- lock_sock(sk);
sockopt_seq_inc(msk);
msk->nodelay = !!val;
mptcp_for_each_subflow(msk, subflow) {
@@ -679,8 +660,6 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
}
if (val)
mptcp_check_and_set_pending(sk);
- release_sock(sk);
-
return 0;
}
@@ -803,25 +782,10 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
int ret, val;
switch (optname) {
- case TCP_INQ:
- ret = mptcp_get_int_option(msk, optval, optlen, &val);
- if (ret)
- return ret;
- if (val < 0 || val > 1)
- return -EINVAL;
-
- lock_sock(sk);
- msk->recvmsg_inq = !!val;
- release_sock(sk);
- return 0;
case TCP_ULP:
return -EOPNOTSUPP;
case TCP_CONGESTION:
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
- case TCP_CORK:
- return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
- case TCP_NODELAY:
- return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
case TCP_DEFER_ACCEPT:
/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
@@ -834,7 +798,34 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
optval, optlen);
}
- return -EOPNOTSUPP;
+ ret = mptcp_get_int_option(msk, optval, optlen, &val);
+ if (ret)
+ return ret;
+
+ lock_sock(sk);
+ switch (optname) {
+ case TCP_INQ:
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ msk->recvmsg_inq = !!val;
+ break;
+ case TCP_NOTSENT_LOWAT:
+ WRITE_ONCE(msk->notsent_lowat, val);
+ mptcp_write_space(sk);
+ break;
+ case TCP_CORK:
+ ret = __mptcp_setsockopt_sol_tcp_cork(msk, val);
+ break;
+ case TCP_NODELAY:
+ ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
+ break;
+ default:
+ ret = -ENOPROTOOPT;
+ }
+
+ release_sock(sk);
+ return ret;
}
int mptcp_setsockopt(struct sock *sk, int level, int optname,
@@ -942,7 +933,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
mptcp_data_unlock(sk);
slow = lock_sock_fast(sk);
- info->mptcpi_csum_enabled = msk->csum_enabled;
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
info->mptcpi_token = msk->token;
info->mptcpi_write_seq = msk->write_seq;
info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
@@ -1349,6 +1340,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->cork);
case TCP_NODELAY:
return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
+ case TCP_NOTSENT_LOWAT:
+ return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
}
return -EOPNOTSUPP;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0dcb721c89d1..1626dd20c68f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -15,13 +15,11 @@
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
-#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"
@@ -75,7 +73,8 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
- subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
subflow_req->local_nonce,
subflow_req->remote_nonce, hmac);
@@ -421,29 +420,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
+ /* subflow->idsn is always available is TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
-}
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
@@ -465,6 +461,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+ /* Options are available only in the non fallback cases
+ * avoid updating rx path fields otherwise
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -499,10 +520,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -514,7 +534,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -545,8 +565,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
@@ -557,8 +576,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +586,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -694,7 +713,8 @@ static bool subflow_hmac_valid(const struct request_sock *req,
if (!msk)
return false;
- subflow_generate_hmac(msk->remote_key, msk->local_key,
+ subflow_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -834,7 +853,6 @@ create_child:
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
@@ -1530,8 +1548,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key_valid = 1;
- subflow->remote_key = msk->remote_key;
- subflow->local_key = msk->local_key;
+ subflow->remote_key = READ_ONCE(msk->remote_key);
+ subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
@@ -1549,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1713,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1744,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1949,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index bfff53e668da..4fc39fa2e262 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -52,14 +52,19 @@ static struct mptcp_subflow_context *build_ctx(struct kunit *test)
static struct mptcp_sock *build_msk(struct kunit *test)
{
struct mptcp_sock *msk;
+ struct sock *sk;
msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
sock_net_set((struct sock *)msk, &init_net);
+ sk = (struct sock *)msk;
+
/* be sure the token helpers can dereference sk->sk_prot */
- ((struct sock *)msk)->sk_prot = &tcp_prot;
+ sk->sk_prot = &tcp_prot;
+ sk->sk_protocol = IPPROTO_MPTCP;
+
return msk;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 441d1f134110..df2dc21304ef 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -818,7 +818,7 @@ config NETFILTER_XT_TARGET_AUDIT
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
config NETFILTER_XT_TARGET_CT
tristate '"CT" target support'
depends on NF_CONNTRACK
- depends on IP_NF_RAW || IP6_NF_RAW
+ depends on IP_NF_RAW || IP6_NF_RAW || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This options adds a `CT' target, which allows to specify initial
@@ -880,7 +880,7 @@ config NETFILTER_XT_TARGET_CT
config NETFILTER_XT_TARGET_DSCP
tristate '"DSCP" and "TOS" target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `DSCP' target, which allows you to manipulate
@@ -896,7 +896,7 @@ config NETFILTER_XT_TARGET_DSCP
config NETFILTER_XT_TARGET_HL
tristate '"HL" hoplimit target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
@@ -1080,7 +1080,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
- depends on IP_NF_MANGLE
+ depends on IP_NF_MANGLE || NFT_COMPAT
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
select NF_TPROXY_IPV4
@@ -1147,7 +1147,7 @@ config NETFILTER_XT_TARGET_TCPMSS
config NETFILTER_XT_TARGET_TCPOPTSTRIP
tristate '"TCPOPTSTRIP" target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a "TCPOPTSTRIP" target, which allows you to strip
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index d4958e7e7631..614815a3ed73 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -101,7 +101,7 @@ endif
endif
ifdef CONFIG_NFT_CT
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
nf_tables-objs += nft_ct_fast.o
endif
endif
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 21f7860e8fa1..cb48a2b9cb9f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -30,6 +30,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4c133e06be1d..3184cc6be4c9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}
@@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
@@ -2409,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index cbf80da9a01c..cf3ce72c3de6 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -430,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -450,10 +452,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
@@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e162636525cf..6c3f28bc59b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- timer_shutdown_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a743db073887..98d7dbe3d787 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1511,9 +1511,7 @@ int __init ip_vs_conn_init(void)
return -ENOMEM;
/* Allocate ip_vs_conn slab cache */
- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
- sizeof(struct ip_vs_conn), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN);
if (!ip_vs_conn_cachep) {
kvfree(ip_vs_conn_tab);
return -ENOMEM;
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 0e4beae421f8..5257d5e7eb09 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type,
static const struct bpf_func_proto *
bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
const struct bpf_verifier_ops netfilter_verifier_ops = {
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 5d8ed6c90b7e..8715617b02fe 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -605,15 +605,11 @@ static int __init nf_conncount_modinit(void)
for (i = 0; i < CONNCOUNT_SLOTS; ++i)
spin_lock_init(&nf_conncount_locks[i]);
- conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
- sizeof(struct nf_conncount_tuple),
- 0, 0, NULL);
+ conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0);
if (!conncount_conn_cachep)
return -ENOMEM;
- conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
- sizeof(struct nf_conncount_rb),
- 0, 0, NULL);
+ conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0);
if (!conncount_rb_cachep) {
kmem_cache_destroy(conncount_conn_cachep);
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 475358ec8212..d2492d050fe6 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -467,7 +467,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
__bpf_kfunc_end_defs();
-BTF_SET8_START(nf_ct_kfunc_set)
+BTF_KFUNCS_START(nf_ct_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
@@ -478,7 +478,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
-BTF_SET8_END(nf_ct_kfunc_set)
+BTF_KFUNCS_END(nf_ct_kfunc_set)
static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e5f3864d353..c63868666bd9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2530,7 +2530,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
* netfilter framework. Roll on, two-stage module
* delete...
*/
- synchronize_net();
+ synchronize_rcu_expedited();
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
.set_closing = nf_conntrack_set_closing,
+ .confirm = __nf_conntrack_confirm,
};
void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 81ca348915c9..21fa550966f0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -722,9 +722,7 @@ int nf_conntrack_expect_init(void)
nf_ct_expect_hsize = 1;
}
nf_ct_expect_max = nf_ct_expect_hsize * 4;
- nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
- sizeof(struct nf_conntrack_expect),
- 0, 0, NULL);
+ nf_ct_expect_cachep = KMEM_CACHE(nf_conntrack_expect, 0);
if (!nf_ct_expect_cachep)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index e697a824b001..540d97715bd2 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Get fields bitmap */
if (nf_h323_error_boundary(bs, 0, f->sz))
return H323_ERROR_BOUND;
+ if (f->sz > 32)
+ return H323_ERROR_RANGE;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
bmp2_len = get_bits(bs, 7) + 1;
if (nf_h323_error_boundary(bs, 0, bmp2_len))
return H323_ERROR_BOUND;
+ if (bmp2_len > 32)
+ return H323_ERROR_RANGE;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0c22a02c2035..3b846cbdc050 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 {
struct ctnetlink_filter {
u8 family;
+ bool zone_filter;
u_int32_t orig_flags;
u_int32_t reply_flags;
@@ -992,9 +993,12 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
if (err)
goto err_filter;
- err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
- if (err < 0)
- goto err_filter;
+ if (cda[CTA_ZONE]) {
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+ if (err < 0)
+ goto err_filter;
+ filter->zone_filter = true;
+ }
if (!cda[CTA_FILTER])
return filter;
@@ -1148,7 +1152,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
- if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID &&
+ if (filter->zone_filter &&
!nf_ct_zone_equal_any(ct, &filter->zone))
goto ignore_entry;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6bd533983c1..4cc97f971264 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- } else {
+ } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index e573be5afde7..ae493599a3ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
- u32 end, u32 win)
+ u32 end, u32 win,
+ enum ip_conntrack_dir dir)
{
/* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ if (dir == IP_CT_DIR_REPLY &&
+ !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
sender->td_scale = 0;
receiver->td_scale = 0;
@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
if (tcph->syn) {
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (!tcph->ack)
/* Simultaneous open */
return NFCT_TCP_ACCEPT;
@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
*/
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return NFCT_TCP_ACCEPT;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 920a5a29ae1d..a0571339239c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
return 0;
}
+static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ route->tuple[dir].dst = NULL;
+
+ return dst;
+}
+
static int flow_offload_fill_route(struct flow_offload *flow,
- const struct nf_flow_route *route,
+ struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct dst_entry *dst = nft_route_dst_fetch(route, dir);
int i, j = 0;
switch (flow_tuple->l3proto) {
@@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route)
+ struct nf_flow_route *route)
{
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8cc52d2bd31b..370f8231385c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -31,10 +31,10 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
int i;
for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
- if (loggers[pf][i] == NULL)
+ log = nft_log_dereference(loggers[pf][i]);
+ if (!log)
continue;
- log = nft_log_dereference(loggers[pf][i]);
if (!strncasecmp(str_logger, log->name, strlen(log->name)))
return log;
}
@@ -156,6 +156,11 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
struct nf_logger *logger;
int ret = -ENOENT;
+ if (pf >= ARRAY_SIZE(loggers))
+ return -EINVAL;
+ if (type >= NF_LOG_TYPE_MAX)
+ return -EINVAL;
+
if (pf == NFPROTO_INET) {
ret = nf_logger_find_get(NFPROTO_IPV4, type);
if (ret < 0)
@@ -193,11 +198,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
return;
}
- BUG_ON(loggers[pf][type] == NULL);
-
rcu_read_lock();
logger = rcu_dereference(loggers[pf][type]);
- module_put(logger->me);
+ if (!logger)
+ WARN_ON_ONCE(1);
+ else
+ module_put(logger->me);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_logger_put);
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 6e3b2f58855f..481be15609b1 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -54,9 +54,9 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
__bpf_kfunc_end_defs();
-BTF_SET8_START(nf_nat_kfunc_set)
+BTF_KFUNCS_START(nf_nat_kfunc_set)
BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
-BTF_SET8_END(nf_nat_kfunc_set)
+BTF_KFUNCS_END(nf_nat_kfunc_set)
static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c3d7ecbc777c..016c816d91cb 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -551,8 +551,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
find_free_id:
if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
- else
+ else if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL) ||
+ maniptype != NF_NAT_MANIP_DST)
off = get_random_u16();
+ else
+ off = 0;
attempts = range_size;
if (attempts > NF_NAT_MAX_ATTEMPTS)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index e2f334f70281..7f12e56e6e52 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -248,109 +248,3 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
return 0;
}
EXPORT_SYMBOL_GPL(nf_queue);
-
-static unsigned int nf_iterate(struct sk_buff *skb,
- struct nf_hook_state *state,
- const struct nf_hook_entries *hooks,
- unsigned int *index)
-{
- const struct nf_hook_entry *hook;
- unsigned int verdict, i = *index;
-
- while (i < hooks->num_hook_entries) {
- hook = &hooks->hooks[i];
-repeat:
- verdict = nf_hook_entry_hookfn(hook, skb, state);
- if (verdict != NF_ACCEPT) {
- *index = i;
- if (verdict != NF_REPEAT)
- return verdict;
- goto repeat;
- }
- i++;
- }
-
- *index = i;
- return NF_ACCEPT;
-}
-
-static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
-{
- switch (pf) {
-#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
- case NFPROTO_BRIDGE:
- return rcu_dereference(net->nf.hooks_bridge[hooknum]);
-#endif
- case NFPROTO_IPV4:
- return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
- case NFPROTO_IPV6:
- return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-
- return NULL;
-}
-
-/* Caller must hold rcu read-side lock */
-void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
-{
- const struct nf_hook_entry *hook_entry;
- const struct nf_hook_entries *hooks;
- struct sk_buff *skb = entry->skb;
- const struct net *net;
- unsigned int i;
- int err;
- u8 pf;
-
- net = entry->state.net;
- pf = entry->state.pf;
-
- hooks = nf_hook_entries_head(net, pf, entry->state.hook);
-
- i = entry->hook_index;
- if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
- kfree_skb(skb);
- nf_queue_entry_free(entry);
- return;
- }
-
- hook_entry = &hooks->hooks[i];
-
- /* Continue traversal iff userspace said ok... */
- if (verdict == NF_REPEAT)
- verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
-
- if (verdict == NF_ACCEPT) {
- if (nf_reroute(skb, entry) < 0)
- verdict = NF_DROP;
- }
-
- if (verdict == NF_ACCEPT) {
-next_hook:
- ++i;
- verdict = nf_iterate(skb, &entry->state, hooks, &i);
- }
-
- switch (verdict & NF_VERDICT_MASK) {
- case NF_ACCEPT:
- case NF_STOP:
- local_bh_disable();
- entry->state.okfn(entry->state.net, entry->state.sk, skb);
- local_bh_enable();
- break;
- case NF_QUEUE:
- err = nf_queue(skb, &entry->state, i, verdict);
- if (err == 1)
- goto next_hook;
- break;
- case NF_STOLEN:
- break;
- default:
- kfree_skb(skb);
- }
-
- nf_queue_entry_free(entry);
-}
-EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index fbbc4fd37349..5b140c12b7df 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -800,7 +800,7 @@ synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb,
skb_reset_network_header(skb);
iph = skb_put(skb, sizeof(*iph));
ip6_flow_hdr(iph, 0, 0);
- iph->hop_limit = net->ipv6.devconf_all->hop_limit;
+ iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit);
iph->nexthdr = IPPROTO_TCP;
iph->saddr = *saddr;
iph->daddr = *daddr;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4b55533ce5ca..e93f905e60b6 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -24,6 +24,7 @@
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+#define NFT_SET_MAX_ANONLEN 16
unsigned int nf_tables_net_id __read_mostly;
@@ -683,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
-static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
- struct nft_flowtable *flowtable)
+static struct nft_trans *
+nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type,
sizeof(struct nft_trans_flowtable));
if (trans == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
@@ -700,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
- return 0;
+ return trans;
}
static int nft_delflowtable(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
- int err;
+ struct nft_trans *trans;
- err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
- if (err < 0)
- return err;
+ trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
nft_deactivate_next(ctx->net, flowtable);
nft_use_dec(&ctx->table->use);
- return err;
+ return 0;
}
static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
@@ -1193,8 +1195,10 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
- __NFT_TABLE_F_WAS_AWAKEN)
+ __NFT_TABLE_F_WAS_AWAKEN | \
+ __NFT_TABLE_F_WAS_ORPHAN)
static int nf_tables_updtable(struct nft_ctx *ctx)
{
@@ -1214,8 +1218,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if ((nft_table_has_owner(ctx->table) &&
!(flags & NFT_TABLE_F_OWNER)) ||
- (!nft_table_has_owner(ctx->table) &&
- flags & NFT_TABLE_F_OWNER))
+ (flags & NFT_TABLE_F_OWNER &&
+ !nft_table_is_orphan(ctx->table)))
+ return -EOPNOTSUPP;
+
+ if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST)
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
@@ -1244,12 +1251,20 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
}
}
+ if ((flags & NFT_TABLE_F_OWNER) &&
+ !nft_table_has_owner(ctx->table)) {
+ ctx->table->nlpid = ctx->portid;
+ ctx->table->flags |= NFT_TABLE_F_OWNER |
+ __NFT_TABLE_F_WAS_ORPHAN;
+ }
+
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err_register_hooks:
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
nft_trans_destroy(trans);
return ret;
}
@@ -2079,7 +2094,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
struct nft_hook *hook;
int err;
- hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+ hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
@@ -2502,19 +2517,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
RCU_INIT_POINTER(chain->blob_gen_0, blob);
RCU_INIT_POINTER(chain->blob_gen_1, blob);
- err = nf_tables_register_hook(net, table, chain);
- if (err < 0)
- goto err_destroy_chain;
-
if (!nft_use_inc(&table->use)) {
err = -EMFILE;
- goto err_use;
+ goto err_destroy_chain;
}
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto err_unregister_hook;
+ goto err_trans;
}
nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2522,17 +2533,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_trans_chain_policy(trans) = policy;
err = nft_chain_add(table, chain);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_unregister_hook;
- }
+ if (err < 0)
+ goto err_chain_add;
+
+ /* This must be LAST to ensure no packets are walking over this chain. */
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_register_hook;
return 0;
-err_unregister_hook:
+err_register_hook:
+ nft_chain_del(chain);
+err_chain_add:
+ nft_trans_destroy(trans);
+err_trans:
nft_use_dec_restore(&table->use);
-err_use:
- nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -4234,23 +4250,18 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
* given, in that case the amount of memory per element is used.
*/
static const struct nft_set_ops *
-nft_select_set_ops(const struct nft_ctx *ctx,
- const struct nlattr * const nla[],
+nft_select_set_ops(const struct nft_ctx *ctx, u32 flags,
const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
const struct nft_set_type *type;
- u32 flags = 0;
int i;
lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
- if (nla[NFTA_SET_FLAGS] != NULL)
- flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
-
bops = NULL;
best.size = ~0;
best.lookup = ~0;
@@ -4413,6 +4424,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
+ if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN)
+ return -EINVAL;
+
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
@@ -4994,6 +5008,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
(NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) ==
+ (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
+ (NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
}
desc.dtype = 0;
@@ -5133,7 +5153,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc);
+ ops = nft_select_set_ops(&ctx, flags, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -5417,6 +5437,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
+ set->dead = 1;
if (event)
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
GFP_KERNEL);
@@ -7547,11 +7568,15 @@ nla_put_failure:
return -1;
}
-static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
+ if (type->family != NFPROTO_UNSPEC &&
+ type->family != family)
+ continue;
+
if (objtype == type->type)
return type;
}
@@ -7559,11 +7584,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
}
static const struct nft_object_type *
-nft_obj_type_get(struct net *net, u32 objtype)
+nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
return type;
@@ -7656,7 +7681,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (WARN_ON_ONCE(!type))
return -ENOENT;
@@ -7670,7 +7695,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&table->use))
return -EMFILE;
- type = nft_obj_type_get(net, objtype);
+ type = nft_obj_type_get(net, objtype, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err_type;
@@ -8447,9 +8472,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
u8 family = info->nfmsg->nfgen_family;
const struct nf_flowtable_type *type;
struct nft_flowtable *flowtable;
- struct nft_hook *hook, *next;
struct net *net = info->net;
struct nft_table *table;
+ struct nft_trans *trans;
struct nft_ctx ctx;
int err;
@@ -8529,34 +8554,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable,
extack, true);
if (err < 0)
- goto err4;
+ goto err_flowtable_parse_hooks;
list_splice(&flowtable_hook.list, &flowtable->hook_list);
flowtable->data.priority = flowtable_hook.priority;
flowtable->hooknum = flowtable_hook.num;
+ trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto err_flowtable_trans;
+ }
+
+ /* This must be LAST to ensure no packets are walking over this flowtable. */
err = nft_register_flowtable_net_hooks(ctx.net, table,
&flowtable->hook_list,
flowtable);
- if (err < 0) {
- nft_hooks_destroy(&flowtable->hook_list);
- goto err4;
- }
-
- err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err5;
+ goto err_flowtable_hooks;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
return 0;
-err5:
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- nft_unregister_flowtable_hook(net, flowtable, hook);
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
- }
-err4:
+
+err_flowtable_hooks:
+ nft_trans_destroy(trans);
+err_flowtable_trans:
+ nft_hooks_destroy(&flowtable->hook_list);
+err_flowtable_parse_hooks:
flowtable->data.type->free(&flowtable->data);
err3:
module_put(type->owner);
@@ -9819,6 +9844,7 @@ dead_elem:
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
struct nft_set_elem_catchall *catchall, *next;
+ u64 tstamp = nft_net_tstamp(gc->net);
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
@@ -9828,7 +9854,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_expired(ext))
+ if (!__nft_set_elem_expired(ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -10416,6 +10442,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
} else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
}
+ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
+ trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER;
+ trans->ctx.table->nlpid = 0;
+ }
trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
@@ -10614,6 +10644,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
@@ -10988,16 +11019,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
- default:
- switch (data->verdict.code & NF_VERDICT_MASK) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
- break;
- default:
- return -EINVAL;
- }
- fallthrough;
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -11032,6 +11057,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.chain = chain;
break;
+ default:
+ return -EINVAL;
}
desc->len = sizeof(data->verdict);
@@ -11345,6 +11372,10 @@ again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
+ if (table->flags & NFT_TABLE_F_PERSIST) {
+ table->flags &= ~NFT_TABLE_F_OWNER;
+ continue;
+ }
__nft_release_hook(net, table);
list_del_rcu(&table->list);
to_delete[deleted++] = table;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index c3e635364701..a48d5f0e2f3e 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,7 +21,7 @@
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nft_meta.h>
-#if defined(CONFIG_RETPOLINE) && defined(CONFIG_X86)
+#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_X86)
static struct static_key_false nf_tables_skip_direct_calls;
@@ -207,7 +207,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
struct nft_regs *regs,
struct nft_pktinfo *pkt)
{
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
unsigned long e;
if (nf_skip_indirect_calls())
@@ -236,7 +236,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
X(e, nft_objref_map_eval);
#undef X
indirect_call:
-#endif /* CONFIG_RETPOLINE */
+#endif /* CONFIG_MITIGATION_RETPOLINE */
expr->ops->eval(expr, regs, pkt);
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 171d1f52d3dd..00f4bd21c59b 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -225,6 +225,148 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
return entry;
}
+static unsigned int nf_iterate(struct sk_buff *skb,
+ struct nf_hook_state *state,
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
+{
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
+
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
+repeat:
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
+ if (verdict != NF_ACCEPT) {
+ *index = i;
+ if (verdict != NF_REPEAT)
+ return verdict;
+ goto repeat;
+ }
+ i++;
+ }
+
+ *index = i;
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+{
+ switch (pf) {
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
+#endif
+ case NFPROTO_IPV4:
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+ case NFPROTO_IPV6:
+ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+{
+#ifdef CONFIG_INET
+ const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (!(iph->tos == rt_info->tos &&
+ skb->mark == rt_info->mark &&
+ iph->daddr == rt_info->daddr &&
+ iph->saddr == rt_info->saddr))
+ return ip_route_me_harder(entry->state.net, entry->state.sk,
+ skb, RTN_UNSPEC);
+ }
+#endif
+ return 0;
+}
+
+static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ ret = nf_ip_reroute(skb, entry);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->reroute(skb, entry);
+ break;
+ }
+ return ret;
+}
+
+/* caller must hold rcu read-side lock */
+static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+{
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
+ struct sk_buff *skb = entry->skb;
+ const struct net *net;
+ unsigned int i;
+ int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
+
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
+ nf_queue_entry_free(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
+ /* Continue traversal iff userspace said ok... */
+ if (verdict == NF_REPEAT)
+ verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
+
+ if (verdict == NF_ACCEPT) {
+ if (nf_reroute(skb, entry) < 0)
+ verdict = NF_DROP;
+ }
+
+ if (verdict == NF_ACCEPT) {
+next_hook:
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
+ }
+
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_STOP:
+ local_bh_disable();
+ entry->state.okfn(entry->state.net, entry->state.sk, skb);
+ local_bh_enable();
+ break;
+ case NF_QUEUE:
+ err = nf_queue(skb, &entry->state, i, verdict);
+ if (err == 1)
+ goto next_hook;
+ break;
+ case NF_STOLEN:
+ break;
+ default:
+ kfree_skb(skb);
+ }
+
+ nf_queue_entry_free(entry);
+}
+
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
@@ -232,18 +374,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
+ unsigned int ct_verdict = verdict;
+
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook)
- verdict = ct_hook->update(entry->state.net, entry->skb);
+ ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
- switch (verdict & NF_VERDICT_MASK) {
+ switch (ct_verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ /* follow userspace verdict, could be REPEAT */
+ break;
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
+ default:
+ verdict = ct_verdict & NF_VERDICT_MASK;
+ break;
}
-
}
nf_reinject(entry, verdict);
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 680fe557686e..274b6f7e6bb5 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_base_chain *basechain;
struct nftables_pernet *nft_net;
- struct nft_table *table;
struct nft_chain *chain, *nr;
+ struct nft_table *table;
struct nft_ctx ctx = {
.net = dev_net(dev),
};
@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
- if (table->family != NFPROTO_NETDEV)
+ if (table->family != NFPROTO_NETDEV &&
+ table->family != NFPROTO_INET)
continue;
ctx.family = table->family;
@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this,
if (!nft_is_base_chain(chain))
continue;
+ basechain = nft_base_chain(chain);
+ if (table->family == NFPROTO_INET &&
+ basechain->ops.hooknum != NF_INET_INGRESS)
+ continue;
+
ctx.chain = chain;
nft_netdev_event(event, dev, &ctx);
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5284cd2ad532..d3d11dede545 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 l4proto;
u32 flags;
int err;
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
- if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+ flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ if (l4proto > U16_MAX)
+ return -EINVAL;
+
+ *proto = l4proto;
+
return 0;
}
@@ -350,6 +357,22 @@ static int nft_target_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -413,7 +436,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -595,6 +618,22 @@ static int nft_match_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -712,7 +751,7 @@ out_put:
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 86bb9d7797d9..452ed94c3a4d 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+
len = sizeof(u32);
break;
default:
@@ -751,7 +754,7 @@ static bool nft_ct_set_reduce(struct nft_regs_track *track,
return false;
}
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
static const struct nft_expr_ops nft_ct_get_fast_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
@@ -796,7 +799,7 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EINVAL);
if (tb[NFTA_CT_DREG]) {
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
u32 k = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (k) {
@@ -1250,7 +1253,30 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_EXPECT_L3PROTO])
priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+ switch (priv->l3num) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET)
+ break;
+
+ return -EINVAL;
+ case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */
+ default:
+ return -EAFNOSUPPORT;
+ }
+
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+ switch (priv->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab3362c483b4..ab9576098701 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -361,6 +361,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
ret = flow_offload_add(flowtable, flow);
if (ret < 0)
goto err_flow_add;
@@ -384,6 +385,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx,
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, hook_mask);
}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 79039afde34e..cefa25e0dbb0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
static int nft_limit_init(struct nft_limit_priv *priv,
const struct nlattr * const tb[], bool pkts)
{
+ u64 unit, tokens, rate_with_burst;
bool invert = false;
- u64 unit, tokens;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ if (priv->rate == 0)
+ return -EINVAL;
+
unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- priv->nsecs = unit * NSEC_PER_SEC;
- if (priv->rate == 0 || priv->nsecs < unit)
+ if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs))
return -EOVERFLOW;
if (tb[NFTA_LIMIT_BURST])
@@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv,
if (pkts && priv->burst == 0)
priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
- if (priv->rate + priv->burst < priv->rate)
+ if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst))
return -EOVERFLOW;
if (pkts) {
- tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
+ u64 tmp = div64_u64(priv->nsecs, priv->rate);
+
+ if (check_mul_overflow(tmp, priv->burst, &tokens))
+ return -EOVERFLOW;
} else {
+ u64 tmp;
+
/* The token bucket size limits the number of tokens can be
* accumulated. tokens_max specifies the bucket size.
* tokens_max = unit * (rate + burst) / rate.
*/
- tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
- priv->rate);
+ if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp))
+ return -EOVERFLOW;
+
+ tokens = div64_u64(tmp, priv->rate);
}
if (tb[NFTA_LIMIT_FLAGS]) {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 870e5b113d13..a0055f510e31 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -24,7 +24,7 @@ struct nft_lookup {
struct nft_set_binding binding;
};
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 583885ce7232..808f5802c270 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx,
struct nft_nat *priv = nft_expr_priv(expr);
int err;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 7f61506e5b44..7fec57ff736f 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -63,7 +63,6 @@ static int nft_osf_init(const struct nft_ctx *ctx,
{
struct nft_osf *priv = nft_expr_priv(expr);
u32 flags;
- int err;
u8 ttl;
if (!tb[NFTA_OSF_DREG])
@@ -83,13 +82,9 @@ static int nft_osf_init(const struct nft_ctx *ctx,
priv->flags = flags;
}
- err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
- NULL, NFT_DATA_VALUE,
- NFT_OSF_MAXGENRELEN);
- if (err < 0)
- return err;
-
- return 0;
+ return nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE,
+ NFT_OSF_MAXGENRELEN);
}
static int nft_osf_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 35a2c28caa60..24d977138572 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->key) {
case NFT_RT_NEXTHOP4:
case NFT_RT_NEXTHOP6:
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6c2061bfdae6..6968a3b34236 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
+ u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
- if (nft_set_elem_expired(&he->ext))
+ if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
rcu_read_lock();
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index efd523496be4..c0ceea068936 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -362,11 +359,13 @@ static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
*
* Return: -1 on no match, bit position on 'match_only', 0 otherwise.
*/
-int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
- union nft_pipapo_map_bucket *mt, bool match_only)
+int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
+ unsigned long *dst,
+ const union nft_pipapo_map_bucket *mt, bool match_only)
{
unsigned long bitset;
- int k, ret = -1;
+ unsigned int k;
+ int ret = -1;
for (k = 0; k < len; k++) {
bitset = map[k];
@@ -412,25 +411,28 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
+ const struct nft_pipapo_match *m;
+ const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
- struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
bool map_index;
int i;
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +462,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +479,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +506,8 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
+ * @gfp: the type of memory to allocate (see kmalloc).
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,22 +517,27 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp, gfp_t gfp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *m = priv->clone;
unsigned long *res_map, *fill_map = NULL;
- struct nft_pipapo_field *f;
+ const struct nft_pipapo_match *m;
+ const struct nft_pipapo_field *f;
int i;
- res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
+ m = priv->clone;
+ if (m->bsize_max == 0)
+ return ret;
+
+ res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp);
if (!res_map) {
ret = ERR_PTR(-ENOMEM);
goto out;
}
- fill_map = kcalloc(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
+ fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp);
if (!fill_map) {
ret = ERR_PTR(-ENOMEM);
goto out;
@@ -566,7 +575,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -603,10 +612,11 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
- static struct nft_pipapo_elem *e;
+ struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net), get_jiffies_64(),
+ GFP_ATOMIC);
if (IS_ERR(e))
return ERR_CAST(e);
@@ -614,6 +624,65 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
}
/**
+ * pipapo_realloc_mt() - Reallocate mapping table if needed upon resize
+ * @f: Field containing mapping table
+ * @old_rules: Amount of existing mapped rules
+ * @rules: Amount of new rules to map
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int pipapo_realloc_mt(struct nft_pipapo_field *f,
+ unsigned int old_rules, unsigned int rules)
+{
+ union nft_pipapo_map_bucket *new_mt = NULL, *old_mt = f->mt;
+ const unsigned int extra = PAGE_SIZE / sizeof(*new_mt);
+ unsigned int rules_alloc = rules;
+
+ might_sleep();
+
+ if (unlikely(rules == 0))
+ goto out_free;
+
+ /* growing and enough space left, no action needed */
+ if (rules > old_rules && f->rules_alloc > rules)
+ return 0;
+
+ /* downsize and extra slack has not grown too large */
+ if (rules < old_rules) {
+ unsigned int remove = f->rules_alloc - rules;
+
+ if (remove < (2u * extra))
+ return 0;
+ }
+
+ /* If set needs more than one page of memory for rules then
+ * allocate another extra page to avoid frequent reallocation.
+ */
+ if (rules > extra &&
+ check_add_overflow(rules, extra, &rules_alloc))
+ return -EOVERFLOW;
+
+ new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL);
+ if (!new_mt)
+ return -ENOMEM;
+
+ if (old_mt)
+ memcpy(new_mt, old_mt, min(old_rules, rules) * sizeof(*new_mt));
+
+ if (rules > old_rules) {
+ memset(new_mt + old_rules, 0,
+ (rules - old_rules) * sizeof(*new_mt));
+ }
+out_free:
+ f->rules_alloc = rules_alloc;
+ f->mt = new_mt;
+
+ kvfree(old_mt);
+
+ return 0;
+}
+
+/**
* pipapo_resize() - Resize lookup or mapping table, or both
* @f: Field containing lookup and mapping tables
* @old_rules: Previous amount of rules in field
@@ -625,12 +694,15 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
*
* Return: 0 on success, -ENOMEM on allocation failure.
*/
-static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
+static int pipapo_resize(struct nft_pipapo_field *f,
+ unsigned int old_rules, unsigned int rules)
{
long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
- union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt;
- size_t new_bucket_size, copy;
- int group, bucket;
+ unsigned int new_bucket_size, copy;
+ int group, bucket, err;
+
+ if (rules >= NFT_PIPAPO_RULE0_MAX)
+ return -ENOSPC;
new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG);
#ifdef NFT_PIPAPO_ALIGN
@@ -670,27 +742,18 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
}
mt:
- new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL);
- if (!new_mt) {
+ err = pipapo_realloc_mt(f, old_rules, rules);
+ if (err) {
kvfree(new_lt);
- return -ENOMEM;
- }
-
- memcpy(new_mt, f->mt, min(old_rules, rules) * sizeof(*new_mt));
- if (rules > old_rules) {
- memset(new_mt + old_rules, 0,
- (rules - old_rules) * sizeof(*new_mt));
+ return err;
}
if (new_lt) {
f->bsize = new_bucket_size;
- NFT_PIPAPO_LT_ASSIGN(f, new_lt);
+ f->lt = new_lt;
kvfree(old_lt);
}
- f->mt = new_mt;
- kvfree(old_mt);
-
return 0;
}
@@ -841,8 +904,8 @@ static void pipapo_lt_8b_to_4b(int old_groups, int bsize,
*/
static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
{
+ unsigned int groups, bb;
unsigned long *new_lt;
- int groups, bb;
size_t lt_size;
lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
@@ -892,7 +955,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
f->groups = groups;
f->bb = bb;
kvfree(f->lt);
- NFT_PIPAPO_LT_ASSIGN(f, new_lt);
+ f->lt = new_lt;
}
/**
@@ -909,7 +972,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
- int rule = f->rules, group, ret, bit_offset = 0;
+ unsigned int rule = f->rules, group, ret, bit_offset = 0;
ret = pipapo_resize(f, f->rules, f->rules + 1);
if (ret)
@@ -1109,6 +1172,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1121,12 +1203,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1140,14 +1223,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1173,6 +1267,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1182,7 +1277,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1204,7 +1299,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp,
+ GFP_KERNEL);
}
if (PTR_ERR(dup) != -ENOENT) {
@@ -1217,8 +1313,14 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
/* Validate */
start_p = start;
end_p = end;
+
+ /* some helpers return -1, or 0 >= for valid rule pos,
+ * so we cannot support more than INT_MAX rules at this time.
+ */
+ BUILD_BUG_ON(NFT_PIPAPO_RULE0_MAX > INT_MAX);
+
nft_pipapo_for_each_field(f, i, m) {
- if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
+ if (f->rules >= NFT_PIPAPO_RULE0_MAX)
return -ENOSPC;
if (memcmp(start_p, end_p,
@@ -1301,11 +1403,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1329,18 +1426,25 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new_lt)
goto out_lt;
- NFT_PIPAPO_LT_ASSIGN(dst, new_lt);
+ dst->lt = new_lt;
memcpy(NFT_PIPAPO_LT_ALIGN(new_lt),
NFT_PIPAPO_LT_ALIGN(src->lt),
src->bsize * sizeof(*dst->lt) *
src->groups * NFT_PIPAPO_BUCKETS(src->bb));
- dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL);
- if (!dst->mt)
- goto out_mt;
+ if (src->rules > 0) {
+ dst->mt = kvmalloc_array(src->rules_alloc,
+ sizeof(*src->mt), GFP_KERNEL);
+ if (!dst->mt)
+ goto out_mt;
+
+ memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt));
+ } else {
+ dst->mt = NULL;
+ dst->rules_alloc = 0;
+ }
- memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt));
src++;
dst++;
}
@@ -1357,10 +1461,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1397,10 +1498,10 @@ out_scratch:
*
* Return: Number of rules that originated from the same entry as @first.
*/
-static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first)
+static unsigned int pipapo_rules_same_key(struct nft_pipapo_field *f, unsigned int first)
{
struct nft_pipapo_elem *e = NULL; /* Keep gcc happy */
- int r;
+ unsigned int r;
for (r = first; r < f->rules; r++) {
if (r != first && e != f->mt[r].e)
@@ -1453,8 +1554,9 @@ static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first)
* 0 1 2
* element pointers: 0x42 0x42 0x44
*/
-static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules,
- int start, int n, int to_offset, bool is_last)
+static void pipapo_unmap(union nft_pipapo_map_bucket *mt, unsigned int rules,
+ unsigned int start, unsigned int n,
+ unsigned int to_offset, bool is_last)
{
int i;
@@ -1560,7 +1662,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
- int rules_f0, first_rule = 0;
+ unsigned int rules_f0, first_rule = 0;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1570,8 +1673,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
- struct nft_pipapo_field *f;
- int i, start, rules_fx;
+ const struct nft_pipapo_field *f;
+ unsigned int i, start, rules_fx;
start = first_rule;
rules_fx = rules_f0;
@@ -1594,7 +1697,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -1640,13 +1743,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1769,7 +1868,8 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net),
+ nft_net_tstamp(net), GFP_KERNEL);
if (IS_ERR(e))
return NULL;
@@ -1953,7 +2053,7 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
- int rules_f0, first_rule = 0;
+ unsigned int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
const u8 *data;
@@ -2016,9 +2116,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
- struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
- int i, r;
+ const struct nft_pipapo_match *m;
+ const struct nft_pipapo_field *f;
+ unsigned int i, r;
rcu_read_lock();
if (iter->genmask == nft_genmask_cur(net))
@@ -2122,6 +2222,9 @@ static int nft_pipapo_init(const struct nft_set *set,
field_count = desc->field_count ? : 1;
+ BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS > 255);
+ BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS != NFT_REG32_COUNT);
+
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
@@ -2132,7 +2235,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2140,20 +2243,14 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
- int len = desc->field_len[i] ? : set->klen;
+ unsigned int len = desc->field_len[i] ? : set->klen;
+
+ /* f->groups is u8 */
+ BUILD_BUG_ON((NFT_PIPAPO_MAX_BYTES *
+ BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS_LARGE_SET) >= 256);
f->bb = NFT_PIPAPO_GROUP_BITS_INIT;
f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f);
@@ -2162,7 +2259,8 @@ static int nft_pipapo_init(const struct nft_set *set,
f->bsize = 0;
f->rules = 0;
- NFT_PIPAPO_LT_ASSIGN(f, NULL);
+ f->rules_alloc = 0;
+ f->lt = NULL;
f->mt = NULL;
}
@@ -2180,9 +2278,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2201,7 +2296,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
struct nft_pipapo_match *m)
{
struct nft_pipapo_field *f;
- int i, r;
+ unsigned int i, r;
for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
;
@@ -2236,11 +2331,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2253,11 +2345,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 1040223da5fa..24cd1ff73f98 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -70,15 +70,9 @@
#define NFT_PIPAPO_ALIGN_HEADROOM \
(NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
-#define NFT_PIPAPO_LT_ASSIGN(field, x) \
- do { \
- (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \
- (field)->lt = (x); \
- } while (0)
#else
#define NFT_PIPAPO_ALIGN_HEADROOM 0
#define NFT_PIPAPO_LT_ALIGN(lt) (lt)
-#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x))
#endif /* NFT_PIPAPO_ALIGN */
#define nft_pipapo_for_each_field(field, index, match) \
@@ -110,42 +104,48 @@ union nft_pipapo_map_bucket {
/**
* struct nft_pipapo_field - Lookup, mapping tables and related data for a field
- * @groups: Amount of bit groups
* @rules: Number of inserted rules
* @bsize: Size of each bucket in lookup table, in longs
+ * @rules_alloc: Number of allocated rules, always >= rules
+ * @groups: Amount of bit groups
* @bb: Number of bits grouped together in lookup table buckets
* @lt: Lookup table: 'groups' rows of buckets
- * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
* @mt: Mapping table: one bucket per rule
*/
struct nft_pipapo_field {
- int groups;
- unsigned long rules;
- size_t bsize;
- int bb;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long *lt_aligned;
-#endif
+ unsigned int rules;
+ unsigned int bsize;
+ unsigned int rules_alloc;
+ u8 groups;
+ u8 bb;
unsigned long *lt;
union nft_pipapo_map_bucket *mt;
};
/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map: store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+ u8 map_index;
+ u32 align_off;
+ unsigned long map[];
+};
+
+/**
* struct nft_pipapo_match - Data used for lookup and matching
- * @field_count Amount of fields in set
- * @scratch: Preallocated per-CPU maps for partial matching results
- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
+ * @field_count: Amount of fields in set
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
- * @rcu Matching data is swapped on commits
+ * @scratch: Preallocated per-CPU maps for partial matching results
+ * @rcu: Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
- int field_count;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long * __percpu *scratch_aligned;
-#endif
- unsigned long * __percpu *scratch;
- size_t bsize_max;
+ u8 field_count;
+ unsigned int bsize_max;
+ struct nft_pipapo_scratch * __percpu *scratch;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);
};
@@ -178,8 +178,9 @@ struct nft_pipapo_elem {
struct nft_set_ext ext;
};
-int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
- union nft_pipapo_map_bucket *mt, bool match_only);
+int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
+ unsigned long *dst,
+ const union nft_pipapo_map_bucket *mt, bool match_only);
/**
* pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
@@ -187,7 +188,7 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
* @dst: Area to store result
* @data: Input data selecting table buckets
*/
-static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
+static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f,
unsigned long *dst,
const u8 *data)
{
@@ -215,7 +216,7 @@ static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
* @dst: Area to store result
* @data: Input data selecting table buckets
*/
-static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
+static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f,
unsigned long *dst,
const u8 *data)
{
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 52e0d026d30a..d08407d589ea 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)
/* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@@ -215,8 +212,9 @@ static int nft_pipapo_avx2_refill(int offset, unsigned long *map,
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf };
@@ -277,8 +275,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf };
@@ -353,8 +352,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
@@ -448,8 +448,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
@@ -537,8 +538,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
@@ -672,8 +674,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
unsigned long *lt = f->lt, bsize = f->bsize;
@@ -729,8 +732,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
unsigned long *lt = f->lt, bsize = f->bsize;
@@ -793,8 +797,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
unsigned long *lt = f->lt, bsize = f->bsize;
@@ -868,8 +873,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
unsigned long *lt = f->lt, bsize = f->bsize;
@@ -953,8 +959,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
unsigned long *lt = f->lt, bsize = f->bsize;
@@ -1045,8 +1052,9 @@ nothing:
* word index to be checked next (i.e. first filled word).
*/
static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
- struct nft_pipapo_field *f, int offset,
- const u8 *pkt, bool first, bool last)
+ const struct nft_pipapo_field *f,
+ int offset, const u8 *pkt,
+ bool first, bool last)
{
unsigned long bsize = f->bsize;
int i, ret = -1, b;
@@ -1120,11 +1128,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
- unsigned long *res, *fill, *scratch;
+ struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
+ const struct nft_pipapo_match *m;
+ const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
- struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
+ unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1141,15 +1150,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch_aligned);
+ scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
- res = scratch + (map_index ? m->bsize_max : 0);
- fill = scratch + (map_index ? 0 : m->bsize_max);
+ map_index = scratch->map_index;
+
+ res = scratch->map + (map_index ? m->bsize_max : 0);
+ fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@@ -1221,7 +1231,7 @@ next_match:
out:
if (i % 2)
- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index baa3fea4fe65..9944fe479e53 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe, u8 genmask)
+ struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
- nft_set_elem_active(&rbe_prev->ext, genmask))
+ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
@@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
/* Descend the tree to search for an existing element greater than the
@@ -360,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports
* but skip new elements in this transaction.
*/
- if (nft_set_elem_expired(&rbe->ext) &&
+ if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
const struct nft_rbtree_elem *removed_end;
- removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
@@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
- } else if (nft_set_elem_expired(&rbe->ext)) {
+ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
@@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
@@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set)
rbe_end = rbe;
continue;
}
- if (!nft_set_elem_expired(&rbe->ext))
+ if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 9ed85be79452..f30163e2ca62 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 13da882669a4..1d737f89dfc1 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
err = nf_synproxy_ipv4_init(snet, ctx->net);
if (err)
goto nf_ct_failure;
@@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
nf_synproxy_ipv4_fini(snet, ctx->net);
nf_synproxy_ipv6_fini(snet, ctx->net);
break;
@@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD));
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index ae15cd693f0e..71412adb73d4 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 9f21953c7433..f735d79d8be5 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
.type = NFT_OBJECT_TUNNEL,
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_obj_ops,
.maxattr = NFTA_TUNNEL_KEY_MAX,
.policy = nft_tunnel_key_policy,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 452f8587adda..1c866757db55 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->dir) {
case XFRM_POLICY_IN:
hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index acef4155f0da..008419db815a 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -179,43 +179,6 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
}
EXPORT_SYMBOL_GPL(nf_route);
-static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
-{
-#ifdef CONFIG_INET
- const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(skb);
-
- if (!(iph->tos == rt_info->tos &&
- skb->mark == rt_info->mark &&
- iph->daddr == rt_info->daddr &&
- iph->saddr == rt_info->saddr))
- return ip_route_me_harder(entry->state.net, entry->state.sk,
- skb, RTN_UNSPEC);
- }
-#endif
- return 0;
-}
-
-int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
-{
- const struct nf_ipv6_ops *v6ops;
- int ret = 0;
-
- switch (entry->state.pf) {
- case AF_INET:
- ret = nf_ip_reroute(skb, entry);
- break;
- case AF_INET6:
- v6ops = rcu_dereference(nf_ipv6_ops);
- if (v6ops)
- ret = v6ops->reroute(skb, entry);
- break;
- }
- return ret;
-}
-
/* Only get and check the lengths, not do any hop-by-hop stuff. */
int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen)
{
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 21624d68314f..da5d929c7c85 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1142,7 +1142,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
if (target->compat_from_user)
target->compat_from_user(t->data, ct->data);
else
- memcpy(t->data, ct->data, tsize - sizeof(*ct));
+ unsafe_memcpy(t->data, ct->data, tsize - sizeof(*ct),
+ /* UAPI 0-sized destination */);
tsize += off;
t->u.user.target_size = tsize;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 27511c90a26f..1ba4f58e1d35 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -610,7 +610,7 @@ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset)
struct netlbl_lsm_catmap *iter;
u32 idx;
u32 bit;
- NETLBL_CATMAP_MAPTYPE bitmap;
+ u64 bitmap;
iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0);
if (iter == NULL)
@@ -666,8 +666,8 @@ int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset)
struct netlbl_lsm_catmap *prev = NULL;
u32 idx;
u32 bit;
- NETLBL_CATMAP_MAPTYPE bitmask;
- NETLBL_CATMAP_MAPTYPE bitmap;
+ u64 bitmask;
+ u64 bitmap;
iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0);
if (iter == NULL)
@@ -857,7 +857,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
offset -= iter->startbit;
idx = offset / NETLBL_CATMAP_MAPSIZE;
- iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
+ iter->bitmap[idx] |= (u64)bitmap
<< (offset % NETLBL_CATMAP_MAPSIZE);
return 0;
@@ -876,7 +876,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
* Description:
* Starting at @offset, walk the bitmap from left to right until either the
* desired bit is found or we reach the end. Return the bit offset, -1 if
- * not found, or -2 if error.
+ * not found.
*/
int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
u32 offset, u8 state)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ed8ffd58ff3..7554803218a2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
"nlk_cb_mutex-MAX_LINKS"
};
-static int netlink_dump(struct sock *sk);
+static int netlink_dump(struct sock *sk, bool lock_taken);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
gfp_t gfp_mask)
{
- unsigned int len = skb_end_offset(skb);
+ unsigned int len = skb->len;
struct sk_buff *new;
new = alloc_skb(len, gfp_mask);
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
- vfree(skb->head);
+ vfree_atomic(skb->head);
skb->head = NULL;
}
@@ -636,7 +636,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol,
+ struct mutex *dump_cb_mutex, int protocol,
int kern)
{
struct sock *sk;
@@ -651,15 +651,11 @@ static int __netlink_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- if (cb_mutex) {
- nlk->cb_mutex = cb_mutex;
- } else {
- nlk->cb_mutex = &nlk->cb_def_mutex;
- mutex_init(nlk->cb_mutex);
- lockdep_set_class_and_name(nlk->cb_mutex,
+ mutex_init(&nlk->nl_cb_mutex);
+ lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
- }
+ nlk->dump_cb_mutex = dump_cb_mutex;
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -1206,23 +1202,21 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
+ size_t head_size = SKB_HEAD_ALIGN(size);
struct sk_buff *skb;
void *data;
- if (size <= NLMSG_GOODSIZE || broadcast)
+ if (head_size <= PAGE_SIZE || broadcast)
return alloc_skb(size, GFP_KERNEL);
- size = SKB_DATA_ALIGN(size) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
- data = vmalloc(size);
- if (data == NULL)
+ data = kvmalloc(head_size, GFP_KERNEL);
+ if (!data)
return NULL;
- skb = __build_skb(data, size);
- if (skb == NULL)
- vfree(data);
- else
+ skb = __build_skb(data, head_size);
+ if (!skb)
+ kvfree(data);
+ else if (is_vmalloc_addr(data))
skb->destructor = netlink_skb_destructor;
return skb;
@@ -1779,6 +1773,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
netlink_unlock_table();
return err;
}
+ case NETLINK_LISTEN_ALL_NSID:
+ flag = NETLINK_F_LISTEN_ALL_NSID;
+ break;
case NETLINK_CAP_ACK:
flag = NETLINK_F_CAP_ACK;
break;
@@ -1987,7 +1984,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, false);
if (ret) {
WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
@@ -2196,7 +2193,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
return 0;
}
-static int netlink_dump(struct sock *sk)
+static int netlink_dump(struct sock *sk, bool lock_taken)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ext_ack extack = {};
@@ -2208,7 +2205,8 @@ static int netlink_dump(struct sock *sk)
int alloc_min_size;
int alloc_size;
- mutex_lock(nlk->cb_mutex);
+ if (!lock_taken)
+ mutex_lock(&nlk->nl_cb_mutex);
if (!nlk->cb_running) {
err = -EINVAL;
goto errout_skb;
@@ -2260,14 +2258,33 @@ static int netlink_dump(struct sock *sk)
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
+ struct mutex *extra_mutex = nlk->dump_cb_mutex;
+
cb->extack = &extack;
+
+ if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
+ extra_mutex = NULL;
+ if (extra_mutex)
+ mutex_lock(extra_mutex);
nlk->dump_done_errno = cb->dump(skb, cb);
+ if (extra_mutex)
+ mutex_unlock(extra_mutex);
+
+ /* EMSGSIZE plus something already in the skb means
+ * that there's more to dump but current skb has filled up.
+ * If the callback really wants to return EMSGSIZE to user space
+ * it needs to do so again, on the next cb->dump() call,
+ * without putting data in the skb.
+ */
+ if (nlk->dump_done_errno == -EMSGSIZE && skb->len)
+ nlk->dump_done_errno = skb->len;
+
cb->extack = NULL;
}
if (nlk->dump_done_errno > 0 ||
skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2301,13 +2318,13 @@ static int netlink_dump(struct sock *sk)
WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
module_put(module);
consume_skb(skb);
return 0;
errout_skb:
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
kfree_skb(skb);
return err;
}
@@ -2330,7 +2347,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
- mutex_lock(nlk->cb_mutex);
+ mutex_lock(&nlk->nl_cb_mutex);
/* A dump is in progress... */
if (nlk->cb_running) {
ret = -EBUSY;
@@ -2350,6 +2367,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->data = control->data;
cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
+ cb->flags = control->flags;
cb->skb = skb;
cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
@@ -2365,9 +2383,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
- mutex_unlock(nlk->cb_mutex);
-
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, true);
sock_put(sk);
@@ -2383,7 +2399,7 @@ error_put:
module_put(control->module);
error_unlock:
sock_put(sk);
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
error_free:
kfree_skb(skb);
return ret;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 2145979b9986..9751e29d4bbb 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -39,8 +39,9 @@ struct netlink_sock {
bool cb_running;
int dump_done_errno;
struct netlink_callback cb;
- struct mutex *cb_mutex;
- struct mutex cb_def_mutex;
+ struct mutex nl_cb_mutex;
+
+ struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 1eeff9422856..61981e01fd6f 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -207,7 +207,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
- return err < 0 ? err : skb->len;
+ return err <= 0 ? err : skb->len;
}
static int netlink_diag_dump_done(struct netlink_callback *cb)
@@ -241,6 +241,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler netlink_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_NETLINK,
.dump = netlink_diag_handler_dump,
};
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8c7af02f8454..3b7666944b11 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1232,7 +1232,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq,
hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd);
if (hdr == NULL)
- return -1;
+ return -EMSGSIZE;
if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) ||
nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id) ||
@@ -1355,6 +1355,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
int fams_to_skip = cb->args[0];
unsigned int id;
+ int err = 0;
idr_for_each_entry(&genl_fam_idr, rt, id) {
if (!rt->netnsok && !net_eq(net, &init_net))
@@ -1363,16 +1364,17 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
if (n++ < fams_to_skip)
continue;
- if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- skb, CTRL_CMD_NEWFAMILY) < 0) {
+ err = ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ skb, CTRL_CMD_NEWFAMILY);
+ if (err) {
n--;
break;
}
}
cb->args[0] = n;
- return skb->len;
+ return err;
}
static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,
@@ -1836,6 +1838,9 @@ static int genl_bind(struct net *net, int group)
!ns_capable(net->user_ns, CAP_SYS_ADMIN))
ret = -EPERM;
+ if (family->bind)
+ family->bind(i);
+
break;
}
@@ -1843,12 +1848,39 @@ static int genl_bind(struct net *net, int group)
return ret;
}
+static void genl_unbind(struct net *net, int group)
+{
+ const struct genl_family *family;
+ unsigned int id;
+
+ down_read(&cb_lock);
+
+ idr_for_each_entry(&genl_fam_idr, family, id) {
+ int i;
+
+ if (family->n_mcgrps == 0)
+ continue;
+
+ i = group - family->mcgrp_offset;
+ if (i < 0 || i >= family->n_mcgrps)
+ continue;
+
+ if (family->unbind)
+ family->unbind(i);
+
+ break;
+ }
+
+ up_read(&cb_lock);
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
+ .unbind = genl_unbind,
.release = genl_release,
};
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 0eed00184adf..104a80b75477 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
nr_init_timers(sk);
nr->t1 =
- msecs_to_jiffies(sysctl_netrom_transport_timeout);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout));
nr->t2 =
- msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay));
nr->n2 =
- msecs_to_jiffies(sysctl_netrom_transport_maximum_tries);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries));
nr->t4 =
- msecs_to_jiffies(sysctl_netrom_transport_busy_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay));
nr->idle =
- msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout);
- nr->window = sysctl_netrom_transport_requested_window_size;
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout));
+ nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size);
nr->bpqext = 1;
nr->state = NR_STATE_0;
@@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
* G8PZT's Xrouter which is sending packets with command type 7
* as an extension of the protocol.
*/
- if (sysctl_netrom_reset_circuit &&
+ if (READ_ONCE(sysctl_netrom_reset_circuit) &&
(frametype != NR_RESET || flags != 0))
nr_transmit_reset(skb, 1);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 3aaac4a22b38..2c34389c3ce6 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev,
buff[6] |= AX25_SSSID_SPARE;
buff += AX25_ADDR_LEN;
- *buff++ = sysctl_netrom_network_ttl_initialiser;
+ *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
*buff++ = NR_PROTO_IP;
*buff++ = NR_PROTO_IP;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 2f084b6f69d7..97944db6b5ac 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 44929657f5b7..5e531394a724 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (!nr_route_frame(skb, NULL)) {
kfree_skb(skb);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index baea3cbd76ca..70480869ad1c 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_neigh->digipeat = NULL;
nr_neigh->ax25 = NULL;
nr_neigh->dev = dev;
- nr_neigh->quality = sysctl_netrom_default_path_quality;
+ nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality);
nr_neigh->locked = 0;
nr_neigh->count = 0;
nr_neigh->number = nr_neigh_no++;
@@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason)
nr_neigh->ax25 = NULL;
ax25_cb_put(ax25);
- if (++nr_neigh->failed < sysctl_netrom_link_fails_count) {
+ if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) {
nr_neigh_put(nr_neigh);
return;
}
@@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
if (ax25 != NULL) {
ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
ax25->ax25_dev->dev, 0,
- sysctl_netrom_obsolescence_count_initialiser);
+ READ_ONCE(sysctl_netrom_obsolescence_count_initialiser));
if (ret)
return ret;
}
@@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return ret;
}
- if (!sysctl_netrom_routing_control && ax25 != NULL)
+ if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL)
return 0;
/* Its Time-To-Live has expired */
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index e2d2af924cff..c3bbd5880850 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype)
*dptr++ = nr->my_id;
*dptr++ = frametype;
*dptr++ = nr->window;
- if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ if (nr->bpqext)
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
break;
case NR_DISCREQ:
@@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (mine) {
*dptr++ = 0;
diff --git a/net/nfc/core.c b/net/nfc/core.c
index eb2c0959e5b6..e58dc6405054 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,7 +1015,7 @@ static void nfc_check_pres_timeout(struct timer_list *t)
schedule_work(&dev->check_pres_work);
}
-struct class nfc_class = {
+const struct class nfc_class = {
.name = "nfc",
.dev_release = nfc_release,
};
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 2140f6724644..ba91284f4086 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -30,15 +30,19 @@ exit:
return r;
}
+static void nfc_llc_del_engine(struct nfc_llc_engine *llc_engine)
+{
+ list_del(&llc_engine->entry);
+ kfree_const(llc_engine->name);
+ kfree(llc_engine);
+}
+
void nfc_llc_exit(void)
{
struct nfc_llc_engine *llc_engine, *n;
- list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) {
- list_del(&llc_engine->entry);
- kfree(llc_engine->name);
- kfree(llc_engine);
- }
+ list_for_each_entry_safe(llc_engine, n, &llc_engines, entry)
+ nfc_llc_del_engine(llc_engine);
}
int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
@@ -49,7 +53,7 @@ int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
if (llc_engine == NULL)
return -ENOMEM;
- llc_engine->name = kstrdup(name, GFP_KERNEL);
+ llc_engine->name = kstrdup_const(name, GFP_KERNEL);
if (llc_engine->name == NULL) {
kfree(llc_engine);
return -ENOMEM;
@@ -82,9 +86,7 @@ void nfc_llc_unregister(const char *name)
if (llc_engine == NULL)
return;
- list_del(&llc_engine->entry);
- kfree(llc_engine->name);
- kfree(llc_engine);
+ nfc_llc_del_engine(llc_engine);
}
struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 97348cedb16b..cdad47b140fa 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
nci_hci_deallocate(ndev);
+
+ /* drop partial rx data packet if present */
+ if (ndev->rx_data_reassembly)
+ kfree_skb(ndev->rx_data_reassembly);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 88965e2068ac..ebc5728aab4e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16
static bool actions_may_change_flow(const struct nlattr *actions)
{
@@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log);
+ u32 mpls_label_count, bool log,
+ u32 depth);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
int start, action_start, err, rem;
@@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
return action_start;
err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
- vlan_tci, mpls_label_count, log);
+ vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
int start, err;
u32 exec;
@@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
u32 mpls_label_count,
- bool log, bool last)
+ bool log, bool last, u32 depth)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
@@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
+ if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+ return -EOVERFLOW;
+
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
eth_type,
vlan_tci,
mpls_label_count,
- log, last);
+ log, last,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_DEC_TTL:
err = validate_and_copy_dec_ttl(net, a, key, sfa,
eth_type, vlan_tci,
- mpls_label_count, log);
+ mpls_label_count, log,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, mpls_label_count, log);
+ key->eth.vlan.tci, mpls_label_count, log,
+ 0);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c9bbc2686690..61270826b9ac 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2057,7 +2057,7 @@ retry:
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
-
+ skb->mono_delivery_time = !!skb->tstamp;
skb_setup_tx_timestamp(skb, sockc.tsflags);
if (unlikely(extra_len == 4))
@@ -2318,7 +2318,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
if (po->tp_version <= TPACKET_V2) {
if (macoff + snaplen > po->rx_ring.frame_size) {
- if (po->copy_thresh &&
+ if (READ_ONCE(po->copy_thresh) &&
atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
if (skb_shared(skb)) {
copy_skb = skb_clone(skb, GFP_ATOMIC);
@@ -2586,6 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->priority = READ_ONCE(po->sk.sk_priority);
skb->mark = READ_ONCE(po->sk.sk_mark);
skb->tstamp = sockc->transmit_time;
+ skb->mono_delivery_time = !!skb->tstamp;
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -3064,6 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
+ skb->mono_delivery_time = !!skb->tstamp;
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -3834,7 +3836,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- pkt_sk(sk)->copy_thresh = val;
+ WRITE_ONCE(pkt_sk(sk)->copy_thresh, val);
return 0;
}
case PACKET_VERSION:
@@ -4088,6 +4090,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
case PACKET_VNET_HDR_SZ:
val = READ_ONCE(po->vnet_hdr_sz);
break;
+ case PACKET_COPY_THRESH:
+ val = READ_ONCE(pkt_sk(sk)->copy_thresh);
+ break;
case PACKET_VERSION:
val = po->tp_version;
break;
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 9a7980e3309d..47f69f3dbf73 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -17,7 +17,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
pinfo.pdi_index = po->ifindex;
pinfo.pdi_version = po->tp_version;
pinfo.pdi_reserve = po->tp_reserve;
- pinfo.pdi_copy_thresh = po->copy_thresh;
+ pinfo.pdi_copy_thresh = READ_ONCE(po->copy_thresh);
pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp);
pinfo.pdi_flags = 0;
@@ -245,6 +245,7 @@ static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler packet_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_PACKET,
.dump = packet_diag_handler_dump,
};
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 3aa50dc7535b..976fe250b509 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg)
switch (cmd) {
case SIOCINQ:
- lock_sock(sk);
+ spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
*karg = skb ? skb->len : 0;
- release_sock(sk);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
return 0;
case SIOCPNADDRESOURCE:
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index faba31f2eff2..3dd5f52bc1b5 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
return 0;
}
+static unsigned int pep_first_packet_length(struct sock *sk)
+{
+ struct pep_sock *pn = pep_sk(sk);
+ struct sk_buff_head *q;
+ struct sk_buff *skb;
+ unsigned int len = 0;
+ bool found = false;
+
+ if (sock_flag(sk, SOCK_URGINLINE)) {
+ q = &pn->ctrlreq_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb) {
+ len = skb->len;
+ found = true;
+ }
+ spin_unlock_bh(&q->lock);
+ }
+
+ if (likely(!found)) {
+ q = &sk->sk_receive_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb)
+ len = skb->len;
+ spin_unlock_bh(&q->lock);
+ }
+
+ return len;
+}
+
static int pep_ioctl(struct sock *sk, int cmd, int *karg)
{
struct pep_sock *pn = pep_sk(sk);
@@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg)
break;
}
- lock_sock(sk);
- if (sock_flag(sk, SOCK_URGINLINE) &&
- !skb_queue_empty(&pn->ctrlreq_queue))
- *karg = skb_peek(&pn->ctrlreq_queue)->len;
- else if (!skb_queue_empty(&sk->sk_receive_queue))
- *karg = skb_peek(&sk->sk_receive_queue)->len;
- else
- *karg = 0;
- release_sock(sk);
+ *karg = pep_first_packet_length(sk);
ret = 0;
break;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 01c4cdfef45d..8435a20968ef 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
rs->rs_rx_traces = trace.rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
- if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
+ if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
rs->rs_rx_traces = 0;
return -EFAULT;
}
diff --git a/net/rds/connection.c b/net/rds/connection.c
index b4cc699c5fad..c749c5525b40 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -829,9 +829,7 @@ int rds_conn_init(void)
if (ret)
return ret;
- rds_conn_slab = kmem_cache_create("rds_connection",
- sizeof(struct rds_connection),
- 0, 0, NULL);
+ rds_conn_slab = KMEM_CACHE(rds_connection, 0);
if (!rds_conn_slab) {
rds_loop_net_exit();
return -ENOMEM;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index fba82d36593a..a4e3c5de998b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
kfree(sg);
}
ret = PTR_ERR(trans_private);
+ /* Trigger connection so that its ready for the next retry */
+ if (ret == -ENODEV)
+ rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c71b923764fd..5627f80013f8 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
struct sock *sk = rds_rs_to_sk(rs);
int ret = 0;
unsigned long flags;
+ struct rds_incoming *to_drop = NULL;
write_lock_irqsave(&rs->rs_recv_lock, flags);
if (!list_empty(&inc->i_item)) {
@@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
-be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
list_del_init(&inc->i_item);
- rds_inc_put(inc);
+ to_drop = inc;
}
}
write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ if (to_drop)
+ rds_inc_put(to_drop);
+
rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
return ret;
}
@@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
struct sock *sk = rds_rs_to_sk(rs);
struct rds_incoming *inc, *tmp;
unsigned long flags;
+ LIST_HEAD(to_drop);
write_lock_irqsave(&rs->rs_recv_lock, flags);
list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
-be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
+ list_move(&inc->i_item, &to_drop);
+ }
+ write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+
+ list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
list_del_init(&inc->i_item);
rds_inc_put(inc);
}
- write_unlock_irqrestore(&rs->rs_recv_lock, flags);
}
/*
diff --git a/net/rds/send.c b/net/rds/send.c
index 5e57a1581dc6..2899def23865 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1313,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
- if (ret) {
- /* Trigger connection so that its ready for the next retry */
- if (ret == -EAGAIN)
- rds_conn_connect_if_down(conn);
+ if (ret)
goto out;
- }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 465bfe5eb061..5222bc97d192 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -487,7 +487,7 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
* rxrpc_kernel_set_max_life - Set maximum lifespan on a call
* @sock: The socket the call is on
* @call: The call to configure
- * @hard_timeout: The maximum lifespan of the call in jiffies
+ * @hard_timeout: The maximum lifespan of the call in ms
*
* Set the maximum lifespan of a call. The call will end with ETIME or
* ETIMEDOUT if it takes longer than this.
@@ -495,14 +495,14 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call,
unsigned long hard_timeout)
{
- unsigned long now;
+ ktime_t delay = ms_to_ktime(hard_timeout), expect_term_by;
mutex_lock(&call->user_mutex);
- now = jiffies;
- hard_timeout += now;
- WRITE_ONCE(call->expect_term_by, hard_timeout);
- rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard);
+ expect_term_by = ktime_add(ktime_get_real(), delay);
+ WRITE_ONCE(call->expect_term_by, expect_term_by);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ rxrpc_poke_call(call, rxrpc_call_poke_set_timeout);
mutex_unlock(&call->user_mutex);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index dbeb75c29857..08c0a32db8c7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -198,12 +198,23 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
- struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
- u16 offset; /* Offset of data */
- u16 len; /* Length of data */
- u8 flags;
+ union {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
+ struct {
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
#define RXRPC_RX_VERIFIED 0x01
-
+ };
+ struct {
+ rxrpc_seq_t first_ack; /* First packet in acks table */
+ rxrpc_seq_t prev_ack; /* Highest seq seen */
+ rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */
+ u8 reason; /* Reason for ack */
+ u8 nr_acks; /* Number of acks+nacks */
+ u8 nr_nacks; /* Number of nacks */
+ } ack;
+ };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -240,10 +251,9 @@ struct rxrpc_security {
struct rxrpc_key_token *);
/* Work out how much data we can store in a packet, given an estimate
- * of the amount of data remaining.
+ * of the amount of data remaining and allocate a data buffer.
*/
- int (*how_much_data)(struct rxrpc_call *, size_t,
- size_t *, size_t *, size_t *);
+ struct rxrpc_txbuf *(*alloc_txbuf)(struct rxrpc_call *call, size_t remaining, gfp_t gfp);
/* impose security on a packet */
int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *);
@@ -284,6 +294,7 @@ struct rxrpc_local {
struct socket *socket; /* my UDP socket */
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
+ struct page_frag_cache tx_alloc; /* Tx control packet allocation (I/O thread only) */
struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
struct sk_buff_head rx_delay_queue; /* Delay injection queue */
@@ -344,8 +355,8 @@ struct rxrpc_peer {
u32 mdev_us; /* medium deviation */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
u32 rttvar_us; /* smoothed mdev_max */
- u32 rto_j; /* Retransmission timeout in jiffies */
- u8 backoff; /* Backoff timeout */
+ u32 rto_us; /* Retransmission timeout in usec */
+ u8 backoff; /* Backoff timeout (as shift) */
u8 cong_ssthresh; /* Congestion slow-start threshold */
};
@@ -492,6 +503,8 @@ struct rxrpc_connection {
struct list_head proc_link; /* link in procfs list */
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct page_frag_cache tx_data_alloc; /* Tx DATA packet allocation */
+ struct mutex tx_data_alloc_lock;
struct mutex security_lock; /* Lock for security management */
const struct rxrpc_security *security; /* applied security module */
@@ -510,7 +523,7 @@ struct rxrpc_connection {
enum rxrpc_call_completion completion; /* Completion condition */
s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
- atomic_t serial; /* packet serial number counter */
+ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
@@ -610,17 +623,17 @@ struct rxrpc_call {
const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
struct sockaddr_rxrpc dest_srx; /* Destination address */
- unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
- unsigned long ack_lost_at; /* When ACK is figured as lost */
- unsigned long resend_at; /* When next resend needs to happen */
- unsigned long ping_at; /* When next to send a ping */
- unsigned long keepalive_at; /* When next to send a keepalive ping */
- unsigned long expect_rx_by; /* When we expect to get a packet by */
- unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
- unsigned long expect_term_by; /* When we expect call termination by */
- u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
- u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
- u32 hard_timo; /* Maximum lifetime or 0 (jif) */
+ ktime_t delay_ack_at; /* When DELAY ACK needs to happen */
+ ktime_t ack_lost_at; /* When ACK is figured as lost */
+ ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t ping_at; /* When next to send a ping */
+ ktime_t keepalive_at; /* When next to send a keepalive ping */
+ ktime_t expect_rx_by; /* When we expect to get a packet by */
+ ktime_t expect_req_by; /* When we expect to get a request DATA packet by */
+ ktime_t expect_term_by; /* When we expect call termination by */
+ u32 next_rx_timo; /* Timeout for next Rx packet (ms) */
+ u32 next_req_timo; /* Timeout for next Rx request packet (ms) */
+ u32 hard_timo; /* Maximum lifetime or 0 (s) */
struct timer_list timer; /* Combined event timer */
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -665,7 +678,7 @@ struct rxrpc_call {
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
- u16 tx_backoff; /* Delay to insert due to Tx failure */
+ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */
u8 tx_winsize; /* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW 128
ktime_t tx_last_sent; /* Last time a transmission occurred */
@@ -692,11 +705,11 @@ struct rxrpc_call {
u8 cong_dup_acks; /* Count of ACKs showing missing packets */
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
+ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
u16 ackr_sack_base; /* Starting slot in SACK table ring */
- rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_seq_t ackr_window; /* Base of SACK window */
rxrpc_seq_t ackr_wtop; /* Base of SACK window */
unsigned int ackr_nr_unacked; /* Number of unacked packets */
@@ -730,7 +743,8 @@ struct rxrpc_call {
struct rxrpc_ack_summary {
u16 nr_acks; /* Number of ACKs in packet */
u16 nr_new_acks; /* Number of new ACKs in packet */
- u16 nr_rot_new_acks; /* Number of rotated new ACKs */
+ u16 nr_new_nacks; /* Number of new nacks in packet */
+ u16 nr_retained_nacks; /* Number of nacks retained between ACKs */
u8 ack_reason;
bool saw_nacks; /* Saw NACKs in packet */
bool new_low_nack; /* T if new low NACK found */
@@ -779,40 +793,30 @@ struct rxrpc_send_params {
* Buffer of data to be output as a packet.
*/
struct rxrpc_txbuf {
- struct rcu_head rcu;
struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
struct list_head tx_link; /* Link in live Enc queue or Tx queue */
ktime_t last_sent; /* Time at which last transmitted */
refcount_t ref;
rxrpc_seq_t seq; /* Sequence number of this packet */
+ rxrpc_serial_t serial; /* Last serial number transmitted with */
unsigned int call_debug_id;
unsigned int debug_id;
unsigned int len; /* Amount of data in buffer */
unsigned int space; /* Remaining data space */
unsigned int offset; /* Offset of fill point */
- unsigned long flags;
-#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */
-#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */
+ unsigned int flags;
+#define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */
+#define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */
+ __be16 cksum; /* Checksum to go in header */
+ unsigned short ack_rwind; /* ACK receive window */
u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */
- struct {
- /* The packet for encrypting and DMA'ing. We align it such
- * that data[] aligns correctly for any crypto blocksize.
- */
- u8 pad[64 - sizeof(struct rxrpc_wire_header)];
- struct rxrpc_wire_header wire; /* Network-ready header */
- union {
- u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */
- struct {
- struct rxrpc_ackpacket ack;
- DECLARE_FLEX_ARRAY(u8, acks);
- };
- };
- } __aligned(64);
+ u8 nr_kvec; /* Amount of kvec[] used */
+ struct kvec kvec[3];
};
static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb)
{
- return txb->wire.flags & RXRPC_CLIENT_INITIATED;
+ return txb->flags & RXRPC_CLIENT_INITIATED;
}
static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
@@ -823,6 +827,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
#include <trace/events/rxrpc.h>
/*
+ * Allocate the next serial number on a connection. 0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn)
+{
+ rxrpc_serial_t serial;
+
+ serial = conn->tx_serial;
+ if (serial == 0)
+ serial = 1;
+ conn->tx_serial = serial + 1;
+ return serial;
+}
+
+/*
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_rx_skbs;
@@ -846,17 +864,11 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
*/
void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why);
-void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
-void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why);
-
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
@@ -1137,9 +1149,9 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why);
int rxrpc_send_abort_packet(struct rxrpc_call *);
-int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
@@ -1200,7 +1212,7 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
*/
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int,
rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
-unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans);
void rxrpc_peer_init_rtt(struct rxrpc_peer *);
/*
@@ -1272,8 +1284,9 @@ static inline void rxrpc_sysctl_exit(void) {}
* txbuf.c
*/
extern atomic_t rxrpc_nr_txbuf;
-struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
- gfp_t gfp);
+struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
+ size_t data_align, gfp_t gfp);
+struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size);
void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e363f21a2014..7bbb68504766 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -23,14 +23,14 @@
void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why)
{
- unsigned long now = jiffies;
- unsigned long ping_at = now + rxrpc_idle_ack_delay;
-
- if (time_before(ping_at, call->ping_at)) {
- WRITE_ONCE(call->ping_at, ping_at);
- rxrpc_reduce_call_timer(call, ping_at, now,
- rxrpc_timer_set_for_ping);
- trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ ktime_t delay = ms_to_ktime(READ_ONCE(rxrpc_idle_ack_delay));
+ ktime_t now = ktime_get_real();
+ ktime_t ping_at = ktime_add(now, delay);
+
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ if (ktime_before(ping_at, call->ping_at)) {
+ call->ping_at = ping_at;
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_ping);
}
}
@@ -40,64 +40,18 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
enum rxrpc_propose_ack_trace why)
{
- unsigned long expiry = rxrpc_soft_ack_delay;
- unsigned long now = jiffies, ack_at;
-
- call->ackr_serial = serial;
-
- if (rxrpc_soft_ack_delay < expiry)
- expiry = rxrpc_soft_ack_delay;
- if (call->peer->srtt_us != 0)
- ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
- else
- ack_at = expiry;
-
- ack_at += READ_ONCE(call->tx_backoff);
- ack_at += now;
- if (time_before(ack_at, call->delay_ack_at)) {
- WRITE_ONCE(call->delay_ack_at, ack_at);
- rxrpc_reduce_call_timer(call, ack_at, now,
- rxrpc_timer_set_for_ack);
- }
+ ktime_t now = ktime_get_real(), delay;
trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);
-}
-
-/*
- * Queue an ACK for immediate transmission.
- */
-void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
- rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
-{
- struct rxrpc_txbuf *txb;
-
- if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- return;
- rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
-
- txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
- rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
- if (!txb) {
- kleave(" = -ENOMEM");
- return;
- }
+ if (call->peer->srtt_us)
+ delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC;
+ else
+ delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay));
+ ktime_add_ms(delay, call->tx_backoff);
- txb->ack_why = why;
- txb->wire.seq = 0;
- txb->wire.type = RXRPC_PACKET_TYPE_ACK;
- txb->wire.flags |= RXRPC_SLOW_START_OK;
- txb->ack.bufferSpace = 0;
- txb->ack.maxSkew = 0;
- txb->ack.firstPacket = 0;
- txb->ack.previousPacket = 0;
- txb->ack.serial = htonl(serial);
- txb->ack.reason = ack_reason;
- txb->ack.nAcks = 0;
-
- trace_rxrpc_send_ack(call, why, ack_reason, serial);
- rxrpc_send_ack_packet(call, txb);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
+ call->delay_ack_at = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack);
}
/*
@@ -114,26 +68,21 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
- unsigned long resend_at;
- rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
- ktime_t now, max_age, oldest, ack_ts;
- bool unacked = false;
+ rxrpc_seq_t transmitted = call->tx_transmitted;
+ ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
+ ktime_t resend_at = KTIME_MAX, now, delay;
+ bool unacked = false, did_send = false;
unsigned int i;
- LIST_HEAD(retrans_queue);
_enter("{%d,%d}", call->acks_hard_ack, call->tx_top);
now = ktime_get_real();
- max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
- oldest = now;
if (list_empty(&call->tx_buffer))
goto no_resend;
- if (list_empty(&call->tx_buffer))
- goto no_further_resend;
-
trace_rxrpc_resend(call, ack_skb);
txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
@@ -141,14 +90,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* explicitly NAK'd packets.
*/
if (ack_skb) {
+ sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < ack->nAcks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = ntohl(ack->firstPacket) + i;
+ seq = sp->ack.first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -160,19 +110,23 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
goto no_further_resend;
found_txb:
- if (after(ntohl(txb->wire.serial), call->acks_highest_serial))
+ resend_at = ktime_add(txb->last_sent, rto);
+ if (after(txb->serial, call->acks_highest_serial)) {
+ if (ktime_after(resend_at, now) &&
+ ktime_before(resend_at, next_resend))
+ next_resend = resend_at;
continue; /* Ack point not yet reached */
+ }
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
- if (list_empty(&txb->tx_link)) {
- list_add_tail(&txb->tx_link, &retrans_queue);
- set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
- }
+ trace_rxrpc_retransmit(call, txb->seq, txb->serial,
+ ktime_sub(resend_at, now));
- trace_rxrpc_retransmit(call, txb->seq,
- ktime_to_ns(ktime_sub(txb->last_sent,
- max_age)));
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_transmit_one(call, txb);
+ did_send = true;
+ now = ktime_get_real();
if (list_is_last(&txb->call_link, &call->tx_buffer))
goto no_further_resend;
@@ -184,43 +138,46 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* seen. Anything between the soft-ACK table and that point will get
* ACK'd or NACK'd in due course, so don't worry about it here; here we
* need to consider retransmitting anything beyond that point.
- *
- * Note that ACK for a packet can beat the update of tx_transmitted.
*/
- if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted)))
+ if (after_eq(call->acks_prev_seq, call->tx_transmitted))
goto no_further_resend;
list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
- if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq)))
+ resend_at = ktime_add(txb->last_sent, rto);
+
+ if (before_eq(txb->seq, call->acks_prev_seq))
continue;
- if (after(txb->seq, READ_ONCE(call->tx_transmitted)))
+ if (after(txb->seq, call->tx_transmitted))
break; /* Not transmitted yet */
if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
- before(ntohl(txb->wire.serial), ntohl(ack->serial)))
+ before(txb->serial, ntohl(ack->serial)))
goto do_resend; /* Wasn't accounted for by a more recent ping. */
- if (ktime_after(txb->last_sent, max_age)) {
- if (ktime_before(txb->last_sent, oldest))
- oldest = txb->last_sent;
+ if (ktime_after(resend_at, now)) {
+ if (ktime_before(resend_at, next_resend))
+ next_resend = resend_at;
continue;
}
do_resend:
unacked = true;
- if (list_empty(&txb->tx_link)) {
- list_add_tail(&txb->tx_link, &retrans_queue);
- set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
- rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
- }
+
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_transmit_one(call, txb);
+ did_send = true;
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
+ now = ktime_get_real();
}
no_further_resend:
no_resend:
- resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
- !list_empty(&retrans_queue));
- WRITE_ONCE(call->resend_at, resend_at);
+ if (resend_at < KTIME_MAX) {
+ delay = rxrpc_get_rto_backoff(call->peer, did_send);
+ resend_at = ktime_add(resend_at, delay);
+ trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset);
+ }
+ call->resend_at = resend_at;
if (unacked)
rxrpc_congestion_timeout(call);
@@ -229,25 +186,15 @@ no_resend:
* that an ACK got lost somewhere. Send a ping to find out instead of
* retransmitting data.
*/
- if (list_empty(&retrans_queue)) {
- rxrpc_reduce_call_timer(call, resend_at, jiffies,
- rxrpc_timer_set_for_resend);
- ack_ts = ktime_sub(now, call->acks_latest_ts);
- if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
- goto out;
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_lost_ack);
- goto out;
- }
+ if (!did_send) {
+ ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
+ call->peer->srtt_us >> 3);
- /* Retransmit the queue */
- while ((txb = list_first_entry_or_null(&retrans_queue,
- struct rxrpc_txbuf, tx_link))) {
- list_del_init(&txb->tx_link);
- rxrpc_transmit_one(call, txb);
+ if (ktime_sub(next_ping, now) <= 0)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_0_retrans);
}
-out:
_leave("");
}
@@ -257,13 +204,11 @@ out:
*/
static void rxrpc_begin_service_reply(struct rxrpc_call *call)
{
- unsigned long now = jiffies;
-
rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
- WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
if (call->ackr_reason == RXRPC_ACK_DELAY)
call->ackr_reason = 0;
- trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+ call->delay_ack_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}
/*
@@ -320,7 +265,7 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
call->tx_top = txb->seq;
list_add_tail(&txb->call_link, &call->tx_buffer);
- if (txb->wire.flags & RXRPC_LAST_PACKET)
+ if (txb->flags & RXRPC_LAST_PACKET)
rxrpc_close_tx_phase(call);
rxrpc_transmit_one(call, txb);
@@ -372,9 +317,8 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
*/
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
- unsigned long now, next, t;
- rxrpc_serial_t ackr_serial;
- bool resend = false, expired = false;
+ ktime_t now, t;
+ bool resend = false;
s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
@@ -398,71 +342,73 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
goto out;
+ if (skb)
+ rxrpc_input_call_packet(call, skb);
+
/* If we see our async-event poke, check for timeout trippage. */
- now = jiffies;
- t = READ_ONCE(call->expect_rx_by);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
- expired = true;
+ now = ktime_get_real();
+ t = ktime_sub(call->expect_rx_by, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_expect_rx);
+ goto expired;
}
- t = READ_ONCE(call->expect_req_by);
- if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
- time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
- expired = true;
+ t = ktime_sub(call->expect_req_by, now);
+ if (t <= 0) {
+ call->expect_req_by = KTIME_MAX;
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_idle);
+ goto expired;
+ }
}
- t = READ_ONCE(call->expect_term_by);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
- expired = true;
+ t = ktime_sub(READ_ONCE(call->expect_term_by), now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_hard);
+ goto expired;
}
- t = READ_ONCE(call->delay_ack_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
- cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
- ackr_serial = xchg(&call->ackr_serial, 0);
- rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
- rxrpc_propose_ack_ping_for_lost_ack);
+ t = ktime_sub(call->delay_ack_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_delayed_ack);
+ call->delay_ack_at = KTIME_MAX;
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
+ rxrpc_propose_ack_delayed_ack);
}
- t = READ_ONCE(call->ack_lost_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
- cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->ack_lost_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack);
+ call->ack_lost_at = KTIME_MAX;
set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
}
- t = READ_ONCE(call->keepalive_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
- cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->ping_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping);
+ call->ping_at = KTIME_MAX;
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_keepalive);
}
- t = READ_ONCE(call->ping_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
- cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_keepalive);
- }
-
- t = READ_ONCE(call->resend_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
- cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->resend_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend);
+ call->resend_at = KTIME_MAX;
resend = true;
}
- if (skb)
- rxrpc_input_call_packet(call, skb);
-
rxrpc_transmit_some_data(call);
+ now = ktime_get_real();
+ t = ktime_sub(call->keepalive_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_keepalive);
+ call->keepalive_at = KTIME_MAX;
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
+ }
+
if (skb) {
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -474,24 +420,13 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_initial_ping(call);
/* Process events */
- if (expired) {
- if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
- (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
- trace_rxrpc_call_reset(call);
- rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
- rxrpc_abort_call_reset);
- } else {
- rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
- rxrpc_abort_call_timeout);
- }
- goto out;
- }
-
if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
+ if (resend &&
+ __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
+ !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
rxrpc_resend(call, NULL);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
@@ -513,23 +448,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
/* Make sure the timer is restarted */
if (!__rxrpc_call_is_complete(call)) {
- next = call->expect_rx_by;
+ ktime_t next = READ_ONCE(call->expect_term_by), delay;
-#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+#define set(T) { ktime_t _t = (T); if (ktime_before(_t, next)) next = _t; }
set(call->expect_req_by);
- set(call->expect_term_by);
+ set(call->expect_rx_by);
set(call->delay_ack_at);
set(call->ack_lost_at);
set(call->resend_at);
set(call->keepalive_at);
set(call->ping_at);
- now = jiffies;
- if (time_after_eq(now, next))
+ now = ktime_get_real();
+ delay = ktime_sub(next, now);
+ if (delay <= 0) {
rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
-
- rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ } else {
+ unsigned long nowj = jiffies, delayj, nextj;
+
+ delayj = max(nsecs_to_jiffies(delay), 1);
+ nextj = nowj + delayj;
+ if (time_before(nextj, call->timer.expires) ||
+ !timer_pending(&call->timer)) {
+ trace_rxrpc_timer_restart(call, delay, delayj);
+ timer_reduce(&call->timer, nextj);
+ }
+ }
}
out:
@@ -544,4 +489,16 @@ out:
rxrpc_shrink_call_tx_buffer(call);
_leave("");
return true;
+
+expired:
+ if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
+ (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
+ trace_rxrpc_call_reset(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
+ } else {
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
+ }
+ goto out;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 0943e54370ba..01fa71e8b1f7 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -70,20 +70,11 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
if (!__rxrpc_call_is_complete(call)) {
- trace_rxrpc_timer_expired(call, jiffies);
+ trace_rxrpc_timer_expired(call);
rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
}
-void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why)
-{
- trace_rxrpc_timer(call, why, now);
- timer_reduce(&call->timer, expire_at);
-}
-
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
static void rxrpc_destroy_call(struct work_struct *);
@@ -163,12 +154,20 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
refcount_set(&call->ref, 1);
- call->debug_id = debug_id;
- call->tx_total_len = -1;
- call->next_rx_timo = 20 * HZ;
- call->next_req_timo = 1 * HZ;
- call->ackr_window = 1;
- call->ackr_wtop = 1;
+ call->debug_id = debug_id;
+ call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+ call->next_req_timo = 1 * HZ;
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
+ call->delay_ack_at = KTIME_MAX;
+ call->ack_lost_at = KTIME_MAX;
+ call->resend_at = KTIME_MAX;
+ call->ping_at = KTIME_MAX;
+ call->keepalive_at = KTIME_MAX;
+ call->expect_rx_by = KTIME_MAX;
+ call->expect_req_by = KTIME_MAX;
+ call->expect_term_by = KTIME_MAX;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -226,11 +225,11 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
__set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
if (p->timeouts.normal)
- call->next_rx_timo = min(msecs_to_jiffies(p->timeouts.normal), 1UL);
+ call->next_rx_timo = min(p->timeouts.normal, 1);
if (p->timeouts.idle)
- call->next_req_timo = min(msecs_to_jiffies(p->timeouts.idle), 1UL);
+ call->next_req_timo = min(p->timeouts.idle, 1);
if (p->timeouts.hard)
- call->hard_timo = p->timeouts.hard * HZ;
+ call->hard_timo = p->timeouts.hard;
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
@@ -253,18 +252,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
*/
void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- unsigned long now = jiffies;
- unsigned long j = now + MAX_JIFFY_OFFSET;
-
- call->delay_ack_at = j;
- call->ack_lost_at = j;
- call->resend_at = j;
- call->ping_at = j;
- call->keepalive_at = j;
- call->expect_rx_by = j;
- call->expect_req_by = j;
- call->expect_term_by = j + call->hard_timo;
- call->timer.expires = now;
+ if (call->hard_timo) {
+ ktime_t delay = ms_to_ktime(call->hard_timo * 1000);
+
+ call->expect_term_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ }
+ call->timer.expires = jiffies;
}
/*
@@ -686,6 +680,7 @@ static void rxrpc_destroy_call(struct work_struct *work)
del_timer_sync(&call->timer);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
rxrpc_cleanup_ring(call);
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3b9b267a4431..d25bf1cf3670 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -636,7 +636,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
unsigned long final_ack_at = jiffies + 2;
- WRITE_ONCE(chan->final_ack_at, final_ack_at);
+ chan->final_ack_at = final_ack_at;
smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
rxrpc_reduce_conn_timer(conn, final_ack_at);
@@ -770,7 +770,7 @@ next:
conn_expires_at = conn->idle_timestamp + expiry;
- now = READ_ONCE(jiffies);
+ now = jiffies;
if (time_after(conn_expires_at, now))
goto not_yet_expired;
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 95f4bc206b3d..598b4ee389fc 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -88,13 +88,21 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
struct rxrpc_ackpacket ack;
};
} __attribute__((packed)) pkt;
- struct rxrpc_ackinfo ack_info;
+ struct rxrpc_acktrailer trailer;
size_t len;
int ret, ioc;
u32 serial, mtu, call_id, padding;
_enter("%d", conn->debug_id);
+ if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &pkt.ack, sizeof(pkt.ack)) < 0)
+ return;
+ if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE)
+ return;
+ }
+
chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
@@ -114,10 +122,10 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[0].iov_len = sizeof(pkt.whdr);
iov[1].iov_base = &padding;
iov[1].iov_len = 3;
- iov[2].iov_base = &ack_info;
- iov[2].iov_len = sizeof(ack_info);
+ iov[2].iov_base = &trailer;
+ iov[2].iov_len = sizeof(trailer);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
@@ -150,14 +158,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- ack_info.rxMTU = htonl(rxrpc_rx_mtu);
- ack_info.maxMTU = htonl(mtu);
- ack_info.rwind = htonl(rxrpc_rx_window_size);
- ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ trailer.maxMTU = htonl(rxrpc_rx_mtu);
+ trailer.ifMTU = htonl(mtu);
+ trailer.rwind = htonl(rxrpc_rx_window_size);
+ trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
padding = 0;
iov[0].iov_len += sizeof(pkt.ack);
- len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+ len += sizeof(pkt.ack) + 3 + sizeof(trailer);
ioc = 3;
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index df8a271948a1..0af4642aeec4 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -68,6 +68,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
mutex_init(&conn->security_lock);
+ mutex_init(&conn->tx_data_alloc_lock);
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
@@ -341,6 +342,9 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
*/
rxrpc_purge_queue(&conn->rx_queue);
+ if (conn->tx_data_alloc.va)
+ __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
+ conn->tx_data_alloc.pagecnt_bias);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 92495e73b869..3dedb8c0618c 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}
cumulative_acks += summary->nr_new_acks;
- cumulative_acks += summary->nr_rot_new_acks;
if (cumulative_acks > 255)
cumulative_acks = 255;
- summary->mode = call->cong_mode;
summary->cwnd = call->cong_cwnd;
summary->ssthresh = call->cong_ssthresh;
summary->cumulative_acks = cumulative_acks;
@@ -151,6 +149,7 @@ out_no_clear_ca:
cwnd = RXRPC_TX_MAX_WINDOW;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
+ summary->mode = call->cong_mode;
trace_rxrpc_congest(call, summary, acked_serial, change);
if (resend)
rxrpc_resend(call, skb);
@@ -213,8 +212,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- summary->nr_rot_new_acks++;
- if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
+ if (txb->flags & RXRPC_LAST_PACKET) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
}
@@ -254,6 +252,14 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ call->resend_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
+
+ if (unlikely(call->cong_last_nack)) {
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ }
+
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -285,15 +291,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
struct rxrpc_ack_summary summary = { 0 };
- unsigned long now, timo;
rxrpc_seq_t top = READ_ONCE(call->tx_top);
if (call->ackr_reason) {
- now = jiffies;
- timo = now + MAX_JIFFY_OFFSET;
-
- WRITE_ONCE(call->delay_ack_at, timo);
- trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
+ call->delay_ack_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
@@ -326,7 +328,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
case RXRPC_CALL_SERVER_RECV_REQUEST:
rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
- call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ call->expect_req_by = KTIME_MAX;
rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
break;
@@ -586,14 +588,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_CALL_SERVER_RECV_REQUEST: {
unsigned long timo = READ_ONCE(call->next_req_timo);
- unsigned long now, expect_req_by;
if (timo) {
- now = jiffies;
- expect_req_by = now + timo;
- WRITE_ONCE(call->expect_req_by, expect_req_by);
- rxrpc_reduce_call_timer(call, expect_req_by, now,
- rxrpc_timer_set_for_idle);
+ ktime_t delay = ms_to_ktime(timo);
+
+ call->expect_req_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_idle);
}
break;
}
@@ -667,14 +667,14 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
/*
* Process the extra information that may be appended to an ACK packet
*/
-static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
- struct rxrpc_ackinfo *ackinfo)
+static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb,
+ struct rxrpc_acktrailer *trailer)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
bool wake = false;
- u32 rwind = ntohl(ackinfo->rwind);
+ u32 rwind = ntohl(trailer->rwind);
if (rwind > RXRPC_TX_MAX_WINDOW)
rwind = RXRPC_TX_MAX_WINDOW;
@@ -688,7 +688,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
if (call->cong_ssthresh > rwind)
call->cong_ssthresh = rwind;
- mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+ mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
peer = call->peer;
if (mtu < peer->maxdata) {
@@ -703,6 +703,42 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
}
/*
+ * Determine how many nacks from the previous ACK have now been satisfied.
+ */
+static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_seq_t seq)
+{
+ struct sk_buff *skb = call->cong_last_nack;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, new_acks = 0, retained_nacks = 0;
+ rxrpc_seq_t old_seq = sp->ack.first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
+
+ if (after_eq(seq, old_seq + sp->ack.nr_acks)) {
+ summary->nr_new_acks += sp->ack.nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks);
+ summary->nr_retained_nacks = 0;
+ } else if (seq == old_seq) {
+ summary->nr_retained_nacks = sp->ack.nr_nacks;
+ } else {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_NACK) {
+ if (before(old_seq + i, seq))
+ new_acks++;
+ else
+ retained_nacks++;
+ }
+ }
+
+ summary->nr_new_acks += new_acks;
+ summary->nr_retained_nacks = retained_nacks;
+ }
+
+ return old_seq + sp->ack.nr_acks;
+}
+
+/*
* Process individual soft ACKs.
*
* Each ACK in the array corresponds to one packet and can be either an ACK or
@@ -711,25 +747,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
* the timer on the basis that the peer might just not have processed them at
* the time the ACK was sent.
*/
-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
- rxrpc_seq_t seq, int nr_acks,
- struct rxrpc_ack_summary *summary)
+static void rxrpc_input_soft_acks(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t since)
{
- unsigned int i;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, old_nacks = 0;
+ rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
- summary->nr_new_acks++;
+ if (after_eq(seq, since))
+ summary->nr_new_acks++;
} else {
- if (!summary->saw_nacks &&
- call->acks_lowest_nak != seq + i) {
- call->acks_lowest_nak = seq + i;
- summary->new_low_nack = true;
- }
summary->saw_nacks = true;
+ if (before(seq, since)) {
+ /* Overlap with previous ACK */
+ old_nacks++;
+ } else {
+ summary->nr_new_nacks++;
+ sp->ack.nr_nacks++;
+ }
+
+ if (before(seq, lowest_nak))
+ lowest_nak = seq;
}
+ seq++;
}
+
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_nack = true;
+ }
+
+ /* We *can* have more nacks than we did - the peer is permitted to drop
+ * packets it has soft-acked and re-request them. Further, it is
+ * possible for the nack distribution to change whilst the number of
+ * nacks stays the same or goes down.
+ */
+ if (old_nacks < summary->nr_retained_nacks)
+ summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
+ summary->nr_retained_nacks = old_nacks;
}
/*
@@ -769,36 +831,32 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
- struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_ackinfo info;
+ struct rxrpc_acktrailer trailer;
rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
- offset = sizeof(struct rxrpc_wire_header);
- if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
- return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
- offset += sizeof(ack);
+ offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- ack_serial = sp->hdr.serial;
- acked_serial = ntohl(ack.serial);
- first_soft_ack = ntohl(ack.firstPacket);
- prev_pkt = ntohl(ack.previousPacket);
- hard_ack = first_soft_ack - 1;
- nr_acks = ack.nAcks;
- summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
- ack.reason : RXRPC_ACK__INVALID);
+ ack_serial = sp->hdr.serial;
+ acked_serial = sp->ack.acked_serial;
+ first_soft_ack = sp->ack.first_ack;
+ prev_pkt = sp->ack.prev_ack;
+ nr_acks = sp->ack.nr_acks;
+ hard_ack = first_soft_ack - 1;
+ summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
+ sp->ack.reason : RXRPC_ACK__INVALID);
trace_rxrpc_rx_ack(call, ack_serial, acked_serial,
first_soft_ack, prev_pkt,
summary.ack_reason, nr_acks);
- rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
+ rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
if (acked_serial != 0) {
- switch (ack.reason) {
+ switch (summary.ack_reason) {
case RXRPC_ACK_PING_RESPONSE:
rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
rxrpc_rtt_rx_ping_response);
@@ -818,7 +876,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicates that the client address changed due to NAT. The server
* lost the call because it switched to a different peer.
*/
- if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
rxrpc_is_client_call(call)) {
@@ -831,7 +889,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicate a change of address. However, we can retransmit the call
* if we still have it buffered to the beginning.
*/
- if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
call->acks_hard_ack == 0 &&
@@ -849,31 +907,41 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
goto send_response;
}
- info.rxMTU = 0;
+ trailer.maxMTU = 0;
ioffset = offset + nr_acks + 3;
- if (skb->len >= ioffset + sizeof(info) &&
- skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
- return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
+ if (skb->len >= ioffset + sizeof(trailer) &&
+ skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer);
if (nr_acks > 0)
skb_condense(skb);
+ if (call->cong_last_nack) {
+ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ } else {
+ summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
+ call->acks_lowest_nak = first_soft_ack + nr_acks;
+ since = first_soft_ack;
+ }
+
call->acks_latest_ts = skb->tstamp;
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
- switch (ack.reason) {
+ switch (summary.ack_reason) {
case RXRPC_ACK_PING:
break;
default:
- if (after(acked_serial, call->acks_highest_serial))
+ if (acked_serial && after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
break;
}
/* Parse rwind and mtu sizes if provided. */
- if (info.rxMTU)
- rxrpc_input_ackinfo(call, skb, &info);
+ if (trailer.maxMTU)
+ rxrpc_input_ack_trailer(call, skb, &trailer);
if (first_soft_ack == 0)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
@@ -905,8 +973,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
- nr_acks, &summary);
+ rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
+ rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
+ call->cong_last_nack = skb;
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
@@ -918,7 +987,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_congestion_management(call, skb, &summary, acked_serial);
send_response:
- if (ack.reason == RXRPC_ACK_PING)
+ if (summary.ack_reason == RXRPC_ACK_PING)
rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
rxrpc_propose_ack_respond_to_ping);
else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
@@ -969,12 +1038,10 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
timo = READ_ONCE(call->next_rx_timo);
if (timo) {
- unsigned long now = jiffies, expect_rx_by;
+ ktime_t delay = ms_to_ktime(timo);
- expect_rx_by = now + timo;
- WRITE_ONCE(call->expect_rx_by, expect_rx_by);
- rxrpc_reduce_call_timer(call, expect_rx_by, now,
- rxrpc_timer_set_for_normal);
+ call->expect_rx_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
}
switch (sp->hdr.type) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 34353b6e584b..f2701068ed9e 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -15,14 +15,11 @@ static int none_init_connection_security(struct rxrpc_connection *conn,
}
/*
- * Work out how much data we can put in an unsecured packet.
+ * Allocate an appropriately sized buffer for the amount of data remaining.
*/
-static int none_how_much_data(struct rxrpc_call *call, size_t remain,
- size_t *_buf_size, size_t *_data_size, size_t *_offset)
+static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- *_offset = 0;
- return 0;
+ return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp);
}
static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
@@ -79,7 +76,7 @@ const struct rxrpc_security rxrpc_no_security = {
.exit = none_exit,
.init_connection_security = none_init_connection_security,
.free_call_crypto = none_free_call_crypto,
- .how_much_data = none_how_much_data,
+ .alloc_txbuf = none_alloc_txbuf,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
.respond_to_challenge = none_respond_to_challenge,
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 4a3a08a0e2cd..0300baa9afcd 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -124,6 +124,7 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
+ struct rxrpc_ackpacket ack;
/* dig out the RxRPC connection details */
if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
@@ -141,6 +142,16 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(whdr), &ack, sizeof(ack)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_ack);
+ sp->ack.first_ack = ntohl(ack.firstPacket);
+ sp->ack.prev_ack = ntohl(ack.previousPacket);
+ sp->ack.acked_serial = ntohl(ack.serial);
+ sp->ack.reason = ack.reason;
+ sp->ack.nr_acks = ack.nAcks;
+ }
return true;
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 34d307368135..504453c688d7 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -452,6 +452,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
+ if (local->tx_alloc.va)
+ __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
+ local->tx_alloc.pagecnt_bias);
}
/*
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 825b81183046..657cf35089a6 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -17,22 +17,22 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ * How long to wait before scheduling an ACK with subtype DELAY (in ms).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned long rxrpc_soft_ack_delay = HZ;
+unsigned long rxrpc_soft_ack_delay = 1000;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ * How long to wait before scheduling an ACK with subtype IDLE (in ms).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned long rxrpc_idle_ack_delay = HZ / 2;
+unsigned long rxrpc_idle_ack_delay = 500;
/*
* Receive window size in packets. This indicates the maximum number of
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a0906145e829..5ea9601efd05 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -48,12 +48,10 @@ static const char rxrpc_keepalive_string[] = "";
static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
{
if (ret < 0) {
- u16 tx_backoff = READ_ONCE(call->tx_backoff);
-
- if (tx_backoff < HZ)
- WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ if (call->tx_backoff < 1000)
+ call->tx_backoff += 100;
} else {
- WRITE_ONCE(call->tx_backoff, 0);
+ call->tx_backoff = 0;
}
}
@@ -65,84 +63,92 @@ static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
* Receiving a response to the ping will prevent the ->expect_rx_by timer from
* expiring.
*/
-static void rxrpc_set_keepalive(struct rxrpc_call *call)
+static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
{
- unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+ ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);
- keepalive_at += now;
- WRITE_ONCE(call->keepalive_at, keepalive_at);
- rxrpc_reduce_call_timer(call, keepalive_at, now,
- rxrpc_timer_set_for_keepalive);
+ call->keepalive_at = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
}
/*
* Fill out an ACK packet.
*/
-static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- struct rxrpc_txbuf *txb,
- u16 *_rwind)
+static void rxrpc_fill_out_ack(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ u8 ack_reason,
+ rxrpc_serial_t serial)
{
- struct rxrpc_ackinfo ackinfo;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3;
+ struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
unsigned int qsize, sack, wrap, to;
rxrpc_seq_t window, wtop;
int rsize;
u32 mtu, jmax;
- u8 *ackp = txb->acks;
+ u8 *filler = txb->kvec[2].iov_base;
+ u8 *sackp = txb->kvec[1].iov_base;
- call->ackr_nr_unacked = 0;
- atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
- clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
window = call->ackr_window;
wtop = call->ackr_wtop;
sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
- txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = wtop - window;
+
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_ACK;
+ txb->flags |= RXRPC_SLOW_START_OK;
+ ack->bufferSpace = 0;
+ ack->maxSkew = 0;
+ ack->firstPacket = htonl(window);
+ ack->previousPacket = htonl(call->rx_highest_seq);
+ ack->serial = htonl(serial);
+ ack->reason = ack_reason;
+ ack->nAcks = wtop - window;
+ filler[0] = 0;
+ filler[1] = 0;
+ filler[2] = 0;
+
+ if (ack_reason == RXRPC_ACK_PING)
+ txb->flags |= RXRPC_REQUEST_ACK;
if (after(wtop, window)) {
+ txb->len += ack->nAcks;
+ txb->kvec[1].iov_base = sackp;
+ txb->kvec[1].iov_len = ack->nAcks;
+
wrap = RXRPC_SACK_SIZE - sack;
- to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
+ to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE);
- if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
+ if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
} else {
- memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
- memcpy(txb->acks + wrap, call->ackr_sack_table,
- to - wrap);
+ memcpy(sackp, call->ackr_sack_table + sack, wrap);
+ memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
}
-
- ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
- } else if (txb->ack.reason == RXRPC_ACK_DELAY) {
- txb->ack.reason = RXRPC_ACK_IDLE;
+ } else if (ack->reason == RXRPC_ACK_DELAY) {
+ ack->reason = RXRPC_ACK_IDLE;
}
- mtu = conn->peer->if_mtu;
- mtu -= conn->peer->hdrsize;
+ mtu = call->peer->if_mtu;
+ mtu -= call->peer->hdrsize;
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
- *_rwind = rsize;
- ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
- ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(rsize);
- ackinfo.jumbo_max = htonl(jmax);
-
- *ackp++ = 0;
- *ackp++ = 0;
- *ackp++ = 0;
- memcpy(ackp, &ackinfo, sizeof(ackinfo));
- return txb->ack.nAcks + 3 + sizeof(ackinfo);
+ txb->ack_rwind = rsize;
+ trailer->maxMTU = htonl(rxrpc_rx_mtu);
+ trailer->ifMTU = htonl(mtu);
+ trailer->rwind = htonl(rsize);
+ trailer->jumbo_max = htonl(jmax);
}
/*
* Record the beginning of an RTT probe.
*/
-static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
- enum rxrpc_rtt_tx_trace why)
+static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
+ ktime_t now, enum rxrpc_rtt_tx_trace why)
{
unsigned long avail = call->rtt_avail;
int rtt_slot = 9;
@@ -155,47 +161,31 @@ static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
goto no_slot;
call->rtt_serial[rtt_slot] = serial;
- call->rtt_sent_at[rtt_slot] = ktime_get_real();
+ call->rtt_sent_at[rtt_slot] = now;
smp_wmb(); /* Write data before avail bit */
set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
- return rtt_slot;
+ return;
no_slot:
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
- return -1;
-}
-
-/*
- * Cancel an RTT probe.
- */
-static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
- rxrpc_serial_t serial, int rtt_slot)
-{
- if (rtt_slot != -1) {
- clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
- smp_wmb(); /* Clear pending bit before setting slot */
- set_bit(rtt_slot, &call->rtt_avail);
- trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial);
- }
}
/*
* Transmit an ACK packet.
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
struct rxrpc_connection *conn;
+ struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
struct msghdr msg;
- struct kvec iov[1];
- rxrpc_serial_t serial;
- size_t len, n;
- int ret, rtt_slot = -1;
- u16 rwind;
+ ktime_t now;
+ int ret;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- return -ECONNRESET;
+ return;
conn = call->conn;
@@ -203,55 +193,68 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
msg.msg_namelen = call->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- if (txb->ack.reason == RXRPC_ACK_PING)
- txb->wire.flags |= RXRPC_REQUEST_ACK;
-
- n = rxrpc_fill_out_ack(conn, call, txb, &rwind);
- if (n == 0)
- return 0;
-
- iov[0].iov_base = &txb->wire;
- iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
- len = iov[0].iov_len;
+ msg.msg_flags = MSG_SPLICE_PAGES;
- serial = atomic_inc_return(&conn->serial);
- txb->wire.serial = htonl(serial);
- trace_rxrpc_tx_ack(call->debug_id, serial,
- ntohl(txb->ack.firstPacket),
- ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks,
- rwind);
+ whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
- if (txb->ack.reason == RXRPC_ACK_PING)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
+ txb->serial = rxrpc_get_next_serial(conn);
+ whdr->serial = htonl(txb->serial);
+ trace_rxrpc_tx_ack(call->debug_id, txb->serial,
+ ntohl(ack->firstPacket),
+ ntohl(ack->serial), ack->reason, ack->nAcks,
+ txb->ack_rwind);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
- /* Grab the highest received seq as late as possible */
- txb->ack.previousPacket = htonl(call->rx_highest_seq);
-
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
- ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len);
+ rxrpc_local_dont_fragment(conn->local, false);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0) {
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret,
rxrpc_tx_point_call_ack);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
+ trace_rxrpc_tx_packet(call->debug_id, whdr,
rxrpc_tx_point_call_ack);
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- call->peer->rtt_last_req = ktime_get_real();
+ now = ktime_get_real();
+ if (ack->reason == RXRPC_ACK_PING)
+ rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping);
+ if (txb->flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = now;
+ rxrpc_set_keepalive(call, now);
}
rxrpc_tx_backoff(call, ret);
+}
- if (!__rxrpc_call_is_complete(call)) {
- if (ret < 0)
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- rxrpc_set_keepalive(call);
+/*
+ * Queue an ACK for immediate transmission.
+ */
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return;
+
+ rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
+
+ txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window);
+ if (!txb) {
+ kleave(" = -ENOMEM");
+ return;
}
- return ret;
+ txb->ack_why = why;
+
+ rxrpc_fill_out_ack(call, txb, ack_reason, serial);
+ call->ackr_nr_unacked = 0;
+ atomic_set(&call->ackr_nr_consumed, 0);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
+
+ trace_rxrpc_send_ack(call, why, ack_reason, serial);
+ rxrpc_send_ack_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
}
/*
@@ -302,7 +305,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
iov[0].iov_base = &pkt;
iov[0].iov_len = sizeof(pkt);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
@@ -319,38 +322,22 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
}
/*
- * send a packet through the transport endpoint
+ * Prepare a (sub)packet for transmission.
*/
-int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb,
+ rxrpc_serial_t serial)
{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
enum rxrpc_req_ack_trace why;
struct rxrpc_connection *conn = call->conn;
- struct msghdr msg;
- struct kvec iov[1];
- rxrpc_serial_t serial;
- size_t len;
- int ret, rtt_slot = -1;
_enter("%x,{%d}", txb->seq, txb->len);
- /* Each transmission of a Tx packet needs a new serial number */
- serial = atomic_inc_return(&conn->serial);
- txb->wire.serial = htonl(serial);
+ txb->serial = serial;
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
txb->seq == 1)
- txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
-
- iov[0].iov_base = &txb->wire;
- iov[0].iov_len = sizeof(txb->wire) + txb->len;
- len = iov[0].iov_len;
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
-
- msg.msg_name = &call->peer->srx.transport;
- msg.msg_namelen = call->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
+ whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
/* If our RTT cache needs working on, request an ACK. Also request
* ACKs if a DATA packet appears to have been lost.
@@ -359,13 +346,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
* service call, lest OpenAFS incorrectly send us an ACK with some
* soft-ACKs in it and then never follow up with a proper hard ACK.
*/
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ if (txb->flags & RXRPC_REQUEST_ACK)
why = rxrpc_reqack_already_on;
- else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb))
+ else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb))
why = rxrpc_reqack_no_srv_last;
else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
why = rxrpc_reqack_ack_lost;
- else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags))
+ else if (txb->flags & RXRPC_TXBUF_RESENT)
why = rxrpc_reqack_retrans;
else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2)
why = rxrpc_reqack_slow_start;
@@ -381,42 +368,116 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
if (why != rxrpc_reqack_no_srv_last)
- txb->wire.flags |= RXRPC_REQUEST_ACK;
+ txb->flags |= RXRPC_REQUEST_ACK;
dont_set_request_ack:
+ whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
+ whdr->serial = htonl(txb->serial);
+ whdr->cksum = txb->cksum;
+
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false);
+}
+
+/*
+ * Prepare a packet for transmission.
+ */
+static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ rxrpc_serial_t serial;
+
+ /* Each transmission of a Tx packet needs a new serial number */
+ serial = rxrpc_get_next_serial(call->conn);
+
+ rxrpc_prepare_data_subpacket(call, txb, serial);
+
+ return txb->len;
+}
+
+/*
+ * Set timeouts after transmitting a packet.
+ */
+static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ ktime_t now = ktime_get_real();
+ bool ack_requested = txb->flags & RXRPC_REQUEST_ACK;
+
+ call->tx_last_sent = now;
+ txb->last_sent = now;
+
+ if (ack_requested) {
+ rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_data);
+
+ call->peer->rtt_last_req = now;
+ if (call->peer->rtt_count > 1) {
+ ktime_t delay = rxrpc_get_rto_backoff(call->peer, false);
+
+ call->ack_lost_at = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack);
+ }
+ }
+
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
+ ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));
+
+ call->expect_rx_by = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
+ }
+
+ rxrpc_set_keepalive(call, now);
+}
+
+/*
+ * send a packet through the transport endpoint
+ */
+static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_connection *conn = call->conn;
+ enum rxrpc_tx_point frag;
+ struct msghdr msg;
+ size_t len;
+ int ret;
+
+ _enter("%x,{%d}", txb->seq, txb->len);
+
+ len = rxrpc_prepare_data_packet(call, txb);
+
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
ret = 0;
- trace_rxrpc_tx_data(call, txb->seq, serial,
- txb->wire.flags,
- test_bit(RXRPC_TXBUF_RESENT, &txb->flags),
- true);
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial,
+ txb->flags, true);
goto done;
}
}
- trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
- test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+ iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len);
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = MSG_SPLICE_PAGES;
/* Track what we've attempted to transmit at least once so that the
* retransmission algorithm doesn't try to resend what we haven't sent
- * yet. However, this can race as we can receive an ACK before we get
- * to this point. But, OTOH, if we won't get an ACK mentioning this
- * packet unless the far side received it (though it could have
- * discarded it anyway and NAK'd it).
+ * yet.
*/
- cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
+ if (txb->seq == call->tx_transmitted + 1)
+ call->tx_transmitted = txb->seq;
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
- if (txb->len >= call->peer->maxdata)
- goto send_fragmentable;
-
- txb->last_sent = ktime_get_real();
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
+ if (txb->len >= call->peer->maxdata) {
+ rxrpc_local_dont_fragment(conn->local, false);
+ frag = rxrpc_tx_point_call_data_frag;
+ } else {
+ rxrpc_local_dont_fragment(conn->local, true);
+ frag = rxrpc_tx_point_call_data_nofrag;
+ }
+retry:
/* send the packet by UDP
* - returns -EMSGSIZE if UDP would have to fragment the packet
* to go out of the interface
@@ -429,46 +490,21 @@ dont_set_request_ack:
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
- rxrpc_tx_point_call_data_nofrag);
+ trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
- rxrpc_tx_point_call_data_nofrag);
+ trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
}
rxrpc_tx_backoff(call, ret);
- if (ret == -EMSGSIZE)
- goto send_fragmentable;
+ if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_frag) {
+ rxrpc_local_dont_fragment(conn->local, false);
+ frag = rxrpc_tx_point_call_data_frag;
+ goto retry;
+ }
done:
if (ret >= 0) {
- call->tx_last_sent = txb->last_sent;
- if (txb->wire.flags & RXRPC_REQUEST_ACK) {
- call->peer->rtt_last_req = txb->last_sent;
- if (call->peer->rtt_count > 1) {
- unsigned long nowj = jiffies, ack_lost_at;
-
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
- ack_lost_at += nowj;
- WRITE_ONCE(call->ack_lost_at, ack_lost_at);
- rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
- rxrpc_timer_set_for_lost_ack);
- }
- }
-
- if (txb->seq == 1 &&
- !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
- &call->flags)) {
- unsigned long nowj = jiffies, expect_rx_by;
-
- expect_rx_by = nowj + call->next_rx_timo;
- WRITE_ONCE(call->expect_rx_by, expect_rx_by);
- rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
- rxrpc_timer_set_for_normal);
- }
-
- rxrpc_set_keepalive(call);
+ rxrpc_tstamp_data_packets(call, txb);
} else {
/* Cancel the call if the initial transmission fails,
* particularly if that's due to network routing issues that
@@ -482,41 +518,6 @@ done:
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
-
-send_fragmentable:
- /* attempt to send this message with fragmentation enabled */
- _debug("send fragment");
-
- txb->last_sent = ktime_get_real();
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
-
- switch (conn->local->srx.transport.family) {
- case AF_INET6:
- case AF_INET:
- rxrpc_local_dont_fragment(conn->local, false);
- rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
- ret = do_udp_sendmsg(conn->local->socket, &msg, len);
- conn->peer->last_tx_at = ktime_get_seconds();
-
- rxrpc_local_dont_fragment(conn->local, true);
- break;
-
- default:
- BUG();
- }
-
- if (ret < 0) {
- rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
- rxrpc_tx_point_call_data_frag);
- } else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
- rxrpc_tx_point_call_data_frag);
- }
- rxrpc_tx_backoff(call, ret);
- goto done;
}
/*
@@ -558,7 +559,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
@@ -723,11 +724,9 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_instant_resend(call, txb);
}
} else {
- unsigned long now = jiffies;
- unsigned long resend_at = now + call->peer->rto_j;
+ ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
- WRITE_ONCE(call->resend_at, resend_at);
- rxrpc_reduce_call_timer(call, resend_at, now,
- rxrpc_timer_set_for_send);
+ call->resend_at = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 6c86cbb98d1d..263a2251e3d2 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -52,9 +52,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
enum rxrpc_call_state state;
- unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
+ long timeout = 0;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -76,10 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
state = rxrpc_call_state(call);
- if (state != RXRPC_CALL_SERVER_PREALLOC) {
- timeout = READ_ONCE(call->expect_rx_by);
- timeout -= jiffies;
- }
+ if (state != RXRPC_CALL_SERVER_PREALLOC)
+ timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real());
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
seq_printf(seq,
@@ -181,7 +179,7 @@ print:
atomic_read(&conn->active),
state,
key_serial(conn->key),
- atomic_read(&conn->serial),
+ conn->tx_serial,
conn->hi_serial,
conn->channels[0].call_id,
conn->channels[1].call_id,
@@ -309,7 +307,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
peer->mtu,
now - peer->last_tx_at,
peer->srtt_us >> 3,
- jiffies_to_usecs(peer->rto_j));
+ peer->rto_us);
return 0;
}
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index e8ee4af43ca8..4fe6b4d20ada 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -135,9 +135,9 @@ struct rxrpc_ackpacket {
/*
* ACK packets can have a further piece of information tagged on the end
*/
-struct rxrpc_ackinfo {
- __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
- __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
+struct rxrpc_acktrailer {
+ __be32 maxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
+ __be32 ifMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
__be32 rwind; /* Rx window size (packets) [AFS 3.4] */
__be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */
};
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
index be61d6f5be8d..cdab7b7d08a0 100644
--- a/net/rxrpc/rtt.c
+++ b/net/rxrpc/rtt.c
@@ -11,8 +11,8 @@
#include <linux/net.h>
#include "ar-internal.h"
-#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
-#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+#define RXRPC_RTO_MAX (120 * USEC_PER_SEC)
+#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
@@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
{
- return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+ return (peer->srtt_us >> 3) + peer->rttvar_us;
}
static u32 rxrpc_bound_rto(u32 rto)
@@ -124,7 +124,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer)
/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
* guarantees that rto is higher.
*/
- peer->rto_j = rxrpc_bound_rto(rto);
+ peer->rto_us = rxrpc_bound_rto(rto);
}
static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
@@ -163,33 +163,33 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
spin_unlock(&peer->rtt_input_lock);
trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
- peer->srtt_us >> 3, peer->rto_j);
+ peer->srtt_us >> 3, peer->rto_us);
}
/*
- * Get the retransmission timeout to set in jiffies, backing it off each time
- * we retransmit.
+ * Get the retransmission timeout to set in nanoseconds, backing it off each
+ * time we retransmit.
*/
-unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
{
- u64 timo_j;
- u8 backoff = READ_ONCE(peer->backoff);
+ u64 timo_us;
+ u32 backoff = READ_ONCE(peer->backoff);
- timo_j = peer->rto_j;
- timo_j <<= backoff;
- if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
+ timo_us = peer->rto_us;
+ timo_us <<= backoff;
+ if (retrans && timo_us * 2 <= RXRPC_RTO_MAX)
WRITE_ONCE(peer->backoff, backoff + 1);
- if (timo_j < 1)
- timo_j = 1;
+ if (timo_us < 1)
+ timo_us = 1;
- return timo_j;
+ return ns_to_ktime(timo_us * NSEC_PER_USEC);
}
void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
{
- peer->rto_j = RXRPC_TIMEOUT_INIT;
- peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
+ peer->rto_us = RXRPC_TIMEOUT_INIT;
+ peer->mdev_us = RXRPC_TIMEOUT_INIT;
peer->backoff = 0;
//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index b52dedcebce0..f1a68270862d 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -145,16 +145,17 @@ error:
/*
* Work out how much data we can put in a packet.
*/
-static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
- size_t *_buf_size, size_t *_data_size, size_t *_offset)
+static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- size_t shdr, buf_size, chunk;
+ struct rxrpc_txbuf *txb;
+ size_t shdr, space;
+
+ remain = min(remain, 65535 - sizeof(struct rxrpc_wire_header));
switch (call->conn->security_level) {
default:
- buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- shdr = 0;
- goto out;
+ space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
+ return rxrpc_alloc_data_txbuf(call, space, 0, gfp);
case RXRPC_SECURITY_AUTH:
shdr = sizeof(struct rxkad_level1_hdr);
break;
@@ -163,17 +164,16 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
break;
}
- buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN);
+ space = min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr);
+ space = round_up(space, RXKAD_ALIGN);
- chunk = buf_size - shdr;
- if (remain < chunk)
- buf_size = round_up(shdr + remain, RXKAD_ALIGN);
+ txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp);
+ if (!txb)
+ return NULL;
-out:
- *_buf_size = buf_size;
- *_data_size = chunk;
- *_offset = shdr;
- return 0;
+ txb->offset += shdr;
+ txb->space -= shdr;
+ return txb;
}
/*
@@ -251,7 +251,8 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
struct skcipher_request *req)
{
- struct rxkad_level1_hdr *hdr = (void *)txb->data;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxkad_level1_hdr *hdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -259,7 +260,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
_enter("");
- check = txb->seq ^ ntohl(txb->wire.callNumber);
+ check = txb->seq ^ call->call_id;
hdr->data_size = htonl((u32)check << 16 | txb->len);
txb->len += sizeof(struct rxkad_level1_hdr);
@@ -267,14 +268,14 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
- memset(txb->data + txb->offset, 0, pad);
+ memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
txb->len += pad;
}
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg, txb->data, 8);
+ sg_init_one(&sg, hdr, 8);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
@@ -293,7 +294,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr *rxkhdr = (void *)txb->data;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -302,7 +304,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
_enter("");
- check = txb->seq ^ ntohl(txb->wire.callNumber);
+ check = txb->seq ^ call->call_id;
rxkhdr->data_size = htonl(txb->len | (u32)check << 16);
rxkhdr->checksum = 0;
@@ -312,7 +314,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
- memset(txb->data + txb->offset, 0, pad);
+ memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
txb->len += pad;
}
@@ -320,7 +322,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- sg_init_one(&sg, txb->data, txb->len);
+ sg_init_one(&sg, rxkhdr, txb->len);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x);
@@ -362,9 +364,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv));
/* calculate the security checksum */
- x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= txb->seq & 0x3fffffff;
- crypto.buf[0] = txb->wire.callNumber;
+ crypto.buf[0] = htonl(call->call_id);
crypto.buf[1] = htonl(x);
sg_init_one(&sg, crypto.buf, 8);
@@ -378,7 +380,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
- txb->wire.cksum = htons(y);
+ txb->cksum = htons(y);
switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
@@ -664,7 +666,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
@@ -721,12 +723,11 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
rxrpc_local_dont_fragment(conn->local, false);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
- rxrpc_local_dont_fragment(conn->local, true);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
@@ -1256,7 +1257,7 @@ const struct rxrpc_security rxkad = {
.free_preparse_server_key = rxkad_free_preparse_server_key,
.destroy_server_key = rxkad_destroy_server_key,
.init_connection_security = rxkad_init_connection_security,
- .how_much_data = rxkad_how_much_data,
+ .alloc_txbuf = rxkad_alloc_txbuf,
.secure_packet = rxkad_secure_packet,
.verify_packet = rxkad_verify_packet,
.free_call_crypto = rxkad_free_call_crypto,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5677d5690a02..6f765768c49c 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -240,7 +240,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_notify_end_tx_t notify_end_tx)
{
rxrpc_seq_t seq = txb->seq;
- bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
+ bool poke, last = txb->flags & RXRPC_LAST_PACKET;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
@@ -336,7 +336,7 @@ reload:
do {
if (!txb) {
- size_t remain, bufsize, chunk, offset;
+ size_t remain;
_debug("alloc");
@@ -348,23 +348,11 @@ reload:
* region (enc blocksize), but the trailer is not.
*/
remain = more ? INT_MAX : msg_data_left(msg);
- ret = call->conn->security->how_much_data(call, remain,
- &bufsize, &chunk, &offset);
- if (ret < 0)
+ txb = call->conn->security->alloc_txbuf(call, remain, sk->sk_allocation);
+ if (IS_ERR(txb)) {
+ ret = PTR_ERR(txb);
goto maybe_error;
-
- _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset);
-
- /* create a buffer that we can retain until it's ACK'd */
- ret = -ENOMEM;
- txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA,
- GFP_KERNEL);
- if (!txb)
- goto maybe_error;
-
- txb->offset = offset;
- txb->space -= offset;
- txb->space = min_t(size_t, chunk, txb->space);
+ }
}
_debug("append");
@@ -374,8 +362,8 @@ reload:
size_t copy = min_t(size_t, txb->space, msg_data_left(msg));
_debug("add %zu", copy);
- if (!copy_from_iter_full(txb->data + txb->offset, copy,
- &msg->msg_iter))
+ if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset,
+ copy, &msg->msg_iter))
goto efault;
_debug("added");
txb->space -= copy;
@@ -394,18 +382,18 @@ reload:
/* add the packet to the send queue if it's now full */
if (!txb->space ||
(msg_data_left(msg) == 0 && !more)) {
- if (msg_data_left(msg) == 0 && !more) {
- txb->wire.flags |= RXRPC_LAST_PACKET;
- __set_bit(RXRPC_TXBUF_LAST, &txb->flags);
- }
+ if (msg_data_left(msg) == 0 && !more)
+ txb->flags |= RXRPC_LAST_PACKET;
else if (call->tx_top - call->acks_hard_ack <
call->tx_winsize)
- txb->wire.flags |= RXRPC_MORE_PACKETS;
+ txb->flags |= RXRPC_MORE_PACKETS;
ret = call->security->secure_packet(call, txb);
if (ret < 0)
goto out;
+ txb->kvec[0].iov_len += txb->len;
+ txb->len = txb->kvec[0].iov_len;
rxrpc_queue_packet(rx, call, txb, notify_end_tx);
txb = NULL;
}
@@ -621,7 +609,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call;
- unsigned long now, j;
bool dropped_lock = false;
int ret;
@@ -699,25 +686,21 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
switch (p.call.nr_timeouts) {
case 3:
- j = msecs_to_jiffies(p.call.timeouts.normal);
- if (p.call.timeouts.normal > 0 && j == 0)
- j = 1;
- WRITE_ONCE(call->next_rx_timo, j);
+ WRITE_ONCE(call->next_rx_timo, p.call.timeouts.normal);
fallthrough;
case 2:
- j = msecs_to_jiffies(p.call.timeouts.idle);
- if (p.call.timeouts.idle > 0 && j == 0)
- j = 1;
- WRITE_ONCE(call->next_req_timo, j);
+ WRITE_ONCE(call->next_req_timo, p.call.timeouts.idle);
fallthrough;
case 1:
if (p.call.timeouts.hard > 0) {
- j = p.call.timeouts.hard * HZ;
- now = jiffies;
- j += now;
- WRITE_ONCE(call->expect_term_by, j);
- rxrpc_reduce_call_timer(call, j, now,
- rxrpc_timer_set_for_hard);
+ ktime_t delay = ms_to_ktime(p.call.timeouts.hard * MSEC_PER_SEC);
+
+ WRITE_ONCE(call->expect_term_by,
+ ktime_add(p.call.timeouts.hard,
+ ktime_get_real()));
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ rxrpc_poke_call(call, rxrpc_call_poke_set_timeout);
+
}
break;
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index ecaeb4ecfb58..c9bedd0e2d86 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -15,6 +15,8 @@ static const unsigned int four = 4;
static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
+static const unsigned long one_ms = 1;
+static const unsigned long max_ms = 1000;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
@@ -28,24 +30,24 @@ static const unsigned long max_500 = 500;
* information on the individual parameters.
*/
static struct ctl_table rxrpc_sysctl_table[] = {
- /* Values measured in milliseconds but used in jiffies */
+ /* Values measured in milliseconds */
{
.procname = "soft_ack_delay",
.data = &rxrpc_soft_ack_delay,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)&one_ms,
+ .extra2 = (void *)&max_ms,
},
{
.procname = "idle_ack_delay",
.data = &rxrpc_idle_ack_delay,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)&one_ms,
+ .extra2 = (void *)&max_ms,
},
{
.procname = "idle_conn_expiry",
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d43be8512386..b2a82ab756c2 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -14,45 +14,146 @@ static atomic_t rxrpc_txbuf_debug_ids;
atomic_t rxrpc_nr_txbuf;
/*
- * Allocate and partially initialise an I/O request structure.
+ * Allocate and partially initialise a data transmission buffer.
*/
-struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
- gfp_t gfp)
+struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
+ size_t data_align, gfp_t gfp)
{
+ struct rxrpc_wire_header *whdr;
struct rxrpc_txbuf *txb;
+ size_t total, hoff = 0;
+ void *buf;
txb = kmalloc(sizeof(*txb), gfp);
- if (txb) {
- INIT_LIST_HEAD(&txb->call_link);
- INIT_LIST_HEAD(&txb->tx_link);
- refcount_set(&txb->ref, 1);
- txb->call_debug_id = call->debug_id;
- txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
- txb->space = sizeof(txb->data);
- txb->len = 0;
- txb->offset = 0;
- txb->flags = 0;
- txb->ack_why = 0;
- txb->seq = call->tx_prepared + 1;
- txb->wire.epoch = htonl(call->conn->proto.epoch);
- txb->wire.cid = htonl(call->cid);
- txb->wire.callNumber = htonl(call->call_id);
- txb->wire.seq = htonl(txb->seq);
- txb->wire.type = packet_type;
- txb->wire.flags = call->conn->out_clientflag;
- txb->wire.userStatus = 0;
- txb->wire.securityIndex = call->security_ix;
- txb->wire._rsvd = 0;
- txb->wire.serviceId = htons(call->dest_srx.srx_service);
-
- trace_rxrpc_txbuf(txb->debug_id,
- txb->call_debug_id, txb->seq, 1,
- packet_type == RXRPC_PACKET_TYPE_DATA ?
- rxrpc_txbuf_alloc_data :
- rxrpc_txbuf_alloc_ack);
- atomic_inc(&rxrpc_nr_txbuf);
+ if (!txb)
+ return NULL;
+
+ if (data_align)
+ hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr);
+ total = hoff + sizeof(*whdr) + data_size;
+
+ mutex_lock(&call->conn->tx_data_alloc_lock);
+ buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp,
+ ~(data_align - 1) & ~(L1_CACHE_BYTES - 1));
+ mutex_unlock(&call->conn->tx_data_alloc_lock);
+ if (!buf) {
+ kfree(txb);
+ return NULL;
+ }
+
+ whdr = buf + hoff;
+
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->last_sent = KTIME_MIN;
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = data_size;
+ txb->len = 0;
+ txb->offset = sizeof(*whdr);
+ txb->flags = call->conn->out_clientflag;
+ txb->ack_why = 0;
+ txb->seq = call->tx_prepared + 1;
+ txb->serial = 0;
+ txb->cksum = 0;
+ txb->nr_kvec = 1;
+ txb->kvec[0].iov_base = whdr;
+ txb->kvec[0].iov_len = sizeof(*whdr);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->seq = htonl(txb->seq);
+ whdr->type = RXRPC_PACKET_TYPE_DATA;
+ whdr->flags = 0;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
+ rxrpc_txbuf_alloc_data);
+
+ atomic_inc(&rxrpc_nr_txbuf);
+ return txb;
+}
+
+/*
+ * Allocate and partially initialise an ACK packet.
+ */
+struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size)
+{
+ struct rxrpc_wire_header *whdr;
+ struct rxrpc_acktrailer *trailer;
+ struct rxrpc_ackpacket *ack;
+ struct rxrpc_txbuf *txb;
+ gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
+ void *buf, *buf2 = NULL;
+ u8 *filler;
+
+ txb = kmalloc(sizeof(*txb), gfp);
+ if (!txb)
+ return NULL;
+
+ buf = page_frag_alloc(&call->local->tx_alloc,
+ sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
+ if (!buf) {
+ kfree(txb);
+ return NULL;
+ }
+
+ if (sack_size) {
+ buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
+ if (!buf2) {
+ page_frag_free(buf);
+ kfree(txb);
+ return NULL;
+ }
}
+ whdr = buf;
+ ack = buf + sizeof(*whdr);
+ filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
+ trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
+
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = 0;
+ txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer);
+ txb->offset = 0;
+ txb->flags = call->conn->out_clientflag;
+ txb->ack_rwind = 0;
+ txb->seq = 0;
+ txb->serial = 0;
+ txb->cksum = 0;
+ txb->nr_kvec = 3;
+ txb->kvec[0].iov_base = whdr;
+ txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack);
+ txb->kvec[1].iov_base = buf2;
+ txb->kvec[1].iov_len = sack_size;
+ txb->kvec[2].iov_base = filler;
+ txb->kvec[2].iov_len = 3 + sizeof(*trailer);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_ACK;
+ whdr->flags = 0;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
+ get_page(virt_to_head_page(trailer));
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
+ rxrpc_txbuf_alloc_ack);
+ atomic_inc(&rxrpc_nr_txbuf);
return txb;
}
@@ -71,12 +172,15 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what);
}
-static void rxrpc_free_txbuf(struct rcu_head *rcu)
+static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb)
{
- struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu);
+ int i;
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
rxrpc_txbuf_free);
+ for (i = 0; i < txb->nr_kvec; i++)
+ if (txb->kvec[i].iov_base)
+ page_frag_free(txb->kvec[i].iov_base);
kfree(txb);
atomic_dec(&rxrpc_nr_txbuf);
}
@@ -95,7 +199,7 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
dead = __refcount_dec_and_test(&txb->ref, &r);
trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what);
if (dead)
- call_rcu(&txb->rcu, rxrpc_free_txbuf);
+ rxrpc_free_txbuf(txb);
}
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 470c70deffe2..8180d0c12fce 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -737,16 +737,6 @@ config NET_ACT_SAMPLE
To compile this code as a module, choose M here: the
module will be called act_sample.
-config NET_ACT_IPT
- tristate "IPtables targets"
- depends on NET_CLS_ACT && NETFILTER && NETFILTER_XTABLES
- help
- Say Y here to be able to invoke iptables targets after successful
- classification.
-
- To compile this code as a module, choose M here: the
- module will be called act_ipt.
-
config NET_ACT_NAT
tristate "Stateless NAT"
depends on NET_CLS_ACT
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3e30d7260493..9ee622fb1160 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1363,7 +1363,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
if (rtnl_held)
rtnl_unlock();
- request_module("act_%s", act_name);
+ request_module(NET_ACT_ALIAS_PREFIX "%s", act_name);
if (rtnl_held)
rtnl_lock();
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 6cfee6658103..0e3cf11ae5fc 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -401,6 +401,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.init = tcf_bpf_init,
.size = sizeof(struct tcf_bpf),
};
+MODULE_ALIAS_NET_ACT("bpf");
static __net_init int bpf_init_net(struct net *net)
{
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f8762756657d..0fce631e7c91 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -242,6 +242,7 @@ static struct tc_action_ops act_connmark_ops = {
.cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
+MODULE_ALIAS_NET_ACT("connmark");
static __net_init int connmark_init_net(struct net *net)
{
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 7f8b1f2f2ed9..5cc8e407e791 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -709,6 +709,7 @@ static struct tc_action_ops act_csum_ops = {
.offload_act_setup = tcf_csum_offload_act_setup,
.size = sizeof(struct tcf_csum),
};
+MODULE_ALIAS_NET_ACT("csum");
static __net_init int csum_init_net(struct net *net)
{
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 6124d8b128d1..baac083fd8f1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1600,6 +1600,7 @@ static struct tc_action_ops act_ct_ops = {
.offload_act_setup = tcf_ct_offload_act_setup,
.size = sizeof(struct tcf_ct),
};
+MODULE_ALIAS_NET_ACT("ct");
static __net_init int ct_init_net(struct net *net)
{
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index e620f9a84afe..5dd41a012110 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -363,6 +363,7 @@ static struct tc_action_ops act_ctinfo_ops = {
.cleanup= tcf_ctinfo_cleanup,
.size = sizeof(struct tcf_ctinfo),
};
+MODULE_ALIAS_NET_ACT("ctinfo");
static __net_init int ctinfo_init_net(struct net *net)
{
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4af3b7ec249f..e949280eb800 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -296,6 +296,7 @@ static struct tc_action_ops act_gact_ops = {
.offload_act_setup = tcf_gact_offload_act_setup,
.size = sizeof(struct tcf_gact),
};
+MODULE_ALIAS_NET_ACT("gact");
static __net_init int gact_init_net(struct net *net)
{
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c681cd011afd..1dd74125398a 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -645,6 +645,7 @@ static struct tc_action_ops act_gate_ops = {
.offload_act_setup = tcf_gate_offload_act_setup,
.size = sizeof(struct tcf_gate),
};
+MODULE_ALIAS_NET_ACT("gate");
static __net_init int gate_init_net(struct net *net)
{
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 0e867d13beb5..107c6d83dc5c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -889,6 +889,7 @@ static struct tc_action_ops act_ife_ops = {
.init = tcf_ife_init,
.size = sizeof(struct tcf_ife_info),
};
+MODULE_ALIAS_NET_ACT("ife");
static __net_init int ife_init_net(struct net *net)
{
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 12386f590b0f..5b3814365924 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -232,18 +232,14 @@ release_idr:
return err;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_nest_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -270,8 +266,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
- err = -ENODEV;
- goto out;
+ goto err_cant_do;
}
/* we could easily avoid the clone only if called by ingress and clsact;
@@ -283,10 +278,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
tcf_mirred_can_reinsert(retval);
if (!dont_clone) {
skb_to_send = skb_clone(skb, GFP_ATOMIC);
- if (!skb_to_send) {
- err = -ENOMEM;
- goto out;
- }
+ if (!skb_to_send)
+ goto err_cant_do;
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
@@ -319,19 +312,20 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
- err = tcf_mirred_forward(want_ingress, skb_to_send);
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
} else {
- err = tcf_mirred_forward(want_ingress, skb_to_send);
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
}
-
- if (err) {
-out:
+ if (err)
tcf_action_inc_overlimit_qstats(&m->common);
- if (is_redirect)
- retval = TC_ACT_SHOT;
- }
return retval;
+
+err_cant_do:
+ if (is_redirect)
+ retval = TC_ACT_SHOT;
+ tcf_action_inc_overlimit_qstats(&m->common);
+ return retval;
}
static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
@@ -533,8 +527,6 @@ static int mirred_device_event(struct notifier_block *unused,
* net_device are already rcu protected.
*/
RCU_INIT_POINTER(m->tcfm_dev, NULL);
- } else if (m->tcfm_blockid) {
- m->tcfm_blockid = 0;
}
spin_unlock_bh(&m->tcf_lock);
}
@@ -643,6 +635,7 @@ static struct tc_action_ops act_mirred_ops = {
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
};
+MODULE_ALIAS_NET_ACT("mirred");
static __net_init int mirred_init_net(struct net *net)
{
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 34b8edb6cc77..44a37a71ae92 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -452,6 +452,7 @@ static struct tc_action_ops act_mpls_ops = {
.offload_act_setup = tcf_mpls_offload_act_setup,
.size = sizeof(struct tcf_mpls),
};
+MODULE_ALIAS_NET_ACT("mpls");
static __net_init int mpls_init_net(struct net *net)
{
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a180e724634e..d541f553805f 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -324,6 +324,7 @@ static struct tc_action_ops act_nat_ops = {
.cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
+MODULE_ALIAS_NET_ACT("nat");
static __net_init int nat_init_net(struct net *net)
{
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 2ef22969f274..fc0a35a7b62a 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -515,11 +515,11 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
spin_unlock_bh(&p->tcf_lock);
return -ENOBUFS;
}
+ opt->nkeys = parms->tcfp_nkeys;
memcpy(opt->keys, parms->tcfp_keys,
flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
- opt->nkeys = parms->tcfp_nkeys;
opt->flags = parms->tcfp_flags;
opt->action = p->tcf_action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
@@ -620,6 +620,7 @@ static struct tc_action_ops act_pedit_ops = {
.offload_act_setup = tcf_pedit_offload_act_setup,
.size = sizeof(struct tcf_pedit),
};
+MODULE_ALIAS_NET_ACT("pedit");
static __net_init int pedit_init_net(struct net *net)
{
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e119b4a3db9f..8555125ed34d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -502,6 +502,7 @@ static struct tc_action_ops act_police_ops = {
.offload_act_setup = tcf_police_offload_act_setup,
.size = sizeof(struct tcf_police),
};
+MODULE_ALIAS_NET_ACT("police");
static __net_init int police_init_net(struct net *net)
{
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index c5c61efe6db4..a69b53d54039 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -316,6 +316,7 @@ static struct tc_action_ops act_sample_ops = {
.offload_act_setup = tcf_sample_offload_act_setup,
.size = sizeof(struct tcf_sample),
};
+MODULE_ALIAS_NET_ACT("sample");
static __net_init int sample_init_net(struct net *net)
{
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 0a3e92888295..f3abe0545989 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -209,6 +209,7 @@ static struct tc_action_ops act_simp_ops = {
.init = tcf_simp_init,
.size = sizeof(struct tcf_defact),
};
+MODULE_ALIAS_NET_ACT("simple");
static __net_init int simp_init_net(struct net *net)
{
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 754f78b35bb8..1f1d9ce3e968 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -426,6 +426,7 @@ static struct tc_action_ops act_skbedit_ops = {
.offload_act_setup = tcf_skbedit_offload_act_setup,
.size = sizeof(struct tcf_skbedit),
};
+MODULE_ALIAS_NET_ACT("skbedit");
static __net_init int skbedit_init_net(struct net *net)
{
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index bcb673ab0008..39945b139c48 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -287,6 +287,7 @@ static struct tc_action_ops act_skbmod_ops = {
.cleanup = tcf_skbmod_cleanup,
.size = sizeof(struct tcf_skbmod),
};
+MODULE_ALIAS_NET_ACT("skbmod");
static __net_init int skbmod_init_net(struct net *net)
{
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 300b08aa8283..1536f8b16f1b 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -842,6 +842,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
.offload_act_setup = tcf_tunnel_key_offload_act_setup,
.size = sizeof(struct tcf_tunnel_key),
};
+MODULE_ALIAS_NET_ACT("tunnel_key");
static __net_init int tunnel_key_init_net(struct net *net)
{
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 836183011a7c..22f4b1e8ade9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -427,6 +427,7 @@ static struct tc_action_ops act_vlan_ops = {
.offload_act_setup = tcf_vlan_offload_act_setup,
.size = sizeof(struct tcf_vlan),
};
+MODULE_ALIAS_NET_ACT("vlan");
static __net_init int vlan_init_net(struct net *net)
{
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 92a12e3d0fe6..ca5676b2668e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -257,7 +257,7 @@ tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
#ifdef CONFIG_MODULES
if (rtnl_held)
rtnl_unlock();
- request_module("cls_%s", kind);
+ request_module(NET_CLS_ALIAS_PREFIX "%s", kind);
if (rtnl_held)
rtnl_lock();
ops = __tcf_proto_lookup_ops(kind);
@@ -1560,6 +1560,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
chain_prev = chain,
chain = __tcf_get_next_chain(block, chain),
tcf_chain_put(chain_prev)) {
+ if (chain->tmplt_ops && add)
+ chain->tmplt_ops->tmplt_reoffload(chain, true, cb,
+ cb_priv);
for (tp = __tcf_get_next_proto(chain, NULL); tp;
tp_prev = tp,
tp = __tcf_get_next_proto(chain, tp),
@@ -1575,6 +1578,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
goto err_playback_remove;
}
}
+ if (chain->tmplt_ops && !add)
+ chain->tmplt_ops->tmplt_reoffload(chain, false, cb,
+ cb_priv);
}
return 0;
@@ -3000,7 +3006,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
ops = tcf_proto_lookup_ops(name, true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
- if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+ if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump ||
+ !ops->tmplt_reoffload) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
module_put(ops->owner);
return -EOPNOTSUPP;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index a1f56931330c..ecfaa4f9a04e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -328,6 +328,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
.bind_class = basic_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("basic");
static int __init init_basic(void)
{
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 382c7a71f81f..5e83e890f6a4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -693,6 +693,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
.dump = cls_bpf_dump,
.bind_class = cls_bpf_bind_class,
};
+MODULE_ALIAS_NET_CLS("bpf");
static int __init cls_bpf_init_mod(void)
{
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7ee8dbf49ed0..424252982d6a 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -209,6 +209,7 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
.dump = cls_cgroup_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("cgroup");
static int __init init_cgroup_cls(void)
{
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6ab317b48d6c..5502998aace7 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -702,6 +702,7 @@ static struct tcf_proto_ops cls_flow_ops __read_mostly = {
.walk = flow_walk,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("flow");
static int __init cls_flow_init(void)
{
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e5314a31f75a..e1314674b4a9 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2460,8 +2460,11 @@ unbind_filter:
}
errout_idr:
- if (!fold)
+ if (!fold) {
+ spin_lock(&tp->lock);
idr_remove(&head->handle_idr, fnew->handle);
+ spin_unlock(&tp->lock);
+ }
__fl_put(fnew);
errout_tb:
kfree(tb);
@@ -2721,6 +2724,28 @@ static void fl_tmplt_destroy(void *tmplt_priv)
kfree(tmplt);
}
+static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct fl_flow_tmplt *tmplt = chain->tmplt_priv;
+ struct flow_cls_offload cls_flower = {};
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return;
+
+ cls_flower.common.chain_index = chain->index;
+ cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE :
+ FLOW_CLS_TMPLT_DESTROY;
+ cls_flower.cookie = (unsigned long) tmplt;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
+
+ cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+}
+
static int fl_dump_key_val(struct sk_buff *skb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -3628,11 +3653,13 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
+ .tmplt_reoffload = fl_tmplt_reoffload,
.tmplt_dump = fl_tmplt_dump,
.get_exts = fl_get_exts,
.owner = THIS_MODULE,
.flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
+MODULE_ALIAS_NET_CLS("flower");
static int __init cls_fl_init(void)
{
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index afc534ee0a18..cdddc8695228 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -433,6 +433,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.bind_class = fw_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("fw");
static int __init init_fw(void)
{
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index c4ed11df6254..9f1e62ca508d 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -398,6 +398,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.bind_class = mall_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("matchall");
static int __init cls_mall_init(void)
{
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 12a505db4183..b9c58c040c30 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -671,6 +671,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.bind_class = route4_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("route");
static int __init init_route4(void)
{
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 289e1755c26b..9412d88a99bc 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1453,6 +1453,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
.bind_class = u32_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("u32");
static int __init init_u32(void)
{
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
index 5ea84decec19..5337bc462755 100644
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -222,6 +222,7 @@ static void __exit exit_em_canid(void)
tcf_em_unregister(&em_canid_ops);
}
+MODULE_DESCRIPTION("ematch classifier to match CAN IDs embedded in skb CAN frames");
MODULE_LICENSE("GPL");
module_init(init_em_canid);
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index f17b049ea530..c90ad7ea26b4 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -87,6 +87,7 @@ static void __exit exit_em_cmp(void)
tcf_em_unregister(&em_cmp_ops);
}
+MODULE_DESCRIPTION("ematch classifier for basic data types(8/16/32 bit) against skb data");
MODULE_LICENSE("GPL");
module_init(init_em_cmp);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 09d8afd04a2a..8996c73c9779 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -1006,6 +1006,7 @@ static void __exit exit_em_meta(void)
tcf_em_unregister(&em_meta_ops);
}
+MODULE_DESCRIPTION("ematch classifier for various internal kernel metadata, skb metadata and sk metadata");
MODULE_LICENSE("GPL");
module_init(init_em_meta);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index a83b237cbeb0..4f9f21a05d5e 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -68,6 +68,7 @@ static void __exit exit_em_nbyte(void)
tcf_em_unregister(&em_nbyte_ops);
}
+MODULE_DESCRIPTION("ematch classifier for arbitrary skb multi-bytes");
MODULE_LICENSE("GPL");
module_init(init_em_nbyte);
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index f176afb70559..420c66203b17 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -147,6 +147,7 @@ static void __exit exit_em_text(void)
tcf_em_unregister(&em_text_ops);
}
+MODULE_DESCRIPTION("ematch classifier for embedded text in skbs");
MODULE_LICENSE("GPL");
module_init(init_em_text);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 71b070da0437..fdec4db5ec89 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -52,6 +52,7 @@ static void __exit exit_em_u32(void)
tcf_em_unregister(&em_u32_ops);
}
+MODULE_DESCRIPTION("ematch skb classifier using 32 bit chunks of data");
MODULE_LICENSE("GPL");
module_init(init_em_u32);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 36b025cc4fd2..65e05b0c98e4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -228,7 +228,7 @@ int qdisc_set_default(const char *name)
if (!ops) {
/* Not found, drop lock and try to load module */
write_unlock(&qdisc_mod_lock);
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
write_lock(&qdisc_mod_lock);
ops = qdisc_lookup_default(name);
@@ -1275,7 +1275,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* go away in the mean time.
*/
rtnl_unlock();
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
rtnl_lock();
ops = qdisc_lookup_ops(kind);
if (ops != NULL) {
@@ -2410,7 +2410,7 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
-#if IS_ENABLED(CONFIG_RETPOLINE)
+#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 9cff99558694..edee926ccde8 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -3103,6 +3103,7 @@ static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
.dump_stats = cake_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("cake");
static int __init cake_module_init(void)
{
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index beece8e82c23..69001eff0315 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -546,6 +546,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.dump = cbs_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("cbs");
static struct notifier_block cbs_device_notifier = {
.notifier_call = cbs_dev_notifier,
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ae1da08e268f..ea108030c6b4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -498,6 +498,7 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
.dump_stats = choke_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("choke");
static int __init choke_module_init(void)
{
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index d7a4874543de..ecb3f164bb25 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Codel - The Controlled-Delay Active Queue Management algorithm
*
@@ -7,37 +8,6 @@
* Implemented on linux by :
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
* Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The names of the authors may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, provided that this notice is retained in full, this
- * software may be distributed under the terms of the GNU General
- * Public License ("GPL") version 2, in which case the provisions of the
- * GPL apply INSTEAD OF those given above.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
*/
#include <linux/module.h>
@@ -287,6 +257,7 @@ static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
.dump_stats = codel_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("codel");
static int __init codel_module_init(void)
{
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 097740a9afea..c69b999fae17 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -481,6 +481,7 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
.destroy = drr_destroy_qdisc,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("drr");
static int __init drr_init(void)
{
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 4808159a5466..2e4bef713b6a 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -500,6 +500,7 @@ static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
.dump = etf_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("etf");
static int __init etf_module_init(void)
{
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index f7c88495946b..835b4460b448 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -812,6 +812,7 @@ static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
.dump = ets_qdisc_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("ets");
static int __init ets_init(void)
{
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 3a31c47fea9b..cdf23ff16f40 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1264,6 +1264,7 @@ static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
.dump_stats = fq_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq");
static int __init fq_module_init(void)
{
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 8c4fee063436..79f9d6de6c85 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -717,6 +717,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.dump_stats = fq_codel_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq_codel");
static int __init fq_codel_module_init(void)
{
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9b3e9262040b..ff5336493777 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -27,6 +27,7 @@
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/hotdata.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>
@@ -409,7 +410,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = READ_ONCE(dev_tx_weight);
+ int quota = READ_ONCE(net_hotdata.dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8c61eb3dc943..79ba9dc70254 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -930,6 +930,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
.dump = gred_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("gred");
static int __init gred_module_init(void)
{
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 16c45da4036a..4e626df742d7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1679,6 +1679,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct hfsc_sched),
.owner = THIS_MODULE
};
+MODULE_ALIAS_NET_SCH("hfsc");
static int __init
hfsc_init(void)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index d26cd436cbe3..3f906df1435b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -702,6 +702,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
.dump_stats = hhf_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("hhf");
static int __init hhf_module_init(void)
{
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7349233eaa9b..93e6fb56f3b5 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -2166,6 +2166,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.dump = htb_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("htb");
static int __init htb_module_init(void)
{
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5fa9eaa79bfc..c2ef9dcf91d2 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -168,6 +168,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.ingress_block_get = ingress_ingress_block_get,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("ingress");
struct clsact_sched_data {
struct tcf_block *ingress_block;
@@ -344,6 +345,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.egress_block_get = clsact_egress_block_get,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("clsact");
static int __init ingress_module_init(void)
{
@@ -368,6 +370,5 @@ static void __exit ingress_module_exit(void)
module_init(ingress_module_init);
module_exit(ingress_module_exit);
-MODULE_ALIAS("sch_clsact");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs");
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 43e53ee00a56..225353fbb3f1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -774,6 +774,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
.dump = mqprio_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("mqprio");
static int __init mqprio_module_init(void)
{
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index d66d5f0ec080..79e93a19d5fa 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -395,6 +395,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
.dump = multiq_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("multiq");
static int __init multiq_module_init(void)
{
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fa678eb88528..edc72962ae63 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1293,6 +1293,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
.dump = netem_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("netem");
static int __init netem_module_init(void)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 2da6250ec346..1764059b0635 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -556,6 +556,7 @@ static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
.dump_stats = pie_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("pie");
static int __init pie_module_init(void)
{
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 992f0c8d7988..cefb65201e17 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -213,6 +213,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.reset = qdisc_reset_queue,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("plug");
static int __init plug_module_init(void)
{
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8ecdd3ef6f8e..cc30f7a32f1a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -418,6 +418,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.dump = prio_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("prio");
static int __init prio_module_init(void)
{
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 48a604c320c7..d584c0c25899 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1521,6 +1521,7 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
.destroy = qfq_destroy_qdisc,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("qfq");
static int __init qfq_init(void)
{
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 607b6c8b3a9b..b5f096588fae 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -548,6 +548,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
.dump_stats = red_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("red");
static int __init red_module_init(void)
{
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 1871a1c0224d..b717e15a3a17 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -709,6 +709,7 @@ static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
.dump_stats = sfb_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("sfb");
static int __init sfb_module_init(void)
{
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index eb77558fa367..e66f4afb920d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -925,6 +925,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.dump = sfq_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("sfq");
static int __init sfq_module_init(void)
{
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 28beb11762d8..b4dd626c309c 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -292,6 +292,7 @@ static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
.destroy = skbprio_destroy,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("skbprio");
static int __init skbprio_module_init(void)
{
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 31a8252bd09c..c5de70efdc86 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -40,6 +40,8 @@ static struct static_key_false taprio_have_working_mqprio;
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_SUPPORTED_FLAGS \
+ (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
@@ -408,19 +410,6 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
return entry;
}
-static bool taprio_flags_valid(u32 flags)
-{
- /* Make sure no other flag bits are set. */
- if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
- TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
- return false;
- /* txtime-assist and full offload are mutually exclusive */
- if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
- (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
- return false;
- return true;
-}
-
/* This returns the tstamp value set by TCP in terms of the set clock. */
static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
{
@@ -1031,7 +1020,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
- [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_FLAGS] =
+ NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS),
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
};
@@ -1815,33 +1805,6 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
return 0;
}
-/* The semantics of the 'flags' argument in relation to 'change()'
- * requests, are interpreted following two rules (which are applied in
- * this order): (1) an omitted 'flags' argument is interpreted as
- * zero; (2) the 'flags' of a "running" taprio instance cannot be
- * changed.
- */
-static int taprio_new_flags(const struct nlattr *attr, u32 old,
- struct netlink_ext_ack *extack)
-{
- u32 new = 0;
-
- if (attr)
- new = nla_get_u32(attr);
-
- if (old != TAPRIO_FLAGS_INVALID && old != new) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- }
-
- if (!taprio_flags_valid(new)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
-
- return new;
-}
-
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1852,6 +1815,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
unsigned long flags;
+ u32 taprio_flags;
ktime_t start;
int i, err;
@@ -1863,12 +1827,28 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
- q->flags, extack);
- if (err < 0)
- return err;
+ /* The semantics of the 'flags' argument in relation to 'change()'
+ * requests, are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+ taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;
- q->flags = err;
+ /* txtime-assist and full offload are mutually exclusive */
+ if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
+ (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS],
+ "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive");
+ return -EINVAL;
+ }
+
+ if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+ q->flags = taprio_flags;
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
@@ -2548,6 +2528,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.dump_stats = taprio_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("taprio");
static struct notifier_block taprio_device_notifier = {
.notifier_call = taprio_dev_notifier,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dd6b1a723bf7..f1d09183ae63 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -608,6 +608,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
.dump = tbf_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("tbf");
static int __init tbf_module_init(void)
{
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index eb05131ff1dd..23359e522273 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -507,6 +507,7 @@ done:
}
static const struct inet_diag_handler sctp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = sctp_diag_dump,
.dump_one = sctp_diag_dump_one,
.idiag_get_info = sctp_diag_get_info,
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 7182c5a450fb..5c1652181805 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
INIT_WORK(&queue->immediate, NULL);
}
+/* Properly release the chunk which is being worked on. */
+static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
+{
+ if (chunk->head_skb)
+ chunk->skb = chunk->head_skb;
+ sctp_chunk_free(chunk);
+}
+
/* Release the memory associated with an SCTP inqueue. */
void sctp_inq_free(struct sctp_inq *queue)
{
@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
* free it as well.
*/
if (queue->in_progress) {
- sctp_chunk_free(queue->in_progress);
+ sctp_inq_chunk_free(queue->in_progress);
queue->in_progress = NULL;
}
}
@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
goto new_skb;
}
- if (chunk->head_skb)
- chunk->skb = chunk->head_skb;
- sctp_chunk_free(chunk);
+ sctp_inq_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
/* Nothing to do. Next chunk in the packet, please. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 94c6dd53cd62..e849f368ed91 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1495,17 +1495,11 @@ static __init int sctp_init(void)
/* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
- sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket",
- sizeof(struct sctp_bind_bucket),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ sctp_bucket_cachep = KMEM_CACHE(sctp_bind_bucket, SLAB_HWCACHE_ALIGN);
if (!sctp_bucket_cachep)
goto out;
- sctp_chunk_cachep = kmem_cache_create("sctp_chunk",
- sizeof(struct sctp_chunk),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ sctp_chunk_cachep = KMEM_CACHE(sctp_chunk, SLAB_HWCACHE_ALIGN);
if (!sctp_chunk_cachep)
goto err_chunk_cachep;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6b9fcdb0952a..c67679a41044 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -67,6 +67,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
+#include <net/rps.h>
/* Forward declarations for internal helper functions. */
static bool sctp_writeable(const struct sock *sk);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a2cb30af46cb..4b52b3b159c0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -924,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->file->private_data = smc->clcsock;
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
+ smc->sk.sk_socket->wq.fasync_list = NULL;
/* There might be some wait entries remaining
* in smc sk->sk_wq and they should be woken up
@@ -1045,7 +1046,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
int i = 1, entry = 1;
- bool is_virtual;
+ bool is_emulated;
u16 chid;
if (smcd_indicated(ini->smc_type_v1))
@@ -1057,12 +1058,12 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
- is_virtual = __smc_ism_is_virtual(chid);
+ is_emulated = __smc_ism_is_emulated(chid);
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
- if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
+ if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
/* It's the last GID-CHID entry left in CLC
- * Proposal SMC-Dv2 extension, but a virtual
+ * Proposal SMC-Dv2 extension, but an Emulated-
* ISM device will take two entries. So give
* up it and try the next potential ISM device.
*/
@@ -1072,7 +1073,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
ini->is_smcd = true;
rc = 0;
i++;
- entry = is_virtual ? entry + 2 : entry + 1;
+ entry = is_emulated ? entry + 2 : entry + 1;
if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
break;
}
@@ -1413,10 +1414,10 @@ static int smc_connect_ism(struct smc_sock *smc,
if (rc)
return rc;
- if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected]))
+ if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
ini->ism_peer_gid[ini->ism_selected].gid_ext =
ntohll(aclc->d1.gid_ext);
- /* for non-virtual ISM devices, peer gid_ext remains 0. */
+ /* for non-Emulated-ISM devices, peer gid_ext remains 0. */
}
ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
@@ -2117,10 +2118,10 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini,
if (smc_ism_get_chid(smcd) == proposed_chid &&
!smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
- if (__smc_ism_is_virtual(proposed_chid))
+ if (__smc_ism_is_emulated(proposed_chid))
ini->ism_peer_gid[*matches].gid_ext =
proposed_gid->gid_ext;
- /* non-virtual ISM's peer gid_ext remains 0. */
+ /* non-Emulated-ISM's peer gid_ext remains 0. */
ini->ism_dev[*matches] = smcd;
(*matches)++;
break;
@@ -2170,10 +2171,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
smcd_gid.gid_ext = 0;
chid = ntohs(smcd_v2_ext->gidchid[i].chid);
- if (__smc_ism_is_virtual(chid)) {
+ if (__smc_ism_is_emulated(chid)) {
if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
- /* each virtual ISM device takes two GID-CHID
+ /* each Emulated-ISM device takes two GID-CHID
* entries and CHID of the second entry repeats
* that of the first entry.
*
diff --git a/net/smc/smc.h b/net/smc/smc.h
index df64efd2dee8..18c8b7870198 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -56,11 +56,11 @@ enum smc_state { /* possible states of an SMC socket */
};
enum smc_supplemental_features {
- SMC_SPF_VIRT_ISM_DEV = 0,
+ SMC_SPF_EMULATED_ISM_DEV = 0,
};
#define SMC_FEATURE_MASK \
- (BIT(SMC_SPF_VIRT_ISM_DEV))
+ (BIT(SMC_SPF_EMULATED_ISM_DEV))
struct smc_link_group;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 9a13709bea1c..e55026c7529c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -952,8 +952,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
gidchids[entry].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
gidchids[entry].gid = htonll(smcd_gid.gid);
- if (smc_ism_is_virtual(smcd)) {
- /* a virtual ISM device takes two
+ if (smc_ism_is_emulated(smcd)) {
+ /* an Emulated-ISM device takes two
* entries. CHID of the second entry
* repeats that of the first entry.
*/
@@ -1055,7 +1055,7 @@ smcd_clc_prep_confirm_accept(struct smc_connection *conn,
clc->d1.chid = htons(chid);
if (eid && eid[0])
memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN);
- if (__smc_ism_is_virtual(chid))
+ if (__smc_ism_is_emulated(chid))
clc->d1.gid_ext = htonll(smcd_gid.gid_ext);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index a9f9bdd26dcd..7cc7070b9772 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -175,7 +175,7 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC
* proposal SMC-Dv2 extension.
* each ISM device takes one entry and
- * each virtual ISM takes two entries.
+ * each Emulated-ISM takes two entries
*/
struct smc_clc_msg_proposal_area {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 95cc95458e2d..9b84d5897aa5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1535,7 +1535,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
if ((!peer_gid->gid ||
(lgr->peer_gid.gid == peer_gid->gid &&
- !smc_ism_is_virtual(dev) ? 1 :
+ !smc_ism_is_emulated(dev) ? 1 :
lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
if (peer_gid->gid) /* peer triggered termination */
@@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
struct smcd_dev *smcismdev,
struct smcd_gid *peer_gid)
{
- return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev &&
- smc_ism_is_virtual(smcismdev) ?
- (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1;
+ if (lgr->peer_gid.gid != peer_gid->gid ||
+ lgr->smcd != smcismdev)
+ return false;
+
+ if (smc_ism_is_emulated(smcismdev) &&
+ lgr->peer_gid.gid_ext != peer_gid->gid_ext)
+ return false;
+
+ return true;
}
/* create a new SMC connection (and a new link group if necessary) */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 52f7c4f1e767..6fdb2d96777a 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
}
if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
- !list_empty(&smc->conn.lgr->list)) {
+ !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo;
struct smcd_dev *smcd = conn->lgr->smcd;
@@ -255,6 +255,7 @@ static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler smc_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_SMC,
.dump = smc_diag_handler_dump,
};
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index ffff40c30a06..165cd013404b 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -15,7 +15,7 @@
#include "smc.h"
-#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00
+#define SMC_EMULATED_ISM_CHID_MASK 0xFF00
#define SMC_ISM_IDENT_MASK 0x00FFFF
struct smcd_dev_list { /* List of SMCD devices */
@@ -66,10 +66,10 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
return rc < 0 ? rc : 0;
}
-static inline bool __smc_ism_is_virtual(u16 chid)
+static inline bool __smc_ism_is_emulated(u16 chid)
{
/* CHIDs in range of 0xFF00 to 0xFFFF are reserved
- * for virtual ISM device.
+ * for Emulated-ISM device.
*
* loopback-ism: 0xFFFF
* virtio-ism: 0xFF00 ~ 0xFFFE
@@ -77,11 +77,11 @@ static inline bool __smc_ism_is_virtual(u16 chid)
return ((chid & 0xFF00) == 0xFF00);
}
-static inline bool smc_ism_is_virtual(struct smcd_dev *smcd)
+static inline bool smc_ism_is_emulated(struct smcd_dev *smcd)
{
u16 chid = smcd->ops->get_chid(smcd);
- return __smc_ism_is_virtual(chid);
+ return __smc_ism_is_emulated(chid);
}
#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 9f2c58c5a86b..2adb92b8c469 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -806,6 +806,16 @@ static void smc_pnet_create_pnetids_list(struct net *net)
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct net_device *dev;
+ /* Newly created netns do not have devices.
+ * Do not even acquire rtnl.
+ */
+ if (list_empty(&net->dev_base_head))
+ return;
+
+ /* Note: This might not be needed, because smc_pnet_netdev_event()
+ * is also calling smc_pnet_add_base_pnetid() when handling
+ * NETDEV_UP event.
+ */
rtnl_lock();
for_each_netdev(net, dev)
smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
diff --git a/net/socket.c b/net/socket.c
index ed3df2f749bf..7e9c8fc9a5b4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -343,7 +343,7 @@ static void init_inodecache(void)
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
BUG_ON(sock_inode_cachep == NULL);
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index d2b02710ab07..b2c1b683a88e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -921,6 +921,8 @@ out_err:
* Caller provides the truncation length of the output token (h) in
* cksumout.len.
*
+ * Note that for RPCSEC, the "initial cipher state" is always all zeroes.
+ *
* Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed
@@ -931,22 +933,19 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
int body_offset, struct xdr_netobj *cksumout)
{
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
+ static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct ahash_request *req;
struct scatterlist sg[1];
- u8 *iv, *checksumdata;
int err = -ENOMEM;
+ u8 *checksumdata;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
- /* For RPCSEC, the "initial cipher state" is always all zeroes. */
- iv = kzalloc(ivsize, GFP_KERNEL);
- if (!iv)
- goto out_free_mem;
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_mem;
+ goto out_free_cksumdata;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
@@ -970,8 +969,7 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
out_free_ahash:
ahash_request_free(req);
-out_free_mem:
- kfree(iv);
+out_free_cksumdata:
kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 64cff717c3d9..3366505bc669 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -398,6 +398,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
u64 seq_send64;
int keylen;
u32 time32;
+ int ret;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p))
@@ -450,8 +451,16 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- return gss_krb5_import_ctx_v2(ctx, gfp_mask);
+ ret = gss_krb5_import_ctx_v2(ctx, gfp_mask);
+ if (ret) {
+ p = ERR_PTR(ret);
+ goto out_free;
+ }
+ return 0;
+
+out_free:
+ kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index d79f12c2550a..cb32ab9a8395 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
- kfree(oa->data);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free_oa;
}
oa->data[0].option.data = CREDS_VALUE;
@@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
/* option buffer */
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
- return err;
+ goto free_creds;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
- return err;
+ goto free_creds;
}
}
return 0;
+
+free_creds:
+ kfree(creds);
+free_oa:
+ kfree(oa->data);
+ oa->data = NULL;
+ return err;
}
static int gssx_dec_status(struct xdr_stream *xdr,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index dcc2b4f49e77..910a5d850d04 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1490,7 +1490,7 @@ int register_rpc_pipefs(void)
rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
sizeof(struct rpc_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (!rpc_inode_cachep)
return -ENOMEM;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 65fc1297c6df..383860cb1d5b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{
- return do_register(net, statp->program->pg_name, statp, proc_ops);
+ return do_register(net, statp->program->pg_name, net, proc_ops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f60c93e5a25d..b33e429336fb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -451,8 +451,8 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service
*/
static struct svc_serv *
-__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- int (*threadfn)(void *data))
+__svc_create(struct svc_program *prog, struct svc_stat *stats,
+ unsigned int bufsize, int npools, int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -463,7 +463,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- serv->sv_stats = prog->pg_stats;
+ serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
@@ -529,26 +529,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, 1, threadfn);
+ return __svc_create(prog, NULL, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
/**
* svc_create_pooled - Create an RPC service with pooled threads
* @prog: the RPC program the new service will handle
+ * @stats: the stats struct if desired
* @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog
*
* Returns an instantiated struct svc_serv object or NULL.
*/
struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, threadfn);
+ serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
@@ -1375,7 +1377,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto err_bad_proc;
/* Syntactic check complete */
- serv->sv_stats->rpccnt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
aoffset = xdr_stream_pos(xdr);
@@ -1427,7 +1430,8 @@ err_short_len:
goto close_xprt;
err_bad_rpc:
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
xdr_stream_encode_u32(xdr, RPC_MISMATCH);
/* Only RPCv2 supported */
@@ -1438,7 +1442,8 @@ err_bad_rpc:
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
- serv->sv_stats->rpcbadauth++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of reply status: */
xdr_truncate_encode(xdr, XDR_UNIT * 2);
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
@@ -1448,7 +1453,8 @@ err_bad_auth:
err_bad_prog:
dprintk("svc: unknown program %d\n", rqstp->rq_prog);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit;
@@ -1456,7 +1462,8 @@ err_bad_vers:
svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_mismatch;
/*
@@ -1470,19 +1477,22 @@ err_bad_vers:
err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
err_garbage_args:
svc_printk(rqstp, "failed to decode RPC header\n");
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_garbage_args;
goto sendit;
err_system_err:
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_system_err;
goto sendit;
}
@@ -1534,7 +1544,8 @@ void svc_process(struct svc_rqst *rqstp)
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
be32_to_cpu(*p));
- rqstp->rq_server->sv_stats->rpcbadfmt++;
+ if (rqstp->rq_server->sv_stats)
+ rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
}
@@ -1598,10 +1609,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
/* Finally, send the reply synchronously */
if (rqstp->bc_to_initval > 0) {
timeout.to_initval = rqstp->bc_to_initval;
- timeout.to_retries = rqstp->bc_to_initval;
+ timeout.to_retries = rqstp->bc_to_retries;
} else {
timeout.to_initval = req->rq_xprt->timeout->to_initval;
- timeout.to_initval = req->rq_xprt->timeout->to_retries;
+ timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
@@ -1612,7 +1623,6 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
rpc_put_task(task);
}
-EXPORT_SYMBOL_GPL(svc_process_bc);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index bfb2f78523a8..545017a3daa4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
ARRAY_SIZE(rqstp->rq_bvec), xdr);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index c9be6778643b..e5a78b761012 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, sctxt);
+ return svc_rdma_post_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index c00fcce61d1e..f2a100c4c81f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -197,28 +197,6 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
-/* State for sending a Write or Reply chunk.
- * - Tracks progress of writing one chunk over all its segments
- * - Stores arguments for the SGL constructor functions
- */
-struct svc_rdma_write_info {
- struct svcxprt_rdma *wi_rdma;
-
- const struct svc_rdma_chunk *wi_chunk;
-
- /* write state of this chunk */
- unsigned int wi_seg_off;
- unsigned int wi_seg_no;
-
- /* SGL constructor arguments */
- const struct xdr_buf *wi_xdr;
- unsigned char *wi_base;
- unsigned int wi_next_off;
-
- struct svc_rdma_chunk_ctxt wi_cc;
- struct work_struct wi_work;
-};
-
static struct svc_rdma_write_info *
svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk)
@@ -253,6 +231,71 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
}
/**
+ * svc_rdma_write_chunk_release - Release Write chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_write_info *info;
+ struct svc_rdma_chunk_ctxt *cc;
+
+ while (!list_empty(&ctxt->sc_write_info_list)) {
+ info = list_first_entry(&ctxt->sc_write_info_list,
+ struct svc_rdma_write_info, wi_list);
+ list_del(&info->wi_list);
+
+ cc = &info->wi_cc;
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+ svc_rdma_write_info_free(info);
+ }
+}
+
+/**
+ * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_chunk_ctxt *cc = &ctxt->sc_reply_info.wi_cc;
+
+ if (!cc->cc_sqecount)
+ return;
+ svc_rdma_cc_release(rdma, cc, DMA_TO_DEVICE);
+}
+
+/**
+ * svc_rdma_reply_done - Reply chunk Write completion handler
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion report
+ *
+ * Pages under I/O are released by a subsequent Send completion.
+ */
+static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_chunk_ctxt *cc =
+ container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svcxprt_rdma *rdma = cq->cq_context;
+
+ switch (wc->status) {
+ case IB_WC_SUCCESS:
+ trace_svcrdma_wc_reply(&cc->cc_cid);
+ return;
+ case IB_WC_WR_FLUSH_ERR:
+ trace_svcrdma_wc_reply_flush(wc, &cc->cc_cid);
+ break;
+ default:
+ trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
+ }
+
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+}
+
+/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
* @wc: Work Completion
@@ -265,13 +308,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_write_info *info =
- container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
- break;
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
@@ -279,12 +320,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
-
- if (unlikely(wc->status != IB_WC_SUCCESS))
- svc_xprt_deferred_close(&rdma->sc_xprt);
-
- svc_rdma_write_info_free(info);
+ /* The RDMA Write has flushed, so the client won't get
+ * some of the outgoing RPC message. Signal the loss
+ * to the client by closing the connection.
+ */
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@@ -580,41 +620,54 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-/**
- * svc_rdma_send_write_chunk - Write all segments in a Write chunk
- * @rdma: controlling RDMA transport
- * @chunk: Write chunk provided by the client
- * @xdr: xdr_buf containing the data payload
- *
- * Returns a non-negative number of bytes the chunk consumed, or
- * %-E2BIG if the payload was larger than the Write chunk,
- * %-EINVAL if client provided too many segments,
- * %-ENOMEM if rdma_rw context pool was exhausted,
- * %-ENOTCONN if posting failed (connection is lost),
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
+/* Link Write WRs for @chunk onto @sctxt's WR chain.
*/
-int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
+ struct ib_send_wr *first_wr;
+ struct xdr_buf payload;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
+ if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
+ chunk->ch_payload_length))
+ return -EMSGSIZE;
+
info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
cc = &info->wi_cc;
- ret = svc_rdma_xb_write(xdr, info);
- if (ret != xdr->len)
+ ret = svc_rdma_xb_write(&payload, info);
+ if (ret != payload.len)
goto out_err;
- trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
+ ret = -EINVAL;
+ if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
goto out_err;
- return xdr->len;
+
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
+
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
+ list_add(&info->wi_list, &sctxt->sc_write_info_list);
+
+ trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
+ return 0;
out_err:
svc_rdma_write_info_free(info);
@@ -622,9 +675,39 @@ out_err:
}
/**
- * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
* @rdma: controlling RDMA transport
- * @rctxt: Write and Reply chunks from client
+ * @write_pcl: Write list provisioned by the client
+ * @sctxt: Send WR resources
+ * @xdr: xdr_buf containing an RPC Reply message
+ *
+ * Returns zero on success, or a negative errno if one or more
+ * Write chunks could not be sent.
+ */
+int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
+{
+ struct svc_rdma_chunk *chunk;
+ int ret;
+
+ pcl_for_each_chunk(chunk, write_pcl) {
+ if (!chunk->ch_payload_length)
+ break;
+ ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * svc_rdma_prepare_reply_chunk - Construct WR chain for writing the Reply chunk
+ * @rdma: controlling RDMA transport
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
+ * @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply
*
* Returns a non-negative number of bytes the chunk consumed, or
@@ -634,39 +717,45 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/
-int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
{
- struct svc_rdma_write_info *info;
- struct svc_rdma_chunk_ctxt *cc;
- struct svc_rdma_chunk *chunk;
+ struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
+ struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+ struct ib_send_wr *first_wr;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
- if (pcl_is_empty(&rctxt->rc_reply_pcl))
- return 0;
-
- chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
- info = svc_rdma_write_info_alloc(rdma, chunk);
- if (!info)
- return -ENOMEM;
- cc = &info->wi_cc;
+ info->wi_rdma = rdma;
+ info->wi_chunk = pcl_first_chunk(reply_pcl);
+ info->wi_seg_off = 0;
+ info->wi_seg_no = 0;
+ info->wi_cc.cc_cqe.done = svc_rdma_reply_done;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_write, info);
if (ret < 0)
- goto out_err;
+ return ret;
- trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
- goto out_err;
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
- return xdr->len;
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
-out_err:
- svc_rdma_write_info_free(info);
- return ret;
+ trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
+ return xdr->len;
}
/**
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1a49b7f02041..dfca39abd16c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -142,6 +142,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
+ INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
@@ -205,9 +206,13 @@ out:
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL);
+ svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0;
+ ctxt->sc_wr_chain = &ctxt->sc_send_wr;
+ ctxt->sc_sqecount = 1;
+
return ctxt;
out_empty:
@@ -223,6 +228,9 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ svc_rdma_write_chunk_release(rdma, ctxt);
+ svc_rdma_reply_chunk_release(rdma, ctxt);
+
if (ctxt->sc_page_count)
release_pages(ctxt->sc_pages, ctxt->sc_page_count);
@@ -293,7 +301,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- svc_rdma_wake_send_waiters(rdma, 1);
+ svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
@@ -312,51 +320,76 @@ flushed:
}
/**
- * svc_rdma_send - Post a single Send WR
- * @rdma: transport on which to post the WR
- * @ctxt: send ctxt with a Send WR ready to post
+ * svc_rdma_post_send - Post a WR chain to the Send Queue
+ * @rdma: transport context
+ * @ctxt: WR chain to post
+ *
+ * Copy fields in @ctxt to stack variables in order to guarantee
+ * that these values remain available after the ib_post_send() call.
+ * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
+ *
+ * Note there is potential for starvation when the Send Queue is
+ * full because there is no order to when waiting threads are
+ * awoken. The transport is typically provisioned with a deep
+ * enough Send Queue that SQ exhaustion should be a rare event.
*
- * Returns zero if the Send WR was posted successfully. Otherwise, a
- * negative errno is returned.
+ * Return values:
+ * %0: @ctxt's WR chain was posted successfully
+ * %-ENOTCONN: The connection was lost
*/
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
+int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
- struct ib_send_wr *wr = &ctxt->sc_send_wr;
- int ret;
+ struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
+ struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
+ const struct ib_send_wr *bad_wr = first_wr;
+ struct rpc_rdma_cid cid = ctxt->sc_cid;
+ int ret, sqecount = ctxt->sc_sqecount;
might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
- wr->sg_list[0].addr,
- wr->sg_list[0].length,
+ send_wr->sg_list[0].addr,
+ send_wr->sg_list[0].length,
DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */
- while (1) {
- if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
+ while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
+ if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+
+ /* When the transport is torn down, assume
+ * ib_drain_sq() will trigger enough Send
+ * completions to wake us. The XPT_CLOSE test
+ * above should then cause the while loop to
+ * exit.
+ */
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma, &ctxt->sc_cid);
- atomic_inc(&rdma->sc_sq_avail);
+ trace_svcrdma_sq_full(rdma, &cid);
wait_event(rdma->sc_send_wait,
- atomic_read(&rdma->sc_sq_avail) > 1);
- if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
- return -ENOTCONN;
- trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
+ atomic_read(&rdma->sc_sq_avail) > 0);
+ trace_svcrdma_sq_retry(rdma, &cid);
continue;
}
trace_svcrdma_post_send(ctxt);
- ret = ib_post_send(rdma->sc_qp, wr, NULL);
- if (ret)
- break;
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret) {
+ trace_svcrdma_sq_post_err(rdma, &cid, ret);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ /* If even one WR was posted, there will be a
+ * Send completion that bumps sc_sq_avail.
+ */
+ if (bad_wr == first_wr) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+ break;
+ }
+ }
return 0;
}
-
- trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
- svc_xprt_deferred_close(&rdma->sc_xprt);
- wake_up(&rdma->sc_send_wait);
- return ret;
+ return -ENOTCONN;
}
/**
@@ -839,16 +872,10 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
*
* Depending on whether a Write list or Reply chunk is present,
- * the server may send all, a portion of, or none of the xdr_buf.
+ * the server may Send all, a portion of, or none of the xdr_buf.
* In the latter case, only the transport header (sc_sges[0]) is
* transmitted.
*
- * RDMA Send is the last step of transmitting an RPC reply. Pages
- * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the sctxt's page array. These pages are
- * DMA unmapped by each Write completion, but the subsequent Send
- * completion finally releases these pages.
- *
* Assumptions:
* - The Reply's transport header will never be larger than a page.
*/
@@ -857,6 +884,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp)
{
+ struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
int ret;
ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
@@ -864,16 +892,19 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
+ /* Transfer pages involved in RDMA Writes to the sctxt's
+ * page array. Completion handling releases these pages.
+ */
svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) {
- sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
- sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
- sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ send_wr->opcode = IB_WR_SEND;
}
- return svc_rdma_send(rdma, sctxt);
+ return svc_rdma_post_send(rdma, sctxt);
}
/**
@@ -937,7 +968,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
- if (svc_rdma_send(rdma, sctxt))
+ if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt;
return;
@@ -984,10 +1015,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
+ &rqstp->rq_res);
if (ret < 0)
- goto reply_chunk;
- rc_size = ret;
+ goto put_ctxt;
+
+ rc_size = 0;
+ if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
+ ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, sctxt,
+ &rqstp->rq_res);
+ if (ret < 0)
+ goto reply_chunk;
+ rc_size = ret;
+ }
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
@@ -1030,45 +1071,33 @@ drop_connection:
/**
* svc_rdma_result_payload - special processing for a result payload
- * @rqstp: svc_rqst to operate on
- * @offset: payload's byte offset in @xdr
+ * @rqstp: RPC transaction context
+ * @offset: payload's byte offset in @rqstp->rq_res
* @length: size of payload, in bytes
*
+ * Assign the passed-in result payload to the current Write chunk,
+ * and advance to cur_result_payload to the next Write chunk, if
+ * there is one.
+ *
* Return values:
* %0 if successful or nothing needed to be done
- * %-EMSGSIZE on XDR buffer overflow
* %-E2BIG if the payload was larger than the Write chunk
- * %-EINVAL if client provided too many segments
- * %-ENOMEM if rdma_rw context pool was exhausted
- * %-ENOTCONN if posting failed (connection is lost)
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc)
*/
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
struct svc_rdma_chunk *chunk;
- struct svcxprt_rdma *rdma;
- struct xdr_buf subbuf;
- int ret;
chunk = rctxt->rc_cur_result_payload;
if (!length || !chunk)
return 0;
rctxt->rc_cur_result_payload =
pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
+
if (length > chunk->ch_length)
return -E2BIG;
-
chunk->ch_position = offset;
chunk->ch_payload_length = length;
-
- if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length))
- return -EMSGSIZE;
-
- rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
- ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf);
- if (ret < 0)
- return ret;
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4f27325ace4a..2b1c16b9547d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -415,15 +415,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
- newxprt->sc_recv_batch;
+ newxprt->sc_recv_batch + 1 /* drain */;
if (rq_depth > dev->attrs.max_qp_wr) {
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
- ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
- ctxts *= newxprt->sc_max_requests;
+
+ /* Arbitrarily estimate the number of rw_ctxs needed for
+ * this transport. This is enough rw_ctxs to make forward
+ * progress even if the client is using one rkey per page
+ * in each Read chunk.
+ */
+ ctxts = 3 * RPCSVC_MAXPAGES;
newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
@@ -460,12 +465,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
-
+ dprintk(" send CQ depth = %u, recv CQ depth = %u\n",
+ newxprt->sc_sq_depth, rq_depth);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
+ newxprt->sc_max_send_sges = qp_attr.cap.max_send_sge;
newxprt->sc_qp = newxprt->sc_cm_id->qp;
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 58f3dc8d0d71..d92c13e78a56 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2987,20 +2987,11 @@ static int bc_send_request(struct rpc_rqst *req)
return len;
}
-/*
- * The close routine. Since this is client initiated, we do nothing
- */
-
static void bc_close(struct rpc_xprt *xprt)
{
xprt_disconnect_done(xprt);
}
-/*
- * The xprt destroy routine. Again, because this connection is client
- * initiated, we do nothing
- */
-
static void bc_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: bc_destroy xprt %p\n", xprt);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5b045284849e..c9189a970eec 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,35 @@
#include <linux/rtnetlink.h>
#include <net/switchdev.h>
+static bool switchdev_obj_eq(const struct switchdev_obj *a,
+ const struct switchdev_obj *b)
+{
+ const struct switchdev_obj_port_vlan *va, *vb;
+ const struct switchdev_obj_port_mdb *ma, *mb;
+
+ if (a->id != b->id || a->orig_dev != b->orig_dev)
+ return false;
+
+ switch (a->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ va = SWITCHDEV_OBJ_PORT_VLAN(a);
+ vb = SWITCHDEV_OBJ_PORT_VLAN(b);
+ return va->flags == vb->flags &&
+ va->vid == vb->vid &&
+ va->changed == vb->changed;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ ma = SWITCHDEV_OBJ_PORT_MDB(a);
+ mb = SWITCHDEV_OBJ_PORT_MDB(b);
+ return ma->vid == mb->vid &&
+ ether_addr_equal(ma->addr, mb->addr);
+ default:
+ break;
+ }
+
+ BUG();
+}
+
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
@@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
+/**
+ * switchdev_port_obj_act_is_deferred - Is object action pending?
+ *
+ * @dev: port device
+ * @nt: type of action; add or delete
+ * @obj: object to test
+ *
+ * Returns true if a deferred item is pending, which is
+ * equivalent to the action @nt on an object @obj.
+ *
+ * rtnl_lock must be held.
+ */
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj)
+{
+ struct switchdev_deferred_item *dfitem;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ spin_lock_bh(&deferred_lock);
+
+ list_for_each_entry(dfitem, &deferred, list) {
+ if (dfitem->dev != dev)
+ continue;
+
+ if ((dfitem->func == switchdev_port_obj_add_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_ADD) ||
+ (dfitem->func == switchdev_port_obj_del_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_DEL)) {
+ if (switchdev_obj_eq((const void *)dfitem->data, obj)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ spin_unlock_bh(&deferred_lock);
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index be1c4003d67d..bb0d71eb02a6 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -32,16 +32,17 @@ config TIPC_MEDIA_UDP
bool "IP/UDP media type support"
depends on TIPC
select NET_UDP_TUNNEL
+ default y
help
Saying Y here will enable support for running TIPC over IP/UDP
- bool
- default y
+
config TIPC_CRYPTO
bool "TIPC encryption support"
depends on TIPC
select CRYPTO
select CRYPTO_AES
select CRYPTO_GCM
+ default y
help
Saying Y here will enable support for TIPC encryption.
All TIPC messages will be encrypted/decrypted by using the currently most
@@ -49,8 +50,6 @@ config TIPC_CRYPTO
entering the TIPC stack.
Key setting from user-space is performed via netlink by a user program
(e.g. the iproute2 'tipc' tool).
- bool
- default y
config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index ee49a9f1dd4f..18e1636aa036 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -18,5 +18,5 @@ tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
-
-obj-$(CONFIG_TIPC_DIAG) += diag.o
+obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o
+tipc_diag-y += diag.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2cde375477e3..5a526ebafeb4 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1079,24 +1079,27 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
b = tipc_bearer_find(net, name);
if (!b) {
- rtnl_unlock();
NL_SET_ERR_MSG(info->extack, "Bearer not found");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+ err = -EINVAL;
+ goto out;
+ }
+
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
- if (err) {
- rtnl_unlock();
- return err;
- }
}
#endif
+out:
rtnl_unlock();
- return 0;
+ return err;
}
int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index 18733451c9e0..54dde8c4e4d4 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -95,6 +95,7 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
}
static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_TIPC,
.dump = tipc_sock_diag_handler_dump,
};
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3105abe97bb9..c1e890a82434 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -86,8 +86,6 @@ struct tipc_bclink_entry {
* @lock: rwlock governing access to structure
* @net: the applicable net namespace
* @hash: links to adjacent nodes in unsorted hash chain
- * @inputq: pointer to input queue containing messages for msg event
- * @namedq: pointer to name table input queue with name table messages
* @active_links: bearer ids of active links, used as index into links[] array
* @links: array containing references to all links to node
* @bc_entry: broadcast link entry
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index bb1118d02f95..7e4135db5816 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -80,7 +80,6 @@ struct sockaddr_pair {
* @phdr: preformatted message header used when sending messages
* @cong_links: list of congested links
* @publications: list of publications for port
- * @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
* @conn_timeout: the time we can wait for an unresponded setup request
* @probe_unacked: probe has not received ack yet
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 1c2c6800949d..b4674f03d71a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx)
return 0;
}
-static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+static int tls_get_info(struct sock *sk, struct sk_buff *skb)
{
u16 version, cipher_type;
struct tls_context *ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 31e8a94dfc11..211f57164cb6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -52,6 +52,7 @@ struct tls_decrypt_arg {
struct_group(inargs,
bool zc;
bool async;
+ bool async_done;
u8 tail;
);
@@ -63,6 +64,7 @@ struct tls_decrypt_ctx {
u8 iv[TLS_MAX_IV_SIZE];
u8 aad[TLS_MAX_AAD_SIZE];
u8 tail;
+ bool free_sgout;
struct scatterlist sg[];
};
@@ -187,7 +189,6 @@ static void tls_decrypt_done(void *data, int err)
struct aead_request *aead_req = data;
struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
struct scatterlist *sgout = aead_req->dst;
- struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
struct tls_decrypt_ctx *dctx;
struct tls_context *tls_ctx;
@@ -196,6 +197,17 @@ static void tls_decrypt_done(void *data, int err)
struct sock *sk;
int aead_size;
+ /* If requests get too backlogged crypto API returns -EBUSY and calls
+ * ->complete(-EINPROGRESS) immediately followed by ->complete(0)
+ * to make waiting for backlog to flush with crypto_wait_req() easier.
+ * First wait converts -EBUSY -> -EINPROGRESS, and the second one
+ * -EINPROGRESS -> 0.
+ * We have a single struct crypto_async_request per direction, this
+ * scheme doesn't help us, so just ignore the first ->complete().
+ */
+ if (err == -EINPROGRESS)
+ return;
+
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
aead_size = ALIGN(aead_size, __alignof__(*dctx));
dctx = (void *)((u8 *)aead_req + aead_size);
@@ -213,7 +225,7 @@ static void tls_decrypt_done(void *data, int err)
}
/* Free the destination pages if skb was not decrypted inplace */
- if (sgout != sgin) {
+ if (dctx->free_sgout) {
/* Skip the first S/G entry as it points to AAD */
for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
if (!sg)
@@ -224,10 +236,17 @@ static void tls_decrypt_done(void *data, int err)
kfree(aead_req);
- spin_lock_bh(&ctx->decrypt_compl_lock);
- if (!atomic_dec_return(&ctx->decrypt_pending))
+ if (atomic_dec_and_test(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
+}
+
+static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->decrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->decrypt_pending);
+
+ return ctx->async_wait.err;
}
static int tls_do_decryption(struct sock *sk,
@@ -253,20 +272,33 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
tls_decrypt_done, aead_req);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1);
atomic_inc(&ctx->decrypt_pending);
} else {
+ DECLARE_CRYPTO_WAIT(wait);
+
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &ctx->async_wait);
+ crypto_req_done, &wait);
+ ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EINPROGRESS || ret == -EBUSY)
+ ret = crypto_wait_req(ret, &wait);
+ return ret;
}
ret = crypto_aead_decrypt(aead_req);
- if (ret == -EINPROGRESS) {
- if (darg->async)
- return 0;
+ if (ret == -EINPROGRESS)
+ return 0;
- ret = crypto_wait_req(ret, &ctx->async_wait);
+ if (ret == -EBUSY) {
+ ret = tls_decrypt_async_wait(ctx);
+ darg->async_done = true;
+ /* all completions have run, we're not doing async anymore */
+ darg->async = false;
+ return ret;
}
+
+ atomic_dec(&ctx->decrypt_pending);
darg->async = false;
return ret;
@@ -439,9 +471,10 @@ static void tls_encrypt_done(void *data, int err)
struct tls_rec *rec = data;
struct scatterlist *sge;
struct sk_msg *msg_en;
- bool ready = false;
struct sock *sk;
- int pending;
+
+ if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */
+ return;
msg_en = &rec->msg_encrypted;
@@ -476,23 +509,25 @@ static void tls_encrypt_done(void *data, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
- spin_lock_bh(&ctx->encrypt_compl_lock);
- pending = atomic_dec_return(&ctx->encrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (atomic_dec_and_test(&ctx->encrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
+}
- if (!ready)
- return;
+static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->encrypt_pending);
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
+ return ctx->async_wait.err;
}
static int tls_do_encryption(struct sock *sk,
@@ -541,9 +576,14 @@ static int tls_do_encryption(struct sock *sk,
/* Add the record in tx_list */
list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1);
atomic_inc(&ctx->encrypt_pending);
rc = crypto_aead_encrypt(aead_req);
+ if (rc == -EBUSY) {
+ rc = tls_encrypt_async_wait(ctx);
+ rc = rc ?: -EINPROGRESS;
+ }
if (!rc || rc != -EINPROGRESS) {
atomic_dec(&ctx->encrypt_pending);
sge->offset -= prot->prepend_size;
@@ -984,7 +1024,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int num_zc = 0;
int orig_size;
int ret = 0;
- int pending;
if (!eor && (msg->msg_flags & MSG_EOR))
return -EINVAL;
@@ -1163,24 +1202,12 @@ trim_sgl:
if (!num_async) {
goto send_end;
} else if (num_zc) {
- /* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no
- * pending encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
+ int err;
- if (ctx->async_wait.err) {
- ret = ctx->async_wait.err;
+ /* Wait for pending encryptions to get completed */
+ err = tls_encrypt_async_wait(ctx);
+ if (err) {
+ ret = err;
copied = 0;
}
}
@@ -1229,7 +1256,6 @@ void tls_sw_splice_eof(struct socket *sock)
ssize_t copied = 0;
bool retrying = false;
int ret = 0;
- int pending;
if (!ctx->open_rec)
return;
@@ -1264,22 +1290,7 @@ retry:
}
/* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no pending
- * encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
- if (ctx->async_wait.err)
+ if (tls_encrypt_async_wait(ctx))
goto unlock;
/* Transmit if any encryptions have completed */
@@ -1581,12 +1592,16 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
} else if (out_sg) {
memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
}
+ dctx->free_sgout = !!pages;
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
data_len + prot->tail_size, aead_req, darg);
- if (err)
+ if (err) {
+ if (darg->async_done)
+ goto exit_free_skb;
goto exit_free_pages;
+ }
darg->skb = clear_skb ?: tls_strp_msg(ctx);
clear_skb = NULL;
@@ -1598,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
return err;
}
+ if (unlikely(darg->async_done))
+ return 0;
+
if (prot->tail_size)
darg->tail = dctx->tail;
@@ -1769,7 +1787,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
u8 *control,
size_t skip,
size_t len,
- bool is_peek)
+ bool is_peek,
+ bool *more)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
struct tls_msg *tlm;
@@ -1782,7 +1801,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
if (skip < rxm->full_len)
break;
@@ -1800,12 +1819,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
err = skb_copy_datagram_msg(skb, rxm->offset + skip,
msg, chunk);
if (err < 0)
- goto out;
+ goto more;
len = len - chunk;
copied = copied + chunk;
@@ -1841,6 +1860,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
out:
return copied ? : err;
+more:
+ if (more)
+ *more = true;
+ goto out;
}
static bool
@@ -1940,10 +1963,12 @@ int tls_sw_recvmsg(struct sock *sk,
struct strp_msg *rxm;
struct tls_msg *tlm;
ssize_t copied = 0;
+ ssize_t peeked = 0;
bool async = false;
int target, err;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool rx_more = false;
bool released = true;
bool bpf_strp_enabled;
bool zc_capable;
@@ -1963,12 +1988,12 @@ int tls_sw_recvmsg(struct sock *sk,
goto end;
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, 0, len, is_peek);
+ err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more);
if (err < 0)
goto end;
copied = err;
- if (len <= copied)
+ if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more)
goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2061,6 +2086,8 @@ put_on_rx_list:
decrypted += chunk;
len -= chunk;
__skb_queue_tail(&ctx->rx_list, skb);
+ if (unlikely(control != TLS_RECORD_TYPE_DATA))
+ break;
continue;
}
@@ -2084,8 +2111,10 @@ put_on_rx_list:
if (err < 0)
goto put_on_rx_list_err;
- if (is_peek)
+ if (is_peek) {
+ peeked += chunk;
goto put_on_rx_list;
+ }
if (partially_consumed) {
rxm->offset += chunk;
@@ -2109,16 +2138,10 @@ put_on_rx_list:
recv_end:
if (async) {
- int ret, pending;
+ int ret;
/* Wait for all previously submitted records to be decrypted */
- spin_lock_bh(&ctx->decrypt_compl_lock);
- reinit_completion(&ctx->async_wait.completion);
- pending = atomic_read(&ctx->decrypt_pending);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
- ret = 0;
- if (pending)
- ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ ret = tls_decrypt_async_wait(ctx);
__skb_queue_purge(&ctx->async_hold);
if (ret) {
@@ -2130,12 +2153,11 @@ recv_end:
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, copied,
- decrypted, is_peek);
+ err = process_rx_list(ctx, msg, &control, copied + peeked,
+ decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
- async_copy_bytes, is_peek);
- decrypted += max(err, 0);
+ async_copy_bytes, is_peek, NULL);
}
copied += decrypted;
@@ -2435,16 +2457,9 @@ void tls_sw_release_resources_tx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec, *tmp;
- int pending;
/* Wait for any pending async encryptions to complete */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ tls_encrypt_async_wait(ctx);
tls_tx_records(sk, -1);
@@ -2607,7 +2622,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc
}
crypto_init_wait(&sw_ctx_tx->async_wait);
- spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ atomic_set(&sw_ctx_tx->encrypt_pending, 1);
INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
sw_ctx_tx->tx_work.sk = sk;
@@ -2628,7 +2643,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
}
crypto_init_wait(&sw_ctx_rx->async_wait);
- spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+ atomic_set(&sw_ctx_rx->decrypt_pending, 1);
init_waitqueue_head(&sw_ctx_rx->wq);
skb_queue_head_init(&sw_ctx_rx->rx_list);
skb_queue_head_init(&sw_ctx_rx->async_hold);
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 28b232f281ab..8b5d04210d7c 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -16,11 +16,6 @@ config UNIX
Say Y unless you know what you are doing.
-config UNIX_SCM
- bool
- depends on UNIX
- default y
-
config AF_UNIX_OOB
bool
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 20491825b4d0..4ddd125c4642 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -11,5 +11,3 @@ unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
-
-obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..5b41e2321209 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,8 +118,6 @@
#include <linux/btf_ids.h>
#include <linux/bpf-cgroup.h>
-#include "scm.h"
-
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
@@ -782,19 +780,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
int);
-static int unix_set_peek_off(struct sock *sk, int val)
-{
- struct unix_sock *u = unix_sk(sk);
-
- if (mutex_lock_interruptible(&u->iolock))
- return -EINTR;
-
- WRITE_ONCE(sk->sk_peek_off, val);
- mutex_unlock(&u->iolock);
-
- return 0;
-}
-
#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
@@ -862,7 +847,7 @@ static const struct proto_ops unix_stream_ops = {
.read_skb = unix_stream_read_skb,
.mmap = sock_no_mmap,
.splice_read = unix_stream_splice_read,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -886,7 +871,7 @@ static const struct proto_ops unix_dgram_ops = {
.read_skb = unix_read_skb,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -909,7 +894,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.sendmsg = unix_seqpacket_sendmsg,
.recvmsg = unix_seqpacket_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -993,11 +978,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
- u = unix_sk(sk);
+ u = unix_sk(sk);
+ u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
@@ -1344,13 +1329,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
- if (sk1 < sk2) {
- unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+
+ unix_state_lock(sk1);
+ unix_state_lock_nested(sk2, U_LOCK_SECOND);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1574,7 @@ restart:
goto out_unlock;
}
- unix_state_lock_nested(sk);
+ unix_state_lock_nested(sk, U_LOCK_SECOND);
if (sk->sk_state != st) {
unix_state_unlock(sk);
@@ -1790,6 +1773,52 @@ out:
return err;
}
+/* The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ /* Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
+
+ return 0;
+}
+
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
@@ -1837,6 +1866,21 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
spin_unlock(&unix_gc_lock);
}
+static void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
+}
+
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
@@ -1923,11 +1967,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
long timeo;
int err;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out;
@@ -2199,11 +2244,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
bool fds_sent = false;
int data_len;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..ae39538c5042 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock_nested(req, U_LOCK_DIAG);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
@@ -322,6 +322,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler unix_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_UNIX,
.dump = unix_diag_handler_dump,
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2405f0f9af31..fa39b6265238 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -81,12 +81,80 @@
#include <net/scm.h>
#include <net/tcp_states.h>
-#include "scm.h"
+struct unix_sock *unix_get_socket(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops;
+ struct sock *sk = sock->sk;
+
+ ops = READ_ONCE(sock->ops);
-/* Internal data structures and random procedures: */
+ /* PF_UNIX ? */
+ if (sk && ops && ops->family == PF_UNIX)
+ return unix_sk(sk);
+ }
+
+ return NULL;
+}
+DEFINE_SPINLOCK(unix_gc_lock);
+unsigned int unix_tot_inflight;
static LIST_HEAD(gc_candidates);
-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+static LIST_HEAD(gc_inflight_list);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *filp)
+{
+ struct unix_sock *u = unix_get_socket(filp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (u) {
+ if (!u->inflight) {
+ WARN_ON_ONCE(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ WARN_ON_ONCE(list_empty(&u->link));
+ }
+ u->inflight++;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+
+ spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *filp)
+{
+ struct unix_sock *u = unix_get_socket(filp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (u) {
+ WARN_ON_ONCE(!u->inflight);
+ WARN_ON_ONCE(list_empty(&u->link));
+
+ u->inflight--;
+ if (!u->inflight)
+ list_del_init(&u->link);
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+ }
+
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+
+ spin_unlock(&unix_gc_lock);
+}
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
@@ -105,20 +173,15 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
while (nfd--) {
/* Get the socket the fd matches if it indeed does so */
- struct sock *sk = unix_get_socket(*fp++);
-
- if (sk) {
- struct unix_sock *u = unix_sk(sk);
+ struct unix_sock *u = unix_get_socket(*fp++);
- /* Ignore non-candidates, they could
- * have been added to the queues after
- * starting the garbage collection
- */
- if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
- hit = true;
+ /* Ignore non-candidates, they could have been added
+ * to the queues after starting the garbage collection
+ */
+ if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
+ hit = true;
- func(u);
- }
+ func(u);
}
}
if (hit && hitlist != NULL) {
@@ -151,7 +214,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
/* An embryo cannot be in-flight, so it's safe
* to use the list link.
*/
- BUG_ON(!list_empty(&u->link));
+ WARN_ON_ONCE(!list_empty(&u->link));
list_add_tail(&u->link, &embryos);
}
spin_unlock(&x->sk_receive_queue.lock);
@@ -166,17 +229,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
static void dec_inflight(struct unix_sock *usk)
{
- atomic_long_dec(&usk->inflight);
+ usk->inflight--;
}
static void inc_inflight(struct unix_sock *usk)
{
- atomic_long_inc(&usk->inflight);
+ usk->inflight++;
}
static void inc_inflight_move_tail(struct unix_sock *u)
{
- atomic_long_inc(&u->inflight);
+ u->inflight++;
+
/* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
@@ -186,40 +250,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
}
static bool gc_in_progress;
-#define UNIX_INFLIGHT_TRIGGER_GC 16000
-
-void wait_for_unix_gc(void)
-{
- /* If number of inflight sockets is insane,
- * force a garbage collect right now.
- * Paired with the WRITE_ONCE() in unix_inflight(),
- * unix_notinflight() and gc_in_progress().
- */
- if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
- !READ_ONCE(gc_in_progress))
- unix_gc();
- wait_event(unix_gc_wait, gc_in_progress == false);
-}
-/* The external entry point: unix_gc() */
-void unix_gc(void)
+static void __unix_gc(struct work_struct *work)
{
- struct sk_buff *next_skb, *skb;
- struct unix_sock *u;
- struct unix_sock *next;
struct sk_buff_head hitlist;
- struct list_head cursor;
+ struct unix_sock *u, *next;
LIST_HEAD(not_cycle_list);
+ struct list_head cursor;
spin_lock(&unix_gc_lock);
- /* Avoid a recursive GC. */
- if (gc_in_progress)
- goto out;
-
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
- WRITE_ONCE(gc_in_progress, true);
-
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -237,14 +277,12 @@ void unix_gc(void)
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
long total_refs;
- long inflight_refs;
total_refs = file_count(u->sk.sk_socket->file);
- inflight_refs = atomic_long_read(&u->inflight);
- BUG_ON(inflight_refs < 1);
- BUG_ON(total_refs < inflight_refs);
- if (total_refs == inflight_refs) {
+ WARN_ON_ONCE(!u->inflight);
+ WARN_ON_ONCE(total_refs < u->inflight);
+ if (total_refs == u->inflight) {
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
@@ -271,7 +309,7 @@ void unix_gc(void)
/* Move cursor to after the current position. */
list_move(&cursor, &u->link);
- if (atomic_long_read(&u->inflight) > 0) {
+ if (u->inflight) {
list_move_tail(&u->link, &not_cycle_list);
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);
@@ -284,9 +322,17 @@ void unix_gc(void)
* which are creating the cycle(s).
*/
skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link)
+ list_for_each_entry(u, &gc_candidates, link) {
scan_children(&u->sk, inc_inflight, &hitlist);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
+ }
+
/* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
@@ -298,38 +344,50 @@ void unix_gc(void)
spin_unlock(&unix_gc_lock);
- /* We need io_uring to clean its registered files, ignore all io_uring
- * originated skbs. It's fine as io_uring doesn't keep references to
- * other io_uring instances and so killing all other files in the cycle
- * will put all io_uring references forcing it to go through normal
- * release.path eventually putting registered files.
- */
- skb_queue_walk_safe(&hitlist, skb, next_skb) {
- if (skb->destructor == io_uring_destruct_scm) {
- __skb_unlink(skb, &hitlist);
- skb_queue_tail(&skb->sk->sk_receive_queue, skb);
- }
- }
-
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
spin_lock(&unix_gc_lock);
- /* There could be io_uring registered files, just push them back to
- * the inflight list
- */
- list_for_each_entry_safe(u, next, &gc_candidates, link)
- list_move_tail(&u->link, &gc_inflight_list);
-
/* All candidates should have been detached by now. */
- BUG_ON(!list_empty(&gc_candidates));
+ WARN_ON_ONCE(!list_empty(&gc_candidates));
/* Paired with READ_ONCE() in wait_for_unix_gc(). */
WRITE_ONCE(gc_in_progress, false);
- wake_up(&unix_gc_wait);
-
- out:
spin_unlock(&unix_gc_lock);
}
+
+static DECLARE_WORK(unix_gc_work, __unix_gc);
+
+void unix_gc(void)
+{
+ WRITE_ONCE(gc_in_progress, true);
+ queue_work(system_unbound_wq, &unix_gc_work);
+}
+
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
+#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
+
+void wait_for_unix_gc(struct scm_fp_list *fpl)
+{
+ /* If number of inflight sockets is insane,
+ * force a garbage collect right now.
+ *
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight(), and __unix_gc().
+ */
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
+ unix_gc();
+
+ /* Penalise users who want to send AF_UNIX sockets
+ * but whose sockets have not been received yet.
+ */
+ if (!fpl || !fpl->count_unix ||
+ READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
+ return;
+
+ if (READ_ONCE(gc_in_progress))
+ flush_work(&unix_gc_work);
+}
diff --git a/net/unix/scm.c b/net/unix/scm.c
deleted file mode 100644
index 822ce0d0d791..000000000000
--- a/net/unix/scm.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <linux/fs.h>
-#include <net/af_unix.h>
-#include <net/scm.h>
-#include <linux/init.h>
-#include <linux/io_uring.h>
-
-#include "scm.h"
-
-unsigned int unix_tot_inflight;
-EXPORT_SYMBOL(unix_tot_inflight);
-
-LIST_HEAD(gc_inflight_list);
-EXPORT_SYMBOL(gc_inflight_list);
-
-DEFINE_SPINLOCK(unix_gc_lock);
-EXPORT_SYMBOL(unix_gc_lock);
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- const struct proto_ops *ops = READ_ONCE(sock->ops);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && ops && ops->family == PF_UNIX)
- u_sock = s;
- }
-
- return u_sock;
-}
-EXPORT_SYMBOL(unix_get_socket);
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (atomic_long_inc_return(&u->inflight) == 1) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!atomic_long_read(&u->inflight));
- BUG_ON(list_empty(&u->link));
-
- if (atomic_long_dec_and_test(&u->inflight))
- list_del_init(&u->link);
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
- spin_unlock(&unix_gc_lock);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- /*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
- */
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return 0;
-}
-EXPORT_SYMBOL(unix_attach_fds);
-
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-EXPORT_SYMBOL(unix_detach_fds);
-
-void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
-
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-EXPORT_SYMBOL(unix_destruct_scm);
-
-void io_uring_destruct_scm(struct sk_buff *skb)
-{
- unix_destruct_scm(skb);
-}
-EXPORT_SYMBOL(io_uring_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
deleted file mode 100644
index 5a255a477f16..000000000000
--- a/net/unix/scm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef NET_UNIX_SCM_H
-#define NET_UNIX_SCM_H
-
-extern struct list_head gc_inflight_list;
-extern spinlock_t unix_gc_lock;
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-
-#endif
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
index 2e29994f92ff..ab87ef66c1e8 100644
--- a/net/vmw_vsock/diag.c
+++ b/net/vmw_vsock/diag.c
@@ -157,6 +157,7 @@ static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler vsock_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_VSOCK,
.dump = vsock_diag_handler_dump,
};
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index a9ac85e09af3..10345388ad13 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -206,7 +206,6 @@ config CFG80211_KUNIT_TEST
depends on KUNIT
depends on CFG80211
default KUNIT_ALL_TESTS
- depends on !KERNEL_6_2
help
Enable this option to test cfg80211 functions with kunit.
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index ceb9174c5c3d..3414b2c3abcc 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2023 Intel Corporation
+ * Copyright 2018-2024 Intel Corporation
*/
#include <linux/export.h>
@@ -27,11 +27,10 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
if (WARN_ON(!chan))
return;
- chandef->chan = chan;
- chandef->freq1_offset = chan->freq_offset;
- chandef->center_freq2 = 0;
- chandef->edmg.bw_config = 0;
- chandef->edmg.channels = 0;
+ *chandef = (struct cfg80211_chan_def) {
+ .chan = chan,
+ .freq1_offset = chan->freq_offset,
+ };
switch (chan_type) {
case NL80211_CHAN_NO_HT:
@@ -56,6 +55,73 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
}
EXPORT_SYMBOL(cfg80211_chandef_create);
+struct cfg80211_per_bw_puncturing_values {
+ u8 len;
+ const u16 *valid_values;
+};
+
+static const u16 puncturing_values_80mhz[] = {
+ 0x8, 0x4, 0x2, 0x1
+};
+
+static const u16 puncturing_values_160mhz[] = {
+ 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3
+};
+
+static const u16 puncturing_values_320mhz[] = {
+ 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00,
+ 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f,
+ 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f
+};
+
+#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \
+ { \
+ .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \
+ .valid_values = puncturing_values_ ## _bw ## mhz \
+ }
+
+static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = {
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80),
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160),
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320)
+};
+
+static bool valid_puncturing_bitmap(const struct cfg80211_chan_def *chandef)
+{
+ u32 idx, i, start_freq, primary_center = chandef->chan->center_freq;
+
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_80:
+ idx = 0;
+ start_freq = chandef->center_freq1 - 40;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ idx = 1;
+ start_freq = chandef->center_freq1 - 80;
+ break;
+ case NL80211_CHAN_WIDTH_320:
+ idx = 2;
+ start_freq = chandef->center_freq1 - 160;
+ break;
+ default:
+ return chandef->punctured == 0;
+ }
+
+ if (!chandef->punctured)
+ return true;
+
+ /* check if primary channel is punctured */
+ if (chandef->punctured & (u16)BIT((primary_center - start_freq) / 20))
+ return false;
+
+ for (i = 0; i < per_bw_puncturing[idx].len; i++) {
+ if (per_bw_puncturing[idx].valid_values[i] == chandef->punctured)
+ return true;
+ }
+
+ return false;
+}
+
static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
{
int max_contiguous = 0;
@@ -317,72 +383,81 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
!cfg80211_edmg_chandef_valid(chandef))
return false;
- return true;
+ return valid_puncturing_bitmap(chandef);
}
EXPORT_SYMBOL(cfg80211_chandef_valid);
-static void chandef_primary_freqs(const struct cfg80211_chan_def *c,
- u32 *pri40, u32 *pri80, u32 *pri160)
+int cfg80211_chandef_primary(const struct cfg80211_chan_def *c,
+ enum nl80211_chan_width primary_chan_width,
+ u16 *punctured)
{
- int tmp;
+ int pri_width = nl80211_chan_width_to_mhz(primary_chan_width);
+ int width = cfg80211_chandef_get_width(c);
+ u32 control = c->chan->center_freq;
+ u32 center = c->center_freq1;
+ u16 _punct = 0;
- switch (c->width) {
- case NL80211_CHAN_WIDTH_40:
- *pri40 = c->center_freq1;
- *pri80 = 0;
- *pri160 = 0;
- break;
- case NL80211_CHAN_WIDTH_80:
- case NL80211_CHAN_WIDTH_80P80:
- *pri160 = 0;
- *pri80 = c->center_freq1;
- /* n_P20 */
- tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- *pri40 = c->center_freq1 - 20 + 40 * tmp;
- break;
- case NL80211_CHAN_WIDTH_160:
- *pri160 = c->center_freq1;
- /* n_P20 */
- tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- *pri40 = c->center_freq1 - 60 + 40 * tmp;
- /* n_P80 */
- tmp /= 2;
- *pri80 = c->center_freq1 - 40 + 80 * tmp;
- break;
- case NL80211_CHAN_WIDTH_320:
- /* n_P20 */
- tmp = (150 + c->chan->center_freq - c->center_freq1) / 20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- *pri40 = c->center_freq1 - 140 + 40 * tmp;
- /* n_P80 */
- tmp /= 2;
- *pri80 = c->center_freq1 - 120 + 80 * tmp;
- /* n_P160 */
- tmp /= 2;
- *pri160 = c->center_freq1 - 80 + 160 * tmp;
- break;
- default:
- WARN_ON_ONCE(1);
+ if (WARN_ON_ONCE(pri_width < 0 || width < 0))
+ return -1;
+
+ /* not intended to be called this way, can't determine */
+ if (WARN_ON_ONCE(pri_width > width))
+ return -1;
+
+ if (!punctured)
+ punctured = &_punct;
+
+ *punctured = c->punctured;
+
+ while (width > pri_width) {
+ unsigned int bits_to_drop = width / 20 / 2;
+
+ if (control > center) {
+ center += width / 4;
+ *punctured >>= bits_to_drop;
+ } else {
+ center -= width / 4;
+ *punctured &= (1 << bits_to_drop) - 1;
+ }
+ width /= 2;
}
+
+ return center;
}
+EXPORT_SYMBOL(cfg80211_chandef_primary);
-const struct cfg80211_chan_def *
-cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
- const struct cfg80211_chan_def *c2)
+static const struct cfg80211_chan_def *
+check_chandef_primary_compat(const struct cfg80211_chan_def *c1,
+ const struct cfg80211_chan_def *c2,
+ enum nl80211_chan_width primary_chan_width)
{
- u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80, c1_pri160, c2_pri160;
+ u16 punct_c1 = 0, punct_c2 = 0;
+
+ /* check primary is compatible -> error if not */
+ if (cfg80211_chandef_primary(c1, primary_chan_width, &punct_c1) !=
+ cfg80211_chandef_primary(c2, primary_chan_width, &punct_c2))
+ return ERR_PTR(-EINVAL);
+
+ if (punct_c1 != punct_c2)
+ return ERR_PTR(-EINVAL);
+
+ /* assumes c1 is smaller width, if that was just checked -> done */
+ if (c1->width == primary_chan_width)
+ return c2;
+
+ /* otherwise continue checking the next width */
+ return NULL;
+}
+
+static const struct cfg80211_chan_def *
+_cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
+ const struct cfg80211_chan_def *c2)
+{
+ const struct cfg80211_chan_def *ret;
/* If they are identical, return */
if (cfg80211_chandef_identical(c1, c2))
- return c1;
+ return c2;
/* otherwise, must have same control channel */
if (c1->chan != c2->chan)
@@ -396,53 +471,76 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
return NULL;
/*
- * can't be compatible if one of them is 5 or 10 MHz,
+ * can't be compatible if one of them is 5/10 MHz or S1G
* but they don't have the same width.
*/
- if (c1->width == NL80211_CHAN_WIDTH_5 ||
- c1->width == NL80211_CHAN_WIDTH_10 ||
- c2->width == NL80211_CHAN_WIDTH_5 ||
- c2->width == NL80211_CHAN_WIDTH_10)
+#define NARROW_OR_S1G(width) ((width) == NL80211_CHAN_WIDTH_5 || \
+ (width) == NL80211_CHAN_WIDTH_10 || \
+ (width) == NL80211_CHAN_WIDTH_1 || \
+ (width) == NL80211_CHAN_WIDTH_2 || \
+ (width) == NL80211_CHAN_WIDTH_4 || \
+ (width) == NL80211_CHAN_WIDTH_8 || \
+ (width) == NL80211_CHAN_WIDTH_16)
+
+ if (NARROW_OR_S1G(c1->width) || NARROW_OR_S1G(c2->width))
return NULL;
- if (c1->width == NL80211_CHAN_WIDTH_20_NOHT ||
- c1->width == NL80211_CHAN_WIDTH_20)
+ /*
+ * Make sure that c1 is always the narrower one, so that later
+ * we either return NULL or c2 and don't have to check both
+ * directions.
+ */
+ if (c1->width > c2->width)
+ swap(c1, c2);
+
+ /*
+ * No further checks needed if the "narrower" one is only 20 MHz.
+ * Here "narrower" includes being a 20 MHz non-HT channel vs. a
+ * 20 MHz HT (or later) one.
+ */
+ if (c1->width <= NL80211_CHAN_WIDTH_20)
return c2;
- if (c2->width == NL80211_CHAN_WIDTH_20_NOHT ||
- c2->width == NL80211_CHAN_WIDTH_20)
- return c1;
+ ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_40);
+ if (ret)
+ return ret;
- chandef_primary_freqs(c1, &c1_pri40, &c1_pri80, &c1_pri160);
- chandef_primary_freqs(c2, &c2_pri40, &c2_pri80, &c2_pri160);
+ ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_80);
+ if (ret)
+ return ret;
- if (c1_pri40 != c2_pri40)
+ /*
+ * If c1 is 80+80, then c2 is 160 or higher, but that cannot
+ * match. If c2 was also 80+80 it was already either accepted
+ * or rejected above (identical or not, respectively.)
+ */
+ if (c1->width == NL80211_CHAN_WIDTH_80P80)
return NULL;
- if (c1->width == NL80211_CHAN_WIDTH_40)
- return c2;
-
- if (c2->width == NL80211_CHAN_WIDTH_40)
- return c1;
+ ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_160);
+ if (ret)
+ return ret;
- if (c1_pri80 != c2_pri80)
- return NULL;
+ /*
+ * Getting here would mean they're both wider than 160, have the
+ * same primary 160, but are not identical - this cannot happen
+ * since they must be 320 (no wider chandefs exist, at least yet.)
+ */
+ WARN_ON_ONCE(1);
- if (c1->width == NL80211_CHAN_WIDTH_80 &&
- c2->width > NL80211_CHAN_WIDTH_80)
- return c2;
+ return NULL;
+}
- if (c2->width == NL80211_CHAN_WIDTH_80 &&
- c1->width > NL80211_CHAN_WIDTH_80)
- return c1;
+const struct cfg80211_chan_def *
+cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
+ const struct cfg80211_chan_def *c2)
+{
+ const struct cfg80211_chan_def *ret;
- WARN_ON(!c1_pri160 && !c2_pri160);
- if (c1_pri160 && c2_pri160 && c1_pri160 != c2_pri160)
+ ret = _cfg80211_chandef_compatible(c1, c2);
+ if (IS_ERR(ret))
return NULL;
-
- if (c1->width > c2->width)
- return c1;
- return c2;
+ return ret;
}
EXPORT_SYMBOL(cfg80211_chandef_compatible);
@@ -1047,7 +1145,7 @@ EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time);
static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
u32 center_freq, u32 bandwidth,
- u32 prohibited_flags)
+ u32 prohibited_flags, bool monitor)
{
struct ieee80211_channel *c;
u32 freq, start_freq, end_freq;
@@ -1057,7 +1155,11 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
c = ieee80211_get_channel_khz(wiphy, freq);
- if (!c || c->flags & prohibited_flags)
+ if (!c)
+ return false;
+ if (monitor && c->flags & IEEE80211_CHAN_CAN_MONITOR)
+ continue;
+ if (c->flags & prohibited_flags)
return false;
}
@@ -1117,9 +1219,9 @@ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels,
return true;
}
-bool cfg80211_chandef_usable(struct wiphy *wiphy,
- const struct cfg80211_chan_def *chandef,
- u32 prohibited_flags)
+bool _cfg80211_chandef_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags, bool monitor)
{
struct ieee80211_sta_ht_cap *ht_cap;
struct ieee80211_sta_vht_cap *vht_cap;
@@ -1281,14 +1383,22 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
if (!cfg80211_secondary_chans_ok(wiphy,
ieee80211_chandef_to_khz(chandef),
- width, prohibited_flags))
+ width, prohibited_flags, monitor))
return false;
if (!chandef->center_freq2)
return true;
return cfg80211_secondary_chans_ok(wiphy,
MHZ_TO_KHZ(chandef->center_freq2),
- width, prohibited_flags);
+ width, prohibited_flags, monitor);
+}
+
+bool cfg80211_chandef_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags)
+{
+ return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags,
+ false);
}
EXPORT_SYMBOL(cfg80211_chandef_usable);
@@ -1532,72 +1642,3 @@ struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
}
}
EXPORT_SYMBOL(wdev_chandef);
-
-struct cfg80211_per_bw_puncturing_values {
- u8 len;
- const u16 *valid_values;
-};
-
-static const u16 puncturing_values_80mhz[] = {
- 0x8, 0x4, 0x2, 0x1
-};
-
-static const u16 puncturing_values_160mhz[] = {
- 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3
-};
-
-static const u16 puncturing_values_320mhz[] = {
- 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00,
- 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f,
- 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f
-};
-
-#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \
- { \
- .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \
- .valid_values = puncturing_values_ ## _bw ## mhz \
- }
-
-static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = {
- CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80),
- CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160),
- CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320)
-};
-
-bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
- const struct cfg80211_chan_def *chandef)
-{
- u32 idx, i, start_freq;
-
- switch (chandef->width) {
- case NL80211_CHAN_WIDTH_80:
- idx = 0;
- start_freq = chandef->center_freq1 - 40;
- break;
- case NL80211_CHAN_WIDTH_160:
- idx = 1;
- start_freq = chandef->center_freq1 - 80;
- break;
- case NL80211_CHAN_WIDTH_320:
- idx = 2;
- start_freq = chandef->center_freq1 - 160;
- break;
- default:
- *bitmap = 0;
- break;
- }
-
- if (!*bitmap)
- return true;
-
- /* check if primary channel is punctured */
- if (*bitmap & (u16)BIT((chandef->chan->center_freq - start_freq) / 20))
- return false;
-
- for (i = 0; i < per_bw_puncturing[idx].len; i++)
- if (per_bw_puncturing[idx].valid_values[i] == *bitmap)
- return true;
-
- return false;
-}
-EXPORT_SYMBOL(cfg80211_valid_disable_subchannel_bitmap);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 409d74c57ca0..3fb1b637352a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 13657a85cf61..118f2f619828 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -362,7 +362,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
struct cfg80211_auth_request *req);
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct cfg80211_assoc_request *req);
+ struct cfg80211_assoc_request *req,
+ struct netlink_ext_ack *extack);
int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
struct net_device *dev, const u8 *bssid,
const u8 *ie, int ie_len, u16 reason,
@@ -491,6 +492,9 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev,
struct ieee80211_channel *chan,
bool primary_only);
+bool _cfg80211_chandef_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags, bool monitor);
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
@@ -549,9 +553,53 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask);
+/**
+ * struct cfg80211_colocated_ap - colocated AP information
+ *
+ * @list: linked list to all colocated APs
+ * @bssid: BSSID of the reported AP
+ * @ssid: SSID of the reported AP
+ * @ssid_len: length of the ssid
+ * @center_freq: frequency the reported AP is on
+ * @unsolicited_probe: the reported AP is part of an ESS, where all the APs
+ * that operate in the same channel as the reported AP and that might be
+ * detected by a STA receiving this frame, are transmitting unsolicited
+ * Probe Response frames every 20 TUs
+ * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP
+ * @same_ssid: the reported AP has the same SSID as the reporting AP
+ * @multi_bss: the reported AP is part of a multiple BSSID set
+ * @transmitted_bssid: the reported AP is the transmitting BSSID
+ * @colocated_ess: all the APs that share the same ESS as the reported AP are
+ * colocated and can be discovered via legacy bands.
+ * @short_ssid_valid: short_ssid is valid and can be used
+ * @short_ssid: the short SSID for this SSID
+ * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP
+ */
+struct cfg80211_colocated_ap {
+ struct list_head list;
+ u8 bssid[ETH_ALEN];
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ size_t ssid_len;
+ u32 short_ssid;
+ u32 center_freq;
+ u8 unsolicited_probe:1,
+ oct_recommended:1,
+ same_ssid:1,
+ multi_bss:1,
+ transmitted_bssid:1,
+ colocated_ess:1,
+ short_ssid_valid:1;
+ s8 psd_20;
+};
+
#if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST)
#define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
#define VISIBLE_IF_CFG80211_KUNIT
+void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list);
+
+int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+ struct list_head *list);
+
size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
const u8 *subie, size_t subie_len,
u8 *new_ie, size_t new_ie_len);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index f635a8b6ca2e..4052041a19ea 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -4,7 +4,7 @@
*
* Copyright (c) 2009, Jouni Malinen <j@w1.fi>
* Copyright (c) 2015 Intel Deutschland GmbH
- * Copyright (C) 2019-2020, 2022-2023 Intel Corporation
+ * Copyright (C) 2019-2020, 2022-2024 Intel Corporation
*/
#include <linux/kernel.h>
@@ -241,12 +241,12 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
char *buf = kmalloc(128, gfp);
if (buf) {
- sprintf(buf, "MLME-MICHAELMICFAILURE.indication("
- "keyid=%d %scast addr=%pM)", key_id,
- key_type == NL80211_KEYTYPE_GROUP ? "broad" : "uni",
- addr);
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = strlen(buf);
+ wrqu.data.length =
+ sprintf(buf, "MLME-MICHAELMICFAILURE."
+ "indication(keyid=%d %scast addr=%pM)",
+ key_id, key_type == NL80211_KEYTYPE_GROUP
+ ? "broad" : "uni", addr);
wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
kfree(buf);
}
@@ -325,28 +325,136 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
p1[i] &= p2[i];
}
-/* Note: caller must cfg80211_put_bss() regardless of result */
-int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_assoc_request *req)
+static int
+cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a,
+ const struct ieee80211_multi_link_elem *mle_b,
+ struct netlink_ext_ack *extack)
{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err, i, j;
+ const struct ieee80211_mle_basic_common_info *common_a, *common_b;
- lockdep_assert_wiphy(wdev->wiphy);
+ common_a = (const void *)mle_a->variable;
+ common_b = (const void *)mle_b->variable;
+
+ if (memcmp(common_a->mld_mac_addr, common_b->mld_mac_addr, ETH_ALEN)) {
+ NL_SET_ERR_MSG(extack, "AP MLD address mismatch");
+ return -EINVAL;
+ }
+
+ if (ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_a) !=
+ ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_b)) {
+ NL_SET_ERR_MSG(extack, "link EML medium sync delay mismatch");
+ return -EINVAL;
+ }
+
+ if (ieee80211_mle_get_eml_cap((const u8 *)mle_a) !=
+ ieee80211_mle_get_eml_cap((const u8 *)mle_b)) {
+ NL_SET_ERR_MSG(extack, "link EML capabilities mismatch");
+ return -EINVAL;
+ }
+
+ if (ieee80211_mle_get_mld_capa_op((const u8 *)mle_a) !=
+ ieee80211_mle_get_mld_capa_op((const u8 *)mle_b)) {
+ NL_SET_ERR_MSG(extack, "link MLD capabilities/ops mismatch");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cfg80211_mlme_check_mlo(struct net_device *dev,
+ struct cfg80211_assoc_request *req,
+ struct netlink_ext_ack *extack)
+{
+ const struct ieee80211_multi_link_elem *mles[ARRAY_SIZE(req->links)] = {};
+ int i;
+
+ if (req->link_id < 0)
+ return 0;
+
+ if (!req->links[req->link_id].bss) {
+ NL_SET_ERR_MSG(extack, "no BSS for assoc link");
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < ARRAY_SIZE(req->links); i++) {
+ const struct cfg80211_bss_ies *ies;
+ const struct element *ml;
- for (i = 1; i < ARRAY_SIZE(req->links); i++) {
if (!req->links[i].bss)
continue;
- for (j = 0; j < i; j++) {
- if (req->links[i].bss == req->links[j].bss)
- return -EINVAL;
+
+ if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr)) {
+ NL_SET_ERR_MSG(extack, "BSSID must not be our address");
+ req->links[i].error = -EINVAL;
+ goto error;
}
- if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr))
- return -EINVAL;
+ ies = rcu_dereference(req->links[i].bss->ies);
+ ml = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
+ ies->data, ies->len);
+ if (!ml) {
+ NL_SET_ERR_MSG(extack, "MLO BSS w/o ML element");
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
+
+ if (!ieee80211_mle_type_ok(ml->data + 1,
+ IEEE80211_ML_CONTROL_TYPE_BASIC,
+ ml->datalen - 1)) {
+ NL_SET_ERR_MSG(extack, "BSS with invalid ML element");
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
+
+ mles[i] = (const void *)(ml->data + 1);
+
+ if (ieee80211_mle_get_link_id((const u8 *)mles[i]) != i) {
+ NL_SET_ERR_MSG(extack, "link ID mismatch");
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
+ }
+
+ if (WARN_ON(!mles[req->link_id]))
+ goto error;
+
+ for (i = 0; i < ARRAY_SIZE(req->links); i++) {
+ if (i == req->link_id || !req->links[i].bss)
+ continue;
+
+ if (WARN_ON(!mles[i]))
+ goto error;
+
+ if (cfg80211_mlme_check_mlo_compat(mles[req->link_id], mles[i],
+ extack)) {
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
}
+ rcu_read_unlock();
+ return 0;
+error:
+ rcu_read_unlock();
+ return -EINVAL;
+}
+
+/* Note: caller must cfg80211_put_bss() regardless of result */
+int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_assoc_request *req,
+ struct netlink_ext_ack *extack)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int err;
+
+ lockdep_assert_wiphy(wdev->wiphy);
+
+ err = cfg80211_mlme_check_mlo(dev, req, extack);
+ if (err)
+ return err;
+
if (wdev->connected &&
(!req->prev_bssid ||
!ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid)))
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 60877b532993..b4edba6b0b7b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if.h>
@@ -581,7 +581,11 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
[NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
- [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
+ [NL80211_ATTR_WPA_VERSIONS] =
+ NLA_POLICY_RANGE(NLA_U32, 0,
+ NL80211_WPA_VERSION_1 |
+ NL80211_WPA_VERSION_2 |
+ NL80211_WPA_VERSION_3),
[NL80211_ATTR_PID] = { .type = NLA_U32 },
[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
[NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN),
@@ -821,6 +825,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG },
[NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
[NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
+ [NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -906,22 +911,11 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
};
static const struct nla_policy
-nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = {
- [NL80211_BAND_2GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_5GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_6GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_60GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_LC] = { .type = NLA_S32 },
-};
-
-static const struct nla_policy
nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_SSID_LEN },
[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
- [NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] =
- NLA_POLICY_NESTED(nl80211_match_band_rssi_policy),
};
static const struct nla_policy
@@ -1204,11 +1198,11 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_DFS_CONCURRENT) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT))
goto nla_put_failure;
- if ((chan->flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT) &&
- nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT))
+ if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT))
goto nla_put_failure;
- if ((chan->flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT) &&
- nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT))
+ if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT))
goto nla_put_failure;
}
@@ -3224,24 +3218,9 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
wdev->iftype == NL80211_IFTYPE_P2P_GO;
}
-static int nl80211_parse_punct_bitmap(struct cfg80211_registered_device *rdev,
- struct genl_info *info,
- const struct cfg80211_chan_def *chandef,
- u16 *punct_bitmap)
-{
- if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT))
- return -EINVAL;
-
- *punct_bitmap = nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]);
- if (!cfg80211_valid_disable_subchannel_bitmap(punct_bitmap, chandef))
- return -EINVAL;
-
- return 0;
-}
-
-int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
- struct genl_info *info,
- struct cfg80211_chan_def *chandef)
+static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+ struct genl_info *info, bool monitor,
+ struct cfg80211_chan_def *chandef)
{
struct netlink_ext_ack *extack = info->extack;
struct nlattr **attrs = info->attrs;
@@ -3266,10 +3245,9 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
chandef->freq1_offset = control_freq % 1000;
chandef->center_freq2 = 0;
- /* Primary channel not allowed */
- if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) {
+ if (!chandef->chan) {
NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ],
- "Channel is disabled");
+ "Unknown channel");
return -EINVAL;
}
@@ -3346,13 +3324,27 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
chandef->edmg.channels = 0;
}
+ if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
+ chandef->punctured =
+ nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]);
+
+ if (chandef->punctured &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_PUNCT)) {
+ NL_SET_ERR_MSG(extack,
+ "driver doesn't support puncturing");
+ return -EINVAL;
+ }
+ }
+
if (!cfg80211_chandef_valid(chandef)) {
NL_SET_ERR_MSG(extack, "invalid channel definition");
return -EINVAL;
}
- if (!cfg80211_chandef_usable(&rdev->wiphy, chandef,
- IEEE80211_CHAN_DISABLED)) {
+ if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef,
+ IEEE80211_CHAN_DISABLED,
+ monitor)) {
NL_SET_ERR_MSG(extack, "(extension) channel is disabled");
return -EINVAL;
}
@@ -3367,6 +3359,13 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
return 0;
}
+int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+ struct genl_info *info,
+ struct cfg80211_chan_def *chandef)
+{
+ return _nl80211_parse_chandef(rdev, info, false, chandef);
+}
+
static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct genl_info *info,
@@ -3391,7 +3390,9 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
link_id = 0;
}
- result = nl80211_parse_chandef(rdev, info, &chandef);
+ result = _nl80211_parse_chandef(rdev, info,
+ iftype == NL80211_IFTYPE_MONITOR,
+ &chandef);
if (result)
return result;
@@ -3822,6 +3823,10 @@ int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *ch
if (chandef->center_freq2 &&
nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2))
return -ENOBUFS;
+ if (chandef->punctured &&
+ nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, chandef->punctured))
+ return -ENOBUFS;
+
return 0;
}
EXPORT_SYMBOL(nl80211_send_chandef);
@@ -4020,6 +4025,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
}
wiphy_unlock(&rdev->wiphy);
+ if_start = 0;
wp_idx++;
}
out:
@@ -4196,11 +4202,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;
- BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
- IEEE80211_MAX_MESH_ID_LEN);
wdev->u.mesh.id_up_len =
nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
memcpy(wdev->u.mesh.id,
@@ -4306,8 +4312,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_MESH_POINT:
if (!info->attrs[NL80211_ATTR_MESH_ID])
break;
- BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
- IEEE80211_MAX_MESH_ID_LEN);
wdev->u.mesh.id_up_len =
nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
memcpy(wdev->u.mesh.id,
@@ -6066,14 +6070,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
- err = nl80211_parse_punct_bitmap(rdev, info,
- &params->chandef,
- &params->punct_bitmap);
- if (err)
- goto out;
- }
-
if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params->chandef,
wdev->iftype)) {
err = -EINVAL;
@@ -6873,7 +6869,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EINVAL;
/* When you run into this, adjust the code below for the new flag */
- BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8);
switch (statype) {
case CFG80211_STA_MESH_PEER_KERNEL:
@@ -6933,6 +6929,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
params->link_sta_params.he_capa ||
params->link_sta_params.eht_capa)
return -EINVAL;
+ if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
+ return -EINVAL;
}
if (statype != CFG80211_STA_AP_CLIENT &&
@@ -6956,7 +6954,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
BIT(NL80211_STA_FLAG_ASSOCIATED) |
BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
BIT(NL80211_STA_FLAG_WME) |
- BIT(NL80211_STA_FLAG_MFP)))
+ BIT(NL80211_STA_FLAG_MFP) |
+ BIT(NL80211_STA_FLAG_SPP_AMSDU)))
return -EINVAL;
/* but authenticated/associated only if driver handles it */
@@ -7515,7 +7514,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
/* When you run into this, adjust the code below for the new flag */
- BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8);
switch (dev->ieee80211_ptr->iftype) {
case NL80211_IFTYPE_AP:
@@ -7539,6 +7538,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.sta_flags_mask & auth_assoc)
return -EINVAL;
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT) &&
+ params.sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
+ return -EINVAL;
+
/* Older userspace, or userspace wanting to be compatible with
* !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth
* and assoc flags in the mask, but assumes the station will be
@@ -7627,14 +7631,16 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
struct station_del_parameters params;
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
memset(&params, 0, sizeof(params));
if (info->attrs[NL80211_ATTR_MAC])
params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]);
- switch (dev->ieee80211_ptr->iftype) {
+ switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MESH_POINT:
@@ -7675,6 +7681,17 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID;
}
+ /* Link ID not expected in case of non-ML operation */
+ if (!wdev->valid_links && link_id != -1)
+ return -EINVAL;
+
+ /* If given, a valid link ID should be passed during MLO */
+ if (wdev->valid_links && link_id >= 0 &&
+ !(wdev->valid_links & BIT(link_id)))
+ return -EINVAL;
+
+ params.link_id = link_id;
+
return rdev_del_station(rdev, dev, &params);
}
@@ -9480,41 +9497,6 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
return 0;
}
-static int
-nl80211_parse_sched_scan_per_band_rssi(struct wiphy *wiphy,
- struct cfg80211_match_set *match_sets,
- struct nlattr *tb_band_rssi,
- s32 rssi_thold)
-{
- struct nlattr *attr;
- int i, tmp, ret = 0;
-
- if (!wiphy_ext_feature_isset(wiphy,
- NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD)) {
- if (tb_band_rssi)
- ret = -EOPNOTSUPP;
- else
- for (i = 0; i < NUM_NL80211_BANDS; i++)
- match_sets->per_band_rssi_thold[i] =
- NL80211_SCAN_RSSI_THOLD_OFF;
- return ret;
- }
-
- for (i = 0; i < NUM_NL80211_BANDS; i++)
- match_sets->per_band_rssi_thold[i] = rssi_thold;
-
- nla_for_each_nested(attr, tb_band_rssi, tmp) {
- enum nl80211_band band = nla_type(attr);
-
- if (band < 0 || band >= NUM_NL80211_BANDS)
- return -EINVAL;
-
- match_sets->per_band_rssi_thold[band] = nla_get_s32(attr);
- }
-
- return 0;
-}
-
static struct cfg80211_sched_scan_request *
nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
struct nlattr **attrs, int max_match_sets)
@@ -9789,15 +9771,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
if (rssi)
request->match_sets[i].rssi_thold =
nla_get_s32(rssi);
-
- /* Parse per band RSSI attribute */
- err = nl80211_parse_sched_scan_per_band_rssi(wiphy,
- &request->match_sets[i],
- tb[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI],
- request->match_sets[i].rssi_thold);
- if (err)
- goto out_free;
-
i++;
}
@@ -10077,6 +10050,42 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb,
return 0;
}
+static int nl80211_parse_counter_offsets(struct cfg80211_registered_device *rdev,
+ const u8 *data, size_t datalen,
+ int first_count, struct nlattr *attr,
+ const u16 **offsets, unsigned int *n_offsets)
+{
+ int i;
+
+ *n_offsets = 0;
+
+ if (!attr)
+ return 0;
+
+ if (!nla_len(attr) || (nla_len(attr) % sizeof(u16)))
+ return -EINVAL;
+
+ *n_offsets = nla_len(attr) / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+ (*n_offsets > rdev->wiphy.max_num_csa_counters))
+ return -EINVAL;
+
+ *offsets = nla_data(attr);
+
+ /* sanity checks - counters should fit and be the same */
+ for (i = 0; i < *n_offsets; i++) {
+ u16 offset = (*offsets)[i];
+
+ if (offset >= datalen)
+ return -EINVAL;
+
+ if (first_count != -1 && data[offset] != first_count)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -10088,7 +10097,6 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
int err;
bool need_new_beacon = false;
bool need_handle_dfs_flag = true;
- int len, i;
u32 cs_count;
if (!rdev->ops->channel_switch ||
@@ -10173,72 +10181,23 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
goto free;
}
- len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
- if (!len || (len % sizeof(u16))) {
- err = -EINVAL;
+ err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.tail,
+ params.beacon_csa.tail_len,
+ params.count,
+ csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON],
+ &params.counter_offsets_beacon,
+ &params.n_counter_offsets_beacon);
+ if (err)
goto free;
- }
- params.n_counter_offsets_beacon = len / sizeof(u16);
- if (rdev->wiphy.max_num_csa_counters &&
- (params.n_counter_offsets_beacon >
- rdev->wiphy.max_num_csa_counters)) {
- err = -EINVAL;
+ err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.probe_resp,
+ params.beacon_csa.probe_resp_len,
+ params.count,
+ csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP],
+ &params.counter_offsets_presp,
+ &params.n_counter_offsets_presp);
+ if (err)
goto free;
- }
-
- params.counter_offsets_beacon =
- nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
-
- /* sanity checks - counters should fit and be the same */
- for (i = 0; i < params.n_counter_offsets_beacon; i++) {
- u16 offset = params.counter_offsets_beacon[i];
-
- if (offset >= params.beacon_csa.tail_len) {
- err = -EINVAL;
- goto free;
- }
-
- if (params.beacon_csa.tail[offset] != params.count) {
- err = -EINVAL;
- goto free;
- }
- }
-
- if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
- len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
- if (!len || (len % sizeof(u16))) {
- err = -EINVAL;
- goto free;
- }
-
- params.n_counter_offsets_presp = len / sizeof(u16);
- if (rdev->wiphy.max_num_csa_counters &&
- (params.n_counter_offsets_presp >
- rdev->wiphy.max_num_csa_counters)) {
- err = -EINVAL;
- goto free;
- }
-
- params.counter_offsets_presp =
- nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
-
- /* sanity checks - counters should fit and be the same */
- for (i = 0; i < params.n_counter_offsets_presp; i++) {
- u16 offset = params.counter_offsets_presp[i];
-
- if (offset >= params.beacon_csa.probe_resp_len) {
- err = -EINVAL;
- goto free;
- }
-
- if (params.beacon_csa.probe_resp[offset] !=
- params.count) {
- err = -EINVAL;
- goto free;
- }
- }
- }
skip_beacons:
err = nl80211_parse_chandef(rdev, info, &params.chandef);
@@ -10269,14 +10228,7 @@ skip_beacons:
if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
params.block_tx = true;
- if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
- err = nl80211_parse_punct_bitmap(rdev, info,
- &params.chandef,
- &params.punct_bitmap);
- if (err)
- goto free;
- }
-
+ params.link_id = link_id;
err = rdev_channel_switch(rdev, dev, &params);
free:
@@ -10649,13 +10601,6 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
return res;
}
-static bool nl80211_valid_wpa_versions(u32 wpa_versions)
-{
- return !(wpa_versions & ~(NL80211_WPA_VERSION_1 |
- NL80211_WPA_VERSION_2 |
- NL80211_WPA_VERSION_3));
-}
-
static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -10881,12 +10826,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
return -EINVAL;
}
- if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) {
+ if (info->attrs[NL80211_ATTR_WPA_VERSIONS])
settings->wpa_versions =
nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]);
- if (!nl80211_valid_wpa_versions(settings->wpa_versions))
- return -EINVAL;
- }
if (info->attrs[NL80211_ATTR_AKM_SUITES]) {
void *data;
@@ -11101,6 +11043,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
sizeof(req.s1g_capa));
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_ASSOC_SPP_AMSDU])) {
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT)) {
+ GENL_SET_ERR_MSG(info, "SPP A-MSDUs not supported");
+ return -EINVAL;
+ }
+ req.flags |= ASSOC_REQ_SPP_AMSDU;
+ }
+
req.link_id = nl80211_link_id_or_invalid(info->attrs);
if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
@@ -11226,7 +11177,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
struct nlattr *link;
int rem = 0;
- err = cfg80211_mlme_assoc(rdev, dev, &req);
+ err = cfg80211_mlme_assoc(rdev, dev, &req,
+ info->extack);
if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
dev->ieee80211_ptr->conn_owner_nlportid =
@@ -12674,23 +12626,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
- if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) {
- int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
- int i;
-
- if (len % sizeof(u16))
- return -EINVAL;
-
- params.n_csa_offsets = len / sizeof(u16);
- params.csa_offsets =
- nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
-
- /* check that all the offsets fit the frame */
- for (i = 0; i < params.n_csa_offsets; i++) {
- if (params.csa_offsets[i] >= params.len)
- return -EINVAL;
- }
- }
+ err = nl80211_parse_counter_offsets(rdev, NULL, params.len, -1,
+ info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX],
+ &params.csa_offsets,
+ &params.n_csa_offsets);
+ if (err)
+ return err;
if (!params.dont_wait_for_ack) {
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -16827,6 +16768,10 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_station,
.flags = GENL_UNS_ADMIN_PERM,
+ /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on
+ * whether MAC address is passed or not. If MAC address is
+ * passed, then even during MLO, link ID is not required.
+ */
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
@@ -19397,7 +19342,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef,
gfp_t gfp,
enum nl80211_commands notif,
- u8 count, bool quiet, u16 punct_bitmap)
+ u8 count, bool quiet)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct sk_buff *msg;
@@ -19431,9 +19376,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
goto nla_put_failure;
}
- if (nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, punct_bitmap))
- goto nla_put_failure;
-
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
@@ -19446,7 +19388,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
void cfg80211_ch_switch_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id, u16 punct_bitmap)
+ unsigned int link_id)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19455,7 +19397,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
lockdep_assert_wiphy(wdev->wiphy);
WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap);
+ trace_cfg80211_ch_switch_notify(dev, chandef, link_id);
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
@@ -19484,15 +19426,14 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
cfg80211_sched_dfs_chan_update(rdev);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
- NL80211_CMD_CH_SWITCH_NOTIFY, 0, false,
- punct_bitmap);
+ NL80211_CMD_CH_SWITCH_NOTIFY, 0, false);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
unsigned int link_id, u8 count,
- bool quiet, u16 punct_bitmap)
+ bool quiet)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19501,13 +19442,12 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
lockdep_assert_wiphy(wdev->wiphy);
WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id,
- punct_bitmap);
+ trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
- count, quiet, punct_bitmap);
+ count, quiet);
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
@@ -19884,6 +19824,11 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS))
goto free_msg;
+ if (wakeup->unprot_deauth_disassoc &&
+ nla_put_flag(msg,
+ NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC))
+ goto free_msg;
+
if (wakeup->packet) {
u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211;
u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN;
@@ -20164,9 +20109,26 @@ int cfg80211_external_auth_request(struct net_device *dev,
if (!hdr)
goto nla_put_failure;
+ /* Some historical mistakes in drivers <-> userspace interface (notably
+ * between drivers and wpa_supplicant) led to a big-endian conversion
+ * being needed on NL80211_ATTR_AKM_SUITES _only_ when its value is
+ * WLAN_AKM_SUITE_SAE. This is now fixed on userspace side, but for the
+ * benefit of older wpa_supplicant versions, send this particular value
+ * in big-endian. Note that newer wpa_supplicant will also detect this
+ * particular value in big endian still, so it all continues to work.
+ */
+ if (params->key_mgmt_suite == WLAN_AKM_SUITE_SAE) {
+ if (nla_put_be32(msg, NL80211_ATTR_AKM_SUITES,
+ cpu_to_be32(WLAN_AKM_SUITE_SAE)))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u32(msg, NL80211_ATTR_AKM_SUITES,
+ params->key_mgmt_suite))
+ goto nla_put_failure;
+ }
+
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
- nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
params->action) ||
nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2741b626919a..753f8e9aa4b1 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -57,6 +57,8 @@
#include <linux/verification.h>
#include <linux/moduleparam.h>
#include <linux/firmware.h>
+#include <linux/units.h>
+
#include <net/cfg80211.h>
#include "core.h"
#include "reg.h"
@@ -1289,20 +1291,17 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd)
static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
u32 freq_khz)
{
-#define ONE_GHZ_IN_KHZ 1000000
/*
* From 802.11ad: directional multi-gigabit (DMG):
* Pertaining to operation in a frequency band containing a channel
* with the Channel starting frequency above 45 GHz.
*/
- u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ?
- 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
+ u32 limit = freq_khz > 45 * KHZ_PER_GHZ ? 20 * KHZ_PER_GHZ : 2 * KHZ_PER_GHZ;
if (abs(freq_khz - freq_range->start_freq_khz) <= limit)
return true;
if (abs(freq_khz - freq_range->end_freq_khz) <= limit)
return true;
return false;
-#undef ONE_GHZ_IN_KHZ
}
/*
@@ -1595,10 +1594,10 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_EHT;
if (rd_flags & NL80211_RRF_DFS_CONCURRENT)
channel_flags |= IEEE80211_CHAN_DFS_CONCURRENT;
- if (rd_flags & NL80211_RRF_NO_UHB_VLP_CLIENT)
- channel_flags |= IEEE80211_CHAN_NO_UHB_VLP_CLIENT;
- if (rd_flags & NL80211_RRF_NO_UHB_AFC_CLIENT)
- channel_flags |= IEEE80211_CHAN_NO_UHB_AFC_CLIENT;
+ if (rd_flags & NL80211_RRF_NO_6GHZ_VLP_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT;
+ if (rd_flags & NL80211_RRF_NO_6GHZ_AFC_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT;
if (rd_flags & NL80211_RRF_PSD)
channel_flags |= IEEE80211_CHAN_PSD;
return channel_flags;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 2249b1a89d1c..5a5dd3ce497f 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -77,45 +77,6 @@ MODULE_PARM_DESC(bss_entries_limit,
#define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ)
-/**
- * struct cfg80211_colocated_ap - colocated AP information
- *
- * @list: linked list to all colocated aPS
- * @bssid: BSSID of the reported AP
- * @ssid: SSID of the reported AP
- * @ssid_len: length of the ssid
- * @center_freq: frequency the reported AP is on
- * @unsolicited_probe: the reported AP is part of an ESS, where all the APs
- * that operate in the same channel as the reported AP and that might be
- * detected by a STA receiving this frame, are transmitting unsolicited
- * Probe Response frames every 20 TUs
- * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP
- * @same_ssid: the reported AP has the same SSID as the reporting AP
- * @multi_bss: the reported AP is part of a multiple BSSID set
- * @transmitted_bssid: the reported AP is the transmitting BSSID
- * @colocated_ess: all the APs that share the same ESS as the reported AP are
- * colocated and can be discovered via legacy bands.
- * @short_ssid_valid: short_ssid is valid and can be used
- * @short_ssid: the short SSID for this SSID
- * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP
- */
-struct cfg80211_colocated_ap {
- struct list_head list;
- u8 bssid[ETH_ALEN];
- u8 ssid[IEEE80211_MAX_SSID_LEN];
- size_t ssid_len;
- u32 short_ssid;
- u32 center_freq;
- u8 unsolicited_probe:1,
- oct_recommended:1,
- same_ssid:1,
- multi_bss:1,
- transmitted_bssid:1,
- colocated_ess:1,
- short_ssid_valid:1;
- s8 psd_20;
-};
-
static void bss_free(struct cfg80211_internal_bss *bss)
{
struct cfg80211_bss_ies *ies;
@@ -566,7 +527,8 @@ static int cfg80211_calc_short_ssid(const struct cfg80211_bss_ies *ies,
return 0;
}
-static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
+VISIBLE_IF_CFG80211_KUNIT void
+cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
{
struct cfg80211_colocated_ap *ap, *tmp_ap;
@@ -575,6 +537,7 @@ static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
kfree(ap);
}
}
+EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_free_coloc_ap_list);
static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
const u8 *pos, u8 length,
@@ -648,104 +611,140 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
return 0;
}
-static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
- struct list_head *list)
+bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len,
+ enum cfg80211_rnr_iter_ret
+ (*iter)(void *data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len),
+ void *iter_data)
{
- struct ieee80211_neighbor_ap_info *ap_info;
- const struct element *elem, *ssid_elem;
+ const struct element *rnr;
const u8 *pos, *end;
- u32 s_ssid_tmp;
- int n_coloc = 0, ret;
- LIST_HEAD(ap_list);
- ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
- if (ret)
- return 0;
+ for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
+ elems, elems_len) {
+ const struct ieee80211_neighbor_ap_info *info;
- for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
- ies->data, ies->len) {
- pos = elem->data;
- end = elem->data + elem->datalen;
+ pos = rnr->data;
+ end = rnr->data + rnr->datalen;
/* RNR IE may contain more than one NEIGHBOR_AP_INFO */
- while (pos + sizeof(*ap_info) <= end) {
- enum nl80211_band band;
- int freq;
+ while (sizeof(*info) <= end - pos) {
u8 length, i, count;
+ u8 type;
- ap_info = (void *)pos;
- count = u8_get_bits(ap_info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1;
- length = ap_info->tbtt_info_len;
+ info = (void *)pos;
+ count = u8_get_bits(info->tbtt_info_hdr,
+ IEEE80211_AP_INFO_TBTT_HDR_COUNT) +
+ 1;
+ length = info->tbtt_info_len;
- pos += sizeof(*ap_info);
+ pos += sizeof(*info);
- if (!ieee80211_operating_class_to_band(ap_info->op_class,
- &band))
- break;
+ if (count * length > end - pos)
+ return false;
- freq = ieee80211_channel_to_frequency(ap_info->channel,
- band);
+ type = u8_get_bits(info->tbtt_info_hdr,
+ IEEE80211_AP_INFO_TBTT_HDR_TYPE);
- if (end - pos < count * length)
- break;
+ for (i = 0; i < count; i++) {
+ switch (iter(iter_data, type, info,
+ pos, length)) {
+ case RNR_ITER_CONTINUE:
+ break;
+ case RNR_ITER_BREAK:
+ return true;
+ case RNR_ITER_ERROR:
+ return false;
+ }
- if (u8_get_bits(ap_info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_TYPE) !=
- IEEE80211_TBTT_INFO_TYPE_TBTT) {
- pos += count * length;
- continue;
+ pos += length;
}
+ }
- /* TBTT info must include bss param + BSSID +
- * (short SSID or same_ssid bit to be set).
- * ignore other options, and move to the
- * next AP info
- */
- if (band != NL80211_BAND_6GHZ ||
- !(length == offsetofend(struct ieee80211_tbtt_info_7_8_9,
- bss_params) ||
- length == sizeof(struct ieee80211_tbtt_info_7_8_9) ||
- length >= offsetofend(struct ieee80211_tbtt_info_ge_11,
- bss_params))) {
- pos += count * length;
- continue;
- }
+ if (pos != end)
+ return false;
+ }
- for (i = 0; i < count; i++) {
- struct cfg80211_colocated_ap *entry;
+ return true;
+}
+EXPORT_SYMBOL_GPL(cfg80211_iter_rnr);
- entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN,
- GFP_ATOMIC);
+struct colocated_ap_data {
+ const struct element *ssid_elem;
+ struct list_head ap_list;
+ u32 s_ssid_tmp;
+ int n_coloc;
+};
- if (!entry)
- goto error;
+static enum cfg80211_rnr_iter_ret
+cfg80211_parse_colocated_ap_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ struct colocated_ap_data *data = _data;
+ struct cfg80211_colocated_ap *entry;
+ enum nl80211_band band;
- entry->center_freq = freq;
+ if (type != IEEE80211_TBTT_INFO_TYPE_TBTT)
+ return RNR_ITER_CONTINUE;
- if (!cfg80211_parse_ap_info(entry, pos, length,
- ssid_elem,
- s_ssid_tmp)) {
- n_coloc++;
- list_add_tail(&entry->list, &ap_list);
- } else {
- kfree(entry);
- }
+ if (!ieee80211_operating_class_to_band(info->op_class, &band))
+ return RNR_ITER_CONTINUE;
- pos += length;
- }
- }
+ /* TBTT info must include bss param + BSSID + (short SSID or
+ * same_ssid bit to be set). Ignore other options, and move to
+ * the next AP info
+ */
+ if (band != NL80211_BAND_6GHZ ||
+ !(tbtt_info_len == offsetofend(struct ieee80211_tbtt_info_7_8_9,
+ bss_params) ||
+ tbtt_info_len == sizeof(struct ieee80211_tbtt_info_7_8_9) ||
+ tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11,
+ bss_params)))
+ return RNR_ITER_CONTINUE;
+
+ entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, GFP_ATOMIC);
+ if (!entry)
+ return RNR_ITER_ERROR;
+
+ entry->center_freq =
+ ieee80211_channel_to_frequency(info->channel, band);
+
+ if (!cfg80211_parse_ap_info(entry, tbtt_info, tbtt_info_len,
+ data->ssid_elem, data->s_ssid_tmp)) {
+ data->n_coloc++;
+ list_add_tail(&entry->list, &data->ap_list);
+ } else {
+ kfree(entry);
+ }
-error:
- if (pos != end) {
- cfg80211_free_coloc_ap_list(&ap_list);
- return 0;
- }
+ return RNR_ITER_CONTINUE;
+}
+
+VISIBLE_IF_CFG80211_KUNIT int
+cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+ struct list_head *list)
+{
+ struct colocated_ap_data data = {};
+ int ret;
+
+ INIT_LIST_HEAD(&data.ap_list);
+
+ ret = cfg80211_calc_short_ssid(ies, &data.ssid_elem, &data.s_ssid_tmp);
+ if (ret)
+ return 0;
+
+ if (!cfg80211_iter_rnr(ies->data, ies->len,
+ cfg80211_parse_colocated_ap_iter, &data)) {
+ cfg80211_free_coloc_ap_list(&data.ap_list);
+ return 0;
}
- list_splice_tail(&ap_list, list);
- return n_coloc;
+ list_splice_tail(&data.ap_list, list);
+ return data.n_coloc;
}
+EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap);
static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request,
struct ieee80211_channel *chan,
@@ -1731,6 +1730,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
}
}
+static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *old)
+{
+ const struct ieee80211_ext_chansw_ie *ecsa;
+ const struct element *elem_new, *elem_old;
+ const struct cfg80211_bss_ies *new, *bcn;
+
+ if (known->pub.proberesp_ecsa_stuck)
+ return;
+
+ new = rcu_dereference_protected(known->pub.proberesp_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (WARN_ON(!new))
+ return;
+
+ if (new->tsf - old->tsf < USEC_PER_SEC)
+ return;
+
+ elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ old->data, old->len);
+ if (!elem_old)
+ return;
+
+ elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ new->data, new->len);
+ if (!elem_new)
+ return;
+
+ bcn = rcu_dereference_protected(known->pub.beacon_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (bcn &&
+ cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ bcn->data, bcn->len))
+ return;
+
+ if (elem_new->datalen != elem_old->datalen)
+ return;
+ if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie))
+ return;
+ if (memcmp(elem_new->data, elem_old->data, elem_new->datalen))
+ return;
+
+ ecsa = (void *)elem_new->data;
+
+ if (!ecsa->mode)
+ return;
+
+ if (ecsa->new_ch_num !=
+ ieee80211_frequency_to_channel(known->pub.channel->center_freq))
+ return;
+
+ known->pub.proberesp_ecsa_stuck = 1;
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1750,8 +1804,10 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
/* Override possible earlier Beacon frame IEs */
rcu_assign_pointer(known->pub.ies,
new->pub.proberesp_ies);
- if (old)
+ if (old) {
+ cfg80211_check_stuck_ecsa(rdev, known, old);
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+ }
}
if (rcu_access_pointer(new->pub.beacon_ies)) {
@@ -2065,6 +2121,35 @@ struct cfg80211_inform_single_bss_data {
u64 cannot_use_reasons;
};
+static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen,
+ const u32 flags)
+{
+ const struct element *tmp;
+ struct ieee80211_he_operation *he_oper;
+
+ tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
+ if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+
+ he_oper = (void *)&tmp->data[1];
+ he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+ if (!he_6ghz_oper)
+ return false;
+
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ return true;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT);
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT);
+ }
+ }
+ return false;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *
cfg80211_inform_single_bss_data(struct wiphy *wiphy,
@@ -2097,6 +2182,14 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
if (!channel)
return NULL;
+ if (channel->band == NL80211_BAND_6GHZ &&
+ !cfg80211_6ghz_power_type_valid(data->ie, data->ielen,
+ channel->flags)) {
+ data->use_for = 0;
+ data->cannot_use_reasons =
+ NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH;
+ }
+
memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN);
tmp.pub.channel = channel;
if (data->bss_source != BSS_SOURCE_STA_PROFILE)
@@ -2108,6 +2201,9 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
tmp.ts_boottime = drv_data->boottime_ns;
tmp.parent_tsf = drv_data->parent_tsf;
ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid);
+ tmp.pub.chains = drv_data->chains;
+ memcpy(tmp.pub.chain_signal, drv_data->chain_signal,
+ IEEE80211_MAX_CHAINS);
tmp.pub.use_for = data->use_for;
tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
@@ -2151,6 +2247,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
switch (data->ftype) {
case CFG80211_BSS_FTYPE_BEACON:
+ case CFG80211_BSS_FTYPE_S1G_BEACON:
ies->from_beacon = true;
fallthrough;
case CFG80211_BSS_FTYPE_UNKNOWN:
@@ -2407,16 +2504,22 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
if (elem->id == WLAN_EID_EXTENSION) {
copied = elem->datalen - 1;
- if (copied > data_len)
- return -ENOSPC;
- memmove(data, elem->data + 1, copied);
+ if (data) {
+ if (copied > data_len)
+ return -ENOSPC;
+
+ memmove(data, elem->data + 1, copied);
+ }
} else {
copied = elem->datalen;
- if (copied > data_len)
- return -ENOSPC;
- memmove(data, elem->data, copied);
+ if (data) {
+ if (copied > data_len)
+ return -ENOSPC;
+
+ memmove(data, elem->data, copied);
+ }
}
/* Fragmented elements must have 255 bytes */
@@ -2435,10 +2538,13 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
elem_datalen = elem->datalen;
- if (copied + elem_datalen > data_len)
- return -ENOSPC;
+ if (data) {
+ if (copied + elem_datalen > data_len)
+ return -ENOSPC;
+
+ memmove(data + copied, elem->data, elem_datalen);
+ }
- memmove(data + copied, elem->data, elem_datalen);
copied += elem_datalen;
/* Only the last fragment may be short */
@@ -2544,77 +2650,168 @@ error:
return NULL;
}
+struct tbtt_info_iter_data {
+ const struct ieee80211_neighbor_ap_info *ap_info;
+ u8 param_ch_count;
+ u32 use_for;
+ u8 mld_id, link_id;
+};
+
+static enum cfg80211_rnr_iter_ret
+cfg802121_mld_ap_rnr_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ const struct ieee80211_rnr_mld_params *mld_params;
+ struct tbtt_info_iter_data *data = _data;
+ u8 link_id;
+
+ if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
+ tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11,
+ mld_params))
+ mld_params = (void *)(tbtt_info +
+ offsetof(struct ieee80211_tbtt_info_ge_11,
+ mld_params));
+ else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
+ tbtt_info_len >= sizeof(struct ieee80211_rnr_mld_params))
+ mld_params = (void *)tbtt_info;
+ else
+ return RNR_ITER_CONTINUE;
+
+ link_id = le16_get_bits(mld_params->params,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID);
+
+ if (data->mld_id != mld_params->mld_id)
+ return RNR_ITER_CONTINUE;
+
+ if (data->link_id != link_id)
+ return RNR_ITER_CONTINUE;
+
+ data->ap_info = info;
+ data->param_ch_count =
+ le16_get_bits(mld_params->params,
+ IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+
+ if (type == IEEE80211_TBTT_INFO_TYPE_TBTT)
+ data->use_for = NL80211_BSS_USE_FOR_ALL;
+ else
+ data->use_for = NL80211_BSS_USE_FOR_MLD_LINK;
+ return RNR_ITER_BREAK;
+}
+
static u8
-cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
- const struct ieee80211_neighbor_ap_info **ap_info,
- const u8 **tbtt_info)
+cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
+ const struct ieee80211_neighbor_ap_info **ap_info,
+ u8 *param_ch_count)
{
- const struct ieee80211_neighbor_ap_info *info;
- const struct element *rnr;
- const u8 *pos, *end;
+ struct tbtt_info_iter_data data = {
+ .mld_id = mld_id,
+ .link_id = link_id,
+ };
- for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT, ie, ielen) {
- pos = rnr->data;
- end = rnr->data + rnr->datalen;
+ cfg80211_iter_rnr(ie, ielen, cfg802121_mld_ap_rnr_iter, &data);
- /* RNR IE may contain more than one NEIGHBOR_AP_INFO */
- while (sizeof(*info) <= end - pos) {
- const struct ieee80211_rnr_mld_params *mld_params;
- u16 params;
- u8 length, i, count, mld_params_offset;
- u8 type, lid;
- u32 use_for;
+ *ap_info = data.ap_info;
+ *param_ch_count = data.param_ch_count;
- info = (void *)pos;
- count = u8_get_bits(info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1;
- length = info->tbtt_info_len;
+ return data.use_for;
+}
- pos += sizeof(*info);
+static struct element *
+cfg80211_gen_reporter_rnr(struct cfg80211_bss *source_bss, bool is_mbssid,
+ bool same_mld, u8 link_id, u8 bss_change_count,
+ gfp_t gfp)
+{
+ const struct cfg80211_bss_ies *ies;
+ struct ieee80211_neighbor_ap_info ap_info;
+ struct ieee80211_tbtt_info_ge_11 tbtt_info;
+ u32 short_ssid;
+ const struct element *elem;
+ struct element *res;
- if (count * length > end - pos)
- return 0;
+ /*
+ * We only generate the RNR to permit ML lookups. For that we do not
+ * need an entry for the corresponding transmitting BSS, lets just skip
+ * it even though it would be easy to add.
+ */
+ if (!same_mld)
+ return NULL;
- type = u8_get_bits(info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_TYPE);
+ /* We could use tx_data->ies if we change cfg80211_calc_short_ssid */
+ rcu_read_lock();
+ ies = rcu_dereference(source_bss->ies);
- if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
- length >=
- offsetofend(struct ieee80211_tbtt_info_ge_11,
- mld_params)) {
- mld_params_offset =
- offsetof(struct ieee80211_tbtt_info_ge_11, mld_params);
- use_for = NL80211_BSS_USE_FOR_ALL;
- } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
- length >= sizeof(struct ieee80211_rnr_mld_params)) {
- mld_params_offset = 0;
- use_for = NL80211_BSS_USE_FOR_MLD_LINK;
- } else {
- pos += count * length;
- continue;
- }
+ ap_info.tbtt_info_len = offsetofend(typeof(tbtt_info), mld_params);
+ ap_info.tbtt_info_hdr =
+ u8_encode_bits(IEEE80211_TBTT_INFO_TYPE_TBTT,
+ IEEE80211_AP_INFO_TBTT_HDR_TYPE) |
+ u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT);
- for (i = 0; i < count; i++) {
- mld_params = (void *)pos + mld_params_offset;
- params = le16_to_cpu(mld_params->params);
+ ap_info.channel = ieee80211_frequency_to_channel(source_bss->channel->center_freq);
- lid = u16_get_bits(params,
- IEEE80211_RNR_MLD_PARAMS_LINK_ID);
+ /* operating class */
+ elem = cfg80211_find_elem(WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ ies->data, ies->len);
+ if (elem && elem->datalen >= 1) {
+ ap_info.op_class = elem->data[0];
+ } else {
+ struct cfg80211_chan_def chandef;
- if (mld_id == mld_params->mld_id &&
- link_id == lid) {
- *ap_info = info;
- *tbtt_info = pos;
+ /* The AP is not providing us with anything to work with. So
+ * make up a somewhat reasonable operating class, but don't
+ * bother with it too much as no one will ever use the
+ * information.
+ */
+ cfg80211_chandef_create(&chandef, source_bss->channel,
+ NL80211_CHAN_NO_HT);
- return use_for;
- }
+ if (!ieee80211_chandef_to_operating_class(&chandef,
+ &ap_info.op_class))
+ goto out_unlock;
+ }
- pos += length;
- }
- }
+ /* Just set TBTT offset and PSD 20 to invalid/unknown */
+ tbtt_info.tbtt_offset = 255;
+ tbtt_info.psd_20 = IEEE80211_RNR_TBTT_PARAMS_PSD_RESERVED;
+
+ memcpy(tbtt_info.bssid, source_bss->bssid, ETH_ALEN);
+ if (cfg80211_calc_short_ssid(ies, &elem, &short_ssid))
+ goto out_unlock;
+
+ rcu_read_unlock();
+
+ tbtt_info.short_ssid = cpu_to_le32(short_ssid);
+
+ tbtt_info.bss_params = IEEE80211_RNR_TBTT_PARAMS_SAME_SSID;
+
+ if (is_mbssid) {
+ tbtt_info.bss_params |= IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID;
+ tbtt_info.bss_params |= IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID;
}
- return 0;
+ tbtt_info.mld_params.mld_id = 0;
+ tbtt_info.mld_params.params =
+ le16_encode_bits(link_id, IEEE80211_RNR_MLD_PARAMS_LINK_ID) |
+ le16_encode_bits(bss_change_count,
+ IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+
+ res = kzalloc(struct_size(res, data,
+ sizeof(ap_info) + ap_info.tbtt_info_len),
+ gfp);
+ if (!res)
+ return NULL;
+
+ /* Copy the data */
+ res->id = WLAN_EID_REDUCED_NEIGHBOR_REPORT;
+ res->datalen = sizeof(ap_info) + ap_info.tbtt_info_len;
+ memcpy(res->data, &ap_info, sizeof(ap_info));
+ memcpy(res->data + sizeof(ap_info), &tbtt_info, ap_info.tbtt_info_len);
+
+ return res;
+
+out_unlock:
+ rcu_read_unlock();
+ return NULL;
}
static void
@@ -2630,25 +2827,25 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
.source_bss = source_bss,
.bss_source = BSS_SOURCE_STA_PROFILE,
};
+ struct element *reporter_rnr = NULL;
struct ieee80211_multi_link_elem *ml_elem;
struct cfg80211_mle *mle;
u16 control;
u8 ml_common_len;
- u8 *new_ie;
+ u8 *new_ie = NULL;
struct cfg80211_bss *bss;
- int mld_id;
+ u8 mld_id, reporter_link_id, bss_change_count;
u16 seen_links = 0;
- const u8 *pos;
u8 i;
- if (!ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1))
+ if (!ieee80211_mle_type_ok(elem->data + 1,
+ IEEE80211_ML_CONTROL_TYPE_BASIC,
+ elem->datalen - 1))
return;
- ml_elem = (void *)elem->data + 1;
+ ml_elem = (void *)(elem->data + 1);
control = le16_to_cpu(ml_elem->control);
- if (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE) !=
- IEEE80211_ML_CONTROL_TYPE_BASIC)
- return;
+ ml_common_len = ml_elem->variable[0];
/* Must be present when transmitted by an AP (in a probe response) */
if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) ||
@@ -2656,18 +2853,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP))
return;
- ml_common_len = ml_elem->variable[0];
-
- /* length + MLD MAC address + link ID info + BSS Params Change Count */
- pos = ml_elem->variable + 1 + 6 + 1 + 1;
-
- if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY))
- pos += 2;
- if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_EML_CAPA))
- pos += 2;
-
- /* MLD capabilities and operations */
- pos += 2;
+ reporter_link_id = ieee80211_mle_get_link_id(elem->data + 1);
+ bss_change_count = ieee80211_mle_get_bss_param_ch_cnt(elem->data + 1);
/*
* The MLD ID of the reporting AP is always zero. It is set if the AP
@@ -2675,32 +2862,35 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
* relating to a nontransmitted BSS (matching the Multi-BSSID Index,
* Draft P802.11be_D3.2, 35.3.4.2)
*/
- if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MLD_ID)) {
- mld_id = *pos;
- pos += 1;
- } else {
- mld_id = 0;
- }
-
- /* Extended MLD capabilities and operations */
- pos += 2;
+ mld_id = ieee80211_mle_get_mld_id(elem->data + 1);
/* Fully defrag the ML element for sta information/profile iteration */
mle = cfg80211_defrag_mle(elem, tx_data->ie, tx_data->ielen, gfp);
if (!mle)
return;
+ /* No point in doing anything if there is no per-STA profile */
+ if (!mle->sta_prof[0])
+ goto out;
+
new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp);
if (!new_ie)
goto out;
+ reporter_rnr = cfg80211_gen_reporter_rnr(source_bss,
+ u16_get_bits(control,
+ IEEE80211_MLC_BASIC_PRES_MLD_ID),
+ mld_id == 0, reporter_link_id,
+ bss_change_count,
+ gfp);
+
for (i = 0; i < ARRAY_SIZE(mle->sta_prof) && mle->sta_prof[i]; i++) {
const struct ieee80211_neighbor_ap_info *ap_info;
enum nl80211_band band;
u32 freq;
const u8 *profile;
- const u8 *tbtt_info;
ssize_t profile_len;
+ u8 param_ch_count;
u8 link_id, use_for;
if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i],
@@ -2743,10 +2933,11 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
profile_len -= 2;
/* Find in RNR to look up channel information */
- use_for = cfg80211_tbtt_info_for_mld_ap(tx_data->ie,
- tx_data->ielen,
- mld_id, link_id,
- &ap_info, &tbtt_info);
+ use_for = cfg80211_rnr_info_for_mld_ap(tx_data->ie,
+ tx_data->ielen,
+ mld_id, link_id,
+ &ap_info,
+ &param_ch_count);
if (!use_for)
continue;
@@ -2789,7 +2980,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
continue;
/* Copy the Basic Multi-Link element including the common
- * information, and then fix up the link ID.
+ * information, and then fix up the link ID and BSS param
+ * change count.
* Note that the ML element length has been verified and we
* also checked that it contains the link ID.
*/
@@ -2800,10 +2992,21 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
sizeof(*ml_elem) + ml_common_len);
new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN] = link_id;
+ new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN + 1] =
+ param_ch_count;
data.ielen += sizeof(*ml_elem) + ml_common_len;
- /* TODO: Add an RNR containing only the reporting AP */
+ if (reporter_rnr && (use_for & NL80211_BSS_USE_FOR_NORMAL)) {
+ if (data.ielen + sizeof(struct element) +
+ reporter_rnr->datalen > IEEE80211_MAX_DATA_LEN)
+ continue;
+
+ memcpy(new_ie + data.ielen, reporter_rnr,
+ sizeof(struct element) + reporter_rnr->datalen);
+ data.ielen += sizeof(struct element) +
+ reporter_rnr->datalen;
+ }
bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp);
if (!bss)
@@ -2812,6 +3015,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
}
out:
+ kfree(reporter_rnr);
kfree(new_ie);
kfree(mle);
}
@@ -2864,6 +3068,10 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
if (!res)
return NULL;
+ /* don't do any further MBSSID/ML handling for S1G */
+ if (ftype == CFG80211_BSS_FTYPE_S1G_BEACON)
+ return res;
+
cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp);
cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp);
@@ -2872,59 +3080,22 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_inform_bss_data);
-static bool cfg80211_uhb_power_type_valid(const u8 *ie,
- size_t ielen,
- const u32 flags)
-{
- const struct element *tmp;
- struct ieee80211_he_operation *he_oper;
-
- tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
- if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
- const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
-
- he_oper = (void *)&tmp->data[1];
- he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
-
- if (!he_6ghz_oper)
- return false;
-
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
- case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
- return true;
- case IEEE80211_6GHZ_CTRL_REG_SP_AP:
- return !(flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT);
- case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
- return !(flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT);
- }
- }
- return false;
-}
-
-/* cfg80211_inform_bss_width_frame helper */
-static struct cfg80211_bss *
-cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
- struct cfg80211_inform_bss *data,
- struct ieee80211_mgmt *mgmt, size_t len,
- gfp_t gfp)
+struct cfg80211_bss *
+cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
+ struct cfg80211_inform_bss *data,
+ struct ieee80211_mgmt *mgmt, size_t len,
+ gfp_t gfp)
{
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- struct cfg80211_internal_bss tmp = {}, *res;
- struct cfg80211_bss_ies *ies;
- struct ieee80211_channel *channel;
- bool signal_valid;
+ size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
+ u.probe_resp.variable);
struct ieee80211_ext *ext = NULL;
- u8 *bssid, *variable;
- u16 capability, beacon_int;
- size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt,
- u.probe_resp.variable);
- int bss_type;
-
- BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
- offsetof(struct ieee80211_mgmt, u.beacon.variable));
-
- trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len);
+ enum cfg80211_bss_frame_type ftype;
+ u16 beacon_interval;
+ const u8 *bssid;
+ u16 capability;
+ const u8 *ie;
+ size_t ielen;
+ u64 tsf;
if (WARN_ON(!mgmt))
return NULL;
@@ -2932,9 +3103,10 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
if (WARN_ON(!wiphy))
return NULL;
- if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
- (data->signal < 0 || data->signal > 100)))
- return NULL;
+ BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
+ offsetof(struct ieee80211_mgmt, u.beacon.variable));
+
+ trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len);
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
ext = (void *) mgmt;
@@ -2948,32 +3120,17 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
return NULL;
ielen = len - min_hdr_len;
- variable = mgmt->u.probe_resp.variable;
- if (ext) {
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- variable = ext->u.s1g_short_beacon.variable;
- else
- variable = ext->u.s1g_beacon.variable;
- }
-
- channel = cfg80211_get_bss_channel(wiphy, variable, ielen, data->chan);
- if (!channel)
- return NULL;
-
- if (channel->band == NL80211_BAND_6GHZ &&
- !cfg80211_uhb_power_type_valid(variable, ielen, channel->flags)) {
- data->restrict_use = 1;
- data->use_for = 0;
- data->cannot_use_reasons =
- NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH;
- }
-
+ ie = mgmt->u.probe_resp.variable;
if (ext) {
const struct ieee80211_s1g_bcn_compat_ie *compat;
const struct element *elem;
- elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT,
- variable, ielen);
+ if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+ ie = ext->u.s1g_short_beacon.variable;
+ else
+ ie = ext->u.s1g_beacon.variable;
+
+ elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen);
if (!elem)
return NULL;
if (elem->datalen < sizeof(*compat))
@@ -2981,112 +3138,26 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
compat = (void *)elem->data;
bssid = ext->u.s1g_beacon.sa;
capability = le16_to_cpu(compat->compat_info);
- beacon_int = le16_to_cpu(compat->beacon_int);
+ beacon_interval = le16_to_cpu(compat->beacon_int);
} else {
bssid = mgmt->bssid;
- beacon_int = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
+ beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
}
- if (channel->band == NL80211_BAND_60GHZ) {
- bss_type = capability & WLAN_CAPABILITY_DMG_TYPE_MASK;
- if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP ||
- bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS)
- regulatory_hint_found_beacon(wiphy, channel, gfp);
- } else {
- if (capability & WLAN_CAPABILITY_ESS)
- regulatory_hint_found_beacon(wiphy, channel, gfp);
- }
-
- ies = kzalloc(sizeof(*ies) + ielen, gfp);
- if (!ies)
- return NULL;
- ies->len = ielen;
- ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
- ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control) ||
- ieee80211_is_s1g_beacon(mgmt->frame_control);
- memcpy(ies->data, variable, ielen);
+ tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
if (ieee80211_is_probe_resp(mgmt->frame_control))
- rcu_assign_pointer(tmp.pub.proberesp_ies, ies);
+ ftype = CFG80211_BSS_FTYPE_PRESP;
+ else if (ext)
+ ftype = CFG80211_BSS_FTYPE_S1G_BEACON;
else
- rcu_assign_pointer(tmp.pub.beacon_ies, ies);
- rcu_assign_pointer(tmp.pub.ies, ies);
-
- memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
- tmp.pub.beacon_interval = beacon_int;
- tmp.pub.capability = capability;
- tmp.pub.channel = channel;
- tmp.pub.signal = data->signal;
- tmp.ts_boottime = data->boottime_ns;
- tmp.parent_tsf = data->parent_tsf;
- tmp.pub.chains = data->chains;
- memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
- ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
- tmp.pub.use_for = data->restrict_use ?
- data->use_for :
- NL80211_BSS_USE_FOR_ALL;
- tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
-
- signal_valid = data->chan == channel;
- spin_lock_bh(&rdev->bss_lock);
- res = __cfg80211_bss_update(rdev, &tmp, signal_valid, jiffies);
- if (!res)
- goto drop;
-
- rdev_inform_bss(rdev, &res->pub, ies, data->drv_data);
+ ftype = CFG80211_BSS_FTYPE_BEACON;
- spin_unlock_bh(&rdev->bss_lock);
-
- trace_cfg80211_return_bss(&res->pub);
- /* __cfg80211_bss_update gives us a referenced result */
- return &res->pub;
-
-drop:
- spin_unlock_bh(&rdev->bss_lock);
- return NULL;
-}
-
-struct cfg80211_bss *
-cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
- struct cfg80211_inform_bss *data,
- struct ieee80211_mgmt *mgmt, size_t len,
- gfp_t gfp)
-{
- struct cfg80211_inform_single_bss_data inform_data = {
- .drv_data = data,
- .ie = mgmt->u.probe_resp.variable,
- .ielen = len - offsetof(struct ieee80211_mgmt,
- u.probe_resp.variable),
- .use_for = data->restrict_use ?
- data->use_for :
- NL80211_BSS_USE_FOR_ALL,
- .cannot_use_reasons = data->cannot_use_reasons,
- };
- struct cfg80211_bss *res;
-
- res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
- len, gfp);
- if (!res)
- return NULL;
-
- /* don't do any further MBSSID/ML handling for S1G */
- if (ieee80211_is_s1g_beacon(mgmt->frame_control))
- return res;
-
- inform_data.ftype = ieee80211_is_beacon(mgmt->frame_control) ?
- CFG80211_BSS_FTYPE_BEACON : CFG80211_BSS_FTYPE_PRESP;
- memcpy(inform_data.bssid, mgmt->bssid, ETH_ALEN);
- inform_data.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
- inform_data.beacon_interval =
- le16_to_cpu(mgmt->u.probe_resp.beacon_int);
-
- /* process each non-transmitting bss */
- cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp);
-
- cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp);
-
- return res;
+ return cfg80211_inform_bss_data(wiphy, data, ftype,
+ bssid, tsf, capability,
+ beacon_interval, ie, ielen,
+ gfp);
}
EXPORT_SYMBOL(cfg80211_inform_bss_frame_data);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 195c8532734b..82e3ce42206c 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -209,7 +209,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev,
if (!req.bss) {
err = -ENOENT;
} else {
- err = cfg80211_mlme_assoc(rdev, wdev->netdev, &req);
+ err = cfg80211_mlme_assoc(rdev, wdev->netdev,
+ &req, NULL);
cfg80211_put_bss(&rdev->wiphy, req.bss);
}
diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile
index 1f6622fcb758..c364e63b508e 100644
--- a/net/wireless/tests/Makefile
+++ b/net/wireless/tests/Makefile
@@ -1,3 +1,3 @@
-cfg80211-tests-y += module.o fragmentation.o scan.o util.o
+cfg80211-tests-y += module.o fragmentation.o scan.o util.o chan.o
obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o
diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c
new file mode 100644
index 000000000000..d02258ac2dab
--- /dev/null
+++ b/net/wireless/tests/chan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for channel helper functions
+ *
+ * Copyright (C) 2023-2024 Intel Corporation
+ */
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static struct ieee80211_channel chan_6ghz_1 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 5955,
+};
+
+static struct ieee80211_channel chan_6ghz_5 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 5975,
+};
+
+static struct ieee80211_channel chan_6ghz_105 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 6475,
+};
+
+static const struct chandef_compat_case {
+ const char *desc;
+ /* leave c1 empty for tests for identical */
+ struct cfg80211_chan_def c1, c2;
+ /* we test both ways around, so c2 should always be the compat one */
+ bool compat;
+} chandef_compat_cases[] = {
+ {
+ .desc = "identical non-HT",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_20_NOHT,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 20 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 40 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_40,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 80 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_80,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 160 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20 + 40,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 320 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20 + 40 + 80,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "20 MHz in 320 MHz\n",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20 + 40 + 80,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "different 20 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_5,
+ .center_freq1 = 5975,
+ },
+ },
+ {
+ .desc = "different primary 160 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 150,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ },
+ },
+ {
+ /* similar to previous test but one has lower BW */
+ .desc = "matching primary 160 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "matching primary 160 MHz & punctured secondary 160 Mhz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ .punctured = 0xf,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "matching primary 160 MHz & punctured matching",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ .punctured = 0xc0,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ .punctured = 0xc000,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "matching primary 160 MHz & punctured not matching",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ .punctured = 0x80,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ .punctured = 0xc000,
+ },
+ },
+};
+
+KUNIT_ARRAY_PARAM_DESC(chandef_compat, chandef_compat_cases, desc)
+
+static void test_chandef_compat(struct kunit *test)
+{
+ const struct chandef_compat_case *params = test->param_value;
+ const struct cfg80211_chan_def *ret, *expect;
+ struct cfg80211_chan_def c1 = params->c1;
+
+ /* tests with identical ones */
+ if (!params->c1.chan)
+ c1 = params->c2;
+
+ KUNIT_EXPECT_EQ(test, cfg80211_chandef_valid(&c1), true);
+ KUNIT_EXPECT_EQ(test, cfg80211_chandef_valid(&params->c2), true);
+
+ expect = params->compat ? &params->c2 : NULL;
+
+ ret = cfg80211_chandef_compatible(&c1, &params->c2);
+ KUNIT_EXPECT_PTR_EQ(test, ret, expect);
+
+ if (!params->c1.chan)
+ expect = &c1;
+
+ ret = cfg80211_chandef_compatible(&params->c2, &c1);
+ KUNIT_EXPECT_PTR_EQ(test, ret, expect);
+}
+
+static struct kunit_case chandef_compat_test_cases[] = {
+ KUNIT_CASE_PARAM(test_chandef_compat, chandef_compat_gen_params),
+ {}
+};
+
+static struct kunit_suite chandef_compat = {
+ .name = "cfg80211-chandef-compat",
+ .test_cases = chandef_compat_test_cases,
+};
+
+kunit_test_suite(chandef_compat);
diff --git a/net/wireless/tests/fragmentation.c b/net/wireless/tests/fragmentation.c
index 49a339ca8880..411fae18cd88 100644
--- a/net/wireless/tests/fragmentation.c
+++ b/net/wireless/tests/fragmentation.c
@@ -2,7 +2,7 @@
/*
* KUnit tests for element fragmentation
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <net/cfg80211.h>
@@ -27,7 +27,12 @@ static void defragment_0(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 253);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
WLAN_EID_FRAGMENT);
KUNIT_EXPECT_EQ(test, ret, 253);
KUNIT_EXPECT_MEMEQ(test, data, input + 3, 253);
@@ -63,7 +68,12 @@ static void defragment_1(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 7);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
WLAN_EID_FRAGMENT);
/* this means the last fragment was not used */
KUNIT_EXPECT_EQ(test, ret, 254 + 7);
@@ -106,10 +116,15 @@ static void defragment_2(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
WLAN_EID_FRAGMENT);
/* this means the last fragment was not used */
KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1);
KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
KUNIT_EXPECT_MEMEQ(test, data + 254, input + 257 + 2, 255);
KUNIT_EXPECT_MEMEQ(test, data + 254 + 255, input + 2 * 257 + 2, 1);
@@ -134,7 +149,12 @@ static void defragment_at_end(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 7);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
WLAN_EID_FRAGMENT);
KUNIT_EXPECT_EQ(test, ret, 254 + 7);
KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c
index 77854161cd22..9f458be71659 100644
--- a/net/wireless/tests/scan.c
+++ b/net/wireless/tests/scan.c
@@ -2,7 +2,7 @@
/*
* KUnit tests for inform_bss functions
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <net/cfg80211.h>
@@ -406,9 +406,32 @@ static struct inform_bss_ml_sta_case {
const char *desc;
int mld_id;
bool sta_prof_vendor_elems;
+ bool include_oper_class;
+ bool nstr;
} inform_bss_ml_sta_cases[] = {
- { .desc = "no_mld_id", .mld_id = 0, .sta_prof_vendor_elems = false },
- { .desc = "mld_id_eq_1", .mld_id = 1, .sta_prof_vendor_elems = true },
+ {
+ .desc = "zero_mld_id",
+ .mld_id = 0,
+ .sta_prof_vendor_elems = false,
+ }, {
+ .desc = "zero_mld_id_with_oper_class",
+ .mld_id = 0,
+ .sta_prof_vendor_elems = false,
+ .include_oper_class = true,
+ }, {
+ .desc = "mld_id_eq_1",
+ .mld_id = 1,
+ .sta_prof_vendor_elems = true,
+ }, {
+ .desc = "mld_id_eq_1_with_oper_class",
+ .mld_id = 1,
+ .sta_prof_vendor_elems = true,
+ .include_oper_class = true,
+ }, {
+ .desc = "nstr",
+ .mld_id = 0,
+ .nstr = true,
+ },
};
KUNIT_ARRAY_PARAM_DESC(inform_bss_ml_sta, inform_bss_ml_sta_cases, desc)
@@ -440,7 +463,7 @@ static void test_inform_bss_ml_sta(struct kunit *test)
struct {
struct ieee80211_neighbor_ap_info info;
struct ieee80211_tbtt_info_ge_11 ap;
- } __packed rnr = {
+ } __packed rnr_normal = {
.info = {
.tbtt_info_hdr = u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT),
.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_ge_11),
@@ -460,6 +483,28 @@ static void test_inform_bss_ml_sta(struct kunit *test)
}
};
struct {
+ struct ieee80211_neighbor_ap_info info;
+ struct ieee80211_rnr_mld_params mld_params;
+ } __packed rnr_nstr = {
+ .info = {
+ .tbtt_info_hdr =
+ u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT) |
+ u8_encode_bits(IEEE80211_TBTT_INFO_TYPE_MLD,
+ IEEE80211_AP_INFO_TBTT_HDR_TYPE),
+ .tbtt_info_len = sizeof(struct ieee80211_rnr_mld_params),
+ .op_class = 81,
+ .channel = 11,
+ },
+ .mld_params = {
+ .mld_id = params->mld_id,
+ .params =
+ le16_encode_bits(link_id,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID),
+ }
+ };
+ size_t rnr_len = params->nstr ? sizeof(rnr_nstr) : sizeof(rnr_normal);
+ void *rnr = params->nstr ? (void *)&rnr_nstr : (void *)&rnr_normal;
+ struct {
__le16 control;
u8 var_len;
u8 mld_mac_addr[ETH_ALEN];
@@ -498,7 +543,7 @@ static void test_inform_bss_ml_sta(struct kunit *test)
u16_encode_bits(link_id,
IEEE80211_MLE_STA_CONTROL_LINK_ID)),
.var_len = sizeof(sta_prof) - 2 - 2,
- .bssid = { *rnr.ap.bssid },
+ .bssid = { *rnr_normal.ap.bssid },
.beacon_int = cpu_to_le16(101),
.tsf_offset = cpu_to_le64(-123ll),
.capabilities = cpu_to_le16(0xdead),
@@ -515,9 +560,15 @@ static void test_inform_bss_ml_sta(struct kunit *test)
skb_put_u8(input, 4);
skb_put_data(input, "TEST", 4);
+ if (params->include_oper_class) {
+ skb_put_u8(input, WLAN_EID_SUPPORTED_REGULATORY_CLASSES);
+ skb_put_u8(input, 1);
+ skb_put_u8(input, 81);
+ }
+
skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
- skb_put_u8(input, sizeof(rnr));
- skb_put_data(input, &rnr, sizeof(rnr));
+ skb_put_u8(input, rnr_len);
+ skb_put_data(input, rnr, rnr_len);
/* build a multi-link element */
skb_put_u8(input, WLAN_EID_EXTENSION);
@@ -563,9 +614,10 @@ static void test_inform_bss_ml_sta(struct kunit *test)
KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 2);
/* Check link_bss *****************************************************/
- link_bss = cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0,
- IEEE80211_BSS_TYPE_ANY,
- IEEE80211_PRIVACY_ANY);
+ link_bss = __cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY,
+ 0);
KUNIT_ASSERT_NOT_NULL(test, link_bss);
KUNIT_EXPECT_EQ(test, link_bss->signal, 0);
KUNIT_EXPECT_EQ(test, link_bss->beacon_interval,
@@ -576,21 +628,43 @@ static void test_inform_bss_ml_sta(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, link_bss->channel,
ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2462)));
+ /* Test wiphy does not set WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY */
+ if (params->nstr) {
+ KUNIT_EXPECT_EQ(test, link_bss->use_for, 0);
+ KUNIT_EXPECT_EQ(test, link_bss->cannot_use_reasons,
+ NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY);
+ KUNIT_EXPECT_NULL(test,
+ cfg80211_get_bss(wiphy, NULL, sta_prof.bssid,
+ NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY));
+ } else {
+ KUNIT_EXPECT_EQ(test, link_bss->use_for,
+ NL80211_BSS_USE_FOR_ALL);
+ KUNIT_EXPECT_EQ(test, link_bss->cannot_use_reasons, 0);
+ }
+
rcu_read_lock();
ies = rcu_dereference(link_bss->ies);
KUNIT_EXPECT_NOT_NULL(test, ies);
KUNIT_EXPECT_EQ(test, ies->tsf, tsf + le64_to_cpu(sta_prof.tsf_offset));
/* Resulting length should be:
* SSID (inherited) + RNR (inherited) + vendor element(s) +
+ * operating class (if requested) +
+ * generated RNR (if MLD ID == 0 and not NSTR) +
* MLE common info + MLE header and control
*/
if (params->sta_prof_vendor_elems)
KUNIT_EXPECT_EQ(test, ies->len,
- 6 + 2 + sizeof(rnr) + 2 + 160 + 2 + 165 +
+ 6 + 2 + rnr_len + 2 + 160 + 2 + 165 +
+ (params->include_oper_class ? 3 : 0) +
+ (!params->mld_id && !params->nstr ? 22 : 0) +
mle_basic_common_info.var_len + 5);
else
KUNIT_EXPECT_EQ(test, ies->len,
- 6 + 2 + sizeof(rnr) + 2 + 155 +
+ 6 + 2 + rnr_len + 2 + 155 +
+ (params->include_oper_class ? 3 : 0) +
+ (!params->mld_id && !params->nstr ? 22 : 0) +
mle_basic_common_info.var_len + 5);
rcu_read_unlock();
@@ -598,6 +672,172 @@ static void test_inform_bss_ml_sta(struct kunit *test)
cfg80211_put_bss(wiphy, link_bss);
}
+static struct cfg80211_parse_colocated_ap_case {
+ const char *desc;
+ u8 op_class;
+ u8 channel;
+ struct ieee80211_neighbor_ap_info info;
+ union {
+ struct ieee80211_tbtt_info_ge_11 tbtt_long;
+ struct ieee80211_tbtt_info_7_8_9 tbtt_short;
+ };
+ bool add_junk;
+ bool same_ssid;
+ bool valid;
+} cfg80211_parse_colocated_ap_cases[] = {
+ {
+ .desc = "wrong_band",
+ .info.op_class = 81,
+ .info.channel = 11,
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "wrong_type",
+ /* IEEE80211_AP_INFO_TBTT_HDR_TYPE is in the least significant bits */
+ .info.tbtt_info_hdr = IEEE80211_TBTT_INFO_TYPE_MLD,
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "colocated_invalid_len_short",
+ .info.tbtt_info_len = 6,
+ .tbtt_short = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP |
+ IEEE80211_RNR_TBTT_PARAMS_SAME_SSID,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "colocated_invalid_len_short_mld",
+ .info.tbtt_info_len = 10,
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "colocated_non_mld",
+ .info.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_7_8_9),
+ .tbtt_short = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP |
+ IEEE80211_RNR_TBTT_PARAMS_SAME_SSID,
+ },
+ .same_ssid = true,
+ .valid = true,
+ },
+ {
+ .desc = "colocated_non_mld_invalid_bssid",
+ .info.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_7_8_9),
+ .tbtt_short = {
+ .bssid = { 0xff, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP |
+ IEEE80211_RNR_TBTT_PARAMS_SAME_SSID,
+ },
+ .same_ssid = true,
+ .valid = false,
+ },
+ {
+ .desc = "colocated_mld",
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = true,
+ },
+ {
+ .desc = "colocated_mld",
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .add_junk = true,
+ .valid = false,
+ },
+ {
+ .desc = "colocated_disabled_mld",
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ .mld_params.params = cpu_to_le16(IEEE80211_RNR_MLD_PARAMS_DISABLED_LINK),
+ },
+ .valid = false,
+ },
+};
+KUNIT_ARRAY_PARAM_DESC(cfg80211_parse_colocated_ap, cfg80211_parse_colocated_ap_cases, desc)
+
+static void test_cfg80211_parse_colocated_ap(struct kunit *test)
+{
+ const struct cfg80211_parse_colocated_ap_case *params = test->param_value;
+ struct sk_buff *input = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ struct cfg80211_bss_ies *ies;
+ struct ieee80211_neighbor_ap_info info;
+ LIST_HEAD(coloc_ap_list);
+ int count;
+
+ KUNIT_ASSERT_NOT_NULL(test, input);
+
+ info = params->info;
+
+ /* Reasonable values for a colocated AP */
+ if (!info.tbtt_info_len)
+ info.tbtt_info_len = sizeof(params->tbtt_long);
+ if (!info.op_class)
+ info.op_class = 131;
+ if (!info.channel)
+ info.channel = 33;
+ /* Zero is the correct default for .btt_info_hdr (one entry, TBTT type) */
+
+ skb_put_u8(input, WLAN_EID_SSID);
+ skb_put_u8(input, 4);
+ skb_put_data(input, "TEST", 4);
+
+ skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
+ skb_put_u8(input, sizeof(info) + info.tbtt_info_len + (params->add_junk ? 3 : 0));
+ skb_put_data(input, &info, sizeof(info));
+ skb_put_data(input, &params->tbtt_long, info.tbtt_info_len);
+
+ if (params->add_junk)
+ skb_put_data(input, "123", 3);
+
+ ies = kunit_kzalloc(test, struct_size(ies, data, input->len), GFP_KERNEL);
+ ies->len = input->len;
+ memcpy(ies->data, input->data, input->len);
+
+ count = cfg80211_parse_colocated_ap(ies, &coloc_ap_list);
+
+ KUNIT_EXPECT_EQ(test, count, params->valid);
+ KUNIT_EXPECT_EQ(test, list_count_nodes(&coloc_ap_list), params->valid);
+
+ if (params->valid && !list_empty(&coloc_ap_list)) {
+ struct cfg80211_colocated_ap *ap;
+
+ ap = list_first_entry(&coloc_ap_list, typeof(*ap), list);
+ if (info.tbtt_info_len <= sizeof(params->tbtt_short))
+ KUNIT_EXPECT_MEMEQ(test, ap->bssid, params->tbtt_short.bssid, ETH_ALEN);
+ else
+ KUNIT_EXPECT_MEMEQ(test, ap->bssid, params->tbtt_long.bssid, ETH_ALEN);
+
+ if (params->same_ssid) {
+ KUNIT_EXPECT_EQ(test, ap->ssid_len, 4);
+ KUNIT_EXPECT_MEMEQ(test, ap->ssid, "TEST", 4);
+ } else {
+ KUNIT_EXPECT_EQ(test, ap->ssid_len, 0);
+ }
+ }
+
+ cfg80211_free_coloc_ap_list(&coloc_ap_list);
+}
+
static struct kunit_case gen_new_ie_test_cases[] = {
KUNIT_CASE_PARAM(test_gen_new_ie, gen_new_ie_gen_params),
KUNIT_CASE(test_gen_new_ie_malformed),
@@ -623,3 +863,16 @@ static struct kunit_suite inform_bss = {
};
kunit_test_suite(inform_bss);
+
+static struct kunit_case scan_6ghz_cases[] = {
+ KUNIT_CASE_PARAM(test_cfg80211_parse_colocated_ap,
+ cfg80211_parse_colocated_ap_gen_params),
+ {}
+};
+
+static struct kunit_suite scan_6ghz = {
+ .name = "cfg80211-scan-6ghz",
+ .test_cases = scan_6ghz_cases,
+};
+
+kunit_test_suite(scan_6ghz);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 1f374c8a17a5..e039e66ab377 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1,4 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright(c) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018, 2020-2024 Intel Corporation
+ */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cfg80211
@@ -135,7 +140,8 @@
__field(u32, width) \
__field(u32, center_freq1) \
__field(u32, freq1_offset) \
- __field(u32, center_freq2)
+ __field(u32, center_freq2) \
+ __field(u16, punctured)
#define CHAN_DEF_ASSIGN(chandef) \
do { \
if ((chandef) && (chandef)->chan) { \
@@ -148,6 +154,7 @@
__entry->center_freq1 = (chandef)->center_freq1;\
__entry->freq1_offset = (chandef)->freq1_offset;\
__entry->center_freq2 = (chandef)->center_freq2;\
+ __entry->punctured = (chandef)->punctured; \
} else { \
__entry->band = 0; \
__entry->control_freq = 0; \
@@ -156,14 +163,15 @@
__entry->center_freq1 = 0; \
__entry->freq1_offset = 0; \
__entry->center_freq2 = 0; \
+ __entry->punctured = 0; \
} \
} while (0)
#define CHAN_DEF_PR_FMT \
- "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u"
+ "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u, punct: 0x%x"
#define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \
__entry->freq_offset, __entry->width, \
__entry->center_freq1, __entry->freq1_offset, \
- __entry->center_freq2
+ __entry->center_freq2, __entry->punctured
#define FILS_AAD_ASSIGN(fa) \
do { \
@@ -810,8 +818,8 @@ DECLARE_EVENT_CLASS(station_add_change,
params->link_sta_params.opmode_notif_used;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
- ", station flags mask: %u, station flags set: %u, "
- "station modify mask: %u, listen interval: %d, aid: %u, "
+ ", station flags mask: 0x%x, station flags set: 0x%x, "
+ "station modify mask: 0x%x, listen interval: %d, aid: %u, "
"plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
__entry->sta_flags_mask, __entry->sta_flags_set,
@@ -859,6 +867,7 @@ DECLARE_EVENT_CLASS(station_del,
MAC_ENTRY(sta_mac)
__field(u8, subtype)
__field(u16, reason_code)
+ __field(int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -866,11 +875,13 @@ DECLARE_EVENT_CLASS(station_del,
MAC_ASSIGN(sta_mac, params->mac);
__entry->subtype = params->subtype;
__entry->reason_code = params->reason_code;
+ __entry->link_id = params->link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
- ", subtype: %u, reason_code: %u",
+ ", subtype: %u, reason_code: %u, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
- __entry->subtype, __entry->reason_code)
+ __entry->subtype, __entry->reason_code,
+ __entry->link_id)
);
DEFINE_EVENT(station_del, rdev_del_station,
@@ -1064,7 +1075,7 @@ TRACE_EVENT(rdev_return_int_mpath_info,
),
TP_printk(WIPHY_PR_FMT ", returned %d. mpath info - generation: %d, "
"filled: %u, frame qlen: %u, sn: %u, metric: %u, exptime: %u,"
- " discovery timeout: %u, discovery retries: %u, flags: %u",
+ " discovery timeout: %u, discovery retries: %u, flags: 0x%x",
WIPHY_PR_ARG, __entry->ret, __entry->generation,
__entry->filled, __entry->frame_qlen, __entry->sn,
__entry->metric, __entry->exptime, __entry->discovery_timeout,
@@ -1306,7 +1317,7 @@ TRACE_EVENT(rdev_assoc,
req->fils_nonces, 2 * FILS_NONCE_LEN);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
- ", previous bssid: %pM, use mfp: %s, flags: %u",
+ ", previous bssid: %pM, use mfp: %s, flags: 0x%x",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
__entry->prev_bssid, BOOL_TO_STR(__entry->use_mfp),
__entry->flags)
@@ -1428,7 +1439,7 @@ TRACE_EVENT(rdev_connect,
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, "
- "flags: %u, previous bssid: %pM",
+ "flags: 0x%x, previous bssid: %pM",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid,
__entry->auth_type, BOOL_TO_STR(__entry->privacy),
__entry->wpa_versions, __entry->flags, __entry->prev_bssid)
@@ -2324,6 +2335,7 @@ TRACE_EVENT(rdev_channel_switch,
__field(u8, count)
__dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon)
__dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp)
+ __field(u8, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -2341,11 +2353,13 @@ TRACE_EVENT(rdev_channel_switch,
memcpy(__get_dynamic_array(pres_ofs),
params->counter_offsets_presp,
params->n_counter_offsets_presp * sizeof(u16));
+ __entry->link_id = params->link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", block_tx: %d, count: %u, radar_required: %d",
+ ", block_tx: %d, count: %u, radar_required: %d, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->block_tx, __entry->count, __entry->radar_required)
+ __entry->block_tx, __entry->count, __entry->radar_required,
+ __entry->link_id)
);
TRACE_EVENT(rdev_set_qos_map,
@@ -3267,47 +3281,39 @@ TRACE_EVENT(cfg80211_chandef_dfs_required,
TRACE_EVENT(cfg80211_ch_switch_notify,
TP_PROTO(struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id,
- u16 punct_bitmap),
- TP_ARGS(netdev, chandef, link_id, punct_bitmap),
+ unsigned int link_id),
+ TP_ARGS(netdev, chandef, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(unsigned int, link_id)
- __field(u16, punct_bitmap)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->link_id = link_id;
- __entry->punct_bitmap = punct_bitmap;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u",
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id,
- __entry->punct_bitmap)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);
TRACE_EVENT(cfg80211_ch_switch_started_notify,
TP_PROTO(struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id,
- u16 punct_bitmap),
- TP_ARGS(netdev, chandef, link_id, punct_bitmap),
+ unsigned int link_id),
+ TP_ARGS(netdev, chandef, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(unsigned int, link_id)
- __field(u16, punct_bitmap)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->link_id = link_id;
- __entry->punct_bitmap = punct_bitmap;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u",
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id,
- __entry->punct_bitmap)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);
TRACE_EVENT(cfg80211_radar_event,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index d1ce3bee2797..2bde8a354631 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -791,15 +791,19 @@ ieee80211_amsdu_subframe_length(void *field, u8 mesh_flags, u8 hdr_type)
bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr)
{
- int offset = 0, remaining, subframe_len, padding;
+ int offset = 0, subframe_len, padding;
for (offset = 0; offset < skb->len; offset += subframe_len + padding) {
+ int remaining = skb->len - offset;
struct {
__be16 len;
u8 mesh_flags;
} hdr;
u16 len;
+ if (sizeof(hdr) > remaining)
+ return false;
+
if (skb_copy_bits(skb, offset + 2 * ETH_ALEN, &hdr, sizeof(hdr)) < 0)
return false;
@@ -807,7 +811,6 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr)
mesh_hdr);
subframe_len = sizeof(struct ethhdr) + len;
padding = (4 - subframe_len) & 0x3;
- remaining = skb->len - offset;
if (subframe_len > remaining)
return false;
@@ -825,7 +828,7 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
{
unsigned int hlen = ALIGN(extra_headroom, 4);
struct sk_buff *frame = NULL;
- int offset = 0, remaining;
+ int offset = 0;
struct {
struct ethhdr eth;
uint8_t flags;
@@ -839,10 +842,14 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
copy_len = sizeof(hdr);
while (!last) {
+ int remaining = skb->len - offset;
unsigned int subframe_len;
int len, mesh_len = 0;
u8 padding;
+ if (copy_len > remaining)
+ goto purge;
+
skb_copy_bits(skb, offset, &hdr, copy_len);
if (iftype == NL80211_IFTYPE_MESH_POINT)
mesh_len = __ieee80211_get_mesh_hdrlen(hdr.flags);
@@ -852,7 +859,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
padding = (4 - subframe_len) & 0x3;
/* the last MSDU has no padding */
- remaining = skb->len - offset;
if (subframe_len > remaining)
goto purge;
/* mitigate A-MSDU aggregation injection attacks */
@@ -2073,6 +2079,82 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
}
EXPORT_SYMBOL(ieee80211_operating_class_to_band);
+bool ieee80211_operating_class_to_chandef(u8 operating_class,
+ struct ieee80211_channel *chan,
+ struct cfg80211_chan_def *chandef)
+{
+ u32 control_freq, offset = 0;
+ enum nl80211_band band;
+
+ if (!ieee80211_operating_class_to_band(operating_class, &band) ||
+ !chan || band != chan->band)
+ return false;
+
+ control_freq = chan->center_freq;
+ chandef->chan = chan;
+
+ if (control_freq >= 5955)
+ offset = control_freq - 5955;
+ else if (control_freq >= 5745)
+ offset = control_freq - 5745;
+ else if (control_freq >= 5180)
+ offset = control_freq - 5180;
+ offset /= 20;
+
+ switch (operating_class) {
+ case 81: /* 2 GHz band; 20 MHz; channels 1..13 */
+ case 82: /* 2 GHz band; 20 MHz; channel 14 */
+ case 115: /* 5 GHz band; 20 MHz; channels 36,40,44,48 */
+ case 118: /* 5 GHz band; 20 MHz; channels 52,56,60,64 */
+ case 121: /* 5 GHz band; 20 MHz; channels 100..144 */
+ case 124: /* 5 GHz band; 20 MHz; channels 149,153,157,161 */
+ case 125: /* 5 GHz band; 20 MHz; channels 149..177 */
+ case 131: /* 6 GHz band; 20 MHz; channels 1..233*/
+ case 136: /* 6 GHz band; 20 MHz; channel 2 */
+ chandef->center_freq1 = control_freq;
+ chandef->width = NL80211_CHAN_WIDTH_20;
+ return true;
+ case 83: /* 2 GHz band; 40 MHz; channels 1..9 */
+ case 116: /* 5 GHz band; 40 MHz; channels 36,44 */
+ case 119: /* 5 GHz band; 40 MHz; channels 52,60 */
+ case 122: /* 5 GHz band; 40 MHz; channels 100,108,116,124,132,140 */
+ case 126: /* 5 GHz band; 40 MHz; channels 149,157,165,173 */
+ chandef->center_freq1 = control_freq + 10;
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ return true;
+ case 84: /* 2 GHz band; 40 MHz; channels 5..13 */
+ case 117: /* 5 GHz band; 40 MHz; channels 40,48 */
+ case 120: /* 5 GHz band; 40 MHz; channels 56,64 */
+ case 123: /* 5 GHz band; 40 MHz; channels 104,112,120,128,136,144 */
+ case 127: /* 5 GHz band; 40 MHz; channels 153,161,169,177 */
+ chandef->center_freq1 = control_freq - 10;
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ return true;
+ case 132: /* 6 GHz band; 40 MHz; channels 1,5,..,229*/
+ chandef->center_freq1 = control_freq + 10 - (offset & 1) * 20;
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ return true;
+ case 128: /* 5 GHz band; 80 MHz; channels 36..64,100..144,149..177 */
+ case 133: /* 6 GHz band; 80 MHz; channels 1,5,..,229 */
+ chandef->center_freq1 = control_freq + 30 - (offset & 3) * 20;
+ chandef->width = NL80211_CHAN_WIDTH_80;
+ return true;
+ case 129: /* 5 GHz band; 160 MHz; channels 36..64,100..144,149..177 */
+ case 134: /* 6 GHz band; 160 MHz; channels 1,5,..,229 */
+ chandef->center_freq1 = control_freq + 70 - (offset & 7) * 20;
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ return true;
+ case 130: /* 5 GHz band; 80+80 MHz; channels 36..64,100..144,149..177 */
+ case 135: /* 6 GHz band; 80+80 MHz; channels 1,5,..,229 */
+ /* The center_freq2 of 80+80 MHz is unknown */
+ case 137: /* 6 GHz band; 320 MHz; channels 1,5,..,229 */
+ /* 320-1 or 320-2 channelization is unknown */
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL(ieee80211_operating_class_to_chandef);
+
bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
u8 *op_class)
{
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 68729aa3a5d5..dc72302cbd07 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -17,8 +17,6 @@ config X25
if you want that) and the lower level data link layer protocol LAPB
(say Y to "LAPB Data Link Driver" below if you want that).
- You can read more about X.25 at <https://www.sangoma.com/tutorials/x25/> and
- <http://docwiki.cisco.com/wiki/X.25>.
Information about X.25 for Linux is contained in the files
<file:Documentation/networking/x25.rst> and
<file:Documentation/networking/x25-iface.rst>.
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f7a7c7798c3b..d18d51412cc0 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -460,12 +460,12 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
if (get_user(len, optlen))
goto out;
- len = min_t(unsigned int, len, sizeof(int));
-
rc = -EINVAL;
if (len < 0)
goto out;
+ len = min_t(unsigned int, len, sizeof(int));
+
rc = -EFAULT;
if (put_user(len, optlen))
goto out;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f13aa3353e3..3404d076a8a3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -167,8 +167,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
contd = XDP_PKT_CONTD;
err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err || likely(!frags))
- goto out;
+ if (err)
+ goto err;
+ if (likely(!frags))
+ return 0;
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
@@ -177,11 +179,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
- return err;
+ goto err;
list_del(&pos->xskb_list_node);
}
-out:
+ return 0;
+err:
+ xsk_buff_free(xdp);
return err;
}
@@ -309,10 +313,13 @@ static bool xsk_is_bound(struct xdp_sock *xs)
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
+ struct net_device *dev = xdp->rxq->dev;
+ u32 qid = xdp->rxq->queue_index;
+
if (!xsk_is_bound(xs))
return -ENXIO;
- if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+ if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem)
return -EINVAL;
if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
@@ -718,7 +725,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
- skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
+ refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
}
if (first_frag && desc->options & XDP_TX_METADATA) {
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 28711cc44ced..ce60ecd48a4d 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -555,6 +555,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
xskb->xdp.data_meta = xskb->xdp.data;
+ xskb->xdp.flags = 0;
if (pool->dma_need_sync) {
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 9f8955367275..09dcea0cbbed 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -194,6 +194,7 @@ static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
}
static const struct sock_diag_handler xsk_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_XDP,
.dump = xsk_diag_handler_dump,
};
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index d3b3f9e720b3..fe82e2d07300 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
+#include <net/hotdata.h>
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -169,7 +170,8 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&ctx->out_queue) >=
+ READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 41533c631431..e6da7e8495c9 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -858,4 +858,5 @@ int xfrm_count_pfkey_enc_supported(void)
}
EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
+MODULE_DESCRIPTION("XFRM Algorithm interface");
MODULE_LICENSE("GPL");
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3784534c9185..653e51ae3964 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -407,7 +407,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct net_device *dev = x->xso.dev;
- if (!x->type_offload || x->encap)
+ if (!x->type_offload)
return false;
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index bd4ce21d76d7..161f535c8b94 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,7 @@
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
#include <net/dst_metadata.h>
+#include <net/hotdata.h>
#include "xfrm_inout.h"
@@ -764,7 +765,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index 7d5e920141e9..5ea15037ebd1 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp
__bpf_kfunc_end_defs();
-BTF_SET8_START(xfrm_ifc_kfunc_set)
+BTF_KFUNCS_START(xfrm_ifc_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
-BTF_SET8_END(xfrm_ifc_kfunc_set)
+BTF_KFUNCS_END(xfrm_ifc_kfunc_set)
static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 21d50d75c260..4df5c06e3ece 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -240,7 +240,6 @@ static void xfrmi_dev_free(struct net_device *dev)
struct xfrm_if *xi = netdev_priv(dev);
gro_cells_destroy(&xi->gro_cells);
- free_percpu(dev->tstats);
}
static int xfrmi_create(struct net_device *dev)
@@ -727,7 +726,7 @@ static int xfrmi_get_iflink(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->p.link;
+ return READ_ONCE(xi->p.link);
}
static const struct net_device_ops xfrmi_netdev_ops = {
@@ -749,6 +748,7 @@ static void xfrmi_dev_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->needs_free_netdev = true;
dev->priv_destructor = xfrmi_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
eth_broadcast_addr(dev->broadcast);
@@ -765,15 +765,9 @@ static int xfrmi_dev_init(struct net_device *dev)
struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&xi->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
dev->features |= NETIF_F_LLTX;
dev->features |= XFRMI_FEATURES;
@@ -957,12 +951,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
-static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_exit_list, exit_list) {
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if __rcu **xip;
@@ -973,18 +967,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
for (xip = &xfrmn->xfrmi[i];
(xi = rtnl_dereference(*xip)) != NULL;
xip = &xi->next)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
if (xi)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations xfrmi_net_ops = {
- .exit_batch = xfrmi_exit_batch_net,
+ .exit_batch_rtnl = xfrmi_exit_batch_rtnl,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 662c83beb345..e5722c95b8bb 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -704,9 +704,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct xfrm_state *x = skb_dst(skb)->xfrm;
+ int family;
int err;
- switch (x->outer_mode.family) {
+ family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
+ : skb_dst(skb)->ops->family;
+
+ switch (family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1b7e75159727..6affe5cd85d8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
#include <linux/audit.h>
#include <linux/rhashtable.h>
#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/inet_ecn.h>
@@ -2694,7 +2695,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
- family = xfrm[i]->props.family;
+ if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ family = xfrm[i]->props.family;
+
oif = fl->flowi_oif ? : fl->flowi_l3mdev;
dst = xfrm_dst_lookup(xfrm[i], tos, oif,
&saddr, &daddr, family, mark);
@@ -3416,7 +3419,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
- fl4->flowi4_tos = flkeys->ip.tos;
+ fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -3503,6 +3506,128 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
return 0;
}
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+ const struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (family == AF_INET &&
+ fl4->flowi4_proto == IPPROTO_ICMP &&
+ (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+ fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6) {
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+ (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+ fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+ fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+ const struct flowi *fl, struct flowi *fl1)
+{
+ bool ret = true;
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+ int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) :
+ (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+ if (!newskb)
+ return true;
+
+ if (!pskb_pull(newskb, hl))
+ goto out;
+
+ skb_reset_network_header(newskb);
+
+ if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+ goto out;
+
+ fl1->flowi_oif = fl->flowi_oif;
+ fl1->flowi_mark = fl->flowi_mark;
+ fl1->flowi_tos = fl->flowi_tos;
+ nf_nat_decode_session(newskb, fl1, family);
+ ret = false;
+
+out:
+ consume_skb(newskb);
+ return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+ const struct xfrm_selector *sel,
+ const struct flowi *fl)
+{
+ bool ret = false;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return ret;
+
+ ret = xfrm_selector_match(sel, &fl1, family);
+ }
+
+ return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+ const struct flowi *fl, unsigned short family,
+ u32 if_id)
+{
+ struct xfrm_policy *pol = NULL;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+ struct net *net = dev_net(skb->dev);
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return pol;
+
+ pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ }
+
+ return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+ unsigned short family, struct dst_entry *dst)
+{
+ if (icmp_err_packet(fl, family)) {
+ struct net *net = dev_net(skb->dev);
+ struct dst_entry *dst2;
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return dst;
+
+ dst_hold(dst);
+
+ dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+ if (IS_ERR(dst2))
+ return dst;
+
+ if (dst2->xfrm) {
+ dst_release(dst);
+ dst = dst2;
+ } else {
+ dst_release(dst2);
+ }
+ }
+
+ return dst;
+}
+
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
unsigned short family)
{
@@ -3549,9 +3674,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
for (i = sp->len - 1; i >= 0; i--) {
struct xfrm_state *x = sp->xvec[i];
+ int ret = 0;
+
if (!xfrm_selector_match(&x->sel, &fl, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
- return 0;
+ ret = 1;
+ if (x->props.flags & XFRM_STATE_ICMP &&
+ xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+ ret = 0;
+ if (ret) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+ return 0;
+ }
}
}
}
@@ -3574,6 +3707,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 0;
}
+ if (!pol && dir == XFRM_POLICY_FWD)
+ pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
if (!pol) {
if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3707,6 +3843,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
res = 0;
dst = NULL;
}
+
+ if (dst && !dst->xfrm)
+ dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
skb_dst_set(skb, dst);
return res;
}
@@ -4025,10 +4165,7 @@ static int __net_init xfrm_policy_init(struct net *net)
int dir, err;
if (net_eq(net, &init_net)) {
- xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
- sizeof(struct xfrm_dst),
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL);
+ xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
err = rhashtable_init(&xfrm_policy_inexact_table,
&xfrm_pol_inexact_params);
BUG_ON(err);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fee9b5cf37a7..5f9bf8e5c933 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -52,6 +52,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+ xfrm_state_update_stats(net);
snmp_get_cpu_field_batch(buff, xfrm_mib_list,
net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index bda5327bf34d..0c306473a79d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -570,7 +570,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
int err = 0;
spin_lock(&x->lock);
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (x->km.state == XFRM_STATE_DEAD)
goto out;
@@ -1935,7 +1935,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (!READ_ONCE(x->curlft.use_time))
WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
@@ -1957,6 +1957,19 @@ int xfrm_state_check_expire(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_check_expire);
+void xfrm_state_update_stats(struct net *net)
+{
+ struct xfrm_state *x;
+ int i;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
+ xfrm_dev_state_update_stats(x);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+}
+
struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family)
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
index 9e20d4a377f7..2248eda741f8 100644
--- a/net/xfrm/xfrm_state_bpf.c
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
__bpf_kfunc_end_defs();
-BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_KFUNCS_START(xfrm_state_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
-BTF_SET8_END(xfrm_state_kfunc_set)
+BTF_KFUNCS_END(xfrm_state_kfunc_set)
static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ad01997c3aa9..810b520493f3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -902,7 +902,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
if (x->xso.dev)
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
put_unaligned(x->stats.replay_window, &p->stats.replay_window);
put_unaligned(x->stats.replay, &p->stats.replay);
@@ -2017,6 +2017,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
if (xp->xfrm_nr == 0)
return 0;
+ if (xp->xfrm_nr > XFRM_MAX_DEPTH)
+ return -ENOBUFS;
+
for (i = 0; i < xp->xfrm_nr; i++) {
struct xfrm_user_tmpl *up = &vec[i];
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
@@ -3888,5 +3891,6 @@ static void __exit xfrm_user_exit(void)
module_init(xfrm_user_init);
module_exit(xfrm_user_exit);
+MODULE_DESCRIPTION("XFRM User interface");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
diff --git a/rust/Makefile b/rust/Makefile
index 9d2a16cc91cb..a78fcf4004b0 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.o
ifdef CONFIG_RUST
# `$(rust_flags)` is passed in case the user added `--sysroot`.
-rustc_sysroot := $(shell $(RUSTC) $(rust_flags) --print sysroot)
+rustc_sysroot := $(shell MAKEFLAGS= $(RUSTC) $(rust_flags) --print sysroot)
rustc_host_target := $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2)
RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library
@@ -108,14 +108,14 @@ rustdoc-macros: private rustdoc_host = yes
rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \
--extern proc_macro
rustdoc-macros: $(src)/macros/lib.rs FORCE
- $(call if_changed,rustdoc)
+ +$(call if_changed,rustdoc)
rustdoc-core: private rustc_target_flags = $(core-cfgs)
rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
- $(call if_changed,rustdoc)
+ +$(call if_changed,rustdoc)
rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE
- $(call if_changed,rustdoc)
+ +$(call if_changed,rustdoc)
# We need to allow `rustdoc::broken_intra_doc_links` because some
# `no_global_oom_handling` functions refer to non-`no_global_oom_handling`
@@ -124,7 +124,7 @@ rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE
rustdoc-alloc: private rustc_target_flags = $(alloc-cfgs) \
-Arustdoc::broken_intra_doc_links
rustdoc-alloc: $(src)/alloc/lib.rs rustdoc-core rustdoc-compiler_builtins FORCE
- $(call if_changed,rustdoc)
+ +$(call if_changed,rustdoc)
rustdoc-kernel: private rustc_target_flags = --extern alloc \
--extern build_error --extern macros=$(objtree)/$(obj)/libmacros.so \
@@ -132,7 +132,7 @@ rustdoc-kernel: private rustc_target_flags = --extern alloc \
rustdoc-kernel: $(src)/kernel/lib.rs rustdoc-core rustdoc-macros \
rustdoc-compiler_builtins rustdoc-alloc $(obj)/libmacros.so \
$(obj)/bindings.o FORCE
- $(call if_changed,rustdoc)
+ +$(call if_changed,rustdoc)
quiet_cmd_rustc_test_library = RUSTC TL $<
cmd_rustc_test_library = \
@@ -146,18 +146,18 @@ quiet_cmd_rustc_test_library = RUSTC TL $<
--crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $<
rusttestlib-build_error: $(src)/build_error.rs rusttest-prepare FORCE
- $(call if_changed,rustc_test_library)
+ +$(call if_changed,rustc_test_library)
rusttestlib-macros: private rustc_target_flags = --extern proc_macro
rusttestlib-macros: private rustc_test_library_proc = yes
rusttestlib-macros: $(src)/macros/lib.rs rusttest-prepare FORCE
- $(call if_changed,rustc_test_library)
+ +$(call if_changed,rustc_test_library)
rusttestlib-bindings: $(src)/bindings/lib.rs rusttest-prepare FORCE
- $(call if_changed,rustc_test_library)
+ +$(call if_changed,rustc_test_library)
rusttestlib-uapi: $(src)/uapi/lib.rs rusttest-prepare FORCE
- $(call if_changed,rustc_test_library)
+ +$(call if_changed,rustc_test_library)
quiet_cmd_rustdoc_test = RUSTDOC T $<
cmd_rustdoc_test = \
@@ -189,7 +189,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $<
$(src)/kernel/lib.rs $(obj)/kernel.o \
$(objtree)/scripts/rustdoc_test_builder \
$(objtree)/scripts/rustdoc_test_gen FORCE
- $(call if_changed,rustdoc_test_kernel)
+ +$(call if_changed,rustdoc_test_kernel)
# We cannot use `-Zpanic-abort-tests` because some tests are dynamic,
# so for the moment we skip `-Cpanic=abort`.
@@ -254,21 +254,21 @@ quiet_cmd_rustsysroot = RUSTSYSROOT
$(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib
rusttest-prepare: FORCE
- $(call if_changed,rustsysroot)
+ +$(call if_changed,rustsysroot)
rusttest-macros: private rustc_target_flags = --extern proc_macro
rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro
rusttest-macros: $(src)/macros/lib.rs rusttest-prepare FORCE
- $(call if_changed,rustc_test)
- $(call if_changed,rustdoc_test)
+ +$(call if_changed,rustc_test)
+ +$(call if_changed,rustdoc_test)
rusttest-kernel: private rustc_target_flags = --extern alloc \
--extern build_error --extern macros --extern bindings --extern uapi
rusttest-kernel: $(src)/kernel/lib.rs rusttest-prepare \
rusttestlib-build_error rusttestlib-macros rusttestlib-bindings \
rusttestlib-uapi FORCE
- $(call if_changed,rustc_test)
- $(call if_changed,rustc_test_library)
+ +$(call if_changed,rustc_test)
+ +$(call if_changed,rustc_test_library)
ifdef CONFIG_CC_IS_CLANG
bindgen_c_flags = $(c_flags)
@@ -396,7 +396,7 @@ quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@
# Therefore, to get `libmacros.so` automatically recompiled when the compiler
# version changes, we add `core.o` as a dependency (even if it is not needed).
$(obj)/libmacros.so: $(src)/macros/lib.rs $(obj)/core.o FORCE
- $(call if_changed_dep,rustc_procmacro)
+ +$(call if_changed_dep,rustc_procmacro)
quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L $@
cmd_rustc_library = \
@@ -435,36 +435,36 @@ $(obj)/core.o: private skip_flags = -Dunreachable_pub
$(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym))
$(obj)/core.o: private rustc_target_flags = $(core-cfgs)
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs scripts/target.json FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
$(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*'
$(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
$(obj)/alloc.o: private skip_clippy = 1
$(obj)/alloc.o: private skip_flags = -Dunreachable_pub
$(obj)/alloc.o: private rustc_target_flags = $(alloc-cfgs)
$(obj)/alloc.o: $(src)/alloc/lib.rs $(obj)/compiler_builtins.o FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
$(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
$(obj)/bindings.o: $(src)/bindings/lib.rs \
$(obj)/compiler_builtins.o \
$(obj)/bindings/bindings_generated.rs \
$(obj)/bindings/bindings_helpers_generated.rs FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
$(obj)/uapi.o: $(src)/uapi/lib.rs \
$(obj)/compiler_builtins.o \
$(obj)/uapi/uapi_generated.rs FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
$(obj)/kernel.o: private rustc_target_flags = --extern alloc \
--extern build_error --extern macros --extern bindings --extern uapi
$(obj)/kernel.o: $(src)/kernel/lib.rs $(obj)/alloc.o $(obj)/build_error.o \
$(obj)/libmacros.so $(obj)/bindings.o $(obj)/uapi.o FORCE
- $(call if_changed_dep,rustc_library)
+ +$(call if_changed_dep,rustc_library)
endif # CONFIG_RUST
diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs
index 150e13750ff7..abb791cc2371 100644
--- a/rust/alloc/alloc.rs
+++ b/rust/alloc/alloc.rs
@@ -379,13 +379,20 @@ pub const fn handle_alloc_error(layout: Layout) -> ! {
panic!("allocation failed");
}
+ #[inline]
fn rt_error(layout: Layout) -> ! {
unsafe {
__rust_alloc_error_handler(layout.size(), layout.align());
}
}
- unsafe { core::intrinsics::const_eval_select((layout,), ct_error, rt_error) }
+ #[cfg(not(feature = "panic_immediate_abort"))]
+ unsafe {
+ core::intrinsics::const_eval_select((layout,), ct_error, rt_error)
+ }
+
+ #[cfg(feature = "panic_immediate_abort")]
+ ct_error(layout)
}
// For alloc test `std::alloc::handle_alloc_error` can be used directly.
@@ -418,12 +425,14 @@ pub mod __alloc_error_handler {
}
}
+#[cfg(not(no_global_oom_handling))]
/// Specialize clones into pre-allocated, uninitialized memory.
/// Used by `Box::clone` and `Rc`/`Arc::make_mut`.
pub(crate) trait WriteCloneIntoRaw: Sized {
unsafe fn write_clone_into_raw(&self, target: *mut Self);
}
+#[cfg(not(no_global_oom_handling))]
impl<T: Clone> WriteCloneIntoRaw for T {
#[inline]
default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
@@ -433,6 +442,7 @@ impl<T: Clone> WriteCloneIntoRaw for T {
}
}
+#[cfg(not(no_global_oom_handling))]
impl<T: Copy> WriteCloneIntoRaw for T {
#[inline]
unsafe fn write_clone_into_raw(&self, target: *mut Self) {
diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs
index 9620eba17268..c93a22a5c97f 100644
--- a/rust/alloc/boxed.rs
+++ b/rust/alloc/boxed.rs
@@ -161,7 +161,7 @@ use core::marker::Tuple;
use core::marker::Unsize;
use core::mem::{self, SizedTypeProperties};
use core::ops::{
- CoerceUnsized, Deref, DerefMut, DispatchFromDyn, Generator, GeneratorState, Receiver,
+ CoerceUnsized, Coroutine, CoroutineState, Deref, DerefMut, DispatchFromDyn, Receiver,
};
use core::pin::Pin;
use core::ptr::{self, NonNull, Unique};
@@ -211,7 +211,7 @@ impl<T> Box<T> {
/// ```
/// let five = Box::new(5);
/// ```
- #[cfg(all(not(no_global_oom_handling)))]
+ #[cfg(not(no_global_oom_handling))]
#[inline(always)]
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use]
@@ -1042,10 +1042,18 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
/// use std::ptr;
///
/// let x = Box::new(String::from("Hello"));
- /// let p = Box::into_raw(x);
+ /// let ptr = Box::into_raw(x);
/// unsafe {
- /// ptr::drop_in_place(p);
- /// dealloc(p as *mut u8, Layout::new::<String>());
+ /// ptr::drop_in_place(ptr);
+ /// dealloc(ptr as *mut u8, Layout::new::<String>());
+ /// }
+ /// ```
+ /// Note: This is equivalent to the following:
+ /// ```
+ /// let x = Box::new(String::from("Hello"));
+ /// let ptr = Box::into_raw(x);
+ /// unsafe {
+ /// drop(Box::from_raw(ptr));
/// }
/// ```
///
@@ -2110,28 +2118,28 @@ impl<T: ?Sized, A: Allocator> AsMut<T> for Box<T, A> {
#[stable(feature = "pin", since = "1.33.0")]
impl<T: ?Sized, A: Allocator> Unpin for Box<T, A> where A: 'static {}
-#[unstable(feature = "generator_trait", issue = "43122")]
-impl<G: ?Sized + Generator<R> + Unpin, R, A: Allocator> Generator<R> for Box<G, A>
+#[unstable(feature = "coroutine_trait", issue = "43122")]
+impl<G: ?Sized + Coroutine<R> + Unpin, R, A: Allocator> Coroutine<R> for Box<G, A>
where
A: 'static,
{
type Yield = G::Yield;
type Return = G::Return;
- fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState<Self::Yield, Self::Return> {
+ fn resume(mut self: Pin<&mut Self>, arg: R) -> CoroutineState<Self::Yield, Self::Return> {
G::resume(Pin::new(&mut *self), arg)
}
}
-#[unstable(feature = "generator_trait", issue = "43122")]
-impl<G: ?Sized + Generator<R>, R, A: Allocator> Generator<R> for Pin<Box<G, A>>
+#[unstable(feature = "coroutine_trait", issue = "43122")]
+impl<G: ?Sized + Coroutine<R>, R, A: Allocator> Coroutine<R> for Pin<Box<G, A>>
where
A: 'static,
{
type Yield = G::Yield;
type Return = G::Return;
- fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState<Self::Yield, Self::Return> {
+ fn resume(mut self: Pin<&mut Self>, arg: R) -> CoroutineState<Self::Yield, Self::Return> {
G::resume((*self).as_mut(), arg)
}
}
@@ -2448,4 +2456,8 @@ impl<T: core::error::Error> core::error::Error for Box<T> {
fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
core::error::Error::source(&**self)
}
+
+ fn provide<'b>(&'b self, request: &mut core::error::Request<'b>) {
+ core::error::Error::provide(&**self, request);
+ }
}
diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs
index 2506065d158a..00ffb3b97365 100644
--- a/rust/alloc/collections/mod.rs
+++ b/rust/alloc/collections/mod.rs
@@ -150,6 +150,7 @@ impl Display for TryReserveError {
/// An intermediate trait for specialization of `Extend`.
#[doc(hidden)]
+#[cfg(not(no_global_oom_handling))]
trait SpecExtend<I: IntoIterator> {
/// Extends `self` with the contents of the given iterator.
fn spec_extend(&mut self, iter: I);
diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs
index 9c7ea73da108..36f79c075593 100644
--- a/rust/alloc/lib.rs
+++ b/rust/alloc/lib.rs
@@ -80,6 +80,8 @@
not(no_sync),
target_has_atomic = "ptr"
))]
+#![doc(rust_logo)]
+#![feature(rustdoc_internals)]
#![no_std]
#![needs_allocator]
// Lints:
@@ -115,7 +117,6 @@
#![feature(const_eval_select)]
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_maybe_uninit_write)]
-#![feature(const_maybe_uninit_zeroed)]
#![feature(const_pin)]
#![feature(const_refs_to_cell)]
#![feature(const_size_of_val)]
@@ -141,7 +142,6 @@
#![feature(maybe_uninit_uninit_array)]
#![feature(maybe_uninit_uninit_array_transpose)]
#![feature(pattern)]
-#![feature(pointer_byte_offsets)]
#![feature(ptr_internals)]
#![feature(ptr_metadata)]
#![feature(ptr_sub_ptr)]
@@ -156,6 +156,7 @@
#![feature(std_internals)]
#![feature(str_internals)]
#![feature(strict_provenance)]
+#![feature(trusted_fused)]
#![feature(trusted_len)]
#![feature(trusted_random_access)]
#![feature(try_trait_v2)]
@@ -168,7 +169,7 @@
//
// Language features:
// tidy-alphabetical-start
-#![cfg_attr(not(test), feature(generator_trait))]
+#![cfg_attr(not(test), feature(coroutine_trait))]
#![cfg_attr(test, feature(panic_update_hook))]
#![cfg_attr(test, feature(test))]
#![feature(allocator_internals)]
@@ -276,7 +277,7 @@ pub(crate) mod test_helpers {
/// seed not being the same for every RNG invocation too.
pub(crate) fn test_rng() -> rand_xorshift::XorShiftRng {
use std::hash::{BuildHasher, Hash, Hasher};
- let mut hasher = std::collections::hash_map::RandomState::new().build_hasher();
+ let mut hasher = std::hash::RandomState::new().build_hasher();
std::panic::Location::caller().hash(&mut hasher);
let hc64 = hasher.finish();
let seed_vec =
diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs
index a7425582a323..98b6abf30af6 100644
--- a/rust/alloc/raw_vec.rs
+++ b/rust/alloc/raw_vec.rs
@@ -27,6 +27,16 @@ enum AllocInit {
Zeroed,
}
+#[repr(transparent)]
+#[cfg_attr(target_pointer_width = "16", rustc_layout_scalar_valid_range_end(0x7fff))]
+#[cfg_attr(target_pointer_width = "32", rustc_layout_scalar_valid_range_end(0x7fff_ffff))]
+#[cfg_attr(target_pointer_width = "64", rustc_layout_scalar_valid_range_end(0x7fff_ffff_ffff_ffff))]
+struct Cap(usize);
+
+impl Cap {
+ const ZERO: Cap = unsafe { Cap(0) };
+}
+
/// A low-level utility for more ergonomically allocating, reallocating, and deallocating
/// a buffer of memory on the heap without having to worry about all the corner cases
/// involved. This type is excellent for building your own data structures like Vec and VecDeque.
@@ -52,7 +62,12 @@ enum AllocInit {
#[allow(missing_debug_implementations)]
pub(crate) struct RawVec<T, A: Allocator = Global> {
ptr: Unique<T>,
- cap: usize,
+ /// Never used for ZSTs; it's `capacity()`'s responsibility to return usize::MAX in that case.
+ ///
+ /// # Safety
+ ///
+ /// `cap` must be in the `0..=isize::MAX` range.
+ cap: Cap,
alloc: A,
}
@@ -121,7 +136,7 @@ impl<T, A: Allocator> RawVec<T, A> {
/// the returned `RawVec`.
pub const fn new_in(alloc: A) -> Self {
// `cap: 0` means "unallocated". zero-sized types are ignored.
- Self { ptr: Unique::dangling(), cap: 0, alloc }
+ Self { ptr: Unique::dangling(), cap: Cap::ZERO, alloc }
}
/// Like `with_capacity`, but parameterized over the choice of
@@ -203,7 +218,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// here should change to `ptr.len() / mem::size_of::<T>()`.
Self {
ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) },
- cap: capacity,
+ cap: unsafe { Cap(capacity) },
alloc,
}
}
@@ -228,7 +243,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// here should change to `ptr.len() / mem::size_of::<T>()`.
Ok(Self {
ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) },
- cap: capacity,
+ cap: unsafe { Cap(capacity) },
alloc,
})
}
@@ -240,12 +255,13 @@ impl<T, A: Allocator> RawVec<T, A> {
/// The `ptr` must be allocated (via the given allocator `alloc`), and with the given
/// `capacity`.
/// The `capacity` cannot exceed `isize::MAX` for sized types. (only a concern on 32-bit
- /// systems). ZST vectors may have a capacity up to `usize::MAX`.
+ /// systems). For ZSTs capacity is ignored.
/// If the `ptr` and `capacity` come from a `RawVec` created via `alloc`, then this is
/// guaranteed.
#[inline]
pub unsafe fn from_raw_parts_in(ptr: *mut T, capacity: usize, alloc: A) -> Self {
- Self { ptr: unsafe { Unique::new_unchecked(ptr) }, cap: capacity, alloc }
+ let cap = if T::IS_ZST { Cap::ZERO } else { unsafe { Cap(capacity) } };
+ Self { ptr: unsafe { Unique::new_unchecked(ptr) }, cap, alloc }
}
/// Gets a raw pointer to the start of the allocation. Note that this is
@@ -261,7 +277,7 @@ impl<T, A: Allocator> RawVec<T, A> {
/// This will always be `usize::MAX` if `T` is zero-sized.
#[inline(always)]
pub fn capacity(&self) -> usize {
- if T::IS_ZST { usize::MAX } else { self.cap }
+ if T::IS_ZST { usize::MAX } else { self.cap.0 }
}
/// Returns a shared reference to the allocator backing this `RawVec`.
@@ -270,7 +286,7 @@ impl<T, A: Allocator> RawVec<T, A> {
}
fn current_memory(&self) -> Option<(NonNull<u8>, Layout)> {
- if T::IS_ZST || self.cap == 0 {
+ if T::IS_ZST || self.cap.0 == 0 {
None
} else {
// We could use Layout::array here which ensures the absence of isize and usize overflows
@@ -280,7 +296,7 @@ impl<T, A: Allocator> RawVec<T, A> {
let _: () = const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
unsafe {
let align = mem::align_of::<T>();
- let size = mem::size_of::<T>().unchecked_mul(self.cap);
+ let size = mem::size_of::<T>().unchecked_mul(self.cap.0);
let layout = Layout::from_size_align_unchecked(size, align);
Some((self.ptr.cast().into(), layout))
}
@@ -338,10 +354,13 @@ impl<T, A: Allocator> RawVec<T, A> {
/// The same as `reserve`, but returns on errors instead of panicking or aborting.
pub fn try_reserve(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> {
if self.needs_to_grow(len, additional) {
- self.grow_amortized(len, additional)
- } else {
- Ok(())
+ self.grow_amortized(len, additional)?;
}
+ unsafe {
+ // Inform the optimizer that the reservation has succeeded or wasn't needed
+ core::intrinsics::assume(!self.needs_to_grow(len, additional));
+ }
+ Ok(())
}
/// The same as `reserve_for_push`, but returns on errors instead of panicking or aborting.
@@ -378,7 +397,14 @@ impl<T, A: Allocator> RawVec<T, A> {
len: usize,
additional: usize,
) -> Result<(), TryReserveError> {
- if self.needs_to_grow(len, additional) { self.grow_exact(len, additional) } else { Ok(()) }
+ if self.needs_to_grow(len, additional) {
+ self.grow_exact(len, additional)?;
+ }
+ unsafe {
+ // Inform the optimizer that the reservation has succeeded or wasn't needed
+ core::intrinsics::assume(!self.needs_to_grow(len, additional));
+ }
+ Ok(())
}
/// Shrinks the buffer down to the specified capacity. If the given amount
@@ -404,12 +430,15 @@ impl<T, A: Allocator> RawVec<T, A> {
additional > self.capacity().wrapping_sub(len)
}
- fn set_ptr_and_cap(&mut self, ptr: NonNull<[u8]>, cap: usize) {
+ /// # Safety:
+ ///
+ /// `cap` must not exceed `isize::MAX`.
+ unsafe fn set_ptr_and_cap(&mut self, ptr: NonNull<[u8]>, cap: usize) {
// Allocators currently return a `NonNull<[u8]>` whose length matches
// the size requested. If that ever changes, the capacity here should
// change to `ptr.len() / mem::size_of::<T>()`.
self.ptr = unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) };
- self.cap = cap;
+ self.cap = unsafe { Cap(cap) };
}
// This method is usually instantiated many times. So we want it to be as
@@ -434,14 +463,15 @@ impl<T, A: Allocator> RawVec<T, A> {
// This guarantees exponential growth. The doubling cannot overflow
// because `cap <= isize::MAX` and the type of `cap` is `usize`.
- let cap = cmp::max(self.cap * 2, required_cap);
+ let cap = cmp::max(self.cap.0 * 2, required_cap);
let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap);
let new_layout = Layout::array::<T>(cap);
// `finish_grow` is non-generic over `T`.
let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?;
- self.set_ptr_and_cap(ptr, cap);
+ // SAFETY: finish_grow would have resulted in a capacity overflow if we tried to allocate more than isize::MAX items
+ unsafe { self.set_ptr_and_cap(ptr, cap) };
Ok(())
}
@@ -460,7 +490,10 @@ impl<T, A: Allocator> RawVec<T, A> {
// `finish_grow` is non-generic over `T`.
let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?;
- self.set_ptr_and_cap(ptr, cap);
+ // SAFETY: finish_grow would have resulted in a capacity overflow if we tried to allocate more than isize::MAX items
+ unsafe {
+ self.set_ptr_and_cap(ptr, cap);
+ }
Ok(())
}
@@ -478,7 +511,7 @@ impl<T, A: Allocator> RawVec<T, A> {
if cap == 0 {
unsafe { self.alloc.deallocate(ptr, layout) };
self.ptr = Unique::dangling();
- self.cap = 0;
+ self.cap = Cap::ZERO;
} else {
let ptr = unsafe {
// `Layout::array` cannot overflow here because it would have
@@ -489,7 +522,10 @@ impl<T, A: Allocator> RawVec<T, A> {
.shrink(ptr, layout, new_layout)
.map_err(|_| AllocError { layout: new_layout, non_exhaustive: () })?
};
- self.set_ptr_and_cap(ptr, cap);
+ // SAFETY: if the allocation is valid, then the capacity is too
+ unsafe {
+ self.set_ptr_and_cap(ptr, cap);
+ }
}
Ok(())
}
@@ -569,6 +605,7 @@ fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> {
// ensure that the code generation related to these panics is minimal as there's
// only one location which panics rather than a bunch throughout the module.
#[cfg(not(no_global_oom_handling))]
+#[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
fn capacity_overflow() -> ! {
panic!("capacity overflow");
}
diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs
index aac0ec16aef1..136bfe94af6c 100644
--- a/rust/alloc/vec/into_iter.rs
+++ b/rust/alloc/vec/into_iter.rs
@@ -9,7 +9,8 @@ use crate::raw_vec::RawVec;
use core::array;
use core::fmt;
use core::iter::{
- FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce,
+ FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen,
+ TrustedRandomAccessNoCoerce,
};
use core::marker::PhantomData;
use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
@@ -287,9 +288,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
// Also note the implementation of `Self: TrustedRandomAccess` requires
// that `T: Copy` so reading elements from the buffer doesn't invalidate
// them for `Drop`.
- unsafe {
- if T::IS_ZST { mem::zeroed() } else { ptr::read(self.ptr.add(i)) }
- }
+ unsafe { if T::IS_ZST { mem::zeroed() } else { ptr::read(self.ptr.add(i)) } }
}
}
@@ -341,6 +340,10 @@ impl<T, A: Allocator> ExactSizeIterator for IntoIter<T, A> {
#[stable(feature = "fused", since = "1.26.0")]
impl<T, A: Allocator> FusedIterator for IntoIter<T, A> {}
+#[doc(hidden)]
+#[unstable(issue = "none", feature = "trusted_fused")]
+unsafe impl<T, A: Allocator> TrustedFused for IntoIter<T, A> {}
+
#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl<T, A: Allocator> TrustedLen for IntoIter<T, A> {}
@@ -425,7 +428,10 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for IntoIter<T, A> {
// also refer to the vec::in_place_collect module documentation to get an overview
#[unstable(issue = "none", feature = "inplace_iteration")]
#[doc(hidden)]
-unsafe impl<T, A: Allocator> InPlaceIterable for IntoIter<T, A> {}
+unsafe impl<T, A: Allocator> InPlaceIterable for IntoIter<T, A> {
+ const EXPAND_BY: Option<NonZeroUsize> = NonZeroUsize::new(1);
+ const MERGE_BY: Option<NonZeroUsize> = NonZeroUsize::new(1);
+}
#[unstable(issue = "none", feature = "inplace_iteration")]
#[doc(hidden)]
diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs
index 41ca71805ef0..220fb9d6f45b 100644
--- a/rust/alloc/vec/mod.rs
+++ b/rust/alloc/vec/mod.rs
@@ -105,6 +105,7 @@ mod into_iter;
#[cfg(not(no_global_oom_handling))]
use self::is_zero::IsZero;
+#[cfg(not(no_global_oom_handling))]
mod is_zero;
#[cfg(not(no_global_oom_handling))]
@@ -123,7 +124,7 @@ use self::set_len_on_drop::SetLenOnDrop;
mod set_len_on_drop;
#[cfg(not(no_global_oom_handling))]
-use self::in_place_drop::{InPlaceDrop, InPlaceDstBufDrop};
+use self::in_place_drop::{InPlaceDrop, InPlaceDstDataSrcBufDrop};
#[cfg(not(no_global_oom_handling))]
mod in_place_drop;
@@ -1376,7 +1377,7 @@ impl<T, A: Allocator> Vec<T, A> {
/// [`as_mut_ptr`]: Vec::as_mut_ptr
/// [`as_ptr`]: Vec::as_ptr
#[stable(feature = "vec_as_ptr", since = "1.37.0")]
- #[cfg_attr(not(bootstrap), rustc_never_returns_null_ptr)]
+ #[rustc_never_returns_null_ptr]
#[inline]
pub fn as_ptr(&self) -> *const T {
// We shadow the slice method of the same name to avoid going through
@@ -1436,7 +1437,7 @@ impl<T, A: Allocator> Vec<T, A> {
/// [`as_mut_ptr`]: Vec::as_mut_ptr
/// [`as_ptr`]: Vec::as_ptr
#[stable(feature = "vec_as_ptr", since = "1.37.0")]
- #[cfg_attr(not(bootstrap), rustc_never_returns_null_ptr)]
+ #[rustc_never_returns_null_ptr]
#[inline]
pub fn as_mut_ptr(&mut self) -> *mut T {
// We shadow the slice method of the same name to avoid going through
@@ -1565,7 +1566,8 @@ impl<T, A: Allocator> Vec<T, A> {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn swap_remove(&mut self, index: usize) -> T {
#[cold]
- #[inline(never)]
+ #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
+ #[track_caller]
fn assert_failed(index: usize, len: usize) -> ! {
panic!("swap_remove index (is {index}) should be < len (is {len})");
}
@@ -1606,7 +1608,8 @@ impl<T, A: Allocator> Vec<T, A> {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn insert(&mut self, index: usize, element: T) {
#[cold]
- #[inline(never)]
+ #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
+ #[track_caller]
fn assert_failed(index: usize, len: usize) -> ! {
panic!("insertion index (is {index}) should be <= len (is {len})");
}
@@ -1667,7 +1670,7 @@ impl<T, A: Allocator> Vec<T, A> {
#[track_caller]
pub fn remove(&mut self, index: usize) -> T {
#[cold]
- #[inline(never)]
+ #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
#[track_caller]
fn assert_failed(index: usize, len: usize) -> ! {
panic!("removal index (is {index}) should be < len (is {len})");
@@ -1891,7 +1894,32 @@ impl<T, A: Allocator> Vec<T, A> {
return;
}
- /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+ // Check if we ever want to remove anything.
+ // This allows to use copy_non_overlapping in next cycle.
+ // And avoids any memory writes if we don't need to remove anything.
+ let mut first_duplicate_idx: usize = 1;
+ let start = self.as_mut_ptr();
+ while first_duplicate_idx != len {
+ let found_duplicate = unsafe {
+ // SAFETY: first_duplicate always in range [1..len)
+ // Note that we start iteration from 1 so we never overflow.
+ let prev = start.add(first_duplicate_idx.wrapping_sub(1));
+ let current = start.add(first_duplicate_idx);
+ // We explicitly say in docs that references are reversed.
+ same_bucket(&mut *current, &mut *prev)
+ };
+ if found_duplicate {
+ break;
+ }
+ first_duplicate_idx += 1;
+ }
+ // Don't need to remove anything.
+ // We cannot get bigger than len.
+ if first_duplicate_idx == len {
+ return;
+ }
+
+ /* INVARIANT: vec.len() > read > write > write-1 >= 0 */
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
/* Offset of the element we want to check if it is duplicate */
read: usize,
@@ -1937,31 +1965,39 @@ impl<T, A: Allocator> Vec<T, A> {
}
}
- let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
- let ptr = gap.vec.as_mut_ptr();
-
/* Drop items while going through Vec, it should be more efficient than
* doing slice partition_dedup + truncate */
+ // Construct gap first and then drop item to avoid memory corruption if `T::drop` panics.
+ let mut gap =
+ FillGapOnDrop { read: first_duplicate_idx + 1, write: first_duplicate_idx, vec: self };
+ unsafe {
+ // SAFETY: we checked that first_duplicate_idx in bounds before.
+ // If drop panics, `gap` would remove this item without drop.
+ ptr::drop_in_place(start.add(first_duplicate_idx));
+ }
+
/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
* are always in-bounds and read_ptr never aliases prev_ptr */
unsafe {
while gap.read < len {
- let read_ptr = ptr.add(gap.read);
- let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
+ let read_ptr = start.add(gap.read);
+ let prev_ptr = start.add(gap.write.wrapping_sub(1));
- if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+ // We explicitly say in docs that references are reversed.
+ let found_duplicate = same_bucket(&mut *read_ptr, &mut *prev_ptr);
+ if found_duplicate {
// Increase `gap.read` now since the drop may panic.
gap.read += 1;
/* We have found duplicate, drop it in-place */
ptr::drop_in_place(read_ptr);
} else {
- let write_ptr = ptr.add(gap.write);
+ let write_ptr = start.add(gap.write);
- /* Because `read_ptr` can be equal to `write_ptr`, we either
- * have to use `copy` or conditional `copy_nonoverlapping`.
- * Looks like the first option is faster. */
- ptr::copy(read_ptr, write_ptr, 1);
+ /* read_ptr cannot be equal to write_ptr because at this point
+ * we guaranteed to skip at least one element (before loop starts).
+ */
+ ptr::copy_nonoverlapping(read_ptr, write_ptr, 1);
/* We have filled that place, so go further */
gap.write += 1;
@@ -2097,6 +2133,7 @@ impl<T, A: Allocator> Vec<T, A> {
} else {
unsafe {
self.len -= 1;
+ core::intrinsics::assume(self.len < self.capacity());
Some(ptr::read(self.as_ptr().add(self.len())))
}
}
@@ -2299,7 +2336,8 @@ impl<T, A: Allocator> Vec<T, A> {
A: Clone,
{
#[cold]
- #[inline(never)]
+ #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
+ #[track_caller]
fn assert_failed(at: usize, len: usize) -> ! {
panic!("`at` split index (is {at}) should be <= len (is {len})");
}
@@ -2840,6 +2878,7 @@ pub fn from_elem_in<T: Clone, A: Allocator>(elem: T, n: usize, alloc: A) -> Vec<
<T as SpecFromElem>::from_elem(elem, n, alloc)
}
+#[cfg(not(no_global_oom_handling))]
trait ExtendFromWithinSpec {
/// # Safety
///
@@ -2848,6 +2887,7 @@ trait ExtendFromWithinSpec {
unsafe fn spec_extend_from_within(&mut self, src: Range<usize>);
}
+#[cfg(not(no_global_oom_handling))]
impl<T: Clone, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
default unsafe fn spec_extend_from_within(&mut self, src: Range<usize>) {
// SAFETY:
@@ -2867,6 +2907,7 @@ impl<T: Clone, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
}
}
+#[cfg(not(no_global_oom_handling))]
impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
unsafe fn spec_extend_from_within(&mut self, src: Range<usize>) {
let count = src.len();
@@ -2947,7 +2988,7 @@ impl<T: Clone, A: Allocator + Clone> Clone for Vec<T, A> {
/// ```
/// use std::hash::BuildHasher;
///
-/// let b = std::collections::hash_map::RandomState::new();
+/// let b = std::hash::RandomState::new();
/// let v: Vec<u8> = vec![0xa8, 0x3c, 0x09];
/// let s: &[u8] = &[0xa8, 0x3c, 0x09];
/// assert_eq!(b.hash_one(v), b.hash_one(s));
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index c0cb4b05b918..65b98831b975 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -9,12 +9,13 @@
#include <kunit/test.h>
#include <linux/errname.h>
#include <linux/ethtool.h>
+#include <linux/jiffies.h>
#include <linux/mdio.h>
#include <linux/phy.h>
-#include <linux/slab.h>
#include <linux/refcount.h>
-#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
#include <linux/workqueue.h>
/* `bindgen` gets confused at certain things. */
diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index 4b057e837358..01ad139e19bc 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -35,7 +35,7 @@ unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gf
// - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
// function safety requirement.
// - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
- // according to the function safety requirement) or a result from `next_power_of_two()`.
+ // according to the function safety requirement) or a result from `next_power_of_two()`.
unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
}
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index 4f0c1edd63b7..4786d3ee1e92 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -264,13 +264,9 @@ pub fn to_result(err: core::ffi::c_int) -> Result {
/// pdev: &mut PlatformDevice,
/// index: u32,
/// ) -> Result<*mut core::ffi::c_void> {
-/// // SAFETY: FFI call.
-/// unsafe {
-/// from_err_ptr(bindings::devm_platform_ioremap_resource(
-/// pdev.to_ptr(),
-/// index,
-/// ))
-/// }
+/// // SAFETY: `pdev` points to a valid platform device. There are no safety requirements
+/// // on `index`.
+/// from_err_ptr(unsafe { bindings::devm_platform_ioremap_resource(pdev.to_ptr(), index) })
/// }
/// ```
// TODO: Remove `dead_code` marker once an in-kernel client is available.
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 65be9ae57b80..424257284d16 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -36,7 +36,7 @@
//!
//! ```rust
//! # #![allow(clippy::disallowed_names)]
-//! use kernel::{prelude::*, sync::Mutex, new_mutex};
+//! use kernel::sync::{new_mutex, Mutex};
//! # use core::pin::Pin;
//! #[pin_data]
//! struct Foo {
@@ -56,7 +56,7 @@
//!
//! ```rust
//! # #![allow(clippy::disallowed_names)]
-//! # use kernel::{prelude::*, sync::Mutex, new_mutex};
+//! # use kernel::sync::{new_mutex, Mutex};
//! # use core::pin::Pin;
//! # #[pin_data]
//! # struct Foo {
@@ -79,7 +79,7 @@
//! above method only works for types where you can access the fields.
//!
//! ```rust
-//! # use kernel::{new_mutex, sync::{Arc, Mutex}};
+//! # use kernel::sync::{new_mutex, Arc, Mutex};
//! let mtx: Result<Arc<Mutex<usize>>> = Arc::pin_init(new_mutex!(42, "example::mtx"));
//! ```
//!
@@ -751,10 +751,10 @@ macro_rules! try_init {
///
/// # Safety
///
-/// When implementing this type you will need to take great care. Also there are probably very few
+/// When implementing this trait you will need to take great care. Also there are probably very few
/// cases where a manual implementation is necessary. Use [`pin_init_from_closure`] where possible.
///
-/// The [`PinInit::__pinned_init`] function
+/// The [`PinInit::__pinned_init`] function:
/// - returns `Ok(())` if it initialized every field of `slot`,
/// - returns `Err(err)` if it encountered an error and then cleaned `slot`, this means:
/// - `slot` can be deallocated without UB occurring,
@@ -861,10 +861,10 @@ where
///
/// # Safety
///
-/// When implementing this type you will need to take great care. Also there are probably very few
+/// When implementing this trait you will need to take great care. Also there are probably very few
/// cases where a manual implementation is necessary. Use [`init_from_closure`] where possible.
///
-/// The [`Init::__init`] function
+/// The [`Init::__init`] function:
/// - returns `Ok(())` if it initialized every field of `slot`,
/// - returns `Err(err)` if it encountered an error and then cleaned `slot`, this means:
/// - `slot` can be deallocated without UB occurring,
@@ -1013,7 +1013,7 @@ pub fn uninit<T, E>() -> impl Init<MaybeUninit<T>, E> {
///
/// ```rust
/// use kernel::{error::Error, init::init_array_from_fn};
-/// let array: Box<[usize; 1_000]>= Box::init::<Error>(init_array_from_fn(|i| i)).unwrap();
+/// let array: Box<[usize; 1_000]> = Box::init::<Error>(init_array_from_fn(|i| i)).unwrap();
/// assert_eq!(array.len(), 1_000);
/// ```
pub fn init_array_from_fn<I, const N: usize, T, E>(
@@ -1027,7 +1027,7 @@ where
// Counts the number of initialized elements and when dropped drops that many elements from
// `slot`.
let mut init_count = ScopeGuard::new_with_data(0, |i| {
- // We now free every element that has been initialized before:
+ // We now free every element that has been initialized before.
// SAFETY: The loop initialized exactly the values from 0..i and since we
// return `Err` below, the caller will consider the memory at `slot` as
// uninitialized.
@@ -1056,7 +1056,7 @@ where
///
/// ```rust
/// use kernel::{sync::{Arc, Mutex}, init::pin_init_array_from_fn, new_mutex};
-/// let array: Arc<[Mutex<usize>; 1_000]>=
+/// let array: Arc<[Mutex<usize>; 1_000]> =
/// Arc::pin_init(pin_init_array_from_fn(|i| new_mutex!(i))).unwrap();
/// assert_eq!(array.len(), 1_000);
/// ```
@@ -1071,7 +1071,7 @@ where
// Counts the number of initialized elements and when dropped drops that many elements from
// `slot`.
let mut init_count = ScopeGuard::new_with_data(0, |i| {
- // We now free every element that has been initialized before:
+ // We now free every element that has been initialized before.
// SAFETY: The loop initialized exactly the values from 0..i and since we
// return `Err` below, the caller will consider the memory at `slot` as
// uninitialized.
diff --git a/rust/kernel/ioctl.rs b/rust/kernel/ioctl.rs
index f1d42ab69972..cfa7d080b531 100644
--- a/rust/kernel/ioctl.rs
+++ b/rust/kernel/ioctl.rs
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-//! ioctl() number definitions
+//! `ioctl()` number definitions.
//!
//! C header: [`include/asm-generic/ioctl.h`](srctree/include/asm-generic/ioctl.h)
@@ -28,13 +28,13 @@ pub const fn _IO(ty: u32, nr: u32) -> u32 {
_IOC(uapi::_IOC_NONE, ty, nr, 0)
}
-/// Build an ioctl number for an read-only ioctl.
+/// Build an ioctl number for a read-only ioctl.
#[inline(always)]
pub const fn _IOR<T>(ty: u32, nr: u32) -> u32 {
_IOC(uapi::_IOC_READ, ty, nr, core::mem::size_of::<T>())
}
-/// Build an ioctl number for an write-only ioctl.
+/// Build an ioctl number for a write-only ioctl.
#[inline(always)]
pub const fn _IOW<T>(ty: u32, nr: u32) -> u32 {
_IOC(uapi::_IOC_WRITE, ty, nr, core::mem::size_of::<T>())
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 7ac39874aeac..be68d5e567b1 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -14,11 +14,9 @@
#![no_std]
#![feature(allocator_api)]
#![feature(coerce_unsized)]
-#![feature(const_maybe_uninit_zeroed)]
#![feature(dispatch_from_dyn)]
#![feature(new_uninit)]
#![feature(offset_of)]
-#![feature(ptr_metadata)]
#![feature(receiver_trait)]
#![feature(unsize)]
@@ -49,6 +47,7 @@ pub mod std_vendor;
pub mod str;
pub mod sync;
pub mod task;
+pub mod time;
pub mod types;
pub mod workqueue;
@@ -78,7 +77,7 @@ pub trait Module: Sized + Sync {
/// Equivalent to `THIS_MODULE` in the C API.
///
-/// C header: `include/linux/export.h`
+/// C header: [`include/linux/export.h`](srctree/include/linux/export.h)
pub struct ThisModule(*mut bindings::module);
// SAFETY: `THIS_MODULE` may be used from all threads within a module.
@@ -102,3 +101,35 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
// SAFETY: FFI call.
unsafe { bindings::BUG() };
}
+
+/// Produces a pointer to an object from a pointer to one of its fields.
+///
+/// # Safety
+///
+/// The pointer passed to this macro, and the pointer returned by this macro, must both be in
+/// bounds of the same allocation.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::container_of;
+/// struct Test {
+/// a: u64,
+/// b: u32,
+/// }
+///
+/// let test = Test { a: 10, b: 20 };
+/// let b_ptr = &test.b;
+/// // SAFETY: The pointer points at the `b` field of a `Test`, so the resulting pointer will be
+/// // in-bounds of the same allocation as `b_ptr`.
+/// let test_alias = unsafe { container_of!(b_ptr, Test, b) };
+/// assert!(core::ptr::eq(&test, test_alias));
+/// ```
+#[macro_export]
+macro_rules! container_of {
+ ($ptr:expr, $type:ty, $($f:tt)*) => {{
+ let ptr = $ptr as *const _ as *const u8;
+ let offset: usize = ::core::mem::offset_of!($type, $($f)*);
+ ptr.sub(offset) as *const $type
+ }}
+}
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index e457b3c7cb2f..96e09c6e8530 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -4,7 +4,7 @@
//! Network PHY device.
//!
-//! C headers: [`include/linux/phy.h`](../../../../../../../include/linux/phy.h).
+//! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h).
use crate::{bindings, error::*, prelude::*, str::CStr, types::Opaque};
@@ -16,7 +16,7 @@ use core::marker::PhantomData;
///
/// Some of PHY drivers access to the state of PHY's software state machine.
///
-/// [`enum phy_state`]: ../../../../../../../include/linux/phy.h
+/// [`enum phy_state`]: srctree/include/linux/phy.h
#[derive(PartialEq, Eq)]
pub enum DeviceState {
/// PHY device and driver are not ready for anything.
@@ -61,7 +61,7 @@ pub enum DuplexMode {
/// Referencing a `phy_device` using this struct asserts that you are in
/// a context where all methods defined on this struct are safe to call.
///
-/// [`struct phy_device`]: ../../../../../../../include/linux/phy.h
+/// [`struct phy_device`]: srctree/include/linux/phy.h
// During the calls to most functions in [`Driver`], the C side (`PHYLIB`) holds a lock that is
// unique for every instance of [`Device`]. `PHYLIB` uses a different serialization technique for
// [`Driver::resume`] and [`Driver::suspend`]: `PHYLIB` updates `phy_device`'s state with
@@ -486,7 +486,7 @@ impl<T: Driver> Adapter<T> {
///
/// `self.0` is always in a valid state.
///
-/// [`struct phy_driver`]: ../../../../../../../include/linux/phy.h
+/// [`struct phy_driver`]: srctree/include/linux/phy.h
#[repr(transparent)]
pub struct DriverVTable(Opaque<bindings::phy_driver>);
@@ -580,12 +580,12 @@ pub trait Driver {
/// Issues a PHY software reset.
fn soft_reset(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Probes the hardware to determine what abilities it has.
fn get_features(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Returns true if this is a suitable driver for the given phydev.
@@ -597,32 +597,32 @@ pub trait Driver {
/// Configures the advertisement and resets auto-negotiation
/// if auto-negotiation is enabled.
fn config_aneg(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Determines the negotiated speed and duplex.
fn read_status(_dev: &mut Device) -> Result<u16> {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Suspends the hardware, saving state if needed.
fn suspend(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Resumes the hardware, restoring state if needed.
fn resume(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Overrides the default MMD read function for reading a MMD register.
fn read_mmd(_dev: &mut Device, _devnum: u8, _regnum: u16) -> Result<u16> {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Overrides the default MMD write function for writing a MMD register.
fn write_mmd(_dev: &mut Device, _devnum: u8, _regnum: u16, _val: u16) -> Result {
- Err(code::ENOTSUPP)
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
}
/// Callback for notification of link change.
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index 7d848b83add4..925ced8fdc61 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -13,9 +13,102 @@ use crate::{
};
/// Byte string without UTF-8 validity guarantee.
-///
-/// `BStr` is simply an alias to `[u8]`, but has a more evident semantical meaning.
-pub type BStr = [u8];
+#[repr(transparent)]
+pub struct BStr([u8]);
+
+impl BStr {
+ /// Returns the length of this string.
+ #[inline]
+ pub const fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Returns `true` if the string is empty.
+ #[inline]
+ pub const fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Creates a [`BStr`] from a `[u8]`.
+ #[inline]
+ pub const fn from_bytes(bytes: &[u8]) -> &Self {
+ // SAFETY: `BStr` is transparent to `[u8]`.
+ unsafe { &*(bytes as *const [u8] as *const BStr) }
+ }
+}
+
+impl fmt::Display for BStr {
+ /// Formats printable ASCII characters, escaping the rest.
+ ///
+ /// ```
+ /// # use kernel::{fmt, b_str, str::{BStr, CString}};
+ /// let ascii = b_str!("Hello, BStr!");
+ /// let s = CString::try_from_fmt(fmt!("{}", ascii)).unwrap();
+ /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes());
+ ///
+ /// let non_ascii = b_str!("🦀");
+ /// let s = CString::try_from_fmt(fmt!("{}", non_ascii)).unwrap();
+ /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes());
+ /// ```
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ for &b in &self.0 {
+ match b {
+ // Common escape codes.
+ b'\t' => f.write_str("\\t")?,
+ b'\n' => f.write_str("\\n")?,
+ b'\r' => f.write_str("\\r")?,
+ // Printable characters.
+ 0x20..=0x7e => f.write_char(b as char)?,
+ _ => write!(f, "\\x{:02x}", b)?,
+ }
+ }
+ Ok(())
+ }
+}
+
+impl fmt::Debug for BStr {
+ /// Formats printable ASCII characters with a double quote on either end,
+ /// escaping the rest.
+ ///
+ /// ```
+ /// # use kernel::{fmt, b_str, str::{BStr, CString}};
+ /// // Embedded double quotes are escaped.
+ /// let ascii = b_str!("Hello, \"BStr\"!");
+ /// let s = CString::try_from_fmt(fmt!("{:?}", ascii)).unwrap();
+ /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes());
+ ///
+ /// let non_ascii = b_str!("😺");
+ /// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii)).unwrap();
+ /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes());
+ /// ```
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.write_char('"')?;
+ for &b in &self.0 {
+ match b {
+ // Common escape codes.
+ b'\t' => f.write_str("\\t")?,
+ b'\n' => f.write_str("\\n")?,
+ b'\r' => f.write_str("\\r")?,
+ // String escape characters.
+ b'\"' => f.write_str("\\\"")?,
+ b'\\' => f.write_str("\\\\")?,
+ // Printable characters.
+ 0x20..=0x7e => f.write_char(b as char)?,
+ _ => write!(f, "\\x{:02x}", b)?,
+ }
+ }
+ f.write_char('"')
+ }
+}
+
+impl Deref for BStr {
+ type Target = [u8];
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
/// Creates a new [`BStr`] from a string literal.
///
@@ -33,7 +126,7 @@ pub type BStr = [u8];
macro_rules! b_str {
($str:literal) => {{
const S: &'static str = $str;
- const C: &'static $crate::str::BStr = S.as_bytes();
+ const C: &'static $crate::str::BStr = $crate::str::BStr::from_bytes(S.as_bytes());
C
}};
}
@@ -149,13 +242,13 @@ impl CStr {
self.0.as_ptr() as _
}
- /// Convert the string to a byte slice without the trailing 0 byte.
+ /// Convert the string to a byte slice without the trailing `NUL` byte.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.0[..self.len()]
}
- /// Convert the string to a byte slice containing the trailing 0 byte.
+ /// Convert the string to a byte slice containing the trailing `NUL` byte.
#[inline]
pub const fn as_bytes_with_nul(&self) -> &[u8] {
&self.0
@@ -191,9 +284,9 @@ impl CStr {
/// ```
/// # use kernel::c_str;
/// # use kernel::str::CStr;
+ /// let bar = c_str!("ツ");
/// // SAFETY: String literals are guaranteed to be valid UTF-8
/// // by the Rust compiler.
- /// let bar = c_str!("ツ");
/// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ");
/// ```
#[inline]
@@ -271,7 +364,7 @@ impl fmt::Debug for CStr {
impl AsRef<BStr> for CStr {
#[inline]
fn as_ref(&self) -> &BStr {
- self.as_bytes()
+ BStr::from_bytes(self.as_bytes())
}
}
@@ -280,7 +373,7 @@ impl Deref for CStr {
#[inline]
fn deref(&self) -> &Self::Target {
- self.as_bytes()
+ self.as_ref()
}
}
@@ -327,7 +420,7 @@ where
#[inline]
fn index(&self, index: Idx) -> &Self::Output {
- &self.as_bytes()[index]
+ &self.as_ref()[index]
}
}
@@ -357,6 +450,21 @@ macro_rules! c_str {
#[cfg(test)]
mod tests {
use super::*;
+ use alloc::format;
+
+ const ALL_ASCII_CHARS: &'static str =
+ "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\
+ \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \
+ !\"#$%&'()*+,-./0123456789:;<=>?@\
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\
+ \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\
+ \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\
+ \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\
+ \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\
+ \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\
+ \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\
+ \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\
+ \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff";
#[test]
fn test_cstr_to_str() {
@@ -381,6 +489,69 @@ mod tests {
let unchecked_str = unsafe { checked_cstr.as_str_unchecked() };
assert_eq!(unchecked_str, "ðŸ§");
}
+
+ #[test]
+ fn test_cstr_display() {
+ let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap();
+ assert_eq!(format!("{}", hello_world), "hello, world!");
+ let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap();
+ assert_eq!(format!("{}", non_printables), "\\x01\\x09\\x0a");
+ let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap();
+ assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu");
+ let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap();
+ assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80");
+ }
+
+ #[test]
+ fn test_cstr_display_all_bytes() {
+ let mut bytes: [u8; 256] = [0; 256];
+ // fill `bytes` with [1..=255] + [0]
+ for i in u8::MIN..=u8::MAX {
+ bytes[i as usize] = i.wrapping_add(1);
+ }
+ let cstr = CStr::from_bytes_with_nul(&bytes).unwrap();
+ assert_eq!(format!("{}", cstr), ALL_ASCII_CHARS);
+ }
+
+ #[test]
+ fn test_cstr_debug() {
+ let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap();
+ assert_eq!(format!("{:?}", hello_world), "\"hello, world!\"");
+ let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap();
+ assert_eq!(format!("{:?}", non_printables), "\"\\x01\\x09\\x0a\"");
+ let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap();
+ assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\"");
+ let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap();
+ assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\"");
+ }
+
+ #[test]
+ fn test_bstr_display() {
+ let hello_world = BStr::from_bytes(b"hello, world!");
+ assert_eq!(format!("{}", hello_world), "hello, world!");
+ let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
+ assert_eq!(format!("{}", escapes), "_\\t_\\n_\\r_\\_'_\"_");
+ let others = BStr::from_bytes(b"\x01");
+ assert_eq!(format!("{}", others), "\\x01");
+ let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu");
+ assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu");
+ let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
+ assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80");
+ }
+
+ #[test]
+ fn test_bstr_debug() {
+ let hello_world = BStr::from_bytes(b"hello, world!");
+ assert_eq!(format!("{:?}", hello_world), "\"hello, world!\"");
+ let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
+ assert_eq!(format!("{:?}", escapes), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\"");
+ let others = BStr::from_bytes(b"\x01");
+ assert_eq!(format!("{:?}", others), "\"\\x01\"");
+ let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu");
+ assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\"");
+ let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
+ assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\"");
+ }
}
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
@@ -449,7 +620,7 @@ impl RawFormatter {
self.pos as _
}
- /// Return the number of bytes written to the formatter.
+ /// Returns the number of bytes written to the formatter.
pub(crate) fn bytes_written(&self) -> usize {
self.pos - self.beg
}
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
index d219ee518eff..c983f63fd56e 100644
--- a/rust/kernel/sync.rs
+++ b/rust/kernel/sync.rs
@@ -13,8 +13,9 @@ pub mod lock;
mod locked_by;
pub use arc::{Arc, ArcBorrow, UniqueArc};
-pub use condvar::CondVar;
-pub use lock::{mutex::Mutex, spinlock::SpinLock};
+pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult};
+pub use lock::mutex::{new_mutex, Mutex};
+pub use lock::spinlock::{new_spinlock, SpinLock};
pub use locked_by::LockedBy;
/// Represents a lockdep class. It's a wrapper around C's `lock_class_key`.
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index 77cdbcf7bd2e..7d4c4bf58388 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -30,7 +30,7 @@ use core::{
mem::{ManuallyDrop, MaybeUninit},
ops::{Deref, DerefMut},
pin::Pin,
- ptr::{NonNull, Pointee},
+ ptr::NonNull,
};
use macros::pin_data;
@@ -56,7 +56,7 @@ mod std_vendor;
/// b: u32,
/// }
///
-/// // Create a ref-counted instance of `Example`.
+/// // Create a refcounted instance of `Example`.
/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
///
/// // Get a new pointer to `obj` and increment the refcount.
@@ -239,22 +239,20 @@ impl<T: ?Sized> Arc<T> {
// binary, so its layout is not so large that it can trigger arithmetic overflow.
let val_offset = unsafe { refcount_layout.extend(val_layout).unwrap_unchecked().1 };
- let metadata: <T as Pointee>::Metadata = core::ptr::metadata(ptr);
- // SAFETY: The metadata of `T` and `ArcInner<T>` is the same because `ArcInner` is a struct
- // with `T` as its last field.
+ // Pointer casts leave the metadata unchanged. This is okay because the metadata of `T` and
+ // `ArcInner<T>` is the same since `ArcInner` is a struct with `T` as its last field.
//
// This is documented at:
// <https://doc.rust-lang.org/std/ptr/trait.Pointee.html>.
- let metadata: <ArcInner<T> as Pointee>::Metadata =
- unsafe { core::mem::transmute_copy(&metadata) };
+ let ptr = ptr as *const ArcInner<T>;
+
// SAFETY: The pointer is in-bounds of an allocation both before and after offsetting the
// pointer, since it originates from a previous call to `Arc::into_raw` and is still valid.
- let ptr = unsafe { (ptr as *mut u8).sub(val_offset) as *mut () };
- let ptr = core::ptr::from_raw_parts_mut(ptr, metadata);
+ let ptr = unsafe { ptr.byte_sub(val_offset) };
// SAFETY: By the safety requirements we know that `ptr` came from `Arc::into_raw`, so the
// reference count held then will be owned by the new `Arc` object.
- unsafe { Self::from_inner(NonNull::new_unchecked(ptr)) }
+ unsafe { Self::from_inner(NonNull::new_unchecked(ptr.cast_mut())) }
}
/// Returns an [`ArcBorrow`] from the given [`Arc`].
@@ -365,12 +363,12 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
/// A borrowed reference to an [`Arc`] instance.
///
/// For cases when one doesn't ever need to increment the refcount on the allocation, it is simpler
-/// to use just `&T`, which we can trivially get from an `Arc<T>` instance.
+/// to use just `&T`, which we can trivially get from an [`Arc<T>`] instance.
///
/// However, when one may need to increment the refcount, it is preferable to use an `ArcBorrow<T>`
/// over `&Arc<T>` because the latter results in a double-indirection: a pointer (shared reference)
-/// to a pointer (`Arc<T>`) to the object (`T`). An [`ArcBorrow`] eliminates this double
-/// indirection while still allowing one to increment the refcount and getting an `Arc<T>` when/if
+/// to a pointer ([`Arc<T>`]) to the object (`T`). An [`ArcBorrow`] eliminates this double
+/// indirection while still allowing one to increment the refcount and getting an [`Arc<T>`] when/if
/// needed.
///
/// # Invariants
@@ -510,7 +508,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
/// # test().unwrap();
/// ```
///
-/// In the following example we first allocate memory for a ref-counted `Example` but we don't
+/// In the following example we first allocate memory for a refcounted `Example` but we don't
/// initialise it on allocation. We do initialise it later with a call to [`UniqueArc::write`],
/// followed by a conversion to `Arc<Example>`. This is particularly useful when allocation happens
/// in one context (e.g., sleepable) and initialisation in another (e.g., atomic):
@@ -560,7 +558,7 @@ impl<T> UniqueArc<T> {
/// Tries to allocate a new [`UniqueArc`] instance.
pub fn try_new(value: T) -> Result<Self, AllocError> {
Ok(Self {
- // INVARIANT: The newly-created object has a ref-count of 1.
+ // INVARIANT: The newly-created object has a refcount of 1.
inner: Arc::try_new(value)?,
})
}
@@ -574,7 +572,7 @@ impl<T> UniqueArc<T> {
data <- init::uninit::<T, AllocError>(),
}? AllocError))?;
Ok(UniqueArc {
- // INVARIANT: The newly-created object has a ref-count of 1.
+ // INVARIANT: The newly-created object has a refcount of 1.
// SAFETY: The pointer from the `Box` is valid.
inner: unsafe { Arc::from_inner(Box::leak(inner).into()) },
})
diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs
index f65e19d5a37c..0c3671caffeb 100644
--- a/rust/kernel/sync/condvar.rs
+++ b/rust/kernel/sync/condvar.rs
@@ -6,8 +6,18 @@
//! variable.
use super::{lock::Backend, lock::Guard, LockClassKey};
-use crate::{bindings, init::PinInit, pin_init, str::CStr, types::Opaque};
+use crate::{
+ bindings,
+ init::PinInit,
+ pin_init,
+ str::CStr,
+ task::{MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE, TASK_NORMAL, TASK_UNINTERRUPTIBLE},
+ time::Jiffies,
+ types::Opaque,
+};
+use core::ffi::{c_int, c_long};
use core::marker::PhantomPinned;
+use core::ptr;
use macros::pin_data;
/// Creates a [`CondVar`] initialiser with the given name and a newly-created lock class.
@@ -17,6 +27,7 @@ macro_rules! new_condvar {
$crate::sync::CondVar::new($crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_condvar;
/// A conditional variable.
///
@@ -34,8 +45,7 @@ macro_rules! new_condvar {
/// The following is an example of using a condvar with a mutex:
///
/// ```
-/// use kernel::sync::{CondVar, Mutex};
-/// use kernel::{new_condvar, new_mutex};
+/// use kernel::sync::{new_condvar, new_mutex, CondVar, Mutex};
///
/// #[pin_data]
/// pub struct Example {
@@ -73,10 +83,12 @@ macro_rules! new_condvar {
#[pin_data]
pub struct CondVar {
#[pin]
- pub(crate) wait_list: Opaque<bindings::wait_queue_head>,
+ pub(crate) wait_queue_head: Opaque<bindings::wait_queue_head>,
/// A condvar needs to be pinned because it contains a [`struct list_head`] that is
/// self-referential, so it cannot be safely moved once it is initialised.
+ ///
+ /// [`struct list_head`]: srctree/include/linux/types.h
#[pin]
_pin: PhantomPinned,
}
@@ -96,28 +108,35 @@ impl CondVar {
_pin: PhantomPinned,
// SAFETY: `slot` is valid while the closure is called and both `name` and `key` have
// static lifetimes so they live indefinitely.
- wait_list <- Opaque::ffi_init(|slot| unsafe {
+ wait_queue_head <- Opaque::ffi_init(|slot| unsafe {
bindings::__init_waitqueue_head(slot, name.as_char_ptr(), key.as_ptr())
}),
})
}
- fn wait_internal<T: ?Sized, B: Backend>(&self, wait_state: u32, guard: &mut Guard<'_, T, B>) {
+ fn wait_internal<T: ?Sized, B: Backend>(
+ &self,
+ wait_state: c_int,
+ guard: &mut Guard<'_, T, B>,
+ timeout_in_jiffies: c_long,
+ ) -> c_long {
let wait = Opaque::<bindings::wait_queue_entry>::uninit();
// SAFETY: `wait` points to valid memory.
unsafe { bindings::init_wait(wait.get()) };
- // SAFETY: Both `wait` and `wait_list` point to valid memory.
+ // SAFETY: Both `wait` and `wait_queue_head` point to valid memory.
unsafe {
- bindings::prepare_to_wait_exclusive(self.wait_list.get(), wait.get(), wait_state as _)
+ bindings::prepare_to_wait_exclusive(self.wait_queue_head.get(), wait.get(), wait_state)
};
- // SAFETY: No arguments, switches to another thread.
- guard.do_unlocked(|| unsafe { bindings::schedule() });
+ // SAFETY: Switches to another thread. The timeout can be any number.
+ let ret = guard.do_unlocked(|| unsafe { bindings::schedule_timeout(timeout_in_jiffies) });
+
+ // SAFETY: Both `wait` and `wait_queue_head` point to valid memory.
+ unsafe { bindings::finish_wait(self.wait_queue_head.get(), wait.get()) };
- // SAFETY: Both `wait` and `wait_list` point to valid memory.
- unsafe { bindings::finish_wait(self.wait_list.get(), wait.get()) };
+ ret
}
/// Releases the lock and waits for a notification in uninterruptible mode.
@@ -127,7 +146,7 @@ impl CondVar {
/// [`CondVar::notify_one`] or [`CondVar::notify_all`]. Note that it may also wake up
/// spuriously.
pub fn wait<T: ?Sized, B: Backend>(&self, guard: &mut Guard<'_, T, B>) {
- self.wait_internal(bindings::TASK_UNINTERRUPTIBLE, guard);
+ self.wait_internal(TASK_UNINTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT);
}
/// Releases the lock and waits for a notification in interruptible mode.
@@ -138,29 +157,60 @@ impl CondVar {
/// Returns whether there is a signal pending.
#[must_use = "wait_interruptible returns if a signal is pending, so the caller must check the return value"]
pub fn wait_interruptible<T: ?Sized, B: Backend>(&self, guard: &mut Guard<'_, T, B>) -> bool {
- self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard);
+ self.wait_internal(TASK_INTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT);
crate::current!().signal_pending()
}
- /// Calls the kernel function to notify the appropriate number of threads with the given flags.
- fn notify(&self, count: i32, flags: u32) {
- // SAFETY: `wait_list` points to valid memory.
+ /// Releases the lock and waits for a notification in interruptible mode.
+ ///
+ /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the
+ /// thread to sleep. It wakes up when notified by [`CondVar::notify_one`] or
+ /// [`CondVar::notify_all`], or when a timeout occurs, or when the thread receives a signal.
+ #[must_use = "wait_interruptible_timeout returns if a signal is pending, so the caller must check the return value"]
+ pub fn wait_interruptible_timeout<T: ?Sized, B: Backend>(
+ &self,
+ guard: &mut Guard<'_, T, B>,
+ jiffies: Jiffies,
+ ) -> CondVarTimeoutResult {
+ let jiffies = jiffies.try_into().unwrap_or(MAX_SCHEDULE_TIMEOUT);
+ let res = self.wait_internal(TASK_INTERRUPTIBLE, guard, jiffies);
+
+ match (res as Jiffies, crate::current!().signal_pending()) {
+ (jiffies, true) => CondVarTimeoutResult::Signal { jiffies },
+ (0, false) => CondVarTimeoutResult::Timeout,
+ (jiffies, false) => CondVarTimeoutResult::Woken { jiffies },
+ }
+ }
+
+ /// Calls the kernel function to notify the appropriate number of threads.
+ fn notify(&self, count: c_int) {
+ // SAFETY: `wait_queue_head` points to valid memory.
unsafe {
bindings::__wake_up(
- self.wait_list.get(),
- bindings::TASK_NORMAL,
+ self.wait_queue_head.get(),
+ TASK_NORMAL,
count,
- flags as _,
+ ptr::null_mut(),
)
};
}
+ /// Calls the kernel function to notify one thread synchronously.
+ ///
+ /// This method behaves like `notify_one`, except that it hints to the scheduler that the
+ /// current thread is about to go to sleep, so it should schedule the target thread on the same
+ /// CPU.
+ pub fn notify_sync(&self) {
+ // SAFETY: `wait_queue_head` points to valid memory.
+ unsafe { bindings::__wake_up_sync(self.wait_queue_head.get(), TASK_NORMAL) };
+ }
+
/// Wakes a single waiter up, if any.
///
/// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost
/// completely (as opposed to automatically waking up the next waiter).
pub fn notify_one(&self) {
- self.notify(1, 0);
+ self.notify(1);
}
/// Wakes all waiters up, if any.
@@ -168,6 +218,22 @@ impl CondVar {
/// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost
/// completely (as opposed to automatically waking up the next waiter).
pub fn notify_all(&self) {
- self.notify(0, 0);
+ self.notify(0);
}
}
+
+/// The return type of `wait_timeout`.
+pub enum CondVarTimeoutResult {
+ /// The timeout was reached.
+ Timeout,
+ /// Somebody woke us up.
+ Woken {
+ /// Remaining sleep duration.
+ jiffies: Jiffies,
+ },
+ /// A signal occurred.
+ Signal {
+ /// Remaining sleep duration.
+ jiffies: Jiffies,
+ },
+}
diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index f12a684bc957..5b5c8efe427a 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -21,14 +21,21 @@ pub mod spinlock;
/// # Safety
///
/// - Implementers must ensure that only one thread/CPU may access the protected data once the lock
-/// is owned, that is, between calls to `lock` and `unlock`.
-/// - Implementers must also ensure that `relock` uses the same locking method as the original
-/// lock operation.
+/// is owned, that is, between calls to [`lock`] and [`unlock`].
+/// - Implementers must also ensure that [`relock`] uses the same locking method as the original
+/// lock operation.
+///
+/// [`lock`]: Backend::lock
+/// [`unlock`]: Backend::unlock
+/// [`relock`]: Backend::relock
pub unsafe trait Backend {
/// The state required by the lock.
type State;
- /// The state required to be kept between lock and unlock.
+ /// The state required to be kept between [`lock`] and [`unlock`].
+ ///
+ /// [`lock`]: Backend::lock
+ /// [`unlock`]: Backend::unlock
type GuardState;
/// Initialises the lock.
@@ -139,7 +146,7 @@ pub struct Guard<'a, T: ?Sized, B: Backend> {
unsafe impl<T: Sync + ?Sized, B: Backend> Sync for Guard<'_, T, B> {}
impl<T: ?Sized, B: Backend> Guard<'_, T, B> {
- pub(crate) fn do_unlocked(&mut self, cb: impl FnOnce()) {
+ pub(crate) fn do_unlocked<U>(&mut self, cb: impl FnOnce() -> U) -> U {
// SAFETY: The caller owns the lock, so it is safe to unlock it.
unsafe { B::unlock(self.lock.state.get(), &self.state) };
@@ -147,7 +154,7 @@ impl<T: ?Sized, B: Backend> Guard<'_, T, B> {
let _relock =
ScopeGuard::new(|| unsafe { B::relock(self.lock.state.get(), &mut self.state) });
- cb();
+ cb()
}
}
diff --git a/rust/kernel/sync/lock/mutex.rs b/rust/kernel/sync/lock/mutex.rs
index 8c524a3ec45a..ef4c4634d294 100644
--- a/rust/kernel/sync/lock/mutex.rs
+++ b/rust/kernel/sync/lock/mutex.rs
@@ -17,6 +17,7 @@ macro_rules! new_mutex {
$inner, $crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_mutex;
/// A mutual exclusion primitive.
///
@@ -35,7 +36,7 @@ macro_rules! new_mutex {
/// contains an inner struct (`Inner`) that is protected by a mutex.
///
/// ```
-/// use kernel::{init::InPlaceInit, init::PinInit, new_mutex, pin_init, sync::Mutex};
+/// use kernel::sync::{new_mutex, Mutex};
///
/// struct Inner {
/// a: u32,
diff --git a/rust/kernel/sync/lock/spinlock.rs b/rust/kernel/sync/lock/spinlock.rs
index 068535ce1b29..0b22c635634f 100644
--- a/rust/kernel/sync/lock/spinlock.rs
+++ b/rust/kernel/sync/lock/spinlock.rs
@@ -17,6 +17,7 @@ macro_rules! new_spinlock {
$inner, $crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_spinlock;
/// A spinlock.
///
@@ -33,7 +34,7 @@ macro_rules! new_spinlock {
/// contains an inner struct (`Inner`) that is protected by a spinlock.
///
/// ```
-/// use kernel::{init::InPlaceInit, init::PinInit, new_spinlock, pin_init, sync::SpinLock};
+/// use kernel::sync::{new_spinlock, SpinLock};
///
/// struct Inner {
/// a: u32,
@@ -112,7 +113,7 @@ unsafe impl super::Backend for SpinLockBackend {
unsafe fn unlock(ptr: *mut Self::State, _guard_state: &Self::GuardState) {
// SAFETY: The safety requirements of this function ensure that `ptr` is valid and that the
- // caller is the owner of the mutex.
+ // caller is the owner of the spinlock.
unsafe { bindings::spin_unlock(ptr) }
}
}
diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs
index b17ee5cd98f3..babc731bd5f6 100644
--- a/rust/kernel/sync/locked_by.rs
+++ b/rust/kernel/sync/locked_by.rs
@@ -9,14 +9,17 @@ use core::{cell::UnsafeCell, mem::size_of, ptr};
/// Allows access to some data to be serialised by a lock that does not wrap it.
///
/// In most cases, data protected by a lock is wrapped by the appropriate lock type, e.g.,
-/// [`super::Mutex`] or [`super::SpinLock`]. [`LockedBy`] is meant for cases when this is not
-/// possible. For example, if a container has a lock and some data in the contained elements needs
+/// [`Mutex`] or [`SpinLock`]. [`LockedBy`] is meant for cases when this is not possible.
+/// For example, if a container has a lock and some data in the contained elements needs
/// to be protected by the same lock.
///
/// [`LockedBy`] wraps the data in lieu of another locking primitive, and only allows access to it
/// when the caller shows evidence that the 'external' lock is locked. It panics if the evidence
/// refers to the wrong instance of the lock.
///
+/// [`Mutex`]: super::Mutex
+/// [`SpinLock`]: super::SpinLock
+///
/// # Examples
///
/// The following is an example for illustrative purposes: `InnerDirectory::bytes_used` is an
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 9451932d5d86..ca6e7e31d71c 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -5,7 +5,23 @@
//! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h).
use crate::{bindings, types::Opaque};
-use core::{marker::PhantomData, ops::Deref, ptr};
+use core::{
+ ffi::{c_int, c_long, c_uint},
+ marker::PhantomData,
+ ops::Deref,
+ ptr,
+};
+
+/// A sentinel value used for infinite timeouts.
+pub const MAX_SCHEDULE_TIMEOUT: c_long = c_long::MAX;
+
+/// Bitmask for tasks that are sleeping in an interruptible state.
+pub const TASK_INTERRUPTIBLE: c_int = bindings::TASK_INTERRUPTIBLE as c_int;
+/// Bitmask for tasks that are sleeping in an uninterruptible state.
+pub const TASK_UNINTERRUPTIBLE: c_int = bindings::TASK_UNINTERRUPTIBLE as c_int;
+/// Convenience constant for waking up tasks regardless of whether they are in interruptible or
+/// uninterruptible sleep.
+pub const TASK_NORMAL: c_uint = bindings::TASK_NORMAL as c_uint;
/// Returns the currently running task.
#[macro_export]
@@ -23,7 +39,7 @@ macro_rules! current {
///
/// All instances are valid tasks created by the C portion of the kernel.
///
-/// Instances of this type are always ref-counted, that is, a call to `get_task_struct` ensures
+/// Instances of this type are always refcounted, that is, a call to `get_task_struct` ensures
/// that the allocation remains valid at least until the matching call to `put_task_struct`.
///
/// # Examples
@@ -116,7 +132,7 @@ impl Task {
/// Returns the group leader of the given task.
pub fn group_leader(&self) -> &Task {
// SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always
- // have a valid group_leader.
+ // have a valid `group_leader`.
let ptr = unsafe { *ptr::addr_of!((*self.0.get()).group_leader) };
// SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
@@ -147,7 +163,7 @@ impl Task {
}
}
-// SAFETY: The type invariants guarantee that `Task` is always ref-counted.
+// SAFETY: The type invariants guarantee that `Task` is always refcounted.
unsafe impl crate::types::AlwaysRefCounted for Task {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference means that the refcount is nonzero.
diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs
new file mode 100644
index 000000000000..25a896eed468
--- /dev/null
+++ b/rust/kernel/time.rs
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Time related primitives.
+//!
+//! This module contains the kernel APIs related to time and timers that
+//! have been ported or wrapped for usage by Rust code in the kernel.
+
+/// The time unit of Linux kernel. One jiffy equals (1/HZ) second.
+pub type Jiffies = core::ffi::c_ulong;
+
+/// The millisecond time unit.
+pub type Msecs = core::ffi::c_uint;
+
+/// Converts milliseconds to jiffies.
+#[inline]
+pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies {
+ // SAFETY: The `__msecs_to_jiffies` function is always safe to call no
+ // matter what the argument is.
+ unsafe { bindings::__msecs_to_jiffies(msecs) }
+}
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index fdb778e65d79..aa77bad9bce4 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -46,6 +46,25 @@ pub trait ForeignOwnable: Sized {
/// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] for
/// this object must have been dropped.
unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self;
+
+ /// Tries to convert a foreign-owned object back to a Rust-owned one.
+ ///
+ /// A convenience wrapper over [`ForeignOwnable::from_foreign`] that returns [`None`] if `ptr`
+ /// is null.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must either be null or satisfy the safety requirements for
+ /// [`ForeignOwnable::from_foreign`].
+ unsafe fn try_from_foreign(ptr: *const core::ffi::c_void) -> Option<Self> {
+ if ptr.is_null() {
+ None
+ } else {
+ // SAFETY: Since `ptr` is not null here, then `ptr` satisfies the safety requirements
+ // of `from_foreign` given the safety requirements of this function.
+ unsafe { Some(Self::from_foreign(ptr)) }
+ }
+ }
}
impl<T: 'static> ForeignOwnable for Box<T> {
@@ -90,6 +109,7 @@ impl ForeignOwnable for () {
///
/// In the example below, we have multiple exit paths and we want to log regardless of which one is
/// taken:
+///
/// ```
/// # use kernel::types::ScopeGuard;
/// fn example1(arg: bool) {
@@ -108,6 +128,7 @@ impl ForeignOwnable for () {
///
/// In the example below, we want to log the same message on all early exits but a different one on
/// the main exit path:
+///
/// ```
/// # use kernel::types::ScopeGuard;
/// fn example2(arg: bool) {
@@ -129,6 +150,7 @@ impl ForeignOwnable for () {
///
/// In the example below, we need a mutable object (the vector) to be accessible within the log
/// function, so we wrap it in the [`ScopeGuard`]:
+///
/// ```
/// # use kernel::types::ScopeGuard;
/// fn example3(arg: bool) -> Result {
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 498397877376..480cb292e7c2 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -12,19 +12,19 @@
//!
//! # The raw API
//!
-//! The raw API consists of the `RawWorkItem` trait, where the work item needs to provide an
+//! The raw API consists of the [`RawWorkItem`] trait, where the work item needs to provide an
//! arbitrary function that knows how to enqueue the work item. It should usually not be used
//! directly, but if you want to, you can use it without using the pieces from the safe API.
//!
//! # The safe API
//!
-//! The safe API is used via the `Work` struct and `WorkItem` traits. Furthermore, it also includes
-//! a trait called `WorkItemPointer`, which is usually not used directly by the user.
+//! The safe API is used via the [`Work`] struct and [`WorkItem`] traits. Furthermore, it also
+//! includes a trait called [`WorkItemPointer`], which is usually not used directly by the user.
//!
-//! * The `Work` struct is the Rust wrapper for the C `work_struct` type.
-//! * The `WorkItem` trait is implemented for structs that can be enqueued to a workqueue.
-//! * The `WorkItemPointer` trait is implemented for the pointer type that points at a something
-//! that implements `WorkItem`.
+//! * The [`Work`] struct is the Rust wrapper for the C `work_struct` type.
+//! * The [`WorkItem`] trait is implemented for structs that can be enqueued to a workqueue.
+//! * The [`WorkItemPointer`] trait is implemented for the pointer type that points at a something
+//! that implements [`WorkItem`].
//!
//! ## Example
//!
@@ -35,8 +35,7 @@
//! ```
//! use kernel::prelude::*;
//! use kernel::sync::Arc;
-//! use kernel::workqueue::{self, Work, WorkItem};
-//! use kernel::{impl_has_work, new_work};
+//! use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem};
//!
//! #[pin_data]
//! struct MyStruct {
@@ -78,8 +77,7 @@
//! ```
//! use kernel::prelude::*;
//! use kernel::sync::Arc;
-//! use kernel::workqueue::{self, Work, WorkItem};
-//! use kernel::{impl_has_work, new_work};
+//! use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem};
//!
//! #[pin_data]
//! struct MyStruct {
@@ -147,6 +145,7 @@ macro_rules! new_work {
$crate::workqueue::Work::new($crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_work;
/// A kernel work queue.
///
@@ -168,7 +167,7 @@ impl Queue {
/// # Safety
///
/// The caller must ensure that the provided raw pointer is not dangling, that it points at a
- /// valid workqueue, and that it remains valid until the end of 'a.
+ /// valid workqueue, and that it remains valid until the end of `'a`.
pub unsafe fn from_raw<'a>(ptr: *const bindings::workqueue_struct) -> &'a Queue {
// SAFETY: The `Queue` type is `#[repr(transparent)]`, so the pointer cast is valid. The
// caller promises that the pointer is not dangling.
@@ -199,7 +198,11 @@ impl Queue {
// stay valid until we call the function pointer in the `work_struct`, so the access is ok.
unsafe {
w.__enqueue(move |work_ptr| {
- bindings::queue_work_on(bindings::WORK_CPU_UNBOUND as _, queue_ptr, work_ptr)
+ bindings::queue_work_on(
+ bindings::wq_misc_consts_WORK_CPU_UNBOUND as _,
+ queue_ptr,
+ work_ptr,
+ )
})
}
}
@@ -218,7 +221,9 @@ impl Queue {
}
}
-/// A helper type used in `try_spawn`.
+/// A helper type used in [`try_spawn`].
+///
+/// [`try_spawn`]: Queue::try_spawn
#[pin_data]
struct ClosureWork<T> {
#[pin]
@@ -253,14 +258,16 @@ impl<T: FnOnce()> WorkItem for ClosureWork<T> {
/// actual value of the id is not important as long as you use different ids for different fields
/// of the same struct. (Fields of different structs need not use different ids.)
///
-/// Note that the id is used only to select the right method to call during compilation. It wont be
+/// Note that the id is used only to select the right method to call during compilation. It won't be
/// part of the final executable.
///
/// # Safety
///
-/// Implementers must ensure that any pointers passed to a `queue_work_on` closure by `__enqueue`
+/// Implementers must ensure that any pointers passed to a `queue_work_on` closure by [`__enqueue`]
/// remain valid for the duration specified in the guarantees section of the documentation for
-/// `__enqueue`.
+/// [`__enqueue`].
+///
+/// [`__enqueue`]: RawWorkItem::__enqueue
pub unsafe trait RawWorkItem<const ID: u64> {
/// The return type of [`Queue::enqueue`].
type EnqueueOutput;
@@ -290,10 +297,11 @@ pub unsafe trait RawWorkItem<const ID: u64> {
/// Defines the method that should be called directly when a work item is executed.
///
-/// This trait is implemented by `Pin<Box<T>>` and `Arc<T>`, and is mainly intended to be
+/// This trait is implemented by `Pin<Box<T>>` and [`Arc<T>`], and is mainly intended to be
/// implemented for smart pointer types. For your own structs, you would implement [`WorkItem`]
-/// instead. The `run` method on this trait will usually just perform the appropriate
-/// `container_of` translation and then call into the `run` method from the [`WorkItem`] trait.
+/// instead. The [`run`] method on this trait will usually just perform the appropriate
+/// `container_of` translation and then call into the [`run`][WorkItem::run] method from the
+/// [`WorkItem`] trait.
///
/// This trait is used when the `work_struct` field is defined using the [`Work`] helper.
///
@@ -309,8 +317,10 @@ pub unsafe trait WorkItemPointer<const ID: u64>: RawWorkItem<ID> {
///
/// # Safety
///
- /// The provided `work_struct` pointer must originate from a previous call to `__enqueue` where
- /// the `queue_work_on` closure returned true, and the pointer must still be valid.
+ /// The provided `work_struct` pointer must originate from a previous call to [`__enqueue`]
+ /// where the `queue_work_on` closure returned true, and the pointer must still be valid.
+ ///
+ /// [`__enqueue`]: RawWorkItem::__enqueue
unsafe extern "C" fn run(ptr: *mut bindings::work_struct);
}
@@ -328,12 +338,14 @@ pub trait WorkItem<const ID: u64 = 0> {
/// Links for a work item.
///
-/// This struct contains a function pointer to the `run` function from the [`WorkItemPointer`]
+/// This struct contains a function pointer to the [`run`] function from the [`WorkItemPointer`]
/// trait, and defines the linked list pointers necessary to enqueue a work item in a workqueue.
///
/// Wraps the kernel's C `struct work_struct`.
///
/// This is a helper type used to associate a `work_struct` with the [`WorkItem`] that uses it.
+///
+/// [`run`]: WorkItemPointer::run
#[repr(transparent)]
pub struct Work<T: ?Sized, const ID: u64 = 0> {
work: Opaque<bindings::work_struct>,
@@ -396,9 +408,8 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> {
/// like this:
///
/// ```no_run
-/// use kernel::impl_has_work;
/// use kernel::prelude::*;
-/// use kernel::workqueue::Work;
+/// use kernel::workqueue::{impl_has_work, Work};
///
/// struct MyWorkItem {
/// work_field: Work<MyWorkItem, 1>,
@@ -409,28 +420,25 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> {
/// }
/// ```
///
-/// Note that since the `Work` type is annotated with an id, you can have several `work_struct`
+/// Note that since the [`Work`] type is annotated with an id, you can have several `work_struct`
/// fields by using a different id for each one.
///
/// # Safety
///
-/// The [`OFFSET`] constant must be the offset of a field in Self of type [`Work<T, ID>`]. The methods on
-/// this trait must have exactly the behavior that the definitions given below have.
+/// The [`OFFSET`] constant must be the offset of a field in `Self` of type [`Work<T, ID>`]. The
+/// methods on this trait must have exactly the behavior that the definitions given below have.
///
-/// [`Work<T, ID>`]: Work
/// [`impl_has_work!`]: crate::impl_has_work
/// [`OFFSET`]: HasWork::OFFSET
pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// The offset of the [`Work<T, ID>`] field.
- ///
- /// [`Work<T, ID>`]: Work
const OFFSET: usize;
/// Returns the offset of the [`Work<T, ID>`] field.
///
- /// This method exists because the [`OFFSET`] constant cannot be accessed if the type is not Sized.
+ /// This method exists because the [`OFFSET`] constant cannot be accessed if the type is not
+ /// [`Sized`].
///
- /// [`Work<T, ID>`]: Work
/// [`OFFSET`]: HasWork::OFFSET
#[inline]
fn get_work_offset(&self) -> usize {
@@ -442,8 +450,6 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// # Safety
///
/// The provided pointer must point at a valid struct of type `Self`.
- ///
- /// [`Work<T, ID>`]: Work
#[inline]
unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<T, ID> {
// SAFETY: The caller promises that the pointer is valid.
@@ -455,8 +461,6 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// # Safety
///
/// The pointer must point at a [`Work<T, ID>`] field in a struct of type `Self`.
- ///
- /// [`Work<T, ID>`]: Work
#[inline]
unsafe fn work_container_of(ptr: *mut Work<T, ID>) -> *mut Self
where
@@ -473,9 +477,8 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// # Examples
///
/// ```
-/// use kernel::impl_has_work;
/// use kernel::sync::Arc;
-/// use kernel::workqueue::{self, Work};
+/// use kernel::workqueue::{self, impl_has_work, Work};
///
/// struct MyStruct {
/// work_field: Work<MyStruct, 17>,
@@ -485,8 +488,6 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// impl HasWork<MyStruct, 17> for MyStruct { self.work_field }
/// }
/// ```
-///
-/// [`HasWork<T, ID>`]: HasWork
#[macro_export]
macro_rules! impl_has_work {
($(impl$(<$($implarg:ident),*>)?
@@ -509,6 +510,7 @@ macro_rules! impl_has_work {
}
)*};
}
+pub use impl_has_work;
impl_has_work! {
impl<T> HasWork<Self> for ClosureWork<T> { self.work }
diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index d62d8710d77a..27979e582e4b 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -222,10 +222,15 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
}};
// Loadable modules need to export the `{{init,cleanup}}_module` identifiers.
+ /// # Safety
+ ///
+ /// This function must not be called after module initialization, because it may be
+ /// freed after that completes.
#[cfg(MODULE)]
#[doc(hidden)]
#[no_mangle]
- pub extern \"C\" fn init_module() -> core::ffi::c_int {{
+ #[link_section = \".init.text\"]
+ pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{
__init()
}}
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
index 7048bb3594d6..634e81d83efd 100644
--- a/samples/bpf/asm_goto_workaround.h
+++ b/samples/bpf/asm_goto_workaround.h
@@ -4,14 +4,14 @@
#define __ASM_GOTO_WORKAROUND_H
/*
- * This will bring in asm_volatile_goto and asm_inline macro definitions
+ * This will bring in asm_goto_output and asm_inline macro definitions
* if enabled by compiler and config options.
*/
#include <linux/types.h>
-#ifdef asm_volatile_goto
-#undef asm_volatile_goto
-#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#ifdef asm_goto_output
+#undef asm_goto_output
+#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output")
#endif
/*
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index d2fbcf963cdf..07ff471ed6ae 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -370,7 +370,7 @@ static void run_perf_test(int tasks)
static void fill_lpm_trie(void)
{
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
unsigned long value = 0;
unsigned int i;
int r;
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 9d41db09c480..266fdd0b025d 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock)
static void read_route(struct nlmsghdr *nh, int nll)
{
char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
- struct bpf_lpm_trie_key *prefix_key;
+ struct bpf_lpm_trie_key_u8 *prefix_key;
struct rtattr *rt_attr;
struct rtmsg *rt_msg;
int rtm_family;
diff --git a/samples/cgroup/.gitignore b/samples/cgroup/.gitignore
new file mode 100644
index 000000000000..3a0161194cce
--- /dev/null
+++ b/samples/cgroup/.gitignore
@@ -0,0 +1,3 @@
+/cgroup_event_listener
+/memcg_event_listener
+
diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
index 20291ec6489f..a23fec357b5d 100644
--- a/samples/seccomp/user-trap.c
+++ b/samples/seccomp/user-trap.c
@@ -33,6 +33,7 @@ static int send_fd(int sock, int fd)
{
struct msghdr msg = {};
struct cmsghdr *cmsg;
+ int *fd_ptr;
char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
struct iovec io = {
.iov_base = &c,
@@ -47,7 +48,8 @@ static int send_fd(int sock, int fd)
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
- *((int *)CMSG_DATA(cmsg)) = fd;
+ fd_ptr = (int *)CMSG_DATA(cmsg);
+ *fd_ptr = fd;
msg.msg_controllen = cmsg->cmsg_len;
if (sendmsg(sock, &msg, 0) < 0) {
@@ -62,6 +64,7 @@ static int recv_fd(int sock)
{
struct msghdr msg = {};
struct cmsghdr *cmsg;
+ int *fd_ptr;
char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
struct iovec io = {
.iov_base = &c,
@@ -79,8 +82,9 @@ static int recv_fd(int sock)
}
cmsg = CMSG_FIRSTHDR(&msg);
+ fd_ptr = (int *)CMSG_DATA(cmsg);
- return *((int *)CMSG_DATA(cmsg));
+ return *fd_ptr;
}
static int user_trap_syscall(int nr, unsigned int flags)
diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include
index 5a84b6443875..3ee8ecfb8c04 100644
--- a/scripts/Kconfig.include
+++ b/scripts/Kconfig.include
@@ -33,7 +33,7 @@ ld-option = $(success,$(LD) -v $(1))
# $(as-instr,<instr>)
# Return y if the assembler supports <instr>, n otherwise
-as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler-with-cpp -o /dev/null -)
+as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o /dev/null -)
# check if $(CC) and $(LD) exist
$(error-if,$(failure,command -v $(CC)),C compiler '$(CC)' not found)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index dae447a1ad30..0fb7a785594c 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -290,7 +290,7 @@ quiet_cmd_rustc_o_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@
cmd_rustc_o_rs = $(rust_common_cmd) --emit=obj=$@ $<
$(obj)/%.o: $(src)/%.rs FORCE
- $(call if_changed_dep,rustc_o_rs)
+ +$(call if_changed_dep,rustc_o_rs)
quiet_cmd_rustc_rsi_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@
cmd_rustc_rsi_rs = \
@@ -298,19 +298,19 @@ quiet_cmd_rustc_rsi_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@
command -v $(RUSTFMT) >/dev/null && $(RUSTFMT) $@
$(obj)/%.rsi: $(src)/%.rs FORCE
- $(call if_changed_dep,rustc_rsi_rs)
+ +$(call if_changed_dep,rustc_rsi_rs)
quiet_cmd_rustc_s_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@
cmd_rustc_s_rs = $(rust_common_cmd) --emit=asm=$@ $<
$(obj)/%.s: $(src)/%.rs FORCE
- $(call if_changed_dep,rustc_s_rs)
+ +$(call if_changed_dep,rustc_s_rs)
quiet_cmd_rustc_ll_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@
cmd_rustc_ll_rs = $(rust_common_cmd) --emit=llvm-ir=$@ $<
$(obj)/%.ll: $(src)/%.rs FORCE
- $(call if_changed_dep,rustc_ll_rs)
+ +$(call if_changed_dep,rustc_ll_rs)
# Compile assembler sources (.S)
# ---------------------------------------------------------------------------
diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler
index 8fcb427405a6..92be0c9a13ee 100644
--- a/scripts/Makefile.compiler
+++ b/scripts/Makefile.compiler
@@ -38,7 +38,7 @@ as-option = $(call try-run,\
# Usage: aflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\
- printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
+ printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf
index ab271b2051a2..226ea3df3b4b 100644
--- a/scripts/Makefile.defconf
+++ b/scripts/Makefile.defconf
@@ -9,8 +9,8 @@
# Input config fragments without '.config' suffix
define merge_into_defconfig
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
- -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
- $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+ -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
+ $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
endef
@@ -23,7 +23,7 @@ endef
# Input config fragments without '.config' suffix
define merge_into_defconfig_override
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
- -Q -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
- $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+ -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
+ $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
endef
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 9b7a37ae28a8..a9e552a1e910 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -97,7 +97,6 @@ KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow)
KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation)
-KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
KBUILD_CPPFLAGS += -Wundef
@@ -113,7 +112,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
ifdef CONFIG_CC_IS_CLANG
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 08d83d9db31a..3c17e6ba421c 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -156,7 +156,7 @@ quiet_cmd_host-rust = HOSTRUSTC $@
cmd_host-rust = \
$(HOSTRUSTC) $(hostrust_flags) --emit=link=$@ $<
$(host-rust): $(obj)/%: $(src)/%.rs FORCE
- $(call if_changed_dep,host-rust)
+ +$(call if_changed_dep,host-rust)
targets += $(host-csingle) $(host-cmulti) $(host-cobjs) \
$(host-cxxmulti) $(host-cxxobjs) $(host-rust)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index cd5b181060f1..1bd59b8db05f 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -175,8 +175,11 @@ endif
ifeq ($(CONFIG_UBSAN),y)
_c_flags += $(if $(patsubst n%,, \
- $(UBSAN_SANITIZE_$(basetarget).o)$(UBSAN_SANITIZE)$(CONFIG_UBSAN_SANITIZE_ALL)), \
+ $(UBSAN_SANITIZE_$(basetarget).o)$(UBSAN_SANITIZE)y), \
$(CFLAGS_UBSAN))
+_c_flags += $(if $(patsubst n%,, \
+ $(UBSAN_SIGNED_WRAP_$(basetarget).o)$(UBSAN_SANITIZE_$(basetarget).o)$(UBSAN_SIGNED_WRAP)$(UBSAN_SANITIZE)y), \
+ $(CFLAGS_UBSAN_SIGNED_WRAP))
endif
ifeq ($(CONFIG_KCOV),y)
@@ -254,7 +257,7 @@ objtool := $(objtree)/tools/objtool/objtool
objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label
objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr
-objtool-args-$(CONFIG_CALL_DEPTH_TRACKING) += --hacks=skylake
+objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) += --hacks=skylake
objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt
objtool-args-$(CONFIG_FINEIBT) += --cfi
objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount
@@ -262,9 +265,9 @@ ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
objtool-args-$(CONFIG_HAVE_OBJTOOL_NOP_MCOUNT) += --mnop
endif
objtool-args-$(CONFIG_UNWINDER_ORC) += --orc
-objtool-args-$(CONFIG_RETPOLINE) += --retpoline
-objtool-args-$(CONFIG_RETHUNK) += --rethunk
-objtool-args-$(CONFIG_SLS) += --sls
+objtool-args-$(CONFIG_MITIGATION_RETPOLINE) += --retpoline
+objtool-args-$(CONFIG_MITIGATION_RETHUNK) += --rethunk
+objtool-args-$(CONFIG_MITIGATION_SLS) += --sls
objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval
objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call
objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess
@@ -340,7 +343,7 @@ quiet_cmd_gzip = GZIP $@
# DTC
# ---------------------------------------------------------------------------
DTC ?= $(objtree)/scripts/dtc/dtc
-DTC_FLAGS += -Wno-interrupt_provider \
+DTC_FLAGS += \
-Wno-unique_unit_address
# Disable noisy checks by default
@@ -358,7 +361,6 @@ endif
ifneq ($(findstring 2,$(KBUILD_EXTRA_WARN)),)
DTC_FLAGS += -Wnode_name_chars_strict \
-Wproperty_name_chars_strict \
- -Winterrupt_provider \
-Wunique_unit_address
endif
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 4749865c1b2c..b2d3b273b802 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -10,6 +10,9 @@ ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO) += -fsanitize=integer-divide-by-zero
ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE) += -fsanitize=unreachable
ubsan-cflags-$(CONFIG_UBSAN_BOOL) += -fsanitize=bool
ubsan-cflags-$(CONFIG_UBSAN_ENUM) += -fsanitize=enum
-ubsan-cflags-$(CONFIG_UBSAN_TRAP) += -fsanitize-undefined-trap-on-error
+ubsan-cflags-$(CONFIG_UBSAN_TRAP) += $(call cc-option,-fsanitize-trap=undefined,-fsanitize-undefined-trap-on-error)
export CFLAGS_UBSAN := $(ubsan-cflags-y)
+
+ubsan-signed-wrap-cflags-$(CONFIG_UBSAN_SIGNED_WRAP) += -fsanitize=signed-integer-overflow
+export CFLAGS_UBSAN_SIGNED_WRAP := $(ubsan-signed-wrap-cflags-y)
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 25b3b587d37c..6de297916ce6 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -38,7 +38,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION))
vmlinux-objtool-args-$(delay-objtool) += $(objtool-args-y)
vmlinux-objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable
vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \
- $(if $(or $(CONFIG_CPU_UNRET_ENTRY),$(CONFIG_CPU_SRSO)), --unret)
+ $(if $(or $(CONFIG_MITIGATION_UNRET_ENTRY),$(CONFIG_MITIGATION_SRSO)), --unret)
objtool-args = $(vmlinux-objtool-args-y) --link
diff --git a/scripts/atomic/kerneldoc/add_unless b/scripts/atomic/kerneldoc/add_unless
index f828e5f6750c..fbc2fadfbdc4 100644
--- a/scripts/atomic/kerneldoc/add_unless
+++ b/scripts/atomic/kerneldoc/add_unless
@@ -10,6 +10,7 @@ cat <<EOF
* @u: ${int} value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with ${desc_order} ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/atomic/kerneldoc/cmpxchg b/scripts/atomic/kerneldoc/cmpxchg
index 3bce328f50cf..02b24ee9d8a4 100644
--- a/scripts/atomic/kerneldoc/cmpxchg
+++ b/scripts/atomic/kerneldoc/cmpxchg
@@ -6,6 +6,7 @@ cat <<EOF
* @new: ${int} value to assign
*
* If (@v == @old), atomically updates @v to @new with ${desc_order} ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/atomic/kerneldoc/dec_if_positive b/scripts/atomic/kerneldoc/dec_if_positive
index 04f1aed3cf83..9468b4a69603 100644
--- a/scripts/atomic/kerneldoc/dec_if_positive
+++ b/scripts/atomic/kerneldoc/dec_if_positive
@@ -4,6 +4,7 @@ cat <<EOF
* @v: pointer to ${atomic}_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with ${desc_order} ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/atomic/kerneldoc/dec_unless_positive b/scripts/atomic/kerneldoc/dec_unless_positive
index ee73612f0354..06a678678f71 100644
--- a/scripts/atomic/kerneldoc/dec_unless_positive
+++ b/scripts/atomic/kerneldoc/dec_unless_positive
@@ -4,6 +4,7 @@ cat <<EOF
* @v: pointer to ${atomic}_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with ${desc_order} ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/atomic/kerneldoc/inc_not_zero b/scripts/atomic/kerneldoc/inc_not_zero
index 618be08e653e..c1a30fc66ee9 100644
--- a/scripts/atomic/kerneldoc/inc_not_zero
+++ b/scripts/atomic/kerneldoc/inc_not_zero
@@ -4,6 +4,7 @@ cat <<EOF
* @v: pointer to ${atomic}_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with ${desc_order} ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/atomic/kerneldoc/inc_unless_negative b/scripts/atomic/kerneldoc/inc_unless_negative
index 597f23d4dc8d..ece0d2c7b38f 100644
--- a/scripts/atomic/kerneldoc/inc_unless_negative
+++ b/scripts/atomic/kerneldoc/inc_unless_negative
@@ -4,6 +4,7 @@ cat <<EOF
* @v: pointer to ${atomic}_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with ${desc_order} ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/atomic/kerneldoc/try_cmpxchg b/scripts/atomic/kerneldoc/try_cmpxchg
index 296553206c06..3ccff29538f5 100644
--- a/scripts/atomic/kerneldoc/try_cmpxchg
+++ b/scripts/atomic/kerneldoc/try_cmpxchg
@@ -6,7 +6,8 @@ cat <<EOF
* @new: ${int} value to assign
*
* If (@v == @old), atomically updates @v to @new with ${desc_order} ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* ${desc_noinstr}
*
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 61b7dddedc46..4606944984ee 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode
instructions to the kernel when the programs are loaded. The format for that
string is identical to the one in use for kernel modules (Dual licenses, such
as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
-programs that are compatible with the GNU Privacy License (GPL).
+programs that are compatible with the GNU General Public License (GNU GPL).
In order to use such helpers, the eBPF program must be loaded with the correct
license string passed (via **attr**) to the **bpf**\\ () system call, and this
@@ -827,7 +827,7 @@ class PrinterHelpers(Printer):
print(' *{}{}'.format(' \t' if line else '', line))
print(' */')
- print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']),
+ print('static %s %s(* const %s)(' % (self.map_type(proto['ret_type']),
proto['ret_star'], proto['name']), end='')
comma = ''
for i, a in enumerate(proto['args']):
diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py
index 5dea4479240b..e4fb686dfaa9 100755
--- a/scripts/clang-tools/gen_compile_commands.py
+++ b/scripts/clang-tools/gen_compile_commands.py
@@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path):
# escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
# kernel version). The compile_commands.json file is not interepreted
# by Make, so this code replaces the escaped version with '#'.
- prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#')
+ prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')
# Return the canonical path, eliminating any symbolic links encountered in the path.
abs_path = os.path.realpath(os.path.join(root_directory, file_path))
diff --git a/scripts/coccinelle/api/string_choices.cocci b/scripts/coccinelle/api/string_choices.cocci
new file mode 100644
index 000000000000..a71966c0494e
--- /dev/null
+++ b/scripts/coccinelle/api/string_choices.cocci
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/// Find places to use string_choices.h's various helpers.
+//
+// Confidence: Medium
+// Options: --no-includes --include-headers
+virtual patch
+virtual context
+virtual report
+
+@str_plural depends on patch@
+expression E;
+@@
+(
+- ((E == 1) ? "" : "s")
++ str_plural(E)
+|
+- ((E != 1) ? "s" : "")
++ str_plural(E)
+|
+- ((E > 1) ? "s" : "")
++ str_plural(E)
+)
+
+@str_plural_r depends on !patch exists@
+expression E;
+position P;
+@@
+(
+* ((E@P == 1) ? "" : "s")
+|
+* ((E@P != 1) ? "s" : "")
+|
+* ((E@P > 1) ? "s" : "")
+)
+
+@script:python depends on report@
+p << str_plural_r.P;
+e << str_plural_r.E;
+@@
+
+coccilib.report.print_report(p[0], "opportunity for str_plural(%s)" % e)
diff --git a/scripts/coccinelle/misc/struct_size.cocci b/scripts/coccinelle/misc/struct_size.cocci
new file mode 100644
index 000000000000..9b02c37438e4
--- /dev/null
+++ b/scripts/coccinelle/misc/struct_size.cocci
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Check for code that could use struct_size().
+///
+// Confidence: Medium
+// Author: Jacob Keller <jacob.e.keller@intel.com>
+// Copyright: (C) 2023 Intel Corporation
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+// the overflow Kunit tests have some code which intentionally does not use
+// the macros, so we want to ignore this code when reporting potential
+// issues.
+@overflow_tests@
+identifier f = overflow_size_helpers_test;
+@@
+
+f
+
+//----------------------------------------------------------
+// For context mode
+//----------------------------------------------------------
+
+@depends on !overflow_tests && context@
+expression E1, E2;
+identifier m;
+@@
+(
+* (sizeof(*E1) + (E2 * sizeof(*E1->m)))
+)
+
+//----------------------------------------------------------
+// For patch mode
+//----------------------------------------------------------
+
+@depends on !overflow_tests && patch@
+expression E1, E2;
+identifier m;
+@@
+(
+- (sizeof(*E1) + (E2 * sizeof(*E1->m)))
++ struct_size(E1, m, E2)
+)
+
+//----------------------------------------------------------
+// For org and report mode
+//----------------------------------------------------------
+
+@r depends on !overflow_tests && (org || report)@
+expression E1, E2;
+identifier m;
+position p;
+@@
+(
+ (sizeof(*E1)@p + (E2 * sizeof(*E1->m)))
+)
+
+@script:python depends on org@
+p << r.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING should use struct_size")
+
+@script:python depends on report@
+p << r.p;
+@@
+
+msg="WARNING: Use struct_size"
+coccilib.report.print_report(p[0], msg)
+
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index e810e0c27ff1..10fadc238719 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -139,7 +139,7 @@ LX_CONFIG(CONFIG_ARM64_64K_PAGES)
if IS_BUILTIN(CONFIG_ARM64):
LX_VALUE(CONFIG_ARM64_PA_BITS)
LX_VALUE(CONFIG_ARM64_VA_BITS)
- LX_VALUE(CONFIG_ARM64_PAGE_SHIFT)
+ LX_VALUE(CONFIG_PAGE_SHIFT)
LX_VALUE(CONFIG_ARCH_FORCE_MAX_ORDER)
LX_CONFIG(CONFIG_SPARSEMEM)
LX_CONFIG(CONFIG_SPARSEMEM_EXTREME)
diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py
index ad5641dcb068..515730fd4c9d 100644
--- a/scripts/gdb/linux/mm.py
+++ b/scripts/gdb/linux/mm.py
@@ -41,7 +41,7 @@ class aarch64_page_ops():
self.SECTION_SIZE_BITS = 27
self.MAX_PHYSMEM_BITS = constants.LX_CONFIG_ARM64_VA_BITS
- self.PAGE_SHIFT = constants.LX_CONFIG_ARM64_PAGE_SHIFT
+ self.PAGE_SHIFT = constants.LX_CONFIG_PAGE_SHIFT
self.PAGE_SIZE = 1 << self.PAGE_SHIFT
self.PAGE_MASK = (~(self.PAGE_SIZE - 1)) & ((1 << 64) - 1)
diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index c8047f4441e6..e8316beb17a7 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py
@@ -82,7 +82,7 @@ lx-symbols command."""
self.module_files_updated = True
def _get_module_file(self, module_name):
- module_pattern = ".*/{0}\.ko(?:.debug)?$".format(
+ module_pattern = r".*/{0}\.ko(?:.debug)?$".format(
module_name.replace("_", r"[_\-]"))
for name in self.module_files:
if re.match(module_pattern, name) and os.path.exists(name):
diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs
index 0da52b548ba5..19f72bfdbb82 100644
--- a/scripts/generate_rust_target.rs
+++ b/scripts/generate_rust_target.rs
@@ -155,7 +155,7 @@ fn main() {
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
);
let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string();
- if cfg.has("RETPOLINE") {
+ if cfg.has("MITIGATION_RETPOLINE") {
features += ",+retpoline-external-thunk";
}
ts.push("features", features);
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index 3e808528aaea..e9e9fb8d8674 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -345,6 +345,8 @@ void sym_calc_value(struct symbol *sym)
oldval = sym->curr;
+ newval.tri = no;
+
switch (sym->type) {
case S_INT:
newval.val = "0";
@@ -357,7 +359,7 @@ void sym_calc_value(struct symbol *sym)
break;
case S_BOOLEAN:
case S_TRISTATE:
- newval = symbol_no.curr;
+ newval.val = "n";
break;
default:
sym->curr.val = sym->name;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index e8aefd258a29..71a89d8832c1 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1,5 +1,6 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
+# vim: softtabstop=4
use warnings;
use strict;
@@ -81,49 +82,51 @@ my $type_member_func = $type_member . '\(\)';
# these are pretty rough
my @highlights_man = (
- [$type_constant, "\$1"],
- [$type_constant2, "\$1"],
- [$type_func, "\\\\fB\$1\\\\fP"],
- [$type_enum, "\\\\fI\$1\\\\fP"],
- [$type_struct, "\\\\fI\$1\\\\fP"],
- [$type_typedef, "\\\\fI\$1\\\\fP"],
- [$type_union, "\\\\fI\$1\\\\fP"],
- [$type_param, "\\\\fI\$1\\\\fP"],
- [$type_param_ref, "\\\\fI\$1\$2\\\\fP"],
- [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
- [$type_fallback, "\\\\fI\$1\\\\fP"]
- );
+ [$type_constant, "\$1"],
+ [$type_constant2, "\$1"],
+ [$type_func, "\\\\fB\$1\\\\fP"],
+ [$type_enum, "\\\\fI\$1\\\\fP"],
+ [$type_struct, "\\\\fI\$1\\\\fP"],
+ [$type_typedef, "\\\\fI\$1\\\\fP"],
+ [$type_union, "\\\\fI\$1\\\\fP"],
+ [$type_param, "\\\\fI\$1\\\\fP"],
+ [$type_param_ref, "\\\\fI\$1\$2\\\\fP"],
+ [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
+ [$type_fallback, "\\\\fI\$1\\\\fP"]
+ );
my $blankline_man = "";
# rst-mode
my @highlights_rst = (
- [$type_constant, "``\$1``"],
- [$type_constant2, "``\$1``"],
- # Note: need to escape () to avoid func matching later
- [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
- [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
- [$type_fp_param, "**\$1\\\\(\\\\)**"],
- [$type_fp_param2, "**\$1\\\\(\\\\)**"],
- [$type_func, "\$1()"],
- [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
- [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
- [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
- [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
- # in rst this can refer to any type
- [$type_fallback, "\\:c\\:type\\:`\$1`"],
- [$type_param_ref, "**\$1\$2**"]
- );
+ [$type_constant, "``\$1``"],
+ [$type_constant2, "``\$1``"],
+
+ # Note: need to escape () to avoid func matching later
+ [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
+ [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
+ [$type_fp_param, "**\$1\\\\(\\\\)**"],
+ [$type_fp_param2, "**\$1\\\\(\\\\)**"],
+ [$type_func, "\$1()"],
+ [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
+ [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
+ [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
+ [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
+
+ # in rst this can refer to any type
+ [$type_fallback, "\\:c\\:type\\:`\$1`"],
+ [$type_param_ref, "**\$1\$2**"]
+ );
my $blankline_rst = "\n";
# read arguments
if ($#ARGV == -1) {
- pod2usage(
- -message => "No arguments!\n",
- -exitval => 1,
- -verbose => 99,
- -sections => 'SYNOPSIS',
- -output => \*STDERR,
- );
+ pod2usage(
+ -message => "No arguments!\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
}
my $kernelversion;
@@ -164,9 +167,9 @@ if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) &&
}
my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
- 'July', 'August', 'September', 'October',
- 'November', 'December')[$build_time[4]] .
- " " . ($build_time[5]+1900);
+ 'July', 'August', 'September', 'October',
+ 'November', 'December')[$build_time[4]] .
+ " " . ($build_time[5]+1900);
# Essentially these are globals.
# They probably want to be tidied up, made more localised or something.
@@ -179,22 +182,22 @@ my ($type, $declaration_name, $return_type);
my ($newsection, $newcontents, $prototype, $brcount, %source_map);
if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') {
- $verbose = 1;
+ $verbose = 1;
}
if (defined($ENV{'KCFLAGS'})) {
- my $kcflags = "$ENV{'KCFLAGS'}";
+ my $kcflags = "$ENV{'KCFLAGS'}";
- if ($kcflags =~ /(\s|^)-Werror(\s|$)/) {
- $Werror = 1;
- }
+ if ($kcflags =~ /(\s|^)-Werror(\s|$)/) {
+ $Werror = 1;
+ }
}
# reading this variable is for backwards compat just in case
# someone was calling it with the variable from outside the
# kernel's build system
if (defined($ENV{'KDOC_WERROR'})) {
- $Werror = "$ENV{'KDOC_WERROR'}";
+ $Werror = "$ENV{'KDOC_WERROR'}";
}
# other environment variables are converted to command-line
# arguments in cmd_checkdoc in the build system
@@ -295,90 +298,90 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
my $cmd = $1;
shift @ARGV;
if ($cmd eq "man") {
- $output_mode = "man";
- @highlights = @highlights_man;
- $blankline = $blankline_man;
+ $output_mode = "man";
+ @highlights = @highlights_man;
+ $blankline = $blankline_man;
} elsif ($cmd eq "rst") {
- $output_mode = "rst";
- @highlights = @highlights_rst;
- $blankline = $blankline_rst;
+ $output_mode = "rst";
+ @highlights = @highlights_rst;
+ $blankline = $blankline_rst;
} elsif ($cmd eq "none") {
- $output_mode = "none";
+ $output_mode = "none";
} elsif ($cmd eq "module") { # not needed for XML, inherits from calling document
- $modulename = shift @ARGV;
+ $modulename = shift @ARGV;
} elsif ($cmd eq "function") { # to only output specific functions
- $output_selection = OUTPUT_INCLUDE;
- $function = shift @ARGV;
- $function_table{$function} = 1;
+ $output_selection = OUTPUT_INCLUDE;
+ $function = shift @ARGV;
+ $function_table{$function} = 1;
} elsif ($cmd eq "nosymbol") { # Exclude specific symbols
- my $symbol = shift @ARGV;
- $nosymbol_table{$symbol} = 1;
+ my $symbol = shift @ARGV;
+ $nosymbol_table{$symbol} = 1;
} elsif ($cmd eq "export") { # only exported symbols
- $output_selection = OUTPUT_EXPORTED;
- %function_table = ();
+ $output_selection = OUTPUT_EXPORTED;
+ %function_table = ();
} elsif ($cmd eq "internal") { # only non-exported symbols
- $output_selection = OUTPUT_INTERNAL;
- %function_table = ();
+ $output_selection = OUTPUT_INTERNAL;
+ %function_table = ();
} elsif ($cmd eq "export-file") {
- my $file = shift @ARGV;
- push(@export_file_list, $file);
+ my $file = shift @ARGV;
+ push(@export_file_list, $file);
} elsif ($cmd eq "v") {
- $verbose = 1;
+ $verbose = 1;
} elsif ($cmd eq "Werror") {
- $Werror = 1;
+ $Werror = 1;
} elsif ($cmd eq "Wreturn") {
- $Wreturn = 1;
+ $Wreturn = 1;
} elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") {
- $Wshort_desc = 1;
+ $Wshort_desc = 1;
} elsif ($cmd eq "Wcontents-before-sections") {
- $Wcontents_before_sections = 1;
+ $Wcontents_before_sections = 1;
} elsif ($cmd eq "Wall") {
$Wreturn = 1;
$Wshort_desc = 1;
$Wcontents_before_sections = 1;
} elsif (($cmd eq "h") || ($cmd eq "help")) {
- pod2usage(-exitval => 0, -verbose => 2);
+ pod2usage(-exitval => 0, -verbose => 2);
} elsif ($cmd eq 'no-doc-sections') {
- $no_doc_sections = 1;
+ $no_doc_sections = 1;
} elsif ($cmd eq 'enable-lineno') {
- $enable_lineno = 1;
+ $enable_lineno = 1;
} elsif ($cmd eq 'show-not-found') {
- $show_not_found = 1; # A no-op but don't fail
+ $show_not_found = 1; # A no-op but don't fail
} elsif ($cmd eq "sphinx-version") {
- my $ver_string = shift @ARGV;
- if ($ver_string =~ m/^(\d+)(\.\d+)?(\.\d+)?/) {
- $sphinx_major = $1;
- if (defined($2)) {
- $sphinx_minor = substr($2,1);
- } else {
- $sphinx_minor = 0;
- }
- if (defined($3)) {
- $sphinx_patch = substr($3,1)
- } else {
- $sphinx_patch = 0;
- }
- } else {
- die "Sphinx version should either major.minor or major.minor.patch format\n";
- }
+ my $ver_string = shift @ARGV;
+ if ($ver_string =~ m/^(\d+)(\.\d+)?(\.\d+)?/) {
+ $sphinx_major = $1;
+ if (defined($2)) {
+ $sphinx_minor = substr($2,1);
+ } else {
+ $sphinx_minor = 0;
+ }
+ if (defined($3)) {
+ $sphinx_patch = substr($3,1)
+ } else {
+ $sphinx_patch = 0;
+ }
+ } else {
+ die "Sphinx version should either major.minor or major.minor.patch format\n";
+ }
} else {
- # Unknown argument
- pod2usage(
- -message => "Argument unknown!\n",
- -exitval => 1,
- -verbose => 99,
- -sections => 'SYNOPSIS',
- -output => \*STDERR,
- );
+ # Unknown argument
+ pod2usage(
+ -message => "Argument unknown!\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
}
if ($#ARGV < 0){
- pod2usage(
- -message => "FILE argument missing\n",
- -exitval => 1,
- -verbose => 99,
- -sections => 'SYNOPSIS',
- -output => \*STDERR,
- );
+ pod2usage(
+ -message => "FILE argument missing\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
}
}
@@ -388,45 +391,45 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
# version in order to produce the right tags.
sub findprog($)
{
- foreach(split(/:/, $ENV{PATH})) {
- return "$_/$_[0]" if(-x "$_/$_[0]");
- }
+ foreach(split(/:/, $ENV{PATH})) {
+ return "$_/$_[0]" if(-x "$_/$_[0]");
+ }
}
sub get_sphinx_version()
{
- my $ver;
-
- my $cmd = "sphinx-build";
- if (!findprog($cmd)) {
- my $cmd = "sphinx-build3";
- if (!findprog($cmd)) {
- $sphinx_major = 1;
- $sphinx_minor = 2;
- $sphinx_patch = 0;
- printf STDERR "Warning: Sphinx version not found. Using default (Sphinx version %d.%d.%d)\n",
- $sphinx_major, $sphinx_minor, $sphinx_patch;
- return;
- }
- }
-
- open IN, "$cmd --version 2>&1 |";
- while (<IN>) {
- if (m/^\s*sphinx-build\s+([\d]+)\.([\d\.]+)(\+\/[\da-f]+)?$/) {
- $sphinx_major = $1;
- $sphinx_minor = $2;
- $sphinx_patch = $3;
- last;
- }
- # Sphinx 1.2.x uses a different format
- if (m/^\s*Sphinx.*\s+([\d]+)\.([\d\.]+)$/) {
- $sphinx_major = $1;
- $sphinx_minor = $2;
- $sphinx_patch = $3;
- last;
- }
- }
- close IN;
+ my $ver;
+
+ my $cmd = "sphinx-build";
+ if (!findprog($cmd)) {
+ my $cmd = "sphinx-build3";
+ if (!findprog($cmd)) {
+ $sphinx_major = 1;
+ $sphinx_minor = 2;
+ $sphinx_patch = 0;
+ printf STDERR "Warning: Sphinx version not found. Using default (Sphinx version %d.%d.%d)\n",
+ $sphinx_major, $sphinx_minor, $sphinx_patch;
+ return;
+ }
+ }
+
+ open IN, "$cmd --version 2>&1 |";
+ while (<IN>) {
+ if (m/^\s*sphinx-build\s+([\d]+)\.([\d\.]+)(\+\/[\da-f]+)?$/) {
+ $sphinx_major = $1;
+ $sphinx_minor = $2;
+ $sphinx_patch = $3;
+ last;
+ }
+ # Sphinx 1.2.x uses a different format
+ if (m/^\s*Sphinx.*\s+([\d]+)\.([\d\.]+)$/) {
+ $sphinx_major = $1;
+ $sphinx_minor = $2;
+ $sphinx_patch = $3;
+ last;
+ }
+ }
+ close IN;
}
# get kernel version from env
@@ -434,7 +437,7 @@ sub get_kernel_version() {
my $version = 'unknown kernel version';
if (defined($ENV{'KERNELVERSION'})) {
- $version = $ENV{'KERNELVERSION'};
+ $version = $ENV{'KERNELVERSION'};
}
return $version;
}
@@ -462,30 +465,30 @@ sub dump_section {
my $contents = join "\n", @_;
if ($name =~ m/$type_param/) {
- $name = $1;
- $parameterdescs{$name} = $contents;
- $sectcheck = $sectcheck . $name . " ";
+ $name = $1;
+ $parameterdescs{$name} = $contents;
+ $sectcheck = $sectcheck . $name . " ";
$parameterdesc_start_lines{$name} = $new_start_line;
$new_start_line = 0;
} elsif ($name eq "@\.\.\.") {
- $name = "...";
- $parameterdescs{$name} = $contents;
- $sectcheck = $sectcheck . $name . " ";
+ $name = "...";
+ $parameterdescs{$name} = $contents;
+ $sectcheck = $sectcheck . $name . " ";
$parameterdesc_start_lines{$name} = $new_start_line;
$new_start_line = 0;
} else {
- if (defined($sections{$name}) && ($sections{$name} ne "")) {
- # Only warn on user specified duplicate section names.
- if ($name ne $section_default) {
- emit_warning("${file}:$.", "duplicate section name '$name'\n");
- }
- $sections{$name} .= $contents;
- } else {
- $sections{$name} = $contents;
- push @sectionlist, $name;
+ if (defined($sections{$name}) && ($sections{$name} ne "")) {
+ # Only warn on user specified duplicate section names.
+ if ($name ne $section_default) {
+ emit_warning("${file}:$.", "duplicate section name '$name'\n");
+ }
+ $sections{$name} .= $contents;
+ } else {
+ $sections{$name} = $contents;
+ push @sectionlist, $name;
$section_start_lines{$name} = $new_start_line;
$new_start_line = 0;
- }
+ }
}
}
@@ -504,14 +507,14 @@ sub dump_doc_section {
return if (defined($nosymbol_table{$name}));
if (($output_selection == OUTPUT_ALL) ||
- (($output_selection == OUTPUT_INCLUDE) &&
- defined($function_table{$name})))
+ (($output_selection == OUTPUT_INCLUDE) &&
+ defined($function_table{$name})))
{
- dump_section($file, $name, $contents);
- output_blockhead({'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'module' => $modulename,
- 'content-only' => ($output_selection != OUTPUT_ALL), });
+ dump_section($file, $name, $contents);
+ output_blockhead({'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'module' => $modulename,
+ 'content-only' => ($output_selection != OUTPUT_ALL), });
}
}
@@ -542,21 +545,21 @@ sub output_highlight {
# print STDERR "contents af:$contents\n";
foreach $line (split "\n", $contents) {
- if (! $output_preformatted) {
- $line =~ s/^\s*//;
- }
- if ($line eq ""){
- if (! $output_preformatted) {
- print $lineprefix, $blankline;
- }
- } else {
- if ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
- print "\\&$line";
- } else {
- print $lineprefix, $line;
- }
- }
- print "\n";
+ if (! $output_preformatted) {
+ $line =~ s/^\s*//;
+ }
+ if ($line eq ""){
+ if (! $output_preformatted) {
+ print $lineprefix, $blankline;
+ }
+ } else {
+ if ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
+ print "\\&$line";
+ } else {
+ print $lineprefix, $line;
+ }
+ }
+ print "\n";
}
}
@@ -574,40 +577,40 @@ sub output_function_man(%) {
print ".SH SYNOPSIS\n";
if ($args{'functiontype'} ne "") {
- print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n";
+ print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n";
} else {
- print ".B \"" . $args{'function'} . "\n";
+ print ".B \"" . $args{'function'} . "\n";
}
$count = 0;
my $parenth = "(";
my $post = ",";
foreach my $parameter (@{$args{'parameterlist'}}) {
- if ($count == $#{$args{'parameterlist'}}) {
- $post = ");";
- }
- $type = $args{'parametertypes'}{$parameter};
- if ($type =~ m/$function_pointer/) {
- # pointer-to-function
- print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
- } else {
- $type =~ s/([^\*])$/$1 /;
- print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n";
- }
- $count++;
- $parenth = "";
+ if ($count == $#{$args{'parameterlist'}}) {
+ $post = ");";
+ }
+ $type = $args{'parametertypes'}{$parameter};
+ if ($type =~ m/$function_pointer/) {
+ # pointer-to-function
+ print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
+ } else {
+ $type =~ s/([^\*])$/$1 /;
+ print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n";
+ }
+ $count++;
+ $parenth = "";
}
print ".SH ARGUMENTS\n";
foreach $parameter (@{$args{'parameterlist'}}) {
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
- print ".IP \"" . $parameter . "\" 12\n";
- output_highlight($args{'parameterdescs'}{$parameter_name});
+ print ".IP \"" . $parameter . "\" 12\n";
+ output_highlight($args{'parameterdescs'}{$parameter_name});
}
foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"", uc $section, "\"\n";
- output_highlight($args{'sections'}{$section});
+ print ".SH \"", uc $section, "\"\n";
+ output_highlight($args{'sections'}{$section});
}
}
@@ -627,28 +630,27 @@ sub output_enum_man(%) {
print "enum " . $args{'enum'} . " {\n";
$count = 0;
foreach my $parameter (@{$args{'parameterlist'}}) {
- print ".br\n.BI \" $parameter\"\n";
- if ($count == $#{$args{'parameterlist'}}) {
- print "\n};\n";
- last;
- }
- else {
- print ", \n.br\n";
- }
- $count++;
+ print ".br\n.BI \" $parameter\"\n";
+ if ($count == $#{$args{'parameterlist'}}) {
+ print "\n};\n";
+ last;
+ } else {
+ print ", \n.br\n";
+ }
+ $count++;
}
print ".SH Constants\n";
foreach $parameter (@{$args{'parameterlist'}}) {
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
- print ".IP \"" . $parameter . "\" 12\n";
- output_highlight($args{'parameterdescs'}{$parameter_name});
+ print ".IP \"" . $parameter . "\" 12\n";
+ output_highlight($args{'parameterdescs'}{$parameter_name});
}
foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
}
}
@@ -672,18 +674,18 @@ sub output_struct_man(%) {
print ".SH Members\n";
foreach $parameter (@{$args{'parameterlist'}}) {
- ($parameter =~ /^#/) && next;
+ ($parameter =~ /^#/) && next;
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
- ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
- print ".IP \"" . $parameter . "\" 12\n";
- output_highlight($args{'parameterdescs'}{$parameter_name});
+ ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+ print ".IP \"" . $parameter . "\" 12\n";
+ output_highlight($args{'parameterdescs'}{$parameter_name});
}
foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
}
}
@@ -699,8 +701,8 @@ sub output_typedef_man(%) {
print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n";
foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
}
}
@@ -712,8 +714,8 @@ sub output_blockhead_man(%) {
print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n";
foreach $section (@{$args{'sectionlist'}}) {
- print ".SH \"$section\"\n";
- output_highlight($args{'sections'}{$section});
+ print ".SH \"$section\"\n";
+ output_highlight($args{'sections'}{$section});
}
}
@@ -731,15 +733,15 @@ sub output_blockhead_rst(%) {
my ($parameter, $section);
foreach $section (@{$args{'sectionlist'}}) {
- next if (defined($nosymbol_table{$section}));
+ next if (defined($nosymbol_table{$section}));
- if ($output_selection != OUTPUT_INCLUDE) {
- print ".. _$section:\n\n";
- print "**$section**\n\n";
- }
+ if ($output_selection != OUTPUT_INCLUDE) {
+ print ".. _$section:\n\n";
+ print "**$section**\n\n";
+ }
print_lineno($section_start_lines{$section});
- output_highlight_rst($args{'sections'}{$section});
- print "\n";
+ output_highlight_rst($args{'sections'}{$section});
+ print "\n";
}
}
@@ -769,48 +771,48 @@ sub output_highlight_rst {
my $block = "";
foreach $line (split "\n",$input) {
- #
- # If we're in a literal block, see if we should drop out
- # of it. Otherwise pass the line straight through unmunged.
- #
- if ($in_literal) {
- if (! ($line =~ /^\s*$/)) {
- #
- # If this is the first non-blank line in a literal
- # block we need to figure out what the proper indent is.
- #
- if ($litprefix eq "") {
- $line =~ /^(\s*)/;
- $litprefix = '^' . $1;
- $output .= $line . "\n";
- } elsif (! ($line =~ /$litprefix/)) {
- $in_literal = 0;
- } else {
- $output .= $line . "\n";
- }
- } else {
- $output .= $line . "\n";
- }
- }
- #
- # Not in a literal block (or just dropped out)
- #
- if (! $in_literal) {
- $block .= $line . "\n";
- if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
- $in_literal = 1;
- $litprefix = "";
- $output .= highlight_block($block);
- $block = ""
- }
- }
+ #
+ # If we're in a literal block, see if we should drop out
+ # of it. Otherwise pass the line straight through unmunged.
+ #
+ if ($in_literal) {
+ if (! ($line =~ /^\s*$/)) {
+ #
+ # If this is the first non-blank line in a literal
+ # block we need to figure out what the proper indent is.
+ #
+ if ($litprefix eq "") {
+ $line =~ /^(\s*)/;
+ $litprefix = '^' . $1;
+ $output .= $line . "\n";
+ } elsif (! ($line =~ /$litprefix/)) {
+ $in_literal = 0;
+ } else {
+ $output .= $line . "\n";
+ }
+ } else {
+ $output .= $line . "\n";
+ }
+ }
+ #
+ # Not in a literal block (or just dropped out)
+ #
+ if (! $in_literal) {
+ $block .= $line . "\n";
+ if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
+ $in_literal = 1;
+ $litprefix = "";
+ $output .= highlight_block($block);
+ $block = ""
+ }
+ }
}
if ($block) {
- $output .= highlight_block($block);
+ $output .= highlight_block($block);
}
foreach $line (split "\n", $output) {
- print $lineprefix . $line . "\n";
+ print $lineprefix . $line . "\n";
}
}
@@ -818,71 +820,68 @@ sub output_function_rst(%) {
my %args = %{$_[0]};
my ($parameter, $section);
my $oldprefix = $lineprefix;
- my $start = "";
- my $is_macro = 0;
- if ($sphinx_major < 3) {
- if ($args{'typedef'}) {
- print ".. c:type:: ". $args{'function'} . "\n\n";
- print_lineno($declaration_start_line);
- print " **Typedef**: ";
- $lineprefix = "";
- output_highlight_rst($args{'purpose'});
- $start = "\n\n**Syntax**\n\n ``";
- $is_macro = 1;
- } else {
- print ".. c:function:: ";
- }
- } else {
- if ($args{'typedef'} || $args{'functiontype'} eq "") {
- $is_macro = 1;
- print ".. c:macro:: ". $args{'function'} . "\n\n";
- } else {
- print ".. c:function:: ";
- }
-
- if ($args{'typedef'}) {
- print_lineno($declaration_start_line);
- print " **Typedef**: ";
- $lineprefix = "";
- output_highlight_rst($args{'purpose'});
- $start = "\n\n**Syntax**\n\n ``";
- } else {
- print "``" if ($is_macro);
- }
- }
+ my $signature = "";
if ($args{'functiontype'} ne "") {
- $start .= $args{'functiontype'} . " " . $args{'function'} . " (";
+ $signature = $args{'functiontype'} . " " . $args{'function'} . " (";
} else {
- $start .= $args{'function'} . " (";
+ $signature = $args{'function'} . " (";
}
- print $start;
my $count = 0;
foreach my $parameter (@{$args{'parameterlist'}}) {
- if ($count ne 0) {
- print ", ";
- }
- $count++;
- $type = $args{'parametertypes'}{$parameter};
-
- if ($type =~ m/$function_pointer/) {
- # pointer-to-function
- print $1 . $parameter . ") (" . $2 . ")";
- } else {
- print $type;
- }
- }
- if ($is_macro) {
- print ")``\n\n";
+ if ($count ne 0) {
+ $signature .= ", ";
+ }
+ $count++;
+ $type = $args{'parametertypes'}{$parameter};
+
+ if ($type =~ m/$function_pointer/) {
+ # pointer-to-function
+ $signature .= $1 . $parameter . ") (" . $2 . ")";
+ } else {
+ $signature .= $type;
+ }
+ }
+
+ $signature .= ")";
+
+ if ($sphinx_major < 3) {
+ if ($args{'typedef'}) {
+ print ".. c:type:: ". $args{'function'} . "\n\n";
+ print_lineno($declaration_start_line);
+ print " **Typedef**: ";
+ $lineprefix = "";
+ output_highlight_rst($args{'purpose'});
+ print "\n\n**Syntax**\n\n";
+ print " ``$signature``\n\n";
+ } else {
+ print ".. c:function:: $signature\n\n";
+ }
} else {
- print ")\n\n";
+ if ($args{'typedef'} || $args{'functiontype'} eq "") {
+ print ".. c:macro:: ". $args{'function'} . "\n\n";
+
+ if ($args{'typedef'}) {
+ print_lineno($declaration_start_line);
+ print " **Typedef**: ";
+ $lineprefix = "";
+ output_highlight_rst($args{'purpose'});
+ print "\n\n**Syntax**\n\n";
+ print " ``$signature``\n\n";
+ } else {
+ print "``$signature``\n\n";
+ }
+ } else {
+ print ".. c:function:: $signature\n\n";
+ }
}
+
if (!$args{'typedef'}) {
- print_lineno($declaration_start_line);
- $lineprefix = " ";
- output_highlight_rst($args{'purpose'});
- print "\n";
+ print_lineno($declaration_start_line);
+ $lineprefix = " ";
+ output_highlight_rst($args{'purpose'});
+ print "\n";
}
#
@@ -893,27 +892,27 @@ sub output_function_rst(%) {
$lineprefix = " ";
print $lineprefix . "**Parameters**\n\n";
foreach $parameter (@{$args{'parameterlist'}}) {
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
- $type = $args{'parametertypes'}{$parameter};
-
- if ($type ne "") {
- print $lineprefix . "``$type``\n";
- } else {
- print $lineprefix . "``$parameter``\n";
- }
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
+ $type = $args{'parametertypes'}{$parameter};
+
+ if ($type ne "") {
+ print $lineprefix . "``$type``\n";
+ } else {
+ print $lineprefix . "``$parameter``\n";
+ }
print_lineno($parameterdesc_start_lines{$parameter_name});
- $lineprefix = " ";
- if (defined($args{'parameterdescs'}{$parameter_name}) &&
- $args{'parameterdescs'}{$parameter_name} ne $undescribed) {
- output_highlight_rst($args{'parameterdescs'}{$parameter_name});
- } else {
- print $lineprefix . "*undescribed*\n";
- }
- $lineprefix = " ";
- print "\n";
+ $lineprefix = " ";
+ if (defined($args{'parameterdescs'}{$parameter_name}) &&
+ $args{'parameterdescs'}{$parameter_name} ne $undescribed) {
+ output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+ } else {
+ print $lineprefix . "*undescribed*\n";
+ }
+ $lineprefix = " ";
+ print "\n";
}
output_section_rst(@_);
@@ -926,10 +925,10 @@ sub output_section_rst(%) {
my $oldprefix = $lineprefix;
foreach $section (@{$args{'sectionlist'}}) {
- print $lineprefix . "**$section**\n\n";
+ print $lineprefix . "**$section**\n\n";
print_lineno($section_start_lines{$section});
- output_highlight_rst($args{'sections'}{$section});
- print "\n";
+ output_highlight_rst($args{'sections'}{$section});
+ print "\n";
}
print "\n";
}
@@ -942,11 +941,11 @@ sub output_enum_rst(%) {
my $outer;
if ($sphinx_major < 3) {
- my $name = "enum " . $args{'enum'};
- print "\n\n.. c:type:: " . $name . "\n\n";
+ my $name = "enum " . $args{'enum'};
+ print "\n\n.. c:type:: " . $name . "\n\n";
} else {
- my $name = $args{'enum'};
- print "\n\n.. c:enum:: " . $name . "\n\n";
+ my $name = $args{'enum'};
+ print "\n\n.. c:enum:: " . $name . "\n\n";
}
print_lineno($declaration_start_line);
$lineprefix = " ";
@@ -958,14 +957,14 @@ sub output_enum_rst(%) {
$lineprefix = $outer . " ";
print $outer . "**Constants**\n\n";
foreach $parameter (@{$args{'parameterlist'}}) {
- print $outer . "``$parameter``\n";
+ print $outer . "``$parameter``\n";
- if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
- output_highlight_rst($args{'parameterdescs'}{$parameter});
- } else {
- print $lineprefix . "*undescribed*\n";
- }
- print "\n";
+ if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
+ output_highlight_rst($args{'parameterdescs'}{$parameter});
+ } else {
+ print $lineprefix . "*undescribed*\n";
+ }
+ print "\n";
}
print "\n";
$lineprefix = $oldprefix;
@@ -979,9 +978,9 @@ sub output_typedef_rst(%) {
my $name;
if ($sphinx_major < 3) {
- $name = "typedef " . $args{'typedef'};
+ $name = "typedef " . $args{'typedef'};
} else {
- $name = $args{'typedef'};
+ $name = $args{'typedef'};
}
print "\n\n.. c:type:: " . $name . "\n\n";
print_lineno($declaration_start_line);
@@ -999,15 +998,15 @@ sub output_struct_rst(%) {
my $oldprefix = $lineprefix;
if ($sphinx_major < 3) {
- my $name = $args{'type'} . " " . $args{'struct'};
- print "\n\n.. c:type:: " . $name . "\n\n";
+ my $name = $args{'type'} . " " . $args{'struct'};
+ print "\n\n.. c:type:: " . $name . "\n\n";
} else {
- my $name = $args{'struct'};
- if ($args{'type'} eq 'union') {
- print "\n\n.. c:union:: " . $name . "\n\n";
- } else {
- print "\n\n.. c:struct:: " . $name . "\n\n";
- }
+ my $name = $args{'struct'};
+ if ($args{'type'} eq 'union') {
+ print "\n\n.. c:union:: " . $name . "\n\n";
+ } else {
+ print "\n\n.. c:struct:: " . $name . "\n\n";
+ }
}
print_lineno($declaration_start_line);
$lineprefix = " ";
@@ -1024,19 +1023,19 @@ sub output_struct_rst(%) {
$lineprefix = " ";
print $lineprefix . "**Members**\n\n";
foreach $parameter (@{$args{'parameterlist'}}) {
- ($parameter =~ /^#/) && next;
+ ($parameter =~ /^#/) && next;
- my $parameter_name = $parameter;
- $parameter_name =~ s/\[.*//;
+ my $parameter_name = $parameter;
+ $parameter_name =~ s/\[.*//;
- ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
- $type = $args{'parametertypes'}{$parameter};
+ ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+ $type = $args{'parametertypes'}{$parameter};
print_lineno($parameterdesc_start_lines{$parameter_name});
- print $lineprefix . "``" . $parameter . "``\n";
- $lineprefix = " ";
- output_highlight_rst($args{'parameterdescs'}{$parameter_name});
- $lineprefix = " ";
- print "\n";
+ print $lineprefix . "``" . $parameter . "``\n";
+ $lineprefix = " ";
+ output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+ $lineprefix = " ";
+ print "\n";
}
print "\n";
@@ -1074,14 +1073,14 @@ sub output_declaration {
return if (defined($nosymbol_table{$name}));
if (($output_selection == OUTPUT_ALL) ||
- (($output_selection == OUTPUT_INCLUDE ||
- $output_selection == OUTPUT_EXPORTED) &&
- defined($function_table{$name})) ||
- ($output_selection == OUTPUT_INTERNAL &&
- !($functype eq "function" && defined($function_table{$name}))))
+ (($output_selection == OUTPUT_INCLUDE ||
+ $output_selection == OUTPUT_EXPORTED) &&
+ defined($function_table{$name})) ||
+ ($output_selection == OUTPUT_INTERNAL &&
+ !($functype eq "function" && defined($function_table{$name}))))
{
- &$func(@_);
- $section_counter++;
+ &$func(@_);
+ $section_counter++;
}
}
@@ -1120,203 +1119,202 @@ sub dump_struct($$) {
my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
- $decl_type = $1;
- $declaration_name = $2;
- $members = $3;
+ $decl_type = $1;
+ $declaration_name = $2;
+ $members = $3;
} elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) {
- $decl_type = $1;
- $declaration_name = $3;
- $members = $2;
+ $decl_type = $1;
+ $declaration_name = $3;
+ $members = $2;
}
if ($members) {
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n");
- return;
- }
-
- # ignore members marked private:
- $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
- $members =~ s/\/\*\s*private:.*//gosi;
- # strip comments:
- $members =~ s/\/\*.*?\*\///gos;
- # strip attributes
- $members =~ s/\s*$attribute/ /gi;
- $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
- $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos;
- $members =~ s/\s*__packed\s*/ /gos;
- $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
- $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
- $members =~ s/\s*____cacheline_aligned/ /gos;
- # unwrap struct_group():
- # - first eat non-declaration parameters and rewrite for final match
- # - then remove macro, outer parens, and trailing semicolon
- $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
- $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
- $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
- $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
-
- my $args = qr{([^,)]+)};
- # replace DECLARE_BITMAP
- $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
- $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos;
- $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
- # replace DECLARE_HASHTABLE
- $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
- # replace DECLARE_KFIFO
- $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
- # replace DECLARE_KFIFO_PTR
- $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
- # replace DECLARE_FLEX_ARRAY
- $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos;
- #replace DEFINE_DMA_UNMAP_ADDR
- $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos;
- #replace DEFINE_DMA_UNMAP_LEN
- $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos;
- my $declaration = $members;
-
- # Split nested struct/union elements as newer ones
- while ($members =~ m/$struct_members/) {
- my $newmember;
- my $maintype = $1;
- my $ids = $4;
- my $content = $3;
- foreach my $id(split /,/, $ids) {
- $newmember .= "$maintype $id; ";
-
- $id =~ s/[:\[].*//;
- $id =~ s/^\s*\**(\S+)\s*/$1/;
- foreach my $arg (split /;/, $content) {
- next if ($arg =~ m/^\s*$/);
- if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) {
- # pointer-to-function
- my $type = $1;
- my $name = $2;
- my $extra = $3;
- next if (!$name);
- if ($id =~ m/^\s*$/) {
- # anonymous struct/union
- $newmember .= "$type$name$extra; ";
- } else {
- $newmember .= "$type$id.$name$extra; ";
- }
- } else {
- my $type;
- my $names;
- $arg =~ s/^\s+//;
- $arg =~ s/\s+$//;
- # Handle bitmaps
- $arg =~ s/:\s*\d+\s*//g;
- # Handle arrays
- $arg =~ s/\[.*\]//g;
- # The type may have multiple words,
- # and multiple IDs can be defined, like:
- # const struct foo, *bar, foobar
- # So, we remove spaces when parsing the
- # names, in order to match just names
- # and commas for the names
- $arg =~ s/\s*,\s*/,/g;
- if ($arg =~ m/(.*)\s+([\S+,]+)/) {
- $type = $1;
- $names = $2;
- } else {
- $newmember .= "$arg; ";
- next;
- }
- foreach my $name (split /,/, $names) {
- $name =~ s/^\s*\**(\S+)\s*/$1/;
- next if (($name =~ m/^\s*$/));
- if ($id =~ m/^\s*$/) {
- # anonymous struct/union
- $newmember .= "$type $name; ";
- } else {
- $newmember .= "$type $id.$name; ";
- }
- }
- }
- }
- }
- $members =~ s/$struct_members/$newmember/;
- }
-
- # Ignore other nested elements, like enums
- $members =~ s/(\{[^\{\}]*\})//g;
-
- create_parameterlist($members, ';', $file, $declaration_name);
- check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual);
-
- # Adjust declaration for better display
- $declaration =~ s/([\{;])/$1\n/g;
- $declaration =~ s/\}\s+;/};/g;
- # Better handle inlined enums
- do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/);
-
- my @def_args = split /\n/, $declaration;
- my $level = 1;
- $declaration = "";
- foreach my $clause (@def_args) {
- $clause =~ s/^\s+//;
- $clause =~ s/\s+$//;
- $clause =~ s/\s+/ /;
- next if (!$clause);
- $level-- if ($clause =~ m/(\})/ && $level > 1);
- if (!($clause =~ m/^\s*#/)) {
- $declaration .= "\t" x $level;
- }
- $declaration .= "\t" . $clause . "\n";
- $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/));
- }
- output_declaration($declaration_name,
- 'struct',
- {'struct' => $declaration_name,
- 'module' => $modulename,
- 'definition' => $declaration,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose,
- 'type' => $decl_type
- });
- }
- else {
- print STDERR "${file}:$.: error: Cannot parse struct or union!\n";
- ++$errors;
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n");
+ return;
+ }
+
+ # ignore members marked private:
+ $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
+ $members =~ s/\/\*\s*private:.*//gosi;
+ # strip comments:
+ $members =~ s/\/\*.*?\*\///gos;
+ # strip attributes
+ $members =~ s/\s*$attribute/ /gi;
+ $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
+ $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos;
+ $members =~ s/\s*__packed\s*/ /gos;
+ $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
+ $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
+ $members =~ s/\s*____cacheline_aligned/ /gos;
+ # unwrap struct_group():
+ # - first eat non-declaration parameters and rewrite for final match
+ # - then remove macro, outer parens, and trailing semicolon
+ $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
+ $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
+ $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
+ $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
+
+ my $args = qr{([^,)]+)};
+ # replace DECLARE_BITMAP
+ $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
+ $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos;
+ $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+ # replace DECLARE_HASHTABLE
+ $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+ # replace DECLARE_KFIFO
+ $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
+ # replace DECLARE_KFIFO_PTR
+ $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
+ # replace DECLARE_FLEX_ARRAY
+ $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos;
+ #replace DEFINE_DMA_UNMAP_ADDR
+ $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos;
+ #replace DEFINE_DMA_UNMAP_LEN
+ $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos;
+ my $declaration = $members;
+
+ # Split nested struct/union elements as newer ones
+ while ($members =~ m/$struct_members/) {
+ my $newmember;
+ my $maintype = $1;
+ my $ids = $4;
+ my $content = $3;
+ foreach my $id(split /,/, $ids) {
+ $newmember .= "$maintype $id; ";
+
+ $id =~ s/[:\[].*//;
+ $id =~ s/^\s*\**(\S+)\s*/$1/;
+ foreach my $arg (split /;/, $content) {
+ next if ($arg =~ m/^\s*$/);
+ if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) {
+ # pointer-to-function
+ my $type = $1;
+ my $name = $2;
+ my $extra = $3;
+ next if (!$name);
+ if ($id =~ m/^\s*$/) {
+ # anonymous struct/union
+ $newmember .= "$type$name$extra; ";
+ } else {
+ $newmember .= "$type$id.$name$extra; ";
+ }
+ } else {
+ my $type;
+ my $names;
+ $arg =~ s/^\s+//;
+ $arg =~ s/\s+$//;
+ # Handle bitmaps
+ $arg =~ s/:\s*\d+\s*//g;
+ # Handle arrays
+ $arg =~ s/\[.*\]//g;
+ # The type may have multiple words,
+ # and multiple IDs can be defined, like:
+ # const struct foo, *bar, foobar
+ # So, we remove spaces when parsing the
+ # names, in order to match just names
+ # and commas for the names
+ $arg =~ s/\s*,\s*/,/g;
+ if ($arg =~ m/(.*)\s+([\S+,]+)/) {
+ $type = $1;
+ $names = $2;
+ } else {
+ $newmember .= "$arg; ";
+ next;
+ }
+ foreach my $name (split /,/, $names) {
+ $name =~ s/^\s*\**(\S+)\s*/$1/;
+ next if (($name =~ m/^\s*$/));
+ if ($id =~ m/^\s*$/) {
+ # anonymous struct/union
+ $newmember .= "$type $name; ";
+ } else {
+ $newmember .= "$type $id.$name; ";
+ }
+ }
+ }
+ }
+ }
+ $members =~ s/$struct_members/$newmember/;
+ }
+
+ # Ignore other nested elements, like enums
+ $members =~ s/(\{[^\{\}]*\})//g;
+
+ create_parameterlist($members, ';', $file, $declaration_name);
+ check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual);
+
+ # Adjust declaration for better display
+ $declaration =~ s/([\{;])/$1\n/g;
+ $declaration =~ s/\}\s+;/};/g;
+ # Better handle inlined enums
+ do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/);
+
+ my @def_args = split /\n/, $declaration;
+ my $level = 1;
+ $declaration = "";
+ foreach my $clause (@def_args) {
+ $clause =~ s/^\s+//;
+ $clause =~ s/\s+$//;
+ $clause =~ s/\s+/ /;
+ next if (!$clause);
+ $level-- if ($clause =~ m/(\})/ && $level > 1);
+ if (!($clause =~ m/^\s*#/)) {
+ $declaration .= "\t" x $level;
+ }
+ $declaration .= "\t" . $clause . "\n";
+ $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/));
+ }
+ output_declaration($declaration_name,
+ 'struct',
+ {'struct' => $declaration_name,
+ 'module' => $modulename,
+ 'definition' => $declaration,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose,
+ 'type' => $decl_type
+ });
+ } else {
+ print STDERR "${file}:$.: error: Cannot parse struct or union!\n";
+ ++$errors;
}
}
sub show_warnings($$) {
- my $functype = shift;
- my $name = shift;
-
- return 0 if (defined($nosymbol_table{$name}));
-
- return 1 if ($output_selection == OUTPUT_ALL);
-
- if ($output_selection == OUTPUT_EXPORTED) {
- if (defined($function_table{$name})) {
- return 1;
- } else {
- return 0;
- }
- }
- if ($output_selection == OUTPUT_INTERNAL) {
- if (!($functype eq "function" && defined($function_table{$name}))) {
- return 1;
- } else {
- return 0;
- }
- }
- if ($output_selection == OUTPUT_INCLUDE) {
- if (defined($function_table{$name})) {
- return 1;
- } else {
- return 0;
- }
- }
- die("Please add the new output type at show_warnings()");
+ my $functype = shift;
+ my $name = shift;
+
+ return 0 if (defined($nosymbol_table{$name}));
+
+ return 1 if ($output_selection == OUTPUT_ALL);
+
+ if ($output_selection == OUTPUT_EXPORTED) {
+ if (defined($function_table{$name})) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ if ($output_selection == OUTPUT_INTERNAL) {
+ if (!($functype eq "function" && defined($function_table{$name}))) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ if ($output_selection == OUTPUT_INCLUDE) {
+ if (defined($function_table{$name})) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ die("Please add the new output type at show_warnings()");
}
sub dump_enum($$) {
@@ -1330,65 +1328,65 @@ sub dump_enum($$) {
$x =~ s@/\*.*?\*/@@gos; # strip comments.
# strip #define macros inside enums
- $x =~ s@#\s*((define|ifdef)\s+|endif)[^;]*;@@gos;
+ $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos;
if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) {
- $declaration_name = $2;
- $members = $1;
+ $declaration_name = $2;
+ $members = $1;
} elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
- $declaration_name = $1;
- $members = $2;
+ $declaration_name = $1;
+ $members = $2;
}
if ($members) {
- if ($identifier ne $declaration_name) {
- if ($identifier eq "") {
- emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n");
- } else {
- emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n");
- }
- return;
- }
- $declaration_name = "(anonymous)" if ($declaration_name eq "");
-
- my %_members;
-
- $members =~ s/\s+$//;
- $members =~ s/\([^;]*?[\)]//g;
-
- foreach my $arg (split ',', $members) {
- $arg =~ s/^\s*(\w+).*/$1/;
- push @parameterlist, $arg;
- if (!$parameterdescs{$arg}) {
- $parameterdescs{$arg} = $undescribed;
- if (show_warnings("enum", $declaration_name)) {
- emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n");
- }
- }
- $_members{$arg} = 1;
- }
-
- while (my ($k, $v) = each %parameterdescs) {
- if (!exists($_members{$k})) {
- if (show_warnings("enum", $declaration_name)) {
- emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n");
- }
- }
- }
-
- output_declaration($declaration_name,
- 'enum',
- {'enum' => $declaration_name,
- 'module' => $modulename,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
+ if ($identifier ne $declaration_name) {
+ if ($identifier eq "") {
+ emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n");
+ } else {
+ emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n");
+ }
+ return;
+ }
+ $declaration_name = "(anonymous)" if ($declaration_name eq "");
+
+ my %_members;
+
+ $members =~ s/\s+$//;
+ $members =~ s/\([^;]*?[\)]//g;
+
+ foreach my $arg (split ',', $members) {
+ $arg =~ s/^\s*(\w+).*/$1/;
+ push @parameterlist, $arg;
+ if (!$parameterdescs{$arg}) {
+ $parameterdescs{$arg} = $undescribed;
+ if (show_warnings("enum", $declaration_name)) {
+ emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n");
+ }
+ }
+ $_members{$arg} = 1;
+ }
+
+ while (my ($k, $v) = each %parameterdescs) {
+ if (!exists($_members{$k})) {
+ if (show_warnings("enum", $declaration_name)) {
+ emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n");
+ }
+ }
+ }
+
+ output_declaration($declaration_name,
+ 'enum',
+ {'enum' => $declaration_name,
+ 'module' => $modulename,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
} else {
- print STDERR "${file}:$.: error: Cannot parse enum!\n";
- ++$errors;
+ print STDERR "${file}:$.: error: Cannot parse enum!\n";
+ ++$errors;
}
}
@@ -1407,59 +1405,58 @@ sub dump_typedef($$) {
# Parse function typedef prototypes
if ($x =~ $typedef1 || $x =~ $typedef2) {
- $return_type = $1;
- $declaration_name = $2;
- my $args = $3;
- $return_type =~ s/^\s+//;
-
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
- return;
- }
-
- create_parameterlist($args, ',', $file, $declaration_name);
-
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'typedef' => 1,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
- return;
+ $return_type = $1;
+ $declaration_name = $2;
+ my $args = $3;
+ $return_type =~ s/^\s+//;
+
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
+ return;
+ }
+
+ create_parameterlist($args, ',', $file, $declaration_name);
+
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'typedef' => 1,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ return;
}
while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) {
- $x =~ s/\(*.\)\s*;$/;/;
- $x =~ s/\[*.\]\s*;$/;/;
+ $x =~ s/\(*.\)\s*;$/;/;
+ $x =~ s/\[*.\]\s*;$/;/;
}
if ($x =~ /typedef.*\s+(\w+)\s*;/) {
- $declaration_name = $1;
+ $declaration_name = $1;
- if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
- return;
- }
+ if ($identifier ne $declaration_name) {
+ emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n");
+ return;
+ }
- output_declaration($declaration_name,
- 'typedef',
- {'typedef' => $declaration_name,
- 'module' => $modulename,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
- }
- else {
- print STDERR "${file}:$.: error: Cannot parse typedef!\n";
- ++$errors;
+ output_declaration($declaration_name,
+ 'typedef',
+ {'typedef' => $declaration_name,
+ 'module' => $modulename,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ } else {
+ print STDERR "${file}:$.: error: Cannot parse typedef!\n";
+ ++$errors;
}
}
@@ -1482,214 +1479,220 @@ sub create_parameterlist($$$$) {
# temporarily replace commas inside function pointer definition
my $arg_expr = qr{\([^\),]+};
while ($args =~ /$arg_expr,/) {
- $args =~ s/($arg_expr),/$1#/g;
+ $args =~ s/($arg_expr),/$1#/g;
}
foreach my $arg (split($splitter, $args)) {
- # strip comments
- $arg =~ s/\/\*.*\*\///;
- # ignore argument attributes
- $arg =~ s/\sPOS0?\s/ /;
- # strip leading/trailing spaces
- $arg =~ s/^\s*//;
- $arg =~ s/\s*$//;
- $arg =~ s/\s+/ /;
-
- if ($arg =~ /^#/) {
- # Treat preprocessor directive as a typeless variable just to fill
- # corresponding data structures "correctly". Catch it later in
- # output_* subs.
- push_parameter($arg, "", "", $file);
- } elsif ($arg =~ m/\(.+\)\s*\(/) {
- # pointer-to-function
- $arg =~ tr/#/,/;
- $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
- $param = $1;
- $type = $arg;
- $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
- save_struct_actual($param);
- push_parameter($param, $type, $arg, $file, $declaration_name);
- } elsif ($arg) {
- $arg =~ s/\s*:\s*/:/g;
- $arg =~ s/\s*\[/\[/g;
-
- my @args = split('\s*,\s*', $arg);
- if ($args[0] =~ m/\*/) {
- $args[0] =~ s/(\*+)\s*/ $1/;
- }
-
- my @first_arg;
- if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) {
- shift @args;
- push(@first_arg, split('\s+', $1));
- push(@first_arg, $2);
- } else {
- @first_arg = split('\s+', shift @args);
- }
-
- unshift(@args, pop @first_arg);
- $type = join " ", @first_arg;
-
- foreach $param (@args) {
- if ($param =~ m/^(\*+)\s*(.*)/) {
- save_struct_actual($2);
-
- push_parameter($2, "$type $1", $arg, $file, $declaration_name);
- }
- elsif ($param =~ m/(.*?):(\d+)/) {
- if ($type ne "") { # skip unnamed bit-fields
- save_struct_actual($1);
- push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
- }
- }
- else {
- save_struct_actual($param);
- push_parameter($param, $type, $arg, $file, $declaration_name);
- }
- }
- }
+ # strip comments
+ $arg =~ s/\/\*.*\*\///;
+ # ignore argument attributes
+ $arg =~ s/\sPOS0?\s/ /;
+ # strip leading/trailing spaces
+ $arg =~ s/^\s*//;
+ $arg =~ s/\s*$//;
+ $arg =~ s/\s+/ /;
+
+ if ($arg =~ /^#/) {
+ # Treat preprocessor directive as a typeless variable just to fill
+ # corresponding data structures "correctly". Catch it later in
+ # output_* subs.
+ push_parameter($arg, "", "", $file);
+ } elsif ($arg =~ m/\(.+\)\s*\(/) {
+ # pointer-to-function
+ $arg =~ tr/#/,/;
+ $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
+ $param = $1;
+ $type = $arg;
+ $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
+ save_struct_actual($param);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
+ } elsif ($arg =~ m/\(.+\)\s*\[/) {
+ # array-of-pointers
+ $arg =~ tr/#/,/;
+ $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/;
+ $param = $1;
+ $type = $arg;
+ $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
+ save_struct_actual($param);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
+ } elsif ($arg) {
+ $arg =~ s/\s*:\s*/:/g;
+ $arg =~ s/\s*\[/\[/g;
+
+ my @args = split('\s*,\s*', $arg);
+ if ($args[0] =~ m/\*/) {
+ $args[0] =~ s/(\*+)\s*/ $1/;
+ }
+
+ my @first_arg;
+ if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) {
+ shift @args;
+ push(@first_arg, split('\s+', $1));
+ push(@first_arg, $2);
+ } else {
+ @first_arg = split('\s+', shift @args);
+ }
+
+ unshift(@args, pop @first_arg);
+ $type = join " ", @first_arg;
+
+ foreach $param (@args) {
+ if ($param =~ m/^(\*+)\s*(.*)/) {
+ save_struct_actual($2);
+
+ push_parameter($2, "$type $1", $arg, $file, $declaration_name);
+ } elsif ($param =~ m/(.*?):(\d+)/) {
+ if ($type ne "") { # skip unnamed bit-fields
+ save_struct_actual($1);
+ push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
+ }
+ } else {
+ save_struct_actual($param);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
+ }
+ }
+ }
}
}
sub push_parameter($$$$$) {
- my $param = shift;
- my $type = shift;
- my $org_arg = shift;
- my $file = shift;
- my $declaration_name = shift;
-
- if (($anon_struct_union == 1) && ($type eq "") &&
- ($param eq "}")) {
- return; # ignore the ending }; from anon. struct/union
- }
-
- $anon_struct_union = 0;
- $param =~ s/[\[\)].*//;
-
- if ($type eq "" && $param =~ /\.\.\.$/)
- {
- if (!$param =~ /\w\.\.\.$/) {
- # handles unnamed variable parameters
- $param = "...";
- }
- elsif ($param =~ /\w\.\.\.$/) {
- # for named variable parameters of the form `x...`, remove the dots
- $param =~ s/\.\.\.$//;
- }
- if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
- $parameterdescs{$param} = "variable arguments";
- }
- }
- elsif ($type eq "" && ($param eq "" or $param eq "void"))
- {
- $param="void";
- $parameterdescs{void} = "no arguments";
- }
- elsif ($type eq "" && ($param eq "struct" or $param eq "union"))
- # handle unnamed (anonymous) union or struct:
- {
- $type = $param;
- $param = "{unnamed_" . $param . "}";
- $parameterdescs{$param} = "anonymous\n";
- $anon_struct_union = 1;
- }
- elsif ($param =~ "__cacheline_group" )
- # handle cache group enforcing variables: they do not need be described in header files
- {
- return; # ignore __cacheline_group_begin and __cacheline_group_end
- }
-
- # warn if parameter has no description
- # (but ignore ones starting with # as these are not parameters
- # but inline preprocessor statements);
- # Note: It will also ignore void params and unnamed structs/unions
- if (!defined $parameterdescs{$param} && $param !~ /^#/) {
- $parameterdescs{$param} = $undescribed;
-
- if (show_warnings($type, $declaration_name) && $param !~ /\./) {
- emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n");
- }
- }
-
- # strip spaces from $param so that it is one continuous string
- # on @parameterlist;
- # this fixes a problem where check_sections() cannot find
- # a parameter like "addr[6 + 2]" because it actually appears
- # as "addr[6", "+", "2]" on the parameter list;
- # but it's better to maintain the param string unchanged for output,
- # so just weaken the string compare in check_sections() to ignore
- # "[blah" in a parameter string;
- ###$param =~ s/\s*//g;
- push @parameterlist, $param;
- $org_arg =~ s/\s\s+/ /g;
- $parametertypes{$param} = $org_arg;
+ my $param = shift;
+ my $type = shift;
+ my $org_arg = shift;
+ my $file = shift;
+ my $declaration_name = shift;
+
+ if (($anon_struct_union == 1) && ($type eq "") &&
+ ($param eq "}")) {
+ return; # ignore the ending }; from anon. struct/union
+ }
+
+ $anon_struct_union = 0;
+ $param =~ s/[\[\)].*//;
+
+ if ($type eq "" && $param =~ /\.\.\.$/)
+ {
+ if (!$param =~ /\w\.\.\.$/) {
+ # handles unnamed variable parameters
+ $param = "...";
+ } elsif ($param =~ /\w\.\.\.$/) {
+ # for named variable parameters of the form `x...`, remove the dots
+ $param =~ s/\.\.\.$//;
+ }
+ if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
+ $parameterdescs{$param} = "variable arguments";
+ }
+ }
+ elsif ($type eq "" && ($param eq "" or $param eq "void"))
+ {
+ $param="void";
+ $parameterdescs{void} = "no arguments";
+ }
+ elsif ($type eq "" && ($param eq "struct" or $param eq "union"))
+ # handle unnamed (anonymous) union or struct:
+ {
+ $type = $param;
+ $param = "{unnamed_" . $param . "}";
+ $parameterdescs{$param} = "anonymous\n";
+ $anon_struct_union = 1;
+ }
+ elsif ($param =~ "__cacheline_group" )
+ # handle cache group enforcing variables: they do not need be described in header files
+ {
+ return; # ignore __cacheline_group_begin and __cacheline_group_end
+ }
+
+ # warn if parameter has no description
+ # (but ignore ones starting with # as these are not parameters
+ # but inline preprocessor statements);
+ # Note: It will also ignore void params and unnamed structs/unions
+ if (!defined $parameterdescs{$param} && $param !~ /^#/) {
+ $parameterdescs{$param} = $undescribed;
+
+ if (show_warnings($type, $declaration_name) && $param !~ /\./) {
+ emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n");
+ }
+ }
+
+ # strip spaces from $param so that it is one continuous string
+ # on @parameterlist;
+ # this fixes a problem where check_sections() cannot find
+ # a parameter like "addr[6 + 2]" because it actually appears
+ # as "addr[6", "+", "2]" on the parameter list;
+ # but it's better to maintain the param string unchanged for output,
+ # so just weaken the string compare in check_sections() to ignore
+ # "[blah" in a parameter string;
+ ###$param =~ s/\s*//g;
+ push @parameterlist, $param;
+ $org_arg =~ s/\s\s+/ /g;
+ $parametertypes{$param} = $org_arg;
}
sub check_sections($$$$$) {
- my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_;
- my @sects = split ' ', $sectcheck;
- my @prms = split ' ', $prmscheck;
- my $err;
- my ($px, $sx);
- my $prm_clean; # strip trailing "[array size]" and/or beginning "*"
-
- foreach $sx (0 .. $#sects) {
- $err = 1;
- foreach $px (0 .. $#prms) {
- $prm_clean = $prms[$px];
- $prm_clean =~ s/\[.*\]//;
- $prm_clean =~ s/$attribute//i;
- # ignore array size in a parameter string;
- # however, the original param string may contain
- # spaces, e.g.: addr[6 + 2]
- # and this appears in @prms as "addr[6" since the
- # parameter list is split at spaces;
- # hence just ignore "[..." for the sections check;
- $prm_clean =~ s/\[.*//;
-
- ##$prm_clean =~ s/^\**//;
- if ($prm_clean eq $sects[$sx]) {
- $err = 0;
- last;
- }
- }
- if ($err) {
- if ($decl_type eq "function") {
- emit_warning("${file}:$.",
- "Excess function parameter " .
- "'$sects[$sx]' " .
- "description in '$decl_name'\n");
- }
- elsif (($decl_type eq "struct") or
- ($decl_type eq "union")) {
- emit_warning("${file}:$.",
- "Excess $decl_type member " .
- "'$sects[$sx]' " .
- "description in '$decl_name'\n");
- }
- }
- }
+ my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_;
+ my @sects = split ' ', $sectcheck;
+ my @prms = split ' ', $prmscheck;
+ my $err;
+ my ($px, $sx);
+ my $prm_clean; # strip trailing "[array size]" and/or beginning "*"
+
+ foreach $sx (0 .. $#sects) {
+ $err = 1;
+ foreach $px (0 .. $#prms) {
+ $prm_clean = $prms[$px];
+ $prm_clean =~ s/\[.*\]//;
+ $prm_clean =~ s/$attribute//i;
+ # ignore array size in a parameter string;
+ # however, the original param string may contain
+ # spaces, e.g.: addr[6 + 2]
+ # and this appears in @prms as "addr[6" since the
+ # parameter list is split at spaces;
+ # hence just ignore "[..." for the sections check;
+ $prm_clean =~ s/\[.*//;
+
+ ##$prm_clean =~ s/^\**//;
+ if ($prm_clean eq $sects[$sx]) {
+ $err = 0;
+ last;
+ }
+ }
+ if ($err) {
+ if ($decl_type eq "function") {
+ emit_warning("${file}:$.",
+ "Excess function parameter " .
+ "'$sects[$sx]' " .
+ "description in '$decl_name'\n");
+ } elsif (($decl_type eq "struct") or
+ ($decl_type eq "union")) {
+ emit_warning("${file}:$.",
+ "Excess $decl_type member " .
+ "'$sects[$sx]' " .
+ "description in '$decl_name'\n");
+ }
+ }
+ }
}
##
# Checks the section describing the return value of a function.
sub check_return_section {
- my $file = shift;
- my $declaration_name = shift;
- my $return_type = shift;
+ my $file = shift;
+ my $declaration_name = shift;
+ my $return_type = shift;
- # Ignore an empty return type (It's a macro)
- # Ignore functions with a "void" return type. (But don't ignore "void *")
- if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) {
- return;
- }
+ # Ignore an empty return type (It's a macro)
+ # Ignore functions with a "void" return type. (But don't ignore "void *")
+ if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) {
+ return;
+ }
- if (!defined($sections{$section_return}) ||
- $sections{$section_return} eq "") {
- emit_warning("${file}:$.",
- "No description found for return value of " .
- "'$declaration_name'\n");
- }
+ if (!defined($sections{$section_return}) ||
+ $sections{$section_return} eq "")
+ {
+ emit_warning("${file}:$.",
+ "No description found for return value of " .
+ "'$declaration_name'\n");
+ }
}
##
@@ -1764,21 +1767,21 @@ sub dump_function($$) {
$declaration_name = $2;
$noret = 1;
} elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
- $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
- $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) {
- $return_type = $1;
- $declaration_name = $2;
- my $args = $3;
+ $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
+ $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) {
+ $return_type = $1;
+ $declaration_name = $2;
+ my $args = $3;
- create_parameterlist($args, ',', $file, $declaration_name);
+ create_parameterlist($args, ',', $file, $declaration_name);
} else {
- emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n");
- return;
+ emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n");
+ return;
}
if ($identifier ne $declaration_name) {
- emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n");
- return;
+ emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n");
+ return;
}
my $prms = join " ", @parameterlist;
@@ -1790,38 +1793,38 @@ sub dump_function($$) {
# -Wreturn mode.
# TODO: always perform the check.
if ($Wreturn && !$noret) {
- check_return_section($file, $declaration_name, $return_type);
+ check_return_section($file, $declaration_name, $return_type);
}
# The function parser can be called with a typedef parameter.
# Handle it.
if ($return_type =~ /typedef/) {
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'typedef' => 1,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'typedef' => 1,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
} else {
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
}
}
@@ -1841,64 +1844,64 @@ sub reset_state {
}
sub tracepoint_munge($) {
- my $file = shift;
- my $tracepointname = 0;
- my $tracepointargs = 0;
-
- if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
- $tracepointname = $1;
- }
- if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
- $tracepointname = $1;
- }
- if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
- $tracepointname = $2;
- }
- $tracepointname =~ s/^\s+//; #strip leading whitespace
- if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
- $tracepointargs = $1;
- }
- if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
- emit_warning("${file}:$.", "Unrecognized tracepoint format: \n".
- "$prototype\n");
- } else {
- $prototype = "static inline void trace_$tracepointname($tracepointargs)";
- $identifier = "trace_$identifier";
- }
+ my $file = shift;
+ my $tracepointname = 0;
+ my $tracepointargs = 0;
+
+ if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
+ $tracepointname = $1;
+ }
+ if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
+ $tracepointname = $1;
+ }
+ if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
+ $tracepointname = $2;
+ }
+ $tracepointname =~ s/^\s+//; #strip leading whitespace
+ if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
+ $tracepointargs = $1;
+ }
+ if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+ emit_warning("${file}:$.", "Unrecognized tracepoint format: \n".
+ "$prototype\n");
+ } else {
+ $prototype = "static inline void trace_$tracepointname($tracepointargs)";
+ $identifier = "trace_$identifier";
+ }
}
sub syscall_munge() {
- my $void = 0;
-
- $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's
-## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) {
- if ($prototype =~ m/SYSCALL_DEFINE0/) {
- $void = 1;
-## $prototype = "long sys_$1(void)";
- }
-
- $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name
- if ($prototype =~ m/long (sys_.*?),/) {
- $prototype =~ s/,/\(/;
- } elsif ($void) {
- $prototype =~ s/\)/\(void\)/;
- }
-
- # now delete all of the odd-number commas in $prototype
- # so that arg types & arg names don't have a comma between them
- my $count = 0;
- my $len = length($prototype);
- if ($void) {
- $len = 0; # skip the for-loop
- }
- for (my $ix = 0; $ix < $len; $ix++) {
- if (substr($prototype, $ix, 1) eq ',') {
- $count++;
- if ($count % 2 == 1) {
- substr($prototype, $ix, 1) = ' ';
- }
- }
- }
+ my $void = 0;
+
+ $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's
+## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) {
+ if ($prototype =~ m/SYSCALL_DEFINE0/) {
+ $void = 1;
+## $prototype = "long sys_$1(void)";
+ }
+
+ $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name
+ if ($prototype =~ m/long (sys_.*?),/) {
+ $prototype =~ s/,/\(/;
+ } elsif ($void) {
+ $prototype =~ s/\)/\(void\)/;
+ }
+
+ # now delete all of the odd-number commas in $prototype
+ # so that arg types & arg names don't have a comma between them
+ my $count = 0;
+ my $len = length($prototype);
+ if ($void) {
+ $len = 0; # skip the for-loop
+ }
+ for (my $ix = 0; $ix < $len; $ix++) {
+ if (substr($prototype, $ix, 1) eq ',') {
+ $count++;
+ if ($count % 2 == 1) {
+ substr($prototype, $ix, 1) = ' ';
+ }
+ }
+ }
}
sub process_proto_function($$) {
@@ -1907,32 +1910,31 @@ sub process_proto_function($$) {
$x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
- if ($x =~ m#\s*/\*\s+MACDOC\s*#io || ($x =~ /^#/ && $x !~ /^#\s*define/)) {
- # do nothing
- }
- elsif ($x =~ /([^\{]*)/) {
- $prototype .= $1;
+ if ($x =~ /^#/ && $x !~ /^#\s*define/) {
+ # do nothing
+ } elsif ($x =~ /([^\{]*)/) {
+ $prototype .= $1;
}
if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) {
- $prototype =~ s@/\*.*?\*/@@gos; # strip comments.
- $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
- $prototype =~ s@^\s+@@gos; # strip leading spaces
+ $prototype =~ s@/\*.*?\*/@@gos; # strip comments.
+ $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
+ $prototype =~ s@^\s+@@gos; # strip leading spaces
- # Handle prototypes for function pointers like:
- # int (*pcs_config)(struct foo)
- $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos;
+ # Handle prototypes for function pointers like:
+ # int (*pcs_config)(struct foo)
+ $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos;
- if ($prototype =~ /SYSCALL_DEFINE/) {
- syscall_munge();
- }
- if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
- $prototype =~ /DEFINE_SINGLE_EVENT/)
- {
- tracepoint_munge($file);
- }
- dump_function($prototype, $file);
- reset_state();
+ if ($prototype =~ /SYSCALL_DEFINE/) {
+ syscall_munge();
+ }
+ if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
+ $prototype =~ /DEFINE_SINGLE_EVENT/)
+ {
+ tracepoint_munge($file);
+ }
+ dump_function($prototype, $file);
+ reset_state();
}
}
@@ -1946,28 +1948,28 @@ sub process_proto_type($$) {
$x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
if ($x =~ /^#/) {
- # To distinguish preprocessor directive from regular declaration later.
- $x .= ";";
+ # To distinguish preprocessor directive from regular declaration later.
+ $x .= ";";
}
while (1) {
- if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) {
+ if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) {
if( length $prototype ) {
$prototype .= " "
}
- $prototype .= $1 . $2;
- ($2 eq '{') && $brcount++;
- ($2 eq '}') && $brcount--;
- if (($2 eq ';') && ($brcount == 0)) {
- dump_declaration($prototype, $file);
- reset_state();
- last;
- }
- $x = $3;
- } else {
- $prototype .= $x;
- last;
- }
+ $prototype .= $1 . $2;
+ ($2 eq '{') && $brcount++;
+ ($2 eq '}') && $brcount--;
+ if (($2 eq ';') && ($brcount == 0)) {
+ dump_declaration($prototype, $file);
+ reset_state();
+ last;
+ }
+ $x = $3;
+ } else {
+ $prototype .= $x;
+ last;
+ }
}
}
@@ -1977,13 +1979,13 @@ sub map_filename($) {
my ($orig_file) = @_;
if (defined($ENV{'SRCTREE'})) {
- $file = "$ENV{'SRCTREE'}" . "/" . $orig_file;
+ $file = "$ENV{'SRCTREE'}" . "/" . $orig_file;
} else {
- $file = $orig_file;
+ $file = $orig_file;
}
if (defined($source_map{$file})) {
- $file = $source_map{$file};
+ $file = $source_map{$file};
}
return $file;
@@ -1994,20 +1996,20 @@ sub process_export_file($) {
my $file = map_filename($orig_file);
if (!open(IN,"<$file")) {
- print STDERR "Error: Cannot open file $file\n";
- ++$errors;
- return;
+ print STDERR "Error: Cannot open file $file\n";
+ ++$errors;
+ return;
}
while (<IN>) {
- if (/$export_symbol/) {
- next if (defined($nosymbol_table{$2}));
- $function_table{$2} = 1;
- }
- if (/$export_symbol_ns/) {
- next if (defined($nosymbol_table{$2}));
- $function_table{$2} = 1;
- }
+ if (/$export_symbol/) {
+ next if (defined($nosymbol_table{$2}));
+ $function_table{$2} = 1;
+ }
+ if (/$export_symbol_ns/) {
+ next if (defined($nosymbol_table{$2}));
+ $function_table{$2} = 1;
+ }
}
close(IN);
@@ -2020,9 +2022,9 @@ sub process_export_file($) {
#
sub process_normal() {
if (/$doc_start/o) {
- $state = STATE_NAME; # next line is always the function name
- $in_doc_sect = 0;
- $declaration_start_line = $. + 1;
+ $state = STATE_NAME; # next line is always the function name
+ $in_doc_sect = 0;
+ $declaration_start_line = $. + 1;
}
}
@@ -2034,80 +2036,80 @@ sub process_name($$) {
my $descr;
if (/$doc_block/o) {
- $state = STATE_DOCBLOCK;
- $contents = "";
- $new_start_line = $.;
-
- if ( $1 eq "" ) {
- $section = $section_intro;
- } else {
- $section = $1;
- }
+ $state = STATE_DOCBLOCK;
+ $contents = "";
+ $new_start_line = $.;
+
+ if ( $1 eq "" ) {
+ $section = $section_intro;
+ } else {
+ $section = $1;
+ }
} elsif (/$doc_decl/o) {
- $identifier = $1;
- my $is_kernel_comment = 0;
- my $decl_start = qr{$doc_com};
- # test for pointer declaration type, foo * bar() - desc
- my $fn_type = qr{\w+\s*\*\s*};
- my $parenthesis = qr{\(\w*\)};
- my $decl_end = qr{[-:].*};
- if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
- $identifier = $1;
- }
- if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
- $decl_type = $1;
- $identifier = $2;
- $is_kernel_comment = 1;
- }
- # Look for foo() or static void foo() - description; or misspelt
- # identifier
- elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
- /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
- $identifier = $1;
- $decl_type = 'function';
- $identifier =~ s/^define\s+//;
- $is_kernel_comment = 1;
- }
- $identifier =~ s/\s+$//;
-
- $state = STATE_BODY;
- # if there's no @param blocks need to set up default section
- # here
- $contents = "";
- $section = $section_default;
- $new_start_line = $. + 1;
- if (/[-:](.*)/) {
- # strip leading/trailing/multiple spaces
- $descr= $1;
- $descr =~ s/^\s*//;
- $descr =~ s/\s*$//;
- $descr =~ s/\s+/ /g;
- $declaration_purpose = $descr;
- $state = STATE_BODY_MAYBE;
- } else {
- $declaration_purpose = "";
- }
-
- if (!$is_kernel_comment) {
- emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_");
- $state = STATE_NORMAL;
- }
-
- if (($declaration_purpose eq "") && $Wshort_desc) {
- emit_warning("${file}:$.", "missing initial short description on line:\n$_");
- }
-
- if ($identifier eq "" && $decl_type ne "enum") {
- emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_");
- $state = STATE_NORMAL;
- }
-
- if ($verbose) {
- print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n";
- }
+ $identifier = $1;
+ my $is_kernel_comment = 0;
+ my $decl_start = qr{$doc_com};
+ # test for pointer declaration type, foo * bar() - desc
+ my $fn_type = qr{\w+\s*\*\s*};
+ my $parenthesis = qr{\(\w*\)};
+ my $decl_end = qr{[-:].*};
+ if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+ $identifier = $1;
+ }
+ if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
+ $decl_type = $1;
+ $identifier = $2;
+ $is_kernel_comment = 1;
+ }
+ # Look for foo() or static void foo() - description; or misspelt
+ # identifier
+ elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+ /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
+ $identifier = $1;
+ $decl_type = 'function';
+ $identifier =~ s/^define\s+//;
+ $is_kernel_comment = 1;
+ }
+ $identifier =~ s/\s+$//;
+
+ $state = STATE_BODY;
+ # if there's no @param blocks need to set up default section
+ # here
+ $contents = "";
+ $section = $section_default;
+ $new_start_line = $. + 1;
+ if (/[-:](.*)/) {
+ # strip leading/trailing/multiple spaces
+ $descr= $1;
+ $descr =~ s/^\s*//;
+ $descr =~ s/\s*$//;
+ $descr =~ s/\s+/ /g;
+ $declaration_purpose = $descr;
+ $state = STATE_BODY_MAYBE;
+ } else {
+ $declaration_purpose = "";
+ }
+
+ if (!$is_kernel_comment) {
+ emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_");
+ $state = STATE_NORMAL;
+ }
+
+ if (($declaration_purpose eq "") && $Wshort_desc) {
+ emit_warning("${file}:$.", "missing initial short description on line:\n$_");
+ }
+
+ if ($identifier eq "" && $decl_type ne "enum") {
+ emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_");
+ $state = STATE_NORMAL;
+ }
+
+ if ($verbose) {
+ print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n";
+ }
} else {
- emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n");
- $state = STATE_NORMAL;
+ emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n");
+ $state = STATE_NORMAL;
}
}
@@ -2119,102 +2121,102 @@ sub process_body($$) {
my $file = shift;
if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $new_start_line = $.;
- $contents = "";
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $new_start_line = $.;
+ $contents = "";
}
if (/$doc_sect/i) { # case insensitive for supported section names
- $in_doc_sect = 1;
- $newsection = $1;
- $newcontents = $2;
-
- # map the supported section names to the canonical names
- if ($newsection =~ m/^description$/i) {
- $newsection = $section_default;
- } elsif ($newsection =~ m/^context$/i) {
- $newsection = $section_context;
- } elsif ($newsection =~ m/^returns?$/i) {
- $newsection = $section_return;
- } elsif ($newsection =~ m/^\@return$/) {
- # special: @return is a section, not a param description
- $newsection = $section_return;
- }
-
- if (($contents ne "") && ($contents ne "\n")) {
- if (!$in_doc_sect && $Wcontents_before_sections) {
- emit_warning("${file}:$.", "contents before sections\n");
- }
- dump_section($file, $section, $contents);
- $section = $section_default;
- }
-
- $in_doc_sect = 1;
- $state = STATE_BODY;
- $contents = $newcontents;
- $new_start_line = $.;
- while (substr($contents, 0, 1) eq " ") {
- $contents = substr($contents, 1);
- }
- if ($contents ne "") {
- $contents .= "\n";
- }
- $section = $newsection;
- $leading_space = undef;
+ $in_doc_sect = 1;
+ $newsection = $1;
+ $newcontents = $2;
+
+ # map the supported section names to the canonical names
+ if ($newsection =~ m/^description$/i) {
+ $newsection = $section_default;
+ } elsif ($newsection =~ m/^context$/i) {
+ $newsection = $section_context;
+ } elsif ($newsection =~ m/^returns?$/i) {
+ $newsection = $section_return;
+ } elsif ($newsection =~ m/^\@return$/) {
+ # special: @return is a section, not a param description
+ $newsection = $section_return;
+ }
+
+ if (($contents ne "") && ($contents ne "\n")) {
+ if (!$in_doc_sect && $Wcontents_before_sections) {
+ emit_warning("${file}:$.", "contents before sections\n");
+ }
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ }
+
+ $in_doc_sect = 1;
+ $state = STATE_BODY;
+ $contents = $newcontents;
+ $new_start_line = $.;
+ while (substr($contents, 0, 1) eq " ") {
+ $contents = substr($contents, 1);
+ }
+ if ($contents ne "") {
+ $contents .= "\n";
+ }
+ $section = $newsection;
+ $leading_space = undef;
} elsif (/$doc_end/) {
- if (($contents ne "") && ($contents ne "\n")) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- }
- # look for doc_com + <text> + doc_end:
- if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') {
- emit_warning("${file}:$.", "suspicious ending line: $_");
- }
-
- $prototype = "";
- $state = STATE_PROTO;
- $brcount = 0;
+ if (($contents ne "") && ($contents ne "\n")) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
+ # look for doc_com + <text> + doc_end:
+ if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') {
+ emit_warning("${file}:$.", "suspicious ending line: $_");
+ }
+
+ $prototype = "";
+ $state = STATE_PROTO;
+ $brcount = 0;
$new_start_line = $. + 1;
} elsif (/$doc_content/) {
- if ($1 eq "") {
- if ($section eq $section_context) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- $new_start_line = $.;
- $state = STATE_BODY;
- } else {
- if ($section ne $section_default) {
- $state = STATE_BODY_WITH_BLANK_LINE;
- } else {
- $state = STATE_BODY;
- }
- $contents .= "\n";
- }
- } elsif ($state == STATE_BODY_MAYBE) {
- # Continued declaration purpose
- chomp($declaration_purpose);
- $declaration_purpose .= " " . $1;
- $declaration_purpose =~ s/\s+/ /g;
- } else {
- my $cont = $1;
- if ($section =~ m/^@/ || $section eq $section_context) {
- if (!defined $leading_space) {
- if ($cont =~ m/^(\s+)/) {
- $leading_space = $1;
- } else {
- $leading_space = "";
- }
- }
- $cont =~ s/^$leading_space//;
- }
- $contents .= $cont . "\n";
- }
+ if ($1 eq "") {
+ if ($section eq $section_context) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ $new_start_line = $.;
+ $state = STATE_BODY;
+ } else {
+ if ($section ne $section_default) {
+ $state = STATE_BODY_WITH_BLANK_LINE;
+ } else {
+ $state = STATE_BODY;
+ }
+ $contents .= "\n";
+ }
+ } elsif ($state == STATE_BODY_MAYBE) {
+ # Continued declaration purpose
+ chomp($declaration_purpose);
+ $declaration_purpose .= " " . $1;
+ $declaration_purpose =~ s/\s+/ /g;
+ } else {
+ my $cont = $1;
+ if ($section =~ m/^@/ || $section eq $section_context) {
+ if (!defined $leading_space) {
+ if ($cont =~ m/^(\s+)/) {
+ $leading_space = $1;
+ } else {
+ $leading_space = "";
+ }
+ }
+ $cont =~ s/^$leading_space//;
+ }
+ $contents .= $cont . "\n";
+ }
} else {
- # i dont know - bad line? ignore.
- emit_warning("${file}:$.", "bad line: $_");
+ # i dont know - bad line? ignore.
+ emit_warning("${file}:$.", "bad line: $_");
}
}
@@ -2226,21 +2228,21 @@ sub process_proto($$) {
my $file = shift;
if (/$doc_inline_oneline/) {
- $section = $1;
- $contents = $2;
- if ($contents ne "") {
- $contents .= "\n";
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- }
+ $section = $1;
+ $contents = $2;
+ if ($contents ne "") {
+ $contents .= "\n";
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
} elsif (/$doc_inline_start/) {
- $state = STATE_INLINE;
- $inline_doc_state = STATE_INLINE_NAME;
+ $state = STATE_INLINE;
+ $inline_doc_state = STATE_INLINE_NAME;
} elsif ($decl_type eq 'function') {
- process_proto_function($_, $file);
+ process_proto_function($_, $file);
} else {
- process_proto_type($_, $file);
+ process_proto_type($_, $file);
}
}
@@ -2251,23 +2253,23 @@ sub process_docblock($$) {
my $file = shift;
if (/$doc_end/) {
- dump_doc_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- $function = "";
- %parameterdescs = ();
- %parametertypes = ();
- @parameterlist = ();
- %sections = ();
- @sectionlist = ();
- $prototype = "";
- $state = STATE_NORMAL;
+ dump_doc_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ $function = "";
+ %parameterdescs = ();
+ %parametertypes = ();
+ @parameterlist = ();
+ %sections = ();
+ @sectionlist = ();
+ $prototype = "";
+ $state = STATE_NORMAL;
} elsif (/$doc_content/) {
- if ( $1 eq "" ) {
- $contents .= $blankline;
- } else {
- $contents .= $1 . "\n";
- }
+ if ( $1 eq "" ) {
+ $contents .= $blankline;
+ } else {
+ $contents .= $1 . "\n";
+ }
}
}
@@ -2279,37 +2281,37 @@ sub process_inline($$) {
# First line (state 1) needs to be a @parameter
if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) {
- $section = $1;
- $contents = $2;
- $new_start_line = $.;
- if ($contents ne "") {
- while (substr($contents, 0, 1) eq " ") {
- $contents = substr($contents, 1);
- }
- $contents .= "\n";
- }
- $inline_doc_state = STATE_INLINE_TEXT;
- # Documentation block end */
+ $section = $1;
+ $contents = $2;
+ $new_start_line = $.;
+ if ($contents ne "") {
+ while (substr($contents, 0, 1) eq " ") {
+ $contents = substr($contents, 1);
+ }
+ $contents .= "\n";
+ }
+ $inline_doc_state = STATE_INLINE_TEXT;
+ # Documentation block end */
} elsif (/$doc_inline_end/) {
- if (($contents ne "") && ($contents ne "\n")) {
- dump_section($file, $section, $contents);
- $section = $section_default;
- $contents = "";
- }
- $state = STATE_PROTO;
- $inline_doc_state = STATE_INLINE_NA;
- # Regular text
+ if (($contents ne "") && ($contents ne "\n")) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
+ $state = STATE_PROTO;
+ $inline_doc_state = STATE_INLINE_NA;
+ # Regular text
} elsif (/$doc_content/) {
- if ($inline_doc_state == STATE_INLINE_TEXT) {
- $contents .= $1 . "\n";
- # nuke leading blank lines
- if ($contents =~ /^\s*$/) {
- $contents = "";
- }
- } elsif ($inline_doc_state == STATE_INLINE_NAME) {
- $inline_doc_state = STATE_INLINE_ERROR;
- emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_");
- }
+ if ($inline_doc_state == STATE_INLINE_TEXT) {
+ $contents .= $1 . "\n";
+ # nuke leading blank lines
+ if ($contents =~ /^\s*$/) {
+ $contents = "";
+ }
+ } elsif ($inline_doc_state == STATE_INLINE_NAME) {
+ $inline_doc_state = STATE_INLINE_ERROR;
+ emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_");
+ }
}
}
@@ -2322,54 +2324,53 @@ sub process_file($) {
$file = map_filename($orig_file);
if (!open(IN_FILE,"<$file")) {
- print STDERR "Error: Cannot open file $file\n";
- ++$errors;
- return;
+ print STDERR "Error: Cannot open file $file\n";
+ ++$errors;
+ return;
}
$. = 1;
$section_counter = 0;
while (<IN_FILE>) {
- while (s/\\\s*$//) {
- $_ .= <IN_FILE>;
- }
- # Replace tabs by spaces
+ while (!/^ \*/ && s/\\\s*$//) {
+ $_ .= <IN_FILE>;
+ }
+ # Replace tabs by spaces
while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
- # Hand this line to the appropriate state handler
- if ($state == STATE_NORMAL) {
- process_normal();
- } elsif ($state == STATE_NAME) {
- process_name($file, $_);
- } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE ||
- $state == STATE_BODY_WITH_BLANK_LINE) {
- process_body($file, $_);
- } elsif ($state == STATE_INLINE) { # scanning for inline parameters
- process_inline($file, $_);
- } elsif ($state == STATE_PROTO) {
- process_proto($file, $_);
- } elsif ($state == STATE_DOCBLOCK) {
- process_docblock($file, $_);
- }
+ # Hand this line to the appropriate state handler
+ if ($state == STATE_NORMAL) {
+ process_normal();
+ } elsif ($state == STATE_NAME) {
+ process_name($file, $_);
+ } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE ||
+ $state == STATE_BODY_WITH_BLANK_LINE) {
+ process_body($file, $_);
+ } elsif ($state == STATE_INLINE) { # scanning for inline parameters
+ process_inline($file, $_);
+ } elsif ($state == STATE_PROTO) {
+ process_proto($file, $_);
+ } elsif ($state == STATE_DOCBLOCK) {
+ process_docblock($file, $_);
+ }
}
# Make sure we got something interesting.
if ($initial_section_counter == $section_counter && $
- output_mode ne "none") {
- if ($output_selection == OUTPUT_INCLUDE) {
- emit_warning("${file}:1", "'$_' not found\n")
- for keys %function_table;
- }
- else {
- emit_warning("${file}:1", "no structured comments found\n");
- }
+ output_mode ne "none") {
+ if ($output_selection == OUTPUT_INCLUDE) {
+ emit_warning("${file}:1", "'$_' not found\n")
+ for keys %function_table;
+ } else {
+ emit_warning("${file}:1", "no structured comments found\n");
+ }
}
close IN_FILE;
}
if ($output_mode eq "rst") {
- get_sphinx_version() if (!$sphinx_major);
+ get_sphinx_version() if (!$sphinx_major);
}
$kernelversion = get_kernel_version();
@@ -2386,14 +2387,14 @@ for (my $k = 0; $k < @highlights; $k++) {
# Read the file that maps relative names to absolute names for
# separate source and object directories and for shadow trees.
if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
- my ($relname, $absname);
- while(<SOURCE_MAP>) {
- chop();
- ($relname, $absname) = (split())[0..1];
- $relname =~ s:^/+::;
- $source_map{$relname} = $absname;
- }
- close(SOURCE_MAP);
+ my ($relname, $absname);
+ while(<SOURCE_MAP>) {
+ chop();
+ ($relname, $absname) = (split())[0..1];
+ $relname =~ s:^/+::;
+ $source_map{$relname} = $absname;
+ }
+ close(SOURCE_MAP);
}
if ($output_selection == OUTPUT_EXPORTED ||
@@ -2402,8 +2403,8 @@ if ($output_selection == OUTPUT_EXPORTED ||
push(@export_file_list, @ARGV);
foreach (@export_file_list) {
- chomp;
- process_export_file($_);
+ chomp;
+ process_export_file($_);
}
}
@@ -2412,10 +2413,10 @@ foreach (@ARGV) {
process_file($_);
}
if ($verbose && $errors) {
- print STDERR "$errors errors\n";
+ print STDERR "$errors errors\n";
}
if ($verbose && $warnings) {
- print STDERR "$warnings warnings\n";
+ print STDERR "$warnings warnings\n";
}
if ($Werror && $warnings) {
diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl
index e695634d153d..8e992b18bcd9 100755
--- a/scripts/leaking_addresses.pl
+++ b/scripts/leaking_addresses.pl
@@ -23,6 +23,7 @@ use strict;
use POSIX;
use File::Basename;
use File::Spec;
+use File::Temp qw/tempfile/;
use Cwd 'abs_path';
use Term::ANSIColor qw(:constants);
use Getopt::Long qw(:config no_auto_abbrev);
@@ -51,10 +52,13 @@ my $input_raw = ""; # Read raw results from file instead of scanning.
my $suppress_dmesg = 0; # Don't show dmesg in output.
my $squash_by_path = 0; # Summary report grouped by absolute path.
my $squash_by_filename = 0; # Summary report grouped by filename.
+my $kallsyms_file = ""; # Kernel symbols file.
my $kernel_config_file = ""; # Kernel configuration file.
my $opt_32bit = 0; # Scan 32-bit kernel.
my $page_offset_32bit = 0; # Page offset for 32-bit kernel.
+my @kallsyms = ();
+
# Skip these absolute paths.
my @skip_abs = (
'/proc/kmsg',
@@ -95,6 +99,8 @@ Options:
--squash-by-path Show one result per unique path.
--squash-by-filename Show one result per unique filename.
--kernel-config-file=<file> Kernel configuration file (e.g /boot/config)
+ --kallsyms=<file> Read kernel symbol addresses from file (for
+ scanning binary files).
--32-bit Scan 32-bit kernel.
--page-offset-32-bit=o Page offset (for 32-bit kernel 0xABCD1234).
-d, --debug Display debugging output.
@@ -115,6 +121,7 @@ GetOptions(
'squash-by-path' => \$squash_by_path,
'squash-by-filename' => \$squash_by_filename,
'raw' => \$raw,
+ 'kallsyms=s' => \$kallsyms_file,
'kernel-config-file=s' => \$kernel_config_file,
'32-bit' => \$opt_32bit,
'page-offset-32-bit=o' => \$page_offset_32bit,
@@ -155,6 +162,25 @@ if ($output_raw) {
select $fh;
}
+if ($kallsyms_file) {
+ open my $fh, '<', $kallsyms_file or die "$0: $kallsyms_file: $!\n";
+ while (<$fh>) {
+ chomp;
+ my @entry = split / /, $_;
+ my $addr_text = $entry[0];
+ if ($addr_text !~ /^0/) {
+ # TODO: Why is hex() so impossibly slow?
+ my $addr = hex($addr_text);
+ my $symbol = $entry[2];
+ # Only keep kernel text addresses.
+ my $long = pack("J", $addr);
+ my $entry = [$long, $symbol];
+ push @kallsyms, $entry;
+ }
+ }
+ close $fh;
+}
+
parse_dmesg();
walk(@DIRS);
@@ -221,6 +247,7 @@ sub get_kernel_config_option
{
my ($option) = @_;
my $value = "";
+ my $tmp_fh;
my $tmp_file = "";
my @config_files;
@@ -228,7 +255,8 @@ sub get_kernel_config_option
if ($kernel_config_file ne "") {
@config_files = ($kernel_config_file);
} elsif (-R "/proc/config.gz") {
- my $tmp_file = "/tmp/tmpkconf";
+ ($tmp_fh, $tmp_file) = tempfile("config.gz-XXXXXX",
+ UNLINK => 1);
if (system("gunzip < /proc/config.gz > $tmp_file")) {
dprint("system(gunzip < /proc/config.gz) failed\n");
@@ -250,10 +278,6 @@ sub get_kernel_config_option
}
}
- if ($tmp_file ne "") {
- system("rm -f $tmp_file");
- }
-
return $value;
}
@@ -285,9 +309,10 @@ sub is_false_positive
return is_false_positive_32bit($match);
}
- # 64 bit false positives.
-
- if ($match =~ '\b(0x)?(f|F){16}\b' or
+ # Ignore 64 bit false positives:
+ # 0xfffffffffffffff[0-f]
+ # 0x0000000000000000
+ if ($match =~ '\b(0x)?(f|F){15}[0-9a-f]\b' or
$match =~ '\b(0x)?0{16}\b') {
return 1;
}
@@ -304,7 +329,7 @@ sub is_false_positive_32bit
my ($match) = @_;
state $page_offset = get_page_offset();
- if ($match =~ '\b(0x)?(f|F){8}\b') {
+ if ($match =~ '\b(0x)?(f|F){7}[0-9a-f]\b') {
return 1;
}
@@ -347,18 +372,23 @@ sub is_in_vsyscall_memory_region
# True if argument potentially contains a kernel address.
sub may_leak_address
{
- my ($line) = @_;
+ my ($path, $line) = @_;
my $address_re;
- # Signal masks.
+ # Ignore Signal masks.
if ($line =~ '^SigBlk:' or
$line =~ '^SigIgn:' or
$line =~ '^SigCgt:') {
return 0;
}
- if ($line =~ '\bKEY=[[:xdigit:]]{14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b' or
- $line =~ '\b[[:xdigit:]]{14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b') {
+ # Ignore input device reporting.
+ # /proc/bus/input/devices: B: KEY=402000000 3803078f800d001 feffffdfffefffff fffffffffffffffe
+ # /sys/devices/platform/i8042/serio0/input/input1/uevent: KEY=402000000 3803078f800d001 feffffdfffefffff fffffffffffffffe
+ # /sys/devices/platform/i8042/serio0/input/input1/capabilities/key: 402000000 3803078f800d001 feffffdfffefffff fffffffffffffffe
+ if ($line =~ '\bKEY=[[:xdigit:]]{9,14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b' or
+ ($path =~ '\bkey$' and
+ $line =~ '\b[[:xdigit:]]{9,14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b')) {
return 0;
}
@@ -401,7 +431,7 @@ sub parse_dmesg
{
open my $cmd, '-|', 'dmesg';
while (<$cmd>) {
- if (may_leak_address($_)) {
+ if (may_leak_address("dmesg", $_)) {
print 'dmesg: ' . $_;
}
}
@@ -442,6 +472,25 @@ sub timed_parse_file
}
}
+sub parse_binary
+{
+ my ($file) = @_;
+
+ open my $fh, "<:raw", $file or return;
+ local $/ = undef;
+ my $bytes = <$fh>;
+ close $fh;
+
+ foreach my $entry (@kallsyms) {
+ my $addr = $entry->[0];
+ my $symbol = $entry->[1];
+ my $offset = index($bytes, $addr);
+ if ($offset != -1) {
+ printf("$file: $symbol @ $offset\n");
+ }
+ }
+}
+
sub parse_file
{
my ($file) = @_;
@@ -451,13 +500,22 @@ sub parse_file
}
if (! -T $file) {
+ if ($file =~ m|^/sys/kernel/btf/| or
+ $file =~ m|^/sys/devices/pci| or
+ $file =~ m|^/sys/firmware/efi/efivars/| or
+ $file =~ m|^/proc/bus/pci/|) {
+ return;
+ }
+ if (scalar @kallsyms > 0) {
+ parse_binary($file);
+ }
return;
}
open my $fh, "<", $file or return;
while ( <$fh> ) {
chomp;
- if (may_leak_address($_)) {
+ if (may_leak_address($file, $_)) {
printf("$file: $_\n");
}
}
@@ -469,7 +527,7 @@ sub check_path_for_leaks
{
my ($path) = @_;
- if (may_leak_address($path)) {
+ if (may_leak_address($path, $path)) {
printf("Path name may contain address: $path\n");
}
}
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a432b171be82..7862a8101747 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -135,8 +135,13 @@ gen_btf()
${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \
--strip-all ${1} ${2} 2>/dev/null
# Change e_type to ET_REL so that it can be used to link final vmlinux.
- # Unlike GNU ld, lld does not allow an ET_EXEC input.
- printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none
+ # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
+ if is_enabled CONFIG_CPU_BIG_ENDIAN; then
+ et_rel='\0\1'
+ else
+ et_rel='\1\0'
+ fi
+ printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none
}
# Create ${2} .S file with all symbols from the ${1} object file
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 9faa4d3d91e3..e217683b10d6 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -33,7 +33,7 @@ llvm)
fi
;;
rustc)
- echo 1.74.1
+ echo 1.76.0
;;
bindgen)
echo 0.65.1
diff --git a/scripts/mksysmap b/scripts/mksysmap
index 9ba1c9da0a40..57ff5656d566 100755
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -48,17 +48,8 @@ ${NM} -n ${1} | sed >${2} -e "
/ __kvm_nvhe_\\$/d
/ __kvm_nvhe_\.L/d
-# arm64 lld
-/ __AArch64ADRPThunk_/d
-
-# arm lld
-/ __ARMV5PILongThunk_/d
-/ __ARMV7PILongThunk_/d
-/ __ThumbV7PILongThunk_/d
-
-# mips lld
-/ __LA25Thunk_/d
-/ __microLA25Thunk_/d
+# lld arm/aarch64/mips thunks
+/ __[[:alnum:]]*Thunk_/d
# CFI type identifiers
/ __kcfi_typeid_/d
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 795b21154446..bf7c4b4b5ff4 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -70,9 +70,7 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...)
break;
case LOG_ERROR:
fprintf(stderr, "ERROR: ");
- break;
- case LOG_FATAL:
- fprintf(stderr, "FATAL: ");
+ error_occurred = true;
break;
default: /* invalid loglevel, ignore */
break;
@@ -83,16 +81,8 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...)
va_start(arglist, fmt);
vfprintf(stderr, fmt, arglist);
va_end(arglist);
-
- if (loglevel == LOG_FATAL)
- exit(1);
- if (loglevel == LOG_ERROR)
- error_occurred = true;
}
-void __attribute__((alias("modpost_log")))
-modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
-
static inline bool strends(const char *str, const char *postfix)
{
if (strlen(str) < strlen(postfix))
@@ -806,7 +796,8 @@ static void check_section(const char *modname, struct elf_info *elf,
#define DATA_SECTIONS ".data", ".data.rel"
#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \
- ".kprobes.text", ".cpuidle.text", ".noinstr.text"
+ ".kprobes.text", ".cpuidle.text", ".noinstr.text", \
+ ".ltext", ".ltext.*"
#define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
".fixup", ".entry.text", ".exception.text", \
".coldtext", ".softirqentry.text"
@@ -1857,7 +1848,7 @@ static void add_header(struct buffer *b, struct module *mod)
buf_printf(b,
"\n"
- "#ifdef CONFIG_RETPOLINE\n"
+ "#ifdef CONFIG_MITIGATION_RETPOLINE\n"
"MODULE_INFO(retpoline, \"Y\");\n"
"#endif\n");
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 835cababf1b0..ee43c7950636 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -194,15 +194,11 @@ void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym);
enum loglevel {
LOG_WARN,
LOG_ERROR,
- LOG_FATAL
};
void __attribute__((format(printf, 2, 3)))
modpost_log(enum loglevel loglevel, const char *fmt, ...);
-void __attribute__((format(printf, 2, 3), noreturn))
-modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
-
/*
* warn - show the given message, then let modpost continue running, still
* allowing modpost to exit successfully. This should be used when
@@ -218,4 +214,4 @@ modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
*/
#define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args)
#define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args)
-#define fatal(fmt, args...) modpost_log_noret(LOG_FATAL, fmt, ##args)
+#define fatal(fmt, args...) do { error(fmt, ##args); exit(1); } while (1)
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 31066bfdba04..dc4878502276 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
/* Sum all files in the same dir or subdirs. */
while ((line = get_line(&pos))) {
- char* p = line;
+ char* p;
+
+ /* trim the leading spaces away */
+ while (isspace(*line))
+ line++;
+ p = line;
if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
p = strrchr(line, ' ');
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
index 89298983a169..f58726671fb3 100644
--- a/scripts/package/kernel.spec
+++ b/scripts/package/kernel.spec
@@ -55,12 +55,12 @@ patch -p1 < %{SOURCE2}
%{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release}
%install
-mkdir -p %{buildroot}/boot
-cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE}
+mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE}
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz
%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
%{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
-cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE}
-cp .config %{buildroot}/boot/config-%{KERNELRELEASE}
+cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE}
+cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config
ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build
%if %{with_devel}
%{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}'
@@ -70,13 +70,14 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA
rm -rf %{buildroot}
%post
-if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then
-cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm
-cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm
-rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE}
-/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
-rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+if [ -x /usr/bin/kernel-install ]; then
+ /usr/bin/kernel-install add %{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/vmlinuz
fi
+for file in vmlinuz System.map config; do
+ if ! cmp --silent "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"; then
+ cp "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"
+ fi
+done
%preun
if [ -x /sbin/new-kernel-pkg ]; then
@@ -94,7 +95,6 @@ fi
%defattr (-, root, root)
/lib/modules/%{KERNELRELEASE}
%exclude /lib/modules/%{KERNELRELEASE}/build
-/boot/*
%files headers
%defattr (-, root, root)
diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index 25aefbb35377..4c781617ffe6 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -280,8 +280,6 @@ sub get_sphinx_version($)
sub check_sphinx()
{
- my $default_version;
-
open IN, $conf or die "Can't open $conf";
while (<IN>) {
if (m/^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]/) {
@@ -293,18 +291,7 @@ sub check_sphinx()
die "Can't get needs_sphinx version from $conf" if (!$min_version);
- open IN, $requirement_file or die "Can't open $requirement_file";
- while (<IN>) {
- if (m/^\s*Sphinx\s*==\s*([\d\.]+)$/) {
- $default_version=$1;
- last;
- }
- }
- close IN;
-
- die "Can't get default sphinx version from $requirement_file" if (!$default_version);
-
- $virtenv_dir = $virtenv_prefix . $default_version;
+ $virtenv_dir = $virtenv_prefix . "latest";
my $sphinx = get_sphinx_fname();
if ($sphinx eq "") {
@@ -318,8 +305,8 @@ sub check_sphinx()
die "$sphinx didn't return its version" if (!$cur_version);
if ($cur_version lt $min_version) {
- printf "ERROR: Sphinx version is %s. It should be >= %s (recommended >= %s)\n",
- $cur_version, $min_version, $default_version;
+ printf "ERROR: Sphinx version is %s. It should be >= %s\n",
+ $cur_version, $min_version;
$need_sphinx = 1;
return;
}
@@ -361,6 +348,7 @@ sub give_debian_hints()
{
my %map = (
"python-sphinx" => "python3-sphinx",
+ "yaml" => "python3-yaml",
"ensurepip" => "python3-venv",
"virtualenv" => "virtualenv",
"dot" => "graphviz",
@@ -395,6 +383,7 @@ sub give_redhat_hints()
{
my %map = (
"python-sphinx" => "python3-sphinx",
+ "yaml" => "python3-pyyaml",
"virtualenv" => "python3-virtualenv",
"dot" => "graphviz",
"convert" => "ImageMagick",
@@ -421,6 +410,7 @@ sub give_redhat_hints()
#
my $old = 0;
my $rel;
+ my $noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts";
$rel = $1 if ($system_release =~ /release\s+(\d+)/);
if (!($system_release =~ /Fedora/)) {
@@ -438,6 +428,9 @@ sub give_redhat_hints()
if ($rel && $rel < 26) {
$old = 1;
}
+ if ($rel && $rel >= 38) {
+ $noto_sans_redhat = "google-noto-sans-cjk-fonts";
+ }
}
if (!$rel) {
printf("Couldn't identify release number\n");
@@ -446,8 +439,9 @@ sub give_redhat_hints()
}
if ($pdf) {
- check_missing_file(["/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc"],
- "google-noto-sans-cjk-ttc-fonts", 2);
+ check_missing_file(["/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc"],
+ $noto_sans_redhat, 2);
}
check_rpm_missing(\@fedora26_opt_pkgs, 2) if ($pdf && !$old);
@@ -472,6 +466,7 @@ sub give_opensuse_hints()
{
my %map = (
"python-sphinx" => "python3-sphinx",
+ "yaml" => "python3-pyyaml",
"virtualenv" => "python3-virtualenv",
"dot" => "graphviz",
"convert" => "ImageMagick",
@@ -951,6 +946,7 @@ sub check_needs()
# Check for needed programs/tools
check_perl_module("Pod::Usage", 0);
+ check_python_module("yaml", 0);
check_program("make", 0);
check_program("gcc", 0);
check_program("dot", 1);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 7717354ce095..9a3dcaafb5b1 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -469,8 +469,10 @@ static int apparmor_file_open(struct file *file)
* Cache permissions granted by the previous exec check, with
* implicit read and executable mmap which are required to
* actually execute the image.
+ *
+ * Illogically, FMODE_EXEC is in f_flags, not f_mode.
*/
- if (current->in_execve) {
+ if (file->f_flags & __FMODE_EXEC) {
fctx->allow = MAY_EXEC | MAY_READ | AA_EXEC_MMAP;
return 0;
}
@@ -782,7 +784,7 @@ static int apparmor_getselfattr(unsigned int attr, struct lsm_ctx __user *lx,
int error = -ENOENT;
struct aa_task_ctx *ctx = task_ctx(current);
struct aa_label *label = NULL;
- char *value;
+ char *value = NULL;
switch (attr) {
case LSM_ATTR_CURRENT:
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index d0ffe37dc1d6..92b63039c654 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -18,5 +18,6 @@ integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o
integrity-$(CONFIG_LOAD_PPC_KEYS) += platform_certs/efi_parser.o \
platform_certs/load_powerpc.o \
platform_certs/keyring_handler.o
+# The relative order of the 'ima' and 'evm' LSMs depends on the order below.
obj-$(CONFIG_IMA) += ima/
obj-$(CONFIG_EVM) += evm/
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index df387de29bfa..45c3e5dda355 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -179,7 +179,8 @@ static int __init integrity_add_key(const unsigned int id, const void *data,
KEY_ALLOC_NOT_IN_QUOTA);
if (IS_ERR(key)) {
rc = PTR_ERR(key);
- pr_err("Problem loading X.509 certificate %d\n", rc);
+ if (id != INTEGRITY_KEYRING_MACHINE)
+ pr_err("Problem loading X.509 certificate %d\n", rc);
} else {
pr_notice("Loaded X.509 cert '%s'\n",
key_ref_to_ptr(key)->description);
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index 895f4b9ce8c6..de603cf42ac7 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -132,26 +132,3 @@ out:
pr_debug("%s() = %d\n", __func__, ret);
return ret;
}
-
-/**
- * integrity_kernel_module_request - prevent crypto-pkcs1pad(rsa,*) requests
- * @kmod_name: kernel module name
- *
- * We have situation, when public_key_verify_signature() in case of RSA
- * algorithm use alg_name to store internal information in order to
- * construct an algorithm on the fly, but crypto_larval_lookup() will try
- * to use alg_name in order to load kernel module with same name.
- * Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules,
- * we are safe to fail such module request from crypto_larval_lookup().
- *
- * In this way we prevent modprobe execution during digsig verification
- * and avoid possible deadlock if modprobe and/or it's dependencies
- * also signed with digsig.
- */
-int integrity_kernel_module_request(char *kmod_name)
-{
- if (strncmp(kmod_name, "crypto-pkcs1pad(rsa,", 20) == 0)
- return -EINVAL;
-
- return 0;
-}
diff --git a/security/integrity/evm/Kconfig b/security/integrity/evm/Kconfig
index fba9ee359bc9..861b3bacab82 100644
--- a/security/integrity/evm/Kconfig
+++ b/security/integrity/evm/Kconfig
@@ -6,6 +6,7 @@ config EVM
select CRYPTO_HMAC
select CRYPTO_SHA1
select CRYPTO_HASH_INFO
+ select SECURITY_PATH
default n
help
EVM protects a file's security extended attributes against
diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h
index 53bd7fec93fa..eb1a2c343bd7 100644
--- a/security/integrity/evm/evm.h
+++ b/security/integrity/evm/evm.h
@@ -32,6 +32,25 @@ struct xattr_list {
bool enabled;
};
+#define EVM_NEW_FILE 0x00000001
+#define EVM_IMMUTABLE_DIGSIG 0x00000002
+
+/* EVM integrity metadata associated with an inode */
+struct evm_iint_cache {
+ unsigned long flags;
+ enum integrity_status evm_status:4;
+};
+
+extern struct lsm_blob_sizes evm_blob_sizes;
+
+static inline struct evm_iint_cache *evm_iint_inode(const struct inode *inode)
+{
+ if (unlikely(!inode->i_security))
+ return NULL;
+
+ return inode->i_security + evm_blob_sizes.lbs_inode;
+}
+
extern int evm_initialized;
#define EVM_ATTR_FSUUID 0x0001
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index b1ffd4cc0b44..7552d49d0725 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -322,10 +322,10 @@ int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name,
static int evm_is_immutable(struct dentry *dentry, struct inode *inode)
{
const struct evm_ima_xattr_data *xattr_data = NULL;
- struct integrity_iint_cache *iint;
+ struct evm_iint_cache *iint;
int rc = 0;
- iint = integrity_iint_find(inode);
+ iint = evm_iint_inode(inode);
if (iint && (iint->flags & EVM_IMMUTABLE_DIGSIG))
return 1;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index cc7956d7878b..81dbade5b9b3 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -178,14 +178,14 @@ static int is_unsupported_fs(struct dentry *dentry)
static enum integrity_status evm_verify_hmac(struct dentry *dentry,
const char *xattr_name,
char *xattr_value,
- size_t xattr_value_len,
- struct integrity_iint_cache *iint)
+ size_t xattr_value_len)
{
struct evm_ima_xattr_data *xattr_data = NULL;
struct signature_v2_hdr *hdr;
enum integrity_status evm_status = INTEGRITY_PASS;
struct evm_digest digest;
- struct inode *inode;
+ struct inode *inode = d_backing_inode(dentry);
+ struct evm_iint_cache *iint = evm_iint_inode(inode);
int rc, xattr_len, evm_immutable = 0;
if (iint && (iint->evm_status == INTEGRITY_PASS ||
@@ -254,8 +254,6 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
(const char *)xattr_data, xattr_len,
digest.digest, digest.hdr.length);
if (!rc) {
- inode = d_backing_inode(dentry);
-
if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) {
if (iint)
iint->flags |= EVM_IMMUTABLE_DIGSIG;
@@ -403,7 +401,6 @@ int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
* @xattr_name: requested xattr
* @xattr_value: requested xattr value
* @xattr_value_len: requested xattr value length
- * @iint: inode integrity metadata
*
* Calculate the HMAC for the given dentry and verify it against the stored
* security.evm xattr. For performance, use the xattr value and length
@@ -416,8 +413,7 @@ int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
*/
enum integrity_status evm_verifyxattr(struct dentry *dentry,
const char *xattr_name,
- void *xattr_value, size_t xattr_value_len,
- struct integrity_iint_cache *iint)
+ void *xattr_value, size_t xattr_value_len)
{
if (!evm_key_loaded() || !evm_protected_xattr(xattr_name))
return INTEGRITY_UNKNOWN;
@@ -425,13 +421,8 @@ enum integrity_status evm_verifyxattr(struct dentry *dentry,
if (is_unsupported_fs(dentry))
return INTEGRITY_UNKNOWN;
- if (!iint) {
- iint = integrity_iint_find(d_backing_inode(dentry));
- if (!iint)
- return INTEGRITY_UNKNOWN;
- }
return evm_verify_hmac(dentry, xattr_name, xattr_value,
- xattr_value_len, iint);
+ xattr_value_len);
}
EXPORT_SYMBOL_GPL(evm_verifyxattr);
@@ -448,7 +439,7 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
if (!evm_key_loaded() || !S_ISREG(inode->i_mode) || evm_fixmode)
return INTEGRITY_PASS;
- return evm_verify_hmac(dentry, NULL, NULL, 0, NULL);
+ return evm_verify_hmac(dentry, NULL, NULL, 0);
}
/*
@@ -526,14 +517,14 @@ static int evm_protect_xattr(struct mnt_idmap *idmap,
evm_status = evm_verify_current_integrity(dentry);
if (evm_status == INTEGRITY_NOXATTRS) {
- struct integrity_iint_cache *iint;
+ struct evm_iint_cache *iint;
/* Exception if the HMAC is not going to be calculated. */
if (evm_hmac_disabled())
return 0;
- iint = integrity_iint_find(d_backing_inode(dentry));
- if (iint && (iint->flags & IMA_NEW_FILE))
+ iint = evm_iint_inode(d_backing_inode(dentry));
+ if (iint && (iint->flags & EVM_NEW_FILE))
return 0;
/* exception for pseudo filesystems */
@@ -581,6 +572,7 @@ out:
* @xattr_name: pointer to the affected extended attribute name
* @xattr_value: pointer to the new extended attribute value
* @xattr_value_len: pointer to the new extended attribute value length
+ * @flags: flags to pass into filesystem operations
*
* Before allowing the 'security.evm' protected xattr to be updated,
* verify the existing value is valid. As only the kernel should have
@@ -588,9 +580,9 @@ out:
* userspace from writing HMAC value. Writing 'security.evm' requires
* requires CAP_SYS_ADMIN privileges.
*/
-int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
- const char *xattr_name, const void *xattr_value,
- size_t xattr_value_len)
+static int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *xattr_name, const void *xattr_value,
+ size_t xattr_value_len, int flags)
{
const struct evm_ima_xattr_data *xattr_data = xattr_value;
@@ -620,8 +612,8 @@ int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
* Removing 'security.evm' requires CAP_SYS_ADMIN privileges and that
* the current value is valid.
*/
-int evm_inode_removexattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *xattr_name)
+static int evm_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *xattr_name)
{
/* Policy permits modification of the protected xattrs even though
* there's no HMAC key loaded
@@ -671,9 +663,11 @@ static inline int evm_inode_set_acl_change(struct mnt_idmap *idmap,
* Prevent modifying posix acls causing the EVM HMAC to be re-calculated
* and 'security.evm' xattr updated, unless the existing 'security.evm' is
* valid.
+ *
+ * Return: zero on success, -EPERM on failure.
*/
-int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
- const char *acl_name, struct posix_acl *kacl)
+static int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *acl_name, struct posix_acl *kacl)
{
enum integrity_status evm_status;
@@ -712,11 +706,29 @@ int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
return -EPERM;
}
+/**
+ * evm_inode_remove_acl - Protect the EVM extended attribute from posix acls
+ * @idmap: idmap of the mount
+ * @dentry: pointer to the affected dentry
+ * @acl_name: name of the posix acl
+ *
+ * Prevent removing posix acls causing the EVM HMAC to be re-calculated
+ * and 'security.evm' xattr updated, unless the existing 'security.evm' is
+ * valid.
+ *
+ * Return: zero on success, -EPERM on failure.
+ */
+static int evm_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *acl_name)
+{
+ return evm_inode_set_acl(idmap, dentry, acl_name, NULL);
+}
+
static void evm_reset_status(struct inode *inode)
{
- struct integrity_iint_cache *iint;
+ struct evm_iint_cache *iint;
- iint = integrity_iint_find(inode);
+ iint = evm_iint_inode(inode);
if (iint)
iint->evm_status = INTEGRITY_UNKNOWN;
}
@@ -752,6 +764,7 @@ bool evm_revalidate_status(const char *xattr_name)
* @xattr_name: pointer to the affected extended attribute name
* @xattr_value: pointer to the new extended attribute value
* @xattr_value_len: pointer to the new extended attribute value length
+ * @flags: flags to pass into filesystem operations
*
* Update the HMAC stored in 'security.evm' to reflect the change.
*
@@ -759,8 +772,11 @@ bool evm_revalidate_status(const char *xattr_name)
* __vfs_setxattr_noperm(). The caller of which has taken the inode's
* i_mutex lock.
*/
-void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
- const void *xattr_value, size_t xattr_value_len)
+static void evm_inode_post_setxattr(struct dentry *dentry,
+ const char *xattr_name,
+ const void *xattr_value,
+ size_t xattr_value_len,
+ int flags)
{
if (!evm_revalidate_status(xattr_name))
return;
@@ -780,6 +796,21 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
}
/**
+ * evm_inode_post_set_acl - Update the EVM extended attribute from posix acls
+ * @dentry: pointer to the affected dentry
+ * @acl_name: name of the posix acl
+ * @kacl: pointer to the posix acls
+ *
+ * Update the 'security.evm' xattr with the EVM HMAC re-calculated after setting
+ * posix acls.
+ */
+static void evm_inode_post_set_acl(struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl)
+{
+ return evm_inode_post_setxattr(dentry, acl_name, NULL, 0, 0);
+}
+
+/**
* evm_inode_post_removexattr - update 'security.evm' after removing the xattr
* @dentry: pointer to the affected dentry
* @xattr_name: pointer to the affected extended attribute name
@@ -789,7 +820,8 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
* No need to take the i_mutex lock here, as this function is called from
* vfs_removexattr() which takes the i_mutex.
*/
-void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
+static void evm_inode_post_removexattr(struct dentry *dentry,
+ const char *xattr_name)
{
if (!evm_revalidate_status(xattr_name))
return;
@@ -805,6 +837,22 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
evm_update_evmxattr(dentry, xattr_name, NULL, 0);
}
+/**
+ * evm_inode_post_remove_acl - Update the EVM extended attribute from posix acls
+ * @idmap: idmap of the mount
+ * @dentry: pointer to the affected dentry
+ * @acl_name: name of the posix acl
+ *
+ * Update the 'security.evm' xattr with the EVM HMAC re-calculated after
+ * removing posix acls.
+ */
+static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ evm_inode_post_removexattr(dentry, acl_name);
+}
+
static int evm_attr_change(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
@@ -828,8 +876,8 @@ static int evm_attr_change(struct mnt_idmap *idmap,
* Permit update of file attributes when files have a valid EVM signature,
* except in the case of them having an immutable portable signature.
*/
-int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
- struct iattr *attr)
+static int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
enum integrity_status evm_status;
@@ -870,6 +918,7 @@ int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
/**
* evm_inode_post_setattr - update 'security.evm' after modifying metadata
+ * @idmap: idmap of the idmapped mount
* @dentry: pointer to the affected dentry
* @ia_valid: for the UID and GID status
*
@@ -879,7 +928,8 @@ int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
* This function is called from notify_change(), which expects the caller
* to lock the inode's i_mutex.
*/
-void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
+static void evm_inode_post_setattr(struct mnt_idmap *idmap,
+ struct dentry *dentry, int ia_valid)
{
if (!evm_revalidate_status(NULL))
return;
@@ -896,7 +946,7 @@ void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
evm_update_evmxattr(dentry, NULL, NULL, 0);
}
-int evm_inode_copy_up_xattr(const char *name)
+static int evm_inode_copy_up_xattr(const char *name)
{
if (strcmp(name, XATTR_NAME_EVM) == 0)
return 1; /* Discard */
@@ -960,6 +1010,42 @@ out:
}
EXPORT_SYMBOL_GPL(evm_inode_init_security);
+static int evm_inode_alloc_security(struct inode *inode)
+{
+ struct evm_iint_cache *iint = evm_iint_inode(inode);
+
+ /* Called by security_inode_alloc(), it cannot be NULL. */
+ iint->flags = 0UL;
+ iint->evm_status = INTEGRITY_UNKNOWN;
+
+ return 0;
+}
+
+static void evm_file_release(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+ struct evm_iint_cache *iint = evm_iint_inode(inode);
+ fmode_t mode = file->f_mode;
+
+ if (!S_ISREG(inode->i_mode) || !(mode & FMODE_WRITE))
+ return;
+
+ if (iint && atomic_read(&inode->i_writecount) == 1)
+ iint->flags &= ~EVM_NEW_FILE;
+}
+
+static void evm_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
+{
+ struct inode *inode = d_backing_inode(dentry);
+ struct evm_iint_cache *iint = evm_iint_inode(inode);
+
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ if (iint)
+ iint->flags |= EVM_NEW_FILE;
+}
+
#ifdef CONFIG_EVM_LOAD_X509
void __init evm_load_x509(void)
{
@@ -999,4 +1085,45 @@ error:
return error;
}
+static struct security_hook_list evm_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(inode_setattr, evm_inode_setattr),
+ LSM_HOOK_INIT(inode_post_setattr, evm_inode_post_setattr),
+ LSM_HOOK_INIT(inode_copy_up_xattr, evm_inode_copy_up_xattr),
+ LSM_HOOK_INIT(inode_setxattr, evm_inode_setxattr),
+ LSM_HOOK_INIT(inode_post_setxattr, evm_inode_post_setxattr),
+ LSM_HOOK_INIT(inode_set_acl, evm_inode_set_acl),
+ LSM_HOOK_INIT(inode_post_set_acl, evm_inode_post_set_acl),
+ LSM_HOOK_INIT(inode_remove_acl, evm_inode_remove_acl),
+ LSM_HOOK_INIT(inode_post_remove_acl, evm_inode_post_remove_acl),
+ LSM_HOOK_INIT(inode_removexattr, evm_inode_removexattr),
+ LSM_HOOK_INIT(inode_post_removexattr, evm_inode_post_removexattr),
+ LSM_HOOK_INIT(inode_init_security, evm_inode_init_security),
+ LSM_HOOK_INIT(inode_alloc_security, evm_inode_alloc_security),
+ LSM_HOOK_INIT(file_release, evm_file_release),
+ LSM_HOOK_INIT(path_post_mknod, evm_post_path_mknod),
+};
+
+static const struct lsm_id evm_lsmid = {
+ .name = "evm",
+ .id = LSM_ID_EVM,
+};
+
+static int __init init_evm_lsm(void)
+{
+ security_add_hooks(evm_hooks, ARRAY_SIZE(evm_hooks), &evm_lsmid);
+ return 0;
+}
+
+struct lsm_blob_sizes evm_blob_sizes __ro_after_init = {
+ .lbs_inode = sizeof(struct evm_iint_cache),
+ .lbs_xattr_count = 1,
+};
+
+DEFINE_LSM(evm) = {
+ .name = "evm",
+ .init = init_evm_lsm,
+ .order = LSM_ORDER_LAST,
+ .blobs = &evm_blob_sizes,
+};
+
late_initcall(init_evm);
diff --git a/security/integrity/iint.c b/security/integrity/iint.c
index d4419a2a1e24..068ac6c2ae1e 100644
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -6,208 +6,15 @@
* Mimi Zohar <zohar@us.ibm.com>
*
* File: integrity_iint.c
- * - implements the integrity hooks: integrity_inode_alloc,
- * integrity_inode_free
- * - cache integrity information associated with an inode
- * using a rbtree tree.
+ * - initialize the integrity directory in securityfs
+ * - load IMA and EVM keys
*/
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/rbtree.h>
-#include <linux/file.h>
-#include <linux/uaccess.h>
#include <linux/security.h>
-#include <linux/lsm_hooks.h>
#include "integrity.h"
-static struct rb_root integrity_iint_tree = RB_ROOT;
-static DEFINE_RWLOCK(integrity_iint_lock);
-static struct kmem_cache *iint_cache __ro_after_init;
-
struct dentry *integrity_dir;
/*
- * __integrity_iint_find - return the iint associated with an inode
- */
-static struct integrity_iint_cache *__integrity_iint_find(struct inode *inode)
-{
- struct integrity_iint_cache *iint;
- struct rb_node *n = integrity_iint_tree.rb_node;
-
- while (n) {
- iint = rb_entry(n, struct integrity_iint_cache, rb_node);
-
- if (inode < iint->inode)
- n = n->rb_left;
- else if (inode > iint->inode)
- n = n->rb_right;
- else
- return iint;
- }
-
- return NULL;
-}
-
-/*
- * integrity_iint_find - return the iint associated with an inode
- */
-struct integrity_iint_cache *integrity_iint_find(struct inode *inode)
-{
- struct integrity_iint_cache *iint;
-
- if (!IS_IMA(inode))
- return NULL;
-
- read_lock(&integrity_iint_lock);
- iint = __integrity_iint_find(inode);
- read_unlock(&integrity_iint_lock);
-
- return iint;
-}
-
-#define IMA_MAX_NESTING (FILESYSTEM_MAX_STACK_DEPTH+1)
-
-/*
- * It is not clear that IMA should be nested at all, but as long is it measures
- * files both on overlayfs and on underlying fs, we need to annotate the iint
- * mutex to avoid lockdep false positives related to IMA + overlayfs.
- * See ovl_lockdep_annotate_inode_mutex_key() for more details.
- */
-static inline void iint_lockdep_annotate(struct integrity_iint_cache *iint,
- struct inode *inode)
-{
-#ifdef CONFIG_LOCKDEP
- static struct lock_class_key iint_mutex_key[IMA_MAX_NESTING];
-
- int depth = inode->i_sb->s_stack_depth;
-
- if (WARN_ON_ONCE(depth < 0 || depth >= IMA_MAX_NESTING))
- depth = 0;
-
- lockdep_set_class(&iint->mutex, &iint_mutex_key[depth]);
-#endif
-}
-
-static void iint_init_always(struct integrity_iint_cache *iint,
- struct inode *inode)
-{
- iint->ima_hash = NULL;
- iint->version = 0;
- iint->flags = 0UL;
- iint->atomic_flags = 0UL;
- iint->ima_file_status = INTEGRITY_UNKNOWN;
- iint->ima_mmap_status = INTEGRITY_UNKNOWN;
- iint->ima_bprm_status = INTEGRITY_UNKNOWN;
- iint->ima_read_status = INTEGRITY_UNKNOWN;
- iint->ima_creds_status = INTEGRITY_UNKNOWN;
- iint->evm_status = INTEGRITY_UNKNOWN;
- iint->measured_pcrs = 0;
- mutex_init(&iint->mutex);
- iint_lockdep_annotate(iint, inode);
-}
-
-static void iint_free(struct integrity_iint_cache *iint)
-{
- kfree(iint->ima_hash);
- mutex_destroy(&iint->mutex);
- kmem_cache_free(iint_cache, iint);
-}
-
-/**
- * integrity_inode_get - find or allocate an iint associated with an inode
- * @inode: pointer to the inode
- * @return: allocated iint
- *
- * Caller must lock i_mutex
- */
-struct integrity_iint_cache *integrity_inode_get(struct inode *inode)
-{
- struct rb_node **p;
- struct rb_node *node, *parent = NULL;
- struct integrity_iint_cache *iint, *test_iint;
-
- iint = integrity_iint_find(inode);
- if (iint)
- return iint;
-
- iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
- if (!iint)
- return NULL;
-
- iint_init_always(iint, inode);
-
- write_lock(&integrity_iint_lock);
-
- p = &integrity_iint_tree.rb_node;
- while (*p) {
- parent = *p;
- test_iint = rb_entry(parent, struct integrity_iint_cache,
- rb_node);
- if (inode < test_iint->inode) {
- p = &(*p)->rb_left;
- } else if (inode > test_iint->inode) {
- p = &(*p)->rb_right;
- } else {
- write_unlock(&integrity_iint_lock);
- kmem_cache_free(iint_cache, iint);
- return test_iint;
- }
- }
-
- iint->inode = inode;
- node = &iint->rb_node;
- inode->i_flags |= S_IMA;
- rb_link_node(node, parent, p);
- rb_insert_color(node, &integrity_iint_tree);
-
- write_unlock(&integrity_iint_lock);
- return iint;
-}
-
-/**
- * integrity_inode_free - called on security_inode_free
- * @inode: pointer to the inode
- *
- * Free the integrity information(iint) associated with an inode.
- */
-void integrity_inode_free(struct inode *inode)
-{
- struct integrity_iint_cache *iint;
-
- if (!IS_IMA(inode))
- return;
-
- write_lock(&integrity_iint_lock);
- iint = __integrity_iint_find(inode);
- rb_erase(&iint->rb_node, &integrity_iint_tree);
- write_unlock(&integrity_iint_lock);
-
- iint_free(iint);
-}
-
-static void iint_init_once(void *foo)
-{
- struct integrity_iint_cache *iint = (struct integrity_iint_cache *) foo;
-
- memset(iint, 0, sizeof(*iint));
-}
-
-static int __init integrity_iintcache_init(void)
-{
- iint_cache =
- kmem_cache_create("iint_cache", sizeof(struct integrity_iint_cache),
- 0, SLAB_PANIC, iint_init_once);
- return 0;
-}
-DEFINE_LSM(integrity) = {
- .name = "integrity",
- .init = integrity_iintcache_init,
- .order = LSM_ORDER_LAST,
-};
-
-
-/*
* integrity_kernel_read - read data from the file
*
* This is a function for reading file content instead of kernel_read().
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index b98bfe9efd0c..475c32615006 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -8,6 +8,7 @@ config IMA
select CRYPTO_HMAC
select CRYPTO_SHA1
select CRYPTO_HASH_INFO
+ select SECURITY_PATH
select TCG_TPM if HAS_IOMEM
select TCG_TIS if TCG_TPM && X86
select TCG_CRB if TCG_TPM && ACPI
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index 2499f2485c04..b376d38b4ee6 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -4,7 +4,7 @@
# Measurement Architecture(IMA).
#
-obj-$(CONFIG_IMA) += ima.o
+obj-$(CONFIG_IMA) += ima.o ima_iint.o
ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
ima_policy.o ima_template.o ima_template_lib.o
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c29db699c996..11d7c0332207 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -60,7 +60,7 @@ extern const char boot_aggregate_name[];
/* IMA event related data */
struct ima_event_data {
- struct integrity_iint_cache *iint;
+ struct ima_iint_cache *iint;
struct file *file;
const unsigned char *filename;
struct evm_ima_xattr_data *xattr_value;
@@ -119,6 +119,107 @@ struct ima_kexec_hdr {
u64 count;
};
+/* IMA iint action cache flags */
+#define IMA_MEASURE 0x00000001
+#define IMA_MEASURED 0x00000002
+#define IMA_APPRAISE 0x00000004
+#define IMA_APPRAISED 0x00000008
+/*#define IMA_COLLECT 0x00000010 do not use this flag */
+#define IMA_COLLECTED 0x00000020
+#define IMA_AUDIT 0x00000040
+#define IMA_AUDITED 0x00000080
+#define IMA_HASH 0x00000100
+#define IMA_HASHED 0x00000200
+
+/* IMA iint policy rule cache flags */
+#define IMA_NONACTION_FLAGS 0xff000000
+#define IMA_DIGSIG_REQUIRED 0x01000000
+#define IMA_PERMIT_DIRECTIO 0x02000000
+#define IMA_NEW_FILE 0x04000000
+#define IMA_FAIL_UNVERIFIABLE_SIGS 0x10000000
+#define IMA_MODSIG_ALLOWED 0x20000000
+#define IMA_CHECK_BLACKLIST 0x40000000
+#define IMA_VERITY_REQUIRED 0x80000000
+
+#define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
+ IMA_HASH | IMA_APPRAISE_SUBMASK)
+#define IMA_DONE_MASK (IMA_MEASURED | IMA_APPRAISED | IMA_AUDITED | \
+ IMA_HASHED | IMA_COLLECTED | \
+ IMA_APPRAISED_SUBMASK)
+
+/* IMA iint subaction appraise cache flags */
+#define IMA_FILE_APPRAISE 0x00001000
+#define IMA_FILE_APPRAISED 0x00002000
+#define IMA_MMAP_APPRAISE 0x00004000
+#define IMA_MMAP_APPRAISED 0x00008000
+#define IMA_BPRM_APPRAISE 0x00010000
+#define IMA_BPRM_APPRAISED 0x00020000
+#define IMA_READ_APPRAISE 0x00040000
+#define IMA_READ_APPRAISED 0x00080000
+#define IMA_CREDS_APPRAISE 0x00100000
+#define IMA_CREDS_APPRAISED 0x00200000
+#define IMA_APPRAISE_SUBMASK (IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \
+ IMA_BPRM_APPRAISE | IMA_READ_APPRAISE | \
+ IMA_CREDS_APPRAISE)
+#define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \
+ IMA_BPRM_APPRAISED | IMA_READ_APPRAISED | \
+ IMA_CREDS_APPRAISED)
+
+/* IMA iint cache atomic_flags */
+#define IMA_CHANGE_XATTR 0
+#define IMA_UPDATE_XATTR 1
+#define IMA_CHANGE_ATTR 2
+#define IMA_DIGSIG 3
+#define IMA_MUST_MEASURE 4
+
+/* IMA integrity metadata associated with an inode */
+struct ima_iint_cache {
+ struct mutex mutex; /* protects: version, flags, digest */
+ u64 version; /* track inode changes */
+ unsigned long flags;
+ unsigned long measured_pcrs;
+ unsigned long atomic_flags;
+ unsigned long real_ino;
+ dev_t real_dev;
+ enum integrity_status ima_file_status:4;
+ enum integrity_status ima_mmap_status:4;
+ enum integrity_status ima_bprm_status:4;
+ enum integrity_status ima_read_status:4;
+ enum integrity_status ima_creds_status:4;
+ struct ima_digest_data *ima_hash;
+};
+
+extern struct lsm_blob_sizes ima_blob_sizes;
+
+static inline struct ima_iint_cache *
+ima_inode_get_iint(const struct inode *inode)
+{
+ struct ima_iint_cache **iint_sec;
+
+ if (unlikely(!inode->i_security))
+ return NULL;
+
+ iint_sec = inode->i_security + ima_blob_sizes.lbs_inode;
+ return *iint_sec;
+}
+
+static inline void ima_inode_set_iint(const struct inode *inode,
+ struct ima_iint_cache *iint)
+{
+ struct ima_iint_cache **iint_sec;
+
+ if (unlikely(!inode->i_security))
+ return;
+
+ iint_sec = inode->i_security + ima_blob_sizes.lbs_inode;
+ *iint_sec = iint;
+}
+
+struct ima_iint_cache *ima_iint_find(struct inode *inode);
+struct ima_iint_cache *ima_inode_get(struct inode *inode);
+void ima_inode_free(struct inode *inode);
+void __init ima_iintcache_init(void);
+
extern const int read_idmap[];
#ifdef CONFIG_HAVE_IMA_KEXEC
@@ -127,6 +228,12 @@ void ima_load_kexec_buffer(void);
static inline void ima_load_kexec_buffer(void) {}
#endif /* CONFIG_HAVE_IMA_KEXEC */
+#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS
+void ima_post_key_create_or_update(struct key *keyring, struct key *key,
+ const void *payload, size_t plen,
+ unsigned long flags, bool create);
+#endif
+
/*
* The default binary_runtime_measurements list format is defined as the
* platform native format. The canonical format is defined as little-endian.
@@ -146,8 +253,8 @@ int ima_calc_field_array_hash(struct ima_field_data *field_data,
struct ima_template_entry *entry);
int ima_calc_boot_aggregate(struct ima_digest_data *hash);
void ima_add_violation(struct file *file, const unsigned char *filename,
- struct integrity_iint_cache *iint,
- const char *op, const char *cause);
+ struct ima_iint_cache *iint, const char *op,
+ const char *cause);
int ima_init_crypto(void);
void ima_putc(struct seq_file *m, void *data, int datalen);
void ima_print_digest(struct seq_file *m, u8 *digest, u32 size);
@@ -261,10 +368,10 @@ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode,
struct ima_template_desc **template_desc,
const char *func_data, unsigned int *allowed_algos);
int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
-int ima_collect_measurement(struct integrity_iint_cache *iint,
- struct file *file, void *buf, loff_t size,
- enum hash_algo algo, struct modsig *modsig);
-void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
+int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file,
+ void *buf, loff_t size, enum hash_algo algo,
+ struct modsig *modsig);
+void ima_store_measurement(struct ima_iint_cache *iint, struct file *file,
const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
int xattr_len, const struct modsig *modsig, int pcr,
@@ -274,7 +381,7 @@ int process_buffer_measurement(struct mnt_idmap *idmap,
const char *eventname, enum ima_hooks func,
int pcr, const char *func_data,
bool buf_hash, u8 *digest, size_t digest_len);
-void ima_audit_measurement(struct integrity_iint_cache *iint,
+void ima_audit_measurement(struct ima_iint_cache *iint,
const unsigned char *filename);
int ima_alloc_init_template(struct ima_event_data *event_data,
struct ima_template_entry **entry,
@@ -312,32 +419,32 @@ int ima_policy_show(struct seq_file *m, void *v);
#define IMA_APPRAISE_KEXEC 0x40
#ifdef CONFIG_IMA_APPRAISE
-int ima_check_blacklist(struct integrity_iint_cache *iint,
+int ima_check_blacklist(struct ima_iint_cache *iint,
const struct modsig *modsig, int pcr);
-int ima_appraise_measurement(enum ima_hooks func,
- struct integrity_iint_cache *iint,
+int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint,
struct file *file, const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
int xattr_len, const struct modsig *modsig);
int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode,
int mask, enum ima_hooks func);
-void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
-enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
+void ima_update_xattr(struct ima_iint_cache *iint, struct file *file);
+enum integrity_status ima_get_cache_status(struct ima_iint_cache *iint,
enum ima_hooks func);
enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value,
int xattr_len);
int ima_read_xattr(struct dentry *dentry,
struct evm_ima_xattr_data **xattr_value, int xattr_len);
+void __init init_ima_appraise_lsm(const struct lsm_id *lsmid);
#else
-static inline int ima_check_blacklist(struct integrity_iint_cache *iint,
+static inline int ima_check_blacklist(struct ima_iint_cache *iint,
const struct modsig *modsig, int pcr)
{
return 0;
}
static inline int ima_appraise_measurement(enum ima_hooks func,
- struct integrity_iint_cache *iint,
+ struct ima_iint_cache *iint,
struct file *file,
const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
@@ -354,14 +461,13 @@ static inline int ima_must_appraise(struct mnt_idmap *idmap,
return 0;
}
-static inline void ima_update_xattr(struct integrity_iint_cache *iint,
+static inline void ima_update_xattr(struct ima_iint_cache *iint,
struct file *file)
{
}
-static inline enum integrity_status ima_get_cache_status(struct integrity_iint_cache
- *iint,
- enum ima_hooks func)
+static inline enum integrity_status
+ima_get_cache_status(struct ima_iint_cache *iint, enum ima_hooks func)
{
return INTEGRITY_UNKNOWN;
}
@@ -379,6 +485,10 @@ static inline int ima_read_xattr(struct dentry *dentry,
return 0;
}
+static inline void __init init_ima_appraise_lsm(const struct lsm_id *lsmid)
+{
+}
+
#endif /* CONFIG_IMA_APPRAISE */
#ifdef CONFIG_IMA_APPRAISE_MODSIG
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 597ea0c4d72f..b37d043d5748 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -131,8 +131,8 @@ int ima_store_template(struct ima_template_entry *entry,
* value is invalidated.
*/
void ima_add_violation(struct file *file, const unsigned char *filename,
- struct integrity_iint_cache *iint,
- const char *op, const char *cause)
+ struct ima_iint_cache *iint, const char *op,
+ const char *cause)
{
struct ima_template_entry *entry;
struct inode *inode = file_inode(file);
@@ -201,7 +201,8 @@ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode,
allowed_algos);
}
-static bool ima_get_verity_digest(struct integrity_iint_cache *iint,
+static bool ima_get_verity_digest(struct ima_iint_cache *iint,
+ struct inode *inode,
struct ima_max_digest_data *hash)
{
enum hash_algo alg;
@@ -211,7 +212,7 @@ static bool ima_get_verity_digest(struct integrity_iint_cache *iint,
* On failure, 'measure' policy rules will result in a file data
* hash containing 0's.
*/
- digest_len = fsverity_get_digest(iint->inode, hash->digest, NULL, &alg);
+ digest_len = fsverity_get_digest(inode, hash->digest, NULL, &alg);
if (digest_len == 0)
return false;
@@ -237,9 +238,9 @@ static bool ima_get_verity_digest(struct integrity_iint_cache *iint,
*
* Return 0 on success, error code otherwise
*/
-int ima_collect_measurement(struct integrity_iint_cache *iint,
- struct file *file, void *buf, loff_t size,
- enum hash_algo algo, struct modsig *modsig)
+int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file,
+ void *buf, loff_t size, enum hash_algo algo,
+ struct modsig *modsig)
{
const char *audit_cause = "failed";
struct inode *inode = file_inode(file);
@@ -280,7 +281,7 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
memset(&hash.digest, 0, sizeof(hash.digest));
if (iint->flags & IMA_VERITY_REQUIRED) {
- if (!ima_get_verity_digest(iint, &hash)) {
+ if (!ima_get_verity_digest(iint, inode, &hash)) {
audit_cause = "no-verity-digest";
result = -ENODATA;
}
@@ -338,8 +339,8 @@ out:
*
* Must be called with iint->mutex held.
*/
-void ima_store_measurement(struct integrity_iint_cache *iint,
- struct file *file, const unsigned char *filename,
+void ima_store_measurement(struct ima_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
int xattr_len, const struct modsig *modsig, int pcr,
struct ima_template_desc *template_desc)
@@ -382,7 +383,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
ima_free_template_entry(entry);
}
-void ima_audit_measurement(struct integrity_iint_cache *iint,
+void ima_audit_measurement(struct ima_iint_cache *iint,
const unsigned char *filename)
{
struct audit_buffer *ab;
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 870dde67707b..3497741caea9 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -84,8 +84,7 @@ int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode,
NULL, NULL, NULL);
}
-static int ima_fix_xattr(struct dentry *dentry,
- struct integrity_iint_cache *iint)
+static int ima_fix_xattr(struct dentry *dentry, struct ima_iint_cache *iint)
{
int rc, offset;
u8 algo = iint->ima_hash->algo;
@@ -106,7 +105,7 @@ static int ima_fix_xattr(struct dentry *dentry,
}
/* Return specific func appraised cached result */
-enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
+enum integrity_status ima_get_cache_status(struct ima_iint_cache *iint,
enum ima_hooks func)
{
switch (func) {
@@ -126,7 +125,7 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
}
}
-static void ima_set_cache_status(struct integrity_iint_cache *iint,
+static void ima_set_cache_status(struct ima_iint_cache *iint,
enum ima_hooks func,
enum integrity_status status)
{
@@ -152,8 +151,7 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint,
}
}
-static void ima_cache_flags(struct integrity_iint_cache *iint,
- enum ima_hooks func)
+static void ima_cache_flags(struct ima_iint_cache *iint, enum ima_hooks func)
{
switch (func) {
case MMAP_CHECK:
@@ -276,7 +274,7 @@ static int calc_file_id_hash(enum evm_ima_xattr_type type,
*
* Return 0 on success, error code otherwise.
*/
-static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
+static int xattr_verify(enum ima_hooks func, struct ima_iint_cache *iint,
struct evm_ima_xattr_data *xattr_value, int xattr_len,
enum integrity_status *status, const char **cause)
{
@@ -443,7 +441,7 @@ static int modsig_verify(enum ima_hooks func, const struct modsig *modsig,
*
* Returns -EPERM if the hash is blacklisted.
*/
-int ima_check_blacklist(struct integrity_iint_cache *iint,
+int ima_check_blacklist(struct ima_iint_cache *iint,
const struct modsig *modsig, int pcr)
{
enum hash_algo hash_algo;
@@ -477,8 +475,7 @@ int ima_check_blacklist(struct integrity_iint_cache *iint,
*
* Return 0 on success, error code otherwise
*/
-int ima_appraise_measurement(enum ima_hooks func,
- struct integrity_iint_cache *iint,
+int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint,
struct file *file, const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
int xattr_len, const struct modsig *modsig)
@@ -520,7 +517,7 @@ int ima_appraise_measurement(enum ima_hooks func,
}
status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value,
- rc < 0 ? 0 : rc, iint);
+ rc < 0 ? 0 : rc);
switch (status) {
case INTEGRITY_PASS:
case INTEGRITY_PASS_IMMUTABLE:
@@ -603,7 +600,7 @@ out:
/*
* ima_update_xattr - update 'security.ima' hash value
*/
-void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
+void ima_update_xattr(struct ima_iint_cache *iint, struct file *file)
{
struct dentry *dentry = file_dentry(file);
int rc = 0;
@@ -629,17 +626,18 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
* ima_inode_post_setattr - reflect file metadata changes
* @idmap: idmap of the mount the inode was found from
* @dentry: pointer to the affected dentry
+ * @ia_valid: for the UID and GID status
*
* Changes to a dentry's metadata might result in needing to appraise.
*
* This function is called from notify_change(), which expects the caller
* to lock the inode's i_mutex.
*/
-void ima_inode_post_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry)
+static void ima_inode_post_setattr(struct mnt_idmap *idmap,
+ struct dentry *dentry, int ia_valid)
{
struct inode *inode = d_backing_inode(dentry);
- struct integrity_iint_cache *iint;
+ struct ima_iint_cache *iint;
int action;
if (!(ima_policy_flag & IMA_APPRAISE) || !S_ISREG(inode->i_mode)
@@ -647,7 +645,7 @@ void ima_inode_post_setattr(struct mnt_idmap *idmap,
return;
action = ima_must_appraise(idmap, inode, MAY_ACCESS, POST_SETATTR);
- iint = integrity_iint_find(inode);
+ iint = ima_iint_find(inode);
if (iint) {
set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags);
if (!action)
@@ -673,12 +671,12 @@ static int ima_protect_xattr(struct dentry *dentry, const char *xattr_name,
static void ima_reset_appraise_flags(struct inode *inode, int digsig)
{
- struct integrity_iint_cache *iint;
+ struct ima_iint_cache *iint;
if (!(ima_policy_flag & IMA_APPRAISE) || !S_ISREG(inode->i_mode))
return;
- iint = integrity_iint_find(inode);
+ iint = ima_iint_find(inode);
if (!iint)
return;
iint->measured_pcrs = 0;
@@ -749,8 +747,9 @@ static int validate_hash_algo(struct dentry *dentry,
return -EACCES;
}
-int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
- const void *xattr_value, size_t xattr_value_len)
+static int ima_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *xattr_name, const void *xattr_value,
+ size_t xattr_value_len, int flags)
{
const struct evm_ima_xattr_data *xvalue = xattr_value;
int digsig = 0;
@@ -779,8 +778,8 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
return result;
}
-int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
- const char *acl_name, struct posix_acl *kacl)
+static int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *acl_name, struct posix_acl *kacl)
{
if (evm_revalidate_status(acl_name))
ima_reset_appraise_flags(d_backing_inode(dentry), 0);
@@ -788,7 +787,8 @@ int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
return 0;
}
-int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
+static int ima_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *xattr_name)
{
int result;
@@ -800,3 +800,23 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
}
return result;
}
+
+static int ima_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ const char *acl_name)
+{
+ return ima_inode_set_acl(idmap, dentry, acl_name, NULL);
+}
+
+static struct security_hook_list ima_appraise_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(inode_post_setattr, ima_inode_post_setattr),
+ LSM_HOOK_INIT(inode_setxattr, ima_inode_setxattr),
+ LSM_HOOK_INIT(inode_set_acl, ima_inode_set_acl),
+ LSM_HOOK_INIT(inode_removexattr, ima_inode_removexattr),
+ LSM_HOOK_INIT(inode_remove_acl, ima_inode_remove_acl),
+};
+
+void __init init_ima_appraise_lsm(const struct lsm_id *lsmid)
+{
+ security_add_hooks(ima_appraise_hooks, ARRAY_SIZE(ima_appraise_hooks),
+ lsmid);
+}
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
new file mode 100644
index 000000000000..e7c9c216c1c6
--- /dev/null
+++ b/security/integrity/ima/ima_iint.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008 IBM Corporation
+ *
+ * Authors:
+ * Mimi Zohar <zohar@us.ibm.com>
+ *
+ * File: ima_iint.c
+ * - implements the IMA hook: ima_inode_free
+ * - cache integrity information in the inode security blob
+ */
+#include <linux/slab.h>
+
+#include "ima.h"
+
+static struct kmem_cache *ima_iint_cache __ro_after_init;
+
+/**
+ * ima_iint_find - Return the iint associated with an inode
+ * @inode: Pointer to the inode
+ *
+ * Return the IMA integrity information (iint) associated with an inode, if the
+ * inode was processed by IMA.
+ *
+ * Return: Found iint or NULL.
+ */
+struct ima_iint_cache *ima_iint_find(struct inode *inode)
+{
+ if (!IS_IMA(inode))
+ return NULL;
+
+ return ima_inode_get_iint(inode);
+}
+
+#define IMA_MAX_NESTING (FILESYSTEM_MAX_STACK_DEPTH + 1)
+
+/*
+ * It is not clear that IMA should be nested at all, but as long is it measures
+ * files both on overlayfs and on underlying fs, we need to annotate the iint
+ * mutex to avoid lockdep false positives related to IMA + overlayfs.
+ * See ovl_lockdep_annotate_inode_mutex_key() for more details.
+ */
+static inline void ima_iint_lockdep_annotate(struct ima_iint_cache *iint,
+ struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+ static struct lock_class_key ima_iint_mutex_key[IMA_MAX_NESTING];
+
+ int depth = inode->i_sb->s_stack_depth;
+
+ if (WARN_ON_ONCE(depth < 0 || depth >= IMA_MAX_NESTING))
+ depth = 0;
+
+ lockdep_set_class(&iint->mutex, &ima_iint_mutex_key[depth]);
+#endif
+}
+
+static void ima_iint_init_always(struct ima_iint_cache *iint,
+ struct inode *inode)
+{
+ iint->ima_hash = NULL;
+ iint->version = 0;
+ iint->flags = 0UL;
+ iint->atomic_flags = 0UL;
+ iint->ima_file_status = INTEGRITY_UNKNOWN;
+ iint->ima_mmap_status = INTEGRITY_UNKNOWN;
+ iint->ima_bprm_status = INTEGRITY_UNKNOWN;
+ iint->ima_read_status = INTEGRITY_UNKNOWN;
+ iint->ima_creds_status = INTEGRITY_UNKNOWN;
+ iint->measured_pcrs = 0;
+ mutex_init(&iint->mutex);
+ ima_iint_lockdep_annotate(iint, inode);
+}
+
+static void ima_iint_free(struct ima_iint_cache *iint)
+{
+ kfree(iint->ima_hash);
+ mutex_destroy(&iint->mutex);
+ kmem_cache_free(ima_iint_cache, iint);
+}
+
+/**
+ * ima_inode_get - Find or allocate an iint associated with an inode
+ * @inode: Pointer to the inode
+ *
+ * Find an iint associated with an inode, and allocate a new one if not found.
+ * Caller must lock i_mutex.
+ *
+ * Return: An iint on success, NULL on error.
+ */
+struct ima_iint_cache *ima_inode_get(struct inode *inode)
+{
+ struct ima_iint_cache *iint;
+
+ iint = ima_iint_find(inode);
+ if (iint)
+ return iint;
+
+ iint = kmem_cache_alloc(ima_iint_cache, GFP_NOFS);
+ if (!iint)
+ return NULL;
+
+ ima_iint_init_always(iint, inode);
+
+ inode->i_flags |= S_IMA;
+ ima_inode_set_iint(inode, iint);
+
+ return iint;
+}
+
+/**
+ * ima_inode_free - Called on inode free
+ * @inode: Pointer to the inode
+ *
+ * Free the iint associated with an inode.
+ */
+void ima_inode_free(struct inode *inode)
+{
+ struct ima_iint_cache *iint;
+
+ if (!IS_IMA(inode))
+ return;
+
+ iint = ima_iint_find(inode);
+ ima_inode_set_iint(inode, NULL);
+
+ ima_iint_free(iint);
+}
+
+static void ima_iint_init_once(void *foo)
+{
+ struct ima_iint_cache *iint = (struct ima_iint_cache *)foo;
+
+ memset(iint, 0, sizeof(*iint));
+}
+
+void __init ima_iintcache_init(void)
+{
+ ima_iint_cache =
+ kmem_cache_create("ima_iint_cache", sizeof(struct ima_iint_cache),
+ 0, SLAB_PANIC, ima_iint_init_once);
+}
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 63979aefc95f..393f5c7912d5 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -44,7 +44,7 @@ static int __init ima_add_boot_aggregate(void)
static const char op[] = "add_boot_aggregate";
const char *audit_cause = "ENOMEM";
struct ima_template_entry *entry;
- struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
+ struct ima_iint_cache tmp_iint, *iint = &tmp_iint;
struct ima_event_data event_data = { .iint = iint,
.filename = boot_aggregate_name };
struct ima_max_digest_data hash;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index cc1217ac2c6f..c84e8c55333d 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -114,7 +114,7 @@ static int mmap_violation_check(enum ima_hooks func, struct file *file,
*
*/
static void ima_rdwr_violation_check(struct file *file,
- struct integrity_iint_cache *iint,
+ struct ima_iint_cache *iint,
int must_measure,
char **pathbuf,
const char **pathname,
@@ -127,7 +127,7 @@ static void ima_rdwr_violation_check(struct file *file,
if (mode & FMODE_WRITE) {
if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) {
if (!iint)
- iint = integrity_iint_find(inode);
+ iint = ima_iint_find(inode);
/* IMA_MEASURE is set from reader side */
if (iint && test_bit(IMA_MUST_MEASURE,
&iint->atomic_flags))
@@ -153,7 +153,7 @@ static void ima_rdwr_violation_check(struct file *file,
"invalid_pcr", "open_writers");
}
-static void ima_check_last_writer(struct integrity_iint_cache *iint,
+static void ima_check_last_writer(struct ima_iint_cache *iint,
struct inode *inode, struct file *file)
{
fmode_t mode = file->f_mode;
@@ -189,15 +189,15 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
*
* Flag files that changed, based on i_version
*/
-void ima_file_free(struct file *file)
+static void ima_file_free(struct file *file)
{
struct inode *inode = file_inode(file);
- struct integrity_iint_cache *iint;
+ struct ima_iint_cache *iint;
if (!ima_policy_flag || !S_ISREG(inode->i_mode))
return;
- iint = integrity_iint_find(inode);
+ iint = ima_iint_find(inode);
if (!iint)
return;
@@ -209,7 +209,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
enum ima_hooks func)
{
struct inode *backing_inode, *inode = file_inode(file);
- struct integrity_iint_cache *iint = NULL;
+ struct ima_iint_cache *iint = NULL;
struct ima_template_desc *template_desc = NULL;
char *pathbuf = NULL;
char filename[NAME_MAX];
@@ -248,7 +248,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
inode_lock(inode);
if (action) {
- iint = integrity_inode_get(inode);
+ iint = ima_inode_get(inode);
if (!iint)
rc = -ENOMEM;
}
@@ -427,8 +427,8 @@ out:
* On success return 0. On integrity appraisal error, assuming the file
* is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
-int ima_file_mmap(struct file *file, unsigned long reqprot,
- unsigned long prot, unsigned long flags)
+static int ima_file_mmap(struct file *file, unsigned long reqprot,
+ unsigned long prot, unsigned long flags)
{
u32 secid;
int ret;
@@ -455,7 +455,8 @@ int ima_file_mmap(struct file *file, unsigned long reqprot,
/**
* ima_file_mprotect - based on policy, limit mprotect change
* @vma: vm_area_struct protection is set to
- * @prot: contains the protection that will be applied by the kernel.
+ * @reqprot: protection requested by the application
+ * @prot: protection that will be applied by the kernel
*
* Files can be mmap'ed read/write and later changed to execute to circumvent
* IMA's mmap appraisal policy rules. Due to locking issues (mmap semaphore
@@ -465,7 +466,8 @@ int ima_file_mmap(struct file *file, unsigned long reqprot,
*
* On mprotect change success, return 0. On failure, return -EACESS.
*/
-int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
+static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
+ unsigned long prot)
{
struct ima_template_desc *template = NULL;
struct file *file;
@@ -523,7 +525,7 @@ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
* On success return 0. On integrity appraisal error, assuming the file
* is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
-int ima_bprm_check(struct linux_binprm *bprm)
+static int ima_bprm_check(struct linux_binprm *bprm)
{
int ret;
u32 secid;
@@ -549,7 +551,7 @@ int ima_bprm_check(struct linux_binprm *bprm)
* On success return 0. On integrity appraisal error, assuming the file
* is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
-int ima_file_check(struct file *file, int mask)
+static int ima_file_check(struct file *file, int mask)
{
u32 secid;
@@ -558,16 +560,15 @@ int ima_file_check(struct file *file, int mask)
mask & (MAY_READ | MAY_WRITE | MAY_EXEC |
MAY_APPEND), FILE_CHECK);
}
-EXPORT_SYMBOL_GPL(ima_file_check);
static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf,
size_t buf_size)
{
- struct integrity_iint_cache *iint = NULL, tmp_iint;
+ struct ima_iint_cache *iint = NULL, tmp_iint;
int rc, hash_algo;
if (ima_policy_flag) {
- iint = integrity_iint_find(inode);
+ iint = ima_iint_find(inode);
if (iint)
mutex_lock(&iint->mutex);
}
@@ -577,7 +578,6 @@ static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf,
mutex_unlock(&iint->mutex);
memset(&tmp_iint, 0, sizeof(tmp_iint));
- tmp_iint.inode = inode;
mutex_init(&tmp_iint.mutex);
rc = ima_collect_measurement(&tmp_iint, file, NULL, 0,
@@ -683,10 +683,11 @@ EXPORT_SYMBOL_GPL(ima_inode_hash);
* Skip calling process_measurement(), but indicate which newly, created
* tmpfiles are in policy.
*/
-void ima_post_create_tmpfile(struct mnt_idmap *idmap,
- struct inode *inode)
+static void ima_post_create_tmpfile(struct mnt_idmap *idmap,
+ struct inode *inode)
+
{
- struct integrity_iint_cache *iint;
+ struct ima_iint_cache *iint;
int must_appraise;
if (!ima_policy_flag || !S_ISREG(inode->i_mode))
@@ -698,7 +699,7 @@ void ima_post_create_tmpfile(struct mnt_idmap *idmap,
return;
/* Nothing to do if we can't allocate memory */
- iint = integrity_inode_get(inode);
+ iint = ima_inode_get(inode);
if (!iint)
return;
@@ -715,10 +716,9 @@ void ima_post_create_tmpfile(struct mnt_idmap *idmap,
* Mark files created via the mknodat syscall as new, so that the
* file data can be written later.
*/
-void ima_post_path_mknod(struct mnt_idmap *idmap,
- struct dentry *dentry)
+static void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
- struct integrity_iint_cache *iint;
+ struct ima_iint_cache *iint;
struct inode *inode = dentry->d_inode;
int must_appraise;
@@ -731,7 +731,7 @@ void ima_post_path_mknod(struct mnt_idmap *idmap,
return;
/* Nothing to do if we can't allocate memory */
- iint = integrity_inode_get(inode);
+ iint = ima_inode_get(inode);
if (!iint)
return;
@@ -751,8 +751,8 @@ void ima_post_path_mknod(struct mnt_idmap *idmap,
*
* For permission return 0, otherwise return -EACCES.
*/
-int ima_read_file(struct file *file, enum kernel_read_file_id read_id,
- bool contents)
+static int ima_read_file(struct file *file, enum kernel_read_file_id read_id,
+ bool contents)
{
enum ima_hooks func;
u32 secid;
@@ -801,8 +801,8 @@ const int read_idmap[READING_MAX_ID] = {
* On success return 0. On integrity appraisal error, assuming the file
* is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
-int ima_post_read_file(struct file *file, void *buf, loff_t size,
- enum kernel_read_file_id read_id)
+static int ima_post_read_file(struct file *file, char *buf, loff_t size,
+ enum kernel_read_file_id read_id)
{
enum ima_hooks func;
u32 secid;
@@ -835,7 +835,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
*
* For permission return 0, otherwise return -EACCES.
*/
-int ima_load_data(enum kernel_load_data_id id, bool contents)
+static int ima_load_data(enum kernel_load_data_id id, bool contents)
{
bool ima_enforce, sig_enforce;
@@ -889,9 +889,9 @@ int ima_load_data(enum kernel_load_data_id id, bool contents)
* On success return 0. On integrity appraisal error, assuming the file
* is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
-int ima_post_load_data(char *buf, loff_t size,
- enum kernel_load_data_id load_id,
- char *description)
+static int ima_post_load_data(char *buf, loff_t size,
+ enum kernel_load_data_id load_id,
+ char *description)
{
if (load_id == LOADING_FIRMWARE) {
if ((ima_appraise & IMA_APPRAISE_FIRMWARE) &&
@@ -934,7 +934,7 @@ int process_buffer_measurement(struct mnt_idmap *idmap,
int ret = 0;
const char *audit_cause = "ENOMEM";
struct ima_template_entry *entry = NULL;
- struct integrity_iint_cache iint = {};
+ struct ima_iint_cache iint = {};
struct ima_event_data event_data = {.iint = &iint,
.filename = eventname,
.buf = buf,
@@ -1089,6 +1089,39 @@ int ima_measure_critical_data(const char *event_label,
}
EXPORT_SYMBOL_GPL(ima_measure_critical_data);
+#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
+
+/**
+ * ima_kernel_module_request - Prevent crypto-pkcs1pad(rsa,*) requests
+ * @kmod_name: kernel module name
+ *
+ * Avoid a verification loop where verifying the signature of the modprobe
+ * binary requires executing modprobe itself. Since the modprobe iint->mutex
+ * is already held when the signature verification is performed, a deadlock
+ * occurs as soon as modprobe is executed within the critical region, since
+ * the same lock cannot be taken again.
+ *
+ * This happens when public_key_verify_signature(), in case of RSA algorithm,
+ * use alg_name to store internal information in order to construct an
+ * algorithm on the fly, but crypto_larval_lookup() will try to use alg_name
+ * in order to load a kernel module with same name.
+ *
+ * Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules,
+ * we are safe to fail such module request from crypto_larval_lookup(), and
+ * avoid the verification loop.
+ *
+ * Return: Zero if it is safe to load the kernel module, -EINVAL otherwise.
+ */
+static int ima_kernel_module_request(char *kmod_name)
+{
+ if (strncmp(kmod_name, "crypto-pkcs1pad(rsa,", 20) == 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */
+
static int __init init_ima(void)
{
int error;
@@ -1120,4 +1153,49 @@ static int __init init_ima(void)
return error;
}
+static struct security_hook_list ima_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(bprm_check_security, ima_bprm_check),
+ LSM_HOOK_INIT(file_post_open, ima_file_check),
+ LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile),
+ LSM_HOOK_INIT(file_release, ima_file_free),
+ LSM_HOOK_INIT(mmap_file, ima_file_mmap),
+ LSM_HOOK_INIT(file_mprotect, ima_file_mprotect),
+ LSM_HOOK_INIT(kernel_load_data, ima_load_data),
+ LSM_HOOK_INIT(kernel_post_load_data, ima_post_load_data),
+ LSM_HOOK_INIT(kernel_read_file, ima_read_file),
+ LSM_HOOK_INIT(kernel_post_read_file, ima_post_read_file),
+ LSM_HOOK_INIT(path_post_mknod, ima_post_path_mknod),
+#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS
+ LSM_HOOK_INIT(key_post_create_or_update, ima_post_key_create_or_update),
+#endif
+#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
+ LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
+#endif
+ LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+};
+
+static const struct lsm_id ima_lsmid = {
+ .name = "ima",
+ .id = LSM_ID_IMA,
+};
+
+static int __init init_ima_lsm(void)
+{
+ ima_iintcache_init();
+ security_add_hooks(ima_hooks, ARRAY_SIZE(ima_hooks), &ima_lsmid);
+ init_ima_appraise_lsm(&ima_lsmid);
+ return 0;
+}
+
+struct lsm_blob_sizes ima_blob_sizes __ro_after_init = {
+ .lbs_inode = sizeof(struct ima_iint_cache *),
+};
+
+DEFINE_LSM(ima) = {
+ .name = "ima",
+ .init = init_ima_lsm,
+ .order = LSM_ORDER_LAST,
+ .blobs = &ima_blob_sizes,
+};
+
late_initcall(init_ima); /* Start IMA after the TPM is available */
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index f69062617754..c0556907c2e6 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -49,7 +49,7 @@
#define DONT_HASH 0x0200
#define INVALID_PCR(a) (((a) < 0) || \
- (a) >= (sizeof_field(struct integrity_iint_cache, measured_pcrs) * 8))
+ (a) >= (sizeof_field(struct ima_iint_cache, measured_pcrs) * 8))
int ima_policy_flag;
static int temp_ima_appraise;
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 9561db7cf6b4..50d6f798e613 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -18,60 +18,7 @@
#include <crypto/hash.h>
#include <linux/key.h>
#include <linux/audit.h>
-
-/* iint action cache flags */
-#define IMA_MEASURE 0x00000001
-#define IMA_MEASURED 0x00000002
-#define IMA_APPRAISE 0x00000004
-#define IMA_APPRAISED 0x00000008
-/*#define IMA_COLLECT 0x00000010 do not use this flag */
-#define IMA_COLLECTED 0x00000020
-#define IMA_AUDIT 0x00000040
-#define IMA_AUDITED 0x00000080
-#define IMA_HASH 0x00000100
-#define IMA_HASHED 0x00000200
-
-/* iint policy rule cache flags */
-#define IMA_NONACTION_FLAGS 0xff000000
-#define IMA_DIGSIG_REQUIRED 0x01000000
-#define IMA_PERMIT_DIRECTIO 0x02000000
-#define IMA_NEW_FILE 0x04000000
-#define EVM_IMMUTABLE_DIGSIG 0x08000000
-#define IMA_FAIL_UNVERIFIABLE_SIGS 0x10000000
-#define IMA_MODSIG_ALLOWED 0x20000000
-#define IMA_CHECK_BLACKLIST 0x40000000
-#define IMA_VERITY_REQUIRED 0x80000000
-
-#define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
- IMA_HASH | IMA_APPRAISE_SUBMASK)
-#define IMA_DONE_MASK (IMA_MEASURED | IMA_APPRAISED | IMA_AUDITED | \
- IMA_HASHED | IMA_COLLECTED | \
- IMA_APPRAISED_SUBMASK)
-
-/* iint subaction appraise cache flags */
-#define IMA_FILE_APPRAISE 0x00001000
-#define IMA_FILE_APPRAISED 0x00002000
-#define IMA_MMAP_APPRAISE 0x00004000
-#define IMA_MMAP_APPRAISED 0x00008000
-#define IMA_BPRM_APPRAISE 0x00010000
-#define IMA_BPRM_APPRAISED 0x00020000
-#define IMA_READ_APPRAISE 0x00040000
-#define IMA_READ_APPRAISED 0x00080000
-#define IMA_CREDS_APPRAISE 0x00100000
-#define IMA_CREDS_APPRAISED 0x00200000
-#define IMA_APPRAISE_SUBMASK (IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \
- IMA_BPRM_APPRAISE | IMA_READ_APPRAISE | \
- IMA_CREDS_APPRAISE)
-#define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \
- IMA_BPRM_APPRAISED | IMA_READ_APPRAISED | \
- IMA_CREDS_APPRAISED)
-
-/* iint cache atomic_flags */
-#define IMA_CHANGE_XATTR 0
-#define IMA_UPDATE_XATTR 1
-#define IMA_CHANGE_ATTR 2
-#define IMA_DIGSIG 3
-#define IMA_MUST_MEASURE 4
+#include <linux/lsm_hooks.h>
enum evm_ima_xattr_type {
IMA_XATTR_DIGEST = 0x01,
@@ -155,31 +102,6 @@ struct ima_file_id {
__u8 hash[HASH_MAX_DIGESTSIZE];
} __packed;
-/* integrity data associated with an inode */
-struct integrity_iint_cache {
- struct rb_node rb_node; /* rooted in integrity_iint_tree */
- struct mutex mutex; /* protects: version, flags, digest */
- struct inode *inode; /* back pointer to inode in question */
- u64 version; /* track inode changes */
- unsigned long flags;
- unsigned long measured_pcrs;
- unsigned long atomic_flags;
- unsigned long real_ino;
- dev_t real_dev;
- enum integrity_status ima_file_status:4;
- enum integrity_status ima_mmap_status:4;
- enum integrity_status ima_bprm_status:4;
- enum integrity_status ima_read_status:4;
- enum integrity_status ima_creds_status:4;
- enum integrity_status evm_status:4;
- struct ima_digest_data *ima_hash;
-};
-
-/* rbtree tree calls to lookup, insert, delete
- * integrity data associated with an inode.
- */
-struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
-
int integrity_kernel_read(struct file *file, loff_t offset,
void *addr, unsigned long count);
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 76f55dd13cb8..8af2136069d2 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -237,10 +237,6 @@ static int datablob_parse(char *datablob, const char **format,
break;
}
*decrypted_data = strsep(&datablob, " \t");
- if (!*decrypted_data) {
- pr_info("encrypted_key: decrypted_data is missing\n");
- break;
- }
ret = 0;
break;
case Opt_load:
diff --git a/security/keys/key.c b/security/keys/key.c
index 5b10641debd5..560790038329 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -13,7 +13,6 @@
#include <linux/security.h>
#include <linux/workqueue.h>
#include <linux/random.h>
-#include <linux/ima.h>
#include <linux/err.h>
#include "internal.h"
@@ -930,8 +929,8 @@ static key_ref_t __key_create_or_update(key_ref_t keyring_ref,
goto error_link_end;
}
- ima_post_key_create_or_update(keyring, key, payload, plen,
- flags, true);
+ security_key_post_create_or_update(keyring, key, payload, plen, flags,
+ true);
key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
@@ -964,9 +963,8 @@ error:
key_ref = __key_update(key_ref, &prep);
if (!IS_ERR(key_ref))
- ima_post_key_create_or_update(keyring, key,
- payload, plen,
- flags, false);
+ security_key_post_create_or_update(keyring, key, payload, plen,
+ flags, false);
goto error_free_prep;
}
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index fc520a06f9af..0171f7eb6ee1 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -737,8 +737,8 @@ static int current_check_refer_path(struct dentry *const old_dentry,
bool allow_parent1, allow_parent2;
access_mask_t access_request_parent1, access_request_parent2;
struct path mnt_dir;
- layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
- layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+ layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
+ layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};
if (!dom)
return 0;
diff --git a/security/security.c b/security/security.c
index 0144a98d3712..a07f43c58d65 100644
--- a/security/security.c
+++ b/security/security.c
@@ -19,16 +19,15 @@
#include <linux/kernel.h>
#include <linux/kernel_read_file.h>
#include <linux/lsm_hooks.h>
-#include <linux/integrity.h>
-#include <linux/ima.h>
-#include <linux/evm.h>
#include <linux/fsnotify.h>
#include <linux/mman.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
+#include <linux/xattr.h>
#include <linux/msg.h>
+#include <linux/overflow.h>
#include <net/flow.h>
/* How many LSMs were built into the kernel? */
@@ -50,7 +49,9 @@
(IS_ENABLED(CONFIG_SECURITY_SAFESETID) ? 1 : 0) + \
(IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) ? 1 : 0) + \
(IS_ENABLED(CONFIG_BPF_LSM) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0))
+ (IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0) + \
+ (IS_ENABLED(CONFIG_IMA) ? 1 : 0) + \
+ (IS_ENABLED(CONFIG_EVM) ? 1 : 0))
/*
* These are descriptions of the reasons that can be passed to the
@@ -855,14 +856,14 @@ out:
P->hook.FUNC(__VA_ARGS__); \
} while (0)
-#define call_int_hook(FUNC, IRC, ...) ({ \
- int RC = IRC; \
+#define call_int_hook(FUNC, ...) ({ \
+ int RC = LSM_RET_DEFAULT(FUNC); \
do { \
struct security_hook_list *P; \
\
hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \
RC = P->hook.FUNC(__VA_ARGS__); \
- if (RC != 0) \
+ if (RC != LSM_RET_DEFAULT(FUNC)) \
break; \
} \
} while (0); \
@@ -881,7 +882,7 @@ out:
*/
int security_binder_set_context_mgr(const struct cred *mgr)
{
- return call_int_hook(binder_set_context_mgr, 0, mgr);
+ return call_int_hook(binder_set_context_mgr, mgr);
}
/**
@@ -896,7 +897,7 @@ int security_binder_set_context_mgr(const struct cred *mgr)
int security_binder_transaction(const struct cred *from,
const struct cred *to)
{
- return call_int_hook(binder_transaction, 0, from, to);
+ return call_int_hook(binder_transaction, from, to);
}
/**
@@ -911,7 +912,7 @@ int security_binder_transaction(const struct cred *from,
int security_binder_transfer_binder(const struct cred *from,
const struct cred *to)
{
- return call_int_hook(binder_transfer_binder, 0, from, to);
+ return call_int_hook(binder_transfer_binder, from, to);
}
/**
@@ -927,7 +928,7 @@ int security_binder_transfer_binder(const struct cred *from,
int security_binder_transfer_file(const struct cred *from,
const struct cred *to, const struct file *file)
{
- return call_int_hook(binder_transfer_file, 0, from, to, file);
+ return call_int_hook(binder_transfer_file, from, to, file);
}
/**
@@ -946,7 +947,7 @@ int security_binder_transfer_file(const struct cred *from,
*/
int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
{
- return call_int_hook(ptrace_access_check, 0, child, mode);
+ return call_int_hook(ptrace_access_check, child, mode);
}
/**
@@ -961,7 +962,7 @@ int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
*/
int security_ptrace_traceme(struct task_struct *parent)
{
- return call_int_hook(ptrace_traceme, 0, parent);
+ return call_int_hook(ptrace_traceme, parent);
}
/**
@@ -983,8 +984,7 @@ int security_capget(const struct task_struct *target,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
- return call_int_hook(capget, 0, target,
- effective, inheritable, permitted);
+ return call_int_hook(capget, target, effective, inheritable, permitted);
}
/**
@@ -1005,8 +1005,8 @@ int security_capset(struct cred *new, const struct cred *old,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted)
{
- return call_int_hook(capset, 0, new, old,
- effective, inheritable, permitted);
+ return call_int_hook(capset, new, old, effective, inheritable,
+ permitted);
}
/**
@@ -1027,7 +1027,7 @@ int security_capable(const struct cred *cred,
int cap,
unsigned int opts)
{
- return call_int_hook(capable, 0, cred, ns, cap, opts);
+ return call_int_hook(capable, cred, ns, cap, opts);
}
/**
@@ -1043,7 +1043,7 @@ int security_capable(const struct cred *cred,
*/
int security_quotactl(int cmds, int type, int id, const struct super_block *sb)
{
- return call_int_hook(quotactl, 0, cmds, type, id, sb);
+ return call_int_hook(quotactl, cmds, type, id, sb);
}
/**
@@ -1056,7 +1056,7 @@ int security_quotactl(int cmds, int type, int id, const struct super_block *sb)
*/
int security_quota_on(struct dentry *dentry)
{
- return call_int_hook(quota_on, 0, dentry);
+ return call_int_hook(quota_on, dentry);
}
/**
@@ -1071,7 +1071,7 @@ int security_quota_on(struct dentry *dentry)
*/
int security_syslog(int type)
{
- return call_int_hook(syslog, 0, type);
+ return call_int_hook(syslog, type);
}
/**
@@ -1086,7 +1086,7 @@ int security_syslog(int type)
*/
int security_settime64(const struct timespec64 *ts, const struct timezone *tz)
{
- return call_int_hook(settime, 0, ts, tz);
+ return call_int_hook(settime, ts, tz);
}
/**
@@ -1141,7 +1141,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
*/
int security_bprm_creds_for_exec(struct linux_binprm *bprm)
{
- return call_int_hook(bprm_creds_for_exec, 0, bprm);
+ return call_int_hook(bprm_creds_for_exec, bprm);
}
/**
@@ -1165,7 +1165,7 @@ int security_bprm_creds_for_exec(struct linux_binprm *bprm)
*/
int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file)
{
- return call_int_hook(bprm_creds_from_file, 0, bprm, file);
+ return call_int_hook(bprm_creds_from_file, bprm, file);
}
/**
@@ -1182,12 +1182,7 @@ int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *
*/
int security_bprm_check(struct linux_binprm *bprm)
{
- int ret;
-
- ret = call_int_hook(bprm_check_security, 0, bprm);
- if (ret)
- return ret;
- return ima_bprm_check(bprm);
+ return call_int_hook(bprm_check_security, bprm);
}
/**
@@ -1234,7 +1229,7 @@ void security_bprm_committed_creds(const struct linux_binprm *bprm)
*/
int security_fs_context_submount(struct fs_context *fc, struct super_block *reference)
{
- return call_int_hook(fs_context_submount, 0, fc, reference);
+ return call_int_hook(fs_context_submount, fc, reference);
}
/**
@@ -1250,7 +1245,7 @@ int security_fs_context_submount(struct fs_context *fc, struct super_block *refe
*/
int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
{
- return call_int_hook(fs_context_dup, 0, fc, src_fc);
+ return call_int_hook(fs_context_dup, fc, src_fc);
}
/**
@@ -1299,7 +1294,7 @@ int security_sb_alloc(struct super_block *sb)
if (unlikely(rc))
return rc;
- rc = call_int_hook(sb_alloc_security, 0, sb);
+ rc = call_int_hook(sb_alloc_security, sb);
if (unlikely(rc))
security_sb_free(sb);
return rc;
@@ -1357,7 +1352,7 @@ EXPORT_SYMBOL(security_free_mnt_opts);
*/
int security_sb_eat_lsm_opts(char *options, void **mnt_opts)
{
- return call_int_hook(sb_eat_lsm_opts, 0, options, mnt_opts);
+ return call_int_hook(sb_eat_lsm_opts, options, mnt_opts);
}
EXPORT_SYMBOL(security_sb_eat_lsm_opts);
@@ -1374,7 +1369,7 @@ EXPORT_SYMBOL(security_sb_eat_lsm_opts);
int security_sb_mnt_opts_compat(struct super_block *sb,
void *mnt_opts)
{
- return call_int_hook(sb_mnt_opts_compat, 0, sb, mnt_opts);
+ return call_int_hook(sb_mnt_opts_compat, sb, mnt_opts);
}
EXPORT_SYMBOL(security_sb_mnt_opts_compat);
@@ -1391,7 +1386,7 @@ EXPORT_SYMBOL(security_sb_mnt_opts_compat);
int security_sb_remount(struct super_block *sb,
void *mnt_opts)
{
- return call_int_hook(sb_remount, 0, sb, mnt_opts);
+ return call_int_hook(sb_remount, sb, mnt_opts);
}
EXPORT_SYMBOL(security_sb_remount);
@@ -1405,7 +1400,7 @@ EXPORT_SYMBOL(security_sb_remount);
*/
int security_sb_kern_mount(const struct super_block *sb)
{
- return call_int_hook(sb_kern_mount, 0, sb);
+ return call_int_hook(sb_kern_mount, sb);
}
/**
@@ -1419,7 +1414,7 @@ int security_sb_kern_mount(const struct super_block *sb)
*/
int security_sb_show_options(struct seq_file *m, struct super_block *sb)
{
- return call_int_hook(sb_show_options, 0, m, sb);
+ return call_int_hook(sb_show_options, m, sb);
}
/**
@@ -1433,7 +1428,7 @@ int security_sb_show_options(struct seq_file *m, struct super_block *sb)
*/
int security_sb_statfs(struct dentry *dentry)
{
- return call_int_hook(sb_statfs, 0, dentry);
+ return call_int_hook(sb_statfs, dentry);
}
/**
@@ -1456,7 +1451,7 @@ int security_sb_statfs(struct dentry *dentry)
int security_sb_mount(const char *dev_name, const struct path *path,
const char *type, unsigned long flags, void *data)
{
- return call_int_hook(sb_mount, 0, dev_name, path, type, flags, data);
+ return call_int_hook(sb_mount, dev_name, path, type, flags, data);
}
/**
@@ -1470,7 +1465,7 @@ int security_sb_mount(const char *dev_name, const struct path *path,
*/
int security_sb_umount(struct vfsmount *mnt, int flags)
{
- return call_int_hook(sb_umount, 0, mnt, flags);
+ return call_int_hook(sb_umount, mnt, flags);
}
/**
@@ -1485,7 +1480,7 @@ int security_sb_umount(struct vfsmount *mnt, int flags)
int security_sb_pivotroot(const struct path *old_path,
const struct path *new_path)
{
- return call_int_hook(sb_pivotroot, 0, old_path, new_path);
+ return call_int_hook(sb_pivotroot, old_path, new_path);
}
/**
@@ -1504,9 +1499,17 @@ int security_sb_set_mnt_opts(struct super_block *sb,
unsigned long kern_flags,
unsigned long *set_kern_flags)
{
- return call_int_hook(sb_set_mnt_opts,
- mnt_opts ? -EOPNOTSUPP : 0, sb,
- mnt_opts, kern_flags, set_kern_flags);
+ struct security_hook_list *hp;
+ int rc = mnt_opts ? -EOPNOTSUPP : LSM_RET_DEFAULT(sb_set_mnt_opts);
+
+ hlist_for_each_entry(hp, &security_hook_heads.sb_set_mnt_opts,
+ list) {
+ rc = hp->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags,
+ set_kern_flags);
+ if (rc != LSM_RET_DEFAULT(sb_set_mnt_opts))
+ break;
+ }
+ return rc;
}
EXPORT_SYMBOL(security_sb_set_mnt_opts);
@@ -1526,7 +1529,7 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb,
unsigned long kern_flags,
unsigned long *set_kern_flags)
{
- return call_int_hook(sb_clone_mnt_opts, 0, oldsb, newsb,
+ return call_int_hook(sb_clone_mnt_opts, oldsb, newsb,
kern_flags, set_kern_flags);
}
EXPORT_SYMBOL(security_sb_clone_mnt_opts);
@@ -1543,7 +1546,7 @@ EXPORT_SYMBOL(security_sb_clone_mnt_opts);
int security_move_mount(const struct path *from_path,
const struct path *to_path)
{
- return call_int_hook(move_mount, 0, from_path, to_path);
+ return call_int_hook(move_mount, from_path, to_path);
}
/**
@@ -1560,7 +1563,7 @@ int security_move_mount(const struct path *from_path,
int security_path_notify(const struct path *path, u64 mask,
unsigned int obj_type)
{
- return call_int_hook(path_notify, 0, path, mask, obj_type);
+ return call_int_hook(path_notify, path, mask, obj_type);
}
/**
@@ -1579,7 +1582,7 @@ int security_inode_alloc(struct inode *inode)
if (unlikely(rc))
return rc;
- rc = call_int_hook(inode_alloc_security, 0, inode);
+ rc = call_int_hook(inode_alloc_security, inode);
if (unlikely(rc))
security_inode_free(inode);
return rc;
@@ -1601,7 +1604,6 @@ static void inode_free_by_rcu(struct rcu_head *head)
*/
void security_inode_free(struct inode *inode)
{
- integrity_inode_free(inode);
call_void_hook(inode_free_security, inode);
/*
* The inode may still be referenced in a path walk and
@@ -1637,20 +1639,8 @@ int security_dentry_init_security(struct dentry *dentry, int mode,
const char **xattr_name, void **ctx,
u32 *ctxlen)
{
- struct security_hook_list *hp;
- int rc;
-
- /*
- * Only one module will provide a security context.
- */
- hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security,
- list) {
- rc = hp->hook.dentry_init_security(dentry, mode, name,
- xattr_name, ctx, ctxlen);
- if (rc != LSM_RET_DEFAULT(dentry_init_security))
- return rc;
- }
- return LSM_RET_DEFAULT(dentry_init_security);
+ return call_int_hook(dentry_init_security, dentry, mode, name,
+ xattr_name, ctx, ctxlen);
}
EXPORT_SYMBOL(security_dentry_init_security);
@@ -1673,7 +1663,7 @@ int security_dentry_create_files_as(struct dentry *dentry, int mode,
struct qstr *name,
const struct cred *old, struct cred *new)
{
- return call_int_hook(dentry_create_files_as, 0, dentry, mode,
+ return call_int_hook(dentry_create_files_as, dentry, mode,
name, old, new);
}
EXPORT_SYMBOL(security_dentry_create_files_as);
@@ -1720,8 +1710,8 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
return 0;
if (initxattrs) {
- /* Allocate +1 for EVM and +1 as terminator. */
- new_xattrs = kcalloc(blob_sizes.lbs_xattr_count + 2,
+ /* Allocate +1 as terminator. */
+ new_xattrs = kcalloc(blob_sizes.lbs_xattr_count + 1,
sizeof(*new_xattrs), GFP_NOFS);
if (!new_xattrs)
return -ENOMEM;
@@ -1745,10 +1735,6 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
if (!xattr_count)
goto out;
- ret = evm_inode_init_security(inode, dir, qstr, new_xattrs,
- &xattr_count);
- if (ret)
- goto out;
ret = initxattrs(inode, new_xattrs, fs_data);
out:
for (; xattr_count > 0; xattr_count--)
@@ -1774,7 +1760,7 @@ int security_inode_init_security_anon(struct inode *inode,
const struct qstr *name,
const struct inode *context_inode)
{
- return call_int_hook(inode_init_security_anon, 0, inode, name,
+ return call_int_hook(inode_init_security_anon, inode, name,
context_inode);
}
@@ -1796,11 +1782,25 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry,
{
if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
return 0;
- return call_int_hook(path_mknod, 0, dir, dentry, mode, dev);
+ return call_int_hook(path_mknod, dir, dentry, mode, dev);
}
EXPORT_SYMBOL(security_path_mknod);
/**
+ * security_path_post_mknod() - Update inode security field after file creation
+ * @idmap: idmap of the mount
+ * @dentry: new file
+ *
+ * Update inode security field after a file has been created.
+ */
+void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
+{
+ if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+ return;
+ call_void_hook(path_post_mknod, idmap, dentry);
+}
+
+/**
* security_path_mkdir() - Check if creating a new directory is allowed
* @dir: parent directory
* @dentry: new directory
@@ -1815,7 +1815,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry,
{
if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
return 0;
- return call_int_hook(path_mkdir, 0, dir, dentry, mode);
+ return call_int_hook(path_mkdir, dir, dentry, mode);
}
EXPORT_SYMBOL(security_path_mkdir);
@@ -1832,7 +1832,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
return 0;
- return call_int_hook(path_rmdir, 0, dir, dentry);
+ return call_int_hook(path_rmdir, dir, dentry);
}
/**
@@ -1848,7 +1848,7 @@ int security_path_unlink(const struct path *dir, struct dentry *dentry)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
return 0;
- return call_int_hook(path_unlink, 0, dir, dentry);
+ return call_int_hook(path_unlink, dir, dentry);
}
EXPORT_SYMBOL(security_path_unlink);
@@ -1867,7 +1867,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
{
if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
return 0;
- return call_int_hook(path_symlink, 0, dir, dentry, old_name);
+ return call_int_hook(path_symlink, dir, dentry, old_name);
}
/**
@@ -1885,7 +1885,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
{
if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry))))
return 0;
- return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
+ return call_int_hook(path_link, old_dentry, new_dir, new_dentry);
}
/**
@@ -1909,7 +1909,7 @@ int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
IS_PRIVATE(d_backing_inode(new_dentry)))))
return 0;
- return call_int_hook(path_rename, 0, old_dir, old_dentry, new_dir,
+ return call_int_hook(path_rename, old_dir, old_dentry, new_dir,
new_dentry, flags);
}
EXPORT_SYMBOL(security_path_rename);
@@ -1928,7 +1928,7 @@ int security_path_truncate(const struct path *path)
{
if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
return 0;
- return call_int_hook(path_truncate, 0, path);
+ return call_int_hook(path_truncate, path);
}
/**
@@ -1946,7 +1946,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
{
if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
return 0;
- return call_int_hook(path_chmod, 0, path, mode);
+ return call_int_hook(path_chmod, path, mode);
}
/**
@@ -1963,7 +1963,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
{
if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
return 0;
- return call_int_hook(path_chown, 0, path, uid, gid);
+ return call_int_hook(path_chown, path, uid, gid);
}
/**
@@ -1976,7 +1976,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
*/
int security_path_chroot(const struct path *path)
{
- return call_int_hook(path_chroot, 0, path);
+ return call_int_hook(path_chroot, path);
}
#endif /* CONFIG_SECURITY_PATH */
@@ -1995,11 +1995,26 @@ int security_inode_create(struct inode *dir, struct dentry *dentry,
{
if (unlikely(IS_PRIVATE(dir)))
return 0;
- return call_int_hook(inode_create, 0, dir, dentry, mode);
+ return call_int_hook(inode_create, dir, dentry, mode);
}
EXPORT_SYMBOL_GPL(security_inode_create);
/**
+ * security_inode_post_create_tmpfile() - Update inode security of new tmpfile
+ * @idmap: idmap of the mount
+ * @inode: inode of the new tmpfile
+ *
+ * Update inode security data after a tmpfile has been created.
+ */
+void security_inode_post_create_tmpfile(struct mnt_idmap *idmap,
+ struct inode *inode)
+{
+ if (unlikely(IS_PRIVATE(inode)))
+ return;
+ call_void_hook(inode_post_create_tmpfile, idmap, inode);
+}
+
+/**
* security_inode_link() - Check if creating a hard link is allowed
* @old_dentry: existing file
* @dir: new parent directory
@@ -2014,7 +2029,7 @@ int security_inode_link(struct dentry *old_dentry, struct inode *dir,
{
if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry))))
return 0;
- return call_int_hook(inode_link, 0, old_dentry, dir, new_dentry);
+ return call_int_hook(inode_link, old_dentry, dir, new_dentry);
}
/**
@@ -2030,7 +2045,7 @@ int security_inode_unlink(struct inode *dir, struct dentry *dentry)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_unlink, 0, dir, dentry);
+ return call_int_hook(inode_unlink, dir, dentry);
}
/**
@@ -2048,7 +2063,7 @@ int security_inode_symlink(struct inode *dir, struct dentry *dentry,
{
if (unlikely(IS_PRIVATE(dir)))
return 0;
- return call_int_hook(inode_symlink, 0, dir, dentry, old_name);
+ return call_int_hook(inode_symlink, dir, dentry, old_name);
}
/**
@@ -2066,7 +2081,7 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
if (unlikely(IS_PRIVATE(dir)))
return 0;
- return call_int_hook(inode_mkdir, 0, dir, dentry, mode);
+ return call_int_hook(inode_mkdir, dir, dentry, mode);
}
EXPORT_SYMBOL_GPL(security_inode_mkdir);
@@ -2083,7 +2098,7 @@ int security_inode_rmdir(struct inode *dir, struct dentry *dentry)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_rmdir, 0, dir, dentry);
+ return call_int_hook(inode_rmdir, dir, dentry);
}
/**
@@ -2105,7 +2120,7 @@ int security_inode_mknod(struct inode *dir, struct dentry *dentry,
{
if (unlikely(IS_PRIVATE(dir)))
return 0;
- return call_int_hook(inode_mknod, 0, dir, dentry, mode, dev);
+ return call_int_hook(inode_mknod, dir, dentry, mode, dev);
}
/**
@@ -2130,13 +2145,13 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
if (flags & RENAME_EXCHANGE) {
- int err = call_int_hook(inode_rename, 0, new_dir, new_dentry,
+ int err = call_int_hook(inode_rename, new_dir, new_dentry,
old_dir, old_dentry);
if (err)
return err;
}
- return call_int_hook(inode_rename, 0, old_dir, old_dentry,
+ return call_int_hook(inode_rename, old_dir, old_dentry,
new_dir, new_dentry);
}
@@ -2152,7 +2167,7 @@ int security_inode_readlink(struct dentry *dentry)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_readlink, 0, dentry);
+ return call_int_hook(inode_readlink, dentry);
}
/**
@@ -2171,7 +2186,7 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
{
if (unlikely(IS_PRIVATE(inode)))
return 0;
- return call_int_hook(inode_follow_link, 0, dentry, inode, rcu);
+ return call_int_hook(inode_follow_link, dentry, inode, rcu);
}
/**
@@ -2192,7 +2207,7 @@ int security_inode_permission(struct inode *inode, int mask)
{
if (unlikely(IS_PRIVATE(inode)))
return 0;
- return call_int_hook(inode_permission, 0, inode, mask);
+ return call_int_hook(inode_permission, inode, mask);
}
/**
@@ -2211,18 +2226,29 @@ int security_inode_permission(struct inode *inode, int mask)
int security_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
- int ret;
-
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- ret = call_int_hook(inode_setattr, 0, dentry, attr);
- if (ret)
- return ret;
- return evm_inode_setattr(idmap, dentry, attr);
+ return call_int_hook(inode_setattr, idmap, dentry, attr);
}
EXPORT_SYMBOL_GPL(security_inode_setattr);
/**
+ * security_inode_post_setattr() - Update the inode after a setattr operation
+ * @idmap: idmap of the mount
+ * @dentry: file
+ * @ia_valid: file attributes set
+ *
+ * Update inode security field after successful setting file attributes.
+ */
+void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ int ia_valid)
+{
+ if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+ return;
+ call_void_hook(inode_post_setattr, idmap, dentry, ia_valid);
+}
+
+/**
* security_inode_getattr() - Check if getting file attributes is allowed
* @path: file
*
@@ -2234,7 +2260,7 @@ int security_inode_getattr(const struct path *path)
{
if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
return 0;
- return call_int_hook(inode_getattr, 0, path);
+ return call_int_hook(inode_getattr, path);
}
/**
@@ -2262,17 +2288,12 @@ int security_inode_setxattr(struct mnt_idmap *idmap,
* SELinux and Smack integrate the cap call,
* so assume that all LSMs supplying this call do so.
*/
- ret = call_int_hook(inode_setxattr, 1, idmap, dentry, name, value,
- size, flags);
+ ret = call_int_hook(inode_setxattr, idmap, dentry, name, value, size,
+ flags);
if (ret == 1)
ret = cap_inode_setxattr(dentry, name, value, size, flags);
- if (ret)
- return ret;
- ret = ima_inode_setxattr(dentry, name, value, size);
- if (ret)
- return ret;
- return evm_inode_setxattr(idmap, dentry, name, value, size);
+ return ret;
}
/**
@@ -2291,18 +2312,26 @@ int security_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl)
{
- int ret;
-
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- ret = call_int_hook(inode_set_acl, 0, idmap, dentry, acl_name,
- kacl);
- if (ret)
- return ret;
- ret = ima_inode_set_acl(idmap, dentry, acl_name, kacl);
- if (ret)
- return ret;
- return evm_inode_set_acl(idmap, dentry, acl_name, kacl);
+ return call_int_hook(inode_set_acl, idmap, dentry, acl_name, kacl);
+}
+
+/**
+ * security_inode_post_set_acl() - Update inode security from posix acls set
+ * @dentry: file
+ * @acl_name: acl name
+ * @kacl: acl struct
+ *
+ * Update inode security data after successfully setting posix acls on @dentry.
+ * The posix acls in @kacl are identified by @acl_name.
+ */
+void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl)
+{
+ if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+ return;
+ call_void_hook(inode_post_set_acl, dentry, acl_name, kacl);
}
/**
@@ -2321,7 +2350,7 @@ int security_inode_get_acl(struct mnt_idmap *idmap,
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_get_acl, 0, idmap, dentry, acl_name);
+ return call_int_hook(inode_get_acl, idmap, dentry, acl_name);
}
/**
@@ -2338,17 +2367,26 @@ int security_inode_get_acl(struct mnt_idmap *idmap,
int security_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
- int ret;
-
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- ret = call_int_hook(inode_remove_acl, 0, idmap, dentry, acl_name);
- if (ret)
- return ret;
- ret = ima_inode_remove_acl(idmap, dentry, acl_name);
- if (ret)
- return ret;
- return evm_inode_remove_acl(idmap, dentry, acl_name);
+ return call_int_hook(inode_remove_acl, idmap, dentry, acl_name);
+}
+
+/**
+ * security_inode_post_remove_acl() - Update inode security after rm posix acls
+ * @idmap: idmap of the mount
+ * @dentry: file
+ * @acl_name: acl name
+ *
+ * Update inode security data after successfully removing posix acls on
+ * @dentry in @idmap. The posix acls are identified by @acl_name.
+ */
+void security_inode_post_remove_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *acl_name)
+{
+ if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+ return;
+ call_void_hook(inode_post_remove_acl, idmap, dentry, acl_name);
}
/**
@@ -2367,7 +2405,6 @@ void security_inode_post_setxattr(struct dentry *dentry, const char *name,
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return;
call_void_hook(inode_post_setxattr, dentry, name, value, size, flags);
- evm_inode_post_setxattr(dentry, name, value, size);
}
/**
@@ -2384,7 +2421,7 @@ int security_inode_getxattr(struct dentry *dentry, const char *name)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_getxattr, 0, dentry, name);
+ return call_int_hook(inode_getxattr, dentry, name);
}
/**
@@ -2400,7 +2437,7 @@ int security_inode_listxattr(struct dentry *dentry)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_listxattr, 0, dentry);
+ return call_int_hook(inode_listxattr, dentry);
}
/**
@@ -2425,15 +2462,24 @@ int security_inode_removexattr(struct mnt_idmap *idmap,
* SELinux and Smack integrate the cap call,
* so assume that all LSMs supplying this call do so.
*/
- ret = call_int_hook(inode_removexattr, 1, idmap, dentry, name);
+ ret = call_int_hook(inode_removexattr, idmap, dentry, name);
if (ret == 1)
ret = cap_inode_removexattr(idmap, dentry, name);
- if (ret)
- return ret;
- ret = ima_inode_removexattr(dentry, name);
- if (ret)
- return ret;
- return evm_inode_removexattr(idmap, dentry, name);
+ return ret;
+}
+
+/**
+ * security_inode_post_removexattr() - Update the inode after a removexattr op
+ * @dentry: file
+ * @name: xattr name
+ *
+ * Update the inode after a successful removexattr operation.
+ */
+void security_inode_post_removexattr(struct dentry *dentry, const char *name)
+{
+ if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+ return;
+ call_void_hook(inode_post_removexattr, dentry, name);
}
/**
@@ -2449,7 +2495,7 @@ int security_inode_removexattr(struct mnt_idmap *idmap,
*/
int security_inode_need_killpriv(struct dentry *dentry)
{
- return call_int_hook(inode_need_killpriv, 0, dentry);
+ return call_int_hook(inode_need_killpriv, dentry);
}
/**
@@ -2466,7 +2512,7 @@ int security_inode_need_killpriv(struct dentry *dentry)
int security_inode_killpriv(struct mnt_idmap *idmap,
struct dentry *dentry)
{
- return call_int_hook(inode_killpriv, 0, idmap, dentry);
+ return call_int_hook(inode_killpriv, idmap, dentry);
}
/**
@@ -2489,21 +2535,11 @@ int security_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name,
void **buffer, bool alloc)
{
- struct security_hook_list *hp;
- int rc;
-
if (unlikely(IS_PRIVATE(inode)))
return LSM_RET_DEFAULT(inode_getsecurity);
- /*
- * Only one module will provide an attribute with a given name.
- */
- hlist_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) {
- rc = hp->hook.inode_getsecurity(idmap, inode, name, buffer,
- alloc);
- if (rc != LSM_RET_DEFAULT(inode_getsecurity))
- return rc;
- }
- return LSM_RET_DEFAULT(inode_getsecurity);
+
+ return call_int_hook(inode_getsecurity, idmap, inode, name, buffer,
+ alloc);
}
/**
@@ -2524,21 +2560,11 @@ int security_inode_getsecurity(struct mnt_idmap *idmap,
int security_inode_setsecurity(struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
- struct security_hook_list *hp;
- int rc;
-
if (unlikely(IS_PRIVATE(inode)))
return LSM_RET_DEFAULT(inode_setsecurity);
- /*
- * Only one module will provide an attribute with a given name.
- */
- hlist_for_each_entry(hp, &security_hook_heads.inode_setsecurity, list) {
- rc = hp->hook.inode_setsecurity(inode, name, value, size,
- flags);
- if (rc != LSM_RET_DEFAULT(inode_setsecurity))
- return rc;
- }
- return LSM_RET_DEFAULT(inode_setsecurity);
+
+ return call_int_hook(inode_setsecurity, inode, name, value, size,
+ flags);
}
/**
@@ -2559,7 +2585,7 @@ int security_inode_listsecurity(struct inode *inode,
{
if (unlikely(IS_PRIVATE(inode)))
return 0;
- return call_int_hook(inode_listsecurity, 0, inode, buffer, buffer_size);
+ return call_int_hook(inode_listsecurity, inode, buffer, buffer_size);
}
EXPORT_SYMBOL(security_inode_listsecurity);
@@ -2590,7 +2616,7 @@ void security_inode_getsecid(struct inode *inode, u32 *secid)
*/
int security_inode_copy_up(struct dentry *src, struct cred **new)
{
- return call_int_hook(inode_copy_up, 0, src, new);
+ return call_int_hook(inode_copy_up, src, new);
}
EXPORT_SYMBOL(security_inode_copy_up);
@@ -2608,7 +2634,6 @@ EXPORT_SYMBOL(security_inode_copy_up);
*/
int security_inode_copy_up_xattr(const char *name)
{
- struct security_hook_list *hp;
int rc;
/*
@@ -2616,14 +2641,11 @@ int security_inode_copy_up_xattr(const char *name)
* xattr), -EOPNOTSUPP if it does not know anything about the xattr or
* any other error code in case of an error.
*/
- hlist_for_each_entry(hp,
- &security_hook_heads.inode_copy_up_xattr, list) {
- rc = hp->hook.inode_copy_up_xattr(name);
- if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr))
- return rc;
- }
+ rc = call_int_hook(inode_copy_up_xattr, name);
+ if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr))
+ return rc;
- return evm_inode_copy_up_xattr(name);
+ return LSM_RET_DEFAULT(inode_copy_up_xattr);
}
EXPORT_SYMBOL(security_inode_copy_up_xattr);
@@ -2640,7 +2662,7 @@ EXPORT_SYMBOL(security_inode_copy_up_xattr);
int security_kernfs_init_security(struct kernfs_node *kn_dir,
struct kernfs_node *kn)
{
- return call_int_hook(kernfs_init_security, 0, kn_dir, kn);
+ return call_int_hook(kernfs_init_security, kn_dir, kn);
}
/**
@@ -2664,7 +2686,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
*/
int security_file_permission(struct file *file, int mask)
{
- return call_int_hook(file_permission, 0, file, mask);
+ return call_int_hook(file_permission, file, mask);
}
/**
@@ -2682,13 +2704,24 @@ int security_file_alloc(struct file *file)
if (rc)
return rc;
- rc = call_int_hook(file_alloc_security, 0, file);
+ rc = call_int_hook(file_alloc_security, file);
if (unlikely(rc))
security_file_free(file);
return rc;
}
/**
+ * security_file_release() - Perform actions before releasing the file ref
+ * @file: the file
+ *
+ * Perform actions before releasing the last reference to a file.
+ */
+void security_file_release(struct file *file)
+{
+ call_void_hook(file_release, file);
+}
+
+/**
* security_file_free() - Free a file's LSM blob
* @file: the file
*
@@ -2722,7 +2755,7 @@ void security_file_free(struct file *file)
*/
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- return call_int_hook(file_ioctl, 0, file, cmd, arg);
+ return call_int_hook(file_ioctl, file, cmd, arg);
}
EXPORT_SYMBOL_GPL(security_file_ioctl);
@@ -2740,7 +2773,7 @@ EXPORT_SYMBOL_GPL(security_file_ioctl);
int security_file_ioctl_compat(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return call_int_hook(file_ioctl_compat, 0, file, cmd, arg);
+ return call_int_hook(file_ioctl_compat, file, cmd, arg);
}
EXPORT_SYMBOL_GPL(security_file_ioctl_compat);
@@ -2791,13 +2824,8 @@ static inline unsigned long mmap_prot(struct file *file, unsigned long prot)
int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags)
{
- unsigned long prot_adj = mmap_prot(file, prot);
- int ret;
-
- ret = call_int_hook(mmap_file, 0, file, prot, prot_adj, flags);
- if (ret)
- return ret;
- return ima_file_mmap(file, prot, prot_adj, flags);
+ return call_int_hook(mmap_file, file, prot, mmap_prot(file, prot),
+ flags);
}
/**
@@ -2810,7 +2838,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
*/
int security_mmap_addr(unsigned long addr)
{
- return call_int_hook(mmap_addr, 0, addr);
+ return call_int_hook(mmap_addr, addr);
}
/**
@@ -2826,12 +2854,7 @@ int security_mmap_addr(unsigned long addr)
int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
unsigned long prot)
{
- int ret;
-
- ret = call_int_hook(file_mprotect, 0, vma, reqprot, prot);
- if (ret)
- return ret;
- return ima_file_mprotect(vma, prot);
+ return call_int_hook(file_mprotect, vma, reqprot, prot);
}
/**
@@ -2846,7 +2869,7 @@ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
*/
int security_file_lock(struct file *file, unsigned int cmd)
{
- return call_int_hook(file_lock, 0, file, cmd);
+ return call_int_hook(file_lock, file, cmd);
}
/**
@@ -2865,7 +2888,7 @@ int security_file_lock(struct file *file, unsigned int cmd)
*/
int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
- return call_int_hook(file_fcntl, 0, file, cmd, arg);
+ return call_int_hook(file_fcntl, file, cmd, arg);
}
/**
@@ -2899,11 +2922,11 @@ void security_file_set_fowner(struct file *file)
int security_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown, int sig)
{
- return call_int_hook(file_send_sigiotask, 0, tsk, fown, sig);
+ return call_int_hook(file_send_sigiotask, tsk, fown, sig);
}
/**
- * security_file_receive() - Check is receiving a file via IPC is allowed
+ * security_file_receive() - Check if receiving a file via IPC is allowed
* @file: file being received
*
* This hook allows security modules to control the ability of a process to
@@ -2913,7 +2936,7 @@ int security_file_send_sigiotask(struct task_struct *tsk,
*/
int security_file_receive(struct file *file)
{
- return call_int_hook(file_receive, 0, file);
+ return call_int_hook(file_receive, file);
}
/**
@@ -2929,7 +2952,7 @@ int security_file_open(struct file *file)
{
int ret;
- ret = call_int_hook(file_open, 0, file);
+ ret = call_int_hook(file_open, file);
if (ret)
return ret;
@@ -2937,6 +2960,23 @@ int security_file_open(struct file *file)
}
/**
+ * security_file_post_open() - Evaluate a file after it has been opened
+ * @file: the file
+ * @mask: access mask
+ *
+ * Evaluate an opened file and the access mask requested with open(). The hook
+ * is useful for LSMs that require the file content to be available in order to
+ * make decisions.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_file_post_open(struct file *file, int mask)
+{
+ return call_int_hook(file_post_open, file, mask);
+}
+EXPORT_SYMBOL_GPL(security_file_post_open);
+
+/**
* security_file_truncate() - Check if truncating a file is allowed
* @file: file
*
@@ -2948,7 +2988,7 @@ int security_file_open(struct file *file)
*/
int security_file_truncate(struct file *file)
{
- return call_int_hook(file_truncate, 0, file);
+ return call_int_hook(file_truncate, file);
}
/**
@@ -2966,7 +3006,7 @@ int security_task_alloc(struct task_struct *task, unsigned long clone_flags)
if (rc)
return rc;
- rc = call_int_hook(task_alloc, 0, task, clone_flags);
+ rc = call_int_hook(task_alloc, task, clone_flags);
if (unlikely(rc))
security_task_free(task);
return rc;
@@ -3004,7 +3044,7 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
if (rc)
return rc;
- rc = call_int_hook(cred_alloc_blank, 0, cred, gfp);
+ rc = call_int_hook(cred_alloc_blank, cred, gfp);
if (unlikely(rc))
security_cred_free(cred);
return rc;
@@ -3048,7 +3088,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp)
if (rc)
return rc;
- rc = call_int_hook(cred_prepare, 0, new, old, gfp);
+ rc = call_int_hook(cred_prepare, new, old, gfp);
if (unlikely(rc))
security_cred_free(new);
return rc;
@@ -3093,7 +3133,7 @@ EXPORT_SYMBOL(security_cred_getsecid);
*/
int security_kernel_act_as(struct cred *new, u32 secid)
{
- return call_int_hook(kernel_act_as, 0, new, secid);
+ return call_int_hook(kernel_act_as, new, secid);
}
/**
@@ -3109,11 +3149,11 @@ int security_kernel_act_as(struct cred *new, u32 secid)
*/
int security_kernel_create_files_as(struct cred *new, struct inode *inode)
{
- return call_int_hook(kernel_create_files_as, 0, new, inode);
+ return call_int_hook(kernel_create_files_as, new, inode);
}
/**
- * security_kernel_module_request() - Check is loading a module is allowed
+ * security_kernel_module_request() - Check if loading a module is allowed
* @kmod_name: module name
*
* Ability to trigger the kernel to automatically upcall to userspace for
@@ -3123,12 +3163,7 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
*/
int security_kernel_module_request(char *kmod_name)
{
- int ret;
-
- ret = call_int_hook(kernel_module_request, 0, kmod_name);
- if (ret)
- return ret;
- return integrity_kernel_module_request(kmod_name);
+ return call_int_hook(kernel_module_request, kmod_name);
}
/**
@@ -3144,12 +3179,7 @@ int security_kernel_module_request(char *kmod_name)
int security_kernel_read_file(struct file *file, enum kernel_read_file_id id,
bool contents)
{
- int ret;
-
- ret = call_int_hook(kernel_read_file, 0, file, id, contents);
- if (ret)
- return ret;
- return ima_read_file(file, id, contents);
+ return call_int_hook(kernel_read_file, file, id, contents);
}
EXPORT_SYMBOL_GPL(security_kernel_read_file);
@@ -3169,12 +3199,7 @@ EXPORT_SYMBOL_GPL(security_kernel_read_file);
int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
enum kernel_read_file_id id)
{
- int ret;
-
- ret = call_int_hook(kernel_post_read_file, 0, file, buf, size, id);
- if (ret)
- return ret;
- return ima_post_read_file(file, buf, size, id);
+ return call_int_hook(kernel_post_read_file, file, buf, size, id);
}
EXPORT_SYMBOL_GPL(security_kernel_post_read_file);
@@ -3189,12 +3214,7 @@ EXPORT_SYMBOL_GPL(security_kernel_post_read_file);
*/
int security_kernel_load_data(enum kernel_load_data_id id, bool contents)
{
- int ret;
-
- ret = call_int_hook(kernel_load_data, 0, id, contents);
- if (ret)
- return ret;
- return ima_load_data(id, contents);
+ return call_int_hook(kernel_load_data, id, contents);
}
EXPORT_SYMBOL_GPL(security_kernel_load_data);
@@ -3216,13 +3236,7 @@ int security_kernel_post_load_data(char *buf, loff_t size,
enum kernel_load_data_id id,
char *description)
{
- int ret;
-
- ret = call_int_hook(kernel_post_load_data, 0, buf, size, id,
- description);
- if (ret)
- return ret;
- return ima_post_load_data(buf, size, id, description);
+ return call_int_hook(kernel_post_load_data, buf, size, id, description);
}
EXPORT_SYMBOL_GPL(security_kernel_post_load_data);
@@ -3243,7 +3257,7 @@ EXPORT_SYMBOL_GPL(security_kernel_post_load_data);
int security_task_fix_setuid(struct cred *new, const struct cred *old,
int flags)
{
- return call_int_hook(task_fix_setuid, 0, new, old, flags);
+ return call_int_hook(task_fix_setuid, new, old, flags);
}
/**
@@ -3263,7 +3277,7 @@ int security_task_fix_setuid(struct cred *new, const struct cred *old,
int security_task_fix_setgid(struct cred *new, const struct cred *old,
int flags)
{
- return call_int_hook(task_fix_setgid, 0, new, old, flags);
+ return call_int_hook(task_fix_setgid, new, old, flags);
}
/**
@@ -3280,7 +3294,7 @@ int security_task_fix_setgid(struct cred *new, const struct cred *old,
*/
int security_task_fix_setgroups(struct cred *new, const struct cred *old)
{
- return call_int_hook(task_fix_setgroups, 0, new, old);
+ return call_int_hook(task_fix_setgroups, new, old);
}
/**
@@ -3295,7 +3309,7 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old)
*/
int security_task_setpgid(struct task_struct *p, pid_t pgid)
{
- return call_int_hook(task_setpgid, 0, p, pgid);
+ return call_int_hook(task_setpgid, p, pgid);
}
/**
@@ -3309,7 +3323,7 @@ int security_task_setpgid(struct task_struct *p, pid_t pgid)
*/
int security_task_getpgid(struct task_struct *p)
{
- return call_int_hook(task_getpgid, 0, p);
+ return call_int_hook(task_getpgid, p);
}
/**
@@ -3322,7 +3336,7 @@ int security_task_getpgid(struct task_struct *p)
*/
int security_task_getsid(struct task_struct *p)
{
- return call_int_hook(task_getsid, 0, p);
+ return call_int_hook(task_getsid, p);
}
/**
@@ -3365,7 +3379,7 @@ EXPORT_SYMBOL(security_task_getsecid_obj);
*/
int security_task_setnice(struct task_struct *p, int nice)
{
- return call_int_hook(task_setnice, 0, p, nice);
+ return call_int_hook(task_setnice, p, nice);
}
/**
@@ -3379,7 +3393,7 @@ int security_task_setnice(struct task_struct *p, int nice)
*/
int security_task_setioprio(struct task_struct *p, int ioprio)
{
- return call_int_hook(task_setioprio, 0, p, ioprio);
+ return call_int_hook(task_setioprio, p, ioprio);
}
/**
@@ -3392,7 +3406,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio)
*/
int security_task_getioprio(struct task_struct *p)
{
- return call_int_hook(task_getioprio, 0, p);
+ return call_int_hook(task_getioprio, p);
}
/**
@@ -3409,7 +3423,7 @@ int security_task_getioprio(struct task_struct *p)
int security_task_prlimit(const struct cred *cred, const struct cred *tcred,
unsigned int flags)
{
- return call_int_hook(task_prlimit, 0, cred, tcred, flags);
+ return call_int_hook(task_prlimit, cred, tcred, flags);
}
/**
@@ -3427,7 +3441,7 @@ int security_task_prlimit(const struct cred *cred, const struct cred *tcred,
int security_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim)
{
- return call_int_hook(task_setrlimit, 0, p, resource, new_rlim);
+ return call_int_hook(task_setrlimit, p, resource, new_rlim);
}
/**
@@ -3441,7 +3455,7 @@ int security_task_setrlimit(struct task_struct *p, unsigned int resource,
*/
int security_task_setscheduler(struct task_struct *p)
{
- return call_int_hook(task_setscheduler, 0, p);
+ return call_int_hook(task_setscheduler, p);
}
/**
@@ -3454,7 +3468,7 @@ int security_task_setscheduler(struct task_struct *p)
*/
int security_task_getscheduler(struct task_struct *p)
{
- return call_int_hook(task_getscheduler, 0, p);
+ return call_int_hook(task_getscheduler, p);
}
/**
@@ -3467,7 +3481,7 @@ int security_task_getscheduler(struct task_struct *p)
*/
int security_task_movememory(struct task_struct *p)
{
- return call_int_hook(task_movememory, 0, p);
+ return call_int_hook(task_movememory, p);
}
/**
@@ -3488,7 +3502,7 @@ int security_task_movememory(struct task_struct *p)
int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
int sig, const struct cred *cred)
{
- return call_int_hook(task_kill, 0, p, info, sig, cred);
+ return call_int_hook(task_kill, p, info, sig, cred);
}
/**
@@ -3546,7 +3560,7 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode)
*/
int security_create_user_ns(const struct cred *cred)
{
- return call_int_hook(userns_create, 0, cred);
+ return call_int_hook(userns_create, cred);
}
/**
@@ -3560,7 +3574,7 @@ int security_create_user_ns(const struct cred *cred)
*/
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
{
- return call_int_hook(ipc_permission, 0, ipcp, flag);
+ return call_int_hook(ipc_permission, ipcp, flag);
}
/**
@@ -3592,7 +3606,7 @@ int security_msg_msg_alloc(struct msg_msg *msg)
if (unlikely(rc))
return rc;
- rc = call_int_hook(msg_msg_alloc_security, 0, msg);
+ rc = call_int_hook(msg_msg_alloc_security, msg);
if (unlikely(rc))
security_msg_msg_free(msg);
return rc;
@@ -3626,7 +3640,7 @@ int security_msg_queue_alloc(struct kern_ipc_perm *msq)
if (unlikely(rc))
return rc;
- rc = call_int_hook(msg_queue_alloc_security, 0, msq);
+ rc = call_int_hook(msg_queue_alloc_security, msq);
if (unlikely(rc))
security_msg_queue_free(msq);
return rc;
@@ -3658,7 +3672,7 @@ void security_msg_queue_free(struct kern_ipc_perm *msq)
*/
int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg)
{
- return call_int_hook(msg_queue_associate, 0, msq, msqflg);
+ return call_int_hook(msg_queue_associate, msq, msqflg);
}
/**
@@ -3673,7 +3687,7 @@ int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg)
*/
int security_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd)
{
- return call_int_hook(msg_queue_msgctl, 0, msq, cmd);
+ return call_int_hook(msg_queue_msgctl, msq, cmd);
}
/**
@@ -3690,7 +3704,7 @@ int security_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd)
int security_msg_queue_msgsnd(struct kern_ipc_perm *msq,
struct msg_msg *msg, int msqflg)
{
- return call_int_hook(msg_queue_msgsnd, 0, msq, msg, msqflg);
+ return call_int_hook(msg_queue_msgsnd, msq, msg, msqflg);
}
/**
@@ -3711,7 +3725,7 @@ int security_msg_queue_msgsnd(struct kern_ipc_perm *msq,
int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg,
struct task_struct *target, long type, int mode)
{
- return call_int_hook(msg_queue_msgrcv, 0, msq, msg, target, type, mode);
+ return call_int_hook(msg_queue_msgrcv, msq, msg, target, type, mode);
}
/**
@@ -3729,7 +3743,7 @@ int security_shm_alloc(struct kern_ipc_perm *shp)
if (unlikely(rc))
return rc;
- rc = call_int_hook(shm_alloc_security, 0, shp);
+ rc = call_int_hook(shm_alloc_security, shp);
if (unlikely(rc))
security_shm_free(shp);
return rc;
@@ -3762,7 +3776,7 @@ void security_shm_free(struct kern_ipc_perm *shp)
*/
int security_shm_associate(struct kern_ipc_perm *shp, int shmflg)
{
- return call_int_hook(shm_associate, 0, shp, shmflg);
+ return call_int_hook(shm_associate, shp, shmflg);
}
/**
@@ -3777,7 +3791,7 @@ int security_shm_associate(struct kern_ipc_perm *shp, int shmflg)
*/
int security_shm_shmctl(struct kern_ipc_perm *shp, int cmd)
{
- return call_int_hook(shm_shmctl, 0, shp, cmd);
+ return call_int_hook(shm_shmctl, shp, cmd);
}
/**
@@ -3795,7 +3809,7 @@ int security_shm_shmctl(struct kern_ipc_perm *shp, int cmd)
int security_shm_shmat(struct kern_ipc_perm *shp,
char __user *shmaddr, int shmflg)
{
- return call_int_hook(shm_shmat, 0, shp, shmaddr, shmflg);
+ return call_int_hook(shm_shmat, shp, shmaddr, shmflg);
}
/**
@@ -3813,7 +3827,7 @@ int security_sem_alloc(struct kern_ipc_perm *sma)
if (unlikely(rc))
return rc;
- rc = call_int_hook(sem_alloc_security, 0, sma);
+ rc = call_int_hook(sem_alloc_security, sma);
if (unlikely(rc))
security_sem_free(sma);
return rc;
@@ -3845,7 +3859,7 @@ void security_sem_free(struct kern_ipc_perm *sma)
*/
int security_sem_associate(struct kern_ipc_perm *sma, int semflg)
{
- return call_int_hook(sem_associate, 0, sma, semflg);
+ return call_int_hook(sem_associate, sma, semflg);
}
/**
@@ -3860,7 +3874,7 @@ int security_sem_associate(struct kern_ipc_perm *sma, int semflg)
*/
int security_sem_semctl(struct kern_ipc_perm *sma, int cmd)
{
- return call_int_hook(sem_semctl, 0, sma, cmd);
+ return call_int_hook(sem_semctl, sma, cmd);
}
/**
@@ -3878,7 +3892,7 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd)
int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops,
unsigned nsops, int alter)
{
- return call_int_hook(sem_semop, 0, sma, sops, nsops, alter);
+ return call_int_hook(sem_semop, sma, sops, nsops, alter);
}
/**
@@ -4015,6 +4029,7 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
struct security_hook_list *hp;
struct lsm_ctx *lctx;
int rc = LSM_RET_DEFAULT(setselfattr);
+ u64 required_len;
if (flags)
return -EINVAL;
@@ -4027,8 +4042,9 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
if (IS_ERR(lctx))
return PTR_ERR(lctx);
- if (size < lctx->len || size < lctx->ctx_len + sizeof(*lctx) ||
- lctx->len < lctx->ctx_len + sizeof(*lctx)) {
+ if (size < lctx->len ||
+ check_add_overflow(sizeof(*lctx), lctx->ctx_len, &required_len) ||
+ lctx->len < required_len) {
rc = -EINVAL;
goto free_out;
}
@@ -4107,11 +4123,11 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size)
*/
int security_netlink_send(struct sock *sk, struct sk_buff *skb)
{
- return call_int_hook(netlink_send, 0, sk, skb);
+ return call_int_hook(netlink_send, sk, skb);
}
/**
- * security_ismaclabel() - Check is the named attribute is a MAC label
+ * security_ismaclabel() - Check if the named attribute is a MAC label
* @name: full extended attribute name
*
* Check if the extended attribute specified by @name represents a MAC label.
@@ -4120,7 +4136,7 @@ int security_netlink_send(struct sock *sk, struct sk_buff *skb)
*/
int security_ismaclabel(const char *name)
{
- return call_int_hook(ismaclabel, 0, name);
+ return call_int_hook(ismaclabel, name);
}
EXPORT_SYMBOL(security_ismaclabel);
@@ -4139,20 +4155,7 @@ EXPORT_SYMBOL(security_ismaclabel);
*/
int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
{
- struct security_hook_list *hp;
- int rc;
-
- /*
- * Currently, only one LSM can implement secid_to_secctx (i.e this
- * LSM hook is not "stackable").
- */
- hlist_for_each_entry(hp, &security_hook_heads.secid_to_secctx, list) {
- rc = hp->hook.secid_to_secctx(secid, secdata, seclen);
- if (rc != LSM_RET_DEFAULT(secid_to_secctx))
- return rc;
- }
-
- return LSM_RET_DEFAULT(secid_to_secctx);
+ return call_int_hook(secid_to_secctx, secid, secdata, seclen);
}
EXPORT_SYMBOL(security_secid_to_secctx);
@@ -4169,7 +4172,7 @@ EXPORT_SYMBOL(security_secid_to_secctx);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid)
{
*secid = 0;
- return call_int_hook(secctx_to_secid, 0, secdata, seclen, secid);
+ return call_int_hook(secctx_to_secid, secdata, seclen, secid);
}
EXPORT_SYMBOL(security_secctx_to_secid);
@@ -4216,7 +4219,7 @@ EXPORT_SYMBOL(security_inode_invalidate_secctx);
*/
int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
{
- return call_int_hook(inode_notifysecctx, 0, inode, ctx, ctxlen);
+ return call_int_hook(inode_notifysecctx, inode, ctx, ctxlen);
}
EXPORT_SYMBOL(security_inode_notifysecctx);
@@ -4238,7 +4241,7 @@ EXPORT_SYMBOL(security_inode_notifysecctx);
*/
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
{
- return call_int_hook(inode_setsecctx, 0, dentry, ctx, ctxlen);
+ return call_int_hook(inode_setsecctx, dentry, ctx, ctxlen);
}
EXPORT_SYMBOL(security_inode_setsecctx);
@@ -4255,7 +4258,7 @@ EXPORT_SYMBOL(security_inode_setsecctx);
*/
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
{
- return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen);
+ return call_int_hook(inode_getsecctx, inode, ctx, ctxlen);
}
EXPORT_SYMBOL(security_inode_getsecctx);
@@ -4274,7 +4277,7 @@ int security_post_notification(const struct cred *w_cred,
const struct cred *cred,
struct watch_notification *n)
{
- return call_int_hook(post_notification, 0, w_cred, cred, n);
+ return call_int_hook(post_notification, w_cred, cred, n);
}
#endif /* CONFIG_WATCH_QUEUE */
@@ -4290,7 +4293,7 @@ int security_post_notification(const struct cred *w_cred,
*/
int security_watch_key(struct key *key)
{
- return call_int_hook(watch_key, 0, key);
+ return call_int_hook(watch_key, key);
}
#endif /* CONFIG_KEY_NOTIFICATIONS */
@@ -4319,7 +4322,7 @@ int security_watch_key(struct key *key)
int security_unix_stream_connect(struct sock *sock, struct sock *other,
struct sock *newsk)
{
- return call_int_hook(unix_stream_connect, 0, sock, other, newsk);
+ return call_int_hook(unix_stream_connect, sock, other, newsk);
}
EXPORT_SYMBOL(security_unix_stream_connect);
@@ -4345,7 +4348,7 @@ EXPORT_SYMBOL(security_unix_stream_connect);
*/
int security_unix_may_send(struct socket *sock, struct socket *other)
{
- return call_int_hook(unix_may_send, 0, sock, other);
+ return call_int_hook(unix_may_send, sock, other);
}
EXPORT_SYMBOL(security_unix_may_send);
@@ -4362,7 +4365,7 @@ EXPORT_SYMBOL(security_unix_may_send);
*/
int security_socket_create(int family, int type, int protocol, int kern)
{
- return call_int_hook(socket_create, 0, family, type, protocol, kern);
+ return call_int_hook(socket_create, family, type, protocol, kern);
}
/**
@@ -4386,7 +4389,7 @@ int security_socket_create(int family, int type, int protocol, int kern)
int security_socket_post_create(struct socket *sock, int family,
int type, int protocol, int kern)
{
- return call_int_hook(socket_post_create, 0, sock, family, type,
+ return call_int_hook(socket_post_create, sock, family, type,
protocol, kern);
}
@@ -4402,7 +4405,7 @@ int security_socket_post_create(struct socket *sock, int family,
*/
int security_socket_socketpair(struct socket *socka, struct socket *sockb)
{
- return call_int_hook(socket_socketpair, 0, socka, sockb);
+ return call_int_hook(socket_socketpair, socka, sockb);
}
EXPORT_SYMBOL(security_socket_socketpair);
@@ -4421,7 +4424,7 @@ EXPORT_SYMBOL(security_socket_socketpair);
int security_socket_bind(struct socket *sock,
struct sockaddr *address, int addrlen)
{
- return call_int_hook(socket_bind, 0, sock, address, addrlen);
+ return call_int_hook(socket_bind, sock, address, addrlen);
}
/**
@@ -4438,7 +4441,7 @@ int security_socket_bind(struct socket *sock,
int security_socket_connect(struct socket *sock,
struct sockaddr *address, int addrlen)
{
- return call_int_hook(socket_connect, 0, sock, address, addrlen);
+ return call_int_hook(socket_connect, sock, address, addrlen);
}
/**
@@ -4452,7 +4455,7 @@ int security_socket_connect(struct socket *sock,
*/
int security_socket_listen(struct socket *sock, int backlog)
{
- return call_int_hook(socket_listen, 0, sock, backlog);
+ return call_int_hook(socket_listen, sock, backlog);
}
/**
@@ -4468,11 +4471,11 @@ int security_socket_listen(struct socket *sock, int backlog)
*/
int security_socket_accept(struct socket *sock, struct socket *newsock)
{
- return call_int_hook(socket_accept, 0, sock, newsock);
+ return call_int_hook(socket_accept, sock, newsock);
}
/**
- * security_socket_sendmsg() - Check is sending a message is allowed
+ * security_socket_sendmsg() - Check if sending a message is allowed
* @sock: sending socket
* @msg: message to send
* @size: size of message
@@ -4483,7 +4486,7 @@ int security_socket_accept(struct socket *sock, struct socket *newsock)
*/
int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
{
- return call_int_hook(socket_sendmsg, 0, sock, msg, size);
+ return call_int_hook(socket_sendmsg, sock, msg, size);
}
/**
@@ -4500,7 +4503,7 @@ int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
int security_socket_recvmsg(struct socket *sock, struct msghdr *msg,
int size, int flags)
{
- return call_int_hook(socket_recvmsg, 0, sock, msg, size, flags);
+ return call_int_hook(socket_recvmsg, sock, msg, size, flags);
}
/**
@@ -4514,7 +4517,7 @@ int security_socket_recvmsg(struct socket *sock, struct msghdr *msg,
*/
int security_socket_getsockname(struct socket *sock)
{
- return call_int_hook(socket_getsockname, 0, sock);
+ return call_int_hook(socket_getsockname, sock);
}
/**
@@ -4527,7 +4530,7 @@ int security_socket_getsockname(struct socket *sock)
*/
int security_socket_getpeername(struct socket *sock)
{
- return call_int_hook(socket_getpeername, 0, sock);
+ return call_int_hook(socket_getpeername, sock);
}
/**
@@ -4543,7 +4546,7 @@ int security_socket_getpeername(struct socket *sock)
*/
int security_socket_getsockopt(struct socket *sock, int level, int optname)
{
- return call_int_hook(socket_getsockopt, 0, sock, level, optname);
+ return call_int_hook(socket_getsockopt, sock, level, optname);
}
/**
@@ -4558,7 +4561,7 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname)
*/
int security_socket_setsockopt(struct socket *sock, int level, int optname)
{
- return call_int_hook(socket_setsockopt, 0, sock, level, optname);
+ return call_int_hook(socket_setsockopt, sock, level, optname);
}
/**
@@ -4573,7 +4576,7 @@ int security_socket_setsockopt(struct socket *sock, int level, int optname)
*/
int security_socket_shutdown(struct socket *sock, int how)
{
- return call_int_hook(socket_shutdown, 0, sock, how);
+ return call_int_hook(socket_shutdown, sock, how);
}
/**
@@ -4590,7 +4593,7 @@ int security_socket_shutdown(struct socket *sock, int how)
*/
int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- return call_int_hook(socket_sock_rcv_skb, 0, sk, skb);
+ return call_int_hook(socket_sock_rcv_skb, sk, skb);
}
EXPORT_SYMBOL(security_sock_rcv_skb);
@@ -4612,8 +4615,8 @@ EXPORT_SYMBOL(security_sock_rcv_skb);
int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
sockptr_t optlen, unsigned int len)
{
- return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
- optval, optlen, len);
+ return call_int_hook(socket_getpeersec_stream, sock, optval, optlen,
+ len);
}
/**
@@ -4633,8 +4636,7 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
int security_socket_getpeersec_dgram(struct socket *sock,
struct sk_buff *skb, u32 *secid)
{
- return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock,
- skb, secid);
+ return call_int_hook(socket_getpeersec_dgram, sock, skb, secid);
}
EXPORT_SYMBOL(security_socket_getpeersec_dgram);
@@ -4651,7 +4653,7 @@ EXPORT_SYMBOL(security_socket_getpeersec_dgram);
*/
int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
{
- return call_int_hook(sk_alloc_security, 0, sk, family, priority);
+ return call_int_hook(sk_alloc_security, sk, family, priority);
}
/**
@@ -4732,7 +4734,7 @@ EXPORT_SYMBOL(security_sock_graft);
int security_inet_conn_request(const struct sock *sk,
struct sk_buff *skb, struct request_sock *req)
{
- return call_int_hook(inet_conn_request, 0, sk, skb, req);
+ return call_int_hook(inet_conn_request, sk, skb, req);
}
EXPORT_SYMBOL(security_inet_conn_request);
@@ -4773,7 +4775,7 @@ EXPORT_SYMBOL(security_inet_conn_established);
*/
int security_secmark_relabel_packet(u32 secid)
{
- return call_int_hook(secmark_relabel_packet, 0, secid);
+ return call_int_hook(secmark_relabel_packet, secid);
}
EXPORT_SYMBOL(security_secmark_relabel_packet);
@@ -4810,7 +4812,7 @@ EXPORT_SYMBOL(security_secmark_refcount_dec);
*/
int security_tun_dev_alloc_security(void **security)
{
- return call_int_hook(tun_dev_alloc_security, 0, security);
+ return call_int_hook(tun_dev_alloc_security, security);
}
EXPORT_SYMBOL(security_tun_dev_alloc_security);
@@ -4835,7 +4837,7 @@ EXPORT_SYMBOL(security_tun_dev_free_security);
*/
int security_tun_dev_create(void)
{
- return call_int_hook(tun_dev_create, 0);
+ return call_int_hook(tun_dev_create);
}
EXPORT_SYMBOL(security_tun_dev_create);
@@ -4849,7 +4851,7 @@ EXPORT_SYMBOL(security_tun_dev_create);
*/
int security_tun_dev_attach_queue(void *security)
{
- return call_int_hook(tun_dev_attach_queue, 0, security);
+ return call_int_hook(tun_dev_attach_queue, security);
}
EXPORT_SYMBOL(security_tun_dev_attach_queue);
@@ -4865,7 +4867,7 @@ EXPORT_SYMBOL(security_tun_dev_attach_queue);
*/
int security_tun_dev_attach(struct sock *sk, void *security)
{
- return call_int_hook(tun_dev_attach, 0, sk, security);
+ return call_int_hook(tun_dev_attach, sk, security);
}
EXPORT_SYMBOL(security_tun_dev_attach);
@@ -4880,7 +4882,7 @@ EXPORT_SYMBOL(security_tun_dev_attach);
*/
int security_tun_dev_open(void *security)
{
- return call_int_hook(tun_dev_open, 0, security);
+ return call_int_hook(tun_dev_open, security);
}
EXPORT_SYMBOL(security_tun_dev_open);
@@ -4896,7 +4898,7 @@ EXPORT_SYMBOL(security_tun_dev_open);
int security_sctp_assoc_request(struct sctp_association *asoc,
struct sk_buff *skb)
{
- return call_int_hook(sctp_assoc_request, 0, asoc, skb);
+ return call_int_hook(sctp_assoc_request, asoc, skb);
}
EXPORT_SYMBOL(security_sctp_assoc_request);
@@ -4917,8 +4919,7 @@ EXPORT_SYMBOL(security_sctp_assoc_request);
int security_sctp_bind_connect(struct sock *sk, int optname,
struct sockaddr *address, int addrlen)
{
- return call_int_hook(sctp_bind_connect, 0, sk, optname,
- address, addrlen);
+ return call_int_hook(sctp_bind_connect, sk, optname, address, addrlen);
}
EXPORT_SYMBOL(security_sctp_bind_connect);
@@ -4952,7 +4953,7 @@ EXPORT_SYMBOL(security_sctp_sk_clone);
int security_sctp_assoc_established(struct sctp_association *asoc,
struct sk_buff *skb)
{
- return call_int_hook(sctp_assoc_established, 0, asoc, skb);
+ return call_int_hook(sctp_assoc_established, asoc, skb);
}
EXPORT_SYMBOL(security_sctp_assoc_established);
@@ -4970,7 +4971,7 @@ EXPORT_SYMBOL(security_sctp_assoc_established);
*/
int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
{
- return call_int_hook(mptcp_add_subflow, 0, sk, ssk);
+ return call_int_hook(mptcp_add_subflow, sk, ssk);
}
#endif /* CONFIG_SECURITY_NETWORK */
@@ -4988,7 +4989,7 @@ int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
*/
int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey)
{
- return call_int_hook(ib_pkey_access, 0, sec, subnet_prefix, pkey);
+ return call_int_hook(ib_pkey_access, sec, subnet_prefix, pkey);
}
EXPORT_SYMBOL(security_ib_pkey_access);
@@ -5005,8 +5006,7 @@ EXPORT_SYMBOL(security_ib_pkey_access);
int security_ib_endport_manage_subnet(void *sec,
const char *dev_name, u8 port_num)
{
- return call_int_hook(ib_endport_manage_subnet, 0, sec,
- dev_name, port_num);
+ return call_int_hook(ib_endport_manage_subnet, sec, dev_name, port_num);
}
EXPORT_SYMBOL(security_ib_endport_manage_subnet);
@@ -5020,7 +5020,7 @@ EXPORT_SYMBOL(security_ib_endport_manage_subnet);
*/
int security_ib_alloc_security(void **sec)
{
- return call_int_hook(ib_alloc_security, 0, sec);
+ return call_int_hook(ib_alloc_security, sec);
}
EXPORT_SYMBOL(security_ib_alloc_security);
@@ -5053,7 +5053,7 @@ int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
struct xfrm_user_sec_ctx *sec_ctx,
gfp_t gfp)
{
- return call_int_hook(xfrm_policy_alloc_security, 0, ctxp, sec_ctx, gfp);
+ return call_int_hook(xfrm_policy_alloc_security, ctxp, sec_ctx, gfp);
}
EXPORT_SYMBOL(security_xfrm_policy_alloc);
@@ -5070,7 +5070,7 @@ EXPORT_SYMBOL(security_xfrm_policy_alloc);
int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
struct xfrm_sec_ctx **new_ctxp)
{
- return call_int_hook(xfrm_policy_clone_security, 0, old_ctx, new_ctxp);
+ return call_int_hook(xfrm_policy_clone_security, old_ctx, new_ctxp);
}
/**
@@ -5095,7 +5095,7 @@ EXPORT_SYMBOL(security_xfrm_policy_free);
*/
int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
{
- return call_int_hook(xfrm_policy_delete_security, 0, ctx);
+ return call_int_hook(xfrm_policy_delete_security, ctx);
}
/**
@@ -5112,7 +5112,7 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
int security_xfrm_state_alloc(struct xfrm_state *x,
struct xfrm_user_sec_ctx *sec_ctx)
{
- return call_int_hook(xfrm_state_alloc, 0, x, sec_ctx);
+ return call_int_hook(xfrm_state_alloc, x, sec_ctx);
}
EXPORT_SYMBOL(security_xfrm_state_alloc);
@@ -5131,7 +5131,7 @@ EXPORT_SYMBOL(security_xfrm_state_alloc);
int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
struct xfrm_sec_ctx *polsec, u32 secid)
{
- return call_int_hook(xfrm_state_alloc_acquire, 0, x, polsec, secid);
+ return call_int_hook(xfrm_state_alloc_acquire, x, polsec, secid);
}
/**
@@ -5144,7 +5144,7 @@ int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
*/
int security_xfrm_state_delete(struct xfrm_state *x)
{
- return call_int_hook(xfrm_state_delete_security, 0, x);
+ return call_int_hook(xfrm_state_delete_security, x);
}
EXPORT_SYMBOL(security_xfrm_state_delete);
@@ -5173,7 +5173,7 @@ void security_xfrm_state_free(struct xfrm_state *x)
*/
int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid)
{
- return call_int_hook(xfrm_policy_lookup, 0, ctx, fl_secid);
+ return call_int_hook(xfrm_policy_lookup, ctx, fl_secid);
}
/**
@@ -5221,12 +5221,12 @@ int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
*/
int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
{
- return call_int_hook(xfrm_decode_session, 0, skb, secid, 1);
+ return call_int_hook(xfrm_decode_session, skb, secid, 1);
}
void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic)
{
- int rc = call_int_hook(xfrm_decode_session, 0, skb, &flic->flowic_secid,
+ int rc = call_int_hook(xfrm_decode_session, skb, &flic->flowic_secid,
0);
BUG_ON(rc);
@@ -5249,7 +5249,7 @@ EXPORT_SYMBOL(security_skb_classify_flow);
int security_key_alloc(struct key *key, const struct cred *cred,
unsigned long flags)
{
- return call_int_hook(key_alloc, 0, key, cred, flags);
+ return call_int_hook(key_alloc, key, cred, flags);
}
/**
@@ -5276,7 +5276,7 @@ void security_key_free(struct key *key)
int security_key_permission(key_ref_t key_ref, const struct cred *cred,
enum key_need_perm need_perm)
{
- return call_int_hook(key_permission, 0, key_ref, cred, need_perm);
+ return call_int_hook(key_permission, key_ref, cred, need_perm);
}
/**
@@ -5295,7 +5295,26 @@ int security_key_permission(key_ref_t key_ref, const struct cred *cred,
int security_key_getsecurity(struct key *key, char **buffer)
{
*buffer = NULL;
- return call_int_hook(key_getsecurity, 0, key, buffer);
+ return call_int_hook(key_getsecurity, key, buffer);
+}
+
+/**
+ * security_key_post_create_or_update() - Notification of key create or update
+ * @keyring: keyring to which the key is linked to
+ * @key: created or updated key
+ * @payload: data used to instantiate or update the key
+ * @payload_len: length of payload
+ * @flags: key flags
+ * @create: flag indicating whether the key was created or updated
+ *
+ * Notify the caller of a key creation or update.
+ */
+void security_key_post_create_or_update(struct key *keyring, struct key *key,
+ const void *payload, size_t payload_len,
+ unsigned long flags, bool create)
+{
+ call_void_hook(key_post_create_or_update, keyring, key, payload,
+ payload_len, flags, create);
}
#endif /* CONFIG_KEYS */
@@ -5314,7 +5333,7 @@ int security_key_getsecurity(struct key *key, char **buffer)
*/
int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule)
{
- return call_int_hook(audit_rule_init, 0, field, op, rulestr, lsmrule);
+ return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule);
}
/**
@@ -5328,7 +5347,7 @@ int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule)
*/
int security_audit_rule_known(struct audit_krule *krule)
{
- return call_int_hook(audit_rule_known, 0, krule);
+ return call_int_hook(audit_rule_known, krule);
}
/**
@@ -5358,7 +5377,7 @@ void security_audit_rule_free(void *lsmrule)
*/
int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule)
{
- return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule);
+ return call_int_hook(audit_rule_match, secid, field, op, lsmrule);
}
#endif /* CONFIG_AUDIT */
@@ -5377,7 +5396,7 @@ int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule)
*/
int security_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
- return call_int_hook(bpf, 0, cmd, attr, size);
+ return call_int_hook(bpf, cmd, attr, size);
}
/**
@@ -5392,7 +5411,7 @@ int security_bpf(int cmd, union bpf_attr *attr, unsigned int size)
*/
int security_bpf_map(struct bpf_map *map, fmode_t fmode)
{
- return call_int_hook(bpf_map, 0, map, fmode);
+ return call_int_hook(bpf_map, map, fmode);
}
/**
@@ -5406,33 +5425,91 @@ int security_bpf_map(struct bpf_map *map, fmode_t fmode)
*/
int security_bpf_prog(struct bpf_prog *prog)
{
- return call_int_hook(bpf_prog, 0, prog);
+ return call_int_hook(bpf_prog, prog);
}
/**
- * security_bpf_map_alloc() - Allocate a bpf map LSM blob
- * @map: bpf map
+ * security_bpf_map_create() - Check if BPF map creation is allowed
+ * @map: BPF map object
+ * @attr: BPF syscall attributes used to create BPF map
+ * @token: BPF token used to grant user access
*
- * Initialize the security field inside bpf map.
+ * Do a check when the kernel creates a new BPF map. This is also the
+ * point where LSM blob is allocated for LSMs that need them.
*
* Return: Returns 0 on success, error on failure.
*/
-int security_bpf_map_alloc(struct bpf_map *map)
+int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
{
- return call_int_hook(bpf_map_alloc_security, 0, map);
+ return call_int_hook(bpf_map_create, map, attr, token);
}
/**
- * security_bpf_prog_alloc() - Allocate a bpf program LSM blob
- * @aux: bpf program aux info struct
+ * security_bpf_prog_load() - Check if loading of BPF program is allowed
+ * @prog: BPF program object
+ * @attr: BPF syscall attributes used to create BPF program
+ * @token: BPF token used to grant user access to BPF subsystem
*
- * Initialize the security field inside bpf program.
+ * Perform an access control check when the kernel loads a BPF program and
+ * allocates associated BPF program object. This hook is also responsible for
+ * allocating any required LSM state for the BPF program.
*
* Return: Returns 0 on success, error on failure.
*/
-int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
{
- return call_int_hook(bpf_prog_alloc_security, 0, aux);
+ return call_int_hook(bpf_prog_load, prog, attr, token);
+}
+
+/**
+ * security_bpf_token_create() - Check if creating of BPF token is allowed
+ * @token: BPF token object
+ * @attr: BPF syscall attributes used to create BPF token
+ * @path: path pointing to BPF FS mount point from which BPF token is created
+ *
+ * Do a check when the kernel instantiates a new BPF token object from BPF FS
+ * instance. This is also the point where LSM blob can be allocated for LSMs.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+{
+ return call_int_hook(bpf_token_create, token, attr, path);
+}
+
+/**
+ * security_bpf_token_cmd() - Check if BPF token is allowed to delegate
+ * requested BPF syscall command
+ * @token: BPF token object
+ * @cmd: BPF syscall command requested to be delegated by BPF token
+ *
+ * Do a check when the kernel decides whether provided BPF token should allow
+ * delegation of requested BPF syscall command.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ return call_int_hook(bpf_token_cmd, token, cmd);
+}
+
+/**
+ * security_bpf_token_capable() - Check if BPF token is allowed to delegate
+ * requested BPF-related capability
+ * @token: BPF token object
+ * @cap: capabilities requested to be delegated by BPF token
+ *
+ * Do a check when the kernel decides whether provided BPF token should allow
+ * delegation of requested BPF-related capabilities.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return call_int_hook(bpf_token_capable, token, cap);
}
/**
@@ -5443,18 +5520,29 @@ int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
*/
void security_bpf_map_free(struct bpf_map *map)
{
- call_void_hook(bpf_map_free_security, map);
+ call_void_hook(bpf_map_free, map);
+}
+
+/**
+ * security_bpf_prog_free() - Free a BPF program's LSM blob
+ * @prog: BPF program struct
+ *
+ * Clean up the security information stored inside BPF program.
+ */
+void security_bpf_prog_free(struct bpf_prog *prog)
+{
+ call_void_hook(bpf_prog_free, prog);
}
/**
- * security_bpf_prog_free() - Free a bpf program's LSM blob
- * @aux: bpf program aux info struct
+ * security_bpf_token_free() - Free a BPF token's LSM blob
+ * @token: BPF token struct
*
- * Clean up the security information stored inside bpf prog.
+ * Clean up the security information stored inside BPF token.
*/
-void security_bpf_prog_free(struct bpf_prog_aux *aux)
+void security_bpf_token_free(struct bpf_token *token)
{
- call_void_hook(bpf_prog_free_security, aux);
+ call_void_hook(bpf_token_free, token);
}
#endif /* CONFIG_BPF_SYSCALL */
@@ -5469,7 +5557,7 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux)
*/
int security_locked_down(enum lockdown_reason what)
{
- return call_int_hook(locked_down, 0, what);
+ return call_int_hook(locked_down, what);
}
EXPORT_SYMBOL(security_locked_down);
@@ -5485,7 +5573,7 @@ EXPORT_SYMBOL(security_locked_down);
*/
int security_perf_event_open(struct perf_event_attr *attr, int type)
{
- return call_int_hook(perf_event_open, 0, attr, type);
+ return call_int_hook(perf_event_open, attr, type);
}
/**
@@ -5498,7 +5586,7 @@ int security_perf_event_open(struct perf_event_attr *attr, int type)
*/
int security_perf_event_alloc(struct perf_event *event)
{
- return call_int_hook(perf_event_alloc, 0, event);
+ return call_int_hook(perf_event_alloc, event);
}
/**
@@ -5522,7 +5610,7 @@ void security_perf_event_free(struct perf_event *event)
*/
int security_perf_event_read(struct perf_event *event)
{
- return call_int_hook(perf_event_read, 0, event);
+ return call_int_hook(perf_event_read, event);
}
/**
@@ -5535,7 +5623,7 @@ int security_perf_event_read(struct perf_event *event)
*/
int security_perf_event_write(struct perf_event *event)
{
- return call_int_hook(perf_event_write, 0, event);
+ return call_int_hook(perf_event_write, event);
}
#endif /* CONFIG_PERF_EVENTS */
@@ -5551,7 +5639,7 @@ int security_perf_event_write(struct perf_event *event)
*/
int security_uring_override_creds(const struct cred *new)
{
- return call_int_hook(uring_override_creds, 0, new);
+ return call_int_hook(uring_override_creds, new);
}
/**
@@ -5564,7 +5652,7 @@ int security_uring_override_creds(const struct cred *new)
*/
int security_uring_sqpoll(void)
{
- return call_int_hook(uring_sqpoll, 0);
+ return call_int_hook(uring_sqpoll);
}
/**
@@ -5577,6 +5665,6 @@ int security_uring_sqpoll(void)
*/
int security_uring_cmd(struct io_uring_cmd *ioucmd)
{
- return call_int_hook(uring_cmd, 0, ioucmd);
+ return call_int_hook(uring_cmd, ioucmd);
}
#endif /* CONFIG_IO_URING */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a6bf90ace84c..8db4875164ab 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2920,23 +2920,22 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
struct superblock_security_struct *sbsec;
struct xattr *xattr = lsm_get_xattr_slot(xattrs, xattr_count);
u32 newsid, clen;
+ u16 newsclass;
int rc;
char *context;
sbsec = selinux_superblock(dir->i_sb);
newsid = tsec->create_sid;
-
- rc = selinux_determine_inode_label(tsec, dir, qstr,
- inode_mode_to_security_class(inode->i_mode),
- &newsid);
+ newsclass = inode_mode_to_security_class(inode->i_mode);
+ rc = selinux_determine_inode_label(tsec, dir, qstr, newsclass, &newsid);
if (rc)
return rc;
/* Possibly defer initialization to selinux_complete_init. */
if (sbsec->flags & SE_SBINITIALIZED) {
struct inode_security_struct *isec = selinux_inode(inode);
- isec->sclass = inode_mode_to_security_class(inode->i_mode);
+ isec->sclass = newsclass;
isec->sid = newsid;
isec->initialized = LABEL_INITIALIZED;
}
@@ -3136,7 +3135,8 @@ static int selinux_inode_permission(struct inode *inode, int mask)
return rc;
}
-static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+static int selinux_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *iattr)
{
const struct cred *cred = current_cred();
struct inode *inode = d_backing_inode(dentry);
@@ -3534,9 +3534,10 @@ static int selinux_inode_copy_up_xattr(const char *name)
{
/* The copy_up hook above sets the initial context on an inode, but we
* don't then want to overwrite it by blindly copying all the lower
- * xattrs up. Instead, we have to filter out SELinux-related xattrs.
+ * xattrs up. Instead, filter out SELinux-related xattrs following
+ * policy load.
*/
- if (strcmp(name, XATTR_NAME_SELINUX) == 0)
+ if (selinux_initialized() && strcmp(name, XATTR_NAME_SELINUX) == 0)
return 1; /* Discard */
/*
* Any other attribute apart from SELINUX is not claimed, supported
@@ -5194,11 +5195,11 @@ out_len:
return err;
}
-static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
+static int selinux_socket_getpeersec_dgram(struct socket *sock,
+ struct sk_buff *skb, u32 *secid)
{
u32 peer_secid = SECSID_NULL;
u16 family;
- struct inode_security_struct *isec;
if (skb && skb->protocol == htons(ETH_P_IP))
family = PF_INET;
@@ -5206,19 +5207,21 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *
family = PF_INET6;
else if (sock)
family = sock->sk->sk_family;
- else
- goto out;
+ else {
+ *secid = SECSID_NULL;
+ return -EINVAL;
+ }
if (sock && family == PF_UNIX) {
+ struct inode_security_struct *isec;
isec = inode_security_novalidate(SOCK_INODE(sock));
peer_secid = isec->sid;
} else if (skb)
selinux_skb_peerlbl_sid(skb, family, &peer_secid);
-out:
*secid = peer_secid;
if (peer_secid == SECSID_NULL)
- return -EINVAL;
+ return -ENOPROTOOPT;
return 0;
}
@@ -6559,7 +6562,7 @@ static int selinux_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx,
size_t *size, u32 flags)
{
int rc;
- char *val;
+ char *val = NULL;
int val_len;
val_len = selinux_lsm_getattr(attr, current, &val);
@@ -6920,7 +6923,8 @@ static int selinux_bpf_prog(struct bpf_prog *prog)
BPF__PROG_RUN, NULL);
}
-static int selinux_bpf_map_alloc(struct bpf_map *map)
+static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
{
struct bpf_security_struct *bpfsec;
@@ -6942,7 +6946,31 @@ static void selinux_bpf_map_free(struct bpf_map *map)
kfree(bpfsec);
}
-static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux)
+static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
+{
+ struct bpf_security_struct *bpfsec;
+
+ bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL);
+ if (!bpfsec)
+ return -ENOMEM;
+
+ bpfsec->sid = current_sid();
+ prog->aux->security = bpfsec;
+
+ return 0;
+}
+
+static void selinux_bpf_prog_free(struct bpf_prog *prog)
+{
+ struct bpf_security_struct *bpfsec = prog->aux->security;
+
+ prog->aux->security = NULL;
+ kfree(bpfsec);
+}
+
+static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
{
struct bpf_security_struct *bpfsec;
@@ -6951,16 +6979,16 @@ static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux)
return -ENOMEM;
bpfsec->sid = current_sid();
- aux->security = bpfsec;
+ token->security = bpfsec;
return 0;
}
-static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
+static void selinux_bpf_token_free(struct bpf_token *token)
{
- struct bpf_security_struct *bpfsec = aux->security;
+ struct bpf_security_struct *bpfsec = token->security;
- aux->security = NULL;
+ token->security = NULL;
kfree(bpfsec);
}
#endif
@@ -7324,8 +7352,9 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(bpf, selinux_bpf),
LSM_HOOK_INIT(bpf_map, selinux_bpf_map),
LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog),
- LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
- LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
+ LSM_HOOK_INIT(bpf_map_free, selinux_bpf_map_free),
+ LSM_HOOK_INIT(bpf_prog_free, selinux_bpf_prog_free),
+ LSM_HOOK_INIT(bpf_token_free, selinux_bpf_token_free),
#endif
#ifdef CONFIG_PERF_EVENTS
@@ -7382,8 +7411,9 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(audit_rule_init, selinux_audit_rule_init),
#endif
#ifdef CONFIG_BPF_SYSCALL
- LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc),
- LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc),
+ LSM_HOOK_INIT(bpf_map_create, selinux_bpf_map_create),
+ LSM_HOOK_INIT(bpf_prog_load, selinux_bpf_prog_load),
+ LSM_HOOK_INIT(bpf_token_create, selinux_bpf_token_create),
#endif
#ifdef CONFIG_PERF_EVENTS
LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc),
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index 697eb4352439..2ad98732d052 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -1,20 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Implementation of the access vector table type.
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
-/* Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com>
- *
- * Added conditional policy language extensions
- *
- * Copyright (C) 2003 Tresys Technology, LLC
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 2.
+/* Updated: Frank Mayer <mayerf@tresys.com> and
+ * Karl MacMillan <kmacmillan@tresys.com>
+ * Added conditional policy language extensions
+ * Copyright (C) 2003 Tresys Technology, LLC
*
* Updated: Yuichi Nakamura <ynakam@hitachisoft.jp>
- * Tuned number of hash slots for avtab to reduce memory usage
+ * Tuned number of hash slots for avtab to reduce memory usage
*/
#include <linux/bitops.h>
@@ -36,19 +33,20 @@ static inline u32 avtab_hash(const struct avtab_key *keyp, u32 mask)
static const u32 c2 = 0x1b873593;
static const u32 r1 = 15;
static const u32 r2 = 13;
- static const u32 m = 5;
- static const u32 n = 0xe6546b64;
+ static const u32 m = 5;
+ static const u32 n = 0xe6546b64;
u32 hash = 0;
-#define mix(input) do { \
- u32 v = input; \
- v *= c1; \
- v = (v << r1) | (v >> (32 - r1)); \
- v *= c2; \
- hash ^= v; \
+#define mix(input) \
+ do { \
+ u32 v = input; \
+ v *= c1; \
+ v = (v << r1) | (v >> (32 - r1)); \
+ v *= c2; \
+ hash ^= v; \
hash = (hash << r2) | (hash >> (32 - r2)); \
- hash = hash * m + n; \
+ hash = hash * m + n; \
} while (0)
mix(keyp->target_class);
@@ -66,9 +64,10 @@ static inline u32 avtab_hash(const struct avtab_key *keyp, u32 mask)
return hash & mask;
}
-static struct avtab_node*
-avtab_insert_node(struct avtab *h, struct avtab_node **dst,
- const struct avtab_key *key, const struct avtab_datum *datum)
+static struct avtab_node *avtab_insert_node(struct avtab *h,
+ struct avtab_node **dst,
+ const struct avtab_key *key,
+ const struct avtab_datum *datum)
{
struct avtab_node *newnode;
struct avtab_extended_perms *xperms;
@@ -99,7 +98,7 @@ avtab_insert_node(struct avtab *h, struct avtab_node **dst,
static int avtab_node_cmp(const struct avtab_key *key1,
const struct avtab_key *key2)
{
- u16 specified = key1->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
+ u16 specified = key1->specified & ~(AVTAB_ENABLED | AVTAB_ENABLED_OLD);
if (key1->source_type == key2->source_type &&
key1->target_type == key2->target_type &&
@@ -129,8 +128,7 @@ static int avtab_insert(struct avtab *h, const struct avtab_key *key,
return -EINVAL;
hvalue = avtab_hash(key, h->mask);
- for (prev = NULL, cur = h->htable[hvalue];
- cur;
+ for (prev = NULL, cur = h->htable[hvalue]; cur;
prev = cur, cur = cur->next) {
cmp = avtab_node_cmp(key, &cur->key);
/* extended perms may not be unique */
@@ -163,8 +161,7 @@ struct avtab_node *avtab_insert_nonunique(struct avtab *h,
if (!h || !h->nslot || h->nel == U32_MAX)
return NULL;
hvalue = avtab_hash(key, h->mask);
- for (prev = NULL, cur = h->htable[hvalue];
- cur;
+ for (prev = NULL, cur = h->htable[hvalue]; cur;
prev = cur, cur = cur->next) {
cmp = avtab_node_cmp(key, &cur->key);
if (cmp <= 0)
@@ -188,8 +185,7 @@ struct avtab_node *avtab_search_node(struct avtab *h,
return NULL;
hvalue = avtab_hash(key, h->mask);
- for (cur = h->htable[hvalue]; cur;
- cur = cur->next) {
+ for (cur = h->htable[hvalue]; cur; cur = cur->next) {
cmp = avtab_node_cmp(key, &cur->key);
if (cmp == 0)
return cur;
@@ -199,8 +195,8 @@ struct avtab_node *avtab_search_node(struct avtab *h,
return NULL;
}
-struct avtab_node*
-avtab_search_node_next(struct avtab_node *node, u16 specified)
+struct avtab_node *avtab_search_node_next(struct avtab_node *node,
+ u16 specified)
{
struct avtab_key tmp_key;
struct avtab_node *cur;
@@ -314,17 +310,19 @@ void avtab_hash_eval(struct avtab *h, const char *tag)
if (chain_len > max_chain_len)
max_chain_len = chain_len;
- chain2_len_sum += (unsigned long long)chain_len * chain_len;
+ chain2_len_sum +=
+ (unsigned long long)chain_len * chain_len;
}
}
pr_debug("SELinux: %s: %d entries and %d/%d buckets used, "
- "longest chain length %d, sum of chain length^2 %llu\n",
- tag, h->nel, slots_used, h->nslot, max_chain_len,
- chain2_len_sum);
+ "longest chain length %d, sum of chain length^2 %llu\n",
+ tag, h->nel, slots_used, h->nslot, max_chain_len,
+ chain2_len_sum);
}
#endif /* CONFIG_SECURITY_SELINUX_DEBUG */
+/* clang-format off */
static const uint16_t spec_order[] = {
AVTAB_ALLOWED,
AVTAB_AUDITDENY,
@@ -336,6 +334,7 @@ static const uint16_t spec_order[] = {
AVTAB_XPERMS_AUDITALLOW,
AVTAB_XPERMS_DONTAUDIT
};
+/* clang-format on */
int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
int (*insertf)(struct avtab *a, const struct avtab_key *k,
@@ -365,9 +364,8 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
if (items2 > ARRAY_SIZE(buf32)) {
pr_err("SELinux: avtab: entry overflow\n");
return -EINVAL;
-
}
- rc = next_entry(buf32, fp, sizeof(u32)*items2);
+ rc = next_entry(buf32, fp, sizeof(u32) * items2);
if (rc) {
pr_err("SELinux: avtab: truncated entry\n");
return rc;
@@ -400,8 +398,7 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
pr_err("SELinux: avtab: null entry\n");
return -EINVAL;
}
- if ((val & AVTAB_AV) &&
- (val & AVTAB_TYPE)) {
+ if ((val & AVTAB_AV) && (val & AVTAB_TYPE)) {
pr_err("SELinux: avtab: entry has both access vectors and types\n");
return -EINVAL;
}
@@ -428,7 +425,7 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
return 0;
}
- rc = next_entry(buf16, fp, sizeof(u16)*4);
+ rc = next_entry(buf16, fp, sizeof(u16) * 4);
if (rc) {
pr_err("SELinux: avtab: truncated entry\n");
return rc;
@@ -454,10 +451,11 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
}
if ((vers < POLICYDB_VERSION_XPERMS_IOCTL) &&
- (key.specified & AVTAB_XPERMS)) {
+ (key.specified & AVTAB_XPERMS)) {
pr_err("SELinux: avtab: policy version %u does not "
- "support extended permissions rules and one "
- "was specified\n", vers);
+ "support extended permissions rules and one "
+ "was specified\n",
+ vers);
return -EINVAL;
} else if (key.specified & AVTAB_XPERMS) {
memset(&xperms, 0, sizeof(struct avtab_extended_perms));
@@ -471,7 +469,8 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
pr_err("SELinux: avtab: truncated entry\n");
return rc;
}
- rc = next_entry(buf32, fp, sizeof(u32)*ARRAY_SIZE(xperms.perms.p));
+ rc = next_entry(buf32, fp,
+ sizeof(u32) * ARRAY_SIZE(xperms.perms.p));
if (rc) {
pr_err("SELinux: avtab: truncated entry\n");
return rc;
@@ -507,7 +506,6 @@ int avtab_read(struct avtab *a, void *fp, struct policydb *pol)
__le32 buf[1];
u32 nel, i;
-
rc = next_entry(buf, fp, sizeof(u32));
if (rc < 0) {
pr_err("SELinux: avtab: truncated table\n");
@@ -561,7 +559,8 @@ int avtab_write_item(struct policydb *p, const struct avtab_node *cur, void *fp)
return rc;
if (cur->key.specified & AVTAB_XPERMS) {
- rc = put_entry(&cur->datum.u.xperms->specified, sizeof(u8), 1, fp);
+ rc = put_entry(&cur->datum.u.xperms->specified, sizeof(u8), 1,
+ fp);
if (rc)
return rc;
rc = put_entry(&cur->datum.u.xperms->driver, sizeof(u8), 1, fp);
@@ -570,7 +569,7 @@ int avtab_write_item(struct policydb *p, const struct avtab_node *cur, void *fp)
for (i = 0; i < ARRAY_SIZE(cur->datum.u.xperms->perms.p); i++)
buf32[i] = cpu_to_le32(cur->datum.u.xperms->perms.p[i]);
rc = put_entry(buf32, sizeof(u32),
- ARRAY_SIZE(cur->datum.u.xperms->perms.p), fp);
+ ARRAY_SIZE(cur->datum.u.xperms->perms.p), fp);
} else {
buf32[0] = cpu_to_le32(cur->datum.u.data);
rc = put_entry(buf32, sizeof(u32), 1, fp);
@@ -593,8 +592,7 @@ int avtab_write(struct policydb *p, struct avtab *a, void *fp)
return rc;
for (i = 0; i < a->nslot; i++) {
- for (cur = a->htable[i]; cur;
- cur = cur->next) {
+ for (cur = a->htable[i]; cur; cur = cur->next) {
rc = avtab_write_item(p, cur, fp);
if (rc)
return rc;
@@ -606,10 +604,9 @@ int avtab_write(struct policydb *p, struct avtab *a, void *fp)
void __init avtab_cache_init(void)
{
- avtab_node_cachep = kmem_cache_create("avtab_node",
- sizeof(struct avtab_node),
- 0, SLAB_PANIC, NULL);
- avtab_xperms_cachep = kmem_cache_create("avtab_extended_perms",
- sizeof(struct avtab_extended_perms),
- 0, SLAB_PANIC, NULL);
+ avtab_node_cachep = kmem_cache_create(
+ "avtab_node", sizeof(struct avtab_node), 0, SLAB_PANIC, NULL);
+ avtab_xperms_cachep = kmem_cache_create(
+ "avtab_extended_perms", sizeof(struct avtab_extended_perms), 0,
+ SLAB_PANIC, NULL);
}
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h
index 3c3904bf02b0..8e8820484c55 100644
--- a/security/selinux/ss/avtab.h
+++ b/security/selinux/ss/avtab.h
@@ -9,42 +9,42 @@
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
-/* Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com>
- *
- * Added conditional policy language extensions
- *
- * Copyright (C) 2003 Tresys Technology, LLC
+/* Updated: Frank Mayer <mayerf@tresys.com> and
+ * Karl MacMillan <kmacmillan@tresys.com>
+ * Added conditional policy language extensions
+ * Copyright (C) 2003 Tresys Technology, LLC
*
* Updated: Yuichi Nakamura <ynakam@hitachisoft.jp>
- * Tuned number of hash slots for avtab to reduce memory usage
+ * Tuned number of hash slots for avtab to reduce memory usage
*/
+
#ifndef _SS_AVTAB_H_
#define _SS_AVTAB_H_
#include "security.h"
struct avtab_key {
- u16 source_type; /* source type */
- u16 target_type; /* target type */
- u16 target_class; /* target object class */
-#define AVTAB_ALLOWED 0x0001
-#define AVTAB_AUDITALLOW 0x0002
-#define AVTAB_AUDITDENY 0x0004
-#define AVTAB_AV (AVTAB_ALLOWED | AVTAB_AUDITALLOW | AVTAB_AUDITDENY)
-#define AVTAB_TRANSITION 0x0010
-#define AVTAB_MEMBER 0x0020
-#define AVTAB_CHANGE 0x0040
-#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE)
+ u16 source_type; /* source type */
+ u16 target_type; /* target type */
+ u16 target_class; /* target object class */
+#define AVTAB_ALLOWED 0x0001
+#define AVTAB_AUDITALLOW 0x0002
+#define AVTAB_AUDITDENY 0x0004
+#define AVTAB_AV (AVTAB_ALLOWED | AVTAB_AUDITALLOW | AVTAB_AUDITDENY)
+#define AVTAB_TRANSITION 0x0010
+#define AVTAB_MEMBER 0x0020
+#define AVTAB_CHANGE 0x0040
+#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE)
/* extended permissions */
#define AVTAB_XPERMS_ALLOWED 0x0100
-#define AVTAB_XPERMS_AUDITALLOW 0x0200
+#define AVTAB_XPERMS_AUDITALLOW 0x0200
#define AVTAB_XPERMS_DONTAUDIT 0x0400
-#define AVTAB_XPERMS (AVTAB_XPERMS_ALLOWED | \
- AVTAB_XPERMS_AUDITALLOW | \
- AVTAB_XPERMS_DONTAUDIT)
-#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */
-#define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */
- u16 specified; /* what field is specified */
+#define AVTAB_XPERMS \
+ (AVTAB_XPERMS_ALLOWED | AVTAB_XPERMS_AUDITALLOW | \
+ AVTAB_XPERMS_DONTAUDIT)
+#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */
+#define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */
+ u16 specified; /* what field is specified */
};
/*
@@ -53,8 +53,8 @@ struct avtab_key {
*/
struct avtab_extended_perms {
/* These are not flags. All 256 values may be used */
-#define AVTAB_XPERMS_IOCTLFUNCTION 0x01
-#define AVTAB_XPERMS_IOCTLDRIVER 0x02
+#define AVTAB_XPERMS_IOCTLFUNCTION 0x01
+#define AVTAB_XPERMS_IOCTLDRIVER 0x02
/* extension of the avtab_key specified */
u8 specified; /* ioctl, netfilter, ... */
/*
@@ -82,9 +82,9 @@ struct avtab_node {
struct avtab {
struct avtab_node **htable;
- u32 nel; /* number of elements */
- u32 nslot; /* number of hash slots */
- u32 mask; /* mask to compute hash func */
+ u32 nel; /* number of elements */
+ u32 nslot; /* number of hash slots */
+ u32 mask; /* mask to compute hash func */
};
void avtab_init(struct avtab *h);
@@ -92,6 +92,9 @@ int avtab_alloc(struct avtab *, u32);
int avtab_alloc_dup(struct avtab *new, const struct avtab *orig);
void avtab_destroy(struct avtab *h);
+#define MAX_AVTAB_HASH_BITS 16
+#define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS)
+
#ifdef CONFIG_SECURITY_SELINUX_DEBUG
void avtab_hash_eval(struct avtab *h, const char *tag);
#else
@@ -107,7 +110,8 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
void *p);
int avtab_read(struct avtab *a, void *fp, struct policydb *pol);
-int avtab_write_item(struct policydb *p, const struct avtab_node *cur, void *fp);
+int avtab_write_item(struct policydb *p, const struct avtab_node *cur,
+ void *fp);
int avtab_write(struct policydb *p, struct avtab *a, void *fp);
struct avtab_node *avtab_insert_nonunique(struct avtab *h,
@@ -116,11 +120,7 @@ struct avtab_node *avtab_insert_nonunique(struct avtab *h,
struct avtab_node *avtab_search_node(struct avtab *h,
const struct avtab_key *key);
+struct avtab_node *avtab_search_node_next(struct avtab_node *node,
+ u16 specified);
-struct avtab_node *avtab_search_node_next(struct avtab_node *node, u16 specified);
-
-#define MAX_AVTAB_HASH_BITS 16
-#define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS)
-
-#endif /* _SS_AVTAB_H_ */
-
+#endif /* _SS_AVTAB_H_ */
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 81ff676f209a..f12476855b27 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -1,8 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0-only
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Authors: Karl MacMillan <kmacmillan@tresys.com>
* Frank Mayer <mayerf@tresys.com>
- *
- * Copyright (C) 2003 - 2004 Tresys Technology, LLC
+ * Copyright (C) 2003 - 2004 Tresys Technology, LLC
*/
#include <linux/kernel.h>
@@ -166,9 +165,8 @@ void cond_policydb_destroy(struct policydb *p)
int cond_init_bool_indexes(struct policydb *p)
{
kfree(p->bool_val_to_struct);
- p->bool_val_to_struct = kmalloc_array(p->p_bools.nprim,
- sizeof(*p->bool_val_to_struct),
- GFP_KERNEL);
+ p->bool_val_to_struct = kmalloc_array(
+ p->p_bools.nprim, sizeof(*p->bool_val_to_struct), GFP_KERNEL);
if (!p->bool_val_to_struct)
return -ENOMEM;
return 0;
@@ -287,7 +285,8 @@ static int cond_insertf(struct avtab *a, const struct avtab_key *k,
if (other) {
node_ptr = avtab_search_node(&p->te_cond_avtab, k);
if (node_ptr) {
- if (avtab_search_node_next(node_ptr, k->specified)) {
+ if (avtab_search_node_next(node_ptr,
+ k->specified)) {
pr_err("SELinux: too many conflicting type rules.\n");
return -EINVAL;
}
@@ -478,8 +477,8 @@ int cond_write_bool(void *vkey, void *datum, void *ptr)
* the conditional. This means that the avtab with the conditional
* rules will not be saved but will be rebuilt on policy load.
*/
-static int cond_write_av_list(struct policydb *p,
- struct cond_av_list *list, struct policy_file *fp)
+static int cond_write_av_list(struct policydb *p, struct cond_av_list *list,
+ struct policy_file *fp)
{
__le32 buf[1];
u32 i;
@@ -500,7 +499,7 @@ static int cond_write_av_list(struct policydb *p,
}
static int cond_write_node(struct policydb *p, struct cond_node *node,
- struct policy_file *fp)
+ struct policy_file *fp)
{
__le32 buf[2];
int rc;
@@ -555,7 +554,7 @@ int cond_write_list(struct policydb *p, void *fp)
}
void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key,
- struct extended_perms_decision *xpermd)
+ struct extended_perms_decision *xpermd)
{
struct avtab_node *node;
@@ -563,7 +562,7 @@ void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key,
return;
for (node = avtab_search_node(ctab, key); node;
- node = avtab_search_node_next(node, key->specified)) {
+ node = avtab_search_node_next(node, key->specified)) {
if (node->key.specified & AVTAB_ENABLED)
services_compute_xperms_decision(xpermd, node);
}
@@ -572,7 +571,7 @@ void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key,
* av table, and if so, add them to the result
*/
void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
- struct av_decision *avd, struct extended_perms *xperms)
+ struct av_decision *avd, struct extended_perms *xperms)
{
struct avtab_node *node;
@@ -580,30 +579,29 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
return;
for (node = avtab_search_node(ctab, key); node;
- node = avtab_search_node_next(node, key->specified)) {
- if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) ==
- (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED)))
+ node = avtab_search_node_next(node, key->specified)) {
+ if ((u16)(AVTAB_ALLOWED | AVTAB_ENABLED) ==
+ (node->key.specified & (AVTAB_ALLOWED | AVTAB_ENABLED)))
avd->allowed |= node->datum.u.data;
- if ((u16)(AVTAB_AUDITDENY|AVTAB_ENABLED) ==
- (node->key.specified & (AVTAB_AUDITDENY|AVTAB_ENABLED)))
+ if ((u16)(AVTAB_AUDITDENY | AVTAB_ENABLED) ==
+ (node->key.specified & (AVTAB_AUDITDENY | AVTAB_ENABLED)))
/* Since a '0' in an auditdeny mask represents a
* permission we do NOT want to audit (dontaudit), we use
* the '&' operand to ensure that all '0's in the mask
* are retained (much unlike the allow and auditallow cases).
*/
avd->auditdeny &= node->datum.u.data;
- if ((u16)(AVTAB_AUDITALLOW|AVTAB_ENABLED) ==
- (node->key.specified & (AVTAB_AUDITALLOW|AVTAB_ENABLED)))
+ if ((u16)(AVTAB_AUDITALLOW | AVTAB_ENABLED) ==
+ (node->key.specified & (AVTAB_AUDITALLOW | AVTAB_ENABLED)))
avd->auditallow |= node->datum.u.data;
if (xperms && (node->key.specified & AVTAB_ENABLED) &&
- (node->key.specified & AVTAB_XPERMS))
+ (node->key.specified & AVTAB_XPERMS))
services_compute_xperms_drivers(xperms, node);
}
}
-static int cond_dup_av_list(struct cond_av_list *new,
- struct cond_av_list *orig,
- struct avtab *avtab)
+static int cond_dup_av_list(struct cond_av_list *new, struct cond_av_list *orig,
+ struct avtab *avtab)
{
u32 i;
@@ -614,9 +612,8 @@ static int cond_dup_av_list(struct cond_av_list *new,
return -ENOMEM;
for (i = 0; i < orig->len; i++) {
- new->nodes[i] = avtab_insert_nonunique(avtab,
- &orig->nodes[i]->key,
- &orig->nodes[i]->datum);
+ new->nodes[i] = avtab_insert_nonunique(
+ avtab, &orig->nodes[i]->key, &orig->nodes[i]->datum);
if (!new->nodes[i])
return -ENOMEM;
new->len++;
@@ -637,8 +634,7 @@ static int duplicate_policydb_cond_list(struct policydb *newp,
newp->cond_list_len = 0;
newp->cond_list = kcalloc(origp->cond_list_len,
- sizeof(*newp->cond_list),
- GFP_KERNEL);
+ sizeof(*newp->cond_list), GFP_KERNEL);
if (!newp->cond_list)
goto error;
@@ -649,7 +645,8 @@ static int duplicate_policydb_cond_list(struct policydb *newp,
newp->cond_list_len++;
newn->cur_state = orign->cur_state;
- newn->expr.nodes = kmemdup(orign->expr.nodes,
+ newn->expr.nodes =
+ kmemdup(orign->expr.nodes,
orign->expr.len * sizeof(*orign->expr.nodes),
GFP_KERNEL);
if (!newn->expr.nodes)
@@ -658,12 +655,12 @@ static int duplicate_policydb_cond_list(struct policydb *newp,
newn->expr.len = orign->expr.len;
rc = cond_dup_av_list(&newn->true_list, &orign->true_list,
- &newp->te_cond_avtab);
+ &newp->te_cond_avtab);
if (rc)
goto error;
rc = cond_dup_av_list(&newn->false_list, &orign->false_list,
- &newp->te_cond_avtab);
+ &newp->te_cond_avtab);
if (rc)
goto error;
}
@@ -683,7 +680,8 @@ static int cond_bools_destroy(void *key, void *datum, void *args)
return 0;
}
-static int cond_bools_copy(struct hashtab_node *new, struct hashtab_node *orig, void *args)
+static int cond_bools_copy(struct hashtab_node *new, struct hashtab_node *orig,
+ void *args)
{
struct cond_bool_datum *datum;
@@ -709,7 +707,7 @@ static int cond_bools_index(void *key, void *datum, void *args)
}
static int duplicate_policydb_bools(struct policydb *newdb,
- struct policydb *orig)
+ struct policydb *orig)
{
struct cond_bool_datum **cond_bool_array;
int rc;
@@ -721,7 +719,7 @@ static int duplicate_policydb_bools(struct policydb *newdb,
return -ENOMEM;
rc = hashtab_duplicate(&newdb->p_bools.table, &orig->p_bools.table,
- cond_bools_copy, cond_bools_destroy, NULL);
+ cond_bools_copy, cond_bools_destroy, NULL);
if (rc) {
kfree(cond_bool_array);
return -ENOMEM;
diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h
index 5a7b51278dc6..b972ce40db18 100644
--- a/security/selinux/ss/conditional.h
+++ b/security/selinux/ss/conditional.h
@@ -1,8 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Authors: Karl MacMillan <kmacmillan@tresys.com>
* Frank Mayer <mayerf@tresys.com>
- *
- * Copyright (C) 2003 - 2004 Tresys Technology, LLC
+ * Copyright (C) 2003 - 2004 Tresys Technology, LLC
*/
#ifndef _CONDITIONAL_H_
@@ -20,14 +19,14 @@
* in reverse polish notation.
*/
struct cond_expr_node {
-#define COND_BOOL 1 /* plain bool */
-#define COND_NOT 2 /* !bool */
-#define COND_OR 3 /* bool || bool */
-#define COND_AND 4 /* bool && bool */
-#define COND_XOR 5 /* bool ^ bool */
-#define COND_EQ 6 /* bool == bool */
-#define COND_NEQ 7 /* bool != bool */
-#define COND_LAST COND_NEQ
+#define COND_BOOL 1 /* plain bool */
+#define COND_NOT 2 /* !bool */
+#define COND_OR 3 /* bool || bool */
+#define COND_AND 4 /* bool && bool */
+#define COND_XOR 5 /* bool ^ bool */
+#define COND_EQ 6 /* bool == bool */
+#define COND_NEQ 7 /* bool != bool */
+#define COND_LAST COND_NEQ
u32 expr_type;
u32 boolean;
};
@@ -75,9 +74,9 @@ int cond_write_bool(void *key, void *datum, void *ptr);
int cond_write_list(struct policydb *p, void *fp);
void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
- struct av_decision *avd, struct extended_perms *xperms);
+ struct av_decision *avd, struct extended_perms *xperms);
void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key,
- struct extended_perms_decision *xpermd);
+ struct extended_perms_decision *xpermd);
void evaluate_cond_nodes(struct policydb *p);
void cond_policydb_destroy_dup(struct policydb *p);
int cond_policydb_dup(struct policydb *new, struct policydb *orig);
diff --git a/security/selinux/ss/constraint.h b/security/selinux/ss/constraint.h
index f76eb3128ad5..203033cfad67 100644
--- a/security/selinux/ss/constraint.h
+++ b/security/selinux/ss/constraint.h
@@ -13,6 +13,7 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#ifndef _SS_CONSTRAINT_H_
#define _SS_CONSTRAINT_H_
@@ -21,43 +22,43 @@
#define CEXPR_MAXDEPTH 5
struct constraint_expr {
-#define CEXPR_NOT 1 /* not expr */
-#define CEXPR_AND 2 /* expr and expr */
-#define CEXPR_OR 3 /* expr or expr */
-#define CEXPR_ATTR 4 /* attr op attr */
-#define CEXPR_NAMES 5 /* attr op names */
- u32 expr_type; /* expression type */
-
-#define CEXPR_USER 1 /* user */
-#define CEXPR_ROLE 2 /* role */
-#define CEXPR_TYPE 4 /* type */
-#define CEXPR_TARGET 8 /* target if set, source otherwise */
-#define CEXPR_XTARGET 16 /* special 3rd target for validatetrans rule */
-#define CEXPR_L1L2 32 /* low level 1 vs. low level 2 */
-#define CEXPR_L1H2 64 /* low level 1 vs. high level 2 */
-#define CEXPR_H1L2 128 /* high level 1 vs. low level 2 */
-#define CEXPR_H1H2 256 /* high level 1 vs. high level 2 */
-#define CEXPR_L1H1 512 /* low level 1 vs. high level 1 */
-#define CEXPR_L2H2 1024 /* low level 2 vs. high level 2 */
- u32 attr; /* attribute */
-
-#define CEXPR_EQ 1 /* == or eq */
-#define CEXPR_NEQ 2 /* != */
-#define CEXPR_DOM 3 /* dom */
-#define CEXPR_DOMBY 4 /* domby */
-#define CEXPR_INCOMP 5 /* incomp */
- u32 op; /* operator */
-
- struct ebitmap names; /* names */
+#define CEXPR_NOT 1 /* not expr */
+#define CEXPR_AND 2 /* expr and expr */
+#define CEXPR_OR 3 /* expr or expr */
+#define CEXPR_ATTR 4 /* attr op attr */
+#define CEXPR_NAMES 5 /* attr op names */
+ u32 expr_type; /* expression type */
+
+#define CEXPR_USER 1 /* user */
+#define CEXPR_ROLE 2 /* role */
+#define CEXPR_TYPE 4 /* type */
+#define CEXPR_TARGET 8 /* target if set, source otherwise */
+#define CEXPR_XTARGET 16 /* special 3rd target for validatetrans rule */
+#define CEXPR_L1L2 32 /* low level 1 vs. low level 2 */
+#define CEXPR_L1H2 64 /* low level 1 vs. high level 2 */
+#define CEXPR_H1L2 128 /* high level 1 vs. low level 2 */
+#define CEXPR_H1H2 256 /* high level 1 vs. high level 2 */
+#define CEXPR_L1H1 512 /* low level 1 vs. high level 1 */
+#define CEXPR_L2H2 1024 /* low level 2 vs. high level 2 */
+ u32 attr; /* attribute */
+
+#define CEXPR_EQ 1 /* == or eq */
+#define CEXPR_NEQ 2 /* != */
+#define CEXPR_DOM 3 /* dom */
+#define CEXPR_DOMBY 4 /* domby */
+#define CEXPR_INCOMP 5 /* incomp */
+ u32 op; /* operator */
+
+ struct ebitmap names; /* names */
struct type_set *type_names;
- struct constraint_expr *next; /* next expression */
+ struct constraint_expr *next; /* next expression */
};
struct constraint_node {
- u32 permissions; /* constrained permissions */
- struct constraint_expr *expr; /* constraint on permissions */
- struct constraint_node *next; /* next constraint */
+ u32 permissions; /* constrained permissions */
+ struct constraint_expr *expr; /* constraint on permissions */
+ struct constraint_node *next; /* next constraint */
};
-#endif /* _SS_CONSTRAINT_H_ */
+#endif /* _SS_CONSTRAINT_H_ */
diff --git a/security/selinux/ss/context.c b/security/selinux/ss/context.c
index 38bc0aa524a6..e39990f494dd 100644
--- a/security/selinux/ss/context.c
+++ b/security/selinux/ss/context.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Implementations of the security context functions.
*
diff --git a/security/selinux/ss/context.h b/security/selinux/ss/context.h
index 1f59468c0759..7ccab2e6965f 100644
--- a/security/selinux/ss/context.h
+++ b/security/selinux/ss/context.h
@@ -13,6 +13,7 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#ifndef _SS_CONTEXT_H_
#define _SS_CONTEXT_H_
@@ -28,9 +29,9 @@ struct context {
u32 user;
u32 role;
u32 type;
- u32 len; /* length of string in bytes */
+ u32 len; /* length of string in bytes */
struct mls_range range;
- char *str; /* string representation if context cannot be mapped. */
+ char *str; /* string representation if context cannot be mapped. */
};
static inline void mls_context_init(struct context *c)
@@ -38,7 +39,8 @@ static inline void mls_context_init(struct context *c)
memset(&c->range, 0, sizeof(c->range));
}
-static inline int mls_context_cpy(struct context *dst, const struct context *src)
+static inline int mls_context_cpy(struct context *dst,
+ const struct context *src)
{
int rc;
@@ -58,7 +60,8 @@ out:
/*
* Sets both levels in the MLS range of 'dst' to the low level of 'src'.
*/
-static inline int mls_context_cpy_low(struct context *dst, const struct context *src)
+static inline int mls_context_cpy_low(struct context *dst,
+ const struct context *src)
{
int rc;
@@ -78,7 +81,8 @@ out:
/*
* Sets both levels in the MLS range of 'dst' to the high level of 'src'.
*/
-static inline int mls_context_cpy_high(struct context *dst, const struct context *src)
+static inline int mls_context_cpy_high(struct context *dst,
+ const struct context *src)
{
int rc;
@@ -95,9 +99,9 @@ out:
return rc;
}
-
static inline int mls_context_glblub(struct context *dst,
- const struct context *c1, const struct context *c2)
+ const struct context *c1,
+ const struct context *c2)
{
struct mls_range *dr = &dst->range;
const struct mls_range *r1 = &c1->range, *r2 = &c2->range;
@@ -114,13 +118,13 @@ static inline int mls_context_glblub(struct context *dst,
/* Take the least of the high */
dr->level[1].sens = min(r1->level[1].sens, r2->level[1].sens);
- rc = ebitmap_and(&dr->level[0].cat,
- &r1->level[0].cat, &r2->level[0].cat);
+ rc = ebitmap_and(&dr->level[0].cat, &r1->level[0].cat,
+ &r2->level[0].cat);
if (rc)
goto out;
- rc = ebitmap_and(&dr->level[1].cat,
- &r1->level[1].cat, &r2->level[1].cat);
+ rc = ebitmap_and(&dr->level[1].cat, &r1->level[1].cat,
+ &r2->level[1].cat);
if (rc)
goto out;
@@ -128,7 +132,8 @@ out:
return rc;
}
-static inline int mls_context_cmp(const struct context *c1, const struct context *c2)
+static inline int mls_context_cmp(const struct context *c1,
+ const struct context *c2)
{
return ((c1->range.level[0].sens == c2->range.level[0].sens) &&
ebitmap_cmp(&c1->range.level[0].cat, &c2->range.level[0].cat) &&
@@ -183,19 +188,17 @@ static inline void context_destroy(struct context *c)
mls_context_destroy(c);
}
-static inline int context_cmp(const struct context *c1, const struct context *c2)
+static inline int context_cmp(const struct context *c1,
+ const struct context *c2)
{
if (c1->len && c2->len)
return (c1->len == c2->len && !strcmp(c1->str, c2->str));
if (c1->len || c2->len)
return 0;
- return ((c1->user == c2->user) &&
- (c1->role == c2->role) &&
- (c1->type == c2->type) &&
- mls_context_cmp(c1, c2));
+ return ((c1->user == c2->user) && (c1->role == c2->role) &&
+ (c1->type == c2->type) && mls_context_cmp(c1, c2));
}
u32 context_compute_hash(const struct context *c);
-#endif /* _SS_CONTEXT_H_ */
-
+#endif /* _SS_CONTEXT_H_ */
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 77875ad355f7..67c1a73cd5ee 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Implementation of the extensible bitmap type.
*
@@ -6,14 +6,11 @@
*/
/*
* Updated: Hewlett-Packard <paul@paul-moore.com>
+ * Added support to import/export the NetLabel category bitmap
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
*
- * Added support to import/export the NetLabel category bitmap
- *
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
- */
-/*
* Updated: KaiGai Kohei <kaigai@ak.jp.nec.com>
- * Applied standard bit operations to improve bitmap scanning.
+ * Applied standard bit operations to improve bitmap scanning.
*/
#include <linux/kernel.h>
@@ -24,7 +21,7 @@
#include "ebitmap.h"
#include "policydb.h"
-#define BITS_PER_U64 (sizeof(u64) * 8)
+#define BITS_PER_U64 (sizeof(u64) * 8)
static struct kmem_cache *ebitmap_node_cachep __ro_after_init;
@@ -37,8 +34,7 @@ int ebitmap_cmp(const struct ebitmap *e1, const struct ebitmap *e2)
n1 = e1->node;
n2 = e2->node;
- while (n1 && n2 &&
- (n1->startbit == n2->startbit) &&
+ while (n1 && n2 && (n1->startbit == n2->startbit) &&
!memcmp(n1->maps, n2->maps, EBITMAP_SIZE / 8)) {
n1 = n1->next;
n2 = n2->next;
@@ -79,14 +75,16 @@ int ebitmap_cpy(struct ebitmap *dst, const struct ebitmap *src)
return 0;
}
-int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1, const struct ebitmap *e2)
+int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1,
+ const struct ebitmap *e2)
{
struct ebitmap_node *n;
int bit, rc;
ebitmap_init(dst);
- ebitmap_for_each_positive_bit(e1, n, bit) {
+ ebitmap_for_each_positive_bit(e1, n, bit)
+ {
if (ebitmap_get_bit(e2, bit)) {
rc = ebitmap_set_bit(dst, bit, 1);
if (rc < 0)
@@ -96,7 +94,6 @@ int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1, const struct ebit
return 0;
}
-
#ifdef CONFIG_NETLABEL
/**
* ebitmap_netlbl_export - Export an ebitmap into a NetLabel category bitmap
@@ -131,10 +128,8 @@ int ebitmap_netlbl_export(struct ebitmap *ebmap,
for (iter = 0; iter < EBITMAP_UNIT_NUMS; iter++) {
e_map = e_iter->maps[iter];
if (e_map != 0) {
- rc = netlbl_catmap_setlong(catmap,
- offset,
- e_map,
- GFP_ATOMIC);
+ rc = netlbl_catmap_setlong(catmap, offset,
+ e_map, GFP_ATOMIC);
if (rc != 0)
goto netlbl_export_failure;
}
@@ -185,7 +180,8 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap,
if (e_iter == NULL ||
offset >= e_iter->startbit + EBITMAP_SIZE) {
e_prev = e_iter;
- e_iter = kmem_cache_zalloc(ebitmap_node_cachep, GFP_ATOMIC);
+ e_iter = kmem_cache_zalloc(ebitmap_node_cachep,
+ GFP_ATOMIC);
if (e_iter == NULL)
goto netlbl_import_failure;
e_iter->startbit = offset - (offset % EBITMAP_SIZE);
@@ -218,7 +214,8 @@ netlbl_import_failure:
* if last_e2bit is non-zero, the highest set bit in e2 cannot exceed
* last_e2bit.
*/
-int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2, u32 last_e2bit)
+int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2,
+ u32 last_e2bit)
{
const struct ebitmap_node *n1, *n2;
int i;
@@ -234,8 +231,8 @@ int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2, u32 las
n1 = n1->next;
continue;
}
- for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; )
- i--; /* Skip trailing NULL map entries */
+ for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i];)
+ i--; /* Skip trailing NULL map entries */
if (last_e2bit && (i >= 0)) {
u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE +
__fls(n2->maps[i]);
@@ -302,8 +299,8 @@ int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value)
* within the bitmap
*/
if (prev)
- e->highbit = prev->startbit
- + EBITMAP_SIZE;
+ e->highbit = prev->startbit +
+ EBITMAP_SIZE;
else
e->highbit = 0;
}
@@ -424,7 +421,8 @@ int ebitmap_read(struct ebitmap *e, void *fp)
if (!n || startbit >= n->startbit + EBITMAP_SIZE) {
struct ebitmap_node *tmp;
- tmp = kmem_cache_zalloc(ebitmap_node_cachep, GFP_KERNEL);
+ tmp = kmem_cache_zalloc(ebitmap_node_cachep,
+ GFP_KERNEL);
if (!tmp) {
pr_err("SELinux: ebitmap: out of memory\n");
rc = -ENOMEM;
@@ -481,7 +479,8 @@ int ebitmap_write(const struct ebitmap *e, void *fp)
count = 0;
last_bit = 0;
last_startbit = -1;
- ebitmap_for_each_positive_bit(e, n, bit) {
+ ebitmap_for_each_positive_bit(e, n, bit)
+ {
if (rounddown(bit, (int)BITS_PER_U64) > last_startbit) {
count++;
last_startbit = rounddown(bit, BITS_PER_U64);
@@ -497,7 +496,8 @@ int ebitmap_write(const struct ebitmap *e, void *fp)
map = 0;
last_startbit = INT_MIN;
- ebitmap_for_each_positive_bit(e, n, bit) {
+ ebitmap_for_each_positive_bit(e, n, bit)
+ {
if (rounddown(bit, (int)BITS_PER_U64) > last_startbit) {
__le64 buf64[1];
@@ -559,6 +559,6 @@ u32 ebitmap_hash(const struct ebitmap *e, u32 hash)
void __init ebitmap_cache_init(void)
{
ebitmap_node_cachep = kmem_cache_create("ebitmap_node",
- sizeof(struct ebitmap_node),
- 0, SLAB_PANIC, NULL);
+ sizeof(struct ebitmap_node), 0,
+ SLAB_PANIC, NULL);
}
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index e3c807cfad90..02798b35eecc 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -12,23 +12,25 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#ifndef _SS_EBITMAP_H_
#define _SS_EBITMAP_H_
#include <net/netlabel.h>
#ifdef CONFIG_64BIT
-#define EBITMAP_NODE_SIZE 64
+#define EBITMAP_NODE_SIZE 64
#else
-#define EBITMAP_NODE_SIZE 32
+#define EBITMAP_NODE_SIZE 32
#endif
-#define EBITMAP_UNIT_NUMS ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\
- / sizeof(unsigned long))
-#define EBITMAP_UNIT_SIZE BITS_PER_LONG
-#define EBITMAP_SIZE (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE)
-#define EBITMAP_BIT 1ULL
-#define EBITMAP_SHIFT_UNIT_SIZE(x) \
+#define EBITMAP_UNIT_NUMS \
+ ((EBITMAP_NODE_SIZE - sizeof(void *) - sizeof(u32)) / \
+ sizeof(unsigned long))
+#define EBITMAP_UNIT_SIZE BITS_PER_LONG
+#define EBITMAP_SIZE (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE)
+#define EBITMAP_BIT 1ULL
+#define EBITMAP_SHIFT_UNIT_SIZE(x) \
(((x) >> EBITMAP_UNIT_SIZE / 2) >> EBITMAP_UNIT_SIZE / 2)
struct ebitmap_node {
@@ -38,8 +40,8 @@ struct ebitmap_node {
};
struct ebitmap {
- struct ebitmap_node *node; /* first node in the bitmap */
- u32 highbit; /* highest position in the total bitmap */
+ struct ebitmap_node *node; /* first node in the bitmap */
+ u32 highbit; /* highest position in the total bitmap */
};
#define ebitmap_length(e) ((e)->highbit)
@@ -80,9 +82,9 @@ static inline unsigned int ebitmap_next_positive(const struct ebitmap *e,
return ebitmap_length(e);
}
-#define EBITMAP_NODE_INDEX(node, bit) \
+#define EBITMAP_NODE_INDEX(node, bit) \
(((bit) - (node)->startbit) / EBITMAP_UNIT_SIZE)
-#define EBITMAP_NODE_OFFSET(node, bit) \
+#define EBITMAP_NODE_OFFSET(node, bit) \
(((bit) - (node)->startbit) % EBITMAP_UNIT_SIZE)
static inline int ebitmap_node_get_bit(const struct ebitmap_node *n,
@@ -117,15 +119,17 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
n->maps[index] &= ~(EBITMAP_BIT << ofs);
}
-#define ebitmap_for_each_positive_bit(e, n, bit) \
- for ((bit) = ebitmap_start_positive(e, &(n)); \
- (bit) < ebitmap_length(e); \
- (bit) = ebitmap_next_positive(e, &(n), bit)) \
+#define ebitmap_for_each_positive_bit(e, n, bit) \
+ for ((bit) = ebitmap_start_positive(e, &(n)); \
+ (bit) < ebitmap_length(e); \
+ (bit) = ebitmap_next_positive(e, &(n), bit))
int ebitmap_cmp(const struct ebitmap *e1, const struct ebitmap *e2);
int ebitmap_cpy(struct ebitmap *dst, const struct ebitmap *src);
-int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1, const struct ebitmap *e2);
-int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2, u32 last_e2bit);
+int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1,
+ const struct ebitmap *e2);
+int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2,
+ u32 last_e2bit);
int ebitmap_get_bit(const struct ebitmap *e, unsigned long bit);
int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
void ebitmap_destroy(struct ebitmap *e);
@@ -151,4 +155,4 @@ static inline int ebitmap_netlbl_import(struct ebitmap *ebmap,
}
#endif
-#endif /* _SS_EBITMAP_H_ */
+#endif /* _SS_EBITMAP_H_ */
diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index c05d8346a94a..754bedbde133 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -4,6 +4,7 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/errno.h>
@@ -47,8 +48,8 @@ int hashtab_init(struct hashtab *h, u32 nel_hint)
return 0;
}
-int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
- void *key, void *datum)
+int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst, void *key,
+ void *datum)
{
struct hashtab_node *newnode;
@@ -83,8 +84,7 @@ void hashtab_destroy(struct hashtab *h)
h->htable = NULL;
}
-int hashtab_map(struct hashtab *h,
- int (*apply)(void *k, void *d, void *args),
+int hashtab_map(struct hashtab *h, int (*apply)(void *k, void *d, void *args),
void *args)
{
u32 i;
@@ -137,10 +137,9 @@ void hashtab_stat(struct hashtab *h, struct hashtab_info *info)
#endif /* CONFIG_SECURITY_SELINUX_DEBUG */
int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
- int (*copy)(struct hashtab_node *new,
- struct hashtab_node *orig, void *args),
- int (*destroy)(void *k, void *d, void *args),
- void *args)
+ int (*copy)(struct hashtab_node *new,
+ struct hashtab_node *orig, void *args),
+ int (*destroy)(void *k, void *d, void *args), void *args)
{
struct hashtab_node *cur, *tmp, *tail;
u32 i;
@@ -178,7 +177,7 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
return 0;
- error:
+error:
for (i = 0; i < new->size; i++) {
for (cur = new->htable[i]; cur; cur = tmp) {
tmp = cur->next;
@@ -193,7 +192,7 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
void __init hashtab_cache_init(void)
{
- hashtab_node_cachep = kmem_cache_create("hashtab_node",
- sizeof(struct hashtab_node),
- 0, SLAB_PANIC, NULL);
+ hashtab_node_cachep = kmem_cache_create("hashtab_node",
+ sizeof(struct hashtab_node), 0,
+ SLAB_PANIC, NULL);
}
diff --git a/security/selinux/ss/hashtab.h b/security/selinux/ss/hashtab.h
index 09b0a3744937..5f74dcc1360f 100644
--- a/security/selinux/ss/hashtab.h
+++ b/security/selinux/ss/hashtab.h
@@ -8,6 +8,7 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#ifndef _SS_HASHTAB_H_
#define _SS_HASHTAB_H_
@@ -15,12 +16,11 @@
#include <linux/errno.h>
#include <linux/sched.h>
-#define HASHTAB_MAX_NODES U32_MAX
+#define HASHTAB_MAX_NODES U32_MAX
struct hashtab_key_params {
- u32 (*hash)(const void *key); /* hash function */
- int (*cmp)(const void *key1, const void *key2);
- /* key comparison function */
+ u32 (*hash)(const void *key); /* hash func */
+ int (*cmp)(const void *key1, const void *key2); /* comparison func */
};
struct hashtab_node {
@@ -30,9 +30,9 @@ struct hashtab_node {
};
struct hashtab {
- struct hashtab_node **htable; /* hash table */
- u32 size; /* number of slots in hash table */
- u32 nel; /* number of elements in hash table */
+ struct hashtab_node **htable; /* hash table */
+ u32 size; /* number of slots in hash table */
+ u32 nel; /* number of elements in hash table */
};
struct hashtab_info {
@@ -48,8 +48,8 @@ struct hashtab_info {
*/
int hashtab_init(struct hashtab *h, u32 nel_hint);
-int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
- void *key, void *datum);
+int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst, void *key,
+ void *datum);
/*
* Inserts the specified (key, datum) pair into the specified hash table.
@@ -84,8 +84,8 @@ static inline int hashtab_insert(struct hashtab *h, void *key, void *datum,
cur = cur->next;
}
- return __hashtab_insert(h, prev ? &prev->next : &h->htable[hvalue],
- key, datum);
+ return __hashtab_insert(h, prev ? &prev->next : &h->htable[hvalue], key,
+ datum);
}
/*
@@ -133,15 +133,13 @@ void hashtab_destroy(struct hashtab *h);
* iterating through the hash table and will propagate the error
* return to its caller.
*/
-int hashtab_map(struct hashtab *h,
- int (*apply)(void *k, void *d, void *args),
+int hashtab_map(struct hashtab *h, int (*apply)(void *k, void *d, void *args),
void *args);
int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
- int (*copy)(struct hashtab_node *new,
- struct hashtab_node *orig, void *args),
- int (*destroy)(void *k, void *d, void *args),
- void *args);
+ int (*copy)(struct hashtab_node *new,
+ struct hashtab_node *orig, void *args),
+ int (*destroy)(void *k, void *d, void *args), void *args);
#ifdef CONFIG_SECURITY_SELINUX_DEBUG
/* Fill info with some hash table statistics */
@@ -149,7 +147,8 @@ void hashtab_stat(struct hashtab *h, struct hashtab_info *info);
#else
static inline void hashtab_stat(struct hashtab *h, struct hashtab_info *info)
{
+ return;
}
#endif
-#endif /* _SS_HASHTAB_H */
+#endif /* _SS_HASHTAB_H */
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index cd38f5913b63..989c809d310d 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -4,19 +4,15 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
/*
* Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ * Support for enhanced MLS infrastructure.
+ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
*
- * Support for enhanced MLS infrastructure.
- *
- * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
- */
-/*
* Updated: Hewlett-Packard <paul@paul-moore.com>
- *
- * Added support to import/export the MLS label from NetLabel
- *
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * Added support to import/export the MLS label from NetLabel
+ * Copyright (C) Hewlett-Packard Development Company, L.P., 2006
*/
#include <linux/kernel.h>
@@ -52,7 +48,8 @@ int mls_compute_context_len(struct policydb *p, struct context *context)
head = -2;
prev = -2;
e = &context->range.level[l].cat;
- ebitmap_for_each_positive_bit(e, node, i) {
+ ebitmap_for_each_positive_bit(e, node, i)
+ {
if (i - prev > 1) {
/* one or more negative bits are skipped */
if (head != prev) {
@@ -86,8 +83,7 @@ int mls_compute_context_len(struct policydb *p, struct context *context)
* the MLS fields of `context' into the string `*scontext'.
* Update `*scontext' to point to the end of the MLS fields.
*/
-void mls_sid_to_context(struct policydb *p,
- struct context *context,
+void mls_sid_to_context(struct policydb *p, struct context *context,
char **scontext)
{
char *scontextp, *nm;
@@ -112,7 +108,8 @@ void mls_sid_to_context(struct policydb *p,
head = -2;
prev = -2;
e = &context->range.level[l].cat;
- ebitmap_for_each_positive_bit(e, node, i) {
+ ebitmap_for_each_positive_bit(e, node, i)
+ {
if (i - prev > 1) {
/* one or more negative bits are skipped */
if (prev != head) {
@@ -230,12 +227,8 @@ int mls_context_isvalid(struct policydb *p, struct context *c)
* Policy read-lock must be held for sidtab lookup.
*
*/
-int mls_context_to_sid(struct policydb *pol,
- char oldc,
- char *scontext,
- struct context *context,
- struct sidtab *s,
- u32 def_sid)
+int mls_context_to_sid(struct policydb *pol, char oldc, char *scontext,
+ struct context *context, struct sidtab *s, u32 def_sid)
{
char *sensitivity, *cur_cat, *next_cat, *rngptr;
struct level_datum *levdatum;
@@ -333,7 +326,8 @@ int mls_context_to_sid(struct policydb *pol,
return -EINVAL;
for (i = catdatum->value; i < rngdatum->value; i++) {
- rc = ebitmap_set_bit(&context->range.level[l].cat, i, 1);
+ rc = ebitmap_set_bit(
+ &context->range.level[l].cat, i, 1);
if (rc)
return rc;
}
@@ -371,8 +365,8 @@ int mls_from_string(struct policydb *p, char *str, struct context *context,
if (!tmpstr) {
rc = -ENOMEM;
} else {
- rc = mls_context_to_sid(p, ':', tmpstr, context,
- NULL, SECSID_NULL);
+ rc = mls_context_to_sid(p, ':', tmpstr, context, NULL,
+ SECSID_NULL);
kfree(tmpstr);
}
@@ -382,8 +376,7 @@ int mls_from_string(struct policydb *p, char *str, struct context *context,
/*
* Copies the MLS range `range' into `context'.
*/
-int mls_range_set(struct context *context,
- struct mls_range *range)
+int mls_range_set(struct context *context, struct mls_range *range)
{
int l, rc = 0;
@@ -399,9 +392,8 @@ int mls_range_set(struct context *context,
return rc;
}
-int mls_setup_user_range(struct policydb *p,
- struct context *fromcon, struct user_datum *user,
- struct context *usercon)
+int mls_setup_user_range(struct policydb *p, struct context *fromcon,
+ struct user_datum *user, struct context *usercon)
{
if (p->mls_enabled) {
struct mls_level *fromcon_sen = &(fromcon->range.level[0]);
@@ -444,10 +436,8 @@ int mls_setup_user_range(struct policydb *p,
* policy `oldp' to the values specified in the policy `newp',
* storing the resulting context in `newc'.
*/
-int mls_convert_context(struct policydb *oldp,
- struct policydb *newp,
- struct context *oldc,
- struct context *newc)
+int mls_convert_context(struct policydb *oldp, struct policydb *newp,
+ struct context *oldc, struct context *newc)
{
struct level_datum *levdatum;
struct cat_datum *catdatum;
@@ -468,8 +458,9 @@ int mls_convert_context(struct policydb *oldp,
return -EINVAL;
newc->range.level[l].sens = levdatum->level->sens;
- ebitmap_for_each_positive_bit(&oldc->range.level[l].cat,
- node, i) {
+ ebitmap_for_each_positive_bit(&oldc->range.level[l].cat, node,
+ i)
+ {
int rc;
catdatum = symtab_search(&newp->p_cats,
@@ -486,13 +477,9 @@ int mls_convert_context(struct policydb *oldp,
return 0;
}
-int mls_compute_sid(struct policydb *p,
- struct context *scontext,
- struct context *tcontext,
- u16 tclass,
- u32 specified,
- struct context *newcontext,
- bool sock)
+int mls_compute_sid(struct policydb *p, struct context *scontext,
+ struct context *tcontext, u16 tclass, u32 specified,
+ struct context *newcontext, bool sock)
{
struct range_trans rtr;
struct mls_range *r;
@@ -532,8 +519,8 @@ int mls_compute_sid(struct policydb *p,
case DEFAULT_TARGET_LOW_HIGH:
return mls_context_cpy(newcontext, tcontext);
case DEFAULT_GLBLUB:
- return mls_context_glblub(newcontext,
- scontext, tcontext);
+ return mls_context_glblub(newcontext, scontext,
+ tcontext);
}
fallthrough;
@@ -563,8 +550,7 @@ int mls_compute_sid(struct policydb *p,
* NetLabel MLS sensitivity level field.
*
*/
-void mls_export_netlbl_lvl(struct policydb *p,
- struct context *context,
+void mls_export_netlbl_lvl(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr)
{
if (!p->mls_enabled)
@@ -585,8 +571,7 @@ void mls_export_netlbl_lvl(struct policydb *p,
* NetLabel MLS sensitivity level into the context.
*
*/
-void mls_import_netlbl_lvl(struct policydb *p,
- struct context *context,
+void mls_import_netlbl_lvl(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr)
{
if (!p->mls_enabled)
@@ -607,8 +592,7 @@ void mls_import_netlbl_lvl(struct policydb *p,
* MLS category field. Returns zero on success, negative values on failure.
*
*/
-int mls_export_netlbl_cat(struct policydb *p,
- struct context *context,
+int mls_export_netlbl_cat(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr)
{
int rc;
@@ -637,8 +621,7 @@ int mls_export_netlbl_cat(struct policydb *p,
* negative values on failure.
*
*/
-int mls_import_netlbl_cat(struct policydb *p,
- struct context *context,
+int mls_import_netlbl_cat(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr)
{
int rc;
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index 107681dd1824..07980636751f 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -4,19 +4,15 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
/*
* Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ * Support for enhanced MLS infrastructure.
+ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
*
- * Support for enhanced MLS infrastructure.
- *
- * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
- */
-/*
* Updated: Hewlett-Packard <paul@paul-moore.com>
- *
- * Added support to import/export the MLS label from NetLabel
- *
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * Added support to import/export the MLS label from NetLabel
+ * Copyright (X) Hewlett-Packard Development Company, L.P., 2006
*/
#ifndef _SS_MLS_H_
@@ -35,47 +31,32 @@ int mls_context_isvalid(struct policydb *p, struct context *c);
int mls_range_isvalid(struct policydb *p, struct mls_range *r);
int mls_level_isvalid(struct policydb *p, struct mls_level *l);
-int mls_context_to_sid(struct policydb *p,
- char oldc,
- char *scontext,
- struct context *context,
- struct sidtab *s,
- u32 def_sid);
+int mls_context_to_sid(struct policydb *p, char oldc, char *scontext,
+ struct context *context, struct sidtab *s, u32 def_sid);
int mls_from_string(struct policydb *p, char *str, struct context *context,
gfp_t gfp_mask);
int mls_range_set(struct context *context, struct mls_range *range);
-int mls_convert_context(struct policydb *oldp,
- struct policydb *newp,
- struct context *oldc,
- struct context *newc);
+int mls_convert_context(struct policydb *oldp, struct policydb *newp,
+ struct context *oldc, struct context *newc);
-int mls_compute_sid(struct policydb *p,
- struct context *scontext,
- struct context *tcontext,
- u16 tclass,
- u32 specified,
- struct context *newcontext,
- bool sock);
+int mls_compute_sid(struct policydb *p, struct context *scontext,
+ struct context *tcontext, u16 tclass, u32 specified,
+ struct context *newcontext, bool sock);
-int mls_setup_user_range(struct policydb *p,
- struct context *fromcon, struct user_datum *user,
- struct context *usercon);
+int mls_setup_user_range(struct policydb *p, struct context *fromcon,
+ struct user_datum *user, struct context *usercon);
#ifdef CONFIG_NETLABEL
-void mls_export_netlbl_lvl(struct policydb *p,
- struct context *context,
+void mls_export_netlbl_lvl(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr);
-void mls_import_netlbl_lvl(struct policydb *p,
- struct context *context,
+void mls_import_netlbl_lvl(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr);
-int mls_export_netlbl_cat(struct policydb *p,
- struct context *context,
+int mls_export_netlbl_cat(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr);
-int mls_import_netlbl_cat(struct policydb *p,
- struct context *context,
+int mls_import_netlbl_cat(struct policydb *p, struct context *context,
struct netlbl_lsm_secattr *secattr);
#else
static inline void mls_export_netlbl_lvl(struct policydb *p,
@@ -112,5 +93,4 @@ static inline u32 mls_range_hash(const struct mls_range *r, u32 hash)
return hash;
}
-#endif /* _SS_MLS_H */
-
+#endif /* _SS_MLS_H */
diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h
index f492cf148891..7ef6e8cb0cf4 100644
--- a/security/selinux/ss/mls_types.h
+++ b/security/selinux/ss/mls_types.h
@@ -4,12 +4,11 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
/*
* Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
- *
- * Support for enhanced MLS infrastructure.
- *
- * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
+ * Support for enhanced MLS infrastructure.
+ * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
*/
#ifndef _SS_MLS_TYPES_H_
@@ -19,34 +18,35 @@
#include "ebitmap.h"
struct mls_level {
- u32 sens; /* sensitivity */
- struct ebitmap cat; /* category set */
+ u32 sens; /* sensitivity */
+ struct ebitmap cat; /* category set */
};
struct mls_range {
struct mls_level level[2]; /* low == level[0], high == level[1] */
};
-static inline int mls_level_eq(const struct mls_level *l1, const struct mls_level *l2)
+static inline int mls_level_eq(const struct mls_level *l1,
+ const struct mls_level *l2)
{
- return ((l1->sens == l2->sens) &&
- ebitmap_cmp(&l1->cat, &l2->cat));
+ return ((l1->sens == l2->sens) && ebitmap_cmp(&l1->cat, &l2->cat));
}
-static inline int mls_level_dom(const struct mls_level *l1, const struct mls_level *l2)
+static inline int mls_level_dom(const struct mls_level *l1,
+ const struct mls_level *l2)
{
return ((l1->sens >= l2->sens) &&
ebitmap_contains(&l1->cat, &l2->cat, 0));
}
#define mls_level_incomp(l1, l2) \
-(!mls_level_dom((l1), (l2)) && !mls_level_dom((l2), (l1)))
+ (!mls_level_dom((l1), (l2)) && !mls_level_dom((l2), (l1)))
#define mls_level_between(l1, l2, l3) \
-(mls_level_dom((l1), (l2)) && mls_level_dom((l3), (l1)))
+ (mls_level_dom((l1), (l2)) && mls_level_dom((l3), (l1)))
-#define mls_range_contains(r1, r2) \
-(mls_level_dom(&(r2).level[0], &(r1).level[0]) && \
- mls_level_dom(&(r1).level[1], &(r2).level[1]))
+#define mls_range_contains(r1, r2) \
+ (mls_level_dom(&(r2).level[0], &(r1).level[0]) && \
+ mls_level_dom(&(r1).level[1], &(r2).level[1]))
-#endif /* _SS_MLS_TYPES_H_ */
+#endif /* _SS_MLS_TYPES_H_ */
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 3b19ad28c922..3d22d5baa829 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -7,25 +7,21 @@
/*
* Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ * Support for enhanced MLS infrastructure.
+ * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
*
- * Support for enhanced MLS infrastructure.
- *
- * Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com>
- *
- * Added conditional policy language extensions
+ * Updated: Frank Mayer <mayerf@tresys.com> and
+ * Karl MacMillan <kmacmillan@tresys.com>
+ * Added conditional policy language extensions
+ * Copyright (C) 2003-2004 Tresys Technology, LLC
*
* Updated: Hewlett-Packard <paul@paul-moore.com>
- *
- * Added support for the policy capability bitmap
+ * Added support for the policy capability bitmap
+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
*
* Update: Mellanox Techonologies
- *
- * Added Infiniband support
- *
- * Copyright (C) 2016 Mellanox Techonologies
- * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
- * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
- * Copyright (C) 2003 - 2004 Tresys Technology, LLC
+ * Added Infiniband support
+ * Copyright (C) 2016 Mellanox Techonologies
*/
#include <linux/kernel.h>
@@ -42,6 +38,7 @@
#include "services.h"
#ifdef CONFIG_SECURITY_SELINUX_DEBUG
+/* clang-format off */
static const char *const symtab_name[SYM_NUM] = {
"common prefixes",
"classes",
@@ -52,6 +49,7 @@ static const char *const symtab_name[SYM_NUM] = {
"levels",
"categories",
};
+/* clang-format off */
#endif
struct policydb_compat_info {
@@ -63,103 +61,104 @@ struct policydb_compat_info {
/* These need to be updated if SYM_NUM or OCON_NUM changes */
static const struct policydb_compat_info policydb_compat[] = {
{
- .version = POLICYDB_VERSION_BASE,
- .sym_num = SYM_NUM - 3,
- .ocon_num = OCON_NUM - 3,
+ .version = POLICYDB_VERSION_BASE,
+ .sym_num = SYM_NUM - 3,
+ .ocon_num = OCON_NUM - 3,
},
{
- .version = POLICYDB_VERSION_BOOL,
- .sym_num = SYM_NUM - 2,
- .ocon_num = OCON_NUM - 3,
+ .version = POLICYDB_VERSION_BOOL,
+ .sym_num = SYM_NUM - 2,
+ .ocon_num = OCON_NUM - 3,
},
{
- .version = POLICYDB_VERSION_IPV6,
- .sym_num = SYM_NUM - 2,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_IPV6,
+ .sym_num = SYM_NUM - 2,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_NLCLASS,
- .sym_num = SYM_NUM - 2,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_NLCLASS,
+ .sym_num = SYM_NUM - 2,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_MLS,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_MLS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_AVTAB,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_AVTAB,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_RANGETRANS,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_RANGETRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_POLCAP,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_POLCAP,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_PERMISSIVE,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_PERMISSIVE,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_BOUNDARY,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_BOUNDARY,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_FILENAME_TRANS,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_FILENAME_TRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_ROLETRANS,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_ROLETRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_NEW_OBJECT_DEFAULTS,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_NEW_OBJECT_DEFAULTS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_DEFAULT_TYPE,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_DEFAULT_TYPE,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_CONSTRAINT_NAMES,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_CONSTRAINT_NAMES,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_XPERMS_IOCTL,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM - 2,
+ .version = POLICYDB_VERSION_XPERMS_IOCTL,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM - 2,
},
{
- .version = POLICYDB_VERSION_INFINIBAND,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM,
+ .version = POLICYDB_VERSION_INFINIBAND,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
},
{
- .version = POLICYDB_VERSION_GLBLUB,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM,
+ .version = POLICYDB_VERSION_GLBLUB,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
},
{
- .version = POLICYDB_VERSION_COMP_FTRANS,
- .sym_num = SYM_NUM,
- .ocon_num = OCON_NUM,
+ .version = POLICYDB_VERSION_COMP_FTRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
},
};
-static const struct policydb_compat_info *policydb_lookup_compat(unsigned int version)
+static const struct policydb_compat_info *
+policydb_lookup_compat(unsigned int version)
{
unsigned int i;
@@ -312,7 +311,8 @@ static int cat_destroy(void *key, void *datum, void *p)
return 0;
}
-static int (*const destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
+/* clang-format off */
+static int (*const destroy_f[SYM_NUM])(void *key, void *datum, void *datap) = {
common_destroy,
cls_destroy,
role_destroy,
@@ -322,6 +322,7 @@ static int (*const destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
sens_destroy,
cat_destroy,
};
+/* clang-format on */
static int filenametr_destroy(void *key, void *datum, void *p)
{
@@ -366,8 +367,8 @@ static void ocontext_destroy(struct ocontext *c, unsigned int i)
context_destroy(&c->context[0]);
context_destroy(&c->context[1]);
- if (i == OCON_ISID || i == OCON_FS ||
- i == OCON_NETIF || i == OCON_FSUSE)
+ if (i == OCON_ISID || i == OCON_FS || i == OCON_NETIF ||
+ i == OCON_FSUSE)
kfree(c->u.name);
kfree(c);
}
@@ -429,7 +430,6 @@ static int filenametr_cmp(const void *k1, const void *k2)
return v;
return strcmp(ft1->name, ft2->name);
-
}
static const struct hashtab_key_params filenametr_key_params = {
@@ -437,8 +437,8 @@ static const struct hashtab_key_params filenametr_key_params = {
.cmp = filenametr_cmp,
};
-struct filename_trans_datum *policydb_filenametr_search(
- struct policydb *p, struct filename_trans_key *key)
+struct filename_trans_datum *
+policydb_filenametr_search(struct policydb *p, struct filename_trans_key *key)
{
return hashtab_search(&p->filename_trans, key, filenametr_key_params);
}
@@ -448,7 +448,7 @@ static u32 rangetr_hash(const void *k)
const struct range_trans *key = k;
return key->source_type + (key->target_type << 3) +
- (key->target_class << 5);
+ (key->target_class << 5);
}
static int rangetr_cmp(const void *k1, const void *k2)
@@ -484,7 +484,8 @@ static u32 role_trans_hash(const void *k)
{
const struct role_trans_key *key = k;
- return jhash_3words(key->role, key->type, (u32)key->tclass << 16 | key->tclass, 0);
+ return jhash_3words(key->role, key->type,
+ (u32)key->tclass << 16 | key->tclass, 0);
}
static int role_trans_cmp(const void *k1, const void *k2)
@@ -576,9 +577,8 @@ static int role_index(void *key, void *datum, void *datap)
role = datum;
p = datap;
- if (!role->value
- || role->value > p->p_roles.nprim
- || role->bounds > p->p_roles.nprim)
+ if (!role->value || role->value > p->p_roles.nprim ||
+ role->bounds > p->p_roles.nprim)
return -EINVAL;
p->sym_val_to_name[SYM_ROLES][role->value - 1] = key;
@@ -595,9 +595,8 @@ static int type_index(void *key, void *datum, void *datap)
p = datap;
if (typdatum->primary) {
- if (!typdatum->value
- || typdatum->value > p->p_types.nprim
- || typdatum->bounds > p->p_types.nprim)
+ if (!typdatum->value || typdatum->value > p->p_types.nprim ||
+ typdatum->bounds > p->p_types.nprim)
return -EINVAL;
p->sym_val_to_name[SYM_TYPES][typdatum->value - 1] = key;
p->type_val_to_struct[typdatum->value - 1] = typdatum;
@@ -613,9 +612,8 @@ static int user_index(void *key, void *datum, void *datap)
usrdatum = datum;
p = datap;
- if (!usrdatum->value
- || usrdatum->value > p->p_users.nprim
- || usrdatum->bounds > p->p_users.nprim)
+ if (!usrdatum->value || usrdatum->value > p->p_users.nprim ||
+ usrdatum->bounds > p->p_users.nprim)
return -EINVAL;
p->sym_val_to_name[SYM_USERS][usrdatum->value - 1] = key;
@@ -660,7 +658,8 @@ static int cat_index(void *key, void *datum, void *datap)
return 0;
}
-static int (*const index_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
+/* clang-format off */
+static int (*const index_f[SYM_NUM])(void *key, void *datum, void *datap) = {
common_index,
class_index,
role_index,
@@ -670,6 +669,7 @@ static int (*const index_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
sens_index,
cat_index,
};
+/* clang-format on */
#ifdef CONFIG_SECURITY_SELINUX_DEBUG
static void hash_eval(struct hashtab *h, const char *hash_name)
@@ -677,9 +677,10 @@ static void hash_eval(struct hashtab *h, const char *hash_name)
struct hashtab_info info;
hashtab_stat(h, &info);
- pr_debug("SELinux: %s: %d entries and %d/%d buckets used, longest chain length %d, sum of chain length^2 %llu\n",
- hash_name, h->nel, info.slots_used, h->size,
- info.max_chain_len, info.chain2_len_sum);
+ pr_debug(
+ "SELinux: %s: %d entries and %d/%d buckets used, longest chain length %d, sum of chain length^2 %llu\n",
+ hash_name, h->nel, info.slots_used, h->size, info.max_chain_len,
+ info.chain2_len_sum);
}
static void symtab_hash_eval(struct symtab *s)
@@ -710,16 +711,17 @@ static int policydb_index(struct policydb *p)
int i, rc;
if (p->mls_enabled)
- pr_debug("SELinux: %d users, %d roles, %d types, %d bools, %d sens, %d cats\n",
- p->p_users.nprim, p->p_roles.nprim, p->p_types.nprim,
- p->p_bools.nprim, p->p_levels.nprim, p->p_cats.nprim);
+ pr_debug(
+ "SELinux: %d users, %d roles, %d types, %d bools, %d sens, %d cats\n",
+ p->p_users.nprim, p->p_roles.nprim, p->p_types.nprim,
+ p->p_bools.nprim, p->p_levels.nprim, p->p_cats.nprim);
else
pr_debug("SELinux: %d users, %d roles, %d types, %d bools\n",
p->p_users.nprim, p->p_roles.nprim, p->p_types.nprim,
p->p_bools.nprim);
- pr_debug("SELinux: %d classes, %d rules\n",
- p->p_classes.nprim, p->te_avtab.nel);
+ pr_debug("SELinux: %d classes, %d rules\n", p->p_classes.nprim,
+ p->te_avtab.nel);
avtab_hash_eval(&p->te_avtab, "rules");
symtab_hash_eval(p->symtab);
@@ -730,21 +732,18 @@ static int policydb_index(struct policydb *p)
if (!p->class_val_to_struct)
return -ENOMEM;
- p->role_val_to_struct = kcalloc(p->p_roles.nprim,
- sizeof(*p->role_val_to_struct),
- GFP_KERNEL);
+ p->role_val_to_struct = kcalloc(
+ p->p_roles.nprim, sizeof(*p->role_val_to_struct), GFP_KERNEL);
if (!p->role_val_to_struct)
return -ENOMEM;
- p->user_val_to_struct = kcalloc(p->p_users.nprim,
- sizeof(*p->user_val_to_struct),
- GFP_KERNEL);
+ p->user_val_to_struct = kcalloc(
+ p->p_users.nprim, sizeof(*p->user_val_to_struct), GFP_KERNEL);
if (!p->user_val_to_struct)
return -ENOMEM;
- p->type_val_to_struct = kvcalloc(p->p_types.nprim,
- sizeof(*p->type_val_to_struct),
- GFP_KERNEL);
+ p->type_val_to_struct = kvcalloc(
+ p->p_types.nprim, sizeof(*p->type_val_to_struct), GFP_KERNEL);
if (!p->type_val_to_struct)
return -ENOMEM;
@@ -754,8 +753,7 @@ static int policydb_index(struct policydb *p)
for (i = 0; i < SYM_NUM; i++) {
p->sym_val_to_name[i] = kvcalloc(p->symtab[i].nprim,
- sizeof(char *),
- GFP_KERNEL);
+ sizeof(char *), GFP_KERNEL);
if (!p->sym_val_to_name[i])
return -ENOMEM;
@@ -857,8 +855,7 @@ void policydb_destroy(struct policydb *p)
int policydb_load_isids(struct policydb *p, struct sidtab *s)
{
struct ocontext *head, *c;
- bool isid_init_supported = ebitmap_get_bit(&p->policycaps,
- POLICYDB_CAP_USERSPACE_INITIAL_CONTEXT);
+ bool isid_init;
int rc;
rc = sidtab_init(s);
@@ -867,6 +864,9 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
return rc;
}
+ isid_init = ebitmap_get_bit(&p->policycaps,
+ POLICYDB_CAP_USERSPACE_INITIAL_CONTEXT);
+
head = p->ocontexts[OCON_ISID];
for (c = head; c; c = c->next) {
u32 sid = c->sid[0];
@@ -886,7 +886,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
* Also ignore SECINITSID_INIT if the policy doesn't declare
* support for it
*/
- if (sid == SECINITSID_INIT && !isid_init_supported)
+ if (sid == SECINITSID_INIT && !isid_init)
continue;
rc = sidtab_set_initial(s, sid, &c->context[0]);
@@ -905,8 +905,9 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
* started before policy load would initially get the context
* corresponding to SECINITSID_KERNEL.
*/
- if (sid == SECINITSID_KERNEL && !isid_init_supported) {
- rc = sidtab_set_initial(s, SECINITSID_INIT, &c->context[0]);
+ if (sid == SECINITSID_KERNEL && !isid_init) {
+ rc = sidtab_set_initial(s, SECINITSID_INIT,
+ &c->context[0]);
if (rc) {
pr_err("SELinux: unable to load initial SID %s.\n",
name);
@@ -1047,8 +1048,7 @@ out:
* Read and validate a security context structure
* from a policydb binary representation file.
*/
-static int context_read_and_validate(struct context *c,
- struct policydb *p,
+static int context_read_and_validate(struct context *c, struct policydb *p,
void *fp)
{
__le32 buf[3];
@@ -1211,10 +1211,8 @@ static int type_set_read(struct type_set *t, void *fp)
return 0;
}
-
-static int read_cons_helper(struct policydb *p,
- struct constraint_node **nodep,
- u32 ncons, int allowxtarget, void *fp)
+static int read_cons_helper(struct policydb *p, struct constraint_node **nodep,
+ u32 ncons, int allowxtarget, void *fp)
{
struct constraint_node *c, *lc;
struct constraint_expr *e, *le;
@@ -1284,8 +1282,9 @@ static int read_cons_helper(struct policydb *p,
return rc;
if (p->policyvers >=
POLICYDB_VERSION_CONSTRAINT_NAMES) {
- e->type_names = kzalloc(sizeof
- (*e->type_names), GFP_KERNEL);
+ e->type_names =
+ kzalloc(sizeof(*e->type_names),
+ GFP_KERNEL);
if (!e->type_names)
return -ENOMEM;
type_set_init(e->type_names);
@@ -1319,7 +1318,7 @@ static int class_read(struct policydb *p, struct symtab *s, void *fp)
if (!cladatum)
return -ENOMEM;
- rc = next_entry(buf, fp, sizeof(u32)*6);
+ rc = next_entry(buf, fp, sizeof(u32) * 6);
if (rc)
goto bad;
@@ -1345,8 +1344,8 @@ static int class_read(struct policydb *p, struct symtab *s, void *fp)
goto bad;
rc = -EINVAL;
- cladatum->comdatum = symtab_search(&p->p_commons,
- cladatum->comkey);
+ cladatum->comdatum =
+ symtab_search(&p->p_commons, cladatum->comkey);
if (!cladatum->comdatum) {
pr_err("SELinux: unknown common %s\n",
cladatum->comkey);
@@ -1369,8 +1368,8 @@ static int class_read(struct policydb *p, struct symtab *s, void *fp)
if (rc)
goto bad;
ncons = le32_to_cpu(buf[0]);
- rc = read_cons_helper(p, &cladatum->validatetrans,
- ncons, 1, fp);
+ rc = read_cons_helper(p, &cladatum->validatetrans, ncons, 1,
+ fp);
if (rc)
goto bad;
}
@@ -1507,7 +1506,6 @@ bad:
return rc;
}
-
/*
* Read a MLS level structure from a policydb binary
* representation file.
@@ -1659,8 +1657,9 @@ bad:
return rc;
}
-static int (*const read_f[SYM_NUM]) (struct policydb *p,
- struct symtab *s, void *fp) = {
+/* clang-format off */
+static int (*const read_f[SYM_NUM])(struct policydb *p, struct symtab *s,
+ void *fp) = {
common_read,
class_read,
role_read,
@@ -1670,6 +1669,7 @@ static int (*const read_f[SYM_NUM]) (struct policydb *p,
sens_read,
cat_read,
};
+/* clang-format on */
static int user_bounds_sanity_check(void *key, void *datum, void *datap)
{
@@ -1685,12 +1685,13 @@ static int user_bounds_sanity_check(void *key, void *datum, void *datap)
if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
pr_err("SELinux: user %s: "
"too deep or looped boundary\n",
- (char *) key);
+ (char *)key);
return -EINVAL;
}
upper = p->user_val_to_struct[upper->bounds - 1];
- ebitmap_for_each_positive_bit(&user->roles, node, bit) {
+ ebitmap_for_each_positive_bit(&user->roles, node, bit)
+ {
if (ebitmap_get_bit(&upper->roles, bit))
continue;
@@ -1721,12 +1722,13 @@ static int role_bounds_sanity_check(void *key, void *datum, void *datap)
if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
pr_err("SELinux: role %s: "
"too deep or looped bounds\n",
- (char *) key);
+ (char *)key);
return -EINVAL;
}
upper = p->role_val_to_struct[upper->bounds - 1];
- ebitmap_for_each_positive_bit(&role->types, node, bit) {
+ ebitmap_for_each_positive_bit(&role->types, node, bit)
+ {
if (ebitmap_get_bit(&upper->types, bit))
continue;
@@ -1754,7 +1756,7 @@ static int type_bounds_sanity_check(void *key, void *datum, void *datap)
if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
pr_err("SELinux: type %s: "
"too deep or looped boundary\n",
- (char *) key);
+ (char *)key);
return -EINVAL;
}
@@ -1764,7 +1766,7 @@ static int type_bounds_sanity_check(void *key, void *datum, void *datap)
if (upper->attribute) {
pr_err("SELinux: type %s: "
"bounded by attribute %s\n",
- (char *) key,
+ (char *)key,
sym_name(p, SYM_TYPES, upper->value - 1));
return -EINVAL;
}
@@ -1815,7 +1817,7 @@ u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name)
if (!tclass || tclass > p->p_classes.nprim)
return 0;
- cladatum = p->class_val_to_struct[tclass-1];
+ cladatum = p->class_val_to_struct[tclass - 1];
comdatum = cladatum->comdatum;
if (comdatum)
perdatum = symtab_search(&comdatum->permissions, name);
@@ -1824,7 +1826,7 @@ u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name)
if (!perdatum)
return 0;
- return 1U << (perdatum->value-1);
+ return 1U << (perdatum->value - 1);
}
static int range_read(struct policydb *p, void *fp)
@@ -2192,12 +2194,12 @@ static int genfs_read(struct policydb *p, void *fp)
goto out;
newc->v.sclass = le32_to_cpu(buf[0]);
- rc = context_read_and_validate(&newc->context[0], p, fp);
+ rc = context_read_and_validate(&newc->context[0], p,
+ fp);
if (rc)
goto out;
- for (l = NULL, c = genfs->head; c;
- l = c, c = c->next) {
+ for (l = NULL, c = genfs->head; c; l = c, c = c->next) {
rc = -EINVAL;
if (!strcmp(newc->u.name, c->u.name) &&
(!c->v.sclass || !newc->v.sclass ||
@@ -2231,8 +2233,8 @@ out:
return rc;
}
-static int ocontext_read(struct policydb *p, const struct policydb_compat_info *info,
- void *fp)
+static int ocontext_read(struct policydb *p,
+ const struct policydb_compat_info *info, void *fp)
{
int rc;
unsigned int i;
@@ -2267,7 +2269,8 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
goto out;
c->sid[0] = le32_to_cpu(buf[0]);
- rc = context_read_and_validate(&c->context[0], p, fp);
+ rc = context_read_and_validate(&c->context[0],
+ p, fp);
if (rc)
goto out;
break;
@@ -2286,21 +2289,24 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
pr_warn("SELinux: void and deprecated fs ocon %s\n",
c->u.name);
- rc = context_read_and_validate(&c->context[0], p, fp);
+ rc = context_read_and_validate(&c->context[0],
+ p, fp);
if (rc)
goto out;
- rc = context_read_and_validate(&c->context[1], p, fp);
+ rc = context_read_and_validate(&c->context[1],
+ p, fp);
if (rc)
goto out;
break;
case OCON_PORT:
- rc = next_entry(buf, fp, sizeof(u32)*3);
+ rc = next_entry(buf, fp, sizeof(u32) * 3);
if (rc)
goto out;
c->u.port.protocol = le32_to_cpu(buf[0]);
c->u.port.low_port = le32_to_cpu(buf[1]);
c->u.port.high_port = le32_to_cpu(buf[2]);
- rc = context_read_and_validate(&c->context[0], p, fp);
+ rc = context_read_and_validate(&c->context[0],
+ p, fp);
if (rc)
goto out;
break;
@@ -2310,12 +2316,13 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
goto out;
c->u.node.addr = nodebuf[0]; /* network order */
c->u.node.mask = nodebuf[1]; /* network order */
- rc = context_read_and_validate(&c->context[0], p, fp);
+ rc = context_read_and_validate(&c->context[0],
+ p, fp);
if (rc)
goto out;
break;
case OCON_FSUSE:
- rc = next_entry(buf, fp, sizeof(u32)*2);
+ rc = next_entry(buf, fp, sizeof(u32) * 2);
if (rc)
goto out;
@@ -2332,7 +2339,8 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
if (rc)
goto out;
- rc = context_read_and_validate(&c->context[0], p, fp);
+ rc = context_read_and_validate(&c->context[0],
+ p, fp);
if (rc)
goto out;
break;
@@ -2345,8 +2353,9 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
for (k = 0; k < 4; k++)
c->u.node6.addr[k] = nodebuf[k];
for (k = 0; k < 4; k++)
- c->u.node6.mask[k] = nodebuf[k+4];
- rc = context_read_and_validate(&c->context[0], p, fp);
+ c->u.node6.mask[k] = nodebuf[k + 4];
+ rc = context_read_and_validate(&c->context[0],
+ p, fp);
if (rc)
goto out;
break;
@@ -2359,7 +2368,8 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
goto out;
/* we need to have subnet_prefix in CPU order */
- c->u.ibpkey.subnet_prefix = be64_to_cpu(prefixbuf[0]);
+ c->u.ibpkey.subnet_prefix =
+ be64_to_cpu(prefixbuf[0]);
rc = next_entry(buf, fp, sizeof(u32) * 2);
if (rc)
@@ -2373,12 +2383,11 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
goto out;
}
- c->u.ibpkey.low_pkey = pkey_lo;
+ c->u.ibpkey.low_pkey = pkey_lo;
c->u.ibpkey.high_pkey = pkey_hi;
rc = context_read_and_validate(&c->context[0],
- p,
- fp);
+ p, fp);
if (rc)
goto out;
break;
@@ -2391,7 +2400,8 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
goto out;
len = le32_to_cpu(buf[0]);
- rc = str_read(&c->u.ibendport.dev_name, GFP_KERNEL, fp, len);
+ rc = str_read(&c->u.ibendport.dev_name,
+ GFP_KERNEL, fp, len);
if (rc)
goto out;
@@ -2404,8 +2414,7 @@ static int ocontext_read(struct policydb *p, const struct policydb_compat_info *
c->u.ibendport.port = port;
rc = context_read_and_validate(&c->context[0],
- p,
- fp);
+ p, fp);
if (rc)
goto out;
break;
@@ -2462,7 +2471,8 @@ int policydb_read(struct policydb *p, void *fp)
policydb_str = kmalloc(len + 1, GFP_KERNEL);
if (!policydb_str) {
pr_err("SELinux: unable to allocate memory for policydb "
- "string of length %d\n", len);
+ "string of length %d\n",
+ len);
goto bad;
}
@@ -2477,7 +2487,8 @@ int policydb_read(struct policydb *p, void *fp)
policydb_str[len] = '\0';
if (strcmp(policydb_str, POLICYDB_STRING)) {
pr_err("SELinux: policydb string %s does not match "
- "my string %s\n", policydb_str, POLICYDB_STRING);
+ "my string %s\n",
+ policydb_str, POLICYDB_STRING);
kfree(policydb_str);
goto bad;
}
@@ -2486,7 +2497,7 @@ int policydb_read(struct policydb *p, void *fp)
policydb_str = NULL;
/* Read the version and table sizes. */
- rc = next_entry(buf, fp, sizeof(u32)*4);
+ rc = next_entry(buf, fp, sizeof(u32) * 4);
if (rc)
goto bad;
@@ -2496,7 +2507,8 @@ int policydb_read(struct policydb *p, void *fp)
p->policyvers > POLICYDB_VERSION_MAX) {
pr_err("SELinux: policydb version %d does not match "
"my version range %d-%d\n",
- le32_to_cpu(buf[0]), POLICYDB_VERSION_MIN, POLICYDB_VERSION_MAX);
+ le32_to_cpu(buf[0]), POLICYDB_VERSION_MIN,
+ POLICYDB_VERSION_MAX);
goto bad;
}
@@ -2506,8 +2518,8 @@ int policydb_read(struct policydb *p, void *fp)
rc = -EINVAL;
if (p->policyvers < POLICYDB_VERSION_MLS) {
pr_err("SELinux: security policydb version %d "
- "(MLS) not backwards compatible\n",
- p->policyvers);
+ "(MLS) not backwards compatible\n",
+ p->policyvers);
goto bad;
}
}
@@ -2530,22 +2542,23 @@ int policydb_read(struct policydb *p, void *fp)
info = policydb_lookup_compat(p->policyvers);
if (!info) {
pr_err("SELinux: unable to find policy compat info "
- "for version %d\n", p->policyvers);
+ "for version %d\n",
+ p->policyvers);
goto bad;
}
rc = -EINVAL;
if (le32_to_cpu(buf[2]) != info->sym_num ||
- le32_to_cpu(buf[3]) != info->ocon_num) {
+ le32_to_cpu(buf[3]) != info->ocon_num) {
pr_err("SELinux: policydb table sizes (%d,%d) do "
- "not match mine (%d,%d)\n", le32_to_cpu(buf[2]),
- le32_to_cpu(buf[3]),
- info->sym_num, info->ocon_num);
+ "not match mine (%d,%d)\n",
+ le32_to_cpu(buf[2]), le32_to_cpu(buf[3]), info->sym_num,
+ info->ocon_num);
goto bad;
}
for (i = 0; i < info->sym_num; i++) {
- rc = next_entry(buf, fp, sizeof(u32)*2);
+ rc = next_entry(buf, fp, sizeof(u32) * 2);
if (rc)
goto bad;
nprim = le32_to_cpu(buf[0]);
@@ -2606,7 +2619,7 @@ int policydb_read(struct policydb *p, void *fp)
if (!rtd)
goto bad;
- rc = next_entry(buf, fp, sizeof(u32)*3);
+ rc = next_entry(buf, fp, sizeof(u32) * 3);
if (rc)
goto bad;
@@ -2650,7 +2663,7 @@ int policydb_read(struct policydb *p, void *fp)
lra->next = ra;
else
p->role_allow = ra;
- rc = next_entry(buf, fp, sizeof(u32)*2);
+ rc = next_entry(buf, fp, sizeof(u32) * 2);
if (rc)
goto bad;
@@ -2698,9 +2711,8 @@ int policydb_read(struct policydb *p, void *fp)
goto bad;
rc = -ENOMEM;
- p->type_attr_map_array = kvcalloc(p->p_types.nprim,
- sizeof(*p->type_attr_map_array),
- GFP_KERNEL);
+ p->type_attr_map_array = kvcalloc(
+ p->p_types.nprim, sizeof(*p->type_attr_map_array), GFP_KERNEL);
if (!p->type_attr_map_array)
goto bad;
@@ -2773,7 +2785,7 @@ static int mls_write_range_helper(struct mls_range *r, void *fp)
items = 2;
else
items = 3;
- buf[0] = cpu_to_le32(items-1);
+ buf[0] = cpu_to_le32(items - 1);
buf[1] = cpu_to_le32(r->level[0].sens);
if (!eq)
buf[2] = cpu_to_le32(r->level[1].sens);
@@ -2916,8 +2928,7 @@ static int role_allow_write(struct role_allow *r, void *fp)
* Write a security context structure
* to a policydb binary representation file.
*/
-static int context_write(struct policydb *p, struct context *c,
- void *fp)
+static int context_write(struct policydb *p, struct context *c, void *fp)
{
int rc;
__le32 buf[3];
@@ -3045,7 +3056,7 @@ static int write_cons_helper(struct policydb *p, struct constraint_node *node,
if (rc)
return rc;
if (p->policyvers >=
- POLICYDB_VERSION_CONSTRAINT_NAMES) {
+ POLICYDB_VERSION_CONSTRAINT_NAMES) {
rc = type_set_write(e->type_names, fp);
if (rc)
return rc;
@@ -3266,7 +3277,8 @@ static int user_write(void *vkey, void *datum, void *ptr)
return 0;
}
-static int (*const write_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
+/* clang-format off */
+static int (*const write_f[SYM_NUM])(void *key, void *datum, void *datap) = {
common_write,
class_write,
role_write,
@@ -3276,9 +3288,10 @@ static int (*const write_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
sens_write,
cat_write,
};
+/* clang-format on */
-static int ocontext_write(struct policydb *p, const struct policydb_compat_info *info,
- void *fp)
+static int ocontext_write(struct policydb *p,
+ const struct policydb_compat_info *info, void *fp)
{
unsigned int i, j;
int rc;
@@ -3360,9 +3373,13 @@ static int ocontext_write(struct policydb *p, const struct policydb_compat_info
break;
case OCON_NODE6:
for (j = 0; j < 4; j++)
- nodebuf[j] = c->u.node6.addr[j]; /* network order */
+ nodebuf[j] =
+ c->u.node6.addr
+ [j]; /* network order */
for (j = 0; j < 4; j++)
- nodebuf[j + 4] = c->u.node6.mask[j]; /* network order */
+ nodebuf[j + 4] =
+ c->u.node6.mask
+ [j]; /* network order */
rc = put_entry(nodebuf, sizeof(u32), 8, fp);
if (rc)
return rc;
@@ -3372,7 +3389,8 @@ static int ocontext_write(struct policydb *p, const struct policydb_compat_info
break;
case OCON_IBPKEY:
/* subnet_prefix is in CPU order */
- prefixbuf[0] = cpu_to_be64(c->u.ibpkey.subnet_prefix);
+ prefixbuf[0] =
+ cpu_to_be64(c->u.ibpkey.subnet_prefix);
rc = put_entry(prefixbuf, sizeof(u64), 1, fp);
if (rc)
@@ -3395,7 +3413,8 @@ static int ocontext_write(struct policydb *p, const struct policydb_compat_info
rc = put_entry(buf, sizeof(u32), 2, fp);
if (rc)
return rc;
- rc = put_entry(c->u.ibendport.dev_name, 1, len, fp);
+ rc = put_entry(c->u.ibendport.dev_name, 1, len,
+ fp);
if (rc)
return rc;
rc = context_write(p, &c->context[0], fp);
@@ -3521,7 +3540,8 @@ static int filename_write_helper_compat(void *key, void *data, void *ptr)
u32 bit, len = strlen(ft->name);
do {
- ebitmap_for_each_positive_bit(&datum->stypes, node, bit) {
+ ebitmap_for_each_positive_bit(&datum->stypes, node, bit)
+ {
buf[0] = cpu_to_le32(len);
rc = put_entry(buf, sizeof(u32), 1, fp);
if (rc)
@@ -3645,8 +3665,8 @@ int policydb_write(struct policydb *p, void *fp)
*/
if (p->policyvers < POLICYDB_VERSION_AVTAB) {
pr_err("SELinux: refusing to write policy version %d."
- " Because it is less than version %d\n", p->policyvers,
- POLICYDB_VERSION_AVTAB);
+ " Because it is less than version %d\n",
+ p->policyvers, POLICYDB_VERSION_AVTAB);
return -EINVAL;
}
@@ -3674,7 +3694,8 @@ int policydb_write(struct policydb *p, void *fp)
info = policydb_lookup_compat(p->policyvers);
if (!info) {
pr_err("SELinux: compatibility lookup failed for policy "
- "version %d\n", p->policyvers);
+ "version %d\n",
+ p->policyvers);
return -EINVAL;
}
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index b97cda489753..4bba386264a3 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -8,15 +8,13 @@
/*
* Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ * Support for enhanced MLS infrastructure.
+ * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
*
- * Support for enhanced MLS infrastructure.
- *
- * Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com>
- *
- * Added conditional policy language extensions
- *
- * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
- * Copyright (C) 2003 - 2004 Tresys Technology, LLC
+ * Updated: Frank Mayer <mayerf@tresys.com> and
+ * Karl MacMillan <kmacmillan@tresys.com>
+ * Added conditional policy language extensions
+ * Copyright (C) 2003-2004 Tresys Technology, LLC
*/
#ifndef _SS_POLICYDB_H_
@@ -39,104 +37,103 @@
/* Permission attributes */
struct perm_datum {
- u32 value; /* permission bit + 1 */
+ u32 value; /* permission bit + 1 */
};
/* Attributes of a common prefix for access vectors */
struct common_datum {
- u32 value; /* internal common value */
- struct symtab permissions; /* common permissions */
+ u32 value; /* internal common value */
+ struct symtab permissions; /* common permissions */
};
/* Class attributes */
struct class_datum {
- u32 value; /* class value */
- char *comkey; /* common name */
- struct common_datum *comdatum; /* common datum */
- struct symtab permissions; /* class-specific permission symbol table */
- struct constraint_node *constraints; /* constraints on class permissions */
- struct constraint_node *validatetrans; /* special transition rules */
+ u32 value; /* class value */
+ char *comkey; /* common name */
+ struct common_datum *comdatum; /* common datum */
+ struct symtab permissions; /* class-specific permission symbol table */
+ struct constraint_node *constraints; /* constraints on class perms */
+ struct constraint_node *validatetrans; /* special transition rules */
/* Options how a new object user, role, and type should be decided */
-#define DEFAULT_SOURCE 1
-#define DEFAULT_TARGET 2
+#define DEFAULT_SOURCE 1
+#define DEFAULT_TARGET 2
char default_user;
char default_role;
char default_type;
/* Options how a new object range should be decided */
-#define DEFAULT_SOURCE_LOW 1
-#define DEFAULT_SOURCE_HIGH 2
-#define DEFAULT_SOURCE_LOW_HIGH 3
-#define DEFAULT_TARGET_LOW 4
-#define DEFAULT_TARGET_HIGH 5
-#define DEFAULT_TARGET_LOW_HIGH 6
+#define DEFAULT_SOURCE_LOW 1
+#define DEFAULT_SOURCE_HIGH 2
+#define DEFAULT_SOURCE_LOW_HIGH 3
+#define DEFAULT_TARGET_LOW 4
+#define DEFAULT_TARGET_HIGH 5
+#define DEFAULT_TARGET_LOW_HIGH 6
#define DEFAULT_GLBLUB 7
char default_range;
};
/* Role attributes */
struct role_datum {
- u32 value; /* internal role value */
- u32 bounds; /* boundary of role */
- struct ebitmap dominates; /* set of roles dominated by this role */
- struct ebitmap types; /* set of authorized types for role */
+ u32 value; /* internal role value */
+ u32 bounds; /* boundary of role */
+ struct ebitmap dominates; /* set of roles dominated by this role */
+ struct ebitmap types; /* set of authorized types for role */
};
struct role_trans_key {
- u32 role; /* current role */
- u32 type; /* program executable type, or new object type */
- u32 tclass; /* process class, or new object class */
+ u32 role; /* current role */
+ u32 type; /* program executable type, or new object type */
+ u32 tclass; /* process class, or new object class */
};
struct role_trans_datum {
- u32 new_role; /* new role */
+ u32 new_role; /* new role */
};
struct filename_trans_key {
- u32 ttype; /* parent dir context */
- u16 tclass; /* class of new object */
- const char *name; /* last path component */
+ u32 ttype; /* parent dir context */
+ u16 tclass; /* class of new object */
+ const char *name; /* last path component */
};
struct filename_trans_datum {
- struct ebitmap stypes; /* bitmap of source types for this otype */
- u32 otype; /* resulting type of new object */
- struct filename_trans_datum *next; /* record for next otype*/
+ struct ebitmap stypes; /* bitmap of source types for this otype */
+ u32 otype; /* resulting type of new object */
+ struct filename_trans_datum *next; /* record for next otype*/
};
struct role_allow {
- u32 role; /* current role */
- u32 new_role; /* new role */
+ u32 role; /* current role */
+ u32 new_role; /* new role */
struct role_allow *next;
};
/* Type attributes */
struct type_datum {
- u32 value; /* internal type value */
- u32 bounds; /* boundary of type */
- unsigned char primary; /* primary name? */
- unsigned char attribute;/* attribute ?*/
+ u32 value; /* internal type value */
+ u32 bounds; /* boundary of type */
+ unsigned char primary; /* primary name? */
+ unsigned char attribute; /* attribute ?*/
};
/* User attributes */
struct user_datum {
- u32 value; /* internal user value */
- u32 bounds; /* bounds of user */
- struct ebitmap roles; /* set of authorized roles for user */
- struct mls_range range; /* MLS range (min - max) for user */
- struct mls_level dfltlevel; /* default login MLS level for user */
+ u32 value; /* internal user value */
+ u32 bounds; /* bounds of user */
+ struct ebitmap roles; /* set of authorized roles for user */
+ struct mls_range range; /* MLS range (min - max) for user */
+ struct mls_level dfltlevel; /* default login MLS level for user */
};
-
/* Sensitivity attributes */
struct level_datum {
- struct mls_level *level; /* sensitivity and associated categories */
- unsigned char isalias; /* is this sensitivity an alias for another? */
+ struct mls_level *level; /* sensitivity and associated categories */
+ unsigned char isalias; /* is this sensitivity an alias for another? */
};
/* Category attributes */
struct cat_datum {
- u32 value; /* internal category bit + 1 */
- unsigned char isalias; /* is this category an alias for another? */
+ u32 value; /* internal category bit + 1 */
+ unsigned char isalias; /* is this category an alias for another? */
};
struct range_trans {
@@ -147,7 +144,7 @@ struct range_trans {
/* Boolean data type */
struct cond_bool_datum {
- __u32 value; /* internal type value */
+ __u32 value; /* internal type value */
int state;
};
@@ -173,20 +170,20 @@ struct type_set {
*/
struct ocontext {
union {
- char *name; /* name of initial SID, fs, netif, fstype, path */
+ char *name; /* name of initial SID, fs, netif, fstype, path */
struct {
u8 protocol;
u16 low_port;
u16 high_port;
- } port; /* TCP or UDP port information */
+ } port; /* TCP or UDP port information */
struct {
u32 addr;
u32 mask;
- } node; /* node information */
+ } node; /* node information */
struct {
u32 addr[4];
u32 mask[4];
- } node6; /* IPv6 node information */
+ } node6; /* IPv6 node information */
struct {
u64 subnet_prefix;
u16 low_pkey;
@@ -198,11 +195,11 @@ struct ocontext {
} ibendport;
} u;
union {
- u32 sclass; /* security class for genfs */
- u32 behavior; /* labeling behavior for fs_use */
+ u32 sclass; /* security class for genfs */
+ u32 behavior; /* labeling behavior for fs_use */
} v;
- struct context context[2]; /* security context(s) */
- u32 sid[2]; /* SID(s) */
+ struct context context[2]; /* security context(s) */
+ u32 sid[2]; /* SID(s) */
struct ocontext *next;
};
@@ -221,19 +218,19 @@ struct genfs {
#define SYM_BOOLS 5
#define SYM_LEVELS 6
#define SYM_CATS 7
-#define SYM_NUM 8
+#define SYM_NUM 8
/* object context array indices */
-#define OCON_ISID 0 /* initial SIDs */
-#define OCON_FS 1 /* unlabeled file systems (deprecated) */
-#define OCON_PORT 2 /* TCP and UDP port numbers */
-#define OCON_NETIF 3 /* network interfaces */
-#define OCON_NODE 4 /* nodes */
-#define OCON_FSUSE 5 /* fs_use */
-#define OCON_NODE6 6 /* IPv6 nodes */
-#define OCON_IBPKEY 7 /* Infiniband PKeys */
-#define OCON_IBENDPORT 8 /* Infiniband end ports */
-#define OCON_NUM 9
+#define OCON_ISID 0 /* initial SIDs */
+#define OCON_FS 1 /* unlabeled file systems (deprecated) */
+#define OCON_PORT 2 /* TCP and UDP port numbers */
+#define OCON_NETIF 3 /* network interfaces */
+#define OCON_NODE 4 /* nodes */
+#define OCON_FSUSE 5 /* fs_use */
+#define OCON_NODE6 6 /* IPv6 nodes */
+#define OCON_IBPKEY 7 /* Infiniband PKeys */
+#define OCON_IBENDPORT 8 /* Infiniband end ports */
+#define OCON_NUM 9
/* The policy database */
struct policydb {
@@ -243,15 +240,15 @@ struct policydb {
struct symtab symtab[SYM_NUM];
#define p_commons symtab[SYM_COMMONS]
#define p_classes symtab[SYM_CLASSES]
-#define p_roles symtab[SYM_ROLES]
-#define p_types symtab[SYM_TYPES]
-#define p_users symtab[SYM_USERS]
-#define p_bools symtab[SYM_BOOLS]
-#define p_levels symtab[SYM_LEVELS]
-#define p_cats symtab[SYM_CATS]
+#define p_roles symtab[SYM_ROLES]
+#define p_types symtab[SYM_TYPES]
+#define p_users symtab[SYM_USERS]
+#define p_bools symtab[SYM_BOOLS]
+#define p_levels symtab[SYM_LEVELS]
+#define p_cats symtab[SYM_CATS]
/* symbol names indexed by (value - 1) */
- char **sym_val_to_name[SYM_NUM];
+ char **sym_val_to_name[SYM_NUM];
/* class, role, and user attributes indexed by (value - 1) */
struct class_datum **class_val_to_struct;
@@ -324,25 +321,25 @@ extern int policydb_role_isvalid(struct policydb *p, unsigned int role);
extern int policydb_read(struct policydb *p, void *fp);
extern int policydb_write(struct policydb *p, void *fp);
-extern struct filename_trans_datum *policydb_filenametr_search(
- struct policydb *p, struct filename_trans_key *key);
+extern struct filename_trans_datum *
+policydb_filenametr_search(struct policydb *p, struct filename_trans_key *key);
-extern struct mls_range *policydb_rangetr_search(
- struct policydb *p, struct range_trans *key);
+extern struct mls_range *policydb_rangetr_search(struct policydb *p,
+ struct range_trans *key);
-extern struct role_trans_datum *policydb_roletr_search(
- struct policydb *p, struct role_trans_key *key);
+extern struct role_trans_datum *
+policydb_roletr_search(struct policydb *p, struct role_trans_key *key);
-#define POLICYDB_CONFIG_MLS 1
+#define POLICYDB_CONFIG_MLS 1
/* the config flags related to unknown classes/perms are bits 2 and 3 */
-#define REJECT_UNKNOWN 0x00000002
-#define ALLOW_UNKNOWN 0x00000004
+#define REJECT_UNKNOWN 0x00000002
+#define ALLOW_UNKNOWN 0x00000004
-#define OBJECT_R "object_r"
+#define OBJECT_R "object_r"
#define OBJECT_R_VAL 1
-#define POLICYDB_MAGIC SELINUX_MAGIC
+#define POLICYDB_MAGIC SELINUX_MAGIC
#define POLICYDB_STRING "SE Linux"
struct policy_file {
@@ -366,7 +363,8 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes)
return 0;
}
-static inline int put_entry(const void *buf, size_t bytes, size_t num, struct policy_file *fp)
+static inline int put_entry(const void *buf, size_t bytes, size_t num,
+ struct policy_file *fp)
{
size_t len;
@@ -382,7 +380,8 @@ static inline int put_entry(const void *buf, size_t bytes, size_t num, struct po
return 0;
}
-static inline char *sym_name(struct policydb *p, unsigned int sym_num, unsigned int element_nr)
+static inline char *sym_name(struct policydb *p, unsigned int sym_num,
+ unsigned int element_nr)
{
return p->sym_val_to_name[sym_num][element_nr];
}
@@ -390,5 +389,4 @@ static inline char *sym_name(struct policydb *p, unsigned int sym_num, unsigned
extern u16 string_to_security_class(struct policydb *p, const char *name);
extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name);
-#endif /* _SS_POLICYDB_H_ */
-
+#endif /* _SS_POLICYDB_H_ */
diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h
index d24b0a3d198e..93358e7a649c 100644
--- a/security/selinux/ss/services.h
+++ b/security/selinux/ss/services.h
@@ -4,6 +4,7 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#ifndef _SS_SERVICES_H_
#define _SS_SERVICES_H_
@@ -43,4 +44,4 @@ int services_convert_context(struct convert_context_args *args,
struct context *oldc, struct context *newc,
gfp_t gfp_flags);
-#endif /* _SS_SERVICES_H_ */
+#endif /* _SS_SERVICES_H_ */
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index 732fd8e22a12..c8848cbba81f 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -7,6 +7,7 @@
*
* Copyright (C) 2018 Red Hat, Inc.
*/
+
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/list.h>
@@ -29,7 +30,7 @@ struct sidtab_str_cache {
};
#define index_to_sid(index) ((index) + SECINITSID_NUM + 1)
-#define sid_to_index(sid) ((sid) - (SECINITSID_NUM + 1))
+#define sid_to_index(sid) ((sid) - (SECINITSID_NUM + 1))
int sidtab_init(struct sidtab *s)
{
@@ -140,9 +141,11 @@ int sidtab_hash_stats(struct sidtab *sidtab, char *page)
if (chain_len > max_chain_len)
max_chain_len = chain_len;
- return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n"
- "longest chain: %d\n", entries,
- slots_used, SIDTAB_HASH_BUCKETS, max_chain_len);
+ return scnprintf(page, PAGE_SIZE,
+ "entries: %d\nbuckets used: %d/%d\n"
+ "longest chain: %d\n",
+ entries, slots_used, SIDTAB_HASH_BUCKETS,
+ max_chain_len);
}
static u32 sidtab_level_from_count(u32 count)
@@ -162,15 +165,15 @@ static int sidtab_alloc_roots(struct sidtab *s, u32 level)
u32 l;
if (!s->roots[0].ptr_leaf) {
- s->roots[0].ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
- GFP_ATOMIC);
+ s->roots[0].ptr_leaf =
+ kzalloc(SIDTAB_NODE_ALLOC_SIZE, GFP_ATOMIC);
if (!s->roots[0].ptr_leaf)
return -ENOMEM;
}
for (l = 1; l <= level; ++l)
if (!s->roots[l].ptr_inner) {
- s->roots[l].ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
- GFP_ATOMIC);
+ s->roots[l].ptr_inner =
+ kzalloc(SIDTAB_NODE_ALLOC_SIZE, GFP_ATOMIC);
if (!s->roots[l].ptr_inner)
return -ENOMEM;
s->roots[l].ptr_inner->entries[0] = s->roots[l - 1];
@@ -203,16 +206,16 @@ static struct sidtab_entry *sidtab_do_lookup(struct sidtab *s, u32 index,
if (!entry->ptr_inner) {
if (alloc)
- entry->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
- GFP_ATOMIC);
+ entry->ptr_inner = kzalloc(
+ SIDTAB_NODE_ALLOC_SIZE, GFP_ATOMIC);
if (!entry->ptr_inner)
return NULL;
}
}
if (!entry->ptr_leaf) {
if (alloc)
- entry->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
- GFP_ATOMIC);
+ entry->ptr_leaf =
+ kzalloc(SIDTAB_NODE_ALLOC_SIZE, GFP_ATOMIC);
if (!entry->ptr_leaf)
return NULL;
}
@@ -262,8 +265,7 @@ struct sidtab_entry *sidtab_search_entry_force(struct sidtab *s, u32 sid)
return sidtab_search_core(s, sid, 1);
}
-int sidtab_context_to_sid(struct sidtab *s, struct context *context,
- u32 *sid)
+int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid)
{
unsigned long flags;
u32 count, hash = context_compute_hash(context);
@@ -327,8 +329,8 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
goto out_unlock;
}
- rc = services_convert_context(convert->args,
- context, &dst_convert->context,
+ rc = services_convert_context(convert->args, context,
+ &dst_convert->context,
GFP_ATOMIC);
if (rc) {
context_destroy(&dst->context);
@@ -338,8 +340,8 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
dst_convert->hash = context_compute_hash(&dst_convert->context);
target->count = count + 1;
- hash_add_rcu(target->context_to_sid,
- &dst_convert->list, dst_convert->hash);
+ hash_add_rcu(target->context_to_sid, &dst_convert->list,
+ dst_convert->hash);
}
if (context->len)
@@ -373,8 +375,8 @@ static void sidtab_convert_hashtable(struct sidtab *s, u32 count)
}
static int sidtab_convert_tree(union sidtab_entry_inner *edst,
- union sidtab_entry_inner *esrc,
- u32 *pos, u32 count, u32 level,
+ union sidtab_entry_inner *esrc, u32 *pos,
+ u32 count, u32 level,
struct sidtab_convert_params *convert)
{
int rc;
@@ -382,8 +384,8 @@ static int sidtab_convert_tree(union sidtab_entry_inner *edst,
if (level != 0) {
if (!edst->ptr_inner) {
- edst->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
- GFP_KERNEL);
+ edst->ptr_inner =
+ kzalloc(SIDTAB_NODE_ALLOC_SIZE, GFP_KERNEL);
if (!edst->ptr_inner)
return -ENOMEM;
}
@@ -399,17 +401,18 @@ static int sidtab_convert_tree(union sidtab_entry_inner *edst,
}
} else {
if (!edst->ptr_leaf) {
- edst->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
- GFP_KERNEL);
+ edst->ptr_leaf =
+ kzalloc(SIDTAB_NODE_ALLOC_SIZE, GFP_KERNEL);
if (!edst->ptr_leaf)
return -ENOMEM;
}
i = 0;
while (i < SIDTAB_LEAF_ENTRIES && *pos < count) {
- rc = services_convert_context(convert->args,
- &esrc->ptr_leaf->entries[i].context,
- &edst->ptr_leaf->entries[i].context,
- GFP_KERNEL);
+ rc = services_convert_context(
+ convert->args,
+ &esrc->ptr_leaf->entries[i].context,
+ &edst->ptr_leaf->entries[i].context,
+ GFP_KERNEL);
if (rc)
return rc;
(*pos)++;
@@ -489,13 +492,15 @@ void sidtab_cancel_convert(struct sidtab *s)
spin_unlock_irqrestore(&s->lock, flags);
}
-void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags) __acquires(&s->lock)
+void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags)
+ __acquires(&s->lock)
{
spin_lock_irqsave(&s->lock, *flags);
s->frozen = true;
s->convert = NULL;
}
-void sidtab_freeze_end(struct sidtab *s, unsigned long *flags) __releases(&s->lock)
+void sidtab_freeze_end(struct sidtab *s, unsigned long *flags)
+ __releases(&s->lock)
{
spin_unlock_irqrestore(&s->lock, *flags);
}
@@ -600,8 +605,8 @@ out_unlock:
kfree_rcu(victim, rcu_member);
}
-int sidtab_sid2str_get(struct sidtab *s, struct sidtab_entry *entry,
- char **out, u32 *out_len)
+int sidtab_sid2str_get(struct sidtab *s, struct sidtab_entry *entry, char **out,
+ u32 *out_len)
{
struct sidtab_str_cache *cache;
int rc = 0;
diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h
index 22258201cd14..832c85c70d83 100644
--- a/security/selinux/ss/sidtab.h
+++ b/security/selinux/ss/sidtab.h
@@ -8,6 +8,7 @@
*
* Copyright (C) 2018 Red Hat, Inc.
*/
+
#ifndef _SS_SIDTAB_H_
#define _SS_SIDTAB_H_
@@ -29,25 +30,26 @@ struct sidtab_entry {
union sidtab_entry_inner {
struct sidtab_node_inner *ptr_inner;
- struct sidtab_node_leaf *ptr_leaf;
+ struct sidtab_node_leaf *ptr_leaf;
};
/* align node size to page boundary */
#define SIDTAB_NODE_ALLOC_SHIFT PAGE_SHIFT
-#define SIDTAB_NODE_ALLOC_SIZE PAGE_SIZE
+#define SIDTAB_NODE_ALLOC_SIZE PAGE_SIZE
-#define size_to_shift(size) ((size) == 1 ? 1 : (const_ilog2((size) - 1) + 1))
+#define size_to_shift(size) ((size) == 1 ? 1 : (const_ilog2((size)-1) + 1))
-#define SIDTAB_INNER_SHIFT \
- (SIDTAB_NODE_ALLOC_SHIFT - size_to_shift(sizeof(union sidtab_entry_inner)))
+#define SIDTAB_INNER_SHIFT \
+ (SIDTAB_NODE_ALLOC_SHIFT - \
+ size_to_shift(sizeof(union sidtab_entry_inner)))
#define SIDTAB_INNER_ENTRIES ((size_t)1 << SIDTAB_INNER_SHIFT)
#define SIDTAB_LEAF_ENTRIES \
(SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry))
#define SIDTAB_MAX_BITS 32
-#define SIDTAB_MAX U32_MAX
+#define SIDTAB_MAX U32_MAX
/* ensure enough tree levels for SIDTAB_MAX entries */
-#define SIDTAB_MAX_LEVEL \
+#define SIDTAB_MAX_LEVEL \
DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \
SIDTAB_INNER_SHIFT)
@@ -69,7 +71,7 @@ struct sidtab_convert_params {
struct sidtab *target;
};
-#define SIDTAB_HASH_BITS CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS
+#define SIDTAB_HASH_BITS CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS
#define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS)
struct sidtab {
@@ -125,8 +127,10 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params);
void sidtab_cancel_convert(struct sidtab *s);
-void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags) __acquires(&s->lock);
-void sidtab_freeze_end(struct sidtab *s, unsigned long *flags) __releases(&s->lock);
+void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags)
+ __acquires(&s->lock);
+void sidtab_freeze_end(struct sidtab *s, unsigned long *flags)
+ __releases(&s->lock);
int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid);
@@ -137,8 +141,8 @@ int sidtab_hash_stats(struct sidtab *sidtab, char *page);
#if CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0
void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry,
const char *str, u32 str_len);
-int sidtab_sid2str_get(struct sidtab *s, struct sidtab_entry *entry,
- char **out, u32 *out_len);
+int sidtab_sid2str_get(struct sidtab *s, struct sidtab_entry *entry, char **out,
+ u32 *out_len);
#else
static inline void sidtab_sid2str_put(struct sidtab *s,
struct sidtab_entry *entry,
@@ -146,13 +150,11 @@ static inline void sidtab_sid2str_put(struct sidtab *s,
{
}
static inline int sidtab_sid2str_get(struct sidtab *s,
- struct sidtab_entry *entry,
- char **out, u32 *out_len)
+ struct sidtab_entry *entry, char **out,
+ u32 *out_len)
{
return -ENOENT;
}
#endif /* CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0 */
-#endif /* _SS_SIDTAB_H_ */
-
-
+#endif /* _SS_SIDTAB_H_ */
diff --git a/security/selinux/ss/symtab.c b/security/selinux/ss/symtab.c
index 43d7f0319ccd..c04f8d447873 100644
--- a/security/selinux/ss/symtab.c
+++ b/security/selinux/ss/symtab.c
@@ -4,6 +4,7 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
@@ -19,7 +20,8 @@ static unsigned int symhash(const void *key)
keyp = key;
size = strlen(keyp);
for (p = keyp; (p - keyp) < size; p++)
- val = (val << 4 | (val >> (8*sizeof(unsigned int)-4))) ^ (*p);
+ val = (val << 4 | (val >> (8 * sizeof(unsigned int) - 4))) ^
+ (*p);
return val;
}
diff --git a/security/selinux/ss/symtab.h b/security/selinux/ss/symtab.h
index 0a3b5de79a0f..8e667cdbf38f 100644
--- a/security/selinux/ss/symtab.h
+++ b/security/selinux/ss/symtab.h
@@ -7,14 +7,15 @@
*
* Author : Stephen Smalley, <stephen.smalley.work@gmail.com>
*/
+
#ifndef _SS_SYMTAB_H_
#define _SS_SYMTAB_H_
#include "hashtab.h"
struct symtab {
- struct hashtab table; /* hash table (keyed on a string) */
- u32 nprim; /* number of primary names in table */
+ struct hashtab table; /* hash table (keyed on a string) */
+ u32 nprim; /* number of primary names in table */
};
int symtab_init(struct symtab *s, u32 size);
@@ -22,6 +23,4 @@ int symtab_init(struct symtab *s, u32 size);
int symtab_insert(struct symtab *s, char *name, void *datum);
void *symtab_search(struct symtab *s, const char *name);
-#endif /* _SS_SYMTAB_H_ */
-
-
+#endif /* _SS_SYMTAB_H_ */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0fdbf04cc258..28be26712396 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -994,57 +994,62 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
struct xattr *xattrs, int *xattr_count)
{
struct task_smack *tsp = smack_cred(current_cred());
+ struct inode_smack *issp = smack_inode(inode);
struct smack_known *skp = smk_of_task(tsp);
struct smack_known *isp = smk_of_inode(inode);
struct smack_known *dsp = smk_of_inode(dir);
struct xattr *xattr = lsm_get_xattr_slot(xattrs, xattr_count);
int may;
- if (xattr) {
- /*
- * If equal, transmuting already occurred in
- * smack_dentry_create_files_as(). No need to check again.
- */
- if (tsp->smk_task != tsp->smk_transmuted) {
- rcu_read_lock();
- may = smk_access_entry(skp->smk_known, dsp->smk_known,
- &skp->smk_rules);
- rcu_read_unlock();
- }
+ /*
+ * If equal, transmuting already occurred in
+ * smack_dentry_create_files_as(). No need to check again.
+ */
+ if (tsp->smk_task != tsp->smk_transmuted) {
+ rcu_read_lock();
+ may = smk_access_entry(skp->smk_known, dsp->smk_known,
+ &skp->smk_rules);
+ rcu_read_unlock();
+ }
+
+ /*
+ * In addition to having smk_task equal to smk_transmuted,
+ * if the access rule allows transmutation and the directory
+ * requests transmutation then by all means transmute.
+ * Mark the inode as changed.
+ */
+ if ((tsp->smk_task == tsp->smk_transmuted) ||
+ (may > 0 && ((may & MAY_TRANSMUTE) != 0) &&
+ smk_inode_transmutable(dir))) {
+ struct xattr *xattr_transmute;
/*
- * In addition to having smk_task equal to smk_transmuted,
- * if the access rule allows transmutation and the directory
- * requests transmutation then by all means transmute.
- * Mark the inode as changed.
+ * The caller of smack_dentry_create_files_as()
+ * should have overridden the current cred, so the
+ * inode label was already set correctly in
+ * smack_inode_alloc_security().
*/
- if ((tsp->smk_task == tsp->smk_transmuted) ||
- (may > 0 && ((may & MAY_TRANSMUTE) != 0) &&
- smk_inode_transmutable(dir))) {
- struct xattr *xattr_transmute;
+ if (tsp->smk_task != tsp->smk_transmuted)
+ isp = issp->smk_inode = dsp;
+
+ issp->smk_flags |= SMK_INODE_TRANSMUTE;
+ xattr_transmute = lsm_get_xattr_slot(xattrs,
+ xattr_count);
+ if (xattr_transmute) {
+ xattr_transmute->value = kmemdup(TRANS_TRUE,
+ TRANS_TRUE_SIZE,
+ GFP_NOFS);
+ if (!xattr_transmute->value)
+ return -ENOMEM;
- /*
- * The caller of smack_dentry_create_files_as()
- * should have overridden the current cred, so the
- * inode label was already set correctly in
- * smack_inode_alloc_security().
- */
- if (tsp->smk_task != tsp->smk_transmuted)
- isp = dsp;
- xattr_transmute = lsm_get_xattr_slot(xattrs,
- xattr_count);
- if (xattr_transmute) {
- xattr_transmute->value = kmemdup(TRANS_TRUE,
- TRANS_TRUE_SIZE,
- GFP_NOFS);
- if (!xattr_transmute->value)
- return -ENOMEM;
-
- xattr_transmute->value_len = TRANS_TRUE_SIZE;
- xattr_transmute->name = XATTR_SMACK_TRANSMUTE;
- }
+ xattr_transmute->value_len = TRANS_TRUE_SIZE;
+ xattr_transmute->name = XATTR_SMACK_TRANSMUTE;
}
+ }
+
+ issp->smk_flags |= SMK_INODE_INSTANT;
+ if (xattr) {
xattr->value = kstrdup(isp->smk_known, GFP_NOFS);
if (!xattr->value)
return -ENOMEM;
@@ -1233,12 +1238,14 @@ static int smack_inode_permission(struct inode *inode, int mask)
/**
* smack_inode_setattr - Smack check for setting attributes
+ * @idmap: idmap of the mount
* @dentry: the object
* @iattr: for the force flag
*
* Returns 0 if access is permitted, an error code otherwise
*/
-static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+static int smack_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *iattr)
{
struct smk_audit_info ad;
int rc;
@@ -1314,7 +1321,8 @@ static int smack_inode_setxattr(struct mnt_idmap *idmap,
check_star = 1;
} else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) {
check_priv = 1;
- if (size != TRANS_TRUE_SIZE ||
+ if (!S_ISDIR(d_backing_inode(dentry)->i_mode) ||
+ size != TRANS_TRUE_SIZE ||
strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0)
rc = -EINVAL;
} else
@@ -2095,12 +2103,7 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
struct task_smack *old_tsp = smack_cred(old);
struct task_smack *new_tsp = smack_cred(new);
- new_tsp->smk_task = old_tsp->smk_task;
- new_tsp->smk_forked = old_tsp->smk_task;
- mutex_init(&new_tsp->smk_rules_lock);
- INIT_LIST_HEAD(&new_tsp->smk_rules);
-
- /* cbs copy rule list */
+ init_task_smack(new_tsp, old_tsp->smk_task, old_tsp->smk_task);
}
/**
@@ -2855,6 +2858,15 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name,
if (value == NULL || size > SMK_LONGLABEL || size == 0)
return -EINVAL;
+ if (strcmp(name, XATTR_SMACK_TRANSMUTE) == 0) {
+ if (!S_ISDIR(inode->i_mode) || size != TRANS_TRUE_SIZE ||
+ strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0)
+ return -EINVAL;
+
+ nsp->smk_flags |= SMK_INODE_TRANSMUTE;
+ return 0;
+ }
+
skp = smk_import_entry(value, size);
if (IS_ERR(skp))
return PTR_ERR(skp);
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 57ee70ae50f2..ea3140d510ec 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2649,13 +2649,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
{
int error = buffer_len;
size_t avail_len = buffer_len;
- char *cp0 = head->write_buf;
+ char *cp0;
int idx;
if (!head->write)
return -EINVAL;
if (mutex_lock_interruptible(&head->io_sem))
return -EINTR;
+ cp0 = head->write_buf;
head->read_user_buf_avail = 0;
idx = tomoyo_read_lock();
/* Read a line and dispatch it to the policy handler. */
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 3c3af149bf1c..04a92c3d65d4 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -328,7 +328,8 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
static int tomoyo_file_open(struct file *f)
{
/* Don't check read permission here if called from execve(). */
- if (current->in_execve)
+ /* Illogically, FMODE_EXEC is in f_flags, not f_mode. */
+ if (f->f_flags & __FMODE_EXEC)
return 0;
return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path,
f->f_flags);
diff --git a/sound/core/Makefile b/sound/core/Makefile
index a6b444ee2832..f6526b337137 100644
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -32,7 +32,6 @@ snd-ump-objs := ump.o
snd-ump-$(CONFIG_SND_UMP_LEGACY_RAWMIDI) += ump_convert.o
snd-timer-objs := timer.o
snd-hrtimer-objs := hrtimer.o
-snd-rtctimer-objs := rtctimer.o
snd-hwdep-objs := hwdep.o
snd-seq-device-objs := seq_device.o
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index a09f0154e6a7..d0788126cbab 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -211,6 +211,10 @@ static const char * const snd_pcm_format_names[] = {
FORMAT(DSD_U32_LE),
FORMAT(DSD_U16_BE),
FORMAT(DSD_U32_BE),
+ FORMAT(S20_LE),
+ FORMAT(S20_BE),
+ FORMAT(U20_LE),
+ FORMAT(U20_BE),
};
/**
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index f5ff00f99788..21baf6bf7e25 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -486,6 +486,11 @@ static int fixup_unreferenced_params(struct snd_pcm_substream *substream,
i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS);
if (snd_interval_single(i))
params->msbits = snd_interval_value(i);
+ m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT);
+ if (snd_mask_single(m)) {
+ snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m);
+ params->msbits = snd_pcm_format_width(format);
+ }
}
if (params->msbits) {
diff --git a/sound/core/ump.c b/sound/core/ump.c
index 3bef1944e955..fe7911498cc4 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -985,7 +985,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream)
struct snd_ump_endpoint *ump = substream->rmidi->private_data;
int dir = substream->stream;
int group = ump->legacy_mapping[substream->number];
- int err;
+ int err = 0;
mutex_lock(&ump->open_mutex);
if (ump->legacy_substreams[dir][group]) {
@@ -1009,7 +1009,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream)
spin_unlock_irq(&ump->legacy_locks[dir]);
unlock:
mutex_unlock(&ump->open_mutex);
- return 0;
+ return err;
}
static int snd_ump_legacy_close(struct snd_rawmidi_substream *substream)
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index a13c0b408aad..7be17bca257f 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -951,7 +951,7 @@ static int generate_tx_packet_descs(struct amdtp_stream *s, struct pkt_desc *des
// to the reason.
unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle,
IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES);
- lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0);
+ lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0);
}
if (lost) {
dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n",
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 21a90b3c4cc7..8e0ff70fb610 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -156,7 +156,7 @@ config SND_HDA_SCODEC_CS35L56_I2C
depends on I2C
depends on ACPI || COMPILE_TEST
depends on SND_SOC
- select CS_DSP
+ select FW_CS_DSP
select SND_HDA_GENERIC
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
@@ -171,7 +171,7 @@ config SND_HDA_SCODEC_CS35L56_SPI
depends on SPI_MASTER
depends on ACPI || COMPILE_TEST
depends on SND_SOC
- select CS_DSP
+ select FW_CS_DSP
select SND_HDA_GENERIC
select SND_SOC_CS35L56_SHARED
select SND_HDA_SCODEC_CS35L56
diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c
index 35277ce890a4..e436d4dab317 100644
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
@@ -76,11 +76,14 @@ static const struct cs35l41_config cs35l41_config_table[] = {
{ "10431533", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431573", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "10431663", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 },
+ { "10431683", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "104316A3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
{ "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
{ "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
{ "104317F3", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431863", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "104318D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "10431A83", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431C9F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "10431CAF", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "10431CCF", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
@@ -89,10 +92,14 @@ static const struct cs35l41_config cs35l41_config_table[] = {
{ "10431D1F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431DA2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
{ "10431E02", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "10431E12", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "10431EE2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
{ "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
{ "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+ { "17AA38A9", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
+ { "17AA38AB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
@@ -410,11 +417,14 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CSC3551", "10431533", generic_dsd_config },
{ "CSC3551", "10431573", generic_dsd_config },
{ "CSC3551", "10431663", generic_dsd_config },
+ { "CSC3551", "10431683", generic_dsd_config },
+ { "CSC3551", "104316A3", generic_dsd_config },
{ "CSC3551", "104316D3", generic_dsd_config },
{ "CSC3551", "104316F3", generic_dsd_config },
{ "CSC3551", "104317F3", generic_dsd_config },
{ "CSC3551", "10431863", generic_dsd_config },
{ "CSC3551", "104318D3", generic_dsd_config },
+ { "CSC3551", "10431A83", generic_dsd_config },
{ "CSC3551", "10431C9F", generic_dsd_config },
{ "CSC3551", "10431CAF", generic_dsd_config },
{ "CSC3551", "10431CCF", generic_dsd_config },
@@ -423,10 +433,14 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CSC3551", "10431D1F", generic_dsd_config },
{ "CSC3551", "10431DA2", generic_dsd_config },
{ "CSC3551", "10431E02", generic_dsd_config },
+ { "CSC3551", "10431E12", generic_dsd_config },
{ "CSC3551", "10431EE2", generic_dsd_config },
{ "CSC3551", "10431F12", generic_dsd_config },
{ "CSC3551", "10431F1F", generic_dsd_config },
{ "CSC3551", "10431F62", generic_dsd_config },
+ { "CSC3551", "17AA386F", generic_dsd_config },
+ { "CSC3551", "17AA38A9", generic_dsd_config },
+ { "CSC3551", "17AA38AB", generic_dsd_config },
{ "CSC3551", "17AA38B4", generic_dsd_config },
{ "CSC3551", "17AA38B5", generic_dsd_config },
{ "CSC3551", "17AA38B6", generic_dsd_config },
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index b61e1de8c4bf..75a14ba54fcd 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -30,14 +30,23 @@
* ASP1_RX_WL = 24 bits per sample
* ASP1_TX_WL = 24 bits per sample
* ASP1_RXn_EN 1..3 and ASP1_TXn_EN 1..4 disabled
+ *
+ * Override any Windows-specific mixer settings applied by the firmware.
*/
static const struct reg_sequence cs35l56_hda_dai_config[] = {
{ CS35L56_ASP1_CONTROL1, 0x00000021 },
{ CS35L56_ASP1_CONTROL2, 0x20200200 },
{ CS35L56_ASP1_CONTROL3, 0x00000003 },
+ { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 },
+ { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 },
{ CS35L56_ASP1_DATA_CONTROL5, 0x00000018 },
{ CS35L56_ASP1_DATA_CONTROL1, 0x00000018 },
{ CS35L56_ASP1_ENABLES1, 0x00000000 },
+ { CS35L56_ASP1TX1_INPUT, 0x00000018 },
+ { CS35L56_ASP1TX2_INPUT, 0x00000019 },
+ { CS35L56_ASP1TX3_INPUT, 0x00000020 },
+ { CS35L56_ASP1TX4_INPUT, 0x00000028 },
+
};
static void cs35l56_hda_play(struct cs35l56_hda *cs35l56)
@@ -133,6 +142,10 @@ static int cs35l56_hda_runtime_resume(struct device *dev)
}
}
+ ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base);
+ if (ret)
+ goto err;
+
return 0;
err:
@@ -384,7 +397,7 @@ static const struct cs_dsp_client_ops cs35l56_hda_client_ops = {
static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
const struct firmware **firmware, char **filename,
- const char *dir, const char *system_name,
+ const char *base_name, const char *system_name,
const char *amp_name,
const char *filetype)
{
@@ -392,17 +405,13 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
int ret = 0;
if (system_name && amp_name)
- *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s-%s.%s", dir,
- cs35l56->base.secured ? "s" : "", cs35l56->base.rev,
+ *filename = kasprintf(GFP_KERNEL, "%s-%s-%s.%s", base_name,
system_name, amp_name, filetype);
else if (system_name)
- *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s.%s", dir,
- cs35l56->base.secured ? "s" : "", cs35l56->base.rev,
+ *filename = kasprintf(GFP_KERNEL, "%s-%s.%s", base_name,
system_name, filetype);
else
- *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc.%s", dir,
- cs35l56->base.secured ? "s" : "", cs35l56->base.rev,
- filetype);
+ *filename = kasprintf(GFP_KERNEL, "%s.%s", base_name, filetype);
if (!*filename)
return -ENOMEM;
@@ -435,8 +444,8 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
return 0;
}
-static const char cirrus_dir[] = "cirrus/";
static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56,
+ unsigned int preloaded_fw_ver,
const struct firmware **wmfw_firmware,
char **wmfw_filename,
const struct firmware **coeff_firmware,
@@ -444,55 +453,73 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56,
{
const char *system_name = cs35l56->system_name;
const char *amp_name = cs35l56->amp_name;
+ char base_name[37];
int ret;
+ if (preloaded_fw_ver) {
+ snprintf(base_name, sizeof(base_name),
+ "cirrus/cs35l56-%02x%s-%06x-dsp1-misc",
+ cs35l56->base.rev,
+ cs35l56->base.secured ? "-s" : "",
+ preloaded_fw_ver & 0xffffff);
+ } else {
+ snprintf(base_name, sizeof(base_name),
+ "cirrus/cs35l56-%02x%s-dsp1-misc",
+ cs35l56->base.rev,
+ cs35l56->base.secured ? "-s" : "");
+ }
+
if (system_name && amp_name) {
if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename,
- cirrus_dir, system_name, amp_name, "wmfw")) {
+ base_name, system_name, amp_name, "wmfw")) {
cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
- cirrus_dir, system_name, amp_name, "bin");
+ base_name, system_name, amp_name, "bin");
return;
}
}
if (system_name) {
if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename,
- cirrus_dir, system_name, NULL, "wmfw")) {
+ base_name, system_name, NULL, "wmfw")) {
if (amp_name)
cs35l56_hda_request_firmware_file(cs35l56,
coeff_firmware, coeff_filename,
- cirrus_dir, system_name,
+ base_name, system_name,
amp_name, "bin");
if (!*coeff_firmware)
cs35l56_hda_request_firmware_file(cs35l56,
coeff_firmware, coeff_filename,
- cirrus_dir, system_name,
+ base_name, system_name,
NULL, "bin");
return;
}
+
+ /*
+ * Check for system-specific bin files without wmfw before
+ * falling back to generic firmware
+ */
+ if (amp_name)
+ cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
+ base_name, system_name, amp_name, "bin");
+ if (!*coeff_firmware)
+ cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
+ base_name, system_name, NULL, "bin");
+
+ if (*coeff_firmware)
+ return;
}
ret = cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename,
- cirrus_dir, NULL, NULL, "wmfw");
+ base_name, NULL, NULL, "wmfw");
if (!ret) {
cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
- cirrus_dir, NULL, NULL, "bin");
+ base_name, NULL, NULL, "bin");
return;
}
- /* When a firmware file is not found must still search for the coeff files */
- if (system_name) {
- if (amp_name)
- cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
- cirrus_dir, system_name, amp_name, "bin");
- if (!*coeff_firmware)
- cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
- cirrus_dir, system_name, NULL, "bin");
- }
-
if (!*coeff_firmware)
cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
- cirrus_dir, NULL, NULL, "bin");
+ base_name, NULL, NULL, "bin");
}
static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmware,
@@ -526,7 +553,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
const struct firmware *wmfw_firmware = NULL;
char *coeff_filename = NULL;
char *wmfw_filename = NULL;
- unsigned int firmware_missing;
+ unsigned int preloaded_fw_ver;
+ bool firmware_missing;
int ret = 0;
/* Prepare for a new DSP power-up */
@@ -537,24 +565,21 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
pm_runtime_get_sync(cs35l56->base.dev);
- ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing);
- if (ret) {
- dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret);
+ /*
+ * The firmware can only be upgraded if it is currently running
+ * from the built-in ROM. If not, the wmfw/bin must be for the
+ * version of firmware that is running on the chip.
+ */
+ ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &preloaded_fw_ver);
+ if (ret)
goto err_pm_put;
- }
- firmware_missing &= CS35L56_FIRMWARE_MISSING;
+ if (firmware_missing)
+ preloaded_fw_ver = 0;
- /*
- * Firmware can only be downloaded if the CS35L56 is secured or is
- * running from the built-in ROM. If it is secured the BIOS will have
- * downloaded firmware, and the wmfw/bin files will only contain
- * tunings that are safe to download with the firmware running.
- */
- if (cs35l56->base.secured || firmware_missing) {
- cs35l56_hda_request_firmware_files(cs35l56, &wmfw_firmware, &wmfw_filename,
- &coeff_firmware, &coeff_filename);
- }
+ cs35l56_hda_request_firmware_files(cs35l56, preloaded_fw_ver,
+ &wmfw_firmware, &wmfw_filename,
+ &coeff_firmware, &coeff_filename);
/*
* If the BIOS didn't patch the firmware a bin file is mandatory to
@@ -569,12 +594,12 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
mutex_lock(&cs35l56->base.irq_lock);
/*
- * When the device is running in secure mode the firmware files can
- * only contain insecure tunings and therefore we do not need to
- * shutdown the firmware to apply them and can use the lower cost
- * reinit sequence instead.
+ * If the firmware hasn't been patched it must be shutdown before
+ * doing a full patch and reset afterwards. If it is already
+ * running a patched version the firmware files only contain
+ * tunings and we can use the lower cost reinit sequence instead.
*/
- if (!cs35l56->base.secured && (wmfw_firmware || coeff_firmware)) {
+ if (firmware_missing && (wmfw_firmware || coeff_firmware)) {
ret = cs35l56_firmware_shutdown(&cs35l56->base);
if (ret)
goto err;
@@ -593,7 +618,7 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
if (coeff_filename)
dev_dbg(cs35l56->base.dev, "Loaded Coefficients: %s\n", coeff_filename);
- if (cs35l56->base.secured) {
+ if (!firmware_missing) {
ret = cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT);
if (ret)
goto err_powered_up;
@@ -976,6 +1001,9 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id)
regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config,
ARRAY_SIZE(cs35l56_hda_dai_config));
+ ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base);
+ if (ret)
+ goto err;
/*
* By default only enable one ASP1TXn, where n=amplifier index,
@@ -1035,16 +1063,6 @@ const struct dev_pm_ops cs35l56_hda_pm_ops = {
};
EXPORT_SYMBOL_NS_GPL(cs35l56_hda_pm_ops, SND_HDA_SCODEC_CS35L56);
-#if IS_ENABLED(CONFIG_SND_HDA_SCODEC_CS35L56_KUNIT_TEST)
-/* Hooks to export static function to KUnit test */
-
-int cs35l56_hda_test_hook_get_speaker_id(struct device *dev, int amp_index, int num_amps)
-{
- return cs35l56_hda_get_speaker_id(dev, amp_index, num_amps);
-}
-EXPORT_SYMBOL_NS_GPL(cs35l56_hda_test_hook_get_speaker_id, SND_HDA_SCODEC_CS35L56);
-#endif
-
MODULE_DESCRIPTION("CS35L56 HDA Driver");
MODULE_IMPORT_NS(SND_HDA_CIRRUS_SCODEC);
MODULE_IMPORT_NS(SND_HDA_CS_DSP_CONTROLS);
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 3e7bfeee84fd..efe98f6f19a3 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -1207,6 +1207,9 @@ int azx_probe_codecs(struct azx *chip, unsigned int max_slots)
dev_warn(chip->card->dev,
"Codec #%d probe error; disabling it...\n", c);
bus->codec_mask &= ~(1 << c);
+ /* no codecs */
+ if (bus->codec_mask == 0)
+ break;
/* More badly, accessing to a non-existing
* codec often screws up the controller chip,
* and disturbs the further communications.
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 2276adc84478..1b550c42db09 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1729,9 +1729,11 @@ static int default_bdl_pos_adj(struct azx *chip)
/* some exceptions: Atoms seem problematic with value 1 */
if (chip->pci->vendor == PCI_VENDOR_ID_INTEL) {
switch (chip->pci->device) {
- case 0x0f04: /* Baytrail */
- case 0x2284: /* Braswell */
+ case PCI_DEVICE_ID_INTEL_HDA_BYT:
+ case PCI_DEVICE_ID_INTEL_HDA_BSW:
return 32;
+ case PCI_DEVICE_ID_INTEL_HDA_APL:
+ return 64;
}
}
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e8819e8a9876..e8209178d87b 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -344,6 +344,7 @@ enum {
CXT_FIXUP_HP_ZBOOK_MUTE_LED,
CXT_FIXUP_HEADSET_MIC,
CXT_FIXUP_HP_MIC_NO_PRESENCE,
+ CXT_PINCFG_SWS_JS201D,
};
/* for hda_fixup_thinkpad_acpi() */
@@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = {
{}
};
+/* SuoWoSi/South-holding JS201D with sn6140 */
+static const struct hda_pintbl cxt_pincfg_sws_js201d[] = {
+ { 0x16, 0x03211040 }, /* hp out */
+ { 0x17, 0x91170110 }, /* SPK/Class_D */
+ { 0x18, 0x95a70130 }, /* Internal mic */
+ { 0x19, 0x03a11020 }, /* Headset Mic */
+ { 0x1a, 0x40f001f0 }, /* Not used */
+ { 0x21, 0x40f001f0 }, /* Not used */
+ {}
+};
+
static const struct hda_fixup cxt_fixups[] = {
[CXT_PINCFG_LENOVO_X200] = {
.type = HDA_FIXUP_PINS,
@@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = {
.chained = true,
.chain_id = CXT_FIXUP_HEADSET_MIC,
},
+ [CXT_PINCFG_SWS_JS201D] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = cxt_pincfg_sws_js201d,
+ },
};
static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
+ SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D),
SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410),
@@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
{ .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" },
{ .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
{ .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" },
+ { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" },
{}
};
diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c
index 627899959ffe..e41316e2e983 100644
--- a/sound/pci/hda/patch_cs8409.c
+++ b/sound/pci/hda/patch_cs8409.c
@@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac
spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1;
spec->scodecs[CS8409_CODEC1]->codec = codec;
spec->num_scodecs = 2;
+ spec->gen.suppress_vmaster = 1;
codec->patch_ops = cs8409_dolphin_patch_ops;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index f6f16622f9cc..a1facdb98d9a 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -439,6 +439,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
fallthrough;
case 0x10ec0215:
+ case 0x10ec0285:
+ case 0x10ec0289:
+ alc_update_coef_idx(codec, 0x36, 1<<13, 0);
+ fallthrough;
case 0x10ec0230:
case 0x10ec0233:
case 0x10ec0235:
@@ -452,9 +456,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0283:
case 0x10ec0286:
case 0x10ec0288:
- case 0x10ec0285:
case 0x10ec0298:
- case 0x10ec0289:
case 0x10ec0300:
alc_update_coef_idx(codec, 0x10, 1<<9, 0);
break;
@@ -3682,6 +3684,7 @@ static void alc285_hp_init(struct hda_codec *codec)
int i, val;
int coef38, coef0d, coef36;
+ alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */
alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */
coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */
coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */
@@ -7442,6 +7445,7 @@ enum {
ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
ALC298_FIXUP_LENOVO_C940_DUET7,
+ ALC287_FIXUP_LENOVO_14IRP8_DUETITL,
ALC287_FIXUP_13S_GEN2_SPEAKERS,
ALC256_FIXUP_SET_COEF_DEFAULTS,
ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
@@ -7493,6 +7497,26 @@ static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec,
__snd_hda_apply_fixup(codec, id, action, 0);
}
+/* A special fixup for Lenovo Slim/Yoga Pro 9 14IRP8 and Yoga DuetITL 2021;
+ * 14IRP8 PCI SSID will mistakenly be matched with the DuetITL codec SSID,
+ * so we need to apply a different fixup in this case. The only DuetITL codec
+ * SSID reported so far is the 17aa:3802 while the 14IRP8 has the 17aa:38be
+ * and 17aa:38bf. If it weren't for the PCI SSID, the 14IRP8 models would
+ * have matched correctly by their codecs.
+ */
+static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ int id;
+
+ if (codec->core.subsystem_id == 0x17aa3802)
+ id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* DuetITL */
+ else
+ id = ALC287_FIXUP_TAS2781_I2C; /* 14IRP8 */
+ __snd_hda_apply_fixup(codec, id, action, 0);
+}
+
static const struct hda_fixup alc269_fixups[] = {
[ALC269_FIXUP_GPIO2] = {
.type = HDA_FIXUP_FUNC,
@@ -9377,6 +9401,10 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc298_fixup_lenovo_c940_duet7,
},
+ [ALC287_FIXUP_LENOVO_14IRP8_DUETITL] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc287_fixup_lenovo_14irp8_duetitl,
+ },
[ALC287_FIXUP_13S_GEN2_SPEAKERS] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -9577,13 +9605,13 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cs35l41_fixup_i2c_two,
.chained = true,
- .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
},
[ALC287_FIXUP_TAS2781_I2C] = {
.type = HDA_FIXUP_FUNC,
.v.func = tas2781_fixup_i2c,
.chained = true,
- .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ .chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK,
},
[ALC287_FIXUP_YOGA7_14ARB7_I2C] = {
.type = HDA_FIXUP_FUNC,
@@ -9604,6 +9632,8 @@ static const struct hda_fixup alc269_fixups[] = {
[ALC287_FIXUP_THINKPAD_I2S_SPK] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc287_fixup_bind_dacs,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
},
[ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = {
.type = HDA_FIXUP_FUNC,
@@ -9653,6 +9683,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
@@ -9732,12 +9763,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x0c0b, "Dell Oasis 14 RPL-P", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x0c0e, "Dell Oasis 16", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
+ SND_PCI_QUIRK(0x1028, 0x0c28, "Dell Inspiron 16 Plus 7630", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
SND_PCI_QUIRK(0x1028, 0x0c4d, "Dell", ALC287_FIXUP_CS35L41_I2C_4),
SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2),
@@ -9852,6 +9887,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -9893,6 +9929,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
@@ -9918,16 +9955,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
+ SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9955,8 +9996,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c90, "HP EliteBook 640", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9994,6 +10041,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x16d3, "ASUS UX5304VA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
@@ -10037,14 +10085,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
- SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
- SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10235,7 +10281,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
- SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8 / DuetITL 2021", ALC287_FIXUP_LENOVO_14IRP8_DUETITL),
SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7),
SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
@@ -10251,6 +10297,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6),
SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
+ SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
@@ -10260,6 +10307,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
+ SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10322,6 +10371,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO),
+ SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
@@ -10951,6 +11001,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
* at most one tbl is allowed to define for the same vendor and same codec
*/
static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1025, "Acer", ALC2XX_FIXUP_HEADSET_MIC,
+ {0x19, 0x40000000}),
SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
{0x19, 0x40000000},
{0x1b, 0x40000000}),
@@ -11640,8 +11692,7 @@ static void alc897_hp_automute_hook(struct hda_codec *codec,
snd_hda_gen_hp_automute(codec, jack);
vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP;
- snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
- vref);
+ snd_hda_set_pin_ctl(codec, 0x1b, vref);
}
static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec,
@@ -11650,6 +11701,10 @@ static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec,
struct alc_spec *spec = codec->spec;
if (action == HDA_FIXUP_ACT_PRE_PROBE) {
spec->gen.hp_automute_hook = alc897_hp_automute_hook;
+ spec->no_shutup_pins = 1;
+ }
+ if (action == HDA_FIXUP_ACT_PROBE) {
+ snd_hda_set_pin_ctl_cache(codec, 0x1a, PIN_IN | AC_PINCTL_VREF_100);
}
}
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c
index 2dd809de62e5..1bfb00102a77 100644
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -710,7 +710,7 @@ static int tas2781_hda_bind(struct device *dev, struct device *master,
strscpy(comps->name, dev_name(dev), sizeof(comps->name));
- ret = tascodec_init(tas_hda->priv, codec, tasdev_fw_ready);
+ ret = tascodec_init(tas_hda->priv, codec, THIS_MODULE, tasdev_fw_ready);
if (!ret)
comps->playback_hook = tas2781_hda_playback_hook;
diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c
index c90ec3419247..504d1b8c4cbb 100644
--- a/sound/soc/amd/acp/acp-mach-common.c
+++ b/sound/soc/amd/acp/acp-mach-common.c
@@ -505,6 +505,13 @@ static int acp_card_rt5682s_hw_params(struct snd_pcm_substream *substream,
clk_set_rate(drvdata->wclk, srate);
clk_set_rate(drvdata->bclk, srate * ch * format);
+ if (!drvdata->soc_mclk) {
+ ret = acp_clk_enable(drvdata, srate, ch * format);
+ if (ret < 0) {
+ dev_err(rtd->card->dev, "Failed to enable HS clk: %d\n", ret);
+ return ret;
+ }
+ }
return 0;
}
@@ -1464,8 +1471,13 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card)
if (drv_data->amp_cpu_id == I2S_SP) {
links[i].name = "acp-amp-codec";
links[i].id = AMP_BE_ID;
- links[i].cpus = sof_sp_virtual;
- links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual);
+ if (drv_data->platform == RENOIR) {
+ links[i].cpus = sof_sp;
+ links[i].num_cpus = ARRAY_SIZE(sof_sp);
+ } else {
+ links[i].cpus = sof_sp_virtual;
+ links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual);
+ }
links[i].platforms = sof_component;
links[i].num_platforms = ARRAY_SIZE(sof_component);
links[i].dpcm_playback = 1;
diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c
index 2a9fd3275e42..20b94814a046 100644
--- a/sound/soc/amd/acp/acp-sof-mach.c
+++ b/sound/soc/amd/acp/acp-sof-mach.c
@@ -48,6 +48,7 @@ static struct acp_card_drvdata sof_rt5682s_rt1019_data = {
.hs_codec_id = RT5682S,
.amp_codec_id = RT1019,
.dmic_codec_id = DMIC,
+ .platform = RENOIR,
.tdm_mode = false,
};
@@ -58,6 +59,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = {
.hs_codec_id = RT5682S,
.amp_codec_id = MAX98360A,
.dmic_codec_id = DMIC,
+ .platform = RENOIR,
.tdm_mode = false,
};
@@ -68,6 +70,7 @@ static struct acp_card_drvdata sof_nau8825_data = {
.hs_codec_id = NAU8825,
.amp_codec_id = MAX98360A,
.dmic_codec_id = DMIC,
+ .platform = REMBRANDT,
.soc_mclk = true,
.tdm_mode = false,
};
@@ -79,6 +82,7 @@ static struct acp_card_drvdata sof_rt5682s_hs_rt1019_data = {
.hs_codec_id = RT5682S,
.amp_codec_id = RT1019,
.dmic_codec_id = DMIC,
+ .platform = REMBRANDT,
.soc_mclk = true,
.tdm_mode = false,
};
diff --git a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c
index f85b85ea4be9..2b0aa270a3e9 100644
--- a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c
+++ b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c
@@ -358,6 +358,14 @@ static const struct dmi_system_id acp3x_es83xx_dmi_table[] = {
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1010"),
+ },
+ .driver_data = (void *)(ES83XX_ENABLE_DMIC),
+ },
+ {
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"),
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1020"),
},
.driver_data = (void *)(ES83XX_ENABLE_DMIC),
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index d83cb6e4c62a..90360f8b3e81 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -203,6 +203,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "21J2"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "21J0"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "21J5"),
}
},
@@ -238,6 +252,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "82UU"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
}
},
@@ -251,6 +272,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "83AS"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"),
}
@@ -300,6 +328,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"),
DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"),
}
@@ -384,6 +419,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_BOARD_NAME, "8BD6"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"),
DMI_MATCH(DMI_BOARD_NAME, "MRID6"),
}
diff --git a/sound/soc/amd/yc/pci-acp6x.c b/sound/soc/amd/yc/pci-acp6x.c
index 7af6a349b1d4..694b8e313902 100644
--- a/sound/soc/amd/yc/pci-acp6x.c
+++ b/sound/soc/amd/yc/pci-acp6x.c
@@ -162,6 +162,7 @@ static int snd_acp6x_probe(struct pci_dev *pci,
/* Yellow Carp device check */
switch (pci->revision) {
case 0x60:
+ case 0x63:
case 0x6f:
break;
default:
diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c
index 44c221745c3b..2392c6effed8 100644
--- a/sound/soc/codecs/cs35l45.c
+++ b/sound/soc/codecs/cs35l45.c
@@ -184,7 +184,7 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component,
else
snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name);
- kcontrol = snd_soc_card_get_kcontrol(component->card, name);
+ kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name);
if (!kcontrol) {
dev_err(component->dev, "Can't find kcontrol %s\n", name);
return -EINVAL;
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c
index 953ba066bab1..cb4e83126b08 100644
--- a/sound/soc/codecs/cs35l56-shared.c
+++ b/sound/soc/codecs/cs35l56-shared.c
@@ -5,6 +5,7 @@
// Copyright (C) 2023 Cirrus Logic, Inc. and
// Cirrus Logic International Semiconductor Ltd.
+#include <linux/gpio/consumer.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/types.h>
@@ -12,6 +13,15 @@
#include "cs35l56.h"
static const struct reg_sequence cs35l56_patch[] = {
+ /*
+ * Firmware can change these to non-defaults to satisfy SDCA.
+ * Ensure that they are at known defaults.
+ */
+ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 },
+ { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 },
+ { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 },
+ { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 },
+
/* These are not reset by a soft-reset, so patch to defaults. */
{ CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 },
{ CS35L56_MAIN_RENDER_USER_VOLUME, 0x00000000 },
@@ -34,15 +44,13 @@ static const struct reg_default cs35l56_reg_defaults[] = {
{ CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 },
{ CS35L56_ASP1_DATA_CONTROL1, 0x00000018 },
{ CS35L56_ASP1_DATA_CONTROL5, 0x00000018 },
- { CS35L56_ASP1TX1_INPUT, 0x00000018 },
- { CS35L56_ASP1TX2_INPUT, 0x00000019 },
- { CS35L56_ASP1TX3_INPUT, 0x00000020 },
- { CS35L56_ASP1TX4_INPUT, 0x00000028 },
+
+ /* no defaults for ASP1TX mixer */
+
{ CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 },
{ CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 },
{ CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 },
{ CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 },
- { CS35L56_IRQ1_CFG, 0x00000000 },
{ CS35L56_IRQ1_MASK_1, 0x83ffffff },
{ CS35L56_IRQ1_MASK_2, 0xffff7fff },
{ CS35L56_IRQ1_MASK_4, 0xe0ffffff },
@@ -195,6 +203,47 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
}
}
+/*
+ * The firmware boot sequence can overwrite the ASP1 config registers so that
+ * they don't match regmap's view of their values. Rewrite the values from the
+ * regmap cache into the hardware registers.
+ */
+int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base)
+{
+ struct reg_sequence asp1_regs[] = {
+ { .reg = CS35L56_ASP1_ENABLES1 },
+ { .reg = CS35L56_ASP1_CONTROL1 },
+ { .reg = CS35L56_ASP1_CONTROL2 },
+ { .reg = CS35L56_ASP1_CONTROL3 },
+ { .reg = CS35L56_ASP1_FRAME_CONTROL1 },
+ { .reg = CS35L56_ASP1_FRAME_CONTROL5 },
+ { .reg = CS35L56_ASP1_DATA_CONTROL1 },
+ { .reg = CS35L56_ASP1_DATA_CONTROL5 },
+ };
+ int i, ret;
+
+ /* Read values from regmap cache into a write sequence */
+ for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) {
+ ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def);
+ if (ret)
+ goto err;
+ }
+
+ /* Write the values cache-bypassed so that they will be written to silicon */
+ ret = regmap_multi_reg_write_bypassed(cs35l56_base->regmap, asp1_regs,
+ ARRAY_SIZE(asp1_regs));
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ dev_err(cs35l56_base->dev, "Failed to sync ASP1 registers: %d\n", ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_force_sync_asp1_registers_from_cache, SND_SOC_CS35L56_SHARED);
+
int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command)
{
unsigned int val;
@@ -286,6 +335,7 @@ void cs35l56_wait_min_reset_pulse(void)
EXPORT_SYMBOL_NS_GPL(cs35l56_wait_min_reset_pulse, SND_SOC_CS35L56_SHARED);
static const struct reg_sequence cs35l56_system_reset_seq[] = {
+ REG_SEQ0(CS35L56_DSP1_HALO_STATE, 0),
REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET),
};
@@ -400,17 +450,6 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base)
unsigned int val;
int ret;
- /* Nothing to re-patch if we haven't done any patching yet. */
- if (!cs35l56_base->fw_patched)
- return false;
-
- /*
- * If we have control of RESET we will have asserted it so the firmware
- * will need re-patching.
- */
- if (cs35l56_base->reset_gpio)
- return true;
-
/*
* In secure mode FIRMWARE_MISSING is cleared by the BIOS loader so
* can't be used here to test for memory retention.
@@ -590,10 +629,35 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds
}
EXPORT_SYMBOL_NS_GPL(cs35l56_init_cs_dsp, SND_SOC_CS35L56_SHARED);
+int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
+ bool *fw_missing, unsigned int *fw_version)
+{
+ unsigned int prot_status;
+ int ret;
+
+ ret = regmap_read(cs35l56_base->regmap, CS35L56_PROTECTION_STATUS, &prot_status);
+ if (ret) {
+ dev_err(cs35l56_base->dev, "Get PROTECTION_STATUS failed: %d\n", ret);
+ return ret;
+ }
+
+ *fw_missing = !!(prot_status & CS35L56_FIRMWARE_MISSING);
+
+ ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP1_FW_VER, fw_version);
+ if (ret) {
+ dev_err(cs35l56_base->dev, "Get FW VER failed: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, SND_SOC_CS35L56_SHARED);
+
int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
{
int ret;
- unsigned int devid, revid, otpid, secured;
+ unsigned int devid, revid, otpid, secured, fw_ver;
+ bool fw_missing;
/*
* When the system is not using a reset_gpio ensure the device is
@@ -652,8 +716,13 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
return ret;
}
- dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d\n",
- cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid);
+ ret = cs35l56_read_prot_status(cs35l56_base, &fw_missing, &fw_ver);
+ if (ret)
+ return ret;
+
+ dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d fw:%d.%d.%d (patched=%u)\n",
+ cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid,
+ fw_ver >> 16, (fw_ver >> 8) & 0xff, fw_ver & 0xff, !fw_missing);
/* Wake source and *_BLOCKED interrupts default to unmasked, so mask them */
regmap_write(cs35l56_base->regmap, CS35L56_IRQ1_MASK_20, 0xffffffff);
@@ -668,6 +737,41 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
}
EXPORT_SYMBOL_NS_GPL(cs35l56_hw_init, SND_SOC_CS35L56_SHARED);
+int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base)
+{
+ struct gpio_descs *descs;
+ int speaker_id;
+ int i, ret;
+
+ /* Read the speaker type qualifier from the motherboard GPIOs */
+ descs = gpiod_get_array_optional(cs35l56_base->dev, "spk-id", GPIOD_IN);
+ if (!descs) {
+ return -ENOENT;
+ } else if (IS_ERR(descs)) {
+ ret = PTR_ERR(descs);
+ return dev_err_probe(cs35l56_base->dev, ret, "Failed to get spk-id-gpios\n");
+ }
+
+ speaker_id = 0;
+ for (i = 0; i < descs->ndescs; i++) {
+ ret = gpiod_get_value_cansleep(descs->desc[i]);
+ if (ret < 0) {
+ dev_err_probe(cs35l56_base->dev, ret, "Failed to read spk-id[%d]\n", i);
+ goto err;
+ }
+
+ speaker_id |= (ret << i);
+ }
+
+ dev_dbg(cs35l56_base->dev, "Speaker ID = %d\n", speaker_id);
+ ret = speaker_id;
+err:
+ gpiod_put_array(descs);
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_get_speaker_id, SND_SOC_CS35L56_SHARED);
+
static const u32 cs35l56_bclk_valid_for_pll_freq_table[] = {
[0x0C] = 128000,
[0x0F] = 256000,
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index 45b4de3eff94..6dd0319bc843 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -5,6 +5,7 @@
// Copyright (C) 2023 Cirrus Logic, Inc. and
// Cirrus Logic International Semiconductor Ltd.
+#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
@@ -15,6 +16,7 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
+#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
@@ -59,6 +61,131 @@ static int cs35l56_dspwait_put_volsw(struct snd_kcontrol *kcontrol,
return snd_soc_put_volsw(kcontrol, ucontrol);
}
+static const unsigned short cs35l56_asp1_mixer_regs[] = {
+ CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX2_INPUT,
+ CS35L56_ASP1TX3_INPUT, CS35L56_ASP1TX4_INPUT,
+};
+
+static const char * const cs35l56_asp1_mux_control_names[] = {
+ "ASP1 TX1 Source", "ASP1 TX2 Source", "ASP1 TX3 Source", "ASP1 TX4 Source"
+};
+
+static int cs35l56_sync_asp1_mixer_widgets_with_firmware(struct cs35l56_private *cs35l56)
+{
+ struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component);
+ const char *prefix = cs35l56->component->name_prefix;
+ char full_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+ const char *name;
+ struct snd_kcontrol *kcontrol;
+ struct soc_enum *e;
+ unsigned int val[4];
+ int i, item, ret;
+
+ if (cs35l56->asp1_mixer_widgets_initialized)
+ return 0;
+
+ /*
+ * Resume so we can read the registers from silicon if the regmap
+ * cache has not yet been populated.
+ */
+ ret = pm_runtime_resume_and_get(cs35l56->base.dev);
+ if (ret < 0)
+ return ret;
+
+ /* Wait for firmware download and reboot */
+ cs35l56_wait_dsp_ready(cs35l56);
+
+ ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT,
+ val, ARRAY_SIZE(val));
+
+ pm_runtime_mark_last_busy(cs35l56->base.dev);
+ pm_runtime_put_autosuspend(cs35l56->base.dev);
+
+ if (ret) {
+ dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret);
+ return ret;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) {
+ name = cs35l56_asp1_mux_control_names[i];
+
+ if (prefix) {
+ snprintf(full_name, sizeof(full_name), "%s %s", prefix, name);
+ name = full_name;
+ }
+
+ kcontrol = snd_soc_card_get_kcontrol_locked(dapm->card, name);
+ if (!kcontrol) {
+ dev_warn(cs35l56->base.dev, "Could not find control %s\n", name);
+ continue;
+ }
+
+ e = (struct soc_enum *)kcontrol->private_value;
+ item = snd_soc_enum_val_to_item(e, val[i] & CS35L56_ASP_TXn_SRC_MASK);
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL);
+ }
+
+ cs35l56->asp1_mixer_widgets_initialized = true;
+
+ return 0;
+}
+
+static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+ struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int index = e->shift_l;
+ unsigned int addr, val;
+ int ret;
+
+ ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56);
+ if (ret)
+ return ret;
+
+ addr = cs35l56_asp1_mixer_regs[index];
+ ret = regmap_read(cs35l56->base.regmap, addr, &val);
+ if (ret)
+ return ret;
+
+ val &= CS35L56_ASP_TXn_SRC_MASK;
+ ucontrol->value.enumerated.item[0] = snd_soc_enum_val_to_item(e, val);
+
+ return 0;
+}
+
+static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+ struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
+ struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int item = ucontrol->value.enumerated.item[0];
+ int index = e->shift_l;
+ unsigned int addr, val;
+ bool changed;
+ int ret;
+
+ ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56);
+ if (ret)
+ return ret;
+
+ addr = cs35l56_asp1_mixer_regs[index];
+ val = snd_soc_enum_item_to_val(e, item);
+
+ ret = regmap_update_bits_check(cs35l56->base.regmap, addr,
+ CS35L56_ASP_TXn_SRC_MASK, val, &changed);
+ if (ret)
+ return ret;
+
+ if (changed)
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL);
+
+ return changed;
+}
+
static DECLARE_TLV_DB_SCALE(vol_tlv, -10000, 25, 0);
static const struct snd_kcontrol_new cs35l56_controls[] = {
@@ -77,40 +204,44 @@ static const struct snd_kcontrol_new cs35l56_controls[] = {
};
static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx1_enum,
- CS35L56_ASP1TX1_INPUT,
- 0, CS35L56_ASP_TXn_SRC_MASK,
+ SND_SOC_NOPM,
+ 0, 0,
cs35l56_tx_input_texts,
cs35l56_tx_input_values);
static const struct snd_kcontrol_new asp1_tx1_mux =
- SOC_DAPM_ENUM("ASP1TX1 SRC", cs35l56_asp1tx1_enum);
+ SOC_DAPM_ENUM_EXT("ASP1TX1 SRC", cs35l56_asp1tx1_enum,
+ cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx2_enum,
- CS35L56_ASP1TX2_INPUT,
- 0, CS35L56_ASP_TXn_SRC_MASK,
+ SND_SOC_NOPM,
+ 1, 0,
cs35l56_tx_input_texts,
cs35l56_tx_input_values);
static const struct snd_kcontrol_new asp1_tx2_mux =
- SOC_DAPM_ENUM("ASP1TX2 SRC", cs35l56_asp1tx2_enum);
+ SOC_DAPM_ENUM_EXT("ASP1TX2 SRC", cs35l56_asp1tx2_enum,
+ cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx3_enum,
- CS35L56_ASP1TX3_INPUT,
- 0, CS35L56_ASP_TXn_SRC_MASK,
+ SND_SOC_NOPM,
+ 2, 0,
cs35l56_tx_input_texts,
cs35l56_tx_input_values);
static const struct snd_kcontrol_new asp1_tx3_mux =
- SOC_DAPM_ENUM("ASP1TX3 SRC", cs35l56_asp1tx3_enum);
+ SOC_DAPM_ENUM_EXT("ASP1TX3 SRC", cs35l56_asp1tx3_enum,
+ cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx4_enum,
- CS35L56_ASP1TX4_INPUT,
- 0, CS35L56_ASP_TXn_SRC_MASK,
+ SND_SOC_NOPM,
+ 3, 0,
cs35l56_tx_input_texts,
cs35l56_tx_input_values);
static const struct snd_kcontrol_new asp1_tx4_mux =
- SOC_DAPM_ENUM("ASP1TX4 SRC", cs35l56_asp1tx4_enum);
+ SOC_DAPM_ENUM_EXT("ASP1TX4 SRC", cs35l56_asp1tx4_enum,
+ cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx1_enum,
CS35L56_SWIRE_DP3_CH1_INPUT,
@@ -148,6 +279,21 @@ static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx4_enum,
static const struct snd_kcontrol_new sdw1_tx4_mux =
SOC_DAPM_ENUM("SDW1TX4 SRC", cs35l56_sdw1tx4_enum);
+static int cs35l56_asp1_cfg_event(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+ /* Override register values set by firmware boot */
+ return cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base);
+ default:
+ return 0;
+ }
+}
+
static int cs35l56_play_event(struct snd_soc_dapm_widget *w,
struct snd_kcontrol *kcontrol, int event)
{
@@ -184,6 +330,9 @@ static const struct snd_soc_dapm_widget cs35l56_dapm_widgets[] = {
SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_B", 0, 0),
SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_AMP", 0, 0),
+ SND_SOC_DAPM_SUPPLY("ASP1 CFG", SND_SOC_NOPM, 0, 0, cs35l56_asp1_cfg_event,
+ SND_SOC_DAPM_PRE_PMU),
+
SND_SOC_DAPM_SUPPLY("PLAY", SND_SOC_NOPM, 0, 0, cs35l56_play_event,
SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD),
@@ -251,6 +400,9 @@ static const struct snd_soc_dapm_route cs35l56_audio_map[] = {
{ "AMP", NULL, "VDD_B" },
{ "AMP", NULL, "VDD_AMP" },
+ { "ASP1 Playback", NULL, "ASP1 CFG" },
+ { "ASP1 Capture", NULL, "ASP1 CFG" },
+
{ "ASP1 Playback", NULL, "PLAY" },
{ "SDW1 Playback", NULL, "PLAY" },
@@ -650,7 +802,7 @@ static struct snd_soc_dai_driver cs35l56_dai[] = {
}
};
-static void cs35l56_secure_patch(struct cs35l56_private *cs35l56)
+static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56)
{
int ret;
@@ -662,19 +814,10 @@ static void cs35l56_secure_patch(struct cs35l56_private *cs35l56)
cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT);
}
-static void cs35l56_patch(struct cs35l56_private *cs35l56)
+static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing)
{
- unsigned int firmware_missing;
int ret;
- ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing);
- if (ret) {
- dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret);
- return;
- }
-
- firmware_missing &= CS35L56_FIRMWARE_MISSING;
-
/*
* Disable SoundWire interrupts to prevent race with IRQ work.
* Setting sdw_irq_no_unmask prevents the handler re-enabling
@@ -747,23 +890,51 @@ static void cs35l56_dsp_work(struct work_struct *work)
struct cs35l56_private *cs35l56 = container_of(work,
struct cs35l56_private,
dsp_work);
+ unsigned int firmware_version;
+ bool firmware_missing;
+ int ret;
if (!cs35l56->base.init_done)
return;
pm_runtime_get_sync(cs35l56->base.dev);
+ ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &firmware_version);
+ if (ret)
+ goto err;
+
+ /* Populate fw file qualifier with the revision and security state */
+ kfree(cs35l56->dsp.fwf_name);
+ if (firmware_missing) {
+ cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x-dsp1", cs35l56->base.rev);
+ } else {
+ /* Firmware files must match the running firmware version */
+ cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL,
+ "%02x%s-%06x-dsp1",
+ cs35l56->base.rev,
+ cs35l56->base.secured ? "-s" : "",
+ firmware_version);
+ }
+
+ if (!cs35l56->dsp.fwf_name)
+ goto err;
+
+ dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n",
+ cs35l56->dsp.fwf_name, cs35l56->dsp.system_name);
+
/*
- * When the device is running in secure mode the firmware files can
- * only contain insecure tunings and therefore we do not need to
- * shutdown the firmware to apply them and can use the lower cost
- * reinit sequence instead.
+ * The firmware cannot be patched if it is already running from
+ * patch RAM. In this case the firmware files are versioned to
+ * match the running firmware version and will only contain
+ * tunings. We do not need to shutdown the firmware to apply
+ * tunings so can use the lower cost reinit sequence instead.
*/
- if (cs35l56->base.secured)
- cs35l56_secure_patch(cs35l56);
+ if (!firmware_missing)
+ cs35l56_reinit_patch(cs35l56);
else
- cs35l56_patch(cs35l56);
+ cs35l56_patch(cs35l56, firmware_missing);
+err:
pm_runtime_mark_last_busy(cs35l56->base.dev);
pm_runtime_put_autosuspend(cs35l56->base.dev);
}
@@ -778,10 +949,19 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
if (!cs35l56->dsp.system_name &&
(snd_soc_card_get_pci_ssid(component->card, &vendor, &device) == 0)) {
- cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev,
- GFP_KERNEL,
- "%04x%04x",
- vendor, device);
+ /* Append a speaker qualifier if there is a speaker ID */
+ if (cs35l56->speaker_id >= 0) {
+ cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev,
+ GFP_KERNEL,
+ "%04x%04x-spkid%d",
+ vendor, device,
+ cs35l56->speaker_id);
+ } else {
+ cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev,
+ GFP_KERNEL,
+ "%04x%04x",
+ vendor, device);
+ }
if (!cs35l56->dsp.system_name)
return -ENOMEM;
}
@@ -799,6 +979,13 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
debugfs_create_bool("can_hibernate", 0444, debugfs_root, &cs35l56->base.can_hibernate);
debugfs_create_bool("fw_patched", 0444, debugfs_root, &cs35l56->base.fw_patched);
+ /*
+ * The widgets for the ASP1TX mixer can't be initialized
+ * until the firmware has been downloaded and rebooted.
+ */
+ regcache_drop_region(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX4_INPUT);
+ cs35l56->asp1_mixer_widgets_initialized = false;
+
queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work);
return 0;
@@ -809,6 +996,16 @@ static void cs35l56_component_remove(struct snd_soc_component *component)
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
cancel_work_sync(&cs35l56->dsp_work);
+
+ if (cs35l56->dsp.cs_dsp.booted)
+ wm_adsp_power_down(&cs35l56->dsp);
+
+ wm_adsp2_component_remove(&cs35l56->dsp, component);
+
+ kfree(cs35l56->dsp.fwf_name);
+ cs35l56->dsp.fwf_name = NULL;
+
+ cs35l56->component = NULL;
}
static int cs35l56_set_bias_level(struct snd_soc_component *component,
@@ -1050,7 +1247,13 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
if (ret < 0)
return 0;
- cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+ /* Append a speaker qualifier if there is a speaker ID */
+ if (cs35l56->speaker_id >= 0)
+ cs35l56->dsp.system_name = devm_kasprintf(dev, GFP_KERNEL, "%s-spkid%d",
+ prop, cs35l56->speaker_id);
+ else
+ cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+
if (cs35l56->dsp.system_name == NULL)
return -ENOMEM;
@@ -1059,12 +1262,101 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
return 0;
}
+/*
+ * Some SoundWire laptops have a spk-id-gpios property but it points to
+ * the wrong ACPI Device node so can't be used to get the GPIO. Try to
+ * find the SDCA node containing the GpioIo resource and add a GPIO
+ * mapping to it.
+ */
+static const struct acpi_gpio_params cs35l56_af01_first_gpio = { 0, 0, false };
+static const struct acpi_gpio_mapping cs35l56_af01_spkid_gpios_mapping[] = {
+ { "spk-id-gpios", &cs35l56_af01_first_gpio, 1 },
+ { }
+};
+
+static void cs35l56_acpi_dev_release_driver_gpios(void *adev)
+{
+ acpi_dev_remove_driver_gpios(adev);
+}
+
+static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l56)
+{
+ struct fwnode_handle *af01_fwnode;
+ const union acpi_object *obj;
+ struct gpio_desc *desc;
+ int ret;
+
+ /* Find the SDCA node containing the GpioIo */
+ af01_fwnode = device_get_named_child_node(cs35l56->base.dev, "AF01");
+ if (!af01_fwnode) {
+ dev_dbg(cs35l56->base.dev, "No AF01 node\n");
+ return -ENOENT;
+ }
+
+ ret = acpi_dev_get_property(ACPI_COMPANION(cs35l56->base.dev),
+ "spk-id-gpios", ACPI_TYPE_PACKAGE, &obj);
+ if (ret) {
+ dev_dbg(cs35l56->base.dev, "Could not get spk-id-gpios package: %d\n", ret);
+ return -ENOENT;
+ }
+
+ /* The broken properties we can handle are a 4-element package (one GPIO) */
+ if (obj->package.count != 4) {
+ dev_warn(cs35l56->base.dev, "Unexpected spk-id element count %d\n",
+ obj->package.count);
+ return -ENOENT;
+ }
+
+ /* Add a GPIO mapping if it doesn't already have one */
+ if (!fwnode_property_present(af01_fwnode, "spk-id-gpios")) {
+ struct acpi_device *adev = to_acpi_device_node(af01_fwnode);
+
+ /*
+ * Can't use devm_acpi_dev_add_driver_gpios() because the
+ * mapping isn't being added to the node pointed to by
+ * ACPI_COMPANION().
+ */
+ ret = acpi_dev_add_driver_gpios(adev, cs35l56_af01_spkid_gpios_mapping);
+ if (ret) {
+ return dev_err_probe(cs35l56->base.dev, ret,
+ "Failed to add gpio mapping to AF01\n");
+ }
+
+ ret = devm_add_action_or_reset(cs35l56->base.dev,
+ cs35l56_acpi_dev_release_driver_gpios,
+ adev);
+ if (ret)
+ return ret;
+
+ dev_dbg(cs35l56->base.dev, "Added spk-id-gpios mapping to AF01\n");
+ }
+
+ desc = fwnode_gpiod_get_index(af01_fwnode, "spk-id", 0, GPIOD_IN, NULL);
+ if (IS_ERR(desc)) {
+ ret = PTR_ERR(desc);
+ return dev_err_probe(cs35l56->base.dev, ret, "Get GPIO from AF01 failed\n");
+ }
+
+ ret = gpiod_get_value_cansleep(desc);
+ gpiod_put(desc);
+
+ if (ret < 0) {
+ dev_err_probe(cs35l56->base.dev, ret, "Error reading spk-id GPIO\n");
+ return ret;
+ }
+
+ dev_info(cs35l56->base.dev, "Got spk-id from AF01\n");
+
+ return ret;
+}
+
int cs35l56_common_probe(struct cs35l56_private *cs35l56)
{
int ret;
init_completion(&cs35l56->init_completion);
mutex_init(&cs35l56->base.irq_lock);
+ cs35l56->speaker_id = -ENOENT;
dev_set_drvdata(cs35l56->base.dev, cs35l56);
@@ -1101,6 +1393,15 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 1);
}
+ ret = cs35l56_get_speaker_id(&cs35l56->base);
+ if (ACPI_COMPANION(cs35l56->base.dev) && cs35l56->sdw_peripheral && (ret == -ENOENT))
+ ret = cs35l56_try_get_broken_sdca_spkid_gpio(cs35l56);
+
+ if ((ret < 0) && (ret != -ENOENT))
+ goto err;
+
+ cs35l56->speaker_id = ret;
+
ret = cs35l56_get_firmware_uid(cs35l56);
if (ret != 0)
goto err;
@@ -1152,11 +1453,9 @@ int cs35l56_init(struct cs35l56_private *cs35l56)
if (ret < 0)
return ret;
- /* Populate the DSP information with the revision and security state */
- cs35l56->dsp.part = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "cs35l56%s-%02x",
- cs35l56->base.secured ? "s" : "", cs35l56->base.rev);
- if (!cs35l56->dsp.part)
- return -ENOMEM;
+ ret = cs35l56_set_patch(&cs35l56->base);
+ if (ret)
+ return ret;
if (!cs35l56->base.reset_gpio) {
dev_dbg(cs35l56->base.dev, "No reset gpio: using soft reset\n");
@@ -1190,10 +1489,6 @@ post_soft_reset:
if (ret)
return ret;
- ret = cs35l56_set_patch(&cs35l56->base);
- if (ret)
- return ret;
-
/* Registers could be dirty after soft reset or SoundWire enumeration */
regcache_sync(cs35l56->base.regmap);
diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h
index 8159c3e217d9..b000e7365e40 100644
--- a/sound/soc/codecs/cs35l56.h
+++ b/sound/soc/codecs/cs35l56.h
@@ -44,12 +44,14 @@ struct cs35l56_private {
bool sdw_attached;
struct completion init_completion;
+ int speaker_id;
u32 rx_mask;
u32 tx_mask;
u8 asp_slot_width;
u8 asp_slot_count;
bool tdm_mode;
bool sysclk_set;
+ bool asp1_mixer_widgets_initialized;
u8 old_sdw_clock_scale;
};
diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c
index 6a64681767de..a97ccb512deb 100644
--- a/sound/soc/codecs/cs42l43.c
+++ b/sound/soc/codecs/cs42l43.c
@@ -2257,7 +2257,10 @@ static int cs42l43_codec_probe(struct platform_device *pdev)
pm_runtime_use_autosuspend(priv->dev);
pm_runtime_set_active(priv->dev);
pm_runtime_get_noresume(priv->dev);
- devm_pm_runtime_enable(priv->dev);
+
+ ret = devm_pm_runtime_enable(priv->dev);
+ if (ret)
+ goto err_pm;
for (i = 0; i < ARRAY_SIZE(cs42l43_irqs); i++) {
ret = cs42l43_request_irq(priv, dom, cs42l43_irqs[i].name,
@@ -2333,8 +2336,47 @@ static int cs42l43_codec_runtime_resume(struct device *dev)
return 0;
}
-static DEFINE_RUNTIME_DEV_PM_OPS(cs42l43_codec_pm_ops, NULL,
- cs42l43_codec_runtime_resume, NULL);
+static int cs42l43_codec_suspend(struct device *dev)
+{
+ struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+ disable_irq(cs42l43->irq);
+
+ return 0;
+}
+
+static int cs42l43_codec_suspend_noirq(struct device *dev)
+{
+ struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+ enable_irq(cs42l43->irq);
+
+ return 0;
+}
+
+static int cs42l43_codec_resume(struct device *dev)
+{
+ struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+ enable_irq(cs42l43->irq);
+
+ return 0;
+}
+
+static int cs42l43_codec_resume_noirq(struct device *dev)
+{
+ struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+ disable_irq(cs42l43->irq);
+
+ return 0;
+}
+
+static const struct dev_pm_ops cs42l43_codec_pm_ops = {
+ SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend, cs42l43_codec_resume)
+ NOIRQ_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend_noirq, cs42l43_codec_resume_noirq)
+ RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL)
+};
static const struct platform_device_id cs42l43_codec_id_table[] = {
{ "cs42l43-codec", },
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index fa890f6205e2..cbcd02ec6ba4 100755..100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -45,6 +45,82 @@ struct es8326_priv {
int jack_remove_retry;
};
+static int es8326_crosstalk1_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+ unsigned int crosstalk_h, crosstalk_l;
+ unsigned int crosstalk;
+
+ regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h);
+ regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+ crosstalk_h &= 0x20;
+ crosstalk_l &= 0xf0;
+ crosstalk = crosstalk_h >> 1 | crosstalk_l >> 4;
+ ucontrol->value.integer.value[0] = crosstalk;
+
+ return 0;
+}
+
+static int es8326_crosstalk1_set(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+ unsigned int crosstalk_h, crosstalk_l;
+ unsigned int crosstalk;
+
+ crosstalk = ucontrol->value.integer.value[0];
+ regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+ crosstalk_h = (crosstalk & 0x10) << 1;
+ crosstalk_l &= 0x0f;
+ crosstalk_l |= (crosstalk & 0x0f) << 4;
+ regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE,
+ 0x20, crosstalk_h);
+ regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l);
+
+ return 0;
+}
+
+static int es8326_crosstalk2_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+ unsigned int crosstalk_h, crosstalk_l;
+ unsigned int crosstalk;
+
+ regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h);
+ regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+ crosstalk_h &= 0x10;
+ crosstalk_l &= 0x0f;
+ crosstalk = crosstalk_h | crosstalk_l;
+ ucontrol->value.integer.value[0] = crosstalk;
+
+ return 0;
+}
+
+static int es8326_crosstalk2_set(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+ unsigned int crosstalk_h, crosstalk_l;
+ unsigned int crosstalk;
+
+ crosstalk = ucontrol->value.integer.value[0];
+ regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+ crosstalk_h = crosstalk & 0x10;
+ crosstalk_l &= 0xf0;
+ crosstalk_l |= crosstalk & 0x0f;
+ regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE,
+ 0x10, crosstalk_h);
+ regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l);
+
+ return 0;
+}
+
static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(dac_vol_tlv, -9550, 50, 0);
static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_vol_tlv, -9550, 50, 0);
static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_analog_pga_tlv, 0, 300, 0);
@@ -102,6 +178,10 @@ static const struct snd_kcontrol_new es8326_snd_controls[] = {
SOC_SINGLE_TLV("ALC Capture Target Level", ES8326_ALC_LEVEL,
0, 0x0f, 0, drc_target_tlv),
+ SOC_SINGLE_EXT("CROSSTALK1", SND_SOC_NOPM, 0, 31, 0,
+ es8326_crosstalk1_get, es8326_crosstalk1_set),
+ SOC_SINGLE_EXT("CROSSTALK2", SND_SOC_NOPM, 0, 31, 0,
+ es8326_crosstalk2_get, es8326_crosstalk2_set),
};
static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = {
@@ -117,12 +197,6 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = {
SND_SOC_DAPM_AIF_OUT("I2S OUT", "I2S1 Capture", 0, SND_SOC_NOPM, 0, 0),
SND_SOC_DAPM_AIF_IN("I2S IN", "I2S1 Playback", 0, SND_SOC_NOPM, 0, 0),
- /* ADC Digital Mute */
- SND_SOC_DAPM_PGA("ADC L1", ES8326_ADC_MUTE, 0, 1, NULL, 0),
- SND_SOC_DAPM_PGA("ADC R1", ES8326_ADC_MUTE, 1, 1, NULL, 0),
- SND_SOC_DAPM_PGA("ADC L2", ES8326_ADC_MUTE, 2, 1, NULL, 0),
- SND_SOC_DAPM_PGA("ADC R2", ES8326_ADC_MUTE, 3, 1, NULL, 0),
-
/* Analog Power Supply*/
SND_SOC_DAPM_DAC("Right DAC", NULL, ES8326_ANA_PDN, 0, 1),
SND_SOC_DAPM_DAC("Left DAC", NULL, ES8326_ANA_PDN, 1, 1),
@@ -142,15 +216,10 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = {
};
static const struct snd_soc_dapm_route es8326_dapm_routes[] = {
- {"ADC L1", NULL, "MIC1"},
- {"ADC R1", NULL, "MIC2"},
- {"ADC L2", NULL, "MIC3"},
- {"ADC R2", NULL, "MIC4"},
-
- {"ADC L", NULL, "ADC L1"},
- {"ADC R", NULL, "ADC R1"},
- {"ADC L", NULL, "ADC L2"},
- {"ADC R", NULL, "ADC R2"},
+ {"ADC L", NULL, "MIC1"},
+ {"ADC R", NULL, "MIC2"},
+ {"ADC L", NULL, "MIC3"},
+ {"ADC R", NULL, "MIC4"},
{"I2S OUT", NULL, "ADC L"},
{"I2S OUT", NULL, "ADC R"},
@@ -440,10 +509,16 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction)
unsigned int offset_l, offset_r;
if (mute) {
- regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF);
- regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
- ES8326_MUTE_MASK, ES8326_MUTE);
- regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xf0);
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF);
+ regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
+ ES8326_MUTE_MASK, ES8326_MUTE);
+ regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF,
+ 0x30, 0x00);
+ } else {
+ regmap_update_bits(es8326->regmap, ES8326_ADC_MUTE,
+ 0x0F, 0x0F);
+ }
} else {
if (!es8326->calibrated) {
regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_FORCE_CAL);
@@ -456,11 +531,22 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction)
regmap_write(es8326->regmap, ES8326_HPR_OFFSET_INI, offset_r);
es8326->calibrated = true;
}
- regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1);
- regmap_write(es8326->regmap, ES8326_HP_VOL, 0x91);
- regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON);
- regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
- ES8326_MUTE_MASK, ~(ES8326_MUTE));
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01);
+ usleep_range(1000, 5000);
+ regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00);
+ usleep_range(1000, 5000);
+ regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20);
+ regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30);
+ regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1);
+ regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON);
+ regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
+ ES8326_MUTE_MASK, ~(ES8326_MUTE));
+ } else {
+ msleep(300);
+ regmap_update_bits(es8326->regmap, ES8326_ADC_MUTE,
+ 0x0F, 0x00);
+ }
}
return 0;
}
@@ -477,23 +563,20 @@ static int es8326_set_bias_level(struct snd_soc_component *codec,
if (ret)
return ret;
- regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00);
+ regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x02);
+ usleep_range(5000, 10000);
regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk);
regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
(ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT));
- regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
regmap_write(es8326->regmap, ES8326_PGA_PDN, 0x40);
regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00);
regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x20);
-
- regmap_update_bits(es8326->regmap, ES8326_RESET,
- ES8326_CSM_ON, ES8326_CSM_ON);
+ regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x00);
break;
case SND_SOC_BIAS_PREPARE:
break;
case SND_SOC_BIAS_STANDBY:
regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
- regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x00);
regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x00);
regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT);
break;
@@ -513,7 +596,7 @@ static const struct snd_soc_dai_ops es8326_ops = {
.set_fmt = es8326_set_dai_fmt,
.set_sysclk = es8326_set_dai_sysclk,
.mute_stream = es8326_mute,
- .no_capture_mute = 1,
+ .no_capture_mute = 0,
};
static struct snd_soc_dai_driver es8326_dai = {
@@ -672,6 +755,8 @@ static void es8326_jack_detect_handler(struct work_struct *work)
es8326->hp = 0;
}
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
+ regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a);
+ regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03);
/*
* Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event
*/
@@ -695,8 +780,11 @@ static void es8326_jack_detect_handler(struct work_struct *work)
* Don't report jack status.
*/
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
+ es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
+ regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f);
+ regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08);
queue_delayed_work(system_wq, &es8326->jack_detect_work,
msecs_to_jiffies(400));
es8326->hp = 1;
@@ -736,13 +824,10 @@ exit:
static irqreturn_t es8326_irq(int irq, void *dev_id)
{
struct es8326_priv *es8326 = dev_id;
- struct snd_soc_component *comp = es8326->component;
if (!es8326->jack)
goto out;
- es8326_enable_micbias(comp);
-
if (es8326->jack->status & SND_JACK_HEADSET)
queue_delayed_work(system_wq, &es8326->jack_detect_work,
msecs_to_jiffies(10));
@@ -766,14 +851,14 @@ static int es8326_calibrate(struct snd_soc_component *component)
if ((es8326->version == ES8326_VERSION_B) && (es8326->calibrated == false)) {
dev_dbg(component->dev, "ES8326_VERSION_B, calibrating\n");
regmap_write(es8326->regmap, ES8326_CLK_INV, 0xc0);
- regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x01);
+ regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x03);
regmap_write(es8326->regmap, ES8326_CLK_DLL, 0x30);
regmap_write(es8326->regmap, ES8326_CLK_MUX, 0xed);
regmap_write(es8326->regmap, ES8326_CLK_DAC_SEL, 0x08);
regmap_write(es8326->regmap, ES8326_CLK_TRI, 0xc1);
regmap_write(es8326->regmap, ES8326_DAC_MUTE, 0x03);
regmap_write(es8326->regmap, ES8326_ANA_VSEL, 0x7f);
- regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x03);
+ regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x23);
regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x88);
usleep_range(15000, 20000);
regmap_write(es8326->regmap, ES8326_HP_OFFSET_CAL, 0x8c);
@@ -814,13 +899,13 @@ static int es8326_resume(struct snd_soc_component *component)
/* reset internal clock state */
regmap_write(es8326->regmap, ES8326_RESET, 0x1f);
regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
+ regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
usleep_range(10000, 15000);
regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9);
- regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0x4b);
+ regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb);
/* set headphone default type and detect pin */
regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83);
regmap_write(es8326->regmap, ES8326_CLK_RESAMPLE, 0x05);
- regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30);
/* set internal oscillator as clock source of headpone cp */
regmap_write(es8326->regmap, ES8326_CLK_DIV_CPC, 0x89);
@@ -828,14 +913,15 @@ static int es8326_resume(struct snd_soc_component *component)
/* clock manager reset release */
regmap_write(es8326->regmap, ES8326_RESET, 0x17);
/* set headphone detection as half scan mode */
- regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30);
+ regmap_write(es8326->regmap, ES8326_HP_MISC, 0x3d);
regmap_write(es8326->regmap, ES8326_PULLUP_CTL, 0x00);
/* enable headphone driver */
+ regmap_write(es8326->regmap, ES8326_HP_VOL, 0xc4);
regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa7);
usleep_range(2000, 5000);
- regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xa3);
- regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xb3);
+ regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x23);
+ regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x33);
regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1);
regmap_write(es8326->regmap, ES8326_CLK_INV, 0x00);
@@ -844,6 +930,8 @@ static int es8326_resume(struct snd_soc_component *component)
regmap_write(es8326->regmap, ES8326_CLK_CAL_TIME, 0x00);
/* calibrate for B version */
es8326_calibrate(component);
+ regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, 0xaa);
+ regmap_write(es8326->regmap, ES8326_DAC_RAMPRATE, 0x00);
/* turn off headphone out */
regmap_write(es8326->regmap, ES8326_HP_CAL, 0x00);
/* set ADC and DAC in low power mode */
@@ -856,6 +944,14 @@ static int es8326_resume(struct snd_soc_component *component)
regmap_write(es8326->regmap, ES8326_DAC_DSM, 0x08);
regmap_write(es8326->regmap, ES8326_DAC_VPPSCALE, 0x15);
+ regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 |
+ ((es8326->version == ES8326_VERSION_B) ?
+ (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) :
+ (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04)));
+ usleep_range(5000, 10000);
+ es8326_enable_micbias(es8326->component);
+ usleep_range(50000, 70000);
+ regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
regmap_write(es8326->regmap, ES8326_INT_SOURCE,
(ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
regmap_write(es8326->regmap, ES8326_INTOUT_IO,
@@ -864,7 +960,7 @@ static int es8326_resume(struct snd_soc_component *component)
(ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT));
regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT);
- regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
+ regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00);
regmap_write(es8326->regmap, ES8326_RESET, ES8326_CSM_ON);
regmap_update_bits(es8326->regmap, ES8326_PGAGAIN, ES8326_MIC_SEL_MASK,
ES8326_MIC1_SEL);
@@ -872,11 +968,7 @@ static int es8326_resume(struct snd_soc_component *component)
regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK,
ES8326_MUTE);
- regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 |
- ((es8326->version == ES8326_VERSION_B) ?
- (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) :
- (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04)));
- regmap_write(es8326->regmap, ES8326_HP_VOL, 0x11);
+ regmap_write(es8326->regmap, ES8326_ADC_MUTE, 0x0f);
es8326->jack_remove_retry = 0;
es8326->hp = 0;
diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h
index 90a08351d6ac..4234bbb900c4 100644
--- a/sound/soc/codecs/es8326.h
+++ b/sound/soc/codecs/es8326.h
@@ -72,6 +72,7 @@
#define ES8326_DAC_VOL 0x50
#define ES8326_DRC_RECOVERY 0x53
#define ES8326_DRC_WINSIZE 0x54
+#define ES8326_DAC_CROSSTALK 0x55
#define ES8326_HPJACK_TIMER 0x56
#define ES8326_HPDET_TYPE 0x57
#define ES8326_INT_SOURCE 0x58
@@ -100,7 +101,7 @@
#define ES8326_MUTE (3 << 0)
/* ES8326_CLK_CTL */
-#define ES8326_CLK_ON (0x7f << 0)
+#define ES8326_CLK_ON (0x7e << 0)
#define ES8326_CLK_OFF (0 << 0)
/* ES8326_CLK_INV */
diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c
index 7e21cec3c2fb..6ce309980cd1 100644
--- a/sound/soc/codecs/lpass-wsa-macro.c
+++ b/sound/soc/codecs/lpass-wsa-macro.c
@@ -1584,7 +1584,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
u16 gain_reg;
u16 reg;
int val;
- int offset_val = 0;
struct wsa_macro *wsa = snd_soc_component_get_drvdata(component);
if (w->shift == WSA_MACRO_COMP1) {
@@ -1623,10 +1622,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
CDC_WSA_RX1_RX_PATH_MIX_SEC0,
CDC_WSA_RX_PGA_HALF_DB_MASK,
CDC_WSA_RX_PGA_HALF_DB_ENABLE);
- offset_val = -2;
}
val = snd_soc_component_read(component, gain_reg);
- val += offset_val;
snd_soc_component_write(component, gain_reg, val);
wsa_macro_config_ear_spkr_gain(component, wsa,
event, gain_reg);
@@ -1654,10 +1651,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
CDC_WSA_RX1_RX_PATH_MIX_SEC0,
CDC_WSA_RX_PGA_HALF_DB_MASK,
CDC_WSA_RX_PGA_HALF_DB_DISABLE);
- offset_val = 2;
- val = snd_soc_component_read(component, gain_reg);
- val += offset_val;
- snd_soc_component_write(component, gain_reg, val);
}
wsa_macro_config_ear_spkr_gain(component, wsa,
event, gain_reg);
diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c
index b9f19fbd2911..b24d6472ad5f 100644
--- a/sound/soc/codecs/madera.c
+++ b/sound/soc/codecs/madera.c
@@ -3884,7 +3884,7 @@ static inline int madera_set_fll_clks(struct madera_fll *fll, int base, bool ena
return madera_set_fll_clks_reg(fll, ena,
base + MADERA_FLL_CONTROL_6_OFFS,
MADERA_FLL1_REFCLK_SRC_MASK,
- MADERA_FLL1_REFCLK_DIV_SHIFT);
+ MADERA_FLL1_REFCLK_SRC_SHIFT);
}
static inline int madera_set_fllao_clks(struct madera_fll *fll, int base, bool ena)
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 5150d6ee3748..20191a4473c2 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3317,6 +3317,7 @@ static void rt5645_jack_detect_work(struct work_struct *work)
report, SND_JACK_HEADPHONE);
snd_soc_jack_report(rt5645->mic_jack,
report, SND_JACK_MICROPHONE);
+ mutex_unlock(&rt5645->jd_mutex);
return;
case 4:
val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020;
@@ -3692,6 +3693,11 @@ static const struct rt5645_platform_data jd_mode3_monospk_platform_data = {
.mono_speaker = true,
};
+static const struct rt5645_platform_data jd_mode3_inv_data = {
+ .jd_mode = 3,
+ .inv_jd1_1 = true,
+};
+
static const struct rt5645_platform_data jd_mode3_platform_data = {
.jd_mode = 3,
};
@@ -3837,6 +3843,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"),
+ /*
+ * Above strings are too generic, LattePanda BIOS versions for
+ * all 4 hw revisions are:
+ * DF-BI-7-S70CR100-*
+ * DF-BI-7-S70CR110-*
+ * DF-BI-7-S70CR200-*
+ * LP-BS-7-S70CR700-*
+ * Do a partial match for S70CR to avoid false positive matches.
+ */
+ DMI_MATCH(DMI_BIOS_VERSION, "S70CR"),
},
.driver_data = (void *)&lattepanda_board_platform_data,
},
@@ -3871,6 +3887,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
},
.driver_data = (void *)&intel_braswell_platform_data,
},
+ {
+ .ident = "Meegopad T08",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Default string"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+ DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"),
+ DMI_MATCH(DMI_BOARD_VERSION, "V1.1"),
+ },
+ .driver_data = (void *)&jd_mode3_inv_data,
+ },
{ }
};
diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c
index b7e56ceb1acf..5d0e5348b361 100644
--- a/sound/soc/codecs/tas2781-comlib.c
+++ b/sound/soc/codecs/tas2781-comlib.c
@@ -267,6 +267,7 @@ void tas2781_reset(struct tasdevice_priv *tas_dev)
EXPORT_SYMBOL_GPL(tas2781_reset);
int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+ struct module *module,
void (*cont)(const struct firmware *fw, void *context))
{
int ret = 0;
@@ -280,7 +281,7 @@ int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
tas_priv->dev_name, tas_priv->ndev);
crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL);
tas_priv->codec = codec;
- ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT,
+ ret = request_firmware_nowait(module, FW_ACTION_UEVENT,
tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv,
cont);
if (ret)
diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c
index 32913bd1a623..b5abff230e43 100644
--- a/sound/soc/codecs/tas2781-i2c.c
+++ b/sound/soc/codecs/tas2781-i2c.c
@@ -566,7 +566,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec)
{
struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec);
- return tascodec_init(tas_priv, codec, tasdevice_fw_ready);
+ return tascodec_init(tas_priv, codec, THIS_MODULE, tasdevice_fw_ready);
}
static void tasdevice_deinit(void *context)
diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c
index 43c648efd0d9..deb15b95992d 100644
--- a/sound/soc/codecs/wcd9335.c
+++ b/sound/soc/codecs/wcd9335.c
@@ -3033,7 +3033,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w,
{
struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm);
u16 gain_reg;
- int offset_val = 0;
int val = 0;
switch (w->reg) {
@@ -3073,7 +3072,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w,
switch (event) {
case SND_SOC_DAPM_POST_PMU:
val = snd_soc_component_read(comp, gain_reg);
- val += offset_val;
snd_soc_component_write(comp, gain_reg, val);
break;
case SND_SOC_DAPM_POST_PMD:
@@ -3294,7 +3292,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w,
u16 gain_reg;
u16 reg;
int val;
- int offset_val = 0;
if (!(snd_soc_dapm_widget_name_cmp(w, "RX INT0 INTERP"))) {
reg = WCD9335_CDC_RX0_RX_PATH_CTL;
@@ -3337,7 +3334,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w,
case SND_SOC_DAPM_POST_PMU:
wcd9335_config_compander(comp, w->shift, event);
val = snd_soc_component_read(comp, gain_reg);
- val += offset_val;
snd_soc_component_write(comp, gain_reg, val);
break;
case SND_SOC_DAPM_POST_PMD:
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
index 1b6e376f3833..6813268e6a19 100644
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -13,7 +13,6 @@
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
-#include <linux/regulator/consumer.h>
#include <linux/slab.h>
#include <linux/slimbus.h>
#include <sound/pcm_params.h>
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index faf8d3f9b3c5..6021aa5a5689 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -210,7 +210,7 @@ struct wcd938x_priv {
};
static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
-static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
+static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0);
static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
struct wcd938x_mbhc_zdet_param {
@@ -3587,10 +3587,8 @@ static int wcd938x_probe(struct platform_device *pdev)
mutex_init(&wcd938x->micb_lock);
ret = wcd938x_populate_dt_data(wcd938x, dev);
- if (ret) {
- dev_err(dev, "%s: Fail to obtain platform data\n", __func__);
- return -EINVAL;
- }
+ if (ret)
+ return ret;
ret = wcd938x_add_slave_components(wcd938x, dev, &match);
if (ret)
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index fb90ae6a8a34..7c6ed2983128 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -2229,6 +2229,9 @@ SND_SOC_DAPM_PGA_E("HPOUT", SND_SOC_NOPM, 0, 0, NULL, 0, hp_event,
SND_SOC_DAPM_OUTPUT("HPOUTL"),
SND_SOC_DAPM_OUTPUT("HPOUTR"),
+
+SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0),
+SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0),
};
static const struct snd_soc_dapm_widget wm8962_dapm_spk_mono_widgets[] = {
@@ -2236,7 +2239,6 @@ SND_SOC_DAPM_MIXER("Speaker Mixer", WM8962_MIXER_ENABLES, 1, 0,
spkmixl, ARRAY_SIZE(spkmixl)),
SND_SOC_DAPM_MUX_E("Speaker PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux,
out_pga_event, SND_SOC_DAPM_POST_PMU),
-SND_SOC_DAPM_PGA("Speaker Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0),
SND_SOC_DAPM_OUTPUT("SPKOUT"),
};
@@ -2251,9 +2253,6 @@ SND_SOC_DAPM_MUX_E("SPKOUTL PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux,
SND_SOC_DAPM_MUX_E("SPKOUTR PGA", WM8962_PWR_MGMT_2, 3, 0, &spkoutr_mux,
out_pga_event, SND_SOC_DAPM_POST_PMU),
-SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0),
-SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0),
-
SND_SOC_DAPM_OUTPUT("SPKOUTL"),
SND_SOC_DAPM_OUTPUT("SPKOUTR"),
};
@@ -2366,12 +2365,18 @@ static const struct snd_soc_dapm_route wm8962_spk_mono_intercon[] = {
{ "Speaker PGA", "Mixer", "Speaker Mixer" },
{ "Speaker PGA", "DAC", "DACL" },
- { "Speaker Output", NULL, "Speaker PGA" },
- { "Speaker Output", NULL, "SYSCLK" },
- { "Speaker Output", NULL, "TOCLK" },
- { "Speaker Output", NULL, "TEMP_SPK" },
+ { "SPKOUTL Output", NULL, "Speaker PGA" },
+ { "SPKOUTL Output", NULL, "SYSCLK" },
+ { "SPKOUTL Output", NULL, "TOCLK" },
+ { "SPKOUTL Output", NULL, "TEMP_SPK" },
- { "SPKOUT", NULL, "Speaker Output" },
+ { "SPKOUTR Output", NULL, "Speaker PGA" },
+ { "SPKOUTR Output", NULL, "SYSCLK" },
+ { "SPKOUTR Output", NULL, "TOCLK" },
+ { "SPKOUTR Output", NULL, "TEMP_SPK" },
+
+ { "SPKOUT", NULL, "SPKOUTL Output" },
+ { "SPKOUT", NULL, "SPKOUTR Output" },
};
static const struct snd_soc_dapm_route wm8962_spk_stereo_intercon[] = {
@@ -2914,8 +2919,12 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s
switch (fll_id) {
case WM8962_FLL_MCLK:
case WM8962_FLL_BCLK:
+ fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT;
+ break;
case WM8962_FLL_OSC:
fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT;
+ snd_soc_component_update_bits(component, WM8962_PLL2,
+ WM8962_OSC_ENA, WM8962_OSC_ENA);
break;
case WM8962_FLL_INT:
snd_soc_component_update_bits(component, WM8962_FLL_CONTROL_1,
@@ -2924,7 +2933,7 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s
WM8962_FLL_FRC_NCO, WM8962_FLL_FRC_NCO);
break;
default:
- dev_err(component->dev, "Unknown FLL source %d\n", ret);
+ dev_err(component->dev, "Unknown FLL source %d\n", source);
return -EINVAL;
}
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index c01e31175015..36ea0dcdc7ab 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -739,19 +739,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp,
const char *filetype)
{
struct cs_dsp *cs_dsp = &dsp->cs_dsp;
+ const char *fwf;
char *s, c;
int ret = 0;
+ if (dsp->fwf_name)
+ fwf = dsp->fwf_name;
+ else
+ fwf = dsp->cs_dsp.name;
+
if (system_name && asoc_component_prefix)
*filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part,
- dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name,
+ fwf, wm_adsp_fw[dsp->fw].file, system_name,
asoc_component_prefix, filetype);
else if (system_name)
*filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part,
- dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name,
+ fwf, wm_adsp_fw[dsp->fw].file, system_name,
filetype);
else
- *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name,
+ *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf,
wm_adsp_fw[dsp->fw].file, filetype);
if (*filename == NULL)
@@ -823,6 +829,23 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
}
}
+ /* Check system-specific bin without wmfw before falling back to generic */
+ if (dsp->wmfw_optional && system_name) {
+ if (asoc_component_prefix)
+ wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
+ cirrus_dir, system_name,
+ asoc_component_prefix, "bin");
+
+ if (!*coeff_firmware)
+ wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
+ cirrus_dir, system_name,
+ NULL, "bin");
+
+ if (*coeff_firmware)
+ return 0;
+ }
+
+ /* Check legacy location */
if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename,
"", NULL, NULL, "wmfw")) {
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
@@ -830,62 +853,28 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
return 0;
}
+ /* Fall back to generic wmfw and optional matching bin */
ret = wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename,
cirrus_dir, NULL, NULL, "wmfw");
- if (!ret) {
+ if (!ret || dsp->wmfw_optional) {
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
cirrus_dir, NULL, NULL, "bin");
return 0;
}
- if (dsp->wmfw_optional) {
- if (system_name) {
- if (asoc_component_prefix)
- wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
- cirrus_dir, system_name,
- asoc_component_prefix, "bin");
-
- if (!*coeff_firmware)
- wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
- cirrus_dir, system_name,
- NULL, "bin");
- }
-
- if (!*coeff_firmware)
- wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
- "", NULL, NULL, "bin");
-
- if (!*coeff_firmware)
- wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
- cirrus_dir, NULL, NULL, "bin");
-
- return 0;
- }
-
adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n",
- cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file,
- system_name, asoc_component_prefix);
+ cirrus_dir, dsp->part,
+ dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name,
+ wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix);
return -ENOENT;
}
static int wm_adsp_common_init(struct wm_adsp *dsp)
{
- char *p;
-
INIT_LIST_HEAD(&dsp->compr_list);
INIT_LIST_HEAD(&dsp->buffer_list);
- if (!dsp->fwf_name) {
- p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- dsp->fwf_name = p;
- for (; *p != 0; ++p)
- *p = tolower(*p);
- }
-
return 0;
}
diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c
index cb83c569e18d..a2e86ef7d18f 100644
--- a/sound/soc/codecs/wsa883x.c
+++ b/sound/soc/codecs/wsa883x.c
@@ -1098,7 +1098,11 @@ static int wsa_dev_mode_put(struct snd_kcontrol *kcontrol,
return 1;
}
-static const DECLARE_TLV_DB_SCALE(pa_gain, -300, 150, -300);
+static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(pa_gain,
+ 0, 14, TLV_DB_SCALE_ITEM(-300, 0, 0),
+ 15, 29, TLV_DB_SCALE_ITEM(-300, 150, 0),
+ 30, 31, TLV_DB_SCALE_ITEM(1800, 0, 0),
+);
static int wsa883x_get_swr_port(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index f0fb33d719c2..c46f64557a7f 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -174,7 +174,9 @@ static int fsl_xcvr_activate_ctl(struct snd_soc_dai *dai, const char *name,
struct snd_kcontrol *kctl;
bool enabled;
- kctl = snd_soc_card_get_kcontrol(card, name);
+ lockdep_assert_held(&card->snd_card->controls_rwsem);
+
+ kctl = snd_soc_card_get_kcontrol_locked(card, name);
if (kctl == NULL)
return -ENOENT;
@@ -576,10 +578,14 @@ static int fsl_xcvr_startup(struct snd_pcm_substream *substream,
xcvr->streams |= BIT(substream->stream);
if (!xcvr->soc_data->spdif_only) {
+ struct snd_soc_card *card = dai->component->card;
+
/* Disable XCVR controls if there is stream started */
+ down_read(&card->snd_card->controls_rwsem);
fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, false);
fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name, false);
fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name, false);
+ up_read(&card->snd_card->controls_rwsem);
}
return 0;
@@ -598,11 +604,15 @@ static void fsl_xcvr_shutdown(struct snd_pcm_substream *substream,
/* Enable XCVR controls if there is no stream started */
if (!xcvr->streams) {
if (!xcvr->soc_data->spdif_only) {
+ struct snd_soc_card *card = dai->component->card;
+
+ down_read(&card->snd_card->controls_rwsem);
fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, true);
fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name,
(xcvr->mode == FSL_XCVR_MODE_ARC));
fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name,
(xcvr->mode == FSL_XCVR_MODE_EARC));
+ up_read(&card->snd_card->controls_rwsem);
}
ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_IER0,
FSL_XCVR_IRQ_EARC_ALL, 0);
diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c
index 59c3793f65df..db78eb2f0108 100644
--- a/sound/soc/intel/avs/core.c
+++ b/sound/soc/intel/avs/core.c
@@ -477,6 +477,9 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
return 0;
err_i915_init:
+ pci_free_irq(pci, 0, adev);
+ pci_free_irq(pci, 0, bus);
+ pci_free_irq_vectors(pci);
pci_clear_master(pci);
pci_set_drvdata(pci, NULL);
err_acquire_irq:
diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c
index 778236d3fd28..48b3c67c9103 100644
--- a/sound/soc/intel/avs/topology.c
+++ b/sound/soc/intel/avs/topology.c
@@ -857,7 +857,7 @@ assign_copier_gtw_instance(struct snd_soc_component *comp, struct avs_tplg_modcf
}
/* If topology sets value don't overwrite it */
- if (cfg->copier.vindex.i2s.instance)
+ if (cfg->copier.vindex.val)
return;
mach = dev_get_platdata(comp->card->dev);
diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c
index 10a84a2c1036..c014d85a08b2 100644
--- a/sound/soc/intel/boards/bytcht_cx2072x.c
+++ b/sound/soc/intel/boards/bytcht_cx2072x.c
@@ -241,7 +241,8 @@ static int snd_byt_cht_cx2072x_probe(struct platform_device *pdev)
/* fix index of codec dai */
for (i = 0; i < ARRAY_SIZE(byt_cht_cx2072x_dais); i++) {
- if (!strcmp(byt_cht_cx2072x_dais[i].codecs->name,
+ if (byt_cht_cx2072x_dais[i].codecs->name &&
+ !strcmp(byt_cht_cx2072x_dais[i].codecs->name,
"i2c-14F10720:00")) {
dai_index = i;
break;
diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c
index 7e5eea690023..f4ac3ddd148b 100644
--- a/sound/soc/intel/boards/bytcht_da7213.c
+++ b/sound/soc/intel/boards/bytcht_da7213.c
@@ -245,7 +245,8 @@ static int bytcht_da7213_probe(struct platform_device *pdev)
/* fix index of codec dai */
for (i = 0; i < ARRAY_SIZE(dailink); i++) {
- if (!strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) {
+ if (dailink[i].codecs->name &&
+ !strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) {
dai_index = i;
break;
}
diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c
index 1564a88a885e..2fcec2e02bb5 100644
--- a/sound/soc/intel/boards/bytcht_es8316.c
+++ b/sound/soc/intel/boards/bytcht_es8316.c
@@ -546,7 +546,8 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
/* fix index of codec dai */
for (i = 0; i < ARRAY_SIZE(byt_cht_es8316_dais); i++) {
- if (!strcmp(byt_cht_es8316_dais[i].codecs->name,
+ if (byt_cht_es8316_dais[i].codecs->name &&
+ !strcmp(byt_cht_es8316_dais[i].codecs->name,
"i2c-ESSX8316:00")) {
dai_index = i;
break;
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 42466b4b1ca4..05f38d1f7d82 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -685,6 +685,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
BYT_RT5640_SSP0_AIF1 |
BYT_RT5640_MCLK_EN),
},
+ { /* Chuwi Vi8 dual-boot (CWI506) */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "i86"),
+ /* The above are too generic, also match BIOS info */
+ DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"),
+ },
+ .driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+ BYT_RT5640_MONO_SPEAKER |
+ BYT_RT5640_SSP0_AIF1 |
+ BYT_RT5640_MCLK_EN),
+ },
{
/* Chuwi Vi10 (CWI505) */
.matches = {
@@ -1652,7 +1664,8 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
/* fix index of codec dai */
for (i = 0; i < ARRAY_SIZE(byt_rt5640_dais); i++) {
- if (!strcmp(byt_rt5640_dais[i].codecs->name,
+ if (byt_rt5640_dais[i].codecs->name &&
+ !strcmp(byt_rt5640_dais[i].codecs->name,
"i2c-10EC5640:00")) {
dai_index = i;
break;
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
index f9fe8414f454..80c841b000a3 100644
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -910,7 +910,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
/* fix index of codec dai */
for (i = 0; i < ARRAY_SIZE(byt_rt5651_dais); i++) {
- if (!strcmp(byt_rt5651_dais[i].codecs->name,
+ if (byt_rt5651_dais[i].codecs->name &&
+ !strcmp(byt_rt5651_dais[i].codecs->name,
"i2c-10EC5651:00")) {
dai_index = i;
break;
diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c
index 6978ebde6693..cccb5e90c0fe 100644
--- a/sound/soc/intel/boards/bytcr_wm5102.c
+++ b/sound/soc/intel/boards/bytcr_wm5102.c
@@ -605,7 +605,8 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev)
/* find index of codec dai */
for (i = 0; i < ARRAY_SIZE(byt_wm5102_dais); i++) {
- if (!strcmp(byt_wm5102_dais[i].codecs->name,
+ if (byt_wm5102_dais[i].codecs->name &&
+ !strcmp(byt_wm5102_dais[i].codecs->name,
"wm5102-codec")) {
dai_index = i;
break;
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index c952a96cde7e..eb41b7115d01 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -40,7 +40,6 @@ struct cht_acpi_card {
struct cht_mc_private {
struct snd_soc_jack jack;
struct cht_acpi_card *acpi_card;
- char codec_name[SND_ACPI_I2C_ID_LEN];
struct clk *mclk;
};
@@ -567,14 +566,14 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
}
card->dev = &pdev->dev;
- sprintf(drv->codec_name, "i2c-%s:00", drv->acpi_card->codec_id);
/* set correct codec name */
for (i = 0; i < ARRAY_SIZE(cht_dailink); i++)
- if (!strcmp(card->dai_link[i].codecs->name,
+ if (cht_dailink[i].codecs->name &&
+ !strcmp(cht_dailink[i].codecs->name,
"i2c-10EC5645:00")) {
- card->dai_link[i].codecs->name = drv->codec_name;
dai_index = i;
+ break;
}
/* fixup codec name based on HID */
diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c
index 8cf0b33cc02e..be2d1a8dbca8 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
@@ -466,7 +466,8 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
/* find index of codec dai */
for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) {
- if (!strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) {
+ if (cht_dailink[i].codecs->name &&
+ !strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) {
dai_index = i;
break;
}
diff --git a/sound/soc/qcom/lpass-cdc-dma.c b/sound/soc/qcom/lpass-cdc-dma.c
index 48b03e60e3a3..8106c586f68a 100644
--- a/sound/soc/qcom/lpass-cdc-dma.c
+++ b/sound/soc/qcom/lpass-cdc-dma.c
@@ -259,7 +259,7 @@ static int lpass_cdc_dma_daiops_trigger(struct snd_pcm_substream *substream,
int cmd, struct snd_soc_dai *dai)
{
struct snd_soc_pcm_runtime *soc_runtime = snd_soc_substream_to_rtd(substream);
- struct lpaif_dmactl *dmactl;
+ struct lpaif_dmactl *dmactl = NULL;
int ret = 0, id;
switch (cmd) {
diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c
index 052e40cb38fe..00bbd291be5c 100644
--- a/sound/soc/qcom/qdsp6/q6apm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6apm-dai.c
@@ -123,7 +123,7 @@ static struct snd_pcm_hardware q6apm_dai_hardware_playback = {
.fifo_size = 0,
};
-static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, void *priv)
+static void event_handler(uint32_t opcode, uint32_t token, void *payload, void *priv)
{
struct q6apm_dai_rtd *prtd = priv;
struct snd_pcm_substream *substream = prtd->substream;
@@ -157,7 +157,7 @@ static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, vo
}
static void event_handler_compr(uint32_t opcode, uint32_t token,
- uint32_t *payload, void *priv)
+ void *payload, void *priv)
{
struct q6apm_dai_rtd *prtd = priv;
struct snd_compr_stream *substream = prtd->cstream;
@@ -352,7 +352,7 @@ static int q6apm_dai_open(struct snd_soc_component *component,
spin_lock_init(&prtd->lock);
prtd->substream = substream;
- prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler, prtd, graph_id);
+ prtd->graph = q6apm_graph_open(dev, event_handler, prtd, graph_id);
if (IS_ERR(prtd->graph)) {
dev_err(dev, "%s: Could not allocate memory\n", __func__);
ret = PTR_ERR(prtd->graph);
@@ -496,7 +496,7 @@ static int q6apm_dai_compr_open(struct snd_soc_component *component,
return -ENOMEM;
prtd->cstream = stream;
- prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler_compr, prtd, graph_id);
+ prtd->graph = q6apm_graph_open(dev, event_handler_compr, prtd, graph_id);
if (IS_ERR(prtd->graph)) {
ret = PTR_ERR(prtd->graph);
kfree(prtd);
diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c
index ed4bb551bfbb..b7fd503a1666 100644
--- a/sound/soc/qcom/sc8280xp.c
+++ b/sound/soc/qcom/sc8280xp.c
@@ -32,12 +32,14 @@ static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd)
case WSA_CODEC_DMA_RX_0:
case WSA_CODEC_DMA_RX_1:
/*
- * set limit of 0dB on Digital Volume for Speakers,
- * this can prevent damage of speakers to some extent without
- * active speaker protection
+ * Set limit of -3 dB on Digital Volume and 0 dB on PA Volume
+ * to reduce the risk of speaker damage until we have active
+ * speaker protection in place.
*/
- snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84);
- snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84);
+ snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 81);
+ snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 81);
+ snd_soc_limit_volume(card, "SpkrLeft PA Volume", 17);
+ snd_soc_limit_volume(card, "SpkrRight PA Volume", 17);
break;
default:
break;
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 230c48648af3..afd69c6eb654 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -111,6 +111,13 @@ static u32 rsnd_adg_ssi_ws_timing_gen2(struct rsnd_dai_stream *io)
ws = 7;
break;
}
+ } else {
+ /*
+ * SSI8 is not connected to ADG.
+ * Thus SSI9 is using ws = 8
+ */
+ if (id == 9)
+ ws = 8;
}
return (0x6 + ws) << 8;
diff --git a/sound/soc/soc-card.c b/sound/soc/soc-card.c
index 285ab4c9c716..8a2f163da6bc 100644
--- a/sound/soc/soc-card.c
+++ b/sound/soc/soc-card.c
@@ -5,6 +5,9 @@
// Copyright (C) 2019 Renesas Electronics Corp.
// Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
//
+
+#include <linux/lockdep.h>
+#include <linux/rwsem.h>
#include <sound/soc.h>
#include <sound/jack.h>
@@ -26,12 +29,15 @@ static inline int _soc_card_ret(struct snd_soc_card *card,
return ret;
}
-struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
- const char *name)
+struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card,
+ const char *name)
{
struct snd_card *card = soc_card->snd_card;
struct snd_kcontrol *kctl;
+ /* must be held read or write */
+ lockdep_assert_held(&card->controls_rwsem);
+
if (unlikely(!name))
return NULL;
@@ -40,6 +46,20 @@ struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
return kctl;
return NULL;
}
+EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol_locked);
+
+struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
+ const char *name)
+{
+ struct snd_card *card = soc_card->snd_card;
+ struct snd_kcontrol *kctl;
+
+ down_read(&card->controls_rwsem);
+ kctl = snd_soc_card_get_kcontrol_locked(soc_card, name);
+ up_read(&card->controls_rwsem);
+
+ return kctl;
+}
EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol);
static int jack_new(struct snd_soc_card *card, const char *id, int type,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index f8524b5bfb33..516350533e73 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1037,7 +1037,7 @@ component_dai_empty:
return -EINVAL;
}
-#define MAX_DEFAULT_CH_MAP_SIZE 7
+#define MAX_DEFAULT_CH_MAP_SIZE 8
static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZE] = {
{ .cpu = 0, .codec = 0 },
{ .cpu = 1, .codec = 1 },
@@ -1046,6 +1046,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZ
{ .cpu = 4, .codec = 4 },
{ .cpu = 5, .codec = 5 },
{ .cpu = 6, .codec = 6 },
+ { .cpu = 7, .codec = 7 },
};
static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZE] = {
{ .cpu = 0, .codec = 0 },
@@ -1055,6 +1056,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZ
{ .cpu = 0, .codec = 4 },
{ .cpu = 0, .codec = 5 },
{ .cpu = 0, .codec = 6 },
+ { .cpu = 0, .codec = 7 },
};
static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_SIZE] = {
{ .cpu = 0, .codec = 0 },
@@ -1064,6 +1066,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_S
{ .cpu = 4, .codec = 0 },
{ .cpu = 5, .codec = 0 },
{ .cpu = 6, .codec = 0 },
+ { .cpu = 7, .codec = 0 },
};
static int snd_soc_compensate_channel_connection_map(struct snd_soc_card *card,
struct snd_soc_dai_link *dai_link)
diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
index 2743f07a5e08..b44b1b1adb6e 100644
--- a/sound/soc/sof/amd/acp-ipc.c
+++ b/sound/soc/sof/amd/acp-ipc.c
@@ -188,11 +188,13 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
if (dsp_ack) {
+ spin_lock_irq(&sdev->ipc_lock);
/* handle immediate reply from DSP core */
acp_dsp_ipc_get_reply(sdev);
snd_sof_ipc_reply(sdev, 0);
/* set the done bit */
acp_dsp_ipc_dsp_done(sdev);
+ spin_unlock_irq(&sdev->ipc_lock);
ipc_irq = true;
}
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index 32a741fcb84f..07632ae6ccf5 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -355,21 +355,20 @@ static irqreturn_t acp_irq_thread(int irq, void *context)
unsigned int count = ACP_HW_SEM_RETRY_COUNT;
spin_lock_irq(&sdev->ipc_lock);
- while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) {
- /* Wait until acquired HW Semaphore lock or timeout */
- count--;
- if (!count) {
- dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__);
- spin_unlock_irq(&sdev->ipc_lock);
- return IRQ_NONE;
- }
+ /* Wait until acquired HW Semaphore lock or timeout */
+ while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset) && --count)
+ ;
+ spin_unlock_irq(&sdev->ipc_lock);
+
+ if (!count) {
+ dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__);
+ return IRQ_NONE;
}
sof_ops(sdev)->irq_thread(irq, sdev);
/* Unlock or Release HW Semaphore */
snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0);
- spin_unlock_irq(&sdev->ipc_lock);
return IRQ_HANDLED;
};
diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c
index 78a57eb9cbc3..b26ffe767fab 100644
--- a/sound/soc/sof/intel/pci-lnl.c
+++ b/sound/soc/sof/intel/pci-lnl.c
@@ -36,7 +36,7 @@ static const struct sof_dev_desc lnl_desc = {
[SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl",
},
.default_tplg_path = {
- [SOF_IPC_TYPE_4] = "intel/sof-ace-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_4] = "sof-lnl.ri",
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
index 0660d4b2ac96..a361ee9d1107 100644
--- a/sound/soc/sof/intel/pci-tgl.c
+++ b/sound/soc/sof/intel/pci-tgl.c
@@ -33,18 +33,18 @@ static const struct sof_dev_desc tgl_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/tgl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-tgl.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-tgl.ri",
},
.nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -66,18 +66,18 @@ static const struct sof_dev_desc tglh_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/tgl-h",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl-h",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl-h",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl-h",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-tgl-h.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-tgl-h.ri",
},
.nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -98,18 +98,18 @@ static const struct sof_dev_desc ehl_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/ehl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/ehl",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/ehl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/ehl",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-ehl.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-ehl.ri",
},
.nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -131,18 +131,18 @@ static const struct sof_dev_desc adls_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/adl-s",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-s",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-s",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-s",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-adl-s.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-adl-s.ri",
},
.nocodec_tplg_filename = "sof-adl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -164,18 +164,18 @@ static const struct sof_dev_desc adl_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/adl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/adl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-adl.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-adl.ri",
},
.nocodec_tplg_filename = "sof-adl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -197,18 +197,18 @@ static const struct sof_dev_desc adl_n_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/adl-n",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-n",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-n",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-n",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-adl-n.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-adl-n.ri",
},
.nocodec_tplg_filename = "sof-adl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -230,18 +230,18 @@ static const struct sof_dev_desc rpls_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/rpl-s",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl-s",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl-s",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl-s",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-rpl-s.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-rpl-s.ri",
},
.nocodec_tplg_filename = "sof-rpl-nocodec.tplg",
.ops = &sof_tgl_ops,
@@ -263,18 +263,18 @@ static const struct sof_dev_desc rpl_desc = {
.dspless_mode_supported = true, /* Only supported for HDaudio */
.default_fw_path = {
[SOF_IPC_TYPE_3] = "intel/sof",
- [SOF_IPC_TYPE_4] = "intel/avs/rpl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl",
},
.default_lib_path = {
- [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl",
},
.default_tplg_path = {
[SOF_IPC_TYPE_3] = "intel/sof-tplg",
- [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
.default_fw_filename = {
[SOF_IPC_TYPE_3] = "sof-rpl.ri",
- [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+ [SOF_IPC_TYPE_4] = "sof-rpl.ri",
},
.nocodec_tplg_filename = "sof-rpl-nocodec.tplg",
.ops = &sof_tgl_ops,
diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c
index a8832a1c1a24..d47698f4be2d 100644
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -2360,27 +2360,16 @@ static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev)
return 0;
}
-/*
- * For older firmware, this function doesn't free widgets for static pipelines during suspend.
- * It only resets use_count for all widgets.
- */
-static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify)
+static int sof_ipc3_free_widgets_in_list(struct snd_sof_dev *sdev, bool include_scheduler,
+ bool *dyn_widgets, bool verify)
{
struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
struct snd_sof_widget *swidget;
- struct snd_sof_route *sroute;
- bool dyn_widgets = false;
int ret;
- /*
- * This function is called during suspend and for one-time topology verification during
- * first boot. In both cases, there is no need to protect swidget->use_count and
- * sroute->setup because during suspend all running streams are suspended and during
- * topology loading the sound card unavailable to open PCMs.
- */
list_for_each_entry(swidget, &sdev->widget_list, list) {
if (swidget->dynamic_pipeline_widget) {
- dyn_widgets = true;
+ *dyn_widgets = true;
continue;
}
@@ -2395,11 +2384,49 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif
continue;
}
+ if (include_scheduler && swidget->id != snd_soc_dapm_scheduler)
+ continue;
+
+ if (!include_scheduler && swidget->id == snd_soc_dapm_scheduler)
+ continue;
+
ret = sof_widget_free(sdev, swidget);
if (ret < 0)
return ret;
}
+ return 0;
+}
+
+/*
+ * For older firmware, this function doesn't free widgets for static pipelines during suspend.
+ * It only resets use_count for all widgets.
+ */
+static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify)
+{
+ struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
+ struct snd_sof_widget *swidget;
+ struct snd_sof_route *sroute;
+ bool dyn_widgets = false;
+ int ret;
+
+ /*
+ * This function is called during suspend and for one-time topology verification during
+ * first boot. In both cases, there is no need to protect swidget->use_count and
+ * sroute->setup because during suspend all running streams are suspended and during
+ * topology loading the sound card unavailable to open PCMs. Do not free the scheduler
+ * widgets yet so that the secondary cores do not get powered down before all the widgets
+ * associated with the scheduler are freed.
+ */
+ ret = sof_ipc3_free_widgets_in_list(sdev, false, &dyn_widgets, verify);
+ if (ret < 0)
+ return ret;
+
+ /* free all the scheduler widgets now */
+ ret = sof_ipc3_free_widgets_in_list(sdev, true, &dyn_widgets, verify);
+ if (ret < 0)
+ return ret;
+
/*
* Tear down all pipelines associated with PCMs that did not get suspended
* and unset the prepare flag so that they can be set up again during resume.
diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c
index fb40378ad084..c03dd513fbff 100644
--- a/sound/soc/sof/ipc3.c
+++ b/sound/soc/sof/ipc3.c
@@ -1067,7 +1067,7 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev)
return;
}
- if (hdr.size < sizeof(hdr)) {
+ if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) {
dev_err(sdev->dev, "The received message size is invalid\n");
return;
}
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 85d3f390e4b2..07eb5c6d4adf 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -413,7 +413,18 @@ skip_pause_transition:
ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list);
if (ret < 0) {
dev_err(sdev->dev, "failed to set final state %d for all pipelines\n", state);
- goto free;
+ /*
+ * workaround: if the firmware is crashed while setting the
+ * pipelines to reset state we must ignore the error code and
+ * reset it to 0.
+ * Since the firmware is crashed we will not send IPC messages
+ * and we are going to see errors printed, but the state of the
+ * widgets will be correct for the next boot.
+ */
+ if (sdev->fw_state != SOF_FW_CRASHED || state != SOF_IPC4_PIPE_RESET)
+ goto free;
+
+ ret = 0;
}
/* update RUNNING/RESET state for all pipelines that were just triggered */
diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c
index 702386823d17..f41c30955857 100644
--- a/sound/soc/sunxi/sun4i-spdif.c
+++ b/sound/soc/sunxi/sun4i-spdif.c
@@ -577,6 +577,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = {
.compatible = "allwinner,sun50i-h6-spdif",
.data = &sun50i_h6_spdif_quirks,
},
+ {
+ .compatible = "allwinner,sun50i-h616-spdif",
+ /* Essentially the same as the H6, but without RX */
+ .data = &sun50i_h6_spdif_quirks,
+ },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match);
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 33db334e6556..60fcb872a80b 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -261,6 +261,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
int ret, i, cur, err, pins, clock_id;
const u8 *sources;
int proto = fmt->protocol;
+ bool readable, writeable;
+ u32 bmControls;
entity_id &= 0xff;
@@ -292,11 +294,27 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
sources = GET_VAL(selector, proto, baCSourceID);
cur = 0;
+ if (proto == UAC_VERSION_3)
+ bmControls = le32_to_cpu(*(__le32 *)(&selector->v3.baCSourceID[0] + pins));
+ else
+ bmControls = *(__u8 *)(&selector->v2.baCSourceID[0] + pins);
+
+ readable = uac_v2v3_control_is_readable(bmControls,
+ UAC2_CX_CLOCK_SELECTOR);
+ writeable = uac_v2v3_control_is_writeable(bmControls,
+ UAC2_CX_CLOCK_SELECTOR);
+
if (pins == 1) {
ret = 1;
goto find_source;
}
+ /* for now just warn about buggy device */
+ if (!readable)
+ usb_audio_warn(chip,
+ "%s(): clock selector control is not readable, id %d\n",
+ __func__, clock_id);
+
/* the entity ID we are looking at is a selector.
* find out what it currently selects */
ret = uac_clock_selector_get_val(chip, clock_id);
@@ -325,17 +343,29 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
visited, validate);
if (ret > 0) {
/* Skip setting clock selector again for some devices */
- if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR)
+ if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR ||
+ !writeable)
return ret;
err = uac_clock_selector_set_val(chip, entity_id, cur);
- if (err < 0)
+ if (err < 0) {
+ if (pins == 1) {
+ usb_audio_dbg(chip,
+ "%s(): selector returned an error, "
+ "assuming a firmware bug, id %d, ret %d\n",
+ __func__, clock_id, err);
+ return ret;
+ }
return err;
+ }
}
if (!validate || ret > 0 || !chip->autoclock)
return ret;
find_others:
+ if (!writeable)
+ return -ENXIO;
+
/* The current clock source is invalid, try others. */
for (i = 1; i <= pins; i++) {
if (i == cur)
diff --git a/sound/usb/format.c b/sound/usb/format.c
index ab5fed9f55b6..3b45d0ee7693 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip,
int clock)
{
struct usb_device *dev = chip->dev;
+ struct usb_host_interface *alts;
unsigned int *table;
unsigned int nr_rates;
int i, err;
+ u32 bmControls;
/* performing the rate verification may lead to unexpected USB bus
* behavior afterwards by some unknown reason. Do this only for the
@@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip,
if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES))
return 0; /* don't perform the validation as default */
+ alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting);
+ if (!alts)
+ return 0;
+
+ if (fp->protocol == UAC_VERSION_3) {
+ struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc(
+ alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
+ bmControls = le32_to_cpu(as->bmControls);
+ } else {
+ struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc(
+ alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
+ bmControls = as->bmControls;
+ }
+
+ if (!uac_v2v3_control_is_readable(bmControls,
+ UAC2_AS_VAL_ALT_SETTINGS))
+ return 0;
+
table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 6b0993258e03..c1f2e5a03de9 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1742,50 +1742,44 @@ static void snd_usbmidi_get_port_info(struct snd_rawmidi *rmidi, int number,
}
}
-static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor(
- struct usb_host_interface *hostif, uint8_t jack_id)
+/* return iJack for the corresponding jackID */
+static int find_usb_ijack(struct usb_host_interface *hostif, uint8_t jack_id)
{
unsigned char *extra = hostif->extra;
int extralen = hostif->extralen;
+ struct usb_descriptor_header *h;
+ struct usb_midi_out_jack_descriptor *outjd;
+ struct usb_midi_in_jack_descriptor *injd;
+ size_t sz;
while (extralen > 4) {
- struct usb_midi_in_jack_descriptor *injd =
- (struct usb_midi_in_jack_descriptor *)extra;
+ h = (struct usb_descriptor_header *)extra;
+ if (h->bDescriptorType != USB_DT_CS_INTERFACE)
+ goto next;
+ outjd = (struct usb_midi_out_jack_descriptor *)h;
+ if (h->bLength >= sizeof(*outjd) &&
+ outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK &&
+ outjd->bJackID == jack_id) {
+ sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins);
+ if (outjd->bLength < sz)
+ goto next;
+ return *(extra + sz - 1);
+ }
+
+ injd = (struct usb_midi_in_jack_descriptor *)h;
if (injd->bLength >= sizeof(*injd) &&
- injd->bDescriptorType == USB_DT_CS_INTERFACE &&
injd->bDescriptorSubtype == UAC_MIDI_IN_JACK &&
- injd->bJackID == jack_id)
- return injd;
- if (!extra[0])
- break;
- extralen -= extra[0];
- extra += extra[0];
- }
- return NULL;
-}
-
-static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor(
- struct usb_host_interface *hostif, uint8_t jack_id)
-{
- unsigned char *extra = hostif->extra;
- int extralen = hostif->extralen;
+ injd->bJackID == jack_id)
+ return injd->iJack;
- while (extralen > 4) {
- struct usb_midi_out_jack_descriptor *outjd =
- (struct usb_midi_out_jack_descriptor *)extra;
-
- if (outjd->bLength >= sizeof(*outjd) &&
- outjd->bDescriptorType == USB_DT_CS_INTERFACE &&
- outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK &&
- outjd->bJackID == jack_id)
- return outjd;
+next:
if (!extra[0])
break;
extralen -= extra[0];
extra += extra[0];
}
- return NULL;
+ return 0;
}
static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
@@ -1796,13 +1790,10 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
const char *name_format;
struct usb_interface *intf;
struct usb_host_interface *hostif;
- struct usb_midi_in_jack_descriptor *injd;
- struct usb_midi_out_jack_descriptor *outjd;
uint8_t jack_name_buf[32];
uint8_t *default_jack_name = "MIDI";
uint8_t *jack_name = default_jack_name;
uint8_t iJack;
- size_t sz;
int res;
struct snd_rawmidi_substream *substream =
@@ -1816,21 +1807,7 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
intf = umidi->iface;
if (intf && jack_id >= 0) {
hostif = intf->cur_altsetting;
- iJack = 0;
- if (stream != SNDRV_RAWMIDI_STREAM_OUTPUT) {
- /* in jacks connect to outs */
- outjd = find_usb_out_jack_descriptor(hostif, jack_id);
- if (outjd) {
- sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins);
- if (outjd->bLength >= sz)
- iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t));
- }
- } else {
- /* and out jacks connect to ins */
- injd = find_usb_in_jack_descriptor(hostif, jack_id);
- if (injd)
- iJack = injd->iJack;
- }
+ iJack = find_usb_ijack(hostif, jack_id);
if (iJack != 0) {
res = usb_string(umidi->dev, iJack, jack_name_buf,
ARRAY_SIZE(jack_name_buf));
diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c
index 1ec177fe284e..820d3e4b672a 100644
--- a/sound/usb/midi2.c
+++ b/sound/usb/midi2.c
@@ -1085,7 +1085,7 @@ int snd_usb_midi_v2_create(struct snd_usb_audio *chip,
}
if ((quirk && quirk->type != QUIRK_MIDI_STANDARD_INTERFACE) ||
iface->num_altsetting < 2) {
- usb_audio_info(chip, "Quirk or no altest; falling back to MIDI 1.0\n");
+ usb_audio_info(chip, "Quirk or no altset; falling back to MIDI 1.0\n");
goto fallback_to_midi1;
}
hostif = &iface->altsetting[1];
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 07cc6a201579..09712e61c606 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2031,10 +2031,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */
QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */
QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */
QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M),
+ DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */
+ QUIRK_FLAG_IFACE_SKIP_CLOSE),
DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */
QUIRK_FLAG_SET_IFACE_FIRST),
DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */
@@ -2073,14 +2077,22 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_GENERIC_IMPLICIT_FB),
DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */
QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */
+ QUIRK_FLAG_CTL_MSG_DELAY_1M),
DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */
QUIRK_FLAG_CTL_MSG_DELAY_1M),
DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */
QUIRK_FLAG_CTL_MSG_DELAY_1M),
+ DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */
+ QUIRK_FLAG_FIXED_RATE),
+ DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */
+ QUIRK_FLAG_FIXED_RATE),
DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */
@@ -2113,6 +2125,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */
@@ -2155,6 +2171,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */
@@ -2163,22 +2185,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */
QUIRK_FLAG_ALIGN_TRANSFER),
- DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
- QUIRK_FLAG_GET_SAMPLE_RATE),
- DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */
- QUIRK_FLAG_GENERIC_IMPLICIT_FB),
- DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */
- QUIRK_FLAG_GENERIC_IMPLICIT_FB),
- DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
- QUIRK_FLAG_GENERIC_IMPLICIT_FB),
- DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */
- QUIRK_FLAG_IFACE_SKIP_CLOSE),
- DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */
- QUIRK_FLAG_FIXED_RATE),
- DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */
- QUIRK_FLAG_FIXED_RATE),
- DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */
- QUIRK_FLAG_GET_SAMPLE_RATE),
/* Vendor matches */
VENDOR_FLG(0x045e, /* MS Lifecam */
diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c
index e2847c040f75..b158c3cb8e5f 100644
--- a/sound/virtio/virtio_card.c
+++ b/sound/virtio/virtio_card.c
@@ -91,8 +91,6 @@ static void virtsnd_event_notify_cb(struct virtqueue *vqueue)
virtsnd_event_dispatch(snd, event);
virtsnd_event_send(vqueue, event, true, GFP_ATOMIC);
}
- if (unlikely(virtqueue_is_broken(vqueue)))
- break;
} while (!virtqueue_enable_cb(vqueue));
spin_unlock_irqrestore(&queue->lock, flags);
}
diff --git a/sound/virtio/virtio_ctl_msg.c b/sound/virtio/virtio_ctl_msg.c
index 18dc5aca2e0c..9dabea01277f 100644
--- a/sound/virtio/virtio_ctl_msg.c
+++ b/sound/virtio/virtio_ctl_msg.c
@@ -303,8 +303,6 @@ void virtsnd_ctl_notify_cb(struct virtqueue *vqueue)
virtqueue_disable_cb(vqueue);
while ((msg = virtqueue_get_buf(vqueue, &length)))
virtsnd_ctl_msg_complete(msg);
- if (unlikely(virtqueue_is_broken(vqueue)))
- break;
} while (!virtqueue_enable_cb(vqueue));
spin_unlock_irqrestore(&queue->lock, flags);
}
diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c
index 542446c4c7ba..8c32efaf4c52 100644
--- a/sound/virtio/virtio_pcm_msg.c
+++ b/sound/virtio/virtio_pcm_msg.c
@@ -358,8 +358,6 @@ static inline void virtsnd_pcm_notify_cb(struct virtio_snd_queue *queue)
virtqueue_disable_cb(queue->vqueue);
while ((msg = virtqueue_get_buf(queue->vqueue, &written_bytes)))
virtsnd_pcm_msg_complete(msg, written_bytes);
- if (unlikely(virtqueue_is_broken(queue->vqueue)))
- break;
} while (!virtqueue_enable_cb(queue->vqueue));
spin_unlock_irqrestore(&queue->lock, flags);
}
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index f4542d2718f4..25160d26764b 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,6 +198,7 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
@@ -308,10 +309,14 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -321,7 +326,9 @@
#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
@@ -437,6 +444,7 @@
#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
@@ -495,6 +503,7 @@
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 702d93fdd10e..1f23960d2b06 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -44,32 +44,32 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define DISABLE_PTI 0
#else
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
# define DISABLE_RETPOLINE 0
#else
# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
(1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
#endif
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
# define DISABLE_RETHUNK 0
#else
# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
#endif
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
# define DISABLE_UNRET 0
#else
# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
#endif
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
# define DISABLE_CALL_DEPTH_TRACKING 0
#else
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
@@ -117,6 +117,12 @@
#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
#endif
+#ifdef CONFIG_X86_FRED
+# define DISABLE_FRED 0
+#else
+# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -133,7 +139,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_LAM)
+#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 1d51e1850ed0..1f9dc9bd13eb 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -36,8 +36,19 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
-/* Intel MSRs. Some also available on other CPUs */
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
+#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
+#define MSR_IA32_FRED_RSP2 0x1ce /* Level 2 stack pointer */
+#define MSR_IA32_FRED_RSP3 0x1cf /* Level 3 stack pointer */
+#define MSR_IA32_FRED_STKLVLS 0x1d0 /* Exception stack levels */
+#define MSR_IA32_FRED_SSP0 MSR_IA32_PL0_SSP /* Level 0 shadow stack pointer */
+#define MSR_IA32_FRED_SSP1 0x1d1 /* Level 1 shadow stack pointer */
+#define MSR_IA32_FRED_SSP2 0x1d2 /* Level 2 shadow stack pointer */
+#define MSR_IA32_FRED_SSP3 0x1d3 /* Level 3 shadow stack pointer */
+#define MSR_IA32_FRED_CONFIG 0x1d4 /* Entrypoint and interrupt stack level */
+/* Intel MSRs. Some also available on other CPUs */
#define MSR_TEST_CTRL 0x00000033
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
@@ -237,6 +248,11 @@
#define LBR_INFO_CYCLES 0xffff
#define LBR_INFO_BR_TYPE_OFFSET 56
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET 32
+#define LBR_INFO_BR_CNTR_NUM 4
+#define LBR_INFO_BR_CNTR_BITS 2
+#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
#define MSR_ARCH_LBR_CTL 0x000014ce
#define ARCH_LBR_CTL_LBREN BIT(0)
@@ -536,6 +552,9 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/* KeyID partitioning between MKTME and TDX */
+#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
+
/*
* AMD64 MSRs. Not complete. See the architecture manual for a more
* complete list.
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
index 11ff975242ca..e2ff22b379a4 100644
--- a/tools/arch/x86/include/asm/rmwcc.h
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -4,7 +4,7 @@
#define __GEN_RMWcc(fullop, var, cc, ...) \
do { \
- asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+ asm goto (fullop "; j" cc " %l[cc_label]" \
: : "m" (var), ## __VA_ARGS__ \
: "memory" : cc_label); \
return 0; \
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 1a6a1f987949..a448d0964fc0 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
/* x86-specific KVM_EXIT_HYPERCALL flags. */
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+#define KVM_X86_DEFAULT_VM 0
+#define KVM_X86_SW_PROTECTED_VM 1
+
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index 8fd63a067308..ada4b4a79dd4 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -71,7 +71,7 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
insn->next_byte = kaddr;
- insn->x86_64 = x86_64 ? 1 : 0;
+ insn->x86_64 = x86_64;
insn->opnd_bytes = 4;
if (x86_64)
insn->addr_bytes = 8;
@@ -268,11 +268,9 @@ int insn_get_opcode(struct insn *insn)
if (opcode->got)
return 0;
- if (!insn->prefixes.got) {
- ret = insn_get_prefixes(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_prefixes(insn);
+ if (ret)
+ return ret;
/* Get first opcode */
op = get_next(insn_byte_t, insn);
@@ -339,11 +337,9 @@ int insn_get_modrm(struct insn *insn)
if (modrm->got)
return 0;
- if (!insn->opcode.got) {
- ret = insn_get_opcode(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_opcode(insn);
+ if (ret)
+ return ret;
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
@@ -386,11 +382,9 @@ int insn_rip_relative(struct insn *insn)
if (!insn->x86_64)
return 0;
- if (!modrm->got) {
- ret = insn_get_modrm(insn);
- if (ret)
- return 0;
- }
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return 0;
/*
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -417,11 +411,9 @@ int insn_get_sib(struct insn *insn)
if (insn->sib.got)
return 0;
- if (!insn->modrm.got) {
- ret = insn_get_modrm(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
if (insn->modrm.nbytes) {
modrm = insn->modrm.bytes[0];
@@ -460,11 +452,9 @@ int insn_get_displacement(struct insn *insn)
if (insn->displacement.got)
return 0;
- if (!insn->sib.got) {
- ret = insn_get_sib(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_sib(insn);
+ if (ret)
+ return ret;
if (insn->modrm.nbytes) {
/*
@@ -628,11 +618,9 @@ int insn_get_immediate(struct insn *insn)
if (insn->immediate.got)
return 0;
- if (!insn->displacement.got) {
- ret = insn_get_displacement(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_displacement(insn);
+ if (ret)
+ return ret;
if (inat_has_moffset(insn->attr)) {
if (!__get_moffset(insn))
@@ -703,11 +691,9 @@ int insn_get_length(struct insn *insn)
if (insn->length)
return 0;
- if (!insn->immediate.got) {
- ret = insn_get_immediate(insn);
- if (ret)
- return ret;
- }
+ ret = insn_get_immediate(insn);
+ if (ret)
+ return ret;
insn->length = (unsigned char)((unsigned long)insn->next_byte
- (unsigned long)insn->kaddr);
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index d055b82d22cc..59cf6f9065aa 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -1,11 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
-#include <asm/export.h>
.section .noinstr.text, "ax"
@@ -39,7 +39,7 @@ SYM_TYPED_FUNC_START(__memcpy)
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_ALIAS(memcpy, __memcpy)
+SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_START_LOCAL(memcpy_orig)
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 7c59a704c458..0199d56cb479 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -1,10 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
-#include <asm/export.h>
.section .noinstr.text, "ax"
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
-SYM_FUNC_ALIAS(memset, __memset)
+SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
EXPORT_SYMBOL(memset)
SYM_FUNC_START_LOCAL(memset_orig)
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 5168ee0360b2..12af572201a2 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -1051,8 +1051,8 @@ GrpTable: Grp6
EndTable
GrpTable: Grp7
-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) | WRMSRNS (110),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) | ERETU (F3),(010),(11B) | ERETS (F2),(010),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 5006e724d1bc..5e60825818dd 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -257,18 +257,48 @@ EXAMPLES
return 0;
}
-This is example BPF application with two BPF programs and a mix of BPF maps
-and global variables. Source code is split across two source code files.
+**$ cat example3.bpf.c**
+
+::
+
+ #include <linux/ptrace.h>
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+ /* This header file is provided by the bpf_testmod module. */
+ #include "bpf_testmod.h"
+
+ int test_2_result = 0;
+
+ /* bpf_Testmod.ko calls this function, passing a "4"
+ * and testmod_map->data.
+ */
+ SEC("struct_ops/test_2")
+ void BPF_PROG(test_2, int a, int b)
+ {
+ test_2_result = a + b;
+ }
+
+ SEC(".struct_ops")
+ struct bpf_testmod_ops testmod_map = {
+ .test_2 = (void *)test_2,
+ .data = 0x1,
+ };
+
+This is example BPF application with three BPF programs and a mix of BPF
+maps and global variables. Source code is split across three source code
+files.
**$ clang --target=bpf -g example1.bpf.c -o example1.bpf.o**
**$ clang --target=bpf -g example2.bpf.c -o example2.bpf.o**
-**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o**
+**$ clang --target=bpf -g example3.bpf.c -o example3.bpf.o**
+
+**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o example3.bpf.o**
-This set of commands compiles *example1.bpf.c* and *example2.bpf.c*
-individually and then statically links respective object files into the final
-BPF ELF object file *example.bpf.o*.
+This set of commands compiles *example1.bpf.c*, *example2.bpf.c* and
+*example3.bpf.c* individually and then statically links respective object
+files into the final BPF ELF object file *example.bpf.o*.
**$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h**
@@ -291,7 +321,15 @@ BPF ELF object file *example.bpf.o*.
struct bpf_map *data;
struct bpf_map *bss;
struct bpf_map *my_map;
+ struct bpf_map *testmod_map;
} maps;
+ struct {
+ struct example__testmod_map__bpf_testmod_ops {
+ const struct bpf_program *test_1;
+ const struct bpf_program *test_2;
+ int data;
+ } *testmod_map;
+ } struct_ops;
struct {
struct bpf_program *handle_sys_enter;
struct bpf_program *handle_sys_exit;
@@ -304,6 +342,7 @@ BPF ELF object file *example.bpf.o*.
struct {
int x;
} data;
+ int test_2_result;
} *bss;
struct example__data {
_Bool global_flag;
@@ -342,10 +381,16 @@ BPF ELF object file *example.bpf.o*.
skel->rodata->param1 = 128;
+ /* Change the value through the pointer of shadow type */
+ skel->struct_ops.testmod_map->data = 13;
+
err = example__load(skel);
if (err)
goto cleanup;
+ /* The result of the function test_2() */
+ printf("test_2_result: %d\n", skel->bss->test_2_result);
+
err = example__attach(skel);
if (err)
goto cleanup;
@@ -372,6 +417,7 @@ BPF ELF object file *example.bpf.o*.
::
+ test_2_result: 17
my_map name: my_map
sys_enter prog FD: 8
my_static_var: 7
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 3b7ba037af95..9d6a314dfd7a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -55,7 +55,7 @@ MAP COMMANDS
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** }
+| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index ee3ce2b8000d..4fa4ade1ce74 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -7,6 +7,7 @@
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
+#include <libgen.h>
#include <linux/err.h>
#include <stdbool.h>
#include <stdio.h>
@@ -54,11 +55,27 @@ static bool str_has_suffix(const char *str, const char *suffix)
return true;
}
+static const struct btf_type *
+resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+ const struct btf_type *t;
+
+ t = skip_mods_and_typedefs(btf, id, NULL);
+ if (!btf_is_ptr(t))
+ return NULL;
+
+ t = skip_mods_and_typedefs(btf, t->type, res_id);
+
+ return btf_is_func_proto(t) ? t : NULL;
+}
+
static void get_obj_name(char *name, const char *file)
{
- /* Using basename() GNU version which doesn't modify arg. */
- strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1);
- name[MAX_OBJ_NAME_LEN - 1] = '\0';
+ char file_copy[PATH_MAX];
+
+ /* Using basename() POSIX version to be more portable. */
+ strncpy(file_copy, file, PATH_MAX - 1)[PATH_MAX - 1] = '\0';
+ strncpy(name, basename(file_copy), MAX_OBJ_NAME_LEN - 1)[MAX_OBJ_NAME_LEN - 1] = '\0';
if (str_has_suffix(name, ".o"))
name[strlen(name) - 2] = '\0';
sanitize_identifier(name);
@@ -103,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
int i, n;
+ /* recognize hard coded LLVM section name */
+ if (strcmp(sec_name, ".arena.1") == 0) {
+ /* this is the name to use in skeleton */
+ snprintf(buf, buf_sz, "arena");
+ return true;
+ }
for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) {
const char *pfx = pfxs[i];
@@ -231,8 +254,15 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map
return NULL;
}
-static bool is_internal_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
+static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
{
+ size_t tmp_sz;
+
+ if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) {
+ snprintf(buf, sz, "arena");
+ return true;
+ }
+
if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
return false;
@@ -257,7 +287,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
bpf_object__for_each_map(map, obj) {
/* only generate definitions for memory-mapped internal maps */
- if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident)))
+ if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
continue;
sec = find_type_for_map(btf, map_ident);
@@ -310,7 +340,7 @@ static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name
bpf_object__for_each_map(map, obj) {
/* only generate definitions for memory-mapped internal maps */
- if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident)))
+ if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
continue;
sec = find_type_for_map(btf, map_ident);
@@ -487,7 +517,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name)
", obj_name);
bpf_object__for_each_map(map, obj) {
- if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident)))
+ if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
continue;
sec = find_type_for_map(btf, map_ident);
@@ -703,7 +733,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
const void *mmap_data = NULL;
size_t mmap_size = 0;
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
codegen("\
@@ -765,7 +795,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
bpf_object__for_each_map(map, obj) {
const char *mmap_flags;
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
@@ -854,7 +884,7 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped)
",
i, bpf_map__name(map), i, ident);
/* memory-mapped internal maps */
- if (mmaped && is_internal_mmapable_map(map, ident, sizeof(ident))) {
+ if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
i, ident);
}
@@ -906,6 +936,207 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li
}
}
+static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident,
+ const struct btf_type *map_type, __u32 map_type_id)
+{
+ LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, .indent_level = 3);
+ const struct btf_type *member_type;
+ __u32 offset, next_offset = 0;
+ const struct btf_member *m;
+ struct btf_dump *d = NULL;
+ const char *member_name;
+ __u32 member_type_id;
+ int i, err = 0, n;
+ int size;
+
+ d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
+ if (!d)
+ return -errno;
+
+ n = btf_vlen(map_type);
+ for (i = 0, m = btf_members(map_type); i < n; i++, m++) {
+ member_type = skip_mods_and_typedefs(btf, m->type, &member_type_id);
+ member_name = btf__name_by_offset(btf, m->name_off);
+
+ offset = m->offset / 8;
+ if (next_offset < offset)
+ printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset);
+
+ switch (btf_kind(member_type)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ /* scalar type */
+ printf("\t\t\t");
+ opts.field_name = member_name;
+ err = btf_dump__emit_type_decl(d, member_type_id, &opts);
+ if (err) {
+ p_err("Failed to emit type declaration for %s: %d", member_name, err);
+ goto out;
+ }
+ printf(";\n");
+
+ size = btf__resolve_size(btf, member_type_id);
+ if (size < 0) {
+ p_err("Failed to resolve size of %s: %d\n", member_name, size);
+ err = size;
+ goto out;
+ }
+
+ next_offset = offset + size;
+ break;
+
+ case BTF_KIND_PTR:
+ if (resolve_func_ptr(btf, m->type, NULL)) {
+ /* Function pointer */
+ printf("\t\t\tstruct bpf_program *%s;\n", member_name);
+
+ next_offset = offset + sizeof(void *);
+ break;
+ }
+ /* All pointer types are unsupported except for
+ * function pointers.
+ */
+ fallthrough;
+
+ default:
+ /* Unsupported types
+ *
+ * Types other than scalar types and function
+ * pointers are currently not supported in order to
+ * prevent conflicts in the generated code caused
+ * by multiple definitions. For instance, if the
+ * struct type FOO is used in a struct_ops map,
+ * bpftool has to generate definitions for FOO,
+ * which may result in conflicts if FOO is defined
+ * in different skeleton files.
+ */
+ size = btf__resolve_size(btf, member_type_id);
+ if (size < 0) {
+ p_err("Failed to resolve size of %s: %d\n", member_name, size);
+ err = size;
+ goto out;
+ }
+ printf("\t\t\tchar __unsupported_%d[%d];\n", i, size);
+
+ next_offset = offset + size;
+ break;
+ }
+ }
+
+ /* Cannot fail since it must be a struct type */
+ size = btf__resolve_size(btf, map_type_id);
+ if (next_offset < (__u32)size)
+ printf("\t\t\tchar __padding_end[%d];\n", size - next_offset);
+
+out:
+ btf_dump__free(d);
+
+ return err;
+}
+
+/* Generate the pointer of the shadow type for a struct_ops map.
+ *
+ * This function adds a pointer of the shadow type for a struct_ops map.
+ * The members of a struct_ops map can be exported through a pointer to a
+ * shadow type. The user can access these members through the pointer.
+ *
+ * A shadow type includes not all members, only members of some types.
+ * They are scalar types and function pointers. The function pointers are
+ * translated to the pointer of the struct bpf_program. The scalar types
+ * are translated to the original type without any modifiers.
+ *
+ * Unsupported types will be translated to a char array to occupy the same
+ * space as the original field, being renamed as __unsupported_*. The user
+ * should treat these fields as opaque data.
+ */
+static int gen_st_ops_shadow_type(const char *obj_name, struct btf *btf, const char *ident,
+ const struct bpf_map *map)
+{
+ const struct btf_type *map_type;
+ const char *type_name;
+ __u32 map_type_id;
+ int err;
+
+ map_type_id = bpf_map__btf_value_type_id(map);
+ if (map_type_id == 0)
+ return -EINVAL;
+ map_type = btf__type_by_id(btf, map_type_id);
+ if (!map_type)
+ return -EINVAL;
+
+ type_name = btf__name_by_offset(btf, map_type->name_off);
+
+ printf("\t\tstruct %s__%s__%s {\n", obj_name, ident, type_name);
+
+ err = walk_st_ops_shadow_vars(btf, ident, map_type, map_type_id);
+ if (err)
+ return err;
+
+ printf("\t\t} *%s;\n", ident);
+
+ return 0;
+}
+
+static int gen_st_ops_shadow(const char *obj_name, struct btf *btf, struct bpf_object *obj)
+{
+ int err, st_ops_cnt = 0;
+ struct bpf_map *map;
+ char ident[256];
+
+ if (!btf)
+ return 0;
+
+ /* Generate the pointers to shadow types of
+ * struct_ops maps.
+ */
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+ if (!get_map_ident(map, ident, sizeof(ident)))
+ continue;
+
+ if (st_ops_cnt == 0) /* first struct_ops map */
+ printf("\tstruct {\n");
+ st_ops_cnt++;
+
+ err = gen_st_ops_shadow_type(obj_name, btf, ident, map);
+ if (err)
+ return err;
+ }
+
+ if (st_ops_cnt)
+ printf("\t} struct_ops;\n");
+
+ return 0;
+}
+
+/* Generate the code to initialize the pointers of shadow types. */
+static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ char ident[256];
+
+ if (!btf)
+ return;
+
+ /* Initialize the pointers to_ops shadow types of
+ * struct_ops maps.
+ */
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+ if (!get_map_ident(map, ident, sizeof(ident)))
+ continue;
+ codegen("\
+ \n\
+ obj->struct_ops.%1$s = bpf_map__initial_value(obj->maps.%1$s, NULL);\n\
+ \n\
+ ", ident);
+ }
+}
+
static int do_skeleton(int argc, char **argv)
{
char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
@@ -1049,6 +1280,11 @@ static int do_skeleton(int argc, char **argv)
printf("\t} maps;\n");
}
+ btf = bpf_object__btf(obj);
+ err = gen_st_ops_shadow(obj_name, btf, obj);
+ if (err)
+ goto out;
+
if (prog_cnt) {
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
@@ -1072,7 +1308,6 @@ static int do_skeleton(int argc, char **argv)
printf("\t} links;\n");
}
- btf = bpf_object__btf(obj);
if (btf) {
err = codegen_datasecs(obj, obj_name);
if (err)
@@ -1130,6 +1365,12 @@ static int do_skeleton(int argc, char **argv)
if (err) \n\
goto err_out; \n\
\n\
+ ", obj_name);
+
+ gen_st_ops_shadow_init(btf, obj);
+
+ codegen("\
+ \n\
return obj; \n\
err_out: \n\
%1$s__destroy(obj); \n\
@@ -1389,7 +1630,7 @@ static int do_subskeleton(int argc, char **argv)
/* Also count all maps that have a name */
map_cnt++;
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
map_type_id = bpf_map__btf_value_type_id(map);
@@ -1439,6 +1680,10 @@ static int do_subskeleton(int argc, char **argv)
printf("\t} maps;\n");
}
+ err = gen_st_ops_shadow(obj_name, btf, obj);
+ if (err)
+ goto out;
+
if (prog_cnt) {
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
@@ -1507,7 +1752,7 @@ static int do_subskeleton(int argc, char **argv)
/* walk through each symbol and emit the runtime representation */
bpf_object__for_each_map(map, obj) {
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
map_type_id = bpf_map__btf_value_type_id(map);
@@ -1550,6 +1795,12 @@ static int do_subskeleton(int argc, char **argv)
if (err) \n\
goto err; \n\
\n\
+ ");
+
+ gen_st_ops_shadow_init(btf, obj);
+
+ codegen("\
+ \n\
return obj; \n\
err: \n\
%1$s__destroy(obj); \n\
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index cb46667a6b2e..afde9d0c2ea1 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -249,18 +249,44 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info)
return err;
}
-static int cmp_u64(const void *A, const void *B)
+struct addr_cookie {
+ __u64 addr;
+ __u64 cookie;
+};
+
+static int cmp_addr_cookie(const void *A, const void *B)
{
- const __u64 *a = A, *b = B;
+ const struct addr_cookie *a = A, *b = B;
- return *a - *b;
+ if (a->addr == b->addr)
+ return 0;
+ return a->addr < b->addr ? -1 : 1;
+}
+
+static struct addr_cookie *
+get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
+{
+ struct addr_cookie *data;
+ __u32 i;
+
+ data = calloc(count, sizeof(data[0]));
+ if (!data) {
+ p_err("mem alloc failed");
+ return NULL;
+ }
+ for (i = 0; i < count; i++) {
+ data[i].addr = addrs[i];
+ data[i].cookie = cookies[i];
+ }
+ qsort(data, count, sizeof(data[0]), cmp_addr_cookie);
+ return data;
}
static void
show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
{
+ struct addr_cookie *data;
__u32 i, j = 0;
- __u64 *addrs;
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
@@ -268,14 +294,20 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed);
jsonw_name(json_wtr, "funcs");
jsonw_start_array(json_wtr);
- addrs = u64_to_ptr(info->kprobe_multi.addrs);
- qsort(addrs, info->kprobe_multi.count, sizeof(addrs[0]), cmp_u64);
+ data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs),
+ u64_to_ptr(info->kprobe_multi.cookies),
+ info->kprobe_multi.count);
+ if (!data)
+ return;
/* Load it once for all. */
if (!dd.sym_count)
kernel_syms_load(&dd);
+ if (!dd.sym_count)
+ goto error;
+
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != addrs[j])
+ if (dd.sym_mapping[i].address != data[j].addr)
continue;
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
@@ -287,11 +319,14 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
} else {
jsonw_string_field(json_wtr, "module", dd.sym_mapping[i].module);
}
+ jsonw_uint_field(json_wtr, "cookie", data[j].cookie);
jsonw_end_object(json_wtr);
if (j++ == info->kprobe_multi.count)
break;
}
jsonw_end_array(json_wtr);
+error:
+ free(data);
}
static __u64 *u64_to_arr(__u64 val)
@@ -334,6 +369,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
u64_to_ptr(info->perf_event.kprobe.func_name));
jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset);
jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed);
+ jsonw_uint_field(wtr, "cookie", info->perf_event.kprobe.cookie);
}
static void
@@ -343,6 +379,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_string_field(wtr, "file",
u64_to_ptr(info->perf_event.uprobe.file_name));
jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset);
+ jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie);
}
static void
@@ -350,6 +387,7 @@ show_perf_event_tracepoint_json(struct bpf_link_info *info, json_writer_t *wtr)
{
jsonw_string_field(wtr, "tracepoint",
u64_to_ptr(info->perf_event.tracepoint.tp_name));
+ jsonw_uint_field(wtr, "cookie", info->perf_event.tracepoint.cookie);
}
static char *perf_config_hw_cache_str(__u64 config)
@@ -426,6 +464,8 @@ show_perf_event_event_json(struct bpf_link_info *info, json_writer_t *wtr)
else
jsonw_uint_field(wtr, "event_config", config);
+ jsonw_uint_field(wtr, "cookie", info->perf_event.event.cookie);
+
if (type == PERF_TYPE_HW_CACHE && perf_config)
free((void *)perf_config);
}
@@ -670,8 +710,8 @@ void netfilter_dump_plain(const struct bpf_link_info *info)
static void show_kprobe_multi_plain(struct bpf_link_info *info)
{
+ struct addr_cookie *data;
__u32 i, j = 0;
- __u64 *addrs;
if (!info->kprobe_multi.count)
return;
@@ -683,21 +723,24 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
printf("func_cnt %u ", info->kprobe_multi.count);
if (info->kprobe_multi.missed)
printf("missed %llu ", info->kprobe_multi.missed);
- addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs);
- qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64);
+ data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs),
+ u64_to_ptr(info->kprobe_multi.cookies),
+ info->kprobe_multi.count);
+ if (!data)
+ return;
/* Load it once for all. */
if (!dd.sym_count)
kernel_syms_load(&dd);
if (!dd.sym_count)
- return;
+ goto error;
- printf("\n\t%-16s %s", "addr", "func [module]");
+ printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != addrs[j])
+ if (dd.sym_mapping[i].address != data[j].addr)
continue;
- printf("\n\t%016lx %s",
- dd.sym_mapping[i].address, dd.sym_mapping[i].name);
+ printf("\n\t%016lx %-16llx %s",
+ dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
if (dd.sym_mapping[i].module[0] != '\0')
printf(" [%s] ", dd.sym_mapping[i].module);
else
@@ -706,6 +749,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
if (j++ == info->kprobe_multi.count)
break;
}
+error:
+ free(data);
}
static void show_uprobe_multi_plain(struct bpf_link_info *info)
@@ -754,6 +799,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info)
printf("+%#x", info->perf_event.kprobe.offset);
if (info->perf_event.kprobe.missed)
printf(" missed %llu", info->perf_event.kprobe.missed);
+ if (info->perf_event.kprobe.cookie)
+ printf(" cookie %llu", info->perf_event.kprobe.cookie);
printf(" ");
}
@@ -770,6 +817,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info)
else
printf("\n\tuprobe ");
printf("%s+%#x ", buf, info->perf_event.uprobe.offset);
+ if (info->perf_event.uprobe.cookie)
+ printf("cookie %llu ", info->perf_event.uprobe.cookie);
}
static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
@@ -781,6 +830,8 @@ static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
return;
printf("\n\ttracepoint %s ", buf);
+ if (info->perf_event.tracepoint.cookie)
+ printf("cookie %llu ", info->perf_event.tracepoint.cookie);
}
static void show_perf_event_event_plain(struct bpf_link_info *info)
@@ -802,6 +853,9 @@ static void show_perf_event_event_plain(struct bpf_link_info *info)
else
printf("%llu ", config);
+ if (info->perf_event.event.cookie)
+ printf("cookie %llu ", info->perf_event.event.cookie);
+
if (type == PERF_TYPE_HW_CACHE && perf_config)
free((void *)perf_config);
}
@@ -952,6 +1006,14 @@ again:
return -ENOMEM;
}
info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ cookies = calloc(count, sizeof(__u64));
+ if (!cookies) {
+ p_err("mem alloc failed");
+ free(addrs);
+ close(fd);
+ return -ENOMEM;
+ }
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
goto again;
}
}
@@ -977,7 +1039,7 @@ again:
cookies = calloc(count, sizeof(__u64));
if (!cookies) {
p_err("mem alloc failed");
- free(cookies);
+ free(ref_ctr_offsets);
free(offsets);
close(fd);
return -ENOMEM;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f98f7bbea2b1..b89bd792c1d5 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1463,7 +1463,7 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n"
+ " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-n|--nomount} }\n"
"",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index feb8e305804f..9cb42a3366c0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2298,7 +2298,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
int map_fd;
profile_perf_events = calloc(
- sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
+ obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int));
if (!profile_perf_events) {
p_err("failed to allocate memory for perf_event array: %s",
strerror(errno));
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 27a23196d58e..d9520cb826b3 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -70,6 +70,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
+#include <linux/btf_ids.h>
#include <linux/rbtree.h>
#include <linux/zalloc.h>
#include <linux/err.h>
@@ -78,7 +79,7 @@
#include <subcmd/parse-options.h>
#define BTF_IDS_SECTION ".BTF_ids"
-#define BTF_ID "__BTF_ID__"
+#define BTF_ID_PREFIX "__BTF_ID__"
#define BTF_STRUCT "struct"
#define BTF_UNION "union"
@@ -89,6 +90,14 @@
#define ADDR_CNT 100
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define ELFDATANATIVE ELFDATA2LSB
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define ELFDATANATIVE ELFDATA2MSB
+#else
+# error "Unknown machine endianness!"
+#endif
+
struct btf_id {
struct rb_node rb_node;
char *name;
@@ -116,6 +125,7 @@ struct object {
int idlist_shndx;
size_t strtabidx;
unsigned long idlist_addr;
+ int encoding;
} efile;
struct rb_root sets;
@@ -161,7 +171,7 @@ static int eprintf(int level, int var, const char *fmt, ...)
static bool is_btf_id(const char *name)
{
- return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1);
+ return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1);
}
static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
@@ -319,6 +329,7 @@ static int elf_collect(struct object *obj)
{
Elf_Scn *scn = NULL;
size_t shdrstrndx;
+ GElf_Ehdr ehdr;
int idx = 0;
Elf *elf;
int fd;
@@ -350,6 +361,13 @@ static int elf_collect(struct object *obj)
return -1;
}
+ if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) {
+ pr_err("FAILED cannot get ELF header: %s\n",
+ elf_errmsg(-1));
+ return -1;
+ }
+ obj->efile.encoding = ehdr.e_ident[EI_DATA];
+
/*
* Scan all the elf sections and look for save data
* from .BTF_ids section and symbols.
@@ -441,7 +459,7 @@ static int symbols_collect(struct object *obj)
* __BTF_ID__TYPE__vfs_truncate__0
* prefix = ^
*/
- prefix = name + sizeof(BTF_ID) - 1;
+ prefix = name + sizeof(BTF_ID_PREFIX) - 1;
/* struct */
if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
@@ -649,19 +667,18 @@ static int cmp_id(const void *pa, const void *pb)
static int sets_patch(struct object *obj)
{
Elf_Data *data = obj->efile.idlist;
- int *ptr = data->d_buf;
struct rb_node *next;
next = rb_first(&obj->sets);
while (next) {
- unsigned long addr, idx;
+ struct btf_id_set8 *set8;
+ struct btf_id_set *set;
+ unsigned long addr, off;
struct btf_id *id;
- int *base;
- int cnt;
id = rb_entry(next, struct btf_id, rb_node);
addr = id->addr[0];
- idx = addr - obj->efile.idlist_addr;
+ off = addr - obj->efile.idlist_addr;
/* sets are unique */
if (id->addr_cnt != 1) {
@@ -670,14 +687,39 @@ static int sets_patch(struct object *obj)
return -1;
}
- idx = idx / sizeof(int);
- base = &ptr[idx] + (id->is_set8 ? 2 : 1);
- cnt = ptr[idx];
+ if (id->is_set) {
+ set = data->d_buf + off;
+ qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id);
+ } else {
+ set8 = data->d_buf + off;
+ /*
+ * Make sure id is at the beginning of the pairs
+ * struct, otherwise the below qsort would not work.
+ */
+ BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id);
+ qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id);
- pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
- (idx + 1) * sizeof(int), cnt, id->name);
+ /*
+ * When ELF endianness does not match endianness of the
+ * host, libelf will do the translation when updating
+ * the ELF. This, however, corrupts SET8 flags which are
+ * already in the target endianness. So, let's bswap
+ * them to the host endianness and libelf will then
+ * correctly translate everything.
+ */
+ if (obj->efile.encoding != ELFDATANATIVE) {
+ int i;
+
+ set8->flags = bswap_32(set8->flags);
+ for (i = 0; i < set8->cnt; i++) {
+ set8->pairs[i].flags =
+ bswap_32(set8->pairs[i].flags);
+ }
+ }
+ }
- qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id);
+ pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
+ off, id->is_set ? set->cnt : set8->cnt, id->name);
next = rb_next(next);
}
diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h
index 2fd551915c20..cdd2fd078027 100644
--- a/tools/include/asm-generic/unaligned.h
+++ b/tools/include/asm-generic/unaligned.h
@@ -105,9 +105,9 @@ static inline u32 get_unaligned_le24(const void *p)
static inline void __put_unaligned_be24(const u32 val, u8 *p)
{
- *p++ = val >> 16;
- *p++ = val >> 8;
- *p++ = val;
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
}
static inline void put_unaligned_be24(const u32 val, void *p)
@@ -117,9 +117,9 @@ static inline void put_unaligned_be24(const u32 val, void *p)
static inline void __put_unaligned_le24(const u32 val, u8 *p)
{
- *p++ = val;
- *p++ = val >> 8;
- *p++ = val >> 16;
+ *p++ = val & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = (val >> 16) & 0xff;
}
static inline void put_unaligned_le24(const u32 val, void *p)
@@ -129,12 +129,12 @@ static inline void put_unaligned_le24(const u32 val, void *p)
static inline void __put_unaligned_be48(const u64 val, u8 *p)
{
- *p++ = val >> 40;
- *p++ = val >> 32;
- *p++ = val >> 24;
- *p++ = val >> 16;
- *p++ = val >> 8;
- *p++ = val;
+ *p++ = (val >> 40) & 0xff;
+ *p++ = (val >> 32) & 0xff;
+ *p++ = (val >> 24) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
}
static inline void put_unaligned_be48(const u64 val, void *p)
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 2f882d5cb30f..72535f00572f 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -8,6 +8,15 @@ struct btf_id_set {
u32 ids[];
};
+struct btf_id_set8 {
+ u32 cnt;
+ u32 flags;
+ struct {
+ u32 id;
+ u32 flags;
+ } pairs[];
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index 1bdd834bdd57..d09f9dc172a4 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -36,8 +36,8 @@
#include <linux/compiler-gcc.h>
#endif
-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm goto(x)
#endif
#endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index 311759ea25e9..51b25e9c4ec7 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -18,7 +18,6 @@ bool slab_is_available(void);
enum slab_state {
DOWN,
PARTIAL,
- PARTIAL_NODE,
UP,
FULL
};
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 756b013fb832..75f00965ab15 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_statmount 457
+__SYSCALL(__NR_statmount, sys_statmount)
+
+#define __NR_listmount 458
+__SYSCALL(__NR_listmount, sys_listmount)
+
+#define __NR_lsm_get_self_attr 459
+__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
+#define __NR_lsm_set_self_attr 460
+__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
+#define __NR_lsm_list_modules 461
+__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+
#undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 462
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index de723566c5ae..16122819edfe 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -713,7 +713,8 @@ struct drm_gem_open {
/**
* DRM_CAP_ASYNC_PAGE_FLIP
*
- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
*/
#define DRM_CAP_ASYNC_PAGE_FLIP 0x7
/**
@@ -773,6 +774,13 @@ struct drm_gem_open {
* :ref:`drm_sync_objects`.
*/
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15
/* DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
@@ -842,6 +850,31 @@ struct drm_get_cap {
*/
#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
+/**
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
+ *
+ * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
+ * virtualbox) have additional restrictions for cursor planes (thus
+ * making cursor planes on those drivers not truly universal,) e.g.
+ * they need cursor planes to act like one would expect from a mouse
+ * cursor and have correctly set hotspot properties.
+ * If this client cap is not set the DRM core will hide cursor plane on
+ * those virtualized drivers because not setting it implies that the
+ * client is not capable of dealing with those extra restictions.
+ * Clients which do set cursor hotspot and treat the cursor plane
+ * like a mouse cursor should set this property.
+ * The client must enable &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * Setting this property on drivers which do not special case
+ * cursor planes (i.e. non-virtualized drivers) will return
+ * EOPNOTSUPP, which can be used by userspace to gauge
+ * requirements of the hardware/drivers they're running on.
+ *
+ * This capability is always supported for atomic-capable virtualized
+ * drivers starting from kernel version 6.6.
+ */
+#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6
+
/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
struct drm_set_client_cap {
__u64 capability;
@@ -893,6 +926,7 @@ struct drm_syncobj_transfer {
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
struct drm_syncobj_wait {
__u64 handles;
/* absolute timeout */
@@ -901,6 +935,14 @@ struct drm_syncobj_wait {
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
+ /**
+ * @deadline_nsec - fence deadline hint
+ *
+ * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+ * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+ * set.
+ */
+ __u64 deadline_nsec;
};
struct drm_syncobj_timeline_wait {
@@ -913,6 +955,14 @@ struct drm_syncobj_timeline_wait {
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
+ /**
+ * @deadline_nsec - fence deadline hint
+ *
+ * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+ * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+ * set.
+ */
+ __u64 deadline_nsec;
};
/**
@@ -1218,6 +1268,26 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
+/**
+ * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
+ *
+ * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
+ * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
+ * alive. When the plane no longer uses the framebuffer (because the
+ * framebuffer is replaced with another one, or the plane is disabled), the
+ * framebuffer is cleaned up.
+ *
+ * This is useful to implement flicker-free transitions between two processes.
+ *
+ * Depending on the threat model, user-space may want to ensure that the
+ * framebuffer doesn't expose any sensitive user information: closed
+ * framebuffers attached to a plane can be read back by the next DRM master.
+ */
+#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb)
+
/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 218edb0a96f8..fd4f9574d177 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_FENCE 44
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
- * user specified bufffers for post-mortem debugging of GPU hangs. See
+ * user-specified buffers for post-mortem debugging of GPU hangs. See
* EXEC_OBJECT_CAPTURE.
*/
#define I915_PARAM_HAS_EXEC_CAPTURE 45
@@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy {
* is accurate.
*
* The returned dword is split into two fields to indicate both
- * the engine classess on which the object is being read, and the
+ * the engine classes on which the object is being read, and the
* engine class on which it is currently being written (if any).
*
* The low word (bits 0:15) indicate if the object is being written
@@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise {
__u32 handle;
/* Advice: either the buffer will be needed again in the near future,
- * or wont be and could be discarded under memory pressure.
+ * or won't be and could be discarded under memory pressure.
*/
__u32 madv;
@@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info {
* // enough to hold our array of engines. The kernel will fill out the
* // item.length for us, which is the number of bytes we need.
* //
- * // Alternatively a large buffer can be allocated straight away enabling
+ * // Alternatively a large buffer can be allocated straightaway enabling
* // querying in one pass, in which case item.length should contain the
* // length of the provided buffer.
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
@@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info {
* // Now that we allocated the required number of bytes, we call the ioctl
* // again, this time with the data_ptr pointing to our newly allocated
* // blob, which the kernel can then populate with info on all engines.
- * item.data_ptr = (uintptr_t)&info,
+ * item.data_ptr = (uintptr_t)&info;
*
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
@@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info {
/**
* struct drm_i915_engine_info
*
- * Describes one engine and it's capabilities as known to the driver.
+ * Describes one engine and its capabilities as known to the driver.
*/
struct drm_i915_engine_info {
/** @engine: Engine class and instance. */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7f24d898efbb..3c42b9f1bada 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -42,6 +42,7 @@
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
@@ -50,6 +51,10 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+enum bpf_cond_pseudo_jmp {
+ BPF_MAY_GOTO = 0,
+};
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
@@ -77,12 +82,29 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
+ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
+ * the trailing flexible array member) instead.
+ */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
+/* Header for bpf_lpm_trie_key structs */
+struct bpf_lpm_trie_key_hdr {
+ __u32 prefixlen;
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
+struct bpf_lpm_trie_key_u8 {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ __u8 data[]; /* Arbitrary size */
+};
+
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
@@ -617,7 +639,11 @@ union bpf_iter_link_info {
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
- * continue iteration from the current point.
+ * continue iteration from the current point. Both *in_batch* and
+ * *out_batch* must point to memory large enough to hold a key,
+ * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters
+ * must be at least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
@@ -847,6 +873,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +957,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +1009,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ BPF_MAP_TYPE_ARENA,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1055,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1278,6 +1339,10 @@ enum {
*/
#define BPF_PSEUDO_KFUNC_CALL 2
+enum bpf_addr_space_cast {
+ BPF_ADDR_SPACE_CAST = 1,
+};
+
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
BPF_ANY = 0, /* create new element or update existing */
@@ -1330,6 +1395,18 @@ enum {
/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
BPF_F_PATH_FD = (1U << 14),
+
+/* Flag for value_type_btf_obj_fd, the fd is available */
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
+
+/* BPF token FD is passed in a corresponding command's token_fd field */
+ BPF_F_TOKEN_FD = (1U << 16),
+
+/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
+ BPF_F_SEGV_ON_FAULT = (1U << 17),
+
+/* Do not translate kernel bpf_arena pointers to user pointers */
+ BPF_F_NO_USER_CONV = (1U << 18),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1401,8 +1478,20 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
+ *
+ * BPF_MAP_TYPE_ARENA - contains the address where user space
+ * is going to mmap() the arena. It has to be page aligned.
*/
__u64 map_extra;
+
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
+ * type data for
+ * btf_vmlinux_value_type_id.
+ */
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1472,6 +1561,10 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 prog_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1584,6 +1677,11 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_flags;
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 btf_token_fd;
};
struct {
@@ -1714,6 +1812,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -4839,9 +4942,9 @@ union bpf_attr {
* going through the CPU's backlog queue.
*
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types at the ingress
- * hook and for veth device types. The peer device must reside in a
- * different network namespace.
+ * currently only supported for tc BPF program types at the
+ * ingress hook and for veth and netkit target device types. The
+ * peer device must reside in a different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -6487,7 +6590,7 @@ struct bpf_map_info {
__u32 btf_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
- __u32 :32; /* alignment pad */
+ __u32 btf_vmlinux_id;
__u64 map_extra;
} __attribute__((aligned(8)));
@@ -6563,6 +6666,7 @@ struct bpf_link_info {
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
__u64 missed;
+ __aligned_u64 cookies;
} kprobe_multi;
struct {
__aligned_u64 path;
@@ -6582,6 +6686,7 @@ struct bpf_link_info {
__aligned_u64 file_name; /* in/out */
__u32 name_len;
__u32 offset; /* offset from file_name */
+ __u64 cookie;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6589,14 +6694,19 @@ struct bpf_link_info {
__u32 offset; /* offset from func_name */
__u64 addr;
__u64 missed;
+ __u64 cookie;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
__u32 name_len;
+ __u32 :32;
+ __u64 cookie;
} tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
struct {
__u64 config;
__u32 type;
+ __u32 :32;
+ __u64 cookie;
} event; /* BPF_PERF_EVENT_EVENT */
};
} perf_event;
@@ -6904,6 +7014,7 @@ enum {
BPF_TCP_LISTEN,
BPF_TCP_CLOSING, /* Now a valid state */
BPF_TCP_NEW_SYN_RECV,
+ BPF_TCP_BOUND_INACTIVE,
BPF_TCP_MAX_STATES /* Leave at the end! */
};
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 6c80f96049bd..282e90aeb163 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -116,5 +116,8 @@
#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
compare object identity and may not
be usable to open_by_handle_at(2) */
+#if defined(__KERNEL__)
+#define AT_GETATTR_NOSEC 0x80000000
+#endif
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index a0aa05a28cf2..f0d71b2a3f1e 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -974,6 +974,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_COUPLED_CONTROL,
__IFLA_BOND_MAX,
};
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 211b86de35ac..c3308536482b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -16,76 +16,6 @@
#define KVM_API_VERSION 12
-/* *** Deprecated interfaces *** */
-
-#define KVM_TRC_SHIFT 16
-
-#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
-#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
-
-#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
-#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
-#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
-
-#define KVM_TRC_HEAD_SIZE 12
-#define KVM_TRC_CYCLE_SIZE 8
-#define KVM_TRC_EXTRA_MAX 7
-
-#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
-#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
-#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
-#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
-#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
-#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
-#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
-#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
-#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
-#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
-#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
-#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
-#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
-#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
-#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
-#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
-#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
-#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
-#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
-#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
-#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
-#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
-#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
-#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
-
-struct kvm_user_trace_setup {
- __u32 buf_size;
- __u32 buf_nr;
-};
-
-#define __KVM_DEPRECATED_MAIN_W_0x06 \
- _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
-#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
-#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
-
-#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
-
-struct kvm_breakpoint {
- __u32 enabled;
- __u32 padding;
- __u64 address;
-};
-
-struct kvm_debug_guest {
- __u32 enabled;
- __u32 pad;
- struct kvm_breakpoint breakpoints[4];
- __u32 singlestep;
-};
-
-#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
-
-/* *** End of deprecated interfaces *** */
-
-
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
@@ -95,6 +25,19 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};
+/* for KVM_SET_USER_MEMORY_REGION2 */
+struct kvm_userspace_memory_region2 {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 userspace_addr;
+ __u64 guest_memfd_offset;
+ __u32 guest_memfd;
+ __u32 pad1;
+ __u64 pad2[14];
+};
+
/*
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
* userspace, other bits are reserved for kvm internal use which are defined
@@ -102,6 +45,7 @@ struct kvm_userspace_memory_region {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
+#define KVM_MEM_GUEST_MEMFD (1UL << 2)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@@ -265,6 +209,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
+#define KVM_EXIT_MEMORY_FAULT 39
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -518,6 +463,13 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
+ /* KVM_EXIT_MEMORY_FAULT */
+ struct {
+#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
+ __u64 flags;
+ __u64 gpa;
+ __u64 size;
+ } memory_fault;
/* Fix the size of the union. */
char padding[256];
};
@@ -945,9 +897,6 @@ struct kvm_ppc_resize_hpt {
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
-#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
-#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
-#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
@@ -1201,6 +1150,11 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
+#define KVM_CAP_USER_MEMORY2 231
+#define KVM_CAP_MEMORY_FAULT_INFO 232
+#define KVM_CAP_MEMORY_ATTRIBUTES 233
+#define KVM_CAP_GUEST_MEMFD 234
+#define KVM_CAP_VM_TYPES 235
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1291,6 +1245,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1483,6 +1438,8 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
+ struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
struct kvm_s390_ucas_mapping {
@@ -1507,20 +1464,8 @@ struct kvm_s390_ucas_mapping {
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
-#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
- struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
-/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
-#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
-#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
-#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
- struct kvm_assigned_pci_dev)
-#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
- struct kvm_assigned_msix_nr)
-#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
- struct kvm_assigned_msix_entry)
-#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
@@ -1537,9 +1482,6 @@ struct kvm_s390_ucas_mapping {
* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
-/* Available with KVM_CAP_PCI_2_3 */
-#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
- struct kvm_assigned_pci_dev)
/* Available with KVM_CAP_SIGNAL_MSI */
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
@@ -1592,8 +1534,6 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
-/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
-#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
@@ -2267,4 +2207,24 @@ struct kvm_s390_zpci_op {
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
+#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
+
+struct kvm_memory_attributes {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+};
+
+#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
+
+struct kvm_create_guest_memfd {
+ __u64 size;
+ __u64 flags;
+ __u64 reserved[6];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index bb242fdcfe6b..ad5478dbad00 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -138,4 +138,74 @@ struct mount_attr {
/* List of all mount_attr versions. */
#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+
+/*
+ * Structure for getting mount/superblock/filesystem info with statmount(2).
+ *
+ * The interface is similar to statx(2): individual fields or groups can be
+ * selected with the @mask argument of statmount(). Kernel will set the @mask
+ * field according to the supported fields.
+ *
+ * If string fields are selected, then the caller needs to pass a buffer that
+ * has space after the fixed part of the structure. Nul terminated strings are
+ * copied there and offsets relative to @str are stored in the relevant fields.
+ * If the buffer is too small, then EOVERFLOW is returned. The actually used
+ * size is returned in @size.
+ */
+struct statmount {
+ __u32 size; /* Total size, including strings */
+ __u32 __spare1;
+ __u64 mask; /* What results were written */
+ __u32 sb_dev_major; /* Device ID */
+ __u32 sb_dev_minor;
+ __u64 sb_magic; /* ..._SUPER_MAGIC */
+ __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+ __u32 fs_type; /* [str] Filesystem type */
+ __u64 mnt_id; /* Unique ID of mount */
+ __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
+ __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
+ __u32 mnt_parent_id_old;
+ __u64 mnt_attr; /* MOUNT_ATTR_... */
+ __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+ __u64 mnt_peer_group; /* ID of shared peer group */
+ __u64 mnt_master; /* Mount receives propagation from this ID */
+ __u64 propagate_from; /* Propagation from in current namespace */
+ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
+ __u32 mnt_point; /* [str] Mountpoint relative to current root */
+ __u64 __spare2[50];
+ char str[]; /* Variable size part containing strings */
+};
+
+/*
+ * Structure for passing mount ID and miscellaneous parameters to statmount(2)
+ * and listmount(2).
+ *
+ * For statmount(2) @param represents the request mask.
+ * For listmount(2) @param represents the last listed mount id (or zero).
+ */
+struct mnt_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 mnt_id;
+ __u64 param;
+};
+
+/* List of all mnt_id_req versions. */
+#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+
+/*
+ * @mask bits for statmount(2)
+ */
+#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
+#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
+#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
+#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
+#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
+#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+
+/*
+ * Special @mnt_id values that can be passed to listmount
+ */
+#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 93cb411adf72..bb65ee840cda 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -70,6 +70,10 @@ enum netdev_queue_type {
NETDEV_QUEUE_TYPE_TX,
};
+enum netdev_qstats_scope {
+ NETDEV_QSTATS_SCOPE_QUEUE = 1,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
@@ -133,6 +137,21 @@ enum {
};
enum {
+ NETDEV_A_QSTATS_IFINDEX = 1,
+ NETDEV_A_QSTATS_QUEUE_TYPE,
+ NETDEV_A_QSTATS_QUEUE_ID,
+ NETDEV_A_QSTATS_SCOPE,
+ NETDEV_A_QSTATS_RX_PACKETS = 8,
+ NETDEV_A_QSTATS_RX_BYTES,
+ NETDEV_A_QSTATS_TX_PACKETS,
+ NETDEV_A_QSTATS_TX_BYTES,
+ NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+
+ __NETDEV_A_QSTATS_MAX,
+ NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -144,6 +163,7 @@ enum {
NETDEV_CMD_PAGE_POOL_STATS_GET,
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
+ NETDEV_CMD_QSTATS_GET,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 7cab2c65d3d7..2f2ee82d5517 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -154,6 +154,7 @@ struct statx {
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 2d0c282c8588..b6619199a706 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
- usdt.o zip.o elf.o
+ usdt.o zip.o elf.o features.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9dc9625651dc..97ec005c3c47 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
* [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
* [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
*/
-int probe_memcg_account(void)
+int probe_memcg_account(int token_fd)
{
const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
struct bpf_insn insns[] = {
@@ -120,6 +120,9 @@ int probe_memcg_account(void)
attr.insns = ptr_to_u64(insns);
attr.insn_cnt = insn_cnt;
attr.license = ptr_to_u64("GPL");
+ attr.prog_token_fd = token_fd;
+ if (token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
if (prog_fd >= 0) {
@@ -146,7 +149,7 @@ int bump_rlimit_memlock(void)
struct rlimit rlim;
/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
- if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
+ if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
return 0;
memlock_bumped = true;
@@ -169,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
union bpf_attr attr;
int fd;
@@ -181,7 +184,7 @@ int bpf_map_create(enum bpf_map_type map_type,
return libbpf_err(-EINVAL);
attr.map_type = map_type;
- if (map_name && kernel_supports(NULL, FEAT_PROG_NAME))
+ if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -191,6 +194,7 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
+ attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, 0);
attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
attr.map_flags = OPTS_GET(opts, map_flags, 0);
@@ -198,6 +202,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.numa_node = OPTS_GET(opts, numa_node, 0);
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+ attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
}
@@ -232,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, log_true_size);
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -261,8 +267,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
attr.kern_version = OPTS_GET(opts, kern_version, 0);
+ attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
- if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME))
+ if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
attr.license = ptr_to_u64(license);
@@ -1182,7 +1189,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size);
+ const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd);
union bpf_attr attr;
char *log_buf;
size_t log_size;
@@ -1207,6 +1214,10 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts
attr.btf = ptr_to_u64(btf_data);
attr.btf_size = btf_size;
+
+ attr.btf_flags = OPTS_GET(opts, btf_flags, 0);
+ attr.btf_token_fd = OPTS_GET(opts, token_fd, 0);
+
/* log_level == 0 and log_buf != NULL means "try loading without
* log_buf, but retry with log_buf and log_level=1 on error", which is
* consistent across low-level and high-level BTF and program loading
@@ -1287,3 +1298,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd,
ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
return libbpf_err_errno(ret);
}
+
+int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, token_create);
+ union bpf_attr attr;
+ int fd;
+
+ if (!OPTS_VALID(opts, bpf_token_create_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.token_create.bpffs_fd = bpffs_fd;
+ attr.token_create.flags = OPTS_GET(opts, flags, 0);
+
+ fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);
+ return libbpf_err_errno(fd);
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index d0f53772bdc0..df0db2f0cdb7 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -35,7 +35,7 @@
extern "C" {
#endif
-int libbpf_set_memlock_rlim(size_t memlock_bytes);
+LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes);
struct bpf_map_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -51,8 +51,12 @@ struct bpf_map_create_opts {
__u32 numa_node;
__u32 map_ifindex;
+ __s32 value_type_btf_obj_fd;
+
+ __u32 token_fd;
+ size_t :0;
};
-#define bpf_map_create_opts__last_field map_ifindex
+#define bpf_map_create_opts__last_field token_fd
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
@@ -102,9 +106,10 @@ struct bpf_prog_load_opts {
* If kernel doesn't support this feature, log_size is left unchanged.
*/
__u32 log_true_size;
+ __u32 token_fd;
size_t :0;
};
-#define bpf_prog_load_opts__last_field log_true_size
+#define bpf_prog_load_opts__last_field token_fd
LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name, const char *license,
@@ -130,9 +135,12 @@ struct bpf_btf_load_opts {
* If kernel doesn't support this feature, log_size is left unchanged.
*/
__u32 log_true_size;
+
+ __u32 btf_flags;
+ __u32 token_fd;
size_t :0;
};
-#define bpf_btf_load_opts__last_field log_true_size
+#define bpf_btf_load_opts__last_field token_fd
LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
struct bpf_btf_load_opts *opts);
@@ -182,10 +190,14 @@ LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
/**
* @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
*
- * The parameter *in_batch* is the address of the first element in the batch to read.
- * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
- * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
- * that the batched lookup starts from the beginning of the map.
+ * The parameter *in_batch* is the address of the first element in the batch to
+ * read. *out_batch* is an output parameter that should be passed as *in_batch*
+ * to subsequent calls to **bpf_map_lookup_batch()**. NULL can be passed for
+ * *in_batch* to indicate that the batched lookup starts from the beginning of
+ * the map. Both *in_batch* and *out_batch* must point to memory large enough to
+ * hold a single key, except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which the memory size must be at
+ * least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point to memory large enough to
* hold *count* items based on the key and value size of the map *map_fd*. The *keys*
@@ -218,7 +230,10 @@ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
*
* @param fd BPF map file descriptor
* @param in_batch address of the first element in batch to read, can pass NULL to
- * get address of the first element in *out_batch*
+ * get address of the first element in *out_batch*. If not NULL, must be large
+ * enough to hold a key. For **BPF_MAP_TYPE_{HASH, PERCPU_HASH, LRU_HASH,
+ * LRU_PERCPU_HASH}**, the memory size must be at least 4 bytes wide regardless
+ * of key size.
* @param out_batch output parameter that should be passed to next call as *in_batch*
* @param keys pointer to an array of *count* keys
* @param values pointer to an array large enough for *count* values
@@ -492,7 +507,10 @@ LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
* program corresponding to *prog_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param prog_fd BPF program file descriptor
* @param info pointer to **struct bpf_prog_info** that will be populated with
@@ -509,7 +527,10 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info,
* map corresponding to *map_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param map_fd BPF map file descriptor
* @param info pointer to **struct bpf_map_info** that will be populated with
@@ -522,11 +543,14 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info,
LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len);
/**
- * @brief **bpf_btf_get_info_by_fd()** obtains information about the
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the
* BTF object corresponding to *btf_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param btf_fd BTF object file descriptor
* @param info pointer to **struct bpf_btf_info** that will be populated with
@@ -543,7 +567,10 @@ LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u
* link corresponding to *link_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param link_fd BPF link file descriptor
* @param info pointer to **struct bpf_link_info** that will be populated with
@@ -640,6 +667,30 @@ struct bpf_test_run_opts {
LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
struct bpf_test_run_opts *opts);
+struct bpf_token_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ size_t :0;
+};
+#define bpf_token_create_opts__last_field flags
+
+/**
+ * @brief **bpf_token_create()** creates a new instance of BPF token derived
+ * from specified BPF FS mount point.
+ *
+ * BPF token created with this API can be passed to bpf() syscall for
+ * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc.
+ *
+ * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token
+ * instance.
+ * @param opts optional BPF token creation options, can be NULL
+ *
+ * @return BPF token FD > 0, on success; negative error code, otherwise (errno
+ * is also set to the error code)
+ */
+LIBBPF_API int bpf_token_create(int bpffs_fd,
+ struct bpf_token_create_opts *opts);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7325a12692a3..1ce738d91685 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -2,6 +2,8 @@
#ifndef __BPF_CORE_READ_H__
#define __BPF_CORE_READ_H__
+#include <bpf/bpf_helpers.h>
+
/*
* enum bpf_field_info_kind is passed as a second argument into
* __builtin_preserve_field_info() built-in to get a specific aspect of
@@ -44,7 +46,7 @@ enum bpf_enum_value_kind {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
bpf_probe_read_kernel( \
- (void *)dst, \
+ (void *)dst, \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
@@ -143,8 +145,29 @@ enum bpf_enum_value_kind {
} \
})
+/* Differentiator between compilers builtin implementations. This is a
+ * requirement due to the compiler parsing differences where GCC optimizes
+ * early in parsing those constructs of type pointers to the builtin specific
+ * type, resulting in not being possible to collect the required type
+ * information in the builtin expansion.
+ */
+#ifdef __clang__
+#define ___bpf_typeof(type) ((typeof(type) *) 0)
+#else
+#define ___bpf_typeof1(type, NR) ({ \
+ extern typeof(type) *___concat(bpf_type_tmp_, NR); \
+ ___concat(bpf_type_tmp_, NR); \
+})
+#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__)
+#endif
+
+#ifdef __clang__
#define ___bpf_field_ref1(field) (field)
-#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field)
+#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field)
+#else
+#define ___bpf_field_ref1(field) (&(field))
+#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field))
+#endif
#define ___bpf_field_ref(args...) \
___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args)
@@ -194,7 +217,7 @@ enum bpf_enum_value_kind {
* BTF. Always succeeds.
*/
#define bpf_core_type_id_local(type) \
- __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+ __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL)
/*
* Convenience macro to get BTF type ID of a target kernel's type that matches
@@ -204,7 +227,7 @@ enum bpf_enum_value_kind {
* - 0, if no matching type was found in a target kernel BTF.
*/
#define bpf_core_type_id_kernel(type) \
- __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+ __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET)
/*
* Convenience macro to check that provided named type
@@ -214,7 +237,7 @@ enum bpf_enum_value_kind {
* 0, if no matching type is found.
*/
#define bpf_core_type_exists(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS)
/*
* Convenience macro to check that provided named type
@@ -224,7 +247,7 @@ enum bpf_enum_value_kind {
* 0, if the type does not match any in the target kernel
*/
#define bpf_core_type_matches(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES)
/*
* Convenience macro to get the byte size of a provided named type
@@ -234,7 +257,7 @@ enum bpf_enum_value_kind {
* 0, if no matching type is found.
*/
#define bpf_core_type_size(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE)
/*
* Convenience macro to check that provided enumerator value is defined in
@@ -244,8 +267,13 @@ enum bpf_enum_value_kind {
* kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
+#ifdef __clang__
#define bpf_core_enum_value_exists(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+#else
+#define bpf_core_enum_value_exists(enum_type, enum_value) \
+ __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS)
+#endif
/*
* Convenience macro to get the integer value of an enumerator value in
@@ -255,8 +283,13 @@ enum bpf_enum_value_kind {
* present in target kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
+#ifdef __clang__
#define bpf_core_enum_value(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+#else
+#define bpf_core_enum_value(enum_type, enum_value) \
+ __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE)
+#endif
/*
* bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
@@ -268,7 +301,7 @@ enum bpf_enum_value_kind {
* a relocation, which records BTF type ID describing root struct/union and an
* accessor string which describes exact embedded field that was used to take
* an address. See detailed description of this relocation format and
- * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h.
*
* This relocation allows libbpf to adjust BPF instruction to use correct
* actual field offset, based on target kernel BTF type that matches original
@@ -292,6 +325,17 @@ enum bpf_enum_value_kind {
#define bpf_core_read_user_str(dst, sz, src) \
bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src))
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
+
+/*
+ * Cast provided pointer *ptr* into a pointer to a specified *type* in such
+ * a way that BPF verifier will become aware of associated kernel-side BTF
+ * type. This allows to access members of kernel types directly without the
+ * need to use BPF_CORE_READ() macros.
+ */
+#define bpf_core_cast(ptr, type) \
+ ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type)))
+
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 2324cc42b017..cd17f6d0791f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -13,6 +13,7 @@
#define __uint(name, val) int (*name)[val]
#define __type(name, val) typeof(val) *name
#define __array(name, val) typeof(val) *name[]
+#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
/*
* Helper macro to place programs, maps, license in
@@ -190,6 +191,9 @@ enum libbpf_tristate {
#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))
#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))
+#define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))
+#define __arg_trusted __attribute((btf_decl_tag("arg:trusted")))
+#define __arg_arena __attribute((btf_decl_tag("arg:arena")))
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ee95fd379d4d..2d0840ef599a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1079,6 +1079,11 @@ struct btf *btf__new(const void *data, __u32 size)
return libbpf_ptr(btf_new(data, size, NULL));
}
+struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
+{
+ return libbpf_ptr(btf_new(data, size, base_btf));
+}
+
static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
struct btf_ext **btf_ext)
{
@@ -1317,7 +1322,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd)
{
LIBBPF_OPTS(bpf_btf_load_opts, opts);
__u32 buf_sz = 0, raw_size;
@@ -1367,6 +1374,10 @@ retry_load:
opts.log_level = log_level;
}
+ opts.token_fd = token_fd;
+ if (token_fd)
+ opts.btf_flags |= BPF_F_TOKEN_FD;
+
btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) {
/* time to turn on verbose mode and try again */
@@ -1394,7 +1405,7 @@ done:
int btf__load_into_kernel(struct btf *btf)
{
- return btf_load_into_kernel(btf, NULL, 0, 0);
+ return btf_load_into_kernel(btf, NULL, 0, 0, 0);
}
int btf__fd(const struct btf *btf)
@@ -3039,12 +3050,16 @@ done:
return btf_ext;
}
-const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size)
{
*size = btf_ext->data_size;
return btf_ext->data;
}
+__attribute__((alias("btf_ext__raw_data")))
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);
+
+
struct btf_dedup;
static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
@@ -4926,10 +4941,9 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
*/
struct btf *btf__load_vmlinux_btf(void)
{
+ const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux";
+ /* fall back locations, trying to find vmlinux on disk */
const char *locations[] = {
- /* try canonical vmlinux BTF through sysfs first */
- "/sys/kernel/btf/vmlinux",
- /* fall back to trying to find vmlinux on disk otherwise */
"/boot/vmlinux-%1$s",
"/lib/modules/%1$s/vmlinux-%1$s",
"/lib/modules/%1$s/build/vmlinux",
@@ -4943,8 +4957,23 @@ struct btf *btf__load_vmlinux_btf(void)
struct btf *btf;
int i, err;
- uname(&buf);
+ /* is canonical sysfs location accessible? */
+ if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) {
+ pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
+ sysfs_btf_path);
+ } else {
+ btf = btf__parse(sysfs_btf_path, NULL);
+ if (!btf) {
+ err = -errno;
+ pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err);
+ return libbpf_err_ptr(err);
+ }
+ pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path);
+ return btf;
+ }
+ /* try fallback locations */
+ uname(&buf);
for (i = 0; i < ARRAY_SIZE(locations); i++) {
snprintf(path, PATH_MAX, locations[i], buf.release);
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index b02faec748a5..c92e02394159 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -11,8 +11,6 @@
#include "libbpf_internal.h"
#include "str_error.h"
-#define STRERR_BUFSIZE 128
-
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an
* explicit version number. This is a GNU extension.
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
new file mode 100644
index 000000000000..4e783cc7fc4b
--- /dev/null
+++ b/tools/lib/bpf/features.c
@@ -0,0 +1,583 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/kernel.h>
+#include <linux/filter.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "str_error.h"
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
+
+static int probe_kern_prog_name(int token_fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.license = ptr_to_u64("GPL");
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
+ attr.prog_token_fd = token_fd;
+ if (token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
+ libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
+
+ /* make sure loading with name works */
+ ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
+ return probe_fd(ret);
+}
+
+static int probe_kern_global_data(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ insns[0].imm = map;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ close(map);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func_global(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* static void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC val */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_qmark_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0?.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_float(int token_fd)
+{
+ static const char strs[] = "\0float";
+ __u32 types[] = {
+ /* float */
+ BTF_TYPE_FLOAT_ENC(1, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_decl_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* attr */
+ BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_type_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* attr */
+ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
+ /* ptr */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_array_mmap(int token_fd)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0),
+ .token_fd = token_fd,
+ );
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_exp_attach_type(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ /* use any valid combination of program type and (optional)
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+ * to see if kernel supports expected_attach_type field for
+ * BPF_PROG_LOAD command
+ */
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_probe_read_kernel(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_prog_bind_map(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
+static int probe_module_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ char name[16];
+ int fd, err;
+
+ fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (fd < 0)
+ return 0; /* BTF not supported at all */
+
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
+ * kernel's module BTF support coincides with support for
+ * name/name_len fields in struct bpf_btf_info.
+ */
+ err = bpf_btf_get_info_by_fd(fd, &info, &len);
+ close(fd);
+ return !err;
+}
+
+static int probe_perf_link(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int prog_fd, link_fd, err;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* use invalid perf_event FD to get EBADF, if link is supported;
+ * otherwise EINVAL should be returned
+ */
+ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_uprobe_multi_link(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, link_fd, err;
+ unsigned long offset = 0;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &load_opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* Creating uprobe in '/' binary should fail with -EBADF. */
+ link_opts.uprobe_multi.path = "/";
+ link_opts.uprobe_multi.offsets = &offset;
+ link_opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_kern_bpf_cookie(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf_enum64(int token_fd)
+{
+ static const char strs[] = "\0enum64";
+ __u32 types[] = {
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_arg_ctx_tag(int token_fd)
+{
+ static const char strs[] = "\0a\0b\0arg:ctx\0";
+ const __u32 types[] = {
+ /* [1] INT */
+ BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
+ /* [2] PTR -> VOID */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* [3] FUNC_PROTO `int(void *a)` */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
+ BTF_PARAM_ENC(1 /* "a" */, 2),
+ /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
+ BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
+ /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
+ BTF_PARAM_ENC(3 /* "b" */, 2),
+ /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
+ BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
+ /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
+ BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
+ };
+ const struct bpf_insn insns[] = {
+ /* main prog */
+ BPF_CALL_REL(+1),
+ BPF_EXIT_INSN(),
+ /* global subprog */
+ BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
+ BPF_EXIT_INSN(),
+ };
+ const struct bpf_func_info_min func_infos[] = {
+ { 0, 4 }, /* main prog -> FUNC 'a' */
+ { 2, 6 }, /* subprog -> FUNC 'b' */
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (btf_fd < 0)
+ return 0;
+
+ opts.prog_btf_fd = btf_fd;
+ opts.func_info = &func_infos;
+ opts.func_info_cnt = ARRAY_SIZE(func_infos);
+ opts.func_info_rec_size = sizeof(func_infos[0]);
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
+ "GPL", insns, insn_cnt, &opts);
+ close(btf_fd);
+
+ return probe_fd(prog_fd);
+}
+
+typedef int (*feature_probe_fn)(int /* token_fd */);
+
+static struct kern_feature_cache feature_cache;
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+ },
+ [FEAT_MODULE_BTF] = {
+ "module BTF support", probe_module_btf,
+ },
+ [FEAT_BTF_FLOAT] = {
+ "BTF_KIND_FLOAT support", probe_kern_btf_float,
+ },
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
+ [FEAT_BTF_DECL_TAG] = {
+ "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
+ },
+ [FEAT_BTF_TYPE_TAG] = {
+ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
+ },
+ [FEAT_MEMCG_ACCOUNT] = {
+ "memcg-based memory accounting", probe_memcg_account,
+ },
+ [FEAT_BPF_COOKIE] = {
+ "BPF cookie support", probe_kern_bpf_cookie,
+ },
+ [FEAT_BTF_ENUM64] = {
+ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+ },
+ [FEAT_SYSCALL_WRAPPER] = {
+ "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
+ },
+ [FEAT_UPROBE_MULTI_LINK] = {
+ "BPF multi-uprobe link support", probe_uprobe_multi_link,
+ },
+ [FEAT_ARG_CTX_TAG] = {
+ "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag,
+ },
+ [FEAT_BTF_QMARK_DATASEC] = {
+ "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
+ },
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ /* assume global feature cache, unless custom one is provided */
+ if (!cache)
+ cache = &feature_cache;
+
+ if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
+ ret = feat->probe(cache->token_fd);
+ if (ret > 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ }
+ }
+
+ return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index afd09571c482..efab29b8935b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -33,6 +33,7 @@
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
+#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
@@ -59,6 +60,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
+
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
@@ -70,6 +73,7 @@
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
+static int map_set_def_max_entries(struct bpf_map *map);
static const char * const attach_type_name[] = {
[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
@@ -181,6 +185,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
+ [BPF_MAP_TYPE_ARENA] = "arena",
};
static const char * const prog_type_name[] = {
@@ -493,6 +498,7 @@ struct bpf_struct_ops {
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
+#define ARENA_SEC ".arena.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -527,6 +533,7 @@ struct bpf_map {
struct bpf_map_def def;
__u32 numa_node;
__u32 btf_var_idx;
+ int mod_btf_fd;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
@@ -607,6 +614,7 @@ enum sec_type {
SEC_BSS,
SEC_DATA,
SEC_RODATA,
+ SEC_ST_OPS,
};
struct elf_sec_desc {
@@ -622,8 +630,7 @@ struct elf_state {
Elf *elf;
Elf64_Ehdr *ehdr;
Elf_Data *symbols;
- Elf_Data *st_ops_data;
- Elf_Data *st_ops_link_data;
+ Elf_Data *arena_data;
size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct elf_sec_desc *secs;
@@ -632,8 +639,8 @@ struct elf_state {
__u32 btf_maps_sec_btf_id;
int text_shndx;
int symbols_shndx;
- int st_ops_shndx;
- int st_ops_link_shndx;
+ bool has_st_ops;
+ int arena_data_shndx;
};
struct usdt_manager;
@@ -693,6 +700,14 @@ struct bpf_object {
struct usdt_manager *usdt_man;
+ struct bpf_map *arena_map;
+ void *arena_data;
+ size_t arena_data_sz;
+
+ struct kern_feature_cache *feat_cache;
+ char *token_path;
+ int token_fd;
+
char path[];
};
@@ -930,22 +945,33 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t,
return NULL;
}
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ struct module_btf **res_mod_btf);
+
#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
const char *name, __u32 kind);
static int
-find_struct_ops_kern_types(const struct btf *btf, const char *tname,
+find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
+ struct module_btf **mod_btf,
const struct btf_type **type, __u32 *type_id,
const struct btf_type **vtype, __u32 *vtype_id,
const struct btf_member **data_member)
{
const struct btf_type *kern_type, *kern_vtype;
const struct btf_member *kern_data_member;
+ struct btf *btf;
__s32 kern_vtype_id, kern_type_id;
+ char tname[256];
__u32 i;
- kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ snprintf(tname, sizeof(tname), "%.*s",
+ (int)bpf_core_essential_name_len(tname_raw), tname_raw);
+
+ kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
+ &btf, mod_btf);
if (kern_type_id < 0) {
pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
tname);
@@ -998,15 +1024,72 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map)
return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}
+static bool is_valid_st_ops_program(struct bpf_object *obj,
+ const struct bpf_program *prog)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ if (&obj->programs[i] == prog)
+ return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
+ }
+
+ return false;
+}
+
+/* For each struct_ops program P, referenced from some struct_ops map M,
+ * enable P.autoload if there are Ms for which M.autocreate is true,
+ * disable P.autoload if for all Ms M.autocreate is false.
+ * Don't change P.autoload for programs that are not referenced from any maps.
+ */
+static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
+{
+ struct bpf_program *prog, *slot_prog;
+ struct bpf_map *map;
+ int i, j, k, vlen;
+
+ for (i = 0; i < obj->nr_programs; ++i) {
+ int should_load = false;
+ int use_cnt = 0;
+
+ prog = &obj->programs[i];
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ continue;
+
+ for (j = 0; j < obj->nr_maps; ++j) {
+ map = &obj->maps[j];
+ if (!bpf_map__is_struct_ops(map))
+ continue;
+
+ vlen = btf_vlen(map->st_ops->type);
+ for (k = 0; k < vlen; ++k) {
+ slot_prog = map->st_ops->progs[k];
+ if (prog != slot_prog)
+ continue;
+
+ use_cnt++;
+ if (map->autocreate)
+ should_load = true;
+ }
+ }
+ if (use_cnt)
+ prog->autoload = should_load;
+ }
+
+ return 0;
+}
+
/* Init the map's fields that depend on kern_btf */
-static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
- const struct btf *btf,
- const struct btf *kern_btf)
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
const struct btf_member *member, *kern_member, *kern_data_member;
const struct btf_type *type, *kern_type, *kern_vtype;
__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
+ struct bpf_object *obj = map->obj;
+ const struct btf *btf = obj->btf;
struct bpf_struct_ops *st_ops;
+ const struct btf *kern_btf;
+ struct module_btf *mod_btf;
void *data, *kern_data;
const char *tname;
int err;
@@ -1014,16 +1097,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
st_ops = map->st_ops;
type = st_ops->type;
tname = st_ops->tname;
- err = find_struct_ops_kern_types(kern_btf, tname,
+ err = find_struct_ops_kern_types(obj, tname, &mod_btf,
&kern_type, &kern_type_id,
&kern_vtype, &kern_vtype_id,
&kern_data_member);
if (err)
return err;
+ kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
+
pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
+ map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
map->def.value_size = kern_vtype->size;
map->btf_vmlinux_value_type_id = kern_vtype_id;
@@ -1081,9 +1167,16 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
if (btf_is_ptr(mtype)) {
struct bpf_program *prog;
- prog = st_ops->progs[i];
+ /* Update the value from the shadow type */
+ prog = *(void **)mdata;
+ st_ops->progs[i] = prog;
if (!prog)
continue;
+ if (!is_valid_st_ops_program(obj, prog)) {
+ pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
+ map->name, mname);
+ return -ENOTSUP;
+ }
kern_mtype = skip_mods_and_typedefs(kern_btf,
kern_mtype->type,
@@ -1099,8 +1192,34 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
return -ENOTSUP;
}
- prog->attach_btf_id = kern_type_id;
- prog->expected_attach_type = kern_member_idx;
+ if (mod_btf)
+ prog->attach_btf_obj_fd = mod_btf->fd;
+
+ /* if we haven't yet processed this BPF program, record proper
+ * attach_btf_id and member_idx
+ */
+ if (!prog->attach_btf_id) {
+ prog->attach_btf_id = kern_type_id;
+ prog->expected_attach_type = kern_member_idx;
+ }
+
+ /* struct_ops BPF prog can be re-used between multiple
+ * .struct_ops & .struct_ops.link as long as it's the
+ * same struct_ops struct definition and the same
+ * function pointer field
+ */
+ if (prog->attach_btf_id != kern_type_id) {
+ pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
+ map->name, mname, prog->name, prog->sec_name, prog->type,
+ prog->attach_btf_id, kern_type_id);
+ return -EINVAL;
+ }
+ if (prog->expected_attach_type != kern_member_idx) {
+ pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
+ map->name, mname, prog->name, prog->sec_name, prog->type,
+ prog->expected_attach_type, kern_member_idx);
+ return -EINVAL;
+ }
st_ops->kern_func_off[i] = kern_data_off + kern_moff;
@@ -1141,8 +1260,10 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
if (!bpf_map__is_struct_ops(map))
continue;
- err = bpf_map__init_kern_struct_ops(map, obj->btf,
- obj->btf_vmlinux);
+ if (!map->autocreate)
+ continue;
+
+ err = bpf_map__init_kern_struct_ops(map);
if (err)
return err;
}
@@ -1151,7 +1272,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
}
static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
- int shndx, Elf_Data *data, __u32 map_flags)
+ int shndx, Elf_Data *data)
{
const struct btf_type *type, *datasec;
const struct btf_var_secinfo *vsi;
@@ -1207,12 +1328,22 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
map->name = strdup(var_name);
if (!map->name)
return -ENOMEM;
+ map->btf_value_type_id = type_id;
+
+ /* Follow same convention as for programs autoload:
+ * SEC("?.struct_ops") means map is not created by default.
+ */
+ if (sec_name[0] == '?') {
+ map->autocreate = false;
+ /* from now on forget there was ? in section name */
+ sec_name++;
+ }
map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
map->def.key_size = sizeof(int);
map->def.value_size = type->size;
map->def.max_entries = 1;
- map->def.map_flags = map_flags;
+ map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
map->st_ops = calloc(1, sizeof(*map->st_ops));
if (!map->st_ops)
@@ -1247,15 +1378,25 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
- int err;
+ const char *sec_name;
+ int sec_idx, err;
- err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
- obj->efile.st_ops_data, 0);
- err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
- obj->efile.st_ops_link_shndx,
- obj->efile.st_ops_link_data,
- BPF_F_LINK);
- return err;
+ for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
+ struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
+
+ if (desc->sec_type != SEC_ST_OPS)
+ continue;
+
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ if (!sec_name)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static struct bpf_object *bpf_object__new(const char *path,
@@ -1293,8 +1434,6 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.obj_buf = obj_buf;
obj->efile.obj_buf_sz = obj_buf_sz;
obj->efile.btf_maps_shndx = -1;
- obj->efile.st_ops_shndx = -1;
- obj->efile.st_ops_link_shndx = -1;
obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
@@ -1311,8 +1450,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
elf_end(obj->efile.elf);
obj->efile.elf = NULL;
obj->efile.symbols = NULL;
- obj->efile.st_ops_data = NULL;
- obj->efile.st_ops_link_data = NULL;
+ obj->efile.arena_data = NULL;
zfree(&obj->efile.secs);
obj->efile.sec_cnt = 0;
@@ -1503,11 +1641,20 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
return ERR_PTR(-ENOENT);
}
+/* Some versions of Android don't provide memfd_create() in their libc
+ * implementation, so avoid complications and just go straight to Linux
+ * syscall.
+ */
+static int sys_memfd_create(const char *name, unsigned flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
static int create_placeholder_fd(void)
{
int fd;
- fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
+ fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
if (fd < 0)
return -errno;
return fd;
@@ -1546,7 +1693,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return map;
}
-static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
+static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t map_sz;
@@ -1556,6 +1703,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
return map_sz;
}
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
+
+ switch (map->def.type) {
+ case BPF_MAP_TYPE_ARRAY:
+ return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ case BPF_MAP_TYPE_ARENA:
+ return page_sz * map->def.max_entries;
+ default:
+ return 0; /* not supported */
+ }
+}
+
static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
void *mmaped;
@@ -1698,7 +1859,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def->value_size = data_sz;
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
- ? BPF_F_RDONLY_PROG : 0;
+ ? BPF_F_RDONLY_PROG : 0;
/* failures are fine because of maps like .rodata.str1.1 */
(void) map_fill_btf_type_info(obj, map);
@@ -1709,7 +1870,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map->name, map->sec_idx, map->sec_offset, def->map_flags);
- mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ mmap_sz = bpf_map_mmap_sz(map);
map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (map->mmaped == MAP_FAILED) {
@@ -2197,6 +2358,46 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return true;
}
+static bool get_map_field_long(const char *map_name, const struct btf *btf,
+ const struct btf_member *m, __u64 *res)
+{
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
+ const char *name = btf__name_by_offset(btf, m->name_off);
+
+ if (btf_is_ptr(t)) {
+ __u32 res32;
+ bool ret;
+
+ ret = get_map_field_int(map_name, btf, m, &res32);
+ if (ret)
+ *res = (__u64)res32;
+ return ret;
+ }
+
+ if (!btf_is_enum(t) && !btf_is_enum64(t)) {
+ pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
+ map_name, name, btf_kind_str(t));
+ return false;
+ }
+
+ if (btf_vlen(t) != 1) {
+ pr_warn("map '%s': attr '%s': invalid __ulong\n",
+ map_name, name);
+ return false;
+ }
+
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e = btf_enum(t);
+
+ *res = e->val;
+ } else {
+ const struct btf_enum64 *e = btf_enum64(t);
+
+ *res = btf_enum64_value(e);
+ }
+ return true;
+}
+
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
{
int len;
@@ -2216,7 +2417,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
int err;
if (!path)
- path = "/sys/fs/bpf";
+ path = BPF_FS_DEFAULT_PATH;
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
@@ -2430,9 +2631,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
map_def->pinning = val;
map_def->parts |= MAP_DEF_PINNING;
} else if (strcmp(name, "map_extra") == 0) {
- __u32 map_extra;
+ __u64 map_extra;
- if (!get_map_field_int(map_name, btf, m, &map_extra))
+ if (!get_map_field_long(map_name, btf, m, &map_extra))
return -EINVAL;
map_def->map_extra = map_extra;
map_def->parts |= MAP_DEF_MAP_EXTRA;
@@ -2650,6 +2851,32 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return 0;
}
+static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
+ const char *sec_name, int sec_idx,
+ void *data, size_t data_sz)
+{
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t mmap_sz;
+
+ mmap_sz = bpf_map_mmap_sz(obj->arena_map);
+ if (roundup(data_sz, page_sz) > mmap_sz) {
+ pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
+ sec_name, mmap_sz, data_sz);
+ return -E2BIG;
+ }
+
+ obj->arena_data = malloc(data_sz);
+ if (!obj->arena_data)
+ return -ENOMEM;
+ memcpy(obj->arena_data, data, data_sz);
+ obj->arena_data_sz = data_sz;
+
+ /* make bpf_map__init_value() work for ARENA maps */
+ map->mmaped = obj->arena_data;
+
+ return 0;
+}
+
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
const char *pin_root_path)
{
@@ -2699,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return err;
}
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map *map = &obj->maps[i];
+
+ if (map->def.type != BPF_MAP_TYPE_ARENA)
+ continue;
+
+ if (obj->arena_map) {
+ pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
+ map->name, obj->arena_map->name);
+ return -EINVAL;
+ }
+ obj->arena_map = map;
+
+ if (obj->efile.arena_data) {
+ err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
+ obj->efile.arena_data->d_buf,
+ obj->efile.arena_data->d_size);
+ if (err)
+ return err;
+ }
+ }
+ if (obj->efile.arena_data && !obj->arena_map) {
+ pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
+ ARENA_SEC);
+ return -ENOENT;
+ }
+
return 0;
}
@@ -2731,6 +2985,11 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
return sh->sh_flags & SHF_EXECINSTR;
}
+static bool starts_with_qmark(const char *s)
+{
+ return s && s[0] == '?';
+}
+
static bool btf_needs_sanitization(struct bpf_object *obj)
{
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
@@ -2740,9 +2999,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
return !has_func || !has_datasec || !has_func_global || !has_float ||
- !has_decl_tag || !has_type_tag || !has_enum64;
+ !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
}
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
@@ -2754,6 +3014,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
int enum64_placeholder_id = 0;
struct btf_type *t;
int i, j, vlen;
@@ -2780,7 +3041,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
name = (char *)btf__name_by_offset(btf, t->name_off);
while (*name) {
- if (*name == '.')
+ if (*name == '.' || *name == '?')
*name = '_';
name++;
}
@@ -2795,6 +3056,14 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
vt = (void *)btf__type_by_id(btf, v->type);
m->name_off = vt->name_off;
}
+ } else if (!has_qmark_datasec && btf_is_datasec(t) &&
+ starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
+ /* replace '?' prefix with '_' for DATASEC names */
+ char *name;
+
+ name = (char *)btf__name_by_offset(btf, t->name_off);
+ if (name[0] == '?')
+ name[0] = '_';
} else if (!has_func && btf_is_func_proto(t)) {
/* replace FUNC_PROTO with ENUM */
vlen = btf_vlen(t);
@@ -2848,14 +3117,13 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
- obj->efile.st_ops_shndx >= 0 ||
- obj->efile.st_ops_link_shndx >= 0 ||
+ obj->efile.has_st_ops ||
obj->nr_extern > 0;
}
static bool kernel_needs_btf(const struct bpf_object *obj)
{
- return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
+ return obj->efile.has_st_ops;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -3225,7 +3493,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
} else {
/* currently BPF_BTF_LOAD only supports log_level 1 */
err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
- obj->log_level ? 1 : 0);
+ obj->log_level ? 1 : 0, obj->token_fd);
}
if (sanitize) {
if (!err) {
@@ -3556,12 +3824,17 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RODATA;
sec_desc->shdr = sh;
sec_desc->data = data;
- } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
- obj->efile.st_ops_data = data;
- obj->efile.st_ops_shndx = idx;
- } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
- obj->efile.st_ops_link_data = data;
- obj->efile.st_ops_link_shndx = idx;
+ } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
+ strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
+ strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
+ strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
+ sec_desc->sec_type = SEC_ST_OPS;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
+ obj->efile.has_st_ops = true;
+ } else if (strcmp(name, ARENA_SEC) == 0) {
+ obj->efile.arena_data = data;
+ obj->efile.arena_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@@ -3577,6 +3850,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (!section_have_execinstr(obj, targ_sec_idx) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
+ strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
strcmp(name, ".rel" MAPS_ELF_SEC)) {
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
idx, name, targ_sec_idx,
@@ -4189,6 +4464,15 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ /* arena data relocation */
+ if (shdr_idx == obj->efile.arena_data_shndx) {
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = obj->arena_map - obj->maps;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -4546,6 +4830,58 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
return 0;
}
+static int bpf_object_prepare_token(struct bpf_object *obj)
+{
+ const char *bpffs_path;
+ int bpffs_fd = -1, token_fd, err;
+ bool mandatory;
+ enum libbpf_print_level level;
+
+ /* token is explicitly prevented */
+ if (obj->token_path && obj->token_path[0] == '\0') {
+ pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
+ return 0;
+ }
+
+ mandatory = obj->token_path != NULL;
+ level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
+
+ bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
+ bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
+ if (bpffs_fd < 0) {
+ err = -errno;
+ __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
+ obj->name, err, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? err : 0;
+ }
+
+ token_fd = bpf_token_create(bpffs_fd, 0);
+ close(bpffs_fd);
+ if (token_fd < 0) {
+ if (!mandatory && token_fd == -ENOENT) {
+ pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
+ obj->name, bpffs_path);
+ return 0;
+ }
+ __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
+ obj->name, token_fd, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? token_fd : 0;
+ }
+
+ obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
+ if (!obj->feat_cache) {
+ close(token_fd);
+ return -ENOMEM;
+ }
+
+ obj->token_fd = token_fd;
+ obj->feat_cache->token_fd = token_fd;
+
+ return 0;
+}
+
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
@@ -4555,6 +4891,10 @@ bpf_object__probe_loading(struct bpf_object *obj)
BPF_EXIT_INSN(),
};
int ret, insn_cnt = ARRAY_SIZE(insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = obj->token_fd,
+ .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
+ );
if (obj->gen_loader)
return 0;
@@ -4564,9 +4904,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
/* make sure basic loading works */
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0)
- ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4581,462 +4921,18 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
-static int probe_fd(int fd)
-{
- if (fd >= 0)
- close(fd);
- return fd >= 0;
-}
-
-static int probe_kern_prog_name(void)
-{
- const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- union bpf_attr attr;
- int ret;
-
- memset(&attr, 0, attr_sz);
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.license = ptr_to_u64("GPL");
- attr.insns = ptr_to_u64(insns);
- attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
- libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
-
- /* make sure loading with name works */
- ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
- return probe_fd(ret);
-}
-
-static int probe_kern_global_data(void)
-{
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, insn_cnt = ARRAY_SIZE(insns);
-
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- insns[0].imm = map;
-
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
- close(map);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func_global(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* static void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_datasec(void)
-{
- static const char strs[] = "\0x\0.data";
- /* static int a; */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* DATASEC val */ /* [3] */
- BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
- BTF_VAR_SECINFO_ENC(2, 0, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_float(void)
-{
- static const char strs[] = "\0float";
- __u32 types[] = {
- /* float */
- BTF_TYPE_FLOAT_ENC(1, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_decl_tag(void)
-{
- static const char strs[] = "\0tag";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* attr */
- BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_type_tag(void)
-{
- static const char strs[] = "\0tag";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* attr */
- BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
- /* ptr */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_array_mmap(void)
-{
- LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
- int fd;
-
- fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
- return probe_fd(fd);
-}
-
-static int probe_kern_exp_attach_type(void)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int fd, insn_cnt = ARRAY_SIZE(insns);
-
- /* use any valid combination of program type and (optional)
- * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
- * to see if kernel supports expected_attach_type field for
- * BPF_PROG_LOAD command
- */
- fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
- return probe_fd(fd);
-}
-
-static int probe_kern_probe_read_kernel(void)
-{
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
- BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
- BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- };
- int fd, insn_cnt = ARRAY_SIZE(insns);
-
- fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
- return probe_fd(fd);
-}
-
-static int probe_prog_bind_map(void)
-{
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
-
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
- if (prog < 0) {
- close(map);
- return 0;
- }
-
- ret = bpf_prog_bind_map(prog, map, NULL);
-
- close(map);
- close(prog);
-
- return ret >= 0;
-}
-
-static int probe_module_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
- struct bpf_btf_info info;
- __u32 len = sizeof(info);
- char name[16];
- int fd, err;
-
- fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
- if (fd < 0)
- return 0; /* BTF not supported at all */
-
- memset(&info, 0, sizeof(info));
- info.name = ptr_to_u64(name);
- info.name_len = sizeof(name);
-
- /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
- * kernel's module BTF support coincides with support for
- * name/name_len fields in struct bpf_btf_info.
- */
- err = bpf_btf_get_info_by_fd(fd, &info, &len);
- close(fd);
- return !err;
-}
-
-static int probe_perf_link(void)
-{
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int prog_fd, link_fd, err;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
- insns, ARRAY_SIZE(insns), NULL);
- if (prog_fd < 0)
- return -errno;
-
- /* use invalid perf_event FD to get EBADF, if link is supported;
- * otherwise EINVAL should be returned
- */
- link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
- err = -errno; /* close() can clobber errno */
-
- if (link_fd >= 0)
- close(link_fd);
- close(prog_fd);
-
- return link_fd < 0 && err == -EBADF;
-}
-
-static int probe_uprobe_multi_link(void)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
- .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
- );
- LIBBPF_OPTS(bpf_link_create_opts, link_opts);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int prog_fd, link_fd, err;
- unsigned long offset = 0;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
- insns, ARRAY_SIZE(insns), &load_opts);
- if (prog_fd < 0)
- return -errno;
-
- /* Creating uprobe in '/' binary should fail with -EBADF. */
- link_opts.uprobe_multi.path = "/";
- link_opts.uprobe_multi.offsets = &offset;
- link_opts.uprobe_multi.cnt = 1;
-
- link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
- err = -errno; /* close() can clobber errno */
-
- if (link_fd >= 0)
- close(link_fd);
- close(prog_fd);
-
- return link_fd < 0 && err == -EBADF;
-}
-
-static int probe_kern_bpf_cookie(void)
-{
- struct bpf_insn insns[] = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
- BPF_EXIT_INSN(),
- };
- int ret, insn_cnt = ARRAY_SIZE(insns);
-
- ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf_enum64(void)
-{
- static const char strs[] = "\0enum64";
- __u32 types[] = {
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_syscall_wrapper(void);
-
-enum kern_feature_result {
- FEAT_UNKNOWN = 0,
- FEAT_SUPPORTED = 1,
- FEAT_MISSING = 2,
-};
-
-typedef int (*feature_probe_fn)(void);
-
-static struct kern_feature_desc {
- const char *desc;
- feature_probe_fn probe;
- enum kern_feature_result res;
-} feature_probes[__FEAT_CNT] = {
- [FEAT_PROG_NAME] = {
- "BPF program name", probe_kern_prog_name,
- },
- [FEAT_GLOBAL_DATA] = {
- "global variables", probe_kern_global_data,
- },
- [FEAT_BTF] = {
- "minimal BTF", probe_kern_btf,
- },
- [FEAT_BTF_FUNC] = {
- "BTF functions", probe_kern_btf_func,
- },
- [FEAT_BTF_GLOBAL_FUNC] = {
- "BTF global function", probe_kern_btf_func_global,
- },
- [FEAT_BTF_DATASEC] = {
- "BTF data section and variable", probe_kern_btf_datasec,
- },
- [FEAT_ARRAY_MMAP] = {
- "ARRAY map mmap()", probe_kern_array_mmap,
- },
- [FEAT_EXP_ATTACH_TYPE] = {
- "BPF_PROG_LOAD expected_attach_type attribute",
- probe_kern_exp_attach_type,
- },
- [FEAT_PROBE_READ_KERN] = {
- "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
- },
- [FEAT_PROG_BIND_MAP] = {
- "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
- },
- [FEAT_MODULE_BTF] = {
- "module BTF support", probe_module_btf,
- },
- [FEAT_BTF_FLOAT] = {
- "BTF_KIND_FLOAT support", probe_kern_btf_float,
- },
- [FEAT_PERF_LINK] = {
- "BPF perf link support", probe_perf_link,
- },
- [FEAT_BTF_DECL_TAG] = {
- "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
- },
- [FEAT_BTF_TYPE_TAG] = {
- "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
- },
- [FEAT_MEMCG_ACCOUNT] = {
- "memcg-based memory accounting", probe_memcg_account,
- },
- [FEAT_BPF_COOKIE] = {
- "BPF cookie support", probe_kern_bpf_cookie,
- },
- [FEAT_BTF_ENUM64] = {
- "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
- },
- [FEAT_SYSCALL_WRAPPER] = {
- "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
- },
- [FEAT_UPROBE_MULTI_LINK] = {
- "BPF multi-uprobe link support", probe_uprobe_multi_link,
- },
-};
-
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
- struct kern_feature_desc *feat = &feature_probes[feat_id];
- int ret;
-
- if (obj && obj->gen_loader)
+ if (obj->gen_loader)
/* To generate loader program assume the latest kernel
* to avoid doing extra prog_load, map_create syscalls.
*/
return true;
- if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
- ret = feat->probe();
- if (ret > 0) {
- WRITE_ONCE(feat->res, FEAT_SUPPORTED);
- } else if (ret == 0) {
- WRITE_ONCE(feat->res, FEAT_MISSING);
- } else {
- pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
- WRITE_ONCE(feat->res, FEAT_MISSING);
- }
- }
+ if (obj->token_fd)
+ return feat_supported(obj->feat_cache, feat_id);
- return READ_ONCE(feat->res) == FEAT_SUPPORTED;
+ return feat_supported(NULL, feat_id);
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
@@ -5117,6 +5013,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
return 0;
}
+
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@@ -5160,9 +5057,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.map_flags = def->map_flags;
create_attr.numa_node = map->numa_node;
create_attr.map_extra = map->map_extra;
+ create_attr.token_fd = obj->token_fd;
+ if (obj->token_fd)
+ create_attr.map_flags |= BPF_F_TOKEN_FD;
- if (bpf_map__is_struct_ops(map))
+ if (bpf_map__is_struct_ops(map)) {
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
+ if (map->mod_btf_fd >= 0) {
+ create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
+ create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
+ }
+ }
if (obj->btf && btf__fd(obj->btf) >= 0) {
create_attr.btf_fd = btf__fd(obj->btf);
@@ -5172,6 +5077,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
if (bpf_map_type__is_map_in_map(def->type)) {
if (map->inner_map) {
+ err = map_set_def_max_entries(map->inner_map);
+ if (err)
+ return err;
err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
@@ -5198,11 +5106,16 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_QUEUE:
case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_ARENA:
create_attr.btf_fd = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
map->btf_value_type_id = 0;
+ break;
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ create_attr.btf_value_type_id = 0;
+ break;
default:
break;
}
@@ -5438,7 +5351,23 @@ retry:
if (err < 0)
goto err_out;
}
-
+ if (map->def.type == BPF_MAP_TYPE_ARENA) {
+ map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
+ PROT_READ | PROT_WRITE,
+ map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
+ map->fd, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("map '%s': failed to mmap arena: %d\n",
+ map->name, err);
+ return err;
+ }
+ if (obj->arena_data) {
+ memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
+ zfree(&obj->arena_data);
+ }
+ }
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
err = init_map_in_map_slots(obj, map);
if (err < 0)
@@ -6695,6 +6624,14 @@ static struct {
/* all other program types don't have "named" context structs */
};
+/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
+ * for below __builtin_types_compatible_p() checks;
+ * with this approach we don't need any extra arch-specific #ifdef guards
+ */
+struct pt_regs;
+struct user_pt_regs;
+struct user_regs_struct;
+
static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
const char *subprog_name, int arg_idx,
int arg_type_id, const char *ctx_name)
@@ -6735,11 +6672,21 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro
/* special cases */
switch (prog->type) {
case BPF_PROG_TYPE_KPROBE:
- case BPF_PROG_TYPE_PERF_EVENT:
/* `struct pt_regs *` is expected, but we need to fix up */
if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
return true;
break;
+ case BPF_PROG_TYPE_PERF_EVENT:
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
+ btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
+ return true;
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
+ btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
+ return true;
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
+ btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
+ return true;
+ break;
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
/* allow u64* as ctx */
@@ -6818,69 +6765,6 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr
return fn_id;
}
-static int probe_kern_arg_ctx_tag(void)
-{
- /* To minimize merge conflicts with BPF token series that refactors
- * feature detection code a lot, we don't integrate
- * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection
- * framework yet, doing our own caching internally.
- * This will be cleaned up a bit later when bpf/bpf-next trees settle.
- */
- static int cached_result = -1;
- static const char strs[] = "\0a\0b\0arg:ctx\0";
- const __u32 types[] = {
- /* [1] INT */
- BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
- /* [2] PTR -> VOID */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
- /* [3] FUNC_PROTO `int(void *a)` */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
- BTF_PARAM_ENC(1 /* "a" */, 2),
- /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
- BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
- /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
- BTF_PARAM_ENC(3 /* "b" */, 2),
- /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
- BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
- /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
- BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
- };
- const struct bpf_insn insns[] = {
- /* main prog */
- BPF_CALL_REL(+1),
- BPF_EXIT_INSN(),
- /* global subprog */
- BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
- BPF_EXIT_INSN(),
- };
- const struct bpf_func_info_min func_infos[] = {
- { 0, 4 }, /* main prog -> FUNC 'a' */
- { 2, 6 }, /* subprog -> FUNC 'b' */
- };
- LIBBPF_OPTS(bpf_prog_load_opts, opts);
- int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);
-
- if (cached_result >= 0)
- return cached_result;
-
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
- if (btf_fd < 0)
- return 0;
-
- opts.prog_btf_fd = btf_fd;
- opts.func_info = &func_infos;
- opts.func_info_cnt = ARRAY_SIZE(func_infos);
- opts.func_info_rec_size = sizeof(func_infos[0]);
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
- "GPL", insns, insn_cnt, &opts);
- close(btf_fd);
-
- cached_result = probe_fd(prog_fd);
- return cached_result;
-}
-
/* Check if main program or global subprog's function prototype has `arg:ctx`
* argument tags, and, if necessary, substitute correct type to match what BPF
* verifier would expect, taking into account specific program type. This
@@ -6905,7 +6789,7 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra
return 0;
/* don't do any fix ups if kernel natively supports __arg_ctx */
- if (probe_kern_arg_ctx_tag() > 0)
+ if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
return 0;
/* some BPF program types just don't have named context structs, so
@@ -7292,12 +7176,12 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
data = sec_desc->data;
idx = shdr->sh_info;
- if (shdr->sh_type != SHT_REL) {
+ if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
pr_warn("internal error at %d\n", __LINE__);
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
+ if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
@@ -7473,6 +7357,10 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog
load_attr.prog_flags = prog->prog_flags;
load_attr.fd_array = obj->fd_array;
+ load_attr.token_fd = obj->token_fd;
+ if (obj->token_fd)
+ load_attr.prog_flags |= BPF_F_TOKEN_FD;
+
/* adjust load_attr if sec_def provides custom preload callback */
if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
@@ -7918,7 +7806,7 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig, *btf_tmp_path;
+ const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
struct bpf_object *obj;
char tmp_name[64];
int err;
@@ -7955,6 +7843,16 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
if (log_size && !log_buf)
return ERR_PTR(-EINVAL);
+ token_path = OPTS_GET(opts, bpf_token_path, NULL);
+ /* if user didn't specify bpf_token_path explicitly, check if
+ * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
+ * option
+ */
+ if (!token_path)
+ token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
+ if (token_path && strlen(token_path) >= PATH_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
if (IS_ERR(obj))
return obj;
@@ -7963,6 +7861,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
obj->log_size = log_size;
obj->log_level = log_level;
+ if (token_path) {
+ obj->token_path = strdup(token_path);
+ if (!obj->token_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
if (btf_tmp_path) {
if (strlen(btf_tmp_path) >= PATH_MAX) {
@@ -8449,11 +8355,20 @@ static void bpf_map_prepare_vdata(const struct bpf_map *map)
static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
{
+ struct bpf_map *map;
int i;
- for (i = 0; i < obj->nr_maps; i++)
- if (bpf_map__is_struct_ops(&obj->maps[i]))
- bpf_map_prepare_vdata(&obj->maps[i]);
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &obj->maps[i];
+
+ if (!bpf_map__is_struct_ops(map))
+ continue;
+
+ if (!map->autocreate)
+ continue;
+
+ bpf_map_prepare_vdata(map);
+ }
return 0;
}
@@ -8473,11 +8388,13 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
if (obj->gen_loader)
bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
- err = bpf_object__probe_loading(obj);
+ err = bpf_object_prepare_token(obj);
+ err = err ? : bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+ err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__create_maps(obj);
@@ -8947,13 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map)
zfree(&map->init_slots);
map->init_slots_sz = 0;
- if (map->mmaped) {
- size_t mmap_sz;
-
- mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
- munmap(map->mmaped, mmap_sz);
- map->mmaped = NULL;
- }
+ if (map->mmaped && map->mmaped != map->obj->arena_data)
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
+ map->mmaped = NULL;
if (map->st_ops) {
zfree(&map->st_ops->data);
@@ -9008,6 +8921,13 @@ void bpf_object__close(struct bpf_object *obj)
}
zfree(&obj->programs);
+ zfree(&obj->feat_cache);
+ zfree(&obj->token_path);
+ if (obj->token_fd > 0)
+ close(obj->token_fd);
+
+ zfree(&obj->arena_data);
+
free(obj);
}
@@ -9668,7 +9588,9 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
return NULL;
}
-/* Collect the reloc from ELF and populate the st_ops->progs[] */
+/* Collect the reloc from ELF, populate the st_ops->progs[], and update
+ * st_ops->data for shadow type.
+ */
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
Elf64_Shdr *shdr, Elf_Data *data)
{
@@ -9760,28 +9682,15 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -EINVAL;
}
- /* if we haven't yet processed this BPF program, record proper
- * attach_btf_id and member_idx
- */
- if (!prog->attach_btf_id) {
- prog->attach_btf_id = st_ops->type_id;
- prog->expected_attach_type = member_idx;
- }
+ st_ops->progs[member_idx] = prog;
- /* struct_ops BPF prog can be re-used between multiple
- * .struct_ops & .struct_ops.link as long as it's the
- * same struct_ops struct definition and the same
- * function pointer field
+ /* st_ops->data will be exposed to users, being returned by
+ * bpf_map__initial_value() as a pointer to the shadow
+ * type. All function pointers in the original struct type
+ * should be converted to a pointer to struct bpf_program
+ * in the shadow type.
*/
- if (prog->attach_btf_id != st_ops->type_id ||
- prog->expected_attach_type != member_idx) {
- pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
- map->name, prog->name, prog->sec_name, prog->type,
- prog->attach_btf_id, prog->expected_attach_type, name);
- return -EINVAL;
- }
-
- st_ops->progs[member_idx] = prog;
+ *((struct bpf_program **)(st_ops->data + moff)) = prog;
}
return 0;
@@ -9966,7 +9875,9 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
*btf_obj_fd = 0;
*btf_type_id = 1;
} else {
- err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+ err = find_kernel_btf_id(prog->obj, attach_name,
+ attach_type, btf_obj_fd,
+ btf_type_id);
}
if (err) {
pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
@@ -10188,11 +10099,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
return libbpf_err(-EBUSY);
if (map->mmaped) {
- int err;
size_t mmap_old_sz, mmap_new_sz;
+ int err;
- mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
- mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
+ if (map->def.type != BPF_MAP_TYPE_ARRAY)
+ return -EOPNOTSUPP;
+
+ mmap_old_sz = bpf_map_mmap_sz(map);
+ mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
if (err) {
pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
@@ -10225,22 +10139,41 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size)
{
+ size_t actual_sz;
+
if (map->obj->loaded || map->reused)
return libbpf_err(-EBUSY);
- if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
- size != map->def.value_size)
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+ return libbpf_err(-EINVAL);
+
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ actual_sz = map->obj->arena_data_sz;
+ else
+ actual_sz = map->def.value_size;
+ if (size != actual_sz)
return libbpf_err(-EINVAL);
memcpy(map->mmaped, data, size);
return 0;
}
-void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
{
+ if (bpf_map__is_struct_ops(map)) {
+ if (psize)
+ *psize = map->def.value_size;
+ return map->st_ops->data;
+ }
+
if (!map->mmaped)
return NULL;
- *psize = map->def.value_size;
+
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ *psize = map->obj->arena_data_sz;
+ else
+ *psize = map->def.value_size;
+
return map->mmaped;
}
@@ -11028,7 +10961,7 @@ static const char *arch_specific_syscall_pfx(void)
#endif
}
-static int probe_kern_syscall_wrapper(void)
+int probe_kern_syscall_wrapper(int token_fd)
{
char syscall_name[64];
const char *ksys_pfx;
@@ -13717,7 +13650,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
for (i = 0; i < s->map_cnt; i++) {
struct bpf_map *map = *s->maps[i].map;
- size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ size_t mmap_sz = bpf_map_mmap_sz(map);
int prot, map_fd = map->fd;
void **mmaped = s->maps[i].mmaped;
@@ -13729,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
continue;
}
+ if (map->def.type == BPF_MAP_TYPE_ARENA) {
+ *mmaped = map->mmaped;
+ continue;
+ }
+
if (map->def.map_flags & BPF_F_RDONLY_PROG)
prot = PROT_READ;
else
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6cd9c501624f..7b510761f545 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -177,10 +177,29 @@ struct bpf_object_open_opts {
* logs through its print callback.
*/
__u32 kernel_log_level;
+ /* Path to BPF FS mount point to derive BPF token from.
+ *
+ * Created BPF token will be used for all bpf() syscall operations
+ * that accept BPF token (e.g., map creation, BTF and program loads,
+ * etc) automatically within instantiated BPF object.
+ *
+ * If bpf_token_path is not specified, libbpf will consult
+ * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be
+ * taken as a value of bpf_token_path option and will force libbpf to
+ * either create BPF token from provided custom BPF FS path, or will
+ * disable implicit BPF token creation, if envvar value is an empty
+ * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are
+ * set at the same time.
+ *
+ * Setting bpf_token_path option to empty string disables libbpf's
+ * automatic attempt to create BPF token from default BPF FS mount
+ * point (/sys/fs/bpf), in case this default behavior is undesirable.
+ */
+ const char *bpf_token_path;
size_t :0;
};
-#define bpf_object_open_opts__last_field kernel_log_level
+#define bpf_object_open_opts__last_field bpf_token_path
/**
* @brief **bpf_object__open()** creates a bpf_object by opening
@@ -995,7 +1014,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize);
/**
* @brief **bpf_map__is_internal()** tells the caller whether or not the
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 91c5aef7dae7..86804fd90dd1 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -245,7 +245,6 @@ LIBBPF_0.3.0 {
btf__parse_raw_split;
btf__parse_split;
btf__new_empty_split;
- btf__new_split;
ring_buffer__epoll_fd;
} LIBBPF_0.2.0;
@@ -326,7 +325,6 @@ LIBBPF_0.7.0 {
bpf_xdp_detach;
bpf_xdp_query;
bpf_xdp_query_id;
- btf_ext__raw_data;
libbpf_probe_bpf_helper;
libbpf_probe_bpf_map_type;
libbpf_probe_bpf_prog_type;
@@ -411,4 +409,8 @@ LIBBPF_1.3.0 {
} LIBBPF_1.2.0;
LIBBPF_1.4.0 {
+ global:
+ bpf_token_create;
+ btf__new_split;
+ btf_ext__raw_data;
} LIBBPF_1.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 27e4e320e1a6..864b36177424 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -15,9 +15,24 @@
#include <linux/err.h>
#include <fcntl.h>
#include <unistd.h>
+#include <sys/syscall.h>
#include <libelf.h>
#include "relo_core.h"
+/* Android's libc doesn't support AT_EACCESS in faccessat() implementation
+ * ([0]), and just returns -EINVAL even if file exists and is accessible.
+ * See [1] for issues caused by this.
+ *
+ * So just redefine it to 0 on Android.
+ *
+ * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50
+ * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250
+ */
+#ifdef __ANDROID__
+#undef AT_EACCESS
+#define AT_EACCESS 0
+#endif
+
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -357,18 +372,39 @@ enum kern_feature_id {
FEAT_SYSCALL_WRAPPER,
/* BPF multi-uprobe link support */
FEAT_UPROBE_MULTI_LINK,
+ /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */
+ FEAT_ARG_CTX_TAG,
+ /* Kernel supports '?' at the front of datasec names */
+ FEAT_BTF_QMARK_DATASEC,
__FEAT_CNT,
};
-int probe_memcg_account(void);
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+struct kern_feature_cache {
+ enum kern_feature_result res[__FEAT_CNT];
+ int token_fd;
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+
+int probe_kern_syscall_wrapper(int token_fd);
+int probe_memcg_account(int token_fd);
int bump_rlimit_memlock(void);
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len);
-int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);
+ const char *str_sec, size_t str_len,
+ int token_fd);
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
@@ -532,6 +568,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn)
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
+/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range.
+ * Original FD is not closed or altered in any other way.
+ * Preserves original FD value, if it's invalid (negative).
+ */
+static inline int dup_good_fd(int fd)
+{
+ if (fd < 0)
+ return fd;
+ return fcntl(fd, F_DUPFD_CLOEXEC, 3);
+}
+
/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
* Takes ownership of the fd passed in, and closes it if calling
* fcntl(fd, F_DUPFD_CLOEXEC, 3).
@@ -543,7 +590,7 @@ static inline int ensure_good_fd(int fd)
if (fd < 0)
return fd;
if (fd < 3) {
- fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ fd = dup_good_fd(fd);
saved_errno = errno;
close(old_fd);
errno = saved_errno;
@@ -555,6 +602,15 @@ static inline int ensure_good_fd(int fd)
return fd;
}
+static inline int sys_dup2(int oldfd, int newfd)
+{
+#ifdef __NR_dup2
+ return syscall(__NR_dup2, oldfd, newfd);
+#else
+ return syscall(__NR_dup3, oldfd, newfd, 0);
+#endif
+}
+
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
* Regardless of success, *tmp_fd* is closed.
* Whatever *fixed_fd* pointed to is closed silently.
@@ -563,7 +619,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd)
{
int err;
- err = dup2(tmp_fd, fixed_fd);
+ err = sys_dup2(tmp_fd, fixed_fd);
err = err < 0 ? -errno : 0;
close(tmp_fd); /* clean up temporary FD */
return err;
@@ -613,4 +669,6 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt,
int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
unsigned long **poffsets, size_t *pcnt);
+int probe_fd(int fd);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9c4db90b92b6..302188122439 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
}
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len)
+ const char *str_sec, size_t str_len,
+ int token_fd)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
@@ -229,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
.str_off = types_len,
.str_len = str_len,
};
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .token_fd = token_fd,
+ .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
int btf_fd, btf_len;
__u8 *raw_btf;
@@ -241,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
- btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);
+ btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
free(raw_btf);
return btf_fd;
@@ -271,7 +276,7 @@ static int load_local_storage_btf(void)
};
return libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
+ strs, sizeof(strs), 0);
}
static int probe_map_create(enum bpf_map_type map_type)
@@ -326,12 +331,20 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_STRUCT_OPS:
/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
opts.btf_vmlinux_value_type_id = 1;
+ opts.value_type_btf_obj_fd = -1;
exp_err = -524; /* -ENOTSUPP */
break;
case BPF_MAP_TYPE_BLOOM_FILTER:
key_size = 0;
max_entries = 1;
break;
+ case BPF_MAP_TYPE_ARENA:
+ key_size = 0;
+ value_size = 0;
+ max_entries = 1; /* one page */
+ opts.map_extra = 0; /* can mmap() at any address */
+ opts.map_flags = BPF_F_MMAPABLE;
+ break;
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PROG_ARRAY:
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 16bca56002ab..0d4be829551b 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -2732,7 +2732,7 @@ static int finalize_btf(struct bpf_linker *linker)
/* Emit .BTF.ext section */
if (linker->btf_ext) {
- raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz);
+ raw_data = btf_ext__raw_data(linker->btf_ext, &raw_sz);
if (!raw_data)
return -ENOMEM;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 090bcf6e3b3d..68a2def17175 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -496,8 +496,8 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
if (err)
return libbpf_err(err);
- opts->feature_flags = md.flags;
- opts->xdp_zc_max_segs = md.xdp_zc_max_segs;
+ OPTS_SET(opts, feature_flags, md.flags);
+ OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs);
skip_feature_flags:
return 0;
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index a139334d57b6..626d7ffb03d6 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -2,5 +2,8 @@
#ifndef __LIBBPF_STR_ERROR_H
#define __LIBBPF_STR_ERROR_H
+#define STRERR_BUFSIZE 128
+
char *libbpf_strerror_r(int err, char *dst, int len);
+
#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index da1aa10bbcc3..8e9e09d84e26 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -11,11 +11,11 @@ $(SUBDIRS):
$(MAKE) -C $@ ; \
fi
-clean hardclean:
+clean distclean:
@for dir in $(SUBDIRS) ; do \
if [ -f "$$dir/Makefile" ] ; then \
$(MAKE) -C $$dir $@; \
fi \
done
-.PHONY: clean all $(SUBDIRS)
+.PHONY: all clean distclean $(SUBDIRS)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 3110f84dd029..07373c5a7afe 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -15,7 +15,12 @@ UAPI_PATH:=../../../../include/uapi/
get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
+CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
+CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py
index 2ad9ec0f5545..f131e33ac3ee 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/cli.py
@@ -6,7 +6,16 @@ import json
import pprint
import time
-from lib import YnlFamily, Netlink
+from lib import YnlFamily, Netlink, NlError
+
+
+class YnlEncoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, bytes):
+ return bytes.hex(obj)
+ if isinstance(obj, set):
+ return list(obj)
+ return json.JSONEncoder.default(self, obj)
def main():
@@ -28,8 +37,17 @@ def main():
parser.add_argument('--append', dest='flags', action='append_const',
const=Netlink.NLM_F_APPEND)
parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction)
+ parser.add_argument('--output-json', action='store_true')
+ parser.add_argument('--dbg-small-recv', default=0, const=4000,
+ action='store', nargs='?', type=int)
args = parser.parse_args()
+ def output(msg):
+ if args.output_json:
+ print(json.dumps(msg, cls=YnlEncoder))
+ else:
+ pprint.PrettyPrinter().pprint(msg)
+
if args.no_schema:
args.schema = ''
@@ -37,7 +55,10 @@ def main():
if args.json_text:
attrs = json.loads(args.json_text)
- ynl = YnlFamily(args.spec, args.schema, args.process_unknown)
+ ynl = YnlFamily(args.spec, args.schema, args.process_unknown,
+ recv_size=args.dbg_small_recv)
+ if args.dbg_small_recv:
+ ynl.set_recv_dbg(True)
if args.ntf:
ynl.ntf_subscribe(args.ntf)
@@ -45,16 +66,20 @@ def main():
if args.sleep:
time.sleep(args.sleep)
- if args.do:
- reply = ynl.do(args.do, attrs, args.flags)
- pprint.PrettyPrinter().pprint(reply)
- if args.dump:
- reply = ynl.dump(args.dump, attrs)
- pprint.PrettyPrinter().pprint(reply)
+ try:
+ if args.do:
+ reply = ynl.do(args.do, attrs, args.flags)
+ output(reply)
+ if args.dump:
+ reply = ynl.dump(args.dump, attrs)
+ output(reply)
+ except NlError as e:
+ print(e)
+ exit(1)
if args.ntf:
ynl.check_ntf()
- pprint.PrettyPrinter().pprint(ynl.async_msg_queue)
+ output(ynl.async_msg_queue)
if __name__ == "__main__":
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 84cbabdd02a8..713f5fb9cc2d 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -14,7 +14,10 @@ YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \
TOOL:=../ynl-gen-c.py
-GENS:=ethtool devlink handshake fou netdev nfsd
+GENS_PATHS=$(shell grep -nrI --files-without-match \
+ 'protocol: netlink' \
+ ../../../../Documentation/netlink/specs/)
+GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS})
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
@@ -40,11 +43,11 @@ protos.a: $(OBJS)
clean:
rm -f *.o
-hardclean: clean
+distclean: clean
rm -f *.c *.h *.a
regen:
@../ynl-regen.sh
-.PHONY: all clean hardclean regen
+.PHONY: all clean distclean regen
.DEFAULT_GOAL: all
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
index d2e50fd0a52d..dfff3ecd1cba 100644
--- a/tools/net/ynl/lib/Makefile
+++ b/tools/net/ynl/lib/Makefile
@@ -17,12 +17,13 @@ ynl.a: $(OBJS)
ar rcs $@ $(OBJS)
clean:
rm -f *.o *.d *~
+ rm -rf __pycache__
-hardclean: clean
+distclean: clean
rm -f *.a
%.o: %.c
$(COMPILE.c) -MMD -c -o $@ $<
-.PHONY: all clean
+.PHONY: all clean distclean
.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py
index f7eaa07783e7..9137b83e580a 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/lib/__init__.py
@@ -2,7 +2,7 @@
from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
SpecFamily, SpecOperation
-from .ynl import YnlFamily, Netlink
+from .ynl import YnlFamily, Netlink, NlError
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
- "SpecFamily", "SpecOperation", "YnlFamily", "Netlink"]
+ "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"]
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 44f13e383e8a..6d08ab9e213f 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -144,7 +144,7 @@ class SpecEnumSet(SpecElement):
class SpecAttr(SpecElement):
- """ Single Netlink atttribute type
+ """ Single Netlink attribute type
Represents a single attribute type within an attr space.
@@ -248,6 +248,7 @@ class SpecStructMember(SpecElement):
len integer, optional byte length of binary types
display_hint string, hint to help choose format specifier
when displaying the value
+ struct string, name of nested struct type
"""
def __init__(self, family, yaml):
super().__init__(family, yaml)
@@ -256,6 +257,7 @@ class SpecStructMember(SpecElement):
self.enum = yaml.get('enum')
self.len = yaml.get('len')
self.display_hint = yaml.get('display-hint')
+ self.struct = yaml.get('struct')
class SpecStruct(SpecElement):
@@ -306,10 +308,9 @@ class SpecSubMessage(SpecElement):
class SpecSubMessageFormat(SpecElement):
- """ Netlink sub-message definition
+ """ Netlink sub-message format definition
- Represents a set of sub-message formats for polymorphic nlattrs
- that contain type-specific sub messages.
+ Represents a single format for a sub-message.
Attributes:
value attribute value to match against type selector
@@ -417,6 +418,7 @@ class SpecFamily(SpecElement):
consts dict of all constants/enums
fixed_header string, optional name of family default fixed header struct
mcast_groups dict of all multicast groups (index by name)
+ kernel_family dict of kernel family attributes
"""
def __init__(self, spec_path, schema_path=None, exclude_ops=None):
with open(spec_path, "r") as stream:
@@ -460,6 +462,7 @@ class SpecFamily(SpecElement):
self.ntfs = collections.OrderedDict()
self.consts = collections.OrderedDict()
self.mcast_groups = collections.OrderedDict()
+ self.kernel_family = collections.OrderedDict(self.yaml.get('kernel-family', {}))
last_exception = None
while len(self._resolution_list) > 0:
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 7491da8e7555..6cf890080dc0 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -2,16 +2,16 @@
#ifndef __YNL_C_PRIV_H
#define __YNL_C_PRIV_H 1
+#include <stdbool.h>
#include <stddef.h>
-#include <libmnl/libmnl.h>
#include <linux/types.h>
+struct ynl_parse_arg;
+
/*
* YNL internals / low level stuff
*/
-/* Generic mnl helper code */
-
enum ynl_policy_type {
YNL_PT_REJECT = 1,
YNL_PT_IGNORE,
@@ -27,6 +27,20 @@ enum ynl_policy_type {
YNL_PT_BITFIELD32,
};
+enum ynl_parse_result {
+ YNL_PARSE_CB_ERROR = -1,
+ YNL_PARSE_CB_STOP = 0,
+ YNL_PARSE_CB_OK = 1,
+};
+
+#define YNL_SOCKET_BUFFER_SIZE (1 << 17)
+
+#define YNL_ARRAY_SIZE(array) (sizeof(array) ? \
+ sizeof(array) / sizeof(array[0]) : 0)
+
+typedef int (*ynl_parse_cb_t)(const struct nlmsghdr *nlh,
+ struct ynl_parse_arg *yarg);
+
struct ynl_policy_attr {
enum ynl_policy_type type;
unsigned int len;
@@ -80,8 +94,6 @@ struct ynl_ntf_base_type {
unsigned char data[] __attribute__((aligned(8)));
};
-extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE];
-
struct nlmsghdr *
ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
@@ -89,30 +101,26 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
-int ynl_recv_ack(struct ynl_sock *ys, int ret);
-int ynl_cb_null(const struct nlmsghdr *nlh, void *data);
-
/* YNL specific helpers used by the auto-generated code */
struct ynl_req_state {
struct ynl_parse_arg yarg;
- mnl_cb_t cb;
+ ynl_parse_cb_t cb;
__u32 rsp_cmd;
};
struct ynl_dump_state {
- struct ynl_sock *ys;
- struct ynl_policy_nest *rsp_policy;
+ struct ynl_parse_arg yarg;
void *first;
struct ynl_dump_list_type *last;
size_t alloc_sz;
- mnl_cb_t cb;
+ ynl_parse_cb_t cb;
__u32 rsp_cmd;
};
struct ynl_ntf_info {
struct ynl_policy_nest *policy;
- mnl_cb_t cb;
+ ynl_parse_cb_t cb;
size_t alloc_sz;
void (*free)(struct ynl_ntf_base_type *ntf);
};
@@ -125,20 +133,325 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd);
int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg);
-#ifndef MNL_HAS_AUTO_SCALARS
-static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr)
+/* Netlink message handling helpers */
+
+#define YNL_MSG_OVERFLOW 1
+
+static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf)
+{
+ struct nlmsghdr *nlh = buf;
+
+ memset(nlh, 0, sizeof(*nlh));
+ nlh->nlmsg_len = NLMSG_HDRLEN;
+
+ return nlh;
+}
+
+static inline unsigned int ynl_nlmsg_data_len(const struct nlmsghdr *nlh)
{
- if (mnl_attr_get_payload_len(attr) == 4)
- return mnl_attr_get_u32(attr);
- return mnl_attr_get_u64(attr);
+ return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+static inline void *ynl_nlmsg_data(const struct nlmsghdr *nlh)
+{
+ return (unsigned char *)nlh + NLMSG_HDRLEN;
+}
+
+static inline void *
+ynl_nlmsg_data_offset(const struct nlmsghdr *nlh, unsigned int offset)
+{
+ return (unsigned char *)nlh + NLMSG_HDRLEN + offset;
+}
+
+static inline void *ynl_nlmsg_end_addr(const struct nlmsghdr *nlh)
+{
+ return (char *)nlh + nlh->nlmsg_len;
+}
+
+static inline void *
+ynl_nlmsg_put_extra_header(struct nlmsghdr *nlh, unsigned int size)
+{
+ void *tail = ynl_nlmsg_end_addr(nlh);
+
+ nlh->nlmsg_len += NLMSG_ALIGN(size);
+ return tail;
+}
+
+/* Netlink attribute helpers */
+
+static inline unsigned int ynl_attr_type(const struct nlattr *attr)
+{
+ return attr->nla_type & NLA_TYPE_MASK;
+}
+
+static inline unsigned int ynl_attr_data_len(const struct nlattr *attr)
+{
+ return attr->nla_len - NLA_HDRLEN;
+}
+
+static inline void *ynl_attr_data(const struct nlattr *attr)
+{
+ return (unsigned char *)attr + NLA_HDRLEN;
+}
+
+static inline void *ynl_attr_data_end(const struct nlattr *attr)
+{
+ return ynl_attr_data(attr) + ynl_attr_data_len(attr);
+}
+
+#define ynl_attr_for_each(attr, nlh, fixed_hdr_sz) \
+ for ((attr) = ynl_attr_first(nlh, (nlh)->nlmsg_len, \
+ NLMSG_HDRLEN + fixed_hdr_sz); attr; \
+ (attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr))
+
+#define ynl_attr_for_each_nested(attr, outer) \
+ for ((attr) = ynl_attr_first(outer, outer->nla_len, \
+ sizeof(struct nlattr)); attr; \
+ (attr) = ynl_attr_next(ynl_attr_data_end(outer), attr))
+
+#define ynl_attr_for_each_payload(start, len, attr) \
+ for ((attr) = ynl_attr_first(start, len, 0); attr; \
+ (attr) = ynl_attr_next(start + len, attr))
+
+static inline struct nlattr *
+ynl_attr_if_good(const void *end, struct nlattr *attr)
+{
+ if (attr + 1 > (const struct nlattr *)end)
+ return NULL;
+ if (ynl_attr_data_end(attr) > end)
+ return NULL;
+ return attr;
+}
+
+static inline struct nlattr *
+ynl_attr_next(const void *end, const struct nlattr *prev)
+{
+ struct nlattr *attr;
+
+ attr = (void *)((char *)prev + NLA_ALIGN(prev->nla_len));
+ return ynl_attr_if_good(end, attr);
+}
+
+static inline struct nlattr *
+ynl_attr_first(const void *start, size_t len, size_t skip)
+{
+ struct nlattr *attr;
+
+ attr = (void *)((char *)start + NLMSG_ALIGN(skip));
+ return ynl_attr_if_good(start + len, attr);
+}
+
+static inline bool
+__ynl_attr_put_overflow(struct nlmsghdr *nlh, size_t size)
+{
+ bool o;
+
+ /* ynl_msg_start() stashed buffer length in nlmsg_pid. */
+ o = nlh->nlmsg_len + NLA_HDRLEN + NLMSG_ALIGN(size) > nlh->nlmsg_pid;
+ if (o)
+ /* YNL_MSG_OVERFLOW is < NLMSG_HDRLEN, all subsequent checks
+ * are guaranteed to fail.
+ */
+ nlh->nlmsg_pid = YNL_MSG_OVERFLOW;
+ return o;
+}
+
+static inline struct nlattr *
+ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type)
+{
+ struct nlattr *attr;
+
+ if (__ynl_attr_put_overflow(nlh, 0))
+ return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN;
+
+ attr = ynl_nlmsg_end_addr(nlh);
+ attr->nla_type = attr_type | NLA_F_NESTED;
+ nlh->nlmsg_len += NLA_HDRLEN;
+
+ return attr;
}
static inline void
-mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data)
+ynl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *attr)
{
- if ((uint32_t)data == (uint64_t)data)
- return mnl_attr_put_u32(nlh, type, data);
- return mnl_attr_put_u64(nlh, type, data);
+ attr->nla_len = (char *)ynl_nlmsg_end_addr(nlh) - (char *)attr;
+}
+
+static inline void
+ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ const void *value, size_t size)
+{
+ struct nlattr *attr;
+
+ if (__ynl_attr_put_overflow(nlh, size))
+ return;
+
+ attr = ynl_nlmsg_end_addr(nlh);
+ attr->nla_type = attr_type;
+ attr->nla_len = NLA_HDRLEN + size;
+
+ memcpy(ynl_attr_data(attr), value, size);
+
+ nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
+}
+
+static inline void
+ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
+{
+ struct nlattr *attr;
+ size_t len;
+
+ len = strlen(str);
+ if (__ynl_attr_put_overflow(nlh, len))
+ return;
+
+ attr = ynl_nlmsg_end_addr(nlh);
+ attr->nla_type = attr_type;
+
+ strcpy(ynl_attr_data(attr), str);
+ attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len);
+
+ nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
+}
+
+static inline const char *ynl_attr_get_str(const struct nlattr *attr)
+{
+ return (const char *)ynl_attr_data(attr);
+}
+
+static inline __s8 ynl_attr_get_s8(const struct nlattr *attr)
+{
+ return *(__s8 *)ynl_attr_data(attr);
+}
+
+static inline __s16 ynl_attr_get_s16(const struct nlattr *attr)
+{
+ return *(__s16 *)ynl_attr_data(attr);
+}
+
+static inline __s32 ynl_attr_get_s32(const struct nlattr *attr)
+{
+ return *(__s32 *)ynl_attr_data(attr);
+}
+
+static inline __s64 ynl_attr_get_s64(const struct nlattr *attr)
+{
+ __s64 tmp;
+
+ memcpy(&tmp, (unsigned char *)(attr + 1), sizeof(tmp));
+ return tmp;
+}
+
+static inline __u8 ynl_attr_get_u8(const struct nlattr *attr)
+{
+ return *(__u8 *)ynl_attr_data(attr);
+}
+
+static inline __u16 ynl_attr_get_u16(const struct nlattr *attr)
+{
+ return *(__u16 *)ynl_attr_data(attr);
+}
+
+static inline __u32 ynl_attr_get_u32(const struct nlattr *attr)
+{
+ return *(__u32 *)ynl_attr_data(attr);
+}
+
+static inline __u64 ynl_attr_get_u64(const struct nlattr *attr)
+{
+ __u64 tmp;
+
+ memcpy(&tmp, (unsigned char *)(attr + 1), sizeof(tmp));
+ return tmp;
+}
+
+static inline void
+ynl_attr_put_s8(struct nlmsghdr *nlh, unsigned int attr_type, __s8 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_s16(struct nlmsghdr *nlh, unsigned int attr_type, __s16 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_s32(struct nlmsghdr *nlh, unsigned int attr_type, __s32 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_s64(struct nlmsghdr *nlh, unsigned int attr_type, __s64 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u8(struct nlmsghdr *nlh, unsigned int attr_type, __u8 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u16(struct nlmsghdr *nlh, unsigned int attr_type, __u16 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u32(struct nlmsghdr *nlh, unsigned int attr_type, __u32 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u64(struct nlmsghdr *nlh, unsigned int attr_type, __u64 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline __u64 ynl_attr_get_uint(const struct nlattr *attr)
+{
+ switch (ynl_attr_data_len(attr)) {
+ case 4:
+ return ynl_attr_get_u32(attr);
+ case 8:
+ return ynl_attr_get_u64(attr);
+ default:
+ return 0;
+ }
+}
+
+static inline __s64 ynl_attr_get_sint(const struct nlattr *attr)
+{
+ switch (ynl_attr_data_len(attr)) {
+ case 4:
+ return ynl_attr_get_s32(attr);
+ case 8:
+ return ynl_attr_get_s64(attr);
+ default:
+ return 0;
+ }
+}
+
+static inline void
+ynl_attr_put_uint(struct nlmsghdr *nlh, __u16 type, __u64 data)
+{
+ if ((__u32)data == (__u64)data)
+ ynl_attr_put_u32(nlh, type, data);
+ else
+ ynl_attr_put_u64(nlh, type, data);
+}
+
+static inline void
+ynl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data)
+{
+ if ((__s32)data == (__s64)data)
+ ynl_attr_put_s32(nlh, type, data);
+ else
+ ynl_attr_put_s64(nlh, type, data);
}
-#endif
#endif
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index c82a7f41b31c..4b9c091fc86b 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -3,10 +3,11 @@
#include <poll.h>
#include <string.h>
#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
#include <linux/types.h>
-
-#include <libmnl/libmnl.h>
#include <linux/genetlink.h>
+#include <sys/socket.h>
#include "ynl.h"
@@ -92,9 +93,9 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
data_len = end - start;
- mnl_attr_for_each_payload(start, data_len) {
+ ynl_attr_for_each_payload(start, data_len, attr) {
astart_off = (char *)attr - (char *)start;
- aend_off = astart_off + mnl_attr_get_payload_len(attr);
+ aend_off = astart_off + ynl_attr_data_len(attr);
if (aend_off <= off)
continue;
@@ -106,7 +107,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
off -= astart_off;
- type = mnl_attr_get_type(attr);
+ type = ynl_attr_type(attr);
if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
return n;
@@ -124,8 +125,8 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
}
off -= sizeof(struct nlattr);
- start = mnl_attr_get_payload(attr);
- end = start + mnl_attr_get_payload_len(attr);
+ start = ynl_attr_data(attr);
+ end = start + ynl_attr_data_len(attr);
return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
&str[n], str_sz - n, nest_pol);
@@ -147,14 +148,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) {
yerr_msg(ys, "%s", strerror(ys->err.code));
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
}
- mnl_attr_for_each(attr, nlh, hlen) {
+ ynl_attr_for_each(attr, nlh, hlen) {
unsigned int len, type;
- len = mnl_attr_get_payload_len(attr);
- type = mnl_attr_get_type(attr);
+ len = ynl_attr_data_len(attr);
+ type = ynl_attr_type(attr);
if (type > NLMSGERR_ATTR_MAX)
continue;
@@ -166,12 +167,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
case NLMSGERR_ATTR_MISS_TYPE:
case NLMSGERR_ATTR_MISS_NEST:
if (len != sizeof(__u32))
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
break;
case NLMSGERR_ATTR_MSG:
- str = mnl_attr_get_payload(attr);
+ str = ynl_attr_get_str(attr);
if (str[len - 1])
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
break;
default:
break;
@@ -185,14 +186,13 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
unsigned int n, off;
void *start, *end;
- ys->err.attr_offs = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
+ ys->err.attr_offs = ynl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
str ? " (" : "");
- start = mnl_nlmsg_get_payload_offset(ys->nlh,
- ys->family->hdr_len);
- end = mnl_nlmsg_get_payload_tail(ys->nlh);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ end = ynl_nlmsg_end_addr(ys->nlh);
off = ys->err.attr_offs;
off -= sizeof(struct nlmsghdr);
@@ -211,18 +211,17 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
void *start, *end;
int n2;
- type = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]);
+ type = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]);
n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
bad_attr[0] ? ", " : (str ? " (" : ""));
- start = mnl_nlmsg_get_payload_offset(ys->nlh,
- ys->family->hdr_len);
- end = mnl_nlmsg_get_payload_tail(ys->nlh);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ end = ynl_nlmsg_end_addr(ys->nlh);
nest_pol = ys->req_policy;
if (tb[NLMSGERR_ATTR_MISS_NEST]) {
- off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
+ off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
off -= sizeof(struct nlmsghdr);
off -= ys->family->hdr_len;
@@ -254,13 +253,13 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
else
yerr_msg(ys, "%s", strerror(ys->err.code));
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
}
-static int ynl_cb_error(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_cb_error(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh);
- struct ynl_parse_arg *yarg = data;
+ const struct nlmsgerr *err = ynl_nlmsg_data(nlh);
unsigned int hlen;
int code;
@@ -270,16 +269,15 @@ static int ynl_cb_error(const struct nlmsghdr *nlh, void *data)
hlen = sizeof(*err);
if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
- hlen += mnl_nlmsg_get_payload_len(&err->msg);
+ hlen += ynl_nlmsg_data_len(&err->msg);
ynl_ext_ack_check(yarg->ys, nlh, hlen);
- return code ? MNL_CB_ERROR : MNL_CB_STOP;
+ return code ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_STOP;
}
-static int ynl_cb_done(const struct nlmsghdr *nlh, void *data)
+static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_parse_arg *yarg = data;
int err;
err = *(int *)NLMSG_DATA(nlh);
@@ -289,23 +287,11 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, void *data)
ynl_ext_ack_check(yarg->ys, nlh, sizeof(int));
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
- return MNL_CB_STOP;
-}
-
-static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data)
-{
- return MNL_CB_OK;
+ return YNL_PARSE_CB_STOP;
}
-mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = {
- [NLMSG_NOOP] = ynl_cb_noop,
- [NLMSG_ERROR] = ynl_cb_error,
- [NLMSG_DONE] = ynl_cb_done,
- [NLMSG_OVERRUN] = ynl_cb_noop,
-};
-
/* Attribute validation */
int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
@@ -314,9 +300,9 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
unsigned int type, len;
unsigned char *data;
- data = mnl_attr_get_payload(attr);
- len = mnl_attr_get_payload_len(attr);
- type = mnl_attr_get_type(attr);
+ data = ynl_attr_data(attr);
+ len = ynl_attr_data_len(attr);
+ type = ynl_attr_type(attr);
if (type > yarg->rsp_policy->max_attr) {
yerr(yarg->ys, YNL_ERROR_INTERNAL,
"Internal error, validating unknown attribute");
@@ -413,14 +399,38 @@ struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags)
ynl_err_reset(ys);
- nlh = ys->nlh = mnl_nlmsg_put_header(ys->tx_buf);
+ nlh = ys->nlh = ynl_nlmsg_put_header(ys->tx_buf);
nlh->nlmsg_type = id;
nlh->nlmsg_flags = flags;
nlh->nlmsg_seq = ++ys->seq;
+ /* This is a local YNL hack for length checking, we put the buffer
+ * length in nlmsg_pid, since messages sent to the kernel always use
+ * PID 0. Message needs to be terminated with ynl_msg_end().
+ */
+ nlh->nlmsg_pid = YNL_SOCKET_BUFFER_SIZE;
+
return nlh;
}
+static int ynl_msg_end(struct ynl_sock *ys, struct nlmsghdr *nlh)
+{
+ /* We stash buffer length in nlmsg_pid. */
+ if (nlh->nlmsg_pid == 0) {
+ yerr(ys, YNL_ERROR_INPUT_INVALID,
+ "Unknown input buffer length");
+ return -EINVAL;
+ }
+ if (nlh->nlmsg_pid == YNL_MSG_OVERFLOW) {
+ yerr(ys, YNL_ERROR_INPUT_TOO_BIG,
+ "Constructed message longer than internal buffer");
+ return -EMSGSIZE;
+ }
+
+ nlh->nlmsg_pid = 0;
+ return 0;
+}
+
struct nlmsghdr *
ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
__u8 cmd, __u8 version)
@@ -435,7 +445,7 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
gehdr.cmd = cmd;
gehdr.version = version;
- data = mnl_nlmsg_put_extra_header(nlh, sizeof(gehdr));
+ data = ynl_nlmsg_put_extra_header(nlh, sizeof(gehdr));
memcpy(data, &gehdr, sizeof(gehdr));
return nlh;
@@ -464,31 +474,85 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
cmd, version);
}
-int ynl_recv_ack(struct ynl_sock *ys, int ret)
+static int ynl_cb_null(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- if (!ret) {
- yerr(ys, YNL_ERROR_EXPECT_ACK,
- "Expecting an ACK but nothing received");
- return -1;
+ yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG,
+ "Received a message when none were expected");
+
+ return YNL_PARSE_CB_ERROR;
+}
+
+static int
+__ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb, int flags)
+{
+ struct ynl_sock *ys = yarg->ys;
+ const struct nlmsghdr *nlh;
+ ssize_t len, rem;
+ int ret;
+
+ len = recv(ys->socket, ys->rx_buf, YNL_SOCKET_BUFFER_SIZE, flags);
+ if (len < 0) {
+ if (flags & MSG_DONTWAIT && errno == EAGAIN)
+ return YNL_PARSE_CB_STOP;
+ return len;
}
- ret = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
- if (ret < 0) {
- perr(ys, "Socket receive failed");
- return ret;
+ ret = YNL_PARSE_CB_STOP;
+ for (rem = len; rem > 0; NLMSG_NEXT(nlh, rem)) {
+ nlh = (struct nlmsghdr *)&ys->rx_buf[len - rem];
+ if (!NLMSG_OK(nlh, rem)) {
+ yerr(yarg->ys, YNL_ERROR_INV_RESP,
+ "Invalid message or trailing data in the response.");
+ return YNL_PARSE_CB_ERROR;
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP_INTR) {
+ /* TODO: handle this better */
+ yerr(yarg->ys, YNL_ERROR_DUMP_INTER,
+ "Dump interrupted / inconsistent, please retry.");
+ return YNL_PARSE_CB_ERROR;
+ }
+
+ switch (nlh->nlmsg_type) {
+ case 0:
+ yerr(yarg->ys, YNL_ERROR_INV_RESP,
+ "Invalid message type in the response.");
+ return YNL_PARSE_CB_ERROR;
+ case NLMSG_NOOP:
+ case NLMSG_OVERRUN ... NLMSG_MIN_TYPE - 1:
+ ret = YNL_PARSE_CB_OK;
+ break;
+ case NLMSG_ERROR:
+ ret = ynl_cb_error(nlh, yarg);
+ break;
+ case NLMSG_DONE:
+ ret = ynl_cb_done(nlh, yarg);
+ break;
+ default:
+ ret = cb(nlh, yarg);
+ break;
+ }
}
- return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid,
- ynl_cb_null, ys);
+
+ return ret;
}
-int ynl_cb_null(const struct nlmsghdr *nlh, void *data)
+static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb)
{
- struct ynl_parse_arg *yarg = data;
+ return __ynl_sock_read_msgs(yarg, cb, 0);
+}
- yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG,
- "Received a message when none were expected");
+static int ynl_recv_ack(struct ynl_sock *ys, int ret)
+{
+ struct ynl_parse_arg yarg = { .ys = ys, };
+
+ if (!ret) {
+ yerr(ys, YNL_ERROR_EXPECT_ACK,
+ "Expecting an ACK but nothing received");
+ return -1;
+ }
- return MNL_CB_ERROR;
+ return ynl_sock_read_msgs(&yarg, ynl_cb_null);
}
/* Init/fini and genetlink boiler plate */
@@ -498,7 +562,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
const struct nlattr *entry, *attr;
unsigned int i;
- mnl_attr_for_each_nested(attr, mcasts)
+ ynl_attr_for_each_nested(attr, mcasts)
ys->n_mcast_groups++;
if (!ys->n_mcast_groups)
@@ -507,54 +571,55 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
ys->mcast_groups = calloc(ys->n_mcast_groups,
sizeof(*ys->mcast_groups));
if (!ys->mcast_groups)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
i = 0;
- mnl_attr_for_each_nested(entry, mcasts) {
- mnl_attr_for_each_nested(attr, entry) {
- if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_ID)
- ys->mcast_groups[i].id = mnl_attr_get_u32(attr);
- if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) {
+ ynl_attr_for_each_nested(entry, mcasts) {
+ ynl_attr_for_each_nested(attr, entry) {
+ if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_ID)
+ ys->mcast_groups[i].id = ynl_attr_get_u32(attr);
+ if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) {
strncpy(ys->mcast_groups[i].name,
- mnl_attr_get_str(attr),
+ ynl_attr_get_str(attr),
GENL_NAMSIZ - 1);
ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0;
}
}
+ i++;
}
return 0;
}
-static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_get_family_info_cb(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_parse_arg *yarg = data;
struct ynl_sock *ys = yarg->ys;
const struct nlattr *attr;
bool found_id = true;
- mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
- if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GROUPS)
+ ynl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GROUPS)
if (ynl_get_family_info_mcast(ys, attr))
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
- if (mnl_attr_get_type(attr) != CTRL_ATTR_FAMILY_ID)
+ if (ynl_attr_type(attr) != CTRL_ATTR_FAMILY_ID)
continue;
- if (mnl_attr_get_payload_len(attr) != sizeof(__u16)) {
+ if (ynl_attr_data_len(attr) != sizeof(__u16)) {
yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID");
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
- ys->family_id = mnl_attr_get_u16(attr);
+ ys->family_id = ynl_attr_get_u16(attr);
found_id = true;
}
if (!found_id) {
yerr(ys, YNL_ERROR_ATTR_MISSING, "Family ID missing");
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
}
static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
@@ -564,66 +629,85 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
int err;
nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1);
- mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
+ ynl_attr_put_str(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
- err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);
+ err = ynl_msg_end(ys, nlh);
+ if (err < 0)
+ return err;
+
+ err = send(ys->socket, nlh, nlh->nlmsg_len, 0);
if (err < 0) {
perr(ys, "failed to request socket family info");
return err;
}
- err = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
- if (err <= 0) {
- perr(ys, "failed to receive the socket family info");
+ err = ynl_sock_read_msgs(&yarg, ynl_get_family_info_cb);
+ if (err < 0) {
+ free(ys->mcast_groups);
+ perr(ys, "failed to receive the socket family info - no such family?");
return err;
}
- err = mnl_cb_run2(ys->rx_buf, err, ys->seq, ys->portid,
- ynl_get_family_info_cb, &yarg,
- ynl_cb_array, ARRAY_SIZE(ynl_cb_array));
+
+ err = ynl_recv_ack(ys, err);
if (err < 0) {
free(ys->mcast_groups);
- perr(ys, "failed to receive the socket family info - no such family?");
return err;
}
- return ynl_recv_ack(ys, err);
+ return 0;
}
struct ynl_sock *
ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
{
+ struct sockaddr_nl addr;
struct ynl_sock *ys;
+ socklen_t addrlen;
int one = 1;
- ys = malloc(sizeof(*ys) + 2 * MNL_SOCKET_BUFFER_SIZE);
+ ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
if (!ys)
return NULL;
memset(ys, 0, sizeof(*ys));
ys->family = yf;
ys->tx_buf = &ys->raw_buf[0];
- ys->rx_buf = &ys->raw_buf[MNL_SOCKET_BUFFER_SIZE];
+ ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
ys->ntf_last_next = &ys->ntf_first;
- ys->sock = mnl_socket_open(NETLINK_GENERIC);
- if (!ys->sock) {
+ ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (ys->socket < 0) {
__perr(yse, "failed to create a netlink socket");
goto err_free_sock;
}
- if (mnl_socket_setsockopt(ys->sock, NETLINK_CAP_ACK,
- &one, sizeof(one))) {
+ if (setsockopt(ys->socket, SOL_NETLINK, NETLINK_CAP_ACK,
+ &one, sizeof(one))) {
__perr(yse, "failed to enable netlink ACK");
goto err_close_sock;
}
- if (mnl_socket_setsockopt(ys->sock, NETLINK_EXT_ACK,
- &one, sizeof(one))) {
+ if (setsockopt(ys->socket, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one))) {
__perr(yse, "failed to enable netlink ext ACK");
goto err_close_sock;
}
+ memset(&addr, 0, sizeof(addr));
+ addr.nl_family = AF_NETLINK;
+ if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ __perr(yse, "unable to bind to a socket address");
+ goto err_close_sock;;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addrlen = sizeof(addr);
+ if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) {
+ __perr(yse, "unable to read socket address");
+ goto err_close_sock;;
+ }
+ ys->portid = addr.nl_pid;
ys->seq = random();
- ys->portid = mnl_socket_get_portid(ys->sock);
+
if (ynl_sock_read_family(ys, yf->name)) {
if (yse)
@@ -634,7 +718,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
return ys;
err_close_sock:
- mnl_socket_close(ys->sock);
+ close(ys->socket);
err_free_sock:
free(ys);
return NULL;
@@ -644,7 +728,7 @@ void ynl_sock_destroy(struct ynl_sock *ys)
{
struct ynl_ntf_base_type *ntf;
- mnl_socket_close(ys->sock);
+ close(ys->socket);
while ((ntf = ynl_ntf_dequeue(ys)))
ynl_ntf_free(ntf);
free(ys->mcast_groups);
@@ -671,9 +755,9 @@ int ynl_subscribe(struct ynl_sock *ys, const char *grp_name)
return -1;
}
- err = mnl_socket_setsockopt(ys->sock, NETLINK_ADD_MEMBERSHIP,
- &ys->mcast_groups[i].id,
- sizeof(ys->mcast_groups[i].id));
+ err = setsockopt(ys->socket, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ &ys->mcast_groups[i].id,
+ sizeof(ys->mcast_groups[i].id));
if (err < 0) {
perr(ys, "Subscribing to multicast group failed");
return -1;
@@ -684,7 +768,7 @@ int ynl_subscribe(struct ynl_sock *ys, const char *grp_name)
int ynl_socket_get_fd(struct ynl_sock *ys)
{
- return mnl_socket_get_fd(ys->sock);
+ return ys->socket;
}
struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys)
@@ -710,12 +794,12 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
struct genlmsghdr *gehdr;
int ret;
- gehdr = mnl_nlmsg_get_payload(nlh);
+ gehdr = ynl_nlmsg_data(nlh);
if (gehdr->cmd >= ys->family->ntf_info_size)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
info = &ys->family->ntf_info[gehdr->cmd];
if (!info->cb)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
rsp = calloc(1, info->alloc_sz);
rsp->free = info->free;
@@ -723,7 +807,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
yarg.rsp_policy = info->policy;
ret = info->cb(nlh, &yarg);
- if (ret <= MNL_CB_STOP)
+ if (ret <= YNL_PARSE_CB_STOP)
goto err_free;
rsp->family = nlh->nlmsg_type;
@@ -732,43 +816,27 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
*ys->ntf_last_next = rsp;
ys->ntf_last_next = &rsp->next;
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
err_free:
info->free(rsp);
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
-static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_ntf_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- return ynl_ntf_parse((struct ynl_sock *)data, nlh);
+ return ynl_ntf_parse(yarg->ys, nlh);
}
int ynl_ntf_check(struct ynl_sock *ys)
{
- ssize_t len;
+ struct ynl_parse_arg yarg = { .ys = ys, };
int err;
do {
- /* libmnl doesn't let us pass flags to the recv to make
- * it non-blocking so we need to poll() or peek() :|
- */
- struct pollfd pfd = { };
-
- pfd.fd = mnl_socket_get_fd(ys->sock);
- pfd.events = POLLIN;
- err = poll(&pfd, 1, 1);
- if (err < 1)
- return err;
-
- len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
- MNL_SOCKET_BUFFER_SIZE);
- if (len < 0)
- return len;
-
- err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
- ynl_ntf_trampoline, ys,
- ynl_cb_array, NLMSG_MIN_TYPE);
+ err = __ynl_sock_read_msgs(&yarg, ynl_ntf_trampoline,
+ MSG_DONTWAIT);
if (err < 0)
return err;
} while (err > 0);
@@ -789,7 +857,7 @@ void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd)
int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
{
yerr(yarg->ys, YNL_ERROR_INV_RESP, "Error parsing response: %s", msg);
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
static int
@@ -797,27 +865,28 @@ ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
{
struct genlmsghdr *gehdr;
- if (mnl_nlmsg_get_payload_len(nlh) < sizeof(*gehdr)) {
+ if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
yerr(ys, YNL_ERROR_INV_RESP,
"Kernel responded with truncated message");
return -1;
}
- gehdr = mnl_nlmsg_get_payload(nlh);
+ gehdr = ynl_nlmsg_data(nlh);
if (gehdr->cmd != rsp_cmd)
return ynl_ntf_parse(ys, nlh);
return 0;
}
-static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data)
+static
+int ynl_req_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_req_state *yrs = data;
+ struct ynl_req_state *yrs = (void *)yarg;
int ret;
ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd);
if (ret)
- return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+ return ret < 0 ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_OK;
return yrs->cb(nlh, &yrs->yarg);
}
@@ -825,43 +894,38 @@ static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data)
int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
struct ynl_req_state *yrs)
{
- ssize_t len;
int err;
- err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+ err = ynl_msg_end(ys, req_nlh);
+ if (err < 0)
+ return err;
+
+ err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0);
if (err < 0)
return err;
do {
- len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
- MNL_SOCKET_BUFFER_SIZE);
- if (len < 0)
- return len;
-
- err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
- ynl_req_trampoline, yrs,
- ynl_cb_array, NLMSG_MIN_TYPE);
- if (err < 0)
- return err;
+ err = ynl_sock_read_msgs(&yrs->yarg, ynl_req_trampoline);
} while (err > 0);
- return 0;
+ return err;
}
-static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_dump_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *data)
{
- struct ynl_dump_state *ds = data;
+ struct ynl_dump_state *ds = (void *)data;
struct ynl_dump_list_type *obj;
struct ynl_parse_arg yarg = {};
int ret;
- ret = ynl_check_alien(ds->ys, nlh, ds->rsp_cmd);
+ ret = ynl_check_alien(ds->yarg.ys, nlh, ds->rsp_cmd);
if (ret)
- return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+ return ret < 0 ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_OK;
obj = calloc(1, ds->alloc_sz);
if (!obj)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
if (!ds->first)
ds->first = obj;
@@ -869,8 +933,7 @@ static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data)
ds->last->next = obj;
ds->last = obj;
- yarg.ys = ds->ys;
- yarg.rsp_policy = ds->rsp_policy;
+ yarg = ds->yarg;
yarg.data = &obj->data;
return ds->cb(nlh, &yarg);
@@ -888,22 +951,18 @@ static void *ynl_dump_end(struct ynl_dump_state *ds)
int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
struct ynl_dump_state *yds)
{
- ssize_t len;
int err;
- err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+ err = ynl_msg_end(ys, req_nlh);
if (err < 0)
return err;
- do {
- len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
- MNL_SOCKET_BUFFER_SIZE);
- if (len < 0)
- goto err_close_list;
+ err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0);
+ if (err < 0)
+ return err;
- err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
- ynl_dump_trampoline, yds,
- ynl_cb_array, NLMSG_MIN_TYPE);
+ do {
+ err = ynl_sock_read_msgs(&yds->yarg, ynl_dump_trampoline);
if (err < 0)
goto err_close_list;
} while (err > 0);
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index ce77a6d76ce0..9842e85a8c57 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -12,6 +12,7 @@ enum ynl_error_code {
YNL_ERROR_NONE = 0,
__YNL_ERRNO_END = 4096,
YNL_ERROR_INTERNAL,
+ YNL_ERROR_DUMP_INTER,
YNL_ERROR_EXPECT_ACK,
YNL_ERROR_EXPECT_MSG,
YNL_ERROR_UNEXPECT_MSG,
@@ -19,6 +20,8 @@ enum ynl_error_code {
YNL_ERROR_ATTR_INVALID,
YNL_ERROR_UNKNOWN_NTF,
YNL_ERROR_INV_RESP,
+ YNL_ERROR_INPUT_INVALID,
+ YNL_ERROR_INPUT_TOO_BIG,
};
/**
@@ -58,7 +61,7 @@ struct ynl_sock {
/* private: */
const struct ynl_family *family;
- struct mnl_socket *sock;
+ int socket;
__u32 seq;
__u32 portid;
__u16 family_id;
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 1e10512b2117..5fa7957f6e0f 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -7,6 +7,7 @@ import random
import socket
import struct
from struct import Struct
+import sys
import yaml
import ipaddress
import uuid
@@ -84,6 +85,10 @@ class NlError(Exception):
return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}"
+class ConfigError(Exception):
+ pass
+
+
class NlAttr:
ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little'])
type_formats = {
@@ -113,20 +118,6 @@ class NlAttr:
else format.little
return format.native
- @classmethod
- def formatted_string(cls, raw, display_hint):
- if display_hint == 'mac':
- formatted = ':'.join('%02x' % b for b in raw)
- elif display_hint == 'hex':
- formatted = bytes.hex(raw, ' ')
- elif display_hint in [ 'ipv4', 'ipv6' ]:
- formatted = format(ipaddress.ip_address(raw))
- elif display_hint == 'uuid':
- formatted = str(uuid.UUID(bytes=raw))
- else:
- formatted = raw
- return formatted
-
def as_scalar(self, attr_type, byte_order=None):
format = self.get_format(attr_type, byte_order)
return format.unpack(self.raw)[0]
@@ -148,23 +139,6 @@ class NlAttr:
format = self.get_format(type)
return [ x[0] for x in format.iter_unpack(self.raw) ]
- def as_struct(self, members):
- value = dict()
- offset = 0
- for m in members:
- # TODO: handle non-scalar members
- if m.type == 'binary':
- decoded = self.raw[offset : offset + m['len']]
- offset += m['len']
- elif m.type in NlAttr.type_formats:
- format = self.get_format(m.type, m.byte_order)
- [ decoded ] = format.unpack_from(self.raw, offset)
- offset += format.size
- if m.display_hint:
- decoded = self.formatted_string(decoded, m.display_hint)
- value[m.name] = decoded
- return value
-
def __repr__(self):
return f"[type:{self.type} len:{self._len}] {self.raw}"
@@ -244,11 +218,11 @@ class NlMsg:
return self.nl_type
def __repr__(self):
- msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}\n"
+ msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}"
if self.error:
- msg += '\terror: ' + str(self.error)
+ msg += '\n\terror: ' + str(self.error)
if self.extack:
- msg += '\textack: ' + repr(self.extack)
+ msg += '\n\textack: ' + repr(self.extack)
return msg
@@ -370,7 +344,7 @@ class NetlinkProtocol:
fixed_header_size = 0
if ynl:
op = ynl.rsp_by_value[msg.cmd()]
- fixed_header_size = ynl._fixed_header_size(op.fixed_header)
+ fixed_header_size = ynl._struct_size(op.fixed_header)
msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size)
return msg
@@ -379,6 +353,9 @@ class NetlinkProtocol:
raise Exception(f'Multicast group "{mcast_name}" not present in the spec')
return mcast_groups[mcast_name].value
+ def msghdr_size(self):
+ return 16
+
class GenlProtocol(NetlinkProtocol):
def __init__(self, family_name):
@@ -404,6 +381,28 @@ class GenlProtocol(NetlinkProtocol):
raise Exception(f'Multicast group "{mcast_name}" not present in the family')
return self.genl_family['mcast'][mcast_name]
+ def msghdr_size(self):
+ return super().msghdr_size() + 4
+
+
+class SpaceAttrs:
+ SpecValuesPair = namedtuple('SpecValuesPair', ['spec', 'values'])
+
+ def __init__(self, attr_space, attrs, outer = None):
+ outer_scopes = outer.scopes if outer else []
+ inner_scope = self.SpecValuesPair(attr_space, attrs)
+ self.scopes = [inner_scope] + outer_scopes
+
+ def lookup(self, name):
+ for scope in self.scopes:
+ if name in scope.spec:
+ if name in scope.values:
+ return scope.values[name]
+ spec_name = scope.spec.yaml['name']
+ raise Exception(
+ f"No value for '{name}' in attribute space '{spec_name}'")
+ raise Exception(f"Attribute '{name}' not defined in any attribute-set")
+
#
# YNL implementation details.
@@ -411,7 +410,8 @@ class GenlProtocol(NetlinkProtocol):
class YnlFamily(SpecFamily):
- def __init__(self, def_path, schema=None, process_unknown=False):
+ def __init__(self, def_path, schema=None, process_unknown=False,
+ recv_size=0):
super().__init__(def_path, schema)
self.include_raw = False
@@ -426,6 +426,17 @@ class YnlFamily(SpecFamily):
except KeyError:
raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel")
+ self._recv_dbg = False
+ # Note that netlink will use conservative (min) message size for
+ # the first dump recv() on the socket, our setting will only matter
+ # from the second recv() on.
+ self._recv_size = recv_size if recv_size else 131072
+ # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo)
+ # for a message, so smaller receive sizes will lead to truncation.
+ # Note that the min size for other families may be larger than 4k!
+ if self._recv_size < 4000:
+ raise ConfigError()
+
self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, self.nlproto.proto_num)
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1)
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1)
@@ -449,18 +460,61 @@ class YnlFamily(SpecFamily):
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP,
mcast_id)
- def _add_attr(self, space, name, value):
+ def set_recv_dbg(self, enabled):
+ self._recv_dbg = enabled
+
+ def _recv_dbg_print(self, reply, nl_msgs):
+ if not self._recv_dbg:
+ return
+ print("Recv: read", len(reply), "bytes,",
+ len(nl_msgs.msgs), "messages", file=sys.stderr)
+ for nl_msg in nl_msgs:
+ print(" ", nl_msg, file=sys.stderr)
+
+ def _encode_enum(self, attr_spec, value):
+ enum = self.consts[attr_spec['enum']]
+ if enum.type == 'flags' or attr_spec.get('enum-as-flags', False):
+ scalar = 0
+ if isinstance(value, str):
+ value = [value]
+ for single_value in value:
+ scalar += enum.entries[single_value].user_value(as_flags = True)
+ return scalar
+ else:
+ return enum.entries[value].user_value()
+
+ def _get_scalar(self, attr_spec, value):
+ try:
+ return int(value)
+ except (ValueError, TypeError) as e:
+ if 'enum' not in attr_spec:
+ raise e
+ return self._encode_enum(attr_spec, value)
+
+ def _add_attr(self, space, name, value, search_attrs):
try:
attr = self.attr_sets[space][name]
except KeyError:
raise Exception(f"Space '{space}' has no attribute '{name}'")
nl_type = attr.value
+
+ if attr.is_multi and isinstance(value, list):
+ attr_payload = b''
+ for subvalue in value:
+ attr_payload += self._add_attr(space, name, subvalue, search_attrs)
+ return attr_payload
+
if attr["type"] == 'nest':
nl_type |= Netlink.NLA_F_NESTED
attr_payload = b''
+ sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs)
for subname, subvalue in value.items():
- attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue)
+ attr_payload += self._add_attr(attr['nested-attributes'],
+ subname, subvalue, sub_attrs)
elif attr["type"] == 'flag':
+ if not value:
+ # If value is absent or false then skip attribute creation.
+ return b''
attr_payload = b''
elif attr["type"] == 'string':
attr_payload = str(value).encode('ascii') + b'\x00'
@@ -469,18 +523,36 @@ class YnlFamily(SpecFamily):
attr_payload = value
elif isinstance(value, str):
attr_payload = bytes.fromhex(value)
+ elif isinstance(value, dict) and attr.struct_name:
+ attr_payload = self._encode_struct(attr.struct_name, value)
else:
raise Exception(f'Unknown type for binary attribute, value: {value}')
- elif attr.is_auto_scalar:
- scalar = int(value)
- real_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64')
- format = NlAttr.get_format(real_type, attr.byte_order)
- attr_payload = format.pack(int(value))
- elif attr['type'] in NlAttr.type_formats:
- format = NlAttr.get_format(attr['type'], attr.byte_order)
- attr_payload = format.pack(int(value))
+ elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar:
+ scalar = self._get_scalar(attr, value)
+ if attr.is_auto_scalar:
+ attr_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64')
+ else:
+ attr_type = attr["type"]
+ format = NlAttr.get_format(attr_type, attr.byte_order)
+ attr_payload = format.pack(scalar)
elif attr['type'] in "bitfield32":
- attr_payload = struct.pack("II", int(value["value"]), int(value["selector"]))
+ scalar_value = self._get_scalar(attr, value["value"])
+ scalar_selector = self._get_scalar(attr, value["selector"])
+ attr_payload = struct.pack("II", scalar_value, scalar_selector)
+ elif attr['type'] == 'sub-message':
+ msg_format = self._resolve_selector(attr, search_attrs)
+ attr_payload = b''
+ if msg_format.fixed_header:
+ attr_payload += self._encode_struct(msg_format.fixed_header, value)
+ if msg_format.attr_set:
+ if msg_format.attr_set in self.attr_sets:
+ nl_type |= Netlink.NLA_F_NESTED
+ sub_attrs = SpaceAttrs(msg_format.attr_set, value, search_attrs)
+ for subname, subvalue in value.items():
+ attr_payload += self._add_attr(msg_format.attr_set,
+ subname, subvalue, sub_attrs)
+ else:
+ raise Exception(f"Unknown attribute-set '{msg_format.attr_set}'")
else:
raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
@@ -503,17 +575,13 @@ class YnlFamily(SpecFamily):
def _decode_binary(self, attr, attr_spec):
if attr_spec.struct_name:
- members = self.consts[attr_spec.struct_name]
- decoded = attr.as_struct(members)
- for m in members:
- if m.enum:
- decoded[m.name] = self._decode_enum(decoded[m.name], m)
+ decoded = self._decode_struct(attr.raw, attr_spec.struct_name)
elif attr_spec.sub_type:
decoded = attr.as_c_array(attr_spec.sub_type)
else:
decoded = attr.as_bin()
if attr_spec.display_hint:
- decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint)
+ decoded = self._formatted_string(decoded, attr_spec.display_hint)
return decoded
def _decode_array_nest(self, attr, attr_spec):
@@ -527,6 +595,16 @@ class YnlFamily(SpecFamily):
decoded.append({ item.type: subattrs })
return decoded
+ def _decode_nest_type_value(self, attr, attr_spec):
+ decoded = {}
+ value = attr
+ for name in attr_spec['type-value']:
+ value = NlAttr(value.raw, 0)
+ decoded[name] = value.type
+ subattrs = self._decode(NlAttrs(value.raw), attr_spec['nested-attributes'])
+ decoded.update(subattrs)
+ return decoded
+
def _decode_unknown(self, attr):
if attr.is_nest:
return self._decode(NlAttrs(attr.raw), None)
@@ -548,29 +626,27 @@ class YnlFamily(SpecFamily):
else:
rsp[name] = [decoded]
- def _resolve_selector(self, attr_spec, vals):
+ def _resolve_selector(self, attr_spec, search_attrs):
sub_msg = attr_spec.sub_message
if sub_msg not in self.sub_msgs:
raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}")
sub_msg_spec = self.sub_msgs[sub_msg]
selector = attr_spec.selector
- if selector not in vals:
- raise Exception(f"There is no value for {selector} to resolve '{attr_spec.name}'")
- value = vals[selector]
+ value = search_attrs.lookup(selector)
if value not in sub_msg_spec.formats:
raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
spec = sub_msg_spec.formats[value]
return spec
- def _decode_sub_msg(self, attr, attr_spec, rsp):
- msg_format = self._resolve_selector(attr_spec, rsp)
+ def _decode_sub_msg(self, attr, attr_spec, search_attrs):
+ msg_format = self._resolve_selector(attr_spec, search_attrs)
decoded = {}
offset = 0
if msg_format.fixed_header:
- decoded.update(self._decode_fixed_header(attr, msg_format.fixed_header));
- offset = self._fixed_header_size(msg_format.fixed_header)
+ decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header));
+ offset = self._struct_size(msg_format.fixed_header)
if msg_format.attr_set:
if msg_format.attr_set in self.attr_sets:
subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set)
@@ -579,10 +655,12 @@ class YnlFamily(SpecFamily):
raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'")
return decoded
- def _decode(self, attrs, space):
+ def _decode(self, attrs, space, outer_attrs = None):
+ rsp = dict()
if space:
attr_space = self.attr_sets[space]
- rsp = dict()
+ search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs)
+
for attr in attrs:
try:
attr_spec = attr_space.attrs_by_val[attr.type]
@@ -594,7 +672,7 @@ class YnlFamily(SpecFamily):
continue
if attr_spec["type"] == 'nest':
- subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'])
+ subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
decoded = subdict
elif attr_spec["type"] == 'string':
decoded = attr.as_strz()
@@ -617,7 +695,9 @@ class YnlFamily(SpecFamily):
selector = self._decode_enum(selector, attr_spec)
decoded = {"value": value, "selector": selector}
elif attr_spec["type"] == 'sub-message':
- decoded = self._decode_sub_msg(attr, attr_spec, rsp)
+ decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
+ elif attr_spec["type"] == 'nest-type-value':
+ decoded = self._decode_nest_type_value(attr, attr_spec)
else:
if not self.process_unknown:
raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
@@ -658,20 +738,23 @@ class YnlFamily(SpecFamily):
return
msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set))
- offset = 20 + self._fixed_header_size(op.fixed_header)
+ offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header)
path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
extack['bad-attr-offs'])
if path:
del extack['bad-attr-offs']
extack['bad-attr'] = path
- def _fixed_header_size(self, name):
+ def _struct_size(self, name):
if name:
- fixed_header_members = self.consts[name].members
+ members = self.consts[name].members
size = 0
- for m in fixed_header_members:
+ for m in members:
if m.type in ['pad', 'binary']:
- size += m.len
+ if m.struct:
+ size += self._struct_size(m.struct)
+ else:
+ size += m.len
else:
format = NlAttr.get_format(m.type, m.byte_order)
size += format.size
@@ -679,26 +762,71 @@ class YnlFamily(SpecFamily):
else:
return 0
- def _decode_fixed_header(self, msg, name):
- fixed_header_members = self.consts[name].members
- fixed_header_attrs = dict()
+ def _decode_struct(self, data, name):
+ members = self.consts[name].members
+ attrs = dict()
offset = 0
- for m in fixed_header_members:
+ for m in members:
value = None
if m.type == 'pad':
offset += m.len
elif m.type == 'binary':
- value = msg.raw[offset : offset + m.len]
- offset += m.len
+ if m.struct:
+ len = self._struct_size(m.struct)
+ value = self._decode_struct(data[offset : offset + len],
+ m.struct)
+ offset += len
+ else:
+ value = data[offset : offset + m.len]
+ offset += m.len
else:
format = NlAttr.get_format(m.type, m.byte_order)
- [ value ] = format.unpack_from(msg.raw, offset)
+ [ value ] = format.unpack_from(data, offset)
offset += format.size
if value is not None:
if m.enum:
value = self._decode_enum(value, m)
- fixed_header_attrs[m.name] = value
- return fixed_header_attrs
+ elif m.display_hint:
+ value = self._formatted_string(value, m.display_hint)
+ attrs[m.name] = value
+ return attrs
+
+ def _encode_struct(self, name, vals):
+ members = self.consts[name].members
+ attr_payload = b''
+ for m in members:
+ value = vals.pop(m.name) if m.name in vals else None
+ if m.type == 'pad':
+ attr_payload += bytearray(m.len)
+ elif m.type == 'binary':
+ if m.struct:
+ if value is None:
+ value = dict()
+ attr_payload += self._encode_struct(m.struct, value)
+ else:
+ if value is None:
+ attr_payload += bytearray(m.len)
+ else:
+ attr_payload += bytes.fromhex(value)
+ else:
+ if value is None:
+ value = 0
+ format = NlAttr.get_format(m.type, m.byte_order)
+ attr_payload += format.pack(value)
+ return attr_payload
+
+ def _formatted_string(self, raw, display_hint):
+ if display_hint == 'mac':
+ formatted = ':'.join('%02x' % b for b in raw)
+ elif display_hint == 'hex':
+ formatted = bytes.hex(raw, ' ')
+ elif display_hint in [ 'ipv4', 'ipv6' ]:
+ formatted = format(ipaddress.ip_address(raw))
+ elif display_hint == 'uuid':
+ formatted = str(uuid.UUID(bytes=raw))
+ else:
+ formatted = raw
+ return formatted
def handle_ntf(self, decoded):
msg = dict()
@@ -707,7 +835,7 @@ class YnlFamily(SpecFamily):
op = self.rsp_by_value[decoded.cmd()]
attrs = self._decode(decoded.raw_attrs, op.attr_set.name)
if op.fixed_header:
- attrs.update(self._decode_fixed_header(decoded, op.fixed_header))
+ attrs.update(self._decode_struct(decoded.raw, op.fixed_header))
msg['name'] = op['name']
msg['msg'] = attrs
@@ -716,11 +844,12 @@ class YnlFamily(SpecFamily):
def check_ntf(self):
while True:
try:
- reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT)
+ reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT)
except BlockingIOError:
return
nms = NlMsgs(reply)
+ self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.error:
print("Netlink error in ntf!?", os.strerror(-nl_msg.error))
@@ -759,20 +888,11 @@ class YnlFamily(SpecFamily):
req_seq = random.randint(1024, 65535)
msg = self.nlproto.message(nl_flags, op.req_value, 1, req_seq)
- fixed_header_members = []
if op.fixed_header:
- fixed_header_members = self.consts[op.fixed_header].members
- for m in fixed_header_members:
- value = vals.pop(m.name) if m.name in vals else 0
- if m.type == 'pad':
- msg += bytearray(m.len)
- elif m.type == 'binary':
- msg += bytes.fromhex(value)
- else:
- format = NlAttr.get_format(m.type, m.byte_order)
- msg += format.pack(value)
+ msg += self._encode_struct(op.fixed_header, vals)
+ search_attrs = SpaceAttrs(op.attr_set, vals)
for name, value in vals.items():
- msg += self._add_attr(op.attr_set.name, name, value)
+ msg += self._add_attr(op.attr_set.name, name, value, search_attrs)
msg = _genl_msg_finalize(msg)
self.sock.send(msg, 0)
@@ -780,8 +900,9 @@ class YnlFamily(SpecFamily):
done = False
rsp = []
while not done:
- reply = self.sock.recv(128 * 1024)
+ reply = self.sock.recv(self._recv_size)
nms = NlMsgs(reply, attr_space=op.attr_set)
+ self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.extack:
self._decode_extack(msg, op, nl_msg.extack)
@@ -808,7 +929,7 @@ class YnlFamily(SpecFamily):
rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name)
if op.fixed_header:
- rsp_msg.update(self._decode_fixed_header(decoded, op.fixed_header))
+ rsp_msg.update(self._decode_struct(decoded.raw, op.fixed_header))
rsp.append(rsp_msg)
if not rsp:
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index 49637b26c482..dda6686257a7 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -1,4 +1,5 @@
ethtool
devlink
netdev
+ovs
page-pool \ No newline at end of file
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index 28bdb1557a54..e194a7565861 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -9,7 +9,7 @@ ifeq ("$(DEBUG)","1")
CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
endif
-LDLIBS=-lmnl ../lib/ynl.a ../generated/protos.a
+LDLIBS=../lib/ynl.a ../generated/protos.a
SRCS=$(wildcard *.c)
BINS=$(patsubst %.c,%,${SRCS})
@@ -28,8 +28,8 @@ $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS)
clean:
rm -f *.o *.d *~
-hardclean: clean
+distclean: clean
rm -f $(BINS)
-.PHONY: all clean
+.PHONY: all clean distclean
.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/samples/ovs.c b/tools/net/ynl/samples/ovs.c
new file mode 100644
index 000000000000..3e975c003d77
--- /dev/null
+++ b/tools/net/ynl/samples/ovs.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include "ovs_datapath-user.h"
+
+int main(int argc, char **argv)
+{
+ struct ynl_sock *ys;
+ int err;
+
+ ys = ynl_sock_create(&ynl_ovs_datapath_family, NULL);
+ if (!ys)
+ return 1;
+
+ if (argc > 1) {
+ struct ovs_datapath_new_req *req;
+
+ req = ovs_datapath_new_req_alloc();
+ if (!req)
+ goto err_close;
+
+ ovs_datapath_new_req_set_upcall_pid(req, 1);
+ ovs_datapath_new_req_set_name(req, argv[1]);
+
+ err = ovs_datapath_new(ys, req);
+ ovs_datapath_new_req_free(req);
+ if (err)
+ goto err_close;
+ } else {
+ struct ovs_datapath_get_req_dump *req;
+ struct ovs_datapath_get_list *dps;
+
+ printf("Dump:\n");
+ req = ovs_datapath_get_req_dump_alloc();
+
+ dps = ovs_datapath_get_dump(ys, req);
+ ovs_datapath_get_req_dump_free(req);
+ if (!dps)
+ goto err_close;
+
+ ynl_dump_foreach(dps, dp) {
+ printf(" %s(%d): pid:%u cache:%u\n",
+ dp->name, dp->_hdr.dp_ifindex,
+ dp->upcall_pid, dp->masks_cache_size);
+ }
+ ovs_datapath_get_list_free(dps);
+ }
+
+ ynl_sock_destroy(ys);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg);
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c
index 098b5190d0e5..332f281ee5cb 100644
--- a/tools/net/ynl/samples/page-pool.c
+++ b/tools/net/ynl/samples/page-pool.c
@@ -95,6 +95,8 @@ int main(int argc, char **argv)
if (pp->_present.alloc_fast)
s->alloc_fast += pp->alloc_fast;
+ if (pp->_present.alloc_refill)
+ s->alloc_fast += pp->alloc_refill;
if (pp->_present.alloc_slow)
s->alloc_slow += pp->alloc_slow;
if (pp->_present.recycle_ring)
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 7fc1aa788f6f..6b7eb2d2aaf1 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -40,14 +40,6 @@ class BaseNlLib:
def get_family_id(self):
return 'ys->family_id'
- def parse_cb_run(self, cb, data, is_dump=False, indent=1):
- ind = '\n\t\t' + '\t' * indent + ' '
- if is_dump:
- return f"mnl_cb_run2(ys->rx_buf, len, 0, 0, {cb}, {data},{ind}ynl_cb_array, NLMSG_MIN_TYPE)"
- else:
- return f"mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,{ind}{cb}, {data},{ind}" + \
- "ynl_cb_array, NLMSG_MIN_TYPE)"
-
class Type(SpecAttr):
def __init__(self, family, attr_set, attr, value):
@@ -88,6 +80,8 @@ class Type(SpecAttr):
value = self.checks.get(limit, default)
if value is None:
return value
+ elif value in self.family.consts:
+ return c_upper(f"{self.family['name']}-{value}")
if not isinstance(value, int):
value = limit_to_number(value)
return value
@@ -168,15 +162,6 @@ class Type(SpecAttr):
spec = self._attr_policy(policy)
cw.p(f"\t[{self.enum_name}] = {spec},")
- def _mnl_type(self):
- # mnl does not have helpers for signed integer types
- # turn signed type into unsigned
- # this only makes sense for scalar types
- t = self.type
- if t[0] == 's':
- t = 'u' + t[1:]
- return t
-
def _attr_typol(self):
raise Exception(f"Type policy not implemented for class type {self.type}")
@@ -192,7 +177,7 @@ class Type(SpecAttr):
ri.cw.p(f"{line};")
def _attr_put_simple(self, ri, var, put_type):
- line = f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})"
+ line = f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})"
self._attr_put_line(ri, var, line)
def attr_put(self, ri, var):
@@ -217,7 +202,7 @@ class Type(SpecAttr):
if not self.is_multi_val():
ri.cw.p("if (ynl_attr_validate(yarg, attr))")
- ri.cw.p("return MNL_CB_ERROR;")
+ ri.cw.p("return YNL_PARSE_CB_ERROR;")
if self.presence_type() == 'bit':
ri.cw.p(f"{var}->_present.{self.c_name} = 1;")
@@ -264,7 +249,7 @@ class TypeUnused(Type):
return []
def _attr_get(self, ri, var):
- return ['return MNL_CB_ERROR;'], None, None
+ return ['return YNL_PARSE_CB_ERROR;'], None, None
def _attr_typol(self):
return '.type = YNL_PT_REJECT, '
@@ -357,9 +342,6 @@ class TypeScalar(Type):
else:
self.type_name = '__' + self.type
- def mnl_type(self):
- return self._mnl_type()
-
def _attr_policy(self, policy):
if 'flags-mask' in self.checks or self.is_bitfield:
if self.is_bitfield:
@@ -387,10 +369,10 @@ class TypeScalar(Type):
return [f'{self.type_name} {self.c_name}{self.byte_order_comment}']
def attr_put(self, ri, var):
- self._attr_put_simple(ri, var, self.mnl_type())
+ self._attr_put_simple(ri, var, self.type)
def _attr_get(self, ri, var):
- return f"{var}->{self.c_name} = mnl_attr_get_{self.mnl_type()}(attr);", None, None
+ return f"{var}->{self.c_name} = ynl_attr_get_{self.type}(attr);", None, None
def _setter_lines(self, ri, member, presence):
return [f"{member} = {self.c_name};"]
@@ -404,7 +386,7 @@ class TypeFlag(Type):
return '.type = YNL_PT_FLAG, '
def attr_put(self, ri, var):
- self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, 0, NULL)")
+ self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, NULL, 0)")
def _attr_get(self, ri, var):
return [], None, None
@@ -446,15 +428,15 @@ class TypeString(Type):
cw.p(f"\t[{self.enum_name}] = {spec},")
def attr_put(self, ri, var):
- self._attr_put_simple(ri, var, 'strz')
+ self._attr_put_simple(ri, var, 'str')
def _attr_get(self, ri, var):
len_mem = var + '->_present.' + self.c_name + '_len'
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len + 1);",
- f"memcpy({var}->{self.c_name}, mnl_attr_get_str(attr), len);",
+ f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);",
f"{var}->{self.c_name}[len] = 0;"], \
- ['len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));'], \
+ ['len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));'], \
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
@@ -493,15 +475,15 @@ class TypeBinary(Type):
return mem
def attr_put(self, ri, var):
- self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, " +
- f"{var}->_present.{self.c_name}_len, {var}->{self.c_name})")
+ self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " +
+ f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)")
def _attr_get(self, ri, var):
len_mem = var + '->_present.' + self.c_name + '_len'
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len);",
- f"memcpy({var}->{self.c_name}, mnl_attr_get_payload(attr), len);"], \
- ['len = mnl_attr_get_payload_len(attr);'], \
+ f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
+ ['len = ynl_attr_data_len(attr);'], \
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
@@ -526,11 +508,11 @@ class TypeBitfield32(Type):
return f"NLA_POLICY_BITFIELD32({mask})"
def attr_put(self, ri, var):
- line = f"mnl_attr_put(nlh, {self.enum_name}, sizeof(struct nla_bitfield32), &{var}->{self.c_name})"
+ line = f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}, sizeof(struct nla_bitfield32))"
self._attr_put_line(ri, var, line)
def _attr_get(self, ri, var):
- return f"memcpy(&{var}->{self.c_name}, mnl_attr_get_payload(attr), sizeof(struct nla_bitfield32));", None, None
+ return f"memcpy(&{var}->{self.c_name}, ynl_attr_data(attr), sizeof(struct nla_bitfield32));", None, None
def _setter_lines(self, ri, member, presence):
return [f"memcpy(&{member}, {self.c_name}, sizeof(struct nla_bitfield32));"]
@@ -563,7 +545,7 @@ class TypeNest(Type):
def _attr_get(self, ri, var):
get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
- "return MNL_CB_ERROR;"]
+ "return YNL_PARSE_CB_ERROR;"]
init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
f"parg.data = &{var}->{self.c_name};"]
return get_lines, init_lines, None
@@ -589,9 +571,6 @@ class TypeMultiAttr(Type):
def presence_type(self):
return 'count'
- def mnl_type(self):
- return self._mnl_type()
-
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
return self.nested_struct_type
@@ -625,9 +604,9 @@ class TypeMultiAttr(Type):
def attr_put(self, ri, var):
if self.attr['type'] in scalars:
- put_type = self.mnl_type()
+ put_type = self.type
ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
- ri.cw.p(f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+ ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
@@ -665,7 +644,7 @@ class TypeArrayNest(Type):
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
get_lines = [f'attr_{self.c_name} = attr;',
- 'mnl_attr_for_each_nested(attr2, attr)',
+ 'ynl_attr_for_each_nested(attr2, attr)',
f'\t{var}->n_{self.c_name}++;']
return get_lines, None, local_vars
@@ -690,8 +669,8 @@ class TypeNestTypeValue(Type):
local_vars += [f'__u32 {", ".join(tv_names)};']
for level in self.attr["type-value"]:
level = c_lower(level)
- get_lines += [f'attr_{level} = mnl_attr_get_payload({prev});']
- get_lines += [f'{level} = mnl_attr_get_type(attr_{level});']
+ get_lines += [f'attr_{level} = ynl_attr_data({prev});']
+ get_lines += [f'{level} = ynl_attr_type(attr_{level});']
prev = 'attr_' + level
tv_args = f", {', '.join(tv_names)}"
@@ -1550,7 +1529,7 @@ def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None):
cw.block_start()
if enum and enum.type == 'flags':
cw.p(f'{arg_name} = ffs({arg_name}) - 1;')
- cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)MNL_ARRAY_SIZE({map_name}))')
+ cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)YNL_ARRAY_SIZE({map_name}))')
cw.p('return NULL;')
cw.p(f'return {map_name}[{arg_name}];')
cw.block_end()
@@ -1612,12 +1591,12 @@ def put_req_nested(ri, struct):
ri.cw.block_start()
ri.cw.write_func_lvar('struct nlattr *nest;')
- ri.cw.p("nest = mnl_attr_nest_start(nlh, attr_type);")
+ ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);")
for _, arg in struct.member_list():
arg.attr_put(ri, "obj")
- ri.cw.p("mnl_attr_nest_end(nlh, nest);")
+ ri.cw.p("ynl_attr_nest_end(nlh, nest);")
ri.cw.nl()
ri.cw.p('return 0;')
@@ -1627,11 +1606,11 @@ def put_req_nested(ri, struct):
def _multi_parse(ri, struct, init_lines, local_vars):
if struct.nested:
- iter_line = "mnl_attr_for_each_nested(attr, nested)"
+ iter_line = "ynl_attr_for_each_nested(attr, nested)"
else:
if ri.fixed_hdr:
local_vars += ['void *hdr;']
- iter_line = "mnl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
+ iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
array_nests = set()
multi_attrs = set()
@@ -1665,7 +1644,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p(f'dst->{arg} = {arg};')
if ri.fixed_hdr:
- ri.cw.p('hdr = mnl_nlmsg_get_payload_offset(nlh, sizeof(struct genlmsghdr));')
+ ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
for anest in sorted(all_multi):
aspec = struct[anest]
@@ -1674,7 +1653,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.nl()
ri.cw.block_start(line=iter_line)
- ri.cw.p('unsigned int type = mnl_attr_get_type(attr);')
+ ri.cw.p('unsigned int type = ynl_attr_type(attr);')
ri.cw.nl()
first = True
@@ -1690,14 +1669,14 @@ def _multi_parse(ri, struct, init_lines, local_vars):
aspec = struct[anest]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
- ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+ ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
- ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})")
+ ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})")
ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
- ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, mnl_attr_get_type(attr)))")
- ri.cw.p('return MNL_CB_ERROR;')
+ ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))")
+ ri.cw.p('return YNL_PARSE_CB_ERROR;')
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -1712,13 +1691,13 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
ri.cw.block_start(line=iter_line)
- ri.cw.block_start(line=f"if (mnl_attr_get_type(attr) == {aspec.enum_name})")
+ ri.cw.block_start(line=f"if (ynl_attr_type(attr) == {aspec.enum_name})")
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr))")
- ri.cw.p('return MNL_CB_ERROR;')
+ ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.type in scalars:
- ri.cw.p(f"dst->{aspec.c_name}[i] = mnl_attr_get_{aspec.mnl_type()}(attr);")
+ ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);")
else:
raise Exception('Nest parsing type not supported yet')
ri.cw.p('i++;')
@@ -1730,7 +1709,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if struct.nested:
ri.cw.p('return 0;')
else:
- ri.cw.p('return MNL_CB_OK;')
+ ri.cw.p('return YNL_PARSE_CB_OK;')
ri.cw.block_end()
ri.cw.nl()
@@ -1760,10 +1739,9 @@ def parse_rsp_msg(ri, deref=False):
return
func_args = ['const struct nlmsghdr *nlh',
- 'void *data']
+ 'struct ynl_parse_arg *yarg']
local_vars = [f'{type_name(ri, "reply", deref=deref)} *dst;',
- 'struct ynl_parse_arg *yarg = data;',
'const struct nlattr *attr;']
init_lines = ['dst = yarg->data;']
@@ -1774,7 +1752,7 @@ def parse_rsp_msg(ri, deref=False):
else:
# Empty reply
ri.cw.block_start()
- ri.cw.p('return MNL_CB_OK;')
+ ri.cw.p('return YNL_PARSE_CB_OK;')
ri.cw.block_end()
ri.cw.nl()
@@ -1809,7 +1787,7 @@ def print_req(ri):
if ri.fixed_hdr:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
- ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);")
+ ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
ri.cw.nl()
@@ -1859,20 +1837,21 @@ def print_dump(ri):
ri.cw.write_func_lvar(local_vars)
- ri.cw.p('yds.ys = ys;')
+ ri.cw.p('yds.yarg.ys = ys;')
+ ri.cw.p(f"yds.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
+ ri.cw.p("yds.yarg.data = NULL;")
ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});")
ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;")
if ri.op.value is not None:
ri.cw.p(f'yds.rsp_cmd = {ri.op.enum_name};')
else:
ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
- ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
if ri.fixed_hdr:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
- ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);")
+ ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
ri.cw.nl()
@@ -2363,6 +2342,10 @@ def print_kernel_family_struct_hdr(family, cw):
cw.p(f"extern struct genl_family {family.c_name}_nl_family;")
cw.nl()
+ if 'sock-priv' in family.kernel_family:
+ cw.p(f'void {family.c_name}_nl_sock_priv_init({family.kernel_family["sock-priv"]} *priv);')
+ cw.p(f'void {family.c_name}_nl_sock_priv_destroy({family.kernel_family["sock-priv"]} *priv);')
+ cw.nl()
def print_kernel_family_struct_src(family, cw):
@@ -2384,6 +2367,11 @@ def print_kernel_family_struct_src(family, cw):
if family.mcgrps['list']:
cw.p(f'.mcgrps\t\t= {family.c_name}_nl_mcgrps,')
cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),')
+ if 'sock-priv' in family.kernel_family:
+ cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),')
+ # Force cast here, actual helpers take pointer to the real type.
+ cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,')
+ cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,')
cw.block_end(';')
@@ -2584,7 +2572,7 @@ def render_user_family(family, cw, prototype):
cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
if family.ntfs:
cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
- cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
cw.block_end(line=';')
@@ -2680,6 +2668,7 @@ def main():
cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
cw.nl()
headers = ['uapi/' + parsed.uapi_header]
+ headers += parsed.kernel_family.get('headers', [])
else:
cw.p('#include <stdlib.h>')
cw.p('#include <string.h>')
@@ -2700,7 +2689,6 @@ def main():
if args.mode == "user":
if not args.header:
- cw.p("#include <libmnl/libmnl.h>")
cw.p("#include <linux/genetlink.h>")
cw.nl()
for one in args.user_header:
diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py
index 262d88f88696..927407b3efb3 100755
--- a/tools/net/ynl/ynl-gen-rst.py
+++ b/tools/net/ynl/ynl-gen-rst.py
@@ -189,12 +189,19 @@ def parse_operations(operations: List[Dict[str, Any]]) -> str:
def parse_entries(entries: List[Dict[str, Any]], level: int) -> str:
"""Parse a list of entries"""
+ ignored = ["pad"]
lines = []
for entry in entries:
if isinstance(entry, dict):
# entries could be a list or a dictionary
+ field_name = entry.get("name", "")
+ if field_name in ignored:
+ continue
+ type_ = entry.get("type")
+ if type_:
+ field_name += f" ({inline(type_)})"
lines.append(
- rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level)
+ rst_fields(field_name, sanitize(entry.get("doc", "")), level)
)
elif isinstance(entry, list):
lines.append(rst_list_inline(entry, level))
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index e327cd827135..3a1d80a7878d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -509,11 +509,20 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
if (op2 == 0x01) {
- if (modrm == 0xca)
- insn->type = INSN_CLAC;
- else if (modrm == 0xcb)
- insn->type = INSN_STAC;
-
+ switch (insn_last_prefix_id(&ins)) {
+ case INAT_PFX_REPE:
+ case INAT_PFX_REPNE:
+ if (modrm == 0xca)
+ /* eretu/erets */
+ insn->type = INSN_CONTEXT_SWITCH;
+ break;
+ default:
+ if (modrm == 0xca)
+ insn->type = INSN_CLAC;
+ else if (modrm == 0xcb)
+ insn->type = INSN_STAC;
+ break;
+ }
} else if (op2 >= 0x80 && op2 <= 0x8f) {
insn->type = INSN_JUMP_CONDITIONAL;
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 29e949579ede..4134d27c696b 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -83,7 +83,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
* TODO: Once we have DWARF CFI and smarter instruction decoding logic,
* ensure the same register is used in the mov and jump instructions.
*
- * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ * NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps.
*/
struct reloc *arch_find_switch_table(struct objtool_file *file,
struct instruction *insn)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 548ec3cd7c00..eb7e12ebc1d0 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3620,6 +3620,18 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
if (!save_insn->visited) {
+ /*
+ * If the restore hint insn is at the
+ * beginning of a basic block and was
+ * branched to from elsewhere, and the
+ * save insn hasn't been visited yet,
+ * defer following this branch for now.
+ * It will be seen later via the
+ * straight-line path.
+ */
+ if (!prev_insn)
+ return 0;
+
WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
return 1;
}
@@ -3980,11 +3992,11 @@ static int validate_retpoline(struct objtool_file *file)
if (insn->type == INSN_RETURN) {
if (opts.rethunk) {
- WARN_INSN(insn, "'naked' return found in RETHUNK build");
+ WARN_INSN(insn, "'naked' return found in MITIGATION_RETHUNK build");
} else
continue;
} else {
- WARN_INSN(insn, "indirect %s found in RETPOLINE build",
+ WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build",
insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
}
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 1685d7ea6a9f..3a301696f005 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -6,6 +6,7 @@
*
* Yes, this is unfortunate. A better solution is in the works.
*/
+NORETURN(__fortify_panic)
NORETURN(__kunit_abort)
NORETURN(__module_put_and_kthread_exit)
NORETURN(__reiserfs_panic)
@@ -22,7 +23,6 @@ NORETURN(do_exit)
NORETURN(do_group_exit)
NORETURN(do_task_dead)
NORETURN(ex_handler_msr_mce)
-NORETURN(fortify_panic)
NORETURN(hlt_play_dead)
NORETURN(hv_ghcb_terminate)
NORETURN(kthread_complete_and_exit)
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 1b90575ee3c8..3b12595193c9 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -47,6 +47,10 @@ Print PMU events and metrics limited to the specific PMU name.
--json::
Output in JSON format.
+-o::
+--output=::
+ Output file name. By default output is written to stdout.
+
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
---------------
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 27e7c478880f..f8774a9b1377 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -236,6 +236,16 @@ else
SHELLCHECK := $(shell which shellcheck 2> /dev/null)
endif
+# shellcheck is using in tools/perf/tests/Build with option -a/--check-sourced (
+# introduced in v0.4.7) and -S/--severity (introduced in v0.6.0). So make the
+# minimal shellcheck version as v0.6.0.
+ifneq ($(SHELLCHECK),)
+ ifeq ($(shell expr $(shell $(SHELLCHECK) --version | grep version: | \
+ sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 060), 1)
+ SHELLCHECK :=
+ endif
+endif
+
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 61c2c96cc070..e27a1b1288c2 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -30,6 +30,8 @@
* functions.
*/
struct print_state {
+ /** @fp: File to write output to. */
+ FILE *fp;
/**
* @pmu_glob: Optionally restrict PMU and metric matching to PMU or
* debugfs subsystem name.
@@ -66,13 +68,15 @@ static void default_print_start(void *ps)
{
struct print_state *print_state = ps;
- if (!print_state->name_only && pager_in_use())
- printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
+ if (!print_state->name_only && pager_in_use()) {
+ fprintf(print_state->fp,
+ "\nList of pre-defined events (to be used in -e or -M):\n\n");
+ }
}
static void default_print_end(void *print_state __maybe_unused) {}
-static void wordwrap(const char *s, int start, int max, int corr)
+static void wordwrap(FILE *fp, const char *s, int start, int max, int corr)
{
int column = start;
int n;
@@ -82,10 +86,10 @@ static void wordwrap(const char *s, int start, int max, int corr)
int wlen = strcspn(s, " \t\n");
if ((column + wlen >= max && column > start) || saw_newline) {
- printf("\n%*s", start, "");
+ fprintf(fp, "\n%*s", start, "");
column = start + corr;
}
- n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+ n = fprintf(fp, "%s%.*s", column > start ? " " : "", wlen, s);
if (n <= 0)
break;
saw_newline = s[wlen] == '\n';
@@ -104,6 +108,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
{
struct print_state *print_state = ps;
int pos;
+ FILE *fp = print_state->fp;
if (deprecated && !print_state->deprecated)
return;
@@ -119,30 +124,30 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
if (print_state->name_only) {
if (event_alias && strlen(event_alias))
- printf("%s ", event_alias);
+ fprintf(fp, "%s ", event_alias);
else
- printf("%s ", event_name);
+ fprintf(fp, "%s ", event_name);
return;
}
if (strcmp(print_state->last_topic, topic ?: "")) {
if (topic)
- printf("\n%s:\n", topic);
+ fprintf(fp, "\n%s:\n", topic);
zfree(&print_state->last_topic);
print_state->last_topic = strdup(topic ?: "");
}
if (event_alias && strlen(event_alias))
- pos = printf(" %s OR %s", event_name, event_alias);
+ pos = fprintf(fp, " %s OR %s", event_name, event_alias);
else
- pos = printf(" %s", event_name);
+ pos = fprintf(fp, " %s", event_name);
if (!topic && event_type_desc) {
for (; pos < 53; pos++)
- putchar(' ');
- printf("[%s]\n", event_type_desc);
+ fputc(' ', fp);
+ fprintf(fp, "[%s]\n", event_type_desc);
} else
- putchar('\n');
+ fputc('\n', fp);
if (desc && print_state->desc) {
char *desc_with_unit = NULL;
@@ -155,22 +160,22 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
? "%s. Unit: %s" : "%s Unit: %s",
desc, pmu_name);
}
- printf("%*s", 8, "[");
- wordwrap(desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0);
- printf("]\n");
+ fprintf(fp, "%*s", 8, "[");
+ wordwrap(fp, desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0);
+ fprintf(fp, "]\n");
free(desc_with_unit);
}
long_desc = long_desc ?: desc;
if (long_desc && print_state->long_desc) {
- printf("%*s", 8, "[");
- wordwrap(long_desc, 8, pager_get_columns(), 0);
- printf("]\n");
+ fprintf(fp, "%*s", 8, "[");
+ wordwrap(fp, long_desc, 8, pager_get_columns(), 0);
+ fprintf(fp, "]\n");
}
if (print_state->detailed && encoding_desc) {
- printf("%*s", 8, "");
- wordwrap(encoding_desc, 8, pager_get_columns(), 0);
- putchar('\n');
+ fprintf(fp, "%*s", 8, "");
+ wordwrap(fp, encoding_desc, 8, pager_get_columns(), 0);
+ fputc('\n', fp);
}
}
@@ -184,6 +189,7 @@ static void default_print_metric(void *ps,
const char *unit __maybe_unused)
{
struct print_state *print_state = ps;
+ FILE *fp = print_state->fp;
if (print_state->event_glob &&
(!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) &&
@@ -192,27 +198,27 @@ static void default_print_metric(void *ps,
if (!print_state->name_only && !print_state->last_metricgroups) {
if (print_state->metricgroups) {
- printf("\nMetric Groups:\n");
+ fprintf(fp, "\nMetric Groups:\n");
if (!print_state->metrics)
- putchar('\n');
+ fputc('\n', fp);
} else {
- printf("\nMetrics:\n\n");
+ fprintf(fp, "\nMetrics:\n\n");
}
}
if (!print_state->last_metricgroups ||
strcmp(print_state->last_metricgroups, group ?: "")) {
if (group && print_state->metricgroups) {
if (print_state->name_only)
- printf("%s ", group);
+ fprintf(fp, "%s ", group);
else if (print_state->metrics) {
const char *gdesc = describe_metricgroup(group);
if (gdesc)
- printf("\n%s: [%s]\n", group, gdesc);
+ fprintf(fp, "\n%s: [%s]\n", group, gdesc);
else
- printf("\n%s:\n", group);
+ fprintf(fp, "\n%s:\n", group);
} else
- printf("%s\n", group);
+ fprintf(fp, "%s\n", group);
}
zfree(&print_state->last_metricgroups);
print_state->last_metricgroups = strdup(group ?: "");
@@ -223,53 +229,59 @@ static void default_print_metric(void *ps,
if (print_state->name_only) {
if (print_state->metrics &&
!strlist__has_entry(print_state->visited_metrics, name)) {
- printf("%s ", name);
+ fprintf(fp, "%s ", name);
strlist__add(print_state->visited_metrics, name);
}
return;
}
- printf(" %s\n", name);
+ fprintf(fp, " %s\n", name);
if (desc && print_state->desc) {
- printf("%*s", 8, "[");
- wordwrap(desc, 8, pager_get_columns(), 0);
- printf("]\n");
+ fprintf(fp, "%*s", 8, "[");
+ wordwrap(fp, desc, 8, pager_get_columns(), 0);
+ fprintf(fp, "]\n");
}
if (long_desc && print_state->long_desc) {
- printf("%*s", 8, "[");
- wordwrap(long_desc, 8, pager_get_columns(), 0);
- printf("]\n");
+ fprintf(fp, "%*s", 8, "[");
+ wordwrap(fp, long_desc, 8, pager_get_columns(), 0);
+ fprintf(fp, "]\n");
}
if (expr && print_state->detailed) {
- printf("%*s", 8, "[");
- wordwrap(expr, 8, pager_get_columns(), 0);
- printf("]\n");
+ fprintf(fp, "%*s", 8, "[");
+ wordwrap(fp, expr, 8, pager_get_columns(), 0);
+ fprintf(fp, "]\n");
}
if (threshold && print_state->detailed) {
- printf("%*s", 8, "[");
- wordwrap(threshold, 8, pager_get_columns(), 0);
- printf("]\n");
+ fprintf(fp, "%*s", 8, "[");
+ wordwrap(fp, threshold, 8, pager_get_columns(), 0);
+ fprintf(fp, "]\n");
}
}
struct json_print_state {
+ /** @fp: File to write output to. */
+ FILE *fp;
/** Should a separator be printed prior to the next item? */
bool need_sep;
};
-static void json_print_start(void *print_state __maybe_unused)
+static void json_print_start(void *ps)
{
- printf("[\n");
+ struct json_print_state *print_state = ps;
+ FILE *fp = print_state->fp;
+
+ fprintf(fp, "[\n");
}
static void json_print_end(void *ps)
{
struct json_print_state *print_state = ps;
+ FILE *fp = print_state->fp;
- printf("%s]\n", print_state->need_sep ? "\n" : "");
+ fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : "");
}
-static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...)
+static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, ...)
{
va_list args;
@@ -318,7 +330,7 @@ static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...)
}
}
va_end(args);
- fputs(buf->buf, stdout);
+ fputs(buf->buf, fp);
}
static void json_print_event(void *ps, const char *pmu_name, const char *topic,
@@ -330,60 +342,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic,
{
struct json_print_state *print_state = ps;
bool need_sep = false;
+ FILE *fp = print_state->fp;
struct strbuf buf;
strbuf_init(&buf, 0);
- printf("%s{\n", print_state->need_sep ? ",\n" : "");
+ fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
print_state->need_sep = true;
if (pmu_name) {
- fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name);
+ fix_escape_fprintf(fp, &buf, "\t\"Unit\": \"%S\"", pmu_name);
need_sep = true;
}
if (topic) {
- fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic);
+ fix_escape_fprintf(fp, &buf, "%s\t\"Topic\": \"%S\"",
+ need_sep ? ",\n" : "",
+ topic);
need_sep = true;
}
if (event_name) {
- fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "",
- event_name);
+ fix_escape_fprintf(fp, &buf, "%s\t\"EventName\": \"%S\"",
+ need_sep ? ",\n" : "",
+ event_name);
need_sep = true;
}
if (event_alias && strlen(event_alias)) {
- fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "",
- event_alias);
+ fix_escape_fprintf(fp, &buf, "%s\t\"EventAlias\": \"%S\"",
+ need_sep ? ",\n" : "",
+ event_alias);
need_sep = true;
}
if (scale_unit && strlen(scale_unit)) {
- fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "",
- scale_unit);
+ fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"",
+ need_sep ? ",\n" : "",
+ scale_unit);
need_sep = true;
}
if (event_type_desc) {
- fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "",
- event_type_desc);
+ fix_escape_fprintf(fp, &buf, "%s\t\"EventType\": \"%S\"",
+ need_sep ? ",\n" : "",
+ event_type_desc);
need_sep = true;
}
if (deprecated) {
- fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "",
- deprecated ? "1" : "0");
+ fix_escape_fprintf(fp, &buf, "%s\t\"Deprecated\": \"%S\"",
+ need_sep ? ",\n" : "",
+ deprecated ? "1" : "0");
need_sep = true;
}
if (desc) {
- fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
- desc);
+ fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"",
+ need_sep ? ",\n" : "",
+ desc);
need_sep = true;
}
if (long_desc) {
- fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
- long_desc);
+ fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"",
+ need_sep ? ",\n" : "",
+ long_desc);
need_sep = true;
}
if (encoding_desc) {
- fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "",
- encoding_desc);
+ fix_escape_fprintf(fp, &buf, "%s\t\"Encoding\": \"%S\"",
+ need_sep ? ",\n" : "",
+ encoding_desc);
need_sep = true;
}
- printf("%s}", need_sep ? "\n" : "");
+ fprintf(fp, "%s}", need_sep ? "\n" : "");
strbuf_release(&buf);
}
@@ -394,43 +417,53 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
{
struct json_print_state *print_state = ps;
bool need_sep = false;
+ FILE *fp = print_state->fp;
struct strbuf buf;
strbuf_init(&buf, 0);
- printf("%s{\n", print_state->need_sep ? ",\n" : "");
+ fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
print_state->need_sep = true;
if (group) {
- fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group);
+ fix_escape_fprintf(fp, &buf, "\t\"MetricGroup\": \"%S\"", group);
need_sep = true;
}
if (name) {
- fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name);
+ fix_escape_fprintf(fp, &buf, "%s\t\"MetricName\": \"%S\"",
+ need_sep ? ",\n" : "",
+ name);
need_sep = true;
}
if (expr) {
- fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr);
+ fix_escape_fprintf(fp, &buf, "%s\t\"MetricExpr\": \"%S\"",
+ need_sep ? ",\n" : "",
+ expr);
need_sep = true;
}
if (threshold) {
- fix_escape_printf(&buf, "%s\t\"MetricThreshold\": \"%S\"", need_sep ? ",\n" : "",
- threshold);
+ fix_escape_fprintf(fp, &buf, "%s\t\"MetricThreshold\": \"%S\"",
+ need_sep ? ",\n" : "",
+ threshold);
need_sep = true;
}
if (unit) {
- fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit);
+ fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"",
+ need_sep ? ",\n" : "",
+ unit);
need_sep = true;
}
if (desc) {
- fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
- desc);
+ fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"",
+ need_sep ? ",\n" : "",
+ desc);
need_sep = true;
}
if (long_desc) {
- fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
- long_desc);
+ fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"",
+ need_sep ? ",\n" : "",
+ long_desc);
need_sep = true;
}
- printf("%s}", need_sep ? "\n" : "");
+ fprintf(fp, "%s}", need_sep ? "\n" : "");
strbuf_release(&buf);
}
@@ -449,8 +482,12 @@ static bool default_skip_duplicate_pmus(void *ps)
int cmd_list(int argc, const char **argv)
{
int i, ret = 0;
- struct print_state default_ps = {};
- struct print_state json_ps = {};
+ struct print_state default_ps = {
+ .fp = stdout,
+ };
+ struct print_state json_ps = {
+ .fp = stdout,
+ };
void *ps = &default_ps;
struct print_callbacks print_cb = {
.print_start = default_print_start,
@@ -461,6 +498,7 @@ int cmd_list(int argc, const char **argv)
};
const char *cputype = NULL;
const char *unit_name = NULL;
+ const char *output_path = NULL;
bool json = false;
struct option list_options[] = {
OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"),
@@ -471,6 +509,7 @@ int cmd_list(int argc, const char **argv)
"Print longer event descriptions."),
OPT_BOOLEAN(0, "details", &default_ps.detailed,
"Print information on the perf event names and expressions used internally by events."),
+ OPT_STRING('o', "output", &output_path, "file", "output file name"),
OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
"Print deprecated events."),
OPT_STRING(0, "cputype", &cputype, "cpu type",
@@ -497,6 +536,11 @@ int cmd_list(int argc, const char **argv)
argc = parse_options(argc, argv, list_options, list_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+ if (output_path) {
+ default_ps.fp = fopen(output_path, "w");
+ json_ps.fp = default_ps.fp;
+ }
+
setup_pager();
if (!default_ps.name_only)
@@ -618,5 +662,8 @@ out:
free(default_ps.last_topic);
free(default_ps.last_metricgroups);
strlist__delete(default_ps.visited_metrics);
+ if (output_path)
+ fclose(default_ps.fp);
+
return ret;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 91e6828c38cc..86c910125172 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -4080,8 +4080,8 @@ int cmd_record(int argc, const char **argv)
}
if (rec->switch_output.num_files) {
- rec->switch_output.filenames = calloc(sizeof(char *),
- rec->switch_output.num_files);
+ rec->switch_output.filenames = calloc(rec->switch_output.num_files,
+ sizeof(char *));
if (!rec->switch_output.filenames) {
err = -EINVAL;
goto out_opts;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index baf1ab083436..5301d1badd43 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -357,7 +357,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
static void prompt_integer(int *target, const char *msg)
{
- char *buf = malloc(0), *p;
+ char *buf = NULL, *p;
size_t dummy = 0;
int tmp;
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 35124a4ddcb2..bbfa3883e533 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -114,7 +114,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_alloc_restriction",
"MetricThreshold": "tma_alloc_restriction > 0.1",
@@ -124,7 +124,7 @@
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots",
"MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
@@ -169,7 +169,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_detect",
"MetricThreshold": "tma_branch_detect > 0.05",
@@ -179,7 +179,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
@@ -189,7 +189,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteer",
"MetricThreshold": "tma_branch_resteer > 0.05",
@@ -198,7 +198,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_cisc",
"MetricThreshold": "tma_cisc > 0.05",
@@ -217,7 +217,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
- "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_decode",
"MetricThreshold": "tma_decode > 0.05",
@@ -235,7 +235,6 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -245,7 +244,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_fast_nuke",
"MetricThreshold": "tma_fast_nuke > 0.05",
@@ -254,7 +253,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
@@ -264,7 +263,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
@@ -283,7 +282,7 @@
},
{
"BriefDescription": "Counts the number of floating point divide operations per uop.",
- "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_fpdiv_uops",
"MetricThreshold": "tma_fpdiv_uops > 0.2",
@@ -293,7 +292,7 @@
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots",
"MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
@@ -303,7 +302,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05",
@@ -330,7 +329,7 @@
},
{
"BriefDescription": "Instructions Per Cycle",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks",
"MetricName": "tma_info_core_ipc",
"Unit": "cpu_atom"
},
@@ -342,7 +341,7 @@
},
{
"BriefDescription": "Uops Per Instruction",
- "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+ "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY",
"MetricName": "tma_info_core_upi",
"Unit": "cpu_atom"
},
@@ -366,13 +365,13 @@
},
{
"BriefDescription": "Ratio of all branches which mispredict",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_branch_mispredict_ratio",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Ratio between Mispredicted branches and unknown branches",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+ "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY",
"MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio",
"Unit": "cpu_atom"
},
@@ -390,61 +389,61 @@
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_ipbranch",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL",
"MetricName": "tma_info_inst_mix_ipcall",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Far Branch",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
"MetricName": "tma_info_inst_mix_ipfarbranch",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Load",
- "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_inst_mix_ipload",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
"MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN",
"MetricName": "tma_info_inst_mix_ipmisp_cond_taken",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT",
"MetricName": "tma_info_inst_mix_ipmisp_indirect",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired return Branch Misprediction",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN",
"MetricName": "tma_info_inst_mix_ipmisp_ret",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired Branch Misprediction",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_ipmispredict",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Store",
- "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES",
"MetricName": "tma_info_inst_mix_ipstore",
"Unit": "cpu_atom"
},
@@ -480,19 +479,19 @@
},
{
"BriefDescription": "Cycle cost per DRAM hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Cycle cost per L2 hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Cycle cost per LLC hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit",
"Unit": "cpu_atom"
},
@@ -504,7 +503,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -524,7 +523,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05",
@@ -533,7 +532,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
- "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1",
@@ -542,7 +541,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -552,7 +550,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -571,7 +568,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
@@ -581,7 +578,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_mem_scheduler",
"MetricThreshold": "tma_mem_scheduler > 0.1",
@@ -590,7 +587,7 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
- "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
+ "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
@@ -609,7 +606,7 @@
},
{
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
@@ -620,7 +617,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_non_mem_scheduler",
"MetricThreshold": "tma_non_mem_scheduler > 0.1",
@@ -629,7 +626,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_nuke",
"MetricThreshold": "tma_nuke > 0.05",
@@ -638,7 +635,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
- "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_other_fb",
"MetricThreshold": "tma_other_fb > 0.05",
@@ -647,7 +644,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
- "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_other_l1",
"MetricThreshold": "tma_other_l1 > 0.05",
@@ -683,7 +680,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
- "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_predecode",
"MetricThreshold": "tma_predecode > 0.05",
@@ -692,7 +689,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_register",
"MetricThreshold": "tma_register > 0.1",
@@ -701,7 +698,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_reorder_buffer",
"MetricThreshold": "tma_reorder_buffer > 0.1",
@@ -722,7 +719,7 @@
{
"BriefDescription": "Counts the number of issue slots that result in retirement slots.",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots",
"MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
@@ -741,7 +738,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_serialization",
"MetricThreshold": "tma_serialization > 0.1",
@@ -768,7 +765,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
- "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_hit",
"MetricThreshold": "tma_stlb_hit > 0.05",
@@ -777,7 +774,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
- "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_miss",
"MetricThreshold": "tma_stlb_miss > 0.05",
@@ -795,8 +792,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.05",
@@ -875,7 +871,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
+ "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -905,7 +901,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -927,7 +922,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
@@ -948,7 +942,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -958,7 +952,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -979,7 +972,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1019,7 +1012,7 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
@@ -1154,7 +1147,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1164,7 +1157,6 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -1173,7 +1165,7 @@
},
{
"BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN",
"MetricGroup": "Bad;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
"MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200",
@@ -1181,7 +1173,7 @@
},
{
"BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN",
"MetricGroup": "Bad;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
"MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200",
@@ -1197,7 +1189,7 @@
},
{
"BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET",
"MetricGroup": "Bad;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmisp_ret",
"MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500",
@@ -1205,7 +1197,7 @@
},
{
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmispredict",
"MetricThreshold": "tma_info_bad_spec_ipmispredict < 200",
@@ -1213,7 +1205,6 @@
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
"MetricGroup": "Cor;SMT",
"MetricName": "tma_info_botlnk_l0_core_bound_likely",
@@ -1222,7 +1213,6 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
"MetricGroup": "DSBmiss;Fed;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_misses",
@@ -1232,7 +1222,6 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -1242,7 +1231,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
"MetricName": "tma_info_bottleneck_big_code",
@@ -1261,7 +1249,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
"MetricGroup": "Fed;FetchBW;Frontend",
"MetricName": "tma_info_bottleneck_instruction_fetch_bw",
@@ -1270,7 +1257,6 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -1280,7 +1266,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_info_bottleneck_memory_data_tlbs",
@@ -1290,7 +1275,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
"MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
"MetricName": "tma_info_bottleneck_memory_latency",
@@ -1300,7 +1284,6 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bottleneck_mispredictions",
@@ -1317,14 +1300,14 @@
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
- "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
"MetricName": "tma_info_branches_cond_nt",
"Unit": "cpu_core"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
- "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
"MetricName": "tma_info_branches_cond_tk",
"Unit": "cpu_core"
@@ -1352,7 +1335,7 @@
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
"MetricName": "tma_info_core_coreipc",
"Unit": "cpu_core"
@@ -1374,14 +1357,14 @@
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "tma_info_core_ilp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_frontend_dsb_coverage",
"MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
@@ -1390,28 +1373,28 @@
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+ "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
"MetricGroup": "DSBmiss",
"MetricName": "tma_info_frontend_dsb_switch_cost",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
- "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
"MetricName": "tma_info_frontend_fetch_upc",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
+ "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
"MetricGroup": "Fed;FetchLat;IcMiss",
"MetricName": "tma_info_frontend_icache_miss_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS",
"MetricGroup": "DSBmiss;Fed",
"MetricName": "tma_info_frontend_ipdsb_miss_ret",
"MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50",
@@ -1440,14 +1423,14 @@
},
{
"BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
- "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
"MetricGroup": "Fed;LSD",
"MetricName": "tma_info_frontend_lsd_coverage",
"Unit": "cpu_core"
},
{
"BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;PGO",
"MetricName": "tma_info_inst_mix_bptkbranch",
"Unit": "cpu_core"
@@ -1462,7 +1445,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
"MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1471,7 +1454,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx128",
"MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1480,7 +1463,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx256",
"MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1489,7 +1472,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_dp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1498,7 +1481,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_sp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1507,7 +1490,7 @@
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
"MetricName": "tma_info_inst_mix_ipbranch",
"MetricThreshold": "tma_info_inst_mix_ipbranch < 8",
@@ -1515,7 +1498,7 @@
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
"MetricName": "tma_info_inst_mix_ipcall",
"MetricThreshold": "tma_info_inst_mix_ipcall < 200",
@@ -1523,7 +1506,7 @@
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_ipflop",
"MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1531,7 +1514,7 @@
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
"MetricName": "tma_info_inst_mix_ipload",
"MetricThreshold": "tma_info_inst_mix_ipload < 3",
@@ -1539,7 +1522,7 @@
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
"MetricName": "tma_info_inst_mix_ipstore",
"MetricThreshold": "tma_info_inst_mix_ipstore < 8",
@@ -1547,7 +1530,7 @@
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
"MetricName": "tma_info_inst_mix_ipswpf",
"MetricThreshold": "tma_info_inst_mix_ipswpf < 100",
@@ -1555,7 +1538,7 @@
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
"MetricName": "tma_info_inst_mix_iptb",
"MetricThreshold": "tma_info_inst_mix_iptb < 13",
@@ -1655,14 +1638,14 @@
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY",
"MetricGroup": "Mem;MemoryBound;MemoryLat",
"MetricName": "tma_info_memory_load_miss_real_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES",
"MetricGroup": "Mem;MemoryBW;MemoryBound",
"MetricName": "tma_info_memory_mlp",
"PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
@@ -1670,28 +1653,28 @@
},
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
"MetricName": "tma_info_memory_oro_data_l2_mlp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
"MetricName": "tma_info_memory_oro_load_l2_miss_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
"MetricName": "tma_info_memory_oro_load_l2_mlp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
"MetricName": "tma_info_memory_oro_load_l3_miss_latency",
"Unit": "cpu_core"
@@ -1755,14 +1738,14 @@
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per a microcode Assist invocation",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
"MetricGroup": "Pipeline;Ret;Retire",
"MetricName": "tma_info_pipeline_ipassist",
"MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
@@ -1778,7 +1761,7 @@
},
{
"BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
- "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_strings_cycles",
"MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1",
@@ -1793,7 +1776,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
"MetricGroup": "HPC;Summary",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_core"
@@ -1816,7 +1799,7 @@
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
"MetricGroup": "Branches;OS",
"MetricName": "tma_info_system_ipfarbranch",
"MetricThreshold": "tma_info_system_ipfarbranch < 1e6",
@@ -1847,6 +1830,7 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1855,6 +1839,7 @@
},
{
"BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
"MetricGroup": "Mem;SoC",
"MetricName": "tma_info_system_mem_request_latency",
@@ -1897,7 +1882,7 @@
},
{
"BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY",
"MetricGroup": "Cor;Pipeline",
"MetricName": "tma_info_thread_execute_per_issue",
"PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
@@ -1905,7 +1890,7 @@
},
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks",
"MetricGroup": "Ret;Summary",
"MetricName": "tma_info_thread_ipc",
"Unit": "cpu_core"
@@ -1972,7 +1957,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1992,7 +1977,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -2003,7 +1987,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -2024,7 +2007,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -2045,7 +2028,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -2064,7 +2047,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2073,7 +2056,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2136,6 +2118,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2145,7 +2128,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -2155,7 +2137,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots",
+ "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -2225,7 +2207,6 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2246,7 +2227,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -2256,7 +2237,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -2266,7 +2247,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -2296,7 +2277,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2306,7 +2287,8 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2316,7 +2298,8 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2338,7 +2321,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -2348,7 +2331,7 @@
},
{
"BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
- "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
"MetricName": "tma_shuffles",
"MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -2357,7 +2340,8 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -2377,8 +2361,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2398,7 +2381,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2408,7 +2391,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
@@ -2448,7 +2430,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2467,7 +2449,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index c150c14ac6ed..a35edf7d86a9 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -195,7 +195,6 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -457,7 +456,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -466,7 +464,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -683,7 +680,6 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index e31a4aac9f20..56e54babcc26 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -400,7 +400,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -421,7 +420,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
@@ -449,7 +447,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -656,7 +653,6 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -699,7 +695,6 @@
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
"MetricGroup": "Cor;SMT",
"MetricName": "tma_info_botlnk_l0_core_bound_likely",
@@ -707,7 +702,6 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
"MetricGroup": "DSBmiss;Fed;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_misses",
@@ -716,7 +710,6 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -725,7 +718,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
"MetricName": "tma_info_bottleneck_big_code",
@@ -742,7 +734,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
"MetricGroup": "Fed;FetchBW;Frontend",
"MetricName": "tma_info_bottleneck_instruction_fetch_bw",
@@ -750,7 +741,6 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -759,7 +749,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_info_bottleneck_memory_data_tlbs",
@@ -768,7 +757,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
"MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
"MetricName": "tma_info_bottleneck_memory_latency",
@@ -777,7 +765,6 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bottleneck_mispredictions",
@@ -1301,6 +1288,7 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1455,7 +1443,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -1465,7 +1452,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1538,7 +1524,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1596,6 +1581,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -1604,7 +1590,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -1676,7 +1661,6 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1758,6 +1742,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -1767,6 +1752,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -1822,6 +1808,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -1840,7 +1827,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1868,7 +1854,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
index 4c598cfc5afa..e5fa8d6f9eb1 100755
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -414,16 +414,30 @@ EOF
# start daemon
daemon_start ${config} test
- # send 2 signals
- perf daemon signal --config ${config} --session test
- perf daemon signal --config ${config}
-
- # stop daemon
- daemon_exit ${config}
-
- # count is 2 perf.data for signals and 1 for perf record finished
- count=`ls ${base}/session-test/*perf.data* | wc -l`
- if [ ${count} -ne 3 ]; then
+ # send 2 signals then exit. Do this in a loop watching the number of
+ # files to avoid races. If the loop retries more than 600 times then
+ # give up.
+ local retries=0
+ local signals=0
+ local success=0
+ while [ ${retries} -lt 600 ] && [ ${success} -eq 0 ]; do
+ local files
+ files=`ls ${base}/session-test/*perf.data* 2> /dev/null | wc -l`
+ if [ ${signals} -eq 0 ]; then
+ perf daemon signal --config ${config} --session test
+ signals=1
+ elif [ ${signals} -eq 1 ] && [ $files -ge 1 ]; then
+ perf daemon signal --config ${config}
+ signals=2
+ elif [ ${signals} -eq 2 ] && [ $files -ge 2 ]; then
+ daemon_exit ${config}
+ signals=3
+ elif [ ${signals} -eq 3 ] && [ $files -ge 3 ]; then
+ success=1
+ fi
+ retries=$((${retries} +1))
+ done
+ if [ ${success} -eq 0 ]; then
error=1
echo "FAILED: perf data no generated"
fi
diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh
index 22b004f2b23e..8a868ae64560 100755
--- a/tools/perf/tests/shell/list.sh
+++ b/tools/perf/tests/shell/list.sh
@@ -3,17 +3,32 @@
# SPDX-License-Identifier: GPL-2.0
set -e
-err=0
shelldir=$(dirname "$0")
# shellcheck source=lib/setup_python.sh
. "${shelldir}"/lib/setup_python.sh
+list_output=$(mktemp /tmp/__perf_test.list_output.json.XXXXX)
+
+cleanup() {
+ rm -f "${list_output}"
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
test_list_json() {
echo "Json output test"
- perf list -j | $PYTHON -m json.tool
+ perf list -j -o "${list_output}"
+ $PYTHON -m json.tool "${list_output}"
echo "Json output test [Success]"
}
test_list_json
-exit $err
+cleanup
+exit 0
diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh
index 5ae7bd0031a8..fa4d71e2e72a 100755
--- a/tools/perf/tests/shell/script.sh
+++ b/tools/perf/tests/shell/script.sh
@@ -36,8 +36,7 @@ test_db()
echo "DB test"
# Check if python script is supported
- libpython=$(perf version --build-options | grep python | grep -cv OFF)
- if [ "${libpython}" != "1" ] ; then
+ if perf version --build-options | grep python | grep -q OFF ; then
echo "SKIP: python scripting is not supported"
err=2
return
@@ -54,7 +53,14 @@ def sample_table(*args):
def call_path_table(*args):
print(f'call_path_table({args}')
_end_of_file_
- perf record -g -o "${perfdatafile}" true
+ case $(uname -m)
+ in s390x)
+ cmd_flags="--call-graph dwarf -e cpu-clock";;
+ *)
+ cmd_flags="-g";;
+ esac
+
+ perf record $cmd_flags -o "${perfdatafile}" true
perf script -i "${perfdatafile}" -s "${db_test}"
echo "DB test [Success]"
}
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index 5f5320f7c6e2..dc5943a6352d 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -67,6 +67,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a
P_FLAG(BTIME);
P_FLAG(MNT_ID);
P_FLAG(DIOALIGN);
+ P_FLAG(MNT_ID_UNIQUE);
#undef P_FLAG
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 95f25e9fb994..55a300a0977b 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -103,7 +103,14 @@ struct evlist *evlist__new_default(void)
err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
if (err) {
evlist__delete(evlist);
- evlist = NULL;
+ return NULL;
+ }
+
+ if (evlist->core.nr_entries > 1) {
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
}
return evlist;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0888b7163b7c..fa359180ebf8 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -491,8 +491,8 @@ static int hist_entry__init(struct hist_entry *he,
}
if (symbol_conf.res_sample) {
- he->res_samples = calloc(sizeof(struct res_sample),
- symbol_conf.res_sample);
+ he->res_samples = calloc(symbol_conf.res_sample,
+ sizeof(struct res_sample));
if (!he->res_samples)
goto err_srcline;
}
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 75e2248416f5..178b00205fe6 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -115,6 +115,10 @@
SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
#endif
+#ifndef SYM_FUNC_ALIAS_MEMFUNC
+#define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS
+#endif
+
// In the kernel sources (include/linux/cfi_types.h), this has a different
// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
// definition:
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index ca3e0404f187..966cca5a3e88 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -286,7 +286,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids,
*out_metric_events = NULL;
ids_size = hashmap__size(ids);
- metric_events = calloc(sizeof(void *), ids_size + 1);
+ metric_events = calloc(ids_size + 1, sizeof(void *));
if (!metric_events)
return -ENOMEM;
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index b0fc48be623f..9e47712507cc 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -66,7 +66,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
put_tracing_file(events_path);
if (events_fd < 0) {
- printf("Error: failed to open tracing events directory\n");
+ pr_err("Error: failed to open tracing events directory\n");
return;
}
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 3712186353fb..2a0289c14959 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1055,11 +1055,11 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (thread_nr > n)
thread_nr = n;
- synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+ synthesize_threads = calloc(thread_nr, sizeof(pthread_t));
if (synthesize_threads == NULL)
goto free_dirent;
- args = calloc(sizeof(*args), thread_nr);
+ args = calloc(thread_nr, sizeof(*args));
if (args == NULL)
goto free_threads;
diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
index d9d9923af85c..a4b902f9e1c4 100644
--- a/tools/power/cpupower/bench/Makefile
+++ b/tools/power/cpupower/bench/Makefile
@@ -15,7 +15,7 @@ LIBS = -L../ -L$(OUTPUT) -lm -lcpupower
OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o
endif
-CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\"
+override CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\"
$(OUTPUT)%.o : %.c
$(ECHO) " CC " $@
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0b12c36902d8..030b388800f0 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -13,6 +13,7 @@ ldflags-y += --wrap=cxl_hdm_decode_init
ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
+ldflags-y += --wrap=cxl_endpoint_parse_cdat
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
@@ -65,4 +66,6 @@ cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
cxl_core-y += cxl_core_exports.o
+KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS))
+
obj-m += test/
diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild
index 61d5f7bcddf9..6b1927897856 100644
--- a/tools/testing/cxl/test/Kbuild
+++ b/tools/testing/cxl/test/Kbuild
@@ -8,3 +8,5 @@ obj-m += cxl_mock_mem.o
cxl_test-y := cxl.o
cxl_mock-y := mock.o
cxl_mock_mem-y := mem.o
+
+KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS))
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index a3cdbb2be038..908e0d083936 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -15,6 +15,8 @@
static int interleave_arithmetic;
+#define FAKE_QTG_ID 42
+
#define NR_CXL_HOST_BRIDGES 2
#define NR_CXL_SINGLE_HOST 1
#define NR_CXL_RCH 1
@@ -209,7 +211,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
- .qtg_id = 0,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
},
.target = { 0 },
@@ -224,7 +226,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
- .qtg_id = 1,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
@@ -239,7 +241,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
- .qtg_id = 2,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
},
.target = { 0 },
@@ -254,7 +256,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
- .qtg_id = 3,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
@@ -269,7 +271,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
- .qtg_id = 4,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
},
.target = { 2 },
@@ -284,7 +286,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
- .qtg_id = 5,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M,
},
.target = { 3 },
@@ -301,7 +303,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
- .qtg_id = 0,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, },
@@ -317,7 +319,7 @@ static struct {
.granularity = 0,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
- .qtg_id = 1,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
@@ -333,7 +335,7 @@ static struct {
.granularity = 0,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
- .qtg_id = 0,
+ .qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 16UL,
},
.target = { 0, 1, 0, 1, },
@@ -976,6 +978,48 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return 0;
}
+/*
+ * Faking the cxl_dpa_perf for the memdev when appropriate.
+ */
+static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
+ struct cxl_dpa_perf *dpa_perf)
+{
+ dpa_perf->qos_class = FAKE_QTG_ID;
+ dpa_perf->dpa_range = *range;
+ dpa_perf->coord.read_latency = 500;
+ dpa_perf->coord.write_latency = 500;
+ dpa_perf->coord.read_bandwidth = 1000;
+ dpa_perf->coord.write_bandwidth = 1000;
+}
+
+static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+ struct cxl_root *cxl_root __free(put_cxl_root) =
+ find_cxl_root(port);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ struct range pmem_range = {
+ .start = cxlds->pmem_res.start,
+ .end = cxlds->pmem_res.end,
+ };
+ struct range ram_range = {
+ .start = cxlds->ram_res.start,
+ .end = cxlds->ram_res.end,
+ };
+
+ if (!cxl_root)
+ return;
+
+ if (range_len(&ram_range))
+ dpa_perf_setup(port, &ram_range, &mds->ram_perf);
+
+ if (range_len(&pmem_range))
+ dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
+
+ cxl_memdev_update_perf(cxlmd);
+}
+
static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_adev = is_mock_adev,
.is_mock_bridge = is_mock_bridge,
@@ -989,6 +1033,7 @@ static struct cxl_mock_ops cxl_mock_ops = {
.devm_cxl_setup_hdm = mock_cxl_setup_hdm,
.devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
.devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
+ .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
};
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 1a61e68e3095..6f737941dc0e 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -285,6 +285,20 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL);
+void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+
+ if (ops && ops->is_mock_dev(cxlmd->dev.parent))
+ ops->cxl_endpoint_parse_cdat(port);
+ else
+ cxl_endpoint_parse_cdat(port);
+ put_cxl_mock_ops(index);
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL);
+
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
MODULE_IMPORT_NS(CXL);
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index a94223750346..d1b0271d2822 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -25,6 +25,7 @@ struct cxl_mock_ops {
int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
int (*devm_cxl_enumerate_decoders)(
struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
+ void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
};
void register_cxl_mock_ops(struct cxl_mock_ops *ops);
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 3bf506d4a63c..a6cf69a665e8 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -23,10 +23,16 @@ CONFIG_USB4=y
CONFIG_NET=y
CONFIG_MCTP=y
+CONFIG_MCTP_FLOWS=y
CONFIG_INET=y
CONFIG_MPTCP=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_WLAN_VENDOR_INTEL=y
+CONFIG_IWLWIFI=y
+
CONFIG_DAMON=y
CONFIG_DAMON_VADDR=y
CONFIG_DAMON_PADDR=y
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 0b6488efed47..7254c110ff23 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -146,6 +146,7 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
"""Runs the Linux UML binary. Must be named 'linux'."""
linux_bin = os.path.join(build_dir, 'linux')
params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt'])
+ print('Running tests with:\n$', linux_bin, ' '.join(shlex.quote(arg) for arg in params))
return subprocess.Popen([linux_bin] + params,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 8153251ea389..91a3627f301a 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -82,4 +82,6 @@ libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o
libnvdimm-y += libnvdimm_test.o
libnvdimm-y += config_check.o
+KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS))
+
obj-m += test/
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 15b6a111c3be..d117e8a96ded 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -67,6 +67,7 @@ TARGETS += nsfs
TARGETS += perf_events
TARGETS += pidfd
TARGETS += pid_namespace
+TARGETS += power_supply
TARGETS += powerpc
TARGETS += prctl
TARGETS += proc
@@ -78,6 +79,7 @@ TARGETS += riscv
TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
+TARGETS += rust
TARGETS += seccomp
TARGETS += sgx
TARGETS += sigaltstack
@@ -191,6 +193,8 @@ run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(BUILD) \
O=$(abs_objtree); \
done;
@@ -236,12 +240,16 @@ ifdef INSTALL_PATH
install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
+ install -m 744 kselftest/ktap_helpers.sh $(INSTALL_PATH)/kselftest/
install -m 744 run_kselftest.sh $(INSTALL_PATH)/
rm -f $(TEST_LIST)
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \
+ INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(INSTALL_PATH) \
O=$(abs_objtree) \
$(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
index a52ecd43dbe3..ca81afa4ee90 100644
--- a/tools/testing/selftests/alsa/test-pcmtest-driver.c
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -127,11 +127,11 @@ FIXTURE_SETUP(pcmtest) {
int err;
if (geteuid())
- SKIP(exit(-1), "This test needs root to run!");
+ SKIP(return, "This test needs root to run!");
err = read_patterns();
if (err)
- SKIP(exit(-1), "Can't read patterns. Probably, module isn't loaded");
+ SKIP(return, "Can't read patterns. Probably, module isn't loaded");
card_name = malloc(127);
ASSERT_NE(card_name, NULL);
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 5c2cc7e8c5d0..d8ade15e2789 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,6 +1,5 @@
bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-exceptions # JIT does not support calling kfunc bpf_throw: -524
fexit_sleep # The test never returns. The remaining tests cannot start.
kprobe_multi_bench_attach # needs CONFIG_FPROBE
kprobe_multi_test # needs CONFIG_FPROBE
@@ -11,3 +10,5 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
+verifier_arena # JIT does not support arena
+arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 1a63996c0304..f4a2f66a683d 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -3,3 +3,6 @@
exceptions # JIT does not support calling kfunc bpf_throw (exceptions)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
+verifier_iterating_callbacks
+verifier_arena # JIT does not support arena
+arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fd15017ed3b1..3b9eb40d6343 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,13 +34,26 @@ LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
CFLAGS += -g $(OPT_FLAGS) -rdynamic \
- -Wall -Werror \
+ -Wall -Werror -fno-omit-frame-pointer \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
LDFLAGS += $(SAN_LDFLAGS)
LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
+# The following tests perform type punning and they may break strict
+# aliasing rules, which are exploited by both GCC and clang by default
+# while optimizing. This can lead to broken programs.
+progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing
+progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing
+progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing
+progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing
+progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing
+progs/syscall.c-CFLAGS := -fno-strict-aliasing
+progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
+progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
+progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
+
ifneq ($(LLVM),)
# Silence some warnings when compiled with clang
CFLAGS += -Wno-unused-command-line-argument
@@ -64,6 +77,15 @@ TEST_INST_SUBDIRS := no_alu32
ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
TEST_INST_SUBDIRS += bpf_gcc
+
+# The following tests contain C code that, although technically legal,
+# triggers GCC warnings that cannot be disabled: declaration of
+# anonymous struct types in function parameter lists.
+progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error
endif
ifneq ($(CLANG_CPUV4),)
@@ -110,7 +132,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
- xdp_features
+ xdp_features bpf_test_no_cfi.ko
TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
@@ -175,8 +197,7 @@ endif
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
$(notdir $(TEST_GEN_PROGS) \
- $(TEST_GEN_PROGS_EXTENDED) \
- $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+ $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ;
# sort removes libbpf duplicates when not cross-building
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
@@ -233,6 +254,12 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmo
$(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod
$(Q)cp bpf_testmod/bpf_testmod.ko $@
+$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch])
+ $(call msg,MOD,,$@)
+ $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation
+ $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi
+ $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@
+
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
ifneq ($(CROSS_COMPILE),)
CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
@@ -382,11 +409,11 @@ endif
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
- -I$(abspath $(OUTPUT)/../usr/include)
+ -I$(abspath $(OUTPUT)/../usr/include) \
+ -Wno-compare-distinct-pointer-types
# TODO: enable me -Wsign-compare
-CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
@@ -504,7 +531,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
$(wildcard $(BPFDIR)/*.bpf.h) \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
- $(TRUNNER_BPF_CFLAGS))
+ $(TRUNNER_BPF_CFLAGS) \
+ $$($$<-CFLAGS))
$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -514,6 +542,7 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@
$(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -522,6 +551,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
$(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o))
@@ -532,6 +562,7 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
$(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
$(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
endif
# ensure we set up tests.h header generation rule just once
@@ -606,6 +637,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
flow_dissector_load.h \
ip_check_defrag_frags.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
+ $(OUTPUT)/bpf_test_no_cfi.ko \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
$(OUTPUT)/sign-file \
@@ -729,11 +761,12 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
+EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool \
$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \
+ bpf_test_no_cfi.ko \
liburandom_read.so)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 9af79c7a9b58..9b974e425af3 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -115,7 +115,7 @@ the insn 20 undoes map_value addition. It is currently impossible for the
verifier to understand such speculative pointer arithmetic.
Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12.
-__ https://reviews.llvm.org/D85570
+__ https://github.com/llvm/llvm-project/commit/ddf1864ace484035e3cde5e83b3a31ac81e059c6
The corresponding C code
@@ -165,7 +165,7 @@ This is due to a llvm BPF backend bug. `The fix`__
has been pushed to llvm 10.x release branch and will be
available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
-__ https://reviews.llvm.org/D78466
+__ https://github.com/llvm/llvm-project/commit/3cb7e7bf959dcd3b8080986c62e10a75c7af43f0
bpf_verif_scale/loop6.bpf.o test failure with Clang 12
======================================================
@@ -204,7 +204,7 @@ r5(w5) is eventually saved on stack at insn #24 for later use.
This cause later verifier failure. The bug has been `fixed`__ in
Clang 13.
-__ https://reviews.llvm.org/D97479
+__ https://github.com/llvm/llvm-project/commit/1959ead525b8830cc8a345f45e1c3ef9902d3229
BPF CO-RE-based tests and Clang version
=======================================
@@ -221,11 +221,11 @@ failures:
- __builtin_btf_type_id() [0_, 1_, 2_];
- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_].
-.. _0: https://reviews.llvm.org/D74572
-.. _1: https://reviews.llvm.org/D74668
-.. _2: https://reviews.llvm.org/D85174
-.. _3: https://reviews.llvm.org/D83878
-.. _4: https://reviews.llvm.org/D83242
+.. _0: https://github.com/llvm/llvm-project/commit/6b01b465388b204d543da3cf49efd6080db094a9
+.. _1: https://github.com/llvm/llvm-project/commit/072cde03aaa13a2c57acf62d79876bf79aa1919f
+.. _2: https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
+.. _3: https://github.com/llvm/llvm-project/commit/6d218b4adb093ff2e9764febbbc89f429412006c
+.. _4: https://github.com/llvm/llvm-project/commit/6d6750696400e7ce988d66a1a00e1d0cb32815f8
Floating-point tests and Clang version
======================================
@@ -234,7 +234,7 @@ Certain selftests, e.g. core_reloc, require support for the floating-point
types, which was introduced in `Clang 13`__. The older Clang versions will
either crash when compiling these tests, or generate an incorrect BTF.
-__ https://reviews.llvm.org/D83289
+__ https://github.com/llvm/llvm-project/commit/a7137b238a07d9399d3ae96c0b461571bd5aa8b2
Kernel function call test and Clang version
===========================================
@@ -248,7 +248,7 @@ Without it, the error from compiling bpf selftests looks like:
libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
-__ https://reviews.llvm.org/D93563
+__ https://github.com/llvm/llvm-project/commit/886f9ff53155075bd5f1e994f17b85d1e1b7470c
btf_tag test and Clang version
==============================
@@ -264,8 +264,8 @@ Without them, the btf_tag selftest will be skipped and you will observe:
#<test_num> btf_tag:SKIP
-.. _0: https://reviews.llvm.org/D111588
-.. _1: https://reviews.llvm.org/D111199
+.. _0: https://github.com/llvm/llvm-project/commit/a162b67c98066218d0d00aa13b99afb95d9bb5e6
+.. _1: https://github.com/llvm/llvm-project/commit/3466e00716e12e32fdb100e3fcfca5c2b3e8d784
Clang dependencies for static linking tests
===========================================
@@ -274,7 +274,7 @@ linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
generate valid BTF information for weak variables. Please make sure you use
Clang that contains the fix.
-__ https://reviews.llvm.org/D100362
+__ https://github.com/llvm/llvm-project/commit/968292cb93198442138128d850fd54dc7edc0035
Clang relocation changes
========================
@@ -292,7 +292,7 @@ Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
To fix this issue, user newer libbpf.
.. Links
-.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _clang reloc patch: https://github.com/llvm/llvm-project/commit/6a2ea84600ba4bd3b2733bd8f08f5115eb32164b
.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
Clang dependencies for the u32 spill test (xdpwall)
@@ -304,6 +304,6 @@ from running test_progs will look like:
.. code-block:: console
- test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? unexpected error: -4007
+ test_xdpwall:FAIL:Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5? unexpected error: -4007
-__ https://reviews.llvm.org/D109073
+__ https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 73ce11b0547d..b2b4c391eb0a 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -323,14 +323,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
break;
case 'p':
env.producer_cnt = strtol(arg, NULL, 10);
- if (env.producer_cnt <= 0) {
+ if (env.producer_cnt < 0) {
fprintf(stderr, "Invalid producer count: %s\n", arg);
argp_usage(state);
}
break;
case 'c':
env.consumer_cnt = strtol(arg, NULL, 10);
- if (env.consumer_cnt <= 0) {
+ if (env.consumer_cnt < 0) {
fprintf(stderr, "Invalid consumer count: %s\n", arg);
argp_usage(state);
}
@@ -495,14 +495,20 @@ extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_kretprobe;
+extern const struct bench bench_trig_kprobe_multi;
+extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_uprobe_base;
-extern const struct bench bench_trig_uprobe_with_nop;
-extern const struct bench bench_trig_uretprobe_with_nop;
-extern const struct bench bench_trig_uprobe_without_nop;
-extern const struct bench bench_trig_uretprobe_without_nop;
+extern const struct bench bench_trig_uprobe_nop;
+extern const struct bench bench_trig_uretprobe_nop;
+extern const struct bench bench_trig_uprobe_push;
+extern const struct bench bench_trig_uretprobe_push;
+extern const struct bench bench_trig_uprobe_ret;
+extern const struct bench bench_trig_uretprobe_ret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
@@ -537,14 +543,20 @@ static const struct bench *benchs[] = {
&bench_trig_tp,
&bench_trig_rawtp,
&bench_trig_kprobe,
+ &bench_trig_kretprobe,
+ &bench_trig_kprobe_multi,
+ &bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_fexit,
&bench_trig_fentry_sleep,
&bench_trig_fmodret,
&bench_trig_uprobe_base,
- &bench_trig_uprobe_with_nop,
- &bench_trig_uretprobe_with_nop,
- &bench_trig_uprobe_without_nop,
- &bench_trig_uretprobe_without_nop,
+ &bench_trig_uprobe_nop,
+ &bench_trig_uretprobe_nop,
+ &bench_trig_uprobe_push,
+ &bench_trig_uretprobe_push,
+ &bench_trig_uprobe_ret,
+ &bench_trig_uretprobe_ret,
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
@@ -607,6 +619,10 @@ static void setup_benchmark(void)
bench->setup();
for (i = 0; i < env.consumer_cnt; i++) {
+ if (!bench->consumer_thread) {
+ fprintf(stderr, "benchmark doesn't support consumers!\n");
+ exit(1);
+ }
err = pthread_create(&state.consumers[i], NULL,
bench->consumer_thread, (void *)(long)i);
if (err) {
@@ -626,6 +642,10 @@ static void setup_benchmark(void)
env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
for (i = 0; i < env.producer_cnt; i++) {
+ if (!bench->producer_thread) {
+ fprintf(stderr, "benchmark doesn't support producers!\n");
+ exit(1);
+ }
err = pthread_create(&state.producers[i], NULL,
bench->producer_thread, (void *)(long)i);
if (err) {
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index dbd362771d6a..ace0d1011a8e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -85,12 +85,36 @@ static void trigger_kprobe_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
+static void trigger_kretprobe_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
+}
+
+static void trigger_kprobe_multi_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
+}
+
+static void trigger_kretprobe_multi_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
+}
+
static void trigger_fentry_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void trigger_fexit_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fexit);
+}
+
static void trigger_fentry_sleep_setup(void)
{
setup_ctx();
@@ -113,12 +137,25 @@ static void trigger_fmodret_setup(void)
* GCC doesn't generate stack setup preample for these functions due to them
* having no input arguments and doing nothing in the body.
*/
-__weak void uprobe_target_with_nop(void)
+__weak void uprobe_target_nop(void)
{
asm volatile ("nop");
}
-__weak void uprobe_target_without_nop(void)
+__weak void opaque_noop_func(void)
+{
+}
+
+__weak int uprobe_target_push(void)
+{
+ /* overhead of function call is negligible compared to uprobe
+ * triggering, so this shouldn't affect benchmark results much
+ */
+ opaque_noop_func();
+ return 1;
+}
+
+__weak void uprobe_target_ret(void)
{
asm volatile ("");
}
@@ -126,27 +163,34 @@ __weak void uprobe_target_without_nop(void)
static void *uprobe_base_producer(void *input)
{
while (true) {
- uprobe_target_with_nop();
+ uprobe_target_nop();
atomic_inc(&base_hits.value);
}
return NULL;
}
-static void *uprobe_producer_with_nop(void *input)
+static void *uprobe_producer_nop(void *input)
+{
+ while (true)
+ uprobe_target_nop();
+ return NULL;
+}
+
+static void *uprobe_producer_push(void *input)
{
while (true)
- uprobe_target_with_nop();
+ uprobe_target_push();
return NULL;
}
-static void *uprobe_producer_without_nop(void *input)
+static void *uprobe_producer_ret(void *input)
{
while (true)
- uprobe_target_without_nop();
+ uprobe_target_ret();
return NULL;
}
-static void usetup(bool use_retprobe, bool use_nop)
+static void usetup(bool use_retprobe, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
@@ -159,11 +203,7 @@ static void usetup(bool use_retprobe, bool use_nop)
exit(1);
}
- if (use_nop)
- uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop);
- else
- uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop);
-
+ uprobe_offset = get_uprobe_offset(target_addr);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
-1 /* all PIDs */,
@@ -176,24 +216,34 @@ static void usetup(bool use_retprobe, bool use_nop)
ctx.skel->links.bench_trigger_uprobe = link;
}
-static void uprobe_setup_with_nop(void)
+static void uprobe_setup_nop(void)
{
- usetup(false, true);
+ usetup(false, &uprobe_target_nop);
}
-static void uretprobe_setup_with_nop(void)
+static void uretprobe_setup_nop(void)
{
- usetup(true, true);
+ usetup(true, &uprobe_target_nop);
}
-static void uprobe_setup_without_nop(void)
+static void uprobe_setup_push(void)
{
- usetup(false, false);
+ usetup(false, &uprobe_target_push);
}
-static void uretprobe_setup_without_nop(void)
+static void uretprobe_setup_push(void)
{
- usetup(true, false);
+ usetup(true, &uprobe_target_push);
+}
+
+static void uprobe_setup_ret(void)
+{
+ usetup(false, &uprobe_target_ret);
+}
+
+static void uretprobe_setup_ret(void)
+{
+ usetup(true, &uprobe_target_ret);
}
const struct bench bench_trig_base = {
@@ -235,6 +285,36 @@ const struct bench bench_trig_kprobe = {
.report_final = hits_drops_report_final,
};
+const struct bench bench_trig_kretprobe = {
+ .name = "trig-kretprobe",
+ .validate = trigger_validate,
+ .setup = trigger_kretprobe_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe_multi = {
+ .name = "trig-kprobe-multi",
+ .validate = trigger_validate,
+ .setup = trigger_kprobe_multi_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kretprobe_multi = {
+ .name = "trig-kretprobe-multi",
+ .validate = trigger_validate,
+ .setup = trigger_kretprobe_multi_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
const struct bench bench_trig_fentry = {
.name = "trig-fentry",
.validate = trigger_validate,
@@ -245,6 +325,16 @@ const struct bench bench_trig_fentry = {
.report_final = hits_drops_report_final,
};
+const struct bench bench_trig_fexit = {
+ .name = "trig-fexit",
+ .validate = trigger_validate,
+ .setup = trigger_fexit_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
const struct bench bench_trig_fentry_sleep = {
.name = "trig-fentry-sleep",
.validate = trigger_validate,
@@ -274,37 +364,55 @@ const struct bench bench_trig_uprobe_base = {
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uprobe_with_nop = {
- .name = "trig-uprobe-with-nop",
- .setup = uprobe_setup_with_nop,
- .producer_thread = uprobe_producer_with_nop,
+const struct bench bench_trig_uprobe_nop = {
+ .name = "trig-uprobe-nop",
+ .setup = uprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_nop = {
+ .name = "trig-uretprobe-nop",
+ .setup = uretprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_push = {
+ .name = "trig-uprobe-push",
+ .setup = uprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uretprobe_with_nop = {
- .name = "trig-uretprobe-with-nop",
- .setup = uretprobe_setup_with_nop,
- .producer_thread = uprobe_producer_with_nop,
+const struct bench bench_trig_uretprobe_push = {
+ .name = "trig-uretprobe-push",
+ .setup = uretprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uprobe_without_nop = {
- .name = "trig-uprobe-without-nop",
- .setup = uprobe_setup_without_nop,
- .producer_thread = uprobe_producer_without_nop,
+const struct bench bench_trig_uprobe_ret = {
+ .name = "trig-uprobe-ret",
+ .setup = uprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uretprobe_without_nop = {
- .name = "trig-uretprobe-without-nop",
- .setup = uretprobe_setup_without_nop,
- .producer_thread = uprobe_producer_without_nop,
+const struct bench bench_trig_uretprobe_ret = {
+ .name = "trig-uretprobe-ret",
+ .setup = uretprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
new file mode 100755
index 000000000000..9bdcc74e03a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base {uprobe,uretprobe}-{nop,push,ret}
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/bpf_arena_alloc.h b/tools/testing/selftests/bpf/bpf_arena_alloc.h
new file mode 100644
index 000000000000..c27678299e0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_alloc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+#ifndef __round_mask
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#endif
+#ifndef round_up
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#endif
+
+#ifdef __BPF__
+#define NR_CPUS (sizeof(struct cpumask) * 8)
+
+static void __arena * __arena page_frag_cur_page[NR_CPUS];
+static int __arena page_frag_cur_offset[NR_CPUS];
+
+/* Simple page_frag allocator */
+static inline void __arena* bpf_alloc(unsigned int size)
+{
+ __u64 __arena *obj_cnt;
+ __u32 cpu = bpf_get_smp_processor_id();
+ void __arena *page = page_frag_cur_page[cpu];
+ int __arena *cur_offset = &page_frag_cur_offset[cpu];
+ int offset;
+
+ size = round_up(size, 8);
+ if (size >= PAGE_SIZE - 8)
+ return NULL;
+ if (!page) {
+refill:
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return NULL;
+ cast_kern(page);
+ page_frag_cur_page[cpu] = page;
+ *cur_offset = PAGE_SIZE - 8;
+ obj_cnt = page + PAGE_SIZE - 8;
+ *obj_cnt = 0;
+ } else {
+ cast_kern(page);
+ obj_cnt = page + PAGE_SIZE - 8;
+ }
+
+ offset = *cur_offset - size;
+ if (offset < 0)
+ goto refill;
+
+ (*obj_cnt)++;
+ *cur_offset = offset;
+ return page + offset;
+}
+
+static inline void bpf_free(void __arena *addr)
+{
+ __u64 __arena *obj_cnt;
+
+ addr = (void __arena *)(((long)addr) & ~(PAGE_SIZE - 1));
+ obj_cnt = addr + PAGE_SIZE - 8;
+ if (--(*obj_cnt) == 0)
+ bpf_arena_free_pages(&arena, addr, 1);
+}
+#else
+static inline void __arena* bpf_alloc(unsigned int size) { return NULL; }
+static inline void bpf_free(void __arena *addr) {}
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
new file mode 100644
index 000000000000..bcf195c64a45
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef WRITE_ONCE
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member) \
+ ({ \
+ void __arena *__mptr = (void __arena *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+#ifdef __BPF__ /* when compiled as bpf program */
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+#define __arena
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+ int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+#else /* when compiled as user space code */
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+__weak char arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+ int node_id, __u64 flags)
+{
+ return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_htab.h b/tools/testing/selftests/bpf/bpf_arena_htab.h
new file mode 100644
index 000000000000..acc01a876668
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_htab.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include <errno.h>
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct htab_bucket {
+ struct arena_list_head head;
+};
+typedef struct htab_bucket __arena htab_bucket_t;
+
+struct htab {
+ htab_bucket_t *buckets;
+ int n_buckets;
+};
+typedef struct htab __arena htab_t;
+
+static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash)
+{
+ htab_bucket_t *b = htab->buckets;
+
+ cast_kern(b);
+ return &b[hash & (htab->n_buckets - 1)];
+}
+
+static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash)
+{
+ return &__select_bucket(htab, hash)->head;
+}
+
+struct hashtab_elem {
+ int hash;
+ int key;
+ int value;
+ struct arena_list_node hash_node;
+};
+typedef struct hashtab_elem __arena hashtab_elem_t;
+
+static hashtab_elem_t *lookup_elem_raw(arena_list_head_t *head, __u32 hash, int key)
+{
+ hashtab_elem_t *l;
+
+ list_for_each_entry(l, head, hash_node)
+ if (l->hash == hash && l->key == key)
+ return l;
+
+ return NULL;
+}
+
+static int htab_hash(int key)
+{
+ return key;
+}
+
+__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key)
+{
+ hashtab_elem_t *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+ if (l_old)
+ return l_old->value;
+ return 0;
+}
+
+__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value)
+{
+ hashtab_elem_t *l_new = NULL, *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+
+ l_new = bpf_alloc(sizeof(*l_new));
+ if (!l_new)
+ return -ENOMEM;
+ l_new->key = key;
+ l_new->hash = htab_hash(key);
+ l_new->value = value;
+
+ list_add_head(&l_new->hash_node, head);
+ if (l_old) {
+ list_del(&l_old->hash_node);
+ bpf_free(l_old);
+ }
+ return 0;
+}
+
+void htab_init(htab_t *htab)
+{
+ void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+
+ cast_user(buckets);
+ htab->buckets = buckets;
+ htab->n_buckets = 2 * PAGE_SIZE / sizeof(struct htab_bucket);
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
new file mode 100644
index 000000000000..b99b9f408eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+struct arena_list_node;
+
+typedef struct arena_list_node __arena arena_list_node_t;
+
+struct arena_list_node {
+ arena_list_node_t *next;
+ arena_list_node_t * __arena *pprev;
+};
+
+struct arena_list_head {
+ struct arena_list_node __arena *first;
+};
+typedef struct arena_list_head __arena arena_list_head_t;
+
+#define list_entry(ptr, type, member) arena_container_of(ptr, type, member)
+
+#define list_entry_safe(ptr, type, member) \
+ ({ typeof(*ptr) * ___ptr = (ptr); \
+ ___ptr ? ({ cast_kern(___ptr); list_entry(___ptr, type, member); }) : NULL; \
+ })
+
+#ifndef __BPF__
+static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { return NULL; }
+static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
+static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
+#define cond_break ({})
+#endif
+
+/* Safely walk link list elements. Deletion of elements is allowed. */
+#define list_for_each_entry(pos, head, member) \
+ for (void * ___tmp = (pos = list_entry_safe((head)->first, \
+ typeof(*(pos)), member), \
+ (void *)0); \
+ pos && ({ ___tmp = (void *)pos->member.next; 1; }); \
+ cond_break, \
+ pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))
+
+static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
+{
+ arena_list_node_t *first = h->first, * __arena *tmp;
+
+ cast_user(first);
+ cast_kern(n);
+ WRITE_ONCE(n->next, first);
+ cast_kern(first);
+ if (first) {
+ tmp = &n->next;
+ cast_user(tmp);
+ WRITE_ONCE(first->pprev, tmp);
+ }
+ cast_user(n);
+ WRITE_ONCE(h->first, n);
+
+ tmp = &h->first;
+ cast_user(tmp);
+ cast_kern(n);
+ WRITE_ONCE(n->pprev, tmp);
+}
+
+static inline void __list_del(arena_list_node_t *n)
+{
+ arena_list_node_t *next = n->next, *tmp;
+ arena_list_node_t * __arena *pprev = n->pprev;
+
+ cast_user(next);
+ cast_kern(pprev);
+ tmp = *pprev;
+ cast_kern(tmp);
+ WRITE_ONCE(tmp, next);
+ if (next) {
+ cast_user(pprev);
+ cast_kern(next);
+ WRITE_ONCE(next->pprev, pprev);
+ }
+}
+
+#define POISON_POINTER_DELTA 0
+
+#define LIST_POISON1 ((void __arena *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void __arena *) 0x122 + POISON_POINTER_DELTA)
+
+static inline void list_del(arena_list_node_t *n)
+{
+ __list_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index f44875f8b367..a5b9df38c162 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -260,11 +260,11 @@ extern void bpf_throw(u64 cookie) __ksym;
#define __is_signed_type(type) (((type)(-1)) < (type)1)
-#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \
+#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \
({ \
__label__ l_true; \
bool ret = DEFAULT; \
- asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \
+ asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \
:: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
ret = !DEFAULT; \
l_true: \
@@ -276,7 +276,7 @@ l_true: \
* __lhs OP __rhs below will catch the mistake.
* Be aware that we check only __lhs to figure out the sign of compare.
*/
-#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \
+#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \
({ \
typeof(LHS) __lhs = (LHS); \
typeof(RHS) __rhs = (RHS); \
@@ -285,14 +285,17 @@ l_true: \
(void)(__lhs OP __rhs); \
if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \
if (sizeof(__rhs) == 8) \
- ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \
+ /* "i" will truncate 64-bit constant into s32, \
+ * so we have to use extra register via "r". \
+ */ \
+ ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \
else \
- ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \
+ ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \
} else { \
if (sizeof(__rhs) == 8) \
- ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \
+ ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \
else \
- ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \
+ ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \
} \
ret; \
})
@@ -304,7 +307,7 @@ l_true: \
#ifndef bpf_cmp_likely
#define bpf_cmp_likely(LHS, OP, RHS) \
({ \
- bool ret; \
+ bool ret = 0; \
if (__builtin_strcmp(#OP, "==") == 0) \
ret = _bpf_cmp(LHS, !=, RHS, false); \
else if (__builtin_strcmp(#OP, "!=") == 0) \
@@ -318,16 +321,71 @@ l_true: \
else if (__builtin_strcmp(#OP, ">=") == 0) \
ret = _bpf_cmp(LHS, <, RHS, false); \
else \
- (void) "bug"; \
+ asm volatile("r0 " #OP " invalid compare"); \
ret; \
})
#endif
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
#endif
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+ asm volatile(".byte 0xBF; \
+ .ifc %[reg], r0; \
+ .byte 0x00; \
+ .endif; \
+ .ifc %[reg], r1; \
+ .byte 0x11; \
+ .endif; \
+ .ifc %[reg], r2; \
+ .byte 0x22; \
+ .endif; \
+ .ifc %[reg], r3; \
+ .byte 0x33; \
+ .endif; \
+ .ifc %[reg], r4; \
+ .byte 0x44; \
+ .endif; \
+ .ifc %[reg], r5; \
+ .byte 0x55; \
+ .endif; \
+ .ifc %[reg], r6; \
+ .byte 0x66; \
+ .endif; \
+ .ifc %[reg], r7; \
+ .byte 0x77; \
+ .endif; \
+ .ifc %[reg], r8; \
+ .byte 0x88; \
+ .endif; \
+ .ifc %[reg], r9; \
+ .byte 0x99; \
+ .endif; \
+ .short %[off]; \
+ .long %[as]" \
+ : [reg]"+r"(var) \
+ : [off]"i"(BPF_ADDR_SPACE_CAST) \
+ , [as]"i"((dst_as << 16) | src_as));
+#endif
+
/* Description
* Assert that a conditional expression is true.
* Returns
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index b4e78c1eb37b..14ebe7d9e1a3 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -9,7 +9,7 @@ struct bpf_sock_addr_kern;
* Error code
*/
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
/* Description
* Initializes an xdp-type dynptr
@@ -17,7 +17,7 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
* Error code
*/
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
/* Description
* Obtain a read-only pointer to the dynptr's data
@@ -26,7 +26,7 @@ extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym;
+ void *buffer, __u32 buffer__szk) __ksym __weak;
/* Description
* Obtain a read-write pointer to the dynptr's data
@@ -35,13 +35,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym;
+ void *buffer, __u32 buffer__szk) __ksym __weak;
-extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym;
-extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym;
-extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym;
-extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym;
-extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
+extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
/* Description
* Modify the address of a AF_UNIX sockaddr.
@@ -51,9 +51,19 @@ extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clo
extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
const __u8 *sun_path, __u32 sun_path__sz) __ksym;
+/* Description
+ * Allocate and configure a reqsk and link it with a listener and skb.
+ * Returns
+ * Error code
+ */
+struct sock;
+struct bpf_tcp_req_attrs;
+extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz) __ksym;
+
void *bpf_cast_to_kern_ctx(void *) __ksym;
-void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym;
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
extern int bpf_get_file_xattr(struct file *file, const char *name,
struct bpf_dynptr *value_ptr) __ksym;
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
new file mode 100644
index 000000000000..ed5143b79edf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
@@ -0,0 +1,19 @@
+BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_test_no_cfi.ko
+
+obj-m += bpf_test_no_cfi.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean
+
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
new file mode 100644
index 000000000000..b1dd889d5d7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+struct bpf_test_no_cfi_ops {
+ void (*fn_1)(void);
+ void (*fn_2)(void);
+};
+
+static int dummy_init(struct btf *btf)
+{
+ return 0;
+}
+
+static int dummy_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static int dummy_reg(void *kdata)
+{
+ return 0;
+}
+
+static void dummy_unreg(void *kdata)
+{
+}
+
+static const struct bpf_verifier_ops dummy_verifier_ops;
+
+static void bpf_test_no_cfi_ops__fn_1(void)
+{
+}
+
+static void bpf_test_no_cfi_ops__fn_2(void)
+{
+}
+
+static struct bpf_test_no_cfi_ops __test_no_cif_ops = {
+ .fn_1 = bpf_test_no_cfi_ops__fn_1,
+ .fn_2 = bpf_test_no_cfi_ops__fn_2,
+};
+
+static struct bpf_struct_ops test_no_cif_ops = {
+ .verifier_ops = &dummy_verifier_ops,
+ .init = dummy_init,
+ .init_member = dummy_init_member,
+ .reg = dummy_reg,
+ .unreg = dummy_unreg,
+ .name = "bpf_test_no_cfi_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_test_no_cfi_init(void)
+{
+ int ret;
+
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ if (!ret)
+ return -EINVAL;
+
+ test_no_cif_ops.cfi_stubs = &__test_no_cif_ops;
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ return ret;
+}
+
+static void bpf_test_no_cfi_exit(void)
+{
+}
+
+module_init(bpf_test_no_cfi_init);
+module_exit(bpf_test_no_cfi_exit);
+
+MODULE_AUTHOR("Kuifeng Lee");
+MODULE_DESCRIPTION("BPF no cfi_stubs test module");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 91907b321f91..39ad96a18123 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
+#include <linux/delay.h>
#include <linux/error-injection.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -341,12 +343,12 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
-BTF_SET8_START(bpf_testmod_common_kfunc_ids)
+BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_kfunc_common_test)
-BTF_SET8_END(bpf_testmod_common_kfunc_ids)
+BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.owner = THIS_MODULE,
@@ -492,7 +494,7 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused
return arg;
}
-BTF_SET8_START(bpf_testmod_check_kfunc_ids)
+BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
@@ -518,13 +520,120 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
-BTF_SET8_END(bpf_testmod_check_kfunc_ids)
+BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
+
+static int bpf_testmod_ops_init(struct btf *btf)
+{
+ return 0;
+}
+
+static bool bpf_testmod_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_testmod_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) {
+ /* For data fields, this function has to copy it and return
+ * 1 to indicate that the data has been handled by the
+ * struct_ops type, or the verifier will reject the map if
+ * the value of the data field is not zero.
+ */
+ ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data;
+ return 1;
+ }
+ return 0;
+}
static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_testmod_check_kfunc_ids,
};
+static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .is_valid_access = bpf_testmod_ops_is_valid_access,
+};
+
+static int bpf_dummy_reg(void *kdata)
+{
+ struct bpf_testmod_ops *ops = kdata;
+
+ if (ops->test_1)
+ ops->test_1();
+ /* Some test cases (ex. struct_ops_maybe_null) may not have test_2
+ * initialized, so we need to check for NULL.
+ */
+ if (ops->test_2)
+ ops->test_2(4, ops->data);
+
+ return 0;
+}
+
+static void bpf_dummy_unreg(void *kdata)
+{
+}
+
+static int bpf_testmod_test_1(void)
+{
+ return 0;
+}
+
+static void bpf_testmod_test_2(int a, int b)
+{
+}
+
+static int bpf_testmod_ops__test_maybe_null(int dummy,
+ struct task_struct *task__nullable)
+{
+ return 0;
+}
+
+static struct bpf_testmod_ops __bpf_testmod_ops = {
+ .test_1 = bpf_testmod_test_1,
+ .test_2 = bpf_testmod_test_2,
+ .test_maybe_null = bpf_testmod_ops__test_maybe_null,
+};
+
+struct bpf_struct_ops bpf_bpf_testmod_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops,
+ .name = "bpf_testmod_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_dummy_reg2(void *kdata)
+{
+ struct bpf_testmod_ops2 *ops = kdata;
+
+ ops->test_1();
+ return 0;
+}
+
+static struct bpf_testmod_ops2 __bpf_testmod_ops2 = {
+ .test_1 = bpf_testmod_test_1,
+};
+
+struct bpf_struct_ops bpf_testmod_ops2 = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg2,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops2,
+ .name = "bpf_testmod_ops2",
+ .owner = THIS_MODULE,
+};
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -535,6 +644,8 @@ static int bpf_testmod_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
+ ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
if (ret < 0)
return ret;
if (bpf_fentry_test1(0) < 0)
@@ -544,7 +655,15 @@ static int bpf_testmod_init(void)
static void bpf_testmod_exit(void)
{
- return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ /* Need to wait for all references to be dropped because
+ * bpf_kfunc_call_test_release() which currently resides in kernel can
+ * be called after bpf_testmod is unloaded. Once release function is
+ * moved into the module this wait can be removed.
+ */
+ while (refcount_read(&prog_test_struct.cnt) > 1)
+ msleep(20);
+
+ sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
module_init(bpf_testmod_init);
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index f32793efe095..23fa1872ee67 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -5,6 +5,8 @@
#include <linux/types.h>
+struct task_struct;
+
struct bpf_testmod_test_read_ctx {
char *buf;
loff_t off;
@@ -28,4 +30,67 @@ struct bpf_iter_testmod_seq {
int cnt;
};
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ /* Used to test nullable arguments. */
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+
+ /* The following fields are used to test shadow copies. */
+ char onebyte;
+ struct {
+ int a;
+ int b;
+ } unsupported;
+ int data;
+
+ /* The following pointers are used to test the maps having multiple
+ * pages of trampolines.
+ */
+ int (*tramp_1)(int value);
+ int (*tramp_2)(int value);
+ int (*tramp_3)(int value);
+ int (*tramp_4)(int value);
+ int (*tramp_5)(int value);
+ int (*tramp_6)(int value);
+ int (*tramp_7)(int value);
+ int (*tramp_8)(int value);
+ int (*tramp_9)(int value);
+ int (*tramp_10)(int value);
+ int (*tramp_11)(int value);
+ int (*tramp_12)(int value);
+ int (*tramp_13)(int value);
+ int (*tramp_14)(int value);
+ int (*tramp_15)(int value);
+ int (*tramp_16)(int value);
+ int (*tramp_17)(int value);
+ int (*tramp_18)(int value);
+ int (*tramp_19)(int value);
+ int (*tramp_20)(int value);
+ int (*tramp_21)(int value);
+ int (*tramp_22)(int value);
+ int (*tramp_23)(int value);
+ int (*tramp_24)(int value);
+ int (*tramp_25)(int value);
+ int (*tramp_26)(int value);
+ int (*tramp_27)(int value);
+ int (*tramp_28)(int value);
+ int (*tramp_29)(int value);
+ int (*tramp_30)(int value);
+ int (*tramp_31)(int value);
+ int (*tramp_32)(int value);
+ int (*tramp_33)(int value);
+ int (*tramp_34)(int value);
+ int (*tramp_35)(int value);
+ int (*tramp_36)(int value);
+ int (*tramp_37)(int value);
+ int (*tramp_38)(int value);
+ int (*tramp_39)(int value);
+ int (*tramp_40)(int value);
+};
+
+struct bpf_testmod_ops2 {
+ int (*test_1)(void);
+};
+
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c125c441abc7..01f241ea2c67 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -81,6 +81,7 @@ CONFIG_NF_NAT=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
+CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
new file mode 100644
index 000000000000..0766702de846
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+
+#include "arena_htab_asm.skel.h"
+#include "arena_htab.skel.h"
+
+#define PAGE_SIZE 4096
+
+#include "bpf_arena_htab.h"
+
+static void test_arena_htab_common(struct htab *htab)
+{
+ int i;
+
+ printf("htab %p buckets %p n_buckets %d\n", htab, htab->buckets, htab->n_buckets);
+ ASSERT_OK_PTR(htab->buckets, "htab->buckets shouldn't be NULL");
+ for (i = 0; htab->buckets && i < 16; i += 4) {
+ /*
+ * Walk htab buckets and link lists since all pointers are correct,
+ * though they were written by bpf program.
+ */
+ int val = htab_lookup_elem(htab, i);
+
+ ASSERT_EQ(i, val, "key == value");
+ }
+}
+
+static void test_arena_htab_llvm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab *skel;
+ struct htab *htab;
+ size_t arena_sz;
+ void *area;
+ int ret;
+
+ skel = arena_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab__open_and_load"))
+ return;
+
+ area = bpf_map__initial_value(skel->maps.arena, &arena_sz);
+ /* fault-in a page with pgoff == 0 as sanity check */
+ *(volatile int *)area = 0x55aa;
+
+ /* bpf prog will allocate more pages */
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_llvm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+out:
+ arena_htab__destroy(skel);
+}
+
+static void test_arena_htab_asm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab_asm *skel;
+ struct htab *htab;
+ int ret;
+
+ skel = arena_htab_asm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab_asm__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_asm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+ arena_htab_asm__destroy(skel);
+}
+
+void test_arena_htab(void)
+{
+ if (test__start_subtest("arena_htab_llvm"))
+ test_arena_htab_llvm();
+ if (test__start_subtest("arena_htab_asm"))
+ test_arena_htab_asm();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
new file mode 100644
index 000000000000..e61886debab1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+
+#define PAGE_SIZE 4096
+
+#include "bpf_arena_list.h"
+#include "arena_list.skel.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+static int list_sum(struct arena_list_head *head)
+{
+ struct elem __arena *n;
+ int sum = 0;
+
+ list_for_each_entry(n, head, node)
+ sum += n->value;
+ return sum;
+}
+
+static void test_arena_list_add_del(int cnt)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_list *skel;
+ int expected_sum = (u64)cnt * (cnt - 1) / 2;
+ int ret, sum;
+
+ skel = arena_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
+ return;
+
+ skel->bss->cnt = cnt;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, expected_sum, "sum of elems");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+ ASSERT_EQ(skel->arena->test_val, cnt + 1, "num of elems");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_del), &opts);
+ ASSERT_OK(ret, "ret_del");
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, 0, "sum of list elems after del");
+ ASSERT_EQ(skel->bss->list_sum, expected_sum, "sum of list elems computed by prog");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+out:
+ arena_list__destroy(skel);
+}
+
+void test_arena_list(void)
+{
+ if (test__start_subtest("arena_list_1"))
+ test_arena_list_add_del(1);
+ if (test__start_subtest("arena_list_1000"))
+ test_arena_list_add_del(1000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
new file mode 100644
index 000000000000..6a707213e46b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "bad_struct_ops.skel.h"
+#include "bad_struct_ops2.skel.h"
+
+static void invalid_prog_reuse(void)
+{
+ struct bad_struct_ops *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops__load(skel);
+ log = stop_libbpf_log_capture();
+ ASSERT_ERR(err, "bad_struct_ops__load should fail");
+ ASSERT_HAS_SUBSTR(log,
+ "struct_ops init_kern testmod_2 func ptr test_1: invalid reuse of prog test_1",
+ "expected init_kern message");
+
+cleanup:
+ free(log);
+ bad_struct_ops__destroy(skel);
+}
+
+static void unused_program(void)
+{
+ struct bad_struct_ops2 *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops2__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops2__open"))
+ return;
+
+ /* struct_ops programs not referenced from any maps are open
+ * with autoload set to true.
+ */
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo autoload == true");
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops2__load(skel);
+ ASSERT_ERR(err, "bad_struct_ops2__load should fail");
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log, "prog 'foo': failed to load",
+ "message about 'foo' failing to load");
+
+cleanup:
+ free(log);
+ bad_struct_ops2__destroy(skel);
+}
+
+void test_bad_struct_ops(void)
+{
+ if (test__start_subtest("invalid_prog_reuse"))
+ invalid_prog_reuse();
+ if (test__start_subtest("unused_program"))
+ unused_program();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e770912fc1d2..4c6ada5b270b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type)
}
bpf_program__set_type(prog, type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
+ bpf_program__set_flags(prog, testing_prog_flags());
bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
err = bpf_object__load(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 816145bcb647..00965a6e83bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3535,6 +3535,32 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 1,
.max_entries = 1,
},
+{
+ .descr = "datasec: name '?.foo bar:buz' is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0?.foo bar:buz"),
+},
+{
+ .descr = "type name '?foo' is not ok",
+ .raw_types = {
+ /* union ?foo; */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0?foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
{
.descr = "float test #1, well-formed",
@@ -4363,6 +4389,9 @@ static void do_test_raw(unsigned int test_num)
if (err || btf_fd < 0)
goto done;
+ if (!test->map_type)
+ goto done;
+
opts.btf_fd = btf_fd;
opts.btf_key_type_id = test->key_type_id;
opts.btf_value_type_id = test->value_type_id;
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index c2e886399e3c..ecf89df78109 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -27,7 +27,7 @@ static void verify_success(const char *prog_name)
struct bpf_program *prog;
struct bpf_link *link = NULL;
pid_t child_pid;
- int status;
+ int status, err;
skel = cpumask_success__open();
if (!ASSERT_OK_PTR(skel, "cpumask_success__open"))
@@ -36,8 +36,8 @@ static void verify_success(const char *prog_name)
skel->bss->pid = getpid();
skel->bss->nr_cpus = libbpf_num_possible_cpus();
- cpumask_success__load(skel);
- if (!ASSERT_OK_PTR(skel, "cpumask_success__load"))
+ err = cpumask_success__load(skel);
+ if (!ASSERT_OK(err, "cpumask_success__load"))
goto cleanup;
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index 4951aa978f33..3b7c57fe55a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -626,50 +626,6 @@ err:
return false;
}
-/* Request BPF program instructions after all rewrites are applied,
- * e.g. verifier.c:convert_ctx_access() is done.
- */
-static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 xlated_prog_len;
- __u32 buf_element_size = sizeof(struct bpf_insn);
-
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("bpf_prog_get_info_by_fd failed");
- return -1;
- }
-
- xlated_prog_len = info.xlated_prog_len;
- if (xlated_prog_len % buf_element_size) {
- printf("Program length %d is not multiple of %d\n",
- xlated_prog_len, buf_element_size);
- return -1;
- }
-
- *cnt = xlated_prog_len / buf_element_size;
- *buf = calloc(*cnt, buf_element_size);
- if (!buf) {
- perror("can't allocate xlated program buffer");
- return -ENOMEM;
- }
-
- bzero(&info, sizeof(info));
- info.xlated_prog_len = xlated_prog_len;
- info.xlated_prog_insns = (__u64)(unsigned long)*buf;
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("second bpf_prog_get_info_by_fd failed");
- goto out_free_buf;
- }
-
- return 0;
-
-out_free_buf:
- free(*buf);
- return -1;
-}
-
static void print_insn(void *private_data, const char *fmt, ...)
{
va_list args;
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
index 5c0ebe6ba866..dcb9e5070cc3 100644
--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -72,6 +72,6 @@ fail:
bpf_tc_hook_destroy(&qdisc_hook);
close_netns(nstoken);
}
- SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST);
decap_sanity__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 4ad4cd69152e..3379df2d4cf2 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -298,6 +298,6 @@ void test_fib_lookup(void)
fail:
if (nstoken)
close_netns(nstoken);
- SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST);
fib_lookup__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index d4b1901f7879..f3932941bbaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -19,6 +19,7 @@ static const char *kmulti_syms[] = {
};
#define KMULTI_CNT ARRAY_SIZE(kmulti_syms)
static __u64 kmulti_addrs[KMULTI_CNT];
+static __u64 kmulti_cookies[] = { 3, 1, 2 };
#define KPROBE_FUNC "bpf_fentry_test1"
static __u64 kprobe_addr;
@@ -31,6 +32,8 @@ static noinline void uprobe_func(void)
asm volatile ("");
}
+#define PERF_EVENT_COOKIE 0xdeadbeef
+
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
@@ -62,6 +65,8 @@ again:
ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset,
"kprobe_addr");
+ ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie");
+
if (!info.perf_event.kprobe.func_name) {
ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len");
info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
@@ -81,6 +86,8 @@ again:
goto again;
}
+ ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie");
+
err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME,
strlen(TP_NAME));
ASSERT_EQ(err, 0, "cmp_tp_name");
@@ -96,10 +103,17 @@ again:
goto again;
}
+ ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie");
+
err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE,
strlen(UPROBE_FILE));
ASSERT_EQ(err, 0, "cmp_file_name");
break;
+ case BPF_PERF_EVENT_EVENT:
+ ASSERT_EQ(info.perf_event.event.type, PERF_TYPE_SOFTWARE, "event_type");
+ ASSERT_EQ(info.perf_event.event.config, PERF_COUNT_SW_PAGE_FAULTS, "event_config");
+ ASSERT_EQ(info.perf_event.event.cookie, PERF_EVENT_COOKIE, "event_cookie");
+ break;
default:
err = -1;
break;
@@ -139,6 +153,7 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
.attach_mode = PROBE_ATTACH_MODE_LINK,
.retprobe = type == BPF_PERF_EVENT_KRETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
);
ssize_t entry_offset = 0;
struct bpf_link *link;
@@ -163,10 +178,13 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
static void test_tp_fill_link_info(struct test_fill_link_info *skel)
{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
struct bpf_link *link;
int link_fd, err;
- link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME);
+ link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts);
if (!ASSERT_OK_PTR(link, "attach_tp"))
return;
@@ -176,16 +194,53 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel)
bpf_link__destroy(link);
}
+static void test_event_fill_link_info(struct test_fill_link_info *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err, pfd;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_PAGE_FAULTS,
+ .freq = 1,
+ .sample_freq = 1,
+ .size = sizeof(struct perf_event_attr),
+ };
+
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */,
+ -1 /* group id */, 0 /* flags */);
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ return;
+
+ link = bpf_program__attach_perf_event_opts(skel->progs.event_run, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_event"))
+ goto error;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_EVENT, 0, 0, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+
+error:
+ close(pfd);
+}
+
static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
enum bpf_perf_event_type type)
{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = type == BPF_PERF_EVENT_URETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
struct bpf_link *link;
int link_fd, err;
- link = bpf_program__attach_uprobe(skel->progs.uprobe_run,
- type == BPF_PERF_EVENT_URETPROBE,
- 0, /* self pid */
- UPROBE_FILE, uprobe_offset);
+ link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
+ 0, /* self pid */
+ UPROBE_FILE, uprobe_offset,
+ &opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
return;
@@ -195,11 +250,11 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
bpf_link__destroy(link);
}
-static int verify_kmulti_link_info(int fd, bool retprobe)
+static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
struct bpf_link_info info;
__u32 len = sizeof(info);
- __u64 addrs[KMULTI_CNT];
int flags, i, err;
memset(&info, 0, sizeof(info));
@@ -221,18 +276,22 @@ again:
if (!info.kprobe_multi.addrs) {
info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
goto again;
}
- for (i = 0; i < KMULTI_CNT; i++)
+ for (i = 0; i < KMULTI_CNT; i++) {
ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+ ASSERT_EQ(cookies[i], has_cookies ? kmulti_cookies[i] : 0,
+ "kmulti_cookies_value");
+ }
return 0;
}
static void verify_kmulti_invalid_user_buffer(int fd)
{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
struct bpf_link_info info;
__u32 len = sizeof(info);
- __u64 addrs[KMULTI_CNT];
int err, i;
memset(&info, 0, sizeof(info));
@@ -266,7 +325,20 @@ static void verify_kmulti_invalid_user_buffer(int fd)
info.kprobe_multi.count = KMULTI_CNT;
info.kprobe_multi.addrs = 0x1; /* invalid addr */
err = bpf_link_get_info_by_fd(fd, &info, &len);
- ASSERT_EQ(err, -EFAULT, "invalid_buff");
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_addrs");
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = 0x1; /* invalid addr */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_cookies");
+
+ /* cookies && !count */
+ info.kprobe_multi.count = 0;
+ info.kprobe_multi.addrs = ptr_to_u64(NULL);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_cookies_count");
}
static int symbols_cmp_r(const void *a, const void *b)
@@ -278,13 +350,15 @@ static int symbols_cmp_r(const void *a, const void *b)
}
static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
- bool retprobe, bool invalid)
+ bool retprobe, bool cookies,
+ bool invalid)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
struct bpf_link *link;
int link_fd, err;
opts.syms = kmulti_syms;
+ opts.cookies = cookies ? kmulti_cookies : NULL;
opts.cnt = KMULTI_CNT;
opts.retprobe = retprobe;
link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts);
@@ -293,7 +367,7 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
link_fd = bpf_link__fd(link);
if (!invalid) {
- err = verify_kmulti_link_info(link_fd, retprobe);
+ err = verify_kmulti_link_info(link_fd, retprobe, cookies);
ASSERT_OK(err, "verify_kmulti_link_info");
} else {
verify_kmulti_invalid_user_buffer(link_fd);
@@ -513,6 +587,8 @@ void test_fill_link_info(void)
test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true);
if (test__start_subtest("tracepoint_link_info"))
test_tp_fill_link_info(skel);
+ if (test__start_subtest("event_link_info"))
+ test_event_fill_link_info(skel);
uprobe_offset = get_uprobe_offset(&uprobe_func);
if (test__start_subtest("uprobe_link_info"))
@@ -523,12 +599,16 @@ void test_fill_link_info(void)
qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r);
for (i = 0; i < KMULTI_CNT; i++)
kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]);
- if (test__start_subtest("kprobe_multi_link_info"))
- test_kprobe_multi_fill_link_info(skel, false, false);
- if (test__start_subtest("kretprobe_multi_link_info"))
- test_kprobe_multi_fill_link_info(skel, true, false);
+ if (test__start_subtest("kprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, false, false, false);
+ test_kprobe_multi_fill_link_info(skel, false, true, false);
+ }
+ if (test__start_subtest("kretprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, true, false, false);
+ test_kprobe_multi_fill_link_info(skel, true, true, false);
+ }
if (test__start_subtest("kprobe_multi_invalid_ubuff"))
- test_kprobe_multi_fill_link_info(skel, true, true);
+ test_kprobe_multi_fill_link_info(skel, true, true, true);
if (test__start_subtest("uprobe_multi_link_info"))
test_uprobe_multi_fill_link_info(skel, false, false);
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
index 57c814f5f6a7..8dd2af9081f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -59,9 +59,9 @@ static int setup_topology(bool ipv6)
/* Wait for up to 5s for links to come up */
for (i = 0; i < 5; ++i) {
if (ipv6)
- up = !system("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6 " &>/dev/null");
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6);
else
- up = !system("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR " &>/dev/null");
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR);
if (up)
break;
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index bf84d4a1d9ae..3c440370c1f0 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -193,6 +193,7 @@ static void subtest_task_iters(void)
ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
+ ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
pthread_mutex_unlock(&do_nothing_mutex);
for (int i = 0; i < thread_num; i++)
ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
new file mode 100644
index 000000000000..7def158da9eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "linux/filter.h"
+#include "kptr_xchg_inline.skel.h"
+
+void test_kptr_xchg_inline(void)
+{
+ struct kptr_xchg_inline *skel;
+ struct bpf_insn *insn = NULL;
+ struct bpf_insn exp;
+ unsigned int cnt;
+ int err;
+
+#if !(defined(__x86_64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64))
+ test__skip();
+ return;
+#endif
+
+ skel = kptr_xchg_inline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = get_xlated_program(bpf_program__fd(skel->progs.kptr_xchg_inline), &insn, &cnt);
+ if (!ASSERT_OK(err, "prog insn"))
+ goto out;
+
+ /* The original instructions are:
+ * r1 = map[id:xxx][0]+0
+ * r2 = 0
+ * call bpf_kptr_xchg#yyy
+ *
+ * call bpf_kptr_xchg#yyy will be inlined as:
+ * r0 = r2
+ * r0 = atomic64_xchg((u64 *)(r1 +0), r0)
+ */
+ if (!ASSERT_GT(cnt, 5, "insn cnt"))
+ goto out;
+
+ exp = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+ if (!ASSERT_OK(memcmp(&insn[3], &exp, sizeof(exp)), "mov"))
+ goto out;
+
+ exp = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+ if (!ASSERT_OK(memcmp(&insn[4], &exp, sizeof(exp)), "xchg"))
+ goto out;
+out:
+ free(insn);
+ kptr_xchg_inline__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
index 9f766ddd946a..4ed46ed58a7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
@@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void)
if (prog_type == BPF_PROG_TYPE_UNSPEC)
continue;
+ if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0)
+ continue;
if (!test__start_subtest(prog_type_name))
continue;
@@ -68,6 +70,8 @@ void test_libbpf_probe_map_types(void)
if (map_type == BPF_MAP_TYPE_UNSPEC)
continue;
+ if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0)
+ continue;
if (!test__start_subtest(map_type_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
index eb34d612d6f8..62ea855ec4d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void)
const char *map_type_str;
char buf[256];
+ if (map_type == __MAX_BPF_MAP_TYPE)
+ continue;
+
map_type_name = btf__str_by_offset(btf, e->name_off);
map_type_str = libbpf_bpf_map_type_str(map_type);
ASSERT_OK_PTR(map_type_str, map_type_name);
@@ -186,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void)
const char *prog_type_str;
char buf[256];
+ if (prog_type == __MAX_BPF_PROG_TYPE)
+ continue;
+
prog_type_name = btf__str_by_offset(btf, e->name_off);
prog_type_str = libbpf_bpf_prog_type_str(prog_type);
ASSERT_OK_PTR(prog_type_str, prog_type_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
index 7a3fa2ff567b..90a98e23be61 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -169,9 +169,9 @@ void test_log_fixup(void)
if (test__start_subtest("bad_core_relo_trunc_none"))
bad_core_relo(0, TRUNC_NONE /* full buf */);
if (test__start_subtest("bad_core_relo_trunc_partial"))
- bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */);
+ bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
if (test__start_subtest("bad_core_relo_trunc_full"))
- bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */);
+ bad_core_relo(240, TRUNC_FULL /* truncate also libbpf's message patch */);
if (test__start_subtest("bad_core_relo_subprog"))
bad_core_relo_subprog();
if (test__start_subtest("missing_map"))
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index e9190574e79f..fb1eb8c67361 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -27,8 +27,6 @@
} \
})
-#define NETNS "ns_lwt"
-
static inline int netns_create(void)
{
return system("ip netns add " NETNS);
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
index 59b38569f310..835a1d756c16 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
@@ -54,6 +54,7 @@
#include <stdbool.h>
#include <stdlib.h>
+#define NETNS "ns_lwt_redirect"
#include "lwt_helpers.h"
#include "test_progs.h"
#include "network_helpers.h"
@@ -85,7 +86,7 @@ static void ping_dev(const char *dev, bool is_ingress)
snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
/* We won't get a reply. Don't fail here */
- SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
ip, ICMP_PAYLOAD_SIZE);
}
@@ -203,6 +204,7 @@ static int setup_redirect_target(const char *target_dev, bool need_mac)
if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
goto fail;
+ SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1");
SYS(fail, "ip link add link_err type dummy");
SYS(fail, "ip link set lo up");
SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
index f4bb2d5fcae0..03825d2b45a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
@@ -48,6 +48,7 @@
* For case 2, force UDP packets to overflow fq limit. As long as kernel
* is not crashed, it is considered successful.
*/
+#define NETNS "ns_lwt_reroute"
#include "lwt_helpers.h"
#include "network_helpers.h"
#include <linux/net_tstamp.h>
@@ -63,7 +64,7 @@
static void ping_once(const char *ip)
{
/* We won't get a reply. Don't fail here */
- SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
ip, ICMP_PAYLOAD_SIZE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 7c0be7cf550b..8f8d792307c1 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -79,7 +79,7 @@ static void cleanup_netns(struct nstoken *nstoken)
if (nstoken)
close_netns(nstoken);
- SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST);
+ SYS_NOFAIL("ip netns del %s", NS_TEST);
}
static int verify_tsk(int map_fd, int client_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index 3f1f58d3a729..a1f7e7378a64 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -29,6 +29,10 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_lock, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_unlock, true);
err = rcu_read_lock__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
@@ -75,6 +79,8 @@ static const char * const inproper_region_tests[] = {
"inproper_sleepable_helper",
"inproper_sleepable_kfunc",
"nested_rcu_region",
+ "rcu_read_lock_global_subprog_lock",
+ "rcu_read_lock_global_subprog_unlock",
};
static void test_inproper_region(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
new file mode 100644
index 000000000000..3405923fe4e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "test_progs.h"
+#include "read_vsyscall.skel.h"
+
+#if defined(__x86_64__)
+/* For VSYSCALL_ADDR */
+#include <asm/vsyscall.h>
+#else
+/* To prevent build failure on non-x86 arch */
+#define VSYSCALL_ADDR 0UL
+#endif
+
+struct read_ret_desc {
+ const char *name;
+ int ret;
+} all_read[] = {
+ { .name = "probe_read_kernel", .ret = -ERANGE },
+ { .name = "probe_read_kernel_str", .ret = -ERANGE },
+ { .name = "probe_read", .ret = -ERANGE },
+ { .name = "probe_read_str", .ret = -ERANGE },
+ { .name = "probe_read_user", .ret = -EFAULT },
+ { .name = "probe_read_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user", .ret = -EFAULT },
+ { .name = "copy_from_user_task", .ret = -EFAULT },
+};
+
+void test_read_vsyscall(void)
+{
+ struct read_vsyscall *skel;
+ unsigned int i;
+ int err;
+
+#if !defined(__x86_64__)
+ test__skip();
+ return;
+#endif
+ skel = read_vsyscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ err = read_vsyscall__attach(skel);
+ if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
+ goto out;
+
+ /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
+ * but it doesn't affect the returned error codes.
+ */
+ skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
+ usleep(1);
+
+ for (i = 0; i < ARRAY_SIZE(all_read); i++)
+ ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
+out:
+ read_vsyscall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index 820d0bcfc474..eb74363f9f70 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -840,7 +840,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op,
.log_level = 2,
.log_buf = log_buf,
.log_size = log_sz,
- .prog_flags = BPF_F_TEST_REG_INVARIANTS,
+ .prog_flags = testing_prog_flags(),
);
/* ; skip exit block below
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
index b0583309a94e..9c11938fe597 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
@@ -214,7 +214,7 @@ void test_sock_destroy(void)
cleanup:
if (nstoken)
close_netns(nstoken);
- SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ SYS_NOFAIL("ip netns del " TEST_NS);
if (cgroup_fd >= 0)
close(cgroup_fd);
sock_destroy_prog__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index 0c365f36c73b..d56e18b25528 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -112,7 +112,7 @@ void test_sock_iter_batch(void)
{
struct nstoken *nstoken = NULL;
- SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ SYS_NOFAIL("ip netns del " TEST_NS);
SYS(done, "ip netns add %s", TEST_NS);
SYS(done, "ip -net %s link set dev lo up", TEST_NS);
@@ -131,5 +131,5 @@ void test_sock_iter_batch(void)
close_netns(nstoken);
done:
- SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ SYS_NOFAIL("ip netns del " TEST_NS);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 18d451be57c8..2b0068742ef9 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -48,6 +48,8 @@ static struct {
{ "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" },
{ "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" },
{ "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
+ { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
};
static int match_regex(const char *pattern, const char *string)
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
new file mode 100644
index 000000000000..a5cc593c1e1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_autocreate.skel.h"
+#include "struct_ops_autocreate2.skel.h"
+
+static void cant_load_full_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ char *log = NULL;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+ /* The testmod_2 map BTF type (struct bpf_testmod_ops___v2) doesn't
+ * match the BTF of the actual struct bpf_testmod_ops defined in the
+ * kernel, so we should fail to load it if we don't disable autocreate
+ * for that map.
+ */
+ err = struct_ops_autocreate__load(skel);
+ log = stop_libbpf_log_capture();
+ if (!ASSERT_ERR(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log, "libbpf: struct_ops init_kern", "init_kern message");
+ ASSERT_EQ(err, -ENOTSUP, "errno should be ENOTSUP");
+
+cleanup:
+ free(log);
+ struct_ops_autocreate__destroy(skel);
+}
+
+static int check_test_1_link(struct struct_ops_autocreate *skel, struct bpf_map *map)
+{
+ struct bpf_link *link;
+ int err;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ return -1;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+ bpf_link__destroy(link);
+ return err;
+}
+
+static void can_load_partial_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 default autoload");
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_2), "test_2 default autoload");
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_2), "test_2 actual autoload");
+
+ check_test_1_link(skel, skel->maps.testmod_1);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+static void optional_maps(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_1), "testmod_1 autocreate");
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_2), "testmod_2 autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map), "optional_map autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map2), "optional_map2 autocreate");
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_1, false);
+ err |= bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ err |= bpf_map__set_autocreate(skel->maps.optional_map2, true);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ check_test_1_link(skel, skel->maps.optional_map2);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+/* Swap test_mod1->test_1 program from 'bar' to 'foo' using shadow vars.
+ * test_mod1 load should enable autoload for 'foo'.
+ */
+static void autoload_and_shadow_vars(void)
+{
+ struct struct_ops_autocreate2 *skel = NULL;
+ struct bpf_link *link = NULL;
+ int err;
+
+ skel = struct_ops_autocreate2__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.foo), "foo default autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar default autoload");
+
+ /* loading map testmod_1 would switch foo's autoload to true */
+ skel->struct_ops.testmod_1->test_1 = skel->progs.foo;
+
+ err = struct_ops_autocreate2__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar actual autoload");
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ goto cleanup;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ struct_ops_autocreate2__destroy(skel);
+}
+
+void test_struct_ops_autocreate(void)
+{
+ if (test__start_subtest("cant_load_full_object"))
+ cant_load_full_object();
+ if (test__start_subtest("can_load_partial_object"))
+ can_load_partial_object();
+ if (test__start_subtest("autoload_and_shadow_vars"))
+ autoload_and_shadow_vars();
+ if (test__start_subtest("optional_maps"))
+ optional_maps();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index ea8537c54413..c33c05161a9e 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -117,12 +117,6 @@ static void test_recursion(void)
ASSERT_OK(err, "lookup map_b");
ASSERT_EQ(value, 100, "map_b value");
- prog_fd = bpf_program__fd(skel->progs.on_lookup);
- memset(&info, 0, sizeof(info));
- err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
- ASSERT_OK(err, "get prog info");
- ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion");
-
prog_fd = bpf_program__fd(skel->progs.on_update);
memset(&info, 0, sizeof(info));
err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 518f143c5b0f..dbe06aeaa2b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -188,6 +188,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+ char src_addr[IFADDR_STR_LEN + 1] = {};
int err;
if (result->dev_mode == MODE_VETH) {
@@ -208,6 +209,9 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (get_ifaddr("src_fwd", src_fwd_addr))
goto fail;
+ if (get_ifaddr("src", src_addr))
+ goto fail;
+
result->ifindex_src = if_nametoindex("src");
if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
goto fail;
@@ -270,6 +274,13 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
+ SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
+ }
+
close_netns(nstoken);
/** setup in 'dst' namespace */
@@ -280,6 +291,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
SYS(fail, "ip link set dev dst up");
+ SYS(fail, "ip link set dev lo up");
SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
@@ -457,7 +469,7 @@ static int set_forwarding(bool enable)
return 0;
}
-static void rcv_tstamp(int fd, const char *expected, size_t s)
+static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
struct __kernel_timespec pkt_ts = {};
char ctl[CMSG_SPACE(sizeof(pkt_ts))];
@@ -478,7 +490,7 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
ret = recvmsg(fd, &msg, 0);
if (!ASSERT_EQ(ret, s, "recvmsg"))
- return;
+ return -1;
ASSERT_STRNEQ(data, expected, s, "expected rcv data");
cmsg = CMSG_FIRSTHDR(&msg);
@@ -487,6 +499,12 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
+ if (tstamp) {
+ /* caller will check the tstamp itself */
+ *tstamp = pkt_ns;
+ return 0;
+ }
+
ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
ret = clock_gettime(CLOCK_REALTIME, &now_ts);
@@ -496,6 +514,60 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
"check rcv tstamp");
+ return 0;
+}
+
+static void rcv_tstamp(int fd, const char *expected, size_t s)
+{
+ __rcv_tstamp(fd, expected, s, NULL);
+}
+
+static int wait_netstamp_needed_key(void)
+{
+ int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+ __u64 tstamp = 0;
+
+ nstoken = open_netns(NS_DST);
+ if (!nstoken)
+ return -1;
+
+ srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto done;
+
+ err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
+ goto done;
+
+again:
+ n = write(cli_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
+ if (!ASSERT_OK(err, "__rcv_tstamp"))
+ goto done;
+ if (!tstamp && nretries++ < 5) {
+ sleep(1);
+ printf("netstamp_needed_key retry#%d\n", nretries);
+ goto again;
+ }
+
+done:
+ if (!tstamp && srv_fd != -1) {
+ close(srv_fd);
+ srv_fd = -1;
+ }
+ if (cli_fd != -1)
+ close(cli_fd);
+ close_netns(nstoken);
+ return srv_fd;
}
static void snd_tstamp(int fd, char *b, size_t s)
@@ -832,11 +904,20 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
{
struct test_tc_dtime *skel;
struct nstoken *nstoken;
- int err;
+ int hold_tstamp_fd, err;
+
+ /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
+ * is no delay in the kernel net_enable_timestamp().
+ * This ensures the following tests must have
+ * non zero rcv tstamp in the recvmsg().
+ */
+ hold_tstamp_fd = wait_netstamp_needed_key();
+ if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
+ return;
skel = test_tc_dtime__open();
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
@@ -881,6 +962,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
done:
test_tc_dtime__destroy(skel);
+ close(hold_tstamp_fd);
}
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
new file mode 100644
index 000000000000..eaf441dc7e79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_custom_syncookie.skel.h"
+
+static struct test_tcp_custom_syncookie_case {
+ int family, type;
+ char addr[16];
+ char name[10];
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .addr = "127.0.0.1",
+ },
+ {
+ .name = "IPv6 TCP",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .addr = "::1",
+ },
+};
+
+static int setup_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "ip"))
+ goto err;
+
+ if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"),
+ "write_sysctl"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int setup_tc(struct test_tcp_custom_syncookie *skel)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie));
+
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+#define msg "Hello World"
+#define msglen 11
+
+static void transfer_message(int sender, int receiver)
+{
+ char buf[msglen];
+ int ret;
+
+ ret = send(sender, msg, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "send"))
+ return;
+
+ memset(buf, 0, sizeof(buf));
+
+ ret = recv(receiver, buf, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "recv"))
+ return;
+
+ ret = strncmp(buf, msg, msglen);
+ if (!ASSERT_EQ(ret, 0, "strncmp"))
+ return;
+}
+
+static void create_connection(struct test_tcp_custom_syncookie_case *test_case)
+{
+ int server, client, child;
+
+ server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0);
+ if (!ASSERT_NEQ(server, -1, "start_server"))
+ return;
+
+ client = connect_to_fd(server, 0);
+ if (!ASSERT_NEQ(client, -1, "connect_to_fd"))
+ goto close_server;
+
+ child = accept(server, NULL, 0);
+ if (!ASSERT_NEQ(child, -1, "accept"))
+ goto close_client;
+
+ transfer_message(client, child);
+ transfer_message(child, client);
+
+ close(child);
+close_client:
+ close(client);
+close_server:
+ close(server);
+}
+
+void test_tcp_custom_syncookie(void)
+{
+ struct test_tcp_custom_syncookie *skel;
+ int i;
+
+ if (setup_netns())
+ return;
+
+ skel = test_tcp_custom_syncookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (setup_tc(skel))
+ goto destroy_skel;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (!test__start_subtest(test_cases[i].name))
+ continue;
+
+ skel->bss->handled_syn = false;
+ skel->bss->handled_ack = false;
+
+ create_connection(&test_cases[i]);
+
+ ASSERT_EQ(skel->bss->handled_syn, true, "SYN is not handled at tc.");
+ ASSERT_EQ(skel->bss->handled_ack, true, "ACK is not handled at tc");
+ }
+
+destroy_skel:
+ system("tc qdisc del dev lo clsact");
+
+ test_tcp_custom_syncookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
new file mode 100644
index 000000000000..01dc2613c8a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_maybe_null.skel.h"
+#include "struct_ops_maybe_null_fail.skel.h"
+
+/* Test that the verifier accepts a program that access a nullable pointer
+ * with a proper check.
+ */
+static void maybe_null(void)
+{
+ struct struct_ops_maybe_null *skel;
+
+ skel = struct_ops_maybe_null__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_and_load"))
+ return;
+
+ struct_ops_maybe_null__destroy(skel);
+}
+
+/* Test that the verifier rejects a program that access a nullable pointer
+ * without a check beforehand.
+ */
+static void maybe_null_fail(void)
+{
+ struct struct_ops_maybe_null_fail *skel;
+
+ skel = struct_ops_maybe_null_fail__open_and_load();
+ if (ASSERT_ERR_PTR(skel, "struct_ops_module_fail__open_and_load"))
+ return;
+
+ struct_ops_maybe_null_fail__destroy(skel);
+}
+
+void test_struct_ops_maybe_null(void)
+{
+ /* The verifier verifies the programs at load time, so testing both
+ * programs in the same compile-unit is complicated. We run them in
+ * separate objects to simplify the testing.
+ */
+ if (test__start_subtest("maybe_null"))
+ maybe_null();
+ if (test__start_subtest("maybe_null_fail"))
+ maybe_null_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
new file mode 100644
index 000000000000..ee5372c7f2c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <time.h>
+
+#include "struct_ops_module.skel.h"
+
+static void check_map_info(struct bpf_map_info *info)
+{
+ struct bpf_btf_info btf_info;
+ char btf_name[256];
+ u32 btf_info_len = sizeof(btf_info);
+ int err, fd;
+
+ fd = bpf_btf_get_fd_by_id(info->btf_vmlinux_id);
+ if (!ASSERT_GE(fd, 0, "get_value_type_btf_obj_fd"))
+ return;
+
+ memset(&btf_info, 0, sizeof(btf_info));
+ btf_info.name = ptr_to_u64(btf_name);
+ btf_info.name_len = sizeof(btf_name);
+ err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_info_len);
+ if (!ASSERT_OK(err, "get_value_type_btf_obj_info"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(strcmp(btf_name, "bpf_testmod"), 0, "get_value_type_btf_obj_name"))
+ goto cleanup;
+
+cleanup:
+ close(fd);
+}
+
+static int attach_ops_and_check(struct struct_ops_module *skel,
+ struct bpf_map *map,
+ int expected_test_2_result)
+{
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(map);
+ ASSERT_OK_PTR(link, "attach_test_mod_1");
+ if (!link)
+ return -1;
+
+ /* test_{1,2}() would be called from bpf_dummy_reg() in bpf_testmod.c */
+ ASSERT_EQ(skel->bss->test_1_result, 0xdeadbeef, "test_1_result");
+ ASSERT_EQ(skel->bss->test_2_result, expected_test_2_result, "test_2_result");
+
+ bpf_link__destroy(link);
+ return 0;
+}
+
+static void test_struct_ops_load(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_map_info info = {};
+ int err;
+ u32 len;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_1->data = 13;
+ skel->struct_ops.testmod_1->test_2 = skel->progs.test_3;
+ /* Since test_2() is not being used, it should be disabled from
+ * auto-loading, or it will fail to load.
+ */
+ bpf_program__set_autoload(skel->progs.test_2, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "struct_ops_module_load"))
+ goto cleanup;
+
+ len = sizeof(info);
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.testmod_1), &info,
+ &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto cleanup;
+
+ check_map_info(&info);
+ /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c
+ *
+ * In bpf_testmod.c it will pass 4 and 13 (the value of data) to
+ * .test_2. So, the value of test_2_result should be 20 (4 + 13 +
+ * 3).
+ */
+ if (!attach_ops_and_check(skel, skel->maps.testmod_1, 20))
+ goto cleanup;
+ if (!attach_ops_and_check(skel, skel->maps.testmod_2, 12))
+ goto cleanup;
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+void serial_test_struct_ops_module(void)
+{
+ if (test__start_subtest("test_struct_ops_load"))
+ test_struct_ops_load();
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
new file mode 100644
index 000000000000..645d32b5160c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_multi_pages.skel.h"
+
+static void do_struct_ops_multi_pages(void)
+{
+ struct struct_ops_multi_pages *skel;
+ struct bpf_link *link;
+
+ /* The size of all trampolines of skel->maps.multi_pages should be
+ * over 1 page (at least for x86).
+ */
+ skel = struct_ops_multi_pages__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.multi_pages);
+ ASSERT_OK_PTR(link, "attach_multi_pages");
+
+ bpf_link__destroy(link);
+ struct_ops_multi_pages__destroy(skel);
+}
+
+void test_struct_ops_multi_pages(void)
+{
+ if (test__start_subtest("multi_pages"))
+ do_struct_ops_multi_pages();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
new file mode 100644
index 000000000000..106ea447965a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <testing_helpers.h>
+
+static void load_bpf_test_no_cfi(void)
+{
+ int fd;
+ int err;
+
+ fd = open("bpf_test_no_cfi.ko", O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open"))
+ return;
+
+ /* The module will try to register a struct_ops type without
+ * cfi_stubs and with cfi_stubs.
+ *
+ * The one without cfi_stub should fail. The module will be loaded
+ * successfully only if the result of the registration is as
+ * expected, or it fails.
+ */
+ err = finit_module(fd, "", 0);
+ close(fd);
+ if (!ASSERT_OK(err, "finit_module"))
+ return;
+
+ err = delete_module("bpf_test_no_cfi", 0);
+ ASSERT_OK(err, "delete_module");
+}
+
+void test_struct_ops_no_cfi(void)
+{
+ if (test__start_subtest("load_bpf_test_no_cfi"))
+ load_bpf_test_no_cfi();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 2b3c6dd66259..5f1fb0a2ea56 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -118,9 +118,9 @@ fail:
static void cleanup(void)
{
SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0");
- SYS_NOFAIL("ip link del veth1 2> /dev/null");
- SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1);
- SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1);
+ SYS_NOFAIL("ip link del veth1");
+ SYS_NOFAIL("ip link del %s", VXLAN_TUNL_DEV1);
+ SYS_NOFAIL("ip link del %s", IP6VXLAN_TUNL_DEV1);
}
static int add_vxlan_tunnel(void)
@@ -265,9 +265,9 @@ fail:
static void delete_ipip_tunnel(void)
{
SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0);
- SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null");
+ SYS_NOFAIL("ip -n at_ns0 fou del port 5555");
SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1);
- SYS_NOFAIL("ip fou del port 5555 2> /dev/null");
+ SYS_NOFAIL("ip fou del port 5555");
}
static int add_xfrm_tunnel(void)
@@ -346,13 +346,13 @@ fail:
static void delete_xfrm_tunnel(void)
{
- SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null",
+ SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32",
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
- SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null",
+ SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32",
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
- SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
- SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 760ad96b4be0..d66687f1ee6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -4,10 +4,29 @@
#include "timer.skel.h"
#include "timer_failure.skel.h"
+#define NUM_THR 8
+
+static void *spin_lock_thread(void *arg)
+{
+ int i, err, prog_fd = *(int *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ for (i = 0; i < 10000; i++) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err") ||
+ !ASSERT_OK(topts.retval, "test_run_opts retval"))
+ break;
+ }
+
+ pthread_exit(arg);
+}
+
static int timer(struct timer *timer_skel)
{
- int err, prog_fd;
+ int i, err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts);
+ pthread_t thread_id[NUM_THR];
+ void *ret;
err = timer__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach"))
@@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel)
/* check that code paths completed */
ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+ prog_fd = bpf_program__fd(timer_skel->progs.race);
+ for (i = 0; i < NUM_THR; i++) {
+ err = pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ while (i) {
+ err = pthread_join(thread_id[--i], &ret);
+ if (ASSERT_OK(err, "pthread_join"))
+ ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
+ }
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
new file mode 100644
index 000000000000..fc4a175d8d76
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -0,0 +1,1052 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "cap_helpers.h"
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+#include "priv_map.skel.h"
+#include "priv_prog.skel.h"
+#include "dummy_st_ops_success.skel.h"
+#include "token_lsm.skel.h"
+
+static inline int sys_mount(const char *dev_name, const char *dir_name,
+ const char *type, unsigned long flags,
+ const void *data)
+{
+ return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
+}
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fspick(int dfd, const char *path, unsigned flags)
+{
+ return syscall(__NR_fspick, dfd, path, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
+}
+
+static int drop_priv_caps(__u64 *old_caps)
+{
+ return cap_disable_effective((1ULL << CAP_BPF) |
+ (1ULL << CAP_PERFMON) |
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_SYS_ADMIN), old_caps);
+}
+
+static int restore_priv_caps(__u64 old_caps)
+{
+ return cap_enable_effective(old_caps, NULL);
+}
+
+static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
+{
+ char buf[32];
+ int err;
+
+ if (!mask_str) {
+ if (mask == ~0ULL) {
+ mask_str = "any";
+ } else {
+ snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
+ mask_str = buf;
+ }
+ }
+
+ err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
+ mask_str, 0);
+ if (err < 0)
+ err = -errno;
+ return err;
+}
+
+#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)
+
+struct bpffs_opts {
+ __u64 cmds;
+ __u64 maps;
+ __u64 progs;
+ __u64 attachs;
+ const char *cmds_str;
+ const char *maps_str;
+ const char *progs_str;
+ const char *attachs_str;
+};
+
+static int create_bpffs_fd(void)
+{
+ int fs_fd;
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ ASSERT_GE(fs_fd, 0, "fs_fd");
+
+ return fs_fd;
+}
+
+static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
+{
+ int mnt_fd, err;
+
+ /* set up token delegation mount options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
+ if (!ASSERT_OK(err, "fs_cfg_cmds"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
+ if (!ASSERT_OK(err, "fs_cfg_maps"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
+ if (!ASSERT_OK(err, "fs_cfg_progs"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
+ if (!ASSERT_OK(err, "fs_cfg_attachs"))
+ return err;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (err < 0)
+ return -errno;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (err < 0)
+ return -errno;
+
+ return mnt_fd;
+}
+
+/* send FD over Unix domain (AF_UNIX) socket */
+static int sendfd(int sockfd, int fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1] = { fd }, err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
+ err = sendmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "sendmsg"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* receive FD over Unix domain (AF_UNIX) socket */
+static int recvfd(int sockfd, int *fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1], err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+
+ err = recvmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "recvmsg"))
+ return -EINVAL;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
+ !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
+ !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
+ !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
+ return -EINVAL;
+
+ memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
+ *fd = fds[0];
+
+ return 0;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);
+
+static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
+{
+ int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
+ struct token_lsm *lsm_skel = NULL;
+
+ /* load and attach LSM "policy" before we go into unpriv userns */
+ lsm_skel = token_lsm__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ lsm_skel->bss->my_pid = getpid();
+ err = token_lsm__attach(lsm_skel);
+ if (!ASSERT_OK(err, "lsm_skel_attach"))
+ goto cleanup;
+
+ /* setup userns with root mappings */
+ err = create_and_enter_userns();
+ if (!ASSERT_OK(err, "create_and_enter_userns"))
+ goto cleanup;
+
+ /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "create_mountns"))
+ goto cleanup;
+
+ err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+ if (!ASSERT_OK(err, "remount_root"))
+ goto cleanup;
+
+ fs_fd = create_bpffs_fd();
+ if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, fs_fd);
+ if (!ASSERT_OK(err, "send_fs_fd"))
+ goto cleanup;
+ zclose(fs_fd);
+
+ /* avoid mucking around with mount namespaces and mounting at
+ * well-known path, just get detach-mounted BPF FS fd back from parent
+ */
+ err = recvfd(sock_fd, &mnt_fd);
+ if (!ASSERT_OK(err, "recv_mnt_fd"))
+ goto cleanup;
+
+ /* try to fspick() BPF FS and try to add some delegation options */
+ fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
+ if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot reconfigure to set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
+ if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* create BPF token FD and pass it to parent for some extra checks */
+ token_fd = bpf_token_create(bpffs_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = sendfd(sock_fd, token_fd);
+ if (!ASSERT_OK(err, "send_token_fd"))
+ goto cleanup;
+ zclose(token_fd);
+
+ /* do custom test logic with customly set up BPF FS instance */
+ err = callback(bpffs_fd, lsm_skel);
+ if (!ASSERT_OK(err, "test_callback"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ zclose(sock_fd);
+ zclose(mnt_fd);
+ zclose(fs_fd);
+ zclose(bpffs_fd);
+ zclose(token_fd);
+
+ lsm_skel->bss->my_pid = 0;
+ token_lsm__destroy(lsm_skel);
+
+ exit(-err);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
+{
+ int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
+
+ err = recvfd(sock_fd, &fs_fd);
+ if (!ASSERT_OK(err, "recv_bpffs_fd"))
+ goto cleanup;
+
+ mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, mnt_fd);
+ if (!ASSERT_OK(err, "send_mnt_fd"))
+ goto cleanup;
+ zclose(mnt_fd);
+
+ /* receive BPF token FD back from child for some extra tests */
+ err = recvfd(sock_fd, &token_fd);
+ if (!ASSERT_OK(err, "recv_token_fd"))
+ goto cleanup;
+
+ err = wait_for_pid(child_pid);
+ ASSERT_OK(err, "waitpid_child");
+
+cleanup:
+ zclose(sock_fd);
+ zclose(fs_fd);
+ zclose(mnt_fd);
+ zclose(token_fd);
+
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static void subtest_userns(struct bpffs_opts *bpffs_opts,
+ child_callback_fn child_cb)
+{
+ int sock_fds[2] = { -1, -1 };
+ int child_pid = 0, err;
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
+ if (!ASSERT_OK(err, "socketpair"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GE(child_pid, 0, "fork"))
+ goto cleanup;
+
+ if (child_pid == 0) {
+ zclose(sock_fds[0]);
+ return child(sock_fds[1], bpffs_opts, child_cb);
+
+ } else {
+ zclose(sock_fds[1]);
+ return parent(child_pid, bpffs_opts, sock_fds[0]);
+ }
+
+cleanup:
+ zclose(sock_fds[0]);
+ zclose(sock_fds[1]);
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts);
+ int err, token_fd = -1, map_fd = -1;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no token, no CAP_BPF -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* token without CAP_BPF -> fail */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* CAP_BPF without token -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* finally, namespaced CAP_BPF + token -> success */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+cleanup:
+ zclose(token_fd);
+ zclose(map_fd);
+ return err;
+}
+
+static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
+ int err, token_fd = -1, btf_fd = -1;
+ const void *raw_btf_data;
+ struct btf *btf = NULL;
+ __u32 raw_btf_size;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* setup a trivial BTF data to load to the kernel */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ goto cleanup;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
+ goto cleanup;
+
+ /* no token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = 0;
+ btf_opts.token_fd = 0;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* token + CAP_BPF -> success */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ btf__free(btf);
+ zclose(btf_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
+ int err, token_fd = -1, prog_fd = -1;
+ struct bpf_insn insns[] = {
+ /* bpf_jiffies64() requires CAP_BPF */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ /* bpf_get_current_task() requires CAP_PERFMON */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
+ /* r0 = 0; exit; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* validate we can successfully load BPF program with token; this
+ * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
+ * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
+ * BPF token wired properly in a bunch of places in the kernel
+ */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_opts.expected_attach_type = BPF_XDP;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no token + caps -> failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no caps + token -> failure */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no caps + no token -> definitely a failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = 0;
+cleanup:
+ zclose(prog_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_map *skel;
+ int err;
+
+ skel = priv_map__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_map__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_map__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = priv_map__load(skel);
+ priv_map__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_prog *skel;
+ int err;
+
+ skel = priv_prog__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_prog__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_capable() with LSM */
+ lsm_skel->bss->reject_capable = true;
+ lsm_skel->bss->reject_cmd = false;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_cmd() with LSM */
+ lsm_skel->bss->reject_capable = false;
+ lsm_skel->bss->reject_cmd = true;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
+ * which should cause struct_ops application to fail, as BTF won't be uploaded
+ * into the kernel, even if STRUCT_OPS programs themselves are allowed
+ */
+static int validate_struct_ops_load(int mnt_fd, bool expect_success)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (expect_success) {
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+ } else /* expect failure */ {
+ if (!ASSERT_ERR(err, "obj_token_path_load"))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, false /* should fail */);
+}
+
+static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, true /* should succeed */);
+}
+
+#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
+#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
+
+static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
+ * token automatically and implicitly
+ */
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ return -EINVAL;
+
+ /* disable implicit BPF token creation by setting
+ * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
+ */
+ err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ return -EINVAL;
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
+ unsetenv(TOKEN_ENVVAR);
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+ unsetenv(TOKEN_ENVVAR);
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from /sys/fs/bpf mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ return -EINVAL;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, should fail */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over custom location, so libbpf can't create
+ * BPF token implicitly, unless pointed to it through
+ * LIBBPF_BPF_TOKEN_PATH envvar
+ */
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
+ goto err_out;
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ goto err_out;
+
+ /* even though we have BPF FS with delegation, it's not at default
+ * /sys/fs/bpf location, so we still fail to load until envvar is set up
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
+ dummy_st_ops_success__destroy(skel);
+ goto err_out;
+ }
+
+ err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ goto err_out;
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from custom mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ goto err_out;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, envvar
+ * will be ignored, should fail
+ */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ goto err_out;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ goto err_out;
+
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return 0;
+err_out:
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return -EINVAL;
+}
+
+#define bit(n) (1ULL << (n))
+
+void test_token(void)
+{
+ if (test__start_subtest("map_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "map_create",
+ .maps_str = "stack",
+ };
+
+ subtest_userns(&opts, userns_map_create);
+ }
+ if (test__start_subtest("btf_token")) {
+ struct bpffs_opts opts = {
+ .cmds = 1ULL << BPF_BTF_LOAD,
+ };
+
+ subtest_userns(&opts, userns_btf_load);
+ }
+ if (test__start_subtest("prog_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "PROG_LOAD",
+ .progs_str = "XDP",
+ .attachs_str = "xdp",
+ };
+
+ subtest_userns(&opts, userns_prog_load);
+ }
+ if (test__start_subtest("obj_priv_map")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .maps = bit(BPF_MAP_TYPE_QUEUE),
+ };
+
+ subtest_userns(&opts, userns_obj_priv_map);
+ }
+ if (test__start_subtest("obj_priv_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_PROG_LOAD),
+ .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_prog);
+ }
+ if (test__start_subtest("obj_priv_btf_fail")) {
+ struct bpffs_opts opts = {
+ /* disallow BTF loading */
+ .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_fail);
+ }
+ if (test__start_subtest("obj_priv_btf_success")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_success);
+ }
+ if (test__start_subtest("obj_priv_implicit_token")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token);
+ }
+ if (test__start_subtest("obj_priv_implicit_token_envvar")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
new file mode 100644
index 000000000000..a222df765bc3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "tracing_failure.skel.h"
+
+static void test_bpf_spin_lock(bool is_spin_lock)
+{
+ struct tracing_failure *skel;
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ if (is_spin_lock)
+ bpf_program__set_autoload(skel->progs.test_spin_lock, true);
+ else
+ bpf_program__set_autoload(skel->progs.test_spin_unlock, true);
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_OK(err, "tracing_failure__load"))
+ goto out;
+
+ err = tracing_failure__attach(skel);
+ ASSERT_ERR(err, "tracing_failure__attach");
+
+out:
+ tracing_failure__destroy(skel);
+}
+
+void test_tracing_failure(void)
+{
+ if (test__start_subtest("bpf_spin_lock"))
+ test_bpf_spin_lock(true);
+ if (test__start_subtest("bpf_spin_unlock"))
+ test_bpf_spin_lock(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index d62c5bf00e71..985273832f89 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -4,6 +4,7 @@
#include "cap_helpers.h"
#include "verifier_and.skel.h"
+#include "verifier_arena.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
@@ -28,6 +29,7 @@
#include "verifier_div0.skel.h"
#include "verifier_div_overflow.skel.h"
#include "verifier_global_subprogs.skel.h"
+#include "verifier_global_ptr_args.skel.h"
#include "verifier_gotol.skel.h"
#include "verifier_helper_access_var_len.skel.h"
#include "verifier_helper_packet_access.skel.h"
@@ -117,6 +119,7 @@ static void run_tests_aux(const char *skel_name,
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
void test_verifier_and(void) { RUN(verifier_and); }
+void test_verifier_arena(void) { RUN(verifier_arena); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
@@ -140,6 +143,7 @@ void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_st
void test_verifier_div0(void) { RUN(verifier_div0); }
void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
+void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
void test_verifier_gotol(void) { RUN(verifier_gotol); }
void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index c3b45745cbcc..6d8b54124cb3 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -511,7 +511,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons)
if (!ASSERT_OK(err, "bond bpf_xdp_query"))
goto out;
- if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+ if (!ASSERT_EQ(query_opts.feature_flags, 0,
"bond query_opts.feature_flags"))
goto out;
@@ -601,7 +601,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons)
if (!ASSERT_OK(err, "bond bpf_xdp_query"))
goto out;
- ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+ ASSERT_EQ(query_opts.feature_flags, 0,
"bond query_opts.feature_flags");
out:
bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
index f3927829a55a..4599154c8e9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdpwall.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
@@ -9,7 +9,7 @@ void test_xdpwall(void)
struct xdpwall *skel;
skel = xdpwall__open_and_load();
- ASSERT_OK_PTR(skel, "Does LLMV have https://reviews.llvm.org/D109073?");
+ ASSERT_OK_PTR(skel, "Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5?");
xdpwall__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
new file mode 100644
index 000000000000..b7bb712cacfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_htab.h"
+
+void __arena *htab_for_user;
+bool skip = false;
+
+int zero = 0;
+
+SEC("syscall")
+int arena_htab_llvm(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+ struct htab __arena *htab;
+ __u64 i;
+
+ htab = bpf_alloc(sizeof(*htab));
+ cast_kern(htab);
+ htab_init(htab);
+
+ /* first run. No old elems in the table */
+ for (i = zero; i < 1000; i++)
+ htab_update_elem(htab, i, i);
+
+ /* should replace all elems with new ones */
+ for (i = zero; i < 1000; i++)
+ htab_update_elem(htab, i, i);
+ cast_user(htab);
+ htab_for_user = htab;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab_asm.c b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
new file mode 100644
index 000000000000..6cd70ea12f0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_ARENA_FORCE_ASM
+#define arena_htab_llvm arena_htab_asm
+#include "arena_htab.c"
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
new file mode 100644
index 000000000000..cd35b8448435
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+struct arena_list_head __arena *list_head;
+int list_sum;
+int cnt;
+bool skip = false;
+
+#ifdef __BPF_FEATURE_ARENA_CAST
+long __arena arena_sum;
+int __arena test_val = 1;
+struct arena_list_head __arena global_head;
+#else
+long arena_sum SEC(".arena.1");
+int test_val SEC(".arena.1");
+#endif
+
+int zero;
+
+SEC("syscall")
+int arena_list_add(void *ctx)
+{
+#ifdef __BPF_FEATURE_ARENA_CAST
+ __u64 i;
+
+ list_head = &global_head;
+
+ for (i = zero; i < cnt; cond_break, i++) {
+ struct elem __arena *n = bpf_alloc(sizeof(*n));
+
+ test_val++;
+ n->value = i;
+ arena_sum += i;
+ list_add_head(&n->node, list_head);
+ }
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+int arena_list_del(void *ctx)
+{
+#ifdef __BPF_FEATURE_ARENA_CAST
+ struct elem __arena *n;
+ int sum = 0;
+
+ arena_sum = 0;
+ list_for_each_entry(n, list_head, node) {
+ sum += n->value;
+ arena_sum += n->value;
+ list_del(&n->node);
+ bpf_free(n);
+ }
+ list_sum = sum;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
index 3517c0e01206..36734683acbd 100644
--- a/tools/testing/selftests/bpf/progs/async_stack_depth.c
+++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c
@@ -30,7 +30,7 @@ static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
}
SEC("tc")
-__failure __msg("combined stack size of 2 calls is 576. Too large")
+__failure __msg("combined stack size of 2 calls is")
int pseudo_call_check(struct __sk_buff *ctx)
{
struct hmap_elem *elem;
@@ -45,7 +45,7 @@ int pseudo_call_check(struct __sk_buff *ctx)
}
SEC("tc")
-__failure __msg("combined stack size of 2 calls is 608. Too large")
+__failure __msg("combined stack size of 2 calls is")
int async_call_root_check(struct __sk_buff *ctx)
{
struct hmap_elem *elem;
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
new file mode 100644
index 000000000000..b7e175cd0af0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1) { return 0; }
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2) { return 0; }
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops2 testmod_2 = {
+ .test_1 = (void *)test_1
+};
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops2.c b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
new file mode 100644
index 000000000000..64a95f6be86d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* This is an unused struct_ops program, it lacks corresponding
+ * struct_ops map, which provides attachment information.
+ * W/o additional configuration attempt to load such
+ * BPF object file would fail.
+ */
+SEC("struct_ops/foo")
+void foo(void) {}
diff --git a/tools/testing/selftests/bpf/progs/bpf_compiler.h b/tools/testing/selftests/bpf/progs/bpf_compiler.h
new file mode 100644
index 000000000000..a7c343dc82e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_compiler.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_COMPILER_H__
+#define __BPF_COMPILER_H__
+
+#define DO_PRAGMA_(X) _Pragma(#X)
+
+#if __clang__
+#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable))
+#else
+/* In GCC -funroll-loops, which is enabled with -O2, should have the
+ same impact than the loop-unroll-enable pragma above. */
+#define __pragma_loop_unroll
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N))
+#else
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N)
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full))
+#else
+#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534)
+#endif
+
+#if __clang__
+#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable))
+#else
+#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 2fd59970c43a..fb2f5513e29e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -80,7 +80,7 @@
#define __imm(name) [name]"i"(name)
#define __imm_const(name, expr) [name]"i"(expr)
#define __imm_addr(name) [name]"i"(&name)
-#define __imm_ptr(name) [name]"p"(&name)
+#define __imm_ptr(name) [name]"r"(&name)
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
/* Magic constants used with __retval() */
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index e8bd4b7b5ef7..7001965d1cc3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -51,9 +51,25 @@
#define ICSK_TIME_LOSS_PROBE 5
#define ICSK_TIME_REO_TIMEOUT 6
+#define ETH_ALEN 6
#define ETH_HLEN 14
+#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_TIMESTAMP 8
+#define TCPOPT_SACK_PERM 4
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_TIMESTAMP 10
+#define TCPOLEN_SACK_PERM 2
+
#define CHECKSUM_NONE 0
#define CHECKSUM_PARTIAL 3
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
index 610c2427fd93..3500e4b69ebe 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
@@ -27,32 +27,6 @@ bool is_cgroup1 = 0;
struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
-static void __on_lookup(struct cgroup *cgrp)
-{
- bpf_cgrp_storage_delete(&map_a, cgrp);
- bpf_cgrp_storage_delete(&map_b, cgrp);
-}
-
-SEC("fentry/bpf_local_storage_lookup")
-int BPF_PROG(on_lookup)
-{
- struct task_struct *task = bpf_get_current_task_btf();
- struct cgroup *cgrp;
-
- if (is_cgroup1) {
- cgrp = bpf_task_get_cgroup1(task, target_hid);
- if (!cgrp)
- return 0;
-
- __on_lookup(cgrp);
- bpf_cgroup_release(cgrp);
- return 0;
- }
-
- __on_lookup(task->cgroups->dfl_cgrp);
- return 0;
-}
-
static void __on_update(struct cgroup *cgrp)
{
long *ptr;
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
index ca8aa2f116b3..2ef0e0c46d17 100644
--- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -28,8 +28,7 @@ int connect_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 0;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 0cd4aebb97cf..c705d8112a35 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -23,41 +23,42 @@ struct array_map {
__uint(max_entries, 1);
} __cpumask_map SEC(".maps");
-struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
-void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
-struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
-u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
-u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak;
+void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak;
+struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym __weak;
u32 bpf_cpumask_first_and(const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
-void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
-bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym;
-void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym;
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym __weak;
bool bpf_cpumask_and(struct bpf_cpumask *cpumask,
const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
+ const struct cpumask *src2) __ksym __weak;
void bpf_cpumask_or(struct bpf_cpumask *cpumask,
const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
+ const struct cpumask *src2) __ksym __weak;
void bpf_cpumask_xor(struct bpf_cpumask *cpumask,
const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
-bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
-bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
-void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
-u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym;
-u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
-
-void bpf_rcu_read_lock(void) __ksym;
-void bpf_rcu_read_unlock(void) __ksym;
+ const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+
+void bpf_rcu_read_lock(void) __ksym __weak;
+void bpf_rcu_read_unlock(void) __ksym __weak;
static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
{
diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
index 9c078f34bbb2..5a76754f846b 100644
--- a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
@@ -27,8 +27,7 @@ int getpeername_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 1;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
index ac7145111497..7867113c696f 100644
--- a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
@@ -27,8 +27,7 @@ int getsockname_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 1;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index fe971992e635..3db416606f2f 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_compiler.h"
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
@@ -78,8 +79,8 @@ int iter_err_unsafe_asm_loop(const void *ctx)
"*(u32 *)(r1 + 0) = r6;" /* invalid */
:
: [it]"r"(&it),
- [small_arr]"p"(small_arr),
- [zero]"p"(zero),
+ [small_arr]"r"(small_arr),
+ [zero]"r"(zero),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
@@ -183,7 +184,7 @@ int iter_pragma_unroll_loop(const void *ctx)
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 2);
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
@@ -238,7 +239,7 @@ int iter_multiple_sequential_loops(const void *ctx)
bpf_iter_num_destroy(&it);
bpf_iter_num_new(&it, 0, 2);
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c
index c9b4055cd410..e4d53e40ff20 100644
--- a/tools/testing/selftests/bpf/progs/iters_task.c
+++ b/tools/testing/selftests/bpf/progs/iters_task.c
@@ -10,7 +10,7 @@
char _license[] SEC("license") = "GPL";
pid_t target_pid;
-int procs_cnt, threads_cnt, proc_threads_cnt;
+int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx)
procs_cnt = threads_cnt = proc_threads_cnt = 0;
bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, ~0U) {
+ /* Below instructions shouldn't be executed for invalid flags */
+ invalid_cnt++;
+ }
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) {
+ /* Below instructions shouldn't be executed for invalid task__nullable */
+ invalid_cnt++;
+ }
+
bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
if (pos->pid == target_pid)
procs_cnt++;
diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
new file mode 100644
index 000000000000..2414ac20b6d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bin_data {
+ char blob[32];
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(kptr) struct bin_data __kptr * ptr;
+
+SEC("tc")
+__naked int kptr_xchg_inline(void)
+{
+ asm volatile (
+ "r1 = %[ptr] ll;"
+ "r2 = 0;"
+ "call %[bpf_kptr_xchg];"
+ "if r0 == 0 goto 1f;"
+ "r1 = r0;"
+ "r2 = 0;"
+ "call %[bpf_obj_drop_impl];"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(ptr),
+ __imm(bpf_kptr_xchg),
+ __imm(bpf_obj_drop_impl)
+ : __clobber_all
+ );
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __btf_root(void)
+{
+ bpf_obj_drop(NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c
index b35337926d66..0de0357f57cc 100644
--- a/tools/testing/selftests/bpf/progs/loop4.c
+++ b/tools/testing/selftests/bpf/progs/loop4.c
@@ -3,6 +3,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
SEC("socket")
@@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb)
{
int ret = 0, i;
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 20; i++)
if (skb->len)
ret |= 1 << i;
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index 3325da17ec81..efaf622c28dd 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -316,7 +316,7 @@ struct lpm_trie {
} __attribute__((preserve_access_index));
struct lpm_key {
- struct bpf_lpm_trie_key trie_key;
+ struct bpf_lpm_trie_key_hdr trie_key;
__u32 data;
};
diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c
new file mode 100644
index 000000000000..9085be50f03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_map.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, 1);
+ __type(value, __u32);
+} priv_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
new file mode 100644
index 000000000000..3c7b2b618c8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe")
+int kprobe_prog(void *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index de3b6e4e4d0a..6957d9f2805e 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -8,6 +8,7 @@
#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
+#include "bpf_compiler.h"
#ifndef NULL
#define NULL 0
@@ -169,7 +170,7 @@ static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
int spid)
{
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == spid)
@@ -185,7 +186,7 @@ static INLINE void populate_ancestors(struct task_struct* task,
ancestors_data->num_ancestors = 0;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
parent = BPF_CORE_READ(parent, real_parent);
@@ -212,7 +213,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
size_t filepart_length;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
filepart_length =
@@ -261,7 +262,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
pids_cgrp_id___local);
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys_state* subsys =
@@ -402,7 +403,7 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
if (kill_data == NULL)
return 0;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == 0) {
@@ -482,7 +483,7 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
struct dentry* parent_dentry;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
filepart_length =
@@ -508,7 +509,7 @@ is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
struct dentry* parent_dentry;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
@@ -629,7 +630,7 @@ int raw_tracepoint__sched_process_exit(void* ctx)
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 026d573ce179..86484f07e1d1 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -8,6 +8,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_compiler.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@@ -298,11 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#elif defined(UNROLL_COUNT)
-#pragma clang loop unroll_count(UNROLL_COUNT)
+ __pragma_loop_unroll_count(UNROLL_COUNT)
#else
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
#endif /* NO_UNROLL */
/* Unwind python stack */
#ifdef USE_ITER
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 14fb01437fb8..ab3a532b7dd6 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -319,3 +319,123 @@ int cross_rcu_region(void *ctx)
bpf_rcu_read_unlock();
return 0;
}
+
+__noinline
+static int static_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog(NULL);
+}
+
+__noinline
+static int static_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_lock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_lock(NULL);
+}
+
+__noinline
+static int static_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_unlock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_unlock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_unlock(NULL);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += static_subprog(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += global_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += static_subprog_lock(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += global_subprog_lock(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += static_subprog_unlock(ctx);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_unlock(ret);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
new file mode 100644
index 000000000000..986f96687ae1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+int target_pid = 0;
+void *user_ptr = 0;
+int read_ret[8];
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int do_probe_read(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
+ read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
+ read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
+ read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
+ read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
+ read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int do_copy_from_user(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
+ read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
+ bpf_get_current_task_btf(), 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
index 4dfbc8552558..1c7ab44bccfa 100644
--- a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
@@ -27,8 +27,7 @@ int recvmsg_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 1;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS,
sizeof(SERVUN_ADDRESS) - 1) != 0)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
index 1f67e832666e..d8869b03dda9 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -28,8 +28,7 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 0;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
index 3e745793b27a..46d6eb2a3b17 100644
--- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -12,8 +12,6 @@ int cookie_found = 0;
__u64 cookie = 0;
__u32 omem = 0;
-void *bpf_rdonly_cast(void *, __u32) __ksym;
-
struct {
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
@@ -29,7 +27,7 @@ int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
if (local_storage_ptr != local_storage)
return 0;
- sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock));
+ sk = bpf_core_cast(sk_ptr, struct sock);
if (sk->sk_cookie.counter != cookie)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index ffbbfe1fa1c1..96531b0d9d55 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -32,7 +32,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
if (!sk)
return 0;
- sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock));
+ sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != AF_INET6 ||
sk->sk_state != TCP_LISTEN ||
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
@@ -68,7 +68,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
if (!sk)
return 0;
- sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock));
+ sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != AF_INET6 ||
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
return 0;
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 40df2cc26eaf..f74459eead26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
typedef uint32_t pid_t;
struct task_struct {};
@@ -419,9 +421,9 @@ static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
if (i >= map.cnt)
@@ -560,25 +562,25 @@ static void *read_strobe_meta(struct task_struct *task,
payload_off = sizeof(data->payload);
#else
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
new file mode 100644
index 000000000000..ba10c3896213
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_2)
+{
+ return 0;
+}
+
+struct bpf_testmod_ops___v1 {
+ int (*test_1)(void);
+};
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ int (*does_not_exist)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v1 testmod_1 = {
+ .test_1 = (void *)test_1
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .does_not_exist = (void *)test_2
+};
+
+SEC("?.struct_ops")
+struct bpf_testmod_ops___v1 optional_map = {
+ .test_1 = (void *)test_1,
+};
+
+SEC("?.struct_ops.link")
+struct bpf_testmod_ops___v1 optional_map2 = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
new file mode 100644
index 000000000000..6049d9c902d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(foo)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(bar)
+{
+ test_1_result = 24;
+ return 0;
+}
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)bar
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
new file mode 100644
index 000000000000..b450f72e744a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+/* This is a test BPF program that uses struct_ops to access an argument
+ * that may be NULL. This is a test for the verifier to ensure that it can
+ * rip PTR_MAYBE_NULL correctly.
+ */
+SEC("struct_ops/test_maybe_null")
+int BPF_PROG(test_maybe_null, int dummy,
+ struct task_struct *task)
+{
+ if (task)
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_maybe_null = (void *)test_maybe_null,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
new file mode 100644
index 000000000000..6283099ec383
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+SEC("struct_ops/test_maybe_null_struct_ptr")
+int BPF_PROG(test_maybe_null_struct_ptr, int dummy,
+ struct task_struct *task)
+{
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_struct_ptr = {
+ .test_maybe_null = (void *)test_maybe_null_struct_ptr,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
new file mode 100644
index 000000000000..026cabfa7f1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+int test_2_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 0xdeadbeef;
+ return 0;
+}
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2, int a, int b)
+{
+ test_2_result = a + b;
+}
+
+SEC("struct_ops/test_3")
+int BPF_PROG(test_3, int a, int b)
+{
+ test_2_result = a + b + 3;
+ return a + b + 3;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 0x1,
+};
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2_v2, int a, int b)
+{
+ test_2_result = a * b;
+}
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
new file mode 100644
index 000000000000..9efcc6e4d356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TRAMP(x) \
+ SEC("struct_ops/tramp_" #x) \
+ int BPF_PROG(tramp_ ## x, int a) \
+ { \
+ return a; \
+ }
+
+TRAMP(1)
+TRAMP(2)
+TRAMP(3)
+TRAMP(4)
+TRAMP(5)
+TRAMP(6)
+TRAMP(7)
+TRAMP(8)
+TRAMP(9)
+TRAMP(10)
+TRAMP(11)
+TRAMP(12)
+TRAMP(13)
+TRAMP(14)
+TRAMP(15)
+TRAMP(16)
+TRAMP(17)
+TRAMP(18)
+TRAMP(19)
+TRAMP(20)
+TRAMP(21)
+TRAMP(22)
+TRAMP(23)
+TRAMP(24)
+TRAMP(25)
+TRAMP(26)
+TRAMP(27)
+TRAMP(28)
+TRAMP(29)
+TRAMP(30)
+TRAMP(31)
+TRAMP(32)
+TRAMP(33)
+TRAMP(34)
+TRAMP(35)
+TRAMP(36)
+TRAMP(37)
+TRAMP(38)
+TRAMP(39)
+TRAMP(40)
+
+#define F_TRAMP(x) .tramp_ ## x = (void *)tramp_ ## x
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops multi_pages = {
+ F_TRAMP(1),
+ F_TRAMP(2),
+ F_TRAMP(3),
+ F_TRAMP(4),
+ F_TRAMP(5),
+ F_TRAMP(6),
+ F_TRAMP(7),
+ F_TRAMP(8),
+ F_TRAMP(9),
+ F_TRAMP(10),
+ F_TRAMP(11),
+ F_TRAMP(12),
+ F_TRAMP(13),
+ F_TRAMP(14),
+ F_TRAMP(15),
+ F_TRAMP(16),
+ F_TRAMP(17),
+ F_TRAMP(18),
+ F_TRAMP(19),
+ F_TRAMP(20),
+ F_TRAMP(21),
+ F_TRAMP(22),
+ F_TRAMP(23),
+ F_TRAMP(24),
+ F_TRAMP(25),
+ F_TRAMP(26),
+ F_TRAMP(27),
+ F_TRAMP(28),
+ F_TRAMP(29),
+ F_TRAMP(30),
+ F_TRAMP(31),
+ F_TRAMP(32),
+ F_TRAMP(33),
+ F_TRAMP(34),
+ F_TRAMP(35),
+ F_TRAMP(36),
+ F_TRAMP(37),
+ F_TRAMP(38),
+ F_TRAMP(39),
+ F_TRAMP(40),
+};
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
index 4542dc683b44..f1853c38aada 100644
--- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -27,23 +27,6 @@ struct {
__type(value, long);
} map_b SEC(".maps");
-SEC("fentry/bpf_local_storage_lookup")
-int BPF_PROG(on_lookup)
-{
- struct task_struct *task = bpf_get_current_task_btf();
-
- if (!test_pid || task->pid != test_pid)
- return 0;
-
- /* The bpf_task_storage_delete will call
- * bpf_local_storage_lookup. The prog->active will
- * stop the recursion.
- */
- bpf_task_storage_delete(&map_a, task);
- bpf_task_storage_delete(&map_b, task);
- return 0;
-}
-
SEC("fentry/bpf_local_storage_update")
int BPF_PROG(on_update)
{
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 66b304982245..683c8aaa63da 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -20,8 +20,11 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
#ifdef SUBPROGS
#define INLINING __noinline
#else
@@ -267,7 +270,7 @@ static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
uint32_t acc = 0;
uint16_t *ipw = (uint16_t *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
acc += ipw[i];
}
@@ -294,7 +297,7 @@ bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
};
*is_fragment = false;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 6; i++) {
switch (exthdr.next) {
case IPPROTO_FRAGMENT:
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index f41c81212ee9..da54c09e9a15 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -23,6 +23,8 @@
#include "test_cls_redirect.h"
#include "bpf_kfuncs.h"
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
index 22aba3f6e344..6fc8b9d66e34 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -80,7 +80,7 @@ int test_core_type_id(void *ctx)
* to detect whether this test has to be executed, however strange
* that might look like.
*
- * [0] https://reviews.llvm.org/D85174
+ * [0] https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
*/
#if __has_builtin(__builtin_preserve_type_info)
struct core_reloc_type_id_output *out = (void *)&data.out;
diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
index 69509f8bb680..6afa834756e9 100644
--- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c
+++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
@@ -33,6 +33,12 @@ int BPF_PROG(tp_run)
return 0;
}
+SEC("perf_event")
+int event_run(void *ctx)
+{
+ return 0;
+}
+
SEC("kprobe.multi")
int BPF_PROG(kmulti_run)
{
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 17a9f59bf5f3..fc69ff18880d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define MAX_STACK (512 - 3 * 32 + 8)
+#define MAX_STACK 260
static __attribute__ ((noinline))
int f0(int var, struct __sk_buff *skb)
@@ -30,6 +30,10 @@ int f3(int, struct __sk_buff *skb, int);
__attribute__ ((noinline))
int f2(int val, struct __sk_buff *skb)
{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
return f1(skb) + f3(val, skb, 1);
}
@@ -44,7 +48,7 @@ int f3(int val, struct __sk_buff *skb, int var)
}
SEC("tc")
-__failure __msg("combined stack size of 4 calls is 544")
+__failure __msg("combined stack size of 3 calls is")
int global_func1(struct __sk_buff *skb)
{
return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
index 9a06e5eb1fbe..143c8a4852bf 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
@@ -26,6 +26,23 @@ int kprobe_typedef_ctx(void *ctx)
return kprobe_typedef_ctx_subprog(ctx);
}
+/* s390x defines:
+ *
+ * typedef user_pt_regs bpf_user_pt_regs_t;
+ * typedef struct { ... } user_pt_regs;
+ *
+ * And so "canonical" underlying struct type is anonymous.
+ * So on s390x only valid ways to have PTR_TO_CTX argument in global subprogs
+ * are:
+ * - bpf_user_pt_regs_t *ctx (typedef);
+ * - struct bpf_user_pt_regs_t *ctx (backwards compatible struct hack);
+ * - void *ctx __arg_ctx (arg:ctx tag)
+ *
+ * Other architectures also allow using underlying struct types (e.g.,
+ * `struct pt_regs *ctx` for x86-64)
+ */
+#ifndef bpf_target_s390
+
#define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL)))
__weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx)
@@ -40,6 +57,8 @@ int kprobe_resolved_ctx(void *ctx)
return kprobe_struct_ctx_subprog(ctx);
}
+#endif
+
/* this is current hack to make this work on old kernels */
struct bpf_user_pt_regs_t {};
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 48ff2b2ad5e7..fed66f36adb6 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 10; i++) {
struct sr6_tlv_t tlv;
@@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb)
seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (unsigned long long lo = 0; lo < 4; lo++) {
seg->lo = bpf_cpu_to_be64(4 - lo);
seg->hi = bpf_cpu_to_be64(hi);
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index f416032ba858..b295f9b721bf 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -21,6 +21,32 @@ struct {
__type(value, __u32);
} mim_hash SEC(".maps");
+/* The following three maps are used to test
+ * perf_event_array map can be an inner
+ * map of hash/array_of_maps.
+ */
+struct perf_event_array {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map0 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_array_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_hash_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
SEC("xdp")
int xdp_mimtest0(struct xdp_md *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
index 4bdd65b5aa2d..2fdc44e76624 100644
--- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -6,13 +6,13 @@
char tp_name[128];
-SEC("lsm/bpf")
+SEC("lsm.s/bpf")
int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
- bpf_probe_read_user_str(tp_name, sizeof(tp_name) - 1,
- (void *)attr->raw_tracepoint.name);
+ bpf_copy_from_user(tp_name, sizeof(tp_name) - 1,
+ (void *)attr->raw_tracepoint.name);
break;
default:
break;
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index a7278f064368..5059050f74f6 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
// we can only go as far as ~10 TLVs due to the BPF max stack size
// workaround: define induction variable "i" as "long" instead
// of "int" to prevent alu32 sub-register spilling.
- #pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (long i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;
diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h
new file mode 100644
index 000000000000..5d3a7ec36780
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_siphash.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_SIPHASH_H
+#define _TEST_SIPHASH_H
+
+/* include/linux/bitops.h */
+static inline u64 rol64(u64 word, unsigned int shift)
+{
+ return (word << (shift & 63)) | (word >> ((-shift) & 63));
+}
+
+/* include/linux/siphash.h */
+#define SIPHASH_PERMUTATION(a, b, c, d) ( \
+ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
+ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
+ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
+ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
+
+#define SIPHASH_CONST_0 0x736f6d6570736575ULL
+#define SIPHASH_CONST_1 0x646f72616e646f6dULL
+#define SIPHASH_CONST_2 0x6c7967656e657261ULL
+#define SIPHASH_CONST_3 0x7465646279746573ULL
+
+/* lib/siphash.c */
+#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+#define PREAMBLE(len) \
+ u64 v0 = SIPHASH_CONST_0; \
+ u64 v1 = SIPHASH_CONST_1; \
+ u64 v2 = SIPHASH_CONST_2; \
+ u64 v3 = SIPHASH_CONST_3; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index c482110cfc95..a724a70c6700 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -3,12 +3,14 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
SEC("tc")
int process(struct __sk_buff *skb)
{
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 5; i++) {
if (skb->cb[i] != i + 1)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index b2440a0ff422..d8d77bdffd3d 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -101,4 +101,69 @@ int bpf_spin_lock_test(struct __sk_buff *skb)
err:
return err;
}
+
+struct bpf_spin_lock lockA __hidden SEC(".data.A");
+
+__noinline
+static int static_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_lock(&lockA);
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_unlock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret + ctx->len;
+}
+
+SEC("tc")
+int lock_static_subprog_call(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_lock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ ret = static_subprog_lock(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_unlock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ ret = static_subprog_unlock(ctx);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
index 86cd183ef6dc..43f40c4fe241 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -201,4 +201,48 @@ CHECK(innermapval_mapval, &iv->lock, &v->lock);
#undef CHECK
+__noinline
+int global_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ ret += ctx->protocol;
+ return ret + ctx->mark;
+}
+
+__noinline
+static int static_subprog_call_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len + global_subprog(ctx);
+}
+
+SEC("?tc")
+int lock_global_subprog_call1(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?tc")
+int lock_global_subprog_call2(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog_call_global(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 553a282d816a..7f74077d6622 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -30,7 +32,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -59,7 +61,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index 2b64bc563a12..68a75436e8af 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -30,7 +32,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 5489823c83fc..efc3c61f7852 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
@@ -31,7 +33,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index e6e678aa9874..404124a93892 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -19,6 +19,9 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
static const int cfg_port = 8000;
@@ -81,7 +84,7 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
csum += *iph16++;
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
new file mode 100644
index 000000000000..c8e4553648bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_kfuncs.h"
+#include "test_siphash.h"
+#include "test_tcp_custom_syncookie.h"
+
+#define MAX_PACKET_OFF 0xffff
+
+/* Hash is calculated for each client and split into ISN and TS.
+ *
+ * MSB LSB
+ * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 |
+ * | Hash_1 | MSS | ECN | SACK | WScale |
+ *
+ * TS: | 31 ... 8 | 7 ... 0 |
+ * | Random | Hash_2 |
+ */
+#define COOKIE_BITS 8
+#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1)
+
+enum {
+ /* 0xf is invalid thus means that SYN did not have WScale. */
+ BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1,
+ BPF_SYNCOOKIE_SACK = (1 << 4),
+ BPF_SYNCOOKIE_ECN = (1 << 5),
+};
+
+#define MSS_LOCAL_IPV4 65495
+#define MSS_LOCAL_IPV6 65476
+
+const __u16 msstab4[] = {
+ 536,
+ 1300,
+ 1460,
+ MSS_LOCAL_IPV4,
+};
+
+const __u16 msstab6[] = {
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
+ 1480 - 60,
+ 9000 - 60,
+ MSS_LOCAL_IPV6,
+};
+
+static siphash_key_t test_key_siphash = {
+ { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }
+};
+
+struct tcp_syncookie {
+ struct __sk_buff *skb;
+ void *data;
+ void *data_end;
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __be32 *ptr32;
+ struct bpf_tcp_req_attrs attrs;
+ u32 off;
+ u32 cookie;
+ u64 first;
+};
+
+bool handled_syn, handled_ack;
+
+static int tcp_load_headers(struct tcp_syncookie *ctx)
+{
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+
+ if (ctx->eth + 1 > ctx->data_end)
+ goto err;
+
+ switch (bpf_ntohs(ctx->eth->h_proto)) {
+ case ETH_P_IP:
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+
+ if (ctx->ipv4 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4)
+ goto err;
+
+ if (ctx->ipv4->version != 4)
+ goto err;
+
+ if (ctx->ipv4->protocol != IPPROTO_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ break;
+ case ETH_P_IPV6:
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+
+ if (ctx->ipv6 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv6->version != 6)
+ goto err;
+
+ if (ctx->ipv6->nexthdr != NEXTHDR_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ break;
+ default:
+ goto err;
+ }
+
+ if (ctx->tcp + 1 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_reload_headers(struct tcp_syncookie *ctx)
+{
+ /* Without volatile,
+ * R3 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 data_len = ctx->skb->data_end - ctx->skb->data;
+
+ if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4)
+ goto err;
+
+ /* Needed to calculate csum and parse TCP options. */
+ if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0))
+ goto err;
+
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+ if (ctx->ipv4) {
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+ ctx->ipv6 = NULL;
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ } else {
+ ctx->ipv4 = NULL;
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ }
+
+ if ((void *)ctx->tcp + 60 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static int tcp_validate_header(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_reload_headers(ctx))
+ goto err;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ /* check tcp_v4_csum(csum) is 0 if not on lo. */
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (csum_fold(csum) != 0)
+ goto err;
+ } else if (ctx->ipv6) {
+ /* check tcp_v6_csum(csum) is 0 if not on lo. */
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
+static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz)
+{
+ __u64 off = ctx->off;
+ __u8 *data;
+
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
+
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
+
+ ctx->off += sz;
+ return data;
+}
+
+static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+{
+ __u8 *opcode, *opsize, *wscale;
+ __u32 *tsval, *tsecr;
+ __u16 *mss;
+ __u32 off;
+
+ off = ctx->off;
+ opcode = next(ctx, 1);
+ if (!opcode)
+ goto stop;
+
+ if (*opcode == TCPOPT_EOL)
+ goto stop;
+
+ if (*opcode == TCPOPT_NOP)
+ goto next;
+
+ opsize = next(ctx, 1);
+ if (!opsize)
+ goto stop;
+
+ if (*opsize < 2)
+ goto stop;
+
+ switch (*opcode) {
+ case TCPOPT_MSS:
+ mss = next(ctx, 2);
+ if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss)
+ ctx->attrs.mss = get_unaligned_be16(mss);
+ break;
+ case TCPOPT_WINDOW:
+ wscale = next(ctx, 1);
+ if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) {
+ ctx->attrs.wscale_ok = 1;
+ ctx->attrs.snd_wscale = *wscale;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ tsval = next(ctx, 4);
+ tsecr = next(ctx, 4);
+ if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) {
+ ctx->attrs.rcv_tsval = get_unaligned_be32(tsval);
+ ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr);
+
+ if (ctx->tcp->syn && ctx->attrs.rcv_tsecr)
+ ctx->attrs.tstamp_ok = 0;
+ else
+ ctx->attrs.tstamp_ok = 1;
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn)
+ ctx->attrs.sack_ok = 1;
+ break;
+ }
+
+ ctx->off = off + *opsize;
+next:
+ return 0;
+stop:
+ return 1;
+}
+
+static void tcp_parse_options(struct tcp_syncookie *ctx)
+{
+ ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data,
+
+ bpf_loop(40, tcp_parse_option, ctx, 0);
+}
+
+static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
+{
+ if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) ||
+ (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
+ goto err;
+
+ if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ goto err;
+
+ if (!ctx->attrs.tstamp_ok)
+ goto err;
+
+ if (!ctx->attrs.sack_ok)
+ goto err;
+
+ if (!ctx->tcp->ece || !ctx->tcp->cwr)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static void tcp_prepare_cookie(struct tcp_syncookie *ctx)
+{
+ u32 seq = bpf_ntohl(ctx->tcp->seq);
+ u64 first = 0, second;
+ int mssind = 0;
+ u32 hash;
+
+ if (ctx->ipv4) {
+ for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab4[mssind])
+ break;
+
+ ctx->attrs.mss = msstab4[mssind];
+
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ } else if (ctx->ipv6) {
+ for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab6[mssind])
+ break;
+
+ ctx->attrs.mss = msstab6[mssind];
+
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+ }
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok) {
+ ctx->attrs.rcv_tsecr = bpf_get_prandom_u32();
+ ctx->attrs.rcv_tsecr &= ~COOKIE_MASK;
+ ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK;
+ }
+
+ hash &= ~COOKIE_MASK;
+ hash |= mssind << 6;
+
+ if (ctx->attrs.wscale_ok)
+ hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK;
+
+ if (ctx->attrs.sack_ok)
+ hash |= BPF_SYNCOOKIE_SACK;
+
+ if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr)
+ hash |= BPF_SYNCOOKIE_ECN;
+
+ ctx->cookie = hash;
+}
+
+static void tcp_write_options(struct tcp_syncookie *ctx)
+{
+ ctx->ptr32 = (__be32 *)(ctx->tcp + 1);
+
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 |
+ ctx->attrs.mss);
+
+ if (ctx->attrs.wscale_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 |
+ TCPOLEN_WINDOW << 8 |
+ ctx->attrs.snd_wscale);
+
+ if (ctx->attrs.tstamp_ok) {
+ if (ctx->attrs.sack_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 |
+ TCPOLEN_SACK_PERM << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+ else
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr);
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval);
+ } else if (ctx->attrs.sack_ok) {
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_SACK_PERM << 8 |
+ TCPOLEN_SACK_PERM);
+ }
+}
+
+static int tcp_handle_syn(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_sysctl(ctx))
+ goto err;
+
+ tcp_prepare_cookie(ctx);
+ tcp_write_options(ctx);
+
+ swap(ctx->tcp->source, ctx->tcp->dest);
+ ctx->tcp->check = 0;
+ ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1);
+ ctx->tcp->seq = bpf_htonl(ctx->cookie);
+ ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2;
+ ctx->tcp->ack = 1;
+ if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr)
+ ctx->tcp->ece = 0;
+ ctx->tcp->cwr = 0;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ swap(ctx->ipv4->saddr, ctx->ipv4->daddr);
+ ctx->tcp->check = tcp_v4_csum(ctx, csum);
+
+ ctx->ipv4->check = 0;
+ ctx->ipv4->tos = 0;
+ ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4);
+ ctx->ipv4->id = 0;
+ ctx->ipv4->ttl = 64;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0);
+ if (csum < 0)
+ goto err;
+
+ ctx->ipv4->check = csum_fold(csum);
+ } else if (ctx->ipv6) {
+ swap(ctx->ipv6->saddr, ctx->ipv6->daddr);
+ ctx->tcp->check = tcp_v6_csum(ctx, csum);
+
+ *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000);
+ ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp);
+ ctx->ipv6->hop_limit = 64;
+ }
+
+ swap_array(ctx->eth->h_source, ctx->eth->h_dest);
+
+ if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0))
+ goto err;
+
+ return bpf_redirect(ctx->skb->ifindex, 0);
+err:
+ return TC_ACT_SHOT;
+}
+
+static int tcp_validate_cookie(struct tcp_syncookie *ctx)
+{
+ u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1;
+ u32 seq = bpf_ntohl(ctx->tcp->seq) - 1;
+ u64 first = 0, second;
+ int mssind;
+ u32 hash;
+
+ if (ctx->ipv4)
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ else if (ctx->ipv6)
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok)
+ hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK;
+ else
+ hash &= ~COOKIE_MASK;
+
+ hash -= cookie & ~COOKIE_MASK;
+ if (hash)
+ goto err;
+
+ mssind = (cookie & (3 << 6)) >> 6;
+ if (ctx->ipv4) {
+ if (mssind > ARRAY_SIZE(msstab4))
+ goto err;
+
+ ctx->attrs.mss = msstab4[mssind];
+ } else {
+ if (mssind > ARRAY_SIZE(msstab6))
+ goto err;
+
+ ctx->attrs.mss = msstab6[mssind];
+ }
+
+ ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
+ ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK;
+ ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_handle_ack(struct tcp_syncookie *ctx)
+{
+ struct bpf_sock_tuple tuple;
+ struct bpf_sock *skc;
+ int ret = TC_ACT_OK;
+ struct sock *sk;
+ u32 tuple_size;
+
+ if (ctx->ipv4) {
+ tuple.ipv4.saddr = ctx->ipv4->saddr;
+ tuple.ipv4.daddr = ctx->ipv4->daddr;
+ tuple.ipv4.sport = ctx->tcp->source;
+ tuple.ipv4.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv4);
+ } else if (ctx->ipv6) {
+ __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr));
+ __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr));
+ tuple.ipv6.sport = ctx->tcp->source;
+ tuple.ipv6.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv6);
+ } else {
+ goto out;
+ }
+
+ skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0);
+ if (!skc)
+ goto out;
+
+ if (skc->state != TCP_LISTEN)
+ goto release;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(skc);
+ if (!sk)
+ goto err;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_cookie(ctx))
+ goto err;
+
+ ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs));
+ if (ret < 0)
+ goto err;
+
+release:
+ bpf_sk_release(skc);
+out:
+ return ret;
+
+err:
+ ret = TC_ACT_SHOT;
+ goto release;
+}
+
+SEC("tc")
+int tcp_custom_syncookie(struct __sk_buff *skb)
+{
+ struct tcp_syncookie ctx = {
+ .skb = skb,
+ };
+
+ if (tcp_load_headers(&ctx))
+ return TC_ACT_OK;
+
+ if (ctx.tcp->rst)
+ return TC_ACT_OK;
+
+ if (ctx.tcp->syn) {
+ if (ctx.tcp->ack)
+ return TC_ACT_OK;
+
+ handled_syn = true;
+
+ return tcp_handle_syn(&ctx);
+ }
+
+ handled_ack = true;
+
+ return tcp_handle_ack(&ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
new file mode 100644
index 000000000000..29a6a53cf229
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_TCP_SYNCOOKIE_H
+#define _TEST_TCP_SYNCOOKIE_H
+
+#define __packed __attribute__((__packed__))
+#define __force
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define swap(a, b) \
+ do { \
+ typeof(a) __tmp = (a); \
+ (a) = (b); \
+ (b) = __tmp; \
+ } while (0)
+
+#define swap_array(a, b) \
+ do { \
+ typeof(a) __tmp[sizeof(a)]; \
+ __builtin_memcpy(__tmp, a, sizeof(a)); \
+ __builtin_memcpy(a, b, sizeof(a)); \
+ __builtin_memcpy(b, __tmp, sizeof(a)); \
+ } while (0)
+
+/* asm-generic/unaligned.h */
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return bpf_ntohs(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return bpf_ntohl(__get_unaligned_t(__be32, p));
+}
+
+/* lib/checksum.c */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/* asm-generic/checksum.h */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/* net/ipv6/ip6_checksum.c */
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ int carry;
+ __u32 ulen;
+ __u32 uproto;
+ __u32 sum = (__force u32)csum;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ ulen = (__force u32)bpf_htonl((__u32)len);
+ sum += ulen;
+ carry = (sum < ulen);
+ sum += carry;
+
+ uproto = (__force u32)bpf_htonl(proto);
+ sum += uproto;
+ carry = (sum < uproto);
+ sum += carry;
+
+ return csum_fold((__force __wsum)sum);
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index cf7ed8cbb1fe..a3f3f43fc195 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -59,7 +59,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
asm volatile (
"%[op] = *(u32 *)(%[skops] +96)"
- : [op] "+r"(op)
+ : [op] "=r"(op)
: [skops] "r"(skops)
:);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index d7a9a74b7245..8caf58be5818 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -19,6 +19,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -137,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
index 78c368e71797..67a77944ef29 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -18,11 +18,11 @@
#include "test_iptunnel_common.h"
#include "bpf_kfuncs.h"
-const size_t tcphdr_sz = sizeof(struct tcphdr);
-const size_t udphdr_sz = sizeof(struct udphdr);
-const size_t ethhdr_sz = sizeof(struct ethhdr);
-const size_t iphdr_sz = sizeof(struct iphdr);
-const size_t ipv6hdr_sz = sizeof(struct ipv6hdr);
+#define tcphdr_sz sizeof(struct tcphdr)
+#define udphdr_sz sizeof(struct udphdr)
+#define ethhdr_sz sizeof(struct ethhdr)
+#define iphdr_sz sizeof(struct iphdr)
+#define ipv6hdr_sz sizeof(struct ipv6hdr)
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index c98fb44156f0..93267a68825b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -133,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 42c8f6ded0e4..5c7e4758a0ca 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,6 +15,7 @@
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
@@ -362,7 +363,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
iph->ttl = 4;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
@@ -409,7 +410,7 @@ int send_icmp_reply(void *data, void *data_end)
iph->saddr = tmp_addr;
iph->check = 0;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 8b946c8188c6..f615da97df26 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -51,7 +51,8 @@ struct {
__uint(max_entries, 1);
__type(key, int);
__type(value, struct elem);
-} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"),
+ race_array SEC(".maps");
__u64 bss_data;
__u64 abs_data;
@@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a)
return 0;
}
+
+static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer)
+{
+ bpf_timer_start(timer, 1000000, 0);
+ return 0;
+}
+
+SEC("syscall")
+int race(void *ctx)
+{
+ struct bpf_timer *timer;
+ int err, race_key = 0;
+ struct elem init;
+
+ __builtin_memset(&init, 0, sizeof(struct elem));
+ bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);
+
+ timer = bpf_map_lookup_elem(&race_array, &race_key);
+ if (!timer)
+ return 1;
+
+ err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
+ if (err && err != -EBUSY)
+ return 1;
+
+ bpf_timer_set_callback(timer, race_timer_callback);
+ bpf_timer_start(timer, 0, 0);
+ bpf_timer_cancel(timer);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c
new file mode 100644
index 000000000000..e4d59b6ba743
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/token_lsm.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int my_pid;
+bool reject_capable;
+bool reject_cmd;
+
+SEC("lsm/bpf_token_capable")
+int BPF_PROG(token_capable, struct bpf_token *token, int cap)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_capable)
+ return -1;
+ return 0;
+}
+
+SEC("lsm/bpf_token_cmd")
+int BPF_PROG(token_cmd, struct bpf_token *token, enum bpf_cmd cmd)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_cmd)
+ return -1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
new file mode 100644
index 000000000000..d41665d2ec8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry/bpf_spin_lock")
+int BPF_PROG(test_spin_lock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_spin_unlock")
+int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 694e7cec1823..5fda43901033 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -33,6 +33,27 @@ int bench_trigger_kprobe(void *ctx)
return 0;
}
+SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kretprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kprobe_multi(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kretprobe_multi(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry(void *ctx)
{
@@ -40,6 +61,13 @@ int bench_trigger_fentry(void *ctx)
return 0;
}
+SEC("fexit/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_fexit(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry_sleep(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c
index a9629ac230fd..9d808b8f4ab0 100644
--- a/tools/testing/selftests/bpf/progs/type_cast.c
+++ b/tools/testing/selftests/bpf/progs/type_cast.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
@@ -19,9 +20,6 @@ char name[IFNAMSIZ];
unsigned int inum;
unsigned int meta_len, frag0_len, kskb_len, kskb2_len;
-void *bpf_cast_to_kern_ctx(void *) __ksym;
-void *bpf_rdonly_cast(void *, __u32) __ksym;
-
SEC("?xdp")
int md_xdp(struct xdp_md *ctx)
{
@@ -48,13 +46,12 @@ int md_skb(struct __sk_buff *skb)
/* Simulate the following kernel macro:
* #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
*/
- shared_info = bpf_rdonly_cast(kskb->head + kskb->end,
- bpf_core_type_id_kernel(struct skb_shared_info));
+ shared_info = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
meta_len = shared_info->meta_len;
frag0_len = shared_info->frag_list->len;
/* kskb2 should be equal to kskb */
- kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff));
+ kskb2 = bpf_core_cast(kskb, typeof(*kskb2));
kskb2_len = kskb2->len;
return 0;
}
@@ -65,7 +62,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
struct task_struct *task, *task_dup;
task = bpf_get_current_task_btf();
- task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct));
+ task_dup = bpf_core_cast(task, struct task_struct);
(void)bpf_task_storage_get(&enter_id, task_dup, 0, 0);
return 0;
}
@@ -73,7 +70,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
SEC("?tracepoint/syscalls/sys_enter_nanosleep")
int kctx_u64(void *ctx)
{
- u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64));
+ u64 *kctx = bpf_core_cast(ctx, u64);
(void)kctx;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
new file mode 100644
index 000000000000..5540b05ff9ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST)
+ volatile int __arena *page1, *page2, *no_page, *page3;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+ if (*page1 != 1)
+ return 6;
+ if (*page2 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page2 != page3)
+ return 9;
+ if (*page1 != 1)
+ return 10;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST)
+ volatile char __arena *page1, *page2, *page3, *page4;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ page2 = page1 + __PAGE_SIZE;
+ page3 = page1 + __PAGE_SIZE * 2;
+ page4 = page1 - __PAGE_SIZE;
+ *page1 = 1;
+ *page2 = 2;
+ *page3 = 3;
+ *page4 = 4;
+ if (*page1 != 1)
+ return 1;
+ if (*page2 != 2)
+ return 2;
+ if (*page3 != 0)
+ return 3;
+ if (*page4 != 0)
+ return 4;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+ if (*page1 != 0)
+ return 5;
+ if (*page2 != 0)
+ return 6;
+ if (*page3 != 0)
+ return 7;
+ if (*page4 != 0)
+ return 8;
+#endif
+ return 0;
+}
+
+struct bpf_arena___l {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+SEC("syscall")
+__success __retval(0) __log_level(2)
+int basic_alloc3(void *ctx)
+{
+ struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena;
+ volatile char __arena *pages;
+
+ pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0);
+ if (!pages)
+ return 1;
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__success __log_level(2)
+int iter_maps1(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("expected pointer to STRUCT bpf_map")
+int iter_maps2(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+
+ bpf_arena_alloc_pages((void *)seq, NULL, 1, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("untrusted_ptr_bpf_map")
+int iter_maps3(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map->inner_map_meta, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index be95570ab382..28b602ac9cbe 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -568,7 +568,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("direct packet access: test23 (x += pkt_ptr, 4)")
-__failure __msg("invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)")
+__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void test23_x_pkt_ptr_4(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
new file mode 100644
index 000000000000..4ab0ef18d7eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "xdp_metadata.h"
+#include "bpf_kfuncs.h"
+
+extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+extern void bpf_task_release(struct task_struct *p) __ksym __weak;
+
+__weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ if (!task)
+ return 0;
+ return task->pid + task->tgid;
+}
+
+__weak int subprog_trusted_task_nullable_extra_layer(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ return subprog_trusted_task_nullable(task) + subprog_trusted_task_nullable(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nullable() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int trusted_task_arg_nullable(void *ctx)
+{
+ struct task_struct *t1 = bpf_get_current_task_btf();
+ struct task_struct *t2 = bpf_task_acquire(t1);
+ int res = 0;
+
+ /* known NULL */
+ res += subprog_trusted_task_nullable(NULL);
+
+ /* known non-NULL */
+ res += subprog_trusted_task_nullable(t1);
+ res += subprog_trusted_task_nullable_extra_layer(t1);
+
+ /* unknown if NULL or not */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ if (t2) {
+ /* known non-NULL after explicit NULL check, just in case */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ bpf_task_release(t2);
+ }
+
+ return res;
+}
+
+__weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted)
+{
+ return task->pid + task->tgid;
+}
+
+SEC("?kprobe")
+__failure __log_level(2)
+__msg("R1 type=scalar expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail1(void *ctx)
+{
+ return subprog_trusted_task_nonnull(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail2(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+ struct task_struct *nullable;
+ int res;
+
+ nullable = bpf_task_acquire(t);
+
+ /* should fail, PTR_TO_BTF_ID_OR_NULL */
+ res = subprog_trusted_task_nonnull(nullable);
+
+ if (nullable)
+ bpf_task_release(nullable);
+
+ return res;
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nonnull() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int trusted_task_arg_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_trusted_task_nonnull(t);
+}
+
+struct task_struct___local {} __attribute__((preserve_access_index));
+
+__weak int subprog_nullable_task_flavor(
+ struct task_struct___local *task __arg_trusted __arg_nullable)
+{
+ char buf[16];
+
+ if (!task)
+ return 0;
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nullable_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int flavor_ptr_nullable(void *ctx)
+{
+ struct task_struct___local *t = (void *)bpf_get_current_task_btf();
+
+ return subprog_nullable_task_flavor(t);
+}
+
+__weak int subprog_nonnull_task_flavor(struct task_struct___local *task __arg_trusted)
+{
+ char buf[16];
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nonnull_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int flavor_ptr_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_nonnull_task_flavor((void *)t);
+}
+
+__weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted)
+{
+ bpf_task_release(task); /* should be rejected */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID")
+int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_destroy(task);
+}
+
+__weak int subprog_trusted_acq_rel(struct task_struct *task __arg_trusted)
+{
+ struct task_struct *owned;
+
+ owned = bpf_task_acquire(task);
+ if (!owned)
+ return 0;
+
+ bpf_task_release(owned); /* this one is OK, we acquired it locally */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_acq_rel(task);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
index 67dddd941891..baff5ffe9405 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -115,6 +115,35 @@ int arg_tag_nullable_ptr_fail(void *ctx)
return subprog_nullable_ptr_bad(&x);
}
+typedef struct {
+ int x;
+} user_struct_t;
+
+__noinline __weak int subprog_user_anon_mem(user_struct_t *t)
+{
+ return t ? t->x : 0;
+}
+
+SEC("?tracepoint")
+__failure __log_level(2)
+__msg("invalid bpf_context access")
+__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')")
+int anon_user_mem_invalid(void *ctx)
+{
+ /* can't pass PTR_TO_CTX as user memory */
+ return subprog_user_anon_mem(ctx);
+}
+
+SEC("?tracepoint")
+__success __log_level(2)
+__msg("Func#1 ('subprog_user_anon_mem') is safe for any args that match its prototype")
+int anon_user_mem_valid(void *ctx)
+{
+ user_struct_t t = { .x = 42 };
+
+ return subprog_user_anon_mem(&t);
+}
+
__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull)
{
return (*p1) * (*p2); /* good, no need for NULL checks */
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index 5905e036e0ea..99e561f18f9b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_experimental.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -239,4 +237,173 @@ int bpf_loop_iter_limit_nested(void *unused)
return 1000 * a + b + c;
}
+struct iter_limit_bug_ctx {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+};
+
+static __naked void iter_limit_bug_cb(void)
+{
+ /* This is the same as C code below, but written
+ * in assembly to control which branches are fall-through.
+ *
+ * switch (bpf_get_prandom_u32()) {
+ * case 1: ctx->a = 42; break;
+ * case 2: ctx->b = 42; break;
+ * default: ctx->c = 42; break;
+ * }
+ */
+ asm volatile (
+ "r9 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 42;"
+ "r0 = 0;"
+ "if r1 == 0x1 goto 1f;"
+ "if r1 == 0x2 goto 2f;"
+ "*(u64 *)(r9 + 16) = r2;"
+ "exit;"
+ "1: *(u64 *)(r9 + 0) = r2;"
+ "exit;"
+ "2: *(u64 *)(r9 + 8) = r2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("tc")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+int iter_limit_bug(struct __sk_buff *skb)
+{
+ struct iter_limit_bug_ctx ctx = { 7, 7, 7 };
+
+ bpf_loop(2, iter_limit_bug_cb, &ctx, 0);
+
+ /* This is the same as C code below,
+ * written in assembly to guarantee checks order.
+ *
+ * if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7)
+ * asm volatile("r1 /= 0;":::"r1");
+ */
+ asm volatile (
+ "r1 = *(u64 *)%[ctx_a];"
+ "if r1 != 42 goto 1f;"
+ "r1 = *(u64 *)%[ctx_b];"
+ "if r1 != 42 goto 1f;"
+ "r1 = *(u64 *)%[ctx_c];"
+ "if r1 != 7 goto 1f;"
+ "r1 /= 0;"
+ "1:"
+ :
+ : [ctx_a]"m"(ctx.a),
+ [ctx_b]"m"(ctx.b),
+ [ctx_c]"m"(ctx.c)
+ : "r1"
+ );
+ return 0;
+}
+
+#define ARR_SZ 1000000
+int zero;
+char arr[ARR_SZ];
+
+SEC("socket")
+__success __retval(0xd495cdc0)
+int cond_break1(const void *ctx)
+{
+ unsigned long i;
+ unsigned int sum = 0;
+
+ for (i = zero; i < ARR_SZ; cond_break, i++)
+ sum += i;
+ for (i = zero; i < ARR_SZ; i++) {
+ barrier_var(i);
+ sum += i + arr[i];
+ cond_break;
+ }
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(999000000)
+int cond_break2(const void *ctx)
+{
+ int i, j;
+ int sum = 0;
+
+ for (i = zero; i < 1000; cond_break, i++)
+ for (j = zero; j < 1000; j++) {
+ sum += i + j;
+ cond_break;
+ }
+
+ return sum;
+}
+
+static __noinline int loop(void)
+{
+ int i, sum = 0;
+
+ for (i = zero; i <= 1000000; i++, cond_break)
+ sum += i;
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(0x6a5a2920)
+int cond_break3(const void *ctx)
+{
+ return loop();
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break4(const void *ctx)
+{
+ int cnt = zero;
+
+ for (;;) {
+ /* should eventually break out of the loop */
+ cond_break;
+ cnt++;
+ }
+ /* if we looped a bit, it's a success */
+ return cnt > 1 ? 1 : 0;
+}
+
+static __noinline int static_subprog(void)
+{
+ int cnt = zero;
+
+ for (;;) {
+ cond_break;
+ cnt++;
+ }
+
+ return cnt;
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break5(const void *ctx)
+{
+ int cnt1 = zero, cnt2;
+
+ for (;;) {
+ cond_break;
+ cnt1++;
+ }
+
+ cnt2 = static_subprog();
+
+ /* main and subprog have to loop a bit */
+ return cnt1 > 1 && cnt2 > 1 ? 1 : 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index 71735dbf33d4..e07b43b78fd2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -259,4 +259,28 @@ l0_%=: r2 += r1; \
" ::: __clobber_all);
}
+SEC("xdp")
+__success
+__naked void not_an_inifinite_loop(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+loop_%=: \
+ r0 = *(u64 *)(r10 - 8); \
+ if r0 > 10 goto exit_%=; \
+ r0 += 1; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+ goto loop_%=; \
+exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 39fe3372e0e0..85e48069c9e6 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void)
SEC("tc")
__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data")
-__failure __msg("invalid access to packet")
+__success __retval(0)
__naked void u16_offset_to_skb_data(void)
{
asm volatile (" \
@@ -225,13 +225,19 @@ __naked void u16_offset_to_skb_data(void)
r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
w4 = 20; \
*(u32*)(r10 - 8) = r4; \
- r4 = *(u16*)(r10 - 8); \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 8);"
+#else
+ "r4 = *(u16*)(r10 - 6);"
+#endif
+ " \
r0 = r2; \
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
r0 += r4; \
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
if r0 > r3 goto l0_%=; \
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ /* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
r0 = *(u32*)(r2 + 0); \
l0_%=: r0 = 0; \
exit; \
@@ -243,7 +249,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("Spill u32 const scalars. Refill as u64. Offset to skb->data")
-__failure __msg("invalid access to packet")
+__failure __msg("math between pkt pointer and register with unbounded min value is not allowed")
__naked void u64_offset_to_skb_data(void)
{
asm volatile (" \
@@ -253,13 +259,11 @@ __naked void u64_offset_to_skb_data(void)
w7 = 20; \
*(u32*)(r10 - 4) = r6; \
*(u32*)(r10 - 8) = r7; \
- r4 = *(u16*)(r10 - 8); \
+ r4 = *(u64*)(r10 - 8); \
r0 = r2; \
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4= */ \
r0 += r4; \
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
if r0 > r3 goto l0_%=; \
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
r0 = *(u32*)(r2 + 0); \
l0_%=: r0 = 0; \
exit; \
@@ -270,7 +274,7 @@ l0_%=: r0 = 0; \
}
SEC("tc")
-__description("Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data")
+__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data")
__failure __msg("invalid access to packet")
__naked void _6_offset_to_skb_data(void)
{
@@ -279,7 +283,13 @@ __naked void _6_offset_to_skb_data(void)
r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
w4 = 20; \
*(u32*)(r10 - 8) = r4; \
- r4 = *(u16*)(r10 - 6); \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 6);"
+#else
+ "r4 = *(u16*)(r10 - 8);"
+#endif
+ " \
r0 = r2; \
/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
r0 += r4; \
@@ -454,9 +464,9 @@ l0_%=: r1 >>= 16; \
SEC("raw_tp")
__log_level(2)
__success
-__msg("fp-8=0m??mmmm")
-__msg("fp-16=00mm??mm")
-__msg("fp-24=00mm???m")
+__msg("fp-8=0m??scalar()")
+__msg("fp-16=00mm??scalar()")
+__msg("fp-24=00mm???scalar()")
__naked void spill_subregs_preserve_stack_zero(void)
{
asm volatile (
@@ -495,14 +505,14 @@ char single_byte_buf[1] SEC(".data.single_byte_buf");
SEC("raw_tp")
__log_level(2)
__success
-/* make sure fp-8 is all STACK_ZERO */
-__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000")
+/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
/* but fp-16 is spilled IMPRECISE zero const reg */
__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
-/* validate that assigning R2 from STACK_ZERO doesn't mark register
+/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
* precise immediately; if necessary, it will be marked precise later
*/
-__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000")
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
/* similarly, when R2 is assigned from spilled register, it is initially
* imprecise, but will be marked precise later once it is used in precise context
*/
@@ -520,14 +530,14 @@ __msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
__naked void partial_stack_load_preserves_zeros(void)
{
asm volatile (
- /* fp-8 is all STACK_ZERO */
+ /* fp-8 is value zero (represented by a zero value fake reg) */
".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
/* fp-16 is const zero register */
"r0 = 0;"
"*(u64 *)(r10 -16) = r0;"
- /* load single U8 from non-aligned STACK_ZERO slot */
+ /* load single U8 from non-aligned spilled value zero slot */
"r1 = %[single_byte_buf];"
"r2 = *(u8 *)(r10 -1);"
"r1 += r2;"
@@ -539,7 +549,7 @@ __naked void partial_stack_load_preserves_zeros(void)
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
- /* load single U16 from non-aligned STACK_ZERO slot */
+ /* load single U16 from non-aligned spilled value zero slot */
"r1 = %[single_byte_buf];"
"r2 = *(u16 *)(r10 -2);"
"r1 += r2;"
@@ -551,7 +561,7 @@ __naked void partial_stack_load_preserves_zeros(void)
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
- /* load single U32 from non-aligned STACK_ZERO slot */
+ /* load single U32 from non-aligned spilled value zero slot */
"r1 = %[single_byte_buf];"
"r2 = *(u32 *)(r10 -4);"
"r1 += r2;"
@@ -583,6 +593,47 @@ __naked void partial_stack_load_preserves_zeros(void)
: __clobber_common);
}
+SEC("raw_tp")
+__log_level(2)
+__success
+/* fp-4 is STACK_ZERO */
+__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("5: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
+__naked void partial_stack_load_preserves_partial_zeros(void)
+{
+ asm volatile (
+ /* fp-4 is value zero */
+ ".8byte %[fp4_st_zero];" /* LLVM-18+: *(u32 *)(r10 -4) = 0; */
+
+ /* load single U8 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp4_st_zero, BPF_ST_MEM(BPF_W, BPF_REG_FP, -4, 0))
+ : __clobber_common);
+}
+
char two_byte_buf[2] SEC(".data.two_byte_buf");
SEC("raw_tp")
@@ -737,4 +788,460 @@ __naked void stack_load_preserves_const_precision_subreg(void)
: __clobber_common);
}
+SEC("xdp")
+__description("32-bit spilled reg range should be tracked")
+__success __retval(0)
+__naked void spill_32bit_range_track(void)
+{
+ asm volatile(" \
+ call %[bpf_ktime_get_ns]; \
+ /* Make r0 bounded. */ \
+ r0 &= 65535; \
+ /* Assign an ID to r0. */ \
+ r1 = r0; \
+ /* 32-bit spill r0 to stack. */ \
+ *(u32*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 < 1 goto l0_%=; \
+ /* 32-bit fill r1 from stack. */ \
+ r1 = *(u32*)(r10 - 8); \
+ /* r1 == r0 => r1 >= 1 always. */ \
+ if r1 >= 1 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. \
+ * Do an invalid memory access if the verifier \
+ * follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("64-bit spill of 64-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_64bit_of_64bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80000000; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 64-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u64*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit spill of 32-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_32bit_of_32bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0x80000000; \
+ /* 32-bit spill r0 to stack - should assign an ID. */\
+ *(u32*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u32*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 16-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_16bit_of_16bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8000; \
+ /* 16-bit spill r0 to stack - should assign an ID. */\
+ *(u16*)(r10 - 8) = r0; \
+ /* 16-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u16*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("8-bit spill of 8-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_8bit_of_8bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80; \
+ /* 8-bit spill r0 to stack - should assign an ID. */\
+ *(u8*)(r10 - 8) = r0; \
+ /* 8-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u8*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("spill unbounded reg, then range check src")
+__success __retval(0)
+__naked void spill_unbounded(void)
+{
+ asm volatile (" \
+ /* Produce an unbounded scalar. */ \
+ call %[bpf_get_prandom_u32]; \
+ /* Spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 > 16 goto l0_%=; \
+ /* Fill r0 from stack. */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 <= 16 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit(void)
+{
+ asm volatile(" \
+ /* Randomize the upper 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r0 from stack. */ \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(u32*)(r10 - 8);"
+#else
+ "r0 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 == 0 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit_preserve_id(void)
+{
+ asm volatile (" \
+ /* Randomize the lower 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xffffffff; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(u32*)(r10 - 8);"
+#else
+ "r1 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r1 with another register to trigger find_equal_scalars. */\
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void fill_32bit_after_spill_64bit_clear_id(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* 64-bit spill r1 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack - should clear the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r2 = *(u32*)(r10 - 8);"
+#else
+ "r2 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on fill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* The verifier shouldn't propagate r2's range to r1, so it should\
+ * still remember r1 = 0xffffffff and reject the below.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if stack spill of an imprecise scalar in old state
+ * is considered equivalent to STACK_{MISC,INVALID} in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+/* STACK_INVALID should prevent verifier in unpriv mode from
+ * considering states equivalent and force an error on second
+ * verification path (entry - label 1 - label 2).
+ */
+__failure_unpriv
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("9: (95) exit")
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("invalid read from stack off -8+2 size 8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_imprecise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that stack spill of a precise scalar in old state
+ * is not considered equivalent to STACK_MISC in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should visit 'if r1 == 0x2a ...' two times:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("if r1 == 0x2a goto pc+0")
+__msg("if r1 == 0x2a goto pc+0")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_precise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ /* use r1 in precise context */
+ "if r1 == 42 goto +0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if STACK_MISC in old state is considered
+ * equivalent to stack spill of a scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r0 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_scalar(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r0 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that STACK_MISC in old state is not considered
+ * equivalent to stack spill of a non-scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should process exit instructions twice:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("r1 = *(u64 *)(r10 -8)")
+__msg("exit")
+__msg("r1 = *(u64 *)(r10 -8)")
+__msg("exit")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_ctx_ptr(void)
+{
+ asm volatile(
+ /* remember context pointer in r9 */
+ "r9 = r1;"
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure context pointer in fp-8 */
+ "*(u64*)(r10 - 8) = r9;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
index 9c1aa69650f8..fb316c080c84 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
@@ -330,7 +330,7 @@ l1_%=: r7 = r0; \
SEC("cgroup/skb")
__description("spin_lock: test10 lock in subprog without unlock")
-__failure __msg("unlock is missing")
+__success
__failure_unpriv __msg_unpriv("")
__naked void lock_in_subprog_without_unlock(void)
{
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 518329c666e9..7ea9785738b5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -7,6 +7,8 @@
#include <bpf/bpf_endian.h>
#include <asm/errno.h>
+#include "bpf_compiler.h"
+
#define TC_ACT_OK 0
#define TC_ACT_SHOT 2
@@ -151,11 +153,11 @@ static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
__u64 sum = csum;
int i;
-#pragma unroll
+ __pragma_loop_unroll
for (i = 0; i < 4; i++)
sum += (__u32)saddr->in6_u.u6_addr32[i];
-#pragma unroll
+ __pragma_loop_unroll
for (i = 0; i < 4; i++)
sum += (__u32)daddr->in6_u.u6_addr32[i];
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 54cf1765118b..44e2b0ef23ae 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "xdping.h"
struct {
@@ -116,7 +117,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_PASS;
if (pinginfo->start) {
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < XDPING_MAX_COUNT; i++) {
if (pinginfo->times[i] == 0)
break;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index f01391021218..524c38e9cde4 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -181,7 +181,7 @@ static int parse_test_spec(struct test_loader *tester,
memset(spec, 0, sizeof(*spec));
spec->prog_name = bpf_program__name(prog);
- spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */
+ spec->prog_flags = testing_prog_flags();
btf = bpf_object__btf(obj);
if (!btf) {
@@ -501,7 +501,7 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}
-static int do_prog_test_run(int fd_prog, int *retval)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
@@ -514,6 +514,10 @@ static int do_prog_test_run(int fd_prog, int *retval)
.repeat = 1,
);
+ if (empty_opts) {
+ memset(&topts, 0, sizeof(struct bpf_test_run_opts));
+ topts.sz = sizeof(struct bpf_test_run_opts);
+ }
err = bpf_prog_test_run_opts(fd_prog, &topts);
saved_errno = errno;
@@ -649,7 +653,8 @@ void run_subtest(struct test_loader *tester,
}
}
- do_prog_test_run(bpf_program__fd(tprog), &retval);
+ do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
@@ -688,7 +693,7 @@ static void process_subtest(struct test_loader *tester,
++nr_progs;
specs = calloc(nr_progs, sizeof(struct test_spec));
- if (!ASSERT_OK_PTR(specs, "Can't alloc specs array"))
+ if (!ASSERT_OK_PTR(specs, "specs_alloc"))
return;
i = 0;
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index c028d621c744..d98c72dc563e 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -211,7 +211,7 @@ static void test_lpm_map(int keysize)
volatile size_t n_matches, n_matches_after_delete;
size_t i, j, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
uint8_t *data, *value;
int r, map;
@@ -331,8 +331,8 @@ static void test_lpm_map(int keysize)
static void test_lpm_ipaddr(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
- struct bpf_lpm_trie_key *key_ipv4;
- struct bpf_lpm_trie_key *key_ipv6;
+ struct bpf_lpm_trie_key_u8 *key_ipv4;
+ struct bpf_lpm_trie_key_u8 *key_ipv6;
size_t key_size_ipv4;
size_t key_size_ipv6;
int map_fd_ipv4;
@@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void)
static void test_lpm_delete(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
size_t key_size;
int map_fd;
__u64 value;
@@ -532,7 +532,7 @@ static void test_lpm_delete(void)
static void test_lpm_get_next_key(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
- struct bpf_lpm_trie_key *key_p, *next_key_p;
+ struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
size_t key_size;
__u32 value = 0;
int map_fd;
@@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg)
{
int i, j, ret, iter, key_size;
struct lpm_mt_test_info *info = arg;
- struct bpf_lpm_trie_key *key_p;
+ struct bpf_lpm_trie_key_u8 *key_p;
- key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_size = sizeof(*key_p) + sizeof(__u32);
key_p = alloca(key_size);
for (iter = 0; iter < info->iter; iter++)
for (i = 0; i < MAX_TEST_KEYS; i++) {
@@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg)
ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
assert(ret == 0 || errno == ENOENT);
} else {
- struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
}
@@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void)
/* create a trie */
value_size = sizeof(__u32);
- key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+ key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
/* create 4 threads to test update, delete, lookup and get_next_key */
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 767e0693df10..dfbab214f4d1 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1190,7 +1190,11 @@ static void test_map_in_map(void)
goto out_map_in_map;
}
- bpf_object__load(obj);
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("Failed to load test prog\n");
+ goto out_map_in_map;
+ }
map = bpf_object__find_map_by_name(obj, "mim_array");
if (!map) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1b9387890148..89ff704e9dad 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -547,24 +547,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
return bpf_map__fd(map);
}
-static bool is_jit_enabled(void)
-{
- const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
- bool enabled = false;
- int sysctl_fd;
-
- sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
- if (sysctl_fd != -1) {
- char tmpc;
-
- if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
- enabled = (tmpc != '0');
- close(sysctl_fd);
- }
-
- return enabled;
-}
-
int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
@@ -701,11 +683,69 @@ static const struct argp_option opts[] = {
{},
};
+static FILE *libbpf_capture_stream;
+
+static struct {
+ char *buf;
+ size_t buf_sz;
+} libbpf_output_capture;
+
+/* Creates a global memstream capturing INFO and WARN level output
+ * passed to libbpf_print_fn.
+ * Returns 0 on success, negative value on failure.
+ * On failure the description is printed using PRINT_FAIL and
+ * current test case is marked as fail.
+ */
+int start_libbpf_log_capture(void)
+{
+ if (libbpf_capture_stream) {
+ PRINT_FAIL("%s: libbpf_capture_stream != NULL\n", __func__);
+ return -EINVAL;
+ }
+
+ libbpf_capture_stream = open_memstream(&libbpf_output_capture.buf,
+ &libbpf_output_capture.buf_sz);
+ if (!libbpf_capture_stream) {
+ PRINT_FAIL("%s: open_memstream failed errno=%d\n", __func__, errno);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Destroys global memstream created by start_libbpf_log_capture().
+ * Returns a pointer to captured data which has to be freed.
+ * Returned buffer is null terminated.
+ */
+char *stop_libbpf_log_capture(void)
+{
+ char *buf;
+
+ if (!libbpf_capture_stream)
+ return NULL;
+
+ fputc(0, libbpf_capture_stream);
+ fclose(libbpf_capture_stream);
+ libbpf_capture_stream = NULL;
+ /* get 'buf' after fclose(), see open_memstream() documentation */
+ buf = libbpf_output_capture.buf;
+ memset(&libbpf_output_capture, 0, sizeof(libbpf_output_capture));
+ return buf;
+}
+
static int libbpf_print_fn(enum libbpf_print_level level,
const char *format, va_list args)
{
+ if (libbpf_capture_stream && level != LIBBPF_DEBUG) {
+ va_list args2;
+
+ va_copy(args2, args);
+ vfprintf(libbpf_capture_stream, format, args2);
+ }
+
if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
return 0;
+
vfprintf(stdout, format, args);
return 0;
}
@@ -1099,6 +1139,7 @@ static void run_one_test(int test_num)
cleanup_cgroup_environment();
stdio_restore();
+ free(stop_libbpf_log_capture());
dump_test_log(test, state, false, false, NULL);
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 2f9f6f250f17..0ba5a20b19ba 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -385,13 +385,21 @@ int test__join_cgroup(const char *path);
goto goto_label; \
})
+#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
+
#define SYS_NOFAIL(fmt, ...) \
({ \
char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ int n; \
+ n = snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (n < sizeof(cmd) && sizeof(cmd) - n >= sizeof(ALL_TO_DEV_NULL)) \
+ strcat(cmd, ALL_TO_DEV_NULL); \
system(cmd); \
})
+int start_libbpf_log_capture(void);
+char *stop_libbpf_log_capture(void);
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index b0068a9d2cfe..80c42583f597 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -19,6 +19,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "bpf_util.h"
#ifndef ENOTSUPP
@@ -679,7 +680,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
+ bpf_program__set_flags(prog, testing_prog_flags());
err = bpf_object__load(obj);
if (err) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index f36e41435be7..df04bda1c927 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -67,6 +67,7 @@
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define F_NEEDS_JIT_ENABLED (1 << 2)
/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
#define ADMIN_CAPS (1ULL << CAP_NET_ADMIN | \
@@ -74,6 +75,7 @@
1ULL << CAP_BPF)
#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
static bool unpriv_disabled = false;
+static bool jit_disabled;
static int skips;
static bool verbose = false;
static int verif_log_level = 0;
@@ -1341,48 +1343,6 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
-static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt)
-{
- __u32 buf_element_size = sizeof(struct bpf_insn);
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 xlated_prog_len;
- struct bpf_insn *buf;
-
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("bpf_prog_get_info_by_fd failed");
- return NULL;
- }
-
- xlated_prog_len = info.xlated_prog_len;
- if (xlated_prog_len % buf_element_size) {
- printf("Program length %d is not multiple of %d\n",
- xlated_prog_len, buf_element_size);
- return NULL;
- }
-
- *cnt = xlated_prog_len / buf_element_size;
- buf = calloc(*cnt, buf_element_size);
- if (!buf) {
- perror("can't allocate xlated program buffer");
- return NULL;
- }
-
- bzero(&info, sizeof(info));
- info.xlated_prog_len = xlated_prog_len;
- info.xlated_prog_insns = (__u64)(unsigned long)buf;
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("second bpf_prog_get_info_by_fd failed");
- goto out_free_buf;
- }
-
- return buf;
-
-out_free_buf:
- free(buf);
- return NULL;
-}
-
static bool is_null_insn(struct bpf_insn *insn)
{
struct bpf_insn null_insn = {};
@@ -1505,7 +1465,7 @@ static void print_insn(struct bpf_insn *buf, int cnt)
static bool check_xlated_program(struct bpf_test *test, int fd_prog)
{
struct bpf_insn *buf;
- int cnt;
+ unsigned int cnt;
bool result = true;
bool check_expected = !is_null_insn(test->expected_insns);
bool check_unexpected = !is_null_insn(test->unexpected_insns);
@@ -1513,8 +1473,7 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog)
if (!check_expected && !check_unexpected)
goto out;
- buf = get_xlated_program(fd_prog, &cnt);
- if (!buf) {
+ if (get_xlated_program(fd_prog, &buf, &cnt)) {
printf("FAIL: can't get xlated program\n");
result = false;
goto out;
@@ -1567,6 +1526,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
__u32 pflags;
int i, err;
+ if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) {
+ printf("SKIP (requires BPF JIT)\n");
+ skips++;
+ sched_yield();
+ return;
+ }
+
fd_prog = -1;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
@@ -1588,7 +1554,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (fixup_skips != skips)
return;
- pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
+ pflags = testing_prog_flags();
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
@@ -1887,6 +1853,8 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
+ jit_disabled = !is_jit_enabled();
+
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index d2458c1b1671..28b6646662af 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -252,6 +252,34 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
int extra_prog_load_log_flags = 0;
+int testing_prog_flags(void)
+{
+ static int cached_flags = -1;
+ static int prog_flags[] = { BPF_F_TEST_RND_HI32, BPF_F_TEST_REG_INVARIANTS };
+ static struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int insn_cnt = ARRAY_SIZE(insns), i, fd, flags = 0;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ if (cached_flags >= 0)
+ return cached_flags;
+
+ for (i = 0; i < ARRAY_SIZE(prog_flags); i++) {
+ opts.prog_flags = prog_flags[i];
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "flag-test", "GPL",
+ insns, insn_cnt, &opts);
+ if (fd >= 0) {
+ flags |= prog_flags[i];
+ close(fd);
+ }
+ }
+
+ cached_flags = flags;
+ return cached_flags;
+}
+
int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
@@ -276,7 +304,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
bpf_program__set_type(prog, type);
- flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
+ flags = bpf_program__flags(prog) | testing_prog_flags();
bpf_program__set_flags(prog, flags);
err = bpf_object__load(obj);
@@ -299,7 +327,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
.kern_version = kern_version,
- .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS,
+ .prog_flags = testing_prog_flags(),
.log_level = extra_prog_load_log_flags,
.log_buf = log_buf,
.log_size = log_buf_sz,
@@ -328,12 +356,12 @@ __u64 read_perf_max_sample_freq(void)
return sample_freq;
}
-static int finit_module(int fd, const char *param_values, int flags)
+int finit_module(int fd, const char *param_values, int flags)
{
return syscall(__NR_finit_module, fd, param_values, flags);
}
-static int delete_module(const char *name, int flags)
+int delete_module(const char *name, int flags)
{
return syscall(__NR_delete_module, name, flags);
}
@@ -387,3 +415,63 @@ int kern_sync_rcu(void)
{
return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
}
+
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
+{
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_prog_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %u is not multiple of %u\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+ if (!buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_prog_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ *buf = NULL;
+ return -1;
+}
+
+bool is_jit_enabled(void)
+{
+ const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ bool enabled = false;
+ int sysctl_fd;
+
+ sysctl_fd = open(jit_sysctl, O_RDONLY);
+ if (sysctl_fd != -1) {
+ char tmpc;
+
+ if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+ enabled = (tmpc != '0');
+ close(sysctl_fd);
+ }
+
+ return enabled;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 35284faff4f2..d55f6ab12433 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -36,6 +36,8 @@ __u64 read_perf_max_sample_freq(void);
int load_bpf_testmod(bool verbose);
int unload_bpf_testmod(bool verbose);
int kern_sync_rcu(void);
+int finit_module(int fd, const char *param_values, int flags);
+int delete_module(const char *name, int flags);
static inline __u64 get_time_ns(void)
{
@@ -46,4 +48,12 @@ static inline __u64 get_time_ns(void)
return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
}
+struct bpf_insn;
+/* Request BPF program instructions after all rewrites are applied,
+ * e.g. verifier.c:convert_ctx_access() is done.
+ */
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt);
+int testing_prog_flags(void);
+bool is_jit_enabled(void);
+
#endif /* __TESTING_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4faa898ff7fc..27fd7ed3e4b0 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -271,7 +271,7 @@ ssize_t get_uprobe_offset(const void *addr)
* addi r2,r2,XXXX
*/
{
- const u32 *insn = (const u32 *)(uintptr_t)addr;
+ const __u32 *insn = (const __u32 *)(uintptr_t)addr;
if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
index a535d41dc20d..59125b22ae39 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -57,6 +57,7 @@
.expected_insns = { PSEUDO_CALL_INSN() },
.unexpected_insns = { HELPER_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
@@ -90,6 +91,7 @@
.expected_insns = { HELPER_CALL_INSN() },
.unexpected_insns = { PSEUDO_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
@@ -127,6 +129,7 @@
.expected_insns = { HELPER_CALL_INSN() },
.unexpected_insns = { PSEUDO_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = {
@@ -165,6 +168,7 @@
.expected_insns = { PSEUDO_CALL_INSN() },
.unexpected_insns = { HELPER_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = {
@@ -235,6 +239,7 @@
},
.unexpected_insns = { HELPER_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.func_info = {
{ 0, MAIN_TYPE },
@@ -252,6 +257,7 @@
.unexpected_insns = { HELPER_CALL_INSN() },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
.func_info_cnt = 2,
BTF_TYPES
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 8a2ff81d8350..0a9293a57211 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -183,10 +183,10 @@
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
.errstr = "mark_precise: frame0: last_idx 7 first_idx 7\
- mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: parent state regs=r4 stack=-8:\
mark_precise: frame0: last_idx 6 first_idx 4\
- mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\
- mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\
+ mark_precise: frame0: regs=r4 stack=-8 before 6: (b7) r0 = -1\
+ mark_precise: frame0: regs=r4 stack=-8 before 5: (79) r4 = *(u64 *)(r10 -8)\
mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\
mark_precise: frame0: parent state regs=r0 stack=:\
mark_precise: frame0: last_idx 3 first_idx 3\
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 878d68db0325..bdf5d8180067 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -480,7 +480,7 @@ peek:
for (int j = 0; j < 500; j++) {
if (complete_tx(xsk, clock_id))
break;
- usleep(10*1000);
+ usleep(10);
}
}
}
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 534576f06df1..c59e4adb905d 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -12,6 +12,7 @@
#include <syscall.h>
#include <unistd.h>
#include <sys/resource.h>
+#include <linux/close_range.h>
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 8a72bb7de70f..03a089165d3f 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -15,7 +15,10 @@ TEST_PROGS := \
TEST_FILES := \
lag_lib.sh \
bond_topo_2d1c.sh \
- bond_topo_3d1c.sh \
- net_forwarding_lib.sh
+ bond_topo_3d1c.sh
+
+TEST_INCLUDES := \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
index 6358df5752f9..1ec7f59db7f4 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -20,21 +20,21 @@
# +------+ +------+
#
# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
set -e
-tmp=$(mktemp -q dump.XXXXXX)
cleanup() {
ip link del fab-br0 >/dev/null 2>&1 || :
ip link del fbond >/dev/null 2>&1 || :
ip link del veth1-bond >/dev/null 2>&1 || :
ip link del veth2-bond >/dev/null 2>&1 || :
- modprobe -r bonding >/dev/null 2>&1 || :
- rm -f -- ${tmp}
}
trap cleanup 0 1 2
cleanup
-sleep 1
# create the bridge
ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
@@ -67,13 +67,12 @@ ip link set fab-br0 up
ip link set fbond up
ip addr add dev fab-br0 10.0.0.3
-tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 &
-sleep 15
-pkill tcpdump >/dev/null 2>&1
rc=0
-num=$(grep "packets captured" ${tmp} | awk '{print $1}')
-if test "$num" -gt 0; then
- echo "PASS, captured ${num}"
+tc qdisc add dev veth1-end clsact
+tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass
+if slowwait_for_counter 15 2 \
+ tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then
+ echo "PASS, captured 2"
else
echo "FAIL"
rc=1
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
index 862e947e17c7..8293dbc7c18f 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -11,7 +11,7 @@ ALL_TESTS="
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
bond_check_flags()
{
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
index 89af402fabbe..78d3e0fe6604 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
@@ -17,6 +17,11 @@
# +----------------+
#
# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
sw="sw-$(mktemp -u XXXXXX)"
host="ns-$(mktemp -u XXXXXX)"
@@ -26,6 +31,16 @@ cleanup()
ip netns del $host
}
+wait_lladdr_dad()
+{
+ $@ | grep fe80 | grep -qv tentative
+}
+
+wait_bond_up()
+{
+ $@ | grep -q 'state UP'
+}
+
trap cleanup 0 1 2
ip netns add $sw
@@ -37,8 +52,8 @@ ip -n $host link add veth1 type veth peer name veth1 netns $sw
ip -n $sw link add br0 type bridge
ip -n $sw link set br0 up
sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1)
-# sleep some time to make sure bridge lladdr pass DAD
-sleep 2
+# wait some time to make sure bridge lladdr pass DAD
+slowwait 2 wait_lladdr_dad ip -n $sw addr show br0
ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \
arp_validate 3 arp_interval 1000
@@ -53,7 +68,7 @@ ip -n $sw link set veth1 master br0
ip -n $sw link set veth0 up
ip -n $sw link set veth1 up
-sleep 5
+slowwait 5 wait_bond_up ip -n $host link show bond0
rc=0
if ip -n $host link show bond0 | grep -q LOWER_UP; then
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index c54d1697f439..41d0859feb7d 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -45,15 +45,23 @@ skip_ns()
}
active_slave=""
+active_slave_changed()
+{
+ local old_active_slave=$1
+ local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \
+ ".[].linkinfo.info_data.active_slave")
+ [ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ]
+}
+
check_active_slave()
{
local target_active_slave=$1
+ slowwait 5 active_slave_changed $active_slave
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
test "$active_slave" = "$target_active_slave"
check_err $? "Current active slave is $active_slave but not $target_active_slave"
}
-
# Test bonding prio option
prio_test()
{
@@ -62,6 +70,8 @@ prio_test()
# create bond
bond_reset "${param}"
+ # set active_slave to primary eth1 specifically
+ ip -n ${s_ns} link set bond0 type bond active_slave eth1
# check bonding member prio value
ip -n ${s_ns} link set eth0 type bond_slave prio 0
@@ -84,13 +94,13 @@ prio_test()
# active slave should be the higher prio slave
ip -n ${s_ns} link set $active_slave down
- bond_check_connection "fail over"
check_active_slave eth2
+ bond_check_connection "fail over"
# when only 1 slave is up
ip -n ${s_ns} link set $active_slave down
- bond_check_connection "only 1 slave up"
check_active_slave eth0
+ bond_check_connection "only 1 slave up"
# when a higher prio slave change to up
ip -n ${s_ns} link set eth2 up
@@ -140,8 +150,8 @@ prio_test()
check_active_slave "eth1"
ip -n ${s_ns} link set $active_slave down
- bond_check_connection "change slave prio"
check_active_slave "eth0"
+ bond_check_connection "change slave prio"
fi
}
@@ -162,7 +172,7 @@ prio_arp()
local mode=$1
for primary_reselect in 0 1 2; do
- prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+ prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
done
}
@@ -178,7 +188,7 @@ prio_ns()
fi
for primary_reselect in 0 1 2; do
- prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+ prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
done
}
@@ -194,9 +204,18 @@ prio()
for mode in $modes; do
prio_miimon $mode
- prio_arp $mode
- prio_ns $mode
done
+ prio_arp "active-backup"
+ prio_ns "active-backup"
+}
+
+wait_mii_up()
+{
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ [ ${mii_status} != "UP" ] && return 1
+ done
+ return 0
}
arp_validate_test()
@@ -211,7 +230,7 @@ arp_validate_test()
[ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
# wait for a while to make sure the mii status stable
- sleep 5
+ slowwait 5 wait_mii_up
for i in $(seq 0 2); do
mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
if [ ${mii_status} != "UP" ]; then
@@ -276,10 +295,13 @@ garp_test()
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
ip -n ${s_ns} link set ${active_slave} down
- exp_num=$(echo "${param}" | cut -f6 -d ' ')
- sleep $((exp_num + 2))
+ # wait for active link change
+ slowwait 2 active_slave_changed $active_slave
+ exp_num=$(echo "${param}" | cut -f6 -d ' ')
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ slowwait_for_counter $((exp_num + 5)) $exp_num \
+ tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
# check result
real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
@@ -296,8 +318,8 @@ garp_test()
num_grat_arp()
{
local val
- for val in 10 20 30 50; do
- garp_test "mode active-backup miimon 100 num_grat_arp $val peer_notify_delay 1000"
+ for val in 10 20 30; do
+ garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100"
log_test "num_grat_arp" "active-backup miimon num_grat_arp $val"
done
}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
index a509ef949dcf..195ef83cfbf1 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -28,7 +28,7 @@
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source ${lib_dir}/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
s_ns="s-$(mktemp -u XXXXXX)"
c_ns="c-$(mktemp -u XXXXXX)"
@@ -73,7 +73,6 @@ server_create()
ip -n ${s_ns} link set bond0 up
ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
}
# Reset bond with new mode and options
@@ -96,7 +95,8 @@ bond_reset()
ip -n ${s_ns} link set bond0 up
ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
+ # Wait for IPv6 address ready as it needs DAD
+ slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null
}
server_destroy()
@@ -150,7 +150,7 @@ bond_check_connection()
{
local msg=${1:-"check connection"}
- sleep 2
+ slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null
ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
check_err $? "${msg}: ping failed"
ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
index 5cfe7d8ebc25..e6fa24eded5b 100755
--- a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
+++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
@@ -14,7 +14,7 @@ ALL_TESTS="
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
source "$lib_dir"/lag_lib.sh
diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
index 2a268b17b61f..bf9bcd1b5ec0 100644
--- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
+++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
@@ -48,6 +48,17 @@ test_LAG_cleanup()
ip link add mv0 link "$name" up address "$ucaddr" type macvlan
# Used to test dev->mc handling
ip address add "$addr6" dev "$name"
+
+ # Check that addresses were added as expected
+ (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 ||
+ grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null
+ check_err $? "macvlan unicast address not found on a slave"
+
+ # mcaddr is added asynchronously by addrconf_dad_work(), use busywait
+ (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 ||
+ grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null
+ check_err $? "IPv6 solicited-node multicast mac address not found on a slave"
+
ip link set dev "$name" down
ip link del "$name"
@@ -96,13 +107,12 @@ lag_setup2x2()
NAMESPACES="${namespaces}"
}
-# cleanup all lag related namespaces and remove the bonding module
+# cleanup all lag related namespaces
lag_cleanup()
{
for n in ${NAMESPACES}; do
ip netns delete ${n} >/dev/null 2>&1 || true
done
- modprobe -r bonding
}
SWITCH="lag_node1"
@@ -148,7 +158,7 @@ test_bond_recovery()
create_bond $@
# verify connectivity
- ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
check_err $? "No connectivity"
# force the links of the bond down
@@ -158,7 +168,7 @@ test_bond_recovery()
ip netns exec ${SWITCH} ip link set eth1 down
# re-verify connectivity
- ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
local rc=$?
check_err $rc "Bond failed to recover"
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
index b76bf5030952..9d26ab4cad0b 100755
--- a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
+++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
@@ -23,7 +23,7 @@ REQUIRE_MZ=no
REQUIRE_JQ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
source "$lib_dir"/lag_lib.sh
cleanup()
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
index 8c2619002147..2d275b3e47dd 100755
--- a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
+++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
@@ -23,7 +23,7 @@ REQUIRE_MZ=no
REQUIRE_JQ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
source "$lib_dir"/lag_lib.sh
cleanup()
diff --git a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh
deleted file mode 120000
index 39c96828c5ef..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
index 6091b45d226b..79b65bdf05db 100644
--- a/tools/testing/selftests/drivers/net/bonding/settings
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -1 +1 @@
-timeout=120
+timeout=1200
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
index c393e7b73805..cd6817fe5be6 100644
--- a/tools/testing/selftests/drivers/net/dsa/Makefile
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -11,8 +11,22 @@ TEST_PROGS = bridge_locked_port.sh \
tc_actions.sh \
test_bridge_fdb_stress.sh
-TEST_PROGS_EXTENDED := lib.sh tc_common.sh
+TEST_FILES := \
+ run_net_forwarding_test.sh \
+ forwarding.config
-TEST_FILES := forwarding.config
+TEST_INCLUDES := \
+ ../../../net/forwarding/bridge_locked_port.sh \
+ ../../../net/forwarding/bridge_mdb.sh \
+ ../../../net/forwarding/bridge_mld.sh \
+ ../../../net/forwarding/bridge_vlan_aware.sh \
+ ../../../net/forwarding/bridge_vlan_mcast.sh \
+ ../../../net/forwarding/bridge_vlan_unaware.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/local_termination.sh \
+ ../../../net/forwarding/no_forwarding.sh \
+ ../../../net/forwarding/tc_actions.sh \
+ ../../../net/forwarding/tc_common.sh \
+ ../../../net/lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
index f5eb940c4c7c..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_locked_port.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
index 76492da525f7..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_mdb.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
index 81a7e0df0474..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_mld.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
index 9831ed74376a..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_vlan_aware.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
index 7f3c3f0bf719..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_vlan_mcast.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
index bf1a57e6bde1..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_vlan_unaware.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh
deleted file mode 120000
index 39c96828c5ef..000000000000
--- a/tools/testing/selftests/drivers/net/dsa/lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
index c08166f84501..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/local_termination.sh
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -1 +1 @@
-../../../net/forwarding/local_termination.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
index b9757466bc97..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -1 +1 @@
-../../../net/forwarding/no_forwarding.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
new file mode 100755
index 000000000000..4106c0a102ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
+testname=$(basename "${BASH_SOURCE[0]}")
+
+source "$libdir"/forwarding.config
+cd "$libdir"/../../../net/forwarding/ || exit 1
+source "./$testname" "$@"
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
index 306213d9430e..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
+++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
@@ -1 +1 @@
-../../../net/forwarding/tc_actions.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_common.sh b/tools/testing/selftests/drivers/net/dsa/tc_common.sh
deleted file mode 120000
index bc3465bdc36b..000000000000
--- a/tools/testing/selftests/drivers/net/dsa/tc_common.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/tc_common.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
index 92acab83fbe2..74682151d04d 100755
--- a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -19,7 +19,7 @@ REQUIRE_JQ="no"
REQUIRE_MZ="no"
NETIF_CREATE="no"
lib_dir=$(dirname "$0")
-source "$lib_dir"/lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
cleanup() {
echo "Cleaning up"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index 616d3581419c..31252bc8775e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -869,7 +869,7 @@ bloom_simple_test()
bloom_complex_test()
{
# Bloom filter index computation is affected from region ID, eRP
- # ID and from the region key size. In order to excercise those parts
+ # ID and from the region key size. In order to exercise those parts
# of the Bloom filter code, use a series of regions, each with a
# different key size and send packet that should hit all of them.
local index
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..5bace0b7fb57
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = devlink.sh \
+ devlink_in_netns.sh \
+ devlink_trap.sh \
+ ethtool-coalesce.sh \
+ ethtool-fec.sh \
+ ethtool-pause.sh \
+ ethtool-ring.sh \
+ fib.sh \
+ hw_stats_l3.sh \
+ nexthop.sh \
+ peer.sh \
+ psample.sh \
+ tc-mq-visibility.sh \
+ udp_tunnel_nic.sh \
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 46e20b13473c..b5ea2526f23c 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -31,7 +31,7 @@ devlink_wait()
fw_flash_test()
{
- DUMMYFILE=$(find /lib/firmware -maxdepth 1 -type f -printf '%f\n' |head -1)
+ DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1)
RET=0
if [ -z "$DUMMYFILE" ]
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
index 7d7829f57550..6c52ce1b0450 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -49,7 +49,7 @@ for o in llrs rs; do
Active FEC encoding: ${o^^}"
done
-# Test mutliple bits
+# Test multiple bits
$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
check $?
s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..aed62d9e6c0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ../../../net/net_helper.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netdevsim should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netnsid should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with self should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "invalid arg should fail"
+ cleanup_ns
+ exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+ echo "expected 3 bytes, got $count"
+ res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 4855ef597a15..384cfa3d38a6 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -270,6 +270,7 @@ for port in 0 1; do
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="new NIC device created"
exp0=( 0 0 0 0 )
@@ -283,8 +284,8 @@ for port in 0 1; do
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
- ifconfig vxlan1 down
- ifconfig vxlan0 down
+ ip link set dev vxlan1 down
+ ip link set dev vxlan0 down
check_tables
msg="VxLAN v6 devices"
@@ -292,7 +293,7 @@ for port in 0 1; do
new_vxlan vxlanA 4789 $NSIM_NETDEV 6
for ifc in vxlan0 vxlan1; do
- ifconfig $ifc up
+ ip link set dev $ifc up
done
new_vxlan vxlanB 4789 $NSIM_NETDEV 6
@@ -306,14 +307,14 @@ for port in 0 1; do
new_geneve gnv0 6081
msg="NIC device goes down"
- ifconfig $NSIM_NETDEV down
+ ip link set dev $NSIM_NETDEV down
if [ $port -eq 1 ]; then
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
fi
check_tables
msg="NIC device goes up again"
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
exp1=( `mke 6081 2` 0 0 0 )
check_tables
@@ -431,7 +432,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "overflow NIC table"
overflow_table1 "overflow NIC table"
@@ -488,7 +490,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "overflow NIC table"
overflow_table1 "overflow NIC table"
@@ -544,7 +547,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "destroy NIC"
overflow_table1 "destroy NIC"
@@ -573,7 +577,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
@@ -633,7 +638,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
@@ -688,7 +694,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
exp0=( `mke 10000 1` 0 0 0 )
@@ -747,7 +754,8 @@ for port in 0 1; do
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
exp0=( `mke 10000 1` 0 0 0 )
@@ -760,7 +768,7 @@ for port in 0 1; do
check_tables
msg="NIC device goes down"
- ifconfig $NSIM_NETDEV down
+ ip link set dev $NSIM_NETDEV down
if [ $port -eq 1 ]; then
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
@@ -771,7 +779,7 @@ for port in 0 1; do
check_tables
msg="NIC device goes up again"
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
exp0=( `mke 10000 1` 0 0 0 )
check_tables
@@ -819,12 +827,12 @@ new_vxlan vxlan1 4789 $NSIM_NETDEV2
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
-ifconfig vxlan1 down
-ifconfig vxlan0 down
+ip link set dev vxlan1 down
+ip link set dev vxlan0 down
check_tables
for ifc in vxlan0 vxlan1; do
- ifconfig $ifc up
+ ip link set dev $ifc up
done
msg="VxLAN v6 device"
@@ -836,11 +844,11 @@ exp1=( `mke 6081 2` 0 0 0 )
new_geneve gnv0 6081
msg="NIC device goes down"
-ifconfig $NSIM_NETDEV down
+ip link set dev $NSIM_NETDEV down
check_tables
msg="NIC device goes up again"
-ifconfig $NSIM_NETDEV up
+ip link set dev $NSIM_NETDEV up
check_tables
for i in `seq 2`; do
@@ -877,6 +885,7 @@ msg="re-add a port"
echo 2 > $NSIM_DEV_SYS/del_port
echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
check_tables
msg="replace VxLAN in overflow table"
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 6a86e61e8bfe..2d5a76d99181 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -3,8 +3,9 @@
TEST_PROGS := dev_addr_lists.sh
-TEST_FILES := \
- lag_lib.sh \
- net_forwarding_lib.sh
+TEST_INCLUDES := \
+ ../bonding/lag_lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index 265b6882cc21..b5e3a3aad4bf 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,3 +1,5 @@
+CONFIG_DUMMY=y
+CONFIG_IPV6=y
+CONFIG_MACVLAN=y
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
-CONFIG_MACVLAN=y
diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
index 33913112d5ca..b1ec7755b783 100755
--- a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
+++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
@@ -11,9 +11,9 @@ ALL_TESTS="
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
-source "$lib_dir"/lag_lib.sh
+source "$lib_dir"/../bonding/lag_lib.sh
destroy()
diff --git a/tools/testing/selftests/drivers/net/team/lag_lib.sh b/tools/testing/selftests/drivers/net/team/lag_lib.sh
deleted file mode 120000
index e1347a10afde..000000000000
--- a/tools/testing/selftests/drivers/net/team/lag_lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../bonding/lag_lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh
deleted file mode 120000
index 39c96828c5ef..000000000000
--- a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/dt/Makefile b/tools/testing/selftests/dt/Makefile
index 62dc00ee4978..2d33ee9e9b71 100644
--- a/tools/testing/selftests/dt/Makefile
+++ b/tools/testing/selftests/dt/Makefile
@@ -4,7 +4,7 @@ ifneq ($(PY3),)
TEST_PROGS := test_unprobed_devices.sh
TEST_GEN_FILES := compatible_list
-TEST_FILES := compatible_ignore_list ktap_helpers.sh
+TEST_FILES := compatible_ignore_list
include ../lib.mk
diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh
index b07af2a4c4de..2d7e70c5ad2d 100755
--- a/tools/testing/selftests/dt/test_unprobed_devices.sh
+++ b/tools/testing/selftests/dt/test_unprobed_devices.sh
@@ -15,16 +15,12 @@
DIR="$(dirname $(readlink -f "$0"))"
-source "${DIR}"/ktap_helpers.sh
+source "${DIR}"/../kselftest/ktap_helpers.sh
PDT=/proc/device-tree/
COMPAT_LIST="${DIR}"/compatible_list
IGNORE_LIST="${DIR}"/compatible_ignore_list
-KSFT_PASS=0
-KSFT_FAIL=1
-KSFT_SKIP=4
-
ktap_print_header
if [[ ! -d "${PDT}" ]]; then
@@ -33,8 +29,8 @@ if [[ ! -d "${PDT}" ]]; then
fi
nodes_compatible=$(
- for node_compat in $(find ${PDT} -name compatible); do
- node=$(dirname "${node_compat}")
+ for node in $(find ${PDT} -type d); do
+ [ ! -f "${node}"/compatible ] && continue
# Check if node is available
if [[ -e "${node}"/status ]]; then
status=$(tr -d '\000' < "${node}"/status)
@@ -46,10 +42,11 @@ nodes_compatible=$(
nodes_dev_bound=$(
IFS=$'\n'
- for uevent in $(find /sys/devices -name uevent); do
- if [[ -d "$(dirname "${uevent}")"/driver ]]; then
- grep '^OF_FULLNAME=' "${uevent}" | sed -e 's|OF_FULLNAME=||'
- fi
+ for dev_dir in $(find /sys/devices -type d); do
+ [ ! -f "${dev_dir}"/uevent ] && continue
+ [ ! -d "${dev_dir}"/driver ] && continue
+
+ grep '^OF_FULLNAME=' "${dev_dir}"/uevent | sed -e 's|OF_FULLNAME=||'
done
)
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index bf79d664c8e6..0546ca24f2b2 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -393,7 +393,7 @@ static int run_tests(void)
static void prerequisites(void)
{
int fd;
- const char *script = "#!/bin/sh\nexit $*\n";
+ const char *script = "#!/bin/bash\nexit $*\n";
/* Create ephemeral copies of files */
exe_cp("execveat", "execveat.ephemeral");
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index e19ab0e85709..759f86e7d263 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -10,7 +10,6 @@
#include <linux/mount.h>
#include <sys/syscall.h>
#include <sys/stat.h>
-#include <sys/mount.h>
#include <sys/mman.h>
#include <sched.h>
#include <fcntl.h>
@@ -32,7 +31,11 @@ static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
{
return syscall(__NR_fsmount, fd, flags, attr_flags);
}
-
+static int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return syscall(__NR_mount, src, tgt, fst, flags, data);
+}
static int sys_move_mount(int from_dfd, const char *from_pathname,
int to_dfd, const char *to_pathname,
unsigned int flags)
@@ -166,8 +169,7 @@ int main(int argc, char **argv)
ksft_test_result_skip("unable to create a new mount namespace\n");
return 1;
}
-
- if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
+ if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
pr_perror("mount");
return 1;
}
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index c778d4dcc17e..25d4e0fca385 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -504,7 +504,7 @@ prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
if [ "$KTAP" = "1" ]; then
echo -n "# Totals:"
echo -n " pass:"`echo $PASSED_CASES | wc -w`
- echo -n " faii:"`echo $FAILED_CASES | wc -w`
+ echo -n " fail:"`echo $FAILED_CASES | wc -w`
echo -n " xfail:"`echo $XFAILED_CASES | wc -w`
echo -n " xpass:0"
echo -n " skip:"`echo $UNTESTED_CASES $UNSUPPORTED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
index add7d5bf585d..c45094d1e1d2 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
@@ -1,6 +1,6 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-# description: Test file and directory owership changes for eventfs
+# description: Test file and directory ownership changes for eventfs
original_group=`stat -c "%g" .`
original_owner=`stat -c "%u" .`
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc
new file mode 100644
index 000000000000..ccfbfde3d942
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: ftrace - function trace across cpu hotplug
+# requires: function:tracer
+
+if ! which nproc ; then
+ nproc() {
+ ls -d /sys/devices/system/cpu/cpu[0-9]* | wc -l
+ }
+fi
+
+NP=`nproc`
+
+if [ $NP -eq 1 ] ;then
+ echo "We cannot test cpu hotplug in UP environment"
+ exit_unresolved
+fi
+
+# Find online cpu
+for i in /sys/devices/system/cpu/cpu[1-9]*; do
+ if [ -f $i/online ] && [ "$(cat $i/online)" = "1" ]; then
+ cpu=$i
+ break
+ fi
+done
+
+if [ -z "$cpu" ]; then
+ echo "We cannot test cpu hotplug with a single cpu online"
+ exit_unresolved
+fi
+
+echo 0 > tracing_on
+echo > trace
+
+: "Set $(basename $cpu) offline/online with function tracer enabled"
+echo function > current_tracer
+echo 1 > tracing_on
+(echo 0 > $cpu/online)
+(echo "forked"; sleep 1)
+(echo 1 > $cpu/online)
+echo 0 > tracing_on
+echo nop > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index 4562e13cb26b..717898894ef7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -40,7 +40,7 @@ grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \
reset_trigger
-echo "Test histgram with log2 modifier"
+echo "Test histogram with log2 modifier"
echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger
for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 1ee5518ee6b7..7f3ca5c78df1 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -17,6 +17,8 @@
*
*****************************************************************************/
+#define _GNU_SOURCE
+
#include <errno.h>
#include <limits.h>
#include <pthread.h>
@@ -358,6 +360,7 @@ out:
int main(int argc, char *argv[])
{
+ const char *test_name;
int c, ret;
while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
@@ -397,6 +400,14 @@ int main(int argc, char *argv[])
"\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
broadcast, locked, owner, timeout_ns);
+ ret = asprintf(&test_name,
+ "%s broadcast=%d locked=%d owner=%d timeout=%ldns",
+ TEST_NAME, broadcast, locked, owner, timeout_ns);
+ if (ret < 0) {
+ ksft_print_msg("Failed to generate test name\n");
+ test_name = TEST_NAME;
+ }
+
/*
* FIXME: unit_test is obsolete now that we parse options and the
* various style of runs are done by run.sh - simplify the code and move
@@ -404,6 +415,6 @@ int main(int argc, char *argv[])
*/
ret = unit_test(broadcast, locked, owner, timeout_ns);
- print_result(TEST_NAME, ret);
+ print_result(test_name, ret);
return ret;
}
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py
index 352fc39f3c6c..b62c7dba6777 100644
--- a/tools/testing/selftests/hid/tests/test_wacom_generic.py
+++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -880,8 +880,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
does not overlap with other contacts. The value of `t` may be
incremented over time to move the point along a linear path.
"""
- x = 50 + 10 * contact_id + t
- y = 100 + 100 * contact_id + t
+ x = 50 + 10 * contact_id + t * 11
+ y = 100 + 100 * contact_id + t * 11
return test_multitouch.Touch(contact_id, x, y)
def make_contacts(self, n, t=0):
@@ -902,8 +902,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
tracking_id = contact_ids.tracking_id
slot_num = contact_ids.slot_num
- x = 50 + 10 * contact_id + t
- y = 100 + 100 * contact_id + t
+ x = 50 + 10 * contact_id + t * 11
+ y = 100 + 100 * contact_id + t * 11
# If the data isn't supposed to be stored in any slots, there is
# nothing we can check for in the evdev stream.
diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config
index 6c4f901d6fed..110d73917615 100644
--- a/tools/testing/selftests/iommu/config
+++ b/tools/testing/selftests/iommu/config
@@ -1,2 +1,3 @@
-CONFIG_IOMMUFD
-CONFIG_IOMMUFD_TEST
+CONFIG_IOMMUFD=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_IOMMUFD_TEST=y
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 1a881e7a21d1..edf1c99c9936 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -12,6 +12,7 @@
static unsigned long HUGEPAGE_SIZE;
#define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
+#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE)
static unsigned long get_huge_page_size(void)
{
@@ -1716,10 +1717,12 @@ FIXTURE(iommufd_dirty_tracking)
FIXTURE_VARIANT(iommufd_dirty_tracking)
{
unsigned long buffer_size;
+ bool hugepages;
};
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ int mmap_flags;
void *vrc;
int rc;
@@ -1732,25 +1735,41 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
variant->buffer_size, rc);
}
+ mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ if (variant->hugepages) {
+ /*
+ * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+ * not available.
+ */
+ mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+ }
assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ mmap_flags, -1, 0);
assert(vrc == self->buffer);
self->page_size = MOCK_PAGE_SIZE;
self->bitmap_size =
variant->buffer_size / self->page_size / BITS_PER_BYTE;
- /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */
+ /* Provision with an extra (PAGE_SIZE) for the unaligned case */
rc = posix_memalign(&self->bitmap, PAGE_SIZE,
- self->bitmap_size + MOCK_PAGE_SIZE);
+ self->bitmap_size + PAGE_SIZE);
assert(!rc);
assert(self->bitmap);
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
test_ioctl_ioas_alloc(&self->ioas_id);
- test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
- &self->idev_id);
+ /* Enable 1M mock IOMMU hugepages */
+ if (variant->hugepages) {
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_HUGE_IOVA,
+ &self->stdev_id, &self->hwpt_id,
+ &self->idev_id);
+ } else {
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
+ &self->hwpt_id, &self->idev_id);
+ }
}
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
@@ -1784,12 +1803,26 @@ FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
.buffer_size = 128UL * 1024UL * 1024UL,
};
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
+{
+ /* 4K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
+ .hugepages = true,
+};
+
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
{
/* 8K bitmap (256M IOVA range) */
.buffer_size = 256UL * 1024UL * 1024UL,
};
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge)
+{
+ /* 8K bitmap (256M IOVA range) */
+ .buffer_size = 256UL * 1024UL * 1024UL,
+ .hugepages = true,
+};
+
TEST_F(iommufd_dirty_tracking, enforce_dirty)
{
uint32_t ioas_id, stddev_id, idev_id;
@@ -1849,65 +1882,80 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
{
- uint32_t stddev_id;
+ uint32_t page_size = MOCK_PAGE_SIZE;
uint32_t hwpt_id;
uint32_t ioas_id;
+ if (variant->hugepages)
+ page_size = MOCK_HUGE_PAGE_SIZE;
+
test_ioctl_ioas_alloc(&ioas_id);
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
variant->buffer_size, MOCK_APERTURE_START);
test_cmd_hwpt_alloc(self->idev_id, ioas_id,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
- test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
test_cmd_set_dirty_tracking(hwpt_id, true);
test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
- MOCK_APERTURE_START, self->page_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
self->bitmap, self->bitmap_size, 0, _metadata);
/* PAGE_SIZE unaligned bitmap */
test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
- MOCK_APERTURE_START, self->page_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
self->bitmap + MOCK_PAGE_SIZE,
self->bitmap_size, 0, _metadata);
- test_ioctl_destroy(stddev_id);
+ /* u64 unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + 0xff1, self->bitmap_size, 0,
+ _metadata);
+
test_ioctl_destroy(hwpt_id);
}
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
{
- uint32_t stddev_id;
+ uint32_t page_size = MOCK_PAGE_SIZE;
uint32_t hwpt_id;
uint32_t ioas_id;
+ if (variant->hugepages)
+ page_size = MOCK_HUGE_PAGE_SIZE;
+
test_ioctl_ioas_alloc(&ioas_id);
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
variant->buffer_size, MOCK_APERTURE_START);
test_cmd_hwpt_alloc(self->idev_id, ioas_id,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
- test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
test_cmd_set_dirty_tracking(hwpt_id, true);
test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
- MOCK_APERTURE_START, self->page_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
self->bitmap, self->bitmap_size,
IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
_metadata);
/* Unaligned bitmap */
test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
- MOCK_APERTURE_START, self->page_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
self->bitmap + MOCK_PAGE_SIZE,
self->bitmap_size,
IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
_metadata);
- test_ioctl_destroy(stddev_id);
+ /* u64 unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + 0xff1, self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
test_ioctl_destroy(hwpt_id);
}
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index c646264aa41f..8d2b46b2114d 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -344,16 +344,19 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
page_size, bitmap, nr))
static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
- __u64 iova, size_t page_size, __u64 *bitmap,
+ __u64 iova, size_t page_size,
+ size_t pte_page_size, __u64 *bitmap,
__u64 bitmap_size, __u32 flags,
struct __test_metadata *_metadata)
{
- unsigned long i, nbits = bitmap_size * BITS_PER_BYTE;
- unsigned long nr = nbits / 2;
+ unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
+ unsigned long nbits = bitmap_size * BITS_PER_BYTE;
+ unsigned long j, i, nr = nbits / pteset ?: 1;
__u64 out_dirty = 0;
/* Mark all even bits as dirty in the mock domain */
- for (i = 0; i < nbits; i += 2)
+ memset(bitmap, 0, bitmap_size);
+ for (i = 0; i < nbits; i += pteset)
set_bit(i, (unsigned long *)bitmap);
test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
@@ -365,8 +368,12 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
flags);
/* Beware ASSERT_EQ() is two statements -- braces are not redundant! */
- for (i = 0; i < nbits; i++) {
- ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap));
+ for (i = 0; i < nbits; i += pteset) {
+ for (j = 0; j < pteset; j++) {
+ ASSERT_EQ(j < npte,
+ test_bit(i + j, (unsigned long *)bitmap));
+ }
+ ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap));
}
memset(bitmap, 0, bitmap_size);
@@ -374,19 +381,23 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
flags);
/* It as read already -- expect all zeroes */
- for (i = 0; i < nbits; i++) {
- ASSERT_EQ(!(i % 2) && (flags &
- IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
- test_bit(i, (unsigned long *)bitmap));
+ for (i = 0; i < nbits; i += pteset) {
+ for (j = 0; j < pteset; j++) {
+ ASSERT_EQ(
+ (j < npte) &&
+ (flags &
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
+ test_bit(i + j, (unsigned long *)bitmap));
+ }
}
return 0;
}
-#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \
- bitmap_size, flags, _metadata) \
+#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\
+ bitmap, bitmap_size, flags, _metadata) \
ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \
- page_size, bitmap, bitmap_size, \
- flags, _metadata))
+ page_size, pte_size, bitmap, \
+ bitmap_size, flags, _metadata))
static int _test_cmd_create_access(int fd, unsigned int ioas_id,
__u32 *access_id, unsigned int flags)
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index a781e6311810..541bf192e30e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -25,6 +25,7 @@
* ksft_test_result_skip(fmt, ...);
* ksft_test_result_xfail(fmt, ...);
* ksft_test_result_error(fmt, ...);
+ * ksft_test_result_code(exit_code, test_name, fmt, ...);
*
* When all tests are finished, clean up and exit the program with one of:
*
@@ -254,6 +255,50 @@ static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...)
va_end(args);
}
+static inline __printf(3, 4)
+void ksft_test_result_code(int exit_code, const char *test_name,
+ const char *msg, ...)
+{
+ const char *tap_code = "ok";
+ const char *directive = "";
+ int saved_errno = errno;
+ va_list args;
+
+ switch (exit_code) {
+ case KSFT_PASS:
+ ksft_cnt.ksft_pass++;
+ break;
+ case KSFT_XFAIL:
+ directive = " # XFAIL ";
+ ksft_cnt.ksft_xfail++;
+ break;
+ case KSFT_XPASS:
+ directive = " # XPASS ";
+ ksft_cnt.ksft_xpass++;
+ break;
+ case KSFT_SKIP:
+ directive = " # SKIP ";
+ ksft_cnt.ksft_xskip++;
+ break;
+ case KSFT_FAIL:
+ default:
+ tap_code = "not ok";
+ ksft_cnt.ksft_fail++;
+ break;
+ }
+
+ /* Docs seem to call for double space if directive is absent */
+ if (!directive[0] && msg[0])
+ directive = " # ";
+
+ va_start(args, msg);
+ printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
+ errno = saved_errno;
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+}
+
static inline int ksft_exit_pass(void)
{
ksft_print_cnts();
diff --git a/tools/testing/selftests/dt/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
index 8dfae51bb4e2..f2fbb914e058 100644
--- a/tools/testing/selftests/dt/ktap_helpers.sh
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -9,14 +9,27 @@ KTAP_CNT_PASS=0
KTAP_CNT_FAIL=0
KTAP_CNT_SKIP=0
+KSFT_PASS=0
+KSFT_FAIL=1
+KSFT_XFAIL=2
+KSFT_XPASS=3
+KSFT_SKIP=4
+
+KSFT_NUM_TESTS=0
+
ktap_print_header() {
echo "TAP version 13"
}
+ktap_print_msg()
+{
+ echo "#" $@
+}
+
ktap_set_plan() {
- num_tests="$1"
+ KSFT_NUM_TESTS="$1"
- echo "1..$num_tests"
+ echo "1..$KSFT_NUM_TESTS"
}
ktap_skip_all() {
@@ -65,6 +78,34 @@ ktap_test_fail() {
KTAP_CNT_FAIL=$((KTAP_CNT_FAIL+1))
}
+ktap_test_result() {
+ description="$1"
+ shift
+
+ if $@; then
+ ktap_test_pass "$description"
+ else
+ ktap_test_fail "$description"
+ fi
+}
+
+ktap_exit_fail_msg() {
+ echo "Bail out! " $@
+ ktap_print_totals
+
+ exit "$KSFT_FAIL"
+}
+
+ktap_finished() {
+ ktap_print_totals
+
+ if [ $(("$KTAP_CNT_PASS" + "$KTAP_CNT_SKIP")) -eq "$KSFT_NUM_TESTS" ]; then
+ exit "$KSFT_PASS"
+ else
+ exit "$KSFT_FAIL"
+ fi
+}
+
ktap_print_totals() {
echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
}
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index e05ac8261046..4fd735e48ee7 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -56,6 +56,7 @@
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -95,14 +96,6 @@
* E.g., #define TH_LOG_ENABLED 1
*
* If no definition is provided, logging is enabled by default.
- *
- * If there is no way to print an error message for the process running the
- * test (e.g. not allowed to write to stderr), it is still possible to get the
- * ASSERT_* number for which the test failed. This behavior can be enabled by
- * writing `_metadata->no_print = true;` before the check sequence that is
- * unable to print. When an error occur, instead of printing an error message
- * and calling `abort(3)`, the test process call `_exit(2)` with the assert
- * number as argument, which is then printed by the parent process.
*/
#define TH_LOG(fmt, ...) do { \
if (TH_LOG_ENABLED) \
@@ -135,8 +128,7 @@
fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
_metadata->results->reason); \
} \
- _metadata->passed = 1; \
- _metadata->skip = 1; \
+ _metadata->exit_code = KSFT_SKIP; \
_metadata->trigger = 0; \
statement; \
} while (0)
@@ -363,6 +355,11 @@
* Defines a test that depends on a fixture (e.g., is part of a test case).
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
+ *
+ * The @test_name code is run in a separate process sharing the same memory
+ * (i.e. vfork), which means that the test process can update its privileges
+ * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from
+ * a directory where write access was dropped).
*/
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@@ -384,17 +381,34 @@
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
+ pid_t child = 1; \
+ int status = 0; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
if (setjmp(_metadata->env) == 0) { \
- fixture_name##_setup(_metadata, &self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (!_metadata->passed || _metadata->skip) \
- return; \
- _metadata->setup_completed = true; \
- fixture_name##_##test_name(_metadata, &self, variant->data); \
+ /* Use the same _metadata. */ \
+ child = vfork(); \
+ if (child == 0) { \
+ fixture_name##_setup(_metadata, &self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ _metadata->setup_completed = true; \
+ fixture_name##_##test_name(_metadata, &self, variant->data); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
+ } \
+ } \
+ if (child == 0) { \
+ if (_metadata->setup_completed && !_metadata->teardown_parent) \
+ fixture_name##_teardown(_metadata, &self, variant->data); \
+ _exit(0); \
} \
- if (_metadata->setup_completed) \
+ if (_metadata->setup_completed && _metadata->teardown_parent) \
fixture_name##_teardown(_metadata, &self, variant->data); \
+ if (!WIFEXITED(status) && WIFSIGNALED(status)) \
+ /* Forward signal to __wait_for_test(). */ \
+ kill(getpid(), WTERMSIG(status)); \
__test_check_assert(_metadata); \
} \
static struct __test_metadata \
@@ -404,6 +418,7 @@
.fixture = &_##fixture_name##_fixture_object, \
.termsig = signal, \
.timeout = tmout, \
+ .teardown_parent = false, \
}; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##test_name(void) \
@@ -694,18 +709,12 @@
for (; _metadata->trigger; _metadata->trigger = \
__bail(_assert, _metadata))
-#define __INC_STEP(_metadata) \
- /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \
- if (_metadata->passed && _metadata->step < 253) \
- _metadata->step++;
-
#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
__typeof__(_expected) __exp = (_expected); \
__typeof__(_seen) __seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(__exp _t __seen)) { \
/* Report with actual signedness to avoid weird output. */ \
switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
@@ -742,7 +751,7 @@
break; \
} \
} \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
/* Ensure the optional handler is triggered */ \
_metadata->trigger = 1; \
} \
@@ -751,10 +760,9 @@
#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
const char *__exp = (_expected); \
const char *__seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(strcmp(__exp, __seen) _t 0)) { \
__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
_metadata->trigger = 1; \
} \
} while (0); OPTIONAL_HANDLER(_assert)
@@ -800,6 +808,37 @@ struct __fixture_metadata {
.prev = &_fixture_global,
};
+struct __test_xfail {
+ struct __fixture_metadata *fixture;
+ struct __fixture_variant_metadata *variant;
+ struct __test_metadata *test;
+ struct __test_xfail *prev, *next;
+};
+
+/**
+ * XFAIL_ADD() - mark variant + test case combination as expected to fail
+ * @fixture_name: name of the fixture
+ * @variant_name: name of the variant
+ * @test_name: name of the test case
+ *
+ * Mark a combination of variant + test case for a given fixture as expected
+ * to fail. Tests marked this way will report XPASS / XFAIL return codes,
+ * instead of PASS / FAIL,and use respective counters.
+ */
+#define XFAIL_ADD(fixture_name, variant_name, test_name) \
+ static struct __test_xfail \
+ _##fixture_name##_##variant_name##_##test_name##_xfail = \
+ { \
+ .fixture = &_##fixture_name##_fixture_object, \
+ .variant = &_##fixture_name##_##variant_name##_object, \
+ .test = &_##fixture_name##_##test_name##_object, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
+ { \
+ __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \
+ }
+
static struct __fixture_metadata *__fixture_list = &_fixture_global;
static int __constructor_order;
@@ -814,6 +853,7 @@ static inline void __register_fixture(struct __fixture_metadata *f)
struct __fixture_variant_metadata {
const char *name;
const void *data;
+ struct __test_xfail *xfails;
struct __fixture_variant_metadata *prev, *next;
};
@@ -832,20 +872,24 @@ struct __test_metadata {
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
int termsig;
- int passed;
- int skip; /* did SKIP get used? */
+ int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
bool timed_out; /* did this test timeout instead of exiting? */
- __u8 step;
- bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
bool aborted; /* stopped test due to failed ASSERT */
bool setup_completed; /* did setup finish? */
+ bool teardown_parent; /* run teardown in a parent process */
jmp_buf env; /* for exiting out of test early */
struct __test_results *results;
struct __test_metadata *prev, *next;
};
+static inline bool __test_passed(struct __test_metadata *metadata)
+{
+ return metadata->exit_code != KSFT_FAIL &&
+ metadata->exit_code <= KSFT_SKIP;
+}
+
/*
* Since constructors are called in reverse order, reverse the test
* list so tests are run in source declaration order.
@@ -860,6 +904,11 @@ static inline void __register_test(struct __test_metadata *t)
__LIST_APPEND(t->fixture->tests, t);
}
+static inline void __register_xfail(struct __test_xfail *xf)
+{
+ __LIST_APPEND(xf->variant->xfails, xf);
+}
+
static inline int __bail(int for_realz, struct __test_metadata *t)
{
/* if this is ASSERT, return immediately. */
@@ -873,11 +922,8 @@ static inline int __bail(int for_realz, struct __test_metadata *t)
static inline void __test_check_assert(struct __test_metadata *t)
{
- if (t->aborted) {
- if (t->no_print)
- _exit(t->step);
+ if (t->aborted)
abort();
- }
}
struct __test_metadata *__active_test;
@@ -913,7 +959,7 @@ void __wait_for_test(struct __test_metadata *t)
int status;
if (sigaction(SIGALRM, &action, &saved_action)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: unable to install SIGALRM handler\n",
t->name);
@@ -925,7 +971,7 @@ void __wait_for_test(struct __test_metadata *t)
waitpid(t->pid, &status, 0);
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: unable to uninstall SIGALRM handler\n",
t->name);
@@ -934,16 +980,16 @@ void __wait_for_test(struct __test_metadata *t)
__active_test = NULL;
if (t->timed_out) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- if (WEXITSTATUS(status) == 255) {
- /* SKIP */
- t->passed = 1;
- t->skip = 1;
+ if (WEXITSTATUS(status) == KSFT_SKIP ||
+ WEXITSTATUS(status) == KSFT_XPASS ||
+ WEXITSTATUS(status) == KSFT_XFAIL) {
+ t->exit_code = WEXITSTATUS(status);
} else if (t->termsig != -1) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test exited normally instead of by signal (code: %d)\n",
t->name,
@@ -951,26 +997,25 @@ void __wait_for_test(struct __test_metadata *t)
} else {
switch (WEXITSTATUS(status)) {
/* Success */
- case 0:
- t->passed = 1;
+ case KSFT_PASS:
+ t->exit_code = KSFT_PASS;
break;
- /* Other failure, assume step report. */
+ /* Failure */
default:
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: Test failed at step #%d\n",
- t->name,
- WEXITSTATUS(status));
+ "# %s: Test failed\n",
+ t->name);
}
}
} else if (WIFSIGNALED(status)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
if (WTERMSIG(status) == SIGABRT) {
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by assertion\n",
t->name);
} else if (WTERMSIG(status) == t->termsig) {
- t->passed = 1;
+ t->exit_code = KSFT_PASS;
} else {
fprintf(TH_LOG_STREAM,
"# %s: Test terminated unexpectedly by signal %d\n",
@@ -1110,16 +1155,19 @@ void __run_test(struct __fixture_metadata *f,
struct __fixture_variant_metadata *variant,
struct __test_metadata *t)
{
+ struct __test_xfail *xfail;
+ char test_name[LINE_MAX];
+ const char *diagnostic;
+
/* reset test struct */
- t->passed = 1;
- t->skip = 0;
+ t->exit_code = KSFT_PASS;
t->trigger = 0;
- t->step = 1;
- t->no_print = 0;
memset(t->results->reason, 0, sizeof(t->results->reason));
- ksft_print_msg(" RUN %s%s%s.%s ...\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ ksft_print_msg(" RUN %s ...\n", test_name);
/* Make sure output buffers are flushed before fork */
fflush(stdout);
@@ -1128,29 +1176,33 @@ void __run_test(struct __fixture_metadata *f,
t->pid = fork();
if (t->pid < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
} else if (t->pid == 0) {
setpgrp();
t->fn(t, variant);
- if (t->skip)
- _exit(255);
- /* Pass is exit 0 */
- if (t->passed)
- _exit(0);
- /* Something else happened, report the step. */
- _exit(t->step);
+ _exit(t->exit_code);
} else {
__wait_for_test(t);
}
- ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ ksft_print_msg(" %4s %s\n",
+ __test_passed(t) ? "OK" : "FAIL", test_name);
- if (t->skip)
- ksft_test_result_skip("%s\n", t->results->reason[0] ?
- t->results->reason : "unknown");
+ /* Check if we're expecting this test to fail */
+ for (xfail = variant->xfails; xfail; xfail = xfail->next)
+ if (xfail->test == t)
+ break;
+ if (xfail)
+ t->exit_code = __test_passed(t) ? KSFT_XPASS : KSFT_XFAIL;
+
+ if (t->results->reason[0])
+ diagnostic = t->results->reason;
+ else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL)
+ diagnostic = NULL;
else
- ksft_test_result(t->passed, "%s%s%s.%s\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ diagnostic = "unknown";
+
+ ksft_test_result_code(t->exit_code, test_name,
+ diagnostic ? "%s" : "", diagnostic);
}
static int test_harness_run(int argc, char **argv)
@@ -1198,7 +1250,7 @@ static int test_harness_run(int argc, char **argv)
t->results = results;
__run_test(f, v, t);
t->results = NULL;
- if (t->passed)
+ if (__test_passed(t))
pass_count++;
else
ret = 1;
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 274b8465b42a..2cb8dd1f8275 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -248,7 +248,7 @@ static void *test_vcpu_run(void *arg)
REPORT_GUEST_ASSERT(uc);
break;
default:
- TEST_FAIL("Unexpected guest exit\n");
+ TEST_FAIL("Unexpected guest exit");
}
return NULL;
@@ -287,7 +287,7 @@ static int test_migrate_vcpu(unsigned int vcpu_idx)
/* Allow the error where the vCPU thread is already finished */
TEST_ASSERT(ret == 0 || ret == ESRCH,
- "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n",
+ "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d",
vcpu_idx, new_pcpu, ret);
return ret;
@@ -326,12 +326,12 @@ static void test_run(struct kvm_vm *vm)
pthread_mutex_init(&vcpu_done_map_lock, NULL);
vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
- TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n");
+ TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap");
for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
(void *)(unsigned long)i);
- TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i);
+ TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i);
}
/* Spawn a thread to control the vCPU migrations */
@@ -340,7 +340,7 @@ static void test_run(struct kvm_vm *vm)
ret = pthread_create(&pt_vcpu_migration, NULL,
test_vcpu_migration, NULL);
- TEST_ASSERT(!ret, "Failed to create the migration pthread\n");
+ TEST_ASSERT(!ret, "Failed to create the migration pthread");
}
@@ -384,7 +384,7 @@ static struct kvm_vm *test_vm_create(void)
if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
else
- TEST_FAIL("no support for global offset\n");
+ TEST_FAIL("no support for global offset");
}
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
index 31f66ba97228..27c10e7a7e01 100644
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -175,18 +175,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
/* First 'read' should be an upper limit of the features supported */
vcpu_get_reg(vcpu, reg_info->reg, &val);
TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
- "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n",
+ "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
/* Test a 'write' by disabling all the features of the register map */
ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
TEST_ASSERT(ret == 0,
- "Failed to clear all the features of reg: 0x%lx; ret: %d\n",
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
reg_info->reg, errno);
vcpu_get_reg(vcpu, reg_info->reg, &val);
TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg);
+ "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
/*
* Test enabling a feature that's not supported.
@@ -195,7 +195,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
if (reg_info->max_feat_bit < 63) {
ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
TEST_ASSERT(ret != 0 && errno == EINVAL,
- "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n",
+ "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
errno, reg_info->reg);
}
}
@@ -216,7 +216,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
*/
vcpu_get_reg(vcpu, reg_info->reg, &val);
TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx\n",
+ "Expected all the features to be cleared for reg: 0x%lx",
reg_info->reg);
/*
@@ -226,7 +226,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
*/
ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
TEST_ASSERT(ret != 0 && errno == EBUSY,
- "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n",
+ "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
errno, reg_info->reg);
}
}
@@ -265,7 +265,7 @@ static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
case TEST_STAGE_HVC_IFACE_FALSE_INFO:
break;
default:
- TEST_FAIL("Unknown test stage: %d\n", prev_stage);
+ TEST_FAIL("Unknown test stage: %d", prev_stage);
}
}
@@ -294,7 +294,7 @@ static void test_run(void)
REPORT_GUEST_ASSERT(uc);
break;
default:
- TEST_FAIL("Unexpected guest exit\n");
+ TEST_FAIL("Unexpected guest exit");
}
}
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 08a5ca5bed56..53fddad57cbb 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -414,10 +414,10 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm,
if (fd != -1) {
ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
0, paging_size);
- TEST_ASSERT(ret == 0, "fallocate failed\n");
+ TEST_ASSERT(ret == 0, "fallocate failed");
} else {
ret = madvise(hva, paging_size, MADV_DONTNEED);
- TEST_ASSERT(ret == 0, "madvise failed\n");
+ TEST_ASSERT(ret == 0, "madvise failed");
}
return true;
@@ -501,7 +501,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
void fail_vcpu_run_no_handler(int ret)
{
- TEST_FAIL("Unexpected vcpu run failure\n");
+ TEST_FAIL("Unexpected vcpu run failure");
}
void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
index f4ceae9c8925..2d189f3da228 100644
--- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
@@ -178,7 +178,7 @@ static void expect_call_denied(struct kvm_vcpu *vcpu)
struct ucall uc;
if (get_ucall(vcpu, &uc) != UCALL_SYNC)
- TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
"Unexpected SMCCC return code: %lu", uc.args[1]);
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
index 9d51b5691349..5f9713364693 100644
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -517,11 +517,11 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
if (expect_fail)
TEST_ASSERT(pmcr_orig == pmcr,
- "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n",
+ "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
pmcr, pmcr_n);
else
TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
- "Failed to update PMCR.N to %lu (received: %lu)\n",
+ "Failed to update PMCR.N to %lu (received: %lu)",
pmcr_n, get_pmcr_n(pmcr));
}
@@ -594,12 +594,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
*/
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(set_reg_id), reg_val);
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
/*
@@ -611,12 +611,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(set_reg_id), reg_val);
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
}
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 09c116a82a84..bf3609f71854 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -45,10 +45,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
/* Let the guest access its memory */
ret = _vcpu_run(vcpu);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
TEST_ASSERT(false,
- "Invalid guest sync status: exit_reason=%s\n",
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
}
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index d374dbcf9a53..504f6fe980e8 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -88,9 +88,9 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
- "Invalid guest sync status: exit_reason=%s\n",
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 6cbecf499767..eaad5b20854c 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -262,7 +262,7 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
"vcpu run failed: errno=%d", err);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
- "Invalid guest sync status: exit_reason=%s\n",
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
vcpu_handle_sync_stop();
@@ -376,7 +376,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
- /* Cleared pages should be the same as collected */
+ /*
+ * Cleared pages should be the same as collected, as KVM is supposed to
+ * clear only the entries that have been harvested.
+ */
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
@@ -410,17 +413,11 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
pr_info("vcpu continues now.\n");
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
- "exit_reason=%s\n",
+ "exit_reason=%s",
exit_reason_str(run->exit_reason));
}
}
-static void dirty_ring_before_vcpu_join(void)
-{
- /* Kick another round of vcpu just to make sure it will quit */
- sem_post(&sem_vcpu_cont);
-}
-
struct log_mode {
const char *name;
/* Return true if this mode is supported, otherwise false */
@@ -433,7 +430,6 @@ struct log_mode {
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
- void (*before_vcpu_join) (void);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -452,7 +448,6 @@ struct log_mode {
.supported = dirty_ring_supported,
.create_vm_done = dirty_ring_create_vm_done,
.collect_dirty_pages = dirty_ring_collect_dirty_pages,
- .before_vcpu_join = dirty_ring_before_vcpu_join,
.after_vcpu_run = dirty_ring_after_vcpu_run,
},
};
@@ -513,14 +508,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
mode->after_vcpu_run(vcpu, ret, err);
}
-static void log_mode_before_vcpu_join(void)
-{
- struct log_mode *mode = &log_modes[host_log_mode];
-
- if (mode->before_vcpu_join)
- mode->before_vcpu_join();
-}
-
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
@@ -719,6 +706,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct kvm_vm *vm;
unsigned long *bmap;
uint32_t ring_buf_idx = 0;
+ int sem_val;
if (!log_mode_supported()) {
print_skip("Log mode '%s' not supported",
@@ -788,12 +776,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Start the iterations */
iteration = 1;
sync_global_to_guest(vm, iteration);
- host_quit = false;
+ WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
host_track_next_count = 0;
WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+ /*
+ * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
+ * that the main task and vCPU worker were synchronized and completed
+ * verification of all iterations.
+ */
+ sem_getvalue(&sem_vcpu_stop, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+ sem_getvalue(&sem_vcpu_cont, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
while (iteration < p->iterations) {
@@ -819,15 +817,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
assert(host_log_mode == LOG_MODE_DIRTY_RING ||
atomic_read(&vcpu_sync_stop_requested) == false);
vm_dirty_log_verify(mode, bmap);
- sem_post(&sem_vcpu_cont);
- iteration++;
+ /*
+ * Set host_quit before sem_vcpu_cont in the final iteration to
+ * ensure that the vCPU worker doesn't resume the guest. As
+ * above, the dirty ring test may stop and wait even when not
+ * explicitly request to do so, i.e. would hang waiting for a
+ * "continue" if it's allowed to resume the guest.
+ */
+ if (++iteration == p->iterations)
+ WRITE_ONCE(host_quit, true);
+
+ sem_post(&sem_vcpu_cont);
sync_global_to_guest(vm, iteration);
}
- /* Tell the vcpu thread to quit */
- host_quit = true;
- log_mode_before_vcpu_join();
pthread_join(vcpu_thread, NULL);
pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
index 8274ef04301f..91f05f78e824 100644
--- a/tools/testing/selftests/kvm/get-reg-list.c
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -152,7 +152,7 @@ static void check_supported(struct vcpu_reg_list *c)
continue;
__TEST_REQUIRE(kvm_has_cap(s->capability),
- "%s: %s not available, skipping tests\n",
+ "%s: %s not available, skipping tests",
config_name(c), s->name);
}
}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
index 41230b746190..3502caa3590c 100644
--- a/tools/testing/selftests/kvm/guest_print_test.c
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -98,7 +98,7 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
int offset = len_str - len_substr;
TEST_ASSERT(len_substr <= len_str,
- "Expected '%s' to be a substring of '%s'\n",
+ "Expected '%s' to be a substring of '%s'",
assert_msg, expected_assert_msg);
TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
@@ -116,7 +116,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
- "Unexpected exit reason: %u (%s),\n",
+ "Unexpected exit reason: %u (%s),",
run->exit_reason, exit_reason_str(run->exit_reason));
switch (get_ucall(vcpu, &uc)) {
@@ -161,11 +161,11 @@ static void test_limits(void)
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
- "Unexpected exit reason: %u (%s),\n",
+ "Unexpected exit reason: %u (%s),",
run->exit_reason, exit_reason_str(run->exit_reason));
TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
- "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n",
+ "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)",
uc.cmd, UCALL_ABORT);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index f5d59b9934f1..decc521fc760 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -41,7 +41,7 @@ static void *run_vcpu(void *arg)
vcpu_run(vcpu);
- TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
+ TEST_ASSERT(false, "%s: exited with reason %d: %s",
__func__, run->exit_reason,
exit_reason_str(run->exit_reason));
pthread_exit(NULL);
@@ -55,7 +55,7 @@ static void *sleeping_thread(void *arg)
fd = open("/dev/null", O_RDWR);
close(fd);
}
- TEST_ASSERT(false, "%s: exited\n", __func__);
+ TEST_ASSERT(false, "%s: exited", __func__);
pthread_exit(NULL);
}
@@ -118,7 +118,7 @@ static void run_test(uint32_t run)
for (i = 0; i < VCPU_NUM; ++i)
check_join(threads[i], &b);
/* Should not be reached */
- TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
+ TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run);
}
void wait_for_child_setup(pid_t pid)
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 71a41fa924b7..50a5e31ba8da 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -195,4 +195,6 @@ __printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...);
char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
+char *sys_get_cur_clocksource(void);
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index a84863503fcb..5bca8c947c82 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1271,4 +1271,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+bool sys_clocksource_is_based_on_tsc(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 31b3cb24b9a7..b9e23265e4b3 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -65,7 +65,7 @@ int main(int argc, char *argv[])
int r = setrlimit(RLIMIT_NOFILE, &rl);
__TEST_REQUIRE(r >= 0,
- "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
old_rlim_max, nr_fds_wanted);
} else {
TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index e37dc9c21888..e0ba97ac1c56 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -204,9 +204,9 @@ static void *vcpu_worker(void *data)
ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
- "Invalid guest sync status: exit_reason=%s\n",
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(vcpu->run->exit_reason));
pr_debug("Got sync event from vCPU %d\n", vcpu->id);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 41c776b642c0..43b9a7283360 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -398,7 +398,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
int i;
TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
- " num: %u\n", num);
+ " num: %u", num);
va_start(ap, num);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b5f28d21a947..184378d593e9 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -38,7 +38,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
struct list_head *iter;
unsigned int nr_gic_pages, nr_vcpus_created = 0;
- TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n");
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
/*
* Make sure that the caller is infact calling this
@@ -47,7 +47,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
list_for_each(iter, &vm->vcpus)
nr_vcpus_created++;
TEST_ASSERT(nr_vcpus == nr_vcpus_created,
- "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n",
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
nr_vcpus, nr_vcpus_created);
/* Distributor setup */
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index 266f3876e10a..f34d926d9735 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -184,7 +184,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
"Seek to program segment offset failed,\n"
" program header idx: %u errno: %i\n"
" offset_rv: 0x%jx\n"
- " expected: 0x%jx\n",
+ " expected: 0x%jx",
n1, errno, (intmax_t) offset_rv,
(intmax_t) phdr.p_offset);
test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e066d584c656..1b197426f29f 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -27,7 +27,8 @@ int open_path_or_exit(const char *path, int flags)
int fd;
fd = open(path, flags);
- __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);
+ __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
+ TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
return fd;
}
@@ -320,7 +321,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
uint64_t nr_pages;
TEST_ASSERT(nr_runnable_vcpus,
- "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");
+ "Use vm_create_barebones() for VMs that _never_ have vCPUs");
TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
"nr_vcpus = %d too large for host, max-vcpus = %d",
@@ -491,7 +492,7 @@ void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
CPU_ZERO(&mask);
CPU_SET(pcpu, &mask);
r = sched_setaffinity(0, sizeof(mask), &mask);
- TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
+ TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
}
static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
@@ -499,7 +500,7 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
- "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
+ "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
return pcpu;
}
@@ -529,7 +530,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
int i, r;
cpu_list = strdup(pcpus_string);
- TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+ TEST_ASSERT(cpu_list, "strdup() allocation failed.");
r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
TEST_ASSERT(!r, "sched_getaffinity() failed");
@@ -538,7 +539,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
/* 1. Get all pcpus for vcpus. */
for (i = 0; i < nr_vcpus; i++) {
- TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
+ TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
cpu = strtok(NULL, delim);
}
@@ -1057,7 +1058,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n",
+ " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
ret, errno, slot, flags,
guest_paddr, (uint64_t) region->region.memory_size,
region->region.guest_memfd);
@@ -1222,7 +1223,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);
ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
- TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx\n",
+ TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
punch_hole ? "punch hole" : "allocate", gpa, len,
region->region.guest_memfd, mode, fd_offset);
}
@@ -1265,7 +1266,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
struct kvm_vcpu *vcpu;
/* Confirm a vcpu with the specified id doesn't already exist. */
- TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);
+ TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);
/* Allocate and initialize new vcpu structure. */
vcpu = calloc(1, sizeof(*vcpu));
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index d05487e5a371..cf2c73971308 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -192,7 +192,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
TEST_ASSERT(guest_num_pages < region_end_gfn,
"Requested more guest memory than address space allows.\n"
" guest pages: %" PRIx64 " max gfn: %" PRIx64
- " nr_vcpus: %d wss: %" PRIx64 "]\n",
+ " nr_vcpus: %d wss: %" PRIx64 "]",
guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 7ca736fb4194..2bb33a8ac03c 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -327,7 +327,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
int i;
TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
- " num: %u\n", num);
+ " num: %u", num);
va_start(ap, num);
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 15945121daf1..f6d227892cbc 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -198,7 +198,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
int i;
TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 5d7f28b02d73..5a8f8becb129 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -392,3 +392,28 @@ char *strdup_printf(const char *fmt, ...)
return str;
}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+char *sys_get_cur_clocksource(void)
+{
+ char *clk_name;
+ struct stat st;
+ FILE *fp;
+
+ fp = fopen(CLOCKSOURCE_PATH, "r");
+ TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno);
+
+ TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d",
+ errno);
+
+ clk_name = malloc(st.st_size);
+ TEST_ASSERT(clk_name, "failed to allocate buffer to read file");
+
+ TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d",
+ ferror(fp));
+
+ fclose(fp);
+
+ return clk_name;
+}
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 271f63891581..f4eef6eb2dc2 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -69,7 +69,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (pollfd[1].revents & POLLIN) {
r = read(pollfd[1].fd, &tmp_chr, 1);
TEST_ASSERT(r == 1,
- "Error reading pipefd in UFFD thread\n");
+ "Error reading pipefd in UFFD thread");
break;
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index d8288374078e..f639b3e062e3 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -170,10 +170,10 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
* this level.
*/
TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx",
current_level, vaddr);
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
- "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+ "Cannot create page table at level: %u, vaddr: 0x%lx",
current_level, vaddr);
}
return pte;
@@ -220,7 +220,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
/* Fill in page table entry. */
pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
- "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+ "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
}
@@ -253,7 +253,7 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
if (*pte & PTE_LARGE_MASK) {
TEST_ASSERT(*level == PG_LEVEL_NONE ||
*level == current_level,
- "Unexpected hugepage at level %d\n", current_level);
+ "Unexpected hugepage at level %d", current_level);
*level = current_level;
}
@@ -825,7 +825,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
struct kvm_regs regs;
TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
@@ -1299,3 +1299,14 @@ void kvm_selftest_arch_init(void)
host_cpu_is_intel = this_cpu_is_intel();
host_cpu_is_amd = this_cpu_is_amd();
}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+ char *clk_name = sys_get_cur_clocksource();
+ bool ret = !strcmp(clk_name, "tsc\n") ||
+ !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+ free(clk_name);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 59d97531c9b1..089b8925b6b2 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -54,7 +54,7 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
/* KVM should return supported EVMCS version range */
TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
(evmcs_ver & 0xff) > 0,
- "Incorrect EVMCS version range: %x:%x\n",
+ "Incorrect EVMCS version range: %x:%x",
evmcs_ver & 0xff, evmcs_ver >> 8);
return evmcs_ver;
@@ -387,10 +387,10 @@ static void nested_create_pte(struct kvm_vm *vm,
* this level.
*/
TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
current_level, nested_paddr);
TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx",
current_level, nested_paddr);
}
}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 9855c41ca811..156361966612 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -45,7 +45,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
/* Let the guest access its memory until a stop signal is received */
while (!READ_ONCE(memstress_args.stop_vcpus)) {
ret = _vcpu_run(vcpu);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
if (get_ucall(vcpu, NULL) == UCALL_SYNC)
continue;
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 8698d1ab60d0..579a64f97333 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -175,11 +175,11 @@ static void wait_for_vcpu(void)
struct timespec ts;
TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
- "clock_gettime() failed: %d\n", errno);
+ "clock_gettime() failed: %d", errno);
ts.tv_sec += 2;
TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
- "sem_timedwait() failed: %d\n", errno);
+ "sem_timedwait() failed: %d", errno);
}
static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
@@ -336,7 +336,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
TEST_ASSERT(gpa == guest_addr,
- "vm_phy_pages_alloc() failed\n");
+ "vm_phy_pages_alloc() failed");
data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 6652108816db..6435e7a65642 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -49,15 +49,42 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT:
/*
* Like ISA_EXT registers, SBI_EXT registers are only visible when the
* host supports them and disabling them does not affect the visibility
@@ -150,7 +177,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
/* Double check whether the desired extension was enabled */
__TEST_REQUIRE(vcpu_has_ext(vcpu, feature),
- "%s not available, skipping tests\n", s->name);
+ "%s not available, skipping tests", s->name);
}
}
@@ -394,15 +421,42 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBC),
+ KVM_ISA_EXT_ARR(ZBKB),
+ KVM_ISA_EXT_ARR(ZBKC),
+ KVM_ISA_EXT_ARR(ZBKX),
KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFH),
+ KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICNTR),
KVM_ISA_EXT_ARR(ZICOND),
KVM_ISA_EXT_ARR(ZICSR),
KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZKND),
+ KVM_ISA_EXT_ARR(ZKNE),
+ KVM_ISA_EXT_ARR(ZKNH),
+ KVM_ISA_EXT_ARR(ZKR),
+ KVM_ISA_EXT_ARR(ZKSED),
+ KVM_ISA_EXT_ARR(ZKSH),
+ KVM_ISA_EXT_ARR(ZKT),
+ KVM_ISA_EXT_ARR(ZVBB),
+ KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFH),
+ KVM_ISA_EXT_ARR(ZVFHMIN),
+ KVM_ISA_EXT_ARR(ZVKB),
+ KVM_ISA_EXT_ARR(ZVKG),
+ KVM_ISA_EXT_ARR(ZVKNED),
+ KVM_ISA_EXT_ARR(ZVKNHA),
+ KVM_ISA_EXT_ARR(ZVKNHB),
+ KVM_ISA_EXT_ARR(ZVKSED),
+ KVM_ISA_EXT_ARR(ZVKSH),
+ KVM_ISA_EXT_ARR(ZVKT),
};
if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name))
@@ -888,15 +942,42 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND);
KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR);
KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT);
struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_base,
@@ -914,14 +995,41 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svpbmt,
&config_zba,
&config_zbb,
+ &config_zbc,
+ &config_zbkb,
+ &config_zbkc,
+ &config_zbkx,
&config_zbs,
+ &config_zfa,
+ &config_zfh,
+ &config_zfhmin,
&config_zicbom,
&config_zicboz,
&config_zicntr,
&config_zicond,
&config_zicsr,
&config_zifencei,
+ &config_zihintntl,
&config_zihintpause,
&config_zihpm,
+ &config_zknd,
+ &config_zkne,
+ &config_zknh,
+ &config_zkr,
+ &config_zksed,
+ &config_zksh,
+ &config_zkt,
+ &config_zvbb,
+ &config_zvbc,
+ &config_zvfh,
+ &config_zvfhmin,
+ &config_zvkb,
+ &config_zvkg,
+ &config_zvkned,
+ &config_zvknha,
+ &config_zvknhb,
+ &config_zvksed,
+ &config_zvksh,
+ &config_zvkt,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index f74e76d03b7e..28f97fb52044 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -245,7 +245,7 @@ int main(int argc, char *argv[])
} while (snapshot != atomic_read(&seq_cnt));
TEST_ASSERT(rseq_cpu == cpu,
- "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
+ "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
/*
@@ -256,7 +256,7 @@ int main(int argc, char *argv[])
* migrations given the 1us+ delay in the migration task.
*/
TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
- "Only performed %d KVM_RUNs, task stalled too much?\n", i);
+ "Only performed %d KVM_RUNs, task stalled too much?", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index bb3ca9a5d731..b6da8f71ea19 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -375,6 +375,32 @@ static void test_copy(void)
kvm_vm_free(t.kvm_vm);
}
+static void test_copy_access_register(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ prepare_mem12();
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+
+ /*
+ * Primary address space gets used if an access register
+ * contains zero. The host makes use of AR[1] so is a good
+ * candidate to ensure the guest AR (of zero) is used.
+ */
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
+ GADDR_V(mem1), AR(1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
+ GADDR_V(mem2), AR(1));
+ ASSERT_MEM_EQ(mem1, mem2, t.size);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
static void set_storage_key_range(void *addr, size_t len, uint8_t key)
{
uintptr_t _addr, abs, i;
@@ -1102,6 +1128,11 @@ int main(int argc, char *argv[])
.requirements_met = extension_cap > 0,
},
{
+ .name = "copy with access register mode",
+ .test = test_copy_access_register,
+ .requirements_met = true,
+ },
+ {
.name = "error checks with key",
.test = test_errors_key,
.requirements_met = extension_cap > 0,
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
index e41e2cb8ffa9..357943f2bea8 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -78,7 +78,7 @@ static void assert_noirq(struct kvm_vcpu *vcpu)
* (notably, the emergency call interrupt we have injected) should
* be cleared by the resets, so this should be 0.
*/
- TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
+ TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
TEST_ASSERT(!irqs, "IRQ pending");
}
@@ -199,7 +199,7 @@ static void inject_irq(struct kvm_vcpu *vcpu)
irq->type = KVM_S390_INT_EMERGENCY;
irq->u.emerg.code = vcpu->id;
irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
- TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
+ TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
}
static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 636a70ddac1e..43fb25ddc3ec 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -39,13 +39,13 @@ static void guest_code(void)
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
+ " values did not match: 0x%llx, 0x%llx", \
left->reg, right->reg)
#define REG_COMPARE32(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%x, 0x%x\n", \
+ " values did not match: 0x%x, 0x%x", \
left->reg, right->reg)
@@ -82,14 +82,14 @@ void test_read_invalid(struct kvm_vcpu *vcpu)
run->kvm_valid_regs = INVALID_SYNC_FIELD;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_valid_regs = 0;
}
@@ -103,14 +103,14 @@ void test_set_invalid(struct kvm_vcpu *vcpu)
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_dirty_regs = 0;
}
@@ -125,12 +125,12 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
/* Request and verify all valid register sets. */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 &&
(run->s390_sieic.ipb >> 16) == 0x501,
- "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+ "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
run->s390_sieic.icptcode, run->s390_sieic.ipa,
run->s390_sieic.ipb);
@@ -161,7 +161,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
}
rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
"r11 sync regs value incorrect 0x%llx.",
@@ -193,7 +193,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
run->s.regs.gprs[11] = 0xDEADBEEF;
run->s.regs.diag318 = 0x4B1D;
rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
"r11 sync regs value incorrect 0x%llx.",
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 075b80dbe237..06b43ed23580 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -98,11 +98,11 @@ static void wait_for_vcpu(void)
struct timespec ts;
TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
- "clock_gettime() failed: %d\n", errno);
+ "clock_gettime() failed: %d", errno);
ts.tv_sec += 2;
TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
- "sem_timedwait() failed: %d\n", errno);
+ "sem_timedwait() failed: %d", errno);
/* Wait for the vCPU thread to reenter the guest. */
usleep(100000);
@@ -302,7 +302,7 @@ static void test_delete_memory_region(void)
if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
TEST_ASSERT(regs.rip >= final_rip_start &&
regs.rip < final_rip_end,
- "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+ "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx",
final_rip_start, final_rip_end, regs.rip);
kvm_vm_free(vm);
@@ -367,11 +367,21 @@ static void test_invalid_memory_region_flags(void)
}
if (supported_flags & KVM_MEM_GUEST_MEMFD) {
+ int guest_memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
+
r = __vm_set_user_memory_region2(vm, 0,
KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD,
- 0, MEM_REGION_SIZE, NULL, 0, 0);
+ 0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
TEST_ASSERT(r && errno == EINVAL,
"KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported");
+
+ r = __vm_set_user_memory_region2(vm, 0,
+ KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD,
+ 0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed, read-only GUEST_MEMFD memslots are unsupported");
+
+ close(guest_memfd);
}
}
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
index 7f5b330b6a1b..513d421a9bff 100644
--- a/tools/testing/selftests/kvm/system_counter_offset_test.c
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -108,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
handle_abort(&uc);
return;
default:
- TEST_ASSERT(0, "unhandled ucall %ld\n",
+ TEST_ASSERT(0, "unhandled ucall %ld",
get_ucall(vcpu, &uc));
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 11329e5ff945..eae521f050e0 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -221,7 +221,7 @@ int main(int argc, char *argv[])
vm_vaddr_t amx_cfg, tiledata, xstate;
struct ucall uc;
u32 amx_offset;
- int stage, ret;
+ int ret;
/*
* Note, all off-by-default features must be enabled before anything
@@ -263,7 +263,7 @@ int main(int argc, char *argv[])
memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
- for (stage = 1; ; stage++) {
+ for (;;) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -296,7 +296,7 @@ int main(int argc, char *argv[])
void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
/* Only check TMM0 register, 1 tile */
ret = memcmp(amx_start, tiles_data, TILE_SIZE);
- TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
kvm_x86_state_cleanup(state);
break;
case 9:
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index 3b34d8156d1c..8c579ce714e9 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -84,7 +84,7 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
TEST_ASSERT(e1->function == e2->function &&
e1->index == e2->index && e1->flags == e2->flags,
- "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n",
+ "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x",
i, e1->function, e1->index, e1->flags,
e2->function, e2->index, e2->flags);
@@ -170,7 +170,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu)
vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
- "KVM didn't update nent on success, wanted %u, got %u\n",
+ "KVM didn't update nent on success, wanted %u, got %u",
vcpu->cpuid->nent, cpuid->nent);
for (i = 0; i < vcpu->cpuid->nent; i++) {
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
index 634c6bfcd572..ee3b384b991c 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
uint64_t host_num_pages;
uint64_t pages_per_slot;
int i;
- uint64_t total_4k_pages;
struct kvm_page_stats stats_populated;
struct kvm_page_stats stats_dirty_logging_enabled;
struct kvm_page_stats stats_dirty_pass[ITERATIONS];
@@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
pages_per_slot = host_num_pages / SLOTS;
+ TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+ TEST_ASSERT(!(host_num_pages % 512),
+ "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
@@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused)
memstress_free_bitmaps(bitmaps, SLOTS);
memstress_destroy_vm(vm);
- /* Make assertions about the page counts. */
- total_4k_pages = stats_populated.pages_4k;
- total_4k_pages += stats_populated.pages_2m * 512;
- total_4k_pages += stats_populated.pages_1g * 512 * 512;
+ TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+ stats_populated.pages_1g * 512 * 512), host_num_pages);
/*
* Check that all huge pages were split. Since large pages can only
@@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused)
*/
if (dirty_log_manual_caps) {
TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
- TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+ TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_clear_pass[0].pages_4k);
TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
} else {
TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
- TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+ TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_dirty_logging_enabled.pages_4k);
}
/*
* Once dirty logging is disabled and the vCPUs have touched all their
- * memory again, the page counts should be the same as they were
+ * memory again, the hugepage counts should be the same as they were
* right after initial population of memory.
*/
- TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
}
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
index 0a1573d52882..37b1a9f52864 100644
--- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
@@ -41,7 +41,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
insn_bytes = run->emulation_failure.insn_bytes;
TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
- "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n",
+ "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
insn_bytes[0], insn_bytes[1]);
vcpu_regs_get(vcpu, &regs);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index f5e1e98f04f9..e058bc676cd6 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -212,6 +212,7 @@ int main(void)
int stage;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
@@ -220,7 +221,7 @@ int main(void)
tsc_page_gva = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
- "TSC page has to be page aligned\n");
+ "TSC page has to be page aligned");
vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
host_check_tsc_msr_rdtsc(vcpu);
@@ -237,7 +238,7 @@ int main(void)
break;
case UCALL_DONE:
/* Keep in sync with guest_main() */
- TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
stage);
goto out;
default:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 4f4193fc74ff..b923a285e96f 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -454,7 +454,7 @@ static void guest_test_msrs_access(void)
case 44:
/* MSR is not available when CPUID feature bit is unset */
if (!has_invtsc)
- continue;
+ goto next_stage;
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = false;
msr->fault_expected = true;
@@ -462,7 +462,7 @@ static void guest_test_msrs_access(void)
case 45:
/* MSR is vailable when CPUID feature bit is set */
if (!has_invtsc)
- continue;
+ goto next_stage;
vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = false;
@@ -471,7 +471,7 @@ static void guest_test_msrs_access(void)
case 46:
/* Writing bits other than 0 is forbidden */
if (!has_invtsc)
- continue;
+ goto next_stage;
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = true;
msr->write_val = 0xdeadbeef;
@@ -480,7 +480,7 @@ static void guest_test_msrs_access(void)
case 47:
/* Setting bit 0 enables the feature */
if (!has_invtsc)
- continue;
+ goto next_stage;
msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
msr->write = true;
msr->write_val = 1;
@@ -513,6 +513,7 @@ static void guest_test_msrs_access(void)
return;
}
+next_stage:
stage++;
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
index 65e5f4c05068..f1617762c22f 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -289,7 +289,7 @@ int main(int argc, char *argv[])
switch (get_ucall(vcpu[0], &uc)) {
case UCALL_SYNC:
TEST_ASSERT(uc.args[1] == stage,
- "Unexpected stage: %ld (%d expected)\n",
+ "Unexpected stage: %ld (%d expected)",
uc.args[1], stage);
break;
case UCALL_DONE:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
index c4443f71f8dd..05b56095cf76 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -658,7 +658,7 @@ int main(int argc, char *argv[])
switch (get_ucall(vcpu[0], &uc)) {
case UCALL_SYNC:
TEST_ASSERT(uc.args[1] == stage,
- "Unexpected stage: %ld (%d expected)\n",
+ "Unexpected stage: %ld (%d expected)",
uc.args[1], stage);
break;
case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
index 1778704360a6..5bc12222d87a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -92,7 +92,7 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
break;
} while (errno == EINTR);
- TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r);
+ TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
data.realtime = ts.tv_sec * NSEC_PER_SEC;
data.realtime += ts.tv_nsec;
@@ -127,47 +127,11 @@ static void enter_guest(struct kvm_vcpu *vcpu)
handle_abort(&uc);
return;
default:
- TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
+ TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
}
}
}
-#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
-
-static void check_clocksource(void)
-{
- char *clk_name;
- struct stat st;
- FILE *fp;
-
- fp = fopen(CLOCKSOURCE_PATH, "r");
- if (!fp) {
- pr_info("failed to open clocksource file: %d; assuming TSC.\n",
- errno);
- return;
- }
-
- if (fstat(fileno(fp), &st)) {
- pr_info("failed to stat clocksource file: %d; assuming TSC.\n",
- errno);
- goto out;
- }
-
- clk_name = malloc(st.st_size);
- TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n");
-
- if (!fgets(clk_name, st.st_size, fp)) {
- pr_info("failed to read clocksource file: %d; assuming TSC.\n",
- ferror(fp));
- goto out;
- }
-
- TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size),
- "clocksource not supported: %s", clk_name);
-out:
- fclose(fp);
-}
-
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -179,7 +143,7 @@ int main(void)
flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
- check_clocksource();
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 83e25bccc139..17bbb96fc4df 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -257,9 +257,9 @@ int main(int argc, char **argv)
TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
__TEST_REQUIRE(token == MAGIC_TOKEN,
- "This test must be run with the magic token %d.\n"
- "This is done by nx_huge_pages_test.sh, which\n"
- "also handles environment setup for the test.", MAGIC_TOKEN);
+ "This test must be run with the magic token via '-t %d'.\n"
+ "Running via nx_huge_pages_test.sh, which also handles "
+ "environment setup, is strongly recommended.", MAGIC_TOKEN);
run_test(reclaim_period_ms, false, reboot_permissions);
run_test(reclaim_period_ms, true, reboot_permissions);
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index c9a07963d68a..87011965dc41 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -44,7 +44,7 @@ static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
get_ucall(vcpu, &uc);
TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 283cc55597a4..a3bd54b925ab 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -866,7 +866,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
* userspace doesn't set any pmu filter.
*/
count = run_vcpu_to_sync(vcpu);
- TEST_ASSERT(count, "Unexpected count value: %ld\n", count);
+ TEST_ASSERT(count, "Unexpected count value: %ld", count);
for (i = 0; i < BIT(nr_fixed_counters); i++) {
bitmap = BIT(i);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index c7ef97561038..a49828adf294 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -91,7 +91,7 @@ static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
int ret;
ret = __sev_migrate_from(dst, src);
- TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
+ TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
}
static void test_sev_migrate_from(bool es)
@@ -113,7 +113,7 @@ static void test_sev_migrate_from(bool es)
/* Migrate the guest back to the original VM. */
ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
TEST_ASSERT(ret == -1 && errno == EIO,
- "VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
+ "VM that was migrated from should be dead. ret %d, errno: %d", ret,
errno);
kvm_vm_free(src_vm);
@@ -172,7 +172,7 @@ static void test_sev_migrate_parameters(void)
vm_no_sev = aux_vm_create(true);
ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
TEST_ASSERT(ret == -1 && errno == EINVAL,
- "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
+ "Migrations require SEV enabled. ret %d, errno: %d", ret,
errno);
if (!have_sev_es)
@@ -187,25 +187,25 @@ static void test_sev_migrate_parameters(void)
ret = __sev_migrate_from(sev_vm, sev_es_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
+ "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
ret, errno);
ret = __sev_migrate_from(sev_es_vm, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
+ "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
+ "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
+ "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
ret, errno);
kvm_vm_free(sev_vm);
@@ -227,7 +227,7 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
int ret;
ret = __sev_mirror_create(dst, src);
- TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
+ TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
}
static void verify_mirror_allowed_cmds(int vm_fd)
@@ -259,7 +259,7 @@ static void verify_mirror_allowed_cmds(int vm_fd)
ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "Should not be able call command: %d. ret: %d, errno: %d\n",
+ "Should not be able call command: %d. ret: %d, errno: %d",
cmd_id, ret, errno);
}
@@ -301,18 +301,18 @@ static void test_sev_mirror_parameters(void)
ret = __sev_mirror_create(sev_vm, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "Should not be able copy context to self. ret: %d, errno: %d\n",
+ "Should not be able copy context to self. ret: %d, errno: %d",
ret, errno);
ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
TEST_ASSERT(ret == -1 && errno == EINVAL,
- "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+ "Copy context requires SEV enabled. ret %d, errno: %d", ret,
errno);
ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
+ "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
ret, errno);
if (!have_sev_es)
@@ -322,13 +322,13 @@ static void test_sev_mirror_parameters(void)
ret = __sev_mirror_create(sev_vm, sev_es_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n",
+ "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
ret, errno);
ret = __sev_mirror_create(sev_es_vm, sev_vm);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
- "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
+ "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
ret, errno);
kvm_vm_free(sev_es_vm);
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index 06edf00a97d6..1a46dd7bb391 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -74,7 +74,7 @@ int main(int argc, char *argv[])
MEM_REGION_SIZE / PAGE_SIZE, 0);
gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
MEM_REGION_GPA, MEM_REGION_SLOT);
- TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, PAGE_SIZE);
@@ -102,7 +102,7 @@ int main(int argc, char *argv[])
case UCALL_DONE:
break;
default:
- TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd);
+ TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
}
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 00965ba33f73..a91b5b145fa3 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -46,7 +46,7 @@ static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
+ " values did not match: 0x%llx, 0x%llx", \
left->reg, right->reg)
REG_COMPARE(rax);
REG_COMPARE(rbx);
@@ -230,14 +230,14 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = INVALID_SYNC_FIELD;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_valid_regs = 0;
@@ -245,14 +245,14 @@ int main(int argc, char *argv[])
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
run->kvm_dirty_regs = 0;
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
index 0ed32ec903d0..dcbb3c29fb8e 100644
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -143,7 +143,7 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
- "Expect UCALL_SYNC\n");
+ "Expect UCALL_SYNC");
TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
printf("vCPU received GP in guest.\n");
}
@@ -188,7 +188,7 @@ static void *run_ucna_injection(void *arg)
TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
- "Expect UCALL_SYNC\n");
+ "Expect UCALL_SYNC");
TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
@@ -198,7 +198,7 @@ static void *run_ucna_injection(void *arg)
TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
- "Expect UCALL_SYNC\n");
+ "Expect UCALL_SYNC");
TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
@@ -208,7 +208,7 @@ static void *run_ucna_injection(void *arg)
TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
- TEST_ASSERT(false, "vCPU assertion failure: %s.\n",
+ TEST_ASSERT(false, "vCPU assertion failure: %s.",
(const char *)uc.args[0]);
}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
index 255c50b0dc32..9481cbcf284f 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -71,7 +71,7 @@ int main(int argc, char *argv[])
break;
TEST_ASSERT(run->io.port == 0x80,
- "Expected I/O at port 0x80, got port 0x%x\n", run->io.port);
+ "Expected I/O at port 0x80, got port 0x%x", run->io.port);
/*
* Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index 2bed5fb3a0d6..a81a24761aac 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -99,7 +99,7 @@ int main(int argc, char *argv[])
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
TEST_ASSERT(run->internal.suberror ==
KVM_INTERNAL_ERROR_EMULATION,
- "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
+ "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
run->internal.suberror);
break;
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index e4ad5fef52ff..7f6f5f23fb9b 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -128,17 +128,17 @@ int main(int argc, char *argv[])
*/
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+ TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+ TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
} else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+ TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
}
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+ TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
index a9b827c69f32..fad3634fd9eb 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -28,7 +28,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
- "Expected emulation failure, got %d\n",
+ "Expected emulation failure, got %d",
run->emulation_failure.suberror);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
index e710b6e7fb38..1759fa5cb3f2 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -116,23 +116,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
-static bool system_has_stable_tsc(void)
-{
- bool tsc_is_stable;
- FILE *fp;
- char buf[4];
-
- fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
- if (fp == NULL)
- return false;
-
- tsc_is_stable = fgets(buf, sizeof(buf), fp) &&
- !strncmp(buf, "tsc", sizeof(buf));
-
- fclose(fp);
- return tsc_is_stable;
-}
-
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
@@ -148,7 +131,7 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
- TEST_REQUIRE(system_has_stable_tsc());
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
/*
* We set L1's scale factor to be a random number from 2 to 10.
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 67ac2a3292ef..725c206ba0b9 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -216,7 +216,7 @@ static void *vcpu_thread(void *arg)
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
- "Migrations completed: %lu\n",
+ "Migrations completed: %lu",
vcpu->id, (const char *)uc.args[0],
params->data->ipis_sent, params->data->hlt_count,
params->data->wake_count,
@@ -288,7 +288,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
}
TEST_ASSERT(nodes > 1,
- "Did not find at least 2 numa nodes. Can't do migration\n");
+ "Did not find at least 2 numa nodes. Can't do migration");
fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
@@ -347,7 +347,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
wake_count != data->wake_count,
"IPI, HLT and wake count have not increased "
"in the last %lu seconds. "
- "HLTer is likely hung.\n", interval_secs);
+ "HLTer is likely hung.", interval_secs);
ipis_sent = data->ipis_sent;
hlt_count = data->hlt_count;
@@ -381,7 +381,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs,
"-m adds calls to migrate_pages while vCPUs are running."
" Default is no migrations.\n"
"-d <delay microseconds> - delay between migrate_pages() calls."
- " Default is %d microseconds.\n",
+ " Default is %d microseconds.",
DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
index dc6217440db3..25a0b0db5c3c 100644
--- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -116,7 +116,7 @@ int main(int argc, char *argv[])
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
+ "Unexpected exit reason: %u (%s),",
run->exit_reason,
exit_reason_str(run->exit_reason));
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index e0ddf47362e7..167c97abff1b 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -29,7 +29,7 @@ int main(int argc, char *argv[])
xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
TEST_ASSERT(xss_val == 0,
- "MSR_IA32_XSS should be initialized to zero\n");
+ "MSR_IA32_XSS should be initialized to zero");
vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 646f778dfb1e..a6f89aaea77d 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -307,7 +307,7 @@ TEST(ruleset_fd_transfer)
dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, dir_fd);
ASSERT_EQ(0, close(dir_fd));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 5b79758cae62..401e2eb092a3 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -9,6 +9,7 @@
#include <errno.h>
#include <linux/landlock.h>
+#include <linux/securebits.h>
#include <sys/capability.h>
#include <sys/socket.h>
#include <sys/syscall.h>
@@ -22,62 +23,8 @@
#define __maybe_unused __attribute__((__unused__))
#endif
-/*
- * TEST_F_FORK() is useful when a test drop privileges but the corresponding
- * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory
- * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is
- * also called when the test failed, but not when FIXTURE_SETUP() failed. For
- * this to be possible, we must not call abort() but instead exit smoothly
- * (hence the step print).
- */
-/* clang-format off */
-#define TEST_F_FORK(fixture_name, test_name) \
- static void fixture_name##_##test_name##_child( \
- struct __test_metadata *_metadata, \
- FIXTURE_DATA(fixture_name) *self, \
- const FIXTURE_VARIANT(fixture_name) *variant); \
- TEST_F(fixture_name, test_name) \
- { \
- int status; \
- const pid_t child = fork(); \
- if (child < 0) \
- abort(); \
- if (child == 0) { \
- _metadata->no_print = 1; \
- fixture_name##_##test_name##_child(_metadata, self, variant); \
- if (_metadata->skip) \
- _exit(255); \
- if (_metadata->passed) \
- _exit(0); \
- _exit(_metadata->step); \
- } \
- if (child != waitpid(child, &status, 0)) \
- abort(); \
- if (WIFSIGNALED(status) || !WIFEXITED(status)) { \
- _metadata->passed = 0; \
- _metadata->step = 1; \
- return; \
- } \
- switch (WEXITSTATUS(status)) { \
- case 0: \
- _metadata->passed = 1; \
- break; \
- case 255: \
- _metadata->passed = 1; \
- _metadata->skip = 1; \
- break; \
- default: \
- _metadata->passed = 0; \
- _metadata->step = WEXITSTATUS(status); \
- break; \
- } \
- } \
- static void fixture_name##_##test_name##_child( \
- struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
- const FIXTURE_VARIANT(fixture_name) \
- __attribute__((unused)) *variant)
-/* clang-format on */
+/* TEST_F_FORK() should not be used for new tests. */
+#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
#ifndef landlock_create_ruleset
static inline int
@@ -115,11 +62,16 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
/* clang-format off */
CAP_DAC_OVERRIDE,
CAP_MKNOD,
+ CAP_NET_ADMIN,
+ CAP_NET_BIND_SERVICE,
CAP_SYS_ADMIN,
CAP_SYS_CHROOT,
- CAP_NET_BIND_SERVICE,
/* clang-format on */
};
+ const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+
+ if ((cap_get_secbits() & noroot) != noroot)
+ EXPECT_EQ(0, cap_set_secbits(noroot));
cap_p = cap_get_proc();
EXPECT_NE(NULL, cap_p)
@@ -137,6 +89,8 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
}
}
+
+ /* Automatically resets ambient capabilities. */
EXPECT_NE(-1, cap_set_proc(cap_p))
{
TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
@@ -145,6 +99,9 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
{
TH_LOG("Failed to cap_free: %s", strerror(errno));
}
+
+ /* Quickly checks that ambient capabilities are cleared. */
+ EXPECT_NE(-1, cap_get_ambient(caps[0]));
}
/* We cannot put such helpers in a library because of kselftest_harness.h . */
@@ -158,8 +115,9 @@ static void __maybe_unused drop_caps(struct __test_metadata *const _metadata)
_init_caps(_metadata, true);
}
-static void _effective_cap(struct __test_metadata *const _metadata,
- const cap_value_t caps, const cap_flag_value_t value)
+static void _change_cap(struct __test_metadata *const _metadata,
+ const cap_flag_t flag, const cap_value_t cap,
+ const cap_flag_value_t value)
{
cap_t cap_p;
@@ -168,7 +126,7 @@ static void _effective_cap(struct __test_metadata *const _metadata,
{
TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
}
- EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value))
+ EXPECT_NE(-1, cap_set_flag(cap_p, flag, 1, &cap, value))
{
TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
}
@@ -183,15 +141,35 @@ static void _effective_cap(struct __test_metadata *const _metadata,
}
static void __maybe_unused set_cap(struct __test_metadata *const _metadata,
- const cap_value_t caps)
+ const cap_value_t cap)
{
- _effective_cap(_metadata, caps, CAP_SET);
+ _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_SET);
}
static void __maybe_unused clear_cap(struct __test_metadata *const _metadata,
- const cap_value_t caps)
+ const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_CLEAR);
+}
+
+static void __maybe_unused
+set_ambient_cap(struct __test_metadata *const _metadata, const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_SET);
+
+ EXPECT_NE(-1, cap_set_ambient(cap, CAP_SET))
+ {
+ TH_LOG("Failed to set ambient capability %d: %s", cap,
+ strerror(errno));
+ }
+}
+
+static void __maybe_unused clear_ambient_cap(
+ struct __test_metadata *const _metadata, const cap_value_t cap)
{
- _effective_cap(_metadata, caps, CAP_CLEAR);
+ EXPECT_EQ(1, cap_get_ambient(cap));
+ _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_CLEAR);
+ EXPECT_EQ(0, cap_get_ambient(cap));
}
/* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 50818904397c..9a6036fbf289 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -241,9 +241,11 @@ struct mnt_opt {
const char *const data;
};
-const struct mnt_opt mnt_tmp = {
+#define MNT_TMP_DATA "size=4m,mode=700"
+
+static const struct mnt_opt mnt_tmp = {
.type = "tmpfs",
- .data = "size=4m,mode=700",
+ .data = MNT_TMP_DATA,
};
static int mount_opt(const struct mnt_opt *const mnt, const char *const target)
@@ -283,6 +285,8 @@ static void prepare_layout_opt(struct __test_metadata *const _metadata,
static void prepare_layout(struct __test_metadata *const _metadata)
{
+ _metadata->teardown_parent = true;
+
prepare_layout_opt(_metadata, &mnt_tmp);
}
@@ -1962,7 +1966,7 @@ static void test_execute(struct __test_metadata *const _metadata, const int err,
strerror(errno));
};
ASSERT_EQ(err, errno);
- _exit(_metadata->passed ? 2 : 1);
+ _exit(__test_passed(_metadata) ? 2 : 1);
return;
}
ASSERT_EQ(child, waitpid(child, &status, 0));
@@ -3805,7 +3809,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes)
ASSERT_EQ(0, close(socket_fds[0]));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
@@ -3859,9 +3863,7 @@ FIXTURE_SETUP(layout1_bind)
FIXTURE_TEARDOWN(layout1_bind)
{
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(dir_s2d2));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+ /* umount(dir_s2d2)) is handled by namespace lifetime. */
remove_layout1(_metadata);
@@ -4274,9 +4276,8 @@ FIXTURE_TEARDOWN(layout2_overlay)
EXPECT_EQ(0, remove_path(lower_fl1));
EXPECT_EQ(0, remove_path(lower_do1_fo2));
EXPECT_EQ(0, remove_path(lower_fo1));
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(LOWER_BASE));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* umount(LOWER_BASE)) is handled by namespace lifetime. */
EXPECT_EQ(0, remove_path(LOWER_BASE));
EXPECT_EQ(0, remove_path(upper_do1_fu3));
@@ -4285,14 +4286,11 @@ FIXTURE_TEARDOWN(layout2_overlay)
EXPECT_EQ(0, remove_path(upper_do1_fo2));
EXPECT_EQ(0, remove_path(upper_fo1));
EXPECT_EQ(0, remove_path(UPPER_WORK "/work"));
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(UPPER_BASE));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* umount(UPPER_BASE)) is handled by namespace lifetime. */
EXPECT_EQ(0, remove_path(UPPER_BASE));
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(MERGE_DATA));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+ /* umount(MERGE_DATA)) is handled by namespace lifetime. */
EXPECT_EQ(0, remove_path(MERGE_DATA));
cleanup_layout(_metadata);
@@ -4632,7 +4630,10 @@ FIXTURE_VARIANT(layout3_fs)
/* clang-format off */
FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) {
/* clang-format on */
- .mnt = mnt_tmp,
+ .mnt = {
+ .type = "tmpfs",
+ .data = MNT_TMP_DATA,
+ },
.file_path = file1_s1d1,
};
@@ -4686,6 +4687,8 @@ FIXTURE_SETUP(layout3_fs)
SKIP(return, "this filesystem is not supported (setup)");
}
+ _metadata->teardown_parent = true;
+
slash = strrchr(variant->file_path, '/');
ASSERT_NE(slash, NULL);
dir_len = (size_t)slash - (size_t)variant->file_path;
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index ea5f727dd257..f21cfbbc3638 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -17,6 +17,7 @@
#include <string.h>
#include <sys/prctl.h>
#include <sys/socket.h>
+#include <sys/syscall.h>
#include <sys/un.h>
#include "common.h"
@@ -54,6 +55,11 @@ struct service_fixture {
};
};
+static pid_t sys_gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+
static int set_service(struct service_fixture *const srv,
const struct protocol_variant prot,
const unsigned short index)
@@ -88,7 +94,7 @@ static int set_service(struct service_fixture *const srv,
case AF_UNIX:
srv->unix_addr.sun_family = prot.domain;
sprintf(srv->unix_addr.sun_path,
- "_selftests-landlock-net-tid%d-index%d", gettid(),
+ "_selftests-landlock-net-tid%d-index%d", sys_gettid(),
index);
srv->unix_addr_len = SUN_LEN(&srv->unix_addr);
srv->unix_addr.sun_path[0] = '\0';
@@ -101,8 +107,11 @@ static void setup_loopback(struct __test_metadata *const _metadata)
{
set_cap(_metadata, CAP_SYS_ADMIN);
ASSERT_EQ(0, unshare(CLONE_NEWNET));
- ASSERT_EQ(0, system("ip link set dev lo up"));
clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ set_ambient_cap(_metadata, CAP_NET_ADMIN);
+ ASSERT_EQ(0, system("ip link set dev lo up"));
+ clear_ambient_cap(_metadata, CAP_NET_ADMIN);
}
static bool is_restricted(const struct protocol_variant *const prot,
@@ -530,7 +539,7 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata,
}
EXPECT_EQ(0, close(connect_fd));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
@@ -825,7 +834,7 @@ TEST_F(protocol, connect_unspec)
}
EXPECT_EQ(0, close(connect_fd));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 55e7871631a1..a19db4d0b3bd 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -314,7 +314,7 @@ TEST_F(hierarchy, trace)
ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
if (variant->domain_both) {
create_domain(_metadata);
- if (!_metadata->passed)
+ if (!__test_passed(_metadata))
/* Aborts before forking. */
return;
}
@@ -375,7 +375,7 @@ TEST_F(hierarchy, trace)
/* Waits for the parent PTRACE_ATTACH test. */
ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
@@ -430,9 +430,10 @@ TEST_F(hierarchy, trace)
/* Signals that the parent PTRACE_ATTACH test is done. */
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
ASSERT_EQ(child, waitpid(child, &status, 0));
+
if (WIFSIGNALED(status) || !WIFEXITED(status) ||
WEXITSTATUS(status) != EXIT_SUCCESS)
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index aa646e0661f3..da2cade3bab0 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -58,7 +58,8 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) \
+ $(if $(TEST_GEN_MODS_DIR),gen_mods_dir)
define RUN_TESTS
BASE_DIR="$(selfdir)"; \
@@ -69,11 +70,29 @@ define RUN_TESTS
run_many $(1)
endef
+define INSTALL_INCLUDES
+ $(if $(TEST_INCLUDES), \
+ relative_files=""; \
+ for entry in $(TEST_INCLUDES); do \
+ entry_dir=$$(readlink -e "$$(dirname "$$entry")"); \
+ entry_name=$$(basename "$$entry"); \
+ relative_dir=$${entry_dir#"$$SRC_PATH"/}; \
+ if [ "$$relative_dir" = "$$entry_dir" ]; then \
+ echo "Error: TEST_INCLUDES entry \"$$entry\" not located inside selftests directory ($$SRC_PATH)" >&2; \
+ exit 1; \
+ fi; \
+ relative_files="$$relative_files $$relative_dir/$$entry_name"; \
+ done; \
+ cd $(SRC_PATH) && rsync -aR $$relative_files $(OBJ_PATH)/ \
+ )
+endef
+
run_tests: all
ifdef building_out_of_srctree
- @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
- rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
+ @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)$(TEST_GEN_MODS_DIR)" != "X" ]; then \
+ rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_MODS_DIR) $(OUTPUT); \
fi
+ @$(INSTALL_INCLUDES)
@if [ "X$(TEST_PROGS)" != "X" ]; then \
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
$(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \
@@ -84,11 +103,22 @@ else
@$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
endif
+gen_mods_dir:
+ $(Q)$(MAKE) -C $(TEST_GEN_MODS_DIR)
+
+clean_mods_dir:
+ $(Q)$(MAKE) -C $(TEST_GEN_MODS_DIR) clean
+
define INSTALL_SINGLE_RULE
$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
$(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST) $(INSTALL_PATH)/)
endef
+define INSTALL_MODS_RULE
+ $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)/$(INSTALL_LIST))
+ $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST)/*.ko $(INSTALL_PATH)/$(INSTALL_LIST))
+endef
+
define INSTALL_RULE
$(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
@@ -97,12 +127,14 @@ define INSTALL_RULE
$(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(notdir $(TEST_GEN_MODS_DIR))) $(INSTALL_MODS_RULE)
$(eval INSTALL_LIST = $(wildcard config settings)) $(INSTALL_SINGLE_RULE)
endef
install: all
ifdef INSTALL_PATH
$(INSTALL_RULE)
+ $(INSTALL_INCLUDES)
else
$(error Error: set INSTALL_PATH to use install)
endif
@@ -122,7 +154,7 @@ define CLEAN
$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
-clean:
+clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir)
$(CLEAN)
# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
@@ -153,4 +185,4 @@ $(OUTPUT)/%:%.S
$(LINK.S) $^ $(LDLIBS) -o $@
endif
-.PHONY: run_tests all clean install emit_tests
+.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir
diff --git a/tools/testing/selftests/livepatch/.gitignore b/tools/testing/selftests/livepatch/.gitignore
new file mode 100644
index 000000000000..f1e9c2a20e99
--- /dev/null
+++ b/tools/testing/selftests/livepatch/.gitignore
@@ -0,0 +1 @@
+test_klp-call_getpid
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile
index 02fadc9d55e0..35418a4790be 100644
--- a/tools/testing/selftests/livepatch/Makefile
+++ b/tools/testing/selftests/livepatch/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_FILES := test_klp-call_getpid
+TEST_GEN_MODS_DIR := test_modules
TEST_PROGS_EXTENDED := functions.sh
TEST_PROGS := \
test-livepatch.sh \
@@ -7,7 +9,8 @@ TEST_PROGS := \
test-shadow-vars.sh \
test-state.sh \
test-ftrace.sh \
- test-sysfs.sh
+ test-sysfs.sh \
+ test-syscall.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index 0942dd5826f8..d2035dd64a2b 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -13,23 +13,36 @@ the message buffer for only the duration of each individual test.)
Config
------
-Set these config options and their prerequisites:
+Set CONFIG_LIVEPATCH=y option and it's prerequisites.
-CONFIG_LIVEPATCH=y
-CONFIG_TEST_LIVEPATCH=m
+Building the tests
+------------------
+
+To only build the tests without running them, run:
+
+ % make -C tools/testing/selftests/livepatch
+
+The command above will compile all test modules and test programs, making them
+ready to be packaged if so desired.
Running the tests
-----------------
-Test kernel modules are built as part of lib/ (make modules) and need to
-be installed (make modules_install) as the test scripts will modprobe
-them.
+Test kernel modules are built before running the livepatch selftests. The
+modules are located under test_modules directory, and are built as out-of-tree
+modules. This is specially useful since the same sources can be built and
+tested on systems with different kABI, ensuring they the tests are backwards
+compatible. The modules will be loaded by the test scripts using insmod.
To run the livepatch selftests, from the top of the kernel source tree:
% make -C tools/testing/selftests TARGETS=livepatch run_tests
+or
+
+ % make kselftest TARGETS=livepatch
+
Adding tests
------------
diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config
index ad23100cb27c..e88bf518a23a 100644
--- a/tools/testing/selftests/livepatch/config
+++ b/tools/testing/selftests/livepatch/config
@@ -1,3 +1,2 @@
CONFIG_LIVEPATCH=y
CONFIG_DYNAMIC_DEBUG=y
-CONFIG_TEST_LIVEPATCH=m
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index c8416c54b463..fc4c6a016d38 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -34,6 +34,18 @@ function is_root() {
fi
}
+# Check if we can compile the modules before loading them
+function has_kdir() {
+ if [ -z "$KDIR" ]; then
+ KDIR="/lib/modules/$(uname -r)/build"
+ fi
+
+ if [ ! -d "$KDIR" ]; then
+ echo "skip all tests: KDIR ($KDIR) not available to compile modules."
+ exit $ksft_skip
+ fi
+}
+
# die(msg) - game over, man
# msg - dying words
function die() {
@@ -42,17 +54,6 @@ function die() {
exit 1
}
-# save existing dmesg so we can detect new content
-function save_dmesg() {
- SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX)
- dmesg > "$SAVED_DMESG"
-}
-
-# cleanup temporary dmesg file from save_dmesg()
-function cleanup_dmesg_file() {
- rm -f "$SAVED_DMESG"
-}
-
function push_config() {
DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
@@ -99,7 +100,6 @@ function set_ftrace_enabled() {
function cleanup() {
pop_config
- cleanup_dmesg_file
}
# setup_config - save the current config and set a script exit trap that
@@ -108,6 +108,7 @@ function cleanup() {
# the ftrace_enabled sysctl.
function setup_config() {
is_root
+ has_kdir
push_config
set_dynamic_debug
set_ftrace_enabled 1
@@ -127,16 +128,14 @@ function loop_until() {
done
}
-function assert_mod() {
- local mod="$1"
-
- modprobe --dry-run "$mod" &>/dev/null
-}
-
function is_livepatch_mod() {
local mod="$1"
- if [[ $(modinfo "$mod" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then
+ if [[ ! -f "test_modules/$mod.ko" ]]; then
+ die "Can't find \"test_modules/$mod.ko\", try \"make\""
+ fi
+
+ if [[ $(modinfo "test_modules/$mod.ko" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then
return 0
fi
@@ -146,9 +145,9 @@ function is_livepatch_mod() {
function __load_mod() {
local mod="$1"; shift
- local msg="% modprobe $mod $*"
+ local msg="% insmod test_modules/$mod.ko $*"
log "${msg%% }"
- ret=$(modprobe "$mod" "$@" 2>&1)
+ ret=$(insmod "test_modules/$mod.ko" "$@" 2>&1)
if [[ "$ret" != "" ]]; then
die "$ret"
fi
@@ -161,13 +160,10 @@ function __load_mod() {
# load_mod(modname, params) - load a kernel module
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_mod() {
local mod="$1"; shift
- assert_mod "$mod" ||
- skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root"
-
is_livepatch_mod "$mod" &&
die "use load_lp() to load the livepatch module $mod"
@@ -177,13 +173,10 @@ function load_mod() {
# load_lp_nowait(modname, params) - load a kernel module with a livepatch
# but do not wait on until the transition finishes
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_lp_nowait() {
local mod="$1"; shift
- assert_mod "$mod" ||
- skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root"
-
is_livepatch_mod "$mod" ||
die "module $mod is not a livepatch"
@@ -196,7 +189,7 @@ function load_lp_nowait() {
# load_lp(modname, params) - load a kernel module with a livepatch
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_lp() {
local mod="$1"; shift
@@ -209,13 +202,13 @@ function load_lp() {
# load_failing_mod(modname, params) - load a kernel module, expect to fail
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_failing_mod() {
local mod="$1"; shift
- local msg="% modprobe $mod $*"
+ local msg="% insmod test_modules/$mod.ko $*"
log "${msg%% }"
- ret=$(modprobe "$mod" "$@" 2>&1)
+ ret=$(insmod "test_modules/$mod.ko" "$@" 2>&1)
if [[ "$ret" == "" ]]; then
die "$mod unexpectedly loaded"
fi
@@ -280,7 +273,15 @@ function set_pre_patch_ret {
function start_test {
local test="$1"
- save_dmesg
+ # Dump something unique into the dmesg log, then stash the entry
+ # in LAST_DMESG. The check_result() function will use it to
+ # find new kernel messages since the test started.
+ local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)"
+ log "$last_dmesg_msg"
+ loop_until 'dmesg | grep -q "$last_dmesg_msg"' ||
+ die "buffer busy? can't find canary dmesg message: $last_dmesg_msg"
+ LAST_DMESG=$(dmesg | grep "$last_dmesg_msg")
+
echo -n "TEST: $test ... "
log "===== TEST: $test ====="
}
@@ -291,23 +292,24 @@ function check_result {
local expect="$*"
local result
- # Note: when comparing dmesg output, the kernel log timestamps
- # help differentiate repeated testing runs. Remove them with a
- # post-comparison sed filter.
-
- result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \
+ # Test results include any new dmesg entry since LAST_DMESG, then:
+ # - include lines matching keywords
+ # - exclude lines matching keywords
+ # - filter out dmesg timestamp prefixes
+ result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
grep -e 'livepatch:' -e 'test_klp' | \
grep -v '\(tainting\|taints\) kernel' | \
sed 's/^\[[ 0-9.]*\] //')
if [[ "$expect" == "$result" ]] ; then
echo "ok"
+ elif [[ "$result" == "" ]] ; then
+ echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n"
+ die "livepatch kselftest(s) failed"
else
echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n"
die "livepatch kselftest(s) failed"
fi
-
- cleanup_dmesg_file
}
# check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index 90b26dbb2626..32b150e25b10 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -34,9 +34,9 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -81,7 +81,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -89,7 +89,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -129,9 +129,9 @@ unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -177,7 +177,7 @@ unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -185,7 +185,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -219,7 +219,7 @@ load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -254,9 +254,9 @@ load_mod $MOD_TARGET
load_failing_mod $MOD_LIVEPATCH pre_patch_ret=-19
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH pre_patch_ret=-19
+% insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
test_klp_callbacks_demo: pre_patch_callback: vmlinux
@@ -265,7 +265,7 @@ livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': No such device
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: No such device
% rmmod $MOD_TARGET
$MOD_TARGET: ${MOD_TARGET}_exit"
@@ -295,7 +295,7 @@ load_failing_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -304,12 +304,12 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
% echo -19 > /sys/module/$MOD_LIVEPATCH/parameters/pre_patch_ret
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
livepatch: pre-patch callback failed for object '$MOD_TARGET'
livepatch: patch '$MOD_LIVEPATCH' failed for module '$MOD_TARGET', refusing to load module '$MOD_TARGET'
-modprobe: ERROR: could not insert '$MOD_TARGET': No such device
+insmod: ERROR: could not insert module test_modules/$MOD_TARGET.ko: No such device
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
@@ -340,11 +340,11 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY block_transition=N
+check_result "% insmod test_modules/$MOD_TARGET_BUSY.ko block_transition=N
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
$MOD_TARGET_BUSY: busymod_work_func enter
$MOD_TARGET_BUSY: busymod_work_func exit
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -354,7 +354,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -421,16 +421,16 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY block_transition=Y
+check_result "% insmod test_modules/$MOD_TARGET_BUSY.ko block_transition=Y
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
$MOD_TARGET_BUSY: busymod_work_func enter
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': starting patching transition
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_TARGET: ${MOD_TARGET}_init
@@ -467,7 +467,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -475,7 +475,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -523,7 +523,7 @@ disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -531,7 +531,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2 replace=1
+% insmod test_modules/$MOD_LIVEPATCH2.ko replace=1
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index 825540a5194d..730218bce99c 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -35,7 +35,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
check_result "livepatch: kernel.ftrace_enabled = 0
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16)
@@ -44,9 +44,9 @@ livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Device or resource busy
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or resource busy
livepatch: kernel.ftrace_enabled = 1
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 5fe79ac34be1..e3455a6b1158 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -31,7 +31,7 @@ if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]]
die "livepatch kselftest(s) failed"
fi
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
@@ -75,14 +75,14 @@ unload_lp $MOD_LIVEPATCH
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
$MOD_LIVEPATCH: this has been live patched
-% modprobe $MOD_REPLACE replace=0
+% insmod test_modules/$MOD_REPLACE.ko replace=0
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
@@ -135,14 +135,14 @@ unload_lp $MOD_REPLACE
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
$MOD_LIVEPATCH: this has been live patched
-% modprobe $MOD_REPLACE replace=1
+% insmod test_modules/$MOD_REPLACE.ko replace=1
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh
index e04cb354f56b..1218c155bffe 100755
--- a/tools/testing/selftests/livepatch/test-shadow-vars.sh
+++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh
@@ -16,7 +16,7 @@ start_test "basic shadow variable API"
load_mod $MOD_TEST
unload_mod $MOD_TEST
-check_result "% modprobe $MOD_TEST
+check_result "% insmod test_modules/$MOD_TEST.ko
$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
$MOD_TEST: shadow_ctor: PTR3 -> PTR2
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
index 38656721c958..10a52ac06185 100755
--- a/tools/testing/selftests/livepatch/test-state.sh
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -19,7 +19,7 @@ load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -51,7 +51,7 @@ unload_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -61,7 +61,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -96,7 +96,7 @@ disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH3
-check_result "% modprobe $MOD_LIVEPATCH2
+check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -106,7 +106,7 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH2': patching complete
-% modprobe $MOD_LIVEPATCH3
+% insmod test_modules/$MOD_LIVEPATCH3.ko
livepatch: enabling patch '$MOD_LIVEPATCH3'
livepatch: '$MOD_LIVEPATCH3': initializing patching transition
$MOD_LIVEPATCH3: pre_patch_callback: vmlinux
@@ -117,7 +117,7 @@ $MOD_LIVEPATCH3: post_patch_callback: vmlinux
$MOD_LIVEPATCH3: fix_console_loglevel: taking over the console_loglevel change
livepatch: '$MOD_LIVEPATCH3': patching complete
% rmmod $MOD_LIVEPATCH2
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -149,7 +149,7 @@ load_failing_mod $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
-check_result "% modprobe $MOD_LIVEPATCH2
+check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -159,9 +159,9 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH2': patching complete
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches.
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Invalid argument
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Invalid parameters
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh
new file mode 100755
index 000000000000..b76a881d4013
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-syscall.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2023 SUSE
+# Author: Marcos Paulo de Souza <mpdesouza@suse.com>
+
+. $(dirname $0)/functions.sh
+
+MOD_SYSCALL=test_klp_syscall
+
+setup_config
+
+# - Start _NRPROC processes calling getpid and load a livepatch to patch the
+# getpid syscall. Check if all the processes transitioned to the livepatched
+# state.
+
+start_test "patch getpid syscall while being heavily hammered"
+
+for i in $(seq 1 $(getconf _NPROCESSORS_ONLN)); do
+ ./test_klp-call_getpid &
+ pids[$i]="$!"
+done
+
+pid_list=$(echo ${pids[@]} | tr ' ' ',')
+load_lp $MOD_SYSCALL klp_pids=$pid_list
+
+# wait for all tasks to transition to patched state
+loop_until 'grep -q '^0$' /sys/kernel/test_klp_syscall/npids'
+
+pending_pids=$(cat /sys/kernel/test_klp_syscall/npids)
+log "$MOD_SYSCALL: Remaining not livepatched processes: $pending_pids"
+
+for pid in ${pids[@]}; do
+ kill $pid || true
+done
+
+disable_lp $MOD_SYSCALL
+unload_lp $MOD_SYSCALL
+
+check_result "% insmod test_modules/$MOD_SYSCALL.ko klp_pids=$pid_list
+livepatch: enabling patch '$MOD_SYSCALL'
+livepatch: '$MOD_SYSCALL': initializing patching transition
+livepatch: '$MOD_SYSCALL': starting patching transition
+livepatch: '$MOD_SYSCALL': completing patching transition
+livepatch: '$MOD_SYSCALL': patching complete
+$MOD_SYSCALL: Remaining not livepatched processes: 0
+% echo 0 > /sys/kernel/livepatch/$MOD_SYSCALL/enabled
+livepatch: '$MOD_SYSCALL': initializing unpatching transition
+livepatch: '$MOD_SYSCALL': starting unpatching transition
+livepatch: '$MOD_SYSCALL': completing unpatching transition
+livepatch: '$MOD_SYSCALL': unpatching complete
+% rmmod $MOD_SYSCALL"
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
index 7f76f280189a..6c646afa7395 100755
--- a/tools/testing/selftests/livepatch/test-sysfs.sh
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -27,7 +27,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
@@ -56,7 +56,7 @@ check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe test_klp_callbacks_demo
+check_result "% insmod test_modules/test_klp_callbacks_demo.ko
livepatch: enabling patch 'test_klp_callbacks_demo'
livepatch: 'test_klp_callbacks_demo': initializing patching transition
test_klp_callbacks_demo: pre_patch_callback: vmlinux
@@ -64,7 +64,7 @@ livepatch: 'test_klp_callbacks_demo': starting patching transition
livepatch: 'test_klp_callbacks_demo': completing patching transition
test_klp_callbacks_demo: post_patch_callback: vmlinux
livepatch: 'test_klp_callbacks_demo': patching complete
-% modprobe test_klp_callbacks_mod
+% insmod test_modules/test_klp_callbacks_mod.ko
livepatch: applying patch 'test_klp_callbacks_demo' to loading module 'test_klp_callbacks_mod'
test_klp_callbacks_demo: pre_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
test_klp_callbacks_demo: post_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
diff --git a/tools/testing/selftests/livepatch/test_klp-call_getpid.c b/tools/testing/selftests/livepatch/test_klp-call_getpid.c
new file mode 100644
index 000000000000..ce321a2d7308
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_klp-call_getpid.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 SUSE
+ * Authors: Libor Pechacek <lpechacek@suse.cz>
+ * Marcos Paulo de Souza <mpdesouza@suse.com>
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <signal.h>
+
+static int stop;
+static int sig_int;
+
+void hup_handler(int signum)
+{
+ stop = 1;
+}
+
+void int_handler(int signum)
+{
+ stop = 1;
+ sig_int = 1;
+}
+
+int main(int argc, char *argv[])
+{
+ long count = 0;
+
+ signal(SIGHUP, &hup_handler);
+ signal(SIGINT, &int_handler);
+
+ while (!stop) {
+ (void)syscall(SYS_getpid);
+ count++;
+ }
+
+ if (sig_int)
+ printf("%ld iterations done\n", count);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile b/tools/testing/selftests/livepatch/test_modules/Makefile
new file mode 100644
index 000000000000..e6e638c4bcba
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/Makefile
@@ -0,0 +1,26 @@
+TESTMODS_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+obj-m += test_klp_atomic_replace.o \
+ test_klp_callbacks_busy.o \
+ test_klp_callbacks_demo.o \
+ test_klp_callbacks_demo2.o \
+ test_klp_callbacks_mod.o \
+ test_klp_livepatch.o \
+ test_klp_state.o \
+ test_klp_state2.o \
+ test_klp_state3.o \
+ test_klp_shadow_vars.o \
+ test_klp_syscall.o
+
+# Ensure that KDIR exists, otherwise skip the compilation
+modules:
+ifneq ("$(wildcard $(KDIR))", "")
+ $(Q)$(MAKE) -C $(KDIR) modules KBUILD_EXTMOD=$(TESTMODS_DIR)
+endif
+
+# Ensure that KDIR exists, otherwise skip the clean target
+clean:
+ifneq ("$(wildcard $(KDIR))", "")
+ $(Q)$(MAKE) -C $(KDIR) clean KBUILD_EXTMOD=$(TESTMODS_DIR)
+endif
diff --git a/lib/livepatch/test_klp_atomic_replace.c b/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c
index 5af7093ca00c..5af7093ca00c 100644
--- a/lib/livepatch/test_klp_atomic_replace.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c
diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c
index 133929e0ce8f..133929e0ce8f 100644
--- a/lib/livepatch/test_klp_callbacks_busy.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c
diff --git a/lib/livepatch/test_klp_callbacks_demo.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c
index 3fd8fe1cd1cc..3fd8fe1cd1cc 100644
--- a/lib/livepatch/test_klp_callbacks_demo.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c
diff --git a/lib/livepatch/test_klp_callbacks_demo2.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c
index 5417573e80af..5417573e80af 100644
--- a/lib/livepatch/test_klp_callbacks_demo2.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c
diff --git a/lib/livepatch/test_klp_callbacks_mod.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c
index 8fbe645b1c2c..8fbe645b1c2c 100644
--- a/lib/livepatch/test_klp_callbacks_mod.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c
diff --git a/lib/livepatch/test_klp_livepatch.c b/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c
index aff08199de71..aff08199de71 100644
--- a/lib/livepatch/test_klp_livepatch.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c
diff --git a/lib/livepatch/test_klp_shadow_vars.c b/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
index b99116490858..b99116490858 100644
--- a/lib/livepatch/test_klp_shadow_vars.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
diff --git a/lib/livepatch/test_klp_state.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state.c
index 57a4253acb01..57a4253acb01 100644
--- a/lib/livepatch/test_klp_state.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state.c
diff --git a/lib/livepatch/test_klp_state2.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c
index c978ea4d5e67..c978ea4d5e67 100644
--- a/lib/livepatch/test_klp_state2.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c
diff --git a/lib/livepatch/test_klp_state3.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c
index 9226579d10c5..9226579d10c5 100644
--- a/lib/livepatch/test_klp_state3.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
new file mode 100644
index 000000000000..dd802783ea84
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017-2023 SUSE
+ * Authors: Libor Pechacek <lpechacek@suse.cz>
+ * Nicolai Stange <nstange@suse.de>
+ * Marcos Paulo de Souza <mpdesouza@suse.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/livepatch.h>
+
+#if defined(__x86_64__)
+#define FN_PREFIX __x64_
+#elif defined(__s390x__)
+#define FN_PREFIX __s390x_
+#elif defined(__aarch64__)
+#define FN_PREFIX __arm64_
+#else
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER */
+#define FN_PREFIX
+#endif
+
+/* Protects klp_pids */
+static DEFINE_MUTEX(kpid_mutex);
+
+static unsigned int npids, npids_pending;
+static int klp_pids[NR_CPUS];
+module_param_array(klp_pids, int, &npids_pending, 0);
+MODULE_PARM_DESC(klp_pids, "Array of pids to be transitioned to livepatched state.");
+
+static ssize_t npids_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", npids_pending);
+}
+
+static struct kobj_attribute klp_attr = __ATTR_RO(npids);
+static struct kobject *klp_kobj;
+
+static asmlinkage long lp_sys_getpid(void)
+{
+ int i;
+
+ mutex_lock(&kpid_mutex);
+ if (npids_pending > 0) {
+ for (i = 0; i < npids; i++) {
+ if (current->pid == klp_pids[i]) {
+ klp_pids[i] = 0;
+ npids_pending--;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&kpid_mutex);
+
+ return task_tgid_vnr(current);
+}
+
+static struct klp_func vmlinux_funcs[] = {
+ {
+ .old_name = __stringify(FN_PREFIX) "sys_getpid",
+ .new_func = lp_sys_getpid,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = vmlinux_funcs,
+ }, {}
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int livepatch_init(void)
+{
+ int ret;
+
+ klp_kobj = kobject_create_and_add("test_klp_syscall", kernel_kobj);
+ if (!klp_kobj)
+ return -ENOMEM;
+
+ ret = sysfs_create_file(klp_kobj, &klp_attr.attr);
+ if (ret) {
+ kobject_put(klp_kobj);
+ return ret;
+ }
+
+ /*
+ * Save the number pids to transition to livepatched state before the
+ * number of pending pids is decremented.
+ */
+ npids = npids_pending;
+
+ return klp_enable_patch(&patch);
+}
+
+static void livepatch_exit(void)
+{
+ kobject_put(klp_kobj);
+}
+
+module_init(livepatch_init);
+module_exit(livepatch_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Libor Pechacek <lpechacek@suse.cz>");
+MODULE_AUTHOR("Nicolai Stange <nstange@suse.de>");
+MODULE_AUTHOR("Marcos Paulo de Souza <mpdesouza@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: syscall transition");
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
index 9df29b1e3497..4d5d4cee2586 100644
--- a/tools/testing/selftests/lsm/lsm_list_modules_test.c
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -122,6 +122,12 @@ TEST(correct_lsm_list_modules)
case LSM_ID_LANDLOCK:
name = "landlock";
break;
+ case LSM_ID_IMA:
+ name = "ima";
+ break;
+ case LSM_ID_EVM:
+ name = "evm";
+ break;
default:
name = "INVALID";
break;
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 0899019a7fcb..e14bdd4455f2 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Kselftest framework requirement - SKIP code is 4.
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 20294553a5dd..d2cfc9b494a0 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -138,7 +138,7 @@ FIXTURE_SETUP(hmm)
self->fd = hmm_open(variant->device_number);
if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
- SKIP(exit(0), "DEVICE_COHERENT not available");
+ SKIP(return, "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
}
@@ -149,7 +149,7 @@ FIXTURE_SETUP(hmm2)
self->fd0 = hmm_open(variant->device_number0);
if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
- SKIP(exit(0), "DEVICE_COHERENT not available");
+ SKIP(return, "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index 380b691d3eb9..b748c48908d9 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
if (map_ptr_orig == MAP_FAILED)
err(2, "initial mmap");
- if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE))
+ if (madvise(map_ptr, len, MADV_HUGEPAGE))
err(2, "MADV_HUGEPAGE");
pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
index 193281560b61..86e8f2048a40 100644
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "vm_util.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
@@ -58,10 +59,16 @@ int main(int argc, char **argv)
{
void *addr;
int ret;
+ size_t hugepage_size;
size_t length = LENGTH;
int flags = FLAGS;
int shift = 0;
+ hugepage_size = default_huge_page_size();
+ /* munmap with fail if the length is not page aligned */
+ if (hugepage_size > length)
+ length = hugepage_size;
+
if (argc > 1)
length = atol(argv[1]) << 20;
if (argc > 2) {
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 1d4c1589c305..2f8b991f78cb 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
char pattern_seed)
{
void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
- unsigned long long i;
+ int d;
+ unsigned long long t;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
unsigned long long threshold;
@@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
/* Set byte pattern for source block. */
srand(pattern_seed);
- for (i = 0; i < threshold; i++)
- memset((char *) src_addr + i, (char) rand(), 1);
+ for (t = 0; t < threshold; t++)
+ memset((char *) src_addr + t, (char) rand(), 1);
/* Mask to zero out lower bits of address for alignment */
align_mask = ~(c.dest_alignment - 1);
@@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
/* Set byte pattern for the dest preamble block. */
srand(pattern_seed);
- for (i = 0; i < c.dest_preamble_size; i++)
- memset((char *) dest_preamble_addr + i, (char) rand(), 1);
+ for (d = 0; d < c.dest_preamble_size; d++)
+ memset((char *) dest_preamble_addr + d, (char) rand(), 1);
}
clock_gettime(CLOCK_MONOTONIC, &t_start);
@@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
/* Verify byte pattern after remapping */
srand(pattern_seed);
- for (i = 0; i < threshold; i++) {
+ for (t = 0; t < threshold; t++) {
char c = (char) rand();
- if (((char *) dest_addr)[i] != c) {
+ if (((char *) dest_addr)[t] != c) {
ksft_print_msg("Data after remap doesn't match at offset %llu\n",
- i);
+ t);
ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
- ((char *) dest_addr)[i] & 0xff);
+ ((char *) dest_addr)[t] & 0xff);
ret = -1;
goto clean_up_dest;
}
@@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
/* Verify the dest preamble byte pattern after remapping */
if (c.dest_preamble_size) {
srand(pattern_seed);
- for (i = 0; i < c.dest_preamble_size; i++) {
+ for (d = 0; d < c.dest_preamble_size; d++) {
char c = (char) rand();
- if (((char *) dest_preamble_addr)[i] != c) {
+ if (((char *) dest_preamble_addr)[d] != c) {
ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
- i);
+ d);
ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
- ((char *) dest_preamble_addr)[i] & 0xff);
+ ((char *) dest_preamble_addr)[d] & 0xff);
ret = -1;
goto clean_up_dest;
}
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index cce90a10515a..2b9f8cc52639 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1517,6 +1517,12 @@ int main(int argc, char *argv[])
continue;
uffd_test_start("%s on %s", test->name, mem_type->name);
+ if ((mem_type->mem_flag == MEM_HUGETLB ||
+ mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
+ (default_huge_page_size() == 0)) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 45cae7cab27e..a0a75f302904 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -29,9 +29,15 @@ check_supported_x86_64()
# See man 1 gzip under '-f'.
local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+ local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;}
+ else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
+
if [[ "${pg_table_levels}" -lt 5 ]]; then
echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
exit $ksft_skip
+ elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
+ echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
+ exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh
index 70a02301f4c2..3d2d2eb9d6ff 100755
--- a/tools/testing/selftests/mm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
set -e
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
index 50ed5d475dd1..bcf51d785a37 100644
--- a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -218,7 +218,7 @@ static bool move_mount_set_group_supported(void)
if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
return -1;
- ret = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM,
+ ret = syscall(__NR_move_mount, AT_FDCWD, SET_GROUP_FROM,
AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
umount2("/tmp", MNT_DETACH);
@@ -363,7 +363,7 @@ TEST_F(move_mount_set_group, complex_sharing_copying)
CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
ASSERT_EQ(wait_for_pid(pid), 0);
- ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "",
+ ASSERT_EQ(syscall(__NR_move_mount, ca_from.mntfd, "",
ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
| MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
0);
diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting
new file mode 100644
index 000000000000..a953c96aa16e
--- /dev/null
+++ b/tools/testing/selftests/mqueue/setting
@@ -0,0 +1 @@
+timeout=180
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 50818075e566..7b6918d5f4af 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -53,8 +53,7 @@ TEST_PROGS += bind_bhash.sh
TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
-TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
-TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
@@ -84,6 +83,7 @@ TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
TEST_GEN_FILES += csum
TEST_GEN_FILES += nat6to4.o
+TEST_GEN_FILES += xdp_dummy.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
@@ -95,6 +95,9 @@ TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_FILES := settings
+TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+
+TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
@@ -104,7 +107,7 @@ $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
-# Rules to generate bpf obj nat6to4.o
+# Rules to generate bpf objs
CLANG ?= clang
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
@@ -139,7 +142,7 @@ endif
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS)
+$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
$(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh
index cde9a91c4797..2db9d15cd45f 100755
--- a/tools/testing/selftests/net/big_tcp.sh
+++ b/tools/testing/selftests/net/big_tcp.sh
@@ -122,7 +122,9 @@ do_netperf() {
local netns=$1
[ "$NF" = "6" ] && serip=$SERVER_IP6
- ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null
+
+ # use large write to be sure to generate big tcp packets
+ ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null
}
do_test() {
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
index f30bd57d5e38..8bc23fb4c82b 100755
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -89,7 +89,7 @@ for ovr in setsock cmsg both diff; do
check_result $? 0 "TCLASS $prot $ovr - pass"
while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+ $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
done
tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
@@ -126,7 +126,7 @@ for ovr in setsock cmsg both diff; do
check_result $? 0 "HOPLIMIT $prot $ovr - pass"
while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+ $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
done
tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 8da562a9ae87..5e4390cac17e 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,5 +1,6 @@
CONFIG_USER_NS=y
CONFIG_NET_NS=y
+CONFIG_BONDING=m
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
@@ -14,30 +15,74 @@ CONFIG_VETH=y
CONFIG_NET_IPVTI=y
CONFIG_IPV6_VTI=y
CONFIG_DUMMY=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
+CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_VLAN_8021Q=y
+CONFIG_GENEVE=m
CONFIG_IFB=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IP_GRE=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
+CONFIG_IPV6_SIT=y
+CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_RAW=m
CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_L2TP_ETH=m
+CONFIG_L2TP_IP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_MACSEC=m
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MPLS=y
+CONFIG_MPTCP=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPIP=y
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_PSAMPLE=m
+CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_TLS=m
CONFIG_TRACEPOINTS=y
CONFIG_NET_DROP_MONITOR=m
CONFIG_NETDEVSIM=m
-CONFIG_NET_FOU=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_SCH_INGRESS=m
@@ -48,7 +93,10 @@ CONFIG_BAREUDP=m
CONFIG_IPV6_IOAM6_LWTUNNEL=y
CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
+CONFIG_TUN=y
CONFIG_VXLAN=m
CONFIG_IP_SCTP=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_CRYPTO_ARIA=y
+CONFIG_XFRM_INTERFACE=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 0d4f252427e2..386ebd829df5 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -38,6 +38,9 @@
# server / client nomenclature relative to ns-A
source lib.sh
+
+PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
+
VERBOSE=0
NSA_DEV=eth1
@@ -97,6 +100,7 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local ans
[ "${VERBOSE}" = "1" ] && echo
@@ -106,19 +110,20 @@ log_test()
else
nfail=$((nfail+1))
printf "TEST: %-70s [FAIL]\n" "${msg}"
+ echo " expected rc $expected; actual rc $rc"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
fi
if [ "${PAUSE}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
kill_procs
@@ -187,6 +192,15 @@ kill_procs()
sleep 1
}
+set_ping_group()
+{
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'"
+ fi
+
+ ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'
+}
+
do_run_cmd()
{
local cmd="$*"
@@ -835,14 +849,14 @@ ipv4_ping()
set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
ipv4_ping_novrf
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv4_ping_vrf
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_ping_vrf
}
@@ -2053,12 +2067,12 @@ ipv4_addr_bind()
log_subsection "No VRF"
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_addr_bind_novrf
log_subsection "With VRF"
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_addr_bind_vrf
}
@@ -2521,14 +2535,14 @@ ipv6_ping()
setup
ipv6_ping_novrf
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv6_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv6_ping_vrf
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv6_ping_vrf
}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d5a281aadbac..ac0b2c6a5761 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -2066,6 +2066,12 @@ basic()
run_cmd "$IP nexthop get id 1"
log_test $? 2 "Nexthop get on non-existent id"
+ run_cmd "$IP nexthop del id 1"
+ log_test $? 2 "Nexthop del with non-existent id"
+
+ run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8"
+ log_test $? 2 "Nexthop del with non-existent id and extra attributes"
+
# attempt to create nh without a device or gw - fails
run_cmd "$IP nexthop add id 1"
log_test $? 2 "Nexthop with no device or gateway"
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index b3ecccbbfcd2..73895711cdf4 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -743,6 +743,43 @@ fib_notify_test()
cleanup &> /dev/null
}
+# Create a new dummy_10 to remove all associated routes.
+reset_dummy_10()
+{
+ $IP link del dev dummy_10
+
+ $IP link add dummy_10 type dummy
+ $IP link set dev dummy_10 up
+ $IP -6 address add 2001:10::1/64 dev dummy_10
+}
+
+check_rt_num()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ echo "FAIL: Expected $expected routes, got $num"
+ ret=1
+ else
+ ret=0
+ fi
+}
+
+check_rt_num_clean()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ log_test 1 0 "expected $expected routes, got $num"
+ set +e
+ cleanup &> /dev/null
+ return 1
+ fi
+ return 0
+}
+
fib6_gc_test()
{
setup
@@ -751,7 +788,8 @@ fib6_gc_test()
echo "Fib6 garbage collection test"
set -e
- EXPIRE=3
+ EXPIRE=5
+ GC_WAIT_TIME=$((EXPIRE * 2 + 2))
# Check expiration of routes every $EXPIRE seconds (GC)
$NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE
@@ -763,44 +801,110 @@ fib6_gc_test()
$NS_EXEC sysctl -wq net.ipv6.route.flush=1
# Temporary routes
- for i in $(seq 1 1000); do
+ for i in $(seq 1 5); do
# Expire route after $EXPIRE seconds
$IP -6 route add 2001:20::$i \
via 2001:10::2 dev dummy_10 expires $EXPIRE
done
- sleep $(($EXPIRE * 2))
- N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l)
- if [ $N_EXP_SLEEP -ne 0 ]; then
- echo "FAIL: expected 0 routes with expires, got $N_EXP_SLEEP"
- ret=1
- else
- ret=0
- fi
+ sleep $GC_WAIT_TIME
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection"
+
+ reset_dummy_10
# Permanent routes
- for i in $(seq 1 5000); do
+ for i in $(seq 1 5); do
$IP -6 route add 2001:30::$i \
via 2001:10::2 dev dummy_10
done
# Temporary routes
- for i in $(seq 1 1000); do
+ for i in $(seq 1 5); do
# Expire route after $EXPIRE seconds
$IP -6 route add 2001:20::$i \
via 2001:10::2 dev dummy_10 expires $EXPIRE
done
- sleep $(($EXPIRE * 2))
- N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l)
- if [ $N_EXP_SLEEP -ne 0 ]; then
- echo "FAIL: expected 0 routes with expires," \
- "got $N_EXP_SLEEP (5000 permanent routes)"
- ret=1
- else
- ret=0
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (with permanent routes)"
+
+ reset_dummy_10
+
+ # Permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ # Replace with temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with expires)"
+
+ reset_dummy_10
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Replace with permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+
+ # ra6 is required for the next test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
fi
- set +e
+ # Delete dummy_10 and remove all routes
+ $IP link del dev dummy_10
- log_test $ret 0 "ipv6 route garbage collection"
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a route from veth2 to veth1.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. You syould make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (RA message)"
+
+ set +e
cleanup &> /dev/null
}
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 452693514be4..535865b3d1d6 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -112,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
-TEST_PROGS_EXTENDED := devlink_lib.sh \
+TEST_FILES := devlink_lib.sh \
ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
@@ -123,10 +123,14 @@ TEST_PROGS_EXTENDED := devlink_lib.sh \
mirror_gre_topo_lib.sh \
mirror_lib.sh \
mirror_topo_lib.sh \
+ router_mpath_nh_lib.sh \
sch_ets_core.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
tc_common.sh
+TEST_INCLUDES := \
+ ../lib.sh
+
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
index 9af9f6964808..c62331b2e006 100755
--- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -327,10 +327,10 @@ locked_port_mab_redirect()
RET=0
check_port_mab_support || return 0
- bridge link set dev $swp1 learning on locked on mab on
tc qdisc add dev $swp1 clsact
tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \
action mirred egress redirect dev $swp2
+ bridge link set dev $swp1 learning on locked on mab on
ping_do $h1 192.0.2.2
check_err $? "Ping did not work with redirection"
@@ -349,8 +349,8 @@ locked_port_mab_redirect()
check_err $? "Locked entry not created after deleting filter"
bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master
- tc qdisc del dev $swp1 clsact
bridge link set dev $swp1 learning off locked off mab off
+ tc qdisc del dev $swp1 clsact
log_test "Locked port MAB redirect"
}
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 61348f71728c..d9d587454d20 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -329,7 +329,7 @@ __cfg_test_port_ip_star_g()
bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) \"permanent\" entry has a pending group timer"
- bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_err $? "\"permanent\" source entry has a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -346,7 +346,7 @@ __cfg_test_port_ip_star_g()
bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
- bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_err $? "\"blocked\" source entry has a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -363,7 +363,7 @@ __cfg_test_port_ip_star_g()
bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) INCLUDE entry has a pending group timer"
- bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_fail $? "Source entry does not have a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -1252,14 +1252,17 @@ fwd_test()
echo
log_info "# Forwarding tests"
+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br0 type bridge mcast_query_response_interval 100
+
# Forwarding according to MDB entries only takes place when the bridge
# detects that there is a valid querier in the network. Set the bridge
# as the querier and assign it a valid IPv6 link-local address to be
# used as the source address for MLD queries.
ip -6 address add fe80::1/64 nodad dev br0
ip link set dev br0 type bridge mcast_querier 1
- # Wait the default Query Response Interval (10 seconds) for the bridge
- # to determine that there are no other queriers in the network.
sleep 10
fwd_test_host
@@ -1267,6 +1270,7 @@ fwd_test()
ip link set dev br0 type bridge mcast_querier 0
ip -6 address del fe80::1/64 dev br0
+ ip link set dev br0 type bridge mcast_query_response_interval 1000
}
ctrl_igmpv3_is_in_test()
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 697994a9278b..8d7a1a004b7c 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,14 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_MACVLAN=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_VLAN=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
CONFIG_VETH=m
CONFIG_NAMESPACES=y
CONFIG_NET_NS=y
+CONFIG_VXLAN=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 56eb83d1a3bd..1783c10215e5 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -183,42 +183,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -234,14 +234,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:4::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:4::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index 4a546509de90..1fc4f0242fc5 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -28,6 +28,8 @@ PING=ping
PING6=ping6
# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
+# mausezahn delay between transmissions in microseconds.
+MZ_DELAY=0
# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
# Whether to pause on failure or not.
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 0446db9c6f74..9788bd0f6e8b 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -278,42 +278,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -329,14 +329,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
index e4009f658003..efca6114a3ce 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -267,7 +267,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
index e449475c4d3e..a71ad39fc0c3 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -266,9 +266,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
index a8d8e8b3dc81..57531c1d884d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -220,7 +220,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
index d03aa2cab9fd..7d5b2b9cc133 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -64,7 +64,6 @@ ALL_TESTS="
ping_ipv6
multipath_ipv4
multipath_ipv6
- multipath_ipv6_l4
"
NUM_NETIFS=6
@@ -245,7 +244,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -264,34 +263,6 @@ multipath6_test()
local weight1=$1; shift
local weight2=$1; shift
- sysctl_set net.ipv6.fib_multipath_hash_policy 0
- ip nexthop replace id 103 group 101,$weight1/102,$weight2
-
- local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- # Generate 16384 echo requests, each with a random flow label.
- for ((i=0; i < 16384; ++i)); do
- ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- local d111=$((t1_111 - t0_111))
- local d222=$((t1_222 - t0_222))
- multipath_eval "$what" $weight1 $weight2 $d111 $d222
-
- ip nexthop replace id 103 group 101/102
- sysctl_restore net.ipv6.fib_multipath_hash_policy
-}
-
-multipath6_l4_test()
-{
- local what=$1; shift
- local weight1=$1; shift
- local weight2=$1; shift
-
sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip nexthop replace id 103 group 101,$weight1/102,$weight2
@@ -300,7 +271,7 @@ multipath6_l4_test()
ip vrf exec v$h1 \
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -339,14 +310,6 @@ multipath_ipv6()
multipath6_test "Weighted MP 11:45" 11 45
}
-multipath_ipv6_l4()
-{
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
-}
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
index 088b65e64d66..370f9925302d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -64,7 +64,6 @@ ALL_TESTS="
ping_ipv6
multipath_ipv4
multipath_ipv6
- multipath_ipv6_l4
"
NUM_NETIFS=6
@@ -248,7 +247,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -267,35 +266,6 @@ multipath6_test()
local weight1=$1; shift
local weight2=$1; shift
- sysctl_set net.ipv6.fib_multipath_hash_policy 0
- ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
- type resilient
-
- local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- # Generate 16384 echo requests, each with a random flow label.
- for ((i=0; i < 16384; ++i)); do
- ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- local d111=$((t1_111 - t0_111))
- local d222=$((t1_222 - t0_222))
- multipath_eval "$what" $weight1 $weight2 $d111 $d222
-
- ip nexthop replace id 103 group 101/102 type resilient
- sysctl_restore net.ipv6.fib_multipath_hash_policy
-}
-
-multipath6_l4_test()
-{
- local what=$1; shift
- local weight1=$1; shift
- local weight2=$1; shift
-
sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
type resilient
@@ -305,7 +275,7 @@ multipath6_l4_test()
ip vrf exec v$h1 \
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -344,14 +314,6 @@ multipath_ipv6()
multipath6_test "Weighted MP 11:45" 11 45
}
-multipath_ipv6_l4()
-{
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
-}
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index d40183b4eccc..2ab9eaaa5532 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -280,42 +280,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -331,14 +331,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
index a257979d3fc5..32d1461f37b7 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -266,7 +266,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
index d208f5243ade..e1a4b50505f5 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -265,9 +265,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
index 58a3597037b1..24f4ab328bd2 100644
--- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -356,7 +356,7 @@ test_traffic_ip4ip6()
flower $TC_FLAG dst_ip 203.0.113.1 action pass
$MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \
- -B 203.0.113.1 -t ip -q -d 1msec
+ -B 203.0.113.1 -t ip -q -d $MZ_DELAY
# Check ports after encap and after decap.
tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
@@ -389,7 +389,7 @@ test_traffic_ip6ip6()
flower $TC_FLAG dst_ip 2001:db8:2::1 action pass
$MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \
- -B 2001:db8:2::1 -t ip -q -d 1msec
+ -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY
# Check ports after encap and after decap.
tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 8a61464ab6eb..e579c2e0c462 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -8,6 +8,7 @@
PING=${PING:=ping}
PING6=${PING6:=ping6}
MZ=${MZ:=mausezahn}
+MZ_DELAY=${MZ_DELAY:=0}
ARPING=${ARPING:=arping}
TEAMD=${TEAMD:=teamd}
WAIT_TIME=${WAIT_TIME:=5}
@@ -29,23 +30,20 @@ STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
TROUTE6=${TROUTE6:=traceroute6}
-relative_path="${BASH_SOURCE%/*}"
-if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
- relative_path="."
-fi
+net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-if [[ -f $relative_path/forwarding.config ]]; then
- source "$relative_path/forwarding.config"
+if [[ -f $net_forwarding_dir/forwarding.config ]]; then
+ source "$net_forwarding_dir/forwarding.config"
fi
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source "$net_forwarding_dir/../lib.sh"
-busywait()
+# timeout in seconds
+slowwait()
{
local timeout=$1; shift
- local start_time="$(date -u +%s%3N)"
+ local start_time="$(date -u +%s)"
while true
do
local out
@@ -56,11 +54,13 @@ busywait()
return 0
fi
- local current_time="$(date -u +%s%3N)"
+ local current_time="$(date -u +%s)"
if ((current_time - start_time > timeout)); then
echo -n "$out"
return 1
fi
+
+ sleep 0.1
done
}
@@ -505,6 +505,15 @@ busywait_for_counter()
busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+slowwait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
setup_wait_dev()
{
local dev=$1; shift
@@ -891,6 +900,33 @@ hw_stats_get()
jq ".[0].stats64.$dir.$stat"
}
+__nh_stats_get()
+{
+ local key=$1; shift
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ ip -j -s -s nexthop show id $group_id |
+ jq --argjson member_id "$member_id" --arg key "$key" \
+ '.[].group_stats[] | select(.id == $member_id) | .[$key]'
+}
+
+nh_stats_get()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets "$group_id" "$member_id"
+}
+
+nh_stats_get_hw()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets_hw "$group_id" "$member_id"
+}
+
humanize()
{
local speed=$1; shift
@@ -2001,3 +2037,10 @@ bail_on_lldpad()
fi
fi
}
+
+absval()
+{
+ local v=$1; shift
+
+ echo $((v > 0 ? v : -v))
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index fac486178ef7..0c36546e131e 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-source "$relative_path/mirror_lib.sh"
+source "$net_forwarding_dir/mirror_lib.sh"
quick_test_span_gre_dir_ips()
{
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index 39c03e2867f4..6e615fffa4ef 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,7 +33,7 @@
# | |
# +-------------------------------------------------------------------------+
-source "$relative_path/mirror_topo_lib.sh"
+source "$net_forwarding_dir/mirror_topo_lib.sh"
mirror_gre_topo_h3_create()
{
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index a0d612e04990..3f0f5dc95542 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -7,9 +7,12 @@ ALL_TESTS="
multipath_test
ping_ipv4_blackhole
ping_ipv6_blackhole
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -204,7 +207,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -218,7 +221,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -237,7 +240,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -251,34 +254,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip nexthop replace id 106 group 104/105
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -301,11 +276,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
ping_ipv4_blackhole()
@@ -358,6 +328,16 @@ ping_ipv6_blackhole()
ip -6 nexthop del id 1001
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 mpath
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 mpath
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
new file mode 100644
index 000000000000..7e7d62161c34
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+nh_stats_do_test()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local stats_get=$1; shift
+ local mz="$@"
+
+ local dp
+
+ RET=0
+
+ sleep 2
+ for ((dp=0; dp < 60000; dp += 10000)); do
+ local dd
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+ local t0_rp13=$(link_stats_tx_packets_get $rp13)
+ local t0_nh1=$($stats_get $group_id $nh1_id)
+ local t0_nh2=$($stats_get $group_id $nh2_id)
+
+ ip vrf exec vrf-h1 \
+ $mz -q -p 64 -d 0 -t udp \
+ "sp=1024,dp=$((dp))-$((dp + 10000))"
+ sleep 2
+
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+ local t1_rp13=$(link_stats_tx_packets_get $rp13)
+ local t1_nh1=$($stats_get $group_id $nh1_id)
+ local t1_nh2=$($stats_get $group_id $nh2_id)
+
+ local d_rp12=$((t1_rp12 - t0_rp12))
+ local d_rp13=$((t1_rp13 - t0_rp13))
+ local d_nh1=$((t1_nh1 - t0_nh1))
+ local d_nh2=$((t1_nh2 - t0_nh2))
+
+ dd=$(absval $((d_rp12 - d_nh1)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1"
+
+ dd=$(absval $((d_rp13 - d_nh2)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2"
+ done
+
+ log_test "NH stats test $what"
+}
+
+nh_stats_test_dispatch_swhw()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local used
+
+ nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get "${mz[@]}"
+
+ used=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.used')
+ kind=$(ip -j -d link show dev $rp11 |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $used == true ]]; then
+ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get_hw "${mz[@]}"
+ elif [[ $kind == veth ]]; then
+ log_test_skip "HW stats not offloaded on veth topology"
+ fi
+}
+
+nh_stats_test_dispatch()
+{
+ local nhgtype=$1; shift
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local enabled
+ local kind
+
+ if ! ip nexthop help 2>&1 | grep -q hw_stats; then
+ log_test_skip "NH stats test: ip doesn't support HW stats"
+ return
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats on type $nhgtype
+ enabled=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.enabled')
+ if [[ $enabled == true ]]; then
+ nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
+ "$group_id" "${mz[@]}"
+ elif [[ $enabled == false ]]; then
+ check_err 1 "HW stats still disabled after enabling"
+ log_test "NH stats test"
+ else
+ log_test_skip "NH stats test: ip doesn't report hw_stats info"
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats off type $nhgtype
+}
+
+__nh_stats_test_v4()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \
+ $MZ $h1 -A 192.0.2.2 -B 198.51.100.2
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+__nh_stats_test_v6()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \
+ $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cb08ffe2356a..4b483d24ad00 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -5,9 +5,12 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -205,7 +208,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -235,7 +238,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -333,6 +336,16 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 resilient
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 resilient
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 464821c587a5..e2be354167a1 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -179,7 +179,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -195,7 +195,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -216,7 +216,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -232,38 +232,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
- nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 \
- nexthop via fe80:3::23 dev $rp13
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -275,11 +243,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
setup_prepare()
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index b0f5e55d2d0b..589629636502 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test()
check_err $? "didn't mirred redirect ICMP"
tc_check_packets "dev $h1 ingress" 102 10
check_err $? "didn't drop mirred ICMP"
- local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
- test ${overlimits} = 10
- check_err $? "wrong overlimits, expected 10 got ${overlimits}"
tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
index 20a7cb7222b8..c2420bb72c12 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -209,14 +209,17 @@ test_l2_miss_multicast()
# both registered and unregistered multicast traffic.
bridge link set dev $swp2 mcast_router 2
+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br1 type bridge mcast_query_response_interval 100
+
# Forwarding according to MDB entries only takes place when the bridge
# detects that there is a valid querier in the network. Set the bridge
# as the querier and assign it a valid IPv6 link-local address to be
# used as the source address for MLD queries.
ip link set dev br1 type bridge mcast_querier 1
ip -6 address add fe80::1/64 nodad dev br1
- # Wait the default Query Response Interval (10 seconds) for the bridge
- # to determine that there are no other queriers in the network.
sleep 10
test_l2_miss_multicast_ipv4
@@ -224,6 +227,7 @@ test_l2_miss_multicast()
ip -6 address del fe80::1/64 dev br1
ip link set dev br1 type bridge mcast_querier 0
+ ip link set dev br1 type bridge mcast_query_response_interval 1000
bridge link set dev $swp2 mcast_router 1
}
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 0a51eef21b9e..5103f64a71d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -140,7 +140,7 @@ police_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -157,7 +157,7 @@ police_rx_test()
# Rule to police traffic destined to $h2 on ingress of $rp1
tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok
+ action police rate 10mbit burst 16k conform-exceed drop/ok
police_common_test "police on rx"
@@ -169,7 +169,7 @@ police_tx_test()
# Rule to police traffic destined to $h2 on egress of $rp2
tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok
+ action police rate 10mbit burst 16k conform-exceed drop/ok
police_common_test "police on tx"
@@ -190,7 +190,7 @@ police_shared_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -211,7 +211,7 @@ police_shared_test()
# Rule to police traffic destined to $h2 on ingress of $rp1
tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok \
+ action police rate 10mbit burst 16k conform-exceed drop/ok \
index 10
# Rule to police a different flow destined to $h2 on egress of $rp2
@@ -250,7 +250,7 @@ police_mirror_common_test()
# Rule to police traffic destined to $h2 and mirror to $h3
tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/pipe \
+ action police rate 10mbit burst 16k conform-exceed drop/pipe \
action mirred egress mirror dev $rp3
mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
@@ -260,7 +260,7 @@ police_mirror_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -270,7 +270,7 @@ police_mirror_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index eb307ca37bfa..6f0a2e452ba1 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -495,7 +495,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -532,7 +532,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 prot ip \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 prot ip
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
index ac97f07e5ce8..a0bb4524e1e9 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -616,7 +616,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -653,7 +653,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 protocol ipv6 \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 protocol ipv6
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index a596bbf3ed6a..fb9a34cb50c6 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -750,7 +750,7 @@ __test_learning()
expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
vxlan_flood_test $mac $dst $vid "${expects[@]}"
- sleep 20
+ sleep 60
bridge fdb show brport $vx | grep $mac | grep -q self
check_fail $?
@@ -796,11 +796,11 @@ test_learning()
local dst=192.0.2.100
local vid=10
- # Enable learning on the VxLAN devices and set ageing time to 10 seconds
- ip link set dev br1 type bridge ageing_time 1000
- ip link set dev vx10 type vxlan ageing 10
+ # Enable learning on the VxLAN devices and set ageing time to 30 seconds
+ ip link set dev br1 type bridge ageing_time 3000
+ ip link set dev vx10 type vxlan ageing 30
ip link set dev vx10 type vxlan learning
- ip link set dev vx20 type vxlan ageing 10
+ ip link set dev vx20 type vxlan ageing 30
ip link set dev vx20 type vxlan learning
reapply_config
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
index 24b77bdf41ff..977070ed42b3 100755
--- a/tools/testing/selftests/net/fq_band_pktlimit.sh
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -8,7 +8,7 @@
# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
#
-# Send packets with a 100ms delay to ensure that previously sent
+# Send packets with a delay to ensure that previously sent
# packets are still queued when later ones are sent.
# Use SO_TXTIME for this.
@@ -29,19 +29,21 @@ ip -6 addr add fdaa::1/128 dev dummy0
ip -6 route add fdaa::/64 dev dummy0
tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
-./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+DELAY=400000
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
-./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
-./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000
OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
# Initial stats will report zero sent, as all packets are still
-# queued in FQ. Sleep for the delay period (100ms) and see that
+# queued in FQ. Sleep for at least the delay period and see that
# twenty are now sent.
-sleep 0.1
+sleep 0.6
OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
# Log the output after the test
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
index 19352f106c1d..02c21ff4ca81 100755
--- a/tools/testing/selftests/net/gro.sh
+++ b/tools/testing/selftests/net/gro.sh
@@ -31,6 +31,11 @@ run_test() {
1>>log.txt
wait "${server_pid}"
exit_code=$?
+ if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
+ ${exit_code} -ne 0 ]]; then
+ echo "Ignoring errors due to slow environment" 1>&2
+ exit_code=0
+ fi
if [[ "${exit_code}" -eq 0 ]]; then
break;
fi
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index fe59ca3e5596..12491850ae98 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -367,14 +367,12 @@ run_test()
local desc=$2
local node_src=$3
local node_dst=$4
- local ip6_src=$5
- local ip6_dst=$6
- local if_dst=$7
- local trace_type=$8
- local ioam_ns=$9
-
- ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
- $trace_type $ioam_ns &
+ local ip6_dst=$5
+ local trace_type=$6
+ local ioam_ns=$7
+ local type=$8
+
+ ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type &
local spid=$!
sleep 0.1
@@ -489,7 +487,7 @@ out_undef_ns()
trace prealloc type 0x800000 ns 0 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0x800000 0
+ db01::1 0x800000 0 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -509,7 +507,7 @@ out_no_room()
trace prealloc type 0xc00000 ns 123 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0xc00000 123
+ db01::1 0xc00000 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -543,14 +541,14 @@ out_bits()
if [ $cmd_res != 0 ]
then
npassed=$((npassed+1))
- log_test_passed "$descr"
+ log_test_passed "$descr ($1 mode)"
else
nfailed=$((nfailed+1))
- log_test_failed "$descr"
+ log_test_failed "$descr ($1 mode)"
fi
else
run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \
- $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
fi
done
@@ -574,7 +572,7 @@ out_full_supp_trace()
trace prealloc type 0xfff002 ns 123 size 100 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0xfff002 123
+ db01::1 0xfff002 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -604,7 +602,7 @@ in_undef_ns()
trace prealloc type 0x800000 ns 0 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0x800000 0
+ db01::1 0x800000 0 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -624,7 +622,7 @@ in_no_room()
trace prealloc type 0xc00000 ns 123 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0xc00000 123
+ db01::1 0xc00000 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -651,7 +649,7 @@ in_bits()
dev veth0
run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \
- $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
done
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
@@ -679,7 +677,7 @@ in_oflag()
trace prealloc type 0xc00000 ns 123 size 4 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0xc00000 123
+ db01::1 0xc00000 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
@@ -703,7 +701,7 @@ in_full_supp_trace()
trace prealloc type 0xfff002 ns 123 size 80 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::2 db01::1 veth0 0xfff002 123
+ db01::1 0xfff002 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -731,7 +729,7 @@ fwd_full_supp_trace()
trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \
- db01::2 db02::2 veth0 0xfff002 123
+ db02::2 0xfff002 123 $1
[ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down
}
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index d9d1d4190126..895e5bb5044b 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -8,7 +8,6 @@
#include <errno.h>
#include <limits.h>
#include <linux/const.h>
-#include <linux/if_ether.h>
#include <linux/ioam6.h>
#include <linux/ipv6.h>
#include <stdlib.h>
@@ -512,14 +511,6 @@ static int str2id(const char *tname)
return -1;
}
-static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
-{
- return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
- (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
- (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
- (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
-}
-
static int get_u32(__u32 *val, const char *arg, int base)
{
unsigned long res;
@@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
int main(int argc, char **argv)
{
- int fd, size, hoplen, tid, ret = 1;
- struct in6_addr src, dst;
+ int fd, size, hoplen, tid, ret = 1, on = 1;
struct ioam6_hdr *opt;
- struct ipv6hdr *ip6h;
- __u8 buffer[400], *p;
- __u16 ioam_ns;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ struct iovec iov;
+ __u8 buffer[512];
__u32 tr_type;
+ __u16 ioam_ns;
+ __u8 *ptr;
- if (argc != 7)
+ if (argc != 5)
goto out;
- tid = str2id(argv[2]);
+ tid = str2id(argv[1]);
if (tid < 0 || !func[tid])
goto out;
- if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
- inet_pton(AF_INET6, argv[4], &dst) != 1)
+ if (get_u32(&tr_type, argv[2], 16) ||
+ get_u16(&ioam_ns, argv[3], 0))
goto out;
- if (get_u32(&tr_type, argv[5], 16) ||
- get_u16(&ioam_ns, argv[6], 0))
+ fd = socket(PF_INET6, SOCK_RAW,
+ !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+ if (fd < 0)
goto out;
- fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
- if (!fd)
- goto out;
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on));
- if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
- argv[1], strlen(argv[1])))
+ iov.iov_len = 1;
+ iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer)));
+ if (!iov.iov_base)
goto close;
-
recv:
- size = recv(fd, buffer, sizeof(buffer), 0);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = buffer;
+ msg.msg_controllen = CMSG_SPACE(sizeof(buffer));
+
+ size = recvmsg(fd, &msg, 0);
if (size <= 0)
goto close;
- ip6h = (struct ipv6hdr *)buffer;
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level != IPPROTO_IPV6 ||
+ cmsg->cmsg_type != IPV6_HOPOPTS ||
+ cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr))
+ continue;
- if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
- !ipv6_addr_equal(&ip6h->daddr, &dst))
- goto recv;
+ ptr = (__u8 *)CMSG_DATA(cmsg);
- if (ip6h->nexthdr != IPPROTO_HOPOPTS)
- goto close;
+ hoplen = (ptr[1] + 1) << 3;
+ ptr += sizeof(struct ipv6_hopopt_hdr);
- p = buffer + sizeof(*ip6h);
- hoplen = (p[1] + 1) << 3;
- p += sizeof(struct ipv6_hopopt_hdr);
+ while (hoplen > 0) {
+ opt = (struct ioam6_hdr *)ptr;
- while (hoplen > 0) {
- opt = (struct ioam6_hdr *)p;
+ if (opt->opt_type == IPV6_TLV_IOAM &&
+ opt->type == IOAM6_TYPE_PREALLOC) {
+ ptr += sizeof(*opt);
+ ret = func[tid](tid,
+ (struct ioam6_trace_hdr *)ptr,
+ tr_type, ioam_ns);
+ goto close;
+ }
- if (opt->opt_type == IPV6_TLV_IOAM &&
- opt->type == IOAM6_TYPE_PREALLOC) {
- p += sizeof(*opt);
- ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
- tr_type, ioam_ns);
- break;
+ ptr += opt->opt_len + 2;
+ hoplen -= opt->opt_len + 2;
}
-
- p += opt->opt_len + 2;
- hoplen -= opt->opt_len + 2;
}
+
+ goto recv;
close:
+ free(iov.iov_base);
close(fd);
out:
return ret;
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 0f217a1cc837..193b82745fd8 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -16,6 +16,10 @@
#define IP_LOCAL_PORT_RANGE 51
#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
static __u32 pack_port_range(__u16 lo, __u16 hi)
{
return (hi << 16) | (lo << 0);
@@ -361,9 +365,6 @@ TEST_F(ip_local_port_range, late_bind)
__u32 range;
__u16 port;
- if (variant->so_protocol == IPPROTO_SCTP)
- SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT");
-
fd = socket(variant->so_domain, variant->so_type, 0);
ASSERT_GE(fd, 0) TH_LOG("socket failed");
@@ -410,6 +411,9 @@ TEST_F(ip_local_port_range, late_bind)
ASSERT_TRUE(!err) TH_LOG("close failed");
}
+XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind);
+XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind);
+
TEST_F(ip_local_port_range, get_port_range)
{
__u16 lo, hi;
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index dca549443801..f9fe182dfbd4 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -4,6 +4,9 @@
##############################################################################
# Defines
+WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
# namespace list created by setup_ns
@@ -48,7 +51,7 @@ cleanup_ns()
for ns in "$@"; do
ip netns delete "${ns}" &> /dev/null
- if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+ if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
echo "Warn: Failed to remove namespace $ns"
ret=1
fi
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index e317c2e44dae..4f80014cae49 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 04fcb8a077c9..bc97ab33a00e 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -1,14 +1,15 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns="ns1-$rndh"
-ksft_skip=4
-test_cnt=1
-timeout_poll=100
+ns=""
+timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
ret=0
@@ -20,31 +21,23 @@ flush_pids()
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
- for _ in $(seq 10); do
+ for _ in $(seq $((timeout_poll * 10))); do
[ -z "$(ip netns pids "${ns}")" ] && break
sleep 0.1
done
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
- ip netns del $ns
+ mptcp_lib_ns_exit "${ns}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-ss -h | grep -q MPTCP
-if [ $? -ne 0 ];then
- echo "SKIP: ss tool does not support MPTCP"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip ss
get_msk_inuse()
{
@@ -61,21 +54,20 @@ __chk_nr()
nr=$(eval $command)
- printf "%-50s" "$msg"
- if [ $nr != $expected ]; then
- if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then
- echo "[ skip ] Feature probably not supported"
+ mptcp_lib_print_title "$msg"
+ if [ "$nr" != "$expected" ]; then
+ if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
mptcp_lib_result_skip "${msg}"
else
- echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_pr_fail "expected $expected found $nr"
mptcp_lib_result_fail "${msg}"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
fi
else
- echo "[ ok ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${msg}"
fi
- test_cnt=$((test_cnt+1))
}
__chk_msk_nr()
@@ -91,6 +83,15 @@ chk_msk_nr()
__chk_msk_nr "grep -c token:" "$@"
}
+chk_listener_nr()
+{
+ local expected=$1
+ local msg="$2"
+
+ __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0
+ __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows"
+}
+
wait_msk_nr()
{
local condition="grep -c token:"
@@ -111,20 +112,19 @@ wait_msk_nr()
sleep 1
done
- printf "%-50s" "$msg"
+ mptcp_lib_print_title "$msg"
if [ $i -ge $timeout ]; then
- echo "[ fail ] timeout while expecting $expected max $max last $nr"
+ mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr"
mptcp_lib_result_fail "${msg} # timeout"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
elif [ $nr != $expected ]; then
- echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_pr_fail "expected $expected found $nr"
mptcp_lib_result_fail "${msg} # unexpected result"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
else
- echo "[ ok ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${msg}"
fi
- test_cnt=$((test_cnt+1))
}
chk_msk_fallback_nr()
@@ -166,29 +166,38 @@ chk_msk_listen()
chk_msk_inuse()
{
local expected=$1
- local msg="$2"
+ local msg="....chk ${2:-${expected}} msk in use"
local listen_nr
+ if [ "${expected}" -eq 0 ]; then
+ msg+=" after flush"
+ fi
+
listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
expected=$((expected + listen_nr))
for _ in $(seq 10); do
- if [ $(get_msk_inuse) -eq $expected ];then
+ if [ "$(get_msk_inuse)" -eq $expected ]; then
break
fi
sleep 0.1
done
- __chk_nr get_msk_inuse $expected "$msg" 0
+ __chk_nr get_msk_inuse $expected "${msg}" 0
}
# $1: cestab nr
chk_msk_cestab()
{
- local cestab=$1
+ local expected=$1
+ local msg="....chk ${2:-${expected}} cestab"
+
+ if [ "${expected}" -eq 0 ]; then
+ msg+=" after flush"
+ fi
__chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \
- "${cestab}" "....chk ${cestab} cestab" ""
+ "${expected}" "${msg}" ""
}
wait_connected()
@@ -206,8 +215,7 @@ wait_connected()
}
trap cleanup EXIT
-ip netns add $ns
-ip -n $ns link set dev lo up
+mptcp_lib_ns_init ns
echo "a" | \
timeout ${timeout_test} \
@@ -227,12 +235,12 @@ wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
-chk_msk_inuse 2 "....chk 2 msk in use"
+chk_msk_inuse 2
chk_msk_cestab 2
flush_pids
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
-chk_msk_cestab 0
+chk_msk_inuse 0 "2->0"
+chk_msk_cestab 0 "2->0"
echo "a" | \
timeout ${timeout_test} \
@@ -247,15 +255,15 @@ echo "b" | \
127.0.0.1 >/dev/null &
wait_connected $ns 10001
chk_msk_fallback_nr 1 "check fallback"
-chk_msk_inuse 1 "....chk 1 msk in use"
+chk_msk_inuse 1
chk_msk_cestab 1
flush_pids
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
-chk_msk_cestab 0
+chk_msk_inuse 0 "1->0"
+chk_msk_cestab 0 "1->0"
NR_CLIENTS=100
-for I in `seq 1 $NR_CLIENTS`; do
+for I in $(seq 1 $NR_CLIENTS); do
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
@@ -264,7 +272,7 @@ for I in `seq 1 $NR_CLIENTS`; do
done
mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
-for I in `seq 1 $NR_CLIENTS`; do
+for I in $(seq 1 $NR_CLIENTS); do
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
@@ -273,12 +281,28 @@ for I in `seq 1 $NR_CLIENTS`; do
done
wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
-chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use"
-chk_msk_cestab $((NR_CLIENTS*2))
+chk_msk_inuse $((NR_CLIENTS*2)) "many"
+chk_msk_cestab $((NR_CLIENTS*2)) "many"
flush_pids
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
-chk_msk_cestab 0
+chk_msk_inuse 0 "many->0"
+chk_msk_cestab 0 "many->0"
+
+chk_listener_nr 0 "no listener sockets"
+NR_SERVERS=100
+for I in $(seq 1 $NR_SERVERS); do
+ ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \
+ -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 &
+done
+mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001))
+
+chk_listener_nr $NR_SERVERS "many listener sockets"
+
+# graceful termination
+for I in $(seq 1 $NR_SERVERS); do
+ echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 &
+done
+flush_pids
mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 7898d62fce0b..4c4248554826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
time_start=$(date +%s)
@@ -13,7 +18,6 @@ sout=""
cin_disconnect=""
cin=""
cout=""
-ksft_skip=4
capture=false
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
@@ -29,6 +33,7 @@ do_tcp=0
checksum=false
filesize=0
connect_per_transfer=1
+port=$((10000 - 1))
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -60,14 +65,14 @@ while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"d")
if [ $OPTARG -ge 0 ];then
tc_delay="$OPTARG"
else
echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"e")
@@ -91,7 +96,7 @@ while getopts "$optstring" option;do
sndbuf="$OPTARG"
else
echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"R")
@@ -99,7 +104,7 @@ while getopts "$optstring" option;do
rcvbuf="$OPTARG"
else
echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"m")
@@ -116,21 +121,20 @@ while getopts "$optstring" option;do
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-ns4="ns4-$rndh"
+ns1=""
+ns2=""
+ns3=""
+ns4=""
-TEST_COUNT=0
TEST_GROUP=""
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cin_disconnect" "$cout_disconnect"
@@ -138,21 +142,12 @@ cleanup()
rm -f "$sin" "$sout"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns del $netns
- rm -f /tmp/$netns.{nstat,out}
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}"
}
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
sin=$(mktemp)
sout=$(mktemp)
@@ -163,10 +158,7 @@ cin_disconnect="$cin".disconnect
cout_disconnect="$cout".disconnect
trap cleanup EXIT
-for i in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+mptcp_lib_ns_init ns1 ns2 ns3 ns4
# "$ns1" ns2 ns3 ns4
# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3
@@ -225,8 +217,9 @@ set_ethtool_flags() {
local dev="$2"
local flags="$3"
- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
- [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
+ mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags"
+ fi
}
set_random_ethtool_flags() {
@@ -254,16 +247,23 @@ else
set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
fi
+print_larger_title() {
+ # here we don't have the time, a bit longer for the alignment
+ MPTCP_LIB_TEST_FORMAT="%02u %-69s" \
+ mptcp_lib_print_title "${@}"
+}
+
check_mptcp_disabled()
{
- local disabled_ns="ns_disabled-$rndh"
- ip netns add ${disabled_ns} || exit $ksft_skip
+ local disabled_ns
+ mptcp_lib_ns_init disabled_ns
+ print_larger_title "New MPTCP socket can be blocked via sysctl"
# net.mptcp.enabled should be enabled by default
if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
- echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
+ mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default"
mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
@@ -271,16 +271,16 @@ check_mptcp_disabled()
local err=0
LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
- ip netns delete ${disabled_ns}
+ mptcp_lib_ns_exit "${disabled_ns}"
if [ ${err} -eq 0 ]; then
- echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
+ mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl"
mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
- echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl"
return 0
}
@@ -301,8 +301,8 @@ do_ping()
ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1
if [ $rc -ne 0 ] ; then
- echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
+ mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity"
+ ret=${KSFT_FAIL}
return 1
fi
@@ -320,24 +320,22 @@ do_transfer()
local local_addr="$6"
local extra_args="$7"
- local port
- port=$((10000+$TEST_COUNT))
- TEST_COUNT=$((TEST_COUNT+1))
+ port=$((port + 1))
if [ "$rcvbuf" -gt 0 ]; then
- extra_args="$extra_args -R $rcvbuf"
+ extra_args+=" -R $rcvbuf"
fi
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -S $sndbuf"
+ extra_args+=" -S $sndbuf"
fi
if [ -n "$testmode" ]; then
- extra_args="$extra_args -m $testmode"
+ extra_args+=" -m $testmode"
fi
if [ -n "$extra_args" ] && $options_log; then
- echo "INFO: extra options: $extra_args"
+ mptcp_lib_pr_info "extra options: $extra_args"
fi
options_log=false
@@ -349,10 +347,11 @@ do_transfer()
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
local result_msg
result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
- printf "%s\t" "${result_msg}"
+ mptcp_lib_print_title "${result_msg}"
if $capture; then
local capuser
+ local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
@@ -378,12 +377,18 @@ do_transfer()
nstat -n
fi
- local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local stat_synrx_last_l
+ local stat_ackrx_last_l
+ local stat_cookietx_last
+ local stat_cookierx_last
+ local stat_csum_err_s
+ local stat_csum_err_c
+ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -427,7 +432,7 @@ do_transfer()
result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo "[ FAIL ] client exit code $retc, server $rets" 1>&2
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
cat /tmp/${listener_ns}.out
@@ -446,11 +451,17 @@ do_transfer()
mptcp_lib_check_transfer $cin $sout "file received by server"
rets=$?
- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ local extra=""
+ local stat_synrx_now_l
+ local stat_ackrx_now_l
+ local stat_cookietx_now
+ local stat_cookierx_now
+ local stat_ooo_now
+ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -459,75 +470,79 @@ do_transfer()
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
+ expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
fi
if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
- printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \
- "${stat_synrx_now_l}" "${expect_synrx}" 1>&2
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ "than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
if [ ${stat_ooo_now} -eq 0 ]; then
- printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
- "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ "than expected (${expect_ackrx})"
rets=1
else
- printf "[ Note ] fallback due to TCP OoO"
+ extra+=" [ Note ] fallback due to TCP OoO"
fi
fi
if $checksum; then
- local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_s
+ local csum_err_c
+ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
if [ $csum_err_s_nr -gt 0 ]; then
- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+ mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
rets=1
fi
local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
if [ $csum_err_c_nr -gt 0 ]; then
- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+ mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
retc=1
fi
fi
- if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
- printf "[ OK ]"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
- else
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
- fi
-
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: did not advance"
+ extra+=" WARN: CookieSent: did not advance"
fi
if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: did not advance"
+ extra+=" WARN: CookieRecv: did not advance"
fi
else
if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: changed"
+ extra+=" WARN: CookieSent: changed"
fi
if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: changed"
+ extra+=" WARN: CookieRecv: changed"
fi
fi
if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
- printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \
- "${expect_synrx}" "${stat_synrx_now_l}"
+ extra+=" WARN: SYNRX: expect ${expect_synrx},"
+ extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
fi
if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
- printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \
- "${expect_ackrx}" "${stat_ackrx_now_l}"
+ extra+=" WARN: ACKRX: expect ${expect_ackrx},"
+ extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ fi
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+ mptcp_lib_pr_ok "${extra:1}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ else
+ if [ -n "${extra}" ]; then
+ mptcp_lib_print_warn "${extra:1}"
+ fi
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi
- echo
cat "$capout"
[ $retc -eq 0 ] && [ $rets -eq 0 ]
}
@@ -653,12 +668,12 @@ run_test_transparent()
# following function has been exported (T). Not great but better than
# checking for a specific kernel version.
if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
- echo "INFO: ${msg} not supported by the kernel: SKIP"
+ mptcp_lib_pr_skip "${msg} not supported by the kernel"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+ if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
chain divert {
@@ -669,8 +684,8 @@ table inet mangle {
}
}
EOF
- if [ $? -ne 0 ]; then
- echo "SKIP: $msg, could not load nft ruleset"
+ then
+ mptcp_lib_pr_skip "$msg, could not load nft ruleset"
mptcp_lib_fail_if_expected_feature "nft rules"
mptcp_lib_result_skip "${TEST_GROUP}"
return
@@ -684,28 +699,26 @@ EOF
local_addr="0.0.0.0"
fi
- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
ip netns exec "$listener_ns" nft flush ruleset
- echo "SKIP: $msg, ip $r6flag rule failed"
+ mptcp_lib_pr_skip "$msg, ip $r6flag rule failed"
mptcp_lib_fail_if_expected_feature "ip rule"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
- echo "SKIP: $msg, ip route add local $local_addr failed"
+ mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed"
mptcp_lib_fail_if_expected_feature "ip route"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- echo "INFO: test $msg"
+ mptcp_lib_pr_info "test $msg"
- TEST_COUNT=10000
+ port=$((20000 - 1))
local extra_args="-o TRANSPARENT"
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
${connect_addr} ${local_addr} "${extra_args}"
@@ -716,12 +729,12 @@ EOF
ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
if [ $lret -ne 0 ]; then
- echo "FAIL: $msg, mptcp connection error" 1>&2
+ mptcp_lib_pr_fail "$msg, mptcp connection error"
ret=$lret
return 1
fi
- echo "PASS: $msg"
+ mptcp_lib_pr_info "$msg pass"
return 0
}
@@ -730,7 +743,7 @@ run_tests_peekmode()
local peekmode="$1"
TEST_GROUP="peek mode: ${peekmode}"
- echo "INFO: with peek mode: ${peekmode}"
+ mptcp_lib_pr_info "with peek mode: ${peekmode}"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
@@ -740,12 +753,12 @@ run_tests_mptfo()
TEST_GROUP="MPTFO"
if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
- echo "INFO: TFO not supported by the kernel: SKIP"
+ mptcp_lib_pr_skip "TFO not supported by the kernel"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- echo "INFO: with MPTFO start"
+ mptcp_lib_pr_info "with MPTFO start"
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
@@ -757,7 +770,7 @@ run_tests_mptfo()
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
- echo "INFO: with MPTFO end"
+ mptcp_lib_pr_info "with MPTFO end"
}
run_tests_disconnect()
@@ -768,7 +781,7 @@ run_tests_disconnect()
TEST_GROUP="full disconnect"
if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
- echo "INFO: Full disconnect not supported: SKIP"
+ mptcp_lib_pr_skip "Full disconnect not supported"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -781,7 +794,7 @@ run_tests_disconnect()
cin_disconnect="$old_cin"
connect_per_transfer=3
- echo "INFO: disconnect"
+ mptcp_lib_pr_info "disconnect"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
@@ -805,10 +818,10 @@ log_if_error()
local msg="$1"
if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
+ mptcp_lib_pr_fail "${msg}"
final_ret=${ret}
- ret=0
+ ret=${KSFT_PASS}
return ${final_ret}
fi
@@ -830,7 +843,7 @@ check_mptcp_disabled
stop_if_error "The kernel configuration is not valid for MPTCP"
-echo "INFO: validating network environment with pings"
+print_larger_title "Validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
do_ping "$ns1" $sender dead:beef:1::1
@@ -852,12 +865,13 @@ done
mptcp_lib_result_code "${ret}" "ping tests"
stop_if_error "Could not even run ping tests"
+mptcp_lib_pr_ok
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
-echo -n "INFO: Using loss of $tc_loss "
-test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+tc_info="loss of $tc_loss "
+test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms "
-reorder_delay=$(($tc_delay / 4))
+reorder_delay=$((tc_delay / 4))
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
@@ -866,17 +880,17 @@ if [ -z "${tc_reorder}" ]; then
if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
-echo "on ns3eth4"
+mptcp_lib_pr_info "Using ${tc_info}on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 3a5b63026191..5e9211e89825 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -21,19 +21,19 @@ cinfail=""
cinsent=""
tmpfile=""
cout=""
+err=""
capout=""
ns1=""
ns2=""
-ksft_skip=4
iptables="iptables"
ip6tables="ip6tables"
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-capture=0
-checksum=0
+capture=false
+checksum=false
ip_mptcp=0
check_invert=0
-validate_checksum=0
+validate_checksum=false
init=0
evts_ns1=""
evts_ns2=""
@@ -47,7 +47,7 @@ declare -A all_tests
declare -a only_tests_ids
declare -a only_tests_names
declare -A failed_tests
-TEST_COUNT=0
+MPTCP_LIB_TEST_FORMAT="%03u %s\n"
TEST_NAME=""
nr_blank=6
@@ -85,22 +85,12 @@ init_partial()
{
capout=$(mktemp)
- local sec rndh
- sec=$(date +%s)
- rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-
- ns1="ns1-$rndh"
- ns2="ns2-$rndh"
+ mptcp_lib_ns_init ns1 ns2
local netns
for netns in "$ns1" "$ns2"; do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
- if [ $checksum -eq 1 ]; then
+ if $checksum; then
ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
fi
done
@@ -144,46 +134,22 @@ cleanup_partial()
{
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2"; do
- ip netns del $netns
- rm -f /tmp/$netns.{nstat,out}
- done
-}
-
-check_tools()
-{
- mptcp_lib_check_mptcp
- mptcp_lib_check_kallsyms
-
- if ! ip -Version &> /dev/null; then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
- fi
-
- # Use the legacy version if available to support old kernel versions
- if iptables-legacy -V &> /dev/null; then
- iptables="iptables-legacy"
- ip6tables="ip6tables-legacy"
- elif ! iptables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without iptables tool"
- exit $ksft_skip
- elif ! ip6tables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without ip6tables tool"
- exit $ksft_skip
- fi
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
}
init() {
init=1
- check_tools
+ mptcp_lib_check_mptcp
+ mptcp_lib_check_kallsyms
+ mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cinsent=$(mktemp)
cout=$(mktemp)
+ err=$(mktemp)
evts_ns1=$(mktemp)
evts_ns2=$(mktemp)
@@ -199,14 +165,10 @@ cleanup()
rm -f "$sin" "$sout" "$cinsent" "$cinfail"
rm -f "$tmpfile"
rm -rf $evts_ns1 $evts_ns2
+ rm -f "$err"
cleanup_partial
}
-print_title()
-{
- printf "%03u %s\n" "${TEST_COUNT}" "${TEST_NAME}"
-}
-
print_check()
{
printf "%-${nr_blank}s%-36s" " " "${*}"
@@ -222,17 +184,17 @@ print_info()
print_ok()
{
- mptcp_lib_print_ok "[ ok ]${1:+ ${*}}"
+ mptcp_lib_pr_ok "${@}"
}
print_fail()
{
- mptcp_lib_print_err "[fail]${1:+ ${*}}"
+ mptcp_lib_pr_fail "${@}"
}
print_skip()
{
- mptcp_lib_print_warn "[skip]${1:+ ${*}}"
+ mptcp_lib_pr_skip "${@}"
}
# [ $1: fail msg ]
@@ -265,7 +227,7 @@ skip_test()
local i
for i in "${only_tests_ids[@]}"; do
- if [ "${TEST_COUNT}" -eq "${i}" ]; then
+ if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then
return 1
fi
done
@@ -300,14 +262,13 @@ reset()
TEST_NAME="${1}"
- TEST_COUNT=$((TEST_COUNT+1))
-
if skip_test; then
+ MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
last_test_ignored=1
return 1
fi
- print_title
+ mptcp_lib_print_title "${TEST_NAME}"
if [ "${init}" != "1" ]; then
init
@@ -380,7 +341,7 @@ reset_with_checksum()
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
- validate_checksum=1
+ validate_checksum=true
}
reset_with_allow_join_id0()
@@ -413,7 +374,7 @@ reset_with_allow_join_id0()
setup_fail_rules()
{
check_invert=1
- validate_checksum=1
+ validate_checksum=true
local i="$1"
local ip="${2:-4}"
local tables
@@ -430,15 +391,15 @@ setup_fail_rules()
-p tcp \
-m length --length 150:9999 \
-m statistic --mode nth --packet 1 --every 99999 \
- -j MARK --set-mark 42 || return ${ksft_skip}
+ -j MARK --set-mark 42 || return ${KSFT_SKIP}
- tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip}
+ tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP}
tc -n $ns2 filter add dev ns2eth$i egress \
protocol ip prio 1000 \
handle 42 fw \
action pedit munge offset 148 u8 invert \
pipe csum tcp \
- index 100 || return ${ksft_skip}
+ index 100 || return ${KSFT_SKIP}
}
reset_with_fail()
@@ -452,7 +413,7 @@ reset_with_fail()
local rc=0
setup_fail_rules "${@}" || rc=$?
- if [ ${rc} -eq ${ksft_skip} ]; then
+ if [ ${rc} -eq ${KSFT_SKIP} ]; then
mark_as_skipped "unable to set the 'fail' rules"
return 1
fi
@@ -462,12 +423,8 @@ reset_with_events()
{
reset "${1}" || return 1
- :> "$evts_ns1"
- :> "$evts_ns2"
- ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 &
- evts_ns1_pid=$!
- ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 &
- evts_ns2_pid=$!
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
}
reset_with_tcp_filter()
@@ -492,13 +449,15 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
- ret=1
+ ret=${KSFT_FAIL}
- print_fail "${@}"
+ if [ ${#} -gt 0 ]; then
+ print_fail "${@}"
+ fi
# just in case a test is marked twice as failed
if [ ${last_test_failed} -eq 0 ]; then
- failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+ failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}"
dump_stats
last_test_failed=1
fi
@@ -640,14 +599,9 @@ wait_mpj()
kill_events_pids()
{
mptcp_lib_kill_wait $evts_ns1_pid
+ evts_ns1_pid=0
mptcp_lib_kill_wait $evts_ns2_pid
-}
-
-kill_tests_wait()
-{
- #shellcheck disable=SC2046
- kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1)
- wait
+ evts_ns2_pid=0
}
pm_nl_set_limits()
@@ -801,18 +755,18 @@ pm_nl_check_endpoint()
line="${line% }"
# the dump order is: address id flags port dev
[ -n "$addr" ] && expected_line="$addr"
- expected_line="$expected_line $id"
- [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$port" ] && expected_line="$expected_line $port"
+ expected_line+=" $id"
+ [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$port" ] && expected_line+=" $port"
else
line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
# the dump order is: id flags dev address port
expected_line="$id"
- [ -n "$flags" ] && expected_line="$expected_line $flags"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
- [ -n "$_port" ] && expected_line="$expected_line $_port"
+ [ -n "$flags" ] && expected_line+=" $flags"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$addr" ] && expected_line+=" $addr"
+ [ -n "$_port" ] && expected_line+=" $_port"
fi
if [ "$line" = "$expected_line" ]; then
print_ok
@@ -1014,7 +968,7 @@ do_transfer()
local srv_proto="$4"
local connect_addr="$5"
- local port=$((10000 + TEST_COUNT - 1))
+ local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1))
local cappid
local FAILING_LINKS=${FAILING_LINKS:-""}
local fastclose=${fastclose:-""}
@@ -1024,7 +978,7 @@ do_transfer()
:> "$sout"
:> "$capout"
- if [ $capture -eq 1 ]; then
+ if $capture; then
local capuser
if [ -z $SUDO_USER ] ; then
capuser=""
@@ -1032,9 +986,9 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}")
- echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
cappid=$!
@@ -1126,7 +1080,7 @@ do_transfer()
wait $spid
local rets=$?
- if [ $capture -eq 1 ]; then
+ if $capture; then
sleep 1
kill $cappid
fi
@@ -1263,7 +1217,7 @@ chk_csum_nr()
print_check "sum"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns1" ]; then
- extra_msg="$extra_msg ns1=$count"
+ extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1276,7 +1230,7 @@ chk_csum_nr()
print_check "csum"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns2" ]; then
- extra_msg="$extra_msg ns2=$count"
+ extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1320,7 +1274,7 @@ chk_fail_nr()
print_check "ftx"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
if [ "$count" != "$fail_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1334,7 +1288,7 @@ chk_fail_nr()
print_check "failrx"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
if [ "$count" != "$fail_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1369,7 +1323,7 @@ chk_fclose_nr()
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
@@ -1380,7 +1334,7 @@ chk_fclose_nr()
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
@@ -1514,7 +1468,7 @@ chk_join_nr()
else
print_ok
fi
- if [ $validate_checksum -eq 1 ]; then
+ if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
chk_rst_nr $rst_nr $rst_nr
@@ -1749,7 +1703,7 @@ chk_rm_nr()
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg="$extra_msg simult"
+ extra_msg+=" simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -2830,29 +2784,16 @@ backup_tests()
fi
}
-SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
-LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
-LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
-
-AF_INET=2
-AF_INET6=10
-
verify_listener_events()
{
- local evt=$1
local e_type=$2
- local e_family=$3
local e_saddr=$4
local e_sport=$5
- local type
- local family
- local saddr
- local sport
local name
- if [ $e_type = $LISTENER_CREATED ]; then
+ if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then
name="LISTENER_CREATED"
- elif [ $e_type = $LISTENER_CLOSED ]; then
+ elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then
name="LISTENER_CLOSED "
else
name="$e_type"
@@ -2865,23 +2806,11 @@ verify_listener_events()
return
fi
- type=$(mptcp_lib_evts_get_info type "$evt" "$e_type")
- family=$(mptcp_lib_evts_get_info family "$evt" "$e_type")
- sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type")
- if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type")
- else
- saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type")
- fi
-
- if [ $type ] && [ $type = $e_type ] &&
- [ $family ] && [ $family = $e_family ] &&
- [ $saddr ] && [ $saddr = $e_saddr ] &&
- [ $sport ] && [ $sport = $e_sport ]; then
+ if mptcp_lib_verify_listener_events "${@}"; then
print_ok
return 0
fi
- fail_test "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport"
+ fail_test
}
add_addr_ports_tests()
@@ -2919,8 +2848,10 @@ add_addr_ports_tests()
chk_add_nr 1 1 1
chk_rm_nr 1 1 invert
- verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100
- verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
kill_events_pids
fi
@@ -3340,16 +3271,17 @@ userspace_pm_rm_sf()
{
local evts=$evts_ns1
local t=${3:-1}
- local ip=4
+ local ip
local tk da dp sp
local cnt
[ "$1" == "$ns2" ] && evts=$evts_ns2
- if mptcp_lib_is_v6 $2; then ip=6; fi
+ [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4
+ [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6
tk=$(mptcp_lib_evts_get_info token "$evts")
- da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t)
- dp=$(mptcp_lib_evts_get_info dport "$evts" $t)
- sp=$(mptcp_lib_evts_get_info sport "$evts" $t)
+ da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2)
+ dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2)
+ sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2)
cnt=$(rm_sf_count ${1})
ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
@@ -3357,6 +3289,77 @@ userspace_pm_rm_sf()
wait_rm_sf $1 "${cnt}"
}
+check_output()
+{
+ local cmd="$1"
+ local expected="$2"
+ local msg="$3"
+ local rc=0
+
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ fail_test "fail to check output # error ${rc}"
+ elif [ ${rc} -eq 0 ]; then
+ print_ok
+ elif [ ${rc} -eq 1 ]; then
+ fail_test "fail to check output # different output"
+ fi
+}
+
+# $1: ns
+userspace_pm_dump()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl dump token $tk
+}
+
+# $1: ns ; $2: id
+userspace_pm_get_addr()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl get $2 token $tk
+}
+
+userspace_pm_chk_dump_addr()
+{
+ local ns="${1}"
+ local exp="${2}"
+ local check="${3}"
+
+ print_check "dump addrs ${check}"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then
+ check_output "userspace_pm_dump ${ns}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
+userspace_pm_chk_get_addr()
+{
+ local ns="${1}"
+ local id="${2}"
+ local exp="${3}"
+
+ print_check "get id ${id} addr"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then
+ check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3436,24 +3439,35 @@ userspace_tests()
if reset_with_events "userspace pm add & remove address" &&
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
- pm_nl_set_limits $ns2 1 1
+ pm_nl_set_limits $ns2 2 2
speed=5 \
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
- chk_join_nr 1 1 1
- chk_add_nr 1 1
- chk_mptcp_info subflows 1 subflows 1
- chk_subflows_total 2 2
- chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+ userspace_pm_add_addr $ns1 10.0.3.1 20
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_mptcp_info subflows 2 subflows 2
+ chk_subflows_total 3 3
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+ userspace_pm_chk_dump_addr "${ns1}" \
+ $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \
+ "signal"
+ userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
+ userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
userspace_pm_rm_addr $ns1 10
- userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED
- chk_rm_nr 1 1 invert
+ userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" \
+ "id 20 flags signal 10.0.3.1" "after rm_addr 10"
+ userspace_pm_rm_addr $ns1 20
+ userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
+ chk_rm_nr 2 2 invert
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
fi
# userspace pm create destroy subflow
@@ -3469,13 +3483,20 @@ userspace_tests()
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 20 flags subflow 10.0.3.2" \
+ "subflow"
+ userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
userspace_pm_rm_addr $ns2 20
- userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED
+ userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "" \
+ "after rm_addr 20"
chk_rm_nr 1 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
fi
# userspace pm create id 0 subflow
@@ -3490,11 +3511,13 @@ userspace_tests()
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
userspace_pm_add_sf $ns2 10.0.3.2 0
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 0 flags subflow 10.0.3.2" "id 0 subflow"
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
fi
# userspace pm remove initial subflow
@@ -3518,7 +3541,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
fi
# userspace pm send RM_ADDR for ID 0
@@ -3544,7 +3567,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
fi
}
@@ -3558,7 +3581,8 @@ endpoint_tests()
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
speed=slow \
- run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
wait_mpj $ns1
pm_nl_check_endpoint "creation" \
@@ -3573,7 +3597,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
- kill_tests_wait
+ mptcp_lib_kill_wait $tests_pid
fi
if reset "delete and re-add" &&
@@ -3582,7 +3606,8 @@ endpoint_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
test_linkfail=4 speed=20 \
- run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
wait_mpj $ns2
chk_subflow_nr "before delete" 2
@@ -3597,7 +3622,7 @@ endpoint_tests()
wait_mpj $ns2
chk_subflow_nr "after re-add" 2
chk_mptcp_info subflows 1 subflows 1
- kill_tests_wait
+ mptcp_lib_kill_wait $tests_pid
fi
}
@@ -3606,7 +3631,7 @@ usage()
{
if [ -n "${1}" ]; then
echo "${1}"
- ret=1
+ ret=${KSFT_FAIL}
fi
echo "mptcp_join usage:"
@@ -3669,10 +3694,10 @@ while getopts "${all_tests_args}cCih" opt; do
tests+=("${all_tests[${opt}]}")
;;
c)
- capture=1
+ capture=true
;;
C)
- checksum=1
+ checksum=true
;;
i)
ip_mptcp=1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 022262a2cfe0..d529b4b37af8 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -6,9 +6,23 @@ readonly KSFT_FAIL=1
readonly KSFT_SKIP=4
# shellcheck disable=SC2155 # declare and assign separately
-readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g')
+readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
+
+# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
+declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
+declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
+
+declare -rx MPTCP_LIB_AF_INET=2
+declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
+MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_TEST_COUNTER=0
+MPTCP_LIB_TEST_FORMAT="%02u %-50s"
# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
@@ -47,6 +61,23 @@ mptcp_lib_print_err() {
mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}"
}
+# shellcheck disable=SC2120 # parameters are optional
+mptcp_lib_pr_ok() {
+ mptcp_lib_print_ok "[ OK ]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_skip() {
+ mptcp_lib_print_warn "[SKIP]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_fail() {
+ mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_info() {
+ mptcp_lib_print_info "INFO: ${*}"
+}
+
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
@@ -77,14 +108,14 @@ mptcp_lib_has_file() {
mptcp_lib_check_mptcp() {
if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
- echo "SKIP: MPTCP support is not available"
+ mptcp_lib_pr_skip "MPTCP support is not available"
exit ${KSFT_SKIP}
fi
}
mptcp_lib_check_kallsyms() {
if ! mptcp_lib_has_file "/proc/kallsyms"; then
- echo "SKIP: CONFIG_KALLSYMS is missing"
+ mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing"
exit ${KSFT_SKIP}
fi
}
@@ -146,12 +177,26 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+__mptcp_lib_result_check_duplicated() {
+ local subtest
+
+ for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+ if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then
+ MPTCP_LIB_SUBTESTS_DUPLICATED=1
+ mptcp_lib_print_err "Duplicated entry: ${*}"
+ break
+ fi
+ done
+}
+
__mptcp_lib_result_add() {
local result="${1}"
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
+ __mptcp_lib_result_check_duplicated "${*}"
+
MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
}
@@ -206,6 +251,12 @@ mptcp_lib_result_print_all_tap() {
for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
printf "%s\n" "${subtest}"
done
+
+ if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] &&
+ mptcp_lib_expect_all_features; then
+ mptcp_lib_print_err "Duplicated test entries"
+ exit ${KSFT_FAIL}
+ fi
}
# get the value of keyword $1 in the line marked by keyword $2
@@ -213,9 +264,9 @@ mptcp_lib_get_info_value() {
grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
}
-# $1: info name ; $2: evts_ns ; $3: event type
+# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() {
- mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}"
+ grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
}
# $1: PID
@@ -271,7 +322,7 @@ mptcp_lib_check_transfer() {
local what="${3}"
if ! cmp "$in" "$out" > /dev/null 2>&1; then
- echo "[ FAIL ] $what does not match (in, out):"
+ mptcp_lib_pr_fail "$what does not match (in, out):"
mptcp_lib_print_file_err "$in"
mptcp_lib_print_file_err "$out"
@@ -298,3 +349,159 @@ mptcp_lib_wait_local_port_listen() {
sleep 0.1
done
}
+
+mptcp_lib_check_output() {
+ local err="${1}"
+ local cmd="${2}"
+ local expected="${3}"
+ local cmd_ret=0
+ local out
+
+ if ! out=$(${cmd} 2>"${err}"); then
+ cmd_ret=${?}
+ fi
+
+ if [ ${cmd_ret} -ne 0 ]; then
+ mptcp_lib_pr_fail "command execution '${cmd}' stderr"
+ cat "${err}"
+ return 2
+ elif [ "${out}" = "${expected}" ]; then
+ return 0
+ else
+ mptcp_lib_pr_fail "expected '${expected}' got '${out}'"
+ return 1
+ fi
+}
+
+mptcp_lib_check_tools() {
+ local tool
+
+ for tool in "${@}"; do
+ case "${tool}" in
+ "ip")
+ if ! ip -Version &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without ip tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "ss")
+ if ! ss -h | grep -q MPTCP; then
+ mptcp_lib_pr_skip "ss tool does not support MPTCP"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "iptables"* | "ip6tables"*)
+ if ! "${tool}" -V &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run all tests without ${tool}"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ *)
+ mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+ done
+}
+
+mptcp_lib_ns_init() {
+ local sec rndh
+
+ sec=$(date +%s)
+ rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)
+
+ local netns
+ for netns in "${@}"; do
+ eval "${netns}=${netns}-${rndh}"
+
+ ip netns add "${!netns}" || exit ${KSFT_SKIP}
+ ip -net "${!netns}" link set lo up
+ ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+}
+
+mptcp_lib_ns_exit() {
+ local netns
+ for netns in "${@}"; do
+ ip netns del "${netns}"
+ rm -f /tmp/"${netns}".{nstat,out}
+ done
+}
+
+mptcp_lib_events() {
+ local ns="${1}"
+ local evts="${2}"
+ declare -n pid="${3}"
+
+ :>"${evts}"
+
+ mptcp_lib_kill_wait "${pid:-0}"
+ ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 &
+ pid=$!
+}
+
+mptcp_lib_print_title() {
+ : "${MPTCP_LIB_TEST_COUNTER:?}"
+ : "${MPTCP_LIB_TEST_FORMAT:?}"
+
+ # shellcheck disable=SC2059 # the format is in a variable
+ printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}"
+}
+
+# $1: var name ; $2: prev ret
+mptcp_lib_check_expected_one() {
+ local var="${1}"
+ local exp="e_${var}"
+ local prev_ret="${2}"
+
+ if [ "${!var}" = "${!exp}" ]; then
+ return 0
+ fi
+
+ if [ "${prev_ret}" = "0" ]; then
+ mptcp_lib_pr_fail
+ fi
+
+ mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'."
+ return 1
+}
+
+# $@: all var names to check
+mptcp_lib_check_expected() {
+ local rc=0
+ local var
+
+ for var in "${@}"; do
+ mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1
+ done
+
+ return "${rc}"
+}
+
+# shellcheck disable=SC2034 # Some variables are used below but indirectly
+mptcp_lib_verify_listener_events() {
+ local evt=${1}
+ local e_type=${2}
+ local e_family=${3}
+ local e_saddr=${4}
+ local e_sport=${5}
+ local type
+ local family
+ local saddr
+ local sport
+ local rc=0
+
+ type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}")
+ family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}")
+ if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then
+ saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}")
+ else
+ saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}")
+ fi
+ sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}")
+
+ mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
+ return "${rc}"
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index c643872ddf47..e2d70c18786e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
ret=0
@@ -8,17 +13,14 @@ sin=""
sout=""
cin=""
cout=""
-ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
iptables="iptables"
ip6tables="ip6tables"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns_sbox="ns_sbox-$rndh"
+ns1=""
+ns2=""
+ns_sbox=""
add_mark_rules()
{
@@ -40,17 +42,10 @@ add_mark_rules()
init()
{
- local netns
- for netns in "$ns1" "$ns2" "$ns_sbox";do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
- done
+ mptcp_lib_ns_init ns1 ns2 ns_sbox
local i
- for i in `seq 1 4`; do
+ for i in $(seq 1 4); do
ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
@@ -77,36 +72,18 @@ init()
add_mark_rules $ns2 2
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
- local netns
- for netns in "$ns1" "$ns2" "$ns_sbox"; do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
}
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-# Use the legacy version if available to support old kernel versions
-if iptables-legacy -V &> /dev/null; then
- iptables="iptables-legacy"
- ip6tables="ip6tables-legacy"
-elif ! iptables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without iptables tool"
- exit $ksft_skip
-elif ! ip6tables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without ip6tables tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip "${iptables}" "${ip6tables}"
check_mark()
{
@@ -126,8 +103,9 @@ check_mark()
local v
for v in $values; do
if [ $v -ne 0 ]; then
- echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
- ret=1
+ mptcp_lib_pr_fail "got $tables $values in ns $ns," \
+ "not 0 - not all expected packets marked"
+ ret=${KSFT_FAIL}
return 1
fi
done
@@ -135,6 +113,11 @@ check_mark()
return 0
}
+print_title()
+{
+ mptcp_lib_print_title "${@}"
+}
+
do_transfer()
{
local listener_ns="$1"
@@ -184,8 +167,9 @@ do_transfer()
wait $spid
local rets=$?
+ print_title "Transfer ${ip:2}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo " client exit code $retc, server $rets" 1>&2
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
@@ -194,10 +178,17 @@ do_transfer()
mptcp_lib_result_fail "transfer ${ip}"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
+ if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then
+ rets=1
+ else
+ mptcp_lib_pr_ok
+ fi
+ mptcp_lib_result_code "${rets}" "transfer ${ip}"
+ print_title "Mark ${ip:2}"
if [ $local_addr = "::" ];then
check_mark $listener_ns 6 || retc=1
check_mark $connector_ns 6 || retc=1
@@ -206,15 +197,13 @@ do_transfer()
check_mark $connector_ns 4 || retc=1
fi
- mptcp_lib_check_transfer $cin $sout "file received by server"
- rets=$?
-
mptcp_lib_result_code "${retc}" "mark ${ip}"
- mptcp_lib_result_code "${rets}" "transfer ${ip}"
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ mptcp_lib_pr_ok
return 0
fi
+ mptcp_lib_pr_fail
return 1
}
@@ -235,7 +224,7 @@ do_mptcp_sockopt_tests()
local lret=0
if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
- echo "INFO: MPTCP sockopt not supported: SKIP"
+ mptcp_lib_pr_skip "MPTCP sockopt not supported"
mptcp_lib_result_skip "sockopt"
return
fi
@@ -243,23 +232,27 @@ do_mptcp_sockopt_tests()
ip netns exec "$ns_sbox" ./mptcp_sockopt
lret=$?
+ print_title "SOL_MPTCP sockopt v4"
if [ $lret -ne 0 ]; then
- echo "FAIL: SOL_MPTCP getsockopt" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "sockopt v4"
ret=$lret
return
fi
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "sockopt v4"
ip netns exec "$ns_sbox" ./mptcp_sockopt -6
lret=$?
+ print_title "SOL_MPTCP sockopt v6"
if [ $lret -ne 0 ]; then
- echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "sockopt v6"
ret=$lret
return
fi
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "sockopt v6"
}
@@ -282,16 +275,17 @@ run_tests()
do_tcpinq_test()
{
+ print_title "TCP_INQ cmsg/ioctl $*"
ip netns exec "$ns_sbox" ./mptcp_inq "$@"
local lret=$?
if [ $lret -ne 0 ];then
ret=$lret
- echo "FAIL: mptcp_inq $@" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "TCP_INQ: $*"
return $lret
fi
- echo "PASS: TCP_INQ cmsg/ioctl $@"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "TCP_INQ: $*"
return $lret
}
@@ -301,7 +295,7 @@ do_tcpinq_tests()
local lret=0
if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
- echo "INFO: TCP_INQ not supported: SKIP"
+ mptcp_lib_pr_skip "TCP_INQ not supported"
mptcp_lib_result_skip "TCP_INQ"
return
fi
@@ -337,15 +331,7 @@ trap cleanup EXIT
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1
-if [ $ret -eq 0 ];then
- echo "PASS: all packets had packet mark set"
-fi
-
do_mptcp_sockopt_tests
-if [ $ret -eq 0 ];then
- echo "PASS: SOL_MPTCP getsockopt has expected information"
-fi
-
do_tcpinq_tests
mptcp_lib_result_print_all_tap
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 8f4ff123a7eb..6ab8c5d36340 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,77 +1,69 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-ksft_skip=4
ret=0
usage() {
echo "Usage: $0 [ -h ]"
}
-
+optstring=h
while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
+ns1=""
err=$(mktemp)
-ret=0
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f $err
- ip netns del $ns1
+ mptcp_lib_ns_exit "${ns1}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
trap cleanup EXIT
-ip netns add $ns1 || exit $ksft_skip
-ip -net $ns1 link set lo up
-ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+mptcp_lib_ns_init ns1
check()
{
local cmd="$1"
local expected="$2"
local msg="$3"
- local out=`$cmd 2>$err`
- local cmd_ret=$?
-
- printf "%-50s" "$msg"
- if [ $cmd_ret -ne 0 ]; then
- echo "[FAIL] command execution '$cmd' stderr "
- cat $err
- mptcp_lib_result_fail "${msg} # error ${cmd_ret}"
- ret=1
- elif [ "$out" = "$expected" ]; then
- echo "[ OK ]"
+ local rc=0
+
+ mptcp_lib_print_title "$msg"
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ mptcp_lib_result_fail "${msg} # error ${rc}"
+ ret=${KSFT_FAIL}
+ elif [ ${rc} -eq 0 ]; then
+ mptcp_lib_print_ok "[ OK ]"
mptcp_lib_result_pass "${msg}"
- else
- echo -n "[FAIL] "
- echo "expected '$expected' got '$out'"
+ elif [ ${rc} -eq 1 ]; then
mptcp_lib_result_fail "${msg} # different output"
- ret=1
+ ret=${KSFT_FAIL}
fi
}
@@ -105,14 +97,14 @@ check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
-for i in `seq 5 9`; do
+for i in $(seq 5 9); do
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
done
check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
ip netns exec $ns1 ./pm_nl_ctl del 9
-for i in `seq 10 255`; do
+for i in $(seq 10 255); do
ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
ip netns exec $ns1 ./pm_nl_ctl del $i
done
@@ -183,7 +175,7 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
subflow 10.0.1.1" " (nobackup)"
# fullmesh support has been added later
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
mptcp_lib_expect_all_features; then
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
@@ -194,6 +186,13 @@ subflow 10.0.1.1" " (nofullmesh)"
ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+else
+ for st in fullmesh nofullmesh backup,fullmesh; do
+ st=" (${st})"
+ mptcp_lib_print_title "${st}"
+ mptcp_lib_pr_skip
+ mptcp_lib_result_skip "${st}"
+ done
fi
mptcp_lib_result_print_all_tap
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 49369c4a5f26..7426a2cbd4a0 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -453,6 +453,7 @@ int csf(int fd, int pm_family, int argc, char *argv[])
char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1024];
+ u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW;
const char *params[5];
struct nlmsghdr *nh;
struct rtattr *addr;
@@ -558,6 +559,13 @@ int csf(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
}
+ /* addr flags */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
addr->rta_len = off - addr_start;
}
@@ -1079,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int32_t token = 0;
int nest_start;
u_int8_t id;
int off = 0;
@@ -1089,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
MPTCP_PM_VER);
/* the only argument is the address id */
- if (argc != 3)
+ if (argc != 3 && argc != 5)
syntax(argv);
id = atoi(argv[2]);
+ if (argc == 5 && !strcmp(argv[3], "token"))
+ token = strtoul(argv[4], NULL, 10);
nest_start = off;
nest = (void *)(data + off);
@@ -1108,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
nest->rta_len = off - nest_start;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
@@ -1119,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
1024];
pid_t pid = getpid();
struct nlmsghdr *nh;
+ u_int32_t token = 0;
+ struct rtattr *rta;
int off = 0;
+ if (argc != 2 && argc != 4)
+ syntax(argv);
+
+ if (argc == 4 && !strcmp(argv[2], "token"))
+ token = strtoul(argv[3], NULL, 10);
+
memset(data, 0, sizeof(data));
nh = (void *)data;
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
@@ -1130,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
nh->nlmsg_pid = pid;
nh->nlmsg_len = off;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index 79b65bdf05db..abc5648b59ab 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=1200
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index ae8ad5d6fb9d..1b2366220388 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -1,21 +1,30 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
+ns1=""
+ns2=""
+ns3=""
capture=false
-ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-test_cnt=1
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-60s"
ret=0
bail=0
slack=50
+large=""
+small=""
+sout=""
+cout=""
+capout=""
+size=0
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
@@ -24,25 +33,19 @@ usage() {
echo -e "\t-d: debug this script"
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cout" "$sout"
rm -f "$large" "$small"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3";do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
# "$ns1" ns2 ns3
# ns1eth1 ns2eth1 ns2eth3 ns3eth1
@@ -64,12 +67,7 @@ setup()
trap cleanup EXIT
- for i in "$ns1" "$ns2" "$ns3";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
- ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
- done
+ mptcp_lib_ns_init ns1 ns2 ns3
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
@@ -129,8 +127,7 @@ do_transfer()
local sin=$2
local max_time=$3
local port
- port=$((10000+$test_cnt))
- test_cnt=$((test_cnt+1))
+ port=$((10000+MPTCP_LIB_TEST_COUNTER))
:> "$cout"
:> "$sout"
@@ -138,6 +135,7 @@ do_transfer()
if $capture; then
local capuser
+ local rndh="${ns1:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
@@ -189,12 +187,12 @@ do_transfer()
printf "%-16s" " max $max_time "
if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
[ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
- echo "[ OK ]"
+ mptcp_lib_pr_ok
cat "$capout"
return 0
fi
- echo " [ fail ]"
+ mptcp_lib_pr_fail
echo "client exit code $retc, server $rets" 1>&2
echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
@@ -241,7 +239,7 @@ run_test()
# completion (see mptcp_connect): 200ms on each side, add some slack
time=$((time + 400 + slack))
- printf "%-60s" "$msg"
+ mptcp_lib_print_title "$msg"
do_transfer $small $large $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
@@ -250,7 +248,8 @@ run_test()
[ $bail -eq 0 ] || exit $ret
fi
- printf "%-60s" "$msg - reverse direction"
+ msg+=" - reverse direction"
+ mptcp_lib_print_title "${msg}"
do_transfer $large $small $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
@@ -264,7 +263,7 @@ while getopts "bcdh" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"b")
bail=1
@@ -277,19 +276,19 @@ while getopts "bcdh" option;do
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
setup
run_test 10 10 0 0 "balanced bwidth"
-run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
# we still need some additional infrastructure to pass the following test-cases
-run_test 30 10 0 0 "unbalanced bwidth"
-run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
-run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+run_test 10 3 0 0 "unbalanced bwidth"
+run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 6167837f48e1..9e2981f2d7f5 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -5,7 +5,7 @@
# code but we accept it.
#shellcheck disable=SC2086
-# Some variables are used below but indirectly, see check_expected_one()
+# Some variables are used below but indirectly, see verify_*_event()
#shellcheck disable=SC2034
. "$(dirname "${0}")/mptcp_lib.sh"
@@ -17,21 +17,17 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
echo "userspace pm tests are not supported by the kernel: SKIP"
exit ${KSFT_SKIP}
fi
+mptcp_lib_check_tools ip
-if ! ip -Version &> /dev/null; then
- echo "SKIP: Cannot not run test without ip tool"
- exit ${KSFT_SKIP}
-fi
+ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED}
+REMOVED=${MPTCP_LIB_EVENT_REMOVED}
+SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED}
+SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED}
+LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED}
+LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED}
-ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
-REMOVED=7 # MPTCP_EVENT_REMOVED
-SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
-SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
-LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
-LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
-
-AF_INET=2
-AF_INET6=10
+AF_INET=${MPTCP_LIB_AF_INET}
+AF_INET6=${MPTCP_LIB_AF_INET6}
file=""
server_evts=""
@@ -54,20 +50,16 @@ app6_port=50004
client_addr_id=${RANDOM:0:2}
server_addr_id=${RANDOM:0:2}
-sec=$(date +%s)
-rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
+ns1=""
+ns2=""
ret=0
test_name=""
-
-_printf() {
- stdbuf -o0 -e0 printf "${@}"
-}
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-68s"
print_title()
{
- _printf "INFO: %s\n" "${1}"
+ mptcp_lib_pr_info "${1}"
}
# $1: test name
@@ -75,36 +67,29 @@ print_test()
{
test_name="${1}"
- _printf "%-63s" "${test_name}"
-}
-
-print_results()
-{
- _printf "[%s]\n" "${1}"
+ mptcp_lib_print_title "${test_name}"
}
test_pass()
{
- print_results " OK "
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${test_name}"
}
test_skip()
{
- print_results "SKIP"
+ mptcp_lib_pr_skip
mptcp_lib_result_skip "${test_name}"
}
# $1: msg
test_fail()
{
- print_results "FAIL"
- ret=1
-
- if [ -n "${1}" ]; then
- _printf "\t%s\n" "${1}"
+ if [ ${#} -gt 0 ]
+ then
+ mptcp_lib_pr_fail "${@}"
fi
-
+ ret=${KSFT_FAIL}
mptcp_lib_result_fail "${test_name}"
}
@@ -122,23 +107,18 @@ cleanup()
mptcp_lib_kill_wait $pid
done
- local netns
- for netns in "$ns1" "$ns2" ;do
- ip netns del "$netns"
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
rm -rf $file $client_evts $server_evts
- _printf "Done\n"
+ mptcp_lib_pr_info "Done"
}
trap cleanup EXIT
# Create and configure network namespaces for testing
+mptcp_lib_ns_init ns1 ns2
for i in "$ns1" "$ns2" ;do
- ip netns add "$i" || exit 1
- ip -net "$i" link set lo up
- ip netns exec "$i" sysctl -q net.mptcp.enabled=1
ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
done
@@ -160,17 +140,23 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
ip -net "$ns2" link set ns2eth1 up
+file=$(mktemp)
+mptcp_lib_make_file "$file" 2 1
+
+# Capture netlink events over the two network namespaces running
+# the MPTCP client and server
+client_evts=$(mktemp)
+mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
+server_evts=$(mktemp)
+mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
+sleep 0.5
+
print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
make_connection()
{
- if [ -z "$file" ]; then
- file=$(mktemp)
- fi
- mptcp_lib_make_file "$file" 2 1
-
local is_v6=$1
local app_port=$app4_port
local connect_addr="10.0.1.1"
@@ -184,27 +170,8 @@ make_connection()
is_v6="v4"
fi
- # Capture netlink events over the two network namespaces running
- # the MPTCP client and server
- if [ -z "$client_evts" ]; then
- client_evts=$(mktemp)
- fi
:>"$client_evts"
- if [ $client_evts_pid -ne 0 ]; then
- mptcp_lib_kill_wait $client_evts_pid
- fi
- ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
- client_evts_pid=$!
- if [ -z "$server_evts" ]; then
- server_evts=$(mktemp)
- fi
:>"$server_evts"
- if [ $server_evts_pid -ne 0 ]; then
- mptcp_lib_kill_wait $server_evts_pid
- fi
- ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
- server_evts_pid=$!
- sleep 0.5
# Run the server
ip netns exec "$ns1" \
@@ -242,7 +209,7 @@ make_connection()
else
test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
mptcp_lib_result_print_all_tap
- exit 1
+ exit ${KSFT_FAIL}
fi
if [ "$is_v6" = "v6" ]
@@ -261,45 +228,16 @@ make_connection()
fi
}
-# $1: var name ; $2: prev ret
-check_expected_one()
-{
- local var="${1}"
- local exp="e_${var}"
- local prev_ret="${2}"
-
- if [ "${!var}" = "${!exp}" ]
- then
- return 0
- fi
-
- if [ "${prev_ret}" = "0" ]
- then
- test_fail
- fi
-
- _printf "\tExpected value for '%s': '%s', got '%s'.\n" \
- "${var}" "${!exp}" "${!var}"
- return 1
-}
-
# $@: all var names to check
check_expected()
{
- local rc=0
- local var
-
- for var in "${@}"
- do
- check_expected_one "${var}" "${rc}" || rc=1
- done
-
- if [ ${rc} -eq 0 ]
+ if mptcp_lib_check_expected "${@}"
then
test_pass
return 0
fi
+ test_fail
return 1
}
@@ -449,7 +387,7 @@ test_remove()
then
test_pass
else
- test_fail
+ test_fail "unexpected type: ${type}"
fi
# RM_ADDR using an invalid addr id should result in no action
@@ -462,7 +400,7 @@ test_remove()
then
test_pass
else
- test_fail
+ test_fail "unexpected type: ${type}"
fi
# RM_ADDR from the client to server machine
@@ -542,7 +480,7 @@ verify_subflow_events()
local remid
local info
- info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})"
+ info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
if [ "$e_type" = "$SUB_ESTABLISHED" ]
then
@@ -897,32 +835,11 @@ test_prio()
verify_listener_events()
{
- local evt=$1
- local e_type=$2
- local e_family=$3
- local e_saddr=$4
- local e_sport=$5
- local type
- local family
- local saddr
- local sport
-
- if [ $e_type = $LISTENER_CREATED ]; then
- print_test "CREATE_LISTENER $e_saddr:$e_sport"
- elif [ $e_type = $LISTENER_CLOSED ]; then
- print_test "CLOSE_LISTENER $e_saddr:$e_sport"
- fi
-
- type=$(mptcp_lib_evts_get_info type $evt $e_type)
- family=$(mptcp_lib_evts_get_info family $evt $e_type)
- sport=$(mptcp_lib_evts_get_info sport $evt $e_type)
- if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type)
+ if mptcp_lib_verify_listener_events "${@}"; then
+ test_pass
else
- saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type)
+ test_fail
fi
-
- check_expected "type" "family" "saddr" "sport"
}
test_listener()
@@ -944,6 +861,7 @@ test_listener()
local listener_pid=$!
sleep 0.5
+ print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
# ADD_ADDR from client to server machine reusing the subflow port
@@ -960,6 +878,7 @@ test_listener()
mptcp_lib_kill_wait $listener_pid
sleep 0.5
+ print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
index 4fe0befa13fb..6596fe03c77f 100755..100644
--- a/tools/testing/selftests/net/net_helper.sh
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -8,13 +8,16 @@ wait_local_port_listen()
local listener_ns="${1}"
local port="${2}"
local protocol="${3}"
- local port_hex
+ local pattern
local i
- port_hex="$(printf "%04X" "${port}")"
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
for i in $(seq 10); do
- if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \
- grep -q "${port_hex}"; then
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
break
fi
sleep 0.1
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index f8499d4c87f3..5cae53543849 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -17,6 +17,7 @@ tests="
ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct
connect_v4 ip4-xon: Basic ipv4 ping between two NS
nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT
+ nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
drop_reason drop: test drop reasons are emitted"
@@ -473,6 +474,67 @@ test_nat_connect_v4 () {
return 0
}
+# nat_related_v4 test
+# - client->server ip packets go via SNAT
+# - client solicits ICMP destination unreachable packet from server
+# - undo NAT for ICMP reply and test dst ip has been updated
+test_nat_related_v4 () {
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_nat_related_v4" || return $?
+
+ ovs_add_dp "test_nat_related_v4" natrelated4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10
+
+ # Allow ARP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1
+
+ # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \
+ "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "2" || return 1
+
+ # Allow related ICMP responses back from server and undo NAT to restore original IP
+ # Drop any ICMP related packets where dst ip hasn't been restored back to original IP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "ct(commit,nat),recirc(0x2)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \
+ "1" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \
+ "drop" || return 1
+
+ # Solicit destination unreachable response from server
+ ovs_sbx "test_nat_related_v4" ip netns exec client \
+ bash -c "echo a | nc -u -w 1 172.31.110.20 10000"
+
+ # Check to make sure no packets matched the drop rule with incorrect dst ip
+ python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \
+ | grep "drop" | grep "packets:0" >/dev/null || return 1
+
+ info "done..."
+ return 0
+}
+
# netlink_validation
# - Create a dp
# - check no warning with "old version" simulation
@@ -502,9 +564,22 @@ test_netlink_checks () {
wc -l) == 2 ] || \
return 1
+ info "Checking clone depth"
ERR_MSG="Flow actions may not be safe on all matching packets"
PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
ovs_add_flow "test_netlink_checks" nv0 \
+ 'in_port(1),eth(),eth_type(0x800),ipv4()' \
+ 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \
+ >/dev/null 2>&1 && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ info "failed - clone depth too large"
+ return 1
+ fi
+
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow "test_netlink_checks" nv0 \
'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \
&> /dev/null && return 1
POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index b97e621face9..5e0e539a323d 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -299,7 +299,7 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_PUSH_NSH", "none"),
("OVS_ACTION_ATTR_POP_NSH", "flag"),
("OVS_ACTION_ATTR_METER", "none"),
- ("OVS_ACTION_ATTR_CLONE", "none"),
+ ("OVS_ACTION_ATTR_CLONE", "recursive"),
("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
("OVS_ACTION_ATTR_ADD_MPLS", "none"),
("OVS_ACTION_ATTR_DEC_TTL", "none"),
@@ -465,29 +465,42 @@ class ovsactions(nla):
print_str += "pop_mpls"
else:
datum = self.get_attr(field[0])
- print_str += datum.dpstr(more)
+ if field[0] == "OVS_ACTION_ATTR_CLONE":
+ print_str += "clone("
+ print_str += datum.dpstr(more)
+ print_str += ")"
+ else:
+ print_str += datum.dpstr(more)
return print_str
def parse(self, actstr):
+ totallen = len(actstr)
while len(actstr) != 0:
parsed = False
+ parencount = 0
if actstr.startswith("drop"):
# If no reason is provided, the implicit drop is used (i.e no
# action). If some reason is given, an explicit action is used.
- actstr, reason = parse_extract_field(
- actstr,
- "drop(",
- "([0-9]+)",
- lambda x: int(x, 0),
- False,
- None,
- )
+ reason = None
+ if actstr.startswith("drop("):
+ parencount += 1
+
+ actstr, reason = parse_extract_field(
+ actstr,
+ "drop(",
+ "([0-9]+)",
+ lambda x: int(x, 0),
+ False,
+ None,
+ )
+
if reason is not None:
self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason])
parsed = True
else:
- return
+ actstr = actstr[len("drop"): ]
+ return (totallen - len(actstr))
elif parse_starts_block(actstr, "^(\d+)", False, True):
actstr, output = parse_extract_field(
@@ -504,6 +517,7 @@ class ovsactions(nla):
False,
0,
)
+ parencount += 1
self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid])
parsed = True
@@ -516,12 +530,22 @@ class ovsactions(nla):
for flat_act in parse_flat_map:
if parse_starts_block(actstr, flat_act[0], False):
- actstr += len(flat_act[0])
+ actstr = actstr[len(flat_act[0]):]
self["attrs"].append([flat_act[1]])
actstr = actstr[strspn(actstr, ", ") :]
parsed = True
- if parse_starts_block(actstr, "ct(", False):
+ if parse_starts_block(actstr, "clone(", False):
+ parencount += 1
+ subacts = ovsactions()
+ actstr = actstr[len("clone("):]
+ parsedLen = subacts.parse(actstr)
+ lst = []
+ self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
+ actstr = actstr[parsedLen:]
+ parsed = True
+ elif parse_starts_block(actstr, "ct(", False):
+ parencount += 1
actstr = actstr[len("ct(") :]
ctact = ovsactions.ctact()
@@ -553,6 +577,7 @@ class ovsactions(nla):
natact = ovsactions.ctact.natattr()
if actstr.startswith("("):
+ parencount += 1
t = None
actstr = actstr[1:]
if actstr.startswith("src"):
@@ -607,15 +632,29 @@ class ovsactions(nla):
actstr = actstr[strspn(actstr, ", ") :]
ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact])
- actstr = actstr[strspn(actstr, ",) ") :]
+ actstr = actstr[strspn(actstr, ", ") :]
self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
parsed = True
- actstr = actstr[strspn(actstr, "), ") :]
+ actstr = actstr[strspn(actstr, ", ") :]
+ while parencount > 0:
+ parencount -= 1
+ actstr = actstr[strspn(actstr, " "):]
+ if len(actstr) and actstr[0] != ")":
+ raise ValueError("Action str: '%s' unbalanced" % actstr)
+ actstr = actstr[1:]
+
+ if len(actstr) and actstr[0] == ")":
+ return (totallen - len(actstr))
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
if not parsed:
raise ValueError("Action str: '%s' not supported" % actstr)
+ return (totallen - len(actstr))
+
class ovskey(nla):
nla_flags = NLA_F_NESTED
@@ -2111,6 +2150,8 @@ def main(argv):
ovsflow = OvsFlow()
ndb = NDB()
+ sys.setrecursionlimit(100000)
+
if hasattr(args, "showdp"):
found = False
for iface in ndb.interfaces:
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f10879788f61..cfc84958025a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -199,6 +199,7 @@
# Same as above but with IPv6
source lib.sh
+source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
@@ -707,23 +708,23 @@ setup_xfrm6() {
}
setup_xfrm4udp() {
- setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 4 4500
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 4 4500
}
setup_xfrm6udp() {
- setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 6 4500
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 6 4500
}
setup_xfrm4udprouted() {
- setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 4 4500
+ setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 4 4500
}
setup_xfrm6udprouted() {
- setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 6 4500
+ setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 6 4500
}
setup_routing_old() {
@@ -1335,12 +1336,14 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
else
TCPDST="TCP:[${dst}]:50000"
fi
- ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+ ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile &
+ local socat_pid=$!
- sleep 1
+ wait_local_port_listen ${NS_B} 50000 tcp
- dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+ dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+ wait ${socat_pid}
size=$(du -sb $tmpoutfile)
size=${size%%/tmp/*}
@@ -1954,6 +1957,13 @@ check_command() {
return 0
}
+check_running() {
+ pid=${1}
+ cmd=${2}
+
+ [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ]
+}
+
test_cleanup_vxlanX_exception() {
outer="${1}"
encap="vxlan"
@@ -1984,11 +1994,12 @@ test_cleanup_vxlanX_exception() {
${ns_a} ip link del dev veth_A-R1 &
iplink_pid=$!
- sleep 1
- if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
- err " can't delete veth device in a timely manner, PMTU dst likely leaked"
- return 1
- fi
+ for i in $(seq 1 20); do
+ check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0
+ sleep 0.1
+ done
+ err " can't delete veth device in a timely manner, PMTU dst likely leaked"
+ return 1
}
test_cleanup_ipv6_exception() {
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index a26c5624429f..4287a8529890 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
readonly ksft_skip=4
@@ -33,6 +33,10 @@ chk_rps() {
rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
printf "%-60s" "$msg"
+
+ # In case there is more than 32 CPUs we need to remove commas from masks
+ rps_mask=${rps_mask//,}
+ expected_rps_mask=${expected_rps_mask//,}
if [ $rps_mask -eq $expected_rps_mask ]; then
echo "[ ok ]"
else
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 4667d74579d1..bdf6f10d0558 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -440,7 +440,6 @@ kci_test_encap_vxlan()
local ret=0
vxlan="test-vxlan0"
vlan="test-vlan0"
- testns="$1"
run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
dev "$devdummy" dstport 4789
if [ $? -ne 0 ]; then
@@ -485,7 +484,6 @@ kci_test_encap_fou()
{
local ret=0
name="test-fou"
- testns="$1"
run_cmd_grep 'Usage: ip fou' ip fou help
if [ $? -ne 0 ];then
end_test "SKIP: fou: iproute2 too old"
@@ -526,8 +524,8 @@ kci_test_encap()
run_cmd ip -netns "$testns" link set lo up
run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
run_cmd ip -netns "$testns" link set "$devdummy" up
- run_cmd kci_test_encap_vxlan "$testns"
- run_cmd kci_test_encap_fou "$testns"
+ run_cmd kci_test_encap_vxlan
+ run_cmd kci_test_encap_fou
ip netns del "$testns"
return $ret
@@ -803,6 +801,8 @@ kci_test_ipsec_offload()
end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
+ # we didn't create a peer, make sure we can Tx
+ ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
index 2070b57849de..2070b57849de 100755..100644
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ b/tools/testing/selftests/net/setup_loopback.sh
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
index a9a1759e035c..1f78a87f6f37 100644
--- a/tools/testing/selftests/net/setup_veth.sh
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -11,7 +11,7 @@ setup_veth_ns() {
local -r ns_mac="$4"
[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
ip -netns "${ns_name}" link set dev "${ns_dev}" up
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index a14818164102..e9fa14e10732 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -3,19 +3,16 @@
#define _GNU_SOURCE
#include <sched.h>
+#include <fcntl.h>
+
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/sysinfo.h>
#include "../kselftest_harness.h"
-#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */
-#define NR_SERVER self->nproc
-#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER)
-
FIXTURE(so_incoming_cpu)
{
- int nproc;
int *servers;
union {
struct sockaddr addr;
@@ -56,12 +53,47 @@ FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
.when_to_set = AFTER_ALL_LISTEN,
};
+static void write_sysctl(struct __test_metadata *_metadata,
+ char *filename, char *string)
+{
+ int fd, len, ret;
+
+ fd = open(filename, O_WRONLY);
+ ASSERT_NE(fd, -1);
+
+ len = strlen(string);
+ ret = write(fd, string, len);
+ ASSERT_EQ(ret, len);
+}
+
+static void setup_netns(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+ ASSERT_EQ(system("ip link set lo up"), 0);
+
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001");
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0");
+}
+
+#define NR_PORT (60001 - 10000 - 1)
+#define NR_CLIENT_PER_SERVER_DEFAULT 32
+static int nr_client_per_server, nr_server, nr_client;
+
FIXTURE_SETUP(so_incoming_cpu)
{
- self->nproc = get_nprocs();
- ASSERT_LE(2, self->nproc);
+ setup_netns(_metadata);
+
+ nr_server = get_nprocs();
+ ASSERT_LE(2, nr_server);
+
+ if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
+ nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
+ else
+ nr_client_per_server = NR_PORT / nr_server;
+
+ nr_client = nr_client_per_server * nr_server;
- self->servers = malloc(sizeof(int) * NR_SERVER);
+ self->servers = malloc(sizeof(int) * nr_server);
ASSERT_NE(self->servers, NULL);
self->in_addr.sin_family = AF_INET;
@@ -74,7 +106,7 @@ FIXTURE_TEARDOWN(so_incoming_cpu)
{
int i;
- for (i = 0; i < NR_SERVER; i++)
+ for (i = 0; i < nr_server; i++)
close(self->servers[i]);
free(self->servers);
@@ -110,10 +142,10 @@ int create_server(struct __test_metadata *_metadata,
if (variant->when_to_set == BEFORE_LISTEN)
set_so_incoming_cpu(_metadata, fd, cpu);
- /* We don't use CLIENT_PER_SERVER here not to block
+ /* We don't use nr_client_per_server here not to block
* this test at connect() if SO_INCOMING_CPU is broken.
*/
- ret = listen(fd, NR_CLIENT);
+ ret = listen(fd, nr_client);
ASSERT_EQ(ret, 0);
if (variant->when_to_set == AFTER_LISTEN)
@@ -128,7 +160,7 @@ void create_servers(struct __test_metadata *_metadata,
{
int i, ret;
- for (i = 0; i < NR_SERVER; i++) {
+ for (i = 0; i < nr_server; i++) {
self->servers[i] = create_server(_metadata, self, variant, i);
if (i == 0) {
@@ -138,7 +170,7 @@ void create_servers(struct __test_metadata *_metadata,
}
if (variant->when_to_set == AFTER_ALL_LISTEN) {
- for (i = 0; i < NR_SERVER; i++)
+ for (i = 0; i < nr_server; i++)
set_so_incoming_cpu(_metadata, self->servers[i], i);
}
}
@@ -149,7 +181,7 @@ void create_clients(struct __test_metadata *_metadata,
cpu_set_t cpu_set;
int i, j, fd, ret;
- for (i = 0; i < NR_SERVER; i++) {
+ for (i = 0; i < nr_server; i++) {
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);
@@ -162,7 +194,7 @@ void create_clients(struct __test_metadata *_metadata,
ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
ASSERT_EQ(ret, 0);
- for (j = 0; j < CLIENT_PER_SERVER; j++) {
+ for (j = 0; j < nr_client_per_server; j++) {
fd = socket(AF_INET, SOCK_STREAM, 0);
ASSERT_NE(fd, -1);
@@ -180,8 +212,8 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
int i, j, fd, cpu, ret, total = 0;
socklen_t len = sizeof(int);
- for (i = 0; i < NR_SERVER; i++) {
- for (j = 0; j < CLIENT_PER_SERVER; j++) {
+ for (i = 0; i < nr_server; i++) {
+ for (j = 0; j < nr_client_per_server; j++) {
/* If we see -EAGAIN here, SO_INCOMING_CPU is broken */
fd = accept(self->servers[i], &self->addr, &self->addrlen);
ASSERT_NE(fd, -1);
@@ -195,7 +227,7 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
}
}
- ASSERT_EQ(total, NR_CLIENT);
+ ASSERT_EQ(total, nr_client);
TH_LOG("SO_INCOMING_CPU is very likely to be "
"working correctly with %d sockets.", total);
}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 2672ac0b6d1f..8457b7ccbc09 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -134,8 +134,11 @@ static void do_recv_one(int fdr, struct timed_send *ts)
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
- if (llabs(tstop - texpect) > cfg_variance_us)
- error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+ if (llabs(tstop - texpect) > cfg_variance_us) {
+ fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us);
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ exit(1);
+ }
}
static void do_recv_verify_empty(int fdr)
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f06f4d286a9..5e861ad32a42 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -5,6 +5,7 @@
set -e
+readonly ksft_skip=4
readonly DEV="veth0"
readonly BIN="./so_txtime"
@@ -46,7 +47,7 @@ ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
-do_test() {
+run_test() {
local readonly IP="$1"
local readonly CLOCK="$2"
local readonly TXARGS="$3"
@@ -64,12 +65,25 @@ do_test() {
fi
local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
+
ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
wait "$!"
}
+do_test() {
+ run_test $@
+ [ $? -ne 0 ] && ret=1
+}
+
+do_fail_test() {
+ run_test $@
+ [ $? -eq 0 ] && ret=1
+}
+
ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+set +e
+ret=0
do_test 4 mono a,-1 a,-1
do_test 6 mono a,0 a,0
do_test 6 mono a,10 a,10
@@ -77,13 +91,20 @@ do_test 4 mono a,10,b,20 a,10,b,20
do_test 6 mono a,20,b,10 b,20,a,20
if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
- ! do_test 4 tai a,-1 a,-1
- ! do_test 6 tai a,0 a,0
+ do_fail_test 4 tai a,-1 a,-1
+ do_fail_test 6 tai a,0 a,0
do_test 6 tai a,10 a,10
do_test 4 tai a,10,b,20 a,10,b,20
do_test 6 tai a,20,b,10 b,10,a,20
else
echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+ [ $ret -eq 0 ] && ret=$ksft_skip
fi
-echo OK. All tests passed
+if [ $ret -eq 0 ]; then
+ echo OK. All tests passed
+elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then
+ echo "Ignoring errors due to slow environment" 1>&2
+ ret=0
+fi
+exit $ret
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
new file mode 100644
index 000000000000..d3277a9de987
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_RMD160=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_TCP_AO=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index c48b4970ca17..24e62120b792 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -417,9 +417,9 @@ struct test_key {
matches_vrf : 1,
is_current : 1,
is_rnext : 1,
- used_on_handshake : 1,
- used_after_accept : 1,
- used_on_client : 1;
+ used_on_server_tx : 1,
+ used_on_client_tx : 1,
+ skip_counters_checks : 1;
};
struct key_collection {
@@ -609,16 +609,14 @@ static int key_collection_socket(bool server, unsigned int port)
addr = &this_ip_dest;
sndid = key->client_keyid;
rcvid = key->server_keyid;
- set_current = key->is_current;
- set_rnext = key->is_rnext;
+ key->used_on_client_tx = set_current = key->is_current;
+ key->used_on_server_tx = set_rnext = key->is_rnext;
}
if (test_add_key_cr(sk, key->password, key->len,
*addr, vrf, sndid, rcvid, key->maclen,
key->alg, set_current, set_rnext))
test_key_error("setsockopt(TCP_AO_ADD_KEY)", key);
- if (set_current || set_rnext)
- key->used_on_handshake = 1;
#ifdef DEBUG
test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}",
server ? "server" : "client", i, collection.nr_keys,
@@ -640,22 +638,22 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server
for (i = 0; i < collection.nr_keys; i++) {
struct test_key *key = &collection.keys[i];
uint8_t sndid, rcvid;
- bool was_used;
+ bool rx_cnt_expected;
+ if (key->skip_counters_checks)
+ continue;
if (server) {
sndid = key->server_keyid;
rcvid = key->client_keyid;
- if (is_listen_sk)
- was_used = key->used_on_handshake;
- else
- was_used = key->used_after_accept;
+ rx_cnt_expected = key->used_on_client_tx;
} else {
sndid = key->client_keyid;
rcvid = key->server_keyid;
- was_used = key->used_on_client;
+ rx_cnt_expected = key->used_on_server_tx;
}
- test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used,
+ test_tcp_ao_key_counters_cmp(tst_name, a, b,
+ rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
sndid, rcvid);
}
test_tcp_ao_counters_free(a);
@@ -843,7 +841,7 @@ static void end_server(const char *tst_name, int sk,
synchronize_threads(); /* 4: verified => closed */
close(sk);
- verify_counters(tst_name, true, false, begin, &end);
+ verify_counters(tst_name, false, true, begin, &end);
synchronize_threads(); /* 5: counters */
}
@@ -916,9 +914,8 @@ static int run_client(const char *tst_name, unsigned int port,
current_index = nr_keys - 1;
if (rnext_index < 0)
rnext_index = nr_keys - 1;
- collection.keys[current_index].used_on_handshake = 1;
- collection.keys[rnext_index].used_after_accept = 1;
- collection.keys[rnext_index].used_on_client = 1;
+ collection.keys[current_index].used_on_client_tx = 1;
+ collection.keys[rnext_index].used_on_server_tx = 1;
synchronize_threads(); /* 3: accepted => send data */
if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
@@ -1059,7 +1056,16 @@ static void check_current_back(const char *tst_name, unsigned int port,
test_error("Can't change the current key");
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("verify failed");
- collection.keys[rotate_to_index].used_after_accept = 1;
+ /* There is a race here: between setting the current_key with
+ * setsockopt(TCP_AO_INFO) and starting to send some data - there
+ * might have been a segment received with the desired
+ * RNext_key set. In turn that would mean that the first outgoing
+ * segment will have the desired current_key (flipped back).
+ * Which is what the user/test wants. As it's racy, skip checking
+ * the counters, yet check what are the resulting current/rnext
+ * keys on both sides.
+ */
+ collection.keys[rotate_to_index].skip_counters_checks = 1;
end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
}
@@ -1089,7 +1095,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
}
verify_current_rnext(tst_name, sk, -1,
collection.keys[i].server_keyid);
- collection.keys[i].used_on_client = 1;
+ collection.keys[i].used_on_server_tx = 1;
synchronize_threads(); /* verify current/rnext */
}
end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index c75d82885a2e..15aeb0963058 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -62,7 +62,9 @@ int test_wait_fd(int sk, time_t sec, bool write)
return -ETIMEDOUT;
}
- if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret)
+ if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen))
+ return -errno;
+ if (ret)
return -ret;
return 0;
}
@@ -584,9 +586,11 @@ int test_client_verify(int sk, const size_t msg_len, const size_t nr,
{
size_t buf_sz = msg_len * nr;
char *buf = alloca(buf_sz);
+ ssize_t ret;
randomize_buffer(buf, buf_sz);
- if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz)
- return -1;
- return 0;
+ ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+ if (ret < 0)
+ return (int)ret;
+ return ret != buf_sz ? -1 : 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index ac06009a7f5f..7df8b8700e39 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -1,10 +1,33 @@
// SPDX-License-Identifier: GPL-2.0
-/* Author: Dmitry Safonov <dima@arista.com> */
+/*
+ * The test checks that both active and passive reset have correct TCP-AO
+ * signature. An "active" reset (abort) here is procured from closing
+ * listen() socket with non-accepted connections in the queue:
+ * inet_csk_listen_stop() => inet_child_forget() =>
+ * => tcp_disconnect() => tcp_send_active_reset()
+ *
+ * The passive reset is quite hard to get on established TCP connections.
+ * It could be procured from non-established states, but the synchronization
+ * part from userspace in order to reliably get RST seems uneasy.
+ * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state.
+ *
+ * It's important to test both passive and active RST as they go through
+ * different code-paths:
+ * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb()
+ * - tcp_v*_send_reset() create their reply skbs and send them with
+ * ip_send_unicast_reply()
+ *
+ * In both cases TCP-AO signatures have to be correct, which is verified by
+ * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
#include <inttypes.h>
#include "../../../../include/linux/kernel.h"
#include "aolib.h"
const size_t quota = 1000;
+const size_t packet_sz = 100;
/*
* Backlog == 0 means 1 connection in queue, see:
* commit 64a146513f8f ("[NET]: Revert incorrect accept queue...")
@@ -59,26 +82,6 @@ static void close_forced(int sk)
close(sk);
}
-static int test_wait_for_exception(int sk, time_t sec)
-{
- struct timeval tv = { .tv_sec = sec };
- struct timeval *ptv = NULL;
- fd_set efds;
- int ret;
-
- FD_ZERO(&efds);
- FD_SET(sk, &efds);
-
- if (sec)
- ptv = &tv;
-
- errno = 0;
- ret = select(sk + 1, NULL, NULL, &efds, ptv);
- if (ret < 0)
- return -errno;
- return ret ? sk : 0;
-}
-
static void test_server_active_rst(unsigned int port)
{
struct tcp_ao_counters cnt1, cnt2;
@@ -155,17 +158,16 @@ static void test_server_passive_rst(unsigned int port)
test_fail("server returned %zd", bytes);
}
- synchronize_threads(); /* 3: chekpoint/restore the connection */
+ synchronize_threads(); /* 3: checkpoint the client */
+ synchronize_threads(); /* 4: close the server, creating twsk */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
-
- synchronize_threads(); /* 4: terminate server + send more on client */
- bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC);
close(sk);
+
+ synchronize_threads(); /* 5: restore the socket, send more data */
test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
- synchronize_threads(); /* 5: verified => closed */
- close(sk);
+ synchronize_threads(); /* 6: server exits */
}
static void *server_fn(void *arg)
@@ -284,7 +286,7 @@ static void test_client_active_rst(unsigned int port)
test_error("test_wait_fds(): %d", err);
synchronize_threads(); /* 3: close listen socket */
- if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -323,7 +325,6 @@ static void test_client_passive_rst(unsigned int port)
struct tcp_sock_state img;
sockaddr_af saddr;
int sk, err;
- socklen_t slen = sizeof(err);
sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
if (sk < 0)
@@ -337,18 +338,51 @@ static void test_client_passive_rst(unsigned int port)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
- synchronize_threads(); /* 3: chekpoint/restore the connection */
+ synchronize_threads(); /* 3: checkpoint the client */
test_enable_repair(sk);
test_sock_checkpoint(sk, &img, &saddr);
test_ao_checkpoint(sk, &ao_img);
- test_kill_sk(sk);
+ test_disable_repair(sk);
- img.out.seq += quota;
+ synchronize_threads(); /* 4: close the server, creating twsk */
+
+ /*
+ * The "corruption" in SEQ has to be small enough to fit into TCP
+ * window, see tcp_timewait_state_process() for out-of-window
+ * segments.
+ */
+ img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */
+
+ /*
+ * FIXME: This is kind-of ugly and dirty, but it works.
+ *
+ * At this moment, the server has close'ed(sk).
+ * The passive RST that is being targeted here is new data after
+ * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST
+ *
+ * What is needed here is:
+ * (1) wait for FIN from the server
+ * (2) make sure that the ACK from the client went out
+ * (3) make sure that the ACK was received and processed by the server
+ *
+ * Otherwise, the data that will be sent from "repaired" socket
+ * post SEQ corruption may get to the server before it's in
+ * TCP_FIN_WAIT2.
+ *
+ * (1) is easy with select()/poll()
+ * (2) is possible by polling tcpi_state from TCP_INFO
+ * (3) is quite complex: as server's socket was already closed,
+ * probably the way to do it would be tcp-diag.
+ */
+ sleep(TEST_RETRANSMIT_SEC);
+
+ synchronize_threads(); /* 5: restore the socket, send more data */
+ test_kill_sk(sk);
sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
if (sk < 0)
@@ -366,25 +400,33 @@ static void test_client_passive_rst(unsigned int port)
test_disable_repair(sk);
test_sock_state_free(&img);
- synchronize_threads(); /* 4: terminate server + send more on client */
- if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC))
- test_ok("client connection broken post-seq-adjust");
- else
- test_fail("client connection still works post-seq-adjust");
-
- test_wait_for_exception(sk, TEST_TIMEOUT_SEC);
-
- if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen))
- test_error("getsockopt()");
- if (err != ECONNRESET && err != EPIPE)
- test_fail("client connection was not reset: %d", err);
+ /*
+ * This is how "passive reset" is acquired in this test from TCP_TW_RST:
+ *
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0
+ * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0
+ * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
+ */
+ err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+ /* Make sure that the connection was reset, not timeouted */
+ if (err && err == -ECONNRESET)
+ test_ok("client sock was passively reset post-seq-adjust");
+ else if (err)
+ test_fail("client sock was not reset post-seq-adjust: %d", err);
else
- test_ok("client connection was reset");
+ test_fail("client sock is yet connected post-seq-adjust");
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
- synchronize_threads(); /* 5: verified => closed */
+ synchronize_threads(); /* 6: server exits */
close(sk);
test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
}
@@ -410,6 +452,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(15, server_fn, client_fn);
+ test_init(14, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index c5b568cd7d90..6b59a652159f 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -110,9 +110,9 @@ static void try_accept(const char *tst_name, unsigned int port,
test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
out:
- synchronize_threads(); /* close() */
+ synchronize_threads(); /* test_kill_sk() */
if (sk > 0)
- close(sk);
+ test_kill_sk(sk);
}
static void server_add_routes(void)
@@ -302,10 +302,10 @@ static void try_connect(const char *tst_name, unsigned int port,
test_ok("%s: connected", tst_name);
out:
- synchronize_threads(); /* close() */
+ synchronize_threads(); /* test_kill_sk() */
/* _test_connect_socket() cleans up on failure */
if (ret > 0)
- close(sk);
+ test_kill_sk(sk);
}
#define PREINSTALL_MD5_FIRST BIT(0)
@@ -486,10 +486,10 @@ static void try_to_add(const char *tst_name, unsigned int port,
}
out:
- synchronize_threads(); /* close() */
+ synchronize_threads(); /* test_kill_sk() */
/* _test_connect_socket() cleans up on failure */
if (ret > 0)
- close(sk);
+ test_kill_sk(sk);
}
static void client_add_ip(union tcp_addr *client, const char *ip)
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 70a7d87ba2d2..1b3f89e2b86e 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -124,6 +124,16 @@ tc_check_packets()
[[ $pkts == $count ]]
}
+bridge_link_check()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local state=$1; shift
+
+ bridge -n $ns -d -j link show dev $dev | \
+ jq -e ".[][\"state\"] == \"$state\"" &> /dev/null
+}
+
################################################################################
# Setup
@@ -259,6 +269,7 @@ backup_port()
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -268,6 +279,7 @@ backup_port()
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
# Configure vx0 as the backup port of swp1 and check that packets are
@@ -284,6 +296,7 @@ backup_port()
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -293,6 +306,7 @@ backup_port()
log_test $? 0 "Forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -314,6 +328,7 @@ backup_port()
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -369,6 +384,7 @@ backup_nhid()
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -382,6 +398,7 @@ backup_nhid()
log_test $? 0 "Forwarding using VXLAN FDB entry"
run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
# Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
@@ -398,6 +415,7 @@ backup_nhid()
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -411,6 +429,7 @@ backup_nhid()
log_test $? 0 "No forwarding using VXLAN FDB entry"
run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -441,6 +460,7 @@ backup_nhid()
log_test $? 0 "No forwarding using VXLAN FDB entry"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -497,6 +517,7 @@ backup_nhid_invalid()
log_test $? 0 "Valid nexthop as backup nexthop"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -604,7 +625,9 @@ backup_nhid_ping()
run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
run_cmd "ip -n $sw2 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled
run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
log_test $? 0 "Ping with backup nexthop ID"
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 84a05a9e46d8..74ff9fb2a6f0 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -1014,10 +1014,10 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
- log_test $? 254 "Flush by port"
+ log_test $? 254 "Flush by port - matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0"
- log_test $? 255 "Flush by wrong port"
+ log_test $? 255 "Flush by port - non-matching"
# Check that when flushing by source VNI only entries programmed with
# the specified source VNI are flushed and the rest are not.
@@ -1030,9 +1030,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
- log_test $? 254 "Flush by specified source VNI"
+ log_test $? 254 "Flush by source VNI - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011"
- log_test $? 0 "Flush by unspecified source VNI"
+ log_test $? 0 "Flush by source VNI - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1058,9 +1058,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\""
- log_test $? 1 "Flush by specified routing protocol"
+ log_test $? 1 "Flush by routing protocol - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\""
- log_test $? 0 "Flush by unspecified routing protocol"
+ log_test $? 0 "Flush by routing protocol - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1075,9 +1075,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
- log_test $? 1 "Flush by specified destination IP - IPv4"
+ log_test $? 1 "Flush by IPv4 destination IP - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
- log_test $? 0 "Flush by unspecified destination IP - IPv4"
+ log_test $? 0 "Flush by IPv4 destination IP - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1089,9 +1089,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2"
- log_test $? 1 "Flush by specified destination IP - IPv6"
+ log_test $? 1 "Flush by IPv6 destination IP - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1"
- log_test $? 0 "Flush by unspecified destination IP - IPv6"
+ log_test $? 0 "Flush by IPv6 destination IP - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1104,9 +1104,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\""
- log_test $? 1 "Flush by specified UDP destination port"
+ log_test $? 1 "Flush by UDP destination port - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\""
- log_test $? 0 "Flush by unspecified UDP destination port"
+ log_test $? 0 "Flush by UDP destination port - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1121,9 +1121,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
- log_test $? 1 "Flush by device's UDP destination port"
+ log_test $? 1 "Flush by device's UDP destination port - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
- log_test $? 0 "Flush by unspecified UDP destination port"
+ log_test $? 0 "Flush by device's UDP destination port - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1136,9 +1136,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\""
- log_test $? 1 "Flush by specified destination VNI"
+ log_test $? 1 "Flush by destination VNI - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\""
- log_test $? 0 "Flush by unspecified destination VNI"
+ log_test $? 0 "Flush by destination VNI - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1153,9 +1153,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
- log_test $? 1 "Flush by destination VNI equal to source VNI"
+ log_test $? 1 "Flush by destination VNI equal to source VNI - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
- log_test $? 0 "Flush by unspecified destination VNI"
+ log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 7799e042a971..c6eda21cefb6 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1002,12 +1002,12 @@ TEST_F(tls, recv_partial)
memset(recv_mem, 0, sizeof(recv_mem));
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first),
- MSG_WAITALL), -1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first),
+ MSG_WAITALL), strlen(test_str_first));
EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
memset(recv_mem, 0, sizeof(recv_mem));
- EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second),
- MSG_WAITALL), -1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second),
+ MSG_WAITALL), strlen(test_str_second));
EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
0);
}
@@ -1485,6 +1485,51 @@ TEST_F(tls, control_msg)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls, control_msg_nomerge)
+{
+ char *rec1 = "1111";
+ char *rec2 = "2222";
+ int send_len = 5;
+ char buf[15];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+ EXPECT_EQ(memcmp(buf, rec2, send_len), 0);
+}
+
+TEST_F(tls, data_control_data)
+{
+ char *rec1 = "1111";
+ char *rec2 = "2222";
+ char *rec3 = "3333";
+ int send_len = 5;
+ char buf[15];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+ EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+}
+
TEST_F(tls, shutdown)
{
char const *test_str = "test_read";
@@ -1874,15 +1919,15 @@ TEST_F(tls_err, poll_partial_rec_async)
/* Child should sleep in poll(), never get a wake */
pfd.fd = self->cfd2;
pfd.events = POLLIN;
- EXPECT_EQ(poll(&pfd, 1, 5), 0);
+ EXPECT_EQ(poll(&pfd, 1, 20), 0);
EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
pfd.fd = self->cfd2;
pfd.events = POLLIN;
- EXPECT_EQ(poll(&pfd, 1, 5), 1);
+ EXPECT_EQ(poll(&pfd, 1, 20), 1);
- exit(!_metadata->passed);
+ exit(!__test_passed(_metadata));
}
}
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 10f2fde3686b..ec60a16c9307 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -163,7 +163,8 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n",
cur64 - start64, min_delay, max_delay);
- test_failed = true;
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ test_failed = true;
}
}
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index 31637769f59f..25baca4b148e 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -8,13 +8,13 @@ set -e
setup() {
# set 1ms delay on lo egress
- tc qdisc add dev lo root netem delay 1ms
+ tc qdisc add dev lo root netem delay 10ms
# set 2ms delay on ifb0 egress
modprobe ifb
ip link add ifb_netem0 type ifb
ip link set dev ifb_netem0 up
- tc qdisc add dev ifb_netem0 root netem delay 2ms
+ tc qdisc add dev ifb_netem0 root netem delay 20ms
# redirect lo ingress through ifb0 egress
tc qdisc add dev lo handle ffff: ingress
@@ -24,9 +24,11 @@ setup() {
}
run_test_v4v6() {
- # SND will be delayed 1000us
- # ACK will be delayed 6000us: 1 + 2 ms round-trip
- local -r args="$@ -v 1000 -V 6000"
+ # SND will be delayed 10ms
+ # ACK will be delayed 60ms: 10 + 20 ms round-trip
+ # allow +/- tolerance of 8ms
+ # wait for ACK to be queued
+ local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000"
./txtimestamp ${args} -4 -L 127.0.0.1
./txtimestamp ${args} -6 -L ::1
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index af5dc57c8ce9..8802604148dd 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
# set global exit status, but never reset nonzero one.
check_err()
@@ -197,7 +197,7 @@ run_all() {
}
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index cb664679b434..7080eae5312b 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -84,7 +84,7 @@ run_all() {
}
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index dd47fa96f6b3..e1ff645bd3d1 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -85,12 +85,12 @@ run_all() {
}
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
if [ ! -f nat6to4.o ]; then
- echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first"
+ echo "Missing nat6to4 helper. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index c079565add39..9cd5e885e91f 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,7 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+source net_helper.sh
+
+BPF_FILE="xdp_dummy.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
@@ -37,6 +39,10 @@ create_ns() {
for ns in $NS_SRC $NS_DST; do
ip netns add $ns
ip -n $ns link set dev lo up
+
+ # disable route solicitations to decrease 'noise' traffic
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0
done
ip link add name veth$SRC type veth peer name veth$DST
@@ -78,6 +84,12 @@ create_vxlan_pair() {
create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
done
+
+ # preload neighbur cache, do avoid some noisy traffic
+ local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]')
+ local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]')
+ ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC
+ ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST
}
is_ipv6() {
@@ -117,9 +129,9 @@ run_test() {
# not enable GRO
ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
- ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+ ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args &
local spid=$!
- sleep 0.1
+ wait_local_port_listen "$NS_DST" 8000 udp
ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
local retc=$?
wait $spid
@@ -166,9 +178,9 @@ run_bench() {
# bind the sender and the receiver to different CPUs to try
# get reproducible results
ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
- ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 &
+ ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 &
local spid=$!
- sleep 0.1
+ wait_local_port_listen "$NS_DST" 8000 udp
ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
local retc=$?
wait $spid
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 7badaf215de2..1d975bf52af3 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -56,7 +56,6 @@ static bool cfg_do_msgmore;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
-static const char cfg_ifname[] = "lo";
static unsigned short cfg_port = 9000;
static char buf[ETH_MAX_MTU];
@@ -69,8 +68,13 @@ struct testcase {
int r_len_last; /* recv(): size of last non-mss dgram, if any */
};
-const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
-const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = {
+ { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */
+};
+
+const struct in_addr addr4 = {
+ __constant_htonl(0x0a000001), /* 10.0.0.1 */
+};
struct testcase testcases_v4[] = {
{
@@ -274,48 +278,6 @@ struct testcase testcases_v6[] = {
}
};
-static unsigned int get_device_mtu(int fd, const char *ifname)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFMTU, &ifr))
- error(1, errno, "ioctl get mtu");
-
- return ifr.ifr_mtu;
-}
-
-static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- ifr.ifr_mtu = mtu;
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCSIFMTU, &ifr))
- error(1, errno, "ioctl set mtu");
-}
-
-static void set_device_mtu(int fd, int mtu)
-{
- int val;
-
- val = get_device_mtu(fd, cfg_ifname);
- fprintf(stderr, "device mtu (orig): %u\n", val);
-
- __set_device_mtu(fd, cfg_ifname, mtu);
- val = get_device_mtu(fd, cfg_ifname);
- if (val != mtu)
- error(1, 0, "unable to set device mtu to %u\n", val);
-
- fprintf(stderr, "device mtu (test): %u\n", val);
-}
-
static void set_pmtu_discover(int fd, bool is_ipv4)
{
int level, name, val;
@@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4)
return mtu;
}
-/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
-static void set_route_mtu(int mtu, bool is_ipv4)
-{
- struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
- struct nlmsghdr *nh;
- struct rtattr *rta;
- struct rtmsg *rt;
- char data[NLMSG_ALIGN(sizeof(*nh)) +
- NLMSG_ALIGN(sizeof(*rt)) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
- NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
- int fd, ret, alen, off = 0;
-
- alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
-
- fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (fd == -1)
- error(1, errno, "socket netlink");
-
- memset(data, 0, sizeof(data));
-
- nh = (void *)data;
- nh->nlmsg_type = RTM_NEWROUTE;
- nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
- off += NLMSG_ALIGN(sizeof(*nh));
-
- rt = (void *)(data + off);
- rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
- rt->rtm_table = RT_TABLE_MAIN;
- rt->rtm_dst_len = alen << 3;
- rt->rtm_protocol = RTPROT_BOOT;
- rt->rtm_scope = RT_SCOPE_UNIVERSE;
- rt->rtm_type = RTN_UNICAST;
- off += NLMSG_ALIGN(sizeof(*rt));
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_DST;
- rta->rta_len = RTA_LENGTH(alen);
- if (is_ipv4)
- memcpy(RTA_DATA(rta), &addr4, alen);
- else
- memcpy(RTA_DATA(rta), &addr6, alen);
- off += NLMSG_ALIGN(rta->rta_len);
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_OIF;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* MTU is a subtype in a metrics type */
- rta = (void *)(data + off);
- rta->rta_type = RTA_METRICS;
- rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* now fill MTU subtype. Note that it fits within above rta_len */
- rta = (void *)(((char *) rta) + RTA_LENGTH(0));
- rta->rta_type = RTAX_MTU;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = mtu;
-
- nh->nlmsg_len = off;
-
- ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
- if (ret != off)
- error(1, errno, "send netlink: %uB != %uB\n", ret, off);
-
- if (close(fd))
- error(1, errno, "close netlink");
-
- fprintf(stderr, "route mtu (test): %u\n", mtu);
-}
-
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
int ret;
@@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
/* Do not fragment these datagrams: only succeed if GSO works */
set_pmtu_discover(fdt, addr->sa_family == AF_INET);
- if (cfg_do_connectionless) {
- set_device_mtu(fdt, CONST_MTU_TEST);
+ if (cfg_do_connectionless)
run_all(fdt, fdr, addr, alen);
- }
if (cfg_do_connected) {
- set_device_mtu(fdt, CONST_MTU_TEST + 100);
- set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
-
if (connect(fdt, addr, alen))
error(1, errno, "connect");
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index fec24f584fe9..6c63178086b0 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -3,27 +3,56 @@
#
# Run a series of udpgso regression tests
+set -o errexit
+set -o nounset
+
+setup_loopback() {
+ ip addr add dev lo 10.0.0.1/32
+ ip addr add dev lo fd00::1/128 nodad noprefixroute
+}
+
+test_dev_mtu() {
+ setup_loopback
+ # Reduce loopback MTU
+ ip link set dev lo mtu 1500
+}
+
+test_route_mtu() {
+ setup_loopback
+ # Remove default local routes
+ ip route del local 10.0.0.1/32 table local dev lo
+ ip route del local fd00::1/128 table local dev lo
+ # Install local routes with reduced MTU
+ ip route add local 10.0.0.1/32 table local dev lo mtu 1500
+ ip route add local fd00::1/128 table local dev lo mtu 1500
+}
+
+if [ "$#" -gt 0 ]; then
+ "$1"
+ shift 2 # pop "test_*" arg and "--" delimiter
+ exec "$@"
+fi
+
echo "ipv4 cmsg"
-./in_netns.sh ./udpgso -4 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C
echo "ipv4 setsockopt"
-./in_netns.sh ./udpgso -4 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s
echo "ipv6 cmsg"
-./in_netns.sh ./udpgso -6 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C
echo "ipv6 setsockopt"
-./in_netns.sh ./udpgso -6 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s
echo "ipv4 connected"
-./in_netns.sh ./udpgso -4 -c
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c
-# blocked on 2nd loopback address
-# echo "ipv6 connected"
-# ./in_netns.sh ./udpgso -6 -c
+echo "ipv6 connected"
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c
echo "ipv4 msg_more"
-./in_netns.sh ./udpgso -4 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m
echo "ipv6 msg_more"
-./in_netns.sh ./udpgso -6 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index f35a924d4a30..1cbadd267c96 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -375,7 +375,7 @@ static void do_recv(void)
do_flush_udp(fd);
tnow = gettimeofday_ms();
- if (tnow > treport) {
+ if (!cfg_expected_pkt_nr && tnow > treport) {
if (packets)
fprintf(stderr,
"%s rx: %6lu MB/s %8lu calls/s\n",
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 2d073595c620..5ae85def0739 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
@@ -218,7 +218,7 @@ while getopts "hs:" option; do
done
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit 1
fi
@@ -247,6 +247,20 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+chk_gro_flag "gro vs xdp while down - gro flag on" $DST on
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag " - after down" $DST on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after xdp off" $DST off
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag " - after up" $DST off
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag " - after peer xdp" $DST off
+cleanup
+
+create_ns
chk_channels "default channels" $DST 1 1
ip -n $NS_DST link set dev veth$DST down
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c
new file mode 100644
index 000000000000..d988b2e0cee8
--- /dev/null
+++ b/tools/testing/selftests/net/xdp_dummy.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index db27153eb4a0..936c3085bb83 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -7,7 +7,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
- conntrack_sctp_collision.sh xt_string.sh
+ conntrack_sctp_collision.sh xt_string.sh \
+ bridge_netfilter.sh
HOSTPKG_CONFIG := pkg-config
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh
new file mode 100644
index 000000000000..659b3ab02c8b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge netfilter + conntrack, a combination that doesn't really work,
+# with multicast/broadcast packets racing for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+ns3="ns3-$sfx"
+ns4="ns4-$sfx"
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+for i in $(seq 0 4); do
+ eval ip netns add \$ns$i
+done
+
+cleanup() {
+ for i in $(seq 0 4); do eval ip netns del \$ns$i;done
+}
+
+trap cleanup EXIT
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec ${ns0} nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ for i in $(seq 1 1000); do
+ ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec ${ns0} nft list ruleset
+ fi
+ done
+}
+
+ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
+if [ $? -ne 0 ]; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
+ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
+ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
+
+ip -net ${ns0} link set lo up
+
+for i in $(seq 1 4); do
+ ip -net ${ns0} link set veth$i up
+done
+
+ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
+if [ $? -ne 0 ]; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net ${ns0} link set veth$i master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
+ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan0 up
+ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
+ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net ${ns0} link set macvlan4 master br0
+
+ip -net ${ns0} link set br0 up
+ip -net ${ns0} addr add 10.0.0.1/24 dev br0
+ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
+ret=$?
+if [ $ret -ne 0 ] ; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec ${ns0} nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns$i link set lo up
+ eval ip -net \$ns$i link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
+ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping ${ns1} 10.0.0.12
+do_ping ${ns3} 10.23.0.1
+do_ping ${ns4} 10.23.0.1
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
+fi
+
+bcast_ping ${ns1} 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping ${ns3} 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping ${ns4} 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+
+if [ $t -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
index f18c6db13bbf..b11ea8ee6719 100644
--- a/tools/testing/selftests/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
@@ -13,7 +13,7 @@
#include "../kselftest_harness.h"
#define TEST_ZONE_ID 123
-#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2)
+#define NF_CT_DEFAULT_ZONE_ID 0
static int reply_counter;
@@ -336,6 +336,9 @@ FIXTURE_SETUP(conntrack_dump_flush)
ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
src = (struct in6_addr) {{
.__u6_addr32 = {
@@ -395,6 +398,26 @@ FIXTURE_SETUP(conntrack_dump_flush)
TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x07000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x08000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
EXPECT_GE(ret, 2);
if (ret > 2)
@@ -425,6 +448,24 @@ TEST_F(conntrack_dump_flush, test_flush_by_zone)
EXPECT_EQ(ret, 2);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 0930e2411dfb..cd51d547b751 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -5,6 +5,7 @@
#include <fcntl.h>
#include <limits.h>
#include <linux/types.h>
+#include <poll.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
@@ -129,6 +130,7 @@ FIXTURE(child)
* When it is closed, the child will exit.
*/
int sk;
+ bool ignore_child_result;
};
FIXTURE_SETUP(child)
@@ -165,10 +167,14 @@ FIXTURE_SETUP(child)
FIXTURE_TEARDOWN(child)
{
+ int ret;
+
EXPECT_EQ(0, close(self->pidfd));
EXPECT_EQ(0, close(self->sk));
- EXPECT_EQ(0, wait_for_pid(self->pid));
+ ret = wait_for_pid(self->pid);
+ if (!self->ignore_child_result)
+ EXPECT_EQ(0, ret);
}
TEST_F(child, disable_ptrace)
@@ -235,6 +241,29 @@ TEST(flags_set)
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(child, no_strange_EBADF)
+{
+ struct pollfd fds;
+
+ self->ignore_child_result = true;
+
+ fds.fd = self->pidfd;
+ fds.events = POLLIN;
+
+ ASSERT_EQ(kill(self->pid, SIGKILL), 0);
+ ASSERT_EQ(poll(&fds, 1, 5000), 1);
+
+ /*
+ * It used to be that pidfd_getfd() could race with the exiting thread
+ * between exit_files() and release_task(), and get a non-null task
+ * with a NULL files struct, and you'd get EBADF, which was slightly
+ * confusing.
+ */
+ errno = 0;
+ EXPECT_EQ(sys_pidfd_getfd(self->pidfd, self->remote_fd, 0), -1);
+ EXPECT_EQ(errno, ESRCH);
+}
+
#if __NR_pidfd_getfd == -1
int main(void)
{
diff --git a/tools/testing/selftests/power_supply/Makefile b/tools/testing/selftests/power_supply/Makefile
new file mode 100644
index 000000000000..44f0658d3d2e
--- /dev/null
+++ b/tools/testing/selftests/power_supply/Makefile
@@ -0,0 +1,4 @@
+TEST_PROGS := test_power_supply_properties.sh
+TEST_FILES := helpers.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/power_supply/helpers.sh b/tools/testing/selftests/power_supply/helpers.sh
new file mode 100644
index 000000000000..1ec90d7c9108
--- /dev/null
+++ b/tools/testing/selftests/power_supply/helpers.sh
@@ -0,0 +1,178 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, 2024 Collabora Ltd
+SYSFS_SUPPLIES=/sys/class/power_supply
+
+calc() {
+ awk "BEGIN { print $* }";
+}
+
+test_sysfs_prop() {
+ PROP="$1"
+ VALUE="$2" # optional
+
+ PROP_PATH="$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP"
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ if [ -z "$VALUE" ]; then
+ ktap_test_result "$TEST_NAME" [ -f "$PROP_PATH" ]
+ else
+ ktap_test_result "$TEST_NAME" grep -q "$VALUE" "$PROP_PATH"
+ fi
+}
+
+to_human_readable_unit() {
+ VALUE="$1"
+ UNIT="$2"
+
+ case "$VALUE" in
+ *[!0-9]* ) return ;; # Not a number
+ esac
+
+ if [ "$UNIT" = "uA" ]; then
+ new_unit="mA"
+ div=1000
+ elif [ "$UNIT" = "uV" ]; then
+ new_unit="V"
+ div=1000000
+ elif [ "$UNIT" = "uAh" ]; then
+ new_unit="Ah"
+ div=1000000
+ elif [ "$UNIT" = "uW" ]; then
+ new_unit="mW"
+ div=1000
+ elif [ "$UNIT" = "uWh" ]; then
+ new_unit="Wh"
+ div=1000000
+ else
+ return
+ fi
+
+ value_converted=$(calc "$VALUE"/"$div")
+ echo "$value_converted" "$new_unit"
+}
+
+_check_sysfs_prop_available() {
+ PROP=$1
+
+ PROP_PATH="$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP"
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ if [ ! -e "$PROP_PATH" ] ; then
+ ktap_test_skip "$TEST_NAME"
+ return 1
+ fi
+
+ if ! cat "$PROP_PATH" >/dev/null; then
+ ktap_print_msg "Failed to read"
+ ktap_test_fail "$TEST_NAME"
+ return 1
+ fi
+
+ return 0
+}
+
+test_sysfs_prop_optional() {
+ PROP=$1
+ UNIT=$2 # optional
+
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ _check_sysfs_prop_available "$PROP" || return
+ DATA=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP")
+
+ ktap_print_msg "Reported: '$DATA' $UNIT ($(to_human_readable_unit "$DATA" "$UNIT"))"
+ ktap_test_pass "$TEST_NAME"
+}
+
+test_sysfs_prop_optional_range() {
+ PROP=$1
+ MIN=$2
+ MAX=$3
+ UNIT=$4 # optional
+
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ _check_sysfs_prop_available "$PROP" || return
+ DATA=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP")
+
+ if [ "$DATA" -lt "$MIN" ] || [ "$DATA" -gt "$MAX" ]; then
+ ktap_print_msg "'$DATA' is out of range (min=$MIN, max=$MAX)"
+ ktap_test_fail "$TEST_NAME"
+ else
+ ktap_print_msg "Reported: '$DATA' $UNIT ($(to_human_readable_unit "$DATA" "$UNIT"))"
+ ktap_test_pass "$TEST_NAME"
+ fi
+}
+
+test_sysfs_prop_optional_list() {
+ PROP=$1
+ LIST=$2
+
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ _check_sysfs_prop_available "$PROP" || return
+ DATA=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP")
+
+ valid=0
+
+ OLDIFS=$IFS
+ IFS=","
+ for item in $LIST; do
+ if [ "$DATA" = "$item" ]; then
+ valid=1
+ break
+ fi
+ done
+ if [ "$valid" -eq 1 ]; then
+ ktap_print_msg "Reported: '$DATA'"
+ ktap_test_pass "$TEST_NAME"
+ else
+ ktap_print_msg "'$DATA' is not a valid value for this property"
+ ktap_test_fail "$TEST_NAME"
+ fi
+ IFS=$OLDIFS
+}
+
+dump_file() {
+ FILE="$1"
+ while read -r line; do
+ ktap_print_msg "$line"
+ done < "$FILE"
+}
+
+__test_uevent_prop() {
+ PROP="$1"
+ OPTIONAL="$2"
+ VALUE="$3" # optional
+
+ UEVENT_PATH="$SYSFS_SUPPLIES"/"$DEVNAME"/uevent
+ TEST_NAME="$DEVNAME".uevent."$PROP"
+
+ if ! grep -q "POWER_SUPPLY_$PROP=" "$UEVENT_PATH"; then
+ if [ "$OPTIONAL" -eq 1 ]; then
+ ktap_test_skip "$TEST_NAME"
+ else
+ ktap_print_msg "Missing property"
+ ktap_test_fail "$TEST_NAME"
+ fi
+ return
+ fi
+
+ if ! grep -q "POWER_SUPPLY_$PROP=$VALUE" "$UEVENT_PATH"; then
+ ktap_print_msg "Invalid value for uevent property, dumping..."
+ dump_file "$UEVENT_PATH"
+ ktap_test_fail "$TEST_NAME"
+ else
+ ktap_test_pass "$TEST_NAME"
+ fi
+}
+
+test_uevent_prop() {
+ __test_uevent_prop "$1" 0 "$2"
+}
+
+test_uevent_prop_optional() {
+ __test_uevent_prop "$1" 1 "$2"
+}
diff --git a/tools/testing/selftests/power_supply/test_power_supply_properties.sh b/tools/testing/selftests/power_supply/test_power_supply_properties.sh
new file mode 100755
index 000000000000..df272dfe1d2a
--- /dev/null
+++ b/tools/testing/selftests/power_supply/test_power_supply_properties.sh
@@ -0,0 +1,114 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, 2024 Collabora Ltd
+#
+# This test validates the power supply uAPI: namely, the files in sysfs and
+# lines in uevent that expose the power supply properties.
+#
+# By default all power supplies available are tested. Optionally the name of a
+# power supply can be passed as a parameter to test only that one instead.
+DIR="$(dirname "$(readlink -f "$0")")"
+
+. "${DIR}"/../kselftest/ktap_helpers.sh
+
+. "${DIR}"/helpers.sh
+
+count_tests() {
+ SUPPLIES=$1
+
+ # This needs to be updated every time a new test is added.
+ NUM_TESTS=33
+
+ total_tests=0
+
+ for i in $SUPPLIES; do
+ total_tests=$(("$total_tests" + "$NUM_TESTS"))
+ done
+
+ echo "$total_tests"
+}
+
+ktap_print_header
+
+SYSFS_SUPPLIES=/sys/class/power_supply/
+
+if [ $# -eq 0 ]; then
+ supplies=$(ls "$SYSFS_SUPPLIES")
+else
+ supplies=$1
+fi
+
+ktap_set_plan "$(count_tests "$supplies")"
+
+for DEVNAME in $supplies; do
+ ktap_print_msg Testing device "$DEVNAME"
+
+ if [ ! -d "$SYSFS_SUPPLIES"/"$DEVNAME" ]; then
+ ktap_test_fail "$DEVNAME".exists
+ ktap_exit_fail_msg Device does not exist
+ fi
+
+ ktap_test_pass "$DEVNAME".exists
+
+ test_uevent_prop NAME "$DEVNAME"
+
+ test_sysfs_prop type
+ SUPPLY_TYPE=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/type)
+ # This fails on kernels < 5.8 (needs 2ad3d74e3c69f)
+ test_uevent_prop TYPE "$SUPPLY_TYPE"
+
+ test_sysfs_prop_optional usb_type
+
+ test_sysfs_prop_optional_range online 0 2
+ test_sysfs_prop_optional_range present 0 1
+
+ test_sysfs_prop_optional_list status "Unknown","Charging","Discharging","Not charging","Full"
+
+ # Capacity is reported as percentage, thus any value less than 0 and
+ # greater than 100 are not allowed.
+ test_sysfs_prop_optional_range capacity 0 100 "%"
+
+ test_sysfs_prop_optional_list capacity_level "Unknown","Critical","Low","Normal","High","Full"
+
+ test_sysfs_prop_optional model_name
+ test_sysfs_prop_optional manufacturer
+ test_sysfs_prop_optional serial_number
+ test_sysfs_prop_optional_list technology "Unknown","NiMH","Li-ion","Li-poly","LiFe","NiCd","LiMn"
+
+ test_sysfs_prop_optional cycle_count
+
+ test_sysfs_prop_optional_list scope "Unknown","System","Device"
+
+ test_sysfs_prop_optional input_current_limit "uA"
+ test_sysfs_prop_optional input_voltage_limit "uV"
+
+ # Technically the power-supply class does not limit reported values.
+ # E.g. one could expose an RTC backup-battery, which goes below 1.5V or
+ # an electric vehicle battery with over 300V. But most devices do not
+ # have a step-up capable regulator behind the battery and operate with
+ # voltages considered safe to touch, so we limit the allowed range to
+ # 1.8V-60V to catch drivers reporting incorrectly scaled values. E.g. a
+ # common mistake is reporting data in mV instead of µV.
+ test_sysfs_prop_optional_range voltage_now 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_min 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_max 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_min_design 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_max_design 1800000 60000000 "uV"
+
+ # current based systems
+ test_sysfs_prop_optional current_now "uA"
+ test_sysfs_prop_optional current_max "uA"
+ test_sysfs_prop_optional charge_now "uAh"
+ test_sysfs_prop_optional charge_full "uAh"
+ test_sysfs_prop_optional charge_full_design "uAh"
+
+ # power based systems
+ test_sysfs_prop_optional power_now "uW"
+ test_sysfs_prop_optional energy_now "uWh"
+ test_sysfs_prop_optional energy_full "uWh"
+ test_sysfs_prop_optional energy_full_design "uWh"
+ test_sysfs_prop_optional energy_full_design "uWh"
+done
+
+ktap_finished
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
index 7b1addd50420..8a64f63e37ce 100644
--- a/tools/testing/selftests/powerpc/math/fpu_signal.c
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -18,6 +18,7 @@
#include <pthread.h>
#include "utils.h"
+#include "fpu.h"
/* Number of times each thread should receive the signal */
#define ITERATIONS 10
@@ -27,9 +28,7 @@
*/
#define THREAD_FACTOR 8
-__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
- 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
- 2.1};
+__thread double darray[32];
bool bad_context;
int threads_starting;
@@ -43,9 +42,9 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context)
ucontext_t *uc = context;
mcontext_t *mc = &uc->uc_mcontext;
- /* Only the non volatiles were loaded up */
- for (i = 14; i < 32; i++) {
- if (mc->fp_regs[i] != darray[i - 14]) {
+ // Don't check f30/f31, they're used as scratches in check_all_fprs()
+ for (i = 0; i < 30; i++) {
+ if (mc->fp_regs[i] != darray[i]) {
bad_context = true;
break;
}
@@ -54,7 +53,6 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context)
void *signal_fpu_c(void *p)
{
- int i;
long rc;
struct sigaction act;
act.sa_sigaction = signal_fpu_sig;
@@ -64,9 +62,7 @@ void *signal_fpu_c(void *p)
return p;
srand(pthread_self());
- for (i = 0; i < 21; i++)
- darray[i] = rand();
-
+ randomise_darray(darray, ARRAY_SIZE(darray));
rc = preempt_fpu(darray, &threads_starting, &running);
return (void *) rc;
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
index 98cbb9109ee6..505294da1b9f 100644
--- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -263,10 +263,10 @@ static int papr_vpd_system_loc_code(void)
off_t size;
int fd;
- SKIP_IF_MSG(get_system_loc_code(&lc),
- "Cannot determine system location code");
SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
DEVPATH " not present");
+ SKIP_IF_MSG(get_system_loc_code(&lc),
+ "Cannot determine system location code");
FAIL_IF(devfd < 0);
diff --git a/tools/testing/selftests/powerpc/primitives/linux/bitops.h b/tools/testing/selftests/powerpc/primitives/linux/bitops.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/linux/bitops.h
diff --git a/tools/testing/selftests/powerpc/primitives/linux/wordpart.h b/tools/testing/selftests/powerpc/primitives/linux/wordpart.h
new file mode 120000
index 000000000000..4a74d2cbbc9b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/linux/wordpart.h
@@ -0,0 +1 @@
+../../../../../../include/linux/wordpart.h \ No newline at end of file
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index d5a0d8a33c27..bbac5f4b03d0 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -567,7 +567,7 @@ then
torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1 tsc=watchdog"
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
# In case our work is already done...
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index bcbca356d56a..1b339d6bbff1 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -3,106 +3,59 @@
#include <stdint.h>
#include "resctrl.h"
-struct read_format {
- __u64 nr; /* The number of events */
- struct {
- __u64 value; /* The value of the event */
- } values[2];
-};
-
-static struct perf_event_attr pea_llc_miss;
-static struct read_format rf_cqm;
-static int fd_lm;
char llc_occup_path[1024];
-static void initialize_perf_event_attr(void)
+void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config)
{
- pea_llc_miss.type = PERF_TYPE_HARDWARE;
- pea_llc_miss.size = sizeof(struct perf_event_attr);
- pea_llc_miss.read_format = PERF_FORMAT_GROUP;
- pea_llc_miss.exclude_kernel = 1;
- pea_llc_miss.exclude_hv = 1;
- pea_llc_miss.exclude_idle = 1;
- pea_llc_miss.exclude_callchain_kernel = 1;
- pea_llc_miss.inherit = 1;
- pea_llc_miss.exclude_guest = 1;
- pea_llc_miss.disabled = 1;
-}
-
-static void ioctl_perf_event_ioc_reset_enable(void)
-{
- ioctl(fd_lm, PERF_EVENT_IOC_RESET, 0);
- ioctl(fd_lm, PERF_EVENT_IOC_ENABLE, 0);
-}
-
-static int perf_event_open_llc_miss(pid_t pid, int cpu_no)
-{
- fd_lm = perf_event_open(&pea_llc_miss, pid, cpu_no, -1,
- PERF_FLAG_FD_CLOEXEC);
- if (fd_lm == -1) {
- perror("Error opening leader");
- ctrlc_handler(0, NULL, NULL);
- return -1;
- }
-
- return 0;
-}
-
-static void initialize_llc_perf(void)
-{
- memset(&pea_llc_miss, 0, sizeof(struct perf_event_attr));
- memset(&rf_cqm, 0, sizeof(struct read_format));
-
- /* Initialize perf_event_attr structures for HW_CACHE_MISSES */
- initialize_perf_event_attr();
-
- pea_llc_miss.config = PERF_COUNT_HW_CACHE_MISSES;
-
- rf_cqm.nr = 1;
+ memset(pea, 0, sizeof(*pea));
+ pea->type = PERF_TYPE_HARDWARE;
+ pea->size = sizeof(*pea);
+ pea->read_format = PERF_FORMAT_GROUP;
+ pea->exclude_kernel = 1;
+ pea->exclude_hv = 1;
+ pea->exclude_idle = 1;
+ pea->exclude_callchain_kernel = 1;
+ pea->inherit = 1;
+ pea->exclude_guest = 1;
+ pea->disabled = 1;
+ pea->config = config;
}
-static int reset_enable_llc_perf(pid_t pid, int cpu_no)
+/* Start counters to log values */
+int perf_event_reset_enable(int pe_fd)
{
- int ret = 0;
+ int ret;
- ret = perf_event_open_llc_miss(pid, cpu_no);
+ ret = ioctl(pe_fd, PERF_EVENT_IOC_RESET, 0);
if (ret < 0)
return ret;
- /* Start counters to log values */
- ioctl_perf_event_ioc_reset_enable();
+ ret = ioctl(pe_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (ret < 0)
+ return ret;
return 0;
}
-/*
- * get_llc_perf: llc cache miss through perf events
- * @llc_perf_miss: LLC miss counter that is filled on success
- *
- * Perf events like HW_CACHE_MISSES could be used to validate number of
- * cache lines allocated.
- *
- * Return: =0 on success. <0 on failure.
- */
-static int get_llc_perf(unsigned long *llc_perf_miss)
+void perf_event_initialize_read_format(struct perf_event_read *pe_read)
{
- __u64 total_misses;
- int ret;
-
- /* Stop counters after one span to get miss rate */
+ memset(pe_read, 0, sizeof(*pe_read));
+ pe_read->nr = 1;
+}
- ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
+int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no)
+{
+ int pe_fd;
- ret = read(fd_lm, &rf_cqm, sizeof(struct read_format));
- if (ret == -1) {
- perror("Could not get llc misses through perf");
+ pe_fd = perf_event_open(pea, pid, cpu_no, -1, PERF_FLAG_FD_CLOEXEC);
+ if (pe_fd == -1) {
+ ksft_perror("Error opening leader");
return -1;
}
- total_misses = rf_cqm.values[0].value;
- *llc_perf_miss = total_misses;
+ perf_event_reset_enable(pe_fd);
- return 0;
+ return pe_fd;
}
/*
@@ -124,12 +77,12 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
fp = fopen(llc_occup_path, "r");
if (!fp) {
- perror("Failed to open results file");
+ ksft_perror("Failed to open results file");
- return errno;
+ return -1;
}
if (fscanf(fp, "%lu", llc_occupancy) <= 0) {
- perror("Could not get llc occupancy");
+ ksft_perror("Could not get llc occupancy");
fclose(fp);
return -1;
@@ -146,163 +99,91 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
* @llc_value: perf miss value /
* llc occupancy value reported by resctrl FS
*
- * Return: 0 on success. non-zero on failure.
+ * Return: 0 on success, < 0 on error.
*/
-static int print_results_cache(char *filename, int bm_pid,
- unsigned long llc_value)
+static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value)
{
FILE *fp;
if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
- printf("Pid: %d \t LLC_value: %lu\n", bm_pid,
- llc_value);
+ printf("Pid: %d \t LLC_value: %llu\n", bm_pid, llc_value);
} else {
fp = fopen(filename, "a");
if (!fp) {
- perror("Cannot open results file");
+ ksft_perror("Cannot open results file");
- return errno;
+ return -1;
}
- fprintf(fp, "Pid: %d \t llc_value: %lu\n", bm_pid, llc_value);
+ fprintf(fp, "Pid: %d \t llc_value: %llu\n", bm_pid, llc_value);
fclose(fp);
}
return 0;
}
-int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
+/*
+ * perf_event_measure - Measure perf events
+ * @filename: Filename for writing the results
+ * @bm_pid: PID that runs the benchmark
+ *
+ * Measures perf events (e.g., cache misses) and writes the results into
+ * @filename. @bm_pid is written to the results file along with the measured
+ * value.
+ *
+ * Return: =0 on success. <0 on failure.
+ */
+int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
+ const char *filename, int bm_pid)
{
- unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
int ret;
- /*
- * Measure cache miss from perf.
- */
- if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) {
- ret = get_llc_perf(&llc_perf_miss);
- if (ret < 0)
- return ret;
- llc_value = llc_perf_miss;
- }
+ /* Stop counters after one span to get miss rate */
+ ret = ioctl(pe_fd, PERF_EVENT_IOC_DISABLE, 0);
+ if (ret < 0)
+ return ret;
- /*
- * Measure llc occupancy from resctrl.
- */
- if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
- ret = get_llc_occu_resctrl(&llc_occu_resc);
- if (ret < 0)
- return ret;
- llc_value = llc_occu_resc;
+ ret = read(pe_fd, pe_read, sizeof(*pe_read));
+ if (ret == -1) {
+ ksft_perror("Could not get perf value");
+ return -1;
}
- ret = print_results_cache(param->filename, bm_pid, llc_value);
- if (ret)
- return ret;
- return 0;
+ return print_results_cache(filename, bm_pid, pe_read->values[0].value);
}
/*
- * cache_val: execute benchmark and measure LLC occupancy resctrl
- * and perf cache miss for the benchmark
- * @param: parameters passed to cache_val()
- * @span: buffer size for the benchmark
+ * measure_llc_resctrl - Measure resctrl LLC value from resctrl
+ * @filename: Filename for writing the results
+ * @bm_pid: PID that runs the benchmark
*
- * Return: 0 on success. non-zero on failure.
+ * Measures LLC occupancy from resctrl and writes the results into @filename.
+ * @bm_pid is written to the results file along with the measured value.
+ *
+ * Return: =0 on success. <0 on failure.
*/
-int cat_val(struct resctrl_val_param *param, size_t span)
+int measure_llc_resctrl(const char *filename, int bm_pid)
{
- int memflush = 1, operation = 0, ret = 0;
- char *resctrl_val = param->resctrl_val;
- pid_t bm_pid;
-
- if (strcmp(param->filename, "") == 0)
- sprintf(param->filename, "stdio");
-
- bm_pid = getpid();
-
- /* Taskset benchmark to specified cpu */
- ret = taskset_benchmark(bm_pid, param->cpu_no);
- if (ret)
- return ret;
+ unsigned long llc_occu_resc = 0;
+ int ret;
- /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
- ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
- resctrl_val);
- if (ret)
+ ret = get_llc_occu_resctrl(&llc_occu_resc);
+ if (ret < 0)
return ret;
- initialize_llc_perf();
-
- /* Test runs until the callback setup() tells the test to stop. */
- while (1) {
- ret = param->setup(param);
- if (ret == END_OF_TESTS) {
- ret = 0;
- break;
- }
- if (ret < 0)
- break;
- ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
- if (ret)
- break;
-
- if (run_fill_buf(span, memflush, operation, true)) {
- fprintf(stderr, "Error-running fill buffer\n");
- ret = -1;
- goto pe_close;
- }
-
- sleep(1);
- ret = measure_cache_vals(param, bm_pid);
- if (ret)
- goto pe_close;
- }
-
- return ret;
-
-pe_close:
- close(fd_lm);
- return ret;
+ return print_results_cache(filename, bm_pid, llc_occu_resc);
}
/*
- * show_cache_info: show cache test result information
- * @sum_llc_val: sum of LLC cache result data
- * @no_of_bits: number of bits
- * @cache_span: cache span in bytes for CMT or in lines for CAT
- * @max_diff: max difference
- * @max_diff_percent: max difference percentage
- * @num_of_runs: number of runs
- * @platform: show test information on this platform
- * @cmt: CMT test or CAT test
- *
- * Return: 0 on success. non-zero on failure.
+ * show_cache_info - Show generic cache test information
+ * @no_of_bits: Number of bits
+ * @avg_llc_val: Average of LLC cache result data
+ * @cache_span: Cache span
+ * @lines: @cache_span in lines or bytes
*/
-int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
- size_t cache_span, unsigned long max_diff,
- unsigned long max_diff_percent, unsigned long num_of_runs,
- bool platform, bool cmt)
+void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines)
{
- unsigned long avg_llc_val = 0;
- float diff_percent;
- long avg_diff = 0;
- int ret;
-
- avg_llc_val = sum_llc_val / num_of_runs;
- avg_diff = (long)abs(cache_span - avg_llc_val);
- diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
-
- ret = platform && abs((int)diff_percent) > max_diff_percent &&
- (cmt ? (abs(avg_diff) > max_diff) : true);
-
- ksft_print_msg("%s Check cache miss rate within %lu%%\n",
- ret ? "Fail:" : "Pass:", max_diff_percent);
-
- ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
ksft_print_msg("Number of bits: %d\n", no_of_bits);
- ksft_print_msg("Average LLC val: %lu\n", avg_llc_val);
- ksft_print_msg("Cache span (%s): %zu\n", cmt ? "bytes" : "lines",
+ ksft_print_msg("Average LLC val: %llu\n", avg_llc_val);
+ ksft_print_msg("Cache span (%s): %zu\n", lines ? "lines" : "bytes",
cache_span);
-
- return ret;
}
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 224ba8544d8a..4cb991be8e31 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -11,108 +11,254 @@
#include "resctrl.h"
#include <unistd.h>
-#define RESULT_FILE_NAME1 "result_cat1"
-#define RESULT_FILE_NAME2 "result_cat2"
+#define RESULT_FILE_NAME "result_cat"
#define NUM_OF_RUNS 5
-#define MAX_DIFF_PERCENT 4
-#define MAX_DIFF 1000000
/*
- * Change schemata. Write schemata to specified
- * con_mon grp, mon_grp in resctrl FS.
- * Run 5 times in order to get average values.
+ * Minimum difference in LLC misses between a test with n+1 bits CBM to the
+ * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4
+ * bits in the CBM mask, the minimum difference must be at least
+ * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
+ *
+ * The relationship between number of used CBM bits and difference in LLC
+ * misses is not expected to be linear. With a small number of bits, the
+ * margin is smaller than with larger number of bits. For selftest purposes,
+ * however, linear approach is enough because ultimately only pass/fail
+ * decision has to be made and distinction between strong and stronger
+ * signal is irrelevant.
*/
-static int cat_setup(struct resctrl_val_param *p)
+#define MIN_DIFF_PERCENT_PER_BIT 1UL
+
+static int show_results_info(__u64 sum_llc_val, int no_of_bits,
+ unsigned long cache_span,
+ unsigned long min_diff_percent,
+ unsigned long num_of_runs, bool platform,
+ __s64 *prev_avg_llc_val)
{
- char schemata[64];
+ __u64 avg_llc_val = 0;
+ float avg_diff;
int ret = 0;
- /* Run NUM_OF_RUNS times */
- if (p->num_of_runs >= NUM_OF_RUNS)
- return END_OF_TESTS;
+ avg_llc_val = sum_llc_val / num_of_runs;
+ if (*prev_avg_llc_val) {
+ float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);
+
+ avg_diff = delta / *prev_avg_llc_val;
+ ret = platform && (avg_diff * 100) < (float)min_diff_percent;
+
+ ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
+ ret ? "Fail:" : "Pass:", (float)min_diff_percent);
- if (p->num_of_runs == 0) {
- sprintf(schemata, "%lx", p->mask);
- ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no,
- p->resctrl_val);
+ ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
}
- p->num_of_runs++;
+ *prev_avg_llc_val = avg_llc_val;
+
+ show_cache_info(no_of_bits, avg_llc_val, cache_span, true);
return ret;
}
-static int check_results(struct resctrl_val_param *param, size_t span)
+/* Remove the highest bit from CBM */
+static unsigned long next_mask(unsigned long current_mask)
+{
+ return current_mask & (current_mask >> 1);
+}
+
+static int check_results(struct resctrl_val_param *param, const char *cache_type,
+ unsigned long cache_total_size, unsigned long full_cache_mask,
+ unsigned long current_mask)
{
char *token_array[8], temp[512];
- unsigned long sum_llc_perf_miss = 0;
- int runs = 0, no_of_bits = 0;
+ __u64 sum_llc_perf_miss = 0;
+ __s64 prev_avg_llc_val = 0;
+ unsigned long alloc_size;
+ int runs = 0;
+ int fail = 0;
+ int ret;
FILE *fp;
ksft_print_msg("Checking for pass/fail\n");
fp = fopen(param->filename, "r");
if (!fp) {
- perror("# Cannot open file");
+ ksft_perror("Cannot open file");
- return errno;
+ return -1;
}
while (fgets(temp, sizeof(temp), fp)) {
char *token = strtok(temp, ":\t");
int fields = 0;
+ int bits;
while (token) {
token_array[fields++] = token;
token = strtok(NULL, ":\t");
}
- /*
- * Discard the first value which is inaccurate due to monitoring
- * setup transition phase.
- */
- if (runs > 0)
- sum_llc_perf_miss += strtoul(token_array[3], NULL, 0);
+
+ sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
runs++;
+
+ if (runs < NUM_OF_RUNS)
+ continue;
+
+ if (!current_mask) {
+ ksft_print_msg("Unexpected empty cache mask\n");
+ break;
+ }
+
+ alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);
+
+ bits = count_bits(current_mask);
+
+ ret = show_results_info(sum_llc_perf_miss, bits,
+ alloc_size / 64,
+ MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
+ runs, get_vendor() == ARCH_INTEL,
+ &prev_avg_llc_val);
+ if (ret)
+ fail = 1;
+
+ runs = 0;
+ sum_llc_perf_miss = 0;
+ current_mask = next_mask(current_mask);
}
fclose(fp);
- no_of_bits = count_bits(param->mask);
- return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64,
- MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
- get_vendor() == ARCH_INTEL, false);
+ return fail;
}
void cat_test_cleanup(void)
{
- remove(RESULT_FILE_NAME1);
- remove(RESULT_FILE_NAME2);
+ remove(RESULT_FILE_NAME);
}
-int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
+/*
+ * cat_test - Execute CAT benchmark and measure cache misses
+ * @test: Test information structure
+ * @uparams: User supplied parameters
+ * @param: Parameters passed to cat_test()
+ * @span: Buffer size for the benchmark
+ * @current_mask Start mask for the first iteration
+ *
+ * Run CAT selftest by varying the allocated cache portion and comparing the
+ * impact on cache misses (the result analysis is done in check_results()
+ * and show_results_info(), not in this function).
+ *
+ * One bit is removed from the CAT allocation bit mask (in current_mask) for
+ * each subsequent test which keeps reducing the size of the allocated cache
+ * portion. A single test flushes the buffer, reads it to warm up the cache,
+ * and reads the buffer again. The cache misses are measured during the last
+ * read pass.
+ *
+ * Return: 0 when the test was run, < 0 on error.
+ */
+static int cat_test(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *param,
+ size_t span, unsigned long current_mask)
{
- unsigned long l_mask, l_mask_1;
- int ret, pipefd[2], sibling_cpu_no;
- unsigned long cache_size = 0;
- unsigned long long_mask;
- char cbm_mask[256];
+ char *resctrl_val = param->resctrl_val;
+ struct perf_event_read pe_read;
+ struct perf_event_attr pea;
+ cpu_set_t old_affinity;
+ unsigned char *buf;
+ char schemata[64];
+ int ret, i, pe_fd;
+ pid_t bm_pid;
+
+ if (strcmp(param->filename, "") == 0)
+ sprintf(param->filename, "stdio");
+
+ bm_pid = getpid();
+
+ /* Taskset benchmark to specified cpu */
+ ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
+ if (ret)
+ return ret;
+
+ /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
+ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+ resctrl_val);
+ if (ret)
+ goto reset_affinity;
+
+ perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
+ perf_event_initialize_read_format(&pe_read);
+ pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
+ if (pe_fd < 0) {
+ ret = -1;
+ goto reset_affinity;
+ }
+
+ buf = alloc_buffer(span, 1);
+ if (!buf) {
+ ret = -1;
+ goto pe_close;
+ }
+
+ while (current_mask) {
+ snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
+ ret = write_schemata("", schemata, uparams->cpu, test->resource);
+ if (ret)
+ goto free_buf;
+ snprintf(schemata, sizeof(schemata), "%lx", current_mask);
+ ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
+ if (ret)
+ goto free_buf;
+
+ for (i = 0; i < NUM_OF_RUNS; i++) {
+ mem_flush(buf, span);
+ fill_cache_read(buf, span, true);
+
+ ret = perf_event_reset_enable(pe_fd);
+ if (ret)
+ goto free_buf;
+
+ fill_cache_read(buf, span, true);
+
+ ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
+ if (ret)
+ goto free_buf;
+ }
+ current_mask = next_mask(current_mask);
+ }
+
+free_buf:
+ free(buf);
+pe_close:
+ close(pe_fd);
+reset_affinity:
+ taskset_restore(bm_pid, &old_affinity);
+
+ return ret;
+}
+
+static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
+{
+ unsigned long long_mask, start_mask, full_cache_mask;
+ unsigned long cache_total_size = 0;
+ int n = uparams->bits;
+ unsigned int start;
int count_of_bits;
- char pipe_message;
size_t span;
+ int ret;
- /* Get default cbm mask for L3/L2 cache */
- ret = get_cbm_mask(cache_type, cbm_mask);
+ ret = get_full_cbm(test->resource, &full_cache_mask);
+ if (ret)
+ return ret;
+ /* Get the largest contiguous exclusive portion of the cache */
+ ret = get_mask_no_shareable(test->resource, &long_mask);
if (ret)
return ret;
-
- long_mask = strtoul(cbm_mask, NULL, 16);
/* Get L3/L2 cache size */
- ret = get_cache_size(cpu_no, cache_type, &cache_size);
+ ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
if (ret)
return ret;
- ksft_print_msg("Cache size :%lu\n", cache_size);
+ ksft_print_msg("Cache size :%lu\n", cache_total_size);
- /* Get max number of bits from default-cabm mask */
- count_of_bits = count_bits(long_mask);
+ count_of_bits = count_contiguous_bits(long_mask, &start);
if (!n)
n = count_of_bits / 2;
@@ -123,89 +269,124 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
count_of_bits - 1);
return -1;
}
-
- /* Get core id from same socket for running another thread */
- sibling_cpu_no = get_core_sibling(cpu_no);
- if (sibling_cpu_no < 0)
- return -1;
+ start_mask = create_bit_mask(start, n);
struct resctrl_val_param param = {
.resctrl_val = CAT_STR,
- .cpu_no = cpu_no,
- .setup = cat_setup,
+ .ctrlgrp = "c1",
+ .filename = RESULT_FILE_NAME,
+ .num_of_runs = 0,
};
+ param.mask = long_mask;
+ span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);
- l_mask = long_mask >> n;
- l_mask_1 = ~l_mask & long_mask;
+ remove(param.filename);
- /* Set param values for parent thread which will be allocated bitmask
- * with (max_bits - n) bits
- */
- span = cache_size * (count_of_bits - n) / count_of_bits;
- strcpy(param.ctrlgrp, "c2");
- strcpy(param.mongrp, "m2");
- strcpy(param.filename, RESULT_FILE_NAME2);
- param.mask = l_mask;
- param.num_of_runs = 0;
-
- if (pipe(pipefd)) {
- perror("# Unable to create pipe");
- return errno;
- }
+ ret = cat_test(test, uparams, &param, span, start_mask);
+ if (ret)
+ goto out;
- fflush(stdout);
- bm_pid = fork();
+ ret = check_results(&param, test->resource,
+ cache_total_size, full_cache_mask, start_mask);
+out:
+ cat_test_cleanup();
- /* Set param values for child thread which will be allocated bitmask
- * with n bits
- */
- if (bm_pid == 0) {
- param.mask = l_mask_1;
- strcpy(param.ctrlgrp, "c1");
- strcpy(param.mongrp, "m1");
- span = cache_size * n / count_of_bits;
- strcpy(param.filename, RESULT_FILE_NAME1);
- param.num_of_runs = 0;
- param.cpu_no = sibling_cpu_no;
+ return ret;
+}
+
+static int noncont_cat_run_test(const struct resctrl_test *test,
+ const struct user_params *uparams)
+{
+ unsigned long full_cache_mask, cont_mask, noncont_mask;
+ unsigned int eax, ebx, ecx, edx, sparse_masks;
+ int bit_center, ret;
+ char schemata[64];
+
+ /* Check to compare sparse_masks content to CPUID output. */
+ ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
+ if (ret)
+ return ret;
+
+ if (!strcmp(test->resource, "L3"))
+ __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
+ else if (!strcmp(test->resource, "L2"))
+ __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
+ else
+ return -EINVAL;
+
+ if (sparse_masks != ((ecx >> 3) & 1)) {
+ ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n");
+ return 1;
}
- remove(param.filename);
+ /* Write checks initialization. */
+ ret = get_full_cbm(test->resource, &full_cache_mask);
+ if (ret < 0)
+ return ret;
+ bit_center = count_bits(full_cache_mask) / 2;
- ret = cat_val(&param, span);
- if (ret == 0)
- ret = check_results(&param, span);
-
- if (bm_pid == 0) {
- /* Tell parent that child is ready */
- close(pipefd[0]);
- pipe_message = 1;
- if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
- sizeof(pipe_message))
- /*
- * Just print the error message.
- * Let while(1) run and wait for itself to be killed.
- */
- perror("# failed signaling parent process");
-
- close(pipefd[1]);
- while (1)
- ;
- } else {
- /* Parent waits for child to be ready. */
- close(pipefd[1]);
- pipe_message = 0;
- while (pipe_message != 1) {
- if (read(pipefd[0], &pipe_message,
- sizeof(pipe_message)) < sizeof(pipe_message)) {
- perror("# failed reading from child process");
- break;
- }
- }
- close(pipefd[0]);
- kill(bm_pid, SIGKILL);
+ /*
+ * The bit_center needs to be at least 3 to properly calculate the CBM
+ * hole in the noncont_mask. If it's smaller return an error since the
+ * cache mask is too short and that shouldn't happen.
+ */
+ if (bit_center < 3)
+ return -EINVAL;
+ cont_mask = full_cache_mask >> bit_center;
+
+ /* Contiguous mask write check. */
+ snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
+ ret = write_schemata("", schemata, uparams->cpu, test->resource);
+ if (ret) {
+ ksft_print_msg("Write of contiguous CBM failed\n");
+ return 1;
}
- cat_test_cleanup();
+ /*
+ * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
+ * Output is compared with support information to catch any edge case errors.
+ */
+ noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
+ snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
+ ret = write_schemata("", schemata, uparams->cpu, test->resource);
+ if (ret && sparse_masks)
+ ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
+ else if (ret && !sparse_masks)
+ ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
+ else if (!ret && !sparse_masks)
+ ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");
+
+ return !ret == !sparse_masks;
+}
- return ret;
+static bool noncont_cat_feature_check(const struct resctrl_test *test)
+{
+ if (!resctrl_resource_exists(test->resource))
+ return false;
+
+ return resource_info_file_exists(test->resource, "sparse_masks");
}
+
+struct resctrl_test l3_cat_test = {
+ .name = "L3_CAT",
+ .group = "CAT",
+ .resource = "L3",
+ .feature_check = test_resource_feature_check,
+ .run_test = cat_run_test,
+};
+
+struct resctrl_test l3_noncont_cat_test = {
+ .name = "L3_NONCONT_CAT",
+ .group = "CAT",
+ .resource = "L3",
+ .feature_check = noncont_cat_feature_check,
+ .run_test = noncont_cat_run_test,
+};
+
+struct resctrl_test l2_noncont_cat_test = {
+ .name = "L2_NONCONT_CAT",
+ .group = "CAT",
+ .resource = "L2",
+ .feature_check = noncont_cat_feature_check,
+ .run_test = noncont_cat_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index 50bdbce9fba9..a81f91222a89 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -16,7 +16,9 @@
#define MAX_DIFF 2000000
#define MAX_DIFF_PERCENT 15
-static int cmt_setup(struct resctrl_val_param *p)
+static int cmt_setup(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *p)
{
/* Run NUM_OF_RUNS times */
if (p->num_of_runs >= NUM_OF_RUNS)
@@ -27,6 +29,33 @@ static int cmt_setup(struct resctrl_val_param *p)
return 0;
}
+static int show_results_info(unsigned long sum_llc_val, int no_of_bits,
+ unsigned long cache_span, unsigned long max_diff,
+ unsigned long max_diff_percent, unsigned long num_of_runs,
+ bool platform)
+{
+ unsigned long avg_llc_val = 0;
+ float diff_percent;
+ long avg_diff = 0;
+ int ret;
+
+ avg_llc_val = sum_llc_val / num_of_runs;
+ avg_diff = (long)abs(cache_span - avg_llc_val);
+ diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
+
+ ret = platform && abs((int)diff_percent) > max_diff_percent &&
+ abs(avg_diff) > max_diff;
+
+ ksft_print_msg("%s Check cache miss rate within %lu%%\n",
+ ret ? "Fail:" : "Pass:", max_diff_percent);
+
+ ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
+
+ show_cache_info(no_of_bits, avg_llc_val, cache_span, false);
+
+ return ret;
+}
+
static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits)
{
char *token_array[8], temp[512];
@@ -37,9 +66,9 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of
ksft_print_msg("Checking for pass/fail\n");
fp = fopen(param->filename, "r");
if (!fp) {
- perror("# Error in opening file\n");
+ ksft_perror("Error in opening file");
- return errno;
+ return -1;
}
while (fgets(temp, sizeof(temp), fp)) {
@@ -58,9 +87,8 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of
}
fclose(fp);
- return show_cache_info(sum_llc_occu_resc, no_of_bits, span,
- MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
- true, true);
+ return show_results_info(sum_llc_occu_resc, no_of_bits, span,
+ MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true);
}
void cmt_test_cleanup(void)
@@ -68,28 +96,26 @@ void cmt_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd)
+static int cmt_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
- const char * const *cmd = benchmark_cmd;
+ const char * const *cmd = uparams->benchmark_cmd;
const char *new_cmd[BENCHMARK_ARGS];
- unsigned long cache_size = 0;
+ unsigned long cache_total_size = 0;
+ int n = uparams->bits ? : 5;
unsigned long long_mask;
char *span_str = NULL;
- char cbm_mask[256];
int count_of_bits;
size_t span;
int ret, i;
- ret = get_cbm_mask("L3", cbm_mask);
+ ret = get_full_cbm("L3", &long_mask);
if (ret)
return ret;
- long_mask = strtoul(cbm_mask, NULL, 16);
-
- ret = get_cache_size(cpu_no, "L3", &cache_size);
+ ret = get_cache_size(uparams->cpu, "L3", &cache_total_size);
if (ret)
return ret;
- ksft_print_msg("Cache size :%lu\n", cache_size);
+ ksft_print_msg("Cache size :%lu\n", cache_total_size);
count_of_bits = count_bits(long_mask);
@@ -103,19 +129,18 @@ int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd)
.resctrl_val = CMT_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
- .cpu_no = cpu_no,
.filename = RESULT_FILE_NAME,
.mask = ~(long_mask << n) & long_mask,
.num_of_runs = 0,
.setup = cmt_setup,
};
- span = cache_size * n / count_of_bits;
+ span = cache_portion_size(cache_total_size, param.mask, long_mask);
if (strcmp(cmd[0], "fill_buf") == 0) {
/* Duplicate the command to be able to replace span in it */
- for (i = 0; benchmark_cmd[i]; i++)
- new_cmd[i] = benchmark_cmd[i];
+ for (i = 0; uparams->benchmark_cmd[i]; i++)
+ new_cmd[i] = uparams->benchmark_cmd[i];
new_cmd[i] = NULL;
ret = asprintf(&span_str, "%zu", span);
@@ -127,11 +152,13 @@ int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd)
remove(RESULT_FILE_NAME);
- ret = resctrl_val(cmd, &param);
+ ret = resctrl_val(test, uparams, cmd, &param);
if (ret)
goto out;
ret = check_results(&param, span, n);
+ if (ret && (get_vendor() == ARCH_INTEL))
+ ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
out:
cmt_test_cleanup();
@@ -139,3 +166,16 @@ out:
return ret;
}
+
+static bool cmt_feature_check(const struct resctrl_test *test)
+{
+ return test_resource_feature_check(test) &&
+ resctrl_mon_feature_exists("L3_MON", "llc_occupancy");
+}
+
+struct resctrl_test cmt_test = {
+ .name = "CMT",
+ .resource = "L3",
+ .feature_check = cmt_feature_check,
+ .run_test = cmt_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 0d425f26583a..ae120f1735c0 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -38,7 +38,7 @@ static void cl_flush(void *p)
#endif
}
-static void mem_flush(unsigned char *buf, size_t buf_size)
+void mem_flush(unsigned char *buf, size_t buf_size)
{
unsigned char *cp = buf;
size_t i = 0;
@@ -51,39 +51,38 @@ static void mem_flush(unsigned char *buf, size_t buf_size)
sb();
}
-static void *malloc_and_init_memory(size_t buf_size)
-{
- void *p = NULL;
- uint64_t *p64;
- size_t s64;
- int ret;
-
- ret = posix_memalign(&p, PAGE_SIZE, buf_size);
- if (ret < 0)
- return NULL;
-
- p64 = (uint64_t *)p;
- s64 = buf_size / sizeof(uint64_t);
-
- while (s64 > 0) {
- *p64 = (uint64_t)rand();
- p64 += (CL_SIZE / sizeof(uint64_t));
- s64 -= (CL_SIZE / sizeof(uint64_t));
- }
-
- return p;
-}
+/*
+ * Buffer index step advance to workaround HW prefetching interfering with
+ * the measurements.
+ *
+ * Must be a prime to step through all indexes of the buffer.
+ *
+ * Some primes work better than others on some architectures (from MBA/MBM
+ * result stability point of view).
+ */
+#define FILL_IDX_MULT 23
static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
- unsigned char *end_ptr = buf + buf_size;
- unsigned char sum, *p;
-
- sum = 0;
- p = buf;
- while (p < end_ptr) {
- sum += *p;
- p += (CL_SIZE / 2);
+ unsigned int size = buf_size / (CL_SIZE / 2);
+ unsigned int i, idx = 0;
+ unsigned char sum = 0;
+
+ /*
+ * Read the buffer in an order that is unexpected by HW prefetching
+ * optimizations to prevent them interfering with the caching pattern.
+ *
+ * The read order is (in terms of halves of cachelines):
+ * i * FILL_IDX_MULT % size
+ * The formula is open-coded below to avoiding modulo inside the loop
+ * as it improves MBA/MBM result stability on some architectures.
+ */
+ for (i = 0; i < size; i++) {
+ sum += buf[idx * (CL_SIZE / 2)];
+
+ idx += FILL_IDX_MULT;
+ while (idx >= size)
+ idx -= size;
}
return sum;
@@ -101,10 +100,9 @@ static void fill_one_span_write(unsigned char *buf, size_t buf_size)
}
}
-static int fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
+void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
int ret = 0;
- FILE *fp;
while (1) {
ret = fill_one_span_read(buf, buf_size);
@@ -113,67 +111,59 @@ static int fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
}
/* Consume read result so that reading memory is not optimized out. */
- fp = fopen("/dev/null", "w");
- if (!fp) {
- perror("Unable to write to /dev/null");
- return -1;
- }
- fprintf(fp, "Sum: %d ", ret);
- fclose(fp);
-
- return 0;
+ *value_sink = ret;
}
-static int fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
+static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
while (1) {
fill_one_span_write(buf, buf_size);
if (once)
break;
}
-
- return 0;
}
-static int fill_cache(size_t buf_size, int memflush, int op, bool once)
+unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
- unsigned char *buf;
+ void *buf = NULL;
+ uint64_t *p64;
+ size_t s64;
int ret;
- buf = malloc_and_init_memory(buf_size);
- if (!buf)
- return -1;
-
- /* Flush the memory before using to avoid "cache hot pages" effect */
- if (memflush)
- mem_flush(buf, buf_size);
-
- if (op == 0)
- ret = fill_cache_read(buf, buf_size, once);
- else
- ret = fill_cache_write(buf, buf_size, once);
+ ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
+ if (ret < 0)
+ return NULL;
- free(buf);
+ /* Initialize the buffer */
+ p64 = buf;
+ s64 = buf_size / sizeof(uint64_t);
- if (ret) {
- printf("\n Error in fill cache read/write...\n");
- return -1;
+ while (s64 > 0) {
+ *p64 = (uint64_t)rand();
+ p64 += (CL_SIZE / sizeof(uint64_t));
+ s64 -= (CL_SIZE / sizeof(uint64_t));
}
+ /* Flush the memory before using to avoid "cache hot pages" effect */
+ if (memflush)
+ mem_flush(buf, buf_size);
- return 0;
+ return buf;
}
-int run_fill_buf(size_t span, int memflush, int op, bool once)
+int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
- size_t cache_size = span;
- int ret;
+ unsigned char *buf;
- ret = fill_cache(cache_size, memflush, op, once);
- if (ret) {
- printf("\n Error in fill cache\n");
+ buf = alloc_buffer(buf_size, memflush);
+ if (!buf)
return -1;
- }
+
+ if (op == 0)
+ fill_cache_read(buf, buf_size, once);
+ else
+ fill_cache_write(buf, buf_size, once);
+ free(buf);
return 0;
}
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index d3bf4368341e..7946e32e85c8 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -22,7 +22,9 @@
* con_mon grp, mon_grp in resctrl FS.
* For each allocation, run 5 times in order to get average values.
*/
-static int mba_setup(struct resctrl_val_param *p)
+static int mba_setup(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *p)
{
static int runs_per_allocation, allocation = 100;
char allocation_str[64];
@@ -40,8 +42,7 @@ static int mba_setup(struct resctrl_val_param *p)
sprintf(allocation_str, "%d", allocation);
- ret = write_schemata(p->ctrlgrp, allocation_str, p->cpu_no,
- p->resctrl_val);
+ ret = write_schemata(p->ctrlgrp, allocation_str, uparams->cpu, test->resource);
if (ret < 0)
return ret;
@@ -109,9 +110,9 @@ static int check_results(void)
fp = fopen(output, "r");
if (!fp) {
- perror(output);
+ ksft_perror(output);
- return errno;
+ return -1;
}
runs = 0;
@@ -141,13 +142,12 @@ void mba_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd)
+static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
.resctrl_val = MBA_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
- .cpu_no = cpu_no,
.filename = RESULT_FILE_NAME,
.bw_report = "reads",
.setup = mba_setup
@@ -156,7 +156,7 @@ int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd)
remove(RESULT_FILE_NAME);
- ret = resctrl_val(benchmark_cmd, &param);
+ ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
goto out;
@@ -167,3 +167,17 @@ out:
return ret;
}
+
+static bool mba_feature_check(const struct resctrl_test *test)
+{
+ return test_resource_feature_check(test) &&
+ resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes");
+}
+
+struct resctrl_test mba_test = {
+ .name = "MBA",
+ .resource = "MB",
+ .vendor_specific = ARCH_INTEL,
+ .feature_check = mba_feature_check,
+ .run_test = mba_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 741533f2b075..d67ffa3ec63a 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -59,9 +59,9 @@ static int check_results(size_t span)
fp = fopen(output, "r");
if (!fp) {
- perror(output);
+ ksft_perror(output);
- return errno;
+ return -1;
}
runs = 0;
@@ -86,7 +86,9 @@ static int check_results(size_t span)
return ret;
}
-static int mbm_setup(struct resctrl_val_param *p)
+static int mbm_setup(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *p)
{
int ret = 0;
@@ -95,9 +97,8 @@ static int mbm_setup(struct resctrl_val_param *p)
return END_OF_TESTS;
/* Set up shemata with 100% allocation on the first run. */
- if (p->num_of_runs == 0 && validate_resctrl_feature_request("MB", NULL))
- ret = write_schemata(p->ctrlgrp, "100", p->cpu_no,
- p->resctrl_val);
+ if (p->num_of_runs == 0 && resctrl_resource_exists("MB"))
+ ret = write_schemata(p->ctrlgrp, "100", uparams->cpu, test->resource);
p->num_of_runs++;
@@ -109,13 +110,12 @@ void mbm_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd)
+static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
.resctrl_val = MBM_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
- .cpu_no = cpu_no,
.filename = RESULT_FILE_NAME,
.bw_report = "reads",
.setup = mbm_setup
@@ -124,14 +124,30 @@ int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd)
remove(RESULT_FILE_NAME);
- ret = resctrl_val(benchmark_cmd, &param);
+ ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
goto out;
ret = check_results(DEFAULT_SPAN);
+ if (ret && (get_vendor() == ARCH_INTEL))
+ ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
out:
mbm_test_cleanup();
return ret;
}
+
+static bool mbm_feature_check(const struct resctrl_test *test)
+{
+ return resctrl_mon_feature_exists("L3_MON", "mbm_total_bytes") &&
+ resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes");
+}
+
+struct resctrl_test mbm_test = {
+ .name = "MBM",
+ .resource = "MB",
+ .vendor_specific = ARCH_INTEL,
+ .feature_check = mbm_feature_check,
+ .run_test = mbm_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index a33f414f6019..2051bd135e0d 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -28,6 +28,12 @@
#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
#define INFO_PATH "/sys/fs/resctrl/info"
+/*
+ * CPU vendor IDs
+ *
+ * Define as bits because they're used for vendor_specific bitmask in
+ * the struct resctrl_test.
+ */
#define ARCH_INTEL 1
#define ARCH_AMD 2
@@ -37,20 +43,52 @@
#define DEFAULT_SPAN (250 * MB)
-#define PARENT_EXIT(err_msg) \
+#define PARENT_EXIT() \
do { \
- perror(err_msg); \
kill(ppid, SIGKILL); \
umount_resctrlfs(); \
exit(EXIT_FAILURE); \
} while (0)
/*
+ * user_params: User supplied parameters
+ * @cpu: CPU number to which the benchmark will be bound to
+ * @bits: Number of bits used for cache allocation size
+ * @benchmark_cmd: Benchmark command to run during (some of the) tests
+ */
+struct user_params {
+ int cpu;
+ int bits;
+ const char *benchmark_cmd[BENCHMARK_ARGS];
+};
+
+/*
+ * resctrl_test: resctrl test definition
+ * @name: Test name
+ * @group: Test group - a common name for tests that share some characteristic
+ * (e.g., L3 CAT test belongs to the CAT group). Can be NULL
+ * @resource: Resource to test (e.g., MB, L3, L2, etc.)
+ * @vendor_specific: Bitmask for vendor-specific tests (can be 0 for universal tests)
+ * @disabled: Test is disabled
+ * @feature_check: Callback to check required resctrl features
+ * @run_test: Callback to run the test
+ */
+struct resctrl_test {
+ const char *name;
+ const char *group;
+ const char *resource;
+ unsigned int vendor_specific;
+ bool disabled;
+ bool (*feature_check)(const struct resctrl_test *test);
+ int (*run_test)(const struct resctrl_test *test,
+ const struct user_params *uparams);
+};
+
+/*
* resctrl_val_param: resctrl test parameters
* @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
- * @cpu_no: CPU number to which the benchmark would be binded
* @filename: Name of file to which the o/p should be written
* @bw_report: Bandwidth report type (reads vs writes)
* @setup: Call back function to setup test environment
@@ -59,12 +97,20 @@ struct resctrl_val_param {
char *resctrl_val;
char ctrlgrp[64];
char mongrp[64];
- int cpu_no;
char filename[64];
char *bw_report;
unsigned long mask;
int num_of_runs;
- int (*setup)(struct resctrl_val_param *param);
+ int (*setup)(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *param);
+};
+
+struct perf_event_read {
+ __u64 nr; /* The number of events */
+ struct {
+ __u64 value; /* The value of the event */
+ } values[2];
};
#define MBM_STR "mbm"
@@ -72,6 +118,13 @@ struct resctrl_val_param {
#define CMT_STR "cmt"
#define CAT_STR "cat"
+/*
+ * Memory location that consumes values compiler must not optimize away.
+ * Volatile ensures writes to this location cannot be optimized away by
+ * compiler.
+ */
+extern volatile int *value_sink;
+
extern pid_t bm_pid, ppid;
extern char llc_occup_path[1024];
@@ -79,42 +132,84 @@ extern char llc_occup_path[1024];
int get_vendor(void);
bool check_resctrlfs_support(void);
int filter_dmesg(void);
-int get_resource_id(int cpu_no, int *resource_id);
+int get_domain_id(const char *resource, int cpu_no, int *domain_id);
int mount_resctrlfs(void);
int umount_resctrlfs(void);
int validate_bw_report_request(char *bw_report);
-bool validate_resctrl_feature_request(const char *resource, const char *feature);
+bool resctrl_resource_exists(const char *resource);
+bool resctrl_mon_feature_exists(const char *resource, const char *feature);
+bool resource_info_file_exists(const char *resource, const char *file);
+bool test_resource_feature_check(const struct resctrl_test *test);
char *fgrep(FILE *inf, const char *str);
-int taskset_benchmark(pid_t bm_pid, int cpu_no);
-int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
- char *resctrl_val);
+int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity);
+int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity);
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource);
int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
char *resctrl_val);
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags);
-int run_fill_buf(size_t span, int memflush, int op, bool once);
-int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param);
-int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd);
+unsigned char *alloc_buffer(size_t buf_size, int memflush);
+void mem_flush(unsigned char *buf, size_t buf_size);
+void fill_cache_read(unsigned char *buf, size_t buf_size, bool once);
+int run_fill_buf(size_t buf_size, int memflush, int op, bool once);
+int resctrl_val(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ const char * const *benchmark_cmd,
+ struct resctrl_val_param *param);
void tests_cleanup(void);
void mbm_test_cleanup(void);
-int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd);
void mba_test_cleanup(void);
-int get_cbm_mask(char *cache_type, char *cbm_mask);
-int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
+unsigned long create_bit_mask(unsigned int start, unsigned int len);
+unsigned int count_contiguous_bits(unsigned long val, unsigned int *start);
+int get_full_cbm(const char *cache_type, unsigned long *mask);
+int get_mask_no_shareable(const char *cache_type, unsigned long *mask);
+int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size);
+int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
int signal_handler_register(void);
void signal_handler_unregister(void);
-int cat_val(struct resctrl_val_param *param, size_t span);
void cat_test_cleanup(void);
-int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
-int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd);
unsigned int count_bits(unsigned long n);
void cmt_test_cleanup(void);
-int get_core_sibling(int cpu_no);
-int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
-int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
- size_t cache_span, unsigned long max_diff,
- unsigned long max_diff_percent, unsigned long num_of_runs,
- bool platform, bool cmt);
+
+void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config);
+void perf_event_initialize_read_format(struct perf_event_read *pe_read);
+int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no);
+int perf_event_reset_enable(int pe_fd);
+int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
+ const char *filename, int bm_pid);
+int measure_llc_resctrl(const char *filename, int bm_pid);
+void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines);
+
+/*
+ * cache_portion_size - Calculate the size of a cache portion
+ * @cache_size: Total cache size in bytes
+ * @portion_mask: Cache portion mask
+ * @full_cache_mask: Full Cache Bit Mask (CBM) for the cache
+ *
+ * Return: The size of the cache portion in bytes.
+ */
+static inline unsigned long cache_portion_size(unsigned long cache_size,
+ unsigned long portion_mask,
+ unsigned long full_cache_mask)
+{
+ unsigned int bits = count_bits(full_cache_mask);
+
+ /*
+ * With no bits the full CBM, assume cache cannot be split into
+ * smaller portions. To avoid divide by zero, return cache_size.
+ */
+ if (!bits)
+ return cache_size;
+
+ return cache_size * count_bits(portion_mask) / bits;
+}
+
+extern struct resctrl_test mbm_test;
+extern struct resctrl_test mba_test;
+extern struct resctrl_test cmt_test;
+extern struct resctrl_test l3_cat_test;
+extern struct resctrl_test l3_noncont_cat_test;
+extern struct resctrl_test l2_noncont_cat_test;
#endif /* RESCTRL_H */
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 2bbe3045a018..f3dc1b9696e7 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -10,6 +10,19 @@
*/
#include "resctrl.h"
+/* Volatile memory sink to prevent compiler optimizations */
+static volatile int sink_target;
+volatile int *value_sink = &sink_target;
+
+static struct resctrl_test *resctrl_tests[] = {
+ &mbm_test,
+ &mba_test,
+ &cmt_test,
+ &l3_cat_test,
+ &l3_noncont_cat_test,
+ &l2_noncont_cat_test,
+};
+
static int detect_vendor(void)
{
FILE *inf = fopen("/proc/cpuinfo", "r");
@@ -49,11 +62,20 @@ int get_vendor(void)
static void cmd_help(void)
{
+ int i;
+
printf("usage: resctrl_tests [-h] [-t test list] [-n no_of_bits] [-b benchmark_cmd [option]...]\n");
printf("\t-b benchmark_cmd [option]...: run specified benchmark for MBM, MBA and CMT\n");
printf("\t default benchmark is builtin fill_buf\n");
- printf("\t-t test list: run tests specified in the test list, ");
+ printf("\t-t test list: run tests/groups specified by the list, ");
printf("e.g. -t mbm,mba,cmt,cat\n");
+ printf("\t\tSupported tests (group):\n");
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) {
+ if (resctrl_tests[i]->group)
+ printf("\t\t\t%s (%s)\n", resctrl_tests[i]->name, resctrl_tests[i]->group);
+ else
+ printf("\t\t\t%s\n", resctrl_tests[i]->name);
+ }
printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
printf("\t-h: help\n");
@@ -92,116 +114,63 @@ static void test_cleanup(void)
signal_handler_unregister();
}
-static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no)
+static bool test_vendor_specific_check(const struct resctrl_test *test)
{
- int res;
-
- ksft_print_msg("Starting MBM BW change ...\n");
-
- if (test_prepare()) {
- ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
- return;
- }
-
- if (!validate_resctrl_feature_request("L3_MON", "mbm_total_bytes") ||
- !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") ||
- (get_vendor() != ARCH_INTEL)) {
- ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
- goto cleanup;
- }
-
- res = mbm_bw_change(cpu_no, benchmark_cmd);
- ksft_test_result(!res, "MBM: bw change\n");
- if ((get_vendor() == ARCH_INTEL) && res)
- ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+ if (!test->vendor_specific)
+ return true;
-cleanup:
- test_cleanup();
+ return get_vendor() & test->vendor_specific;
}
-static void run_mba_test(const char * const *benchmark_cmd, int cpu_no)
+static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams)
{
- int res;
-
- ksft_print_msg("Starting MBA Schemata change ...\n");
+ int ret;
- if (test_prepare()) {
- ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
+ if (test->disabled)
return;
- }
- if (!validate_resctrl_feature_request("MB", NULL) ||
- !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") ||
- (get_vendor() != ARCH_INTEL)) {
- ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
- goto cleanup;
+ if (!test_vendor_specific_check(test)) {
+ ksft_test_result_skip("Hardware does not support %s\n", test->name);
+ return;
}
- res = mba_schemata_change(cpu_no, benchmark_cmd);
- ksft_test_result(!res, "MBA: schemata change\n");
-
-cleanup:
- test_cleanup();
-}
-
-static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no)
-{
- int res;
-
- ksft_print_msg("Starting CMT test ...\n");
+ ksft_print_msg("Starting %s test ...\n", test->name);
if (test_prepare()) {
ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
- if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy") ||
- !validate_resctrl_feature_request("L3", NULL)) {
- ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
+ if (!test->feature_check(test)) {
+ ksft_test_result_skip("Hardware does not support %s or %s is disabled\n",
+ test->name, test->name);
goto cleanup;
}
- res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
- ksft_test_result(!res, "CMT: test\n");
- if ((get_vendor() == ARCH_INTEL) && res)
- ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+ ret = test->run_test(test, uparams);
+ ksft_test_result(!ret, "%s: test\n", test->name);
cleanup:
test_cleanup();
}
-static void run_cat_test(int cpu_no, int no_of_bits)
+static void init_user_params(struct user_params *uparams)
{
- int res;
-
- ksft_print_msg("Starting CAT test ...\n");
-
- if (test_prepare()) {
- ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
- return;
- }
-
- if (!validate_resctrl_feature_request("L3", NULL)) {
- ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
- goto cleanup;
- }
+ memset(uparams, 0, sizeof(*uparams));
- res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
- ksft_test_result(!res, "CAT: test\n");
-
-cleanup:
- test_cleanup();
+ uparams->cpu = 1;
+ uparams->bits = 0;
}
int main(int argc, char **argv)
{
- bool mbm_test = true, mba_test = true, cmt_test = true;
- const char *benchmark_cmd[BENCHMARK_ARGS] = {};
- int c, cpu_no = 1, i, no_of_bits = 0;
+ int tests = ARRAY_SIZE(resctrl_tests);
+ bool test_param_seen = false;
+ struct user_params uparams;
char *span_str = NULL;
- bool cat_test = true;
- int tests = 0;
- int ret;
+ int ret, c, i;
+
+ init_user_params(&uparams);
while ((c = getopt(argc, argv, "ht:b:n:p:")) != -1) {
char *token;
@@ -219,32 +188,35 @@ int main(int argc, char **argv)
/* Extract benchmark command from command line. */
for (i = 0; i < argc - optind; i++)
- benchmark_cmd[i] = argv[i + optind];
- benchmark_cmd[i] = NULL;
+ uparams.benchmark_cmd[i] = argv[i + optind];
+ uparams.benchmark_cmd[i] = NULL;
goto last_arg;
case 't':
token = strtok(optarg, ",");
- mbm_test = false;
- mba_test = false;
- cmt_test = false;
- cat_test = false;
+ if (!test_param_seen) {
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++)
+ resctrl_tests[i]->disabled = true;
+ tests = 0;
+ test_param_seen = true;
+ }
while (token) {
- if (!strncmp(token, MBM_STR, sizeof(MBM_STR))) {
- mbm_test = true;
- tests++;
- } else if (!strncmp(token, MBA_STR, sizeof(MBA_STR))) {
- mba_test = true;
- tests++;
- } else if (!strncmp(token, CMT_STR, sizeof(CMT_STR))) {
- cmt_test = true;
- tests++;
- } else if (!strncmp(token, CAT_STR, sizeof(CAT_STR))) {
- cat_test = true;
- tests++;
- } else {
- printf("invalid argument\n");
+ bool found = false;
+
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) {
+ if (!strcasecmp(token, resctrl_tests[i]->name) ||
+ (resctrl_tests[i]->group &&
+ !strcasecmp(token, resctrl_tests[i]->group))) {
+ if (resctrl_tests[i]->disabled)
+ tests++;
+ resctrl_tests[i]->disabled = false;
+ found = true;
+ }
+ }
+
+ if (!found) {
+ printf("invalid test: %s\n", token);
return -1;
}
@@ -252,11 +224,11 @@ int main(int argc, char **argv)
}
break;
case 'p':
- cpu_no = atoi(optarg);
+ uparams.cpu = atoi(optarg);
break;
case 'n':
- no_of_bits = atoi(optarg);
- if (no_of_bits <= 0) {
+ uparams.bits = atoi(optarg);
+ if (uparams.bits <= 0) {
printf("Bail out! invalid argument for no_of_bits\n");
return -1;
}
@@ -291,32 +263,23 @@ last_arg:
filter_dmesg();
- if (!benchmark_cmd[0]) {
+ if (!uparams.benchmark_cmd[0]) {
/* If no benchmark is given by "-b" argument, use fill_buf. */
- benchmark_cmd[0] = "fill_buf";
+ uparams.benchmark_cmd[0] = "fill_buf";
ret = asprintf(&span_str, "%u", DEFAULT_SPAN);
if (ret < 0)
ksft_exit_fail_msg("Out of memory!\n");
- benchmark_cmd[1] = span_str;
- benchmark_cmd[2] = "1";
- benchmark_cmd[3] = "0";
- benchmark_cmd[4] = "false";
- benchmark_cmd[5] = NULL;
+ uparams.benchmark_cmd[1] = span_str;
+ uparams.benchmark_cmd[2] = "1";
+ uparams.benchmark_cmd[3] = "0";
+ uparams.benchmark_cmd[4] = "false";
+ uparams.benchmark_cmd[5] = NULL;
}
- ksft_set_plan(tests ? : 4);
-
- if (mbm_test)
- run_mbm_test(benchmark_cmd, cpu_no);
-
- if (mba_test)
- run_mba_test(benchmark_cmd, cpu_no);
-
- if (cmt_test)
- run_cmt_test(benchmark_cmd, cpu_no);
+ ksft_set_plan(tests);
- if (cat_test)
- run_cat_test(cpu_no, no_of_bits);
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++)
+ run_single_test(resctrl_tests[i], &uparams);
free(span_str);
ksft_finished();
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 88789678917b..5a49f07a6c85 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -156,12 +156,12 @@ static int read_from_imc_dir(char *imc_dir, int count)
sprintf(imc_counter_type, "%s%s", imc_dir, "type");
fp = fopen(imc_counter_type, "r");
if (!fp) {
- perror("Failed to open imc counter type file");
+ ksft_perror("Failed to open iMC counter type file");
return -1;
}
if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
- perror("Could not get imc type");
+ ksft_perror("Could not get iMC type");
fclose(fp);
return -1;
@@ -175,12 +175,12 @@ static int read_from_imc_dir(char *imc_dir, int count)
sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
fp = fopen(imc_counter_cfg, "r");
if (!fp) {
- perror("Failed to open imc config file");
+ ksft_perror("Failed to open iMC config file");
return -1;
}
if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
- perror("Could not get imc cas count read");
+ ksft_perror("Could not get iMC cas count read");
fclose(fp);
return -1;
@@ -193,12 +193,12 @@ static int read_from_imc_dir(char *imc_dir, int count)
sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
fp = fopen(imc_counter_cfg, "r");
if (!fp) {
- perror("Failed to open imc config file");
+ ksft_perror("Failed to open iMC config file");
return -1;
}
if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
- perror("Could not get imc cas count write");
+ ksft_perror("Could not get iMC cas count write");
fclose(fp);
return -1;
@@ -262,12 +262,12 @@ static int num_of_imcs(void)
}
closedir(dp);
if (count == 0) {
- perror("Unable find iMC counters!\n");
+ ksft_print_msg("Unable to find iMC counters\n");
return -1;
}
} else {
- perror("Unable to open PMU directory!\n");
+ ksft_perror("Unable to open PMU directory");
return -1;
}
@@ -339,14 +339,14 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
if (read(r->fd, &r->return_value,
sizeof(struct membw_read_format)) == -1) {
- perror("Couldn't get read b/w through iMC");
+ ksft_perror("Couldn't get read b/w through iMC");
return -1;
}
if (read(w->fd, &w->return_value,
sizeof(struct membw_read_format)) == -1) {
- perror("Couldn't get write bw through iMC");
+ ksft_perror("Couldn't get write bw through iMC");
return -1;
}
@@ -387,20 +387,20 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
return 0;
}
-void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
+void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id)
{
if (ctrlgrp && mongrp)
sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
+ RESCTRL_PATH, ctrlgrp, mongrp, domain_id);
else if (!ctrlgrp && mongrp)
sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- mongrp, resource_id);
+ mongrp, domain_id);
else if (ctrlgrp && !mongrp)
sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- ctrlgrp, resource_id);
+ ctrlgrp, domain_id);
else if (!ctrlgrp && !mongrp)
sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- resource_id);
+ domain_id);
}
/*
@@ -413,23 +413,23 @@ void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
int cpu_no, char *resctrl_val)
{
- int resource_id;
+ int domain_id;
- if (get_resource_id(cpu_no, &resource_id) < 0) {
- perror("Could not get resource_id");
+ if (get_domain_id("MB", cpu_no, &domain_id) < 0) {
+ ksft_print_msg("Could not get domain ID\n");
return;
}
if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
- set_mbm_path(ctrlgrp, mongrp, resource_id);
+ set_mbm_path(ctrlgrp, mongrp, domain_id);
if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
if (ctrlgrp)
sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, ctrlgrp, resource_id);
+ RESCTRL_PATH, ctrlgrp, domain_id);
else
sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, resource_id);
+ RESCTRL_PATH, domain_id);
}
}
@@ -449,12 +449,12 @@ static int get_mem_bw_resctrl(unsigned long *mbm_total)
fp = fopen(mbm_total_path, "r");
if (!fp) {
- perror("Failed to open total bw file");
+ ksft_perror("Failed to open total bw file");
return -1;
}
if (fscanf(fp, "%lu", mbm_total) <= 0) {
- perror("Could not get mbm local bytes");
+ ksft_perror("Could not get mbm local bytes");
fclose(fp);
return -1;
@@ -495,7 +495,7 @@ int signal_handler_register(void)
if (sigaction(SIGINT, &sigact, NULL) ||
sigaction(SIGTERM, &sigact, NULL) ||
sigaction(SIGHUP, &sigact, NULL)) {
- perror("# sigaction");
+ ksft_perror("sigaction");
ret = -1;
}
return ret;
@@ -515,7 +515,7 @@ void signal_handler_unregister(void)
if (sigaction(SIGINT, &sigact, NULL) ||
sigaction(SIGTERM, &sigact, NULL) ||
sigaction(SIGHUP, &sigact, NULL)) {
- perror("# sigaction");
+ ksft_perror("sigaction");
}
}
@@ -526,7 +526,7 @@ void signal_handler_unregister(void)
* @bw_imc: perf imc counter value
* @bw_resc: memory bandwidth value
*
- * Return: 0 on success. non-zero on failure.
+ * Return: 0 on success, < 0 on error.
*/
static int print_results_bw(char *filename, int bm_pid, float bw_imc,
unsigned long bw_resc)
@@ -540,16 +540,16 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc,
} else {
fp = fopen(filename, "a");
if (!fp) {
- perror("Cannot open results file");
+ ksft_perror("Cannot open results file");
- return errno;
+ return -1;
}
if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
bm_pid, bw_imc, bw_resc, diff) <= 0) {
+ ksft_print_msg("Could not log results\n");
fclose(fp);
- perror("Could not log results.");
- return errno;
+ return -1;
}
fclose(fp);
}
@@ -582,19 +582,20 @@ static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
int cpu_no, char *resctrl_val)
{
- int resource_id;
+ int domain_id;
- if (get_resource_id(cpu_no, &resource_id) < 0) {
- perror("# Unable to resource_id");
+ if (get_domain_id("L3", cpu_no, &domain_id) < 0) {
+ ksft_print_msg("Could not get domain ID\n");
return;
}
if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
- set_cmt_path(ctrlgrp, mongrp, resource_id);
+ set_cmt_path(ctrlgrp, mongrp, domain_id);
}
-static int
-measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
+static int measure_vals(const struct user_params *uparams,
+ struct resctrl_val_param *param,
+ unsigned long *bw_resc_start)
{
unsigned long bw_resc, bw_resc_end;
float bw_imc;
@@ -607,7 +608,7 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
* Compare the two values to validate resctrl value.
* It takes 1sec to measure the data.
*/
- ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc);
+ ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc);
if (ret < 0)
return ret;
@@ -647,20 +648,24 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
* stdio (console)
*/
fp = freopen("/dev/null", "w", stdout);
- if (!fp)
- PARENT_EXIT("Unable to direct benchmark status to /dev/null");
+ if (!fp) {
+ ksft_perror("Unable to direct benchmark status to /dev/null");
+ PARENT_EXIT();
+ }
if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
/* Execute default fill_buf benchmark */
span = strtoul(benchmark_cmd[1], NULL, 10);
memflush = atoi(benchmark_cmd[2]);
operation = atoi(benchmark_cmd[3]);
- if (!strcmp(benchmark_cmd[4], "true"))
+ if (!strcmp(benchmark_cmd[4], "true")) {
once = true;
- else if (!strcmp(benchmark_cmd[4], "false"))
+ } else if (!strcmp(benchmark_cmd[4], "false")) {
once = false;
- else
- PARENT_EXIT("Invalid once parameter");
+ } else {
+ ksft_print_msg("Invalid once parameter\n");
+ PARENT_EXIT();
+ }
if (run_fill_buf(span, memflush, operation, once))
fprintf(stderr, "Error in running fill buffer\n");
@@ -668,22 +673,28 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
/* Execute specified benchmark */
ret = execvp(benchmark_cmd[0], benchmark_cmd);
if (ret)
- perror("wrong\n");
+ ksft_perror("execvp");
}
fclose(stdout);
- PARENT_EXIT("Unable to run specified benchmark");
+ ksft_print_msg("Unable to run specified benchmark\n");
+ PARENT_EXIT();
}
/*
* resctrl_val: execute benchmark and measure memory bandwidth on
* the benchmark
+ * @test: test information structure
+ * @uparams: user supplied parameters
* @benchmark_cmd: benchmark command and its arguments
* @param: parameters passed to resctrl_val()
*
- * Return: 0 on success. non-zero on failure.
+ * Return: 0 when the test was run, < 0 on error.
*/
-int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param)
+int resctrl_val(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ const char * const *benchmark_cmd,
+ struct resctrl_val_param *param)
{
char *resctrl_val = param->resctrl_val;
unsigned long bw_resc_start = 0;
@@ -709,7 +720,7 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
ppid = getpid();
if (pipe(pipefd)) {
- perror("# Unable to create pipe");
+ ksft_perror("Unable to create pipe");
return -1;
}
@@ -721,7 +732,7 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
fflush(stdout);
bm_pid = fork();
if (bm_pid == -1) {
- perror("# Unable to fork");
+ ksft_perror("Unable to fork");
return -1;
}
@@ -738,15 +749,17 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
sigact.sa_flags = SA_SIGINFO;
/* Register for "SIGUSR1" signal from parent */
- if (sigaction(SIGUSR1, &sigact, NULL))
- PARENT_EXIT("Can't register child for signal");
+ if (sigaction(SIGUSR1, &sigact, NULL)) {
+ ksft_perror("Can't register child for signal");
+ PARENT_EXIT();
+ }
/* Tell parent that child is ready */
close(pipefd[0]);
pipe_message = 1;
if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
sizeof(pipe_message)) {
- perror("# failed signaling parent process");
+ ksft_perror("Failed signaling parent process");
close(pipefd[1]);
return -1;
}
@@ -755,7 +768,8 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
/* Suspend child until delivery of "SIGUSR1" from parent */
sigsuspend(&sigact.sa_mask);
- PARENT_EXIT("Child is done");
+ ksft_perror("Child is done");
+ PARENT_EXIT();
}
ksft_print_msg("Benchmark PID: %d\n", bm_pid);
@@ -769,7 +783,7 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
value.sival_ptr = (void *)benchmark_cmd;
/* Taskset benchmark to specified cpu */
- ret = taskset_benchmark(bm_pid, param->cpu_no);
+ ret = taskset_benchmark(bm_pid, uparams->cpu, NULL);
if (ret)
goto out;
@@ -786,17 +800,17 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
goto out;
initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
- param->cpu_no, resctrl_val);
+ uparams->cpu, resctrl_val);
} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
- param->cpu_no, resctrl_val);
+ uparams->cpu, resctrl_val);
/* Parent waits for child to be ready. */
close(pipefd[1]);
while (pipe_message != 1) {
if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
sizeof(pipe_message)) {
- perror("# failed reading message from child process");
+ ksft_perror("Failed reading message from child process");
close(pipefd[0]);
goto out;
}
@@ -805,8 +819,8 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
/* Signal child to start benchmark */
if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
- perror("# sigqueue SIGUSR1 to child");
- ret = errno;
+ ksft_perror("sigqueue SIGUSR1 to child");
+ ret = -1;
goto out;
}
@@ -815,7 +829,7 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
/* Test runs until the callback setup() tells the test to stop. */
while (1) {
- ret = param->setup(param);
+ ret = param->setup(test, uparams, param);
if (ret == END_OF_TESTS) {
ret = 0;
break;
@@ -825,12 +839,12 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
- ret = measure_vals(param, &bw_resc_start);
+ ret = measure_vals(uparams, param, &bw_resc_start);
if (ret)
break;
} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
sleep(1);
- ret = measure_cache_vals(param, bm_pid);
+ ret = measure_llc_resctrl(param->filename, bm_pid);
if (ret)
break;
}
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 5ebd43683876..1cade75176eb 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -20,7 +20,7 @@ static int find_resctrl_mount(char *buffer)
mounts = fopen("/proc/mounts", "r");
if (!mounts) {
- perror("/proc/mounts");
+ ksft_perror("/proc/mounts");
return -ENXIO;
}
while (!feof(mounts)) {
@@ -56,7 +56,7 @@ static int find_resctrl_mount(char *buffer)
* Mounts resctrl FS. Fails if resctrl FS is already mounted to avoid
* pre-existing settings interfering with the test results.
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
int mount_resctrlfs(void)
{
@@ -69,7 +69,7 @@ int mount_resctrlfs(void)
ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
if (ret)
- perror("# mount");
+ ksft_perror("mount");
return ret;
}
@@ -86,41 +86,67 @@ int umount_resctrlfs(void)
return ret;
if (umount(mountpoint)) {
- perror("# Unable to umount resctrl");
+ ksft_perror("Unable to umount resctrl");
- return errno;
+ return -1;
}
return 0;
}
/*
- * get_resource_id - Get socket number/l3 id for a specified CPU
+ * get_cache_level - Convert cache level from string to integer
+ * @cache_type: Cache level as string
+ *
+ * Return: cache level as integer or -1 if @cache_type is invalid.
+ */
+static int get_cache_level(const char *cache_type)
+{
+ if (!strcmp(cache_type, "L3"))
+ return 3;
+ if (!strcmp(cache_type, "L2"))
+ return 2;
+
+ ksft_print_msg("Invalid cache level\n");
+ return -1;
+}
+
+static int get_resource_cache_level(const char *resource)
+{
+ /* "MB" use L3 (LLC) as resource */
+ if (!strcmp(resource, "MB"))
+ return 3;
+ return get_cache_level(resource);
+}
+
+/*
+ * get_domain_id - Get resctrl domain ID for a specified CPU
+ * @resource: resource name
* @cpu_no: CPU number
- * @resource_id: Socket number or l3_id
+ * @domain_id: domain ID (cache ID; for MB, L3 cache ID)
*
* Return: >= 0 on success, < 0 on failure.
*/
-int get_resource_id(int cpu_no, int *resource_id)
+int get_domain_id(const char *resource, int cpu_no, int *domain_id)
{
char phys_pkg_path[1024];
+ int cache_num;
FILE *fp;
- if (get_vendor() == ARCH_AMD)
- sprintf(phys_pkg_path, "%s%d/cache/index3/id",
- PHYS_ID_PATH, cpu_no);
- else
- sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
- PHYS_ID_PATH, cpu_no);
+ cache_num = get_resource_cache_level(resource);
+ if (cache_num < 0)
+ return cache_num;
+
+ sprintf(phys_pkg_path, "%s%d/cache/index%d/id", PHYS_ID_PATH, cpu_no, cache_num);
fp = fopen(phys_pkg_path, "r");
if (!fp) {
- perror("Failed to open physical_package_id");
+ ksft_perror("Failed to open cache id file");
return -1;
}
- if (fscanf(fp, "%d", resource_id) <= 0) {
- perror("Could not get socket number or l3 id");
+ if (fscanf(fp, "%d", domain_id) <= 0) {
+ ksft_perror("Could not get domain ID");
fclose(fp);
return -1;
@@ -138,31 +164,26 @@ int get_resource_id(int cpu_no, int *resource_id)
*
* Return: = 0 on success, < 0 on failure.
*/
-int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
+int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size)
{
char cache_path[1024], cache_str[64];
int length, i, cache_num;
FILE *fp;
- if (!strcmp(cache_type, "L3")) {
- cache_num = 3;
- } else if (!strcmp(cache_type, "L2")) {
- cache_num = 2;
- } else {
- perror("Invalid cache level");
- return -1;
- }
+ cache_num = get_cache_level(cache_type);
+ if (cache_num < 0)
+ return cache_num;
sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size",
cpu_no, cache_num);
fp = fopen(cache_path, "r");
if (!fp) {
- perror("Failed to open cache size");
+ ksft_perror("Failed to open cache size");
return -1;
}
if (fscanf(fp, "%s", cache_str) <= 0) {
- perror("Could not get cache_size");
+ ksft_perror("Could not get cache_size");
fclose(fp);
return -1;
@@ -196,30 +217,29 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
#define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu"
/*
- * get_cbm_mask - Get cbm mask for given cache
- * @cache_type: Cache level L2/L3
- * @cbm_mask: cbm_mask returned as a string
+ * get_bit_mask - Get bit mask from given file
+ * @filename: File containing the mask
+ * @mask: The bit mask returned as unsigned long
*
* Return: = 0 on success, < 0 on failure.
*/
-int get_cbm_mask(char *cache_type, char *cbm_mask)
+static int get_bit_mask(const char *filename, unsigned long *mask)
{
- char cbm_mask_path[1024];
FILE *fp;
- if (!cbm_mask)
+ if (!filename || !mask)
return -1;
- sprintf(cbm_mask_path, "%s/%s/cbm_mask", INFO_PATH, cache_type);
-
- fp = fopen(cbm_mask_path, "r");
+ fp = fopen(filename, "r");
if (!fp) {
- perror("Failed to open cache level");
-
+ ksft_print_msg("Failed to open bit mask file '%s': %s\n",
+ filename, strerror(errno));
return -1;
}
- if (fscanf(fp, "%s", cbm_mask) <= 0) {
- perror("Could not get max cbm_mask");
+
+ if (fscanf(fp, "%lx", mask) <= 0) {
+ ksft_print_msg("Could not read bit mask file '%s': %s\n",
+ filename, strerror(errno));
fclose(fp);
return -1;
@@ -230,64 +250,200 @@ int get_cbm_mask(char *cache_type, char *cbm_mask)
}
/*
- * get_core_sibling - Get sibling core id from the same socket for given CPU
- * @cpu_no: CPU number
+ * resource_info_unsigned_get - Read an unsigned value from
+ * /sys/fs/resctrl/info/@resource/@filename
+ * @resource: Resource name that matches directory name in
+ * /sys/fs/resctrl/info
+ * @filename: File in /sys/fs/resctrl/info/@resource
+ * @val: Contains read value on success.
*
- * Return: > 0 on success, < 0 on failure.
+ * Return: = 0 on success, < 0 on failure. On success the read
+ * value is saved into @val.
*/
-int get_core_sibling(int cpu_no)
+int resource_info_unsigned_get(const char *resource, const char *filename,
+ unsigned int *val)
{
- char core_siblings_path[1024], cpu_list_str[64];
- int sibling_cpu_no = -1;
+ char file_path[PATH_MAX];
FILE *fp;
- sprintf(core_siblings_path, "%s%d/topology/core_siblings_list",
- CORE_SIBLINGS_PATH, cpu_no);
+ snprintf(file_path, sizeof(file_path), "%s/%s/%s", INFO_PATH, resource,
+ filename);
- fp = fopen(core_siblings_path, "r");
+ fp = fopen(file_path, "r");
if (!fp) {
- perror("Failed to open core siblings path");
-
+ ksft_print_msg("Error opening %s: %m\n", file_path);
return -1;
}
- if (fscanf(fp, "%s", cpu_list_str) <= 0) {
- perror("Could not get core_siblings list");
- fclose(fp);
+ if (fscanf(fp, "%u", val) <= 0) {
+ ksft_print_msg("Could not get contents of %s: %m\n", file_path);
+ fclose(fp);
return -1;
}
+
fclose(fp);
+ return 0;
+}
- char *token = strtok(cpu_list_str, "-,");
+/*
+ * create_bit_mask- Create bit mask from start, len pair
+ * @start: LSB of the mask
+ * @len Number of bits in the mask
+ */
+unsigned long create_bit_mask(unsigned int start, unsigned int len)
+{
+ return ((1UL << len) - 1UL) << start;
+}
- while (token) {
- sibling_cpu_no = atoi(token);
- /* Skipping core 0 as we don't want to run test on core 0 */
- if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no)
- break;
- token = strtok(NULL, "-,");
+/*
+ * count_contiguous_bits - Returns the longest train of bits in a bit mask
+ * @val A bit mask
+ * @start The location of the least-significant bit of the longest train
+ *
+ * Return: The length of the contiguous bits in the longest train of bits
+ */
+unsigned int count_contiguous_bits(unsigned long val, unsigned int *start)
+{
+ unsigned long last_val;
+ unsigned int count = 0;
+
+ while (val) {
+ last_val = val;
+ val &= (val >> 1);
+ count++;
+ }
+
+ if (start) {
+ if (count)
+ *start = ffsl(last_val) - 1;
+ else
+ *start = 0;
}
- return sibling_cpu_no;
+ return count;
+}
+
+/*
+ * get_full_cbm - Get full Cache Bit Mask (CBM)
+ * @cache_type: Cache type as "L2" or "L3"
+ * @mask: Full cache bit mask representing the maximal portion of cache
+ * available for allocation, returned as unsigned long.
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_full_cbm(const char *cache_type, unsigned long *mask)
+{
+ char cbm_path[PATH_MAX];
+ int ret;
+
+ if (!cache_type)
+ return -1;
+
+ snprintf(cbm_path, sizeof(cbm_path), "%s/%s/cbm_mask",
+ INFO_PATH, cache_type);
+
+ ret = get_bit_mask(cbm_path, mask);
+ if (ret || !*mask)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * get_shareable_mask - Get shareable mask from shareable_bits
+ * @cache_type: Cache type as "L2" or "L3"
+ * @shareable_mask: Shareable mask returned as unsigned long
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+static int get_shareable_mask(const char *cache_type, unsigned long *shareable_mask)
+{
+ char mask_path[PATH_MAX];
+
+ if (!cache_type)
+ return -1;
+
+ snprintf(mask_path, sizeof(mask_path), "%s/%s/shareable_bits",
+ INFO_PATH, cache_type);
+
+ return get_bit_mask(mask_path, shareable_mask);
+}
+
+/*
+ * get_mask_no_shareable - Get Cache Bit Mask (CBM) without shareable bits
+ * @cache_type: Cache type as "L2" or "L3"
+ * @mask: The largest exclusive portion of the cache out of the
+ * full CBM, returned as unsigned long
+ *
+ * Parts of a cache may be shared with other devices such as GPU. This function
+ * calculates the largest exclusive portion of the cache where no other devices
+ * besides CPU have access to the cache portion.
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_mask_no_shareable(const char *cache_type, unsigned long *mask)
+{
+ unsigned long full_mask, shareable_mask;
+ unsigned int start, len;
+
+ if (get_full_cbm(cache_type, &full_mask) < 0)
+ return -1;
+ if (get_shareable_mask(cache_type, &shareable_mask) < 0)
+ return -1;
+
+ len = count_contiguous_bits(full_mask & ~shareable_mask, &start);
+ if (!len)
+ return -1;
+
+ *mask = create_bit_mask(start, len);
+
+ return 0;
}
/*
* taskset_benchmark - Taskset PID (i.e. benchmark) to a specified cpu
- * @bm_pid: PID that should be binded
- * @cpu_no: CPU number at which the PID would be binded
+ * @bm_pid: PID that should be binded
+ * @cpu_no: CPU number at which the PID would be binded
+ * @old_affinity: When not NULL, set to old CPU affinity
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
-int taskset_benchmark(pid_t bm_pid, int cpu_no)
+int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity)
{
cpu_set_t my_set;
+ if (old_affinity) {
+ CPU_ZERO(old_affinity);
+ if (sched_getaffinity(bm_pid, sizeof(*old_affinity),
+ old_affinity)) {
+ ksft_perror("Unable to read CPU affinity");
+ return -1;
+ }
+ }
+
CPU_ZERO(&my_set);
CPU_SET(cpu_no, &my_set);
if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) {
- perror("Unable to taskset benchmark");
+ ksft_perror("Unable to taskset benchmark");
+
+ return -1;
+ }
+
+ return 0;
+}
+/*
+ * taskset_restore - Taskset PID to the earlier CPU affinity
+ * @bm_pid: PID that should be reset
+ * @old_affinity: The old CPU affinity to restore
+ *
+ * Return: 0 on success, < 0 on error.
+ */
+int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity)
+{
+ if (sched_setaffinity(bm_pid, sizeof(*old_affinity), old_affinity)) {
+ ksft_perror("Unable to restore CPU affinity");
return -1;
}
@@ -300,7 +456,7 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
* @grp: Full path and name of the group
* @parent_grp: Full path and name of the parent group
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
{
@@ -325,7 +481,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
}
closedir(dp);
} else {
- perror("Unable to open resctrl for group");
+ ksft_perror("Unable to open resctrl for group");
return -1;
}
@@ -333,7 +489,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
/* Requested grp doesn't exist, hence create it */
if (found_grp == 0) {
if (mkdir(grp, 0) == -1) {
- perror("Unable to create group");
+ ksft_perror("Unable to create group");
return -1;
}
@@ -348,12 +504,12 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
fp = fopen(tasks, "w");
if (!fp) {
- perror("Failed to open tasks file");
+ ksft_perror("Failed to open tasks file");
return -1;
}
if (fprintf(fp, "%d\n", pid) < 0) {
- perror("Failed to wr pid to tasks file");
+ ksft_print_msg("Failed to write pid to tasks file\n");
fclose(fp);
return -1;
@@ -376,7 +532,7 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
* pid is not written, this means that pid is in con_mon grp and hence
* should consult con_mon grp's mon_data directory for results.
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
char *resctrl_val)
@@ -420,7 +576,7 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
out:
ksft_print_msg("Writing benchmark parameters to resctrl FS\n");
if (ret)
- perror("# writing to resctrlfs");
+ ksft_print_msg("Failed writing to resctrlfs\n");
return ret;
}
@@ -430,23 +586,17 @@ out:
* @ctrlgrp: Name of the con_mon grp
* @schemata: Schemata that should be updated to
* @cpu_no: CPU number that the benchmark PID is binded to
- * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ * @resource: Resctrl resource (Eg: MB, L3, L2, etc.)
*
- * Update schemata of a con_mon grp *only* if requested resctrl feature is
+ * Update schemata of a con_mon grp *only* if requested resctrl resource is
* allocation type
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
-int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource)
{
char controlgroup[1024], reason[128], schema[1024] = {};
- int resource_id, fd, schema_len = -1, ret = 0;
-
- if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
- strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) &&
- strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) &&
- strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
- return -ENOENT;
+ int domain_id, fd, schema_len, ret = 0;
if (!schemata) {
ksft_print_msg("Skipping empty schemata update\n");
@@ -454,8 +604,8 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
return -1;
}
- if (get_resource_id(cpu_no, &resource_id) < 0) {
- sprintf(reason, "Failed to get resource id");
+ if (get_domain_id(resource, cpu_no, &domain_id) < 0) {
+ sprintf(reason, "Failed to get domain ID");
ret = -1;
goto out;
@@ -466,14 +616,8 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
else
sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
- if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
- !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
- schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n",
- "L3:", resource_id, '=', schemata);
- if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
- !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
- schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n",
- "MB:", resource_id, '=', schemata);
+ schema_len = snprintf(schema, sizeof(schema), "%s:%d=%s\n",
+ resource, domain_id, schemata);
if (schema_len < 0 || schema_len >= sizeof(schema)) {
snprintf(reason, sizeof(reason),
"snprintf() failed with return value : %d", schema_len);
@@ -564,20 +708,16 @@ char *fgrep(FILE *inf, const char *str)
}
/*
- * validate_resctrl_feature_request - Check if requested feature is valid.
- * @resource: Required resource (e.g., MB, L3, L2, L3_MON, etc.)
- * @feature: Required monitor feature (in mon_features file). Can only be
- * set for L3_MON. Must be NULL for all other resources.
+ * resctrl_resource_exists - Check if a resource is supported.
+ * @resource: Resctrl resource (e.g., MB, L3, L2, L3_MON, etc.)
*
- * Return: True if the resource/feature is supported, else false. False is
+ * Return: True if the resource is supported, else false. False is
* also returned if resctrl FS is not mounted.
*/
-bool validate_resctrl_feature_request(const char *resource, const char *feature)
+bool resctrl_resource_exists(const char *resource)
{
char res_path[PATH_MAX];
struct stat statbuf;
- char *res;
- FILE *inf;
int ret;
if (!resource)
@@ -592,8 +732,25 @@ bool validate_resctrl_feature_request(const char *resource, const char *feature)
if (stat(res_path, &statbuf))
return false;
- if (!feature)
- return true;
+ return true;
+}
+
+/*
+ * resctrl_mon_feature_exists - Check if requested monitoring feature is valid.
+ * @resource: Resource that uses the mon_features file. Currently only L3_MON
+ * is valid.
+ * @feature: Required monitor feature (in mon_features file).
+ *
+ * Return: True if the feature is supported, else false.
+ */
+bool resctrl_mon_feature_exists(const char *resource, const char *feature)
+{
+ char res_path[PATH_MAX];
+ char *res;
+ FILE *inf;
+
+ if (!feature || !resource)
+ return false;
snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource);
inf = fopen(res_path, "r");
@@ -607,6 +764,36 @@ bool validate_resctrl_feature_request(const char *resource, const char *feature)
return !!res;
}
+/*
+ * resource_info_file_exists - Check if a file is present inside
+ * /sys/fs/resctrl/info/@resource.
+ * @resource: Required resource (Eg: MB, L3, L2, etc.)
+ * @file: Required file.
+ *
+ * Return: True if the /sys/fs/resctrl/info/@resource/@file exists, else false.
+ */
+bool resource_info_file_exists(const char *resource, const char *file)
+{
+ char res_path[PATH_MAX];
+ struct stat statbuf;
+
+ if (!file || !resource)
+ return false;
+
+ snprintf(res_path, sizeof(res_path), "%s/%s/%s", INFO_PATH, resource,
+ file);
+
+ if (stat(res_path, &statbuf))
+ return false;
+
+ return true;
+}
+
+bool test_resource_feature_check(const struct resctrl_test *test)
+{
+ return resctrl_resource_exists(test->resource);
+}
+
int filter_dmesg(void)
{
char line[1024];
@@ -617,7 +804,7 @@ int filter_dmesg(void)
ret = pipe(pipefds);
if (ret) {
- perror("pipe");
+ ksft_perror("pipe");
return ret;
}
fflush(stdout);
@@ -626,13 +813,13 @@ int filter_dmesg(void)
close(pipefds[0]);
dup2(pipefds[1], STDOUT_FILENO);
execlp("dmesg", "dmesg", NULL);
- perror("executing dmesg");
+ ksft_perror("Executing dmesg");
exit(1);
}
close(pipefds[1]);
fp = fdopen(pipefds[0], "r");
if (!fp) {
- perror("fdopen(pipe)");
+ ksft_perror("fdopen(pipe)");
kill(pid, SIGTERM);
return -1;
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
index 887542961968..2348d2c20d0a 100644
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void)
{
return rseq_mm_cid_available();
}
+static
+bool rseq_use_cpu_index(void)
+{
+ return false; /* Use mm_cid */
+}
#else
# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
static
@@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void)
{
return rseq_current_cpu_raw() >= 0;
}
+static
+bool rseq_use_cpu_index(void)
+{
+ return true; /* Use cpu_id as index. */
+}
#endif
struct percpu_lock_entry {
@@ -274,7 +284,7 @@ void test_percpu_list(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
for (j = 1; j <= 100; j++) {
struct percpu_list_node *node;
@@ -299,7 +309,7 @@ void test_percpu_list(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_list_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while ((node = __percpu_list_pop(&list, i))) {
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 20403d58345c..2f37961240ca 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void)
{
return rseq_mm_cid_available();
}
+static
+bool rseq_use_cpu_index(void)
+{
+ return false; /* Use mm_cid */
+}
# ifdef TEST_MEMBARRIER
/*
* Membarrier does not currently support targeting a mm_cid, so
@@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void)
{
return rseq_current_cpu_raw() >= 0;
}
+static
+bool rseq_use_cpu_index(void)
+{
+ return true; /* Use cpu_id as index. */
+}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
@@ -715,7 +725,7 @@ void test_percpu_list(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
for (j = 1; j <= 100; j++) {
struct percpu_list_node *node;
@@ -752,7 +762,7 @@ void test_percpu_list(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_list_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while ((node = __percpu_list_pop(&list, i))) {
@@ -902,7 +912,7 @@ void test_percpu_buffer(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
/* Worse-case is every item in same CPU. */
buffer.c[i].array =
@@ -952,7 +962,7 @@ void test_percpu_buffer(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_buffer_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while ((node = __percpu_buffer_pop(&buffer, i))) {
@@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
/* Worse-case is every item in same CPU. */
buffer.c[i].array =
@@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_memcpy_buffer_node item;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
diff --git a/tools/testing/selftests/rust/Makefile b/tools/testing/selftests/rust/Makefile
new file mode 100644
index 000000000000..fce1584d3bc0
--- /dev/null
+++ b/tools/testing/selftests/rust/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS += test_probe_samples.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rust/config b/tools/testing/selftests/rust/config
new file mode 100644
index 000000000000..b4002acd40bc
--- /dev/null
+++ b/tools/testing/selftests/rust/config
@@ -0,0 +1,5 @@
+CONFIG_RUST=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLES_RUST=y
+CONFIG_SAMPLE_RUST_MINIMAL=m
+CONFIG_SAMPLE_RUST_PRINT=m \ No newline at end of file
diff --git a/tools/testing/selftests/rust/test_probe_samples.sh b/tools/testing/selftests/rust/test_probe_samples.sh
new file mode 100755
index 000000000000..ad0397e4986f
--- /dev/null
+++ b/tools/testing/selftests/rust/test_probe_samples.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# This script tests whether the rust sample modules can
+# be added and removed correctly.
+#
+DIR="$(dirname "$(readlink -f "$0")")"
+
+KTAP_HELPERS="${DIR}/../kselftest/ktap_helpers.sh"
+if [ -e "$KTAP_HELPERS" ]; then
+ source "$KTAP_HELPERS"
+else
+ echo "$KTAP_HELPERS file not found [SKIP]"
+ exit 4
+fi
+
+rust_sample_modules=("rust_minimal" "rust_print")
+
+ktap_print_header
+
+for sample in "${rust_sample_modules[@]}"; do
+ if ! /sbin/modprobe -n -q "$sample"; then
+ ktap_skip_all "module $sample is not found in /lib/modules/$(uname -r)"
+ exit "$KSFT_SKIP"
+ fi
+done
+
+ktap_set_plan "${#rust_sample_modules[@]}"
+
+for sample in "${rust_sample_modules[@]}"; do
+ if /sbin/modprobe -q "$sample"; then
+ /sbin/modprobe -q -r "$sample"
+ ktap_test_pass "$sample"
+ else
+ ktap_test_fail "$sample"
+ fi
+done
+
+ktap_finished
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
index 7ba057154343..62fba7356af2 100644
--- a/tools/testing/selftests/sched/cs_prctl_test.c
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -276,7 +276,7 @@ int main(int argc, char *argv[])
if (setpgid(0, 0) != 0)
handle_error("process group");
- printf("\n## Create a thread/process/process group hiearchy\n");
+ printf("\n## Create a thread/process/process group hierarchy\n");
create_processes(num_processes, num_threads, procs);
need_cleanup = 1;
disp_processes(num_processes, procs);
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 5b5c9d558dee..b83099160fbc 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -4,7 +4,9 @@
*/
#define _GNU_SOURCE
#include <assert.h>
+#include <err.h>
#include <limits.h>
+#include <sched.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
@@ -38,10 +40,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples)
i *= 1000000000ULL;
i += finish.tv_nsec - start.tv_nsec;
- printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
- finish.tv_sec, finish.tv_nsec,
- start.tv_sec, start.tv_nsec,
- i, (double)i / 1000000000.0);
+ ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
+ finish.tv_sec, finish.tv_nsec,
+ start.tv_sec, start.tv_nsec,
+ i, (double)i / 1000000000.0);
return i;
}
@@ -53,7 +55,7 @@ unsigned long long calibrate(void)
pid_t pid, ret;
int seconds = 15;
- printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
+ ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
samples = 0;
pid = getpid();
@@ -76,8 +78,12 @@ unsigned long long calibrate(void)
bool approx(int i_one, int i_two)
{
- double one = i_one, one_bump = one * 0.01;
- double two = i_two, two_bump = two * 0.01;
+ /*
+ * This continues to be a noisy test. Instead of a 1% comparison
+ * go with 10%.
+ */
+ double one = i_one, one_bump = one * 0.1;
+ double two = i_two, two_bump = two * 0.1;
one_bump = one + MAX(one_bump, 2.0);
two_bump = two + MAX(two_bump, 2.0);
@@ -98,27 +104,65 @@ bool le(int i_one, int i_two)
}
long compare(const char *name_one, const char *name_eval, const char *name_two,
- unsigned long long one, bool (*eval)(int, int), unsigned long long two)
+ unsigned long long one, bool (*eval)(int, int), unsigned long long two,
+ bool skip)
{
bool good;
- printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
- (long long)one, name_eval, (long long)two);
+ if (skip) {
+ ksft_test_result_skip("%s %s %s\n", name_one, name_eval,
+ name_two);
+ return 0;
+ }
+
+ ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
+ (long long)one, name_eval, (long long)two);
if (one > INT_MAX) {
- printf("Miscalculation! Measurement went negative: %lld\n", (long long)one);
- return 1;
+ ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one);
+ good = false;
+ goto out;
}
if (two > INT_MAX) {
- printf("Miscalculation! Measurement went negative: %lld\n", (long long)two);
- return 1;
+ ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two);
+ good = false;
+ goto out;
}
good = eval(one, two);
printf("%s\n", good ? "✔ï¸" : "âŒ");
+out:
+ ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two);
+
return good ? 0 : 1;
}
+/* Pin to a single CPU so the benchmark won't bounce around the system. */
+void affinity(void)
+{
+ long cpu;
+ ulong ncores = sysconf(_SC_NPROCESSORS_CONF);
+ cpu_set_t *setp = CPU_ALLOC(ncores);
+ ulong setsz = CPU_ALLOC_SIZE(ncores);
+
+ /*
+ * Totally unscientific way to avoid CPUs that might be busier:
+ * choose the highest CPU instead of the lowest.
+ */
+ for (cpu = ncores - 1; cpu >= 0; cpu--) {
+ CPU_ZERO_S(setsz, setp);
+ CPU_SET_S(cpu, setsz, setp);
+ if (sched_setaffinity(getpid(), setsz, setp) == -1)
+ continue;
+ printf("Pinned to CPU %lu of %lu\n", cpu + 1, ncores);
+ goto out;
+ }
+ fprintf(stderr, "Could not set CPU affinity -- calibration may not work well");
+
+out:
+ CPU_FREE(setp);
+}
+
int main(int argc, char *argv[])
{
struct sock_filter bitmap_filter[] = {
@@ -142,27 +186,36 @@ int main(int argc, char *argv[])
unsigned long long samples, calc;
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
unsigned long long entry, per_filter1, per_filter2;
+ bool skip = false;
setbuf(stdout, NULL);
- printf("Running on:\n");
+ ksft_print_header();
+ ksft_set_plan(7);
+
+ ksft_print_msg("Running on:\n");
+ ksft_print_msg("");
system("uname -a");
- printf("Current BPF sysctl settings:\n");
+ ksft_print_msg("Current BPF sysctl settings:\n");
/* Avoid using "sysctl" which may not be installed. */
+ ksft_print_msg("");
system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+ ksft_print_msg("");
system("grep -H . /proc/sys/net/core/bpf_jit_harden");
+ affinity();
+
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
else
samples = calibrate();
- printf("Benchmarking %llu syscalls...\n", samples);
+ ksft_print_msg("Benchmarking %llu syscalls...\n", samples);
/* Native call */
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid native: %llu ns\n", native);
+ ksft_print_msg("getpid native: %llu ns\n", native);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
assert(ret == 0);
@@ -172,35 +225,37 @@ int main(int argc, char *argv[])
assert(ret == 0);
bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
+ ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
/* Second filter resulting in a bitmap */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
assert(ret == 0);
bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
+ ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
/* Third filter, can no longer be converted to bitmap */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0);
filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
+ ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
/* Fourth filter, can not be converted to bitmap because of filter 3 */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
assert(ret == 0);
filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
+ ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
/* Estimations */
#define ESTIMATE(fmt, var, what) do { \
var = (what); \
- printf("Estimated " fmt ": %llu ns\n", var); \
- if (var > INT_MAX) \
- goto more_samples; \
+ ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \
+ if (var > INT_MAX) { \
+ skip = true; \
+ ret |= 1; \
+ } \
} while (0)
ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc,
@@ -218,31 +273,34 @@ int main(int argc, char *argv[])
ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2,
(filter2 - native - entry) / 4);
- printf("Expectations:\n");
- ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1);
- bits = compare("native", "≤", "1 filter", native, le, filter1);
+ ksft_print_msg("Expectations:\n");
+ ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1,
+ skip);
+ bits = compare("native", "≤", "1 filter", native, le, filter1,
+ skip);
if (bits)
- goto more_samples;
+ skip = true;
ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)",
- per_filter1, approx, per_filter2);
+ per_filter1, approx, per_filter2, skip);
bits = compare("1 bitmapped", "≈", "2 bitmapped",
- bitmap1 - native, approx, bitmap2 - native);
+ bitmap1 - native, approx, bitmap2 - native, skip);
if (bits) {
- printf("Skipping constant action bitmap expectations: they appear unsupported.\n");
- goto out;
+ ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n");
+ skip = true;
}
- ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native);
- ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native);
+ ret |= compare("entry", "≈", "1 bitmapped", entry, approx,
+ bitmap1 - native, skip);
+ ret |= compare("entry", "≈", "2 bitmapped", entry, approx,
+ bitmap2 - native, skip);
ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total",
- entry + (per_filter1 * 4) + native, approx, filter2);
- if (ret == 0)
- goto out;
+ entry + (per_filter1 * 4) + native, approx, filter2,
+ skip);
-more_samples:
- printf("Saw unexpected benchmark result. Try running again with more samples?\n");
-out:
- return 0;
+ if (ret)
+ ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n");
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 38f651469968..783ebce8c4de 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -784,7 +784,7 @@ void *kill_thread(void *data)
bool die = (bool)data;
if (die) {
- prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ syscall(__NR_getpid);
return (void *)SIBLING_EXIT_FAILURE;
}
@@ -803,11 +803,11 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
{
pthread_t thread;
void *status;
- /* Kill only when calling __NR_prctl. */
+ /* Kill only when calling __NR_getpid. */
struct sock_filter filter_thread[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
@@ -819,7 +819,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
@@ -1576,7 +1576,7 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
ASSERT_EQ(0, ret);
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
/* Common tracer setup/teardown functions. */
@@ -1623,7 +1623,7 @@ void teardown_trace_fixture(struct __test_metadata *_metadata,
ASSERT_EQ(0, kill(tracer, SIGUSR1));
ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
if (WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
}
@@ -3088,8 +3088,7 @@ TEST(syscall_restart)
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
- : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
EXPECT_EQ(0, close(pipefd[0]));
@@ -3174,7 +3173,7 @@ TEST(syscall_restart)
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
if (WIFSIGNALED(status) || WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
TEST_SIGNAL(filter_flag_log, SIGSYS)
@@ -3709,7 +3708,12 @@ TEST(user_notification_sibling_pid_ns)
ASSERT_GE(pid, 0);
if (pid == 0) {
- ASSERT_EQ(unshare(CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
+ if (errno == EPERM)
+ SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ else if (errno == EINVAL)
+ SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
+ }
pid2 = fork();
ASSERT_GE(pid2, 0);
@@ -3727,6 +3731,8 @@ TEST(user_notification_sibling_pid_ns)
ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
if (errno == EPERM)
SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ else if (errno == EINVAL)
+ SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
}
ASSERT_EQ(errno, 0);
@@ -4037,6 +4043,16 @@ TEST(user_notification_filter_empty_threaded)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
+
+int get_next_fd(int prev_fd)
+{
+ for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) {
+ if (fcntl(i, F_GETFD) == -1)
+ return i;
+ }
+ _exit(EXIT_FAILURE);
+}
+
TEST(user_notification_addfd)
{
pid_t pid;
@@ -4053,7 +4069,7 @@ TEST(user_notification_addfd)
/* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
- nextfd = memfd + 1;
+ nextfd = get_next_fd(memfd);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
@@ -4064,7 +4080,8 @@ TEST(user_notification_addfd)
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
- ASSERT_EQ(listener, nextfd++);
+ ASSERT_EQ(listener, nextfd);
+ nextfd = get_next_fd(nextfd);
pid = fork();
ASSERT_GE(pid, 0);
@@ -4119,14 +4136,16 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@@ -4164,7 +4183,8 @@ TEST(user_notification_addfd)
* Child has earlier "low" fds and now 42, so we expect the next
* lowest available fd to be assigned here.
*/
- EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/*
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c60acba951c2..db176fe7d0c3 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -8,6 +8,7 @@ CONFIG_VETH=y
#
# Core Netfilter Configuration
#
+CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index b53d12909962..b73bd255ea36 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -649,5 +649,408 @@
"teardown": [
"$TC actions flush action mirred"
]
+ },
+ {
+ "id": "456d",
+ "name": "Add mirred mirror to egress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 egress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 egress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2358",
+ "name": "Add mirred mirror to ingress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "ingress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "fdb1",
+ "name": "Add mirred redirect to egress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "20cc",
+ "name": "Add mirred redirect to ingress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "ingress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e739",
+ "name": "Try to add mirred action with both dev and block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21 dev $DEV1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC -j actions list action mirred",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2f47",
+ "name": "Try to add mirred action without specifying neither dev nor block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC -j actions list action mirred",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3188",
+ "name": "Replace mirred redirect to dev action with redirect to block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ],
+ [
+ "$TC actions add action mirred ingress redirect index 1 dev $DEV1",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred egress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "83cc",
+ "name": "Replace mirred redirect to block action with mirror to dev",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ],
+ [
+ "$TC actions add action mirred egress redirect index 1 blockid 21",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred ingress mirror index 1 dev lo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "ingress",
+ "to_dev": "lo",
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
index be293e7c6d18..3a537b2ec4c9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -77,7 +77,7 @@
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq quantum 9000",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
- "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*orphan_mask 1023 quantum 9000b",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*quantum 9000b",
"matchCount": "1",
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 2d603ef2e375..12da0a939e3e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -167,6 +167,7 @@
"plugins": {
"requires": "nsPlugin"
},
+ "dependsOn": "echo '' | jq",
"setup": [
"echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
"$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI",
@@ -192,6 +193,7 @@
"plugins": {
"requires": "nsPlugin"
},
+ "dependsOn": "echo '' | jq",
"setup": [
"echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
"$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2",
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index caeacc691587..ee349187636f 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -541,7 +541,7 @@ def test_runner(pm, args, filtered_tests):
message = pmtf.message
output = pmtf.output
res = TestResult(tidx['id'], tidx['name'])
- res.set_result(ResultState.skip)
+ res.set_result(ResultState.fail)
res.set_errormsg(pmtf.message)
res.set_failmsg(pmtf.output)
tsr.add_resultdata(res)
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index c53ede8b730d..cddff1772e10 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -63,5 +63,4 @@ try_modprobe sch_hfsc
try_modprobe sch_hhf
try_modprobe sch_htb
try_modprobe sch_teql
-./tdc.py -J`nproc` -c actions
-./tdc.py -J`nproc` -c qdisc
+./tdc.py -J`nproc`
diff --git a/tools/testing/selftests/thermal/intel/power_floor/.gitignore b/tools/testing/selftests/thermal/intel/power_floor/.gitignore
new file mode 100644
index 000000000000..1b9a76406f18
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/.gitignore
@@ -0,0 +1 @@
+power_floor_test
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/.gitignore b/tools/testing/selftests/thermal/intel/workload_hint/.gitignore
new file mode 100644
index 000000000000..d697b034a3a8
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/.gitignore
@@ -0,0 +1 @@
+workload_hint_test
diff --git a/tools/testing/selftests/uevent/.gitignore b/tools/testing/selftests/uevent/.gitignore
new file mode 100644
index 000000000000..382afb74cd40
--- /dev/null
+++ b/tools/testing/selftests/uevent/.gitignore
@@ -0,0 +1 @@
+uevent_filtering
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index ae2b33c21c45..554b290fefdc 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -33,8 +33,7 @@ void init_signals(void)
signal(SIGPIPE, SIG_IGN);
}
-/* Parse a CID in string representation */
-unsigned int parse_cid(const char *str)
+static unsigned int parse_uint(const char *str, const char *err_str)
{
char *endptr = NULL;
unsigned long n;
@@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str)
errno = 0;
n = strtoul(str, &endptr, 10);
if (errno || *endptr != '\0') {
- fprintf(stderr, "malformed CID \"%s\"\n", str);
+ fprintf(stderr, "malformed %s \"%s\"\n", err_str, str);
exit(EXIT_FAILURE);
}
return n;
}
+/* Parse a CID in string representation */
+unsigned int parse_cid(const char *str)
+{
+ return parse_uint(str, "CID");
+}
+
+/* Parse a port in string representation */
+unsigned int parse_port(const char *str)
+{
+ return parse_uint(str, "port");
+}
+
/* Wait for the remote to close the connection */
void vsock_wait_remote_close(int fd)
{
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index 03c88d0cb861..e95e62485959 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -12,10 +12,13 @@ enum test_mode {
TEST_MODE_SERVER
};
+#define DEFAULT_PEER_PORT 1234
+
/* Test runner options */
struct test_opts {
enum test_mode mode;
unsigned int peer_cid;
+ unsigned int peer_port;
};
/* A test case definition. Test functions must print failures to stderr and
@@ -35,6 +38,7 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
+unsigned int parse_port(const char *str);
int vsock_stream_connect(unsigned int cid, unsigned int port);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
index fa927ad16f8a..081e045f4696 100644
--- a/tools/testing/vsock/vsock_diag_test.c
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -39,6 +39,8 @@ static const char *sock_type_str(int type)
return "DGRAM";
case SOCK_STREAM:
return "STREAM";
+ case SOCK_SEQPACKET:
+ return "SEQPACKET";
default:
return "INVALID TYPE";
}
@@ -342,7 +344,7 @@ static void test_listen_socket_server(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = VMADDR_CID_ANY,
},
};
@@ -378,7 +380,7 @@ static void test_connect_client(const struct test_opts *opts)
LIST_HEAD(sockets);
struct vsock_stat *st;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -403,7 +405,7 @@ static void test_connect_server(const struct test_opts *opts)
LIST_HEAD(sockets);
int client_fd;
- client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (client_fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -462,6 +464,11 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "peer-port",
+ .has_arg = required_argument,
+ .val = 'q',
+ },
+ {
.name = "list",
.has_arg = no_argument,
.val = 'l',
@@ -481,7 +488,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n"
+ fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n"
"\n"
" Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -503,9 +510,11 @@ static void usage(void)
" --control-port <port> Server port to listen on/connect to\n"
" --mode client|server Server or client mode\n"
" --peer-cid <cid> CID of the other side\n"
+ " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n"
" --list List of tests that will be executed\n"
" --skip <test_id> Test ID to skip;\n"
- " use multiple --skip options to skip more tests\n"
+ " use multiple --skip options to skip more tests\n",
+ DEFAULT_PEER_PORT
);
exit(EXIT_FAILURE);
}
@@ -517,6 +526,7 @@ int main(int argc, char **argv)
struct test_opts opts = {
.mode = TEST_MODE_UNSET,
.peer_cid = VMADDR_CID_ANY,
+ .peer_port = DEFAULT_PEER_PORT,
};
init_signals();
@@ -544,6 +554,9 @@ int main(int argc, char **argv)
case 'p':
opts.peer_cid = parse_cid(optarg);
break;
+ case 'q':
+ opts.peer_port = parse_port(optarg);
+ break;
case 'P':
control_port = optarg;
break;
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 66246d81d654..f851f8961247 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -34,7 +34,7 @@ static void test_stream_connection_reset(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = opts->peer_cid,
},
};
@@ -70,7 +70,7 @@ static void test_stream_bind_only_client(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = opts->peer_cid,
},
};
@@ -112,7 +112,7 @@ static void test_stream_bind_only_server(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = VMADDR_CID_ANY,
},
};
@@ -138,7 +138,7 @@ static void test_stream_client_close_client(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -152,7 +152,7 @@ static void test_stream_client_close_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -173,7 +173,7 @@ static void test_stream_server_close_client(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -194,7 +194,7 @@ static void test_stream_server_close_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -215,7 +215,7 @@ static void test_stream_multiconn_client(const struct test_opts *opts)
int i;
for (i = 0; i < MULTICONN_NFDS; i++) {
- fds[i] = vsock_stream_connect(opts->peer_cid, 1234);
+ fds[i] = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fds[i] < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -239,7 +239,7 @@ static void test_stream_multiconn_server(const struct test_opts *opts)
int i;
for (i = 0; i < MULTICONN_NFDS; i++) {
- fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fds[i] = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fds[i] < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -267,9 +267,9 @@ static void test_msg_peek_client(const struct test_opts *opts,
int i;
if (seqpacket)
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
else
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
@@ -295,9 +295,9 @@ static void test_msg_peek_server(const struct test_opts *opts,
int fd;
if (seqpacket)
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
else
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
@@ -363,7 +363,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
int msg_count;
int fd;
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -434,7 +434,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
struct msghdr msg = {0};
struct iovec iov = {0};
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -505,7 +505,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
int fd;
char buf[MESSAGE_TRUNC_SZ];
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -524,7 +524,7 @@ static void test_seqpacket_msg_trunc_server(const struct test_opts *opts)
struct msghdr msg = {0};
struct iovec iov = {0};
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -575,7 +575,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts)
time_t read_enter_ns;
time_t read_overhead_ns;
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -620,7 +620,7 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -639,7 +639,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
len = sizeof(sock_buf_size);
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -671,7 +671,7 @@ static void test_seqpacket_bigmsg_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -692,7 +692,7 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt
unsigned char *buf2;
int buf_size = getpagesize() * 3;
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -732,7 +732,7 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
int i;
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -808,7 +808,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts)
int fd;
int i;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -839,7 +839,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
short poll_flags;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -906,9 +906,9 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream)
int fd;
if (stream)
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
else
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
@@ -941,9 +941,9 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream)
int fd;
if (stream)
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
else
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
@@ -986,7 +986,7 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1015,7 +1015,7 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
unsigned char buf[64];
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1108,7 +1108,7 @@ static void test_stream_shutwr_client(const struct test_opts *opts)
sigaction(SIGPIPE, &act, NULL);
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1130,7 +1130,7 @@ static void test_stream_shutwr_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1151,7 +1151,7 @@ static void test_stream_shutrd_client(const struct test_opts *opts)
sigaction(SIGPIPE, &act, NULL);
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1170,7 +1170,7 @@ static void test_stream_shutrd_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1193,7 +1193,7 @@ static void test_double_bind_connect_server(const struct test_opts *opts)
struct sockaddr_vm sa_client;
socklen_t socklen_client = sizeof(sa_client);
- listen_fd = vsock_stream_listen(VMADDR_CID_ANY, 1234);
+ listen_fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
for (i = 0; i < 2; i++) {
control_writeln("LISTENING");
@@ -1226,7 +1226,13 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
/* Wait until server is ready to accept a new connection */
control_expectln("LISTENING");
- client_fd = vsock_bind_connect(opts->peer_cid, 1234, 4321, SOCK_STREAM);
+ /* We use 'peer_port + 1' as "some" port for the 'bind()'
+ * call. It is safe for overflow, but must be considered,
+ * when running multiple test applications simultaneously
+ * where 'peer-port' argument differs by 1.
+ */
+ client_fd = vsock_bind_connect(opts->peer_cid, opts->peer_port,
+ opts->peer_port + 1, SOCK_STREAM);
close(client_fd);
}
@@ -1246,7 +1252,7 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt
void *buf;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1282,7 +1288,7 @@ static void test_stream_credit_update_test(const struct test_opts *opts,
void *buf;
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1543,6 +1549,11 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "peer-port",
+ .has_arg = required_argument,
+ .val = 'q',
+ },
+ {
.name = "list",
.has_arg = no_argument,
.val = 'l',
@@ -1562,7 +1573,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n"
+ fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n"
"\n"
" Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -1577,6 +1588,9 @@ static void usage(void)
"connect to.\n"
"\n"
"The CID of the other side must be given with --peer-cid=<cid>.\n"
+ "During the test, two AF_VSOCK ports will be used: the port\n"
+ "specified with --peer-port=<port> (or the default port)\n"
+ "and the next one.\n"
"\n"
"Options:\n"
" --help This help message\n"
@@ -1584,9 +1598,11 @@ static void usage(void)
" --control-port <port> Server port to listen on/connect to\n"
" --mode client|server Server or client mode\n"
" --peer-cid <cid> CID of the other side\n"
+ " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n"
" --list List of tests that will be executed\n"
" --skip <test_id> Test ID to skip;\n"
- " use multiple --skip options to skip more tests\n"
+ " use multiple --skip options to skip more tests\n",
+ DEFAULT_PEER_PORT
);
exit(EXIT_FAILURE);
}
@@ -1598,6 +1614,7 @@ int main(int argc, char **argv)
struct test_opts opts = {
.mode = TEST_MODE_UNSET,
.peer_cid = VMADDR_CID_ANY,
+ .peer_port = DEFAULT_PEER_PORT,
};
srand(time(NULL));
@@ -1626,6 +1643,9 @@ int main(int argc, char **argv)
case 'p':
opts.peer_cid = parse_cid(optarg);
break;
+ case 'q':
+ opts.peer_port = parse_port(optarg);
+ break;
case 'P':
control_port = optarg;
break;
diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c
index a16ff76484e6..04c376b6937f 100644
--- a/tools/testing/vsock/vsock_test_zerocopy.c
+++ b/tools/testing/vsock/vsock_test_zerocopy.c
@@ -152,9 +152,9 @@ static void test_client(const struct test_opts *opts,
int fd;
if (sock_seqpacket)
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
else
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
@@ -248,9 +248,9 @@ static void test_server(const struct test_opts *opts,
int fd;
if (sock_seqpacket)
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
else
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
@@ -323,7 +323,7 @@ void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts)
ssize_t res;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -347,7 +347,7 @@ void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c
index d976d35f0ba9..6c3e6f70c457 100644
--- a/tools/testing/vsock/vsock_uring_test.c
+++ b/tools/testing/vsock/vsock_uring_test.c
@@ -66,7 +66,7 @@ static void vsock_io_uring_client(const struct test_opts *opts,
struct msghdr msg;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -120,7 +120,7 @@ static void vsock_io_uring_server(const struct test_opts *opts,
void *data;
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -248,6 +248,11 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "peer-port",
+ .has_arg = required_argument,
+ .val = 'q',
+ },
+ {
.name = "help",
.has_arg = no_argument,
.val = '?',
@@ -257,7 +262,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+ fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>]\n"
"\n"
" Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -271,6 +276,8 @@ static void usage(void)
" --control-port <port> Server port to listen on/connect to\n"
" --mode client|server Server or client mode\n"
" --peer-cid <cid> CID of the other side\n"
+ " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n",
+ DEFAULT_PEER_PORT
);
exit(EXIT_FAILURE);
}
@@ -282,6 +289,7 @@ int main(int argc, char **argv)
struct test_opts opts = {
.mode = TEST_MODE_UNSET,
.peer_cid = VMADDR_CID_ANY,
+ .peer_port = DEFAULT_PEER_PORT,
};
init_signals();
@@ -309,6 +317,9 @@ int main(int argc, char **argv)
case 'p':
opts.peer_cid = parse_cid(optarg);
break;
+ case 'q':
+ opts.peer_port = parse_port(optarg);
+ break;
case 'P':
control_port = optarg;
break;
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index 2456a399eb9a..afd18c678ff5 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
-fasynchronous-unwind-tables -fstack-clash-protection
WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
+ifeq ($(CC),clang)
+ FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS))
+ WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS))
+endif
+
TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)
CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS)
-LDFLAGS := -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS)
LIBS := $$($(PKG_CONFIG) --libs libtracefs)
SRC := $(wildcard src/*.c)
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 8f81fa007364..01870d50942a 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -135,8 +135,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
if (params->output_divisor)
duration = duration / params->output_divisor;
- if (data->bucket_size)
- bucket = duration / data->bucket_size;
+ bucket = duration / data->bucket_size;
total_duration = duration * count;
@@ -480,7 +479,11 @@ static void osnoise_hist_usage(char *usage)
for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}
/*
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index f7c959be8677..457360db0767 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -331,7 +331,11 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}
/*
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 47d3d8b53cb2..dbf154082f95 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -178,8 +178,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu,
if (params->output_divisor)
latency = latency / params->output_divisor;
- if (data->bucket_size)
- bucket = latency / data->bucket_size;
+ bucket = latency / data->bucket_size;
if (!context) {
hist = data->hist[cpu].irq;
@@ -546,7 +545,11 @@ static void timerlat_hist_usage(char *usage)
for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}
/*
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 1640f121baca..3e9af2c38688 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -375,7 +375,11 @@ static void timerlat_top_usage(char *usage)
for (i = 0; msg[i]; i++)
fprintf(stderr, "%s\n", msg[i]);
- exit(1);
+
+ if (usage)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
}
/*
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index c769d7b3842c..9ac71a66840c 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -238,12 +238,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
return syscall(__NR_sched_setattr, pid, attr, flags);
}
-static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
- unsigned int size, unsigned int flags)
-{
- return syscall(__NR_sched_getattr, pid, attr, size, flags);
-}
-
int __set_sched_attr(int pid, struct sched_attr *attr)
{
int flags = 0;
@@ -479,13 +473,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param)
if (prio == INVALID_VAL)
return -1;
- if (prio < sched_get_priority_min(SCHED_OTHER))
+ if (prio < MIN_NICE)
return -1;
- if (prio > sched_get_priority_max(SCHED_OTHER))
+ if (prio > MAX_NICE)
return -1;
sched_param->sched_policy = SCHED_OTHER;
- sched_param->sched_priority = prio;
+ sched_param->sched_nice = prio;
break;
default:
return -1;
@@ -536,7 +530,7 @@ int set_cpu_dma_latency(int32_t latency)
*/
static const int find_mount(const char *fs, char *mp, int sizeof_mp)
{
- char mount_point[MAX_PATH];
+ char mount_point[MAX_PATH+1];
char type[100];
int found = 0;
FILE *fp;
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
index 04ed1e650495..d44513e6c66a 100644
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -9,6 +9,8 @@
*/
#define BUFF_U64_STR_SIZE 24
#define MAX_PATH 1024
+#define MAX_NICE 20
+#define MIN_NICE -19
#define container_of(ptr, type, member)({ \
const typeof(((type *)0)->member) *__mptr = (ptr); \
diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile
index 3d0f3888a58c..485f8aeddbe0 100644
--- a/tools/verification/rv/Makefile
+++ b/tools/verification/rv/Makefile
@@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
-fasynchronous-unwind-tables -fstack-clash-protection
WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
+ifeq ($(CC),clang)
+ FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS))
+ WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS))
+endif
+
TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)
CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -I include
-LDFLAGS := -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS)
LIBS := $$($(PKG_CONFIG) --libs libtracefs)
SRC := $(wildcard src/*.c)
diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c
index ad28582bcf2b..f04479ecc96c 100644
--- a/tools/verification/rv/src/in_kernel.c
+++ b/tools/verification/rv/src/in_kernel.c
@@ -210,9 +210,9 @@ static char *ikm_read_reactor(char *monitor_name)
static char *ikm_get_current_reactor(char *monitor_name)
{
char *reactors = ikm_read_reactor(monitor_name);
+ char *curr_reactor = NULL;
char *start;
char *end;
- char *curr_reactor;
if (!reactors)
return NULL;
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 9934d48d9a55..7e47b281c442 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
*.d
virtio_test
+vhost_net_test
vringh_test
virtio-trace/trace-agent
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index d128925980e0..e25e99c1c3b7 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
all: test mod
-test: virtio_test vringh_test
+test: virtio_test vringh_test vhost_net_test
virtio_test: virtio_ring.o virtio_test.o
vringh_test: vringh_test.o vringh.o virtio_ring.o
+vhost_net_test: virtio_ring.o vhost_net_test.o
try-run = $(shell set -e; \
if ($(1)) >/dev/null 2>&1; \
@@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean
.PHONY: all test mod clean vhost oot oot-clean oot-build
clean:
- ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
- vhost_test/Module.symvers vhost_test/modules.order *.d
+ ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \
+ vhost_test/.*.cmd vhost_test/Module.symvers \
+ vhost_test/modules.order *.d
-include *.d
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 2a8a70e2a950..42a564f22f2d 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VIRTIO_CONFIG_H
+#define LINUX_VIRTIO_CONFIG_H
#include <linux/virtio_byteorder.h>
#include <linux/virtio.h>
#include <uapi/linux/virtio_config.h>
@@ -95,3 +97,5 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
{
return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
}
+
+#endif
diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c
new file mode 100644
index 000000000000..389d99a6d7c7
--- /dev/null
+++ b/tools/virtio/vhost_net_test.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <limits.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/vhost.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/in.h>
+#include <linux/if_packet.h>
+#include <linux/virtio_net.h>
+#include <netinet/ether.h>
+
+#define HDR_LEN sizeof(struct virtio_net_hdr_mrg_rxbuf)
+#define TEST_BUF_LEN 256
+#define TEST_PTYPE ETH_P_LOOPBACK
+#define DESC_NUM 256
+
+/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+ int kick;
+ int call;
+ int idx;
+ long started;
+ long completed;
+ struct pollfd fds;
+ void *ring;
+ /* copy used for control */
+ struct vring vring;
+ struct virtqueue *vq;
+};
+
+struct vdev_info {
+ struct virtio_device vdev;
+ int control;
+ struct vq_info vqs[2];
+ int nvqs;
+ void *buf;
+ size_t buf_size;
+ char *test_buf;
+ char *res_buf;
+ struct vhost_memory *mem;
+ int sock;
+ int ifindex;
+ unsigned char mac[ETHER_ADDR_LEN];
+};
+
+static int tun_alloc(struct vdev_info *dev, char *tun_name)
+{
+ struct ifreq ifr;
+ int len = HDR_LEN;
+ int fd, e;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ perror("Cannot open /dev/net/tun");
+ return fd;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
+
+ e = ioctl(fd, TUNSETIFF, &ifr);
+ if (e < 0) {
+ perror("ioctl[TUNSETIFF]");
+ close(fd);
+ return e;
+ }
+
+ e = ioctl(fd, TUNSETVNETHDRSZ, &len);
+ if (e < 0) {
+ perror("ioctl[TUNSETVNETHDRSZ]");
+ close(fd);
+ return e;
+ }
+
+ e = ioctl(fd, SIOCGIFHWADDR, &ifr);
+ if (e < 0) {
+ perror("ioctl[SIOCGIFHWADDR]");
+ close(fd);
+ return e;
+ }
+
+ memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+ return fd;
+}
+
+static void vdev_create_socket(struct vdev_info *dev, char *tun_name)
+{
+ struct ifreq ifr;
+
+ dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE));
+ assert(dev->sock != -1);
+
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
+ assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0);
+
+ dev->ifindex = ifr.ifr_ifindex;
+
+ /* Set the flags that bring the device up */
+ assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0);
+ ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+ assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0);
+}
+
+static void vdev_send_packet(struct vdev_info *dev)
+{
+ char *sendbuf = dev->test_buf + HDR_LEN;
+ struct sockaddr_ll saddrll = {0};
+ int sockfd = dev->sock;
+ int ret;
+
+ saddrll.sll_family = PF_PACKET;
+ saddrll.sll_ifindex = dev->ifindex;
+ saddrll.sll_halen = ETH_ALEN;
+ saddrll.sll_protocol = htons(TEST_PTYPE);
+
+ ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0,
+ (struct sockaddr *)&saddrll,
+ sizeof(struct sockaddr_ll));
+ assert(ret >= 0);
+}
+
+static bool vq_notify(struct virtqueue *vq)
+{
+ struct vq_info *info = vq->priv;
+ unsigned long long v = 1;
+ int r;
+
+ r = write(info->kick, &v, sizeof(v));
+ assert(r == sizeof(v));
+
+ return true;
+}
+
+static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+ struct vhost_vring_addr addr = {
+ .index = info->idx,
+ .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+ .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+ .used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+ };
+ struct vhost_vring_state state = { .index = info->idx };
+ struct vhost_vring_file file = { .index = info->idx };
+ int r;
+
+ state.num = info->vring.num;
+ r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ assert(r >= 0);
+
+ state.num = 0;
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ assert(r >= 0);
+
+ r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ assert(r >= 0);
+
+ file.fd = info->kick;
+ r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ assert(r >= 0);
+}
+
+static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
+{
+ if (info->vq)
+ vring_del_virtqueue(info->vq);
+
+ memset(info->ring, 0, vring_size(num, 4096));
+ vring_init(&info->vring, num, info->ring, 4096);
+ info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
+ info->ring, vq_notify, NULL, "test");
+ assert(info->vq);
+ info->vq->priv = info;
+}
+
+static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd)
+{
+ struct vhost_vring_file backend = { .index = idx, .fd = fd };
+ struct vq_info *info = &dev->vqs[idx];
+ int r;
+
+ info->idx = idx;
+ info->kick = eventfd(0, EFD_NONBLOCK);
+ r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+ assert(r >= 0);
+ vq_reset(info, num, &dev->vdev);
+ vhost_vq_setup(dev, info);
+
+ r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend);
+ assert(!r);
+}
+
+static void vdev_info_init(struct vdev_info *dev, unsigned long long features)
+{
+ struct ether_header *eh;
+ int i, r;
+
+ dev->vdev.features = features;
+ INIT_LIST_HEAD(&dev->vdev.vqs);
+ spin_lock_init(&dev->vdev.vqs_list_lock);
+
+ dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2;
+ dev->buf = malloc(dev->buf_size);
+ assert(dev->buf);
+ dev->test_buf = dev->buf;
+ dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN;
+
+ memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN);
+ eh = (struct ether_header *)(dev->test_buf + HDR_LEN);
+ eh->ether_type = htons(TEST_PTYPE);
+ memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN);
+ memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN);
+
+ for (i = sizeof(*eh); i < TEST_BUF_LEN; i++)
+ dev->test_buf[i + HDR_LEN] = (char)i;
+
+ dev->control = open("/dev/vhost-net", O_RDWR);
+ assert(dev->control >= 0);
+
+ r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+ assert(r >= 0);
+
+ dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+ sizeof(dev->mem->regions[0]));
+ assert(dev->mem);
+ memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+ sizeof(dev->mem->regions[0]));
+ dev->mem->nregions = 1;
+ dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+ dev->mem->regions[0].userspace_addr = (long)dev->buf;
+ dev->mem->regions[0].memory_size = dev->buf_size;
+
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+
+ r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ assert(r >= 0);
+
+ dev->nvqs = 2;
+}
+
+static void wait_for_interrupt(struct vq_info *vq)
+{
+ unsigned long long val;
+
+ poll(&vq->fds, 1, 100);
+
+ if (vq->fds.revents & POLLIN)
+ read(vq->fds.fd, &val, sizeof(val));
+}
+
+static void verify_res_buf(char *res_buf)
+{
+ int i;
+
+ for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++)
+ assert(res_buf[i] == (char)i);
+}
+
+static void run_tx_test(struct vdev_info *dev, struct vq_info *vq,
+ bool delayed, int bufs)
+{
+ long long spurious = 0;
+ struct scatterlist sl;
+ unsigned int len;
+ int r;
+
+ for (;;) {
+ long started_before = vq->started;
+ long completed_before = vq->completed;
+
+ virtqueue_disable_cb(vq->vq);
+ do {
+ while (vq->started < bufs &&
+ (vq->started - vq->completed) < 1) {
+ sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN);
+ r = virtqueue_add_outbuf(vq->vq, &sl, 1,
+ dev->test_buf + vq->started,
+ GFP_ATOMIC);
+ if (unlikely(r != 0))
+ break;
+
+ ++vq->started;
+
+ if (unlikely(!virtqueue_kick(vq->vq))) {
+ r = -1;
+ break;
+ }
+ }
+
+ if (vq->started >= bufs)
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ while (virtqueue_get_buf(vq->vq, &len)) {
+ int n;
+
+ n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL);
+ assert(n == TEST_BUF_LEN);
+ verify_res_buf(dev->res_buf);
+
+ ++vq->completed;
+ r = 0;
+ }
+ } while (r == 0);
+
+ if (vq->completed == completed_before && vq->started == started_before)
+ ++spurious;
+
+ assert(vq->completed <= bufs);
+ assert(vq->started <= bufs);
+ if (vq->completed == bufs)
+ break;
+
+ if (delayed) {
+ if (virtqueue_enable_cb_delayed(vq->vq))
+ wait_for_interrupt(vq);
+ } else {
+ if (virtqueue_enable_cb(vq->vq))
+ wait_for_interrupt(vq);
+ }
+ }
+ printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
+ spurious, vq->started, vq->completed);
+}
+
+static void run_rx_test(struct vdev_info *dev, struct vq_info *vq,
+ bool delayed, int bufs)
+{
+ long long spurious = 0;
+ struct scatterlist sl;
+ unsigned int len;
+ int r;
+
+ for (;;) {
+ long started_before = vq->started;
+ long completed_before = vq->completed;
+
+ do {
+ while (vq->started < bufs &&
+ (vq->started - vq->completed) < 1) {
+ sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN);
+
+ r = virtqueue_add_inbuf(vq->vq, &sl, 1,
+ dev->res_buf + vq->started,
+ GFP_ATOMIC);
+ if (unlikely(r != 0))
+ break;
+
+ ++vq->started;
+
+ vdev_send_packet(dev);
+
+ if (unlikely(!virtqueue_kick(vq->vq))) {
+ r = -1;
+ break;
+ }
+ }
+
+ if (vq->started >= bufs)
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ while (virtqueue_get_buf(vq->vq, &len)) {
+ struct ether_header *eh;
+
+ eh = (struct ether_header *)(dev->res_buf + HDR_LEN);
+
+ /* tun netdev is up and running, only handle the
+ * TEST_PTYPE packet.
+ */
+ if (eh->ether_type == htons(TEST_PTYPE)) {
+ assert(len == TEST_BUF_LEN + HDR_LEN);
+ verify_res_buf(dev->res_buf + HDR_LEN);
+ }
+
+ ++vq->completed;
+ r = 0;
+ }
+ } while (r == 0);
+
+ if (vq->completed == completed_before && vq->started == started_before)
+ ++spurious;
+
+ assert(vq->completed <= bufs);
+ assert(vq->started <= bufs);
+ if (vq->completed == bufs)
+ break;
+ }
+
+ printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
+ spurious, vq->started, vq->completed);
+}
+
+static const char optstring[] = "h";
+static const struct option longopts[] = {
+ {
+ .name = "help",
+ .val = 'h',
+ },
+ {
+ .name = "event-idx",
+ .val = 'E',
+ },
+ {
+ .name = "no-event-idx",
+ .val = 'e',
+ },
+ {
+ .name = "indirect",
+ .val = 'I',
+ },
+ {
+ .name = "no-indirect",
+ .val = 'i',
+ },
+ {
+ .name = "virtio-1",
+ .val = '1',
+ },
+ {
+ .name = "no-virtio-1",
+ .val = '0',
+ },
+ {
+ .name = "delayed-interrupt",
+ .val = 'D',
+ },
+ {
+ .name = "no-delayed-interrupt",
+ .val = 'd',
+ },
+ {
+ .name = "buf-num",
+ .val = 'n',
+ .has_arg = required_argument,
+ },
+ {
+ .name = "batch",
+ .val = 'b',
+ .has_arg = required_argument,
+ },
+ {
+ }
+};
+
+static void help(int status)
+{
+ fprintf(stderr, "Usage: vhost_net_test [--help]"
+ " [--no-indirect]"
+ " [--no-event-idx]"
+ " [--no-virtio-1]"
+ " [--delayed-interrupt]"
+ " [--buf-num]"
+ "\n");
+
+ exit(status);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+ char tun_name[IFNAMSIZ];
+ long nbufs = 0x100000;
+ struct vdev_info dev;
+ bool delayed = false;
+ int o, fd;
+
+ for (;;) {
+ o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help(2);
+ case 'e':
+ features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+ break;
+ case 'h':
+ help(0);
+ case 'i':
+ features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+ break;
+ case '0':
+ features &= ~(1ULL << VIRTIO_F_VERSION_1);
+ break;
+ case 'D':
+ delayed = true;
+ break;
+ case 'n':
+ nbufs = strtol(optarg, NULL, 10);
+ assert(nbufs > 0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+done:
+ memset(&dev, 0, sizeof(dev));
+ snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid());
+
+ fd = tun_alloc(&dev, tun_name);
+ assert(fd >= 0);
+
+ vdev_info_init(&dev, features);
+ vq_info_add(&dev, 0, DESC_NUM, fd);
+ vq_info_add(&dev, 1, DESC_NUM, fd);
+ vdev_create_socket(&dev, tun_name);
+
+ run_rx_test(&dev, &dev.vqs[0], delayed, nbufs);
+ run_tx_test(&dev, &dev.vqs[1], delayed, nbufs);
+
+ return 0;
+}
diff --git a/tools/workqueue/wq_dump.py b/tools/workqueue/wq_dump.py
index d0df5833f2c1..d29b918306b4 100644
--- a/tools/workqueue/wq_dump.py
+++ b/tools/workqueue/wq_dump.py
@@ -50,6 +50,7 @@ import drgn
from drgn.helpers.linux.list import list_for_each_entry,list_empty
from drgn.helpers.linux.percpu import per_cpu_ptr
from drgn.helpers.linux.cpumask import for_each_cpu,for_each_possible_cpu
+from drgn.helpers.linux.nodemask import for_each_node
from drgn.helpers.linux.idr import idr_for_each
import argparse
@@ -75,6 +76,22 @@ def cpumask_str(cpumask):
output += f'{v:08x}'
return output.strip()
+wq_type_len = 9
+
+def wq_type_str(wq):
+ if wq.flags & WQ_BH:
+ return f'{"bh":{wq_type_len}}'
+ elif wq.flags & WQ_UNBOUND:
+ if wq.flags & WQ_ORDERED:
+ return f'{"ordered":{wq_type_len}}'
+ else:
+ if wq.unbound_attrs.affn_strict:
+ return f'{"unbound,S":{wq_type_len}}'
+ else:
+ return f'{"unbound":{wq_type_len}}'
+ else:
+ return f'{"percpu":{wq_type_len}}'
+
worker_pool_idr = prog['worker_pool_idr']
workqueues = prog['workqueues']
wq_unbound_cpumask = prog['wq_unbound_cpumask']
@@ -82,6 +99,7 @@ wq_pod_types = prog['wq_pod_types']
wq_affn_dfl = prog['wq_affn_dfl']
wq_affn_names = prog['wq_affn_names']
+WQ_BH = prog['WQ_BH']
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_ORDERED = prog['__WQ_ORDERED']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
@@ -92,6 +110,11 @@ WQ_AFFN_CACHE = prog['WQ_AFFN_CACHE']
WQ_AFFN_NUMA = prog['WQ_AFFN_NUMA']
WQ_AFFN_SYSTEM = prog['WQ_AFFN_SYSTEM']
+POOL_BH = prog['POOL_BH']
+
+WQ_NAME_LEN = prog['WQ_NAME_LEN'].value_()
+cpumask_str_len = len(cpumask_str(wq_unbound_cpumask))
+
print('Affinity Scopes')
print('===============')
@@ -133,10 +156,12 @@ for pi, pool in idr_for_each(worker_pool_idr):
for pi, pool in idr_for_each(worker_pool_idr):
pool = drgn.Object(prog, 'struct worker_pool', address=pool)
- print(f'pool[{pi:0{max_pool_id_len}}] ref={pool.refcnt.value_():{max_ref_len}} nice={pool.attrs.nice.value_():3} ', end='')
+ print(f'pool[{pi:0{max_pool_id_len}}] flags=0x{pool.flags.value_():02x} ref={pool.refcnt.value_():{max_ref_len}} nice={pool.attrs.nice.value_():3} ', end='')
print(f'idle/workers={pool.nr_idle.value_():3}/{pool.nr_workers.value_():3} ', end='')
if pool.cpu >= 0:
print(f'cpu={pool.cpu.value_():3}', end='')
+ if pool.flags & POOL_BH:
+ print(' bh', end='')
else:
print(f'cpus={cpumask_str(pool.attrs.cpumask)}', end='')
print(f' pod_cpus={cpumask_str(pool.attrs.__pod_cpumask)}', end='')
@@ -148,24 +173,13 @@ print('')
print('Workqueue CPU -> pool')
print('=====================')
-print('[ workqueue \ type CPU', end='')
+print(f'[{"workqueue":^{WQ_NAME_LEN-2}}\\ {"type CPU":{wq_type_len}}', end='')
for cpu in for_each_possible_cpu(prog):
print(f' {cpu:{max_pool_id_len}}', end='')
print(' dfl]')
for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
- print(f'{wq.name.string_().decode()[-24:]:24}', end='')
- if wq.flags & WQ_UNBOUND:
- if wq.flags & WQ_ORDERED:
- print(' ordered ', end='')
- else:
- print(' unbound', end='')
- if wq.unbound_attrs.affn_strict:
- print(',S ', end='')
- else:
- print(' ', end='')
- else:
- print(' percpu ', end='')
+ print(f'{wq.name.string_().decode():{WQ_NAME_LEN}} {wq_type_str(wq):10}', end='')
for cpu in for_each_possible_cpu(prog):
pool_id = per_cpu_ptr(wq.cpu_pwq, cpu)[0].pool.id.value_()
@@ -175,3 +189,65 @@ for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(
if wq.flags & WQ_UNBOUND:
print(f' {wq.dfl_pwq.pool.id.value_():{max_pool_id_len}}', end='')
print('')
+
+print('')
+print('Workqueue -> rescuer')
+print('====================')
+
+ucpus_len = max(cpumask_str_len, len("unbound_cpus"))
+rcpus_len = max(cpumask_str_len, len("rescuer_cpus"))
+
+print(f'[{"workqueue":^{WQ_NAME_LEN-2}}\\ {"unbound_cpus":{ucpus_len}} pid {"rescuer_cpus":{rcpus_len}} ]')
+
+for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
+ if not (wq.flags & WQ_MEM_RECLAIM):
+ continue
+
+ print(f'{wq.name.string_().decode():{WQ_NAME_LEN}}', end='')
+ if wq.unbound_attrs.value_() != 0:
+ print(f' {cpumask_str(wq.unbound_attrs.cpumask):{ucpus_len}}', end='')
+ else:
+ print(f' {"":{ucpus_len}}', end='')
+
+ print(f' {wq.rescuer.task.pid.value_():6}', end='')
+ print(f' {cpumask_str(wq.rescuer.task.cpus_ptr):{rcpus_len}}', end='')
+ print('')
+
+print('')
+print('Unbound workqueue -> node_nr/max_active')
+print('=======================================')
+
+if 'node_to_cpumask_map' in prog:
+ __cpu_online_mask = prog['__cpu_online_mask']
+ node_to_cpumask_map = prog['node_to_cpumask_map']
+ nr_node_ids = prog['nr_node_ids'].value_()
+
+ print(f'online_cpus={cpumask_str(__cpu_online_mask.address_of_())}')
+ for node in for_each_node():
+ print(f'NODE[{node:02}]={cpumask_str(node_to_cpumask_map[node])}')
+ print('')
+
+ print(f'[{"workqueue":^{WQ_NAME_LEN-2}}\\ min max', end='')
+ first = True
+ for node in for_each_node():
+ if first:
+ print(f' NODE {node}', end='')
+ first = False
+ else:
+ print(f' {node:7}', end='')
+ print(f' {"dfl":>7} ]')
+ print('')
+
+ for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
+ if not (wq.flags & WQ_UNBOUND):
+ continue
+
+ print(f'{wq.name.string_().decode():{WQ_NAME_LEN}} ', end='')
+ print(f'{wq.min_active.value_():3} {wq.max_active.value_():3}', end='')
+ for node in for_each_node():
+ nna = wq.node_nr_active[node]
+ print(f' {nna.nr.counter.value_():3}/{nna.max.value_():3}', end='')
+ nna = wq.node_nr_active[nr_node_ids]
+ print(f' {nna.nr.counter.value_():3}/{nna.max.value_():3}')
+else:
+ printf(f'node_to_cpumask_map not present, is NUMA enabled?')
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 10bfc88a69f7..0f50960b0e3a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1615,7 +1615,13 @@ static int check_memory_region_flags(struct kvm *kvm,
valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
#ifdef __KVM_HAVE_READONLY_MEM
- valid_flags |= KVM_MEM_READONLY;
+ /*
+ * GUEST_MEMFD is incompatible with read-only memslots, as writes to
+ * read-only memslots have emulated MMIO, not page fault, semantics,
+ * and KVM doesn't allow emulated MMIO for private memory.
+ */
+ if (!(mem->flags & KVM_MEM_GUEST_MEMFD))
+ valid_flags |= KVM_MEM_READONLY;
#endif
if (mem->flags & ~valid_flags)